lyber-core 0.9.6.2.3 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +18 -0
- data/lib/dlss_service.rb +0 -1
- data/lib/dor_service.rb +125 -72
- data/lib/lyber_core.rb +6 -3
- data/lib/lyber_core/config.rb +13 -0
- data/lib/lyber_core/connection.rb +34 -1
- data/lib/lyber_core/destroyer.rb +0 -1
- data/lib/lyber_core/exceptions/chained_error.rb +21 -0
- data/lib/lyber_core/exceptions/fatal_error.rb +10 -0
- data/lib/lyber_core/exceptions/item_error.rb +19 -0
- data/lib/lyber_core/exceptions/service_error.rb +10 -0
- data/lib/lyber_core/log.rb +17 -4
- data/lib/lyber_core/robots/robot.rb +185 -66
- data/lib/lyber_core/robots/service_controller.rb +174 -0
- data/lib/lyber_core/robots/work_item.rb +24 -15
- data/lib/lyber_core/robots/work_queue.rb +27 -4
- data/lib/lyber_core/robots/workspace.rb +2 -2
- data/lib/lyber_core/utils/checksum_validate.rb +1 -1
- data/lib/lyber_core/utils/file_utilities.rb +10 -10
- data/lib/xml_models/identity_metadata/dublin_core.rb +116 -0
- data/lib/xml_models/identity_metadata/identity_metadata.rb +264 -0
- metadata +338 -249
- checksums.yaml +0 -15
- data/lib/dor/base.rb +0 -18
- data/lib/dor/suri_service.rb +0 -28
- data/lib/dor/workflow_service.rb +0 -112
- data/lib/lyber_core/rake/dlss_release.rb +0 -126
- data/lib/roxml_models/identity_metadata/dublin_core.rb +0 -46
- data/lib/roxml_models/identity_metadata/identity_metadata.rb +0 -118
data/lib/lyber_core/destroyer.rb
CHANGED
@@ -0,0 +1,21 @@
|
|
1
|
+
# Provides a way of wrapping a caught exception inside a new exception.
|
2
|
+
# The original exception is optionally passed in as the cause parameter of the constructor
|
3
|
+
# see: http://ruby.runpaint.org/exceptions
|
4
|
+
# see: http://en.wikipedia.org/wiki/Exception_chaining
|
5
|
+
# see: http://www.ruby-forum.com/topic/148193
|
6
|
+
# see: http://jqr.github.com/2009/02/11/passing-data-with-ruby-exceptions.html
|
7
|
+
module LyberCore
  module Exceptions
    # A StandardError that can carry the exception which caused it.
    #
    # When +cause+ is an Exception, its +inspect+ output is appended to the
    # message and its backtrace is adopted as this error's backtrace.
    # Any other +cause+ value (including the default nil) is ignored.
    class ChainedError < StandardError
      # message:: text describing the failure
      # cause::   optional Exception that triggered this error
      def initialize(message, cause=nil)
        unless cause.is_a?(Exception)
          super(message)
          return
        end

        # example: "My message; caused by #<Interrupt: interrupt message>"
        super("#{message}; caused by #{cause.inspect}")
        set_backtrace(cause.backtrace)
      end
    end
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'chained_error')
|
2
|
+
|
3
|
+
# A FatalError is used to wrap an unrecoverable causal exception
|
4
|
+
# And create a new exception that is usually treated as a fatal error
|
5
|
+
module LyberCore
  module Exceptions
    # Chained error that is treated as fatal rather than as a per-item failure.
    # It adds no behaviour of its own; it exists so callers can rescue or test
    # for this class specifically.
    class FatalError < ChainedError; end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'chained_error')
|
2
|
+
|
3
|
+
# An ItemError is used to wrap a causal exception
|
4
|
+
# And create a new exception that usually terminates processing of the current item
|
5
|
+
# the druid parameter makes it convenient to include the object id using a std message syntax
|
6
|
+
module LyberCore
  module Exceptions
    # Chained error tied to a single object.
    class ItemError < LyberCore::Exceptions::ChainedError
      # druid:: identifier of the object being processed; when truthy it is
      #         prefixed to the message as "<druid> - <msg>"
      # msg::   description of the failure
      # cause:: optional causal exception, passed through to ChainedError
      def initialize(druid, msg, cause=nil)
        text = druid ? "#{druid} - #{msg}" : msg
        super(text, cause)
      end
    end
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'fatal_error')
|
2
|
+
|
3
|
+
# A ServiceError is used to wrap timeouts, HTTP exceptions, etc
|
4
|
+
# And create a new exception that is usually treated as a fatal error
|
5
|
+
module LyberCore
  module Exceptions
    # Fatal error raised for failures of an external service
    # (timeouts, HTTP exceptions, etc). Adds no behaviour beyond FatalError.
    class ServiceError < FatalError; end
  end
end
|
data/lib/lyber_core/log.rb
CHANGED
@@ -45,7 +45,7 @@ module LyberCore
|
|
45
45
|
@@log.level = current_log_level
|
46
46
|
@@log.formatter = current_formatter
|
47
47
|
rescue Exception => e
|
48
|
-
raise e, "Couldn't initialize logfile #{new_logfile}: #{e.backtrace}"
|
48
|
+
raise e, "Couldn't initialize logfile #{new_logfile} because\n#{e.message}: #{e.backtrace.join(%{\n})}}"
|
49
49
|
end
|
50
50
|
|
51
51
|
end
|
@@ -60,7 +60,7 @@ module LyberCore
|
|
60
60
|
# Logger::DEBUG (0): low-level information for developers
|
61
61
|
def Log.set_level(loglevel)
|
62
62
|
begin
|
63
|
-
if [0,1,2,3,4].
|
63
|
+
if [0,1,2,3,4].include? loglevel
|
64
64
|
@@log.level = loglevel
|
65
65
|
@@log.debug "Setting LyberCore::Log.level to #{loglevel}"
|
66
66
|
else
|
@@ -69,7 +69,7 @@ module LyberCore
|
|
69
69
|
@@log.level = 0
|
70
70
|
end
|
71
71
|
rescue Exception => e
|
72
|
-
raise e, "Couldn't set log level: #{e.backtrace}"
|
72
|
+
raise e, "Couldn't set log level because\n#{e.message}: #{e.backtrace.join(%{\n})}"
|
73
73
|
end
|
74
74
|
end
|
75
75
|
|
@@ -97,7 +97,20 @@ module LyberCore
|
|
97
97
|
def Log.debug(msg)
|
98
98
|
@@log.add(Logger::DEBUG) { msg }
|
99
99
|
end
|
100
|
-
|
100
|
+
|
101
|
+
# Log an exception using the text built by Log.exception_message.
# A LyberCore::Exceptions::FatalError goes to Log.fatal; anything else
# goes to Log.error.
def Log.exception(e)
  text = Log.exception_message(e)
  e.is_a?(LyberCore::Exceptions::FatalError) ? Log.fatal(text) : Log.error(text)
end
|
109
|
+
|
110
|
+
# Build the log text for an exception.
#
# The first line is e.inspect with any embedded line breaks replaced by
# '; ', followed by the backtrace (one frame per line) when there is one.
#
# Always returns a String. The earlier version ended on the modifier-if
# and therefore returned nil for an exception without a backtrace
# (e.g. one that was built but never raised).
def Log.exception_message(e)
  msg = e.inspect.split($/).join('; ') + "\n"
  msg << e.backtrace.join("\n") if e.backtrace
  msg
end
|
101
114
|
|
102
115
|
end
|
103
116
|
|
@@ -1,21 +1,27 @@
|
|
1
|
-
# == Usage
|
1
|
+
# == Usage
|
2
2
|
# ruby_cl_skeleton [options] source_file
|
3
3
|
#
|
4
4
|
# For help use: ruby_cl_skeleton -h
|
5
5
|
|
6
6
|
module LyberCore
|
7
7
|
module Robots
|
8
|
+
|
9
|
+
CONTINUE = 0
|
10
|
+
SLEEP = 1
|
11
|
+
HALT = 2
|
12
|
+
|
8
13
|
require 'optparse'
|
9
14
|
require 'ostruct'
|
10
15
|
|
11
16
|
# ===== Usage
|
12
17
|
# User defined robots should derive from this class and override the #process_item method
|
13
18
|
class Robot
|
19
|
+
|
14
20
|
attr_accessor :workflow_name
|
15
21
|
attr_accessor :workflow_step
|
16
|
-
|
22
|
+
|
17
23
|
# A LyberCore::Robots::Workflow object
|
18
|
-
attr_accessor :workflow
|
24
|
+
attr_accessor :workflow
|
19
25
|
attr_accessor :collection_name
|
20
26
|
attr_accessor :workspace
|
21
27
|
attr_accessor :args
|
@@ -23,7 +29,7 @@ module LyberCore
|
|
23
29
|
|
24
30
|
|
25
31
|
# Available options
|
26
|
-
# - :collection_name - The collection this workflow should work with.
|
32
|
+
# - :collection_name - The collection this workflow should work with.
|
27
33
|
# Defined as a subdirectory within ROBOT_ROOT/config/workflows/your_workflow/your_collection
|
28
34
|
# - :workspace - Full path of where to find content for a particular workflow
|
29
35
|
# - :logfile - Where to write log messages
|
@@ -31,6 +37,7 @@ module LyberCore
|
|
31
37
|
def initialize(workflow_name, workflow_step, args = {})
|
32
38
|
@workflow_name = workflow_name
|
33
39
|
@workflow_step = workflow_step
|
40
|
+
#TODO: Replace 'dor.' with actual repository ID
|
34
41
|
@collection_name = args[:collection_name]
|
35
42
|
@opts = args
|
36
43
|
|
@@ -42,30 +49,32 @@ module LyberCore
|
|
42
49
|
LyberCore::Log.set_logfile(robot_logfile)
|
43
50
|
end
|
44
51
|
|
45
|
-
LyberCore::Log.set_level(args[:loglevel]) if args[:loglevel]
|
46
|
-
|
52
|
+
LyberCore::Log.set_level(args[:loglevel]) if args[:loglevel]
|
53
|
+
|
47
54
|
# Set defaults
|
48
55
|
@options = OpenStruct.new
|
49
56
|
self.parse_options
|
50
57
|
self.create_workflow
|
51
58
|
self.set_workspace
|
52
|
-
end
|
53
59
|
|
60
|
+
@msg_queue_name = "/queue/#{@workflow.repository}.#{@workflow_name}.#{@workflow_step}"
|
61
|
+
end
|
62
|
+
|
54
63
|
# Some workflows require a directory where their content lives
|
55
64
|
# If a robot is invoked with a :workspace => true option, its @workspace
|
56
|
-
# should be set from the value in
|
65
|
+
# should be set from the value in
|
57
66
|
def set_workspace
  # Nothing to do unless a workspace is configured for the robots.
  return unless Dor::Config.robots.workspace

  @workspace = LyberCore::Robots::Workspace.new(@workflow_name, @collection_name)
  LyberCore::Log.debug("workspace = #{workspace.inspect}")
end
|
63
|
-
|
72
|
+
|
64
73
|
# Create the workflow at instantiation, not when we start running the robot.
|
65
74
|
# That way we can do better error checking and ensure that everything is going
|
66
75
|
# to run okay before we actually start things.
|
67
76
|
def create_workflow
|
68
|
-
|
77
|
+
|
69
78
|
unless defined?(WORKFLOW_URI)
|
70
79
|
LyberCore::Log.fatal "FATAL: WORKFLOW_URI is not defined"
|
71
80
|
LyberCore::Log.fatal "Usually this is a value like 'http://lyberservices-dev.stanford.edu/workflow'"
|
@@ -75,46 +84,138 @@ module LyberCore
|
|
75
84
|
LyberCore::Log.debug("About to instatiate a Workflow object
|
76
85
|
-- LyberCore::Robots::Workflow.new(#{@workflow_name},#{collection_name}")
|
77
86
|
@workflow = LyberCore::Robots::Workflow.new(@workflow_name, {:logger => @logger, :collection_name => @collection_name})
|
78
|
-
|
87
|
+
|
79
88
|
end
|
80
|
-
|
81
|
-
# == Create a new workflow
|
82
|
-
def
|
83
|
-
|
89
|
+
|
90
|
+
# == Run the robot in standalone mode
# Builds the work queue, processes every item in it, and reports whether the
# run stayed below the queue's error limit.
#
# Returns false when queue.max_errors_reached? is truthy, true otherwise.
def start_standalone()
  LyberCore::Log.debug("Running as standalone...")
  work_queue = establish_queue()
  process_queue(work_queue)
  !work_queue.max_errors_reached?
end
|
99
|
+
|
100
|
+
# Run as master: publish the druid of every queued work item to the robot's
# message queue so that slave processes can pick the items up.
#
# stomp:: broker connection responding to begin, publish, commit and abort
#
# Each item is enqueued in its own broker transaction named
# "enqueue_<druid>". The publish carries that name in its :transaction
# header, because STOMP only includes a SEND frame in a transaction when the
# header is present; without it the begin/commit/abort calls had no effect on
# the message. If publishing or updating the item status fails, the
# transaction is aborted, the failure is logged, and the next item is tried.
#
# Raises LyberCore::Exceptions::FatalError when a single enqueue takes longer
# than MSG_BROKER_TIMEOUT seconds.
def start_master(stomp)
  LyberCore::Log.info("Running as master...")
  LyberCore::Log.info("Publishing to #{@msg_queue_name}")
  queue = establish_queue()
  while work_item = queue.next_item do
    transaction = "enqueue_#{work_item.druid}"
    stomp.begin(transaction)
    begin
      # Timeout.timeout replaces the deprecated top-level Object#timeout.
      Timeout.timeout(MSG_BROKER_TIMEOUT) do
        begin
          stomp.publish(@msg_queue_name, work_item.druid, :persistent => true, :transaction => transaction)
          work_item.set_status('queued')
          stomp.commit(transaction)
        rescue => e
          stomp.abort(transaction)
          # Keep going with the next item, but leave a trace of the failure.
          LyberCore::Log.error("Unable to enqueue #{work_item.druid}: #{e.inspect}")
        end
      end
    rescue Timeout::Error
      # the FatalError will be trapped and logged by the start() method
      raise LyberCore::Exceptions::FatalError.new("Message broker unreachable for more than #{MSG_BROKER_TIMEOUT} seconds. Aborting master mode.")
    end
  end
end
|
122
|
+
|
123
|
+
# Run as slave: subscribe to the robot's message queue and process each druid
# that arrives until the queue stays silent for MSG_BROKER_TIMEOUT seconds or
# the user sends SIGINT.
#
# stomp:: broker connection responding to subscribe, receive and ack
#
# Every received frame is acknowledged once handling is finished, including
# when processing raised. Returns nil.
def start_slave(stomp)
  LyberCore::Log.info("Running as slave...")
  # Note: stomp is a Stomp::Connection, not a Stomp::Client!
  LyberCore::Log.info("Subscribing to #{@msg_queue_name}")
  stomp.subscribe(@msg_queue_name, :ack => :client)
  interrupt = false
  # The handler only sets a flag. The shutdown message is logged after the
  # loop, because LyberCore::Log writes through a Logger (see Log.debug) and
  # Logger takes a lock, which Ruby does not allow in trap context.
  old_trap = trap("SIGINT") { interrupt = true }
  begin
    until interrupt
      # Start every iteration without a message so that a failing receive
      # cannot cause the previous message to be acknowledged a second time.
      msg = nil
      begin
        # Timeout.timeout replaces the deprecated top-level Object#timeout.
        Timeout.timeout(MSG_BROKER_TIMEOUT) do
          msg = stomp.receive
        end
        if msg.command == 'MESSAGE'
          queue = @workflow.queue(@workflow_step)
          queue.enqueue_druids([msg.body.strip])
          process_queue(queue)
        end
        # TODO: Generate statistics about the work
      rescue Timeout::Error
        # Nothing arrived in time: stop listening.
        msg = nil
        break
      ensure
        stomp.ack(msg.headers['message-id']) unless msg.nil?
      end
    end
  ensure
    trap("SIGINT", old_trap)
    LyberCore::Log.info("Shutting down due to user interrupt...") if interrupt
  end
  # TODO: Decouple work_item, work_queue, and identity logic
end
|
160
|
+
|
161
|
+
# Entry point of the robot.
#
# Standalone mode (any mode other than :master or :slave) returns
# LyberCore::Robots::CONTINUE when start_standalone reports success and
# LyberCore::Robots::HALT otherwise.
#
# In :master or :slave mode the stomp library is loaded lazily and a broker
# connection is opened. A master first publishes its queue and then, like a
# slave, consumes from it; the value of start_slave is returned.
#
# An EmptyQueue error yields LyberCore::Robots::SLEEP. Any other exception is
# handed to LyberCore::Log.exception, whose result is returned.
def start()
  LyberCore::Log.debug("Starting robot...")

  unless [:master, :slave].include?(@options.mode)
    return start_standalone() ? LyberCore::Robots::CONTINUE : LyberCore::Robots::HALT
  end

  require 'stomp'

  broker_settings = {
    :hosts => [{:host => MSG_BROKER_HOST, :port => MSG_BROKER_PORT}],
    :initial_reconnect_delay => 1.0,
    :use_exponential_back_off => true,
    :back_off_multiplier => 1.05,
    :max_reconnect_delay => 3.0,
    :reliable => true
  }

  stomp = Stomp::Connection.new(broker_settings)
  start_master(stomp) if @options.mode == :master
  # Run as slave when master is done
  start_slave(stomp)
rescue LyberCore::Exceptions::EmptyQueue
  LyberCore::Log.info("Empty queue -- no objects to process")
  return LyberCore::Robots::SLEEP
rescue Exception => e
  LyberCore::Log.exception(e)
end
|
107
195
|
|
196
|
+
# Build the work queue for this robot's workflow step.
#
# When a druid or a druid file was given in the options, the queue is filled
# from get_druid_list; otherwise the queue is asked to enqueue the items
# waiting for this step.
#
# Returns the queue.
def establish_queue()
  work_queue = @workflow.queue(@workflow_step)

  # If we have arguments, parse out the parts that indicate druids
  explicit_druids = @options.file || @options.druid
  if explicit_druids
    work_queue.enqueue_druids(get_druid_list)
  else
    work_queue.enqueue_workstep_waiting()
  end

  work_queue
end
|
208
|
+
|
108
209
|
# Generate a list of druids to process
|
109
210
|
def get_druid_list
|
110
|
-
|
211
|
+
|
111
212
|
druid_list = Array.new
|
112
|
-
|
213
|
+
|
113
214
|
# append any druids passed explicitly
|
114
215
|
if(@options.druid)
|
115
216
|
druid_list << @options.druid
|
116
217
|
end
|
117
|
-
|
218
|
+
|
118
219
|
# identifier list is in a file
|
119
220
|
if (@options.file && File.exist?(@options.file))
|
120
221
|
File.open(@options.file) do |file|
|
@@ -126,36 +227,41 @@ module LyberCore
|
|
126
227
|
end
|
127
228
|
end
|
128
229
|
end
|
129
|
-
|
230
|
+
|
130
231
|
return druid_list
|
131
232
|
end
|
132
233
|
|
133
234
|
# Pull items from the queue one at a time and hand each to
# process_work_item, stopping as soon as next_item returns nil or false.
def process_queue(queue)
  work_item = queue.next_item
  while work_item
    process_work_item(work_item)
    work_item = queue.next_item
  end
end
|
147
|
-
|
239
|
+
|
240
|
+
# Process one work item and record the outcome on it.
#
# Calls the subclass' process_item, then work_item.set_success. Any failure
# is stored on the item via work_item.set_error, with two exceptions that are
# re-raised untouched:
# * LyberCore::Exceptions::FatalError, which must stop the whole run
# * SystemExit, SignalException (including Interrupt) and NoMemoryError, so
#   that Ctrl-C, SIGTERM and exit can still stop a robot in the middle of an
#   item instead of being written off as an item error
def process_work_item(work_item)
  #call overridden method
  process_item(work_item)
  work_item.set_success
rescue LyberCore::Exceptions::FatalError, SystemExit, SignalException, NoMemoryError
  # ToDo cleanup/rollback transaction
  raise
rescue Exception => e
  # ToDo cleanup/rollback transaction
  work_item.set_error(e)
end
|
253
|
+
|
148
254
|
# Hook for the actual work: subclasses override this method to process a
# single work item. The base implementation always raises a RuntimeError.
def process_item(work_item)
  # placeholder only -- child classes supply the real implementation
  raise RuntimeError, 'You must implement this method in your subclass'
end
|
259
|
+
|
154
260
|
# ###########################
|
155
|
-
# command line option parsing
|
156
|
-
|
261
|
+
# command line option parsing
|
262
|
+
|
157
263
|
def parse_options
|
158
|
-
|
264
|
+
|
159
265
|
options = {}
|
160
266
|
|
161
267
|
o = OptionParser.new do |opts|
|
@@ -165,50 +271,63 @@ module LyberCore
|
|
165
271
|
opts.on("-d DRUID", "--druid DRUID", "Pass in a druid to process") do |d|
|
166
272
|
@options.druid = d
|
167
273
|
end
|
168
|
-
|
274
|
+
|
169
275
|
opts.on("-f", "--file FILE", "Pass in a file of druids to process") do |f|
|
170
276
|
@options.file = f
|
171
277
|
end
|
172
|
-
|
278
|
+
|
279
|
+
opts.on("-m MODE", "--mode MODE", "Specify the mode to run in") do |m|
|
280
|
+
case m
|
281
|
+
when "master"
|
282
|
+
@options.mode = :master
|
283
|
+
when "slave"
|
284
|
+
@options.mode = :slave
|
285
|
+
when "default"
|
286
|
+
@options.mode = :default
|
287
|
+
else
|
288
|
+
raise OptionParser::InvalidArgument, "Invalid mode: #{m}"
|
289
|
+
end
|
290
|
+
end
|
291
|
+
|
173
292
|
end
|
174
|
-
|
293
|
+
|
175
294
|
# Parse the command line options and ignore anything not specified above
|
176
295
|
begin
|
177
|
-
o.parse!
|
296
|
+
o.parse!(@opts[:argv] || ARGV)
|
178
297
|
rescue OptionParser::InvalidOption => e
|
179
298
|
LyberCore::Log.debug("e.inspect")
|
180
299
|
rescue OptionParser::ParseError => e
|
181
|
-
LyberCore::Log.error("Couldn't parse options: #{e.backtrace}")
|
300
|
+
LyberCore::Log.error("Couldn't parse options: #{e.backtrace}")
|
182
301
|
raise e
|
183
302
|
end
|
184
|
-
|
303
|
+
|
185
304
|
end
|
186
305
|
|
187
306
|
# def output_options
|
188
307
|
# puts "Options:\n"
|
189
|
-
#
|
190
|
-
# @options.marshal_dump.each do |name, val|
|
308
|
+
#
|
309
|
+
# @options.marshal_dump.each do |name, val|
|
191
310
|
# puts " #{name} = #{val}"
|
192
311
|
# end
|
193
312
|
# end
|
194
|
-
#
|
313
|
+
#
|
195
314
|
# def output_help
|
196
315
|
# output_version
|
197
316
|
# RDoc::usage() #exits app
|
198
317
|
# end
|
199
|
-
#
|
318
|
+
#
|
200
319
|
# def output_usage
|
201
320
|
# RDoc::usage('usage') # gets usage from comments above
|
202
321
|
# end
|
203
|
-
#
|
322
|
+
#
|
204
323
|
# def output_version
|
205
324
|
# puts "#{File.basename(__FILE__)} version #{VERSION}"
|
206
325
|
# end
|
207
|
-
|
326
|
+
|
208
327
|
# ##################################
|
209
|
-
# end of command line option parsing
|
328
|
+
# end of command line option parsing
|
210
329
|
# ##################################
|
211
|
-
|
330
|
+
|
212
331
|
end # end of class
|
213
332
|
end # end of Robots module
|
214
333
|
end # end of LyberCore module
|