lyber-core 0.9.6.2.3 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,6 @@ module LyberCore
2
2
 
3
3
  class LyberCore::Destroyer
4
4
 
5
- require 'rubygems'
6
5
  require 'active-fedora'
7
6
  require 'open-uri'
8
7
 
@@ -0,0 +1,21 @@
1
+ # Provides a way of wrapping a caught exception inside a new exception.
2
+ # The original exception is optionally passed in as the cause parameter of the constructor
3
+ # see: http://ruby.runpaint.org/exceptions
4
+ # see: http://en.wikipedia.org/wiki/Exception_chaining
5
+ # see: http://www.ruby-forum.com/topic/148193
6
+ # see: http://jqr.github.com/2009/02/11/passing-data-with-ruby-exceptions.html
7
+ module LyberCore
8
+ module Exceptions
9
+ class ChainedError < StandardError
10
+ def initialize(message, cause=nil)
11
+ if (cause && cause.is_a?(Exception))
12
+ # example: "My message; caused by #<Interrupt: interrupt message>"
13
+ super("#{message}; caused by #{cause.inspect}")
14
+ self.set_backtrace(cause.backtrace)
15
+ else
16
+ super(message)
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,10 @@
1
+ require File.join(File.dirname(__FILE__), 'chained_error')
2
+
3
+ # A FatalError is used to wrap timeouts, HTTP exceptions, etc
4
+ # And create a new exception that is usually treated as a fatal error
5
+ module LyberCore
6
+ module Exceptions
7
+ class FatalError < LyberCore::Exceptions::ChainedError
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,19 @@
1
+ require File.join(File.dirname(__FILE__), 'chained_error')
2
+
3
+ # An ItemError is used to wrap a causal exception
4
+ # And create a new exception that usually terminates processing of the current item
5
+ # the druid parameter makes it convenient to include the object id using a std message syntax
6
+ module LyberCore
7
+ module Exceptions
8
+ class ItemError < LyberCore::Exceptions::ChainedError
9
+ def initialize(druid, msg, cause=nil)
10
+ if (druid)
11
+ message = "#{druid} - #{msg}"
12
+ else
13
+ message= msg
14
+ end
15
+ super(message, cause)
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,10 @@
1
+ require File.join(File.dirname(__FILE__), 'fatal_error')
2
+
3
+ # A ServiceError is used to wrap timeouts, HTTP exceptions, etc
4
+ # And create a new exception that is usually treated as a fatal error
5
+ module LyberCore
6
+ module Exceptions
7
+ class ServiceError < LyberCore::Exceptions::FatalError
8
+ end
9
+ end
10
+ end
@@ -45,7 +45,7 @@ module LyberCore
45
45
  @@log.level = current_log_level
46
46
  @@log.formatter = current_formatter
47
47
  rescue Exception => e
48
- raise e, "Couldn't initialize logfile #{new_logfile}: #{e.backtrace}"
48
+ raise e, "Couldn't initialize logfile #{new_logfile} because\n#{e.message}: #{e.backtrace.join(%{\n})}}"
49
49
  end
50
50
 
51
51
  end
@@ -60,7 +60,7 @@ module LyberCore
60
60
  # Logger::DEBUG (0): low-level information for developers
61
61
  def Log.set_level(loglevel)
62
62
  begin
63
- if [0,1,2,3,4].contains? loglevel
63
+ if [0,1,2,3,4].include? loglevel
64
64
  @@log.level = loglevel
65
65
  @@log.debug "Setting LyberCore::Log.level to #{loglevel}"
66
66
  else
@@ -69,7 +69,7 @@ module LyberCore
69
69
  @@log.level = 0
70
70
  end
71
71
  rescue Exception => e
72
- raise e, "Couldn't set log level: #{e.backtrace}"
72
+ raise e, "Couldn't set log level because\n#{e.message}: #{e.backtrace.join(%{\n})}"
73
73
  end
74
74
  end
75
75
 
@@ -97,7 +97,20 @@ module LyberCore
97
97
  def Log.debug(msg)
98
98
  @@log.add(Logger::DEBUG) { msg }
99
99
  end
100
-
100
+
101
+ def Log.exception(e)
102
+ msg = Log.exception_message(e)
103
+ if e.is_a?(LyberCore::Exceptions::FatalError)
104
+ Log.fatal(msg)
105
+ else
106
+ Log.error(msg)
107
+ end
108
+ end
109
+
110
+ def Log.exception_message(e)
111
+ msg = e.inspect.split($/).join('; ') + "\n"
112
+ msg << e.backtrace.join("\n") if(e.backtrace)
113
+ end
101
114
 
102
115
  end
103
116
 
@@ -1,21 +1,27 @@
1
- # == Usage
1
+ # == Usage
2
2
  # ruby_cl_skeleton [options] source_file
3
3
  #
4
4
  # For help use: ruby_cl_skeleton -h
5
5
 
6
6
  module LyberCore
7
7
  module Robots
8
+
9
+ CONTINUE = 0
10
+ SLEEP = 1
11
+ HALT = 2
12
+
8
13
  require 'optparse'
9
14
  require 'ostruct'
10
15
 
11
16
  # ===== Usage
12
17
  # User defined robots should derive from this class and override the #process_item method
13
18
  class Robot
19
+
14
20
  attr_accessor :workflow_name
15
21
  attr_accessor :workflow_step
16
-
22
+
17
23
  # A LyberCore::Robots::Workflow object
18
- attr_accessor :workflow
24
+ attr_accessor :workflow
19
25
  attr_accessor :collection_name
20
26
  attr_accessor :workspace
21
27
  attr_accessor :args
@@ -23,7 +29,7 @@ module LyberCore
23
29
 
24
30
 
25
31
  # Available options
26
- # - :collection_name - The collection this workflow should work with.
32
+ # - :collection_name - The collection this workflow should work with.
27
33
  # Defined as a subdirectory within ROBOT_ROOT/config/workflows/your_workflow/your_collection
28
34
  # - :workspace - Full path of where to find content for a particular workflow
29
35
  # - :logfile - Where to write log messages
@@ -31,6 +37,7 @@ module LyberCore
31
37
  def initialize(workflow_name, workflow_step, args = {})
32
38
  @workflow_name = workflow_name
33
39
  @workflow_step = workflow_step
40
+ #TODO: Replace 'dor.' with actual repository ID
34
41
  @collection_name = args[:collection_name]
35
42
  @opts = args
36
43
 
@@ -42,30 +49,32 @@ module LyberCore
42
49
  LyberCore::Log.set_logfile(robot_logfile)
43
50
  end
44
51
 
45
- LyberCore::Log.set_level(args[:loglevel]) if args[:loglevel]
46
-
52
+ LyberCore::Log.set_level(args[:loglevel]) if args[:loglevel]
53
+
47
54
  # Set defaults
48
55
  @options = OpenStruct.new
49
56
  self.parse_options
50
57
  self.create_workflow
51
58
  self.set_workspace
52
- end
53
59
 
60
+ @msg_queue_name = "/queue/#{@workflow.repository}.#{@workflow_name}.#{@workflow_step}"
61
+ end
62
+
54
63
  # Some workflows require a directory where their content lives
55
64
  # If a robot is invoked with a :workspace => true option, its @workspace
56
- # should be set from the value in
65
+ # should be set from the value in
57
66
  def set_workspace
58
- if(@opts[:workspace])
67
+ if(Dor::Config.robots.workspace)
59
68
  @workspace = LyberCore::Robots::Workspace.new(@workflow_name, @collection_name)
60
69
  LyberCore::Log.debug("workspace = #{workspace.inspect}")
61
70
  end
62
71
  end
63
-
72
+
64
73
  # Create the workflow at instantiation, not when we start running the robot.
65
74
  # That way we can do better error checking and ensure that everything is going
66
75
  # to run okay before we actually start things.
67
76
  def create_workflow
68
-
77
+
69
78
  unless defined?(WORKFLOW_URI)
70
79
  LyberCore::Log.fatal "FATAL: WORKFLOW_URI is not defined"
71
80
  LyberCore::Log.fatal "Usually this is a value like 'http://lyberservices-dev.stanford.edu/workflow'"
@@ -75,46 +84,138 @@ module LyberCore
75
84
  LyberCore::Log.debug("About to instatiate a Workflow object
76
85
  -- LyberCore::Robots::Workflow.new(#{@workflow_name},#{collection_name}")
77
86
  @workflow = LyberCore::Robots::Workflow.new(@workflow_name, {:logger => @logger, :collection_name => @collection_name})
78
-
87
+
79
88
  end
80
-
81
- # == Create a new workflow
82
- def start()
83
-
89
+
90
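+ # == Run the robot standalone: build the work queue, process it, and return false if the queue reached its error limit (true otherwise)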
91
+ def start_standalone()
92
+ LyberCore::Log.debug("Running as standalone...")
93
+ queue = establish_queue()
94
+ process_queue(queue)
95
+ return false if(queue.max_errors_reached?)
96
+
97
+ true
98
+ end
99
+
100
+ def start_master(stomp)
101
+ LyberCore::Log.info("Running as master...")
102
+ LyberCore::Log.info("Publishing to #{@msg_queue_name}")
103
+ queue = establish_queue()
104
+ while work_item = queue.next_item do
105
+ stomp.begin("enqueue_#{work_item.druid}")
106
+ begin
107
+ timeout(MSG_BROKER_TIMEOUT) do
108
+ begin
109
+ stomp.publish(@msg_queue_name, work_item.druid, :persistent => true)
110
+ work_item.set_status('queued')
111
+ stomp.commit("enqueue_#{work_item.druid}")
112
+ rescue
113
+ stomp.abort("enqueue_#{work_item.druid}")
114
+ end
115
+ end
116
+ rescue Timeout::Error
117
+ # the FatalError will be trapped and logged by the start() method
118
+ raise LyberCore::Exceptions::FatalError.new("Message broker unreachable for more than #{MSG_BROKER_TIMEOUT} seconds. Aborting master mode.")
119
+ end
120
+ end
121
+ end
122
+
123
+ def start_slave(stomp)
124
+ LyberCore::Log.info("Running as slave...")
125
+ # Note: stomp is a Stomp::Connection, not a Stomp::Client!
126
+ LyberCore::Log.info("Subscribing to #{@msg_queue_name}")
127
+ stomp.subscribe(@msg_queue_name, :ack => :client)
128
+ msg = nil
129
+ interrupt = false
130
+ old_trap = trap "SIGINT", proc {
131
+ interrupt = true
132
+ LyberCore::Log.info("Shutting down due to user interrupt...")
133
+ }
84
134
  begin
85
- LyberCore::Log.debug("Starting robot...")
86
-
87
- queue = @workflow.queue(@workflow_step)
88
-
89
- # If we have arguments, parse out the parts that indicate druids
90
- if(@options.file or @options.druid)
91
- queue.enqueue_druids(get_druid_list)
135
+ until interrupt
136
+ begin
137
+ timeout(MSG_BROKER_TIMEOUT) do
138
+ msg = stomp.receive
139
+ end
140
+ if msg.command == 'MESSAGE'
141
+ queue = @workflow.queue(@workflow_step)
142
+ queue.enqueue_druids([msg.body.strip])
143
+ process_queue(queue)
144
+ end
145
+ # TODO: Generate statistics about the work
146
+ rescue Timeout::Error
147
+ msg = nil
148
+ break
149
+ ensure
150
+ unless msg.nil?
151
+ stomp.ack msg.headers['message-id']
152
+ end
153
+ end
154
+ end
155
+ ensure
156
+ trap "SIGINT", old_trap
157
+ end
158
+ # TODO: Decouple work_item, work_queue, and identity logic
159
+ end
160
+
161
+ def start()
162
+ LyberCore::Log.debug("Starting robot...")
163
+ if @options.mode == :master or @options.mode == :slave
164
+ require 'stomp'
165
+
166
+ msg_broker_config = {
167
+ :hosts => [{:host => MSG_BROKER_HOST, :port => MSG_BROKER_PORT}],
168
+ :initial_reconnect_delay => 1.0,
169
+ :use_exponential_back_off => true,
170
+ :back_off_multiplier => 1.05,
171
+ :max_reconnect_delay => 3.0,
172
+ :reliable => true
173
+ }
174
+
175
+ stomp = Stomp::Connection.new(msg_broker_config)
176
+ if @options.mode == :master
177
+ start_master(stomp)
178
+ end
179
+ # Run as slave when master is done
180
+ start_slave(stomp)
181
+ else
182
+ did_not_halt = start_standalone()
183
+ if(did_not_halt)
184
+ return LyberCore::Robots::CONTINUE
92
185
  else
93
- queue.enqueue_workstep_waiting()
186
+ return LyberCore::Robots::HALT
94
187
  end
95
- process_queue(queue)
96
- # TODO: Implement a FatalError class
97
- # rescue LyberCore::Exceptions::FatalError => e
98
- # LyberCore::Log.fatal("e.msg")
99
- # exit
188
+ end
100
189
  rescue LyberCore::Exceptions::EmptyQueue
101
190
  LyberCore::Log.info("Empty queue -- no objects to process")
191
+ return LyberCore::Robots::SLEEP
102
192
  rescue Exception => e
103
- LyberCore::Log.error(e.message)
104
- LyberCore::Log.error(e.backtrace.join("\n"))
105
- end
193
+ LyberCore::Log.exception(e)
106
194
  end
107
195
 
196
+ # Generate a queue of work items from a file, a druid, or the service
197
+ def establish_queue()
198
+ queue = @workflow.queue(@workflow_step)
199
+
200
+ # If we have arguments, parse out the parts that indicate druids
201
+ if(@options.file or @options.druid)
202
+ queue.enqueue_druids(get_druid_list)
203
+ else
204
+ queue.enqueue_workstep_waiting()
205
+ end
206
+ return queue
207
+ end
208
+
108
209
  # Generate a list of druids to process
109
210
  def get_druid_list
110
-
211
+
111
212
  druid_list = Array.new
112
-
213
+
113
214
  # append any druids passed explicitly
114
215
  if(@options.druid)
115
216
  druid_list << @options.druid
116
217
  end
117
-
218
+
118
219
  # identifier list is in a file
119
220
  if (@options.file && File.exist?(@options.file))
120
221
  File.open(@options.file) do |file|
@@ -126,36 +227,41 @@ module LyberCore
126
227
  end
127
228
  end
128
229
  end
129
-
230
+
130
231
  return druid_list
131
232
  end
132
233
 
133
234
  def process_queue(queue)
134
235
  while work_item = queue.next_item do
135
- begin
136
- #call overridden method
137
- process_item(work_item)
138
- work_item.set_success
139
- rescue Exception => e
140
- # LyberCore::Log.error("Encountered exception processing #{work_item.druid}: #{e.to_s}")
141
- # LyberCore::Log.debug("Encountered exception processing #{work_item.druid}: #{e.backtrace.join("\n")}")
142
- work_item.set_error(e)
143
- end
236
+ process_work_item(work_item)
144
237
  end
145
- queue.print_stats()
146
238
  end
147
-
239
+
240
+ def process_work_item(work_item)
241
+ begin
242
+ #call overridden method
243
+ process_item(work_item)
244
+ work_item.set_success
245
+ rescue LyberCore::Exceptions::FatalError => fatal_error
246
+ # ToDo cleanup/rollback transaction
247
+ raise fatal_error
248
+ rescue Exception => e
249
+ # ToDo cleanup/rollback transaction
250
+ work_item.set_error(e)
251
+ end
252
+ end
253
+
148
254
  # Override this method in your robot instance. The method in this base class will raise an exception if it is not overridden.
149
255
  def process_item(work_item)
150
256
  #to be overridden by child classes
151
257
  raise 'You must implement this method in your subclass'
152
- end
153
-
258
+ end
259
+
154
260
  # ###########################
155
- # command line option parsing
156
-
261
+ # command line option parsing
262
+
157
263
  def parse_options
158
-
264
+
159
265
  options = {}
160
266
 
161
267
  o = OptionParser.new do |opts|
@@ -165,50 +271,63 @@ module LyberCore
165
271
  opts.on("-d DRUID", "--druid DRUID", "Pass in a druid to process") do |d|
166
272
  @options.druid = d
167
273
  end
168
-
274
+
169
275
  opts.on("-f", "--file FILE", "Pass in a file of druids to process") do |f|
170
276
  @options.file = f
171
277
  end
172
-
278
+
279
+ opts.on("-m MODE", "--mode MODE", "Specify the mode to run in") do |m|
280
+ case m
281
+ when "master"
282
+ @options.mode = :master
283
+ when "slave"
284
+ @options.mode = :slave
285
+ when "default"
286
+ @options.mode = :default
287
+ else
288
+ raise OptionParser::InvalidArgument, "Invalid mode: #{m}"
289
+ end
290
+ end
291
+
173
292
  end
174
-
293
+
175
294
  # Parse the command line options and ignore anything not specified above
176
295
  begin
177
- o.parse!
296
+ o.parse!(@opts[:argv] || ARGV)
178
297
  rescue OptionParser::InvalidOption => e
179
298
  LyberCore::Log.debug("e.inspect")
180
299
  rescue OptionParser::ParseError => e
181
- LyberCore::Log.error("Couldn't parse options: #{e.backtrace}")
300
+ LyberCore::Log.error("Couldn't parse options: #{e.backtrace}")
182
301
  raise e
183
302
  end
184
-
303
+
185
304
  end
186
305
 
187
306
  # def output_options
188
307
  # puts "Options:\n"
189
- #
190
- # @options.marshal_dump.each do |name, val|
308
+ #
309
+ # @options.marshal_dump.each do |name, val|
191
310
  # puts " #{name} = #{val}"
192
311
  # end
193
312
  # end
194
- #
313
+ #
195
314
  # def output_help
196
315
  # output_version
197
316
  # RDoc::usage() #exits app
198
317
  # end
199
- #
318
+ #
200
319
  # def output_usage
201
320
  # RDoc::usage('usage') # gets usage from comments above
202
321
  # end
203
- #
322
+ #
204
323
  # def output_version
205
324
  # puts "#{File.basename(__FILE__)} version #{VERSION}"
206
325
  # end
207
-
326
+
208
327
  # ##################################
209
- # end of command line option parsing
328
+ # end of command line option parsing
210
329
  # ##################################
211
-
330
+
212
331
  end # end of class
213
332
  end # end of Robots module
214
333
  end # end of LyberCore module