lyber-core 1.3.0 → 3.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +0,0 @@
1
- require 'dor-services'
2
-
# Registers a +robots+ section in the Dor configuration whose +workspace+
# setting is declared with a value of nil.
Dor::Config.declare do
  robots { workspace nil }
end
@@ -1,130 +0,0 @@
1
- require 'net/https'
2
- require 'uri'
3
- require 'cgi'
4
-
# Extend the Integer class to facilitate retries of code blocks if specified exception(s) occur
# see: http://blog.josh-nesbitt.net/2010/02/08/writing-contingent-ruby-code-with-retryable/

# Default number of seconds to pause between attempts
RETRYABLE_SLEEP_VALUE = 300

class Integer
  # Runs the given block, retrying it when one of the listed exceptions is
  # raised, for at most +self+ attempts in total (always at least one).
  #
  # == Options
  # - <b>:on</b> - an exception class, or an array of them, that triggers a retry.
  #   Defaults to StandardError. Any other exception propagates immediately.
  # - <b>:sleep</b> - seconds to pause before each retry. Defaults to
  #   RETRYABLE_SLEEP_VALUE. Pass 0 or nil to retry without pausing.
  #
  # Returns the value of the block from the first attempt that succeeds.
  # If the final attempt fails, its exception is raised to the caller.
  def tries(options = {})
    exception_classes = Array(options[:on] || StandardError)
    pause = options.fetch(:sleep, RETRYABLE_SLEEP_VALUE)
    attempts_left = self
    begin
      yield
    rescue *exception_classes
      attempts_left -= 1
      # Out of attempts: let the caller see the last failure
      raise if attempts_left < 1
      sleep pause if pause && pause > 0
      retry
    end
  end
end
24
-
25
-
module LyberCore
  # Thin wrapper around Net::HTTP for GET/POST/PUT requests, with optional
  # client-certificate SSL and retries on transient network errors.
  class Connection
    # Content type used when the caller does not supply :content_type
    DEFAULT_CONTENT_TYPE = 'application/xml'

    # Maps the supported method symbols to their Net::HTTP request classes
    REQUEST_CLASSES = {
      :get  => Net::HTTP::Get,
      :post => Net::HTTP::Post,
      :put  => Net::HTTP::Put
    }.freeze

    # Builds (but does not open) a Net::HTTP object for the host and port of
    # +url+, a parsed URI. For https URLs the client certificate and key are
    # read from LyberCore::CERT_FILE and LyberCore::KEY_FILE.
    def self.get_https_connection(url)
      LyberCore::Log.debug("Establishing connection to #{url.host} on port #{url.port}")
      https = Net::HTTP.new(url.host, url.port)
      if url.scheme == 'https'
        https.use_ssl = true
        LyberCore::Log.debug("Using SSL")
        https.cert = OpenSSL::X509::Certificate.new(File.read(LyberCore::CERT_FILE))
        LyberCore::Log.debug("Using cert file #{LyberCore::CERT_FILE}")
        https.key = OpenSSL::PKey::RSA.new(File.read(LyberCore::KEY_FILE), LyberCore::KEY_PASS)
        # The key passphrase is a secret and is deliberately kept out of the log
        LyberCore::Log.debug("Using key file #{LyberCore::KEY_FILE}")
        # NOTE(review): server certificate verification is switched off here, so the
        # peer is not authenticated. Left as-is because callers may rely on it.
        https.verify_mode = OpenSSL::SSL::VERIFY_NONE
        LyberCore::Log.debug("https.verify_mode = #{https.verify_mode} (should eql #{OpenSSL::SSL::VERIFY_NONE})")
      end
      https
    end

    # Returns body of the HTTP response, or passes the response to the block if it's passed in
    #
    # == Required Parameters
    # - <b>full_url</b> - A string containing the full url to the resource you're trying to connect to
    # - <b>method</b> - Recognizes the following symbols which correspond to an HTTP verb. The convenience methods take care of this
    #     :get for HTTP GET
    #     :post for HTTP POST
    #     :put for HTTP PUT
    # - <b>body</b> The body of your request. Can be nil if you don't have one.
    #
    # == Options
    # - <b>:auth_user</b> for basic HTTP authentication. :auth_user and :auth_password must both be set if using authentication
    # - <b>:auth_password</b> for basic HTTP authentication. :auth_user and :auth_password must both be set if using authentication
    # - <b>:content_type</b> if not passed in as an option, then it is set to 'application/xml'
    #
    # == Block
    # By default, this method returns the body of the response, Net::HTTPResponse.body . If you want to work with the Net::HTTPResponse
    # object, you can pass in a block, and the response will be passed to it. The block's value is then returned.
    #
    # == Exceptions
    # Any exceptions thrown while trying to connect should be handled by the caller.
    # - ArgumentError if +method+ is not one of the symbols listed above
    # - the exception raised by Net::HTTPResponse#error! when the response is not a success
    def self.connect(full_url, method, body, options = {}, &block)
      request_class = REQUEST_CLASSES[method]
      raise ArgumentError, "Unsupported HTTP method: #{method.inspect}" unless request_class

      url = URI.parse(full_url)
      req = request_class.new(url.request_uri)
      req.body = body unless body.nil?
      req.content_type = options.fetch(:content_type, DEFAULT_CONTENT_TYPE)
      req.basic_auth(options[:auth_user], options[:auth_password]) if options.include?(:auth_user)

      res = Connection.send_request(url, req)
      # error! always raises, so nothing below runs for a failed response
      res.error! unless res.is_a?(Net::HTTPSuccess)

      block_given? ? block.call(res) : res.body
    end

    # Send the request to the server, with multiple retries if specified exceptions occur.
    # Failures are re-raised wrapped in a LyberCore::Exceptions::ServiceError.
    def self.send_request(url, req)
      3.tries :on => [Timeout::Error, EOFError, Errno::ECONNRESET] do
        Connection.get_https_connection(url).start { |http| http.request(req) }
      end
    rescue StandardError, Timeout::Error => e
      # Timeout::Error is listed explicitly because it is not a StandardError on
      # every Ruby version. Interrupts and exit requests are left to propagate.
      raise LyberCore::Exceptions::ServiceError.new('HTTP Request failed', e)
    end

    # Convenience method for performing an HTTP GET using Connection.connect
    def self.get(full_url, options = {}, &b)
      Connection.connect(full_url, :get, nil, options, &b)
    end

    # Convenience method for performing an HTTP POST using Connection.connect
    def self.post(full_url, body, options = {}, &b)
      Connection.connect(full_url, :post, body, options, &b)
    end

    # Convenience method for performing an HTTP PUT using Connection.connect
    def self.put(full_url, body, options = {}, &b)
      Connection.connect(full_url, :put, body, options, &b)
    end
  end
end
@@ -1,21 +0,0 @@
# Provides a way to wrap a caught exception inside a new exception.
# The original exception is optionally passed in as the cause parameter of the constructor
# see: http://ruby.runpaint.org/exceptions
# see: http://en.wikipedia.org/wiki/Exception_chaining
# see: http://www.ruby-forum.com/topic/148193
# see: http://jqr.github.com/2009/02/11/passing-data-with-ruby-exceptions.html
module LyberCore
  module Exceptions
    class ChainedError < StandardError
      # message:: text describing the new error
      # cause::   optional exception being wrapped. When it is an Exception, its
      #           inspect output is appended to the message and its backtrace is
      #           adopted by this error. Anything else is ignored.
      def initialize(message, cause = nil)
        unless cause.is_a?(Exception)
          super(message)
          return
        end

        # example: "My message; caused by #<Interrupt: interrupt message>"
        super("#{message}; caused by #{cause.inspect}")
        set_backtrace(cause.backtrace)
      end
    end
  end
end
@@ -1,9 +0,0 @@
1
- require 'dor_service'
2
-
module LyberCore
  module Exceptions
    # Marker exception carrying no behavior of its own beyond RuntimeError.
    # Judging by its name it signals a work queue with nothing to process.
    class EmptyQueue < RuntimeError; end
  end
end
@@ -1,10 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'chained_error')
2
-
# A FatalError is a ChainedError that is usually treated as unrecoverable.
# It adds no behavior of its own; message and cause handling come from ChainedError.
module LyberCore
  module Exceptions
    class FatalError < LyberCore::Exceptions::ChainedError
    end
  end
end
@@ -1,19 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'chained_error')
2
-
# An ItemError is used to wrap a causal exception
# and create a new exception that usually terminates processing of the current item.
# The druid parameter makes it convenient to include the object id using a std message syntax
module LyberCore
  module Exceptions
    class ItemError < LyberCore::Exceptions::ChainedError
      # druid:: object id to prefix the message with ("druid - msg"); the
      #         message is left unprefixed when druid is nil or false
      # msg::   text describing the failure
      # cause:: optional exception being wrapped, passed through to ChainedError
      def initialize(druid, msg, cause = nil)
        super(druid ? "#{druid} - #{msg}" : msg, cause)
      end
    end
  end
end
@@ -1,10 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'fatal_error')
2
-
# A ServiceError is used to wrap timeouts, HTTP exceptions, etc
# and create a new exception that is usually treated as a fatal error.
# It inherits all of its behavior from FatalError.
module LyberCore
  module Exceptions
    class ServiceError < FatalError; end
  end
end
@@ -1,333 +0,0 @@
1
- # == Usage
2
- # ruby_cl_skeleton [options] source_file
3
- #
4
- # For help use: ruby_cl_skeleton -h
5
-
module LyberCore
  module Robots

    require 'fileutils'
    require 'optparse'
    require 'ostruct'
    require 'timeout'

    # Status codes returned by Robot#start when running standalone:
    # CONTINUE when the queue was processed, SLEEP when the queue was empty,
    # HALT when the queue reported that its maximum error count was reached.
    CONTINUE = 0
    SLEEP = 1
    HALT = 2

    # ===== Usage
    # User defined robots should derive from this class and override the #process_item method
    class Robot

      attr_accessor :workflow_name
      attr_accessor :workflow_step

      # A LyberCore::Robots::Workflow object
      attr_accessor :workflow
      attr_accessor :collection_name
      attr_accessor :workspace
      attr_accessor :args

      # OpenStruct holding the parsed command line options (druid, file, mode)
      attr_accessor :options

      # Available options
      # - :collection_name - The collection this workflow should work with.
      #   Defined as a subdirectory within ROBOT_ROOT/config/workflows/your_workflow/your_collection
      # - :logfile - Where to write log messages. Defaults to ROBOT_ROOT/log/<workflow_step>.log
      # - :loglevel - Level of logging from 0 - 4 where 0 = DEBUG and 4 = FATAL
      # - :argv - Arguments to parse instead of ARGV (see #parse_options)
      #
      # Whether a workspace is created is decided by Dor::Config.robots.workspace
      # (see #set_workspace), not by an entry in +args+.
      def initialize(workflow_name, workflow_step, args = {})
        @workflow_name = workflow_name
        @workflow_step = workflow_step
        @collection_name = args[:collection_name]
        @opts = args

        if args[:logfile]
          LyberCore::Log.set_logfile(args[:logfile])
        else
          log_dir = File.join(ROBOT_ROOT, 'log')
          FileUtils.mkdir(log_dir) unless File.exist?(log_dir)
          LyberCore::Log.set_logfile(File.join(log_dir, "#{workflow_step}.log"))
        end

        LyberCore::Log.set_level(args[:loglevel]) if args[:loglevel]

        # Set defaults
        @options = OpenStruct.new
        parse_options
        create_workflow
        set_workspace

        #TODO: Replace 'dor.' with actual repository ID
        @msg_queue_name = "/queue/#{@workflow.repository}.#{@workflow_name}.#{@workflow_step}"
      end

      # Some workflows require a directory where their content lives.
      # When Dor::Config.robots.workspace is set, @workspace becomes a
      # LyberCore::Robots::Workspace for this workflow and collection.
      def set_workspace
        return unless Dor::Config.robots.workspace

        @workspace = LyberCore::Robots::Workspace.new(@workflow_name, @collection_name)
        LyberCore::Log.debug("workspace = #{workspace.inspect}")
      end

      # Create the workflow at instantiation, not when we start running the robot.
      # That way we can do better error checking and ensure that everything is going
      # to run okay before we actually start things.
      #
      # Raises a RuntimeError when WORKFLOW_URI is not defined.
      def create_workflow
        unless defined?(WORKFLOW_URI)
          LyberCore::Log.fatal "FATAL: WORKFLOW_URI is not defined"
          LyberCore::Log.fatal "Usually this is a value like 'http://lyberservices-dev.stanford.edu/workflow'"
          LyberCore::Log.fatal "Usually you load it by setting ROBOT_ENVIRONMENT when you invoke your robot"
          raise "WORKFLOW_URI is not set! Do you need to set your ROBOT_ENVIRONMENT value?"
        end
        LyberCore::Log.debug("About to instatiate a Workflow object\n" \
                             "-- LyberCore::Robots::Workflow.new(#{@workflow_name},#{collection_name}")
        # NOTE(review): @logger is never assigned in this class, so :logger is nil here -- confirm intent
        @workflow = LyberCore::Robots::Workflow.new(@workflow_name, {:logger => @logger, :collection_name => @collection_name})
      end

      # Builds the queue and processes every item in it.
      # Returns false when the queue reports that the maximum number of errors
      # was reached, true otherwise.
      def start_standalone
        LyberCore::Log.debug("Running as standalone...")
        queue = establish_queue
        process_queue(queue)
        return false if queue.max_errors_reached?

        true
      end

      # Publishes the druid of every queued work item to the message queue, each
      # inside its own stomp transaction, and marks the item as 'queued'.
      # A failed publish is logged and its transaction aborted.
      # Raises LyberCore::Exceptions::FatalError when the broker does not answer
      # within MSG_BROKER_TIMEOUT seconds.
      def start_master(stomp)
        LyberCore::Log.info("Running as master...")
        LyberCore::Log.info("Publishing to #{@msg_queue_name}")
        queue = establish_queue
        while (work_item = queue.next_item)
          transaction = "enqueue_#{work_item.druid}"
          stomp.begin(transaction)
          begin
            Timeout.timeout(MSG_BROKER_TIMEOUT) do
              begin
                stomp.publish(@msg_queue_name, work_item.druid, :persistent => true)
                work_item.set_status('queued')
                stomp.commit(transaction)
              rescue StandardError => e
                # Record why the item could not be enqueued before rolling back
                LyberCore::Log.error("Unable to enqueue #{work_item.druid}: #{e.inspect}")
                stomp.abort(transaction)
              end
            end
          rescue Timeout::Error
            # the FatalError will be trapped and logged by the start() method
            raise LyberCore::Exceptions::FatalError.new("Message broker unreachable for more than #{MSG_BROKER_TIMEOUT} seconds. Aborting master mode.")
          end
        end
      end

      # Subscribes to the message queue and processes each druid received, until
      # SIGINT is caught or no message arrives within MSG_BROKER_TIMEOUT seconds.
      # The previous SIGINT handler is restored on the way out.
      def start_slave(stomp)
        LyberCore::Log.info("Running as slave...")
        # Note: stomp is a Stomp::Connection, not a Stomp::Client!
        LyberCore::Log.info("Subscribing to #{@msg_queue_name}")
        stomp.subscribe(@msg_queue_name, :ack => :client)
        interrupt = false
        old_trap = trap "SIGINT", proc {
          interrupt = true
          LyberCore::Log.info("Shutting down due to user interrupt...")
        }
        begin
          until interrupt
            # Start each pass without a message so a failed receive cannot
            # re-acknowledge the message from the previous pass
            msg = nil
            begin
              Timeout.timeout(MSG_BROKER_TIMEOUT) do
                msg = stomp.receive
              end
              if msg && msg.command == 'MESSAGE'
                queue = @workflow.queue(@workflow_step)
                queue.enqueue_druids([msg.body.strip])
                process_queue(queue)
              end
              # TODO: Generate statistics about the work
            rescue Timeout::Error
              msg = nil
              break
            ensure
              # Acknowledge whatever was received, even if processing it failed
              stomp.ack msg.headers['message-id'] unless msg.nil?
            end
          end
        ensure
          trap "SIGINT", old_trap
        end
        # TODO: Decouple work_item, work_queue, and identity logic
      end

      # Runs the robot in the mode selected on the command line.
      #
      # In standalone mode (the default) returns LyberCore::Robots::CONTINUE or
      # LyberCore::Robots::HALT. In :master mode the queue is published and the
      # robot then carries on as a slave; in :slave mode it only consumes.
      # Returns LyberCore::Robots::SLEEP when the queue is empty.
      # Any other error is logged rather than raised, except exit requests and
      # user interrupts, which are allowed to terminate the process.
      def start
        LyberCore::Log.debug("Starting robot...")
        if @options.mode == :master || @options.mode == :slave
          require 'stomp'

          msg_broker_config = {
            :hosts => [{:host => MSG_BROKER_HOST, :port => MSG_BROKER_PORT}],
            :initial_reconnect_delay => 1.0,
            :use_exponential_back_off => true,
            :back_off_multiplier => 1.05,
            :max_reconnect_delay => 3.0,
            :reliable => true
          }

          stomp = Stomp::Connection.new(msg_broker_config)
          start_master(stomp) if @options.mode == :master
          # Run as slave when master is done
          start_slave(stomp)
        elsif start_standalone
          return LyberCore::Robots::CONTINUE
        else
          return LyberCore::Robots::HALT
        end
      rescue LyberCore::Exceptions::EmptyQueue
        LyberCore::Log.info("Empty queue -- no objects to process")
        return LyberCore::Robots::SLEEP
      rescue Exception => e
        # instance_of? keeps Interrupt subclasses (Timeout::Error on old Rubies) on the logging path
        raise if e.is_a?(SystemExit) || e.instance_of?(Interrupt)
        LyberCore::Log.exception(e)
      end

      # Generate a queue of work items based from file, druid, or service
      def establish_queue
        queue = @workflow.queue(@workflow_step)

        # If we have arguments, parse out the parts that indicate druids
        if @options.file || @options.druid
          queue.enqueue_druids(get_druid_list)
        else
          queue.enqueue_workstep_waiting
        end
        queue
      end

      # Generate a list of druids to process: the druid given on the command
      # line (if any) followed by every non-blank line of the druid file
      # (if one was given and exists), with surrounding whitespace removed.
      def get_druid_list
        druid_list = []

        # append any druids passed explicitly
        druid_list << @options.druid if @options.druid

        # identifier list is in a file
        if @options.file && File.exist?(@options.file)
          File.foreach(@options.file) do |line|
            druid = line.strip
            druid_list << druid unless druid.empty?
          end
        end

        druid_list
      end

      # Processes work items from +queue+ until it has none left
      def process_queue(queue)
        while (work_item = queue.next_item)
          process_work_item(work_item)
        end
      end

      # Runs #process_item on +work_item+ and records the outcome on the item.
      # A FatalError is passed on to the caller; any other error is stored on
      # the item via set_error so the remaining items can still be processed.
      # Exit requests and user interrupts are not treated as item errors.
      def process_work_item(work_item)
        #call overridden method
        process_item(work_item)
        work_item.set_success
      rescue LyberCore::Exceptions::FatalError
        # ToDo cleanup/rollback transaction
        raise
      rescue Exception => e
        raise if e.is_a?(SystemExit) || e.instance_of?(Interrupt)
        # ToDo cleanup/rollback transaction
        work_item.set_error(e)
      end

      # Override this method in your robot instance. The method in this base class will throw an exception if it is not overriden.
      def process_item(work_item)
        #to be overridden by child classes
        raise 'You must implement this method in your subclass'
      end

      # ###########################
      # command line option parsing

      # Parses the :argv entry of the constructor arguments, or ARGV when none
      # was given, into @options. Recognizes -d/--druid, -f/--file and -m/--mode
      # (master, slave or default). The argument array is modified in place.
      #
      # Parsing stops at the first unrecognized option, which is logged at debug
      # level. Any other parse error, such as an invalid mode, is logged and re-raised.
      def parse_options
        parser = OptionParser.new do |opts|
          opts.banner = "Usage: example.rb [options]"
          opts.separator ""

          opts.on("-d DRUID", "--druid DRUID", "Pass in a druid to process") do |d|
            @options.druid = d
          end

          opts.on("-f", "--file FILE", "Pass in a file of druids to process") do |f|
            @options.file = f
          end

          opts.on("-m MODE", "--mode MODE", "Specify the mode to run in") do |m|
            case m
            when "master"
              @options.mode = :master
            when "slave"
              @options.mode = :slave
            when "default"
              @options.mode = :default
            else
              raise OptionParser::InvalidArgument, "Invalid mode: #{m}"
            end
          end
        end

        # Parse the command line options and ignore anything not specified above
        begin
          parser.parse!(@opts[:argv] || ARGV)
        rescue OptionParser::InvalidOption => e
          LyberCore::Log.debug(e.inspect)
        rescue OptionParser::ParseError => e
          LyberCore::Log.error("Couldn't parse options: #{e.backtrace}")
          raise e
        end
      end

      # ##################################
      # end of command line option parsing
      # ##################################

    end # end of class
  end # end of Robots module
end # end of LyberCore module