lyber-core 1.3.0 → 3.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,13 +0,0 @@
1
- require 'dor-services'
2
-
3
- module Dor
4
-
5
- Config.declare do
6
-
7
- robots do
8
- workspace nil
9
- end
10
-
11
- end
12
-
13
- end
@@ -1,130 +0,0 @@
1
- require 'net/https'
2
- require 'uri'
3
- require 'cgi'
4
-
5
- # Extend the Integer class to facilitate retries of code blocks if specified exception(s) occur
6
- # see: http://blog.josh-nesbitt.net/2010/02/08/writing-contingent-ruby-code-with-retryable/
7
- RETRYABLE_SLEEP_VALUE = 300
8
- class Integer
9
- def tries(options={}, &block)
10
- attempts = self
11
- exception_classes = [*options[:on] || StandardError]
12
- begin
13
- # First attempt
14
- return yield
15
- rescue *exception_classes
16
- sleep RETRYABLE_SLEEP_VALUE
17
- # 2nd to n-1 attempts
18
- retry if (attempts -= 1) > 1
19
- end
20
- # final (nth) attempt
21
- yield
22
- end
23
- end
24
-
25
-
26
- module LyberCore
27
- class Connection
28
- def Connection.get_https_connection(url)
29
- LyberCore::Log.debug("Establishing connection to #{url.host} on port #{url.port}")
30
- https = Net::HTTP.new(url.host, url.port)
31
- if(url.scheme == 'https')
32
- https.use_ssl = true
33
- LyberCore::Log.debug("Using SSL")
34
- https.cert = OpenSSL::X509::Certificate.new( File.read(LyberCore::CERT_FILE) )
35
- LyberCore::Log.debug("Using cert file #{LyberCore::CERT_FILE}")
36
- https.key = OpenSSL::PKey::RSA.new( File.read(LyberCore::KEY_FILE), LyberCore::KEY_PASS )
37
- LyberCore::Log.debug("Using key file #{LyberCore::KEY_FILE} with pass #{LyberCore::KEY_PASS}")
38
- https.verify_mode = OpenSSL::SSL::VERIFY_NONE
39
- LyberCore::Log.debug("https.verify_mode = #{https.verify_mode} (should eql #{OpenSSL::SSL::VERIFY_NONE})")
40
- end
41
- https
42
- end
43
-
44
- # Returns body of the HTTP response, or passes the response to the block if it's passed in
45
- #
46
- # == Required Parameters
47
- # - <b>full_url</b> - A string containing the full url to the resource you're trying to connect to
48
- # - <b>method</b> - Recognizes the following symbols which correspond to an HTTP verb. The convenience methods take care of this
49
- # :get for HTTP GET
50
- # :post for HTTP POST
51
- # :put for HTTP PUT
52
- # - <b>body</b> The body of your request. Can be nil if you don't have one.
53
- #
54
- # == Options
55
- # - <b>:auth_user</b> for basic HTTP authentication. :auth_user and :auth_password must both be set if using authentication
56
- # - <b>:auth_password</b> for basic HTTP authentication. :auth_user and :auth_password must both be set if using authentication
57
- # - <b>:content_type</b> if not passed in as an option, then it is set to 'application/xml'
58
- #
59
- # == Block
60
- # By default, this method returns the body of the response, Net::HTTPResponse.body . If you want to work with the Net::HTTPResponse
61
- # object, you can pass in a block, and the response will be passed to it.
62
- #
63
- # == Exceptions
64
- # Any exceptions thrown while trying to connect should be handled by the caller
65
- def Connection.connect(full_url, method, body, options = {}, &block)
66
- url = URI.parse(full_url)
67
- case method
68
- when :get
69
- req = Net::HTTP::Get.new(url.request_uri)
70
- when :post
71
- req = Net::HTTP::Post.new(url.request_uri)
72
- when :put
73
- req = Net::HTTP::Put.new(url.request_uri)
74
- end
75
- req.body = body unless(body.nil?)
76
- if(options.include?(:content_type))
77
- req.content_type = options[:content_type]
78
- else
79
- req.content_type = 'application/xml'
80
- end
81
-
82
- if(options.include?(:auth_user))
83
- req.basic_auth options[:auth_user], options[:auth_password]
84
- end
85
-
86
- res = Connection.send_request(url, req)
87
- case res
88
- when Net::HTTPSuccess
89
- if(block_given?)
90
- block.call(res)
91
- else
92
- return res.body
93
- end
94
- else
95
- raise res.error!
96
- # ??? raise LyberCore::Exceptions::ServiceError.new('HTTP Request failed',res.error!)
97
- end
98
-
99
- end
100
-
101
-
102
- # Send the request to the server, with multiple retries if specified exceptions occur
103
- def Connection.send_request(url, req)
104
- 3.tries :on => [Timeout::Error, EOFError, Errno::ECONNRESET] do
105
- Connection.get_https_connection(url).start {|http| http.request(req) }
106
- end
107
- rescue Exception => e
108
- raise LyberCore::Exceptions::ServiceError.new('HTTP Request failed',e)
109
- end
110
-
111
- end
112
-
113
-
114
- # Convenience method for performing an HTTP GET using Connection.connect
115
- def Connection.get(full_url, options = {}, &b)
116
- Connection.connect(full_url, :get, nil, options, &b)
117
- end
118
-
119
- # Convenience method for performing an HTTP POST using Connection.connect
120
- def Connection.post(full_url, body, options = {}, &b)
121
- Connection.connect(full_url, :post, body, options, &b)
122
- end
123
-
124
- # Convenience method for performing an HTTP PUT using Connection.connect
125
- def Connection.put(full_url, body, options = {}, &b)
126
- Connection.connect(full_url, :put, body, options, &b)
127
- end
128
-
129
-
130
- end
@@ -1,21 +0,0 @@
1
- # Provides a way of wrapping a caught exception inside a new exception.
2
- # The original exception is optionally passed in as the cause parameter of the constructor
3
- # see: http://ruby.runpaint.org/exceptions
4
- # see: http://en.wikipedia.org/wiki/Exception_chaining
5
- # see: http://www.ruby-forum.com/topic/148193
6
- # see: http://jqr.github.com/2009/02/11/passing-data-with-ruby-exceptions.html
7
- module LyberCore
8
- module Exceptions
9
- class ChainedError < StandardError
10
- def initialize(message, cause=nil)
11
- if (cause && cause.is_a?(Exception))
12
- # example: "My message; caused by #<Interrupt: interrupt message>"
13
- super("#{message}; caused by #{cause.inspect}")
14
- self.set_backtrace(cause.backtrace)
15
- else
16
- super(message)
17
- end
18
- end
19
- end
20
- end
21
- end
@@ -1,9 +0,0 @@
1
- require 'dor_service'
2
-
3
- module LyberCore
4
- module Exceptions
5
- class EmptyQueue < RuntimeError
6
-
7
- end
8
- end
9
- end
@@ -1,10 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'chained_error')
2
-
3
- # A FatalError is used to wrap timeouts, HTTP exceptions, etc
4
- # And create a new exception that is usually treated as a fatal error
5
- module LyberCore
6
- module Exceptions
7
- class FatalError < LyberCore::Exceptions::ChainedError
8
- end
9
- end
10
- end
@@ -1,19 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'chained_error')
2
-
3
- # An ItemError is used to wrap a causal exception
4
- # And create a new exception that usually terminates processing of the current item
5
- # the druid parameter makes it convenient to include the object id using a std message syntax
6
- module LyberCore
7
- module Exceptions
8
- class ItemError < LyberCore::Exceptions::ChainedError
9
- def initialize(druid, msg, cause=nil)
10
- if (druid)
11
- message = "#{druid} - #{msg}"
12
- else
13
- message= msg
14
- end
15
- super(message, cause)
16
- end
17
- end
18
- end
19
- end
@@ -1,10 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'fatal_error')
2
-
3
- # A ServiceError is used to wrap timeouts, HTTP exceptions, etc
4
- # And create a new exception that is usually treated as a fatal error
5
- module LyberCore
6
- module Exceptions
7
- class ServiceError < LyberCore::Exceptions::FatalError
8
- end
9
- end
10
- end
@@ -1,333 +0,0 @@
1
- # == Usage
2
- # ruby_cl_skeleton [options] source_file
3
- #
4
- # For help use: ruby_cl_skeleton -h
5
-
6
- module LyberCore
7
- module Robots
8
-
9
- CONTINUE = 0
10
- SLEEP = 1
11
- HALT = 2
12
-
13
- require 'optparse'
14
- require 'ostruct'
15
-
16
- # ===== Usage
17
- # User defined robots should derive from this class and override the #process_item method
18
- class Robot
19
-
20
- attr_accessor :workflow_name
21
- attr_accessor :workflow_step
22
-
23
- # A LyberCore::Robots::Workflow object
24
- attr_accessor :workflow
25
- attr_accessor :collection_name
26
- attr_accessor :workspace
27
- attr_accessor :args
28
- attr_accessor :options
29
-
30
-
31
- # Available options
32
- # - :collection_name - The collection this workflow should work with.
33
- # Defined as a subdirectory within ROBOT_ROOT/config/workflows/your_workflow/your_collection
34
- # - :workspace - Full path of where to find content for a particular workflow
35
- # - :logfile - Where to write log messages
36
- # - :loglevel - Level of logging from 0 - 4 where 0 = DEBUG and 4 = FATAL
37
- def initialize(workflow_name, workflow_step, args = {})
38
- @workflow_name = workflow_name
39
- @workflow_step = workflow_step
40
- #TODO: Replace 'dor.' with actual repository ID
41
- @collection_name = args[:collection_name]
42
- @opts = args
43
-
44
- if args[:logfile]
45
- LyberCore::Log.set_logfile(args[:logfile])
46
- else
47
- FileUtils.mkdir(File.join(ROBOT_ROOT, 'log')) unless(File.exists?(File.join(ROBOT_ROOT, 'log')))
48
- robot_logfile = File.join(ROBOT_ROOT,'log',workflow_step+'.log')
49
- LyberCore::Log.set_logfile(robot_logfile)
50
- end
51
-
52
- LyberCore::Log.set_level(args[:loglevel]) if args[:loglevel]
53
-
54
- # Set defaults
55
- @options = OpenStruct.new
56
- self.parse_options
57
- self.create_workflow
58
- self.set_workspace
59
-
60
- @msg_queue_name = "/queue/#{@workflow.repository}.#{@workflow_name}.#{@workflow_step}"
61
- end
62
-
63
- # Some workflows require a directory where their content lives
64
- # If a robot is invoked with a :workspace => true option, its @workspace
65
- # is created here, but only when Dor::Config.robots.workspace is set
66
- def set_workspace
67
- if(Dor::Config.robots.workspace)
68
- @workspace = LyberCore::Robots::Workspace.new(@workflow_name, @collection_name)
69
- LyberCore::Log.debug("workspace = #{workspace.inspect}")
70
- end
71
- end
72
-
73
- # Create the workflow at instantiation, not when we start running the robot.
74
- # That way we can do better error checking and ensure that everything is going
75
- # to run okay before we actually start things.
76
- def create_workflow
77
-
78
- unless defined?(WORKFLOW_URI)
79
- LyberCore::Log.fatal "FATAL: WORKFLOW_URI is not defined"
80
- LyberCore::Log.fatal "Usually this is a value like 'http://lyberservices-dev.stanford.edu/workflow'"
81
- LyberCore::Log.fatal "Usually you load it by setting ROBOT_ENVIRONMENT when you invoke your robot"
82
- raise "WORKFLOW_URI is not set! Do you need to set your ROBOT_ENVIRONMENT value?"
83
- end
84
- LyberCore::Log.debug("About to instatiate a Workflow object
85
- -- LyberCore::Robots::Workflow.new(#{@workflow_name},#{collection_name}")
86
- @workflow = LyberCore::Robots::Workflow.new(@workflow_name, {:logger => @logger, :collection_name => @collection_name})
87
-
88
- end
89
-
90
- # == Run standalone: establish the work queue and process it (returns false if the queue reached its max errors)
91
- def start_standalone()
92
- LyberCore::Log.debug("Running as standalone...")
93
- queue = establish_queue()
94
- process_queue(queue)
95
- return false if(queue.max_errors_reached?)
96
-
97
- true
98
- end
99
-
100
- def start_master(stomp)
101
- LyberCore::Log.info("Running as master...")
102
- LyberCore::Log.info("Publishing to #{@msg_queue_name}")
103
- queue = establish_queue()
104
- while work_item = queue.next_item do
105
- stomp.begin("enqueue_#{work_item.druid}")
106
- begin
107
- timeout(MSG_BROKER_TIMEOUT) do
108
- begin
109
- stomp.publish(@msg_queue_name, work_item.druid, :persistent => true)
110
- work_item.set_status('queued')
111
- stomp.commit("enqueue_#{work_item.druid}")
112
- rescue
113
- stomp.abort("enqueue_#{work_item.druid}")
114
- end
115
- end
116
- rescue Timeout::Error
117
- # the FatalError will be trapped and logged by the start() method
118
- raise LyberCore::Exceptions::FatalError.new("Message broker unreachable for more than #{MSG_BROKER_TIMEOUT} seconds. Aborting master mode.")
119
- end
120
- end
121
- end
122
-
123
- def start_slave(stomp)
124
- LyberCore::Log.info("Running as slave...")
125
- # Note: stomp is a Stomp::Connection, not a Stomp::Client!
126
- LyberCore::Log.info("Subscribing to #{@msg_queue_name}")
127
- stomp.subscribe(@msg_queue_name, :ack => :client)
128
- msg = nil
129
- interrupt = false
130
- old_trap = trap "SIGINT", proc {
131
- interrupt = true
132
- LyberCore::Log.info("Shutting down due to user interrupt...")
133
- }
134
- begin
135
- until interrupt
136
- begin
137
- timeout(MSG_BROKER_TIMEOUT) do
138
- msg = stomp.receive
139
- end
140
- if msg.command == 'MESSAGE'
141
- queue = @workflow.queue(@workflow_step)
142
- queue.enqueue_druids([msg.body.strip])
143
- process_queue(queue)
144
- end
145
- # TODO: Generate statistics about the work
146
- rescue Timeout::Error
147
- msg = nil
148
- break
149
- ensure
150
- unless msg.nil?
151
- stomp.ack msg.headers['message-id']
152
- end
153
- end
154
- end
155
- ensure
156
- trap "SIGINT", old_trap
157
- end
158
- # TODO: Decouple work_item, work_queue, and identity logic
159
- end
160
-
161
- def start()
162
- LyberCore::Log.debug("Starting robot...")
163
- if @options.mode == :master or @options.mode == :slave
164
- require 'stomp'
165
-
166
- msg_broker_config = {
167
- :hosts => [{:host => MSG_BROKER_HOST, :port => MSG_BROKER_PORT}],
168
- :initial_reconnect_delay => 1.0,
169
- :use_exponential_back_off => true,
170
- :back_off_multiplier => 1.05,
171
- :max_reconnect_delay => 3.0,
172
- :reliable => true
173
- }
174
-
175
- stomp = Stomp::Connection.new(msg_broker_config)
176
- if @options.mode == :master
177
- start_master(stomp)
178
- end
179
- # Run as slave when master is done
180
- start_slave(stomp)
181
- else
182
- did_not_halt = start_standalone()
183
- if(did_not_halt)
184
- return LyberCore::Robots::CONTINUE
185
- else
186
- return LyberCore::Robots::HALT
187
- end
188
- end
189
- rescue LyberCore::Exceptions::EmptyQueue
190
- LyberCore::Log.info("Empty queue -- no objects to process")
191
- return LyberCore::Robots::SLEEP
192
- rescue Exception => e
193
- LyberCore::Log.exception(e)
194
- end
195
-
196
- # Generate a queue of work items from a file, a druid, or the service
197
- def establish_queue()
198
- queue = @workflow.queue(@workflow_step)
199
-
200
- # If we have arguments, parse out the parts that indicate druids
201
- if(@options.file or @options.druid)
202
- queue.enqueue_druids(get_druid_list)
203
- else
204
- queue.enqueue_workstep_waiting()
205
- end
206
- return queue
207
- end
208
-
209
- # Generate a list of druids to process
210
- def get_druid_list
211
-
212
- druid_list = Array.new
213
-
214
- # append any druids passed explicitly
215
- if(@options.druid)
216
- druid_list << @options.druid
217
- end
218
-
219
- # identifier list is in a file
220
- if (@options.file && File.exist?(@options.file))
221
- File.open(@options.file) do |file|
222
- file.each_line do |line|
223
- druid = line.strip
224
- if (druid.length > 0)
225
- druid_list << druid
226
- end
227
- end
228
- end
229
- end
230
-
231
- return druid_list
232
- end
233
-
234
- def process_queue(queue)
235
- while work_item = queue.next_item do
236
- process_work_item(work_item)
237
- end
238
- end
239
-
240
- def process_work_item(work_item)
241
- begin
242
- #call overridden method
243
- process_item(work_item)
244
- work_item.set_success
245
- rescue LyberCore::Exceptions::FatalError => fatal_error
246
- # ToDo cleanup/rollback transaction
247
- raise fatal_error
248
- rescue Exception => e
249
- # ToDo cleanup/rollback transaction
250
- work_item.set_error(e)
251
- end
252
- end
253
-
254
- # Override this method in your robot instance. The method in this base class will raise an exception if it is not overridden.
255
- def process_item(work_item)
256
- #to be overridden by child classes
257
- raise 'You must implement this method in your subclass'
258
- end
259
-
260
- # ###########################
261
- # command line option parsing
262
-
263
- def parse_options
264
-
265
- options = {}
266
-
267
- o = OptionParser.new do |opts|
268
- opts.banner = "Usage: example.rb [options]"
269
- opts.separator ""
270
-
271
- opts.on("-d DRUID", "--druid DRUID", "Pass in a druid to process") do |d|
272
- @options.druid = d
273
- end
274
-
275
- opts.on("-f", "--file FILE", "Pass in a file of druids to process") do |f|
276
- @options.file = f
277
- end
278
-
279
- opts.on("-m MODE", "--mode MODE", "Specify the mode to run in") do |m|
280
- case m
281
- when "master"
282
- @options.mode = :master
283
- when "slave"
284
- @options.mode = :slave
285
- when "default"
286
- @options.mode = :default
287
- else
288
- raise OptionParser::InvalidArgument, "Invalid mode: #{m}"
289
- end
290
- end
291
-
292
- end
293
-
294
- # Parse the command line options and ignore anything not specified above
295
- begin
296
- o.parse!(@opts[:argv] || ARGV)
297
- rescue OptionParser::InvalidOption => e
298
- LyberCore::Log.debug("e.inspect")
299
- rescue OptionParser::ParseError => e
300
- LyberCore::Log.error("Couldn't parse options: #{e.backtrace}")
301
- raise e
302
- end
303
-
304
- end
305
-
306
- # def output_options
307
- # puts "Options:\n"
308
- #
309
- # @options.marshal_dump.each do |name, val|
310
- # puts " #{name} = #{val}"
311
- # end
312
- # end
313
- #
314
- # def output_help
315
- # output_version
316
- # RDoc::usage() #exits app
317
- # end
318
- #
319
- # def output_usage
320
- # RDoc::usage('usage') # gets usage from comments above
321
- # end
322
- #
323
- # def output_version
324
- # puts "#{File.basename(__FILE__)} version #{VERSION}"
325
- # end
326
-
327
- # ##################################
328
- # end of command line option parsing
329
- # ##################################
330
-
331
- end # end of class
332
- end # end of Robots module
333
- end # end of LyberCore module