lyber-core 1.3.0 → 3.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,174 +0,0 @@
1
- require 'daemons'
2
- require 'logger'
3
- require 'fileutils'
4
-
5
module LyberCore
  module Robots
    # Starts, stops and reports on robot processes, building on
    # Daemons::ApplicationGroup. Each robot is identified by a
    # "workflow:robot-name" process name, and pidfiles are kept in a single
    # pid directory.
    class ServiceController < Daemons::ApplicationGroup
      # The logger used by the controller itself.
      attr_reader :logger

      # Create a controller.
      #
      # Recognised keys in +opts+:
      # :logger::      Logger to use; when absent a $stdout logger is created
      # :log_level::   level for the default logger (defaults to Logger::WARN)
      # :sleep_time::  seconds a robot sleeps after reporting SLEEP (defaults to 15 minutes)
      # :working_dir:: directory robots run in (defaults to ENV['ROBOT_ROOT'], then Dir.pwd)
      # :pid_dir::     pidfile directory (defaults to <working_dir>/pid)
      # :argv::        argument list handed to every robot instance
      def initialize(opts = {})
        if opts[:logger]
          @logger = opts[:logger]
        else
          @logger = Logger.new($stdout)
          @logger.level = opts[:log_level] || Logger::WARN
        end
        @sleep_time = opts[:sleep_time] || (15 * 60)
        @working_dir = opts[:working_dir] || ENV['ROBOT_ROOT'] || Dir.pwd
        @pid_dir = File.expand_path(opts[:pid_dir] || File.join(@working_dir, 'pid'))
        # mkdir_p also creates missing parent directories and is a no-op when
        # the directory already exists.
        FileUtils.mkdir_p(@pid_dir)
        @argv = (opts[:argv] || []).dup
        @logger.debug "Initializing application group."
        @logger.debug "Writing pids to #{@pid_dir}"
        super('robot_service_controller', :dir_mode => :normal, :dir => @pid_dir, :multiple => true, :backtrace => true)
      end

      # Build the process name for a robot: "<workflow>:<robot_name>".
      def qname(workflow, robot_name)
        [workflow, robot_name].join(':')
      end

      # Start the robot unless an instance of it is already running.
      #
      # Returns true when the robot was launched and reports itself as
      # running; false when it was already running or failed to start.
      def start(workflow, robot_name)
        process_name = qname(workflow, robot_name)
        app = find_app(workflow, robot_name).first

        if app && app.running?
          @logger.warn "Robot #{process_name} [#{app.pid.pid}] is already running"
          return false
        end

        @logger.info "Starting #{process_name}..."
        app = with_app_name(process_name) do
          launched, _output = capture_stdout { launch_robot(workflow, robot_name, process_name) }
          launched
        end

        if app.running?
          @logger.info "#{process_name} [#{app.pid.pid}] started."
          true
        else
          @logger.error "Unable to start #{process_name}"
          false
        end
      end

      # Stop every running instance of the robot and zap applications whose
      # pidfile exists but whose process is gone.
      #
      # Returns true when at least one running instance was shut down, false
      # otherwise (nothing found, nothing running, or shutdown failed).
      def stop(workflow, robot_name)
        apps = find_app(workflow, robot_name)
        process_name = qname(workflow, robot_name)
        stopped = false

        if apps.empty?
          @logger.info "Robot #{process_name} not found"
          return stopped
        end

        apps.each do |app|
          # Read the pid up front so the log messages do not depend on the
          # pidfile still being readable after the stop attempt.
          pid = app.pid.pid
          if app.running?
            @logger.info "Shutting down #{process_name} [#{pid}]..."
            # The return value of app.stop is deliberately ignored: success is
            # judged by whether the process is still running afterwards.
            capture_stdout { app.stop }
            if app.running?
              @logger.error "Unable to stop #{process_name} [#{pid}]."
            else
              @logger.info "#{process_name} [#{pid}] shut down."
              stopped = true
            end
          else
            @logger.warn "Robot #{process_name} [#{pid}] is not running but pidfile exists"
            app.zap!
          end
        end
        stopped
      end

      # Return one { :pid => ..., :status => :running | :stopped } hash per
      # application found for the robot.
      def status(workflow, robot_name)
        find_app(workflow, robot_name).collect do |app|
          { :pid => app.pid.pid, :status => (app.running? ? :running : :stopped) }
        end
      end

      # Return an array of human-readable status lines for the robot.
      def status_message(workflow, robot_name)
        app_status = status(workflow, robot_name)
        process_name = qname(workflow, robot_name)
        return ["Robot #{process_name} not found"] if app_status.empty?

        app_status.collect do |s|
          case s[:status]
          when :running
            "Robot #{process_name} [#{s[:pid]}] is running"
          when :stopped
            "Robot #{process_name} [#{s[:pid]}] is not running but pidfile exists"
          end
        end
      end

      # The helpers below were marked "# private" in the original but left
      # public; they stay public so existing callers keep working.

      # Temporarily set @app_name to +name+ while the block runs, restoring
      # the previous value afterwards (even on error). Returns the block's
      # value.
      # NOTE(review): @app_name is presumably what the Daemons superclass uses
      # to name and locate pidfiles - confirm against the Daemons gem.
      def with_app_name(name)
        old_name, @app_name = @app_name, name
        begin
          return yield
        ensure
          @app_name = old_name
        end
      end

      # Run the block with $stdout redirected into a buffer. The captured
      # text is logged at debug level.
      #
      # Returns [block_result, captured_output].
      def capture_stdout
        old_io = $stdout
        begin
          new_io = StringIO.new('')
          $stdout = new_io
          result = yield
          @logger.debug new_io.string
          return result, new_io.string
        ensure
          $stdout = old_io
        end
      end

      # Find the applications for a robot via the pidfiles in the pid directory.
      def find_app(workflow, robot_name)
        with_app_name(qname(workflow, robot_name)) do
          find_applications_by_pidfiles(@pid_dir)
        end
      end

      # Capture what is needed to rebuild logger +l+ later: its log device,
      # the device's filename (nil when it is not file-backed) and its level.
      def marshal_logger(l)
        log_device = l.instance_variable_get('@logdev')
        # A logger created with a nil device has no @logdev to query.
        { :dev => log_device, :file => (log_device && log_device.filename), :level => l.level }
      end

      # Rebuild a logger from the hash produced by marshal_logger.
      #
      # Prefers the filename; otherwise falls back to the captured device so
      # that loggers writing to a stream (such as the default $stdout logger)
      # keep producing output. :log_device is still honoured for callers that
      # passed that key.
      def restore_logger(params)
        device = params[:dev] || params[:log_device]
        # Unwrap a Logger::LogDevice to the IO it writes to.
        device = device.dev if device.respond_to?(:dev)
        result = Logger.new(params[:file] || device)
        result.level = params[:level]
        result
      end

      private

      # Create and start the application that runs the robot.
      def launch_robot(workflow, robot_name, process_name)
        robot_proc = build_robot_proc(resolve_robot_class(workflow, robot_name), process_name)
        new_app = new_application({ :mode => :proc, :proc => robot_proc, :dir_mode => :normal, :log_output => true, :log_dir => @pid_dir })
        new_app.start
        new_app
      end

      # Map a workflow and robot name to the robot class, e.g.
      # ("googleScannedBookWF", "register-object") => GoogleScannedBook::RegisterObject.
      def resolve_robot_class(workflow, robot_name)
        raw_module_name = workflow.split('WF').first
        # Upper-case only the first character; the rest keeps its case.
        module_name = raw_module_name[0, 1].upcase + raw_module_name[1..-1]
        class_name = robot_name.split('-').collect { |word| word.capitalize }.join('')
        Module.const_get(module_name).const_get(class_name)
      end

      # Build the lambda handed to the new application: it runs the robot in
      # a loop inside the working directory, sleeping on SLEEP and leaving
      # the loop on HALT.
      def build_robot_proc(robot_klass, process_name)
        # Captured before the lambda is built; the lambda creates its own
        # logger from this state instead of using @logger directly.
        log_state = marshal_logger(@logger)
        lambda {
          Dir.chdir(@working_dir) do
            # Restored outside the begin/ensure so the ensure clause never
            # sees an unassigned logger.
            logger = restore_logger(log_state)
            begin
              robot = robot_klass.new(:argv => @argv.dup)
              loop do
                case robot.start
                when LyberCore::Robots::SLEEP
                  logger.info "SLEEP condition reached in #{process_name}. Sleeping for #{@sleep_time} seconds."
                  sleep(@sleep_time)
                when LyberCore::Robots::HALT
                  logger.error "HALT condition reached in #{process_name}. Shutting down."
                  break
                end
              end
            ensure
              logger.info "Shutting down."
            end
          end
        }
      end
    end
  end
end
@@ -1,112 +0,0 @@
1
- require 'dor_service'
2
- require "xml_models/identity_metadata/identity_metadata"
3
- require "xml_models/identity_metadata/dublin_core"
4
-
5
-
6
module LyberCore
  module Robots
    # Represents a single object being processed as part of a workflow queue.
    class WorkItem
      # The queue that this workitem is a member of
      attr_reader :work_queue
      # The primary id for the object being processed
      attr_accessor :druid
      # Inject an IdentityMetadata object (currently used for unit testing only).
      # The matching reader is defined below because it loads lazily.
      attr_writer :identity_metadata
      # Timings for this workitem's processing
      attr_reader :start_time
      attr_reader :end_time
      attr_reader :elapsed_time

      # Create a new WorkItem object, save a pointer to the parent WorkQueue,
      # and start the timer.
      def initialize(work_queue)
        @work_queue = work_queue
        @start_time = Time.new
      end

      # Save the IdentityMetadata object to the identityMetadata datastream:
      # update the datastream when DorService reports one already exists,
      # otherwise add it.
      def identity_metadata_save
        if DorService.get_datastream(@druid, 'identityMetadata')
          # Trailing positional arguments: content type, then the versionable flag.
          DorService.update_datastream(@druid, 'identityMetadata', identity_metadata.to_xml, 'application/xml', false)
        else
          DorService.add_datastream(@druid, 'identityMetadata', 'identityMetadata', identity_metadata.to_xml)
        end
      end

      # Return the IdentityMetadata object for this item, creating it on
      # first use: an empty one when there is no druid, otherwise one parsed
      # from the identityMetadata datastream XML.
      def identity_metadata
        if @identity_metadata.nil?
          @identity_metadata =
            if @druid.nil?
              IdentityMetadata.new
            else
              IdentityMetadata.from_xml(DorService.get_datastream(@druid, 'identityMetadata'))
            end
        end
        @identity_metadata
      end

      # Return the identifier value for the specified identifier name.
      def identifier(key)
        identity_metadata.get_identifier_value(key)
      end

      # Add a new name,value pair to the set of identifiers.
      def identifier_add(key, value)
        identity_metadata.add_identifier(key, value)
      end

      # Return an array of strings where each entry consists of name:value.
      def id_pairs
        identity_metadata.get_id_pairs
      end

      # Return the druid for the work item if it exists, else the first
      # identifier pair, else nil.
      def item_id
        return @druid if @druid
        pairs = identity_metadata.get_id_pairs
        pairs.size > 0 ? pairs[0] : nil
      end

      # Record a non-error status for the workstep operation: stop the timer,
      # log the outcome and, when the item has a druid, update the workflow
      # status.
      def set_status(status)
        stop_timer
        LyberCore::Log.info("#{item_id} #{status} in #{@elapsed_time} seconds")
        if @druid
          Dor::WorkflowService.update_workflow_status(@work_queue.workflow.repository, @druid, @work_queue.workflow.workflow_id, @work_queue.workflow_step, status, @elapsed_time)
        end
      end

      # Record the successful outcome of the workstep operation for this workitem.
      def set_success
        @work_queue.success_count += 1
        set_status('completed')
      end

      # Record the unsuccessful outcome of the workstep operation for this
      # workitem. Exceptions that are not already ItemErrors are wrapped in
      # one before being logged and reported.
      def set_error(e)
        @work_queue.error_count += 1
        stop_timer
        item_error =
          if e.is_a?(LyberCore::Exceptions::ItemError)
            e
          else
            LyberCore::Exceptions::ItemError.new(@druid, "Item error", e)
          end
        LyberCore::Log.exception(item_error)
        if @druid
          DorService.update_workflow_error_status(@work_queue.workflow.repository, @druid, @work_queue.workflow.workflow_id, @work_queue.workflow_step, item_error.message)
        end
      end

      private

      # Stamp the end time and derive the elapsed time from a single clock read.
      def stop_timer
        @end_time = Time.new
        @elapsed_time = @end_time - @start_time
      end
    end
  end
end
@@ -1,177 +0,0 @@
1
- require 'dor_service'
2
- require 'dlss_service'
3
- require 'yaml'
4
-
5
module LyberCore
  module Robots
    # Represents a set of workitem objects to be processed by a given step of a workflow
    class WorkQueue
      # Matches a fully qualified workstep name of the form "repository:workflow:step".
      QUALIFIED_STEP = /.+:.+:.+/

      # The workflow that this queue is a part of
      attr_reader :workflow
      # The step in the workflow that is being processed against this queue
      attr_reader :workflow_step
      # The workflow step that should have already been completed for the workitem object
      attr_reader :prerequisite
      # The maximum number of workitem objects to process in one run of a robot
      attr_reader :batch_limit
      # The maximum number of errors to allow before terminating the batch run
      attr_reader :error_limit
      # The array of primary identifiers for the workitem objects to be processed
      attr_reader :druids
      # An alternative identifier to be used when druids are not yet available (e.g. at registration)
      attr_reader :identifier_name
      attr_reader :identifier_values
      # The tally of how many items have been processed
      attr_reader :item_count
      attr_accessor :success_count
      attr_accessor :error_count
      # The timings for the batch run
      attr_reader :start_time
      attr_reader :end_time
      attr_reader :elapsed_time
      # Path of the process-config.yaml file read for this queue
      attr_reader :config_file

      # Create a new WorkQueue object for the specified step,
      # save a pointer to the parent WorkFlow,
      # start the timer,
      # read in the configuration information for the work step.
      #
      # nil arguments should only be used in test mode: the config file is
      # then skipped and small fixed limits are used instead.
      def initialize(workflow=nil, workflow_step=nil)
        LyberCore::Log.debug("Initializing work queue with workflow #{workflow} and workflow_step #{workflow_step}")
        @start_time = Time.new
        LyberCore::Log.info("Starting #{workflow_step} at #{@start_time}")
        @workflow = workflow
        @workflow_step = workflow_step
        @item_count = 0
        @success_count = 0
        @error_count = 0

        if workflow.nil? || workflow_step.nil?
          @batch_limit = 2
          @error_limit = 1
          return
        end

        process_config_file
      end

      # Read prerequisite, batch_limit and error_limit for this workflow step
      # from process-config.yaml in the workflow's config directory.
      #
      # Raises RuntimeError when the file does not exist or has no entry for
      # the workflow step.
      def process_config_file
        LyberCore::Log.debug("Processing config file ... ")
        LyberCore::Log.debug("@workflow.workflow_config_dir = #{@workflow.workflow_config_dir}")

        @config_file = File.join(@workflow.workflow_config_dir, 'process-config.yaml')
        LyberCore::Log.debug("I'm opening the config file at #{@config_file}")

        raise "Can't open process-config file #{@config_file}" unless File.file?(@config_file)

        process_config = YAML.load_file(@config_file)
        LyberCore::Log.debug("process_config: #{process_config.inspect}")

        # An empty file or a missing step would otherwise surface as an
        # opaque NoMethodError on nil.
        step_config = process_config && process_config[@workflow_step]
        raise "No configuration for workflow step #{@workflow_step} in #{@config_file}" unless step_config

        @prerequisite = step_config['prerequisite']
        LyberCore::Log.debug("@prerequisite: #{@prerequisite}")

        @batch_limit = step_config['batch_limit']
        LyberCore::Log.debug("@batch_limit: #{@batch_limit}")

        @error_limit = step_config['error_limit']
        LyberCore::Log.debug("@error_limit: #{@error_limit}")
      end

      # Explicitly specify a set of druids to be processed by the workflow step
      def enqueue_druids(druid_array)
        LyberCore::Log.debug("\nEnqueing an array of druids...")
        @druids = druid_array
        LyberCore::Log.debug("\n@druids = #{@druids}")
      end

      # True when the prerequisite (or, for an array, every prerequisite) is
      # written as "repository:workflow:step".
      def fully_qualified_prerequisite?
        if @prerequisite.is_a?(Array)
          @prerequisite.all? { |p| p =~ QUALIFIED_STEP }
        else
          !(@prerequisite =~ QUALIFIED_STEP).nil?
        end
      end

      # Obtain the set of druids to be processed using a database query
      # to obtain the repository objects that are awaiting this step.
      #
      # With several prerequisites only druids returned for every one of them
      # are kept. The result is capped at batch_limit entries and is empty
      # when there are no prerequisites to query.
      def enqueue_workstep_waiting()
        LyberCore::Log.debug("\nEnqueing workstep waiting...")

        qualified = fully_qualified_prerequisite?
        druid_lists = Array(@prerequisite).collect do |prerequisite|
          object_list_xml =
            if qualified
              DorService.get_objects_for_qualified_workstep(prerequisite, "#{workflow.repository}:#{workflow.workflow_id}:#{@workflow_step}")
            else
              DorService.get_objects_for_workstep(workflow.repository, workflow.workflow_id, prerequisite, @workflow_step)
            end
          LyberCore::Log.debug("\nobject_list_xml = #{object_list_xml}")
          druid_list = DlssService.get_all_druids_from_object_list(object_list_xml)
          LyberCore::Log.debug("\ndruid_list = #{druid_list}")
          druid_list
        end

        # Intersect the per-prerequisite lists; inject yields nil for no lists.
        common = druid_lists.inject { |collector, list| collector & list } || []
        # first(n) honours a batch limit of 0, unlike a 0..(n-1) range which
        # becomes 0..-1 and selects everything.
        @druids = common.first(batch_limit)
      end

      # Use an alternative set of identifiers as the basis of this queue
      # e.g. use array of barcodes as basis for google register-object robot
      def enqueue_identifiers(identifier_name, identifier_values)
        @identifier_name = identifier_name
        @identifier_values = identifier_values
      end

      # Get the next WorkItem to be processed by the robot for the workflow
      # step. Returns nil when the batch or error limit has been reached, the
      # queue is exhausted, or nothing has been enqueued.
      def next_item()
        if @item_count >= @batch_limit
          LyberCore::Log.info "Batch limit of #{@batch_limit} items reached"
          return nil
        end
        if max_errors_reached?
          LyberCore::Log.info "Error limit of #{@error_limit} items reached"
          return nil
        end

        # Druids take precedence over alternative identifiers.
        if @druids
          return nil if @item_count >= @druids.length
          work_item = LyberCore::Robots::WorkItem.new(self)
          work_item.druid = @druids[@item_count]
        elsif @identifier_values
          return nil if @item_count >= @identifier_values.length
          work_item = LyberCore::Robots::WorkItem.new(self)
          work_item.identifier_add(@identifier_name, @identifier_values[@item_count])
        else
          return nil
        end

        @item_count += 1
        work_item
      end

      # True once the number of errors has reached the error limit.
      def max_errors_reached?
        @error_count >= @error_limit
      end

      # Output the batch's timings and other statistics to the main log file
      def print_stats
        stop_timer
        LyberCore::Log.info "Total time: #{@elapsed_time}\n"
        LyberCore::Log.info "Completed objects: #{success_count}\n"
        LyberCore::Log.info "Errors: #{error_count}\n"
      end

      # Output the batch's timings for a run that found nothing to process
      def print_empty_stats
        stop_timer
        LyberCore::Log.info "Total time: #{@elapsed_time}\n"
        LyberCore::Log.info "Empty queue"
      end

      private

      # Stamp the end time and derive the elapsed time for the batch run.
      def stop_timer
        @end_time = Time.new
        @elapsed_time = @end_time - @start_time
      end
    end
  end
end