lyber-core 1.3.0 → 3.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,174 +0,0 @@
1
require 'daemons'
require 'fileutils'
require 'logger'
require 'stringio'
4
-
5
- module LyberCore
6
- module Robots
7
- class ServiceController < Daemons::ApplicationGroup
8
- attr_reader :logger
9
-
10
# Build the controller and register it with Daemons as an application group.
#
# Recognised keys in +opts+ (all optional):
#   :logger      - logger to use as-is; when absent a $stdout logger is built
#   :log_level   - level for the default logger (default Logger::WARN)
#   :sleep_time  - seconds a robot sleeps after a SLEEP result (default 15 minutes)
#   :working_dir - directory robots run in (default ENV['ROBOT_ROOT'] or Dir.pwd)
#   :pid_dir     - directory for pid files (default "<working_dir>/pid")
#   :argv        - argument list handed to each robot (copied)
def initialize(opts = {})
  if opts[:logger]
    @logger = opts[:logger]
  else
    @logger = Logger.new($stdout)
    @logger.level = opts[:log_level] || Logger::WARN
  end
  @sleep_time = opts[:sleep_time] || (15 * 60)
  @working_dir = opts[:working_dir] || ENV['ROBOT_ROOT'] || Dir.pwd
  @pid_dir = File.expand_path(opts[:pid_dir] || File.join(@working_dir, 'pid'))
  # mkdir_p also creates missing parent directories and is a no-op when the
  # directory already exists, so a nested :pid_dir no longer raises.
  FileUtils.mkdir_p(@pid_dir) unless File.directory?(@pid_dir)
  @argv = (opts[:argv] || []).dup
  @logger.debug "Initializing application group."
  @logger.debug "Writing pids to #{@pid_dir}"
  super('robot_service_controller', :dir_mode => :normal, :dir => @pid_dir, :multiple => true, :backtrace => true)
end
27
-
28
# Qualified process name for a robot: "<workflow>:<robot_name>".
def qname(workflow, robot_name)
  # Array#* with a String argument is equivalent to Array#join.
  [workflow, robot_name] * ':'
end
31
-
32
# Start the robot for +workflow+/+robot_name+ as a daemonized proc unless an
# instance is already running.
#
# The robot class is resolved from the names: the text of +workflow+ before
# "WF" (first letter upcased) names the module, and the dash-separated
# +robot_name+ is CamelCased to name the class inside it.
#
# Returns true only when a new process was launched and reports as running;
# false when the launch failed or the robot was already running.
def start(workflow, robot_name)
  result = false
  app = find_app(workflow, robot_name).first
  process_name = qname(workflow, robot_name)
  if app.nil? || !app.running?
    @logger.info "Starting #{process_name}..."
    with_app_name(process_name.dup) do
      # capture_stdout returns [block result, captured text]; only the new
      # application object is needed here.
      app, _captured = capture_stdout do
        raw_module_name = workflow.split('WF').first
        module_name = raw_module_name[0].chr.upcase << raw_module_name.slice(1, raw_module_name.size - 1)
        robot_klass = Module.const_get(module_name).const_get(robot_name.split(/-/).collect { |w| w.capitalize }.join(''))
        log_state = marshal_logger(@logger)
        robot_proc = lambda do
          Dir.chdir(@working_dir) do
            logger = nil
            begin
              logger = restore_logger(log_state)
              robot = robot_klass.new(:argv => @argv.dup)
              loop do
                case robot.start
                when LyberCore::Robots::SLEEP
                  logger.info "SLEEP condition reached in #{process_name}. Sleeping for #{@sleep_time} seconds."
                  sleep(@sleep_time)
                when LyberCore::Robots::HALT
                  logger.error "HALT condition reached in #{process_name}. Shutting down."
                  break
                end
              end
            ensure
              # logger is still nil if restore_logger itself raised; guarding
              # keeps that original error from being masked by NoMethodError.
              logger.info "Shutting down." if logger
            end
          end
        end
        new_app = new_application({:mode => :proc, :proc => robot_proc, :dir_mode => :normal, :log_output => true, :log_dir => @pid_dir})
        new_app.start
        new_app
      end
    end

    if app.running?
      @logger.info "#{process_name} [#{app.pid.pid}] started."
      result = true
    else
      @logger.error "Unable to start #{process_name}"
    end
  else
    @logger.warn "Robot #{process_name} [#{app.pid.pid}] is already running"
  end
  result
end
81
-
82
# Stop every running instance of the robot for +workflow+/+robot_name+.
#
# Instances whose pidfile exists but whose process is gone are cleaned up
# with zap! and do not affect the result.
#
# Returns a boolean: true when the last running instance handled was shut
# down, false when it could not be stopped or when nothing was running.
def stop(workflow, robot_name)
  apps = find_app(workflow, robot_name)
  process_name = qname(workflow, robot_name)
  result = false
  if apps.empty?
    @logger.info "Robot #{process_name} not found"
  else
    apps.each do |app|
      if app.running?
        @logger.info "Shutting down #{process_name} [#{app.pid.pid}]..."
        # The return value of app.stop is deliberately ignored: success is
        # judged only by whether the process is still running afterwards.
        capture_stdout { app.stop }
        if app.running?
          @logger.error "Unable to stop #{process_name} [#{app.pid.pid}]."
          result = false
        else
          @logger.info "#{process_name} [#{app.pid.pid}] shut down."
          result = true
        end
      else
        @logger.warn "Robot #{process_name} [#{app.pid.pid}] is not running but pidfile exists"
        app.zap!
      end
    end
  end
  result
end
107
-
108
# Report every known instance of a robot.
#
# Returns an array with one hash per pidfile-backed application:
# { :pid => <process id>, :status => :running or :stopped }.
def status(workflow, robot_name)
  find_app(workflow, robot_name).map do |app|
    process_id = app.pid.pid
    state = app.running? ? :running : :stopped
    { :pid => process_id, :status => state }
  end
end
114
-
115
# Human-readable status lines for a robot.
#
# Returns an array of strings: a single "not found" line when no instance is
# known, otherwise one line per instance reported by #status.
def status_message(workflow, robot_name)
  entries = status(workflow, robot_name)
  process_name = qname(workflow, robot_name)
  return ["Robot #{process_name} not found"] if entries.empty?

  entries.map do |entry|
    if entry[:status] == :running
      "Robot #{process_name} [#{entry[:pid]}] is running"
    elsif entry[:status] == :stopped
      "Robot #{process_name} [#{entry[:pid]}] is not running but pidfile exists"
    end
  end
end
131
-
132
- # private
133
# Run the given block with @app_name temporarily set to +name+, restoring
# the previous value afterwards even if the block raises.
# Returns the block's result.
def with_app_name(name)
  previous_name = @app_name
  @app_name = name
  yield
ensure
  @app_name = previous_name
end
141
-
142
# Run the given block with $stdout redirected into an in-memory buffer.
#
# The captured text is written to @logger at debug level. $stdout is always
# restored, even when the block raises.
#
# Returns a two-element array: [block result, captured output string].
# Requires the 'stringio' standard library to be loaded.
def capture_stdout
  original_stdout = $stdout
  # StringIO.new without an argument always yields a writable buffer; a
  # literal '' argument becomes read-only when string literals are frozen.
  buffer = StringIO.new
  $stdout = buffer
  result = yield
  @logger.debug buffer.string
  [result, buffer.string]
ensure
  $stdout = original_stdout
end
154
-
155
# Look up the applications recorded in @pid_dir for the given robot.
# The lookup runs with @app_name set to the robot's qualified name.
# Returns whatever find_applications_by_pidfiles returns for @pid_dir.
def find_app(workflow, robot_name)
  process_name = qname(workflow, robot_name)
  with_app_name(process_name) do
    find_applications_by_pidfiles(@pid_dir)
  end
end
160
-
161
# Capture what is needed to rebuild logger +l+ later (see restore_logger).
#
# Returns a hash with:
#   :dev   - the logger's internal log device (nil for a null logger)
#   :file  - the log device's filename, or nil when it is not file-backed
#   :level - the logger's current level
def marshal_logger(l)
  log_device = l.instance_variable_get('@logdev')
  # A logger created with Logger.new(nil) has no log device at all, so the
  # filename lookup must tolerate nil.
  filename = log_device ? log_device.filename : nil
  { :dev => log_device, :file => filename, :level => l.level }
end
165
-
166
# Rebuild a logger from the hash produced by marshal_logger.
#
# The target is the filename when present, otherwise the saved device.
# The device is read from :dev (the key marshal_logger writes); the old
# :log_device key is still honoured for backward compatibility.
#
# Returns a new Logger with its level set to params[:level].
def restore_logger(params)
  target = params[:file] || params[:dev] || params[:log_device]
  # marshal_logger stores the Logger::LogDevice wrapper; unwrap it so the new
  # logger writes straight to the underlying IO.
  target = target.dev if target.is_a?(Logger::LogDevice)
  result = Logger.new(target)
  result.level = params[:level]
  result
end
171
-
172
- end
173
- end
174
- end
@@ -1,112 +0,0 @@
1
- require 'dor_service'
2
- require "xml_models/identity_metadata/identity_metadata"
3
- require "xml_models/identity_metadata/dublin_core"
4
-
5
-
6
- # Represents a single object being processed as part of a workflow queue
7
- module LyberCore
8
- module Robots
9
- class WorkItem
10
-
11
- # The queue that this workitem is a member of
12
- attr_reader :work_queue
13
- # The primary id for the object being processed
14
- attr_accessor :druid
15
- # An object used to hold unmarshalled XML from the identityMetadata datastream
16
- attr_accessor :identity_metadata
17
- # Timings for this workitem's processing
18
- attr_reader :start_time
19
- attr_reader :end_time
20
- attr_reader :elapsed_time
21
-
22
# Build a work item that belongs to +work_queue+ and record the moment
# processing started.
def initialize(work_queue)
  @start_time = Time.now
  @work_queue = work_queue
end
27
-
28
# Replace the cached IdentityMetadata object. Per the original note this is
# currently used for unit testing only.
def identity_metadata=(identity_metadata)
  @identity_metadata = identity_metadata
end
32
-
33
# Save the IdentityMetadata object to the identityMetadata datastream.
#
# When the datastream already exists for @druid it is updated (content type
# 'application/xml', not versionable); otherwise it is added.
def identity_metadata_save
  if DorService.get_datastream(@druid, 'identityMetadata')
    # The last two arguments are positional: content type and versionable flag.
    DorService.update_datastream(@druid, 'identityMetadata', identity_metadata.to_xml, 'application/xml', false)
  else
    DorService.add_datastream(@druid, 'identityMetadata', 'identityMetadata', identity_metadata.to_xml)
  end
end
41
-
42
# Return the IdentityMetadata object for this work item, building and
# caching it on first use: an empty object when there is no druid,
# otherwise one parsed from the identityMetadata datastream XML.
def identity_metadata
  return @identity_metadata unless @identity_metadata == nil

  @identity_metadata =
    if @druid == nil
      IdentityMetadata.new
    else
      IdentityMetadata.from_xml(DorService.get_datastream(@druid, 'identityMetadata'))
    end
end
54
-
55
# Return the identifier value for the specified identifier name.
def identifier(key)
  identity_metadata.get_identifier_value(key)
end
59
-
60
# Add a new name/value pair to the set of identifiers held in the
# identity metadata.
def identifier_add(key, value)
  identity_metadata.add_identifier(key, value)
end
64
-
65
# Return the identity metadata's id pairs; per the original note, an array
# of strings where each entry has the form name:value.
def id_pairs
  identity_metadata.get_id_pairs
end
69
-
70
# Return the druid for the work item if it exists, else the first id pair
# from the identity metadata (nil when there are none).
def item_id
  if @druid
    @druid
  else
    pairs = identity_metadata.get_id_pairs
    pairs[0] unless pairs.size.zero?
  end
end
76
-
77
# Record a non-error status for the workstep operation.
#
# Stamps the end time and elapsed time, logs a summary line and, when the
# item has a druid, reports the status and elapsed time to the workflow
# service.
def set_status(status)
  # Elapsed time is derived once, from the same end timestamp that is stored.
  @end_time = Time.new
  @elapsed_time = @end_time - @start_time
  LyberCore::Log.info("#{item_id} #{status} in #{@elapsed_time} seconds")
  return unless @druid

  workflow = @work_queue.workflow
  Dor::WorkflowService.update_workflow_status(workflow.repository, @druid, workflow.workflow_id, @work_queue.workflow_step, status, @elapsed_time)
end
87
-
88
# Record the successful outcome of the workstep operation for this work
# item: bump the queue's success tally and set the status to 'completed'.
def set_success
  @work_queue.success_count += 1
  set_status('completed')
end
93
-
94
# Record the unsuccessful outcome of the workstep operation for this work
# item.
#
# Bumps the queue's error tally, stamps end/elapsed time, wraps +e+ in an
# ItemError unless it already is one, logs it and, when the item has a
# druid, reports the error message to the workflow error status.
def set_error(e)
  @work_queue.error_count += 1
  @end_time = Time.new
  @elapsed_time = @end_time - @start_time
  item_error =
    if e.is_a?(LyberCore::Exceptions::ItemError)
      e
    else
      LyberCore::Exceptions::ItemError.new(@druid, "Item error", e)
    end
  LyberCore::Log.exception(item_error)
  return unless @druid

  workflow = @work_queue.workflow
  DorService.update_workflow_error_status(workflow.repository, @druid, workflow.workflow_id, @work_queue.workflow_step, item_error.message)
end
109
-
110
- end
111
- end
112
- end
@@ -1,177 +0,0 @@
1
- require 'dor_service'
2
- require 'dlss_service'
3
- require 'yaml'
4
-
5
- module LyberCore
6
- module Robots
7
- # Represents a set of workitem objects to be processed by a given step of a workflow
8
- class WorkQueue
9
-
10
- # The workflow that this queue is a part of
11
- attr_reader :workflow
12
- # The step in the workflow that is being processed against this queue
13
- attr_reader :workflow_step
14
- # The workflow step that should have already been completed for the workitem object
15
- attr_reader :prerequisite
16
- # The maximum number of workitem objects to process in one run of a robot
17
- attr_reader :batch_limit
18
- # The maximum number of errors to allow before terminating the batch run
19
- attr_reader :error_limit
20
- # The array of primary identifiers for the workitem objects to be processed
21
- attr_reader :druids
22
- # An alternative identitier to be used when druids are not yet available (e.g at registration)
23
- attr_reader :identifier_name
24
- attr_reader :identifier_values
25
- # The tally of how many items have been processed
26
- attr_reader :item_count
27
- attr_accessor :success_count
28
- attr_accessor :error_count
29
- # The timings for the batch run
30
- attr_reader :start_time
31
- attr_reader :end_time
32
- attr :elapsed_time
33
-
34
- attr_reader :config_file
35
-
36
-
37
# Create a new WorkQueue for the specified step: remember the parent
# workflow, start the timer, zero the tallies and read the step's
# configuration.
#
# When either argument is nil (intended for test mode only) the config file
# is not read; the limits default to a batch of 2 items and 1 error.
def initialize(workflow=nil, workflow_step=nil)
  LyberCore::Log.debug("Initializing work queue with workflow #{workflow} and workflow_step #{workflow_step}")
  @start_time = Time.new
  LyberCore::Log.info("Starting #{workflow_step} at #{@start_time}")
  @workflow, @workflow_step = workflow, workflow_step
  @item_count = @success_count = @error_count = 0

  if workflow.nil? || workflow_step.nil?
    @batch_limit = 2
    @error_limit = 1
  else
    process_config_file
  end
end
60
-
61
# Read process-config.yaml from the workflow's config directory and load
# this step's settings into @prerequisite, @batch_limit and @error_limit.
#
# Raises RuntimeError when the file does not exist or when it has no
# entry for @workflow_step.
def process_config_file
  LyberCore::Log.debug("Processing config file ... ")
  LyberCore::Log.debug("@workflow.workflow_config_dir = #{@workflow.workflow_config_dir}")

  @config_file = File.join(@workflow.workflow_config_dir, 'process-config.yaml')
  LyberCore::Log.debug("I'm opening the config file at #{@config_file}")

  raise "Can't open process-config file #{@config_file}" unless File.file? @config_file

  process_config = YAML.load_file(@config_file)
  LyberCore::Log.debug("process_config: #{process_config.inspect}")

  # An empty file or a missing step would otherwise surface as an opaque
  # NoMethodError on nil; fail with a message that names the step instead.
  step_config = process_config.is_a?(Hash) ? process_config[@workflow_step] : nil
  unless step_config.is_a?(Hash)
    raise "No configuration for workflow step #{@workflow_step} in #{@config_file}"
  end

  @prerequisite = step_config['prerequisite']
  LyberCore::Log.debug("@prerequisite: #{@prerequisite}")

  @batch_limit = step_config['batch_limit']
  LyberCore::Log.debug("@batch_limit: #{@batch_limit}")

  @error_limit = step_config['error_limit']
  LyberCore::Log.debug("@error_limit: #{@error_limit}")
end
84
-
85
# Explicitly specify the set of druids to be processed by the workflow
# step, logging the list at debug level.
def enqueue_druids(druid_array)
  log = LyberCore::Log
  log.debug("\nEnqueing an array of druids...")
  @druids = druid_array
  log.debug("\n@druids = #{@druids}")
end
91
-
92
# Whether the prerequisite is written in fully qualified form, i.e. three
# non-empty colon-separated parts (the pattern /.+:.+:.+/).
#
# For an array of prerequisites every entry must be qualified (an empty
# array counts as qualified). Always returns true or false.
def fully_qualified_prerequisite?
  qualified = lambda { |step| !(step.to_s =~ /.+:.+:.+/).nil? }
  if @prerequisite.is_a?(Array)
    @prerequisite.all?(&qualified)
  else
    qualified.call(@prerequisite)
  end
end
100
-
101
# Obtain the set of druids to be processed by querying for the repository
# objects that are awaiting this step.
#
# One object list is fetched per prerequisite (through the qualified or the
# plain workstep query, depending on fully_qualified_prerequisite?). The
# queue is the intersection of all lists, cut down to the batch limit.
# With no prerequisites the queue is empty.
def enqueue_workstep_waiting()
  LyberCore::Log.debug("\nEnqueing workstep waiting...")

  qualified = fully_qualified_prerequisite?
  druid_lists = Array(@prerequisite).collect do |prerequisite|
    object_list_xml =
      if qualified
        DorService.get_objects_for_qualified_workstep(prerequisite, "#{@workflow.repository}:#{@workflow.workflow_id}:#{@workflow_step}")
      else
        DorService.get_objects_for_workstep(@workflow.repository, @workflow.workflow_id, prerequisite, @workflow_step)
      end
    LyberCore::Log.debug("\nobject_list_xml = #{object_list_xml}")
    druid_list = DlssService.get_all_druids_from_object_list(object_list_xml)
    # Log the list just fetched; @druids is not assigned until after the loop.
    LyberCore::Log.debug("\ndruid_list = #{druid_list}")
    druid_list
  end
  # Seeding with the first list (or [] when there is none) keeps the
  # de-duplicating effect of `list & list` and avoids a nil result.
  @druids = druid_lists.inject(druid_lists.first || []) { |collector, list| collector & list }
  @druids = @druids[0..(@batch_limit - 1)]
end
124
-
125
# Use an alternative set of identifiers as the basis of this queue,
# e.g. an array of barcodes for the google register-object robot.
def enqueue_identifiers(identifier_name, identifier_values)
  @identifier_name, @identifier_values = identifier_name, identifier_values
  # Keep the original return value (the values that were just stored).
  @identifier_values
end
131
-
132
# Get the next WorkItem to be processed by the robot for the workflow step.
#
# Returns nil when the batch limit or the error limit has been reached,
# when the queue source (druids, else identifier values) is exhausted, or
# when no source has been enqueued. Otherwise returns a new WorkItem bound
# to this queue and advances the item counter.
def next_item()
  if @item_count >= @batch_limit
    LyberCore::Log.info "Batch limit of #{@batch_limit} items reached"
    return nil
  end
  if @error_count >= @error_limit
    LyberCore::Log.info "Error limit of #{@error_limit} items reached"
    return nil
  end

  item = LyberCore::Robots::WorkItem.new(self)
  position = @item_count
  if @druids
    return nil unless position < @druids.length
    item.druid = @druids[position]
  elsif @identifier_values
    return nil unless position < @identifier_values.length
    item.identifier_add(@identifier_name, @identifier_values[position])
  else
    return nil
  end
  @item_count = position + 1
  item
end
155
-
156
# True once the error tally has reached the configured error limit.
def max_errors_reached?
  @error_limit <= @error_count
end
159
-
160
# Output the batch's timings and other statistics to the main log file:
# total elapsed time, completed objects and errors. Also stamps @end_time
# and @elapsed_time.
def print_stats
  @end_time = Time.new
  @elapsed_time = @end_time - @start_time
  LyberCore::Log.info "Total time: #{@elapsed_time}\n"
  LyberCore::Log.info "Completed objects: #{success_count}\n"
  LyberCore::Log.info "Errors: #{error_count}\n"
end
168
-
169
# Log the elapsed time for a run that found nothing to process, followed by
# an "Empty queue" line. Also stamps @end_time and @elapsed_time.
def print_empty_stats
  @end_time = Time.new
  @elapsed_time = @end_time - @start_time
  LyberCore::Log.info "Total time: #{@elapsed_time}\n"
  LyberCore::Log.info "Empty queue"
end
175
- end
176
- end
177
- end