lyber-core 1.3.0 → 3.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/LICENSE +1 -1
- data/README.md +81 -0
- data/lib/lyber_core.rb +1 -15
- data/lib/lyber_core/destroyer.rb +2 -9
- data/lib/lyber_core/log.rb +26 -30
- data/lib/lyber_core/robot.rb +67 -0
- metadata +131 -370
- data/README.rdoc +0 -76
- data/lib/dlss_service.rb +0 -81
- data/lib/dor_service.rb +0 -588
- data/lib/lyber_core/config.rb +0 -13
- data/lib/lyber_core/connection.rb +0 -130
- data/lib/lyber_core/exceptions/chained_error.rb +0 -21
- data/lib/lyber_core/exceptions/empty_queue.rb +0 -9
- data/lib/lyber_core/exceptions/fatal_error.rb +0 -10
- data/lib/lyber_core/exceptions/item_error.rb +0 -19
- data/lib/lyber_core/exceptions/service_error.rb +0 -10
- data/lib/lyber_core/robots/robot.rb +0 -333
- data/lib/lyber_core/robots/service_controller.rb +0 -174
- data/lib/lyber_core/robots/work_item.rb +0 -112
- data/lib/lyber_core/robots/work_queue.rb +0 -177
- data/lib/lyber_core/robots/workflow.rb +0 -104
- data/lib/lyber_core/robots/workspace.rb +0 -77
- data/lib/lyber_core/utils.rb +0 -4
- data/lib/lyber_core/utils/bagit_bag.rb +0 -100
- data/lib/lyber_core/utils/checksum_validate.rb +0 -65
- data/lib/lyber_core/utils/file_utilities.rb +0 -168
- data/lib/xml_models/identity_metadata/dublin_core.rb +0 -116
- data/lib/xml_models/identity_metadata/identity_metadata.rb +0 -264
@@ -1,174 +0,0 @@
|
|
1
|
-
require 'daemons'
|
2
|
-
require 'logger'
|
3
|
-
require 'fileutils'
|
4
|
-
|
5
|
-
module LyberCore
|
6
|
-
module Robots
|
7
|
-
class ServiceController < Daemons::ApplicationGroup
|
8
|
-
attr_reader :logger
|
9
|
-
|
10
|
-
# Configure the controller from +opts+ and initialise the underlying
# application group (via +super+).
#
# Recognised options (all optional):
#   :logger      - logger used as-is; :log_level is ignored when this is given
#   :log_level   - level for the default $stdout logger (default Logger::WARN)
#   :sleep_time  - seconds to sleep on a SLEEP condition (default 15 * 60)
#   :working_dir - robot working directory (default ENV['ROBOT_ROOT'], then Dir.pwd)
#   :pid_dir     - pidfile directory (default "<working_dir>/pid")
#   :argv        - arguments passed to each robot instance (a copy is stored)
def initialize(opts = {})
  if opts[:logger]
    @logger = opts[:logger]
  else
    @logger = Logger.new($stdout)
    @logger.level = opts[:log_level] || Logger::WARN
  end
  @sleep_time = opts[:sleep_time] || (15 * 60)
  @working_dir = opts[:working_dir] || ENV['ROBOT_ROOT'] || Dir.pwd
  @pid_dir = File.expand_path(opts[:pid_dir] || File.join(@working_dir, 'pid'))
  # mkdir_p also creates missing parent directories and is a no-op when the
  # directory already exists, so no separate existence check is needed.
  FileUtils.mkdir_p(@pid_dir)
  @argv = (opts[:argv] || []).dup
  @logger.debug "Initializing application group."
  @logger.debug "Writing pids to #{@pid_dir}"
  super('robot_service_controller', :dir_mode => :normal, :dir => @pid_dir, :multiple => true, :backtrace => true)
end
|
27
|
-
|
28
|
-
# Build the qualified process name "<workflow>:<robot_name>" used to
# identify a robot's application and pidfile.
def qname(workflow, robot_name)
  name_parts = [workflow, robot_name]
  name_parts.join(':')
end
|
31
|
-
|
32
|
-
# Start the robot identified by +workflow+ and +robot_name+ as a
# daemonized application, unless an instance is already running.
#
# The robot class is looked up as <Module>::<Class>, where the module name
# is the part of +workflow+ before 'WF' with its first letter upcased, and
# the class name is +robot_name+ with each dash-separated word capitalized.
#
# Returns true when the application is running after the start attempt,
# false otherwise (including when it was already running).
def start(workflow, robot_name)
  result = false
  app = find_app(workflow, robot_name).first
  process_name = qname(workflow, robot_name)
  if app.nil? || (app.running? == false)
    @logger.info "Starting #{process_name}..."
    with_app_name("#{process_name}") do
      # capture_stdout returns [block_value, captured_text]; only the
      # application object is needed here.
      app, _captured = capture_stdout do
        raw_module_name = workflow.split('WF').first
        module_name = raw_module_name[0].chr.upcase << raw_module_name.slice(1, raw_module_name.size - 1)
        class_name = robot_name.split(/-/).map { |word| word.capitalize }.join('')
        robot_klass = Module.const_get(module_name).const_get(class_name)
        log_state = marshal_logger(@logger)
        robot_proc = lambda {
          Dir.chdir(@working_dir) do
            # Build the logger before entering the protected region so the
            # ensure clause below never runs with an unset logger (which
            # would hide the original failure behind a NoMethodError).
            logger = restore_logger(log_state)
            begin
              robot = robot_klass.new(:argv => @argv.dup)
              loop {
                case robot.start
                when LyberCore::Robots::SLEEP
                  logger.info "SLEEP condition reached in #{process_name}. Sleeping for #{@sleep_time} seconds."
                  sleep(@sleep_time)
                when LyberCore::Robots::HALT
                  logger.error "HALT condition reached in #{process_name}. Shutting down."
                  break
                end
              }
            ensure
              logger.info "Shutting down."
            end
          end
        }
        new_app = self.new_application({:mode => :proc, :proc => robot_proc, :dir_mode => :normal, :log_output => true, :log_dir => @pid_dir})
        new_app.start
        new_app
      end
    end

    if app.running?
      @logger.info "#{process_name} [#{app.pid.pid}] started."
      result = true
    else
      @logger.error "Unable to start #{process_name}"
    end
  else
    # Reached only when an application exists and is not reported stopped.
    @logger.warn "Robot #{process_name} [#{app.pid.pid}] is already running"
  end
  result
end
|
81
|
-
|
82
|
-
# Stop every application registered for +workflow+ / +robot_name+.
#
# Running applications are asked to stop; applications whose pidfile exists
# but which are not running have the stale pidfile removed via +zap!+.
#
# Returns true when at least one running application was confirmed stopped,
# false otherwise (nothing found, nothing running, or the stop failed).
def stop(workflow, robot_name)
  apps = find_app(workflow, robot_name)
  process_name = qname(workflow, robot_name)
  result = false
  if apps.empty?
    @logger.info "Robot #{process_name} not found"
  else
    apps.each do |app|
      if app.running?
        @logger.info "Shutting down #{process_name} [#{app.pid.pid}]..."
        # The value returned by app.stop is deliberately ignored: success is
        # judged solely by re-checking running? below.
        capture_stdout { app.stop }
        if app.running?
          @logger.error "Unable to stop #{process_name} [#{app.pid.pid}]."
        else
          @logger.info "#{process_name} [#{app.pid.pid}] shut down."
          result = true
        end
      else
        @logger.warn "Robot #{process_name} [#{app.pid.pid}] is not running but pidfile exists"
        app.zap!
      end
    end
  end
  result
end
|
107
|
-
|
108
|
-
# Report the state of every application found for +workflow+ / +robot_name+.
#
# Returns an array of hashes, one per application, each holding the
# application's :pid and a :status of :running or :stopped.
def status(workflow, robot_name)
  find_app(workflow, robot_name).map do |app|
    pid = app.pid.pid
    state = app.running? ? :running : :stopped
    { :pid => pid, :status => state }
  end
end
|
114
|
-
|
115
|
-
# Produce human-readable status lines for +workflow+ / +robot_name+.
#
# Returns an array of strings: a single "not found" message when no
# application exists, otherwise one message per application. An entry whose
# status is neither :running nor :stopped yields nil.
def status_message(workflow, robot_name)
  app_status = status(workflow, robot_name)
  process_name = qname(workflow, robot_name)
  return ["Robot #{process_name} not found"] if app_status.empty?

  app_status.map do |entry|
    if entry[:status] == :running
      "Robot #{process_name} [#{entry[:pid]}] is running"
    elsif entry[:status] == :stopped
      "Robot #{process_name} [#{entry[:pid]}] is not running but pidfile exists"
    end
  end
end
|
131
|
-
|
132
|
-
# private
|
133
|
-
# Run the given block with @app_name temporarily set to +name+.
# The previous value is restored afterwards, even if the block raises.
# Returns the block's value.
def with_app_name(name)
  previous_name = @app_name
  @app_name = name
  yield
ensure
  @app_name = previous_name
end
|
141
|
-
|
142
|
-
# Run the given block with $stdout redirected into an in-memory buffer.
# The captured text is written to @logger at debug level, and $stdout is
# restored afterwards even if the block raises.
#
# Returns a two-element array: [block_value, captured_text].
def capture_stdout
  original_stdout = $stdout
  buffer = StringIO.new('')
  $stdout = buffer
  block_value = yield
  captured = buffer.string
  @logger.debug captured
  [block_value, captured]
ensure
  $stdout = original_stdout
end
|
154
|
-
|
155
|
-
# Look up the applications registered for +workflow+ / +robot_name+ by
# scanning the pidfiles in @pid_dir while @app_name is set to the
# qualified process name.
def find_app(workflow, robot_name)
  process_name = qname(workflow, robot_name)
  with_app_name(process_name) do
    find_applications_by_pidfiles(@pid_dir)
  end
end
|
160
|
-
|
161
|
-
# Capture the state needed to rebuild logger +l+ elsewhere.
#
# Returns a hash with:
#   :dev   - the logger's internal log device (nil when it has none)
#   :file  - the log device's filename (nil when there is no device or the
#            device reports no filename)
#   :level - the logger's level
def marshal_logger(l)
  log_device = l.instance_variable_get('@logdev')
  # A logger may have no log device at all (e.g. Logger.new(nil)), so the
  # device must be checked before asking it for a filename.
  log_file = log_device && log_device.filename
  { :dev => log_device, :file => log_file, :level => l.level }
end
|
165
|
-
|
166
|
-
# Rebuild a logger from the state hash produced by marshal_logger.
#
# The target is chosen in this order: the :file path, the device stored
# under :dev (unwrapped to its underlying IO when it exposes one), then the
# legacy :log_device key. The level is taken from :level.
def restore_logger(params)
  device = params[:dev]
  # marshal_logger stores the logger's log device wrapper; hand the new
  # logger the IO it wraps rather than the wrapper itself.
  device = device.dev if device.respond_to?(:dev)
  result = Logger.new(params[:file] || device || params[:log_device])
  result.level = params[:level]
  result
end
|
171
|
-
|
172
|
-
end
|
173
|
-
end
|
174
|
-
end
|
@@ -1,112 +0,0 @@
|
|
1
|
-
require 'dor_service'
|
2
|
-
require "xml_models/identity_metadata/identity_metadata"
|
3
|
-
require "xml_models/identity_metadata/dublin_core"
|
4
|
-
|
5
|
-
|
6
|
-
# Represents a single object being processed as part of a workflow queue
|
7
|
-
module LyberCore
|
8
|
-
module Robots
|
9
|
-
class WorkItem
|
10
|
-
|
11
|
-
# The queue that this workitem is a member of
|
12
|
-
attr_reader :work_queue
|
13
|
-
# The primary id for the object being processed
|
14
|
-
attr_accessor :druid
|
15
|
-
# An object used to hold unmarshalled XML from the identityMetadata datastream
|
16
|
-
attr_accessor :identity_metadata
|
17
|
-
# Timings for this workitem's processing
|
18
|
-
attr_reader :start_time
|
19
|
-
attr_reader :end_time
|
20
|
-
attr_reader :elapsed_time
|
21
|
-
|
22
|
-
# Build a work item belonging to +work_queue+ and record the moment of
# creation as the start of its processing time.
def initialize(work_queue)
  @work_queue = work_queue
  @start_time = Time.now
end
|
27
|
-
|
28
|
-
# Replace the cached IdentityMetadata object with the one supplied
# (per the original note, currently used for unit testing only).
def identity_metadata=(identity_metadata)
  @identity_metadata = identity_metadata
end
|
32
|
-
|
33
|
-
# Save the IdentityMetadata object to the identityMetadata datastream.
#
# When DorService reports an existing identityMetadata datastream for
# @druid it is updated in place; otherwise a new datastream is added.
def identity_metadata_save
  xml = identity_metadata.to_xml
  if DorService.get_datastream(@druid, 'identityMetadata')
    # Positional arguments: content type 'application/xml', versionable false.
    DorService.update_datastream(@druid, 'identityMetadata', xml, 'application/xml', false)
  else
    DorService.add_datastream(@druid, 'identityMetadata', 'identityMetadata', xml)
  end
end
|
41
|
-
|
42
|
-
# Return the IdentityMetadata object for this work item, building it on
# first use: an empty IdentityMetadata when no druid is set, otherwise one
# parsed from the identityMetadata datastream fetched through DorService.
# The result is cached in @identity_metadata.
def identity_metadata
  return @identity_metadata unless @identity_metadata.nil?

  @identity_metadata =
    if @druid.nil?
      IdentityMetadata.new
    else
      IdentityMetadata.from_xml(DorService.get_datastream(@druid, 'identityMetadata'))
    end
end
|
54
|
-
|
55
|
-
# Return the identifier value for the specified identifier name, as
# reported by the work item's IdentityMetadata object.
def identifier(key)
  identity_metadata.get_identifier_value(key)
end
|
59
|
-
|
60
|
-
# Add a new name/value pair to the set of identifiers held by the work
# item's IdentityMetadata object. Returns whatever add_identifier returns.
def identifier_add(key, value)
  identity_metadata.add_identifier(key, value)
end
|
64
|
-
|
65
|
-
# Return the identifier pairs reported by the IdentityMetadata object
# (per the original note: an array of strings, each of the form name:value).
def id_pairs
  identity_metadata.get_id_pairs
end
|
69
|
-
|
70
|
-
# Return the druid for the work item if it has one, otherwise the first
# identifier pair from the IdentityMetadata object, or nil when there are
# no identifier pairs.
def item_id
  return @druid if @druid

  id_list = identity_metadata.get_id_pairs
  id_list.size > 0 ? id_list[0] : nil
end
|
76
|
-
|
77
|
-
# Record a non-error status for the workstep operation.
#
# Stamps @end_time, computes @elapsed_time from @start_time, logs the
# outcome, and, when the item has a druid, reports the status and elapsed
# time through Dor::WorkflowService.
#
# NOTE(review): set_error reports through DorService while this method uses
# Dor::WorkflowService; confirm the difference is intentional.
def set_status(status)
  @end_time = Time.new
  @elapsed_time = @end_time - @start_time
  LyberCore::Log.info("#{item_id} #{status} in #{@elapsed_time} seconds")
  return unless @druid

  workflow = @work_queue.workflow
  Dor::WorkflowService.update_workflow_status(workflow.repository, @druid, workflow.workflow_id, @work_queue.workflow_step, status, @elapsed_time)
end
|
87
|
-
|
88
|
-
# Record the successful outcome of the workstep operation for this work
# item: bump the queue's success tally and set the status to 'completed'.
def set_success
  @work_queue.success_count += 1
  set_status('completed')
end
|
93
|
-
|
94
|
-
# Record the unsuccessful outcome of the workstep operation for this work
# item.
#
# Bumps the queue's error tally, stamps the end/elapsed times, wraps +e+ in
# an ItemError unless it already is one, logs it, and, when the item has a
# druid, reports the error message through DorService.
def set_error(e)
  @work_queue.error_count += 1
  @end_time = Time.new
  @elapsed_time = @end_time - @start_time

  item_error =
    if e.is_a?(LyberCore::Exceptions::ItemError)
      e
    else
      LyberCore::Exceptions::ItemError.new(@druid, "Item error", e)
    end
  LyberCore::Log.exception(item_error)
  return unless @druid

  workflow = @work_queue.workflow
  DorService.update_workflow_error_status(workflow.repository, @druid, workflow.workflow_id, @work_queue.workflow_step, item_error.message)
end
|
109
|
-
|
110
|
-
end
|
111
|
-
end
|
112
|
-
end
|
@@ -1,177 +0,0 @@
|
|
1
|
-
require 'dor_service'
|
2
|
-
require 'dlss_service'
|
3
|
-
require 'yaml'
|
4
|
-
|
5
|
-
module LyberCore
|
6
|
-
module Robots
|
7
|
-
# Represents a set of workitem objects to be processed by a given step of a workflow
|
8
|
-
class WorkQueue
|
9
|
-
|
10
|
-
# The workflow that this queue is a part of
|
11
|
-
attr_reader :workflow
|
12
|
-
# The step in the workflow that is being processed against this queue
|
13
|
-
attr_reader :workflow_step
|
14
|
-
# The workflow step that should have already been completed for the workitem object
|
15
|
-
attr_reader :prerequisite
|
16
|
-
# The maximum number of workitem objects to process in one run of a robot
|
17
|
-
attr_reader :batch_limit
|
18
|
-
# The maximum number of errors to allow before terminating the batch run
|
19
|
-
attr_reader :error_limit
|
20
|
-
# The array of primary identifiers for the workitem objects to be processed
|
21
|
-
attr_reader :druids
|
22
|
-
# An alternative identifier to be used when druids are not yet available (e.g. at registration)
|
23
|
-
attr_reader :identifier_name
|
24
|
-
attr_reader :identifier_values
|
25
|
-
# The tally of how many items have been processed
|
26
|
-
attr_reader :item_count
|
27
|
-
attr_accessor :success_count
|
28
|
-
attr_accessor :error_count
|
29
|
-
# The timings for the batch run
|
30
|
-
attr_reader :start_time
|
31
|
-
attr_reader :end_time
|
32
|
-
attr :elapsed_time
|
33
|
-
|
34
|
-
attr_reader :config_file
|
35
|
-
|
36
|
-
|
37
|
-
# Create a new WorkQueue for the given workflow step: remember the parent
# workflow, start the timer, zero the tallies, and read the step's
# configuration.
#
# When either argument is nil (intended for test mode only) no config file
# is read; the batch limit defaults to 2 and the error limit to 1.
def initialize(workflow=nil, workflow_step=nil)
  LyberCore::Log.debug("Initializing work queue with workflow #{workflow} and workflow_step #{workflow_step}")
  @start_time = Time.new
  LyberCore::Log.info("Starting #{workflow_step} at #{@start_time}")
  @workflow = workflow
  @workflow_step = workflow_step
  @item_count = @success_count = @error_count = 0

  test_mode = workflow.nil? || workflow_step.nil?
  if test_mode
    @batch_limit = 2
    @error_limit = 1
  else
    process_config_file
  end
end
|
60
|
-
|
61
|
-
# Read process-config.yaml from the workflow's config directory and load
# this step's settings into @prerequisite, @batch_limit and @error_limit.
#
# Raises RuntimeError when the config file does not exist or when it has
# no section for @workflow_step.
def process_config_file
  LyberCore::Log.debug("Processing config file ... ")
  LyberCore::Log.debug("@workflow.workflow_config_dir = #{@workflow.workflow_config_dir}")

  @config_file = File.join(@workflow.workflow_config_dir, 'process-config.yaml')
  LyberCore::Log.debug("I'm opening the config file at #{@config_file}")

  # Does the file exist?
  raise "Can't open process-config file #{@config_file}" unless File.file? @config_file

  process_config = YAML.load_file(@config_file)
  LyberCore::Log.debug("process_config: #{process_config.inspect}")

  # An empty or non-mapping file, or a missing step section, would otherwise
  # surface as an opaque NoMethodError on nil further down.
  step_config = process_config.is_a?(Hash) ? process_config[@workflow_step] : nil
  raise "No configuration for workflow step #{@workflow_step} in #{@config_file}" unless step_config

  @prerequisite = step_config["prerequisite"]
  LyberCore::Log.debug("@prerequisite: #{@prerequisite}")

  @batch_limit = step_config['batch_limit']
  LyberCore::Log.debug("@batch_limit: #{@batch_limit}")

  @error_limit = step_config['error_limit']
  LyberCore::Log.debug("@error_limit: #{@error_limit}")
end
|
84
|
-
|
85
|
-
# Use the given array of druids as the items this queue will process,
# logging the queue contents at debug level.
def enqueue_druids(druid_array)
  log = LyberCore::Log
  log.debug("\nEnqueing an array of druids...")
  @druids = druid_array
  log.debug("\n@druids = #{@druids}")
end
|
91
|
-
|
92
|
-
# Tell whether the prerequisite is given in three-part colon-separated
# form (matching /.+:.+:.+/).
#
# For an Array prerequisite, returns true only when every entry matches.
# Otherwise returns the result of the regexp match on the single value:
# the match index when it matches, nil when it does not.
def fully_qualified_prerequisite?
  qualified_form = /.+:.+:.+/
  if @prerequisite.class == Array
    @prerequisite.all? { |name| name =~ qualified_form }
  else
    @prerequisite =~ qualified_form
  end
end
|
100
|
-
|
101
|
-
# Obtain the set of druids to be processed by querying for the repository
# objects that are awaiting this step.
#
# One query is issued per prerequisite (qualified or unqualified, as decided
# by fully_qualified_prerequisite?); the queue receives the druids common to
# all result lists, truncated to at most batch_limit entries. With no
# prerequisite configured the queue is left empty.
def enqueue_workstep_waiting()
  LyberCore::Log.debug("\nEnqueing workstep waiting...")

  prerequisites = Array(@prerequisite)
  qualified = fully_qualified_prerequisite?
  druid_lists = prerequisites.map do |prerequisite|
    object_list_xml =
      if qualified
        DorService.get_objects_for_qualified_workstep(prerequisite, "#{workflow.repository}:#{workflow.workflow_id}:#{@workflow_step}")
      else
        DorService.get_objects_for_workstep(workflow.repository, workflow.workflow_id, prerequisite, @workflow_step)
      end
    LyberCore::Log.debug("\nobject_list_xml = #{object_list_xml}")
    druid_list = DlssService.get_all_druids_from_object_list(object_list_xml)
    # Log the list just fetched; @druids is not assigned until all
    # prerequisites have been queried.
    LyberCore::Log.debug("\ndruid_list = #{druid_list}")
    druid_list
  end
  # Keep only the druids present in every list; inject yields nil when
  # there were no prerequisites at all.
  common = druid_lists.inject { |collector, list| collector & list } || []
  # first(n) returns an empty list for a limit of 0, unlike [0..(n-1)],
  # which would return everything.
  @druids = common.first(batch_limit)
end
|
124
|
-
|
125
|
-
# Queue items by an alternative identifier instead of druids, e.g. an
# array of barcodes for a registration robot. Stores the identifier name
# and the array of values to work through.
def enqueue_identifiers(identifier_name, identifier_values)
  @identifier_name = identifier_name
  @identifier_values = identifier_values
end
|
131
|
-
|
132
|
-
# Get the next WorkItem to be processed by the robot for the workflow step.
#
# Returns nil when the batch limit or the error limit has been reached,
# when nothing has been queued, or when the queued druids / identifier
# values are exhausted. Otherwise returns a new WorkItem carrying the next
# druid (or the next alternative identifier) and advances the item count.
def next_item()
  if @item_count >= @batch_limit
    LyberCore::Log.info "Batch limit of #{@batch_limit} items reached"
    return nil
  end
  if @error_count >= @error_limit
    LyberCore::Log.info "Error limit of #{@error_limit} items reached"
    return nil
  end

  # Druids take precedence over alternative identifier values.
  pending = @druids || @identifier_values
  return nil unless pending
  return nil if @item_count >= pending.length

  work_item = LyberCore::Robots::WorkItem.new(self)
  if @druids
    work_item.druid = @druids[@item_count]
  else
    work_item.identifier_add(@identifier_name, @identifier_values[@item_count])
  end
  @item_count += 1
  work_item
end
|
155
|
-
|
156
|
-
# True once the number of errors recorded for this queue has reached the
# configured error limit.
def max_errors_reached?
  !(@error_count < @error_limit)
end
|
159
|
-
|
160
|
-
# Stamp the end of the batch and log its total elapsed time together with
# the success and error tallies.
def print_stats
  @end_time = Time.new
  @elapsed_time = @end_time - @start_time
  log = LyberCore::Log
  log.info "Total time: #{@elapsed_time}\n"
  log.info "Completed objects: #{success_count}\n"
  log.info "Errors: #{error_count}\n"
end
|
168
|
-
|
169
|
-
# Stamp the end of the batch and log the elapsed time for a run that found
# nothing to process.
def print_empty_stats
  @end_time = Time.new
  @elapsed_time = @end_time - @start_time
  log = LyberCore::Log
  log.info "Total time: #{@elapsed_time}\n"
  log.info "Empty queue"
end
|
175
|
-
end
|
176
|
-
end
|
177
|
-
end
|