lyber-core 0.9.6.2.3 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,174 @@
1
+ require 'daemons'
2
+ require 'logger'
3
+ require 'fileutils'
4
+
5
+ module LyberCore
6
+ module Robots
7
+ class ServiceController < Daemons::ApplicationGroup
8
+ attr_reader :logger
9
+
10
# Configure the controller and register it as a Daemons application group.
#
# Recognised keys in +opts+ (all optional):
#   :logger      - logger to use; when absent a $stdout logger is created
#   :log_level   - level for the default logger (Logger::WARN if omitted)
#   :sleep_time  - seconds a robot sleeps on SLEEP (default 15 minutes)
#   :working_dir - directory robots run in (ROBOT_ROOT env var, else Dir.pwd)
#   :pid_dir     - directory for pid files (default: <working_dir>/pid)
#   :argv        - arguments handed to each robot instance (copied)
def initialize(opts = {})
  if opts[:logger]
    @logger = opts[:logger]
  else
    @logger = Logger.new($stdout)
    @logger.level = opts[:log_level] || Logger::WARN
  end
  @sleep_time = opts[:sleep_time] || (15 * 60)
  @working_dir = opts[:working_dir] || ENV['ROBOT_ROOT'] || Dir.pwd
  @pid_dir = File.expand_path(opts[:pid_dir] || File.join(@working_dir, 'pid'))
  # mkdir_p also creates missing parent directories; plain mkdir raised
  # when e.g. the working directory itself did not exist yet.
  FileUtils.mkdir_p(@pid_dir) unless File.directory?(@pid_dir)
  @argv = (opts[:argv] || []).dup
  @logger.debug "Initializing application group."
  @logger.debug "Writing pids to #{@pid_dir}"
  super('robot_service_controller', :dir_mode => :normal, :dir => @pid_dir, :multiple => true, :backtrace => true)
end
27
+
28
# Build the qualified process name "<workflow>:<robot_name>" used to label
# a robot's daemon process.
def qname(workflow, robot_name)
  parts = [workflow, robot_name]
  parts.join(':')
end
31
+
32
# Start the robot identified by +workflow+ and +robot_name+ as a daemon,
# unless an instance found through its pid file is already running.
#
# The robot class is resolved from the names: the text before 'WF' in
# +workflow+ (first letter upcased) names the module, and the dash-separated
# +robot_name+ is camel-cased into the class name.
#
# Returns true when the new process is running afterwards, false otherwise
# (including when the robot was already running).
def start(workflow, robot_name)
  result = false
  app = find_app(workflow, robot_name).first
  process_name = qname(workflow, robot_name)
  if app.nil? || !app.running?
    @logger.info "Starting #{process_name}..."
    with_app_name(process_name) do
      # Anything printed while the application is created is captured and
      # sent to the debug log by capture_stdout.
      app, _output = capture_stdout do
        raw_module_name = workflow.split('WF').first
        module_name = raw_module_name[0].chr.upcase << raw_module_name.slice(1, raw_module_name.size - 1)
        robot_klass = Module.const_get(module_name).const_get(robot_name.split(/-/).collect { |w| w.capitalize }.join(''))
        # The logger is rebuilt inside the robot proc from this state
        # rather than being used directly.
        log_state = marshal_logger(@logger)
        robot_proc = lambda do
          Dir.chdir(@working_dir) do
            logger = nil
            begin
              logger = restore_logger(log_state)
              robot = robot_klass.new(:argv => @argv.dup)
              loop do
                case robot.start
                when LyberCore::Robots::SLEEP
                  logger.info "SLEEP condition reached in #{process_name}. Sleeping for #{@sleep_time} seconds."
                  sleep(@sleep_time)
                when LyberCore::Robots::HALT
                  logger.error "HALT condition reached in #{process_name}. Shutting down."
                  break
                end
              end
            ensure
              # logger is still nil if restore_logger itself raised; guard so
              # the original exception is not masked by a NoMethodError.
              logger.info "Shutting down." if logger
            end
          end
        end
        new_app = self.new_application({:mode => :proc, :proc => robot_proc, :dir_mode => :normal, :log_output => true, :log_dir => @pid_dir})
        new_app.start
        new_app
      end
    end

    if app.running?
      @logger.info "#{process_name} [#{app.pid.pid}] started."
      result = true
    else
      @logger.error "Unable to start #{process_name}"
    end
  else
    @logger.warn "Robot #{process_name} [#{app.pid.pid}] is already running"
  end
  result
end
81
+
82
# Stop every running instance of the given robot.
#
# Applications are located through their pid files. A pid file whose process
# is no longer running is removed with zap!.
#
# Returns true only when at least one instance was running and every running
# instance was shut down; false otherwise. A failed shutdown used to leave
# app.stop's own return value in the result.
def stop(workflow, robot_name)
  apps = find_app(workflow, robot_name)
  process_name = qname(workflow, robot_name)
  if apps.empty?
    @logger.info "Robot #{process_name} not found"
    return false
  end

  outcomes = []
  apps.each do |app|
    if app.running?
      @logger.info "Shutting down #{process_name} [#{app.pid.pid}]..."
      # Only the side effect matters; success is judged by running? below.
      capture_stdout { app.stop }
      if app.running?
        @logger.error "Unable to stop #{process_name} [#{app.pid.pid}]."
        outcomes << false
      else
        @logger.info "#{process_name} [#{app.pid.pid}] shut down."
        outcomes << true
      end
    else
      @logger.warn "Robot #{process_name} [#{app.pid.pid}] is not running but pidfile exists"
      app.zap!
    end
  end
  !outcomes.empty? && outcomes.all?
end
107
+
108
# Report each known instance of the robot as a hash of the form
# { :pid => <process id>, :status => :running or :stopped }.
# Returns an empty array when no pid file matches.
def status(workflow, robot_name)
  find_app(workflow, robot_name).map do |instance|
    process_id = instance.pid.pid
    state = instance.running? ? :running : :stopped
    { :pid => process_id, :status => state }
  end
end
114
+
115
# Turn the result of #status into human-readable lines, one per instance,
# or a single "not found" line when there are no instances.
def status_message(workflow, robot_name)
  entries = status(workflow, robot_name)
  label = qname(workflow, robot_name)
  return ["Robot #{label} not found"] if entries.empty?

  entries.map do |entry|
    if entry[:status] == :running
      "Robot #{label} [#{entry[:pid]}] is running"
    elsif entry[:status] == :stopped
      "Robot #{label} [#{entry[:pid]}] is not running but pidfile exists"
    end
  end
end
131
+
132
+ # private
133
# Run the given block with @app_name temporarily set to +name+, restoring
# the previous value afterwards even if the block raises.
# Returns the block's value.
def with_app_name(name)
  previous_name = @app_name
  @app_name = name
  yield
ensure
  @app_name = previous_name
end
141
+
142
# Run the block with $stdout redirected into an in-memory buffer.
# The captured text is written to the debug log, and $stdout is always
# restored afterwards.
#
# Returns a two-element array: [block result, captured output].
#
# NOTE(review): StringIO is used here but 'stringio' is not among the
# requires visible at the top of this file - confirm it is loaded elsewhere.
def capture_stdout
  saved_stdout = $stdout
  buffer = StringIO.new('')
  $stdout = buffer
  value = yield
  captured = buffer.string
  @logger.debug captured
  [value, captured]
ensure
  $stdout = saved_stdout
end
154
+
155
# Look up the applications for the given robot by scanning @pid_dir for
# pid files while the group's app name is set to the robot's qualified name.
# Returns whatever find_applications_by_pidfiles returns.
def find_app(workflow, robot_name)
  scoped_name = qname(workflow, robot_name)
  with_app_name(scoped_name) do
    find_applications_by_pidfiles(@pid_dir)
  end
end
160
+
161
# Capture enough of logger +l+ to rebuild an equivalent logger later
# (see restore_logger).
#
# Returns a hash with:
#   :dev   - the logger's internal log device (nil for a null logger)
#   :file  - the device's filename, or nil when there is none
#   :level - the logger's level
def marshal_logger(l)
  # Reaches into Logger's internals: there is no public reader for the device.
  log_device = l.instance_variable_get('@logdev')
  # Logger.new(nil) has no device at all; previously this raised NoMethodError.
  file = log_device ? log_device.filename : nil
  { :dev => log_device, :file => file, :level => l.level }
end
165
+
166
# Rebuild a logger from the hash produced by marshal_logger.
#
# The file name is preferred; when there is none the captured device stored
# under :dev is reused. The original looked only under :log_device, a key
# marshal_logger never sets, so a logger writing to an IO came back as a
# null logger. :log_device is still honoured for any caller that passes it.
#
# Returns a new Logger with the saved level.
def restore_logger(params)
  target = params[:file] || params[:dev] || params[:log_device]
  result = Logger.new(target)
  result.level = params[:level]
  result
end
171
+
172
+ end
173
+ end
174
+ end
@@ -1,6 +1,7 @@
1
1
  require 'dor_service'
2
- require "roxml_models/identity_metadata/identity_metadata"
3
- require "roxml_models/identity_metadata/dublin_core"
2
+ require "xml_models/identity_metadata/identity_metadata"
3
+ require "xml_models/identity_metadata/dublin_core"
4
+
4
5
 
5
6
  # Represents a single object being processed as part of a workflow queue
6
7
  module LyberCore
@@ -32,9 +33,9 @@ module LyberCore
32
33
  #save the IdentityMetadata object to identityMetadata datastream
33
34
  def identity_metadata_save
34
35
  unless DorService.get_datastream(@druid, 'identityMetadata')
35
- DorService.add_datastream(@druid, 'identityMetadata', 'identityMetadata', self.identity_metadata.to_xml.to_xml)
36
+ DorService.add_datastream(@druid, 'identityMetadata', 'identityMetadata', self.identity_metadata.to_xml)
36
37
  else
37
- DorService.update_datastream(@druid, 'identityMetadata', self.identity_metadata.to_xml.to_xml, content_type='application/xml', versionable = false)
38
+ DorService.update_datastream(@druid, 'identityMetadata', self.identity_metadata.to_xml, content_type='application/xml', versionable = false)
38
39
  end #unless
39
40
  end #identity_metadata_save
40
41
 
@@ -73,31 +74,39 @@ module LyberCore
73
74
  return pairs[0] if (pairs.size > 0)
74
75
  end
75
76
 
76
- # Record the successful outcome of the workstep operation for this workitem
77
- def set_success
78
- @work_queue.success_count += 1
77
# Record a non-error status for the workstep operation.
#
# Stamps the end time, computes the elapsed time since @start_time, logs
# the outcome, and - when the item has a druid - reports +status+ and the
# elapsed time to the workflow service.
def set_status(status)
  # The elapsed time is derived from @end_time only; the earlier separate
  # Time.new computation was immediately overwritten.
  @end_time = Time.new
  @elapsed_time = @end_time - @start_time
  LyberCore::Log.info("#{item_id} #{status} in #{@elapsed_time} seconds")
  if @druid
    Dor::WorkflowService.update_workflow_status(@work_queue.workflow.repository, @druid, @work_queue.workflow.workflow_id, @work_queue.workflow_step, status, @elapsed_time)
  end
end
86
87
 
88
# Record the successful outcome of the workstep operation for this workitem:
# bump the queue's success counter, then record the 'completed' status.
def set_success
  queue = @work_queue
  queue.success_count = queue.success_count + 1
  set_status('completed')
end
93
+
87
94
  # Record the unsuccessful outcome of the workstep operation for this workitem
88
95
  def set_error(e)
89
96
  @work_queue.error_count += 1
90
97
  @end_time = Time.new
91
98
  @elapsed_time = @end_time - @start_time
92
- LyberCore::Log.error("#{item_id} error - #{e.backtrace}")
93
- # By default puts will output an array with a newline between each item.
99
+ if (e.is_a?(LyberCore::Exceptions::ItemError) )
100
+ item_error = e
101
+ else
102
+ item_error = LyberCore::Exceptions::ItemError.new(@druid, "Item error", e)
103
+ end
104
+ LyberCore::Log.exception(item_error)
94
105
  if (@druid)
95
- DorService.update_workflow_error_status(@work_queue.workflow.repository, @druid, @work_queue.workflow.workflow_id, @work_queue.workflow_step, e.message)
106
+ DorService.update_workflow_error_status(@work_queue.workflow.repository, @druid, @work_queue.workflow.workflow_id, @work_queue.workflow_step, item_error.message)
96
107
  end
97
- # We've caught and processed the error at this point, I don't think we want to raise it again. --bess
98
- # raise e
99
108
  end
100
-
109
+
101
110
  end
102
111
  end
103
112
  end
@@ -88,16 +88,35 @@ module LyberCore
88
88
  @druids = druid_array
89
89
  LyberCore::Log.debug("\n@druids = #{@druids}")
90
90
  end
91
+
92
# Whether @prerequisite is given in fully qualified form, i.e. matches
# "<something>:<something>:<something>". For an Array, every entry must match.
#
# Always returns true or false; the single-value branch used to return the
# match index or nil.
def fully_qualified_prerequisite?
  qualified_form = /.+:.+:.+/
  if @prerequisite.is_a?(Array)
    @prerequisite.all? { |p| !(p =~ qualified_form).nil? }
  else
    !(@prerequisite =~ qualified_form).nil?
  end
end
91
100
 
92
101
  # Obtain the set of druids to be processed using a database query
93
102
  # to obtain the repository objects that are awaiting this step
94
103
  def enqueue_workstep_waiting()
95
104
  begin
96
105
  LyberCore::Log.debug("\nEnqueing workstep waiting...")
97
- object_list_xml = DorService.get_objects_for_workstep(workflow.repository, workflow.workflow_id, @prerequisite, @workflow_step)
98
- LyberCore::Log.debug("\nobject_list_xml = #{object_list_xml}")
99
- @druids = DlssService.get_some_druids_from_object_list(object_list_xml,self.batch_limit)
100
- LyberCore::Log.debug("\n@druids = #{@druids}")
106
+
107
+ prerequisites = Array(@prerequisite)
108
+ qualified = fully_qualified_prerequisite?
109
+ druid_lists = prerequisites.collect do |prerequisite|
110
+ object_list_xml = qualified ?
111
+ DorService.get_objects_for_qualified_workstep(prerequisite, "#{workflow.repository}:#{workflow.workflow_id}:#{@workflow_step}") :
112
+ DorService.get_objects_for_workstep(workflow.repository, workflow.workflow_id, prerequisite, @workflow_step)
113
+ LyberCore::Log.debug("\nobject_list_xml = #{object_list_xml}")
114
+ druid_list = DlssService.get_all_druids_from_object_list(object_list_xml)
115
+ LyberCore::Log.debug("\n@druids = #{@druids}")
116
+ druid_list
117
+ end
118
+ @druids = druid_lists.inject(druid_lists[0]) { |collector, list| collector & list }
119
+ @druids = @druids[0..(self.batch_limit-1)]
101
120
  rescue Exception => e
102
121
  raise e
103
122
  end
@@ -133,6 +152,10 @@ module LyberCore
133
152
  @item_count += 1
134
153
  return work_item
135
154
  end
155
+
156
# True once the number of recorded errors has reached the configured limit.
def max_errors_reached?
  limit = @error_limit
  @error_count >= limit
end
136
159
 
137
160
  # Output the batch's timings and other statistics to the main log file
138
161
  def print_stats
@@ -2,7 +2,7 @@ require 'fileutils'
2
2
  module LyberCore
3
3
  module Robots
4
4
  class Workspace
5
-
5
+
6
6
  attr_reader :workflow_name
7
7
  attr_reader :collection_name
8
8
  attr_reader :workspace_base
@@ -20,7 +20,7 @@ module LyberCore
20
20
  def set_workspace_home
21
21
  begin
22
22
  if not (workspace_home = ENV['WORKSPACE_HOME'] )
23
- workspace_home = WORKSPACE_HOME
23
+ workspace_home = Dor::Config.robots.workspace
24
24
  end
25
25
  rescue NameError => e
26
26
  LyberCore::Log.fatal("WORKSPACE_HOME is undefined. Do you need to set it in your config file?")
@@ -45,7 +45,7 @@ module LyberCore
45
45
  def self.md5_hash_from_content_metadata(content_md)
46
46
  content_md_checksum_hash = {}
47
47
  doc = Nokogiri::XML(content_md)
48
- doc.xpath('/contentMetadata/resource/file').each do |filenode|
48
+ doc.xpath('/contentMetadata/resource[@type="page"]/file').each do |filenode|
49
49
  filename = filenode.attribute('id')
50
50
  if (filename)
51
51
  md5_element = filenode.xpath('checksum[@type="MD5"]').first
@@ -3,21 +3,18 @@ require 'systemu'
3
3
 
4
4
  # File Utilities for use in transferring filesystem objects,
5
5
  # decrypting a file, unpacking a targz archive, and validating checksums
6
- # Author:: rnanders@stanford.edu
6
+ # @author rnanders@stanford.edu
7
7
  module LyberCore
8
8
  module Utils
9
9
  class FileUtilities
10
10
 
11
11
 
12
- # Executes a system command in a subprocess
13
- #
14
- # = Inputs:
15
- # * command = the command to be executed
16
- #
17
- # = Return value:
18
- # * The method will return stdout from the command if execution was successful.
19
- # * The method will raise an exception if if execution fails
12
+ # Executes a system command in a subprocess.
13
+ # The method will return stdout from the command if execution was successful.
14
+ # The method will raise an exception if execution fails.
20
15
  # The exception's message will contain the explanation of the failure.
16
+ # @param [String] command the command to be executed
17
+ # @return [String] stdout from the command if execution was successful
21
18
  def FileUtilities.execute(command)
22
19
  status, stdout, stderr = systemu(command)
23
20
  if (status.exitstatus != 0)
@@ -25,7 +22,10 @@ module LyberCore
25
22
  end
26
23
  return stdout
27
24
  rescue
28
- raise "Command failed to execute: #{command}"
25
+ msg = "Command failed to execute: [#{command}] caused by <STDERR = #{stderr.split($/).join('; ')}>"
26
+ msg << " STDOUT = #{stdout.split($/).join('; ')}" if (stdout && (stdout.length > 0))
27
+ raise msg
28
+
29
29
  end
30
30
 
31
31
  # Generates a dirname for storing or retrieving a file in
@@ -0,0 +1,116 @@
1
+ require 'nokogiri'
2
+
3
+ #<oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
4
+ #xmlns:srw_dc="info:srw/schema/1/dc-schema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
5
+ #xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://cosimo.stanford.edu/standards/oai_dc/v2/oai_dc.xsd">
6
+ # <dc:title>Life of Abraham Lincoln, sixteenth president of the United States: Containing his early
7
+ #history and political career; together with the speeches, messages, proclamations and other official
8
+ #documents illus. of his eventful administration</dc:title>
9
+ # <dc:creator>Crosby, Frank.</dc:creator>
10
+ # <dc:format>text</dc:format>
11
+ # <dc:language>eng</dc:language>
12
+ # <dc:subject>E457 .C94</dc:subject>
13
+ # <dc:identifier>lccn:11030686</dc:identifier>
14
+ # <dc:identifier>callseq:1</dc:identifier>
15
+ # <dc:identifier>shelfseq:973.7111 .L731CR</dc:identifier>
16
+ # <dc:identifier>catkey:1206382</dc:identifier>
17
+ # <dc:identifier>barcode:36105005459602</dc:identifier>
18
+ # <dc:identifier>uuid:ddcf5f1a-0331-4345-beca-e66f7db276eb</dc:identifier>
19
+ # <dc:identifier>google:STANFORD_36105005459602</dc:identifier>
20
+ # <dc:identifier>druid:ng786kn0371</dc:identifier>
21
+ #</oai_dc:dc>
22
+
23
# A simple Dublin Core record. Each of the fifteen DC elements is held as an
# Array of string values; the serialized record is kept in +xml+.
class DublinCore

  # Element names handled by this class, in serialization order. Using an
  # explicit list avoids relying on instance_variables, which returns
  # Symbols on Ruby 1.9+ and broke the previous String comparisons.
  ELEMENTS = %w[title creator subject description publisher contributor date
                type format identifier source language relation coverage rights].freeze

  # The record as an XML string (raw input when one was supplied).
  attr_accessor :xml

  # One Array-valued accessor per DC element.
  attr_accessor(*ELEMENTS)

  # Create a record with every element initialised to an empty Array.
  #
  # If an XML string is given it is stored untouched in +xml+ (no rebuild),
  # so the raw unprocessed XML stays accessible; otherwise +xml+ is built
  # from the (empty) element lists.
  def initialize(xml = nil)
    ELEMENTS.each { |name| instance_variable_set("@#{name}", []) }

    if xml.nil?
      build_xml
    else
      @xml = xml
    end
  end #initialize

  # Serialize the element lists into an oai_dc:dc document, store the result
  # in +xml+ and return it.
  def build_xml
    builder = Nokogiri::XML::Builder.new do |xml|
      xml.dc('xmlns:dc' => 'http://purl.org/dc/elements/1.1/', 'xmlns:srw_dc' => 'info:srw/schema/1/dc-schema', "xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance" ) {
        xml.parent.namespace = xml.parent.add_namespace_definition('oai_dc','http://www.openarchives.org/OAI/2.0/oai_dc/')
        # NOTE(review): this registers "xsi:schemaLocation" as a namespace
        # definition; the sample record suggests a plain xsi:schemaLocation
        # attribute was intended - confirm against the generated XML.
        xml.parent.add_namespace_definition("xsi:schemaLocation", "http://www.openarchives.org/OAI/2.0/oai_dc/ http://cosimo.stanford.edu/standards/oai_dc/v2/oai_dc.xsd")
        ELEMENTS.each do |name|
          # The trailing underscore is kept from the original builder call.
          instance_variable_get("@#{name}").each { |v| xml['dc'].send("#{name}_", v) }
        end #ELEMENTS.each
      }
    end
    @xml = builder.to_xml
  end

  # Rebuild the xml attr_accessor from the element lists and return it as a
  # string.
  def to_xml
    build_xml
    return self.xml
  end #to_xml

  # Take DC XML as a string and map the root's child elements onto their
  # matching accessors (text is stripped). Children whose name is not a DC
  # element are ignored. The raw string is kept in +xml+.
  #
  # A document with no root element (e.g. the default empty string) yields an
  # empty record instead of raising.
  def self.from_xml(xml="")
    dc = DublinCore.new(xml)
    doc = Nokogiri::XML(xml)
    root = doc.root
    return dc if root.nil?

    root.element_children.each do |c|
      next unless ELEMENTS.include?(c.name)
      dc.send(c.name) << c.text.strip
    end #each
    return dc
  end #from_xml

end #dublin_core