lyber-core 0.9.6.2.3 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,174 @@
1
+ require 'daemons'
2
+ require 'logger'
3
+ require 'fileutils'
4
+
5
+ module LyberCore
6
+ module Robots
7
+ class ServiceController < Daemons::ApplicationGroup
8
+ attr_reader :logger
9
+
10
# Build the controller and register it as a Daemons application group.
#
# @param opts [Hash] optional settings
# @option opts [Logger] :logger logger to use; when absent a logger on $stdout is created
# @option opts [Integer] :log_level level for the default logger (Logger::WARN when absent);
#   ignored when :logger is supplied
# @option opts [Numeric] :sleep_time seconds a robot sleeps on a SLEEP condition (default 15 minutes)
# @option opts [String] :working_dir directory robots run in (default ENV['ROBOT_ROOT'], then Dir.pwd)
# @option opts [String] :pid_dir directory for pid files (default "<working_dir>/pid")
# @option opts [Array] :argv arguments handed to each robot; copied so the caller's array is untouched
def initialize(opts = {})
  if opts[:logger]
    @logger = opts[:logger]
  else
    @logger = Logger.new($stdout)
    @logger.level = opts[:log_level] || Logger::WARN
  end
  @sleep_time = opts[:sleep_time] || (15 * 60)
  @working_dir = opts[:working_dir] || ENV['ROBOT_ROOT'] || Dir.pwd
  @pid_dir = File.expand_path(opts[:pid_dir] || File.join(@working_dir, 'pid'))
  # mkdir_p also creates missing parent directories and does nothing when the
  # directory is already there, so a nested :pid_dir no longer raises ENOENT.
  FileUtils.mkdir_p(@pid_dir)
  @argv = (opts[:argv] || []).dup
  @logger.debug "Initializing application group."
  @logger.debug "Writing pids to #{@pid_dir}"
  super('robot_service_controller', :dir_mode => :normal, :dir => @pid_dir, :multiple => true, :backtrace => true)
end
27
+
28
# Qualified process name for a robot: the workflow and robot name separated by a colon.
def qname(workflow, robot_name)
  name_parts = [workflow, robot_name]
  name_parts.join(':')
end
31
+
32
# Start a robot as a daemonized process unless one is already running.
#
# The robot class is looked up from the arguments: the part of +workflow+
# before 'WF', with its first letter upcased, names the module; +robot_name+
# split on '-' with each word capitalized names the class inside it.
# The daemonized proc runs in @working_dir and calls robot.start in a loop,
# sleeping @sleep_time seconds on SLEEP and leaving the loop on HALT.
#
# @param workflow [String] workflow name, e.g. "googleScannedBookWF"
# @param robot_name [String] dash-separated robot name
# @return [Boolean] true when the robot is running after the start attempt;
#   false when it could not be started or was already running
def start(workflow, robot_name)
  result = false
  app = find_app(workflow, robot_name).first
  process_name = qname(workflow, robot_name)
  if app.nil? || !app.running?
    @logger.info "Starting #{process_name}..."
    with_app_name(process_name) do
      # capture_stdout returns [block value, captured text]; only the new application is needed
      app, _output = capture_stdout do
        raw_module_name = workflow.split('WF').first
        module_name = raw_module_name[0].chr.upcase << raw_module_name.slice(1, raw_module_name.size - 1)
        robot_klass = Module.const_get(module_name).const_get(robot_name.split(/-/).collect { |w| w.capitalize }.join(''))
        # Logger settings are captured here and a fresh logger is built from them inside the proc
        log_state = marshal_logger(@logger)
        robot_proc = lambda {
          Dir.chdir(@working_dir) do
            begin
              logger = restore_logger(log_state)
              robot = robot_klass.new(:argv => @argv.dup)
              loop {
                case robot.start
                when LyberCore::Robots::SLEEP
                  logger.info "SLEEP condition reached in #{process_name}. Sleeping for #{@sleep_time} seconds."
                  sleep(@sleep_time)
                when LyberCore::Robots::HALT
                  logger.error "HALT condition reached in #{process_name}. Shutting down."
                  break
                end
              }
            ensure
              # logger is still nil when restore_logger itself raised; guarding here
              # keeps the original exception from being replaced by a NoMethodError.
              logger.info "Shutting down." if logger
            end
          end
        }
        new_app = self.new_application({:mode => :proc, :proc => robot_proc, :dir_mode => :normal, :log_output => true, :log_dir => @pid_dir})
        new_app.start
        new_app
      end
    end

    if app.running?
      @logger.info "#{process_name} [#{app.pid.pid}] started."
      result = true
    else
      @logger.error "Unable to start #{process_name}"
    end
  else
    @logger.warn "Robot #{process_name} [#{app.pid.pid}] is already running"
  end
  result
end
81
+
82
# Stop every instance of a robot found through its pid files.
#
# Instances that are not running but still have a pid file are cleaned up
# with zap!. The return value of app.stop is deliberately ignored: success is
# judged only by whether the application is still running afterwards.
#
# @param workflow [String] workflow name
# @param robot_name [String] robot name
# @return [Boolean] true when at least one running instance was shut down,
#   false otherwise (nothing found, nothing running, or the stop failed)
def stop(workflow, robot_name)
  apps = find_app(workflow, robot_name)
  process_name = qname(workflow, robot_name)
  result = false
  if apps.empty?
    @logger.info "Robot #{process_name} not found"
  else
    apps.each do |app|
      if app.running?
        @logger.info "Shutting down #{process_name} [#{app.pid.pid}]..."
        capture_stdout { app.stop }
        if app.running?
          @logger.error "Unable to stop #{process_name} [#{app.pid.pid}]."
        else
          @logger.info "#{process_name} [#{app.pid.pid}] shut down."
          result = true
        end
      else
        @logger.warn "Robot #{process_name} [#{app.pid.pid}] is not running but pidfile exists"
        app.zap!
      end
    end
  end
  result
end
107
+
108
# Report each application found for the robot as a hash holding its pid and
# whether it is :running or :stopped.
def status(workflow, robot_name)
  find_app(workflow, robot_name).map do |app|
    pid = app.pid.pid
    state = app.running? ? :running : :stopped
    { :pid => pid, :status => state }
  end
end
114
+
115
# Human-readable status lines for a robot: one message per application found,
# or a single "not found" message when there is none.
def status_message(workflow, robot_name)
  entries = status(workflow, robot_name)
  process_name = qname(workflow, robot_name)
  return ["Robot #{process_name} not found"] if entries.empty?

  entries.map do |entry|
    if entry[:status] == :running
      "Robot #{process_name} [#{entry[:pid]}] is running"
    elsif entry[:status] == :stopped
      "Robot #{process_name} [#{entry[:pid]}] is not running but pidfile exists"
    end
  end
end
131
+
132
+ # private
133
# Run the block with @app_name temporarily set to +name+ and return the
# block's value; the previous @app_name is put back even if the block raises.
def with_app_name(name)
  previous_name = @app_name
  @app_name = name
  yield
ensure
  @app_name = previous_name
end
141
+
142
# Redirect $stdout into an in-memory buffer while the block runs.
# The captured text is written to the debug log, and the method returns
# a two-element array: the block's value and the captured text.
# $stdout is always restored, even when the block raises.
def capture_stdout
  saved_stdout = $stdout
  buffer = StringIO.new('')
  $stdout = buffer
  value = yield
  @logger.debug buffer.string
  [value, buffer.string]
ensure
  $stdout = saved_stdout
end
154
+
155
# Look up the applications recorded in @pid_dir for the given robot, using
# the robot's qualified name as the application name during the search.
def find_app(workflow, robot_name)
  app_name = qname(workflow, robot_name)
  with_app_name(app_name) do
    find_applications_by_pidfiles(@pid_dir)
  end
end
160
+
161
# Capture the settings needed to rebuild a logger later.
#
# @param l [Logger] logger to describe
# @return [Hash] :dev (the logger's @logdev, nil when it has none),
#   :file (the device's filename, nil when there is no device or it is not
#   file-backed) and :level
def marshal_logger(l)
  log_device = l.instance_variable_get('@logdev')
  # A logger built with no device has a nil @logdev; do not call filename on it.
  file = log_device.respond_to?(:filename) ? log_device.filename : nil
  { :dev => log_device, :file => file, :level => l.level }
end
165
+
166
# Build a new Logger from settings captured by marshal_logger.
#
# The log file name is preferred; otherwise the captured device is used.
# marshal_logger stores the device under :dev, so that key is read first;
# :log_device is still honoured for callers that pass it.
#
# @param params [Hash] :file, :dev (or :log_device) and :level
# @return [Logger] logger writing to the same destination at the same level
def restore_logger(params)
  device = params[:dev] || params[:log_device]
  # Unwrap a Logger::LogDevice to the IO it wraps so the new logger gets its own device object.
  device = device.dev if device.is_a?(Logger::LogDevice)
  result = Logger.new(params[:file] || device)
  result.level = params[:level]
  result
end
171
+
172
+ end
173
+ end
174
+ end
@@ -1,6 +1,7 @@
1
1
  require 'dor_service'
2
- require "roxml_models/identity_metadata/identity_metadata"
3
- require "roxml_models/identity_metadata/dublin_core"
2
+ require "xml_models/identity_metadata/identity_metadata"
3
+ require "xml_models/identity_metadata/dublin_core"
4
+
4
5
 
5
6
  # Represents a single object being processed as part of a workflow queue
6
7
  module LyberCore
@@ -32,9 +33,9 @@ module LyberCore
32
33
  #save the IdentityMetadata object to identityMetadata datastream
33
34
  def identity_metadata_save
34
35
  unless DorService.get_datastream(@druid, 'identityMetadata')
35
- DorService.add_datastream(@druid, 'identityMetadata', 'identityMetadata', self.identity_metadata.to_xml.to_xml)
36
+ DorService.add_datastream(@druid, 'identityMetadata', 'identityMetadata', self.identity_metadata.to_xml)
36
37
  else
37
- DorService.update_datastream(@druid, 'identityMetadata', self.identity_metadata.to_xml.to_xml, content_type='application/xml', versionable = false)
38
+ DorService.update_datastream(@druid, 'identityMetadata', self.identity_metadata.to_xml, content_type='application/xml', versionable = false)
38
39
  end #unless
39
40
  end #identity_metadata_save
40
41
 
@@ -73,31 +74,39 @@ module LyberCore
73
74
  return pairs[0] if (pairs.size > 0)
74
75
  end
75
76
 
76
- # Record the successful outcome of the workstep operation for this workitem
77
- def set_success
78
- @work_queue.success_count += 1
77
+ # Record a non-error status for the workstep operation
78
+ def set_status(status)
79
+ @elapsed_time = Time.new - @start_time
79
80
  @end_time = Time.new
80
81
  @elapsed_time = @end_time - @start_time
81
- LyberCore::Log.info("#{item_id} completed in #{@elapsed_time} seconds")
82
+ LyberCore::Log.info("#{item_id} #{status} in #{@elapsed_time} seconds")
82
83
  if (@druid)
83
- Dor::WorkflowService.update_workflow_status(@work_queue.workflow.repository, @druid, @work_queue.workflow.workflow_id, @work_queue.workflow_step, 'completed', @elapsed_time)
84
+ Dor::WorkflowService.update_workflow_status(@work_queue.workflow.repository, @druid, @work_queue.workflow.workflow_id, @work_queue.workflow_step, status, @elapsed_time)
84
85
  end
85
86
  end
86
87
 
88
# Mark this work item as successfully processed: bump the queue's success
# counter, then record the 'completed' status.
def set_success
  @work_queue.success_count = @work_queue.success_count + 1
  set_status('completed')
end
93
+
87
94
  # Record the unsuccessful outcome of the workstep operation for this workitem
88
95
  def set_error(e)
89
96
  @work_queue.error_count += 1
90
97
  @end_time = Time.new
91
98
  @elapsed_time = @end_time - @start_time
92
- LyberCore::Log.error("#{item_id} error - #{e.backtrace}")
93
- # By default puts will output an array with a newline between each item.
99
+ if (e.is_a?(LyberCore::Exceptions::ItemError) )
100
+ item_error = e
101
+ else
102
+ item_error = LyberCore::Exceptions::ItemError.new(@druid, "Item error", e)
103
+ end
104
+ LyberCore::Log.exception(item_error)
94
105
  if (@druid)
95
- DorService.update_workflow_error_status(@work_queue.workflow.repository, @druid, @work_queue.workflow.workflow_id, @work_queue.workflow_step, e.message)
106
+ DorService.update_workflow_error_status(@work_queue.workflow.repository, @druid, @work_queue.workflow.workflow_id, @work_queue.workflow_step, item_error.message)
96
107
  end
97
- # We've caught and processed the error at this point, I don't think we want to raise it again. --bess
98
- # raise e
99
108
  end
100
-
109
+
101
110
  end
102
111
  end
103
112
  end
@@ -88,16 +88,35 @@ module LyberCore
88
88
  @druids = druid_array
89
89
  LyberCore::Log.debug("\n@druids = #{@druids}")
90
90
  end
91
+
92
# Tell whether @prerequisite is written in three-part, colon-separated form
# (something:something:something).
#
# When @prerequisite is an Array every entry must match; otherwise the single
# value is tested. A nil prerequisite is not qualified.
#
# @return [Boolean] always true or false
def fully_qualified_prerequisite?
  qualified_form = /.+:.+:.+/
  if @prerequisite.is_a?(Array)
    @prerequisite.all? { |p| !(qualified_form =~ p).nil? }
  else
    # =~ yields a match offset or nil; convert so the predicate returns a real boolean
    !(qualified_form =~ @prerequisite).nil?
  end
end
91
100
 
92
101
  # Obtain the set of druids to be processed using a database query
93
102
  # to obtain the repository objects that are awaiting this step
94
103
  def enqueue_workstep_waiting()
95
104
  begin
96
105
  LyberCore::Log.debug("\nEnqueing workstep waiting...")
97
- object_list_xml = DorService.get_objects_for_workstep(workflow.repository, workflow.workflow_id, @prerequisite, @workflow_step)
98
- LyberCore::Log.debug("\nobject_list_xml = #{object_list_xml}")
99
- @druids = DlssService.get_some_druids_from_object_list(object_list_xml,self.batch_limit)
100
- LyberCore::Log.debug("\n@druids = #{@druids}")
106
+
107
+ prerequisites = Array(@prerequisite)
108
+ qualified = fully_qualified_prerequisite?
109
+ druid_lists = prerequisites.collect do |prerequisite|
110
+ object_list_xml = qualified ?
111
+ DorService.get_objects_for_qualified_workstep(prerequisite, "#{workflow.repository}:#{workflow.workflow_id}:#{@workflow_step}") :
112
+ DorService.get_objects_for_workstep(workflow.repository, workflow.workflow_id, prerequisite, @workflow_step)
113
+ LyberCore::Log.debug("\nobject_list_xml = #{object_list_xml}")
114
+ druid_list = DlssService.get_all_druids_from_object_list(object_list_xml)
115
+ LyberCore::Log.debug("\n@druids = #{@druids}")
116
+ druid_list
117
+ end
118
+ @druids = druid_lists.inject(druid_lists[0]) { |collector, list| collector & list }
119
+ @druids = @druids[0..(self.batch_limit-1)]
101
120
  rescue Exception => e
102
121
  raise e
103
122
  end
@@ -133,6 +152,10 @@ module LyberCore
133
152
  @item_count += 1
134
153
  return work_item
135
154
  end
155
+
156
# True once the number of errors counted so far is at or above the error limit.
def max_errors_reached?
  errors_so_far = @error_count
  errors_so_far >= @error_limit
end
136
159
 
137
160
  # Output the batch's timings and other statistics to the main log file
138
161
  def print_stats
@@ -2,7 +2,7 @@ require 'fileutils'
2
2
  module LyberCore
3
3
  module Robots
4
4
  class Workspace
5
-
5
+
6
6
  attr_reader :workflow_name
7
7
  attr_reader :collection_name
8
8
  attr_reader :workspace_base
@@ -20,7 +20,7 @@ module LyberCore
20
20
  def set_workspace_home
21
21
  begin
22
22
  if not (workspace_home = ENV['WORKSPACE_HOME'] )
23
- workspace_home = WORKSPACE_HOME
23
+ workspace_home = Dor::Config.robots.workspace
24
24
  end
25
25
  rescue NameError => e
26
26
  LyberCore::Log.fatal("WORKSPACE_HOME is undefined. Do you need to set it in your config file?")
@@ -45,7 +45,7 @@ module LyberCore
45
45
  def self.md5_hash_from_content_metadata(content_md)
46
46
  content_md_checksum_hash = {}
47
47
  doc = Nokogiri::XML(content_md)
48
- doc.xpath('/contentMetadata/resource/file').each do |filenode|
48
+ doc.xpath('/contentMetadata/resource[@type="page"]/file').each do |filenode|
49
49
  filename = filenode.attribute('id')
50
50
  if (filename)
51
51
  md5_element = filenode.xpath('checksum[@type="MD5"]').first
@@ -3,21 +3,18 @@ require 'systemu'
3
3
 
4
4
  # File Utilities for use in transferring filesystem objects,
5
5
  # decrypting a file, unpacking a targz archive, and validating checksums
6
- # Author:: rnanders@stanford.edu
6
+ # @author rnanders@stanford.edu
7
7
  module LyberCore
8
8
  module Utils
9
9
  class FileUtilities
10
10
 
11
11
 
12
- # Executes a system command in a subprocess
13
- #
14
- # = Inputs:
15
- # * command = the command to be executed
16
- #
17
- # = Return value:
18
- # * The method will return stdout from the command if execution was successful.
19
- # * The method will raise an exception if if execution fails
12
+ # Executes a system command in a subprocess.
13
+ # The method will return stdout from the command if execution was successful.
14
+ # The method will raise an exception if execution fails.
20
15
  # The exception's message will contain the explanation of the failure.
16
+ # @param [String] command the command to be executed
17
+ # @return [String] stdout from the command if execution was successful
21
18
  def FileUtilities.execute(command)
22
19
  status, stdout, stderr = systemu(command)
23
20
  if (status.exitstatus != 0)
@@ -25,7 +22,10 @@ module LyberCore
25
22
  end
26
23
  return stdout
27
24
  rescue
28
- raise "Command failed to execute: #{command}"
25
+ msg = "Command failed to execute: [#{command}] caused by <STDERR = #{stderr.split($/).join('; ')}>"
26
+ msg << " STDOUT = #{stdout.split($/).join('; ')}" if (stdout && (stdout.length > 0))
27
+ raise msg
28
+
29
29
  end
30
30
 
31
31
  # Generates a dirname for storing or retrieving a file in
@@ -0,0 +1,116 @@
1
+ require 'nokogiri'
2
+
3
+ #<oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
4
+ #xmlns:srw_dc="info:srw/schema/1/dc-schema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
5
+ #xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://cosimo.stanford.edu/standards/oai_dc/v2/oai_dc.xsd">
6
+ # <dc:title>Life of Abraham Lincoln, sixteenth president of the United States: Containing his early
7
+ #history and political career; together with the speeches, messages, proclamations and other official
8
+ #documents illus. of his eventful administration</dc:title>
9
+ # <dc:creator>Crosby, Frank.</dc:creator>
10
+ # <dc:format>text</dc:format>
11
+ # <dc:language>eng</dc:language>
12
+ # <dc:subject>E457 .C94</dc:subject>
13
+ # <dc:identifier>lccn:11030686</dc:identifier>
14
+ # <dc:identifier>callseq:1</dc:identifier>
15
+ # <dc:identifier>shelfseq:973.7111 .L731CR</dc:identifier>
16
+ # <dc:identifier>catkey:1206382</dc:identifier>
17
+ # <dc:identifier>barcode:36105005459602</dc:identifier>
18
+ # <dc:identifier>uuid:ddcf5f1a-0331-4345-beca-e66f7db276eb</dc:identifier>
19
+ # <dc:identifier>google:STANFORD_36105005459602</dc:identifier>
20
+ # <dc:identifier>druid:ng786kn0371</dc:identifier>
21
+ #</oai_dc:dc>
22
+
23
# In-memory model of a Dublin Core record.
#
# Each DC element is held as an array of string values. The record can be
# serialized with to_xml, or populated from a DC XML string with from_xml.
class DublinCore

  # Raw XML string: either the string given to the constructor or the result
  # of the most recent build_xml call.
  attr_accessor :xml

  # One array of values per Dublin Core element.
  attr_accessor :title, :creator, :subject, :description, :publisher,
                :contributor, :date, :type, :format, :identifier,
                :source, :language, :relation, :coverage, :rights

  # @param xml [String, nil] when given, it is stored untouched in +xml+ and
  #   nothing is built, so callers can read back the raw, unprocessed string;
  #   when nil, an XML document is built from the (empty) element arrays.
  def initialize(xml = nil)
    @title ||= []
    @creator ||= []
    @subject ||= []
    @description ||= []
    @publisher ||= []
    @contributor ||= []
    @date ||= []
    @type ||= []
    @format ||= []
    @identifier ||= []
    @source ||= []
    @language ||= []
    @relation ||= []
    @coverage ||= []
    @rights ||= []

    if xml.nil?
      build_xml
    else
      @xml = xml
    end
  end # initialize

  # Serialize every element array into an oai_dc document and store the
  # result in +xml+.
  #
  # @return [String] the generated XML
  def build_xml
    builder = Nokogiri::XML::Builder.new do |xml|
      xml.dc('xmlns:dc' => 'http://purl.org/dc/elements/1.1/', 'xmlns:srw_dc' => 'info:srw/schema/1/dc-schema', "xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance") {
        xml.parent.namespace = xml.parent.add_namespace_definition('oai_dc', 'http://www.openarchives.org/OAI/2.0/oai_dc/')
        # NOTE(review): this registers "xsi:schemaLocation" as a namespace prefix rather than
        # setting the xsi:schemaLocation attribute; kept as-is to leave the output unchanged -- confirm intent.
        xml.parent.add_namespace_definition("xsi:schemaLocation", "http://www.openarchives.org/OAI/2.0/oai_dc/ http://cosimo.stanford.edu/standards/oai_dc/v2/oai_dc.xsd")
        instance_variables.each do |var|
          # instance_variables yields Symbols on Ruby >= 1.9 and Strings on 1.8; compare as text
          name = var.to_s
          next if name == '@xml'
          # Array() tolerates an element that was assigned nil or a single value
          Array(instance_variable_get(var)).each { |v| xml['dc'].send("#{name.delete('@')}_", v) }
        end
      }
    end
    @xml = builder.to_xml
  end

  # Rebuild +xml+ from the current element values and return it as a string.
  def to_xml
    build_xml
    xml
  end # to_xml

  # Parse a DC XML string, appending the stripped text of each child of the
  # root element to the accessor that has the same name. Children with no
  # matching element accessor are ignored.
  #
  # @param xml [String] Dublin Core XML; also kept verbatim in the result's +xml+
  # @return [DublinCore] the populated record; all elements stay empty when
  #   the string has no root element
  def self.from_xml(xml = "")
    dc = DublinCore.new(xml)
    root = Nokogiri::XML(xml).root
    return dc if root.nil?

    known = dc.instance_variables.map { |v| v.to_s }
    root.element_children.each do |c|
      # never let an <xml> child append to the stored raw XML string
      next if c.name == 'xml'
      next unless known.include?("@#{c.name}")
      dc.send(c.name) << c.text.strip
    end
    dc
  end # from_xml

end # dublin_core