ddr-batch 1.1.0 → 1.2.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a6f10719eff4705f320384ac30a9b9353911e375
4
- data.tar.gz: 668bcc32b0d5a85f36443a71436b42b0a20b01cb
3
+ metadata.gz: e99d993322b0feb42c08dd0a367aa2cbb9a8bd36
4
+ data.tar.gz: 7d3ea407d8f1cc09f33725c8c54cbbb934967a98
5
5
  SHA512:
6
- metadata.gz: d4c23913959d6405ca58d0155a98126ffc4e0c7741e7f9d2961f48f411927ff9c48d36c34e72a65bcb0cb961617324c1cbaa3fc7a5d78fa4aee519484d3d3187
7
- data.tar.gz: a9fbe5fbee20e38b27f94f9bc5f7f4d067e4dd3938e21372839c3e5ccf43513fe239be531cfb76011bac7c70f03dce4f4942c15f48206cb0ce8362b08581aeb8
6
+ metadata.gz: 8954284d5fed970fbaa7ee95626bafc28f7e906ba668d657d95279191555f842b6b5832943bc9034fdcf5bf699f27ad2d7c2d412d34cb4d89f6d9fff24bb1eb5
7
+ data.tar.gz: 2de0336399b640e5b3b85daa2ca65ebd1ac31f85e259907d80701d68198c8969e1dc155024ebe9a3bbe16b50cc61703677fe5cb2a9392d2306e2d5094b914bcc
@@ -0,0 +1,19 @@
1
+ module Ddr::Batch
2
+ class BatchDeletionJob
3
+ @queue = :batch
4
+
5
+ def self.perform(batch_id)
6
+ batch = Batch.find(batch_id)
7
+ batch.status = Batch::STATUS_DELETING
8
+ batch.save!
9
+ batch.destroy!
10
+ end
11
+
12
+ def self.before_enqueue_set_status(batch_id)
13
+ batch = Batch.find(batch_id)
14
+ batch.status = Batch::STATUS_QUEUED_FOR_DELETION
15
+ batch.save
16
+ end
17
+
18
+ end
19
+ end
@@ -0,0 +1,11 @@
1
+ module Ddr::Batch
2
+ class BatchObjectsProcessorJob
3
+ @queue = :batch
4
+
5
+ def self.perform(batch_object_ids, operator_id)
6
+ operator = User.find(operator_id)
7
+ ProcessBatchObjects.new(batch_object_ids: batch_object_ids, operator: operator).execute
8
+ end
9
+
10
+ end
11
+ end
@@ -3,12 +3,7 @@ module Ddr::Batch
3
3
  @queue = :batch
4
4
 
5
5
  def self.perform(batch_id, operator_id)
6
- ts = Time.now.strftime("%Y%m%d%H%M%S%L")
7
- logfile = "batch_processor_#{ts}_log.txt"
8
- batch = Batch.find(batch_id)
9
- operator = User.find(operator_id)
10
- bp = BatchProcessor.new(batch, operator, log_file: logfile)
11
- bp.execute
6
+ ProcessBatch.new(batch_id: batch_id, operator_id: operator_id).execute
12
7
  end
13
8
 
14
9
  def self.after_enqueue_set_status(batch_id, operator_id)
@@ -18,4 +13,4 @@ module Ddr::Batch
18
13
  end
19
14
 
20
15
  end
21
- end
16
+ end
@@ -6,7 +6,7 @@ module Ddr::Batch
6
6
 
7
7
  def send_notification(batch)
8
8
  @batch = batch
9
- @title = "Batch Processor Run #{@batch.status}"
9
+ @title = "Batch Processor Run #{@batch.status} #{@batch.outcome}"
10
10
  @host = `uname -n`.strip
11
11
  @subject = "[#{@host}] #{@title}"
12
12
  from = "#{`echo $USER`.strip}@#{@host}"
@@ -16,4 +16,4 @@ module Ddr::Batch
16
16
 
17
17
  end
18
18
 
19
- end
19
+ end
@@ -19,30 +19,18 @@ module Ddr::Batch
19
19
  STATUS_FINISHED = "FINISHED"
20
20
  STATUS_INTERRUPTED = "INTERRUPTED"
21
21
  STATUS_RESTARTABLE = "INTERRUPTED - RESTARTABLE"
22
+ STATUS_QUEUED_FOR_DELETION = "QUEUED FOR DELETION"
23
+ STATUS_DELETING = "DELETING"
22
24
 
23
- def validate
24
- errors = []
25
- begin
26
- batch_objects.each do |object|
27
- unless object.verified
28
- errors << object.validate
29
- end
30
- end
31
- rescue Exception => e
32
- errors << "Exception raised during batch validation: #{e.backtrace}"
33
- end
34
- errors.flatten
35
- end
36
-
37
- def completed_count
38
- batch_objects.where(verified: true).count
25
+ def handled_count
26
+ batch_objects.where(handled: true).count
39
27
  end
40
28
 
41
29
  def time_to_complete
42
- unless processing_step_start.nil?
43
- if completed_count > 0
44
- completed = completed_count
45
- ((Time.now - processing_step_start.to_time) / completed) * (batch_objects.count - completed)
30
+ unless start.nil?
31
+ if handled_count > 0
32
+ handled = handled_count
33
+ ((Time.now - start.to_time) / handled) * (batch_objects.count - handled)
46
34
  end
47
35
  end
48
36
  end
@@ -63,10 +51,14 @@ module Ddr::Batch
63
51
  batch_objects.map{ |x| x.pid if x.pid.present? }.compact
64
52
  end
65
53
 
54
+ def unhandled_objects?
55
+ batch_objects.any? { |batch_object| !batch_object.handled? }
56
+ end
57
+
66
58
  def finished?
67
59
  status == STATUS_FINISHED
68
60
  end
69
61
 
70
62
  end
71
63
 
72
- end
64
+ end
@@ -7,6 +7,7 @@ module Ddr::Batch
7
7
  belongs_to :batch, inverse_of: :batch_objects
8
8
  has_many :batch_object_attributes, -> { order "id ASC" }, inverse_of: :batch_object, dependent: :destroy
9
9
  has_many :batch_object_datastreams, inverse_of: :batch_object, dependent: :destroy
10
+ has_many :batch_object_messages, inverse_of: :batch_object, dependent: :destroy
10
11
  has_many :batch_object_relationships, inverse_of: :batch_object, dependent: :destroy
11
12
  has_many :batch_object_roles, inverse_of: :batch_object, dependent: :destroy
12
13
 
@@ -20,6 +21,8 @@ module Ddr::Batch
20
21
  Model: %{model}
21
22
  EOS
22
23
 
24
+ ProcessingResultsMessage = Struct.new(:level, :message)
25
+
23
26
  def self.pid_from_identifier(identifier, batch_id)
24
27
  query = "identifier = :identifier"
25
28
  query << " and batch_id = :batch_id" if batch_id
@@ -112,12 +115,11 @@ module Ddr::Batch
112
115
  obj_model = batch.found_pids[r[:object]]
113
116
  else
114
117
  begin
115
- obj = ActiveFedora::Base.find(r[:object], :cast => true)
116
- obj_model = obj.class.name
118
+ obj_model = SolrDocument.find(r[:object]).active_fedora_model
117
119
  if batch.present?
118
- batch.add_found_pid(obj.pid, obj_model)
120
+ batch.add_found_pid(r[:object], obj_model)
119
121
  end
120
- rescue ActiveFedora::ObjectNotFoundError
122
+ rescue SolrDocument::NotFound
121
123
  pid_in_batch = false
122
124
  if batch.present?
123
125
  if batch.pre_assigned_pids.include?(r[:object])
@@ -0,0 +1,8 @@
1
+ module Ddr::Batch
2
+ class BatchObjectMessage < ActiveRecord::Base
3
+ belongs_to :batch_object, :inverse_of => :batch_object_messages
4
+
5
+ validates_presence_of :message
6
+
7
+ end
8
+ end
@@ -1,7 +1,6 @@
1
1
  module Ddr::Batch
2
2
 
3
3
  class BatchObjectRelationship < ActiveRecord::Base
4
- # attr_accessible :name, :object, :object_type, :operation, :batch_object
5
4
  belongs_to :batch_object, :inverse_of => :batch_object_relationships
6
5
 
7
6
  RELATIONSHIP_ADMIN_POLICY = "admin_policy"
@@ -0,0 +1,10 @@
1
+ module Ddr
2
+ module Batch
3
+ # Base class for custom exceptions
4
+ class Error < StandardError; end
5
+
6
+ # Error processing batch object
7
+ class BatchObjectProcessingError < Error; end
8
+
9
+ end
10
+ end
@@ -20,10 +20,11 @@ module Ddr::Batch
20
20
 
21
21
  def results_message
22
22
  if pid
23
- verification_result = (verified ? "Verified" : "VERIFICATION FAILURE")
24
- message = "Ingested #{model} #{identifier} into #{pid}...#{verification_result}"
23
+ message_level = verified ? Logger::INFO : Logger::WARN
24
+ verification_result = verified ? "Verified" : "VERIFICATION FAILURE"
25
+ ProcessingResultsMessage.new(message_level, "Ingested #{model} #{identifier} into #{pid}...#{verification_result}")
25
26
  else
26
- message = "Attempt to ingest #{model} #{identifier} FAILED"
27
+ ProcessingResultsMessage.new(Logger::ERROR, "Attempt to ingest #{model} #{identifier} FAILED")
27
28
  end
28
29
  end
29
30
 
@@ -95,30 +96,21 @@ module Ddr::Batch
95
96
  begin
96
97
  repo_object = model.constantize.new(:pid => repo_pid)
97
98
  repo_object.label = label if label
98
- repo_object.save(validate: false)
99
99
  batch_object_attributes.each { |a| repo_object = add_attribute(repo_object, a) }
100
+ repo_object.save(validate: false)
100
101
  batch_object_datastreams.each { |d| repo_object = populate_datastream(repo_object, d) }
101
102
  batch_object_relationships.each { |r| repo_object = add_relationship(repo_object, r) }
102
103
  batch_object_roles.each { |r| repo_object = add_role(repo_object, r) }
103
- repo_object.save! # Do not allow batch ingest to successfully create an invalid object
104
+ repo_object.save!
104
105
  rescue Exception => e1
105
106
  logger.fatal("Error in creating repository object #{repo_object.pid} for #{identifier} : #{e1}")
106
- repo_clean = false
107
107
  if repo_object && !repo_object.new_record?
108
108
  begin
109
109
  logger.info("Deleting potentially incomplete #{repo_object.pid} due to error in ingest batch processing")
110
110
  repo_object.destroy
111
111
  rescue Exception => e2
112
112
  logger.fatal("Error deleting repository object #{repo_object.pid}: #{e2}")
113
- else
114
- repo_clean = true
115
113
  end
116
- else
117
- repo_clean = true
118
- end
119
- if batch.present?
120
- batch.status = repo_clean ? Batch::STATUS_RESTARTABLE : Batch::STATUS_INTERRUPTED
121
- batch.save
122
114
  end
123
115
  raise e1
124
116
  end
@@ -0,0 +1,29 @@
1
+ module Ddr::Batch
2
+ class Log
3
+
4
+ DEFAULT_LOG_DIR = File.join(Rails.root, 'log')
5
+
6
+ class << self
7
+
8
+ def logger(batch_id)
9
+ loggr = Logger.new(File.open(file_path(batch_id), File::WRONLY | File::APPEND | File::CREAT))
10
+ loggr.level = Ddr::Batch.processor_logging_level
11
+ loggr.datetime_format = "%Y-%m-%d %H:%M:%S.%L"
12
+ loggr.formatter = proc do |severity, datetime, progname, msg|
13
+ "#{datetime} #{severity}: #{msg}\n"
14
+ end
15
+ loggr
16
+ end
17
+
18
+ def clear_log(batch_id)
19
+ log_file_path = file_path(batch_id)
20
+ FileUtils.remove(log_file_path) if File.exist?(log_file_path)
21
+ end
22
+
23
+ def file_path(batch_id)
24
+ File.join(DEFAULT_LOG_DIR, "batch_#{batch_id}_log.txt")
25
+ end
26
+
27
+ end
28
+ end
29
+ end
@@ -43,10 +43,11 @@ module Ddr::Batch
43
43
 
44
44
  def results_message
45
45
  if pid
46
- verification_result = (verified ? "Verified" : "VERIFICATION FAILURE")
47
- message = "Updated #{pid}...#{verification_result}"
46
+ message_level = verified ? Logger::INFO : Logger::WARN
47
+ verification_result = verified ? "Verified" : "VERIFICATION FAILURE"
48
+ ProcessingResultsMessage.new(message_level, "Updated #{pid}...#{verification_result}")
48
49
  else
49
- message = "Attempt to update #{model} #{identifier} FAILED"
50
+ ProcessingResultsMessage.new(Logger::ERROR, "Attempt to update #{model} #{identifier} FAILED")
50
51
  end
51
52
  end
52
53
 
@@ -60,6 +61,7 @@ module Ddr::Batch
60
61
  repo_object = nil
61
62
  begin
62
63
  repo_object = ActiveFedora::Base.find(pid)
64
+ update!(model: repo_object.class.name) unless model.present?
63
65
  batch_object_attributes.each do |a|
64
66
  repo_object = case
65
67
  when a.operation.eql?(BatchObjectAttribute::OPERATION_ADD)
@@ -81,10 +83,6 @@ module Ddr::Batch
81
83
  end
82
84
  rescue Exception => e
83
85
  logger.error("Error in updating repository object #{pid} for #{identifier} : : #{e}")
84
- if batch.present?
85
- batch.status = Batch::STATUS_RESTARTABLE
86
- batch.save
87
- end
88
86
  raise e
89
87
  end
90
88
  repo_object
@@ -0,0 +1,84 @@
1
+ module Ddr::Batch
2
+ class MonitorBatchFinished
3
+
4
+ class << self
5
+ def call(*args)
6
+ event = ActiveSupport::Notifications::Event.new(*args)
7
+ batch = Ddr::Batch::Batch.find(event.payload[:batch_id])
8
+ batch_finished(batch)
9
+ end
10
+
11
+ private
12
+
13
+ def batch_finished(batch)
14
+ log_batch_finish(batch)
15
+ update_batch(batch)
16
+ send_notification(batch) if batch.user && batch.user.email
17
+ end
18
+
19
+ def log_batch_finish(batch)
20
+ logger = Ddr::Batch::Log.logger(batch.id)
21
+ logger.info "====== Summary ======"
22
+ results_tracker = results(batch)
23
+ results_tracker.keys.each do |type|
24
+ results_tracker[type].keys.each do |model|
25
+ log_result(results_tracker, type, model, logger)
26
+ end
27
+ end
28
+ logger.close
29
+ end
30
+
31
+ def results(batch)
32
+ results_tracker = Hash.new
33
+ batch.batch_objects.each do |batch_object|
34
+ track_result(results_tracker, batch_object)
35
+ end
36
+ results_tracker
37
+ end
38
+
39
+ def track_result(results_tracker, batch_object)
40
+ type, model = [ batch_object.type, batch_object.model ]
41
+ results_tracker[type] = Hash.new unless results_tracker.has_key?(type)
42
+ results_tracker[type][model] = Hash.new unless results_tracker[type].has_key?(model)
43
+ results_tracker[type][model][:successes] = 0 unless results_tracker[type][model].has_key?(:successes)
44
+ results_tracker[type][model][:successes] += 1 if batch_object.verified
45
+ end
46
+
47
+ def log_result(results_tracker, type, model, logger)
48
+ verb = type_verb(type)
49
+ count = results_tracker[type][model][:successes]
50
+ logger.info "#{verb} #{ActionController::Base.helpers.pluralize(count, model)}"
51
+ end
52
+
53
+ def type_verb(type)
54
+ case type
55
+ when IngestBatchObject.name
56
+ "Ingested"
57
+ when UpdateBatchObject.name
58
+ "Updated"
59
+ end
60
+ end
61
+
62
+ def update_batch(batch)
63
+ outcome = batch.success.eql?(batch.batch_objects.size) ? Batch::OUTCOME_SUCCESS : Batch::OUTCOME_FAILURE
64
+ logfile = File.new(Ddr::Batch::Log.file_path(batch.id))
65
+ batch.update!(stop: DateTime.now,
66
+ status: Batch::STATUS_FINISHED,
67
+ outcome: outcome,
68
+ logfile: logfile)
69
+ end
70
+
71
+ def send_notification(batch)
72
+ begin
73
+ BatchProcessorRunMailer.send_notification(batch).deliver!
74
+ rescue
75
+ Rails.logger.error("An error occurred while attempting to send a notification for batch #{batch.id}")
76
+ end
77
+ end
78
+ end
79
+
80
+ end
81
+ end
82
+
83
+
84
+
@@ -0,0 +1,36 @@
1
+ module Ddr::Batch
2
+ class MonitorBatchObjectHandled
3
+
4
+ class << self
5
+ def call(*args)
6
+ event = ActiveSupport::Notifications::Event.new(*args)
7
+ batch_object = BatchObject.find(event.payload[:batch_object_id])
8
+ batch = batch_object.batch
9
+ batch_object_handled(batch_object, batch)
10
+ end
11
+
12
+ private
13
+
14
+ def batch_object_handled(batch_object, batch)
15
+ log_batch_object_messages(batch_object, batch.id)
16
+ update_batch(batch_object, batch)
17
+ unless batch.unhandled_objects?
18
+ ActiveSupport::Notifications.instrument('finished.batch.batch.ddr', batch_id: batch.id)
19
+ end
20
+ end
21
+
22
+ def log_batch_object_messages(batch_object, batch_id)
23
+ logger = Ddr::Batch::Log.logger(batch_id)
24
+ batch_object.batch_object_messages.each do |message|
25
+ logger.add(message.level) { "Batch Object #{batch_object.id}: #{message.message}" }
26
+ end
27
+ logger.close
28
+ end
29
+
30
+ def update_batch(batch_object, batch)
31
+ batch_object.verified? ? batch.update!(success: batch.success + 1) : batch.update!(failure: batch.failure + 1)
32
+ end
33
+ end
34
+
35
+ end
36
+ end
@@ -0,0 +1,42 @@
1
+ module Ddr::Batch
2
+ class MonitorBatchStarted
3
+
4
+ class << self
5
+ def call(*args)
6
+ event = ActiveSupport::Notifications::Event.new(*args)
7
+ batch = Ddr::Batch::Batch.find(event.payload[:batch_id])
8
+ batch_started(batch)
9
+ end
10
+
11
+ private
12
+
13
+ def batch_started(batch)
14
+ clear_logs(batch)
15
+ log_batch_start(batch)
16
+ update_batch(batch)
17
+ end
18
+
19
+ def clear_logs(batch)
20
+ # delete any previously existing filesystem log file for this batch
21
+ Ddr::Batch::Log.clear_log(batch.id)
22
+ # remove any existing attached log file from the Batch ActiveRecord object
23
+ batch.logfile.clear
24
+ end
25
+
26
+ def log_batch_start(batch)
27
+ logger = Ddr::Batch::Log.logger(batch.id)
28
+ logger.info "Batch id: #{batch.id}"
29
+ logger.info "Batch name: #{batch.name}" if batch.name
30
+ logger.info "Batch size: #{batch.batch_objects.size}"
31
+ logger.close
32
+ end
33
+
34
+ def update_batch(batch)
35
+ batch.update!(start: DateTime.now,
36
+ status: Ddr::Batch::Batch::STATUS_RUNNING,
37
+ version: VERSION)
38
+ end
39
+ end
40
+
41
+ end
42
+ end
@@ -0,0 +1,59 @@
1
+ module Ddr::Batch
2
+ class ProcessBatch
3
+
4
+ attr_accessor :batch, :operator_id
5
+
6
+ def initialize(batch_id:, operator_id:)
7
+ @batch = Ddr::Batch::Batch.find(batch_id)
8
+ @operator_id = operator_id
9
+ end
10
+
11
+ def execute
12
+ ActiveSupport::Notifications.instrument('started.batch.batch.ddr', batch_id: batch.id)
13
+ batch.batch_objects.each do |batch_object|
14
+ case
15
+ when batch_object.is_a?(IngestBatchObject)
16
+ handle_ingest_batch_object(batch_object)
17
+ when batch_object.is_a?(UpdateBatchObject)
18
+ handle_update_batch_object(batch_object)
19
+ end
20
+ end
21
+ end
22
+
23
+ def handle_ingest_batch_object(batch_object)
24
+ case batch_object.model
25
+ when 'Collection'
26
+ ingest_collection_object(batch_object)
27
+ when 'Item'
28
+ enqueue_item_component_ingest(batch_object)
29
+ when 'Component'
30
+ # skip -- will be handled along with associated Item
31
+ when 'Target', 'Attachment'
32
+ Resque.enqueue(BatchObjectsProcessorJob, [ batch_object.id ], operator_id)
33
+ end
34
+ end
35
+
36
+ def handle_update_batch_object(batch_object)
37
+ Resque.enqueue(BatchObjectsProcessorJob, [ batch_object.id ], operator_id)
38
+ end
39
+
40
+ def ingest_collection_object(batch_object)
41
+ # Collection batch objects are processed synchronously because they need to exist in the repository
42
+ # prior to the processing of any objects (e.g., Item, Component, Target) associated with them.
43
+ # If the Collection batch object does not process successfully, consider the batch finished (albeit unsuccessfully)
44
+ # and raise an exception.
45
+ unless ProcessBatchObject.new(batch_object_id: batch_object.id, operator: User.find(operator_id)).execute
46
+ ActiveSupport::Notifications.instrument('finished.batch.batch.ddr', batch_id: batch.id)
47
+ raise Ddr::Batch::BatchObjectProcessingError, batch_object.id
48
+ end
49
+ end
50
+
51
+ def enqueue_item_component_ingest(batch_object)
52
+ batch_object_ids = [ batch_object.id ]
53
+ parent_rel_query = "object = '#{batch_object.pid}' AND name = '#{Ddr::Batch::BatchObjectRelationship::RELATIONSHIP_PARENT}'"
54
+ parent_rel_recs = Ddr::Batch::BatchObjectRelationship.where(parent_rel_query)
55
+ parent_rel_recs.each { |parent_rel_rec| batch_object_ids << parent_rel_rec.batch_object_id}
56
+ Resque.enqueue(BatchObjectsProcessorJob, batch_object_ids, operator_id)
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,48 @@
1
+ module Ddr::Batch
2
+ class ProcessBatchObject
3
+
4
+ attr_reader :batch_object_id, :operator
5
+
6
+ def initialize(batch_object_id:, operator:)
7
+ @batch_object_id = batch_object_id
8
+ @operator = operator
9
+ end
10
+
11
+ def execute
12
+ ActiveSupport::Notifications.instrument("handled.batchobject.batch.ddr",
13
+ batch_object_id: batch_object_id) do |payload|
14
+ batch_object = BatchObject.find(batch_object_id)
15
+ # Mark batch object as 'handled'
16
+ batch_object.update!(handled: true)
17
+ # Validate batch object
18
+ errors = batch_object.validate
19
+ # Process batch object or record validation errors
20
+ if errors.empty?
21
+ process(batch_object, operator)
22
+ else
23
+ record_errors(batch_object, errors)
24
+ end
25
+ # return true if batch_object was processed; otherwise, false
26
+ batch_object.processed? ? true : false
27
+ end
28
+ end
29
+
30
+ def process(batch_object, operator)
31
+ batch_object.update!(validated: true)
32
+ batch_object.process(operator)
33
+ batch_object.update!(processed: true)
34
+ results_message = batch_object.results_message
35
+ Ddr::Batch::BatchObjectMessage.create!(batch_object: batch_object,
36
+ level: results_message.level,
37
+ message: results_message.message)
38
+ end
39
+
40
+ def record_errors(batch_object, errors)
41
+ errors.each do |error|
42
+ Ddr::Batch::BatchObjectMessage.create!(batch_object: batch_object,
43
+ level: Logger::ERROR,
44
+ message: error)
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,35 @@
1
+ module Ddr::Batch
2
+ class ProcessBatchObjects
3
+
4
+ attr_reader :batch_object_ids, :operator
5
+
6
+ def initialize(batch_object_ids:, operator:)
7
+ @batch_object_ids = batch_object_ids
8
+ @operator = operator
9
+ end
10
+
11
+ def execute
12
+ # Assume successful processing of all batch objects until proven otherwise.
13
+ success = true
14
+ batch_object_ids.each do |batch_object_id|
15
+ # Once any batch object included in this job fails to process successfully, do not attempt to process
16
+ # any remaining batch objects included in this job. Instead, mark them as "handled" so the batch knows
17
+ # it's not waiting on them to be handled before it can consider itself "finished".
18
+ # The use case prompting this behavior is a job containing an Item ingest batch object plus one or more
19
+ # associated Component ingest batch objects. If the Item batch object fails to process correctly, we don't
20
+ # want to attempt to process the Component batch objects.
21
+ # In the preceding use case, we could skip the remaining batch objects only if the failed batch object is an
22
+ # Item but there might be future cases in which we don't want to process the remaining batch objects in the
23
+ # job regardless of which batch object fails. The failure of any batch object to process should be rare
24
+ # enough that it doesn't seem harmful to cover this potential broader use case in the current code.
25
+ if success
26
+ success = ProcessBatchObject.new(batch_object_id: batch_object_id, operator: operator).execute
27
+ else
28
+ batch_object = Ddr::Batch::BatchObject.find(batch_object_id)
29
+ batch_object.update!(handled: true)
30
+ end
31
+ end
32
+ end
33
+
34
+ end
35
+ end
@@ -0,0 +1,9 @@
1
+ ##
2
+ ## Subscriptions to ActiveSupport::Notifications instrumentation events
3
+ ##
4
+
5
+ # Batch Processing events
6
+ ActiveSupport::Notifications.subscribe('started.batch.batch.ddr', Ddr::Batch::MonitorBatchStarted)
7
+ ActiveSupport::Notifications.subscribe('handled.batchobject.batch.ddr', Ddr::Batch::MonitorBatchObjectHandled)
8
+ ActiveSupport::Notifications.subscribe('finished.batch.batch.ddr', Ddr::Batch::MonitorBatchFinished)
9
+
@@ -2,7 +2,7 @@ en:
2
2
  ddr:
3
3
  batch:
4
4
  errors:
5
- prefix: "%{identifier} [Database ID: %{id}]:"
5
+ prefix: "%{identifier}:"
6
6
  no_batches: "No %{type} batches found for your user account."
7
7
  web:
8
8
  action_names:
@@ -49,4 +49,4 @@ en:
49
49
  finished_batches:
50
50
  label: "Already Run"
51
51
  pending_batches:
52
- label: "Pending"
52
+ label: "Pending"
@@ -0,0 +1,9 @@
1
+ class AddColumnsToBatchObject < ActiveRecord::Migration
2
+ def change
3
+ change_table :batch_objects do |t|
4
+ t.boolean "handled", default: false
5
+ t.boolean "processed", default: false
6
+ t.boolean "validated", default: false
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,13 @@
1
+ class CreateBatchObjectMessages < ActiveRecord::Migration
2
+ def change
3
+ unless table_exists?(:batch_object_messages)
4
+ create_table :batch_object_messages do |t|
5
+ t.integer :batch_object_id
6
+ t.integer :level, default: Logger::DEBUG
7
+ t.text :message, limit: 65535
8
+
9
+ t.timestamps
10
+ end
11
+ end
12
+ end
13
+ end
@@ -9,6 +9,12 @@ module Ddr
9
9
  extend ActiveSupport::Autoload
10
10
 
11
11
  autoload :BatchUser
12
+ autoload :BatchObjectProcessingError, 'ddr/batch/error'
13
+
14
+ # Logging level for batch processing - defaults to Logger::INFO
15
+ mattr_accessor :processor_logging_level do
16
+ Logger::INFO
17
+ end
12
18
 
13
19
  def self.table_name_prefix
14
20
  end
@@ -1,5 +1,5 @@
1
1
  module Ddr
2
2
  module Batch
3
- VERSION = "1.1.0"
3
+ VERSION = "1.2.0.rc1"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ddr-batch
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.2.0.rc1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jim Coble
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-10-21 00:00:00.000000000 Z
12
+ date: 2016-12-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rails
@@ -59,14 +59,14 @@ dependencies:
59
59
  requirements:
60
60
  - - "~>"
61
61
  - !ruby/object:Gem::Version
62
- version: '2.3'
62
+ version: '2.5'
63
63
  type: :runtime
64
64
  prerelease: false
65
65
  version_requirements: !ruby/object:Gem::Requirement
66
66
  requirements:
67
67
  - - "~>"
68
68
  - !ruby/object:Gem::Version
69
- version: '2.3'
69
+ version: '2.5'
70
70
  - !ruby/object:Gem::Dependency
71
71
  name: log4r
72
72
  requirement: !ruby/object:Gem::Requirement
@@ -175,6 +175,8 @@ files:
175
175
  - LICENSE.txt
176
176
  - README.md
177
177
  - Rakefile
178
+ - app/jobs/ddr/batch/batch_deletion_job.rb
179
+ - app/jobs/ddr/batch/batch_objects_processor_job.rb
178
180
  - app/jobs/ddr/batch/batch_processor_job.rb
179
181
  - app/mailers/ddr/batch/batch_processor_run_mailer.rb
180
182
  - app/models/ddr/batch/batch.rb
@@ -182,13 +184,22 @@ files:
182
184
  - app/models/ddr/batch/batch_object.rb
183
185
  - app/models/ddr/batch/batch_object_attribute.rb
184
186
  - app/models/ddr/batch/batch_object_datastream.rb
187
+ - app/models/ddr/batch/batch_object_message.rb
185
188
  - app/models/ddr/batch/batch_object_relationship.rb
186
189
  - app/models/ddr/batch/batch_object_role.rb
190
+ - app/models/ddr/batch/error.rb
187
191
  - app/models/ddr/batch/ingest_batch_object.rb
192
+ - app/models/ddr/batch/log.rb
188
193
  - app/models/ddr/batch/update_batch_object.rb
189
- - app/scripts/ddr/batch/batch_processor.rb
194
+ - app/services/ddr/batch/monitor_batch_finished.rb
195
+ - app/services/ddr/batch/monitor_batch_object_handled.rb
196
+ - app/services/ddr/batch/monitor_batch_started.rb
197
+ - app/services/ddr/batch/process_batch.rb
198
+ - app/services/ddr/batch/process_batch_object.rb
199
+ - app/services/ddr/batch/process_batch_objects.rb
190
200
  - app/views/ddr/batch/batch_processor_run_mailer/send_notification.html.erb
191
201
  - app/views/ddr/batch/batch_processor_run_mailer/send_notification.text.erb
202
+ - config/initializers/subscriptions.rb
192
203
  - config/locales/en.yml
193
204
  - config/routes.rb
194
205
  - db/migrate/20150828183839_create_batches.rb
@@ -197,6 +208,8 @@ files:
197
208
  - db/migrate/20150828202200_create_batch_object_datastreams.rb
198
209
  - db/migrate/20150828202240_create_batch_object_relationships.rb
199
210
  - db/migrate/20160816164010_create_batch_object_roles.rb
211
+ - db/migrate/20161115191636_add_columns_to_batch_object.rb
212
+ - db/migrate/20161116142512_create_batch_object_messages.rb
200
213
  - lib/ddr-batch.rb
201
214
  - lib/ddr/batch.rb
202
215
  - lib/ddr/batch/batch_user.rb
@@ -218,9 +231,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
218
231
  version: '0'
219
232
  required_rubygems_version: !ruby/object:Gem::Requirement
220
233
  requirements:
221
- - - ">="
234
+ - - ">"
222
235
  - !ruby/object:Gem::Version
223
- version: '0'
236
+ version: 1.3.1
224
237
  requirements: []
225
238
  rubyforge_project:
226
239
  rubygems_version: 2.4.3
@@ -1,152 +0,0 @@
1
- module Ddr::Batch
2
- class BatchProcessor
3
-
4
- LOG_CONFIG_FILEPATH = File.join(Rails.root, 'config', 'log4r_batch_processor.yml')
5
- DEFAULT_LOG_DIR = File.join(Rails.root, 'log')
6
- DEFAULT_LOG_FILE = "batch_processor_log.txt"
7
- PASS = "PASS"
8
- FAIL = "FAIL"
9
-
10
- # Options
11
- # :log_dir - optional - directory for log file - default is given in DEFAULT_LOG_DIR
12
- # :log_file - optional - filename of log file - default is given in DEFAULT_LOG_FILE
13
- # :skip_validation - optional - whether to skip batch object validation step when processing - default is false
14
- # :ignore_validation_errors - optional - whether to continue processing even if batch object validation errors occur - default is false
15
- def initialize(batch, operator=nil, opts={})
16
- @batch = batch
17
- @operator = operator
18
- @bp_log_dir = opts.fetch(:log_dir, DEFAULT_LOG_DIR)
19
- @bp_log_file = opts.fetch(:log_file, DEFAULT_LOG_FILE)
20
- @skip_validation = opts.fetch(:skip_validation, false)
21
- @ignore_validation_errors = opts.fetch(:ignore_validation_errors, false)
22
- end
23
-
24
- def execute
25
- config_logger
26
- if @batch
27
- initiate_batch_run
28
- unless @skip_validation
29
- valid_batch = validate_batch
30
- @batch.update_attributes(status: Batch::STATUS_INVALID) unless valid_batch
31
- end
32
- if @skip_validation || @ignore_validation_errors || valid_batch
33
- process_batch
34
- end
35
- close_batch_run
36
- end
37
- save_logfile
38
- send_notification if @batch.user && @batch.user.email
39
- end
40
-
41
- private
42
-
43
- def validate_batch
44
- @batch.update_attributes(status: Batch::STATUS_VALIDATING)
45
- valid = true
46
- errors = @batch.validate
47
- unless errors.empty?
48
- valid = false
49
- errors.each do |error|
50
- message = "Batch Object Validation Error: #{error}"
51
- @bp_log.error(message)
52
- end
53
- end
54
- @batch.update_attributes(status: Batch::STATUS_RUNNING)
55
- return valid
56
- end
57
-
58
- def process_batch
59
- @batch.update_attributes(status: Batch::STATUS_PROCESSING, processing_step_start: DateTime.now)
60
- @batch.batch_objects.each do |object|
61
- begin
62
- process_object(object)
63
- rescue Exception => e
64
- @bp_log.error(e.backtrace)
65
- break
66
- end
67
- sleep 2
68
- end
69
- @batch.update_attributes(status: Batch::STATUS_RUNNING) if @batch.status == Batch::STATUS_PROCESSING
70
- end
71
-
72
- def initiate_batch_run
73
- @bp_log.info "Batch id: #{@batch.id}"
74
- @bp_log.info "Batch name: #{@batch.name}" if @batch.name
75
- @bp_log.info "Batch size: #{@batch.batch_objects.size}"
76
- @batch.logfile.clear # clear out any attached logfile
77
- @batch.update_attributes(:start => DateTime.now,
78
- :status => Batch::STATUS_RUNNING,
79
- :version => VERSION)
80
- @failures = 0
81
- @successes = 0
82
- @results_tracker = Hash.new
83
- end
84
-
85
- def close_batch_run
86
- @batch.reload
87
- @batch.failure = @failures
88
- @batch.outcome = @successes.eql?(@batch.batch_objects.size) ? Batch::OUTCOME_SUCCESS : Batch::OUTCOME_FAILURE
89
- if @batch.status.eql?(Batch::STATUS_RUNNING)
90
- @batch.status = Batch::STATUS_FINISHED
91
- end
92
- @batch.stop = DateTime.now
93
- @batch.success = @successes
94
- @batch.save
95
- @bp_log.info "====== Summary ======"
96
- @results_tracker.keys.each do |type|
97
- verb = case type
98
- when IngestBatchObject.name
99
- "Ingested"
100
- when UpdateBatchObject.name
101
- "Updated"
102
- end
103
- @results_tracker[type].keys.each do |model|
104
- @bp_log.info "#{verb} #{@results_tracker[type][model][:successes]} #{model}"
105
- end
106
- end
107
- end
108
-
109
- def update_results_tracker(type, model, verified)
110
- @results_tracker[type] = Hash.new unless @results_tracker.has_key?(type)
111
- @results_tracker[type][model] = Hash.new unless @results_tracker[type].has_key?(model)
112
- @results_tracker[type][model][:successes] = 0 unless @results_tracker[type][model].has_key?(:successes)
113
- @results_tracker[type][model][:successes] += 1 if verified
114
- end
115
-
116
- def process_object(object)
117
- @bp_log.debug "Processing object: #{object.identifier}"
118
- repository_object = object.process(@operator)
119
- update_results_tracker(object.type, repository_object.present? ? repository_object.class.name : object.model, object.verified)
120
- if object.verified
121
- @successes += 1
122
- else
123
- @failures += 1
124
- end
125
- message = object.results_message
126
- @bp_log.info(message)
127
- end
128
-
129
- def config_logger
130
- logconfig = Log4r::YamlConfigurator
131
- logconfig['LOG_FILE'] = File.join(@bp_log_dir, @bp_log_file)
132
- logconfig.load_yaml_file File.join(LOG_CONFIG_FILEPATH)
133
- @bp_log = Log4r::Logger['batch_processor']
134
- end
135
-
136
- def save_logfile
137
- @bp_log.outputters.each do |outputter|
138
- @logfilename = outputter.filename if outputter.respond_to?(:filename)
139
- end
140
- @batch.update!({ logfile: File.new(@logfilename) }) if @logfilename
141
- end
142
-
143
- def send_notification
144
- begin
145
- BatchProcessorRunMailer.send_notification(@batch).deliver!
146
- rescue
147
- puts "An error occurred while attempting to send the notification."
148
- end
149
- end
150
-
151
- end
152
- end