ddr-batch 1.1.0 → 1.2.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a6f10719eff4705f320384ac30a9b9353911e375
4
- data.tar.gz: 668bcc32b0d5a85f36443a71436b42b0a20b01cb
3
+ metadata.gz: e99d993322b0feb42c08dd0a367aa2cbb9a8bd36
4
+ data.tar.gz: 7d3ea407d8f1cc09f33725c8c54cbbb934967a98
5
5
  SHA512:
6
- metadata.gz: d4c23913959d6405ca58d0155a98126ffc4e0c7741e7f9d2961f48f411927ff9c48d36c34e72a65bcb0cb961617324c1cbaa3fc7a5d78fa4aee519484d3d3187
7
- data.tar.gz: a9fbe5fbee20e38b27f94f9bc5f7f4d067e4dd3938e21372839c3e5ccf43513fe239be531cfb76011bac7c70f03dce4f4942c15f48206cb0ce8362b08581aeb8
6
+ metadata.gz: 8954284d5fed970fbaa7ee95626bafc28f7e906ba668d657d95279191555f842b6b5832943bc9034fdcf5bf699f27ad2d7c2d412d34cb4d89f6d9fff24bb1eb5
7
+ data.tar.gz: 2de0336399b640e5b3b85daa2ca65ebd1ac31f85e259907d80701d68198c8969e1dc155024ebe9a3bbe16b50cc61703677fe5cb2a9392d2306e2d5094b914bcc
@@ -0,0 +1,19 @@
1
+ module Ddr::Batch
2
+ class BatchDeletionJob # Background job that flags a batch as DELETING and then destroys it
3
+ @queue = :batch # class-level queue name; presumably read by Resque -- confirm
4
+
5
+ def self.perform(batch_id) # batch_id: id of the Batch record to delete
6
+ batch = Batch.find(batch_id)
7
+ batch.status = Batch::STATUS_DELETING
8
+ batch.save! # persist the DELETING status before the record is destroyed
9
+ batch.destroy!
10
+ end
11
+
12
+ def self.before_enqueue_set_status(batch_id) # NOTE(review): looks like a Resque before_enqueue hook -- confirm
13
+ batch = Batch.find(batch_id)
14
+ batch.status = Batch::STATUS_QUEUED_FOR_DELETION
15
+ batch.save # non-bang save: its boolean result is this method's return value
16
+ end
17
+
18
+ end
19
+ end
@@ -0,0 +1,11 @@
1
+ module Ddr::Batch
2
+ class BatchObjectsProcessorJob # Background job that processes a list of batch objects on behalf of an operator
3
+ @queue = :batch # class-level queue name; presumably read by Resque -- confirm
4
+
5
+ def self.perform(batch_object_ids, operator_id) # batch_object_ids: array of BatchObject ids; operator_id: User id
6
+ operator = User.find(operator_id) # jobs receive ids, so the User record is loaded here
7
+ ProcessBatchObjects.new(batch_object_ids: batch_object_ids, operator: operator).execute
8
+ end
9
+
10
+ end
11
+ end
@@ -3,12 +3,7 @@ module Ddr::Batch
3
3
  @queue = :batch
4
4
 
5
5
  def self.perform(batch_id, operator_id)
6
- ts = Time.now.strftime("%Y%m%d%H%M%S%L")
7
- logfile = "batch_processor_#{ts}_log.txt"
8
- batch = Batch.find(batch_id)
9
- operator = User.find(operator_id)
10
- bp = BatchProcessor.new(batch, operator, log_file: logfile)
11
- bp.execute
6
+ ProcessBatch.new(batch_id: batch_id, operator_id: operator_id).execute
12
7
  end
13
8
 
14
9
  def self.after_enqueue_set_status(batch_id, operator_id)
@@ -18,4 +13,4 @@ module Ddr::Batch
18
13
  end
19
14
 
20
15
  end
21
- end
16
+ end
@@ -6,7 +6,7 @@ module Ddr::Batch
6
6
 
7
7
  def send_notification(batch)
8
8
  @batch = batch
9
- @title = "Batch Processor Run #{@batch.status}"
9
+ @title = "Batch Processor Run #{@batch.status} #{@batch.outcome}"
10
10
  @host = `uname -n`.strip
11
11
  @subject = "[#{@host}] #{@title}"
12
12
  from = "#{`echo $USER`.strip}@#{@host}"
@@ -16,4 +16,4 @@ module Ddr::Batch
16
16
 
17
17
  end
18
18
 
19
- end
19
+ end
@@ -19,30 +19,18 @@ module Ddr::Batch
19
19
  STATUS_FINISHED = "FINISHED"
20
20
  STATUS_INTERRUPTED = "INTERRUPTED"
21
21
  STATUS_RESTARTABLE = "INTERRUPTED - RESTARTABLE"
22
+ STATUS_QUEUED_FOR_DELETION = "QUEUED FOR DELETION"
23
+ STATUS_DELETING = "DELETING"
22
24
 
23
- def validate
24
- errors = []
25
- begin
26
- batch_objects.each do |object|
27
- unless object.verified
28
- errors << object.validate
29
- end
30
- end
31
- rescue Exception => e
32
- errors << "Exception raised during batch validation: #{e.backtrace}"
33
- end
34
- errors.flatten
35
- end
36
-
37
- def completed_count
38
- batch_objects.where(verified: true).count
25
+ def handled_count
26
+ batch_objects.where(handled: true).count
39
27
  end
40
28
 
41
29
  def time_to_complete
42
- unless processing_step_start.nil?
43
- if completed_count > 0
44
- completed = completed_count
45
- ((Time.now - processing_step_start.to_time) / completed) * (batch_objects.count - completed)
30
+ unless start.nil?
31
+ if handled_count > 0
32
+ handled = handled_count
33
+ ((Time.now - start.to_time) / handled) * (batch_objects.count - handled)
46
34
  end
47
35
  end
48
36
  end
@@ -63,10 +51,14 @@ module Ddr::Batch
63
51
  batch_objects.map{ |x| x.pid if x.pid.present? }.compact
64
52
  end
65
53
 
54
+ def unhandled_objects? # true when at least one batch object is not yet marked handled
55
+ batch_objects.any? { |batch_object| !batch_object.handled? }
56
+ end
57
+
66
58
  def finished?
67
59
  status == STATUS_FINISHED
68
60
  end
69
61
 
70
62
  end
71
63
 
72
- end
64
+ end
@@ -7,6 +7,7 @@ module Ddr::Batch
7
7
  belongs_to :batch, inverse_of: :batch_objects
8
8
  has_many :batch_object_attributes, -> { order "id ASC" }, inverse_of: :batch_object, dependent: :destroy
9
9
  has_many :batch_object_datastreams, inverse_of: :batch_object, dependent: :destroy
10
+ has_many :batch_object_messages, inverse_of: :batch_object, dependent: :destroy
10
11
  has_many :batch_object_relationships, inverse_of: :batch_object, dependent: :destroy
11
12
  has_many :batch_object_roles, inverse_of: :batch_object, dependent: :destroy
12
13
 
@@ -20,6 +21,8 @@ module Ddr::Batch
20
21
  Model: %{model}
21
22
  EOS
22
23
 
24
+ ProcessingResultsMessage = Struct.new(:level, :message)
25
+
23
26
  def self.pid_from_identifier(identifier, batch_id)
24
27
  query = "identifier = :identifier"
25
28
  query << " and batch_id = :batch_id" if batch_id
@@ -112,12 +115,11 @@ module Ddr::Batch
112
115
  obj_model = batch.found_pids[r[:object]]
113
116
  else
114
117
  begin
115
- obj = ActiveFedora::Base.find(r[:object], :cast => true)
116
- obj_model = obj.class.name
118
+ obj_model = SolrDocument.find(r[:object]).active_fedora_model
117
119
  if batch.present?
118
- batch.add_found_pid(obj.pid, obj_model)
120
+ batch.add_found_pid(r[:object], obj_model)
119
121
  end
120
- rescue ActiveFedora::ObjectNotFoundError
122
+ rescue SolrDocument::NotFound
121
123
  pid_in_batch = false
122
124
  if batch.present?
123
125
  if batch.pre_assigned_pids.include?(r[:object])
@@ -0,0 +1,8 @@
1
+ module Ddr::Batch
2
+ class BatchObjectMessage < ActiveRecord::Base # A message (with a level) recorded against a single batch object
3
+ belongs_to :batch_object, :inverse_of => :batch_object_messages
4
+
5
+ validates_presence_of :message # a record without message text is invalid
6
+
7
+ end
8
+ end
@@ -1,7 +1,6 @@
1
1
  module Ddr::Batch
2
2
 
3
3
  class BatchObjectRelationship < ActiveRecord::Base
4
- # attr_accessible :name, :object, :object_type, :operation, :batch_object
5
4
  belongs_to :batch_object, :inverse_of => :batch_object_relationships
6
5
 
7
6
  RELATIONSHIP_ADMIN_POLICY = "admin_policy"
@@ -0,0 +1,10 @@
1
+ module Ddr
2
+ module Batch
3
+ # Base class for custom exceptions defined in Ddr::Batch
4
+ class Error < StandardError; end
5
+
6
+ # Error processing a batch object (subclass of Ddr::Batch::Error)
7
+ class BatchObjectProcessingError < Error; end
8
+
9
+ end
10
+ end
@@ -20,10 +20,11 @@ module Ddr::Batch
20
20
 
21
21
  def results_message
22
22
  if pid
23
- verification_result = (verified ? "Verified" : "VERIFICATION FAILURE")
24
- message = "Ingested #{model} #{identifier} into #{pid}...#{verification_result}"
23
+ message_level = verified ? Logger::INFO : Logger::WARN
24
+ verification_result = verified ? "Verified" : "VERIFICATION FAILURE"
25
+ ProcessingResultsMessage.new(message_level, "Ingested #{model} #{identifier} into #{pid}...#{verification_result}")
25
26
  else
26
- message = "Attempt to ingest #{model} #{identifier} FAILED"
27
+ ProcessingResultsMessagemessage.new(Logger::ERROR, "Attempt to ingest #{model} #{identifier} FAILED")
27
28
  end
28
29
  end
29
30
 
@@ -95,30 +96,21 @@ module Ddr::Batch
95
96
  begin
96
97
  repo_object = model.constantize.new(:pid => repo_pid)
97
98
  repo_object.label = label if label
98
- repo_object.save(validate: false)
99
99
  batch_object_attributes.each { |a| repo_object = add_attribute(repo_object, a) }
100
+ repo_object.save(validate: false)
100
101
  batch_object_datastreams.each { |d| repo_object = populate_datastream(repo_object, d) }
101
102
  batch_object_relationships.each { |r| repo_object = add_relationship(repo_object, r) }
102
103
  batch_object_roles.each { |r| repo_object = add_role(repo_object, r) }
103
- repo_object.save! # Do not allow batch ingest to successfully create an invalid object
104
+ repo_object.save!
104
105
  rescue Exception => e1
105
106
  logger.fatal("Error in creating repository object #{repo_object.pid} for #{identifier} : #{e1}")
106
- repo_clean = false
107
107
  if repo_object && !repo_object.new_record?
108
108
  begin
109
109
  logger.info("Deleting potentially incomplete #{repo_object.pid} due to error in ingest batch processing")
110
110
  repo_object.destroy
111
111
  rescue Exception => e2
112
112
  logger.fatal("Error deleting repository object #{repo_object.pid}: #{e2}")
113
- else
114
- repo_clean = true
115
113
  end
116
- else
117
- repo_clean = true
118
- end
119
- if batch.present?
120
- batch.status = repo_clean ? Batch::STATUS_RESTARTABLE : Batch::STATUS_INTERRUPTED
121
- batch.save
122
114
  end
123
115
  raise e1
124
116
  end
@@ -0,0 +1,29 @@
1
+ module Ddr::Batch
2
+ class Log
3
+
4
+ DEFAULT_LOG_DIR = File.join(Rails.root, 'log')
5
+
6
+ class << self
7
+
8
+ def logger(batch_id)
9
+ loggr = Logger.new(File.open(file_path(batch_id), File::WRONLY | File::APPEND | File::CREAT))
10
+ loggr.level = Ddr::Batch.processor_logging_level
11
+ loggr.datetime_format = "%Y-%m-%d %H:%M:%S.L"
12
+ loggr.formatter = proc do |severity, datetime, progname, msg|
13
+ "#{datetime} #{severity}: #{msg}\n"
14
+ end
15
+ loggr
16
+ end
17
+
18
+ def clear_log(batch_id)
19
+ log_file_path = file_path(batch_id)
20
+ FileUtils.remove(log_file_path) if File.exists?(log_file_path)
21
+ end
22
+
23
+ def file_path(batch_id)
24
+ File.join(DEFAULT_LOG_DIR, "batch_#{batch_id}_log.txt")
25
+ end
26
+
27
+ end
28
+ end
29
+ end
@@ -43,10 +43,11 @@ module Ddr::Batch
43
43
 
44
44
  def results_message
45
45
  if pid
46
- verification_result = (verified ? "Verified" : "VERIFICATION FAILURE")
47
- message = "Updated #{pid}...#{verification_result}"
46
+ message_level = verified ? Logger::INFO : Logger::WARN # INFO when verified, WARN otherwise
47
+ verification_result = verified ? "Verified" : "VERIFICATION FAILURE"
48
+ ProcessingResultsMessage.new(message_level, "Updated #{pid}...#{verification_result}")
48
49
  else
49
- message = "Attempt to update #{model} #{identifier} FAILED"
50
+ ProcessingResultsMessage.new(Logger::ERROR, "Attempt to update #{model} #{identifier} FAILED") # no pid: report the failed update at ERROR level
50
51
  end
51
52
  end
52
53
 
@@ -60,6 +61,7 @@ module Ddr::Batch
60
61
  repo_object = nil
61
62
  begin
62
63
  repo_object = ActiveFedora::Base.find(pid)
64
+ update!(model: repo_object.class.name) unless model.present?
63
65
  batch_object_attributes.each do |a|
64
66
  repo_object = case
65
67
  when a.operation.eql?(BatchObjectAttribute::OPERATION_ADD)
@@ -81,10 +83,6 @@ module Ddr::Batch
81
83
  end
82
84
  rescue Exception => e
83
85
  logger.error("Error in updating repository object #{pid} for #{identifier} : : #{e}")
84
- if batch.present?
85
- batch.status = Batch::STATUS_RESTARTABLE
86
- batch.save
87
- end
88
86
  raise e
89
87
  end
90
88
  repo_object
@@ -0,0 +1,84 @@
1
+ module Ddr::Batch
2
+ class MonitorBatchFinished # Notification subscriber: logs a summary, finalizes the batch record, emails the batch user
3
+
4
+ class << self
5
+ def call(*args) # args: raw ActiveSupport::Notifications subscriber arguments
6
+ event = ActiveSupport::Notifications::Event.new(*args) # wrap the arguments to read the payload
7
+ batch = Ddr::Batch::Batch.find(event.payload[:batch_id])
8
+ batch_finished(batch)
9
+ end
10
+
11
+ private
12
+
13
+ def batch_finished(batch)
14
+ log_batch_finish(batch)
15
+ update_batch(batch)
16
+ send_notification(batch) if batch.user && batch.user.email # only when there is a user with an email
17
+ end
18
+
19
+ def log_batch_finish(batch) # writes a per-type / per-model success summary to the batch log file
20
+ logger = Ddr::Batch::Log.logger(batch.id)
21
+ logger.info "====== Summary ======"
22
+ results_tracker = results(batch)
23
+ results_tracker.keys.each do |type|
24
+ results_tracker[type].keys.each do |model|
25
+ log_result(results_tracker, type, model, logger)
26
+ end
27
+ end
28
+ logger.close
29
+ end
30
+
31
+ def results(batch) # returns { type => { model => { successes: count } } } built from the batch's objects
32
+ results_tracker = Hash.new
33
+ batch.batch_objects.each do |batch_object|
34
+ track_result(results_tracker, batch_object)
35
+ end
36
+ results_tracker
37
+ end
38
+
39
+ def track_result(results_tracker, batch_object) # mutates results_tracker in place
40
+ type, model = [ batch_object.type, batch_object.model ]
41
+ results_tracker[type] = Hash.new unless results_tracker.has_key?(type)
42
+ results_tracker[type][model] = Hash.new unless results_tracker[type].has_key?(model)
43
+ results_tracker[type][model][:successes] = 0 unless results_tracker[type][model].has_key?(:successes)
44
+ results_tracker[type][model][:successes] += 1 if batch_object.verified # unverified objects leave the count unchanged
45
+ end
46
+
47
+ def log_result(results_tracker, type, model, logger)
48
+ verb = type_verb(type)
49
+ count = results_tracker[type][model][:successes]
50
+ logger.info "#{verb} #{ActionController::Base.helpers.pluralize(count, model)}"
51
+ end
52
+
53
+ def type_verb(type) # returns nil for any type other than the two class names below
54
+ case type
55
+ when IngestBatchObject.name
56
+ "Ingested"
57
+ when UpdateBatchObject.name
58
+ "Updated"
59
+ end
60
+ end
61
+
62
+ def update_batch(batch)
63
+ outcome = batch.success.eql?(batch.batch_objects.size) ? Batch::OUTCOME_SUCCESS : Batch::OUTCOME_FAILURE # success only when every batch object counted as a success
64
+ logfile = File.new(Ddr::Batch::Log.file_path(batch.id)) # NOTE(review): this handle is not closed in this method -- confirm the attachment assignment handles it
65
+ batch.update!(stop: DateTime.now,
66
+ status: Batch::STATUS_FINISHED,
67
+ outcome: outcome,
68
+ logfile: logfile)
69
+ end
70
+
71
+ def send_notification(batch) # best effort: a delivery error is logged, not re-raised
72
+ begin
73
+ BatchProcessorRunMailer.send_notification(batch).deliver!
74
+ rescue
75
+ Rails.logger.error("An error occurred while attempting to send a notification for batch #{batch.id}")
76
+ end
77
+ end
78
+ end
79
+
80
+ end
81
+ end
82
+
83
+
84
+
@@ -0,0 +1,36 @@
1
+ module Ddr::Batch
2
+ class MonitorBatchObjectHandled # Notification subscriber: logs a handled batch object's messages and updates batch counters
3
+
4
+ class << self
5
+ def call(*args) # args: raw ActiveSupport::Notifications subscriber arguments
6
+ event = ActiveSupport::Notifications::Event.new(*args) # wrap the arguments to read the payload
7
+ batch_object = BatchObject.find(event.payload[:batch_object_id])
8
+ batch = batch_object.batch
9
+ batch_object_handled(batch_object, batch)
10
+ end
11
+
12
+ private
13
+
14
+ def batch_object_handled(batch_object, batch)
15
+ log_batch_object_messages(batch_object, batch.id)
16
+ update_batch(batch_object, batch)
17
+ unless batch.unhandled_objects? # every object handled: signal that the batch is finished
18
+ ActiveSupport::Notifications.instrument('finished.batch.batch.ddr', batch_id: batch.id)
19
+ end
20
+ end
21
+
22
+ def log_batch_object_messages(batch_object, batch_id) # copies each stored message into the batch log at its own level
23
+ logger = Ddr::Batch::Log.logger(batch_id)
24
+ batch_object.batch_object_messages.each do |message|
25
+ logger.add(message.level) { "Batch Object #{batch_object.id}: #{message.message}" }
26
+ end
27
+ logger.close
28
+ end
29
+
30
+ def update_batch(batch_object, batch) # increments success when verified, otherwise failure
31
+ batch_object.verified? ? batch.update!(success: batch.success + 1) : batch.update!(failure: batch.failure + 1) # NOTE(review): read-then-write; assumes non-nil counters and no concurrent updates -- confirm
32
+ end
33
+ end
34
+
35
+ end
36
+ end
@@ -0,0 +1,42 @@
1
+ module Ddr::Batch
2
+ class MonitorBatchStarted
3
+
4
+ class << self
5
+ def call(*args)
6
+ event = ActiveSupport::Notifications::Event.new(*args)
7
+ batch = Ddr::Batch::Batch.find(event.payload[:batch_id])
8
+ batch_started(batch)
9
+ end
10
+
11
+ private
12
+
13
+ def batch_started(batch)
14
+ clear_logs(batch)
15
+ log_batch_start(batch)
16
+ update_batch(batch)
17
+ end
18
+
19
+ def clear_logs(batch)
20
+ # delete any previously existing filesystem log file for this batch
21
+ Ddr::Batch::Log.clear_log(batch.id)
22
+ # remove any existing attached log file from the Batch ActiveRecord object
23
+ batch.logfile.clear
24
+ end
25
+
26
+ def log_batch_start(batch)
27
+ logger = Ddr::Batch::Log.logger(batch.id)
28
+ logger.info "Batch id: #{batch.id}"
29
+ logger.info "Batch name: #{batch.name}" if name
30
+ logger.info "Batch size: #{batch.batch_objects.size}"
31
+ logger.close
32
+ end
33
+
34
+ def update_batch(batch)
35
+ batch.update!(start: DateTime.now,
36
+ status: Ddr::Batch::Batch::STATUS_RUNNING,
37
+ version: VERSION)
38
+ end
39
+ end
40
+
41
+ end
42
+ end
@@ -0,0 +1,59 @@
1
+ module Ddr::Batch
2
+ class ProcessBatch
3
+
4
+ attr_accessor :batch, :operator_id
5
+
6
+ def initialize(batch_id:, operator_id:)
7
+ @batch = Ddr::Batch::Batch.find(batch_id)
8
+ @operator_id = operator_id
9
+ end
10
+
11
+ def execute
12
+ ActiveSupport::Notifications.instrument('started.batch.batch.ddr', batch_id: batch.id)
13
+ batch.batch_objects.each do |batch_object|
14
+ case
15
+ when batch_object.is_a?(IngestBatchObject)
16
+ handle_ingest_batch_object(batch_object)
17
+ when batch_object.is_a?(UpdateBatchObject)
18
+ handle_update_batch_object(batch_object)
19
+ end
20
+ end
21
+ end
22
+
23
+ def handle_ingest_batch_object(batch_object)
24
+ case batch_object.model
25
+ when 'Collection'
26
+ ingest_collection_object(batch_object)
27
+ when 'Item'
28
+ enqueue_item_component_ingest(batch_object)
29
+ when 'Component'
30
+ # skip -- will be handled along with associated Item
31
+ when 'Target', 'Attachment'
32
+ Resque.enqueue(BatchObjectsProcessorJob, [ batch_object.id ], operator_id)
33
+ end
34
+ end
35
+
36
+ def handle_update_batch_object(batch_object)
37
+ Resque.enqueue(BatchObjectsProcessorJob, [ batch_object.id ], operator_id)
38
+ end
39
+
40
+ def ingest_collection_object(batch_object)
41
+ # Collection batch objects are processed synchronously because they need to exist in the repository
42
+ # prior to the processing of any objects (e.g., Item, Component, Target) associated with them.
43
+ # If the Collection batch object does not process successfully, consider the batch finished (albeit unsuccessfully)
44
+ # and raise an exception.
45
+ unless ProcessBatchObject.new(batch_object_id: batch_object.id, operator: User.find(operator_id)).execute
46
+ ActiveSupport::Notifications.instrument('finished.batch.batch.ddr', batch_id: batch.id)
47
+ raise Ddr::Batch::BatchObjectProcessingError, batch_object.id
48
+ end
49
+ end
50
+
51
+ def enqueue_item_component_ingest(batch_object)
52
+ batch_object_ids = [ batch_object.id ]
53
+ parent_rel_query = "object = '#{batch_object.pid}' AND name = '#{Ddr::Batch::BatchObjectRelationship::RELATIONSHIP_PARENT}'"
54
+ parent_rel_recs = Ddr::Batch::BatchObjectRelationship.where(parent_rel_query)
55
+ parent_rel_recs.each { |parent_rel_rec| batch_object_ids << parent_rel_rec.batch_object_id}
56
+ Resque.enqueue(BatchObjectsProcessorJob, batch_object_ids, operator_id)
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,48 @@
1
+ module Ddr::Batch
2
+ class ProcessBatchObject # Service that validates and processes one batch object inside a 'handled' notification
3
+
4
+ attr_reader :batch_object_id, :operator
5
+
6
+ def initialize(batch_object_id:, operator:) # batch_object_id: BatchObject id; operator: passed through to batch_object.process
7
+ @batch_object_id = batch_object_id
8
+ @operator = operator
9
+ end
10
+
11
+ def execute # returns true when the batch object ends up processed, false otherwise
12
+ ActiveSupport::Notifications.instrument("handled.batchobject.batch.ddr",
13
+ batch_object_id: batch_object_id) do |payload|
14
+ batch_object = BatchObject.find(batch_object_id)
15
+ # Mark batch object as 'handled'
16
+ batch_object.update!(handled: true)
17
+ # Validate batch object
18
+ errors = batch_object.validate
19
+ # Process batch object or record validation errors
20
+ if errors.empty?
21
+ process(batch_object, operator)
22
+ else
23
+ record_errors(batch_object, errors)
24
+ end
25
+ # return true if batch_object was processed; otherwise, false
26
+ batch_object.processed? ? true : false
27
+ end
28
+ end
29
+
30
+ def process(batch_object, operator) # flags validated, runs processing, flags processed, stores the results message
31
+ batch_object.update!(validated: true)
32
+ batch_object.process(operator)
33
+ batch_object.update!(processed: true) # not reached if process raises
34
+ results_message = batch_object.results_message
35
+ Ddr::Batch::BatchObjectMessage.create!(batch_object: batch_object,
36
+ level: results_message.level,
37
+ message: results_message.message)
38
+ end
39
+
40
+ def record_errors(batch_object, errors) # stores each validation error as an ERROR-level message
41
+ errors.each do |error|
42
+ Ddr::Batch::BatchObjectMessage.create!(batch_object: batch_object,
43
+ level: Logger::ERROR,
44
+ message: error)
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,35 @@
1
+ module Ddr::Batch
2
+ class ProcessBatchObjects # Service that processes a list of batch objects in order, stopping real work after the first failure
3
+
4
+ attr_reader :batch_object_ids, :operator
5
+
6
+ def initialize(batch_object_ids:, operator:) # batch_object_ids: ordered BatchObject ids; operator: passed to each ProcessBatchObject
7
+ @batch_object_ids = batch_object_ids
8
+ @operator = operator
9
+ end
10
+
11
+ def execute
12
+ # Assume successful processing of all batch objects until proven otherwise.
13
+ success = true
14
+ batch_object_ids.each do |batch_object_id|
15
+ # Once any batch object included in this job fails to process successfully, do not attempt to process
16
+ # any remaining batch objects included in this job. Instead, mark them as "handled" so the batch knows
17
+ # it's not waiting on them to be handled before it can consider itself "finished".
18
+ # The use case prompting this behavior is a job containing an Item ingest batch object plus one or more
19
+ # associated Component ingest batch objects. If the Item batch object fails to process correctly, we don't
20
+ # want to attempt to process the Component batch objects.
21
+ # In the preceding use case, we could skip the remaining batch objects only if the failed batch object is an
22
+ # Item but there might be future cases in which we don't want to process the remaining batch objects in the
23
+ # job regardless of which batch object fails. The failure of any batch object to process should be rare
24
+ # enough that it doesn't seem harmful to cover this potential broader use case in the current code.
25
+ if success
26
+ success = ProcessBatchObject.new(batch_object_id: batch_object_id, operator: operator).execute
27
+ else
28
+ batch_object = Ddr::Batch::BatchObject.find(batch_object_id)
29
+ batch_object.update!(handled: true) # NOTE(review): no 'handled.batchobject.batch.ddr' event is instrumented on this path -- confirm the batch can still reach 'finished'
30
+ end
31
+ end
32
+ end
33
+
34
+ end
35
+ end
@@ -0,0 +1,9 @@
1
+ ##
2
+ ## Subscriptions to ActiveSupport::Notifications instrumentation events
3
+ ##
4
+
5
+ # Batch Processing events (each subscriber is a class passed directly to subscribe)
6
+ ActiveSupport::Notifications.subscribe('started.batch.batch.ddr', Ddr::Batch::MonitorBatchStarted)
7
+ ActiveSupport::Notifications.subscribe('handled.batchobject.batch.ddr', Ddr::Batch::MonitorBatchObjectHandled)
8
+ ActiveSupport::Notifications.subscribe('finished.batch.batch.ddr', Ddr::Batch::MonitorBatchFinished)
9
+
@@ -2,7 +2,7 @@ en:
2
2
  ddr:
3
3
  batch:
4
4
  errors:
5
- prefix: "%{identifier} [Database ID: %{id}]:"
5
+ prefix: "%{identifier}:"
6
6
  no_batches: "No %{type} batches found for your user account."
7
7
  web:
8
8
  action_names:
@@ -49,4 +49,4 @@ en:
49
49
  finished_batches:
50
50
  label: "Already Run"
51
51
  pending_batches:
52
- label: "Pending"
52
+ label: "Pending"
@@ -0,0 +1,9 @@
1
+ class AddColumnsToBatchObject < ActiveRecord::Migration # Adds three boolean flag columns to batch_objects
2
+ def change
3
+ change_table :batch_objects do |t|
4
+ t.boolean "handled", default: false # set to true by the processing services when an object is handled
5
+ t.boolean "processed", default: false
6
+ t.boolean "validated", default: false
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,13 @@
1
+ class CreateBatchObjectMessages < ActiveRecord::Migration # Creates the batch_object_messages table
2
+ def change
3
+ unless table_exists?(:batch_object_messages) # skip creation when the table is already present
4
+ create_table :batch_object_messages do |t|
5
+ t.integer :batch_object_id
6
+ t.integer :level, default: Logger::DEBUG # integer severity; defaults to Logger::DEBUG
7
+ t.text :message, limit: 65535
8
+
9
+ t.timestamps
10
+ end
11
+ end
12
+ end
13
+ end
@@ -9,6 +9,12 @@ module Ddr
9
9
  extend ActiveSupport::Autoload
10
10
 
11
11
  autoload :BatchUser
12
+ autoload :BatchObjectProcessingError, 'ddr/batch/error'
13
+
14
+ # Logging level for batch processing - defaults to Logger::INFO
15
+ mattr_accessor :processor_logging_level do
16
+ Logger::INFO
17
+ end
12
18
 
13
19
  def self.table_name_prefix
14
20
  end
@@ -1,5 +1,5 @@
1
1
  module Ddr
2
2
  module Batch
3
- VERSION = "1.1.0"
3
+ VERSION = "1.2.0.rc1"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ddr-batch
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.2.0.rc1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jim Coble
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-10-21 00:00:00.000000000 Z
12
+ date: 2016-12-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rails
@@ -59,14 +59,14 @@ dependencies:
59
59
  requirements:
60
60
  - - "~>"
61
61
  - !ruby/object:Gem::Version
62
- version: '2.3'
62
+ version: '2.5'
63
63
  type: :runtime
64
64
  prerelease: false
65
65
  version_requirements: !ruby/object:Gem::Requirement
66
66
  requirements:
67
67
  - - "~>"
68
68
  - !ruby/object:Gem::Version
69
- version: '2.3'
69
+ version: '2.5'
70
70
  - !ruby/object:Gem::Dependency
71
71
  name: log4r
72
72
  requirement: !ruby/object:Gem::Requirement
@@ -175,6 +175,8 @@ files:
175
175
  - LICENSE.txt
176
176
  - README.md
177
177
  - Rakefile
178
+ - app/jobs/ddr/batch/batch_deletion_job.rb
179
+ - app/jobs/ddr/batch/batch_objects_processor_job.rb
178
180
  - app/jobs/ddr/batch/batch_processor_job.rb
179
181
  - app/mailers/ddr/batch/batch_processor_run_mailer.rb
180
182
  - app/models/ddr/batch/batch.rb
@@ -182,13 +184,22 @@ files:
182
184
  - app/models/ddr/batch/batch_object.rb
183
185
  - app/models/ddr/batch/batch_object_attribute.rb
184
186
  - app/models/ddr/batch/batch_object_datastream.rb
187
+ - app/models/ddr/batch/batch_object_message.rb
185
188
  - app/models/ddr/batch/batch_object_relationship.rb
186
189
  - app/models/ddr/batch/batch_object_role.rb
190
+ - app/models/ddr/batch/error.rb
187
191
  - app/models/ddr/batch/ingest_batch_object.rb
192
+ - app/models/ddr/batch/log.rb
188
193
  - app/models/ddr/batch/update_batch_object.rb
189
- - app/scripts/ddr/batch/batch_processor.rb
194
+ - app/services/ddr/batch/monitor_batch_finished.rb
195
+ - app/services/ddr/batch/monitor_batch_object_handled.rb
196
+ - app/services/ddr/batch/monitor_batch_started.rb
197
+ - app/services/ddr/batch/process_batch.rb
198
+ - app/services/ddr/batch/process_batch_object.rb
199
+ - app/services/ddr/batch/process_batch_objects.rb
190
200
  - app/views/ddr/batch/batch_processor_run_mailer/send_notification.html.erb
191
201
  - app/views/ddr/batch/batch_processor_run_mailer/send_notification.text.erb
202
+ - config/initializers/subscriptions.rb
192
203
  - config/locales/en.yml
193
204
  - config/routes.rb
194
205
  - db/migrate/20150828183839_create_batches.rb
@@ -197,6 +208,8 @@ files:
197
208
  - db/migrate/20150828202200_create_batch_object_datastreams.rb
198
209
  - db/migrate/20150828202240_create_batch_object_relationships.rb
199
210
  - db/migrate/20160816164010_create_batch_object_roles.rb
211
+ - db/migrate/20161115191636_add_columns_to_batch_object.rb
212
+ - db/migrate/20161116142512_create_batch_object_messages.rb
200
213
  - lib/ddr-batch.rb
201
214
  - lib/ddr/batch.rb
202
215
  - lib/ddr/batch/batch_user.rb
@@ -218,9 +231,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
218
231
  version: '0'
219
232
  required_rubygems_version: !ruby/object:Gem::Requirement
220
233
  requirements:
221
- - - ">="
234
+ - - ">"
222
235
  - !ruby/object:Gem::Version
223
- version: '0'
236
+ version: 1.3.1
224
237
  requirements: []
225
238
  rubyforge_project:
226
239
  rubygems_version: 2.4.3
@@ -1,152 +0,0 @@
1
- module Ddr::Batch
2
- class BatchProcessor
3
-
4
- LOG_CONFIG_FILEPATH = File.join(Rails.root, 'config', 'log4r_batch_processor.yml')
5
- DEFAULT_LOG_DIR = File.join(Rails.root, 'log')
6
- DEFAULT_LOG_FILE = "batch_processor_log.txt"
7
- PASS = "PASS"
8
- FAIL = "FAIL"
9
-
10
- # Options
11
- # :log_dir - optional - directory for log file - default is given in DEFAULT_LOG_DIR
12
- # :log_file - optional - filename of log file - default is given in DEFAULT_LOG_FILE
13
- # :skip_validation - optional - whether to skip batch object validation step when processing - default is false
14
- # :ignore_validation_errors - optional - whether to continue processing even if batch object validation errors occur - default is false
15
- def initialize(batch, operator=nil, opts={})
16
- @batch = batch
17
- @operator = operator
18
- @bp_log_dir = opts.fetch(:log_dir, DEFAULT_LOG_DIR)
19
- @bp_log_file = opts.fetch(:log_file, DEFAULT_LOG_FILE)
20
- @skip_validation = opts.fetch(:skip_validation, false)
21
- @ignore_validation_errors = opts.fetch(:ignore_validation_errors, false)
22
- end
23
-
24
- def execute
25
- config_logger
26
- if @batch
27
- initiate_batch_run
28
- unless @skip_validation
29
- valid_batch = validate_batch
30
- @batch.update_attributes(status: Batch::STATUS_INVALID) unless valid_batch
31
- end
32
- if @skip_validation || @ignore_validation_errors || valid_batch
33
- process_batch
34
- end
35
- close_batch_run
36
- end
37
- save_logfile
38
- send_notification if @batch.user && @batch.user.email
39
- end
40
-
41
- private
42
-
43
- def validate_batch
44
- @batch.update_attributes(status: Batch::STATUS_VALIDATING)
45
- valid = true
46
- errors = @batch.validate
47
- unless errors.empty?
48
- valid = false
49
- errors.each do |error|
50
- message = "Batch Object Validation Error: #{error}"
51
- @bp_log.error(message)
52
- end
53
- end
54
- @batch.update_attributes(status: Batch::STATUS_RUNNING)
55
- return valid
56
- end
57
-
58
- def process_batch
59
- @batch.update_attributes(status: Batch::STATUS_PROCESSING, processing_step_start: DateTime.now)
60
- @batch.batch_objects.each do |object|
61
- begin
62
- process_object(object)
63
- rescue Exception => e
64
- @bp_log.error(e.backtrace)
65
- break
66
- end
67
- sleep 2
68
- end
69
- @batch.update_attributes(status: Batch::STATUS_RUNNING) if @batch.status == Batch::STATUS_PROCESSING
70
- end
71
-
72
- def initiate_batch_run
73
- @bp_log.info "Batch id: #{@batch.id}"
74
- @bp_log.info "Batch name: #{@batch.name}" if @batch.name
75
- @bp_log.info "Batch size: #{@batch.batch_objects.size}"
76
- @batch.logfile.clear # clear out any attached logfile
77
- @batch.update_attributes(:start => DateTime.now,
78
- :status => Batch::STATUS_RUNNING,
79
- :version => VERSION)
80
- @failures = 0
81
- @successes = 0
82
- @results_tracker = Hash.new
83
- end
84
-
85
- def close_batch_run
86
- @batch.reload
87
- @batch.failure = @failures
88
- @batch.outcome = @successes.eql?(@batch.batch_objects.size) ? Batch::OUTCOME_SUCCESS : Batch::OUTCOME_FAILURE
89
- if @batch.status.eql?(Batch::STATUS_RUNNING)
90
- @batch.status = Batch::STATUS_FINISHED
91
- end
92
- @batch.stop = DateTime.now
93
- @batch.success = @successes
94
- @batch.save
95
- @bp_log.info "====== Summary ======"
96
- @results_tracker.keys.each do |type|
97
- verb = case type
98
- when IngestBatchObject.name
99
- "Ingested"
100
- when UpdateBatchObject.name
101
- "Updated"
102
- end
103
- @results_tracker[type].keys.each do |model|
104
- @bp_log.info "#{verb} #{@results_tracker[type][model][:successes]} #{model}"
105
- end
106
- end
107
- end
108
-
109
- def update_results_tracker(type, model, verified)
110
- @results_tracker[type] = Hash.new unless @results_tracker.has_key?(type)
111
- @results_tracker[type][model] = Hash.new unless @results_tracker[type].has_key?(model)
112
- @results_tracker[type][model][:successes] = 0 unless @results_tracker[type][model].has_key?(:successes)
113
- @results_tracker[type][model][:successes] += 1 if verified
114
- end
115
-
116
- def process_object(object)
117
- @bp_log.debug "Processing object: #{object.identifier}"
118
- repository_object = object.process(@operator)
119
- update_results_tracker(object.type, repository_object.present? ? repository_object.class.name : object.model, object.verified)
120
- if object.verified
121
- @successes += 1
122
- else
123
- @failures += 1
124
- end
125
- message = object.results_message
126
- @bp_log.info(message)
127
- end
128
-
129
- def config_logger
130
- logconfig = Log4r::YamlConfigurator
131
- logconfig['LOG_FILE'] = File.join(@bp_log_dir, @bp_log_file)
132
- logconfig.load_yaml_file File.join(LOG_CONFIG_FILEPATH)
133
- @bp_log = Log4r::Logger['batch_processor']
134
- end
135
-
136
- def save_logfile
137
- @bp_log.outputters.each do |outputter|
138
- @logfilename = outputter.filename if outputter.respond_to?(:filename)
139
- end
140
- @batch.update!({ logfile: File.new(@logfilename) }) if @logfilename
141
- end
142
-
143
- def send_notification
144
- begin
145
- BatchProcessorRunMailer.send_notification(@batch).deliver!
146
- rescue
147
- puts "An error occurred while attempting to send the notification."
148
- end
149
- end
150
-
151
- end
152
- end