ddr-batch 1.1.0 → 1.2.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/jobs/ddr/batch/batch_deletion_job.rb +19 -0
- data/app/jobs/ddr/batch/batch_objects_processor_job.rb +11 -0
- data/app/jobs/ddr/batch/batch_processor_job.rb +2 -7
- data/app/mailers/ddr/batch/batch_processor_run_mailer.rb +2 -2
- data/app/models/ddr/batch/batch.rb +13 -21
- data/app/models/ddr/batch/batch_object.rb +6 -4
- data/app/models/ddr/batch/batch_object_message.rb +8 -0
- data/app/models/ddr/batch/batch_object_relationship.rb +0 -1
- data/app/models/ddr/batch/error.rb +10 -0
- data/app/models/ddr/batch/ingest_batch_object.rb +6 -14
- data/app/models/ddr/batch/log.rb +29 -0
- data/app/models/ddr/batch/update_batch_object.rb +5 -7
- data/app/services/ddr/batch/monitor_batch_finished.rb +84 -0
- data/app/services/ddr/batch/monitor_batch_object_handled.rb +36 -0
- data/app/services/ddr/batch/monitor_batch_started.rb +42 -0
- data/app/services/ddr/batch/process_batch.rb +59 -0
- data/app/services/ddr/batch/process_batch_object.rb +48 -0
- data/app/services/ddr/batch/process_batch_objects.rb +35 -0
- data/config/initializers/subscriptions.rb +9 -0
- data/config/locales/en.yml +2 -2
- data/db/migrate/20161115191636_add_columns_to_batch_object.rb +9 -0
- data/db/migrate/20161116142512_create_batch_object_messages.rb +13 -0
- data/lib/ddr/batch.rb +6 -0
- data/lib/ddr/batch/version.rb +1 -1
- metadata +20 -7
- data/app/scripts/ddr/batch/batch_processor.rb +0 -152
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e99d993322b0feb42c08dd0a367aa2cbb9a8bd36
|
4
|
+
data.tar.gz: 7d3ea407d8f1cc09f33725c8c54cbbb934967a98
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8954284d5fed970fbaa7ee95626bafc28f7e906ba668d657d95279191555f842b6b5832943bc9034fdcf5bf699f27ad2d7c2d412d34cb4d89f6d9fff24bb1eb5
|
7
|
+
data.tar.gz: 2de0336399b640e5b3b85daa2ca65ebd1ac31f85e259907d80701d68198c8969e1dc155024ebe9a3bbe16b50cc61703677fe5cb2a9392d2306e2d5094b914bcc
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Ddr::Batch
|
2
|
+
class BatchDeletionJob
|
3
|
+
@queue = :batch
|
4
|
+
|
5
|
+
def self.perform(batch_id)
|
6
|
+
batch = Batch.find(batch_id)
|
7
|
+
batch.status = Batch::STATUS_DELETING
|
8
|
+
batch.save!
|
9
|
+
batch.destroy!
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.before_enqueue_set_status(batch_id)
|
13
|
+
batch = Batch.find(batch_id)
|
14
|
+
batch.status = Batch::STATUS_QUEUED_FOR_DELETION
|
15
|
+
batch.save
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
module Ddr::Batch
|
2
|
+
class BatchObjectsProcessorJob
|
3
|
+
@queue = :batch
|
4
|
+
|
5
|
+
def self.perform(batch_object_ids, operator_id)
|
6
|
+
operator = User.find(operator_id)
|
7
|
+
ProcessBatchObjects.new(batch_object_ids: batch_object_ids, operator: operator).execute
|
8
|
+
end
|
9
|
+
|
10
|
+
end
|
11
|
+
end
|
@@ -3,12 +3,7 @@ module Ddr::Batch
|
|
3
3
|
@queue = :batch
|
4
4
|
|
5
5
|
def self.perform(batch_id, operator_id)
|
6
|
-
|
7
|
-
logfile = "batch_processor_#{ts}_log.txt"
|
8
|
-
batch = Batch.find(batch_id)
|
9
|
-
operator = User.find(operator_id)
|
10
|
-
bp = BatchProcessor.new(batch, operator, log_file: logfile)
|
11
|
-
bp.execute
|
6
|
+
ProcessBatch.new(batch_id: batch_id, operator_id: operator_id).execute
|
12
7
|
end
|
13
8
|
|
14
9
|
def self.after_enqueue_set_status(batch_id, operator_id)
|
@@ -18,4 +13,4 @@ module Ddr::Batch
|
|
18
13
|
end
|
19
14
|
|
20
15
|
end
|
21
|
-
end
|
16
|
+
end
|
@@ -6,7 +6,7 @@ module Ddr::Batch
|
|
6
6
|
|
7
7
|
def send_notification(batch)
|
8
8
|
@batch = batch
|
9
|
-
@title = "Batch Processor Run #{@batch.status}"
|
9
|
+
@title = "Batch Processor Run #{@batch.status} #{@batch.outcome}"
|
10
10
|
@host = `uname -n`.strip
|
11
11
|
@subject = "[#{@host}] #{@title}"
|
12
12
|
from = "#{`echo $USER`.strip}@#{@host}"
|
@@ -16,4 +16,4 @@ module Ddr::Batch
|
|
16
16
|
|
17
17
|
end
|
18
18
|
|
19
|
-
end
|
19
|
+
end
|
@@ -19,30 +19,18 @@ module Ddr::Batch
|
|
19
19
|
STATUS_FINISHED = "FINISHED"
|
20
20
|
STATUS_INTERRUPTED = "INTERRUPTED"
|
21
21
|
STATUS_RESTARTABLE = "INTERRUPTED - RESTARTABLE"
|
22
|
+
STATUS_QUEUED_FOR_DELETION = "QUEUED FOR DELETION"
|
23
|
+
STATUS_DELETING = "DELETING"
|
22
24
|
|
23
|
-
def
|
24
|
-
|
25
|
-
begin
|
26
|
-
batch_objects.each do |object|
|
27
|
-
unless object.verified
|
28
|
-
errors << object.validate
|
29
|
-
end
|
30
|
-
end
|
31
|
-
rescue Exception => e
|
32
|
-
errors << "Exception raised during batch validation: #{e.backtrace}"
|
33
|
-
end
|
34
|
-
errors.flatten
|
35
|
-
end
|
36
|
-
|
37
|
-
def completed_count
|
38
|
-
batch_objects.where(verified: true).count
|
25
|
+
def handled_count
|
26
|
+
batch_objects.where(handled: true).count
|
39
27
|
end
|
40
28
|
|
41
29
|
def time_to_complete
|
42
|
-
unless
|
43
|
-
if
|
44
|
-
|
45
|
-
((Time.now -
|
30
|
+
unless start.nil?
|
31
|
+
if handled_count > 0
|
32
|
+
handled = handled_count
|
33
|
+
((Time.now - start.to_time) / handled) * (batch_objects.count - handled)
|
46
34
|
end
|
47
35
|
end
|
48
36
|
end
|
@@ -63,10 +51,14 @@ module Ddr::Batch
|
|
63
51
|
batch_objects.map{ |x| x.pid if x.pid.present? }.compact
|
64
52
|
end
|
65
53
|
|
54
|
+
def unhandled_objects?
|
55
|
+
batch_objects.any? { |batch_object| !batch_object.handled? }
|
56
|
+
end
|
57
|
+
|
66
58
|
def finished?
|
67
59
|
status == STATUS_FINISHED
|
68
60
|
end
|
69
61
|
|
70
62
|
end
|
71
63
|
|
72
|
-
end
|
64
|
+
end
|
@@ -7,6 +7,7 @@ module Ddr::Batch
|
|
7
7
|
belongs_to :batch, inverse_of: :batch_objects
|
8
8
|
has_many :batch_object_attributes, -> { order "id ASC" }, inverse_of: :batch_object, dependent: :destroy
|
9
9
|
has_many :batch_object_datastreams, inverse_of: :batch_object, dependent: :destroy
|
10
|
+
has_many :batch_object_messages, inverse_of: :batch_object, dependent: :destroy
|
10
11
|
has_many :batch_object_relationships, inverse_of: :batch_object, dependent: :destroy
|
11
12
|
has_many :batch_object_roles, inverse_of: :batch_object, dependent: :destroy
|
12
13
|
|
@@ -20,6 +21,8 @@ module Ddr::Batch
|
|
20
21
|
Model: %{model}
|
21
22
|
EOS
|
22
23
|
|
24
|
+
ProcessingResultsMessage = Struct.new(:level, :message)
|
25
|
+
|
23
26
|
def self.pid_from_identifier(identifier, batch_id)
|
24
27
|
query = "identifier = :identifier"
|
25
28
|
query << " and batch_id = :batch_id" if batch_id
|
@@ -112,12 +115,11 @@ module Ddr::Batch
|
|
112
115
|
obj_model = batch.found_pids[r[:object]]
|
113
116
|
else
|
114
117
|
begin
|
115
|
-
|
116
|
-
obj_model = obj.class.name
|
118
|
+
obj_model = SolrDocument.find(r[:object]).active_fedora_model
|
117
119
|
if batch.present?
|
118
|
-
batch.add_found_pid(
|
120
|
+
batch.add_found_pid(r[:object], obj_model)
|
119
121
|
end
|
120
|
-
rescue
|
122
|
+
rescue SolrDocument::NotFound
|
121
123
|
pid_in_batch = false
|
122
124
|
if batch.present?
|
123
125
|
if batch.pre_assigned_pids.include?(r[:object])
|
@@ -20,10 +20,11 @@ module Ddr::Batch
|
|
20
20
|
|
21
21
|
def results_message
|
22
22
|
if pid
|
23
|
-
|
24
|
-
|
23
|
+
message_level = verified ? Logger::INFO : Logger::WARN
|
24
|
+
verification_result = verified ? "Verified" : "VERIFICATION FAILURE"
|
25
|
+
ProcessingResultsMessage.new(message_level, "Ingested #{model} #{identifier} into #{pid}...#{verification_result}")
|
25
26
|
else
|
26
|
-
|
27
|
+
ProcessingResultsMessagemessage.new(Logger::ERROR, "Attempt to ingest #{model} #{identifier} FAILED")
|
27
28
|
end
|
28
29
|
end
|
29
30
|
|
@@ -95,30 +96,21 @@ module Ddr::Batch
|
|
95
96
|
begin
|
96
97
|
repo_object = model.constantize.new(:pid => repo_pid)
|
97
98
|
repo_object.label = label if label
|
98
|
-
repo_object.save(validate: false)
|
99
99
|
batch_object_attributes.each { |a| repo_object = add_attribute(repo_object, a) }
|
100
|
+
repo_object.save(validate: false)
|
100
101
|
batch_object_datastreams.each { |d| repo_object = populate_datastream(repo_object, d) }
|
101
102
|
batch_object_relationships.each { |r| repo_object = add_relationship(repo_object, r) }
|
102
103
|
batch_object_roles.each { |r| repo_object = add_role(repo_object, r) }
|
103
|
-
repo_object.save!
|
104
|
+
repo_object.save!
|
104
105
|
rescue Exception => e1
|
105
106
|
logger.fatal("Error in creating repository object #{repo_object.pid} for #{identifier} : #{e1}")
|
106
|
-
repo_clean = false
|
107
107
|
if repo_object && !repo_object.new_record?
|
108
108
|
begin
|
109
109
|
logger.info("Deleting potentially incomplete #{repo_object.pid} due to error in ingest batch processing")
|
110
110
|
repo_object.destroy
|
111
111
|
rescue Exception => e2
|
112
112
|
logger.fatal("Error deleting repository object #{repo_object.pid}: #{e2}")
|
113
|
-
else
|
114
|
-
repo_clean = true
|
115
113
|
end
|
116
|
-
else
|
117
|
-
repo_clean = true
|
118
|
-
end
|
119
|
-
if batch.present?
|
120
|
-
batch.status = repo_clean ? Batch::STATUS_RESTARTABLE : Batch::STATUS_INTERRUPTED
|
121
|
-
batch.save
|
122
114
|
end
|
123
115
|
raise e1
|
124
116
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Ddr::Batch
|
2
|
+
class Log
|
3
|
+
|
4
|
+
DEFAULT_LOG_DIR = File.join(Rails.root, 'log')
|
5
|
+
|
6
|
+
class << self
|
7
|
+
|
8
|
+
def logger(batch_id)
|
9
|
+
loggr = Logger.new(File.open(file_path(batch_id), File::WRONLY | File::APPEND | File::CREAT))
|
10
|
+
loggr.level = Ddr::Batch.processor_logging_level
|
11
|
+
loggr.datetime_format = "%Y-%m-%d %H:%M:%S.L"
|
12
|
+
loggr.formatter = proc do |severity, datetime, progname, msg|
|
13
|
+
"#{datetime} #{severity}: #{msg}\n"
|
14
|
+
end
|
15
|
+
loggr
|
16
|
+
end
|
17
|
+
|
18
|
+
def clear_log(batch_id)
|
19
|
+
log_file_path = file_path(batch_id)
|
20
|
+
FileUtils.remove(log_file_path) if File.exists?(log_file_path)
|
21
|
+
end
|
22
|
+
|
23
|
+
def file_path(batch_id)
|
24
|
+
File.join(DEFAULT_LOG_DIR, "batch_#{batch_id}_log.txt")
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -43,10 +43,11 @@ module Ddr::Batch
|
|
43
43
|
|
44
44
|
def results_message
|
45
45
|
if pid
|
46
|
-
|
47
|
-
|
46
|
+
message_level = verified ? Logger::INFO : Logger::WARN
|
47
|
+
verification_result = verified ? "Verified" : "VERIFICATION FAILURE"
|
48
|
+
ProcessingResultsMessage.new(message_level, "Updated #{pid}...#{verification_result}")
|
48
49
|
else
|
49
|
-
|
50
|
+
ProcessingResultsMessage.new(Logger::ERROR, "Attempt to update #{model} #{identifier} FAILED")
|
50
51
|
end
|
51
52
|
end
|
52
53
|
|
@@ -60,6 +61,7 @@ module Ddr::Batch
|
|
60
61
|
repo_object = nil
|
61
62
|
begin
|
62
63
|
repo_object = ActiveFedora::Base.find(pid)
|
64
|
+
update!(model: repo_object.class.name) unless model.present?
|
63
65
|
batch_object_attributes.each do |a|
|
64
66
|
repo_object = case
|
65
67
|
when a.operation.eql?(BatchObjectAttribute::OPERATION_ADD)
|
@@ -81,10 +83,6 @@ module Ddr::Batch
|
|
81
83
|
end
|
82
84
|
rescue Exception => e
|
83
85
|
logger.error("Error in updating repository object #{pid} for #{identifier} : : #{e}")
|
84
|
-
if batch.present?
|
85
|
-
batch.status = Batch::STATUS_RESTARTABLE
|
86
|
-
batch.save
|
87
|
-
end
|
88
86
|
raise e
|
89
87
|
end
|
90
88
|
repo_object
|
@@ -0,0 +1,84 @@
|
|
1
|
+
module Ddr::Batch
|
2
|
+
class MonitorBatchFinished
|
3
|
+
|
4
|
+
class << self
|
5
|
+
def call(*args)
|
6
|
+
event = ActiveSupport::Notifications::Event.new(*args)
|
7
|
+
batch = Ddr::Batch::Batch.find(event.payload[:batch_id])
|
8
|
+
batch_finished(batch)
|
9
|
+
end
|
10
|
+
|
11
|
+
private
|
12
|
+
|
13
|
+
def batch_finished(batch)
|
14
|
+
log_batch_finish(batch)
|
15
|
+
update_batch(batch)
|
16
|
+
send_notification(batch) if batch.user && batch.user.email
|
17
|
+
end
|
18
|
+
|
19
|
+
def log_batch_finish(batch)
|
20
|
+
logger = Ddr::Batch::Log.logger(batch.id)
|
21
|
+
logger.info "====== Summary ======"
|
22
|
+
results_tracker = results(batch)
|
23
|
+
results_tracker.keys.each do |type|
|
24
|
+
results_tracker[type].keys.each do |model|
|
25
|
+
log_result(results_tracker, type, model, logger)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
logger.close
|
29
|
+
end
|
30
|
+
|
31
|
+
def results(batch)
|
32
|
+
results_tracker = Hash.new
|
33
|
+
batch.batch_objects.each do |batch_object|
|
34
|
+
track_result(results_tracker, batch_object)
|
35
|
+
end
|
36
|
+
results_tracker
|
37
|
+
end
|
38
|
+
|
39
|
+
def track_result(results_tracker, batch_object)
|
40
|
+
type, model = [ batch_object.type, batch_object.model ]
|
41
|
+
results_tracker[type] = Hash.new unless results_tracker.has_key?(type)
|
42
|
+
results_tracker[type][model] = Hash.new unless results_tracker[type].has_key?(model)
|
43
|
+
results_tracker[type][model][:successes] = 0 unless results_tracker[type][model].has_key?(:successes)
|
44
|
+
results_tracker[type][model][:successes] += 1 if batch_object.verified
|
45
|
+
end
|
46
|
+
|
47
|
+
def log_result(results_tracker, type, model, logger)
|
48
|
+
verb = type_verb(type)
|
49
|
+
count = results_tracker[type][model][:successes]
|
50
|
+
logger.info "#{verb} #{ActionController::Base.helpers.pluralize(count, model)}"
|
51
|
+
end
|
52
|
+
|
53
|
+
def type_verb(type)
|
54
|
+
case type
|
55
|
+
when IngestBatchObject.name
|
56
|
+
"Ingested"
|
57
|
+
when UpdateBatchObject.name
|
58
|
+
"Updated"
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def update_batch(batch)
|
63
|
+
outcome = batch.success.eql?(batch.batch_objects.size) ? Batch::OUTCOME_SUCCESS : Batch::OUTCOME_FAILURE
|
64
|
+
logfile = File.new(Ddr::Batch::Log.file_path(batch.id))
|
65
|
+
batch.update!(stop: DateTime.now,
|
66
|
+
status: Batch::STATUS_FINISHED,
|
67
|
+
outcome: outcome,
|
68
|
+
logfile: logfile)
|
69
|
+
end
|
70
|
+
|
71
|
+
def send_notification(batch)
|
72
|
+
begin
|
73
|
+
BatchProcessorRunMailer.send_notification(batch).deliver!
|
74
|
+
rescue
|
75
|
+
Rails.logger.error("An error occurred while attempting to send a notification for batch #{batch.id}")
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
|
84
|
+
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Ddr::Batch
|
2
|
+
class MonitorBatchObjectHandled
|
3
|
+
|
4
|
+
class << self
|
5
|
+
def call(*args)
|
6
|
+
event = ActiveSupport::Notifications::Event.new(*args)
|
7
|
+
batch_object = BatchObject.find(event.payload[:batch_object_id])
|
8
|
+
batch = batch_object.batch
|
9
|
+
batch_object_handled(batch_object, batch)
|
10
|
+
end
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
def batch_object_handled(batch_object, batch)
|
15
|
+
log_batch_object_messages(batch_object, batch.id)
|
16
|
+
update_batch(batch_object, batch)
|
17
|
+
unless batch.unhandled_objects?
|
18
|
+
ActiveSupport::Notifications.instrument('finished.batch.batch.ddr', batch_id: batch.id)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def log_batch_object_messages(batch_object, batch_id)
|
23
|
+
logger = Ddr::Batch::Log.logger(batch_id)
|
24
|
+
batch_object.batch_object_messages.each do |message|
|
25
|
+
logger.add(message.level) { "Batch Object #{batch_object.id}: #{message.message}" }
|
26
|
+
end
|
27
|
+
logger.close
|
28
|
+
end
|
29
|
+
|
30
|
+
def update_batch(batch_object, batch)
|
31
|
+
batch_object.verified? ? batch.update!(success: batch.success + 1) : batch.update!(failure: batch.failure + 1)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Ddr::Batch
|
2
|
+
class MonitorBatchStarted
|
3
|
+
|
4
|
+
class << self
|
5
|
+
def call(*args)
|
6
|
+
event = ActiveSupport::Notifications::Event.new(*args)
|
7
|
+
batch = Ddr::Batch::Batch.find(event.payload[:batch_id])
|
8
|
+
batch_started(batch)
|
9
|
+
end
|
10
|
+
|
11
|
+
private
|
12
|
+
|
13
|
+
def batch_started(batch)
|
14
|
+
clear_logs(batch)
|
15
|
+
log_batch_start(batch)
|
16
|
+
update_batch(batch)
|
17
|
+
end
|
18
|
+
|
19
|
+
def clear_logs(batch)
|
20
|
+
# delete any previously existing filesystem log file for this batch
|
21
|
+
Ddr::Batch::Log.clear_log(batch.id)
|
22
|
+
# remove any existing attached log file from the Batch ActiveRecord object
|
23
|
+
batch.logfile.clear
|
24
|
+
end
|
25
|
+
|
26
|
+
def log_batch_start(batch)
|
27
|
+
logger = Ddr::Batch::Log.logger(batch.id)
|
28
|
+
logger.info "Batch id: #{batch.id}"
|
29
|
+
logger.info "Batch name: #{batch.name}" if name
|
30
|
+
logger.info "Batch size: #{batch.batch_objects.size}"
|
31
|
+
logger.close
|
32
|
+
end
|
33
|
+
|
34
|
+
def update_batch(batch)
|
35
|
+
batch.update!(start: DateTime.now,
|
36
|
+
status: Ddr::Batch::Batch::STATUS_RUNNING,
|
37
|
+
version: VERSION)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module Ddr::Batch
|
2
|
+
class ProcessBatch
|
3
|
+
|
4
|
+
attr_accessor :batch, :operator_id
|
5
|
+
|
6
|
+
def initialize(batch_id:, operator_id:)
|
7
|
+
@batch = Ddr::Batch::Batch.find(batch_id)
|
8
|
+
@operator_id = operator_id
|
9
|
+
end
|
10
|
+
|
11
|
+
def execute
|
12
|
+
ActiveSupport::Notifications.instrument('started.batch.batch.ddr', batch_id: batch.id)
|
13
|
+
batch.batch_objects.each do |batch_object|
|
14
|
+
case
|
15
|
+
when batch_object.is_a?(IngestBatchObject)
|
16
|
+
handle_ingest_batch_object(batch_object)
|
17
|
+
when batch_object.is_a?(UpdateBatchObject)
|
18
|
+
handle_update_batch_object(batch_object)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def handle_ingest_batch_object(batch_object)
|
24
|
+
case batch_object.model
|
25
|
+
when 'Collection'
|
26
|
+
ingest_collection_object(batch_object)
|
27
|
+
when 'Item'
|
28
|
+
enqueue_item_component_ingest(batch_object)
|
29
|
+
when 'Component'
|
30
|
+
# skip -- will be handled along with associated Item
|
31
|
+
when 'Target', 'Attachment'
|
32
|
+
Resque.enqueue(BatchObjectsProcessorJob, [ batch_object.id ], operator_id)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def handle_update_batch_object(batch_object)
|
37
|
+
Resque.enqueue(BatchObjectsProcessorJob, [ batch_object.id ], operator_id)
|
38
|
+
end
|
39
|
+
|
40
|
+
def ingest_collection_object(batch_object)
|
41
|
+
# Collection batch objects are processed synchronously because they need to exist in the repository
|
42
|
+
# prior to the processing of any objects (e.g., Item, Component, Target) associated with them.
|
43
|
+
# If the Collection batch object does not process successfully, consider the batch finished (albeit unsuccessfully)
|
44
|
+
# and raise an exception.
|
45
|
+
unless ProcessBatchObject.new(batch_object_id: batch_object.id, operator: User.find(operator_id)).execute
|
46
|
+
ActiveSupport::Notifications.instrument('finished.batch.batch.ddr', batch_id: batch.id)
|
47
|
+
raise Ddr::Batch::BatchObjectProcessingError, batch_object.id
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def enqueue_item_component_ingest(batch_object)
|
52
|
+
batch_object_ids = [ batch_object.id ]
|
53
|
+
parent_rel_query = "object = '#{batch_object.pid}' AND name = '#{Ddr::Batch::BatchObjectRelationship::RELATIONSHIP_PARENT}'"
|
54
|
+
parent_rel_recs = Ddr::Batch::BatchObjectRelationship.where(parent_rel_query)
|
55
|
+
parent_rel_recs.each { |parent_rel_rec| batch_object_ids << parent_rel_rec.batch_object_id}
|
56
|
+
Resque.enqueue(BatchObjectsProcessorJob, batch_object_ids, operator_id)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module Ddr::Batch
|
2
|
+
class ProcessBatchObject
|
3
|
+
|
4
|
+
attr_reader :batch_object_id, :operator
|
5
|
+
|
6
|
+
def initialize(batch_object_id:, operator:)
|
7
|
+
@batch_object_id = batch_object_id
|
8
|
+
@operator = operator
|
9
|
+
end
|
10
|
+
|
11
|
+
def execute
|
12
|
+
ActiveSupport::Notifications.instrument("handled.batchobject.batch.ddr",
|
13
|
+
batch_object_id: batch_object_id) do |payload|
|
14
|
+
batch_object = BatchObject.find(batch_object_id)
|
15
|
+
# Mark batch object as 'handled'
|
16
|
+
batch_object.update!(handled: true)
|
17
|
+
# Validate batch object
|
18
|
+
errors = batch_object.validate
|
19
|
+
# Process batch object or record validation errors
|
20
|
+
if errors.empty?
|
21
|
+
process(batch_object, operator)
|
22
|
+
else
|
23
|
+
record_errors(batch_object, errors)
|
24
|
+
end
|
25
|
+
# return true if batch_object was processed; otherwise, false
|
26
|
+
batch_object.processed? ? true : false
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def process(batch_object, operator)
|
31
|
+
batch_object.update!(validated: true)
|
32
|
+
batch_object.process(operator)
|
33
|
+
batch_object.update!(processed: true)
|
34
|
+
results_message = batch_object.results_message
|
35
|
+
Ddr::Batch::BatchObjectMessage.create!(batch_object: batch_object,
|
36
|
+
level: results_message.level,
|
37
|
+
message: results_message.message)
|
38
|
+
end
|
39
|
+
|
40
|
+
def record_errors(batch_object, errors)
|
41
|
+
errors.each do |error|
|
42
|
+
Ddr::Batch::BatchObjectMessage.create!(batch_object: batch_object,
|
43
|
+
level: Logger::ERROR,
|
44
|
+
message: error)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Ddr::Batch
|
2
|
+
class ProcessBatchObjects
|
3
|
+
|
4
|
+
attr_reader :batch_object_ids, :operator
|
5
|
+
|
6
|
+
def initialize(batch_object_ids:, operator:)
|
7
|
+
@batch_object_ids = batch_object_ids
|
8
|
+
@operator = operator
|
9
|
+
end
|
10
|
+
|
11
|
+
def execute
|
12
|
+
# Assume successful processing of all batch objects until proven otherwise.
|
13
|
+
success = true
|
14
|
+
batch_object_ids.each do |batch_object_id|
|
15
|
+
# Once any batch object included in this job fails to process successfully, do not attempt to process
|
16
|
+
# any remaining batch objects included in this job. Instead, mark them as "handled" so the batch knows
|
17
|
+
# it's not waiting on them to be handled before it can consider itself "finished".
|
18
|
+
# The use case prompting this behavior is a job containing an Item ingest batch object plus one or more
|
19
|
+
# associated Component ingest batch objects. If the Item batch object fails to process correctly, we don't
|
20
|
+
# want to attempt to process the Component batch objects.
|
21
|
+
# In the preceding use case, we could skip the remaining batch objects only if the failed batch object is an
|
22
|
+
# Item but there might be future cases in which we don't want to process the remaining batch objects in the
|
23
|
+
# job regardless of which batch object fails. The failure of any batch object to process should be rare
|
24
|
+
# enough that it doesn't seem harmful to cover this potential broader use case in the current code.
|
25
|
+
if success
|
26
|
+
success = ProcessBatchObject.new(batch_object_id: batch_object_id, operator: operator).execute
|
27
|
+
else
|
28
|
+
batch_object = Ddr::Batch::BatchObject.find(batch_object_id)
|
29
|
+
batch_object.update!(handled: true)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
##
|
2
|
+
## Subscriptions to ActiveSupport::Notifications instrumentation events
|
3
|
+
##
|
4
|
+
|
5
|
+
# Batch Processing events
|
6
|
+
ActiveSupport::Notifications.subscribe('started.batch.batch.ddr', Ddr::Batch::MonitorBatchStarted)
|
7
|
+
ActiveSupport::Notifications.subscribe('handled.batchobject.batch.ddr', Ddr::Batch::MonitorBatchObjectHandled)
|
8
|
+
ActiveSupport::Notifications.subscribe('finished.batch.batch.ddr', Ddr::Batch::MonitorBatchFinished)
|
9
|
+
|
data/config/locales/en.yml
CHANGED
@@ -2,7 +2,7 @@ en:
|
|
2
2
|
ddr:
|
3
3
|
batch:
|
4
4
|
errors:
|
5
|
-
prefix: "%{identifier}
|
5
|
+
prefix: "%{identifier}:"
|
6
6
|
no_batches: "No %{type} batches found for your user account."
|
7
7
|
web:
|
8
8
|
action_names:
|
@@ -49,4 +49,4 @@ en:
|
|
49
49
|
finished_batches:
|
50
50
|
label: "Already Run"
|
51
51
|
pending_batches:
|
52
|
-
label: "Pending"
|
52
|
+
label: "Pending"
|
@@ -0,0 +1,13 @@
|
|
1
|
+
class CreateBatchObjectMessages < ActiveRecord::Migration
|
2
|
+
def change
|
3
|
+
unless table_exists?(:batch_object_messages)
|
4
|
+
create_table :batch_object_messages do |t|
|
5
|
+
t.integer :batch_object_id
|
6
|
+
t.integer :level, default: Logger::DEBUG
|
7
|
+
t.text :message, limit: 65535
|
8
|
+
|
9
|
+
t.timestamps
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
data/lib/ddr/batch.rb
CHANGED
@@ -9,6 +9,12 @@ module Ddr
|
|
9
9
|
extend ActiveSupport::Autoload
|
10
10
|
|
11
11
|
autoload :BatchUser
|
12
|
+
autoload :BatchObjectProcessingError, 'ddr/batch/error'
|
13
|
+
|
14
|
+
# Logging level for batch processing - defaults to Logger::INFO
|
15
|
+
mattr_accessor :processor_logging_level do
|
16
|
+
Logger::INFO
|
17
|
+
end
|
12
18
|
|
13
19
|
def self.table_name_prefix
|
14
20
|
end
|
data/lib/ddr/batch/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ddr-batch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.2.0.rc1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jim Coble
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-
|
12
|
+
date: 2016-12-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rails
|
@@ -59,14 +59,14 @@ dependencies:
|
|
59
59
|
requirements:
|
60
60
|
- - "~>"
|
61
61
|
- !ruby/object:Gem::Version
|
62
|
-
version: '2.
|
62
|
+
version: '2.5'
|
63
63
|
type: :runtime
|
64
64
|
prerelease: false
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
67
|
- - "~>"
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: '2.
|
69
|
+
version: '2.5'
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
71
|
name: log4r
|
72
72
|
requirement: !ruby/object:Gem::Requirement
|
@@ -175,6 +175,8 @@ files:
|
|
175
175
|
- LICENSE.txt
|
176
176
|
- README.md
|
177
177
|
- Rakefile
|
178
|
+
- app/jobs/ddr/batch/batch_deletion_job.rb
|
179
|
+
- app/jobs/ddr/batch/batch_objects_processor_job.rb
|
178
180
|
- app/jobs/ddr/batch/batch_processor_job.rb
|
179
181
|
- app/mailers/ddr/batch/batch_processor_run_mailer.rb
|
180
182
|
- app/models/ddr/batch/batch.rb
|
@@ -182,13 +184,22 @@ files:
|
|
182
184
|
- app/models/ddr/batch/batch_object.rb
|
183
185
|
- app/models/ddr/batch/batch_object_attribute.rb
|
184
186
|
- app/models/ddr/batch/batch_object_datastream.rb
|
187
|
+
- app/models/ddr/batch/batch_object_message.rb
|
185
188
|
- app/models/ddr/batch/batch_object_relationship.rb
|
186
189
|
- app/models/ddr/batch/batch_object_role.rb
|
190
|
+
- app/models/ddr/batch/error.rb
|
187
191
|
- app/models/ddr/batch/ingest_batch_object.rb
|
192
|
+
- app/models/ddr/batch/log.rb
|
188
193
|
- app/models/ddr/batch/update_batch_object.rb
|
189
|
-
- app/
|
194
|
+
- app/services/ddr/batch/monitor_batch_finished.rb
|
195
|
+
- app/services/ddr/batch/monitor_batch_object_handled.rb
|
196
|
+
- app/services/ddr/batch/monitor_batch_started.rb
|
197
|
+
- app/services/ddr/batch/process_batch.rb
|
198
|
+
- app/services/ddr/batch/process_batch_object.rb
|
199
|
+
- app/services/ddr/batch/process_batch_objects.rb
|
190
200
|
- app/views/ddr/batch/batch_processor_run_mailer/send_notification.html.erb
|
191
201
|
- app/views/ddr/batch/batch_processor_run_mailer/send_notification.text.erb
|
202
|
+
- config/initializers/subscriptions.rb
|
192
203
|
- config/locales/en.yml
|
193
204
|
- config/routes.rb
|
194
205
|
- db/migrate/20150828183839_create_batches.rb
|
@@ -197,6 +208,8 @@ files:
|
|
197
208
|
- db/migrate/20150828202200_create_batch_object_datastreams.rb
|
198
209
|
- db/migrate/20150828202240_create_batch_object_relationships.rb
|
199
210
|
- db/migrate/20160816164010_create_batch_object_roles.rb
|
211
|
+
- db/migrate/20161115191636_add_columns_to_batch_object.rb
|
212
|
+
- db/migrate/20161116142512_create_batch_object_messages.rb
|
200
213
|
- lib/ddr-batch.rb
|
201
214
|
- lib/ddr/batch.rb
|
202
215
|
- lib/ddr/batch/batch_user.rb
|
@@ -218,9 +231,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
218
231
|
version: '0'
|
219
232
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
220
233
|
requirements:
|
221
|
-
- - "
|
234
|
+
- - ">"
|
222
235
|
- !ruby/object:Gem::Version
|
223
|
-
version:
|
236
|
+
version: 1.3.1
|
224
237
|
requirements: []
|
225
238
|
rubyforge_project:
|
226
239
|
rubygems_version: 2.4.3
|
@@ -1,152 +0,0 @@
|
|
1
|
-
module Ddr::Batch
|
2
|
-
class BatchProcessor
|
3
|
-
|
4
|
-
LOG_CONFIG_FILEPATH = File.join(Rails.root, 'config', 'log4r_batch_processor.yml')
|
5
|
-
DEFAULT_LOG_DIR = File.join(Rails.root, 'log')
|
6
|
-
DEFAULT_LOG_FILE = "batch_processor_log.txt"
|
7
|
-
PASS = "PASS"
|
8
|
-
FAIL = "FAIL"
|
9
|
-
|
10
|
-
# Options
|
11
|
-
# :log_dir - optional - directory for log file - default is given in DEFAULT_LOG_DIR
|
12
|
-
# :log_file - optional - filename of log file - default is given in DEFAULT_LOG_FILE
|
13
|
-
# :skip_validation - optional - whether to skip batch object validation step when processing - default is false
|
14
|
-
# :ignore_validation_errors - optional - whether to continue processing even if batch object validation errors occur - default is false
|
15
|
-
def initialize(batch, operator=nil, opts={})
|
16
|
-
@batch = batch
|
17
|
-
@operator = operator
|
18
|
-
@bp_log_dir = opts.fetch(:log_dir, DEFAULT_LOG_DIR)
|
19
|
-
@bp_log_file = opts.fetch(:log_file, DEFAULT_LOG_FILE)
|
20
|
-
@skip_validation = opts.fetch(:skip_validation, false)
|
21
|
-
@ignore_validation_errors = opts.fetch(:ignore_validation_errors, false)
|
22
|
-
end
|
23
|
-
|
24
|
-
def execute
|
25
|
-
config_logger
|
26
|
-
if @batch
|
27
|
-
initiate_batch_run
|
28
|
-
unless @skip_validation
|
29
|
-
valid_batch = validate_batch
|
30
|
-
@batch.update_attributes(status: Batch::STATUS_INVALID) unless valid_batch
|
31
|
-
end
|
32
|
-
if @skip_validation || @ignore_validation_errors || valid_batch
|
33
|
-
process_batch
|
34
|
-
end
|
35
|
-
close_batch_run
|
36
|
-
end
|
37
|
-
save_logfile
|
38
|
-
send_notification if @batch.user && @batch.user.email
|
39
|
-
end
|
40
|
-
|
41
|
-
private
|
42
|
-
|
43
|
-
def validate_batch
|
44
|
-
@batch.update_attributes(status: Batch::STATUS_VALIDATING)
|
45
|
-
valid = true
|
46
|
-
errors = @batch.validate
|
47
|
-
unless errors.empty?
|
48
|
-
valid = false
|
49
|
-
errors.each do |error|
|
50
|
-
message = "Batch Object Validation Error: #{error}"
|
51
|
-
@bp_log.error(message)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
@batch.update_attributes(status: Batch::STATUS_RUNNING)
|
55
|
-
return valid
|
56
|
-
end
|
57
|
-
|
58
|
-
def process_batch
|
59
|
-
@batch.update_attributes(status: Batch::STATUS_PROCESSING, processing_step_start: DateTime.now)
|
60
|
-
@batch.batch_objects.each do |object|
|
61
|
-
begin
|
62
|
-
process_object(object)
|
63
|
-
rescue Exception => e
|
64
|
-
@bp_log.error(e.backtrace)
|
65
|
-
break
|
66
|
-
end
|
67
|
-
sleep 2
|
68
|
-
end
|
69
|
-
@batch.update_attributes(status: Batch::STATUS_RUNNING) if @batch.status == Batch::STATUS_PROCESSING
|
70
|
-
end
|
71
|
-
|
72
|
-
def initiate_batch_run
|
73
|
-
@bp_log.info "Batch id: #{@batch.id}"
|
74
|
-
@bp_log.info "Batch name: #{@batch.name}" if @batch.name
|
75
|
-
@bp_log.info "Batch size: #{@batch.batch_objects.size}"
|
76
|
-
@batch.logfile.clear # clear out any attached logfile
|
77
|
-
@batch.update_attributes(:start => DateTime.now,
|
78
|
-
:status => Batch::STATUS_RUNNING,
|
79
|
-
:version => VERSION)
|
80
|
-
@failures = 0
|
81
|
-
@successes = 0
|
82
|
-
@results_tracker = Hash.new
|
83
|
-
end
|
84
|
-
|
85
|
-
def close_batch_run
|
86
|
-
@batch.reload
|
87
|
-
@batch.failure = @failures
|
88
|
-
@batch.outcome = @successes.eql?(@batch.batch_objects.size) ? Batch::OUTCOME_SUCCESS : Batch::OUTCOME_FAILURE
|
89
|
-
if @batch.status.eql?(Batch::STATUS_RUNNING)
|
90
|
-
@batch.status = Batch::STATUS_FINISHED
|
91
|
-
end
|
92
|
-
@batch.stop = DateTime.now
|
93
|
-
@batch.success = @successes
|
94
|
-
@batch.save
|
95
|
-
@bp_log.info "====== Summary ======"
|
96
|
-
@results_tracker.keys.each do |type|
|
97
|
-
verb = case type
|
98
|
-
when IngestBatchObject.name
|
99
|
-
"Ingested"
|
100
|
-
when UpdateBatchObject.name
|
101
|
-
"Updated"
|
102
|
-
end
|
103
|
-
@results_tracker[type].keys.each do |model|
|
104
|
-
@bp_log.info "#{verb} #{@results_tracker[type][model][:successes]} #{model}"
|
105
|
-
end
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
def update_results_tracker(type, model, verified)
|
110
|
-
@results_tracker[type] = Hash.new unless @results_tracker.has_key?(type)
|
111
|
-
@results_tracker[type][model] = Hash.new unless @results_tracker[type].has_key?(model)
|
112
|
-
@results_tracker[type][model][:successes] = 0 unless @results_tracker[type][model].has_key?(:successes)
|
113
|
-
@results_tracker[type][model][:successes] += 1 if verified
|
114
|
-
end
|
115
|
-
|
116
|
-
def process_object(object)
|
117
|
-
@bp_log.debug "Processing object: #{object.identifier}"
|
118
|
-
repository_object = object.process(@operator)
|
119
|
-
update_results_tracker(object.type, repository_object.present? ? repository_object.class.name : object.model, object.verified)
|
120
|
-
if object.verified
|
121
|
-
@successes += 1
|
122
|
-
else
|
123
|
-
@failures += 1
|
124
|
-
end
|
125
|
-
message = object.results_message
|
126
|
-
@bp_log.info(message)
|
127
|
-
end
|
128
|
-
|
129
|
-
def config_logger
|
130
|
-
logconfig = Log4r::YamlConfigurator
|
131
|
-
logconfig['LOG_FILE'] = File.join(@bp_log_dir, @bp_log_file)
|
132
|
-
logconfig.load_yaml_file File.join(LOG_CONFIG_FILEPATH)
|
133
|
-
@bp_log = Log4r::Logger['batch_processor']
|
134
|
-
end
|
135
|
-
|
136
|
-
def save_logfile
|
137
|
-
@bp_log.outputters.each do |outputter|
|
138
|
-
@logfilename = outputter.filename if outputter.respond_to?(:filename)
|
139
|
-
end
|
140
|
-
@batch.update!({ logfile: File.new(@logfilename) }) if @logfilename
|
141
|
-
end
|
142
|
-
|
143
|
-
def send_notification
|
144
|
-
begin
|
145
|
-
BatchProcessorRunMailer.send_notification(@batch).deliver!
|
146
|
-
rescue
|
147
|
-
puts "An error occurred while attempting to send the notification."
|
148
|
-
end
|
149
|
-
end
|
150
|
-
|
151
|
-
end
|
152
|
-
end
|