ddr-batch 1.1.0 → 1.2.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/jobs/ddr/batch/batch_deletion_job.rb +19 -0
- data/app/jobs/ddr/batch/batch_objects_processor_job.rb +11 -0
- data/app/jobs/ddr/batch/batch_processor_job.rb +2 -7
- data/app/mailers/ddr/batch/batch_processor_run_mailer.rb +2 -2
- data/app/models/ddr/batch/batch.rb +13 -21
- data/app/models/ddr/batch/batch_object.rb +6 -4
- data/app/models/ddr/batch/batch_object_message.rb +8 -0
- data/app/models/ddr/batch/batch_object_relationship.rb +0 -1
- data/app/models/ddr/batch/error.rb +10 -0
- data/app/models/ddr/batch/ingest_batch_object.rb +6 -14
- data/app/models/ddr/batch/log.rb +29 -0
- data/app/models/ddr/batch/update_batch_object.rb +5 -7
- data/app/services/ddr/batch/monitor_batch_finished.rb +84 -0
- data/app/services/ddr/batch/monitor_batch_object_handled.rb +36 -0
- data/app/services/ddr/batch/monitor_batch_started.rb +42 -0
- data/app/services/ddr/batch/process_batch.rb +59 -0
- data/app/services/ddr/batch/process_batch_object.rb +48 -0
- data/app/services/ddr/batch/process_batch_objects.rb +35 -0
- data/config/initializers/subscriptions.rb +9 -0
- data/config/locales/en.yml +2 -2
- data/db/migrate/20161115191636_add_columns_to_batch_object.rb +9 -0
- data/db/migrate/20161116142512_create_batch_object_messages.rb +13 -0
- data/lib/ddr/batch.rb +6 -0
- data/lib/ddr/batch/version.rb +1 -1
- metadata +20 -7
- data/app/scripts/ddr/batch/batch_processor.rb +0 -152
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e99d993322b0feb42c08dd0a367aa2cbb9a8bd36
|
4
|
+
data.tar.gz: 7d3ea407d8f1cc09f33725c8c54cbbb934967a98
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8954284d5fed970fbaa7ee95626bafc28f7e906ba668d657d95279191555f842b6b5832943bc9034fdcf5bf699f27ad2d7c2d412d34cb4d89f6d9fff24bb1eb5
|
7
|
+
data.tar.gz: 2de0336399b640e5b3b85daa2ca65ebd1ac31f85e259907d80701d68198c8969e1dc155024ebe9a3bbe16b50cc61703677fe5cb2a9392d2306e2d5094b914bcc
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Ddr::Batch
|
2
|
+
class BatchDeletionJob
|
3
|
+
@queue = :batch
|
4
|
+
|
5
|
+
def self.perform(batch_id)
|
6
|
+
batch = Batch.find(batch_id)
|
7
|
+
batch.status = Batch::STATUS_DELETING
|
8
|
+
batch.save!
|
9
|
+
batch.destroy!
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.before_enqueue_set_status(batch_id)
|
13
|
+
batch = Batch.find(batch_id)
|
14
|
+
batch.status = Batch::STATUS_QUEUED_FOR_DELETION
|
15
|
+
batch.save
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
module Ddr::Batch
|
2
|
+
class BatchObjectsProcessorJob
|
3
|
+
@queue = :batch
|
4
|
+
|
5
|
+
def self.perform(batch_object_ids, operator_id)
|
6
|
+
operator = User.find(operator_id)
|
7
|
+
ProcessBatchObjects.new(batch_object_ids: batch_object_ids, operator: operator).execute
|
8
|
+
end
|
9
|
+
|
10
|
+
end
|
11
|
+
end
|
@@ -3,12 +3,7 @@ module Ddr::Batch
|
|
3
3
|
@queue = :batch
|
4
4
|
|
5
5
|
def self.perform(batch_id, operator_id)
|
6
|
-
|
7
|
-
logfile = "batch_processor_#{ts}_log.txt"
|
8
|
-
batch = Batch.find(batch_id)
|
9
|
-
operator = User.find(operator_id)
|
10
|
-
bp = BatchProcessor.new(batch, operator, log_file: logfile)
|
11
|
-
bp.execute
|
6
|
+
ProcessBatch.new(batch_id: batch_id, operator_id: operator_id).execute
|
12
7
|
end
|
13
8
|
|
14
9
|
def self.after_enqueue_set_status(batch_id, operator_id)
|
@@ -18,4 +13,4 @@ module Ddr::Batch
|
|
18
13
|
end
|
19
14
|
|
20
15
|
end
|
21
|
-
end
|
16
|
+
end
|
@@ -6,7 +6,7 @@ module Ddr::Batch
|
|
6
6
|
|
7
7
|
def send_notification(batch)
|
8
8
|
@batch = batch
|
9
|
-
@title = "Batch Processor Run #{@batch.status}"
|
9
|
+
@title = "Batch Processor Run #{@batch.status} #{@batch.outcome}"
|
10
10
|
@host = `uname -n`.strip
|
11
11
|
@subject = "[#{@host}] #{@title}"
|
12
12
|
from = "#{`echo $USER`.strip}@#{@host}"
|
@@ -16,4 +16,4 @@ module Ddr::Batch
|
|
16
16
|
|
17
17
|
end
|
18
18
|
|
19
|
-
end
|
19
|
+
end
|
@@ -19,30 +19,18 @@ module Ddr::Batch
|
|
19
19
|
STATUS_FINISHED = "FINISHED"
|
20
20
|
STATUS_INTERRUPTED = "INTERRUPTED"
|
21
21
|
STATUS_RESTARTABLE = "INTERRUPTED - RESTARTABLE"
|
22
|
+
STATUS_QUEUED_FOR_DELETION = "QUEUED FOR DELETION"
|
23
|
+
STATUS_DELETING = "DELETING"
|
22
24
|
|
23
|
-
def
|
24
|
-
|
25
|
-
begin
|
26
|
-
batch_objects.each do |object|
|
27
|
-
unless object.verified
|
28
|
-
errors << object.validate
|
29
|
-
end
|
30
|
-
end
|
31
|
-
rescue Exception => e
|
32
|
-
errors << "Exception raised during batch validation: #{e.backtrace}"
|
33
|
-
end
|
34
|
-
errors.flatten
|
35
|
-
end
|
36
|
-
|
37
|
-
def completed_count
|
38
|
-
batch_objects.where(verified: true).count
|
25
|
+
def handled_count
|
26
|
+
batch_objects.where(handled: true).count
|
39
27
|
end
|
40
28
|
|
41
29
|
def time_to_complete
|
42
|
-
unless
|
43
|
-
if
|
44
|
-
|
45
|
-
((Time.now -
|
30
|
+
unless start.nil?
|
31
|
+
if handled_count > 0
|
32
|
+
handled = handled_count
|
33
|
+
((Time.now - start.to_time) / handled) * (batch_objects.count - handled)
|
46
34
|
end
|
47
35
|
end
|
48
36
|
end
|
@@ -63,10 +51,14 @@ module Ddr::Batch
|
|
63
51
|
batch_objects.map{ |x| x.pid if x.pid.present? }.compact
|
64
52
|
end
|
65
53
|
|
54
|
+
def unhandled_objects?
|
55
|
+
batch_objects.any? { |batch_object| !batch_object.handled? }
|
56
|
+
end
|
57
|
+
|
66
58
|
def finished?
|
67
59
|
status == STATUS_FINISHED
|
68
60
|
end
|
69
61
|
|
70
62
|
end
|
71
63
|
|
72
|
-
end
|
64
|
+
end
|
@@ -7,6 +7,7 @@ module Ddr::Batch
|
|
7
7
|
belongs_to :batch, inverse_of: :batch_objects
|
8
8
|
has_many :batch_object_attributes, -> { order "id ASC" }, inverse_of: :batch_object, dependent: :destroy
|
9
9
|
has_many :batch_object_datastreams, inverse_of: :batch_object, dependent: :destroy
|
10
|
+
has_many :batch_object_messages, inverse_of: :batch_object, dependent: :destroy
|
10
11
|
has_many :batch_object_relationships, inverse_of: :batch_object, dependent: :destroy
|
11
12
|
has_many :batch_object_roles, inverse_of: :batch_object, dependent: :destroy
|
12
13
|
|
@@ -20,6 +21,8 @@ module Ddr::Batch
|
|
20
21
|
Model: %{model}
|
21
22
|
EOS
|
22
23
|
|
24
|
+
ProcessingResultsMessage = Struct.new(:level, :message)
|
25
|
+
|
23
26
|
def self.pid_from_identifier(identifier, batch_id)
|
24
27
|
query = "identifier = :identifier"
|
25
28
|
query << " and batch_id = :batch_id" if batch_id
|
@@ -112,12 +115,11 @@ module Ddr::Batch
|
|
112
115
|
obj_model = batch.found_pids[r[:object]]
|
113
116
|
else
|
114
117
|
begin
|
115
|
-
|
116
|
-
obj_model = obj.class.name
|
118
|
+
obj_model = SolrDocument.find(r[:object]).active_fedora_model
|
117
119
|
if batch.present?
|
118
|
-
batch.add_found_pid(
|
120
|
+
batch.add_found_pid(r[:object], obj_model)
|
119
121
|
end
|
120
|
-
rescue
|
122
|
+
rescue SolrDocument::NotFound
|
121
123
|
pid_in_batch = false
|
122
124
|
if batch.present?
|
123
125
|
if batch.pre_assigned_pids.include?(r[:object])
|
@@ -20,10 +20,11 @@ module Ddr::Batch
|
|
20
20
|
|
21
21
|
def results_message
|
22
22
|
if pid
|
23
|
-
|
24
|
-
|
23
|
+
message_level = verified ? Logger::INFO : Logger::WARN
|
24
|
+
verification_result = verified ? "Verified" : "VERIFICATION FAILURE"
|
25
|
+
ProcessingResultsMessage.new(message_level, "Ingested #{model} #{identifier} into #{pid}...#{verification_result}")
|
25
26
|
else
|
26
|
-
|
27
|
+
ProcessingResultsMessagemessage.new(Logger::ERROR, "Attempt to ingest #{model} #{identifier} FAILED")
|
27
28
|
end
|
28
29
|
end
|
29
30
|
|
@@ -95,30 +96,21 @@ module Ddr::Batch
|
|
95
96
|
begin
|
96
97
|
repo_object = model.constantize.new(:pid => repo_pid)
|
97
98
|
repo_object.label = label if label
|
98
|
-
repo_object.save(validate: false)
|
99
99
|
batch_object_attributes.each { |a| repo_object = add_attribute(repo_object, a) }
|
100
|
+
repo_object.save(validate: false)
|
100
101
|
batch_object_datastreams.each { |d| repo_object = populate_datastream(repo_object, d) }
|
101
102
|
batch_object_relationships.each { |r| repo_object = add_relationship(repo_object, r) }
|
102
103
|
batch_object_roles.each { |r| repo_object = add_role(repo_object, r) }
|
103
|
-
repo_object.save!
|
104
|
+
repo_object.save!
|
104
105
|
rescue Exception => e1
|
105
106
|
logger.fatal("Error in creating repository object #{repo_object.pid} for #{identifier} : #{e1}")
|
106
|
-
repo_clean = false
|
107
107
|
if repo_object && !repo_object.new_record?
|
108
108
|
begin
|
109
109
|
logger.info("Deleting potentially incomplete #{repo_object.pid} due to error in ingest batch processing")
|
110
110
|
repo_object.destroy
|
111
111
|
rescue Exception => e2
|
112
112
|
logger.fatal("Error deleting repository object #{repo_object.pid}: #{e2}")
|
113
|
-
else
|
114
|
-
repo_clean = true
|
115
113
|
end
|
116
|
-
else
|
117
|
-
repo_clean = true
|
118
|
-
end
|
119
|
-
if batch.present?
|
120
|
-
batch.status = repo_clean ? Batch::STATUS_RESTARTABLE : Batch::STATUS_INTERRUPTED
|
121
|
-
batch.save
|
122
114
|
end
|
123
115
|
raise e1
|
124
116
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Ddr::Batch
|
2
|
+
class Log
|
3
|
+
|
4
|
+
DEFAULT_LOG_DIR = File.join(Rails.root, 'log')
|
5
|
+
|
6
|
+
class << self
|
7
|
+
|
8
|
+
def logger(batch_id)
|
9
|
+
loggr = Logger.new(File.open(file_path(batch_id), File::WRONLY | File::APPEND | File::CREAT))
|
10
|
+
loggr.level = Ddr::Batch.processor_logging_level
|
11
|
+
loggr.datetime_format = "%Y-%m-%d %H:%M:%S.L"
|
12
|
+
loggr.formatter = proc do |severity, datetime, progname, msg|
|
13
|
+
"#{datetime} #{severity}: #{msg}\n"
|
14
|
+
end
|
15
|
+
loggr
|
16
|
+
end
|
17
|
+
|
18
|
+
def clear_log(batch_id)
|
19
|
+
log_file_path = file_path(batch_id)
|
20
|
+
FileUtils.remove(log_file_path) if File.exists?(log_file_path)
|
21
|
+
end
|
22
|
+
|
23
|
+
def file_path(batch_id)
|
24
|
+
File.join(DEFAULT_LOG_DIR, "batch_#{batch_id}_log.txt")
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -43,10 +43,11 @@ module Ddr::Batch
|
|
43
43
|
|
44
44
|
def results_message
|
45
45
|
if pid
|
46
|
-
|
47
|
-
|
46
|
+
message_level = verified ? Logger::INFO : Logger::WARN
|
47
|
+
verification_result = verified ? "Verified" : "VERIFICATION FAILURE"
|
48
|
+
ProcessingResultsMessage.new(message_level, "Updated #{pid}...#{verification_result}")
|
48
49
|
else
|
49
|
-
|
50
|
+
ProcessingResultsMessage.new(Logger::ERROR, "Attempt to update #{model} #{identifier} FAILED")
|
50
51
|
end
|
51
52
|
end
|
52
53
|
|
@@ -60,6 +61,7 @@ module Ddr::Batch
|
|
60
61
|
repo_object = nil
|
61
62
|
begin
|
62
63
|
repo_object = ActiveFedora::Base.find(pid)
|
64
|
+
update!(model: repo_object.class.name) unless model.present?
|
63
65
|
batch_object_attributes.each do |a|
|
64
66
|
repo_object = case
|
65
67
|
when a.operation.eql?(BatchObjectAttribute::OPERATION_ADD)
|
@@ -81,10 +83,6 @@ module Ddr::Batch
|
|
81
83
|
end
|
82
84
|
rescue Exception => e
|
83
85
|
logger.error("Error in updating repository object #{pid} for #{identifier} : : #{e}")
|
84
|
-
if batch.present?
|
85
|
-
batch.status = Batch::STATUS_RESTARTABLE
|
86
|
-
batch.save
|
87
|
-
end
|
88
86
|
raise e
|
89
87
|
end
|
90
88
|
repo_object
|
@@ -0,0 +1,84 @@
|
|
1
|
+
module Ddr::Batch
|
2
|
+
class MonitorBatchFinished
|
3
|
+
|
4
|
+
class << self
|
5
|
+
def call(*args)
|
6
|
+
event = ActiveSupport::Notifications::Event.new(*args)
|
7
|
+
batch = Ddr::Batch::Batch.find(event.payload[:batch_id])
|
8
|
+
batch_finished(batch)
|
9
|
+
end
|
10
|
+
|
11
|
+
private
|
12
|
+
|
13
|
+
def batch_finished(batch)
|
14
|
+
log_batch_finish(batch)
|
15
|
+
update_batch(batch)
|
16
|
+
send_notification(batch) if batch.user && batch.user.email
|
17
|
+
end
|
18
|
+
|
19
|
+
def log_batch_finish(batch)
|
20
|
+
logger = Ddr::Batch::Log.logger(batch.id)
|
21
|
+
logger.info "====== Summary ======"
|
22
|
+
results_tracker = results(batch)
|
23
|
+
results_tracker.keys.each do |type|
|
24
|
+
results_tracker[type].keys.each do |model|
|
25
|
+
log_result(results_tracker, type, model, logger)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
logger.close
|
29
|
+
end
|
30
|
+
|
31
|
+
def results(batch)
|
32
|
+
results_tracker = Hash.new
|
33
|
+
batch.batch_objects.each do |batch_object|
|
34
|
+
track_result(results_tracker, batch_object)
|
35
|
+
end
|
36
|
+
results_tracker
|
37
|
+
end
|
38
|
+
|
39
|
+
def track_result(results_tracker, batch_object)
|
40
|
+
type, model = [ batch_object.type, batch_object.model ]
|
41
|
+
results_tracker[type] = Hash.new unless results_tracker.has_key?(type)
|
42
|
+
results_tracker[type][model] = Hash.new unless results_tracker[type].has_key?(model)
|
43
|
+
results_tracker[type][model][:successes] = 0 unless results_tracker[type][model].has_key?(:successes)
|
44
|
+
results_tracker[type][model][:successes] += 1 if batch_object.verified
|
45
|
+
end
|
46
|
+
|
47
|
+
def log_result(results_tracker, type, model, logger)
|
48
|
+
verb = type_verb(type)
|
49
|
+
count = results_tracker[type][model][:successes]
|
50
|
+
logger.info "#{verb} #{ActionController::Base.helpers.pluralize(count, model)}"
|
51
|
+
end
|
52
|
+
|
53
|
+
def type_verb(type)
|
54
|
+
case type
|
55
|
+
when IngestBatchObject.name
|
56
|
+
"Ingested"
|
57
|
+
when UpdateBatchObject.name
|
58
|
+
"Updated"
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def update_batch(batch)
|
63
|
+
outcome = batch.success.eql?(batch.batch_objects.size) ? Batch::OUTCOME_SUCCESS : Batch::OUTCOME_FAILURE
|
64
|
+
logfile = File.new(Ddr::Batch::Log.file_path(batch.id))
|
65
|
+
batch.update!(stop: DateTime.now,
|
66
|
+
status: Batch::STATUS_FINISHED,
|
67
|
+
outcome: outcome,
|
68
|
+
logfile: logfile)
|
69
|
+
end
|
70
|
+
|
71
|
+
def send_notification(batch)
|
72
|
+
begin
|
73
|
+
BatchProcessorRunMailer.send_notification(batch).deliver!
|
74
|
+
rescue
|
75
|
+
Rails.logger.error("An error occurred while attempting to send a notification for batch #{batch.id}")
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
|
84
|
+
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Ddr::Batch
|
2
|
+
class MonitorBatchObjectHandled
|
3
|
+
|
4
|
+
class << self
|
5
|
+
def call(*args)
|
6
|
+
event = ActiveSupport::Notifications::Event.new(*args)
|
7
|
+
batch_object = BatchObject.find(event.payload[:batch_object_id])
|
8
|
+
batch = batch_object.batch
|
9
|
+
batch_object_handled(batch_object, batch)
|
10
|
+
end
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
def batch_object_handled(batch_object, batch)
|
15
|
+
log_batch_object_messages(batch_object, batch.id)
|
16
|
+
update_batch(batch_object, batch)
|
17
|
+
unless batch.unhandled_objects?
|
18
|
+
ActiveSupport::Notifications.instrument('finished.batch.batch.ddr', batch_id: batch.id)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def log_batch_object_messages(batch_object, batch_id)
|
23
|
+
logger = Ddr::Batch::Log.logger(batch_id)
|
24
|
+
batch_object.batch_object_messages.each do |message|
|
25
|
+
logger.add(message.level) { "Batch Object #{batch_object.id}: #{message.message}" }
|
26
|
+
end
|
27
|
+
logger.close
|
28
|
+
end
|
29
|
+
|
30
|
+
def update_batch(batch_object, batch)
|
31
|
+
batch_object.verified? ? batch.update!(success: batch.success + 1) : batch.update!(failure: batch.failure + 1)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Ddr::Batch
|
2
|
+
class MonitorBatchStarted
|
3
|
+
|
4
|
+
class << self
|
5
|
+
def call(*args)
|
6
|
+
event = ActiveSupport::Notifications::Event.new(*args)
|
7
|
+
batch = Ddr::Batch::Batch.find(event.payload[:batch_id])
|
8
|
+
batch_started(batch)
|
9
|
+
end
|
10
|
+
|
11
|
+
private
|
12
|
+
|
13
|
+
def batch_started(batch)
|
14
|
+
clear_logs(batch)
|
15
|
+
log_batch_start(batch)
|
16
|
+
update_batch(batch)
|
17
|
+
end
|
18
|
+
|
19
|
+
def clear_logs(batch)
|
20
|
+
# delete any previously existing filesystem log file for this batch
|
21
|
+
Ddr::Batch::Log.clear_log(batch.id)
|
22
|
+
# remove any existing attached log file from the Batch ActiveRecord object
|
23
|
+
batch.logfile.clear
|
24
|
+
end
|
25
|
+
|
26
|
+
def log_batch_start(batch)
|
27
|
+
logger = Ddr::Batch::Log.logger(batch.id)
|
28
|
+
logger.info "Batch id: #{batch.id}"
|
29
|
+
logger.info "Batch name: #{batch.name}" if name
|
30
|
+
logger.info "Batch size: #{batch.batch_objects.size}"
|
31
|
+
logger.close
|
32
|
+
end
|
33
|
+
|
34
|
+
def update_batch(batch)
|
35
|
+
batch.update!(start: DateTime.now,
|
36
|
+
status: Ddr::Batch::Batch::STATUS_RUNNING,
|
37
|
+
version: VERSION)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module Ddr::Batch
|
2
|
+
class ProcessBatch
|
3
|
+
|
4
|
+
attr_accessor :batch, :operator_id
|
5
|
+
|
6
|
+
def initialize(batch_id:, operator_id:)
|
7
|
+
@batch = Ddr::Batch::Batch.find(batch_id)
|
8
|
+
@operator_id = operator_id
|
9
|
+
end
|
10
|
+
|
11
|
+
def execute
|
12
|
+
ActiveSupport::Notifications.instrument('started.batch.batch.ddr', batch_id: batch.id)
|
13
|
+
batch.batch_objects.each do |batch_object|
|
14
|
+
case
|
15
|
+
when batch_object.is_a?(IngestBatchObject)
|
16
|
+
handle_ingest_batch_object(batch_object)
|
17
|
+
when batch_object.is_a?(UpdateBatchObject)
|
18
|
+
handle_update_batch_object(batch_object)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def handle_ingest_batch_object(batch_object)
|
24
|
+
case batch_object.model
|
25
|
+
when 'Collection'
|
26
|
+
ingest_collection_object(batch_object)
|
27
|
+
when 'Item'
|
28
|
+
enqueue_item_component_ingest(batch_object)
|
29
|
+
when 'Component'
|
30
|
+
# skip -- will be handled along with associated Item
|
31
|
+
when 'Target', 'Attachment'
|
32
|
+
Resque.enqueue(BatchObjectsProcessorJob, [ batch_object.id ], operator_id)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def handle_update_batch_object(batch_object)
|
37
|
+
Resque.enqueue(BatchObjectsProcessorJob, [ batch_object.id ], operator_id)
|
38
|
+
end
|
39
|
+
|
40
|
+
def ingest_collection_object(batch_object)
|
41
|
+
# Collection batch objects are processed synchronously because they need to exist in the repository
|
42
|
+
# prior to the processing of any objects (e.g., Item, Component, Target) associated with them.
|
43
|
+
# If the Collection batch object does not process successfully, consider the batch finished (albeit unsuccessfully)
|
44
|
+
# and raise an exception.
|
45
|
+
unless ProcessBatchObject.new(batch_object_id: batch_object.id, operator: User.find(operator_id)).execute
|
46
|
+
ActiveSupport::Notifications.instrument('finished.batch.batch.ddr', batch_id: batch.id)
|
47
|
+
raise Ddr::Batch::BatchObjectProcessingError, batch_object.id
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def enqueue_item_component_ingest(batch_object)
|
52
|
+
batch_object_ids = [ batch_object.id ]
|
53
|
+
parent_rel_query = "object = '#{batch_object.pid}' AND name = '#{Ddr::Batch::BatchObjectRelationship::RELATIONSHIP_PARENT}'"
|
54
|
+
parent_rel_recs = Ddr::Batch::BatchObjectRelationship.where(parent_rel_query)
|
55
|
+
parent_rel_recs.each { |parent_rel_rec| batch_object_ids << parent_rel_rec.batch_object_id}
|
56
|
+
Resque.enqueue(BatchObjectsProcessorJob, batch_object_ids, operator_id)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module Ddr::Batch
|
2
|
+
class ProcessBatchObject
|
3
|
+
|
4
|
+
attr_reader :batch_object_id, :operator
|
5
|
+
|
6
|
+
def initialize(batch_object_id:, operator:)
|
7
|
+
@batch_object_id = batch_object_id
|
8
|
+
@operator = operator
|
9
|
+
end
|
10
|
+
|
11
|
+
def execute
|
12
|
+
ActiveSupport::Notifications.instrument("handled.batchobject.batch.ddr",
|
13
|
+
batch_object_id: batch_object_id) do |payload|
|
14
|
+
batch_object = BatchObject.find(batch_object_id)
|
15
|
+
# Mark batch object as 'handled'
|
16
|
+
batch_object.update!(handled: true)
|
17
|
+
# Validate batch object
|
18
|
+
errors = batch_object.validate
|
19
|
+
# Process batch object or record validation errors
|
20
|
+
if errors.empty?
|
21
|
+
process(batch_object, operator)
|
22
|
+
else
|
23
|
+
record_errors(batch_object, errors)
|
24
|
+
end
|
25
|
+
# return true if batch_object was processed; otherwise, false
|
26
|
+
batch_object.processed? ? true : false
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def process(batch_object, operator)
|
31
|
+
batch_object.update!(validated: true)
|
32
|
+
batch_object.process(operator)
|
33
|
+
batch_object.update!(processed: true)
|
34
|
+
results_message = batch_object.results_message
|
35
|
+
Ddr::Batch::BatchObjectMessage.create!(batch_object: batch_object,
|
36
|
+
level: results_message.level,
|
37
|
+
message: results_message.message)
|
38
|
+
end
|
39
|
+
|
40
|
+
def record_errors(batch_object, errors)
|
41
|
+
errors.each do |error|
|
42
|
+
Ddr::Batch::BatchObjectMessage.create!(batch_object: batch_object,
|
43
|
+
level: Logger::ERROR,
|
44
|
+
message: error)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Ddr::Batch
|
2
|
+
class ProcessBatchObjects
|
3
|
+
|
4
|
+
attr_reader :batch_object_ids, :operator
|
5
|
+
|
6
|
+
def initialize(batch_object_ids:, operator:)
|
7
|
+
@batch_object_ids = batch_object_ids
|
8
|
+
@operator = operator
|
9
|
+
end
|
10
|
+
|
11
|
+
def execute
|
12
|
+
# Assume successful processing of all batch objects until proven otherwise.
|
13
|
+
success = true
|
14
|
+
batch_object_ids.each do |batch_object_id|
|
15
|
+
# Once any batch object included in this job fails to process successfully, do not attempt to process
|
16
|
+
# any remaining batch objects included in this job. Instead, mark them as "handled" so the batch knows
|
17
|
+
# it's not waiting on them to be handled before it can consider itself "finished".
|
18
|
+
# The use case prompting this behavior is a job containing an Item ingest batch object plus one or more
|
19
|
+
# associated Component ingest batch objects. If the Item batch object fails to process correctly, we don't
|
20
|
+
# want to attempt to process the Component batch objects.
|
21
|
+
# In the preceding use case, we could skip the remaining batch objects only if the failed batch object is an
|
22
|
+
# Item but there might be future cases in which we don't want to process the remaining batch objects in the
|
23
|
+
# job regardless of which batch object fails. The failure of any batch object to process should be rare
|
24
|
+
# enough that it doesn't seem harmful to cover this potential broader use case in the current code.
|
25
|
+
if success
|
26
|
+
success = ProcessBatchObject.new(batch_object_id: batch_object_id, operator: operator).execute
|
27
|
+
else
|
28
|
+
batch_object = Ddr::Batch::BatchObject.find(batch_object_id)
|
29
|
+
batch_object.update!(handled: true)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
##
|
2
|
+
## Subscriptions to ActiveSupport::Notifications instrumentation events
|
3
|
+
##
|
4
|
+
|
5
|
+
# Batch Processing events
|
6
|
+
ActiveSupport::Notifications.subscribe('started.batch.batch.ddr', Ddr::Batch::MonitorBatchStarted)
|
7
|
+
ActiveSupport::Notifications.subscribe('handled.batchobject.batch.ddr', Ddr::Batch::MonitorBatchObjectHandled)
|
8
|
+
ActiveSupport::Notifications.subscribe('finished.batch.batch.ddr', Ddr::Batch::MonitorBatchFinished)
|
9
|
+
|
data/config/locales/en.yml
CHANGED
@@ -2,7 +2,7 @@ en:
|
|
2
2
|
ddr:
|
3
3
|
batch:
|
4
4
|
errors:
|
5
|
-
prefix: "%{identifier}
|
5
|
+
prefix: "%{identifier}:"
|
6
6
|
no_batches: "No %{type} batches found for your user account."
|
7
7
|
web:
|
8
8
|
action_names:
|
@@ -49,4 +49,4 @@ en:
|
|
49
49
|
finished_batches:
|
50
50
|
label: "Already Run"
|
51
51
|
pending_batches:
|
52
|
-
label: "Pending"
|
52
|
+
label: "Pending"
|
@@ -0,0 +1,13 @@
|
|
1
|
+
class CreateBatchObjectMessages < ActiveRecord::Migration
|
2
|
+
def change
|
3
|
+
unless table_exists?(:batch_object_messages)
|
4
|
+
create_table :batch_object_messages do |t|
|
5
|
+
t.integer :batch_object_id
|
6
|
+
t.integer :level, default: Logger::DEBUG
|
7
|
+
t.text :message, limit: 65535
|
8
|
+
|
9
|
+
t.timestamps
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
data/lib/ddr/batch.rb
CHANGED
@@ -9,6 +9,12 @@ module Ddr
|
|
9
9
|
extend ActiveSupport::Autoload
|
10
10
|
|
11
11
|
autoload :BatchUser
|
12
|
+
autoload :BatchObjectProcessingError, 'ddr/batch/error'
|
13
|
+
|
14
|
+
# Logging level for batch processing - defaults to Logger::INFO
|
15
|
+
mattr_accessor :processor_logging_level do
|
16
|
+
Logger::INFO
|
17
|
+
end
|
12
18
|
|
13
19
|
def self.table_name_prefix
|
14
20
|
end
|
data/lib/ddr/batch/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ddr-batch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
4
|
+
version: 1.2.0.rc1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jim Coble
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-
|
12
|
+
date: 2016-12-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rails
|
@@ -59,14 +59,14 @@ dependencies:
|
|
59
59
|
requirements:
|
60
60
|
- - "~>"
|
61
61
|
- !ruby/object:Gem::Version
|
62
|
-
version: '2.
|
62
|
+
version: '2.5'
|
63
63
|
type: :runtime
|
64
64
|
prerelease: false
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
67
|
- - "~>"
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: '2.
|
69
|
+
version: '2.5'
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
71
|
name: log4r
|
72
72
|
requirement: !ruby/object:Gem::Requirement
|
@@ -175,6 +175,8 @@ files:
|
|
175
175
|
- LICENSE.txt
|
176
176
|
- README.md
|
177
177
|
- Rakefile
|
178
|
+
- app/jobs/ddr/batch/batch_deletion_job.rb
|
179
|
+
- app/jobs/ddr/batch/batch_objects_processor_job.rb
|
178
180
|
- app/jobs/ddr/batch/batch_processor_job.rb
|
179
181
|
- app/mailers/ddr/batch/batch_processor_run_mailer.rb
|
180
182
|
- app/models/ddr/batch/batch.rb
|
@@ -182,13 +184,22 @@ files:
|
|
182
184
|
- app/models/ddr/batch/batch_object.rb
|
183
185
|
- app/models/ddr/batch/batch_object_attribute.rb
|
184
186
|
- app/models/ddr/batch/batch_object_datastream.rb
|
187
|
+
- app/models/ddr/batch/batch_object_message.rb
|
185
188
|
- app/models/ddr/batch/batch_object_relationship.rb
|
186
189
|
- app/models/ddr/batch/batch_object_role.rb
|
190
|
+
- app/models/ddr/batch/error.rb
|
187
191
|
- app/models/ddr/batch/ingest_batch_object.rb
|
192
|
+
- app/models/ddr/batch/log.rb
|
188
193
|
- app/models/ddr/batch/update_batch_object.rb
|
189
|
-
- app/scripts/ddr/batch/batch_processor.rb
|
194
|
+
- app/services/ddr/batch/monitor_batch_finished.rb
|
195
|
+
- app/services/ddr/batch/monitor_batch_object_handled.rb
|
196
|
+
- app/services/ddr/batch/monitor_batch_started.rb
|
197
|
+
- app/services/ddr/batch/process_batch.rb
|
198
|
+
- app/services/ddr/batch/process_batch_object.rb
|
199
|
+
- app/services/ddr/batch/process_batch_objects.rb
|
190
200
|
- app/views/ddr/batch/batch_processor_run_mailer/send_notification.html.erb
|
191
201
|
- app/views/ddr/batch/batch_processor_run_mailer/send_notification.text.erb
|
202
|
+
- config/initializers/subscriptions.rb
|
192
203
|
- config/locales/en.yml
|
193
204
|
- config/routes.rb
|
194
205
|
- db/migrate/20150828183839_create_batches.rb
|
@@ -197,6 +208,8 @@ files:
|
|
197
208
|
- db/migrate/20150828202200_create_batch_object_datastreams.rb
|
198
209
|
- db/migrate/20150828202240_create_batch_object_relationships.rb
|
199
210
|
- db/migrate/20160816164010_create_batch_object_roles.rb
|
211
|
+
- db/migrate/20161115191636_add_columns_to_batch_object.rb
|
212
|
+
- db/migrate/20161116142512_create_batch_object_messages.rb
|
200
213
|
- lib/ddr-batch.rb
|
201
214
|
- lib/ddr/batch.rb
|
202
215
|
- lib/ddr/batch/batch_user.rb
|
@@ -218,9 +231,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
218
231
|
version: '0'
|
219
232
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
220
233
|
requirements:
|
221
|
-
- - "
|
234
|
+
- - ">"
|
222
235
|
- !ruby/object:Gem::Version
|
223
|
-
version:
|
236
|
+
version: 1.3.1
|
224
237
|
requirements: []
|
225
238
|
rubyforge_project:
|
226
239
|
rubygems_version: 2.4.3
|
@@ -1,152 +0,0 @@
|
|
1
|
-
module Ddr::Batch
|
2
|
-
class BatchProcessor
|
3
|
-
|
4
|
-
LOG_CONFIG_FILEPATH = File.join(Rails.root, 'config', 'log4r_batch_processor.yml')
|
5
|
-
DEFAULT_LOG_DIR = File.join(Rails.root, 'log')
|
6
|
-
DEFAULT_LOG_FILE = "batch_processor_log.txt"
|
7
|
-
PASS = "PASS"
|
8
|
-
FAIL = "FAIL"
|
9
|
-
|
10
|
-
# Options
|
11
|
-
# :log_dir - optional - directory for log file - default is given in DEFAULT_LOG_DIR
|
12
|
-
# :log_file - optional - filename of log file - default is given in DEFAULT_LOG_FILE
|
13
|
-
# :skip_validation - optional - whether to skip batch object validation step when processing - default is false
|
14
|
-
# :ignore_validation_errors - optional - whether to continue processing even if batch object validation errors occur - default is false
|
15
|
-
def initialize(batch, operator=nil, opts={})
|
16
|
-
@batch = batch
|
17
|
-
@operator = operator
|
18
|
-
@bp_log_dir = opts.fetch(:log_dir, DEFAULT_LOG_DIR)
|
19
|
-
@bp_log_file = opts.fetch(:log_file, DEFAULT_LOG_FILE)
|
20
|
-
@skip_validation = opts.fetch(:skip_validation, false)
|
21
|
-
@ignore_validation_errors = opts.fetch(:ignore_validation_errors, false)
|
22
|
-
end
|
23
|
-
|
24
|
-
def execute
|
25
|
-
config_logger
|
26
|
-
if @batch
|
27
|
-
initiate_batch_run
|
28
|
-
unless @skip_validation
|
29
|
-
valid_batch = validate_batch
|
30
|
-
@batch.update_attributes(status: Batch::STATUS_INVALID) unless valid_batch
|
31
|
-
end
|
32
|
-
if @skip_validation || @ignore_validation_errors || valid_batch
|
33
|
-
process_batch
|
34
|
-
end
|
35
|
-
close_batch_run
|
36
|
-
end
|
37
|
-
save_logfile
|
38
|
-
send_notification if @batch.user && @batch.user.email
|
39
|
-
end
|
40
|
-
|
41
|
-
private
|
42
|
-
|
43
|
-
def validate_batch
|
44
|
-
@batch.update_attributes(status: Batch::STATUS_VALIDATING)
|
45
|
-
valid = true
|
46
|
-
errors = @batch.validate
|
47
|
-
unless errors.empty?
|
48
|
-
valid = false
|
49
|
-
errors.each do |error|
|
50
|
-
message = "Batch Object Validation Error: #{error}"
|
51
|
-
@bp_log.error(message)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
@batch.update_attributes(status: Batch::STATUS_RUNNING)
|
55
|
-
return valid
|
56
|
-
end
|
57
|
-
|
58
|
-
def process_batch
|
59
|
-
@batch.update_attributes(status: Batch::STATUS_PROCESSING, processing_step_start: DateTime.now)
|
60
|
-
@batch.batch_objects.each do |object|
|
61
|
-
begin
|
62
|
-
process_object(object)
|
63
|
-
rescue Exception => e
|
64
|
-
@bp_log.error(e.backtrace)
|
65
|
-
break
|
66
|
-
end
|
67
|
-
sleep 2
|
68
|
-
end
|
69
|
-
@batch.update_attributes(status: Batch::STATUS_RUNNING) if @batch.status == Batch::STATUS_PROCESSING
|
70
|
-
end
|
71
|
-
|
72
|
-
def initiate_batch_run
|
73
|
-
@bp_log.info "Batch id: #{@batch.id}"
|
74
|
-
@bp_log.info "Batch name: #{@batch.name}" if @batch.name
|
75
|
-
@bp_log.info "Batch size: #{@batch.batch_objects.size}"
|
76
|
-
@batch.logfile.clear # clear out any attached logfile
|
77
|
-
@batch.update_attributes(:start => DateTime.now,
|
78
|
-
:status => Batch::STATUS_RUNNING,
|
79
|
-
:version => VERSION)
|
80
|
-
@failures = 0
|
81
|
-
@successes = 0
|
82
|
-
@results_tracker = Hash.new
|
83
|
-
end
|
84
|
-
|
85
|
-
def close_batch_run
|
86
|
-
@batch.reload
|
87
|
-
@batch.failure = @failures
|
88
|
-
@batch.outcome = @successes.eql?(@batch.batch_objects.size) ? Batch::OUTCOME_SUCCESS : Batch::OUTCOME_FAILURE
|
89
|
-
if @batch.status.eql?(Batch::STATUS_RUNNING)
|
90
|
-
@batch.status = Batch::STATUS_FINISHED
|
91
|
-
end
|
92
|
-
@batch.stop = DateTime.now
|
93
|
-
@batch.success = @successes
|
94
|
-
@batch.save
|
95
|
-
@bp_log.info "====== Summary ======"
|
96
|
-
@results_tracker.keys.each do |type|
|
97
|
-
verb = case type
|
98
|
-
when IngestBatchObject.name
|
99
|
-
"Ingested"
|
100
|
-
when UpdateBatchObject.name
|
101
|
-
"Updated"
|
102
|
-
end
|
103
|
-
@results_tracker[type].keys.each do |model|
|
104
|
-
@bp_log.info "#{verb} #{@results_tracker[type][model][:successes]} #{model}"
|
105
|
-
end
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
def update_results_tracker(type, model, verified)
|
110
|
-
@results_tracker[type] = Hash.new unless @results_tracker.has_key?(type)
|
111
|
-
@results_tracker[type][model] = Hash.new unless @results_tracker[type].has_key?(model)
|
112
|
-
@results_tracker[type][model][:successes] = 0 unless @results_tracker[type][model].has_key?(:successes)
|
113
|
-
@results_tracker[type][model][:successes] += 1 if verified
|
114
|
-
end
|
115
|
-
|
116
|
-
def process_object(object)
|
117
|
-
@bp_log.debug "Processing object: #{object.identifier}"
|
118
|
-
repository_object = object.process(@operator)
|
119
|
-
update_results_tracker(object.type, repository_object.present? ? repository_object.class.name : object.model, object.verified)
|
120
|
-
if object.verified
|
121
|
-
@successes += 1
|
122
|
-
else
|
123
|
-
@failures += 1
|
124
|
-
end
|
125
|
-
message = object.results_message
|
126
|
-
@bp_log.info(message)
|
127
|
-
end
|
128
|
-
|
129
|
-
def config_logger
|
130
|
-
logconfig = Log4r::YamlConfigurator
|
131
|
-
logconfig['LOG_FILE'] = File.join(@bp_log_dir, @bp_log_file)
|
132
|
-
logconfig.load_yaml_file File.join(LOG_CONFIG_FILEPATH)
|
133
|
-
@bp_log = Log4r::Logger['batch_processor']
|
134
|
-
end
|
135
|
-
|
136
|
-
def save_logfile
|
137
|
-
@bp_log.outputters.each do |outputter|
|
138
|
-
@logfilename = outputter.filename if outputter.respond_to?(:filename)
|
139
|
-
end
|
140
|
-
@batch.update!({ logfile: File.new(@logfilename) }) if @logfilename
|
141
|
-
end
|
142
|
-
|
143
|
-
def send_notification
|
144
|
-
begin
|
145
|
-
BatchProcessorRunMailer.send_notification(@batch).deliver!
|
146
|
-
rescue
|
147
|
-
puts "An error occurred while attempting to send the notification."
|
148
|
-
end
|
149
|
-
end
|
150
|
-
|
151
|
-
end
|
152
|
-
end
|