ddr-batch 1.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (31) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +12 -0
  3. data/README.md +100 -0
  4. data/Rakefile +36 -0
  5. data/app/jobs/ddr/batch/batch_processor_job.rb +21 -0
  6. data/app/mailers/ddr/batch/batch_processor_run_mailer.rb +19 -0
  7. data/app/models/ddr/batch/batch.rb +72 -0
  8. data/app/models/ddr/batch/batch_ability_definitions.rb +14 -0
  9. data/app/models/ddr/batch/batch_object.rb +297 -0
  10. data/app/models/ddr/batch/batch_object_attribute.rb +57 -0
  11. data/app/models/ddr/batch/batch_object_datastream.rb +23 -0
  12. data/app/models/ddr/batch/batch_object_relationship.rb +26 -0
  13. data/app/models/ddr/batch/ingest_batch_object.rb +118 -0
  14. data/app/models/ddr/batch/update_batch_object.rb +94 -0
  15. data/app/scripts/ddr/batch/batch_processor.rb +151 -0
  16. data/app/views/ddr/batch/batch_processor_run_mailer/send_notification.html.erb +34 -0
  17. data/app/views/ddr/batch/batch_processor_run_mailer/send_notification.text.erb +20 -0
  18. data/config/locales/en.yml +52 -0
  19. data/config/routes.rb +2 -0
  20. data/db/migrate/20150828183839_create_batches.rb +25 -0
  21. data/db/migrate/20150828201857_create_batch_objects.rb +18 -0
  22. data/db/migrate/20150828202118_create_batch_object_attributes.rb +16 -0
  23. data/db/migrate/20150828202200_create_batch_object_datastreams.rb +17 -0
  24. data/db/migrate/20150828202240_create_batch_object_relationships.rb +15 -0
  25. data/lib/ddr-batch.rb +1 -0
  26. data/lib/ddr/batch.rb +17 -0
  27. data/lib/ddr/batch/batch_user.rb +10 -0
  28. data/lib/ddr/batch/engine.rb +14 -0
  29. data/lib/ddr/batch/version.rb +5 -0
  30. data/lib/tasks/ddr_batch_tasks.rake +4 -0
  31. metadata +228 -0
@@ -0,0 +1,57 @@
1
module Ddr::Batch

  # One attribute-level instruction attached to a batch object: an operation
  # (one of OPERATIONS) directed at a named attribute of a named datastream.
  class BatchObjectAttribute < ActiveRecord::Base
    belongs_to :batch_object, :inverse_of => :batch_object_attributes

    OPERATION_ADD = "ADD" # Add the provided value to the attribute
    OPERATION_DELETE = "DELETE" # Delete the provided value from the attribute
    OPERATION_CLEAR = "CLEAR" # Clear all values from the attribute
    OPERATION_CLEAR_ALL = "CLEAR_ALL" # Clear all attributes in the datastream

    OPERATIONS = [ OPERATION_ADD, OPERATION_DELETE, OPERATION_CLEAR, OPERATION_CLEAR_ALL ].freeze

    VALUE_TYPE_STRING = "STRING"

    VALUE_TYPES = [ VALUE_TYPE_STRING ].freeze

    validates :operation, inclusion: { in: OPERATIONS }
    validates :datastream, presence: true
    validates :name, presence: true, if: :operation_requires_name?
    # The datastream/name cross-check needs the owning object's model, so it is
    # only attempted when all three pieces of information are available.
    validate :valid_datastream_and_attribute_name, if: :datastream_and_attribute_name_checkable?
    with_options if: :operation_requires_value? do |obj|
      obj.validates :value, presence: true
      obj.validates :value_type, inclusion: { in: VALUE_TYPES }
    end

    # @return [Boolean] true for ADD, DELETE and CLEAR; CLEAR_ALL targets the
    #   whole datastream and therefore needs no attribute name.
    def operation_requires_name?
      [ OPERATION_ADD, OPERATION_DELETE, OPERATION_CLEAR ].include? operation
    end

    # @return [Boolean] true for the operations that carry a value (ADD, DELETE).
    def operation_requires_value?
      [ OPERATION_ADD, OPERATION_DELETE ].include? operation
    end

    # Guard for #valid_datastream_and_attribute_name. Uses plain truthiness
    # (not presence) and tolerates a missing batch_object instead of raising.
    #
    # @return [Boolean]
    def datastream_and_attribute_name_checkable?
      !!(batch_object && batch_object.model && datastream && name)
    end

    # Validation callback: adds an error on :datastream when the datastream is
    # not acceptable, otherwise an error on :name when the attribute name is
    # not one of the datastream type's terms.
    def valid_datastream_and_attribute_name
      if datastream_valid?
        errors.add(:name, "is not valid") unless attribute_name_valid?
      else
        errors.add(:datastream, "is not valid")
      end
    end

    # Looks up the :type entry for this datastream in the ds_specs of the
    # batch object's model class.
    #
    # @return [Object, nil] the declared type, or nil when any step of the
    #   lookup fails (unknown model, unknown datastream, ...)
    def datastream_type
      batch_object.model.constantize.ds_specs[datastream][:type]
    rescue StandardError
      nil
    end

    # @return [Boolean, nil] truthy only when the datastream type is a subclass
    #   of ActiveFedora::RDFDatastream; false when the type cannot be resolved.
    def datastream_valid?
      datastream_type < ActiveFedora::RDFDatastream
    rescue StandardError
      false
    end

    # @return [Boolean] whether +name+ is among the datastream type's term names.
    def attribute_name_valid?
      datastream_type.term_names.include?(name.to_sym)
    end

  end
end
@@ -0,0 +1,23 @@
1
module Ddr::Batch

  # A datastream-level instruction belonging to a batch object. This class
  # only declares the association and the vocabulary (datastream names,
  # operations, payload types) used by such instructions.
  class BatchObjectDatastream < ActiveRecord::Base

    belongs_to :batch_object, inverse_of: :batch_object_datastreams

    # Datastream identifiers, taken from Ddr::Datastreams.
    DATASTREAMS = [
      Ddr::Datastreams::CONTENT,
      Ddr::Datastreams::DESC_METADATA,
      Ddr::Datastreams::RIGHTS_METADATA,
      Ddr::Datastreams::STRUCT_METADATA
    ]

    # Add the datastream to the object; an error if it already exists.
    OPERATION_ADD = "ADD"
    # Add the datastream to the object, or update it if already present.
    OPERATION_ADDUPDATE = "ADDUPDATE"
    # Update the datastream in the object; an error if it does not already exist.
    OPERATION_UPDATE = "UPDATE"
    # Delete the datastream from the object; an error if it does not exist.
    OPERATION_DELETE = "DELETE"

    # Ways in which the datastream payload may be supplied.
    PAYLOAD_TYPE_BYTES = "BYTES"
    PAYLOAD_TYPE_FILENAME = "FILENAME"

    PAYLOAD_TYPES = [
      PAYLOAD_TYPE_BYTES,
      PAYLOAD_TYPE_FILENAME
    ]
  end

end
@@ -0,0 +1,26 @@
1
module Ddr::Batch

  # A relationship-level instruction belonging to a batch object. This class
  # only declares the association and the vocabulary (relationship names,
  # operations, object types) used by such instructions.
  class BatchObjectRelationship < ActiveRecord::Base
    # attr_accessible :name, :object, :object_type, :operation, :batch_object
    belongs_to :batch_object, inverse_of: :batch_object_relationships

    # Names of the relationships recognised here.
    RELATIONSHIP_ADMIN_POLICY = "admin_policy"
    RELATIONSHIP_COLLECTION = "collection"
    RELATIONSHIP_PARENT = "parent"
    RELATIONSHIP_ITEM = "item"
    RELATIONSHIP_COMPONENT = "component"
    RELATIONSHIP_ATTACHED_TO = "attached_to"

    RELATIONSHIPS = [
      RELATIONSHIP_ADMIN_POLICY,
      RELATIONSHIP_COLLECTION,
      RELATIONSHIP_PARENT,
      RELATIONSHIP_ITEM,
      RELATIONSHIP_COMPONENT,
      RELATIONSHIP_ATTACHED_TO
    ]

    # Operations that may be requested on a relationship.
    OPERATION_ADD = "ADD"
    OPERATION_UPDATE = "UPDATE"
    OPERATION_DELETE = "DELETE"

    # Kinds of value by which the related object may be identified.
    OBJECT_TYPE_PID = "PID"

    OBJECT_TYPES = [ OBJECT_TYPE_PID ]
  end

end
@@ -0,0 +1,118 @@
1
module Ddr::Batch

  # Batch object whose processing creates a new repository object of the given
  # model, populates it from the batch object's attributes, datastreams and
  # relationships, and then records ingestion and validation events for it.
  class IngestBatchObject < BatchObject

    # Ingest-specific validation.
    #
    # @return [Array<String>] error messages (empty when there are none)
    def local_validations
      errs = []
      errs << "#{@error_prefix} Model required for INGEST operation" unless model
      errs += validate_pre_assigned_pid if pid
      errs
    end

    # @return [Array] the datastream keys of a fresh instance of the model class
    def model_datastream_keys
      model.constantize.new.datastreams.keys
    end

    # Ingests the object unless it has already been verified.
    #
    # @param user the user recorded on the ingestion event
    # @param opts [Hash] passed through to #ingest, which currently ignores it
    # @return the repository object, or nil when the object was already verified
    def process(user, opts = {})
      ingest(user, opts) unless verified
    end

    # @return [String] one-line, human-readable outcome of the ingest attempt
    def results_message
      if pid
        verification_result = (verified ? "Verified" : "VERIFICATION FAILURE")
        "Ingested #{model} #{identifier} into #{pid}...#{verification_result}"
      else
        "Attempt to ingest #{model} #{identifier} FAILED"
      end
    end

    private

    # A pre-assigned PID must not already be in use in the repository.
    #
    # @return [Array<String>] error messages (empty when the PID is free)
    def validate_pre_assigned_pid
      errs = []
      errs << "#{@error_prefix} #{pid} already exists in repository" if ActiveFedora::Base.exists?(pid)
      errs
    end

    # Creates the repository object and, if it was persisted, records the
    # ingestion event, stores the PID, verifies the object, stores the
    # verification result and records the validation event (in that order).
    #
    # @return the repository object (possibly nil or unsaved when creation
    #   did not produce a persisted object)
    def ingest(user, opts = {})
      repo_object = create_repository_object
      return repo_object if repo_object.nil? || repo_object.new_record?

      record_ingestion_event(repo_object, user)
      update_attributes(:pid => repo_object.pid)

      verifications = verify_repository_object
      outcome_detail = verifications.map { |key, value| "#{key}...#{value}" }
      all_verified = verifications.none? { |_key, value| value.eql?(VERIFICATION_FAIL) }
      update_attributes(:verified => all_verified)

      record_validation_event(repo_object, outcome_detail, all_verified)
      repo_object
    end

    # Saves an IngestionEvent describing the successful ingest of repo_object.
    def record_ingestion_event(repo_object, user)
      Ddr::Events::IngestionEvent.new.tap do |event|
        event.object = repo_object
        event.user = user
        event.summary = event_summary_for("Object ingestion")
        event.detail = "Ingested #{model} #{identifier} into #{repo_object.pid}"
        event.save!
      end
    end

    # Saves a ValidationEvent carrying the per-check verification outcomes;
    # the event is marked as a failure when any check failed.
    def record_validation_event(repo_object, outcome_detail, all_verified)
      Ddr::Events::ValidationEvent.new.tap do |event|
        event.object = repo_object
        event.failure! unless all_verified
        event.summary = event_summary_for("Object ingestion validation")
        event.detail = outcome_detail.join("\n")
        event.save!
      end
    end

    # Fills in the EVENT_SUMMARY template for this batch object.
    def event_summary_for(label)
      EVENT_SUMMARY % {
        :label => label,
        :batch_id => id,
        :identifier => identifier,
        :model => model
      }
    end

    # Instantiates the model (with the pre-assigned PID, if any), saves it once
    # without validation, applies attributes, datastreams and relationships,
    # and saves it again.
    #
    # On any error the partially created object is deleted if possible, the
    # batch status is set to RESTARTABLE (repository left clean) or INTERRUPTED
    # (cleanup failed), and the original error is re-raised.
    #
    # @return the repository object
    def create_repository_object
      repo_pid = pid.presence
      repo_object = nil
      begin
        repo_object = model.constantize.new(:pid => repo_pid)
        repo_object.label = label if label
        repo_object.save(validate: false)
        batch_object_attributes.each { |a| repo_object = add_attribute(repo_object, a) }
        batch_object_datastreams.each { |d| repo_object = populate_datastream(repo_object, d) }
        batch_object_relationships.each { |r| repo_object = add_relationship(repo_object, r) }
        repo_object.save
      # NOTE(review): Exception (not StandardError) is rescued so the batch
      # status is updated for every kind of failure; it is always re-raised.
      rescue Exception => e1
        # repo_object is still nil when instantiation itself failed; guard the
        # interpolation so the handler cannot raise and mask the real error.
        logger.fatal("Error in creating repository object #{repo_object.pid if repo_object} for #{identifier} : #{e1}")
        repo_clean = remove_incomplete_repository_object(repo_object)
        if batch.present?
          batch.status = repo_clean ? Batch::STATUS_RESTARTABLE : Batch::STATUS_INTERRUPTED
          batch.save
        end
        raise e1
      end
      repo_object
    end

    # Deletes a persisted but possibly incomplete repository object.
    #
    # @return [Boolean] true when nothing was left behind in the repository
    #   (nothing had been persisted, or deletion succeeded); false when the
    #   deletion itself failed
    def remove_incomplete_repository_object(repo_object)
      return true unless repo_object && !repo_object.new_record?
      begin
        logger.info("Deleting potentially incomplete #{repo_object.pid} due to error in ingest batch processing")
        repo_object.destroy
      rescue Exception => e2
        logger.fatal("Error deleting repository object #{repo_object.pid}: #{e2}")
        return false
      end
      true
    end

  end

end
@@ -0,0 +1,94 @@
1
module Ddr::Batch

  # Batch object whose processing applies attribute and datastream changes to
  # an existing repository object identified by its PID.
  class UpdateBatchObject < BatchObject

    # Update-specific validation: a PID is required, it must exist in the
    # repository, and the batch user must be permitted to edit the object.
    #
    # @return [Array<String>] error messages (empty when there are none)
    def local_validations
      errs = []
      if pid
        if ActiveFedora::Base.exists?(pid)
          unless batch.user.can?(:edit, ActiveFedora::Base.find(pid, :cast => true))
            errs << "#{@error_prefix} #{batch.user.user_key} not permitted to edit #{pid}"
          end
        else
          errs << "#{@error_prefix} PID #{pid} not found in repository"
        end
      else
        errs << "#{@error_prefix} PID required for UPDATE operation"
      end
      errs
    end

    # @return [Array, nil] datastream keys of the existing repository object;
    #   nil when there is no PID or the object cannot be loaded
    def model_datastream_keys
      return nil unless pid
      begin
        ActiveFedora::Base.find(pid, :cast => true).datastreams.keys
      rescue
        nil
      end
    end

    # Updates and verifies the repository object unless it has already been
    # verified, storing the verification result on this record.
    #
    # @param user the user recorded on the update event
    # @param opts [Hash] passed through to #update_repository_object, which
    #   currently ignores it
    # @return the repository object, or nil when already verified
    def process(user, opts = {})
      return if verified
      repo_object = update_repository_object(user, opts)
      verifications = verify_repository_object
      all_verified = verifications.none? { |_key, value| value.eql?(VERIFICATION_FAIL) }
      update_attributes(:verified => all_verified)
      repo_object
    end

    # @return [String] one-line, human-readable outcome of the update attempt
    def results_message
      if pid
        verification_result = (verified ? "Verified" : "VERIFICATION FAILURE")
        "Updated #{pid}...#{verification_result}"
      else
        "Attempt to update #{model} #{identifier} FAILED"
      end
    end

    # @return [String] comment attached to the update event
    def event_log_comment
      "Updated by batch process (Batch #{batch.id}, BatchObject #{id})"
    end

    private

    # Loads the repository object, applies every attribute and datastream
    # instruction, saves it and emits an update event when the save succeeds.
    #
    # On any error the batch is marked RESTARTABLE and the error is re-raised.
    #
    # @return the updated repository object
    def update_repository_object(user, opts = {})
      repo_object = nil
      begin
        repo_object = ActiveFedora::Base.find(pid)
        batch_object_attributes.each do |a|
          repo_object = apply_attribute_operation(repo_object, a)
        end
        batch_object_datastreams.each do |d|
          repo_object = apply_datastream_operation(repo_object, d)
        end
        if repo_object.save
          repo_object.notify_event(:update, user: user, comment: event_log_comment)
        end
      # NOTE(review): Exception (not StandardError) is rescued so the batch
      # status is updated for every kind of failure; it is always re-raised.
      rescue Exception => e
        logger.error("Error in updating repository object #{pid} for #{identifier} : : #{e}")
        if batch.present?
          batch.status = Batch::STATUS_RESTARTABLE
          batch.save
        end
        raise e
      end
      repo_object
    end

    # Applies one attribute instruction and returns the resulting object.
    #
    # @raise [ArgumentError] for operations an update does not implement
    #   (anything other than ADD, CLEAR and CLEAR_ALL). Previously such an
    #   operation silently replaced the object with nil, which surfaced later
    #   as an unrelated NoMethodError.
    def apply_attribute_operation(repo_object, attribute)
      case attribute.operation
      when BatchObjectAttribute::OPERATION_ADD
        add_attribute(repo_object, attribute)
      when BatchObjectAttribute::OPERATION_CLEAR
        clear_attribute(repo_object, attribute)
      when BatchObjectAttribute::OPERATION_CLEAR_ALL
        clear_attributes(repo_object, attribute)
      else
        raise ArgumentError, "Unsupported attribute operation #{attribute.operation.inspect} for update of #{pid}"
      end
    end

    # Applies one datastream instruction and returns the resulting object.
    #
    # @raise [ArgumentError] for any operation other than ADDUPDATE, the only
    #   one an update implements
    def apply_datastream_operation(repo_object, datastream)
      case datastream.operation
      when BatchObjectDatastream::OPERATION_ADDUPDATE
        populate_datastream(repo_object, datastream)
      else
        raise ArgumentError, "Unsupported datastream operation #{datastream.operation.inspect} for update of #{pid}"
      end
    end

  end
end
@@ -0,0 +1,151 @@
1
module Ddr::Batch
  # Runs a batch: optionally validates it, processes each batch object in
  # turn, records counts/outcome/status on the batch, attaches the log file to
  # the batch and e-mails a notification to the batch user.
  class BatchProcessor

    LOG_CONFIG_FILEPATH = File.join(Rails.root, 'config', 'log4r_batch_processor.yml')
    DEFAULT_LOG_DIR = File.join(Rails.root, 'log')
    DEFAULT_LOG_FILE = "batch_processor_log.txt"
    PASS = "PASS"
    FAIL = "FAIL"

    # Options
    # :log_dir - optional - directory for log file - default is given in DEFAULT_LOG_DIR
    # :log_file - optional - filename of log file - default is given in DEFAULT_LOG_FILE
    # :skip_validation - optional - whether to skip batch object validation step when processing - default is false
    # :ignore_validation_errors - optional - whether to continue processing even if batch object validation errors occur - default is false
    def initialize(batch, operator=nil, opts={})
      @batch = batch
      @operator = operator
      @bp_log_dir = opts.fetch(:log_dir, DEFAULT_LOG_DIR)
      @bp_log_file = opts.fetch(:log_file, DEFAULT_LOG_FILE)
      @skip_validation = opts.fetch(:skip_validation, false)
      @ignore_validation_errors = opts.fetch(:ignore_validation_errors, false)
    end

    # Runs the whole batch. Without a batch only the logger is configured:
    # there is nothing to process, no record to attach a log file to and
    # nobody to notify.
    def execute
      config_logger
      return unless @batch

      initiate_batch_run
      valid_batch = false
      unless @skip_validation
        valid_batch = validate_batch
        @batch.update_attributes(status: Batch::STATUS_INVALID) unless valid_batch
      end
      process_batch if @skip_validation || @ignore_validation_errors || valid_batch
      close_batch_run

      save_logfile
      send_notification if @batch.user && @batch.user.email
    end

    private

    # Validates the batch, logging each validation error. The batch status is
    # VALIDATING while this runs and RUNNING afterwards.
    #
    # @return [Boolean] true when validation produced no errors
    def validate_batch
      @batch.update_attributes(status: Batch::STATUS_VALIDATING)
      errors = @batch.validate
      errors.each do |error|
        @bp_log.error("Batch Object Validation Error: #{error}")
      end
      @batch.update_attributes(status: Batch::STATUS_RUNNING)
      errors.empty?
    end

    # Processes the batch objects in order, stopping at the first one that
    # raises. The status is reset to RUNNING afterwards only if it is still
    # PROCESSING (i.e. nothing else changed it in the meantime).
    def process_batch
      @batch.update_attributes(status: Batch::STATUS_PROCESSING, processing_step_start: DateTime.now)
      @batch.batch_objects.each do |object|
        begin
          process_object(object)
        # NOTE(review): Exception is rescued (as before) so that the run is
        # always closed out; the failure is now logged instead of discarded.
        rescue Exception => e
          @bp_log.error("Batch processing halted at object #{object.identifier}: #{e}")
          break
        end
        sleep 2 # pause between objects -- presumably throttling; TODO confirm
      end
      @batch.update_attributes(status: Batch::STATUS_RUNNING) if @batch.status == Batch::STATUS_PROCESSING
    end

    # Logs the batch header, clears any previously attached log file, stamps
    # the start time/status/version and resets the run counters.
    def initiate_batch_run
      @bp_log.info "Batch id: #{@batch.id}"
      @bp_log.info "Batch name: #{@batch.name}" if @batch.name
      @bp_log.info "Batch size: #{@batch.batch_objects.size}"
      @batch.logfile.clear # clear out any attached logfile
      @batch.update_attributes(:start => DateTime.now,
                               :status => Batch::STATUS_RUNNING,
                               :version => VERSION)
      @failures = 0
      @successes = 0
      @results_tracker = {}
    end

    # Records counts, outcome, stop time and (if still RUNNING) the FINISHED
    # status on the batch, then logs a per-type/per-model summary.
    def close_batch_run
      @batch.reload
      @batch.failure = @failures
      @batch.outcome = @successes.eql?(@batch.batch_objects.size) ? Batch::OUTCOME_SUCCESS : Batch::OUTCOME_FAILURE
      @batch.status = Batch::STATUS_FINISHED if @batch.status.eql?(Batch::STATUS_RUNNING)
      @batch.stop = DateTime.now
      @batch.success = @successes
      @batch.save
      @bp_log.info "====== Summary ======"
      @results_tracker.each do |type, models|
        verb = case type
               when IngestBatchObject.name
                 "Ingested"
               when UpdateBatchObject.name
                 "Updated"
               end
        models.each do |model, counts|
          @bp_log.info "#{verb} #{counts[:successes]} #{model}"
        end
      end
    end

    # Ensures a success counter exists for the type/model pair and increments
    # it when the object was verified.
    def update_results_tracker(type, model, verified)
      by_model = (@results_tracker[type] ||= {})
      counts = (by_model[model] ||= {})
      counts[:successes] ||= 0
      counts[:successes] += 1 if verified
    end

    # Processes a single batch object, updates the counters and the results
    # tracker, and logs the object's results message.
    def process_object(object)
      @bp_log.debug "Processing object: #{object.identifier}"
      repository_object = object.process(@operator)
      model_name = repository_object.present? ? repository_object.class.name : object.model
      update_results_tracker(object.type, model_name, object.verified)
      if object.verified
        @successes += 1
      else
        @failures += 1
      end
      @bp_log.info(object.results_message)
    end

    # Loads the Log4r YAML configuration, pointing LOG_FILE at the configured
    # directory/file, and picks up the 'batch_processor' logger.
    def config_logger
      logconfig = Log4r::YamlConfigurator
      logconfig['LOG_FILE'] = File.join(@bp_log_dir, @bp_log_file)
      logconfig.load_yaml_file LOG_CONFIG_FILEPATH
      @bp_log = Log4r::Logger['batch_processor']
    end

    # Attaches the log file to the batch. The file name comes from the last
    # outputter that exposes one.
    def save_logfile
      @bp_log.outputters.each do |outputter|
        @logfilename = outputter.filename if outputter.respond_to?(:filename)
      end
      return unless @logfilename
      # Block form so the handle is closed once the attachment has been saved.
      # NOTE(review): assumes the attachment is read during update! -- confirm
      # against the attachment library in use.
      File.open(@logfilename) { |file| @batch.update!({ logfile: file }) }
    end

    # Best-effort notification: a delivery failure is reported on stdout and
    # does not fail the batch run.
    def send_notification
      BatchProcessorRunMailer.send_notification(@batch).deliver!
    rescue
      puts "An error occurred while attempting to send the notification."
    end

  end
end