ddr-batch 1.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +12 -0
  3. data/README.md +100 -0
  4. data/Rakefile +36 -0
  5. data/app/jobs/ddr/batch/batch_processor_job.rb +21 -0
  6. data/app/mailers/ddr/batch/batch_processor_run_mailer.rb +19 -0
  7. data/app/models/ddr/batch/batch.rb +72 -0
  8. data/app/models/ddr/batch/batch_ability_definitions.rb +14 -0
  9. data/app/models/ddr/batch/batch_object.rb +297 -0
  10. data/app/models/ddr/batch/batch_object_attribute.rb +57 -0
  11. data/app/models/ddr/batch/batch_object_datastream.rb +23 -0
  12. data/app/models/ddr/batch/batch_object_relationship.rb +26 -0
  13. data/app/models/ddr/batch/ingest_batch_object.rb +118 -0
  14. data/app/models/ddr/batch/update_batch_object.rb +94 -0
  15. data/app/scripts/ddr/batch/batch_processor.rb +151 -0
  16. data/app/views/ddr/batch/batch_processor_run_mailer/send_notification.html.erb +34 -0
  17. data/app/views/ddr/batch/batch_processor_run_mailer/send_notification.text.erb +20 -0
  18. data/config/locales/en.yml +52 -0
  19. data/config/routes.rb +2 -0
  20. data/db/migrate/20150828183839_create_batches.rb +25 -0
  21. data/db/migrate/20150828201857_create_batch_objects.rb +18 -0
  22. data/db/migrate/20150828202118_create_batch_object_attributes.rb +16 -0
  23. data/db/migrate/20150828202200_create_batch_object_datastreams.rb +17 -0
  24. data/db/migrate/20150828202240_create_batch_object_relationships.rb +15 -0
  25. data/lib/ddr-batch.rb +1 -0
  26. data/lib/ddr/batch.rb +17 -0
  27. data/lib/ddr/batch/batch_user.rb +10 -0
  28. data/lib/ddr/batch/engine.rb +14 -0
  29. data/lib/ddr/batch/version.rb +5 -0
  30. data/lib/tasks/ddr_batch_tasks.rake +4 -0
  31. metadata +228 -0
@@ -0,0 +1,57 @@
module Ddr::Batch

  # ActiveRecord model for one attribute operation belonging to a batch object.
  #
  # A record names a datastream, an attribute name within that datastream, an
  # operation (one of OPERATIONS) and, for operations that need one, a value
  # with a value type (one of VALUE_TYPES).
  class BatchObjectAttribute < ActiveRecord::Base
    belongs_to :batch_object, :inverse_of => :batch_object_attributes

    OPERATION_ADD = "ADD" # Add the provided value to the attribute
    OPERATION_DELETE = "DELETE" # Delete the provided value from the attribute
    OPERATION_CLEAR = "CLEAR" # Clear all values from the attribute
    OPERATION_CLEAR_ALL = "CLEAR_ALL" # Clear all attributes in the datastream

    OPERATIONS = [ OPERATION_ADD, OPERATION_DELETE, OPERATION_CLEAR, OPERATION_CLEAR_ALL ]

    VALUE_TYPE_STRING = "STRING"

    VALUE_TYPES = [ VALUE_TYPE_STRING ]

    validates :operation, inclusion: { in: OPERATIONS }
    validates :datastream, presence: true
    with_options if: :operation_requires_name? do |obj|
      obj.validates :name, presence: true
    end
    # The datastream/name cross-check needs the owning object's model, the
    # datastream and the name; it is skipped when any of them is missing.
    validate :valid_datastream_and_attribute_name, if: :datastream_and_name_checkable?
    with_options if: :operation_requires_value? do |obj|
      obj.validates :value, presence: true
      obj.validates :value_type, inclusion: { in: VALUE_TYPES }
    end

    # True for the operations that act on a single named attribute
    # (ADD, DELETE and CLEAR); CLEAR_ALL does not need a name.
    def operation_requires_name?
      [ OPERATION_ADD, OPERATION_DELETE, OPERATION_CLEAR ].include? operation
    end

    # True for the operations that carry a value (ADD and DELETE).
    def operation_requires_value?
      [ OPERATION_ADD, OPERATION_DELETE ].include? operation
    end

    # Guard for #valid_datastream_and_attribute_name.
    #
    # Returns true only when a batch object is attached and its model, the
    # datastream and the name are all non-nil. A missing batch object makes
    # the check inapplicable instead of raising NoMethodError.
    def datastream_and_name_checkable?
      return false if batch_object.nil?
      [ batch_object.model, datastream, name ].all?
    end

    # Custom validation: adds an error on :datastream when the datastream is
    # not valid for the model, otherwise an error on :name when the name is
    # not one of the datastream type's terms.
    def valid_datastream_and_attribute_name
      if datastream_valid?
        errors.add(:name, "is not valid") unless attribute_name_valid?
      else
        errors.add(:datastream, "is not valid")
      end
    end

    # Looks up the :type entry of the model's ds_specs for this datastream.
    # Returns nil when the lookup fails for any reason (unknown model
    # constant, datastream not in ds_specs, ...).
    def datastream_type
      batch_object.model.constantize.ds_specs[datastream][:type]
    rescue StandardError
      nil
    end

    # Truthy when the datastream's type is a subclass of
    # ActiveFedora::RDFDatastream; false when the comparison cannot be made
    # (e.g. the type could not be resolved).
    def datastream_valid?
      datastream_type < ActiveFedora::RDFDatastream
    rescue StandardError
      false
    end

    # True when the attribute name is among the datastream type's term names.
    def attribute_name_valid?
      datastream_type.term_names.include?(name.to_sym)
    end

  end
end
@@ -0,0 +1,23 @@
module Ddr::Batch

  # ActiveRecord model belonging to a batch object. Besides the association it
  # only defines constants: the datastream names, the datastream operations and
  # the payload types.
  class BatchObjectDatastream < ActiveRecord::Base

    belongs_to :batch_object, inverse_of: :batch_object_datastreams

    # Datastream names, taken from Ddr::Datastreams.
    DATASTREAMS = [
      Ddr::Datastreams::CONTENT,
      Ddr::Datastreams::DESC_METADATA,
      Ddr::Datastreams::RIGHTS_METADATA,
      Ddr::Datastreams::STRUCT_METADATA
    ]

    # Add this datastream to the object; an error if the datastream already exists.
    OPERATION_ADD = "ADD"
    # Add this datastream to the object, or update it if it is already there.
    OPERATION_ADDUPDATE = "ADDUPDATE"
    # Update this datastream in the object; an error if the datastream does not already exist.
    OPERATION_UPDATE = "UPDATE"
    # Delete this datastream from the object; an error if the datastream does not exist.
    OPERATION_DELETE = "DELETE"

    PAYLOAD_TYPE_BYTES = "BYTES"
    PAYLOAD_TYPE_FILENAME = "FILENAME"

    PAYLOAD_TYPES = [
      PAYLOAD_TYPE_BYTES,
      PAYLOAD_TYPE_FILENAME
    ]

  end

end
@@ -0,0 +1,26 @@
module Ddr::Batch

  # ActiveRecord model belonging to a batch object. Besides the association it
  # only defines constants: the relationship names, the relationship operations
  # and the object types.
  class BatchObjectRelationship < ActiveRecord::Base

    # Disabled in the original source, kept for reference:
    # attr_accessible :name, :object, :object_type, :operation, :batch_object
    belongs_to :batch_object, inverse_of: :batch_object_relationships

    RELATIONSHIP_ADMIN_POLICY = "admin_policy"
    RELATIONSHIP_COLLECTION = "collection"
    RELATIONSHIP_PARENT = "parent"
    RELATIONSHIP_ITEM = "item"
    RELATIONSHIP_COMPONENT = "component"
    RELATIONSHIP_ATTACHED_TO = "attached_to"

    # Every relationship name defined above.
    RELATIONSHIPS = [
      RELATIONSHIP_ADMIN_POLICY,
      RELATIONSHIP_COLLECTION,
      RELATIONSHIP_PARENT,
      RELATIONSHIP_ITEM,
      RELATIONSHIP_COMPONENT,
      RELATIONSHIP_ATTACHED_TO
    ]

    OPERATION_ADD = "ADD"
    OPERATION_UPDATE = "UPDATE"
    OPERATION_DELETE = "DELETE"

    OBJECT_TYPE_PID = "PID"

    OBJECT_TYPES = [
      OBJECT_TYPE_PID
    ]

  end

end
@@ -0,0 +1,118 @@
module Ddr::Batch

  # Batch object whose processing creates a new repository object of the
  # configured model, records ingestion and validation events for it and
  # stores the resulting pid and verification outcome on the record.
  class IngestBatchObject < BatchObject

    # Ingest-specific validation.
    #
    # Returns an array of error message strings: one if no model is set, plus
    # one if a pre-assigned pid already exists in the repository.
    def local_validations
      # Named errs (not errors) so the ActiveRecord #errors accessor is not shadowed.
      errs = []
      errs << "#{@error_prefix} Model required for INGEST operation" unless model
      errs += validate_pre_assigned_pid if pid
      errs
    end

    # Datastream keys of a freshly instantiated object of this record's model.
    def model_datastream_keys
      model.constantize.new.datastreams.keys
    end

    # Ingests the object unless it has already been verified.
    #
    # Returns the repository object from #ingest, or nil when already verified.
    def process(user, opts = {})
      ingest(user, opts) unless verified
    end

    # One-line, human-readable outcome of the ingest attempt.
    def results_message
      if pid
        verification_result = (verified ? "Verified" : "VERIFICATION FAILURE")
        "Ingested #{model} #{identifier} into #{pid}...#{verification_result}"
      else
        "Attempt to ingest #{model} #{identifier} FAILED"
      end
    end

    private

    # Returns an array holding one error message when the pre-assigned pid is
    # already present in the repository, otherwise an empty array.
    def validate_pre_assigned_pid
      errs = []
      errs << "#{@error_prefix} #{pid} already exists in repository" if ActiveFedora::Base.exists?(pid)
      errs
    end

    # Creates the repository object and, if it was persisted, records an
    # ingestion event, stores the pid, verifies the object, stores the
    # verification outcome and records a validation event.
    #
    # Returns the repository object (possibly unsaved or nil).
    def ingest(user, opts = {})
      repo_object = create_repository_object
      return repo_object if repo_object.nil? || repo_object.new_record?

      Ddr::Events::IngestionEvent.new.tap do |event|
        event.object = repo_object
        event.user = user
        event.summary = EVENT_SUMMARY % {
          :label => "Object ingestion",
          :batch_id => id,
          :identifier => identifier,
          :model => model
        }
        event.detail = "Ingested #{model} #{identifier} into #{repo_object.pid}"
        event.save!
      end
      update_attributes(:pid => repo_object.pid)

      verifications = verify_repository_object
      verification_outcome_detail = verifications.map { |key, value| "#{key}...#{value}" }
      # A single failed check marks the whole object as not verified.
      all_verified = verifications.none? { |_key, value| value.eql?(VERIFICATION_FAIL) }
      update_attributes(:verified => all_verified)

      Ddr::Events::ValidationEvent.new.tap do |event|
        event.object = repo_object
        event.failure! unless all_verified
        event.summary = EVENT_SUMMARY % {
          :label => "Object ingestion validation",
          :batch_id => id,
          :identifier => identifier,
          :model => model
        }
        event.detail = verification_outcome_detail.join("\n")
        event.save!
      end

      repo_object
    end

    # Builds and saves the repository object from this record's model, label,
    # attributes, datastreams and relationships.
    #
    # On any exception the partially created object (if it was persisted) is
    # destroyed, the batch status is set to RESTARTABLE when the repository is
    # clean or INTERRUPTED when the cleanup itself failed, and the original
    # exception is re-raised.
    def create_repository_object
      repo_pid = pid if pid.present?
      repo_object = nil
      begin
        repo_object = model.constantize.new(:pid => repo_pid)
        repo_object.label = label if label
        # First save skips validation; the object is populated afterwards.
        repo_object.save(validate: false)
        batch_object_attributes.each { |a| repo_object = add_attribute(repo_object, a) }
        batch_object_datastreams.each { |d| repo_object = populate_datastream(repo_object, d) }
        batch_object_relationships.each { |r| repo_object = add_relationship(repo_object, r) }
        repo_object.save
      rescue Exception => e1 # deliberately broad: clean up, then re-raise below
        # repo_object is still nil when instantiation itself failed, so it must
        # not be dereferenced here; fall back to the pre-assigned pid (if any).
        failed_pid = repo_object ? repo_object.pid : repo_pid
        logger.fatal("Error in creating repository object #{failed_pid} for #{identifier} : #{e1}")
        repo_clean = false
        if repo_object && !repo_object.new_record?
          begin
            logger.info("Deleting potentially incomplete #{repo_object.pid} due to error in ingest batch processing")
            repo_object.destroy
          rescue Exception => e2
            logger.fatal("Error deleting repository object #{repo_object.pid}: #{e2}")
          else
            repo_clean = true
          end
        else
          # Nothing was persisted, so there is nothing to clean up.
          repo_clean = true
        end
        if batch.present?
          batch.status = repo_clean ? Batch::STATUS_RESTARTABLE : Batch::STATUS_INTERRUPTED
          batch.save
        end
        raise e1
      end
      repo_object
    end

  end

end
@@ -0,0 +1,94 @@
module Ddr::Batch

  # Batch object whose processing updates an existing repository object,
  # identified by pid, with this record's attribute and datastream operations.
  class UpdateBatchObject < BatchObject

    # Update-specific validation.
    #
    # Returns an array of error message strings: the pid is required, must
    # exist in the repository, and the batch user must be permitted to edit
    # the object it identifies.
    def local_validations
      errs = []
      if pid
        if ActiveFedora::Base.exists?(pid)
          unless batch.user.can?(:edit, ActiveFedora::Base.find(pid, :cast => true))
            errs << "#{@error_prefix} #{batch.user.user_key} not permitted to edit #{pid}"
          end
        else
          errs << "#{@error_prefix} PID #{pid} not found in repository"
        end
      else
        errs << "#{@error_prefix} PID required for UPDATE operation"
      end
      errs
    end

    # Datastream keys of the repository object identified by pid.
    # Returns nil when there is no pid or the object cannot be loaded.
    def model_datastream_keys
      return unless pid
      ActiveFedora::Base.find(pid, :cast => true).datastreams.keys
    rescue
      nil
    end

    # Updates and verifies the repository object unless this record has
    # already been verified.
    #
    # Returns the repository object, or nil when already verified.
    def process(user, opts = {})
      return if verified
      repo_object = update_repository_object(user, opts)
      # A single failed check marks the whole object as not verified.
      all_verified = verify_repository_object.none? { |_key, value| value.eql?(VERIFICATION_FAIL) }
      update_attributes(:verified => all_verified)
      repo_object
    end

    # One-line, human-readable outcome of the update attempt.
    def results_message
      if pid
        verification_result = (verified ? "Verified" : "VERIFICATION FAILURE")
        "Updated #{pid}...#{verification_result}"
      else
        "Attempt to update #{model} #{identifier} FAILED"
      end
    end

    # Comment attached to the update event sent to the repository object.
    def event_log_comment
      "Updated by batch process (Batch #{batch.id}, BatchObject #{id})"
    end

    private

    # Loads the repository object, applies every attribute and datastream
    # operation, saves it and, when the save succeeds, notifies an :update
    # event.
    #
    # Supported attribute operations are ADD, CLEAR and CLEAR_ALL; the only
    # supported datastream operation is ADDUPDATE. Any other operation raises
    # ArgumentError rather than silently replacing the object with nil and
    # failing later with an unrelated NoMethodError.
    #
    # On any exception the batch status is set to RESTARTABLE and the
    # exception is re-raised.
    def update_repository_object(user, opts = {})
      repo_object = nil
      begin
        repo_object = ActiveFedora::Base.find(pid)
        batch_object_attributes.each do |a|
          repo_object = case a.operation
                        when BatchObjectAttribute::OPERATION_ADD
                          add_attribute(repo_object, a)
                        when BatchObjectAttribute::OPERATION_CLEAR
                          clear_attribute(repo_object, a)
                        when BatchObjectAttribute::OPERATION_CLEAR_ALL
                          clear_attributes(repo_object, a)
                        else
                          raise ArgumentError, "Unsupported attribute operation #{a.operation.inspect} for UPDATE"
                        end
        end
        batch_object_datastreams.each do |d|
          repo_object = case d.operation
                        when BatchObjectDatastream::OPERATION_ADDUPDATE
                          populate_datastream(repo_object, d)
                        else
                          raise ArgumentError, "Unsupported datastream operation #{d.operation.inspect} for UPDATE"
                        end
        end
        if repo_object.save
          repo_object.notify_event(:update, user: user, comment: event_log_comment)
        end
      rescue Exception => e # deliberately broad: flag the batch, then re-raise below
        logger.error("Error in updating repository object #{pid} for #{identifier} : : #{e}")
        if batch.present?
          batch.status = Batch::STATUS_RESTARTABLE
          batch.save
        end
        raise e
      end
      repo_object
    end

  end
end
@@ -0,0 +1,151 @@
module Ddr::Batch
  # Runs a batch: optionally validates it, processes each batch object in
  # order, records counts/outcome/status on the batch, attaches the log file
  # and notifies the batch user by email.
  class BatchProcessor

    LOG_CONFIG_FILEPATH = File.join(Rails.root, 'config', 'log4r_batch_processor.yml')
    DEFAULT_LOG_DIR = File.join(Rails.root, 'log')
    DEFAULT_LOG_FILE = "batch_processor_log.txt"
    PASS = "PASS"
    FAIL = "FAIL"

    # Options
    #   :log_dir - optional - directory for log file - default is given in DEFAULT_LOG_DIR
    #   :log_file - optional - filename of log file - default is given in DEFAULT_LOG_FILE
    #   :skip_validation - optional - whether to skip batch object validation step when processing - default is false
    #   :ignore_validation_errors - optional - whether to continue processing even if batch object validation errors occur - default is false
    def initialize(batch, operator=nil, opts={})
      @batch = batch
      @operator = operator
      @bp_log_dir = opts.fetch(:log_dir, DEFAULT_LOG_DIR)
      @bp_log_file = opts.fetch(:log_file, DEFAULT_LOG_FILE)
      @skip_validation = opts.fetch(:skip_validation, false)
      @ignore_validation_errors = opts.fetch(:ignore_validation_errors, false)
    end

    # Runs the batch end to end.
    #
    # The logger is always configured. Without a batch there is nothing to
    # process, log against or notify about, so the method returns right after
    # that instead of dereferencing nil.
    def execute
      config_logger
      return unless @batch

      initiate_batch_run
      valid_batch = nil
      unless @skip_validation
        valid_batch = validate_batch
        @batch.update_attributes(status: Batch::STATUS_INVALID) unless valid_batch
      end
      if @skip_validation || @ignore_validation_errors || valid_batch
        process_batch
      end
      close_batch_run
      save_logfile
      send_notification if @batch.user && @batch.user.email
    end

    private

    # Validates the batch, logging every error it reports.
    # The batch status is VALIDATING during the check and RUNNING afterwards.
    #
    # Returns true when the batch reported no errors.
    def validate_batch
      @batch.update_attributes(status: Batch::STATUS_VALIDATING)
      errors = @batch.validate
      errors.each do |error|
        @bp_log.error("Batch Object Validation Error: #{error}")
      end
      @batch.update_attributes(status: Batch::STATUS_RUNNING)
      errors.empty?
    end

    # Processes the batch objects in order, stopping at the first one that
    # raises. The failure is logged before the loop is abandoned.
    def process_batch
      @batch.update_attributes(status: Batch::STATUS_PROCESSING, processing_step_start: DateTime.now)
      @batch.batch_objects.each do |object|
        begin
          process_object(object)
        rescue Exception => e # deliberately broad: any failure stops the run
          @bp_log.error("Error processing batch object #{object.identifier}: #{e}")
          break
        end
        # Fixed pause after each object -- presumably throttling; TODO confirm intent.
        sleep 2
      end
      # Only reset the status if nothing else changed it during processing.
      @batch.update_attributes(status: Batch::STATUS_RUNNING) if @batch.status == Batch::STATUS_PROCESSING
    end

    # Logs the batch header, clears any attached logfile, marks the batch as
    # RUNNING and resets the run counters.
    def initiate_batch_run
      @bp_log.info "Batch id: #{@batch.id}"
      @bp_log.info "Batch name: #{@batch.name}" if @batch.name
      @bp_log.info "Batch size: #{@batch.batch_objects.size}"
      @batch.logfile.clear # clear out any attached logfile
      @batch.update_attributes(:start => DateTime.now,
                               :status => Batch::STATUS_RUNNING,
                               :version => VERSION)
      @failures = 0
      @successes = 0
      @results_tracker = {}
    end

    # Stores the counts, outcome, final status and stop time on the batch and
    # logs a per-type, per-model summary of successes.
    def close_batch_run
      @batch.reload
      @batch.failure = @failures
      @batch.outcome = @successes.eql?(@batch.batch_objects.size) ? Batch::OUTCOME_SUCCESS : Batch::OUTCOME_FAILURE
      # A status other than RUNNING is left untouched.
      if @batch.status.eql?(Batch::STATUS_RUNNING)
        @batch.status = Batch::STATUS_FINISHED
      end
      @batch.stop = DateTime.now
      @batch.success = @successes
      @batch.save
      @bp_log.info "====== Summary ======"
      @results_tracker.each_pair do |type, models|
        verb = case type
               when IngestBatchObject.name
                 "Ingested"
               when UpdateBatchObject.name
                 "Updated"
               end
        models.each_pair do |model, counts|
          @bp_log.info "#{verb} #{counts[:successes]} #{model}"
        end
      end
    end

    # Counts one processed object under its type and model; the :successes
    # counter is only incremented when the object was verified.
    def update_results_tracker(type, model, verified)
      @results_tracker[type] ||= {}
      @results_tracker[type][model] ||= {}
      @results_tracker[type][model][:successes] ||= 0
      @results_tracker[type][model][:successes] += 1 if verified
    end

    # Processes one batch object, updates the counters and logs its result.
    def process_object(object)
      @bp_log.debug "Processing object: #{object.identifier}"
      repository_object = object.process(@operator)
      # Fall back to the batch object's model when no repository object came back.
      model_name = repository_object.present? ? repository_object.class.name : object.model
      update_results_tracker(object.type, model_name, object.verified)
      if object.verified
        @successes += 1
      else
        @failures += 1
      end
      @bp_log.info(object.results_message)
    end

    # Loads the Log4r YAML configuration, passing the log file path in as the
    # LOG_FILE parameter, and fetches the 'batch_processor' logger.
    def config_logger
      logconfig = Log4r::YamlConfigurator
      logconfig['LOG_FILE'] = File.join(@bp_log_dir, @bp_log_file)
      logconfig.load_yaml_file LOG_CONFIG_FILEPATH
      @bp_log = Log4r::Logger['batch_processor']
    end

    # Attaches the log file to the batch, using the filename of the last
    # outputter that exposes one.
    def save_logfile
      @bp_log.outputters.each do |outputter|
        @logfilename = outputter.filename if outputter.respond_to?(:filename)
      end
      @batch.update!({ logfile: File.new(@logfilename) }) if @logfilename
    end

    # Best-effort email notification: a delivery failure is reported but never
    # aborts the run.
    def send_notification
      BatchProcessorRunMailer.send_notification(@batch).deliver!
    rescue => e
      puts "An error occurred while attempting to send the notification."
      @bp_log.error("Notification delivery failed: #{e}") if @bp_log
    end

  end
end