ddr-batch 1.7.2 → 2.0.0.alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/README.md +45 -0
  3. data/app/jobs/ddr/batch/batch_processor_job.rb +7 -2
  4. data/app/mailers/ddr/batch/batch_processor_run_mailer.rb +7 -24
  5. data/app/models/ddr/batch/batch.rb +18 -20
  6. data/app/models/ddr/batch/batch_ability_definitions.rb +14 -0
  7. data/app/models/ddr/batch/batch_object.rb +26 -35
  8. data/app/models/ddr/batch/batch_object_attribute.rb +7 -2
  9. data/app/models/ddr/batch/batch_object_datastream.rb +5 -0
  10. data/app/models/ddr/batch/batch_object_relationship.rb +1 -0
  11. data/app/models/ddr/batch/ingest_batch_object.rb +21 -25
  12. data/app/models/ddr/batch/update_batch_object.rb +10 -18
  13. data/app/scripts/ddr/batch/batch_processor.rb +152 -0
  14. data/app/views/ddr/batch/batch_processor_run_mailer/send_notification.html.erb +5 -5
  15. data/app/views/ddr/batch/batch_processor_run_mailer/send_notification.text.erb +5 -5
  16. data/config/locales/en.yml +2 -3
  17. data/lib/ddr/batch.rb +0 -6
  18. data/lib/ddr/batch/version.rb +1 -1
  19. metadata +31 -47
  20. data/app/jobs/ddr/batch/batch_deletion_job.rb +0 -19
  21. data/app/jobs/ddr/batch/batch_objects_processor_job.rb +0 -11
  22. data/app/models/ddr/batch/batch_object_message.rb +0 -8
  23. data/app/models/ddr/batch/batch_object_role.rb +0 -26
  24. data/app/models/ddr/batch/error.rb +0 -10
  25. data/app/models/ddr/batch/log.rb +0 -29
  26. data/app/services/ddr/batch/monitor_batch_finished.rb +0 -87
  27. data/app/services/ddr/batch/monitor_batch_object_handled.rb +0 -42
  28. data/app/services/ddr/batch/monitor_batch_started.rb +0 -43
  29. data/app/services/ddr/batch/process_batch.rb +0 -60
  30. data/app/services/ddr/batch/process_batch_object.rb +0 -46
  31. data/app/services/ddr/batch/process_batch_objects.rb +0 -39
  32. data/config/initializers/subscriptions.rb +0 -9
  33. data/db/migrate/20160816164010_create_batch_object_roles.rb +0 -15
  34. data/db/migrate/20161115191636_add_columns_to_batch_object.rb +0 -9
  35. data/db/migrate/20161116142512_create_batch_object_messages.rb +0 -13
  36. data/db/migrate/20161222192611_remove_columns_from_batch.rb +0 -13
  37. data/db/migrate/20171116183514_add_collection_columns_to_batch.rb +0 -8
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e356a65a70813c599af04d01d764de1109d98f5e
4
- data.tar.gz: 4ac1235826c5fce10d7990f16ad9636d771169ba
3
+ metadata.gz: f604d559b09d360d0f9357113a314fbbe33d5777
4
+ data.tar.gz: 4ed2ac035ff250220a8b7b07e6766f338a9da348
5
5
  SHA512:
6
- metadata.gz: 10ea52dce76f86f9c9965ebe8d594f2a2c5c7ab3329cb7fe759d7fac9f6122adf84d929c350daab37334a86aa1088f48766c407e5541f00b76c5a405257380d3
7
- data.tar.gz: f397545eb2d5741fbcb9ece062f0e4c1f82e8eec0e952314de3bdab6ec595e128d3cbdc56406a52b2d372ef86ee497481aeb93281ca3bcea359b6b9eb52cac63
6
+ metadata.gz: 8a495a43eb4b92a0753a1ac80ca16a08a40693f91de7928ff7a7de7c81dc6b8733c38063526f07cb027bc744adfca8bafd47b7bc9afc6dac33490859e56c51d6
7
+ data.tar.gz: 3c3abce0902a76666701f79c80f65fd7ef5d6b019cce4927a462a7dbb398b4c6218088584f5a82d3fc5aef4631485c8b542b5c5faf0612fddc634ed4e9fac0e4
data/README.md CHANGED
@@ -42,6 +42,51 @@ class Ability < Ddr::Auth::Ability
42
42
  end
43
43
  ```
44
44
 
45
+ ### Log4r
46
+
47
+ #### Application.rb
48
+
49
+ Add the following lines to `config\application.rb` if they are not already there:
50
+
51
+ ```ruby
52
+ require 'log4r'
53
+ require 'log4r/yamlconfigurator'
54
+ require 'log4r/outputter/datefileoutputter'
55
+ include Log4r
56
+ ```
57
+
58
+ #### Configuration
59
+
60
+ `Ddr::Batch::BatchProcessor` expects a Log4r configuration file at `config\log4r_batch_processor.yml`.
61
+
62
+ ##### Example
63
+
64
+ ```yaml
65
+ log4r_config:
66
+ loggers:
67
+ - name : batch_processor
68
+ level : DEBUG
69
+ trace : 'false'
70
+ outputters:
71
+ - logfile
72
+ outputters:
73
+ - type : StdoutOutputter
74
+ name : stdout
75
+ level : DEBUG
76
+ formatter :
77
+ date_pattern: '%F %T.%L'
78
+ pattern : '%d %l: %m'
79
+ type : PatternFormatter
80
+ - type : FileOutputter
81
+ name : logfile
82
+ trunc : 'false'
83
+ filename : "#{LOG_FILE}"
84
+ formatter :
85
+ date_pattern: '%F %T.%L'
86
+ pattern : '%d %l: %m'
87
+ type : PatternFormatter
88
+ ```
89
+
45
90
  ### Migrations
46
91
 
47
92
  Install the ddr-batch migrations:
data/app/jobs/ddr/batch/batch_processor_job.rb CHANGED
@@ -3,7 +3,12 @@ module Ddr::Batch
3
3
  @queue = :batch
4
4
 
5
5
  def self.perform(batch_id, operator_id)
6
- ProcessBatch.new(batch_id: batch_id, operator_id: operator_id).execute
6
+ ts = Time.now.strftime("%Y%m%d%H%M%S%L")
7
+ logfile = "batch_processor_#{ts}_log.txt"
8
+ batch = Batch.find(batch_id)
9
+ operator = User.find(operator_id)
10
+ bp = BatchProcessor.new(batch, operator, log_file: logfile)
11
+ bp.execute
7
12
  end
8
13
 
9
14
  def self.after_enqueue_set_status(batch_id, operator_id)
@@ -13,4 +18,4 @@ module Ddr::Batch
13
18
  end
14
19
 
15
20
  end
16
- end
21
+ end
data/app/mailers/ddr/batch/batch_processor_run_mailer.rb CHANGED
@@ -2,35 +2,18 @@ module Ddr::Batch
2
2
 
3
3
  class BatchProcessorRunMailer < ActionMailer::Base
4
4
 
5
+ default :from => "noreply@duke.edu"
6
+
5
7
  def send_notification(batch)
6
8
  @batch = batch
7
- @title = "Batch Processor Run #{@batch.status} #{@batch.outcome}"
8
- @title << " - #{@batch.collection_title}" if @batch.collection_title.present?
9
+ @title = "Batch Processor Run #{@batch.status}"
9
10
  @host = `uname -n`.strip
10
11
  @subject = "[#{@host}] #{@title}"
11
- @size = @batch.batch_objects.size
12
- @handled = @batch.handled_count
13
- @success = @batch.success_count
14
- attachments[attachment_file_name(@batch)] = File.read(@batch.logfile.path)
15
- mail(to: @batch.user.email, subject: @subject)
12
+ from = "#{`echo $USER`.strip}@#{@host}"
13
+ attachments["details.txt"] = File.read(@batch.logfile.path)
14
+ mail(from: from, to: @batch.user.email, subject: @subject)
16
15
  end
17
16
 
18
- private
19
-
20
- def attachment_file_name(batch)
21
- if batch.collection_title.present?
22
- sanitized_title = sanitize_title_for_filename(batch.collection_title)
23
- "details_#{sanitized_title}.txt"
24
- else
25
- "details.txt"
26
- end
27
- end
28
-
29
- def sanitize_title_for_filename(title)
30
- title
31
- .gsub(/[^\w\s_-]+/, '')
32
- .gsub(/\s+/, '_')
33
- end
34
17
  end
35
18
 
36
- end
19
+ end
data/app/models/ddr/batch/batch.rb CHANGED
@@ -19,22 +19,30 @@ module Ddr::Batch
19
19
  STATUS_FINISHED = "FINISHED"
20
20
  STATUS_INTERRUPTED = "INTERRUPTED"
21
21
  STATUS_RESTARTABLE = "INTERRUPTED - RESTARTABLE"
22
- STATUS_QUEUED_FOR_DELETION = "QUEUED FOR DELETION"
23
- STATUS_DELETING = "DELETING"
24
22
 
25
- def handled_count
26
- batch_objects.where(handled: true).count
23
+ def validate
24
+ errors = []
25
+ begin
26
+ batch_objects.each do |object|
27
+ unless object.verified
28
+ errors << object.validate
29
+ end
30
+ end
31
+ rescue Exception => e
32
+ errors << "Exception raised during batch validation: #{e.backtrace}"
33
+ end
34
+ errors.flatten
27
35
  end
28
36
 
29
- def success_count
37
+ def completed_count
30
38
  batch_objects.where(verified: true).count
31
39
  end
32
40
 
33
41
  def time_to_complete
34
- unless start.nil?
35
- if handled_count > 0
36
- handled = handled_count
37
- ((Time.now - start.to_time) / handled) * (batch_objects.count - handled)
42
+ unless processing_step_start.nil?
43
+ if completed_count > 0
44
+ completed = completed_count
45
+ ((Time.now - processing_step_start.to_time) / completed) * (batch_objects.count - completed)
38
46
  end
39
47
  end
40
48
  end
@@ -55,20 +63,10 @@ module Ddr::Batch
55
63
  batch_objects.map{ |x| x.pid if x.pid.present? }.compact
56
64
  end
57
65
 
58
- def unhandled_objects?
59
- batch_objects.any? { |batch_object| !batch_object.handled? }
60
- end
61
-
62
66
  def finished?
63
67
  status == STATUS_FINISHED
64
68
  end
65
69
 
66
- def deletable?
67
- [ nil,
68
- Ddr::Batch::Batch::STATUS_READY,
69
- Ddr::Batch::Batch::STATUS_VALIDATED,
70
- Ddr::Batch::Batch::STATUS_INVALID ].include?(status)
71
- end
72
70
  end
73
71
 
74
- end
72
+ end
data/app/models/ddr/batch/batch_ability_definitions.rb ADDED
@@ -0,0 +1,14 @@
1
+ module Ddr::Batch
2
+ class BatchAbilityDefinitions < Ddr::Auth::AbilityDefinitions
3
+
4
+ def call
5
+ if authenticated?
6
+ can :manage, Batch, user_id: user.id
7
+ end
8
+ can :manage, Ddr::Batch::BatchObject do |batch_object|
9
+ can? :manage, batch_object.batch
10
+ end
11
+ end
12
+
13
+ end
14
+ end
data/app/models/ddr/batch/batch_object.rb CHANGED
@@ -7,9 +7,7 @@ module Ddr::Batch
7
7
  belongs_to :batch, inverse_of: :batch_objects
8
8
  has_many :batch_object_attributes, -> { order "id ASC" }, inverse_of: :batch_object, dependent: :destroy
9
9
  has_many :batch_object_datastreams, inverse_of: :batch_object, dependent: :destroy
10
- has_many :batch_object_messages, inverse_of: :batch_object, dependent: :destroy
11
10
  has_many :batch_object_relationships, inverse_of: :batch_object, dependent: :destroy
12
- has_many :batch_object_roles, inverse_of: :batch_object, dependent: :destroy
13
11
 
14
12
  VERIFICATION_PASS = "PASS"
15
13
  VERIFICATION_FAIL = "FAIL"
@@ -21,8 +19,6 @@ module Ddr::Batch
21
19
  Model: %{model}
22
20
  EOS
23
21
 
24
- ProcessingResultsMessage = Struct.new(:level, :message)
25
-
26
22
  def self.pid_from_identifier(identifier, batch_id)
27
23
  query = "identifier = :identifier"
28
24
  query << " and batch_id = :batch_id" if batch_id
@@ -79,7 +75,7 @@ module Ddr::Batch
79
75
  errs = []
80
76
  batch_object_datastreams.each do |d|
81
77
  if model_datastream_keys.present?
82
- unless model_datastream_keys.include?(d.name)
78
+ unless model_datastream_keys.include?(d.name.to_sym)
83
79
  errs << "#{@error_prefix} Invalid datastream name for #{model}: #{d.name}"
84
80
  end
85
81
  end
@@ -115,11 +111,12 @@ module Ddr::Batch
115
111
  obj_model = batch.found_pids[r[:object]]
116
112
  else
117
113
  begin
118
- obj_model = SolrDocument.find(r[:object]).active_fedora_model
114
+ obj = ActiveFedora::Base.find(r[:object], :cast => true)
115
+ obj_model = obj.class.name
119
116
  if batch.present?
120
- batch.add_found_pid(r[:object], obj_model)
117
+ batch.add_found_pid(obj.pid, obj_model)
121
118
  end
122
- rescue SolrDocument::NotFound
119
+ rescue ActiveFedora::ObjectNotFoundError
123
120
  pid_in_batch = false
124
121
  if batch.present?
125
122
  if batch.pre_assigned_pids.include?(r[:object])
@@ -156,7 +153,7 @@ module Ddr::Batch
156
153
  else
157
154
  verifications["Object exists in repository"] = VERIFICATION_PASS
158
155
  verifications["Object is correct model"] = verify_model(repo_object) if model
159
- verifications["Object has correct label"] = verify_label(repo_object) if label
156
+ # verifications["Object has correct label"] = verify_label(repo_object) if label
160
157
  unless batch_object_attributes.empty?
161
158
  batch_object_attributes.each do |a|
162
159
  if a.operation == BatchObjectAttribute::OPERATION_ADD
@@ -175,11 +172,6 @@ module Ddr::Batch
175
172
  verifications["#{r.name} relationship is correct"] = verify_relationship(repo_object, r)
176
173
  end
177
174
  end
178
- unless batch_object_roles.empty?
179
- batch_object_roles.each do |r|
180
- verifications["#{r.role_scope} #{r.role_type} #{r.agent} role is correct"] = verify_role(repo_object, r)
181
- end
182
- end
183
175
  result = Ddr::Actions::FixityCheck.execute repo_object
184
176
  verifications["Fixity check"] = result.success ? VERIFICATION_PASS : VERIFICATION_FAIL
185
177
  end
@@ -198,13 +190,18 @@ module Ddr::Batch
198
190
  end
199
191
  end
200
192
 
201
- def verify_label(repo_object)
202
- repo_object.label.eql?(label) ? VERIFICATION_PASS : VERIFICATION_FAIL
203
- end
204
-
193
+ # def verify_label(repo_object)
194
+ # repo_object.label.eql?(label) ? VERIFICATION_PASS : VERIFICATION_FAIL
195
+ # end
196
+ #
205
197
  def verify_attribute(repo_object, attribute)
206
- repo_object.datastreams[attribute.datastream].values(attribute.name).include?(attribute.value.strip) ?
207
- VERIFICATION_PASS : VERIFICATION_FAIL
198
+ verified = case attribute.datastream
199
+ when 'descMetadata'
200
+ repo_object.descMetadata.values(attribute.name).include?(attribute.value)
201
+ when 'adminMetadata'
202
+ repo_object.adminMetadata.values(attribute.name).include?(attribute.value)
203
+ end
204
+ verified ? VERIFICATION_PASS : VERIFICATION_FAIL
208
205
  end
209
206
 
210
207
  def verify_datastream(repo_object, datastream)
@@ -236,33 +233,23 @@ module Ddr::Batch
236
233
  end
237
234
  end
238
235
 
239
- def verify_role(repo_object, role)
240
- role_hash = { "role_type"=>[ role.role_type ], "agent"=>[ role.agent ], "scope"=>[ role.role_scope ] }
241
- repo_object.roles.role_set.map(&:to_h).include?(role_hash) ? VERIFICATION_PASS : VERIFICATION_FAIL
242
- end
243
-
244
236
  def add_attribute(repo_object, attribute)
245
- repo_object.datastreams[attribute.datastream].add_value(attribute.name, attribute.value)
237
+ repo_object.send(attribute.datastream).add_value(attribute.name, attribute.value)
246
238
  return repo_object
247
239
  end
248
240
 
249
241
  def clear_attribute(repo_object, attribute)
250
- repo_object.datastreams[attribute.datastream].set_values(attribute.name, nil)
242
+ repo_object.send(attribute.datastream).set_values(attribute.name, nil)
251
243
  return repo_object
252
244
  end
253
245
 
254
246
  def clear_attributes(repo_object, attribute)
255
- repo_object.datastreams[attribute.datastream].class.term_names.each do |term|
256
- repo_object.datastreams[attribute.datastream].set_values(term, nil) if repo_object.datastreams[attribute.datastream].values(term)
247
+ Ddr::Models::DescriptiveMetadata.unqualified_names.each do |term|
248
+ repo_object.descMetadata.set_values(term, nil) if repo_object.descMetadata.values(term)
257
249
  end
258
250
  return repo_object
259
251
  end
260
252
 
261
- def add_role(repo_object, role)
262
- repo_object.roles.grant(scope: role.role_scope, type: role.role_type, agent: role.agent)
263
- return repo_object
264
- end
265
-
266
253
  def populate_datastream(repo_object, datastream)
267
254
  case datastream[:payload_type]
268
255
  when BatchObjectDatastream::PAYLOAD_TYPE_BYTES
@@ -278,7 +265,11 @@ module Ddr::Batch
278
265
  else
279
266
  ds_content = File.new(datastream[:payload])
280
267
  end
281
- repo_object.add_file(ds_content, datastream[:name], mime_type: mime_type)
268
+ file_name = File.basename(datastream[:payload])
269
+ dsid = datastream[:name]
270
+ opts = { filename: file_name }
271
+ opts.merge({ mime_type: mime_type }) if mime_type
272
+ repo_object.add_file(ds_content, path: dsid)
282
273
  end
283
274
  return repo_object
284
275
  end
data/app/models/ddr/batch/batch_object_attribute.rb CHANGED
@@ -46,11 +46,16 @@ module Ddr::Batch
46
46
  end
47
47
 
48
48
  def datastream_valid?
49
- datastream_type < ActiveFedora::RDFDatastream rescue false
49
+ [ 'adminMetadata', 'descMetadata' ].include?(datastream)
50
50
  end
51
51
 
52
52
  def attribute_name_valid?
53
- datastream_type.term_names.include?(name.to_sym)
53
+ case datastream
54
+ when 'adminMetadata'
55
+ batch_object.model.constantize.properties.include?(name)
56
+ when 'descMetadata'
57
+ Ddr::Models::DescriptiveMetadata.unqualified_names.include?(name.to_sym)
58
+ end
54
59
  end
55
60
 
56
61
  end
data/app/models/ddr/batch/batch_object_datastream.rb CHANGED
@@ -4,6 +4,11 @@ module Ddr::Batch
4
4
 
5
5
  belongs_to :batch_object, :inverse_of => :batch_object_datastreams
6
6
 
7
+ DATASTREAMS = [ Ddr::Datastreams::CONTENT,
8
+ Ddr::Datastreams::DESC_METADATA,
9
+ Ddr::Datastreams::RIGHTS_METADATA,
10
+ Ddr::Datastreams::STRUCT_METADATA ]
11
+
7
12
  OPERATION_ADD = "ADD" # add this datastream to the object -- considered an error if datastream already exists
8
13
  OPERATION_ADDUPDATE = "ADDUPDATE" # add this datastream to or update this datastream in the object
9
14
  OPERATION_UPDATE = "UPDATE" # update this datastream in the object -- considered an error if datastream does not already exist
data/app/models/ddr/batch/batch_object_relationship.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  module Ddr::Batch
2
2
 
3
3
  class BatchObjectRelationship < ActiveRecord::Base
4
+ # attr_accessible :name, :object, :object_type, :operation, :batch_object
4
5
  belongs_to :batch_object, :inverse_of => :batch_object_relationships
5
6
 
6
7
  RELATIONSHIP_ADMIN_POLICY = "admin_policy"
data/app/models/ddr/batch/ingest_batch_object.rb CHANGED
@@ -6,7 +6,6 @@ module Ddr::Batch
6
6
  errors = []
7
7
  errors << "#{@error_prefix} Model required for INGEST operation" unless model
8
8
  errors += validate_pre_assigned_pid if pid
9
- errors += validate_collection if model == 'Collection'
10
9
  errors
11
10
  end
12
11
 
@@ -20,11 +19,10 @@ module Ddr::Batch
20
19
 
21
20
  def results_message
22
21
  if pid
23
- message_level = verified ? Logger::INFO : Logger::WARN
24
- verification_result = verified ? "Verified" : "VERIFICATION FAILURE"
25
- ProcessingResultsMessage.new(message_level, "Ingested #{model} #{identifier} into #{pid}...#{verification_result}")
22
+ verification_result = (verified ? "Verified" : "VERIFICATION FAILURE")
23
+ message = "Ingested #{model} #{identifier} into #{pid}...#{verification_result}"
26
24
  else
27
- ProcessingResultsMessage.new(Logger::ERROR, "Attempt to ingest #{model} #{identifier} FAILED")
25
+ message = "Attempt to ingest #{model} #{identifier} FAILED"
28
26
  end
29
27
  end
30
28
 
@@ -36,18 +34,8 @@ module Ddr::Batch
36
34
  return errs
37
35
  end
38
36
 
39
- def validate_collection
40
- errs = []
41
- coll = Collection.new
42
- batch_object_attributes.each { |attr| coll = add_attribute(coll, attr) }
43
- unless coll.valid?
44
- coll.errors.messages.each { |k, v| errs << "#{@error_prefix} Collection #{k} #{v.join(';')}" }
45
- end
46
- errs
47
- end
48
-
49
37
  def ingest(user, opts = {})
50
- repo_object = create_repository_object(user)
38
+ repo_object = create_repository_object
51
39
  if !repo_object.nil? && !repo_object.new_record?
52
40
  ingest_outcome_detail = []
53
41
  ingest_outcome_detail << "Ingested #{model} #{identifier} into #{repo_object.pid}"
@@ -90,27 +78,35 @@ module Ddr::Batch
90
78
  repo_object
91
79
  end
92
80
 
93
- def create_repository_object(user)
81
+ def create_repository_object
94
82
  repo_pid = pid if pid.present?
95
83
  repo_object = nil
96
84
  begin
97
- repo_object = model.constantize.new(:pid => repo_pid)
98
- repo_object.label = label if label
85
+ repo_object = model.constantize.new(:id => repo_pid)
86
+ # repo_object.label = label if label
87
+ repo_object.save(validate: false)
99
88
  batch_object_attributes.each { |a| repo_object = add_attribute(repo_object, a) }
100
- repo_object.save(validate: false, skip_structure_updates: true, user: user)
101
89
  batch_object_datastreams.each { |d| repo_object = populate_datastream(repo_object, d) }
102
90
  batch_object_relationships.each { |r| repo_object = add_relationship(repo_object, r) }
103
- batch_object_roles.each { |r| repo_object = add_role(repo_object, r) }
104
- repo_object.save!(skip_structure_updates: true)
91
+ repo_object.save
105
92
  rescue Exception => e1
106
- logger.fatal("Error in creating repository object #{repo_object.pid} for #{identifier} : #{e1}")
93
+ logger.fatal("Error in creating repository object #{repo_object.id} for #{identifier} : #{e1}")
94
+ repo_clean = false
107
95
  if repo_object && !repo_object.new_record?
108
96
  begin
109
- logger.info("Deleting potentially incomplete #{repo_object.pid} due to error in ingest batch processing")
97
+ logger.info("Deleting potentially incomplete #{repo_object.id} due to error in ingest batch processing")
110
98
  repo_object.destroy
111
99
  rescue Exception => e2
112
- logger.fatal("Error deleting repository object #{repo_object.pid}: #{e2}")
100
+ logger.fatal("Error deleting repository object #{repo_object.id}: #{e2}")
101
+ else
102
+ repo_clean = true
113
103
  end
104
+ else
105
+ repo_clean = true
106
+ end
107
+ if batch.present?
108
+ batch.status = repo_clean ? Batch::STATUS_RESTARTABLE : Batch::STATUS_INTERRUPTED
109
+ batch.save
114
110
  end
115
111
  raise e1
116
112
  end