ddr-batch 1.7.2 → 2.0.0.alpha.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +45 -0
  3. data/app/jobs/ddr/batch/batch_processor_job.rb +7 -2
  4. data/app/mailers/ddr/batch/batch_processor_run_mailer.rb +7 -24
  5. data/app/models/ddr/batch/batch.rb +18 -20
  6. data/app/models/ddr/batch/batch_ability_definitions.rb +14 -0
  7. data/app/models/ddr/batch/batch_object.rb +26 -35
  8. data/app/models/ddr/batch/batch_object_attribute.rb +7 -2
  9. data/app/models/ddr/batch/batch_object_datastream.rb +5 -0
  10. data/app/models/ddr/batch/batch_object_relationship.rb +1 -0
  11. data/app/models/ddr/batch/ingest_batch_object.rb +21 -25
  12. data/app/models/ddr/batch/update_batch_object.rb +10 -18
  13. data/app/scripts/ddr/batch/batch_processor.rb +152 -0
  14. data/app/views/ddr/batch/batch_processor_run_mailer/send_notification.html.erb +5 -5
  15. data/app/views/ddr/batch/batch_processor_run_mailer/send_notification.text.erb +5 -5
  16. data/config/locales/en.yml +2 -3
  17. data/lib/ddr/batch.rb +0 -6
  18. data/lib/ddr/batch/version.rb +1 -1
  19. metadata +31 -47
  20. data/app/jobs/ddr/batch/batch_deletion_job.rb +0 -19
  21. data/app/jobs/ddr/batch/batch_objects_processor_job.rb +0 -11
  22. data/app/models/ddr/batch/batch_object_message.rb +0 -8
  23. data/app/models/ddr/batch/batch_object_role.rb +0 -26
  24. data/app/models/ddr/batch/error.rb +0 -10
  25. data/app/models/ddr/batch/log.rb +0 -29
  26. data/app/services/ddr/batch/monitor_batch_finished.rb +0 -87
  27. data/app/services/ddr/batch/monitor_batch_object_handled.rb +0 -42
  28. data/app/services/ddr/batch/monitor_batch_started.rb +0 -43
  29. data/app/services/ddr/batch/process_batch.rb +0 -60
  30. data/app/services/ddr/batch/process_batch_object.rb +0 -46
  31. data/app/services/ddr/batch/process_batch_objects.rb +0 -39
  32. data/config/initializers/subscriptions.rb +0 -9
  33. data/db/migrate/20160816164010_create_batch_object_roles.rb +0 -15
  34. data/db/migrate/20161115191636_add_columns_to_batch_object.rb +0 -9
  35. data/db/migrate/20161116142512_create_batch_object_messages.rb +0 -13
  36. data/db/migrate/20161222192611_remove_columns_from_batch.rb +0 -13
  37. data/db/migrate/20171116183514_add_collection_columns_to_batch.rb +0 -8
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e356a65a70813c599af04d01d764de1109d98f5e
4
- data.tar.gz: 4ac1235826c5fce10d7990f16ad9636d771169ba
3
+ metadata.gz: f604d559b09d360d0f9357113a314fbbe33d5777
4
+ data.tar.gz: 4ed2ac035ff250220a8b7b07e6766f338a9da348
5
5
  SHA512:
6
- metadata.gz: 10ea52dce76f86f9c9965ebe8d594f2a2c5c7ab3329cb7fe759d7fac9f6122adf84d929c350daab37334a86aa1088f48766c407e5541f00b76c5a405257380d3
7
- data.tar.gz: f397545eb2d5741fbcb9ece062f0e4c1f82e8eec0e952314de3bdab6ec595e128d3cbdc56406a52b2d372ef86ee497481aeb93281ca3bcea359b6b9eb52cac63
6
+ metadata.gz: 8a495a43eb4b92a0753a1ac80ca16a08a40693f91de7928ff7a7de7c81dc6b8733c38063526f07cb027bc744adfca8bafd47b7bc9afc6dac33490859e56c51d6
7
+ data.tar.gz: 3c3abce0902a76666701f79c80f65fd7ef5d6b019cce4927a462a7dbb398b4c6218088584f5a82d3fc5aef4631485c8b542b5c5faf0612fddc634ed4e9fac0e4
data/README.md CHANGED
@@ -42,6 +42,51 @@ class Ability < Ddr::Auth::Ability
42
42
  end
43
43
  ```
44
44
 
45
+ ### Log4r
46
+
47
+ #### Application.rb
48
+
49
+ Add the following lines to `config/application.rb` if they are not already there:
50
+
51
+ ```ruby
52
+ require 'log4r'
53
+ require 'log4r/yamlconfigurator'
54
+ require 'log4r/outputter/datefileoutputter'
55
+ include Log4r
56
+ ```
57
+
58
+ #### Configuration
59
+
60
+ `Ddr::Batch::BatchProcessor` expects a Log4r configuration file at `config/log4r_batch_processor.yml`.
61
+
62
+ ##### Example
63
+
64
+ ```yaml
65
+ log4r_config:
66
+ loggers:
67
+ - name : batch_processor
68
+ level : DEBUG
69
+ trace : 'false'
70
+ outputters:
71
+ - logfile
72
+ outputters:
73
+ - type : StdoutOutputter
74
+ name : stdout
75
+ level : DEBUG
76
+ formatter :
77
+ date_pattern: '%F %T.%L'
78
+ pattern : '%d %l: %m'
79
+ type : PatternFormatter
80
+ - type : FileOutputter
81
+ name : logfile
82
+ trunc : 'false'
83
+ filename : "#{LOG_FILE}"
84
+ formatter :
85
+ date_pattern: '%F %T.%L'
86
+ pattern : '%d %l: %m'
87
+ type : PatternFormatter
88
+ ```
89
+
45
90
  ### Migrations
46
91
 
47
92
  Install the ddr-batch migrations:
@@ -3,7 +3,12 @@ module Ddr::Batch
3
3
  @queue = :batch
4
4
 
5
5
  def self.perform(batch_id, operator_id)
6
- ProcessBatch.new(batch_id: batch_id, operator_id: operator_id).execute
6
+ ts = Time.now.strftime("%Y%m%d%H%M%S%L")
7
+ logfile = "batch_processor_#{ts}_log.txt"
8
+ batch = Batch.find(batch_id)
9
+ operator = User.find(operator_id)
10
+ bp = BatchProcessor.new(batch, operator, log_file: logfile)
11
+ bp.execute
7
12
  end
8
13
 
9
14
  def self.after_enqueue_set_status(batch_id, operator_id)
@@ -13,4 +18,4 @@ module Ddr::Batch
13
18
  end
14
19
 
15
20
  end
16
- end
21
+ end
@@ -2,35 +2,18 @@ module Ddr::Batch
2
2
 
3
3
  class BatchProcessorRunMailer < ActionMailer::Base
4
4
 
5
+ default :from => "noreply@duke.edu"
6
+
5
7
  def send_notification(batch)
6
8
  @batch = batch
7
- @title = "Batch Processor Run #{@batch.status} #{@batch.outcome}"
8
- @title << " - #{@batch.collection_title}" if @batch.collection_title.present?
9
+ @title = "Batch Processor Run #{@batch.status}"
9
10
  @host = `uname -n`.strip
10
11
  @subject = "[#{@host}] #{@title}"
11
- @size = @batch.batch_objects.size
12
- @handled = @batch.handled_count
13
- @success = @batch.success_count
14
- attachments[attachment_file_name(@batch)] = File.read(@batch.logfile.path)
15
- mail(to: @batch.user.email, subject: @subject)
12
+ from = "#{`echo $USER`.strip}@#{@host}"
13
+ attachments["details.txt"] = File.read(@batch.logfile.path)
14
+ mail(from: from, to: @batch.user.email, subject: @subject)
16
15
  end
17
16
 
18
- private
19
-
20
- def attachment_file_name(batch)
21
- if batch.collection_title.present?
22
- sanitized_title = sanitize_title_for_filename(batch.collection_title)
23
- "details_#{sanitized_title}.txt"
24
- else
25
- "details.txt"
26
- end
27
- end
28
-
29
- def sanitize_title_for_filename(title)
30
- title
31
- .gsub(/[^\w\s_-]+/, '')
32
- .gsub(/\s+/, '_')
33
- end
34
17
  end
35
18
 
36
- end
19
+ end
@@ -19,22 +19,30 @@ module Ddr::Batch
19
19
  STATUS_FINISHED = "FINISHED"
20
20
  STATUS_INTERRUPTED = "INTERRUPTED"
21
21
  STATUS_RESTARTABLE = "INTERRUPTED - RESTARTABLE"
22
- STATUS_QUEUED_FOR_DELETION = "QUEUED FOR DELETION"
23
- STATUS_DELETING = "DELETING"
24
22
 
25
- def handled_count
26
- batch_objects.where(handled: true).count
23
+ def validate
24
+ errors = []
25
+ begin
26
+ batch_objects.each do |object|
27
+ unless object.verified
28
+ errors << object.validate
29
+ end
30
+ end
31
+ rescue Exception => e
32
+ errors << "Exception raised during batch validation: #{e.backtrace}"
33
+ end
34
+ errors.flatten
27
35
  end
28
36
 
29
- def success_count
37
+ def completed_count
30
38
  batch_objects.where(verified: true).count
31
39
  end
32
40
 
33
41
  def time_to_complete
34
- unless start.nil?
35
- if handled_count > 0
36
- handled = handled_count
37
- ((Time.now - start.to_time) / handled) * (batch_objects.count - handled)
42
+ unless processing_step_start.nil?
43
+ if completed_count > 0
44
+ completed = completed_count
45
+ ((Time.now - processing_step_start.to_time) / completed) * (batch_objects.count - completed)
38
46
  end
39
47
  end
40
48
  end
@@ -55,20 +63,10 @@ module Ddr::Batch
55
63
  batch_objects.map{ |x| x.pid if x.pid.present? }.compact
56
64
  end
57
65
 
58
- def unhandled_objects?
59
- batch_objects.any? { |batch_object| !batch_object.handled? }
60
- end
61
-
62
66
  def finished?
63
67
  status == STATUS_FINISHED
64
68
  end
65
69
 
66
- def deletable?
67
- [ nil,
68
- Ddr::Batch::Batch::STATUS_READY,
69
- Ddr::Batch::Batch::STATUS_VALIDATED,
70
- Ddr::Batch::Batch::STATUS_INVALID ].include?(status)
71
- end
72
70
  end
73
71
 
74
- end
72
+ end
@@ -0,0 +1,14 @@
1
+ module Ddr::Batch
2
+ class BatchAbilityDefinitions < Ddr::Auth::AbilityDefinitions
3
+
4
+ def call
5
+ if authenticated?
6
+ can :manage, Batch, user_id: user.id
7
+ end
8
+ can :manage, Ddr::Batch::BatchObject do |batch_object|
9
+ can? :manage, batch_object.batch
10
+ end
11
+ end
12
+
13
+ end
14
+ end
@@ -7,9 +7,7 @@ module Ddr::Batch
7
7
  belongs_to :batch, inverse_of: :batch_objects
8
8
  has_many :batch_object_attributes, -> { order "id ASC" }, inverse_of: :batch_object, dependent: :destroy
9
9
  has_many :batch_object_datastreams, inverse_of: :batch_object, dependent: :destroy
10
- has_many :batch_object_messages, inverse_of: :batch_object, dependent: :destroy
11
10
  has_many :batch_object_relationships, inverse_of: :batch_object, dependent: :destroy
12
- has_many :batch_object_roles, inverse_of: :batch_object, dependent: :destroy
13
11
 
14
12
  VERIFICATION_PASS = "PASS"
15
13
  VERIFICATION_FAIL = "FAIL"
@@ -21,8 +19,6 @@ module Ddr::Batch
21
19
  Model: %{model}
22
20
  EOS
23
21
 
24
- ProcessingResultsMessage = Struct.new(:level, :message)
25
-
26
22
  def self.pid_from_identifier(identifier, batch_id)
27
23
  query = "identifier = :identifier"
28
24
  query << " and batch_id = :batch_id" if batch_id
@@ -79,7 +75,7 @@ module Ddr::Batch
79
75
  errs = []
80
76
  batch_object_datastreams.each do |d|
81
77
  if model_datastream_keys.present?
82
- unless model_datastream_keys.include?(d.name)
78
+ unless model_datastream_keys.include?(d.name.to_sym)
83
79
  errs << "#{@error_prefix} Invalid datastream name for #{model}: #{d.name}"
84
80
  end
85
81
  end
@@ -115,11 +111,12 @@ module Ddr::Batch
115
111
  obj_model = batch.found_pids[r[:object]]
116
112
  else
117
113
  begin
118
- obj_model = SolrDocument.find(r[:object]).active_fedora_model
114
+ obj = ActiveFedora::Base.find(r[:object], :cast => true)
115
+ obj_model = obj.class.name
119
116
  if batch.present?
120
- batch.add_found_pid(r[:object], obj_model)
117
+ batch.add_found_pid(obj.pid, obj_model)
121
118
  end
122
- rescue SolrDocument::NotFound
119
+ rescue ActiveFedora::ObjectNotFoundError
123
120
  pid_in_batch = false
124
121
  if batch.present?
125
122
  if batch.pre_assigned_pids.include?(r[:object])
@@ -156,7 +153,7 @@ module Ddr::Batch
156
153
  else
157
154
  verifications["Object exists in repository"] = VERIFICATION_PASS
158
155
  verifications["Object is correct model"] = verify_model(repo_object) if model
159
- verifications["Object has correct label"] = verify_label(repo_object) if label
156
+ # verifications["Object has correct label"] = verify_label(repo_object) if label
160
157
  unless batch_object_attributes.empty?
161
158
  batch_object_attributes.each do |a|
162
159
  if a.operation == BatchObjectAttribute::OPERATION_ADD
@@ -175,11 +172,6 @@ module Ddr::Batch
175
172
  verifications["#{r.name} relationship is correct"] = verify_relationship(repo_object, r)
176
173
  end
177
174
  end
178
- unless batch_object_roles.empty?
179
- batch_object_roles.each do |r|
180
- verifications["#{r.role_scope} #{r.role_type} #{r.agent} role is correct"] = verify_role(repo_object, r)
181
- end
182
- end
183
175
  result = Ddr::Actions::FixityCheck.execute repo_object
184
176
  verifications["Fixity check"] = result.success ? VERIFICATION_PASS : VERIFICATION_FAIL
185
177
  end
@@ -198,13 +190,18 @@ module Ddr::Batch
198
190
  end
199
191
  end
200
192
 
201
- def verify_label(repo_object)
202
- repo_object.label.eql?(label) ? VERIFICATION_PASS : VERIFICATION_FAIL
203
- end
204
-
193
+ # def verify_label(repo_object)
194
+ # repo_object.label.eql?(label) ? VERIFICATION_PASS : VERIFICATION_FAIL
195
+ # end
196
+ #
205
197
  def verify_attribute(repo_object, attribute)
206
- repo_object.datastreams[attribute.datastream].values(attribute.name).include?(attribute.value.strip) ?
207
- VERIFICATION_PASS : VERIFICATION_FAIL
198
+ verified = case attribute.datastream
199
+ when 'descMetadata'
200
+ repo_object.descMetadata.values(attribute.name).include?(attribute.value)
201
+ when 'adminMetadata'
202
+ repo_object.adminMetadata.values(attribute.name).include?(attribute.value)
203
+ end
204
+ verified ? VERIFICATION_PASS : VERIFICATION_FAIL
208
205
  end
209
206
 
210
207
  def verify_datastream(repo_object, datastream)
@@ -236,33 +233,23 @@ module Ddr::Batch
236
233
  end
237
234
  end
238
235
 
239
- def verify_role(repo_object, role)
240
- role_hash = { "role_type"=>[ role.role_type ], "agent"=>[ role.agent ], "scope"=>[ role.role_scope ] }
241
- repo_object.roles.role_set.map(&:to_h).include?(role_hash) ? VERIFICATION_PASS : VERIFICATION_FAIL
242
- end
243
-
244
236
  def add_attribute(repo_object, attribute)
245
- repo_object.datastreams[attribute.datastream].add_value(attribute.name, attribute.value)
237
+ repo_object.send(attribute.datastream).add_value(attribute.name, attribute.value)
246
238
  return repo_object
247
239
  end
248
240
 
249
241
  def clear_attribute(repo_object, attribute)
250
- repo_object.datastreams[attribute.datastream].set_values(attribute.name, nil)
242
+ repo_object.send(attribute.datastream).set_values(attribute.name, nil)
251
243
  return repo_object
252
244
  end
253
245
 
254
246
  def clear_attributes(repo_object, attribute)
255
- repo_object.datastreams[attribute.datastream].class.term_names.each do |term|
256
- repo_object.datastreams[attribute.datastream].set_values(term, nil) if repo_object.datastreams[attribute.datastream].values(term)
247
+ Ddr::Models::DescriptiveMetadata.unqualified_names.each do |term|
248
+ repo_object.descMetadata.set_values(term, nil) if repo_object.descMetadata.values(term)
257
249
  end
258
250
  return repo_object
259
251
  end
260
252
 
261
- def add_role(repo_object, role)
262
- repo_object.roles.grant(scope: role.role_scope, type: role.role_type, agent: role.agent)
263
- return repo_object
264
- end
265
-
266
253
  def populate_datastream(repo_object, datastream)
267
254
  case datastream[:payload_type]
268
255
  when BatchObjectDatastream::PAYLOAD_TYPE_BYTES
@@ -278,7 +265,11 @@ module Ddr::Batch
278
265
  else
279
266
  ds_content = File.new(datastream[:payload])
280
267
  end
281
- repo_object.add_file(ds_content, datastream[:name], mime_type: mime_type)
268
+ file_name = File.basename(datastream[:payload])
269
+ dsid = datastream[:name]
270
+ opts = { filename: file_name }
271
+ opts.merge({ mime_type: mime_type }) if mime_type
272
+ repo_object.add_file(ds_content, path: dsid)
282
273
  end
283
274
  return repo_object
284
275
  end
@@ -46,11 +46,16 @@ module Ddr::Batch
46
46
  end
47
47
 
48
48
  def datastream_valid?
49
- datastream_type < ActiveFedora::RDFDatastream rescue false
49
+ [ 'adminMetadata', 'descMetadata' ].include?(datastream)
50
50
  end
51
51
 
52
52
  def attribute_name_valid?
53
- datastream_type.term_names.include?(name.to_sym)
53
+ case datastream
54
+ when 'adminMetadata'
55
+ batch_object.model.constantize.properties.include?(name)
56
+ when 'descMetadata'
57
+ Ddr::Models::DescriptiveMetadata.unqualified_names.include?(name.to_sym)
58
+ end
54
59
  end
55
60
 
56
61
  end
@@ -4,6 +4,11 @@ module Ddr::Batch
4
4
 
5
5
  belongs_to :batch_object, :inverse_of => :batch_object_datastreams
6
6
 
7
+ DATASTREAMS = [ Ddr::Datastreams::CONTENT,
8
+ Ddr::Datastreams::DESC_METADATA,
9
+ Ddr::Datastreams::RIGHTS_METADATA,
10
+ Ddr::Datastreams::STRUCT_METADATA ]
11
+
7
12
  OPERATION_ADD = "ADD" # add this datastream to the object -- considered an error if datastream already exists
8
13
  OPERATION_ADDUPDATE = "ADDUPDATE" # add this datastream to or update this datastream in the object
9
14
  OPERATION_UPDATE = "UPDATE" # update this datastream in the object -- considered an error if datastream does not already exist
@@ -1,6 +1,7 @@
1
1
  module Ddr::Batch
2
2
 
3
3
  class BatchObjectRelationship < ActiveRecord::Base
4
+ # attr_accessible :name, :object, :object_type, :operation, :batch_object
4
5
  belongs_to :batch_object, :inverse_of => :batch_object_relationships
5
6
 
6
7
  RELATIONSHIP_ADMIN_POLICY = "admin_policy"
@@ -6,7 +6,6 @@ module Ddr::Batch
6
6
  errors = []
7
7
  errors << "#{@error_prefix} Model required for INGEST operation" unless model
8
8
  errors += validate_pre_assigned_pid if pid
9
- errors += validate_collection if model == 'Collection'
10
9
  errors
11
10
  end
12
11
 
@@ -20,11 +19,10 @@ module Ddr::Batch
20
19
 
21
20
  def results_message
22
21
  if pid
23
- message_level = verified ? Logger::INFO : Logger::WARN
24
- verification_result = verified ? "Verified" : "VERIFICATION FAILURE"
25
- ProcessingResultsMessage.new(message_level, "Ingested #{model} #{identifier} into #{pid}...#{verification_result}")
22
+ verification_result = (verified ? "Verified" : "VERIFICATION FAILURE")
23
+ message = "Ingested #{model} #{identifier} into #{pid}...#{verification_result}"
26
24
  else
27
- ProcessingResultsMessage.new(Logger::ERROR, "Attempt to ingest #{model} #{identifier} FAILED")
25
+ message = "Attempt to ingest #{model} #{identifier} FAILED"
28
26
  end
29
27
  end
30
28
 
@@ -36,18 +34,8 @@ module Ddr::Batch
36
34
  return errs
37
35
  end
38
36
 
39
- def validate_collection
40
- errs = []
41
- coll = Collection.new
42
- batch_object_attributes.each { |attr| coll = add_attribute(coll, attr) }
43
- unless coll.valid?
44
- coll.errors.messages.each { |k, v| errs << "#{@error_prefix} Collection #{k} #{v.join(';')}" }
45
- end
46
- errs
47
- end
48
-
49
37
  def ingest(user, opts = {})
50
- repo_object = create_repository_object(user)
38
+ repo_object = create_repository_object
51
39
  if !repo_object.nil? && !repo_object.new_record?
52
40
  ingest_outcome_detail = []
53
41
  ingest_outcome_detail << "Ingested #{model} #{identifier} into #{repo_object.pid}"
@@ -90,27 +78,35 @@ module Ddr::Batch
90
78
  repo_object
91
79
  end
92
80
 
93
- def create_repository_object(user)
81
+ def create_repository_object
94
82
  repo_pid = pid if pid.present?
95
83
  repo_object = nil
96
84
  begin
97
- repo_object = model.constantize.new(:pid => repo_pid)
98
- repo_object.label = label if label
85
+ repo_object = model.constantize.new(:id => repo_pid)
86
+ # repo_object.label = label if label
87
+ repo_object.save(validate: false)
99
88
  batch_object_attributes.each { |a| repo_object = add_attribute(repo_object, a) }
100
- repo_object.save(validate: false, skip_structure_updates: true, user: user)
101
89
  batch_object_datastreams.each { |d| repo_object = populate_datastream(repo_object, d) }
102
90
  batch_object_relationships.each { |r| repo_object = add_relationship(repo_object, r) }
103
- batch_object_roles.each { |r| repo_object = add_role(repo_object, r) }
104
- repo_object.save!(skip_structure_updates: true)
91
+ repo_object.save
105
92
  rescue Exception => e1
106
- logger.fatal("Error in creating repository object #{repo_object.pid} for #{identifier} : #{e1}")
93
+ logger.fatal("Error in creating repository object #{repo_object.id} for #{identifier} : #{e1}")
94
+ repo_clean = false
107
95
  if repo_object && !repo_object.new_record?
108
96
  begin
109
- logger.info("Deleting potentially incomplete #{repo_object.pid} due to error in ingest batch processing")
97
+ logger.info("Deleting potentially incomplete #{repo_object.id} due to error in ingest batch processing")
110
98
  repo_object.destroy
111
99
  rescue Exception => e2
112
- logger.fatal("Error deleting repository object #{repo_object.pid}: #{e2}")
100
+ logger.fatal("Error deleting repository object #{repo_object.id}: #{e2}")
101
+ else
102
+ repo_clean = true
113
103
  end
104
+ else
105
+ repo_clean = true
106
+ end
107
+ if batch.present?
108
+ batch.status = repo_clean ? Batch::STATUS_RESTARTABLE : Batch::STATUS_INTERRUPTED
109
+ batch.save
114
110
  end
115
111
  raise e1
116
112
  end