bulkrax 4.4.0 → 5.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/app/controllers/bulkrax/entries_controller.rb +9 -2
  3. data/app/controllers/bulkrax/exporters_controller.rb +18 -9
  4. data/app/controllers/bulkrax/importers_controller.rb +15 -6
  5. data/app/factories/bulkrax/object_factory.rb +52 -3
  6. data/app/helpers/bulkrax/application_helper.rb +1 -1
  7. data/app/helpers/bulkrax/importers_helper.rb +2 -2
  8. data/app/jobs/bulkrax/create_relationships_job.rb +75 -59
  9. data/app/jobs/bulkrax/delete_job.rb +1 -1
  10. data/app/jobs/bulkrax/export_work_job.rb +2 -2
  11. data/app/jobs/bulkrax/import_file_set_job.rb +2 -1
  12. data/app/jobs/bulkrax/import_work_job.rb +13 -5
  13. data/app/jobs/bulkrax/importer_job.rb +1 -1
  14. data/app/matchers/bulkrax/application_matcher.rb +4 -2
  15. data/app/models/bulkrax/csv_entry.rb +15 -3
  16. data/app/models/bulkrax/entry.rb +2 -1
  17. data/app/models/bulkrax/exporter.rb +15 -7
  18. data/app/models/bulkrax/importer.rb +4 -4
  19. data/app/models/bulkrax/importer_run.rb +6 -0
  20. data/app/models/bulkrax/oai_entry.rb +54 -8
  21. data/app/models/bulkrax/pending_relationship.rb +4 -0
  22. data/app/models/bulkrax/rdf_entry.rb +1 -1
  23. data/app/models/bulkrax/xml_entry.rb +54 -12
  24. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -0
  25. data/app/models/concerns/bulkrax/file_factory.rb +9 -3
  26. data/app/models/concerns/bulkrax/import_behavior.rb +17 -10
  27. data/app/models/concerns/bulkrax/status_info.rb +9 -4
  28. data/app/parsers/bulkrax/application_parser.rb +7 -1
  29. data/app/parsers/bulkrax/bagit_parser.rb +1 -1
  30. data/app/parsers/bulkrax/csv_parser.rb +10 -3
  31. data/app/parsers/bulkrax/xml_parser.rb +6 -0
  32. data/app/views/bulkrax/exporters/_form.html.erb +33 -17
  33. data/app/views/bulkrax/exporters/edit.html.erb +1 -1
  34. data/app/views/bulkrax/exporters/new.html.erb +1 -1
  35. data/app/views/bulkrax/importers/_form.html.erb +5 -5
  36. data/app/views/hyrax/dashboard/sidebar/_bulkrax_sidebar_additions.html.erb +3 -1
  37. data/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb +24 -21
  38. data/lib/bulkrax/entry_spec_helper.rb +173 -0
  39. data/lib/bulkrax/version.rb +1 -1
  40. data/lib/bulkrax.rb +53 -0
  41. data/lib/generators/bulkrax/install_generator.rb +20 -0
  42. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +3 -1
  43. metadata +9 -8
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d53012e0252f8033f5da334d5336c134deb3a6221040cfe4c1c6c01bb473d617
4
- data.tar.gz: e986a3506c073aa533c4ea303a123e8866d2bf7ac6d8ab6df3949682f61b6c05
3
+ metadata.gz: 490e0f170cc1128c00c467c3cc344db627d027a3b857d53dfa33b97805567d4b
4
+ data.tar.gz: 7290801bacea707b7398e674a17acf56e7a770cfb3bea20958169588a4404175
5
5
  SHA512:
6
- metadata.gz: 8ea20593d1164f62fdfda2dbe84bd8cc4e199a65d7e85eea0b68b27e71aeff7579f1b0059042f55eec1c7ab36f8dec9db406ad58cc3a315b29b8e4900f7cb450
7
- data.tar.gz: 89a03cd842d855d48f8c4952b6e9a6daea757e80cb8c0f9ee370b59f90e434b39dfb548deb1757f36463914bd5cccbcf1df6c5ffd9102bb5f0d54b5b7ea6476f
6
+ metadata.gz: a6f5486405e2d2eb7f6c0c49b17ed0926e55a701368e42c93db9b009a5f663682ec4141fe1dd58d0dde132fa747010ada7cd22187b81d60a7e8b6b23cbf2e24d
7
+ data.tar.gz: d56a8780ef074d412ac7406d3f3ddb39b34b17bbe43c61a64bebe06f39952a1d62cbf2dbd01a7012bcd776686def03c5a5e6c556cc1384284c2dea8a89f3eec2
@@ -5,9 +5,10 @@ require_dependency "oai"
5
5
 
6
6
  module Bulkrax
7
7
  class EntriesController < ApplicationController
8
- include Hyrax::ThemedLayoutController
8
+ include Hyrax::ThemedLayoutController if defined?(::Hyrax)
9
9
  before_action :authenticate_user!
10
- with_themed_layout 'dashboard'
10
+ before_action :check_permissions
11
+ with_themed_layout 'dashboard' if defined?(::Hyrax)
11
12
 
12
13
  def show
13
14
  if params[:importer_id].present?
@@ -22,6 +23,7 @@ module Bulkrax
22
23
  @importer = Importer.find(params[:importer_id])
23
24
  @entry = Entry.find(params[:id])
24
25
 
26
+ return unless defined?(::Hyrax)
25
27
  add_breadcrumb t(:'hyrax.controls.home'), main_app.root_path
26
28
  add_breadcrumb t(:'hyrax.dashboard.breadcrumbs.admin'), hyrax.dashboard_path
27
29
  add_breadcrumb 'Importers', bulkrax.importers_path
@@ -34,11 +36,16 @@ module Bulkrax
34
36
  @exporter = Exporter.find(params[:exporter_id])
35
37
  @entry = Entry.find(params[:id])
36
38
 
39
+ return unless defined?(::Hyrax)
37
40
  add_breadcrumb t(:'hyrax.controls.home'), main_app.root_path
38
41
  add_breadcrumb t(:'hyrax.dashboard.breadcrumbs.admin'), hyrax.dashboard_path
39
42
  add_breadcrumb 'Exporters', bulkrax.exporters_path
40
43
  add_breadcrumb @exporter.name, bulkrax.exporter_path(@exporter.id)
41
44
  add_breadcrumb @entry.id
42
45
  end
46
+
47
+ def check_permissions
48
+ raise CanCan::AccessDenied unless current_ability.can_import_works? || current_ability.can_export_works?
49
+ end
43
50
  end
44
51
  end
@@ -4,23 +4,26 @@ require_dependency "bulkrax/application_controller"
4
4
 
5
5
  module Bulkrax
6
6
  class ExportersController < ApplicationController
7
- include Hyrax::ThemedLayoutController
7
+ include Hyrax::ThemedLayoutController if defined?(::Hyrax)
8
8
  include Bulkrax::DownloadBehavior
9
9
  before_action :authenticate_user!
10
+ before_action :check_permissions
10
11
  before_action :set_exporter, only: [:show, :edit, :update, :destroy]
11
- with_themed_layout 'dashboard'
12
+ with_themed_layout 'dashboard' if defined?(::Hyrax)
12
13
 
13
14
  # GET /exporters
14
15
  def index
15
16
  @exporters = Exporter.all
16
17
 
17
- add_exporter_breadcrumbs
18
+ add_exporter_breadcrumbs if defined?(::Hyrax)
18
19
  end
19
20
 
20
21
  # GET /exporters/1
21
22
  def show
22
- add_exporter_breadcrumbs
23
- add_breadcrumb @exporter.name
23
+ if defined?(::Hyrax)
24
+ add_exporter_breadcrumbs
25
+ add_breadcrumb @exporter.name
26
+ end
24
27
 
25
28
  @work_entries = @exporter.entries.where(type: @exporter.parser.entry_class.to_s).page(params[:work_entries_page]).per(30)
26
29
  @collection_entries = @exporter.entries.where(type: @exporter.parser.collection_entry_class.to_s).page(params[:collections_entries_page]).per(30)
@@ -30,16 +33,18 @@ module Bulkrax
30
33
  # GET /exporters/new
31
34
  def new
32
35
  @exporter = Exporter.new
33
-
36
+ return unless defined?(::Hyrax)
34
37
  add_exporter_breadcrumbs
35
38
  add_breadcrumb 'New'
36
39
  end
37
40
 
38
41
  # GET /exporters/1/edit
39
42
  def edit
40
- add_exporter_breadcrumbs
41
- add_breadcrumb @exporter.name, bulkrax.exporter_path(@exporter.id)
42
- add_breadcrumb 'Edit'
43
+ if defined?(::Hyrax)
44
+ add_exporter_breadcrumbs
45
+ add_breadcrumb @exporter.name, bulkrax.exporter_path(@exporter.id)
46
+ add_breadcrumb 'Edit'
47
+ end
43
48
 
44
49
  # Correctly populate export_source_collection input
45
50
  @collection = Collection.find(@exporter.export_source) if @exporter.export_source.present? && @exporter.export_from == 'collection'
@@ -131,5 +136,9 @@ module Bulkrax
131
136
  def file_path
132
137
  "#{@exporter.exporter_export_zip_path}/#{params['exporter']['exporter_export_zip_files']}"
133
138
  end
139
+
140
+ def check_permissions
141
+ raise CanCan::AccessDenied unless current_ability.can_export_works?
142
+ end
134
143
  end
135
144
  end
@@ -6,7 +6,7 @@ require_dependency 'oai'
6
6
  module Bulkrax
7
7
  # rubocop:disable Metrics/ClassLength
8
8
  class ImportersController < ApplicationController
9
- include Hyrax::ThemedLayoutController
9
+ include Hyrax::ThemedLayoutController if defined?(::Hyrax)
10
10
  include Bulkrax::DownloadBehavior
11
11
  include Bulkrax::API
12
12
  include Bulkrax::ValidationHelper
@@ -14,15 +14,16 @@ module Bulkrax
14
14
  protect_from_forgery unless: -> { api_request? }
15
15
  before_action :token_authenticate!, if: -> { api_request? }, only: [:create, :update, :delete]
16
16
  before_action :authenticate_user!, unless: -> { api_request? }
17
+ before_action :check_permissions
17
18
  before_action :set_importer, only: [:show, :edit, :update, :destroy]
18
- with_themed_layout 'dashboard'
19
+ with_themed_layout 'dashboard' if defined?(::Hyrax)
19
20
 
20
21
  # GET /importers
21
22
  def index
22
23
  @importers = Importer.all
23
24
  if api_request?
24
25
  json_response('index')
25
- else
26
+ elsif defined?(::Hyrax)
26
27
  add_importer_breadcrumbs
27
28
  end
28
29
  end
@@ -31,7 +32,7 @@ module Bulkrax
31
32
  def show
32
33
  if api_request?
33
34
  json_response('show')
34
- else
35
+ elsif defined?(::Hyrax)
35
36
  add_importer_breadcrumbs
36
37
  add_breadcrumb @importer.name
37
38
 
@@ -46,7 +47,7 @@ module Bulkrax
46
47
  @importer = Importer.new
47
48
  if api_request?
48
49
  json_response('new')
49
- else
50
+ elsif defined?(::Hyrax)
50
51
  add_importer_breadcrumbs
51
52
  add_breadcrumb 'New'
52
53
  end
@@ -56,7 +57,7 @@ module Bulkrax
56
57
  def edit
57
58
  if api_request?
58
59
  json_response('edit')
59
- else
60
+ elsif defined?(::Hyrax)
60
61
  add_importer_breadcrumbs
61
62
  add_breadcrumb @importer.name, bulkrax.importer_path(@importer.id)
62
63
  add_breadcrumb 'Edit'
@@ -76,6 +77,9 @@ module Bulkrax
76
77
  @importer = Importer.new(importer_params)
77
78
  field_mapping_params
78
79
  @importer.validate_only = true if params[:commit] == 'Create and Validate'
80
+ # the following line is needed to handle updating remote files of a FileSet
81
+ # on a new import otherwise it only gets updated during the update path
82
+ @importer.parser_fields['update_files'] = true if params[:commit] == 'Create and Import'
79
83
  if @importer.save
80
84
  files_for_import(file, cloud_files)
81
85
  if params[:commit] == 'Create and Import'
@@ -155,6 +159,7 @@ module Bulkrax
155
159
  # GET /importer/1/upload_corrected_entries
156
160
  def upload_corrected_entries
157
161
  @importer = Importer.find(params[:importer_id])
162
+ return unless defined?(::Hyrax)
158
163
  add_breadcrumb t(:'hyrax.controls.home'), main_app.root_path
159
164
  add_breadcrumb t(:'hyrax.dashboard.breadcrumbs.admin'), hyrax.dashboard_path
160
165
  add_breadcrumb 'Importers', bulkrax.importers_path
@@ -316,6 +321,10 @@ module Bulkrax
316
321
  end
317
322
  @importer.save
318
323
  end
324
+
325
+ def check_permissions
326
+ raise CanCan::AccessDenied unless current_ability.can_import_works?
327
+ end
319
328
  end
320
329
  # rubocop:enable Metrics/ClassLength
321
330
  end
@@ -6,6 +6,27 @@ module Bulkrax
6
6
  include Bulkrax::FileFactory
7
7
  include DynamicRecordLookup
8
8
 
9
+ # @api private
10
+ #
11
+ # These are the attributes that we assume all "work type" classes (e.g. the given :klass) will
12
+ # have in addition to their specific attributes.
13
+ #
14
+ # @return [Array<Symbol>]
15
+ # @see #permitted_attributes
16
+ class_attribute :base_permitted_attributes,
17
+ default: %i[id edit_users edit_groups read_groups visibility work_members_attributes admin_set_id]
18
+
19
+ # @return [Boolean]
20
+ #
21
+ # @example
22
+ # Bulkrax::ObjectFactory.transformation_removes_blank_hash_values = true
23
+ #
24
+ # @see #transform_attributes
25
+ # @see https://github.com/samvera-labs/bulkrax/pull/708 For discussion concerning this feature
26
+ # @see https://github.com/samvera-labs/bulkrax/wiki/Interacting-with-Metadata For documentation
27
+ # concerning default behavior.
28
+ class_attribute :transformation_removes_blank_hash_values, default: false
29
+
9
30
  define_model_callbacks :save, :create
10
31
  attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :related_parents_parsed_mapping, :importer_run_id
11
32
 
@@ -58,7 +79,7 @@ module Bulkrax
58
79
  elsif klass == FileSet
59
80
  update_file_set(attrs)
60
81
  else
61
- work_actor.update(environment(attrs))
82
+ update_work(attrs)
62
83
  end
63
84
  end
64
85
  object.apply_depositor_metadata(@user) && object.save! if object.depositor.nil?
@@ -104,7 +125,7 @@ module Bulkrax
104
125
  elsif klass == FileSet
105
126
  create_file_set(attrs)
106
127
  else
107
- work_actor.create(environment(attrs))
128
+ create_work(attrs)
108
129
  end
109
130
  end
110
131
  end
@@ -139,6 +160,14 @@ module Bulkrax
139
160
  Hyrax::CurationConcern.actor
140
161
  end
141
162
 
163
+ def create_work(attrs)
164
+ work_actor.create(environment(attrs))
165
+ end
166
+
167
+ def update_work(attrs)
168
+ work_actor.update(environment(attrs))
169
+ end
170
+
142
171
  def create_collection(attrs)
143
172
  attrs = clean_attrs(attrs)
144
173
  attrs = collection_type(attrs)
@@ -227,12 +256,32 @@ module Bulkrax
227
256
  def transform_attributes(update: false)
228
257
  @transform_attributes = attributes.slice(*permitted_attributes)
229
258
  @transform_attributes.merge!(file_attributes(update_files)) if with_files
259
+ @transform_attributes = remove_blank_hash_values(@transform_attributes) if transformation_removes_blank_hash_values?
230
260
  update ? @transform_attributes.except(:id) : @transform_attributes
231
261
  end
232
262
 
233
263
  # Regardless of what the Parser gives us, these are the properties we are prepared to accept.
234
264
  def permitted_attributes
235
- klass.properties.keys.map(&:to_sym) + %i[id edit_users edit_groups read_groups visibility work_members_attributes admin_set_id]
265
+ klass.properties.keys.map(&:to_sym) + base_permitted_attributes
266
+ end
267
+
268
+ # Return a copy of the given attributes, such that all values that are empty or an array of all
269
+ # empty values are fully emptied. (See implementation details)
270
+ #
271
+ # @param attributes [Hash]
272
+ # @return [Hash]
273
+ #
274
+ # @see https://github.com/emory-libraries/dlp-curate/issues/1973
275
+ def remove_blank_hash_values(attributes)
276
+ dupe = attributes.dup
277
+ dupe.each do |key, values|
278
+ if values.is_a?(Array) && values.all? { |value| value.is_a?(String) && value.empty? }
279
+ dupe[key] = []
280
+ elsif values.is_a?(String) && values.empty?
281
+ dupe[key] = nil
282
+ end
283
+ end
284
+ dupe
236
285
  end
237
286
  end
238
287
  end
@@ -3,7 +3,7 @@ require 'coderay'
3
3
 
4
4
  module Bulkrax
5
5
  module ApplicationHelper
6
- include ::Hyrax::HyraxHelperBehavior
6
+ include ::Hyrax::HyraxHelperBehavior if defined?(::Hyrax)
7
7
 
8
8
  def coderay(value, opts)
9
9
  CodeRay
@@ -2,9 +2,9 @@
2
2
 
3
3
  module Bulkrax
4
4
  module ImportersHelper
5
- # borrowd from batch-importer https://github.com/samvera-labs/hyrax-batch_ingest/blob/main/app/controllers/hyrax/batch_ingest/batches_controller.rb
5
+ # borrowed from batch-importer https://github.com/samvera-labs/hyrax-batch_ingest/blob/main/app/controllers/hyrax/batch_ingest/batches_controller.rb
6
6
  def available_admin_sets
7
- # Restrict available_admin_sets to only those current user can desposit to.
7
+ # Restrict available_admin_sets to only those current user can deposit to.
8
8
  @available_admin_sets ||= Hyrax::Collections::PermissionsService.source_ids_for_deposit(ability: current_ability, source_type: 'admin_set').map do |admin_set_id|
9
9
  [AdminSet.find(admin_set_id).title.first, admin_set_id]
10
10
  end
@@ -17,12 +17,29 @@ module Bulkrax
17
17
  # NOTE: In the context of this job, "identifier" is used to generically refer
18
18
  # to either a record's ID or a Bulkrax::Entry's source_identifier.
19
19
  class CreateRelationshipsJob < ApplicationJob
20
+ ##
21
+ # @api public
22
+ # @since v5.0.1
23
+ #
24
+ # Once we've created the relationships, should we then index the works' file_sets to ensure
25
+ # that we have the proper indexed values. This can help set things like `is_page_of_ssim` for
26
+ # IIIF manifest and search results of file sets.
27
+ #
28
+ # @note As of v5.0.1 the default behavior is to not perform this. That preserves past
29
+ # implementations. However, we might determine that we want to change the default
30
+ # behavior, which would likely mean a major version change.
31
+ #
32
+ # @example
33
+ # # In config/initializers/bulkrax.rb
34
+ # Bulkrax::CreateRelationshipsJob.update_child_records_works_file_sets = true
35
+ #
36
+ # @see https://github.com/scientist-softserv/louisville-hyku/commit/128a9ef
37
+ class_attribute :update_child_records_works_file_sets, default: false
38
+
20
39
  include DynamicRecordLookup
21
40
 
22
41
  queue_as :import
23
42
 
24
- attr_accessor :child_records, :child_entry, :parent_record, :parent_entry, :importer_run_id
25
-
26
43
  # @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers
27
44
  # @param importer_run [Bulkrax::ImporterRun] current importer run (needed to properly update counters)
28
45
  #
@@ -31,82 +48,81 @@ module Bulkrax
31
48
  # Whether the @base_entry is the parent or the child in the relationship is determined by the presence of a
32
49
  # parent_identifier or child_identifier param. For example, if a parent_identifier is passed, we know @base_entry
33
50
  # is the child in the relationship, and vice versa if a child_identifier is passed.
51
+ #
52
+ # rubocop:disable Metrics/MethodLength
34
53
  def perform(parent_identifier:, importer_run_id:) # rubocop:disable Metrics/AbcSize
35
- pending_relationships = Bulkrax::PendingRelationship.find_each.select do |rel|
36
- rel.importer_run_id == importer_run_id && rel.parent_id == parent_identifier
37
- end.sort_by(&:order)
38
-
39
- @importer_run_id = importer_run_id
40
- @parent_entry, @parent_record = find_record(parent_identifier, importer_run_id)
41
- @child_records = { works: [], collections: [] }
42
- pending_relationships.each do |rel|
43
- raise ::StandardError, %("#{rel}" needs either a child or a parent to create a relationship) if rel.child_id.nil? || rel.parent_id.nil?
44
- @child_entry, child_record = find_record(rel.child_id, importer_run_id)
45
- if child_record
46
- child_record.is_a?(::Collection) ? @child_records[:collections] << child_record : @child_records[:works] << child_record
54
+ importer_run = Bulkrax::ImporterRun.find(importer_run_id)
55
+ ability = Ability.new(importer_run.user)
56
+
57
+ parent_entry, parent_record = find_record(parent_identifier, importer_run_id)
58
+
59
+ number_of_successes = 0
60
+ number_of_failures = 0
61
+ errors = []
62
+
63
+ ActiveRecord::Base.uncached do
64
+ Bulkrax::PendingRelationship.where(parent_id: parent_identifier, importer_run_id: importer_run_id)
65
+ .ordered.find_each do |rel|
66
+ process(relationship: rel, importer_run_id: importer_run_id, parent_record: parent_record, ability: ability)
67
+ number_of_successes += 1
68
+ rescue => e
69
+ number_of_failures += 1
70
+ errors << e
47
71
  end
48
72
  end
49
73
 
50
- if (child_records[:collections].blank? && child_records[:works].blank?) || parent_record.nil?
74
+ # save record if members were added
75
+ parent_record.save! if @parent_record_members_added
76
+
77
+ # rubocop:disable Rails/SkipsModelValidations
78
+ if errors.present?
79
+ importer_run.increment!(:failed_relationships, number_of_failures)
80
+ parent_entry&.set_status_info(errors.last, importer_run)
81
+
82
+ # TODO: This can create an infinite job cycle, consider a time to live tracker.
51
83
  reschedule({ parent_identifier: parent_identifier, importer_run_id: importer_run_id })
52
84
  return false # stop current job from continuing to run after rescheduling
85
+ else
86
+ Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships, number_of_successes)
53
87
  end
54
- @parent_entry ||= Bulkrax::Entry.where(identifier: parent_identifier,
55
- importerexporter_id: ImporterRun.find(importer_run_id).importer_id,
56
- importerexporter_type: "Bulkrax::Importer").first
57
- create_relationships
58
- pending_relationships.each(&:destroy)
59
- rescue ::StandardError => e
60
- parent_entry ? parent_entry.status_info(e) : child_entry.status_info(e)
61
- Bulkrax::ImporterRun.find(importer_run_id).increment!(:failed_relationships) # rubocop:disable Rails/SkipsModelValidations
88
+ # rubocop:enable Rails/SkipsModelValidations
62
89
  end
90
+ # rubocop:enable Metrics/MethodLength
63
91
 
64
92
  private
65
93
 
66
- def create_relationships
67
- if parent_record.is_a?(::Collection)
68
- collection_parent_work_child unless child_records[:works].empty?
69
- collection_parent_collection_child unless child_records[:collections].empty?
70
- else
71
- work_parent_work_child unless child_records[:works].empty?
72
- raise ::StandardError, 'a Collection may not be assigned as a child of a Work' if child_records[:collections].present?
73
- end
74
- end
94
+ def process(relationship:, importer_run_id:, parent_record:, ability:)
95
+ raise "#{relationship} needs a child to create relationship" if relationship.child_id.nil?
96
+ raise "#{relationship} needs a parent to create relationship" if relationship.parent_id.nil?
75
97
 
76
- def user
77
- @user ||= Bulkrax::ImporterRun.find(importer_run_id).importer.user
78
- end
98
+ _child_entry, child_record = find_record(relationship.child_id, importer_run_id)
99
+ raise "#{relationship} could not find child record" unless child_record
79
100
 
80
- # Work-Collection membership is added to the child as member_of_collection_ids
81
- # This is adding the reverse relationship, from the child to the parent
82
- def collection_parent_work_child
83
- child_work_ids = child_records[:works].map(&:id)
84
- parent_record.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
101
+ raise "Cannot add child collection (ID=#{relationship.child_id}) to parent work (ID=#{relationship.parent_id})" if child_record.collection? && parent_record.work?
85
102
 
86
- parent_record.add_member_objects(child_work_ids)
87
- ImporterRun.find(importer_run_id).increment!(:processed_relationships, child_work_ids.count) # rubocop:disable Rails/SkipsModelValidations
103
+ ability.authorize!(:edit, child_record)
104
+
105
+ # We could do this outside of the loop, but that could lead to odd counter failures.
106
+ ability.authorize!(:edit, parent_record)
107
+
108
+ parent_record.is_a?(Collection) ? add_to_collection(child_record, parent_record) : add_to_work(child_record, parent_record)
109
+
110
+ child_record.file_sets.each(&:update_index) if update_child_records_works_file_sets? && child_record.respond_to?(:file_sets)
111
+ relationship.destroy
88
112
  end
89
113
 
90
- # Collection-Collection membership is added to the as member_ids
91
- def collection_parent_collection_child
92
- child_records[:collections].each do |child_record|
93
- ::Hyrax::Collections::NestedCollectionPersistenceService.persist_nested_collection_for(parent: parent_record, child: child_record)
94
- ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
95
- end
114
+ def add_to_collection(child_record, parent_record)
115
+ child_record.member_of_collections << parent_record
116
+ child_record.save!
96
117
  end
97
118
 
98
- # Work-Work membership is added to the parent as member_ids
99
- def work_parent_work_child
100
- records_hash = {}
101
- child_records[:works].each_with_index do |child_record, i|
102
- records_hash[i] = { id: child_record.id }
103
- end
104
- attrs = { work_members_attributes: records_hash }
105
- parent_record.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
106
- env = Hyrax::Actors::Environment.new(parent_record, Ability.new(user), attrs)
119
+ def add_to_work(child_record, parent_record)
120
+ return true if parent_record.ordered_members.to_a.include?(child_record)
107
121
 
108
- Hyrax::CurationConcern.actor.update(env)
109
- ImporterRun.find(importer_run_id).increment!(:processed_relationships, child_records[:works].count) # rubocop:disable Rails/SkipsModelValidations
122
+ parent_record.ordered_members << child_record
123
+ @parent_record_members_added = true
124
+ # TODO: Do we need to save the child record?
125
+ child_record.save!
110
126
  end
111
127
 
112
128
  def reschedule(parent_identifier:, importer_run_id:)
@@ -13,7 +13,7 @@ module Bulkrax
13
13
  entry.save!
14
14
  entry.importer.current_run = ImporterRun.find(importer_run.id)
15
15
  entry.importer.record_status
16
- entry.status_info("Deleted", ImporterRun.find(importer_run.id))
16
+ entry.set_status_info("Deleted", ImporterRun.find(importer_run.id))
17
17
  end
18
18
  # rubocop:enable Rails/SkipsModelValidations
19
19
  end
@@ -29,9 +29,9 @@ module Bulkrax
29
29
  return entry if exporter_run.enqueued_records.positive?
30
30
 
31
31
  if exporter_run.failed_records.positive?
32
- exporter_run.exporter.status_info('Complete (with failures)')
32
+ exporter_run.exporter.set_status_info('Complete (with failures)')
33
33
  else
34
- exporter_run.exporter.status_info('Complete')
34
+ exporter_run.exporter.set_status_info('Complete')
35
35
  end
36
36
 
37
37
  return entry
@@ -2,6 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class MissingParentError < ::StandardError; end
5
+
5
6
  class ImportFileSetJob < ApplicationJob
6
7
  include DynamicRecordLookup
7
8
 
@@ -40,7 +41,7 @@ module Bulkrax
40
41
  ImportFileSetJob.set(wait: (entry.import_attempts + 1).minutes).perform_later(entry_id, importer_run_id)
41
42
  else
42
43
  ImporterRun.find(importer_run_id).decrement!(:enqueued_records) # rubocop:disable Rails/SkipsModelValidations
43
- entry.status_info(e)
44
+ entry.set_status_info(e)
44
45
  end
45
46
  end
46
47
 
@@ -5,7 +5,7 @@ module Bulkrax
5
5
  queue_as :import
6
6
 
7
7
  # rubocop:disable Rails/SkipsModelValidations
8
- def perform(entry_id, run_id, *)
8
+ def perform(entry_id, run_id, time_to_live = 3, *)
9
9
  entry = Entry.find(entry_id)
10
10
  importer_run = ImporterRun.find(run_id)
11
11
  entry.build
@@ -24,13 +24,21 @@ module Bulkrax
24
24
  entry.save!
25
25
  entry.importer.current_run = importer_run
26
26
  entry.importer.record_status
27
- rescue Bulkrax::CollectionsCreatedError
28
- reschedule(entry_id, run_id)
27
+ rescue Bulkrax::CollectionsCreatedError => e
28
+ Rails.logger.warn("#{self.class} entry_id: #{entry_id}, run_id: #{run_id} encountered #{e.class}: #{e.message}")
29
+ # You get 3 attempts at the above perform before we have the import exception cascade into
30
+ # the Sidekiq retry ecosystem.
31
+ # rubocop:disable Style/IfUnlessModifier
32
+ if time_to_live <= 1
33
+ raise "Exhauted reschedule limit for #{self.class} entry_id: #{entry_id}, run_id: #{run_id}. Attemping retries"
34
+ end
35
+ # rubocop:enable Style/IfUnlessModifier
36
+ reschedule(entry_id, run_id, time_to_live)
29
37
  end
30
38
  # rubocop:enable Rails/SkipsModelValidations
31
39
 
32
- def reschedule(entry_id, run_id)
33
- ImportWorkJob.set(wait: 1.minute).perform_later(entry_id, run_id)
40
+ def reschedule(entry_id, run_id, time_to_live)
41
+ ImportWorkJob.set(wait: 1.minute).perform_later(entry_id, run_id, time_to_live - 1)
34
42
  end
35
43
  end
36
44
  end
@@ -13,7 +13,7 @@ module Bulkrax
13
13
  update_current_run_counters(importer)
14
14
  schedule(importer) if importer.schedulable?
15
15
  rescue CSV::MalformedCSVError => e
16
- importer.status_info(e)
16
+ importer.set_status_info(e)
17
17
  end
18
18
 
19
19
  def import(importer, only_updates_since_last_import)
@@ -6,6 +6,10 @@ module Bulkrax
6
6
  class ApplicationMatcher
7
7
  attr_accessor :to, :from, :parsed, :if, :split, :excluded, :nested_type
8
8
 
9
+ # New parse methods will need to be added here; you'll also want to define a corresponding
10
+ # "parse_#{field}" method.
11
+ class_attribute :parsed_fields, instance_writer: false, default: ['remote_files', 'language', 'subject', 'types', 'model', 'resource_type', 'format_original']
12
+
9
13
  def initialize(args)
10
14
  args.each do |k, v|
11
15
  send("#{k}=", v)
@@ -38,8 +42,6 @@ module Bulkrax
38
42
  end
39
43
 
40
44
  def process_parse
41
- # New parse methods will need to be added here
42
- parsed_fields = ['remote_files', 'language', 'subject', 'types', 'model', 'resource_type', 'format_original']
43
45
  # This accounts for prefixed matchers
44
46
  parser = parsed_fields.find { |field| to&.include? field }
45
47
 
@@ -7,7 +7,7 @@ module Bulkrax
7
7
  # We do too much in these entry classes. We need to extract the common logic from the various
8
8
  # entry models into a module that can be shared between them.
9
9
  class CsvEntry < Entry # rubocop:disable Metrics/ClassLength
10
- serialize :raw_metadata, JSON
10
+ serialize :raw_metadata, Bulkrax::NormalizedJson
11
11
 
12
12
  def self.fields_from_data(data)
13
13
  data.headers.flatten.compact.uniq
@@ -36,10 +36,14 @@ module Bulkrax
36
36
 
37
37
  def build_metadata
38
38
  raise StandardError, 'Record not found' if record.nil?
39
- raise StandardError, "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}" unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
39
+ unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
40
+ raise StandardError,
41
+ "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}"
42
+ end
40
43
 
41
44
  self.parsed_metadata = {}
42
45
  add_identifier
46
+ establish_factory_class
43
47
  add_ingested_metadata
44
48
  # TODO(alishaevn): remove the collections stuff entirely and only reference collections via the new parents code
45
49
  add_collections
@@ -56,6 +60,12 @@ module Bulkrax
56
60
  self.parsed_metadata[work_identifier] = [record[source_identifier]]
57
61
  end
58
62
 
63
+ def establish_factory_class
64
+ parser.model_field_mappings.each do |key|
65
+ add_metadata('model', record[key]) if record.key?(key)
66
+ end
67
+ end
68
+
59
69
  def add_metadata_for_model
60
70
  if factory_class == Collection
61
71
  add_collection_type_gid
@@ -107,7 +117,9 @@ module Bulkrax
107
117
  # Metadata required by Bulkrax for round-tripping
108
118
  def build_system_metadata
109
119
  self.parsed_metadata['id'] = hyrax_record.id
110
- self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
120
+ source_id = hyrax_record.send(work_identifier)
121
+ source_id = source_id.to_a.first if source_id.is_a?(ActiveTriples::Relation)
122
+ self.parsed_metadata[source_identifier] = source_id
111
123
  self.parsed_metadata[key_for_export('model')] = hyrax_record.has_model.first
112
124
  end
113
125
 
@@ -4,6 +4,7 @@ module Bulkrax
4
4
  # Custom error class for collections_created?
5
5
  class CollectionsCreatedError < RuntimeError; end
6
6
  class OAIError < RuntimeError; end
7
+
7
8
  class Entry < ApplicationRecord
8
9
  include Bulkrax::HasMatchers
9
10
  include Bulkrax::ImportBehavior
@@ -15,7 +16,7 @@ module Bulkrax
15
16
  alias importer importerexporter
16
17
  alias exporter importerexporter
17
18
 
18
- serialize :parsed_metadata, JSON
19
+ serialize :parsed_metadata, Bulkrax::NormalizedJson
19
20
  # Do not serialize raw_metadata, so that we can support XML or other formats
20
21
  serialize :collection_ids, Array
21
22