bulkrax 7.0.0 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/bulkrax/datatables.js +1 -1
  3. data/app/controllers/bulkrax/exporters_controller.rb +1 -1
  4. data/app/controllers/bulkrax/importers_controller.rb +2 -1
  5. data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
  6. data/app/factories/bulkrax/object_factory.rb +135 -163
  7. data/app/factories/bulkrax/object_factory_interface.rb +491 -0
  8. data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
  9. data/app/helpers/bulkrax/importers_helper.rb +1 -1
  10. data/app/helpers/bulkrax/validation_helper.rb +4 -4
  11. data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
  12. data/app/jobs/bulkrax/delete_job.rb +3 -2
  13. data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
  14. data/app/jobs/bulkrax/import_file_set_job.rb +5 -2
  15. data/app/jobs/bulkrax/importer_job.rb +18 -2
  16. data/app/matchers/bulkrax/application_matcher.rb +0 -2
  17. data/app/models/bulkrax/csv_collection_entry.rb +1 -1
  18. data/app/models/bulkrax/csv_entry.rb +7 -6
  19. data/app/models/bulkrax/entry.rb +7 -11
  20. data/app/models/bulkrax/exporter.rb +2 -2
  21. data/app/models/bulkrax/importer.rb +1 -3
  22. data/app/models/bulkrax/oai_entry.rb +0 -3
  23. data/app/models/bulkrax/oai_set_entry.rb +1 -1
  24. data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
  25. data/app/models/bulkrax/rdf_entry.rb +70 -69
  26. data/app/models/bulkrax/xml_entry.rb +0 -1
  27. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
  28. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  29. data/app/models/concerns/bulkrax/file_factory.rb +174 -118
  30. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
  31. data/app/models/concerns/bulkrax/has_matchers.rb +28 -25
  32. data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
  33. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
  34. data/app/parsers/bulkrax/application_parser.rb +31 -7
  35. data/app/parsers/bulkrax/bagit_parser.rb +175 -174
  36. data/app/parsers/bulkrax/csv_parser.rb +15 -5
  37. data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
  38. data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
  39. data/app/parsers/bulkrax/xml_parser.rb +0 -2
  40. data/app/services/bulkrax/factory_class_finder.rb +2 -0
  41. data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
  42. data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
  43. data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
  44. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
  45. data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
  46. data/app/views/bulkrax/entries/show.html.erb +9 -8
  47. data/app/views/bulkrax/exporters/edit.html.erb +1 -1
  48. data/app/views/bulkrax/exporters/new.html.erb +1 -1
  49. data/app/views/bulkrax/exporters/show.html.erb +4 -2
  50. data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
  51. data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
  52. data/app/views/bulkrax/importers/edit.html.erb +1 -1
  53. data/app/views/bulkrax/importers/new.html.erb +1 -1
  54. data/app/views/bulkrax/importers/show.html.erb +1 -1
  55. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
  56. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
  57. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
  58. data/config/locales/bulkrax.en.yml +7 -0
  59. data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
  60. data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
  61. data/lib/bulkrax/engine.rb +23 -6
  62. data/lib/bulkrax/version.rb +1 -1
  63. data/lib/bulkrax.rb +54 -52
  64. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
  65. data/lib/tasks/bulkrax_tasks.rake +1 -0
  66. data/lib/tasks/reset.rake +4 -4
  67. metadata +24 -8
  68. data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
  69. data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
  70. data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'csv'
4
-
5
3
  module Bulkrax
6
4
  # TODO: We need to rework this class some to address the Metrics/ClassLength rubocop offense.
7
5
  # We do too much in these entry classes. We need to extract the common logic from the various
@@ -106,7 +104,7 @@ module Bulkrax
106
104
  end
107
105
 
108
106
  def add_metadata_for_model
109
- if defined?(::Collection) && factory_class == ::Collection
107
+ if factory_class.present? && factory_class == Bulkrax.collection_model_class
110
108
  add_collection_type_gid if defined?(::Hyrax)
111
109
  # add any additional collection metadata methods here
112
110
  elsif factory_class == Bulkrax.file_model_class
@@ -146,7 +144,7 @@ module Bulkrax
146
144
  self.parsed_metadata = {}
147
145
 
148
146
  build_system_metadata
149
- build_files_metadata if defined?(Collection) && !hyrax_record.is_a?(Collection)
147
+ build_files_metadata if Bulkrax.collection_model_class.present? && !hyrax_record.is_a?(Bulkrax.collection_model_class)
150
148
  build_relationship_metadata
151
149
  build_mapping_metadata
152
150
  self.save!
@@ -158,9 +156,12 @@ module Bulkrax
158
156
  def build_system_metadata
159
157
  self.parsed_metadata['id'] = hyrax_record.id
160
158
  source_id = hyrax_record.send(work_identifier)
161
- source_id = source_id.to_a.first if source_id.is_a?(ActiveTriples::Relation)
159
+ # Because ActiveTriples::Relation does not respond to #to_ary we can't rely on Array.wrap universally
160
+ source_id = source_id.to_a if source_id.is_a?(ActiveTriples::Relation)
161
+ source_id = Array.wrap(source_id).first
162
162
  self.parsed_metadata[source_identifier] = source_id
163
- self.parsed_metadata[key_for_export('model')] = hyrax_record.has_model.first
163
+ model_name = hyrax_record.respond_to?(:to_rdf_representation) ? hyrax_record.to_rdf_representation : hyrax_record.has_model.first
164
+ self.parsed_metadata[key_for_export('model')] = model_name
164
165
  end
165
166
 
166
167
  def build_files_metadata
@@ -103,22 +103,18 @@ module Bulkrax
103
103
  self.importerexporter_type == 'Bulkrax::Exporter'
104
104
  end
105
105
 
106
- def valid_system_id(model_class)
107
- return true if model_class.properties.keys.include?(work_identifier)
108
- raise(
109
- "#{model_class} does not implement the system_identifier_field: #{work_identifier}"
110
- )
111
- end
112
-
113
106
  def last_run
114
107
  self.importerexporter&.last_run
115
108
  end
116
109
 
117
110
  def find_collection(collection_identifier)
118
- return unless Collection.properties.keys.include?(work_identifier)
119
- Collection.where(
120
- work_identifier => collection_identifier
121
- ).detect { |m| m.send(work_identifier).include?(collection_identifier) }
111
+ Bulkrax.object_factory.search_by_property(
112
+ klass: Bulkrax.collection_model_class,
113
+ value: collection_identifier,
114
+ search_field: work_identifier,
115
+ name_field: work_identifier,
116
+ verify_property: true
117
+ )
122
118
  end
123
119
  end
124
120
  end
@@ -137,8 +137,8 @@ module Bulkrax
137
137
  end
138
138
 
139
139
  def export_properties
140
- properties = Bulkrax.curation_concerns.map { |work| work.properties.keys }.flatten.uniq.sort
141
- properties.reject { |prop| Bulkrax.reserved_properties.include?(prop) }
140
+ # TODO: Does this work for Valkyrie?
141
+ Bulkrax.object_factory.export_properties
142
142
  end
143
143
 
144
144
  def metadata_only?
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'iso8601'
4
-
5
3
  module Bulkrax
6
4
  class Importer < ApplicationRecord # rubocop:disable Metrics/ClassLength
7
5
  include Bulkrax::ImporterExporterBehavior
@@ -18,7 +16,7 @@ module Bulkrax
18
16
  validates :admin_set_id, presence: true if defined?(::Hyrax)
19
17
  validates :parser_klass, presence: true
20
18
 
21
- delegate :valid_import?, :write_errored_entries_file, :visibility, to: :parser
19
+ delegate :create_parent_child_relationships, :valid_import?, :write_errored_entries_file, :visibility, to: :parser
22
20
 
23
21
  attr_accessor :only_updates, :file_style, :file
24
22
  attr_writer :current_run
@@ -1,8 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'erb'
4
- require 'ostruct'
5
-
6
3
  module Bulkrax
7
4
  class OaiEntry < Entry
8
5
  serialize :raw_metadata, Bulkrax::NormalizedJson
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class OaiSetEntry < OaiEntry
5
- self.default_work_type = "Collection"
5
+ self.default_work_type = Bulkrax.collection_model_class.to_s
6
6
 
7
7
  def build_metadata
8
8
  self.parsed_metadata = self.raw_metadata
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class RdfCollectionEntry < RdfEntry
5
- self.default_work_type = "Collection"
5
+ self.default_work_type = Bulkrax.collection_model_class.to_s
6
6
  def record
7
7
  @record ||= self.raw_metadata
8
8
  end
@@ -1,91 +1,92 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'rdf'
4
- module Bulkrax
5
- class RdfEntry < Entry
6
- serialize :raw_metadata, Bulkrax::NormalizedJson
3
+ unless ENV.fetch('BULKRAX_NO_RDF', 'false').to_s == 'true'
4
+ module Bulkrax
5
+ class RdfEntry < Entry
6
+ serialize :raw_metadata, Bulkrax::NormalizedJson
7
7
 
8
- def self.read_data(path)
9
- RDF::Reader.open(path)
10
- end
8
+ def self.read_data(path)
9
+ RDF::Reader.open(path)
10
+ end
11
11
 
12
- def self.fields_from_data(data)
13
- data.predicates.map(&:to_s)
14
- end
12
+ def self.fields_from_data(data)
13
+ data.predicates.map(&:to_s)
14
+ end
15
15
 
16
- def self.data_for_entry(data, source_id, parser)
17
- reader = data
18
- format = reader.class.format.to_sym
19
- collections = []
20
- children = []
21
- delete = nil
22
- data = RDF::Writer.for(format).buffer do |writer|
23
- reader.each_statement do |statement|
24
- collections << statement.object.to_s if parent_field(parser).present? && parent_field(parser) == statement.predicate.to_s
25
- children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
26
- delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
27
- writer << statement
16
+ def self.data_for_entry(data, source_id, parser)
17
+ reader = data
18
+ format = reader.class.format.to_sym
19
+ collections = []
20
+ children = []
21
+ delete = nil
22
+ data = RDF::Writer.for(format).buffer do |writer|
23
+ reader.each_statement do |statement|
24
+ collections << statement.object.to_s if parent_field(parser).present? && parent_field(parser) == statement.predicate.to_s
25
+ children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
26
+ delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
27
+ writer << statement
28
+ end
28
29
  end
30
+ return {
31
+ source_id => reader.subjects.first.to_s,
32
+ delete: delete,
33
+ format: format,
34
+ data: data,
35
+ collection: collections,
36
+ children: children
37
+ }
29
38
  end
30
- return {
31
- source_id => reader.subjects.first.to_s,
32
- delete: delete,
33
- format: format,
34
- data: data,
35
- collection: collections,
36
- children: children
37
- }
38
- end
39
39
 
40
- def self.related_children_parsed_mapping
41
- return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
40
+ def self.related_children_parsed_mapping
41
+ return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
42
42
 
43
- rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
44
- return if rdf_related_children_field_mapping.blank?
43
+ rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
44
+ return if rdf_related_children_field_mapping.blank?
45
45
 
46
- @related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
47
- end
46
+ @related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
47
+ end
48
48
 
49
- def record
50
- @record ||= RDF::Reader.for(self.raw_metadata['format'].to_sym).new(self.raw_metadata['data'])
51
- end
49
+ def record
50
+ @record ||= RDF::Reader.for(self.raw_metadata['format'].to_sym).new(self.raw_metadata['data'])
51
+ end
52
52
 
53
- def build_metadata
54
- raise StandardError, 'Record not found' if record.nil?
55
- raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
53
+ def build_metadata
54
+ raise StandardError, 'Record not found' if record.nil?
55
+ raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
56
56
 
57
- self.parsed_metadata = {}
58
- self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]
57
+ self.parsed_metadata = {}
58
+ self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]
59
59
 
60
- record.each_statement do |statement|
61
- # Only process the subject for our record (in case other data is in the file)
62
- next unless statement.subject.to_s == self.raw_metadata[source_identifier]
63
- add_metadata(statement.predicate.to_s, statement.object.to_s)
64
- end
65
- add_visibility
66
- add_rights_statement
67
- add_admin_set_id
68
- add_collections
69
- add_local
70
- self.parsed_metadata['file'] = self.raw_metadata['file']
60
+ record.each_statement do |statement|
61
+ # Only process the subject for our record (in case other data is in the file)
62
+ next unless statement.subject.to_s == self.raw_metadata[source_identifier]
63
+ add_metadata(statement.predicate.to_s, statement.object.to_s)
64
+ end
65
+ add_visibility
66
+ add_rights_statement
67
+ add_admin_set_id
68
+ add_collections
69
+ add_local
70
+ self.parsed_metadata['file'] = self.raw_metadata['file']
71
71
 
72
- self.parsed_metadata
73
- end
72
+ self.parsed_metadata
73
+ end
74
74
 
75
- def collections_created?
76
- return true if self.raw_metadata['collection'].blank?
77
- self.raw_metadata['collection'].length == self.collection_ids.length
78
- end
75
+ def collections_created?
76
+ return true if self.raw_metadata['collection'].blank?
77
+ self.raw_metadata['collection'].length == self.collection_ids.length
78
+ end
79
79
 
80
- def find_collection_ids
81
- return self.collection_ids if collections_created?
82
- if self.raw_metadata['collection'].present?
83
- self.raw_metadata['collection'].each do |collection|
84
- c = find_collection(collection)
85
- self.collection_ids << c.id unless c.blank? || self.collection_ids.include?(c.id)
80
+ def find_collection_ids
81
+ return self.collection_ids if collections_created?
82
+ if self.raw_metadata['collection'].present?
83
+ self.raw_metadata['collection'].each do |collection|
84
+ c = find_collection(collection)
85
+ self.collection_ids << c.id unless c.blank? || self.collection_ids.include?(c.id)
86
+ end
86
87
  end
88
+ return self.collection_ids
87
89
  end
88
- return self.collection_ids
89
90
  end
90
91
  end
91
92
  end
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'nokogiri'
4
3
  module Bulkrax
5
4
  # Generic XML Entry
6
5
  class XmlEntry < Entry
@@ -18,9 +18,9 @@ module Bulkrax
18
18
  begin
19
19
  # the identifier parameter can be a :source_identifier or the id of an object
20
20
  record = Entry.find_by(default_scope.merge({ importerexporter_id: importer_id })) || Entry.find_by(default_scope)
21
- record ||= ActiveFedora::Base.find(identifier)
21
+ record ||= Bulkrax.object_factory.find(identifier)
22
22
  # NameError for if ActiveFedora isn't installed
23
- rescue NameError, ActiveFedora::ObjectNotFoundError
23
+ rescue NameError, ActiveFedora::ObjectNotFoundError, Bulkrax::ObjectFactoryInterface::ObjectNotFoundError
24
24
  record = nil
25
25
  end
26
26
 
@@ -28,22 +28,5 @@ module Bulkrax
28
28
  # also accounts for when the found entry isn't a part of this importer
29
29
  record.is_a?(Entry) ? [record, record.factory.find] : [nil, record]
30
30
  end
31
-
32
- # Check if the record is a Work
33
- def curation_concern?(record)
34
- available_work_types.include?(record.class)
35
- end
36
-
37
- private
38
-
39
- # @return [Array<Class>] list of work type classes
40
- def available_work_types
41
- # If running in a Hyku app, do not include disabled work types
42
- @available_work_types ||= if defined?(::Hyku)
43
- ::Site.instance.available_works.map(&:constantize)
44
- else
45
- Bulkrax.curation_concerns
46
- end
47
- end
48
31
  end
49
32
  end
@@ -1,5 +1,4 @@
1
1
  # frozen_string_literal: true
2
- require 'marcel'
3
2
 
4
3
  module Bulkrax
5
4
  module ExportBehavior
@@ -22,11 +21,12 @@ module Bulkrax
22
21
  end
23
22
 
24
23
  def hyrax_record
25
- @hyrax_record ||= ActiveFedora::Base.find(self.identifier)
24
+ @hyrax_record ||= Bulkrax.object_factory.find(self.identifier)
26
25
  end
27
26
 
28
27
  # Prepend the file_set id to ensure a unique filename and also one that is not longer than 255 characters
29
28
  def filename(file_set)
29
+ # NOTE: Will this work with Valkyrie?
30
30
  return if file_set.original_file.blank?
31
31
  fn = file_set.original_file.file_name.first
32
32
  mime = ::Marcel::MimeType.for(file_set.original_file.mime_type)