bulkrax 7.0.0 → 8.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/bulkrax/datatables.js +1 -1
  3. data/app/controllers/bulkrax/exporters_controller.rb +1 -1
  4. data/app/controllers/bulkrax/importers_controller.rb +2 -1
  5. data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
  6. data/app/factories/bulkrax/object_factory.rb +135 -163
  7. data/app/factories/bulkrax/object_factory_interface.rb +491 -0
  8. data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
  9. data/app/helpers/bulkrax/importers_helper.rb +1 -1
  10. data/app/helpers/bulkrax/validation_helper.rb +4 -4
  11. data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
  12. data/app/jobs/bulkrax/delete_job.rb +3 -2
  13. data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
  14. data/app/jobs/bulkrax/import_file_set_job.rb +5 -2
  15. data/app/jobs/bulkrax/importer_job.rb +18 -2
  16. data/app/matchers/bulkrax/application_matcher.rb +0 -2
  17. data/app/models/bulkrax/csv_collection_entry.rb +1 -1
  18. data/app/models/bulkrax/csv_entry.rb +7 -6
  19. data/app/models/bulkrax/entry.rb +7 -11
  20. data/app/models/bulkrax/exporter.rb +2 -2
  21. data/app/models/bulkrax/importer.rb +1 -3
  22. data/app/models/bulkrax/oai_entry.rb +0 -3
  23. data/app/models/bulkrax/oai_set_entry.rb +1 -1
  24. data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
  25. data/app/models/bulkrax/rdf_entry.rb +70 -69
  26. data/app/models/bulkrax/xml_entry.rb +0 -1
  27. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
  28. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  29. data/app/models/concerns/bulkrax/file_factory.rb +174 -118
  30. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
  31. data/app/models/concerns/bulkrax/has_matchers.rb +28 -25
  32. data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
  33. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
  34. data/app/parsers/bulkrax/application_parser.rb +31 -7
  35. data/app/parsers/bulkrax/bagit_parser.rb +175 -174
  36. data/app/parsers/bulkrax/csv_parser.rb +15 -5
  37. data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
  38. data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
  39. data/app/parsers/bulkrax/xml_parser.rb +0 -2
  40. data/app/services/bulkrax/factory_class_finder.rb +2 -0
  41. data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
  42. data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
  43. data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
  44. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
  45. data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
  46. data/app/views/bulkrax/entries/show.html.erb +9 -8
  47. data/app/views/bulkrax/exporters/edit.html.erb +1 -1
  48. data/app/views/bulkrax/exporters/new.html.erb +1 -1
  49. data/app/views/bulkrax/exporters/show.html.erb +4 -2
  50. data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
  51. data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
  52. data/app/views/bulkrax/importers/edit.html.erb +1 -1
  53. data/app/views/bulkrax/importers/new.html.erb +1 -1
  54. data/app/views/bulkrax/importers/show.html.erb +1 -1
  55. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
  56. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
  57. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
  58. data/config/locales/bulkrax.en.yml +7 -0
  59. data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
  60. data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
  61. data/lib/bulkrax/engine.rb +23 -6
  62. data/lib/bulkrax/version.rb +1 -1
  63. data/lib/bulkrax.rb +54 -52
  64. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
  65. data/lib/tasks/bulkrax_tasks.rake +1 -0
  66. data/lib/tasks/reset.rake +4 -4
  67. metadata +24 -8
  68. data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
  69. data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
  70. data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'csv'
4
-
5
3
  module Bulkrax
6
4
  # TODO: We need to rework this class some to address the Metrics/ClassLength rubocop offense.
7
5
  # We do too much in these entry classes. We need to extract the common logic from the various
@@ -106,7 +104,7 @@ module Bulkrax
106
104
  end
107
105
 
108
106
  def add_metadata_for_model
109
- if defined?(::Collection) && factory_class == ::Collection
107
+ if factory_class.present? && factory_class == Bulkrax.collection_model_class
110
108
  add_collection_type_gid if defined?(::Hyrax)
111
109
  # add any additional collection metadata methods here
112
110
  elsif factory_class == Bulkrax.file_model_class
@@ -146,7 +144,7 @@ module Bulkrax
146
144
  self.parsed_metadata = {}
147
145
 
148
146
  build_system_metadata
149
- build_files_metadata if defined?(Collection) && !hyrax_record.is_a?(Collection)
147
+ build_files_metadata if Bulkrax.collection_model_class.present? && !hyrax_record.is_a?(Bulkrax.collection_model_class)
150
148
  build_relationship_metadata
151
149
  build_mapping_metadata
152
150
  self.save!
@@ -158,9 +156,12 @@ module Bulkrax
158
156
  def build_system_metadata
159
157
  self.parsed_metadata['id'] = hyrax_record.id
160
158
  source_id = hyrax_record.send(work_identifier)
161
- source_id = source_id.to_a.first if source_id.is_a?(ActiveTriples::Relation)
159
+ # Because ActiveTriples::Relation does not respond to #to_ary we can't rely on Array.wrap universally
160
+ source_id = source_id.to_a if source_id.is_a?(ActiveTriples::Relation)
161
+ source_id = Array.wrap(source_id).first
162
162
  self.parsed_metadata[source_identifier] = source_id
163
- self.parsed_metadata[key_for_export('model')] = hyrax_record.has_model.first
163
+ model_name = hyrax_record.respond_to?(:to_rdf_representation) ? hyrax_record.to_rdf_representation : hyrax_record.has_model.first
164
+ self.parsed_metadata[key_for_export('model')] = model_name
164
165
  end
165
166
 
166
167
  def build_files_metadata
@@ -103,22 +103,18 @@ module Bulkrax
103
103
  self.importerexporter_type == 'Bulkrax::Exporter'
104
104
  end
105
105
 
106
- def valid_system_id(model_class)
107
- return true if model_class.properties.keys.include?(work_identifier)
108
- raise(
109
- "#{model_class} does not implement the system_identifier_field: #{work_identifier}"
110
- )
111
- end
112
-
113
106
  def last_run
114
107
  self.importerexporter&.last_run
115
108
  end
116
109
 
117
110
  def find_collection(collection_identifier)
118
- return unless Collection.properties.keys.include?(work_identifier)
119
- Collection.where(
120
- work_identifier => collection_identifier
121
- ).detect { |m| m.send(work_identifier).include?(collection_identifier) }
111
+ Bulkrax.object_factory.search_by_property(
112
+ klass: Bulkrax.collection_model_class,
113
+ value: collection_identifier,
114
+ search_field: work_identifier,
115
+ name_field: work_identifier,
116
+ verify_property: true
117
+ )
122
118
  end
123
119
  end
124
120
  end
@@ -137,8 +137,8 @@ module Bulkrax
137
137
  end
138
138
 
139
139
  def export_properties
140
- properties = Bulkrax.curation_concerns.map { |work| work.properties.keys }.flatten.uniq.sort
141
- properties.reject { |prop| Bulkrax.reserved_properties.include?(prop) }
140
+ # TODO: Does this work for Valkyrie?
141
+ Bulkrax.object_factory.export_properties
142
142
  end
143
143
 
144
144
  def metadata_only?
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'iso8601'
4
-
5
3
  module Bulkrax
6
4
  class Importer < ApplicationRecord # rubocop:disable Metrics/ClassLength
7
5
  include Bulkrax::ImporterExporterBehavior
@@ -18,7 +16,7 @@ module Bulkrax
18
16
  validates :admin_set_id, presence: true if defined?(::Hyrax)
19
17
  validates :parser_klass, presence: true
20
18
 
21
- delegate :valid_import?, :write_errored_entries_file, :visibility, to: :parser
19
+ delegate :create_parent_child_relationships, :valid_import?, :write_errored_entries_file, :visibility, to: :parser
22
20
 
23
21
  attr_accessor :only_updates, :file_style, :file
24
22
  attr_writer :current_run
@@ -1,8 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'erb'
4
- require 'ostruct'
5
-
6
3
  module Bulkrax
7
4
  class OaiEntry < Entry
8
5
  serialize :raw_metadata, Bulkrax::NormalizedJson
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class OaiSetEntry < OaiEntry
5
- self.default_work_type = "Collection"
5
+ self.default_work_type = Bulkrax.collection_model_class.to_s
6
6
 
7
7
  def build_metadata
8
8
  self.parsed_metadata = self.raw_metadata
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class RdfCollectionEntry < RdfEntry
5
- self.default_work_type = "Collection"
5
+ self.default_work_type = Bulkrax.collection_model_class.to_s
6
6
  def record
7
7
  @record ||= self.raw_metadata
8
8
  end
@@ -1,91 +1,92 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'rdf'
4
- module Bulkrax
5
- class RdfEntry < Entry
6
- serialize :raw_metadata, Bulkrax::NormalizedJson
3
+ unless ENV.fetch('BULKRAX_NO_RDF', 'false').to_s == 'true'
4
+ module Bulkrax
5
+ class RdfEntry < Entry
6
+ serialize :raw_metadata, Bulkrax::NormalizedJson
7
7
 
8
- def self.read_data(path)
9
- RDF::Reader.open(path)
10
- end
8
+ def self.read_data(path)
9
+ RDF::Reader.open(path)
10
+ end
11
11
 
12
- def self.fields_from_data(data)
13
- data.predicates.map(&:to_s)
14
- end
12
+ def self.fields_from_data(data)
13
+ data.predicates.map(&:to_s)
14
+ end
15
15
 
16
- def self.data_for_entry(data, source_id, parser)
17
- reader = data
18
- format = reader.class.format.to_sym
19
- collections = []
20
- children = []
21
- delete = nil
22
- data = RDF::Writer.for(format).buffer do |writer|
23
- reader.each_statement do |statement|
24
- collections << statement.object.to_s if parent_field(parser).present? && parent_field(parser) == statement.predicate.to_s
25
- children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
26
- delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
27
- writer << statement
16
+ def self.data_for_entry(data, source_id, parser)
17
+ reader = data
18
+ format = reader.class.format.to_sym
19
+ collections = []
20
+ children = []
21
+ delete = nil
22
+ data = RDF::Writer.for(format).buffer do |writer|
23
+ reader.each_statement do |statement|
24
+ collections << statement.object.to_s if parent_field(parser).present? && parent_field(parser) == statement.predicate.to_s
25
+ children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
26
+ delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
27
+ writer << statement
28
+ end
28
29
  end
30
+ return {
31
+ source_id => reader.subjects.first.to_s,
32
+ delete: delete,
33
+ format: format,
34
+ data: data,
35
+ collection: collections,
36
+ children: children
37
+ }
29
38
  end
30
- return {
31
- source_id => reader.subjects.first.to_s,
32
- delete: delete,
33
- format: format,
34
- data: data,
35
- collection: collections,
36
- children: children
37
- }
38
- end
39
39
 
40
- def self.related_children_parsed_mapping
41
- return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
40
+ def self.related_children_parsed_mapping
41
+ return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
42
42
 
43
- rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
44
- return if rdf_related_children_field_mapping.blank?
43
+ rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
44
+ return if rdf_related_children_field_mapping.blank?
45
45
 
46
- @related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
47
- end
46
+ @related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
47
+ end
48
48
 
49
- def record
50
- @record ||= RDF::Reader.for(self.raw_metadata['format'].to_sym).new(self.raw_metadata['data'])
51
- end
49
+ def record
50
+ @record ||= RDF::Reader.for(self.raw_metadata['format'].to_sym).new(self.raw_metadata['data'])
51
+ end
52
52
 
53
- def build_metadata
54
- raise StandardError, 'Record not found' if record.nil?
55
- raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
53
+ def build_metadata
54
+ raise StandardError, 'Record not found' if record.nil?
55
+ raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
56
56
 
57
- self.parsed_metadata = {}
58
- self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]
57
+ self.parsed_metadata = {}
58
+ self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]
59
59
 
60
- record.each_statement do |statement|
61
- # Only process the subject for our record (in case other data is in the file)
62
- next unless statement.subject.to_s == self.raw_metadata[source_identifier]
63
- add_metadata(statement.predicate.to_s, statement.object.to_s)
64
- end
65
- add_visibility
66
- add_rights_statement
67
- add_admin_set_id
68
- add_collections
69
- add_local
70
- self.parsed_metadata['file'] = self.raw_metadata['file']
60
+ record.each_statement do |statement|
61
+ # Only process the subject for our record (in case other data is in the file)
62
+ next unless statement.subject.to_s == self.raw_metadata[source_identifier]
63
+ add_metadata(statement.predicate.to_s, statement.object.to_s)
64
+ end
65
+ add_visibility
66
+ add_rights_statement
67
+ add_admin_set_id
68
+ add_collections
69
+ add_local
70
+ self.parsed_metadata['file'] = self.raw_metadata['file']
71
71
 
72
- self.parsed_metadata
73
- end
72
+ self.parsed_metadata
73
+ end
74
74
 
75
- def collections_created?
76
- return true if self.raw_metadata['collection'].blank?
77
- self.raw_metadata['collection'].length == self.collection_ids.length
78
- end
75
+ def collections_created?
76
+ return true if self.raw_metadata['collection'].blank?
77
+ self.raw_metadata['collection'].length == self.collection_ids.length
78
+ end
79
79
 
80
- def find_collection_ids
81
- return self.collection_ids if collections_created?
82
- if self.raw_metadata['collection'].present?
83
- self.raw_metadata['collection'].each do |collection|
84
- c = find_collection(collection)
85
- self.collection_ids << c.id unless c.blank? || self.collection_ids.include?(c.id)
80
+ def find_collection_ids
81
+ return self.collection_ids if collections_created?
82
+ if self.raw_metadata['collection'].present?
83
+ self.raw_metadata['collection'].each do |collection|
84
+ c = find_collection(collection)
85
+ self.collection_ids << c.id unless c.blank? || self.collection_ids.include?(c.id)
86
+ end
86
87
  end
88
+ return self.collection_ids
87
89
  end
88
- return self.collection_ids
89
90
  end
90
91
  end
91
92
  end
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'nokogiri'
4
3
  module Bulkrax
5
4
  # Generic XML Entry
6
5
  class XmlEntry < Entry
@@ -18,9 +18,9 @@ module Bulkrax
18
18
  begin
19
19
  # the identifier parameter can be a :source_identifier or the id of an object
20
20
  record = Entry.find_by(default_scope.merge({ importerexporter_id: importer_id })) || Entry.find_by(default_scope)
21
- record ||= ActiveFedora::Base.find(identifier)
21
+ record ||= Bulkrax.object_factory.find(identifier)
22
22
  # NameError for if ActiveFedora isn't installed
23
- rescue NameError, ActiveFedora::ObjectNotFoundError
23
+ rescue NameError, ActiveFedora::ObjectNotFoundError, Bulkrax::ObjectFactoryInterface::ObjectNotFoundError
24
24
  record = nil
25
25
  end
26
26
 
@@ -28,22 +28,5 @@ module Bulkrax
28
28
  # also accounts for when the found entry isn't a part of this importer
29
29
  record.is_a?(Entry) ? [record, record.factory.find] : [nil, record]
30
30
  end
31
-
32
- # Check if the record is a Work
33
- def curation_concern?(record)
34
- available_work_types.include?(record.class)
35
- end
36
-
37
- private
38
-
39
- # @return [Array<Class>] list of work type classes
40
- def available_work_types
41
- # If running in a Hyku app, do not include disabled work types
42
- @available_work_types ||= if defined?(::Hyku)
43
- ::Site.instance.available_works.map(&:constantize)
44
- else
45
- Bulkrax.curation_concerns
46
- end
47
- end
48
31
  end
49
32
  end
@@ -1,5 +1,4 @@
1
1
  # frozen_string_literal: true
2
- require 'marcel'
3
2
 
4
3
  module Bulkrax
5
4
  module ExportBehavior
@@ -22,11 +21,12 @@ module Bulkrax
22
21
  end
23
22
 
24
23
  def hyrax_record
25
- @hyrax_record ||= ActiveFedora::Base.find(self.identifier)
24
+ @hyrax_record ||= Bulkrax.object_factory.find(self.identifier)
26
25
  end
27
26
 
28
27
  # Prepend the file_set id to ensure a unique filename and also one that is not longer than 255 characters
29
28
  def filename(file_set)
29
+ # NOTE: Will this work with Valkyrie?
30
30
  return if file_set.original_file.blank?
31
31
  fn = file_set.original_file.file_name.first
32
32
  mime = ::Marcel::MimeType.for(file_set.original_file.mime_type)