bulkrax 7.0.0 → 8.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72)
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/bulkrax/datatables.js +1 -1
  3. data/app/concerns/loggable.rb +25 -0
  4. data/app/controllers/bulkrax/exporters_controller.rb +1 -1
  5. data/app/controllers/bulkrax/importers_controller.rb +2 -1
  6. data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
  7. data/app/factories/bulkrax/object_factory.rb +135 -163
  8. data/app/factories/bulkrax/object_factory_interface.rb +483 -0
  9. data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
  10. data/app/factories/bulkrax/valkyrize-hyku.code-workspace +19 -0
  11. data/app/helpers/bulkrax/importers_helper.rb +1 -1
  12. data/app/helpers/bulkrax/validation_helper.rb +4 -4
  13. data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
  14. data/app/jobs/bulkrax/delete_job.rb +3 -2
  15. data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
  16. data/app/jobs/bulkrax/import_file_set_job.rb +23 -19
  17. data/app/jobs/bulkrax/importer_job.rb +18 -2
  18. data/app/matchers/bulkrax/application_matcher.rb +0 -2
  19. data/app/models/bulkrax/csv_collection_entry.rb +1 -1
  20. data/app/models/bulkrax/csv_entry.rb +7 -6
  21. data/app/models/bulkrax/entry.rb +7 -11
  22. data/app/models/bulkrax/exporter.rb +2 -2
  23. data/app/models/bulkrax/importer.rb +1 -3
  24. data/app/models/bulkrax/oai_entry.rb +0 -3
  25. data/app/models/bulkrax/oai_set_entry.rb +1 -1
  26. data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
  27. data/app/models/bulkrax/rdf_entry.rb +70 -69
  28. data/app/models/bulkrax/xml_entry.rb +0 -1
  29. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
  30. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  31. data/app/models/concerns/bulkrax/file_factory.rb +178 -118
  32. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
  33. data/app/models/concerns/bulkrax/has_matchers.rb +39 -25
  34. data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
  35. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
  36. data/app/parsers/bulkrax/application_parser.rb +31 -7
  37. data/app/parsers/bulkrax/bagit_parser.rb +175 -174
  38. data/app/parsers/bulkrax/csv_parser.rb +15 -5
  39. data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
  40. data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
  41. data/app/parsers/bulkrax/xml_parser.rb +0 -2
  42. data/app/services/bulkrax/factory_class_finder.rb +2 -0
  43. data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
  44. data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
  45. data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
  46. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
  47. data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
  48. data/app/views/bulkrax/entries/show.html.erb +9 -8
  49. data/app/views/bulkrax/exporters/edit.html.erb +1 -1
  50. data/app/views/bulkrax/exporters/new.html.erb +1 -1
  51. data/app/views/bulkrax/exporters/show.html.erb +4 -2
  52. data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
  53. data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
  54. data/app/views/bulkrax/importers/edit.html.erb +1 -1
  55. data/app/views/bulkrax/importers/new.html.erb +1 -1
  56. data/app/views/bulkrax/importers/show.html.erb +1 -1
  57. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
  58. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
  59. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
  60. data/config/locales/bulkrax.en.yml +7 -0
  61. data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
  62. data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
  63. data/lib/bulkrax/engine.rb +23 -6
  64. data/lib/bulkrax/version.rb +1 -1
  65. data/lib/bulkrax.rb +54 -52
  66. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
  67. data/lib/tasks/bulkrax_tasks.rake +1 -0
  68. data/lib/tasks/reset.rake +4 -4
  69. metadata +25 -7
  70. data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
  71. data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
  72. data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -6,6 +6,7 @@ module Bulkrax
6
6
 
7
7
  def perform(importer_id, only_updates_since_last_import = false)
8
8
  importer = Importer.find(importer_id)
9
+ return schedule(importer, Time.zone.now + 3.minutes, 'Rescheduling: cloud files are not ready yet') unless all_files_completed?(importer)
9
10
 
10
11
  importer.current_run
11
12
  unzip_imported_file(importer.parser)
@@ -16,6 +17,8 @@ module Bulkrax
16
17
  importer.set_status_info(e)
17
18
  end
18
19
 
20
+ private
21
+
19
22
  def import(importer, only_updates_since_last_import)
20
23
  importer.only_updates = only_updates_since_last_import || false
21
24
  return unless importer.valid_import?
@@ -36,8 +39,21 @@ module Bulkrax
36
39
  importer.current_run.save!
37
40
  end
38
41
 
39
- def schedule(importer)
40
- ImporterJob.set(wait_until: importer.next_import_at).perform_later(importer.id, true)
42
+ def schedule(importer, wait_until = importer.next_import_at, message = nil)
43
+ Rails.logger.info message if message
44
+ ImporterJob.set(wait_until: wait_until).perform_later(importer.id, true)
45
+ end
46
+
47
+ # checks the file sizes of the download files to match the original files
48
+ def all_files_completed?(importer)
49
+ cloud_files = importer.parser_fields['cloud_file_paths']
50
+ original_files = importer.parser_fields['original_file_paths']
51
+ return true unless cloud_files.present? && original_files.present?
52
+
53
+ imported_file_sizes = cloud_files.map { |_, v| v['file_size'].to_i }
54
+ original_file_sizes = original_files.map { |imported_file| File.size(imported_file) }
55
+
56
+ original_file_sizes == imported_file_sizes
41
57
  end
42
58
  end
43
59
  end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'language_list'
4
-
5
3
  module Bulkrax
6
4
  class ApplicationMatcher
7
5
  attr_accessor :to, :from, :parsed, :if, :split, :excluded, :nested_type
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class CsvCollectionEntry < CsvEntry
5
- self.default_work_type = "Collection"
5
+ self.default_work_type = Bulkrax.collection_model_class.to_s
6
6
 
7
7
  # Use identifier set by CsvParser#unique_collection_identifier, which falls back
8
8
  # on the Collection's first title if record[source_identifier] is not present
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'csv'
4
-
5
3
  module Bulkrax
6
4
  # TODO: We need to rework this class some to address the Metrics/ClassLength rubocop offense.
7
5
  # We do too much in these entry classes. We need to extract the common logic from the various
@@ -106,7 +104,7 @@ module Bulkrax
106
104
  end
107
105
 
108
106
  def add_metadata_for_model
109
- if defined?(::Collection) && factory_class == ::Collection
107
+ if factory_class.present? && factory_class == Bulkrax.collection_model_class
110
108
  add_collection_type_gid if defined?(::Hyrax)
111
109
  # add any additional collection metadata methods here
112
110
  elsif factory_class == Bulkrax.file_model_class
@@ -146,7 +144,7 @@ module Bulkrax
146
144
  self.parsed_metadata = {}
147
145
 
148
146
  build_system_metadata
149
- build_files_metadata if defined?(Collection) && !hyrax_record.is_a?(Collection)
147
+ build_files_metadata if Bulkrax.collection_model_class.present? && !hyrax_record.is_a?(Bulkrax.collection_model_class)
150
148
  build_relationship_metadata
151
149
  build_mapping_metadata
152
150
  self.save!
@@ -158,9 +156,12 @@ module Bulkrax
158
156
  def build_system_metadata
159
157
  self.parsed_metadata['id'] = hyrax_record.id
160
158
  source_id = hyrax_record.send(work_identifier)
161
- source_id = source_id.to_a.first if source_id.is_a?(ActiveTriples::Relation)
159
+ # Because ActiveTriples::Relation does not respond to #to_ary we can't rely on Array.wrap universally
160
+ source_id = source_id.to_a if source_id.is_a?(ActiveTriples::Relation)
161
+ source_id = Array.wrap(source_id).first
162
162
  self.parsed_metadata[source_identifier] = source_id
163
- self.parsed_metadata[key_for_export('model')] = hyrax_record.has_model.first
163
+ model_name = hyrax_record.respond_to?(:to_rdf_representation) ? hyrax_record.to_rdf_representation : hyrax_record.has_model.first
164
+ self.parsed_metadata[key_for_export('model')] = model_name
164
165
  end
165
166
 
166
167
  def build_files_metadata
@@ -103,22 +103,18 @@ module Bulkrax
103
103
  self.importerexporter_type == 'Bulkrax::Exporter'
104
104
  end
105
105
 
106
- def valid_system_id(model_class)
107
- return true if model_class.properties.keys.include?(work_identifier)
108
- raise(
109
- "#{model_class} does not implement the system_identifier_field: #{work_identifier}"
110
- )
111
- end
112
-
113
106
  def last_run
114
107
  self.importerexporter&.last_run
115
108
  end
116
109
 
117
110
  def find_collection(collection_identifier)
118
- return unless Collection.properties.keys.include?(work_identifier)
119
- Collection.where(
120
- work_identifier => collection_identifier
121
- ).detect { |m| m.send(work_identifier).include?(collection_identifier) }
111
+ Bulkrax.object_factory.search_by_property(
112
+ klass: Bulkrax.collection_model_class,
113
+ value: collection_identifier,
114
+ search_field: work_identifier,
115
+ name_field: work_identifier,
116
+ verify_property: true
117
+ )
122
118
  end
123
119
  end
124
120
  end
@@ -137,8 +137,8 @@ module Bulkrax
137
137
  end
138
138
 
139
139
  def export_properties
140
- properties = Bulkrax.curation_concerns.map { |work| work.properties.keys }.flatten.uniq.sort
141
- properties.reject { |prop| Bulkrax.reserved_properties.include?(prop) }
140
+ # TODO: Does this work for Valkyrie?
141
+ Bulkrax.object_factory.export_properties
142
142
  end
143
143
 
144
144
  def metadata_only?
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'iso8601'
4
-
5
3
  module Bulkrax
6
4
  class Importer < ApplicationRecord # rubocop:disable Metrics/ClassLength
7
5
  include Bulkrax::ImporterExporterBehavior
@@ -18,7 +16,7 @@ module Bulkrax
18
16
  validates :admin_set_id, presence: true if defined?(::Hyrax)
19
17
  validates :parser_klass, presence: true
20
18
 
21
- delegate :valid_import?, :write_errored_entries_file, :visibility, to: :parser
19
+ delegate :create_parent_child_relationships, :valid_import?, :write_errored_entries_file, :visibility, to: :parser
22
20
 
23
21
  attr_accessor :only_updates, :file_style, :file
24
22
  attr_writer :current_run
@@ -1,8 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'erb'
4
- require 'ostruct'
5
-
6
3
  module Bulkrax
7
4
  class OaiEntry < Entry
8
5
  serialize :raw_metadata, Bulkrax::NormalizedJson
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class OaiSetEntry < OaiEntry
5
- self.default_work_type = "Collection"
5
+ self.default_work_type = Bulkrax.collection_model_class.to_s
6
6
 
7
7
  def build_metadata
8
8
  self.parsed_metadata = self.raw_metadata
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class RdfCollectionEntry < RdfEntry
5
- self.default_work_type = "Collection"
5
+ self.default_work_type = Bulkrax.collection_model_class.to_s
6
6
  def record
7
7
  @record ||= self.raw_metadata
8
8
  end
@@ -1,91 +1,92 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'rdf'
4
- module Bulkrax
5
- class RdfEntry < Entry
6
- serialize :raw_metadata, Bulkrax::NormalizedJson
3
+ unless ENV.fetch('BULKRAX_NO_RDF', 'false').to_s == 'true'
4
+ module Bulkrax
5
+ class RdfEntry < Entry
6
+ serialize :raw_metadata, Bulkrax::NormalizedJson
7
7
 
8
- def self.read_data(path)
9
- RDF::Reader.open(path)
10
- end
8
+ def self.read_data(path)
9
+ RDF::Reader.open(path)
10
+ end
11
11
 
12
- def self.fields_from_data(data)
13
- data.predicates.map(&:to_s)
14
- end
12
+ def self.fields_from_data(data)
13
+ data.predicates.map(&:to_s)
14
+ end
15
15
 
16
- def self.data_for_entry(data, source_id, parser)
17
- reader = data
18
- format = reader.class.format.to_sym
19
- collections = []
20
- children = []
21
- delete = nil
22
- data = RDF::Writer.for(format).buffer do |writer|
23
- reader.each_statement do |statement|
24
- collections << statement.object.to_s if parent_field(parser).present? && parent_field(parser) == statement.predicate.to_s
25
- children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
26
- delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
27
- writer << statement
16
+ def self.data_for_entry(data, source_id, parser)
17
+ reader = data
18
+ format = reader.class.format.to_sym
19
+ collections = []
20
+ children = []
21
+ delete = nil
22
+ data = RDF::Writer.for(format).buffer do |writer|
23
+ reader.each_statement do |statement|
24
+ collections << statement.object.to_s if parent_field(parser).present? && parent_field(parser) == statement.predicate.to_s
25
+ children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
26
+ delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
27
+ writer << statement
28
+ end
28
29
  end
30
+ return {
31
+ source_id => reader.subjects.first.to_s,
32
+ delete: delete,
33
+ format: format,
34
+ data: data,
35
+ collection: collections,
36
+ children: children
37
+ }
29
38
  end
30
- return {
31
- source_id => reader.subjects.first.to_s,
32
- delete: delete,
33
- format: format,
34
- data: data,
35
- collection: collections,
36
- children: children
37
- }
38
- end
39
39
 
40
- def self.related_children_parsed_mapping
41
- return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
40
+ def self.related_children_parsed_mapping
41
+ return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
42
42
 
43
- rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
44
- return if rdf_related_children_field_mapping.blank?
43
+ rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
44
+ return if rdf_related_children_field_mapping.blank?
45
45
 
46
- @related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
47
- end
46
+ @related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
47
+ end
48
48
 
49
- def record
50
- @record ||= RDF::Reader.for(self.raw_metadata['format'].to_sym).new(self.raw_metadata['data'])
51
- end
49
+ def record
50
+ @record ||= RDF::Reader.for(self.raw_metadata['format'].to_sym).new(self.raw_metadata['data'])
51
+ end
52
52
 
53
- def build_metadata
54
- raise StandardError, 'Record not found' if record.nil?
55
- raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
53
+ def build_metadata
54
+ raise StandardError, 'Record not found' if record.nil?
55
+ raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
56
56
 
57
- self.parsed_metadata = {}
58
- self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]
57
+ self.parsed_metadata = {}
58
+ self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]
59
59
 
60
- record.each_statement do |statement|
61
- # Only process the subject for our record (in case other data is in the file)
62
- next unless statement.subject.to_s == self.raw_metadata[source_identifier]
63
- add_metadata(statement.predicate.to_s, statement.object.to_s)
64
- end
65
- add_visibility
66
- add_rights_statement
67
- add_admin_set_id
68
- add_collections
69
- add_local
70
- self.parsed_metadata['file'] = self.raw_metadata['file']
60
+ record.each_statement do |statement|
61
+ # Only process the subject for our record (in case other data is in the file)
62
+ next unless statement.subject.to_s == self.raw_metadata[source_identifier]
63
+ add_metadata(statement.predicate.to_s, statement.object.to_s)
64
+ end
65
+ add_visibility
66
+ add_rights_statement
67
+ add_admin_set_id
68
+ add_collections
69
+ add_local
70
+ self.parsed_metadata['file'] = self.raw_metadata['file']
71
71
 
72
- self.parsed_metadata
73
- end
72
+ self.parsed_metadata
73
+ end
74
74
 
75
- def collections_created?
76
- return true if self.raw_metadata['collection'].blank?
77
- self.raw_metadata['collection'].length == self.collection_ids.length
78
- end
75
+ def collections_created?
76
+ return true if self.raw_metadata['collection'].blank?
77
+ self.raw_metadata['collection'].length == self.collection_ids.length
78
+ end
79
79
 
80
- def find_collection_ids
81
- return self.collection_ids if collections_created?
82
- if self.raw_metadata['collection'].present?
83
- self.raw_metadata['collection'].each do |collection|
84
- c = find_collection(collection)
85
- self.collection_ids << c.id unless c.blank? || self.collection_ids.include?(c.id)
80
+ def find_collection_ids
81
+ return self.collection_ids if collections_created?
82
+ if self.raw_metadata['collection'].present?
83
+ self.raw_metadata['collection'].each do |collection|
84
+ c = find_collection(collection)
85
+ self.collection_ids << c.id unless c.blank? || self.collection_ids.include?(c.id)
86
+ end
86
87
  end
88
+ return self.collection_ids
87
89
  end
88
- return self.collection_ids
89
90
  end
90
91
  end
91
92
  end
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'nokogiri'
4
3
  module Bulkrax
5
4
  # Generic XML Entry
6
5
  class XmlEntry < Entry
@@ -18,9 +18,9 @@ module Bulkrax
18
18
  begin
19
19
  # the identifier parameter can be a :source_identifier or the id of an object
20
20
  record = Entry.find_by(default_scope.merge({ importerexporter_id: importer_id })) || Entry.find_by(default_scope)
21
- record ||= ActiveFedora::Base.find(identifier)
21
+ record ||= Bulkrax.object_factory.find(identifier)
22
22
  # NameError for if ActiveFedora isn't installed
23
- rescue NameError, ActiveFedora::ObjectNotFoundError
23
+ rescue NameError, ActiveFedora::ObjectNotFoundError, Bulkrax::ObjectFactoryInterface::ObjectNotFoundError
24
24
  record = nil
25
25
  end
26
26
 
@@ -28,22 +28,5 @@ module Bulkrax
28
28
  # also accounts for when the found entry isn't a part of this importer
29
29
  record.is_a?(Entry) ? [record, record.factory.find] : [nil, record]
30
30
  end
31
-
32
- # Check if the record is a Work
33
- def curation_concern?(record)
34
- available_work_types.include?(record.class)
35
- end
36
-
37
- private
38
-
39
- # @return [Array<Class>] list of work type classes
40
- def available_work_types
41
- # If running in a Hyku app, do not include disabled work types
42
- @available_work_types ||= if defined?(::Hyku)
43
- ::Site.instance.available_works.map(&:constantize)
44
- else
45
- Bulkrax.curation_concerns
46
- end
47
- end
48
31
  end
49
32
  end
@@ -1,5 +1,4 @@
1
1
  # frozen_string_literal: true
2
- require 'marcel'
3
2
 
4
3
  module Bulkrax
5
4
  module ExportBehavior
@@ -22,11 +21,12 @@ module Bulkrax
22
21
  end
23
22
 
24
23
  def hyrax_record
25
- @hyrax_record ||= ActiveFedora::Base.find(self.identifier)
24
+ @hyrax_record ||= Bulkrax.object_factory.find(self.identifier)
26
25
  end
27
26
 
28
27
  # Prepend the file_set id to ensure a unique filename and also one that is not longer than 255 characters
29
28
  def filename(file_set)
29
+ # NOTE: Will this work with Valkyrie?
30
30
  return if file_set.original_file.blank?
31
31
  fn = file_set.original_file.file_name.first
32
32
  mime = ::Marcel::MimeType.for(file_set.original_file.mime_type)