bulkrax 7.0.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/assets/javascripts/bulkrax/datatables.js +1 -1
- data/app/controllers/bulkrax/exporters_controller.rb +1 -1
- data/app/controllers/bulkrax/importers_controller.rb +2 -1
- data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
- data/app/factories/bulkrax/object_factory.rb +135 -163
- data/app/factories/bulkrax/object_factory_interface.rb +491 -0
- data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
- data/app/helpers/bulkrax/importers_helper.rb +1 -1
- data/app/helpers/bulkrax/validation_helper.rb +4 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
- data/app/jobs/bulkrax/delete_job.rb +3 -2
- data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
- data/app/jobs/bulkrax/import_file_set_job.rb +5 -2
- data/app/jobs/bulkrax/importer_job.rb +18 -2
- data/app/matchers/bulkrax/application_matcher.rb +0 -2
- data/app/models/bulkrax/csv_collection_entry.rb +1 -1
- data/app/models/bulkrax/csv_entry.rb +7 -6
- data/app/models/bulkrax/entry.rb +7 -11
- data/app/models/bulkrax/exporter.rb +2 -2
- data/app/models/bulkrax/importer.rb +1 -3
- data/app/models/bulkrax/oai_entry.rb +0 -3
- data/app/models/bulkrax/oai_set_entry.rb +1 -1
- data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
- data/app/models/bulkrax/rdf_entry.rb +70 -69
- data/app/models/bulkrax/xml_entry.rb +0 -1
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
- data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/file_factory.rb +174 -118
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/has_matchers.rb +28 -25
- data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
- data/app/parsers/bulkrax/application_parser.rb +31 -7
- data/app/parsers/bulkrax/bagit_parser.rb +175 -174
- data/app/parsers/bulkrax/csv_parser.rb +15 -5
- data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
- data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
- data/app/parsers/bulkrax/xml_parser.rb +0 -2
- data/app/services/bulkrax/factory_class_finder.rb +2 -0
- data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
- data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
- data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
- data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/show.html.erb +9 -8
- data/app/views/bulkrax/exporters/edit.html.erb +1 -1
- data/app/views/bulkrax/exporters/new.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +4 -2
- data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
- data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
- data/app/views/bulkrax/importers/edit.html.erb +1 -1
- data/app/views/bulkrax/importers/new.html.erb +1 -1
- data/app/views/bulkrax/importers/show.html.erb +1 -1
- data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
- data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
- data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
- data/config/locales/bulkrax.en.yml +7 -0
- data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
- data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
- data/lib/bulkrax/engine.rb +23 -6
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +54 -52
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
- data/lib/tasks/bulkrax_tasks.rake +1 -0
- data/lib/tasks/reset.rake +4 -4
- metadata +24 -8
- data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
- data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
- data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -1,7 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'csv'
|
4
|
-
|
5
3
|
module Bulkrax
|
6
4
|
# TODO: We need to rework this class some to address the Metrics/ClassLength rubocop offense.
|
7
5
|
# We do too much in these entry classes. We need to extract the common logic from the various
|
@@ -106,7 +104,7 @@ module Bulkrax
|
|
106
104
|
end
|
107
105
|
|
108
106
|
def add_metadata_for_model
|
109
|
-
if
|
107
|
+
if factory_class.present? && factory_class == Bulkrax.collection_model_class
|
110
108
|
add_collection_type_gid if defined?(::Hyrax)
|
111
109
|
# add any additional collection metadata methods here
|
112
110
|
elsif factory_class == Bulkrax.file_model_class
|
@@ -146,7 +144,7 @@ module Bulkrax
|
|
146
144
|
self.parsed_metadata = {}
|
147
145
|
|
148
146
|
build_system_metadata
|
149
|
-
build_files_metadata if
|
147
|
+
build_files_metadata if Bulkrax.collection_model_class.present? && !hyrax_record.is_a?(Bulkrax.collection_model_class)
|
150
148
|
build_relationship_metadata
|
151
149
|
build_mapping_metadata
|
152
150
|
self.save!
|
@@ -158,9 +156,12 @@ module Bulkrax
|
|
158
156
|
def build_system_metadata
|
159
157
|
self.parsed_metadata['id'] = hyrax_record.id
|
160
158
|
source_id = hyrax_record.send(work_identifier)
|
161
|
-
|
159
|
+
# Because ActiveTriples::Relation does not respond to #to_ary we can't rely on Array.wrap universally
|
160
|
+
source_id = source_id.to_a if source_id.is_a?(ActiveTriples::Relation)
|
161
|
+
source_id = Array.wrap(source_id).first
|
162
162
|
self.parsed_metadata[source_identifier] = source_id
|
163
|
-
|
163
|
+
model_name = hyrax_record.respond_to?(:to_rdf_representation) ? hyrax_record.to_rdf_representation : hyrax_record.has_model.first
|
164
|
+
self.parsed_metadata[key_for_export('model')] = model_name
|
164
165
|
end
|
165
166
|
|
166
167
|
def build_files_metadata
|
data/app/models/bulkrax/entry.rb
CHANGED
@@ -103,22 +103,18 @@ module Bulkrax
|
|
103
103
|
self.importerexporter_type == 'Bulkrax::Exporter'
|
104
104
|
end
|
105
105
|
|
106
|
-
def valid_system_id(model_class)
|
107
|
-
return true if model_class.properties.keys.include?(work_identifier)
|
108
|
-
raise(
|
109
|
-
"#{model_class} does not implement the system_identifier_field: #{work_identifier}"
|
110
|
-
)
|
111
|
-
end
|
112
|
-
|
113
106
|
def last_run
|
114
107
|
self.importerexporter&.last_run
|
115
108
|
end
|
116
109
|
|
117
110
|
def find_collection(collection_identifier)
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
111
|
+
Bulkrax.object_factory.search_by_property(
|
112
|
+
klass: Bulkrax.collection_model_class,
|
113
|
+
value: collection_identifier,
|
114
|
+
search_field: work_identifier,
|
115
|
+
name_field: work_identifier,
|
116
|
+
verify_property: true
|
117
|
+
)
|
122
118
|
end
|
123
119
|
end
|
124
120
|
end
|
@@ -137,8 +137,8 @@ module Bulkrax
|
|
137
137
|
end
|
138
138
|
|
139
139
|
def export_properties
|
140
|
-
|
141
|
-
|
140
|
+
# TODO: Does this work for Valkyrie?
|
141
|
+
Bulkrax.object_factory.export_properties
|
142
142
|
end
|
143
143
|
|
144
144
|
def metadata_only?
|
@@ -1,7 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'iso8601'
|
4
|
-
|
5
3
|
module Bulkrax
|
6
4
|
class Importer < ApplicationRecord # rubocop:disable Metrics/ClassLength
|
7
5
|
include Bulkrax::ImporterExporterBehavior
|
@@ -18,7 +16,7 @@ module Bulkrax
|
|
18
16
|
validates :admin_set_id, presence: true if defined?(::Hyrax)
|
19
17
|
validates :parser_klass, presence: true
|
20
18
|
|
21
|
-
delegate :valid_import?, :write_errored_entries_file, :visibility, to: :parser
|
19
|
+
delegate :create_parent_child_relationships, :valid_import?, :write_errored_entries_file, :visibility, to: :parser
|
22
20
|
|
23
21
|
attr_accessor :only_updates, :file_style, :file
|
24
22
|
attr_writer :current_run
|
@@ -1,91 +1,92 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
module Bulkrax
|
5
|
-
|
6
|
-
|
3
|
+
unless ENV.fetch('BULKRAX_NO_RDF', 'false').to_s == 'true'
|
4
|
+
module Bulkrax
|
5
|
+
class RdfEntry < Entry
|
6
|
+
serialize :raw_metadata, Bulkrax::NormalizedJson
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
8
|
+
def self.read_data(path)
|
9
|
+
RDF::Reader.open(path)
|
10
|
+
end
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
12
|
+
def self.fields_from_data(data)
|
13
|
+
data.predicates.map(&:to_s)
|
14
|
+
end
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
16
|
+
def self.data_for_entry(data, source_id, parser)
|
17
|
+
reader = data
|
18
|
+
format = reader.class.format.to_sym
|
19
|
+
collections = []
|
20
|
+
children = []
|
21
|
+
delete = nil
|
22
|
+
data = RDF::Writer.for(format).buffer do |writer|
|
23
|
+
reader.each_statement do |statement|
|
24
|
+
collections << statement.object.to_s if parent_field(parser).present? && parent_field(parser) == statement.predicate.to_s
|
25
|
+
children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
|
26
|
+
delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
|
27
|
+
writer << statement
|
28
|
+
end
|
28
29
|
end
|
30
|
+
return {
|
31
|
+
source_id => reader.subjects.first.to_s,
|
32
|
+
delete: delete,
|
33
|
+
format: format,
|
34
|
+
data: data,
|
35
|
+
collection: collections,
|
36
|
+
children: children
|
37
|
+
}
|
29
38
|
end
|
30
|
-
return {
|
31
|
-
source_id => reader.subjects.first.to_s,
|
32
|
-
delete: delete,
|
33
|
-
format: format,
|
34
|
-
data: data,
|
35
|
-
collection: collections,
|
36
|
-
children: children
|
37
|
-
}
|
38
|
-
end
|
39
39
|
|
40
|
-
|
41
|
-
|
40
|
+
def self.related_children_parsed_mapping
|
41
|
+
return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
|
42
42
|
|
43
|
-
|
44
|
-
|
43
|
+
rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
|
44
|
+
return if rdf_related_children_field_mapping.blank?
|
45
45
|
|
46
|
-
|
47
|
-
|
46
|
+
@related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
|
47
|
+
end
|
48
48
|
|
49
|
-
|
50
|
-
|
51
|
-
|
49
|
+
def record
|
50
|
+
@record ||= RDF::Reader.for(self.raw_metadata['format'].to_sym).new(self.raw_metadata['data'])
|
51
|
+
end
|
52
52
|
|
53
|
-
|
54
|
-
|
55
|
-
|
53
|
+
def build_metadata
|
54
|
+
raise StandardError, 'Record not found' if record.nil?
|
55
|
+
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
|
56
56
|
|
57
|
-
|
58
|
-
|
57
|
+
self.parsed_metadata = {}
|
58
|
+
self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]
|
59
59
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
60
|
+
record.each_statement do |statement|
|
61
|
+
# Only process the subject for our record (in case other data is in the file)
|
62
|
+
next unless statement.subject.to_s == self.raw_metadata[source_identifier]
|
63
|
+
add_metadata(statement.predicate.to_s, statement.object.to_s)
|
64
|
+
end
|
65
|
+
add_visibility
|
66
|
+
add_rights_statement
|
67
|
+
add_admin_set_id
|
68
|
+
add_collections
|
69
|
+
add_local
|
70
|
+
self.parsed_metadata['file'] = self.raw_metadata['file']
|
71
71
|
|
72
|
-
|
73
|
-
|
72
|
+
self.parsed_metadata
|
73
|
+
end
|
74
74
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
75
|
+
def collections_created?
|
76
|
+
return true if self.raw_metadata['collection'].blank?
|
77
|
+
self.raw_metadata['collection'].length == self.collection_ids.length
|
78
|
+
end
|
79
79
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
80
|
+
def find_collection_ids
|
81
|
+
return self.collection_ids if collections_created?
|
82
|
+
if self.raw_metadata['collection'].present?
|
83
|
+
self.raw_metadata['collection'].each do |collection|
|
84
|
+
c = find_collection(collection)
|
85
|
+
self.collection_ids << c.id unless c.blank? || self.collection_ids.include?(c.id)
|
86
|
+
end
|
86
87
|
end
|
88
|
+
return self.collection_ids
|
87
89
|
end
|
88
|
-
return self.collection_ids
|
89
90
|
end
|
90
91
|
end
|
91
92
|
end
|
@@ -18,9 +18,9 @@ module Bulkrax
|
|
18
18
|
begin
|
19
19
|
# the identifier parameter can be a :source_identifier or the id of an object
|
20
20
|
record = Entry.find_by(default_scope.merge({ importerexporter_id: importer_id })) || Entry.find_by(default_scope)
|
21
|
-
record ||=
|
21
|
+
record ||= Bulkrax.object_factory.find(identifier)
|
22
22
|
# NameError for if ActiveFedora isn't installed
|
23
|
-
rescue NameError, ActiveFedora::ObjectNotFoundError
|
23
|
+
rescue NameError, ActiveFedora::ObjectNotFoundError, Bulkrax::ObjectFactoryInterface::ObjectNotFoundError
|
24
24
|
record = nil
|
25
25
|
end
|
26
26
|
|
@@ -28,22 +28,5 @@ module Bulkrax
|
|
28
28
|
# also accounts for when the found entry isn't a part of this importer
|
29
29
|
record.is_a?(Entry) ? [record, record.factory.find] : [nil, record]
|
30
30
|
end
|
31
|
-
|
32
|
-
# Check if the record is a Work
|
33
|
-
def curation_concern?(record)
|
34
|
-
available_work_types.include?(record.class)
|
35
|
-
end
|
36
|
-
|
37
|
-
private
|
38
|
-
|
39
|
-
# @return [Array<Class>] list of work type classes
|
40
|
-
def available_work_types
|
41
|
-
# If running in a Hyku app, do not include disabled work types
|
42
|
-
@available_work_types ||= if defined?(::Hyku)
|
43
|
-
::Site.instance.available_works.map(&:constantize)
|
44
|
-
else
|
45
|
-
Bulkrax.curation_concerns
|
46
|
-
end
|
47
|
-
end
|
48
31
|
end
|
49
32
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require 'marcel'
|
3
2
|
|
4
3
|
module Bulkrax
|
5
4
|
module ExportBehavior
|
@@ -22,11 +21,12 @@ module Bulkrax
|
|
22
21
|
end
|
23
22
|
|
24
23
|
def hyrax_record
|
25
|
-
@hyrax_record ||=
|
24
|
+
@hyrax_record ||= Bulkrax.object_factory.find(self.identifier)
|
26
25
|
end
|
27
26
|
|
28
27
|
# Prepend the file_set id to ensure a unique filename and also one that is not longer than 255 characters
|
29
28
|
def filename(file_set)
|
29
|
+
# NOTE: Will this work with Valkyrie?
|
30
30
|
return if file_set.original_file.blank?
|
31
31
|
fn = file_set.original_file.file_name.first
|
32
32
|
mime = ::Marcel::MimeType.for(file_set.original_file.mime_type)
|