bulkrax 7.0.0 → 8.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/assets/javascripts/bulkrax/datatables.js +1 -1
- data/app/concerns/loggable.rb +25 -0
- data/app/controllers/bulkrax/exporters_controller.rb +1 -1
- data/app/controllers/bulkrax/importers_controller.rb +2 -1
- data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
- data/app/factories/bulkrax/object_factory.rb +135 -163
- data/app/factories/bulkrax/object_factory_interface.rb +483 -0
- data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
- data/app/factories/bulkrax/valkyrize-hyku.code-workspace +19 -0
- data/app/helpers/bulkrax/importers_helper.rb +1 -1
- data/app/helpers/bulkrax/validation_helper.rb +4 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
- data/app/jobs/bulkrax/delete_job.rb +3 -2
- data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
- data/app/jobs/bulkrax/import_file_set_job.rb +23 -19
- data/app/jobs/bulkrax/importer_job.rb +18 -2
- data/app/matchers/bulkrax/application_matcher.rb +0 -2
- data/app/models/bulkrax/csv_collection_entry.rb +1 -1
- data/app/models/bulkrax/csv_entry.rb +7 -6
- data/app/models/bulkrax/entry.rb +7 -11
- data/app/models/bulkrax/exporter.rb +2 -2
- data/app/models/bulkrax/importer.rb +1 -3
- data/app/models/bulkrax/oai_entry.rb +0 -3
- data/app/models/bulkrax/oai_set_entry.rb +1 -1
- data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
- data/app/models/bulkrax/rdf_entry.rb +70 -69
- data/app/models/bulkrax/xml_entry.rb +0 -1
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
- data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/file_factory.rb +178 -118
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/has_matchers.rb +39 -25
- data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
- data/app/parsers/bulkrax/application_parser.rb +31 -7
- data/app/parsers/bulkrax/bagit_parser.rb +175 -174
- data/app/parsers/bulkrax/csv_parser.rb +15 -5
- data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
- data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
- data/app/parsers/bulkrax/xml_parser.rb +0 -2
- data/app/services/bulkrax/factory_class_finder.rb +2 -0
- data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
- data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
- data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
- data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/show.html.erb +9 -8
- data/app/views/bulkrax/exporters/edit.html.erb +1 -1
- data/app/views/bulkrax/exporters/new.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +4 -2
- data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
- data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
- data/app/views/bulkrax/importers/edit.html.erb +1 -1
- data/app/views/bulkrax/importers/new.html.erb +1 -1
- data/app/views/bulkrax/importers/show.html.erb +1 -1
- data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
- data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
- data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
- data/config/locales/bulkrax.en.yml +7 -0
- data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
- data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
- data/lib/bulkrax/engine.rb +23 -6
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +54 -52
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
- data/lib/tasks/bulkrax_tasks.rake +1 -0
- data/lib/tasks/reset.rake +4 -4
- metadata +25 -7
- data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
- data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
- data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -6,6 +6,7 @@ module Bulkrax
|
|
6
6
|
|
7
7
|
def perform(importer_id, only_updates_since_last_import = false)
|
8
8
|
importer = Importer.find(importer_id)
|
9
|
+
return schedule(importer, Time.zone.now + 3.minutes, 'Rescheduling: cloud files are not ready yet') unless all_files_completed?(importer)
|
9
10
|
|
10
11
|
importer.current_run
|
11
12
|
unzip_imported_file(importer.parser)
|
@@ -16,6 +17,8 @@ module Bulkrax
|
|
16
17
|
importer.set_status_info(e)
|
17
18
|
end
|
18
19
|
|
20
|
+
private
|
21
|
+
|
19
22
|
def import(importer, only_updates_since_last_import)
|
20
23
|
importer.only_updates = only_updates_since_last_import || false
|
21
24
|
return unless importer.valid_import?
|
@@ -36,8 +39,21 @@ module Bulkrax
|
|
36
39
|
importer.current_run.save!
|
37
40
|
end
|
38
41
|
|
39
|
-
def schedule(importer)
|
40
|
-
|
42
|
+
def schedule(importer, wait_until = importer.next_import_at, message = nil)
|
43
|
+
Rails.logger.info message if message
|
44
|
+
ImporterJob.set(wait_until: wait_until).perform_later(importer.id, true)
|
45
|
+
end
|
46
|
+
|
47
|
+
# Checks that the sizes of the downloaded files match the sizes of the original files
|
48
|
+
def all_files_completed?(importer)
|
49
|
+
cloud_files = importer.parser_fields['cloud_file_paths']
|
50
|
+
original_files = importer.parser_fields['original_file_paths']
|
51
|
+
return true unless cloud_files.present? && original_files.present?
|
52
|
+
|
53
|
+
imported_file_sizes = cloud_files.map { |_, v| v['file_size'].to_i }
|
54
|
+
original_file_sizes = original_files.map { |imported_file| File.size(imported_file) }
|
55
|
+
|
56
|
+
original_file_sizes == imported_file_sizes
|
41
57
|
end
|
42
58
|
end
|
43
59
|
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
module Bulkrax
|
4
4
|
class CsvCollectionEntry < CsvEntry
|
5
|
-
self.default_work_type =
|
5
|
+
self.default_work_type = Bulkrax.collection_model_class.to_s
|
6
6
|
|
7
7
|
# Use identifier set by CsvParser#unique_collection_identifier, which falls back
|
8
8
|
# on the Collection's first title if record[source_identifier] is not present
|
@@ -1,7 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'csv'
|
4
|
-
|
5
3
|
module Bulkrax
|
6
4
|
# TODO: We need to rework this class some to address the Metrics/ClassLength rubocop offense.
|
7
5
|
# We do too much in these entry classes. We need to extract the common logic from the various
|
@@ -106,7 +104,7 @@ module Bulkrax
|
|
106
104
|
end
|
107
105
|
|
108
106
|
def add_metadata_for_model
|
109
|
-
if
|
107
|
+
if factory_class.present? && factory_class == Bulkrax.collection_model_class
|
110
108
|
add_collection_type_gid if defined?(::Hyrax)
|
111
109
|
# add any additional collection metadata methods here
|
112
110
|
elsif factory_class == Bulkrax.file_model_class
|
@@ -146,7 +144,7 @@ module Bulkrax
|
|
146
144
|
self.parsed_metadata = {}
|
147
145
|
|
148
146
|
build_system_metadata
|
149
|
-
build_files_metadata if
|
147
|
+
build_files_metadata if Bulkrax.collection_model_class.present? && !hyrax_record.is_a?(Bulkrax.collection_model_class)
|
150
148
|
build_relationship_metadata
|
151
149
|
build_mapping_metadata
|
152
150
|
self.save!
|
@@ -158,9 +156,12 @@ module Bulkrax
|
|
158
156
|
def build_system_metadata
|
159
157
|
self.parsed_metadata['id'] = hyrax_record.id
|
160
158
|
source_id = hyrax_record.send(work_identifier)
|
161
|
-
|
159
|
+
# Because ActiveTriples::Relation does not respond to #to_ary we can't rely on Array.wrap universally
|
160
|
+
source_id = source_id.to_a if source_id.is_a?(ActiveTriples::Relation)
|
161
|
+
source_id = Array.wrap(source_id).first
|
162
162
|
self.parsed_metadata[source_identifier] = source_id
|
163
|
-
|
163
|
+
model_name = hyrax_record.respond_to?(:to_rdf_representation) ? hyrax_record.to_rdf_representation : hyrax_record.has_model.first
|
164
|
+
self.parsed_metadata[key_for_export('model')] = model_name
|
164
165
|
end
|
165
166
|
|
166
167
|
def build_files_metadata
|
data/app/models/bulkrax/entry.rb
CHANGED
@@ -103,22 +103,18 @@ module Bulkrax
|
|
103
103
|
self.importerexporter_type == 'Bulkrax::Exporter'
|
104
104
|
end
|
105
105
|
|
106
|
-
def valid_system_id(model_class)
|
107
|
-
return true if model_class.properties.keys.include?(work_identifier)
|
108
|
-
raise(
|
109
|
-
"#{model_class} does not implement the system_identifier_field: #{work_identifier}"
|
110
|
-
)
|
111
|
-
end
|
112
|
-
|
113
106
|
def last_run
|
114
107
|
self.importerexporter&.last_run
|
115
108
|
end
|
116
109
|
|
117
110
|
def find_collection(collection_identifier)
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
111
|
+
Bulkrax.object_factory.search_by_property(
|
112
|
+
klass: Bulkrax.collection_model_class,
|
113
|
+
value: collection_identifier,
|
114
|
+
search_field: work_identifier,
|
115
|
+
name_field: work_identifier,
|
116
|
+
verify_property: true
|
117
|
+
)
|
122
118
|
end
|
123
119
|
end
|
124
120
|
end
|
@@ -137,8 +137,8 @@ module Bulkrax
|
|
137
137
|
end
|
138
138
|
|
139
139
|
def export_properties
|
140
|
-
|
141
|
-
|
140
|
+
# TODO: Does this work for Valkyrie?
|
141
|
+
Bulkrax.object_factory.export_properties
|
142
142
|
end
|
143
143
|
|
144
144
|
def metadata_only?
|
@@ -1,7 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'iso8601'
|
4
|
-
|
5
3
|
module Bulkrax
|
6
4
|
class Importer < ApplicationRecord # rubocop:disable Metrics/ClassLength
|
7
5
|
include Bulkrax::ImporterExporterBehavior
|
@@ -18,7 +16,7 @@ module Bulkrax
|
|
18
16
|
validates :admin_set_id, presence: true if defined?(::Hyrax)
|
19
17
|
validates :parser_klass, presence: true
|
20
18
|
|
21
|
-
delegate :valid_import?, :write_errored_entries_file, :visibility, to: :parser
|
19
|
+
delegate :create_parent_child_relationships, :valid_import?, :write_errored_entries_file, :visibility, to: :parser
|
22
20
|
|
23
21
|
attr_accessor :only_updates, :file_style, :file
|
24
22
|
attr_writer :current_run
|
@@ -1,91 +1,92 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
module Bulkrax
|
5
|
-
|
6
|
-
|
3
|
+
unless ENV.fetch('BULKRAX_NO_RDF', 'false').to_s == 'true'
|
4
|
+
module Bulkrax
|
5
|
+
class RdfEntry < Entry
|
6
|
+
serialize :raw_metadata, Bulkrax::NormalizedJson
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
8
|
+
def self.read_data(path)
|
9
|
+
RDF::Reader.open(path)
|
10
|
+
end
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
12
|
+
def self.fields_from_data(data)
|
13
|
+
data.predicates.map(&:to_s)
|
14
|
+
end
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
16
|
+
def self.data_for_entry(data, source_id, parser)
|
17
|
+
reader = data
|
18
|
+
format = reader.class.format.to_sym
|
19
|
+
collections = []
|
20
|
+
children = []
|
21
|
+
delete = nil
|
22
|
+
data = RDF::Writer.for(format).buffer do |writer|
|
23
|
+
reader.each_statement do |statement|
|
24
|
+
collections << statement.object.to_s if parent_field(parser).present? && parent_field(parser) == statement.predicate.to_s
|
25
|
+
children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
|
26
|
+
delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
|
27
|
+
writer << statement
|
28
|
+
end
|
28
29
|
end
|
30
|
+
return {
|
31
|
+
source_id => reader.subjects.first.to_s,
|
32
|
+
delete: delete,
|
33
|
+
format: format,
|
34
|
+
data: data,
|
35
|
+
collection: collections,
|
36
|
+
children: children
|
37
|
+
}
|
29
38
|
end
|
30
|
-
return {
|
31
|
-
source_id => reader.subjects.first.to_s,
|
32
|
-
delete: delete,
|
33
|
-
format: format,
|
34
|
-
data: data,
|
35
|
-
collection: collections,
|
36
|
-
children: children
|
37
|
-
}
|
38
|
-
end
|
39
39
|
|
40
|
-
|
41
|
-
|
40
|
+
def self.related_children_parsed_mapping
|
41
|
+
return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
|
42
42
|
|
43
|
-
|
44
|
-
|
43
|
+
rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
|
44
|
+
return if rdf_related_children_field_mapping.blank?
|
45
45
|
|
46
|
-
|
47
|
-
|
46
|
+
@related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
|
47
|
+
end
|
48
48
|
|
49
|
-
|
50
|
-
|
51
|
-
|
49
|
+
def record
|
50
|
+
@record ||= RDF::Reader.for(self.raw_metadata['format'].to_sym).new(self.raw_metadata['data'])
|
51
|
+
end
|
52
52
|
|
53
|
-
|
54
|
-
|
55
|
-
|
53
|
+
def build_metadata
|
54
|
+
raise StandardError, 'Record not found' if record.nil?
|
55
|
+
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
|
56
56
|
|
57
|
-
|
58
|
-
|
57
|
+
self.parsed_metadata = {}
|
58
|
+
self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]
|
59
59
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
60
|
+
record.each_statement do |statement|
|
61
|
+
# Only process the subject for our record (in case other data is in the file)
|
62
|
+
next unless statement.subject.to_s == self.raw_metadata[source_identifier]
|
63
|
+
add_metadata(statement.predicate.to_s, statement.object.to_s)
|
64
|
+
end
|
65
|
+
add_visibility
|
66
|
+
add_rights_statement
|
67
|
+
add_admin_set_id
|
68
|
+
add_collections
|
69
|
+
add_local
|
70
|
+
self.parsed_metadata['file'] = self.raw_metadata['file']
|
71
71
|
|
72
|
-
|
73
|
-
|
72
|
+
self.parsed_metadata
|
73
|
+
end
|
74
74
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
75
|
+
def collections_created?
|
76
|
+
return true if self.raw_metadata['collection'].blank?
|
77
|
+
self.raw_metadata['collection'].length == self.collection_ids.length
|
78
|
+
end
|
79
79
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
80
|
+
def find_collection_ids
|
81
|
+
return self.collection_ids if collections_created?
|
82
|
+
if self.raw_metadata['collection'].present?
|
83
|
+
self.raw_metadata['collection'].each do |collection|
|
84
|
+
c = find_collection(collection)
|
85
|
+
self.collection_ids << c.id unless c.blank? || self.collection_ids.include?(c.id)
|
86
|
+
end
|
86
87
|
end
|
88
|
+
return self.collection_ids
|
87
89
|
end
|
88
|
-
return self.collection_ids
|
89
90
|
end
|
90
91
|
end
|
91
92
|
end
|
@@ -18,9 +18,9 @@ module Bulkrax
|
|
18
18
|
begin
|
19
19
|
# the identifier parameter can be a :source_identifier or the id of an object
|
20
20
|
record = Entry.find_by(default_scope.merge({ importerexporter_id: importer_id })) || Entry.find_by(default_scope)
|
21
|
-
record ||=
|
21
|
+
record ||= Bulkrax.object_factory.find(identifier)
|
22
22
|
# NameError is rescued in case ActiveFedora isn't installed
|
23
|
-
rescue NameError, ActiveFedora::ObjectNotFoundError
|
23
|
+
rescue NameError, ActiveFedora::ObjectNotFoundError, Bulkrax::ObjectFactoryInterface::ObjectNotFoundError
|
24
24
|
record = nil
|
25
25
|
end
|
26
26
|
|
@@ -28,22 +28,5 @@ module Bulkrax
|
|
28
28
|
# also accounts for when the found entry isn't a part of this importer
|
29
29
|
record.is_a?(Entry) ? [record, record.factory.find] : [nil, record]
|
30
30
|
end
|
31
|
-
|
32
|
-
# Check if the record is a Work
|
33
|
-
def curation_concern?(record)
|
34
|
-
available_work_types.include?(record.class)
|
35
|
-
end
|
36
|
-
|
37
|
-
private
|
38
|
-
|
39
|
-
# @return [Array<Class>] list of work type classes
|
40
|
-
def available_work_types
|
41
|
-
# If running in a Hyku app, do not include disabled work types
|
42
|
-
@available_work_types ||= if defined?(::Hyku)
|
43
|
-
::Site.instance.available_works.map(&:constantize)
|
44
|
-
else
|
45
|
-
Bulkrax.curation_concerns
|
46
|
-
end
|
47
|
-
end
|
48
31
|
end
|
49
32
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require 'marcel'
|
3
2
|
|
4
3
|
module Bulkrax
|
5
4
|
module ExportBehavior
|
@@ -22,11 +21,12 @@ module Bulkrax
|
|
22
21
|
end
|
23
22
|
|
24
23
|
def hyrax_record
|
25
|
-
@hyrax_record ||=
|
24
|
+
@hyrax_record ||= Bulkrax.object_factory.find(self.identifier)
|
26
25
|
end
|
27
26
|
|
28
27
|
# Prepend the file_set id to ensure a unique filename and also one that is not longer than 255 characters
|
29
28
|
def filename(file_set)
|
29
|
+
# NOTE: Will this work with Valkyrie?
|
30
30
|
return if file_set.original_file.blank?
|
31
31
|
fn = file_set.original_file.file_name.first
|
32
32
|
mime = ::Marcel::MimeType.for(file_set.original_file.mime_type)
|