bulkrax 6.0.1 → 8.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +7 -7
- data/app/assets/javascripts/bulkrax/bulkrax.js +11 -0
- data/app/assets/javascripts/bulkrax/datatables.js +139 -0
- data/app/assets/javascripts/bulkrax/exporters.js +4 -4
- data/app/assets/javascripts/bulkrax/importers.js.erb +15 -1
- data/app/assets/stylesheets/bulkrax/import_export.scss +6 -1
- data/app/controllers/bulkrax/entries_controller.rb +52 -3
- data/app/controllers/bulkrax/exporters_controller.rb +20 -8
- data/app/controllers/bulkrax/importers_controller.rb +31 -12
- data/app/controllers/concerns/bulkrax/datatables_behavior.rb +201 -0
- data/app/factories/bulkrax/object_factory.rb +135 -163
- data/app/factories/bulkrax/object_factory_interface.rb +491 -0
- data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
- data/app/helpers/bulkrax/application_helper.rb +7 -3
- data/app/helpers/bulkrax/importers_helper.rb +1 -1
- data/app/helpers/bulkrax/validation_helper.rb +4 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +28 -17
- data/app/jobs/bulkrax/delete_and_import_collection_job.rb +8 -0
- data/app/jobs/bulkrax/delete_and_import_file_set_job.rb +8 -0
- data/app/jobs/bulkrax/delete_and_import_job.rb +20 -0
- data/app/jobs/bulkrax/delete_and_import_work_job.rb +8 -0
- data/app/jobs/bulkrax/delete_job.rb +8 -3
- data/app/jobs/bulkrax/download_cloud_file_job.rb +17 -4
- data/app/jobs/bulkrax/import_collection_job.rb +1 -1
- data/app/jobs/bulkrax/import_file_set_job.rb +6 -3
- data/app/jobs/bulkrax/import_job.rb +7 -0
- data/app/jobs/bulkrax/import_work_job.rb +1 -1
- data/app/jobs/bulkrax/importer_job.rb +19 -3
- data/app/matchers/bulkrax/application_matcher.rb +0 -2
- data/app/models/bulkrax/csv_collection_entry.rb +1 -3
- data/app/models/bulkrax/csv_entry.rb +9 -7
- data/app/models/bulkrax/entry.rb +9 -11
- data/app/models/bulkrax/exporter.rb +11 -4
- data/app/models/bulkrax/importer.rb +49 -10
- data/app/models/bulkrax/oai_entry.rb +0 -3
- data/app/models/bulkrax/oai_set_entry.rb +1 -3
- data/app/models/bulkrax/rdf_collection_entry.rb +1 -4
- data/app/models/bulkrax/rdf_entry.rb +70 -69
- data/app/models/bulkrax/status.rb +10 -1
- data/app/models/bulkrax/xml_entry.rb +0 -1
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
- data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/file_factory.rb +174 -118
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +5 -3
- data/app/models/concerns/bulkrax/has_matchers.rb +28 -25
- data/app/models/concerns/bulkrax/import_behavior.rb +14 -33
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
- data/app/models/concerns/bulkrax/status_info.rb +8 -0
- data/app/parsers/bulkrax/application_parser.rb +116 -21
- data/app/parsers/bulkrax/bagit_parser.rb +173 -195
- data/app/parsers/bulkrax/csv_parser.rb +15 -57
- data/app/parsers/bulkrax/oai_dc_parser.rb +44 -16
- data/app/parsers/bulkrax/parser_export_record_set.rb +20 -24
- data/app/parsers/bulkrax/xml_parser.rb +18 -23
- data/app/services/bulkrax/factory_class_finder.rb +92 -0
- data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
- data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
- data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
- data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/show.html.erb +9 -8
- data/app/views/bulkrax/exporters/_form.html.erb +10 -10
- data/app/views/bulkrax/exporters/edit.html.erb +1 -1
- data/app/views/bulkrax/exporters/index.html.erb +13 -57
- data/app/views/bulkrax/exporters/new.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +6 -12
- data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
- data/app/views/bulkrax/importers/_csv_fields.html.erb +8 -2
- data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +8 -1
- data/app/views/bulkrax/importers/_edit_item_buttons.html.erb +18 -0
- data/app/views/bulkrax/importers/edit.html.erb +1 -1
- data/app/views/bulkrax/importers/index.html.erb +20 -64
- data/app/views/bulkrax/importers/new.html.erb +1 -1
- data/app/views/bulkrax/importers/show.html.erb +8 -14
- data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
- data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
- data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
- data/app/views/bulkrax/shared/_entries_tab.html.erb +16 -0
- data/config/locales/bulkrax.en.yml +7 -0
- data/config/routes.rb +8 -2
- data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
- data/db/migrate/20240208005801_denormalize_status_message.rb +7 -0
- data/db/migrate/20240209070952_update_identifier_index.rb +6 -0
- data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
- data/lib/bulkrax/engine.rb +23 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +107 -19
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
- data/lib/tasks/bulkrax_tasks.rake +13 -0
- data/lib/tasks/reset.rake +4 -4
- metadata +64 -8
- data/app/views/bulkrax/shared/_collection_entries_tab.html.erb +0 -39
- data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +0 -39
- data/app/views/bulkrax/shared/_work_entries_tab.html.erb +0 -39
@@ -1,6 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'csv'
|
4
3
|
module Bulkrax
|
5
4
|
class CsvParser < ApplicationParser # rubocop:disable Metrics/ClassLength
|
6
5
|
include ErroredEntries
|
@@ -23,6 +22,7 @@ module Bulkrax
|
|
23
22
|
@records = csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
|
24
23
|
end
|
25
24
|
|
25
|
+
# rubocop:disable Metrics/AbcSize
|
26
26
|
def build_records
|
27
27
|
@collections = []
|
28
28
|
@works = []
|
@@ -34,7 +34,9 @@ module Bulkrax
|
|
34
34
|
next unless r.key?(model_mapping)
|
35
35
|
|
36
36
|
model = r[model_mapping].nil? ? "" : r[model_mapping].strip
|
37
|
-
|
37
|
+
# TODO: Eventually this should be refactored to use Hyrax.config.collection_model
|
38
|
+
# We aren't right now because so many Bulkrax users are in between Fedora and Valkyrie
|
39
|
+
if model.casecmp('collection').zero? || model.casecmp('collectionresource').zero?
|
38
40
|
@collections << r
|
39
41
|
elsif model.casecmp('fileset').zero?
|
40
42
|
@file_sets << r
|
@@ -52,6 +54,7 @@ module Bulkrax
|
|
52
54
|
|
53
55
|
true
|
54
56
|
end
|
57
|
+
# rubocop:enable Metrics/AbcSize
|
55
58
|
|
56
59
|
def collections
|
57
60
|
build_records if @collections.nil?
|
@@ -113,57 +116,6 @@ module Bulkrax
|
|
113
116
|
false
|
114
117
|
end
|
115
118
|
|
116
|
-
def create_collections
|
117
|
-
create_objects(['collection'])
|
118
|
-
end
|
119
|
-
|
120
|
-
def create_works
|
121
|
-
create_objects(['work'])
|
122
|
-
end
|
123
|
-
|
124
|
-
def create_file_sets
|
125
|
-
create_objects(['file_set'])
|
126
|
-
end
|
127
|
-
|
128
|
-
def create_relationships
|
129
|
-
create_objects(['relationship'])
|
130
|
-
end
|
131
|
-
|
132
|
-
def create_objects(types_array = nil)
|
133
|
-
index = 0
|
134
|
-
(types_array || %w[collection work file_set relationship]).each do |type|
|
135
|
-
if type.eql?('relationship')
|
136
|
-
ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importerexporter.id)
|
137
|
-
next
|
138
|
-
end
|
139
|
-
send(type.pluralize).each do |current_record|
|
140
|
-
next unless record_has_source_identifier(current_record, index)
|
141
|
-
break if limit_reached?(limit, index)
|
142
|
-
|
143
|
-
seen[current_record[source_identifier]] = true
|
144
|
-
create_entry_and_job(current_record, type)
|
145
|
-
increment_counters(index, "#{type}": true)
|
146
|
-
index += 1
|
147
|
-
end
|
148
|
-
importer.record_status
|
149
|
-
end
|
150
|
-
true
|
151
|
-
rescue StandardError => e
|
152
|
-
set_status_info(e)
|
153
|
-
end
|
154
|
-
|
155
|
-
def create_entry_and_job(current_record, type)
|
156
|
-
new_entry = find_or_create_entry(send("#{type}_entry_class"),
|
157
|
-
current_record[source_identifier],
|
158
|
-
'Bulkrax::Importer',
|
159
|
-
current_record.to_h)
|
160
|
-
if current_record[:delete].present?
|
161
|
-
"Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
|
162
|
-
else
|
163
|
-
"Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id)
|
164
|
-
end
|
165
|
-
end
|
166
|
-
|
167
119
|
def write_partial_import_file(file)
|
168
120
|
import_filename = import_file_path.split('/').last
|
169
121
|
partial_import_filename = "#{File.basename(import_filename, '.csv')}_corrected_entries.csv"
|
@@ -204,7 +156,6 @@ module Bulkrax
|
|
204
156
|
def entry_class
|
205
157
|
CsvEntry
|
206
158
|
end
|
207
|
-
alias work_entry_class entry_class
|
208
159
|
|
209
160
|
def collection_entry_class
|
210
161
|
CsvCollectionEntry
|
@@ -242,9 +193,10 @@ module Bulkrax
|
|
242
193
|
# @todo - investigate getting directory structure
|
243
194
|
# @todo - investigate using perform_later, and having the importer check for
|
244
195
|
# DownloadCloudFileJob before it starts
|
245
|
-
def retrieve_cloud_files(files)
|
196
|
+
def retrieve_cloud_files(files, importer)
|
246
197
|
files_path = File.join(path_for_import, 'files')
|
247
198
|
FileUtils.mkdir_p(files_path) unless File.exist?(files_path)
|
199
|
+
target_files = []
|
248
200
|
files.each_pair do |_key, file|
|
249
201
|
# fixes bug where auth headers do not get attached properly
|
250
202
|
if file['auth_header'].present?
|
@@ -253,10 +205,12 @@ module Bulkrax
|
|
253
205
|
end
|
254
206
|
# this only works for uniquely named files
|
255
207
|
target_file = File.join(files_path, file['file_name'].tr(' ', '_'))
|
208
|
+
target_files << target_file
|
256
209
|
# Now because we want the files in place before the importer runs
|
257
210
|
# Problematic for a large upload
|
258
|
-
Bulkrax::DownloadCloudFileJob.
|
211
|
+
Bulkrax::DownloadCloudFileJob.perform_later(file, target_file)
|
259
212
|
end
|
213
|
+
importer[:parser_fields]['original_file_paths'] = target_files
|
260
214
|
return nil
|
261
215
|
end
|
262
216
|
|
@@ -277,6 +231,7 @@ module Bulkrax
|
|
277
231
|
CSV.open(setup_export_file(folder_count), "w", headers: export_headers, write_headers: true) do |csv|
|
278
232
|
group.each do |entry|
|
279
233
|
csv << entry.parsed_metadata
|
234
|
+
# TODO: This is precarious when we have descendents of Bulkrax::CsvCollectionEntry
|
280
235
|
next if importerexporter.metadata_only? || entry.type == 'Bulkrax::CsvCollectionEntry'
|
281
236
|
|
282
237
|
store_files(entry.identifier, folder_count.to_s)
|
@@ -286,7 +241,7 @@ module Bulkrax
|
|
286
241
|
end
|
287
242
|
|
288
243
|
def store_files(identifier, folder_count)
|
289
|
-
record =
|
244
|
+
record = Bulkrax.object_factory.find(identifier)
|
290
245
|
return unless record
|
291
246
|
|
292
247
|
file_sets = record.file_set? ? Array.wrap(record) : record.file_sets
|
@@ -338,6 +293,9 @@ module Bulkrax
|
|
338
293
|
|
339
294
|
def sort_entries(entries)
|
340
295
|
# always export models in the same order: work, collection, file set
|
296
|
+
#
|
297
|
+
# TODO: This is a problem in that only these classes are compared. Instead
|
298
|
+
# We should add a comparison operator to the classes.
|
341
299
|
entries.sort_by do |entry|
|
342
300
|
case entry.type
|
343
301
|
when 'Bulkrax::CsvCollectionEntry'
|
@@ -63,6 +63,12 @@ module Bulkrax
|
|
63
63
|
|
64
64
|
delegate :list_sets, to: :client
|
65
65
|
|
66
|
+
def create_objects(types = [])
|
67
|
+
types.each do |object_type|
|
68
|
+
send("create_#{object_type.pluralize}")
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
66
72
|
def create_collections
|
67
73
|
metadata = {
|
68
74
|
visibility: 'open'
|
@@ -86,27 +92,49 @@ module Bulkrax
|
|
86
92
|
results = self.records(quick: true)
|
87
93
|
return if results.blank?
|
88
94
|
results.full.each_with_index do |record, index|
|
89
|
-
identifier = record
|
90
|
-
|
91
|
-
if Bulkrax.fill_in_blank_source_identifiers.present?
|
92
|
-
identifier = Bulkrax.fill_in_blank_source_identifiers.call(self, index)
|
93
|
-
else
|
94
|
-
invalid_record("Missing #{source_identifier} for #{record.to_h}\n")
|
95
|
-
next
|
96
|
-
end
|
97
|
-
end
|
98
|
-
|
95
|
+
identifier = record_has_source_identifier(record, index)
|
96
|
+
next unless identifier
|
99
97
|
break if limit_reached?(limit, index)
|
98
|
+
|
100
99
|
seen[identifier] = true
|
101
|
-
|
102
|
-
if record.deleted?
|
103
|
-
DeleteWorkJob.send(perform_method, new_entry, importerexporter.current_run)
|
104
|
-
else
|
105
|
-
ImportWorkJob.send(perform_method, new_entry.id, importerexporter.current_run.id)
|
106
|
-
end
|
100
|
+
create_entry_and_job(record, 'work', identifier)
|
107
101
|
increment_counters(index, work: true)
|
108
102
|
end
|
109
103
|
importer.record_status
|
104
|
+
rescue StandardError => e
|
105
|
+
set_status_info(e)
|
106
|
+
end
|
107
|
+
|
108
|
+
def create_file_sets; end
|
109
|
+
|
110
|
+
def create_relationships
|
111
|
+
ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importerexporter.id)
|
112
|
+
end
|
113
|
+
|
114
|
+
def record_raw_metadata(_record)
|
115
|
+
nil
|
116
|
+
end
|
117
|
+
|
118
|
+
def record_deleted?(_record)
|
119
|
+
false
|
120
|
+
end
|
121
|
+
|
122
|
+
def record_remove_and_rerun?(_record)
|
123
|
+
false
|
124
|
+
end
|
125
|
+
|
126
|
+
# oai records do not let us set the source identifier easily
|
127
|
+
def record_has_source_identifier(record, index)
|
128
|
+
identifier = record.send(source_identifier)
|
129
|
+
if identifier.blank?
|
130
|
+
if Bulkrax.fill_in_blank_source_identifiers.present?
|
131
|
+
identifier = Bulkrax.fill_in_blank_source_identifiers.call(self, index)
|
132
|
+
else
|
133
|
+
invalid_record("Missing #{source_identifier} for #{record.to_h}\n")
|
134
|
+
return false
|
135
|
+
end
|
136
|
+
end
|
137
|
+
identifier
|
110
138
|
end
|
111
139
|
|
112
140
|
def collections
|
@@ -113,14 +113,14 @@ module Bulkrax
|
|
113
113
|
#
|
114
114
|
# @see #file_sets
|
115
115
|
def candidate_file_set_ids
|
116
|
-
@candidate_file_set_ids ||= works.flat_map { |work| work.fetch(
|
116
|
+
@candidate_file_set_ids ||= works.flat_map { |work| work.fetch(Bulkrax.solr_key_for_member_file_ids, []) }
|
117
117
|
end
|
118
118
|
|
119
119
|
# @note Specifically not memoizing this so we can merge values without changing the object.
|
120
120
|
#
|
121
121
|
# No sense attempting to query for more than the limit.
|
122
122
|
def query_kwargs
|
123
|
-
{ fl: "id,#{Bulkrax.
|
123
|
+
{ fl: "id,#{Bulkrax.solr_key_for_member_file_ids}", method: :post, rows: row_limit }
|
124
124
|
end
|
125
125
|
|
126
126
|
# If we have a limit, we need not query beyond that limit
|
@@ -149,12 +149,12 @@ module Bulkrax
|
|
149
149
|
end
|
150
150
|
|
151
151
|
def works
|
152
|
-
@works ||=
|
152
|
+
@works ||= Bulkrax.object_factory.query(works_query, **works_query_kwargs)
|
153
153
|
end
|
154
154
|
|
155
155
|
def collections
|
156
156
|
@collections ||= if collections_query
|
157
|
-
|
157
|
+
Bulkrax.object_factory.query(collections_query, **collections_query_kwargs)
|
158
158
|
else
|
159
159
|
[]
|
160
160
|
end
|
@@ -173,43 +173,39 @@ module Bulkrax
|
|
173
173
|
# @see https://github.com/samvera/hyrax/blob/64c0bbf0dc0d3e1b49f040b50ea70d177cc9d8f6/app/indexers/hyrax/work_indexer.rb#L15-L18
|
174
174
|
def file_sets
|
175
175
|
@file_sets ||= ParserExportRecordSet.in_batches(candidate_file_set_ids) do |batch_of_ids|
|
176
|
-
fsq = "has_model_ssim:#{Bulkrax.
|
176
|
+
fsq = "has_model_ssim:#{Bulkrax.file_model_internal_resource} AND id:(\"" + batch_of_ids.join('" OR "') + "\")"
|
177
177
|
fsq += extra_filters if extra_filters.present?
|
178
|
-
|
178
|
+
Bulkrax.object_factory.query(
|
179
179
|
fsq,
|
180
|
-
|
180
|
+
fl: "id", method: :post, rows: batch_of_ids.size
|
181
181
|
)
|
182
182
|
end
|
183
183
|
end
|
184
184
|
|
185
185
|
def solr_name(base_name)
|
186
|
-
|
187
|
-
::Solrizer.solr_name(base_name)
|
188
|
-
else
|
189
|
-
::ActiveFedora.index_field_mapper.solr_name(base_name)
|
190
|
-
end
|
186
|
+
Bulkrax.object_factory.solr_name(base_name)
|
191
187
|
end
|
192
188
|
end
|
193
189
|
|
194
190
|
class All < Base
|
195
191
|
def works_query
|
196
|
-
"has_model_ssim:(#{Bulkrax.
|
192
|
+
"has_model_ssim:(#{Bulkrax.curation_concern_internal_resources.join(' OR ')}) #{extra_filters}"
|
197
193
|
end
|
198
194
|
|
199
195
|
def collections_query
|
200
|
-
"has_model_ssim
|
196
|
+
"has_model_ssim:#{Bulkrax.collection_model_internal_resource} #{extra_filters}"
|
201
197
|
end
|
202
198
|
end
|
203
199
|
|
204
200
|
class Collection < Base
|
205
201
|
def works_query
|
206
202
|
"member_of_collection_ids_ssim:#{importerexporter.export_source} #{extra_filters} AND " \
|
207
|
-
"has_model_ssim:(#{Bulkrax.
|
203
|
+
"has_model_ssim:(#{Bulkrax.curation_concern_internal_resources.join(' OR ')})"
|
208
204
|
end
|
209
205
|
|
210
206
|
def collections_query
|
211
207
|
"(id:#{importerexporter.export_source} #{extra_filters}) OR " \
|
212
|
-
"(has_model_ssim
|
208
|
+
"(has_model_ssim:#{Bulkrax.collection_model_internal_resource} AND member_of_collection_ids_ssim:#{importerexporter.export_source})"
|
213
209
|
end
|
214
210
|
end
|
215
211
|
|
@@ -247,12 +243,12 @@ module Bulkrax
|
|
247
243
|
|
248
244
|
def works
|
249
245
|
@works ||= ParserExportRecordSet.in_batches(complete_entry_identifiers) do |ids|
|
250
|
-
|
246
|
+
Bulkrax.object_factory.query(
|
251
247
|
extra_filters.to_s,
|
252
248
|
**query_kwargs.merge(
|
253
249
|
fq: [
|
254
250
|
%(#{solr_name(work_identifier)}:("#{ids.join('" OR "')}")),
|
255
|
-
"has_model_ssim:(#{Bulkrax.
|
251
|
+
"has_model_ssim:(#{Bulkrax.curation_concern_internal_resources.join(' OR ')})"
|
256
252
|
],
|
257
253
|
fl: 'id'
|
258
254
|
)
|
@@ -262,12 +258,12 @@ module Bulkrax
|
|
262
258
|
|
263
259
|
def collections
|
264
260
|
@collections ||= ParserExportRecordSet.in_batches(complete_entry_identifiers) do |ids|
|
265
|
-
|
266
|
-
"has_model_ssim
|
261
|
+
Bulkrax.object_factory.query(
|
262
|
+
"has_model_ssim:#{Bulkrax.collection_model_internal_resource} #{extra_filters}",
|
267
263
|
**query_kwargs.merge(
|
268
264
|
fq: [
|
269
265
|
%(#{solr_name(work_identifier)}:("#{ids.join('" OR "')}")),
|
270
|
-
"has_model_ssim
|
266
|
+
"has_model_ssim:#{Bulkrax.collection_model_internal_resource}"
|
271
267
|
],
|
272
268
|
fl: "id"
|
273
269
|
)
|
@@ -281,12 +277,12 @@ module Bulkrax
|
|
281
277
|
# @see Bulkrax::ParserExportRecordSet::Base#file_sets
|
282
278
|
def file_sets
|
283
279
|
@file_sets ||= ParserExportRecordSet.in_batches(complete_entry_identifiers) do |ids|
|
284
|
-
|
280
|
+
Bulkrax.object_factory.query(
|
285
281
|
extra_filters,
|
286
|
-
query_kwargs.merge(
|
282
|
+
**query_kwargs.merge(
|
287
283
|
fq: [
|
288
284
|
%(#{solr_name(work_identifier)}:("#{ids.join('" OR "')}")),
|
289
|
-
"has_model_ssim:#{Bulkrax.
|
285
|
+
"has_model_ssim:#{Bulkrax.file_model_internal_resource}"
|
290
286
|
],
|
291
287
|
fl: 'id'
|
292
288
|
)
|
@@ -1,6 +1,4 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require 'marcel'
|
3
|
-
|
4
2
|
module Bulkrax
|
5
3
|
class XmlParser < ApplicationParser
|
6
4
|
def entry_class
|
@@ -11,13 +9,29 @@ module Bulkrax
|
|
11
9
|
def collection_entry_class; end
|
12
10
|
|
13
11
|
# @todo not yet supported
|
14
|
-
def create_collections
|
12
|
+
def create_collections
|
13
|
+
raise NotImplementedError
|
14
|
+
end
|
15
15
|
|
16
16
|
# @todo not yet supported
|
17
17
|
def file_set_entry_class; end
|
18
18
|
|
19
19
|
# @todo not yet supported
|
20
|
-
def create_file_sets
|
20
|
+
def create_file_sets
|
21
|
+
raise NotImplementedError
|
22
|
+
end
|
23
|
+
|
24
|
+
def file_sets
|
25
|
+
raise NotImplementedError
|
26
|
+
end
|
27
|
+
|
28
|
+
def collections
|
29
|
+
raise NotImplementedError
|
30
|
+
end
|
31
|
+
|
32
|
+
def works
|
33
|
+
records
|
34
|
+
end
|
21
35
|
|
22
36
|
# TODO: change to differentiate between collection and work records when adding ability to import collection metadata
|
23
37
|
def works_total
|
@@ -92,25 +106,6 @@ module Bulkrax
|
|
92
106
|
%w[.xml .xls .xsd].include?(File.extname(path)) || ::Marcel::MimeType.for(path).include?('application/xml')
|
93
107
|
end
|
94
108
|
|
95
|
-
def create_works
|
96
|
-
records.each_with_index do |record, index|
|
97
|
-
next unless record_has_source_identifier(record, index)
|
98
|
-
break if !limit.nil? && index >= limit
|
99
|
-
|
100
|
-
seen[record[source_identifier]] = true
|
101
|
-
new_entry = find_or_create_entry(entry_class, record[source_identifier], 'Bulkrax::Importer', record)
|
102
|
-
if record[:delete].present?
|
103
|
-
DeleteWorkJob.send(perform_method, new_entry, current_run)
|
104
|
-
else
|
105
|
-
ImportWorkJob.send(perform_method, new_entry.id, current_run.id)
|
106
|
-
end
|
107
|
-
increment_counters(index, work: true)
|
108
|
-
end
|
109
|
-
importer.record_status
|
110
|
-
rescue StandardError => e
|
111
|
-
set_status_info(e)
|
112
|
-
end
|
113
|
-
|
114
109
|
def total
|
115
110
|
records.size
|
116
111
|
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
|
4
|
+
class FactoryClassFinder
|
5
|
+
##
|
6
|
+
# The v6.0.0 default coercer. Responsible for converting a factory class name to a constant.
|
7
|
+
module DefaultCoercer
|
8
|
+
##
|
9
|
+
# @param name [String]
|
10
|
+
# @return [Class] when the name is a coercible constant.
|
11
|
+
# @raise [NameError] when the name is not coercible to a constant.
|
12
|
+
def self.call(name)
|
13
|
+
name.constantize
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
##
|
18
|
+
# A name coercer that favors classes that end with "Resource" but will attempt to fallback to
|
19
|
+
# those that don't.
|
20
|
+
module ValkyrieMigrationCoercer
|
21
|
+
SUFFIX = "Resource"
|
22
|
+
|
23
|
+
##
|
24
|
+
# @param name [String]
|
25
|
+
# @param suffix [String] the suffix we use for a naming convention.
|
26
|
+
#
|
27
|
+
# @return [Class] when the name is a coercible constant.
|
28
|
+
# @raise [NameError] when the name is not coercible to a constant.
|
29
|
+
def self.call(name, suffix: SUFFIX)
|
30
|
+
if name.end_with?(suffix)
|
31
|
+
name.constantize
|
32
|
+
elsif name == "FileSet"
|
33
|
+
Bulkrax.file_model_class
|
34
|
+
else
|
35
|
+
begin
|
36
|
+
"#{name}#{suffix}".constantize
|
37
|
+
rescue NameError
|
38
|
+
name.constantize
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
##
|
45
|
+
# @param entry [Bulkrax::Entry]
|
46
|
+
# @return [Class]
|
47
|
+
def self.find(entry:, coercer: Bulkrax.factory_class_name_coercer || DefaultCoercer)
|
48
|
+
new(entry: entry, coercer: coercer).find
|
49
|
+
end
|
50
|
+
|
51
|
+
def initialize(entry:, coercer:)
|
52
|
+
@entry = entry
|
53
|
+
@coercer = coercer
|
54
|
+
end
|
55
|
+
attr_reader :entry, :coercer
|
56
|
+
|
57
|
+
##
|
58
|
+
# @return [Class] when we are able to derive the class based on the {#name}.
|
59
|
+
# @return [Nil] when we encounter errors with constantizing the {#name}.
|
60
|
+
# @see #name
|
61
|
+
def find
|
62
|
+
coercer.call(name)
|
63
|
+
rescue NameError
|
64
|
+
nil
|
65
|
+
rescue
|
66
|
+
entry.default_work_type.constantize
|
67
|
+
end
|
68
|
+
|
69
|
+
##
|
70
|
+
# @api private
|
71
|
+
# @return [String]
|
72
|
+
def name
|
73
|
+
fc = if entry.parsed_metadata&.[]('model').present?
|
74
|
+
Array.wrap(entry.parsed_metadata['model']).first
|
75
|
+
elsif entry.importerexporter&.mapping&.[]('work_type').present?
|
76
|
+
# Because of delegation's nil guard, we're reaching rather far into the implementation
|
77
|
+
# details.
|
78
|
+
Array.wrap(entry.parsed_metadata['work_type']).first
|
79
|
+
else
|
80
|
+
entry.default_work_type
|
81
|
+
end
|
82
|
+
|
83
|
+
# Let's coerce this into the right shape; we're not mutating the string because it might well
|
84
|
+
# be frozen.
|
85
|
+
fc = fc.tr(' ', '_')
|
86
|
+
fc = fc.downcase if fc.match?(/[-_]/)
|
87
|
+
fc.camelcase
|
88
|
+
rescue
|
89
|
+
entry.default_work_type
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
@@ -57,7 +57,7 @@ module Bulkrax
|
|
57
57
|
obj = entry.factory.find
|
58
58
|
next if obj.is_a?(Bulkrax.file_model_class) # FileSets must be attached to a Work
|
59
59
|
|
60
|
-
if obj.is_a?(
|
60
|
+
if obj.is_a?(Bulkrax.collection_model_class)
|
61
61
|
remove_relationships_from_collection(obj)
|
62
62
|
else
|
63
63
|
remove_relationships_from_work(obj)
|
@@ -78,12 +78,14 @@ module Bulkrax
|
|
78
78
|
|
79
79
|
return if defined?(Hyrax)
|
80
80
|
|
81
|
+
# NOTE: This should not need to be migrated to the object factory.
|
81
82
|
# Remove parent collection relationships
|
82
83
|
collection.member_of_collections.each do |parent_col|
|
83
84
|
Hyrax::Collections::NestedCollectionPersistenceService
|
84
85
|
.remove_nested_relationship_for(parent: parent_col, child: collection)
|
85
86
|
end
|
86
87
|
|
88
|
+
# NOTE: This should not need to be migrated to the object factory.
|
87
89
|
# Remove child collection relationships
|
88
90
|
collection.member_collections.each do |child_col|
|
89
91
|
Hyrax::Collections::NestedCollectionPersistenceService
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Hyrax
|
4
|
+
module CustomQueries
|
5
|
+
##
|
6
|
+
# @see https://github.com/samvera/valkyrie/wiki/Queries#custom-queries
|
7
|
+
class FindBySourceIdentifier
|
8
|
+
def self.queries
|
9
|
+
[:find_by_model_and_property_value]
|
10
|
+
end
|
11
|
+
|
12
|
+
def initialize(query_service:)
|
13
|
+
@query_service = query_service
|
14
|
+
end
|
15
|
+
|
16
|
+
attr_reader :query_service
|
17
|
+
delegate :resource_factory, to: :query_service
|
18
|
+
delegate :orm_class, to: :resource_factory
|
19
|
+
|
20
|
+
##
|
21
|
+
# @param model [Class, #internal_resource]
|
22
|
+
# @param property [#to_s] the name of the property we're attempting to
|
23
|
+
# query.
|
24
|
+
# @param value [#to_s] the property's value that we're trying to match.
|
25
|
+
#
|
26
|
+
# @return [NilClass] when no record was found
|
27
|
+
# @return [Valkyrie::Resource] when a record was found
|
28
|
+
#
|
29
|
+
# @note This is not a real estate transaction nor a Zillow lookup.
|
30
|
+
def find_by_model_and_property_value(model:, property:, value:)
|
31
|
+
sql_query = sql_for_find_by_model_and_property_value
|
32
|
+
# NOTE: Do we need to ask the model for its internal_resource?
|
33
|
+
# TODO: no => undefined method `internal_resource' for Image:Class
|
34
|
+
query_service.run_query(sql_query, model, property, value).first
|
35
|
+
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
def sql_for_find_by_model_and_property_value
|
40
|
+
# NOTE: This is querying the first element of the property, but we might
|
41
|
+
# want to check all of the elements.
|
42
|
+
<<-SQL
|
43
|
+
SELECT * FROM orm_resources
|
44
|
+
WHERE internal_resource = ? AND metadata -> ? ->> 0 = ?
|
45
|
+
LIMIT 1;
|
46
|
+
SQL
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Wings
|
4
|
+
module CustomQueries
|
5
|
+
class FindBySourceIdentifier
|
6
|
+
# Custom query override specific to Wings
|
7
|
+
|
8
|
+
def self.queries
|
9
|
+
[:find_by_model_and_property_value]
|
10
|
+
end
|
11
|
+
|
12
|
+
attr_reader :query_service
|
13
|
+
delegate :resource_factory, to: :query_service
|
14
|
+
|
15
|
+
def initialize(query_service:)
|
16
|
+
@query_service = query_service
|
17
|
+
end
|
18
|
+
|
19
|
+
def find_by_model_and_property_value(model:, property:, value:, use_valkyrie: Hyrax.config.use_valkyrie?)
|
20
|
+
# NOTE: This is using the Bulkrax::ObjectFactory (e.g. the one
|
21
|
+
# envisioned for ActiveFedora). In doing this, we avoid the situation
|
22
|
+
# where Bulkrax::ValkyrieObjectFactory calls this custom query.
|
23
|
+
af_object = Bulkrax::ObjectFactory.search_by_property(value: value, klass: model, field: property)
|
24
|
+
|
25
|
+
return if af_object.blank?
|
26
|
+
return af_object unless use_valkyrie
|
27
|
+
|
28
|
+
resource_factory.to_resource(object: af_object)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -5,7 +5,7 @@
|
|
5
5
|
Parsed Metadata:
|
6
6
|
</a>
|
7
7
|
<a role="button" data-toggle="collapse" data-target="#parsed-metadata-show" aria-expanded="true" aria-controls="parsed-metadata-show">
|
8
|
-
<div class="accordion-icon
|
8
|
+
<div class="accordion-icon fa fa-times-circle" aria-hidden="true"></div>
|
9
9
|
</a>
|
10
10
|
</div>
|
11
11
|
<div id="parsed-metadata-show" class="accordion-collapse collapse" role="tabpanel" aria-labelledby="parsed-metadata-show">
|
@@ -16,4 +16,4 @@
|
|
16
16
|
</div>
|
17
17
|
</div>
|
18
18
|
</div>
|
19
|
-
<% end %>
|
19
|
+
<% end %>
|
@@ -5,7 +5,7 @@
|
|
5
5
|
Raw Metadata:
|
6
6
|
</a>
|
7
7
|
<a role="button" data-toggle="collapse" data-target="#raw-metadata-show" aria-expanded="true" aria-controls="raw-metadata-show">
|
8
|
-
<div class="accordion-icon
|
8
|
+
<div class="accordion-icon fa fa-times-circle" aria-hidden="true"></div>
|
9
9
|
</a>
|
10
10
|
</div>
|
11
11
|
<div id="raw-metadata-show" class="accordion-collapse collapse" role="tabpanel" aria-labelledby="raw-metadata-show">
|
@@ -16,4 +16,4 @@
|
|
16
16
|
</div>
|
17
17
|
</div>
|
18
18
|
</div>
|
19
|
-
<% end %>
|
19
|
+
<% end %>
|