bulkrax 5.1.0 → 5.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/bulkrax/importers_controller.rb +3 -4
- data/app/helpers/bulkrax/validation_helper.rb +4 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +3 -0
- data/app/jobs/bulkrax/import_work_job.rb +20 -7
- data/app/jobs/bulkrax/importer_job.rb +1 -1
- data/app/jobs/bulkrax/schedule_relationships_job.rb +2 -1
- data/app/matchers/bulkrax/application_matcher.rb +1 -0
- data/app/models/bulkrax/csv_entry.rb +93 -24
- data/app/models/bulkrax/exporter.rb +3 -12
- data/app/models/bulkrax/importer.rb +1 -1
- data/app/models/bulkrax/pending_relationship.rb +1 -1
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +1 -1
- data/app/models/concerns/bulkrax/export_behavior.rb +6 -4
- data/app/models/concerns/bulkrax/has_matchers.rb +1 -0
- data/app/models/concerns/bulkrax/import_behavior.rb +6 -3
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +9 -1
- data/app/parsers/bulkrax/application_parser.rb +14 -16
- data/app/parsers/bulkrax/bagit_parser.rb +5 -16
- data/app/parsers/bulkrax/csv_parser.rb +43 -111
- data/app/parsers/bulkrax/oai_dc_parser.rb +2 -2
- data/app/parsers/bulkrax/parser_export_record_set.rb +281 -0
- data/app/parsers/bulkrax/xml_parser.rb +9 -5
- data/app/services/bulkrax/remove_relationships_for_importer.rb +4 -2
- data/app/views/bulkrax/entries/show.html.erb +1 -1
- data/app/views/bulkrax/exporters/_form.html.erb +32 -33
- data/app/views/bulkrax/exporters/index.html.erb +2 -2
- data/app/views/bulkrax/exporters/show.html.erb +3 -3
- data/app/views/bulkrax/importers/_bagit_fields.html.erb +13 -12
- data/app/views/bulkrax/importers/_csv_fields.html.erb +13 -12
- data/app/views/bulkrax/importers/_oai_fields.html.erb +12 -10
- data/app/views/bulkrax/importers/_xml_fields.html.erb +12 -11
- data/app/views/bulkrax/importers/show.html.erb +18 -16
- data/app/views/bulkrax/shared/_collection_entries_tab.html.erb +6 -6
- data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +6 -6
- data/app/views/bulkrax/shared/_work_entries_tab.html.erb +6 -6
- data/config/locales/bulkrax.en.yml +26 -0
- data/lib/bulkrax/entry_spec_helper.rb +17 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +119 -46
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +1 -1
- data/lib/tasks/reset.rake +1 -1
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c9794c69e891f2397ac94398676f49db008e83b7a6df16e7a08f0ab393c148ca
|
4
|
+
data.tar.gz: fd2d48507add6bcbc7557f9240951d174c488e456579af65e20bb6cdc6f3c080
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3b07a4178650201c48602c9bb9df29135a5cc51bc6954355732bd6ae5fa918c5fd4c88d187e305b7ee2877ca995818d7ec3a476e92ec173c7569c0a15ca229a2
|
7
|
+
data.tar.gz: 324393aeb341d82b23e6391cbe1ec2a64d3e55a61400330f1656f46b3bb22cd0c6e7335cfeaf1b3b12e6ca5faefea5405b96e1100b1df138dd4c5a87fd522d76
|
@@ -35,11 +35,10 @@ module Bulkrax
|
|
35
35
|
elsif defined?(::Hyrax)
|
36
36
|
add_importer_breadcrumbs
|
37
37
|
add_breadcrumb @importer.name
|
38
|
-
|
39
|
-
@work_entries = @importer.entries.where(type: @importer.parser.entry_class.to_s).page(params[:work_entries_page]).per(30)
|
40
|
-
@collection_entries = @importer.entries.where(type: @importer.parser.collection_entry_class.to_s).page(params[:collections_entries_page]).per(30)
|
41
|
-
@file_set_entries = @importer.entries.where(type: @importer.parser.file_set_entry_class.to_s).page(params[:file_set_entries_page]).per(30)
|
42
38
|
end
|
39
|
+
@work_entries = @importer.entries.where(type: @importer.parser.entry_class.to_s).page(params[:work_entries_page]).per(30)
|
40
|
+
@collection_entries = @importer.entries.where(type: @importer.parser.collection_entry_class.to_s).page(params[:collections_entries_page]).per(30)
|
41
|
+
@file_set_entries = @importer.entries.where(type: @importer.parser.file_set_entry_class.to_s).page(params[:file_set_entries_page]).per(30)
|
43
42
|
end
|
44
43
|
|
45
44
|
# GET /importers/new
|
@@ -5,10 +5,8 @@ module Bulkrax
|
|
5
5
|
def valid_create_params?
|
6
6
|
check_admin_set
|
7
7
|
check_user
|
8
|
-
return true if valid_importer? &&
|
9
|
-
|
10
|
-
valid_name? &&
|
11
|
-
valid_parser_klass? &&
|
8
|
+
return true if valid_importer? && valid_commit? &&
|
9
|
+
valid_name? && valid_parser_klass? &&
|
12
10
|
valid_parser_fields?
|
13
11
|
end
|
14
12
|
|
@@ -19,6 +17,8 @@ module Bulkrax
|
|
19
17
|
end
|
20
18
|
|
21
19
|
def check_admin_set
|
20
|
+
return unless defined?(::Hyrax)
|
21
|
+
|
22
22
|
if params[:importer][:admin_set_id].blank?
|
23
23
|
params[:importer][:admin_set_id] = AdminSet::DEFAULT_ID
|
24
24
|
else
|
@@ -16,6 +16,8 @@ module Bulkrax
|
|
16
16
|
# to either an instance of a Work or an instance of a Collection.
|
17
17
|
# NOTE: In the context of this job, "identifier" is used to generically refer
|
18
18
|
# to either a record's ID or a Bulkrax::Entry's source_identifier.
|
19
|
+
# Please override with your own job for custom/non-hyrax applications
|
20
|
+
# set Bulkrax config variable :relationship_job to your custom class
|
19
21
|
class CreateRelationshipsJob < ApplicationJob
|
20
22
|
##
|
21
23
|
# @api public
|
@@ -112,6 +114,7 @@ module Bulkrax
|
|
112
114
|
end
|
113
115
|
|
114
116
|
def add_to_collection(child_record, parent_record)
|
117
|
+
parent_record.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
|
115
118
|
child_record.member_of_collections << parent_record
|
116
119
|
child_record.save!
|
117
120
|
end
|
@@ -5,24 +5,37 @@ module Bulkrax
|
|
5
5
|
queue_as :import
|
6
6
|
|
7
7
|
# rubocop:disable Rails/SkipsModelValidations
|
8
|
+
#
|
9
|
+
# @note Yes, we are calling {ImporterRun.find} each time. This is on purpose, to prevent race
|
10
|
+
# conditions on the database update. If you do not re-find (or at least reload) the object
|
11
|
+
# on each increment, the count can get messed up. Let's say there are two jobs A and B and
|
12
|
+
# a counter set to 2.
|
13
|
+
#
|
14
|
+
# - A grabs the importer_run (line 10)
|
15
|
+
# - B grabs the importer_run (line 10)
|
16
|
+
# - A Finishes the build, does the increment (now the counter is 3)
|
17
|
+
# - B Finishes the build, does the increment (now the counter is 3 again) and thus a count
|
18
|
+
# is lost.
|
19
|
+
#
|
20
|
+
# @see https://codingdeliberately.com/activerecord-increment/
|
21
|
+
# @see https://github.com/samvera-labs/bulkrax/commit/5c2c795452e13a98c9217fdac81ae2f5aea031a0#r105848236
|
8
22
|
def perform(entry_id, run_id, time_to_live = 3, *)
|
9
23
|
entry = Entry.find(entry_id)
|
10
|
-
importer_run = ImporterRun.find(run_id)
|
11
24
|
entry.build
|
12
25
|
if entry.status == "Complete"
|
13
|
-
|
14
|
-
|
26
|
+
ImporterRun.find(run_id).increment!(:processed_records)
|
27
|
+
ImporterRun.find(run_id).increment!(:processed_works)
|
15
28
|
else
|
16
29
|
# do not retry here because whatever parse error kept you from creating a work will likely
|
17
30
|
# keep preventing you from doing so.
|
18
|
-
|
19
|
-
|
31
|
+
ImporterRun.find(run_id).increment!(:failed_records)
|
32
|
+
ImporterRun.find(run_id).increment!(:failed_works)
|
20
33
|
end
|
21
34
|
# Regardless of completion or not, we want to decrement the enqueued records.
|
22
|
-
|
35
|
+
ImporterRun.find(run_id).decrement!(:enqueued_records) unless ImporterRun.find(run_id).enqueued_records <= 0
|
23
36
|
|
24
37
|
entry.save!
|
25
|
-
entry.importer.current_run =
|
38
|
+
entry.importer.current_run = ImporterRun.find(run_id)
|
26
39
|
entry.importer.record_status
|
27
40
|
rescue Bulkrax::CollectionsCreatedError => e
|
28
41
|
Rails.logger.warn("#{self.class} entry_id: #{entry_id}, run_id: #{run_id} encountered #{e.class}: #{e.message}")
|
@@ -12,7 +12,7 @@ module Bulkrax
|
|
12
12
|
import(importer, only_updates_since_last_import)
|
13
13
|
update_current_run_counters(importer)
|
14
14
|
schedule(importer) if importer.schedulable?
|
15
|
-
rescue CSV::MalformedCSVError => e
|
15
|
+
rescue ::CSV::MalformedCSVError => e
|
16
16
|
importer.set_status_info(e)
|
17
17
|
end
|
18
18
|
|
@@ -9,7 +9,8 @@ module Bulkrax
|
|
9
9
|
return reschedule(importer_id) unless pending_num.zero?
|
10
10
|
|
11
11
|
importer.last_run.parents.each do |parent_id|
|
12
|
-
|
12
|
+
Bulkrax.relationship_job_class.constantize.perform_later(parent_identifier: parent_id,
|
13
|
+
importer_run_id: importer.last_run.id)
|
13
14
|
end
|
14
15
|
end
|
15
16
|
|
@@ -102,6 +102,7 @@ module Bulkrax
|
|
102
102
|
|
103
103
|
# Only add valid resource types
|
104
104
|
def parse_resource_type(src)
|
105
|
+
ActiveSupport::Deprecation.warn('#parse_resource_type will be removed in Bulkrax v6.0.0')
|
105
106
|
Hyrax::ResourceTypesService.label(src.to_s.strip.titleize)
|
106
107
|
rescue KeyError
|
107
108
|
nil
|
@@ -13,15 +13,49 @@ module Bulkrax
|
|
13
13
|
data.headers.flatten.compact.uniq
|
14
14
|
end
|
15
15
|
|
16
|
+
class_attribute(:csv_read_data_options, default: {})
|
17
|
+
|
16
18
|
# there's a risk that this reads the whole file into memory and could cause a memory leak
|
17
19
|
def self.read_data(path)
|
18
20
|
raise StandardError, 'CSV path empty' if path.blank?
|
19
|
-
|
21
|
+
options = {
|
20
22
|
headers: true,
|
21
23
|
header_converters: ->(h) { h.to_sym },
|
22
|
-
encoding: 'utf-8'
|
24
|
+
encoding: 'utf-8'
|
25
|
+
}.merge(csv_read_data_options)
|
26
|
+
|
27
|
+
results = CSV.read(path, **options)
|
28
|
+
csv_wrapper_class.new(results)
|
23
29
|
end
|
24
30
|
|
31
|
+
# The purpose of this class is to reject empty lines, which cause lots of grief when importing.
|
32
|
+
# But why not use {CSV.read}'s `skip_lines` option? Because for some CSVs, it will never finish
|
33
|
+
# reading the file.
|
34
|
+
#
|
35
|
+
# There is a spec that demonstrates this approach works.
|
36
|
+
class CsvWrapper
|
37
|
+
include Enumerable
|
38
|
+
def initialize(original)
|
39
|
+
@original = original
|
40
|
+
end
|
41
|
+
|
42
|
+
delegate :headers, to: :@original
|
43
|
+
|
44
|
+
def each
|
45
|
+
@original.each do |row|
|
46
|
+
next if all_fields_are_empty_for(row: row)
|
47
|
+
yield(row)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
def all_fields_are_empty_for(row:)
|
54
|
+
row.to_hash.values.all?(&:blank?)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
class_attribute :csv_wrapper_class, default: CsvWrapper
|
58
|
+
|
25
59
|
def self.data_for_entry(data, _source_id, parser)
|
26
60
|
# If a multi-line CSV data is passed, grab the first row
|
27
61
|
data = data.first if data.is_a?(CSV::Table)
|
@@ -35,11 +69,7 @@ module Bulkrax
|
|
35
69
|
end
|
36
70
|
|
37
71
|
def build_metadata
|
38
|
-
|
39
|
-
unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
|
40
|
-
raise StandardError,
|
41
|
-
"Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}"
|
42
|
-
end
|
72
|
+
validate_record
|
43
73
|
|
44
74
|
self.parsed_metadata = {}
|
45
75
|
add_identifier
|
@@ -56,6 +86,12 @@ module Bulkrax
|
|
56
86
|
self.parsed_metadata
|
57
87
|
end
|
58
88
|
|
89
|
+
def validate_record
|
90
|
+
raise StandardError, 'Record not found' if record.nil?
|
91
|
+
raise StandardError, "Missing required elements, missing element(s) are: "\
|
92
|
+
"#{importerexporter.parser.missing_elements(record).join(', ')}" unless importerexporter.parser.required_elements?(record)
|
93
|
+
end
|
94
|
+
|
59
95
|
def add_identifier
|
60
96
|
self.parsed_metadata[work_identifier] = [record[source_identifier]]
|
61
97
|
end
|
@@ -67,9 +103,10 @@ module Bulkrax
|
|
67
103
|
end
|
68
104
|
|
69
105
|
def add_metadata_for_model
|
70
|
-
if factory_class == Collection
|
71
|
-
add_collection_type_gid
|
72
|
-
|
106
|
+
if defined?(::Collection) && factory_class == ::Collection
|
107
|
+
add_collection_type_gid if defined?(::Hyrax)
|
108
|
+
# add any additional collection metadata methods here
|
109
|
+
elsif factory_class == Bulkrax.file_model_class
|
73
110
|
validate_presence_of_filename!
|
74
111
|
add_path_to_file
|
75
112
|
validate_presence_of_parent!
|
@@ -106,7 +143,7 @@ module Bulkrax
|
|
106
143
|
self.parsed_metadata = {}
|
107
144
|
|
108
145
|
build_system_metadata
|
109
|
-
build_files_metadata
|
146
|
+
build_files_metadata if defined?(Collection) && !hyrax_record.is_a?(Collection)
|
110
147
|
build_relationship_metadata
|
111
148
|
build_mapping_metadata
|
112
149
|
self.save!
|
@@ -157,26 +194,48 @@ module Bulkrax
|
|
157
194
|
end
|
158
195
|
end
|
159
196
|
|
197
|
+
# The purpose of this helper module is to make easier the testing of the rather complex
|
198
|
+
# switching logic for determining the method we use for building the value.
|
199
|
+
module AttributeBuilderMethod
|
200
|
+
# @param key [Symbol]
|
201
|
+
# @param value [Hash<String, Object>]
|
202
|
+
# @param entry [Bulkrax::Entry]
|
203
|
+
#
|
204
|
+
# @return [NilClass] when we won't be processing this field
|
205
|
+
# @return [Symbol] (either :build_value or :build_object)
|
206
|
+
def self.for(key:, value:, entry:)
|
207
|
+
return if key == 'model'
|
208
|
+
return if key == 'file'
|
209
|
+
return if key == entry.related_parents_parsed_mapping
|
210
|
+
return if key == entry.related_children_parsed_mapping
|
211
|
+
return if value['excluded'] || value[:excluded]
|
212
|
+
return if Bulkrax.reserved_properties.include?(key) && !entry.field_supported?(key)
|
213
|
+
|
214
|
+
object_key = key if value.key?('object') || value.key?(:object)
|
215
|
+
return unless entry.hyrax_record.respond_to?(key.to_s) || object_key.present?
|
216
|
+
|
217
|
+
models_to_skip = Array.wrap(value['skip_object_for_model_names'] || value[:skip_object_for_model_names] || [])
|
218
|
+
|
219
|
+
return :build_value if models_to_skip.detect { |model| entry.factory_class.model_name.name == model }
|
220
|
+
return :build_object if object_key.present?
|
221
|
+
|
222
|
+
:build_value
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
160
226
|
def build_mapping_metadata
|
161
227
|
mapping = fetch_field_mapping
|
162
228
|
mapping.each do |key, value|
|
163
|
-
|
164
|
-
next
|
165
|
-
next if value['excluded']
|
166
|
-
next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
|
167
|
-
|
168
|
-
object_key = key if value.key?('object')
|
169
|
-
next unless hyrax_record.respond_to?(key.to_s) || object_key.present?
|
229
|
+
method_name = AttributeBuilderMethod.for(key: key, value: value, entry: self)
|
230
|
+
next unless method_name
|
170
231
|
|
171
|
-
|
172
|
-
build_object(value)
|
173
|
-
else
|
174
|
-
build_value(key, value)
|
175
|
-
end
|
232
|
+
send(method_name, key, value)
|
176
233
|
end
|
177
234
|
end
|
178
235
|
|
179
|
-
def build_object(value)
|
236
|
+
def build_object(_key, value)
|
237
|
+
return unless hyrax_record.respond_to?(value['object'])
|
238
|
+
|
180
239
|
data = hyrax_record.send(value['object'])
|
181
240
|
return if data.empty?
|
182
241
|
|
@@ -185,6 +244,8 @@ module Bulkrax
|
|
185
244
|
end
|
186
245
|
|
187
246
|
def build_value(key, value)
|
247
|
+
return unless hyrax_record.respond_to?(key.to_s)
|
248
|
+
|
188
249
|
data = hyrax_record.send(key.to_s)
|
189
250
|
if data.is_a?(ActiveTriples::Relation)
|
190
251
|
if value['join']
|
@@ -217,6 +278,14 @@ module Bulkrax
|
|
217
278
|
end
|
218
279
|
|
219
280
|
def object_metadata(data)
|
281
|
+
# NOTE: What is `d` in this case:
|
282
|
+
#
|
283
|
+
# "[{\"single_object_first_name\"=>\"Fake\", \"single_object_last_name\"=>\"Fakerson\", \"single_object_position\"=>\"Leader, Jester, Queen\", \"single_object_language\"=>\"english\"}]"
|
284
|
+
#
|
285
|
+
# The above is a stringified version of a Ruby Array of Hashes. Using eval is a very bad idea as it
|
286
|
+
# will execute the value of `d` within the full Ruby interpreter context.
|
287
|
+
#
|
288
|
+
# TODO: Would it be possible to store this as a non-string? Maybe the actual Ruby Array and Hash?
|
220
289
|
data = data.map { |d| eval(d) }.flatten # rubocop:disable Security/Eval
|
221
290
|
|
222
291
|
data.each_with_index do |obj, index|
|
@@ -18,18 +18,9 @@ module Bulkrax
|
|
18
18
|
|
19
19
|
def export
|
20
20
|
current_run && setup_export_path
|
21
|
-
|
22
|
-
when 'collection'
|
23
|
-
create_from_collection
|
24
|
-
when 'importer'
|
25
|
-
create_from_importer
|
26
|
-
when 'worktype'
|
27
|
-
create_from_worktype
|
28
|
-
when 'all'
|
29
|
-
create_from_all
|
30
|
-
end
|
21
|
+
send("create_from_#{self.export_from}")
|
31
22
|
rescue StandardError => e
|
32
|
-
|
23
|
+
set_status_info(e)
|
33
24
|
end
|
34
25
|
|
35
26
|
# #export_source accessors
|
@@ -139,7 +130,7 @@ module Bulkrax
|
|
139
130
|
end
|
140
131
|
|
141
132
|
def export_properties
|
142
|
-
properties =
|
133
|
+
properties = Bulkrax.curation_concerns.map { |work| work.properties.keys }.flatten.uniq.sort
|
143
134
|
properties.reject { |prop| Bulkrax.reserved_properties.include?(prop) }
|
144
135
|
end
|
145
136
|
|
@@ -6,6 +6,6 @@ module Bulkrax
|
|
6
6
|
|
7
7
|
# Ideally we wouldn't have a column named "order", as it is a reserved SQL term. However, if we
|
8
8
|
# quote the column, all is well...for the application.
|
9
|
-
scope :ordered, -> { order("#{quoted_table_name}.#{connection.quote_column_name('order')}") }
|
9
|
+
scope :ordered, -> { order(Arel.sql("#{quoted_table_name}.#{connection.quote_column_name('order')}")) }
|
10
10
|
end
|
11
11
|
end
|
@@ -1,4 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
require 'marcel'
|
3
|
+
|
2
4
|
module Bulkrax
|
3
5
|
module ExportBehavior
|
4
6
|
extend ActiveSupport::Concern
|
@@ -10,9 +12,9 @@ module Bulkrax
|
|
10
12
|
rescue RSolr::Error::Http, CollectionsCreatedError => e
|
11
13
|
raise e
|
12
14
|
rescue StandardError => e
|
13
|
-
|
15
|
+
set_status_info(e)
|
14
16
|
else
|
15
|
-
|
17
|
+
set_status_info
|
16
18
|
end
|
17
19
|
|
18
20
|
def build_export_metadata
|
@@ -27,8 +29,8 @@ module Bulkrax
|
|
27
29
|
def filename(file_set)
|
28
30
|
return if file_set.original_file.blank?
|
29
31
|
fn = file_set.original_file.file_name.first
|
30
|
-
mime =
|
31
|
-
ext_mime =
|
32
|
+
mime = ::Marcel::MimeType.for(file_set.original_file.mime_type)
|
33
|
+
ext_mime = ::Marcel::MimeType.for(file_set.original_file.file_name)
|
32
34
|
if fn.include?(file_set.id) || importerexporter.metadata_only?
|
33
35
|
filename = "#{fn}.#{mime.to_sym}"
|
34
36
|
filename = fn if mime.to_s == ext_mime.to_s
|
@@ -11,16 +11,16 @@ module Bulkrax
|
|
11
11
|
unless self.importerexporter.validate_only
|
12
12
|
raise CollectionsCreatedError unless collections_created?
|
13
13
|
@item = factory.run!
|
14
|
-
add_user_to_permission_templates! if self.class.to_s.include?("Collection")
|
14
|
+
add_user_to_permission_templates! if self.class.to_s.include?("Collection") && defined?(::Hyrax)
|
15
15
|
parent_jobs if self.parsed_metadata[related_parents_parsed_mapping]&.join.present?
|
16
16
|
child_jobs if self.parsed_metadata[related_children_parsed_mapping]&.join.present?
|
17
17
|
end
|
18
18
|
rescue RSolr::Error::Http, CollectionsCreatedError => e
|
19
19
|
raise e
|
20
20
|
rescue StandardError => e
|
21
|
-
|
21
|
+
set_status_info(e)
|
22
22
|
else
|
23
|
-
|
23
|
+
set_status_info
|
24
24
|
ensure
|
25
25
|
self.save!
|
26
26
|
end
|
@@ -93,6 +93,8 @@ module Bulkrax
|
|
93
93
|
end
|
94
94
|
|
95
95
|
def add_admin_set_id
|
96
|
+
return unless defined?(::Hyrax)
|
97
|
+
|
96
98
|
self.parsed_metadata['admin_set_id'] = importerexporter.admin_set_id if self.parsed_metadata['admin_set_id'].blank?
|
97
99
|
end
|
98
100
|
|
@@ -165,6 +167,7 @@ module Bulkrax
|
|
165
167
|
# @param field [String] name of the controlled property
|
166
168
|
# @return [Boolean] provided value is a present, active authority ID for the provided field
|
167
169
|
def active_id_for_authority?(value, field)
|
170
|
+
return false unless defined?(::Hyrax)
|
168
171
|
field_service = ('Hyrax::' + "#{field}_service".camelcase).constantize
|
169
172
|
active_authority_ids = field_service.new.active_elements.map { |ae| ae['id'] }
|
170
173
|
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
require 'marcel'
|
2
3
|
|
3
4
|
module Bulkrax
|
4
5
|
module ImporterExporterBehavior
|
@@ -50,7 +51,14 @@ module Bulkrax
|
|
50
51
|
|
51
52
|
# Is this a zip file?
|
52
53
|
def zip?
|
53
|
-
parser_fields&.[]('import_file_path')
|
54
|
+
filename = parser_fields&.[]('import_file_path')
|
55
|
+
return false unless filename
|
56
|
+
return false unless File.file?(filename)
|
57
|
+
returning_value = false
|
58
|
+
File.open(filename) do |file|
|
59
|
+
returning_value = ::Marcel::MimeType.for(file).include?('application/zip')
|
60
|
+
end
|
61
|
+
returning_value
|
54
62
|
end
|
55
63
|
end
|
56
64
|
end
|
@@ -1,5 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
require 'zip'
|
3
|
+
require 'marcel'
|
3
4
|
|
4
5
|
module Bulkrax
|
5
6
|
# An abstract class that establishes the API for Bulkrax's import and export parsing.
|
@@ -10,10 +11,11 @@ module Bulkrax
|
|
10
11
|
alias importer importerexporter
|
11
12
|
alias exporter importerexporter
|
12
13
|
delegate :only_updates, :limit, :current_run, :errors, :mapping,
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
14
|
+
:seen, :increment_counters, :parser_fields, :user, :keys_without_numbers,
|
15
|
+
:key_without_numbers, :status, :set_status_info, :status_info, :status_at,
|
16
|
+
:exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
|
17
|
+
:zip?, :file?,
|
18
|
+
to: :importerexporter
|
17
19
|
|
18
20
|
# @todo Convert to `class_attribute :parser_fields, default: {}`
|
19
21
|
def self.parser_fields
|
@@ -275,10 +277,16 @@ module Bulkrax
|
|
275
277
|
|
276
278
|
# @return [Array<String>]
|
277
279
|
def required_elements
|
280
|
+
matched_elements = ((importerexporter.mapping.keys || []) & (Bulkrax.required_elements || []))
|
281
|
+
unless matched_elements.count == Bulkrax.required_elements.count
|
282
|
+
missing_elements = Bulkrax.required_elements - matched_elements
|
283
|
+
error_alert = "Missing mapping for at least one required element, missing mappings are: #{missing_elements.join(', ')}"
|
284
|
+
raise StandardError, error_alert
|
285
|
+
end
|
278
286
|
if Bulkrax.fill_in_blank_source_identifiers
|
279
|
-
|
287
|
+
Bulkrax.required_elements
|
280
288
|
else
|
281
|
-
[
|
289
|
+
Bulkrax.required_elements + [source_identifier]
|
282
290
|
end
|
283
291
|
end
|
284
292
|
|
@@ -351,16 +359,6 @@ module Bulkrax
|
|
351
359
|
end
|
352
360
|
end
|
353
361
|
|
354
|
-
# Is this a file?
|
355
|
-
def file?
|
356
|
-
parser_fields&.[]('import_file_path') && File.file?(parser_fields['import_file_path'])
|
357
|
-
end
|
358
|
-
|
359
|
-
# Is this a zip file?
|
360
|
-
def zip?
|
361
|
-
parser_fields&.[]('import_file_path') && MIME::Types.type_for(parser_fields['import_file_path']).include?('application/zip')
|
362
|
-
end
|
363
|
-
|
364
362
|
# Path for the import
|
365
363
|
# @return [String]
|
366
364
|
def import_file_path
|
@@ -11,7 +11,7 @@ module Bulkrax
|
|
11
11
|
def valid_import?
|
12
12
|
return true if import_fields.present?
|
13
13
|
rescue => e
|
14
|
-
|
14
|
+
set_status_info(e)
|
15
15
|
false
|
16
16
|
end
|
17
17
|
|
@@ -51,7 +51,7 @@ module Bulkrax
|
|
51
51
|
record_data = entry_class.data_for_entry(data_row, source_identifier, self)
|
52
52
|
next record_data if importerexporter.metadata_only?
|
53
53
|
|
54
|
-
record_data[:file] = bag.bag_files.join('|') if
|
54
|
+
record_data[:file] = bag.bag_files.join('|') if Bulkrax.curation_concerns.include? record_data[:model]&.constantize
|
55
55
|
record_data
|
56
56
|
end
|
57
57
|
else
|
@@ -82,19 +82,7 @@ module Bulkrax
|
|
82
82
|
end
|
83
83
|
importer.record_status
|
84
84
|
rescue StandardError => e
|
85
|
-
|
86
|
-
end
|
87
|
-
|
88
|
-
def total
|
89
|
-
@total = importer.parser_fields['total'] || 0 if importer?
|
90
|
-
|
91
|
-
@total = if exporter?
|
92
|
-
limit.nil? || limit.zero? ? current_record_ids.count : limit
|
93
|
-
end
|
94
|
-
|
95
|
-
return @total || 0
|
96
|
-
rescue StandardError
|
97
|
-
@total = 0
|
85
|
+
set_status_info(e)
|
98
86
|
end
|
99
87
|
|
100
88
|
# export methods
|
@@ -144,7 +132,7 @@ module Bulkrax
|
|
144
132
|
bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
|
145
133
|
rescue => e
|
146
134
|
entry.set_status_info(e)
|
147
|
-
|
135
|
+
set_status_info(e)
|
148
136
|
end
|
149
137
|
end
|
150
138
|
|
@@ -185,6 +173,7 @@ module Bulkrax
|
|
185
173
|
File.join(path, id)
|
186
174
|
end
|
187
175
|
|
176
|
+
# @todo(bjustice) - remove hyrax reference
|
188
177
|
def write_triples(folder_count, e)
|
189
178
|
sd = SolrDocument.find(e.identifier)
|
190
179
|
return if sd.nil?
|