bulkrax 5.1.0 → 5.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/controllers/bulkrax/importers_controller.rb +3 -4
- data/app/helpers/bulkrax/validation_helper.rb +4 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +3 -0
- data/app/jobs/bulkrax/import_work_job.rb +20 -7
- data/app/jobs/bulkrax/importer_job.rb +1 -1
- data/app/jobs/bulkrax/schedule_relationships_job.rb +2 -1
- data/app/matchers/bulkrax/application_matcher.rb +1 -0
- data/app/models/bulkrax/csv_entry.rb +93 -24
- data/app/models/bulkrax/exporter.rb +3 -12
- data/app/models/bulkrax/importer.rb +1 -1
- data/app/models/bulkrax/pending_relationship.rb +1 -1
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +1 -1
- data/app/models/concerns/bulkrax/export_behavior.rb +6 -4
- data/app/models/concerns/bulkrax/has_matchers.rb +1 -0
- data/app/models/concerns/bulkrax/import_behavior.rb +6 -3
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +9 -1
- data/app/parsers/bulkrax/application_parser.rb +14 -16
- data/app/parsers/bulkrax/bagit_parser.rb +5 -16
- data/app/parsers/bulkrax/csv_parser.rb +43 -111
- data/app/parsers/bulkrax/oai_dc_parser.rb +2 -2
- data/app/parsers/bulkrax/parser_export_record_set.rb +281 -0
- data/app/parsers/bulkrax/xml_parser.rb +9 -5
- data/app/services/bulkrax/remove_relationships_for_importer.rb +4 -2
- data/app/views/bulkrax/entries/show.html.erb +1 -1
- data/app/views/bulkrax/exporters/_form.html.erb +32 -33
- data/app/views/bulkrax/exporters/index.html.erb +2 -2
- data/app/views/bulkrax/exporters/show.html.erb +3 -3
- data/app/views/bulkrax/importers/_bagit_fields.html.erb +13 -12
- data/app/views/bulkrax/importers/_csv_fields.html.erb +13 -12
- data/app/views/bulkrax/importers/_oai_fields.html.erb +12 -10
- data/app/views/bulkrax/importers/_xml_fields.html.erb +12 -11
- data/app/views/bulkrax/importers/show.html.erb +18 -16
- data/app/views/bulkrax/shared/_collection_entries_tab.html.erb +6 -6
- data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +6 -6
- data/app/views/bulkrax/shared/_work_entries_tab.html.erb +6 -6
- data/config/locales/bulkrax.en.yml +26 -0
- data/lib/bulkrax/entry_spec_helper.rb +17 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +119 -46
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +1 -1
- data/lib/tasks/reset.rake +1 -1
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c9794c69e891f2397ac94398676f49db008e83b7a6df16e7a08f0ab393c148ca
|
4
|
+
data.tar.gz: fd2d48507add6bcbc7557f9240951d174c488e456579af65e20bb6cdc6f3c080
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3b07a4178650201c48602c9bb9df29135a5cc51bc6954355732bd6ae5fa918c5fd4c88d187e305b7ee2877ca995818d7ec3a476e92ec173c7569c0a15ca229a2
|
7
|
+
data.tar.gz: 324393aeb341d82b23e6391cbe1ec2a64d3e55a61400330f1656f46b3bb22cd0c6e7335cfeaf1b3b12e6ca5faefea5405b96e1100b1df138dd4c5a87fd522d76
|
@@ -35,11 +35,10 @@ module Bulkrax
|
|
35
35
|
elsif defined?(::Hyrax)
|
36
36
|
add_importer_breadcrumbs
|
37
37
|
add_breadcrumb @importer.name
|
38
|
-
|
39
|
-
@work_entries = @importer.entries.where(type: @importer.parser.entry_class.to_s).page(params[:work_entries_page]).per(30)
|
40
|
-
@collection_entries = @importer.entries.where(type: @importer.parser.collection_entry_class.to_s).page(params[:collections_entries_page]).per(30)
|
41
|
-
@file_set_entries = @importer.entries.where(type: @importer.parser.file_set_entry_class.to_s).page(params[:file_set_entries_page]).per(30)
|
42
38
|
end
|
39
|
+
@work_entries = @importer.entries.where(type: @importer.parser.entry_class.to_s).page(params[:work_entries_page]).per(30)
|
40
|
+
@collection_entries = @importer.entries.where(type: @importer.parser.collection_entry_class.to_s).page(params[:collections_entries_page]).per(30)
|
41
|
+
@file_set_entries = @importer.entries.where(type: @importer.parser.file_set_entry_class.to_s).page(params[:file_set_entries_page]).per(30)
|
43
42
|
end
|
44
43
|
|
45
44
|
# GET /importers/new
|
@@ -5,10 +5,8 @@ module Bulkrax
|
|
5
5
|
def valid_create_params?
|
6
6
|
check_admin_set
|
7
7
|
check_user
|
8
|
-
return true if valid_importer? &&
|
9
|
-
|
10
|
-
valid_name? &&
|
11
|
-
valid_parser_klass? &&
|
8
|
+
return true if valid_importer? && valid_commit? &&
|
9
|
+
valid_name? && valid_parser_klass? &&
|
12
10
|
valid_parser_fields?
|
13
11
|
end
|
14
12
|
|
@@ -19,6 +17,8 @@ module Bulkrax
|
|
19
17
|
end
|
20
18
|
|
21
19
|
def check_admin_set
|
20
|
+
return unless defined?(::Hyrax)
|
21
|
+
|
22
22
|
if params[:importer][:admin_set_id].blank?
|
23
23
|
params[:importer][:admin_set_id] = AdminSet::DEFAULT_ID
|
24
24
|
else
|
@@ -16,6 +16,8 @@ module Bulkrax
|
|
16
16
|
# to either an instance of a Work or an instance of a Collection.
|
17
17
|
# NOTE: In the context of this job, "identifier" is used to generically refer
|
18
18
|
# to either a record's ID or a Bulkrax::Entry's source_identifier.
|
19
|
+
# Please override with your own job for custom/non-hyrax applications
|
20
|
+
# by setting the Bulkrax config variable :relationship_job_class to your custom class
|
19
21
|
class CreateRelationshipsJob < ApplicationJob
|
20
22
|
##
|
21
23
|
# @api public
|
@@ -112,6 +114,7 @@ module Bulkrax
|
|
112
114
|
end
|
113
115
|
|
114
116
|
def add_to_collection(child_record, parent_record)
|
117
|
+
parent_record.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
|
115
118
|
child_record.member_of_collections << parent_record
|
116
119
|
child_record.save!
|
117
120
|
end
|
@@ -5,24 +5,37 @@ module Bulkrax
|
|
5
5
|
queue_as :import
|
6
6
|
|
7
7
|
# rubocop:disable Rails/SkipsModelValidations
|
8
|
+
#
|
9
|
+
# @note Yes, we are calling {ImporterRun.find} each time. This is on purpose, to prevent race
|
10
|
+
# conditions on the database update. If you do not re-find (or at least reload) the object
|
11
|
+
# on each increment, the count can get messed up. Let's say there are two jobs A and B and
|
12
|
+
# a counter set to 2.
|
13
|
+
#
|
14
|
+
# - A grabs the importer_run (line 10)
|
15
|
+
# - B grabs the importer_run (line 10)
|
16
|
+
# - A Finishes the build, does the increment (now the counter is 3)
|
17
|
+
# - B Finishes the build, does the increment (now the counter is 3 again) and thus a count
|
18
|
+
# is lost.
|
19
|
+
#
|
20
|
+
# @see https://codingdeliberately.com/activerecord-increment/
|
21
|
+
# @see https://github.com/samvera-labs/bulkrax/commit/5c2c795452e13a98c9217fdac81ae2f5aea031a0#r105848236
|
8
22
|
def perform(entry_id, run_id, time_to_live = 3, *)
|
9
23
|
entry = Entry.find(entry_id)
|
10
|
-
importer_run = ImporterRun.find(run_id)
|
11
24
|
entry.build
|
12
25
|
if entry.status == "Complete"
|
13
|
-
|
14
|
-
|
26
|
+
ImporterRun.find(run_id).increment!(:processed_records)
|
27
|
+
ImporterRun.find(run_id).increment!(:processed_works)
|
15
28
|
else
|
16
29
|
# do not retry here because whatever parse error kept you from creating a work will likely
|
17
30
|
# keep preventing you from doing so.
|
18
|
-
|
19
|
-
|
31
|
+
ImporterRun.find(run_id).increment!(:failed_records)
|
32
|
+
ImporterRun.find(run_id).increment!(:failed_works)
|
20
33
|
end
|
21
34
|
# Regardless of completion or not, we want to decrement the enqueued records.
|
22
|
-
|
35
|
+
ImporterRun.find(run_id).decrement!(:enqueued_records) unless ImporterRun.find(run_id).enqueued_records <= 0
|
23
36
|
|
24
37
|
entry.save!
|
25
|
-
entry.importer.current_run =
|
38
|
+
entry.importer.current_run = ImporterRun.find(run_id)
|
26
39
|
entry.importer.record_status
|
27
40
|
rescue Bulkrax::CollectionsCreatedError => e
|
28
41
|
Rails.logger.warn("#{self.class} entry_id: #{entry_id}, run_id: #{run_id} encountered #{e.class}: #{e.message}")
|
@@ -12,7 +12,7 @@ module Bulkrax
|
|
12
12
|
import(importer, only_updates_since_last_import)
|
13
13
|
update_current_run_counters(importer)
|
14
14
|
schedule(importer) if importer.schedulable?
|
15
|
-
rescue CSV::MalformedCSVError => e
|
15
|
+
rescue ::CSV::MalformedCSVError => e
|
16
16
|
importer.set_status_info(e)
|
17
17
|
end
|
18
18
|
|
@@ -9,7 +9,8 @@ module Bulkrax
|
|
9
9
|
return reschedule(importer_id) unless pending_num.zero?
|
10
10
|
|
11
11
|
importer.last_run.parents.each do |parent_id|
|
12
|
-
|
12
|
+
Bulkrax.relationship_job_class.constantize.perform_later(parent_identifier: parent_id,
|
13
|
+
importer_run_id: importer.last_run.id)
|
13
14
|
end
|
14
15
|
end
|
15
16
|
|
@@ -102,6 +102,7 @@ module Bulkrax
|
|
102
102
|
|
103
103
|
# Only add valid resource types
|
104
104
|
def parse_resource_type(src)
|
105
|
+
ActiveSupport::Deprecation.warn('#parse_resource_type will be removed in Bulkrax v6.0.0')
|
105
106
|
Hyrax::ResourceTypesService.label(src.to_s.strip.titleize)
|
106
107
|
rescue KeyError
|
107
108
|
nil
|
@@ -13,15 +13,49 @@ module Bulkrax
|
|
13
13
|
data.headers.flatten.compact.uniq
|
14
14
|
end
|
15
15
|
|
16
|
+
class_attribute(:csv_read_data_options, default: {})
|
17
|
+
|
16
18
|
# there's a risk that this reads the whole file into memory and could cause a memory leak
|
17
19
|
def self.read_data(path)
|
18
20
|
raise StandardError, 'CSV path empty' if path.blank?
|
19
|
-
|
21
|
+
options = {
|
20
22
|
headers: true,
|
21
23
|
header_converters: ->(h) { h.to_sym },
|
22
|
-
encoding: 'utf-8'
|
24
|
+
encoding: 'utf-8'
|
25
|
+
}.merge(csv_read_data_options)
|
26
|
+
|
27
|
+
results = CSV.read(path, **options)
|
28
|
+
csv_wrapper_class.new(results)
|
23
29
|
end
|
24
30
|
|
31
|
+
# The purpose of this class is to reject empty lines. This causes lots of grief in importing.
|
32
|
+
# But why not use {CSV.read}'s `skip_lines` option? Because for some CSVs, it will never finish
|
33
|
+
# reading the file.
|
34
|
+
#
|
35
|
+
# There is a spec that demonstrates this approach works.
|
36
|
+
class CsvWrapper
|
37
|
+
include Enumerable
|
38
|
+
def initialize(original)
|
39
|
+
@original = original
|
40
|
+
end
|
41
|
+
|
42
|
+
delegate :headers, to: :@original
|
43
|
+
|
44
|
+
def each
|
45
|
+
@original.each do |row|
|
46
|
+
next if all_fields_are_empty_for(row: row)
|
47
|
+
yield(row)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
def all_fields_are_empty_for(row:)
|
54
|
+
row.to_hash.values.all?(&:blank?)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
class_attribute :csv_wrapper_class, default: CsvWrapper
|
58
|
+
|
25
59
|
def self.data_for_entry(data, _source_id, parser)
|
26
60
|
# If a multi-line CSV data is passed, grab the first row
|
27
61
|
data = data.first if data.is_a?(CSV::Table)
|
@@ -35,11 +69,7 @@ module Bulkrax
|
|
35
69
|
end
|
36
70
|
|
37
71
|
def build_metadata
|
38
|
-
|
39
|
-
unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
|
40
|
-
raise StandardError,
|
41
|
-
"Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}"
|
42
|
-
end
|
72
|
+
validate_record
|
43
73
|
|
44
74
|
self.parsed_metadata = {}
|
45
75
|
add_identifier
|
@@ -56,6 +86,12 @@ module Bulkrax
|
|
56
86
|
self.parsed_metadata
|
57
87
|
end
|
58
88
|
|
89
|
+
def validate_record
|
90
|
+
raise StandardError, 'Record not found' if record.nil?
|
91
|
+
raise StandardError, "Missing required elements, missing element(s) are: "\
|
92
|
+
"#{importerexporter.parser.missing_elements(record).join(', ')}" unless importerexporter.parser.required_elements?(record)
|
93
|
+
end
|
94
|
+
|
59
95
|
def add_identifier
|
60
96
|
self.parsed_metadata[work_identifier] = [record[source_identifier]]
|
61
97
|
end
|
@@ -67,9 +103,10 @@ module Bulkrax
|
|
67
103
|
end
|
68
104
|
|
69
105
|
def add_metadata_for_model
|
70
|
-
if factory_class == Collection
|
71
|
-
add_collection_type_gid
|
72
|
-
|
106
|
+
if defined?(::Collection) && factory_class == ::Collection
|
107
|
+
add_collection_type_gid if defined?(::Hyrax)
|
108
|
+
# add any additional collection metadata methods here
|
109
|
+
elsif factory_class == Bulkrax.file_model_class
|
73
110
|
validate_presence_of_filename!
|
74
111
|
add_path_to_file
|
75
112
|
validate_presence_of_parent!
|
@@ -106,7 +143,7 @@ module Bulkrax
|
|
106
143
|
self.parsed_metadata = {}
|
107
144
|
|
108
145
|
build_system_metadata
|
109
|
-
build_files_metadata
|
146
|
+
build_files_metadata if defined?(Collection) && !hyrax_record.is_a?(Collection)
|
110
147
|
build_relationship_metadata
|
111
148
|
build_mapping_metadata
|
112
149
|
self.save!
|
@@ -157,26 +194,48 @@ module Bulkrax
|
|
157
194
|
end
|
158
195
|
end
|
159
196
|
|
197
|
+
# The purpose of this helper module is to simplify testing of the rather complex
|
198
|
+
# switching logic for determining the method we use for building the value.
|
199
|
+
module AttributeBuilderMethod
|
200
|
+
# @param key [Symbol]
|
201
|
+
# @param value [Hash<String, Object>]
|
202
|
+
# @param entry [Bulkrax::Entry]
|
203
|
+
#
|
204
|
+
# @return [NilClass] when we won't be processing this field
|
205
|
+
# @return [Symbol] (either :build_value or :build_object)
|
206
|
+
def self.for(key:, value:, entry:)
|
207
|
+
return if key == 'model'
|
208
|
+
return if key == 'file'
|
209
|
+
return if key == entry.related_parents_parsed_mapping
|
210
|
+
return if key == entry.related_children_parsed_mapping
|
211
|
+
return if value['excluded'] || value[:excluded]
|
212
|
+
return if Bulkrax.reserved_properties.include?(key) && !entry.field_supported?(key)
|
213
|
+
|
214
|
+
object_key = key if value.key?('object') || value.key?(:object)
|
215
|
+
return unless entry.hyrax_record.respond_to?(key.to_s) || object_key.present?
|
216
|
+
|
217
|
+
models_to_skip = Array.wrap(value['skip_object_for_model_names'] || value[:skip_object_for_model_names] || [])
|
218
|
+
|
219
|
+
return :build_value if models_to_skip.detect { |model| entry.factory_class.model_name.name == model }
|
220
|
+
return :build_object if object_key.present?
|
221
|
+
|
222
|
+
:build_value
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
160
226
|
def build_mapping_metadata
|
161
227
|
mapping = fetch_field_mapping
|
162
228
|
mapping.each do |key, value|
|
163
|
-
|
164
|
-
next
|
165
|
-
next if value['excluded']
|
166
|
-
next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
|
167
|
-
|
168
|
-
object_key = key if value.key?('object')
|
169
|
-
next unless hyrax_record.respond_to?(key.to_s) || object_key.present?
|
229
|
+
method_name = AttributeBuilderMethod.for(key: key, value: value, entry: self)
|
230
|
+
next unless method_name
|
170
231
|
|
171
|
-
|
172
|
-
build_object(value)
|
173
|
-
else
|
174
|
-
build_value(key, value)
|
175
|
-
end
|
232
|
+
send(method_name, key, value)
|
176
233
|
end
|
177
234
|
end
|
178
235
|
|
179
|
-
def build_object(value)
|
236
|
+
def build_object(_key, value)
|
237
|
+
return unless hyrax_record.respond_to?(value['object'])
|
238
|
+
|
180
239
|
data = hyrax_record.send(value['object'])
|
181
240
|
return if data.empty?
|
182
241
|
|
@@ -185,6 +244,8 @@ module Bulkrax
|
|
185
244
|
end
|
186
245
|
|
187
246
|
def build_value(key, value)
|
247
|
+
return unless hyrax_record.respond_to?(key.to_s)
|
248
|
+
|
188
249
|
data = hyrax_record.send(key.to_s)
|
189
250
|
if data.is_a?(ActiveTriples::Relation)
|
190
251
|
if value['join']
|
@@ -217,6 +278,14 @@ module Bulkrax
|
|
217
278
|
end
|
218
279
|
|
219
280
|
def object_metadata(data)
|
281
|
+
# NOTE: What is `d` in this case:
|
282
|
+
#
|
283
|
+
# "[{\"single_object_first_name\"=>\"Fake\", \"single_object_last_name\"=>\"Fakerson\", \"single_object_position\"=>\"Leader, Jester, Queen\", \"single_object_language\"=>\"english\"}]"
|
284
|
+
#
|
285
|
+
# The above is a stringified version of a Ruby Array of Hashes. Using eval is a very bad idea as it
|
286
|
+
# will execute the value of `d` within the full Ruby interpreter context.
|
287
|
+
#
|
288
|
+
# TODO: Would it be possible to store this as a non-string? Maybe the actual Ruby Array and Hash?
|
220
289
|
data = data.map { |d| eval(d) }.flatten # rubocop:disable Security/Eval
|
221
290
|
|
222
291
|
data.each_with_index do |obj, index|
|
@@ -18,18 +18,9 @@ module Bulkrax
|
|
18
18
|
|
19
19
|
def export
|
20
20
|
current_run && setup_export_path
|
21
|
-
|
22
|
-
when 'collection'
|
23
|
-
create_from_collection
|
24
|
-
when 'importer'
|
25
|
-
create_from_importer
|
26
|
-
when 'worktype'
|
27
|
-
create_from_worktype
|
28
|
-
when 'all'
|
29
|
-
create_from_all
|
30
|
-
end
|
21
|
+
send("create_from_#{self.export_from}")
|
31
22
|
rescue StandardError => e
|
32
|
-
|
23
|
+
set_status_info(e)
|
33
24
|
end
|
34
25
|
|
35
26
|
# #export_source accessors
|
@@ -139,7 +130,7 @@ module Bulkrax
|
|
139
130
|
end
|
140
131
|
|
141
132
|
def export_properties
|
142
|
-
properties =
|
133
|
+
properties = Bulkrax.curation_concerns.map { |work| work.properties.keys }.flatten.uniq.sort
|
143
134
|
properties.reject { |prop| Bulkrax.reserved_properties.include?(prop) }
|
144
135
|
end
|
145
136
|
|
@@ -6,6 +6,6 @@ module Bulkrax
|
|
6
6
|
|
7
7
|
# Ideally we wouldn't have a column named "order", as it is a reserved SQL term. However, if we
|
8
8
|
# quote the column, all is well...for the application.
|
9
|
-
scope :ordered, -> { order("#{quoted_table_name}.#{connection.quote_column_name('order')}") }
|
9
|
+
scope :ordered, -> { order(Arel.sql("#{quoted_table_name}.#{connection.quote_column_name('order')}")) }
|
10
10
|
end
|
11
11
|
end
|
@@ -1,4 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
require 'marcel'
|
3
|
+
|
2
4
|
module Bulkrax
|
3
5
|
module ExportBehavior
|
4
6
|
extend ActiveSupport::Concern
|
@@ -10,9 +12,9 @@ module Bulkrax
|
|
10
12
|
rescue RSolr::Error::Http, CollectionsCreatedError => e
|
11
13
|
raise e
|
12
14
|
rescue StandardError => e
|
13
|
-
|
15
|
+
set_status_info(e)
|
14
16
|
else
|
15
|
-
|
17
|
+
set_status_info
|
16
18
|
end
|
17
19
|
|
18
20
|
def build_export_metadata
|
@@ -27,8 +29,8 @@ module Bulkrax
|
|
27
29
|
def filename(file_set)
|
28
30
|
return if file_set.original_file.blank?
|
29
31
|
fn = file_set.original_file.file_name.first
|
30
|
-
mime =
|
31
|
-
ext_mime =
|
32
|
+
mime = ::Marcel::MimeType.for(file_set.original_file.mime_type)
|
33
|
+
ext_mime = ::Marcel::MimeType.for(file_set.original_file.file_name)
|
32
34
|
if fn.include?(file_set.id) || importerexporter.metadata_only?
|
33
35
|
filename = "#{fn}.#{mime.to_sym}"
|
34
36
|
filename = fn if mime.to_s == ext_mime.to_s
|
@@ -11,16 +11,16 @@ module Bulkrax
|
|
11
11
|
unless self.importerexporter.validate_only
|
12
12
|
raise CollectionsCreatedError unless collections_created?
|
13
13
|
@item = factory.run!
|
14
|
-
add_user_to_permission_templates! if self.class.to_s.include?("Collection")
|
14
|
+
add_user_to_permission_templates! if self.class.to_s.include?("Collection") && defined?(::Hyrax)
|
15
15
|
parent_jobs if self.parsed_metadata[related_parents_parsed_mapping]&.join.present?
|
16
16
|
child_jobs if self.parsed_metadata[related_children_parsed_mapping]&.join.present?
|
17
17
|
end
|
18
18
|
rescue RSolr::Error::Http, CollectionsCreatedError => e
|
19
19
|
raise e
|
20
20
|
rescue StandardError => e
|
21
|
-
|
21
|
+
set_status_info(e)
|
22
22
|
else
|
23
|
-
|
23
|
+
set_status_info
|
24
24
|
ensure
|
25
25
|
self.save!
|
26
26
|
end
|
@@ -93,6 +93,8 @@ module Bulkrax
|
|
93
93
|
end
|
94
94
|
|
95
95
|
def add_admin_set_id
|
96
|
+
return unless defined?(::Hyrax)
|
97
|
+
|
96
98
|
self.parsed_metadata['admin_set_id'] = importerexporter.admin_set_id if self.parsed_metadata['admin_set_id'].blank?
|
97
99
|
end
|
98
100
|
|
@@ -165,6 +167,7 @@ module Bulkrax
|
|
165
167
|
# @param field [String] name of the controlled property
|
166
168
|
# @return [Boolean] provided value is a present, active authority ID for the provided field
|
167
169
|
def active_id_for_authority?(value, field)
|
170
|
+
return false unless defined?(::Hyrax)
|
168
171
|
field_service = ('Hyrax::' + "#{field}_service".camelcase).constantize
|
169
172
|
active_authority_ids = field_service.new.active_elements.map { |ae| ae['id'] }
|
170
173
|
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
require 'marcel'
|
2
3
|
|
3
4
|
module Bulkrax
|
4
5
|
module ImporterExporterBehavior
|
@@ -50,7 +51,14 @@ module Bulkrax
|
|
50
51
|
|
51
52
|
# Is this a zip file?
|
52
53
|
def zip?
|
53
|
-
parser_fields&.[]('import_file_path')
|
54
|
+
filename = parser_fields&.[]('import_file_path')
|
55
|
+
return false unless filename
|
56
|
+
return false unless File.file?(filename)
|
57
|
+
returning_value = false
|
58
|
+
File.open(filename) do |file|
|
59
|
+
returning_value = ::Marcel::MimeType.for(file).include?('application/zip')
|
60
|
+
end
|
61
|
+
returning_value
|
54
62
|
end
|
55
63
|
end
|
56
64
|
end
|
@@ -1,5 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
require 'zip'
|
3
|
+
require 'marcel'
|
3
4
|
|
4
5
|
module Bulkrax
|
5
6
|
# An abstract class that establishes the API for Bulkrax's import and export parsing.
|
@@ -10,10 +11,11 @@ module Bulkrax
|
|
10
11
|
alias importer importerexporter
|
11
12
|
alias exporter importerexporter
|
12
13
|
delegate :only_updates, :limit, :current_run, :errors, :mapping,
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
14
|
+
:seen, :increment_counters, :parser_fields, :user, :keys_without_numbers,
|
15
|
+
:key_without_numbers, :status, :set_status_info, :status_info, :status_at,
|
16
|
+
:exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
|
17
|
+
:zip?, :file?,
|
18
|
+
to: :importerexporter
|
17
19
|
|
18
20
|
# @todo Convert to `class_attribute :parser_fields, default: {}`
|
19
21
|
def self.parser_fields
|
@@ -275,10 +277,16 @@ module Bulkrax
|
|
275
277
|
|
276
278
|
# @return [Array<String>]
|
277
279
|
def required_elements
|
280
|
+
matched_elements = ((importerexporter.mapping.keys || []) & (Bulkrax.required_elements || []))
|
281
|
+
unless matched_elements.count == Bulkrax.required_elements.count
|
282
|
+
missing_elements = Bulkrax.required_elements - matched_elements
|
283
|
+
error_alert = "Missing mapping for at least one required element, missing mappings are: #{missing_elements.join(', ')}"
|
284
|
+
raise StandardError, error_alert
|
285
|
+
end
|
278
286
|
if Bulkrax.fill_in_blank_source_identifiers
|
279
|
-
|
287
|
+
Bulkrax.required_elements
|
280
288
|
else
|
281
|
-
[
|
289
|
+
Bulkrax.required_elements + [source_identifier]
|
282
290
|
end
|
283
291
|
end
|
284
292
|
|
@@ -351,16 +359,6 @@ module Bulkrax
|
|
351
359
|
end
|
352
360
|
end
|
353
361
|
|
354
|
-
# Is this a file?
|
355
|
-
def file?
|
356
|
-
parser_fields&.[]('import_file_path') && File.file?(parser_fields['import_file_path'])
|
357
|
-
end
|
358
|
-
|
359
|
-
# Is this a zip file?
|
360
|
-
def zip?
|
361
|
-
parser_fields&.[]('import_file_path') && MIME::Types.type_for(parser_fields['import_file_path']).include?('application/zip')
|
362
|
-
end
|
363
|
-
|
364
362
|
# Path for the import
|
365
363
|
# @return [String]
|
366
364
|
def import_file_path
|
@@ -11,7 +11,7 @@ module Bulkrax
|
|
11
11
|
def valid_import?
|
12
12
|
return true if import_fields.present?
|
13
13
|
rescue => e
|
14
|
-
|
14
|
+
set_status_info(e)
|
15
15
|
false
|
16
16
|
end
|
17
17
|
|
@@ -51,7 +51,7 @@ module Bulkrax
|
|
51
51
|
record_data = entry_class.data_for_entry(data_row, source_identifier, self)
|
52
52
|
next record_data if importerexporter.metadata_only?
|
53
53
|
|
54
|
-
record_data[:file] = bag.bag_files.join('|') if
|
54
|
+
record_data[:file] = bag.bag_files.join('|') if Bulkrax.curation_concerns.include? record_data[:model]&.constantize
|
55
55
|
record_data
|
56
56
|
end
|
57
57
|
else
|
@@ -82,19 +82,7 @@ module Bulkrax
|
|
82
82
|
end
|
83
83
|
importer.record_status
|
84
84
|
rescue StandardError => e
|
85
|
-
|
86
|
-
end
|
87
|
-
|
88
|
-
def total
|
89
|
-
@total = importer.parser_fields['total'] || 0 if importer?
|
90
|
-
|
91
|
-
@total = if exporter?
|
92
|
-
limit.nil? || limit.zero? ? current_record_ids.count : limit
|
93
|
-
end
|
94
|
-
|
95
|
-
return @total || 0
|
96
|
-
rescue StandardError
|
97
|
-
@total = 0
|
85
|
+
set_status_info(e)
|
98
86
|
end
|
99
87
|
|
100
88
|
# export methods
|
@@ -144,7 +132,7 @@ module Bulkrax
|
|
144
132
|
bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
|
145
133
|
rescue => e
|
146
134
|
entry.set_status_info(e)
|
147
|
-
|
135
|
+
set_status_info(e)
|
148
136
|
end
|
149
137
|
end
|
150
138
|
|
@@ -185,6 +173,7 @@ module Bulkrax
|
|
185
173
|
File.join(path, id)
|
186
174
|
end
|
187
175
|
|
176
|
+
# @todo(bjustice) - remove hyrax reference
|
188
177
|
def write_triples(folder_count, e)
|
189
178
|
sd = SolrDocument.find(e.identifier)
|
190
179
|
return if sd.nil?
|