bulkrax 4.4.2 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/bulkrax/entries_controller.rb +5 -0
- data/app/controllers/bulkrax/exporters_controller.rb +5 -0
- data/app/controllers/bulkrax/importers_controller.rb +8 -0
- data/app/factories/bulkrax/object_factory.rb +52 -4
- data/app/jobs/bulkrax/import_file_set_job.rb +1 -0
- data/app/jobs/bulkrax/import_work_job.rb +13 -5
- data/app/matchers/bulkrax/application_matcher.rb +4 -2
- data/app/models/bulkrax/csv_entry.rb +15 -3
- data/app/models/bulkrax/entry.rb +2 -1
- data/app/models/bulkrax/oai_entry.rb +41 -7
- data/app/models/bulkrax/rdf_entry.rb +1 -1
- data/app/models/bulkrax/xml_entry.rb +54 -12
- data/app/models/concerns/bulkrax/file_factory.rb +9 -3
- data/app/models/concerns/bulkrax/import_behavior.rb +17 -10
- data/app/parsers/bulkrax/application_parser.rb +6 -0
- data/app/parsers/bulkrax/csv_parser.rb +10 -3
- data/app/parsers/bulkrax/oai_dc_parser.rb +0 -6
- data/app/parsers/bulkrax/xml_parser.rb +6 -0
- data/app/views/hyrax/dashboard/sidebar/_bulkrax_sidebar_additions.html.erb +3 -1
- data/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb +24 -21
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +47 -0
- data/lib/generators/bulkrax/install_generator.rb +20 -0
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +3 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 49a1f0ccc806cf73a7872a634c8c2819dac62f98f0a9dc163e7ada8931d9b1fe
|
4
|
+
data.tar.gz: ecf5c8ad3e4864110665cfb5c6c5f6329e3b868414815bba163646db839400e7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: df394e4fbbc6ca0a71eb595c075c5181d24098b1ab889ff185da10991ec01c2eaf60239658564838b67df4c0a421306de923d2eab4bbeffafb31321acb9ba1ff
|
7
|
+
data.tar.gz: 35b691e96d0e59f83efadc35364283239f7f741ab4bf9d859756dacc8483e380090512cd10bf3b6ec4ce81936d00c88eb4d1ed96742c384e0f3082028ffdcf45
|
@@ -7,6 +7,7 @@ module Bulkrax
|
|
7
7
|
class EntriesController < ApplicationController
|
8
8
|
include Hyrax::ThemedLayoutController
|
9
9
|
before_action :authenticate_user!
|
10
|
+
before_action :check_permissions
|
10
11
|
with_themed_layout 'dashboard'
|
11
12
|
|
12
13
|
def show
|
@@ -40,5 +41,9 @@ module Bulkrax
|
|
40
41
|
add_breadcrumb @exporter.name, bulkrax.exporter_path(@exporter.id)
|
41
42
|
add_breadcrumb @entry.id
|
42
43
|
end
|
44
|
+
|
45
|
+
def check_permissions
|
46
|
+
raise CanCan::AccessDenied unless current_ability.can_import_works? || current_ability.can_export_works?
|
47
|
+
end
|
43
48
|
end
|
44
49
|
end
|
@@ -7,6 +7,7 @@ module Bulkrax
|
|
7
7
|
include Hyrax::ThemedLayoutController
|
8
8
|
include Bulkrax::DownloadBehavior
|
9
9
|
before_action :authenticate_user!
|
10
|
+
before_action :check_permissions
|
10
11
|
before_action :set_exporter, only: [:show, :edit, :update, :destroy]
|
11
12
|
with_themed_layout 'dashboard'
|
12
13
|
|
@@ -131,5 +132,9 @@ module Bulkrax
|
|
131
132
|
def file_path
|
132
133
|
"#{@exporter.exporter_export_zip_path}/#{params['exporter']['exporter_export_zip_files']}"
|
133
134
|
end
|
135
|
+
|
136
|
+
def check_permissions
|
137
|
+
raise CanCan::AccessDenied unless current_ability.can_export_works?
|
138
|
+
end
|
134
139
|
end
|
135
140
|
end
|
@@ -14,6 +14,7 @@ module Bulkrax
|
|
14
14
|
protect_from_forgery unless: -> { api_request? }
|
15
15
|
before_action :token_authenticate!, if: -> { api_request? }, only: [:create, :update, :delete]
|
16
16
|
before_action :authenticate_user!, unless: -> { api_request? }
|
17
|
+
before_action :check_permissions
|
17
18
|
before_action :set_importer, only: [:show, :edit, :update, :destroy]
|
18
19
|
with_themed_layout 'dashboard'
|
19
20
|
|
@@ -76,6 +77,9 @@ module Bulkrax
|
|
76
77
|
@importer = Importer.new(importer_params)
|
77
78
|
field_mapping_params
|
78
79
|
@importer.validate_only = true if params[:commit] == 'Create and Validate'
|
80
|
+
# the following line is needed to handle updating remote files of a FileSet
|
81
|
+
# on a new import otherwise it only gets updated during the update path
|
82
|
+
@importer.parser_fields['update_files'] = true if params[:commit] == 'Create and Import'
|
79
83
|
if @importer.save
|
80
84
|
files_for_import(file, cloud_files)
|
81
85
|
if params[:commit] == 'Create and Import'
|
@@ -316,6 +320,10 @@ module Bulkrax
|
|
316
320
|
end
|
317
321
|
@importer.save
|
318
322
|
end
|
323
|
+
|
324
|
+
def check_permissions
|
325
|
+
raise CanCan::AccessDenied unless current_ability.can_import_works?
|
326
|
+
end
|
319
327
|
end
|
320
328
|
# rubocop:enable Metrics/ClassLength
|
321
329
|
end
|
@@ -6,6 +6,27 @@ module Bulkrax
|
|
6
6
|
include Bulkrax::FileFactory
|
7
7
|
include DynamicRecordLookup
|
8
8
|
|
9
|
+
# @api private
|
10
|
+
#
|
11
|
+
# These are the attributes that we assume all "work type" classes (e.g. the given :klass) will
|
12
|
+
# have in addition to their specific attributes.
|
13
|
+
#
|
14
|
+
# @return [Array<Symbol>]
|
15
|
+
# @see #permitted_attributes
|
16
|
+
class_attribute :base_permitted_attributes,
|
17
|
+
default: %i[id edit_users edit_groups read_groups visibility work_members_attributes admin_set_id]
|
18
|
+
|
19
|
+
# @return [Boolean]
|
20
|
+
#
|
21
|
+
# @example
|
22
|
+
# Bulkrax::ObjectFactory.transformation_removes_blank_hash_values = true
|
23
|
+
#
|
24
|
+
# @see #transform_attributes
|
25
|
+
# @see https://github.com/samvera-labs/bulkrax/pull/708 For discussion concerning this feature
|
26
|
+
# @see https://github.com/samvera-labs/bulkrax/wiki/Interacting-with-Metadata For documentation
|
27
|
+
# concerning default behavior.
|
28
|
+
class_attribute :transformation_removes_blank_hash_values, default: false
|
29
|
+
|
9
30
|
define_model_callbacks :save, :create
|
10
31
|
attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :related_parents_parsed_mapping, :importer_run_id
|
11
32
|
|
@@ -58,7 +79,7 @@ module Bulkrax
|
|
58
79
|
elsif klass == FileSet
|
59
80
|
update_file_set(attrs)
|
60
81
|
else
|
61
|
-
|
82
|
+
update_work(attrs)
|
62
83
|
end
|
63
84
|
end
|
64
85
|
object.apply_depositor_metadata(@user) && object.save! if object.depositor.nil?
|
@@ -104,7 +125,7 @@ module Bulkrax
|
|
104
125
|
elsif klass == FileSet
|
105
126
|
create_file_set(attrs)
|
106
127
|
else
|
107
|
-
|
128
|
+
create_work(attrs)
|
108
129
|
end
|
109
130
|
end
|
110
131
|
end
|
@@ -139,6 +160,14 @@ module Bulkrax
|
|
139
160
|
Hyrax::CurationConcern.actor
|
140
161
|
end
|
141
162
|
|
163
|
+
def create_work(attrs)
|
164
|
+
work_actor.create(environment(attrs))
|
165
|
+
end
|
166
|
+
|
167
|
+
def update_work(attrs)
|
168
|
+
work_actor.update(environment(attrs))
|
169
|
+
end
|
170
|
+
|
142
171
|
def create_collection(attrs)
|
143
172
|
attrs = clean_attrs(attrs)
|
144
173
|
attrs = collection_type(attrs)
|
@@ -227,13 +256,32 @@ module Bulkrax
|
|
227
256
|
def transform_attributes(update: false)
|
228
257
|
@transform_attributes = attributes.slice(*permitted_attributes)
|
229
258
|
@transform_attributes.merge!(file_attributes(update_files)) if with_files
|
230
|
-
@transform_attributes
|
259
|
+
@transform_attributes = remove_blank_hash_values(@transform_attributes) if transformation_removes_blank_hash_values?
|
231
260
|
update ? @transform_attributes.except(:id) : @transform_attributes
|
232
261
|
end
|
233
262
|
|
234
263
|
# Regardless of what the Parser gives us, these are the properties we are prepared to accept.
|
235
264
|
def permitted_attributes
|
236
|
-
klass.properties.keys.map(&:to_sym) +
|
265
|
+
klass.properties.keys.map(&:to_sym) + base_permitted_attributes
|
266
|
+
end
|
267
|
+
|
268
|
+
# Return a copy of the given attributes, such that all values that are empty or an array of all
|
269
|
+
# empty values are fully emptied. (See implementation details)
|
270
|
+
#
|
271
|
+
# @param attributes [Hash]
|
272
|
+
# @return [Hash]
|
273
|
+
#
|
274
|
+
# @see https://github.com/emory-libraries/dlp-curate/issues/1973
|
275
|
+
def remove_blank_hash_values(attributes)
|
276
|
+
dupe = attributes.dup
|
277
|
+
dupe.each do |key, values|
|
278
|
+
if values.is_a?(Array) && values.all? { |value| value.is_a?(String) && value.empty? }
|
279
|
+
dupe[key] = []
|
280
|
+
elsif values.is_a?(String) && values.empty?
|
281
|
+
dupe[key] = nil
|
282
|
+
end
|
283
|
+
end
|
284
|
+
dupe
|
237
285
|
end
|
238
286
|
end
|
239
287
|
end
|
@@ -5,7 +5,7 @@ module Bulkrax
|
|
5
5
|
queue_as :import
|
6
6
|
|
7
7
|
# rubocop:disable Rails/SkipsModelValidations
|
8
|
-
def perform(entry_id, run_id, *)
|
8
|
+
def perform(entry_id, run_id, time_to_live = 3, *)
|
9
9
|
entry = Entry.find(entry_id)
|
10
10
|
importer_run = ImporterRun.find(run_id)
|
11
11
|
entry.build
|
@@ -24,13 +24,21 @@ module Bulkrax
|
|
24
24
|
entry.save!
|
25
25
|
entry.importer.current_run = importer_run
|
26
26
|
entry.importer.record_status
|
27
|
-
rescue Bulkrax::CollectionsCreatedError
|
28
|
-
|
27
|
+
rescue Bulkrax::CollectionsCreatedError => e
|
28
|
+
Rails.logger.warn("#{self.class} entry_id: #{entry_id}, run_id: #{run_id} encountered #{e.class}: #{e.message}")
|
29
|
+
# You get 3 attempts at the above perform before we have the import exception cascade into
|
30
|
+
# the Sidekiq retry ecosystem.
|
31
|
+
# rubocop:disable Style/IfUnlessModifier
|
32
|
+
if time_to_live <= 1
|
33
|
+
raise "Exhauted reschedule limit for #{self.class} entry_id: #{entry_id}, run_id: #{run_id}. Attemping retries"
|
34
|
+
end
|
35
|
+
# rubocop:enable Style/IfUnlessModifier
|
36
|
+
reschedule(entry_id, run_id, time_to_live)
|
29
37
|
end
|
30
38
|
# rubocop:enable Rails/SkipsModelValidations
|
31
39
|
|
32
|
-
def reschedule(entry_id, run_id)
|
33
|
-
ImportWorkJob.set(wait: 1.minute).perform_later(entry_id, run_id)
|
40
|
+
def reschedule(entry_id, run_id, time_to_live)
|
41
|
+
ImportWorkJob.set(wait: 1.minute).perform_later(entry_id, run_id, time_to_live - 1)
|
34
42
|
end
|
35
43
|
end
|
36
44
|
end
|
@@ -6,6 +6,10 @@ module Bulkrax
|
|
6
6
|
class ApplicationMatcher
|
7
7
|
attr_accessor :to, :from, :parsed, :if, :split, :excluded, :nested_type
|
8
8
|
|
9
|
+
# New parse methods will need to be added here; you'll also want to define a corresponding
|
10
|
+
# "parse_#{field}" method.
|
11
|
+
class_attribute :parsed_fields, instance_writer: false, default: ['remote_files', 'language', 'subject', 'types', 'model', 'resource_type', 'format_original']
|
12
|
+
|
9
13
|
def initialize(args)
|
10
14
|
args.each do |k, v|
|
11
15
|
send("#{k}=", v)
|
@@ -38,8 +42,6 @@ module Bulkrax
|
|
38
42
|
end
|
39
43
|
|
40
44
|
def process_parse
|
41
|
-
# New parse methods will need to be added here
|
42
|
-
parsed_fields = ['remote_files', 'language', 'subject', 'types', 'model', 'resource_type', 'format_original']
|
43
45
|
# This accounts for prefixed matchers
|
44
46
|
parser = parsed_fields.find { |field| to&.include? field }
|
45
47
|
|
@@ -7,7 +7,7 @@ module Bulkrax
|
|
7
7
|
# We do too much in these entry classes. We need to extract the common logic from the various
|
8
8
|
# entry models into a module that can be shared between them.
|
9
9
|
class CsvEntry < Entry # rubocop:disable Metrics/ClassLength
|
10
|
-
serialize :raw_metadata,
|
10
|
+
serialize :raw_metadata, Bulkrax::NormalizedJson
|
11
11
|
|
12
12
|
def self.fields_from_data(data)
|
13
13
|
data.headers.flatten.compact.uniq
|
@@ -36,10 +36,14 @@ module Bulkrax
|
|
36
36
|
|
37
37
|
def build_metadata
|
38
38
|
raise StandardError, 'Record not found' if record.nil?
|
39
|
-
|
39
|
+
unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
|
40
|
+
raise StandardError,
|
41
|
+
"Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}"
|
42
|
+
end
|
40
43
|
|
41
44
|
self.parsed_metadata = {}
|
42
45
|
add_identifier
|
46
|
+
establish_factory_class
|
43
47
|
add_ingested_metadata
|
44
48
|
# TODO(alishaevn): remove the collections stuff entirely and only reference collections via the new parents code
|
45
49
|
add_collections
|
@@ -56,6 +60,12 @@ module Bulkrax
|
|
56
60
|
self.parsed_metadata[work_identifier] = [record[source_identifier]]
|
57
61
|
end
|
58
62
|
|
63
|
+
def establish_factory_class
|
64
|
+
parser.model_field_mappings.each do |key|
|
65
|
+
add_metadata('model', record[key]) if record.key?(key)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
59
69
|
def add_metadata_for_model
|
60
70
|
if factory_class == Collection
|
61
71
|
add_collection_type_gid
|
@@ -107,7 +117,9 @@ module Bulkrax
|
|
107
117
|
# Metadata required by Bulkrax for round-tripping
|
108
118
|
def build_system_metadata
|
109
119
|
self.parsed_metadata['id'] = hyrax_record.id
|
110
|
-
|
120
|
+
source_id = hyrax_record.send(work_identifier)
|
121
|
+
source_id = source_id.to_a.first if source_id.is_a?(ActiveTriples::Relation)
|
122
|
+
self.parsed_metadata[source_identifier] = source_id
|
111
123
|
self.parsed_metadata[key_for_export('model')] = hyrax_record.has_model.first
|
112
124
|
end
|
113
125
|
|
data/app/models/bulkrax/entry.rb
CHANGED
@@ -4,6 +4,7 @@ module Bulkrax
|
|
4
4
|
# Custom error class for collections_created?
|
5
5
|
class CollectionsCreatedError < RuntimeError; end
|
6
6
|
class OAIError < RuntimeError; end
|
7
|
+
|
7
8
|
class Entry < ApplicationRecord
|
8
9
|
include Bulkrax::HasMatchers
|
9
10
|
include Bulkrax::ImportBehavior
|
@@ -15,7 +16,7 @@ module Bulkrax
|
|
15
16
|
alias importer importerexporter
|
16
17
|
alias exporter importerexporter
|
17
18
|
|
18
|
-
serialize :parsed_metadata,
|
19
|
+
serialize :parsed_metadata, Bulkrax::NormalizedJson
|
19
20
|
# Do not serialize raw_metadata, so that we can support XML or other formats
|
20
21
|
serialize :collection_ids, Array
|
21
22
|
|
@@ -5,7 +5,7 @@ require 'ostruct'
|
|
5
5
|
|
6
6
|
module Bulkrax
|
7
7
|
class OaiEntry < Entry
|
8
|
-
serialize :raw_metadata,
|
8
|
+
serialize :raw_metadata, Bulkrax::NormalizedJson
|
9
9
|
|
10
10
|
delegate :record, to: :raw_record
|
11
11
|
|
@@ -28,13 +28,16 @@ module Bulkrax
|
|
28
28
|
def build_metadata
|
29
29
|
self.parsed_metadata = {}
|
30
30
|
self.parsed_metadata[work_identifier] = [record.header.identifier]
|
31
|
+
self.raw_metadata = { xml: record.metadata.to_s }
|
31
32
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
33
|
+
# We need to establish the #factory_class before we proceed with the metadata. See
|
34
|
+
# https://github.com/samvera-labs/bulkrax/issues/702 for further details.
|
35
|
+
#
|
36
|
+
# tl;dr - if we don't have the right factory_class we might skip properties that are
|
37
|
+
# specifically assigned to the factory class
|
38
|
+
establish_factory_class
|
39
|
+
add_metadata_from_record
|
40
|
+
add_thumbnail_url
|
38
41
|
|
39
42
|
add_visibility
|
40
43
|
add_rights_statement
|
@@ -53,6 +56,37 @@ module Bulkrax
|
|
53
56
|
end
|
54
57
|
end
|
55
58
|
|
59
|
+
# To ensure we capture the correct parse data, we first need to establish the factory_class.
|
60
|
+
# @see https://github.com/samvera-labs/bulkrax/issues/702
|
61
|
+
def establish_factory_class
|
62
|
+
model_field_names = parser.model_field_mappings
|
63
|
+
|
64
|
+
each_candidate_metadata_node do |node|
|
65
|
+
next unless model_field_names.include?(node.name)
|
66
|
+
add_metadata(node.name, node.content)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
def add_metadata_from_record
|
71
|
+
each_candidate_metadata_node do |node|
|
72
|
+
add_metadata(node.name, node.content)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
# A method that you could override to better handle the shape of the record's metadata.
|
77
|
+
# @yieldparam node [Object<#name, #content>]
|
78
|
+
def each_candidate_metadata_node
|
79
|
+
record.metadata.children.each do |child|
|
80
|
+
child.children.each do |node|
|
81
|
+
yield(node)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
def add_thumbnail_url
|
87
|
+
add_metadata('thumbnail_url', thumbnail_url)
|
88
|
+
end
|
89
|
+
|
56
90
|
# Retrieve list of collections for the entry; add to collection_ids
|
57
91
|
# If OAI-PMH doesn't return setSpec in the headers for GetRecord, use parser.collection_name
|
58
92
|
# in this case, if 'All' is selected, records will not be added to a collection.
|
@@ -4,7 +4,7 @@ require 'nokogiri'
|
|
4
4
|
module Bulkrax
|
5
5
|
# Generic XML Entry
|
6
6
|
class XmlEntry < Entry
|
7
|
-
serialize :raw_metadata,
|
7
|
+
serialize :raw_metadata, Bulkrax::NormalizedJson
|
8
8
|
|
9
9
|
def self.fields_from_data(data); end
|
10
10
|
|
@@ -43,15 +43,14 @@ module Bulkrax
|
|
43
43
|
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
|
44
44
|
self.parsed_metadata = {}
|
45
45
|
self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
end
|
46
|
+
|
47
|
+
# We need to establish the #factory_class before we proceed with the metadata. See
|
48
|
+
# https://github.com/samvera-labs/bulkrax/issues/702 for further details.
|
49
|
+
#
|
50
|
+
# tl;dr - if we don't have the right factory_class we might skip properties that are
|
51
|
+
# specifically assigned to the factory class
|
52
|
+
establish_factory_class
|
53
|
+
add_metadata_from_record
|
55
54
|
add_visibility
|
56
55
|
add_rights_statement
|
57
56
|
add_admin_set_id
|
@@ -63,11 +62,54 @@ module Bulkrax
|
|
63
62
|
self.parsed_metadata
|
64
63
|
end
|
65
64
|
|
66
|
-
|
67
|
-
|
65
|
+
def establish_factory_class
|
66
|
+
model_field_names = parser.model_field_mappings
|
67
|
+
|
68
|
+
each_candidate_metadata_node_name_and_content(elements: parser.model_field_mappings) do |name, content|
|
69
|
+
next unless model_field_names.include?(name)
|
70
|
+
add_metadata(name, content)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def add_metadata_from_record
|
75
|
+
each_candidate_metadata_node_name_and_content do |name, content|
|
76
|
+
add_metadata(name, content)
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
def each_candidate_metadata_node_name_and_content(elements: field_mapping_from_values_for_xml_element_names)
|
81
|
+
elements.each do |name|
|
82
|
+
# NOTE: the XML element name's case matters
|
83
|
+
nodes = record.xpath("//*[name()='#{name}']")
|
84
|
+
next if nodes.empty?
|
85
|
+
|
86
|
+
nodes.each do |node|
|
87
|
+
node.children.each do |content|
|
88
|
+
next if content.to_s.blank?
|
89
|
+
|
90
|
+
yield(name, content.to_s)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
# Returns the explicitly declared "from" key's value of each parser's element's value. (Yes, I
|
97
|
+
# would like a proper class for the thing I just tried to describe.)
|
98
|
+
#
|
99
|
+
# @return [Array<String>]
|
100
|
+
#
|
101
|
+
# @todo Additionally, we may want to revisit the XML parser fundamental logic; namely we only
|
102
|
+
# parse nodes that are explicitly declared within the `from`. This is a bit different
|
103
|
+
# than other parsers, in that they will make assumptions about each encountered column (in
|
104
|
+
# the case of CSV) or node (in the case of OAI). tl;dr - Here there be dragons.
|
105
|
+
def field_mapping_from_values_for_xml_element_names
|
68
106
|
Bulkrax.field_mappings[self.importerexporter.parser_klass].map do |_k, v|
|
69
107
|
v[:from]
|
70
108
|
end.flatten.compact.uniq
|
71
109
|
end
|
110
|
+
|
111
|
+
# Included for potential downstream adopters
|
112
|
+
alias xml_elements field_mapping_from_values_for_xml_element_names
|
113
|
+
deprecation_deprecate xml_elements: "Use '#{self}#field_mapping_from_values_for_xml_element_names' instead"
|
72
114
|
end
|
73
115
|
end
|
@@ -45,9 +45,15 @@ module Bulkrax
|
|
45
45
|
end
|
46
46
|
|
47
47
|
def new_remote_files
|
48
|
-
|
49
|
-
|
50
|
-
|
48
|
+
@new_remote_files ||= if object.is_a? FileSet
|
49
|
+
parsed_remote_files.select do |file|
|
50
|
+
# is the url valid?
|
51
|
+
is_valid = file[:url]&.match(URI::ABS_URI)
|
52
|
+
# does the file already exist
|
53
|
+
is_existing = object.import_url && object.import_url == file[:url]
|
54
|
+
is_valid && !is_existing
|
55
|
+
end
|
56
|
+
elsif object.present? && object.file_sets.present?
|
51
57
|
parsed_remote_files.select do |file|
|
52
58
|
# is the url valid?
|
53
59
|
is_valid = file[:url]&.match(URI::ABS_URI)
|
@@ -117,23 +117,30 @@ module Bulkrax
|
|
117
117
|
Bulkrax.qa_controlled_properties.each do |field|
|
118
118
|
next if parsed_metadata[field].blank?
|
119
119
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
parsed_metadata[field][i] = validated_uri_value
|
125
|
-
else
|
126
|
-
debug_msg = %(Unable to locate active authority ID "#{value}" in config/authorities/#{field.pluralize}.yml)
|
127
|
-
Rails.logger.debug(debug_msg)
|
128
|
-
error_msg = %("#{value}" is not a valid and/or active authority ID for the :#{field} field)
|
129
|
-
raise ::StandardError, error_msg
|
120
|
+
if multiple?(field)
|
121
|
+
parsed_metadata[field].each_with_index do |value, i|
|
122
|
+
next if value.blank?
|
123
|
+
parsed_metadata[field][i] = sanitize_controlled_uri_value(field, value)
|
130
124
|
end
|
125
|
+
else
|
126
|
+
parsed_metadata[field] = sanitize_controlled_uri_value(field, parsed_metadata[field])
|
131
127
|
end
|
132
128
|
end
|
133
129
|
|
134
130
|
true
|
135
131
|
end
|
136
132
|
|
133
|
+
def sanitize_controlled_uri_value(field, value)
|
134
|
+
if (validated_uri_value = validate_value(value, field))
|
135
|
+
validated_uri_value
|
136
|
+
else
|
137
|
+
debug_msg = %(Unable to locate active authority ID "#{value}" in config/authorities/#{field.pluralize}.yml)
|
138
|
+
Rails.logger.debug(debug_msg)
|
139
|
+
error_msg = %("#{value}" is not a valid and/or active authority ID for the :#{field} field)
|
140
|
+
raise ::StandardError, error_msg
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
137
144
|
# @param value [String] value to validate
|
138
145
|
# @param field [String] name of the controlled property
|
139
146
|
# @return [String, nil] validated URI value or nil
|
@@ -51,6 +51,12 @@ module Bulkrax
|
|
51
51
|
raise NotImplementedError, 'must be defined'
|
52
52
|
end
|
53
53
|
|
54
|
+
# @api public
|
55
|
+
# @abstract Subclass and override {#file_set_entry_class} to implement behavior for the parser.
|
56
|
+
def file_set_entry_class
|
57
|
+
raise NotImplementedError, 'must be defined'
|
58
|
+
end
|
59
|
+
|
54
60
|
# @api public
|
55
61
|
# @abstract Subclass and override {#records} to implement behavior for the parser.
|
56
62
|
def records(_opts = {})
|
@@ -196,10 +196,13 @@ module Bulkrax
|
|
196
196
|
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
197
197
|
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
198
198
|
when 'collection'
|
199
|
-
@work_ids = ActiveFedora::SolrService.query(
|
199
|
+
@work_ids = ActiveFedora::SolrService.query(
|
200
|
+
"member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters} AND has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')})", method: :post, rows: 2_000_000_000
|
201
|
+
).map(&:id)
|
200
202
|
# get the parent collection and child collections
|
201
203
|
@collection_ids = ActiveFedora::SolrService.query("id:#{importerexporter.export_source} #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
202
|
-
@collection_ids += ActiveFedora::SolrService.query("has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source}", method: :post,
|
204
|
+
@collection_ids += ActiveFedora::SolrService.query("has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source}", method: :post,
|
205
|
+
rows: 2_147_483_647).map(&:id)
|
203
206
|
find_child_file_sets(@work_ids)
|
204
207
|
when 'worktype'
|
205
208
|
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
@@ -234,7 +237,7 @@ module Bulkrax
|
|
234
237
|
instance_variable_set(instance_var, ActiveFedora::SolrService.post(
|
235
238
|
extra_filters.to_s,
|
236
239
|
fq: [
|
237
|
-
%(#{
|
240
|
+
%(#{solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
|
238
241
|
"has_model_ssim:(#{models_to_search.join(' OR ')})"
|
239
242
|
],
|
240
243
|
fl: 'id',
|
@@ -243,6 +246,10 @@ module Bulkrax
|
|
243
246
|
end
|
244
247
|
end
|
245
248
|
|
249
|
+
def solr_name(base_name)
|
250
|
+
Module.const_defined?(:Solrizer) ? ::Solrizer.solr_name(base_name) : ::ActiveFedora.index_field_mapper.solr_name(base_name)
|
251
|
+
end
|
252
|
+
|
246
253
|
def create_new_entries
|
247
254
|
current_record_ids.each_with_index do |id, index|
|
248
255
|
break if limit_reached?(limit, index)
|
@@ -30,12 +30,6 @@ module Bulkrax
|
|
30
30
|
OaiSetEntry
|
31
31
|
end
|
32
32
|
|
33
|
-
def file_set_entry_class; end
|
34
|
-
|
35
|
-
def create_relationships; end
|
36
|
-
|
37
|
-
def create_file_sets; end
|
38
|
-
|
39
33
|
def records(opts = {})
|
40
34
|
opts[:metadata_prefix] ||= importerexporter.parser_fields['metadata_prefix']
|
41
35
|
opts[:set] = collection_name unless collection_name == 'all'
|
@@ -12,6 +12,12 @@ module Bulkrax
|
|
12
12
|
# @todo not yet supported
|
13
13
|
def create_collections; end
|
14
14
|
|
15
|
+
# @todo not yet supported
|
16
|
+
def file_set_entry_class; end
|
17
|
+
|
18
|
+
# @todo not yet supported
|
19
|
+
def create_file_sets; end
|
20
|
+
|
15
21
|
# TODO: change to differentiate between collection and work records when adding ability to import collection metadata
|
16
22
|
def works_total
|
17
23
|
total
|
@@ -1,7 +1,9 @@
|
|
1
|
-
<% if
|
1
|
+
<% if current_ability.can_import_works? %>
|
2
2
|
<%= menu.nav_link(bulkrax.importers_path) do %>
|
3
3
|
<span class="fa fa-cloud-upload" aria-hidden="true"></span> <span class="sidebar-action-text"><%= t('bulkrax.admin.sidebar.importers') %></span>
|
4
4
|
<% end %>
|
5
|
+
<% end %>
|
6
|
+
<% if current_ability.can_export_works? %>
|
5
7
|
<%= menu.nav_link(bulkrax.exporters_path) do %>
|
6
8
|
<span class="fa fa-cloud-download" aria-hidden="true"></span> <span class="sidebar-action-text"><%= t('bulkrax.admin.sidebar.exporters') %></span>
|
7
9
|
<% end %>
|
@@ -1,27 +1,30 @@
|
|
1
|
-
|
1
|
+
<li class="h5 nav-item"><%= t('hyrax.admin.sidebar.repository_objects') %></li>
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
3
|
+
<%= menu.nav_link(hyrax.my_collections_path,
|
4
|
+
class: "nav-link",
|
5
|
+
onclick: "dontChangeAccordion(event);",
|
6
|
+
also_active_for: hyrax.dashboard_collections_path) do %>
|
7
|
+
<span class="fa fa-folder-open" aria-hidden="true"></span> <span class="sidebar-action-text"><%= t('hyrax.admin.sidebar.collections') %></span>
|
8
|
+
<% end %>
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
10
|
+
<%= menu.nav_link(hyrax.my_works_path,
|
11
|
+
class: "nav-link",
|
12
|
+
onclick: "dontChangeAccordion(event);",
|
13
|
+
also_active_for: hyrax.dashboard_works_path) do %>
|
14
|
+
<span class="fa fa-file" aria-hidden="true"></span> <span class="sidebar-action-text"><%= t('hyrax.admin.sidebar.works') %></span>
|
15
|
+
<% end %>
|
16
16
|
|
17
|
-
|
17
|
+
<% if ::Hyrax::DashboardController&.respond_to?(:sidebar_partials) %>
|
18
18
|
<%= render 'hyrax/dashboard/sidebar/menu_partials', menu: menu, section: :repository_content %>
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
<%= menu.nav_link(bulkrax.exporters_path) do %>
|
25
|
-
<span class="fa fa-cloud-download" aria-hidden="true"></span> <span class="sidebar-action-text"><%= t('bulkrax.admin.sidebar.exporters') %></span>
|
19
|
+
<% else %>
|
20
|
+
<% if current_ability.can_import_works? %>
|
21
|
+
<%= menu.nav_link(bulkrax.importers_path) do %>
|
22
|
+
<span class="fa fa-cloud-upload" aria-hidden="true"></span> <span class="sidebar-action-text"><%= t('bulkrax.admin.sidebar.importers') %></span>
|
23
|
+
<% end %>
|
26
24
|
<% end %>
|
25
|
+
<% if current_ability.can_export_works? %>
|
26
|
+
<%= menu.nav_link(bulkrax.exporters_path) do %>
|
27
|
+
<span class="fa fa-cloud-download" aria-hidden="true"></span> <span class="sidebar-action-text"><%= t('bulkrax.admin.sidebar.exporters') %></span>
|
28
|
+
<% end %>
|
27
29
|
<% end %>
|
30
|
+
<% end %>
|
data/lib/bulkrax/version.rb
CHANGED
data/lib/bulkrax.rb
CHANGED
@@ -4,8 +4,13 @@ require "bulkrax/version"
|
|
4
4
|
require "bulkrax/engine"
|
5
5
|
require 'active_support/all'
|
6
6
|
|
7
|
+
# rubocop:disable Metrics/ModuleLength
|
7
8
|
module Bulkrax
|
8
9
|
class << self
|
10
|
+
# @todo Move from module attribute methods to a configuration class. With module attributes,
|
11
|
+
# when we make a change we are polluting the global space. This means that our tests that
|
12
|
+
# modify these config values are modifying global state, which is not desirable, as it can
|
13
|
+
# introduce unexpected flaky tests.
|
9
14
|
mattr_accessor :api_definition,
|
10
15
|
:default_field_mapping,
|
11
16
|
:default_work_type,
|
@@ -168,4 +173,46 @@ module Bulkrax
|
|
168
173
|
def self.setup
|
169
174
|
yield self
|
170
175
|
end
|
176
|
+
|
177
|
+
# Responsible for stripping hidden characters from the given string.
|
178
|
+
#
|
179
|
+
# @param value [#to_s]
|
180
|
+
# @return [String] with hidden characters removed
|
181
|
+
#
|
182
|
+
# @see https://github.com/samvera-labs/bulkrax/issues/688
|
183
|
+
def self.normalize_string(value)
|
184
|
+
# Removing [Byte Order Mark (BOM)](https://en.wikipedia.org/wiki/Byte_order_mark)
|
185
|
+
value.to_s.delete("\xEF\xBB\xBF")
|
186
|
+
end
|
187
|
+
|
188
|
+
# This class conforms to the ActiveRecord `serialize` coder interface. Its job is to ensure that we
|
189
|
+
# don't have keys with the tricksy Byte Order Mark character.
|
190
|
+
#
|
191
|
+
# @see https://api.rubyonrails.org/classes/ActiveRecord/AttributeMethods/Serialization/ClassMethods.html#method-i-serialize
|
192
|
+
class NormalizedJson
|
193
|
+
def self.normalize_keys(hash)
|
194
|
+
return hash unless hash.respond_to?(:each_pair)
|
195
|
+
returning_value = {}
|
196
|
+
hash.each_pair do |key, value|
|
197
|
+
returning_value[Bulkrax.normalize_string(key)] = value
|
198
|
+
end
|
199
|
+
returning_value
|
200
|
+
end
|
201
|
+
|
202
|
+
# When we write the serialized data to the database, we "dump" the value into that database
|
203
|
+
# column.
|
204
|
+
def self.dump(value)
|
205
|
+
JSON.dump(normalize_keys(value))
|
206
|
+
end
|
207
|
+
|
208
|
+
# When we load the serialized data from the database, we pass the database's value into "load"
|
209
|
+
# function.
|
210
|
+
#
|
211
|
+
# rubocop:disable Security/JSONLoad
|
212
|
+
def self.load(string)
|
213
|
+
normalize_keys(JSON.load(string))
|
214
|
+
end
|
215
|
+
# rubocop:enable Security/JSONLoad
|
216
|
+
end
|
171
217
|
end
|
218
|
+
# rubocop:enable Metrics/ModuleLength
|
@@ -55,6 +55,26 @@ class Bulkrax::InstallGenerator < Rails::Generators::Base
|
|
55
55
|
end
|
56
56
|
end
|
57
57
|
|
58
|
+
def add_ability
|
59
|
+
file = 'app/models/ability.rb'
|
60
|
+
file_text = File.read(file)
|
61
|
+
import_line = 'def can_import_works?'
|
62
|
+
export_line = 'def can_export_works?'
|
63
|
+
unless file_text.include?(import_line)
|
64
|
+
insert_into_file file, before: /^end/ do
|
65
|
+
" def can_import_works?\n can_create_any_work?\n end"
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
# rubocop:disable Style/GuardClause
|
70
|
+
unless file_text.include?(export_line)
|
71
|
+
insert_into_file file, before: /^end/ do
|
72
|
+
" def can_export_works?\n can_create_any_work?\n end"
|
73
|
+
end
|
74
|
+
end
|
75
|
+
# rubocop:enable Style/GuardClause
|
76
|
+
end
|
77
|
+
|
58
78
|
def add_css
|
59
79
|
['css', 'scss', 'sass'].map do |ext|
|
60
80
|
file = "app/assets/stylesheets/application.#{ext}"
|
@@ -80,4 +80,6 @@ Bulkrax.setup do |config|
|
|
80
80
|
end
|
81
81
|
|
82
82
|
# Sidebar for hyrax 3+ support
|
83
|
-
|
83
|
+
if Object.const_defined?(:Hyrax) && ::Hyrax::DashboardController&.respond_to?(:sidebar_partials)
|
84
|
+
Hyrax::DashboardController.sidebar_partials[:repository_content] << "hyrax/dashboard/sidebar/bulkrax_sidebar_additions"
|
85
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 5.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-01-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -106,14 +106,14 @@ dependencies:
|
|
106
106
|
requirements:
|
107
107
|
- - "~>"
|
108
108
|
- !ruby/object:Gem::Version
|
109
|
-
version: 3.
|
109
|
+
version: 3.2.4
|
110
110
|
type: :runtime
|
111
111
|
prerelease: false
|
112
112
|
version_requirements: !ruby/object:Gem::Requirement
|
113
113
|
requirements:
|
114
114
|
- - "~>"
|
115
115
|
- !ruby/object:Gem::Version
|
116
|
-
version: 3.
|
116
|
+
version: 3.2.4
|
117
117
|
- !ruby/object:Gem::Dependency
|
118
118
|
name: loofah
|
119
119
|
requirement: !ruby/object:Gem::Requirement
|
@@ -425,7 +425,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
425
425
|
- !ruby/object:Gem::Version
|
426
426
|
version: '0'
|
427
427
|
requirements: []
|
428
|
-
rubygems_version: 3.
|
428
|
+
rubygems_version: 3.0.3
|
429
429
|
signing_key:
|
430
430
|
specification_version: 4
|
431
431
|
summary: Import and export tool for Hyrax and Hyku
|