bulkrax 4.4.2 → 5.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/controllers/bulkrax/entries_controller.rb +5 -0
- data/app/controllers/bulkrax/exporters_controller.rb +5 -0
- data/app/controllers/bulkrax/importers_controller.rb +8 -0
- data/app/factories/bulkrax/object_factory.rb +52 -4
- data/app/jobs/bulkrax/import_file_set_job.rb +1 -0
- data/app/jobs/bulkrax/import_work_job.rb +13 -5
- data/app/matchers/bulkrax/application_matcher.rb +4 -2
- data/app/models/bulkrax/csv_entry.rb +15 -3
- data/app/models/bulkrax/entry.rb +2 -1
- data/app/models/bulkrax/oai_entry.rb +41 -7
- data/app/models/bulkrax/rdf_entry.rb +1 -1
- data/app/models/bulkrax/xml_entry.rb +54 -12
- data/app/models/concerns/bulkrax/file_factory.rb +9 -3
- data/app/models/concerns/bulkrax/import_behavior.rb +17 -10
- data/app/parsers/bulkrax/application_parser.rb +6 -0
- data/app/parsers/bulkrax/csv_parser.rb +10 -3
- data/app/parsers/bulkrax/oai_dc_parser.rb +0 -6
- data/app/parsers/bulkrax/xml_parser.rb +6 -0
- data/app/views/hyrax/dashboard/sidebar/_bulkrax_sidebar_additions.html.erb +3 -1
- data/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb +24 -21
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +47 -0
- data/lib/generators/bulkrax/install_generator.rb +20 -0
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +3 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 49a1f0ccc806cf73a7872a634c8c2819dac62f98f0a9dc163e7ada8931d9b1fe
|
4
|
+
data.tar.gz: ecf5c8ad3e4864110665cfb5c6c5f6329e3b868414815bba163646db839400e7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: df394e4fbbc6ca0a71eb595c075c5181d24098b1ab889ff185da10991ec01c2eaf60239658564838b67df4c0a421306de923d2eab4bbeffafb31321acb9ba1ff
|
7
|
+
data.tar.gz: 35b691e96d0e59f83efadc35364283239f7f741ab4bf9d859756dacc8483e380090512cd10bf3b6ec4ce81936d00c88eb4d1ed96742c384e0f3082028ffdcf45
|
@@ -7,6 +7,7 @@ module Bulkrax
|
|
7
7
|
class EntriesController < ApplicationController
|
8
8
|
include Hyrax::ThemedLayoutController
|
9
9
|
before_action :authenticate_user!
|
10
|
+
before_action :check_permissions
|
10
11
|
with_themed_layout 'dashboard'
|
11
12
|
|
12
13
|
def show
|
@@ -40,5 +41,9 @@ module Bulkrax
|
|
40
41
|
add_breadcrumb @exporter.name, bulkrax.exporter_path(@exporter.id)
|
41
42
|
add_breadcrumb @entry.id
|
42
43
|
end
|
44
|
+
|
45
|
+
def check_permissions
|
46
|
+
raise CanCan::AccessDenied unless current_ability.can_import_works? || current_ability.can_export_works?
|
47
|
+
end
|
43
48
|
end
|
44
49
|
end
|
@@ -7,6 +7,7 @@ module Bulkrax
|
|
7
7
|
include Hyrax::ThemedLayoutController
|
8
8
|
include Bulkrax::DownloadBehavior
|
9
9
|
before_action :authenticate_user!
|
10
|
+
before_action :check_permissions
|
10
11
|
before_action :set_exporter, only: [:show, :edit, :update, :destroy]
|
11
12
|
with_themed_layout 'dashboard'
|
12
13
|
|
@@ -131,5 +132,9 @@ module Bulkrax
|
|
131
132
|
def file_path
|
132
133
|
"#{@exporter.exporter_export_zip_path}/#{params['exporter']['exporter_export_zip_files']}"
|
133
134
|
end
|
135
|
+
|
136
|
+
def check_permissions
|
137
|
+
raise CanCan::AccessDenied unless current_ability.can_export_works?
|
138
|
+
end
|
134
139
|
end
|
135
140
|
end
|
@@ -14,6 +14,7 @@ module Bulkrax
|
|
14
14
|
protect_from_forgery unless: -> { api_request? }
|
15
15
|
before_action :token_authenticate!, if: -> { api_request? }, only: [:create, :update, :delete]
|
16
16
|
before_action :authenticate_user!, unless: -> { api_request? }
|
17
|
+
before_action :check_permissions
|
17
18
|
before_action :set_importer, only: [:show, :edit, :update, :destroy]
|
18
19
|
with_themed_layout 'dashboard'
|
19
20
|
|
@@ -76,6 +77,9 @@ module Bulkrax
|
|
76
77
|
@importer = Importer.new(importer_params)
|
77
78
|
field_mapping_params
|
78
79
|
@importer.validate_only = true if params[:commit] == 'Create and Validate'
|
80
|
+
# the following line is needed to handle updating remote files of a FileSet
|
81
|
+
# on a new import otherwise it only gets updated during the update path
|
82
|
+
@importer.parser_fields['update_files'] = true if params[:commit] == 'Create and Import'
|
79
83
|
if @importer.save
|
80
84
|
files_for_import(file, cloud_files)
|
81
85
|
if params[:commit] == 'Create and Import'
|
@@ -316,6 +320,10 @@ module Bulkrax
|
|
316
320
|
end
|
317
321
|
@importer.save
|
318
322
|
end
|
323
|
+
|
324
|
+
def check_permissions
|
325
|
+
raise CanCan::AccessDenied unless current_ability.can_import_works?
|
326
|
+
end
|
319
327
|
end
|
320
328
|
# rubocop:enable Metrics/ClassLength
|
321
329
|
end
|
@@ -6,6 +6,27 @@ module Bulkrax
|
|
6
6
|
include Bulkrax::FileFactory
|
7
7
|
include DynamicRecordLookup
|
8
8
|
|
9
|
+
# @api private
|
10
|
+
#
|
11
|
+
# These are the attributes that we assume all "work type" classes (e.g. the given :klass) will
|
12
|
+
# have in addition to their specific attributes.
|
13
|
+
#
|
14
|
+
# @return [Array<Symbol>]
|
15
|
+
# @see #permitted_attributes
|
16
|
+
class_attribute :base_permitted_attributes,
|
17
|
+
default: %i[id edit_users edit_groups read_groups visibility work_members_attributes admin_set_id]
|
18
|
+
|
19
|
+
# @return [Boolean]
|
20
|
+
#
|
21
|
+
# @example
|
22
|
+
# Bulkrax::ObjectFactory.transformation_removes_blank_hash_values = true
|
23
|
+
#
|
24
|
+
# @see #transform_attributes
|
25
|
+
# @see https://github.com/samvera-labs/bulkrax/pull/708 For discussion concerning this feature
|
26
|
+
# @see https://github.com/samvera-labs/bulkrax/wiki/Interacting-with-Metadata For documentation
|
27
|
+
# concerning default behavior.
|
28
|
+
class_attribute :transformation_removes_blank_hash_values, default: false
|
29
|
+
|
9
30
|
define_model_callbacks :save, :create
|
10
31
|
attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :related_parents_parsed_mapping, :importer_run_id
|
11
32
|
|
@@ -58,7 +79,7 @@ module Bulkrax
|
|
58
79
|
elsif klass == FileSet
|
59
80
|
update_file_set(attrs)
|
60
81
|
else
|
61
|
-
|
82
|
+
update_work(attrs)
|
62
83
|
end
|
63
84
|
end
|
64
85
|
object.apply_depositor_metadata(@user) && object.save! if object.depositor.nil?
|
@@ -104,7 +125,7 @@ module Bulkrax
|
|
104
125
|
elsif klass == FileSet
|
105
126
|
create_file_set(attrs)
|
106
127
|
else
|
107
|
-
|
128
|
+
create_work(attrs)
|
108
129
|
end
|
109
130
|
end
|
110
131
|
end
|
@@ -139,6 +160,14 @@ module Bulkrax
|
|
139
160
|
Hyrax::CurationConcern.actor
|
140
161
|
end
|
141
162
|
|
163
|
+
def create_work(attrs)
|
164
|
+
work_actor.create(environment(attrs))
|
165
|
+
end
|
166
|
+
|
167
|
+
def update_work(attrs)
|
168
|
+
work_actor.update(environment(attrs))
|
169
|
+
end
|
170
|
+
|
142
171
|
def create_collection(attrs)
|
143
172
|
attrs = clean_attrs(attrs)
|
144
173
|
attrs = collection_type(attrs)
|
@@ -227,13 +256,32 @@ module Bulkrax
|
|
227
256
|
def transform_attributes(update: false)
|
228
257
|
@transform_attributes = attributes.slice(*permitted_attributes)
|
229
258
|
@transform_attributes.merge!(file_attributes(update_files)) if with_files
|
230
|
-
@transform_attributes
|
259
|
+
@transform_attributes = remove_blank_hash_values(@transform_attributes) if transformation_removes_blank_hash_values?
|
231
260
|
update ? @transform_attributes.except(:id) : @transform_attributes
|
232
261
|
end
|
233
262
|
|
234
263
|
# Regardless of what the Parser gives us, these are the properties we are prepared to accept.
|
235
264
|
def permitted_attributes
|
236
|
-
klass.properties.keys.map(&:to_sym) +
|
265
|
+
klass.properties.keys.map(&:to_sym) + base_permitted_attributes
|
266
|
+
end
|
267
|
+
|
268
|
+
# Return a copy of the given attributes, such that all values that are empty or an array of all
|
269
|
+
# empty values are fully emptied. (See implementation details)
|
270
|
+
#
|
271
|
+
# @param attributes [Hash]
|
272
|
+
# @return [Hash]
|
273
|
+
#
|
274
|
+
# @see https://github.com/emory-libraries/dlp-curate/issues/1973
|
275
|
+
def remove_blank_hash_values(attributes)
|
276
|
+
dupe = attributes.dup
|
277
|
+
dupe.each do |key, values|
|
278
|
+
if values.is_a?(Array) && values.all? { |value| value.is_a?(String) && value.empty? }
|
279
|
+
dupe[key] = []
|
280
|
+
elsif values.is_a?(String) && values.empty?
|
281
|
+
dupe[key] = nil
|
282
|
+
end
|
283
|
+
end
|
284
|
+
dupe
|
237
285
|
end
|
238
286
|
end
|
239
287
|
end
|
@@ -5,7 +5,7 @@ module Bulkrax
|
|
5
5
|
queue_as :import
|
6
6
|
|
7
7
|
# rubocop:disable Rails/SkipsModelValidations
|
8
|
-
def perform(entry_id, run_id, *)
|
8
|
+
def perform(entry_id, run_id, time_to_live = 3, *)
|
9
9
|
entry = Entry.find(entry_id)
|
10
10
|
importer_run = ImporterRun.find(run_id)
|
11
11
|
entry.build
|
@@ -24,13 +24,21 @@ module Bulkrax
|
|
24
24
|
entry.save!
|
25
25
|
entry.importer.current_run = importer_run
|
26
26
|
entry.importer.record_status
|
27
|
-
rescue Bulkrax::CollectionsCreatedError
|
28
|
-
|
27
|
+
rescue Bulkrax::CollectionsCreatedError => e
|
28
|
+
Rails.logger.warn("#{self.class} entry_id: #{entry_id}, run_id: #{run_id} encountered #{e.class}: #{e.message}")
|
29
|
+
# You get 3 attempts at the above perform before we have the import exception cascade into
|
30
|
+
# the Sidekiq retry ecosystem.
|
31
|
+
# rubocop:disable Style/IfUnlessModifier
|
32
|
+
if time_to_live <= 1
|
33
|
+
raise "Exhauted reschedule limit for #{self.class} entry_id: #{entry_id}, run_id: #{run_id}. Attemping retries"
|
34
|
+
end
|
35
|
+
# rubocop:enable Style/IfUnlessModifier
|
36
|
+
reschedule(entry_id, run_id, time_to_live)
|
29
37
|
end
|
30
38
|
# rubocop:enable Rails/SkipsModelValidations
|
31
39
|
|
32
|
-
def reschedule(entry_id, run_id)
|
33
|
-
ImportWorkJob.set(wait: 1.minute).perform_later(entry_id, run_id)
|
40
|
+
def reschedule(entry_id, run_id, time_to_live)
|
41
|
+
ImportWorkJob.set(wait: 1.minute).perform_later(entry_id, run_id, time_to_live - 1)
|
34
42
|
end
|
35
43
|
end
|
36
44
|
end
|
@@ -6,6 +6,10 @@ module Bulkrax
|
|
6
6
|
class ApplicationMatcher
|
7
7
|
attr_accessor :to, :from, :parsed, :if, :split, :excluded, :nested_type
|
8
8
|
|
9
|
+
# New parse methods will need to be added here; you'll also want to define a corresponding
|
10
|
+
# "parse_#{field}" method.
|
11
|
+
class_attribute :parsed_fields, instance_writer: false, default: ['remote_files', 'language', 'subject', 'types', 'model', 'resource_type', 'format_original']
|
12
|
+
|
9
13
|
def initialize(args)
|
10
14
|
args.each do |k, v|
|
11
15
|
send("#{k}=", v)
|
@@ -38,8 +42,6 @@ module Bulkrax
|
|
38
42
|
end
|
39
43
|
|
40
44
|
def process_parse
|
41
|
-
# New parse methods will need to be added here
|
42
|
-
parsed_fields = ['remote_files', 'language', 'subject', 'types', 'model', 'resource_type', 'format_original']
|
43
45
|
# This accounts for prefixed matchers
|
44
46
|
parser = parsed_fields.find { |field| to&.include? field }
|
45
47
|
|
@@ -7,7 +7,7 @@ module Bulkrax
|
|
7
7
|
# We do too much in these entry classes. We need to extract the common logic from the various
|
8
8
|
# entry models into a module that can be shared between them.
|
9
9
|
class CsvEntry < Entry # rubocop:disable Metrics/ClassLength
|
10
|
-
serialize :raw_metadata,
|
10
|
+
serialize :raw_metadata, Bulkrax::NormalizedJson
|
11
11
|
|
12
12
|
def self.fields_from_data(data)
|
13
13
|
data.headers.flatten.compact.uniq
|
@@ -36,10 +36,14 @@ module Bulkrax
|
|
36
36
|
|
37
37
|
def build_metadata
|
38
38
|
raise StandardError, 'Record not found' if record.nil?
|
39
|
-
|
39
|
+
unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
|
40
|
+
raise StandardError,
|
41
|
+
"Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}"
|
42
|
+
end
|
40
43
|
|
41
44
|
self.parsed_metadata = {}
|
42
45
|
add_identifier
|
46
|
+
establish_factory_class
|
43
47
|
add_ingested_metadata
|
44
48
|
# TODO(alishaevn): remove the collections stuff entirely and only reference collections via the new parents code
|
45
49
|
add_collections
|
@@ -56,6 +60,12 @@ module Bulkrax
|
|
56
60
|
self.parsed_metadata[work_identifier] = [record[source_identifier]]
|
57
61
|
end
|
58
62
|
|
63
|
+
def establish_factory_class
|
64
|
+
parser.model_field_mappings.each do |key|
|
65
|
+
add_metadata('model', record[key]) if record.key?(key)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
59
69
|
def add_metadata_for_model
|
60
70
|
if factory_class == Collection
|
61
71
|
add_collection_type_gid
|
@@ -107,7 +117,9 @@ module Bulkrax
|
|
107
117
|
# Metadata required by Bulkrax for round-tripping
|
108
118
|
def build_system_metadata
|
109
119
|
self.parsed_metadata['id'] = hyrax_record.id
|
110
|
-
|
120
|
+
source_id = hyrax_record.send(work_identifier)
|
121
|
+
source_id = source_id.to_a.first if source_id.is_a?(ActiveTriples::Relation)
|
122
|
+
self.parsed_metadata[source_identifier] = source_id
|
111
123
|
self.parsed_metadata[key_for_export('model')] = hyrax_record.has_model.first
|
112
124
|
end
|
113
125
|
|
data/app/models/bulkrax/entry.rb
CHANGED
@@ -4,6 +4,7 @@ module Bulkrax
|
|
4
4
|
# Custom error class for collections_created?
|
5
5
|
class CollectionsCreatedError < RuntimeError; end
|
6
6
|
class OAIError < RuntimeError; end
|
7
|
+
|
7
8
|
class Entry < ApplicationRecord
|
8
9
|
include Bulkrax::HasMatchers
|
9
10
|
include Bulkrax::ImportBehavior
|
@@ -15,7 +16,7 @@ module Bulkrax
|
|
15
16
|
alias importer importerexporter
|
16
17
|
alias exporter importerexporter
|
17
18
|
|
18
|
-
serialize :parsed_metadata,
|
19
|
+
serialize :parsed_metadata, Bulkrax::NormalizedJson
|
19
20
|
# Do not serialize raw_metadata, so that we can support XML or other formats
|
20
21
|
serialize :collection_ids, Array
|
21
22
|
|
@@ -5,7 +5,7 @@ require 'ostruct'
|
|
5
5
|
|
6
6
|
module Bulkrax
|
7
7
|
class OaiEntry < Entry
|
8
|
-
serialize :raw_metadata,
|
8
|
+
serialize :raw_metadata, Bulkrax::NormalizedJson
|
9
9
|
|
10
10
|
delegate :record, to: :raw_record
|
11
11
|
|
@@ -28,13 +28,16 @@ module Bulkrax
|
|
28
28
|
def build_metadata
|
29
29
|
self.parsed_metadata = {}
|
30
30
|
self.parsed_metadata[work_identifier] = [record.header.identifier]
|
31
|
+
self.raw_metadata = { xml: record.metadata.to_s }
|
31
32
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
33
|
+
# We need to establish the #factory_class before we proceed with the metadata. See
|
34
|
+
# https://github.com/samvera-labs/bulkrax/issues/702 for further details.
|
35
|
+
#
|
36
|
+
# tl;dr - if we don't have the right factory_class we might skip properties that are
|
37
|
+
# specifically assigned to the factory class
|
38
|
+
establish_factory_class
|
39
|
+
add_metadata_from_record
|
40
|
+
add_thumbnail_url
|
38
41
|
|
39
42
|
add_visibility
|
40
43
|
add_rights_statement
|
@@ -53,6 +56,37 @@ module Bulkrax
|
|
53
56
|
end
|
54
57
|
end
|
55
58
|
|
59
|
+
# To ensure we capture the correct parse data, we first need to establish the factory_class.
|
60
|
+
# @see https://github.com/samvera-labs/bulkrax/issues/702
|
61
|
+
def establish_factory_class
|
62
|
+
model_field_names = parser.model_field_mappings
|
63
|
+
|
64
|
+
each_candidate_metadata_node do |node|
|
65
|
+
next unless model_field_names.include?(node.name)
|
66
|
+
add_metadata(node.name, node.content)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
def add_metadata_from_record
|
71
|
+
each_candidate_metadata_node do |node|
|
72
|
+
add_metadata(node.name, node.content)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
# A method that you could override to better handle the shape of the record's metadata.
|
77
|
+
# @yieldparam node [Object<#name, #content>]
|
78
|
+
def each_candidate_metadata_node
|
79
|
+
record.metadata.children.each do |child|
|
80
|
+
child.children.each do |node|
|
81
|
+
yield(node)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
def add_thumbnail_url
|
87
|
+
add_metadata('thumbnail_url', thumbnail_url)
|
88
|
+
end
|
89
|
+
|
56
90
|
# Retrieve list of collections for the entry; add to collection_ids
|
57
91
|
# If OAI-PMH doesn't return setSpec in the headers for GetRecord, use parser.collection_name
|
58
92
|
# in this case, if 'All' is selected, records will not be added to a collection.
|
@@ -4,7 +4,7 @@ require 'nokogiri'
|
|
4
4
|
module Bulkrax
|
5
5
|
# Generic XML Entry
|
6
6
|
class XmlEntry < Entry
|
7
|
-
serialize :raw_metadata,
|
7
|
+
serialize :raw_metadata, Bulkrax::NormalizedJson
|
8
8
|
|
9
9
|
def self.fields_from_data(data); end
|
10
10
|
|
@@ -43,15 +43,14 @@ module Bulkrax
|
|
43
43
|
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
|
44
44
|
self.parsed_metadata = {}
|
45
45
|
self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
end
|
46
|
+
|
47
|
+
# We need to establish the #factory_class before we proceed with the metadata. See
|
48
|
+
# https://github.com/samvera-labs/bulkrax/issues/702 for further details.
|
49
|
+
#
|
50
|
+
# tl;dr - if we don't have the right factory_class we might skip properties that are
|
51
|
+
# specifically assigned to the factory class
|
52
|
+
establish_factory_class
|
53
|
+
add_metadata_from_record
|
55
54
|
add_visibility
|
56
55
|
add_rights_statement
|
57
56
|
add_admin_set_id
|
@@ -63,11 +62,54 @@ module Bulkrax
|
|
63
62
|
self.parsed_metadata
|
64
63
|
end
|
65
64
|
|
66
|
-
|
67
|
-
|
65
|
+
def establish_factory_class
|
66
|
+
model_field_names = parser.model_field_mappings
|
67
|
+
|
68
|
+
each_candidate_metadata_node_name_and_content(elements: parser.model_field_mappings) do |name, content|
|
69
|
+
next unless model_field_names.include?(name)
|
70
|
+
add_metadata(name, content)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def add_metadata_from_record
|
75
|
+
each_candidate_metadata_node_name_and_content do |name, content|
|
76
|
+
add_metadata(name, content)
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
def each_candidate_metadata_node_name_and_content(elements: field_mapping_from_values_for_xml_element_names)
|
81
|
+
elements.each do |name|
|
82
|
+
# NOTE: the XML element name's case matters
|
83
|
+
nodes = record.xpath("//*[name()='#{name}']")
|
84
|
+
next if nodes.empty?
|
85
|
+
|
86
|
+
nodes.each do |node|
|
87
|
+
node.children.each do |content|
|
88
|
+
next if content.to_s.blank?
|
89
|
+
|
90
|
+
yield(name, content.to_s)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
# Returns the explicitly declared "from" key's value of each parser's element's value. (Yes, I
|
97
|
+
# would like a proper class for the thing I just tried to describe.)
|
98
|
+
#
|
99
|
+
# @return [Array<String>]
|
100
|
+
#
|
101
|
+
# @todo Additionally, we may want to revisit the XML parser fundamental logic; namely we only
|
102
|
+
# parse nodes that are explicitly declared within the `from`. This is a bit different
|
103
|
+
# than other parsers, in that they will make assumptions about each encountered column (in
|
104
|
+
# the case of CSV) or node (in the case of OAI). tl;dr - Here there be dragons.
|
105
|
+
def field_mapping_from_values_for_xml_element_names
|
68
106
|
Bulkrax.field_mappings[self.importerexporter.parser_klass].map do |_k, v|
|
69
107
|
v[:from]
|
70
108
|
end.flatten.compact.uniq
|
71
109
|
end
|
110
|
+
|
111
|
+
# Included for potential downstream adopters
|
112
|
+
alias xml_elements field_mapping_from_values_for_xml_element_names
|
113
|
+
deprecation_deprecate xml_elements: "Use '#{self}#field_mapping_from_values_for_xml_element_names' instead"
|
72
114
|
end
|
73
115
|
end
|
@@ -45,9 +45,15 @@ module Bulkrax
|
|
45
45
|
end
|
46
46
|
|
47
47
|
def new_remote_files
|
48
|
-
|
49
|
-
|
50
|
-
|
48
|
+
@new_remote_files ||= if object.is_a? FileSet
|
49
|
+
parsed_remote_files.select do |file|
|
50
|
+
# is the url valid?
|
51
|
+
is_valid = file[:url]&.match(URI::ABS_URI)
|
52
|
+
# does the file already exist
|
53
|
+
is_existing = object.import_url && object.import_url == file[:url]
|
54
|
+
is_valid && !is_existing
|
55
|
+
end
|
56
|
+
elsif object.present? && object.file_sets.present?
|
51
57
|
parsed_remote_files.select do |file|
|
52
58
|
# is the url valid?
|
53
59
|
is_valid = file[:url]&.match(URI::ABS_URI)
|
@@ -117,23 +117,30 @@ module Bulkrax
|
|
117
117
|
Bulkrax.qa_controlled_properties.each do |field|
|
118
118
|
next if parsed_metadata[field].blank?
|
119
119
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
parsed_metadata[field][i] = validated_uri_value
|
125
|
-
else
|
126
|
-
debug_msg = %(Unable to locate active authority ID "#{value}" in config/authorities/#{field.pluralize}.yml)
|
127
|
-
Rails.logger.debug(debug_msg)
|
128
|
-
error_msg = %("#{value}" is not a valid and/or active authority ID for the :#{field} field)
|
129
|
-
raise ::StandardError, error_msg
|
120
|
+
if multiple?(field)
|
121
|
+
parsed_metadata[field].each_with_index do |value, i|
|
122
|
+
next if value.blank?
|
123
|
+
parsed_metadata[field][i] = sanitize_controlled_uri_value(field, value)
|
130
124
|
end
|
125
|
+
else
|
126
|
+
parsed_metadata[field] = sanitize_controlled_uri_value(field, parsed_metadata[field])
|
131
127
|
end
|
132
128
|
end
|
133
129
|
|
134
130
|
true
|
135
131
|
end
|
136
132
|
|
133
|
+
def sanitize_controlled_uri_value(field, value)
|
134
|
+
if (validated_uri_value = validate_value(value, field))
|
135
|
+
validated_uri_value
|
136
|
+
else
|
137
|
+
debug_msg = %(Unable to locate active authority ID "#{value}" in config/authorities/#{field.pluralize}.yml)
|
138
|
+
Rails.logger.debug(debug_msg)
|
139
|
+
error_msg = %("#{value}" is not a valid and/or active authority ID for the :#{field} field)
|
140
|
+
raise ::StandardError, error_msg
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
137
144
|
# @param value [String] value to validate
|
138
145
|
# @param field [String] name of the controlled property
|
139
146
|
# @return [String, nil] validated URI value or nil
|
@@ -51,6 +51,12 @@ module Bulkrax
|
|
51
51
|
raise NotImplementedError, 'must be defined'
|
52
52
|
end
|
53
53
|
|
54
|
+
# @api public
|
55
|
+
# @abstract Subclass and override {#file_set_entry_class} to implement behavior for the parser.
|
56
|
+
def file_set_entry_class
|
57
|
+
raise NotImplementedError, 'must be defined'
|
58
|
+
end
|
59
|
+
|
54
60
|
# @api public
|
55
61
|
# @abstract Subclass and override {#records} to implement behavior for the parser.
|
56
62
|
def records(_opts = {})
|
@@ -196,10 +196,13 @@ module Bulkrax
|
|
196
196
|
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
197
197
|
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
198
198
|
when 'collection'
|
199
|
-
@work_ids = ActiveFedora::SolrService.query(
|
199
|
+
@work_ids = ActiveFedora::SolrService.query(
|
200
|
+
"member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters} AND has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')})", method: :post, rows: 2_000_000_000
|
201
|
+
).map(&:id)
|
200
202
|
# get the parent collection and child collections
|
201
203
|
@collection_ids = ActiveFedora::SolrService.query("id:#{importerexporter.export_source} #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
202
|
-
@collection_ids += ActiveFedora::SolrService.query("has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source}", method: :post,
|
204
|
+
@collection_ids += ActiveFedora::SolrService.query("has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source}", method: :post,
|
205
|
+
rows: 2_147_483_647).map(&:id)
|
203
206
|
find_child_file_sets(@work_ids)
|
204
207
|
when 'worktype'
|
205
208
|
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
@@ -234,7 +237,7 @@ module Bulkrax
|
|
234
237
|
instance_variable_set(instance_var, ActiveFedora::SolrService.post(
|
235
238
|
extra_filters.to_s,
|
236
239
|
fq: [
|
237
|
-
%(#{
|
240
|
+
%(#{solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
|
238
241
|
"has_model_ssim:(#{models_to_search.join(' OR ')})"
|
239
242
|
],
|
240
243
|
fl: 'id',
|
@@ -243,6 +246,10 @@ module Bulkrax
|
|
243
246
|
end
|
244
247
|
end
|
245
248
|
|
249
|
+
def solr_name(base_name)
|
250
|
+
Module.const_defined?(:Solrizer) ? ::Solrizer.solr_name(base_name) : ::ActiveFedora.index_field_mapper.solr_name(base_name)
|
251
|
+
end
|
252
|
+
|
246
253
|
def create_new_entries
|
247
254
|
current_record_ids.each_with_index do |id, index|
|
248
255
|
break if limit_reached?(limit, index)
|
@@ -30,12 +30,6 @@ module Bulkrax
|
|
30
30
|
OaiSetEntry
|
31
31
|
end
|
32
32
|
|
33
|
-
def file_set_entry_class; end
|
34
|
-
|
35
|
-
def create_relationships; end
|
36
|
-
|
37
|
-
def create_file_sets; end
|
38
|
-
|
39
33
|
def records(opts = {})
|
40
34
|
opts[:metadata_prefix] ||= importerexporter.parser_fields['metadata_prefix']
|
41
35
|
opts[:set] = collection_name unless collection_name == 'all'
|
@@ -12,6 +12,12 @@ module Bulkrax
|
|
12
12
|
# @todo not yet supported
|
13
13
|
def create_collections; end
|
14
14
|
|
15
|
+
# @todo not yet supported
|
16
|
+
def file_set_entry_class; end
|
17
|
+
|
18
|
+
# @todo not yet supported
|
19
|
+
def create_file_sets; end
|
20
|
+
|
15
21
|
# TODO: change to differentiate between collection and work records when adding ability to import collection metadata
|
16
22
|
def works_total
|
17
23
|
total
|
@@ -1,7 +1,9 @@
|
|
1
|
-
<% if
|
1
|
+
<% if current_ability.can_import_works? %>
|
2
2
|
<%= menu.nav_link(bulkrax.importers_path) do %>
|
3
3
|
<span class="fa fa-cloud-upload" aria-hidden="true"></span> <span class="sidebar-action-text"><%= t('bulkrax.admin.sidebar.importers') %></span>
|
4
4
|
<% end %>
|
5
|
+
<% end %>
|
6
|
+
<% if current_ability.can_export_works? %>
|
5
7
|
<%= menu.nav_link(bulkrax.exporters_path) do %>
|
6
8
|
<span class="fa fa-cloud-download" aria-hidden="true"></span> <span class="sidebar-action-text"><%= t('bulkrax.admin.sidebar.exporters') %></span>
|
7
9
|
<% end %>
|
@@ -1,27 +1,30 @@
|
|
1
|
-
|
1
|
+
<li class="h5 nav-item"><%= t('hyrax.admin.sidebar.repository_objects') %></li>
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
3
|
+
<%= menu.nav_link(hyrax.my_collections_path,
|
4
|
+
class: "nav-link",
|
5
|
+
onclick: "dontChangeAccordion(event);",
|
6
|
+
also_active_for: hyrax.dashboard_collections_path) do %>
|
7
|
+
<span class="fa fa-folder-open" aria-hidden="true"></span> <span class="sidebar-action-text"><%= t('hyrax.admin.sidebar.collections') %></span>
|
8
|
+
<% end %>
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
10
|
+
<%= menu.nav_link(hyrax.my_works_path,
|
11
|
+
class: "nav-link",
|
12
|
+
onclick: "dontChangeAccordion(event);",
|
13
|
+
also_active_for: hyrax.dashboard_works_path) do %>
|
14
|
+
<span class="fa fa-file" aria-hidden="true"></span> <span class="sidebar-action-text"><%= t('hyrax.admin.sidebar.works') %></span>
|
15
|
+
<% end %>
|
16
16
|
|
17
|
-
|
17
|
+
<% if ::Hyrax::DashboardController&.respond_to?(:sidebar_partials) %>
|
18
18
|
<%= render 'hyrax/dashboard/sidebar/menu_partials', menu: menu, section: :repository_content %>
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
<%= menu.nav_link(bulkrax.exporters_path) do %>
|
25
|
-
<span class="fa fa-cloud-download" aria-hidden="true"></span> <span class="sidebar-action-text"><%= t('bulkrax.admin.sidebar.exporters') %></span>
|
19
|
+
<% else %>
|
20
|
+
<% if current_ability.can_import_works? %>
|
21
|
+
<%= menu.nav_link(bulkrax.importers_path) do %>
|
22
|
+
<span class="fa fa-cloud-upload" aria-hidden="true"></span> <span class="sidebar-action-text"><%= t('bulkrax.admin.sidebar.importers') %></span>
|
23
|
+
<% end %>
|
26
24
|
<% end %>
|
25
|
+
<% if current_ability.can_export_works? %>
|
26
|
+
<%= menu.nav_link(bulkrax.exporters_path) do %>
|
27
|
+
<span class="fa fa-cloud-download" aria-hidden="true"></span> <span class="sidebar-action-text"><%= t('bulkrax.admin.sidebar.exporters') %></span>
|
28
|
+
<% end %>
|
27
29
|
<% end %>
|
30
|
+
<% end %>
|
data/lib/bulkrax/version.rb
CHANGED
data/lib/bulkrax.rb
CHANGED
@@ -4,8 +4,13 @@ require "bulkrax/version"
|
|
4
4
|
require "bulkrax/engine"
|
5
5
|
require 'active_support/all'
|
6
6
|
|
7
|
+
# rubocop:disable Metrics/ModuleLength
|
7
8
|
module Bulkrax
|
8
9
|
class << self
|
10
|
+
# @todo Move from module attribute methods to a configuration class. With module attributes,
|
11
|
+
# when we make a change we are polluting the global space. This means that our tests that
|
12
|
+
# modify these config values are modifying global state, which is undesirable, as it can
|
13
|
+
# introduce unexpected flakey tests.
|
9
14
|
mattr_accessor :api_definition,
|
10
15
|
:default_field_mapping,
|
11
16
|
:default_work_type,
|
@@ -168,4 +173,46 @@ module Bulkrax
|
|
168
173
|
def self.setup
|
169
174
|
yield self
|
170
175
|
end
|
176
|
+
|
177
|
+
# Responsible for stripping hidden characters from the given string.
|
178
|
+
#
|
179
|
+
# @param value [#to_s]
|
180
|
+
# @return [String] with hidden characters removed
|
181
|
+
#
|
182
|
+
# @see https://github.com/samvera-labs/bulkrax/issues/688
|
183
|
+
def self.normalize_string(value)
|
184
|
+
# Removing [Byte Order Mark (BOM)](https://en.wikipedia.org/wiki/Byte_order_mark)
|
185
|
+
value.to_s.delete("\xEF\xBB\xBF")
|
186
|
+
end
|
187
|
+
|
188
|
+
# This class conforms to the ActiveRecord `serialize` coder interface. Its job is to ensure that we
|
189
|
+
# don't have keys with the tricksy Byte Order Mark character.
|
190
|
+
#
|
191
|
+
# @see https://api.rubyonrails.org/classes/ActiveRecord/AttributeMethods/Serialization/ClassMethods.html#method-i-serialize
|
192
|
+
class NormalizedJson
|
193
|
+
def self.normalize_keys(hash)
|
194
|
+
return hash unless hash.respond_to?(:each_pair)
|
195
|
+
returning_value = {}
|
196
|
+
hash.each_pair do |key, value|
|
197
|
+
returning_value[Bulkrax.normalize_string(key)] = value
|
198
|
+
end
|
199
|
+
returning_value
|
200
|
+
end
|
201
|
+
|
202
|
+
# When we write the serialized data to the database, we "dump" the value into that database
|
203
|
+
# column.
|
204
|
+
def self.dump(value)
|
205
|
+
JSON.dump(normalize_keys(value))
|
206
|
+
end
|
207
|
+
|
208
|
+
# When we load the serialized data from the database, we pass the database's value into "load"
|
209
|
+
# function.
|
210
|
+
#
|
211
|
+
# rubocop:disable Security/JSONLoad
|
212
|
+
def self.load(string)
|
213
|
+
normalize_keys(JSON.load(string))
|
214
|
+
end
|
215
|
+
# rubocop:enable Security/JSONLoad
|
216
|
+
end
|
171
217
|
end
|
218
|
+
# rubocop:enable Metrics/ModuleLength
|
@@ -55,6 +55,26 @@ class Bulkrax::InstallGenerator < Rails::Generators::Base
|
|
55
55
|
end
|
56
56
|
end
|
57
57
|
|
58
|
+
def add_ability
|
59
|
+
file = 'app/models/ability.rb'
|
60
|
+
file_text = File.read(file)
|
61
|
+
import_line = 'def can_import_works?'
|
62
|
+
export_line = 'def can_export_works?'
|
63
|
+
unless file_text.include?(import_line)
|
64
|
+
insert_into_file file, before: /^end/ do
|
65
|
+
" def can_import_works?\n can_create_any_work?\n end"
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
# rubocop:disable Style/GuardClause
|
70
|
+
unless file_text.include?(export_line)
|
71
|
+
insert_into_file file, before: /^end/ do
|
72
|
+
" def can_export_works?\n can_create_any_work?\n end"
|
73
|
+
end
|
74
|
+
end
|
75
|
+
# rubocop:enable Style/GuardClause
|
76
|
+
end
|
77
|
+
|
58
78
|
def add_css
|
59
79
|
['css', 'scss', 'sass'].map do |ext|
|
60
80
|
file = "app/assets/stylesheets/application.#{ext}"
|
@@ -80,4 +80,6 @@ Bulkrax.setup do |config|
|
|
80
80
|
end
|
81
81
|
|
82
82
|
# Sidebar for hyrax 3+ support
|
83
|
-
|
83
|
+
if Object.const_defined?(:Hyrax) && ::Hyrax::DashboardController&.respond_to?(:sidebar_partials)
|
84
|
+
Hyrax::DashboardController.sidebar_partials[:repository_content] << "hyrax/dashboard/sidebar/bulkrax_sidebar_additions"
|
85
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 5.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-01-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -106,14 +106,14 @@ dependencies:
|
|
106
106
|
requirements:
|
107
107
|
- - "~>"
|
108
108
|
- !ruby/object:Gem::Version
|
109
|
-
version: 3.
|
109
|
+
version: 3.2.4
|
110
110
|
type: :runtime
|
111
111
|
prerelease: false
|
112
112
|
version_requirements: !ruby/object:Gem::Requirement
|
113
113
|
requirements:
|
114
114
|
- - "~>"
|
115
115
|
- !ruby/object:Gem::Version
|
116
|
-
version: 3.
|
116
|
+
version: 3.2.4
|
117
117
|
- !ruby/object:Gem::Dependency
|
118
118
|
name: loofah
|
119
119
|
requirement: !ruby/object:Gem::Requirement
|
@@ -425,7 +425,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
425
425
|
- !ruby/object:Gem::Version
|
426
426
|
version: '0'
|
427
427
|
requirements: []
|
428
|
-
rubygems_version: 3.
|
428
|
+
rubygems_version: 3.0.3
|
429
429
|
signing_key:
|
430
430
|
specification_version: 4
|
431
431
|
summary: Import and export tool for Hyrax and Hyku
|