bulkrax 5.3.1 → 5.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/factories/bulkrax/object_factory.rb +5 -3
- data/app/jobs/bulkrax/create_relationships_job.rb +1 -1
- data/app/jobs/bulkrax/delete_job.rb +1 -1
- data/app/jobs/bulkrax/download_cloud_file_job.rb +1 -1
- data/app/jobs/bulkrax/import_collection_job.rb +1 -1
- data/app/jobs/bulkrax/import_file_set_job.rb +1 -1
- data/app/jobs/bulkrax/import_work_job.rb +1 -1
- data/app/jobs/bulkrax/importer_job.rb +1 -1
- data/app/views/bulkrax/entries/show.html.erb +5 -5
- data/app/views/bulkrax/importers/_bagit_fields.html.erb +6 -5
- data/app/views/bulkrax/importers/_csv_fields.html.erb +2 -1
- data/app/views/bulkrax/importers/_oai_fields.html.erb +5 -4
- data/app/views/bulkrax/importers/_xml_fields.html.erb +11 -10
- data/config/locales/bulkrax.en.yml +11 -6
- data/db/migrate/20210806044408_remove_unused_last_error.rb +3 -3
- data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +8 -8
- data/lib/bulkrax/engine.rb +10 -9
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +198 -207
- data/lib/tasks/bulkrax_tasks.rake +102 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 56ac96f944d880492d7f7aa50870557ba81cf2ed23875751509b3ec23411644b
|
4
|
+
data.tar.gz: f18c9fa1d5a968d9214587476f2f949e04015e1402f6cb2ac516616669567c0e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0556a64e91eb41a27597b717fbf9e1de0682d666f116397e64a1b58a64fffaaadfd2dac645fbd256e3bd14ffb9065ce5f0ceaac920296f6623629199a6e130b5
|
7
|
+
data.tar.gz: dada612e0ee38d0b74ceacedc5c034cb39e26111edc90c39afd0d04338f77c1e0c0857a4d983bd7bd24fe8d7e601e2b8a6c32242e3142b2faad3a3369cbf82c1
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Bulkrax
|
4
|
-
class ObjectFactory
|
4
|
+
class ObjectFactory # rubocop:disable Metrics/ClassLength
|
5
5
|
extend ActiveModel::Callbacks
|
6
6
|
include Bulkrax::FileFactory
|
7
7
|
include DynamicRecordLookup
|
@@ -87,7 +87,8 @@ module Bulkrax
|
|
87
87
|
end
|
88
88
|
|
89
89
|
def find
|
90
|
-
|
90
|
+
found = find_by_id if attributes[:id].present?
|
91
|
+
return found if found.present?
|
91
92
|
return search_by_identifier if attributes[work_identifier].present?
|
92
93
|
end
|
93
94
|
|
@@ -102,7 +103,8 @@ module Bulkrax
|
|
102
103
|
end
|
103
104
|
|
104
105
|
def search_by_identifier
|
105
|
-
|
106
|
+
work_index = ::ActiveFedora.index_field_mapper.solr_name(work_identifier, :facetable)
|
107
|
+
query = { work_index =>
|
106
108
|
source_identifier_value }
|
107
109
|
# Query can return partial matches (something6 matches both something6 and something68)
|
108
110
|
# so we need to weed out any that are not the correct full match. But other items might be
|
@@ -40,7 +40,7 @@ module Bulkrax
|
|
40
40
|
|
41
41
|
include DynamicRecordLookup
|
42
42
|
|
43
|
-
queue_as
|
43
|
+
queue_as :import
|
44
44
|
|
45
45
|
# @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers
|
46
46
|
# @param importer_run [Bulkrax::ImporterRun] current importer run (needed to properly update counters)
|
@@ -2,17 +2,17 @@
|
|
2
2
|
<div class="panel panel-default">
|
3
3
|
<div class="panel-body">
|
4
4
|
<p class='bulkrax-p-align'>
|
5
|
-
<strong
|
5
|
+
<strong><%= t('bulkrax.importer.labels.identifier') %>:</strong>
|
6
6
|
<%= @entry.identifier %>
|
7
7
|
</p>
|
8
8
|
|
9
9
|
<p class='bulkrax-p-align'>
|
10
|
-
<strong
|
10
|
+
<strong><%= t('bulkrax.importer.labels.entry_id') %>:</strong>
|
11
11
|
<%= @entry.id %>
|
12
12
|
</p>
|
13
13
|
|
14
14
|
<p class='bulkrax-p-align'>
|
15
|
-
<strong
|
15
|
+
<strong><%= t('bulkrax.importer.labels.type') %>:</strong>
|
16
16
|
<%= @entry.factory_class || 'Unknown' %>
|
17
17
|
</p>
|
18
18
|
<%= render partial: 'raw_metadata'%>
|
@@ -23,10 +23,10 @@
|
|
23
23
|
|
24
24
|
<p class="bulkrax-p-align">
|
25
25
|
<% if @importer.present? %>
|
26
|
-
<strong
|
26
|
+
<strong><%= t('bulkrax.importer.labels.importer') %>:</strong>
|
27
27
|
<%= link_to @importer.name, importer_path(@importer) %>
|
28
28
|
<% elsif @exporter.present? %>
|
29
|
-
<strong
|
29
|
+
<strong><%= t('bulkrax.importer.labels.exporter') %>:</strong>
|
30
30
|
<%= link_to @exporter.name, exporter_path(@exporter) %>
|
31
31
|
<% end %>
|
32
32
|
</p>
|
@@ -1,15 +1,15 @@
|
|
1
1
|
<div class='bagit_fields'>
|
2
2
|
|
3
|
-
<%#= fi.input :metadata_type,
|
4
|
-
collection: importer.import_metadata_type,
|
3
|
+
<%#= fi.input :metadata_type,
|
4
|
+
collection: importer.import_metadata_type,
|
5
5
|
selected: importer.parser_fields['metadata_type'],
|
6
6
|
include_blank: true,
|
7
7
|
input_html: { class: 'form-control' }
|
8
8
|
%>
|
9
9
|
<%= fi.input :metadata_file_name, as: :string, input_html: { value: importer.parser_fields['metadata_file_name'] } %>
|
10
10
|
|
11
|
-
<%= fi.input :metadata_format,
|
12
|
-
collection: importer.import_metadata_format,
|
11
|
+
<%= fi.input :metadata_format,
|
12
|
+
collection: importer.import_metadata_format,
|
13
13
|
selected: importer.parser_fields['metadata_format'],
|
14
14
|
include_blank: true,
|
15
15
|
input_html: { class: 'form-control' }
|
@@ -18,7 +18,8 @@
|
|
18
18
|
<%= fi.input :visibility,
|
19
19
|
collection: [
|
20
20
|
['Public', 'open'],
|
21
|
-
['Private', 'restricted']
|
21
|
+
['Private', 'restricted'],
|
22
|
+
['Institution', 'authenticated']
|
22
23
|
],
|
23
24
|
selected: importer.parser_fields['visibility'] || 'open',
|
24
25
|
input_html: { class: 'form-control' }
|
@@ -1,16 +1,17 @@
|
|
1
1
|
<div class='oai_fields'>
|
2
2
|
<%= fi.input :base_url, as: :string, input_html: { value: importer.parser_fields['base_url'] } %>
|
3
|
-
|
3
|
+
|
4
4
|
<%= fi.input :metadata_prefix, as: :string, hint: 'Such as oai_dc, dcterms or oai_qdc', input_html: { value: importer.parser_fields['metadata_prefix'] } %>
|
5
|
-
|
5
|
+
|
6
6
|
<%= fi.input :set, collection: [importer.parser_fields['set']], label: 'Set (source)', selected: importer.parser_fields['set'] %>
|
7
7
|
<button type="button" class="btn btn-default refresh-set-source">Refresh Sets</button>
|
8
8
|
|
9
9
|
<%= fi.input :visibility,
|
10
10
|
collection: [
|
11
11
|
['Public', 'open'],
|
12
|
-
['Private', 'restricted']
|
13
|
-
|
12
|
+
['Private', 'restricted'],
|
13
|
+
['Institution', 'authenticated']
|
14
|
+
],x
|
14
15
|
selected: importer.parser_fields['visibility'] || 'open',
|
15
16
|
input_html: { class: 'form-control' }
|
16
17
|
%>
|
@@ -1,31 +1,32 @@
|
|
1
1
|
<div class='xml_fields'>
|
2
2
|
|
3
|
-
<%# @todo improve on this implementation.
|
4
|
-
As it stands, it's a hostage to namespaces,
|
5
|
-
eg. dc:title
|
3
|
+
<%# @todo improve on this implementation.
|
4
|
+
As it stands, it's a hostage to namespaces,
|
5
|
+
eg. dc:title
|
6
6
|
if namespaces aren't in the xml, we would have to specify dc:title
|
7
7
|
but if the namespaces ARE present, we remove them so we would need title
|
8
8
|
%>
|
9
|
-
<%= fi.input :record_element,
|
10
|
-
hint: 'Provide the xml element name to use to identify the record, or records, eg. ROW - each record in the attached XML is wrapped in a <ROW> tag.',
|
9
|
+
<%= fi.input :record_element,
|
10
|
+
hint: 'Provide the xml element name to use to identify the record, or records, eg. ROW - each record in the attached XML is wrapped in a <ROW> tag.',
|
11
11
|
input_html: { value: importer.parser_fields['record_element'] }
|
12
12
|
%>
|
13
13
|
|
14
|
-
<%= fi.input :import_type,
|
14
|
+
<%= fi.input :import_type,
|
15
15
|
collection: [
|
16
16
|
['Single Work per Metadata File', 'single'],
|
17
|
-
['Multiple Works per Metadata File', 'multiple']
|
18
|
-
],
|
17
|
+
['Multiple Works per Metadata File', 'multiple']
|
18
|
+
],
|
19
19
|
selected: importer.parser_fields['import_type'],
|
20
20
|
input_html: { class: 'form-control' }
|
21
21
|
%>
|
22
|
-
|
22
|
+
|
23
23
|
<h4>Visiblity</h4>
|
24
24
|
|
25
25
|
<%= fi.input :visibility,
|
26
26
|
collection: [
|
27
27
|
['Public', 'open'],
|
28
|
-
['Private', 'restricted']
|
28
|
+
['Private', 'restricted'],
|
29
|
+
['Institution', 'authenticated']
|
29
30
|
],
|
30
31
|
selected: importer.parser_fields['visibility'] || 'open',
|
31
32
|
input_html: { class: 'form-control' }
|
@@ -46,18 +46,23 @@ en:
|
|
46
46
|
generated_metadata: "These exported fields currently cannot be imported."
|
47
47
|
importer:
|
48
48
|
labels:
|
49
|
-
name: Name
|
50
|
-
user: User
|
51
49
|
admin_set: Admin set
|
50
|
+
collection_entries: Collection Entries
|
51
|
+
entry_id: Entry ID
|
52
|
+
exporter: Exporter
|
53
|
+
file_set_entries: File Set Entries
|
52
54
|
frequency: Frequency
|
53
|
-
|
55
|
+
identifier: Identifier
|
56
|
+
importer: Importer
|
54
57
|
limit: Limit
|
55
|
-
|
58
|
+
name: Name
|
59
|
+
parser_klass: Parser klass
|
56
60
|
total_collections: Total Collections
|
57
61
|
total_file_sets: Total File Sets
|
62
|
+
total_work_entries: Total Works
|
63
|
+
type: Type
|
64
|
+
user: User
|
58
65
|
work_entries: Work Entries
|
59
|
-
collection_entries: Collection Entries
|
60
|
-
file_set_entries: File Set Entries
|
61
66
|
table_header:
|
62
67
|
labels:
|
63
68
|
identifier: Identifier
|
@@ -1,7 +1,7 @@
|
|
1
1
|
class RemoveUnusedLastError < ActiveRecord::Migration[5.1]
|
2
2
|
def change
|
3
|
-
remove_column :bulkrax_entries, :last_error
|
4
|
-
remove_column :bulkrax_exporters, :last_error
|
5
|
-
remove_column :bulkrax_importers, :last_error
|
3
|
+
remove_column :bulkrax_entries, :last_error if column_exists?(:bulkrax_entries, :last_error)
|
4
|
+
remove_column :bulkrax_exporters, :last_error if column_exists?(:bulkrax_exporters, :last_error)
|
5
|
+
remove_column :bulkrax_importers, :last_error if column_exists?(:bulkrax_importers, :last_error)
|
6
6
|
end
|
7
7
|
end
|
@@ -1,14 +1,14 @@
|
|
1
1
|
class AddIndicesToBulkrax < ActiveRecord::Migration[5.1]
|
2
2
|
def change
|
3
|
-
add_index :bulkrax_entries, :identifier
|
4
|
-
add_index :bulkrax_entries, :type
|
5
|
-
add_index :bulkrax_entries, [:importerexporter_id, :importerexporter_type], name: 'bulkrax_entries_importerexporter_idx'
|
3
|
+
add_index :bulkrax_entries, :identifier unless index_exists?(:bulkrax_entries, :identifier)
|
4
|
+
add_index :bulkrax_entries, :type unless index_exists?(:bulkrax_entries, :type)
|
5
|
+
add_index :bulkrax_entries, [:importerexporter_id, :importerexporter_type], name: 'bulkrax_entries_importerexporter_idx' unless index_exists?(:bulkrax_entries, [:importerexporter_id, :importerexporter_type], name: 'bulkrax_entries_importerexporter_idx')
|
6
6
|
|
7
|
-
add_index :bulkrax_pending_relationships, :parent_id
|
8
|
-
add_index :bulkrax_pending_relationships, :child_id
|
7
|
+
add_index :bulkrax_pending_relationships, :parent_id unless index_exists?(:bulkrax_pending_relationships, :parent_id)
|
8
|
+
add_index :bulkrax_pending_relationships, :child_id unless index_exists?(:bulkrax_pending_relationships, :child_id)
|
9
9
|
|
10
|
-
add_index :bulkrax_statuses, [:statusable_id, :statusable_type], name: 'bulkrax_statuses_statusable_idx'
|
11
|
-
add_index :bulkrax_statuses, [:runnable_id, :runnable_type], name: 'bulkrax_statuses_runnable_idx'
|
12
|
-
add_index :bulkrax_statuses, :error_class
|
10
|
+
add_index :bulkrax_statuses, [:statusable_id, :statusable_type], name: 'bulkrax_statuses_statusable_idx' unless index_exists?(:bulkrax_statuses, [:statusable_id, :statusable_type], name: 'bulkrax_statuses_statusable_idx')
|
11
|
+
add_index :bulkrax_statuses, [:runnable_id, :runnable_type], name: 'bulkrax_statuses_runnable_idx' unless index_exists?(:bulkrax_statuses, [:runnable_id, :runnable_type], name: 'bulkrax_statuses_runnable_idx')
|
12
|
+
add_index :bulkrax_statuses, :error_class unless index_exists?(:bulkrax_statuses, :error_class)
|
13
13
|
end
|
14
14
|
end
|
data/lib/bulkrax/engine.rb
CHANGED
@@ -23,15 +23,16 @@ module Bulkrax
|
|
23
23
|
end
|
24
24
|
|
25
25
|
config.after_initialize do
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
26
|
+
# We want to ensure that Bulkrax is earlier in the lookup for view_paths than Hyrax. That is
|
27
|
+
# we favor views in Bulkrax over those in Hyrax.
|
28
|
+
if defined?(Hyrax)
|
29
|
+
my_engine_root = Bulkrax::Engine.root.to_s
|
30
|
+
hyrax_engine_root = Hyrax::Engine.root.to_s
|
31
|
+
paths = ActionController::Base.view_paths.collect(&:to_s)
|
32
|
+
hyrax_view_path = paths.detect { |path| path.match(%r{^#{hyrax_engine_root}}) }
|
33
|
+
paths.insert(paths.index(hyrax_view_path), File.join(my_engine_root, 'app', 'views')) if hyrax_view_path
|
34
|
+
ActionController::Base.view_paths = paths.uniq
|
35
|
+
end
|
35
36
|
end
|
36
37
|
end
|
37
38
|
end
|
data/lib/bulkrax/version.rb
CHANGED
data/lib/bulkrax.rb
CHANGED
@@ -1,16 +1,16 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
require "bulkrax/version"
|
4
|
+
require "bulkrax/engine"
|
5
|
+
require 'active_support/all'
|
6
6
|
|
7
|
-
|
7
|
+
# rubocop:disable Metrics/ModuleLength
|
8
8
|
module Bulkrax
|
9
|
-
|
10
|
-
|
9
|
+
extend self # rubocop:disable Style/ModuleFunction
|
10
|
+
extend Forwardable
|
11
11
|
|
12
|
-
|
13
|
-
|
12
|
+
##
|
13
|
+
# @api public
|
14
14
|
class Configuration
|
15
15
|
attr_accessor :api_definition,
|
16
16
|
:curation_concerns,
|
@@ -35,15 +35,6 @@ module Bulkrax
|
|
35
35
|
:reserved_properties,
|
36
36
|
:server_name
|
37
37
|
|
38
|
-
attr_writer :ingest_queue_name
|
39
|
-
##
|
40
|
-
# @return [String, Proc]
|
41
|
-
def ingest_queue_name
|
42
|
-
return @ingest_queue_name if @ingest_queue_name.present?
|
43
|
-
return Hyrax.config.ingest_queue_name if defined?(Hyrax)
|
44
|
-
:import
|
45
|
-
end
|
46
|
-
|
47
38
|
attr_writer :use_locking
|
48
39
|
|
49
40
|
def use_locking
|
@@ -54,61 +45,61 @@ module Bulkrax
|
|
54
45
|
alias use_locking? use_locking
|
55
46
|
end
|
56
47
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
48
|
+
def config
|
49
|
+
@config ||= Configuration.new
|
50
|
+
yield @config if block_given?
|
51
|
+
@config
|
52
|
+
end
|
53
|
+
alias setup config
|
63
54
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
55
|
+
def_delegators :@config,
|
56
|
+
:api_definition,
|
57
|
+
:api_definition=,
|
58
|
+
:curation_concerns,
|
59
|
+
:curation_concerns=,
|
60
|
+
:default_field_mapping,
|
61
|
+
:default_field_mapping=,
|
62
|
+
:default_work_type,
|
63
|
+
:default_work_type=,
|
64
|
+
:export_path,
|
65
|
+
:export_path=,
|
66
|
+
:field_mappings,
|
67
|
+
:field_mappings=,
|
68
|
+
:file_model_class,
|
69
|
+
:file_model_class=,
|
70
|
+
:fill_in_blank_source_identifiers,
|
71
|
+
:fill_in_blank_source_identifiers=,
|
72
|
+
:generated_metadata_mapping,
|
73
|
+
:generated_metadata_mapping=,
|
74
|
+
:import_path,
|
75
|
+
:import_path=,
|
76
|
+
:multi_value_element_join_on,
|
77
|
+
:multi_value_element_join_on=,
|
78
|
+
:multi_value_element_split_on,
|
79
|
+
:multi_value_element_split_on=,
|
80
|
+
:object_factory,
|
81
|
+
:object_factory=,
|
82
|
+
:parsers,
|
83
|
+
:parsers=,
|
84
|
+
:qa_controlled_properties,
|
85
|
+
:qa_controlled_properties=,
|
86
|
+
:related_children_field_mapping,
|
87
|
+
:related_children_field_mapping=,
|
88
|
+
:related_parents_field_mapping,
|
89
|
+
:related_parents_field_mapping=,
|
90
|
+
:relationship_job_class,
|
91
|
+
:relationship_job_class=,
|
92
|
+
:removed_image_path,
|
93
|
+
:removed_image_path=,
|
94
|
+
:required_elements,
|
95
|
+
:required_elements=,
|
96
|
+
:reserved_properties,
|
97
|
+
:reserved_properties=,
|
98
|
+
:server_name,
|
99
|
+
:server_name=,
|
100
|
+
:use_locking,
|
101
|
+
:use_locking=,
|
102
|
+
:use_locking?
|
112
103
|
|
113
104
|
config do |conf|
|
114
105
|
conf.parsers = [
|
@@ -147,149 +138,149 @@ module Bulkrax
|
|
147
138
|
# Based on Hyrax CoreMetadata && BasicMetadata
|
148
139
|
# Override at application level to change
|
149
140
|
conf.field_mappings = {
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
141
|
+
"Bulkrax::OaiDcParser" => {
|
142
|
+
"contributor" => { from: ["contributor"] },
|
143
|
+
# no appropriate mapping for coverage (based_near needs id)
|
144
|
+
# ""=>{:from=>["coverage"]},
|
145
|
+
"creator" => { from: ["creator"] },
|
146
|
+
"date_created" => { from: ["date"] },
|
147
|
+
"description" => { from: ["description"] },
|
148
|
+
# no appropriate mapping for format
|
149
|
+
# ""=>{:from=>["format"]},
|
150
|
+
"identifier" => { from: ["identifier"] },
|
151
|
+
"language" => { from: ["language"], parsed: true },
|
152
|
+
"publisher" => { from: ["publisher"] },
|
153
|
+
"related_url" => { from: ["relation"] },
|
154
|
+
"rights_statement" => { from: ["rights"] },
|
155
|
+
"source" => { from: ["source"] },
|
156
|
+
"subject" => { from: ["subject"], parsed: true },
|
157
|
+
"title" => { from: ["title"] },
|
158
|
+
"resource_type" => { from: ["type"], parsed: true },
|
159
|
+
"remote_files" => { from: ["thumbnail_url"], parsed: true }
|
169
160
|
},
|
170
|
-
|
171
|
-
|
172
|
-
|
161
|
+
"Bulkrax::OaiQualifiedDcParser" => {
|
162
|
+
"abstract" => { from: ["abstract"] },
|
163
|
+
"alternative_title" => { from: ["alternative"] },
|
173
164
|
"bibliographic_citation" => { from: ["bibliographicCitation"] },
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
165
|
+
"contributor" => { from: ["contributor"] },
|
166
|
+
"creator" => { from: ["creator"] },
|
167
|
+
"date_created" => { from: ["created"] },
|
168
|
+
"description" => { from: ["description"] },
|
169
|
+
"language" => { from: ["language"] },
|
170
|
+
"license" => { from: ["license"] },
|
171
|
+
"publisher" => { from: ["publisher"] },
|
172
|
+
"related_url" => { from: ["relation"] },
|
173
|
+
"rights_holder" => { from: ["rightsHolder"] },
|
174
|
+
"rights_statement" => { from: ["rights"] },
|
175
|
+
"source" => { from: ["source"] },
|
176
|
+
"subject" => { from: ["subject"], parsed: true },
|
177
|
+
"title" => { from: ["title"] },
|
178
|
+
"resource_type" => { from: ["type"], parsed: true },
|
179
|
+
"remote_files" => { from: ["thumbnail_url"], parsed: true }
|
180
|
+
},
|
181
|
+
# When empty, a default_field_mapping will be generated
|
182
|
+
"Bulkrax::CsvParser" => {},
|
183
|
+
'Bulkrax::BagitParser' => {},
|
184
|
+
'Bulkrax::XmlParser' => {}
|
185
|
+
}
|
195
186
|
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
187
|
+
# Lambda to set the default field mapping
|
188
|
+
conf.default_field_mapping = lambda do |field|
|
189
|
+
return if field.blank?
|
190
|
+
{
|
191
|
+
field.to_s =>
|
192
|
+
{
|
193
|
+
from: [field.to_s],
|
194
|
+
split: false,
|
195
|
+
parsed: Bulkrax::ApplicationMatcher.method_defined?("parse_#{field}"),
|
196
|
+
if: nil,
|
197
|
+
excluded: false
|
198
|
+
}
|
199
|
+
}
|
200
|
+
end
|
210
201
|
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
202
|
+
# Properties that should not be used in imports. They are reserved for use by Hyrax.
|
203
|
+
conf.reserved_properties = %w[
|
204
|
+
create_date
|
205
|
+
modified_date
|
206
|
+
date_modified
|
207
|
+
date_uploaded
|
208
|
+
depositor
|
209
|
+
arkivo_checksum
|
210
|
+
has_model
|
211
|
+
head
|
212
|
+
label
|
213
|
+
import_url
|
214
|
+
on_behalf_of
|
215
|
+
proxy_depositor
|
216
|
+
owner
|
217
|
+
state
|
218
|
+
tail
|
219
|
+
original_url
|
220
|
+
relative_path
|
221
|
+
]
|
231
222
|
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
223
|
+
# List of Questioning Authority properties that are controlled via YAML files in
|
224
|
+
# the config/authorities/ directory. For example, the :rights_statement property
|
225
|
+
# is controlled by the active terms in config/authorities/rights_statements.yml
|
226
|
+
conf.qa_controlled_properties = %w[rights_statement license]
|
227
|
+
end
|
237
228
|
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
229
|
+
def api_definition
|
230
|
+
@api_definition ||= ActiveSupport::HashWithIndifferentAccess.new(
|
231
|
+
YAML.safe_load(
|
232
|
+
ERB.new(
|
233
|
+
File.read(Rails.root.join('config', 'bulkrax_api.yml'))
|
234
|
+
).result
|
235
|
+
)
|
236
|
+
)
|
237
|
+
end
|
247
238
|
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
239
|
+
DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON = ' | '
|
240
|
+
# Specify the delimiter for joining an attribute's multi-value array into a string.
|
241
|
+
#
|
242
|
+
# @note the specific delimiter should likely be present in the multi_value_element_split_on
|
243
|
+
# expression.
|
244
|
+
def multi_value_element_join_on
|
245
|
+
@multi_value_element_join_on ||= DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON
|
246
|
+
end
|
256
247
|
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
248
|
+
DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON = /\s*[:;|]\s*/.freeze
|
249
|
+
# @return [Regexp] the regular expression to use to "split" an attribute's values. If set to
|
250
|
+
# `true`, use the DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON.
|
251
|
+
#
|
252
|
+
# @note The "true" value is to preserve backwards compatibility.
|
253
|
+
# @see DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON
|
263
254
|
def multi_value_element_split_on
|
264
|
-
|
265
|
-
|
255
|
+
if @multi_value_element_join_on.is_a?(TrueClass)
|
256
|
+
DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON
|
266
257
|
else
|
267
|
-
|
258
|
+
@multi_value_element_split_on ||= DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON
|
268
259
|
end
|
269
260
|
end
|
270
261
|
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
262
|
+
# Responsible for stripping hidden characters from the given string.
|
263
|
+
#
|
264
|
+
# @param value [#to_s]
|
265
|
+
# @return [String] with hidden characters removed
|
266
|
+
#
|
267
|
+
# @see https://github.com/samvera-labs/bulkrax/issues/688
|
268
|
+
def normalize_string(value)
|
269
|
+
# Removing [Byte Order Mark (BOM)](https://en.wikipedia.org/wiki/Byte_order_mark)
|
270
|
+
value.to_s.delete("\xEF\xBB\xBF")
|
271
|
+
end
|
281
272
|
|
282
|
-
|
283
|
-
|
273
|
+
def fallback_user_for_importer_exporter_processing
|
274
|
+
return User.batch_user if defined?(Hyrax) && User.respond_to?(:batch_user)
|
284
275
|
|
285
|
-
|
286
|
-
|
276
|
+
raise "We have no fallback user available for Bulkrax.fallback_user_for_importer_exporter_processing"
|
277
|
+
end
|
287
278
|
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
279
|
+
# This class conforms to the ActiveRecord serialize interface. Its job is to ensure that we
|
280
|
+
# don't have keys with the tricksy Byte Order Mark character.
|
281
|
+
#
|
282
|
+
# @see https://api.rubyonrails.org/classes/ActiveRecord/AttributeMethods/Serialization/ClassMethods.html#method-i-serialize
|
283
|
+
class NormalizedJson
|
293
284
|
def self.normalize_keys(hash)
|
294
285
|
return hash unless hash.respond_to?(:each_pair)
|
295
286
|
returning_value = {}
|
@@ -301,18 +292,18 @@ module Bulkrax
|
|
301
292
|
|
302
293
|
# When we write the serialized data to the database, we "dump" the value into that database
|
303
294
|
# column.
|
304
|
-
|
305
|
-
|
306
|
-
|
295
|
+
def self.dump(value)
|
296
|
+
JSON.dump(normalize_keys(value))
|
297
|
+
end
|
307
298
|
|
308
299
|
# When we load the serialized data from the database, we pass the database's value into "load"
|
309
300
|
# function.
|
310
301
|
#
|
311
302
|
# rubocop:disable Security/JSONLoad
|
312
|
-
|
313
|
-
|
314
|
-
|
303
|
+
def self.load(string)
|
304
|
+
normalize_keys(JSON.load(string))
|
305
|
+
end
|
315
306
|
# rubocop:enable Security/JSONLoad
|
316
|
-
|
307
|
+
end
|
317
308
|
end
|
318
|
-
|
309
|
+
# rubocop:enable Metrics/ModuleLength
|
@@ -1,6 +1,108 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
namespace :bulkrax do
|
4
|
+
# Usage example: rails bulkrax:generate_test_csvs['5','100','GenericWork']
|
5
|
+
desc 'Generate CSVs with fake data for testing purposes'
|
6
|
+
task :generate_test_csvs, [:num_of_csvs, :csv_rows, :record_type] => :environment do |_t, args|
|
7
|
+
# NOTE: If this line throws an error, run `gem install faker` inside your Docker container
|
8
|
+
require 'faker'
|
9
|
+
require 'csv'
|
10
|
+
|
11
|
+
FileUtils.mkdir_p(Rails.root.join('tmp', 'imports'))
|
12
|
+
|
13
|
+
IGNORED_PROPERTIES = %w[
|
14
|
+
admin_set_id
|
15
|
+
alternate_ids
|
16
|
+
arkivo_checksum
|
17
|
+
created_at
|
18
|
+
date_modified
|
19
|
+
date_uploaded
|
20
|
+
depositor
|
21
|
+
embargo
|
22
|
+
has_model
|
23
|
+
head
|
24
|
+
internal_resource
|
25
|
+
label
|
26
|
+
lease
|
27
|
+
member_ids
|
28
|
+
member_of_collection_ids
|
29
|
+
modified_date
|
30
|
+
new_record
|
31
|
+
on_behalf_of
|
32
|
+
owner
|
33
|
+
proxy_depositor
|
34
|
+
rendering_ids
|
35
|
+
representative_id
|
36
|
+
state
|
37
|
+
tail
|
38
|
+
thumbnail_id
|
39
|
+
updated_at
|
40
|
+
].freeze
|
41
|
+
|
42
|
+
BULKRAX_PROPERTIES = %w[
|
43
|
+
source_identifier
|
44
|
+
model
|
45
|
+
].freeze
|
46
|
+
|
47
|
+
num_of_csvs = args.num_of_csvs.presence&.to_i || 5
|
48
|
+
csv_rows = args.csv_rows.presence&.to_i || 100
|
49
|
+
record_type = args.record_type.presence&.constantize || GenericWork
|
50
|
+
|
51
|
+
csv_header = if Hyrax.config.try(:use_valkyrie?)
|
52
|
+
record_type.schema.map { |k| k.name.to_s }
|
53
|
+
else
|
54
|
+
record_type.properties.keys
|
55
|
+
end
|
56
|
+
|
57
|
+
csv_header -= IGNORED_PROPERTIES
|
58
|
+
csv_header.unshift(*BULKRAX_PROPERTIES)
|
59
|
+
|
60
|
+
num_of_csvs.times do |i|
|
61
|
+
CSV.open(Rails.root.join('tmp', 'imports', "importer_#{i}.csv"), 'wb') do |csv|
|
62
|
+
csv << csv_header
|
63
|
+
csv_rows.times do |_index|
|
64
|
+
row = []
|
65
|
+
csv_header.each do |prop_name|
|
66
|
+
row << case prop_name
|
67
|
+
when 'id', 'source_identifier'
|
68
|
+
Faker::Number.number(digits: 4)
|
69
|
+
when 'model'
|
70
|
+
record_type.to_s
|
71
|
+
when 'rights_statement'
|
72
|
+
'http://rightsstatements.org/vocab/CNE/1.0/'
|
73
|
+
when 'license'
|
74
|
+
'https://creativecommons.org/licenses/by-nc/4.0/'
|
75
|
+
when 'based_near'
|
76
|
+
# FIXME: Set a proper :based_near value
|
77
|
+
nil
|
78
|
+
else
|
79
|
+
Faker::Lorem.sentence
|
80
|
+
end
|
81
|
+
end
|
82
|
+
csv << row
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
num_of_csvs.times do |i|
|
88
|
+
Bulkrax::Importer.create(
|
89
|
+
name: "Generated CSV #{i}",
|
90
|
+
admin_set_id: 'admin_set/default',
|
91
|
+
user_id: User.find_by(email: 'admin@example.com').id,
|
92
|
+
frequency: 'PT0S',
|
93
|
+
parser_klass: 'Bulkrax::CsvParser',
|
94
|
+
parser_fields: {
|
95
|
+
'visibility' => 'open',
|
96
|
+
'rights_statement' => '',
|
97
|
+
'override_rights_statement' => '0',
|
98
|
+
'file_style' => 'Specify a Path on the Server',
|
99
|
+
'import_file_path' => "tmp/imports/importer_#{i}.csv",
|
100
|
+
'update_files' => false
|
101
|
+
}
|
102
|
+
)
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
4
106
|
desc "Remove old exported zips and create new ones with the new file structure"
|
5
107
|
task rerun_all_exporters: :environment do
|
6
108
|
# delete the existing folders and zip files
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.
|
4
|
+
version: 5.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-10-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|