bulkrax 5.3.1 → 5.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/factories/bulkrax/object_factory.rb +5 -3
- data/app/jobs/bulkrax/create_relationships_job.rb +1 -1
- data/app/jobs/bulkrax/delete_job.rb +1 -1
- data/app/jobs/bulkrax/download_cloud_file_job.rb +1 -1
- data/app/jobs/bulkrax/import_collection_job.rb +1 -1
- data/app/jobs/bulkrax/import_file_set_job.rb +1 -1
- data/app/jobs/bulkrax/import_work_job.rb +1 -1
- data/app/jobs/bulkrax/importer_job.rb +1 -1
- data/app/views/bulkrax/entries/show.html.erb +5 -5
- data/app/views/bulkrax/importers/_bagit_fields.html.erb +6 -5
- data/app/views/bulkrax/importers/_csv_fields.html.erb +2 -1
- data/app/views/bulkrax/importers/_oai_fields.html.erb +5 -4
- data/app/views/bulkrax/importers/_xml_fields.html.erb +11 -10
- data/config/locales/bulkrax.en.yml +11 -6
- data/db/migrate/20210806044408_remove_unused_last_error.rb +3 -3
- data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +8 -8
- data/lib/bulkrax/engine.rb +10 -9
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +198 -207
- data/lib/tasks/bulkrax_tasks.rake +102 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 56ac96f944d880492d7f7aa50870557ba81cf2ed23875751509b3ec23411644b
|
4
|
+
data.tar.gz: f18c9fa1d5a968d9214587476f2f949e04015e1402f6cb2ac516616669567c0e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0556a64e91eb41a27597b717fbf9e1de0682d666f116397e64a1b58a64fffaaadfd2dac645fbd256e3bd14ffb9065ce5f0ceaac920296f6623629199a6e130b5
|
7
|
+
data.tar.gz: dada612e0ee38d0b74ceacedc5c034cb39e26111edc90c39afd0d04338f77c1e0c0857a4d983bd7bd24fe8d7e601e2b8a6c32242e3142b2faad3a3369cbf82c1
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Bulkrax
|
4
|
-
class ObjectFactory
|
4
|
+
class ObjectFactory # rubocop:disable Metrics/ClassLength
|
5
5
|
extend ActiveModel::Callbacks
|
6
6
|
include Bulkrax::FileFactory
|
7
7
|
include DynamicRecordLookup
|
@@ -87,7 +87,8 @@ module Bulkrax
|
|
87
87
|
end
|
88
88
|
|
89
89
|
def find
|
90
|
-
|
90
|
+
found = find_by_id if attributes[:id].present?
|
91
|
+
return found if found.present?
|
91
92
|
return search_by_identifier if attributes[work_identifier].present?
|
92
93
|
end
|
93
94
|
|
@@ -102,7 +103,8 @@ module Bulkrax
|
|
102
103
|
end
|
103
104
|
|
104
105
|
def search_by_identifier
|
105
|
-
|
106
|
+
work_index = ::ActiveFedora.index_field_mapper.solr_name(work_identifier, :facetable)
|
107
|
+
query = { work_index =>
|
106
108
|
source_identifier_value }
|
107
109
|
# Query can return partial matches (something6 matches both something6 and something68)
|
108
110
|
# so we need to weed out any that are not the correct full match. But other items might be
|
@@ -40,7 +40,7 @@ module Bulkrax
|
|
40
40
|
|
41
41
|
include DynamicRecordLookup
|
42
42
|
|
43
|
-
queue_as
|
43
|
+
queue_as :import
|
44
44
|
|
45
45
|
# @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers
|
46
46
|
# @param importer_run [Bulkrax::ImporterRun] current importer run (needed to properly update counters)
|
@@ -2,17 +2,17 @@
|
|
2
2
|
<div class="panel panel-default">
|
3
3
|
<div class="panel-body">
|
4
4
|
<p class='bulkrax-p-align'>
|
5
|
-
<strong
|
5
|
+
<strong><%= t('bulkrax.importer.labels.identifier') %>:</strong>
|
6
6
|
<%= @entry.identifier %>
|
7
7
|
</p>
|
8
8
|
|
9
9
|
<p class='bulkrax-p-align'>
|
10
|
-
<strong
|
10
|
+
<strong><%= t('bulkrax.importer.labels.entry_id') %>:</strong>
|
11
11
|
<%= @entry.id %>
|
12
12
|
</p>
|
13
13
|
|
14
14
|
<p class='bulkrax-p-align'>
|
15
|
-
<strong
|
15
|
+
<strong><%= t('bulkrax.importer.labels.type') %>:</strong>
|
16
16
|
<%= @entry.factory_class || 'Unknown' %>
|
17
17
|
</p>
|
18
18
|
<%= render partial: 'raw_metadata'%>
|
@@ -23,10 +23,10 @@
|
|
23
23
|
|
24
24
|
<p class="bulkrax-p-align">
|
25
25
|
<% if @importer.present? %>
|
26
|
-
<strong
|
26
|
+
<strong><%= t('bulkrax.importer.labels.importer') %>:</strong>
|
27
27
|
<%= link_to @importer.name, importer_path(@importer) %>
|
28
28
|
<% elsif @exporter.present? %>
|
29
|
-
<strong
|
29
|
+
<strong><%= t('bulkrax.importer.labels.exporter') %>:</strong>
|
30
30
|
<%= link_to @exporter.name, exporter_path(@exporter) %>
|
31
31
|
<% end %>
|
32
32
|
</p>
|
@@ -1,15 +1,15 @@
|
|
1
1
|
<div class='bagit_fields'>
|
2
2
|
|
3
|
-
<%#= fi.input :metadata_type,
|
4
|
-
collection: importer.import_metadata_type,
|
3
|
+
<%#= fi.input :metadata_type,
|
4
|
+
collection: importer.import_metadata_type,
|
5
5
|
selected: importer.parser_fields['metadata_type'],
|
6
6
|
include_blank: true,
|
7
7
|
input_html: { class: 'form-control' }
|
8
8
|
%>
|
9
9
|
<%= fi.input :metadata_file_name, as: :string, input_html: { value: importer.parser_fields['metadata_file_name'] } %>
|
10
10
|
|
11
|
-
<%= fi.input :metadata_format,
|
12
|
-
collection: importer.import_metadata_format,
|
11
|
+
<%= fi.input :metadata_format,
|
12
|
+
collection: importer.import_metadata_format,
|
13
13
|
selected: importer.parser_fields['metadata_format'],
|
14
14
|
include_blank: true,
|
15
15
|
input_html: { class: 'form-control' }
|
@@ -18,7 +18,8 @@
|
|
18
18
|
<%= fi.input :visibility,
|
19
19
|
collection: [
|
20
20
|
['Public', 'open'],
|
21
|
-
['Private', 'restricted']
|
21
|
+
['Private', 'restricted'],
|
22
|
+
['Institution', 'authenticated']
|
22
23
|
],
|
23
24
|
selected: importer.parser_fields['visibility'] || 'open',
|
24
25
|
input_html: { class: 'form-control' }
|
@@ -1,16 +1,17 @@
|
|
1
1
|
<div class='oai_fields'>
|
2
2
|
<%= fi.input :base_url, as: :string, input_html: { value: importer.parser_fields['base_url'] } %>
|
3
|
-
|
3
|
+
|
4
4
|
<%= fi.input :metadata_prefix, as: :string, hint: 'Such as oai_dc, dcterms or oai_qdc', input_html: { value: importer.parser_fields['metadata_prefix'] } %>
|
5
|
-
|
5
|
+
|
6
6
|
<%= fi.input :set, collection: [importer.parser_fields['set']], label: 'Set (source)', selected: importer.parser_fields['set'] %>
|
7
7
|
<button type="button" class="btn btn-default refresh-set-source">Refresh Sets</button>
|
8
8
|
|
9
9
|
<%= fi.input :visibility,
|
10
10
|
collection: [
|
11
11
|
['Public', 'open'],
|
12
|
-
['Private', 'restricted']
|
13
|
-
|
12
|
+
['Private', 'restricted'],
|
13
|
+
['Institution', 'authenticated']
|
14
|
+
],x
|
14
15
|
selected: importer.parser_fields['visibility'] || 'open',
|
15
16
|
input_html: { class: 'form-control' }
|
16
17
|
%>
|
@@ -1,31 +1,32 @@
|
|
1
1
|
<div class='xml_fields'>
|
2
2
|
|
3
|
-
<%# @todo improve on this implementation.
|
4
|
-
As it stands, it's a hostage to namespaces,
|
5
|
-
eg. dc:title
|
3
|
+
<%# @todo improve on this implementation.
|
4
|
+
As it stands, it's a hostage to namespaces,
|
5
|
+
eg. dc:title
|
6
6
|
if namespaces aren't in the xml, we would have to specify dc:title
|
7
7
|
but if the namespaces ARE present, we remove them so we would need title
|
8
8
|
%>
|
9
|
-
<%= fi.input :record_element,
|
10
|
-
hint: 'Provide the xml element name to use to identify the record, or records, eg. ROW - each record in the attached XML is wrapped in a <ROW> tag.',
|
9
|
+
<%= fi.input :record_element,
|
10
|
+
hint: 'Provide the xml element name to use to identify the record, or records, eg. ROW - each record in the attached XML is wrapped in a <ROW> tag.',
|
11
11
|
input_html: { value: importer.parser_fields['record_element'] }
|
12
12
|
%>
|
13
13
|
|
14
|
-
<%= fi.input :import_type,
|
14
|
+
<%= fi.input :import_type,
|
15
15
|
collection: [
|
16
16
|
['Single Work per Metadata File', 'single'],
|
17
|
-
['Multiple Works per Metadata File', 'multiple']
|
18
|
-
],
|
17
|
+
['Multiple Works per Metadata File', 'multiple']
|
18
|
+
],
|
19
19
|
selected: importer.parser_fields['import_type'],
|
20
20
|
input_html: { class: 'form-control' }
|
21
21
|
%>
|
22
|
-
|
22
|
+
|
23
23
|
<h4>Visiblity</h4>
|
24
24
|
|
25
25
|
<%= fi.input :visibility,
|
26
26
|
collection: [
|
27
27
|
['Public', 'open'],
|
28
|
-
['Private', 'restricted']
|
28
|
+
['Private', 'restricted'],
|
29
|
+
['Institution', 'authenticated']
|
29
30
|
],
|
30
31
|
selected: importer.parser_fields['visibility'] || 'open',
|
31
32
|
input_html: { class: 'form-control' }
|
@@ -46,18 +46,23 @@ en:
|
|
46
46
|
generated_metadata: "These exported fields currently cannot be imported."
|
47
47
|
importer:
|
48
48
|
labels:
|
49
|
-
name: Name
|
50
|
-
user: User
|
51
49
|
admin_set: Admin set
|
50
|
+
collection_entries: Collection Entries
|
51
|
+
entry_id: Entry ID
|
52
|
+
exporter: Exporter
|
53
|
+
file_set_entries: File Set Entries
|
52
54
|
frequency: Frequency
|
53
|
-
|
55
|
+
identifier: Identifier
|
56
|
+
importer: Importer
|
54
57
|
limit: Limit
|
55
|
-
|
58
|
+
name: Name
|
59
|
+
parser_klass: Parser klass
|
56
60
|
total_collections: Total Collections
|
57
61
|
total_file_sets: Total File Sets
|
62
|
+
total_work_entries: Total Works
|
63
|
+
type: Type
|
64
|
+
user: User
|
58
65
|
work_entries: Work Entries
|
59
|
-
collection_entries: Collection Entries
|
60
|
-
file_set_entries: File Set Entries
|
61
66
|
table_header:
|
62
67
|
labels:
|
63
68
|
identifier: Identifier
|
@@ -1,7 +1,7 @@
|
|
1
1
|
class RemoveUnusedLastError < ActiveRecord::Migration[5.1]
|
2
2
|
def change
|
3
|
-
remove_column :bulkrax_entries, :last_error
|
4
|
-
remove_column :bulkrax_exporters, :last_error
|
5
|
-
remove_column :bulkrax_importers, :last_error
|
3
|
+
remove_column :bulkrax_entries, :last_error if column_exists?(:bulkrax_entries, :last_error)
|
4
|
+
remove_column :bulkrax_exporters, :last_error if column_exists?(:bulkrax_exporters, :last_error)
|
5
|
+
remove_column :bulkrax_importers, :last_error if column_exists?(:bulkrax_importers, :last_error)
|
6
6
|
end
|
7
7
|
end
|
@@ -1,14 +1,14 @@
|
|
1
1
|
class AddIndicesToBulkrax < ActiveRecord::Migration[5.1]
|
2
2
|
def change
|
3
|
-
add_index :bulkrax_entries, :identifier
|
4
|
-
add_index :bulkrax_entries, :type
|
5
|
-
add_index :bulkrax_entries, [:importerexporter_id, :importerexporter_type], name: 'bulkrax_entries_importerexporter_idx'
|
3
|
+
add_index :bulkrax_entries, :identifier unless index_exists?(:bulkrax_entries, :identifier)
|
4
|
+
add_index :bulkrax_entries, :type unless index_exists?(:bulkrax_entries, :type)
|
5
|
+
add_index :bulkrax_entries, [:importerexporter_id, :importerexporter_type], name: 'bulkrax_entries_importerexporter_idx' unless index_exists?(:bulkrax_entries, [:importerexporter_id, :importerexporter_type], name: 'bulkrax_entries_importerexporter_idx')
|
6
6
|
|
7
|
-
add_index :bulkrax_pending_relationships, :parent_id
|
8
|
-
add_index :bulkrax_pending_relationships, :child_id
|
7
|
+
add_index :bulkrax_pending_relationships, :parent_id unless index_exists?(:bulkrax_pending_relationships, :parent_id)
|
8
|
+
add_index :bulkrax_pending_relationships, :child_id unless index_exists?(:bulkrax_pending_relationships, :child_id)
|
9
9
|
|
10
|
-
add_index :bulkrax_statuses, [:statusable_id, :statusable_type], name: 'bulkrax_statuses_statusable_idx'
|
11
|
-
add_index :bulkrax_statuses, [:runnable_id, :runnable_type], name: 'bulkrax_statuses_runnable_idx'
|
12
|
-
add_index :bulkrax_statuses, :error_class
|
10
|
+
add_index :bulkrax_statuses, [:statusable_id, :statusable_type], name: 'bulkrax_statuses_statusable_idx' unless index_exists?(:bulkrax_statuses, [:statusable_id, :statusable_type], name: 'bulkrax_statuses_statusable_idx')
|
11
|
+
add_index :bulkrax_statuses, [:runnable_id, :runnable_type], name: 'bulkrax_statuses_runnable_idx' unless index_exists?(:bulkrax_statuses, [:runnable_id, :runnable_type], name: 'bulkrax_statuses_runnable_idx')
|
12
|
+
add_index :bulkrax_statuses, :error_class unless index_exists?(:bulkrax_statuses, :error_class)
|
13
13
|
end
|
14
14
|
end
|
data/lib/bulkrax/engine.rb
CHANGED
@@ -23,15 +23,16 @@ module Bulkrax
|
|
23
23
|
end
|
24
24
|
|
25
25
|
config.after_initialize do
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
26
|
+
# We want to ensure that Bulkrax is earlier in the lookup for view_paths than Hyrax. That is
|
27
|
+
# we favor views in Bulkrax over those in Hyrax.
|
28
|
+
if defined?(Hyrax)
|
29
|
+
my_engine_root = Bulkrax::Engine.root.to_s
|
30
|
+
hyrax_engine_root = Hyrax::Engine.root.to_s
|
31
|
+
paths = ActionController::Base.view_paths.collect(&:to_s)
|
32
|
+
hyrax_view_path = paths.detect { |path| path.match(%r{^#{hyrax_engine_root}}) }
|
33
|
+
paths.insert(paths.index(hyrax_view_path), File.join(my_engine_root, 'app', 'views')) if hyrax_view_path
|
34
|
+
ActionController::Base.view_paths = paths.uniq
|
35
|
+
end
|
35
36
|
end
|
36
37
|
end
|
37
38
|
end
|
data/lib/bulkrax/version.rb
CHANGED
data/lib/bulkrax.rb
CHANGED
@@ -1,16 +1,16 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
require "bulkrax/version"
|
4
|
+
require "bulkrax/engine"
|
5
|
+
require 'active_support/all'
|
6
6
|
|
7
|
-
|
7
|
+
# rubocop:disable Metrics/ModuleLength
|
8
8
|
module Bulkrax
|
9
|
-
|
10
|
-
|
9
|
+
extend self # rubocop:disable Style/ModuleFunction
|
10
|
+
extend Forwardable
|
11
11
|
|
12
|
-
|
13
|
-
|
12
|
+
##
|
13
|
+
# @api public
|
14
14
|
class Configuration
|
15
15
|
attr_accessor :api_definition,
|
16
16
|
:curation_concerns,
|
@@ -35,15 +35,6 @@ module Bulkrax
|
|
35
35
|
:reserved_properties,
|
36
36
|
:server_name
|
37
37
|
|
38
|
-
attr_writer :ingest_queue_name
|
39
|
-
##
|
40
|
-
# @return [String, Proc]
|
41
|
-
def ingest_queue_name
|
42
|
-
return @ingest_queue_name if @ingest_queue_name.present?
|
43
|
-
return Hyrax.config.ingest_queue_name if defined?(Hyrax)
|
44
|
-
:import
|
45
|
-
end
|
46
|
-
|
47
38
|
attr_writer :use_locking
|
48
39
|
|
49
40
|
def use_locking
|
@@ -54,61 +45,61 @@ module Bulkrax
|
|
54
45
|
alias use_locking? use_locking
|
55
46
|
end
|
56
47
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
48
|
+
def config
|
49
|
+
@config ||= Configuration.new
|
50
|
+
yield @config if block_given?
|
51
|
+
@config
|
52
|
+
end
|
53
|
+
alias setup config
|
63
54
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
55
|
+
def_delegators :@config,
|
56
|
+
:api_definition,
|
57
|
+
:api_definition=,
|
58
|
+
:curation_concerns,
|
59
|
+
:curation_concerns=,
|
60
|
+
:default_field_mapping,
|
61
|
+
:default_field_mapping=,
|
62
|
+
:default_work_type,
|
63
|
+
:default_work_type=,
|
64
|
+
:export_path,
|
65
|
+
:export_path=,
|
66
|
+
:field_mappings,
|
67
|
+
:field_mappings=,
|
68
|
+
:file_model_class,
|
69
|
+
:file_model_class=,
|
70
|
+
:fill_in_blank_source_identifiers,
|
71
|
+
:fill_in_blank_source_identifiers=,
|
72
|
+
:generated_metadata_mapping,
|
73
|
+
:generated_metadata_mapping=,
|
74
|
+
:import_path,
|
75
|
+
:import_path=,
|
76
|
+
:multi_value_element_join_on,
|
77
|
+
:multi_value_element_join_on=,
|
78
|
+
:multi_value_element_split_on,
|
79
|
+
:multi_value_element_split_on=,
|
80
|
+
:object_factory,
|
81
|
+
:object_factory=,
|
82
|
+
:parsers,
|
83
|
+
:parsers=,
|
84
|
+
:qa_controlled_properties,
|
85
|
+
:qa_controlled_properties=,
|
86
|
+
:related_children_field_mapping,
|
87
|
+
:related_children_field_mapping=,
|
88
|
+
:related_parents_field_mapping,
|
89
|
+
:related_parents_field_mapping=,
|
90
|
+
:relationship_job_class,
|
91
|
+
:relationship_job_class=,
|
92
|
+
:removed_image_path,
|
93
|
+
:removed_image_path=,
|
94
|
+
:required_elements,
|
95
|
+
:required_elements=,
|
96
|
+
:reserved_properties,
|
97
|
+
:reserved_properties=,
|
98
|
+
:server_name,
|
99
|
+
:server_name=,
|
100
|
+
:use_locking,
|
101
|
+
:use_locking=,
|
102
|
+
:use_locking?
|
112
103
|
|
113
104
|
config do |conf|
|
114
105
|
conf.parsers = [
|
@@ -147,149 +138,149 @@ module Bulkrax
|
|
147
138
|
# Based on Hyrax CoreMetadata && BasicMetadata
|
148
139
|
# Override at application level to change
|
149
140
|
conf.field_mappings = {
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
141
|
+
"Bulkrax::OaiDcParser" => {
|
142
|
+
"contributor" => { from: ["contributor"] },
|
143
|
+
# no appropriate mapping for coverage (based_near needs id)
|
144
|
+
# ""=>{:from=>["coverage"]},
|
145
|
+
"creator" => { from: ["creator"] },
|
146
|
+
"date_created" => { from: ["date"] },
|
147
|
+
"description" => { from: ["description"] },
|
148
|
+
# no appropriate mapping for format
|
149
|
+
# ""=>{:from=>["format"]},
|
150
|
+
"identifier" => { from: ["identifier"] },
|
151
|
+
"language" => { from: ["language"], parsed: true },
|
152
|
+
"publisher" => { from: ["publisher"] },
|
153
|
+
"related_url" => { from: ["relation"] },
|
154
|
+
"rights_statement" => { from: ["rights"] },
|
155
|
+
"source" => { from: ["source"] },
|
156
|
+
"subject" => { from: ["subject"], parsed: true },
|
157
|
+
"title" => { from: ["title"] },
|
158
|
+
"resource_type" => { from: ["type"], parsed: true },
|
159
|
+
"remote_files" => { from: ["thumbnail_url"], parsed: true }
|
169
160
|
},
|
170
|
-
|
171
|
-
|
172
|
-
|
161
|
+
"Bulkrax::OaiQualifiedDcParser" => {
|
162
|
+
"abstract" => { from: ["abstract"] },
|
163
|
+
"alternative_title" => { from: ["alternative"] },
|
173
164
|
"bibliographic_citation" => { from: ["bibliographicCitation"] },
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
165
|
+
"contributor" => { from: ["contributor"] },
|
166
|
+
"creator" => { from: ["creator"] },
|
167
|
+
"date_created" => { from: ["created"] },
|
168
|
+
"description" => { from: ["description"] },
|
169
|
+
"language" => { from: ["language"] },
|
170
|
+
"license" => { from: ["license"] },
|
171
|
+
"publisher" => { from: ["publisher"] },
|
172
|
+
"related_url" => { from: ["relation"] },
|
173
|
+
"rights_holder" => { from: ["rightsHolder"] },
|
174
|
+
"rights_statement" => { from: ["rights"] },
|
175
|
+
"source" => { from: ["source"] },
|
176
|
+
"subject" => { from: ["subject"], parsed: true },
|
177
|
+
"title" => { from: ["title"] },
|
178
|
+
"resource_type" => { from: ["type"], parsed: true },
|
179
|
+
"remote_files" => { from: ["thumbnail_url"], parsed: true }
|
180
|
+
},
|
181
|
+
# When empty, a default_field_mapping will be generated
|
182
|
+
"Bulkrax::CsvParser" => {},
|
183
|
+
'Bulkrax::BagitParser' => {},
|
184
|
+
'Bulkrax::XmlParser' => {}
|
185
|
+
}
|
195
186
|
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
187
|
+
# Lambda to set the default field mapping
|
188
|
+
conf.default_field_mapping = lambda do |field|
|
189
|
+
return if field.blank?
|
190
|
+
{
|
191
|
+
field.to_s =>
|
192
|
+
{
|
193
|
+
from: [field.to_s],
|
194
|
+
split: false,
|
195
|
+
parsed: Bulkrax::ApplicationMatcher.method_defined?("parse_#{field}"),
|
196
|
+
if: nil,
|
197
|
+
excluded: false
|
198
|
+
}
|
199
|
+
}
|
200
|
+
end
|
210
201
|
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
202
|
+
# Properties that should not be used in imports. They are reserved for use by Hyrax.
|
203
|
+
conf.reserved_properties = %w[
|
204
|
+
create_date
|
205
|
+
modified_date
|
206
|
+
date_modified
|
207
|
+
date_uploaded
|
208
|
+
depositor
|
209
|
+
arkivo_checksum
|
210
|
+
has_model
|
211
|
+
head
|
212
|
+
label
|
213
|
+
import_url
|
214
|
+
on_behalf_of
|
215
|
+
proxy_depositor
|
216
|
+
owner
|
217
|
+
state
|
218
|
+
tail
|
219
|
+
original_url
|
220
|
+
relative_path
|
221
|
+
]
|
231
222
|
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
223
|
+
# List of Questioning Authority properties that are controlled via YAML files in
|
224
|
+
# the config/authorities/ directory. For example, the :rights_statement property
|
225
|
+
# is controlled by the active terms in config/authorities/rights_statements.yml
|
226
|
+
conf.qa_controlled_properties = %w[rights_statement license]
|
227
|
+
end
|
237
228
|
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
229
|
+
def api_definition
|
230
|
+
@api_definition ||= ActiveSupport::HashWithIndifferentAccess.new(
|
231
|
+
YAML.safe_load(
|
232
|
+
ERB.new(
|
233
|
+
File.read(Rails.root.join('config', 'bulkrax_api.yml'))
|
234
|
+
).result
|
235
|
+
)
|
236
|
+
)
|
237
|
+
end
|
247
238
|
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
239
|
+
DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON = ' | '
|
240
|
+
# Specify the delimiter for joining an attribute's multi-value array into a string.
|
241
|
+
#
|
242
|
+
# @note the specific delimiter should likely be present in the multi_value_element_split_on
|
243
|
+
# expression.
|
244
|
+
def multi_value_element_join_on
|
245
|
+
@multi_value_element_join_on ||= DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON
|
246
|
+
end
|
256
247
|
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
248
|
+
DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON = /\s*[:;|]\s*/.freeze
|
249
|
+
# @return [Regexp] the regular expression to use to "split" an attribute's values. If set to
|
250
|
+
# `true` use the DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON.
|
251
|
+
#
|
252
|
+
# @note The "true" value is to preserve backwards compatibility.
|
253
|
+
# @see DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON
|
263
254
|
def multi_value_element_split_on
|
264
|
-
|
265
|
-
|
255
|
+
if @multi_value_element_join_on.is_a?(TrueClass)
|
256
|
+
DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON
|
266
257
|
else
|
267
|
-
|
258
|
+
@multi_value_element_split_on ||= DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON
|
268
259
|
end
|
269
260
|
end
|
270
261
|
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
262
|
+
# Responsible for stripping hidden characters from the given string.
|
263
|
+
#
|
264
|
+
# @param value [#to_s]
|
265
|
+
# @return [String] with hidden characters removed
|
266
|
+
#
|
267
|
+
# @see https://github.com/samvera-labs/bulkrax/issues/688
|
268
|
+
def normalize_string(value)
|
269
|
+
# Removing [Byte Order Mark (BOM)](https://en.wikipedia.org/wiki/Byte_order_mark)
|
270
|
+
value.to_s.delete("\xEF\xBB\xBF")
|
271
|
+
end
|
281
272
|
|
282
|
-
|
283
|
-
|
273
|
+
def fallback_user_for_importer_exporter_processing
|
274
|
+
return User.batch_user if defined?(Hyrax) && User.respond_to?(:batch_user)
|
284
275
|
|
285
|
-
|
286
|
-
|
276
|
+
raise "We have no fallback user available for Bulkrax.fallback_user_for_importer_exporter_processing"
|
277
|
+
end
|
287
278
|
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
279
|
+
# This class conforms to the ActiveRecord `serialize` coder interface. Its job is to ensure that we
|
280
|
+
# don't have keys with the tricksy Byte Order Mark character.
|
281
|
+
#
|
282
|
+
# @see https://api.rubyonrails.org/classes/ActiveRecord/AttributeMethods/Serialization/ClassMethods.html#method-i-serialize
|
283
|
+
class NormalizedJson
|
293
284
|
def self.normalize_keys(hash)
|
294
285
|
return hash unless hash.respond_to?(:each_pair)
|
295
286
|
returning_value = {}
|
@@ -301,18 +292,18 @@ module Bulkrax
|
|
301
292
|
|
302
293
|
# When we write the serialized data to the database, we "dump" the value into that database
|
303
294
|
# column.
|
304
|
-
|
305
|
-
|
306
|
-
|
295
|
+
def self.dump(value)
|
296
|
+
JSON.dump(normalize_keys(value))
|
297
|
+
end
|
307
298
|
|
308
299
|
# When we load the serialized data from the database, we pass the database's value into "load"
|
309
300
|
# function.
|
310
301
|
#
|
311
302
|
# rubocop:disable Security/JSONLoad
|
312
|
-
|
313
|
-
|
314
|
-
|
303
|
+
def self.load(string)
|
304
|
+
normalize_keys(JSON.load(string))
|
305
|
+
end
|
315
306
|
# rubocop:enable Security/JSONLoad
|
316
|
-
|
307
|
+
end
|
317
308
|
end
|
318
|
-
|
309
|
+
# rubocop:enable Metrics/ModuleLength
|
@@ -1,6 +1,108 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
namespace :bulkrax do
|
4
|
+
# Usage example: rails bulkrax:generate_test_csvs['5','100','GenericWork']
|
5
|
+
desc 'Generate CSVs with fake data for testing purposes'
|
6
|
+
task :generate_test_csvs, [:num_of_csvs, :csv_rows, :record_type] => :environment do |_t, args|
|
7
|
+
# NOTE: If this line throws an error, run `gem install faker` inside your Docker container
|
8
|
+
require 'faker'
|
9
|
+
require 'csv'
|
10
|
+
|
11
|
+
FileUtils.mkdir_p(Rails.root.join('tmp', 'imports'))
|
12
|
+
|
13
|
+
IGNORED_PROPERTIES = %w[
|
14
|
+
admin_set_id
|
15
|
+
alternate_ids
|
16
|
+
arkivo_checksum
|
17
|
+
created_at
|
18
|
+
date_modified
|
19
|
+
date_uploaded
|
20
|
+
depositor
|
21
|
+
embargo
|
22
|
+
has_model
|
23
|
+
head
|
24
|
+
internal_resource
|
25
|
+
label
|
26
|
+
lease
|
27
|
+
member_ids
|
28
|
+
member_of_collection_ids
|
29
|
+
modified_date
|
30
|
+
new_record
|
31
|
+
on_behalf_of
|
32
|
+
owner
|
33
|
+
proxy_depositor
|
34
|
+
rendering_ids
|
35
|
+
representative_id
|
36
|
+
state
|
37
|
+
tail
|
38
|
+
thumbnail_id
|
39
|
+
updated_at
|
40
|
+
].freeze
|
41
|
+
|
42
|
+
BULKRAX_PROPERTIES = %w[
|
43
|
+
source_identifier
|
44
|
+
model
|
45
|
+
].freeze
|
46
|
+
|
47
|
+
num_of_csvs = args.num_of_csvs.presence&.to_i || 5
|
48
|
+
csv_rows = args.csv_rows.presence&.to_i || 100
|
49
|
+
record_type = args.record_type.presence&.constantize || GenericWork
|
50
|
+
|
51
|
+
csv_header = if Hyrax.config.try(:use_valkyrie?)
|
52
|
+
record_type.schema.map { |k| k.name.to_s }
|
53
|
+
else
|
54
|
+
record_type.properties.keys
|
55
|
+
end
|
56
|
+
|
57
|
+
csv_header -= IGNORED_PROPERTIES
|
58
|
+
csv_header.unshift(*BULKRAX_PROPERTIES)
|
59
|
+
|
60
|
+
num_of_csvs.times do |i|
|
61
|
+
CSV.open(Rails.root.join('tmp', 'imports', "importer_#{i}.csv"), 'wb') do |csv|
|
62
|
+
csv << csv_header
|
63
|
+
csv_rows.times do |_index|
|
64
|
+
row = []
|
65
|
+
csv_header.each do |prop_name|
|
66
|
+
row << case prop_name
|
67
|
+
when 'id', 'source_identifier'
|
68
|
+
Faker::Number.number(digits: 4)
|
69
|
+
when 'model'
|
70
|
+
record_type.to_s
|
71
|
+
when 'rights_statement'
|
72
|
+
'http://rightsstatements.org/vocab/CNE/1.0/'
|
73
|
+
when 'license'
|
74
|
+
'https://creativecommons.org/licenses/by-nc/4.0/'
|
75
|
+
when 'based_near'
|
76
|
+
# FIXME: Set a proper :based_near value
|
77
|
+
nil
|
78
|
+
else
|
79
|
+
Faker::Lorem.sentence
|
80
|
+
end
|
81
|
+
end
|
82
|
+
csv << row
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
num_of_csvs.times do |i|
|
88
|
+
Bulkrax::Importer.create(
|
89
|
+
name: "Generated CSV #{i}",
|
90
|
+
admin_set_id: 'admin_set/default',
|
91
|
+
user_id: User.find_by(email: 'admin@example.com').id,
|
92
|
+
frequency: 'PT0S',
|
93
|
+
parser_klass: 'Bulkrax::CsvParser',
|
94
|
+
parser_fields: {
|
95
|
+
'visibility' => 'open',
|
96
|
+
'rights_statement' => '',
|
97
|
+
'override_rights_statement' => '0',
|
98
|
+
'file_style' => 'Specify a Path on the Server',
|
99
|
+
'import_file_path' => "tmp/imports/importer_#{i}.csv",
|
100
|
+
'update_files' => false
|
101
|
+
}
|
102
|
+
)
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
4
106
|
desc "Remove old exported zips and create new ones with the new file structure"
|
5
107
|
task rerun_all_exporters: :environment do
|
6
108
|
# delete the existing folders and zip files
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.
|
4
|
+
version: 5.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-10-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|