bulkrax 1.0.2 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/app/controllers/bulkrax/exporters_controller.rb +12 -4
- data/app/controllers/bulkrax/importers_controller.rb +23 -17
- data/app/factories/bulkrax/object_factory.rb +84 -63
- data/app/jobs/bulkrax/create_relationships_job.rb +156 -0
- data/app/jobs/bulkrax/delete_work_job.rb +6 -2
- data/app/jobs/bulkrax/export_work_job.rb +3 -1
- data/app/jobs/bulkrax/exporter_job.rb +1 -0
- data/app/jobs/bulkrax/{import_work_collection_job.rb → import_collection_job.rb} +4 -2
- data/app/jobs/bulkrax/import_file_set_job.rb +69 -0
- data/app/jobs/bulkrax/import_work_job.rb +2 -0
- data/app/jobs/bulkrax/importer_job.rb +18 -1
- data/app/matchers/bulkrax/application_matcher.rb +5 -5
- data/app/models/bulkrax/csv_collection_entry.rb +8 -6
- data/app/models/bulkrax/csv_entry.rb +132 -65
- data/app/models/bulkrax/csv_file_set_entry.rb +26 -0
- data/app/models/bulkrax/entry.rb +19 -8
- data/app/models/bulkrax/exporter.rb +12 -5
- data/app/models/bulkrax/importer.rb +24 -5
- data/app/models/bulkrax/oai_entry.rb +5 -1
- data/app/models/bulkrax/rdf_entry.rb +16 -7
- data/app/models/bulkrax/xml_entry.rb +4 -0
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +39 -0
- data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/has_matchers.rb +44 -13
- data/app/models/concerns/bulkrax/import_behavior.rb +40 -5
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +23 -2
- data/app/models/concerns/bulkrax/status_info.rb +4 -4
- data/app/parsers/bulkrax/application_parser.rb +67 -84
- data/app/parsers/bulkrax/bagit_parser.rb +13 -4
- data/app/parsers/bulkrax/csv_parser.rb +170 -64
- data/app/parsers/bulkrax/oai_dc_parser.rb +6 -3
- data/app/parsers/bulkrax/xml_parser.rb +5 -0
- data/app/views/bulkrax/exporters/_form.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +2 -1
- data/app/views/bulkrax/importers/index.html.erb +17 -17
- data/app/views/bulkrax/importers/show.html.erb +52 -6
- data/config/locales/bulkrax.en.yml +1 -0
- data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +5 -1
- data/db/migrate/20211004170708_change_bulkrax_statuses_error_message_column_type_to_text.rb +5 -0
- data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +6 -0
- data/db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb +7 -0
- data/db/migrate/20220118001339_add_import_attempts_to_entries.rb +5 -0
- data/db/migrate/20220119213325_add_work_counters_to_importer_runs.rb +6 -0
- data/lib/bulkrax/engine.rb +1 -1
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +9 -17
- data/lib/generators/bulkrax/templates/bin/importer +17 -11
- data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +3 -1
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +7 -12
- metadata +22 -10
- data/app/jobs/bulkrax/child_relationships_job.rb +0 -128
data/lib/bulkrax.rb
CHANGED
@@ -5,12 +5,14 @@ require 'active_support/all'
|
|
5
5
|
|
6
6
|
module Bulkrax
|
7
7
|
class << self
|
8
|
+
# TODO: remove collection_field_mapping when releasing v2
|
8
9
|
mattr_accessor :parsers,
|
9
10
|
:default_work_type,
|
10
11
|
:default_field_mapping,
|
11
12
|
:collection_field_mapping,
|
12
13
|
:fill_in_blank_source_identifiers,
|
13
|
-
:parent_child_field_mapping,
|
14
|
+
:related_children_field_mapping,
|
15
|
+
:related_parents_field_mapping,
|
14
16
|
:reserved_properties,
|
15
17
|
:field_mappings,
|
16
18
|
:import_path,
|
@@ -33,26 +35,16 @@ module Bulkrax
|
|
33
35
|
self.removed_image_path = Bulkrax::Engine.root.join('spec', 'fixtures', 'removed.png').to_s
|
34
36
|
self.server_name = 'bulkrax@example.com'
|
35
37
|
|
36
|
-
#
|
37
|
-
#
|
38
|
-
|
39
|
-
# Field_mapping for establishing a parent-child relationship (FROM parent TO child)
|
40
|
-
# This can be a Collection to Work, or Work to Work relationship
|
41
|
-
# This value IS NOT used for OAI, so setting the OAI Entries here will have no effect
|
42
|
-
# The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry'
|
43
|
-
# Example:
|
44
|
-
# {
|
45
|
-
# 'Bulkrax::RdfEntry' => 'http://opaquenamespace.org/ns/contents',
|
46
|
-
# 'Bulkrax::CsvEntry' => 'children'
|
47
|
-
# }
|
48
|
-
# By default no parent-child relationships are added
|
49
|
-
self.parent_child_field_mapping = {}
|
50
|
-
|
38
|
+
# NOTE: Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.
|
39
|
+
# Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.
|
40
|
+
# TODO: remove collection_field_mapping when releasing v2
|
51
41
|
# Field_mapping for establishing a collection relationship (FROM work TO collection)
|
52
42
|
# This value IS NOT used for OAI, so setting the OAI Entries here will have no effect
|
53
43
|
# The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry'
|
54
44
|
# The default value for CSV is collection
|
55
|
-
self.collection_field_mapping = {
|
45
|
+
self.collection_field_mapping = {
|
46
|
+
'Bulkrax::CsvEntry' => 'collection'
|
47
|
+
}
|
56
48
|
|
57
49
|
# Hash of Generic field_mappings for use in the view
|
58
50
|
# There must be one field_mappings hash per view partial
|
@@ -6,18 +6,11 @@ require_relative '../config/environment'
|
|
6
6
|
require 'slop'
|
7
7
|
|
8
8
|
def main(opts = {})
|
9
|
-
|
10
|
-
puts 'Missing required parameters'
|
11
|
-
help
|
12
|
-
end
|
13
|
-
|
14
|
-
if opts[:auth_token].blank?
|
15
|
-
puts 'Missing Authentication Token --auth_token'
|
16
|
-
exit
|
17
|
-
end
|
9
|
+
check_required_params
|
18
10
|
|
19
11
|
update = opts[:importer_id].present?
|
20
|
-
|
12
|
+
port = opts[:port].presence
|
13
|
+
url = build_url(opts.delete(:importer_id), opts.delete(:url), port)
|
21
14
|
|
22
15
|
headers = { 'Content-Type' => 'application/json' }
|
23
16
|
headers['Authorization'] = "Token: #{opts.delete(:auth_token)}"
|
@@ -43,6 +36,18 @@ def main(opts = {})
|
|
43
36
|
puts "#{response.status} - #{response.body.truncate(200)}"
|
44
37
|
end
|
45
38
|
|
39
|
+
def check_required_params
|
40
|
+
if opts[:importer_id].blank? && invalid?(opts)
|
41
|
+
puts 'Missing required parameters'
|
42
|
+
help
|
43
|
+
end
|
44
|
+
|
45
|
+
if opts[:auth_token].blank? # rubocop:disable Style/GuardClause
|
46
|
+
puts 'Missing Authentication Token --auth_token'
|
47
|
+
exit
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
46
51
|
def invalid?(opts)
|
47
52
|
required_params.each do |p|
|
48
53
|
return true if opts[p.to_sym].blank?
|
@@ -74,11 +79,12 @@ def build_params(opts = {})
|
|
74
79
|
return params.compact
|
75
80
|
end
|
76
81
|
|
77
|
-
def build_url(importer_id, url)
|
82
|
+
def build_url(importer_id, url, port = nil)
|
78
83
|
if url.nil?
|
79
84
|
protocol = Rails.application.config.force_ssl ? 'https://' : 'http://'
|
80
85
|
host = Rails.application.config.action_mailer.default_url_options[:host]
|
81
86
|
url = "#{protocol}#{host}"
|
87
|
+
url = "#{url}:#{port}" if port
|
82
88
|
end
|
83
89
|
path = Bulkrax::Engine.routes.url_helpers.polymorphic_path(Bulkrax::Importer)
|
84
90
|
url = File.join(url, path)
|
@@ -81,4 +81,6 @@ bulkrax:
|
|
81
81
|
- "Update and Import (importer has not yet been run)"
|
82
82
|
auth_token:
|
83
83
|
definition: 'Authentication token. Required for JSON requests only.'
|
84
|
-
required: true
|
84
|
+
required: true
|
85
|
+
port:
|
86
|
+
definition: 'Port to use in http request. Defaults to 80. May need to set to 3000 when running in the development environment'
|
@@ -19,18 +19,8 @@ Bulkrax.setup do |config|
|
|
19
19
|
# Server name for oai request header
|
20
20
|
# config.server_name = 'my_server@name.com'
|
21
21
|
|
22
|
-
#
|
23
|
-
#
|
24
|
-
# This value IS NOT used for OAI, so setting the OAI Entries here will have no effect
|
25
|
-
# The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry'
|
26
|
-
# Example:
|
27
|
-
# {
|
28
|
-
# 'Bulkrax::RdfEntry' => 'http://opaquenamespace.org/ns/contents',
|
29
|
-
# 'Bulkrax::CsvEntry' => 'children'
|
30
|
-
# }
|
31
|
-
# By default no parent-child relationships are added
|
32
|
-
# config.parent_child_field_mapping = { }
|
33
|
-
|
22
|
+
# NOTE: Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.
|
23
|
+
# Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.
|
34
24
|
# Field_mapping for establishing a collection relationship (FROM work TO collection)
|
35
25
|
# This value IS NOT used for OAI, so setting the OAI parser here will have no effect
|
36
26
|
# The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry'
|
@@ -48,6 +38,11 @@ Bulkrax.setup do |config|
|
|
48
38
|
# e.g. to exclude date
|
49
39
|
# config.field_mappings["Bulkrax::OaiDcParser"]["date"] = { from: ["date"], excluded: true }
|
50
40
|
#
|
41
|
+
# e.g. to import parent-child relationships
|
42
|
+
# config.field_mappings['Bulkrax::CsvParser']['parents'] = { from: ['parents'], related_parents_field_mapping: true }
|
43
|
+
# config.field_mappings['Bulkrax::CsvParser']['children'] = { from: ['children'], related_children_field_mapping: true }
|
44
|
+
# (For more info on importing relationships, see Bulkrax Wiki: https://github.com/samvera-labs/bulkrax/wiki/Configuring-Bulkrax#parent-child-relationship-field-mappings)
|
45
|
+
#
|
51
46
|
# # e.g. to add the required source_identifier field
|
52
47
|
# # config.field_mappings["Bulkrax::CsvParser"]["source_id"] = { from: ["old_source_id"], source_identifier: true }
|
53
48
|
# If you want Bulkrax to fill in source_identifiers for you, see below
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.2
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-02-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -224,7 +224,11 @@ dependencies:
|
|
224
224
|
- - ">="
|
225
225
|
- !ruby/object:Gem::Version
|
226
226
|
version: '0'
|
227
|
-
description:
|
227
|
+
description: Bulkrax is a batteries included importer for Samvera applications. It
|
228
|
+
currently includes support for OAI-PMH (DC and Qualified DC) and CSV out of the
|
229
|
+
box. It is also designed to be extensible, allowing you to easily add new importers
|
230
|
+
in to your application or to include them with other gems. Bulkrax provides a full
|
231
|
+
admin interface including creating, editing, scheduling and reviewing imports.
|
228
232
|
email:
|
229
233
|
- rob@notch8.com
|
230
234
|
executables: []
|
@@ -255,12 +259,13 @@ files:
|
|
255
259
|
- app/helpers/bulkrax/importers_helper.rb
|
256
260
|
- app/helpers/bulkrax/validation_helper.rb
|
257
261
|
- app/jobs/bulkrax/application_job.rb
|
258
|
-
- app/jobs/bulkrax/child_relationships_job.rb
|
262
|
+
- app/jobs/bulkrax/create_relationships_job.rb
|
259
263
|
- app/jobs/bulkrax/delete_work_job.rb
|
260
264
|
- app/jobs/bulkrax/download_cloud_file_job.rb
|
261
265
|
- app/jobs/bulkrax/export_work_job.rb
|
262
266
|
- app/jobs/bulkrax/exporter_job.rb
|
263
|
-
- app/jobs/bulkrax/import_work_collection_job.rb
|
267
|
+
- app/jobs/bulkrax/import_collection_job.rb
|
268
|
+
- app/jobs/bulkrax/import_file_set_job.rb
|
264
269
|
- app/jobs/bulkrax/import_work_job.rb
|
265
270
|
- app/jobs/bulkrax/importer_job.rb
|
266
271
|
- app/mailers/bulkrax/application_mailer.rb
|
@@ -271,6 +276,7 @@ files:
|
|
271
276
|
- app/models/bulkrax/application_record.rb
|
272
277
|
- app/models/bulkrax/csv_collection_entry.rb
|
273
278
|
- app/models/bulkrax/csv_entry.rb
|
279
|
+
- app/models/bulkrax/csv_file_set_entry.rb
|
274
280
|
- app/models/bulkrax/entry.rb
|
275
281
|
- app/models/bulkrax/exporter.rb
|
276
282
|
- app/models/bulkrax/exporter_run.rb
|
@@ -286,6 +292,7 @@ files:
|
|
286
292
|
- app/models/bulkrax/status.rb
|
287
293
|
- app/models/bulkrax/xml_entry.rb
|
288
294
|
- app/models/concerns/bulkrax/download_behavior.rb
|
295
|
+
- app/models/concerns/bulkrax/dynamic_record_lookup.rb
|
289
296
|
- app/models/concerns/bulkrax/errored_entries.rb
|
290
297
|
- app/models/concerns/bulkrax/export_behavior.rb
|
291
298
|
- app/models/concerns/bulkrax/file_factory.rb
|
@@ -351,6 +358,11 @@ files:
|
|
351
358
|
- db/migrate/20201117220007_add_workflow_status_to_bulkrax_exporter.rb
|
352
359
|
- db/migrate/20210806044408_remove_unused_last_error.rb
|
353
360
|
- db/migrate/20210806065737_increase_text_sizes.rb
|
361
|
+
- db/migrate/20211004170708_change_bulkrax_statuses_error_message_column_type_to_text.rb
|
362
|
+
- db/migrate/20211203195233_rename_children_counters_to_relationships.rb
|
363
|
+
- db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb
|
364
|
+
- db/migrate/20220118001339_add_import_attempts_to_entries.rb
|
365
|
+
- db/migrate/20220119213325_add_work_counters_to_importer_runs.rb
|
354
366
|
- lib/bulkrax.rb
|
355
367
|
- lib/bulkrax/engine.rb
|
356
368
|
- lib/bulkrax/version.rb
|
@@ -366,7 +378,7 @@ homepage: https://github.com/samvera-labs/bulkrax
|
|
366
378
|
licenses:
|
367
379
|
- Apache-2.0
|
368
380
|
metadata: {}
|
369
|
-
post_install_message:
|
381
|
+
post_install_message:
|
370
382
|
rdoc_options: []
|
371
383
|
require_paths:
|
372
384
|
- lib
|
@@ -381,8 +393,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
381
393
|
- !ruby/object:Gem::Version
|
382
394
|
version: '0'
|
383
395
|
requirements: []
|
384
|
-
rubygems_version: 3.1.
|
385
|
-
signing_key:
|
396
|
+
rubygems_version: 3.1.4
|
397
|
+
signing_key:
|
386
398
|
specification_version: 4
|
387
|
-
summary:
|
399
|
+
summary: Import and export tool for Hyrax and Hyku
|
388
400
|
test_files: []
|
@@ -1,128 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Bulkrax
|
4
|
-
class ChildWorksError < RuntimeError; end
|
5
|
-
class ChildRelationshipsJob < ApplicationJob
|
6
|
-
queue_as :import
|
7
|
-
|
8
|
-
def perform(*args)
|
9
|
-
@args = args
|
10
|
-
|
11
|
-
if entry.factory_class == Collection
|
12
|
-
collection_membership
|
13
|
-
else
|
14
|
-
work_membership
|
15
|
-
end
|
16
|
-
# Not all of the Works/Collections exist yet; reschedule
|
17
|
-
rescue Bulkrax::ChildWorksError
|
18
|
-
reschedule(args[0], args[1], args[2])
|
19
|
-
end
|
20
|
-
|
21
|
-
def collection_membership
|
22
|
-
# add collection to works
|
23
|
-
member_of_collection = []
|
24
|
-
child_works_hash.each { |k, v| member_of_collection << k if v[:class_name] != 'Collection' }
|
25
|
-
member_of_collection.each { |work| work_child_collection_parent(work) }
|
26
|
-
|
27
|
-
# add collections to collection
|
28
|
-
members_collections = []
|
29
|
-
child_works_hash.each { |k, v| members_collections << k if v[:class_name] == 'Collection' }
|
30
|
-
collection_parent_collection_child(members_collections) if members_collections.present?
|
31
|
-
end
|
32
|
-
|
33
|
-
def work_membership
|
34
|
-
# add works to work
|
35
|
-
# reject any Collections, they can't be children of Works
|
36
|
-
members_works = []
|
37
|
-
# reject any Collections, they can't be children of Works
|
38
|
-
child_works_hash.each { |k, v| members_works << k if v[:class_name] != 'Collection' }
|
39
|
-
if members_works.length < child_entries.length # rubocop:disable Style/IfUnlessModifier
|
40
|
-
Rails.logger.warn("Cannot add collections as children of works: #{(@child_entries.length - members_works.length)} collections were discarded for parent entry #{@entry.id} (of #{@child_entries.length})")
|
41
|
-
end
|
42
|
-
work_parent_work_child(members_works) if members_works.present?
|
43
|
-
end
|
44
|
-
|
45
|
-
def entry
|
46
|
-
@entry ||= Bulkrax::Entry.find(@args[0])
|
47
|
-
end
|
48
|
-
|
49
|
-
def child_entries
|
50
|
-
@child_entries ||= @args[1].map { |e| Bulkrax::Entry.find(e) }
|
51
|
-
end
|
52
|
-
|
53
|
-
def child_works_hash
|
54
|
-
@child_works_hash ||= child_entries.each_with_object({}) do |child_entry, hash|
|
55
|
-
work = child_entry.factory.find
|
56
|
-
# If we can't find the Work/Collection, raise a custom error
|
57
|
-
raise ChildWorksError if work.blank?
|
58
|
-
hash[work.id] = { class_name: work.class.to_s, entry.parser.source_identifier => child_entry.identifier }
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
def importer_run_id
|
63
|
-
@args[2]
|
64
|
-
end
|
65
|
-
|
66
|
-
def user
|
67
|
-
@user ||= entry.importerexporter.user
|
68
|
-
end
|
69
|
-
|
70
|
-
private
|
71
|
-
|
72
|
-
# rubocop:disable Rails/SkipsModelValidations
|
73
|
-
# Work-Collection membership is added to the child as member_of_collection_ids
|
74
|
-
# This is adding the reverse relatinship, from the child to the parent
|
75
|
-
def work_child_collection_parent(work_id)
|
76
|
-
attrs = { id: work_id, collections: [{ id: entry&.factory&.find&.id }] }
|
77
|
-
Bulkrax::ObjectFactory.new(attributes: attrs,
|
78
|
-
source_identifier_value: child_works_hash[work_id][entry.parser.source_identifier],
|
79
|
-
work_identifier: entry.parser.work_identifier,
|
80
|
-
replace_files: false,
|
81
|
-
user: user,
|
82
|
-
klass: child_works_hash[work_id][:class_name].constantize).run
|
83
|
-
ImporterRun.find(importer_run_id).increment!(:processed_children)
|
84
|
-
rescue StandardError => e
|
85
|
-
entry.status_info(e)
|
86
|
-
ImporterRun.find(importer_run_id).increment!(:failed_children)
|
87
|
-
end
|
88
|
-
|
89
|
-
# Collection-Collection membership is added to the as member_ids
|
90
|
-
def collection_parent_collection_child(member_ids)
|
91
|
-
attrs = { id: entry&.factory&.find&.id, children: member_ids }
|
92
|
-
Bulkrax::ObjectFactory.new(attributes: attrs,
|
93
|
-
source_identifier_value: entry.identifier,
|
94
|
-
work_identifier: entry.parser.work_identifier,
|
95
|
-
replace_files: false,
|
96
|
-
user: user,
|
97
|
-
klass: entry.factory_class).run
|
98
|
-
ImporterRun.find(importer_run_id).increment!(:processed_children)
|
99
|
-
rescue StandardError => e
|
100
|
-
entry.status_info(e)
|
101
|
-
ImporterRun.find(importer_run_id).increment!(:failed_children)
|
102
|
-
end
|
103
|
-
|
104
|
-
# Work-Work membership is added to the parent as member_ids
|
105
|
-
def work_parent_work_child(member_ids)
|
106
|
-
# build work_members_attributes
|
107
|
-
attrs = { id: entry&.factory&.find&.id,
|
108
|
-
work_members_attributes: member_ids.each.with_index.each_with_object({}) do |(member, index), ids|
|
109
|
-
ids[index] = { id: member }
|
110
|
-
end }
|
111
|
-
Bulkrax::ObjectFactory.new(attributes: attrs,
|
112
|
-
source_identifier_value: entry.identifier,
|
113
|
-
work_identifier: entry.parser.work_identifier,
|
114
|
-
replace_files: false,
|
115
|
-
user: user,
|
116
|
-
klass: entry.factory_class).run
|
117
|
-
ImporterRun.find(importer_run_id).increment!(:processed_children)
|
118
|
-
rescue StandardError => e
|
119
|
-
entry.status_info(e)
|
120
|
-
ImporterRun.find(importer_run_id).increment!(:failed_children)
|
121
|
-
end
|
122
|
-
# rubocop:enable Rails/SkipsModelValidations
|
123
|
-
|
124
|
-
def reschedule(entry_id, child_entry_ids, importer_run_id)
|
125
|
-
ChildRelationshipsJob.set(wait: 10.minutes).perform_later(entry_id, child_entry_ids, importer_run_id)
|
126
|
-
end
|
127
|
-
end
|
128
|
-
end
|