bulkrax 3.1.0 → 3.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +4 -6
- data/app/jobs/bulkrax/import_collection_job.rb +5 -3
- data/app/jobs/bulkrax/import_file_set_job.rb +4 -4
- data/app/models/bulkrax/csv_file_set_entry.rb +1 -26
- data/app/models/bulkrax/rdf_file_set_entry.rb +7 -0
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +32 -0
- data/app/parsers/bulkrax/bagit_parser.rb +5 -0
- data/app/parsers/bulkrax/csv_parser.rb +1 -1
- data/lib/bulkrax/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 41dde3161532c80ff433be4697fa6c42f0f453dfea3547181c44914a68e3b466
|
4
|
+
data.tar.gz: 67473e62b537f71aa77aa664c361ba1e86c60e77451e4813a44597a9f465010b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3859148384111048a2a2c096cee9bde5a9c78cb8e5a73b5ddb1d31c3ddefe7a221d1ebff97e3b1f96f09e746ac60b3ec1c14e615ef162fe1a5e16f883a3a7ecf
|
7
|
+
data.tar.gz: bbc83265d59d5026546e6e92ccab3b93d6d4bf35ae08145ab6b27112ed587449572d763fdf3d876ec391dfce43d8b5fa96154e8507053ff2fac7b81bc2409580
|
data/README.md
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# Bulkrax
|
2
2
|
Bulkrax is a batteries included importer for Samvera applications. It currently includes support for OAI-PMH (DC and Qualified DC) and CSV out of the box. It is also designed to be extensible, allowing you to easily add new importers in to your application or to include them with other gems. Bulkrax provides a full admin interface including creating, editing, scheduling and reviewing imports.
|
3
3
|
|
4
|
-
|
5
4
|
## Installation
|
6
5
|
|
7
6
|
### Install Generator
|
@@ -9,9 +8,9 @@ Bulkrax is a batteries included importer for Samvera applications. It currently
|
|
9
8
|
Add this line to your application's Gemfile:
|
10
9
|
|
11
10
|
```ruby
|
12
|
-
gem 'bulkrax'
|
11
|
+
gem 'bulkrax'
|
13
12
|
# or if using from github
|
14
|
-
gem 'bulkrax', git: 'https://github.com/samvera-labs/bulkrax.git'
|
13
|
+
gem 'bulkrax', git: 'https://github.com/samvera-labs/bulkrax.git', branch: 'main'
|
15
14
|
```
|
16
15
|
|
17
16
|
And then execute:
|
@@ -27,7 +26,7 @@ If using Sidekiq, set up queues for `import` and `export`.
|
|
27
26
|
Add this line to your application's Gemfile:
|
28
27
|
|
29
28
|
```ruby
|
30
|
-
gem 'bulkrax'
|
29
|
+
gem 'bulkrax'
|
31
30
|
```
|
32
31
|
|
33
32
|
And then execute:
|
data/app/jobs/bulkrax/create_relationships_job.rb
CHANGED
@@ -46,18 +46,16 @@ module Bulkrax
|
|
46
46
|
end
|
47
47
|
|
48
48
|
if (child_records[:collections].blank? && child_records[:works].blank?) || parent_record.blank?
|
49
|
-
reschedule(
|
50
|
-
parent_identifier: parent_identifier,
|
51
|
-
importer_run_id: importer_run_id
|
52
|
-
)
|
49
|
+
reschedule({ parent_identifier: parent_identifier, importer_run_id: importer_run_id })
|
53
50
|
return false # stop current job from continuing to run after rescheduling
|
54
51
|
end
|
55
|
-
|
52
|
+
importer_id = ImporterRun.find(importer_run_id).importer_id
|
56
53
|
@parent_entry ||= Bulkrax::Entry.where(identifier: parent_identifier,
|
57
|
-
importerexporter_id:
|
54
|
+
importerexporter_id: importer_id,
|
58
55
|
importerexporter_type: "Bulkrax::Importer").first
|
59
56
|
create_relationships
|
60
57
|
pending_relationships.each(&:destroy)
|
58
|
+
Bulkrax::Importer.find(importer_id).record_status
|
61
59
|
rescue ::StandardError => e
|
62
60
|
parent_entry ? parent_entry.status_info(e) : child_entry.status_info(e)
|
63
61
|
Bulkrax::ImporterRun.find(importer_run_id).increment!(:failed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
data/app/jobs/bulkrax/import_collection_job.rb
CHANGED
@@ -9,16 +9,18 @@ module Bulkrax
|
|
9
9
|
entry = Entry.find(args[0])
|
10
10
|
begin
|
11
11
|
entry.build
|
12
|
-
entry.save
|
12
|
+
entry.save!
|
13
13
|
ImporterRun.find(args[1]).increment!(:processed_records)
|
14
14
|
ImporterRun.find(args[1]).increment!(:processed_collections)
|
15
|
-
ImporterRun.find(args[1]).decrement!(:enqueued_records)
|
15
|
+
ImporterRun.find(args[1]).decrement!(:enqueued_records) unless ImporterRun.find(args[1]).enqueued_records <= 0 # rubocop:disable Style/IdenticalConditionalBranches
|
16
16
|
rescue => e
|
17
17
|
ImporterRun.find(args[1]).increment!(:failed_records)
|
18
18
|
ImporterRun.find(args[1]).increment!(:failed_collections)
|
19
|
-
ImporterRun.find(args[1]).decrement!(:enqueued_records)
|
19
|
+
ImporterRun.find(args[1]).decrement!(:enqueued_records) unless ImporterRun.find(args[1]).enqueued_records <= 0 # rubocop:disable Style/IdenticalConditionalBranches
|
20
20
|
raise e
|
21
21
|
end
|
22
|
+
entry.importer.current_run = ImporterRun.find(args[1])
|
23
|
+
entry.importer.record_status
|
22
24
|
end
|
23
25
|
# rubocop:enable Rails/SkipsModelValidations
|
24
26
|
end
|
data/app/jobs/bulkrax/import_file_set_job.rb
CHANGED
@@ -26,17 +26,17 @@ module Bulkrax
|
|
26
26
|
ImporterRun.find(importer_run_id).increment!(:failed_file_sets)
|
27
27
|
# rubocop:enable Rails/SkipsModelValidations
|
28
28
|
end
|
29
|
-
ImporterRun.find(importer_run_id).decrement!(:enqueued_records) # rubocop:disable Rails/SkipsModelValidations
|
29
|
+
ImporterRun.find(importer_run_id).decrement!(:enqueued_records) unless ImporterRun.find(importer_run_id).enqueued_records <= 0 # rubocop:disable Rails/SkipsModelValidations
|
30
30
|
entry.save!
|
31
|
+
entry.importer.current_run = ImporterRun.find(importer_run_id)
|
32
|
+
entry.importer.record_status
|
31
33
|
|
32
34
|
rescue MissingParentError => e
|
33
35
|
# try waiting for the parent record to be created
|
34
36
|
entry.import_attempts += 1
|
35
37
|
entry.save!
|
36
38
|
if entry.import_attempts < 5
|
37
|
-
ImportFileSetJob
|
38
|
-
.set(wait: (entry.import_attempts + 1).minutes)
|
39
|
-
.perform_later(entry_id, importer_run_id)
|
39
|
+
ImportFileSetJob.set(wait: (entry.import_attempts + 1).minutes).perform_later(entry_id, importer_run_id)
|
40
40
|
else
|
41
41
|
ImporterRun.find(importer_run_id).decrement!(:enqueued_records) # rubocop:disable Rails/SkipsModelValidations
|
42
42
|
entry.status_info(e)
|
data/app/models/bulkrax/csv_file_set_entry.rb
CHANGED
@@ -2,31 +2,6 @@
|
|
2
2
|
|
3
3
|
module Bulkrax
|
4
4
|
class CsvFileSetEntry < CsvEntry
|
5
|
-
def factory_class
|
6
|
-
::FileSet
|
7
|
-
end
|
8
|
-
|
9
|
-
def add_path_to_file
|
10
|
-
parsed_metadata['file'].each_with_index do |filename, i|
|
11
|
-
path_to_file = ::File.join(parser.path_to_files, filename)
|
12
|
-
|
13
|
-
parsed_metadata['file'][i] = path_to_file
|
14
|
-
end
|
15
|
-
raise ::StandardError, "one or more file paths are invalid: #{parsed_metadata['file'].join(', ')}" unless parsed_metadata['file'].map { |file_path| ::File.file?(file_path) }.all?
|
16
|
-
|
17
|
-
parsed_metadata['file']
|
18
|
-
end
|
19
|
-
|
20
|
-
def validate_presence_of_filename!
|
21
|
-
return if parsed_metadata&.[]('file')&.map(&:present?)&.any?
|
22
|
-
|
23
|
-
raise StandardError, 'File set must have a filename'
|
24
|
-
end
|
25
|
-
|
26
|
-
def validate_presence_of_parent!
|
27
|
-
return if parsed_metadata[related_parents_parsed_mapping]&.map(&:present?)&.any?
|
28
|
-
|
29
|
-
raise StandardError, 'File set must be related to at least one work'
|
30
|
-
end
|
5
|
+
include FileSetEntryBehavior
|
31
6
|
end
|
32
7
|
end
|
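data/app/models/bulkrax/rdf_file_set_entry.rb
ADDED
data/app/models/concerns/bulkrax/file_set_entry_behavior.rb
ADDED
@@ -0,0 +1,32 @@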
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
|
4
|
+
module FileSetEntryBehavior
|
5
|
+
def factory_class
|
6
|
+
::FileSet
|
7
|
+
end
|
8
|
+
|
9
|
+
def add_path_to_file
|
10
|
+
parsed_metadata['file'].each_with_index do |filename, i|
|
11
|
+
path_to_file = ::File.join(parser.path_to_files, filename)
|
12
|
+
|
13
|
+
parsed_metadata['file'][i] = path_to_file
|
14
|
+
end
|
15
|
+
raise ::StandardError, "one or more file paths are invalid: #{parsed_metadata['file'].join(', ')}" unless parsed_metadata['file'].map { |file_path| ::File.file?(file_path) }.all?
|
16
|
+
|
17
|
+
parsed_metadata['file']
|
18
|
+
end
|
19
|
+
|
20
|
+
def validate_presence_of_filename!
|
21
|
+
return if parsed_metadata&.[]('file')&.map(&:present?)&.any?
|
22
|
+
|
23
|
+
raise StandardError, 'File set must have a filename'
|
24
|
+
end
|
25
|
+
|
26
|
+
def validate_presence_of_parent!
|
27
|
+
return if parsed_metadata[related_parents_parsed_mapping]&.map(&:present?)&.any?
|
28
|
+
|
29
|
+
raise StandardError, 'File set must be related to at least one work'
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
data/app/parsers/bulkrax/bagit_parser.rb
CHANGED
@@ -23,6 +23,11 @@ module Bulkrax
|
|
23
23
|
Entry
|
24
24
|
end
|
25
25
|
|
26
|
+
def file_set_entry_class
|
27
|
+
csv_format = Bulkrax::Importer.last.parser_fields['metadata_format'] == "Bulkrax::CsvEntry"
|
28
|
+
csv_format ? CsvFileSetEntry : RdfFileSetEntry
|
29
|
+
end
|
30
|
+
|
26
31
|
# Take a random sample of 10 metadata_paths and work out the import fields from that
|
27
32
|
def import_fields
|
28
33
|
raise StandardError, 'No metadata files were found' if metadata_paths.blank?
|
data/app/parsers/bulkrax/csv_parser.rb
CHANGED
@@ -217,7 +217,7 @@ module Bulkrax
|
|
217
217
|
instance_variable_set(instance_var, ActiveFedora::SolrService.post(
|
218
218
|
extra_filters.to_s,
|
219
219
|
fq: [
|
220
|
-
|
220
|
+
%(#{::Solrizer.solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
|
221
221
|
"has_model_ssim:(#{models_to_search.join(' OR ')})"
|
222
222
|
],
|
223
223
|
fl: 'id',
|
data/lib/bulkrax/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.0
|
4
|
+
version: 3.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-05-
|
11
|
+
date: 2022-05-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -291,6 +291,7 @@ files:
|
|
291
291
|
- app/models/bulkrax/pending_relationship.rb
|
292
292
|
- app/models/bulkrax/rdf_collection_entry.rb
|
293
293
|
- app/models/bulkrax/rdf_entry.rb
|
294
|
+
- app/models/bulkrax/rdf_file_set_entry.rb
|
294
295
|
- app/models/bulkrax/status.rb
|
295
296
|
- app/models/bulkrax/xml_entry.rb
|
296
297
|
- app/models/concerns/bulkrax/download_behavior.rb
|
@@ -298,6 +299,7 @@ files:
|
|
298
299
|
- app/models/concerns/bulkrax/errored_entries.rb
|
299
300
|
- app/models/concerns/bulkrax/export_behavior.rb
|
300
301
|
- app/models/concerns/bulkrax/file_factory.rb
|
302
|
+
- app/models/concerns/bulkrax/file_set_entry_behavior.rb
|
301
303
|
- app/models/concerns/bulkrax/has_local_processing.rb
|
302
304
|
- app/models/concerns/bulkrax/has_matchers.rb
|
303
305
|
- app/models/concerns/bulkrax/import_behavior.rb
|