bulkrax 2.0.2 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/bulkrax/importers_controller.rb +1 -0
- data/app/factories/bulkrax/object_factory.rb +51 -7
- data/app/jobs/bulkrax/create_relationships_job.rb +2 -33
- data/app/jobs/bulkrax/import_collection_job.rb +2 -0
- data/app/jobs/bulkrax/import_file_set_job.rb +69 -0
- data/app/jobs/bulkrax/import_work_job.rb +2 -0
- data/app/jobs/bulkrax/importer_job.rb +2 -0
- data/app/models/bulkrax/csv_entry.rb +9 -2
- data/app/models/bulkrax/csv_file_set_entry.rb +26 -0
- data/app/models/bulkrax/importer.rb +13 -1
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +39 -0
- data/app/models/concerns/bulkrax/import_behavior.rb +1 -0
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +4 -2
- data/app/parsers/bulkrax/application_parser.rb +8 -0
- data/app/parsers/bulkrax/bagit_parser.rb +1 -1
- data/app/parsers/bulkrax/csv_parser.rb +34 -2
- data/app/parsers/bulkrax/oai_dc_parser.rb +1 -1
- data/app/views/bulkrax/importers/index.html.erb +8 -6
- data/app/views/bulkrax/importers/show.html.erb +46 -4
- data/db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb +7 -0
- data/db/migrate/20220118001339_add_import_attempts_to_entries.rb +5 -0
- data/db/migrate/20220119213325_add_work_counters_to_importer_runs.rb +6 -0
- data/lib/bulkrax/version.rb +1 -1
- metadata +12 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5a5ca12cf97da052d9855404c783d52e41679587332ac72aac25d85237cf8231
|
4
|
+
data.tar.gz: 449209843d001cdac64dd4073db130920bb62a3193b2eefd53b94f2ec15f6a08
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 296ef3a7a7af8765aaae12353bec7ecfb35a97546955520ac544269525adfe459f3e686886fefbff54c91ba09f7da20c3b6b8c62bcb3fc7be6970f7967defa17
|
7
|
+
data.tar.gz: e7a13f384940a89d42c5efb5d52b8e940939c103dceb35890e08cb56ee020c2c664d1bb4d57945ae9d7afb2acd822385861e004ab7d69c776a0816d688cf66e1
|
@@ -37,6 +37,7 @@ module Bulkrax
|
|
37
37
|
|
38
38
|
@work_entries = @importer.entries.where(type: @importer.parser.entry_class.to_s).page(params[:work_entries_page]).per(30)
|
39
39
|
@collection_entries = @importer.entries.where(type: @importer.parser.collection_entry_class.to_s).page(params[:collections_entries_page]).per(30)
|
40
|
+
@file_set_entries = @importer.entries.where(type: @importer.parser.file_set_entry_class.to_s).page(params[:file_set_entries_page]).per(30)
|
40
41
|
end
|
41
42
|
end
|
42
43
|
|
@@ -4,11 +4,13 @@ module Bulkrax
|
|
4
4
|
class ObjectFactory
|
5
5
|
extend ActiveModel::Callbacks
|
6
6
|
include Bulkrax::FileFactory
|
7
|
+
include DynamicRecordLookup
|
8
|
+
|
7
9
|
define_model_callbacks :save, :create
|
8
|
-
attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :collection_field_mapping
|
10
|
+
attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :collection_field_mapping, :related_parents_parsed_mapping
|
9
11
|
|
10
12
|
# rubocop:disable Metrics/ParameterLists
|
11
|
-
def initialize(attributes:, source_identifier_value:, work_identifier:, collection_field_mapping:, replace_files: false, user: nil, klass: nil, update_files: false)
|
13
|
+
def initialize(attributes:, source_identifier_value:, work_identifier:, collection_field_mapping:, related_parents_parsed_mapping: nil, replace_files: false, user: nil, klass: nil, update_files: false)
|
12
14
|
ActiveSupport::Deprecation.warn(
|
13
15
|
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
14
16
|
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
@@ -19,6 +21,7 @@ module Bulkrax
|
|
19
21
|
@user = user || User.batch_user
|
20
22
|
@work_identifier = work_identifier
|
21
23
|
@collection_field_mapping = collection_field_mapping
|
24
|
+
@related_parents_parsed_mapping = related_parents_parsed_mapping
|
22
25
|
@source_identifier_value = source_identifier_value
|
23
26
|
@klass = klass || Bulkrax.default_work_type.constantize
|
24
27
|
end
|
@@ -33,7 +36,7 @@ module Bulkrax
|
|
33
36
|
arg_hash = { id: attributes[:id], name: 'UPDATE', klass: klass }
|
34
37
|
@object = find
|
35
38
|
if object
|
36
|
-
object.reindex_extent = Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX
|
39
|
+
object.reindex_extent = Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX if object.respond_to?(:reindex_extent)
|
37
40
|
ActiveSupport::Notifications.instrument('import.importer', arg_hash) { update }
|
38
41
|
else
|
39
42
|
ActiveSupport::Notifications.instrument('import.importer', arg_hash.merge(name: 'CREATE')) { create }
|
@@ -51,10 +54,16 @@ module Bulkrax
|
|
51
54
|
|
52
55
|
def update
|
53
56
|
raise "Object doesn't exist" unless object
|
54
|
-
destroy_existing_files if @replace_files && klass
|
57
|
+
destroy_existing_files if @replace_files && ![Collection, FileSet].include?(klass)
|
55
58
|
attrs = attribute_update
|
56
59
|
run_callbacks :save do
|
57
|
-
klass == Collection
|
60
|
+
if klass == Collection
|
61
|
+
update_collection(attrs)
|
62
|
+
elsif klass == FileSet
|
63
|
+
update_file_set(attrs)
|
64
|
+
else
|
65
|
+
work_actor.update(environment(attrs))
|
66
|
+
end
|
58
67
|
end
|
59
68
|
log_updated(object)
|
60
69
|
end
|
@@ -90,10 +99,16 @@ module Bulkrax
|
|
90
99
|
def create
|
91
100
|
attrs = create_attributes
|
92
101
|
@object = klass.new
|
93
|
-
object.reindex_extent = Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX
|
102
|
+
object.reindex_extent = Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX if object.respond_to?(:reindex_extent)
|
94
103
|
run_callbacks :save do
|
95
104
|
run_callbacks :create do
|
96
|
-
klass == Collection
|
105
|
+
if klass == Collection
|
106
|
+
create_collection(attrs)
|
107
|
+
elsif klass == FileSet
|
108
|
+
create_file_set(attrs)
|
109
|
+
else
|
110
|
+
work_actor.create(environment(attrs))
|
111
|
+
end
|
97
112
|
end
|
98
113
|
end
|
99
114
|
log_created(object)
|
@@ -150,6 +165,35 @@ module Bulkrax
|
|
150
165
|
object.save!
|
151
166
|
end
|
152
167
|
|
168
|
+
# This method is heavily inspired by Hyrax's AttachFilesToWorkJob
|
169
|
+
def create_file_set(attrs)
|
170
|
+
work = find_record(attributes[related_parents_parsed_mapping].first)
|
171
|
+
work_permissions = work.permissions.map(&:to_hash)
|
172
|
+
file_set_attrs = attrs.slice(*object.attributes.keys)
|
173
|
+
object.assign_attributes(file_set_attrs)
|
174
|
+
|
175
|
+
attrs['uploaded_files'].each do |uploaded_file_id|
|
176
|
+
uploaded_file = ::Hyrax::UploadedFile.find(uploaded_file_id)
|
177
|
+
next if uploaded_file.file_set_uri.present?
|
178
|
+
|
179
|
+
actor = ::Hyrax::Actors::FileSetActor.new(object, @user)
|
180
|
+
uploaded_file.update(file_set_uri: actor.file_set.uri)
|
181
|
+
actor.file_set.permissions_attributes = work_permissions
|
182
|
+
actor.create_metadata
|
183
|
+
actor.create_content(uploaded_file)
|
184
|
+
actor.attach_to_work(work)
|
185
|
+
end
|
186
|
+
|
187
|
+
object.save!
|
188
|
+
end
|
189
|
+
|
190
|
+
def update_file_set(attrs)
|
191
|
+
file_set_attrs = attrs.slice(*object.attributes.keys)
|
192
|
+
actor = ::Hyrax::Actors::FileSetActor.new(object, @user)
|
193
|
+
|
194
|
+
actor.update_metadata(file_set_attrs)
|
195
|
+
end
|
196
|
+
|
153
197
|
# Add child to parent's #member_collections
|
154
198
|
# Add parent to child's #member_of_collections
|
155
199
|
def persist_collection_memberships(parent:, child:)
|
@@ -17,6 +17,8 @@ module Bulkrax
|
|
17
17
|
# NOTE: In the context of this job, "identifier" is used to generically refer
|
18
18
|
# to either a record's ID or an Bulkrax::Entry's source_identifier.
|
19
19
|
class CreateRelationshipsJob < ApplicationJob
|
20
|
+
include DynamicRecordLookup
|
21
|
+
|
20
22
|
queue_as :import
|
21
23
|
|
22
24
|
attr_accessor :base_entry, :child_record, :parent_record, :importer_run
|
@@ -74,39 +76,6 @@ module Bulkrax
|
|
74
76
|
end
|
75
77
|
end
|
76
78
|
|
77
|
-
# This method allows us to create relationships with preexisting records (by their ID) OR
|
78
|
-
# with records that are concurrently being imported (by their Bulkrax::Entry source_identifier).
|
79
|
-
#
|
80
|
-
# @param identifier [String] Work/Collection ID or Bulkrax::Entry source_identifier
|
81
|
-
# @return [Work, Collection, nil] Work or Collection if found, otherwise nil
|
82
|
-
def find_record(identifier)
|
83
|
-
record = Entry.find_by(identifier: identifier)
|
84
|
-
record ||= ::Collection.where(id: identifier).first
|
85
|
-
if record.blank?
|
86
|
-
available_work_types.each do |work_type|
|
87
|
-
record ||= work_type.where(id: identifier).first
|
88
|
-
end
|
89
|
-
end
|
90
|
-
record = record.factory.find if record.is_a?(Entry)
|
91
|
-
|
92
|
-
record
|
93
|
-
end
|
94
|
-
|
95
|
-
# Check if the record is a Work
|
96
|
-
def curation_concern?(record)
|
97
|
-
available_work_types.include?(record.class)
|
98
|
-
end
|
99
|
-
|
100
|
-
# @return [Array<Class>] list of work type classes
|
101
|
-
def available_work_types
|
102
|
-
# If running in a Hyku app, do not reference disabled work types
|
103
|
-
@available_work_types ||= if defined?(::Hyku)
|
104
|
-
::Site.instance.available_works.map(&:constantize)
|
105
|
-
else
|
106
|
-
::Hyrax.config.curation_concerns
|
107
|
-
end
|
108
|
-
end
|
109
|
-
|
110
79
|
def user
|
111
80
|
@user ||= importer_run.importer.user
|
112
81
|
end
|
@@ -11,9 +11,11 @@ module Bulkrax
|
|
11
11
|
entry.build
|
12
12
|
entry.save
|
13
13
|
add_user_to_permission_template!(entry)
|
14
|
+
ImporterRun.find(args[1]).increment!(:processed_records)
|
14
15
|
ImporterRun.find(args[1]).increment!(:processed_collections)
|
15
16
|
ImporterRun.find(args[1]).decrement!(:enqueued_records)
|
16
17
|
rescue => e
|
18
|
+
ImporterRun.find(args[1]).increment!(:failed_records)
|
17
19
|
ImporterRun.find(args[1]).increment!(:failed_collections)
|
18
20
|
ImporterRun.find(args[1]).decrement!(:enqueued_records)
|
19
21
|
raise e
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
|
4
|
+
class MissingParentError < ::StandardError; end
|
5
|
+
class ImportFileSetJob < ApplicationJob
|
6
|
+
include DynamicRecordLookup
|
7
|
+
|
8
|
+
queue_as :import
|
9
|
+
|
10
|
+
def perform(entry_id, importer_run_id)
|
11
|
+
entry = Entry.find(entry_id)
|
12
|
+
parent_identifier = entry.raw_metadata[entry.related_parents_raw_mapping]&.strip
|
13
|
+
|
14
|
+
validate_parent!(parent_identifier)
|
15
|
+
|
16
|
+
entry.build
|
17
|
+
if entry.succeeded?
|
18
|
+
# rubocop:disable Rails/SkipsModelValidations
|
19
|
+
ImporterRun.find(importer_run_id).increment!(:processed_records)
|
20
|
+
ImporterRun.find(importer_run_id).increment!(:processed_file_sets)
|
21
|
+
else
|
22
|
+
ImporterRun.find(importer_run_id).increment!(:failed_records)
|
23
|
+
ImporterRun.find(importer_run_id).increment!(:failed_file_sets)
|
24
|
+
# rubocop:enable Rails/SkipsModelValidations
|
25
|
+
end
|
26
|
+
ImporterRun.find(importer_run_id).decrement!(:enqueued_records) # rubocop:disable Rails/SkipsModelValidations
|
27
|
+
entry.save!
|
28
|
+
|
29
|
+
rescue MissingParentError => e
|
30
|
+
# try waiting for the parent record to be created
|
31
|
+
entry.import_attempts += 1
|
32
|
+
entry.save!
|
33
|
+
if entry.import_attempts < 5
|
34
|
+
ImportFileSetJob
|
35
|
+
.set(wait: (entry.import_attempts + 1).minutes)
|
36
|
+
.perform_later(entry_id, importer_run_id)
|
37
|
+
else
|
38
|
+
ImporterRun.find(importer_run_id).decrement!(:enqueued_records) # rubocop:disable Rails/SkipsModelValidations
|
39
|
+
entry.status_info(e)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
private
|
44
|
+
|
45
|
+
attr_reader :parent_record
|
46
|
+
|
47
|
+
def validate_parent!(parent_identifier)
|
48
|
+
# if parent_identifier is missing, it will be caught by #validate_presence_of_parent!
|
49
|
+
return if parent_identifier.blank?
|
50
|
+
|
51
|
+
find_parent_record(parent_identifier)
|
52
|
+
check_parent_exists!(parent_identifier)
|
53
|
+
check_parent_is_a_work!(parent_identifier)
|
54
|
+
end
|
55
|
+
|
56
|
+
def check_parent_exists!(parent_identifier)
|
57
|
+
raise MissingParentError, %(Unable to find a record with the identifier "#{parent_identifier}") if parent_record.blank?
|
58
|
+
end
|
59
|
+
|
60
|
+
def check_parent_is_a_work!(parent_identifier)
|
61
|
+
error_msg = %(A record with the ID "#{parent_identifier}" was found, but it was a #{parent_record.class}, which is not an valid/available work type)
|
62
|
+
raise ::StandardError, error_msg unless curation_concern?(parent_record)
|
63
|
+
end
|
64
|
+
|
65
|
+
def find_parent_record(parent_identifier)
|
66
|
+
@parent_record ||= find_record(parent_identifier)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
@@ -10,11 +10,13 @@ module Bulkrax
|
|
10
10
|
entry.build
|
11
11
|
if entry.status == "Complete"
|
12
12
|
ImporterRun.find(args[1]).increment!(:processed_records)
|
13
|
+
ImporterRun.find(args[1]).increment!(:processed_works)
|
13
14
|
ImporterRun.find(args[1]).decrement!(:enqueued_records) unless ImporterRun.find(args[1]).enqueued_records <= 0 # rubocop:disable Style/IdenticalConditionalBranches
|
14
15
|
else
|
15
16
|
# do not retry here because whatever parse error kept you from creating a work will likely
|
16
17
|
# keep preventing you from doing so.
|
17
18
|
ImporterRun.find(args[1]).increment!(:failed_records)
|
19
|
+
ImporterRun.find(args[1]).increment!(:failed_works)
|
18
20
|
ImporterRun.find(args[1]).decrement!(:enqueued_records) unless ImporterRun.find(args[1]).enqueued_records <= 0 # rubocop:disable Style/IdenticalConditionalBranches
|
19
21
|
end
|
20
22
|
entry.save!
|
@@ -20,6 +20,7 @@ module Bulkrax
|
|
20
20
|
|
21
21
|
importer.import_collections
|
22
22
|
importer.import_works
|
23
|
+
importer.import_file_sets
|
23
24
|
end
|
24
25
|
|
25
26
|
def unzip_imported_file(parser)
|
@@ -31,6 +32,7 @@ module Bulkrax
|
|
31
32
|
def update_current_run_counters(importer)
|
32
33
|
importer.current_run.total_work_entries = importer.limit || importer.parser.works_total
|
33
34
|
importer.current_run.total_collection_entries = importer.parser.collections_total
|
35
|
+
importer.current_run.total_file_set_entries = importer.parser.file_sets_total
|
34
36
|
importer.current_run.save!
|
35
37
|
end
|
36
38
|
|
@@ -40,9 +40,9 @@ module Bulkrax
|
|
40
40
|
|
41
41
|
self.parsed_metadata = {}
|
42
42
|
add_identifier
|
43
|
-
add_metadata_for_model
|
44
43
|
add_visibility
|
45
44
|
add_ingested_metadata
|
45
|
+
add_metadata_for_model
|
46
46
|
add_rights_statement
|
47
47
|
add_collections
|
48
48
|
add_local
|
@@ -57,6 +57,9 @@ module Bulkrax
|
|
57
57
|
def add_metadata_for_model
|
58
58
|
if factory_class == Collection
|
59
59
|
add_collection_type_gid
|
60
|
+
elsif factory_class == FileSet
|
61
|
+
add_path_to_file
|
62
|
+
validate_presence_of_parent!
|
60
63
|
else
|
61
64
|
add_file unless importerexporter.metadata_only?
|
62
65
|
add_admin_set_id
|
@@ -85,7 +88,11 @@ module Bulkrax
|
|
85
88
|
elsif record['file'].is_a?(Array)
|
86
89
|
self.parsed_metadata['file'] = record['file']
|
87
90
|
end
|
88
|
-
self.parsed_metadata['file'] = self.parsed_metadata['file'].map
|
91
|
+
self.parsed_metadata['file'] = self.parsed_metadata['file'].map do |f|
|
92
|
+
next if f.blank?
|
93
|
+
|
94
|
+
path_to_file(f.tr(' ', '_'))
|
95
|
+
end.compact
|
89
96
|
end
|
90
97
|
|
91
98
|
def build_export_metadata
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
|
4
|
+
class CsvFileSetEntry < CsvEntry
|
5
|
+
def factory_class
|
6
|
+
::FileSet
|
7
|
+
end
|
8
|
+
|
9
|
+
def add_path_to_file
|
10
|
+
parsed_metadata['file'].each_with_index do |filename, i|
|
11
|
+
path_to_file = ::File.join(parser.path_to_files, filename)
|
12
|
+
|
13
|
+
parsed_metadata['file'][i] = path_to_file
|
14
|
+
end
|
15
|
+
raise ::StandardError, 'one or more file paths are invalid' unless parsed_metadata['file'].map { |file_path| ::File.file?(file_path) }.all?
|
16
|
+
|
17
|
+
parsed_metadata['file']
|
18
|
+
end
|
19
|
+
|
20
|
+
def validate_presence_of_parent!
|
21
|
+
return if parsed_metadata[related_parents_parsed_mapping]&.map(&:present?)&.any?
|
22
|
+
|
23
|
+
raise StandardError, 'File set must be related to at least one work'
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -99,7 +99,12 @@ module Bulkrax
|
|
99
99
|
@current_run ||= if file? && zip?
|
100
100
|
self.importer_runs.create!
|
101
101
|
else
|
102
|
-
|
102
|
+
entry_counts = {
|
103
|
+
total_work_entries: self.limit || parser.works_total,
|
104
|
+
total_collection_entries: parser.collections_total,
|
105
|
+
total_file_set_entries: parser.file_sets_total
|
106
|
+
}
|
107
|
+
self.importer_runs.create!(entry_counts)
|
103
108
|
end
|
104
109
|
end
|
105
110
|
|
@@ -134,6 +139,13 @@ module Bulkrax
|
|
134
139
|
status_info(e)
|
135
140
|
end
|
136
141
|
|
142
|
+
def import_file_sets
|
143
|
+
self.save if self.new_record? # Object needs to be saved for statuses
|
144
|
+
parser.create_file_sets
|
145
|
+
rescue StandardError => e
|
146
|
+
status_info(e)
|
147
|
+
end
|
148
|
+
|
137
149
|
# Prepend the base_url to ensure unique set identifiers
|
138
150
|
# @todo - move to parser, as this is OAI specific
|
139
151
|
def unique_collection_identifier(id)
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
|
4
|
+
module DynamicRecordLookup
|
5
|
+
# Search entries, collections, and every available work type for a record that
|
6
|
+
# has the provided identifier.
|
7
|
+
#
|
8
|
+
# @param identifier [String] Work/Collection ID or Bulkrax::Entry source_identifier
|
9
|
+
# @return [Work, Collection, nil] Work or Collection if found, otherwise nil
|
10
|
+
def find_record(identifier)
|
11
|
+
record = Entry.find_by(identifier: identifier)
|
12
|
+
record ||= ::Collection.where(id: identifier).first # rubocop:disable Rails/FindBy
|
13
|
+
if record.blank?
|
14
|
+
available_work_types.each do |work_type|
|
15
|
+
record ||= work_type.where(id: identifier).first # rubocop:disable Rails/FindBy
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
record.is_a?(Entry) ? record.factory.find : record
|
20
|
+
end
|
21
|
+
|
22
|
+
# Check if the record is a Work
|
23
|
+
def curation_concern?(record)
|
24
|
+
available_work_types.include?(record.class)
|
25
|
+
end
|
26
|
+
|
27
|
+
private
|
28
|
+
|
29
|
+
# @return [Array<Class>] list of work type classes
|
30
|
+
def available_work_types
|
31
|
+
# If running in a Hyku app, do not include disabled work types
|
32
|
+
@available_work_types ||= if defined?(::Hyku)
|
33
|
+
::Site.instance.available_works.map(&:constantize)
|
34
|
+
else
|
35
|
+
::Hyrax.config.curation_concerns
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -96,6 +96,7 @@ module Bulkrax
|
|
96
96
|
source_identifier_value: identifier,
|
97
97
|
work_identifier: parser.work_identifier,
|
98
98
|
collection_field_mapping: parser.collection_field_mapping,
|
99
|
+
related_parents_parsed_mapping: related_parents_parsed_mapping,
|
99
100
|
replace_files: replace_files,
|
100
101
|
user: user,
|
101
102
|
klass: factory_class,
|
@@ -20,15 +20,17 @@ module Bulkrax
|
|
20
20
|
(last_imported_at || Time.current) + frequency.to_seconds if schedulable? && last_imported_at.present?
|
21
21
|
end
|
22
22
|
|
23
|
-
def increment_counters(index, collection
|
23
|
+
def increment_counters(index, collection: false, file_set: false)
|
24
24
|
# Only set the totals if they were not set on initialization
|
25
25
|
if collection
|
26
26
|
current_run.total_collection_entries = index + 1 unless parser.collections_total.positive?
|
27
|
+
elsif file_set
|
28
|
+
current_run.total_file_set_entries = index + 1 unless parser.file_sets_total.positive?
|
27
29
|
else
|
28
30
|
# TODO: differentiate between work and collection counts for exporters
|
29
31
|
current_run.total_work_entries = index + 1 unless limit.to_i.positive? || parser.total.positive?
|
30
32
|
end
|
31
|
-
current_run.enqueued_records
|
33
|
+
current_run.enqueued_records += 1
|
32
34
|
current_run.save!
|
33
35
|
end
|
34
36
|
|
@@ -114,6 +114,10 @@ module Bulkrax
|
|
114
114
|
raise StandardError, 'must be defined' if importer?
|
115
115
|
end
|
116
116
|
|
117
|
+
def create_file_sets
|
118
|
+
raise StandardError, 'must be defined' if importer?
|
119
|
+
end
|
120
|
+
|
117
121
|
# Optional, define if using browse everything for file upload
|
118
122
|
def retrieve_cloud_files(files); end
|
119
123
|
|
@@ -234,6 +238,10 @@ module Bulkrax
|
|
234
238
|
0
|
235
239
|
end
|
236
240
|
|
241
|
+
def file_sets_total
|
242
|
+
0
|
243
|
+
end
|
244
|
+
|
237
245
|
def write
|
238
246
|
write_files
|
239
247
|
zip
|
@@ -59,7 +59,7 @@ module Bulkrax
|
|
59
59
|
}
|
60
60
|
new_entry = find_or_create_entry(collection_entry_class, collection, 'Bulkrax::Importer', metadata)
|
61
61
|
ImportCollectionJob.perform_now(new_entry.id, current_run.id)
|
62
|
-
increment_counters(index, true)
|
62
|
+
increment_counters(index, collection: true)
|
63
63
|
end
|
64
64
|
end
|
65
65
|
|
@@ -38,13 +38,27 @@ module Bulkrax
|
|
38
38
|
end
|
39
39
|
|
40
40
|
def works
|
41
|
-
records - collections
|
41
|
+
records - collections - file_sets
|
42
42
|
end
|
43
43
|
|
44
44
|
def works_total
|
45
45
|
works.size
|
46
46
|
end
|
47
47
|
|
48
|
+
def file_sets
|
49
|
+
records.map do |r|
|
50
|
+
file_sets = []
|
51
|
+
model_field_mappings.each do |model_mapping|
|
52
|
+
file_sets << r if r[model_mapping.to_sym]&.downcase == 'fileset'
|
53
|
+
end
|
54
|
+
file_sets
|
55
|
+
end.flatten.compact.uniq
|
56
|
+
end
|
57
|
+
|
58
|
+
def file_sets_total
|
59
|
+
file_sets.size
|
60
|
+
end
|
61
|
+
|
48
62
|
# We could use CsvEntry#fields_from_data(data) but that would mean re-reading the data
|
49
63
|
def import_fields
|
50
64
|
@import_fields ||= records.inject(:merge).keys.compact.uniq
|
@@ -98,7 +112,7 @@ module Bulkrax
|
|
98
112
|
new_entry = find_or_create_entry(collection_entry_class, collection_hash[source_identifier], 'Bulkrax::Importer', collection_hash)
|
99
113
|
# TODO: add support for :delete option
|
100
114
|
ImportCollectionJob.perform_now(new_entry.id, current_run.id)
|
101
|
-
increment_counters(index, true)
|
115
|
+
increment_counters(index, collection: true)
|
102
116
|
end
|
103
117
|
importer.record_status
|
104
118
|
rescue StandardError => e
|
@@ -124,6 +138,20 @@ module Bulkrax
|
|
124
138
|
status_info(e)
|
125
139
|
end
|
126
140
|
|
141
|
+
def create_file_sets
|
142
|
+
file_sets.each_with_index do |file_set, index|
|
143
|
+
next unless record_has_source_identifier(file_set, records.find_index(file_set))
|
144
|
+
break if limit_reached?(limit, records.find_index(file_set))
|
145
|
+
|
146
|
+
new_entry = find_or_create_entry(file_set_entry_class, file_set[source_identifier], 'Bulkrax::Importer', file_set.to_h)
|
147
|
+
ImportFileSetJob.perform_later(new_entry.id, current_run.id)
|
148
|
+
increment_counters(index, file_set: true)
|
149
|
+
end
|
150
|
+
importer.record_status
|
151
|
+
rescue StandardError => e
|
152
|
+
status_info(e)
|
153
|
+
end
|
154
|
+
|
127
155
|
def write_partial_import_file(file)
|
128
156
|
import_filename = import_file_path.split('/').last
|
129
157
|
partial_import_filename = "#{File.basename(import_filename, '.csv')}_corrected_entries.csv"
|
@@ -199,6 +227,10 @@ module Bulkrax
|
|
199
227
|
CsvCollectionEntry
|
200
228
|
end
|
201
229
|
|
230
|
+
def file_set_entry_class
|
231
|
+
CsvFileSetEntry
|
232
|
+
end
|
233
|
+
|
202
234
|
# See https://stackoverflow.com/questions/2650517/count-the-number-of-lines-in-a-file-without-reading-entire-file-into-memory
|
203
235
|
# Changed to grep as wc -l counts blank lines, and ignores the final unescaped line (which may or may not contain data)
|
204
236
|
def total
|
@@ -76,7 +76,7 @@ module Bulkrax
|
|
76
76
|
new_entry = collection_entry_class.where(importerexporter: importerexporter, identifier: unique_collection_identifier, raw_metadata: metadata).first_or_create!
|
77
77
|
# perform now to ensure this gets created before work imports start
|
78
78
|
ImportCollectionJob.perform_now(new_entry.id, importerexporter.current_run.id)
|
79
|
-
increment_counters(index, true)
|
79
|
+
increment_counters(index, collection: true)
|
80
80
|
end
|
81
81
|
end
|
82
82
|
|
@@ -24,6 +24,7 @@
|
|
24
24
|
<th scope="col">Entries Deleted Upstream</th>
|
25
25
|
<th scope="col">Total Collection Entries</th>
|
26
26
|
<th scope="col">Total Work Entries</th>
|
27
|
+
<th scope="col">Total File Set Entries</th>
|
27
28
|
<th scope="col"></th>
|
28
29
|
<th scope="col"></th>
|
29
30
|
<th scope="col"></th>
|
@@ -36,12 +37,13 @@
|
|
36
37
|
<td><%= importer.status %></td>
|
37
38
|
<td><%= importer.last_imported_at.strftime("%b %d, %Y") if importer.last_imported_at %></td>
|
38
39
|
<td><%= importer.next_import_at.strftime("%b %d, %Y") if importer.next_import_at %></td>
|
39
|
-
<td><%= importer.
|
40
|
-
<td><%= (importer.
|
41
|
-
<td><%= (importer.
|
42
|
-
<td><%= importer.
|
43
|
-
<td><%= importer.
|
44
|
-
<td><%= importer.
|
40
|
+
<td><%= importer.last_run&.enqueued_records %></td>
|
41
|
+
<td><%= (importer.last_run&.processed_records || 0) %></td>
|
42
|
+
<td><%= (importer.last_run&.failed_records || 0) %></td>
|
43
|
+
<td><%= importer.last_run&.deleted_records %></td>
|
44
|
+
<td><%= importer.last_run&.total_collection_entries %></td>
|
45
|
+
<td><%= importer.last_run&.total_work_entries %></td>
|
46
|
+
<td><%= importer.last_run&.total_file_set_entries %></td>
|
45
47
|
<td><%= link_to raw('<span class="glyphicon glyphicon-info-sign"></span>'), importer_path(importer) %></td>
|
46
48
|
<td><%= link_to raw('<span class="glyphicon glyphicon-pencil"></span>'), edit_importer_path(importer) %></td>
|
47
49
|
<td><%= link_to raw('<span class="glyphicon glyphicon-remove"></span>'), importer, method: :delete, data: { confirm: 'Are you sure?' } %></td>
|
@@ -56,14 +56,19 @@
|
|
56
56
|
|
57
57
|
<%= render partial: 'bulkrax/shared/bulkrax_field_mapping', locals: {item: @importer} %>
|
58
58
|
|
59
|
-
<p class="bulkrax-p-align">
|
59
|
+
<p class="bulkrax-p-align" title="<%= @importer.last_run&.processed_works %> processed, <%= @importer.last_run&.failed_works %> failed">
|
60
60
|
<strong>Total Works:</strong>
|
61
|
-
<%= @importer.
|
61
|
+
<%= @importer.last_run&.total_work_entries %>
|
62
62
|
</p>
|
63
63
|
|
64
|
-
<p class="bulkrax-p-align">
|
64
|
+
<p class="bulkrax-p-align" title="<%= @importer.last_run&.processed_collections %> processed, <%= @importer.last_run&.failed_collections %> failed">
|
65
65
|
<strong>Total Collections:</strong>
|
66
|
-
<%= @importer.
|
66
|
+
<%= @importer.last_run&.total_collection_entries %>
|
67
|
+
</p>
|
68
|
+
|
69
|
+
<p class="bulkrax-p-align" title="<%= @importer.last_run&.processed_file_sets %> processed, <%= @importer.last_run&.failed_file_sets %> failed">
|
70
|
+
<strong>Total File Sets:</strong>
|
71
|
+
<%= @importer.last_run&.total_file_set_entries %>
|
67
72
|
</p>
|
68
73
|
|
69
74
|
<div class="bulkrax-nav-tab-bottom-margin">
|
@@ -71,6 +76,7 @@
|
|
71
76
|
<ul class="bulkrax-nav-tab-top-margin tab-nav nav nav-tabs" role="tablist">
|
72
77
|
<li role="presentation" class='active'><a href="#work-entries" aria-controls="work-entries" role="tab" data-toggle="tab">Work Entries</a></li>
|
73
78
|
<li role="presentation"><a href="#collection-entries" aria-controls="collection-entries" role="tab" data-toggle="tab">Collection Entries</a></li>
|
79
|
+
<li role="presentation"><a href="#file-set-entries" aria-controls="file-set-entries" role="tab" data-toggle="tab">File Set Entries</a></li>
|
74
80
|
</ul>
|
75
81
|
<!-- Tab panes -->
|
76
82
|
<div class="tab-content outline">
|
@@ -158,6 +164,42 @@
|
|
158
164
|
<%= page_entries_info(@collection_entries) %><br />
|
159
165
|
<%= paginate(@collection_entries, theme: 'blacklight', param_name: :collections_entries_page, params: {anchor: 'collection-entries'}) %>
|
160
166
|
</div>
|
167
|
+
<div role="tabpanel" class="tab-pane bulkrax-nav-tab-table-left-align" id="file-set-entries">
|
168
|
+
<table class='table table-striped'>
|
169
|
+
<thead>
|
170
|
+
<tr>
|
171
|
+
<th>Identifier</th>
|
172
|
+
<th>Entry ID</th>
|
173
|
+
<th>Status</th>
|
174
|
+
<th>Errors</th>
|
175
|
+
<th>Status Set At</th>
|
176
|
+
<th>Actions</th>
|
177
|
+
</tr>
|
178
|
+
</thead>
|
179
|
+
<tbody>
|
180
|
+
<% @file_set_entries.each do |e| %>
|
181
|
+
<tr>
|
182
|
+
<td><%= link_to e.identifier, bulkrax.importer_entry_path(@importer.id, e.id) %></td>
|
183
|
+
<td><%= e.id %></td>
|
184
|
+
<% if e.status == "Complete" %>
|
185
|
+
<td><span class="glyphicon glyphicon-ok" style="color: green;"></span> <%= e.status %></td>
|
186
|
+
<% else %>
|
187
|
+
<td><span class="glyphicon glyphicon-remove" style="color: red;"></span> <%= e.status %></td>
|
188
|
+
<% end %>
|
189
|
+
<% if e.last_error.present? %>
|
190
|
+
<td><%= link_to e.last_error.dig("error_class"), bulkrax.importer_entry_path(@importer.id, e.id) %></td>
|
191
|
+
<% else %>
|
192
|
+
<td></td>
|
193
|
+
<% end %>
|
194
|
+
<td><%= e.status_at %></td>
|
195
|
+
<td><%= link_to raw('<span class="glyphicon glyphicon-info-sign"></span>'), bulkrax.importer_entry_path(@importer.id, e.id) %></td>
|
196
|
+
</tr>
|
197
|
+
<% end %>
|
198
|
+
</tbody>
|
199
|
+
</table>
|
200
|
+
<%= page_entries_info(@file_set_entries) %><br />
|
201
|
+
<%= paginate(@file_set_entries, theme: 'blacklight', param_name: :file_set_entries_page, params: {anchor: 'file-set-entries'}) %>
|
202
|
+
</div>
|
161
203
|
</div>
|
162
204
|
</div>
|
163
205
|
|
@@ -0,0 +1,7 @@
|
|
1
|
+
class AddFileSetCountersToImporterRuns < ActiveRecord::Migration[5.2]
|
2
|
+
def change
|
3
|
+
add_column :bulkrax_importer_runs, :processed_file_sets, :integer, default: 0 unless column_exists?(:bulkrax_importer_runs, :processed_file_sets)
|
4
|
+
add_column :bulkrax_importer_runs, :failed_file_sets, :integer, default: 0 unless column_exists?(:bulkrax_importer_runs, :failed_file_sets)
|
5
|
+
add_column :bulkrax_importer_runs, :total_file_set_entries, :integer, default: 0 unless column_exists?(:bulkrax_importer_runs, :total_file_set_entries)
|
6
|
+
end
|
7
|
+
end
|
@@ -0,0 +1,6 @@
|
|
1
|
+
class AddWorkCountersToImporterRuns < ActiveRecord::Migration[5.2]
|
2
|
+
def change
|
3
|
+
add_column :bulkrax_importer_runs, :processed_works, :integer, default: 0 unless column_exists?(:bulkrax_importer_runs, :processed_works)
|
4
|
+
add_column :bulkrax_importer_runs, :failed_works, :integer, default: 0 unless column_exists?(:bulkrax_importer_runs, :failed_works)
|
5
|
+
end
|
6
|
+
end
|
data/lib/bulkrax/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-02-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -265,6 +265,7 @@ files:
|
|
265
265
|
- app/jobs/bulkrax/export_work_job.rb
|
266
266
|
- app/jobs/bulkrax/exporter_job.rb
|
267
267
|
- app/jobs/bulkrax/import_collection_job.rb
|
268
|
+
- app/jobs/bulkrax/import_file_set_job.rb
|
268
269
|
- app/jobs/bulkrax/import_work_job.rb
|
269
270
|
- app/jobs/bulkrax/importer_job.rb
|
270
271
|
- app/mailers/bulkrax/application_mailer.rb
|
@@ -275,6 +276,7 @@ files:
|
|
275
276
|
- app/models/bulkrax/application_record.rb
|
276
277
|
- app/models/bulkrax/csv_collection_entry.rb
|
277
278
|
- app/models/bulkrax/csv_entry.rb
|
279
|
+
- app/models/bulkrax/csv_file_set_entry.rb
|
278
280
|
- app/models/bulkrax/entry.rb
|
279
281
|
- app/models/bulkrax/exporter.rb
|
280
282
|
- app/models/bulkrax/exporter_run.rb
|
@@ -290,6 +292,7 @@ files:
|
|
290
292
|
- app/models/bulkrax/status.rb
|
291
293
|
- app/models/bulkrax/xml_entry.rb
|
292
294
|
- app/models/concerns/bulkrax/download_behavior.rb
|
295
|
+
- app/models/concerns/bulkrax/dynamic_record_lookup.rb
|
293
296
|
- app/models/concerns/bulkrax/errored_entries.rb
|
294
297
|
- app/models/concerns/bulkrax/export_behavior.rb
|
295
298
|
- app/models/concerns/bulkrax/file_factory.rb
|
@@ -357,6 +360,9 @@ files:
|
|
357
360
|
- db/migrate/20210806065737_increase_text_sizes.rb
|
358
361
|
- db/migrate/20211004170708_change_bulkrax_statuses_error_message_column_type_to_text.rb
|
359
362
|
- db/migrate/20211203195233_rename_children_counters_to_relationships.rb
|
363
|
+
- db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb
|
364
|
+
- db/migrate/20220118001339_add_import_attempts_to_entries.rb
|
365
|
+
- db/migrate/20220119213325_add_work_counters_to_importer_runs.rb
|
360
366
|
- lib/bulkrax.rb
|
361
367
|
- lib/bulkrax/engine.rb
|
362
368
|
- lib/bulkrax/version.rb
|
@@ -372,7 +378,7 @@ homepage: https://github.com/samvera-labs/bulkrax
|
|
372
378
|
licenses:
|
373
379
|
- Apache-2.0
|
374
380
|
metadata: {}
|
375
|
-
post_install_message:
|
381
|
+
post_install_message:
|
376
382
|
rdoc_options: []
|
377
383
|
require_paths:
|
378
384
|
- lib
|
@@ -387,8 +393,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
387
393
|
- !ruby/object:Gem::Version
|
388
394
|
version: '0'
|
389
395
|
requirements: []
|
390
|
-
rubygems_version: 3.1.
|
391
|
-
signing_key:
|
396
|
+
rubygems_version: 3.1.4
|
397
|
+
signing_key:
|
392
398
|
specification_version: 4
|
393
399
|
summary: Import and export tool for Hyrax and Hyku
|
394
400
|
test_files: []
|