bulkrax 1.0.1 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/app/controllers/bulkrax/exporters_controller.rb +12 -4
- data/app/controllers/bulkrax/importers_controller.rb +22 -17
- data/app/factories/bulkrax/object_factory.rb +44 -61
- data/app/jobs/bulkrax/create_relationships_job.rb +187 -0
- data/app/jobs/bulkrax/delete_work_job.rb +6 -2
- data/app/jobs/bulkrax/export_work_job.rb +3 -1
- data/app/jobs/bulkrax/exporter_job.rb +1 -0
- data/app/jobs/bulkrax/{import_work_collection_job.rb → import_collection_job.rb} +2 -2
- data/app/jobs/bulkrax/importer_job.rb +16 -1
- data/app/matchers/bulkrax/application_matcher.rb +9 -6
- data/app/models/bulkrax/csv_collection_entry.rb +8 -6
- data/app/models/bulkrax/csv_entry.rb +139 -45
- data/app/models/bulkrax/entry.rb +19 -8
- data/app/models/bulkrax/exporter.rb +12 -5
- data/app/models/bulkrax/importer.rb +22 -5
- data/app/models/bulkrax/oai_entry.rb +5 -1
- data/app/models/bulkrax/rdf_entry.rb +16 -7
- data/app/models/bulkrax/xml_entry.rb +4 -0
- data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/file_factory.rb +2 -1
- data/app/models/concerns/bulkrax/has_matchers.rb +59 -16
- data/app/models/concerns/bulkrax/import_behavior.rb +39 -5
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +19 -0
- data/app/models/concerns/bulkrax/status_info.rb +4 -4
- data/app/parsers/bulkrax/application_parser.rb +59 -84
- data/app/parsers/bulkrax/bagit_parser.rb +12 -3
- data/app/parsers/bulkrax/csv_parser.rb +137 -63
- data/app/parsers/bulkrax/oai_dc_parser.rb +5 -2
- data/app/parsers/bulkrax/xml_parser.rb +5 -0
- data/app/views/bulkrax/exporters/_form.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +13 -1
- data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +45 -14
- data/app/views/bulkrax/importers/edit.html.erb +2 -0
- data/app/views/bulkrax/importers/index.html.erb +15 -17
- data/app/views/bulkrax/importers/show.html.erb +6 -2
- data/config/locales/bulkrax.en.yml +1 -0
- data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +5 -1
- data/db/migrate/20211004170708_change_bulkrax_statuses_error_message_column_type_to_text.rb +5 -0
- data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +6 -0
- data/lib/bulkrax/engine.rb +1 -1
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +9 -17
- data/lib/generators/bulkrax/templates/bin/importer +17 -11
- data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +3 -1
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +7 -12
- metadata +12 -6
- data/app/jobs/bulkrax/child_relationships_job.rb +0 -128
@@ -11,6 +11,8 @@ module Bulkrax
|
|
11
11
|
unless self.importerexporter.validate_only
|
12
12
|
raise CollectionsCreatedError unless collections_created?
|
13
13
|
@item = factory.run!
|
14
|
+
parent_jobs if self.parsed_metadata[related_parents_parsed_mapping].present?
|
15
|
+
child_jobs if self.parsed_metadata[related_children_parsed_mapping].present?
|
14
16
|
end
|
15
17
|
rescue RSolr::Error::Http, CollectionsCreatedError => e
|
16
18
|
raise e
|
@@ -22,7 +24,23 @@ module Bulkrax
|
|
22
24
|
return @item
|
23
25
|
end
|
24
26
|
|
25
|
-
def
|
27
|
+
def parent_jobs
|
28
|
+
self.parsed_metadata[related_parents_parsed_mapping].each do |parent_identifier|
|
29
|
+
next if parent_identifier.blank?
|
30
|
+
|
31
|
+
CreateRelationshipsJob.perform_later(entry_identifier: self.identifier, parent_identifier: parent_identifier, importer_run: self.last_run)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def child_jobs
|
36
|
+
self.parsed_metadata[related_children_parsed_mapping].each do |child_identifier|
|
37
|
+
next if child_identifier.blank?
|
38
|
+
|
39
|
+
CreateRelationshipsJob.perform_later(entry_identifier: self.identifier, child_identifier: child_identifier, importer_run: self.last_run)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def find_collection_ids
|
26
44
|
self.collection_ids
|
27
45
|
end
|
28
46
|
|
@@ -57,15 +75,27 @@ module Bulkrax
|
|
57
75
|
end
|
58
76
|
|
59
77
|
def add_collections
|
60
|
-
return if
|
61
|
-
|
62
|
-
|
78
|
+
return if find_collection_ids.blank?
|
79
|
+
|
80
|
+
ActiveSupport::Deprecation.warn(
|
81
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
82
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
83
|
+
)
|
84
|
+
self.parsed_metadata['member_of_collections_attributes'] = {}
|
85
|
+
find_collection_ids.each_with_index do |c, i|
|
86
|
+
self.parsed_metadata['member_of_collections_attributes'][i.to_s] = { id: c }
|
87
|
+
end
|
63
88
|
end
|
64
89
|
|
65
90
|
def factory
|
91
|
+
ActiveSupport::Deprecation.warn(
|
92
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
93
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
94
|
+
)
|
66
95
|
@factory ||= Bulkrax::ObjectFactory.new(attributes: self.parsed_metadata,
|
67
96
|
source_identifier_value: identifier,
|
68
97
|
work_identifier: parser.work_identifier,
|
98
|
+
collection_field_mapping: parser.collection_field_mapping,
|
69
99
|
replace_files: replace_files,
|
70
100
|
user: user,
|
71
101
|
klass: factory_class,
|
@@ -80,7 +110,11 @@ module Bulkrax
|
|
80
110
|
else
|
81
111
|
Bulkrax.default_work_type
|
82
112
|
end
|
83
|
-
|
113
|
+
|
114
|
+
# return the name of the collection or work
|
115
|
+
fc.tr!(' ', '_')
|
116
|
+
fc.downcase! if fc.match?(/[-_]/)
|
117
|
+
fc.camelcase.constantize
|
84
118
|
rescue NameError
|
85
119
|
nil
|
86
120
|
rescue
|
@@ -25,10 +25,29 @@ module Bulkrax
|
|
25
25
|
if collection
|
26
26
|
current_run.total_collection_entries = index + 1 unless parser.collections_total.positive?
|
27
27
|
else
|
28
|
+
# TODO: differentiate between work and collection counts for exporters
|
28
29
|
current_run.total_work_entries = index + 1 unless limit.to_i.positive? || parser.total.positive?
|
29
30
|
end
|
30
31
|
current_run.enqueued_records = index + 1
|
31
32
|
current_run.save!
|
32
33
|
end
|
34
|
+
|
35
|
+
def keys_without_numbers(keys)
|
36
|
+
keys.map { |key| key_without_numbers(key) }
|
37
|
+
end
|
38
|
+
|
39
|
+
def key_without_numbers(key)
|
40
|
+
key.gsub(/_\d+/, '').sub(/^\d+_/, '')
|
41
|
+
end
|
42
|
+
|
43
|
+
# Is this a file?
|
44
|
+
def file?
|
45
|
+
parser_fields&.[]('import_file_path') && File.file?(parser_fields['import_file_path'])
|
46
|
+
end
|
47
|
+
|
48
|
+
# Is this a zip file?
|
49
|
+
def zip?
|
50
|
+
parser_fields&.[]('import_file_path') && MIME::Types.type_for(parser_fields['import_file_path']).include?('application/zip')
|
51
|
+
end
|
33
52
|
end
|
34
53
|
end
|
@@ -33,13 +33,13 @@ module Bulkrax
|
|
33
33
|
current_status&.created_at
|
34
34
|
end
|
35
35
|
|
36
|
-
def status_info(e = nil)
|
36
|
+
def status_info(e = nil, current_run = nil)
|
37
37
|
if e.nil?
|
38
|
-
self.statuses.create!(status_message: 'Complete', runnable: last_run)
|
38
|
+
self.statuses.create!(status_message: 'Complete', runnable: current_run || last_run)
|
39
39
|
elsif e.is_a?(String)
|
40
|
-
self.statuses.create!(status_message: e, runnable: last_run)
|
40
|
+
self.statuses.create!(status_message: e, runnable: current_run || last_run)
|
41
41
|
else
|
42
|
-
self.statuses.create!(status_message: 'Failed', runnable: last_run, error_class: e.class.to_s, error_message: e.message, error_backtrace: e.backtrace)
|
42
|
+
self.statuses.create!(status_message: 'Failed', runnable: current_run || last_run, error_class: e.class.to_s, error_message: e.message, error_backtrace: e.backtrace)
|
43
43
|
end
|
44
44
|
end
|
45
45
|
|
@@ -1,15 +1,15 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Bulkrax
|
4
|
-
class ApplicationParser
|
5
|
-
attr_accessor :importerexporter
|
4
|
+
class ApplicationParser # rubocop:disable Metrics/ClassLength
|
5
|
+
attr_accessor :importerexporter, :headers
|
6
6
|
alias importer importerexporter
|
7
7
|
alias exporter importerexporter
|
8
|
-
delegate :only_updates, :limit, :current_run, :errors,
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
8
|
+
delegate :only_updates, :limit, :current_run, :errors, :mapping,
|
9
|
+
:seen, :increment_counters, :parser_fields, :user, :keys_without_numbers,
|
10
|
+
:key_without_numbers, :status, :status_info, :status_at,
|
11
|
+
:exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
|
12
|
+
to: :importerexporter
|
13
13
|
|
14
14
|
def self.parser_fields
|
15
15
|
{}
|
@@ -25,6 +25,7 @@ module Bulkrax
|
|
25
25
|
|
26
26
|
def initialize(importerexporter)
|
27
27
|
@importerexporter = importerexporter
|
28
|
+
@headers = []
|
28
29
|
end
|
29
30
|
|
30
31
|
# @api
|
@@ -43,20 +44,54 @@ module Bulkrax
|
|
43
44
|
end
|
44
45
|
|
45
46
|
def source_identifier
|
46
|
-
@source_identifier ||=
|
47
|
+
@source_identifier ||= get_field_mapping_hash_for('source_identifier')&.values&.first&.[]('from')&.first&.to_sym || :source_identifier
|
47
48
|
end
|
48
49
|
|
49
50
|
def work_identifier
|
50
|
-
@work_identifier ||=
|
51
|
+
@work_identifier ||= get_field_mapping_hash_for('source_identifier')&.keys&.first&.to_sym || :source
|
51
52
|
end
|
52
53
|
|
53
|
-
def
|
54
|
-
@
|
55
|
-
|
56
|
-
|
57
|
-
|
54
|
+
def related_parents_raw_mapping
|
55
|
+
@related_parents_raw_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.values&.first&.[]('from')&.first
|
56
|
+
end
|
57
|
+
|
58
|
+
def related_parents_parsed_mapping
|
59
|
+
@related_parents_parsed_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.keys&.first
|
60
|
+
end
|
61
|
+
|
62
|
+
def related_children_raw_mapping
|
63
|
+
@related_children_raw_mapping ||= get_field_mapping_hash_for('related_children_field_mapping')&.values&.first&.[]('from')&.first
|
64
|
+
end
|
65
|
+
|
66
|
+
def related_children_parsed_mapping
|
67
|
+
@related_children_parsed_mapping ||= get_field_mapping_hash_for('related_children_field_mapping')&.keys&.first
|
68
|
+
end
|
69
|
+
|
70
|
+
def get_field_mapping_hash_for(key)
|
71
|
+
return instance_variable_get("@#{key}_hash") if instance_variable_get("@#{key}_hash").present?
|
72
|
+
|
73
|
+
instance_variable_set(
|
74
|
+
"@#{key}_hash",
|
75
|
+
importerexporter.mapping.with_indifferent_access.select { |_, h| h.key?(key) }
|
76
|
+
)
|
77
|
+
raise StandardError, "more than one #{key} declared: #{instance_variable_get("@#{key}_hash").keys.join(', ')}" if instance_variable_get("@#{key}_hash").length > 1
|
58
78
|
|
59
|
-
|
79
|
+
instance_variable_get("@#{key}_hash")
|
80
|
+
end
|
81
|
+
|
82
|
+
def collection_field_mapping
|
83
|
+
ActiveSupport::Deprecation.warn(
|
84
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
85
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
86
|
+
)
|
87
|
+
Bulkrax.collection_field_mapping[self.entry_class.to_s]&.to_sym || :collection
|
88
|
+
end
|
89
|
+
|
90
|
+
def model_field_mappings
|
91
|
+
model_mappings = Bulkrax.field_mappings[self.class.to_s]&.dig('model', :from) || []
|
92
|
+
model_mappings |= ['model']
|
93
|
+
|
94
|
+
model_mappings
|
60
95
|
end
|
61
96
|
|
62
97
|
def perform_method
|
@@ -91,76 +126,19 @@ module Bulkrax
|
|
91
126
|
path
|
92
127
|
end
|
93
128
|
|
129
|
+
# Base path for imported and exported files
|
130
|
+
def base_path(type = 'import')
|
131
|
+
ENV['HYKU_MULTITENANT'] ? File.join(Bulkrax.send("#{type}_path"), Site.instance.account.name) : Bulkrax.send("#{type}_path")
|
132
|
+
end
|
133
|
+
|
94
134
|
# Path where we'll store the import metadata and files
|
95
135
|
# this is used for uploaded and cloud files
|
96
136
|
def path_for_import
|
97
|
-
@path_for_import = File.join(
|
137
|
+
@path_for_import = File.join(base_path, importerexporter.path_string)
|
98
138
|
FileUtils.mkdir_p(@path_for_import) unless File.exist?(@path_for_import)
|
99
139
|
@path_for_import
|
100
140
|
end
|
101
141
|
|
102
|
-
# Optional, only used by certain parsers
|
103
|
-
# Other parsers should override with a custom or empty method
|
104
|
-
# Will be skipped unless the #record is a Hash
|
105
|
-
def create_parent_child_relationships
|
106
|
-
parents.each do |key, value|
|
107
|
-
parent = entry_class.where(
|
108
|
-
identifier: key,
|
109
|
-
importerexporter_id: importerexporter.id,
|
110
|
-
importerexporter_type: 'Bulkrax::Importer'
|
111
|
-
).first
|
112
|
-
|
113
|
-
# not finding the entries here indicates that the given identifiers are incorrect
|
114
|
-
# in that case we should log that
|
115
|
-
children = value.map do |child|
|
116
|
-
entry_class.where(
|
117
|
-
identifier: child,
|
118
|
-
importerexporter_id: importerexporter.id,
|
119
|
-
importerexporter_type: 'Bulkrax::Importer'
|
120
|
-
).first
|
121
|
-
end.compact.uniq
|
122
|
-
|
123
|
-
if parent.present? && (children.length != value.length)
|
124
|
-
# Increment the failures for the number we couldn't find
|
125
|
-
# Because all of our entries have been created by now, if we can't find them, the data is wrong
|
126
|
-
Rails.logger.error("Expected #{value.length} children for parent entry #{parent.id}, found #{children.length}")
|
127
|
-
break if children.empty?
|
128
|
-
Rails.logger.warn("Adding #{children.length} children to parent entry #{parent.id} (expected #{value.length})")
|
129
|
-
end
|
130
|
-
parent_id = parent.id
|
131
|
-
child_entry_ids = children.map(&:id)
|
132
|
-
ChildRelationshipsJob.perform_later(parent_id, child_entry_ids, current_run.id)
|
133
|
-
end
|
134
|
-
rescue StandardError => e
|
135
|
-
status_info(e)
|
136
|
-
end
|
137
|
-
|
138
|
-
def parents
|
139
|
-
@parents ||= setup_parents
|
140
|
-
end
|
141
|
-
|
142
|
-
def setup_parents
|
143
|
-
pts = []
|
144
|
-
records.each do |record|
|
145
|
-
r = if record.respond_to?(:to_h)
|
146
|
-
record.to_h
|
147
|
-
else
|
148
|
-
record
|
149
|
-
end
|
150
|
-
next unless r.is_a?(Hash)
|
151
|
-
children = if r[:children].is_a?(String)
|
152
|
-
r[:children].split(/\s*[:;|]\s*/)
|
153
|
-
else
|
154
|
-
r[:children]
|
155
|
-
end
|
156
|
-
next if children.blank?
|
157
|
-
pts << {
|
158
|
-
r[source_identifier] => children
|
159
|
-
}
|
160
|
-
end
|
161
|
-
pts.blank? ? pts : pts.inject(:merge)
|
162
|
-
end
|
163
|
-
|
164
142
|
def setup_export_file
|
165
143
|
raise StandardError, 'must be defined' if exporter?
|
166
144
|
end
|
@@ -288,12 +266,9 @@ module Bulkrax
|
|
288
266
|
private
|
289
267
|
|
290
268
|
def real_import_file_path
|
291
|
-
if file? && zip?
|
292
|
-
|
293
|
-
|
294
|
-
else
|
295
|
-
parser_fields['import_file_path']
|
296
|
-
end
|
269
|
+
return importer_unzip_path if file? && zip?
|
270
|
+
|
271
|
+
parser_fields['import_file_path']
|
297
272
|
end
|
298
273
|
end
|
299
274
|
end
|
@@ -40,7 +40,7 @@ module Bulkrax
|
|
40
40
|
raise StandardError, 'No metadata files were found' if path.blank?
|
41
41
|
data = entry_class.read_data(path)
|
42
42
|
data = entry_class.data_for_entry(data, source_identifier)
|
43
|
-
data[:file] = bag.bag_files.join('|')
|
43
|
+
data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
|
44
44
|
data
|
45
45
|
end
|
46
46
|
end
|
@@ -58,7 +58,7 @@ module Bulkrax
|
|
58
58
|
collection_type_gid: Hyrax::CollectionType.find_or_create_default_collection_type.gid
|
59
59
|
}
|
60
60
|
new_entry = find_or_create_entry(collection_entry_class, collection, 'Bulkrax::Importer', metadata)
|
61
|
-
|
61
|
+
ImportCollectionJob.perform_now(new_entry.id, current_run.id)
|
62
62
|
increment_counters(index, true)
|
63
63
|
end
|
64
64
|
end
|
@@ -83,13 +83,22 @@ module Bulkrax
|
|
83
83
|
end
|
84
84
|
|
85
85
|
def collections
|
86
|
-
|
86
|
+
ActiveSupport::Deprecation.warn(
|
87
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
88
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
89
|
+
)
|
90
|
+
records.map { |r| r[collection_field_mapping].split(/\s*[;|]\s*/) if r[collection_field_mapping].present? }.flatten.compact.uniq
|
87
91
|
end
|
88
92
|
|
89
93
|
def collections_total
|
90
94
|
collections.size
|
91
95
|
end
|
92
96
|
|
97
|
+
# TODO: change to differentiate between collection and work records when adding ability to import collection metadata
|
98
|
+
def works_total
|
99
|
+
total
|
100
|
+
end
|
101
|
+
|
93
102
|
def total
|
94
103
|
metadata_paths.count
|
95
104
|
end
|