bulkrax 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/app/controllers/bulkrax/exporters_controller.rb +12 -4
- data/app/controllers/bulkrax/importers_controller.rb +22 -17
- data/app/factories/bulkrax/object_factory.rb +44 -61
- data/app/jobs/bulkrax/create_relationships_job.rb +187 -0
- data/app/jobs/bulkrax/delete_work_job.rb +6 -2
- data/app/jobs/bulkrax/export_work_job.rb +3 -1
- data/app/jobs/bulkrax/exporter_job.rb +1 -0
- data/app/jobs/bulkrax/{import_work_collection_job.rb → import_collection_job.rb} +2 -2
- data/app/jobs/bulkrax/importer_job.rb +16 -1
- data/app/matchers/bulkrax/application_matcher.rb +9 -6
- data/app/models/bulkrax/csv_collection_entry.rb +8 -6
- data/app/models/bulkrax/csv_entry.rb +139 -45
- data/app/models/bulkrax/entry.rb +19 -8
- data/app/models/bulkrax/exporter.rb +12 -5
- data/app/models/bulkrax/importer.rb +22 -5
- data/app/models/bulkrax/oai_entry.rb +5 -1
- data/app/models/bulkrax/rdf_entry.rb +16 -7
- data/app/models/bulkrax/xml_entry.rb +4 -0
- data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/file_factory.rb +2 -1
- data/app/models/concerns/bulkrax/has_matchers.rb +59 -16
- data/app/models/concerns/bulkrax/import_behavior.rb +35 -5
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +19 -0
- data/app/models/concerns/bulkrax/status_info.rb +4 -4
- data/app/parsers/bulkrax/application_parser.rb +59 -84
- data/app/parsers/bulkrax/bagit_parser.rb +12 -3
- data/app/parsers/bulkrax/csv_parser.rb +117 -62
- data/app/parsers/bulkrax/oai_dc_parser.rb +5 -2
- data/app/parsers/bulkrax/xml_parser.rb +5 -0
- data/app/views/bulkrax/exporters/_form.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +13 -1
- data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +45 -14
- data/app/views/bulkrax/importers/edit.html.erb +2 -0
- data/app/views/bulkrax/importers/index.html.erb +15 -17
- data/app/views/bulkrax/importers/show.html.erb +6 -2
- data/config/locales/bulkrax.en.yml +1 -0
- data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +5 -1
- data/db/migrate/20211004170708_change_bulkrax_statuses_error_message_column_type_to_text.rb +5 -0
- data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +6 -0
- data/lib/bulkrax/engine.rb +1 -1
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +9 -17
- data/lib/generators/bulkrax/templates/bin/importer +17 -11
- data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +3 -1
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +7 -12
- metadata +13 -7
- data/app/jobs/bulkrax/child_relationships_job.rb +0 -128
@@ -12,6 +12,8 @@ module Bulkrax
|
|
12
12
|
raise CollectionsCreatedError unless collections_created?
|
13
13
|
@item = factory.run!
|
14
14
|
end
|
15
|
+
parent_jobs if self.parsed_metadata[related_parents_parsed_mapping].present?
|
16
|
+
child_jobs if self.parsed_metadata[related_children_parsed_mapping].present?
|
15
17
|
rescue RSolr::Error::Http, CollectionsCreatedError => e
|
16
18
|
raise e
|
17
19
|
rescue StandardError => e
|
@@ -22,7 +24,19 @@ module Bulkrax
|
|
22
24
|
return @item
|
23
25
|
end
|
24
26
|
|
25
|
-
def
|
27
|
+
def parent_jobs
|
28
|
+
self.parsed_metadata[related_parents_parsed_mapping].each do |parent_identifier|
|
29
|
+
CreateRelationshipsJob.perform_later(entry_identifier: self.identifier, parent_identifier: parent_identifier, importer_run: self.last_run)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def child_jobs
|
34
|
+
self.parsed_metadata[related_children_parsed_mapping].each do |child_identifier|
|
35
|
+
CreateRelationshipsJob.perform_later(entry_identifier: self.identifier, child_identifier: child_identifier, importer_run: self.last_run)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def find_collection_ids
|
26
40
|
self.collection_ids
|
27
41
|
end
|
28
42
|
|
@@ -57,15 +71,27 @@ module Bulkrax
|
|
57
71
|
end
|
58
72
|
|
59
73
|
def add_collections
|
60
|
-
return if
|
61
|
-
|
62
|
-
|
74
|
+
return if find_collection_ids.blank?
|
75
|
+
|
76
|
+
ActiveSupport::Deprecation.warn(
|
77
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
78
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
79
|
+
)
|
80
|
+
self.parsed_metadata['member_of_collections_attributes'] = {}
|
81
|
+
find_collection_ids.each_with_index do |c, i|
|
82
|
+
self.parsed_metadata['member_of_collections_attributes'][i.to_s] = { id: c }
|
83
|
+
end
|
63
84
|
end
|
64
85
|
|
65
86
|
def factory
|
87
|
+
ActiveSupport::Deprecation.warn(
|
88
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
89
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
90
|
+
)
|
66
91
|
@factory ||= Bulkrax::ObjectFactory.new(attributes: self.parsed_metadata,
|
67
92
|
source_identifier_value: identifier,
|
68
93
|
work_identifier: parser.work_identifier,
|
94
|
+
collection_field_mapping: parser.collection_field_mapping,
|
69
95
|
replace_files: replace_files,
|
70
96
|
user: user,
|
71
97
|
klass: factory_class,
|
@@ -80,7 +106,11 @@ module Bulkrax
|
|
80
106
|
else
|
81
107
|
Bulkrax.default_work_type
|
82
108
|
end
|
83
|
-
|
109
|
+
|
110
|
+
# return the name of the collection or work
|
111
|
+
fc.tr!(' ', '_')
|
112
|
+
fc.downcase! if fc.match?(/[-_]/)
|
113
|
+
fc.camelcase.constantize
|
84
114
|
rescue NameError
|
85
115
|
nil
|
86
116
|
rescue
|
@@ -25,10 +25,29 @@ module Bulkrax
|
|
25
25
|
if collection
|
26
26
|
current_run.total_collection_entries = index + 1 unless parser.collections_total.positive?
|
27
27
|
else
|
28
|
+
# TODO: differentiate between work and collection counts for exporters
|
28
29
|
current_run.total_work_entries = index + 1 unless limit.to_i.positive? || parser.total.positive?
|
29
30
|
end
|
30
31
|
current_run.enqueued_records = index + 1
|
31
32
|
current_run.save!
|
32
33
|
end
|
34
|
+
|
35
|
+
def keys_without_numbers(keys)
|
36
|
+
keys.map { |key| key_without_numbers(key) }
|
37
|
+
end
|
38
|
+
|
39
|
+
def key_without_numbers(key)
|
40
|
+
key.gsub(/_\d+/, '').sub(/^\d+_/, '')
|
41
|
+
end
|
42
|
+
|
43
|
+
# Is this a file?
|
44
|
+
def file?
|
45
|
+
parser_fields&.[]('import_file_path') && File.file?(parser_fields['import_file_path'])
|
46
|
+
end
|
47
|
+
|
48
|
+
# Is this a zip file?
|
49
|
+
def zip?
|
50
|
+
parser_fields&.[]('import_file_path') && MIME::Types.type_for(parser_fields['import_file_path']).include?('application/zip')
|
51
|
+
end
|
33
52
|
end
|
34
53
|
end
|
@@ -33,13 +33,13 @@ module Bulkrax
|
|
33
33
|
current_status&.created_at
|
34
34
|
end
|
35
35
|
|
36
|
-
def status_info(e = nil)
|
36
|
+
def status_info(e = nil, current_run = nil)
|
37
37
|
if e.nil?
|
38
|
-
self.statuses.create!(status_message: 'Complete', runnable: last_run)
|
38
|
+
self.statuses.create!(status_message: 'Complete', runnable: current_run || last_run)
|
39
39
|
elsif e.is_a?(String)
|
40
|
-
self.statuses.create!(status_message: e, runnable: last_run)
|
40
|
+
self.statuses.create!(status_message: e, runnable: current_run || last_run)
|
41
41
|
else
|
42
|
-
self.statuses.create!(status_message: 'Failed', runnable: last_run, error_class: e.class.to_s, error_message: e.message, error_backtrace: e.backtrace)
|
42
|
+
self.statuses.create!(status_message: 'Failed', runnable: current_run || last_run, error_class: e.class.to_s, error_message: e.message, error_backtrace: e.backtrace)
|
43
43
|
end
|
44
44
|
end
|
45
45
|
|
@@ -1,15 +1,15 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Bulkrax
|
4
|
-
class ApplicationParser
|
5
|
-
attr_accessor :importerexporter
|
4
|
+
class ApplicationParser # rubocop:disable Metrics/ClassLength
|
5
|
+
attr_accessor :importerexporter, :headers
|
6
6
|
alias importer importerexporter
|
7
7
|
alias exporter importerexporter
|
8
|
-
delegate :only_updates, :limit, :current_run, :errors,
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
8
|
+
delegate :only_updates, :limit, :current_run, :errors, :mapping,
|
9
|
+
:seen, :increment_counters, :parser_fields, :user, :keys_without_numbers,
|
10
|
+
:key_without_numbers, :status, :status_info, :status_at,
|
11
|
+
:exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
|
12
|
+
to: :importerexporter
|
13
13
|
|
14
14
|
def self.parser_fields
|
15
15
|
{}
|
@@ -25,6 +25,7 @@ module Bulkrax
|
|
25
25
|
|
26
26
|
def initialize(importerexporter)
|
27
27
|
@importerexporter = importerexporter
|
28
|
+
@headers = []
|
28
29
|
end
|
29
30
|
|
30
31
|
# @api
|
@@ -43,20 +44,54 @@ module Bulkrax
|
|
43
44
|
end
|
44
45
|
|
45
46
|
def source_identifier
|
46
|
-
@source_identifier ||=
|
47
|
+
@source_identifier ||= get_field_mapping_hash_for('source_identifier')&.values&.first&.[]('from')&.first&.to_sym || :source_identifier
|
47
48
|
end
|
48
49
|
|
49
50
|
def work_identifier
|
50
|
-
@work_identifier ||=
|
51
|
+
@work_identifier ||= get_field_mapping_hash_for('source_identifier')&.keys&.first&.to_sym || :source
|
51
52
|
end
|
52
53
|
|
53
|
-
def
|
54
|
-
@
|
55
|
-
|
56
|
-
|
57
|
-
|
54
|
+
def related_parents_raw_mapping
|
55
|
+
@related_parents_raw_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.values&.first&.[]('from')&.first
|
56
|
+
end
|
57
|
+
|
58
|
+
def related_parents_parsed_mapping
|
59
|
+
@related_parents_parsed_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.keys&.first
|
60
|
+
end
|
61
|
+
|
62
|
+
def related_children_raw_mapping
|
63
|
+
@related_children_raw_mapping ||= get_field_mapping_hash_for('related_children_field_mapping')&.values&.first&.[]('from')&.first
|
64
|
+
end
|
65
|
+
|
66
|
+
def related_children_parsed_mapping
|
67
|
+
@related_children_parsed_mapping ||= get_field_mapping_hash_for('related_children_field_mapping')&.keys&.first
|
68
|
+
end
|
69
|
+
|
70
|
+
def get_field_mapping_hash_for(key)
|
71
|
+
return instance_variable_get("@#{key}_hash") if instance_variable_get("@#{key}_hash").present?
|
72
|
+
|
73
|
+
instance_variable_set(
|
74
|
+
"@#{key}_hash",
|
75
|
+
importerexporter.mapping.with_indifferent_access.select { |_, h| h.key?(key) }
|
76
|
+
)
|
77
|
+
raise StandardError, "more than one #{key} declared: #{instance_variable_get("@#{key}_hash").keys.join(', ')}" if instance_variable_get("@#{key}_hash").length > 1
|
58
78
|
|
59
|
-
|
79
|
+
instance_variable_get("@#{key}_hash")
|
80
|
+
end
|
81
|
+
|
82
|
+
def collection_field_mapping
|
83
|
+
ActiveSupport::Deprecation.warn(
|
84
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
85
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
86
|
+
)
|
87
|
+
Bulkrax.collection_field_mapping[self.entry_class.to_s]&.to_sym || :collection
|
88
|
+
end
|
89
|
+
|
90
|
+
def model_field_mappings
|
91
|
+
model_mappings = Bulkrax.field_mappings[self.class.to_s]&.dig('model', :from) || []
|
92
|
+
model_mappings |= ['model']
|
93
|
+
|
94
|
+
model_mappings
|
60
95
|
end
|
61
96
|
|
62
97
|
def perform_method
|
@@ -91,76 +126,19 @@ module Bulkrax
|
|
91
126
|
path
|
92
127
|
end
|
93
128
|
|
129
|
+
# Base path for imported and exported files
|
130
|
+
def base_path(type = 'import')
|
131
|
+
ENV['HYKU_MULTITENANT'] ? File.join(Bulkrax.send("#{type}_path"), Site.instance.account.name) : Bulkrax.send("#{type}_path")
|
132
|
+
end
|
133
|
+
|
94
134
|
# Path where we'll store the import metadata and files
|
95
135
|
# this is used for uploaded and cloud files
|
96
136
|
def path_for_import
|
97
|
-
@path_for_import = File.join(
|
137
|
+
@path_for_import = File.join(base_path, importerexporter.path_string)
|
98
138
|
FileUtils.mkdir_p(@path_for_import) unless File.exist?(@path_for_import)
|
99
139
|
@path_for_import
|
100
140
|
end
|
101
141
|
|
102
|
-
# Optional, only used by certain parsers
|
103
|
-
# Other parsers should override with a custom or empty method
|
104
|
-
# Will be skipped unless the #record is a Hash
|
105
|
-
def create_parent_child_relationships
|
106
|
-
parents.each do |key, value|
|
107
|
-
parent = entry_class.where(
|
108
|
-
identifier: key,
|
109
|
-
importerexporter_id: importerexporter.id,
|
110
|
-
importerexporter_type: 'Bulkrax::Importer'
|
111
|
-
).first
|
112
|
-
|
113
|
-
# not finding the entries here indicates that the given identifiers are incorrect
|
114
|
-
# in that case we should log that
|
115
|
-
children = value.map do |child|
|
116
|
-
entry_class.where(
|
117
|
-
identifier: child,
|
118
|
-
importerexporter_id: importerexporter.id,
|
119
|
-
importerexporter_type: 'Bulkrax::Importer'
|
120
|
-
).first
|
121
|
-
end.compact.uniq
|
122
|
-
|
123
|
-
if parent.present? && (children.length != value.length)
|
124
|
-
# Increment the failures for the number we couldn't find
|
125
|
-
# Because all of our entries have been created by now, if we can't find them, the data is wrong
|
126
|
-
Rails.logger.error("Expected #{value.length} children for parent entry #{parent.id}, found #{children.length}")
|
127
|
-
break if children.empty?
|
128
|
-
Rails.logger.warn("Adding #{children.length} children to parent entry #{parent.id} (expected #{value.length})")
|
129
|
-
end
|
130
|
-
parent_id = parent.id
|
131
|
-
child_entry_ids = children.map(&:id)
|
132
|
-
ChildRelationshipsJob.perform_later(parent_id, child_entry_ids, current_run.id)
|
133
|
-
end
|
134
|
-
rescue StandardError => e
|
135
|
-
status_info(e)
|
136
|
-
end
|
137
|
-
|
138
|
-
def parents
|
139
|
-
@parents ||= setup_parents
|
140
|
-
end
|
141
|
-
|
142
|
-
def setup_parents
|
143
|
-
pts = []
|
144
|
-
records.each do |record|
|
145
|
-
r = if record.respond_to?(:to_h)
|
146
|
-
record.to_h
|
147
|
-
else
|
148
|
-
record
|
149
|
-
end
|
150
|
-
next unless r.is_a?(Hash)
|
151
|
-
children = if r[:children].is_a?(String)
|
152
|
-
r[:children].split(/\s*[:;|]\s*/)
|
153
|
-
else
|
154
|
-
r[:children]
|
155
|
-
end
|
156
|
-
next if children.blank?
|
157
|
-
pts << {
|
158
|
-
r[source_identifier] => children
|
159
|
-
}
|
160
|
-
end
|
161
|
-
pts.blank? ? pts : pts.inject(:merge)
|
162
|
-
end
|
163
|
-
|
164
142
|
def setup_export_file
|
165
143
|
raise StandardError, 'must be defined' if exporter?
|
166
144
|
end
|
@@ -288,12 +266,9 @@ module Bulkrax
|
|
288
266
|
private
|
289
267
|
|
290
268
|
def real_import_file_path
|
291
|
-
if file? && zip?
|
292
|
-
|
293
|
-
|
294
|
-
else
|
295
|
-
parser_fields['import_file_path']
|
296
|
-
end
|
269
|
+
return importer_unzip_path if file? && zip?
|
270
|
+
|
271
|
+
parser_fields['import_file_path']
|
297
272
|
end
|
298
273
|
end
|
299
274
|
end
|
@@ -40,7 +40,7 @@ module Bulkrax
|
|
40
40
|
raise StandardError, 'No metadata files were found' if path.blank?
|
41
41
|
data = entry_class.read_data(path)
|
42
42
|
data = entry_class.data_for_entry(data, source_identifier)
|
43
|
-
data[:file] = bag.bag_files.join('|')
|
43
|
+
data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
|
44
44
|
data
|
45
45
|
end
|
46
46
|
end
|
@@ -58,7 +58,7 @@ module Bulkrax
|
|
58
58
|
collection_type_gid: Hyrax::CollectionType.find_or_create_default_collection_type.gid
|
59
59
|
}
|
60
60
|
new_entry = find_or_create_entry(collection_entry_class, collection, 'Bulkrax::Importer', metadata)
|
61
|
-
|
61
|
+
ImportCollectionJob.perform_now(new_entry.id, current_run.id)
|
62
62
|
increment_counters(index, true)
|
63
63
|
end
|
64
64
|
end
|
@@ -83,13 +83,22 @@ module Bulkrax
|
|
83
83
|
end
|
84
84
|
|
85
85
|
def collections
|
86
|
-
|
86
|
+
ActiveSupport::Deprecation.warn(
|
87
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
88
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
89
|
+
)
|
90
|
+
records.map { |r| r[collection_field_mapping].split(/\s*[;|]\s*/) if r[collection_field_mapping].present? }.flatten.compact.uniq
|
87
91
|
end
|
88
92
|
|
89
93
|
def collections_total
|
90
94
|
collections.size
|
91
95
|
end
|
92
96
|
|
97
|
+
# TODO: change to differentiate between collection and work records when adding ability to import collection metadata
|
98
|
+
def works_total
|
99
|
+
total
|
100
|
+
end
|
101
|
+
|
93
102
|
def total
|
94
103
|
metadata_paths.count
|
95
104
|
end
|
@@ -2,31 +2,47 @@
|
|
2
2
|
|
3
3
|
require 'csv'
|
4
4
|
module Bulkrax
|
5
|
-
class CsvParser < ApplicationParser
|
5
|
+
class CsvParser < ApplicationParser # rubocop:disable Metrics/ClassLength
|
6
6
|
include ErroredEntries
|
7
7
|
def self.export_supported?
|
8
8
|
true
|
9
9
|
end
|
10
10
|
|
11
|
-
def
|
12
|
-
|
11
|
+
def records(_opts = {})
|
12
|
+
file_for_import = only_updates ? parser_fields['partial_import_file_path'] : import_file_path
|
13
|
+
# data for entry does not need source_identifier for csv, because csvs are read sequentially and mapped after raw data is read.
|
14
|
+
csv_data = entry_class.read_data(file_for_import)
|
15
|
+
importer.parser_fields['total'] = csv_data.count
|
16
|
+
importer.save
|
17
|
+
@records ||= csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil) }
|
13
18
|
end
|
14
19
|
|
15
20
|
def collections
|
16
|
-
|
17
|
-
|
21
|
+
ActiveSupport::Deprecation.warn(
|
22
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
23
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
24
|
+
)
|
18
25
|
# retrieve a list of unique collections
|
19
|
-
records.map
|
26
|
+
records.map do |r|
|
27
|
+
collections = []
|
28
|
+
r[collection_field_mapping].split(/\s*[;|]\s*/).each { |title| collections << { title: title } } if r[collection_field_mapping].present?
|
29
|
+
model_field_mappings.each do |model_mapping|
|
30
|
+
collections << r if r[model_mapping.to_sym]&.downcase == 'collection'
|
31
|
+
end
|
32
|
+
collections
|
33
|
+
end.flatten.compact.uniq
|
20
34
|
end
|
21
35
|
|
22
36
|
def collections_total
|
23
37
|
collections.size
|
24
38
|
end
|
25
39
|
|
26
|
-
def
|
27
|
-
|
28
|
-
|
29
|
-
|
40
|
+
def works
|
41
|
+
records - collections
|
42
|
+
end
|
43
|
+
|
44
|
+
def works_total
|
45
|
+
works.size
|
30
46
|
end
|
31
47
|
|
32
48
|
# We could use CsvEntry#fields_from_data(data) but that would mean re-reading the data
|
@@ -44,8 +60,9 @@ module Bulkrax
|
|
44
60
|
end
|
45
61
|
|
46
62
|
def valid_import?
|
47
|
-
|
48
|
-
|
63
|
+
import_strings = keys_without_numbers(import_fields.map(&:to_s))
|
64
|
+
error_alert = "Missing at least one required element, missing element(s) are: #{missing_elements(import_strings).join(', ')}"
|
65
|
+
raise StandardError, error_alert unless required_elements?(import_strings)
|
49
66
|
|
50
67
|
file_paths.is_a?(Array)
|
51
68
|
rescue StandardError => e
|
@@ -56,26 +73,26 @@ module Bulkrax
|
|
56
73
|
def create_collections
|
57
74
|
collections.each_with_index do |collection, index|
|
58
75
|
next if collection.blank?
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
}
|
65
|
-
new_entry = find_or_create_entry(collection_entry_class, collection, 'Bulkrax::Importer', metadata)
|
66
|
-
ImportWorkCollectionJob.perform_now(new_entry.id, current_run.id)
|
76
|
+
break if records.find_index(collection).present? && limit_reached?(limit, records.find_index(collection))
|
77
|
+
|
78
|
+
new_entry = find_or_create_entry(collection_entry_class, unique_collection_identifier(collection), 'Bulkrax::Importer', collection.to_h)
|
79
|
+
# TODO: add support for :delete option
|
80
|
+
ImportCollectionJob.perform_now(new_entry.id, current_run.id)
|
67
81
|
increment_counters(index, true)
|
68
82
|
end
|
83
|
+
importer.record_status
|
84
|
+
rescue StandardError => e
|
85
|
+
status_info(e)
|
69
86
|
end
|
70
87
|
|
71
88
|
def create_works
|
72
|
-
|
73
|
-
next unless record_has_source_identifier(
|
74
|
-
break if limit_reached?(limit,
|
89
|
+
works.each_with_index do |work, index|
|
90
|
+
next unless record_has_source_identifier(work, records.find_index(work))
|
91
|
+
break if limit_reached?(limit, records.find_index(work))
|
75
92
|
|
76
|
-
seen[
|
77
|
-
new_entry = find_or_create_entry(entry_class,
|
78
|
-
if
|
93
|
+
seen[work[source_identifier]] = true
|
94
|
+
new_entry = find_or_create_entry(entry_class, work[source_identifier], 'Bulkrax::Importer', work.to_h)
|
95
|
+
if work[:delete].present?
|
79
96
|
DeleteWorkJob.send(perform_method, new_entry, current_run)
|
80
97
|
else
|
81
98
|
ImportWorkJob.send(perform_method, new_entry.id, current_run.id)
|
@@ -99,10 +116,6 @@ module Bulkrax
|
|
99
116
|
path
|
100
117
|
end
|
101
118
|
|
102
|
-
def create_parent_child_relationships
|
103
|
-
super
|
104
|
-
end
|
105
|
-
|
106
119
|
def extra_filters
|
107
120
|
output = ""
|
108
121
|
if importerexporter.start_date.present?
|
@@ -117,6 +130,8 @@ module Bulkrax
|
|
117
130
|
|
118
131
|
def current_work_ids
|
119
132
|
case importerexporter.export_from
|
133
|
+
when 'all'
|
134
|
+
ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", rows: 2_147_483_647).map(&:id)
|
120
135
|
when 'collection'
|
121
136
|
ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", rows: 2_000_000_000).map(&:id)
|
122
137
|
when 'worktype'
|
@@ -126,9 +141,16 @@ module Bulkrax
|
|
126
141
|
complete_statuses = Bulkrax::Status.latest_by_statusable
|
127
142
|
.includes(:statusable)
|
128
143
|
.where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
|
129
|
-
complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier }
|
130
144
|
|
131
|
-
|
145
|
+
complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
|
146
|
+
extra_filters = extra_filters.presence || '*:*'
|
147
|
+
|
148
|
+
ActiveFedora::SolrService.get(
|
149
|
+
extra_filters.to_s,
|
150
|
+
fq: "#{work_identifier}_sim:(#{complete_entry_identifiers.join(' OR ')})",
|
151
|
+
fl: 'id',
|
152
|
+
rows: 2_000_000_000
|
153
|
+
)['response']['docs'].map { |obj| obj['id'] }
|
132
154
|
end
|
133
155
|
end
|
134
156
|
|
@@ -136,12 +158,18 @@ module Bulkrax
|
|
136
158
|
current_work_ids.each_with_index do |wid, index|
|
137
159
|
break if limit_reached?(limit, index)
|
138
160
|
new_entry = find_or_create_entry(entry_class, wid, 'Bulkrax::Exporter')
|
139
|
-
|
161
|
+
begin
|
162
|
+
entry = Bulkrax::ExportWorkJob.perform_now(new_entry.id, current_run.id)
|
163
|
+
rescue => e
|
164
|
+
Rails.logger.info("#{e.message} was detected during export")
|
165
|
+
end
|
166
|
+
self.headers |= entry.parsed_metadata.keys if entry
|
140
167
|
end
|
141
168
|
end
|
142
169
|
alias create_from_collection create_new_entries
|
143
170
|
alias create_from_importer create_new_entries
|
144
171
|
alias create_from_worktype create_new_entries
|
172
|
+
alias create_from_all create_new_entries
|
145
173
|
|
146
174
|
def entry_class
|
147
175
|
CsvEntry
|
@@ -154,19 +182,11 @@ module Bulkrax
|
|
154
182
|
# See https://stackoverflow.com/questions/2650517/count-the-number-of-lines-in-a-file-without-reading-entire-file-into-memory
|
155
183
|
# Changed to grep as wc -l counts blank lines, and ignores the final unescaped line (which may or may not contain data)
|
156
184
|
def total
|
157
|
-
if importer?
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
@total = `grep -vc ^$ #{real_import_file_path}`.to_i - 1 if @total < 1
|
163
|
-
elsif exporter?
|
164
|
-
@total = importerexporter.entries.count
|
165
|
-
else
|
166
|
-
@total = 0
|
167
|
-
end
|
168
|
-
return @total
|
169
|
-
rescue StandardErrorr
|
185
|
+
@total = importer.parser_fields['total'] || 0 if importer?
|
186
|
+
@total = importerexporter.entries.count if exporter?
|
187
|
+
|
188
|
+
return @total || 0
|
189
|
+
rescue StandardError
|
170
190
|
@total = 0
|
171
191
|
end
|
172
192
|
|
@@ -201,31 +221,58 @@ module Bulkrax
|
|
201
221
|
end
|
202
222
|
end
|
203
223
|
|
204
|
-
def
|
205
|
-
|
206
|
-
new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
|
224
|
+
def export_key_allowed(key)
|
225
|
+
new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
|
207
226
|
key != source_identifier.to_s
|
208
227
|
end
|
209
228
|
|
210
229
|
# All possible column names
|
211
230
|
def export_headers
|
212
|
-
headers =
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
231
|
+
headers = sort_headers(self.headers)
|
232
|
+
|
233
|
+
# we don't want access_control_id exported and we want file at the end
|
234
|
+
headers.delete('access_control_id') if headers.include?('access_control_id')
|
235
|
+
|
236
|
+
# add the headers below at the beginning or end to maintain the preexisting export behavior
|
237
|
+
headers.prepend('model')
|
238
|
+
headers.prepend(source_identifier.to_s)
|
239
|
+
headers.prepend('id')
|
240
|
+
|
217
241
|
headers.uniq
|
218
242
|
end
|
219
243
|
|
244
|
+
def object_names
|
245
|
+
return @object_names if @object_names
|
246
|
+
|
247
|
+
@object_names = mapping.values.map { |value| value['object'] }
|
248
|
+
@object_names.uniq!.delete(nil)
|
249
|
+
|
250
|
+
@object_names
|
251
|
+
end
|
252
|
+
|
253
|
+
def sort_headers(headers)
|
254
|
+
# converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
|
255
|
+
# while keeping objects grouped together
|
256
|
+
headers.sort_by do |item|
|
257
|
+
number = item.match(/\d+/)&.[](0) || 0.to_s
|
258
|
+
sort_number = number.rjust(4, "0")
|
259
|
+
object_prefix = object_names.detect { |o| item.match(/^#{o}/) } || item
|
260
|
+
remainder = item.gsub(/^#{object_prefix}_/, '').gsub(/_#{number}/, '')
|
261
|
+
"#{object_prefix}_#{sort_number}_#{remainder}"
|
262
|
+
end
|
263
|
+
end
|
264
|
+
|
220
265
|
# in the parser as it is specific to the format
|
221
266
|
def setup_export_file
|
222
|
-
File.join(importerexporter.exporter_export_path,
|
267
|
+
File.join(importerexporter.exporter_export_path, "export_#{importerexporter.export_source}_from_#{importerexporter.export_from}.csv")
|
223
268
|
end
|
224
269
|
|
225
270
|
# Retrieve file paths for [:file] mapping in records
|
226
271
|
# and check all listed files exist.
|
227
272
|
def file_paths
|
228
273
|
raise StandardError, 'No records were found' if records.blank?
|
274
|
+
return [] if importerexporter.metadata_only?
|
275
|
+
|
229
276
|
@file_paths ||= records.map do |r|
|
230
277
|
file_mapping = Bulkrax.field_mappings.dig(self.class.to_s, 'file', :from)&.first&.to_sym || :file
|
231
278
|
next if r[file_mapping].blank?
|
@@ -244,23 +291,31 @@ module Bulkrax
|
|
244
291
|
# Retrieve the path where we expect to find the files
|
245
292
|
def path_to_files
|
246
293
|
@path_to_files ||= File.join(
|
247
|
-
|
294
|
+
zip? ? importer_unzip_path : File.dirname(import_file_path),
|
248
295
|
'files'
|
249
296
|
)
|
250
297
|
end
|
251
298
|
|
252
299
|
private
|
253
300
|
|
301
|
+
def unique_collection_identifier(collection_hash)
|
302
|
+
entry_uid = collection_hash[source_identifier]
|
303
|
+
entry_uid ||= if Bulkrax.fill_in_blank_source_identifiers.present?
|
304
|
+
Bulkrax.fill_in_blank_source_identifiers.call(self, records.find_index(collection_hash))
|
305
|
+
else
|
306
|
+
collection_hash[:title].split(/\s*[;|]\s*/).first
|
307
|
+
end
|
308
|
+
|
309
|
+
entry_uid
|
310
|
+
end
|
311
|
+
|
254
312
|
# Override to return the first CSV in the path, if a zip file is supplied
|
255
313
|
# We expect a single CSV at the top level of the zip in the CSVParser
|
256
314
|
# but we are willing to go look for it if need be
|
257
315
|
def real_import_file_path
|
258
|
-
if file? && zip?
|
259
|
-
|
260
|
-
|
261
|
-
else
|
262
|
-
parser_fields['import_file_path']
|
263
|
-
end
|
316
|
+
return Dir["#{importer_unzip_path}/**/*.csv"].first if file? && zip?
|
317
|
+
|
318
|
+
parser_fields['import_file_path']
|
264
319
|
end
|
265
320
|
end
|
266
321
|
end
|
@@ -75,7 +75,7 @@ module Bulkrax
|
|
75
75
|
|
76
76
|
new_entry = collection_entry_class.where(importerexporter: importerexporter, identifier: unique_collection_identifier, raw_metadata: metadata).first_or_create!
|
77
77
|
# perform now to ensure this gets created before work imports start
|
78
|
-
|
78
|
+
ImportCollectionJob.perform_now(new_entry.id, importerexporter.current_run.id)
|
79
79
|
increment_counters(index, true)
|
80
80
|
end
|
81
81
|
end
|
@@ -119,7 +119,10 @@ module Bulkrax
|
|
119
119
|
end
|
120
120
|
end
|
121
121
|
|
122
|
-
|
122
|
+
# TODO: change to differentiate between collection and work records when adding ability to import collection metadata
|
123
|
+
def works_total
|
124
|
+
total
|
125
|
+
end
|
123
126
|
|
124
127
|
def total
|
125
128
|
@total ||= records(quick: true).doc.find(".//resumptionToken").to_a.first.attributes["completeListSize"].to_i
|