bulkrax 1.0.2 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/app/controllers/bulkrax/exporters_controller.rb +12 -4
- data/app/controllers/bulkrax/importers_controller.rb +23 -17
- data/app/factories/bulkrax/object_factory.rb +84 -63
- data/app/jobs/bulkrax/create_relationships_job.rb +156 -0
- data/app/jobs/bulkrax/delete_work_job.rb +6 -2
- data/app/jobs/bulkrax/export_work_job.rb +3 -1
- data/app/jobs/bulkrax/exporter_job.rb +1 -0
- data/app/jobs/bulkrax/{import_work_collection_job.rb → import_collection_job.rb} +4 -2
- data/app/jobs/bulkrax/import_file_set_job.rb +69 -0
- data/app/jobs/bulkrax/import_work_job.rb +2 -0
- data/app/jobs/bulkrax/importer_job.rb +18 -1
- data/app/matchers/bulkrax/application_matcher.rb +5 -5
- data/app/models/bulkrax/csv_collection_entry.rb +8 -6
- data/app/models/bulkrax/csv_entry.rb +132 -65
- data/app/models/bulkrax/csv_file_set_entry.rb +26 -0
- data/app/models/bulkrax/entry.rb +19 -8
- data/app/models/bulkrax/exporter.rb +12 -5
- data/app/models/bulkrax/importer.rb +24 -5
- data/app/models/bulkrax/oai_entry.rb +5 -1
- data/app/models/bulkrax/rdf_entry.rb +16 -7
- data/app/models/bulkrax/xml_entry.rb +4 -0
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +39 -0
- data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/has_matchers.rb +44 -13
- data/app/models/concerns/bulkrax/import_behavior.rb +40 -5
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +23 -2
- data/app/models/concerns/bulkrax/status_info.rb +4 -4
- data/app/parsers/bulkrax/application_parser.rb +67 -84
- data/app/parsers/bulkrax/bagit_parser.rb +13 -4
- data/app/parsers/bulkrax/csv_parser.rb +170 -64
- data/app/parsers/bulkrax/oai_dc_parser.rb +6 -3
- data/app/parsers/bulkrax/xml_parser.rb +5 -0
- data/app/views/bulkrax/exporters/_form.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +2 -1
- data/app/views/bulkrax/importers/index.html.erb +17 -17
- data/app/views/bulkrax/importers/show.html.erb +52 -6
- data/config/locales/bulkrax.en.yml +1 -0
- data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +5 -1
- data/db/migrate/20211004170708_change_bulkrax_statuses_error_message_column_type_to_text.rb +5 -0
- data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +6 -0
- data/db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb +7 -0
- data/db/migrate/20220118001339_add_import_attempts_to_entries.rb +5 -0
- data/db/migrate/20220119213325_add_work_counters_to_importer_runs.rb +6 -0
- data/lib/bulkrax/engine.rb +1 -1
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +9 -17
- data/lib/generators/bulkrax/templates/bin/importer +17 -11
- data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +3 -1
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +7 -12
- metadata +22 -10
- data/app/jobs/bulkrax/child_relationships_job.rb +0 -128
@@ -6,17 +6,34 @@ module Bulkrax
|
|
6
6
|
|
7
7
|
def perform(importer_id, only_updates_since_last_import = false)
|
8
8
|
importer = Importer.find(importer_id)
|
9
|
+
|
9
10
|
importer.current_run
|
11
|
+
unzip_imported_file(importer.parser)
|
10
12
|
import(importer, only_updates_since_last_import)
|
13
|
+
update_current_run_counters(importer)
|
11
14
|
schedule(importer) if importer.schedulable?
|
12
15
|
end
|
13
16
|
|
14
17
|
def import(importer, only_updates_since_last_import)
|
15
18
|
importer.only_updates = only_updates_since_last_import || false
|
16
19
|
return unless importer.valid_import?
|
20
|
+
|
17
21
|
importer.import_collections
|
18
22
|
importer.import_works
|
19
|
-
importer.
|
23
|
+
importer.import_file_sets
|
24
|
+
end
|
25
|
+
|
26
|
+
def unzip_imported_file(parser)
|
27
|
+
return unless parser.file? && parser.zip?
|
28
|
+
|
29
|
+
parser.unzip(parser.parser_fields['import_file_path'])
|
30
|
+
end
|
31
|
+
|
32
|
+
def update_current_run_counters(importer)
|
33
|
+
importer.current_run.total_work_entries = importer.limit || importer.parser.works_total
|
34
|
+
importer.current_run.total_collection_entries = importer.parser.collections_total
|
35
|
+
importer.current_run.total_file_set_entries = importer.parser.file_sets_total
|
36
|
+
importer.current_run.save!
|
20
37
|
end
|
21
38
|
|
22
39
|
def schedule(importer)
|
@@ -20,9 +20,9 @@ module Bulkrax
|
|
20
20
|
return unless content.send(self.if[0], Regexp.new(self.if[1]))
|
21
21
|
end
|
22
22
|
|
23
|
-
@result
|
24
|
-
@result.strip
|
25
|
-
process_split
|
23
|
+
# @result will evaluate to an empty string for nil content values
|
24
|
+
@result = content.to_s.gsub(/\s/, ' ').strip # remove any line feeds and tabs
|
25
|
+
process_split if @result.present?
|
26
26
|
@result = @result[0] if @result.is_a?(Array) && @result.size == 1
|
27
27
|
process_parse
|
28
28
|
return @result
|
@@ -66,14 +66,14 @@ module Bulkrax
|
|
66
66
|
end
|
67
67
|
|
68
68
|
def parse_subject(src)
|
69
|
-
string = src.
|
69
|
+
string = src.strip.downcase
|
70
70
|
return if string.blank?
|
71
71
|
|
72
72
|
string.slice(0, 1).capitalize + string.slice(1..-1)
|
73
73
|
end
|
74
74
|
|
75
75
|
def parse_types(src)
|
76
|
-
src.
|
76
|
+
src.strip.titleize
|
77
77
|
end
|
78
78
|
|
79
79
|
# Allow for mapping a model field to the work type or collection
|
@@ -6,14 +6,16 @@ module Bulkrax
|
|
6
6
|
Collection
|
7
7
|
end
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
9
|
+
# Use identifier set by CsvParser#unique_collection_identifier, which falls back
|
10
|
+
# on the Collection's first title if record[source_identifier] is not present
|
11
|
+
def add_identifier
|
12
|
+
self.parsed_metadata[work_identifier] = [self.identifier].flatten
|
13
13
|
end
|
14
14
|
|
15
|
-
def
|
16
|
-
|
15
|
+
def add_collection_type_gid
|
16
|
+
return if self.parsed_metadata['collection_type_gid'].present?
|
17
|
+
|
18
|
+
self.parsed_metadata['collection_type_gid'] = ::Hyrax::CollectionType.find_or_create_default_collection_type.gid
|
17
19
|
end
|
18
20
|
end
|
19
21
|
end
|
@@ -14,59 +14,71 @@ module Bulkrax
|
|
14
14
|
def self.read_data(path)
|
15
15
|
raise StandardError, 'CSV path empty' if path.blank?
|
16
16
|
CSV.read(path,
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
headers: true,
|
18
|
+
header_converters: :symbol,
|
19
|
+
encoding: 'utf-8')
|
20
20
|
end
|
21
21
|
|
22
22
|
def self.data_for_entry(data, _source_id)
|
23
|
+
ActiveSupport::Deprecation.warn(
|
24
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
25
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
26
|
+
)
|
23
27
|
# If a multi-line CSV data is passed, grab the first row
|
24
28
|
data = data.first if data.is_a?(CSV::Table)
|
25
29
|
# model has to be separated so that it doesn't get mistranslated by to_h
|
26
30
|
raw_data = data.to_h
|
27
|
-
raw_data[:model] = data[:model]
|
31
|
+
raw_data[:model] = data[:model] if data[:model].present?
|
28
32
|
# If the collection field mapping is not 'collection', add 'collection' - the parser needs it
|
29
33
|
raw_data[:collection] = raw_data[collection_field.to_sym] if raw_data.keys.include?(collection_field.to_sym) && collection_field != 'collection'
|
30
|
-
# If the children field mapping is not 'children', add 'children' - the parser needs it
|
31
|
-
raw_data[:children] = raw_data[collection_field.to_sym] if raw_data.keys.include?(children_field.to_sym) && children_field != 'children'
|
32
34
|
return raw_data
|
33
35
|
end
|
34
36
|
|
35
|
-
def
|
36
|
-
|
37
|
-
|
37
|
+
def build_metadata
|
38
|
+
raise StandardError, 'Record not found' if record.nil?
|
39
|
+
raise StandardError, "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}" unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
|
38
40
|
|
39
|
-
|
40
|
-
|
41
|
-
|
41
|
+
self.parsed_metadata = {}
|
42
|
+
add_identifier
|
43
|
+
add_visibility
|
44
|
+
add_ingested_metadata
|
45
|
+
add_metadata_for_model
|
46
|
+
add_rights_statement
|
47
|
+
add_collections
|
48
|
+
add_local
|
42
49
|
|
43
|
-
|
44
|
-
keys.map { |key| key_without_numbers(key) }
|
50
|
+
self.parsed_metadata
|
45
51
|
end
|
46
52
|
|
47
|
-
def
|
48
|
-
|
53
|
+
def add_identifier
|
54
|
+
self.parsed_metadata[work_identifier] = [record[source_identifier]]
|
49
55
|
end
|
50
56
|
|
51
|
-
def
|
52
|
-
|
53
|
-
|
57
|
+
def add_metadata_for_model
|
58
|
+
if factory_class == Collection
|
59
|
+
add_collection_type_gid
|
60
|
+
elsif factory_class == FileSet
|
61
|
+
add_path_to_file
|
62
|
+
validate_presence_of_parent!
|
63
|
+
else
|
64
|
+
add_file unless importerexporter.metadata_only?
|
65
|
+
add_admin_set_id
|
66
|
+
end
|
67
|
+
end
|
54
68
|
|
55
|
-
|
56
|
-
|
69
|
+
def add_ingested_metadata
|
70
|
+
ActiveSupport::Deprecation.warn(
|
71
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
72
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
73
|
+
)
|
74
|
+
# we do not want to sort the values in the record before adding the metadata.
|
75
|
+
# if we do, the factory_class will be set to the default_work_type for all values that come before "model" or "work type"
|
57
76
|
record.each do |key, value|
|
58
|
-
next if
|
77
|
+
next if self.parser.collection_field_mapping.to_s == key_without_numbers(key)
|
59
78
|
|
60
79
|
index = key[/\d+/].to_i - 1 if key[/\d+/].to_i != 0
|
61
80
|
add_metadata(key_without_numbers(key), value, index)
|
62
81
|
end
|
63
|
-
add_file
|
64
|
-
add_visibility
|
65
|
-
add_rights_statement
|
66
|
-
add_admin_set_id
|
67
|
-
add_collections
|
68
|
-
add_local
|
69
|
-
self.parsed_metadata
|
70
82
|
end
|
71
83
|
|
72
84
|
def add_file
|
@@ -76,7 +88,11 @@ module Bulkrax
|
|
76
88
|
elsif record['file'].is_a?(Array)
|
77
89
|
self.parsed_metadata['file'] = record['file']
|
78
90
|
end
|
79
|
-
self.parsed_metadata['file'] = self.parsed_metadata['file'].map
|
91
|
+
self.parsed_metadata['file'] = self.parsed_metadata['file'].map do |f|
|
92
|
+
next if f.blank?
|
93
|
+
|
94
|
+
path_to_file(f.tr(' ', '_'))
|
95
|
+
end.compact
|
80
96
|
end
|
81
97
|
|
82
98
|
def build_export_metadata
|
@@ -86,10 +102,20 @@ module Bulkrax
|
|
86
102
|
self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
|
87
103
|
self.parsed_metadata['model'] = hyrax_record.has_model.first
|
88
104
|
build_mapping_metadata
|
89
|
-
|
90
|
-
|
91
|
-
|
105
|
+
|
106
|
+
# TODO: fix the "send" parameter in the conditional below
|
107
|
+
# currently it returns: "NoMethodError - undefined method 'bulkrax_identifier' for #<Collection:0x00007fbe6a3b4248>"
|
108
|
+
if mapping['collection']&.[]('join')
|
109
|
+
self.parsed_metadata['collection'] = hyrax_record.member_of_collection_ids.join('; ')
|
110
|
+
# self.parsed_metadata['collection'] = hyrax_record.member_of_collections.map { |c| c.send(work_identifier)&.first }.compact.uniq.join(';')
|
111
|
+
else
|
112
|
+
hyrax_record.member_of_collections.each_with_index do |collection, i|
|
113
|
+
self.parsed_metadata["collection_#{i + 1}"] = collection.id
|
114
|
+
# self.parsed_metadata["collection_#{i + 1}"] = collection.send(work_identifier)&.first
|
115
|
+
end
|
92
116
|
end
|
117
|
+
|
118
|
+
build_files unless hyrax_record.is_a?(Collection)
|
93
119
|
self.parsed_metadata
|
94
120
|
end
|
95
121
|
|
@@ -97,24 +123,51 @@ module Bulkrax
|
|
97
123
|
mapping.each do |key, value|
|
98
124
|
next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
|
99
125
|
next if key == "model"
|
126
|
+
next if value['excluded']
|
100
127
|
|
101
128
|
object_key = key if value.key?('object')
|
102
129
|
next unless hyrax_record.respond_to?(key.to_s) || object_key.present?
|
103
130
|
|
104
|
-
data = object_key.present? ? hyrax_record.send(value['object']) : hyrax_record.send(key.to_s)
|
105
131
|
if object_key.present?
|
106
|
-
|
107
|
-
|
132
|
+
build_object(value)
|
133
|
+
else
|
134
|
+
build_value(key, value)
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
def build_object(value)
|
140
|
+
data = hyrax_record.send(value['object'])
|
141
|
+
return if data.empty?
|
142
|
+
|
143
|
+
data = data.to_a if data.is_a?(ActiveTriples::Relation)
|
144
|
+
object_metadata(Array.wrap(data))
|
145
|
+
end
|
108
146
|
|
109
|
-
|
110
|
-
|
111
|
-
|
147
|
+
def build_value(key, value)
|
148
|
+
data = hyrax_record.send(key.to_s)
|
149
|
+
if data.is_a?(ActiveTriples::Relation)
|
150
|
+
if value['join']
|
151
|
+
self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join('; ').to_s
|
112
152
|
else
|
113
|
-
|
153
|
+
data.each_with_index do |d, i|
|
154
|
+
self.parsed_metadata["#{key_for_export(key)}_#{i + 1}"] = prepare_export_data(d)
|
155
|
+
end
|
114
156
|
end
|
157
|
+
else
|
158
|
+
self.parsed_metadata[key_for_export(key)] = prepare_export_data(data)
|
115
159
|
end
|
116
160
|
end
|
117
161
|
|
162
|
+
# On export the key becomes the from and the from becomes the destination. It is the opposite of the import because we are moving data the opposite direction
|
163
|
+
# metadata that does not have a specific Bulkrax entry is mapped to the key name, as matching keys coming in are mapped by the csv parser automatically
|
164
|
+
def key_for_export(key)
|
165
|
+
clean_key = key_without_numbers(key)
|
166
|
+
unnumbered_key = mapping[clean_key] ? mapping[clean_key]['from'].first : clean_key
|
167
|
+
# Bring the number back if there is one
|
168
|
+
"#{unnumbered_key}#{key.sub(clean_key, '')}"
|
169
|
+
end
|
170
|
+
|
118
171
|
def prepare_export_data(datum)
|
119
172
|
if datum.is_a?(ActiveTriples::Resource)
|
120
173
|
datum.to_uri.to_s
|
@@ -123,30 +176,34 @@ module Bulkrax
|
|
123
176
|
end
|
124
177
|
end
|
125
178
|
|
126
|
-
def object_metadata(data
|
127
|
-
data =
|
179
|
+
def object_metadata(data)
|
180
|
+
data = data.map { |d| eval(d) }.flatten # rubocop:disable Security/Eval
|
128
181
|
|
129
182
|
data.each_with_index do |obj, index|
|
130
|
-
next
|
131
|
-
|
132
|
-
|
183
|
+
next if obj.nil?
|
184
|
+
# allow the object_key to be valid whether it's a string or symbol
|
185
|
+
obj = obj.with_indifferent_access
|
133
186
|
|
134
|
-
obj
|
135
|
-
|
187
|
+
obj.each_key do |key|
|
188
|
+
if obj[key].is_a?(Array)
|
189
|
+
obj[key].each_with_index do |_nested_item, nested_index|
|
190
|
+
self.parsed_metadata["#{key_for_export(key)}_#{index + 1}_#{nested_index + 1}"] = prepare_export_data(obj[key][nested_index])
|
191
|
+
end
|
192
|
+
else
|
193
|
+
self.parsed_metadata["#{key_for_export(key)}_#{index + 1}"] = prepare_export_data(obj[key])
|
194
|
+
end
|
136
195
|
end
|
137
196
|
end
|
138
197
|
end
|
139
198
|
|
140
|
-
def
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
return gsub_data.map { |d| JSON.parse(d) }
|
199
|
+
def build_files
|
200
|
+
if mapping['file']&.[]('join')
|
201
|
+
self.parsed_metadata['file'] = hyrax_record.file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact.join('; ')
|
202
|
+
else
|
203
|
+
hyrax_record.file_sets.each_with_index do |fs, i|
|
204
|
+
self.parsed_metadata["file_#{i + 1}"] = filename(fs).to_s if filename(fs).present?
|
205
|
+
end
|
206
|
+
end
|
150
207
|
end
|
151
208
|
|
152
209
|
# In order for the existing exported hyrax_record, to be updated by a re-import
|
@@ -167,18 +224,28 @@ module Bulkrax
|
|
167
224
|
Bulkrax::CsvMatcher
|
168
225
|
end
|
169
226
|
|
227
|
+
def possible_collection_ids
|
228
|
+
ActiveSupport::Deprecation.warn(
|
229
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
230
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
231
|
+
)
|
232
|
+
@possible_collection_ids ||= record.inject([]) do |memo, (key, value)|
|
233
|
+
memo += value.split(/\s*[:;|]\s*/) if self.class.collection_field.to_s == key_without_numbers(key) && value.present?
|
234
|
+
memo
|
235
|
+
end || []
|
236
|
+
end
|
237
|
+
|
170
238
|
def collections_created?
|
171
|
-
|
172
|
-
record[self.class.collection_field].split(/\s*[:;|]\s*/).length == self.collection_ids.length
|
239
|
+
possible_collection_ids.length == self.collection_ids.length
|
173
240
|
end
|
174
241
|
|
175
|
-
def
|
242
|
+
def find_collection_ids
|
176
243
|
return self.collection_ids if collections_created?
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
self.collection_ids << c.id unless
|
244
|
+
if possible_collection_ids.present?
|
245
|
+
possible_collection_ids.each do |collection_id|
|
246
|
+
c = find_collection(collection_id)
|
247
|
+
skip = c.blank? || self.collection_ids.include?(c.id)
|
248
|
+
self.collection_ids << c.id unless skip
|
182
249
|
end
|
183
250
|
end
|
184
251
|
self.collection_ids
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
|
4
|
+
class CsvFileSetEntry < CsvEntry
|
5
|
+
def factory_class
|
6
|
+
::FileSet
|
7
|
+
end
|
8
|
+
|
9
|
+
def add_path_to_file
|
10
|
+
parsed_metadata['file'].each_with_index do |filename, i|
|
11
|
+
path_to_file = ::File.join(parser.path_to_files, filename)
|
12
|
+
|
13
|
+
parsed_metadata['file'][i] = path_to_file
|
14
|
+
end
|
15
|
+
raise ::StandardError, 'one or more file paths are invalid' unless parsed_metadata['file'].map { |file_path| ::File.file?(file_path) }.all?
|
16
|
+
|
17
|
+
parsed_metadata['file']
|
18
|
+
end
|
19
|
+
|
20
|
+
def validate_presence_of_parent!
|
21
|
+
return if parsed_metadata[related_parents_parsed_mapping]&.map(&:present?)&.any?
|
22
|
+
|
23
|
+
raise StandardError, 'File set must be related to at least one work'
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
data/app/models/bulkrax/entry.rb
CHANGED
@@ -23,12 +23,22 @@ module Bulkrax
|
|
23
23
|
|
24
24
|
attr_accessor :all_attrs
|
25
25
|
|
26
|
-
delegate :parser,
|
26
|
+
delegate :parser,
|
27
|
+
:mapping,
|
28
|
+
:replace_files,
|
29
|
+
:update_files,
|
30
|
+
:keys_without_numbers,
|
31
|
+
:key_without_numbers,
|
32
|
+
to: :importerexporter
|
27
33
|
|
28
34
|
delegate :client,
|
29
|
-
|
30
|
-
|
31
|
-
|
35
|
+
:collection_name,
|
36
|
+
:user,
|
37
|
+
:related_parents_raw_mapping,
|
38
|
+
:related_parents_parsed_mapping,
|
39
|
+
:related_children_raw_mapping,
|
40
|
+
:related_children_parsed_mapping,
|
41
|
+
to: :parser
|
32
42
|
|
33
43
|
# Retrieve fields from the file
|
34
44
|
# @param data - the source data
|
@@ -61,13 +71,13 @@ module Bulkrax
|
|
61
71
|
end
|
62
72
|
|
63
73
|
def self.collection_field
|
74
|
+
ActiveSupport::Deprecation.warn(
|
75
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
76
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
77
|
+
)
|
64
78
|
Bulkrax.collection_field_mapping[self.to_s]
|
65
79
|
end
|
66
80
|
|
67
|
-
def self.children_field
|
68
|
-
Bulkrax.parent_child_field_mapping[self.to_s]
|
69
|
-
end
|
70
|
-
|
71
81
|
def build
|
72
82
|
return if type.nil?
|
73
83
|
self.save if self.new_record? # must be saved for statuses
|
@@ -96,6 +106,7 @@ module Bulkrax
|
|
96
106
|
end
|
97
107
|
|
98
108
|
def find_collection(collection_identifier)
|
109
|
+
return unless Collection.properties.keys.include?(work_identifier)
|
99
110
|
Collection.where(
|
100
111
|
work_identifier => collection_identifier
|
101
112
|
).detect { |m| m.send(work_identifier).include?(collection_identifier) }
|
@@ -14,7 +14,7 @@ module Bulkrax
|
|
14
14
|
validates :name, presence: true
|
15
15
|
validates :parser_klass, presence: true
|
16
16
|
|
17
|
-
delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, to: :parser
|
17
|
+
delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
|
18
18
|
|
19
19
|
def export
|
20
20
|
current_run && setup_export_path
|
@@ -25,6 +25,8 @@ module Bulkrax
|
|
25
25
|
create_from_importer
|
26
26
|
when 'worktype'
|
27
27
|
create_from_worktype
|
28
|
+
when 'all'
|
29
|
+
create_from_all
|
28
30
|
end
|
29
31
|
rescue StandardError => e
|
30
32
|
status_info(e)
|
@@ -77,7 +79,8 @@ module Bulkrax
|
|
77
79
|
[
|
78
80
|
[I18n.t('bulkrax.exporter.labels.importer'), 'importer'],
|
79
81
|
[I18n.t('bulkrax.exporter.labels.collection'), 'collection'],
|
80
|
-
[I18n.t('bulkrax.exporter.labels.worktype'), 'worktype']
|
82
|
+
[I18n.t('bulkrax.exporter.labels.worktype'), 'worktype'],
|
83
|
+
[I18n.t('bulkrax.exporter.labels.all'), 'all']
|
81
84
|
]
|
82
85
|
end
|
83
86
|
|
@@ -105,18 +108,22 @@ module Bulkrax
|
|
105
108
|
end
|
106
109
|
|
107
110
|
def exporter_export_path
|
108
|
-
@exporter_export_path ||= File.join(
|
111
|
+
@exporter_export_path ||= File.join(parser.base_path('export'), self.id.to_s, self.exporter_runs.last.id.to_s)
|
109
112
|
end
|
110
113
|
|
111
114
|
def exporter_export_zip_path
|
112
|
-
@exporter_export_zip_path ||= File.join(
|
115
|
+
@exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_#{self.exporter_runs.last.id}.zip")
|
113
116
|
rescue
|
114
|
-
@exporter_export_zip_path ||= File.join(
|
117
|
+
@exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_0.zip")
|
115
118
|
end
|
116
119
|
|
117
120
|
def export_properties
|
118
121
|
properties = Hyrax.config.registered_curation_concern_types.map { |work| work.constantize.properties.keys }.flatten.uniq.sort
|
119
122
|
properties.reject { |prop| Bulkrax.reserved_properties.include?(prop) }
|
120
123
|
end
|
124
|
+
|
125
|
+
def metadata_only?
|
126
|
+
export_type == 'metadata'
|
127
|
+
end
|
121
128
|
end
|
122
129
|
end
|
@@ -18,8 +18,7 @@ module Bulkrax
|
|
18
18
|
validates :admin_set_id, presence: true
|
19
19
|
validates :parser_klass, presence: true
|
20
20
|
|
21
|
-
delegate :valid_import?, :
|
22
|
-
:write_errored_entries_file, :visibility, to: :parser
|
21
|
+
delegate :valid_import?, :write_errored_entries_file, :visibility, to: :parser
|
23
22
|
|
24
23
|
attr_accessor :only_updates, :file_style, :file
|
25
24
|
attr_writer :current_run
|
@@ -97,7 +96,16 @@ module Bulkrax
|
|
97
96
|
end
|
98
97
|
|
99
98
|
def current_run
|
100
|
-
@current_run ||=
|
99
|
+
@current_run ||= if file? && zip?
|
100
|
+
self.importer_runs.create!
|
101
|
+
else
|
102
|
+
entry_counts = {
|
103
|
+
total_work_entries: self.limit || parser.works_total,
|
104
|
+
total_collection_entries: parser.collections_total,
|
105
|
+
total_file_set_entries: parser.file_sets_total
|
106
|
+
}
|
107
|
+
self.importer_runs.create!(entry_counts)
|
108
|
+
end
|
101
109
|
end
|
102
110
|
|
103
111
|
def last_run
|
@@ -131,6 +139,13 @@ module Bulkrax
|
|
131
139
|
status_info(e)
|
132
140
|
end
|
133
141
|
|
142
|
+
def import_file_sets
|
143
|
+
self.save if self.new_record? # Object needs to be saved for statuses
|
144
|
+
parser.create_file_sets
|
145
|
+
rescue StandardError => e
|
146
|
+
status_info(e)
|
147
|
+
end
|
148
|
+
|
134
149
|
# Prepend the base_url to ensure unique set identifiers
|
135
150
|
# @todo - move to parser, as this is OAI specific
|
136
151
|
def unique_collection_identifier(id)
|
@@ -149,11 +164,11 @@ module Bulkrax
|
|
149
164
|
|
150
165
|
# If the import data is zipped, unzip it to this path
|
151
166
|
def importer_unzip_path
|
152
|
-
@importer_unzip_path ||= File.join(
|
167
|
+
@importer_unzip_path ||= File.join(parser.base_path, "import_#{path_string}")
|
153
168
|
end
|
154
169
|
|
155
170
|
def errored_entries_csv_path
|
156
|
-
@errored_entries_csv_path ||= File.join(
|
171
|
+
@errored_entries_csv_path ||= File.join(parser.base_path, "import_#{path_string}_errored_entries.csv")
|
157
172
|
end
|
158
173
|
|
159
174
|
def path_string
|
@@ -161,5 +176,9 @@ module Bulkrax
|
|
161
176
|
rescue
|
162
177
|
"#{self.id}_#{self.created_at.strftime('%Y%m%d%H%M%S')}"
|
163
178
|
end
|
179
|
+
|
180
|
+
def metadata_only?
|
181
|
+
parser.parser_fields['metadata_only'] == true
|
182
|
+
end
|
164
183
|
end
|
165
184
|
end
|
@@ -26,6 +26,10 @@ module Bulkrax
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def build_metadata
|
29
|
+
ActiveSupport::Deprecation.warn(
|
30
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
31
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
32
|
+
)
|
29
33
|
self.parsed_metadata = {}
|
30
34
|
self.parsed_metadata[work_identifier] = [record.header.identifier]
|
31
35
|
|
@@ -56,7 +60,7 @@ module Bulkrax
|
|
56
60
|
# Retrieve list of collections for the entry; add to collection_ids
|
57
61
|
# If OAI-PMH doesn't return setSpec in the headers for GetRecord, use parser.collection_name
|
58
62
|
# in this case, if 'All' is selected, records will not be added to a collection.
|
59
|
-
def
|
63
|
+
def find_collection_ids
|
60
64
|
return self.collection_ids if collections_created?
|
61
65
|
if sets.blank? || parser.collection_name != 'all'
|
62
66
|
# c = Collection.where(Bulkrax.system_identifier_field => importerexporter.unique_collection_identifier(parser.collection_name)).first
|
@@ -14,6 +14,10 @@ module Bulkrax
|
|
14
14
|
end
|
15
15
|
|
16
16
|
def self.data_for_entry(data, source_id)
|
17
|
+
ActiveSupport::Deprecation.warn(
|
18
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
19
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
20
|
+
)
|
17
21
|
reader = data
|
18
22
|
format = reader.class.format.to_sym
|
19
23
|
collections = []
|
@@ -22,7 +26,7 @@ module Bulkrax
|
|
22
26
|
data = RDF::Writer.for(format).buffer do |writer|
|
23
27
|
reader.each_statement do |statement|
|
24
28
|
collections << statement.object.to_s if collection_field.present? && collection_field == statement.predicate.to_s
|
25
|
-
children << statement.object.to_s if
|
29
|
+
children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
|
26
30
|
delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
|
27
31
|
writer << statement
|
28
32
|
end
|
@@ -37,12 +41,13 @@ module Bulkrax
|
|
37
41
|
}
|
38
42
|
end
|
39
43
|
|
40
|
-
def self.
|
41
|
-
|
42
|
-
|
44
|
+
def self.related_children_parsed_mapping
|
45
|
+
return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
|
46
|
+
|
47
|
+
rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
|
48
|
+
return if rdf_related_children_field_mapping.blank?
|
43
49
|
|
44
|
-
|
45
|
-
Bulkrax.parent_child_field_mapping[self.to_s]
|
50
|
+
@related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
|
46
51
|
end
|
47
52
|
|
48
53
|
def record
|
@@ -50,6 +55,10 @@ module Bulkrax
|
|
50
55
|
end
|
51
56
|
|
52
57
|
def build_metadata
|
58
|
+
ActiveSupport::Deprecation.warn(
|
59
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
60
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
61
|
+
)
|
53
62
|
raise StandardError, 'Record not found' if record.nil?
|
54
63
|
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
|
55
64
|
|
@@ -76,7 +85,7 @@ module Bulkrax
|
|
76
85
|
self.raw_metadata['collection'].length == self.collection_ids.length
|
77
86
|
end
|
78
87
|
|
79
|
-
def
|
88
|
+
def find_collection_ids
|
80
89
|
return self.collection_ids if collections_created?
|
81
90
|
if self.raw_metadata['collection'].present?
|
82
91
|
self.raw_metadata['collection'].each do |collection|
|
@@ -39,6 +39,10 @@ module Bulkrax
|
|
39
39
|
end
|
40
40
|
|
41
41
|
def build_metadata
|
42
|
+
ActiveSupport::Deprecation.warn(
|
43
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
44
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
45
|
+
)
|
42
46
|
raise StandardError, 'Record not found' if record.nil?
|
43
47
|
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
|
44
48
|
self.parsed_metadata = {}
|