bulkrax 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/app/controllers/bulkrax/exporters_controller.rb +12 -4
- data/app/controllers/bulkrax/importers_controller.rb +22 -17
- data/app/factories/bulkrax/object_factory.rb +44 -61
- data/app/jobs/bulkrax/create_relationships_job.rb +187 -0
- data/app/jobs/bulkrax/delete_work_job.rb +6 -2
- data/app/jobs/bulkrax/export_work_job.rb +3 -1
- data/app/jobs/bulkrax/exporter_job.rb +1 -0
- data/app/jobs/bulkrax/{import_work_collection_job.rb → import_collection_job.rb} +2 -2
- data/app/jobs/bulkrax/importer_job.rb +16 -1
- data/app/matchers/bulkrax/application_matcher.rb +9 -6
- data/app/models/bulkrax/csv_collection_entry.rb +8 -6
- data/app/models/bulkrax/csv_entry.rb +139 -45
- data/app/models/bulkrax/entry.rb +19 -8
- data/app/models/bulkrax/exporter.rb +12 -5
- data/app/models/bulkrax/importer.rb +22 -5
- data/app/models/bulkrax/oai_entry.rb +5 -1
- data/app/models/bulkrax/rdf_entry.rb +16 -7
- data/app/models/bulkrax/xml_entry.rb +4 -0
- data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/file_factory.rb +2 -1
- data/app/models/concerns/bulkrax/has_matchers.rb +59 -16
- data/app/models/concerns/bulkrax/import_behavior.rb +35 -5
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +19 -0
- data/app/models/concerns/bulkrax/status_info.rb +4 -4
- data/app/parsers/bulkrax/application_parser.rb +59 -84
- data/app/parsers/bulkrax/bagit_parser.rb +12 -3
- data/app/parsers/bulkrax/csv_parser.rb +117 -62
- data/app/parsers/bulkrax/oai_dc_parser.rb +5 -2
- data/app/parsers/bulkrax/xml_parser.rb +5 -0
- data/app/views/bulkrax/exporters/_form.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +13 -1
- data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +45 -14
- data/app/views/bulkrax/importers/edit.html.erb +2 -0
- data/app/views/bulkrax/importers/index.html.erb +15 -17
- data/app/views/bulkrax/importers/show.html.erb +6 -2
- data/config/locales/bulkrax.en.yml +1 -0
- data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +5 -1
- data/db/migrate/20211004170708_change_bulkrax_statuses_error_message_column_type_to_text.rb +5 -0
- data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +6 -0
- data/lib/bulkrax/engine.rb +1 -1
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +9 -17
- data/lib/generators/bulkrax/templates/bin/importer +17 -11
- data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +3 -1
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +7 -12
- metadata +13 -7
- data/app/jobs/bulkrax/child_relationships_job.rb +0 -128
@@ -14,59 +14,68 @@ module Bulkrax
|
|
14
14
|
def self.read_data(path)
|
15
15
|
raise StandardError, 'CSV path empty' if path.blank?
|
16
16
|
CSV.read(path,
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
headers: true,
|
18
|
+
header_converters: :symbol,
|
19
|
+
encoding: 'utf-8')
|
20
20
|
end
|
21
21
|
|
22
22
|
def self.data_for_entry(data, _source_id)
|
23
|
+
ActiveSupport::Deprecation.warn(
|
24
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
25
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
26
|
+
)
|
23
27
|
# If a multi-line CSV data is passed, grab the first row
|
24
28
|
data = data.first if data.is_a?(CSV::Table)
|
25
29
|
# model has to be separated so that it doesn't get mistranslated by to_h
|
26
30
|
raw_data = data.to_h
|
27
|
-
raw_data[:model] = data[:model]
|
31
|
+
raw_data[:model] = data[:model] if data[:model].present?
|
28
32
|
# If the collection field mapping is not 'collection', add 'collection' - the parser needs it
|
29
33
|
raw_data[:collection] = raw_data[collection_field.to_sym] if raw_data.keys.include?(collection_field.to_sym) && collection_field != 'collection'
|
30
|
-
# If the children field mapping is not 'children', add 'children' - the parser needs it
|
31
|
-
raw_data[:children] = raw_data[collection_field.to_sym] if raw_data.keys.include?(children_field.to_sym) && children_field != 'children'
|
32
34
|
return raw_data
|
33
35
|
end
|
34
36
|
|
35
|
-
def
|
36
|
-
|
37
|
-
|
37
|
+
def build_metadata
|
38
|
+
raise StandardError, 'Record not found' if record.nil?
|
39
|
+
raise StandardError, "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}" unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
|
38
40
|
|
39
|
-
|
40
|
-
|
41
|
-
|
41
|
+
self.parsed_metadata = {}
|
42
|
+
add_identifier
|
43
|
+
add_metadata_for_model
|
44
|
+
add_visibility
|
45
|
+
add_ingested_metadata
|
46
|
+
add_rights_statement
|
47
|
+
add_collections
|
48
|
+
add_local
|
42
49
|
|
43
|
-
|
44
|
-
keys.map { |key| key_without_numbers(key) }
|
50
|
+
self.parsed_metadata
|
45
51
|
end
|
46
52
|
|
47
|
-
def
|
48
|
-
|
53
|
+
def add_identifier
|
54
|
+
self.parsed_metadata[work_identifier] = [record[source_identifier]]
|
49
55
|
end
|
50
56
|
|
51
|
-
def
|
52
|
-
|
53
|
-
|
57
|
+
def add_metadata_for_model
|
58
|
+
if factory_class == Collection
|
59
|
+
add_collection_type_gid
|
60
|
+
else
|
61
|
+
add_file unless importerexporter.metadata_only?
|
62
|
+
add_admin_set_id
|
63
|
+
end
|
64
|
+
end
|
54
65
|
|
55
|
-
|
56
|
-
|
66
|
+
def add_ingested_metadata
|
67
|
+
ActiveSupport::Deprecation.warn(
|
68
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
69
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
70
|
+
)
|
71
|
+
# we do not want to sort the values in the record before adding the metadata.
|
72
|
+
# if we do, the factory_class will be set to the default_work_type for all values that come before "model" or "work type"
|
57
73
|
record.each do |key, value|
|
58
|
-
next if
|
74
|
+
next if self.parser.collection_field_mapping.to_s == key_without_numbers(key)
|
59
75
|
|
60
76
|
index = key[/\d+/].to_i - 1 if key[/\d+/].to_i != 0
|
61
77
|
add_metadata(key_without_numbers(key), value, index)
|
62
78
|
end
|
63
|
-
add_file
|
64
|
-
add_visibility
|
65
|
-
add_rights_statement
|
66
|
-
add_admin_set_id
|
67
|
-
add_collections
|
68
|
-
add_local
|
69
|
-
self.parsed_metadata
|
70
79
|
end
|
71
80
|
|
72
81
|
def add_file
|
@@ -86,10 +95,20 @@ module Bulkrax
|
|
86
95
|
self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
|
87
96
|
self.parsed_metadata['model'] = hyrax_record.has_model.first
|
88
97
|
build_mapping_metadata
|
89
|
-
|
90
|
-
|
91
|
-
|
98
|
+
|
99
|
+
# TODO: fix the "send" parameter in the conditional below
|
100
|
+
# currently it returns: "NoMethodError - undefined method 'bulkrax_identifier' for #<Collection:0x00007fbe6a3b4248>"
|
101
|
+
if mapping['collection']&.[]('join')
|
102
|
+
self.parsed_metadata['collection'] = hyrax_record.member_of_collection_ids.join('; ')
|
103
|
+
# self.parsed_metadata['collection'] = hyrax_record.member_of_collections.map { |c| c.send(work_identifier)&.first }.compact.uniq.join(';')
|
104
|
+
else
|
105
|
+
hyrax_record.member_of_collections.each_with_index do |collection, i|
|
106
|
+
self.parsed_metadata["collection_#{i + 1}"] = collection.id
|
107
|
+
# self.parsed_metadata["collection_#{i + 1}"] = collection.send(work_identifier)&.first
|
108
|
+
end
|
92
109
|
end
|
110
|
+
|
111
|
+
build_files unless hyrax_record.is_a?(Collection)
|
93
112
|
self.parsed_metadata
|
94
113
|
end
|
95
114
|
|
@@ -97,16 +116,51 @@ module Bulkrax
|
|
97
116
|
mapping.each do |key, value|
|
98
117
|
next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
|
99
118
|
next if key == "model"
|
100
|
-
next
|
101
|
-
|
102
|
-
if
|
103
|
-
|
119
|
+
next if value['excluded']
|
120
|
+
|
121
|
+
object_key = key if value.key?('object')
|
122
|
+
next unless hyrax_record.respond_to?(key.to_s) || object_key.present?
|
123
|
+
|
124
|
+
if object_key.present?
|
125
|
+
build_object(value)
|
126
|
+
else
|
127
|
+
build_value(key, value)
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
def build_object(value)
|
133
|
+
data = hyrax_record.send(value['object'])
|
134
|
+
return if data.empty?
|
135
|
+
|
136
|
+
data = data.to_a if data.is_a?(ActiveTriples::Relation)
|
137
|
+
object_metadata(Array.wrap(data))
|
138
|
+
end
|
139
|
+
|
140
|
+
def build_value(key, value)
|
141
|
+
data = hyrax_record.send(key.to_s)
|
142
|
+
if data.is_a?(ActiveTriples::Relation)
|
143
|
+
if value['join']
|
144
|
+
self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join('; ').to_s
|
104
145
|
else
|
105
|
-
|
146
|
+
data.each_with_index do |d, i|
|
147
|
+
self.parsed_metadata["#{key_for_export(key)}_#{i + 1}"] = prepare_export_data(d)
|
148
|
+
end
|
106
149
|
end
|
150
|
+
else
|
151
|
+
self.parsed_metadata[key_for_export(key)] = prepare_export_data(data)
|
107
152
|
end
|
108
153
|
end
|
109
154
|
|
155
|
+
# On export the key becomes the from and the from becomes the destination. It is the opposite of the import because we are moving data the opposite direction
|
156
|
+
# metadata that does not have a specific Bulkrax entry is mapped to the key name, as matching keys coming in are mapped by the csv parser automatically
|
157
|
+
def key_for_export(key)
|
158
|
+
clean_key = key_without_numbers(key)
|
159
|
+
unnumbered_key = mapping[clean_key] ? mapping[clean_key]['from'].first : clean_key
|
160
|
+
# Bring the number back if there is one
|
161
|
+
"#{unnumbered_key}#{key.sub(clean_key, '')}"
|
162
|
+
end
|
163
|
+
|
110
164
|
def prepare_export_data(datum)
|
111
165
|
if datum.is_a?(ActiveTriples::Resource)
|
112
166
|
datum.to_uri.to_s
|
@@ -115,6 +169,36 @@ module Bulkrax
|
|
115
169
|
end
|
116
170
|
end
|
117
171
|
|
172
|
+
def object_metadata(data)
|
173
|
+
data = data.map { |d| eval(d) }.flatten # rubocop:disable Security/Eval
|
174
|
+
|
175
|
+
data.each_with_index do |obj, index|
|
176
|
+
next if obj.nil?
|
177
|
+
# allow the object_key to be valid whether it's a string or symbol
|
178
|
+
obj = obj.with_indifferent_access
|
179
|
+
|
180
|
+
obj.each_key do |key|
|
181
|
+
if obj[key].is_a?(Array)
|
182
|
+
obj[key].each_with_index do |_nested_item, nested_index|
|
183
|
+
self.parsed_metadata["#{key_for_export(key)}_#{index + 1}_#{nested_index + 1}"] = prepare_export_data(obj[key][nested_index])
|
184
|
+
end
|
185
|
+
else
|
186
|
+
self.parsed_metadata["#{key_for_export(key)}_#{index + 1}"] = prepare_export_data(obj[key])
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
def build_files
|
193
|
+
if mapping['file']&.[]('join')
|
194
|
+
self.parsed_metadata['file'] = hyrax_record.file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact.join('; ')
|
195
|
+
else
|
196
|
+
hyrax_record.file_sets.each_with_index do |fs, i|
|
197
|
+
self.parsed_metadata["file_#{i + 1}"] = filename(fs).to_s if filename(fs).present?
|
198
|
+
end
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
118
202
|
# In order for the existing exported hyrax_record, to be updated by a re-import
|
119
203
|
# we need a unique value in system_identifier
|
120
204
|
# add the existing hyrax_record id to system_identifier
|
@@ -133,18 +217,28 @@ module Bulkrax
|
|
133
217
|
Bulkrax::CsvMatcher
|
134
218
|
end
|
135
219
|
|
220
|
+
def possible_collection_ids
|
221
|
+
ActiveSupport::Deprecation.warn(
|
222
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
223
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
224
|
+
)
|
225
|
+
@possible_collection_ids ||= record.inject([]) do |memo, (key, value)|
|
226
|
+
memo += value.split(/\s*[:;|]\s*/) if self.class.collection_field.to_s == key_without_numbers(key) && value.present?
|
227
|
+
memo
|
228
|
+
end || []
|
229
|
+
end
|
230
|
+
|
136
231
|
def collections_created?
|
137
|
-
|
138
|
-
record[self.class.collection_field].split(/\s*[:;|]\s*/).length == self.collection_ids.length
|
232
|
+
possible_collection_ids.length == self.collection_ids.length
|
139
233
|
end
|
140
234
|
|
141
|
-
def
|
235
|
+
def find_collection_ids
|
142
236
|
return self.collection_ids if collections_created?
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
self.collection_ids << c.id unless
|
237
|
+
if possible_collection_ids.present?
|
238
|
+
possible_collection_ids.each do |collection_id|
|
239
|
+
c = find_collection(collection_id)
|
240
|
+
skip = c.blank? || self.collection_ids.include?(c.id)
|
241
|
+
self.collection_ids << c.id unless skip
|
148
242
|
end
|
149
243
|
end
|
150
244
|
self.collection_ids
|
data/app/models/bulkrax/entry.rb
CHANGED
@@ -23,12 +23,22 @@ module Bulkrax
|
|
23
23
|
|
24
24
|
attr_accessor :all_attrs
|
25
25
|
|
26
|
-
delegate :parser,
|
26
|
+
delegate :parser,
|
27
|
+
:mapping,
|
28
|
+
:replace_files,
|
29
|
+
:update_files,
|
30
|
+
:keys_without_numbers,
|
31
|
+
:key_without_numbers,
|
32
|
+
to: :importerexporter
|
27
33
|
|
28
34
|
delegate :client,
|
29
|
-
|
30
|
-
|
31
|
-
|
35
|
+
:collection_name,
|
36
|
+
:user,
|
37
|
+
:related_parents_raw_mapping,
|
38
|
+
:related_parents_parsed_mapping,
|
39
|
+
:related_children_raw_mapping,
|
40
|
+
:related_children_parsed_mapping,
|
41
|
+
to: :parser
|
32
42
|
|
33
43
|
# Retrieve fields from the file
|
34
44
|
# @param data - the source data
|
@@ -61,13 +71,13 @@ module Bulkrax
|
|
61
71
|
end
|
62
72
|
|
63
73
|
def self.collection_field
|
74
|
+
ActiveSupport::Deprecation.warn(
|
75
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
76
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
77
|
+
)
|
64
78
|
Bulkrax.collection_field_mapping[self.to_s]
|
65
79
|
end
|
66
80
|
|
67
|
-
def self.children_field
|
68
|
-
Bulkrax.parent_child_field_mapping[self.to_s]
|
69
|
-
end
|
70
|
-
|
71
81
|
def build
|
72
82
|
return if type.nil?
|
73
83
|
self.save if self.new_record? # must be saved for statuses
|
@@ -96,6 +106,7 @@ module Bulkrax
|
|
96
106
|
end
|
97
107
|
|
98
108
|
def find_collection(collection_identifier)
|
109
|
+
return unless Collection.properties.keys.include?(work_identifier)
|
99
110
|
Collection.where(
|
100
111
|
work_identifier => collection_identifier
|
101
112
|
).detect { |m| m.send(work_identifier).include?(collection_identifier) }
|
@@ -14,7 +14,7 @@ module Bulkrax
|
|
14
14
|
validates :name, presence: true
|
15
15
|
validates :parser_klass, presence: true
|
16
16
|
|
17
|
-
delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, to: :parser
|
17
|
+
delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
|
18
18
|
|
19
19
|
def export
|
20
20
|
current_run && setup_export_path
|
@@ -25,6 +25,8 @@ module Bulkrax
|
|
25
25
|
create_from_importer
|
26
26
|
when 'worktype'
|
27
27
|
create_from_worktype
|
28
|
+
when 'all'
|
29
|
+
create_from_all
|
28
30
|
end
|
29
31
|
rescue StandardError => e
|
30
32
|
status_info(e)
|
@@ -77,7 +79,8 @@ module Bulkrax
|
|
77
79
|
[
|
78
80
|
[I18n.t('bulkrax.exporter.labels.importer'), 'importer'],
|
79
81
|
[I18n.t('bulkrax.exporter.labels.collection'), 'collection'],
|
80
|
-
[I18n.t('bulkrax.exporter.labels.worktype'), 'worktype']
|
82
|
+
[I18n.t('bulkrax.exporter.labels.worktype'), 'worktype'],
|
83
|
+
[I18n.t('bulkrax.exporter.labels.all'), 'all']
|
81
84
|
]
|
82
85
|
end
|
83
86
|
|
@@ -105,18 +108,22 @@ module Bulkrax
|
|
105
108
|
end
|
106
109
|
|
107
110
|
def exporter_export_path
|
108
|
-
@exporter_export_path ||= File.join(
|
111
|
+
@exporter_export_path ||= File.join(parser.base_path('export'), self.id.to_s, self.exporter_runs.last.id.to_s)
|
109
112
|
end
|
110
113
|
|
111
114
|
def exporter_export_zip_path
|
112
|
-
@exporter_export_zip_path ||= File.join(
|
115
|
+
@exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_#{self.exporter_runs.last.id}.zip")
|
113
116
|
rescue
|
114
|
-
@exporter_export_zip_path ||= File.join(
|
117
|
+
@exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_0.zip")
|
115
118
|
end
|
116
119
|
|
117
120
|
def export_properties
|
118
121
|
properties = Hyrax.config.registered_curation_concern_types.map { |work| work.constantize.properties.keys }.flatten.uniq.sort
|
119
122
|
properties.reject { |prop| Bulkrax.reserved_properties.include?(prop) }
|
120
123
|
end
|
124
|
+
|
125
|
+
def metadata_only?
|
126
|
+
export_type == 'metadata'
|
127
|
+
end
|
121
128
|
end
|
122
129
|
end
|
@@ -18,12 +18,21 @@ module Bulkrax
|
|
18
18
|
validates :admin_set_id, presence: true
|
19
19
|
validates :parser_klass, presence: true
|
20
20
|
|
21
|
-
delegate :valid_import?, :
|
22
|
-
:write_errored_entries_file, :visibility, to: :parser
|
21
|
+
delegate :valid_import?, :write_errored_entries_file, :visibility, to: :parser
|
23
22
|
|
24
23
|
attr_accessor :only_updates, :file_style, :file
|
25
24
|
attr_writer :current_run
|
26
25
|
|
26
|
+
def self.safe_uri_filename(uri)
|
27
|
+
uri = URI.parse(uri) unless uri.is_a?(URI)
|
28
|
+
r = Faraday.head(uri.to_s)
|
29
|
+
return CGI.parse(r.headers['content-disposition'])["filename"][0].delete("\"")
|
30
|
+
rescue
|
31
|
+
filename = File.basename(uri.path)
|
32
|
+
filename.delete!('/')
|
33
|
+
filename.presence || file_set.id
|
34
|
+
end
|
35
|
+
|
27
36
|
def status
|
28
37
|
if self.validate_only
|
29
38
|
'Validated'
|
@@ -87,7 +96,11 @@ module Bulkrax
|
|
87
96
|
end
|
88
97
|
|
89
98
|
def current_run
|
90
|
-
@current_run ||=
|
99
|
+
@current_run ||= if file? && zip?
|
100
|
+
self.importer_runs.create!
|
101
|
+
else
|
102
|
+
self.importer_runs.create!(total_work_entries: self.limit || parser.works_total, total_collection_entries: parser.collections_total)
|
103
|
+
end
|
91
104
|
end
|
92
105
|
|
93
106
|
def last_run
|
@@ -139,11 +152,11 @@ module Bulkrax
|
|
139
152
|
|
140
153
|
# If the import data is zipped, unzip it to this path
|
141
154
|
def importer_unzip_path
|
142
|
-
@importer_unzip_path ||= File.join(
|
155
|
+
@importer_unzip_path ||= File.join(parser.base_path, "import_#{path_string}")
|
143
156
|
end
|
144
157
|
|
145
158
|
def errored_entries_csv_path
|
146
|
-
@errored_entries_csv_path ||= File.join(
|
159
|
+
@errored_entries_csv_path ||= File.join(parser.base_path, "import_#{path_string}_errored_entries.csv")
|
147
160
|
end
|
148
161
|
|
149
162
|
def path_string
|
@@ -151,5 +164,9 @@ module Bulkrax
|
|
151
164
|
rescue
|
152
165
|
"#{self.id}_#{self.created_at.strftime('%Y%m%d%H%M%S')}"
|
153
166
|
end
|
167
|
+
|
168
|
+
def metadata_only?
|
169
|
+
parser.parser_fields['metadata_only'] == true
|
170
|
+
end
|
154
171
|
end
|
155
172
|
end
|
@@ -26,6 +26,10 @@ module Bulkrax
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def build_metadata
|
29
|
+
ActiveSupport::Deprecation.warn(
|
30
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
31
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
32
|
+
)
|
29
33
|
self.parsed_metadata = {}
|
30
34
|
self.parsed_metadata[work_identifier] = [record.header.identifier]
|
31
35
|
|
@@ -56,7 +60,7 @@ module Bulkrax
|
|
56
60
|
# Retrieve list of collections for the entry; add to collection_ids
|
57
61
|
# If OAI-PMH doesn't return setSpec in the headers for GetRecord, use parser.collection_name
|
58
62
|
# in this case, if 'All' is selected, records will not be added to a collection.
|
59
|
-
def
|
63
|
+
def find_collection_ids
|
60
64
|
return self.collection_ids if collections_created?
|
61
65
|
if sets.blank? || parser.collection_name != 'all'
|
62
66
|
# c = Collection.where(Bulkrax.system_identifier_field => importerexporter.unique_collection_identifier(parser.collection_name)).first
|
@@ -14,6 +14,10 @@ module Bulkrax
|
|
14
14
|
end
|
15
15
|
|
16
16
|
def self.data_for_entry(data, source_id)
|
17
|
+
ActiveSupport::Deprecation.warn(
|
18
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
19
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
20
|
+
)
|
17
21
|
reader = data
|
18
22
|
format = reader.class.format.to_sym
|
19
23
|
collections = []
|
@@ -22,7 +26,7 @@ module Bulkrax
|
|
22
26
|
data = RDF::Writer.for(format).buffer do |writer|
|
23
27
|
reader.each_statement do |statement|
|
24
28
|
collections << statement.object.to_s if collection_field.present? && collection_field == statement.predicate.to_s
|
25
|
-
children << statement.object.to_s if
|
29
|
+
children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
|
26
30
|
delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
|
27
31
|
writer << statement
|
28
32
|
end
|
@@ -37,12 +41,13 @@ module Bulkrax
|
|
37
41
|
}
|
38
42
|
end
|
39
43
|
|
40
|
-
def self.
|
41
|
-
|
42
|
-
|
44
|
+
def self.related_children_parsed_mapping
|
45
|
+
return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
|
46
|
+
|
47
|
+
rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
|
48
|
+
return if rdf_related_children_field_mapping.blank?
|
43
49
|
|
44
|
-
|
45
|
-
Bulkrax.parent_child_field_mapping[self.to_s]
|
50
|
+
@related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
|
46
51
|
end
|
47
52
|
|
48
53
|
def record
|
@@ -50,6 +55,10 @@ module Bulkrax
|
|
50
55
|
end
|
51
56
|
|
52
57
|
def build_metadata
|
58
|
+
ActiveSupport::Deprecation.warn(
|
59
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
60
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
61
|
+
)
|
53
62
|
raise StandardError, 'Record not found' if record.nil?
|
54
63
|
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
|
55
64
|
|
@@ -76,7 +85,7 @@ module Bulkrax
|
|
76
85
|
self.raw_metadata['collection'].length == self.collection_ids.length
|
77
86
|
end
|
78
87
|
|
79
|
-
def
|
88
|
+
def find_collection_ids
|
80
89
|
return self.collection_ids if collections_created?
|
81
90
|
if self.raw_metadata['collection'].present?
|
82
91
|
self.raw_metadata['collection'].each do |collection|
|
@@ -39,6 +39,10 @@ module Bulkrax
|
|
39
39
|
end
|
40
40
|
|
41
41
|
def build_metadata
|
42
|
+
ActiveSupport::Deprecation.warn(
|
43
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
44
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
45
|
+
)
|
42
46
|
raise StandardError, 'Record not found' if record.nil?
|
43
47
|
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
|
44
48
|
self.parsed_metadata = {}
|
@@ -40,13 +40,13 @@ module Bulkrax
|
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
43
|
-
#
|
43
|
+
# Prepend the file_set id to ensure a unique filename
|
44
44
|
def filename(file_set)
|
45
45
|
return if file_set.original_file.blank?
|
46
46
|
fn = file_set.original_file.file_name.first
|
47
47
|
mime = Mime::Type.lookup(file_set.original_file.mime_type)
|
48
48
|
ext_mime = MIME::Types.of(file_set.original_file.file_name).first
|
49
|
-
if fn.include?(file_set.id)
|
49
|
+
if fn.include?(file_set.id) || importerexporter.metadata_only?
|
50
50
|
return fn if mime.to_s == ext_mime.to_s
|
51
51
|
return "#{fn}.#{mime.to_sym}"
|
52
52
|
else
|
@@ -33,7 +33,8 @@ module Bulkrax
|
|
33
33
|
if file_value.is_a?(Hash)
|
34
34
|
file_value
|
35
35
|
elsif file_value.is_a?(String)
|
36
|
-
|
36
|
+
name = Bulkrax::Importer.safe_uri_filename(file_value)
|
37
|
+
{ url: file_value, file_name: name }
|
37
38
|
else
|
38
39
|
Rails.logger.error("skipped remote file #{file_value} because we do not recognize the type")
|
39
40
|
nil
|
@@ -1,5 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
2
|
+
# rubocop:disable Metrics/ModuleLength
|
3
3
|
module Bulkrax
|
4
4
|
module HasMatchers
|
5
5
|
extend ActiveSupport::Concern
|
@@ -43,7 +43,6 @@ module Bulkrax
|
|
43
43
|
|
44
44
|
value = if matcher
|
45
45
|
result = matcher.result(self, node_content)
|
46
|
-
next unless result
|
47
46
|
matched_metadata(multiple, name, result, object_multiple)
|
48
47
|
elsif multiple
|
49
48
|
Rails.logger.info("Bulkrax Column automatically matched #{node_name}, #{node_content}")
|
@@ -53,12 +52,20 @@ module Bulkrax
|
|
53
52
|
single_metadata(node_content)
|
54
53
|
end
|
55
54
|
|
56
|
-
|
55
|
+
object_name.present? ? set_parsed_object_data(object_multiple, object_name, name, index, value) : set_parsed_data(name, value)
|
57
56
|
end
|
58
57
|
end
|
59
58
|
|
60
|
-
def set_parsed_data(
|
59
|
+
def set_parsed_data(name, value)
|
60
|
+
return parsed_metadata[name] = value unless multiple?(name)
|
61
|
+
|
62
|
+
parsed_metadata[name] ||= []
|
63
|
+
parsed_metadata[name] += Array.wrap(value).flatten
|
64
|
+
end
|
65
|
+
|
66
|
+
def set_parsed_object_data(object_multiple, object_name, name, index, value)
|
61
67
|
if object_multiple
|
68
|
+
index ||= 0
|
62
69
|
parsed_metadata[object_name][index] ||= {}
|
63
70
|
parsed_metadata[object_name][index][name] ||= []
|
64
71
|
if value.is_a?(Array)
|
@@ -66,20 +73,13 @@ module Bulkrax
|
|
66
73
|
else
|
67
74
|
parsed_metadata[object_name][index][name] = value
|
68
75
|
end
|
69
|
-
|
76
|
+
else
|
70
77
|
parsed_metadata[object_name][name] ||= []
|
71
78
|
if value.is_a?(Array)
|
72
79
|
parsed_metadata[object_name][name] += value
|
73
80
|
else
|
74
81
|
parsed_metadata[object_name][name] = value
|
75
82
|
end
|
76
|
-
else
|
77
|
-
parsed_metadata[name] ||= []
|
78
|
-
if value.is_a?(Array)
|
79
|
-
parsed_metadata[name] += value
|
80
|
-
else
|
81
|
-
parsed_metadata[name] = value
|
82
|
-
end
|
83
83
|
end
|
84
84
|
end
|
85
85
|
|
@@ -90,9 +90,20 @@ module Bulkrax
|
|
90
90
|
end
|
91
91
|
|
92
92
|
def multiple_metadata(content)
|
93
|
-
content = content.content if content.is_a?(Nokogiri::XML::NodeSet)
|
94
93
|
return unless content
|
95
|
-
|
94
|
+
|
95
|
+
case content
|
96
|
+
when Nokogiri::XML::NodeSet
|
97
|
+
content&.content
|
98
|
+
when Array
|
99
|
+
content
|
100
|
+
when Hash
|
101
|
+
Array.wrap(content)
|
102
|
+
when String
|
103
|
+
Array.wrap(content.strip)
|
104
|
+
else
|
105
|
+
Array.wrap(content)
|
106
|
+
end
|
96
107
|
end
|
97
108
|
|
98
109
|
def matched_metadata(multiple, name, result, object_multiple)
|
@@ -113,12 +124,43 @@ module Bulkrax
|
|
113
124
|
field = field.gsub('_attributes', '')
|
114
125
|
|
115
126
|
return false if excluded?(field)
|
116
|
-
return true if
|
127
|
+
return true if supported_bulkrax_fields.include?(field)
|
117
128
|
return factory_class.method_defined?(field) && factory_class.properties[field].present?
|
118
129
|
end
|
119
130
|
|
131
|
+
def supported_bulkrax_fields
|
132
|
+
ActiveSupport::Deprecation.warn(
|
133
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
134
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
135
|
+
)
|
136
|
+
@supported_bulkrax_fields ||=
|
137
|
+
%W[
|
138
|
+
id
|
139
|
+
file
|
140
|
+
remote_files
|
141
|
+
model
|
142
|
+
delete
|
143
|
+
#{parser.collection_field_mapping}
|
144
|
+
#{related_parents_parsed_mapping}
|
145
|
+
#{related_children_parsed_mapping}
|
146
|
+
]
|
147
|
+
end
|
148
|
+
|
120
149
|
def multiple?(field)
|
121
|
-
|
150
|
+
ActiveSupport::Deprecation.warn(
|
151
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
152
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
153
|
+
)
|
154
|
+
@multiple_bulkrax_fields ||=
|
155
|
+
%W[
|
156
|
+
file
|
157
|
+
remote_files
|
158
|
+
#{parser.collection_field_mapping}
|
159
|
+
#{related_parents_parsed_mapping}
|
160
|
+
#{related_children_parsed_mapping}
|
161
|
+
]
|
162
|
+
|
163
|
+
return true if @multiple_bulkrax_fields.include?(field)
|
122
164
|
return false if field == 'model'
|
123
165
|
|
124
166
|
field_supported?(field) && factory_class&.properties&.[](field)&.[]('multiple')
|
@@ -153,3 +195,4 @@ module Bulkrax
|
|
153
195
|
end
|
154
196
|
end
|
155
197
|
end
|
198
|
+
# rubocop:enable Metrics/ModuleLength
|