bulkrax 1.0.0 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/app/controllers/bulkrax/exporters_controller.rb +12 -4
- data/app/controllers/bulkrax/importers_controller.rb +22 -17
- data/app/factories/bulkrax/object_factory.rb +44 -61
- data/app/jobs/bulkrax/create_relationships_job.rb +187 -0
- data/app/jobs/bulkrax/delete_work_job.rb +6 -2
- data/app/jobs/bulkrax/export_work_job.rb +3 -1
- data/app/jobs/bulkrax/exporter_job.rb +1 -0
- data/app/jobs/bulkrax/{import_work_collection_job.rb → import_collection_job.rb} +2 -2
- data/app/jobs/bulkrax/importer_job.rb +16 -1
- data/app/matchers/bulkrax/application_matcher.rb +9 -6
- data/app/models/bulkrax/csv_collection_entry.rb +8 -6
- data/app/models/bulkrax/csv_entry.rb +139 -45
- data/app/models/bulkrax/entry.rb +19 -8
- data/app/models/bulkrax/exporter.rb +12 -5
- data/app/models/bulkrax/importer.rb +22 -5
- data/app/models/bulkrax/oai_entry.rb +5 -1
- data/app/models/bulkrax/rdf_entry.rb +16 -7
- data/app/models/bulkrax/xml_entry.rb +4 -0
- data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/file_factory.rb +2 -1
- data/app/models/concerns/bulkrax/has_matchers.rb +59 -16
- data/app/models/concerns/bulkrax/import_behavior.rb +35 -5
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +19 -0
- data/app/models/concerns/bulkrax/status_info.rb +4 -4
- data/app/parsers/bulkrax/application_parser.rb +59 -84
- data/app/parsers/bulkrax/bagit_parser.rb +12 -3
- data/app/parsers/bulkrax/csv_parser.rb +117 -62
- data/app/parsers/bulkrax/oai_dc_parser.rb +5 -2
- data/app/parsers/bulkrax/xml_parser.rb +5 -0
- data/app/views/bulkrax/exporters/_form.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +13 -1
- data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +45 -14
- data/app/views/bulkrax/importers/edit.html.erb +2 -0
- data/app/views/bulkrax/importers/index.html.erb +15 -17
- data/app/views/bulkrax/importers/show.html.erb +6 -2
- data/config/locales/bulkrax.en.yml +1 -0
- data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +5 -1
- data/db/migrate/20211004170708_change_bulkrax_statuses_error_message_column_type_to_text.rb +5 -0
- data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +6 -0
- data/lib/bulkrax/engine.rb +1 -1
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +9 -17
- data/lib/generators/bulkrax/templates/bin/importer +17 -11
- data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +3 -1
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +7 -12
- metadata +13 -7
- data/app/jobs/bulkrax/child_relationships_job.rb +0 -128
@@ -14,59 +14,68 @@ module Bulkrax
|
|
14
14
|
def self.read_data(path)
|
15
15
|
raise StandardError, 'CSV path empty' if path.blank?
|
16
16
|
CSV.read(path,
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
headers: true,
|
18
|
+
header_converters: :symbol,
|
19
|
+
encoding: 'utf-8')
|
20
20
|
end
|
21
21
|
|
22
22
|
def self.data_for_entry(data, _source_id)
|
23
|
+
ActiveSupport::Deprecation.warn(
|
24
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
25
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
26
|
+
)
|
23
27
|
# If a multi-line CSV data is passed, grab the first row
|
24
28
|
data = data.first if data.is_a?(CSV::Table)
|
25
29
|
# model has to be separated so that it doesn't get mistranslated by to_h
|
26
30
|
raw_data = data.to_h
|
27
|
-
raw_data[:model] = data[:model]
|
31
|
+
raw_data[:model] = data[:model] if data[:model].present?
|
28
32
|
# If the collection field mapping is not 'collection', add 'collection' - the parser needs it
|
29
33
|
raw_data[:collection] = raw_data[collection_field.to_sym] if raw_data.keys.include?(collection_field.to_sym) && collection_field != 'collection'
|
30
|
-
# If the children field mapping is not 'children', add 'children' - the parser needs it
|
31
|
-
raw_data[:children] = raw_data[collection_field.to_sym] if raw_data.keys.include?(children_field.to_sym) && children_field != 'children'
|
32
34
|
return raw_data
|
33
35
|
end
|
34
36
|
|
35
|
-
def
|
36
|
-
|
37
|
-
|
37
|
+
def build_metadata
|
38
|
+
raise StandardError, 'Record not found' if record.nil?
|
39
|
+
raise StandardError, "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}" unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
|
38
40
|
|
39
|
-
|
40
|
-
|
41
|
-
|
41
|
+
self.parsed_metadata = {}
|
42
|
+
add_identifier
|
43
|
+
add_metadata_for_model
|
44
|
+
add_visibility
|
45
|
+
add_ingested_metadata
|
46
|
+
add_rights_statement
|
47
|
+
add_collections
|
48
|
+
add_local
|
42
49
|
|
43
|
-
|
44
|
-
keys.map { |key| key_without_numbers(key) }
|
50
|
+
self.parsed_metadata
|
45
51
|
end
|
46
52
|
|
47
|
-
def
|
48
|
-
|
53
|
+
def add_identifier
|
54
|
+
self.parsed_metadata[work_identifier] = [record[source_identifier]]
|
49
55
|
end
|
50
56
|
|
51
|
-
def
|
52
|
-
|
53
|
-
|
57
|
+
def add_metadata_for_model
|
58
|
+
if factory_class == Collection
|
59
|
+
add_collection_type_gid
|
60
|
+
else
|
61
|
+
add_file unless importerexporter.metadata_only?
|
62
|
+
add_admin_set_id
|
63
|
+
end
|
64
|
+
end
|
54
65
|
|
55
|
-
|
56
|
-
|
66
|
+
def add_ingested_metadata
|
67
|
+
ActiveSupport::Deprecation.warn(
|
68
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
69
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
70
|
+
)
|
71
|
+
# we do not want to sort the values in the record before adding the metadata.
|
72
|
+
# if we do, the factory_class will be set to the default_work_type for all values that come before "model" or "work type"
|
57
73
|
record.each do |key, value|
|
58
|
-
next if
|
74
|
+
next if self.parser.collection_field_mapping.to_s == key_without_numbers(key)
|
59
75
|
|
60
76
|
index = key[/\d+/].to_i - 1 if key[/\d+/].to_i != 0
|
61
77
|
add_metadata(key_without_numbers(key), value, index)
|
62
78
|
end
|
63
|
-
add_file
|
64
|
-
add_visibility
|
65
|
-
add_rights_statement
|
66
|
-
add_admin_set_id
|
67
|
-
add_collections
|
68
|
-
add_local
|
69
|
-
self.parsed_metadata
|
70
79
|
end
|
71
80
|
|
72
81
|
def add_file
|
@@ -86,10 +95,20 @@ module Bulkrax
|
|
86
95
|
self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
|
87
96
|
self.parsed_metadata['model'] = hyrax_record.has_model.first
|
88
97
|
build_mapping_metadata
|
89
|
-
|
90
|
-
|
91
|
-
|
98
|
+
|
99
|
+
# TODO: fix the "send" parameter in the conditional below
|
100
|
+
# currently it returns: "NoMethodError - undefined method 'bulkrax_identifier' for #<Collection:0x00007fbe6a3b4248>"
|
101
|
+
if mapping['collection']&.[]('join')
|
102
|
+
self.parsed_metadata['collection'] = hyrax_record.member_of_collection_ids.join('; ')
|
103
|
+
# self.parsed_metadata['collection'] = hyrax_record.member_of_collections.map { |c| c.send(work_identifier)&.first }.compact.uniq.join(';')
|
104
|
+
else
|
105
|
+
hyrax_record.member_of_collections.each_with_index do |collection, i|
|
106
|
+
self.parsed_metadata["collection_#{i + 1}"] = collection.id
|
107
|
+
# self.parsed_metadata["collection_#{i + 1}"] = collection.send(work_identifier)&.first
|
108
|
+
end
|
92
109
|
end
|
110
|
+
|
111
|
+
build_files unless hyrax_record.is_a?(Collection)
|
93
112
|
self.parsed_metadata
|
94
113
|
end
|
95
114
|
|
@@ -97,16 +116,51 @@ module Bulkrax
|
|
97
116
|
mapping.each do |key, value|
|
98
117
|
next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
|
99
118
|
next if key == "model"
|
100
|
-
next
|
101
|
-
|
102
|
-
if
|
103
|
-
|
119
|
+
next if value['excluded']
|
120
|
+
|
121
|
+
object_key = key if value.key?('object')
|
122
|
+
next unless hyrax_record.respond_to?(key.to_s) || object_key.present?
|
123
|
+
|
124
|
+
if object_key.present?
|
125
|
+
build_object(value)
|
126
|
+
else
|
127
|
+
build_value(key, value)
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
def build_object(value)
|
133
|
+
data = hyrax_record.send(value['object'])
|
134
|
+
return if data.empty?
|
135
|
+
|
136
|
+
data = data.to_a if data.is_a?(ActiveTriples::Relation)
|
137
|
+
object_metadata(Array.wrap(data))
|
138
|
+
end
|
139
|
+
|
140
|
+
def build_value(key, value)
|
141
|
+
data = hyrax_record.send(key.to_s)
|
142
|
+
if data.is_a?(ActiveTriples::Relation)
|
143
|
+
if value['join']
|
144
|
+
self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join('; ').to_s
|
104
145
|
else
|
105
|
-
|
146
|
+
data.each_with_index do |d, i|
|
147
|
+
self.parsed_metadata["#{key_for_export(key)}_#{i + 1}"] = prepare_export_data(d)
|
148
|
+
end
|
106
149
|
end
|
150
|
+
else
|
151
|
+
self.parsed_metadata[key_for_export(key)] = prepare_export_data(data)
|
107
152
|
end
|
108
153
|
end
|
109
154
|
|
155
|
+
# On export the key becomes the from and the from becomes the destination. It is the opposite of the import because we are moving data the opposite direction
|
156
|
+
# metadata that does not have a specific Bulkrax entry is mapped to the key name, as matching keys coming in are mapped by the csv parser automatically
|
157
|
+
def key_for_export(key)
|
158
|
+
clean_key = key_without_numbers(key)
|
159
|
+
unnumbered_key = mapping[clean_key] ? mapping[clean_key]['from'].first : clean_key
|
160
|
+
# Bring the number back if there is one
|
161
|
+
"#{unnumbered_key}#{key.sub(clean_key, '')}"
|
162
|
+
end
|
163
|
+
|
110
164
|
def prepare_export_data(datum)
|
111
165
|
if datum.is_a?(ActiveTriples::Resource)
|
112
166
|
datum.to_uri.to_s
|
@@ -115,6 +169,36 @@ module Bulkrax
|
|
115
169
|
end
|
116
170
|
end
|
117
171
|
|
172
|
+
def object_metadata(data)
|
173
|
+
data = data.map { |d| eval(d) }.flatten # rubocop:disable Security/Eval
|
174
|
+
|
175
|
+
data.each_with_index do |obj, index|
|
176
|
+
next if obj.nil?
|
177
|
+
# allow the object_key to be valid whether it's a string or symbol
|
178
|
+
obj = obj.with_indifferent_access
|
179
|
+
|
180
|
+
obj.each_key do |key|
|
181
|
+
if obj[key].is_a?(Array)
|
182
|
+
obj[key].each_with_index do |_nested_item, nested_index|
|
183
|
+
self.parsed_metadata["#{key_for_export(key)}_#{index + 1}_#{nested_index + 1}"] = prepare_export_data(obj[key][nested_index])
|
184
|
+
end
|
185
|
+
else
|
186
|
+
self.parsed_metadata["#{key_for_export(key)}_#{index + 1}"] = prepare_export_data(obj[key])
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
def build_files
|
193
|
+
if mapping['file']&.[]('join')
|
194
|
+
self.parsed_metadata['file'] = hyrax_record.file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact.join('; ')
|
195
|
+
else
|
196
|
+
hyrax_record.file_sets.each_with_index do |fs, i|
|
197
|
+
self.parsed_metadata["file_#{i + 1}"] = filename(fs).to_s if filename(fs).present?
|
198
|
+
end
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
118
202
|
# In order for the existing exported hyrax_record, to be updated by a re-import
|
119
203
|
# we need a unique value in system_identifier
|
120
204
|
# add the existing hyrax_record id to system_identifier
|
@@ -133,18 +217,28 @@ module Bulkrax
|
|
133
217
|
Bulkrax::CsvMatcher
|
134
218
|
end
|
135
219
|
|
220
|
+
def possible_collection_ids
|
221
|
+
ActiveSupport::Deprecation.warn(
|
222
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
223
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
224
|
+
)
|
225
|
+
@possible_collection_ids ||= record.inject([]) do |memo, (key, value)|
|
226
|
+
memo += value.split(/\s*[:;|]\s*/) if self.class.collection_field.to_s == key_without_numbers(key) && value.present?
|
227
|
+
memo
|
228
|
+
end || []
|
229
|
+
end
|
230
|
+
|
136
231
|
def collections_created?
|
137
|
-
|
138
|
-
record[self.class.collection_field].split(/\s*[:;|]\s*/).length == self.collection_ids.length
|
232
|
+
possible_collection_ids.length == self.collection_ids.length
|
139
233
|
end
|
140
234
|
|
141
|
-
def
|
235
|
+
def find_collection_ids
|
142
236
|
return self.collection_ids if collections_created?
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
self.collection_ids << c.id unless
|
237
|
+
if possible_collection_ids.present?
|
238
|
+
possible_collection_ids.each do |collection_id|
|
239
|
+
c = find_collection(collection_id)
|
240
|
+
skip = c.blank? || self.collection_ids.include?(c.id)
|
241
|
+
self.collection_ids << c.id unless skip
|
148
242
|
end
|
149
243
|
end
|
150
244
|
self.collection_ids
|
data/app/models/bulkrax/entry.rb
CHANGED
@@ -23,12 +23,22 @@ module Bulkrax
|
|
23
23
|
|
24
24
|
attr_accessor :all_attrs
|
25
25
|
|
26
|
-
delegate :parser,
|
26
|
+
delegate :parser,
|
27
|
+
:mapping,
|
28
|
+
:replace_files,
|
29
|
+
:update_files,
|
30
|
+
:keys_without_numbers,
|
31
|
+
:key_without_numbers,
|
32
|
+
to: :importerexporter
|
27
33
|
|
28
34
|
delegate :client,
|
29
|
-
|
30
|
-
|
31
|
-
|
35
|
+
:collection_name,
|
36
|
+
:user,
|
37
|
+
:related_parents_raw_mapping,
|
38
|
+
:related_parents_parsed_mapping,
|
39
|
+
:related_children_raw_mapping,
|
40
|
+
:related_children_parsed_mapping,
|
41
|
+
to: :parser
|
32
42
|
|
33
43
|
# Retrieve fields from the file
|
34
44
|
# @param data - the source data
|
@@ -61,13 +71,13 @@ module Bulkrax
|
|
61
71
|
end
|
62
72
|
|
63
73
|
def self.collection_field
|
74
|
+
ActiveSupport::Deprecation.warn(
|
75
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
76
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
77
|
+
)
|
64
78
|
Bulkrax.collection_field_mapping[self.to_s]
|
65
79
|
end
|
66
80
|
|
67
|
-
def self.children_field
|
68
|
-
Bulkrax.parent_child_field_mapping[self.to_s]
|
69
|
-
end
|
70
|
-
|
71
81
|
def build
|
72
82
|
return if type.nil?
|
73
83
|
self.save if self.new_record? # must be saved for statuses
|
@@ -96,6 +106,7 @@ module Bulkrax
|
|
96
106
|
end
|
97
107
|
|
98
108
|
def find_collection(collection_identifier)
|
109
|
+
return unless Collection.properties.keys.include?(work_identifier)
|
99
110
|
Collection.where(
|
100
111
|
work_identifier => collection_identifier
|
101
112
|
).detect { |m| m.send(work_identifier).include?(collection_identifier) }
|
@@ -14,7 +14,7 @@ module Bulkrax
|
|
14
14
|
validates :name, presence: true
|
15
15
|
validates :parser_klass, presence: true
|
16
16
|
|
17
|
-
delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, to: :parser
|
17
|
+
delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
|
18
18
|
|
19
19
|
def export
|
20
20
|
current_run && setup_export_path
|
@@ -25,6 +25,8 @@ module Bulkrax
|
|
25
25
|
create_from_importer
|
26
26
|
when 'worktype'
|
27
27
|
create_from_worktype
|
28
|
+
when 'all'
|
29
|
+
create_from_all
|
28
30
|
end
|
29
31
|
rescue StandardError => e
|
30
32
|
status_info(e)
|
@@ -77,7 +79,8 @@ module Bulkrax
|
|
77
79
|
[
|
78
80
|
[I18n.t('bulkrax.exporter.labels.importer'), 'importer'],
|
79
81
|
[I18n.t('bulkrax.exporter.labels.collection'), 'collection'],
|
80
|
-
[I18n.t('bulkrax.exporter.labels.worktype'), 'worktype']
|
82
|
+
[I18n.t('bulkrax.exporter.labels.worktype'), 'worktype'],
|
83
|
+
[I18n.t('bulkrax.exporter.labels.all'), 'all']
|
81
84
|
]
|
82
85
|
end
|
83
86
|
|
@@ -105,18 +108,22 @@ module Bulkrax
|
|
105
108
|
end
|
106
109
|
|
107
110
|
def exporter_export_path
|
108
|
-
@exporter_export_path ||= File.join(
|
111
|
+
@exporter_export_path ||= File.join(parser.base_path('export'), self.id.to_s, self.exporter_runs.last.id.to_s)
|
109
112
|
end
|
110
113
|
|
111
114
|
def exporter_export_zip_path
|
112
|
-
@exporter_export_zip_path ||= File.join(
|
115
|
+
@exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_#{self.exporter_runs.last.id}.zip")
|
113
116
|
rescue
|
114
|
-
@exporter_export_zip_path ||= File.join(
|
117
|
+
@exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_0.zip")
|
115
118
|
end
|
116
119
|
|
117
120
|
def export_properties
|
118
121
|
properties = Hyrax.config.registered_curation_concern_types.map { |work| work.constantize.properties.keys }.flatten.uniq.sort
|
119
122
|
properties.reject { |prop| Bulkrax.reserved_properties.include?(prop) }
|
120
123
|
end
|
124
|
+
|
125
|
+
def metadata_only?
|
126
|
+
export_type == 'metadata'
|
127
|
+
end
|
121
128
|
end
|
122
129
|
end
|
@@ -18,12 +18,21 @@ module Bulkrax
|
|
18
18
|
validates :admin_set_id, presence: true
|
19
19
|
validates :parser_klass, presence: true
|
20
20
|
|
21
|
-
delegate :valid_import?, :
|
22
|
-
:write_errored_entries_file, :visibility, to: :parser
|
21
|
+
delegate :valid_import?, :write_errored_entries_file, :visibility, to: :parser
|
23
22
|
|
24
23
|
attr_accessor :only_updates, :file_style, :file
|
25
24
|
attr_writer :current_run
|
26
25
|
|
26
|
+
def self.safe_uri_filename(uri)
|
27
|
+
uri = URI.parse(uri) unless uri.is_a?(URI)
|
28
|
+
r = Faraday.head(uri.to_s)
|
29
|
+
return CGI.parse(r.headers['content-disposition'])["filename"][0].delete("\"")
|
30
|
+
rescue
|
31
|
+
filename = File.basename(uri.path)
|
32
|
+
filename.delete!('/')
|
33
|
+
filename.presence || file_set.id
|
34
|
+
end
|
35
|
+
|
27
36
|
def status
|
28
37
|
if self.validate_only
|
29
38
|
'Validated'
|
@@ -87,7 +96,11 @@ module Bulkrax
|
|
87
96
|
end
|
88
97
|
|
89
98
|
def current_run
|
90
|
-
@current_run ||=
|
99
|
+
@current_run ||= if file? && zip?
|
100
|
+
self.importer_runs.create!
|
101
|
+
else
|
102
|
+
self.importer_runs.create!(total_work_entries: self.limit || parser.works_total, total_collection_entries: parser.collections_total)
|
103
|
+
end
|
91
104
|
end
|
92
105
|
|
93
106
|
def last_run
|
@@ -139,11 +152,11 @@ module Bulkrax
|
|
139
152
|
|
140
153
|
# If the import data is zipped, unzip it to this path
|
141
154
|
def importer_unzip_path
|
142
|
-
@importer_unzip_path ||= File.join(
|
155
|
+
@importer_unzip_path ||= File.join(parser.base_path, "import_#{path_string}")
|
143
156
|
end
|
144
157
|
|
145
158
|
def errored_entries_csv_path
|
146
|
-
@errored_entries_csv_path ||= File.join(
|
159
|
+
@errored_entries_csv_path ||= File.join(parser.base_path, "import_#{path_string}_errored_entries.csv")
|
147
160
|
end
|
148
161
|
|
149
162
|
def path_string
|
@@ -151,5 +164,9 @@ module Bulkrax
|
|
151
164
|
rescue
|
152
165
|
"#{self.id}_#{self.created_at.strftime('%Y%m%d%H%M%S')}"
|
153
166
|
end
|
167
|
+
|
168
|
+
def metadata_only?
|
169
|
+
parser.parser_fields['metadata_only'] == true
|
170
|
+
end
|
154
171
|
end
|
155
172
|
end
|
@@ -26,6 +26,10 @@ module Bulkrax
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def build_metadata
|
29
|
+
ActiveSupport::Deprecation.warn(
|
30
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
31
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
32
|
+
)
|
29
33
|
self.parsed_metadata = {}
|
30
34
|
self.parsed_metadata[work_identifier] = [record.header.identifier]
|
31
35
|
|
@@ -56,7 +60,7 @@ module Bulkrax
|
|
56
60
|
# Retrieve list of collections for the entry; add to collection_ids
|
57
61
|
# If OAI-PMH doesn't return setSpec in the headers for GetRecord, use parser.collection_name
|
58
62
|
# in this case, if 'All' is selected, records will not be added to a collection.
|
59
|
-
def
|
63
|
+
def find_collection_ids
|
60
64
|
return self.collection_ids if collections_created?
|
61
65
|
if sets.blank? || parser.collection_name != 'all'
|
62
66
|
# c = Collection.where(Bulkrax.system_identifier_field => importerexporter.unique_collection_identifier(parser.collection_name)).first
|
@@ -14,6 +14,10 @@ module Bulkrax
|
|
14
14
|
end
|
15
15
|
|
16
16
|
def self.data_for_entry(data, source_id)
|
17
|
+
ActiveSupport::Deprecation.warn(
|
18
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
19
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
20
|
+
)
|
17
21
|
reader = data
|
18
22
|
format = reader.class.format.to_sym
|
19
23
|
collections = []
|
@@ -22,7 +26,7 @@ module Bulkrax
|
|
22
26
|
data = RDF::Writer.for(format).buffer do |writer|
|
23
27
|
reader.each_statement do |statement|
|
24
28
|
collections << statement.object.to_s if collection_field.present? && collection_field == statement.predicate.to_s
|
25
|
-
children << statement.object.to_s if
|
29
|
+
children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
|
26
30
|
delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
|
27
31
|
writer << statement
|
28
32
|
end
|
@@ -37,12 +41,13 @@ module Bulkrax
|
|
37
41
|
}
|
38
42
|
end
|
39
43
|
|
40
|
-
def self.
|
41
|
-
|
42
|
-
|
44
|
+
def self.related_children_parsed_mapping
|
45
|
+
return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
|
46
|
+
|
47
|
+
rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
|
48
|
+
return if rdf_related_children_field_mapping.blank?
|
43
49
|
|
44
|
-
|
45
|
-
Bulkrax.parent_child_field_mapping[self.to_s]
|
50
|
+
@related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
|
46
51
|
end
|
47
52
|
|
48
53
|
def record
|
@@ -50,6 +55,10 @@ module Bulkrax
|
|
50
55
|
end
|
51
56
|
|
52
57
|
def build_metadata
|
58
|
+
ActiveSupport::Deprecation.warn(
|
59
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
60
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
61
|
+
)
|
53
62
|
raise StandardError, 'Record not found' if record.nil?
|
54
63
|
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
|
55
64
|
|
@@ -76,7 +85,7 @@ module Bulkrax
|
|
76
85
|
self.raw_metadata['collection'].length == self.collection_ids.length
|
77
86
|
end
|
78
87
|
|
79
|
-
def
|
88
|
+
def find_collection_ids
|
80
89
|
return self.collection_ids if collections_created?
|
81
90
|
if self.raw_metadata['collection'].present?
|
82
91
|
self.raw_metadata['collection'].each do |collection|
|
@@ -39,6 +39,10 @@ module Bulkrax
|
|
39
39
|
end
|
40
40
|
|
41
41
|
def build_metadata
|
42
|
+
ActiveSupport::Deprecation.warn(
|
43
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
44
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
45
|
+
)
|
42
46
|
raise StandardError, 'Record not found' if record.nil?
|
43
47
|
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
|
44
48
|
self.parsed_metadata = {}
|
@@ -40,13 +40,13 @@ module Bulkrax
|
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
43
|
-
#
|
43
|
+
# Prepend the file_set id to ensure a unique filename
|
44
44
|
def filename(file_set)
|
45
45
|
return if file_set.original_file.blank?
|
46
46
|
fn = file_set.original_file.file_name.first
|
47
47
|
mime = Mime::Type.lookup(file_set.original_file.mime_type)
|
48
48
|
ext_mime = MIME::Types.of(file_set.original_file.file_name).first
|
49
|
-
if fn.include?(file_set.id)
|
49
|
+
if fn.include?(file_set.id) || importerexporter.metadata_only?
|
50
50
|
return fn if mime.to_s == ext_mime.to_s
|
51
51
|
return "#{fn}.#{mime.to_sym}"
|
52
52
|
else
|
@@ -33,7 +33,8 @@ module Bulkrax
|
|
33
33
|
if file_value.is_a?(Hash)
|
34
34
|
file_value
|
35
35
|
elsif file_value.is_a?(String)
|
36
|
-
|
36
|
+
name = Bulkrax::Importer.safe_uri_filename(file_value)
|
37
|
+
{ url: file_value, file_name: name }
|
37
38
|
else
|
38
39
|
Rails.logger.error("skipped remote file #{file_value} because we do not recognize the type")
|
39
40
|
nil
|
@@ -1,5 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
2
|
+
# rubocop:disable Metrics/ModuleLength
|
3
3
|
module Bulkrax
|
4
4
|
module HasMatchers
|
5
5
|
extend ActiveSupport::Concern
|
@@ -43,7 +43,6 @@ module Bulkrax
|
|
43
43
|
|
44
44
|
value = if matcher
|
45
45
|
result = matcher.result(self, node_content)
|
46
|
-
next unless result
|
47
46
|
matched_metadata(multiple, name, result, object_multiple)
|
48
47
|
elsif multiple
|
49
48
|
Rails.logger.info("Bulkrax Column automatically matched #{node_name}, #{node_content}")
|
@@ -53,12 +52,20 @@ module Bulkrax
|
|
53
52
|
single_metadata(node_content)
|
54
53
|
end
|
55
54
|
|
56
|
-
|
55
|
+
object_name.present? ? set_parsed_object_data(object_multiple, object_name, name, index, value) : set_parsed_data(name, value)
|
57
56
|
end
|
58
57
|
end
|
59
58
|
|
60
|
-
def set_parsed_data(
|
59
|
+
def set_parsed_data(name, value)
|
60
|
+
return parsed_metadata[name] = value unless multiple?(name)
|
61
|
+
|
62
|
+
parsed_metadata[name] ||= []
|
63
|
+
parsed_metadata[name] += Array.wrap(value).flatten
|
64
|
+
end
|
65
|
+
|
66
|
+
def set_parsed_object_data(object_multiple, object_name, name, index, value)
|
61
67
|
if object_multiple
|
68
|
+
index ||= 0
|
62
69
|
parsed_metadata[object_name][index] ||= {}
|
63
70
|
parsed_metadata[object_name][index][name] ||= []
|
64
71
|
if value.is_a?(Array)
|
@@ -66,20 +73,13 @@ module Bulkrax
|
|
66
73
|
else
|
67
74
|
parsed_metadata[object_name][index][name] = value
|
68
75
|
end
|
69
|
-
|
76
|
+
else
|
70
77
|
parsed_metadata[object_name][name] ||= []
|
71
78
|
if value.is_a?(Array)
|
72
79
|
parsed_metadata[object_name][name] += value
|
73
80
|
else
|
74
81
|
parsed_metadata[object_name][name] = value
|
75
82
|
end
|
76
|
-
else
|
77
|
-
parsed_metadata[name] ||= []
|
78
|
-
if value.is_a?(Array)
|
79
|
-
parsed_metadata[name] += value
|
80
|
-
else
|
81
|
-
parsed_metadata[name] = value
|
82
|
-
end
|
83
83
|
end
|
84
84
|
end
|
85
85
|
|
@@ -90,9 +90,20 @@ module Bulkrax
|
|
90
90
|
end
|
91
91
|
|
92
92
|
def multiple_metadata(content)
|
93
|
-
content = content.content if content.is_a?(Nokogiri::XML::NodeSet)
|
94
93
|
return unless content
|
95
|
-
|
94
|
+
|
95
|
+
case content
|
96
|
+
when Nokogiri::XML::NodeSet
|
97
|
+
content&.content
|
98
|
+
when Array
|
99
|
+
content
|
100
|
+
when Hash
|
101
|
+
Array.wrap(content)
|
102
|
+
when String
|
103
|
+
Array.wrap(content.strip)
|
104
|
+
else
|
105
|
+
Array.wrap(content)
|
106
|
+
end
|
96
107
|
end
|
97
108
|
|
98
109
|
def matched_metadata(multiple, name, result, object_multiple)
|
@@ -113,12 +124,43 @@ module Bulkrax
|
|
113
124
|
field = field.gsub('_attributes', '')
|
114
125
|
|
115
126
|
return false if excluded?(field)
|
116
|
-
return true if
|
127
|
+
return true if supported_bulkrax_fields.include?(field)
|
117
128
|
return factory_class.method_defined?(field) && factory_class.properties[field].present?
|
118
129
|
end
|
119
130
|
|
131
|
+
def supported_bulkrax_fields
|
132
|
+
ActiveSupport::Deprecation.warn(
|
133
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
134
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
135
|
+
)
|
136
|
+
@supported_bulkrax_fields ||=
|
137
|
+
%W[
|
138
|
+
id
|
139
|
+
file
|
140
|
+
remote_files
|
141
|
+
model
|
142
|
+
delete
|
143
|
+
#{parser.collection_field_mapping}
|
144
|
+
#{related_parents_parsed_mapping}
|
145
|
+
#{related_children_parsed_mapping}
|
146
|
+
]
|
147
|
+
end
|
148
|
+
|
120
149
|
def multiple?(field)
|
121
|
-
|
150
|
+
ActiveSupport::Deprecation.warn(
|
151
|
+
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
152
|
+
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
153
|
+
)
|
154
|
+
@multiple_bulkrax_fields ||=
|
155
|
+
%W[
|
156
|
+
file
|
157
|
+
remote_files
|
158
|
+
#{parser.collection_field_mapping}
|
159
|
+
#{related_parents_parsed_mapping}
|
160
|
+
#{related_children_parsed_mapping}
|
161
|
+
]
|
162
|
+
|
163
|
+
return true if @multiple_bulkrax_fields.include?(field)
|
122
164
|
return false if field == 'model'
|
123
165
|
|
124
166
|
field_supported?(field) && factory_class&.properties&.[](field)&.[]('multiple')
|
@@ -153,3 +195,4 @@ module Bulkrax
|
|
153
195
|
end
|
154
196
|
end
|
155
197
|
end
|
198
|
+
# rubocop:enable Metrics/ModuleLength
|