bulkrax 1.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/app/controllers/bulkrax/exporters_controller.rb +12 -4
  4. data/app/controllers/bulkrax/importers_controller.rb +22 -17
  5. data/app/factories/bulkrax/object_factory.rb +44 -61
  6. data/app/jobs/bulkrax/create_relationships_job.rb +187 -0
  7. data/app/jobs/bulkrax/delete_work_job.rb +6 -2
  8. data/app/jobs/bulkrax/export_work_job.rb +3 -1
  9. data/app/jobs/bulkrax/exporter_job.rb +1 -0
  10. data/app/jobs/bulkrax/{import_work_collection_job.rb → import_collection_job.rb} +2 -2
  11. data/app/jobs/bulkrax/importer_job.rb +16 -1
  12. data/app/matchers/bulkrax/application_matcher.rb +9 -6
  13. data/app/models/bulkrax/csv_collection_entry.rb +8 -6
  14. data/app/models/bulkrax/csv_entry.rb +139 -45
  15. data/app/models/bulkrax/entry.rb +19 -8
  16. data/app/models/bulkrax/exporter.rb +12 -5
  17. data/app/models/bulkrax/importer.rb +22 -5
  18. data/app/models/bulkrax/oai_entry.rb +5 -1
  19. data/app/models/bulkrax/rdf_entry.rb +16 -7
  20. data/app/models/bulkrax/xml_entry.rb +4 -0
  21. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  22. data/app/models/concerns/bulkrax/file_factory.rb +2 -1
  23. data/app/models/concerns/bulkrax/has_matchers.rb +59 -16
  24. data/app/models/concerns/bulkrax/import_behavior.rb +35 -5
  25. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +19 -0
  26. data/app/models/concerns/bulkrax/status_info.rb +4 -4
  27. data/app/parsers/bulkrax/application_parser.rb +59 -84
  28. data/app/parsers/bulkrax/bagit_parser.rb +12 -3
  29. data/app/parsers/bulkrax/csv_parser.rb +117 -62
  30. data/app/parsers/bulkrax/oai_dc_parser.rb +5 -2
  31. data/app/parsers/bulkrax/xml_parser.rb +5 -0
  32. data/app/views/bulkrax/exporters/_form.html.erb +1 -1
  33. data/app/views/bulkrax/exporters/show.html.erb +13 -1
  34. data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +45 -14
  35. data/app/views/bulkrax/importers/edit.html.erb +2 -0
  36. data/app/views/bulkrax/importers/index.html.erb +15 -17
  37. data/app/views/bulkrax/importers/show.html.erb +6 -2
  38. data/config/locales/bulkrax.en.yml +1 -0
  39. data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +5 -1
  40. data/db/migrate/20211004170708_change_bulkrax_statuses_error_message_column_type_to_text.rb +5 -0
  41. data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +6 -0
  42. data/lib/bulkrax/engine.rb +1 -1
  43. data/lib/bulkrax/version.rb +1 -1
  44. data/lib/bulkrax.rb +9 -17
  45. data/lib/generators/bulkrax/templates/bin/importer +17 -11
  46. data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +3 -1
  47. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +7 -12
  48. metadata +13 -7
  49. data/app/jobs/bulkrax/child_relationships_job.rb +0 -128
@@ -14,59 +14,68 @@ module Bulkrax
14
14
  def self.read_data(path)
15
15
  raise StandardError, 'CSV path empty' if path.blank?
16
16
  CSV.read(path,
17
- headers: true,
18
- header_converters: :symbol,
19
- encoding: 'utf-8')
17
+ headers: true,
18
+ header_converters: :symbol,
19
+ encoding: 'utf-8')
20
20
  end
21
21
 
22
22
  def self.data_for_entry(data, _source_id)
23
+ ActiveSupport::Deprecation.warn(
24
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
25
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
26
+ )
23
27
  # If a multi-line CSV data is passed, grab the first row
24
28
  data = data.first if data.is_a?(CSV::Table)
25
29
  # model has to be separated so that it doesn't get mistranslated by to_h
26
30
  raw_data = data.to_h
27
- raw_data[:model] = data[:model]
31
+ raw_data[:model] = data[:model] if data[:model].present?
28
32
  # If the collection field mapping is not 'collection', add 'collection' - the parser needs it
29
33
  raw_data[:collection] = raw_data[collection_field.to_sym] if raw_data.keys.include?(collection_field.to_sym) && collection_field != 'collection'
30
- # If the children field mapping is not 'children', add 'children' - the parser needs it
31
- raw_data[:children] = raw_data[collection_field.to_sym] if raw_data.keys.include?(children_field.to_sym) && children_field != 'children'
32
34
  return raw_data
33
35
  end
34
36
 
35
- def self.collection_field
36
- Bulkrax.collection_field_mapping[self.class.to_s] || 'collection'
37
- end
37
+ def build_metadata
38
+ raise StandardError, 'Record not found' if record.nil?
39
+ raise StandardError, "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}" unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
38
40
 
39
- def self.children_field
40
- Bulkrax.parent_child_field_mapping[self.to_s] || 'children'
41
- end
41
+ self.parsed_metadata = {}
42
+ add_identifier
43
+ add_metadata_for_model
44
+ add_visibility
45
+ add_ingested_metadata
46
+ add_rights_statement
47
+ add_collections
48
+ add_local
42
49
 
43
- def keys_without_numbers(keys)
44
- keys.map { |key| key_without_numbers(key) }
50
+ self.parsed_metadata
45
51
  end
46
52
 
47
- def key_without_numbers(key)
48
- key.gsub(/_\d+/, '').sub(/^\d+_/, '')
53
+ def add_identifier
54
+ self.parsed_metadata[work_identifier] = [record[source_identifier]]
49
55
  end
50
56
 
51
- def build_metadata
52
- raise StandardError, 'Record not found' if record.nil?
53
- raise StandardError, "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}" unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
57
+ def add_metadata_for_model
58
+ if factory_class == Collection
59
+ add_collection_type_gid
60
+ else
61
+ add_file unless importerexporter.metadata_only?
62
+ add_admin_set_id
63
+ end
64
+ end
54
65
 
55
- self.parsed_metadata = {}
56
- self.parsed_metadata[work_identifier] = [record[source_identifier]]
66
+ def add_ingested_metadata
67
+ ActiveSupport::Deprecation.warn(
68
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
69
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
70
+ )
71
+ # we do not want to sort the values in the record before adding the metadata.
72
+ # if we do, the factory_class will be set to the default_work_type for all values that come before "model" or "work type"
57
73
  record.each do |key, value|
58
- next if key == 'collection'
74
+ next if self.parser.collection_field_mapping.to_s == key_without_numbers(key)
59
75
 
60
76
  index = key[/\d+/].to_i - 1 if key[/\d+/].to_i != 0
61
77
  add_metadata(key_without_numbers(key), value, index)
62
78
  end
63
- add_file
64
- add_visibility
65
- add_rights_statement
66
- add_admin_set_id
67
- add_collections
68
- add_local
69
- self.parsed_metadata
70
79
  end
71
80
 
72
81
  def add_file
@@ -86,10 +95,20 @@ module Bulkrax
86
95
  self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
87
96
  self.parsed_metadata['model'] = hyrax_record.has_model.first
88
97
  build_mapping_metadata
89
- self.parsed_metadata['collections'] = hyrax_record.member_of_collection_ids.join('; ')
90
- unless hyrax_record.is_a?(Collection)
91
- self.parsed_metadata['file'] = hyrax_record.file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact.join('; ')
98
+
99
+ # TODO: fix the "send" parameter in the conditional below
100
+ # currently it returns: "NoMethodError - undefined method 'bulkrax_identifier' for #<Collection:0x00007fbe6a3b4248>"
101
+ if mapping['collection']&.[]('join')
102
+ self.parsed_metadata['collection'] = hyrax_record.member_of_collection_ids.join('; ')
103
+ # self.parsed_metadata['collection'] = hyrax_record.member_of_collections.map { |c| c.send(work_identifier)&.first }.compact.uniq.join(';')
104
+ else
105
+ hyrax_record.member_of_collections.each_with_index do |collection, i|
106
+ self.parsed_metadata["collection_#{i + 1}"] = collection.id
107
+ # self.parsed_metadata["collection_#{i + 1}"] = collection.send(work_identifier)&.first
108
+ end
92
109
  end
110
+
111
+ build_files unless hyrax_record.is_a?(Collection)
93
112
  self.parsed_metadata
94
113
  end
95
114
 
@@ -97,16 +116,51 @@ module Bulkrax
97
116
  mapping.each do |key, value|
98
117
  next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
99
118
  next if key == "model"
100
- next unless hyrax_record.respond_to?(key.to_s)
101
- data = hyrax_record.send(key.to_s)
102
- if data.is_a?(ActiveTriples::Relation)
103
- self.parsed_metadata[key] = data.map { |d| prepare_export_data(d) }.join('; ').to_s unless value[:excluded]
119
+ next if value['excluded']
120
+
121
+ object_key = key if value.key?('object')
122
+ next unless hyrax_record.respond_to?(key.to_s) || object_key.present?
123
+
124
+ if object_key.present?
125
+ build_object(value)
126
+ else
127
+ build_value(key, value)
128
+ end
129
+ end
130
+ end
131
+
132
+ def build_object(value)
133
+ data = hyrax_record.send(value['object'])
134
+ return if data.empty?
135
+
136
+ data = data.to_a if data.is_a?(ActiveTriples::Relation)
137
+ object_metadata(Array.wrap(data))
138
+ end
139
+
140
+ def build_value(key, value)
141
+ data = hyrax_record.send(key.to_s)
142
+ if data.is_a?(ActiveTriples::Relation)
143
+ if value['join']
144
+ self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join('; ').to_s
104
145
  else
105
- self.parsed_metadata[key] = prepare_export_data(data)
146
+ data.each_with_index do |d, i|
147
+ self.parsed_metadata["#{key_for_export(key)}_#{i + 1}"] = prepare_export_data(d)
148
+ end
106
149
  end
150
+ else
151
+ self.parsed_metadata[key_for_export(key)] = prepare_export_data(data)
107
152
  end
108
153
  end
109
154
 
155
+ # On export the key becomes the from and the from becomes the destination. It is the opposite of the import because we are moving data the opposite direction
156
+ # metadata that does not have a specific Bulkrax entry is mapped to the key name, as matching keys coming in are mapped by the csv parser automatically
157
+ def key_for_export(key)
158
+ clean_key = key_without_numbers(key)
159
+ unnumbered_key = mapping[clean_key] ? mapping[clean_key]['from'].first : clean_key
160
+ # Bring the number back if there is one
161
+ "#{unnumbered_key}#{key.sub(clean_key, '')}"
162
+ end
163
+
110
164
  def prepare_export_data(datum)
111
165
  if datum.is_a?(ActiveTriples::Resource)
112
166
  datum.to_uri.to_s
@@ -115,6 +169,36 @@ module Bulkrax
115
169
  end
116
170
  end
117
171
 
172
+ def object_metadata(data)
173
+ data = data.map { |d| eval(d) }.flatten # rubocop:disable Security/Eval
174
+
175
+ data.each_with_index do |obj, index|
176
+ next if obj.nil?
177
+ # allow the object_key to be valid whether it's a string or symbol
178
+ obj = obj.with_indifferent_access
179
+
180
+ obj.each_key do |key|
181
+ if obj[key].is_a?(Array)
182
+ obj[key].each_with_index do |_nested_item, nested_index|
183
+ self.parsed_metadata["#{key_for_export(key)}_#{index + 1}_#{nested_index + 1}"] = prepare_export_data(obj[key][nested_index])
184
+ end
185
+ else
186
+ self.parsed_metadata["#{key_for_export(key)}_#{index + 1}"] = prepare_export_data(obj[key])
187
+ end
188
+ end
189
+ end
190
+ end
191
+
192
+ def build_files
193
+ if mapping['file']&.[]('join')
194
+ self.parsed_metadata['file'] = hyrax_record.file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact.join('; ')
195
+ else
196
+ hyrax_record.file_sets.each_with_index do |fs, i|
197
+ self.parsed_metadata["file_#{i + 1}"] = filename(fs).to_s if filename(fs).present?
198
+ end
199
+ end
200
+ end
201
+
118
202
  # In order for the existing exported hyrax_record, to be updated by a re-import
119
203
  # we need a unique value in system_identifier
120
204
  # add the existing hyrax_record id to system_identifier
@@ -133,18 +217,28 @@ module Bulkrax
133
217
  Bulkrax::CsvMatcher
134
218
  end
135
219
 
220
+ def possible_collection_ids
221
+ ActiveSupport::Deprecation.warn(
222
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
223
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
224
+ )
225
+ @possible_collection_ids ||= record.inject([]) do |memo, (key, value)|
226
+ memo += value.split(/\s*[:;|]\s*/) if self.class.collection_field.to_s == key_without_numbers(key) && value.present?
227
+ memo
228
+ end || []
229
+ end
230
+
136
231
  def collections_created?
137
- return true if record[self.class.collection_field].blank?
138
- record[self.class.collection_field].split(/\s*[:;|]\s*/).length == self.collection_ids.length
232
+ possible_collection_ids.length == self.collection_ids.length
139
233
  end
140
234
 
141
- def find_or_create_collection_ids
235
+ def find_collection_ids
142
236
  return self.collection_ids if collections_created?
143
- valid_system_id(Collection)
144
- if record[self.class.collection_field].present?
145
- record[self.class.collection_field].split(/\s*[:;|]\s*/).each do |collection|
146
- c = find_collection(collection)
147
- self.collection_ids << c.id unless c.blank? || self.collection_ids.include?(c.id)
237
+ if possible_collection_ids.present?
238
+ possible_collection_ids.each do |collection_id|
239
+ c = find_collection(collection_id)
240
+ skip = c.blank? || self.collection_ids.include?(c.id)
241
+ self.collection_ids << c.id unless skip
148
242
  end
149
243
  end
150
244
  self.collection_ids
@@ -23,12 +23,22 @@ module Bulkrax
23
23
 
24
24
  attr_accessor :all_attrs
25
25
 
26
- delegate :parser, :mapping, :replace_files, :update_files, to: :importerexporter
26
+ delegate :parser,
27
+ :mapping,
28
+ :replace_files,
29
+ :update_files,
30
+ :keys_without_numbers,
31
+ :key_without_numbers,
32
+ to: :importerexporter
27
33
 
28
34
  delegate :client,
29
- :collection_name,
30
- :user,
31
- to: :parser
35
+ :collection_name,
36
+ :user,
37
+ :related_parents_raw_mapping,
38
+ :related_parents_parsed_mapping,
39
+ :related_children_raw_mapping,
40
+ :related_children_parsed_mapping,
41
+ to: :parser
32
42
 
33
43
  # Retrieve fields from the file
34
44
  # @param data - the source data
@@ -61,13 +71,13 @@ module Bulkrax
61
71
  end
62
72
 
63
73
  def self.collection_field
74
+ ActiveSupport::Deprecation.warn(
75
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
76
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
77
+ )
64
78
  Bulkrax.collection_field_mapping[self.to_s]
65
79
  end
66
80
 
67
- def self.children_field
68
- Bulkrax.parent_child_field_mapping[self.to_s]
69
- end
70
-
71
81
  def build
72
82
  return if type.nil?
73
83
  self.save if self.new_record? # must be saved for statuses
@@ -96,6 +106,7 @@ module Bulkrax
96
106
  end
97
107
 
98
108
  def find_collection(collection_identifier)
109
+ return unless Collection.properties.keys.include?(work_identifier)
99
110
  Collection.where(
100
111
  work_identifier => collection_identifier
101
112
  ).detect { |m| m.send(work_identifier).include?(collection_identifier) }
@@ -14,7 +14,7 @@ module Bulkrax
14
14
  validates :name, presence: true
15
15
  validates :parser_klass, presence: true
16
16
 
17
- delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, to: :parser
17
+ delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
18
18
 
19
19
  def export
20
20
  current_run && setup_export_path
@@ -25,6 +25,8 @@ module Bulkrax
25
25
  create_from_importer
26
26
  when 'worktype'
27
27
  create_from_worktype
28
+ when 'all'
29
+ create_from_all
28
30
  end
29
31
  rescue StandardError => e
30
32
  status_info(e)
@@ -77,7 +79,8 @@ module Bulkrax
77
79
  [
78
80
  [I18n.t('bulkrax.exporter.labels.importer'), 'importer'],
79
81
  [I18n.t('bulkrax.exporter.labels.collection'), 'collection'],
80
- [I18n.t('bulkrax.exporter.labels.worktype'), 'worktype']
82
+ [I18n.t('bulkrax.exporter.labels.worktype'), 'worktype'],
83
+ [I18n.t('bulkrax.exporter.labels.all'), 'all']
81
84
  ]
82
85
  end
83
86
 
@@ -105,18 +108,22 @@ module Bulkrax
105
108
  end
106
109
 
107
110
  def exporter_export_path
108
- @exporter_export_path ||= File.join(Bulkrax.export_path, self.id.to_s, self.exporter_runs.last.id.to_s)
111
+ @exporter_export_path ||= File.join(parser.base_path('export'), self.id.to_s, self.exporter_runs.last.id.to_s)
109
112
  end
110
113
 
111
114
  def exporter_export_zip_path
112
- @exporter_export_zip_path ||= File.join(Bulkrax.export_path, "export_#{self.id}_#{self.exporter_runs.last.id}.zip")
115
+ @exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_#{self.exporter_runs.last.id}.zip")
113
116
  rescue
114
- @exporter_export_zip_path ||= File.join(Bulkrax.export_path, "export_#{self.id}_0.zip")
117
+ @exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_0.zip")
115
118
  end
116
119
 
117
120
  def export_properties
118
121
  properties = Hyrax.config.registered_curation_concern_types.map { |work| work.constantize.properties.keys }.flatten.uniq.sort
119
122
  properties.reject { |prop| Bulkrax.reserved_properties.include?(prop) }
120
123
  end
124
+
125
+ def metadata_only?
126
+ export_type == 'metadata'
127
+ end
121
128
  end
122
129
  end
@@ -18,12 +18,21 @@ module Bulkrax
18
18
  validates :admin_set_id, presence: true
19
19
  validates :parser_klass, presence: true
20
20
 
21
- delegate :valid_import?, :create_parent_child_relationships,
22
- :write_errored_entries_file, :visibility, to: :parser
21
+ delegate :valid_import?, :write_errored_entries_file, :visibility, to: :parser
23
22
 
24
23
  attr_accessor :only_updates, :file_style, :file
25
24
  attr_writer :current_run
26
25
 
26
+ def self.safe_uri_filename(uri)
27
+ uri = URI.parse(uri) unless uri.is_a?(URI)
28
+ r = Faraday.head(uri.to_s)
29
+ return CGI.parse(r.headers['content-disposition'])["filename"][0].delete("\"")
30
+ rescue
31
+ filename = File.basename(uri.path)
32
+ filename.delete!('/')
33
+ filename.presence || file_set.id
34
+ end
35
+
27
36
  def status
28
37
  if self.validate_only
29
38
  'Validated'
@@ -87,7 +96,11 @@ module Bulkrax
87
96
  end
88
97
 
89
98
  def current_run
90
- @current_run ||= self.importer_runs.create!(total_work_entries: self.limit || parser.total, total_collection_entries: parser.collections_total)
99
+ @current_run ||= if file? && zip?
100
+ self.importer_runs.create!
101
+ else
102
+ self.importer_runs.create!(total_work_entries: self.limit || parser.works_total, total_collection_entries: parser.collections_total)
103
+ end
91
104
  end
92
105
 
93
106
  def last_run
@@ -139,11 +152,11 @@ module Bulkrax
139
152
 
140
153
  # If the import data is zipped, unzip it to this path
141
154
  def importer_unzip_path
142
- @importer_unzip_path ||= File.join(Bulkrax.import_path, "import_#{path_string}")
155
+ @importer_unzip_path ||= File.join(parser.base_path, "import_#{path_string}")
143
156
  end
144
157
 
145
158
  def errored_entries_csv_path
146
- @errored_entries_csv_path ||= File.join(Bulkrax.import_path, "import_#{path_string}_errored_entries.csv")
159
+ @errored_entries_csv_path ||= File.join(parser.base_path, "import_#{path_string}_errored_entries.csv")
147
160
  end
148
161
 
149
162
  def path_string
@@ -151,5 +164,9 @@ module Bulkrax
151
164
  rescue
152
165
  "#{self.id}_#{self.created_at.strftime('%Y%m%d%H%M%S')}"
153
166
  end
167
+
168
+ def metadata_only?
169
+ parser.parser_fields['metadata_only'] == true
170
+ end
154
171
  end
155
172
  end
@@ -26,6 +26,10 @@ module Bulkrax
26
26
  end
27
27
 
28
28
  def build_metadata
29
+ ActiveSupport::Deprecation.warn(
30
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
31
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
32
+ )
29
33
  self.parsed_metadata = {}
30
34
  self.parsed_metadata[work_identifier] = [record.header.identifier]
31
35
 
@@ -56,7 +60,7 @@ module Bulkrax
56
60
  # Retrieve list of collections for the entry; add to collection_ids
57
61
  # If OAI-PMH doesn't return setSpec in the headers for GetRecord, use parser.collection_name
58
62
  # in this case, if 'All' is selected, records will not be added to a collection.
59
- def find_or_create_collection_ids
63
+ def find_collection_ids
60
64
  return self.collection_ids if collections_created?
61
65
  if sets.blank? || parser.collection_name != 'all'
62
66
  # c = Collection.where(Bulkrax.system_identifier_field => importerexporter.unique_collection_identifier(parser.collection_name)).first
@@ -14,6 +14,10 @@ module Bulkrax
14
14
  end
15
15
 
16
16
  def self.data_for_entry(data, source_id)
17
+ ActiveSupport::Deprecation.warn(
18
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
19
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
20
+ )
17
21
  reader = data
18
22
  format = reader.class.format.to_sym
19
23
  collections = []
@@ -22,7 +26,7 @@ module Bulkrax
22
26
  data = RDF::Writer.for(format).buffer do |writer|
23
27
  reader.each_statement do |statement|
24
28
  collections << statement.object.to_s if collection_field.present? && collection_field == statement.predicate.to_s
25
- children << statement.object.to_s if children_field.present? && children_field == statement.predicate.to_s
29
+ children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
26
30
  delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
27
31
  writer << statement
28
32
  end
@@ -37,12 +41,13 @@ module Bulkrax
37
41
  }
38
42
  end
39
43
 
40
- def self.collection_field
41
- Bulkrax.collection_field_mapping[self.to_s]
42
- end
44
+ def self.related_children_parsed_mapping
45
+ return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
46
+
47
+ rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
48
+ return if rdf_related_children_field_mapping.blank?
43
49
 
44
- def self.children_field
45
- Bulkrax.parent_child_field_mapping[self.to_s]
50
+ @related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
46
51
  end
47
52
 
48
53
  def record
@@ -50,6 +55,10 @@ module Bulkrax
50
55
  end
51
56
 
52
57
  def build_metadata
58
+ ActiveSupport::Deprecation.warn(
59
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
60
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
61
+ )
53
62
  raise StandardError, 'Record not found' if record.nil?
54
63
  raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
55
64
 
@@ -76,7 +85,7 @@ module Bulkrax
76
85
  self.raw_metadata['collection'].length == self.collection_ids.length
77
86
  end
78
87
 
79
- def find_or_create_collection_ids
88
+ def find_collection_ids
80
89
  return self.collection_ids if collections_created?
81
90
  if self.raw_metadata['collection'].present?
82
91
  self.raw_metadata['collection'].each do |collection|
@@ -39,6 +39,10 @@ module Bulkrax
39
39
  end
40
40
 
41
41
  def build_metadata
42
+ ActiveSupport::Deprecation.warn(
43
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
44
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
45
+ )
42
46
  raise StandardError, 'Record not found' if record.nil?
43
47
  raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
44
48
  self.parsed_metadata = {}
@@ -40,13 +40,13 @@ module Bulkrax
40
40
  end
41
41
  end
42
42
 
43
- # Append the file_set id to ensure a unique filename
43
+ # Prepend the file_set id to ensure a unique filename
44
44
  def filename(file_set)
45
45
  return if file_set.original_file.blank?
46
46
  fn = file_set.original_file.file_name.first
47
47
  mime = Mime::Type.lookup(file_set.original_file.mime_type)
48
48
  ext_mime = MIME::Types.of(file_set.original_file.file_name).first
49
- if fn.include?(file_set.id)
49
+ if fn.include?(file_set.id) || importerexporter.metadata_only?
50
50
  return fn if mime.to_s == ext_mime.to_s
51
51
  return "#{fn}.#{mime.to_sym}"
52
52
  else
@@ -33,7 +33,8 @@ module Bulkrax
33
33
  if file_value.is_a?(Hash)
34
34
  file_value
35
35
  elsif file_value.is_a?(String)
36
- { url: file_value }
36
+ name = Bulkrax::Importer.safe_uri_filename(file_value)
37
+ { url: file_value, file_name: name }
37
38
  else
38
39
  Rails.logger.error("skipped remote file #{file_value} because we do not recognize the type")
39
40
  nil
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
-
2
+ # rubocop:disable Metrics/ModuleLength
3
3
  module Bulkrax
4
4
  module HasMatchers
5
5
  extend ActiveSupport::Concern
@@ -43,7 +43,6 @@ module Bulkrax
43
43
 
44
44
  value = if matcher
45
45
  result = matcher.result(self, node_content)
46
- next unless result
47
46
  matched_metadata(multiple, name, result, object_multiple)
48
47
  elsif multiple
49
48
  Rails.logger.info("Bulkrax Column automatically matched #{node_name}, #{node_content}")
@@ -53,12 +52,20 @@ module Bulkrax
53
52
  single_metadata(node_content)
54
53
  end
55
54
 
56
- set_parsed_data(object_multiple, object_name, name, index, value) if value
55
+ object_name.present? ? set_parsed_object_data(object_multiple, object_name, name, index, value) : set_parsed_data(name, value)
57
56
  end
58
57
  end
59
58
 
60
- def set_parsed_data(object_multiple, object_name, name, index, value)
59
+ def set_parsed_data(name, value)
60
+ return parsed_metadata[name] = value unless multiple?(name)
61
+
62
+ parsed_metadata[name] ||= []
63
+ parsed_metadata[name] += Array.wrap(value).flatten
64
+ end
65
+
66
+ def set_parsed_object_data(object_multiple, object_name, name, index, value)
61
67
  if object_multiple
68
+ index ||= 0
62
69
  parsed_metadata[object_name][index] ||= {}
63
70
  parsed_metadata[object_name][index][name] ||= []
64
71
  if value.is_a?(Array)
@@ -66,20 +73,13 @@ module Bulkrax
66
73
  else
67
74
  parsed_metadata[object_name][index][name] = value
68
75
  end
69
- elsif object_name
76
+ else
70
77
  parsed_metadata[object_name][name] ||= []
71
78
  if value.is_a?(Array)
72
79
  parsed_metadata[object_name][name] += value
73
80
  else
74
81
  parsed_metadata[object_name][name] = value
75
82
  end
76
- else
77
- parsed_metadata[name] ||= []
78
- if value.is_a?(Array)
79
- parsed_metadata[name] += value
80
- else
81
- parsed_metadata[name] = value
82
- end
83
83
  end
84
84
  end
85
85
 
@@ -90,9 +90,20 @@ module Bulkrax
90
90
  end
91
91
 
92
92
  def multiple_metadata(content)
93
- content = content.content if content.is_a?(Nokogiri::XML::NodeSet)
94
93
  return unless content
95
- content.is_a?(Array) ? content : Array.wrap(content.strip)
94
+
95
+ case content
96
+ when Nokogiri::XML::NodeSet
97
+ content&.content
98
+ when Array
99
+ content
100
+ when Hash
101
+ Array.wrap(content)
102
+ when String
103
+ Array.wrap(content.strip)
104
+ else
105
+ Array.wrap(content)
106
+ end
96
107
  end
97
108
 
98
109
  def matched_metadata(multiple, name, result, object_multiple)
@@ -113,12 +124,43 @@ module Bulkrax
113
124
  field = field.gsub('_attributes', '')
114
125
 
115
126
  return false if excluded?(field)
116
- return true if ['collections', 'file', 'remote_files', 'model', 'delete'].include?(field)
127
+ return true if supported_bulkrax_fields.include?(field)
117
128
  return factory_class.method_defined?(field) && factory_class.properties[field].present?
118
129
  end
119
130
 
131
+ def supported_bulkrax_fields
132
+ ActiveSupport::Deprecation.warn(
133
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
134
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
135
+ )
136
+ @supported_bulkrax_fields ||=
137
+ %W[
138
+ id
139
+ file
140
+ remote_files
141
+ model
142
+ delete
143
+ #{parser.collection_field_mapping}
144
+ #{related_parents_parsed_mapping}
145
+ #{related_children_parsed_mapping}
146
+ ]
147
+ end
148
+
120
149
  def multiple?(field)
121
- return true if field == 'file' || field == 'remote_files' || field == 'collections'
150
+ ActiveSupport::Deprecation.warn(
151
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
152
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
153
+ )
154
+ @multiple_bulkrax_fields ||=
155
+ %W[
156
+ file
157
+ remote_files
158
+ #{parser.collection_field_mapping}
159
+ #{related_parents_parsed_mapping}
160
+ #{related_children_parsed_mapping}
161
+ ]
162
+
163
+ return true if @multiple_bulkrax_fields.include?(field)
122
164
  return false if field == 'model'
123
165
 
124
166
  field_supported?(field) && factory_class&.properties&.[](field)&.[]('multiple')
@@ -153,3 +195,4 @@ module Bulkrax
153
195
  end
154
196
  end
155
197
  end
198
+ # rubocop:enable Metrics/ModuleLength