bulkrax 1.0.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/app/controllers/bulkrax/exporters_controller.rb +12 -4
  4. data/app/controllers/bulkrax/importers_controller.rb +23 -17
  5. data/app/factories/bulkrax/object_factory.rb +84 -63
  6. data/app/jobs/bulkrax/create_relationships_job.rb +156 -0
  7. data/app/jobs/bulkrax/delete_work_job.rb +6 -2
  8. data/app/jobs/bulkrax/export_work_job.rb +3 -1
  9. data/app/jobs/bulkrax/exporter_job.rb +1 -0
  10. data/app/jobs/bulkrax/{import_work_collection_job.rb → import_collection_job.rb} +4 -2
  11. data/app/jobs/bulkrax/import_file_set_job.rb +69 -0
  12. data/app/jobs/bulkrax/import_work_job.rb +2 -0
  13. data/app/jobs/bulkrax/importer_job.rb +18 -1
  14. data/app/matchers/bulkrax/application_matcher.rb +5 -5
  15. data/app/models/bulkrax/csv_collection_entry.rb +8 -6
  16. data/app/models/bulkrax/csv_entry.rb +132 -65
  17. data/app/models/bulkrax/csv_file_set_entry.rb +26 -0
  18. data/app/models/bulkrax/entry.rb +19 -8
  19. data/app/models/bulkrax/exporter.rb +12 -5
  20. data/app/models/bulkrax/importer.rb +24 -5
  21. data/app/models/bulkrax/oai_entry.rb +5 -1
  22. data/app/models/bulkrax/rdf_entry.rb +16 -7
  23. data/app/models/bulkrax/xml_entry.rb +4 -0
  24. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +39 -0
  25. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  26. data/app/models/concerns/bulkrax/has_matchers.rb +44 -13
  27. data/app/models/concerns/bulkrax/import_behavior.rb +40 -5
  28. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +23 -2
  29. data/app/models/concerns/bulkrax/status_info.rb +4 -4
  30. data/app/parsers/bulkrax/application_parser.rb +67 -84
  31. data/app/parsers/bulkrax/bagit_parser.rb +13 -4
  32. data/app/parsers/bulkrax/csv_parser.rb +170 -64
  33. data/app/parsers/bulkrax/oai_dc_parser.rb +6 -3
  34. data/app/parsers/bulkrax/xml_parser.rb +5 -0
  35. data/app/views/bulkrax/exporters/_form.html.erb +1 -1
  36. data/app/views/bulkrax/exporters/show.html.erb +2 -1
  37. data/app/views/bulkrax/importers/index.html.erb +17 -17
  38. data/app/views/bulkrax/importers/show.html.erb +52 -6
  39. data/config/locales/bulkrax.en.yml +1 -0
  40. data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +5 -1
  41. data/db/migrate/20211004170708_change_bulkrax_statuses_error_message_column_type_to_text.rb +5 -0
  42. data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +6 -0
  43. data/db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb +7 -0
  44. data/db/migrate/20220118001339_add_import_attempts_to_entries.rb +5 -0
  45. data/db/migrate/20220119213325_add_work_counters_to_importer_runs.rb +6 -0
  46. data/lib/bulkrax/engine.rb +1 -1
  47. data/lib/bulkrax/version.rb +1 -1
  48. data/lib/bulkrax.rb +9 -17
  49. data/lib/generators/bulkrax/templates/bin/importer +17 -11
  50. data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +3 -1
  51. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +7 -12
  52. metadata +22 -10
  53. data/app/jobs/bulkrax/child_relationships_job.rb +0 -128
@@ -6,17 +6,34 @@ module Bulkrax
6
6
 
7
7
  def perform(importer_id, only_updates_since_last_import = false)
8
8
  importer = Importer.find(importer_id)
9
+
9
10
  importer.current_run
11
+ unzip_imported_file(importer.parser)
10
12
  import(importer, only_updates_since_last_import)
13
+ update_current_run_counters(importer)
11
14
  schedule(importer) if importer.schedulable?
12
15
  end
13
16
 
14
17
  def import(importer, only_updates_since_last_import)
15
18
  importer.only_updates = only_updates_since_last_import || false
16
19
  return unless importer.valid_import?
20
+
17
21
  importer.import_collections
18
22
  importer.import_works
19
- importer.create_parent_child_relationships unless importer.validate_only
23
+ importer.import_file_sets
24
+ end
25
+
26
+ def unzip_imported_file(parser)
27
+ return unless parser.file? && parser.zip?
28
+
29
+ parser.unzip(parser.parser_fields['import_file_path'])
30
+ end
31
+
32
+ def update_current_run_counters(importer)
33
+ importer.current_run.total_work_entries = importer.limit || importer.parser.works_total
34
+ importer.current_run.total_collection_entries = importer.parser.collections_total
35
+ importer.current_run.total_file_set_entries = importer.parser.file_sets_total
36
+ importer.current_run.save!
20
37
  end
21
38
 
22
39
  def schedule(importer)
@@ -20,9 +20,9 @@ module Bulkrax
20
20
  return unless content.send(self.if[0], Regexp.new(self.if[1]))
21
21
  end
22
22
 
23
- @result = content.to_s.gsub(/\s/, ' ') # remove any line feeds and tabs
24
- @result.strip!
25
- process_split
23
+ # @result will evaluate to an empty string for nil content values
24
+ @result = content.to_s.gsub(/\s/, ' ').strip # remove any line feeds and tabs
25
+ process_split if @result.present?
26
26
  @result = @result[0] if @result.is_a?(Array) && @result.size == 1
27
27
  process_parse
28
28
  return @result
@@ -66,14 +66,14 @@ module Bulkrax
66
66
  end
67
67
 
68
68
  def parse_subject(src)
69
- string = src.to_s.strip.downcase
69
+ string = src.strip.downcase
70
70
  return if string.blank?
71
71
 
72
72
  string.slice(0, 1).capitalize + string.slice(1..-1)
73
73
  end
74
74
 
75
75
  def parse_types(src)
76
- src.to_s.strip.titleize
76
+ src.strip.titleize
77
77
  end
78
78
 
79
79
  # Allow for mapping a model field to the work type or collection
@@ -6,14 +6,16 @@ module Bulkrax
6
6
  Collection
7
7
  end
8
8
 
9
- def build_metadata
10
- self.parsed_metadata = self.raw_metadata
11
- add_local
12
- return self.parsed_metadata
9
+ # Use identifier set by CsvParser#unique_collection_identifier, which falls back
10
+ # on the Collection's first title if record[source_identifier] is not present
11
+ def add_identifier
12
+ self.parsed_metadata[work_identifier] = [self.identifier].flatten
13
13
  end
14
14
 
15
- def collections_created?
16
- true
15
+ def add_collection_type_gid
16
+ return if self.parsed_metadata['collection_type_gid'].present?
17
+
18
+ self.parsed_metadata['collection_type_gid'] = ::Hyrax::CollectionType.find_or_create_default_collection_type.gid
17
19
  end
18
20
  end
19
21
  end
@@ -14,59 +14,71 @@ module Bulkrax
14
14
  def self.read_data(path)
15
15
  raise StandardError, 'CSV path empty' if path.blank?
16
16
  CSV.read(path,
17
- headers: true,
18
- header_converters: :symbol,
19
- encoding: 'utf-8')
17
+ headers: true,
18
+ header_converters: :symbol,
19
+ encoding: 'utf-8')
20
20
  end
21
21
 
22
22
  def self.data_for_entry(data, _source_id)
23
+ ActiveSupport::Deprecation.warn(
24
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
25
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
26
+ )
23
27
  # If a multi-line CSV data is passed, grab the first row
24
28
  data = data.first if data.is_a?(CSV::Table)
25
29
  # model has to be separated so that it doesn't get mistranslated by to_h
26
30
  raw_data = data.to_h
27
- raw_data[:model] = data[:model]
31
+ raw_data[:model] = data[:model] if data[:model].present?
28
32
  # If the collection field mapping is not 'collection', add 'collection' - the parser needs it
29
33
  raw_data[:collection] = raw_data[collection_field.to_sym] if raw_data.keys.include?(collection_field.to_sym) && collection_field != 'collection'
30
- # If the children field mapping is not 'children', add 'children' - the parser needs it
31
- raw_data[:children] = raw_data[collection_field.to_sym] if raw_data.keys.include?(children_field.to_sym) && children_field != 'children'
32
34
  return raw_data
33
35
  end
34
36
 
35
- def self.collection_field
36
- Bulkrax.collection_field_mapping[self.class.to_s] || 'collection'
37
- end
37
+ def build_metadata
38
+ raise StandardError, 'Record not found' if record.nil?
39
+ raise StandardError, "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}" unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
38
40
 
39
- def self.children_field
40
- Bulkrax.parent_child_field_mapping[self.to_s] || 'children'
41
- end
41
+ self.parsed_metadata = {}
42
+ add_identifier
43
+ add_visibility
44
+ add_ingested_metadata
45
+ add_metadata_for_model
46
+ add_rights_statement
47
+ add_collections
48
+ add_local
42
49
 
43
- def keys_without_numbers(keys)
44
- keys.map { |key| key_without_numbers(key) }
50
+ self.parsed_metadata
45
51
  end
46
52
 
47
- def key_without_numbers(key)
48
- key.gsub(/_\d+/, '').sub(/^\d+_/, '')
53
+ def add_identifier
54
+ self.parsed_metadata[work_identifier] = [record[source_identifier]]
49
55
  end
50
56
 
51
- def build_metadata
52
- raise StandardError, 'Record not found' if record.nil?
53
- raise StandardError, "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}" unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
57
+ def add_metadata_for_model
58
+ if factory_class == Collection
59
+ add_collection_type_gid
60
+ elsif factory_class == FileSet
61
+ add_path_to_file
62
+ validate_presence_of_parent!
63
+ else
64
+ add_file unless importerexporter.metadata_only?
65
+ add_admin_set_id
66
+ end
67
+ end
54
68
 
55
- self.parsed_metadata = {}
56
- self.parsed_metadata[work_identifier] = [record[source_identifier]]
69
+ def add_ingested_metadata
70
+ ActiveSupport::Deprecation.warn(
71
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
72
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
73
+ )
74
+ # we do not want to sort the values in the record before adding the metadata.
75
+ # if we do, the factory_class will be set to the default_work_type for all values that come before "model" or "work type"
57
76
  record.each do |key, value|
58
- next if key == 'collection'
77
+ next if self.parser.collection_field_mapping.to_s == key_without_numbers(key)
59
78
 
60
79
  index = key[/\d+/].to_i - 1 if key[/\d+/].to_i != 0
61
80
  add_metadata(key_without_numbers(key), value, index)
62
81
  end
63
- add_file
64
- add_visibility
65
- add_rights_statement
66
- add_admin_set_id
67
- add_collections
68
- add_local
69
- self.parsed_metadata
70
82
  end
71
83
 
72
84
  def add_file
@@ -76,7 +88,11 @@ module Bulkrax
76
88
  elsif record['file'].is_a?(Array)
77
89
  self.parsed_metadata['file'] = record['file']
78
90
  end
79
- self.parsed_metadata['file'] = self.parsed_metadata['file'].map { |f| path_to_file(f.tr(' ', '_')) }
91
+ self.parsed_metadata['file'] = self.parsed_metadata['file'].map do |f|
92
+ next if f.blank?
93
+
94
+ path_to_file(f.tr(' ', '_'))
95
+ end.compact
80
96
  end
81
97
 
82
98
  def build_export_metadata
@@ -86,10 +102,20 @@ module Bulkrax
86
102
  self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
87
103
  self.parsed_metadata['model'] = hyrax_record.has_model.first
88
104
  build_mapping_metadata
89
- self.parsed_metadata['collections'] = hyrax_record.member_of_collection_ids.join('; ')
90
- unless hyrax_record.is_a?(Collection)
91
- self.parsed_metadata['file'] = hyrax_record.file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact.join('; ')
105
+
106
+ # TODO: fix the "send" parameter in the conditional below
107
+ # currently it returns: "NoMethodError - undefined method 'bulkrax_identifier' for #<Collection:0x00007fbe6a3b4248>"
108
+ if mapping['collection']&.[]('join')
109
+ self.parsed_metadata['collection'] = hyrax_record.member_of_collection_ids.join('; ')
110
+ # self.parsed_metadata['collection'] = hyrax_record.member_of_collections.map { |c| c.send(work_identifier)&.first }.compact.uniq.join(';')
111
+ else
112
+ hyrax_record.member_of_collections.each_with_index do |collection, i|
113
+ self.parsed_metadata["collection_#{i + 1}"] = collection.id
114
+ # self.parsed_metadata["collection_#{i + 1}"] = collection.send(work_identifier)&.first
115
+ end
92
116
  end
117
+
118
+ build_files unless hyrax_record.is_a?(Collection)
93
119
  self.parsed_metadata
94
120
  end
95
121
 
@@ -97,24 +123,51 @@ module Bulkrax
97
123
  mapping.each do |key, value|
98
124
  next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
99
125
  next if key == "model"
126
+ next if value['excluded']
100
127
 
101
128
  object_key = key if value.key?('object')
102
129
  next unless hyrax_record.respond_to?(key.to_s) || object_key.present?
103
130
 
104
- data = object_key.present? ? hyrax_record.send(value['object']) : hyrax_record.send(key.to_s)
105
131
  if object_key.present?
106
- next self.parsed_metadata[key] = '' if data.empty?
107
- data = data.first if data.is_a?(ActiveTriples::Relation)
132
+ build_object(value)
133
+ else
134
+ build_value(key, value)
135
+ end
136
+ end
137
+ end
138
+
139
+ def build_object(value)
140
+ data = hyrax_record.send(value['object'])
141
+ return if data.empty?
142
+
143
+ data = data.to_a if data.is_a?(ActiveTriples::Relation)
144
+ object_metadata(Array.wrap(data))
145
+ end
108
146
 
109
- object_metadata(data, object_key)
110
- elsif data.is_a?(ActiveTriples::Relation)
111
- self.parsed_metadata[key] = data.map { |d| prepare_export_data(d) }.join('; ').to_s unless value[:excluded]
147
+ def build_value(key, value)
148
+ data = hyrax_record.send(key.to_s)
149
+ if data.is_a?(ActiveTriples::Relation)
150
+ if value['join']
151
+ self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join('; ').to_s
112
152
  else
113
- self.parsed_metadata[key] = prepare_export_data(data)
153
+ data.each_with_index do |d, i|
154
+ self.parsed_metadata["#{key_for_export(key)}_#{i + 1}"] = prepare_export_data(d)
155
+ end
114
156
  end
157
+ else
158
+ self.parsed_metadata[key_for_export(key)] = prepare_export_data(data)
115
159
  end
116
160
  end
117
161
 
162
+ # On export the key becomes the from and the from becomes the destination. It is the opposite of the import because we are moving data the opposite direction
163
+ # metadata that does not have a specific Bulkrax entry is mapped to the key name, as matching keys coming in are mapped by the csv parser automatically
164
+ def key_for_export(key)
165
+ clean_key = key_without_numbers(key)
166
+ unnumbered_key = mapping[clean_key] ? mapping[clean_key]['from'].first : clean_key
167
+ # Bring the number back if there is one
168
+ "#{unnumbered_key}#{key.sub(clean_key, '')}"
169
+ end
170
+
118
171
  def prepare_export_data(datum)
119
172
  if datum.is_a?(ActiveTriples::Resource)
120
173
  datum.to_uri.to_s
@@ -123,30 +176,34 @@ module Bulkrax
123
176
  end
124
177
  end
125
178
 
126
- def object_metadata(data, object_key)
127
- data = convert_to_hash(data)
179
+ def object_metadata(data)
180
+ data = data.map { |d| eval(d) }.flatten # rubocop:disable Security/Eval
128
181
 
129
182
  data.each_with_index do |obj, index|
130
- next unless obj[object_key]
131
-
132
- next self.parsed_metadata["#{object_key}_#{index + 1}"] = prepare_export_data(obj[object_key]) unless obj[object_key].is_a?(Array)
183
+ next if obj.nil?
184
+ # allow the object_key to be valid whether it's a string or symbol
185
+ obj = obj.with_indifferent_access
133
186
 
134
- obj[object_key].each_with_index do |_nested_item, nested_index|
135
- self.parsed_metadata["#{object_key}_#{index + 1}_#{nested_index + 1}"] = prepare_export_data(obj[object_key][nested_index])
187
+ obj.each_key do |key|
188
+ if obj[key].is_a?(Array)
189
+ obj[key].each_with_index do |_nested_item, nested_index|
190
+ self.parsed_metadata["#{key_for_export(key)}_#{index + 1}_#{nested_index + 1}"] = prepare_export_data(obj[key][nested_index])
191
+ end
192
+ else
193
+ self.parsed_metadata["#{key_for_export(key)}_#{index + 1}"] = prepare_export_data(obj[key])
194
+ end
136
195
  end
137
196
  end
138
197
  end
139
198
 
140
- def convert_to_hash(data)
141
- # converts data from `'[{}]'` to `[{}]`
142
- gsub_data = data.gsub(/\[{/, '{')
143
- .gsub(/}\]/, '}')
144
- .gsub('=>', ':')
145
- .gsub(/},\s?{/, "}},{{")
146
- .split("},{")
147
- gsub_data = [gsub_data] if gsub_data.is_a?(String)
148
-
149
- return gsub_data.map { |d| JSON.parse(d) }
199
+ def build_files
200
+ if mapping['file']&.[]('join')
201
+ self.parsed_metadata['file'] = hyrax_record.file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact.join('; ')
202
+ else
203
+ hyrax_record.file_sets.each_with_index do |fs, i|
204
+ self.parsed_metadata["file_#{i + 1}"] = filename(fs).to_s if filename(fs).present?
205
+ end
206
+ end
150
207
  end
151
208
 
152
209
  # In order for the existing exported hyrax_record, to be updated by a re-import
@@ -167,18 +224,28 @@ module Bulkrax
167
224
  Bulkrax::CsvMatcher
168
225
  end
169
226
 
227
+ def possible_collection_ids
228
+ ActiveSupport::Deprecation.warn(
229
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
230
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
231
+ )
232
+ @possible_collection_ids ||= record.inject([]) do |memo, (key, value)|
233
+ memo += value.split(/\s*[:;|]\s*/) if self.class.collection_field.to_s == key_without_numbers(key) && value.present?
234
+ memo
235
+ end || []
236
+ end
237
+
170
238
  def collections_created?
171
- return true if record[self.class.collection_field].blank?
172
- record[self.class.collection_field].split(/\s*[:;|]\s*/).length == self.collection_ids.length
239
+ possible_collection_ids.length == self.collection_ids.length
173
240
  end
174
241
 
175
- def find_or_create_collection_ids
242
+ def find_collection_ids
176
243
  return self.collection_ids if collections_created?
177
- valid_system_id(Collection)
178
- if record[self.class.collection_field].present?
179
- record[self.class.collection_field].split(/\s*[:;|]\s*/).each do |collection|
180
- c = find_collection(collection)
181
- self.collection_ids << c.id unless c.blank? || self.collection_ids.include?(c.id)
244
+ if possible_collection_ids.present?
245
+ possible_collection_ids.each do |collection_id|
246
+ c = find_collection(collection_id)
247
+ skip = c.blank? || self.collection_ids.include?(c.id)
248
+ self.collection_ids << c.id unless skip
182
249
  end
183
250
  end
184
251
  self.collection_ids
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ class CsvFileSetEntry < CsvEntry
5
+ def factory_class
6
+ ::FileSet
7
+ end
8
+
9
+ def add_path_to_file
10
+ parsed_metadata['file'].each_with_index do |filename, i|
11
+ path_to_file = ::File.join(parser.path_to_files, filename)
12
+
13
+ parsed_metadata['file'][i] = path_to_file
14
+ end
15
+ raise ::StandardError, 'one or more file paths are invalid' unless parsed_metadata['file'].map { |file_path| ::File.file?(file_path) }.all?
16
+
17
+ parsed_metadata['file']
18
+ end
19
+
20
+ def validate_presence_of_parent!
21
+ return if parsed_metadata[related_parents_parsed_mapping]&.map(&:present?)&.any?
22
+
23
+ raise StandardError, 'File set must be related to at least one work'
24
+ end
25
+ end
26
+ end
@@ -23,12 +23,22 @@ module Bulkrax
23
23
 
24
24
  attr_accessor :all_attrs
25
25
 
26
- delegate :parser, :mapping, :replace_files, :update_files, to: :importerexporter
26
+ delegate :parser,
27
+ :mapping,
28
+ :replace_files,
29
+ :update_files,
30
+ :keys_without_numbers,
31
+ :key_without_numbers,
32
+ to: :importerexporter
27
33
 
28
34
  delegate :client,
29
- :collection_name,
30
- :user,
31
- to: :parser
35
+ :collection_name,
36
+ :user,
37
+ :related_parents_raw_mapping,
38
+ :related_parents_parsed_mapping,
39
+ :related_children_raw_mapping,
40
+ :related_children_parsed_mapping,
41
+ to: :parser
32
42
 
33
43
  # Retrieve fields from the file
34
44
  # @param data - the source data
@@ -61,13 +71,13 @@ module Bulkrax
61
71
  end
62
72
 
63
73
  def self.collection_field
74
+ ActiveSupport::Deprecation.warn(
75
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
76
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
77
+ )
64
78
  Bulkrax.collection_field_mapping[self.to_s]
65
79
  end
66
80
 
67
- def self.children_field
68
- Bulkrax.parent_child_field_mapping[self.to_s]
69
- end
70
-
71
81
  def build
72
82
  return if type.nil?
73
83
  self.save if self.new_record? # must be saved for statuses
@@ -96,6 +106,7 @@ module Bulkrax
96
106
  end
97
107
 
98
108
  def find_collection(collection_identifier)
109
+ return unless Collection.properties.keys.include?(work_identifier)
99
110
  Collection.where(
100
111
  work_identifier => collection_identifier
101
112
  ).detect { |m| m.send(work_identifier).include?(collection_identifier) }
@@ -14,7 +14,7 @@ module Bulkrax
14
14
  validates :name, presence: true
15
15
  validates :parser_klass, presence: true
16
16
 
17
- delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, to: :parser
17
+ delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
18
18
 
19
19
  def export
20
20
  current_run && setup_export_path
@@ -25,6 +25,8 @@ module Bulkrax
25
25
  create_from_importer
26
26
  when 'worktype'
27
27
  create_from_worktype
28
+ when 'all'
29
+ create_from_all
28
30
  end
29
31
  rescue StandardError => e
30
32
  status_info(e)
@@ -77,7 +79,8 @@ module Bulkrax
77
79
  [
78
80
  [I18n.t('bulkrax.exporter.labels.importer'), 'importer'],
79
81
  [I18n.t('bulkrax.exporter.labels.collection'), 'collection'],
80
- [I18n.t('bulkrax.exporter.labels.worktype'), 'worktype']
82
+ [I18n.t('bulkrax.exporter.labels.worktype'), 'worktype'],
83
+ [I18n.t('bulkrax.exporter.labels.all'), 'all']
81
84
  ]
82
85
  end
83
86
 
@@ -105,18 +108,22 @@ module Bulkrax
105
108
  end
106
109
 
107
110
  def exporter_export_path
108
- @exporter_export_path ||= File.join(Bulkrax.export_path, self.id.to_s, self.exporter_runs.last.id.to_s)
111
+ @exporter_export_path ||= File.join(parser.base_path('export'), self.id.to_s, self.exporter_runs.last.id.to_s)
109
112
  end
110
113
 
111
114
  def exporter_export_zip_path
112
- @exporter_export_zip_path ||= File.join(Bulkrax.export_path, "export_#{self.id}_#{self.exporter_runs.last.id}.zip")
115
+ @exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_#{self.exporter_runs.last.id}.zip")
113
116
  rescue
114
- @exporter_export_zip_path ||= File.join(Bulkrax.export_path, "export_#{self.id}_0.zip")
117
+ @exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_0.zip")
115
118
  end
116
119
 
117
120
  def export_properties
118
121
  properties = Hyrax.config.registered_curation_concern_types.map { |work| work.constantize.properties.keys }.flatten.uniq.sort
119
122
  properties.reject { |prop| Bulkrax.reserved_properties.include?(prop) }
120
123
  end
124
+
125
+ def metadata_only?
126
+ export_type == 'metadata'
127
+ end
121
128
  end
122
129
  end
@@ -18,8 +18,7 @@ module Bulkrax
18
18
  validates :admin_set_id, presence: true
19
19
  validates :parser_klass, presence: true
20
20
 
21
- delegate :valid_import?, :create_parent_child_relationships,
22
- :write_errored_entries_file, :visibility, to: :parser
21
+ delegate :valid_import?, :write_errored_entries_file, :visibility, to: :parser
23
22
 
24
23
  attr_accessor :only_updates, :file_style, :file
25
24
  attr_writer :current_run
@@ -97,7 +96,16 @@ module Bulkrax
97
96
  end
98
97
 
99
98
  def current_run
100
- @current_run ||= self.importer_runs.create!(total_work_entries: self.limit || parser.total, total_collection_entries: parser.collections_total)
99
+ @current_run ||= if file? && zip?
100
+ self.importer_runs.create!
101
+ else
102
+ entry_counts = {
103
+ total_work_entries: self.limit || parser.works_total,
104
+ total_collection_entries: parser.collections_total,
105
+ total_file_set_entries: parser.file_sets_total
106
+ }
107
+ self.importer_runs.create!(entry_counts)
108
+ end
101
109
  end
102
110
 
103
111
  def last_run
@@ -131,6 +139,13 @@ module Bulkrax
131
139
  status_info(e)
132
140
  end
133
141
 
142
+ def import_file_sets
143
+ self.save if self.new_record? # Object needs to be saved for statuses
144
+ parser.create_file_sets
145
+ rescue StandardError => e
146
+ status_info(e)
147
+ end
148
+
134
149
  # Prepend the base_url to ensure unique set identifiers
135
150
  # @todo - move to parser, as this is OAI specific
136
151
  def unique_collection_identifier(id)
@@ -149,11 +164,11 @@ module Bulkrax
149
164
 
150
165
  # If the import data is zipped, unzip it to this path
151
166
  def importer_unzip_path
152
- @importer_unzip_path ||= File.join(Bulkrax.import_path, "import_#{path_string}")
167
+ @importer_unzip_path ||= File.join(parser.base_path, "import_#{path_string}")
153
168
  end
154
169
 
155
170
  def errored_entries_csv_path
156
- @errored_entries_csv_path ||= File.join(Bulkrax.import_path, "import_#{path_string}_errored_entries.csv")
171
+ @errored_entries_csv_path ||= File.join(parser.base_path, "import_#{path_string}_errored_entries.csv")
157
172
  end
158
173
 
159
174
  def path_string
@@ -161,5 +176,9 @@ module Bulkrax
161
176
  rescue
162
177
  "#{self.id}_#{self.created_at.strftime('%Y%m%d%H%M%S')}"
163
178
  end
179
+
180
+ def metadata_only?
181
+ parser.parser_fields['metadata_only'] == true
182
+ end
164
183
  end
165
184
  end
@@ -26,6 +26,10 @@ module Bulkrax
26
26
  end
27
27
 
28
28
  def build_metadata
29
+ ActiveSupport::Deprecation.warn(
30
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
31
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
32
+ )
29
33
  self.parsed_metadata = {}
30
34
  self.parsed_metadata[work_identifier] = [record.header.identifier]
31
35
 
@@ -56,7 +60,7 @@ module Bulkrax
56
60
  # Retrieve list of collections for the entry; add to collection_ids
57
61
  # If OAI-PMH doesn't return setSpec in the headers for GetRecord, use parser.collection_name
58
62
  # in this case, if 'All' is selected, records will not be added to a collection.
59
- def find_or_create_collection_ids
63
+ def find_collection_ids
60
64
  return self.collection_ids if collections_created?
61
65
  if sets.blank? || parser.collection_name != 'all'
62
66
  # c = Collection.where(Bulkrax.system_identifier_field => importerexporter.unique_collection_identifier(parser.collection_name)).first
@@ -14,6 +14,10 @@ module Bulkrax
14
14
  end
15
15
 
16
16
  def self.data_for_entry(data, source_id)
17
+ ActiveSupport::Deprecation.warn(
18
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
19
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
20
+ )
17
21
  reader = data
18
22
  format = reader.class.format.to_sym
19
23
  collections = []
@@ -22,7 +26,7 @@ module Bulkrax
22
26
  data = RDF::Writer.for(format).buffer do |writer|
23
27
  reader.each_statement do |statement|
24
28
  collections << statement.object.to_s if collection_field.present? && collection_field == statement.predicate.to_s
25
- children << statement.object.to_s if children_field.present? && children_field == statement.predicate.to_s
29
+ children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
26
30
  delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
27
31
  writer << statement
28
32
  end
@@ -37,12 +41,13 @@ module Bulkrax
37
41
  }
38
42
  end
39
43
 
40
- def self.collection_field
41
- Bulkrax.collection_field_mapping[self.to_s]
42
- end
44
+ def self.related_children_parsed_mapping
45
+ return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
46
+
47
+ rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
48
+ return if rdf_related_children_field_mapping.blank?
43
49
 
44
- def self.children_field
45
- Bulkrax.parent_child_field_mapping[self.to_s]
50
+ @related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
46
51
  end
47
52
 
48
53
  def record
@@ -50,6 +55,10 @@ module Bulkrax
50
55
  end
51
56
 
52
57
  def build_metadata
58
+ ActiveSupport::Deprecation.warn(
59
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
60
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
61
+ )
53
62
  raise StandardError, 'Record not found' if record.nil?
54
63
  raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
55
64
 
@@ -76,7 +85,7 @@ module Bulkrax
76
85
  self.raw_metadata['collection'].length == self.collection_ids.length
77
86
  end
78
87
 
79
- def find_or_create_collection_ids
88
+ def find_collection_ids
80
89
  return self.collection_ids if collections_created?
81
90
  if self.raw_metadata['collection'].present?
82
91
  self.raw_metadata['collection'].each do |collection|
@@ -39,6 +39,10 @@ module Bulkrax
39
39
  end
40
40
 
41
41
  def build_metadata
42
+ ActiveSupport::Deprecation.warn(
43
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
44
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
45
+ )
42
46
  raise StandardError, 'Record not found' if record.nil?
43
47
  raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
44
48
  self.parsed_metadata = {}