bulkrax 1.0.2 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/app/controllers/bulkrax/exporters_controller.rb +12 -4
  4. data/app/controllers/bulkrax/importers_controller.rb +23 -17
  5. data/app/factories/bulkrax/object_factory.rb +84 -63
  6. data/app/jobs/bulkrax/create_relationships_job.rb +156 -0
  7. data/app/jobs/bulkrax/delete_work_job.rb +6 -2
  8. data/app/jobs/bulkrax/export_work_job.rb +3 -1
  9. data/app/jobs/bulkrax/exporter_job.rb +1 -0
  10. data/app/jobs/bulkrax/{import_work_collection_job.rb → import_collection_job.rb} +4 -2
  11. data/app/jobs/bulkrax/import_file_set_job.rb +69 -0
  12. data/app/jobs/bulkrax/import_work_job.rb +2 -0
  13. data/app/jobs/bulkrax/importer_job.rb +18 -1
  14. data/app/matchers/bulkrax/application_matcher.rb +5 -5
  15. data/app/models/bulkrax/csv_collection_entry.rb +8 -6
  16. data/app/models/bulkrax/csv_entry.rb +132 -65
  17. data/app/models/bulkrax/csv_file_set_entry.rb +26 -0
  18. data/app/models/bulkrax/entry.rb +19 -8
  19. data/app/models/bulkrax/exporter.rb +12 -5
  20. data/app/models/bulkrax/importer.rb +24 -5
  21. data/app/models/bulkrax/oai_entry.rb +5 -1
  22. data/app/models/bulkrax/rdf_entry.rb +16 -7
  23. data/app/models/bulkrax/xml_entry.rb +4 -0
  24. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +39 -0
  25. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  26. data/app/models/concerns/bulkrax/has_matchers.rb +44 -13
  27. data/app/models/concerns/bulkrax/import_behavior.rb +40 -5
  28. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +23 -2
  29. data/app/models/concerns/bulkrax/status_info.rb +4 -4
  30. data/app/parsers/bulkrax/application_parser.rb +67 -84
  31. data/app/parsers/bulkrax/bagit_parser.rb +13 -4
  32. data/app/parsers/bulkrax/csv_parser.rb +170 -64
  33. data/app/parsers/bulkrax/oai_dc_parser.rb +6 -3
  34. data/app/parsers/bulkrax/xml_parser.rb +5 -0
  35. data/app/views/bulkrax/exporters/_form.html.erb +1 -1
  36. data/app/views/bulkrax/exporters/show.html.erb +2 -1
  37. data/app/views/bulkrax/importers/index.html.erb +17 -17
  38. data/app/views/bulkrax/importers/show.html.erb +52 -6
  39. data/config/locales/bulkrax.en.yml +1 -0
  40. data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +5 -1
  41. data/db/migrate/20211004170708_change_bulkrax_statuses_error_message_column_type_to_text.rb +5 -0
  42. data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +6 -0
  43. data/db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb +7 -0
  44. data/db/migrate/20220118001339_add_import_attempts_to_entries.rb +5 -0
  45. data/db/migrate/20220119213325_add_work_counters_to_importer_runs.rb +6 -0
  46. data/lib/bulkrax/engine.rb +1 -1
  47. data/lib/bulkrax/version.rb +1 -1
  48. data/lib/bulkrax.rb +9 -17
  49. data/lib/generators/bulkrax/templates/bin/importer +17 -11
  50. data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +3 -1
  51. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +7 -12
  52. metadata +22 -10
  53. data/app/jobs/bulkrax/child_relationships_job.rb +0 -128
@@ -6,17 +6,34 @@ module Bulkrax
6
6
 
7
7
  def perform(importer_id, only_updates_since_last_import = false)
8
8
  importer = Importer.find(importer_id)
9
+
9
10
  importer.current_run
11
+ unzip_imported_file(importer.parser)
10
12
  import(importer, only_updates_since_last_import)
13
+ update_current_run_counters(importer)
11
14
  schedule(importer) if importer.schedulable?
12
15
  end
13
16
 
14
17
  def import(importer, only_updates_since_last_import)
15
18
  importer.only_updates = only_updates_since_last_import || false
16
19
  return unless importer.valid_import?
20
+
17
21
  importer.import_collections
18
22
  importer.import_works
19
- importer.create_parent_child_relationships unless importer.validate_only
23
+ importer.import_file_sets
24
+ end
25
+
26
+ def unzip_imported_file(parser)
27
+ return unless parser.file? && parser.zip?
28
+
29
+ parser.unzip(parser.parser_fields['import_file_path'])
30
+ end
31
+
32
+ def update_current_run_counters(importer)
33
+ importer.current_run.total_work_entries = importer.limit || importer.parser.works_total
34
+ importer.current_run.total_collection_entries = importer.parser.collections_total
35
+ importer.current_run.total_file_set_entries = importer.parser.file_sets_total
36
+ importer.current_run.save!
20
37
  end
21
38
 
22
39
  def schedule(importer)
@@ -20,9 +20,9 @@ module Bulkrax
20
20
  return unless content.send(self.if[0], Regexp.new(self.if[1]))
21
21
  end
22
22
 
23
- @result = content.to_s.gsub(/\s/, ' ') # remove any line feeds and tabs
24
- @result.strip!
25
- process_split
23
+ # @result will evaluate to an empty string for nil content values
24
+ @result = content.to_s.gsub(/\s/, ' ').strip # remove any line feeds and tabs
25
+ process_split if @result.present?
26
26
  @result = @result[0] if @result.is_a?(Array) && @result.size == 1
27
27
  process_parse
28
28
  return @result
@@ -66,14 +66,14 @@ module Bulkrax
66
66
  end
67
67
 
68
68
  def parse_subject(src)
69
- string = src.to_s.strip.downcase
69
+ string = src.strip.downcase
70
70
  return if string.blank?
71
71
 
72
72
  string.slice(0, 1).capitalize + string.slice(1..-1)
73
73
  end
74
74
 
75
75
  def parse_types(src)
76
- src.to_s.strip.titleize
76
+ src.strip.titleize
77
77
  end
78
78
 
79
79
  # Allow for mapping a model field to the work type or collection
@@ -6,14 +6,16 @@ module Bulkrax
6
6
  Collection
7
7
  end
8
8
 
9
- def build_metadata
10
- self.parsed_metadata = self.raw_metadata
11
- add_local
12
- return self.parsed_metadata
9
+ # Use identifier set by CsvParser#unique_collection_identifier, which falls back
10
+ # on the Collection's first title if record[source_identifier] is not present
11
+ def add_identifier
12
+ self.parsed_metadata[work_identifier] = [self.identifier].flatten
13
13
  end
14
14
 
15
- def collections_created?
16
- true
15
+ def add_collection_type_gid
16
+ return if self.parsed_metadata['collection_type_gid'].present?
17
+
18
+ self.parsed_metadata['collection_type_gid'] = ::Hyrax::CollectionType.find_or_create_default_collection_type.gid
17
19
  end
18
20
  end
19
21
  end
@@ -14,59 +14,71 @@ module Bulkrax
14
14
  def self.read_data(path)
15
15
  raise StandardError, 'CSV path empty' if path.blank?
16
16
  CSV.read(path,
17
- headers: true,
18
- header_converters: :symbol,
19
- encoding: 'utf-8')
17
+ headers: true,
18
+ header_converters: :symbol,
19
+ encoding: 'utf-8')
20
20
  end
21
21
 
22
22
  def self.data_for_entry(data, _source_id)
23
+ ActiveSupport::Deprecation.warn(
24
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
25
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
26
+ )
23
27
  # If a multi-line CSV data is passed, grab the first row
24
28
  data = data.first if data.is_a?(CSV::Table)
25
29
  # model has to be separated so that it doesn't get mistranslated by to_h
26
30
  raw_data = data.to_h
27
- raw_data[:model] = data[:model]
31
+ raw_data[:model] = data[:model] if data[:model].present?
28
32
  # If the collection field mapping is not 'collection', add 'collection' - the parser needs it
29
33
  raw_data[:collection] = raw_data[collection_field.to_sym] if raw_data.keys.include?(collection_field.to_sym) && collection_field != 'collection'
30
- # If the children field mapping is not 'children', add 'children' - the parser needs it
31
- raw_data[:children] = raw_data[collection_field.to_sym] if raw_data.keys.include?(children_field.to_sym) && children_field != 'children'
32
34
  return raw_data
33
35
  end
34
36
 
35
- def self.collection_field
36
- Bulkrax.collection_field_mapping[self.class.to_s] || 'collection'
37
- end
37
+ def build_metadata
38
+ raise StandardError, 'Record not found' if record.nil?
39
+ raise StandardError, "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}" unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
38
40
 
39
- def self.children_field
40
- Bulkrax.parent_child_field_mapping[self.to_s] || 'children'
41
- end
41
+ self.parsed_metadata = {}
42
+ add_identifier
43
+ add_visibility
44
+ add_ingested_metadata
45
+ add_metadata_for_model
46
+ add_rights_statement
47
+ add_collections
48
+ add_local
42
49
 
43
- def keys_without_numbers(keys)
44
- keys.map { |key| key_without_numbers(key) }
50
+ self.parsed_metadata
45
51
  end
46
52
 
47
- def key_without_numbers(key)
48
- key.gsub(/_\d+/, '').sub(/^\d+_/, '')
53
+ def add_identifier
54
+ self.parsed_metadata[work_identifier] = [record[source_identifier]]
49
55
  end
50
56
 
51
- def build_metadata
52
- raise StandardError, 'Record not found' if record.nil?
53
- raise StandardError, "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}" unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
57
+ def add_metadata_for_model
58
+ if factory_class == Collection
59
+ add_collection_type_gid
60
+ elsif factory_class == FileSet
61
+ add_path_to_file
62
+ validate_presence_of_parent!
63
+ else
64
+ add_file unless importerexporter.metadata_only?
65
+ add_admin_set_id
66
+ end
67
+ end
54
68
 
55
- self.parsed_metadata = {}
56
- self.parsed_metadata[work_identifier] = [record[source_identifier]]
69
+ def add_ingested_metadata
70
+ ActiveSupport::Deprecation.warn(
71
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
72
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
73
+ )
74
+ # we do not want to sort the values in the record before adding the metadata.
75
+ # if we do, the factory_class will be set to the default_work_type for all values that come before "model" or "work type"
57
76
  record.each do |key, value|
58
- next if key == 'collection'
77
+ next if self.parser.collection_field_mapping.to_s == key_without_numbers(key)
59
78
 
60
79
  index = key[/\d+/].to_i - 1 if key[/\d+/].to_i != 0
61
80
  add_metadata(key_without_numbers(key), value, index)
62
81
  end
63
- add_file
64
- add_visibility
65
- add_rights_statement
66
- add_admin_set_id
67
- add_collections
68
- add_local
69
- self.parsed_metadata
70
82
  end
71
83
 
72
84
  def add_file
@@ -76,7 +88,11 @@ module Bulkrax
76
88
  elsif record['file'].is_a?(Array)
77
89
  self.parsed_metadata['file'] = record['file']
78
90
  end
79
- self.parsed_metadata['file'] = self.parsed_metadata['file'].map { |f| path_to_file(f.tr(' ', '_')) }
91
+ self.parsed_metadata['file'] = self.parsed_metadata['file'].map do |f|
92
+ next if f.blank?
93
+
94
+ path_to_file(f.tr(' ', '_'))
95
+ end.compact
80
96
  end
81
97
 
82
98
  def build_export_metadata
@@ -86,10 +102,20 @@ module Bulkrax
86
102
  self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
87
103
  self.parsed_metadata['model'] = hyrax_record.has_model.first
88
104
  build_mapping_metadata
89
- self.parsed_metadata['collections'] = hyrax_record.member_of_collection_ids.join('; ')
90
- unless hyrax_record.is_a?(Collection)
91
- self.parsed_metadata['file'] = hyrax_record.file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact.join('; ')
105
+
106
+ # TODO: fix the "send" parameter in the conditional below
107
+ # currently it returns: "NoMethodError - undefined method 'bulkrax_identifier' for #<Collection:0x00007fbe6a3b4248>"
108
+ if mapping['collection']&.[]('join')
109
+ self.parsed_metadata['collection'] = hyrax_record.member_of_collection_ids.join('; ')
110
+ # self.parsed_metadata['collection'] = hyrax_record.member_of_collections.map { |c| c.send(work_identifier)&.first }.compact.uniq.join(';')
111
+ else
112
+ hyrax_record.member_of_collections.each_with_index do |collection, i|
113
+ self.parsed_metadata["collection_#{i + 1}"] = collection.id
114
+ # self.parsed_metadata["collection_#{i + 1}"] = collection.send(work_identifier)&.first
115
+ end
92
116
  end
117
+
118
+ build_files unless hyrax_record.is_a?(Collection)
93
119
  self.parsed_metadata
94
120
  end
95
121
 
@@ -97,24 +123,51 @@ module Bulkrax
97
123
  mapping.each do |key, value|
98
124
  next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
99
125
  next if key == "model"
126
+ next if value['excluded']
100
127
 
101
128
  object_key = key if value.key?('object')
102
129
  next unless hyrax_record.respond_to?(key.to_s) || object_key.present?
103
130
 
104
- data = object_key.present? ? hyrax_record.send(value['object']) : hyrax_record.send(key.to_s)
105
131
  if object_key.present?
106
- next self.parsed_metadata[key] = '' if data.empty?
107
- data = data.first if data.is_a?(ActiveTriples::Relation)
132
+ build_object(value)
133
+ else
134
+ build_value(key, value)
135
+ end
136
+ end
137
+ end
138
+
139
+ def build_object(value)
140
+ data = hyrax_record.send(value['object'])
141
+ return if data.empty?
142
+
143
+ data = data.to_a if data.is_a?(ActiveTriples::Relation)
144
+ object_metadata(Array.wrap(data))
145
+ end
108
146
 
109
- object_metadata(data, object_key)
110
- elsif data.is_a?(ActiveTriples::Relation)
111
- self.parsed_metadata[key] = data.map { |d| prepare_export_data(d) }.join('; ').to_s unless value[:excluded]
147
+ def build_value(key, value)
148
+ data = hyrax_record.send(key.to_s)
149
+ if data.is_a?(ActiveTriples::Relation)
150
+ if value['join']
151
+ self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join('; ').to_s
112
152
  else
113
- self.parsed_metadata[key] = prepare_export_data(data)
153
+ data.each_with_index do |d, i|
154
+ self.parsed_metadata["#{key_for_export(key)}_#{i + 1}"] = prepare_export_data(d)
155
+ end
114
156
  end
157
+ else
158
+ self.parsed_metadata[key_for_export(key)] = prepare_export_data(data)
115
159
  end
116
160
  end
117
161
 
162
+ # On export the key becomes the from and the from becomes the destination. It is the opposite of the import because we are moving data the opposite direction
163
+ # metadata that does not have a specific Bulkrax entry is mapped to the key name, as matching keys coming in are mapped by the csv parser automatically
164
+ def key_for_export(key)
165
+ clean_key = key_without_numbers(key)
166
+ unnumbered_key = mapping[clean_key] ? mapping[clean_key]['from'].first : clean_key
167
+ # Bring the number back if there is one
168
+ "#{unnumbered_key}#{key.sub(clean_key, '')}"
169
+ end
170
+
118
171
  def prepare_export_data(datum)
119
172
  if datum.is_a?(ActiveTriples::Resource)
120
173
  datum.to_uri.to_s
@@ -123,30 +176,34 @@ module Bulkrax
123
176
  end
124
177
  end
125
178
 
126
- def object_metadata(data, object_key)
127
- data = convert_to_hash(data)
179
+ def object_metadata(data)
180
+ data = data.map { |d| eval(d) }.flatten # rubocop:disable Security/Eval
128
181
 
129
182
  data.each_with_index do |obj, index|
130
- next unless obj[object_key]
131
-
132
- next self.parsed_metadata["#{object_key}_#{index + 1}"] = prepare_export_data(obj[object_key]) unless obj[object_key].is_a?(Array)
183
+ next if obj.nil?
184
+ # allow the object_key to be valid whether it's a string or symbol
185
+ obj = obj.with_indifferent_access
133
186
 
134
- obj[object_key].each_with_index do |_nested_item, nested_index|
135
- self.parsed_metadata["#{object_key}_#{index + 1}_#{nested_index + 1}"] = prepare_export_data(obj[object_key][nested_index])
187
+ obj.each_key do |key|
188
+ if obj[key].is_a?(Array)
189
+ obj[key].each_with_index do |_nested_item, nested_index|
190
+ self.parsed_metadata["#{key_for_export(key)}_#{index + 1}_#{nested_index + 1}"] = prepare_export_data(obj[key][nested_index])
191
+ end
192
+ else
193
+ self.parsed_metadata["#{key_for_export(key)}_#{index + 1}"] = prepare_export_data(obj[key])
194
+ end
136
195
  end
137
196
  end
138
197
  end
139
198
 
140
- def convert_to_hash(data)
141
- # converts data from `'[{}]'` to `[{}]`
142
- gsub_data = data.gsub(/\[{/, '{')
143
- .gsub(/}\]/, '}')
144
- .gsub('=>', ':')
145
- .gsub(/},\s?{/, "}},{{")
146
- .split("},{")
147
- gsub_data = [gsub_data] if gsub_data.is_a?(String)
148
-
149
- return gsub_data.map { |d| JSON.parse(d) }
199
+ def build_files
200
+ if mapping['file']&.[]('join')
201
+ self.parsed_metadata['file'] = hyrax_record.file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact.join('; ')
202
+ else
203
+ hyrax_record.file_sets.each_with_index do |fs, i|
204
+ self.parsed_metadata["file_#{i + 1}"] = filename(fs).to_s if filename(fs).present?
205
+ end
206
+ end
150
207
  end
151
208
 
152
209
  # In order for the existing exported hyrax_record, to be updated by a re-import
@@ -167,18 +224,28 @@ module Bulkrax
167
224
  Bulkrax::CsvMatcher
168
225
  end
169
226
 
227
+ def possible_collection_ids
228
+ ActiveSupport::Deprecation.warn(
229
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
230
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
231
+ )
232
+ @possible_collection_ids ||= record.inject([]) do |memo, (key, value)|
233
+ memo += value.split(/\s*[:;|]\s*/) if self.class.collection_field.to_s == key_without_numbers(key) && value.present?
234
+ memo
235
+ end || []
236
+ end
237
+
170
238
  def collections_created?
171
- return true if record[self.class.collection_field].blank?
172
- record[self.class.collection_field].split(/\s*[:;|]\s*/).length == self.collection_ids.length
239
+ possible_collection_ids.length == self.collection_ids.length
173
240
  end
174
241
 
175
- def find_or_create_collection_ids
242
+ def find_collection_ids
176
243
  return self.collection_ids if collections_created?
177
- valid_system_id(Collection)
178
- if record[self.class.collection_field].present?
179
- record[self.class.collection_field].split(/\s*[:;|]\s*/).each do |collection|
180
- c = find_collection(collection)
181
- self.collection_ids << c.id unless c.blank? || self.collection_ids.include?(c.id)
244
+ if possible_collection_ids.present?
245
+ possible_collection_ids.each do |collection_id|
246
+ c = find_collection(collection_id)
247
+ skip = c.blank? || self.collection_ids.include?(c.id)
248
+ self.collection_ids << c.id unless skip
182
249
  end
183
250
  end
184
251
  self.collection_ids
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ class CsvFileSetEntry < CsvEntry
5
+ def factory_class
6
+ ::FileSet
7
+ end
8
+
9
+ def add_path_to_file
10
+ parsed_metadata['file'].each_with_index do |filename, i|
11
+ path_to_file = ::File.join(parser.path_to_files, filename)
12
+
13
+ parsed_metadata['file'][i] = path_to_file
14
+ end
15
+ raise ::StandardError, 'one or more file paths are invalid' unless parsed_metadata['file'].map { |file_path| ::File.file?(file_path) }.all?
16
+
17
+ parsed_metadata['file']
18
+ end
19
+
20
+ def validate_presence_of_parent!
21
+ return if parsed_metadata[related_parents_parsed_mapping]&.map(&:present?)&.any?
22
+
23
+ raise StandardError, 'File set must be related to at least one work'
24
+ end
25
+ end
26
+ end
@@ -23,12 +23,22 @@ module Bulkrax
23
23
 
24
24
  attr_accessor :all_attrs
25
25
 
26
- delegate :parser, :mapping, :replace_files, :update_files, to: :importerexporter
26
+ delegate :parser,
27
+ :mapping,
28
+ :replace_files,
29
+ :update_files,
30
+ :keys_without_numbers,
31
+ :key_without_numbers,
32
+ to: :importerexporter
27
33
 
28
34
  delegate :client,
29
- :collection_name,
30
- :user,
31
- to: :parser
35
+ :collection_name,
36
+ :user,
37
+ :related_parents_raw_mapping,
38
+ :related_parents_parsed_mapping,
39
+ :related_children_raw_mapping,
40
+ :related_children_parsed_mapping,
41
+ to: :parser
32
42
 
33
43
  # Retrieve fields from the file
34
44
  # @param data - the source data
@@ -61,13 +71,13 @@ module Bulkrax
61
71
  end
62
72
 
63
73
  def self.collection_field
74
+ ActiveSupport::Deprecation.warn(
75
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
76
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
77
+ )
64
78
  Bulkrax.collection_field_mapping[self.to_s]
65
79
  end
66
80
 
67
- def self.children_field
68
- Bulkrax.parent_child_field_mapping[self.to_s]
69
- end
70
-
71
81
  def build
72
82
  return if type.nil?
73
83
  self.save if self.new_record? # must be saved for statuses
@@ -96,6 +106,7 @@ module Bulkrax
96
106
  end
97
107
 
98
108
  def find_collection(collection_identifier)
109
+ return unless Collection.properties.keys.include?(work_identifier)
99
110
  Collection.where(
100
111
  work_identifier => collection_identifier
101
112
  ).detect { |m| m.send(work_identifier).include?(collection_identifier) }
@@ -14,7 +14,7 @@ module Bulkrax
14
14
  validates :name, presence: true
15
15
  validates :parser_klass, presence: true
16
16
 
17
- delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, to: :parser
17
+ delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
18
18
 
19
19
  def export
20
20
  current_run && setup_export_path
@@ -25,6 +25,8 @@ module Bulkrax
25
25
  create_from_importer
26
26
  when 'worktype'
27
27
  create_from_worktype
28
+ when 'all'
29
+ create_from_all
28
30
  end
29
31
  rescue StandardError => e
30
32
  status_info(e)
@@ -77,7 +79,8 @@ module Bulkrax
77
79
  [
78
80
  [I18n.t('bulkrax.exporter.labels.importer'), 'importer'],
79
81
  [I18n.t('bulkrax.exporter.labels.collection'), 'collection'],
80
- [I18n.t('bulkrax.exporter.labels.worktype'), 'worktype']
82
+ [I18n.t('bulkrax.exporter.labels.worktype'), 'worktype'],
83
+ [I18n.t('bulkrax.exporter.labels.all'), 'all']
81
84
  ]
82
85
  end
83
86
 
@@ -105,18 +108,22 @@ module Bulkrax
105
108
  end
106
109
 
107
110
  def exporter_export_path
108
- @exporter_export_path ||= File.join(Bulkrax.export_path, self.id.to_s, self.exporter_runs.last.id.to_s)
111
+ @exporter_export_path ||= File.join(parser.base_path('export'), self.id.to_s, self.exporter_runs.last.id.to_s)
109
112
  end
110
113
 
111
114
  def exporter_export_zip_path
112
- @exporter_export_zip_path ||= File.join(Bulkrax.export_path, "export_#{self.id}_#{self.exporter_runs.last.id}.zip")
115
+ @exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_#{self.exporter_runs.last.id}.zip")
113
116
  rescue
114
- @exporter_export_zip_path ||= File.join(Bulkrax.export_path, "export_#{self.id}_0.zip")
117
+ @exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_0.zip")
115
118
  end
116
119
 
117
120
  def export_properties
118
121
  properties = Hyrax.config.registered_curation_concern_types.map { |work| work.constantize.properties.keys }.flatten.uniq.sort
119
122
  properties.reject { |prop| Bulkrax.reserved_properties.include?(prop) }
120
123
  end
124
+
125
+ def metadata_only?
126
+ export_type == 'metadata'
127
+ end
121
128
  end
122
129
  end
@@ -18,8 +18,7 @@ module Bulkrax
18
18
  validates :admin_set_id, presence: true
19
19
  validates :parser_klass, presence: true
20
20
 
21
- delegate :valid_import?, :create_parent_child_relationships,
22
- :write_errored_entries_file, :visibility, to: :parser
21
+ delegate :valid_import?, :write_errored_entries_file, :visibility, to: :parser
23
22
 
24
23
  attr_accessor :only_updates, :file_style, :file
25
24
  attr_writer :current_run
@@ -97,7 +96,16 @@ module Bulkrax
97
96
  end
98
97
 
99
98
  def current_run
100
- @current_run ||= self.importer_runs.create!(total_work_entries: self.limit || parser.total, total_collection_entries: parser.collections_total)
99
+ @current_run ||= if file? && zip?
100
+ self.importer_runs.create!
101
+ else
102
+ entry_counts = {
103
+ total_work_entries: self.limit || parser.works_total,
104
+ total_collection_entries: parser.collections_total,
105
+ total_file_set_entries: parser.file_sets_total
106
+ }
107
+ self.importer_runs.create!(entry_counts)
108
+ end
101
109
  end
102
110
 
103
111
  def last_run
@@ -131,6 +139,13 @@ module Bulkrax
131
139
  status_info(e)
132
140
  end
133
141
 
142
+ def import_file_sets
143
+ self.save if self.new_record? # Object needs to be saved for statuses
144
+ parser.create_file_sets
145
+ rescue StandardError => e
146
+ status_info(e)
147
+ end
148
+
134
149
  # Prepend the base_url to ensure unique set identifiers
135
150
  # @todo - move to parser, as this is OAI specific
136
151
  def unique_collection_identifier(id)
@@ -149,11 +164,11 @@ module Bulkrax
149
164
 
150
165
  # If the import data is zipped, unzip it to this path
151
166
  def importer_unzip_path
152
- @importer_unzip_path ||= File.join(Bulkrax.import_path, "import_#{path_string}")
167
+ @importer_unzip_path ||= File.join(parser.base_path, "import_#{path_string}")
153
168
  end
154
169
 
155
170
  def errored_entries_csv_path
156
- @errored_entries_csv_path ||= File.join(Bulkrax.import_path, "import_#{path_string}_errored_entries.csv")
171
+ @errored_entries_csv_path ||= File.join(parser.base_path, "import_#{path_string}_errored_entries.csv")
157
172
  end
158
173
 
159
174
  def path_string
@@ -161,5 +176,9 @@ module Bulkrax
161
176
  rescue
162
177
  "#{self.id}_#{self.created_at.strftime('%Y%m%d%H%M%S')}"
163
178
  end
179
+
180
+ def metadata_only?
181
+ parser.parser_fields['metadata_only'] == true
182
+ end
164
183
  end
165
184
  end
@@ -26,6 +26,10 @@ module Bulkrax
26
26
  end
27
27
 
28
28
  def build_metadata
29
+ ActiveSupport::Deprecation.warn(
30
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
31
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
32
+ )
29
33
  self.parsed_metadata = {}
30
34
  self.parsed_metadata[work_identifier] = [record.header.identifier]
31
35
 
@@ -56,7 +60,7 @@ module Bulkrax
56
60
  # Retrieve list of collections for the entry; add to collection_ids
57
61
  # If OAI-PMH doesn't return setSpec in the headers for GetRecord, use parser.collection_name
58
62
  # in this case, if 'All' is selected, records will not be added to a collection.
59
- def find_or_create_collection_ids
63
+ def find_collection_ids
60
64
  return self.collection_ids if collections_created?
61
65
  if sets.blank? || parser.collection_name != 'all'
62
66
  # c = Collection.where(Bulkrax.system_identifier_field => importerexporter.unique_collection_identifier(parser.collection_name)).first
@@ -14,6 +14,10 @@ module Bulkrax
14
14
  end
15
15
 
16
16
  def self.data_for_entry(data, source_id)
17
+ ActiveSupport::Deprecation.warn(
18
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
19
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
20
+ )
17
21
  reader = data
18
22
  format = reader.class.format.to_sym
19
23
  collections = []
@@ -22,7 +26,7 @@ module Bulkrax
22
26
  data = RDF::Writer.for(format).buffer do |writer|
23
27
  reader.each_statement do |statement|
24
28
  collections << statement.object.to_s if collection_field.present? && collection_field == statement.predicate.to_s
25
- children << statement.object.to_s if children_field.present? && children_field == statement.predicate.to_s
29
+ children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
26
30
  delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
27
31
  writer << statement
28
32
  end
@@ -37,12 +41,13 @@ module Bulkrax
37
41
  }
38
42
  end
39
43
 
40
- def self.collection_field
41
- Bulkrax.collection_field_mapping[self.to_s]
42
- end
44
+ def self.related_children_parsed_mapping
45
+ return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
46
+
47
+ rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
48
+ return if rdf_related_children_field_mapping.blank?
43
49
 
44
- def self.children_field
45
- Bulkrax.parent_child_field_mapping[self.to_s]
50
+ @related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
46
51
  end
47
52
 
48
53
  def record
@@ -50,6 +55,10 @@ module Bulkrax
50
55
  end
51
56
 
52
57
  def build_metadata
58
+ ActiveSupport::Deprecation.warn(
59
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
60
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
61
+ )
53
62
  raise StandardError, 'Record not found' if record.nil?
54
63
  raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
55
64
 
@@ -76,7 +85,7 @@ module Bulkrax
76
85
  self.raw_metadata['collection'].length == self.collection_ids.length
77
86
  end
78
87
 
79
- def find_or_create_collection_ids
88
+ def find_collection_ids
80
89
  return self.collection_ids if collections_created?
81
90
  if self.raw_metadata['collection'].present?
82
91
  self.raw_metadata['collection'].each do |collection|
@@ -39,6 +39,10 @@ module Bulkrax
39
39
  end
40
40
 
41
41
  def build_metadata
42
+ ActiveSupport::Deprecation.warn(
43
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
44
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
45
+ )
42
46
  raise StandardError, 'Record not found' if record.nil?
43
47
  raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
44
48
  self.parsed_metadata = {}