bulkrax 9.3.4 → 9.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +11 -1
  3. data/app/assets/javascripts/bulkrax/application.js +2 -1
  4. data/app/assets/javascripts/bulkrax/bulkrax.js +13 -4
  5. data/app/assets/javascripts/bulkrax/bulkrax_utils.js +96 -0
  6. data/app/assets/javascripts/bulkrax/datatables.js +1 -0
  7. data/app/assets/javascripts/bulkrax/entries.js +17 -10
  8. data/app/assets/javascripts/bulkrax/importers.js.erb +9 -2
  9. data/app/assets/javascripts/bulkrax/importers_stepper.js +2420 -0
  10. data/app/assets/stylesheets/bulkrax/application.css +1 -1
  11. data/app/assets/stylesheets/bulkrax/import_export.scss +9 -2
  12. data/app/assets/stylesheets/bulkrax/stepper/_header.scss +83 -0
  13. data/app/assets/stylesheets/bulkrax/stepper/_mixins.scss +26 -0
  14. data/app/assets/stylesheets/bulkrax/stepper/_navigation.scss +103 -0
  15. data/app/assets/stylesheets/bulkrax/stepper/_responsive.scss +46 -0
  16. data/app/assets/stylesheets/bulkrax/stepper/_review.scss +92 -0
  17. data/app/assets/stylesheets/bulkrax/stepper/_settings.scss +106 -0
  18. data/app/assets/stylesheets/bulkrax/stepper/_success.scss +26 -0
  19. data/app/assets/stylesheets/bulkrax/stepper/_summary.scss +171 -0
  20. data/app/assets/stylesheets/bulkrax/stepper/_upload.scss +339 -0
  21. data/app/assets/stylesheets/bulkrax/stepper/_validation.scss +237 -0
  22. data/app/assets/stylesheets/bulkrax/stepper/_variables.scss +46 -0
  23. data/app/assets/stylesheets/bulkrax/stepper.scss +32 -0
  24. data/app/controllers/bulkrax/guided_imports_controller.rb +175 -0
  25. data/app/controllers/bulkrax/importers_controller.rb +34 -28
  26. data/app/controllers/concerns/bulkrax/guided_import_demo_scenarios.rb +201 -0
  27. data/app/controllers/concerns/bulkrax/importer_file_handler.rb +217 -0
  28. data/app/factories/bulkrax/object_factory.rb +3 -2
  29. data/app/factories/bulkrax/valkyrie_object_factory.rb +61 -17
  30. data/app/jobs/bulkrax/export_work_job.rb +1 -3
  31. data/app/jobs/bulkrax/importer_job.rb +11 -4
  32. data/app/models/bulkrax/csv_entry.rb +27 -7
  33. data/app/models/bulkrax/entry.rb +4 -0
  34. data/app/models/bulkrax/importer.rb +31 -1
  35. data/app/models/concerns/bulkrax/has_matchers.rb +2 -2
  36. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +6 -5
  37. data/app/parsers/bulkrax/application_parser.rb +31 -5
  38. data/app/parsers/bulkrax/csv_parser.rb +42 -10
  39. data/app/parsers/concerns/bulkrax/csv_parser/csv_template_generation.rb +73 -0
  40. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb +133 -0
  41. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb +282 -0
  42. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_hierarchy.rb +96 -0
  43. data/app/services/bulkrax/csv_template/column_builder.rb +60 -0
  44. data/app/services/bulkrax/csv_template/column_descriptor.rb +58 -0
  45. data/app/services/bulkrax/csv_template/csv_builder.rb +83 -0
  46. data/app/services/bulkrax/csv_template/explanation_builder.rb +57 -0
  47. data/app/services/bulkrax/csv_template/field_analyzer.rb +56 -0
  48. data/app/services/bulkrax/csv_template/file_path_generator.rb +47 -0
  49. data/app/services/bulkrax/csv_template/file_validator.rb +68 -0
  50. data/app/services/bulkrax/csv_template/mapping_manager.rb +55 -0
  51. data/app/services/bulkrax/csv_template/model_loader.rb +50 -0
  52. data/app/services/bulkrax/csv_template/row_builder.rb +35 -0
  53. data/app/services/bulkrax/csv_template/schema_analyzer.rb +70 -0
  54. data/app/services/bulkrax/csv_template/split_formatter.rb +44 -0
  55. data/app/services/bulkrax/csv_template/value_determiner.rb +68 -0
  56. data/app/services/bulkrax/stepper_response_formatter.rb +347 -0
  57. data/app/services/bulkrax/validation_error_csv_builder.rb +99 -0
  58. data/app/validators/bulkrax/csv_row/child_reference.rb +56 -0
  59. data/app/validators/bulkrax/csv_row/circular_reference.rb +71 -0
  60. data/app/validators/bulkrax/csv_row/controlled_vocabulary.rb +74 -0
  61. data/app/validators/bulkrax/csv_row/duplicate_identifier.rb +63 -0
  62. data/app/validators/bulkrax/csv_row/missing_source_identifier.rb +31 -0
  63. data/app/validators/bulkrax/csv_row/parent_reference.rb +59 -0
  64. data/app/validators/bulkrax/csv_row/required_values.rb +64 -0
  65. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +1 -1
  66. data/app/views/bulkrax/entries/_raw_metadata.html.erb +1 -1
  67. data/app/views/bulkrax/entries/show.html.erb +6 -6
  68. data/app/views/bulkrax/exporters/_form.html.erb +19 -43
  69. data/app/views/bulkrax/exporters/edit.html.erb +2 -2
  70. data/app/views/bulkrax/exporters/index.html.erb +5 -5
  71. data/app/views/bulkrax/exporters/new.html.erb +3 -5
  72. data/app/views/bulkrax/exporters/show.html.erb +3 -3
  73. data/app/views/bulkrax/guided_imports/new.html.erb +567 -0
  74. data/app/views/bulkrax/importers/_bagit_fields.html.erb +9 -9
  75. data/app/views/bulkrax/importers/_browse_everything.html.erb +1 -1
  76. data/app/views/bulkrax/importers/_csv_fields.html.erb +11 -11
  77. data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +23 -23
  78. data/app/views/bulkrax/importers/_edit_item_buttons.html.erb +2 -2
  79. data/app/views/bulkrax/importers/_file_uploader.html.erb +3 -3
  80. data/app/views/bulkrax/importers/_form.html.erb +4 -5
  81. data/app/views/bulkrax/importers/_oai_fields.html.erb +8 -18
  82. data/app/views/bulkrax/importers/_xml_fields.html.erb +13 -13
  83. data/app/views/bulkrax/importers/edit.html.erb +2 -2
  84. data/app/views/bulkrax/importers/index.html.erb +19 -14
  85. data/app/views/bulkrax/importers/new.html.erb +10 -9
  86. data/app/views/bulkrax/importers/show.html.erb +23 -7
  87. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +6 -6
  88. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +11 -11
  89. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +3 -3
  90. data/config/i18n-tasks.yml +195 -0
  91. data/config/locales/bulkrax.de.yml +504 -0
  92. data/config/locales/bulkrax.en.yml +487 -28
  93. data/config/locales/bulkrax.es.yml +504 -0
  94. data/config/locales/bulkrax.fr.yml +504 -0
  95. data/config/locales/bulkrax.it.yml +504 -0
  96. data/config/locales/bulkrax.pt-BR.yml +504 -0
  97. data/config/locales/bulkrax.zh.yml +503 -0
  98. data/config/routes.rb +10 -0
  99. data/lib/bulkrax/data/demo_scenarios.json +2235 -0
  100. data/lib/bulkrax/version.rb +1 -1
  101. data/lib/bulkrax.rb +31 -3
  102. data/lib/tasks/bulkrax_tasks.rake +0 -102
  103. metadata +55 -3
  104. /data/{app/services → lib}/wings/custom_queries/find_by_source_identifier.rb +0 -0
@@ -0,0 +1,217 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ # rubocop:disable Metrics/ModuleLength
5
+ module ImporterFileHandler
6
+ extend ActiveSupport::Concern
7
+
8
+ private
9
+
10
# Resolves the files to validate. Sources are tried in this order:
#   1. a server-side path given in params (see #import_file_path),
#   2. previously stored uploads referenced by params[:uploaded_files],
#   3. files posted directly under params[:importer][:parser_fields][:files].
# The file opened for source 1 is NOT closed here; the caller owns the handle.
# @return [Array<(Array<File>, nil)>] on success, a tuple of [files, nil]
# @return [Array<(nil, Hash)>] on error, a tuple of [nil, error_response]
def resolve_validation_files
  unless import_via_file_path?
    return resolve_hyrax_uploaded_files if params[:uploaded_files].present?

    # Direct upload: accept either a single file or a list of files.
    posted = params[:importer]&.[](:parser_fields)&.[](:files) || []
    posted = [posted] unless posted.is_a?(Array)
    return [posted.compact, nil]
  end

  if File.exist?(import_file_path)
    [[File.open(import_file_path)], nil]
  else
    missing_message = I18n.t('bulkrax.importer.guided_import.validation.file_path_not_exist')
    [nil, StepperResponseFormatter.error(message: missing_message)]
  end
end
26
+
27
# Opens the on-disk files behind the upload records returned by
# #uploaded_files_scope (which restricts them to the current user).
# Records whose file has no path, or whose path is missing on disk, are skipped.
# Any StandardError is logged and converted into an error response.
# @return [Array<(Array<File>, nil)>] on success, a tuple of [files, nil]
# @return [Array<(nil, Hash)>] on error, a tuple of [nil, error_response]
def resolve_hyrax_uploaded_files
  records = uploaded_files_scope
  if records.empty?
    return [nil, StepperResponseFormatter.error(message: 'No uploaded files found for the given IDs')]
  end

  handles = []
  records.each do |record|
    location = record.file&.path
    handles << File.open(location) if location && File.exist?(location)
  end
  [handles, nil]
rescue StandardError => e
  Rails.logger.error("Bulkrax: error loading Hyrax uploaded files: #{e.class}: #{e.message}")
  Rails.logger.debug { e.full_message }
  [nil, StepperResponseFormatter.error(message: 'Failed to load uploaded files')]
end
44
+
45
# Looks up Hyrax::UploadedFile records for the IDs in params[:uploaded_files].
# Returns an empty Array when Hyrax is not loaded, and an empty scope when
# there is no current user, so uploads are only ever returned for their owner.
def uploaded_files_scope
  return [] unless defined?(::Hyrax)

  matching = Hyrax::UploadedFile.where(id: params[:uploaded_files])
  signed_in = respond_to?(:current_user) && current_user.present?
  signed_in ? matching.where(user_id: current_user.id) : matching.none
end
55
+
56
# Resolves the files used when creating an importer: opened handles for
# stored uploads when params[:uploaded_files] is given, otherwise the files
# posted directly with the form (see #extract_uploaded_files).
# Upload records without an existing file on disk are skipped.
# @return [Array] the resolved files (possibly empty)
def resolve_create_files
  return extract_uploaded_files unless params[:uploaded_files].present?

  uploaded_files_scope.each_with_object([]) do |record, handles|
    location = record.file&.path
    handles << File.open(location) if location && File.exist?(location)
  end
end
68
+
69
# Reads params[:importer][:parser_fields][:files] and normalises it to an
# Array without nil entries. A blank/missing value yields an empty Array.
# @return [Array]
def extract_uploaded_files
  posted = params[:importer]&.[](:parser_fields)&.[](:files)
  return [] if posted.blank?

  (posted.is_a?(Array) ? posted : [posted]).compact
end
75
+
76
# Picks the first file whose name ends in '.csv' and the first whose name
# ends in '.zip' (both comparisons are case-sensitive).
# @param files [Array<File, ActionDispatch::Http::UploadedFile>] the resolved files to search
# @return [Array] a tuple of [csv_file, zip_file]; either element may be nil
def select_csv_and_zip(files)
  %w[.csv .zip].map do |extension|
    files.find { |candidate| filename_for(candidate)&.end_with?(extension) }
  end
end
84
+
85
# Returns a name for a file-like object: its #original_filename when it has
# one (e.g. an uploaded file), otherwise its #path; nil when it has neither.
def filename_for(file)
  return file.original_filename if file.respond_to?(:original_filename)
  return file.path if file.respond_to?(:path)

  nil
end
93
+
94
# Opens the ZIP, locates its CSV entry and copies that entry's content into a
# StringIO. The copy is made inside the block, while the archive is open.
# @param zip_file [File] the ZIP file to search
# @return [Array<(StringIO, nil)>] on success, a tuple of [csv_file, nil]
# @return [Array<(nil, Hash)>] on error, a tuple of [nil, error_response]
def extract_csv_from_zip(zip_file)
  csv_io = nil
  failure = nil
  Zip::File.open(zip_file.path) do |archive|
    located = locate_csv_entry_in_zip(archive)
    next unless located

    # A Hash carrying :messages is an error response, anything else is the entry.
    if located.is_a?(Hash) && located[:messages]
      failure = located
    else
      csv_io = StringIO.new(located.get_input_stream.read)
    end
  end
  [csv_io, failure]
end
111
+
112
# Chooses the single CSV entry to import from a ZIP. Only the CSVs at the
# shallowest directory depth are considered; the choice is rejected as
# ambiguous when two of them share a directory, or when more than one exists
# at that depth.
# @param zip [Zip::File] the open ZIP archive to search
# @return [Zip::Entry] the CSV entry on success
# @return [Hash] an error response hash if no CSV is found or multiple CSVs are ambiguous
def locate_csv_entry_in_zip(zip)
  candidates = group_entries_by_directory_level(zip)
  if candidates.empty?
    return StepperResponseFormatter.error(message: I18n.t('bulkrax.importer.guided_import.validation.no_csv_in_zip'))
  end

  shallowest = determine_csvs_at_shallowest_level(get_directory_depth_for_each_csv(candidates))

  shares_directory = shallowest
                     .group_by { |entry| File.dirname(entry.name) }
                     .values
                     .any? { |csvs| csvs.count > 1 }
  if shares_directory
    return StepperResponseFormatter.error(message: I18n.t('bulkrax.importer.guided_import.validation.multiple_csv_same_dir'))
  end

  if shallowest.size > 1
    return StepperResponseFormatter.error(message: I18n.t('bulkrax.importer.guided_import.validation.multiple_csv_same_level'))
  end

  shallowest.first
end
133
+
134
# Selects the entries of a ZIP that are candidate CSV files: non-directory
# entries whose name ends in '.csv'. Despite the method name, no grouping
# happens here; the result is a flat list.
#
# macOS metadata entries are skipped (anything under '__MACOSX/', and any path
# segment that is '.DS_Store' or starts with '._'). Such entries can carry a
# '.csv' suffix (e.g. 'pkg/._data.csv') and would otherwise be counted as a
# second CSV, producing a false "multiple CSV" error.
# @param zip [Enumerable] entries responding to #name and #directory?
# @return [Array] the candidate CSV entries
def group_entries_by_directory_level(zip)
  zip.select do |entry|
    next false if entry.directory?
    next false unless entry.name.end_with?('.csv')

    segments = entry.name.split('/')
    segments.first != '__MACOSX' &&
      segments.none? { |segment| segment == '.DS_Store' || segment.start_with?('._') }
  end
end
137
+
138
# Buckets entries by directory depth, measured as the number of '/' characters
# in the entry name (0 = archive root).
# @param entries [Enumerable] entries responding to #name
# @return [Hash{Integer => Array}] depth => entries at that depth
def get_directory_depth_for_each_csv(entries)
  entries.each_with_object({}) do |entry, by_depth|
    (by_depth[entry.name.count('/')] ||= []) << entry
  end
end
141
+
142
# Returns the entries stored under the smallest depth key.
# @param csv_by_depth [Hash{Integer => Array}] depth => entries
# @return [Array, nil] entries at the shallowest depth; nil for an empty hash
def determine_csvs_at_shallowest_level(csv_by_depth)
  _depth, entries = csv_by_depth.min_by { |depth, _entries| depth }
  entries
end
146
+
147
# Records the import file location(s) on @importer's parser_fields and saves
# the importer. Does nothing when all three arguments are blank.
# When several sources are given, each later one overwrites 'import_file_path'
# (direct file, then cloud files, then each upload in turn).
# @param file [ActionDispatch::Http::UploadedFile, nil] a directly uploaded file
# @param cloud_files [Hash, nil] cloud file paths from browse-everything
# @param uploads [ActiveRecord::Relation, Array, nil] Hyrax::UploadedFile records
def files_for_import(file, cloud_files, uploads)
  return if [file, cloud_files, uploads].all?(&:blank?)

  if file.present?
    @importer[:parser_fields]['import_file_path'] = @importer.parser.write_import_file(file)
  end

  unless cloud_files.blank?
    @importer[:parser_fields]['cloud_file_paths'] = cloud_files
    # For BagIt, there will only be one bag, so we get the file_path back and set import_file_path
    # For CSV, we expect only file uploads, so we won't get the file_path back
    # and we expect the import_file_path to be set already
    retrieved_path = @importer.parser.retrieve_cloud_files(cloud_files, @importer)
    @importer[:parser_fields]['import_file_path'] = retrieved_path if retrieved_path.present?
  end

  unless uploads.blank?
    uploads.each do |upload|
      @importer[:parser_fields]['import_file_path'] = @importer.parser.write_import_file(upload.file.file)
    end
  end

  @importer.save
end
172
+
173
# Writes the CSV and/or ZIP found in +files+ to the import location and stores
# the resulting paths on @importer.parser_fields, then saves the importer.
# 'import_file_path' is the CSV when present, otherwise the ZIP;
# 'attachments_zip_path' is only set when both a CSV and a ZIP were written.
# Returns early (without saving) when neither file was written.
# Errors are logged and re-raised.
# @param files [Array] the resolved files
def write_files(files)
  csv_path, zip_path = select_csv_and_zip(files).map { |found| write_file_if_present(found) }

  return if !csv_path && !zip_path

  # Determine import_file_path: prefer CSV, fallback to ZIP
  @importer.parser_fields['import_file_path'] = csv_path || zip_path
  @importer.parser_fields['attachments_zip_path'] = zip_path if csv_path && zip_path

  @importer.save
rescue StandardError => e
  Rails.logger.error("Bulkrax::ImporterFileHandler#write_files failed: #{e.message}")
  raise
end
190
+
191
# Stores a single file in the importer's import location.
# Objects with #original_filename (uploads) are handed to the parser's
# write_import_file; anything else is copied by path into path_for_import,
# keeping its basename.
# @param file [#original_filename, #path, nil]
# @return [String, nil] the stored path, or nil when no file was given
def write_file_if_present(file)
  return nil unless file
  return @importer.parser.write_import_file(file) if file.respond_to?(:original_filename)

  File.join(@importer.parser.path_for_import, File.basename(file.path)).tap do |target|
    FileUtils.cp(file.path, target)
  end
end
202
+
203
# Closes every element of +files+ that responds to #close.
# Anything that is not an Array is ignored.
# @param files [Array, Object]
def close_file_handles(files)
  return unless files.is_a?(Array)

  files.each do |handle|
    next unless handle.respond_to?(:close)

    handle.close
  end
end
207
+
208
# True when the request supplied a non-blank server-side import file path.
def import_via_file_path?
  requested_path = import_file_path
  requested_path.present?
end
211
+
212
# Reads params[:importer][:parser_fields][:import_file_path].
# A truthy value is memoized in @file_path; a nil result is looked up again on
# the next call.
def import_file_path
  return @file_path if @file_path

  parser_fields = params[:importer]&.[](:parser_fields)
  @file_path = parser_fields&.[](:import_file_path)
end
215
+ end
216
+ # rubocop:enable Metrics/ModuleLength
217
+ end
@@ -70,14 +70,15 @@ module Bulkrax
70
70
  properties.reject { |prop| Bulkrax.reserved_properties.include?(prop) }
71
71
  end
72
72
 
73
- def self.field_multi_value?(field:, model:)
73
+ # Unused admin set included to support flexible contexts in the Valkyrie version
74
+ def self.field_multi_value?(field:, model:, admin_set_id: nil) # rubocop:disable Lint/UnusedMethodArgument
74
75
  return false unless field_supported?(field: field, model: model)
75
76
  return false unless model.singleton_methods.include?(:properties)
76
77
 
77
78
  model&.properties&.[](field)&.[]("multiple")
78
79
  end
79
80
 
80
- def self.field_supported?(field:, model:)
81
+ def self.field_supported?(field:, model:, admin_set_id: nil) # rubocop:disable Lint/UnusedMethodArgument
81
82
  model.method_defined?(field) && model.properties[field].present?
82
83
  end
83
84
 
@@ -120,11 +120,11 @@ module Bulkrax
120
120
  save!(resource: resource, user: user)
121
121
  end
122
122
 
123
- def self.field_multi_value?(field:, model:)
124
- return false unless field_supported?(field: field, model: model)
123
+ def self.field_multi_value?(field:, model:, admin_set_id: nil)
124
+ return false unless field_supported?(field: field, model: model, admin_set_id: admin_set_id)
125
125
 
126
126
  if model.respond_to?(:schema)
127
- schema = model.new.singleton_class.schema || model.schema
127
+ schema = cached_schema_for(klass: model, admin_set_id: admin_set_id)
128
128
  dry_type = schema.key(field.to_sym)
129
129
  return true if dry_type.respond_to?(:primitive) && dry_type.primitive == Array
130
130
 
@@ -134,9 +134,9 @@ module Bulkrax
134
134
  end
135
135
  end
136
136
 
137
- def self.field_supported?(field:, model:)
137
+ def self.field_supported?(field:, model:, admin_set_id: nil)
138
138
  if model.respond_to?(:schema)
139
- schema_properties(model).include?(field)
139
+ schema_properties(klass: model, admin_set_id: admin_set_id).include?(field)
140
140
  else
141
141
  # We *might* have a Fedora object, so we need to consider that approach as
142
142
  # well.
@@ -272,17 +272,34 @@ module Bulkrax
272
272
  # rubocop:enable Metrics/ParameterLists
273
273
 
274
274
  ##
275
- # Retrieve properties from M3 model
276
- # @param klass the model
275
+ # Retrieve schema property names for a model, respecting admin set contexts
276
+ # when using flexible metadata. Delegates context resolution to Hyrax so
277
+ # Bulkrax does not need to know about HYRAX_FLEXIBLE or contexts.
278
+ #
279
+ # @param klass [Class] the model class
280
+ # @param admin_set_id [String, nil] admin set used to resolve contexts
277
281
  # @return [Array<String>]
278
- def self.schema_properties(klass)
279
- @schema_properties_map ||= {}
280
-
281
- klass_key = klass.name
282
- schema = klass.new.singleton_class.schema || klass.schema
283
- @schema_properties_map[klass_key] = schema.map { |k| k.name.to_s } unless @schema_properties_map.key?(klass_key)
282
+ def self.schema_properties(klass:, admin_set_id: nil)
283
+ cached_schema_for(klass: klass, admin_set_id: admin_set_id).map { |k| k.name.to_s }
284
+ end
284
285
 
285
- @schema_properties_map[klass_key]
286
##
# Returns the schema for a model, memoized in @cached_schema_map under a key
# built from the class name and, when given, the admin set id.
# When an admin set id is present and +Hyrax.schema_for+ is available the
# lookup is delegated to it; otherwise the schema comes from the singleton
# class of a new instance, falling back to the class-level schema.
#
# @param klass [Class]
# @param admin_set_id [String, nil]
# @return [Dry::Types::Hash]
def self.cached_schema_for(klass:, admin_set_id: nil)
  @cached_schema_map ||= {}
  cache_key = [klass.name, admin_set_id].compact.join('|')
  cached = @cached_schema_map[cache_key]
  return cached if cached

  context_aware = admin_set_id.present? && defined?(Hyrax) && Hyrax.respond_to?(:schema_for)
  @cached_schema_map[cache_key] =
    if context_aware
      Hyrax.schema_for(klass: klass, admin_set_id: admin_set_id)
    else
      klass.new.singleton_class.schema || klass.schema
    end
end
287
304
 
288
305
  def self.ordered_file_sets_for(object)
@@ -457,7 +474,9 @@ module Bulkrax
457
474
  # TODO What do we return when the calculated form fails?
458
475
  # @raise [StandardError] when there was a failure calling the translation.
459
476
  def perform_transaction_for(object:, attrs:)
460
- form = Hyrax::Forms::ResourceForm.for(object).prepopulate!
477
+ admin_set_id = attrs[:admin_set_id] || attrs['admin_set_id'] ||
478
+ attributes[:admin_set_id] || attributes['admin_set_id']
479
+ form = Hyrax::Forms::ResourceForm.for(resource: object, admin_set_id: admin_set_id).prepopulate!
461
480
 
462
481
  # TODO: Handle validations
463
482
  form.validate(attrs)
@@ -474,13 +493,15 @@ module Bulkrax
474
493
  end
475
494
 
476
495
  ##
477
- # We accept attributes based on the model schema
496
+ # We accept attributes based on the model schema. Passes the admin set ID
497
+ # so that context-restricted properties are included in the permitted list.
478
498
  #
479
499
  # @return [Array<Symbols>]
480
500
  def permitted_attributes
481
501
  @permitted_attributes ||= (
482
502
  base_permitted_attributes + if klass.respond_to?(:schema)
483
- Bulkrax::ValkyrieObjectFactory.schema_properties(klass)
503
+ admin_set_id = attributes[:admin_set_id] || attributes['admin_set_id']
504
+ Bulkrax::ValkyrieObjectFactory.schema_properties(klass: klass, admin_set_id: admin_set_id)
484
505
  else
485
506
  klass.properties.keys.map(&:to_sym)
486
507
  end
@@ -590,6 +611,29 @@ module Bulkrax
590
611
  .symbolize_keys
591
612
 
592
613
  attrs[:title] = [] if attrs[:title].blank?
614
+ attrs = convert_based_near_to_attributes(attrs)
615
+ attrs
616
+ end
617
+
618
# Hyrax's ResourceForm strips the plain `based_near` key during validation
# (BasedNearFieldBehavior#deserialize calls params.except('based_near')).
# Values must be passed as `based_near_attributes` — a numbered hash of
# { "0" => { "id" => uri, "_destroy" => "false" } } — so the populator
# can set them. Hyrax accepts any valid URI; note that only GeoNames URIs
# will resolve to a display label via LocationService.
#
# Each value must be a URI in its entirety (surrounding whitespace is
# tolerated): the pattern is anchored, so free text that merely contains a
# URI is rejected rather than being stored as a location id.
#
# @param attrs [Hash] attributes; the :based_near key is removed in place
# @return [Hash] attrs, with :based_near_attributes set when values were given
# @raise [StandardError] when any value is not a URI
def convert_based_near_to_attributes(attrs)
  values = Array.wrap(attrs.delete(:based_near)).reject(&:blank?)
  return attrs if values.empty?

  # Build the anchored pattern once rather than once per value.
  whole_uri = /\A#{::URI::DEFAULT_PARSER.make_regexp}\z/
  invalid = values.reject { |v| v.to_s.strip.match?(whole_uri) }
  if invalid.any?
    raise ::StandardError, "Invalid value(s) for location (based_near): #{invalid.join(', ')}. " \
                           "Values must be valid URIs (e.g. http://sws.geonames.org/5128581/)."
  end

  attrs[:based_near_attributes] = values.each_with_index.to_h do |uri, i|
    [i.to_s, { "id" => uri.to_s, "_destroy" => "false" }]
  end
  attrs
end
595
639
  end
@@ -18,12 +18,10 @@ module Bulkrax
18
18
  else
19
19
  if entry.failed?
20
20
  ExporterRun.increment_counter(:failed_records, args[1])
21
- ExporterRun.decrement_counter(:enqueued_records, args[1]) unless exporter_run.reload.enqueued_records <= 0
22
- raise entry.reload.current_status.error_class.constantize
23
21
  else
24
22
  ExporterRun.increment_counter(:processed_records, args[1])
25
- ExporterRun.decrement_counter(:enqueued_records, args[1]) unless exporter_run.reload.enqueued_records <= 0
26
23
  end
24
+ ExporterRun.decrement_counter(:enqueued_records, args[1]) unless exporter_run.reload.enqueued_records <= 0
27
25
  # rubocop:enable Rails/SkipsModelValidations
28
26
  end
29
27
  return entry if exporter_run.reload.enqueued_records.positive?
@@ -27,10 +27,17 @@ module Bulkrax
27
27
  end
28
28
 
29
29
  def unzip_imported_file(parser)
30
- return unless parser.file? && parser.zip?
31
-
32
- parser.unzip(parser.parser_fields['import_file_path'])
33
- parser.remove_spaces_from_filenames
30
+ return unless parser.file?
31
+ if parser.zip?
32
+ # we have a zip file, and we need to unzip it before we can import the files
33
+ parser.unzip(parser.parser_fields['import_file_path'])
34
+ parser.remove_spaces_from_filenames
35
+ elsif parser.zip_file?(parser.parser_fields['attachments_zip_path'])
36
+ # we have a separate csv and zip file. We need to unzip the zip file, and move the csv file to the unzip location before we can import the files
37
+ parser.unzip(parser.parser_fields['attachments_zip_path'])
38
+ parser.copy_file(parser.parser_fields['import_file_path'])
39
+ parser.remove_spaces_from_filenames
40
+ end
34
41
  end
35
42
 
36
43
  def update_current_run_counters(importer)
@@ -45,7 +45,12 @@ module Bulkrax
45
45
  encoding: 'utf-8'
46
46
  }.merge(csv_read_data_options)
47
47
 
48
- results = CSV.read(path, **options)
48
+ results = if path.respond_to?(:read)
49
+ path.rewind if path.respond_to?(:rewind)
50
+ CSV.parse(path.read, **options)
51
+ else
52
+ CSV.read(path, **options)
53
+ end
49
54
  csv_wrapper_class.new(results)
50
55
  end
51
56
 
@@ -83,9 +88,10 @@ module Bulkrax
83
88
  # model has to be separated so that it doesn't get mistranslated by to_h
84
89
  raw_data = data.to_h
85
90
  raw_data[:model] = data[:model] if data[:model].present?
86
- # If the collection field mapping is not 'collection', add 'collection' - the parser needs it
87
- # TODO: change to :parents
88
- raw_data[:parents] = raw_data[parent_field(parser).to_sym] if raw_data.keys.include?(parent_field(parser).to_sym) && parent_field(parser) != 'parents'
91
+ # If the parents/children field mapping uses a custom column name, alias it to the standard key
92
+ # so downstream code can find it regardless of what the CSV column is named.
93
+ raw_data[:parents] = raw_data[parser.related_parents_raw_mapping.to_sym] if parser.related_parents_raw_mapping.present? && raw_data.key?(parser.related_parents_raw_mapping.to_sym) && parser.related_parents_raw_mapping != 'parents'
94
+ raw_data[:children] = raw_data[parser.related_children_raw_mapping.to_sym] if parser.related_children_raw_mapping.present? && raw_data.key?(parser.related_children_raw_mapping.to_sym) && parser.related_children_raw_mapping != 'children'
89
95
  return raw_data
90
96
  end
91
97
 
@@ -416,18 +422,32 @@ module Bulkrax
416
422
  self.collection_ids
417
423
  end
418
424
 
419
- # If only filename is given, construct the path (/files/my_file)
425
+ # If only filename is given, construct the path (/files/my_file).
426
+ # If file contains a path separator (e.g. attachments/cat_scan.jpg), resolve relative to the CSV's directory.
420
427
  def path_to_file(file)
421
- # return if we already have the full file path
422
428
  return file if File.exist?(file)
429
+
430
+ # Relative path: resolve from CSV's directory (allows arbitrary subdirectory names, not just "files")
431
+ return resolve_relative_file_path(file) if file.include?('/')
432
+
433
+ # Bare filename: use legacy files/ directory for backward compatibility and round-tripping
423
434
  path = importerexporter.parser.path_to_files
435
+ raise "Could not determine path to files directory. Ensure the import package contains a zip or a valid import_file_path." if path.nil?
436
+
424
437
  f = File.join(path, file)
425
438
  return f if File.exist?(f)
426
- raise "File #{f} does not exist"
439
+ raise "File not found: #{f}. Check the file column in your CSV and ensure the file exists in the import package or path_to_files directory."
427
440
  end
428
441
 
429
442
  private
430
443
 
444
# Resolves +file+ relative to the directory that holds the parser's
# import_file_path.
# @param file [String] a relative path taken from the CSV
# @return [String] the joined path, when it exists on disk
# @raise [RuntimeError] when no file exists at the joined path
def resolve_relative_file_path(file)
  csv_directory = File.dirname(importerexporter.parser.import_file_path)
  resolved = File.join(csv_directory, file)
  unless File.exist?(resolved)
    raise "File not found: #{resolved}. Check the file path in your CSV and ensure the file exists in the import package or directory."
  end

  resolved
end
450
+
431
451
  def map_file_sets(file_sets)
432
452
  # rubocop:disable Rails/Presence
433
453
  file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
@@ -93,6 +93,10 @@ module Bulkrax
93
93
  parser.related_parents_parsed_mapping
94
94
  end
95
95
 
96
+ def self.child_field(parser)
97
+ parser.related_children_parsed_mapping
98
+ end
99
+
96
100
  def build
97
101
  return if type.nil?
98
102
  self.save if self.new_record? # must be saved for statuses
@@ -169,6 +169,30 @@ module Bulkrax
169
169
  import_file_path if original_file?
170
170
  end
171
171
 
172
# Lists the original import files that still exist on disk: the file at
# import_file_path (reported with type :csv) followed by the file at
# parser_fields['attachments_zip_path'] (reported with type :zip).
# @return [Array<Hash>] hashes with :path, :name and :type keys
def original_files
  candidates = [
    [import_file_path, :csv],
    [parser_fields['attachments_zip_path'], :zip]
  ]

  candidates.each_with_object([]) do |(location, kind), found|
    next unless location && File.exist?(location)

    found << { path: location, name: File.basename(location), type: kind }
  end
end
195
+
172
196
  def replace_files
173
197
  self.parser_fields['replace_files']
174
198
  end
@@ -241,8 +265,14 @@ module Bulkrax
241
265
  # [['Single Metadata File for all works', 'single'], ['Multiple Files, one per Work', 'multi']]
242
266
  # end
243
267
 
244
- # If the import data is zipped, unzip it to this path
245
268
  def importer_unzip_path(mkdir: false)
269
+ entry = parser_fields&.[]('import_file_path')
270
+ if entry.is_a?(String) && entry.end_with?('.zip') && File.file?(entry) && parser_fields["file_style"] != I18n.t('bulkrax.importer.xml.file_style.server_path')
271
+ unzip_dir = File.dirname(entry)
272
+ FileUtils.mkdir_p(unzip_dir) if mkdir
273
+ return unzip_dir
274
+ end
275
+
246
276
  @importer_unzip_path ||= File.join(parser.base_path, "import_#{path_string}")
247
277
  return @importer_unzip_path if Dir.exist?(@importer_unzip_path) || mkdir == true
248
278
 
@@ -131,7 +131,7 @@ module Bulkrax
131
131
  return false if excluded?(field)
132
132
  return true if supported_bulkrax_fields.include?(field)
133
133
 
134
- Bulkrax.object_factory.field_supported?(field: field, model: factory_class)
134
+ Bulkrax.object_factory.field_supported?(field: field, model: factory_class, admin_set_id: importerexporter.try(:admin_set_id))
135
135
  end
136
136
 
137
137
  def supported_bulkrax_fields
@@ -145,7 +145,7 @@ module Bulkrax
145
145
  return true if fields_that_are_always_singular.include?(field.to_s)
146
146
  return false if fields_that_are_always_multiple.include?(field.to_s)
147
147
 
148
- Bulkrax.object_factory.field_multi_value?(field: field, model: factory_class)
148
+ Bulkrax.object_factory.field_multi_value?(field: field, model: factory_class, admin_set_id: importerexporter.try(:admin_set_id))
149
149
  end
150
150
 
151
151
  def fields_that_are_always_multiple
@@ -51,15 +51,16 @@ module Bulkrax
51
51
  # Is this a zip file?
52
52
  def zip?
53
53
  filename = parser_fields&.[]('import_file_path')
54
- return false unless filename
55
- return false unless File.file?(filename)
54
+ return false unless filename && File.file?(filename)
55
+ zip_file?(filename)
56
+ end
56
57
 
57
- returning_value = false
58
+ def zip_file?(filename)
59
+ return false unless filename && File.file?(filename)
58
60
  File.open(filename) do |file|
59
61
  mime_type = ::Marcel::MimeType.for(name: file)
60
- returning_value = mime_type.include?('application/zip') || mime_type.include?('application/gzip')
62
+ mime_type.include?('application/zip') || mime_type.include?('application/gzip')
61
63
  end
62
- returning_value
63
64
  end
64
65
  end
65
66
  end
@@ -12,7 +12,7 @@ module Bulkrax
12
12
  :seen, :increment_counters, :parser_fields, :user, :keys_without_numbers,
13
13
  :key_without_numbers, :status, :set_status_info, :status_info, :status_at,
14
14
  :exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
15
- :zip?, :file?, :remove_and_rerun,
15
+ :zip?, :file?, :remove_and_rerun, :zip_file?,
16
16
  to: :importerexporter
17
17
 
18
18
  # @todo Convert to `class_attribute :parser_fields, default: {}`
@@ -434,14 +434,40 @@ module Bulkrax
434
434
  return untar(file_to_unzip) if file_to_unzip.end_with?('.tar.gz')
435
435
 
436
436
  Zip::File.open(file_to_unzip) do |zip_file|
437
+ real_entries = zip_file.reject { |e| macos_junk_entry?(e.name) }
438
+ top_level_dirs = real_entries.map { |e| e.name.split('/').first }.uniq
439
+ strip_prefix = top_level_dirs.size == 1 ? "#{top_level_dirs.first}/" : nil
440
+
441
+ dest_dir = importer_unzip_path(mkdir: true)
437
442
  zip_file.each do |entry|
438
- entry_path = File.join(importer_unzip_path(mkdir: true), entry.name)
439
- FileUtils.mkdir_p(File.dirname(entry_path))
440
- zip_file.extract(entry, entry_path) unless File.exist?(entry_path)
443
+ next unless entry.file?
444
+ next if macos_junk_entry?(entry.name)
445
+ name = strip_prefix ? entry.name.delete_prefix(strip_prefix) : entry.name
446
+ next if name.empty?
447
+ dest_path = File.join(dest_dir, name)
448
+ FileUtils.mkdir_p(File.dirname(dest_path))
449
+ unless File.exist?(dest_path)
450
+ # rubyzip 2.x: extract(entry, absolute_dest_path)
451
+ # rubyzip 3.x: extract(entry, relative_name, destination_directory: dir)
452
+ if zip_file.method(:extract).arity == 2
453
+ zip_file.extract(entry, dest_path)
454
+ else
455
+ zip_file.extract(entry, name, destination_directory: dest_dir)
456
+ end
457
+ end
441
458
  end
442
459
  end
443
460
  end
444
461
 
462
# True for archive entry names that are macOS metadata: anything under
# '__MACOSX/', or any path whose segments include '.DS_Store' or a name
# starting with '._'.
# @param name [String] the entry name inside the archive
# @return [Boolean]
def macos_junk_entry?(name)
  return true if name.start_with?('__MACOSX/')

  name.split('/').any? do |segment|
    segment.start_with?('._') || segment == '.DS_Store'
  end
end
465
+
466
# Copies a file into the importer's unzip directory (requested with
# mkdir: true), keeping its basename.
# @param file_to_copy [String] path of the file to copy
def copy_file(file_to_copy)
  target_dir = importer_unzip_path(mkdir: true)
  FileUtils.cp(file_to_copy, File.join(target_dir, File.basename(file_to_copy)))
end
470
+
445
471
  def untar(file_to_untar)
446
472
  Dir.mkdir(importer_unzip_path(mkdir: true)) unless File.directory?(importer_unzip_path(mkdir: true))
447
473
  command = "tar -xzf #{Shellwords.escape(file_to_untar)} -C #{Shellwords.escape(importer_unzip_path)}"
@@ -452,7 +478,7 @@ module Bulkrax
452
478
  # File names referenced in CSVs have spaces replaced with underscores
453
479
  # @see Bulkrax::CsvParser#file_paths
454
480
  def remove_spaces_from_filenames
455
- files = Dir.glob(File.join(importer_unzip_path, 'files', '*'))
481
+ files = Dir.glob(File.join(importer_unzip_path, 'files', '*')).uniq
456
482
  files_with_spaces = files.select { |f| f.split('/').last.match?(' ') }
457
483
  return if files_with_spaces.blank?
458
484