bulkrax 9.3.5 → 9.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +11 -1
  3. data/app/assets/javascripts/bulkrax/application.js +2 -1
  4. data/app/assets/javascripts/bulkrax/bulkrax.js +13 -4
  5. data/app/assets/javascripts/bulkrax/bulkrax_utils.js +96 -0
  6. data/app/assets/javascripts/bulkrax/datatables.js +1 -0
  7. data/app/assets/javascripts/bulkrax/entries.js +17 -10
  8. data/app/assets/javascripts/bulkrax/importers.js.erb +9 -2
  9. data/app/assets/javascripts/bulkrax/importers_stepper.js +2420 -0
  10. data/app/assets/stylesheets/bulkrax/application.css +1 -1
  11. data/app/assets/stylesheets/bulkrax/stepper/_header.scss +83 -0
  12. data/app/assets/stylesheets/bulkrax/stepper/_mixins.scss +26 -0
  13. data/app/assets/stylesheets/bulkrax/stepper/_navigation.scss +103 -0
  14. data/app/assets/stylesheets/bulkrax/stepper/_responsive.scss +46 -0
  15. data/app/assets/stylesheets/bulkrax/stepper/_review.scss +92 -0
  16. data/app/assets/stylesheets/bulkrax/stepper/_settings.scss +106 -0
  17. data/app/assets/stylesheets/bulkrax/stepper/_success.scss +26 -0
  18. data/app/assets/stylesheets/bulkrax/stepper/_summary.scss +171 -0
  19. data/app/assets/stylesheets/bulkrax/stepper/_upload.scss +339 -0
  20. data/app/assets/stylesheets/bulkrax/stepper/_validation.scss +237 -0
  21. data/app/assets/stylesheets/bulkrax/stepper/_variables.scss +46 -0
  22. data/app/assets/stylesheets/bulkrax/stepper.scss +32 -0
  23. data/app/controllers/bulkrax/guided_imports_controller.rb +175 -0
  24. data/app/controllers/bulkrax/importers_controller.rb +28 -31
  25. data/app/controllers/concerns/bulkrax/guided_import_demo_scenarios.rb +201 -0
  26. data/app/controllers/concerns/bulkrax/importer_file_handler.rb +212 -0
  27. data/app/errors/bulkrax/unzip_error.rb +16 -0
  28. data/app/factories/bulkrax/object_factory.rb +3 -2
  29. data/app/factories/bulkrax/valkyrie_object_factory.rb +61 -17
  30. data/app/jobs/bulkrax/importer_job.rb +42 -4
  31. data/app/models/bulkrax/csv_entry.rb +27 -7
  32. data/app/models/bulkrax/entry.rb +4 -0
  33. data/app/models/bulkrax/importer.rb +27 -10
  34. data/app/models/concerns/bulkrax/has_matchers.rb +2 -2
  35. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +6 -5
  36. data/app/parsers/bulkrax/application_parser.rb +63 -20
  37. data/app/parsers/bulkrax/bagit_parser.rb +12 -0
  38. data/app/parsers/bulkrax/csv_parser.rb +168 -25
  39. data/app/parsers/concerns/bulkrax/csv_parser/csv_template_generation.rb +73 -0
  40. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb +133 -0
  41. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb +282 -0
  42. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_hierarchy.rb +96 -0
  43. data/app/services/bulkrax/csv_template/column_builder.rb +60 -0
  44. data/app/services/bulkrax/csv_template/column_descriptor.rb +58 -0
  45. data/app/services/bulkrax/csv_template/csv_builder.rb +83 -0
  46. data/app/services/bulkrax/csv_template/explanation_builder.rb +57 -0
  47. data/app/services/bulkrax/csv_template/field_analyzer.rb +56 -0
  48. data/app/services/bulkrax/csv_template/file_path_generator.rb +47 -0
  49. data/app/services/bulkrax/csv_template/file_validator.rb +68 -0
  50. data/app/services/bulkrax/csv_template/mapping_manager.rb +55 -0
  51. data/app/services/bulkrax/csv_template/model_loader.rb +50 -0
  52. data/app/services/bulkrax/csv_template/row_builder.rb +35 -0
  53. data/app/services/bulkrax/csv_template/schema_analyzer.rb +70 -0
  54. data/app/services/bulkrax/csv_template/split_formatter.rb +44 -0
  55. data/app/services/bulkrax/csv_template/value_determiner.rb +68 -0
  56. data/app/services/bulkrax/stepper_response_formatter.rb +347 -0
  57. data/app/services/bulkrax/validation_error_csv_builder.rb +99 -0
  58. data/app/validators/bulkrax/csv_row/child_reference.rb +56 -0
  59. data/app/validators/bulkrax/csv_row/circular_reference.rb +71 -0
  60. data/app/validators/bulkrax/csv_row/controlled_vocabulary.rb +74 -0
  61. data/app/validators/bulkrax/csv_row/duplicate_identifier.rb +63 -0
  62. data/app/validators/bulkrax/csv_row/missing_source_identifier.rb +31 -0
  63. data/app/validators/bulkrax/csv_row/parent_reference.rb +59 -0
  64. data/app/validators/bulkrax/csv_row/required_values.rb +64 -0
  65. data/app/views/bulkrax/guided_imports/new.html.erb +567 -0
  66. data/app/views/bulkrax/importers/index.html.erb +6 -1
  67. data/app/views/bulkrax/importers/new.html.erb +1 -1
  68. data/app/views/bulkrax/importers/show.html.erb +17 -1
  69. data/config/i18n-tasks.yml +195 -0
  70. data/config/locales/bulkrax.de.yml +508 -0
  71. data/config/locales/bulkrax.en.yml +463 -233
  72. data/config/locales/bulkrax.es.yml +508 -0
  73. data/config/locales/bulkrax.fr.yml +508 -0
  74. data/config/locales/bulkrax.it.yml +508 -0
  75. data/config/locales/bulkrax.pt-BR.yml +508 -0
  76. data/config/locales/bulkrax.zh.yml +507 -0
  77. data/config/routes.rb +10 -1
  78. data/lib/bulkrax/data/demo_scenarios.json +2235 -0
  79. data/lib/bulkrax/version.rb +1 -1
  80. data/lib/bulkrax.rb +31 -0
  81. metadata +56 -16
  82. data/app/services/bulkrax/sample_csv_service/column_builder.rb +0 -58
  83. data/app/services/bulkrax/sample_csv_service/column_descriptor.rb +0 -56
  84. data/app/services/bulkrax/sample_csv_service/csv_builder.rb +0 -82
  85. data/app/services/bulkrax/sample_csv_service/explanation_builder.rb +0 -51
  86. data/app/services/bulkrax/sample_csv_service/field_analyzer.rb +0 -54
  87. data/app/services/bulkrax/sample_csv_service/file_path_generator.rb +0 -16
  88. data/app/services/bulkrax/sample_csv_service/mapping_manager.rb +0 -36
  89. data/app/services/bulkrax/sample_csv_service/model_loader.rb +0 -40
  90. data/app/services/bulkrax/sample_csv_service/row_builder.rb +0 -33
  91. data/app/services/bulkrax/sample_csv_service/schema_analyzer.rb +0 -69
  92. data/app/services/bulkrax/sample_csv_service/split_formatter.rb +0 -42
  93. data/app/services/bulkrax/sample_csv_service/value_determiner.rb +0 -67
  94. data/app/services/bulkrax/sample_csv_service.rb +0 -78
  95. /data/{app/services → lib}/wings/custom_queries/find_by_source_identifier.rb +0 -0
@@ -0,0 +1,175 @@
# frozen_string_literal: true

require 'securerandom'

module Bulkrax
  # Drives the guided ("stepper") CSV import: renders the upload form,
  # validates uploaded files over AJAX, serves a CSV report of the validation
  # errors, and finally creates the importer and enqueues the import job.
  class GuidedImportsController < ::Bulkrax::ApplicationController
    include Hyrax::ThemedLayoutController if defined?(::Hyrax)
    # Prepended rather than included: the demo concern overrides
    # #run_validation and calls `super`. An included module sits *behind* this
    # class in the ancestor chain, so this class's own #run_validation would
    # shadow the override and demo mode could never take effect.
    prepend Bulkrax::GuidedImportDemoScenarios if Bulkrax.config.guided_import_demo_scenarios_enabled
    include Bulkrax::ImporterFileHandler
    helper Bulkrax::ImportersHelper

    # Validation-result keys which, when non-empty, mean there is something to
    # put in the downloadable error report.
    VALIDATION_ERROR_KEYS = %i[rowErrors missingRequired unrecognized emptyColumns missingFiles].freeze

    before_action :authenticate_user!
    before_action :check_permissions
    with_themed_layout 'dashboard' if defined?(::Hyrax)

    # trigger form to allow upload
    def new
      @importer = Importer.new
      return unless defined?(::Hyrax)
      add_importer_breadcrumbs
      add_breadcrumb I18n.t('bulkrax.importer.guided_import.breadcrumb')
    end

    # AJAX endpoint to validate uploaded files.
    #
    # Always answers with HTTP 200 and a JSON body; failures are reported
    # inside the body (built by StepperResponseFormatter) rather than through
    # the status code. Any file handles opened while resolving the uploads are
    # closed in the `ensure` clause.
    def validate
      set_locale_from_params

      files, error = resolve_validation_files
      return render json: error, status: :ok if error
      return render_stepper_error(I18n.t('bulkrax.importer.guided_import.validation.no_files_uploaded')) unless files.any?

      csv_file, zip_file = select_csv_and_zip(files)

      unless csv_file
        return render_stepper_error(I18n.t('bulkrax.importer.guided_import.validation.no_csv_uploaded')) unless zip_file

        # No standalone CSV was uploaded: fall back to the CSV inside the ZIP.
        csv_file, error = extract_csv_from_zip(zip_file)
        return render json: error, status: :ok if error
      end

      admin_set_id = params.dig(:importer, :admin_set_id)
      validation_result = run_validation(csv_file, zip_file, admin_set_id: admin_set_id)
      # The raw rows are only needed for the cached error report; they are
      # removed before the result is formatted for the response.
      raw_csv_data = validation_result.delete(:raw_csv_data)
      cache_key = cache_validation_errors(validation_result, raw_csv_data, csv_file)
      formatted = StepperResponseFormatter.format(validation_result)
      formatted[:validationErrorsCacheKey] = cache_key
      render json: formatted, status: :ok
    ensure
      close_file_handles(files)
    end

    # Sends the cached validation errors as a CSV attachment.
    #
    # Responds 404 when the requested key does not belong to the current
    # session or when nothing is cached under it (e.g. the entry expired).
    def download_validation_errors
      cache_key = params[:key].to_s
      return head :not_found unless cache_key.start_with?(validation_errors_cache_prefix)

      cached = Rails.cache.read(cache_key)
      return head :not_found unless cached

      csv = ValidationErrorCsvBuilder.build(
        headers: cached[:headers],
        csv_data: cached[:csv_data],
        row_errors: cached[:row_errors],
        file_errors: cached[:file_errors]
      )
      send_data csv, filename: error_csv_filename(cached[:original_filename]), type: 'text/csv', disposition: 'attachment'
    end

    # Creates a CSV importer from the submitted form, stores the uploaded
    # file(s) on it and enqueues the import job.
    def create
      files = resolve_create_files
      return render_invalid_uploaded_files_response if params[:uploaded_files].present? && files.empty?

      @importer = Importer.new(importer_params)
      @importer.parser_klass = 'Bulkrax::CsvParser'
      @importer.user = current_user if respond_to?(:current_user) && current_user.present?
      apply_field_mapping

      if @importer.save
        write_files(files)
        Bulkrax::ImporterJob.perform_later(@importer.id)

        respond_to do |format|
          format.html { redirect_to bulkrax.importers_path, notice: I18n.t('bulkrax.importer.guided_import.flash.import_started') }
          format.json { render json: { success: true, importer_id: @importer.id }, status: :created }
        end
      else
        respond_to do |format|
          format.html { render :new, status: :unprocessable_entity }
          format.json { render json: { errors: @importer.errors.full_messages }, status: :unprocessable_entity }
        end
      end
    ensure
      # `files` is nil here if resolving the uploads raised; close_file_handles
      # ignores anything that is not an Array.
      close_file_handles(files)
    end

    private

    # Renders a stepper-formatted error payload with HTTP 200.
    # @param message [String] the (already translated) message to show
    def render_stepper_error(message)
      render json: StepperResponseFormatter.error(message: message), status: :ok
    end

    # Response used by #create when upload IDs were submitted but none of them
    # resolved to a readable file.
    def render_invalid_uploaded_files_response
      # #create bails out before it builds @importer. The `new` action assigns
      # one before rendering this same template, so do the same here.
      # NOTE(review): presumably the template needs it — confirm against
      # guided_imports/new.html.erb.
      @importer ||= Importer.new
      respond_to do |format|
        format.html { render :new, status: :unprocessable_entity }
        format.json { render json: { errors: ['No valid uploaded files found'] }, status: :unprocessable_entity }
      end
    end

    # Prefix shared by every validation-error cache key of the current
    # session. #download_validation_errors refuses keys without this prefix,
    # which ties a cached report to the session that produced it.
    def validation_errors_cache_prefix
      "guided_import_errors:#{session.id}:"
    end

    # Caches what is needed to build the downloadable error CSV.
    # @param validation_result [Hash] the validation result (symbol keys)
    # @param raw_csv_data [Object] the raw CSV rows taken out of the result
    # @param csv_file [#original_filename, #path] the validated CSV, used for its name
    # @return [String, nil] the cache key, or nil when there is nothing to report
    def cache_validation_errors(validation_result, raw_csv_data, csv_file)
      return nil unless VALIDATION_ERROR_KEYS.any? { |key| validation_result[key]&.any? }

      # A random suffix keeps two validations run by the same session within
      # the same second from overwriting each other's report.
      key = "#{validation_errors_cache_prefix}#{SecureRandom.uuid}"
      Rails.cache.write(
        key,
        {
          headers: validation_result[:headers],
          csv_data: raw_csv_data,
          row_errors: validation_result[:rowErrors] || [],
          file_errors: {
            missing_required: validation_result[:missingRequired] || [],
            unrecognized: validation_result[:unrecognized] || {},
            empty_columns: validation_result[:emptyColumns] || [],
            missing_files: validation_result[:missingFiles] || []
          },
          original_filename: filename_for(csv_file)
        },
        expires_in: 1.hour
      )
      key
    end

    # Runs validation via the real service.
    # @param csv_file [File, StringIO] the CSV to validate
    # @param zip_file [File, nil] an optional ZIP containing file attachments
    # @param admin_set_id [String, nil] optional admin set ID for validation context
    # @return [Hash] validation result data
    def run_validation(csv_file, zip_file, admin_set_id: nil)
      CsvParser.validate_csv(csv_file: csv_file, zip_file: zip_file, admin_set_id: admin_set_id)
    end

    # Strong parameters for the importer created by the guided flow.
    def importer_params
      params.require(:importer).permit(
        :name,
        :admin_set_id,
        :limit,
        parser_fields: [:visibility, :rights_statement, :override_rights_statement, :import_file_path, :file_style]
      )
    end

    # The guided flow always imports CSV, so it always uses the CSV parser's
    # configured field mappings.
    def apply_field_mapping
      @importer.field_mapping = Bulkrax.field_mappings['Bulkrax::CsvParser']
    end

    # @param original_filename [String, nil] name of the validated CSV
    # @return [String] "<basename>_errors.csv", or a generic name when the
    #   original name is unknown
    def error_csv_filename(original_filename)
      return 'import_errors.csv' if original_filename.blank?

      base = File.basename(original_filename, '.*')
      "#{base}_errors.csv"
    end

    # Switches I18n to the requested locale, but only when it is one of the
    # application's available locales.
    def set_locale_from_params
      I18n.locale = params[:locale] if params[:locale].present? && I18n.available_locales.include?(params[:locale].to_sym)
    end

    # Only called when Hyrax is defined (see #new).
    def add_importer_breadcrumbs
      add_breadcrumb t(:'hyrax.controls.home'), main_app.root_path
      add_breadcrumb t(:'hyrax.dashboard.breadcrumbs.admin'), hyrax.dashboard_path
      add_breadcrumb 'Importers', bulkrax.importers_path
    end

    # @raise [CanCan::AccessDenied] unless the current ability may import works
    def check_permissions
      raise CanCan::AccessDenied unless current_ability.can_import_works?
    end
  end
end
@@ -8,6 +8,7 @@ module Bulkrax
8
8
  include Bulkrax::API
9
9
  include Bulkrax::DatatablesBehavior
10
10
  include Bulkrax::ValidationHelper
11
+ include Bulkrax::ImporterFileHandler
11
12
 
12
13
  protect_from_forgery unless: -> { api_request? }
13
14
  before_action :token_authenticate!, if: -> { api_request? }, only: [:create, :update, :delete]
@@ -28,7 +29,8 @@ module Bulkrax
28
29
  end
29
30
 
30
31
  def importer_table
31
- @importers = Importer.order(table_order).page(table_page).per(table_per_page)
32
+ order = table_order.presence || Arel.sql('last_imported_at DESC NULLS LAST')
33
+ @importers = Importer.order(order).page(table_page).per(table_per_page)
32
34
  @importers = @importers.where(importer_table_search) if importer_table_search.present?
33
35
  respond_to do |format|
34
36
  format.json { render json: format_importers(@importers) }
@@ -65,10 +67,11 @@ module Bulkrax
65
67
  end
66
68
  end
67
69
 
68
- # POST /importers/sample_csv_file
70
+ # GET /importers/sample_csv_file
69
71
  def sample_csv_file
70
- sample = Bulkrax::SampleCsvService.call(model_name: 'all', output: 'file')
71
- send_file sample, filename: File.basename(sample), type: 'text/csv'
72
+ admin_set_id = params[:admin_set_id].presence
73
+ sample = Bulkrax::CsvParser.generate_template(models: 'all', output: 'file', admin_set_id: admin_set_id)
74
+ send_file sample, filename: File.basename(sample), type: 'text/csv', disposition: 'attachment'
72
75
  rescue StandardError => e
73
76
  flash[:error] = "Unable to generate sample CSV file: #{e.message}"
74
77
  redirect_back fallback_location: bulkrax.importers_path
@@ -93,7 +96,7 @@ module Bulkrax
93
96
  if api_request?
94
97
  return return_json_response unless valid_create_params?
95
98
  end
96
- uploads = Hyrax::UploadedFile.find(params[:uploaded_files]) if params[:uploaded_files].present?
99
+ uploads = uploaded_files_scope
97
100
  file = file_param
98
101
  cloud_files = cloud_params
99
102
 
@@ -132,7 +135,7 @@ module Bulkrax
132
135
  if api_request?
133
136
  return return_json_response unless valid_update_params?
134
137
  end
135
- uploads = Hyrax::UploadedFile.find(params[:uploaded_files]) if params[:uploaded_files].present?
138
+ uploads = uploaded_files_scope
136
139
  file = file_param
137
140
  cloud_files = cloud_params
138
141
 
@@ -214,10 +217,26 @@ module Bulkrax
214
217
  end
215
218
 
216
219
  def original_file
217
- if @importer.original_file?
218
- send_file @importer.original_file
219
- else
220
+ file_type = params[:file_type]&.to_sym
221
+
222
+ files = @importer.original_files
223
+ if files.empty?
220
224
  redirect_to @importer, alert: 'Importer does not support file re-download or the imported file is not found on the server.'
225
+ return
226
+ end
227
+
228
+ # If file_type is specified, find that specific file
229
+ if file_type
230
+ file = files.find { |f| f[:type] == file_type }
231
+ if file
232
+ send_file file[:path], filename: file[:name], disposition: 'attachment'
233
+ else
234
+ redirect_to @importer, alert: "File type '#{file_type}' not found."
235
+ end
236
+ else
237
+ # Default behavior: send the first file (CSV) for backward compatibility
238
+ file = files.first
239
+ send_file file[:path], filename: file[:name], disposition: 'attachment'
221
240
  end
222
241
  end
223
242
 
@@ -230,28 +249,6 @@ module Bulkrax
230
249
 
231
250
  private
232
251
 
233
- def files_for_import(file, cloud_files, uploads)
234
- return if file.blank? && cloud_files.blank? && uploads.blank?
235
-
236
- @importer[:parser_fields]['import_file_path'] = @importer.parser.write_import_file(file) if file.present?
237
- if cloud_files.present?
238
- @importer[:parser_fields]['cloud_file_paths'] = cloud_files
239
- # For BagIt, there will only be one bag, so we get the file_path back and set import_file_path
240
- # For CSV, we expect only file uploads, so we won't get the file_path back
241
- # and we expect the import_file_path to be set already
242
- target = @importer.parser.retrieve_cloud_files(cloud_files, @importer)
243
- @importer[:parser_fields]['import_file_path'] = target if target.present?
244
- end
245
-
246
- if uploads.present?
247
- uploads.each do |upload|
248
- @importer[:parser_fields]['import_file_path'] = @importer.parser.write_import_file(upload.file.file)
249
- end
250
- end
251
-
252
- @importer.save
253
- end
254
-
255
252
  # Use callbacks to share common setup or constraints between actions.
256
253
  def set_importer
257
254
  @importer = Importer.find(params[:id] || params[:importer_id])
@@ -0,0 +1,201 @@
# frozen_string_literal: true

module Bulkrax
  # rubocop:disable Metrics/ModuleLength
  # Demo support for the guided import: serves the bundled scenario fixtures
  # and, when the DEMO_MODE environment variable is 'true', answers validation
  # requests with a hardcoded mock result instead of validating the upload.
  module GuidedImportDemoScenarios
    extend ActiveSupport::Concern

    # Serve demo scenario fixtures for frontend testing
    def demo_scenarios
      fixture = Bulkrax::Engine.root.join('lib', 'bulkrax', 'data', 'demo_scenarios.json')
      return render(json: File.read(fixture), status: :ok) if File.exist?(fixture)

      render json: { error: I18n.t('bulkrax.importer.guided_import.flash.demo_not_available') }, status: :not_found
    end

    private

    # Returns the mock response in demo mode; otherwise defers to the host's
    # implementation through `super`.
    #
    # NOTE(review): this override is only reached when this module comes before
    # the host's own #run_validation in the ancestor chain (e.g. when it is
    # prepended). A class that merely includes the module and defines its own
    # #run_validation shadows this method — confirm how the host mixes it in.
    def run_validation(csv_file, zip_file, admin_set_id: nil)
      return super unless ENV['DEMO_MODE'] == 'true'

      generate_validation_response(csv_file, zip_file)
    end

    # rubocop:disable Metrics/MethodLength
    # Hardcoded mock response generator for demo mode.
    # The CSV is ignored; the ZIP only determines the `zipIncluded` flag and
    # the wording of the file-reference message.
    def generate_validation_response(_csv_file, zip_file)
      # Mock collections
      collections = [
        { id: 'col-1', title: 'Historical Photographs Collection', type: 'collection', parentIds: [], childrenIds: ['work-shared-1'] },
        { id: 'col-2', title: 'Manuscripts & Letters', type: 'collection', parentIds: [], childrenIds: [] },
        { id: 'col-3', title: 'Audio Recordings', type: 'collection', parentIds: [], childrenIds: ['work-shared-2'] }
      ]

      # Mock works: the first 75 belong to col-1, the next 65 to col-2 and the
      # remaining 49 to col-3.
      works = Array.new(189) do |index|
        collection_id = case index
                        when 0...75 then 'col-1'
                        when 75...140 then 'col-2'
                        else 'col-3'
                        end
        number = index + 1
        { id: "work-#{number}", title: "Work #{number}", type: 'work', parentIds: [collection_id] }
      end

      # Multi-parent examples
      works << { id: 'work-shared-1', title: 'Cross-Collection Photograph', type: 'work', parentIds: ['col-1', 'col-2'] }
      works << { id: 'work-shared-2', title: 'Interdisciplinary Recording', type: 'work', parentIds: ['col-2', 'col-3'] }

      # Mock file sets
      file_sets = Array.new(55) do |index|
        number = index + 1
        { id: "fs-#{number}", title: "FileSet #{number}", type: 'file_set' }
      end

      # Mock headers with one unrecognized field
      headers = %w[source_identifier title creator model parents children file description date_created legacy_id subject]
      unrecognized = %w[legacy_id]
      missing_required = []
      missing_files = %w[photo_087.tiff letter_scan_12.pdf recording_03.wav]
      zip_included = zip_file.present?

      messages = build_validation_messages(
        headers: headers, unrecognized: unrecognized, missing_required: missing_required,
        missing_files: missing_files, zip_included: zip_included, row_count: 247,
        is_valid: true, has_warnings: true, file_references: 55
      )

      {
        headers: headers,
        missingRequired: missing_required,
        unrecognized: unrecognized,
        rowCount: 247,
        isValid: true,
        hasWarnings: true,
        collections: collections,
        works: works,
        fileSets: file_sets,
        totalItems: collections.length + works.length + file_sets.length,
        fileReferences: 55,
        missingFiles: missing_files,
        foundFiles: 52,
        zipIncluded: zip_included,
        messages: messages
      }
    end
    # rubocop:enable Metrics/MethodLength

    # Builds the structured messages hash from validation results.
    # @param results [Hash] with keys: headers, unrecognized, missing_required,
    #   missing_files, zip_included, row_count, is_valid, has_warnings, file_references
    # @return [Hash] `validationStatus` plus the list of `issues`
    def build_validation_messages(results)
      issues = []
      issues << missing_required_issue(results[:missing_required]) if results[:missing_required]&.any?
      issues << unrecognized_fields_issue(results[:unrecognized]) if results[:unrecognized]&.any?
      # file_references_issue may return nil (ZIP present, nothing missing);
      # the compact below drops it.
      issues << file_references_issue(results) if results[:file_references]&.positive?

      { validationStatus: validation_status(results), issues: issues.compact }
    end

    # Overall status panel: severity, icon and title depend on validity and
    # warnings; the details list the recognized headers when the result is valid.
    def validation_status(results)
      severity, icon, title = validation_status_level(results[:is_valid], results[:has_warnings])
      recognized = results[:headers] - (results[:unrecognized] || [])
      details = if results[:is_valid]
                  I18n.t('bulkrax.importer.guided_import.validation.recognized_fields', fields: recognized.join(', '))
                else
                  I18n.t('bulkrax.importer.guided_import.validation.critical_errors')
                end

      {
        severity: severity,
        icon: icon,
        title: title,
        summary: I18n.t('bulkrax.importer.guided_import.validation.columns_detected', columns: results[:headers].length, records: results[:row_count]),
        details: details,
        defaultOpen: true
      }
    end

    # @return [Array<String>] a [severity, icon, title] triple
    def validation_status_level(is_valid, has_warnings)
      return ['error', 'fa-times-circle', I18n.t('bulkrax.importer.guided_import.validation.failed')] unless is_valid
      return ['warning', 'fa-exclamation-triangle', I18n.t('bulkrax.importer.guided_import.validation.passed_warnings')] if has_warnings

      ['success', 'fa-check-circle', I18n.t('bulkrax.importer.guided_import.validation.passed')]
    end

    # Error-level issue listing each missing required field.
    def missing_required_issue(missing_required)
      hinted_items = missing_required.map do |field|
        { field: field, message: I18n.t('bulkrax.importer.guided_import.validation.missing_required_hint') }
      end

      {
        type: 'missing_required_fields',
        severity: 'error',
        icon: 'fa-times-circle',
        title: I18n.t('bulkrax.importer.guided_import.validation.missing_required_title'),
        count: missing_required.length,
        description: I18n.t('bulkrax.importer.guided_import.validation.missing_required_desc'),
        items: hinted_items,
        defaultOpen: false
      }
    end

    # Warning-level issue listing each unrecognized header.
    def unrecognized_fields_issue(unrecognized)
      {
        type: 'unrecognized_fields',
        severity: 'warning',
        icon: 'fa-exclamation-triangle',
        title: I18n.t('bulkrax.importer.guided_import.validation.unrecognized_title'),
        count: unrecognized.length,
        description: I18n.t('bulkrax.importer.guided_import.validation.unrecognized_desc'),
        items: unrecognized.map { |field| { field: field, message: nil } },
        defaultOpen: false
      }
    end

    # rubocop:disable Metrics/MethodLength
    # Issue describing the files referenced by the CSV.
    # @return [Hash, nil] a warning when no ZIP was supplied or when the ZIP is
    #   missing referenced files; nil when a ZIP was supplied and nothing is missing
    def file_references_issue(results)
      file_references = results[:file_references]
      missing_files = results[:missing_files] || []

      unless results[:zip_included]
        return {
          type: 'file_references',
          severity: 'warning',
          icon: 'fa-exclamation-triangle',
          title: I18n.t('bulkrax.importer.guided_import.validation.file_references_title'),
          count: file_references,
          summary: I18n.t('bulkrax.importer.guided_import.validation.files_referenced', count: file_references),
          description: I18n.t('bulkrax.importer.guided_import.validation.no_zip_desc'),
          items: [],
          defaultOpen: false
        }
      end

      return nil if missing_files.empty?

      found_files = file_references - missing_files.length
      {
        type: 'file_references',
        severity: 'warning',
        icon: 'fa-info-circle',
        title: I18n.t('bulkrax.importer.guided_import.validation.file_references_title'),
        count: file_references,
        summary: I18n.t('bulkrax.importer.guided_import.validation.files_found_in_zip', found: found_files, total: file_references),
        description: I18n.t('bulkrax.importer.guided_import.validation.files_missing_from_zip', count: missing_files.length, files_word: 'file'.pluralize(missing_files.length)),
        items: missing_files.map { |file| { field: file, message: I18n.t('bulkrax.importer.guided_import.validation.missing_from_zip') } },
        defaultOpen: false
      }
    end # rubocop:enable Metrics/MethodLength
  end
  # rubocop:enable Metrics/ModuleLength
end
@@ -0,0 +1,212 @@
# frozen_string_literal: true

module Bulkrax
  # rubocop:disable Metrics/ModuleLength
  # Controller concern that resolves the files submitted with an import
  # request (server-side path, Hyrax uploads, or direct uploads), picks the
  # CSV/ZIP among them, and stores them on the importer. All methods are
  # private; they rely on the host controller's `params` and, for the writing
  # methods, on `@importer`.
  module ImporterFileHandler
    extend ActiveSupport::Concern

    private

    # Resolves files for validation from either a server-side file path, pre-uploaded Hyrax files, or direct upload params
    # @return [Array<(Array<File>, nil)>] on success, a tuple of [files, nil]
    # @return [Array<(nil, Hash)>] on error, a tuple of [nil, error_response]
    def resolve_validation_files
      if import_via_file_path?
        return [nil, StepperResponseFormatter.error(message: I18n.t('bulkrax.importer.guided_import.validation.file_path_not_exist'))] unless File.exist?(import_file_path)

        [[File.open(import_file_path)], nil]
      elsif params[:uploaded_files].present?
        resolve_hyrax_uploaded_files
      else
        # Same normalisation as the create flow, so a blank file field counts
        # as "no files" in both places.
        [extract_uploaded_files, nil]
      end
    end

    # Loads files from Hyrax::UploadedFile IDs (used by chunked upload flow).
    # Scoped to current_user to prevent accessing another user's uploads.
    # @return [Array<(Array<File>, nil)>] on success, a tuple of [files, nil]
    # @return [Array<(nil, Hash)>] on error, a tuple of [nil, error_response]
    def resolve_hyrax_uploaded_files
      uploads = uploaded_files_scope
      return [nil, StepperResponseFormatter.error(message: 'No uploaded files found for the given IDs')] if uploads.empty?

      [open_uploaded_files(uploads), nil]
    rescue StandardError => e
      Rails.logger.error("Bulkrax: error loading Hyrax uploaded files: #{e.class}: #{e.message}")
      Rails.logger.debug { e.full_message }
      [nil, StepperResponseFormatter.error(message: 'Failed to load uploaded files')]
    end

    # Hyrax uploads matching params[:uploaded_files], restricted to the
    # current user.
    # @return [ActiveRecord::Relation, Array] an empty Array without Hyrax, an
    #   empty relation when there is no current user
    def uploaded_files_scope
      return [] unless defined?(::Hyrax)

      base = Hyrax::UploadedFile.where(id: params[:uploaded_files])
      if respond_to?(:current_user) && current_user.present?
        base.where(user_id: current_user.id)
      else
        base.none
      end
    end

    # Opens every upload whose backing file still exists on disk; uploads
    # without a readable file are skipped.
    # @param uploads [Enumerable] Hyrax::UploadedFile-like records
    # @return [Array<File>] the opened handles; the caller must close them
    # @raise [StandardError] whatever opening a file raised — the handles
    #   opened so far are closed first so they do not leak
    def open_uploaded_files(uploads)
      opened = []
      uploads.each do |upload|
        path = upload.file&.path
        opened << File.open(path) if path && File.exist?(path)
      end
      opened
    rescue StandardError
      close_file_handles(opened)
      raise
    end

    # Resolves the files for the create flow: Hyrax uploads when upload IDs
    # were submitted, otherwise the directly uploaded files.
    # @return [Array] opened Files or uploaded-file objects (possibly empty)
    def resolve_create_files
      return extract_uploaded_files unless params[:uploaded_files].present?

      open_uploaded_files(uploaded_files_scope)
    end

    # Directly uploaded file(s) from importer[parser_fields][files].
    # @return [Array] the uploads without nils; empty when the param is blank
    def extract_uploaded_files
      files_param = params.dig(:importer, :parser_fields, :files)
      return [] if files_param.blank?

      files_param.is_a?(Array) ? files_param.compact : [files_param].compact
    end

    # Scans the given files for a CSV and a ZIP by file extension
    # @param files [Array<File, ActionDispatch::Http::UploadedFile>] the resolved files to search
    # @return [Array<(File, nil), (nil, File), (File, File), (nil, nil)>] a tuple of [csv_file, zip_file]; either may be nil
    def select_csv_and_zip(files)
      csv_file = files.find { |f| filename_for(f)&.end_with?('.csv') }
      zip_file = files.find { |f| filename_for(f)&.end_with?('.zip') }
      [csv_file, zip_file]
    end

    # Returns a filename from any file-like object (ActionDispatch upload, File, or Tempfile)
    # @return [String, nil] the original filename when available, else the
    #   path; nil for objects exposing neither (e.g. a StringIO)
    def filename_for(file)
      if file.respond_to?(:original_filename)
        file.original_filename
      elsif file.respond_to?(:path)
        file.path
      end
    end

    # Opens a ZIP and extracts the CSV content into a StringIO while the archive is open
    # @param zip_file [File] the ZIP file to search
    # @return [Array<(StringIO, nil)>] on success, a tuple of [csv_file, nil]
    # @return [Array<(nil, Hash)>] on error, a tuple of [nil, error_response]
    def extract_csv_from_zip(zip_file)
      csv_file = nil
      error = nil
      Zip::File.open(zip_file.path) do |zip|
        located = locate_csv_entry_in_zip(zip)
        if located.is_a?(Hash) && located[:messages]
          # locate_csv_entry_in_zip answered with an error response
          error = located
        elsif located
          # Read eagerly: the entry's stream is only usable while the archive is open.
          csv_file = StringIO.new(located.get_input_stream.read)
        end
      end
      [csv_file, error]
    rescue Zip::Error => e
      # A corrupt or mislabelled archive is a user error, so report it like
      # the other validation failures instead of letting the request fail.
      Rails.logger.error("Bulkrax: unable to read ZIP #{filename_for(zip_file)}: #{e.class}: #{e.message}")
      [nil, StepperResponseFormatter.error(message: 'Failed to read the uploaded ZIP file')]
    end

    # Finds a CSV entry in a ZIP by traversing directory levels, preferring the shallowest
    # @param zip [Zip::File] the open ZIP archive to search
    # @return [Zip::Entry] the CSV entry on success
    # @return [Hash] an error response hash if no CSV is found or multiple CSVs are ambiguous
    def locate_csv_entry_in_zip(zip)
      csv_entries = group_entries_by_directory_level(zip)

      return StepperResponseFormatter.error(message: I18n.t('bulkrax.importer.guided_import.validation.no_csv_in_zip')) if csv_entries.empty?

      csv_by_depth = get_directory_depth_for_each_csv(csv_entries)
      csvs_at_level = determine_csvs_at_shallowest_level(csv_by_depth)

      return StepperResponseFormatter.error(message: I18n.t('bulkrax.importer.guided_import.validation.multiple_csv')) if csvs_at_level.size > 1

      csvs_at_level.first
    end

    # Despite its name this does no grouping: it selects the non-directory
    # entries whose name ends in ".csv".
    # @param zip [Zip::File] the open archive
    # @return [Array<Zip::Entry>]
    def group_entries_by_directory_level(zip)
      zip.select { |entry| entry.name.end_with?('.csv') && !entry.directory? }
    end

    # Groups CSV entries by how deep they sit in the archive, measured as the
    # number of "/" separators in the entry name.
    # @param entries [Array<Zip::Entry>]
    # @return [Hash{Integer => Array<Zip::Entry>}]
    def get_directory_depth_for_each_csv(entries)
      entries.group_by { |entry| entry.name.count('/') }
    end

    # @param csv_by_depth [Hash{Integer => Array<Zip::Entry>}]
    # @return [Array<Zip::Entry>] the entries at the smallest depth
    def determine_csvs_at_shallowest_level(csv_by_depth)
      shallowest_depth = csv_by_depth.keys.min
      csv_by_depth[shallowest_depth]
    end

    # Persists uploaded file(s) and/or cloud files onto the importer record.
    # @param file [ActionDispatch::Http::UploadedFile, nil] a directly uploaded file
    # @param cloud_files [Hash, nil] cloud file paths from browse-everything
    # @param uploads [ActiveRecord::Relation, Array, nil] Hyrax::UploadedFile records
    def files_for_import(file, cloud_files, uploads)
      return if file.blank? && cloud_files.blank? && uploads.blank?

      @importer[:parser_fields]['import_file_path'] = @importer.parser.write_import_file(file) if file.present?
      if cloud_files.present?
        @importer[:parser_fields]['cloud_file_paths'] = cloud_files
        # For BagIt, there will only be one bag, so we get the file_path back and set import_file_path
        # For CSV, we expect only file uploads, so we won't get the file_path back
        # and we expect the import_file_path to be set already
        target = @importer.parser.retrieve_cloud_files(cloud_files, @importer)
        @importer[:parser_fields]['import_file_path'] = target if target.present?
      end

      if uploads.present?
        # Each upload overwrites import_file_path, so the last one wins.
        uploads.each do |upload|
          @importer[:parser_fields]['import_file_path'] = @importer.parser.write_import_file(upload.file.file)
        end
      end

      @importer.save
    end

    # Stores the CSV and/or ZIP among `files` on the importer and saves it.
    # With both present, the CSV becomes import_file_path and the ZIP is
    # recorded as attachments_zip_path; a lone ZIP becomes import_file_path.
    # @param files [Array] resolved files (see #resolve_create_files)
    # @raise [StandardError] re-raised after logging when writing fails
    def write_files(files)
      csv_file, zip_file = select_csv_and_zip(files)

      csv_path = write_file_if_present(csv_file)
      zip_path = write_file_if_present(zip_file)

      return unless csv_path || zip_path

      # Determine import_file_path: prefer CSV, fallback to ZIP
      @importer.parser_fields['import_file_path'] = csv_path || zip_path
      @importer.parser_fields['attachments_zip_path'] = zip_path if zip_path && csv_path

      @importer.save
    rescue StandardError => e
      Rails.logger.error("Bulkrax::ImporterFileHandler#write_files failed: #{e.message}")
      raise
    end

    # Writes one file into the importer's import directory.
    # @param file [#original_filename, #path, nil] an uploaded file or a plain File
    # @return [String, nil] the destination path, or nil when no file was given
    def write_file_if_present(file)
      return nil unless file

      if file.respond_to?(:original_filename)
        @importer.parser.write_import_file(file)
      else
        # Plain File handles (e.g. opened from a Hyrax upload) are copied
        # under their own basename.
        dest_path = File.join(@importer.parser.path_for_import, File.basename(file.path))
        FileUtils.cp(file.path, dest_path)
        dest_path
      end
    end

    # Closes every closable object in `files`; anything that is not an Array
    # (e.g. nil) is ignored.
    def close_file_handles(files)
      return unless files.is_a?(Array)
      files.each { |f| f.close if f.respond_to?(:close) }
    end

    # @return [Boolean] whether the request names a server-side import path
    def import_via_file_path?
      import_file_path.present?
    end

    # @return [String, nil] importer[parser_fields][import_file_path] from the request
    def import_file_path
      @file_path ||= params.dig(:importer, :parser_fields, :import_file_path)
    end
  end
  # rubocop:enable Metrics/ModuleLength
end