bulkrax 9.3.5 → 9.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -1
- data/app/assets/javascripts/bulkrax/application.js +2 -1
- data/app/assets/javascripts/bulkrax/bulkrax.js +13 -4
- data/app/assets/javascripts/bulkrax/bulkrax_utils.js +96 -0
- data/app/assets/javascripts/bulkrax/datatables.js +1 -0
- data/app/assets/javascripts/bulkrax/entries.js +17 -10
- data/app/assets/javascripts/bulkrax/importers.js.erb +9 -2
- data/app/assets/javascripts/bulkrax/importers_stepper.js +2420 -0
- data/app/assets/stylesheets/bulkrax/application.css +1 -1
- data/app/assets/stylesheets/bulkrax/stepper/_header.scss +83 -0
- data/app/assets/stylesheets/bulkrax/stepper/_mixins.scss +26 -0
- data/app/assets/stylesheets/bulkrax/stepper/_navigation.scss +103 -0
- data/app/assets/stylesheets/bulkrax/stepper/_responsive.scss +46 -0
- data/app/assets/stylesheets/bulkrax/stepper/_review.scss +92 -0
- data/app/assets/stylesheets/bulkrax/stepper/_settings.scss +106 -0
- data/app/assets/stylesheets/bulkrax/stepper/_success.scss +26 -0
- data/app/assets/stylesheets/bulkrax/stepper/_summary.scss +171 -0
- data/app/assets/stylesheets/bulkrax/stepper/_upload.scss +339 -0
- data/app/assets/stylesheets/bulkrax/stepper/_validation.scss +237 -0
- data/app/assets/stylesheets/bulkrax/stepper/_variables.scss +46 -0
- data/app/assets/stylesheets/bulkrax/stepper.scss +32 -0
- data/app/controllers/bulkrax/guided_imports_controller.rb +175 -0
- data/app/controllers/bulkrax/importers_controller.rb +28 -31
- data/app/controllers/concerns/bulkrax/guided_import_demo_scenarios.rb +201 -0
- data/app/controllers/concerns/bulkrax/importer_file_handler.rb +212 -0
- data/app/errors/bulkrax/unzip_error.rb +16 -0
- data/app/factories/bulkrax/object_factory.rb +3 -2
- data/app/factories/bulkrax/valkyrie_object_factory.rb +61 -17
- data/app/jobs/bulkrax/importer_job.rb +42 -4
- data/app/models/bulkrax/csv_entry.rb +27 -7
- data/app/models/bulkrax/entry.rb +4 -0
- data/app/models/bulkrax/importer.rb +27 -10
- data/app/models/concerns/bulkrax/has_matchers.rb +2 -2
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +6 -5
- data/app/parsers/bulkrax/application_parser.rb +63 -20
- data/app/parsers/bulkrax/bagit_parser.rb +12 -0
- data/app/parsers/bulkrax/csv_parser.rb +168 -25
- data/app/parsers/concerns/bulkrax/csv_parser/csv_template_generation.rb +73 -0
- data/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb +133 -0
- data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb +282 -0
- data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_hierarchy.rb +96 -0
- data/app/services/bulkrax/csv_template/column_builder.rb +60 -0
- data/app/services/bulkrax/csv_template/column_descriptor.rb +58 -0
- data/app/services/bulkrax/csv_template/csv_builder.rb +83 -0
- data/app/services/bulkrax/csv_template/explanation_builder.rb +57 -0
- data/app/services/bulkrax/csv_template/field_analyzer.rb +56 -0
- data/app/services/bulkrax/csv_template/file_path_generator.rb +47 -0
- data/app/services/bulkrax/csv_template/file_validator.rb +68 -0
- data/app/services/bulkrax/csv_template/mapping_manager.rb +55 -0
- data/app/services/bulkrax/csv_template/model_loader.rb +50 -0
- data/app/services/bulkrax/csv_template/row_builder.rb +35 -0
- data/app/services/bulkrax/csv_template/schema_analyzer.rb +70 -0
- data/app/services/bulkrax/csv_template/split_formatter.rb +44 -0
- data/app/services/bulkrax/csv_template/value_determiner.rb +68 -0
- data/app/services/bulkrax/stepper_response_formatter.rb +347 -0
- data/app/services/bulkrax/validation_error_csv_builder.rb +99 -0
- data/app/validators/bulkrax/csv_row/child_reference.rb +56 -0
- data/app/validators/bulkrax/csv_row/circular_reference.rb +71 -0
- data/app/validators/bulkrax/csv_row/controlled_vocabulary.rb +74 -0
- data/app/validators/bulkrax/csv_row/duplicate_identifier.rb +63 -0
- data/app/validators/bulkrax/csv_row/missing_source_identifier.rb +31 -0
- data/app/validators/bulkrax/csv_row/parent_reference.rb +59 -0
- data/app/validators/bulkrax/csv_row/required_values.rb +64 -0
- data/app/views/bulkrax/guided_imports/new.html.erb +567 -0
- data/app/views/bulkrax/importers/index.html.erb +6 -1
- data/app/views/bulkrax/importers/new.html.erb +1 -1
- data/app/views/bulkrax/importers/show.html.erb +17 -1
- data/config/i18n-tasks.yml +195 -0
- data/config/locales/bulkrax.de.yml +508 -0
- data/config/locales/bulkrax.en.yml +463 -233
- data/config/locales/bulkrax.es.yml +508 -0
- data/config/locales/bulkrax.fr.yml +508 -0
- data/config/locales/bulkrax.it.yml +508 -0
- data/config/locales/bulkrax.pt-BR.yml +508 -0
- data/config/locales/bulkrax.zh.yml +507 -0
- data/config/routes.rb +10 -1
- data/lib/bulkrax/data/demo_scenarios.json +2235 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +31 -0
- metadata +56 -16
- data/app/services/bulkrax/sample_csv_service/column_builder.rb +0 -58
- data/app/services/bulkrax/sample_csv_service/column_descriptor.rb +0 -56
- data/app/services/bulkrax/sample_csv_service/csv_builder.rb +0 -82
- data/app/services/bulkrax/sample_csv_service/explanation_builder.rb +0 -51
- data/app/services/bulkrax/sample_csv_service/field_analyzer.rb +0 -54
- data/app/services/bulkrax/sample_csv_service/file_path_generator.rb +0 -16
- data/app/services/bulkrax/sample_csv_service/mapping_manager.rb +0 -36
- data/app/services/bulkrax/sample_csv_service/model_loader.rb +0 -40
- data/app/services/bulkrax/sample_csv_service/row_builder.rb +0 -33
- data/app/services/bulkrax/sample_csv_service/schema_analyzer.rb +0 -69
- data/app/services/bulkrax/sample_csv_service/split_formatter.rb +0 -42
- data/app/services/bulkrax/sample_csv_service/value_determiner.rb +0 -67
- data/app/services/bulkrax/sample_csv_service.rb +0 -78
- /data/{app/services → lib}/wings/custom_queries/find_by_source_identifier.rb +0 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
|
|
4
|
+
class GuidedImportsController < ::Bulkrax::ApplicationController
|
|
5
|
+
include Hyrax::ThemedLayoutController if defined?(::Hyrax)
# Prepended rather than included: this class defines its own private
# `run_validation`, and a method defined directly in a class is found before
# any method of an included module. Prepending places the concern's
# `run_validation` in front of the class in the lookup chain, so its DEMO_MODE
# branch is reachable and its `super` call resolves to the class's method.
prepend Bulkrax::GuidedImportDemoScenarios if Bulkrax.config.guided_import_demo_scenarios_enabled
include Bulkrax::ImporterFileHandler
helper Bulkrax::ImportersHelper

before_action :authenticate_user!
before_action :check_permissions
with_themed_layout 'dashboard' if defined?(::Hyrax)
|
|
13
|
+
|
|
14
|
+
# Renders the guided import form backed by a blank importer.
# Breadcrumbs are only added when Hyrax is loaded.
def new
  @importer = Importer.new
  if defined?(::Hyrax)
    add_importer_breadcrumbs
    add_breadcrumb I18n.t('bulkrax.importer.guided_import.breadcrumb')
  end
end
|
|
21
|
+
|
|
22
|
+
# AJAX endpoint that validates the uploaded files and renders the
# stepper-formatted result as JSON.
#
# A CSV is taken from the resolved files directly, or extracted from the ZIP
# when no standalone CSV was supplied. Every outcome rendered here, including
# user-facing failures, uses HTTP 200. The resolved files are handed to
# close_file_handles on the way out, whatever happened.
def validate
  set_locale_from_params

  files, failure = resolve_validation_files
  return render(json: failure, status: :ok) if failure

  if files.none?
    message = I18n.t('bulkrax.importer.guided_import.validation.no_files_uploaded')
    return render(json: StepperResponseFormatter.error(message: message), status: :ok)
  end

  csv_file, zip_file = select_csv_and_zip(files)

  if !csv_file && !zip_file
    message = I18n.t('bulkrax.importer.guided_import.validation.no_csv_uploaded')
    return render(json: StepperResponseFormatter.error(message: message), status: :ok)
  elsif !csv_file
    # Only a ZIP was supplied: pull the CSV out of the archive.
    csv_file, failure = extract_csv_from_zip(zip_file)
    return render(json: failure, status: :ok) if failure
  end

  admin_set_id = params[:importer]&.[](:admin_set_id)
  result = run_validation(csv_file, zip_file, admin_set_id: admin_set_id)
  # The raw CSV rows are removed from the result before it is formatted for
  # the response; they are only kept in the cached error report.
  raw_rows = result.delete(:raw_csv_data)
  errors_key = cache_validation_errors(result, raw_rows, csv_file)

  payload = StepperResponseFormatter.format(result)
  payload[:validationErrorsCacheKey] = errors_key
  render json: payload, status: :ok
ensure
  close_file_handles(files)
end
|
|
49
|
+
|
|
50
|
+
# Sends the cached validation error report as a CSV attachment.
#
# Responds 404 when params[:key] does not start with this session's
# "guided_import_errors:<session id>:" prefix, or when nothing is cached
# under the key.
def download_validation_errors
  requested_key = params[:key].to_s
  return head :not_found unless requested_key.start_with?("guided_import_errors:#{session.id}:")

  report = Rails.cache.read(requested_key)
  return head :not_found unless report

  send_data(
    ValidationErrorCsvBuilder.build(
      headers: report[:headers],
      csv_data: report[:csv_data],
      row_errors: report[:row_errors],
      file_errors: report[:file_errors]
    ),
    filename: error_csv_filename(report[:original_filename]),
    type: 'text/csv',
    disposition: 'attachment'
  )
end
|
|
66
|
+
|
|
67
|
+
# Creates a CSV importer from the guided import form and enqueues its job.
#
# Responds 422 via render_invalid_uploaded_files_response when uploaded-file
# IDs were submitted but none resolved to a file, and 422 with the model
# errors when the importer fails to save. The resolved files are handed to
# close_file_handles on the way out.
def create
  files = resolve_create_files
  return render_invalid_uploaded_files_response if params[:uploaded_files].present? && files.empty?

  @importer = Importer.new(importer_params)
  @importer.parser_klass = 'Bulkrax::CsvParser'
  @importer.user = current_user if respond_to?(:current_user) && current_user.present?
  apply_field_mapping

  unless @importer.save
    respond_to do |format|
      format.html { render :new, status: :unprocessable_entity }
      format.json { render json: { errors: @importer.errors.full_messages }, status: :unprocessable_entity }
    end
    return
  end

  # Files are only written, and the job only enqueued, once the record exists.
  write_files(files)
  Bulkrax::ImporterJob.perform_later(@importer.id)

  respond_to do |format|
    format.html { redirect_to bulkrax.importers_path, notice: I18n.t('bulkrax.importer.guided_import.flash.import_started') }
    format.json { render json: { success: true, importer_id: @importer.id }, status: :created }
  end
ensure
  # `files` is nil here when resolve_create_files itself raised.
  close_file_handles(files)
end
|
|
94
|
+
|
|
95
|
+
private
|
|
96
|
+
|
|
97
|
+
# Responds with 422 Unprocessable Entity: HTML requests get the `new`
# template re-rendered, JSON requests get an `errors` array.
def render_invalid_uploaded_files_response
  # `create` calls this before it assigns @importer, so provide the same blank
  # importer that the `new` action sets up before rendering its template.
  # NOTE(review): presumably the `new` template builds its form from
  # @importer - confirm against the view.
  @importer ||= Importer.new

  respond_to do |format|
    format.html { render :new, status: :unprocessable_entity }
    format.json { render json: { errors: ['No valid uploaded files found'] }, status: :unprocessable_entity }
  end
end
|
|
103
|
+
|
|
104
|
+
# Caches what is needed to build a downloadable error report for a validation
# run, for one hour, and returns the cache key. Nothing is cached when the
# result carries no row errors, missing required fields, unrecognized
# columns, empty columns or missing files.
#
# @param validation_result [Hash] reads :headers, :rowErrors, :missingRequired,
#   :unrecognized, :emptyColumns and :missingFiles
# @param raw_csv_data [Object] stored unchanged under :csv_data
# @param csv_file [Object] file-like object whose name (via filename_for) is
#   stored under :original_filename
# @return [String, nil] the cache key, or nil when there was nothing to cache
def cache_validation_errors(validation_result, raw_csv_data, csv_file)
  error_fields = %i[rowErrors missingRequired unrecognized emptyColumns missingFiles]
  return nil unless error_fields.any? { |field| validation_result[field]&.any? }

  # download_validation_errors only serves keys that start with this
  # "guided_import_errors:<session id>:" prefix.
  key = "guided_import_errors:#{session.id}:#{Time.now.to_i}"
  report = {
    headers: validation_result[:headers],
    csv_data: raw_csv_data,
    row_errors: validation_result[:rowErrors] || [],
    file_errors: {
      missing_required: validation_result[:missingRequired] || [],
      unrecognized: validation_result[:unrecognized] || {},
      empty_columns: validation_result[:emptyColumns] || [],
      missing_files: validation_result[:missingFiles] || []
    },
    original_filename: filename_for(csv_file)
  }
  Rails.cache.write(key, report, expires_in: 1.hour)
  key
end
|
|
136
|
+
|
|
137
|
+
# Delegates validation to CsvParser.validate_csv.
# @param csv_file [Object] the CSV to validate
# @param zip_file [Object, nil] optional ZIP accompanying the CSV
# @param admin_set_id [String, nil] optional admin set ID forwarded to the parser
# @return [Object] whatever CsvParser.validate_csv returns
def run_validation(csv_file, zip_file, admin_set_id: nil)
  CsvParser.validate_csv(
    csv_file: csv_file,
    zip_file: zip_file,
    admin_set_id: admin_set_id
  )
end
|
|
140
|
+
|
|
141
|
+
# Strong parameters for the guided import form: requires :importer and
# permits the top-level importer attributes plus a fixed set of parser fields.
def importer_params
  parser_field_keys = %i[visibility rights_statement override_rights_statement import_file_path file_style]
  params.require(:importer).permit(:name, :admin_set_id, :limit, parser_fields: parser_field_keys)
end
|
|
149
|
+
|
|
150
|
+
# Assigns the configured field mapping for Bulkrax::CsvParser to @importer.
def apply_field_mapping
  csv_mapping = Bulkrax.field_mappings['Bulkrax::CsvParser']
  @importer.field_mapping = csv_mapping
end
|
|
153
|
+
|
|
154
|
+
# Builds the download name for the error report: "<base>_errors.csv", where
# <base> is the original file's basename without its last extension, or
# 'import_errors.csv' when no original filename is available.
def error_csv_filename(original_filename)
  if original_filename.blank?
    'import_errors.csv'
  else
    "#{File.basename(original_filename, '.*')}_errors.csv"
  end
end
|
|
160
|
+
|
|
161
|
+
# Switches I18n.locale to the locale named by params[:locale], but only when
# it matches one of I18n.available_locales; otherwise the locale is left
# untouched.
#
# The match is made on strings, so the untrusted parameter is never turned
# into a Symbol, and a non-String value (for example an Array produced by
# `locale[]=en`) is ignored instead of raising NoMethodError.
def set_locale_from_params
  requested = params[:locale].to_s
  return if requested.empty?

  locale = I18n.available_locales.find { |available| available.to_s == requested }
  I18n.locale = locale if locale
end
|
|
164
|
+
|
|
165
|
+
# Adds the Home > Dashboard > Importers breadcrumb trail.
def add_importer_breadcrumbs
  home_label = t(:'hyrax.controls.home')
  add_breadcrumb(home_label, main_app.root_path)

  dashboard_label = t(:'hyrax.dashboard.breadcrumbs.admin')
  add_breadcrumb(dashboard_label, hyrax.dashboard_path)

  add_breadcrumb('Importers', bulkrax.importers_path)
end
|
|
170
|
+
|
|
171
|
+
# Before-action guard.
# @raise [CanCan::AccessDenied] unless current_ability.can_import_works? is truthy
def check_permissions
  return if current_ability.can_import_works?

  raise CanCan::AccessDenied
end
|
|
174
|
+
end
|
|
175
|
+
end
|
|
@@ -8,6 +8,7 @@ module Bulkrax
|
|
|
8
8
|
include Bulkrax::API
|
|
9
9
|
include Bulkrax::DatatablesBehavior
|
|
10
10
|
include Bulkrax::ValidationHelper
|
|
11
|
+
include Bulkrax::ImporterFileHandler
|
|
11
12
|
|
|
12
13
|
protect_from_forgery unless: -> { api_request? }
|
|
13
14
|
before_action :token_authenticate!, if: -> { api_request? }, only: [:create, :update, :delete]
|
|
@@ -28,7 +29,8 @@ module Bulkrax
|
|
|
28
29
|
end
|
|
29
30
|
|
|
30
31
|
def importer_table
|
|
31
|
-
|
|
32
|
+
order = table_order.presence || Arel.sql('last_imported_at DESC NULLS LAST')
|
|
33
|
+
@importers = Importer.order(order).page(table_page).per(table_per_page)
|
|
32
34
|
@importers = @importers.where(importer_table_search) if importer_table_search.present?
|
|
33
35
|
respond_to do |format|
|
|
34
36
|
format.json { render json: format_importers(@importers) }
|
|
@@ -65,10 +67,11 @@ module Bulkrax
|
|
|
65
67
|
end
|
|
66
68
|
end
|
|
67
69
|
|
|
68
|
-
#
|
|
70
|
+
# GET /importers/sample_csv_file
|
|
69
71
|
def sample_csv_file
|
|
70
|
-
|
|
71
|
-
|
|
72
|
+
admin_set_id = params[:admin_set_id].presence
|
|
73
|
+
sample = Bulkrax::CsvParser.generate_template(models: 'all', output: 'file', admin_set_id: admin_set_id)
|
|
74
|
+
send_file sample, filename: File.basename(sample), type: 'text/csv', disposition: 'attachment'
|
|
72
75
|
rescue StandardError => e
|
|
73
76
|
flash[:error] = "Unable to generate sample CSV file: #{e.message}"
|
|
74
77
|
redirect_back fallback_location: bulkrax.importers_path
|
|
@@ -93,7 +96,7 @@ module Bulkrax
|
|
|
93
96
|
if api_request?
|
|
94
97
|
return return_json_response unless valid_create_params?
|
|
95
98
|
end
|
|
96
|
-
uploads =
|
|
99
|
+
uploads = uploaded_files_scope
|
|
97
100
|
file = file_param
|
|
98
101
|
cloud_files = cloud_params
|
|
99
102
|
|
|
@@ -132,7 +135,7 @@ module Bulkrax
|
|
|
132
135
|
if api_request?
|
|
133
136
|
return return_json_response unless valid_update_params?
|
|
134
137
|
end
|
|
135
|
-
uploads =
|
|
138
|
+
uploads = uploaded_files_scope
|
|
136
139
|
file = file_param
|
|
137
140
|
cloud_files = cloud_params
|
|
138
141
|
|
|
@@ -214,10 +217,26 @@ module Bulkrax
|
|
|
214
217
|
end
|
|
215
218
|
|
|
216
219
|
def original_file
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
+
file_type = params[:file_type]&.to_sym
|
|
221
|
+
|
|
222
|
+
files = @importer.original_files
|
|
223
|
+
if files.empty?
|
|
220
224
|
redirect_to @importer, alert: 'Importer does not support file re-download or the imported file is not found on the server.'
|
|
225
|
+
return
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
# If file_type is specified, find that specific file
|
|
229
|
+
if file_type
|
|
230
|
+
file = files.find { |f| f[:type] == file_type }
|
|
231
|
+
if file
|
|
232
|
+
send_file file[:path], filename: file[:name], disposition: 'attachment'
|
|
233
|
+
else
|
|
234
|
+
redirect_to @importer, alert: "File type '#{file_type}' not found."
|
|
235
|
+
end
|
|
236
|
+
else
|
|
237
|
+
# Default behavior: send the first file (CSV) for backward compatibility
|
|
238
|
+
file = files.first
|
|
239
|
+
send_file file[:path], filename: file[:name], disposition: 'attachment'
|
|
221
240
|
end
|
|
222
241
|
end
|
|
223
242
|
|
|
@@ -230,28 +249,6 @@ module Bulkrax
|
|
|
230
249
|
|
|
231
250
|
private
|
|
232
251
|
|
|
233
|
-
def files_for_import(file, cloud_files, uploads)
|
|
234
|
-
return if file.blank? && cloud_files.blank? && uploads.blank?
|
|
235
|
-
|
|
236
|
-
@importer[:parser_fields]['import_file_path'] = @importer.parser.write_import_file(file) if file.present?
|
|
237
|
-
if cloud_files.present?
|
|
238
|
-
@importer[:parser_fields]['cloud_file_paths'] = cloud_files
|
|
239
|
-
# For BagIt, there will only be one bag, so we get the file_path back and set import_file_path
|
|
240
|
-
# For CSV, we expect only file uploads, so we won't get the file_path back
|
|
241
|
-
# and we expect the import_file_path to be set already
|
|
242
|
-
target = @importer.parser.retrieve_cloud_files(cloud_files, @importer)
|
|
243
|
-
@importer[:parser_fields]['import_file_path'] = target if target.present?
|
|
244
|
-
end
|
|
245
|
-
|
|
246
|
-
if uploads.present?
|
|
247
|
-
uploads.each do |upload|
|
|
248
|
-
@importer[:parser_fields]['import_file_path'] = @importer.parser.write_import_file(upload.file.file)
|
|
249
|
-
end
|
|
250
|
-
end
|
|
251
|
-
|
|
252
|
-
@importer.save
|
|
253
|
-
end
|
|
254
|
-
|
|
255
252
|
# Use callbacks to share common setup or constraints between actions.
|
|
256
253
|
def set_importer
|
|
257
254
|
@importer = Importer.find(params[:id] || params[:importer_id])
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
|
|
4
|
+
# rubocop:disable Metrics/ModuleLength
|
|
5
|
+
module GuidedImportDemoScenarios
|
|
6
|
+
extend ActiveSupport::Concern
|
|
7
|
+
|
|
8
|
+
# Serves the bundled demo scenario fixture (lib/bulkrax/data/demo_scenarios.json
# under the engine root) as JSON, or a 404 JSON error when the file is absent.
def demo_scenarios
  fixture = Bulkrax::Engine.root.join('lib', 'bulkrax', 'data', 'demo_scenarios.json')

  unless File.exist?(fixture)
    message = I18n.t('bulkrax.importer.guided_import.flash.demo_not_available')
    return render(json: { error: message }, status: :not_found)
  end

  render json: File.read(fixture), status: :ok
end
|
|
17
|
+
|
|
18
|
+
private
|
|
19
|
+
|
|
20
|
+
# Returns the hardcoded demo response when ENV['DEMO_MODE'] is 'true';
# otherwise defers to the next run_validation in the lookup chain with the
# same arguments.
def run_validation(csv_file, zip_file, admin_set_id: nil)
  return super unless ENV['DEMO_MODE'] == 'true'

  generate_validation_response(csv_file, zip_file)
end
|
|
27
|
+
|
|
28
|
+
# rubocop:disable Metrics/MethodLength
# Builds the hardcoded mock validation result used in demo mode. The CSV is
# ignored; the ZIP only determines :zipIncluded.
# NOTE(review): :totalItems evaluates to 249 (3 collections + 191 works +
# 55 file sets) while :rowCount is fixed at 247 - confirm that is intended.
def generate_validation_response(_csv_file, zip_file)
  collections = [
    { id: 'col-1', title: 'Historical Photographs Collection', type: 'collection', parentIds: [], childrenIds: ['work-shared-1'] },
    { id: 'col-2', title: 'Manuscripts & Letters', type: 'collection', parentIds: [], childrenIds: [] },
    { id: 'col-3', title: 'Audio Recordings', type: 'collection', parentIds: [], childrenIds: ['work-shared-2'] }
  ]

  # 189 single-parent works: the first 75 belong to col-1, the next 65 to
  # col-2 and the remaining 49 to col-3.
  works = Array.new(189) do |index|
    parent = if index < 75
               'col-1'
             elsif index < 140
               'col-2'
             else
               'col-3'
             end
    number = index + 1
    { id: "work-#{number}", title: "Work #{number}", type: 'work', parentIds: [parent] }
  end

  # Two works that sit in more than one collection.
  works.push(
    { id: 'work-shared-1', title: 'Cross-Collection Photograph', type: 'work', parentIds: ['col-1', 'col-2'] },
    { id: 'work-shared-2', title: 'Interdisciplinary Recording', type: 'work', parentIds: ['col-2', 'col-3'] }
  )

  file_sets = (1..55).map do |number|
    { id: "fs-#{number}", title: "FileSet #{number}", type: 'file_set' }
  end

  # 'legacy_id' is the one header reported as unrecognized.
  headers = ['source_identifier', 'title', 'creator', 'model', 'parents', 'children', 'file', 'description', 'date_created', 'legacy_id', 'subject']
  unrecognized = ['legacy_id']
  missing_required = []
  missing_files = ['photo_087.tiff', 'letter_scan_12.pdf', 'recording_03.wav']
  zip_included = zip_file.present?

  messages = build_validation_messages(
    headers: headers, unrecognized: unrecognized, missing_required: missing_required,
    missing_files: missing_files, zip_included: zip_included, row_count: 247,
    is_valid: true, has_warnings: true, file_references: 55
  )

  {
    headers: headers,
    missingRequired: missing_required,
    unrecognized: unrecognized,
    rowCount: 247,
    isValid: true,
    hasWarnings: true,
    collections: collections,
    works: works,
    fileSets: file_sets,
    totalItems: collections.length + works.length + file_sets.length,
    fileReferences: 55,
    missingFiles: missing_files,
    foundFiles: 52,
    zipIncluded: zip_included,
    messages: messages
  }
end
# rubocop:enable Metrics/MethodLength
|
|
101
|
+
|
|
102
|
+
# Assembles the structured messages hash for a validation result.
# @param results [Hash] reads :missing_required, :unrecognized and
#   :file_references here; the whole hash is also passed on to
#   file_references_issue and validation_status
# @return [Hash] { validationStatus: ..., issues: [...] } with nil issues removed
def build_validation_messages(results)
  required = results[:missing_required]
  unknown = results[:unrecognized]

  found_issues = [
    (missing_required_issue(required) if required&.any?),
    (unrecognized_fields_issue(unknown) if unknown&.any?),
    (file_references_issue(results) if results[:file_references]&.positive?)
  ].compact

  { validationStatus: validation_status(results), issues: found_issues }
end
|
|
116
|
+
|
|
117
|
+
# Builds the top-level status entry of the validation messages.
# @param results [Hash] reads :is_valid, :has_warnings, :headers,
#   :unrecognized and :row_count
# @return [Hash] severity, icon, title, summary, details and defaultOpen
def validation_status(results)
  level, glyph, heading = validation_status_level(results[:is_valid], results[:has_warnings])
  header_names = results[:headers]
  known_fields = header_names - (results[:unrecognized] || [])

  overview = I18n.t(
    'bulkrax.importer.guided_import.validation.columns_detected',
    columns: header_names.length, records: results[:row_count]
  )
  explanation = if results[:is_valid]
                  I18n.t('bulkrax.importer.guided_import.validation.recognized_fields', fields: known_fields.join(', '))
                else
                  I18n.t('bulkrax.importer.guided_import.validation.critical_errors')
                end

  {
    severity: level,
    icon: glyph,
    title: heading,
    summary: overview,
    details: explanation,
    defaultOpen: true
  }
end
|
|
130
|
+
|
|
131
|
+
# Maps the validity flags to a [severity, icon, title] triple: error when not
# valid, warning when valid with warnings, success otherwise.
def validation_status_level(is_valid, has_warnings)
  return ['error', 'fa-times-circle', I18n.t('bulkrax.importer.guided_import.validation.failed')] unless is_valid

  if has_warnings
    ['warning', 'fa-exclamation-triangle', I18n.t('bulkrax.importer.guided_import.validation.passed_warnings')]
  else
    ['success', 'fa-check-circle', I18n.t('bulkrax.importer.guided_import.validation.passed')]
  end
end
|
|
140
|
+
|
|
141
|
+
# Builds the error-severity issue entry listing the missing required fields.
# @param missing_required [Array] the missing field names
# @return [Hash]
def missing_required_issue(missing_required)
  entries = missing_required.map do |field|
    { field: field, message: I18n.t('bulkrax.importer.guided_import.validation.missing_required_hint') }
  end

  {
    type: 'missing_required_fields',
    severity: 'error',
    icon: 'fa-times-circle',
    title: I18n.t('bulkrax.importer.guided_import.validation.missing_required_title'),
    count: missing_required.length,
    description: I18n.t('bulkrax.importer.guided_import.validation.missing_required_desc'),
    items: entries,
    defaultOpen: false
  }
end
|
|
153
|
+
|
|
154
|
+
# Builds the warning-severity issue entry listing the unrecognized fields.
# @param unrecognized [Array] the unrecognized field names
# @return [Hash]
def unrecognized_fields_issue(unrecognized)
  entries = unrecognized.map { |name| { field: name, message: nil } }

  {
    type: 'unrecognized_fields',
    severity: 'warning',
    icon: 'fa-exclamation-triangle',
    title: I18n.t('bulkrax.importer.guided_import.validation.unrecognized_title'),
    count: unrecognized.length,
    description: I18n.t('bulkrax.importer.guided_import.validation.unrecognized_desc'),
    items: entries,
    defaultOpen: false
  }
end
|
|
166
|
+
|
|
167
|
+
# rubocop:disable Metrics/MethodLength
# Builds the file-references issue entry.
#
# Without a ZIP it reports how many files the CSV references. With a ZIP and
# missing files it lists the files missing from the ZIP. With a ZIP and
# nothing missing it returns nil.
# @param results [Hash] reads :file_references, :missing_files and :zip_included
# @return [Hash, nil]
def file_references_issue(results)
  referenced = results[:file_references]
  absent = results[:missing_files] || []
  located = referenced - absent.length

  if !results[:zip_included]
    icon = 'fa-exclamation-triangle'
    summary = I18n.t('bulkrax.importer.guided_import.validation.files_referenced', count: referenced)
    description = I18n.t('bulkrax.importer.guided_import.validation.no_zip_desc')
    items = []
  elsif absent.any?
    icon = 'fa-info-circle'
    summary = I18n.t('bulkrax.importer.guided_import.validation.files_found_in_zip', found: located, total: referenced)
    description = I18n.t('bulkrax.importer.guided_import.validation.files_missing_from_zip', count: absent.length, files_word: 'file'.pluralize(absent.length))
    items = absent.map { |name| { field: name, message: I18n.t('bulkrax.importer.guided_import.validation.missing_from_zip') } }
  else
    return nil
  end

  {
    type: 'file_references',
    severity: 'warning',
    icon: icon,
    title: I18n.t('bulkrax.importer.guided_import.validation.file_references_title'),
    count: referenced,
    summary: summary,
    description: description,
    items: items,
    defaultOpen: false
  }
end # rubocop:enable Metrics/MethodLength
|
|
199
|
+
end
|
|
200
|
+
# rubocop:enable Metrics/ModuleLength
|
|
201
|
+
end
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
|
|
4
|
+
# rubocop:disable Metrics/ModuleLength
|
|
5
|
+
module ImporterFileHandler
|
|
6
|
+
extend ActiveSupport::Concern
|
|
7
|
+
|
|
8
|
+
private
|
|
9
|
+
|
|
10
|
+
# Resolves the files to validate from, in order of precedence: a server-side
# file path, pre-uploaded Hyrax files (params[:uploaded_files]), or the files
# submitted directly under params[:importer][:parser_fields][:files].
# @return [Array<(Array, nil)>] on success, a tuple of [files, nil]
# @return [Array<(nil, Hash)>] on error, a tuple of [nil, error_response]
def resolve_validation_files
  if import_via_file_path?
    if File.exist?(import_file_path)
      return [[File.open(import_file_path)], nil]
    end

    message = I18n.t('bulkrax.importer.guided_import.validation.file_path_not_exist')
    return [nil, StepperResponseFormatter.error(message: message)]
  end

  return resolve_hyrax_uploaded_files if params[:uploaded_files].present?

  submitted = params[:importer]&.[](:parser_fields)&.[](:files) || []
  submitted = submitted.is_a?(Array) ? submitted : [submitted]
  [submitted.compact, nil]
end
|
|
26
|
+
|
|
27
|
+
# Opens the files behind the uploads returned by uploaded_files_scope.
# Uploads without a path, or whose path does not exist on disk, are skipped.
#
# If anything raises part-way through, the handles opened so far are closed
# before the error tuple is returned; the caller never receives them and so
# could not close them itself.
# @return [Array<(Array<File>, nil)>] on success, a tuple of [files, nil]
# @return [Array<(nil, Hash)>] on error, a tuple of [nil, error_response]
def resolve_hyrax_uploaded_files
  opened = []
  uploads = uploaded_files_scope
  return [nil, StepperResponseFormatter.error(message: 'No uploaded files found for the given IDs')] if uploads.empty?

  uploads.each do |upload|
    path = upload.file&.path
    opened << File.open(path) if path && File.exist?(path)
  end
  [opened, nil]
rescue StandardError => e
  opened.each { |handle| handle.close unless handle.closed? }
  Rails.logger.error("Bulkrax: error loading Hyrax uploaded files: #{e.class}: #{e.message}")
  Rails.logger.debug { e.full_message }
  [nil, StepperResponseFormatter.error(message: 'Failed to load uploaded files')]
end
|
|
44
|
+
|
|
45
|
+
# Looks up Hyrax::UploadedFile records for params[:uploaded_files], limited
# to the current user's own uploads. Yields an empty result when Hyrax is not
# loaded (plain Array) or when no current user is available (`none` scope).
def uploaded_files_scope
  return [] unless defined?(::Hyrax)

  requested = Hyrax::UploadedFile.where(id: params[:uploaded_files])
  owner_known = respond_to?(:current_user) && current_user.present?
  owner_known ? requested.where(user_id: current_user.id) : requested.none
end
|
|
55
|
+
|
|
56
|
+
# Resolves the files for `create`: opened handles for the pre-uploaded Hyrax
# files when params[:uploaded_files] is present (uploads without an existing
# file on disk are skipped), otherwise the directly submitted files.
def resolve_create_files
  return extract_uploaded_files unless params[:uploaded_files].present?

  uploaded_files_scope.each_with_object([]) do |upload, handles|
    location = upload.file&.path
    handles << File.open(location) if location && File.exist?(location)
  end
end
|
|
68
|
+
|
|
69
|
+
# Returns the files submitted under params[:importer][:parser_fields][:files]
# as an Array without nils; an empty Array when nothing was submitted.
def extract_uploaded_files
  submitted = params[:importer]&.[](:parser_fields)&.[](:files)
  return [] if submitted.blank?

  (submitted.is_a?(Array) ? submitted : [submitted]).compact
end
|
|
75
|
+
|
|
76
|
+
# Picks the first file whose name ends in '.csv' and the first whose name
# ends in '.zip'. Matching is case-sensitive.
# NOTE(review): names such as 'DATA.CSV' are therefore not selected - confirm
# whether that is intended.
# @param files [Array] file-like objects understood by filename_for
# @return [Array] a tuple of [csv_file, zip_file]; either may be nil
def select_csv_and_zip(files)
  first_with = ->(extension) { files.find { |candidate| filename_for(candidate)&.end_with?(extension) } }
  [first_with.call('.csv'), first_with.call('.zip')]
end
|
|
84
|
+
|
|
85
|
+
# Returns a name for a file-like object: its original_filename when it
# responds to that, otherwise its path; nil when it responds to neither.
def filename_for(file)
  return file.original_filename if file.respond_to?(:original_filename)

  file.path if file.respond_to?(:path)
end
|
|
93
|
+
|
|
94
|
+
# Opens the ZIP and reads the CSV entry chosen by locate_csv_entry_in_zip
# into a StringIO while the archive is still open.
# @param zip_file [#path] the ZIP file to search
# @return [Array<(StringIO, nil)>] on success, a tuple of [csv_file, nil]
# @return [Array<(nil, Hash)>] on error, a tuple of [nil, error_response]
def extract_csv_from_zip(zip_file)
  extracted = nil
  failure = nil

  Zip::File.open(zip_file.path) do |archive|
    located = locate_csv_entry_in_zip(archive)
    next unless located

    # A Hash carrying :messages is an error response, not a ZIP entry.
    if located.is_a?(Hash) && located[:messages]
      failure = located
    else
      extracted = StringIO.new(located.get_input_stream.read)
    end
  end

  [extracted, failure]
end
|
|
111
|
+
|
|
112
|
+
# Finds the CSV entry of a ZIP, preferring the shallowest directory level.
# CSV entries are collected, bucketed by directory depth, and the bucket with
# the smallest depth is inspected: exactly one entry there is the result.
# @param zip [Zip::File] the open ZIP archive to search
# @return [Zip::Entry] the CSV entry on success
# @return [Hash] an error response hash if no CSV is found or multiple CSVs are ambiguous
def locate_csv_entry_in_zip(zip)
  candidates = group_entries_by_directory_level(zip)
  if candidates.empty?
    return StepperResponseFormatter.error(message: I18n.t('bulkrax.importer.guided_import.validation.no_csv_in_zip'))
  end

  by_depth = get_directory_depth_for_each_csv(candidates)
  shallowest = determine_csvs_at_shallowest_level(by_depth)
  if shallowest.size > 1
    return StepperResponseFormatter.error(message: I18n.t('bulkrax.importer.guided_import.validation.multiple_csv'))
  end

  shallowest.first
end
|
|
128
|
+
|
|
129
|
+
# Collects the CSV file entries of an archive. Despite its name this method
# performs no grouping: it only keeps entries whose name ends in ".csv" and
# that are not directories.
# @param zip [Enumerable] the archive entries (each responding to name and directory?)
# @return [Array] the matching entries, in iteration order
def group_entries_by_directory_level(zip)
  zip.select do |candidate|
    csv_named = candidate.name.end_with?('.csv')
    csv_named && !candidate.directory?
  end
end
|
|
132
|
+
|
|
133
|
+
# Buckets entries by directory depth, where depth is the number of "/"
# characters in the entry name.
# @param entries [Array] entries responding to name
# @return [Hash{Integer => Array}] depth mapped to the entries at that depth,
#   keyed in order of first appearance
def get_directory_depth_for_each_csv(entries)
  entries.each_with_object({}) do |entry, by_depth|
    depth = entry.name.count('/')
    (by_depth[depth] ||= []) << entry
  end
end
|
|
136
|
+
|
|
137
|
+
# Picks the bucket belonging to the smallest depth key.
# @param csv_by_depth [Hash{Integer => Array}] entries keyed by directory depth
# @return [Array, nil] the entries stored under the minimum key; nil for an empty hash
def determine_csvs_at_shallowest_level(csv_by_depth)
  _depth, shallowest = csv_by_depth.min_by { |depth, _entries| depth }
  shallowest
end
|
|
141
|
+
|
|
142
|
+
# Persists uploaded file(s) and/or cloud files onto the importer record.
# Does nothing when all three arguments are blank. Each source that is present
# may set parser_fields['import_file_path'] in turn (file, then cloud files,
# then each upload), so the last one written wins. The importer is saved at the end.
# @param file [ActionDispatch::Http::UploadedFile, nil] a directly uploaded file
# @param cloud_files [Hash, nil] cloud file paths from browse-everything
# @param uploads [ActiveRecord::Relation, Array, nil] Hyrax::UploadedFile records
# @return [Object, nil] the result of @importer.save, or nil when there was nothing to persist
def files_for_import(file, cloud_files, uploads)
  return if [file, cloud_files, uploads].all?(&:blank?)

  unless file.blank?
    @importer[:parser_fields]['import_file_path'] = @importer.parser.write_import_file(file)
  end

  unless cloud_files.blank?
    @importer[:parser_fields]['cloud_file_paths'] = cloud_files
    # For BagIt, there will only be one bag, so we get the file_path back and set import_file_path
    # For CSV, we expect only file uploads, so we won't get the file_path back
    # and we expect the import_file_path to be set already
    retrieved_path = @importer.parser.retrieve_cloud_files(cloud_files, @importer)
    @importer[:parser_fields]['import_file_path'] = retrieved_path unless retrieved_path.blank?
  end

  unless uploads.blank?
    uploads.each do |uploaded|
      @importer[:parser_fields]['import_file_path'] = @importer.parser.write_import_file(uploaded.file.file)
    end
  end

  @importer.save
end
|
|
167
|
+
|
|
168
|
+
# Writes the CSV and/or ZIP found among the given files into the import
# location and records the resulting paths on the importer.
#
# parser_fields['import_file_path'] is set to the CSV path when a CSV was
# written, otherwise to the ZIP path. parser_fields['attachments_zip_path'] is
# set only when both a CSV and a ZIP were written. Nothing is saved when
# neither kind of file was written.
#
# @param files [Array] the resolved files, as accepted by select_csv_and_zip
# @return [Object, nil] the result of @importer.save, or nil when nothing was written
# @raise [StandardError] any error raised along the way is logged and re-raised
def write_files(files)
  # The CSV is written first, then the ZIP.
  csv_path, zip_path = select_csv_and_zip(files).map { |candidate| write_file_if_present(candidate) }

  # Determine import_file_path: prefer CSV, fallback to ZIP
  primary_path = csv_path || zip_path
  return unless primary_path

  @importer.parser_fields['import_file_path'] = primary_path
  @importer.parser_fields['attachments_zip_path'] = zip_path if csv_path && zip_path

  @importer.save
rescue StandardError => error
  Rails.logger.error("Bulkrax::ImporterFileHandler#write_files failed: #{error.message}")
  raise
end
|
|
185
|
+
|
|
186
|
+
# Stores a single file in the importer's import location.
#
# Objects responding to original_filename are handed to the parser's
# write_import_file. Anything else is copied from its path into the parser's
# path_for_import directory, keeping its basename.
#
# @param file [Object, nil] an upload or a File-like object exposing path
# @return [Object, nil] the value returned by write_import_file, or the
#   destination path of the copy; nil when no file was given
def write_file_if_present(file)
  return nil unless file
  return @importer.parser.write_import_file(file) if file.respond_to?(:original_filename)

  import_dir = @importer.parser.path_for_import
  target_path = File.join(import_dir, File.basename(file.path))
  FileUtils.cp(file.path, target_path)
  target_path
end
|
|
197
|
+
|
|
198
|
+
# Closes every element of the given array that responds to close.
# Non-array arguments are ignored.
# @param files [Array, Object] the handles to close
# @return [Array, nil] the array that was passed in, or nil for non-array input
def close_file_handles(files)
  return unless files.is_a?(Array)

  files.each do |handle|
    next unless handle.respond_to?(:close)

    handle.close
  end
end
|
|
202
|
+
|
|
203
|
+
# Tells whether the request supplied an import file path.
# @return [Boolean] true when import_file_path is present
def import_via_file_path?
  import_file_path.present?
end
|
|
206
|
+
|
|
207
|
+
# Reads params[:importer][:parser_fields][:import_file_path].
# A truthy value is memoized in @file_path; a nil/false result is not cached
# and is looked up again on the next call.
# @return [Object, nil] the submitted import file path, or nil when any level is missing
def import_file_path
  @file_path ||= begin
    parser_fields = params[:importer]&.[](:parser_fields)
    parser_fields&.[](:import_file_path)
  end
end
|
|
210
|
+
end
|
|
211
|
+
# rubocop:enable Metrics/ModuleLength
|
|
212
|
+
end
|