bulkrax 9.3.4 → 9.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -1
- data/app/assets/javascripts/bulkrax/application.js +2 -1
- data/app/assets/javascripts/bulkrax/bulkrax.js +13 -4
- data/app/assets/javascripts/bulkrax/bulkrax_utils.js +96 -0
- data/app/assets/javascripts/bulkrax/datatables.js +1 -0
- data/app/assets/javascripts/bulkrax/entries.js +17 -10
- data/app/assets/javascripts/bulkrax/importers.js.erb +9 -2
- data/app/assets/javascripts/bulkrax/importers_stepper.js +2420 -0
- data/app/assets/stylesheets/bulkrax/application.css +1 -1
- data/app/assets/stylesheets/bulkrax/import_export.scss +9 -2
- data/app/assets/stylesheets/bulkrax/stepper/_header.scss +83 -0
- data/app/assets/stylesheets/bulkrax/stepper/_mixins.scss +26 -0
- data/app/assets/stylesheets/bulkrax/stepper/_navigation.scss +103 -0
- data/app/assets/stylesheets/bulkrax/stepper/_responsive.scss +46 -0
- data/app/assets/stylesheets/bulkrax/stepper/_review.scss +92 -0
- data/app/assets/stylesheets/bulkrax/stepper/_settings.scss +106 -0
- data/app/assets/stylesheets/bulkrax/stepper/_success.scss +26 -0
- data/app/assets/stylesheets/bulkrax/stepper/_summary.scss +171 -0
- data/app/assets/stylesheets/bulkrax/stepper/_upload.scss +339 -0
- data/app/assets/stylesheets/bulkrax/stepper/_validation.scss +237 -0
- data/app/assets/stylesheets/bulkrax/stepper/_variables.scss +46 -0
- data/app/assets/stylesheets/bulkrax/stepper.scss +32 -0
- data/app/controllers/bulkrax/guided_imports_controller.rb +175 -0
- data/app/controllers/bulkrax/importers_controller.rb +34 -28
- data/app/controllers/concerns/bulkrax/guided_import_demo_scenarios.rb +201 -0
- data/app/controllers/concerns/bulkrax/importer_file_handler.rb +217 -0
- data/app/factories/bulkrax/object_factory.rb +3 -2
- data/app/factories/bulkrax/valkyrie_object_factory.rb +61 -17
- data/app/jobs/bulkrax/export_work_job.rb +1 -3
- data/app/jobs/bulkrax/importer_job.rb +11 -4
- data/app/models/bulkrax/csv_entry.rb +27 -7
- data/app/models/bulkrax/entry.rb +4 -0
- data/app/models/bulkrax/importer.rb +31 -1
- data/app/models/concerns/bulkrax/has_matchers.rb +2 -2
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +6 -5
- data/app/parsers/bulkrax/application_parser.rb +31 -5
- data/app/parsers/bulkrax/csv_parser.rb +42 -10
- data/app/parsers/concerns/bulkrax/csv_parser/csv_template_generation.rb +73 -0
- data/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb +133 -0
- data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb +282 -0
- data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_hierarchy.rb +96 -0
- data/app/services/bulkrax/csv_template/column_builder.rb +60 -0
- data/app/services/bulkrax/csv_template/column_descriptor.rb +58 -0
- data/app/services/bulkrax/csv_template/csv_builder.rb +83 -0
- data/app/services/bulkrax/csv_template/explanation_builder.rb +57 -0
- data/app/services/bulkrax/csv_template/field_analyzer.rb +56 -0
- data/app/services/bulkrax/csv_template/file_path_generator.rb +47 -0
- data/app/services/bulkrax/csv_template/file_validator.rb +68 -0
- data/app/services/bulkrax/csv_template/mapping_manager.rb +55 -0
- data/app/services/bulkrax/csv_template/model_loader.rb +50 -0
- data/app/services/bulkrax/csv_template/row_builder.rb +35 -0
- data/app/services/bulkrax/csv_template/schema_analyzer.rb +70 -0
- data/app/services/bulkrax/csv_template/split_formatter.rb +44 -0
- data/app/services/bulkrax/csv_template/value_determiner.rb +68 -0
- data/app/services/bulkrax/stepper_response_formatter.rb +347 -0
- data/app/services/bulkrax/validation_error_csv_builder.rb +99 -0
- data/app/validators/bulkrax/csv_row/child_reference.rb +56 -0
- data/app/validators/bulkrax/csv_row/circular_reference.rb +71 -0
- data/app/validators/bulkrax/csv_row/controlled_vocabulary.rb +74 -0
- data/app/validators/bulkrax/csv_row/duplicate_identifier.rb +63 -0
- data/app/validators/bulkrax/csv_row/missing_source_identifier.rb +31 -0
- data/app/validators/bulkrax/csv_row/parent_reference.rb +59 -0
- data/app/validators/bulkrax/csv_row/required_values.rb +64 -0
- data/app/views/bulkrax/entries/_parsed_metadata.html.erb +1 -1
- data/app/views/bulkrax/entries/_raw_metadata.html.erb +1 -1
- data/app/views/bulkrax/entries/show.html.erb +6 -6
- data/app/views/bulkrax/exporters/_form.html.erb +19 -43
- data/app/views/bulkrax/exporters/edit.html.erb +2 -2
- data/app/views/bulkrax/exporters/index.html.erb +5 -5
- data/app/views/bulkrax/exporters/new.html.erb +3 -5
- data/app/views/bulkrax/exporters/show.html.erb +3 -3
- data/app/views/bulkrax/guided_imports/new.html.erb +567 -0
- data/app/views/bulkrax/importers/_bagit_fields.html.erb +9 -9
- data/app/views/bulkrax/importers/_browse_everything.html.erb +1 -1
- data/app/views/bulkrax/importers/_csv_fields.html.erb +11 -11
- data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +23 -23
- data/app/views/bulkrax/importers/_edit_item_buttons.html.erb +2 -2
- data/app/views/bulkrax/importers/_file_uploader.html.erb +3 -3
- data/app/views/bulkrax/importers/_form.html.erb +4 -5
- data/app/views/bulkrax/importers/_oai_fields.html.erb +8 -18
- data/app/views/bulkrax/importers/_xml_fields.html.erb +13 -13
- data/app/views/bulkrax/importers/edit.html.erb +2 -2
- data/app/views/bulkrax/importers/index.html.erb +19 -14
- data/app/views/bulkrax/importers/new.html.erb +10 -9
- data/app/views/bulkrax/importers/show.html.erb +23 -7
- data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +6 -6
- data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +11 -11
- data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +3 -3
- data/config/i18n-tasks.yml +195 -0
- data/config/locales/bulkrax.de.yml +504 -0
- data/config/locales/bulkrax.en.yml +487 -28
- data/config/locales/bulkrax.es.yml +504 -0
- data/config/locales/bulkrax.fr.yml +504 -0
- data/config/locales/bulkrax.it.yml +504 -0
- data/config/locales/bulkrax.pt-BR.yml +504 -0
- data/config/locales/bulkrax.zh.yml +503 -0
- data/config/routes.rb +10 -0
- data/lib/bulkrax/data/demo_scenarios.json +2235 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +31 -3
- data/lib/tasks/bulkrax_tasks.rake +0 -102
- metadata +55 -3
- /data/{app/services → lib}/wings/custom_queries/find_by_source_identifier.rb +0 -0
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
|
|
4
|
+
# rubocop:disable Metrics/ModuleLength
|
|
5
|
+
module ImporterFileHandler
|
|
6
|
+
extend ActiveSupport::Concern
|
|
7
|
+
|
|
8
|
+
private
|
|
9
|
+
|
|
10
|
+
# Collects the files to validate from one of three sources, checked in order:
# a server-side import file path, previously uploaded Hyrax files
# (params[:uploaded_files]), or files posted directly under
# params[:importer][:parser_fields][:files].
# @return [Array<(Array<File>, nil)>] on success, a tuple of [files, nil]
# @return [Array<(nil, Hash)>] on error, a tuple of [nil, error_response]
def resolve_validation_files
  if import_via_file_path?
    server_path = import_file_path
    return [[File.open(server_path)], nil] if File.exist?(server_path)

    missing_message = I18n.t('bulkrax.importer.guided_import.validation.file_path_not_exist')
    return [nil, StepperResponseFormatter.error(message: missing_message)]
  end

  return resolve_hyrax_uploaded_files if params[:uploaded_files].present?

  # Direct upload: a single file is normalised to a one-element list.
  posted = params[:importer]&.[](:parser_fields)&.[](:files) || []
  posted = [posted] unless posted.is_a?(Array)
  [posted.compact, nil]
end
|
|
26
|
+
|
|
27
|
+
# Opens the on-disk files behind the records returned by uploaded_files_scope
# (used by the chunked upload flow). Records whose file has no path, or whose
# path no longer exists on disk, are skipped.
# @return [Array<(Array<File>, nil)>] on success, a tuple of [files, nil]
# @return [Array<(nil, Hash)>] when no records match or loading raises,
#   a tuple of [nil, error_response]
def resolve_hyrax_uploaded_files
  records = uploaded_files_scope
  return [nil, StepperResponseFormatter.error(message: 'No uploaded files found for the given IDs')] if records.empty?

  opened = records.each_with_object([]) do |record, handles|
    disk_path = record.file&.path
    handles << File.open(disk_path) if disk_path && File.exist?(disk_path)
  end
  [opened, nil]
rescue StandardError => e
  # Any failure is logged and reported to the caller as an error response.
  Rails.logger.error("Bulkrax: error loading Hyrax uploaded files: #{e.class}: #{e.message}")
  Rails.logger.debug { e.full_message }
  [nil, StepperResponseFormatter.error(message: 'Failed to load uploaded files')]
end
|
|
44
|
+
|
|
45
|
+
# Builds the Hyrax::UploadedFile query for the IDs in params[:uploaded_files],
# restricted to records whose user_id matches the current user.
# @return [Array] an empty array when Hyrax is not loaded
# @return [ActiveRecord::Relation] otherwise; an empty relation (`none`) when
#   no current user is available
def uploaded_files_scope
  return [] unless defined?(::Hyrax)

  requested = Hyrax::UploadedFile.where(id: params[:uploaded_files])
  # NOTE(review): respond_to? is false for non-public methods, so this relies
  # on current_user being public on the including controller.
  return requested.none unless respond_to?(:current_user) && current_user.present?

  requested.where(user_id: current_user.id)
end
|
|
55
|
+
|
|
56
|
+
# Resolves the files used when creating an importer: open handles for the
# records from uploaded_files_scope when params[:uploaded_files] is present,
# otherwise the files posted directly with the form.
# @return [Array] open File handles or the directly uploaded file objects
def resolve_create_files
  return extract_uploaded_files unless params[:uploaded_files].present?

  uploaded_files_scope.each_with_object([]) do |record, handles|
    disk_path = record.file&.path
    # Skip records with no path or whose file is gone from disk.
    handles << File.open(disk_path) if disk_path && File.exist?(disk_path)
  end
end
|
|
68
|
+
|
|
69
|
+
# Reads params[:importer][:parser_fields][:files] and normalises it to a list.
# @return [Array] the posted file(s) with nils removed; empty when nothing was posted
def extract_uploaded_files
  posted = params[:importer]&.[](:parser_fields)&.[](:files)
  return [] if posted.blank?

  (posted.is_a?(Array) ? posted : [posted]).compact
end
|
|
75
|
+
|
|
76
|
+
# Scans the given files for a CSV and a ZIP by file extension.
# The extension comparison is case-insensitive so that uploads such as
# "DATA.CSV" or "Images.ZIP" are recognised.
# @param files [Array<File, ActionDispatch::Http::UploadedFile>] the resolved files to search
# @return [Array] a tuple of [csv_file, zip_file]; either element may be nil.
#   When several files share an extension the first one wins.
def select_csv_and_zip(files)
  first_with_extension = lambda do |extension|
    # filename_for may return nil for objects with neither a filename nor a path.
    files.find { |f| filename_for(f)&.downcase&.end_with?(extension) }
  end

  [first_with_extension.call('.csv'), first_with_extension.call('.zip')]
end
|
|
84
|
+
|
|
85
|
+
# Picks a name for a file-like object: the original_filename when the object
# exposes one, otherwise its path.
# @param file [Object] any file-like object
# @return [String, nil] nil when the object offers neither method
def filename_for(file)
  return file.original_filename if file.respond_to?(:original_filename)
  return file.path if file.respond_to?(:path)

  nil
end
|
|
93
|
+
|
|
94
|
+
# Opens the ZIP at zip_file.path and, while the archive is open, copies the
# located CSV entry's content into an in-memory StringIO.
# @param zip_file [File] the ZIP file to search (only #path is used)
# @return [Array<(StringIO, nil)>] on success, a tuple of [csv_file, nil]
# @return [Array<(nil, Hash)>] on error, a tuple of [nil, error_response]
def extract_csv_from_zip(zip_file)
  extracted = nil
  failure = nil

  Zip::File.open(zip_file.path) do |archive|
    located = locate_csv_entry_in_zip(archive)
    next unless located

    # A Hash carrying :messages is an error response rather than an entry.
    if located.is_a?(Hash) && located[:messages]
      failure = located
    else
      extracted = StringIO.new(located.get_input_stream.read)
    end
  end

  [extracted, failure]
end
|
|
111
|
+
|
|
112
|
+
# Picks the single CSV entry to import from a ZIP, considering only the CSVs
# at the shallowest directory depth that contains any.
# @param zip [Zip::File] the open ZIP archive to search
# @return [Zip::Entry] the CSV entry on success
# @return [Hash] an error response when the archive has no CSV, or when more
#   than one CSV sits at the shallowest depth (same directory or sibling directories)
def locate_csv_entry_in_zip(zip)
  candidates = group_entries_by_directory_level(zip)
  if candidates.empty?
    return StepperResponseFormatter.error(message: I18n.t('bulkrax.importer.guided_import.validation.no_csv_in_zip'))
  end

  shallowest = determine_csvs_at_shallowest_level(get_directory_depth_for_each_csv(candidates))

  # Two CSVs in the same directory are reported before the more general
  # "several CSVs at this level" case.
  crowded_directory = shallowest.group_by { |entry| File.dirname(entry.name) }.values.any? { |csvs| csvs.count > 1 }
  if crowded_directory
    return StepperResponseFormatter.error(message: I18n.t('bulkrax.importer.guided_import.validation.multiple_csv_same_dir'))
  end

  if shallowest.size > 1
    return StepperResponseFormatter.error(message: I18n.t('bulkrax.importer.guided_import.validation.multiple_csv_same_level'))
  end

  shallowest.first
end
|
|
133
|
+
|
|
134
|
+
# Selects the CSV file entries from a ZIP archive. Despite the name, nothing
# is grouped here: the result is a flat list that the depth helpers consume.
#
# The extension match is case-insensitive, and macOS archive metadata
# (anything under "__MACOSX/" and AppleDouble "._name" files) is ignored so a
# resource-fork stub such as "__MACOSX/._data.csv" is never mistaken for the
# real CSV.
# @param zip [Enumerable] entries responding to #name and #directory?
# @return [Array] the non-directory entries whose name ends in ".csv"
def group_entries_by_directory_level(zip)
  zip.select do |entry|
    next false if entry.directory?

    name = entry.name
    next false if name.start_with?('__MACOSX/')
    next false if name.split('/').any? { |segment| segment.start_with?('._') }

    name.downcase.end_with?('.csv')
  end
end
|
|
137
|
+
|
|
138
|
+
# Buckets entries by directory depth, measured as the number of "/"
# characters in the entry name.
# @param entries [Array] entries responding to #name
# @return [Hash{Integer => Array}] depth => entries at that depth
def get_directory_depth_for_each_csv(entries)
  entries.each_with_object({}) do |entry, by_depth|
    depth = entry.name.count('/')
    (by_depth[depth] ||= []) << entry
  end
end
|
|
141
|
+
|
|
142
|
+
# Returns the entries stored under the smallest depth key.
# @param csv_by_depth [Hash{Integer => Array}] depth => entries
# @return [Array, nil] the entries at the minimum depth; nil for an empty hash
def determine_csvs_at_shallowest_level(csv_by_depth)
  _depth, entries = csv_by_depth.min_by { |depth, _entries| depth }
  entries
end
|
|
146
|
+
|
|
147
|
+
# Records the import source(s) on @importer's parser_fields and saves it.
# Does nothing when all three arguments are blank.
# @param file [ActionDispatch::Http::UploadedFile, nil] a directly uploaded file
# @param cloud_files [Hash, nil] cloud file paths from browse-everything
# @param uploads [ActiveRecord::Relation, Array, nil] Hyrax::UploadedFile records
# @return [Object, nil] the result of @importer.save, or nil when nothing was given
def files_for_import(file, cloud_files, uploads)
  return if [file, cloud_files, uploads].all?(&:blank?)

  unless file.blank?
    @importer[:parser_fields]['import_file_path'] = @importer.parser.write_import_file(file)
  end

  unless cloud_files.blank?
    @importer[:parser_fields]['cloud_file_paths'] = cloud_files
    # For BagIt there is a single bag, so a file path comes back and becomes
    # import_file_path. For CSV only file uploads are expected, so nothing
    # comes back and import_file_path is expected to be set already.
    retrieved_path = @importer.parser.retrieve_cloud_files(cloud_files, @importer)
    @importer[:parser_fields]['import_file_path'] = retrieved_path unless retrieved_path.blank?
  end

  unless uploads.blank?
    # Every upload is written, but each one overwrites import_file_path, so
    # the path of the last upload is the one that is kept.
    uploads.each do |upload|
      @importer[:parser_fields]['import_file_path'] = @importer.parser.write_import_file(upload.file.file)
    end
  end

  @importer.save
end
|
|
172
|
+
|
|
173
|
+
# Writes the CSV and/or ZIP found in files to the import location and records
# the resulting paths on @importer.
# @param files [Array] the resolved upload/file objects
# @return [Object, nil] the result of @importer.save, or nil when neither a
#   CSV nor a ZIP was written
# @raise [StandardError] re-raises any failure after logging it
def write_files(files)
  csv_source, zip_source = select_csv_and_zip(files)
  csv_path = write_file_if_present(csv_source)
  zip_path = write_file_if_present(zip_source)
  return if !csv_path && !zip_path

  # The CSV is the primary import file; a lone ZIP takes that role instead.
  @importer.parser_fields['import_file_path'] = csv_path || zip_path
  # A ZIP that accompanies a CSV is tracked separately as its attachments.
  @importer.parser_fields['attachments_zip_path'] = zip_path if csv_path && zip_path

  @importer.save
rescue StandardError => e
  Rails.logger.error("Bulkrax::ImporterFileHandler#write_files failed: #{e.message}")
  raise
end
|
|
190
|
+
|
|
191
|
+
# Stores one file in the importer's import location.
# Objects exposing #original_filename are handed to the parser's
# write_import_file; anything else is copied by path into the parser's
# path_for_import directory under its own basename.
# @param file [Object, nil] the upload or file handle
# @return [String, nil] the stored path, or nil when no file was given
def write_file_if_present(file)
  return nil unless file
  return @importer.parser.write_import_file(file) if file.respond_to?(:original_filename)

  source = file.path
  target = File.join(@importer.parser.path_for_import, File.basename(source))
  FileUtils.cp(source, target)
  target
end
|
|
202
|
+
|
|
203
|
+
# Closes every element of files that responds to #close.
# @param files [Array, Object] anything other than an Array is ignored
# @return [Array, nil] the given array, or nil when files is not an Array
def close_file_handles(files)
  return unless files.is_a?(Array)

  files.each do |handle|
    next unless handle.respond_to?(:close)

    handle.close
  end
end
|
|
207
|
+
|
|
208
|
+
# Whether the request supplied a non-blank import file path.
# @return [Boolean]
def import_via_file_path?
  !import_file_path.blank?
end
|
|
211
|
+
|
|
212
|
+
# Reads params[:importer][:parser_fields][:import_file_path], memoising a
# truthy value in @file_path.
# @return [String, nil] nil when any level of the params is missing
def import_file_path
  return @file_path if @file_path

  parser_fields = params[:importer]&.[](:parser_fields)
  @file_path = parser_fields&.[](:import_file_path)
end
|
|
215
|
+
end
|
|
216
|
+
# rubocop:enable Metrics/ModuleLength
|
|
217
|
+
end
|
|
@@ -70,14 +70,15 @@ module Bulkrax
|
|
|
70
70
|
properties.reject { |prop| Bulkrax.reserved_properties.include?(prop) }
|
|
71
71
|
end
|
|
72
72
|
|
|
73
|
-
|
|
73
|
+
# Whether the model declares the field with a truthy "multiple" property.
# admin_set_id is accepted but unused here; it exists so the signature matches
# the Valkyrie factory, which uses it for flexible contexts.
# @param field [String] the property name
# @param model [Class] the model class
# @param admin_set_id [String, nil] ignored
# @return [Boolean, nil] false when the field is unsupported or the model has
#   no singleton `properties` method; otherwise the property's "multiple" value
def self.field_multi_value?(field:, model:, admin_set_id: nil) # rubocop:disable Lint/UnusedMethodArgument
  supported = field_supported?(field: field, model: model)
  return false unless supported && model.singleton_methods.include?(:properties)

  property = model&.properties&.[](field)
  property&.[]("multiple")
end
|
|
79
80
|
|
|
80
|
-
def self.field_supported?(field:, model:)
|
|
81
|
+
# Whether the model defines a method named after the field and has a
# non-blank entry for it in `properties`. admin_set_id is accepted but unused.
# @param field [String] the property name
# @param model [Class] the model class
# @param admin_set_id [String, nil] ignored
# @return [Boolean]
def self.field_supported?(field:, model:, admin_set_id: nil) # rubocop:disable Lint/UnusedMethodArgument
  return false unless model.method_defined?(field)

  model.properties[field].present?
end
|
|
83
84
|
|
|
@@ -120,11 +120,11 @@ module Bulkrax
|
|
|
120
120
|
save!(resource: resource, user: user)
|
|
121
121
|
end
|
|
122
122
|
|
|
123
|
-
def self.field_multi_value?(field:, model:)
|
|
124
|
-
return false unless field_supported?(field: field, model: model)
|
|
123
|
+
def self.field_multi_value?(field:, model:, admin_set_id: nil)
|
|
124
|
+
return false unless field_supported?(field: field, model: model, admin_set_id: admin_set_id)
|
|
125
125
|
|
|
126
126
|
if model.respond_to?(:schema)
|
|
127
|
-
schema = model
|
|
127
|
+
schema = cached_schema_for(klass: model, admin_set_id: admin_set_id)
|
|
128
128
|
dry_type = schema.key(field.to_sym)
|
|
129
129
|
return true if dry_type.respond_to?(:primitive) && dry_type.primitive == Array
|
|
130
130
|
|
|
@@ -134,9 +134,9 @@ module Bulkrax
|
|
|
134
134
|
end
|
|
135
135
|
end
|
|
136
136
|
|
|
137
|
-
def self.field_supported?(field:, model:)
|
|
137
|
+
def self.field_supported?(field:, model:, admin_set_id: nil)
|
|
138
138
|
if model.respond_to?(:schema)
|
|
139
|
-
schema_properties(model).include?(field)
|
|
139
|
+
schema_properties(klass: model, admin_set_id: admin_set_id).include?(field)
|
|
140
140
|
else
|
|
141
141
|
# We *might* have a Fedora object, so we need to consider that approach as
|
|
142
142
|
# well.
|
|
@@ -272,17 +272,34 @@ module Bulkrax
|
|
|
272
272
|
# rubocop:enable Metrics/ParameterLists
|
|
273
273
|
|
|
274
274
|
##
|
|
275
|
-
# Retrieve
|
|
276
|
-
#
|
|
275
|
+
# Retrieve schema property names for a model, respecting admin set contexts
|
|
276
|
+
# when using flexible metadata. Delegates context resolution to Hyrax so
|
|
277
|
+
# Bulkrax does not need to know about HYRAX_FLEXIBLE or contexts.
|
|
278
|
+
#
|
|
279
|
+
# @param klass [Class] the model class
|
|
280
|
+
# @param admin_set_id [String, nil] admin set used to resolve contexts
|
|
277
281
|
# @return [Array<String>]
|
|
278
|
-
def self.schema_properties(klass)
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
klass_key = klass.name
|
|
282
|
-
schema = klass.new.singleton_class.schema || klass.schema
|
|
283
|
-
@schema_properties_map[klass_key] = schema.map { |k| k.name.to_s } unless @schema_properties_map.key?(klass_key)
|
|
282
|
+
def self.schema_properties(klass:, admin_set_id: nil)
  # Each schema key's name is reported as a String.
  schema = cached_schema_for(klass: klass, admin_set_id: admin_set_id)
  schema.map { |key| key.name.to_s }
end
|
|
284
285
|
|
|
285
|
-
|
|
286
|
+
##
|
|
287
|
+
# Returns the schema for a model, memoized per (klass, admin_set_id) pair.
|
|
288
|
+
# Delegates to +Hyrax.schema_for+ when available so that context-gated
|
|
289
|
+
# properties are included without Bulkrax knowing about flexibility internals.
|
|
290
|
+
#
|
|
291
|
+
# @param klass [Class]
|
|
292
|
+
# @param admin_set_id [String, nil]
|
|
293
|
+
# @return [Dry::Types::Hash]
|
|
294
|
+
def self.cached_schema_for(klass:, admin_set_id: nil)
  @cached_schema_map ||= {}
  # Key is "ClassName" or "ClassName|admin_set_id" (a nil id is dropped).
  cache_key = [klass.name, admin_set_id].compact.join('|')
  return @cached_schema_map[cache_key] if @cached_schema_map[cache_key]

  # Hyrax.schema_for is used only when an admin set id is given and Hyrax offers it.
  use_hyrax_lookup = admin_set_id.present? && defined?(Hyrax) && Hyrax.respond_to?(:schema_for)
  @cached_schema_map[cache_key] =
    if use_hyrax_lookup
      Hyrax.schema_for(klass: klass, admin_set_id: admin_set_id)
    else
      klass.new.singleton_class.schema || klass.schema
    end
end
|
|
287
304
|
|
|
288
305
|
def self.ordered_file_sets_for(object)
|
|
@@ -457,7 +474,9 @@ module Bulkrax
|
|
|
457
474
|
# TODO What do we return when the calculated form fails?
|
|
458
475
|
# @raise [StandardError] when there was a failure calling the translation.
|
|
459
476
|
def perform_transaction_for(object:, attrs:)
|
|
460
|
-
|
|
477
|
+
admin_set_id = attrs[:admin_set_id] || attrs['admin_set_id'] ||
|
|
478
|
+
attributes[:admin_set_id] || attributes['admin_set_id']
|
|
479
|
+
form = Hyrax::Forms::ResourceForm.for(resource: object, admin_set_id: admin_set_id).prepopulate!
|
|
461
480
|
|
|
462
481
|
# TODO: Handle validations
|
|
463
482
|
form.validate(attrs)
|
|
@@ -474,13 +493,15 @@ module Bulkrax
|
|
|
474
493
|
end
|
|
475
494
|
|
|
476
495
|
##
|
|
477
|
-
# We accept attributes based on the model schema
|
|
496
|
+
# We accept attributes based on the model schema. Passes the admin set ID
|
|
497
|
+
# so that context-restricted properties are included in the permitted list.
|
|
478
498
|
#
|
|
479
499
|
# @return [Array<Symbols>]
|
|
480
500
|
def permitted_attributes
|
|
481
501
|
@permitted_attributes ||= (
|
|
482
502
|
base_permitted_attributes + if klass.respond_to?(:schema)
|
|
483
|
-
|
|
503
|
+
admin_set_id = attributes[:admin_set_id] || attributes['admin_set_id']
|
|
504
|
+
Bulkrax::ValkyrieObjectFactory.schema_properties(klass: klass, admin_set_id: admin_set_id)
|
|
484
505
|
else
|
|
485
506
|
klass.properties.keys.map(&:to_sym)
|
|
486
507
|
end
|
|
@@ -590,6 +611,29 @@ module Bulkrax
|
|
|
590
611
|
.symbolize_keys
|
|
591
612
|
|
|
592
613
|
attrs[:title] = [] if attrs[:title].blank?
|
|
614
|
+
attrs = convert_based_near_to_attributes(attrs)
|
|
615
|
+
attrs
|
|
616
|
+
end
|
|
617
|
+
|
|
618
|
+
# Hyrax's ResourceForm strips the plain `based_near` key during validation
# (BasedNearFieldBehavior#deserialize calls params.except('based_near')).
# Values must be passed as `based_near_attributes` — a numbered hash of
# { "0" => { "id" => uri, "_destroy" => "false" } } — so the populator
# can set them. Hyrax accepts any valid URI; note that only GeoNames URIs
# will resolve to a display label via LocationService.
#
# Each value must be a URI in its entirety (ignoring surrounding whitespace).
# The pattern is anchored with \A and \z because an unanchored match accepts
# any text that merely contains a URI-like substring.
#
# @param attrs [Hash] symbol-keyed attributes; mutated (:based_near is removed)
# @return [Hash] attrs, with :based_near_attributes set when values were given
# @raise [StandardError] when any non-blank value is not a URI
def convert_based_near_to_attributes(attrs)
  values = Array.wrap(attrs.delete(:based_near)).reject(&:blank?)
  return attrs if values.empty?

  uris = values.map { |value| value.to_s.strip }
  whole_uri = /\A#{::URI::DEFAULT_PARSER.make_regexp}\z/
  invalid = uris.reject { |uri| uri.match?(whole_uri) }
  if invalid.any?
    raise ::StandardError, "Invalid value(s) for location (based_near): #{invalid.join(', ')}. " \
                           "Values must be valid URIs (e.g. http://sws.geonames.org/5128581/)."
  end

  attrs[:based_near_attributes] = uris.each_with_index.to_h do |uri, i|
    [i.to_s, { "id" => uri, "_destroy" => "false" }]
  end
  attrs
end
|
|
595
639
|
end
|
|
@@ -18,12 +18,10 @@ module Bulkrax
|
|
|
18
18
|
else
|
|
19
19
|
if entry.failed?
|
|
20
20
|
ExporterRun.increment_counter(:failed_records, args[1])
|
|
21
|
-
ExporterRun.decrement_counter(:enqueued_records, args[1]) unless exporter_run.reload.enqueued_records <= 0
|
|
22
|
-
raise entry.reload.current_status.error_class.constantize
|
|
23
21
|
else
|
|
24
22
|
ExporterRun.increment_counter(:processed_records, args[1])
|
|
25
|
-
ExporterRun.decrement_counter(:enqueued_records, args[1]) unless exporter_run.reload.enqueued_records <= 0
|
|
26
23
|
end
|
|
24
|
+
ExporterRun.decrement_counter(:enqueued_records, args[1]) unless exporter_run.reload.enqueued_records <= 0
|
|
27
25
|
# rubocop:enable Rails/SkipsModelValidations
|
|
28
26
|
end
|
|
29
27
|
return entry if exporter_run.reload.enqueued_records.positive?
|
|
@@ -27,10 +27,17 @@ module Bulkrax
|
|
|
27
27
|
end
|
|
28
28
|
|
|
29
29
|
# Unpacks an uploaded import package before the import runs.
# - import_file_path itself is a zip: unzip it.
# - otherwise, attachments_zip_path is a zip: unzip it, then copy the
#   import file (the CSV) into the unzip location.
# In both cases spaces are then removed from the extracted file names.
# @param parser [Object] the importer's parser
# @return [Object, nil] the result of remove_spaces_from_filenames, or nil
#   when the import is not file based or nothing was unzipped
def unzip_imported_file(parser)
  return unless parser.file?

  if parser.zip?
    parser.unzip(parser.parser_fields['import_file_path'])
  elsif parser.zip_file?(parser.parser_fields['attachments_zip_path'])
    parser.unzip(parser.parser_fields['attachments_zip_path'])
    parser.copy_file(parser.parser_fields['import_file_path'])
  else
    return
  end

  parser.remove_spaces_from_filenames
end
|
|
35
42
|
|
|
36
43
|
def update_current_run_counters(importer)
|
|
@@ -45,7 +45,12 @@ module Bulkrax
|
|
|
45
45
|
encoding: 'utf-8'
|
|
46
46
|
}.merge(csv_read_data_options)
|
|
47
47
|
|
|
48
|
-
results =
|
|
48
|
+
results = if path.respond_to?(:read)
|
|
49
|
+
path.rewind if path.respond_to?(:rewind)
|
|
50
|
+
CSV.parse(path.read, **options)
|
|
51
|
+
else
|
|
52
|
+
CSV.read(path, **options)
|
|
53
|
+
end
|
|
49
54
|
csv_wrapper_class.new(results)
|
|
50
55
|
end
|
|
51
56
|
|
|
@@ -83,9 +88,10 @@ module Bulkrax
|
|
|
83
88
|
# model has to be separated so that it doesn't get mistranslated by to_h
|
|
84
89
|
raw_data = data.to_h
|
|
85
90
|
raw_data[:model] = data[:model] if data[:model].present?
|
|
86
|
-
# If the
|
|
87
|
-
#
|
|
88
|
-
raw_data[:parents] = raw_data[
|
|
91
|
+
# If the parents/children field mapping uses a custom column name, alias it to the standard key
|
|
92
|
+
# so downstream code can find it regardless of what the CSV column is named.
|
|
93
|
+
raw_data[:parents] = raw_data[parser.related_parents_raw_mapping.to_sym] if parser.related_parents_raw_mapping.present? && raw_data.key?(parser.related_parents_raw_mapping.to_sym) && parser.related_parents_raw_mapping != 'parents'
|
|
94
|
+
raw_data[:children] = raw_data[parser.related_children_raw_mapping.to_sym] if parser.related_children_raw_mapping.present? && raw_data.key?(parser.related_children_raw_mapping.to_sym) && parser.related_children_raw_mapping != 'children'
|
|
89
95
|
return raw_data
|
|
90
96
|
end
|
|
91
97
|
|
|
@@ -416,18 +422,32 @@ module Bulkrax
|
|
|
416
422
|
self.collection_ids
|
|
417
423
|
end
|
|
418
424
|
|
|
419
|
-
# If only filename is given, construct the path (/files/my_file)
|
|
425
|
+
# Resolves a file reference from the CSV to a path on disk.
# - A reference that already exists as given is returned unchanged.
# - A reference containing "/" (e.g. attachments/cat_scan.jpg) is resolved
#   relative to the CSV's directory, so any subdirectory name works.
# - A bare filename is looked up in the parser's path_to_files directory
#   (the legacy files/ location, kept for backward compatibility).
# @param file [String] the value from the CSV's file column
# @return [String] the path of the existing file
# @raise [RuntimeError] when the files directory is unknown or the file is missing
def path_to_file(file)
  return file if File.exist?(file)
  return resolve_relative_file_path(file) if file.include?('/')

  files_dir = importerexporter.parser.path_to_files
  raise "Could not determine path to files directory. Ensure the import package contains a zip or a valid import_file_path." if files_dir.nil?

  candidate = File.join(files_dir, file)
  raise "File not found: #{candidate}. Check the file column in your CSV and ensure the file exists in the import package or path_to_files directory." unless File.exist?(candidate)

  candidate
end
|
|
428
441
|
|
|
429
442
|
private
|
|
430
443
|
|
|
444
|
+
# Resolves a relative file reference against the directory that holds the
# parser's import file.
# @param file [String] a relative path such as "attachments/cat_scan.jpg"
# @return [String] the joined path when it exists
# @raise [RuntimeError] when the joined path does not exist
def resolve_relative_file_path(file)
  csv_dir = File.dirname(importerexporter.parser.import_file_path)
  resolved = File.join(csv_dir, file)
  raise "File not found: #{resolved}. Check the file path in your CSV and ensure the file exists in the import package or directory." unless File.exist?(resolved)

  resolved
end
|
|
450
|
+
|
|
431
451
|
def map_file_sets(file_sets)
|
|
432
452
|
# rubocop:disable Rails/Presence
|
|
433
453
|
file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
|
data/app/models/bulkrax/entry.rb
CHANGED
|
@@ -169,6 +169,30 @@ module Bulkrax
|
|
|
169
169
|
import_file_path if original_file?
|
|
170
170
|
end
|
|
171
171
|
|
|
172
|
+
# Lists the original import files that still exist on disk: the import file
# and, when present, the attachments ZIP.
# The import file is always labelled :csv, whatever its actual extension.
# @return [Array<Hash>] hashes with :path, :name and :type (:csv or :zip) keys
def original_files
  candidates = [
    [import_file_path, :csv],
    [parser_fields['attachments_zip_path'], :zip]
  ]

  candidates.filter_map do |path, type|
    { path: path, name: File.basename(path), type: type } if path && File.exist?(path)
  end
end
|
|
195
|
+
|
|
172
196
|
def replace_files
|
|
173
197
|
self.parser_fields['replace_files']
|
|
174
198
|
end
|
|
@@ -241,8 +265,14 @@ module Bulkrax
|
|
|
241
265
|
# [['Single Metadata File for all works', 'single'], ['Multiple Files, one per Work', 'multi']]
|
|
242
266
|
# end
|
|
243
267
|
|
|
244
|
-
# If the import data is zipped, unzip it to this path
|
|
245
268
|
def importer_unzip_path(mkdir: false)
|
|
269
|
+
entry = parser_fields&.[]('import_file_path')
|
|
270
|
+
if entry.is_a?(String) && entry.end_with?('.zip') && File.file?(entry) && parser_fields["file_style"] != I18n.t('bulkrax.importer.xml.file_style.server_path')
|
|
271
|
+
unzip_dir = File.dirname(entry)
|
|
272
|
+
FileUtils.mkdir_p(unzip_dir) if mkdir
|
|
273
|
+
return unzip_dir
|
|
274
|
+
end
|
|
275
|
+
|
|
246
276
|
@importer_unzip_path ||= File.join(parser.base_path, "import_#{path_string}")
|
|
247
277
|
return @importer_unzip_path if Dir.exist?(@importer_unzip_path) || mkdir == true
|
|
248
278
|
|
|
@@ -131,7 +131,7 @@ module Bulkrax
|
|
|
131
131
|
return false if excluded?(field)
|
|
132
132
|
return true if supported_bulkrax_fields.include?(field)
|
|
133
133
|
|
|
134
|
-
Bulkrax.object_factory.field_supported?(field: field, model: factory_class)
|
|
134
|
+
Bulkrax.object_factory.field_supported?(field: field, model: factory_class, admin_set_id: importerexporter.try(:admin_set_id))
|
|
135
135
|
end
|
|
136
136
|
|
|
137
137
|
def supported_bulkrax_fields
|
|
@@ -145,7 +145,7 @@ module Bulkrax
|
|
|
145
145
|
return true if fields_that_are_always_singular.include?(field.to_s)
|
|
146
146
|
return false if fields_that_are_always_multiple.include?(field.to_s)
|
|
147
147
|
|
|
148
|
-
Bulkrax.object_factory.field_multi_value?(field: field, model: factory_class)
|
|
148
|
+
Bulkrax.object_factory.field_multi_value?(field: field, model: factory_class, admin_set_id: importerexporter.try(:admin_set_id))
|
|
149
149
|
end
|
|
150
150
|
|
|
151
151
|
def fields_that_are_always_multiple
|
|
@@ -51,15 +51,16 @@ module Bulkrax
|
|
|
51
51
|
# Is this a zip file?
|
|
52
52
|
def zip?
  # Only an existing regular file at import_file_path can be a zip.
  candidate = parser_fields&.[]('import_file_path')
  return zip_file?(candidate) if candidate && File.file?(candidate)

  false
end
|
|
56
57
|
|
|
57
|
-
|
|
58
|
+
# Whether the given path is an existing regular file whose MIME type, as
# reported by Marcel for the opened file, includes application/zip or
# application/gzip.
# @param filename [String, nil] path to check
# @return [Boolean]
def zip_file?(filename)
  return false unless filename && File.file?(filename)

  File.open(filename) do |handle|
    detected = ::Marcel::MimeType.for(name: handle)
    %w[application/zip application/gzip].any? { |archive_type| detected.include?(archive_type) }
  end
end
|
|
64
65
|
end
|
|
65
66
|
end
|
|
@@ -12,7 +12,7 @@ module Bulkrax
|
|
|
12
12
|
:seen, :increment_counters, :parser_fields, :user, :keys_without_numbers,
|
|
13
13
|
:key_without_numbers, :status, :set_status_info, :status_info, :status_at,
|
|
14
14
|
:exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
|
|
15
|
-
:zip?, :file?, :remove_and_rerun,
|
|
15
|
+
:zip?, :file?, :remove_and_rerun, :zip_file?,
|
|
16
16
|
to: :importerexporter
|
|
17
17
|
|
|
18
18
|
# @todo Convert to `class_attribute :parser_fiels, default: {}`
|
|
@@ -434,14 +434,40 @@ module Bulkrax
|
|
|
434
434
|
return untar(file_to_unzip) if file_to_unzip.end_with?('.tar.gz')
|
|
435
435
|
|
|
436
436
|
Zip::File.open(file_to_unzip) do |zip_file|
|
|
437
|
+
real_entries = zip_file.reject { |e| macos_junk_entry?(e.name) }
|
|
438
|
+
top_level_dirs = real_entries.map { |e| e.name.split('/').first }.uniq
|
|
439
|
+
strip_prefix = top_level_dirs.size == 1 ? "#{top_level_dirs.first}/" : nil
|
|
440
|
+
|
|
441
|
+
dest_dir = importer_unzip_path(mkdir: true)
|
|
437
442
|
zip_file.each do |entry|
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
443
|
+
next unless entry.file?
|
|
444
|
+
next if macos_junk_entry?(entry.name)
|
|
445
|
+
name = strip_prefix ? entry.name.delete_prefix(strip_prefix) : entry.name
|
|
446
|
+
next if name.empty?
|
|
447
|
+
dest_path = File.join(dest_dir, name)
|
|
448
|
+
FileUtils.mkdir_p(File.dirname(dest_path))
|
|
449
|
+
unless File.exist?(dest_path)
|
|
450
|
+
# rubyzip 2.x: extract(entry, absolute_dest_path)
|
|
451
|
+
# rubyzip 3.x: extract(entry, relative_name, destination_directory: dir)
|
|
452
|
+
if zip_file.method(:extract).arity == 2
|
|
453
|
+
zip_file.extract(entry, dest_path)
|
|
454
|
+
else
|
|
455
|
+
zip_file.extract(entry, name, destination_directory: dest_dir)
|
|
456
|
+
end
|
|
457
|
+
end
|
|
441
458
|
end
|
|
442
459
|
end
|
|
443
460
|
end
|
|
444
461
|
|
|
462
|
+
# Whether an archive entry name is macOS metadata: anything under
# "__MACOSX/", or any path segment that is ".DS_Store" or starts with "._".
# @param name [String] the entry name inside the archive
# @return [Boolean]
def macos_junk_entry?(name)
  return true if name.start_with?('__MACOSX/')

  name.split('/').any? do |segment|
    segment == '.DS_Store' || segment.start_with?('._')
  end
end
|
|
465
|
+
|
|
466
|
+
# Copies a file into the importer's unzip directory (created if needed),
# keeping its basename.
# @param file_to_copy [String] path of the file to copy
def copy_file(file_to_copy)
  target_dir = importer_unzip_path(mkdir: true)
  FileUtils.cp(file_to_copy, File.join(target_dir, File.basename(file_to_copy)))
end
|
|
470
|
+
|
|
445
471
|
def untar(file_to_untar)
|
|
446
472
|
Dir.mkdir(importer_unzip_path(mkdir: true)) unless File.directory?(importer_unzip_path(mkdir: true))
|
|
447
473
|
command = "tar -xzf #{Shellwords.escape(file_to_untar)} -C #{Shellwords.escape(importer_unzip_path)}"
|
|
@@ -452,7 +478,7 @@ module Bulkrax
|
|
|
452
478
|
# File names referenced in CSVs have spaces replaced with underscores
|
|
453
479
|
# @see Bulkrax::CsvParser#file_paths
|
|
454
480
|
def remove_spaces_from_filenames
|
|
455
|
-
files = Dir.glob(File.join(importer_unzip_path, 'files', '*'))
|
|
481
|
+
files = Dir.glob(File.join(importer_unzip_path, 'files', '*')).uniq
|
|
456
482
|
files_with_spaces = files.select { |f| f.split('/').last.match?(' ') }
|
|
457
483
|
return if files_with_spaces.blank?
|
|
458
484
|
|