cdm_migrator 3.2.0 → 3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/MIT-LICENSE +20 -20
  3. data/README.md +41 -41
  4. data/Rakefile +38 -38
  5. data/app/assets/config/cdm_migrator_manifest.js +2 -2
  6. data/app/assets/javascripts/cdm_migrator/application.js +13 -13
  7. data/app/assets/stylesheets/cdm_migrator/application.css +15 -15
  8. data/app/assets/stylesheets/cdm_migrator/csv_checker.css +36 -36
  9. data/app/controllers/cdm_migrator/application_controller.rb +10 -10
  10. data/app/controllers/cdm_migrator/cdm_controller.rb +216 -213
  11. data/app/controllers/cdm_migrator/csv_controller.rb +428 -371
  12. data/app/helpers/cdm_migrator/application_helper.rb +4 -4
  13. data/app/jobs/cdm_migrator/application_job.rb +4 -4
  14. data/app/jobs/cdm_migrator/batch_create_files_job.rb +32 -20
  15. data/app/jobs/cdm_migrator/batch_create_files_with_ordered_members_job.rb +45 -0
  16. data/app/jobs/cdm_migrator/batch_create_works_job.rb +20 -14
  17. data/app/jobs/cdm_migrator/cdm_ingest_files_job.rb +35 -35
  18. data/app/jobs/cdm_migrator/create_work_job.rb +36 -25
  19. data/app/jobs/cdm_migrator/restart_upload_from_middle_job.rb +36 -0
  20. data/app/jobs/cdm_migrator/update_object_job.rb +10 -10
  21. data/app/mailers/cdm_migrator/application_mailer.rb +6 -6
  22. data/app/models/cdm_migrator/application_record.rb +5 -5
  23. data/app/models/cdm_migrator/batch_ingest.rb +33 -33
  24. data/app/models/cdm_migrator/ingest_work.rb +7 -16
  25. data/app/views/cdm_migrator/cdm/collection.html.erb +11 -11
  26. data/app/views/cdm_migrator/cdm/mappings.html.erb +54 -53
  27. data/app/views/cdm_migrator/csv/_batches_list.html.erb +4 -4
  28. data/app/views/cdm_migrator/csv/_default_group.html.erb +17 -17
  29. data/app/views/cdm_migrator/csv/_error_list.html.erb +21 -0
  30. data/app/views/cdm_migrator/csv/_list_batches.html.erb +21 -21
  31. data/app/views/cdm_migrator/csv/_tabs.html.erb +8 -8
  32. data/app/views/cdm_migrator/csv/csv_checker.html.erb +45 -45
  33. data/app/views/cdm_migrator/csv/edit.html.erb +17 -18
  34. data/app/views/cdm_migrator/csv/index.html.erb +19 -19
  35. data/app/views/cdm_migrator/csv/upload.html.erb +18 -18
  36. data/app/views/layouts/cdm_migrator/application.html.erb +14 -14
  37. data/config/routes.rb +19 -19
  38. data/db/migrate/20191211193859_create_batch_ingests.rb +19 -19
  39. data/db/migrate/20191212192315_create_ingest_works.rb +18 -18
  40. data/lib/cdm_migrator/engine.rb +29 -29
  41. data/lib/cdm_migrator/version.rb +3 -3
  42. data/lib/cdm_migrator.rb +5 -5
  43. data/lib/generators/cdm_migrator/install/install_generator.rb +79 -79
  44. data/lib/generators/cdm_migrator/install/templates/config/cdm_migrator.yml +53 -50
  45. data/lib/generators/cdm_migrator/install/templates/presenters/hyku/menu_presenter.rb +47 -47
  46. data/lib/generators/cdm_migrator/install/templates/presenters/hyrax/menu_presenter.rb +66 -66
  47. data/lib/generators/cdm_migrator/install/templates/sidebar/_tasks.html.erb +55 -55
  48. data/lib/tasks/cdm_migrator_tasks.rake +4 -4
  49. metadata +6 -6
  50. data/app/views/cdm_migrator/csv/_path_list.html.erb +0 -19
  51. data/app/views/cdm_migrator/csv/_results_pagination.html.erb +0 -9
@@ -1,371 +1,428 @@
1
- module CdmMigrator
2
- class CsvController < ApplicationController
3
- helper_method :default_page_title, :admin_host?, :available_translations, :available_works
4
- include ActionView::Helpers::UrlHelper
5
- layout 'hyrax/dashboard' if Hyrax
6
- before_action :authenticate, except: :index
7
- before_action :load_config, only: :csv_checker
8
-
9
- def csv_checker
10
- if params[:file]
11
- check_csv params[:file].path
12
- if @error_list.blank?
13
- flash[:notice] = "All data are valid."
14
- else
15
- flash[:error] = "The CSV Checker found some errors in the CSV. Please correct them and check again."
16
- end
17
- end
18
- end
19
-
20
- def index
21
- if current_page?(main_app.csv_my_batches_path(locale: nil))
22
- @batches = BatchIngest.where(user_id: current_user.id).reverse_order
23
- elsif current_page?(main_app.csv_all_batches_path(locale: nil))
24
- @batches = BatchIngest.all.reverse_order
25
- else
26
- @batches = []
27
- end
28
- end
29
-
30
- def upload
31
- @admin_sets = AdminSet.all.map { |as| [as.title.first, as.id] }
32
- @collections = Collection.all.map { |col| [col.title.first, col.id] }
33
- end
34
-
35
- def create
36
- dir = Rails.root.join('public', 'uploads', 'csvs')
37
- FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
38
- time = DateTime.now.strftime('%s')
39
- filename = params[:csv_import][:csv_file].original_filename.gsub('.csv', "#{time}.csv")
40
- csv = dir.join(filename).to_s
41
- File.open(csv, 'wb') do |file|
42
- file.write(params[:csv_import][:csv_file].read)
43
- end
44
- check_csv csv
45
- if @error_list.present?
46
- flash[:error] = "Cdm Migrator found some problems with the CSV. Use the CSV Checker for more details."
47
- end
48
- parse_csv(csv, params[:csv_import][:mvs])
49
-
50
- ingest = BatchIngest.new({
51
- data: @works,
52
- size: @works.length,
53
- csv: csv,
54
- admin_set_id: params[:admin_set],
55
- collection_id: params[:collection],
56
- user_id: current_user.id,
57
- message: @path_list.blank? ? nil : @path_list.to_s.gsub("\"", "&quot;")
58
- })
59
- if ingest.save! && @path_list.blank?
60
- BatchCreateWorksJob.perform_later(ingest, current_user)
61
- flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
62
- redirect_to csv_my_batches_path
63
- else
64
- flash[:error] ||= "csv could not be parsed, please check and re-upload"
65
- redirect_to csv_upload_path
66
- end
67
- end
68
-
69
- def rerun
70
- ingest = BatchIngest.find(params[:id]).deep_dup
71
- ingest.save
72
- BatchCreateWorksJob.perform_later(ingest, current_user)
73
- flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
74
- redirect_to csv_my_batches_path
75
- end
76
-
77
- def generate
78
- headers = %w(type url)
79
- skip = %w(id head tail depositor date_uploaded date_modified import_url thumbnail_id embargo_id lease_id access_control_id representative_id)
80
- GenericWork.new.attributes.each do |key, val|
81
- headers << "work_#{key}" unless skip.include? key
82
- end
83
- FileSet.new.attributes.each do |key, val|
84
- headers << "file_#{key}" unless skip.include? key
85
- end
86
- fname = "template_#{DateTime.now.to_i}"
87
- render plain: CSV.generate { |csv| csv << headers }, content_type: 'text/csv'
88
- end
89
-
90
- def edit
91
- @collections = ::Collection.all.map { |c| [c.title.first, c.id] }
92
- end
93
-
94
- def update
95
- mvs = params[:csv_update][:mvs]
96
- csv = CSV.parse(params[:csv_update][:csv_file].read, headers: true, encoding: 'utf-8').map(&:to_hash)
97
- csv.each do |row|
98
- obj = ActiveFedora::Base.find row['id']
99
- type = row.first.last
100
- if type.nil?
101
- next
102
- elsif type.include? "Work"
103
- metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
104
- elsif type.include? "File"
105
- metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
106
- end
107
- unless metadata.nil?
108
- obj.attributes = metadata
109
- obj.save
110
- end
111
- end
112
- flash[:notice] = "csv successfully uploaded"
113
- redirect_to csv_edit_path
114
- end
115
-
116
- def export
117
- solr = RSolr.connect url: Account.find_by(tenant: Apartment::Tenant.current).solr_endpoint.url
118
- response = solr.get 'select', params: {
119
- q: "member_of_collection_ids_ssim:#{params[:collection_id]}",
120
- fl: "id"
121
- }
122
- unless response['response']['docs'].empty? || response['response']['docs'][0].empty?
123
- work_ids = response['response']['docs'].map { |doc| doc['id'] }
124
- end
125
- #works = ::ActiveFedora::Base.where member_of_collection_ids_ssim: params[:collection_id]
126
- @csv_headers = ['type'] + work_fields
127
- @csv_array = [@csv_headers.join(',')]
128
- work_ids.each do |work_id|
129
- doc = ::SolrDocument.find work_id
130
- add_line doc
131
- doc._source[:file_set_ids_ssim].each do |file_id|
132
- file_doc = ::SolrDocument.find file_id
133
- add_line file_doc
134
- end
135
- end
136
-
137
- send_data @csv_array.join("\n"),
138
- :type => 'text/csv; charset=iso-8859-5; header=present',
139
- :disposition => "attachment; filename=export.csv"
140
- end
141
-
142
- private
143
-
144
- def authenticate
145
- authorize! :create, available_works.first
146
- end
147
-
148
- def add_line doc
149
- line_hash = {}
150
- line_hash['type'] = doc._source[:has_model_ssim].first
151
- work_fields.each do |field|
152
- line_hash[field] = create_cell doc, field
153
- end
154
- @csv_array << line_hash.values_at(*@csv_headers).map { |cell| cell = '' if cell.nil?; "\"#{cell.gsub("\"", "\"\"")}\"" }.join(',')
155
-
156
- end
157
-
158
- def work_fields
159
- @fields ||= available_works.map { |work| work.new.attributes.keys }.flatten.uniq - excluded_fields
160
- end
161
-
162
- def excluded_fields
163
- %w[date_uploaded date_modified head tail state proxy_depositor on_behalf_of arkivo_checksum label
164
- relative_path import_url part_of resource_type access_control_id
165
- representative_id thumbnail_id rendering_ids admin_set_id embargo_id
166
- lease_id]
167
- end
168
-
169
- def create_cell w, field
170
- if field.include? 'date'
171
- if w._source[field+'_tesim'].is_a?(Array)
172
- w._source[field+'_tesim'].join('|')
173
- else
174
- w._source[field+'_tesim']
175
- end
176
- elsif w.respond_to?(field.to_sym)
177
- if w.send(field).is_a?(Array)
178
- w.send(field).join('|')
179
- else
180
- w.send(field)
181
- end
182
- end
183
- end
184
-
185
- def available_works
186
- @available_works ||= Hyrax::QuickClassificationQuery.new(current_user).authorized_models
187
- end
188
-
189
- def parse_csv csv, mvs
190
- csv = CSV.parse(File.read(csv), headers: true, encoding: 'utf-8').map(&:to_hash)
191
- @works = []
192
- csv.each do |row|
193
- type = row.first.last
194
- if type.nil?
195
- next
196
- elsif type.include? "Work"
197
- metadata = create_data(row, work_form(type), Object.const_get(type).new, mvs)
198
- @works << {type: type, metadata: metadata, files: []}
199
- elsif type.include? "File"
200
- metadata = create_data(row, file_form, FileSet.new, mvs)
201
- @works.last[:files] << {url: row.delete('url'), title: row.delete('title'), metadata: metadata}
202
- end
203
- end
204
- end
205
-
206
- def load_config
207
- tenant = Account.find_by(tenant: Apartment::Tenant.current).cname
208
- if CdmMigrator::Engine.config['tenant_settings'].has_key?(tenant)
209
- settings = CdmMigrator::Engine.config['tenant_settings'][tenant]['csv_checker']
210
- if settings.present?
211
- # .map will throw an error if settings[key] has no value
212
- @edtf_fields = settings['edtf_fields'].map(&:to_sym) if settings['edtf_fields']
213
- @uri_fields = settings['valid_uri_fields'].map(&:to_sym) if settings['valid_uri_fields']
214
- @separator = settings['multi_value_separator']
215
- @separator_fields = settings['separator_fields'].map(&:to_sym) if settings['separator_fields']
216
- @path_to_drive = settings['path_to_drive']
217
- else
218
- raise "Cdm Migrator couldn't find any configured settings. Are they in cdm_migrator.yml?"
219
- end
220
- else
221
- raise "Cdm Migrator couldn't find this tenant. Is it configured?"
222
- end
223
- end
224
-
225
- def check_csv csv_file
226
- row_number = 1
227
- @error_list = {}
228
- check_mounted_drive if @path_to_drive.present?
229
-
230
- CSV.foreach(csv_file, headers: true, header_converters: :symbol) do |row|
231
- row_number +=1 # Tells user what CSV row the error is on
232
- if row[:object_type].include? "Work"
233
- check_edtf(row_number, row) if @edtf_fields.present?
234
- check_uris(row_number, row) if @uri_fields.present?
235
- if params[:multi_value_separator].present? and @separator_fields.present?
236
- check_separator(row_number, row, params[:multi_value_separator])
237
- else
238
- alert_message = "No multi-value separator character was selected or no fields were configured. CSV Checker didn't check for valid separators."
239
- if flash[:alert] and flash[:alert].exclude?(alert_message) # Only add this message once, rather than per line
240
- flash[:alert] << alert_message
241
- elsif flash[:alert].blank?
242
- flash[:alert] = Array.wrap(alert_message)
243
- end
244
- end
245
- elsif row[:object_type] == "File"
246
- check_file_path(row_number, row[:url])
247
- else
248
- @error_list[row_number] = { "object_type" => "No or unknown object type. Please give a valid type (e.g. GenericWork, File)." }
249
- end
250
- @error_list.delete_if { |key, value| value.blank? } # Data are valid, no need to print the row
251
- end
252
- end
253
-
254
- def check_mounted_drive
255
- drive_address = @path_to_drive
256
- unless Dir.exist?(drive_address) and !Dir[drive_address].empty?
257
- flash[:alert] = "CSV Checker can't find the mounted drive to check file paths, so some paths may be mislabelled as incorrect. Please contact the administrator or try again later."
258
- end
259
- end
260
-
261
- def check_file_path(row_number, file_path)
262
- if file_path.nil?
263
- @error_list[row_number] = { "url" => "url is blank." }
264
- elsif File.file?(file_path.gsub("file://", "")) == false
265
- @error_list[row_number] = { "url" => "No file found at #{file_path}" }
266
- end
267
- end
268
-
269
- def check_edtf(row_number, row)
270
- edtf_fields = @edtf_fields
271
- edtf_errors = edtf_fields.each_with_object({}) do |field, hash|
272
- if Date.edtf(row[field]) == nil and row[field] != "unknown"
273
- hash[field.to_s] = "Blank or not a valid EDTF date."
274
- end
275
- end
276
- @error_list[row_number] = edtf_errors
277
- end
278
-
279
- # <Example: should be http://rightsstatements.org/vocab/etc. NOT https://rightsstatements.org/page/etc.
280
- def check_uris(row_number, row)
281
- uri_fields = @uri_fields
282
- uri_errors = uri_fields.each_with_object({}) do |field, hash|
283
- if row[field].include? "page"
284
- hash[field.to_s] = "Links to page instead of URI. (e.g. https://rightsstatements.org/page/etc. instead of http://rightsstatements.org/vocab/etc.)"
285
- end
286
- end
287
- @error_list[row_number].merge!(uri_errors)
288
- end
289
-
290
- # Check multi-value separators
291
- def check_separator(row_number, row, character)
292
- uri_fields = @separator_fields
293
- separator_errors = uri_fields.each_with_object({}) do |field, hash|
294
- value = row[field]
295
- if value.present?
296
- URI.extract(value).each { |uri| value.gsub!(uri, '') }
297
- unless value.split("").all?(character) # Check if remaining characters are the correct separator
298
- hash[field.to_s] = "May contain the wrong multi-value separator (i.e. not #{character})."
299
- end
300
- end
301
- end
302
- @error_list[row_number].merge!(separator_errors)
303
- end
304
-
305
- def default_page_title
306
- 'CSV Batch Uploader'
307
- end
308
-
309
- def admin_host?
310
- false unless Settings.multitenancy.enabled
311
- end
312
-
313
- def available_translations
314
- {
315
- 'en' => 'English',
316
- 'fr' => 'French'
317
- }
318
- end
319
-
320
- def work_form(worktype = "GenericWork")
321
- Module.const_get("Hyrax::#{worktype}Form") rescue nil || Module.const_get("Hyrax::Forms::WorkForm")
322
- end
323
-
324
- def file_form
325
- Module.const_get("Hyrax::FileSetForm") rescue nil || Module.const_get("Hyrax::Forms::FileSetEditForm")
326
- end
327
-
328
- def secondary_terms form_name
329
- form_name.terms - form_name.required_fields -
330
- [:visibility_during_embargo, :embargo_release_date,
331
- :visibility_after_embargo, :visibility_during_lease,
332
- :lease_expiration_date, :visibility_after_lease, :visibility,
333
- :thumbnail_id, :representative_id, :ordered_member_ids,
334
- :collection_ids, :in_works_ids, :admin_set_id, :files, :source, :member_of_collection_ids]
335
- end
336
-
337
- def create_data data, type, object, mvs
338
- final_data = {}
339
- accepted_terms = type.required_fields + secondary_terms(type)
340
- data.each do |key, att|
341
- if (att.nil? || att.empty? || key.to_s.include?("object_type") || !accepted_terms.include?(key.to_sym))
342
- next
343
- elsif (object.send(key).nil?)
344
- final_data[key] = att
345
- else
346
- final_data[key] = att.split(mvs)
347
- end
348
- end
349
- final_data
350
- end
351
-
352
- def create_lease visibility, status_after, date
353
- lease = Hydra::AccessControls::Lease.new(visibility_during_lease: visibility,
354
- visibility_after_lease: status_after, lease_expiration_date: @lease_date)
355
- lease.save
356
- end
357
-
358
- def create_embargo visibility
359
- embargo = Hydra::AccessControls::Embargo.new
360
- embargo.visibility_during_embargo = visibility
361
- embargo.visibility_after_embargo = @status_after
362
- embargo.embargo_release_date = @embargo_date
363
- embargo.save
364
- end
365
-
366
- def log(user)
367
- Hyrax::Operation.create!(user: user,
368
- operation_type: "Attach Remote File")
369
- end
370
- end
371
- end
module CdmMigrator
  # Controller behind the CSV tools: validating a CSV ("CSV Checker"),
  # uploading a CSV to start a batch ingest, re-running a batch, generating a
  # header template, bulk-updating existing objects and exporting a collection.
  class CsvController < ApplicationController
    # Longest transcript (in characters) the checker accepts.
    MAX_TRANSCRIPT_LENGTH = 9000
    # Value sent as the Solr `rows` parameter when exporting a collection.
    EXPORT_ROW_LIMIT = 3400

    helper_method :default_page_title, :admin_host?, :available_translations, :available_works
    include ActionView::Helpers::UrlHelper
    # `defined?` rather than a bare constant reference: referencing an
    # undefined constant raises NameError instead of evaluating to false.
    layout 'hyrax/dashboard' if defined?(Hyrax)
    before_action :authenticate, except: :index
    before_action :load_config, only: :csv_checker

    # Validates an uploaded CSV (params[:file]) and reports the outcome via
    # flash; per-row details are left in @error_list.
    def csv_checker
      return unless params[:file]

      check_csv params[:file].path
      if @error_list.blank?
        flash[:notice] = "All data are valid."
      else
        flash[:error] = "The CSV Checker found some errors in the CSV. Please correct them and check again."
      end
    end

    # Lists batches: the current user's on the "my batches" page, every batch
    # on the "all batches" page, and none on any other page.
    def index
      @batches =
        if current_page?(main_app.csv_my_batches_path(locale: nil))
          BatchIngest.where(user_id: current_user.id).reverse_order
        elsif current_page?(main_app.csv_all_batches_path(locale: nil))
          BatchIngest.all.reverse_order
        else
          []
        end
    end

    # Loads [title, id] pairs for the admin-set and collection selects.
    def upload
      @admin_sets = AdminSet.all.map { |as| [as.title.first, as.id] }
      @collections = Collection.all.map { |col| [col.title.first, col.id] }
    end

    # Stores the uploaded CSV under public/uploads/csvs, checks and parses it,
    # records a BatchIngest and queues BatchCreateWorksJob.
    def create
      dir = Rails.root.join('public', 'uploads', 'csvs')
      FileUtils.mkdir_p(dir) # no-op when the directory already exists
      uploaded = params[:csv_import][:csv_file]
      # original_filename is client-supplied; File.basename keeps a crafted
      # name such as "../../x.csv" from escaping the upload directory.
      filename = File.basename(uploaded.original_filename).gsub('.csv', "#{Time.now.to_i}.csv")
      csv = dir.join(filename).to_s
      File.open(csv, 'wb') { |file| file.write(uploaded.read) }

      check_csv csv
      if @error_list.present?
        flash[:error] = "Cdm Migrator found some problems with the CSV. Use the CSV Checker for more details."
      end
      parse_csv(csv, params[:csv_import][:mvs])

      ingest = BatchIngest.new(
        data: @works,
        size: @works.length,
        csv: csv,
        admin_set_id: params[:admin_set],
        collection_id: params[:collection],
        user_id: current_user.id,
        message: @path_list.blank? ? nil : @path_list.to_s.gsub("\"", "&quot;")
      )
      # save! raises on failure, so the else branch is reached only when
      # @path_list is present.
      if ingest.save! && @path_list.blank?
        BatchCreateWorksJob.perform_later(ingest, current_user)
        flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
        redirect_to csv_my_batches_path
      else
        flash[:error] ||= "csv could not be parsed, please check and re-upload"
        redirect_to csv_upload_path
      end
    end

    # Duplicates an existing batch (params[:id]) and queues it again.
    def rerun
      ingest = BatchIngest.find(params[:id]).deep_dup
      ingest.save
      BatchCreateWorksJob.perform_later(ingest, current_user)
      flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
      redirect_to csv_my_batches_path
    end

    # Renders a header-only CSV template built from GenericWork and FileSet
    # attribute names (prefixed "work_" / "file_"), minus internal fields.
    def generate
      headers = %w(type url)
      skip = %w(id head tail depositor date_uploaded date_modified import_url thumbnail_id embargo_id lease_id access_control_id representative_id)
      GenericWork.new.attributes.each_key do |key|
        headers << "work_#{key}" unless skip.include? key
      end
      FileSet.new.attributes.each_key do |key|
        headers << "file_#{key}" unless skip.include? key
      end
      render plain: CSV.generate { |csv| csv << headers }, content_type: 'text/csv'
    end

    # Loads [title, id] pairs for the collection select on the edit page.
    def edit
      @collections = ::Collection.all.map { |c| [c.title.first, c.id] }
    end

    # Applies metadata from an uploaded CSV to existing objects, looked up by
    # each row's 'id' column. The first column of a row holds its type.
    def update
      mvs = params[:csv_update][:mvs]
      rows = CSV.parse(params[:csv_update][:csv_file].read.force_encoding("UTF-8"),
                       headers: true, encoding: 'utf-8').map(&:to_hash)
      rows.each do |row|
        type = row.first.last
        # Skip untyped or unrecognised rows before touching the repository,
        # so a blank line cannot raise from the lookup below.
        next if type.nil?
        next unless type.include?("Work") || type.include?("File")

        obj = ActiveFedora::Base.find row['id']
        metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
        next if metadata.nil?

        obj.attributes = metadata
        obj.save
      end
      flash[:notice] = "csv successfully uploaded"
      redirect_to csv_edit_path
    end

    # Sends a CSV of every work in params[:collection_id], each followed by
    # its file sets.
    def export
      solr = RSolr.connect url: Account.find_by(tenant: Apartment::Tenant.current).solr_endpoint.url
      # NOTE(review): params[:collection_id] is interpolated into the Solr
      # query unescaped - confirm it is validated upstream or escape it.
      response = solr.get 'select', params: {
        q: "member_of_collection_ids_ssim:#{params[:collection_id]}",
        rows: EXPORT_ROW_LIMIT,
        fl: "id"
      }
      docs = response['response']['docs']
      # An empty result yields an empty export instead of calling #each on nil.
      work_ids = (docs.empty? || docs[0].empty?) ? [] : docs.map { |doc| doc['id'] }

      @csv_headers = ['type'] + work_fields
      @csv_array = [@csv_headers.join(',')]
      work_ids.each do |work_id|
        doc = ::SolrDocument.find work_id
        add_line doc
        # Array() tolerates works whose document has no file_set_ids_ssim.
        Array(doc._source[:file_set_ids_ssim]).each do |file_id|
          add_line ::SolrDocument.find(file_id)
        end
      end

      send_data @csv_array.join("\n"),
                type: 'text/csv; charset=iso-8859-5; header=present',
                disposition: "attachment; filename=export.csv"
    end

    private

    # Requires permission to create the first work type available to the user.
    def authenticate
      authorize! :create, available_works.first
    end

    # Appends one quoted CSV line for +doc+ to @csv_array, in @csv_headers
    # order (both are set up by #export).
    def add_line(doc)
      line_hash = { 'type' => doc._source[:has_model_ssim].first }
      work_fields.each { |field| line_hash[field] = create_cell(doc, field) }
      # to_s covers nil and non-String values; embedded quotes are doubled.
      cells = line_hash.values_at(*@csv_headers).map { |cell| "\"#{cell.to_s.gsub("\"", "\"\"")}\"" }
      @csv_array << cells.join(',')
    end

    # Attribute names across all available work types, minus excluded_fields.
    def work_fields
      @fields ||= available_works.map { |work| work.new.attributes.keys }.flatten.uniq - excluded_fields
    end

    # Attribute names left out of the export.
    def excluded_fields
      %w[date_uploaded date_modified head tail state proxy_depositor on_behalf_of arkivo_checksum label
         relative_path import_url part_of resource_type access_control_id
         representative_id thumbnail_id rendering_ids admin_set_id embargo_id
         lease_id]
    end

    # Returns the export cell for +field+ of document +w+; arrays are joined
    # with '|'. Date-like fields are read from the "<field>_tesim" entry of the
    # document source, other fields via the document's accessor (nil when the
    # document does not respond to it).
    def create_cell(w, field)
      value =
        if field.include?('date') || field == 'chronological_coverage'
          w._source[field + '_tesim']
        elsif w.respond_to?(field.to_sym)
          w.send(field)
        end
      value.is_a?(Array) ? value.join('|') : value
    end

    # Work models the current user is authorized for (memoized).
    def available_works
      @available_works ||= Hyrax::QuickClassificationQuery.new(current_user).authorized_models
    end

    # Parses the CSV at path +csv+ into @works: one hash per Work row, with
    # each following File row attached to the most recent work.
    def parse_csv(csv, mvs)
      rows = CSV.parse(File.read(csv), headers: true, encoding: 'utf-8').map(&:to_hash)
      @works = []
      rows.each do |row|
        type = row.first.last
        next if type.nil?

        if type.include? "Work"
          # NOTE(review): the class name comes straight from the CSV; consider
          # restricting it to available_works before calling const_get.
          metadata = create_data(row, work_form(type), Object.const_get(type).new, mvs)
          @works << { type: type, metadata: metadata, files: [] }
        elsif type.include? "File"
          metadata = create_data(row, file_form, FileSet.new, mvs)
          @works.last[:files] << { url: row.delete('url'), title: row.delete('title'), metadata: metadata }
        end
      end
    end

    # Reads this tenant's csv_checker settings from the engine config into
    # instance variables used by the checks below.
    # Raises RuntimeError when the tenant or its settings are missing.
    def load_config
      tenant =
        if Settings.multitenancy.enabled
          Account.find_by(tenant: Apartment::Tenant.current).cname
        else
          "default"
        end
      tenant_settings = CdmMigrator::Engine.config['tenant_settings']
      raise "Cdm Migrator couldn't find this tenant. Is it configured?" unless tenant_settings.has_key?(tenant)

      settings = tenant_settings[tenant]['csv_checker']
      unless settings.present?
        raise "Cdm Migrator couldn't find any configured settings. Are they in cdm_migrator.yml?"
      end

      # Each list setting is optional; map/first would fail on a missing one.
      @date_indexing_service = settings['date_indexing_service'].first.constantize if settings['date_indexing_service']
      @date_fields = settings['date_fields'].map(&:to_sym) if settings['date_fields']
      @uri_fields = settings['valid_uri_fields'].map(&:to_sym) if settings['valid_uri_fields']
      @separator = settings['multi_value_separator']
      @separator_fields = settings['separator_fields'].map(&:to_sym) if settings['separator_fields']
      @path_to_drive = settings['path_to_drive']
      # If you would like to change this to match the uploader's max file size,
      # change this to Hyrax.config.uploader[:maxFileSize]
      @max_file_size = settings['max_file_size']
    end

    # Runs every configured check over +csv_file+, filling @error_list with
    # { row_number => { field => message } }. Rows without errors are dropped.
    def check_csv(csv_file)
      row_number = 1
      @error_list = {}
      check_mounted_drive if @path_to_drive.present?

      CSV.foreach(csv_file, headers: true, header_converters: :symbol) do |row|
        row_number += 1 # Tells user what CSV row the error is on
        # to_s: a blank type must reach the "unknown object type" branch
        # below rather than raise on nil.
        object_type = row[:object_type].to_s
        if object_type.include? "Work"
          check_work_row(row_number, row)
        elsif object_type == "File"
          check_file_path(row_number, row[:url])
          check_transcript_length(row_number, row[:transcript]) if row[:transcript].present?
          check_file_size(row_number, row[:url])
        else
          @error_list[row_number] = { "object_type" => "No or unknown object type. Please give a valid type (e.g. GenericWork, File)." }
        end
        @error_list.delete_if { |_key, value| value.blank? } # Data are valid, no need to print the row
      end
    end

    # Runs the date, URI and multi-value separator checks for one Work row.
    def check_work_row(row_number, row)
      check_dates(row_number, row) if @date_fields.present?
      check_uris(row_number, row) if @uri_fields.present?
      if params[:multi_value_separator].present? && @separator_fields.present?
        check_multi_val_fields(row_number, row, params[:multi_value_separator])
      else
        note_separator_check_skipped
      end
    end

    # Adds the "separators were not checked" alert once, rather than per row.
    def note_separator_check_skipped
      alert_message = "No multi-value separator character was selected or no fields were configured. CSV Checker didn't check for valid separators."
      if flash[:alert] && flash[:alert].exclude?(alert_message)
        flash[:alert] << alert_message
      elsif flash[:alert].blank?
        flash[:alert] = Array.wrap(alert_message)
      end
    end

    # Merges +errors+ into the entry for +row_number+, creating it on demand,
    # so that successive checks on one row accumulate instead of overwriting
    # each other or calling merge! on a missing entry.
    def add_errors(row_number, errors)
      (@error_list[row_number] ||= {}).merge!(errors)
    end

    # Flags a transcript (String, or Array of strings) that has more than
    # MAX_TRANSCRIPT_LENGTH characters.
    def check_transcript_length(row_number, transcript)
      too_long =
        if transcript.is_a? String
          transcript.length > MAX_TRANSCRIPT_LENGTH
        elsif transcript.is_a? Array
          transcript.any? { |tr| tr.length > MAX_TRANSCRIPT_LENGTH }
        end
      return unless too_long

      add_errors(row_number, "transcript" => "Transcript is too long (over #{MAX_TRANSCRIPT_LENGTH} characters).")
    end

    # Flags a file larger than @max_file_size. Does nothing when no limit is
    # configured or the file does not exist (check_file_path reports that).
    def check_file_size(row_number, file_path)
      return unless file_path.present? && @max_file_size

      # Strip the scheme before BOTH the existence and the size test, so
      # "file://..." URLs are checked as well.
      local_path = file_path.gsub("file://", "")
      return unless File.file?(local_path) && File.size(local_path) > @max_file_size

      add_errors(row_number,
                 "file size" => "The file at #{file_path} is too large to be uploaded. " \
                                "Please compress the file or split it into parts. " \
                                "Each part should be under #{helpers.number_to_human_size(@max_file_size)}.")
    end

    # Warns via flash when the configured drive path is missing or yields
    # nothing from Dir[].
    def check_mounted_drive
      drive_address = @path_to_drive
      return if Dir.exist?(drive_address) && !Dir[drive_address].empty?

      flash[:alert] = "CSV Checker can't find the mounted drive to check file paths, so some paths may be mislabelled as incorrect. Please contact the administrator or try again later."
    end

    # Flags a blank url or one that does not point to an existing file.
    def check_file_path(row_number, file_path)
      if file_path.nil?
        add_errors(row_number, "url" => "url is blank.")
      elsif !File.file?(file_path.gsub("file://", ""))
        add_errors(row_number, "url" => "No file found at #{file_path}")
      end
    end

    # Validates each configured date field by instantiating the configured
    # date indexing service with the value; any of the service's declared
    # error classes is reported with its message. Blank fields are skipped.
    def check_dates(row_number, row)
      unless @date_indexing_service
        flash[:alert] = "No date indexing service was configured so CSV Checker didn't validate dates."
        return
      end
      date_errors = @date_fields.each_with_object({}) do |field, hash|
        next unless row[field]

        begin
          @date_indexing_service.new(row[field])
        rescue *@date_indexing_service.error_classes => error
          hash[field.to_s] = "#{error.message}"
        end
      end
      add_errors(row_number, date_errors)
    end

    # Flags configured URI fields whose value contains "page".
    # Example: should be http://rightsstatements.org/vocab/etc. NOT https://rightsstatements.org/page/etc.
    def check_uris(row_number, row)
      uri_errors = @uri_fields.each_with_object({}) do |field, hash|
        if row[field] && row[field].include?("page")
          hash[field.to_s] = "Links to page instead of URI. (e.g. https://rightsstatements.org/page/etc. instead of http://rightsstatements.org/vocab/etc.)"
        end
      end
      add_errors(row_number, uri_errors)
    end

    # Check multi-value separators: for each configured field, flags spaces
    # around +character+, text left over next to a URI, and stray backslashes.
    # A later finding for a field replaces an earlier one.
    def check_multi_val_fields(row_number, row, character)
      # Same pattern URI.regexp returned; that method is obsolete.
      uri_pattern = URI::DEFAULT_PARSER.make_regexp
      spaced_separator = %r{ #{Regexp.escape(character)}|#{Regexp.escape(character)} }
      invalid_chars = ["\\"]

      separator_errors = @separator_fields.each_with_object({}) do |field, hash|
        value = row[field]
        next unless value

        # Check for leading or trailing spaces
        if value.match(spaced_separator)
          hash[field.to_s] = "Contains leading or trailing whitespace around multi-value separator."
        end
        value.split(character).map(&:strip).each do |val|
          uri_match = val.match(uri_pattern)
          if uri_match # Val should be URI
            unless val.gsub(uri_match[0], '').blank?
              hash[field.to_s] = "May contain the wrong multi-value separator or a typo in the URI."
            end
          elsif val.delete("\t\r\n\s\n").match(Regexp.union(invalid_chars)) # Or val should be string
            # Whitespace characters are deleted first so that only stray
            # backslashes are reported.
            hash[field.to_s] = "May contain an invalid character such as #{invalid_chars.to_sentence(last_word_connector: ", or ")}."
          end
        end
      end
      add_errors(row_number, separator_errors)
    end

    # Page title exposed to the layout.
    def default_page_title
      'CSV Batch Uploader'
    end

    # Always falsy: false when multitenancy is disabled, nil otherwise.
    def admin_host?
      false unless Settings.multitenancy.enabled
    end

    # Locale code => language name pairs exposed to the layout.
    def available_translations
      {
        'en' => 'English',
        'fr' => 'French'
      }
    end

    # Form class for +worktype+ ("Hyrax::<worktype>Form"), falling back to
    # Hyrax::Forms::WorkForm when no such constant exists.
    def work_form(worktype = "GenericWork")
      Module.const_get("Hyrax::#{worktype}Form")
    rescue NameError
      Module.const_get("Hyrax::Forms::WorkForm")
    end

    # Form class for file sets: Hyrax::FileSetForm when defined, otherwise
    # Hyrax::Forms::FileSetEditForm.
    def file_form
      Module.const_get("Hyrax::FileSetForm")
    rescue NameError
      Module.const_get("Hyrax::Forms::FileSetEditForm")
    end

    # The form's optional terms, minus visibility, embargo/lease and
    # relationship terms.
    def secondary_terms(form_name)
      form_name.terms - form_name.required_fields -
        [:visibility_during_embargo, :embargo_release_date,
         :visibility_after_embargo, :visibility_during_lease,
         :lease_expiration_date, :visibility_after_lease, :visibility,
         :thumbnail_id, :representative_id, :ordered_member_ids,
         :collection_ids, :in_works_ids, :admin_set_id, :files, :source, :member_of_collection_ids]
    end

    # Builds an attributes hash from CSV row +data+, keeping only non-empty
    # values whose key is a term of form +type+. A value is stored as-is when
    # +object+ currently returns nil for that attribute, and split on +mvs+
    # otherwise.
    def create_data(data, type, object, mvs)
      accepted_terms = type.required_fields + secondary_terms(type)
      data.each_with_object({}) do |(key, att), final_data|
        next if att.nil? || att.empty? || key.to_s.include?("object_type") || !accepted_terms.include?(key.to_sym)

        final_data[key] = object.send(key).nil? ? att : att.split(mvs)
      end
    end

    # Creates and saves a lease. +date+ is used as the expiration date; the
    # @lease_date ivar the method used to read remains the fallback.
    def create_lease(visibility, status_after, date)
      lease = Hydra::AccessControls::Lease.new(visibility_during_lease: visibility,
                                               visibility_after_lease: status_after,
                                               lease_expiration_date: date || @lease_date)
      lease.save
    end

    # Creates and saves an embargo from +visibility+ plus the @status_after
    # and @embargo_date ivars.
    # NOTE(review): neither ivar is assigned in this class - confirm callers.
    def create_embargo(visibility)
      embargo = Hydra::AccessControls::Embargo.new
      embargo.visibility_during_embargo = visibility
      embargo.visibility_after_embargo = @status_after
      embargo.embargo_release_date = @embargo_date
      embargo.save
    end

    # Records an "Attach Remote File" operation for +user+.
    def log(user)
      Hyrax::Operation.create!(user: user,
                               operation_type: "Attach Remote File")
    end
  end
end