cdm_migrator 3.2.1 → 3.3.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/MIT-LICENSE +20 -20
  3. data/README.md +41 -41
  4. data/Rakefile +38 -38
  5. data/app/assets/config/cdm_migrator_manifest.js +2 -2
  6. data/app/assets/javascripts/cdm_migrator/application.js +13 -13
  7. data/app/assets/stylesheets/cdm_migrator/application.css +15 -15
  8. data/app/assets/stylesheets/cdm_migrator/csv_checker.css +36 -36
  9. data/app/controllers/cdm_migrator/application_controller.rb +10 -10
  10. data/app/controllers/cdm_migrator/cdm_controller.rb +216 -213
  11. data/app/controllers/cdm_migrator/csv_controller.rb +428 -408
  12. data/app/helpers/cdm_migrator/application_helper.rb +4 -4
  13. data/app/jobs/cdm_migrator/application_job.rb +4 -4
  14. data/app/jobs/cdm_migrator/batch_create_files_job.rb +32 -20
  15. data/app/jobs/cdm_migrator/batch_create_files_with_ordered_members_job.rb +45 -0
  16. data/app/jobs/cdm_migrator/batch_create_works_job.rb +20 -14
  17. data/app/jobs/cdm_migrator/cdm_ingest_files_job.rb +35 -35
  18. data/app/jobs/cdm_migrator/create_work_job.rb +36 -25
  19. data/app/jobs/cdm_migrator/restart_upload_from_middle_job.rb +36 -0
  20. data/app/jobs/cdm_migrator/update_object_job.rb +10 -10
  21. data/app/mailers/cdm_migrator/application_mailer.rb +6 -6
  22. data/app/models/cdm_migrator/application_record.rb +5 -5
  23. data/app/models/cdm_migrator/batch_ingest.rb +33 -33
  24. data/app/models/cdm_migrator/ingest_work.rb +7 -16
  25. data/app/views/cdm_migrator/cdm/collection.html.erb +11 -11
  26. data/app/views/cdm_migrator/cdm/mappings.html.erb +54 -53
  27. data/app/views/cdm_migrator/csv/_batches_list.html.erb +4 -4
  28. data/app/views/cdm_migrator/csv/_default_group.html.erb +17 -17
  29. data/app/views/cdm_migrator/csv/_error_list.html.erb +20 -20
  30. data/app/views/cdm_migrator/csv/_list_batches.html.erb +21 -21
  31. data/app/views/cdm_migrator/csv/_tabs.html.erb +8 -8
  32. data/app/views/cdm_migrator/csv/csv_checker.html.erb +45 -45
  33. data/app/views/cdm_migrator/csv/edit.html.erb +17 -18
  34. data/app/views/cdm_migrator/csv/index.html.erb +19 -19
  35. data/app/views/cdm_migrator/csv/upload.html.erb +18 -18
  36. data/app/views/layouts/cdm_migrator/application.html.erb +14 -14
  37. data/config/routes.rb +19 -19
  38. data/db/migrate/20191211193859_create_batch_ingests.rb +19 -19
  39. data/db/migrate/20191212192315_create_ingest_works.rb +18 -18
  40. data/lib/cdm_migrator/engine.rb +29 -29
  41. data/lib/cdm_migrator/version.rb +3 -3
  42. data/lib/cdm_migrator.rb +5 -5
  43. data/lib/generators/cdm_migrator/install/install_generator.rb +79 -79
  44. data/lib/generators/cdm_migrator/install/templates/config/cdm_migrator.yml +53 -50
  45. data/lib/generators/cdm_migrator/install/templates/presenters/hyku/menu_presenter.rb +47 -47
  46. data/lib/generators/cdm_migrator/install/templates/presenters/hyrax/menu_presenter.rb +66 -66
  47. data/lib/generators/cdm_migrator/install/templates/sidebar/_tasks.html.erb +55 -55
  48. data/lib/tasks/cdm_migrator_tasks.rake +4 -4
  49. metadata +7 -5
data/app/controllers/cdm_migrator/csv_controller.rb
@@ -1,408 +1,428 @@
1
- module CdmMigrator
2
- class CsvController < ApplicationController
3
- helper_method :default_page_title, :admin_host?, :available_translations, :available_works
4
- include ActionView::Helpers::UrlHelper
5
- layout 'hyrax/dashboard' if Hyrax
6
- before_action :authenticate, except: :index
7
- before_action :load_config, only: :csv_checker
8
-
9
- def csv_checker
10
- if params[:file]
11
- check_csv params[:file].path
12
- if @error_list.blank?
13
- flash[:notice] = "All data are valid."
14
- else
15
- flash[:error] = "The CSV Checker found some errors in the CSV. Please correct them and check again."
16
- end
17
- end
18
- end
19
-
20
- def index
21
- if current_page?(main_app.csv_my_batches_path(locale: nil))
22
- @batches = BatchIngest.where(user_id: current_user.id).reverse_order
23
- elsif current_page?(main_app.csv_all_batches_path(locale: nil))
24
- @batches = BatchIngest.all.reverse_order
25
- else
26
- @batches = []
27
- end
28
- end
29
-
30
- def upload
31
- @admin_sets = AdminSet.all.map { |as| [as.title.first, as.id] }
32
- @collections = Collection.all.map { |col| [col.title.first, col.id] }
33
- end
34
-
35
- def create
36
- dir = Rails.root.join('public', 'uploads', 'csvs')
37
- FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
38
- time = DateTime.now.strftime('%s')
39
- filename = params[:csv_import][:csv_file].original_filename.gsub('.csv', "#{time}.csv")
40
- csv = dir.join(filename).to_s
41
- File.open(csv, 'wb') do |file|
42
- file.write(params[:csv_import][:csv_file].read)
43
- end
44
- check_csv csv
45
- if @error_list.present?
46
- flash[:error] = "Cdm Migrator found some problems with the CSV. Use the CSV Checker for more details."
47
- end
48
- parse_csv(csv, params[:csv_import][:mvs])
49
-
50
- ingest = BatchIngest.new({
51
- data: @works,
52
- size: @works.length,
53
- csv: csv,
54
- admin_set_id: params[:admin_set],
55
- collection_id: params[:collection],
56
- user_id: current_user.id,
57
- message: @path_list.blank? ? nil : @path_list.to_s.gsub("\"", "&quot;")
58
- })
59
- if ingest.save! && @path_list.blank?
60
- BatchCreateWorksJob.perform_later(ingest, current_user)
61
- flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
62
- redirect_to csv_my_batches_path
63
- else
64
- flash[:error] ||= "csv could not be parsed, please check and re-upload"
65
- redirect_to csv_upload_path
66
- end
67
- end
68
-
69
- def rerun
70
- ingest = BatchIngest.find(params[:id]).deep_dup
71
- ingest.save
72
- BatchCreateWorksJob.perform_later(ingest, current_user)
73
- flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
74
- redirect_to csv_my_batches_path
75
- end
76
-
77
- def generate
78
- headers = %w(type url)
79
- skip = %w(id head tail depositor date_uploaded date_modified import_url thumbnail_id embargo_id lease_id access_control_id representative_id)
80
- GenericWork.new.attributes.each do |key, val|
81
- headers << "work_#{key}" unless skip.include? key
82
- end
83
- FileSet.new.attributes.each do |key, val|
84
- headers << "file_#{key}" unless skip.include? key
85
- end
86
- fname = "template_#{DateTime.now.to_i}"
87
- render plain: CSV.generate { |csv| csv << headers }, content_type: 'text/csv'
88
- end
89
-
90
- def edit
91
- @collections = ::Collection.all.map { |c| [c.title.first, c.id] }
92
- end
93
-
94
- def update
95
- mvs = params[:csv_update][:mvs]
96
- csv = CSV.parse(params[:csv_update][:csv_file].read, headers: true, encoding: 'utf-8').map(&:to_hash)
97
- csv.each do |row|
98
- obj = ActiveFedora::Base.find row['id']
99
- type = row.first.last
100
- if type.nil?
101
- next
102
- elsif type.include? "Work"
103
- metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
104
- elsif type.include? "File"
105
- metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
106
- end
107
- unless metadata.nil?
108
- obj.attributes = metadata
109
- obj.save
110
- end
111
- end
112
- flash[:notice] = "csv successfully uploaded"
113
- redirect_to csv_edit_path
114
- end
115
-
116
- def export
117
- solr = RSolr.connect url: Account.find_by(tenant: Apartment::Tenant.current).solr_endpoint.url
118
- response = solr.get 'select', params: {
119
- q: "member_of_collection_ids_ssim:#{params[:collection_id]}",
120
- rows: 3400,
121
- fl: "id"
122
- }
123
- unless response['response']['docs'].empty? || response['response']['docs'][0].empty?
124
- work_ids = response['response']['docs'].map { |doc| doc['id'] }
125
- end
126
- #works = ::ActiveFedora::Base.where member_of_collection_ids_ssim: params[:collection_id]
127
- @csv_headers = ['type'] + work_fields
128
- @csv_array = [@csv_headers.join(',')]
129
- work_ids.each do |work_id|
130
- doc = ::SolrDocument.find work_id
131
- add_line doc
132
- doc._source[:file_set_ids_ssim].each do |file_id|
133
- file_doc = ::SolrDocument.find file_id
134
- add_line file_doc
135
- end
136
- end
137
-
138
- send_data @csv_array.join("\n"),
139
- :type => 'text/csv; charset=iso-8859-5; header=present',
140
- :disposition => "attachment; filename=export.csv"
141
- end
142
-
143
- private
144
-
145
- def authenticate
146
- authorize! :create, available_works.first
147
- end
148
-
149
- def add_line doc
150
- line_hash = {}
151
- line_hash['type'] = doc._source[:has_model_ssim].first
152
- work_fields.each do |field|
153
- line_hash[field] = create_cell doc, field
154
- end
155
- @csv_array << line_hash.values_at(*@csv_headers).map { |cell| cell = '' if cell.nil?; "\"#{cell.gsub("\"", "\"\"")}\"" }.join(',')
156
-
157
- end
158
-
159
- def work_fields
160
- @fields ||= available_works.map { |work| work.new.attributes.keys }.flatten.uniq - excluded_fields
161
- end
162
-
163
- def excluded_fields
164
- %w[date_uploaded date_modified head tail state proxy_depositor on_behalf_of arkivo_checksum label
165
- relative_path import_url part_of resource_type access_control_id
166
- representative_id thumbnail_id rendering_ids admin_set_id embargo_id
167
- lease_id]
168
- end
169
-
170
- def create_cell w, field
171
- if field.include? 'date'
172
- if w._source[field+'_tesim'].is_a?(Array)
173
- w._source[field+'_tesim'].join('|')
174
- else
175
- w._source[field+'_tesim']
176
- end
177
- elsif w.respond_to?(field.to_sym)
178
- if w.send(field).is_a?(Array)
179
- w.send(field).join('|')
180
- else
181
- w.send(field)
182
- end
183
- end
184
- end
185
-
186
- def available_works
187
- @available_works ||= Hyrax::QuickClassificationQuery.new(current_user).authorized_models
188
- end
189
-
190
- def parse_csv csv, mvs
191
- csv = CSV.parse(File.read(csv), headers: true, encoding: 'utf-8').map(&:to_hash)
192
- @works = []
193
- csv.each do |row|
194
- type = row.first.last
195
- if type.nil?
196
- next
197
- elsif type.include? "Work"
198
- metadata = create_data(row, work_form(type), Object.const_get(type).new, mvs)
199
- @works << {type: type, metadata: metadata, files: []}
200
- elsif type.include? "File"
201
- metadata = create_data(row, file_form, FileSet.new, mvs)
202
- @works.last[:files] << {url: row.delete('url'), title: row.delete('title'), metadata: metadata}
203
- end
204
- end
205
- end
206
-
207
- def load_config
208
- tenant = Account.find_by(tenant: Apartment::Tenant.current).cname
209
- if CdmMigrator::Engine.config['tenant_settings'].has_key?(tenant)
210
- settings = CdmMigrator::Engine.config['tenant_settings'][tenant]['csv_checker']
211
- if settings.present?
212
- # .map will throw an error if settings[key] has no value
213
- @edtf_fields = settings['edtf_fields'].map(&:to_sym) if settings['edtf_fields']
214
- @uri_fields = settings['valid_uri_fields'].map(&:to_sym) if settings['valid_uri_fields']
215
- @separator = settings['multi_value_separator']
216
- @separator_fields = settings['separator_fields'].map(&:to_sym) if settings['separator_fields']
217
- @path_to_drive = settings['path_to_drive']
218
- else
219
- raise "Cdm Migrator couldn't find any configured settings. Are they in cdm_migrator.yml?"
220
- end
221
- else
222
- raise "Cdm Migrator couldn't find this tenant. Is it configured?"
223
- end
224
- end
225
-
226
- def check_csv csv_file
227
- row_number = 1
228
- @error_list = {}
229
- check_mounted_drive if @path_to_drive.present?
230
-
231
- CSV.foreach(csv_file, headers: true, header_converters: :symbol) do |row|
232
- row_number +=1 # Tells user what CSV row the error is on
233
- if row[:object_type].include? "Work"
234
- check_edtf(row_number, row) if @edtf_fields.present?
235
- check_uris(row_number, row) if @uri_fields.present?
236
- if params[:multi_value_separator].present? and @separator_fields.present?
237
- check_separator(row_number, row, params[:multi_value_separator])
238
- else
239
- alert_message = "No multi-value separator character was selected or no fields were configured. CSV Checker didn't check for valid separators."
240
- if flash[:alert] and flash[:alert].exclude?(alert_message) # Only add this message once, rather than per line
241
- flash[:alert] << alert_message
242
- elsif flash[:alert].blank?
243
- flash[:alert] = Array.wrap(alert_message)
244
- end
245
- end
246
- elsif row[:object_type] == "File"
247
- check_file_path(row_number, row[:url])
248
- else
249
- @error_list[row_number] = { "object_type" => "No or unknown object type. Please give a valid type (e.g. GenericWork, File)." }
250
- end
251
- @error_list.delete_if { |key, value| value.blank? } # Data are valid, no need to print the row
252
- end
253
- end
254
-
255
- def check_mounted_drive
256
- drive_address = @path_to_drive
257
- unless Dir.exist?(drive_address) and !Dir[drive_address].empty?
258
- flash[:alert] = "CSV Checker can't find the mounted drive to check file paths, so some paths may be mislabelled as incorrect. Please contact the administrator or try again later."
259
- end
260
- end
261
-
262
- def check_file_path(row_number, file_path)
263
- if file_path.nil?
264
- @error_list[row_number] = { "url" => "url is blank." }
265
- elsif File.file?(file_path.gsub("file://", "")) == false
266
- @error_list[row_number] = { "url" => "No file found at #{file_path}" }
267
- end
268
- end
269
-
270
- def check_edtf(row_number, row)
271
- edtf_fields = @edtf_fields
272
- edtf_errors = edtf_fields.each_with_object({}) do |field, hash|
273
- temp_date = row[field]
274
- # modify date so that the interval encompasses the years on the last interval date
275
- temp_date = temp_date.gsub('/..','').gsub('%','?~').gsub(/\/$/,'')
276
- date = temp_date.include?("/") ? temp_date.gsub(/([0-9]+X+\/)([0-9]+)(X+)/){"#{$1}"+"#{$2.to_i+1}"+"#{$3}"}.gsub("X","u") : temp_date
277
- date = date.gsub("XX-","uu-").gsub("X-", "u-").gsub('XX?','uu').gsub('X?', 'u').gsub('u?','u').gsub('?','')
278
- # edtf has trouble with year-month (e.g. "19uu-12") or year-season strings (e.g. "190u-23")
279
- # that contain unspecified years, or intervals containing the above ("19uu-22/19uu-23", etc.).
280
- # So we check for/create exceptions.
281
- # Check for season interval
282
- if Date.edtf(date) == nil and date != "unknown" # Accept season intervals
283
- unless is_season?(date.split("/").first) and is_season?(date.split("/").second)
284
- # If an interval then, check each date individually
285
- if date.include?("/")
286
- dates = date.split("/")
287
- else
288
- dates = [date]
289
- end
290
- #byebug
291
- dates.each do |d|
292
- # Dates with 'u' in the last digit of the year return invalid when in format YYYY-MM
293
- # So we flub day specifity before checking again if the date is valid
294
- unless Date.edtf(d + '-01') # Date.edtf('193u-03-01') returns valid
295
- if match = d[/\d{3}u/] or match = d[/\d{2}u{2}-[2][1-4]/] # edtf can't parse single u in year (e.g. 192u) or uu in YYYY-SS (e.g. 19uu-21), so we replace it
296
- d.gsub!(match, match.gsub("u","0"))
297
- unless Date.edtf(d)
298
- hash[field.to_s] = "Blank or not a valid EDTF date."
299
- end
300
- else
301
- hash[field.to_s] = "Blank or not a valid EDTF date."
302
- end
303
- end
304
- end
305
- end
306
- end
307
-
308
- end
309
- @error_list[row_number] = edtf_errors
310
- end
311
-
312
- def is_season?(date)
313
- Date.edtf(date).class == EDTF::Season
314
- end
315
-
316
- # <Example: should be http://rightsstatements.org/vocab/etc. NOT https://rightsstatements.org/page/etc.
317
- def check_uris(row_number, row)
318
- uri_fields = @uri_fields
319
- uri_errors = uri_fields.each_with_object({}) do |field, hash|
320
- if row[field].include? "page"
321
- hash[field.to_s] = "Links to page instead of URI. (e.g. https://rightsstatements.org/page/etc. instead of http://rightsstatements.org/vocab/etc.)"
322
- end
323
- end
324
- @error_list[row_number].merge!(uri_errors)
325
- end
326
-
327
- # Check multi-value separators
328
- def check_separator(row_number, row, character)
329
- uri_fields = @separator_fields
330
- separator_errors = uri_fields.each_with_object({}) do |field, hash|
331
- value = row[field]
332
- if value.present?
333
- URI.extract(value).each { |uri| value.gsub!(uri, '') }
334
- unless value.split("").all? { |sep| sep == character } # Check if remaining characters are the correct separator
335
- hash[field.to_s] = "May contain the wrong multi-value separator (i.e. not #{character})."
336
- end
337
- end
338
- end
339
- @error_list[row_number].merge!(separator_errors)
340
- end
341
-
342
- def default_page_title
343
- 'CSV Batch Uploader'
344
- end
345
-
346
- def admin_host?
347
- false unless Settings.multitenancy.enabled
348
- end
349
-
350
- def available_translations
351
- {
352
- 'en' => 'English',
353
- 'fr' => 'French'
354
- }
355
- end
356
-
357
- def work_form(worktype = "GenericWork")
358
- Module.const_get("Hyrax::#{worktype}Form") rescue nil || Module.const_get("Hyrax::Forms::WorkForm")
359
- end
360
-
361
- def file_form
362
- Module.const_get("Hyrax::FileSetForm") rescue nil || Module.const_get("Hyrax::Forms::FileSetEditForm")
363
- end
364
-
365
- def secondary_terms form_name
366
- form_name.terms - form_name.required_fields -
367
- [:visibility_during_embargo, :embargo_release_date,
368
- :visibility_after_embargo, :visibility_during_lease,
369
- :lease_expiration_date, :visibility_after_lease, :visibility,
370
- :thumbnail_id, :representative_id, :ordered_member_ids,
371
- :collection_ids, :in_works_ids, :admin_set_id, :files, :source, :member_of_collection_ids]
372
- end
373
-
374
- def create_data data, type, object, mvs
375
- final_data = {}
376
- accepted_terms = type.required_fields + secondary_terms(type)
377
- data.each do |key, att|
378
- if (att.nil? || att.empty? || key.to_s.include?("object_type") || !accepted_terms.include?(key.to_sym))
379
- next
380
- elsif (object.send(key).nil?)
381
- final_data[key] = att
382
- else
383
- final_data[key] = att.split(mvs)
384
- end
385
- end
386
- final_data
387
- end
388
-
389
- def create_lease visibility, status_after, date
390
- lease = Hydra::AccessControls::Lease.new(visibility_during_lease: visibility,
391
- visibility_after_lease: status_after, lease_expiration_date: @lease_date)
392
- lease.save
393
- end
394
-
395
- def create_embargo visibility
396
- embargo = Hydra::AccessControls::Embargo.new
397
- embargo.visibility_during_embargo = visibility
398
- embargo.visibility_after_embargo = @status_after
399
- embargo.embargo_release_date = @embargo_date
400
- embargo.save
401
- end
402
-
403
- def log(user)
404
- Hyrax::Operation.create!(user: user,
405
- operation_type: "Attach Remote File")
406
- end
407
- end
408
- end
1
+ module CdmMigrator
2
+ class CsvController < ApplicationController
3
+ helper_method :default_page_title, :admin_host?, :available_translations, :available_works
4
+ include ActionView::Helpers::UrlHelper
5
+ layout 'hyrax/dashboard' if Hyrax
6
+ before_action :authenticate, except: :index
7
+ before_action :load_config, only: :csv_checker
8
+
9
+ def csv_checker
10
+ if params[:file]
11
+ check_csv params[:file].path
12
+ if @error_list.blank?
13
+ flash[:notice] = "All data are valid."
14
+ else
15
+ flash[:error] = "The CSV Checker found some errors in the CSV. Please correct them and check again."
16
+ end
17
+ end
18
+ end
19
+
20
+ def index
21
+ if current_page?(main_app.csv_my_batches_path(locale: nil))
22
+ @batches = BatchIngest.where(user_id: current_user.id).reverse_order
23
+ elsif current_page?(main_app.csv_all_batches_path(locale: nil))
24
+ @batches = BatchIngest.all.reverse_order
25
+ else
26
+ @batches = []
27
+ end
28
+ end
29
+
30
+ def upload
31
+ @admin_sets = AdminSet.all.map { |as| [as.title.first, as.id] }
32
+ @collections = Collection.all.map { |col| [col.title.first, col.id] }
33
+ end
34
+
35
+ def create
36
+ dir = Rails.root.join('public', 'uploads', 'csvs')
37
+ FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
38
+ time = DateTime.now.strftime('%s')
39
+ filename = params[:csv_import][:csv_file].original_filename.gsub('.csv', "#{time}.csv")
40
+ csv = dir.join(filename).to_s
41
+ File.open(csv, 'wb') do |file|
42
+ file.write(params[:csv_import][:csv_file].read)
43
+ end
44
+ check_csv csv
45
+ if @error_list.present?
46
+ flash[:error] = "Cdm Migrator found some problems with the CSV. Use the CSV Checker for more details."
47
+ end
48
+ parse_csv(csv, params[:csv_import][:mvs])
49
+
50
+ ingest = BatchIngest.new({
51
+ data: @works,
52
+ size: @works.length,
53
+ csv: csv,
54
+ admin_set_id: params[:admin_set],
55
+ collection_id: params[:collection],
56
+ user_id: current_user.id,
57
+ message: @path_list.blank? ? nil : @path_list.to_s.gsub("\"", "&quot;")
58
+ })
59
+ if ingest.save! && @path_list.blank?
60
+ BatchCreateWorksJob.perform_later(ingest, current_user)
61
+ flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
62
+ redirect_to csv_my_batches_path
63
+ else
64
+ flash[:error] ||= "csv could not be parsed, please check and re-upload"
65
+ redirect_to csv_upload_path
66
+ end
67
+ end
68
+
69
+ def rerun
70
+ ingest = BatchIngest.find(params[:id]).deep_dup
71
+ ingest.save
72
+ BatchCreateWorksJob.perform_later(ingest, current_user)
73
+ flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
74
+ redirect_to csv_my_batches_path
75
+ end
76
+
77
+ def generate
78
+ headers = %w(type url)
79
+ skip = %w(id head tail depositor date_uploaded date_modified import_url thumbnail_id embargo_id lease_id access_control_id representative_id)
80
+ GenericWork.new.attributes.each do |key, val|
81
+ headers << "work_#{key}" unless skip.include? key
82
+ end
83
+ FileSet.new.attributes.each do |key, val|
84
+ headers << "file_#{key}" unless skip.include? key
85
+ end
86
+ fname = "template_#{DateTime.now.to_i}"
87
+ render plain: CSV.generate { |csv| csv << headers }, content_type: 'text/csv'
88
+ end
89
+
90
+ def edit
91
+ @collections = ::Collection.all.map { |c| [c.title.first, c.id] }
92
+ end
93
+
94
+ def update
95
+ mvs = params[:csv_update][:mvs]
96
+ csv = CSV.parse(params[:csv_update][:csv_file].read.force_encoding("UTF-8"), headers: true, encoding: 'utf-8').map(&:to_hash)
97
+ csv.each do |row|
98
+ obj = ActiveFedora::Base.find row['id']
99
+ type = row.first.last
100
+ if type.nil?
101
+ next
102
+ elsif type.include? "Work"
103
+ metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
104
+ elsif type.include? "File"
105
+ metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
106
+ end
107
+ unless metadata.nil?
108
+ obj.attributes = metadata
109
+ obj.save
110
+ end
111
+ end
112
+ flash[:notice] = "csv successfully uploaded"
113
+ redirect_to csv_edit_path
114
+ end
115
+
116
+ def export
117
+ solr = RSolr.connect url: Account.find_by(tenant: Apartment::Tenant.current).solr_endpoint.url
118
+ response = solr.get 'select', params: {
119
+ q: "member_of_collection_ids_ssim:#{params[:collection_id]}",
120
+ rows: 3400,
121
+ fl: "id"
122
+ }
123
+ unless response['response']['docs'].empty? || response['response']['docs'][0].empty?
124
+ work_ids = response['response']['docs'].map { |doc| doc['id'] }
125
+ end
126
+ #works = ::ActiveFedora::Base.where member_of_collection_ids_ssim: params[:collection_id]
127
+ @csv_headers = ['type'] + work_fields
128
+ @csv_array = [@csv_headers.join(',')]
129
+ work_ids.each do |work_id|
130
+ doc = ::SolrDocument.find work_id
131
+ add_line doc
132
+ doc._source[:file_set_ids_ssim].each do |file_id|
133
+ file_doc = ::SolrDocument.find file_id
134
+ add_line file_doc
135
+ end
136
+ end
137
+
138
+ send_data @csv_array.join("\n"),
139
+ :type => 'text/csv; charset=iso-8859-5; header=present',
140
+ :disposition => "attachment; filename=export.csv"
141
+ end
142
+
143
+ private
144
+
145
+ def authenticate
146
+ authorize! :create, available_works.first
147
+ end
148
+
149
+ def add_line doc
150
+ line_hash = {}
151
+ line_hash['type'] = doc._source[:has_model_ssim].first
152
+ work_fields.each do |field|
153
+ line_hash[field] = create_cell doc, field
154
+ end
155
+ @csv_array << line_hash.values_at(*@csv_headers).map { |cell| cell = '' if cell.nil?; "\"#{cell.gsub("\"", "\"\"")}\"" }.join(',')
156
+
157
+ end
158
+
159
+ def work_fields
160
+ @fields ||= available_works.map { |work| work.new.attributes.keys }.flatten.uniq - excluded_fields
161
+ end
162
+
163
+ def excluded_fields
164
+ %w[date_uploaded date_modified head tail state proxy_depositor on_behalf_of arkivo_checksum label
165
+ relative_path import_url part_of resource_type access_control_id
166
+ representative_id thumbnail_id rendering_ids admin_set_id embargo_id
167
+ lease_id]
168
+ end
169
+
170
+ def create_cell w, field
171
+ if field.include? 'date' or field == 'chronological_coverage'
172
+ if w._source[field+'_tesim'].is_a?(Array)
173
+ w._source[field+'_tesim'].join('|')
174
+ else
175
+ w._source[field+'_tesim']
176
+ end
177
+ elsif w.respond_to?(field.to_sym)
178
+ if w.send(field).is_a?(Array)
179
+ w.send(field).join('|')
180
+ else
181
+ w.send(field)
182
+ end
183
+ end
184
+ end
185
+
186
+ def available_works
187
+ @available_works ||= Hyrax::QuickClassificationQuery.new(current_user).authorized_models
188
+ end
189
+
190
+ def parse_csv csv, mvs
191
+ csv = CSV.parse(File.read(csv), headers: true, encoding: 'utf-8').map(&:to_hash)
192
+ @works = []
193
+ csv.each do |row|
194
+ type = row.first.last
195
+ if type.nil?
196
+ next
197
+ elsif type.include? "Work"
198
+ metadata = create_data(row, work_form(type), Object.const_get(type).new, mvs)
199
+ @works << {type: type, metadata: metadata, files: []}
200
+ elsif type.include? "File"
201
+ metadata = create_data(row, file_form, FileSet.new, mvs)
202
+ @works.last[:files] << {url: row.delete('url'), title: row.delete('title'), metadata: metadata}
203
+ end
204
+ end
205
+ end
206
+
207
+ def load_config
208
+ if Settings.multitenancy.enabled
209
+ tenant = Account.find_by(tenant: Apartment::Tenant.current).cname
210
+ else
211
+ tenant = "default"
212
+ end
213
+ if CdmMigrator::Engine.config['tenant_settings'].has_key?(tenant)
214
+ settings = CdmMigrator::Engine.config['tenant_settings'][tenant]['csv_checker']
215
+ if settings.present?
216
+ @date_indexing_service = settings['date_indexing_service'].first.constantize if settings['date_indexing_service']
217
+ @date_fields = settings['date_fields'].map(&:to_sym) if settings['date_fields']
218
+ @uri_fields = settings['valid_uri_fields'].map(&:to_sym) if settings['valid_uri_fields']
219
+ @separator = settings['multi_value_separator']
220
+ @separator_fields = settings['separator_fields'].map(&:to_sym) if settings['separator_fields']
221
+ @path_to_drive = settings['path_to_drive']
222
+ # If you would like to change this to match the uploader's max file size,
223
+ # change this to Hyrax.config.uploader[:maxFileSize]
224
+ @max_file_size = settings['max_file_size']
225
+ else
226
+ raise "Cdm Migrator couldn't find any configured settings. Are they in cdm_migrator.yml?"
227
+ end
228
+ else
229
+ raise "Cdm Migrator couldn't find this tenant. Is it configured?"
230
+ end
231
+ end
232
+
233
+ def check_csv csv_file
234
+ row_number = 1
235
+ @error_list = {}
236
+ check_mounted_drive if @path_to_drive.present?
237
+
238
+ CSV.foreach(csv_file, headers: true, header_converters: :symbol) do |row|
239
+ row_number +=1 # Tells user what CSV row the error is on
240
+ if row[:object_type].include? "Work"
241
+ check_dates(row_number, row) if @date_fields.present?
242
+ check_uris(row_number, row) if @uri_fields.present?
243
+ if params[:multi_value_separator].present? and @separator_fields.present?
244
+ check_multi_val_fields(row_number, row, params[:multi_value_separator])
245
+ else
246
+ alert_message = "No multi-value separator character was selected or no fields were configured. CSV Checker didn't check for valid separators."
247
+ if flash[:alert] and flash[:alert].exclude?(alert_message) # Only add this message once, rather than per line
248
+ flash[:alert] << alert_message
249
+ elsif flash[:alert].blank?
250
+ flash[:alert] = Array.wrap(alert_message)
251
+ end
252
+ end
253
+ elsif row[:object_type] == "File"
254
+ check_file_path(row_number, row[:url])
255
+ check_transcript_length(row_number, row[:transcript]) if row[:transcript].present?
256
+ check_file_size(row_number, row[:url])
257
+ else
258
+ @error_list[row_number] = { "object_type" => "No or unknown object type. Please give a valid type (e.g. GenericWork, File)." }
259
+ end
260
+ @error_list.delete_if { |key, value| value.blank? } # Data are valid, no need to print the row
261
+ end
262
+ end
263
+
264
+ def check_transcript_length(row_number, transcript)
265
+ if transcript.is_a? String
266
+ if transcript.length > 9000
267
+ @error_list[row_number] = { "transcript" => "Transcript is too long (over 9000 characters)." }
268
+ end
269
+ elsif transcript.is_a? Array
270
+ if transcript.any? { |tr| tr.length > 9000 }
271
+ @error_list[row_number] = { "transcript" => "Transcript is too long (over 9000 characters)." }
272
+ end
273
+ end
274
+ end
275
+
276
+ def check_file_size(row_number, file_path)
277
+ if file_path.present? && File.file?(file_path) && @max_file_size
278
+ if File.size(file_path.gsub("file://", "")) > @max_file_size
279
+ @error_list[row_number] = { "file size" => "The file at #{file_path} is too large to be uploaded. Please compress the file or split it into parts.
280
+ Each part should be under #{helpers.number_to_human_size(@max_file_size)}." }
281
+ end
282
+ end
283
+ end
284
+
285
+ def check_mounted_drive
286
+ drive_address = @path_to_drive
287
+ unless Dir.exist?(drive_address) and !Dir[drive_address].empty?
288
+ flash[:alert] = "CSV Checker can't find the mounted drive to check file paths, so some paths may be mislabelled as incorrect. Please contact the administrator or try again later."
289
+ end
290
+ end
291
+
292
+ def check_file_path(row_number, file_path)
293
+ if file_path.nil?
294
+ @error_list[row_number] = { "url" => "url is blank." }
295
+ elsif File.file?(file_path.gsub("file://", "")) == false
296
+ @error_list[row_number] = { "url" => "No file found at #{file_path}" }
297
+ end
298
+ end
299
+
300
+ def check_dates(row_number, row)
301
+ date_fields = @date_fields
302
+ unless @date_indexing_service
303
+ flash[:alert] = "No date indexing service was configured so CSV Checker didn't validate dates."
304
+ return
305
+ end
306
+ edtf_errors = date_fields.each_with_object({}) do |field, hash|
307
+ next unless row[field]
308
+ begin
309
+ @date_indexing_service.new(row[field])
310
+ rescue *@date_indexing_service.error_classes => error
311
+ hash[field.to_s] = "#{error.message}"
312
+ end
313
+ end
314
+ @error_list[row_number] = edtf_errors
315
+ end
316
+
317
+ # <Example: should be http://rightsstatements.org/vocab/etc. NOT https://rightsstatements.org/page/etc.
318
+ def check_uris(row_number, row)
319
+ uri_fields = @uri_fields
320
+ uri_errors = uri_fields.each_with_object({}) do |field, hash|
321
+ if row[field] and row[field].include? "page"
322
+ hash[field.to_s] = "Links to page instead of URI. (e.g. https://rightsstatements.org/page/etc. instead of http://rightsstatements.org/vocab/etc.)"
323
+ end
324
+ end
325
+ if @error_list.any?
326
+ @error_list[row_number].merge!(uri_errors)
327
+ else
328
+ @error_list[row_number] = uri_errors
329
+ end
330
+ end
331
+
332
+ # Check multi-value separators
333
+ def check_multi_val_fields(row_number, row, character)
334
+ uri_fields = @separator_fields
335
+ separator_errors = uri_fields.each_with_object({}) do |field, hash|
336
+ if value = row[field]
337
+ # Check for leading or trailing spaces
338
+ if value.match %r{ #{Regexp.escape(character)}|#{Regexp.escape(character)} }
339
+ hash[field.to_s] = "Contains leading or trailing whitespace around multi-value separator."
340
+ end
341
+ values = value.split(character).map(&:strip)
342
+ values.each do |val|
343
+ if val.match(URI.regexp) # Val should be URI
344
+ remainder = val.gsub(val.match(URI.regexp)[0],'')
345
+ unless remainder.blank?
346
+ hash[field.to_s] = "May contain the wrong multi-value separator or a typo in the URI."
347
+ end
348
+ else # Or val should be string
349
+ invalid_chars = ["\\"]
350
+ # Make exceptions for backslashes that are part of whitespace characters
351
+ # by deleting them before checking for stray \s
352
+ if val.delete("\t\r\n\s\n").match Regexp.union(invalid_chars)
353
+ hash[field.to_s] = "May contain an invalid character such as #{invalid_chars.to_sentence(last_word_connector: ", or ")}."
354
+ end
355
+ end
356
+ end
357
+ end
358
+ end
359
+ @error_list[row_number].merge!(separator_errors)
360
+ end
361
+
362
+ def default_page_title
363
+ 'CSV Batch Uploader'
364
+ end
365
+
366
+ def admin_host?
367
+ false unless Settings.multitenancy.enabled
368
+ end
369
+
370
+ def available_translations
371
+ {
372
+ 'en' => 'English',
373
+ 'fr' => 'French'
374
+ }
375
+ end
376
+
377
+ def work_form(worktype = "GenericWork")
378
+ Module.const_get("Hyrax::#{worktype}Form") rescue nil || Module.const_get("Hyrax::Forms::WorkForm")
379
+ end
380
+
381
+ def file_form
382
+ Module.const_get("Hyrax::FileSetForm") rescue nil || Module.const_get("Hyrax::Forms::FileSetEditForm")
383
+ end
384
+
385
+ def secondary_terms form_name
386
+ form_name.terms - form_name.required_fields -
387
+ [:visibility_during_embargo, :embargo_release_date,
388
+ :visibility_after_embargo, :visibility_during_lease,
389
+ :lease_expiration_date, :visibility_after_lease, :visibility,
390
+ :thumbnail_id, :representative_id, :ordered_member_ids,
391
+ :collection_ids, :in_works_ids, :admin_set_id, :files, :source, :member_of_collection_ids]
392
+ end
393
+
394
+ def create_data data, type, object, mvs
395
+ final_data = {}
396
+ accepted_terms = type.required_fields + secondary_terms(type)
397
+ data.each do |key, att|
398
+ if (att.nil? || att.empty? || key.to_s.include?("object_type") || !accepted_terms.include?(key.to_sym))
399
+ next
400
+ elsif object.send(key).nil?
401
+ final_data[key] = att
402
+ else
403
+ final_data[key] = att.split(mvs)
404
+ end
405
+ end
406
+ final_data
407
+ end
408
+
409
+ def create_lease visibility, status_after, date
410
+ lease = Hydra::AccessControls::Lease.new(visibility_during_lease: visibility,
411
+ visibility_after_lease: status_after, lease_expiration_date: @lease_date)
412
+ lease.save
413
+ end
414
+
415
+ def create_embargo visibility
416
+ embargo = Hydra::AccessControls::Embargo.new
417
+ embargo.visibility_during_embargo = visibility
418
+ embargo.visibility_after_embargo = @status_after
419
+ embargo.embargo_release_date = @embargo_date
420
+ embargo.save
421
+ end
422
+
423
+ def log(user)
424
+ Hyrax::Operation.create!(user: user,
425
+ operation_type: "Attach Remote File")
426
+ end
427
+ end
428
+ end
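
The rewritten csv_controller.rb above replaces the hard-coded EDTF checks from 3.2.1 with a pluggable date validator: load_config constantizes settings['date_indexing_service'].first, and check_dates instantiates that class once per CSV value, rescuing whatever exception classes the service advertises via error_classes. A minimal sketch of a service that satisfies this contract is shown below; the class name, error message, and the EDTF-based check are illustrative assumptions, not code shipped with the gem.

# Hypothetical date-validation service compatible with check_dates in 3.3.1.
# Contract inferred from the diff: the constructor raises on an invalid value,
# and .error_classes lists the exceptions the CSV checker should rescue.
require 'edtf' # EDTF parsing, as used by the 3.2.1 checker this replaces

class ExampleDateIndexingService
  class InvalidDateError < StandardError; end

  # check_dates does: rescue *@date_indexing_service.error_classes => error
  def self.error_classes
    [InvalidDateError]
  end

  # check_dates only calls .new(row[field]); raising flags the cell as invalid
  def initialize(value)
    raise InvalidDateError, "#{value} is not a valid EDTF date" if Date.edtf(value.to_s).nil?
    @value = value
  end
end

load_config also gains a non-multitenant fallback (tenant "default") and new keys such as date_fields and max_file_size. For reference, the parsed cdm_migrator.yml it reads would have roughly the shape below once loaded into Engine.config; the key names come from the reads in load_config, while every value is only an example.

# Illustrative shape of CdmMigrator::Engine.config['tenant_settings'] in 3.3.1;
# all values here are placeholders, not defaults shipped with the gem.
{
  'tenant_settings' => {
    'default' => {                          # or the tenant cname when multitenancy is enabled
      'csv_checker' => {
        'date_indexing_service' => ['ExampleDateIndexingService'], # .first.constantize
        'date_fields'           => ['date_created'],
        'valid_uri_fields'      => ['rights_statement'],
        'multi_value_separator' => '|',
        'separator_fields'      => ['creator', 'subject'],
        'path_to_drive'         => '/mnt/ingest_files',
        'max_file_size'         => 2_000_000_000 # bytes; cf. the Hyrax uploader comment in load_config
      }
    }
  }
}

Factoring the date logic out this way lets each tenant point the CSV checker at whatever date handling its works already use, presumably so the checker and the actual indexing no longer disagree about what counts as a valid date.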