cdm_migrator 3.2.1 → 3.3.2

Files changed (49)
  1. checksums.yaml +4 -4
  2. data/MIT-LICENSE +20 -20
  3. data/README.md +41 -41
  4. data/Rakefile +38 -38
  5. data/app/assets/config/cdm_migrator_manifest.js +2 -2
  6. data/app/assets/javascripts/cdm_migrator/application.js +13 -13
  7. data/app/assets/stylesheets/cdm_migrator/application.css +15 -15
  8. data/app/assets/stylesheets/cdm_migrator/csv_checker.css +36 -36
  9. data/app/controllers/cdm_migrator/application_controller.rb +10 -10
  10. data/app/controllers/cdm_migrator/cdm_controller.rb +216 -213
  11. data/app/controllers/cdm_migrator/csv_controller.rb +428 -408
  12. data/app/helpers/cdm_migrator/application_helper.rb +4 -4
  13. data/app/jobs/cdm_migrator/application_job.rb +4 -4
  14. data/app/jobs/cdm_migrator/batch_create_files_job.rb +32 -20
  15. data/app/jobs/cdm_migrator/batch_create_files_with_ordered_members_job.rb +45 -0
  16. data/app/jobs/cdm_migrator/batch_create_works_job.rb +20 -14
  17. data/app/jobs/cdm_migrator/cdm_ingest_files_job.rb +35 -35
  18. data/app/jobs/cdm_migrator/create_work_job.rb +36 -25
  19. data/app/jobs/cdm_migrator/restart_upload_from_middle_job.rb +36 -0
  20. data/app/jobs/cdm_migrator/update_object_job.rb +10 -10
  21. data/app/mailers/cdm_migrator/application_mailer.rb +6 -6
  22. data/app/models/cdm_migrator/application_record.rb +5 -5
  23. data/app/models/cdm_migrator/batch_ingest.rb +33 -33
  24. data/app/models/cdm_migrator/ingest_work.rb +7 -16
  25. data/app/views/cdm_migrator/cdm/collection.html.erb +11 -11
  26. data/app/views/cdm_migrator/cdm/mappings.html.erb +54 -53
  27. data/app/views/cdm_migrator/csv/_batches_list.html.erb +4 -4
  28. data/app/views/cdm_migrator/csv/_default_group.html.erb +17 -17
  29. data/app/views/cdm_migrator/csv/_error_list.html.erb +20 -20
  30. data/app/views/cdm_migrator/csv/_list_batches.html.erb +21 -21
  31. data/app/views/cdm_migrator/csv/_tabs.html.erb +8 -8
  32. data/app/views/cdm_migrator/csv/csv_checker.html.erb +45 -45
  33. data/app/views/cdm_migrator/csv/edit.html.erb +17 -18
  34. data/app/views/cdm_migrator/csv/index.html.erb +19 -19
  35. data/app/views/cdm_migrator/csv/upload.html.erb +18 -18
  36. data/app/views/layouts/cdm_migrator/application.html.erb +14 -14
  37. data/config/routes.rb +19 -19
  38. data/db/migrate/20191211193859_create_batch_ingests.rb +21 -19
  39. data/db/migrate/20191212192315_create_ingest_works.rb +18 -18
  40. data/lib/cdm_migrator/engine.rb +29 -29
  41. data/lib/cdm_migrator/version.rb +3 -3
  42. data/lib/cdm_migrator.rb +5 -5
  43. data/lib/generators/cdm_migrator/install/install_generator.rb +79 -79
  44. data/lib/generators/cdm_migrator/install/templates/config/cdm_migrator.yml +53 -50
  45. data/lib/generators/cdm_migrator/install/templates/presenters/hyku/menu_presenter.rb +47 -47
  46. data/lib/generators/cdm_migrator/install/templates/presenters/hyrax/menu_presenter.rb +66 -66
  47. data/lib/generators/cdm_migrator/install/templates/sidebar/_tasks.html.erb +55 -55
  48. data/lib/tasks/cdm_migrator_tasks.rake +4 -4
  49. metadata +7 -5
@@ -1,408 +1,428 @@
1
- module CdmMigrator
2
- class CsvController < ApplicationController
3
- helper_method :default_page_title, :admin_host?, :available_translations, :available_works
4
- include ActionView::Helpers::UrlHelper
5
- layout 'hyrax/dashboard' if Hyrax
6
- before_action :authenticate, except: :index
7
- before_action :load_config, only: :csv_checker
8
-
9
- def csv_checker
10
- if params[:file]
11
- check_csv params[:file].path
12
- if @error_list.blank?
13
- flash[:notice] = "All data are valid."
14
- else
15
- flash[:error] = "The CSV Checker found some errors in the CSV. Please correct them and check again."
16
- end
17
- end
18
- end
19
-
20
- def index
21
- if current_page?(main_app.csv_my_batches_path(locale: nil))
22
- @batches = BatchIngest.where(user_id: current_user.id).reverse_order
23
- elsif current_page?(main_app.csv_all_batches_path(locale: nil))
24
- @batches = BatchIngest.all.reverse_order
25
- else
26
- @batches = []
27
- end
28
- end
29
-
30
- def upload
31
- @admin_sets = AdminSet.all.map { |as| [as.title.first, as.id] }
32
- @collections = Collection.all.map { |col| [col.title.first, col.id] }
33
- end
34
-
35
- def create
36
- dir = Rails.root.join('public', 'uploads', 'csvs')
37
- FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
38
- time = DateTime.now.strftime('%s')
39
- filename = params[:csv_import][:csv_file].original_filename.gsub('.csv', "#{time}.csv")
40
- csv = dir.join(filename).to_s
41
- File.open(csv, 'wb') do |file|
42
- file.write(params[:csv_import][:csv_file].read)
43
- end
44
- check_csv csv
45
- if @error_list.present?
46
- flash[:error] = "Cdm Migrator found some problems with the CSV. Use the CSV Checker for more details."
47
- end
48
- parse_csv(csv, params[:csv_import][:mvs])
49
-
50
- ingest = BatchIngest.new({
51
- data: @works,
52
- size: @works.length,
53
- csv: csv,
54
- admin_set_id: params[:admin_set],
55
- collection_id: params[:collection],
56
- user_id: current_user.id,
57
- message: @path_list.blank? ? nil : @path_list.to_s.gsub("\"", "&quot;")
58
- })
59
- if ingest.save! && @path_list.blank?
60
- BatchCreateWorksJob.perform_later(ingest, current_user)
61
- flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
62
- redirect_to csv_my_batches_path
63
- else
64
- flash[:error] ||= "csv could not be parsed, please check and re-upload"
65
- redirect_to csv_upload_path
66
- end
67
- end
68
-
69
- def rerun
70
- ingest = BatchIngest.find(params[:id]).deep_dup
71
- ingest.save
72
- BatchCreateWorksJob.perform_later(ingest, current_user)
73
- flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
74
- redirect_to csv_my_batches_path
75
- end
76
-
77
- def generate
78
- headers = %w(type url)
79
- skip = %w(id head tail depositor date_uploaded date_modified import_url thumbnail_id embargo_id lease_id access_control_id representative_id)
80
- GenericWork.new.attributes.each do |key, val|
81
- headers << "work_#{key}" unless skip.include? key
82
- end
83
- FileSet.new.attributes.each do |key, val|
84
- headers << "file_#{key}" unless skip.include? key
85
- end
86
- fname = "template_#{DateTime.now.to_i}"
87
- render plain: CSV.generate { |csv| csv << headers }, content_type: 'text/csv'
88
- end
89
-
90
- def edit
91
- @collections = ::Collection.all.map { |c| [c.title.first, c.id] }
92
- end
93
-
94
- def update
95
- mvs = params[:csv_update][:mvs]
96
- csv = CSV.parse(params[:csv_update][:csv_file].read, headers: true, encoding: 'utf-8').map(&:to_hash)
97
- csv.each do |row|
98
- obj = ActiveFedora::Base.find row['id']
99
- type = row.first.last
100
- if type.nil?
101
- next
102
- elsif type.include? "Work"
103
- metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
104
- elsif type.include? "File"
105
- metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
106
- end
107
- unless metadata.nil?
108
- obj.attributes = metadata
109
- obj.save
110
- end
111
- end
112
- flash[:notice] = "csv successfully uploaded"
113
- redirect_to csv_edit_path
114
- end
115
-
116
- def export
117
- solr = RSolr.connect url: Account.find_by(tenant: Apartment::Tenant.current).solr_endpoint.url
118
- response = solr.get 'select', params: {
119
- q: "member_of_collection_ids_ssim:#{params[:collection_id]}",
120
- rows: 3400,
121
- fl: "id"
122
- }
123
- unless response['response']['docs'].empty? || response['response']['docs'][0].empty?
124
- work_ids = response['response']['docs'].map { |doc| doc['id'] }
125
- end
126
- #works = ::ActiveFedora::Base.where member_of_collection_ids_ssim: params[:collection_id]
127
- @csv_headers = ['type'] + work_fields
128
- @csv_array = [@csv_headers.join(',')]
129
- work_ids.each do |work_id|
130
- doc = ::SolrDocument.find work_id
131
- add_line doc
132
- doc._source[:file_set_ids_ssim].each do |file_id|
133
- file_doc = ::SolrDocument.find file_id
134
- add_line file_doc
135
- end
136
- end
137
-
138
- send_data @csv_array.join("\n"),
139
- :type => 'text/csv; charset=iso-8859-5; header=present',
140
- :disposition => "attachment; filename=export.csv"
141
- end
142
-
143
- private
144
-
145
- def authenticate
146
- authorize! :create, available_works.first
147
- end
148
-
149
- def add_line doc
150
- line_hash = {}
151
- line_hash['type'] = doc._source[:has_model_ssim].first
152
- work_fields.each do |field|
153
- line_hash[field] = create_cell doc, field
154
- end
155
- @csv_array << line_hash.values_at(*@csv_headers).map { |cell| cell = '' if cell.nil?; "\"#{cell.gsub("\"", "\"\"")}\"" }.join(',')
156
-
157
- end
158
-
159
- def work_fields
160
- @fields ||= available_works.map { |work| work.new.attributes.keys }.flatten.uniq - excluded_fields
161
- end
162
-
163
- def excluded_fields
164
- %w[date_uploaded date_modified head tail state proxy_depositor on_behalf_of arkivo_checksum label
165
- relative_path import_url part_of resource_type access_control_id
166
- representative_id thumbnail_id rendering_ids admin_set_id embargo_id
167
- lease_id]
168
- end
169
-
170
- def create_cell w, field
171
- if field.include? 'date'
172
- if w._source[field+'_tesim'].is_a?(Array)
173
- w._source[field+'_tesim'].join('|')
174
- else
175
- w._source[field+'_tesim']
176
- end
177
- elsif w.respond_to?(field.to_sym)
178
- if w.send(field).is_a?(Array)
179
- w.send(field).join('|')
180
- else
181
- w.send(field)
182
- end
183
- end
184
- end
185
-
186
- def available_works
187
- @available_works ||= Hyrax::QuickClassificationQuery.new(current_user).authorized_models
188
- end
189
-
190
- def parse_csv csv, mvs
191
- csv = CSV.parse(File.read(csv), headers: true, encoding: 'utf-8').map(&:to_hash)
192
- @works = []
193
- csv.each do |row|
194
- type = row.first.last
195
- if type.nil?
196
- next
197
- elsif type.include? "Work"
198
- metadata = create_data(row, work_form(type), Object.const_get(type).new, mvs)
199
- @works << {type: type, metadata: metadata, files: []}
200
- elsif type.include? "File"
201
- metadata = create_data(row, file_form, FileSet.new, mvs)
202
- @works.last[:files] << {url: row.delete('url'), title: row.delete('title'), metadata: metadata}
203
- end
204
- end
205
- end
206
-
207
- def load_config
208
- tenant = Account.find_by(tenant: Apartment::Tenant.current).cname
209
- if CdmMigrator::Engine.config['tenant_settings'].has_key?(tenant)
210
- settings = CdmMigrator::Engine.config['tenant_settings'][tenant]['csv_checker']
211
- if settings.present?
212
- # .map will throw an error if settings[key] has no value
213
- @edtf_fields = settings['edtf_fields'].map(&:to_sym) if settings['edtf_fields']
214
- @uri_fields = settings['valid_uri_fields'].map(&:to_sym) if settings['valid_uri_fields']
215
- @separator = settings['multi_value_separator']
216
- @separator_fields = settings['separator_fields'].map(&:to_sym) if settings['separator_fields']
217
- @path_to_drive = settings['path_to_drive']
218
- else
219
- raise "Cdm Migrator couldn't find any configured settings. Are they in cdm_migrator.yml?"
220
- end
221
- else
222
- raise "Cdm Migrator couldn't find this tenant. Is it configured?"
223
- end
224
- end
225
-
226
- def check_csv csv_file
227
- row_number = 1
228
- @error_list = {}
229
- check_mounted_drive if @path_to_drive.present?
230
-
231
- CSV.foreach(csv_file, headers: true, header_converters: :symbol) do |row|
232
- row_number +=1 # Tells user what CSV row the error is on
233
- if row[:object_type].include? "Work"
234
- check_edtf(row_number, row) if @edtf_fields.present?
235
- check_uris(row_number, row) if @uri_fields.present?
236
- if params[:multi_value_separator].present? and @separator_fields.present?
237
- check_separator(row_number, row, params[:multi_value_separator])
238
- else
239
- alert_message = "No multi-value separator character was selected or no fields were configured. CSV Checker didn't check for valid separators."
240
- if flash[:alert] and flash[:alert].exclude?(alert_message) # Only add this message once, rather than per line
241
- flash[:alert] << alert_message
242
- elsif flash[:alert].blank?
243
- flash[:alert] = Array.wrap(alert_message)
244
- end
245
- end
246
- elsif row[:object_type] == "File"
247
- check_file_path(row_number, row[:url])
248
- else
249
- @error_list[row_number] = { "object_type" => "No or unknown object type. Please give a valid type (e.g. GenericWork, File)." }
250
- end
251
- @error_list.delete_if { |key, value| value.blank? } # Data are valid, no need to print the row
252
- end
253
- end
254
-
255
- def check_mounted_drive
256
- drive_address = @path_to_drive
257
- unless Dir.exist?(drive_address) and !Dir[drive_address].empty?
258
- flash[:alert] = "CSV Checker can't find the mounted drive to check file paths, so some paths may be mislabelled as incorrect. Please contact the administrator or try again later."
259
- end
260
- end
261
-
262
- def check_file_path(row_number, file_path)
263
- if file_path.nil?
264
- @error_list[row_number] = { "url" => "url is blank." }
265
- elsif File.file?(file_path.gsub("file://", "")) == false
266
- @error_list[row_number] = { "url" => "No file found at #{file_path}" }
267
- end
268
- end
269
-
270
- def check_edtf(row_number, row)
271
- edtf_fields = @edtf_fields
272
- edtf_errors = edtf_fields.each_with_object({}) do |field, hash|
273
- temp_date = row[field]
274
- # modify date so that the interval encompasses the years on the last interval date
275
- temp_date = temp_date.gsub('/..','').gsub('%','?~').gsub(/\/$/,'')
276
- date = temp_date.include?("/") ? temp_date.gsub(/([0-9]+X+\/)([0-9]+)(X+)/){"#{$1}"+"#{$2.to_i+1}"+"#{$3}"}.gsub("X","u") : temp_date
277
- date = date.gsub("XX-","uu-").gsub("X-", "u-").gsub('XX?','uu').gsub('X?', 'u').gsub('u?','u').gsub('?','')
278
- # edtf has trouble with year-month (e.g. "19uu-12") or year-season strings (e.g. "190u-23")
279
- # that contain unspecified years, or intervals containing the above ("19uu-22/19uu-23", etc.).
280
- # So we check for/create exceptions.
281
- # Check for season interval
282
- if Date.edtf(date) == nil and date != "unknown" # Accept season intervals
283
- unless is_season?(date.split("/").first) and is_season?(date.split("/").second)
284
- # If an interval then, check each date individually
285
- if date.include?("/")
286
- dates = date.split("/")
287
- else
288
- dates = [date]
289
- end
290
- #byebug
291
- dates.each do |d|
292
- # Dates with 'u' in the last digit of the year return invalid when in format YYYY-MM
293
- # So we flub day specifity before checking again if the date is valid
294
- unless Date.edtf(d + '-01') # Date.edtf('193u-03-01') returns valid
295
- if match = d[/\d{3}u/] or match = d[/\d{2}u{2}-[2][1-4]/] # edtf can't parse single u in year (e.g. 192u) or uu in YYYY-SS (e.g. 19uu-21), so we replace it
296
- d.gsub!(match, match.gsub("u","0"))
297
- unless Date.edtf(d)
298
- hash[field.to_s] = "Blank or not a valid EDTF date."
299
- end
300
- else
301
- hash[field.to_s] = "Blank or not a valid EDTF date."
302
- end
303
- end
304
- end
305
- end
306
- end
307
-
308
- end
309
- @error_list[row_number] = edtf_errors
310
- end
311
-
312
- def is_season?(date)
313
- Date.edtf(date).class == EDTF::Season
314
- end
315
-
316
- # <Example: should be http://rightsstatements.org/vocab/etc. NOT https://rightsstatements.org/page/etc.
317
- def check_uris(row_number, row)
318
- uri_fields = @uri_fields
319
- uri_errors = uri_fields.each_with_object({}) do |field, hash|
320
- if row[field].include? "page"
321
- hash[field.to_s] = "Links to page instead of URI. (e.g. https://rightsstatements.org/page/etc. instead of http://rightsstatements.org/vocab/etc.)"
322
- end
323
- end
324
- @error_list[row_number].merge!(uri_errors)
325
- end
326
-
327
- # Check multi-value separators
328
- def check_separator(row_number, row, character)
329
- uri_fields = @separator_fields
330
- separator_errors = uri_fields.each_with_object({}) do |field, hash|
331
- value = row[field]
332
- if value.present?
333
- URI.extract(value).each { |uri| value.gsub!(uri, '') }
334
- unless value.split("").all? { |sep| sep == character } # Check if remaining characters are the correct separator
335
- hash[field.to_s] = "May contain the wrong multi-value separator (i.e. not #{character})."
336
- end
337
- end
338
- end
339
- @error_list[row_number].merge!(separator_errors)
340
- end
341
-
342
- def default_page_title
343
- 'CSV Batch Uploader'
344
- end
345
-
346
- def admin_host?
347
- false unless Settings.multitenancy.enabled
348
- end
349
-
350
- def available_translations
351
- {
352
- 'en' => 'English',
353
- 'fr' => 'French'
354
- }
355
- end
356
-
357
- def work_form(worktype = "GenericWork")
358
- Module.const_get("Hyrax::#{worktype}Form") rescue nil || Module.const_get("Hyrax::Forms::WorkForm")
359
- end
360
-
361
- def file_form
362
- Module.const_get("Hyrax::FileSetForm") rescue nil || Module.const_get("Hyrax::Forms::FileSetEditForm")
363
- end
364
-
365
- def secondary_terms form_name
366
- form_name.terms - form_name.required_fields -
367
- [:visibility_during_embargo, :embargo_release_date,
368
- :visibility_after_embargo, :visibility_during_lease,
369
- :lease_expiration_date, :visibility_after_lease, :visibility,
370
- :thumbnail_id, :representative_id, :ordered_member_ids,
371
- :collection_ids, :in_works_ids, :admin_set_id, :files, :source, :member_of_collection_ids]
372
- end
373
-
374
- def create_data data, type, object, mvs
375
- final_data = {}
376
- accepted_terms = type.required_fields + secondary_terms(type)
377
- data.each do |key, att|
378
- if (att.nil? || att.empty? || key.to_s.include?("object_type") || !accepted_terms.include?(key.to_sym))
379
- next
380
- elsif (object.send(key).nil?)
381
- final_data[key] = att
382
- else
383
- final_data[key] = att.split(mvs)
384
- end
385
- end
386
- final_data
387
- end
388
-
389
- def create_lease visibility, status_after, date
390
- lease = Hydra::AccessControls::Lease.new(visibility_during_lease: visibility,
391
- visibility_after_lease: status_after, lease_expiration_date: @lease_date)
392
- lease.save
393
- end
394
-
395
- def create_embargo visibility
396
- embargo = Hydra::AccessControls::Embargo.new
397
- embargo.visibility_during_embargo = visibility
398
- embargo.visibility_after_embargo = @status_after
399
- embargo.embargo_release_date = @embargo_date
400
- embargo.save
401
- end
402
-
403
- def log(user)
404
- Hyrax::Operation.create!(user: user,
405
- operation_type: "Attach Remote File")
406
- end
407
- end
408
- end
1
+ module CdmMigrator
2
+ class CsvController < ApplicationController
3
+ helper_method :default_page_title, :admin_host?, :available_translations, :available_works
4
+ include ActionView::Helpers::UrlHelper
5
+ layout 'hyrax/dashboard' if Hyrax
6
+ before_action :authenticate, except: :index
7
+ before_action :load_config, only: :csv_checker
8
+
9
+ def csv_checker
10
+ if params[:file]
11
+ check_csv params[:file].path
12
+ if @error_list.blank?
13
+ flash[:notice] = "All data are valid."
14
+ else
15
+ flash[:error] = "The CSV Checker found some errors in the CSV. Please correct them and check again."
16
+ end
17
+ end
18
+ end
19
+
20
+ def index
21
+ if current_page?(main_app.csv_my_batches_path(locale: nil))
22
+ @batches = BatchIngest.where(user_id: current_user.id).reverse_order
23
+ elsif current_page?(main_app.csv_all_batches_path(locale: nil))
24
+ @batches = BatchIngest.all.reverse_order
25
+ else
26
+ @batches = []
27
+ end
28
+ end
29
+
30
+ def upload
31
+ @admin_sets = AdminSet.all.map { |as| [as.title.first, as.id] }
32
+ @collections = Collection.all.map { |col| [col.title.first, col.id] }
33
+ end
34
+
35
+ def create
36
+ dir = Rails.root.join('public', 'uploads', 'csvs')
37
+ FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
38
+ time = DateTime.now.strftime('%s')
39
+ filename = params[:csv_import][:csv_file].original_filename.gsub('.csv', "#{time}.csv")
40
+ csv = dir.join(filename).to_s
41
+ File.open(csv, 'wb') do |file|
42
+ file.write(params[:csv_import][:csv_file].read)
43
+ end
44
+ check_csv csv
45
+ if @error_list.present?
46
+ flash[:error] = "Cdm Migrator found some problems with the CSV. Use the CSV Checker for more details."
47
+ end
48
+ parse_csv(csv, params[:csv_import][:mvs])
49
+
50
+ ingest = BatchIngest.new({
51
+ data: @works,
52
+ size: @works.length,
53
+ csv: csv,
54
+ admin_set_id: params[:admin_set],
55
+ collection_id: params[:collection],
56
+ user_id: current_user.id,
57
+ message: @path_list.blank? ? nil : @path_list.to_s.gsub("\"", "&quot;")
58
+ })
59
+ if ingest.save! && @path_list.blank?
60
+ BatchCreateWorksJob.perform_later(ingest, current_user)
61
+ flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
62
+ redirect_to csv_my_batches_path
63
+ else
64
+ flash[:error] ||= "csv could not be parsed, please check and re-upload"
65
+ redirect_to csv_upload_path
66
+ end
67
+ end
68
+
69
+ def rerun
70
+ ingest = BatchIngest.find(params[:id]).deep_dup
71
+ ingest.save
72
+ BatchCreateWorksJob.perform_later(ingest, current_user)
73
+ flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
74
+ redirect_to csv_my_batches_path
75
+ end
76
+
77
+ def generate
78
+ headers = %w(type url)
79
+ skip = %w(id head tail depositor date_uploaded date_modified import_url thumbnail_id embargo_id lease_id access_control_id representative_id)
80
+ GenericWork.new.attributes.each do |key, val|
81
+ headers << "work_#{key}" unless skip.include? key
82
+ end
83
+ FileSet.new.attributes.each do |key, val|
84
+ headers << "file_#{key}" unless skip.include? key
85
+ end
86
+ fname = "template_#{DateTime.now.to_i}"
87
+ render plain: CSV.generate { |csv| csv << headers }, content_type: 'text/csv'
88
+ end
89
+
90
+ def edit
91
+ @collections = ::Collection.all.map { |c| [c.title.first, c.id] }
92
+ end
93
+
94
+ def update
95
+ mvs = params[:csv_update][:mvs]
96
+ csv = CSV.parse(params[:csv_update][:csv_file].read.force_encoding("UTF-8"), headers: true, encoding: 'utf-8').map(&:to_hash)
97
+ csv.each do |row|
98
+ obj = ActiveFedora::Base.find row['id']
99
+ type = row.first.last
100
+ if type.nil?
101
+ next
102
+ elsif type.include? "Work"
103
+ metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
104
+ elsif type.include? "File"
105
+ metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
106
+ end
107
+ unless metadata.nil?
108
+ obj.attributes = metadata
109
+ obj.save
110
+ end
111
+ end
112
+ flash[:notice] = "csv successfully uploaded"
113
+ redirect_to csv_edit_path
114
+ end
115
+
116
+ def export
117
+ solr = RSolr.connect url: Account.find_by(tenant: Apartment::Tenant.current).solr_endpoint.url
118
+ response = solr.get 'select', params: {
119
+ q: "member_of_collection_ids_ssim:#{params[:collection_id]}",
120
+ rows: 3400,
121
+ fl: "id"
122
+ }
123
+ unless response['response']['docs'].empty? || response['response']['docs'][0].empty?
124
+ work_ids = response['response']['docs'].map { |doc| doc['id'] }
125
+ end
126
+ #works = ::ActiveFedora::Base.where member_of_collection_ids_ssim: params[:collection_id]
127
+ @csv_headers = ['type'] + work_fields
128
+ @csv_array = [@csv_headers.join(',')]
129
+ work_ids.each do |work_id|
130
+ doc = ::SolrDocument.find work_id
131
+ add_line doc
132
+ doc._source[:file_set_ids_ssim].each do |file_id|
133
+ file_doc = ::SolrDocument.find file_id
134
+ add_line file_doc
135
+ end
136
+ end
137
+
138
+ send_data @csv_array.join("\n"),
139
+ :type => 'text/csv; charset=iso-8859-5; header=present',
140
+ :disposition => "attachment; filename=export.csv"
141
+ end
142
+
143
+ private
144
+
145
+ def authenticate
146
+ authorize! :create, available_works.first
147
+ end
148
+
149
+ def add_line doc
150
+ line_hash = {}
151
+ line_hash['type'] = doc._source[:has_model_ssim].first
152
+ work_fields.each do |field|
153
+ line_hash[field] = create_cell doc, field
154
+ end
155
+ @csv_array << line_hash.values_at(*@csv_headers).map { |cell| cell = '' if cell.nil?; "\"#{cell.gsub("\"", "\"\"")}\"" }.join(',')
156
+
157
+ end
158
+
159
+ def work_fields
160
+ @fields ||= available_works.map { |work| work.new.attributes.keys }.flatten.uniq - excluded_fields
161
+ end
162
+
163
+ def excluded_fields
164
+ %w[date_uploaded date_modified head tail state proxy_depositor on_behalf_of arkivo_checksum label
165
+ relative_path import_url part_of resource_type access_control_id
166
+ representative_id thumbnail_id rendering_ids admin_set_id embargo_id
167
+ lease_id]
168
+ end
169
+
170
+ def create_cell w, field
171
+ if field.include? 'date' or field == 'chronological_coverage'
172
+ if w._source[field+'_tesim'].is_a?(Array)
173
+ w._source[field+'_tesim'].join('|')
174
+ else
175
+ w._source[field+'_tesim']
176
+ end
177
+ elsif w.respond_to?(field.to_sym)
178
+ if w.send(field).is_a?(Array)
179
+ w.send(field).join('|')
180
+ else
181
+ w.send(field)
182
+ end
183
+ end
184
+ end
185
+
186
+ def available_works
187
+ @available_works ||= Hyrax::QuickClassificationQuery.new(current_user).authorized_models
188
+ end
189
+
190
+ def parse_csv csv, mvs
191
+ csv = CSV.parse(File.read(csv), headers: true, encoding: 'utf-8').map(&:to_hash)
192
+ @works = []
193
+ csv.each do |row|
194
+ type = row.first.last
195
+ if type.nil?
196
+ next
197
+ elsif type.include? "Work"
198
+ metadata = create_data(row, work_form(type), Object.const_get(type).new, mvs)
199
+ @works << {type: type, metadata: metadata, files: []}
200
+ elsif type.include? "File"
201
+ metadata = create_data(row, file_form, FileSet.new, mvs)
202
+ @works.last[:files] << {url: row.delete('url'), title: row.delete('title'), metadata: metadata}
203
+ end
204
+ end
205
+ end
206
+
207
+ def load_config
208
+ if Settings.multitenancy.enabled
209
+ tenant = Account.find_by(tenant: Apartment::Tenant.current).cname
210
+ else
211
+ tenant = "default"
212
+ end
213
+ if CdmMigrator::Engine.config['tenant_settings'].has_key?(tenant)
214
+ settings = CdmMigrator::Engine.config['tenant_settings'][tenant]['csv_checker']
215
+ if settings.present?
216
+ @date_indexing_service = settings['date_indexing_service'].first.constantize if settings['date_indexing_service']
217
+ @date_fields = settings['date_fields'].map(&:to_sym) if settings['date_fields']
218
+ @uri_fields = settings['valid_uri_fields'].map(&:to_sym) if settings['valid_uri_fields']
219
+ @separator = settings['multi_value_separator']
220
+ @separator_fields = settings['separator_fields'].map(&:to_sym) if settings['separator_fields']
221
+ @path_to_drive = settings['path_to_drive']
222
+ # If you would like to change this to match the uploader's max file size,
223
+ # change this to Hyrax.config.uploader[:maxFileSize]
224
+ @max_file_size = settings['max_file_size']
225
+ else
226
+ raise "Cdm Migrator couldn't find any configured settings. Are they in cdm_migrator.yml?"
227
+ end
228
+ else
229
+ raise "Cdm Migrator couldn't find this tenant. Is it configured?"
230
+ end
231
+ end
232
+
233
+ def check_csv csv_file
234
+ row_number = 1
235
+ @error_list = {}
236
+ check_mounted_drive if @path_to_drive.present?
237
+
238
+ CSV.foreach(csv_file, headers: true, header_converters: :symbol) do |row|
239
+ row_number +=1 # Tells user what CSV row the error is on
240
+ if row[:object_type].include? "Work"
241
+ check_dates(row_number, row) if @date_fields.present?
242
+ check_uris(row_number, row) if @uri_fields.present?
243
+ if params[:multi_value_separator].present? and @separator_fields.present?
244
+ check_multi_val_fields(row_number, row, params[:multi_value_separator])
245
+ else
246
+ alert_message = "No multi-value separator character was selected or no fields were configured. CSV Checker didn't check for valid separators."
247
+ if flash[:alert] and flash[:alert].exclude?(alert_message) # Only add this message once, rather than per line
248
+ flash[:alert] << alert_message
249
+ elsif flash[:alert].blank?
250
+ flash[:alert] = Array.wrap(alert_message)
251
+ end
252
+ end
253
+ elsif row[:object_type] == "File"
254
+ check_file_path(row_number, row[:url])
255
+ check_transcript_length(row_number, row[:transcript]) if row[:transcript].present?
256
+ check_file_size(row_number, row[:url])
257
+ else
258
+ @error_list[row_number] = { "object_type" => "No or unknown object type. Please give a valid type (e.g. GenericWork, File)." }
259
+ end
260
+ @error_list.delete_if { |key, value| value.blank? } # Data are valid, no need to print the row
261
+ end
262
+ end
263
+
264
+ def check_transcript_length(row_number, transcript)
265
+ if transcript.is_a? String
266
+ if transcript.length > 9000
267
+ @error_list[row_number] = { "transcript" => "Transcript is too long (over 9000 characters)." }
268
+ end
269
+ elsif transcript.is_a? Array
270
+ if transcript.any? { |tr| tr.length > 9000 }
271
+ @error_list[row_number] = { "transcript" => "Transcript is too long (over 9000 characters)." }
272
+ end
273
+ end
274
+ end
275
+
276
+ def check_file_size(row_number, file_path)
277
+ if file_path.present? && File.file?(file_path) && @max_file_size
278
+ if File.size(file_path.gsub("file://", "")) > @max_file_size
279
+ @error_list[row_number] = { "file size" => "The file at #{file_path} is too large to be uploaded. Please compress the file or split it into parts.
280
+ Each part should be under #{helpers.number_to_human_size(@max_file_size)}." }
281
+ end
282
+ end
283
+ end
284
+
285
+ def check_mounted_drive
286
+ drive_address = @path_to_drive
287
+ unless Dir.exist?(drive_address) and !Dir[drive_address].empty?
288
+ flash[:alert] = "CSV Checker can't find the mounted drive to check file paths, so some paths may be mislabelled as incorrect. Please contact the administrator or try again later."
289
+ end
290
+ end
291
+
292
+ def check_file_path(row_number, file_path)
293
+ if file_path.nil?
294
+ @error_list[row_number] = { "url" => "url is blank." }
295
+ elsif File.file?(file_path.gsub("file://", "")) == false
296
+ @error_list[row_number] = { "url" => "No file found at #{file_path}" }
297
+ end
298
+ end
299
+
300
+ def check_dates(row_number, row)
301
+ date_fields = @date_fields
302
+ unless @date_indexing_service
303
+ flash[:alert] = "No date indexing service was configured so CSV Checker didn't validate dates."
304
+ return
305
+ end
306
+ edtf_errors = date_fields.each_with_object({}) do |field, hash|
307
+ next unless row[field]
308
+ begin
309
+ @date_indexing_service.new(row[field])
310
+ rescue *@date_indexing_service.error_classes => error
311
+ hash[field.to_s] = "#{error.message}"
312
+ end
313
+ end
314
+ @error_list[row_number] = edtf_errors
315
+ end
316
+
317
+ # <Example: should be http://rightsstatements.org/vocab/etc. NOT https://rightsstatements.org/page/etc.
318
+ def check_uris(row_number, row)
319
+ uri_fields = @uri_fields
320
+ uri_errors = uri_fields.each_with_object({}) do |field, hash|
321
+ if row[field] and row[field].include? "page"
322
+ hash[field.to_s] = "Links to page instead of URI. (e.g. https://rightsstatements.org/page/etc. instead of http://rightsstatements.org/vocab/etc.)"
323
+ end
324
+ end
325
+ if @error_list.any?
326
+ @error_list[row_number].merge!(uri_errors)
327
+ else
328
+ @error_list[row_number] = uri_errors
329
+ end
330
+ end
331
+
332
+ # Check multi-value separators
333
+ def check_multi_val_fields(row_number, row, character)
334
+ uri_fields = @separator_fields
335
+ separator_errors = uri_fields.each_with_object({}) do |field, hash|
336
+ if value = row[field]
337
+ # Check for leading or trailing spaces
338
+ if value.match %r{ #{Regexp.escape(character)}|#{Regexp.escape(character)} }
339
+ hash[field.to_s] = "Contains leading or trailing whitespace around multi-value separator."
340
+ end
341
+ values = value.split(character).map(&:strip)
342
+ values.each do |val|
343
+ if val.match(URI.regexp) # Val should be URI
344
+ remainder = val.gsub(val.match(URI.regexp)[0],'')
345
+ unless remainder.blank?
346
+ hash[field.to_s] = "May contain the wrong multi-value separator or a typo in the URI."
347
+ end
348
+ else # Or val should be string
349
+ invalid_chars = ["\\"]
350
+ # Make exceptions for backslashes that are part of whitespace characters
351
+ # by deleting them before checking for stray \s
352
+ if val.delete("\t\r\n\s\n").match Regexp.union(invalid_chars)
353
+ hash[field.to_s] = "May contain an invalid character such as #{invalid_chars.to_sentence(last_word_connector: ", or ")}."
354
+ end
355
+ end
356
+ end
357
+ end
358
+ end
359
+ @error_list[row_number].merge!(separator_errors)
360
+ end
361
+
362
+ def default_page_title
363
+ 'CSV Batch Uploader'
364
+ end
365
+
366
+ def admin_host?
367
+ false unless Settings.multitenancy.enabled
368
+ end
369
+
370
+ def available_translations
371
+ {
372
+ 'en' => 'English',
373
+ 'fr' => 'French'
374
+ }
375
+ end
376
+
377
+ def work_form(worktype = "GenericWork")
378
+ Module.const_get("Hyrax::#{worktype}Form") rescue nil || Module.const_get("Hyrax::Forms::WorkForm")
379
+ end
380
+
381
+ def file_form
382
+ Module.const_get("Hyrax::FileSetForm") rescue nil || Module.const_get("Hyrax::Forms::FileSetEditForm")
383
+ end
384
+
385
+ def secondary_terms form_name
386
+ form_name.terms - form_name.required_fields -
387
+ [:visibility_during_embargo, :embargo_release_date,
388
+ :visibility_after_embargo, :visibility_during_lease,
389
+ :lease_expiration_date, :visibility_after_lease, :visibility,
390
+ :thumbnail_id, :representative_id, :ordered_member_ids,
391
+ :collection_ids, :in_works_ids, :admin_set_id, :files, :source, :member_of_collection_ids]
392
+ end
393
+
394
+ def create_data data, type, object, mvs
395
+ final_data = {}
396
+ accepted_terms = type.required_fields + secondary_terms(type)
397
+ data.each do |key, att|
398
+ if (att.nil? || att.empty? || key.to_s.include?("object_type") || !accepted_terms.include?(key.to_sym))
399
+ next
400
+ elsif object.send(key).nil?
401
+ final_data[key] = att
402
+ else
403
+ final_data[key] = att.split(mvs)
404
+ end
405
+ end
406
+ final_data
407
+ end
408
+
409
+ def create_lease visibility, status_after, date
410
+ lease = Hydra::AccessControls::Lease.new(visibility_during_lease: visibility,
411
+ visibility_after_lease: status_after, lease_expiration_date: @lease_date)
412
+ lease.save
413
+ end
414
+
415
+ def create_embargo visibility
416
+ embargo = Hydra::AccessControls::Embargo.new
417
+ embargo.visibility_during_embargo = visibility
418
+ embargo.visibility_after_embargo = @status_after
419
+ embargo.embargo_release_date = @embargo_date
420
+ embargo.save
421
+ end
422
+
423
+ def log(user)
424
+ Hyrax::Operation.create!(user: user,
425
+ operation_type: "Attach Remote File")
426
+ end
427
+ end
428
+ end
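
Note on configuration: the rewritten CsvController#load_config in 3.3.2 looks up a per-tenant csv_checker block in cdm_migrator.yml and now reads date_indexing_service, date_fields, and max_file_size alongside the existing valid_uri_fields, multi_value_separator, separator_fields, and path_to_drive keys. The sketch below shows what that block might look like; the key names are taken from load_config above, while the tenant name, service class, field names, and values are illustrative assumptions rather than settings shipped with this release.

    # Hypothetical excerpt of config/cdm_migrator.yml; structure inferred from
    # CsvController#load_config, values are examples only.
    tenant_settings:
      default:                             # tenant cname, or "default" when Settings.multitenancy is disabled
        csv_checker:
          date_indexing_service:
            - DateValidationService        # illustrative; read with .first.constantize, so a one-element list
          date_fields:
            - date_created
          valid_uri_fields:
            - rights_statement
          multi_value_separator: '|'
          separator_fields:
            - subject
          path_to_drive: /mnt/ingest_files # illustrative mount point
          max_file_size: 2147483648        # bytes; compared against File.size in check_file_size

Because check_dates simply calls @date_indexing_service.new on each configured field and rescues the classes returned by its error_classes method, any validator exposing that interface can be configured here.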