cdm_migrator 3.2.1 → 3.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/MIT-LICENSE +20 -20
- data/README.md +41 -41
- data/Rakefile +38 -38
- data/app/assets/config/cdm_migrator_manifest.js +2 -2
- data/app/assets/javascripts/cdm_migrator/application.js +13 -13
- data/app/assets/stylesheets/cdm_migrator/application.css +15 -15
- data/app/assets/stylesheets/cdm_migrator/csv_checker.css +36 -36
- data/app/controllers/cdm_migrator/application_controller.rb +10 -10
- data/app/controllers/cdm_migrator/cdm_controller.rb +216 -213
- data/app/controllers/cdm_migrator/csv_controller.rb +428 -408
- data/app/helpers/cdm_migrator/application_helper.rb +4 -4
- data/app/jobs/cdm_migrator/application_job.rb +4 -4
- data/app/jobs/cdm_migrator/batch_create_files_job.rb +32 -20
- data/app/jobs/cdm_migrator/batch_create_files_with_ordered_members_job.rb +45 -0
- data/app/jobs/cdm_migrator/batch_create_works_job.rb +20 -14
- data/app/jobs/cdm_migrator/cdm_ingest_files_job.rb +35 -35
- data/app/jobs/cdm_migrator/create_work_job.rb +36 -25
- data/app/jobs/cdm_migrator/restart_upload_from_middle_job.rb +36 -0
- data/app/jobs/cdm_migrator/update_object_job.rb +10 -10
- data/app/mailers/cdm_migrator/application_mailer.rb +6 -6
- data/app/models/cdm_migrator/application_record.rb +5 -5
- data/app/models/cdm_migrator/batch_ingest.rb +33 -33
- data/app/models/cdm_migrator/ingest_work.rb +7 -16
- data/app/views/cdm_migrator/cdm/collection.html.erb +11 -11
- data/app/views/cdm_migrator/cdm/mappings.html.erb +54 -53
- data/app/views/cdm_migrator/csv/_batches_list.html.erb +4 -4
- data/app/views/cdm_migrator/csv/_default_group.html.erb +17 -17
- data/app/views/cdm_migrator/csv/_error_list.html.erb +20 -20
- data/app/views/cdm_migrator/csv/_list_batches.html.erb +21 -21
- data/app/views/cdm_migrator/csv/_tabs.html.erb +8 -8
- data/app/views/cdm_migrator/csv/csv_checker.html.erb +45 -45
- data/app/views/cdm_migrator/csv/edit.html.erb +17 -18
- data/app/views/cdm_migrator/csv/index.html.erb +19 -19
- data/app/views/cdm_migrator/csv/upload.html.erb +18 -18
- data/app/views/layouts/cdm_migrator/application.html.erb +14 -14
- data/config/routes.rb +19 -19
- data/db/migrate/20191211193859_create_batch_ingests.rb +19 -19
- data/db/migrate/20191212192315_create_ingest_works.rb +18 -18
- data/lib/cdm_migrator/engine.rb +29 -29
- data/lib/cdm_migrator/version.rb +3 -3
- data/lib/cdm_migrator.rb +5 -5
- data/lib/generators/cdm_migrator/install/install_generator.rb +79 -79
- data/lib/generators/cdm_migrator/install/templates/config/cdm_migrator.yml +53 -50
- data/lib/generators/cdm_migrator/install/templates/presenters/hyku/menu_presenter.rb +47 -47
- data/lib/generators/cdm_migrator/install/templates/presenters/hyrax/menu_presenter.rb +66 -66
- data/lib/generators/cdm_migrator/install/templates/sidebar/_tasks.html.erb +55 -55
- data/lib/tasks/cdm_migrator_tasks.rake +4 -4
- metadata +7 -5
@@ -1,408 +1,428 @@
|
|
1
|
-
module CdmMigrator
|
2
|
-
class CsvController < ApplicationController
|
3
|
-
helper_method :default_page_title, :admin_host?, :available_translations, :available_works
|
4
|
-
include ActionView::Helpers::UrlHelper
|
5
|
-
layout 'hyrax/dashboard' if Hyrax
|
6
|
-
before_action :authenticate, except: :index
|
7
|
-
before_action :load_config, only: :csv_checker
|
8
|
-
|
9
|
-
def csv_checker
|
10
|
-
if params[:file]
|
11
|
-
check_csv params[:file].path
|
12
|
-
if @error_list.blank?
|
13
|
-
flash[:notice] = "All data are valid."
|
14
|
-
else
|
15
|
-
flash[:error] = "The CSV Checker found some errors in the CSV. Please correct them and check again."
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
def index
|
21
|
-
if current_page?(main_app.csv_my_batches_path(locale: nil))
|
22
|
-
@batches = BatchIngest.where(user_id: current_user.id).reverse_order
|
23
|
-
elsif current_page?(main_app.csv_all_batches_path(locale: nil))
|
24
|
-
@batches = BatchIngest.all.reverse_order
|
25
|
-
else
|
26
|
-
@batches = []
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
def upload
|
31
|
-
@admin_sets = AdminSet.all.map { |as| [as.title.first, as.id] }
|
32
|
-
@collections = Collection.all.map { |col| [col.title.first, col.id] }
|
33
|
-
end
|
34
|
-
|
35
|
-
def create
|
36
|
-
dir = Rails.root.join('public', 'uploads', 'csvs')
|
37
|
-
FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
|
38
|
-
time = DateTime.now.strftime('%s')
|
39
|
-
filename = params[:csv_import][:csv_file].original_filename.gsub('.csv', "#{time}.csv")
|
40
|
-
csv = dir.join(filename).to_s
|
41
|
-
File.open(csv, 'wb') do |file|
|
42
|
-
file.write(params[:csv_import][:csv_file].read)
|
43
|
-
end
|
44
|
-
check_csv csv
|
45
|
-
if @error_list.present?
|
46
|
-
flash[:error] = "Cdm Migrator found some problems with the CSV. Use the CSV Checker for more details."
|
47
|
-
end
|
48
|
-
parse_csv(csv, params[:csv_import][:mvs])
|
49
|
-
|
50
|
-
ingest = BatchIngest.new({
|
51
|
-
data: @works,
|
52
|
-
size: @works.length,
|
53
|
-
csv: csv,
|
54
|
-
admin_set_id: params[:admin_set],
|
55
|
-
collection_id: params[:collection],
|
56
|
-
user_id: current_user.id,
|
57
|
-
message: @path_list.blank? ? nil : @path_list.to_s.gsub("\"", """)
|
58
|
-
})
|
59
|
-
if ingest.save! && @path_list.blank?
|
60
|
-
BatchCreateWorksJob.perform_later(ingest, current_user)
|
61
|
-
flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
|
62
|
-
redirect_to csv_my_batches_path
|
63
|
-
else
|
64
|
-
flash[:error] ||= "csv could not be parsed, please check and re-upload"
|
65
|
-
redirect_to csv_upload_path
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
def rerun
|
70
|
-
ingest = BatchIngest.find(params[:id]).deep_dup
|
71
|
-
ingest.save
|
72
|
-
BatchCreateWorksJob.perform_later(ingest, current_user)
|
73
|
-
flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
|
74
|
-
redirect_to csv_my_batches_path
|
75
|
-
end
|
76
|
-
|
77
|
-
def generate
|
78
|
-
headers = %w(type url)
|
79
|
-
skip = %w(id head tail depositor date_uploaded date_modified import_url thumbnail_id embargo_id lease_id access_control_id representative_id)
|
80
|
-
GenericWork.new.attributes.each do |key, val|
|
81
|
-
headers << "work_#{key}" unless skip.include? key
|
82
|
-
end
|
83
|
-
FileSet.new.attributes.each do |key, val|
|
84
|
-
headers << "file_#{key}" unless skip.include? key
|
85
|
-
end
|
86
|
-
fname = "template_#{DateTime.now.to_i}"
|
87
|
-
render plain: CSV.generate { |csv| csv << headers }, content_type: 'text/csv'
|
88
|
-
end
|
89
|
-
|
90
|
-
def edit
|
91
|
-
@collections = ::Collection.all.map { |c| [c.title.first, c.id] }
|
92
|
-
end
|
93
|
-
|
94
|
-
def update
|
95
|
-
mvs = params[:csv_update][:mvs]
|
96
|
-
csv = CSV.parse(params[:csv_update][:csv_file].read, headers: true, encoding: 'utf-8').map(&:to_hash)
|
97
|
-
csv.each do |row|
|
98
|
-
obj = ActiveFedora::Base.find row['id']
|
99
|
-
type = row.first.last
|
100
|
-
if type.nil?
|
101
|
-
next
|
102
|
-
elsif type.include? "Work"
|
103
|
-
metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
|
104
|
-
elsif type.include? "File"
|
105
|
-
metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
|
106
|
-
end
|
107
|
-
unless metadata.nil?
|
108
|
-
obj.attributes = metadata
|
109
|
-
obj.save
|
110
|
-
end
|
111
|
-
end
|
112
|
-
flash[:notice] = "csv successfully uploaded"
|
113
|
-
redirect_to csv_edit_path
|
114
|
-
end
|
115
|
-
|
116
|
-
def export
|
117
|
-
solr = RSolr.connect url: Account.find_by(tenant: Apartment::Tenant.current).solr_endpoint.url
|
118
|
-
response = solr.get 'select', params: {
|
119
|
-
q: "member_of_collection_ids_ssim:#{params[:collection_id]}",
|
120
|
-
rows: 3400,
|
121
|
-
fl: "id"
|
122
|
-
}
|
123
|
-
unless response['response']['docs'].empty? || response['response']['docs'][0].empty?
|
124
|
-
work_ids = response['response']['docs'].map { |doc| doc['id'] }
|
125
|
-
end
|
126
|
-
#works = ::ActiveFedora::Base.where member_of_collection_ids_ssim: params[:collection_id]
|
127
|
-
@csv_headers = ['type'] + work_fields
|
128
|
-
@csv_array = [@csv_headers.join(',')]
|
129
|
-
work_ids.each do |work_id|
|
130
|
-
doc = ::SolrDocument.find work_id
|
131
|
-
add_line doc
|
132
|
-
doc._source[:file_set_ids_ssim].each do |file_id|
|
133
|
-
file_doc = ::SolrDocument.find file_id
|
134
|
-
add_line file_doc
|
135
|
-
end
|
136
|
-
end
|
137
|
-
|
138
|
-
send_data @csv_array.join("\n"),
|
139
|
-
:type => 'text/csv; charset=iso-8859-5; header=present',
|
140
|
-
:disposition => "attachment; filename=export.csv"
|
141
|
-
end
|
142
|
-
|
143
|
-
private
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
relative_path import_url part_of resource_type access_control_id
|
166
|
-
representative_id thumbnail_id rendering_ids admin_set_id embargo_id
|
167
|
-
lease_id]
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
def load_config
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
@
|
217
|
-
@
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
1
|
+
module CdmMigrator
|
2
|
+
class CsvController < ApplicationController
|
3
|
+
helper_method :default_page_title, :admin_host?, :available_translations, :available_works
|
4
|
+
include ActionView::Helpers::UrlHelper
|
5
|
+
layout 'hyrax/dashboard' if Hyrax
|
6
|
+
before_action :authenticate, except: :index
|
7
|
+
before_action :load_config, only: :csv_checker
|
8
|
+
|
9
|
+
# Runs the CSV Checker on the uploaded file (params[:file]) and reports the
# outcome through flash. Does nothing when no file was submitted.
def csv_checker
  return unless params[:file]

  check_csv params[:file].path
  if @error_list.blank?
    flash[:notice] = "All data are valid."
  else
    flash[:error] = "The CSV Checker found some errors in the CSV. Please correct them and check again."
  end
end
|
19
|
+
|
20
|
+
# Loads the batches to list: the current user's batches on the "my batches"
# page, every batch on the "all batches" page, and none on any other page.
def index
  @batches =
    if current_page?(main_app.csv_my_batches_path(locale: nil))
      BatchIngest.where(user_id: current_user.id).reverse_order
    elsif current_page?(main_app.csv_all_batches_path(locale: nil))
      BatchIngest.all.reverse_order
    else
      []
    end
end
|
29
|
+
|
30
|
+
# Builds the [title, id] option pairs for the admin set and collection
# selectors on the upload form.
def upload
  @admin_sets = AdminSet.all.map { |admin_set| [admin_set.title.first, admin_set.id] }
  @collections = Collection.all.map { |collection| [collection.title.first, collection.id] }
end
|
34
|
+
|
35
|
+
# Saves the uploaded CSV under public/uploads/csvs, checks and parses it,
# stores a BatchIngest record and, when no path problems were recorded,
# enqueues BatchCreateWorksJob for it.
def create
  upload = params[:csv_import][:csv_file]
  dir = Rails.root.join('public', 'uploads', 'csvs')
  FileUtils.mkdir_p(dir) # no-op when the directory already exists
  time = Time.now.to_i
  # basename: a client-supplied name must not be able to leave the upload directory
  filename = File.basename(upload.original_filename).gsub('.csv', "#{time}.csv")
  csv = dir.join(filename).to_s
  File.open(csv, 'wb') { |file| file.write(upload.read) }

  check_csv csv
  if @error_list.present?
    flash[:error] = "Cdm Migrator found some problems with the CSV. Use the CSV Checker for more details."
  end
  parse_csv(csv, params[:csv_import][:mvs])

  ingest = BatchIngest.new(
    data: @works,
    size: @works.length,
    csv: csv,
    admin_set_id: params[:admin_set],
    collection_id: params[:collection],
    user_id: current_user.id,
    # double quotes are stored as HTML entities
    message: @path_list.blank? ? nil : @path_list.to_s.gsub("\"", "&quot;")
  )
  # save! raises on failure, so the else branch is only reached when @path_list is present
  if ingest.save! && @path_list.blank?
    BatchCreateWorksJob.perform_later(ingest, current_user)
    flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
    redirect_to csv_my_batches_path
  else
    flash[:error] ||= "csv could not be parsed, please check and re-upload"
    redirect_to csv_upload_path
  end
end
|
68
|
+
|
69
|
+
# Duplicates an existing BatchIngest and enqueues the copy for processing.
def rerun
  ingest = BatchIngest.find(params[:id]).deep_dup
  # save! so a copy that failed to persist is reported here rather than
  # being handed to the job unsaved
  ingest.save!
  BatchCreateWorksJob.perform_later(ingest, current_user)
  flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
  redirect_to csv_my_batches_path
end
|
76
|
+
|
77
|
+
# Renders a one-line CSV template: "type", "url", then every GenericWork
# attribute prefixed with "work_" and every FileSet attribute prefixed with
# "file_", leaving out the attributes listed in `skip`.
def generate
  headers = %w(type url)
  skip = %w(id head tail depositor date_uploaded date_modified import_url thumbnail_id embargo_id lease_id access_control_id representative_id)
  GenericWork.new.attributes.each_key do |key|
    headers << "work_#{key}" unless skip.include? key
  end
  FileSet.new.attributes.each_key do |key|
    headers << "file_#{key}" unless skip.include? key
  end
  render plain: CSV.generate { |csv| csv << headers }, content_type: 'text/csv'
end
|
89
|
+
|
90
|
+
# Builds the [title, id] option pairs for the collection selector on the edit page.
def edit
  @collections = ::Collection.all.map { |collection| [collection.title.first, collection.id] }
end
|
93
|
+
|
94
|
+
# Applies metadata from an uploaded CSV to existing objects. Each row names
# its object in the "id" column; the value of the row's first column is
# treated as the object type.
def update
  mvs = params[:csv_update][:mvs]
  contents = params[:csv_update][:csv_file].read.force_encoding("UTF-8")
  rows = CSV.parse(contents, headers: true, encoding: 'utf-8').map(&:to_hash)
  rows.each do |row|
    type = row.first.last
    # Skip rows without a usable type before touching the repository, so a
    # blank row no longer triggers a lookup with a nil id.
    next if type.nil?
    next unless type.include?("Work") || type.include?("File")

    obj = ActiveFedora::Base.find row['id']
    # NOTE(review): file rows also go through work_form here, whereas
    # parse_csv uses file_form for them — confirm this is intended.
    metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
    next if metadata.nil?

    obj.attributes = metadata
    obj.save
  end
  flash[:notice] = "csv successfully uploaded"
  redirect_to csv_edit_path
end
|
115
|
+
|
116
|
+
# Sends a CSV export of a collection: one line per member work returned by
# Solr (at most 3400), each followed by one line per file set id listed on
# the work's Solr document.
def export
  solr = RSolr.connect url: Account.find_by(tenant: Apartment::Tenant.current).solr_endpoint.url
  response = solr.get 'select', params: {
    q: "member_of_collection_ids_ssim:#{params[:collection_id]}",
    rows: 3400,
    fl: "id"
  }
  docs = response['response']['docs']
  # An empty result now yields a header-only CSV instead of calling each on nil.
  work_ids = (docs.empty? || docs[0].empty?) ? [] : docs.map { |doc| doc['id'] }
  #works = ::ActiveFedora::Base.where member_of_collection_ids_ssim: params[:collection_id]
  @csv_headers = ['type'] + work_fields
  @csv_array = [@csv_headers.join(',')]
  work_ids.each do |work_id|
    doc = ::SolrDocument.find work_id
    add_line doc
    # Array() tolerates works whose document has no file_set_ids_ssim entry
    Array(doc._source[:file_set_ids_ssim]).each do |file_id|
      add_line ::SolrDocument.find(file_id)
    end
  end

  send_data @csv_array.join("\n"),
            :type => 'text/csv; charset=iso-8859-5; header=present',
            :disposition => "attachment; filename=export.csv"
end
|
142
|
+
|
143
|
+
private
|
144
|
+
|
145
|
+
# before_action guard: requires permission to create the first work type
# returned by available_works.
def authenticate
  authorize! :create, available_works.first
end
|
148
|
+
|
149
|
+
# Appends one quoted CSV line for +doc+ to @csv_array, with cells ordered by
# @csv_headers. Embedded double quotes are doubled.
def add_line doc
  line_hash = { 'type' => doc._source[:has_model_ssim].first }
  work_fields.each { |field| line_hash[field] = create_cell(doc, field) }
  # to_s covers nil cells and non-String values, which have no gsub
  cells = line_hash.values_at(*@csv_headers).map { |cell| "\"#{cell.to_s.gsub("\"", "\"\"")}\"" }
  @csv_array << cells.join(',')
end
|
158
|
+
|
159
|
+
# Memoized list of attribute names across all available work types, minus
# excluded_fields.
def work_fields
  @fields ||= available_works.flat_map { |work| work.new.attributes.keys }.uniq - excluded_fields
end
|
162
|
+
|
163
|
+
# Attribute names that work_fields removes from the export columns.
def excluded_fields
  %w[date_uploaded date_modified head tail state proxy_depositor on_behalf_of arkivo_checksum label
     relative_path import_url part_of resource_type access_control_id
     representative_id thumbnail_id rendering_ids admin_set_id embargo_id
     lease_id]
end
|
169
|
+
|
170
|
+
# Returns the export cell for +field+ on document +w+. Date-like fields are
# read from the "<field>_tesim" entry of w._source; other fields come from
# the accessor of the same name when w responds to it. Arrays are joined
# with "|"; the result is nil when the field is not available.
def create_cell w, field
  value =
    if field.include?('date') || field == 'chronological_coverage'
      w._source["#{field}_tesim"]
    elsif w.respond_to?(field.to_sym)
      w.public_send(field)
    end
  value.is_a?(Array) ? value.join('|') : value
end
|
185
|
+
|
186
|
+
# Memoized result of Hyrax::QuickClassificationQuery#authorized_models for
# the current user.
def available_works
  @available_works ||= Hyrax::QuickClassificationQuery.new(current_user).authorized_models
end
|
189
|
+
|
190
|
+
# Parses the CSV at path +csv+ into @works, an array of
# {type:, metadata:, files: []} hashes. The value of each row's first column
# is treated as its type; rows whose type contains "File" are attached to
# the most recently parsed work. +mvs+ is the multi-value separator passed
# on to create_data.
def parse_csv csv, mvs
  rows = CSV.parse(File.read(csv), headers: true, encoding: 'utf-8').map(&:to_hash)
  @works = []
  rows.each do |row|
    type = row.first.last
    next if type.nil?

    if type.include? "Work"
      metadata = create_data(row, work_form(type), Object.const_get(type).new, mvs)
      @works << {type: type, metadata: metadata, files: []}
    elsif type.include? "File"
      metadata = create_data(row, file_form, FileSet.new, mvs)
      # NOTE(review): assumes a work row precedes every file row; @works.last
      # is nil otherwise — confirm this is validated upstream.
      @works.last[:files] << {url: row.delete('url'), title: row.delete('title'), metadata: metadata}
    end
  end
end
|
206
|
+
|
207
|
+
# Loads the current tenant's csv_checker settings from the engine config
# into instance variables used by the check_* methods.
# Raises RuntimeError when the tenant or its csv_checker settings are missing.
def load_config
  tenant =
    if Settings.multitenancy.enabled
      Account.find_by(tenant: Apartment::Tenant.current).cname
    else
      "default"
    end

  tenant_settings = CdmMigrator::Engine.config['tenant_settings']
  raise "Cdm Migrator couldn't find this tenant. Is it configured?" unless tenant_settings.has_key?(tenant)

  settings = tenant_settings[tenant]['csv_checker']
  raise "Cdm Migrator couldn't find any configured settings. Are they in cdm_migrator.yml?" unless settings.present?

  @date_indexing_service = settings['date_indexing_service'].first.constantize if settings['date_indexing_service']
  @date_fields = settings['date_fields'].map(&:to_sym) if settings['date_fields']
  @uri_fields = settings['valid_uri_fields'].map(&:to_sym) if settings['valid_uri_fields']
  @separator = settings['multi_value_separator']
  @separator_fields = settings['separator_fields'].map(&:to_sym) if settings['separator_fields']
  @path_to_drive = settings['path_to_drive']
  # If you would like to change this to match the uploader's max file size,
  # change this to Hyrax.config.uploader[:maxFileSize]
  @max_file_size = settings['max_file_size']
end
|
232
|
+
|
233
|
+
# Validates every row of the CSV at +csv_file+ and collects problems in
# @error_list, keyed by CSV row number (the header is row 1). Work rows get
# date, URI and multi-value separator checks, each only when configured;
# File rows get path, transcript length and file size checks. Rows that end
# up without errors are removed from @error_list.
def check_csv csv_file
  row_number = 1
  @error_list = {}
  check_mounted_drive if @path_to_drive.present?

  CSV.foreach(csv_file, headers: true, header_converters: :symbol) do |row|
    row_number += 1 # Tells user what CSV row the error is on
    # to_s: a missing object_type reaches the "unknown object type" branch
    # below instead of raising NoMethodError on nil
    object_type = row[:object_type].to_s
    if object_type.include? "Work"
      check_dates(row_number, row) if @date_fields.present?
      check_uris(row_number, row) if @uri_fields.present?
      if params[:multi_value_separator].present? && @separator_fields.present?
        check_multi_val_fields(row_number, row, params[:multi_value_separator])
      else
        alert_message = "No multi-value separator character was selected or no fields were configured. CSV Checker didn't check for valid separators."
        if flash[:alert] && flash[:alert].exclude?(alert_message) # Only add this message once, rather than per line
          flash[:alert] << alert_message
        elsif flash[:alert].blank?
          flash[:alert] = Array.wrap(alert_message)
        end
      end
    elsif object_type == "File"
      check_file_path(row_number, row[:url])
      check_transcript_length(row_number, row[:transcript]) if row[:transcript].present?
      check_file_size(row_number, row[:url])
    else
      @error_list[row_number] = { "object_type" => "No or unknown object type. Please give a valid type (e.g. GenericWork, File)." }
    end
    @error_list.delete_if { |_key, value| value.blank? } # Data are valid, no need to print the row
  end
end
|
263
|
+
|
264
|
+
# Records a "transcript" error for +row_number+ when the transcript (a
# String, or any element of an Array) is longer than 9000 characters.
# The error is added to the row's existing errors rather than replacing them.
def check_transcript_length(row_number, transcript)
  texts =
    case transcript
    when String then [transcript]
    when Array then transcript
    else []
    end
  return unless texts.any? { |text| text.length > 9000 }

  (@error_list[row_number] ||= {})["transcript"] = "Transcript is too long (over 9000 characters)."
end
|
275
|
+
|
276
|
+
# Records a "file size" error for +row_number+ when the file at +file_path+
# exists and is larger than @max_file_size. A leading "file://" is removed
# before the path is used, for the existence test as well as the size test.
# The error is added to the row's existing errors rather than replacing them.
def check_file_size(row_number, file_path)
  return unless file_path.present? && @max_file_size

  local_path = file_path.gsub("file://", "")
  return unless File.file?(local_path) && File.size(local_path) > @max_file_size

  message = "The file at #{file_path} is too large to be uploaded. Please compress the file or split it into parts. " \
            "Each part should be under #{helpers.number_to_human_size(@max_file_size)}."
  (@error_list[row_number] ||= {})["file size"] = message
end
|
284
|
+
|
285
|
+
# Sets a flash alert when the directory at @path_to_drive is missing or has
# no entries. The previous test used Dir[path], which is non-empty for any
# existing directory, so an empty mount point was never reported.
def check_mounted_drive
  drive_address = @path_to_drive
  mounted = Dir.exist?(drive_address) && !(Dir.entries(drive_address) - %w[. ..]).empty?
  return if mounted

  flash[:alert] = "CSV Checker can't find the mounted drive to check file paths, so some paths may be mislabelled as incorrect. Please contact the administrator or try again later."
end
|
291
|
+
|
292
|
+
# Records a "url" error for +row_number+ when +file_path+ is nil or does not
# point at an existing file. A leading "file://" is removed before the test.
# The error is added to the row's existing errors rather than replacing them.
def check_file_path(row_number, file_path)
  message =
    if file_path.nil?
      "url is blank."
    elsif !File.file?(file_path.gsub("file://", ""))
      "No file found at #{file_path}"
    end
  return unless message

  (@error_list[row_number] ||= {})["url"] = message
end
|
299
|
+
|
300
|
+
# Validates each configured date field of +row+ by instantiating
# @date_indexing_service with its value, and stores the resulting
# { field => error message } hash (possibly empty) as the row's entry in
# @error_list. When no service is configured it sets a flash alert and
# returns without touching @error_list.
def check_dates(row_number, row)
  unless @date_indexing_service
    flash[:alert] = "No date indexing service was configured so CSV Checker didn't validate dates."
    return
  end

  @error_list[row_number] = @date_fields.each_with_object({}) do |field, errors|
    next unless row[field]

    begin
      @date_indexing_service.new(row[field])
    rescue *@date_indexing_service.error_classes => error
      errors[field.to_s] = error.message
    end
  end
end
|
316
|
+
|
317
|
+
# <Example: should be http://rightsstatements.org/vocab/etc. NOT https://rightsstatements.org/page/etc.
|
318
|
+
# Flags each configured URI field of +row+ whose value contains "page", and
# merges the resulting errors into the row's entry in @error_list. The entry
# is created when absent, so errors recorded for other rows no longer cause
# a merge! on nil.
def check_uris(row_number, row)
  uri_errors = @uri_fields.each_with_object({}) do |field, errors|
    next unless row[field] && row[field].include?("page")

    errors[field.to_s] = "Links to page instead of URI. (e.g. https://rightsstatements.org/page/etc. instead of http://rightsstatements.org/vocab/etc.)"
  end
  (@error_list[row_number] ||= {}).merge!(uri_errors)
end
|
331
|
+
|
332
|
+
# Check multi-value separators
|
333
|
+
def check_multi_val_fields(row_number, row, character)
|
334
|
+
uri_fields = @separator_fields
|
335
|
+
separator_errors = uri_fields.each_with_object({}) do |field, hash|
|
336
|
+
if value = row[field]
|
337
|
+
# Check for leading or trailing spaces
|
338
|
+
if value.match %r{ #{Regexp.escape(character)}|#{Regexp.escape(character)} }
|
339
|
+
hash[field.to_s] = "Contains leading or trailing whitespace around multi-value separator."
|
340
|
+
end
|
341
|
+
values = value.split(character).map(&:strip)
|
342
|
+
values.each do |val|
|
343
|
+
if val.match(URI.regexp) # Val should be URI
|
344
|
+
remainder = val.gsub(val.match(URI.regexp)[0],'')
|
345
|
+
unless remainder.blank?
|
346
|
+
hash[field.to_s] = "May contain the wrong multi-value separator or a typo in the URI."
|
347
|
+
end
|
348
|
+
else # Or val should be string
|
349
|
+
invalid_chars = ["\\"]
|
350
|
+
# Make exceptions for backslashes that are part of whitespace characters
|
351
|
+
# by deleting them before checking for stray \s
|
352
|
+
if val.delete("\t\r\n\s\n").match Regexp.union(invalid_chars)
|
353
|
+
hash[field.to_s] = "May contain an invalid character such as #{invalid_chars.to_sentence(last_word_connector: ", or ")}."
|
354
|
+
end
|
355
|
+
end
|
356
|
+
end
|
357
|
+
end
|
358
|
+
end
|
359
|
+
@error_list[row_number].merge!(separator_errors)
|
360
|
+
end
|
361
|
+
|
362
|
+
# Page title exposed to views through helper_method.
def default_page_title
  'CSV Batch Uploader'
end
|
365
|
+
|
366
|
+
# View helper. Returns false when multitenancy is disabled and nil when it
# is enabled, so the result is falsy either way.
def admin_host?
  false unless Settings.multitenancy.enabled
end
|
369
|
+
|
370
|
+
# Locale code => language name pairs exposed to views through helper_method.
def available_translations
  { 'en' => 'English', 'fr' => 'French' }
end
|
376
|
+
|
377
|
+
# Resolves the form class for +worktype+ ("Hyrax::<worktype>Form"), falling
# back to Hyrax::Forms::WorkForm when that constant cannot be resolved.
# Only NameError triggers the fallback; the inline `rescue` used before
# swallowed every StandardError.
def work_form(worktype = "GenericWork")
  Module.const_get("Hyrax::#{worktype}Form")
rescue NameError
  Module.const_get("Hyrax::Forms::WorkForm")
end
|
380
|
+
|
381
|
+
# Resolves the file set form class: Hyrax::FileSetForm when defined,
# otherwise Hyrax::Forms::FileSetEditForm. Only NameError triggers the
# fallback; the inline `rescue` used before swallowed every StandardError.
def file_form
  Module.const_get("Hyrax::FileSetForm")
rescue NameError
  Module.const_get("Hyrax::Forms::FileSetEditForm")
end
|
384
|
+
|
385
|
+
# Returns the form's terms minus its required fields and minus a fixed list
# of administrative terms (visibility, embargo/lease, membership and file
# related).
def secondary_terms form_name
  form_name.terms - form_name.required_fields -
    %i[visibility_during_embargo embargo_release_date
       visibility_after_embargo visibility_during_lease
       lease_expiration_date visibility_after_lease visibility
       thumbnail_id representative_id ordered_member_ids
       collection_ids in_works_ids admin_set_id files source member_of_collection_ids]
end
|
393
|
+
|
394
|
+
# Builds the attribute hash for +object+ from a CSV row.
#
# data   - Hash of column name => cell value.
# type   - form class; supplies required_fields and (via secondary_terms)
#          the other accepted terms.
# object - target object; an attribute whose current value is nil is
#          assigned the raw cell, any other attribute gets the cell split
#          on +mvs+.
# mvs    - multi-value separator.
#
# Columns that are blank, are named like "object_type", or are not accepted
# terms are skipped. A nil column name is skipped too, instead of raising on
# nil.to_sym.
def create_data data, type, object, mvs
  accepted_terms = type.required_fields + secondary_terms(type)
  data.each_with_object({}) do |(key, att), final_data|
    next if key.nil? || att.nil? || att.empty?
    next if key.to_s.include?("object_type") || !accepted_terms.include?(key.to_sym)

    # the key is an accepted term by now; public_send keeps the call to public accessors
    final_data[key] = object.public_send(key).nil? ? att : att.split(mvs)
  end
end
|
408
|
+
|
409
|
+
# Creates and saves a Hydra::AccessControls::Lease; returns the result of save.
# The +date+ argument is used as the expiration date; it was previously
# ignored in favour of @lease_date, which remains the fallback when +date+
# is nil.
def create_lease visibility, status_after, date
  lease = Hydra::AccessControls::Lease.new(visibility_during_lease: visibility,
                                           visibility_after_lease: status_after,
                                           lease_expiration_date: date || @lease_date)
  lease.save
end
|
414
|
+
|
415
|
+
# Creates and saves a Hydra::AccessControls::Embargo using +visibility+ for
# the embargo period and @status_after / @embargo_date for the rest;
# returns the result of save.
def create_embargo visibility
  embargo = Hydra::AccessControls::Embargo.new
  embargo.visibility_during_embargo = visibility
  embargo.visibility_after_embargo = @status_after
  embargo.embargo_release_date = @embargo_date
  embargo.save
end
|
422
|
+
|
423
|
+
# Creates a Hyrax::Operation of type "Attach Remote File" for +user+.
def log(user)
  Hyrax::Operation.create!(user: user, operation_type: "Attach Remote File")
end
|
427
|
+
end
|
428
|
+
end
|