cdm_migrator 3.2.1 → 3.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/MIT-LICENSE +20 -20
- data/README.md +41 -41
- data/Rakefile +38 -38
- data/app/assets/config/cdm_migrator_manifest.js +2 -2
- data/app/assets/javascripts/cdm_migrator/application.js +13 -13
- data/app/assets/stylesheets/cdm_migrator/application.css +15 -15
- data/app/assets/stylesheets/cdm_migrator/csv_checker.css +36 -36
- data/app/controllers/cdm_migrator/application_controller.rb +10 -10
- data/app/controllers/cdm_migrator/cdm_controller.rb +216 -213
- data/app/controllers/cdm_migrator/csv_controller.rb +428 -408
- data/app/helpers/cdm_migrator/application_helper.rb +4 -4
- data/app/jobs/cdm_migrator/application_job.rb +4 -4
- data/app/jobs/cdm_migrator/batch_create_files_job.rb +32 -20
- data/app/jobs/cdm_migrator/batch_create_files_with_ordered_members_job.rb +45 -0
- data/app/jobs/cdm_migrator/batch_create_works_job.rb +20 -14
- data/app/jobs/cdm_migrator/cdm_ingest_files_job.rb +35 -35
- data/app/jobs/cdm_migrator/create_work_job.rb +36 -25
- data/app/jobs/cdm_migrator/restart_upload_from_middle_job.rb +36 -0
- data/app/jobs/cdm_migrator/update_object_job.rb +10 -10
- data/app/mailers/cdm_migrator/application_mailer.rb +6 -6
- data/app/models/cdm_migrator/application_record.rb +5 -5
- data/app/models/cdm_migrator/batch_ingest.rb +33 -33
- data/app/models/cdm_migrator/ingest_work.rb +7 -16
- data/app/views/cdm_migrator/cdm/collection.html.erb +11 -11
- data/app/views/cdm_migrator/cdm/mappings.html.erb +54 -53
- data/app/views/cdm_migrator/csv/_batches_list.html.erb +4 -4
- data/app/views/cdm_migrator/csv/_default_group.html.erb +17 -17
- data/app/views/cdm_migrator/csv/_error_list.html.erb +20 -20
- data/app/views/cdm_migrator/csv/_list_batches.html.erb +21 -21
- data/app/views/cdm_migrator/csv/_tabs.html.erb +8 -8
- data/app/views/cdm_migrator/csv/csv_checker.html.erb +45 -45
- data/app/views/cdm_migrator/csv/edit.html.erb +17 -18
- data/app/views/cdm_migrator/csv/index.html.erb +19 -19
- data/app/views/cdm_migrator/csv/upload.html.erb +18 -18
- data/app/views/layouts/cdm_migrator/application.html.erb +14 -14
- data/config/routes.rb +19 -19
- data/db/migrate/20191211193859_create_batch_ingests.rb +19 -19
- data/db/migrate/20191212192315_create_ingest_works.rb +18 -18
- data/lib/cdm_migrator/engine.rb +29 -29
- data/lib/cdm_migrator/version.rb +3 -3
- data/lib/cdm_migrator.rb +5 -5
- data/lib/generators/cdm_migrator/install/install_generator.rb +79 -79
- data/lib/generators/cdm_migrator/install/templates/config/cdm_migrator.yml +53 -50
- data/lib/generators/cdm_migrator/install/templates/presenters/hyku/menu_presenter.rb +47 -47
- data/lib/generators/cdm_migrator/install/templates/presenters/hyrax/menu_presenter.rb +66 -66
- data/lib/generators/cdm_migrator/install/templates/sidebar/_tasks.html.erb +55 -55
- data/lib/tasks/cdm_migrator_tasks.rake +4 -4
- metadata +7 -5
@@ -1,408 +1,428 @@
|
|
1
|
-
module CdmMigrator
|
2
|
-
class CsvController < ApplicationController
|
3
|
-
helper_method :default_page_title, :admin_host?, :available_translations, :available_works
|
4
|
-
include ActionView::Helpers::UrlHelper
|
5
|
-
layout 'hyrax/dashboard' if Hyrax
|
6
|
-
before_action :authenticate, except: :index
|
7
|
-
before_action :load_config, only: :csv_checker
|
8
|
-
|
9
|
-
def csv_checker
|
10
|
-
if params[:file]
|
11
|
-
check_csv params[:file].path
|
12
|
-
if @error_list.blank?
|
13
|
-
flash[:notice] = "All data are valid."
|
14
|
-
else
|
15
|
-
flash[:error] = "The CSV Checker found some errors in the CSV. Please correct them and check again."
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
def index
|
21
|
-
if current_page?(main_app.csv_my_batches_path(locale: nil))
|
22
|
-
@batches = BatchIngest.where(user_id: current_user.id).reverse_order
|
23
|
-
elsif current_page?(main_app.csv_all_batches_path(locale: nil))
|
24
|
-
@batches = BatchIngest.all.reverse_order
|
25
|
-
else
|
26
|
-
@batches = []
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
def upload
|
31
|
-
@admin_sets = AdminSet.all.map { |as| [as.title.first, as.id] }
|
32
|
-
@collections = Collection.all.map { |col| [col.title.first, col.id] }
|
33
|
-
end
|
34
|
-
|
35
|
-
def create
|
36
|
-
dir = Rails.root.join('public', 'uploads', 'csvs')
|
37
|
-
FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
|
38
|
-
time = DateTime.now.strftime('%s')
|
39
|
-
filename = params[:csv_import][:csv_file].original_filename.gsub('.csv', "#{time}.csv")
|
40
|
-
csv = dir.join(filename).to_s
|
41
|
-
File.open(csv, 'wb') do |file|
|
42
|
-
file.write(params[:csv_import][:csv_file].read)
|
43
|
-
end
|
44
|
-
check_csv csv
|
45
|
-
if @error_list.present?
|
46
|
-
flash[:error] = "Cdm Migrator found some problems with the CSV. Use the CSV Checker for more details."
|
47
|
-
end
|
48
|
-
parse_csv(csv, params[:csv_import][:mvs])
|
49
|
-
|
50
|
-
ingest = BatchIngest.new({
|
51
|
-
data: @works,
|
52
|
-
size: @works.length,
|
53
|
-
csv: csv,
|
54
|
-
admin_set_id: params[:admin_set],
|
55
|
-
collection_id: params[:collection],
|
56
|
-
user_id: current_user.id,
|
57
|
-
message: @path_list.blank? ? nil : @path_list.to_s.gsub("\"", """)
|
58
|
-
})
|
59
|
-
if ingest.save! && @path_list.blank?
|
60
|
-
BatchCreateWorksJob.perform_later(ingest, current_user)
|
61
|
-
flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
|
62
|
-
redirect_to csv_my_batches_path
|
63
|
-
else
|
64
|
-
flash[:error] ||= "csv could not be parsed, please check and re-upload"
|
65
|
-
redirect_to csv_upload_path
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
def rerun
|
70
|
-
ingest = BatchIngest.find(params[:id]).deep_dup
|
71
|
-
ingest.save
|
72
|
-
BatchCreateWorksJob.perform_later(ingest, current_user)
|
73
|
-
flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
|
74
|
-
redirect_to csv_my_batches_path
|
75
|
-
end
|
76
|
-
|
77
|
-
def generate
|
78
|
-
headers = %w(type url)
|
79
|
-
skip = %w(id head tail depositor date_uploaded date_modified import_url thumbnail_id embargo_id lease_id access_control_id representative_id)
|
80
|
-
GenericWork.new.attributes.each do |key, val|
|
81
|
-
headers << "work_#{key}" unless skip.include? key
|
82
|
-
end
|
83
|
-
FileSet.new.attributes.each do |key, val|
|
84
|
-
headers << "file_#{key}" unless skip.include? key
|
85
|
-
end
|
86
|
-
fname = "template_#{DateTime.now.to_i}"
|
87
|
-
render plain: CSV.generate { |csv| csv << headers }, content_type: 'text/csv'
|
88
|
-
end
|
89
|
-
|
90
|
-
def edit
|
91
|
-
@collections = ::Collection.all.map { |c| [c.title.first, c.id] }
|
92
|
-
end
|
93
|
-
|
94
|
-
def update
|
95
|
-
mvs = params[:csv_update][:mvs]
|
96
|
-
csv = CSV.parse(params[:csv_update][:csv_file].read, headers: true, encoding: 'utf-8').map(&:to_hash)
|
97
|
-
csv.each do |row|
|
98
|
-
obj = ActiveFedora::Base.find row['id']
|
99
|
-
type = row.first.last
|
100
|
-
if type.nil?
|
101
|
-
next
|
102
|
-
elsif type.include? "Work"
|
103
|
-
metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
|
104
|
-
elsif type.include? "File"
|
105
|
-
metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
|
106
|
-
end
|
107
|
-
unless metadata.nil?
|
108
|
-
obj.attributes = metadata
|
109
|
-
obj.save
|
110
|
-
end
|
111
|
-
end
|
112
|
-
flash[:notice] = "csv successfully uploaded"
|
113
|
-
redirect_to csv_edit_path
|
114
|
-
end
|
115
|
-
|
116
|
-
def export
|
117
|
-
solr = RSolr.connect url: Account.find_by(tenant: Apartment::Tenant.current).solr_endpoint.url
|
118
|
-
response = solr.get 'select', params: {
|
119
|
-
q: "member_of_collection_ids_ssim:#{params[:collection_id]}",
|
120
|
-
rows: 3400,
|
121
|
-
fl: "id"
|
122
|
-
}
|
123
|
-
unless response['response']['docs'].empty? || response['response']['docs'][0].empty?
|
124
|
-
work_ids = response['response']['docs'].map { |doc| doc['id'] }
|
125
|
-
end
|
126
|
-
#works = ::ActiveFedora::Base.where member_of_collection_ids_ssim: params[:collection_id]
|
127
|
-
@csv_headers = ['type'] + work_fields
|
128
|
-
@csv_array = [@csv_headers.join(',')]
|
129
|
-
work_ids.each do |work_id|
|
130
|
-
doc = ::SolrDocument.find work_id
|
131
|
-
add_line doc
|
132
|
-
doc._source[:file_set_ids_ssim].each do |file_id|
|
133
|
-
file_doc = ::SolrDocument.find file_id
|
134
|
-
add_line file_doc
|
135
|
-
end
|
136
|
-
end
|
137
|
-
|
138
|
-
send_data @csv_array.join("\n"),
|
139
|
-
:type => 'text/csv; charset=iso-8859-5; header=present',
|
140
|
-
:disposition => "attachment; filename=export.csv"
|
141
|
-
end
|
142
|
-
|
143
|
-
private
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
relative_path import_url part_of resource_type access_control_id
|
166
|
-
representative_id thumbnail_id rendering_ids admin_set_id embargo_id
|
167
|
-
lease_id]
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
def load_config
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
@
|
217
|
-
@
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
1
|
+
module CdmMigrator
|
2
|
+
class CsvController < ApplicationController
|
3
|
+
# Methods exposed to the views, plus the URL helpers that provide current_page? (used by #index).
helper_method :default_page_title, :admin_host?, :available_translations, :available_works
include ActionView::Helpers::UrlHelper
# A bare `if Hyrax` raises NameError when the constant is missing; defined? makes the guard effective.
layout 'hyrax/dashboard' if defined?(Hyrax)
before_action :authenticate, except: :index
before_action :load_config, only: :csv_checker
|
8
|
+
|
9
|
+
# Runs the CSV checker against the uploaded file (params[:file]) and reports
# the outcome through flash. Does nothing when no file was submitted.
def csv_checker
  upload = params[:file]
  return unless upload

  check_csv(upload.path)
  if @error_list.present?
    flash[:error] = "The CSV Checker found some errors in the CSV. Please correct them and check again."
  else
    flash[:notice] = "All data are valid."
  end
end
|
19
|
+
|
20
|
+
# Loads @batches for the batch listing pages: the current user's batches on the
# "my batches" route, every batch on the "all batches" route, otherwise an empty list.
def index
  @batches =
    if current_page?(main_app.csv_my_batches_path(locale: nil))
      BatchIngest.where(user_id: current_user.id).reverse_order
    elsif current_page?(main_app.csv_all_batches_path(locale: nil))
      BatchIngest.all.reverse_order
    else
      []
    end
end
|
29
|
+
|
30
|
+
# Builds the [first title, id] pairs for every AdminSet and Collection and
# stores them in @admin_sets and @collections.
def upload
  @admin_sets = AdminSet.all.map do |admin_set|
    [admin_set.title.first, admin_set.id]
  end
  @collections = Collection.all.map do |collection|
    [collection.title.first, collection.id]
  end
end
|
34
|
+
|
35
|
+
# Stores the uploaded CSV under public/uploads/csvs, runs the checker and the
# parser over it, records a BatchIngest and, when nothing blocks it, enqueues
# BatchCreateWorksJob for the ingest.
def create
  upload = params[:csv_import][:csv_file]
  dir = Rails.root.join('public', 'uploads', 'csvs')
  FileUtils.mkdir_p(dir)

  # Keep only the base name of the client-supplied filename so it cannot escape
  # the upload directory, and always place the timestamp before the extension
  # ("foo.csv" => "foo<epoch>.csv") so repeated uploads do not overwrite each other.
  original = File.basename(upload.original_filename.to_s)
  extension = File.extname(original)
  filename = "#{File.basename(original, extension)}#{Time.now.to_i}#{extension}"
  csv = dir.join(filename).to_s
  File.open(csv, 'wb') do |file|
    file.write(upload.read)
  end

  check_csv csv
  if @error_list.present?
    flash[:error] = "Cdm Migrator found some problems with the CSV. Use the CSV Checker for more details."
  end
  parse_csv(csv, params[:csv_import][:mvs])

  # NOTE(review): @path_list is not assigned anywhere in this class as far as
  # this file shows, so the message is presumably always nil - confirm.
  ingest = BatchIngest.new({
    data: @works,
    size: @works.length,
    csv: csv,
    admin_set_id: params[:admin_set],
    collection_id: params[:collection],
    user_id: current_user.id,
    message: @path_list.blank? ? nil : @path_list.to_s.gsub("\"", "&quot;")
  })
  # save! raises on failure, so the else branch is only reached when @path_list is present.
  if ingest.save! && @path_list.blank?
    BatchCreateWorksJob.perform_later(ingest, current_user)
    flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
    redirect_to csv_my_batches_path
  else
    flash[:error] ||= "csv could not be parsed, please check and re-upload"
    redirect_to csv_upload_path
  end
end
|
68
|
+
|
69
|
+
# Re-runs a previous batch: duplicates the BatchIngest identified by
# params[:id], saves the copy and enqueues BatchCreateWorksJob for it.
# The job is only enqueued when the copy was actually saved.
def rerun
  ingest = BatchIngest.find(params[:id]).deep_dup
  if ingest.save
    BatchCreateWorksJob.perform_later(ingest, current_user)
    flash[:notice] = "csv successfully uploaded, check this page to see the status while the batch is running"
  else
    flash[:error] = "batch could not be re-run because the copied record failed to save"
  end
  redirect_to csv_my_batches_path
end
|
76
|
+
|
77
|
+
# Renders a one-line CSV template: "type", "url", then a work_<attr> column for
# every GenericWork attribute and a file_<attr> column for every FileSet
# attribute, leaving out the attributes listed in skip.
def generate
  headers = %w(type url)
  skip = %w(id head tail depositor date_uploaded date_modified import_url thumbnail_id embargo_id lease_id access_control_id representative_id)
  GenericWork.new.attributes.each_key do |key|
    headers << "work_#{key}" unless skip.include? key
  end
  FileSet.new.attributes.each_key do |key|
    headers << "file_#{key}" unless skip.include? key
  end
  render plain: CSV.generate { |csv| csv << headers }, content_type: 'text/csv'
end
|
89
|
+
|
90
|
+
# Collects a [first title, id] pair for every Collection into @collections.
def edit
  @collections = ::Collection.all.map do |collection|
    [collection.title.first, collection.id]
  end
end
|
93
|
+
|
94
|
+
# Applies metadata updates from an uploaded CSV: every row whose first column
# names a Work or File type is looked up by its 'id' column, and the remaining
# columns are converted with create_data and saved onto the object.
# Rows without a usable type are skipped before any repository lookup, and
# ids whose save returned false are reported through flash[:error].
def update
  mvs = params[:csv_update][:mvs]
  contents = params[:csv_update][:csv_file].read.force_encoding("UTF-8")
  rows = CSV.parse(contents, headers: true, encoding: 'utf-8').map(&:to_hash)
  failed_ids = []

  rows.each do |row|
    type = row.first.last
    next if type.nil?
    next unless type.include?("Work") || type.include?("File")

    obj = ActiveFedora::Base.find row['id']
    metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
    next if metadata.nil?

    obj.attributes = metadata
    failed_ids << row['id'] unless obj.save
  end

  flash[:error] = "could not save: #{failed_ids.join(', ')}" unless failed_ids.empty?
  flash[:notice] = "csv successfully uploaded"
  redirect_to csv_edit_path
end
|
115
|
+
|
116
|
+
# Exports the works of a collection (params[:collection_id]) and their file
# sets as a CSV download. Work ids come from a Solr query against the current
# tenant's endpoint; each work and file set contributes one line via add_line.
def export
  solr = RSolr.connect url: Account.find_by(tenant: Apartment::Tenant.current).solr_endpoint.url
  response = solr.get 'select', params: {
    # Escape the request parameter so it cannot alter the query syntax.
    q: "member_of_collection_ids_ssim:#{RSolr.solr_escape(params[:collection_id].to_s)}",
    rows: 3400, # NOTE(review): collections with more works than this are truncated silently.
    fl: "id"
  }
  # An empty result set yields an empty list, so the export is just the header row.
  docs = response['response']['docs'] || []
  work_ids = docs.map { |doc| doc['id'] }.compact

  @csv_headers = ['type'] + work_fields
  @csv_array = [@csv_headers.join(',')]
  work_ids.each do |work_id|
    doc = ::SolrDocument.find work_id
    add_line doc
    # Works without file sets have no file_set_ids_ssim entry.
    Array(doc._source[:file_set_ids_ssim]).each do |file_id|
      add_line ::SolrDocument.find(file_id)
    end
  end

  # NOTE(review): the declared charset is iso-8859-5; confirm it matches the data actually sent.
  send_data @csv_array.join("\n"),
            :type => 'text/csv; charset=iso-8859-5; header=present',
            :disposition => "attachment; filename=export.csv"
end
|
142
|
+
|
143
|
+
private
|
144
|
+
|
145
|
+
# before_action filter: requires the :create ability on the first entry of
# available_works.
def authenticate
  creatable_model = available_works.first
  authorize!(:create, creatable_model)
end
|
148
|
+
|
149
|
+
# Appends one quoted CSV line for doc to @csv_array, with cells ordered like
# @csv_headers. Cells are stringified first so nil and non-String values
# (numbers, booleans, ...) are written instead of raising; embedded double
# quotes are doubled.
def add_line(doc)
  values = { 'type' => doc._source[:has_model_ssim].first }
  work_fields.each do |field|
    values[field] = create_cell(doc, field)
  end
  cells = values.values_at(*@csv_headers).map do |cell|
    "\"#{cell.to_s.gsub("\"", "\"\"")}\""
  end
  @csv_array << cells.join(',')
end
|
158
|
+
|
159
|
+
# Memoized list of attribute names across all available work classes, without
# duplicates and without the names returned by excluded_fields.
def work_fields
  @fields ||= begin
    attribute_names = available_works.map { |work_class| work_class.new.attributes.keys }
    attribute_names.flatten.uniq - excluded_fields
  end
end
|
162
|
+
|
163
|
+
# Attribute names that work_fields leaves out of the export columns.
def excluded_fields
  first_group = %w[date_uploaded date_modified head tail state proxy_depositor on_behalf_of arkivo_checksum label]
  second_group = %w[relative_path import_url part_of resource_type access_control_id]
  third_group = %w[representative_id thumbnail_id rendering_ids admin_set_id embargo_id lease_id]
  first_group + second_group + third_group
end
|
169
|
+
|
170
|
+
# Returns the export value of field for document w. Date-like fields (name
# contains "date", or is "chronological_coverage") are read from the
# "<field>_tesim" entry of w._source; other fields are read by calling the
# method of that name when w responds to it. Arrays are joined with "|";
# nil is returned when neither source applies.
def create_cell(w, field)
  value =
    if field.include?('date') || field == 'chronological_coverage'
      w._source[field + '_tesim']
    elsif w.respond_to?(field.to_sym)
      w.send(field)
    end
  value.is_a?(Array) ? value.join('|') : value
end
|
185
|
+
|
186
|
+
# Memoized result of Hyrax::QuickClassificationQuery#authorized_models for the
# current user.
def available_works
  return @available_works if @available_works

  query = Hyrax::QuickClassificationQuery.new(current_user)
  @available_works = query.authorized_models
end
|
189
|
+
|
190
|
+
# Reads the CSV at path csv and fills @works with one entry per Work row
# ({type:, metadata:, files: []}); each File row is attached to the most recent
# Work row. The row type is taken from the first column; rows with no type are
# skipped. mvs is the multi-value separator handed to create_data.
# Raises ArgumentError when a File row appears before any Work row.
def parse_csv(csv, mvs)
  rows = CSV.parse(File.read(csv), headers: true, encoding: 'utf-8').map(&:to_hash)
  @works = []
  rows.each do |row|
    type = row.first.last
    next if type.nil?

    if type.include? "Work"
      # NOTE(review): type comes straight from the CSV and is resolved with
      # const_get; consider restricting it to the classes in available_works.
      metadata = create_data(row, work_form(type), Object.const_get(type).new, mvs)
      @works << {type: type, metadata: metadata, files: []}
    elsif type.include? "File"
      parent = @works.last
      raise ArgumentError, "CSV contains a File row before any Work row" if parent.nil?

      metadata = create_data(row, file_form, FileSet.new, mvs)
      parent[:files] << {url: row.delete('url'), title: row.delete('title'), metadata: metadata}
    end
  end
end
|
206
|
+
|
207
|
+
# before_action for csv_checker: copies the current tenant's 'csv_checker'
# settings from CdmMigrator::Engine.config into instance variables.
# The tenant key is the account cname when multitenancy is enabled, otherwise
# "default". Raises when the tenant or its settings are missing.
def load_config
  tenant = Settings.multitenancy.enabled ? Account.find_by(tenant: Apartment::Tenant.current).cname : "default"

  tenant_settings = CdmMigrator::Engine.config['tenant_settings']
  raise "Cdm Migrator couldn't find this tenant. Is it configured?" unless tenant_settings.has_key?(tenant)

  settings = tenant_settings[tenant]['csv_checker']
  raise "Cdm Migrator couldn't find any configured settings. Are they in cdm_migrator.yml?" unless settings.present?

  @date_indexing_service = settings['date_indexing_service'].first.constantize if settings['date_indexing_service']
  @date_fields = settings['date_fields'].map(&:to_sym) if settings['date_fields']
  @uri_fields = settings['valid_uri_fields'].map(&:to_sym) if settings['valid_uri_fields']
  @separator = settings['multi_value_separator']
  @separator_fields = settings['separator_fields'].map(&:to_sym) if settings['separator_fields']
  @path_to_drive = settings['path_to_drive']
  # If you would like to change this to match the uploader's max file size,
  # change this to Hyrax.config.uploader[:maxFileSize]
  @max_file_size = settings['max_file_size']
end
|
232
|
+
|
233
|
+
# Validates every row of the CSV at csv_file and collects problems in
# @error_list, keyed by CSV row number (the header is row 1).
# Work rows get date, URI and multi-value separator checks, depending on which
# settings are loaded; File rows get path, transcript and size checks; rows
# with a missing or unrecognised object_type are reported as errors.
def check_csv csv_file
  row_number = 1
  @error_list = {}
  check_mounted_drive if @path_to_drive.present?

  CSV.foreach(csv_file, headers: true, header_converters: :symbol) do |row|
    row_number +=1 # Tells user what CSV row the error is on
    # A blank object_type cell is nil; stringify it so it falls through to the
    # "unknown object type" error instead of raising NoMethodError.
    object_type = row[:object_type].to_s
    if object_type.include? "Work"
      check_dates(row_number, row) if @date_fields.present?
      check_uris(row_number, row) if @uri_fields.present?
      if params[:multi_value_separator].present? && @separator_fields.present?
        check_multi_val_fields(row_number, row, params[:multi_value_separator])
      else
        alert_message = "No multi-value separator character was selected or no fields were configured. CSV Checker didn't check for valid separators."
        if flash[:alert] and flash[:alert].exclude?(alert_message) # Only add this message once, rather than per line
          flash[:alert] << alert_message
        elsif flash[:alert].blank?
          flash[:alert] = Array.wrap(alert_message)
        end
      end
    elsif object_type == "File"
      check_file_path(row_number, row[:url])
      check_transcript_length(row_number, row[:transcript]) if row[:transcript].present?
      check_file_size(row_number, row[:url])
    else
      @error_list[row_number] = { "object_type" => "No or unknown object type. Please give a valid type (e.g. GenericWork, File)." }
    end
    @error_list.delete_if { |_key, value| value.blank? } # Data are valid, no need to print the row
  end
end
|
263
|
+
|
264
|
+
# Records a "transcript" error for row_number when the transcript (a String,
# or any element of an Array) is longer than 9000 characters. The error is
# added to whatever is already recorded for the row instead of replacing it.
def check_transcript_length(row_number, transcript)
  limit = 9000
  too_long =
    case transcript
    when String then transcript.length > limit
    when Array then transcript.any? { |tr| tr.length > limit }
    else false
    end
  return unless too_long

  (@error_list[row_number] ||= {})["transcript"] = "Transcript is too long (over 9000 characters)."
end
|
275
|
+
|
276
|
+
# Records a "file size" error for row_number when the file behind file_path is
# larger than @max_file_size. Nothing is checked when the path is blank, the
# file does not exist, or no maximum is configured.
def check_file_size(row_number, file_path)
  return if file_path.to_s.strip.empty? || !@max_file_size

  # Strip the file:// scheme before touching the filesystem; testing the raw
  # URL for existence meant the size was never checked for file:// paths.
  local_path = file_path.gsub("file://", "")
  return unless File.file?(local_path) && File.size(local_path) > @max_file_size

  # Add to the row's errors instead of replacing earlier ones.
  (@error_list[row_number] ||= {})["file size"] =
    "The file at #{file_path} is too large to be uploaded. Please compress the file or split it into parts. " \
    "Each part should be under #{helpers.number_to_human_size(@max_file_size)}."
end
|
284
|
+
|
285
|
+
# Sets a flash alert when the directory at @path_to_drive is missing or empty,
# since file paths cannot be verified then.
def check_mounted_drive
  drive_address = @path_to_drive
  # Dir[path] returns the path itself for any existing directory, so it could
  # never detect an empty mount point; Dir.empty? inspects the contents.
  return if Dir.exist?(drive_address) && !Dir.empty?(drive_address)

  flash[:alert] = "CSV Checker can't find the mounted drive to check file paths, so some paths may be mislabelled as incorrect. Please contact the administrator or try again later."
end
|
291
|
+
|
292
|
+
# Records a "url" error for row_number when file_path is nil or when no regular
# file exists at the path (with any "file://" removed).
def check_file_path(row_number, file_path)
  problem =
    if file_path.nil?
      "url is blank."
    elsif !File.file?(file_path.gsub("file://", ""))
      "No file found at #{file_path}"
    end
  @error_list[row_number] = { "url" => problem } if problem
end
|
299
|
+
|
300
|
+
# Validates each configured date field of row by instantiating
# @date_indexing_service with its value; messages of the service's error
# classes are stored in @error_list[row_number], keyed by field name.
# Without a configured service it only sets a flash alert and returns.
def check_dates(row_number, row)
  service = @date_indexing_service
  if !service
    flash[:alert] = "No date indexing service was configured so CSV Checker didn't validate dates."
    return
  end

  edtf_errors = {}
  @date_fields.each do |field|
    value = row[field]
    next unless value

    begin
      service.new(value)
    rescue *service.error_classes => error
      edtf_errors[field.to_s] = error.message.to_s
    end
  end
  @error_list[row_number] = edtf_errors
end
|
316
|
+
|
317
|
+
# Flags configured URI fields whose value contains "page", i.e. links to a web
# page rather than the URI itself.
# Example: should be http://rightsstatements.org/vocab/etc. NOT https://rightsstatements.org/page/etc.
# Errors are merged into @error_list[row_number], creating the entry if needed.
def check_uris(row_number, row)
  uri_errors = @uri_fields.each_with_object({}) do |field, hash|
    if row[field] && row[field].include?("page")
      hash[field.to_s] = "Links to page instead of URI. (e.g. https://rightsstatements.org/page/etc. instead of http://rightsstatements.org/vocab/etc.)"
    end
  end
  # Errors from other rows say nothing about this row's entry, so create it on demand.
  (@error_list[row_number] ||= {}).merge!(uri_errors)
end
|
331
|
+
|
332
|
+
# Checks the multi-value separator usage in each configured separator field of
# row, where character is the separator. Reports whitespace directly before or
# after the separator, values that start like a URI but carry extra text, and
# plain values containing a backslash. Errors are merged into
# @error_list[row_number], creating the entry if needed.
def check_multi_val_fields(row_number, row, character)
  escaped = Regexp.escape(character)
  padded_separator = %r{ #{escaped}|#{escaped} }
  uri_pattern = URI::DEFAULT_PARSER.make_regexp # replaces the obsolete URI.regexp
  invalid_chars = ["\\"]

  separator_errors = @separator_fields.each_with_object({}) do |field, hash|
    value = row[field]
    next unless value

    # Check for leading or trailing spaces
    if value.match?(padded_separator)
      hash[field.to_s] = "Contains leading or trailing whitespace around multi-value separator."
    end

    value.split(character).map(&:strip).each do |val|
      uri_match = val.match(uri_pattern)
      if uri_match # Val should be URI
        remainder = val.gsub(uri_match[0], '')
        unless remainder.match?(/\A[[:space:]]*\z/)
          hash[field.to_s] = "May contain the wrong multi-value separator or a typo in the URI."
        end
      # Or val should be string.
      # NOTE(review): the delete below strips real whitespace characters, which
      # never contain a literal backslash, so it does not appear to exempt typed
      # sequences such as \n - confirm the intent.
      elsif val.delete("\t\r\n\s\n").match?(Regexp.union(invalid_chars))
        hash[field.to_s] = "May contain an invalid character such as #{invalid_chars.to_sentence(last_word_connector: ", or ")}."
      end
    end
  end
  # The row may have no entry yet when the date and URI checks did not run.
  (@error_list[row_number] ||= {}).merge!(separator_errors)
end
|
361
|
+
|
362
|
+
# Page title exposed to the views through helper_method.
def default_page_title
  title = 'CSV Batch Uploader'
  title
end
|
365
|
+
|
366
|
+
# Returns false when multitenancy is disabled and nil when it is enabled, so
# the result is never truthy.
# NOTE(review): confirm that nil (rather than a real check) is intended for
# multitenant installs.
def admin_host?
  return nil if Settings.multitenancy.enabled

  false
end
|
369
|
+
|
370
|
+
# Locale code => language name pairs exposed to the views.
def available_translations
  translations = {}
  translations['en'] = 'English'
  translations['fr'] = 'French'
  translations
end
|
376
|
+
|
377
|
+
# Returns the form class for worktype (Hyrax::<worktype>Form), falling back to
# Hyrax::Forms::WorkForm when no such constant exists. Only NameError triggers
# the fallback, so unrelated failures are no longer hidden.
def work_form(worktype = "GenericWork")
  Module.const_get("Hyrax::#{worktype}Form")
rescue NameError
  Module.const_get("Hyrax::Forms::WorkForm")
end
|
380
|
+
|
381
|
+
# Returns Hyrax::FileSetForm when that constant exists, otherwise
# Hyrax::Forms::FileSetEditForm. Only NameError triggers the fallback.
def file_form
  Module.const_get("Hyrax::FileSetForm")
rescue NameError
  Module.const_get("Hyrax::Forms::FileSetEditForm")
end
|
384
|
+
|
385
|
+
# Terms of the given form class that are neither required fields nor part of
# the fixed list of visibility, embargo, lease and relationship terms below.
def secondary_terms(form_name)
  always_removed = [
    :visibility_during_embargo, :embargo_release_date, :visibility_after_embargo,
    :visibility_during_lease, :lease_expiration_date, :visibility_after_lease,
    :visibility, :thumbnail_id, :representative_id, :ordered_member_ids,
    :collection_ids, :in_works_ids, :admin_set_id, :files, :source,
    :member_of_collection_ids
  ]
  optional_terms = form_name.terms - form_name.required_fields
  optional_terms - always_removed
end
|
393
|
+
|
394
|
+
# Converts a CSV row (data) into an attribute hash for object.
# Only keys accepted by the form class type (required fields plus
# secondary_terms) are kept; blank cells, columns without a header and any
# object_type column are dropped. When the object's current value for a key is
# nil the cell is stored as-is, otherwise it is split on the multi-value
# separator mvs. A blank or missing mvs keeps the cell as a single value
# instead of splitting it into characters or words.
def create_data(data, type, object, mvs)
  accepted_terms = type.required_fields + secondary_terms(type)
  data.each_with_object({}) do |(key, att), final_data|
    next if key.nil? || att.nil? || att.empty?
    next if key.to_s.include?("object_type") || !accepted_terms.include?(key.to_sym)

    final_data[key] =
      if object.send(key).nil?
        att
      elsif mvs.to_s.empty?
        [att]
      else
        att.split(mvs)
      end
  end
end
|
408
|
+
|
409
|
+
# Creates and saves a Hydra::AccessControls::Lease with the given visibility
# during the lease, visibility afterwards (status_after) and expiration date.
# The date argument is used for the expiration; @lease_date, which this class
# never assigns, was read before.
def create_lease(visibility, status_after, date)
  lease = Hydra::AccessControls::Lease.new(visibility_during_lease: visibility,
                                           visibility_after_lease: status_after,
                                           lease_expiration_date: date)
  lease.save
end
|
414
|
+
|
415
|
+
# Creates and saves a Hydra::AccessControls::Embargo using the given visibility
# during the embargo, with @status_after and @embargo_date for the rest.
# NOTE(review): neither @status_after nor @embargo_date is assigned in the
# visible part of this class - confirm where they are expected to come from.
def create_embargo(visibility)
  record = Hydra::AccessControls::Embargo.new.tap do |embargo|
    embargo.visibility_during_embargo = visibility
    embargo.visibility_after_embargo = @status_after
    embargo.embargo_release_date = @embargo_date
  end
  record.save
end
|
422
|
+
|
423
|
+
# Creates a Hyrax::Operation of type "Attach Remote File" for user and returns it.
def log(user)
  attributes = { user: user, operation_type: "Attach Remote File" }
  Hyrax::Operation.create!(**attributes)
end
|
427
|
+
end
|
428
|
+
end
|