cdm_migrator 3.0.1 → 3.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 985574c3d7f661fd0f9d1ce9a15e9269fa4c2dafd10d387110695400aff2b762
4
- data.tar.gz: 0c24c2d7beb6f6d9c66a44dd30393c8dae0cb9222c9d6c69186909cdfe074590
3
+ metadata.gz: 8bc6e448326179791de466ba5dc1d35a26fd48fa2a85a8fbabeb7d8dad60339e
4
+ data.tar.gz: f3cf46dec46cd8166bc0555eef28e2868db6fbbf77963083f93fca1958b00e19
5
5
  SHA512:
6
- metadata.gz: 6b06e22ca127a1b5fa304c18edb939c5082cb3251b7c5be202e51f6914e6ab703a3a7c2a3cc992327ae0122eacc52eec5d5ae810e00eb8531b5e49f5a9481a71
7
- data.tar.gz: 2cefe7be69db9247165ec1c03eeadd5bbffbe0b1ed8044f097e6a6070968e77b224706845eb6072d891cdd5718612c93544321b7a2d9a204af8e92ff4cf6388d
6
+ metadata.gz: 9d8b8bc4d318269375c82429cbcd4bb2e3d07b1761ef54854b262746c09e46430ee3d9da02122567f5244599bb3f312b7248b1b0a610b12abc2a049b0b7470dc
7
+ data.tar.gz: 4dfc8a920f09e1e3c84b5eb09116adadcbf44556c3abe4b4daa1dbce7e87e767ea380de9fbdc5368c548032bf4b6f074a5ab0268fd6600dff75a9530445a7210
data/README.md CHANGED
@@ -27,11 +27,12 @@ to insert the yml and add a link to your Hyrax dashboard
27
27
 
28
28
  ## Usage
29
29
  1. Add your ContentDM url and api port to the cdm_migrator.yml file.
30
- 2. Navigate to the *cdm_migrator/collection* url to select your contentdm collection and what type of work you want to export it to and click "choose mappings".
31
- 3. Map the ContentDM fields to your Hyrax work and file fields\* and click "generate CSV".
32
- 4. Refine the CSV as you see fit.
33
- 5. Navigate to the *cdm_migrator/upload* url; choose your multi-value seperator (default is |) and upload your CSV file.
34
- 6. Done.
30
+ 2. Configure the CSV Checker with the appropriate fields, paths, or multi-value separator (in cdm_migrator.yml).
31
+ 3. Navigate to the *cdm_migrator/collection* url to select your contentdm collection and what type of work you want to export it to and click "choose mappings".
32
+ 4. Map the ContentDM fields to your Hyrax work and file fields\* and click "generate CSV".
33
+ 5. Refine the CSV as you see fit. Optional: go to *cdm_migrator/csv_checker* and upload it to validate metadata fields and/or file paths.
34
+ 6. Navigate to the *cdm_migrator/upload* url; choose your multi-value separator (default is |) and upload your CSV file.
35
+ 7. Done.
35
36
 
36
37
  \* cdm_migrator uses the generated Hyrax forms (e.g. Hyrax::Forms::GenericWorkForm) in your host application to obtain its terms for mapping. If you have added terms to your FileSet model, extend Hyrax::Forms::FileSetEditForm with Hyrax::FileSetForm in your host application so that the changes will be detected by the migrator. You can also add a list of fields in the yml file, under "default_fields".
37
38
 
@@ -0,0 +1,37 @@
1
+ .csv-collapse-link.collapsed {
2
+ display: inline-block;
3
+ vertical-align: top;
4
+ }
5
+
6
+ #csv-collapse-link-text::after {
7
+ content: "❯";
8
+ display: inline-block;
9
+ font-size: smaller;
10
+ right: 15px;
11
+ transform: rotate(90deg);
12
+ margin-left: 0.5em;
13
+ }
14
+
15
+ .collapsed #csv-collapse-link-text::after {
16
+ transform: rotate(0deg);
17
+ transition: transform 0.1s ease;
18
+ margin-left: 2px;
19
+ vertical-align: top;
20
+ }
21
+
22
+ #errors-explanation ul li {
23
+ list-style-type: none;
24
+ margin-bottom: 2px;
25
+ }
26
+
27
+ #errors-explanation ul {
28
+ padding-left: 0;
29
+ }
30
+
31
+ #errors-explanation {
32
+ margin-top: 5px;
33
+ }
34
+
35
+ #csv-form {
36
+ margin-top: 1em;
37
+ }
@@ -104,10 +104,10 @@ module CdmMigrator
104
104
 
105
105
  def load_yaml
106
106
  stripped_url = request.base_url.dup.gsub(/https?:\/\//, '').gsub(/:[0-9]*/,'')
107
- if CdmMigrator::Engine.config['cdm_api'].key? stripped_url
108
- tenant = CdmMigrator::Engine.config['cdm_api'][stripped_url]
107
+ if CdmMigrator::Engine.config['tenant_settings'].key? stripped_url
108
+ tenant = CdmMigrator::Engine.config['tenant_settings'][stripped_url]['cdm_api']
109
109
  else
110
- tenant = CdmMigrator::Engine.config['cdm_api']['default']
110
+ tenant = CdmMigrator::Engine.config['tenant_settings']['default']['cdm_api']
111
111
  end
112
112
  @cdm_url = tenant['url']
113
113
  @cdm_port = tenant['port']
@@ -121,7 +121,7 @@ module CdmMigrator
121
121
  filename = child ? child['pagefile'] : "#{rec.first}.#{rec.last}"
122
122
 
123
123
  if params[:file_system]=='true'
124
- "file://#{file_path(rec.first)}"
124
+ "file://#{file_path(cisoptr)}"
125
125
  elsif @cdm_api == 'server'
126
126
  "#{@cdm_url}:#{@cdm_port}/cgi-bin/showfile.exe?CISOROOT=#{params[:collection]}&CISOPTR=#{cisoptr}"
127
127
  else
@@ -1,20 +1,20 @@
1
1
  module CdmMigrator
2
- class CsvController < ApplicationController
3
- helper_method :default_page_title, :admin_host?, :available_translations, :available_works
2
+ class CsvController < ApplicationController
3
+ helper_method :default_page_title, :admin_host?, :available_translations, :available_works
4
4
  include ActionView::Helpers::UrlHelper
5
- layout 'hyrax/dashboard' if Hyrax
5
+ layout 'hyrax/dashboard' if Hyrax
6
6
  before_action :authenticate, except: :index
7
+ before_action :load_config, only: :csv_checker
7
8
 
8
- def file_path_checker
9
- if params[:file]
10
- check_paths params[:file].path
11
-
12
- if @path_list.blank?
13
- flash[:notice] = "All file paths are valid."
14
- else
15
- flash[:error] = "Cdm Migrator couldn't find files at the following urls. Please correct the paths and try again."
16
- end
17
- end
9
+ def csv_checker
10
+ if params[:file]
11
+ check_csv params[:file].path
12
+ if @error_list.blank?
13
+ flash[:notice] = "All data are valid."
14
+ else
15
+ flash[:error] = "The CSV Checker found some errors in the CSV. Please correct them and check again."
16
+ end
17
+ end
18
18
  end
19
19
 
20
20
  def index
@@ -27,34 +27,34 @@ module CdmMigrator
27
27
  end
28
28
  end
29
29
 
30
- def upload
31
- @admin_sets = AdminSet.all.map { |as| [as.title.first, as.id] }
32
- @collections = Collection.all.map { |col| [col.title.first, col.id] }
33
- end
30
+ def upload
31
+ @admin_sets = AdminSet.all.map { |as| [as.title.first, as.id] }
32
+ @collections = Collection.all.map { |col| [col.title.first, col.id] }
33
+ end
34
34
 
35
- def create
36
- dir = Rails.root.join('public', 'uploads', 'csvs')
37
- FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
38
- time = DateTime.now.strftime('%s')
39
- filename = params[:csv_import][:csv_file].original_filename.gsub('.csv',"#{time}.csv")
40
- csv = dir.join(filename).to_s
41
- File.open(csv, 'wb') do |file|
42
- file.write(params[:csv_import][:csv_file].read)
35
+ def create
36
+ dir = Rails.root.join('public', 'uploads', 'csvs')
37
+ FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
38
+ time = DateTime.now.strftime('%s')
39
+ filename = params[:csv_import][:csv_file].original_filename.gsub('.csv', "#{time}.csv")
40
+ csv = dir.join(filename).to_s
41
+ File.open(csv, 'wb') do |file|
42
+ file.write(params[:csv_import][:csv_file].read)
43
43
  end
44
- check_paths csv
45
- if @path_list.present?
46
- flash[:error] = "some file paths are invalid"
44
+ check_csv csv
45
+ if @error_list.present?
46
+ flash[:error] = "Cdm Migrator found some problems with the CSV. Use the CSV Checker for more details."
47
47
  end
48
48
  parse_csv(csv, params[:csv_import][:mvs])
49
49
 
50
50
  ingest = BatchIngest.new({
51
- data: @works,
52
- size: @works.length,
53
- csv: csv,
54
- admin_set_id: params[:admin_set],
51
+ data: @works,
52
+ size: @works.length,
53
+ csv: csv,
54
+ admin_set_id: params[:admin_set],
55
55
  collection_id: params[:collection],
56
- user_id: current_user.id,
57
- message: @path_list.blank? ? nil : @path_list.to_s.gsub("\"","&quot;")
56
+ user_id: current_user.id,
57
+ message: @path_list.blank? ? nil : @path_list.to_s.gsub("\"", "&quot;")
58
58
  })
59
59
  if ingest.save! && @path_list.blank?
60
60
  BatchCreateWorksJob.perform_later(ingest, current_user)
@@ -75,9 +75,8 @@ module CdmMigrator
75
75
  end
76
76
 
77
77
  def generate
78
- headers = ['type','url']
79
- skip = ["id", "head", "tail", "depositor", "date_uploaded", "date_modified", "import_url", "thumbnail_id",
80
- "embargo_id", "lease_id", "access_control_id", "representative_id"]
78
+ headers = %w(type url)
79
+ skip = %w(id head tail depositor date_uploaded date_modified import_url thumbnail_id embargo_id lease_id access_control_id representative_id)
81
80
  GenericWork.new.attributes.each do |key, val|
82
81
  headers << "work_#{key}" unless skip.include? key
83
82
  end
@@ -87,19 +86,109 @@ module CdmMigrator
87
86
  fname = "template_#{DateTime.now.to_i}"
88
87
  render plain: CSV.generate { |csv| csv << headers }, content_type: 'text/csv'
89
88
  end
90
-
91
- private
89
+
90
+ def edit
91
+ @collections = ::Collection.all.map { |c| [c.title.first, c.id] }
92
+ end
93
+
94
+ def update
95
+ mvs = params[:csv_update][:mvs]
96
+ csv = CSV.parse(params[:csv_update][:csv_file].read, headers: true, encoding: 'utf-8').map(&:to_hash)
97
+ csv.each do |row|
98
+ obj = ActiveFedora::Base.find row['id']
99
+ type = row.first.last
100
+ if type.nil?
101
+ next
102
+ elsif type.include? "Work"
103
+ metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
104
+ elsif type.include? "File"
105
+ metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
106
+ end
107
+ unless metadata.nil?
108
+ obj.attributes = metadata
109
+ obj.save
110
+ end
111
+ end
112
+ flash[:notice] = "csv successfully uploaded"
113
+ redirect_to csv_edit_path
114
+ end
115
+
116
+ def export
117
+ solr = RSolr.connect url: Account.find_by(tenant: Apartment::Tenant.current).solr_endpoint.url
118
+ response = solr.get 'select', params: {
119
+ q: "member_of_collection_ids_ssim:#{params[:collection_id]}",
120
+ rows: 3400,
121
+ fl: "id"
122
+ }
123
+ unless response['response']['docs'].empty? || response['response']['docs'][0].empty?
124
+ work_ids = response['response']['docs'].map { |doc| doc['id'] }
125
+ end
126
+ #works = ::ActiveFedora::Base.where member_of_collection_ids_ssim: params[:collection_id]
127
+ @csv_headers = ['type'] + work_fields
128
+ @csv_array = [@csv_headers.join(',')]
129
+ work_ids.each do |work_id|
130
+ doc = ::SolrDocument.find work_id
131
+ add_line doc
132
+ doc._source[:file_set_ids_ssim].each do |file_id|
133
+ file_doc = ::SolrDocument.find file_id
134
+ add_line file_doc
135
+ end
136
+ end
137
+
138
+ send_data @csv_array.join("\n"),
139
+ :type => 'text/csv; charset=iso-8859-5; header=present',
140
+ :disposition => "attachment; filename=export.csv"
141
+ end
142
+
143
+ private
92
144
 
93
145
  def authenticate
94
146
  authorize! :create, available_works.first
95
147
  end
96
148
 
149
+ def add_line doc
150
+ line_hash = {}
151
+ line_hash['type'] = doc._source[:has_model_ssim].first
152
+ work_fields.each do |field|
153
+ line_hash[field] = create_cell doc, field
154
+ end
155
+ @csv_array << line_hash.values_at(*@csv_headers).map { |cell| cell = '' if cell.nil?; "\"#{cell.gsub("\"", "\"\"")}\"" }.join(',')
156
+
157
+ end
158
+
159
+ def work_fields
160
+ @fields ||= available_works.map { |work| work.new.attributes.keys }.flatten.uniq - excluded_fields
161
+ end
162
+
163
+ def excluded_fields
164
+ %w[date_uploaded date_modified head tail state proxy_depositor on_behalf_of arkivo_checksum label
165
+ relative_path import_url part_of resource_type access_control_id
166
+ representative_id thumbnail_id rendering_ids admin_set_id embargo_id
167
+ lease_id]
168
+ end
169
+
170
+ def create_cell w, field
171
+ if field.include? 'date'
172
+ if w._source[field+'_tesim'].is_a?(Array)
173
+ w._source[field+'_tesim'].join('|')
174
+ else
175
+ w._source[field+'_tesim']
176
+ end
177
+ elsif w.respond_to?(field.to_sym)
178
+ if w.send(field).is_a?(Array)
179
+ w.send(field).join('|')
180
+ else
181
+ w.send(field)
182
+ end
183
+ end
184
+ end
185
+
97
186
  def available_works
98
187
  @available_works ||= Hyrax::QuickClassificationQuery.new(current_user).authorized_models
99
188
  end
100
189
 
101
190
  def parse_csv csv, mvs
102
- csv = CSV.parse(File.read(csv), headers: true, encoding: 'utf-8').map(&:to_hash)
191
+ csv = CSV.parse(File.read(csv), headers: true, encoding: 'utf-8').map(&:to_hash)
103
192
  @works = []
104
193
  csv.each do |row|
105
194
  type = row.first.last
@@ -115,19 +204,140 @@ module CdmMigrator
115
204
  end
116
205
  end
117
206
 
118
- def check_paths csv_file
119
- row_number = 1 # +1 offset to account for csv headers
120
- @path_list = {}
207
+ def load_config
208
+ tenant = Account.find_by(tenant: Apartment::Tenant.current).cname
209
+ if CdmMigrator::Engine.config['tenant_settings'].has_key?(tenant)
210
+ settings = CdmMigrator::Engine.config['tenant_settings'][tenant]['csv_checker']
211
+ if settings.present?
212
+ # .map will throw an error if settings[key] has no value
213
+ @edtf_fields = settings['edtf_fields'].map(&:to_sym) if settings['edtf_fields']
214
+ @uri_fields = settings['valid_uri_fields'].map(&:to_sym) if settings['valid_uri_fields']
215
+ @separator = settings['multi_value_separator']
216
+ @separator_fields = settings['separator_fields'].map(&:to_sym) if settings['separator_fields']
217
+ @path_to_drive = settings['path_to_drive']
218
+ else
219
+ raise "Cdm Migrator couldn't find any configured settings. Are they in cdm_migrator.yml?"
220
+ end
221
+ else
222
+ raise "Cdm Migrator couldn't find this tenant. Is it configured?"
223
+ end
224
+ end
225
+
226
+ def check_csv csv_file
227
+ row_number = 1
228
+ @error_list = {}
229
+ check_mounted_drive if @path_to_drive.present?
230
+
231
+ CSV.foreach(csv_file, headers: true, header_converters: :symbol) do |row|
232
+ row_number +=1 # Tells user what CSV row the error is on
233
+ if row[:object_type].include? "Work"
234
+ check_edtf(row_number, row) if @edtf_fields.present?
235
+ check_uris(row_number, row) if @uri_fields.present?
236
+ if params[:multi_value_separator].present? and @separator_fields.present?
237
+ check_separator(row_number, row, params[:multi_value_separator])
238
+ else
239
+ alert_message = "No multi-value separator character was selected or no fields were configured. CSV Checker didn't check for valid separators."
240
+ if flash[:alert] and flash[:alert].exclude?(alert_message) # Only add this message once, rather than per line
241
+ flash[:alert] << alert_message
242
+ elsif flash[:alert].blank?
243
+ flash[:alert] = Array.wrap(alert_message)
244
+ end
245
+ end
246
+ elsif row[:object_type] == "File"
247
+ check_file_path(row_number, row[:url])
248
+ else
249
+ @error_list[row_number] = { "object_type" => "No or unknown object type. Please give a valid type (e.g. GenericWork, File)." }
250
+ end
251
+ @error_list.delete_if { |key, value| value.blank? } # Data are valid, no need to print the row
252
+ end
253
+ end
254
+
255
+ def check_mounted_drive
256
+ drive_address = @path_to_drive
257
+ unless Dir.exist?(drive_address) and !Dir[drive_address].empty?
258
+ flash[:alert] = "CSV Checker can't find the mounted drive to check file paths, so some paths may be mislabelled as incorrect. Please contact the administrator or try again later."
259
+ end
260
+ end
261
+
262
+ def check_file_path(row_number, file_path)
263
+ if file_path.nil?
264
+ @error_list[row_number] = { "url" => "url is blank." }
265
+ elsif File.file?(file_path.gsub("file://", "")) == false
266
+ @error_list[row_number] = { "url" => "No file found at #{file_path}" }
267
+ end
268
+ end
269
+
270
+ def check_edtf(row_number, row)
271
+ edtf_fields = @edtf_fields
272
+ edtf_errors = edtf_fields.each_with_object({}) do |field, hash|
273
+ temp_date = row[field]
274
+ # modify date so that the interval encompasses the years on the last interval date
275
+ temp_date = temp_date.gsub('/..','').gsub('%','?~').gsub(/\/$/,'')
276
+ date = temp_date.include?("/") ? temp_date.gsub(/([0-9]+X+\/)([0-9]+)(X+)/){"#{$1}"+"#{$2.to_i+1}"+"#{$3}"}.gsub("X","u") : temp_date
277
+ date = date.gsub("XX-","uu-").gsub("X-", "u-").gsub('XX?','uu').gsub('X?', 'u').gsub('u?','u').gsub('?','')
278
+ # edtf has trouble with year-month (e.g. "19uu-12") or year-season strings (e.g. "190u-23")
279
+ # that contain unspecified years, or intervals containing the above ("19uu-22/19uu-23", etc.).
280
+ # So we check for/create exceptions.
281
+ # Check for season interval
282
+ if Date.edtf(date) == nil and date != "unknown" # Accept season intervals
283
+ unless is_season?(date.split("/").first) and is_season?(date.split("/").second)
284
+ # If an interval then, check each date individually
285
+ if date.include?("/")
286
+ dates = date.split("/")
287
+ else
288
+ dates = [date]
289
+ end
290
+ #byebug
291
+ dates.each do |d|
292
+ # Dates with 'u' in the last digit of the year return invalid when in format YYYY-MM
293
+ # So we flub day specifity before checking again if the date is valid
294
+ unless Date.edtf(d + '-01') # Date.edtf('193u-03-01') returns valid
295
+ if match = d[/\d{3}u/] or match = d[/\d{2}u{2}-[2][1-4]/] # edtf can't parse single u in year (e.g. 192u) or uu in YYYY-SS (e.g. 19uu-21), so we replace it
296
+ d.gsub!(match, match.gsub("u","0"))
297
+ unless Date.edtf(d)
298
+ hash[field.to_s] = "Blank or not a valid EDTF date."
299
+ end
300
+ else
301
+ hash[field.to_s] = "Blank or not a valid EDTF date."
302
+ end
303
+ end
304
+ end
305
+ end
306
+ end
307
+
308
+ end
309
+ @error_list[row_number] = edtf_errors
310
+ end
311
+
312
+ def is_season?(date)
313
+ Date.edtf(date).class == EDTF::Season
314
+ end
315
+
316
+ # <Example: should be http://rightsstatements.org/vocab/etc. NOT https://rightsstatements.org/page/etc.
317
+ def check_uris(row_number, row)
318
+ uri_fields = @uri_fields
319
+ uri_errors = uri_fields.each_with_object({}) do |field, hash|
320
+ if row[field].include? "page"
321
+ hash[field.to_s] = "Links to page instead of URI. (e.g. https://rightsstatements.org/page/etc. instead of http://rightsstatements.org/vocab/etc.)"
322
+ end
323
+ end
324
+ @error_list[row_number].merge!(uri_errors)
325
+ end
121
326
 
122
- CSV.foreach(csv_file, headers: true, header_converters: :symbol) do |row|
123
- row_number +=1 # Tells user what CSV row the bogus file path is on
124
- next if row[:url].nil?
125
- file_path = row[:url]
126
- unless File.file?(file_path.gsub("file://", ""))
127
- @path_list[row_number] = file_path
327
+ # Check multi-value separators
328
+ def check_separator(row_number, row, character)
329
+ uri_fields = @separator_fields
330
+ separator_errors = uri_fields.each_with_object({}) do |field, hash|
331
+ value = row[field]
332
+ if value.present?
333
+ URI.extract(value).each { |uri| value.gsub!(uri, '') }
334
+ unless value.split("").all? { |sep| sep == character } # Check if remaining characters are the correct separator
335
+ hash[field.to_s] = "May contain the wrong multi-value separator (i.e. not #{character})."
128
336
  end
129
337
  end
130
338
  end
339
+ @error_list[row_number].merge!(separator_errors)
340
+ end
131
341
 
132
342
  def default_page_title
133
343
  'CSV Batch Uploader'
@@ -139,8 +349,8 @@ module CdmMigrator
139
349
 
140
350
  def available_translations
141
351
  {
142
- 'en' => 'English',
143
- 'fr' => 'French'
352
+ 'en' => 'English',
353
+ 'fr' => 'French'
144
354
  }
145
355
  end
146
356
 
@@ -162,12 +372,12 @@ module CdmMigrator
162
372
  end
163
373
 
164
374
  def create_data data, type, object, mvs
165
- final_data = {}
375
+ final_data = {}
166
376
  accepted_terms = type.required_fields + secondary_terms(type)
167
377
  data.each do |key, att|
168
- if(att.nil? || att.empty? || key.to_s.include?("object_type") || !accepted_terms.include?(key.to_sym) )
378
+ if (att.nil? || att.empty? || key.to_s.include?("object_type") || !accepted_terms.include?(key.to_sym))
169
379
  next
170
- elsif(object.send(key).nil?)
380
+ elsif (object.send(key).nil?)
171
381
  final_data[key] = att
172
382
  else
173
383
  final_data[key] = att.split(mvs)
@@ -176,23 +386,23 @@ module CdmMigrator
176
386
  final_data
177
387
  end
178
388
 
179
- def create_lease visibility, status_after, date
180
- lease = Hydra::AccessControls::Lease.new(visibility_during_lease: visibility,
181
- visibility_after_lease: status_after, lease_expiration_date: @lease_date)
182
- lease.save
183
- end
389
+ def create_lease visibility, status_after, date
390
+ lease = Hydra::AccessControls::Lease.new(visibility_during_lease: visibility,
391
+ visibility_after_lease: status_after, lease_expiration_date: @lease_date)
392
+ lease.save
393
+ end
184
394
 
185
- def create_embargo visibility
186
- embargo = Hydra::AccessControls::Embargo.new
187
- embargo.visibility_during_embargo = visibility
188
- embargo.visibility_after_embargo = @status_after
189
- embargo.embargo_release_date = @embargo_date
190
- embargo.save
191
- end
395
+ def create_embargo visibility
396
+ embargo = Hydra::AccessControls::Embargo.new
397
+ embargo.visibility_during_embargo = visibility
398
+ embargo.visibility_after_embargo = @status_after
399
+ embargo.embargo_release_date = @embargo_date
400
+ embargo.save
401
+ end
192
402
 
193
403
  def log(user)
194
- Hyrax::Operation.create!(user: user,
195
- operation_type: "Attach Remote File")
404
+ Hyrax::Operation.create!(user: user,
405
+ operation_type: "Attach Remote File")
196
406
  end
197
- end
407
+ end
198
408
  end
@@ -2,7 +2,7 @@ module CdmMigrator
2
2
  class BatchCreateFilesJob < ActiveJob::Base
3
3
  queue_as Hyrax.config.ingest_queue_name
4
4
 
5
- def perform work, ingest_work, user, last_work=false
5
+ def perform work, ingest_work, user
6
6
  ingest_work.files.each do |file|
7
7
  url = file[:url]
8
8
  last_file = ingest_work.files.last==file
@@ -11,10 +11,9 @@ module CdmMigrator
11
11
  actor = Hyrax::Actors::FileSetActor.new(fs, user)
12
12
  actor.create_metadata#(work, visibility: work.visibility)
13
13
  actor.attach_file_to_work(work)
14
- #byebug
15
14
  fs.attributes = file[:metadata]
16
15
  fs.save!
17
- CdmIngestFilesJob.perform_later(fs, url, user, ingest_work, last_file, last_work)
16
+ CdmIngestFilesJob.perform_later(fs, url, user, ingest_work, last_file)
18
17
  end
19
18
  end
20
19
  end
@@ -4,10 +4,9 @@ module CdmMigrator
4
4
 
5
5
  def perform(ingest, user)
6
6
  ingest.data.each do |w|
7
- last_work = ingest.data.last==w
8
7
  ingest_work = IngestWork.new(w, ingest.id)
9
8
  ingest_work.save!
10
- CreateWorkJob.perform_later ingest_work, user, ingest.admin_set_id, ingest.collection_id, last_work
9
+ CreateWorkJob.perform_later ingest_work, user, ingest.admin_set_id, ingest.collection_id
11
10
  end
12
11
 
13
12
  end
@@ -2,7 +2,7 @@ module CdmMigrator
2
2
  class CdmIngestFilesJob < ActiveJob::Base
3
3
  queue_as Hyrax.config.ingest_queue_name
4
4
 
5
- def perform(fs, url, user, ingest_work = nil, last_file = false, last_work = false)
5
+ def perform(fs, url, user, ingest_work = nil, last_file = false)
6
6
  if url.include?("http") && File.extname(url).include?("pdf")
7
7
  download = open(url)
8
8
  dir = Rails.root.join('public', 'uploads', 'csv_pdfs')
@@ -16,14 +16,14 @@ module CdmMigrator
16
16
  IO.copy_stream(download, url)
17
17
  url = "file://"+url.to_s
18
18
  end
19
- uri = URI.parse(url.gsub(' ','%20'))
19
+ uri = URI.parse(url.gsub(' ','%20').gsub(/[\[\]@#\$\*{}]/, ""))
20
20
  if uri.scheme == 'file'
21
- IngestLocalFileJob.perform_now(fs, uri.path.gsub('%20',' '), user)
21
+ IngestLocalFileJob.perform_now(fs, url.gsub('file://',''), user)
22
22
  else
23
+ URI.parse(url.gsub(' ','%20'))
23
24
  ImportUrlJob.perform_now(fs, log(user))
24
25
  end
25
26
  ingest_work.update_attribute('complete', true) if last_file
26
- BatchIngest.find(ingest_work.id).update_attribute('complete', true) if last_work
27
27
  end
28
28
 
29
29
  def log(user)
@@ -2,18 +2,24 @@ module CdmMigrator
2
2
  class CreateWorkJob < ActiveJob::Base
3
3
  queue_as Hyrax.config.ingest_queue_name
4
4
 
5
- def perform(ingest_work, user, admin_set_id, collection_id, last_work=false)
5
+ def perform(ingest_work, user, admin_set_id, collection_id)
6
6
  admin_set = ::AdminSet.find(admin_set_id) rescue nil
7
7
  collection = ::Collection.find(collection_id) rescue nil
8
8
  work = Object.const_get(ingest_work.work_type).new
9
9
  #status_after, embargo_date, lease_date = nil, nil, nil
10
10
  work.apply_depositor_metadata(user)
11
11
  work.attributes = ingest_work.data
12
+ if ingest_work.data.has_key? 'downloadable'
13
+ # Convert string to boolean
14
+ work.downloadable = ActiveModel::Type::Boolean.new.cast(ingest_work.data['downloadable'])
15
+ elsif work.attributes.include? 'downloadable' # Set work to downloadable by default
16
+ work.downloadable = true
17
+ end
12
18
  work.member_of_collections = [collection] if collection
13
19
  work.admin_set = admin_set if admin_set
14
20
  work.date_uploaded = DateTime.now
15
21
  work.save
16
- BatchCreateFilesJob.perform_later work, ingest_work, user, last_work
22
+ BatchCreateFilesJob.perform_later work, ingest_work, user
17
23
 
18
24
  end
19
25
  end
@@ -0,0 +1,10 @@
1
+ module CdmMigrator
2
+ class UpdateObjectJob < ActiveJob::Base
3
+
4
+
5
+ def perform(attributes)
6
+ obj = ActiveFedora::Base.find
7
+
8
+ end
9
+ end
10
+ end
@@ -7,11 +7,14 @@ module CdmMigrator
7
7
  end
8
8
 
9
9
  def progress
10
- if complete?
10
+ return "Complete" if complete?
11
+ completed = IngestWork.where(batch_ingest_id: id, complete: true ).length
12
+ if completed==data.length
13
+ complete=true
14
+ save
11
15
  "Complete"
12
16
  else
13
- completed = IngestWork.where(batch_ingest_id: id, complete: true ).length.to_s
14
- "#{completed}/#{size}"
17
+ "#{completed.to_s}/#{size}"
15
18
  end
16
19
  end
17
20
 
@@ -20,7 +23,7 @@ module CdmMigrator
20
23
  end
21
24
 
22
25
  def complete?
23
- self.complete
26
+ complete
24
27
  end
25
28
 
26
29
  def message?
@@ -30,7 +30,7 @@ border:1px solid black;
30
30
  </tr>
31
31
  <% end %>
32
32
  </table>
33
- <%= select_tag "mappings_url", options_for_select(@dirs) if @cdm_dirs %>
33
+ <%= select_tag "mappings_url", options_for_select(@dirs.sort { |x,y| x[0].downcase <=> y[0].downcase }) if @cdm_dirs %>
34
34
  <%= hidden_field_tag "work", params[:work] %>
35
35
  <%= submit_tag 'generate csv'%>
36
36
  <% end %>
@@ -0,0 +1,21 @@
1
+ <% if @error_list && @error_list.any? %>
2
+ <%#= @error_list.inspect %>
3
+ <table class="table table-striped">
4
+ <thead>
5
+ <tr>
6
+ <th scope="col" style="min-width: 100px;">Line No.</th>
7
+ <th scope="col" style="margin-right: 0.5em;">Column(s)</th>
8
+ <th scope="col">Issue</th>
9
+ </tr>
10
+ </thead>
11
+ <tbody>
12
+ <% @error_list.keys.each do |line_number| %>
13
+ <tr>
14
+ <td><%= line_number %></td>
15
+ <td><%= @error_list[line_number].keys.join("<br />").html_safe %></td>
16
+ <td><%= @error_list[line_number].values.join("<br />").html_safe %></td>
17
+ </tr>
18
+ <% end %>
19
+ </tbody>
20
+ </table>
21
+ <% end %>
@@ -0,0 +1,46 @@
1
+ <% provide :page_title, "CSV Checker" %>
2
+
3
+ <%#= flash[:alert].join("<br/>") if flash[:alert] %>
4
+
5
+ <h1>CSV Checker</h1>
6
+
7
+ <!-- <div class="row"> -->
8
+ <p>This tool validates CSV data and creates a table listing any errors.</p>
9
+
10
+ <a role="button" class="collapse-toggle collapsed csv-collapse-link" data-toggle="collapse" data-target="#errors-explanation" aria-expanded="false">
11
+ <span id="csv-collapse-link-text">Expand for more details</span>
12
+ </a>
13
+ <div class="collapse" id="errors-explanation">
14
+ <ul>
15
+ <li><strong>File paths: </strong>The url field contains a valid path to a file.</li>
16
+ <li><strong>Multi-value separator: </strong>Configured fields with URIs contain the right separator character (e.g. |).</li>
17
+ <% if @path_to_drive.present? %>
18
+ <li><strong>Mounted drive: </strong>A mounted directory (folder) exists and is not empty.</li>
19
+ <% end %>
20
+ <% if @edtf_fields.present? %>
21
+ <li><strong>EDTF dates: </strong>Configured fields contain valid <a href="https://www.loc.gov/standards/datetime/" target="_blank">EDTF</a> dates or "unknown."</li>
22
+ <% end %>
23
+ <% if @uri_fields.present? %>
24
+ <li><strong>Valid URIs ("page" vs "vocab"): </strong>Configured fields with URIs link to the "vocab" address rather than the "page" address. For example, rights_statement should be "http://rightsstatement.org/vocab/..." and not "https://rightsstatement.org/page/..."</li>
25
+ <% end %>
26
+ </ul>
27
+ </div>
28
+
29
+
30
+
31
+ <%= form_tag(check_csv_path, remote: true, method: :post, multipart: true, id: "csv-form") do %>
32
+ <div class="input-group">
33
+ <%= label_tag :multi_value_separator %>
34
+ <%= text_field_tag(:multi_value_separator, @separator, size: 1) %>
35
+ <%= file_field_tag(:file, class: "form-control-file") %>
36
+ <%= hidden_field_tag :authenticity_token, value: form_authenticity_token %>
37
+ <%= button_tag(type: :submit, class: "btn btn-large btn-primary", style: "margin-top: 1em;") do %>
38
+ Check CSV
39
+ <% end %>
40
+ <% end %>
41
+ </div>
42
+ <!-- </div> -->
43
+
44
+ <div id="error_list" class="col-md-10 offset-md-1">
45
+ <%= render 'error_list' %>
46
+ </div>
@@ -0,0 +1,18 @@
1
+
2
+ <h1><span class="fa fa-map"></span> Export </h1>
3
+
4
+ <%= form_tag main_app.csv_export_path, method: :post do %>
5
+ <%= select_tag 'collection_id', options_for_select(@collections), include_blank: true %>
6
+ <%= submit_tag "Download CSV", class: 'btn btn-primary' %>
7
+ <% end %>
8
+
9
+ <h1><span class="fa fa-map"></span> Update </h1>
10
+
11
+ <%= form_for :csv_update, url: csv_update_path do |f| %>
12
+ <%= f.label 'Multi-value Separator:' %>
13
+ <%= f.text_field 'mvs' %>
14
+ <br />
15
+ <%= f.file_field 'csv_file' %>
16
+ <br />
17
+ <%= f.submit 'Save' %>
18
+ <% end %>
@@ -5,9 +5,12 @@ Rails.application.routes.draw do
5
5
  get '/cdm_migrator/my/batches', to: 'cdm_migrator/csv#index', as: 'csv_my_batches'
6
6
  get '/cdm_migrator/batches', to: 'cdm_migrator/csv#index', as: 'csv_all_batches'
7
7
  get '/cdm_migrator/rerun/:id', to: 'cdm_migrator/csv#rerun', as: 'csv_rerun'
8
+ get '/cdm_migrator/edit', to: 'cdm_migrator/csv#edit', as: 'csv_edit'
9
+ post '/cdm_migrator/export', to: 'cdm_migrator/csv#export', as: 'csv_export'
10
+ post '/cdm_migrator/update', to: 'cdm_migrator/csv#update', as: 'csv_update'
8
11
 
9
- get '/cdm_migrator/file_path_checker', to: 'cdm_migrator/csv#file_path_checker', as: 'file_path_checker'
10
- post '/cdm_migrator/file_path_checker', to: 'cdm_migrator/csv#file_path_checker', as: 'check_file_paths'
12
+ get '/cdm_migrator/csv_checker', to: 'cdm_migrator/csv#csv_checker', as: 'csv_checker'
13
+ post '/cdm_migrator/csv_checker', to: 'cdm_migrator/csv#csv_checker', as: 'check_csv'
11
14
 
12
15
  get '/cdm_migrator/collection', to: 'cdm_migrator/cdm#collection', as: 'cdm_start'
13
16
  post '/cdm_migrator/mappings/', to: 'cdm_migrator/cdm#mappings', as: 'cdm_mappings'
@@ -1,3 +1,3 @@
1
1
  module CdmMigrator
2
- VERSION = '3.0.1'
2
+ VERSION = '3.2.1'
3
3
  end
@@ -22,8 +22,8 @@ class CdmMigrator::InstallGenerator < Rails::Generators::Base
22
22
  " <%= menu.nav_link(main_app.cdm_start_path) do %>\n" \
23
23
  " <span class=\"fa fa-map\"></span> <span class=\"sidebar-action-text\"><%= t('CDM Mapping Tool') %></span>\n" \
24
24
  " <% end %>\n" \
25
- " <%= menu.nav_link(main_app.file_path_checker_path) do %>\n" \
26
- " <span class=\"fa fa-check-circle\"></span><span>File Path Checker</span>\n" \
25
+ " <%= menu.nav_link(main_app.csv_checker_path) do %>\n" \
26
+ " <span class=\"fa fa-check-circle\"></span><span>CSV Checker</span>\n" \
27
27
  " <% end %>\n" \
28
28
  " <%= menu.nav_link(main_app.csv_upload_path) do %>\n"\
29
29
  " <span class=\"fa fa-angle-double-up\"></span> <span class=\"sidebar-action-text\"><%= t('CSV Batch Uploader') %></span>\n" \
@@ -67,5 +67,13 @@ class CdmMigrator::InstallGenerator < Rails::Generators::Base
67
67
  def inject_content_dm_yml
68
68
  copy_file("config/cdm_migrator.yml", "config/cdm_migrator.yml") unless File.file?("config/cdm_migrator.yml")
69
69
  end
70
+
71
+ def inject_stylesheets
72
+ css_file_path = "app/assets/stylesheets/application.css"
73
+ copy_file("stylesheets/csv_checker.css", "app/assets/stylesheets/csv_checker.css") unless File.file?("app/assets/stylesheets/csv_checker.css")
74
+ insert_into_file css_file_path, :before => " *= require_self\n" do
75
+ " *= require csv_checker\n "
76
+ end
77
+ end
70
78
 
71
79
  end
@@ -1,25 +1,38 @@
1
- cdm_api:
1
+ tenant_settings:
2
2
  tenant1.institution.com:
3
- url: 'http://your-content-dm-host'
4
- port: 8080
5
- type: 'front'
6
- dirs:
7
- dir1: '/dir1/path/goes/here'
8
- dir2: '/dir2/path/goes/here'
3
+ cdm_api:
4
+ url: 'http://your-content-dm-host'
5
+ port: 8080
6
+ type: 'front'
7
+ dirs:
8
+ dir1: '/dir1/path/goes/here'
9
+ dir2: '/dir2/path/goes/here'
9
10
  tenant2.institution.com:
10
- url: 'http://your-content-dm-host'
11
- port: 8080
12
- type: 'front'
13
- dirs:
14
- dir1: '/dir1/path/goes/here'
15
- dir2: '/dir2/path/goes/here'
11
+ cdm_api:
12
+ url: 'http://your-content-dm-host'
13
+ port: 8080
14
+ type: 'front'
15
+ dirs:
16
+ dir1: '/dir1/path/goes/here'
17
+ dir2: '/dir2/path/goes/here'
16
18
  default:
17
- url: 'http://your-content-dm-host'
18
- port: 8080
19
- type: 'front'
20
- dirs:
21
- dir1: '/dir1/path/goes/here'
22
- dir2: '/dir2/path/goes/here'
19
+ cdm_api:
20
+ url: 'http://your-content-dm-host'
21
+ port: 8080
22
+ type: 'front'
23
+ dirs:
24
+ dir1: '/dir1/path/goes/here'
25
+ dir2: '/dir2/path/goes/here'
26
+ csv_checker:
27
+ edtf_fields:
28
+ # - date_created
29
+ valid_uri_fields:
30
+ # - rights_statement
31
+ # - genre
32
+ separator_fields:
33
+ # - subject
34
+ multi_value_separator: # '|'
35
+ path_to_drive: #'/mnt/drive'
23
36
  default_fields:
24
37
  - title
25
38
  - creator
@@ -8,8 +8,8 @@
8
8
  <span class="fa fa-map"></span> <span class="sidebar-action-text"><%= t('CDM Mapping Tool') %></span>
9
9
  <% end %>
10
10
 
11
- <%= menu.nav_link(main_app.file_path_checker_path) do %>
12
- <span class="fa fa-check-circle"></span><span class="sidebar-action-text"><%= t('File Path Checker') %></span>
11
+ <%= menu.nav_link(main_app.csv_checker_path) do %>
12
+ <span class="fa fa-check-circle"></span><span class="sidebar-action-text"><%= t('CSV Checker') %></span>
13
13
  <% end %>
14
14
 
15
15
  <%= menu.nav_link(main_app.csv_upload_path) do %>
@@ -19,6 +19,10 @@
19
19
  <%= menu.nav_link(main_app.csv_my_batches_path) do %>
20
20
  <span class="fa fa-database"></span> <span class="sidebar-action-text"><%= t('Batches') %></span>
21
21
  <% end %>
22
+
23
+ <%= menu.nav_link(main_app.csv_edit_path) do %>
24
+ <span class="fa fa-angle-double-up"></span> <span class="sidebar-action-text"><%= t('Batch Update') %></span>
25
+ <% end %>
22
26
  <% end %>
23
27
  </li>
24
28
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdm_migrator
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.1
4
+ version: 3.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - sephirothkod
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-12-20 00:00:00.000000000 Z
11
+ date: 2020-11-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -81,6 +81,7 @@ files:
81
81
  - app/assets/config/cdm_migrator_manifest.js
82
82
  - app/assets/javascripts/cdm_migrator/application.js
83
83
  - app/assets/stylesheets/cdm_migrator/application.css
84
+ - app/assets/stylesheets/cdm_migrator/csv_checker.css
84
85
  - app/controllers/cdm_migrator/application_controller.rb
85
86
  - app/controllers/cdm_migrator/cdm_controller.rb
86
87
  - app/controllers/cdm_migrator/csv_controller.rb
@@ -90,6 +91,7 @@ files:
90
91
  - app/jobs/cdm_migrator/batch_create_works_job.rb
91
92
  - app/jobs/cdm_migrator/cdm_ingest_files_job.rb
92
93
  - app/jobs/cdm_migrator/create_work_job.rb
94
+ - app/jobs/cdm_migrator/update_object_job.rb
93
95
  - app/mailers/cdm_migrator/application_mailer.rb
94
96
  - app/models/cdm_migrator/application_record.rb
95
97
  - app/models/cdm_migrator/batch_ingest.rb
@@ -98,11 +100,11 @@ files:
98
100
  - app/views/cdm_migrator/cdm/mappings.html.erb
99
101
  - app/views/cdm_migrator/csv/_batches_list.html.erb
100
102
  - app/views/cdm_migrator/csv/_default_group.html.erb
103
+ - app/views/cdm_migrator/csv/_error_list.html.erb
101
104
  - app/views/cdm_migrator/csv/_list_batches.html.erb
102
- - app/views/cdm_migrator/csv/_path_list.html.erb
103
- - app/views/cdm_migrator/csv/_results_pagination.html.erb
104
105
  - app/views/cdm_migrator/csv/_tabs.html.erb
105
- - app/views/cdm_migrator/csv/file_path_checker.html.erb
106
+ - app/views/cdm_migrator/csv/csv_checker.html.erb
107
+ - app/views/cdm_migrator/csv/edit.html.erb
106
108
  - app/views/cdm_migrator/csv/index.html.erb
107
109
  - app/views/cdm_migrator/csv/upload.html.erb
108
110
  - app/views/layouts/cdm_migrator/application.html.erb
@@ -122,7 +124,7 @@ homepage: https://github.com/UVicLibrary/cdm_migrator
122
124
  licenses:
123
125
  - MIT
124
126
  metadata: {}
125
- post_install_message:
127
+ post_install_message:
126
128
  rdoc_options: []
127
129
  require_paths:
128
130
  - lib
@@ -137,9 +139,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
137
139
  - !ruby/object:Gem::Version
138
140
  version: '0'
139
141
  requirements: []
140
- rubyforge_project:
141
- rubygems_version: 2.7.7
142
- signing_key:
142
+ rubygems_version: 3.1.2
143
+ signing_key:
143
144
  specification_version: 4
144
145
  summary: ContentDM to Hyrax migrator.
145
146
  test_files: []
@@ -1,19 +0,0 @@
1
- <% if @path_list && @path_list.any? %>
2
- <table class="table table-striped">
3
- <thead>
4
- <tr>
5
- <th scope="col" style="min-width: 100px;">Line No.</th>
6
- <th scope="col">File Path (url)</th>
7
- </tr>
8
- </thead>
9
- <tbody>
10
- <% @path_list.each do |line, path| %>
11
- <tr>
12
- <td><%= line %></td>
13
- <td><%= path %></td>
14
- </tr>
15
- <% end %>
16
- </tbody>
17
- </table>
18
- <% end %>
19
-
@@ -1,9 +0,0 @@
1
- <% if @response.total_pages > 1 %>
2
- <div class="row record-padding">
3
- <div class="col-md-9">
4
- <div class="pagination">
5
- <%= paginate @response, outer_window: 2, theme: 'blacklight', route_set: hyrax %>
6
- </div>
7
- </div>
8
- </div>
9
- <% end %>
@@ -1,21 +0,0 @@
1
- <% provide :page_title, "File Path Checker" %>
2
-
3
- <h1>File Path Checker</h1>
4
-
5
- <!-- <div class="row"> -->
6
- <p>This tool checks if a file exists at each url in a csv.</p>
7
- <%= form_tag(check_file_paths_path, remote: true, method: :post, multipart: true, id: "csv-form") do %>
8
- <div class="input-group">
9
- <%= file_field_tag(:file, class: "form-control-file") %>
10
- <%= hidden_field_tag :authenticity_token, value: form_authenticity_token %>
11
- <%= button_tag(type: :submit, class: "btn btn-large btn-primary", style: "margin-top: 1em;") do %>
12
- Check CSV
13
- <% end %>
14
- <% end %>
15
- </div>
16
- <!-- </div> -->
17
-
18
- <div id="path_list" class="col-md-10 offset-md-1">
19
- <%= render 'path_list' %>
20
- </div>
21
-