cdm_migrator 3.0.1 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 985574c3d7f661fd0f9d1ce9a15e9269fa4c2dafd10d387110695400aff2b762
4
- data.tar.gz: 0c24c2d7beb6f6d9c66a44dd30393c8dae0cb9222c9d6c69186909cdfe074590
3
+ metadata.gz: 8bc6e448326179791de466ba5dc1d35a26fd48fa2a85a8fbabeb7d8dad60339e
4
+ data.tar.gz: f3cf46dec46cd8166bc0555eef28e2868db6fbbf77963083f93fca1958b00e19
5
5
  SHA512:
6
- metadata.gz: 6b06e22ca127a1b5fa304c18edb939c5082cb3251b7c5be202e51f6914e6ab703a3a7c2a3cc992327ae0122eacc52eec5d5ae810e00eb8531b5e49f5a9481a71
7
- data.tar.gz: 2cefe7be69db9247165ec1c03eeadd5bbffbe0b1ed8044f097e6a6070968e77b224706845eb6072d891cdd5718612c93544321b7a2d9a204af8e92ff4cf6388d
6
+ metadata.gz: 9d8b8bc4d318269375c82429cbcd4bb2e3d07b1761ef54854b262746c09e46430ee3d9da02122567f5244599bb3f312b7248b1b0a610b12abc2a049b0b7470dc
7
+ data.tar.gz: 4dfc8a920f09e1e3c84b5eb09116adadcbf44556c3abe4b4daa1dbce7e87e767ea380de9fbdc5368c548032bf4b6f074a5ab0268fd6600dff75a9530445a7210
data/README.md CHANGED
@@ -27,11 +27,12 @@ to insert the yml and add a link to your Hyrax dashboard
27
27
 
28
28
  ## Usage
29
29
  1. Add your ContentDM url and api port to the cdm_migrator.yml file.
30
- 2. Navigate to the *cdm_migrator/collection* url to select your contentdm collection and what type of work you want to export it to and click "choose mappings".
31
- 3. Map the ContentDM fields to your Hyrax work and file fields\* and click "generate CSV".
32
- 4. Refine the CSV as you see fit.
33
- 5. Navigate to the *cdm_migrator/upload* url; choose your multi-value seperator (default is |) and upload your CSV file.
34
- 6. Done.
30
+ 2. Configure the CSV Checker with the appropriate fields, paths, or multi-value separator (in cdm_migrator.yml).
31
+ 3. Navigate to the *cdm_migrator/collection* url to select your contentdm collection and what type of work you want to export it to and click "choose mappings".
32
+ 4. Map the ContentDM fields to your Hyrax work and file fields\* and click "generate CSV".
33
+ 5. Refine the CSV as you see fit. Optional: go to *cdm_migrator/csv_checker* and upload it to validate metadata fields and/or file paths.
34
+ 6. Navigate to the *cdm_migrator/upload* url; choose your multi-value separator (default is |) and upload your CSV file.
35
+ 7. Done.
35
36
 
36
37
  \* cdm_migrator uses the generated Hyrax forms (ex. Hyrax::Forms::GenericWorkForm) in your host application to obtain it's terms for mapping. If you have added terms to your FileSet model extend the Hyrax::Forms::FileSetEditForm with Hyrax::FileSetForm in your host application so that the changes will be detected by the migrator. You can also add a list of fields in the yml file, under "default fields".
37
38
 
@@ -0,0 +1,37 @@
1
+ .csv-collapse-link.collapsed {
2
+ display: inline-block;
3
+ vertical-align: top;
4
+ }
5
+
6
+ #csv-collapse-link-text::after {
7
+ content: "❯";
8
+ display: inline-block;
9
+ font-size: smaller;
10
+ right: 15px;
11
+ transform: rotate(90deg);
12
+ margin-left: 0.5em;
13
+ }
14
+
15
+ .collapsed #csv-collapse-link-text::after {
16
+ transform: rotate(0deg);
17
+ transition: transform 0.1s ease;
18
+ margin-left: 2px;
19
+ vertical-align: top;
20
+ }
21
+
22
+ #errors-explanation ul li {
23
+ list-style-type: none;
24
+ margin-bottom: 2px;
25
+ }
26
+
27
+ #errors-explanation ul {
28
+ padding-left: 0;
29
+ }
30
+
31
+ #errors-explanation {
32
+ margin-top: 5px;
33
+ }
34
+
35
+ #csv-form {
36
+ margin-top: 1em;
37
+ }
@@ -104,10 +104,10 @@ module CdmMigrator
104
104
 
105
105
  def load_yaml
106
106
  stripped_url = request.base_url.dup.gsub(/https?:\/\//, '').gsub(/:[0-9]*/,'')
107
- if CdmMigrator::Engine.config['cdm_api'].key? stripped_url
108
- tenant = CdmMigrator::Engine.config['cdm_api'][stripped_url]
107
+ if CdmMigrator::Engine.config['tenant_settings'].key? stripped_url
108
+ tenant = CdmMigrator::Engine.config['tenant_settings'][stripped_url]['cdm_api']
109
109
  else
110
- tenant = CdmMigrator::Engine.config['cdm_api']['default']
110
+ tenant = CdmMigrator::Engine.config['tenant_settings']['default']['cdm_api']
111
111
  end
112
112
  @cdm_url = tenant['url']
113
113
  @cdm_port = tenant['port']
@@ -121,7 +121,7 @@ module CdmMigrator
121
121
  filename = child ? child['pagefile'] : "#{rec.first}.#{rec.last}"
122
122
 
123
123
  if params[:file_system]=='true'
124
- "file://#{file_path(rec.first)}"
124
+ "file://#{file_path(cisoptr)}"
125
125
  elsif @cdm_api == 'server'
126
126
  "#{@cdm_url}:#{@cdm_port}/cgi-bin/showfile.exe?CISOROOT=#{params[:collection]}&CISOPTR=#{cisoptr}"
127
127
  else
@@ -1,20 +1,20 @@
1
1
  module CdmMigrator
2
- class CsvController < ApplicationController
3
- helper_method :default_page_title, :admin_host?, :available_translations, :available_works
2
+ class CsvController < ApplicationController
3
+ helper_method :default_page_title, :admin_host?, :available_translations, :available_works
4
4
  include ActionView::Helpers::UrlHelper
5
- layout 'hyrax/dashboard' if Hyrax
5
+ layout 'hyrax/dashboard' if Hyrax
6
6
  before_action :authenticate, except: :index
7
+ before_action :load_config, only: :csv_checker
7
8
 
8
- def file_path_checker
9
- if params[:file]
10
- check_paths params[:file].path
11
-
12
- if @path_list.blank?
13
- flash[:notice] = "All file paths are valid."
14
- else
15
- flash[:error] = "Cdm Migrator couldn't find files at the following urls. Please correct the paths and try again."
16
- end
17
- end
9
+ def csv_checker
10
+ if params[:file]
11
+ check_csv params[:file].path
12
+ if @error_list.blank?
13
+ flash[:notice] = "All data are valid."
14
+ else
15
+ flash[:error] = "The CSV Checker found some errors in the CSV. Please correct them and check again."
16
+ end
17
+ end
18
18
  end
19
19
 
20
20
  def index
@@ -27,34 +27,34 @@ module CdmMigrator
27
27
  end
28
28
  end
29
29
 
30
- def upload
31
- @admin_sets = AdminSet.all.map { |as| [as.title.first, as.id] }
32
- @collections = Collection.all.map { |col| [col.title.first, col.id] }
33
- end
30
+ def upload
31
+ @admin_sets = AdminSet.all.map { |as| [as.title.first, as.id] }
32
+ @collections = Collection.all.map { |col| [col.title.first, col.id] }
33
+ end
34
34
 
35
- def create
36
- dir = Rails.root.join('public', 'uploads', 'csvs')
37
- FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
38
- time = DateTime.now.strftime('%s')
39
- filename = params[:csv_import][:csv_file].original_filename.gsub('.csv',"#{time}.csv")
40
- csv = dir.join(filename).to_s
41
- File.open(csv, 'wb') do |file|
42
- file.write(params[:csv_import][:csv_file].read)
35
+ def create
36
+ dir = Rails.root.join('public', 'uploads', 'csvs')
37
+ FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
38
+ time = DateTime.now.strftime('%s')
39
+ filename = params[:csv_import][:csv_file].original_filename.gsub('.csv', "#{time}.csv")
40
+ csv = dir.join(filename).to_s
41
+ File.open(csv, 'wb') do |file|
42
+ file.write(params[:csv_import][:csv_file].read)
43
43
  end
44
- check_paths csv
45
- if @path_list.present?
46
- flash[:error] = "some file paths are invalid"
44
+ check_csv csv
45
+ if @error_list.present?
46
+ flash[:error] = "Cdm Migrator found some problems with the CSV. Use the CSV Checker for more details."
47
47
  end
48
48
  parse_csv(csv, params[:csv_import][:mvs])
49
49
 
50
50
  ingest = BatchIngest.new({
51
- data: @works,
52
- size: @works.length,
53
- csv: csv,
54
- admin_set_id: params[:admin_set],
51
+ data: @works,
52
+ size: @works.length,
53
+ csv: csv,
54
+ admin_set_id: params[:admin_set],
55
55
  collection_id: params[:collection],
56
- user_id: current_user.id,
57
- message: @path_list.blank? ? nil : @path_list.to_s.gsub("\"","&quot;")
56
+ user_id: current_user.id,
57
+ message: @path_list.blank? ? nil : @path_list.to_s.gsub("\"", "&quot;")
58
58
  })
59
59
  if ingest.save! && @path_list.blank?
60
60
  BatchCreateWorksJob.perform_later(ingest, current_user)
@@ -75,9 +75,8 @@ module CdmMigrator
75
75
  end
76
76
 
77
77
  def generate
78
- headers = ['type','url']
79
- skip = ["id", "head", "tail", "depositor", "date_uploaded", "date_modified", "import_url", "thumbnail_id",
80
- "embargo_id", "lease_id", "access_control_id", "representative_id"]
78
+ headers = %w(type url)
79
+ skip = %w(id head tail depositor date_uploaded date_modified import_url thumbnail_id embargo_id lease_id access_control_id representative_id)
81
80
  GenericWork.new.attributes.each do |key, val|
82
81
  headers << "work_#{key}" unless skip.include? key
83
82
  end
@@ -87,19 +86,109 @@ module CdmMigrator
87
86
  fname = "template_#{DateTime.now.to_i}"
88
87
  render plain: CSV.generate { |csv| csv << headers }, content_type: 'text/csv'
89
88
  end
90
-
91
- private
89
+
90
+ def edit
91
+ @collections = ::Collection.all.map { |c| [c.title.first, c.id] }
92
+ end
93
+
94
+ def update
95
+ mvs = params[:csv_update][:mvs]
96
+ csv = CSV.parse(params[:csv_update][:csv_file].read, headers: true, encoding: 'utf-8').map(&:to_hash)
97
+ csv.each do |row|
98
+ obj = ActiveFedora::Base.find row['id']
99
+ type = row.first.last
100
+ if type.nil?
101
+ next
102
+ elsif type.include? "Work"
103
+ metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
104
+ elsif type.include? "File"
105
+ metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
106
+ end
107
+ unless metadata.nil?
108
+ obj.attributes = metadata
109
+ obj.save
110
+ end
111
+ end
112
+ flash[:notice] = "csv successfully uploaded"
113
+ redirect_to csv_edit_path
114
+ end
115
+
116
+ def export
117
+ solr = RSolr.connect url: Account.find_by(tenant: Apartment::Tenant.current).solr_endpoint.url
118
+ response = solr.get 'select', params: {
119
+ q: "member_of_collection_ids_ssim:#{params[:collection_id]}",
120
+ rows: 3400,
121
+ fl: "id"
122
+ }
123
+ unless response['response']['docs'].empty? || response['response']['docs'][0].empty?
124
+ work_ids = response['response']['docs'].map { |doc| doc['id'] }
125
+ end
126
+ #works = ::ActiveFedora::Base.where member_of_collection_ids_ssim: params[:collection_id]
127
+ @csv_headers = ['type'] + work_fields
128
+ @csv_array = [@csv_headers.join(',')]
129
+ work_ids.each do |work_id|
130
+ doc = ::SolrDocument.find work_id
131
+ add_line doc
132
+ doc._source[:file_set_ids_ssim].each do |file_id|
133
+ file_doc = ::SolrDocument.find file_id
134
+ add_line file_doc
135
+ end
136
+ end
137
+
138
+ send_data @csv_array.join("\n"),
139
+ :type => 'text/csv; charset=iso-8859-5; header=present',
140
+ :disposition => "attachment; filename=export.csv"
141
+ end
142
+
143
+ private
92
144
 
93
145
  def authenticate
94
146
  authorize! :create, available_works.first
95
147
  end
96
148
 
149
+ def add_line doc
150
+ line_hash = {}
151
+ line_hash['type'] = doc._source[:has_model_ssim].first
152
+ work_fields.each do |field|
153
+ line_hash[field] = create_cell doc, field
154
+ end
155
+ @csv_array << line_hash.values_at(*@csv_headers).map { |cell| cell = '' if cell.nil?; "\"#{cell.gsub("\"", "\"\"")}\"" }.join(',')
156
+
157
+ end
158
+
159
+ def work_fields
160
+ @fields ||= available_works.map { |work| work.new.attributes.keys }.flatten.uniq - excluded_fields
161
+ end
162
+
163
+ def excluded_fields
164
+ %w[date_uploaded date_modified head tail state proxy_depositor on_behalf_of arkivo_checksum label
165
+ relative_path import_url part_of resource_type access_control_id
166
+ representative_id thumbnail_id rendering_ids admin_set_id embargo_id
167
+ lease_id]
168
+ end
169
+
170
+ def create_cell w, field
171
+ if field.include? 'date'
172
+ if w._source[field+'_tesim'].is_a?(Array)
173
+ w._source[field+'_tesim'].join('|')
174
+ else
175
+ w._source[field+'_tesim']
176
+ end
177
+ elsif w.respond_to?(field.to_sym)
178
+ if w.send(field).is_a?(Array)
179
+ w.send(field).join('|')
180
+ else
181
+ w.send(field)
182
+ end
183
+ end
184
+ end
185
+
97
186
  def available_works
98
187
  @available_works ||= Hyrax::QuickClassificationQuery.new(current_user).authorized_models
99
188
  end
100
189
 
101
190
  def parse_csv csv, mvs
102
- csv = CSV.parse(File.read(csv), headers: true, encoding: 'utf-8').map(&:to_hash)
191
+ csv = CSV.parse(File.read(csv), headers: true, encoding: 'utf-8').map(&:to_hash)
103
192
  @works = []
104
193
  csv.each do |row|
105
194
  type = row.first.last
@@ -115,19 +204,140 @@ module CdmMigrator
115
204
  end
116
205
  end
117
206
 
118
- def check_paths csv_file
119
- row_number = 1 # +1 offset to account for csv headers
120
- @path_list = {}
207
+ def load_config
208
+ tenant = Account.find_by(tenant: Apartment::Tenant.current).cname
209
+ if CdmMigrator::Engine.config['tenant_settings'].has_key?(tenant)
210
+ settings = CdmMigrator::Engine.config['tenant_settings'][tenant]['csv_checker']
211
+ if settings.present?
212
+ # .map will throw an error if settings[key] has no value
213
+ @edtf_fields = settings['edtf_fields'].map(&:to_sym) if settings['edtf_fields']
214
+ @uri_fields = settings['valid_uri_fields'].map(&:to_sym) if settings['valid_uri_fields']
215
+ @separator = settings['multi_value_separator']
216
+ @separator_fields = settings['separator_fields'].map(&:to_sym) if settings['separator_fields']
217
+ @path_to_drive = settings['path_to_drive']
218
+ else
219
+ raise "Cdm Migrator couldn't find any configured settings. Are they in cdm_migrator.yml?"
220
+ end
221
+ else
222
+ raise "Cdm Migrator couldn't find this tenant. Is it configured?"
223
+ end
224
+ end
225
+
226
+ def check_csv csv_file
227
+ row_number = 1
228
+ @error_list = {}
229
+ check_mounted_drive if @path_to_drive.present?
230
+
231
+ CSV.foreach(csv_file, headers: true, header_converters: :symbol) do |row|
232
+ row_number +=1 # Tells user what CSV row the error is on
233
+ if row[:object_type].include? "Work"
234
+ check_edtf(row_number, row) if @edtf_fields.present?
235
+ check_uris(row_number, row) if @uri_fields.present?
236
+ if params[:multi_value_separator].present? and @separator_fields.present?
237
+ check_separator(row_number, row, params[:multi_value_separator])
238
+ else
239
+ alert_message = "No multi-value separator character was selected or no fields were configured. CSV Checker didn't check for valid separators."
240
+ if flash[:alert] and flash[:alert].exclude?(alert_message) # Only add this message once, rather than per line
241
+ flash[:alert] << alert_message
242
+ elsif flash[:alert].blank?
243
+ flash[:alert] = Array.wrap(alert_message)
244
+ end
245
+ end
246
+ elsif row[:object_type] == "File"
247
+ check_file_path(row_number, row[:url])
248
+ else
249
+ @error_list[row_number] = { "object_type" => "No or unknown object type. Please give a valid type (e.g. GenericWork, File)." }
250
+ end
251
+ @error_list.delete_if { |key, value| value.blank? } # Data are valid, no need to print the row
252
+ end
253
+ end
254
+
255
+ def check_mounted_drive
256
+ drive_address = @path_to_drive
257
+ unless Dir.exist?(drive_address) and !Dir[drive_address].empty?
258
+ flash[:alert] = "CSV Checker can't find the mounted drive to check file paths, so some paths may be mislabelled as incorrect. Please contact the administrator or try again later."
259
+ end
260
+ end
261
+
262
+ def check_file_path(row_number, file_path)
263
+ if file_path.nil?
264
+ @error_list[row_number] = { "url" => "url is blank." }
265
+ elsif File.file?(file_path.gsub("file://", "")) == false
266
+ @error_list[row_number] = { "url" => "No file found at #{file_path}" }
267
+ end
268
+ end
269
+
270
+ def check_edtf(row_number, row)
271
+ edtf_fields = @edtf_fields
272
+ edtf_errors = edtf_fields.each_with_object({}) do |field, hash|
273
+ temp_date = row[field]
274
+ # modify date so that the interval encompasses the years on the last interval date
275
+ temp_date = temp_date.gsub('/..','').gsub('%','?~').gsub(/\/$/,'')
276
+ date = temp_date.include?("/") ? temp_date.gsub(/([0-9]+X+\/)([0-9]+)(X+)/){"#{$1}"+"#{$2.to_i+1}"+"#{$3}"}.gsub("X","u") : temp_date
277
+ date = date.gsub("XX-","uu-").gsub("X-", "u-").gsub('XX?','uu').gsub('X?', 'u').gsub('u?','u').gsub('?','')
278
+ # edtf has trouble with year-month (e.g. "19uu-12") or year-season strings (e.g. "190u-23")
279
+ # that contain unspecified years, or intervals containing the above ("19uu-22/19uu-23", etc.).
280
+ # So we check for/create exceptions.
281
+ # Check for season interval
282
+ if Date.edtf(date) == nil and date != "unknown" # Accept season intervals
283
+ unless is_season?(date.split("/").first) and is_season?(date.split("/").second)
284
+ # If an interval then, check each date individually
285
+ if date.include?("/")
286
+ dates = date.split("/")
287
+ else
288
+ dates = [date]
289
+ end
290
+ #byebug
291
+ dates.each do |d|
292
+ # Dates with 'u' in the last digit of the year return invalid when in format YYYY-MM
293
+ # So we flub day specifity before checking again if the date is valid
294
+ unless Date.edtf(d + '-01') # Date.edtf('193u-03-01') returns valid
295
+ if match = d[/\d{3}u/] or match = d[/\d{2}u{2}-[2][1-4]/] # edtf can't parse single u in year (e.g. 192u) or uu in YYYY-SS (e.g. 19uu-21), so we replace it
296
+ d.gsub!(match, match.gsub("u","0"))
297
+ unless Date.edtf(d)
298
+ hash[field.to_s] = "Blank or not a valid EDTF date."
299
+ end
300
+ else
301
+ hash[field.to_s] = "Blank or not a valid EDTF date."
302
+ end
303
+ end
304
+ end
305
+ end
306
+ end
307
+
308
+ end
309
+ @error_list[row_number] = edtf_errors
310
+ end
311
+
312
+ def is_season?(date)
313
+ Date.edtf(date).class == EDTF::Season
314
+ end
315
+
316
+ # <Example: should be http://rightsstatements.org/vocab/etc. NOT https://rightsstatements.org/page/etc.
317
+ def check_uris(row_number, row)
318
+ uri_fields = @uri_fields
319
+ uri_errors = uri_fields.each_with_object({}) do |field, hash|
320
+ if row[field].include? "page"
321
+ hash[field.to_s] = "Links to page instead of URI. (e.g. https://rightsstatements.org/page/etc. instead of http://rightsstatements.org/vocab/etc.)"
322
+ end
323
+ end
324
+ @error_list[row_number].merge!(uri_errors)
325
+ end
121
326
 
122
- CSV.foreach(csv_file, headers: true, header_converters: :symbol) do |row|
123
- row_number +=1 # Tells user what CSV row the bogus file path is on
124
- next if row[:url].nil?
125
- file_path = row[:url]
126
- unless File.file?(file_path.gsub("file://", ""))
127
- @path_list[row_number] = file_path
327
+ # Check multi-value separators
328
+ def check_separator(row_number, row, character)
329
+ uri_fields = @separator_fields
330
+ separator_errors = uri_fields.each_with_object({}) do |field, hash|
331
+ value = row[field]
332
+ if value.present?
333
+ URI.extract(value).each { |uri| value.gsub!(uri, '') }
334
+ unless value.split("").all? { |sep| sep == character } # Check if remaining characters are the correct separator
335
+ hash[field.to_s] = "May contain the wrong multi-value separator (i.e. not #{character})."
128
336
  end
129
337
  end
130
338
  end
339
+ @error_list[row_number].merge!(separator_errors)
340
+ end
131
341
 
132
342
  def default_page_title
133
343
  'CSV Batch Uploader'
@@ -139,8 +349,8 @@ module CdmMigrator
139
349
 
140
350
  def available_translations
141
351
  {
142
- 'en' => 'English',
143
- 'fr' => 'French'
352
+ 'en' => 'English',
353
+ 'fr' => 'French'
144
354
  }
145
355
  end
146
356
 
@@ -162,12 +372,12 @@ module CdmMigrator
162
372
  end
163
373
 
164
374
  def create_data data, type, object, mvs
165
- final_data = {}
375
+ final_data = {}
166
376
  accepted_terms = type.required_fields + secondary_terms(type)
167
377
  data.each do |key, att|
168
- if(att.nil? || att.empty? || key.to_s.include?("object_type") || !accepted_terms.include?(key.to_sym) )
378
+ if (att.nil? || att.empty? || key.to_s.include?("object_type") || !accepted_terms.include?(key.to_sym))
169
379
  next
170
- elsif(object.send(key).nil?)
380
+ elsif (object.send(key).nil?)
171
381
  final_data[key] = att
172
382
  else
173
383
  final_data[key] = att.split(mvs)
@@ -176,23 +386,23 @@ module CdmMigrator
176
386
  final_data
177
387
  end
178
388
 
179
- def create_lease visibility, status_after, date
180
- lease = Hydra::AccessControls::Lease.new(visibility_during_lease: visibility,
181
- visibility_after_lease: status_after, lease_expiration_date: @lease_date)
182
- lease.save
183
- end
389
+ def create_lease visibility, status_after, date
390
+ lease = Hydra::AccessControls::Lease.new(visibility_during_lease: visibility,
391
+ visibility_after_lease: status_after, lease_expiration_date: @lease_date)
392
+ lease.save
393
+ end
184
394
 
185
- def create_embargo visibility
186
- embargo = Hydra::AccessControls::Embargo.new
187
- embargo.visibility_during_embargo = visibility
188
- embargo.visibility_after_embargo = @status_after
189
- embargo.embargo_release_date = @embargo_date
190
- embargo.save
191
- end
395
+ def create_embargo visibility
396
+ embargo = Hydra::AccessControls::Embargo.new
397
+ embargo.visibility_during_embargo = visibility
398
+ embargo.visibility_after_embargo = @status_after
399
+ embargo.embargo_release_date = @embargo_date
400
+ embargo.save
401
+ end
192
402
 
193
403
  def log(user)
194
- Hyrax::Operation.create!(user: user,
195
- operation_type: "Attach Remote File")
404
+ Hyrax::Operation.create!(user: user,
405
+ operation_type: "Attach Remote File")
196
406
  end
197
- end
407
+ end
198
408
  end
@@ -2,7 +2,7 @@ module CdmMigrator
2
2
  class BatchCreateFilesJob < ActiveJob::Base
3
3
  queue_as Hyrax.config.ingest_queue_name
4
4
 
5
- def perform work, ingest_work, user, last_work=false
5
+ def perform work, ingest_work, user
6
6
  ingest_work.files.each do |file|
7
7
  url = file[:url]
8
8
  last_file = ingest_work.files.last==file
@@ -11,10 +11,9 @@ module CdmMigrator
11
11
  actor = Hyrax::Actors::FileSetActor.new(fs, user)
12
12
  actor.create_metadata#(work, visibility: work.visibility)
13
13
  actor.attach_file_to_work(work)
14
- #byebug
15
14
  fs.attributes = file[:metadata]
16
15
  fs.save!
17
- CdmIngestFilesJob.perform_later(fs, url, user, ingest_work, last_file, last_work)
16
+ CdmIngestFilesJob.perform_later(fs, url, user, ingest_work, last_file)
18
17
  end
19
18
  end
20
19
  end
@@ -4,10 +4,9 @@ module CdmMigrator
4
4
 
5
5
  def perform(ingest, user)
6
6
  ingest.data.each do |w|
7
- last_work = ingest.data.last==w
8
7
  ingest_work = IngestWork.new(w, ingest.id)
9
8
  ingest_work.save!
10
- CreateWorkJob.perform_later ingest_work, user, ingest.admin_set_id, ingest.collection_id, last_work
9
+ CreateWorkJob.perform_later ingest_work, user, ingest.admin_set_id, ingest.collection_id
11
10
  end
12
11
 
13
12
  end
@@ -2,7 +2,7 @@ module CdmMigrator
2
2
  class CdmIngestFilesJob < ActiveJob::Base
3
3
  queue_as Hyrax.config.ingest_queue_name
4
4
 
5
- def perform(fs, url, user, ingest_work = nil, last_file = false, last_work = false)
5
+ def perform(fs, url, user, ingest_work = nil, last_file = false)
6
6
  if url.include?("http") && File.extname(url).include?("pdf")
7
7
  download = open(url)
8
8
  dir = Rails.root.join('public', 'uploads', 'csv_pdfs')
@@ -16,14 +16,14 @@ module CdmMigrator
16
16
  IO.copy_stream(download, url)
17
17
  url = "file://"+url.to_s
18
18
  end
19
- uri = URI.parse(url.gsub(' ','%20'))
19
+ uri = URI.parse(url.gsub(' ','%20').gsub(/[\[\]@#\$\*{}]/, ""))
20
20
  if uri.scheme == 'file'
21
- IngestLocalFileJob.perform_now(fs, uri.path.gsub('%20',' '), user)
21
+ IngestLocalFileJob.perform_now(fs, url.gsub('file://',''), user)
22
22
  else
23
+ URI.parse(url.gsub(' ','%20'))
23
24
  ImportUrlJob.perform_now(fs, log(user))
24
25
  end
25
26
  ingest_work.update_attribute('complete', true) if last_file
26
- BatchIngest.find(ingest_work.id).update_attribute('complete', true) if last_work
27
27
  end
28
28
 
29
29
  def log(user)
@@ -2,18 +2,24 @@ module CdmMigrator
2
2
  class CreateWorkJob < ActiveJob::Base
3
3
  queue_as Hyrax.config.ingest_queue_name
4
4
 
5
- def perform(ingest_work, user, admin_set_id, collection_id, last_work=false)
5
+ def perform(ingest_work, user, admin_set_id, collection_id)
6
6
  admin_set = ::AdminSet.find(admin_set_id) rescue nil
7
7
  collection = ::Collection.find(collection_id) rescue nil
8
8
  work = Object.const_get(ingest_work.work_type).new
9
9
  #status_after, embargo_date, lease_date = nil, nil, nil
10
10
  work.apply_depositor_metadata(user)
11
11
  work.attributes = ingest_work.data
12
+ if ingest_work.data.has_key? 'downloadable'
13
+ # Convert string to boolean
14
+ work.downloadable = ActiveModel::Type::Boolean.new.cast(ingest_work.data['downloadable'])
15
+ elsif work.attributes.include? 'downloadable' # Set work to downloadable by default
16
+ work.downloadable = true
17
+ end
12
18
  work.member_of_collections = [collection] if collection
13
19
  work.admin_set = admin_set if admin_set
14
20
  work.date_uploaded = DateTime.now
15
21
  work.save
16
- BatchCreateFilesJob.perform_later work, ingest_work, user, last_work
22
+ BatchCreateFilesJob.perform_later work, ingest_work, user
17
23
 
18
24
  end
19
25
  end
@@ -0,0 +1,10 @@
1
+ module CdmMigrator
2
+ class UpdateObjectJob < ActiveJob::Base
3
+
4
+
5
+ def perform(attributes)
6
+ obj = ActiveFedora::Base.find
7
+
8
+ end
9
+ end
10
+ end
@@ -7,11 +7,14 @@ module CdmMigrator
7
7
  end
8
8
 
9
9
  def progress
10
- if complete?
10
+ return "Complete" if complete?
11
+ completed = IngestWork.where(batch_ingest_id: id, complete: true ).length
12
+ if completed==data.length
13
+ complete=true
14
+ save
11
15
  "Complete"
12
16
  else
13
- completed = IngestWork.where(batch_ingest_id: id, complete: true ).length.to_s
14
- "#{completed}/#{size}"
17
+ "#{completed.to_s}/#{size}"
15
18
  end
16
19
  end
17
20
 
@@ -20,7 +23,7 @@ module CdmMigrator
20
23
  end
21
24
 
22
25
  def complete?
23
- self.complete
26
+ complete
24
27
  end
25
28
 
26
29
  def message?
@@ -30,7 +30,7 @@ border:1px solid black;
30
30
  </tr>
31
31
  <% end %>
32
32
  </table>
33
- <%= select_tag "mappings_url", options_for_select(@dirs) if @cdm_dirs %>
33
+ <%= select_tag "mappings_url", options_for_select(@dirs.sort { |x,y| x[0].downcase <=> y[0].downcase }) if @cdm_dirs %>
34
34
  <%= hidden_field_tag "work", params[:work] %>
35
35
  <%= submit_tag 'generate csv'%>
36
36
  <% end %>
@@ -0,0 +1,21 @@
1
+ <% if @error_list && @error_list.any? %>
2
+ <%#= @error_list.inspect %>
3
+ <table class="table table-striped">
4
+ <thead>
5
+ <tr>
6
+ <th scope="col" style="min-width: 100px;">Line No.</th>
7
+ <th scope="col" style="margin-right: 0.5em;">Column(s)</th>
8
+ <th scope="col">Issue</th>
9
+ </tr>
10
+ </thead>
11
+ <tbody>
12
+ <% @error_list.keys.each do |line_number| %>
13
+ <tr>
14
+ <td><%= line_number %></td>
15
+ <td><%= @error_list[line_number].keys.join("<br />").html_safe %></td>
16
+ <td><%= @error_list[line_number].values.join("<br />").html_safe %></td>
17
+ </tr>
18
+ <% end %>
19
+ </tbody>
20
+ </table>
21
+ <% end %>
@@ -0,0 +1,46 @@
1
+ <% provide :page_title, "CSV Checker" %>
2
+
3
+ <%#= flash[:alert].join("<br/>") if flash[:alert] %>
4
+
5
+ <h1>CSV Checker</h1>
6
+
7
+ <!-- <div class="row"> -->
8
+ <p>This tool validates CSV data and creates a table listing any errors.</p>
9
+
10
+ <a role="button" class="collapse-toggle collapsed csv-collapse-link" data-toggle="collapse" data-target="#errors-explanation" aria-expanded="false">
11
+ <span id="csv-collapse-link-text">Expand for more details</span>
12
+ </a>
13
+ <div class="collapse" id="errors-explanation">
14
+ <ul>
15
+ <li><strong>File paths: </strong>The url field contains a valid path to a file.</li>
16
+ <li><strong>Multi-value separator: </strong>Configured fields with URIs contain the right separator character (e.g. |).</li>
17
+ <% if @path_to_drive.present? %>
18
+ <li><strong>Mounted drive: </strong>A mounted directory (folder) exists and is not empty.</li>
19
+ <% end %>
20
+ <% if @edtf_fields.present? %>
21
+ <li><strong>EDTF dates: </strong>Configured fields contain valid <a href="https://www.loc.gov/standards/datetime/" target="_blank">EDTF</a> dates or "unknown."</li>
22
+ <% end %>
23
+ <% if @uri_fields.present? %>
24
+ <li><strong>Valid URIs ("page" vs "vocab"): </strong>Configured fields with URIs link to the "vocab" address rather than the "page" address. For example, rights_statement should be "http://rightsstatement.org/vocab/..." and not "https://rightsstatement.org/page/..."</li>
25
+ <% end %>
26
+ </ul>
27
+ </div>
28
+
29
+
30
+
31
+ <%= form_tag(check_csv_path, remote: true, method: :post, multipart: true, id: "csv-form") do %>
32
+ <div class="input-group">
33
+ <%= label_tag :multi_value_separator %>
34
+ <%= text_field_tag(:multi_value_separator, @separator, size: 1) %>
35
+ <%= file_field_tag(:file, class: "form-control-file") %>
36
+ <%= hidden_field_tag :authenticity_token, value: form_authenticity_token %>
37
+ <%= button_tag(type: :submit, class: "btn btn-large btn-primary", style: "margin-top: 1em;") do %>
38
+ Check CSV
39
+ <% end %>
40
+ <% end %>
41
+ </div>
42
+ <!-- </div> -->
43
+
44
+ <div id="error_list" class="col-md-10 offset-md-1">
45
+ <%= render 'error_list' %>
46
+ </div>
@@ -0,0 +1,18 @@
1
+
2
+ <h1><span class="fa fa-map"></span> Export </h1>
3
+
4
+ <%= form_tag main_app.csv_export_path, method: :post do %>
5
+ <%= select_tag 'collection_id', options_for_select(@collections), include_blank: true %>
6
+ <%= submit_tag "Download CSV", class: 'btn btn-primary' %>
7
+ <% end %>
8
+
9
+ <h1><span class="fa fa-map"></span> Update </h1>
10
+
11
+ <%= form_for :csv_update, url: csv_update_path do |f| %>
12
+ <%= f.label 'Multi-value Separator:' %>
13
+ <%= f.text_field 'mvs' %>
14
+ <br />
15
+ <%= f.file_field 'csv_file' %>
16
+ <br />
17
+ <%= f.submit 'Save' %>
18
+ <% end %>
@@ -5,9 +5,12 @@ Rails.application.routes.draw do
5
5
  get '/cdm_migrator/my/batches', to: 'cdm_migrator/csv#index', as: 'csv_my_batches'
6
6
  get '/cdm_migrator/batches', to: 'cdm_migrator/csv#index', as: 'csv_all_batches'
7
7
  get '/cdm_migrator/rerun/:id', to: 'cdm_migrator/csv#rerun', as: 'csv_rerun'
8
+ get '/cdm_migrator/edit', to: 'cdm_migrator/csv#edit', as: 'csv_edit'
9
+ post '/cdm_migrator/export', to: 'cdm_migrator/csv#export', as: 'csv_export'
10
+ post '/cdm_migrator/update', to: 'cdm_migrator/csv#update', as: 'csv_update'
8
11
 
9
- get '/cdm_migrator/file_path_checker', to: 'cdm_migrator/csv#file_path_checker', as: 'file_path_checker'
10
- post '/cdm_migrator/file_path_checker', to: 'cdm_migrator/csv#file_path_checker', as: 'check_file_paths'
12
+ get '/cdm_migrator/csv_checker', to: 'cdm_migrator/csv#csv_checker', as: 'csv_checker'
13
+ post '/cdm_migrator/csv_checker', to: 'cdm_migrator/csv#csv_checker', as: 'check_csv'
11
14
 
12
15
  get '/cdm_migrator/collection', to: 'cdm_migrator/cdm#collection', as: 'cdm_start'
13
16
  post '/cdm_migrator/mappings/', to: 'cdm_migrator/cdm#mappings', as: 'cdm_mappings'
@@ -1,3 +1,3 @@
1
1
  module CdmMigrator
2
- VERSION = '3.0.1'
2
+ VERSION = '3.2.1'
3
3
  end
@@ -22,8 +22,8 @@ class CdmMigrator::InstallGenerator < Rails::Generators::Base
22
22
  " <%= menu.nav_link(main_app.cdm_start_path) do %>\n" \
23
23
  " <span class=\"fa fa-map\"></span> <span class=\"sidebar-action-text\"><%= t('CDM Mapping Tool') %></span>\n" \
24
24
  " <% end %>\n" \
25
- " <%= menu.nav_link(main_app.file_path_checker_path) do %>\n" \
26
- " <span class=\"fa fa-check-circle\"></span><span>File Path Checker</span>\n" \
25
+ " <%= menu.nav_link(main_app.csv_checker_path) do %>\n" \
26
+ " <span class=\"fa fa-check-circle\"></span><span>CSV Checker</span>\n" \
27
27
  " <% end %>\n" \
28
28
  " <%= menu.nav_link(main_app.csv_upload_path) do %>\n"\
29
29
  " <span class=\"fa fa-angle-double-up\"></span> <span class=\"sidebar-action-text\"><%= t('CSV Batch Uploader') %></span>\n" \
@@ -67,5 +67,13 @@ class CdmMigrator::InstallGenerator < Rails::Generators::Base
67
67
  def inject_content_dm_yml
68
68
  copy_file("config/cdm_migrator.yml", "config/cdm_migrator.yml") unless File.file?("config/cdm_migrator.yml")
69
69
  end
70
+
71
+ def inject_stylesheets
72
+ css_file_path = "app/assets/stylesheets/application.css"
73
+ copy_file("stylesheets/csv_checker.css", "app/assets/stylesheets/csv_checker.css") unless File.file?("app/assets/styelsheets/csv_checker.css")
74
+ insert_into_file css_file_path, :before => " *= require_self\n" do
75
+ " *= require csv_checker\n "
76
+ end
77
+ end
70
78
 
71
79
  end
@@ -1,25 +1,38 @@
1
- cdm_api:
1
+ tenant_settings:
2
2
  tenant1.institution.com:
3
- url: 'http://your-content-dm-host'
4
- port: 8080
5
- type: 'front'
6
- dirs:
7
- dir1: '/dir1/path/goes/here'
8
- dir2: '/dir2/path/goes/here'
3
+ cdm_api:
4
+ url: 'http://your-content-dm-host'
5
+ port: 8080
6
+ type: 'front'
7
+ dirs:
8
+ dir1: '/dir1/path/goes/here'
9
+ dir2: '/dir2/path/goes/here'
9
10
  tenant2.institution.com:
10
- url: 'http://your-content-dm-host'
11
- port: 8080
12
- type: 'front'
13
- dirs:
14
- dir1: '/dir1/path/goes/here'
15
- dir2: '/dir2/path/goes/here'
11
+ cdm_api:
12
+ url: 'http://your-content-dm-host'
13
+ port: 8080
14
+ type: 'front'
15
+ dirs:
16
+ dir1: '/dir1/path/goes/here'
17
+ dir2: '/dir2/path/goes/here'
16
18
  default:
17
- url: 'http://your-content-dm-host'
18
- port: 8080
19
- type: 'front'
20
- dirs:
21
- dir1: '/dir1/path/goes/here'
22
- dir2: '/dir2/path/goes/here'
19
+ cdm_api:
20
+ url: 'http://your-content-dm-host'
21
+ port: 8080
22
+ type: 'front'
23
+ dirs:
24
+ dir1: '/dir1/path/goes/here'
25
+ dir2: '/dir2/path/goes/here'
26
+ csv_checker:
27
+ edtf_fields:
28
+ # - date_created
29
+ valid_uri_fields:
30
+ # - rights_statement
31
+ # - genre
32
+ separator_fields:
33
+ # - subject
34
+ multi_value_separator: # '|'
35
+ path_to_drive: #'/mnt/drive'
23
36
  default_fields:
24
37
  - title
25
38
  - creator
@@ -8,8 +8,8 @@
8
8
  <span class="fa fa-map"></span> <span class="sidebar-action-text"><%= t('CDM Mapping Tool') %></span>
9
9
  <% end %>
10
10
 
11
- <%= menu.nav_link(main_app.file_path_checker_path) do %>
12
- <span class="fa fa-check-circle"></span><span class="sidebar-action-text"><%= t('File Path Checker') %></span>
11
+ <%= menu.nav_link(main_app.csv_checker_path) do %>
12
+ <span class="fa fa-check-circle"></span><span class="sidebar-action-text"><%= t('CSV Checker') %></span>
13
13
  <% end %>
14
14
 
15
15
  <%= menu.nav_link(main_app.csv_upload_path) do %>
@@ -19,6 +19,10 @@
19
19
  <%= menu.nav_link(main_app.csv_my_batches_path) do %>
20
20
  <span class="fa fa-database"></span> <span class="sidebar-action-text"><%= t('Batches') %></span>
21
21
  <% end %>
22
+
23
+ <%= menu.nav_link(main_app.csv_edit_path) do %>
24
+ <span class="fa fa-angle-double-up"></span> <span class="sidebar-action-text"><%= t('Batch Update') %></span>
25
+ <% end %>
22
26
  <% end %>
23
27
  </li>
24
28
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdm_migrator
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.1
4
+ version: 3.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - sephirothkod
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-12-20 00:00:00.000000000 Z
11
+ date: 2020-11-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -81,6 +81,7 @@ files:
81
81
  - app/assets/config/cdm_migrator_manifest.js
82
82
  - app/assets/javascripts/cdm_migrator/application.js
83
83
  - app/assets/stylesheets/cdm_migrator/application.css
84
+ - app/assets/stylesheets/cdm_migrator/csv_checker.css
84
85
  - app/controllers/cdm_migrator/application_controller.rb
85
86
  - app/controllers/cdm_migrator/cdm_controller.rb
86
87
  - app/controllers/cdm_migrator/csv_controller.rb
@@ -90,6 +91,7 @@ files:
90
91
  - app/jobs/cdm_migrator/batch_create_works_job.rb
91
92
  - app/jobs/cdm_migrator/cdm_ingest_files_job.rb
92
93
  - app/jobs/cdm_migrator/create_work_job.rb
94
+ - app/jobs/cdm_migrator/update_object_job.rb
93
95
  - app/mailers/cdm_migrator/application_mailer.rb
94
96
  - app/models/cdm_migrator/application_record.rb
95
97
  - app/models/cdm_migrator/batch_ingest.rb
@@ -98,11 +100,11 @@ files:
98
100
  - app/views/cdm_migrator/cdm/mappings.html.erb
99
101
  - app/views/cdm_migrator/csv/_batches_list.html.erb
100
102
  - app/views/cdm_migrator/csv/_default_group.html.erb
103
+ - app/views/cdm_migrator/csv/_error_list.html.erb
101
104
  - app/views/cdm_migrator/csv/_list_batches.html.erb
102
- - app/views/cdm_migrator/csv/_path_list.html.erb
103
- - app/views/cdm_migrator/csv/_results_pagination.html.erb
104
105
  - app/views/cdm_migrator/csv/_tabs.html.erb
105
- - app/views/cdm_migrator/csv/file_path_checker.html.erb
106
+ - app/views/cdm_migrator/csv/csv_checker.html.erb
107
+ - app/views/cdm_migrator/csv/edit.html.erb
106
108
  - app/views/cdm_migrator/csv/index.html.erb
107
109
  - app/views/cdm_migrator/csv/upload.html.erb
108
110
  - app/views/layouts/cdm_migrator/application.html.erb
@@ -122,7 +124,7 @@ homepage: https://github.com/UVicLibrary/cdm_migrator
122
124
  licenses:
123
125
  - MIT
124
126
  metadata: {}
125
- post_install_message:
127
+ post_install_message:
126
128
  rdoc_options: []
127
129
  require_paths:
128
130
  - lib
@@ -137,9 +139,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
137
139
  - !ruby/object:Gem::Version
138
140
  version: '0'
139
141
  requirements: []
140
- rubyforge_project:
141
- rubygems_version: 2.7.7
142
- signing_key:
142
+ rubygems_version: 3.1.2
143
+ signing_key:
143
144
  specification_version: 4
144
145
  summary: ContentDM to Hyrax migrator.
145
146
  test_files: []
@@ -1,19 +0,0 @@
1
- <% if @path_list && @path_list.any? %>
2
- <table class="table table-striped">
3
- <thead>
4
- <tr>
5
- <th scope="col" style="min-width: 100px;">Line No.</th>
6
- <th scope="col">File Path (url)</th>
7
- </tr>
8
- </thead>
9
- <tbody>
10
- <% @path_list.each do |line, path| %>
11
- <tr>
12
- <td><%= line %></td>
13
- <td><%= path %></td>
14
- </tr>
15
- <% end %>
16
- </tbody>
17
- </table>
18
- <% end %>
19
-
@@ -1,9 +0,0 @@
1
- <% if @response.total_pages > 1 %>
2
- <div class="row record-padding">
3
- <div class="col-md-9">
4
- <div class="pagination">
5
- <%= paginate @response, outer_window: 2, theme: 'blacklight', route_set: hyrax %>
6
- </div>
7
- </div>
8
- </div>
9
- <% end %>
@@ -1,21 +0,0 @@
1
- <% provide :page_title, "File Path Checker" %>
2
-
3
- <h1>File Path Checker</h1>
4
-
5
- <!-- <div class="row"> -->
6
- <p>This tool checks if a file exists at each url in a csv.</p>
7
- <%= form_tag(check_file_paths_path, remote: true, method: :post, multipart: true, id: "csv-form") do %>
8
- <div class="input-group">
9
- <%= file_field_tag(:file, class: "form-control-file") %>
10
- <%= hidden_field_tag :authenticity_token, value: form_authenticity_token %>
11
- <%= button_tag(type: :submit, class: "btn btn-large btn-primary", style: "margin-top: 1em;") do %>
12
- Check CSV
13
- <% end %>
14
- <% end %>
15
- </div>
16
- <!-- </div> -->
17
-
18
- <div id="path_list" class="col-md-10 offset-md-1">
19
- <%= render 'path_list' %>
20
- </div>
21
-