bulk_ops 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. checksums.yaml +7 -0
  2. data/app/assets/images/bulk_ops/github_logo.png +0 -0
  3. data/app/assets/javascripts/bulk_ops.js +14 -0
  4. data/app/assets/javascripts/bulk_ops/selections.js +24 -0
  5. data/app/assets/javascripts/selections.js +38 -0
  6. data/app/assets/javascripts/work_search.js +64 -0
  7. data/app/assets/stylesheets/bulk_ops.scss +99 -0
  8. data/app/controllers/bulk_ops/application_controller.rb +13 -0
  9. data/app/controllers/bulk_ops/github_authorization_controller.rb +33 -0
  10. data/app/controllers/bulk_ops/operations_controller.rb +481 -0
  11. data/app/jobs/bulk_ops/application_job.rb +4 -0
  12. data/app/mailers/bulk_ops/application_mailer.rb +6 -0
  13. data/app/models/bulk_ops/application_record.rb +5 -0
  14. data/app/views/bulk_ops/_bulk_ops_sidebar_widget.html.erb +15 -0
  15. data/app/views/bulk_ops/_github_auth_widget.html.erb +13 -0
  16. data/app/views/bulk_ops/operations/_bulk_ops_header.html.erb +4 -0
  17. data/app/views/bulk_ops/operations/_choose_fields.html.erb +22 -0
  18. data/app/views/bulk_ops/operations/_choose_notifications.html.erb +22 -0
  19. data/app/views/bulk_ops/operations/_git_message.html.erb +7 -0
  20. data/app/views/bulk_ops/operations/_ingest_options.html.erb +42 -0
  21. data/app/views/bulk_ops/operations/_operation_options.html.erb +38 -0
  22. data/app/views/bulk_ops/operations/_show_authorize.html.erb +13 -0
  23. data/app/views/bulk_ops/operations/_show_complete.html.erb +31 -0
  24. data/app/views/bulk_ops/operations/_show_draft.html.erb +20 -0
  25. data/app/views/bulk_ops/operations/_show_new.html.erb +2 -0
  26. data/app/views/bulk_ops/operations/_show_pending.html.erb +58 -0
  27. data/app/views/bulk_ops/operations/_show_running.html.erb +56 -0
  28. data/app/views/bulk_ops/operations/_show_verifying.html.erb +8 -0
  29. data/app/views/bulk_ops/operations/_show_waiting.html.erb +9 -0
  30. data/app/views/bulk_ops/operations/_update_draft_work_list.html.erb +45 -0
  31. data/app/views/bulk_ops/operations/_update_draft_work_search.html.erb +59 -0
  32. data/app/views/bulk_ops/operations/_update_options.html.erb +9 -0
  33. data/app/views/bulk_ops/operations/index.html.erb +51 -0
  34. data/app/views/bulk_ops/operations/new.html.erb +36 -0
  35. data/app/views/bulk_ops/operations/show.html.erb +7 -0
  36. data/config/routes.rb +25 -0
  37. data/db/migrate/20180926190757_create_github_credentials.rb +13 -0
  38. data/db/migrate/20181017180436_create_bulk_ops_tables.rb +40 -0
  39. data/lib/bulk_ops.rb +15 -0
  40. data/lib/bulk_ops/create_spreadsheet_job.rb +43 -0
  41. data/lib/bulk_ops/create_work_job.rb +14 -0
  42. data/lib/bulk_ops/delete_file_set_job.rb +15 -0
  43. data/lib/bulk_ops/engine.rb +6 -0
  44. data/lib/bulk_ops/error.rb +141 -0
  45. data/lib/bulk_ops/github_access.rb +284 -0
  46. data/lib/bulk_ops/github_credential.rb +3 -0
  47. data/lib/bulk_ops/operation.rb +358 -0
  48. data/lib/bulk_ops/relationship.rb +79 -0
  49. data/lib/bulk_ops/search_builder_behavior.rb +80 -0
  50. data/lib/bulk_ops/templates/configuration.yml +5 -0
  51. data/lib/bulk_ops/templates/readme.md +1 -0
  52. data/lib/bulk_ops/update_work_job.rb +14 -0
  53. data/lib/bulk_ops/verification.rb +210 -0
  54. data/lib/bulk_ops/verification_job.rb +23 -0
  55. data/lib/bulk_ops/version.rb +3 -0
  56. data/lib/bulk_ops/work_job.rb +104 -0
  57. data/lib/bulk_ops/work_proxy.rb +466 -0
  58. data/lib/generators/bulk_ops/install/install_generator.rb +27 -0
  59. data/lib/generators/bulk_ops/install/templates/config/github.yml.example +28 -0
  60. metadata +145 -0
@@ -0,0 +1,466 @@
1
+ class BulkOps::WorkProxy < ActiveRecord::Base
2
+
3
require 'uri'

# Column-name vocabularies. Of these, only SEPARATOR is read by the methods in
# this file; the others are presumably consumed by collaborating classes
# (TODO confirm). All are frozen so they cannot be mutated by accident.
OPTION_FIELDS = ['visibility', 'work type'].freeze
RELATIONSHIP_FIELDS = ['parent', 'child', 'collection', 'next', 'order'].freeze
REFERENCE_IDENTIFIER_FIELDS = ['Reference Identifier', 'ref_id', 'Reference ID', 'Relationship ID', 'Relationship Identifier', 'Reference Identifier Type', 'Reference ID Type', 'Ref ID Type', 'relationship_identifier_type', 'relationship_id_type'].freeze
FILE_FIELDS = ['file', 'files', 'filename', 'filenames'].freeze
FILE_ACTIONS = ['add', 'upload', 'remove', 'delete'].freeze
# Character that separates multiple values inside one spreadsheet cell.
SEPARATOR = ';'.freeze

self.table_name = "bulk_ops_work_proxies"
belongs_to :operation, class_name: "BulkOps::Operation", foreign_key: "operation_id"
has_many :relationships, class_name: "BulkOps::Relationship"

# Only the writer is generated here: the reader is written out by hand in this
# class (it lazily defaults to an empty array), and generating it as well
# would just be redefined.
attr_writer :proxy_errors
15
+
16
# Builds the record through ActiveRecord and then places an editing hold if
# the proxy already points at a work.
#
# args - forwarded unchanged to ActiveRecord::Base#initialize.
def initialize *args
  super(*args)
  # Nothing in this class assigns @work_id, so testing the instance variable
  # could never be true. Read the value through its accessor instead.
  # NOTE(review): assumes work_id is a column-backed attribute, as its bare
  # use in #work suggests -- confirm against the migration.
  place_hold if work_id
end
20
+
21
# Returns the repository object identified by work_id, memoized in @work.
# Returns false when the lookup raises any StandardError.
def work
  @work ||= ActiveFedora::Base.find(work_id)
rescue
  false
end
30
+
31
# Returns this proxy's own work type when one is set; otherwise falls back to
# the operation's work type, and finally to the string "Work".
def work_type
  own_type = super
  return own_type if own_type
  operation.work_type || "Work"
end
34
+
35
# Placeholder: currently does nothing and returns nil.
# TODO make it so nobody can edit the work
def place_hold; end
38
+
39
# Placeholder: currently does nothing and returns nil.
# TODO make it so people can edit the work again
def lift_hold; end
42
+
43
# Converts one row of raw spreadsheet data into a metadata hash.
#
# The hash starts with the id of the "Bulk Ingest Set" admin set (or the
# default admin set), then the result of each interpreter is merged in, in
# the order listed below. Later interpreters win on duplicate keys.
#
# raw_data - the row, enumerated as (column name, cell value) pairs.
#
# Returns the metadata hash after it has passed through setAdminSet.
def interpret_data raw_data
  bulk_set = AdminSet.where(title: "Bulk Ingest Set").first
  bulk_set ||= AdminSet.find(AdminSet.find_or_create_default_admin_set_id)
  interpreted = { admin_set_id: bulk_set.id }

  interpreters = %i[interpret_file_fields
                    interpret_controlled_fields
                    interpret_scalar_fields
                    interpret_relationship_fields
                    interpret_option_fields]
  interpreters.each do |interpreter|
    interpreted.merge!(send(interpreter, raw_data))
  end

  setAdminSet(interpreted)
end
54
+
55
# Returns the list of errors collected on this proxy, creating an empty list
# on first access.
def proxy_errors
  @proxy_errors = [] unless @proxy_errors
  @proxy_errors
end
58
+
59
+ private
60
+
61
# Delegates to the operation. Callers in this class treat the result as the
# file action for the column (for example "remove"), or a falsy value when
# the column is not a file column.
def is_file_field?(field)
  operation.is_file_field?(field)
end
64
+
65
# Asks the operation whether a record with the given id exists.
# NOTE(review): a second definition of record_exists? appears later in this
# class; Ruby keeps whichever definition comes last.
def record_exists?(id)
  operation.record_exists?(id)
end
68
+
69
# Resolves a label to a local-authority URL for the given property.
#
# Returns the value untouched when the property has no local authority.
# Otherwise returns the URL of an existing exact-match entry, or mints a new
# entry and returns its URL.
def localAuthUrl(property, value)
  authority = getLocalAuth(property)
  return value if authority.nil?

  findAuthUrl(authority, value) || mintLocalAuthUrl(authority, value)
end
74
+
75
# Looks a collection up by id and by exact first title.
#
# Returns the last match, or false when nothing matches.
def find_collection(collection)
  matches = Collection.where(id: collection)
  matches += Collection.where(title: collection).select { |candidate| candidate.title.first == collection }
  matches.empty? ? false : matches.last
end
81
+
82
# Returns an existing collection matching the given id or title, creating one
# titled after the value when none exists.
#
# Returns false instead of creating when the value converts to a positive
# integer (presumably because such a value was meant as an id, not a title --
# TODO confirm).
def find_or_create_collection(collection)
  existing = find_collection(collection)
  if existing
    existing
  elsif collection.to_i > 0
    false
  else
    Collection.create(title: [collection.to_s],
                      depositor: operation.user.email,
                      collection_type: Hyrax::CollectionType.all.first)
  end
end
88
+
89
# Stub for remote-authority lookup: always returns false for now.
# TODO retrieve URL for this value from the specified remote authority
def get_remote_id(value, authority: nil, property: nil)
  false
end
93
+
94
# Titleizes the name, strips all whitespace, and lower-camel-cases the result.
def format_param_name(name)
  without_spaces = name.titleize.gsub(/\s+/, "")
  without_spaces.camelcase(:lower)
end
97
+
98
# The metadata schema object used to look up field definitions.
def schema
  ScoobySnacks::METADATA_SCHEMA
end
101
+
102
# Delegates column-name normalization to the operation. Callers in this class
# treat a falsy result as "not a metadata field".
def find_field_name(field)
  operation.find_field_name(field)
end
105
+
106
# Returns str with its first character lower-cased.
#
# Returns "" for nil, false or an empty string. An empty string previously
# raised NoMethodError because ""[0] is nil.
def downcase_first_letter(str)
  return "" if !str || str.empty?
  str[0].downcase + str[1..-1]
end
110
+
111
# Splits a cell into its individual values.
#
# The string is split on every separator that is not preceded by a backslash;
# escaped separators are then replaced with plain ones.
#
# value_string - String cell contents.
#
# Returns an Array of Strings.
def split_values value_string
  # Escape the separator so a regex metacharacter (e.g. "|" or ".") would
  # still be matched literally if SEPARATOR is ever changed.
  separator_pattern = Regexp.escape(SEPARATOR)
  value_string.split(/(?<!\\)#{separator_pattern}/).map { |val| val.gsub("\\#{SEPARATOR}", SEPARATOR) }
end
116
+
117
# Builds nested-attribute metadata for controlled-vocabulary columns.
#
# For every column that maps to a controlled schema field, each value is
# resolved to an {id, label} pair: URL values have their label fetched
# remotely, and label values are resolved to (or minted as) a URL. Columns
# whose name starts with "remove" or "delete" mark their terms for deletion.
#
# Reads operation.options: "ignore_labels", "import_labels" and
# "compare_labels". Problems are recorded through report_error.
#
# raw_data - the row, enumerated as (column name, cell value) pairs.
#
# Returns a Hash of "<property>_attributes" => Array of {"id" => ...} hashes,
# each with "_delete" => true when the term is to be removed.
def interpret_controlled_fields raw_data
  # Contents of columns marked as labels, keyed by normalized field name.
  labels = {}

  # Resolved terms, keyed by normalized field name.
  controlled_data = {}

  raw_data.each do |field_name, value|
    next if value.blank? || field_name.blank?
    field_name = field_name.to_s

    # If our CSV interpreter is feeding us the headers as a line, ignore it.
    next if field_name == value

    # Check whether the column uses the 'field_name.authority' syntax.
    authority = nil
    name_parts = field_name.split('.')
    if name_parts.count == 2
      authority = name_parts.last
      field_name = name_parts.first
    end

    # Get the field definition, if this column is a metadata field.
    field_name_norm = find_field_name(field_name)
    field = schema.get_field(field_name_norm)

    # Ignore anything that isn't a controlled field.
    next unless field.present? && field.controlled?

    lowered_name = field_name.downcase
    label_column = lowered_name.ends_with?("label")

    # Keep track of label columns.
    if label_column
      next if operation.options["ignore_labels"]
      labels[field_name_norm] ||= []
      labels[field_name_norm] += split_values(value)
      next unless operation.options["import_labels"]
    end

    # "remove..." and "delete..." columns both mark their terms for deletion.
    # (Previously only the "remove" prefix reached the stored flag.)
    remove = lowered_name.starts_with?("remove") || lowered_name.starts_with?("delete")

    split_values(value).each do |term|
      # A term is an id if it is a URL and the column is not a label column.
      # \A and \z anchor the whole string rather than a single line.
      if term =~ /\A#{URI::regexp}\z/ && !label_column
        term_id = term
        label = WorkIndexer.fetch_remote_label(term)
        unless label
          report_error(:cannot_retrieve_label,
                       "cannot fetch remote label for url: #{term}",
                       url: term,
                       row_number: row_number)
        end
      else
        # It's a label, so find or create the matching id.
        term_id = get_remote_id(term, property: field_name_norm, authority: authority) ||
                  localAuthUrl(field_name_norm, term)
        label = term
        unless term_id
          # The message is passed positionally, as report_error's signature
          # requires.
          report_error(:cannot_retrieve_url,
                       "cannot find or create url for controlled vocabulary label: #{term}",
                       url: term,
                       row_number: row_number)
        end
      end
      (controlled_data[field_name_norm] ||= []) << { id: term_id, label: label, remove: remove }
    end
  end

  # Delete duplicates (someone listed a url and also its label, or the same
  # url twice).
  controlled_data.each_value(&:uniq!)

  if operation.options["compare_labels"]
    controlled_data.each do |name, terms|
      field_labels = labels[name]
      # NOTE(review): fields with no label column are skipped, on the
      # assumption that there is nothing to compare them against -- confirm.
      next if field_labels.nil?

      unless field_labels.count == terms.count
        report_error(:mismatched_auth_terms,
                     "Different numbers of labels and ids for #{name}",
                     row_number: row_number)
      end

      field_labels.each do |label|
        next if terms.any? { |datum| datum[:label] == label }
        report_error(:mismatched_auth_terms,
                     "There are controlled vocab term labels that no provided URL resolves to, in the field #{name}.",
                     row_number: row_number)
      end
    end
  end

  # Actually add all the data.
  metadata = {}
  controlled_data.each do |property_name, data|
    data.each do |datum|
      atts = { "id" => datum[:id] }
      atts["_delete"] = true if datum[:remove]
      (metadata["#{property_name}_attributes"] ||= []) << atts
    end
  end
  metadata
end
214
+
215
# Collects the values of all non-controlled metadata columns.
#
# raw_data - the row, enumerated as (column name, cell value) pairs.
#
# Returns a Hash of field name => Array of stripped value Strings.
def interpret_scalar_fields raw_data
  metadata = {}
  raw_data.each do |field, values|
    next if values.blank? || field.nil?
    # Stringify before the header-row check so symbol keys are compared the
    # same way the other interpret_* methods compare them.
    field = field.to_s
    next if field == values

    # Get the field name, if this column is a metadata field.
    next unless (field_name = find_field_name(field))
    definition = schema.get_field(field_name)

    # Skip names the schema does not know, and controlled fields.
    next if definition.nil? || definition.controlled?

    # Use split_values so escaped separators ("\;") are honoured here just as
    # they are for controlled fields.
    split_values(values).each do |value|
      next if value.blank?
      cleaned = value.strip.encode('utf-8', :invalid => :replace, :undef => :replace, :replace => '_')
      (metadata[field_name] ||= []) << cleaned
    end
  end
  metadata
end
232
+
233
# Handles file additions and deletions listed in the spreadsheet row.
#
# If additional files need to be deleted because the update is set to replace
# some or all existing files, those replacement-related deletions are handled
# by the BulkOps::Operation.
#
# TODO: THIS DOES NOT YET MANAGE THE ORDER OF INGESTED FILESETS
#
# raw_data - the row, enumerated as (column name, cell value) pairs.
#
# Returns a Hash that holds the ids of newly created uploads under
# :uploaded_files (the key is absent when nothing was uploaded).
def interpret_file_fields raw_data
  metadata = {}
  raw_data.each do |field, value|
    next if value.blank? || field.blank?
    field = field.to_s

    # If our CSV interpreter is feeding us the headers as a line, ignore it.
    next if field == value

    # Check if this is a file field, and whether we are removing or adding.
    next unless (action = is_file_field?(field))

    # Move on if this field is the name of another property
    # (e.g. masterFilename).
    next if find_field_name(field)

    if action == "remove"
      # The block previously referenced an undefined name (file_set_id).
      get_removed_filesets(value).each { |fileset_id| delete_file_set(fileset_id) }
    else
      operation.get_file_paths(value).each do |filepath|
        begin
          # The block form closes the handle once the upload record exists.
          # NOTE(review): assumes Hyrax::UploadedFile.create has finished
          # reading the file by the time it returns -- confirm.
          uploaded_file = File.open(filepath) do |io|
            Hyrax::UploadedFile.create(file: io, user: operation.user)
          end
          (metadata[:uploaded_files] ||= []) << uploaded_file.id unless uploaded_file.id.nil?
        rescue StandardError => e
          # StandardError rather than Exception, so signals and exit requests
          # are not swallowed. The file reported is the path that failed; the
          # previous code referenced an undefined `filename` here.
          report_error(:upload_error,
                       "Error opening file: #{filepath} -- #{e}",
                       file: filepath,
                       row_number: row_number)
        end
      end
    end
  end
  metadata
end
275
+
276
# Applies per-row option columns (visibility, work type, reference
# identifier) directly to this proxy through update.
#
# Column names are matched after lower-casing, parameterizing and removing
# underscores, whitespace and dashes.
#
# raw_data - the row, enumerated as (column name, cell value) pairs.
#
# Always returns an empty Hash, so nothing is merged into the metadata.
def interpret_option_fields raw_data
  visibility_columns = %w[visibility public]
  work_type_columns = %w[worktype model type]
  reference_columns = %w[referenceidentifier
                         referenceid
                         refid
                         referenceidentifiertype
                         referenceidtype
                         refidtype
                         relationshipidentifier
                         relationshipid
                         relationshipidentifiertype
                         relationshipidtype
                         relid
                         relidtype]

  raw_data.each do |field, value|
    next if value.blank? || field.blank?
    column = field.to_s
    next if value == column

    # The three lists are disjoint, so at most one branch applies.
    case column.downcase.parameterize.gsub(/[_\s-]/,'')
    when *visibility_columns
      update(visibility: format_visibility(value))
    when *work_type_columns
      update(work_type: format_worktype(value))
    when *reference_columns
      update(reference_identifier: format_reference_id(value))
    end
  end
  {}
end
306
+
307
# Interprets ordering, collection-membership and work-relationship columns.
#
# - An "order" column updates this proxy's order.
# - Collection columns find or create the collection and add its id to
#   :member_of_collection_ids.
# - Parent/child/next columns create a BulkOps::Relationship record.
#
# A column name may be written "ref_type:field" and a value "ref_type:value";
# the part before the colon is used as the relationship's identifier type.
#
# raw_data - the row, enumerated as (column name, cell value) pairs.
#
# Returns a Hash, possibly containing :member_of_collection_ids.
def interpret_relationship_fields raw_data
  metadata = {}
  collection_columns = %w[collection collectiontitle memberofcollection collectionname collectionid]
  # All variations of column names that require BulkOps::Relationship objects.
  relationship_columns = %w[parent parentid parentidentifier parentwork
                            child childid childidentifier childwork
                            next nextfile nextwork nextid nextfileidentifier nextfileid nextworkid]

  raw_data.each do |field, value|
    next if value.blank? || field.blank?
    field = field.to_s
    next if value == field

    ref_id = nil
    field_parts = field.split(":")
    if field_parts.count == 2
      ref_id = field_parts.first
      field = field_parts.last.to_s
    end

    normfield = field.downcase.parameterize.gsub(/[_\s-]/,'')

    # The column gives the object's order among siblings (usually for
    # multiple filesets).
    update(order: value.to_f) if normfield == "order"

    # The column gives the name or id of a collection: find or create it and
    # record the membership.
    if collection_columns.include?(normfield)
      col = find_or_create_collection(value)
      (metadata[:member_of_collection_ids] ||= []) << col.id if col
    end

    next unless relationship_columns.include?(normfield)

    # Reduce the column name to its relationship type. This lookup used to be
    # computed and thrown away, so the raw column name (e.g. "parentid") was
    # stored instead.
    # NOTE(review): assumes BulkOps::Relationship expects "parent", "child"
    # or "next" here -- confirm against that class.
    relationship_type = %w[parent child next].find { |type| normfield.include?(type) }

    # Correctly interpret the notation "id:a78C2d81".
    value_parts = value.split(":")
    if value_parts.count == 2
      ref_id = value_parts.first
      value = value_parts.last
    end

    BulkOps::Relationship.create(work_proxy_id: id,
                                 identifier_type: ref_id || reference_identifier,
                                 relationship_type: relationship_type,
                                 object_identifier: value,
                                 status: "new")
  end
  metadata
end
351
+
352
# Normalizes the value of a reference-identifier column.
#
# Returns:
# - "id" unchanged;
# - the lower-camel-cased name when it is a schema field or a method that
#   SolrDocument responds to;
# - "row" when the value refers to a row number;
# - nil otherwise.
def format_reference_id(value)
  return value if value == "id"

  # Normalize the value string. downcase_first_letter is a helper on this
  # class, not a String method, so it has to be called as a function.
  value_method = downcase_first_letter(value.titleize.gsub(/[-_\s]/,''))

  # If this is a valid metadata property or solr parameter, return it as-is.
  # NOTE(review): uses schema.get_field, as the rest of this class does; the
  # earlier `get_field?` spelling appears nowhere else -- confirm.
  return value_method if schema.get_field(value_method) || SolrDocument.new.respond_to?(value_method)

  # If it is meant to reference a row number, return the string "row".
  # Whitespace, underscores and dashes are stripped first, so "row number"
  # arrives here as "rownumber".
  normalized = value.downcase.parameterize.gsub(/[_\s-]/,'')
  return "row" if %w[row rownum rownumber].include?(normalized)

  nil
end
364
+
365
# Converts a work-type cell into a class name.
#
# The value is formatted like a class name. When no constant of that name is
# defined, this falls back to the operation's work type, or to "Work".
def format_worktype(value)
  # Format the value like a class name.
  type = value.titleize.gsub(/[-_\s]/,'')

  # Object.const_defined? raises NameError for strings that are not valid
  # constant names (e.g. ones starting with a digit); treat that as
  # "not defined" instead of letting the row blow up.
  defined_class = begin
    Object.const_defined?(type)
  rescue NameError
    false
  end
  return type if defined_class

  operation.work_type || "Work"
end
373
+
374
# Maps a visibility cell (case-insensitive) onto one of the canonical values
# "open", "ucsc" or "restricted". Returns nil for anything unrecognized.
def format_visibility(value)
  synonyms = {
    "open" => ["public", "open", "true"],
    "ucsc" => ["campus", "ucsc", "institution"],
    "restricted" => ["restricted", "private", "closed", "false"]
  }
  wanted = value.downcase
  match = synonyms.find { |_canonical, names| names.include?(wanted) }
  match && match.first
end
384
+
385
# Creates a local-authority entry for the label and returns its URL.
#
# The entry's uri is the parameterized label; the authority itself is found
# or created by name.
def mintLocalAuthUrl(auth_name, value)
  local_id = value.parameterize
  authority = Qa::LocalAuthority.find_or_create_by(name: auth_name)
  Qa::LocalAuthorityEntry.create(local_authority: authority, label: value, uri: local_id)
  localIdToUrl(local_id, auth_name)
end
393
+
394
# Searches a local subauthority for an entry whose label equals the value
# exactly.
#
# Returns the entry's "url" (or its "id" when there is no url), expanded into
# a local-authority URL when it is not already a URL. Returns nil when there
# is no authority, no search result, or no exact match.
def findAuthUrl(auth, value)
  return nil if auth.nil?

  entries = Qa::Authorities::Local.subauthority_for(auth).search(value)
  return nil unless entries

  entries.each do |entry|
    # Require an exact match.
    next unless entry["label"] == value

    found = entry["url"] || entry["id"]
    return found if found =~ URI::regexp
    return localIdToUrl(found, auth)
  end
  nil
end
408
+
409
# Builds the public URL of a local-authority entry from its id and the name
# of its authority.
def localIdToUrl(id,auth_name)
  authorities_root = "https://digitalcollections.library.ucsc.edu/authorities/show/local"
  "#{authorities_root}/#{auth_name}/#{id}"
end
412
+
413
# Returns the subauthority name of the first vocabulary on the field whose
# authority is "local" (case-insensitive), or nil when there is none.
# There is only ever one local authority per field, so the first one found
# is used.
def getLocalAuth(field_name)
  vocabularies = schema.get_property(field_name).vocabularies
  return nil unless vocabularies

  vocabularies.each do |vocabulary|
    return vocabulary["subauthority"] if vocabulary["authority"].downcase == "local"
  end
  nil
end
423
+
424
# Ensures the metadata hash carries an :admin_set_id.
#
# An existing id is left alone. Otherwise the "Bulk Ingest Set" admin set is
# used, falling back to the admin set with id 'admin_set/default'.
#
# Returns the same metadata hash.
def setAdminSet metadata
  return metadata if metadata[:admin_set_id]

  # AdminSet.where yields a list but AdminSet.find yields a single record, so
  # take the first element of the list before falling back. Calling .first on
  # the result of find, as the code used to, fails.
  admin_set = AdminSet.where({title: "Bulk Ingest Set"}).first || AdminSet.find('admin_set/default')
  metadata[:admin_set_id] = admin_set.id if admin_set
  metadata
end
431
+
432
# Records an error against this proxy.
#
# Prints the message, marks the proxy's status as "error" with that message,
# and appends a BulkOps::Error built from the remaining keywords plus :type.
#
# type    - the error type.
# message - the text to store; may be given positionally or as `message:`.
# args    - extra keywords forwarded to BulkOps::Error.new.
#
# Returns the list of errors collected so far.
def report_error(type, message = nil, **args)
  # Callers in this class pass the text both positionally and as a `message:`
  # keyword. Accept either, so the keyword form no longer leaves the required
  # positional argument unfilled.
  message ||= args.delete(:message)
  puts "ERROR MESSAGE: #{message}"
  update(status: "error", message: message)
  args[:type] = type
  (@proxy_errors ||= []) << BulkOps::Error.new(**args)
end
438
+
439
# Returns the operation's filename prefix, memoized after the first truthy
# result.
def filename_prefix
  return @filename_prefix if @filename_prefix
  @filename_prefix = operation.filename_prefix
end
442
+
443
# Asks the operation whether a record exists.
#
# id - the record id to check; defaults to this proxy's own work_id.
#
# This is the last definition of record_exists? in the class, so it is the
# one Ruby keeps. It has to accept an id because get_removed_filesets calls
# record_exists?(file_id); the zero-argument form made that call raise
# ArgumentError.
def record_exists?(id = work_id)
  operation.record_exists?(id)
end
446
+
447
# Returns the entries of the separator-delimited string that are ids of
# existing records.
#
# TODO: entries that are filenames (fileset labels) rather than ids are not
# resolved yet; such entries are simply dropped.
# NOTE(review): relies on record_exists? accepting an id argument.
def get_removed_filesets(filestring)
  split_values(filestring).select { |candidate_id| record_exists?(candidate_id) }
end
461
+
462
# Enqueues a BulkOps::DeleteFileSetJob for the fileset, passing the email of
# the operation's user.
def delete_file_set fileset_id
  requester_email = operation.user.email
  BulkOps::DeleteFileSetJob.perform_later(fileset_id, requester_email)
end
465
+
466
+ end