bulk_ops 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +7 -0
  2. data/app/assets/images/bulk_ops/github_logo.png +0 -0
  3. data/app/assets/javascripts/bulk_ops.js +14 -0
  4. data/app/assets/javascripts/bulk_ops/selections.js +24 -0
  5. data/app/assets/javascripts/selections.js +38 -0
  6. data/app/assets/javascripts/work_search.js +64 -0
  7. data/app/assets/stylesheets/bulk_ops.scss +99 -0
  8. data/app/controllers/bulk_ops/application_controller.rb +13 -0
  9. data/app/controllers/bulk_ops/github_authorization_controller.rb +33 -0
  10. data/app/controllers/bulk_ops/operations_controller.rb +481 -0
  11. data/app/jobs/bulk_ops/application_job.rb +4 -0
  12. data/app/mailers/bulk_ops/application_mailer.rb +6 -0
  13. data/app/models/bulk_ops/application_record.rb +5 -0
  14. data/app/views/bulk_ops/_bulk_ops_sidebar_widget.html.erb +15 -0
  15. data/app/views/bulk_ops/_github_auth_widget.html.erb +13 -0
  16. data/app/views/bulk_ops/operations/_bulk_ops_header.html.erb +4 -0
  17. data/app/views/bulk_ops/operations/_choose_fields.html.erb +22 -0
  18. data/app/views/bulk_ops/operations/_choose_notifications.html.erb +22 -0
  19. data/app/views/bulk_ops/operations/_git_message.html.erb +7 -0
  20. data/app/views/bulk_ops/operations/_ingest_options.html.erb +42 -0
  21. data/app/views/bulk_ops/operations/_operation_options.html.erb +38 -0
  22. data/app/views/bulk_ops/operations/_show_authorize.html.erb +13 -0
  23. data/app/views/bulk_ops/operations/_show_complete.html.erb +31 -0
  24. data/app/views/bulk_ops/operations/_show_draft.html.erb +20 -0
  25. data/app/views/bulk_ops/operations/_show_new.html.erb +2 -0
  26. data/app/views/bulk_ops/operations/_show_pending.html.erb +58 -0
  27. data/app/views/bulk_ops/operations/_show_running.html.erb +56 -0
  28. data/app/views/bulk_ops/operations/_show_verifying.html.erb +8 -0
  29. data/app/views/bulk_ops/operations/_show_waiting.html.erb +9 -0
  30. data/app/views/bulk_ops/operations/_update_draft_work_list.html.erb +45 -0
  31. data/app/views/bulk_ops/operations/_update_draft_work_search.html.erb +59 -0
  32. data/app/views/bulk_ops/operations/_update_options.html.erb +9 -0
  33. data/app/views/bulk_ops/operations/index.html.erb +51 -0
  34. data/app/views/bulk_ops/operations/new.html.erb +36 -0
  35. data/app/views/bulk_ops/operations/show.html.erb +7 -0
  36. data/config/routes.rb +25 -0
  37. data/db/migrate/20180926190757_create_github_credentials.rb +13 -0
  38. data/db/migrate/20181017180436_create_bulk_ops_tables.rb +40 -0
  39. data/lib/bulk_ops.rb +15 -0
  40. data/lib/bulk_ops/create_spreadsheet_job.rb +43 -0
  41. data/lib/bulk_ops/create_work_job.rb +14 -0
  42. data/lib/bulk_ops/delete_file_set_job.rb +15 -0
  43. data/lib/bulk_ops/engine.rb +6 -0
  44. data/lib/bulk_ops/error.rb +141 -0
  45. data/lib/bulk_ops/github_access.rb +284 -0
  46. data/lib/bulk_ops/github_credential.rb +3 -0
  47. data/lib/bulk_ops/operation.rb +358 -0
  48. data/lib/bulk_ops/relationship.rb +79 -0
  49. data/lib/bulk_ops/search_builder_behavior.rb +80 -0
  50. data/lib/bulk_ops/templates/configuration.yml +5 -0
  51. data/lib/bulk_ops/templates/readme.md +1 -0
  52. data/lib/bulk_ops/update_work_job.rb +14 -0
  53. data/lib/bulk_ops/verification.rb +210 -0
  54. data/lib/bulk_ops/verification_job.rb +23 -0
  55. data/lib/bulk_ops/version.rb +3 -0
  56. data/lib/bulk_ops/work_job.rb +104 -0
  57. data/lib/bulk_ops/work_proxy.rb +466 -0
  58. data/lib/generators/bulk_ops/install/install_generator.rb +27 -0
  59. data/lib/generators/bulk_ops/install/templates/config/github.yml.example +28 -0
  60. metadata +145 -0
@@ -0,0 +1,466 @@
1
# A WorkProxy represents one row of a bulk-operation spreadsheet: it tracks
# the (eventual) work's id, interprets the row's raw cell data into work
# attributes, and records any errors encountered along the way.
class BulkOps::WorkProxy < ActiveRecord::Base

  require 'uri'

  # Column names treated as per-work options rather than metadata.
  OPTION_FIELDS = ['visibility','work type']
  # Column names that express relationships to other works/collections.
  RELATIONSHIP_FIELDS = ['parent','child','collection','next','order']
  # Accepted spellings of the reference-identifier option column.
  REFERENCE_IDENTIFIER_FIELDS = ['Reference Identifier','ref_id','Reference ID','Relationship ID','Relationship Identifier','Reference Identifier Type','Reference ID Type','Ref ID Type','relationship_identifier_type','relationship_id_type']
  # Column names that carry file additions/removals.
  FILE_FIELDS = ['file','files','filename','filenames']
  FILE_ACTIONS = ['add','upload','remove','delete']
  # Separator for multi-valued cells; may be escaped in a cell as "\;".
  SEPARATOR = ';'
  self.table_name = "bulk_ops_work_proxies"
  belongs_to :operation, class_name: "BulkOps::Operation", foreign_key: "operation_id"
  has_many :relationships, class_name: "BulkOps::Relationship"

  # Errors collected while interpreting this row (see #proxy_errors below,
  # whose memoizing reader overrides the one generated here).
  attr_accessor :proxy_errors
16
# Standard ActiveRecord construction; additionally places a hold on the
# referenced work (if any) so it isn't edited mid-operation.
def initialize *args
  # Parenthesized to avoid the ambiguous-splat parse warning of `super *args`.
  super(*args)
  # NOTE(review): ActiveRecord stores attributes in @attributes, not in
  # individual ivars, so @work_id may never be set here — confirm this guard
  # ever fires (place_hold is currently a no-op stub anyway).
  place_hold if @work_id
end
20
+
21
# Fetch and memoize the repository work referenced by work_id.
# Returns false when the work cannot be loaded (e.g. not yet created).
def work
  return @work unless @work.nil?
  begin
    @work = ActiveFedora::Base.find(work_id)
  rescue
    return false
  end
  @work
end
30
+
31
# The work type for this row: the explicit column value if present,
# falling back to the operation-wide default, then to "Work".
def work_type
  explicit_type = super
  explicit_type || operation.work_type || "Work"
end
34
+
35
# Intended to lock the work against concurrent edits during the operation.
# Currently a no-op stub.
def place_hold
  # TODO make it so nobody can edit the work
end

# Intended to release the edit lock placed by #place_hold.
# Currently a no-op stub.
def lift_hold
  # TODO make it so people can edit the work again
end
42
+
43
# Build a work attribute hash from one spreadsheet row (raw_data, a hash-like
# of column header => cell value). Merges the results of the specialized
# interpreters in order; later merges win on key collisions. The relationship
# and option interpreters mostly persist state on the proxy and return
# little or no metadata.
def interpret_data raw_data
  admin_set = AdminSet.where(title: "Bulk Ingest Set").first || AdminSet.find(AdminSet.find_or_create_default_admin_set_id)
  metadata = {admin_set_id: admin_set.id}
  metadata.merge! interpret_file_fields(raw_data)
  metadata.merge! interpret_controlled_fields(raw_data)
  metadata.merge! interpret_scalar_fields(raw_data)
  metadata.merge! interpret_relationship_fields(raw_data )
  metadata.merge! interpret_option_fields(raw_data)
  # setAdminSet returns immediately here since admin_set_id was set above.
  metadata = setAdminSet(metadata)
  return metadata
end
54
+
55
# Memoizing reader for the errors collected on this proxy; always returns an
# array (possibly empty). Overrides the attr_accessor-generated reader.
def proxy_errors
  @proxy_errors = [] if @proxy_errors.nil?
  @proxy_errors
end
58
+
59
+ private
60
+
61
# Delegates to the operation, which knows the spreadsheet's header
# conventions; returns the file action for file columns, falsy otherwise.
def is_file_field? field
  operation.is_file_field?(field)
end
64
+
65
# Delegates existence checking for an arbitrary id to the operation.
# NOTE(review): a zero-argument record_exists? is defined later in this class;
# in Ruby the later definition replaces this one, so this one-argument form
# is effectively dead code — confirm and consolidate the two definitions.
def record_exists? id
  operation.record_exists? id
end
68
+
69
# Resolve a controlled-vocabulary label to a local-authority URL, minting a
# new local entry when none exists. Returns the raw value when the property
# has no local authority configured.
def localAuthUrl(property, value)
  auth = getLocalAuth(property)
  return value if auth.nil?
  findAuthUrl(auth, value) || mintLocalAuthUrl(auth, value)
end
74
+
75
# Look up a collection by id or by exact title match.
# Returns the matching Collection, or false if none is found.
def find_collection(collection)
  matches = Collection.where(id: collection).to_a
  Collection.where(title: collection).each do |candidate|
    matches << candidate if candidate.title.first == collection
  end
  matches.empty? ? false : matches.last
end
81
+
82
# Find a collection by id/title, creating one titled after the value when it
# doesn't exist. Numeric strings are assumed to be (missing) ids and are not
# used as titles; returns false in that case.
def find_or_create_collection(collection)
  existing = find_collection(collection)
  return existing if existing
  return false if collection.to_i > 0
  Collection.create(title: [collection.to_s],
                    depositor: operation.user.email,
                    collection_type: Hyrax::CollectionType.all.first)
end
88
+
89
# Placeholder for remote-authority resolution: always returns false, which
# causes callers to fall back to local-authority lookup.
def get_remote_id(value, authority: nil, property: nil)
  return false
  #TODO retrieve URL for this value from the specified remote authr
end
93
+
94
# Convert a human header like "date created" into lowerCamelCase
# ("dateCreated") for use as a parameter/property name.
def format_param_name(name)
  squished = name.titleize.gsub(/\s+/, "")
  squished.camelcase(:lower)
end
97
+
98
# The application-wide metadata schema used to resolve and classify fields.
def schema
  ScoobySnacks::METADATA_SCHEMA
end
101
+
102
# Delegate header-to-schema-field-name resolution to the operation.
# Returns the normalized field name, or falsy when the column is not a
# metadata field.
def find_field_name(field)
  operation.find_field_name(field)
end
105
+
106
# Lowercase only the first character of str ("DateCreated" -> "dateCreated").
# Returns "" for nil/false/empty input.
# BUGFIX: the original crashed on "" because ""[0] is nil (nil.downcase).
def downcase_first_letter(str)
  return "" unless str && !str.empty?
  str[0].downcase + str[1..-1]
end
110
+
111
# Split a multi-valued cell on every unescaped separator (escape char is
# a backslash), then restore escaped separators inside each value.
def split_values value_string
  pieces = value_string.split(/(?<!\\)#{SEPARATOR}/)
  pieces.map { |piece| piece.gsub("\\#{SEPARATOR}", SEPARATOR) }
end
116
+
117
# Interpret columns that map to controlled-vocabulary fields. Each value is
# resolved to an {id, label} pair (URLs are treated as ids and their labels
# fetched; labels are resolved/minted to local-authority URLs) and returned
# as Hyrax-style "<property>_attributes" nested attributes.
def interpret_controlled_fields raw_data

  # The labels array tracks the contents of columns marked as labels,
  # which may require special validation
  labels = {}

  # This hash is populated with relevant data as we loop through the fields
  controlled_data = {}

  raw_data.each do |field_name, value|
    next if value.blank? or field_name.blank?
    field_name = field_name.to_s

    #If our CSV interpreter is feeding us the headers as a line, ignore it.
    next if field_name == value

    #check if they are using the 'field_name.authority' syntax
    authority = nil
    if ((split=field_name.split('.')).count == 2)
      authority = split.last
      field_name = split.first
    end

    # get the field name, if this column is a metadata field
    field_name_norm = find_field_name(field_name)
    field = schema.get_field(field_name_norm)

    # Ignore anything that isn't a controlled field
    next unless field.present? && field.controlled?

    # Keep track of label fields
    if field_name.downcase.ends_with?("label")
      next if operation.options["ignore_labels"]
      labels[field_name_norm] ||= []
      labels[field_name_norm] += split_values value
      next unless operation.options["import_labels"]
    end

    # NOTE(review): `remove` is assigned but never used below; the datum hash
    # only checks the "remove" prefix, so "delete..." columns will not mark
    # values for removal — confirm which is intended.
    remove = field_name.downcase.starts_with?("remove") || field_name.downcase.starts_with?("delete")

    # handle multiple values
    split_values(value).each do |value|

      # Decide of we're dealing with a label or url
      # It's an ID if it's a URL and the name doesn't end in 'label'
      if value =~ /^#{URI::regexp}$/ and !field_name.downcase.ends_with?("label")
        id = value
        label = WorkIndexer.fetch_remote_label(value)
        error_message = "cannot fetch remote label for url: #{value}"
        # NOTE(review): row_number is not defined anywhere in this class —
        # confirm it is provided by a mixin, else this raises NameError.
        report_error( :cannot_retrieve_label , error_message, url: value, row_number: row_number) unless label
      else
        # It's a label, so get the id
        id = get_remote_id(value, property: field_name_norm, authority: authority) || localAuthUrl(field_name_norm, value)
        label = value
        # NOTE(review): report_error's signature takes `message` positionally;
        # passing it as a keyword here raises ArgumentError when triggered.
        report_error(:cannot_retrieve_url,
                     message: "cannot find or create url for controlled vocabulary label: #{value}",
                     url: value,
                     row_number: row_number) unless id
      end
      (controlled_data[field_name_norm] ||= []) << {id: id, label: label, remove: field_name.downcase.starts_with?("remove")}
    end
  end

  #delete any duplicates (if someone listed a url and also its label, or the same url twice)
  controlled_data.each{|field_name, values| controlled_data[field_name] = values.uniq }

  if operation.options["compare_labels"]
    # NOTE(review): labels['field'] looks like it should be labels[field_name]
    # — as written it reads the literal key 'field' (likely nil). Similarly,
    # #{field} interpolates the last schema field object from the loop above.
    controlled_data.each do |field_name,values|
      unless labels['field'].count == values.count
        report_error(:mismatched_auth_terms,
                     message: "Different numbers of labels and ids for #{field}",
                     row_number: row_number)
      end
    end
    # NOTE(review): controlled_data is a Hash, so `dt` here is a [key, values]
    # pair and dt["label"] cannot match — confirm the intended traversal.
    labels['field'].each do |label|
      next if controlled_data.any?{|dt| dt["label"] == label}
      report_error(:mismatched_auth_terms,
                   message: "There are controlled vocab term labels that no provided URL resolves to, in the field #{field}.",
                   row_number: row_number)
    end
  end

  # Actually add all the data
  metadata = {}
  leftover_data = raw_data.dup.to_hash
  controlled_data.each do |property_name, data|
    data.each do |datum|
      atts = {"id" => datum[:id]}
      atts["_delete"] = true if datum[:remove]
      metadata["#{property_name}_attributes"] ||= []
      metadata["#{property_name}_attributes"] << atts
      leftover_data.except! property_name
    end
  end
  #return [metadata, leftover_data]
  return metadata
end
214
+
215
# Extract uncontrolled (scalar) metadata from a spreadsheet row.
# Returns a hash of field_name => [values]; values are stripped and
# re-encoded to UTF-8 with invalid bytes replaced by '_'.
def interpret_scalar_fields raw_data
  metadata = {}
  raw_data.each do |field, values|
    # Skip blanks and a repeated header row (header == value).
    next if values.blank? or field.nil? or field == values
    # Only process columns that resolve to a schema metadata field.
    next unless field_name = find_field_name(field.to_s)
    field = schema.get_field(field_name)
    # Controlled fields are handled by interpret_controlled_fields.
    next if field.controlled?
    # CONSISTENCY FIX: use split_values (as the controlled-field and file
    # interpreters do) so escaped separators ("\;") survive splitting;
    # the original split on the raw SEPARATOR and ignored escapes.
    split_values(values).each do |value|
      next if value.blank?
      value = value.strip.encode('utf-8', :invalid => :replace, :undef => :replace, :replace => '_')
      (metadata[field_name] ||= []) << value
    end
  end
  return metadata
end
232
+
233
# Handle file additions and deletions requested by the spreadsheet row.
# If additional files must be deleted because the update replaces some or
# all existing files, those replacement-related deletions are handled by
# BulkOps::Operation, not here. Returns {uploaded_files: [ids]} for adds.
#
# TODO: this does not yet manage the order of ingested filesets.
def interpret_file_fields raw_data
  metadata = {}
  raw_data.each do |field, value|
    next if value.blank? or field.blank?
    field = field.to_s
    # If our CSV interpreter is feeding us the headers as a line, ignore it.
    next if field == value

    # Only process file columns; is_file_field? returns the action to take.
    next unless (action = is_file_field?(field))

    # Move on if this field is the name of another property (e.g. masterFilename)
    next if find_field_name(field)

    if action == "remove"
      # BUGFIX: was delete_file_set(file_set_id) — an undefined variable
      # (NameError on every removal); use the block parameter.
      get_removed_filesets(value).each { |fileset_id| delete_file_set(fileset_id) }
    else
      # Add a file: stage each referenced path as a Hyrax uploaded file.
      operation.get_file_paths(value).each do |filepath|
        begin
          uploaded_file = Hyrax::UploadedFile.create(file: File.open(filepath), user: operation.user)
          (metadata[:uploaded_files] ||= []) << uploaded_file.id unless uploaded_file.id.nil?
        rescue StandardError => e
          # Narrowed from `rescue Exception`, which would swallow signals
          # and system exits.
          # BUGFIX: was referencing an undefined `filename`; use filepath.
          # NOTE(review): row_number is not defined in this class — confirm
          # it is supplied by a mixin.
          report_error(:upload_error,
                       message: "Error opening file: #{ filepath } -- #{e}",
                       file: File.join(BulkOps::Operation::INGEST_MEDIA_PATH, filepath),
                       row_number: row_number)
        end
      end
    end
  end
  return metadata
end
275
+
276
# Interpret per-work option columns (visibility, work type, reference
# identifier) by persisting them on this proxy. Always returns an empty
# hash: options contribute no direct work metadata.
def interpret_option_fields raw_data
  raw_data.each do |field, value|
    next if value.blank? || field.blank?
    field = field.to_s
    next if value == field

    normalized = field.downcase.parameterize.gsub(/[_\s-]/, '')
    case normalized
    when "visibility", "public"
      update(visibility: format_visibility(value))
    when "worktype", "model", "type"
      update(work_type: format_worktype(value))
    when "referenceidentifier", "referenceid", "refid",
         "referenceidentifiertype", "referenceidtype", "refidtype",
         "relationshipidentifier", "relationshipid",
         "relationshipidentifiertype", "relationshipidtype",
         "relid", "relidtype"
      update(reference_identifier: format_reference_id(value))
    end
  end
  {}
end
306
+
307
# Interpret relationship columns: sibling order, collection membership, and
# parent/child/next links. Links are stored as BulkOps::Relationship rows
# for deferred resolution; only member_of_collection_ids is returned as
# direct metadata.
def interpret_relationship_fields raw_data
  metadata = {}
  raw_data.each do |field,value|
    next if value.blank? or field.blank?
    field = field.to_s

    next if value == field

    # "identifier_type:fieldname" header syntax overrides the identifier
    # type for this column.
    if (split = field.split(":")).count == 2
      ref_id = split.first
      field = split.last.to_s
    end

    normfield = field.downcase.parameterize.gsub(/[_\s-]/,'')
    # next unless RELATIONSHIP_FIELDS.include? normfield

    # If the field specifies the object's order among siblings (usually for multiple filesets)
    update(order: value.to_f) if normfield == "order"

    # If the field specifies the name or ID of a collection,
    # find or create the collection and update the metadata to match
    if ["collection","collectiontitle","memberofcollection","collectionname", "collectionid"].include?(normfield)
      col = find_or_create_collection(value)
      ( metadata[:member_of_collection_ids] ||= [] ) << col.id if col
    end

    # All variations of field names that require BulkOps::Relationship objects
    next unless ["parent","parentid","parentidentifier","parentwork","child","childid","childidentifier","childwork","next","nextfile","nextwork","nextid","nextfileidentifier","nextfileid","nextworkid"].include?(normfield)

    # find which type of relationship
    # NOTE(review): the result of this select is discarded, so the
    # Relationship below stores the raw normfield (e.g. "parentid") rather
    # than the bare type ("parent") — confirm downstream expectations.
    ["parent","child","next"].select{|type| normfield.include?(type)}.first
    # correctly interpret the notation "id:a78C2d81"
    if ((split = value.split(":")).count == 2)
      ref_id = split.first
      value = split.last
    end
    BulkOps::Relationship.create( { work_proxy_id: id,
                                    identifier_type: ref_id || reference_identifier,
                                    relationship_type: normfield,
                                    object_identifier: value,
                                    status: "new"} )
  end
  return metadata
end
351
+
352
# Normalize the user-supplied reference-identifier option.
# Returns "id" as-is, a camelCased property/solr-field name when valid,
# "row" for row-number spellings, and nil otherwise.
def format_reference_id(value)
  return value if value=="id"
  # normalize the value string into a method-style name
  # BUGFIX: downcase_first_letter is a private helper method of this class,
  # not a String method; calling it on the string raised NoMethodError.
  value_method = downcase_first_letter(value.titleize.gsub(/[-_\s]/,''))
  # if this is a valid metadata property or solr parameter, return it as-is
  return value_method if (schema.get_field?(value_method) || SolrDocument.new.respond_to?(value_method))
  # if it is meant to reference a row number, return the string "row"
  # NOTE(review): "row number" can never match here — the parameterize/gsub
  # above collapses it to "rownumber"; confirm whether "rownumber" was meant.
  case value.downcase.parameterize.gsub(/[_\s-]/,'')
  when "row", "rownum","row number"
    return "row"
  end
end
364
+
365
# Coerce a spreadsheet cell into a work-type class name, e.g.
# "image work" -> "ImageWork". Rejects names that aren't defined classes,
# falling back to the operation's work type and finally to "Work".
def format_worktype(value)
  candidate = value.titleize.gsub(/[-_\s]/, '')
  candidate = false unless Object.const_defined? candidate
  candidate || operation.work_type || "Work"
end
373
+
374
# Map assorted spreadsheet spellings onto canonical visibility values
# ("open", "ucsc", "restricted"); unrecognized input yields nil.
def format_visibility(value)
  aliases = {
    "open"       => %w[public open true],
    "ucsc"       => %w[campus ucsc institution],
    "restricted" => %w[restricted private closed false]
  }
  normalized = value.downcase
  aliases.each do |canonical, spellings|
    return canonical if spellings.include?(normalized)
  end
  nil
end
384
+
385
# Create a local-authority entry for a new vocabulary label and return its
# resolvable local URL. The parameterized label doubles as the entry's id.
def mintLocalAuthUrl(auth_name, value)
  id = value.parameterize
  auth = Qa::LocalAuthority.find_or_create_by(name: auth_name)
  # Record the label/uri pair; the created entry itself isn't needed here
  # (the original assigned it to an unused local).
  Qa::LocalAuthorityEntry.create(local_authority: auth,
                                 label: value,
                                 uri: id)
  localIdToUrl(id, auth_name)
end
393
+
394
# Search the named local subauthority for an entry whose label exactly
# matches value. Returns the entry's URL (minting one from its id when the
# stored url/id isn't already URL-shaped), or nil when nothing matches.
def findAuthUrl(auth, value)
  return nil if auth.nil?
  entries = Qa::Authorities::Local.subauthority_for(auth).search(value)
  return nil unless entries
  entries.each do |entry|
    # require an exact label match
    next unless entry["label"] == value
    url = entry["url"] || entry["id"]
    url = localIdToUrl(url, auth) unless url =~ URI::regexp
    return url
  end
  nil
end
408
+
409
# Build the public show URL for a local-authority entry.
def localIdToUrl(id, auth_name)
  base = "https://digitalcollections.library.ucsc.edu"
  "#{base}/authorities/show/local/#{auth_name}/#{id}"
end
412
+
413
# Return the local subauthority name configured for a field, or nil.
# There is only ever one local authority per field, so the first match wins.
def getLocalAuth(field_name)
  field = schema.get_property(field_name)
  vocabularies = field.vocabularies
  return nil unless vocabularies
  local = vocabularies.find { |voc| voc["authority"].downcase == "local" }
  local && local["subauthority"]
end
423
+
424
# Ensure metadata carries an admin_set_id: prefers the "Bulk Ingest Set",
# then the default admin set; leaves metadata untouched when an admin set is
# already assigned or none can be found.
def setAdminSet metadata
  return metadata if metadata[:admin_set_id]
  asets = AdminSet.where({title: "Bulk Ingest Set"})
  # NOTE(review): AdminSet.find usually returns a single record rather than
  # a relation, so the asets.first call below depends on find's result
  # responding to #first — confirm against the installed AdminSet API.
  asets = AdminSet.find('admin_set/default') if asets.blank?
  metadata[:admin_set_id] = asets.first.id unless asets.blank?
  return metadata
end
431
+
432
# Record an error on this proxy: log it, flip the proxy status to "error",
# and collect a BulkOps::Error for later reporting.
#
# BUGFIX/COMPAT: several call sites in this class pass `message:` as a
# keyword (e.g. interpret_controlled_fields), which raised ArgumentError
# against the strictly-positional signature. Accept the message either
# positionally or as a keyword; positional callers are unaffected.
def report_error type, message = nil, **args
  message ||= args.delete(:message)
  puts "ERROR MESSAGE: #{message}"
  update(status: "error", message: message)
  args[:type]=type
  (@proxy_errors ||= []) << BulkOps::Error.new(**args)
end
438
+
439
# Memoized delegation of the operation's filename prefix.
def filename_prefix
  @filename_prefix = operation.filename_prefix if @filename_prefix.nil?
  @filename_prefix
end
442
+
443
# Check whether a record exists in the repository, defaulting to this
# proxy's own work_id.
#
# BUGFIX/COMPAT: this zero-argument definition shadows the earlier private
# one-argument record_exists?(id), so get_removed_filesets' call
# record_exists?(file_id) raised ArgumentError. Making the id an optional
# parameter fixes that call while keeping zero-argument callers working.
def record_exists? id = work_id
  operation.record_exists? id
end
446
+
447
# Given the cell listing files to remove, return the subset of listed
# values that are ids of existing records (assumed to be fileset ids).
def get_removed_filesets(filestring)
  file_ids = split_values(filestring)
  # NOTE(review): record_exists? is redefined later in this class without
  # parameters, shadowing the one-argument version, so this one-argument
  # call raises ArgumentError as the file stands — confirm and fix arity.
  file_ids.select{|file_id| record_exists?(file_id)}

  # This part handles filenames in addition to file ids. It doesn't work yet!
  # file_ids.map do |file_id|
  # If the filename is the id of an existing record, keep that
  # next(file_id) if (record_exists?(file_id))
  # If this is the label (i.e.filename) of an existing fileset, use that fileset id
  # TODO MAKE THIS WORK!!
  # next(filename) if (filename_exists?(filename))
  # File.join(BulkOps::Operation::INGEST_MEDIA_PATH, filename_prefix, filename)
  # end
end
461
+
462
# Queue deletion of a fileset via a background job, acting as the
# operation's user.
def delete_file_set fileset_id
  user_email = operation.user.email
  BulkOps::DeleteFileSetJob.perform_later(fileset_id, user_email)
end
465
+
466
+ end