bulk_ops 0.1.14 → 0.1.15

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 27b9b67583cbf4ca808867661196e5bb8a6b95490b3a377dd85d11a91d0a41fb
- data.tar.gz: 508dbf4a72146f7a893851aec847bb9bb82a399765135dfde6daa6eec0a4d121
+ metadata.gz: fea513373c0ae0267f9302311300b8f4ba03b9fa632db168aec201c2f8359182
+ data.tar.gz: baa0fe9b67bfbe7d2f8283ff7949cb8ec46e268c7e15ef17c7b73b9c3a80ef19
  SHA512:
- metadata.gz: da715c7235ae2044b2354653b382825078a63f466e542642d995c81b6dd3bb8d8336c13ac84dd811ecefbff4e9e2422c45f1dc39e10b0ea1a4119a6736397ee2
- data.tar.gz: 9bd37e6481170e1da5ba4494888fb16a1cfa65cc9869edcee87353228a1eb78eefe5f32c87bb7c14f4fc2b6e3ead0ad068901273d8c100e3afbce0d5268e4486
+ metadata.gz: 33810a935cc44ee6de4448a12e37d4c0889b6a4c7d409011fc5dd9d0bddc18e1a53f0f18337c933ab3dd6903d4112b0a968579f20e7e204d4278220c0dbb0315
+ data.tar.gz: b7ff43aed578a7aba0cb59d0862af6d1ffe7f50eccce6715171063a09e1edf2670e3d23333b4e506ff3d473ff6dbed56f672ff66b2f43209e58e67950706072a
data/lib/bulk_ops.rb CHANGED
@@ -1,6 +1,39 @@
  require "bulk_ops/version"

  module BulkOps
+ OPTION_FIELDS = ['visibility','work type']
+ RELATIONSHIP_FIELDS = ['parent','child','collection','order']
+ REFERENCE_IDENTIFIER_FIELDS = ['Reference Identifier','ref_id','Reference ID','Relationship ID','Relationship Identifier','Reference Identifier Type','Reference ID Type','Ref ID Type','relationship_identifier_type','relationship_id_type']
+ FILE_FIELDS = ['file','files','filename','filenames']
+ FILE_ACTIONS = ['add','upload','remove','delete']
+ SEPARATOR = ';'
+ DEFAULT_ADMIN_SET_TITLE = "Bulk Ingest Set"
+ INGEST_MEDIA_PATH = "/dams_ingest"
+ TEMPLATE_DIR = "lib/bulk_ops/templates"
+ RELATIONSHIP_COLUMNS = ["parent","child","next"]
+ SPECIAL_COLUMNS = ["parent",
+ "child",
+ "order",
+ "next",
+ "work_type",
+ "collection",
+ "collection_title",
+ "collection_id",
+ "visibility",
+ "relationship_identifier_type",
+ "id",
+ "filename",
+ "file"]
+ IGNORED_COLUMNS = ["ignore","offline_notes"]
+ OPTION_REQUIREMENTS = {type: {required: true,
+ values:[:ingest,:update]},
+ file_method: {required: :true,
+ values: [:replace_some,:add_remove,:replace_all]},
+ notifications: {required: true}}
+ SPREADSHEET_FILENAME = 'metadata.csv'
+ OPTIONS_FILENAME = 'configuration.yml'
+ ROW_OFFSET = 2
+
  dirstring = File.join( File.dirname(__FILE__), 'bulk_ops/**/*.rb')
  Dir[dirstring].each do |file|
  begin
@@ -9,7 +42,5 @@ module BulkOps
  puts "ERROR LOADING #{File.basename(file)}: #{e}"
  end
  end
- # require 'bulk_ops/verification'
- # require 'bulk_ops/verification'
- # require 'bulk_ops/work_proxy'
+
  end
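
With 0.1.15 the shared constants live on the top-level BulkOps module rather than on BulkOps::WorkProxy, BulkOps::Operation, or BulkOps::GithubAccess, so downstream code reaches them through the module namespace. A minimal sketch of the new lookups (values taken from the constant definitions above; the joined names are illustrative data):

    require "bulk_ops"

    BulkOps::SEPARATOR             #=> ";"
    BulkOps::SPREADSHEET_FILENAME  #=> "metadata.csv"
    BulkOps::ROW_OFFSET            #=> 2
    # multi-valued spreadsheet cells are joined and split on the separator:
    ["Smith, Jane", "Doe, John"].join(BulkOps::SEPARATOR)  #=> "Smith, Jane;Doe, John"
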
data/lib/bulk_ops/create_spreadsheet_job.rb CHANGED
@@ -36,7 +36,7 @@ class BulkOps::CreateSpreadsheetJob < ActiveJob::Base
  next if value.is_a? DateTime
  value = (label ? WorkIndexer.fetch_remote_label(value.id) : value.id) unless value.is_a? String
  value.gsub("\"","\"\"")
- end.join(BulkOps::WorkProxy::SEPARATOR).prepend('"').concat('"')
+ end.join(BulkOps::SEPARATOR).prepend('"').concat('"')
  end.join(',')
  end

data/lib/bulk_ops/github_access.rb CHANGED
@@ -5,10 +5,6 @@ require 'base64'

  class BulkOps::GithubAccess

- ROW_OFFSET = 2
- SPREADSHEET_FILENAME = 'metadata.csv'
- OPTIONS_FILENAME = 'configuration.yml'
-
  attr_accessor :name

  def self.auth_url user
@@ -142,11 +138,11 @@ class BulkOps::GithubAccess
  def add_new_spreadsheet file, message=false
  if file.is_a? Tempfile
  file.close
- add_file file.path, SPREADSHEET_FILENAME, message: message
+ add_file file.path, BulkOps::SPREADSHEET_FILENAME, message: message
  elsif file.is_a?(String) && File.file?(file)
- add_file file, SPREADSHEET_FILENAME, message: message
+ add_file file, BulkOps::SPREADSHEET_FILENAME, message: message
  elsif file.is_a? String
- add_contents(spreadsheet_path, SPREADSHEET_FILENAME, message: message)
+ add_contents(spreadsheet_path, BulkOps::SPREADSHEET_FILENAME, message: message)
  end
  end

@@ -218,12 +214,12 @@ class BulkOps::GithubAccess

  def get_metadata_row row_number
  @current_metadata ||= load_metadata
- @current_metadata[row_number - ROW_OFFSET]
+ @current_metadata[row_number - BulkOps::ROW_OFFSET]
  end

  def get_past_metadata_row commit_sha, row_number
  past_metadata = Base64.decode64( client.contents(repo, path: filename, ref: commit_sha) )
- past_metadata[row_number - ROW_OFFSET]
+ past_metadata[row_number - BulkOps::ROW_OFFSET]
  end

  def get_file filename
@@ -244,13 +240,13 @@ class BulkOps::GithubAccess
  end

  def spreadsheet_path
- "#{name}/#{SPREADSHEET_FILENAME}"
+ "#{name}/#{BulkOps::SPREADSHEET_FILENAME}"
  end

  private

  def options_path
- "#{name}/#{OPTIONS_FILENAME}"
+ "#{name}/#{BulkOps::OPTIONS_FILENAME}"
  end

  def current_master_commit_sha
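
The spreadsheet helpers above now resolve their filenames through the shared module constants. A rough usage sketch (git here stands in for a configured BulkOps::GithubAccess instance; tempfile and csv_contents are placeholders, and the branch taken depends on the argument type, per add_new_spreadsheet above):

    git.add_new_spreadsheet(tempfile)             # Tempfile: closed, then committed from its path
    git.add_new_spreadsheet("/tmp/metadata.csv")  # String naming an existing file on disk
    git.add_new_spreadsheet(csv_contents)         # any other String: committed as raw contents
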
data/lib/bulk_ops/operation.rb CHANGED
@@ -7,33 +7,10 @@ module BulkOps

  include BulkOps::Verification

- attr_accessor :work_type, :visibility, :reference_identifier
+ attr_accessor :work_type, :visibility, :reference_identifier, :metadata

  delegate :can_merge?, :merge_pull_request, to: :git

- INGEST_MEDIA_PATH = "/dams_ingest"
- TEMPLATE_DIR = "lib/bulk_ops/templates"
- RELATIONSHIP_COLUMNS = ["parent","child","next"]
- SPECIAL_COLUMNS = ["parent",
- "child",
- "order",
- "next",
- "work_type",
- "collection",
- "collection_title",
- "collection_id",
- "visibility",
- "relationship_identifier_type",
- "id",
- "filename",
- "file"]
- IGNORED_COLUMNS = ["ignore","offline_notes"]
- OPTION_REQUIREMENTS = {type: {required: true,
- values:[:ingest,:update]},
- file_method: {required: :true,
- values: [:replace_some,:add_remove,:replace_all]},
- notifications: {required: true}}
-
  def self.unique_name name, user
  while BulkOps::Operation.find_by(name: name) || BulkOps::GithubAccess.list_branch_names(user).include?(name) do
  if ['-','_'].include?(name[-2]) && name[-1].to_i > 0
@@ -119,7 +96,7 @@ module BulkOps
  @metadata.each_with_index do |values,row_number|
  proxy = work_proxies.find_by(row_number: row_number)
  proxy.update(message: "interpreted at #{DateTime.now.strftime("%d/%m/%Y %H:%M")} " + proxy.message)
- data = proxy.interpret_data values
+ data = BulkOps::Parser.new(proxy, @metadata).interpret_data(raw_row: values)
  next unless proxy.proxy_errors.blank?
  BulkOps::CreateWorkJob.perform_later(proxy.work_type || "Work",
  user.email,
@@ -202,7 +179,7 @@ module BulkOps

  #loop through the work proxies to create a job for each work
  work_proxies.each do |proxy|
- data = proxy.interpret_data final_spreadsheet[proxy.row_number]
+ data = BulkOps::Parser.new(proxy,final_spreadsheet).interpret_data(raw_row: final_spreadsheet[proxy.row_number])
  BulkOps::UpdateWorkJob.perform_later(proxy.work_type || "",
  user.email,
  data,
@@ -238,13 +215,13 @@ module BulkOps
  bulk_ops_dir = Gem::Specification.find_by_name("bulk_ops").gem_dir

  #copy template files
- Dir["#{bulk_ops_dir}/#{TEMPLATE_DIR}/*"].each do |file|
+ Dir["#{bulk_ops_dir}/#{BulkOps::TEMPLATE_DIR}/*"].each do |file|
  git.add_file file
  end

  #update configuration options
  unless options.blank?
- full_options = YAML.load_file(File.join(bulk_ops_dir,TEMPLATE_DIR, BulkOps::GithubAccess::OPTIONS_FILENAME))
+ full_options = YAML.load_file(File.join(bulk_ops_dir,BulkOps::TEMPLATE_DIR, BulkOps::OPTIONS_FILENAME))

  options.each { |option, value| full_options[option] = value }

@@ -278,6 +255,10 @@ module BulkOps
  git.update_options(options, message: message)
  end

+ def metadata
+ @metadata ||= git.load_metadata
+ end
+
  def options
  return {} if name.nil?
  return @options if @options
@@ -332,7 +313,7 @@ module BulkOps
  end

  def ignored_fields
- (options['ignored headers'] || []) + IGNORED_COLUMNS
+ (options['ignored headers'] || []) + BulkOps::IGNORED_COLUMNS
  end


data/lib/bulk_ops/parser.rb ADDED
@@ -0,0 +1,485 @@
+ class BulkOps::Parser
+ require 'uri'
+
+ attr_accessor :proxy, :raw_data, :raw_row
+
+ delegate :relationships, :operation, :row_number, :work_id, :visibility, :work_type, :reference_identifier, :order, to: :proxy
+
+ def initialize prx, metadata_sheet=nil
+ @proxy = prx
+ @raw_data = (metadata_sheet || proxy.operation.metadata)
+ @raw_row = @raw_data[@proxy.row_number].dup
+ @metadata = {}
+ @parsing_errors = []
+ end
+
+ def interpret_data raw_row: nil, raw_data: nil, proxy: nil
+ @raw_row = raw_row if raw_row.present?
+ @proxy = proxy if proxy.present?
+ @raw_data = raw_data if raw_data.present?
+ setAdminSet
+ setMetadataInheritance
+ interpret_option_fields
+ interpret_relationship_fields
+ disambiguate_columns
+ interpret_file_fields
+ interpret_controlled_fields
+ interpret_scalar_fields
+ @proxy.update(status: "ERROR", message: "error parsing spreadsheet line") if @parsing_errors.present?
+ @proxy.proxy_errors = (@proxy.proxy_errors || []) + @parsing_errors
+ return @metadata
+ end
+
+ def disambiguate_columns
+ #do nothing unless there are columns with the same header
+ return unless (@raw_row.respond_to?(:headers) && (@raw_row.headers.uniq.length < @raw_row.length) )
+ row = {}
+ (0...@raw_row.length).each do |i|
+ header = @raw_row.headers[i]
+ value = @raw_row[i]
+ # separate values in identical columns using the separator
+ row[header] = (Array(row[header]) << value).join(BulkOps::SEPARATOR)
+ end
+ #return a hash with identical columns merged
+ return row
+ end
+
+ def interpret_controlled_fields
+
+ # The labels array tracks the contents of columns marked as labels,
+ # which may require special validation
+ labels = {}
+
+ # This hash is populated with relevant data as we loop through the fields
+ controlled_data = {}
+
+ row = @raw_row.dup
+ @raw_row.each do |field_name, value|
+ next if value.blank? or field_name.blank?
+ field_name = field_name.to_s
+
+ #If our CSV interpreter is feeding us the headers as a line, ignore it.
+ next if field_name == value
+
+ #check if they are using the 'field_name.authority' syntax
+ authority = nil
+ if ((split=field_name.split('.')).count == 2)
+ authority = split.last
+ field_name = split.first
+ end
+
+ # get the field name, if this column is a metadata field
+ field_name_norm = find_field_name(field_name)
+ field = schema.get_field(field_name_norm)
+
+ # Ignore anything that isn't a controlled field
+ next unless field.present? && field.controlled?
+
+ # Keep track of label fields
+ if field_name.downcase.ends_with?("label")
+ next if operation.options["ignore_labels"]
+ labels[field_name_norm] ||= []
+ labels[field_name_norm] += split_values value
+ next unless operation.options["import_labels"]
+ end
+
+ remove = field_name.downcase.starts_with?("remove") || field_name.downcase.starts_with?("delete")
+
+ # handle multiple values
+ value_array = split_values(value)
+ controlled_data[field_name_norm] ||= [] unless value_array.blank?
+ value_array.each do |value|
+ # Decide of we're dealing with a label or url
+ # It's an ID if it's a URL and the name doesn't end in 'label'
+ value.strip!
+ if value =~ /^#{URI::regexp}$/ and !field_name.downcase.ends_with?("label")
+ value_id = value
+ # label = WorkIndexer.fetch_remote_label(value)
+ # error_message = "cannot fetch remote label for url: #{value}"
+ # report_error( :cannot_retrieve_label , error_message, url: value, row_number: row_number) unless label
+ else
+ # It's a label, so unescape it and get the id
+ value = unescape_csv(value)
+ value_id = get_remote_id(value, property: field_name_norm, authority: authority) || localAuthUrl(field_name_norm, value)
+ # label = value
+ report_error(:cannot_retrieve_url,
+ message: "cannot find or create url for controlled vocabulary label: #{value}",
+ url: value,
+ row_number: row_number) unless value_id
+ end
+ controlled_data[field_name_norm] << {id: value_id, remove: field_name.downcase.starts_with?("remove")}
+ row.delete(field_name)
+ end
+ end
+ @raw_row = row
+
+ # Actually add all the data
+ controlled_data.each do |property_name, data|
+ @metadata["#{property_name}_attributes"] ||= [] unless data.blank?
+ data.uniq.each do |datum|
+ atts = {"id" => datum[:id]}
+ atts["_delete"] = true if datum[:remove]
+ @metadata["#{property_name}_attributes"] << atts
+ end
+ end
+ end
+
+ def interpret_scalar_fields
+ row = @raw_row.dup
+ @raw_row.each do |field, values|
+ next if values.blank? or field.nil? or field == values
+ # get the field name, if this column is a metadata field
+ next unless field_name = find_field_name(field.to_s)
+ field = schema.get_field(field_name)
+ # Ignore controlled fields
+ next if field.controlled?
+ split_values(values).each do |value|
+ next if value.blank?
+ value = value.strip.encode('utf-8', :invalid => :replace, :undef => :replace, :replace => '_') unless value.blank?
+ value = unescape_csv(value)
+ (@metadata[field_name] ||= []) << value
+ row.delete(field)
+ end
+ end
+ @raw_row = row
+ end
+
+ def interpret_file_fields
+ # This method handles file additions and deletions from the spreadsheet
+ # if additional files need to be deleted because the update is set to replace
+ # some or all existing files, those replacement-related deletions are handled
+ # by the BulkOps::Operation.
+ #
+ # TODO: THIS DOES NOT YET MANAGE THE ORDER OF INGESTED FILESETS
+
+ row = @raw_row.dup
+ @raw_row.each do |field, value|
+ next if value.blank? or field.blank?
+ field = field.to_s
+ #If our CSV interpreter is feeding us the headers as a line, ignore it.
+ next if field == value
+
+
+ # Check if this is a file field, and whether we are removing or adding a file
+ next unless (action = is_file_field?(field))
+
+ # Move on if this field is the name of another property (e.g. masterFilename)
+ next if find_field_name(field)
+
+ # Check if we are removing a file
+ if action == "remove"
+ get_removed_filesets(value).each { |fileset_id| delete_file_set(file_set_id) }
+ else
+ # Add a file
+ operation.get_file_paths(value).each do |filepath|
+ begin
+ uploaded_file = Hyrax::UploadedFile.create(file: File.open(filepath), user: operation.user)
+ (@metadata[:uploaded_files] ||= []) << uploaded_file.id unless uploaded_file.id.nil?
+ row.delete(field)
+ rescue Exception => e
+ report_error(:upload_error,
+ message: "Error opening file: #{ filepath } -- #{e}",
+ file: File.join(BulkOps::INGEST_MEDIA_PATH,filename),
+ row_number: row_number)
+ end
+ end
+ end
+ end
+ @raw_row = row
+ end
+
+ def interpret_option_fields
+ row = @raw_row.dup
+ @raw_row.each do |field,value|
+ next if value.blank? or field.blank?
+ field = field.to_s
+ next if value == field
+
+ normfield = field.downcase.parameterize.gsub(/[_\s-]/,'')
+ if ["visibility", "public"].include?(normfield)
+ @proxy.update(visibility: format_visibility(value))
+ row.delete(field)
+ end
+ if ["worktype","model","type"].include?(normfield)
+ @proxy.update(work_type: format_worktype(value) )
+ row.delete(field)
+ end
+ if ["referenceidentifier",
+ "referenceid",
+ "refid",
+ "referenceidentifiertype",
+ "referenceidtype",
+ "refidtype",
+ "relationshipidentifier",
+ "relationshipid",
+ "relationshipidentifiertype",
+ "relationshipidtype",
+ "relid",
+ "relidtype"].include?(normfield)
+ @proxy.update(reference_identifier: format_reference_id(value))
+ row.delete(field)
+ end
+ end
+ @raw_row = row
+ end
+
+ def interpret_relationship_fields
+ row = @raw_row.dup
+ @raw_row.each do |field,value|
+ next if value.blank? or field.blank?
+ field = field.to_s
+ value = unescape_csv(value)
+ identifer_type = reference_identifier
+
+ next if value == field
+
+ # Correctly interpret the notation "parent:id", "parent id" etc in a column header
+ if (split = field.split(/[:_\-\s]/)).count == 2
+ identifier_type = split.last
+ relationship_type = split.first.to_s
+ else
+ relationship_type = field
+ end
+
+ relationship_type = normalize_relationship_field_name(relationship_type)
+ case relationship_type
+ when "order"
+ # If the field specifies the object's order among siblings
+ @proxy.update(order: value.to_f)
+ row.delete(field)
+ next
+ when "collection"
+ # If the field specifies the name or ID of a collection,
+ # find or create the collection and update the metadata to match
+ col = find_or_create_collection(value)
+ ( @metadata[:member_of_collection_ids] ||= [] ) << col.id if col
+ row.delete field
+ next
+ when "parent", "child"
+
+ # correctly interpret the notation "id:a78C2d81"
+ identifier_type, object_identifier = interpret_relationship_value(identifier_type, value)
+
+ relationship_parameters = { work_proxy_id: @proxy.id,
+ identifier_type: identifier_type,
+ relationship_type: relationship_type,
+ object_identifier: object_identifier,
+ status: "new"}
+
+ #add previous sibling link if necessary
+ previous_value = @raw_data[row_number-1][field]
+ # Check if this is a parent relationship, and the previous row also has one
+ if previous_value.present? && (relationship_type == "parent")
+ # Check if the previous row has the same parent as this row
+ if object_identifier == interpret_relationship_value(identifier_type, previous_value, field).last
+ # If so, set the previous sibling parameter on the relationshp
+ # to the id for the proxy associated with the previous row
+ relationship_parameters[:previous_sibling] = operation.work_proxies.find_by(row_number: row_number-1).id
+ end
+ end
+ BulkOps::Relationship.create(relationship_parameters)
+ row.delete field
+ end
+ end
+ @raw_row = row
+ end
+
+ def normalize_relationship_field_name field
+ normfield = field.downcase.parameterize.gsub(/[_\s-]/,'')
+ BulkOps::RELATIONSHIP_FIELDS.find{|rel_field| normfield == rel_field }
+ end
+
+ def find_previous_parent field="parent"
+ #Return the row number of the most recent preceding row that does
+ # not itself have a parent defined
+ i = 1;
+ while (prev_row = raw_data[row_number - i])
+ return (row_number - i) if prev_row[field].blank?
+ i += 1
+ end
+ end
+
+ def interpret_relationship_value id_type, value, field="parent"
+ #Handle "id:20kj4259" syntax if it hasn't already been handled
+ if (split = value.to_s.split(":")).count == 2
+ id_type = split.first
+ value = split.last
+ end
+ #Handle special shorthand syntax for refering to relative row numbers
+ if id_type == "row"
+ if value.to_i < 0
+ # if given a negative integer, count backwards from the current row (remember that value.to_i is negative)
+ return [id_type,row_number + value.to_i]
+ elsif value.to_s.downcase.include?("prev")
+ # if given any variation of the word "previous", get the first preceding row with no parent of its own
+ return [id_type,find_previous_parent(field)]
+ end
+ end
+ return [id_type,value]
+ end
+
+ def unescape_csv(value)
+ value.gsub(/\\(['";,])/,'\1')
+ end
+
+
+ def format_worktype(value)
+ # format the value like a class name
+ type = value.titleize.gsub(/[-_\s]/,'')
+ # reject it if it isn't a defined class
+ type = false unless Object.const_defined? type
+ # fall back to the work type defined by the operation, or a standard "Work"
+ return type ||= work_type || operation.work_type || "Work"
+ end
+
+ def format_visibility(value)
+ case value.downcase
+ when "public", "open", "true"
+ return "open"
+ when "campus", "ucsc", "institution"
+ return "ucsc"
+ when "restricted", "private", "closed", "false"
+ return "restricted"
+ end
+ end
+
+
+ def mintLocalAuthUrl(auth_name, value)
+ value.strip!
+ id = value.parameterize
+ auth = Qa::LocalAuthority.find_or_create_by(name: auth_name)
+ entry = Qa::LocalAuthorityEntry.create(local_authority: auth,
+ label: value,
+ uri: id)
+ return localIdToUrl(id,auth_name)
+ end
+
+ def findAuthUrl(auth, value)
+ value.strip!
+ return nil if auth.nil?
+ return nil unless (entries = Qa::Authorities::Local.subauthority_for(auth).search(value))
+ entries.each do |entry|
+ #require exact match
+ next unless entry["label"].force_encoding('UTF-8') == value.force_encoding('UTF-8')
+ url = entry["url"] || entry["id"]
+ # url = localIdToUrl(url,auth) unless url =~ URI::regexp
+ return url
+ end
+ return nil
+ end
+
+ def localIdToUrl(id,auth_name)
+ root_urls = {production: "https://digitalcollections.library.ucsc.edu",
+ staging: "http://digitalcollections-staging.library.ucsc.edu",
+ development: "http://#{Socket.gethostname}",
+ test: "http://#{Socket.gethostname}"}
+ return "#{root_urls[Rails.env.to_sym]}/authorities/show/local/#{auth_name}/#{id}"
+ end
+
+ def getLocalAuth(field_name)
+ field = schema.get_property(field_name)
+ # There is only ever one local authority per field, so just pick the first you find
+ if vocs = field.vocabularies
+ vocs.each do |voc|
+ return voc["subauthority"] if voc["authority"].downcase == "local"
+ end
+ end
+ return nil
+ end
+
+ def setAdminSet
+ return if @metadata[:admin_set_id]
+ asets = AdminSet.where({title: "Bulk Ingest Set"})
+ asets = AdminSet.find('admin_set/default') if asets.blank?
+ @metadata[:admin_set_id] = Array(asets).first.id unless asets.blank?
+ end
+
+ def setMetadataInheritance
+ return if @metadata[:metadataInheritance].present?
+ @metadata[:metadataInheritance] = operation.options["metadataInheritance"] unless operation.options["metadataInheritance"].blank?
+ end
+
+ def report_error type, message, **args
+ puts "ERROR MESSAGE: #{message}"
+ @proxy.update(status: "error", message: message)
+ args[:type]=type
+ (@parsing_errors ||= []) << BulkOps::Error.new(**args)
+ end
+
+ def get_removed_filesets(filestring)
+ file_ids = split_values(filestring)
+ file_ids.select{|file_id| record_exists?(file_id)}
+
+ # This part handles filenames in addition to file ids. It doesn't work yet!
+ # file_ids.map do |file_id|
+ # If the filename is the id of an existing record, keep that
+ # next(file_id) if (record_exists?(file_id))
+ # If this is the label (i.e.filename) of an existing fileset, use that fileset id
+ # TODO MAKE THIS WORK!!
+ # next(filename) if (filename_exists?(filename))
+ # File.join(BulkOps::INGEST_MEDIA_PATH, filename_prefix, filename)
+ # end
+ end
+
+ def delete_file_set fileset_id
+ BulkOps::DeleteFileSetJob.perform_later(fileset_id, operation.user.email )
+ end
+
+
+ def is_file_field? field
+ operation.is_file_field? field
+ end
+
+ def record_exists? id
+ operation.record_exists? id
+ end
+
+ def localAuthUrl(property, value)
+ return value if (auth = getLocalAuth(property)).nil?
+ url = findAuthUrl(auth, value) || mintLocalAuthUrl(auth,value)
+ return url
+ end
+
+ def find_collection(collection)
+ cols = Collection.where(id: collection)
+ cols += Collection.where(title: collection).select{|col| col.title.first == collection}
+ return cols.last unless cols.empty?
+ return false
+ end
+
+ def find_or_create_collection(collection)
+ col = find_collection(collection)
+ return col if col
+ return false if collection.to_i > 0
+ col = Collection.create(title: [collection.to_s], depositor: operation.user.email, collection_type: Hyrax::CollectionType.find_by(title:"User Collection"))
+ end
+
+ def get_remote_id(value, authority: nil, property: nil)
+ return false
+ #TODO retrieve URL for this value from the specified remote authr
+ end
+
+ def format_param_name(name)
+ name.titleize.gsub(/\s+/, "").camelcase(:lower)
+ end
+
+ def schema
+ ScoobySnacks::METADATA_SCHEMA
+ end
+
+ def find_field_name(field)
+ operation.find_field_name(field)
+ end
+
+ def downcase_first_letter(str)
+ return "" unless str
+ str[0].downcase + str[1..-1]
+ end
+
+ def split_values value_string
+ # Split values on all un-escaped separator character (escape character is '\')
+ # Then replace all escaped separator charactors with un-escaped versions
+ value_string.split(/(?<!\\)#{BulkOps::SEPARATOR}/).map{|val| val.gsub("\\#{BulkOps::SEPARATOR}",BulkOps::SEPARATOR).strip}
+ end
+
+ end
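
This new class extracts the row-parsing logic that 0.1.14 kept on BulkOps::WorkProxy#interpret_data (removed below). A rough sketch of how an operation drives it, mirroring the call sites in operation.rb above (proxy, metadata, and user are placeholders for a work proxy record, the loaded spreadsheet, and the operating user):

    parser = BulkOps::Parser.new(proxy, metadata)
    work_attributes = parser.interpret_data(raw_row: metadata[proxy.row_number])
    # => a metadata hash: scalar fields, "*_attributes" arrays for controlled
    #    fields, :uploaded_files, :admin_set_id, etc., which the operation
    #    passes on to BulkOps::CreateWorkJob / BulkOps::UpdateWorkJob
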
data/lib/bulk_ops/verification.rb CHANGED
@@ -35,7 +35,7 @@ module BulkOps
  return false if fieldname.blank?
  return false if schema.get_field(fieldname)
  field_parts = fieldname.underscore.humanize.downcase.gsub(/[-_]/,' ').split(" ")
- return false unless field_parts.any?{ |field_type| BulkOps::WorkProxy::FILE_FIELDS.include?(field_type) }
+ return false unless field_parts.any?{ |field_type| BulkOps::FILE_FIELDS.include?(field_type) }
  return "remove" if field_parts.any?{ |field_type| ['remove','delete'].include?(field_type) }
  return "add"
  end
@@ -46,7 +46,7 @@ module BulkOps
  name.gsub!(/[_\s-]?[lL]abel$/,'')
  name.gsub!(/^[rR]emove[_\s-]?/,'')
  name.gsub!(/^[dD]elete[_\s-]?/,'')
- possible_fields = Work.attribute_names + schema.all_field_names
+ possible_fields = (Work.attribute_names + schema.all_field_names).uniq
  matching_fields = possible_fields.select{|pfield| pfield.gsub(/[_\s-]/,'').parameterize == name.gsub(/[_\s-]/,'').parameterize }
  return false if matching_fields.blank?
  # raise Exception "Ambiguous metadata fields!" if matching_fields.uniq.count > 1
@@ -55,8 +55,8 @@ module BulkOps

  def get_file_paths(filestring)
  return [] if filestring.blank?
- filenames = filestring.split(BulkOps::WorkProxy::SEPARATOR)
- filenames.map { |filename| File.join(BulkOps::Operation::INGEST_MEDIA_PATH, options['file_prefix'] || "", filename) }
+ filenames = filestring.split(BulkOps::SEPARATOR)
+ filenames.map { |filename| File.join(BulkOps::INGEST_MEDIA_PATH, options['file_prefix'] || "", filename) }
  end

  def record_exists? id
@@ -85,7 +85,7 @@ module BulkOps
  end

  def verify_configuration
- BulkOps::Operation::OPTION_REQUIREMENTS.each do |option_name, option_info|
+ BulkOps::OPTION_REQUIREMENTS.each do |option_name, option_info|
  # Make sure it's present if required
  if (option_info["required"].to_s == "true") || (option_info["required"].to_s == type)
  if options[option_name].blank?
@@ -120,7 +120,7 @@ module BulkOps
  # Ignore everything marked as a label
  next if column_name_redux.ends_with? "label"
  # Ignore any column names with special meaning in hyrax
- next if BulkOps::Operation::SPECIAL_COLUMNS.any?{|col| col.downcase.parameterize.gsub(/[_\s-]/,"") == column_name_redux }
+ next if BulkOps::SPECIAL_COLUMNS.any?{|col| col.downcase.parameterize.gsub(/[_\s-]/,"") == column_name_redux }
  # Ignore any columns speficied to be ignored in the configuration
  ignored = options["ignored headers"] || []
  next if ignored.any?{|col| col.downcase.parameterize.gsub(/[_\s-]/,"") == column_name_redux }
@@ -131,7 +131,7 @@ module BulkOps
  end

  def verify_remote_urls
- row_offset = BulkOps::GithubAccess::ROW_OFFSET.present? ? BulkOps::GithubAccess::ROW_OFFSET : 2
+ row_offset = BulkOps::ROW_OFFSET.present? ? BulkOps::ROW_OFFSET : 2
  get_spreadsheet.each_with_index do |row, row_num|
  update(message: "verifying controlled vocab urls (row number #{row_num})")
  next if row_num.nil?
@@ -173,7 +173,7 @@ module BulkOps
  def get_ref_id row
  row.each do |field,value|
  next if field.blank? or value.blank? or field === value
- next unless BulkOps::WorkProxy::REFERENCE_IDENTIFIER_FIELDS.any?{ |ref_field| normalize_field(ref_field) == normalize_field(field) }
+ next unless BulkOps::REFERENCE_IDENTIFIER_FIELDS.any?{ |ref_field| normalize_field(ref_field) == normalize_field(field) }
  return value
  end
  # No reference identifier specified in the row. Use the default for the operation.
@@ -190,7 +190,7 @@ module BulkOps
  # This is sketchy. Redo it.
  (metadata = get_spreadsheet).each do |row,row_num|
  ref_id = get_ref_id(row)
- BulkOps::Operation::RELATIONSHIP_COLUMNS.each do |relationship|
+ BulkOps::RELATIONSHIP_COLUMNS.each do |relationship|
  next unless (obj_id = row[relationship])
  if (split = obj_id.split(':')).present? && split.count == 2
  ref_id = split[0].downcase
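
For reference, the header matching in find_field_name (hunk @@ -46 above) compares names only after stripping separators and parameterizing, so differently formatted spreadsheet headers resolve to the same schema field. A quick illustration (the header names are hypothetical):

    "Date Created".gsub(/[_\s-]/,'').parameterize   #=> "datecreated"
    "date_created".gsub(/[_\s-]/,'').parameterize   #=> "datecreated"
    # the .uniq added in this release keeps the match list free of duplicates
    # when a name appears in both Work.attribute_names and schema.all_field_names
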
data/lib/bulk_ops/version.rb CHANGED
@@ -1,3 +1,3 @@
  module BulkOps
- VERSION = "0.1.14"
+ VERSION = "0.1.15"
  end
data/lib/bulk_ops/work_proxy.rb CHANGED
@@ -1,12 +1,5 @@
  class BulkOps::WorkProxy < ActiveRecord::Base

- require 'uri'
- OPTION_FIELDS = ['visibility','work type']
- RELATIONSHIP_FIELDS = ['parent','child','collection','order']
- REFERENCE_IDENTIFIER_FIELDS = ['Reference Identifier','ref_id','Reference ID','Relationship ID','Relationship Identifier','Reference Identifier Type','Reference ID Type','Ref ID Type','relationship_identifier_type','relationship_id_type']
- FILE_FIELDS = ['file','files','filename','filenames']
- FILE_ACTIONS = ['add','upload','remove','delete']
- SEPARATOR = ';'
  self.table_name = "bulk_ops_work_proxies"
  belongs_to :operation, class_name: "BulkOps::Operation", foreign_key: "operation_id"
  has_many :relationships, class_name: "BulkOps::Relationship"
@@ -40,462 +33,10 @@ class BulkOps::WorkProxy < ActiveRecord::Base
  # TODO make it so people can edit the work again
  end

- def interpret_data raw_data
- admin_set = AdminSet.where(title: "Bulk Ingest Set").first || AdminSet.find(AdminSet.find_or_create_default_admin_set_id)
- metadata = {admin_set_id: admin_set.id}
- metadata.merge! interpret_file_fields(raw_data)
- metadata.merge! interpret_controlled_fields(raw_data)
- metadata.merge! interpret_scalar_fields(raw_data)
- metadata.merge! interpret_relationship_fields(raw_data)
- metadata.merge! interpret_option_fields(raw_data)
- metadata = setAdminSet(metadata)
- metadata = setMetadataInheritance(metadata)
- return metadata
- end

  def proxy_errors
  @proxy_errors ||= []
  end

- private
-
- def is_file_field? field
- operation.is_file_field? field
- end
-
- def record_exists? id
- operation.record_exists? id
- end
-
- def localAuthUrl(property, value)
- return value if (auth = getLocalAuth(property)).nil?
- url = findAuthUrl(auth, value) || mintLocalAuthUrl(auth,value)
- return url
- end
-
- def find_collection(collection)
- cols = Collection.where(id: collection)
- cols += Collection.where(title: collection).select{|col| col.title.first == collection}
- return cols.last unless cols.empty?
- return false
- end
-
- def find_or_create_collection(collection)
- col = find_collection(collection)
- return col if col
- return false if collection.to_i > 0
- col = Collection.create(title: [collection.to_s], depositor: operation.user.email, collection_type: Hyrax::CollectionType.find_by(title:"User Collection"))
- end
-
- def get_remote_id(value, authority: nil, property: nil)
- return false
- #TODO retrieve URL for this value from the specified remote authr
- end
-
- def format_param_name(name)
- name.titleize.gsub(/\s+/, "").camelcase(:lower)
- end
-
- def schema
- ScoobySnacks::METADATA_SCHEMA
- end
-
- def find_field_name(field)
- operation.find_field_name(field)
- end
-
- def downcase_first_letter(str)
- return "" unless str
- str[0].downcase + str[1..-1]
- end
-
- def split_values value_string
- # Split values on all un-escaped separator character (escape character is '\')
- # Then replace all escaped separator charactors with un-escaped versions
- value_string.split(/(?<!\\)#{SEPARATOR}/).map{|val| val.gsub("\\#{SEPARATOR}",SEPARATOR).strip}
- end
-
- def interpret_controlled_fields raw_data
-
- # The labels array tracks the contents of columns marked as labels,
- # which may require special validation
- labels = {}
-
- # This hash is populated with relevant data as we loop through the fields
- controlled_data = {}
-
- raw_data.each do |field_name, value|
- next if value.blank? or field_name.blank?
- field_name = field_name.to_s
-
- #If our CSV interpreter is feeding us the headers as a line, ignore it.
- next if field_name == value
-
- #check if they are using the 'field_name.authority' syntax
- authority = nil
- if ((split=field_name.split('.')).count == 2)
- authority = split.last
- field_name = split.first
- end
-
- # get the field name, if this column is a metadata field
- field_name_norm = find_field_name(field_name)
- field = schema.get_field(field_name_norm)
-
- # Ignore anything that isn't a controlled field
- next unless field.present? && field.controlled?
-
- # Keep track of label fields
- if field_name.downcase.ends_with?("label")
- next if operation.options["ignore_labels"]
- labels[field_name_norm] ||= []
- labels[field_name_norm] += split_values value
- next unless operation.options["import_labels"]
- end
-
- remove = field_name.downcase.starts_with?("remove") || field_name.downcase.starts_with?("delete")
-
- # handle multiple values
- value_array = split_values(value)
- controlled_data[field_name_norm] ||= [] unless value_array.blank?
- value_array.each do |value|
- # Decide of we're dealing with a label or url
- # It's an ID if it's a URL and the name doesn't end in 'label'
- value.strip!
- if value =~ /^#{URI::regexp}$/ and !field_name.downcase.ends_with?("label")
- id = value
- # label = WorkIndexer.fetch_remote_label(value)
- # error_message = "cannot fetch remote label for url: #{value}"
- # report_error( :cannot_retrieve_label , error_message, url: value, row_number: row_number) unless label
- else
- # It's a label, so unescape it and get the id
- value = unescape_csv(value)
- id = get_remote_id(value, property: field_name_norm, authority: authority) || localAuthUrl(field_name_norm, value)
- # label = value
- report_error(:cannot_retrieve_url,
- message: "cannot find or create url for controlled vocabulary label: #{value}",
- url: value,
- row_number: row_number) unless id
- end
- controlled_data[field_name_norm] << {id: id, remove: field_name.downcase.starts_with?("remove")}
- end
- end
-
- #delete any duplicates (if someone listed a url and also its label, or the same url twice)
- controlled_data.each{|field_name, values| controlled_data[field_name] = values.uniq }
-
- # Actually add all the data
- metadata = {}
- leftover_data = raw_data.dup.to_hash
- controlled_data.each do |property_name, data|
- metadata["#{property_name}_attributes"] ||= [] unless data.blank?
- data.each do |datum|
- atts = {"id" => datum[:id]}
- atts["_delete"] = true if datum[:remove]
- metadata["#{property_name}_attributes"] << atts
- leftover_data.except! property_name
- end
- end
- #return [metadata, leftover_data]
- return metadata
- end
-
- def interpret_scalar_fields raw_data
- metadata = {}
- raw_data.each do |field, values|
- next if values.blank? or field.nil? or field == values
- # get the field name, if this column is a metadata field
- next unless field_name = find_field_name(field.to_s)
- field = schema.get_field(field_name)
- # Ignore controlled fields
- next if field.controlled?
- split_values(values).each do |value|
- next if value.blank?
- value = value.strip.encode('utf-8', :invalid => :replace, :undef => :replace, :replace => '_') unless value.blank?
- value = unescape_csv(value)
- (metadata[field_name] ||= []) << value
- end
- end
- return metadata
- end
-
- def interpret_file_fields raw_data
- # This method handles file additions and deletions from the spreadsheet
- # if additional files need to be deleted because the update is set to replace
- # some or all existing files, those replacement-related deletions are handled
- # by the BulkOps::Operation.
- #
- # TODO: THIS DOES NOT YET MANAGE THE ORDER OF INGESTED FILESETS
-
- metadata = {}
- raw_data.each do |field, value|
- next if value.blank? or field.blank?
- field = field.to_s
- #If our CSV interpreter is feeding us the headers as a line, ignore it.
- next if field == value
-
-
- # Check if this is a file field, and whether we are removing or adding a file
- next unless (action = is_file_field?(field))
-
- # Move on if this field is the name of another property (e.g. masterFilename)
- next if find_field_name(field)
-
- # Check if we are removing a file
- if action == "remove"
- get_removed_filesets(value).each { |fileset_id| delete_file_set(file_set_id) }
- else
- # Add a file
- operation.get_file_paths(value).each do |filepath|
- begin
- uploaded_file = Hyrax::UploadedFile.create(file: File.open(filepath), user: operation.user)
- (metadata[:uploaded_files] ||= []) << uploaded_file.id unless uploaded_file.id.nil?
- rescue Exception => e
- report_error(:upload_error,
- message: "Error opening file: #{ filepath } -- #{e}",
- file: File.join(BulkOps::Operation::INGEST_MEDIA_PATH,filename),
- row_number: row_number)
- end
- end
- end
- end
- return metadata
- end
-
- def interpret_option_fields raw_data
- raw_data.each do |field,value|
- next if value.blank? or field.blank?
- field = field.to_s
- next if value == field
-
- normfield = field.downcase.parameterize.gsub(/[_\s-]/,'')
- if ["visibility", "public"].include?(normfield)
- update(visibility: format_visibility(value))
- end
- if ["worktype","model","type"].include?(normfield)
- update(work_type: format_worktype(value) )
- end
- if ["referenceidentifier",
- "referenceid",
- "refid",
- "referenceidentifiertype",
- "referenceidtype",
- "refidtype",
- "relationshipidentifier",
- "relationshipid",
- "relationshipidentifiertype",
- "relationshipidtype",
- "relid",
- "relidtype"].include?(normfield)
- update(reference_identifier: format_reference_id(value))
- end
- end
- return {}
- end
-
- def interpret_relationship_fields(raw_data)
- metadata = {}
- raw_data.each do |field,value|
- next if value.blank? or field.blank?
- field = field.to_s
- value = unescape_csv(value)
- identifer_type = reference_identifier
-
- next if value == field
-
- if (split = field.split(":")).count == 2
- identifier_type = split.last
- relationship_type = split.first.to_s
- else
- relationship_type = field
- end
-
- relationship_type = normalize_relationship_field_name(relationship_type)
- case relationship_type
- when "order"
- # If the field specifies the object's order among siblings
- update(order: value.to_f)
- next
- when "collection"
- # If the field specifies the name or ID of a collection,
- # find or create the collection and update the metadata to match
- col = find_or_create_collection(value)
- ( metadata[:member_of_collection_ids] ||= [] ) << col.id if col
- next
- when "parent", "child"
-
- # correctly interpret the notation "id:a78C2d81"
- identifier_type, object_identifier = interpret_relationship_value(identifier_type, value)
-
- relationship_parameters = { work_proxy_id: id,
- identifier_type: identifier_type,
- relationship_type: relationship_type,
- object_identifier: object_identifier,
- status: "new"}
-
- #add previous sibling link if necessary
- previous_value = operation.final_spreadsheet[row_number-1][field]
- # Check if this is a parent relationship, and the previous row also has one
- if previous_value.present? && (relationship_type == "parent")
- # Check if the previous row has the same parent as this row
- if object_identifier == interpret_relationship_value(identifier_type, previous_value, field).last
- # If so, set the previous sibling parameter on the relationshp
- # to the id for the proxy associated with the previous row
- relationship_parameters[:previous_sibling] = operation.work_proxies.find_by(row_number: row_number-1).id
- end
- end
- BulkOps::Relationship.create(relationship_parameters)
- end
- return metadata
- end
- end
-
- def normalize_relationship_field_name field
- normfield = field.downcase.parameterize.gsub(/[_\s-]/,'')
- RELATIONSHIP_FIELDS.find{|field| normfield.include?(field) }
- end
-
- def find_previous_parent field="parent"
- #Return the row number of the most recent preceding row that does
- # not itself have a parent defined
- i = 0;
- while (prev_row = operation.final_spreadsheet[row_number - i])
- return (row_number - i) if prev_row[field].blank?
- end
- end
-
- def interpret_relationship_value id_type, value, field="parent"
- #Handle "id:20kj4259" syntax if it hasn't already been handled
- if (split = value.to_s.split(":")).count == 2
- id_type = split.first
- value = split.last
- end
- #Handle special shorthand syntax for refering to relative row numbers
- if id_type == "row"
- if value.to_i < 0
- # if given a negative integer, count backwards from the current row
- return [id_type,row_number - value]
- elsif value.to_s.downcase.include?("prev")
- # if given any variation of the word "previous", get the first preceding row with no parent of its own
- return [id_type,find_previous_parent(field)]
- end
- end
- return [id_type,value]
- end
-
- def unescape_csv(value)
- value.gsub(/\\(['";,])/,'\1')
- end
-
- def format_worktype(value)
- # format the value like a class name
- type = value.titleize.gsub(/[-_\s]/,'')
- # reject it if it isn't a defined class
- type = false unless Object.const_defined? type
- # fall back to the work type defined by the operation, or a standard "Work"
- return type ||= operation.work_type || "Work"
- end
-
- def format_visibility(value)
- case value.downcase
- when "public", "open", "true"
- return "open"
- when "campus", "ucsc", "institution"
- return "ucsc"
- when "restricted", "private", "closed", "false"
- return "restricted"
- end
- end
-
- def mintLocalAuthUrl(auth_name, value)
- value.strip!
- id = value.parameterize
- auth = Qa::LocalAuthority.find_or_create_by(name: auth_name)
- entry = Qa::LocalAuthorityEntry.create(local_authority: auth,
- label: value,
- uri: id)
- return localIdToUrl(id,auth_name)
- end
-
- def findAuthUrl(auth, value)
- value.strip!
- return nil if auth.nil?
- return nil unless (entries = Qa::Authorities::Local.subauthority_for(auth).search(value))
- entries.each do |entry|
- #require exact match
- next unless entry["label"].force_encoding('UTF-8') == value.force_encoding('UTF-8')
- url = entry["url"] || entry["id"]
- # url = localIdToUrl(url,auth) unless url =~ URI::regexp
- return url
- end
- return nil
- end
-
- def localIdToUrl(id,auth_name)
- root_urls = {production: "https://digitalcollections.library.ucsc.edu",
- staging: "http://digitalcollections-staging.library.ucsc.edu",
- development: "http://#{Socket.gethostname}",
- test: "http://#{Socket.gethostname}"}
- return "#{root_urls[Rails.env.to_sym]}/authorities/show/local/#{auth_name}/#{id}"
- end
-
- def getLocalAuth(field_name)
- field = schema.get_property(field_name)
- # There is only ever one local authority per field, so just pick the first you find
- if vocs = field.vocabularies
- vocs.each do |voc|
- return voc["subauthority"] if voc["authority"].downcase == "local"
- end
- end
- return nil
- end
-
- def setAdminSet metadata
- return metadata if metadata[:admin_set_id]
- asets = AdminSet.where({title: "Bulk Ingest Set"})
- asets = AdminSet.find('admin_set/default') if asets.blank?
- metadata[:admin_set_id] = Array(asets).first.id unless asets.blank?
- return metadata
- end
-
- def setMetadataInheritance metadata
- return metadata if metadata[:metadataInheritance].present?
- metadata[:metadataInheritance] = operation.options["metadataInheritance"] unless operation.options["metadataInheritance"].blank?
- return metadata
- end
-
- def report_error type, message, **args
- puts "ERROR MESSAGE: #{message}"
- update(status: "error", message: message)
- args[:type]=type
- (@proxy_errors ||= []) << BulkOps::Error.new(**args)
- end
-
- def filename_prefix
- @filename_prefix ||= operation.filename_prefix
- end
-
- def record_exists?
- operation.record_exists? work_id
- end
-
- def get_removed_filesets(filestring)
- file_ids = split_values(filestring)
- file_ids.select{|file_id| record_exists?(file_id)}
-
- # This part handles filenames in addition to file ids. It doesn't work yet!
- # file_ids.map do |file_id|
- # If the filename is the id of an existing record, keep that
- # next(file_id) if (record_exists?(file_id))
- # If this is the label (i.e.filename) of an existing fileset, use that fileset id
- # TODO MAKE THIS WORK!!
- # next(filename) if (filename_exists?(filename))
- # File.join(BulkOps::Operation::INGEST_MEDIA_PATH, filename_prefix, filename)
- # end
- end
-
- def delete_file_set fileset_id
- BulkOps::DeleteFileSetJob.perform_later(fileset_id, operation.user.email )
- end

  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: bulk_ops
  version: !ruby/object:Gem::Version
- version: 0.1.14
+ version: 0.1.15
  platform: ruby
  authors:
  - Ned Henry, UCSC Library Digital Initiatives
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2019-10-02 00:00:00.000000000 Z
+ date: 2019-10-03 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rails
@@ -106,6 +106,7 @@ files:
  - lib/bulk_ops/github_access.rb
  - lib/bulk_ops/github_credential.rb
  - lib/bulk_ops/operation.rb
+ - lib/bulk_ops/parser.rb
  - lib/bulk_ops/queue_work_ingests_job.rb
  - lib/bulk_ops/relationship.rb
  - lib/bulk_ops/search_builder_behavior.rb