bulk_ops 0.1.14 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 27b9b67583cbf4ca808867661196e5bb8a6b95490b3a377dd85d11a91d0a41fb
- data.tar.gz: 508dbf4a72146f7a893851aec847bb9bb82a399765135dfde6daa6eec0a4d121
+ metadata.gz: fea513373c0ae0267f9302311300b8f4ba03b9fa632db168aec201c2f8359182
+ data.tar.gz: baa0fe9b67bfbe7d2f8283ff7949cb8ec46e268c7e15ef17c7b73b9c3a80ef19
  SHA512:
- metadata.gz: da715c7235ae2044b2354653b382825078a63f466e542642d995c81b6dd3bb8d8336c13ac84dd811ecefbff4e9e2422c45f1dc39e10b0ea1a4119a6736397ee2
- data.tar.gz: 9bd37e6481170e1da5ba4494888fb16a1cfa65cc9869edcee87353228a1eb78eefe5f32c87bb7c14f4fc2b6e3ead0ad068901273d8c100e3afbce0d5268e4486
+ metadata.gz: 33810a935cc44ee6de4448a12e37d4c0889b6a4c7d409011fc5dd9d0bddc18e1a53f0f18337c933ab3dd6903d4112b0a968579f20e7e204d4278220c0dbb0315
+ data.tar.gz: b7ff43aed578a7aba0cb59d0862af6d1ffe7f50eccce6715171063a09e1edf2670e3d23333b4e506ff3d473ff6dbed56f672ff66b2f43209e58e67950706072a
data/lib/bulk_ops.rb CHANGED
@@ -1,6 +1,39 @@
  require "bulk_ops/version"

  module BulkOps
+ OPTION_FIELDS = ['visibility','work type']
+ RELATIONSHIP_FIELDS = ['parent','child','collection','order']
+ REFERENCE_IDENTIFIER_FIELDS = ['Reference Identifier','ref_id','Reference ID','Relationship ID','Relationship Identifier','Reference Identifier Type','Reference ID Type','Ref ID Type','relationship_identifier_type','relationship_id_type']
+ FILE_FIELDS = ['file','files','filename','filenames']
+ FILE_ACTIONS = ['add','upload','remove','delete']
+ SEPARATOR = ';'
+ DEFAULT_ADMIN_SET_TITLE = "Bulk Ingest Set"
+ INGEST_MEDIA_PATH = "/dams_ingest"
+ TEMPLATE_DIR = "lib/bulk_ops/templates"
+ RELATIONSHIP_COLUMNS = ["parent","child","next"]
+ SPECIAL_COLUMNS = ["parent",
+ "child",
+ "order",
+ "next",
+ "work_type",
+ "collection",
+ "collection_title",
+ "collection_id",
+ "visibility",
+ "relationship_identifier_type",
+ "id",
+ "filename",
+ "file"]
+ IGNORED_COLUMNS = ["ignore","offline_notes"]
+ OPTION_REQUIREMENTS = {type: {required: true,
+ values:[:ingest,:update]},
+ file_method: {required: :true,
+ values: [:replace_some,:add_remove,:replace_all]},
+ notifications: {required: true}}
+ SPREADSHEET_FILENAME = 'metadata.csv'
+ OPTIONS_FILENAME = 'configuration.yml'
+ ROW_OFFSET = 2
+
  dirstring = File.join( File.dirname(__FILE__), 'bulk_ops/**/*.rb')
  Dir[dirstring].each do |file|
  begin
@@ -9,7 +42,5 @@ module BulkOps
  puts "ERROR LOADING #{File.basename(file)}: #{e}"
  end
  end
- # require 'bulk_ops/verification'
- # require 'bulk_ops/verification'
- # require 'bulk_ops/work_proxy'
+
  end
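
Note on this release: the constants added above (OPTION_FIELDS through ROW_OFFSET) were previously defined piecemeal on BulkOps::WorkProxy, BulkOps::Operation, and BulkOps::GithubAccess. Version 0.1.15 consolidates them at the top of the BulkOps module, and the hunks below update each call site to the module-level names. A minimal sketch of the pattern (mirroring the verification.rb hunk further down):

    # 0.1.14: constants namespaced under individual classes
    filenames = filestring.split(BulkOps::WorkProxy::SEPARATOR)

    # 0.1.15: constants defined once on the top-level module
    filenames = filestring.split(BulkOps::SEPARATOR)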
data/lib/bulk_ops/create_spreadsheet_job.rb CHANGED
@@ -36,7 +36,7 @@ class BulkOps::CreateSpreadsheetJob < ActiveJob::Base
  next if value.is_a? DateTime
  value = (label ? WorkIndexer.fetch_remote_label(value.id) : value.id) unless value.is_a? String
  value.gsub("\"","\"\"")
- end.join(BulkOps::WorkProxy::SEPARATOR).prepend('"').concat('"')
+ end.join(BulkOps::SEPARATOR).prepend('"').concat('"')
  end.join(',')
  end

data/lib/bulk_ops/github_access.rb CHANGED
@@ -5,10 +5,6 @@ require 'base64'

  class BulkOps::GithubAccess

- ROW_OFFSET = 2
- SPREADSHEET_FILENAME = 'metadata.csv'
- OPTIONS_FILENAME = 'configuration.yml'
-
  attr_accessor :name

  def self.auth_url user
@@ -142,11 +138,11 @@ class BulkOps::GithubAccess
  def add_new_spreadsheet file, message=false
  if file.is_a? Tempfile
  file.close
- add_file file.path, SPREADSHEET_FILENAME, message: message
+ add_file file.path, BulkOps::SPREADSHEET_FILENAME, message: message
  elsif file.is_a?(String) && File.file?(file)
- add_file file, SPREADSHEET_FILENAME, message: message
+ add_file file, BulkOps::SPREADSHEET_FILENAME, message: message
  elsif file.is_a? String
- add_contents(spreadsheet_path, SPREADSHEET_FILENAME, message: message)
+ add_contents(spreadsheet_path, BulkOps::SPREADSHEET_FILENAME, message: message)
  end
  end

@@ -218,12 +214,12 @@ class BulkOps::GithubAccess

  def get_metadata_row row_number
  @current_metadata ||= load_metadata
- @current_metadata[row_number - ROW_OFFSET]
+ @current_metadata[row_number - BulkOps::ROW_OFFSET]
  end

  def get_past_metadata_row commit_sha, row_number
  past_metadata = Base64.decode64( client.contents(repo, path: filename, ref: commit_sha) )
- past_metadata[row_number - ROW_OFFSET]
+ past_metadata[row_number - BulkOps::ROW_OFFSET]
  end

  def get_file filename
@@ -244,13 +240,13 @@ class BulkOps::GithubAccess
  end

  def spreadsheet_path
- "#{name}/#{SPREADSHEET_FILENAME}"
+ "#{name}/#{BulkOps::SPREADSHEET_FILENAME}"
  end

  private

  def options_path
- "#{name}/#{OPTIONS_FILENAME}"
+ "#{name}/#{BulkOps::OPTIONS_FILENAME}"
  end

  def current_master_commit_sha
data/lib/bulk_ops/operation.rb CHANGED
@@ -7,33 +7,10 @@ module BulkOps

  include BulkOps::Verification

- attr_accessor :work_type, :visibility, :reference_identifier
+ attr_accessor :work_type, :visibility, :reference_identifier, :metadata

  delegate :can_merge?, :merge_pull_request, to: :git

- INGEST_MEDIA_PATH = "/dams_ingest"
- TEMPLATE_DIR = "lib/bulk_ops/templates"
- RELATIONSHIP_COLUMNS = ["parent","child","next"]
- SPECIAL_COLUMNS = ["parent",
- "child",
- "order",
- "next",
- "work_type",
- "collection",
- "collection_title",
- "collection_id",
- "visibility",
- "relationship_identifier_type",
- "id",
- "filename",
- "file"]
- IGNORED_COLUMNS = ["ignore","offline_notes"]
- OPTION_REQUIREMENTS = {type: {required: true,
- values:[:ingest,:update]},
- file_method: {required: :true,
- values: [:replace_some,:add_remove,:replace_all]},
- notifications: {required: true}}
-
  def self.unique_name name, user
  while BulkOps::Operation.find_by(name: name) || BulkOps::GithubAccess.list_branch_names(user).include?(name) do
  if ['-','_'].include?(name[-2]) && name[-1].to_i > 0
@@ -119,7 +96,7 @@ module BulkOps
  @metadata.each_with_index do |values,row_number|
  proxy = work_proxies.find_by(row_number: row_number)
  proxy.update(message: "interpreted at #{DateTime.now.strftime("%d/%m/%Y %H:%M")} " + proxy.message)
- data = proxy.interpret_data values
+ data = BulkOps::Parser.new(proxy, @metadata).interpret_data(raw_row: values)
  next unless proxy.proxy_errors.blank?
  BulkOps::CreateWorkJob.perform_later(proxy.work_type || "Work",
  user.email,
@@ -202,7 +179,7 @@ module BulkOps

  #loop through the work proxies to create a job for each work
  work_proxies.each do |proxy|
- data = proxy.interpret_data final_spreadsheet[proxy.row_number]
+ data = BulkOps::Parser.new(proxy,final_spreadsheet).interpret_data(raw_row: final_spreadsheet[proxy.row_number])
  BulkOps::UpdateWorkJob.perform_later(proxy.work_type || "",
  user.email,
  data,
@@ -238,13 +215,13 @@ module BulkOps
  bulk_ops_dir = Gem::Specification.find_by_name("bulk_ops").gem_dir

  #copy template files
- Dir["#{bulk_ops_dir}/#{TEMPLATE_DIR}/*"].each do |file|
+ Dir["#{bulk_ops_dir}/#{BulkOps::TEMPLATE_DIR}/*"].each do |file|
  git.add_file file
  end

  #update configuration options
  unless options.blank?
- full_options = YAML.load_file(File.join(bulk_ops_dir,TEMPLATE_DIR, BulkOps::GithubAccess::OPTIONS_FILENAME))
+ full_options = YAML.load_file(File.join(bulk_ops_dir,BulkOps::TEMPLATE_DIR, BulkOps::OPTIONS_FILENAME))

  options.each { |option, value| full_options[option] = value }

@@ -278,6 +255,10 @@ module BulkOps
  git.update_options(options, message: message)
  end

+ def metadata
+ @metadata ||= git.load_metadata
+ end
+
  def options
  return {} if name.nil?
  return @options if @options
@@ -332,7 +313,7 @@ module BulkOps
  end

  def ignored_fields
- (options['ignored headers'] || []) + IGNORED_COLUMNS
+ (options['ignored headers'] || []) + BulkOps::IGNORED_COLUMNS
  end

data/lib/bulk_ops/parser.rb ADDED
@@ -0,0 +1,485 @@
+ class BulkOps::Parser
+ require 'uri'
+
+ attr_accessor :proxy, :raw_data, :raw_row
+
+ delegate :relationships, :operation, :row_number, :work_id, :visibility, :work_type, :reference_identifier, :order, to: :proxy
+
+ def initialize prx, metadata_sheet=nil
+ @proxy = prx
+ @raw_data = (metadata_sheet || proxy.operation.metadata)
+ @raw_row = @raw_data[@proxy.row_number].dup
+ @metadata = {}
+ @parsing_errors = []
+ end
+
+ def interpret_data raw_row: nil, raw_data: nil, proxy: nil
+ @raw_row = raw_row if raw_row.present?
+ @proxy = proxy if proxy.present?
+ @raw_data = raw_data if raw_data.present?
+ setAdminSet
+ setMetadataInheritance
+ interpret_option_fields
+ interpret_relationship_fields
+ disambiguate_columns
+ interpret_file_fields
+ interpret_controlled_fields
+ interpret_scalar_fields
+ @proxy.update(status: "ERROR", message: "error parsing spreadsheet line") if @parsing_errors.present?
+ @proxy.proxy_errors = (@proxy.proxy_errors || []) + @parsing_errors
+ return @metadata
+ end
+
+ def disambiguate_columns
+ #do nothing unless there are columns with the same header
+ return unless (@raw_row.respond_to?(:headers) && (@raw_row.headers.uniq.length < @raw_row.length) )
+ row = {}
+ (0...@raw_row.length).each do |i|
+ header = @raw_row.headers[i]
+ value = @raw_row[i]
+ # separate values in identical columns using the separator
+ row[header] = (Array(row[header]) << value).join(BulkOps::SEPARATOR)
+ end
+ #return a hash with identical columns merged
+ return row
+ end
+
+ def interpret_controlled_fields
+
+ # The labels array tracks the contents of columns marked as labels,
+ # which may require special validation
+ labels = {}
+
+ # This hash is populated with relevant data as we loop through the fields
+ controlled_data = {}
+
+ row = @raw_row.dup
+ @raw_row.each do |field_name, value|
+ next if value.blank? or field_name.blank?
+ field_name = field_name.to_s
+
+ #If our CSV interpreter is feeding us the headers as a line, ignore it.
+ next if field_name == value
+
+ #check if they are using the 'field_name.authority' syntax
+ authority = nil
+ if ((split=field_name.split('.')).count == 2)
+ authority = split.last
+ field_name = split.first
+ end
+
+ # get the field name, if this column is a metadata field
+ field_name_norm = find_field_name(field_name)
+ field = schema.get_field(field_name_norm)
+
+ # Ignore anything that isn't a controlled field
+ next unless field.present? && field.controlled?
+
+ # Keep track of label fields
+ if field_name.downcase.ends_with?("label")
+ next if operation.options["ignore_labels"]
+ labels[field_name_norm] ||= []
+ labels[field_name_norm] += split_values value
+ next unless operation.options["import_labels"]
+ end
+
+ remove = field_name.downcase.starts_with?("remove") || field_name.downcase.starts_with?("delete")
+
+ # handle multiple values
+ value_array = split_values(value)
+ controlled_data[field_name_norm] ||= [] unless value_array.blank?
+ value_array.each do |value|
+ # Decide of we're dealing with a label or url
+ # It's an ID if it's a URL and the name doesn't end in 'label'
+ value.strip!
+ if value =~ /^#{URI::regexp}$/ and !field_name.downcase.ends_with?("label")
+ value_id = value
+ # label = WorkIndexer.fetch_remote_label(value)
+ # error_message = "cannot fetch remote label for url: #{value}"
+ # report_error( :cannot_retrieve_label , error_message, url: value, row_number: row_number) unless label
+ else
+ # It's a label, so unescape it and get the id
+ value = unescape_csv(value)
+ value_id = get_remote_id(value, property: field_name_norm, authority: authority) || localAuthUrl(field_name_norm, value)
+ # label = value
+ report_error(:cannot_retrieve_url,
+ message: "cannot find or create url for controlled vocabulary label: #{value}",
+ url: value,
+ row_number: row_number) unless value_id
+ end
+ controlled_data[field_name_norm] << {id: value_id, remove: field_name.downcase.starts_with?("remove")}
+ row.delete(field_name)
+ end
+ end
+ @raw_row = row
+
+ # Actually add all the data
+ controlled_data.each do |property_name, data|
+ @metadata["#{property_name}_attributes"] ||= [] unless data.blank?
+ data.uniq.each do |datum|
+ atts = {"id" => datum[:id]}
+ atts["_delete"] = true if datum[:remove]
+ @metadata["#{property_name}_attributes"] << atts
+ end
+ end
+ end
+
+ def interpret_scalar_fields
+ row = @raw_row.dup
+ @raw_row.each do |field, values|
+ next if values.blank? or field.nil? or field == values
+ # get the field name, if this column is a metadata field
+ next unless field_name = find_field_name(field.to_s)
+ field = schema.get_field(field_name)
+ # Ignore controlled fields
+ next if field.controlled?
+ split_values(values).each do |value|
+ next if value.blank?
+ value = value.strip.encode('utf-8', :invalid => :replace, :undef => :replace, :replace => '_') unless value.blank?
+ value = unescape_csv(value)
+ (@metadata[field_name] ||= []) << value
+ row.delete(field)
+ end
+ end
+ @raw_row = row
+ end
+
+ def interpret_file_fields
+ # This method handles file additions and deletions from the spreadsheet
+ # if additional files need to be deleted because the update is set to replace
+ # some or all existing files, those replacement-related deletions are handled
+ # by the BulkOps::Operation.
+ #
+ # TODO: THIS DOES NOT YET MANAGE THE ORDER OF INGESTED FILESETS
+
+ row = @raw_row.dup
+ @raw_row.each do |field, value|
+ next if value.blank? or field.blank?
+ field = field.to_s
+ #If our CSV interpreter is feeding us the headers as a line, ignore it.
+ next if field == value
+
+
+ # Check if this is a file field, and whether we are removing or adding a file
+ next unless (action = is_file_field?(field))
+
+ # Move on if this field is the name of another property (e.g. masterFilename)
+ next if find_field_name(field)
+
+ # Check if we are removing a file
+ if action == "remove"
+ get_removed_filesets(value).each { |fileset_id| delete_file_set(file_set_id) }
+ else
+ # Add a file
+ operation.get_file_paths(value).each do |filepath|
+ begin
+ uploaded_file = Hyrax::UploadedFile.create(file: File.open(filepath), user: operation.user)
+ (@metadata[:uploaded_files] ||= []) << uploaded_file.id unless uploaded_file.id.nil?
+ row.delete(field)
+ rescue Exception => e
+ report_error(:upload_error,
+ message: "Error opening file: #{ filepath } -- #{e}",
+ file: File.join(BulkOps::INGEST_MEDIA_PATH,filename),
+ row_number: row_number)
+ end
+ end
+ end
+ end
+ @raw_row = row
+ end
+
+ def interpret_option_fields
+ row = @raw_row.dup
+ @raw_row.each do |field,value|
+ next if value.blank? or field.blank?
+ field = field.to_s
+ next if value == field
+
+ normfield = field.downcase.parameterize.gsub(/[_\s-]/,'')
+ if ["visibility", "public"].include?(normfield)
+ @proxy.update(visibility: format_visibility(value))
+ row.delete(field)
+ end
+ if ["worktype","model","type"].include?(normfield)
+ @proxy.update(work_type: format_worktype(value) )
+ row.delete(field)
+ end
+ if ["referenceidentifier",
+ "referenceid",
+ "refid",
+ "referenceidentifiertype",
+ "referenceidtype",
+ "refidtype",
+ "relationshipidentifier",
+ "relationshipid",
+ "relationshipidentifiertype",
+ "relationshipidtype",
+ "relid",
+ "relidtype"].include?(normfield)
+ @proxy.update(reference_identifier: format_reference_id(value))
+ row.delete(field)
+ end
+ end
+ @raw_row = row
+ end
+
+ def interpret_relationship_fields
+ row = @raw_row.dup
+ @raw_row.each do |field,value|
+ next if value.blank? or field.blank?
+ field = field.to_s
+ value = unescape_csv(value)
+ identifer_type = reference_identifier
+
+ next if value == field
+
+ # Correctly interpret the notation "parent:id", "parent id" etc in a column header
+ if (split = field.split(/[:_\-\s]/)).count == 2
+ identifier_type = split.last
+ relationship_type = split.first.to_s
+ else
+ relationship_type = field
+ end
+
+ relationship_type = normalize_relationship_field_name(relationship_type)
+ case relationship_type
+ when "order"
+ # If the field specifies the object's order among siblings
+ @proxy.update(order: value.to_f)
+ row.delete(field)
+ next
+ when "collection"
+ # If the field specifies the name or ID of a collection,
+ # find or create the collection and update the metadata to match
+ col = find_or_create_collection(value)
+ ( @metadata[:member_of_collection_ids] ||= [] ) << col.id if col
+ row.delete field
+ next
+ when "parent", "child"
+
+ # correctly interpret the notation "id:a78C2d81"
+ identifier_type, object_identifier = interpret_relationship_value(identifier_type, value)
+
+ relationship_parameters = { work_proxy_id: @proxy.id,
+ identifier_type: identifier_type,
+ relationship_type: relationship_type,
+ object_identifier: object_identifier,
+ status: "new"}
+
+ #add previous sibling link if necessary
+ previous_value = @raw_data[row_number-1][field]
+ # Check if this is a parent relationship, and the previous row also has one
+ if previous_value.present? && (relationship_type == "parent")
+ # Check if the previous row has the same parent as this row
+ if object_identifier == interpret_relationship_value(identifier_type, previous_value, field).last
+ # If so, set the previous sibling parameter on the relationshp
+ # to the id for the proxy associated with the previous row
+ relationship_parameters[:previous_sibling] = operation.work_proxies.find_by(row_number: row_number-1).id
+ end
+ end
+ BulkOps::Relationship.create(relationship_parameters)
+ row.delete field
+ end
+ end
+ @raw_row = row
+ end
+
+ def normalize_relationship_field_name field
+ normfield = field.downcase.parameterize.gsub(/[_\s-]/,'')
+ BulkOps::RELATIONSHIP_FIELDS.find{|rel_field| normfield == rel_field }
+ end
+
+ def find_previous_parent field="parent"
+ #Return the row number of the most recent preceding row that does
+ # not itself have a parent defined
+ i = 1;
+ while (prev_row = raw_data[row_number - i])
+ return (row_number - i) if prev_row[field].blank?
+ i += 1
+ end
+ end
+
+ def interpret_relationship_value id_type, value, field="parent"
+ #Handle "id:20kj4259" syntax if it hasn't already been handled
+ if (split = value.to_s.split(":")).count == 2
+ id_type = split.first
+ value = split.last
+ end
+ #Handle special shorthand syntax for refering to relative row numbers
+ if id_type == "row"
+ if value.to_i < 0
+ # if given a negative integer, count backwards from the current row (remember that value.to_i is negative)
+ return [id_type,row_number + value.to_i]
+ elsif value.to_s.downcase.include?("prev")
+ # if given any variation of the word "previous", get the first preceding row with no parent of its own
+ return [id_type,find_previous_parent(field)]
+ end
+ end
+ return [id_type,value]
+ end
+
+ def unescape_csv(value)
+ value.gsub(/\\(['";,])/,'\1')
+ end
+
+
+ def format_worktype(value)
+ # format the value like a class name
+ type = value.titleize.gsub(/[-_\s]/,'')
+ # reject it if it isn't a defined class
+ type = false unless Object.const_defined? type
+ # fall back to the work type defined by the operation, or a standard "Work"
+ return type ||= work_type || operation.work_type || "Work"
+ end
+
+ def format_visibility(value)
+ case value.downcase
+ when "public", "open", "true"
+ return "open"
+ when "campus", "ucsc", "institution"
+ return "ucsc"
+ when "restricted", "private", "closed", "false"
+ return "restricted"
+ end
+ end
+
+
+ def mintLocalAuthUrl(auth_name, value)
+ value.strip!
+ id = value.parameterize
+ auth = Qa::LocalAuthority.find_or_create_by(name: auth_name)
+ entry = Qa::LocalAuthorityEntry.create(local_authority: auth,
+ label: value,
+ uri: id)
+ return localIdToUrl(id,auth_name)
+ end
+
+ def findAuthUrl(auth, value)
+ value.strip!
+ return nil if auth.nil?
+ return nil unless (entries = Qa::Authorities::Local.subauthority_for(auth).search(value))
+ entries.each do |entry|
+ #require exact match
+ next unless entry["label"].force_encoding('UTF-8') == value.force_encoding('UTF-8')
+ url = entry["url"] || entry["id"]
+ # url = localIdToUrl(url,auth) unless url =~ URI::regexp
+ return url
+ end
+ return nil
+ end
+
+ def localIdToUrl(id,auth_name)
+ root_urls = {production: "https://digitalcollections.library.ucsc.edu",
+ staging: "http://digitalcollections-staging.library.ucsc.edu",
+ development: "http://#{Socket.gethostname}",
+ test: "http://#{Socket.gethostname}"}
+ return "#{root_urls[Rails.env.to_sym]}/authorities/show/local/#{auth_name}/#{id}"
+ end
+
+ def getLocalAuth(field_name)
+ field = schema.get_property(field_name)
+ # There is only ever one local authority per field, so just pick the first you find
+ if vocs = field.vocabularies
+ vocs.each do |voc|
+ return voc["subauthority"] if voc["authority"].downcase == "local"
+ end
+ end
+ return nil
+ end
+
+ def setAdminSet
+ return if @metadata[:admin_set_id]
+ asets = AdminSet.where({title: "Bulk Ingest Set"})
+ asets = AdminSet.find('admin_set/default') if asets.blank?
+ @metadata[:admin_set_id] = Array(asets).first.id unless asets.blank?
+ end
+
+ def setMetadataInheritance
+ return if @metadata[:metadataInheritance].present?
+ @metadata[:metadataInheritance] = operation.options["metadataInheritance"] unless operation.options["metadataInheritance"].blank?
+ end
+
+ def report_error type, message, **args
+ puts "ERROR MESSAGE: #{message}"
+ @proxy.update(status: "error", message: message)
+ args[:type]=type
+ (@parsing_errors ||= []) << BulkOps::Error.new(**args)
+ end
+
+ def get_removed_filesets(filestring)
+ file_ids = split_values(filestring)
+ file_ids.select{|file_id| record_exists?(file_id)}
+
+ # This part handles filenames in addition to file ids. It doesn't work yet!
+ # file_ids.map do |file_id|
+ # If the filename is the id of an existing record, keep that
+ # next(file_id) if (record_exists?(file_id))
+ # If this is the label (i.e.filename) of an existing fileset, use that fileset id
+ # TODO MAKE THIS WORK!!
+ # next(filename) if (filename_exists?(filename))
+ # File.join(BulkOps::INGEST_MEDIA_PATH, filename_prefix, filename)
+ # end
+ end
+
+ def delete_file_set fileset_id
+ BulkOps::DeleteFileSetJob.perform_later(fileset_id, operation.user.email )
+ end
+
+
+ def is_file_field? field
+ operation.is_file_field? field
+ end
+
+ def record_exists? id
+ operation.record_exists? id
+ end
+
+ def localAuthUrl(property, value)
+ return value if (auth = getLocalAuth(property)).nil?
+ url = findAuthUrl(auth, value) || mintLocalAuthUrl(auth,value)
+ return url
+ end
+
+ def find_collection(collection)
+ cols = Collection.where(id: collection)
+ cols += Collection.where(title: collection).select{|col| col.title.first == collection}
+ return cols.last unless cols.empty?
+ return false
+ end
+
+ def find_or_create_collection(collection)
+ col = find_collection(collection)
+ return col if col
+ return false if collection.to_i > 0
+ col = Collection.create(title: [collection.to_s], depositor: operation.user.email, collection_type: Hyrax::CollectionType.find_by(title:"User Collection"))
+ end
+
+ def get_remote_id(value, authority: nil, property: nil)
+ return false
+ #TODO retrieve URL for this value from the specified remote authr
+ end
+
+ def format_param_name(name)
+ name.titleize.gsub(/\s+/, "").camelcase(:lower)
+ end
+
+ def schema
+ ScoobySnacks::METADATA_SCHEMA
+ end
+
+ def find_field_name(field)
+ operation.find_field_name(field)
+ end
+
+ def downcase_first_letter(str)
+ return "" unless str
+ str[0].downcase + str[1..-1]
+ end
+
+ def split_values value_string
+ # Split values on all un-escaped separator character (escape character is '\')
+ # Then replace all escaped separator charactors with un-escaped versions
+ value_string.split(/(?<!\\)#{BulkOps::SEPARATOR}/).map{|val| val.gsub("\\#{BulkOps::SEPARATOR}",BulkOps::SEPARATOR).strip}
+ end
+
+ end
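
Note on the new class: BulkOps::Parser extracts the row-parsing logic that previously lived in BulkOps::WorkProxy (see the removals near the end of this diff). As the operation.rb hunks above show, callers now build a parser from a work proxy plus the full metadata sheet, which lets the parser inspect neighboring rows (for example, the previous-sibling links in interpret_relationship_fields):

    data = BulkOps::Parser.new(proxy, @metadata).interpret_data(raw_row: values)

One behavior worth noting: split_values divides a multi-valued cell on BulkOps::SEPARATOR (';') wherever the separator is not escaped with a backslash, then unescapes the survivors. A hypothetical example (not taken from the gem's tests):

    parser = BulkOps::Parser.new(proxy)
    parser.split_values("dogs;cats\\;kittens")
    #=> ["dogs", "cats;kittens"]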
data/lib/bulk_ops/verification.rb CHANGED
@@ -35,7 +35,7 @@ module BulkOps
  return false if fieldname.blank?
  return false if schema.get_field(fieldname)
  field_parts = fieldname.underscore.humanize.downcase.gsub(/[-_]/,' ').split(" ")
- return false unless field_parts.any?{ |field_type| BulkOps::WorkProxy::FILE_FIELDS.include?(field_type) }
+ return false unless field_parts.any?{ |field_type| BulkOps::FILE_FIELDS.include?(field_type) }
  return "remove" if field_parts.any?{ |field_type| ['remove','delete'].include?(field_type) }
  return "add"
  end
@@ -46,7 +46,7 @@ module BulkOps
  name.gsub!(/[_\s-]?[lL]abel$/,'')
  name.gsub!(/^[rR]emove[_\s-]?/,'')
  name.gsub!(/^[dD]elete[_\s-]?/,'')
- possible_fields = Work.attribute_names + schema.all_field_names
+ possible_fields = (Work.attribute_names + schema.all_field_names).uniq
  matching_fields = possible_fields.select{|pfield| pfield.gsub(/[_\s-]/,'').parameterize == name.gsub(/[_\s-]/,'').parameterize }
  return false if matching_fields.blank?
  # raise Exception "Ambiguous metadata fields!" if matching_fields.uniq.count > 1
@@ -55,8 +55,8 @@ module BulkOps

  def get_file_paths(filestring)
  return [] if filestring.blank?
- filenames = filestring.split(BulkOps::WorkProxy::SEPARATOR)
- filenames.map { |filename| File.join(BulkOps::Operation::INGEST_MEDIA_PATH, options['file_prefix'] || "", filename) }
+ filenames = filestring.split(BulkOps::SEPARATOR)
+ filenames.map { |filename| File.join(BulkOps::INGEST_MEDIA_PATH, options['file_prefix'] || "", filename) }
  end

  def record_exists? id
@@ -85,7 +85,7 @@ module BulkOps
  end

  def verify_configuration
- BulkOps::Operation::OPTION_REQUIREMENTS.each do |option_name, option_info|
+ BulkOps::OPTION_REQUIREMENTS.each do |option_name, option_info|
  # Make sure it's present if required
  if (option_info["required"].to_s == "true") || (option_info["required"].to_s == type)
  if options[option_name].blank?
@@ -120,7 +120,7 @@ module BulkOps
  # Ignore everything marked as a label
  next if column_name_redux.ends_with? "label"
  # Ignore any column names with special meaning in hyrax
- next if BulkOps::Operation::SPECIAL_COLUMNS.any?{|col| col.downcase.parameterize.gsub(/[_\s-]/,"") == column_name_redux }
+ next if BulkOps::SPECIAL_COLUMNS.any?{|col| col.downcase.parameterize.gsub(/[_\s-]/,"") == column_name_redux }
  # Ignore any columns speficied to be ignored in the configuration
  ignored = options["ignored headers"] || []
  next if ignored.any?{|col| col.downcase.parameterize.gsub(/[_\s-]/,"") == column_name_redux }
@@ -131,7 +131,7 @@ module BulkOps
  end

  def verify_remote_urls
- row_offset = BulkOps::GithubAccess::ROW_OFFSET.present? ? BulkOps::GithubAccess::ROW_OFFSET : 2
+ row_offset = BulkOps::ROW_OFFSET.present? ? BulkOps::ROW_OFFSET : 2
  get_spreadsheet.each_with_index do |row, row_num|
  update(message: "verifying controlled vocab urls (row number #{row_num})")
  next if row_num.nil?
@@ -173,7 +173,7 @@ module BulkOps
  def get_ref_id row
  row.each do |field,value|
  next if field.blank? or value.blank? or field === value
- next unless BulkOps::WorkProxy::REFERENCE_IDENTIFIER_FIELDS.any?{ |ref_field| normalize_field(ref_field) == normalize_field(field) }
+ next unless BulkOps::REFERENCE_IDENTIFIER_FIELDS.any?{ |ref_field| normalize_field(ref_field) == normalize_field(field) }
  return value
  end
  # No reference identifier specified in the row. Use the default for the operation.
@@ -190,7 +190,7 @@ module BulkOps
  # This is sketchy. Redo it.
  (metadata = get_spreadsheet).each do |row,row_num|
  ref_id = get_ref_id(row)
- BulkOps::Operation::RELATIONSHIP_COLUMNS.each do |relationship|
+ BulkOps::RELATIONSHIP_COLUMNS.each do |relationship|
  next unless (obj_id = row[relationship])
  if (split = obj_id.split(':')).present? && split.count == 2
  ref_id = split[0].downcase
data/lib/bulk_ops/version.rb CHANGED
@@ -1,3 +1,3 @@
  module BulkOps
- VERSION = "0.1.14"
+ VERSION = "0.1.15"
  end
data/lib/bulk_ops/work_proxy.rb CHANGED
@@ -1,12 +1,5 @@
  class BulkOps::WorkProxy < ActiveRecord::Base

- require 'uri'
- OPTION_FIELDS = ['visibility','work type']
- RELATIONSHIP_FIELDS = ['parent','child','collection','order']
- REFERENCE_IDENTIFIER_FIELDS = ['Reference Identifier','ref_id','Reference ID','Relationship ID','Relationship Identifier','Reference Identifier Type','Reference ID Type','Ref ID Type','relationship_identifier_type','relationship_id_type']
- FILE_FIELDS = ['file','files','filename','filenames']
- FILE_ACTIONS = ['add','upload','remove','delete']
- SEPARATOR = ';'
  self.table_name = "bulk_ops_work_proxies"
  belongs_to :operation, class_name: "BulkOps::Operation", foreign_key: "operation_id"
  has_many :relationships, class_name: "BulkOps::Relationship"
@@ -40,462 +33,10 @@ class BulkOps::WorkProxy < ActiveRecord::Base
  # TODO make it so people can edit the work again
  end

- def interpret_data raw_data
- admin_set = AdminSet.where(title: "Bulk Ingest Set").first || AdminSet.find(AdminSet.find_or_create_default_admin_set_id)
- metadata = {admin_set_id: admin_set.id}
- metadata.merge! interpret_file_fields(raw_data)
- metadata.merge! interpret_controlled_fields(raw_data)
- metadata.merge! interpret_scalar_fields(raw_data)
- metadata.merge! interpret_relationship_fields(raw_data)
- metadata.merge! interpret_option_fields(raw_data)
- metadata = setAdminSet(metadata)
- metadata = setMetadataInheritance(metadata)
- return metadata
- end

  def proxy_errors
  @proxy_errors ||= []
  end

- private
-
- def is_file_field? field
- operation.is_file_field? field
- end
-
- def record_exists? id
- operation.record_exists? id
- end
-
- def localAuthUrl(property, value)
- return value if (auth = getLocalAuth(property)).nil?
- url = findAuthUrl(auth, value) || mintLocalAuthUrl(auth,value)
- return url
- end
-
- def find_collection(collection)
- cols = Collection.where(id: collection)
- cols += Collection.where(title: collection).select{|col| col.title.first == collection}
- return cols.last unless cols.empty?
- return false
- end
-
- def find_or_create_collection(collection)
- col = find_collection(collection)
- return col if col
- return false if collection.to_i > 0
- col = Collection.create(title: [collection.to_s], depositor: operation.user.email, collection_type: Hyrax::CollectionType.find_by(title:"User Collection"))
- end
-
- def get_remote_id(value, authority: nil, property: nil)
- return false
- #TODO retrieve URL for this value from the specified remote authr
- end
-
- def format_param_name(name)
- name.titleize.gsub(/\s+/, "").camelcase(:lower)
- end
-
- def schema
- ScoobySnacks::METADATA_SCHEMA
- end
-
- def find_field_name(field)
- operation.find_field_name(field)
- end
-
- def downcase_first_letter(str)
- return "" unless str
- str[0].downcase + str[1..-1]
- end
-
- def split_values value_string
- # Split values on all un-escaped separator character (escape character is '\')
- # Then replace all escaped separator charactors with un-escaped versions
- value_string.split(/(?<!\\)#{SEPARATOR}/).map{|val| val.gsub("\\#{SEPARATOR}",SEPARATOR).strip}
- end
-
- def interpret_controlled_fields raw_data
-
- # The labels array tracks the contents of columns marked as labels,
- # which may require special validation
- labels = {}
-
- # This hash is populated with relevant data as we loop through the fields
- controlled_data = {}
-
- raw_data.each do |field_name, value|
- next if value.blank? or field_name.blank?
- field_name = field_name.to_s
-
- #If our CSV interpreter is feeding us the headers as a line, ignore it.
- next if field_name == value
-
- #check if they are using the 'field_name.authority' syntax
- authority = nil
- if ((split=field_name.split('.')).count == 2)
- authority = split.last
- field_name = split.first
- end
-
- # get the field name, if this column is a metadata field
- field_name_norm = find_field_name(field_name)
- field = schema.get_field(field_name_norm)
-
- # Ignore anything that isn't a controlled field
- next unless field.present? && field.controlled?
-
- # Keep track of label fields
- if field_name.downcase.ends_with?("label")
- next if operation.options["ignore_labels"]
- labels[field_name_norm] ||= []
- labels[field_name_norm] += split_values value
- next unless operation.options["import_labels"]
- end
-
- remove = field_name.downcase.starts_with?("remove") || field_name.downcase.starts_with?("delete")
-
- # handle multiple values
- value_array = split_values(value)
- controlled_data[field_name_norm] ||= [] unless value_array.blank?
- value_array.each do |value|
- # Decide of we're dealing with a label or url
- # It's an ID if it's a URL and the name doesn't end in 'label'
- value.strip!
- if value =~ /^#{URI::regexp}$/ and !field_name.downcase.ends_with?("label")
- id = value
- # label = WorkIndexer.fetch_remote_label(value)
- # error_message = "cannot fetch remote label for url: #{value}"
- # report_error( :cannot_retrieve_label , error_message, url: value, row_number: row_number) unless label
- else
- # It's a label, so unescape it and get the id
- value = unescape_csv(value)
- id = get_remote_id(value, property: field_name_norm, authority: authority) || localAuthUrl(field_name_norm, value)
- # label = value
- report_error(:cannot_retrieve_url,
- message: "cannot find or create url for controlled vocabulary label: #{value}",
- url: value,
- row_number: row_number) unless id
- end
- controlled_data[field_name_norm] << {id: id, remove: field_name.downcase.starts_with?("remove")}
- end
- end
-
- #delete any duplicates (if someone listed a url and also its label, or the same url twice)
- controlled_data.each{|field_name, values| controlled_data[field_name] = values.uniq }
-
- # Actually add all the data
- metadata = {}
- leftover_data = raw_data.dup.to_hash
- controlled_data.each do |property_name, data|
- metadata["#{property_name}_attributes"] ||= [] unless data.blank?
- data.each do |datum|
- atts = {"id" => datum[:id]}
- atts["_delete"] = true if datum[:remove]
- metadata["#{property_name}_attributes"] << atts
- leftover_data.except! property_name
- end
- end
- #return [metadata, leftover_data]
- return metadata
- end
-
- def interpret_scalar_fields raw_data
- metadata = {}
- raw_data.each do |field, values|
- next if values.blank? or field.nil? or field == values
- # get the field name, if this column is a metadata field
- next unless field_name = find_field_name(field.to_s)
- field = schema.get_field(field_name)
- # Ignore controlled fields
- next if field.controlled?
- split_values(values).each do |value|
- next if value.blank?
- value = value.strip.encode('utf-8', :invalid => :replace, :undef => :replace, :replace => '_') unless value.blank?
- value = unescape_csv(value)
- (metadata[field_name] ||= []) << value
- end
- end
- return metadata
- end
-
- def interpret_file_fields raw_data
- # This method handles file additions and deletions from the spreadsheet
- # if additional files need to be deleted because the update is set to replace
- # some or all existing files, those replacement-related deletions are handled
- # by the BulkOps::Operation.
- #
- # TODO: THIS DOES NOT YET MANAGE THE ORDER OF INGESTED FILESETS
-
- metadata = {}
- raw_data.each do |field, value|
- next if value.blank? or field.blank?
- field = field.to_s
- #If our CSV interpreter is feeding us the headers as a line, ignore it.
- next if field == value
-
-
- # Check if this is a file field, and whether we are removing or adding a file
- next unless (action = is_file_field?(field))
-
- # Move on if this field is the name of another property (e.g. masterFilename)
- next if find_field_name(field)
-
- # Check if we are removing a file
- if action == "remove"
- get_removed_filesets(value).each { |fileset_id| delete_file_set(file_set_id) }
- else
- # Add a file
- operation.get_file_paths(value).each do |filepath|
- begin
- uploaded_file = Hyrax::UploadedFile.create(file: File.open(filepath), user: operation.user)
- (metadata[:uploaded_files] ||= []) << uploaded_file.id unless uploaded_file.id.nil?
- rescue Exception => e
- report_error(:upload_error,
- message: "Error opening file: #{ filepath } -- #{e}",
- file: File.join(BulkOps::Operation::INGEST_MEDIA_PATH,filename),
- row_number: row_number)
- end
- end
- end
- end
- return metadata
- end
-
- def interpret_option_fields raw_data
- raw_data.each do |field,value|
- next if value.blank? or field.blank?
- field = field.to_s
- next if value == field
-
- normfield = field.downcase.parameterize.gsub(/[_\s-]/,'')
- if ["visibility", "public"].include?(normfield)
- update(visibility: format_visibility(value))
- end
- if ["worktype","model","type"].include?(normfield)
- update(work_type: format_worktype(value) )
- end
- if ["referenceidentifier",
- "referenceid",
- "refid",
- "referenceidentifiertype",
- "referenceidtype",
- "refidtype",
- "relationshipidentifier",
- "relationshipid",
- "relationshipidentifiertype",
- "relationshipidtype",
- "relid",
- "relidtype"].include?(normfield)
- update(reference_identifier: format_reference_id(value))
- end
- end
- return {}
- end
-
- def interpret_relationship_fields(raw_data)
- metadata = {}
- raw_data.each do |field,value|
- next if value.blank? or field.blank?
- field = field.to_s
- value = unescape_csv(value)
- identifer_type = reference_identifier
-
- next if value == field
-
- if (split = field.split(":")).count == 2
- identifier_type = split.last
- relationship_type = split.first.to_s
- else
- relationship_type = field
- end
-
- relationship_type = normalize_relationship_field_name(relationship_type)
- case relationship_type
- when "order"
- # If the field specifies the object's order among siblings
- update(order: value.to_f)
- next
- when "collection"
- # If the field specifies the name or ID of a collection,
- # find or create the collection and update the metadata to match
- col = find_or_create_collection(value)
- ( metadata[:member_of_collection_ids] ||= [] ) << col.id if col
- next
- when "parent", "child"
-
- # correctly interpret the notation "id:a78C2d81"
- identifier_type, object_identifier = interpret_relationship_value(identifier_type, value)
-
- relationship_parameters = { work_proxy_id: id,
- identifier_type: identifier_type,
- relationship_type: relationship_type,
- object_identifier: object_identifier,
- status: "new"}
-
- #add previous sibling link if necessary
- previous_value = operation.final_spreadsheet[row_number-1][field]
- # Check if this is a parent relationship, and the previous row also has one
- if previous_value.present? && (relationship_type == "parent")
- # Check if the previous row has the same parent as this row
- if object_identifier == interpret_relationship_value(identifier_type, previous_value, field).last
- # If so, set the previous sibling parameter on the relationshp
- # to the id for the proxy associated with the previous row
- relationship_parameters[:previous_sibling] = operation.work_proxies.find_by(row_number: row_number-1).id
- end
- end
- BulkOps::Relationship.create(relationship_parameters)
- end
- return metadata
- end
- end
-
- def normalize_relationship_field_name field
- normfield = field.downcase.parameterize.gsub(/[_\s-]/,'')
- RELATIONSHIP_FIELDS.find{|field| normfield.include?(field) }
- end
-
- def find_previous_parent field="parent"
- #Return the row number of the most recent preceding row that does
- # not itself have a parent defined
- i = 0;
- while (prev_row = operation.final_spreadsheet[row_number - i])
- return (row_number - i) if prev_row[field].blank?
- end
- end
-
- def interpret_relationship_value id_type, value, field="parent"
- #Handle "id:20kj4259" syntax if it hasn't already been handled
- if (split = value.to_s.split(":")).count == 2
- id_type = split.first
- value = split.last
- end
- #Handle special shorthand syntax for refering to relative row numbers
- if id_type == "row"
- if value.to_i < 0
- # if given a negative integer, count backwards from the current row
- return [id_type,row_number - value]
- elsif value.to_s.downcase.include?("prev")
- # if given any variation of the word "previous", get the first preceding row with no parent of its own
- return [id_type,find_previous_parent(field)]
- end
- end
- return [id_type,value]
- end
-
- def unescape_csv(value)
- value.gsub(/\\(['";,])/,'\1')
- end
-
- def format_worktype(value)
- # format the value like a class name
- type = value.titleize.gsub(/[-_\s]/,'')
- # reject it if it isn't a defined class
- type = false unless Object.const_defined? type
- # fall back to the work type defined by the operation, or a standard "Work"
- return type ||= operation.work_type || "Work"
- end
-
- def format_visibility(value)
- case value.downcase
- when "public", "open", "true"
- return "open"
- when "campus", "ucsc", "institution"
- return "ucsc"
- when "restricted", "private", "closed", "false"
- return "restricted"
- end
- end
-
- def mintLocalAuthUrl(auth_name, value)
- value.strip!
- id = value.parameterize
- auth = Qa::LocalAuthority.find_or_create_by(name: auth_name)
- entry = Qa::LocalAuthorityEntry.create(local_authority: auth,
- label: value,
- uri: id)
- return localIdToUrl(id,auth_name)
- end
-
- def findAuthUrl(auth, value)
- value.strip!
- return nil if auth.nil?
- return nil unless (entries = Qa::Authorities::Local.subauthority_for(auth).search(value))
- entries.each do |entry|
- #require exact match
- next unless entry["label"].force_encoding('UTF-8') == value.force_encoding('UTF-8')
- url = entry["url"] || entry["id"]
- # url = localIdToUrl(url,auth) unless url =~ URI::regexp
- return url
- end
- return nil
- end
-
- def localIdToUrl(id,auth_name)
- root_urls = {production: "https://digitalcollections.library.ucsc.edu",
- staging: "http://digitalcollections-staging.library.ucsc.edu",
- development: "http://#{Socket.gethostname}",
- test: "http://#{Socket.gethostname}"}
- return "#{root_urls[Rails.env.to_sym]}/authorities/show/local/#{auth_name}/#{id}"
- end
-
- def getLocalAuth(field_name)
- field = schema.get_property(field_name)
- # There is only ever one local authority per field, so just pick the first you find
- if vocs = field.vocabularies
- vocs.each do |voc|
- return voc["subauthority"] if voc["authority"].downcase == "local"
- end
- end
- return nil
- end
-
- def setAdminSet metadata
- return metadata if metadata[:admin_set_id]
- asets = AdminSet.where({title: "Bulk Ingest Set"})
- asets = AdminSet.find('admin_set/default') if asets.blank?
- metadata[:admin_set_id] = Array(asets).first.id unless asets.blank?
- return metadata
- end
-
- def setMetadataInheritance metadata
- return metadata if metadata[:metadataInheritance].present?
- metadata[:metadataInheritance] = operation.options["metadataInheritance"] unless operation.options["metadataInheritance"].blank?
- return metadata
- end
-
- def report_error type, message, **args
- puts "ERROR MESSAGE: #{message}"
- update(status: "error", message: message)
- args[:type]=type
- (@proxy_errors ||= []) << BulkOps::Error.new(**args)
- end
-
- def filename_prefix
- @filename_prefix ||= operation.filename_prefix
- end
-
- def record_exists?
- operation.record_exists? work_id
- end
-
- def get_removed_filesets(filestring)
- file_ids = split_values(filestring)
- file_ids.select{|file_id| record_exists?(file_id)}
-
- # This part handles filenames in addition to file ids. It doesn't work yet!
- # file_ids.map do |file_id|
- # If the filename is the id of an existing record, keep that
- # next(file_id) if (record_exists?(file_id))
- # If this is the label (i.e.filename) of an existing fileset, use that fileset id
- # TODO MAKE THIS WORK!!
- # next(filename) if (filename_exists?(filename))
- # File.join(BulkOps::Operation::INGEST_MEDIA_PATH, filename_prefix, filename)
- # end
- end
-
- def delete_file_set fileset_id
- BulkOps::DeleteFileSetJob.perform_later(fileset_id, operation.user.email )
- end

  end
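
Note on the removals above: most of this code reappears in BulkOps::Parser with small fixes along the way. find_previous_parent now starts at i = 1 and increments the counter (the 0.1.14 loop never advanced i, so it could spin indefinitely); the "row" shorthand in interpret_relationship_value now computes row_number + value.to_i (the old row_number - value subtracted a string from an integer); and format_worktype now prefers the proxy's own work_type before falling back to the operation's.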
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: bulk_ops
  version: !ruby/object:Gem::Version
- version: 0.1.14
+ version: 0.1.15
  platform: ruby
  authors:
  - Ned Henry, UCSC Library Digital Initiatives
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2019-10-02 00:00:00.000000000 Z
+ date: 2019-10-03 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rails
@@ -106,6 +106,7 @@ files:
  - lib/bulk_ops/github_access.rb
  - lib/bulk_ops/github_credential.rb
  - lib/bulk_ops/operation.rb
+ - lib/bulk_ops/parser.rb
  - lib/bulk_ops/queue_work_ingests_job.rb
  - lib/bulk_ops/relationship.rb
  - lib/bulk_ops/search_builder_behavior.rb