bulk_ops 0.1.23 → 0.2.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 3653b9554a93348ce398063d0b5fb98d01e075ac70efaee09b3081f49fa036e1
-   data.tar.gz: 01eb0bdb7084fb3e0d37b417d82b47772dc5d7734428f47f91168f81724c2139
+   metadata.gz: f9a35abfb31034307e62c7b7483b215447b848e1bb57f08d7200ae5293924e96
+   data.tar.gz: 77f0dd2a02a8343f945da3cd3f0a19cf9c590556ceff90bb86118032a2ed9192
  SHA512:
-   metadata.gz: 4d77355a39703eca5010a42b3bfcb0bcd872468e77b9b15e8f57bc68042dc684c2afe57d296c858cd5d6a68e60668c6bd2a11f4e3125e1d428dd5cd1e5dd1e2e
-   data.tar.gz: 6ee7e9c2d50231d555a9adde17a6c27b6f52041533e86a9727bb9d9caf2eafb9f8ba18e6cae76f8ef010980d3b09981e9c7e794ed54dd1b8dbab776060e923bc
+   metadata.gz: 5e0490d9f81743bbfbf87848654cff4fba774f2c5b218903535e732e42e72908382598741d452a6de16017cb43a9c19b3283c07d2ff5b03008b16afc8163cda0
+   data.tar.gz: 9123a66a499e37ef944854f70f49aa7d0de8f73af6461efe1949a7dbe75cead327e3b7fd1edd266965254758bb38d89d70cfd212fc90408c54a14da77f506b38
@@ -0,0 +1,14 @@
+ class RemoveRelationshipsAmmendWorkProxy < ActiveRecord::Migration[5.0]
+   def change
+
+     drop_table :bulk_ops_relationships
+
+     change_table :bulk_ops_work_proxies do |t|
+       t.integer :parent_id
+       t.integer :previous_sibling_id
+     end
+
+     remove_column :bulk_ops_operations, :operation_type
+
+   end
+ end
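The bulk_ops_relationships table is gone; parent and sibling links now live directly on the work proxies. As a hypothetical illustration (assuming the bulk_ops_work_proxies table is backed by a BulkOps::WorkProxy model; this helper is not part of the gem), sibling order can be recovered by walking the previous_sibling_id chain:

    # Hypothetical helper, not part of bulk_ops.
    def ordered_children(parent_proxy_id)
      children = BulkOps::WorkProxy.where(parent_id: parent_proxy_id).to_a
      by_previous_sibling = children.index_by(&:previous_sibling_id)
      ordered = []
      node = by_previous_sibling[nil] # the first sibling has no predecessor
      while node
        ordered << node
        node = by_previous_sibling[node.id]
      end
      ordered
    end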
@@ -34,8 +34,9 @@ module BulkOps
  OPTIONS_FILENAME = 'configuration.yml'
  ROW_OFFSET = 2

- dirstring = File.join( File.dirname(__FILE__), 'bulk_ops/**/*.rb')
- Dir[dirstring].each do |file|
+ dirstring = File.join( File.dirname(__FILE__), 'concerns/*.rb')
+ dirstring2 = File.join( File.dirname(__FILE__), 'bulk_ops/**/*.rb')
+ ((Dir[dirstring] || []) + Dir[dirstring2]).uniq.each do |file|
    begin
      require file
    rescue Exception => e
@@ -0,0 +1,8 @@
+ class BulkOps::ApplyOperationJob < ActiveJob::Base
+   queue_as :ingest
+
+   def perform(op_id)
+     BulkOps::Operation.find(op_id).apply
+   end
+
+ end
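This new job is a thin ActiveJob wrapper around the operation's apply method, so it is enqueued in the standard way:

    # Run the operation asynchronously on the :ingest queue
    BulkOps::ApplyOperationJob.perform_later(operation.id)

    # Or run it inline (e.g. from a Rails console) while debugging
    BulkOps::ApplyOperationJob.perform_now(operation.id)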
@@ -12,7 +12,7 @@ class BulkOps::CreateWorkJob < BulkOps::WorkJob
  end

  def define_work workClass
-   if record_exists?(@work_proxy.work_id)
+   if BulkOps::SolrService.record_exists?(@work_proxy.work_id)
      report_error "trying to ingest a work proxy that already has a work attached. Work id: #{@work_proxy.work_id} Proxy id: #{@work_proxy.id}"
      return false
    end
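Existence checks are now delegated to BulkOps::SolrService, whose implementation is not shown in this diff. A minimal sketch of what such a helper typically looks like in a Hyrax app, assuming it queries Solr rather than loading the object from Fedora (an assumption, not the gem's actual code):

    module BulkOps
      module SolrService
        # Assumed sketch: true if a Solr document with this id exists.
        def self.record_exists?(id)
          return false if id.blank?
          ActiveFedora::SolrService.query("id:#{id}", rows: 1).present?
        end
      end
    end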
@@ -196,7 +196,7 @@ class BulkOps::GithubAccess

  def create_pull_request message: false
    begin
-     message ||= "Apply update #{name} through Hyrax browser interface"
+     message ||= "Apply operation #{name} through Hyrax browser interface"
      pull = client.create_pull_request(repo, "master", name, message)
      pull["number"]
    rescue Octokit::UnprocessableEntity
@@ -38,10 +38,6 @@ module BulkOps
    states
  end

- def type
-   operation_type
- end
-
  def self.schema
    ScoobySnacks::METADATA_SCHEMA
  end
@@ -62,45 +58,73 @@ module BulkOps
    update(stage: new_stage)
  end

- def apply!
-   status = "#{type}ing"
-   update({stage: "running", message: "#{type.titleize} initiated by #{user.name || user.email}"})
-   # @stage = "running"
-   final_spreadsheet
-
-   # This commented line currently fails because it doesn't pull from the master branch by default
-   # It's usually already verified, but maybe we should fix this for double-checking
-   # in the future
-   # return unless verify
+ def destroy_all_works_and_proxies
+   work_proxies.each do |proxy|
+     if BulkOps::SolrService.record_exists?(proxy.work_id)
+       ActiveFedora::Base.find(proxy.work_id).destroy
+     end
+     proxy.destroy
+   end
+   update(stage: "waiting",
+          status: "reverted changes")
+
+ end

-   apply_ingest! if ingest?
-   apply_update! if update?
+ def destroy_all_works
+   work_proxies.each do |proxy|
+     if BulkOps::SolrService.record_exists?(proxy.work_id)
+       ActiveFedora::Base.find(proxy.work_id).destroy
+     end
+     proxy.update(status: "destroyed", message: "The work created by this proxy was destroyed by the user")
+   end
+   update(stage: "waiting",
+          status: "reverted changes")
  end

- def apply_ingest!
-   #Destroy any existing work proxies (which should not exist for an ingest). Create new proxies from finalized spreadsheet only.
-   work_proxies.each{|proxy| proxy.destroy!}
+ def destroy_all_proxies
+   work_proxies.each do |proxy|
+     proxy.destroy
+   end
+   update(stage: "waiting",
+          status: "reverted changes")
+ end

-   #create a work proxy for each work in the spreadsheet, creating filesets where appropriate
-   @metadata.each_with_index do |values,row_number|
-     next if values.to_s.gsub(',','').blank?
+ def apply!
+   update({stage: "running",
+           status: "OK",
+           message: "Bulk operation initiated by #{user.name || user.email}"})
+   # We should now be on the master branch. Make sure the correct spreadsheet version is loaded.
+   final_spreadsheet

-     next if BulkOps::Parser.is_file_set? @metadata, row_number
+   # In case this operation has run before, gather all work proxies that are already complete
+   # and exclude them from this run.
+   complete_proxies = work_proxies.select{|proxy| proxy.status == "complete" && proxy.work_id.present?}
+   incomplete_row_numbers = Array(0..@metadata.length-1) - complete_proxies.map(&:row_number)

-     work_proxies.create(status: "queued",
+   # Destroy all proxies corresponding to incomplete rows
+   (work_proxies - complete_proxies).each{|proxy| proxy.destroy!}
+
+   # Create a new work proxy for each incomplete row.
+   # All the proxies need to exist before parsing in order to correctly recognize relationships.
+   incomplete_row_numbers.each do |row_number|
+     values = @metadata[row_number]
+     next if values.to_s.gsub(',','').blank?
+     next if BulkOps::Parser.is_file_set? @metadata, row_number
+     work_proxies.create(status: "new",
                          last_event: DateTime.now,
-                        row_number: row_number,
+                        work_type: work_type,
+                        row_number: row_number,
                          visibility: options['visibility'],
                          message: "created during ingest initiated by #{user.name || user.email}")
+
    end
-
-   # make sure the work proxies we just created are loaded in memory
+   # Reload the operation so that it can recognize its new proxies
    reload
-   #loop through the work proxies to create a job for each work
-   @metadata.each_with_index do |values,row_number|
+   # Parse each spreadsheet row and create a background job for each proxy we just created
+   incomplete_row_numbers.each do |row_number|
+     values = @metadata[row_number]
      proxy = work_proxies.find_by(row_number: row_number)
      proxy.update(message: "interpreted at #{DateTime.now.strftime("%d/%m/%Y %H:%M")} " + proxy.message)
-     data = BulkOps::Parser.new(proxy, @metadata).interpret_data(raw_row: values)
+     data = BulkOps::Parser.new(proxy, @metadata, options).interpret_data(raw_row: values)
      next unless proxy.proxy_errors.blank?
      BulkOps::WorkJob.perform_later(proxy.work_type || "Work",
                                     user.email,
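Because completed proxies are skipped and only incomplete rows are rebuilt, apply! is now effectively resumable. Re-running a partially failed operation (variable names illustrative) reprocesses only the rows that never finished:

    op = BulkOps::Operation.find(op_id)
    op.apply!  # first run: some rows fail; their proxies never reach "complete"
    op.apply!  # second run: only rows without a complete proxy are re-parsed and re-enqueued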
@@ -112,13 +136,6 @@ module BulkOps
    report_errors!
  end

- def delete_all
-   work_proxies.each do |proxy|
-     ActiveFedora::Base.find(proxy.work_id).destroy
-     proxy.update(status: "destroyed", message: "The work created by this proxy was destroyed by the user")
-   end
- end
-
  def check_if_finished
    return unless stage == "running" && !busy?

@@ -208,7 +225,7 @@ module BulkOps

  def report_errors!
    error_file_name = BulkOps::Error.write_errors!(accumulated_errors, git)
-   notify!(subject: "Errors initializing bulk #{type} in Hycruz", message: "Hycruz encountered some errors while it was setting up your #{type} and preparing to begin. For most types of errors, the individual rows of the spreadsheet with errors will be ignored and the rest will proceed. Please consult the #{type} summary for real time information on the status of the #{type}. Details about these initialization errors can be seen on Github at the following url: https://github.com/#{git.repo}/blob/#{git.name}/#{git.name}/errors/#{error_file_name}") if error_file_name
+   notify!(subject: "Errors initializing bulk operation in Hycruz", message: "Hycruz encountered some errors while it was setting up your operation and preparing to begin. For most types of errors, the individual rows of the spreadsheet with errors will be ignored and the rest will proceed. Please consult the operation summary for real time information on the status of the operation. Details about these initialization errors can be seen on Github at the following url: https://github.com/#{git.repo}/blob/#{git.name}/#{git.name}/errors/#{error_file_name}") if error_file_name
  end

  def create_pull_request message: false
@@ -222,7 +239,7 @@ module BulkOps
    update(stage: "pending")
  end

- def create_branch(fields: nil, work_ids: nil, options: nil, operation_type: :ingest)
+ def create_branch(fields: nil, work_ids: nil, options: nil)
    git.create_branch!
    bulk_ops_dir = Gem::Specification.find_by_name("bulk_ops").gem_dir

@@ -238,13 +255,12 @@ module BulkOps
      options.each { |option, value| full_options[option] = value }

      full_options[name] = name
-     full_options[type] = type
      full_options[status] = status

      git.update_options full_options
    end

-   create_new_spreadsheet(fields: fields, work_ids: work_ids) if operation_type == :ingest
+   create_new_spreadsheet(fields: fields, work_ids: work_ids)
  end

  def get_spreadsheet return_headers: false
@@ -298,14 +314,6 @@ module BulkOps
    return false
  end

- def ingest?
-   type == "ingest"
- end
-
- def update?
-   type == "update"
- end
-
  def delete_branch
    git.delete_branch!
  end
@@ -1,4 +1,3 @@
-
  class BulkOps::Parser
    require 'uri'

@@ -6,6 +5,27 @@ class BulkOps::Parser

  delegate :relationships, :operation, :row_number, :work_id, :visibility, :work_type, :reference_identifier, :order, to: :proxy

+ include BulkOps::InterpretRelationshipsBehavior
+ include BulkOps::InterpretFilesBehavior
+ include BulkOps::InterpretScalarBehavior
+ include BulkOps::InterpretOptionsBehavior
+ include BulkOps::InterpretControlledBehavior
+
+ def self.unescape_csv(value)
+   value.gsub(/\\(['";,])/,'\1')
+ end
+
+ def self.split_values value_string
+   # Split values on each un-escaped separator character (the escape character is '\')
+   # Then replace all escaped separator characters with un-escaped versions
+   value_string.split(/(?<!\\)#{BulkOps::SEPARATOR}/).map{|val| val.gsub("\\#{BulkOps::SEPARATOR}",BulkOps::SEPARATOR).strip}
+ end
+
+ def self.normalize_relationship_field_name field
+   normfield = field.to_s.downcase.parameterize.gsub(/[_\s-]/,'')
+   BulkOps::RELATIONSHIP_FIELDS.find{|rel_field| normfield == rel_field }
+ end
+
  def self.is_file_set? metadata, row_number
    return false unless metadata[row_number].present?
    # If the work type is explicitly specified, use that
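For reference, the new class-level helpers behave like this (assuming BulkOps::SEPARATOR is ';' purely for illustration; the constant is defined elsewhere in the gem):

    BulkOps::Parser.split_values('dogs; cats\; kittens; birds')
    # => ["dogs", "cats; kittens", "birds"]   (the escaped separator survives the split)

    BulkOps::Parser.unescape_csv("O\\'Connor")
    # => "O'Connor"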
@@ -23,12 +43,32 @@ class BulkOps::Parser
    return true
  end

- def initialize prx, metadata_sheet=nil
+ def self.get_negating_metadata(work_id, metadata={})
+   return false unless BulkOps::SolrService.record_exists?(work_id)
+   work = ActiveFedora::Base.find(work_id)
+   schema = ScoobySnacks::METADATA_SCHEMA
+   schema.all_fields.each do |field|
+     field_key = field.controlled? ? "#{field.name}_attributes" : field.name
+     metadata[field_key] ||= (field.multiple? ? [] : nil)
+     if field.controlled?
+       values = Array(work.send(field.name)).map{|value| {id: value.id, _destroy: true} }
+       if field.multiple?
+         metadata[field_key] += values
+       else
+         metadata[field_key] = values.first
+       end
+     end
+   end
+   return metadata
+ end
+
+ def initialize prx, metadata_sheet=nil, options={}
    @proxy = prx
-   @raw_data = (metadata_sheet || proxy.operation.metadata)
+   @raw_data = (metadata_sheet || operation.metadata)
    @raw_row = @raw_data[@proxy.row_number]
    @metadata = {}
    @parsing_errors = []
+   @options = options || operation.options
  end

  def interpret_data raw_row: nil, raw_data: nil, proxy: nil
@@ -42,6 +82,9 @@ class BulkOps::Parser
    interpret_relationship_fields
    setMetadataInheritance
    interpret_option_fields
+   if @proxy.work_id.present? && @options['discard_existing_metadata']
+     @metadata.deep_merge!(self.class.get_negating_metadata(@proxy.work_id))
+   end
    interpret_file_fields
    interpret_controlled_fields
    interpret_scalar_fields
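When a row updates an existing work and the discard_existing_metadata option is set, get_negating_metadata seeds the parsed metadata with values that blank out everything currently on the work, so the spreadsheet's values replace rather than append. Roughly, for a hypothetical schema with a scalar title field and a controlled subject field:

    BulkOps::Parser.get_negating_metadata(work.id)
    # => { "title" => [],                          # scalar fields reset to empty
    #      "subject_attributes" => [
    #        { id: "<existing term id>", _destroy: true }   # controlled values marked for removal
    #      ],
    #      ... }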
@@ -66,7 +109,7 @@ class BulkOps::Parser
  end

  def connect_existing_work
-   return unless (column_name = operation.options["update_identifier"])
+   return unless (column_name = @options["update_identifier"])
    return unless (key = @raw_row.to_h.keys.find{|key| key.to_s.parameterize.downcase.gsub("_","") == column_name.to_s.parameterize.downcase.gsub("_","")})
    return unless (value = @raw_row[key]).present?
    return unless (work_id = find_work_id_from_unique_metadata(key, value))
@@ -83,351 +126,6 @@ class BulkOps::Parser
    return response["docs"][0]["id"]
  end

- def interpret_controlled_fields
-
-   # The labels array tracks the contents of columns marked as labels,
-   # which may require special validation
-   labels = {}
-
-   # This hash is populated with relevant data as we loop through the fields
-   controlled_data = {}
-
-   @raw_row.each do |field_name, value|
-     next if value.blank? or field_name.blank?
-     field_name = field_name.to_s
-
-     #If our CSV interpreter is feeding us the headers as a line, ignore it.
-     next if field_name == value
-
-     #check if they are using the 'field_name.authority' syntax
-     authority = nil
-     if ((split=field_name.split('.')).count == 2)
-       authority = split.last
-       field_name = split.first
-     end
-
-     # get the field name, if this column is a metadata field
-     field_name_norm = find_field_name(field_name)
-     field = schema.get_field(field_name_norm)
-
-     # Ignore anything that isn't a controlled field
-     next unless field.present? && field.controlled?
-
-     # Keep track of label fields
-     if field_name.downcase.ends_with?("label")
-       next if operation.options["ignore_labels"]
-       labels[field_name_norm] ||= []
-       labels[field_name_norm] += split_values value
-       next unless operation.options["import_labels"]
-     end
-
-     remove = field_name.downcase.starts_with?("remove") || field_name.downcase.starts_with?("delete")
-
-     # handle multiple values
-     value_array = split_values(value)
-     controlled_data[field_name_norm] ||= [] unless value_array.blank?
-     value_array.each do |value|
-       # Decide of we're dealing with a label or url
-       # It's an ID if it's a URL and the name doesn't end in 'label'
-       value.strip!
-       if value =~ /^#{URI::regexp}$/ and !field_name.downcase.ends_with?("label")
-         value_id = value
-         # label = WorkIndexer.fetch_remote_label(value)
-         # error_message = "cannot fetch remote label for url: #{value}"
-         # report_error( :cannot_retrieve_label , error_message, url: value, row_number: row_number) unless label
-       else
-         # It's a label, so unescape it and get the id
-         value = unescape_csv(value)
-         value_id = get_remote_id(value, property: field_name_norm, authority: authority) || localAuthUrl(field_name_norm, value)
-         # label = value
-         report_error(:cannot_retrieve_url,
-                      message: "cannot find or create url for controlled vocabulary label: #{value}",
-                      url: value,
-                      row_number: row_number) unless value_id
-       end
-       controlled_data[field_name_norm] << {id: value_id, remove: field_name.downcase.starts_with?("remove")}
-     end
-   end
-
-   # Actually add all the data
-   controlled_data.each do |property_name, data|
-     @metadata["#{property_name}_attributes"] ||= [] unless data.blank?
-     data.uniq.each do |datum|
-       atts = {"id" => datum[:id]}
-       atts["_delete"] = true if datum[:remove]
-       @metadata["#{property_name}_attributes"] << atts
-     end
-   end
- end
-
- def interpret_scalar_fields
-   @raw_row.each do |field, values|
-     next if values.blank? or field.nil? or field == values
-     # get the field name, if this column is a metadata field
-     next unless field_name = find_field_name(field.to_s)
-     field = schema.get_field(field_name)
-     # Ignore controlled fields
-     next if field.controlled?
-     split_values(values).each do |value|
-       next if value.blank?
-       value = value.strip.encode('utf-8', :invalid => :replace, :undef => :replace, :replace => '_') unless value.blank?
-       value = unescape_csv(value)
-       (@metadata[field_name] ||= []) << value
-     end
-   end
- end
-
- def interpret_file_fields
-   # This method handles file additions and deletions from the spreadsheet
-   # if additional files need to be deleted because the update is set to replace
-   # some or all existing files, those replacement-related deletions are handled
-   # by the BulkOps::Operation.
-   #
-
-   @raw_row.each do |field, value|
-     next if value.blank? or field.blank?
-     field = field.to_s
-     #If our CSV interpreter is feeding us the headers as a line, ignore it.
-     next if field == value
-
-     # Check if this is a file field, and whether we are removing or adding a file
-     next unless (action = BulkOps::Verification.is_file_field?(field))
-
-     # Move on if this field is the name of another property (e.g. masterFilename)
-     next if find_field_name(field)
-
-     # Check if we are removing a file
-     if action == "remove"
-       get_removed_filesets(value).each { |fileset_id| delete_file_set(file_set_id) }
-     else
-       # Add a file
-       operation.get_file_paths(value).each do |filepath|
-         begin
-           uploaded_file = Hyrax::UploadedFile.create(file: File.open(filepath), user: operation.user)
-           (@metadata[:uploaded_files] ||= []) << uploaded_file.id unless uploaded_file.id.nil?
-         rescue Exception => e
-           report_error(:upload_error,
-                        message: "Error opening file: #{ filepath } -- #{e}",
-                        file: File.join(BulkOps::INGEST_MEDIA_PATH,filename),
-                        row_number: row_number)
-         end
-       end
-     end
-
-     # Check if any of the upcoming rows are child filesets
-     i = 1
-     while self.class.is_file_set?(@metadata,row_number+i)
-       child_row.each do |field,value|
-         next if value.blank?
-         title = value if ["title","label"].include?(field.downcase.strip)
-         if BulkOps::Verification.is_file_field?(field)
-           operation.get_file_paths(value).each do |filepath|
-             uploaded_file = Hyrax::UploadedFile.create(file: File.open(filepath), user: operation.user)
-           end
-         end
-       end
-       i+=1
-     end
-
-   end
- end
-
- def interpret_option_fields
-   @raw_row.each do |field,value|
-     next if value.blank? or field.blank?
-     field = field.to_s
-     next if value == field
-
-     normfield = field.downcase.parameterize.gsub(/[_\s-]/,'')
-     if ["visibility", "public"].include?(normfield)
-       @proxy.update(visibility: format_visibility(value))
-
-     end
-     if ["worktype","model","type"].include?(normfield)
-       @proxy.update(work_type: format_worktype(value) )
-     end
-     if ["referenceidentifier",
-         "referenceid",
-         "refid",
-         "referenceidentifiertype",
-         "referenceidtype",
-         "refidtype",
-         "relationshipidentifier",
-         "relationshipid",
-         "relationshipidentifiertype",
-         "relationshipidtype",
-         "relid",
-         "relidtype"].include?(normfield)
-       @proxy.update(reference_identifier: format_reference_id(value))
-     end
-   end
- end
-
- def interpret_relationship_fields
-   @raw_row.each do |field,value|
-     next if value.blank? or field.blank?
-     field = field.to_s
-     value = unescape_csv(value)
-     identifer_type = reference_identifier
-
-     next if value == field
-
-     # Correctly interpret the notation "parent:id", "parent id" etc in a column header
-     if (split = field.split(/[:_\-\s]/)).count == 2
-       identifier_type = split.last
-       relationship_type = split.first.to_s
-     else
-       relationship_type = field
-     end
-
-     relationship_type = self.class.normalize_relationship_field_name(relationship_type)
-     case relationship_type
-     when "order"
-       # If the field specifies the object's order among siblings
-       @proxy.update(order: value.to_f)
-       next
-     when "collection"
-       # If the field specifies the name or ID of a collection,
-       # find or create the collection and update the metadata to match
-       col = find_or_create_collection(value)
-       ( @metadata[:member_of_collection_ids] ||= [] ) << col.id if col
-       next
-     when "parent", "child"
-
-       # correctly interpret the notation "id:a78C2d81"
-       identifier_type, object_identifier = interpret_relationship_value(identifier_type, value)
-
-       relationship_parameters = { work_proxy_id: @proxy.id,
-                                   identifier_type: identifier_type,
-                                   relationship_type: relationship_type,
-                                   object_identifier: object_identifier,
-                                   status: "new"}
-
-       #add previous sibling link if necessary
-       previous_value = @raw_data[row_number-1][field]
-       # Check if this is a parent relationship, and the previous row also has one
-       if previous_value.present? && (relationship_type == "parent")
-         # Check if the previous row has the same parent as this row
-         if object_identifier == interpret_relationship_value(identifier_type, previous_value, field).last
-           # If so, set the previous sibling parameter on the relationshp
-           # to the id for the proxy associated with the previous row
-           relationship_parameters[:previous_sibling] = operation.work_proxies.find_by(row_number: row_number-1).id
-         end
-       end
-       BulkOps::Relationship.create(relationship_parameters)
-     end
-   end
- end
-
- def self.normalize_relationship_field_name field
-   normfield = field.downcase.parameterize.gsub(/[_\s-]/,'')
-   BulkOps::RELATIONSHIP_FIELDS.find{|rel_field| normfield == rel_field }
- end
-
- def find_previous_parent field="parent"
-   #Return the row number of the most recent preceding row that does
-   # not itself have a parent defined
-   i = 1;
-   while (prev_row = raw_data[row_number - i])
-     return (row_number - i) if prev_row[field].blank?
-     i += 1
-   end
- end
-
- def interpret_relationship_value id_type, value, field="parent"
-   #Handle "id:20kj4259" syntax if it hasn't already been handled
-   if (split = value.to_s.split(":")).count == 2
-     id_type, value = split.first
-     value = split.last
-   end
-   #Handle special shorthand syntax for refering to relative row numbers
-   if id_type == "row"
-     #if the value is an integer
-     if value =~ /\A[-+]?[0-9]+\z/
-       if value.to_i < 0
-         # if given a negative integer, count backwards from the current row (remember that value.to_i is negative)
-         return [id_type,row_number + value.to_i]
-       elsif value.to_i > 0
-         # if given a positive integer, remove the row offset
-         value = (value.to_i - BulkOps::ROW_OFFSET).to_s
-       end
-     elsif value.to_s.downcase.include?("prev")
-       # if given any variation of the word "previous", get the first preceding row with no parent of its own
-       return [id_type,find_previous_parent(field)]
-     end
-   end
-   return [id_type,value]
- end
-
- def unescape_csv(value)
-   value.gsub(/\\(['";,])/,'\1')
- end
-
-
- def format_worktype(value)
-   # format the value like a class name
-   type = value.titleize.gsub(/[-_\s]/,'')
-   # reject it if it isn't a defined class
-   type = false unless Object.const_defined? type
-   # fall back to the work type defined by the operation, or a standard "Work"
-   return type ||= work_type || operation.work_type || "Work"
- end
-
- def format_visibility(value)
-   case value.downcase
-   when "public", "open", "true"
-     return "open"
-   when "campus", "ucsc", "institution"
-     return "ucsc"
-   when "restricted", "private", "closed", "false"
-     return "restricted"
-   end
- end
-
-
- def mintLocalAuthUrl(auth_name, value)
-   value.strip!
-   id = value.parameterize
-   auth = Qa::LocalAuthority.find_or_create_by(name: auth_name)
-   entry = Qa::LocalAuthorityEntry.create(local_authority: auth,
-                                          label: value,
-                                          uri: id)
-   return localIdToUrl(id,auth_name)
- end
-
- def findAuthUrl(auth, value)
-   value.strip!
-   return nil if auth.nil?
-   return nil unless (entries = Qa::Authorities::Local.subauthority_for(auth).search(value))
-   entries.each do |entry|
-     #require exact match
-     next unless entry["label"].force_encoding('UTF-8') == value.force_encoding('UTF-8')
-     url = entry["url"] || entry["id"]
-     # url = localIdToUrl(url,auth) unless url =~ URI::regexp
-     return url
-   end
-   return nil
- end
-
- def localIdToUrl(id,auth_name)
-   root_urls = {production: "https://digitalcollections.library.ucsc.edu",
-                staging: "http://digitalcollections-staging.library.ucsc.edu",
-                development: "http://#{Socket.gethostname}",
-                test: "http://#{Socket.gethostname}"}
-   return "#{root_urls[Rails.env.to_sym]}/authorities/show/local/#{auth_name}/#{id}"
- end
-
- def getLocalAuth(field_name)
-   field = schema.get_property(field_name)
-   # There is only ever one local authority per field, so just pick the first you find
-   if vocs = field.vocabularies
-     vocs.each do |voc|
-       return voc["subauthority"] if voc["authority"].downcase == "local"
-     end
-   end
-   return nil
- end
-
  def setAdminSet
    return if @metadata[:admin_set_id]
    asets = AdminSet.where({title: "Bulk Ingest Set"})
@@ -437,7 +135,7 @@ class BulkOps::Parser

  def setMetadataInheritance
    return if @metadata[:metadataInheritance].present?
-   @metadata[:metadataInheritance] = operation.options["metadataInheritance"] unless operation.options["metadataInheritance"].blank?
+   @metadata[:metadataInheritance] = @options["metadataInheritance"] unless @options["metadataInheritance"].blank?
  end

  def report_error type, message, **args
@@ -447,75 +145,13 @@ class BulkOps::Parser
    (@parsing_errors ||= []) << BulkOps::Error.new(**args)
  end

- def get_removed_filesets(filestring)
-   file_ids = split_values(filestring)
-   file_ids.select{|file_id| record_exists?(file_id)}
-
-   # This part handles filenames in addition to file ids. It doesn't work yet!
-   # file_ids.map do |file_id|
-     # If the filename is the id of an existing record, keep that
-     # next(file_id) if (record_exists?(file_id))
-     # If this is the label (i.e.filename) of an existing fileset, use that fileset id
-     # TODO MAKE THIS WORK!!
-     # next(filename) if (filename_exists?(filename))
-     # File.join(BulkOps::INGEST_MEDIA_PATH, filename_prefix, filename)
-   # end
- end
-
- def delete_file_set fileset_id
-   BulkOps::DeleteFileSetJob.perform_later(fileset_id, operation.user.email )
- end
-
- def record_exists? id
-   operation.record_exists? id
- end
-
- def localAuthUrl(property, value)
-   return value if (auth = getLocalAuth(property)).nil?
-   url = findAuthUrl(auth, value) || mintLocalAuthUrl(auth,value)
-   return url
- end
-
- def find_collection(collection)
-   cols = Collection.where(id: collection)
-   cols += Collection.where(title: collection).select{|col| col.title.first == collection}
-   return cols.last unless cols.empty?
-   return false
- end
-
- def find_or_create_collection(collection)
-   col = find_collection(collection)
-   return col if col
-   return false if collection.to_i > 0
-   col = Collection.create(title: [collection.to_s], depositor: operation.user.email, collection_type: Hyrax::CollectionType.find_by(title:"User Collection"))
- end
-
- def get_remote_id(value, authority: nil, property: nil)
-   return false
-   #TODO retrieve URL for this value from the specified remote authr
- end
-
- def format_param_name(name)
-   name.titleize.gsub(/\s+/, "").camelcase(:lower)
- end
-
- def schema
-   ScoobySnacks::METADATA_SCHEMA
- end
-
  def find_field_name(field)
    operation.find_field_name(field)
  end

- def downcase_first_letter(str)
-   return "" unless str
-   str[0].downcase + str[1..-1]
+ def schema
+   ScoobySnacks::METADATA_SCHEMA
  end

- def split_values value_string
-   # Split values on all un-escaped separator character (escape character is '\')
-   # Then replace all escaped separator charactors with un-escaped versions
-   value_string.split(/(?<!\\)#{BulkOps::SEPARATOR}/).map{|val| val.gsub("\\#{BulkOps::SEPARATOR}",BulkOps::SEPARATOR).strip}
- end

  end