bulk_ops 0.1.9 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/bulk_ops/relationship.rb +14 -5
- data/lib/bulk_ops/verification.rb +4 -8
- data/lib/bulk_ops/version.rb +1 -1
- data/lib/bulk_ops/work_job.rb +1 -1
- data/lib/bulk_ops/work_proxy.rb +11 -5
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5b52469608636bbd2fc39c85bdc717b1c1777b7a5fa468b9e40137217b9ee0fe
|
4
|
+
data.tar.gz: 3c8a65f05674b6d43fa3d7a06bdc7a2a0424d56378fdf762dfde917c9c234575
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7e3386a5f0a2f33edac1eeb47cf6cfb95fbc11cb01d9c35ae7a547004d86c41d7fb390b1f3e1d429af3302e95107793b46522091afbb673faadca12facf79d34
|
7
|
+
data.tar.gz: 332a0abf7e52d7ac298c8b82bb0f79432e20a3432f095f6df9829b6a6827aa1b4286c8ea972484133e0be35d8fae9325a0435211bd0dba7900a0f677782adb35
|
data/lib/bulk_ops/relationship.rb
CHANGED
@@ -25,15 +25,24 @@ class BulkOps::Relationship < ActiveRecord::Base
|
|
25
25
|
when "title"
|
26
26
|
# TODO clean up solr query and add work type to it
|
27
27
|
query = "{!field f=title_tesim}#{object_identifier}"
|
28
|
-
objects = ActiveFedora::SolrService.instance.conn.get(ActiveFedora::SolrService.select_path,
|
29
|
-
|
30
|
-
|
31
|
-
|
28
|
+
objects = ActiveFedora::SolrService.instance.conn.get(ActiveFedora::SolrService.select_path,
|
29
|
+
params: { fq: query, rows: 100})["response"]["docs"].first
|
30
|
+
if objects.present?
|
31
|
+
return ActiveFedora::Base.find(objects.first["id"])
|
32
|
+
elsif work_type == "Collection"
|
33
|
+
return Collection.create(title: [object_identifier])
|
34
|
+
else
|
35
|
+
return false
|
36
|
+
end
|
32
37
|
when "identifier"
|
33
38
|
query = "{!field f=identifier_tesim}#{object_identifier}"
|
34
39
|
objects = ActiveFedora::SolrService.instance.conn.get(ActiveFedora::SolrService.select_path,params: { fq: query, rows: 100})["response"]["docs"]
|
35
40
|
return false if objects.blank?
|
36
|
-
return objects.first
|
41
|
+
return ActiveFedora::Base.find(objects.first["id"])
|
42
|
+
when "row"
|
43
|
+
object_proxy = WorkProxy.find_by(operation_id: work_proxy.operation.id,
|
44
|
+
row_number: object_identifier.to_i)
|
45
|
+
ActiveFedora::Base.find(object_proxy.work_id)
|
37
46
|
end
|
38
47
|
end
|
39
48
|
|
data/lib/bulk_ops/verification.rb
CHANGED
@@ -188,18 +188,19 @@ module BulkOps
|
|
188
188
|
def verify_internal_references
|
189
189
|
# TODO
|
190
190
|
# This is sketchy. Redo it.
|
191
|
-
get_spreadsheet.each do |row,row_num|
|
191
|
+
(metadata = get_spreadsheet).each do |row,row_num|
|
192
192
|
ref_id = get_ref_id(row)
|
193
193
|
BulkOps::Operation::RELATIONSHIP_COLUMNS.each do |relationship|
|
194
194
|
next unless (obj_id = row[relationship])
|
195
|
-
if (split = obj_id.split(':')).count == 2
|
195
|
+
if (split = obj_id.split(':')).present? && split.count == 2
|
196
196
|
ref_id = split[0].downcase
|
197
197
|
obj_id = split[1]
|
198
198
|
end
|
199
199
|
|
200
200
|
if ref_id == "row" || (ref_id == "id/row" && obj_id.is_a?(Integer))
|
201
|
+
obj_id = obj_id.to_i
|
201
202
|
# This is a row number reference. It should be an integer in the range of possible row numbers.
|
202
|
-
unless obj_id.
|
203
|
+
unless obj_id.present?(obj_id > 0) && (obj_id <= metadata.count)
|
203
204
|
@verification_errors << BulkOps::Error.new({type: :bad_object_reference, object_id: obj_id, row_number: row_num + ROW_OFFSET})
|
204
205
|
end
|
205
206
|
elsif ref_id == "id" || ref_id == "hyrax id" || (ref_id == "id/row" && (obj_id.is_a? Integer))
|
@@ -207,14 +208,9 @@ module BulkOps
|
|
207
208
|
unless record_exists?(obj_id)
|
208
209
|
@verification_errors << BulkOps::Error.new({type: :bad_object_reference, object_id: obj_id, row_number: row_num+ROW_OFFSET})
|
209
210
|
end
|
210
|
-
else
|
211
|
-
|
212
|
-
# This must be based on some other presumably unique field in hyrax, or a dummy field in the spreadsheet. We haven't added this functionality yet. Ignore for now.
|
213
|
-
|
214
211
|
end
|
215
212
|
end
|
216
213
|
end
|
217
214
|
end
|
218
|
-
|
219
215
|
end
|
220
216
|
end
|
data/lib/bulk_ops/version.rb
CHANGED
data/lib/bulk_ops/work_job.rb
CHANGED
@@ -58,7 +58,7 @@ class BulkOps::WorkJob < ActiveJob::Base
|
|
58
58
|
return
|
59
59
|
end
|
60
60
|
|
61
|
-
return unless define_work
|
61
|
+
return unless define_work(workClass)
|
62
62
|
|
63
63
|
user = User.find_by_email(user_email)
|
64
64
|
update_status "running", "Started background task at #{DateTime.now.strftime("%d/%m/%Y %H:%M")}"
|
data/lib/bulk_ops/work_proxy.rb
CHANGED
@@ -167,7 +167,8 @@ class BulkOps::WorkProxy < ActiveRecord::Base
|
|
167
167
|
# error_message = "cannot fetch remote label for url: #{value}"
|
168
168
|
# report_error( :cannot_retrieve_label , error_message, url: value, row_number: row_number) unless label
|
169
169
|
else
|
170
|
-
# It's a label, so get the id
|
170
|
+
# It's a label, so unescape it and get the id
|
171
|
+
value = unescape_csv(value)
|
171
172
|
id = get_remote_id(value, property: field_name_norm, authority: authority) || localAuthUrl(field_name_norm, value)
|
172
173
|
# label = value
|
173
174
|
report_error(:cannot_retrieve_url,
|
@@ -175,7 +176,7 @@ class BulkOps::WorkProxy < ActiveRecord::Base
|
|
175
176
|
url: value,
|
176
177
|
row_number: row_number) unless id
|
177
178
|
end
|
178
|
-
controlled_data[field_name_norm] << {id: id,
|
179
|
+
controlled_data[field_name_norm] << {id: id, remove: field_name.downcase.starts_with?("remove")}
|
179
180
|
end
|
180
181
|
end
|
181
182
|
|
@@ -210,6 +211,7 @@ class BulkOps::WorkProxy < ActiveRecord::Base
|
|
210
211
|
values.split(SEPARATOR).each do |value|
|
211
212
|
next if value.blank?
|
212
213
|
value = value.strip.encode('utf-8', :invalid => :replace, :undef => :replace, :replace => '_') unless value.blank?
|
214
|
+
value = unescape_csv(value)
|
213
215
|
(metadata[field_name] ||= []) << value
|
214
216
|
end
|
215
217
|
end
|
@@ -295,6 +297,7 @@ class BulkOps::WorkProxy < ActiveRecord::Base
|
|
295
297
|
raw_data.each do |field,value|
|
296
298
|
next if value.blank? or field.blank?
|
297
299
|
field = field.to_s
|
300
|
+
value = unescape_csv(value)
|
298
301
|
|
299
302
|
next if value == field
|
300
303
|
|
@@ -347,6 +350,10 @@ class BulkOps::WorkProxy < ActiveRecord::Base
|
|
347
350
|
return "row"
|
348
351
|
end
|
349
352
|
end
|
353
|
+
|
354
|
+
def unescape_csv(value)
|
355
|
+
value.gsub(/\\(['";,])/,'\1')
|
356
|
+
end
|
350
357
|
|
351
358
|
def format_worktype(value)
|
352
359
|
# format the value like a class name
|
@@ -384,9 +391,8 @@ class BulkOps::WorkProxy < ActiveRecord::Base
|
|
384
391
|
return nil unless (entries = Qa::Authorities::Local.subauthority_for(auth).search(value))
|
385
392
|
entries.each do |entry|
|
386
393
|
#require exact match
|
387
|
-
next unless entry["label"].
|
388
|
-
url = entry["url"]
|
389
|
-
url ||= entry["id"]
|
394
|
+
next unless entry["label"].force_encoding('UTF-8') == value.force_encoding('UTF-8')
|
395
|
+
url = entry["url"] || entry["id"]
|
390
396
|
# url = localIdToUrl(url,auth) unless url =~ URI::regexp
|
391
397
|
return url
|
392
398
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulk_ops
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.9
|
4
|
+
version: 0.1.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ned Henry, UCSC Library Digital Initiatives
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-08-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|