bulk_ops 0.1.21 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/bulk_ops/error.rb +31 -0
- data/lib/bulk_ops/operation.rb +19 -11
- data/lib/bulk_ops/parser.rb +7 -4
- data/lib/bulk_ops/version.rb +1 -1
- data/lib/bulk_ops/work_job.rb +10 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d0b68ae5923dae38908ff5382d52aba0fa514d0cf9e714eefcfb46013ec9ce02
|
4
|
+
data.tar.gz: 175b647ebf14fb4e432861c0185d9bf142d666e4df29129631d064517f55f588
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4209b3080bd3e6b4e5df1ef727e1db943252b0d32169bb080db61e6ed01b4ba93fff56303a38d6e655eda22f82377e06a1d847c7d47df8d664f7af6e5bf49d75
|
7
|
+
data.tar.gz: ac685fa72de08e26d4fe403b57143399602986aa3cf2600c00f2c55a86788bdefa5cfc0dc250d364a8b808ed1f856ea8f84c5e7dd8c88107859d21783b6ca55c
|
data/lib/bulk_ops/error.rb
CHANGED
@@ -137,6 +137,37 @@ class BulkOps::Error
|
|
137
137
|
message += "An example of a missing filename is: #{errors.first.file}\n"
|
138
138
|
end
|
139
139
|
|
140
|
+
when :relationship_error
|
141
|
+
message = "\n-- Errors resolving relationships --\n "
|
142
|
+
message += "There were issues resolving #{errors.count} relationships.\n"
|
143
|
+
if errors.count < max_error
|
144
|
+
message += "errors:\n"
|
145
|
+
message += errors.map{|er| "Row #{er.row_number}, relationship ##{er.object_id}: #{er.message}"}.join("\n")
|
146
|
+
else
|
147
|
+
# Too many errors to list individually, so show only the first one as an example.
message += "An example of an error is: Row #{errors.first.row_number}, relationship ##{errors.first.object_id}: #{errors.first.message}\n"
|
148
|
+
end
|
149
|
+
|
150
|
+
when :ingest_failure
|
151
|
+
message = "\n-- Ingested File is Broken or Missing --\n "
|
152
|
+
message += "After the ingest completed, we had issues finding and re-saving the ingested works associated with #{errors.count} rows.\n"
|
153
|
+
if errors.count < max_error
|
154
|
+
message += "Problem rows:\n"
|
155
|
+
message += errors.map{|er| "#{er.row_number} - proxy ##{er.object_id}"}.join("\n")
|
156
|
+
else
|
157
|
+
message += "An example of a failed ingest is row #{errors.first.row_number} with work proxy #{errors.first.object_id} \n"
|
158
|
+
end
|
159
|
+
|
160
|
+
when :id_not_unique
|
161
|
+
message = "\n-- Multiple works shared a supposedly unique identifier, and we don't know which one to edit --\n "
|
162
|
+
if errors.count < max_error
|
163
|
+
message += "Problem rows:\n"
|
164
|
+
message += errors.map{|er| "#{er.row_number} - proxy ##{er.object_id} - #{er.options_name}: #{er.option_values}"}.join("\n")
|
165
|
+
else
|
166
|
+
# Too many errors to list individually, so show only the first offending row as an example.
message += "An example of a row that identifies multiple works is #{errors.first.row_number} with work proxy #{errors.first.object_id} using the identifier: #{errors.first.options_name} - #{errors.first.option_values} \n"
|
167
|
+
end
|
168
|
+
|
169
|
+
else
|
170
|
+
message = "\n-- There were other errors of an unrecognized type. Check the application logs --\n "
|
140
171
|
end
|
141
172
|
return message
|
142
173
|
end
|
data/lib/bulk_ops/operation.rb
CHANGED
@@ -125,18 +125,26 @@ module BulkOps
|
|
125
125
|
update(stage: "finishing")
|
126
126
|
|
127
127
|
# Attempt to resolve each dangling (objectless) relationship
|
128
|
-
|
129
|
-
|
128
|
+
relationships = work_proxies.reduce([]){|all_rels,proxy| all_rels + proxy.relationships.select{|rel| rel.status == "pending"}}
|
129
|
+
relationships.each do |rel|
|
130
|
+
begin
|
131
|
+
rel.resolve!
|
132
|
+
rescue StandardError => e
|
133
|
+
# Record the failure against the relationship being resolved; `rel` is the only loop variable in scope here.
# NOTE(review): assumes the relationship exposes its owning proxy as `rel.work_proxy` — confirm against BulkOps::Relationship.
@operation_errors << BulkOps::Error.new(:relationship_error, row_number: rel.work_proxy.row_number, object_id: rel.id, message: "#{e.class} - #{e.message}" )
|
134
|
+
end
|
130
135
|
end
|
131
136
|
|
132
|
-
work_proxies.each do |proxy|
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
+
work_proxies.each do |proxy|
|
138
|
+
work = nil
|
139
|
+
begin
|
140
|
+
work = Work.find(proxy.work_id).save
|
141
|
+
rescue StandardError => e
|
142
|
+
@operation_errors << BulkOps::Error.new(:ingest_failure, row_number: proxy.row_number, object_id: proxy.id, message: "#{e.class} - #{e.message}")
|
143
|
+
end
|
137
144
|
end
|
138
145
|
|
139
|
-
|
146
|
+
new_stage = accumulated_errors.blank? ? "complete" : "errors"
|
147
|
+
update(stage: new_stage)
|
140
148
|
report_errors!
|
141
149
|
lift_holds
|
142
150
|
end
|
@@ -284,9 +292,9 @@ module BulkOps
|
|
284
292
|
end
|
285
293
|
|
286
294
|
def busy?
|
287
|
-
return true if work_proxies.
|
288
|
-
return true if work_proxies.
|
289
|
-
return true if work_proxies.
|
295
|
+
return true if work_proxies.any?{|prx| prx.status.downcase == "running"}
|
296
|
+
return true if work_proxies.any?{|prx| prx.status.downcase == "queued"}
|
297
|
+
return true if work_proxies.any?{|prx| prx.status.downcase == "starting"}
|
290
298
|
return false
|
291
299
|
end
|
292
300
|
|
data/lib/bulk_ops/parser.rb
CHANGED
@@ -8,9 +8,9 @@ class BulkOps::Parser
|
|
8
8
|
def self.is_file_set? metadata, row_number
|
9
9
|
return false unless metadata[row_number].present?
|
10
10
|
# If the work type is explicitly specified, use that
|
11
|
-
if (type_key = metadata.keys.find{|key| key.downcase.gsub(/[_\-\s]/,"").include?("worktype") })
|
12
|
-
return true if metadata[type_key].downcase == "fileset"
|
13
|
-
return false if metadata[type_key].present?
|
11
|
+
if (type_key = metadata[row_number].to_h.keys.find{|key| key.downcase.gsub(/[_\-\s]/,"").include?("worktype") })
|
12
|
+
return true if metadata[row_number][type_key].downcase == "fileset"
|
13
|
+
return false if metadata[row_number][type_key].present?
|
14
14
|
end
|
15
15
|
# Otherwise, if there are any valid fields other than relationship or file fields, call it a work
|
16
16
|
metadata[row_number].each do |field, value|
|
@@ -73,8 +73,11 @@ class BulkOps::Parser
|
|
73
73
|
|
74
74
|
def find_work_id_from_unique_metadata field_name, value
|
75
75
|
field_solr_name = schema.get_field(field_name).solr_name
|
76
|
-
query = "_query_:\"{!
|
76
|
+
query = "_query_:\"{!dismax qf=#{field_solr_name}}#{value}\""
|
77
77
|
response = ActiveFedora::SolrService.instance.conn.get(ActiveFedora::SolrService.select_path, params: { fq: query, rows: 1, start: 0})["response"]
|
78
|
+
if response["numFound"] > 1
|
79
|
+
report_error( :id_not_unique , "", row_number: row_number, object_id: @proxy.id, options_name: field_name, option_values: value ) unless label
|
80
|
+
end
|
78
81
|
return response["docs"][0]["id"]
|
79
82
|
end
|
80
83
|
|
data/lib/bulk_ops/version.rb
CHANGED
data/lib/bulk_ops/work_job.rb
CHANGED
@@ -23,7 +23,11 @@ class BulkOps::WorkJob < ActiveJob::Base
|
|
23
23
|
# Delete any UploadedFiles. These take up tons of unnecessary disk space.
|
24
24
|
@work.file_sets.each do |fileset|
|
25
25
|
if uf = Hyrax::UploadedFile.find_by(file: fileset.label)
|
26
|
-
|
26
|
+
begin
|
27
|
+
uf.destroy!
|
28
|
+
rescue StandardError => e
|
29
|
+
Rails.logger.warn("Could not delete uploaded file. #{e.class} - #{e.message}")
|
30
|
+
end
|
27
31
|
end
|
28
32
|
end
|
29
33
|
|
@@ -49,28 +53,30 @@ class BulkOps::WorkJob < ActiveJob::Base
|
|
49
53
|
return
|
50
54
|
end
|
51
55
|
|
52
|
-
return unless define_work(workClass)
|
56
|
+
return unless (work_action = define_work(workClass))
|
53
57
|
|
54
58
|
user = User.find_by_email(user_email)
|
55
59
|
update_status "running", "Started background task at #{DateTime.now.strftime("%d/%m/%Y %H:%M")}"
|
56
60
|
ability = Ability.new(user)
|
57
61
|
env = Hyrax::Actors::Environment.new(@work, ability, attributes)
|
58
|
-
update_status "complete", Hyrax::CurationConcern.actor.send(
|
62
|
+
update_status "complete", Hyrax::CurationConcern.actor.send(work_action,env)
|
59
63
|
end
|
60
64
|
|
61
65
|
private
|
62
66
|
|
63
67
|
|
64
|
-
def define_work
|
68
|
+
def define_work(workClass="Work")
|
65
69
|
if (@work_proxy.present? && @work_proxy.work_id.present? && record_exists?(@work_proxy.work_id))
|
66
70
|
begin
|
67
71
|
@work = ActiveFedora::Base.find(@work_proxy.work_id)
|
72
|
+
return :update
|
68
73
|
rescue ActiveFedora::ObjectNotFoundError
|
69
74
|
report_error "Could not find work to update in Fedora (though it shows up in Solr). Work id: #{@work_proxy.work_id}"
|
70
75
|
return false
|
71
76
|
end
|
72
77
|
else
|
73
78
|
@work = workClass.capitalize.constantize.new
|
79
|
+
return :ingest
|
74
80
|
end
|
75
81
|
end
|
76
82
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulk_ops
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.22
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ned Henry, UCSC Library Digital Initiatives
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-12-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|