bulk_ops 0.1.21 → 0.1.22
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/bulk_ops/error.rb +31 -0
- data/lib/bulk_ops/operation.rb +19 -11
- data/lib/bulk_ops/parser.rb +7 -4
- data/lib/bulk_ops/version.rb +1 -1
- data/lib/bulk_ops/work_job.rb +10 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d0b68ae5923dae38908ff5382d52aba0fa514d0cf9e714eefcfb46013ec9ce02
|
4
|
+
data.tar.gz: 175b647ebf14fb4e432861c0185d9bf142d666e4df29129631d064517f55f588
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4209b3080bd3e6b4e5df1ef727e1db943252b0d32169bb080db61e6ed01b4ba93fff56303a38d6e655eda22f82377e06a1d847c7d47df8d664f7af6e5bf49d75
|
7
|
+
data.tar.gz: ac685fa72de08e26d4fe403b57143399602986aa3cf2600c00f2c55a86788bdefa5cfc0dc250d364a8b808ed1f856ea8f84c5e7dd8c88107859d21783b6ca55c
|
data/lib/bulk_ops/error.rb
CHANGED
@@ -137,6 +137,37 @@ class BulkOps::Error
|
|
137
137
|
message += "An example of a missing filename is: #{errors.first.file}\n"
|
138
138
|
end
|
139
139
|
|
140
|
+
when :relationship_error
|
141
|
+
message = "\n-- Errors resolving relationships --\n "
|
142
|
+
message += "There were issues resolving #{errors.count} relationships.\n"
|
143
|
+
if errors.count < max_error
|
144
|
+
message += "errors:\n"
|
145
|
+
message += errors.map{|er| "Row #{er.row_number}, relationship ##{er.object_id}: #{er.message}"}.join("\n")
|
146
|
+
else
|
147
|
+
message += "An example of an error is: Row #{er.first.row_number}, relationship ##{er.first.object_id}: #{er.first.message}\n"
|
148
|
+
end
|
149
|
+
|
150
|
+
when :ingest_failure
|
151
|
+
message = "\n-- Ingested File is Broken or Missing --\n "
|
152
|
+
message += "After the ingest completed, we had issues finding and re-saving the ingested works associated with #{errors.count} rows.\n"
|
153
|
+
if errors.count < max_error
|
154
|
+
message += "Problem rows:\n"
|
155
|
+
message += errors.map{|er| "#{er.row_number} - proxy ##{er.object_id}"}.join("\n")
|
156
|
+
else
|
157
|
+
message += "An example of a failed ingest is row #{errors.first.row_number} with work proxy #{errors.first.object_id} \n"
|
158
|
+
end
|
159
|
+
|
160
|
+
when :id_not_unique
|
161
|
+
message = "\n-- Multiple works shared a supposedly unique identifier, and we don't know which one to edit --\n "
|
162
|
+
if errors.count < max_error
|
163
|
+
message += "Problem rows:\n"
|
164
|
+
message += errors.map{|er| "#{er.row_number} - proxy ##{er.object_id} - #{er.options_name}: #{er.option_values}"}.join("\n")
|
165
|
+
else
|
166
|
+
message += "An example of a row that identifies multiple works is #{errors.first.row_number} with work proxy #{errors.first.object_id} using the identifier: #{er.options_name} - #{er.option_values} \n"
|
167
|
+
end
|
168
|
+
|
169
|
+
else
|
170
|
+
message = "\n-- There were other errors of an unrecognized type. Check the application logs --\n "
|
140
171
|
end
|
141
172
|
return message
|
142
173
|
end
|
data/lib/bulk_ops/operation.rb
CHANGED
@@ -125,18 +125,26 @@ module BulkOps
|
|
125
125
|
update(stage: "finishing")
|
126
126
|
|
127
127
|
# Attempt to resolve each dangling (objectless) relationships
|
128
|
-
|
129
|
-
|
128
|
+
relationships = work_proxies.reduce([]){|all_rels,proxy| all_rels + proxy.relationships.select{|rel| rel.status == "pending"}}
|
129
|
+
relationships.each do |rel|
|
130
|
+
begin
|
131
|
+
rel.resolve!
|
132
|
+
rescue StandardError => e
|
133
|
+
@operation_errors << BulkOps::Error.new(:relationship_error, row_number: proxy.row_number, object_id: relationship.id, message: "#{e.class} - #{e.message}" )
|
134
|
+
end
|
130
135
|
end
|
131
136
|
|
132
|
-
work_proxies.each do |proxy|
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
+
work_proxies.each do |proxy|
|
138
|
+
work = nil
|
139
|
+
begin
|
140
|
+
work = Work.find(proxy.work_id).save
|
141
|
+
rescue StandardError => e
|
142
|
+
@operation_errors << BulkOps::Error.new(:ingest_failure, row_number: proxy.row_number, object_id: proxy.id, message: "#{e.class} - #{e.message}")
|
143
|
+
end
|
137
144
|
end
|
138
145
|
|
139
|
-
|
146
|
+
new_stage = accumulated_errors.blank? ? "complete" : "errors"
|
147
|
+
update(stage: new_stage)
|
140
148
|
report_errors!
|
141
149
|
lift_holds
|
142
150
|
end
|
@@ -284,9 +292,9 @@ module BulkOps
|
|
284
292
|
end
|
285
293
|
|
286
294
|
def busy?
|
287
|
-
return true if work_proxies.
|
288
|
-
return true if work_proxies.
|
289
|
-
return true if work_proxies.
|
295
|
+
return true if work_proxies.any?{|prx| prx.status.downcase == "running"}
|
296
|
+
return true if work_proxies.any?{|prx| prx.status.downcase == "queued"}
|
297
|
+
return true if work_proxies.any?{|prx| prx.status.downcase == "starting"}
|
290
298
|
return false
|
291
299
|
end
|
292
300
|
|
data/lib/bulk_ops/parser.rb
CHANGED
@@ -8,9 +8,9 @@ class BulkOps::Parser
|
|
8
8
|
def self.is_file_set? metadata, row_number
|
9
9
|
return false unless metadata[row_number].present?
|
10
10
|
# If the work type is explicitly specified, use that
|
11
|
-
if (type_key = metadata.keys.find{|key| key.downcase.gsub(/[_\-\s]/,"").include?("worktype") })
|
12
|
-
return true if metadata[type_key].downcase == "fileset"
|
13
|
-
return false if metadata[type_key].present?
|
11
|
+
if (type_key = metadata[row_number].to_h.keys.find{|key| key.downcase.gsub(/[_\-\s]/,"").include?("worktype") })
|
12
|
+
return true if metadata[row_number][type_key].downcase == "fileset"
|
13
|
+
return false if metadata[row_number][type_key].present?
|
14
14
|
end
|
15
15
|
# Otherwise, if there are any valid fields other than relationship or file fields, call it a work
|
16
16
|
metadata[row_number].each do |field, value|
|
@@ -73,8 +73,11 @@ class BulkOps::Parser
|
|
73
73
|
|
74
74
|
def find_work_id_from_unique_metadata field_name, value
|
75
75
|
field_solr_name = schema.get_field(field_name).solr_name
|
76
|
-
query = "_query_:\"{!
|
76
|
+
query = "_query_:\"{!dismax qf=#{field_solr_name}}#{value}\""
|
77
77
|
response = ActiveFedora::SolrService.instance.conn.get(ActiveFedora::SolrService.select_path, params: { fq: query, rows: 1, start: 0})["response"]
|
78
|
+
if response["numFound"] > 1
|
79
|
+
report_error( :id_not_unique , "", row_number: row_number, object_id: @proxy.id, options_name: field_name, option_values: value ) unless label
|
80
|
+
end
|
78
81
|
return response["docs"][0]["id"]
|
79
82
|
end
|
80
83
|
|
data/lib/bulk_ops/version.rb
CHANGED
data/lib/bulk_ops/work_job.rb
CHANGED
@@ -23,7 +23,11 @@ class BulkOps::WorkJob < ActiveJob::Base
|
|
23
23
|
# Delete any UploadedFiles. These take up tons of unnecessary disk space.
|
24
24
|
@work.file_sets.each do |fileset|
|
25
25
|
if uf = Hyrax::UploadedFile.find_by(file: fileset.label)
|
26
|
-
|
26
|
+
begin
|
27
|
+
uf.destroy!
|
28
|
+
rescue StandardError => e
|
29
|
+
Rails.logger.warn("Could not delete uploaded file. #{e.class} - #{e.message}")
|
30
|
+
end
|
27
31
|
end
|
28
32
|
end
|
29
33
|
|
@@ -49,28 +53,30 @@ class BulkOps::WorkJob < ActiveJob::Base
|
|
49
53
|
return
|
50
54
|
end
|
51
55
|
|
52
|
-
return unless define_work(workClass)
|
56
|
+
return unless (work_action = define_work(workClass))
|
53
57
|
|
54
58
|
user = User.find_by_email(user_email)
|
55
59
|
update_status "running", "Started background task at #{DateTime.now.strftime("%d/%m/%Y %H:%M")}"
|
56
60
|
ability = Ability.new(user)
|
57
61
|
env = Hyrax::Actors::Environment.new(@work, ability, attributes)
|
58
|
-
update_status "complete", Hyrax::CurationConcern.actor.send(
|
62
|
+
update_status "complete", Hyrax::CurationConcern.actor.send(work_action,env)
|
59
63
|
end
|
60
64
|
|
61
65
|
private
|
62
66
|
|
63
67
|
|
64
|
-
def define_work
|
68
|
+
def define_work(workClass="Work")
|
65
69
|
if (@work_proxy.present? && @work_proxy.work_id.present? && record_exists?(@work_proxy.work_id))
|
66
70
|
begin
|
67
71
|
@work = ActiveFedora::Base.find(@work_proxy.work_id)
|
72
|
+
return :update
|
68
73
|
rescue ActiveFedora::ObjectNotFoundError
|
69
74
|
report_error "Could not find work to update in Fedora (though it shows up in Solr). Work id: #{@work_proxy.work_id}"
|
70
75
|
return false
|
71
76
|
end
|
72
77
|
else
|
73
78
|
@work = workClass.capitalize.constantize.new
|
79
|
+
return :ingest
|
74
80
|
end
|
75
81
|
end
|
76
82
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulk_ops
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.21
|
4
|
+
version: 0.1.22
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ned Henry, UCSC Library Digital Initiatives
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-12-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|