pushmi_pullyu 2.0.4 → 2.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/Gemfile.lock +28 -27
- data/README.md +5 -2
- data/examples/pushmi_pullyu.yml +3 -0
- data/lib/pushmi_pullyu/aip/downloader.rb +16 -8
- data/lib/pushmi_pullyu/cli.rb +32 -8
- data/lib/pushmi_pullyu/preservation_queue.rb +20 -3
- data/lib/pushmi_pullyu/version.rb +1 -1
- data/lib/pushmi_pullyu.rb +3 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 25051393a56976db185a30c9cd498afd7997854d2a6931f3b6190132cec76a97
|
4
|
+
data.tar.gz: 731b8dbcf8a3d9124f44d57001bda6952bb2da07df874a1f97f52d7e7176c065
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 82b083e9b991fbc0b7c95507415d312bc29b29a4f16fdcfe8689efc2dc4c69542a4c15010ce9b5841cebebe73920de7499512bf1eaab57bb379816bd1dba75ae
|
7
|
+
data.tar.gz: 2439db5f7058ddc7b042894538a592d076c467b34c7211c262d15e3a2d6d436f8a3d785f6b53c3bd68e93566832fed18bbde6b32efa4f0f4e8e08eceacd3ed57
|
data/CHANGELOG.md
CHANGED
@@ -8,6 +8,13 @@ and releases in PushmiPullyu adheres to [Semantic Versioning](https://semver.org
|
|
8
8
|
|
9
9
|
## [Unreleased]
|
10
10
|
|
11
|
+
## [2.0.5] - 2023-02-17
|
12
|
+
|
13
|
+
- Add rescue block to catch exceptions while waiting for next item [#280](https://github.com/ualbertalib/pushmi_pullyu/issues/280)
|
14
|
+
- Add logic to fetch new community and collection information from jupiter and create their AIPS. [#255](https://github.com/ualbertalib/pushmi_pullyu/issues/255)
|
15
|
+
- Add delay to re-ingestion attempts to allow for problems to be fixed [#297](https://github.com/ualbertalib/pushmi_pullyu/issues/297)
|
16
|
+
- Bump git from 1.9.1 to 1.13.0
|
17
|
+
|
11
18
|
## [2.0.4] - 2022-11-22
|
12
19
|
|
13
20
|
- Fix issue with temporary work files not being deleted after a failed swift deposit [#242](https://github.com/ualbertalib/pushmi_pullyu/issues/242)
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
pushmi_pullyu (2.0.4)
|
4
|
+
pushmi_pullyu (2.0.5)
|
5
5
|
activesupport (>= 5, < 8)
|
6
6
|
bagit (~> 0.4)
|
7
7
|
connection_pool (~> 2.2)
|
@@ -18,7 +18,7 @@ PATH
|
|
18
18
|
GEM
|
19
19
|
remote: https://rubygems.org/
|
20
20
|
specs:
|
21
|
-
activesupport (7.0.2
|
21
|
+
activesupport (7.0.4.2)
|
22
22
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
23
23
|
i18n (>= 1.6, < 2)
|
24
24
|
minitest (>= 5.1)
|
@@ -39,8 +39,8 @@ GEM
|
|
39
39
|
open4 (~> 1.3)
|
40
40
|
coderay (1.1.3)
|
41
41
|
colored2 (3.1.2)
|
42
|
-
concurrent-ruby (1.
|
43
|
-
connection_pool (2.
|
42
|
+
concurrent-ruby (1.2.0)
|
43
|
+
connection_pool (2.3.0)
|
44
44
|
cork (0.3.0)
|
45
45
|
colored2 (~> 3.1)
|
46
46
|
crack (0.4.5)
|
@@ -91,22 +91,23 @@ GEM
|
|
91
91
|
faraday-net_http_persistent (1.2.0)
|
92
92
|
faraday-patron (1.0.0)
|
93
93
|
faraday-rack (1.0.0)
|
94
|
-
git (1.
|
94
|
+
git (1.13.0)
|
95
|
+
addressable (~> 2.8)
|
95
96
|
rchardet (~> 1.8)
|
96
97
|
hashdiff (1.0.1)
|
97
98
|
htmlentities (4.3.4)
|
98
99
|
http-accept (1.7.0)
|
99
|
-
http-cookie (1.0.
|
100
|
+
http-cookie (1.0.5)
|
100
101
|
domain_name (~> 0.5)
|
101
|
-
i18n (1.
|
102
|
+
i18n (1.12.0)
|
102
103
|
concurrent-ruby (~> 1.0)
|
103
|
-
json (2.6.
|
104
|
+
json (2.6.3)
|
104
105
|
kramdown (2.3.1)
|
105
106
|
rexml
|
106
107
|
kramdown-parser-gfm (1.1.0)
|
107
108
|
kramdown (~> 2.0)
|
108
109
|
link_header (0.0.8)
|
109
|
-
logger (1.5.
|
110
|
+
logger (1.5.3)
|
110
111
|
macaddr (1.7.2)
|
111
112
|
systemu (~> 2.6.5)
|
112
113
|
matrix (0.4.2)
|
@@ -115,7 +116,7 @@ GEM
|
|
115
116
|
mime-types-data (~> 3.2015)
|
116
117
|
mime-types-data (3.2022.0105)
|
117
118
|
minitar (0.9)
|
118
|
-
minitest (5.
|
119
|
+
minitest (5.17.0)
|
119
120
|
multipart-post (2.1.1)
|
120
121
|
nap (1.1.0)
|
121
122
|
net-http-persistent (4.0.1)
|
@@ -141,9 +142,9 @@ GEM
|
|
141
142
|
rainbow (3.1.1)
|
142
143
|
rake (13.0.6)
|
143
144
|
rchardet (1.8.0)
|
144
|
-
rdf (3.2.
|
145
|
+
rdf (3.2.9)
|
145
146
|
link_header (~> 0.0, >= 0.0.8)
|
146
|
-
rdf-aggregate-repo (3.2.
|
147
|
+
rdf-aggregate-repo (3.2.1)
|
147
148
|
rdf (~> 3.2)
|
148
149
|
rdf-n3 (3.2.1)
|
149
150
|
ebnf (~> 2.2)
|
@@ -153,7 +154,7 @@ GEM
|
|
153
154
|
rdf-xsd (3.2.1)
|
154
155
|
rdf (~> 3.2)
|
155
156
|
rexml (~> 3.2)
|
156
|
-
redis (4.
|
157
|
+
redis (4.8.1)
|
157
158
|
regexp_parser (2.6.0)
|
158
159
|
rest-client (2.1.0)
|
159
160
|
http-accept (>= 1.7.0, < 2.0)
|
@@ -161,7 +162,7 @@ GEM
|
|
161
162
|
mime-types (>= 1.16, < 4.0)
|
162
163
|
netrc (~> 0.8)
|
163
164
|
rexml (3.2.5)
|
164
|
-
rollbar (3.
|
165
|
+
rollbar (3.4.0)
|
165
166
|
rspec (3.12.0)
|
166
167
|
rspec-core (~> 3.12.0)
|
167
168
|
rspec-expectations (~> 3.12.0)
|
@@ -194,32 +195,32 @@ GEM
|
|
194
195
|
addressable (>= 2.3.5)
|
195
196
|
faraday (> 0.8, < 2.0)
|
196
197
|
scanf (1.0.0)
|
197
|
-
sparql (3.2.
|
198
|
+
sparql (3.2.5)
|
198
199
|
builder (~> 3.2)
|
199
|
-
ebnf (~> 2.2)
|
200
|
-
logger (~> 1.
|
201
|
-
rdf (~> 3.2, >= 3.2.
|
200
|
+
ebnf (~> 2.2, >= 2.3.1)
|
201
|
+
logger (~> 1.5)
|
202
|
+
rdf (~> 3.2, >= 3.2.8)
|
202
203
|
rdf-aggregate-repo (~> 3.2)
|
203
204
|
rdf-xsd (~> 3.2)
|
204
|
-
sparql-client (~> 3.2)
|
205
|
-
sxp (~> 1.2, >= 1.2.
|
205
|
+
sparql-client (~> 3.2, >= 3.2.1)
|
206
|
+
sxp (~> 1.2, >= 1.2.2)
|
206
207
|
sparql-client (3.2.1)
|
207
208
|
net-http-persistent (~> 4.0, >= 4.0.1)
|
208
209
|
rdf (~> 3.2, >= 3.2.6)
|
209
|
-
sxp (1.2.
|
210
|
-
matrix
|
210
|
+
sxp (1.2.3)
|
211
|
+
matrix (~> 0.4)
|
211
212
|
rdf (~> 3.2)
|
212
213
|
systemu (2.6.5)
|
213
214
|
terminal-table (3.0.2)
|
214
215
|
unicode-display_width (>= 1.1.1, < 3)
|
215
|
-
timecop (0.9.
|
216
|
-
tzinfo (2.0.
|
216
|
+
timecop (0.9.6)
|
217
|
+
tzinfo (2.0.6)
|
217
218
|
concurrent-ruby (~> 1.0)
|
218
219
|
unf (0.1.4)
|
219
220
|
unf_ext
|
220
|
-
unf_ext (0.0.8.
|
221
|
+
unf_ext (0.0.8.2)
|
221
222
|
unicode-display_width (2.3.0)
|
222
|
-
unicode-types (1.
|
223
|
+
unicode-types (1.8.0)
|
223
224
|
uuid (2.3.9)
|
224
225
|
macaddr (~> 1.0)
|
225
226
|
validatable (1.6.7)
|
@@ -247,4 +248,4 @@ DEPENDENCIES
|
|
247
248
|
webmock (~> 3.3)
|
248
249
|
|
249
250
|
BUNDLED WITH
|
250
|
-
2.3.
|
251
|
+
2.3.19
|
data/README.md
CHANGED
@@ -66,7 +66,10 @@ Specific options:
|
|
66
66
|
-W, --workdir PATH Path for directory where AIP creation work takes place in
|
67
67
|
-N, --process_name NAME Name of the application process
|
68
68
|
-m, --monitor Start monitor process for a daemon
|
69
|
-
-q, --queue NAME Name of the queue to read from
|
69
|
+
-q, --queue NAME Name of the queue to read from
|
70
|
+
-i, --ingestion_prefix PREFIX Prefix for keys used in counting the number of failed ingestion attempts
|
71
|
+
-x, --ingestion_attempts NUMBER Max number of attempts to try ingesting an entity
|
72
|
+
-f, --first_failed_wait NUMBER Time in seconds to wait after first failed entity deposit. This time will double every failed attempt
|
70
73
|
|
71
74
|
Common options:
|
72
75
|
-v, --version Show version
|
@@ -144,7 +147,7 @@ This will cut a tag version, builds the gem, and pushes the gem up to Rubygems
|
|
144
147
|
|
145
148
|
Note: You may need permission to push a gem up to Rubygems!
|
146
149
|
You will first need to create an account on rubygems.org.
|
147
|
-
Once you have an account, bug
|
150
|
+
Once you have an account, bug @pgwillia (Tricia Jenkins), @lagoan (Omar Rodriguez-Arenas), or @henryzhang87 (Henry Zhang) to [add you as an owner](http://guides.rubygems.org/command-reference/#gem-owner) to pushmi_pullyu Rubygem. Once you are an owner you should be able to push new versions of pushmi_pullyu up to Rubygems
|
148
151
|
|
149
152
|
## Deployment
|
150
153
|
|
data/examples/pushmi_pullyu.yml
CHANGED
data/lib/pushmi_pullyu/aip/downloader.rb
CHANGED
@@ -35,6 +35,11 @@ class PushmiPullyu::AIP::Downloader
|
|
35
35
|
# Main object metadata
|
36
36
|
download_and_log(object_aip_paths[:main_object_remote],
|
37
37
|
object_aip_paths[:main_object_local])
|
38
|
+
|
39
|
+
# Communities and collections do not have their own files.
|
40
|
+
return unless can_have_files?
|
41
|
+
|
42
|
+
FileUtils.mkdir_p(object_aip_paths[:file_sets_directory_local])
|
38
43
|
download_and_log(object_aip_paths[:file_sets_remote],
|
39
44
|
object_aip_paths[:file_sets_local])
|
40
45
|
|
@@ -139,25 +144,26 @@ class PushmiPullyu::AIP::Downloader
|
|
139
144
|
PushmiPullyu::Logging.log_aip_activity(@aip_directory, message)
|
140
145
|
end
|
141
146
|
|
147
|
+
def can_have_files?
|
148
|
+
@entity[:type] == 'items' || @entity[:type] == 'theses'
|
149
|
+
end
|
150
|
+
|
142
151
|
### Directories
|
143
152
|
|
144
153
|
def aip_dirs
|
145
154
|
@aip_dirs ||= {
|
146
155
|
objects: "#{@aip_directory}/data/objects",
|
147
156
|
metadata: "#{@aip_directory}/data/objects/metadata",
|
148
|
-
|
149
|
-
files_metadata: "#{@aip_directory}/data/objects/metadata/files_metadata",
|
150
|
-
logs: "#{@aip_directory}/data/logs",
|
151
|
-
file_logs: "#{@aip_directory}/data/logs/files_logs"
|
157
|
+
logs: "#{@aip_directory}/data/logs"
|
152
158
|
}
|
153
159
|
end
|
154
160
|
|
155
161
|
def file_set_dirs(file_set_uuid)
|
156
162
|
@file_set_dirs ||= {}
|
157
163
|
@file_set_dirs[file_set_uuid] ||= {
|
158
|
-
|
159
|
-
|
160
|
-
|
164
|
+
files: "#{@aip_directory}/data/objects/files/#{file_set_uuid}",
|
165
|
+
logs: "#{@aip_directory}/data/logs/files_logs/#{file_set_uuid}",
|
166
|
+
metadata: "#{@aip_directory}/data/objects/metadata/files_metadata/#{file_set_uuid}"
|
161
167
|
}
|
162
168
|
end
|
163
169
|
|
@@ -193,7 +199,9 @@ class PushmiPullyu::AIP::Downloader
|
|
193
199
|
main_object_remote: object_uri,
|
194
200
|
main_object_local: "#{aip_dirs[:metadata]}/object_metadata.n3",
|
195
201
|
file_sets_remote: "#{object_uri}/filesets",
|
196
|
-
|
202
|
+
# This directory needs to be created before we can download the file order information
|
203
|
+
file_sets_directory_local: "#{@aip_directory}/data/objects/metadata/files_metadata",
|
204
|
+
file_sets_local: "#{@aip_directory}/data/objects/metadata/files_metadata/file_order.xml",
|
197
205
|
# This is downloaded for processing but not saved
|
198
206
|
file_paths_remote: "#{object_uri}/file_paths"
|
199
207
|
}.freeze
|
data/lib/pushmi_pullyu/cli.rb
CHANGED
@@ -148,6 +148,21 @@ class PushmiPullyu::CLI
|
|
148
148
|
opts[:queue_name] = queue
|
149
149
|
end
|
150
150
|
|
151
|
+
o.on('-i', '--ingestion_prefix PREFIX',
|
152
|
+
'Prefix for keys used in counting the number of failed ingestion attempts') do |prefix|
|
153
|
+
opts[:ingestion_prefix] = prefix
|
154
|
+
end
|
155
|
+
|
156
|
+
o.on('-x', '--ingestion_attempts NUMBER', Integer,
|
157
|
+
'Max number of attempts to try ingesting an entity') do |ingestion_attempts|
|
158
|
+
opts[:ingestion_attempts] = ingestion_attempts
|
159
|
+
end
|
160
|
+
|
161
|
+
o.on('-f', '--first_failed_wait NUMBER', Integer,
|
162
|
+
'Time in seconds to wait after first failed deposit. Time will double every failed attempt') do |failed_wait|
|
163
|
+
opts[:first_failed_wait] = failed_wait
|
164
|
+
end
|
165
|
+
|
151
166
|
o.separator ''
|
152
167
|
o.separator 'Common options:'
|
153
168
|
|
@@ -182,11 +197,12 @@ class PushmiPullyu::CLI
|
|
182
197
|
end
|
183
198
|
|
184
199
|
def run_preservation_cycle
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
200
|
+
begin
|
201
|
+
entity = queue.wait_next_item
|
202
|
+
return unless entity && entity[:type].present? && entity[:uuid].present?
|
203
|
+
rescue StandardError => e
|
204
|
+
log_exception(e)
|
205
|
+
end
|
190
206
|
|
191
207
|
# add additional information about the error context to errors that occur while processing this item.
|
192
208
|
Rollbar.scoped(entity_uuid: entity[:uuid]) do
|
@@ -202,7 +218,11 @@ class PushmiPullyu::CLI
|
|
202
218
|
# readding it to the queue as it will always fail
|
203
219
|
rescue PushmiPullyu::AIP::EntityInvalid => e
|
204
220
|
rescue StandardError => e
|
205
|
-
|
221
|
+
begin
|
222
|
+
queue.add_entity_in_timeframe(entity)
|
223
|
+
rescue MaxDepositAttemptsReached => e
|
224
|
+
log_exception(e)
|
225
|
+
end
|
206
226
|
|
207
227
|
# rubocop:disable Lint/RescueException
|
208
228
|
# Something other than a StandardError exception means something happened which we were not expecting!
|
@@ -211,8 +231,7 @@ class PushmiPullyu::CLI
|
|
211
231
|
raise e
|
212
232
|
# rubocop:enable Lint/RescueException
|
213
233
|
ensure
|
214
|
-
|
215
|
-
logger.error(e)
|
234
|
+
log_exception(e)
|
216
235
|
end
|
217
236
|
end
|
218
237
|
|
@@ -287,4 +306,9 @@ class PushmiPullyu::CLI
|
|
287
306
|
end
|
288
307
|
end
|
289
308
|
|
309
|
+
def log_exception(exception)
|
310
|
+
Rollbar.error(exception)
|
311
|
+
logger.error(exception)
|
312
|
+
end
|
313
|
+
|
290
314
|
end
|
data/lib/pushmi_pullyu/preservation_queue.rb
CHANGED
@@ -20,6 +20,7 @@ require 'connection_pool'
|
|
20
20
|
class PushmiPullyu::PreservationQueue
|
21
21
|
|
22
22
|
class ConnectionError < StandardError; end
|
23
|
+
class MaxDepositAttemptsReached < StandardError; end
|
23
24
|
|
24
25
|
def initialize(redis_url: 'redis://localhost:6379',
|
25
26
|
pool_opts: { size: 1, timeout: 5 },
|
@@ -50,7 +51,8 @@ class PushmiPullyu::PreservationQueue
|
|
50
51
|
rd.multi do |tx|
|
51
52
|
tx.zrem(@queue_name, element) # remove the top element transactionally
|
52
53
|
end
|
53
|
-
|
54
|
+
|
55
|
+
return JSON.parse(element, { symbolize_names: true })
|
54
56
|
else
|
55
57
|
rd.unwatch # cancel the transaction since there was nothing in the queue
|
56
58
|
return nil
|
@@ -68,12 +70,27 @@ class PushmiPullyu::PreservationQueue
|
|
68
70
|
end
|
69
71
|
end
|
70
72
|
|
71
|
-
def
|
73
|
+
def add_entity_in_timeframe(entity)
|
74
|
+
entity_attempts_key = "#{PushmiPullyu.options[:ingestion_prefix]}#{entity[:uuid]}"
|
75
|
+
|
72
76
|
@redis.with do |connection|
|
73
|
-
|
77
|
+
# separate information for priority information and queue
|
78
|
+
deposit_attempt = connection.incr entity_attempts_key
|
79
|
+
|
80
|
+
if deposit_attempt <= PushmiPullyu.options[:ingestion_attempts]
|
81
|
+
connection.zadd @queue_name, Time.now.to_f + self.class.extra_wait_time(deposit_attempt),
|
82
|
+
entity.slice(:uuid, :type).to_json
|
83
|
+
else
|
84
|
+
connection.del entity_attempts_key
|
85
|
+
raise MaxDepositAttemptsReached
|
86
|
+
end
|
74
87
|
end
|
75
88
|
end
|
76
89
|
|
90
|
+
def self.extra_wait_time(deposit_attempt)
|
91
|
+
(2**deposit_attempt) * PushmiPullyu.options[:first_failed_wait]
|
92
|
+
end
|
93
|
+
|
77
94
|
protected
|
78
95
|
|
79
96
|
def connected?
|
data/lib/pushmi_pullyu.rb
CHANGED
@@ -26,6 +26,9 @@ module PushmiPullyu
|
|
26
26
|
workdir: 'tmp/work',
|
27
27
|
process_name: 'pushmi_pullyu',
|
28
28
|
queue_name: 'dev:pmpy_queue',
|
29
|
+
ingestion_prefix: 'prod:pmpy_ingest_attempt:',
|
30
|
+
ingestion_attempts: 15,
|
31
|
+
first_failed_wait: 10,
|
29
32
|
redis: {
|
30
33
|
url: 'redis://localhost:6379'
|
31
34
|
},
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pushmi_pullyu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.4
|
4
|
+
version: 2.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shane Murnaghan
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2023-02-28 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|