pushmi_pullyu 2.0.4 → 2.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/Gemfile.lock +28 -27
- data/README.md +5 -2
- data/examples/pushmi_pullyu.yml +3 -0
- data/lib/pushmi_pullyu/aip/downloader.rb +16 -8
- data/lib/pushmi_pullyu/cli.rb +32 -8
- data/lib/pushmi_pullyu/preservation_queue.rb +20 -3
- data/lib/pushmi_pullyu/version.rb +1 -1
- data/lib/pushmi_pullyu.rb +3 -0
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 25051393a56976db185a30c9cd498afd7997854d2a6931f3b6190132cec76a97
|
|
4
|
+
data.tar.gz: 731b8dbcf8a3d9124f44d57001bda6952bb2da07df874a1f97f52d7e7176c065
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 82b083e9b991fbc0b7c95507415d312bc29b29a4f16fdcfe8689efc2dc4c69542a4c15010ce9b5841cebebe73920de7499512bf1eaab57bb379816bd1dba75ae
|
|
7
|
+
data.tar.gz: 2439db5f7058ddc7b042894538a592d076c467b34c7211c262d15e3a2d6d436f8a3d785f6b53c3bd68e93566832fed18bbde6b32efa4f0f4e8e08eceacd3ed57
|
data/CHANGELOG.md
CHANGED
|
@@ -8,6 +8,13 @@ and releases in PushmiPullyu adheres to [Semantic Versioning](https://semver.org
|
|
|
8
8
|
|
|
9
9
|
## [Unreleased]
|
|
10
10
|
|
|
11
|
+
## [2.0.5] - 2023-02-17
|
|
12
|
+
|
|
13
|
+
- Add rescue block to catch exceptions while waiting for next item [#280](https://github.com/ualbertalib/pushmi_pullyu/issues/280)
|
|
14
|
+
- Add logic to fetch new community and collection information from jupiter and create their AIPS. [#255](https://github.com/ualbertalib/pushmi_pullyu/issues/255)
|
|
15
|
+
- Add delay to re-ingestion attempts to allow for problems to be fixed [#297](https://github.com/ualbertalib/pushmi_pullyu/issues/297)
|
|
16
|
+
- Bump git from 1.9.1 to 1.13.0
|
|
17
|
+
|
|
11
18
|
## [2.0.4] - 2022-11-22
|
|
12
19
|
|
|
13
20
|
- Fix issue with temporary work files not being deleted after a failed swift deposit [#242](https://github.com/ualbertalib/pushmi_pullyu/issues/242)
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
pushmi_pullyu (2.0.4)
|
|
4
|
+
pushmi_pullyu (2.0.5)
|
|
5
5
|
activesupport (>= 5, < 8)
|
|
6
6
|
bagit (~> 0.4)
|
|
7
7
|
connection_pool (~> 2.2)
|
|
@@ -18,7 +18,7 @@ PATH
|
|
|
18
18
|
GEM
|
|
19
19
|
remote: https://rubygems.org/
|
|
20
20
|
specs:
|
|
21
|
-
activesupport (7.0.2
|
|
21
|
+
activesupport (7.0.4.2)
|
|
22
22
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
|
23
23
|
i18n (>= 1.6, < 2)
|
|
24
24
|
minitest (>= 5.1)
|
|
@@ -39,8 +39,8 @@ GEM
|
|
|
39
39
|
open4 (~> 1.3)
|
|
40
40
|
coderay (1.1.3)
|
|
41
41
|
colored2 (3.1.2)
|
|
42
|
-
concurrent-ruby (1.
|
|
43
|
-
connection_pool (2.
|
|
42
|
+
concurrent-ruby (1.2.0)
|
|
43
|
+
connection_pool (2.3.0)
|
|
44
44
|
cork (0.3.0)
|
|
45
45
|
colored2 (~> 3.1)
|
|
46
46
|
crack (0.4.5)
|
|
@@ -91,22 +91,23 @@ GEM
|
|
|
91
91
|
faraday-net_http_persistent (1.2.0)
|
|
92
92
|
faraday-patron (1.0.0)
|
|
93
93
|
faraday-rack (1.0.0)
|
|
94
|
-
git (1.9.1)
|
|
94
|
+
git (1.13.0)
|
|
95
|
+
addressable (~> 2.8)
|
|
95
96
|
rchardet (~> 1.8)
|
|
96
97
|
hashdiff (1.0.1)
|
|
97
98
|
htmlentities (4.3.4)
|
|
98
99
|
http-accept (1.7.0)
|
|
99
|
-
http-cookie (1.0.
|
|
100
|
+
http-cookie (1.0.5)
|
|
100
101
|
domain_name (~> 0.5)
|
|
101
|
-
i18n (1.
|
|
102
|
+
i18n (1.12.0)
|
|
102
103
|
concurrent-ruby (~> 1.0)
|
|
103
|
-
json (2.6.
|
|
104
|
+
json (2.6.3)
|
|
104
105
|
kramdown (2.3.1)
|
|
105
106
|
rexml
|
|
106
107
|
kramdown-parser-gfm (1.1.0)
|
|
107
108
|
kramdown (~> 2.0)
|
|
108
109
|
link_header (0.0.8)
|
|
109
|
-
logger (1.5.
|
|
110
|
+
logger (1.5.3)
|
|
110
111
|
macaddr (1.7.2)
|
|
111
112
|
systemu (~> 2.6.5)
|
|
112
113
|
matrix (0.4.2)
|
|
@@ -115,7 +116,7 @@ GEM
|
|
|
115
116
|
mime-types-data (~> 3.2015)
|
|
116
117
|
mime-types-data (3.2022.0105)
|
|
117
118
|
minitar (0.9)
|
|
118
|
-
minitest (5.
|
|
119
|
+
minitest (5.17.0)
|
|
119
120
|
multipart-post (2.1.1)
|
|
120
121
|
nap (1.1.0)
|
|
121
122
|
net-http-persistent (4.0.1)
|
|
@@ -141,9 +142,9 @@ GEM
|
|
|
141
142
|
rainbow (3.1.1)
|
|
142
143
|
rake (13.0.6)
|
|
143
144
|
rchardet (1.8.0)
|
|
144
|
-
rdf (3.2.
|
|
145
|
+
rdf (3.2.9)
|
|
145
146
|
link_header (~> 0.0, >= 0.0.8)
|
|
146
|
-
rdf-aggregate-repo (3.2.
|
|
147
|
+
rdf-aggregate-repo (3.2.1)
|
|
147
148
|
rdf (~> 3.2)
|
|
148
149
|
rdf-n3 (3.2.1)
|
|
149
150
|
ebnf (~> 2.2)
|
|
@@ -153,7 +154,7 @@ GEM
|
|
|
153
154
|
rdf-xsd (3.2.1)
|
|
154
155
|
rdf (~> 3.2)
|
|
155
156
|
rexml (~> 3.2)
|
|
156
|
-
redis (4.
|
|
157
|
+
redis (4.8.1)
|
|
157
158
|
regexp_parser (2.6.0)
|
|
158
159
|
rest-client (2.1.0)
|
|
159
160
|
http-accept (>= 1.7.0, < 2.0)
|
|
@@ -161,7 +162,7 @@ GEM
|
|
|
161
162
|
mime-types (>= 1.16, < 4.0)
|
|
162
163
|
netrc (~> 0.8)
|
|
163
164
|
rexml (3.2.5)
|
|
164
|
-
rollbar (3.
|
|
165
|
+
rollbar (3.4.0)
|
|
165
166
|
rspec (3.12.0)
|
|
166
167
|
rspec-core (~> 3.12.0)
|
|
167
168
|
rspec-expectations (~> 3.12.0)
|
|
@@ -194,32 +195,32 @@ GEM
|
|
|
194
195
|
addressable (>= 2.3.5)
|
|
195
196
|
faraday (> 0.8, < 2.0)
|
|
196
197
|
scanf (1.0.0)
|
|
197
|
-
sparql (3.2.
|
|
198
|
+
sparql (3.2.5)
|
|
198
199
|
builder (~> 3.2)
|
|
199
|
-
ebnf (~> 2.2)
|
|
200
|
-
logger (~> 1.
|
|
201
|
-
rdf (~> 3.2, >= 3.2.
|
|
200
|
+
ebnf (~> 2.2, >= 2.3.1)
|
|
201
|
+
logger (~> 1.5)
|
|
202
|
+
rdf (~> 3.2, >= 3.2.8)
|
|
202
203
|
rdf-aggregate-repo (~> 3.2)
|
|
203
204
|
rdf-xsd (~> 3.2)
|
|
204
|
-
sparql-client (~> 3.2)
|
|
205
|
-
sxp (~> 1.2, >= 1.2.
|
|
205
|
+
sparql-client (~> 3.2, >= 3.2.1)
|
|
206
|
+
sxp (~> 1.2, >= 1.2.2)
|
|
206
207
|
sparql-client (3.2.1)
|
|
207
208
|
net-http-persistent (~> 4.0, >= 4.0.1)
|
|
208
209
|
rdf (~> 3.2, >= 3.2.6)
|
|
209
|
-
sxp (1.2.
|
|
210
|
-
matrix
|
|
210
|
+
sxp (1.2.3)
|
|
211
|
+
matrix (~> 0.4)
|
|
211
212
|
rdf (~> 3.2)
|
|
212
213
|
systemu (2.6.5)
|
|
213
214
|
terminal-table (3.0.2)
|
|
214
215
|
unicode-display_width (>= 1.1.1, < 3)
|
|
215
|
-
timecop (0.9.
|
|
216
|
-
tzinfo (2.0.
|
|
216
|
+
timecop (0.9.6)
|
|
217
|
+
tzinfo (2.0.6)
|
|
217
218
|
concurrent-ruby (~> 1.0)
|
|
218
219
|
unf (0.1.4)
|
|
219
220
|
unf_ext
|
|
220
|
-
unf_ext (0.0.8.
|
|
221
|
+
unf_ext (0.0.8.2)
|
|
221
222
|
unicode-display_width (2.3.0)
|
|
222
|
-
unicode-types (1.
|
|
223
|
+
unicode-types (1.8.0)
|
|
223
224
|
uuid (2.3.9)
|
|
224
225
|
macaddr (~> 1.0)
|
|
225
226
|
validatable (1.6.7)
|
|
@@ -247,4 +248,4 @@ DEPENDENCIES
|
|
|
247
248
|
webmock (~> 3.3)
|
|
248
249
|
|
|
249
250
|
BUNDLED WITH
|
|
250
|
-
2.3.
|
|
251
|
+
2.3.19
|
data/README.md
CHANGED
|
@@ -66,7 +66,10 @@ Specific options:
|
|
|
66
66
|
-W, --workdir PATH Path for directory where AIP creation work takes place in
|
|
67
67
|
-N, --process_name NAME Name of the application process
|
|
68
68
|
-m, --monitor Start monitor process for a deamon
|
|
69
|
-
-q, --queue NAME Name of the queue to read from
|
|
69
|
+
-q, --queue NAME Name of the queue to read from
|
|
70
|
+
-i, --ingestion_prefix PREFIX Prefix for keys used in counting the number of failed ingestion attempts
|
|
71
|
+
-x, --ingestion_attempts NUMBER Max number of attempts to try ingesting an entity
|
|
72
|
+
-f, --first_failed_wait NUMBER Time in seconds to wait after first failed entity deposit. This time will double every failed attempt
|
|
70
73
|
|
|
71
74
|
Common options:
|
|
72
75
|
-v, --version Show version
|
|
@@ -144,7 +147,7 @@ This will cut a tag version, builds the gem, and pushes the gem up to Rubygems
|
|
|
144
147
|
|
|
145
148
|
Note: You may need permission to push a gem up to Rubygems!
|
|
146
149
|
You will first need to create an account on rubygems.org.
|
|
147
|
-
Once you have an account, bug
|
|
150
|
+
Once you have an account, bug @pgwillia (Tricia Jenkins), @lagoan (Omar Rodriguez-Arenas), or @henryzhang87 (Henry Zhang) to [add you as an owner](http://guides.rubygems.org/command-reference/#gem-owner) to pushmi_pullyu Rubygem. Once you are an owner you should be able to push new versions of pushmi_pullyu up to Rubygems
|
|
148
151
|
|
|
149
152
|
## Deployment
|
|
150
153
|
|
data/examples/pushmi_pullyu.yml
CHANGED
|
data/lib/pushmi_pullyu/aip/downloader.rb
CHANGED
|
@@ -35,6 +35,11 @@ class PushmiPullyu::AIP::Downloader
|
|
|
35
35
|
# Main object metadata
|
|
36
36
|
download_and_log(object_aip_paths[:main_object_remote],
|
|
37
37
|
object_aip_paths[:main_object_local])
|
|
38
|
+
|
|
39
|
+
# Communities and collections do not have their own files.
|
|
40
|
+
return unless can_have_files?
|
|
41
|
+
|
|
42
|
+
FileUtils.mkdir_p(object_aip_paths[:file_sets_directory_local])
|
|
38
43
|
download_and_log(object_aip_paths[:file_sets_remote],
|
|
39
44
|
object_aip_paths[:file_sets_local])
|
|
40
45
|
|
|
@@ -139,25 +144,26 @@ class PushmiPullyu::AIP::Downloader
|
|
|
139
144
|
PushmiPullyu::Logging.log_aip_activity(@aip_directory, message)
|
|
140
145
|
end
|
|
141
146
|
|
|
147
|
+
def can_have_files?
|
|
148
|
+
@entity[:type] == 'items' || @entity[:type] == 'theses'
|
|
149
|
+
end
|
|
150
|
+
|
|
142
151
|
### Directories
|
|
143
152
|
|
|
144
153
|
def aip_dirs
|
|
145
154
|
@aip_dirs ||= {
|
|
146
155
|
objects: "#{@aip_directory}/data/objects",
|
|
147
156
|
metadata: "#{@aip_directory}/data/objects/metadata",
|
|
148
|
-
|
|
149
|
-
files_metadata: "#{@aip_directory}/data/objects/metadata/files_metadata",
|
|
150
|
-
logs: "#{@aip_directory}/data/logs",
|
|
151
|
-
file_logs: "#{@aip_directory}/data/logs/files_logs"
|
|
157
|
+
logs: "#{@aip_directory}/data/logs"
|
|
152
158
|
}
|
|
153
159
|
end
|
|
154
160
|
|
|
155
161
|
def file_set_dirs(file_set_uuid)
|
|
156
162
|
@file_set_dirs ||= {}
|
|
157
163
|
@file_set_dirs[file_set_uuid] ||= {
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
164
|
+
files: "#{@aip_directory}/data/objects/files/#{file_set_uuid}",
|
|
165
|
+
logs: "#{@aip_directory}/data/logs/files_logs/#{file_set_uuid}",
|
|
166
|
+
metadata: "#{@aip_directory}/data/objects/metadata/files_metadata/#{file_set_uuid}"
|
|
161
167
|
}
|
|
162
168
|
end
|
|
163
169
|
|
|
@@ -193,7 +199,9 @@ class PushmiPullyu::AIP::Downloader
|
|
|
193
199
|
main_object_remote: object_uri,
|
|
194
200
|
main_object_local: "#{aip_dirs[:metadata]}/object_metadata.n3",
|
|
195
201
|
file_sets_remote: "#{object_uri}/filesets",
|
|
196
|
-
|
|
202
|
+
# This directory needs to be created before we can downloaded the file order information
|
|
203
|
+
file_sets_directory_local: "#{@aip_directory}/data/objects/metadata/files_metadata",
|
|
204
|
+
file_sets_local: "#{@aip_directory}/data/objects/metadata/files_metadata/file_order.xml",
|
|
197
205
|
# This is downloaded for processing but not saved
|
|
198
206
|
file_paths_remote: "#{object_uri}/file_paths"
|
|
199
207
|
}.freeze
|
data/lib/pushmi_pullyu/cli.rb
CHANGED
|
@@ -148,6 +148,21 @@ class PushmiPullyu::CLI
|
|
|
148
148
|
opts[:queue_name] = queue
|
|
149
149
|
end
|
|
150
150
|
|
|
151
|
+
o.on('-i', '--ingestion_prefix PREFIX',
|
|
152
|
+
'Prefix for keys used in counting the number of failed ingestion attempts') do |prefix|
|
|
153
|
+
opts[:ingestion_prefix] = prefix
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
o.on('-x', '--ingestion_attempts NUMBER', Integer,
|
|
157
|
+
'Max number of attempts to try ingesting an entity') do |ingestion_attempts|
|
|
158
|
+
opts[:ingestion_attempts] = ingestion_attempts
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
o.on('-f', '--first_failed_wait NUMBER', Integer,
|
|
162
|
+
'Time in seconds to wait after first failed deposit. Time will double every failed attempt') do |failed_wait|
|
|
163
|
+
opts[:first_failed_wait] = failed_wait
|
|
164
|
+
end
|
|
165
|
+
|
|
151
166
|
o.separator ''
|
|
152
167
|
o.separator 'Common options:'
|
|
153
168
|
|
|
@@ -182,11 +197,12 @@ class PushmiPullyu::CLI
|
|
|
182
197
|
end
|
|
183
198
|
|
|
184
199
|
def run_preservation_cycle
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
200
|
+
begin
|
|
201
|
+
entity = queue.wait_next_item
|
|
202
|
+
return unless entity && entity[:type].present? && entity[:uuid].present?
|
|
203
|
+
rescue StandardError => e
|
|
204
|
+
log_exception(e)
|
|
205
|
+
end
|
|
190
206
|
|
|
191
207
|
# add additional information about the error context to errors that occur while processing this item.
|
|
192
208
|
Rollbar.scoped(entity_uuid: entity[:uuid]) do
|
|
@@ -202,7 +218,11 @@ class PushmiPullyu::CLI
|
|
|
202
218
|
# readding it to the queue as it will always fail
|
|
203
219
|
rescue PushmiPullyu::AIP::EntityInvalid => e
|
|
204
220
|
rescue StandardError => e
|
|
205
|
-
|
|
221
|
+
begin
|
|
222
|
+
queue.add_entity_in_timeframe(entity)
|
|
223
|
+
rescue MaxDepositAttemptsReached => e
|
|
224
|
+
log_exception(e)
|
|
225
|
+
end
|
|
206
226
|
|
|
207
227
|
# rubocop:disable Lint/RescueException
|
|
208
228
|
# Something other than a StandardError exception means something happened which we were not expecting!
|
|
@@ -211,8 +231,7 @@ class PushmiPullyu::CLI
|
|
|
211
231
|
raise e
|
|
212
232
|
# rubocop:enable Lint/RescueException
|
|
213
233
|
ensure
|
|
214
|
-
|
|
215
|
-
logger.error(e)
|
|
234
|
+
log_exception(e)
|
|
216
235
|
end
|
|
217
236
|
end
|
|
218
237
|
|
|
@@ -287,4 +306,9 @@ class PushmiPullyu::CLI
|
|
|
287
306
|
end
|
|
288
307
|
end
|
|
289
308
|
|
|
309
|
+
def log_exception(exception)
|
|
310
|
+
Rollbar.error(exception)
|
|
311
|
+
logger.error(exception)
|
|
312
|
+
end
|
|
313
|
+
|
|
290
314
|
end
|
data/lib/pushmi_pullyu/preservation_queue.rb
CHANGED
|
@@ -20,6 +20,7 @@ require 'connection_pool'
|
|
|
20
20
|
class PushmiPullyu::PreservationQueue
|
|
21
21
|
|
|
22
22
|
class ConnectionError < StandardError; end
|
|
23
|
+
class MaxDepositAttemptsReached < StandardError; end
|
|
23
24
|
|
|
24
25
|
def initialize(redis_url: 'redis://localhost:6379',
|
|
25
26
|
pool_opts: { size: 1, timeout: 5 },
|
|
@@ -50,7 +51,8 @@ class PushmiPullyu::PreservationQueue
|
|
|
50
51
|
rd.multi do |tx|
|
|
51
52
|
tx.zrem(@queue_name, element) # remove the top element transactionally
|
|
52
53
|
end
|
|
53
|
-
|
|
54
|
+
|
|
55
|
+
return JSON.parse(element, { symbolize_names: true })
|
|
54
56
|
else
|
|
55
57
|
rd.unwatch # cancel the transaction since there was nothing in the queue
|
|
56
58
|
return nil
|
|
@@ -68,12 +70,27 @@ class PushmiPullyu::PreservationQueue
|
|
|
68
70
|
end
|
|
69
71
|
end
|
|
70
72
|
|
|
71
|
-
def
|
|
73
|
+
def add_entity_in_timeframe(entity)
|
|
74
|
+
entity_attempts_key = "#{PushmiPullyu.options[:ingestion_prefix]}#{entity[:uuid]}"
|
|
75
|
+
|
|
72
76
|
@redis.with do |connection|
|
|
73
|
-
|
|
77
|
+
# separate information for priority information and queue
|
|
78
|
+
deposit_attempt = connection.incr entity_attempts_key
|
|
79
|
+
|
|
80
|
+
if deposit_attempt <= PushmiPullyu.options[:ingestion_attempts]
|
|
81
|
+
connection.zadd @queue_name, Time.now.to_f + self.class.extra_wait_time(deposit_attempt),
|
|
82
|
+
entity.slice(:uuid, :type).to_json
|
|
83
|
+
else
|
|
84
|
+
connection.del entity_attempts_key
|
|
85
|
+
raise MaxDepositAttemptsReached
|
|
86
|
+
end
|
|
74
87
|
end
|
|
75
88
|
end
|
|
76
89
|
|
|
90
|
+
def self.extra_wait_time(deposit_attempt)
|
|
91
|
+
(2**deposit_attempt) * PushmiPullyu.options[:first_failed_wait]
|
|
92
|
+
end
|
|
93
|
+
|
|
77
94
|
protected
|
|
78
95
|
|
|
79
96
|
def connected?
|
data/lib/pushmi_pullyu.rb
CHANGED
|
@@ -26,6 +26,9 @@ module PushmiPullyu
|
|
|
26
26
|
workdir: 'tmp/work',
|
|
27
27
|
process_name: 'pushmi_pullyu',
|
|
28
28
|
queue_name: 'dev:pmpy_queue',
|
|
29
|
+
ingestion_prefix: 'prod:pmpy_ingest_attempt:',
|
|
30
|
+
ingestion_attempts: 15,
|
|
31
|
+
first_failed_wait: 10,
|
|
29
32
|
redis: {
|
|
30
33
|
url: 'redis://localhost:6379'
|
|
31
34
|
},
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: pushmi_pullyu
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.0.4
|
|
4
|
+
version: 2.0.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Shane Murnaghan
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: exe
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date:
|
|
12
|
+
date: 2023-02-28 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: activesupport
|