pushmi_pullyu 2.0.3 → 2.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +2 -3
- data/.gitignore +0 -1
- data/.rubocop.yml +1 -1
- data/CHANGELOG.md +18 -0
- data/Gemfile.lock +251 -0
- data/README.md +6 -3
- data/examples/pushmi_pullyu.yml +3 -0
- data/lib/pushmi_pullyu/aip/downloader.rb +16 -8
- data/lib/pushmi_pullyu/aip.rb +12 -9
- data/lib/pushmi_pullyu/cli.rb +43 -12
- data/lib/pushmi_pullyu/logging.rb +7 -7
- data/lib/pushmi_pullyu/preservation_queue.rb +24 -1
- data/lib/pushmi_pullyu/version.rb +1 -1
- data/lib/pushmi_pullyu.rb +3 -0
- data/pushmi_pullyu.gemspec +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 25051393a56976db185a30c9cd498afd7997854d2a6931f3b6190132cec76a97
|
4
|
+
data.tar.gz: 731b8dbcf8a3d9124f44d57001bda6952bb2da07df874a1f97f52d7e7176c065
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 82b083e9b991fbc0b7c95507415d312bc29b29a4f16fdcfe8689efc2dc4c69542a4c15010ce9b5841cebebe73920de7499512bf1eaab57bb379816bd1dba75ae
|
7
|
+
data.tar.gz: 2439db5f7058ddc7b042894538a592d076c467b34c7211c262d15e3a2d6d436f8a3d785f6b53c3bd68e93566832fed18bbde6b32efa4f0f4e8e08eceacd3ed57
|
data/.github/workflows/ruby.yml
CHANGED
@@ -18,9 +18,8 @@ jobs:
|
|
18
18
|
- name: Set up Ruby
|
19
19
|
uses: ruby/setup-ruby@v1
|
20
20
|
with:
|
21
|
-
ruby-version: 2.
|
22
|
-
|
23
|
-
run: bundle install
|
21
|
+
ruby-version: '2.7'
|
22
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
24
23
|
- name: Lint with RuboCop
|
25
24
|
run: bundle exec rubocop --parallel
|
26
25
|
- name: Run Danger
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -8,6 +8,24 @@ and releases in PushmiPullyu adheres to [Semantic Versioning](https://semver.org
|
|
8
8
|
|
9
9
|
## [Unreleased]
|
10
10
|
|
11
|
+
## [2.0.5] - 2023-02-17
|
12
|
+
|
13
|
+
- Add rescue block to catch exceptions while waiting for next item [#280](https://github.com/ualbertalib/pushmi_pullyu/issues/280)
|
14
|
+
- Add logic to fetch new community and collection information from jupiter and create their AIPS. [#255](https://github.com/ualbertalib/pushmi_pullyu/issues/255)
|
15
|
+
- Add delay to re-ingestion attempts to allow for problems to be fixed [#297](https://github.com/ualbertalib/pushmi_pullyu/issues/297)
|
16
|
+
- Bump git from 1.9.1 to 1.13.0
|
17
|
+
|
18
|
+
## [2.0.4] - 2022-11-22
|
19
|
+
|
20
|
+
- Fix issue with temporary work files not being deleted after a failed swift deposit [#242](https://github.com/ualbertalib/pushmi_pullyu/issues/242)
|
21
|
+
- Bump to Ruby 2.7
|
22
|
+
- Fix issue with entity information consumed even after failed deposit [#232](https://github.com/ualbertalib/pushmi_pullyu/issues/232)
|
23
|
+
- Bump rspec from 3.10.0 to 3.12.0
|
24
|
+
- Bump rollbar from 3.3.0 to 3.3.2
|
25
|
+
- Bump pry-byebug from 3.8.0 to 3.10.1
|
26
|
+
- Bump webmock from 3.14.0 to 3.18.1
|
27
|
+
- Bump rubocop-rspec from 2.6.0 to 2.11.1
|
28
|
+
- Bump timecop from 0.9.4 to 0.9.5
|
11
29
|
## [2.0.3] - 2022-04-28
|
12
30
|
|
13
31
|
- Changed Danger token in Github Actions
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,251 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
pushmi_pullyu (2.0.5)
|
5
|
+
activesupport (>= 5, < 8)
|
6
|
+
bagit (~> 0.4)
|
7
|
+
connection_pool (~> 2.2)
|
8
|
+
daemons (~> 1.2, >= 1.2.4)
|
9
|
+
minitar (~> 0.7)
|
10
|
+
openstack (~> 3.3, >= 3.3.10)
|
11
|
+
rdf (>= 1.99, < 4.0)
|
12
|
+
rdf-n3 (>= 1.99, < 4.0)
|
13
|
+
redis (>= 3.3, < 5.0)
|
14
|
+
rest-client (>= 1.8, < 3.0)
|
15
|
+
rollbar (>= 2.18, < 4.0)
|
16
|
+
uuid (~> 2.3.9)
|
17
|
+
|
18
|
+
GEM
|
19
|
+
remote: https://rubygems.org/
|
20
|
+
specs:
|
21
|
+
activesupport (7.0.4.2)
|
22
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
23
|
+
i18n (>= 1.6, < 2)
|
24
|
+
minitest (>= 5.1)
|
25
|
+
tzinfo (~> 2.0)
|
26
|
+
addressable (2.8.1)
|
27
|
+
public_suffix (>= 2.0.2, < 6.0)
|
28
|
+
amazing_print (1.4.0)
|
29
|
+
ast (2.4.2)
|
30
|
+
bagit (0.4.5)
|
31
|
+
docopt (~> 0.5.0)
|
32
|
+
validatable (~> 1.6)
|
33
|
+
builder (3.2.4)
|
34
|
+
byebug (11.1.3)
|
35
|
+
claide (1.0.3)
|
36
|
+
claide-plugins (0.9.2)
|
37
|
+
cork
|
38
|
+
nap
|
39
|
+
open4 (~> 1.3)
|
40
|
+
coderay (1.1.3)
|
41
|
+
colored2 (3.1.2)
|
42
|
+
concurrent-ruby (1.2.0)
|
43
|
+
connection_pool (2.3.0)
|
44
|
+
cork (0.3.0)
|
45
|
+
colored2 (~> 3.1)
|
46
|
+
crack (0.4.5)
|
47
|
+
rexml
|
48
|
+
daemons (1.4.1)
|
49
|
+
danger (8.4.2)
|
50
|
+
claide (~> 1.0)
|
51
|
+
claide-plugins (>= 0.9.2)
|
52
|
+
colored2 (~> 3.1)
|
53
|
+
cork (~> 0.1)
|
54
|
+
faraday (>= 0.9.0, < 2.0)
|
55
|
+
faraday-http-cache (~> 2.0)
|
56
|
+
git (~> 1.7)
|
57
|
+
kramdown (~> 2.3)
|
58
|
+
kramdown-parser-gfm (~> 1.0)
|
59
|
+
no_proxy_fix
|
60
|
+
octokit (~> 4.7)
|
61
|
+
terminal-table (>= 1, < 4)
|
62
|
+
diff-lcs (1.5.0)
|
63
|
+
docopt (0.5.0)
|
64
|
+
domain_name (0.5.20190701)
|
65
|
+
unf (>= 0.0.5, < 1.0.0)
|
66
|
+
ebnf (2.3.1)
|
67
|
+
amazing_print (~> 1.4)
|
68
|
+
htmlentities (~> 4.3)
|
69
|
+
rdf (~> 3.2)
|
70
|
+
scanf (~> 1.0)
|
71
|
+
sxp (~> 1.2)
|
72
|
+
unicode-types (~> 1.7)
|
73
|
+
faraday (1.8.0)
|
74
|
+
faraday-em_http (~> 1.0)
|
75
|
+
faraday-em_synchrony (~> 1.0)
|
76
|
+
faraday-excon (~> 1.1)
|
77
|
+
faraday-httpclient (~> 1.0.1)
|
78
|
+
faraday-net_http (~> 1.0)
|
79
|
+
faraday-net_http_persistent (~> 1.1)
|
80
|
+
faraday-patron (~> 1.0)
|
81
|
+
faraday-rack (~> 1.0)
|
82
|
+
multipart-post (>= 1.2, < 3)
|
83
|
+
ruby2_keywords (>= 0.0.4)
|
84
|
+
faraday-em_http (1.0.0)
|
85
|
+
faraday-em_synchrony (1.0.0)
|
86
|
+
faraday-excon (1.1.0)
|
87
|
+
faraday-http-cache (2.2.0)
|
88
|
+
faraday (>= 0.8)
|
89
|
+
faraday-httpclient (1.0.1)
|
90
|
+
faraday-net_http (1.0.1)
|
91
|
+
faraday-net_http_persistent (1.2.0)
|
92
|
+
faraday-patron (1.0.0)
|
93
|
+
faraday-rack (1.0.0)
|
94
|
+
git (1.13.0)
|
95
|
+
addressable (~> 2.8)
|
96
|
+
rchardet (~> 1.8)
|
97
|
+
hashdiff (1.0.1)
|
98
|
+
htmlentities (4.3.4)
|
99
|
+
http-accept (1.7.0)
|
100
|
+
http-cookie (1.0.5)
|
101
|
+
domain_name (~> 0.5)
|
102
|
+
i18n (1.12.0)
|
103
|
+
concurrent-ruby (~> 1.0)
|
104
|
+
json (2.6.3)
|
105
|
+
kramdown (2.3.1)
|
106
|
+
rexml
|
107
|
+
kramdown-parser-gfm (1.1.0)
|
108
|
+
kramdown (~> 2.0)
|
109
|
+
link_header (0.0.8)
|
110
|
+
logger (1.5.3)
|
111
|
+
macaddr (1.7.2)
|
112
|
+
systemu (~> 2.6.5)
|
113
|
+
matrix (0.4.2)
|
114
|
+
method_source (1.0.0)
|
115
|
+
mime-types (3.4.1)
|
116
|
+
mime-types-data (~> 3.2015)
|
117
|
+
mime-types-data (3.2022.0105)
|
118
|
+
minitar (0.9)
|
119
|
+
minitest (5.17.0)
|
120
|
+
multipart-post (2.1.1)
|
121
|
+
nap (1.1.0)
|
122
|
+
net-http-persistent (4.0.1)
|
123
|
+
connection_pool (~> 2.2)
|
124
|
+
netrc (0.11.0)
|
125
|
+
no_proxy_fix (0.1.2)
|
126
|
+
octokit (4.21.0)
|
127
|
+
faraday (>= 0.9)
|
128
|
+
sawyer (~> 0.8.0, >= 0.5.3)
|
129
|
+
open4 (1.3.4)
|
130
|
+
openstack (3.3.21)
|
131
|
+
json
|
132
|
+
parallel (1.22.1)
|
133
|
+
parser (3.1.2.1)
|
134
|
+
ast (~> 2.4.1)
|
135
|
+
pry (0.14.1)
|
136
|
+
coderay (~> 1.1)
|
137
|
+
method_source (~> 1.0)
|
138
|
+
pry-byebug (3.10.1)
|
139
|
+
byebug (~> 11.0)
|
140
|
+
pry (>= 0.13, < 0.15)
|
141
|
+
public_suffix (5.0.0)
|
142
|
+
rainbow (3.1.1)
|
143
|
+
rake (13.0.6)
|
144
|
+
rchardet (1.8.0)
|
145
|
+
rdf (3.2.9)
|
146
|
+
link_header (~> 0.0, >= 0.0.8)
|
147
|
+
rdf-aggregate-repo (3.2.1)
|
148
|
+
rdf (~> 3.2)
|
149
|
+
rdf-n3 (3.2.1)
|
150
|
+
ebnf (~> 2.2)
|
151
|
+
rdf (~> 3.2)
|
152
|
+
sparql (~> 3.2)
|
153
|
+
sxp (~> 1.2)
|
154
|
+
rdf-xsd (3.2.1)
|
155
|
+
rdf (~> 3.2)
|
156
|
+
rexml (~> 3.2)
|
157
|
+
redis (4.8.1)
|
158
|
+
regexp_parser (2.6.0)
|
159
|
+
rest-client (2.1.0)
|
160
|
+
http-accept (>= 1.7.0, < 2.0)
|
161
|
+
http-cookie (>= 1.0.2, < 2.0)
|
162
|
+
mime-types (>= 1.16, < 4.0)
|
163
|
+
netrc (~> 0.8)
|
164
|
+
rexml (3.2.5)
|
165
|
+
rollbar (3.4.0)
|
166
|
+
rspec (3.12.0)
|
167
|
+
rspec-core (~> 3.12.0)
|
168
|
+
rspec-expectations (~> 3.12.0)
|
169
|
+
rspec-mocks (~> 3.12.0)
|
170
|
+
rspec-core (3.12.0)
|
171
|
+
rspec-support (~> 3.12.0)
|
172
|
+
rspec-expectations (3.12.0)
|
173
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
174
|
+
rspec-support (~> 3.12.0)
|
175
|
+
rspec-mocks (3.12.0)
|
176
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
177
|
+
rspec-support (~> 3.12.0)
|
178
|
+
rspec-support (3.12.0)
|
179
|
+
rubocop (1.28.1)
|
180
|
+
parallel (~> 1.10)
|
181
|
+
parser (>= 3.1.0.0)
|
182
|
+
rainbow (>= 2.2.2, < 4.0)
|
183
|
+
regexp_parser (>= 1.8, < 3.0)
|
184
|
+
rexml
|
185
|
+
rubocop-ast (>= 1.17.0, < 2.0)
|
186
|
+
ruby-progressbar (~> 1.7)
|
187
|
+
unicode-display_width (>= 1.4.0, < 3.0)
|
188
|
+
rubocop-ast (1.23.0)
|
189
|
+
parser (>= 3.1.1.0)
|
190
|
+
rubocop-rspec (2.11.1)
|
191
|
+
rubocop (~> 1.19)
|
192
|
+
ruby-progressbar (1.11.0)
|
193
|
+
ruby2_keywords (0.0.5)
|
194
|
+
sawyer (0.8.2)
|
195
|
+
addressable (>= 2.3.5)
|
196
|
+
faraday (> 0.8, < 2.0)
|
197
|
+
scanf (1.0.0)
|
198
|
+
sparql (3.2.5)
|
199
|
+
builder (~> 3.2)
|
200
|
+
ebnf (~> 2.2, >= 2.3.1)
|
201
|
+
logger (~> 1.5)
|
202
|
+
rdf (~> 3.2, >= 3.2.8)
|
203
|
+
rdf-aggregate-repo (~> 3.2)
|
204
|
+
rdf-xsd (~> 3.2)
|
205
|
+
sparql-client (~> 3.2, >= 3.2.1)
|
206
|
+
sxp (~> 1.2, >= 1.2.2)
|
207
|
+
sparql-client (3.2.1)
|
208
|
+
net-http-persistent (~> 4.0, >= 4.0.1)
|
209
|
+
rdf (~> 3.2, >= 3.2.6)
|
210
|
+
sxp (1.2.3)
|
211
|
+
matrix (~> 0.4)
|
212
|
+
rdf (~> 3.2)
|
213
|
+
systemu (2.6.5)
|
214
|
+
terminal-table (3.0.2)
|
215
|
+
unicode-display_width (>= 1.1.1, < 3)
|
216
|
+
timecop (0.9.6)
|
217
|
+
tzinfo (2.0.6)
|
218
|
+
concurrent-ruby (~> 1.0)
|
219
|
+
unf (0.1.4)
|
220
|
+
unf_ext
|
221
|
+
unf_ext (0.0.8.2)
|
222
|
+
unicode-display_width (2.3.0)
|
223
|
+
unicode-types (1.8.0)
|
224
|
+
uuid (2.3.9)
|
225
|
+
macaddr (~> 1.0)
|
226
|
+
validatable (1.6.7)
|
227
|
+
vcr (5.1.0)
|
228
|
+
webmock (3.18.1)
|
229
|
+
addressable (>= 2.8.0)
|
230
|
+
crack (>= 0.3.2)
|
231
|
+
hashdiff (>= 0.4.0, < 2.0.0)
|
232
|
+
|
233
|
+
PLATFORMS
|
234
|
+
x86_64-linux
|
235
|
+
|
236
|
+
DEPENDENCIES
|
237
|
+
bundler (~> 2.0)
|
238
|
+
danger (~> 8.0)
|
239
|
+
pry (~> 0.10, >= 0.10.4)
|
240
|
+
pry-byebug (~> 3.6)
|
241
|
+
pushmi_pullyu!
|
242
|
+
rake (~> 13.0)
|
243
|
+
rspec (~> 3.0)
|
244
|
+
rubocop (~> 1.23)
|
245
|
+
rubocop-rspec (~> 2.6)
|
246
|
+
timecop (~> 0.8)
|
247
|
+
vcr (~> 5.0)
|
248
|
+
webmock (~> 3.3)
|
249
|
+
|
250
|
+
BUNDLED WITH
|
251
|
+
2.3.19
|
data/README.md
CHANGED
@@ -26,7 +26,7 @@ Its primary job is to manage the flow of content from Jupiter into Swift for pre
|
|
26
26
|
|
27
27
|
## Requirements
|
28
28
|
|
29
|
-
PushmiPullyu supports Ruby 2.
|
29
|
+
PushmiPullyu supports Ruby 2.7
|
30
30
|
|
31
31
|
## Installation
|
32
32
|
|
@@ -66,7 +66,10 @@ Specific options:
|
|
66
66
|
-W, --workdir PATH Path for directory where AIP creation work takes place in
|
67
67
|
-N, --process_name NAME Name of the application process
|
68
68
|
-m, --monitor Start monitor process for a daemon
|
69
|
-
-q, --queue NAME Name of the queue to read from
|
69
|
+
-q, --queue NAME Name of the queue to read from
|
70
|
+
-i, --ingestion_prefix PREFIX Prefix for keys used in counting the number of failed ingestion attempts
|
71
|
+
-x, --ingestion_attempts NUMBER Max number of attempts to try ingesting an entity
|
72
|
+
-f, --first_failed_wait NUMBER Time in seconds to wait after first failed entity deposit. This time will double every failed attempt
|
70
73
|
|
71
74
|
Common options:
|
72
75
|
-v, --version Show version
|
@@ -144,7 +147,7 @@ This will cut a tag version, builds the gem, and pushes the gem up to Rubygems
|
|
144
147
|
|
145
148
|
Note: You may need permission to push a gem up to Rubygems!
|
146
149
|
You will first need to create an account on rubygems.org.
|
147
|
-
Once you have an account, bug
|
150
|
+
Once you have an account, bug @pgwillia (Tricia Jenkins), @lagoan (Omar Rodriguez-Arenas), or @henryzhang87 (Henry Zhang) to [add you as an owner](http://guides.rubygems.org/command-reference/#gem-owner) to pushmi_pullyu Rubygem. Once you are an owner you should be able to push new versions of pushmi_pullyu up to Rubygems
|
148
151
|
|
149
152
|
## Deployment
|
150
153
|
|
data/examples/pushmi_pullyu.yml
CHANGED
@@ -35,6 +35,11 @@ class PushmiPullyu::AIP::Downloader
|
|
35
35
|
# Main object metadata
|
36
36
|
download_and_log(object_aip_paths[:main_object_remote],
|
37
37
|
object_aip_paths[:main_object_local])
|
38
|
+
|
39
|
+
# Communities and collections do not have their own files.
|
40
|
+
return unless can_have_files?
|
41
|
+
|
42
|
+
FileUtils.mkdir_p(object_aip_paths[:file_sets_directory_local])
|
38
43
|
download_and_log(object_aip_paths[:file_sets_remote],
|
39
44
|
object_aip_paths[:file_sets_local])
|
40
45
|
|
@@ -139,25 +144,26 @@ class PushmiPullyu::AIP::Downloader
|
|
139
144
|
PushmiPullyu::Logging.log_aip_activity(@aip_directory, message)
|
140
145
|
end
|
141
146
|
|
147
|
+
def can_have_files?
|
148
|
+
@entity[:type] == 'items' || @entity[:type] == 'theses'
|
149
|
+
end
|
150
|
+
|
142
151
|
### Directories
|
143
152
|
|
144
153
|
def aip_dirs
|
145
154
|
@aip_dirs ||= {
|
146
155
|
objects: "#{@aip_directory}/data/objects",
|
147
156
|
metadata: "#{@aip_directory}/data/objects/metadata",
|
148
|
-
|
149
|
-
files_metadata: "#{@aip_directory}/data/objects/metadata/files_metadata",
|
150
|
-
logs: "#{@aip_directory}/data/logs",
|
151
|
-
file_logs: "#{@aip_directory}/data/logs/files_logs"
|
157
|
+
logs: "#{@aip_directory}/data/logs"
|
152
158
|
}
|
153
159
|
end
|
154
160
|
|
155
161
|
def file_set_dirs(file_set_uuid)
|
156
162
|
@file_set_dirs ||= {}
|
157
163
|
@file_set_dirs[file_set_uuid] ||= {
|
158
|
-
|
159
|
-
|
160
|
-
|
164
|
+
files: "#{@aip_directory}/data/objects/files/#{file_set_uuid}",
|
165
|
+
logs: "#{@aip_directory}/data/logs/files_logs/#{file_set_uuid}",
|
166
|
+
metadata: "#{@aip_directory}/data/objects/metadata/files_metadata/#{file_set_uuid}"
|
161
167
|
}
|
162
168
|
end
|
163
169
|
|
@@ -193,7 +199,9 @@ class PushmiPullyu::AIP::Downloader
|
|
193
199
|
main_object_remote: object_uri,
|
194
200
|
main_object_local: "#{aip_dirs[:metadata]}/object_metadata.n3",
|
195
201
|
file_sets_remote: "#{object_uri}/filesets",
|
196
|
-
|
202
|
+
# This directory needs to be created before we can download the file order information
|
203
|
+
file_sets_directory_local: "#{@aip_directory}/data/objects/metadata/files_metadata",
|
204
|
+
file_sets_local: "#{@aip_directory}/data/objects/metadata/files_metadata/file_order.xml",
|
197
205
|
# This is downloaded for processing but not saved
|
198
206
|
file_paths_remote: "#{object_uri}/file_paths"
|
199
207
|
}.freeze
|
data/lib/pushmi_pullyu/aip.rb
CHANGED
@@ -6,19 +6,22 @@ module PushmiPullyu::AIP
|
|
6
6
|
module_function
|
7
7
|
|
8
8
|
def create(entity)
|
9
|
-
raise EntityInvalid if entity.
|
10
|
-
UUID.validate(entity[:uuid])
|
9
|
+
raise EntityInvalid if entity.blank? ||
|
10
|
+
UUID.validate(entity[:uuid]).blank? ||
|
11
11
|
entity[:type].blank?
|
12
12
|
|
13
13
|
aip_directory = "#{PushmiPullyu.options[:workdir]}/#{entity[:uuid]}"
|
14
14
|
aip_filename = "#{aip_directory}.tar"
|
15
|
+
begin
|
16
|
+
PushmiPullyu::AIP::Downloader.new(entity, aip_directory).run
|
17
|
+
PushmiPullyu::AIP::Creator.new(entity[:uuid], aip_directory, aip_filename).run
|
15
18
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
19
|
+
yield aip_filename, aip_directory
|
20
|
+
# Here we will ensure the files are removed even if an exception comes up.
|
21
|
+
# You will notice there is no rescue block. We will catch exceptions in `PushmiPullyu::CLI`
|
22
|
+
ensure
|
23
|
+
FileUtils.rm_rf(aip_filename)
|
24
|
+
FileUtils.rm_rf(aip_directory)
|
25
|
+
end
|
23
26
|
end
|
24
27
|
end
|
data/lib/pushmi_pullyu/cli.rb
CHANGED
@@ -148,6 +148,21 @@ class PushmiPullyu::CLI
|
|
148
148
|
opts[:queue_name] = queue
|
149
149
|
end
|
150
150
|
|
151
|
+
o.on('-i', '--ingestion_prefix PREFIX',
|
152
|
+
'Prefix for keys used in counting the number of failed ingestion attempts') do |prefix|
|
153
|
+
opts[:ingestion_prefix] = prefix
|
154
|
+
end
|
155
|
+
|
156
|
+
o.on('-x', '--ingestion_attempts NUMBER', Integer,
|
157
|
+
'Max number of attempts to try ingesting an entity') do |ingestion_attempts|
|
158
|
+
opts[:ingestion_attempts] = ingestion_attempts
|
159
|
+
end
|
160
|
+
|
161
|
+
o.on('-f', '--first_failed_wait NUMBER', Integer,
|
162
|
+
'Time in seconds to wait after first failed deposit. Time will double every failed attempt') do |failed_wait|
|
163
|
+
opts[:first_failed_wait] = failed_wait
|
164
|
+
end
|
165
|
+
|
151
166
|
o.separator ''
|
152
167
|
o.separator 'Common options:'
|
153
168
|
|
@@ -182,12 +197,12 @@ class PushmiPullyu::CLI
|
|
182
197
|
end
|
183
198
|
|
184
199
|
def run_preservation_cycle
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
200
|
+
begin
|
201
|
+
entity = queue.wait_next_item
|
202
|
+
return unless entity && entity[:type].present? && entity[:uuid].present?
|
203
|
+
rescue StandardError => e
|
204
|
+
log_exception(e)
|
205
|
+
end
|
191
206
|
|
192
207
|
# add additional information about the error context to errors that occur while processing this item.
|
193
208
|
Rollbar.scoped(entity_uuid: entity[:uuid]) do
|
@@ -199,13 +214,24 @@ class PushmiPullyu::CLI
|
|
199
214
|
# Log successful preservation event to the log files
|
200
215
|
PushmiPullyu::Logging.log_preservation_event(deposited_file, aip_directory)
|
201
216
|
end
|
202
|
-
|
217
|
+
# An EntityInvalid exception means there is a problem with the entity information format so there is no point in
|
218
|
+
# re-adding it to the queue as it will always fail
|
219
|
+
rescue PushmiPullyu::AIP::EntityInvalid => e
|
220
|
+
rescue StandardError => e
|
221
|
+
begin
|
222
|
+
queue.add_entity_in_timeframe(entity)
|
223
|
+
rescue MaxDepositAttemptsReached => e
|
224
|
+
log_exception(e)
|
225
|
+
end
|
226
|
+
|
227
|
+
# rubocop:disable Lint/RescueException
|
228
|
+
# Something other than a StandardError exception means something happened which we were not expecting!
|
229
|
+
# Make sure we log the problem
|
203
230
|
rescue Exception => e
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
# rubocop:enable Lint/RescueException
|
231
|
+
raise e
|
232
|
+
# rubocop:enable Lint/RescueException
|
233
|
+
ensure
|
234
|
+
log_exception(e)
|
209
235
|
end
|
210
236
|
end
|
211
237
|
|
@@ -280,4 +306,9 @@ class PushmiPullyu::CLI
|
|
280
306
|
end
|
281
307
|
end
|
282
308
|
|
309
|
+
def log_exception(exception)
|
310
|
+
Rollbar.error(exception)
|
311
|
+
logger.error(exception)
|
312
|
+
end
|
313
|
+
|
283
314
|
end
|
@@ -47,12 +47,12 @@ module PushmiPullyu::Logging
|
|
47
47
|
preservation_logger = Logger.new("#{PushmiPullyu.options[:logdir]}/preservation_events.log")
|
48
48
|
preservation_json_logger = Logger.new("#{PushmiPullyu.options[:logdir]}/preservation_events.json")
|
49
49
|
|
50
|
-
message = "#{deposited_file.name} was successfully deposited into Swift Storage!\n"\
|
51
|
-
"Here are the details of this preservation event:\n"\
|
52
|
-
"\tUUID: '#{deposited_file.name}'\n"\
|
53
|
-
"\tTimestamp of Completion: '#{deposited_file.last_modified}'\n"\
|
54
|
-
"\tAIP Checksum: '#{deposited_file.etag}'\n"\
|
55
|
-
"\tMetadata: #{deposited_file.metadata}\n"\
|
50
|
+
message = "#{deposited_file.name} was successfully deposited into Swift Storage!\n" \
|
51
|
+
"Here are the details of this preservation event:\n" \
|
52
|
+
"\tUUID: '#{deposited_file.name}'\n" \
|
53
|
+
"\tTimestamp of Completion: '#{deposited_file.last_modified}'\n" \
|
54
|
+
"\tAIP Checksum: '#{deposited_file.etag}'\n" \
|
55
|
+
"\tMetadata: #{deposited_file.metadata}\n" \
|
56
56
|
|
57
57
|
file_details = file_log_details(aip_directory)
|
58
58
|
|
@@ -155,7 +155,7 @@ module PushmiPullyu::Logging
|
|
155
155
|
fileset_name: File.dirname(file).split('/')[-1],
|
156
156
|
file_name: File.basename(file),
|
157
157
|
file_size: File.size(file),
|
158
|
-
file_extension: File.extname(file).strip.downcase[1
|
158
|
+
file_extension: File.extname(file).strip.downcase[1..]
|
159
159
|
}
|
160
160
|
end
|
161
161
|
end
|
@@ -20,6 +20,7 @@ require 'connection_pool'
|
|
20
20
|
class PushmiPullyu::PreservationQueue
|
21
21
|
|
22
22
|
class ConnectionError < StandardError; end
|
23
|
+
class MaxDepositAttemptsReached < StandardError; end
|
23
24
|
|
24
25
|
def initialize(redis_url: 'redis://localhost:6379',
|
25
26
|
pool_opts: { size: 1, timeout: 5 },
|
@@ -50,7 +51,8 @@ class PushmiPullyu::PreservationQueue
|
|
50
51
|
rd.multi do |tx|
|
51
52
|
tx.zrem(@queue_name, element) # remove the top element transactionally
|
52
53
|
end
|
53
|
-
|
54
|
+
|
55
|
+
return JSON.parse(element, { symbolize_names: true })
|
54
56
|
else
|
55
57
|
rd.unwatch # cancel the transaction since there was nothing in the queue
|
56
58
|
return nil
|
@@ -68,6 +70,27 @@ class PushmiPullyu::PreservationQueue
|
|
68
70
|
end
|
69
71
|
end
|
70
72
|
|
73
|
+
def add_entity_in_timeframe(entity)
|
74
|
+
entity_attempts_key = "#{PushmiPullyu.options[:ingestion_prefix]}#{entity[:uuid]}"
|
75
|
+
|
76
|
+
@redis.with do |connection|
|
77
|
+
# separate information for priority information and queue
|
78
|
+
deposit_attempt = connection.incr entity_attempts_key
|
79
|
+
|
80
|
+
if deposit_attempt <= PushmiPullyu.options[:ingestion_attempts]
|
81
|
+
connection.zadd @queue_name, Time.now.to_f + self.class.extra_wait_time(deposit_attempt),
|
82
|
+
entity.slice(:uuid, :type).to_json
|
83
|
+
else
|
84
|
+
connection.del entity_attempts_key
|
85
|
+
raise MaxDepositAttemptsReached
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def self.extra_wait_time(deposit_attempt)
|
91
|
+
(2**deposit_attempt) * PushmiPullyu.options[:first_failed_wait]
|
92
|
+
end
|
93
|
+
|
71
94
|
protected
|
72
95
|
|
73
96
|
def connected?
|
data/lib/pushmi_pullyu.rb
CHANGED
@@ -26,6 +26,9 @@ module PushmiPullyu
|
|
26
26
|
workdir: 'tmp/work',
|
27
27
|
process_name: 'pushmi_pullyu',
|
28
28
|
queue_name: 'dev:pmpy_queue',
|
29
|
+
ingestion_prefix: 'prod:pmpy_ingest_attempt:',
|
30
|
+
ingestion_attempts: 15,
|
31
|
+
first_failed_wait: 10,
|
29
32
|
redis: {
|
30
33
|
url: 'redis://localhost:6379'
|
31
34
|
},
|
data/pushmi_pullyu.gemspec
CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
20
20
|
spec.require_paths = ['lib']
|
21
21
|
|
22
|
-
spec.required_ruby_version = '>= 2.
|
22
|
+
spec.required_ruby_version = '>= 2.7'
|
23
23
|
|
24
24
|
spec.add_runtime_dependency 'activesupport', '>= 5', '< 8'
|
25
25
|
spec.add_runtime_dependency 'bagit', '~> 0.4'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pushmi_pullyu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shane Murnaghan
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2023-02-28 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|
@@ -409,6 +409,7 @@ files:
|
|
409
409
|
- CHANGELOG.md
|
410
410
|
- Dangerfile
|
411
411
|
- Gemfile
|
412
|
+
- Gemfile.lock
|
412
413
|
- LICENSE.txt
|
413
414
|
- README.md
|
414
415
|
- Rakefile
|
@@ -446,7 +447,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
446
447
|
requirements:
|
447
448
|
- - ">="
|
448
449
|
- !ruby/object:Gem::Version
|
449
|
-
version: '2.
|
450
|
+
version: '2.7'
|
450
451
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
451
452
|
requirements:
|
452
453
|
- - ">="
|