pushmi_pullyu 2.0.3 → 2.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +2 -3
- data/.gitignore +0 -1
- data/.rubocop.yml +1 -1
- data/CHANGELOG.md +18 -0
- data/Gemfile.lock +251 -0
- data/README.md +6 -3
- data/examples/pushmi_pullyu.yml +3 -0
- data/lib/pushmi_pullyu/aip/downloader.rb +16 -8
- data/lib/pushmi_pullyu/aip.rb +12 -9
- data/lib/pushmi_pullyu/cli.rb +43 -12
- data/lib/pushmi_pullyu/logging.rb +7 -7
- data/lib/pushmi_pullyu/preservation_queue.rb +24 -1
- data/lib/pushmi_pullyu/version.rb +1 -1
- data/lib/pushmi_pullyu.rb +3 -0
- data/pushmi_pullyu.gemspec +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 25051393a56976db185a30c9cd498afd7997854d2a6931f3b6190132cec76a97
|
|
4
|
+
data.tar.gz: 731b8dbcf8a3d9124f44d57001bda6952bb2da07df874a1f97f52d7e7176c065
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 82b083e9b991fbc0b7c95507415d312bc29b29a4f16fdcfe8689efc2dc4c69542a4c15010ce9b5841cebebe73920de7499512bf1eaab57bb379816bd1dba75ae
|
|
7
|
+
data.tar.gz: 2439db5f7058ddc7b042894538a592d076c467b34c7211c262d15e3a2d6d436f8a3d785f6b53c3bd68e93566832fed18bbde6b32efa4f0f4e8e08eceacd3ed57
|
data/.github/workflows/ruby.yml
CHANGED
|
@@ -18,9 +18,8 @@ jobs:
|
|
|
18
18
|
- name: Set up Ruby
|
|
19
19
|
uses: ruby/setup-ruby@v1
|
|
20
20
|
with:
|
|
21
|
-
ruby-version: 2.
|
|
22
|
-
|
|
23
|
-
run: bundle install
|
|
21
|
+
ruby-version: '2.7'
|
|
22
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
|
24
23
|
- name: Lint with RuboCop
|
|
25
24
|
run: bundle exec rubocop --parallel
|
|
26
25
|
- name: Run Danger
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -8,6 +8,24 @@ and releases in PushmiPullyu adheres to [Semantic Versioning](https://semver.org
|
|
|
8
8
|
|
|
9
9
|
## [Unreleased]
|
|
10
10
|
|
|
11
|
+
## [2.0.5] - 2023-02-17
|
|
12
|
+
|
|
13
|
+
- Add rescue block to catch exceptions while waiting for next item [#280](https://github.com/ualbertalib/pushmi_pullyu/issues/280)
|
|
14
|
+
- Add logic to fetch new community and collection information from jupiter and create their AIPs. [#255](https://github.com/ualbertalib/pushmi_pullyu/issues/255)
|
|
15
|
+
- Add delay to re-ingestion attempts to allow for problems to be fixed [#297](https://github.com/ualbertalib/pushmi_pullyu/issues/297)
|
|
16
|
+
- Bump git from 1.9.1 to 1.13.0
|
|
17
|
+
|
|
18
|
+
## [2.0.4] - 2022-11-22
|
|
19
|
+
|
|
20
|
+
- Fix issue with temporary work files not being deleted after a failed swift deposit [#242](https://github.com/ualbertalib/pushmi_pullyu/issues/242)
|
|
21
|
+
- Bump to Ruby 2.7
|
|
22
|
+
- Fix issue with entity information consumed even after failed deposit [#232](https://github.com/ualbertalib/pushmi_pullyu/issues/232)
|
|
23
|
+
- Bump rspec from 3.10.0 to 3.12.0
|
|
24
|
+
- Bump rollbar from 3.3.0 to 3.3.2
|
|
25
|
+
- Bump pry-byebug from 3.8.0 to 3.10.1
|
|
26
|
+
- Bump webmock from 3.14.0 to 3.18.1
|
|
27
|
+
- Bump rubocop-rspec from 2.6.0 to 2.11.1
|
|
28
|
+
- Bump timecop from 0.9.4 to 0.9.5
|
|
11
29
|
## [2.0.3] - 2022-04-28
|
|
12
30
|
|
|
13
31
|
- Changed Danger token in Github Actions
|
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
PATH
|
|
2
|
+
remote: .
|
|
3
|
+
specs:
|
|
4
|
+
pushmi_pullyu (2.0.5)
|
|
5
|
+
activesupport (>= 5, < 8)
|
|
6
|
+
bagit (~> 0.4)
|
|
7
|
+
connection_pool (~> 2.2)
|
|
8
|
+
daemons (~> 1.2, >= 1.2.4)
|
|
9
|
+
minitar (~> 0.7)
|
|
10
|
+
openstack (~> 3.3, >= 3.3.10)
|
|
11
|
+
rdf (>= 1.99, < 4.0)
|
|
12
|
+
rdf-n3 (>= 1.99, < 4.0)
|
|
13
|
+
redis (>= 3.3, < 5.0)
|
|
14
|
+
rest-client (>= 1.8, < 3.0)
|
|
15
|
+
rollbar (>= 2.18, < 4.0)
|
|
16
|
+
uuid (~> 2.3.9)
|
|
17
|
+
|
|
18
|
+
GEM
|
|
19
|
+
remote: https://rubygems.org/
|
|
20
|
+
specs:
|
|
21
|
+
activesupport (7.0.4.2)
|
|
22
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
|
23
|
+
i18n (>= 1.6, < 2)
|
|
24
|
+
minitest (>= 5.1)
|
|
25
|
+
tzinfo (~> 2.0)
|
|
26
|
+
addressable (2.8.1)
|
|
27
|
+
public_suffix (>= 2.0.2, < 6.0)
|
|
28
|
+
amazing_print (1.4.0)
|
|
29
|
+
ast (2.4.2)
|
|
30
|
+
bagit (0.4.5)
|
|
31
|
+
docopt (~> 0.5.0)
|
|
32
|
+
validatable (~> 1.6)
|
|
33
|
+
builder (3.2.4)
|
|
34
|
+
byebug (11.1.3)
|
|
35
|
+
claide (1.0.3)
|
|
36
|
+
claide-plugins (0.9.2)
|
|
37
|
+
cork
|
|
38
|
+
nap
|
|
39
|
+
open4 (~> 1.3)
|
|
40
|
+
coderay (1.1.3)
|
|
41
|
+
colored2 (3.1.2)
|
|
42
|
+
concurrent-ruby (1.2.0)
|
|
43
|
+
connection_pool (2.3.0)
|
|
44
|
+
cork (0.3.0)
|
|
45
|
+
colored2 (~> 3.1)
|
|
46
|
+
crack (0.4.5)
|
|
47
|
+
rexml
|
|
48
|
+
daemons (1.4.1)
|
|
49
|
+
danger (8.4.2)
|
|
50
|
+
claide (~> 1.0)
|
|
51
|
+
claide-plugins (>= 0.9.2)
|
|
52
|
+
colored2 (~> 3.1)
|
|
53
|
+
cork (~> 0.1)
|
|
54
|
+
faraday (>= 0.9.0, < 2.0)
|
|
55
|
+
faraday-http-cache (~> 2.0)
|
|
56
|
+
git (~> 1.7)
|
|
57
|
+
kramdown (~> 2.3)
|
|
58
|
+
kramdown-parser-gfm (~> 1.0)
|
|
59
|
+
no_proxy_fix
|
|
60
|
+
octokit (~> 4.7)
|
|
61
|
+
terminal-table (>= 1, < 4)
|
|
62
|
+
diff-lcs (1.5.0)
|
|
63
|
+
docopt (0.5.0)
|
|
64
|
+
domain_name (0.5.20190701)
|
|
65
|
+
unf (>= 0.0.5, < 1.0.0)
|
|
66
|
+
ebnf (2.3.1)
|
|
67
|
+
amazing_print (~> 1.4)
|
|
68
|
+
htmlentities (~> 4.3)
|
|
69
|
+
rdf (~> 3.2)
|
|
70
|
+
scanf (~> 1.0)
|
|
71
|
+
sxp (~> 1.2)
|
|
72
|
+
unicode-types (~> 1.7)
|
|
73
|
+
faraday (1.8.0)
|
|
74
|
+
faraday-em_http (~> 1.0)
|
|
75
|
+
faraday-em_synchrony (~> 1.0)
|
|
76
|
+
faraday-excon (~> 1.1)
|
|
77
|
+
faraday-httpclient (~> 1.0.1)
|
|
78
|
+
faraday-net_http (~> 1.0)
|
|
79
|
+
faraday-net_http_persistent (~> 1.1)
|
|
80
|
+
faraday-patron (~> 1.0)
|
|
81
|
+
faraday-rack (~> 1.0)
|
|
82
|
+
multipart-post (>= 1.2, < 3)
|
|
83
|
+
ruby2_keywords (>= 0.0.4)
|
|
84
|
+
faraday-em_http (1.0.0)
|
|
85
|
+
faraday-em_synchrony (1.0.0)
|
|
86
|
+
faraday-excon (1.1.0)
|
|
87
|
+
faraday-http-cache (2.2.0)
|
|
88
|
+
faraday (>= 0.8)
|
|
89
|
+
faraday-httpclient (1.0.1)
|
|
90
|
+
faraday-net_http (1.0.1)
|
|
91
|
+
faraday-net_http_persistent (1.2.0)
|
|
92
|
+
faraday-patron (1.0.0)
|
|
93
|
+
faraday-rack (1.0.0)
|
|
94
|
+
git (1.13.0)
|
|
95
|
+
addressable (~> 2.8)
|
|
96
|
+
rchardet (~> 1.8)
|
|
97
|
+
hashdiff (1.0.1)
|
|
98
|
+
htmlentities (4.3.4)
|
|
99
|
+
http-accept (1.7.0)
|
|
100
|
+
http-cookie (1.0.5)
|
|
101
|
+
domain_name (~> 0.5)
|
|
102
|
+
i18n (1.12.0)
|
|
103
|
+
concurrent-ruby (~> 1.0)
|
|
104
|
+
json (2.6.3)
|
|
105
|
+
kramdown (2.3.1)
|
|
106
|
+
rexml
|
|
107
|
+
kramdown-parser-gfm (1.1.0)
|
|
108
|
+
kramdown (~> 2.0)
|
|
109
|
+
link_header (0.0.8)
|
|
110
|
+
logger (1.5.3)
|
|
111
|
+
macaddr (1.7.2)
|
|
112
|
+
systemu (~> 2.6.5)
|
|
113
|
+
matrix (0.4.2)
|
|
114
|
+
method_source (1.0.0)
|
|
115
|
+
mime-types (3.4.1)
|
|
116
|
+
mime-types-data (~> 3.2015)
|
|
117
|
+
mime-types-data (3.2022.0105)
|
|
118
|
+
minitar (0.9)
|
|
119
|
+
minitest (5.17.0)
|
|
120
|
+
multipart-post (2.1.1)
|
|
121
|
+
nap (1.1.0)
|
|
122
|
+
net-http-persistent (4.0.1)
|
|
123
|
+
connection_pool (~> 2.2)
|
|
124
|
+
netrc (0.11.0)
|
|
125
|
+
no_proxy_fix (0.1.2)
|
|
126
|
+
octokit (4.21.0)
|
|
127
|
+
faraday (>= 0.9)
|
|
128
|
+
sawyer (~> 0.8.0, >= 0.5.3)
|
|
129
|
+
open4 (1.3.4)
|
|
130
|
+
openstack (3.3.21)
|
|
131
|
+
json
|
|
132
|
+
parallel (1.22.1)
|
|
133
|
+
parser (3.1.2.1)
|
|
134
|
+
ast (~> 2.4.1)
|
|
135
|
+
pry (0.14.1)
|
|
136
|
+
coderay (~> 1.1)
|
|
137
|
+
method_source (~> 1.0)
|
|
138
|
+
pry-byebug (3.10.1)
|
|
139
|
+
byebug (~> 11.0)
|
|
140
|
+
pry (>= 0.13, < 0.15)
|
|
141
|
+
public_suffix (5.0.0)
|
|
142
|
+
rainbow (3.1.1)
|
|
143
|
+
rake (13.0.6)
|
|
144
|
+
rchardet (1.8.0)
|
|
145
|
+
rdf (3.2.9)
|
|
146
|
+
link_header (~> 0.0, >= 0.0.8)
|
|
147
|
+
rdf-aggregate-repo (3.2.1)
|
|
148
|
+
rdf (~> 3.2)
|
|
149
|
+
rdf-n3 (3.2.1)
|
|
150
|
+
ebnf (~> 2.2)
|
|
151
|
+
rdf (~> 3.2)
|
|
152
|
+
sparql (~> 3.2)
|
|
153
|
+
sxp (~> 1.2)
|
|
154
|
+
rdf-xsd (3.2.1)
|
|
155
|
+
rdf (~> 3.2)
|
|
156
|
+
rexml (~> 3.2)
|
|
157
|
+
redis (4.8.1)
|
|
158
|
+
regexp_parser (2.6.0)
|
|
159
|
+
rest-client (2.1.0)
|
|
160
|
+
http-accept (>= 1.7.0, < 2.0)
|
|
161
|
+
http-cookie (>= 1.0.2, < 2.0)
|
|
162
|
+
mime-types (>= 1.16, < 4.0)
|
|
163
|
+
netrc (~> 0.8)
|
|
164
|
+
rexml (3.2.5)
|
|
165
|
+
rollbar (3.4.0)
|
|
166
|
+
rspec (3.12.0)
|
|
167
|
+
rspec-core (~> 3.12.0)
|
|
168
|
+
rspec-expectations (~> 3.12.0)
|
|
169
|
+
rspec-mocks (~> 3.12.0)
|
|
170
|
+
rspec-core (3.12.0)
|
|
171
|
+
rspec-support (~> 3.12.0)
|
|
172
|
+
rspec-expectations (3.12.0)
|
|
173
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
174
|
+
rspec-support (~> 3.12.0)
|
|
175
|
+
rspec-mocks (3.12.0)
|
|
176
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
177
|
+
rspec-support (~> 3.12.0)
|
|
178
|
+
rspec-support (3.12.0)
|
|
179
|
+
rubocop (1.28.1)
|
|
180
|
+
parallel (~> 1.10)
|
|
181
|
+
parser (>= 3.1.0.0)
|
|
182
|
+
rainbow (>= 2.2.2, < 4.0)
|
|
183
|
+
regexp_parser (>= 1.8, < 3.0)
|
|
184
|
+
rexml
|
|
185
|
+
rubocop-ast (>= 1.17.0, < 2.0)
|
|
186
|
+
ruby-progressbar (~> 1.7)
|
|
187
|
+
unicode-display_width (>= 1.4.0, < 3.0)
|
|
188
|
+
rubocop-ast (1.23.0)
|
|
189
|
+
parser (>= 3.1.1.0)
|
|
190
|
+
rubocop-rspec (2.11.1)
|
|
191
|
+
rubocop (~> 1.19)
|
|
192
|
+
ruby-progressbar (1.11.0)
|
|
193
|
+
ruby2_keywords (0.0.5)
|
|
194
|
+
sawyer (0.8.2)
|
|
195
|
+
addressable (>= 2.3.5)
|
|
196
|
+
faraday (> 0.8, < 2.0)
|
|
197
|
+
scanf (1.0.0)
|
|
198
|
+
sparql (3.2.5)
|
|
199
|
+
builder (~> 3.2)
|
|
200
|
+
ebnf (~> 2.2, >= 2.3.1)
|
|
201
|
+
logger (~> 1.5)
|
|
202
|
+
rdf (~> 3.2, >= 3.2.8)
|
|
203
|
+
rdf-aggregate-repo (~> 3.2)
|
|
204
|
+
rdf-xsd (~> 3.2)
|
|
205
|
+
sparql-client (~> 3.2, >= 3.2.1)
|
|
206
|
+
sxp (~> 1.2, >= 1.2.2)
|
|
207
|
+
sparql-client (3.2.1)
|
|
208
|
+
net-http-persistent (~> 4.0, >= 4.0.1)
|
|
209
|
+
rdf (~> 3.2, >= 3.2.6)
|
|
210
|
+
sxp (1.2.3)
|
|
211
|
+
matrix (~> 0.4)
|
|
212
|
+
rdf (~> 3.2)
|
|
213
|
+
systemu (2.6.5)
|
|
214
|
+
terminal-table (3.0.2)
|
|
215
|
+
unicode-display_width (>= 1.1.1, < 3)
|
|
216
|
+
timecop (0.9.6)
|
|
217
|
+
tzinfo (2.0.6)
|
|
218
|
+
concurrent-ruby (~> 1.0)
|
|
219
|
+
unf (0.1.4)
|
|
220
|
+
unf_ext
|
|
221
|
+
unf_ext (0.0.8.2)
|
|
222
|
+
unicode-display_width (2.3.0)
|
|
223
|
+
unicode-types (1.8.0)
|
|
224
|
+
uuid (2.3.9)
|
|
225
|
+
macaddr (~> 1.0)
|
|
226
|
+
validatable (1.6.7)
|
|
227
|
+
vcr (5.1.0)
|
|
228
|
+
webmock (3.18.1)
|
|
229
|
+
addressable (>= 2.8.0)
|
|
230
|
+
crack (>= 0.3.2)
|
|
231
|
+
hashdiff (>= 0.4.0, < 2.0.0)
|
|
232
|
+
|
|
233
|
+
PLATFORMS
|
|
234
|
+
x86_64-linux
|
|
235
|
+
|
|
236
|
+
DEPENDENCIES
|
|
237
|
+
bundler (~> 2.0)
|
|
238
|
+
danger (~> 8.0)
|
|
239
|
+
pry (~> 0.10, >= 0.10.4)
|
|
240
|
+
pry-byebug (~> 3.6)
|
|
241
|
+
pushmi_pullyu!
|
|
242
|
+
rake (~> 13.0)
|
|
243
|
+
rspec (~> 3.0)
|
|
244
|
+
rubocop (~> 1.23)
|
|
245
|
+
rubocop-rspec (~> 2.6)
|
|
246
|
+
timecop (~> 0.8)
|
|
247
|
+
vcr (~> 5.0)
|
|
248
|
+
webmock (~> 3.3)
|
|
249
|
+
|
|
250
|
+
BUNDLED WITH
|
|
251
|
+
2.3.19
|
data/README.md
CHANGED
|
@@ -26,7 +26,7 @@ Its primary job is to manage the flow of content from Jupiter into Swift for pre
|
|
|
26
26
|
|
|
27
27
|
## Requirements
|
|
28
28
|
|
|
29
|
-
PushmiPullyu supports Ruby 2.
|
|
29
|
+
PushmiPullyu supports Ruby 2.7
|
|
30
30
|
|
|
31
31
|
## Installation
|
|
32
32
|
|
|
@@ -66,7 +66,10 @@ Specific options:
|
|
|
66
66
|
-W, --workdir PATH Path for directory where AIP creation work takes place in
|
|
67
67
|
-N, --process_name NAME Name of the application process
|
|
68
68
|
-m, --monitor Start monitor process for a daemon
|
|
69
|
-
-q, --queue NAME Name of the queue to read from
|
|
69
|
+
-q, --queue NAME Name of the queue to read from
|
|
70
|
+
-i, --ingestion_prefix PREFIX Prefix for keys used in counting the number of failed ingestion attempts
|
|
71
|
+
-x, --ingestion_attempts NUMBER Max number of attempts to try ingesting an entity
|
|
72
|
+
-f, --first_failed_wait NUMBER Time in seconds to wait after first failed entity deposit. This time will double every failed attempt
|
|
70
73
|
|
|
71
74
|
Common options:
|
|
72
75
|
-v, --version Show version
|
|
@@ -144,7 +147,7 @@ This will cut a tag version, builds the gem, and pushes the gem up to Rubygems
|
|
|
144
147
|
|
|
145
148
|
Note: You may need permission to push a gem up to Rubygems!
|
|
146
149
|
You will first need to create an account on rubygems.org.
|
|
147
|
-
Once you have an account, bug
|
|
150
|
+
Once you have an account, bug @pgwillia (Tricia Jenkins), @lagoan (Omar Rodriguez-Arenas), or @henryzhang87 (Henry Zhang) to [add you as an owner](http://guides.rubygems.org/command-reference/#gem-owner) to pushmi_pullyu Rubygem. Once you are an owner you should be able to push new versions of pushmi_pullyu up to Rubygems
|
|
148
151
|
|
|
149
152
|
## Deployment
|
|
150
153
|
|
data/examples/pushmi_pullyu.yml
CHANGED
|
@@ -35,6 +35,11 @@ class PushmiPullyu::AIP::Downloader
|
|
|
35
35
|
# Main object metadata
|
|
36
36
|
download_and_log(object_aip_paths[:main_object_remote],
|
|
37
37
|
object_aip_paths[:main_object_local])
|
|
38
|
+
|
|
39
|
+
# Communities and collections do not have their own files.
|
|
40
|
+
return unless can_have_files?
|
|
41
|
+
|
|
42
|
+
FileUtils.mkdir_p(object_aip_paths[:file_sets_directory_local])
|
|
38
43
|
download_and_log(object_aip_paths[:file_sets_remote],
|
|
39
44
|
object_aip_paths[:file_sets_local])
|
|
40
45
|
|
|
@@ -139,25 +144,26 @@ class PushmiPullyu::AIP::Downloader
|
|
|
139
144
|
PushmiPullyu::Logging.log_aip_activity(@aip_directory, message)
|
|
140
145
|
end
|
|
141
146
|
|
|
147
|
+
def can_have_files?
|
|
148
|
+
@entity[:type] == 'items' || @entity[:type] == 'theses'
|
|
149
|
+
end
|
|
150
|
+
|
|
142
151
|
### Directories
|
|
143
152
|
|
|
144
153
|
def aip_dirs
|
|
145
154
|
@aip_dirs ||= {
|
|
146
155
|
objects: "#{@aip_directory}/data/objects",
|
|
147
156
|
metadata: "#{@aip_directory}/data/objects/metadata",
|
|
148
|
-
|
|
149
|
-
files_metadata: "#{@aip_directory}/data/objects/metadata/files_metadata",
|
|
150
|
-
logs: "#{@aip_directory}/data/logs",
|
|
151
|
-
file_logs: "#{@aip_directory}/data/logs/files_logs"
|
|
157
|
+
logs: "#{@aip_directory}/data/logs"
|
|
152
158
|
}
|
|
153
159
|
end
|
|
154
160
|
|
|
155
161
|
def file_set_dirs(file_set_uuid)
|
|
156
162
|
@file_set_dirs ||= {}
|
|
157
163
|
@file_set_dirs[file_set_uuid] ||= {
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
164
|
+
files: "#{@aip_directory}/data/objects/files/#{file_set_uuid}",
|
|
165
|
+
logs: "#{@aip_directory}/data/logs/files_logs/#{file_set_uuid}",
|
|
166
|
+
metadata: "#{@aip_directory}/data/objects/metadata/files_metadata/#{file_set_uuid}"
|
|
161
167
|
}
|
|
162
168
|
end
|
|
163
169
|
|
|
@@ -193,7 +199,9 @@ class PushmiPullyu::AIP::Downloader
|
|
|
193
199
|
main_object_remote: object_uri,
|
|
194
200
|
main_object_local: "#{aip_dirs[:metadata]}/object_metadata.n3",
|
|
195
201
|
file_sets_remote: "#{object_uri}/filesets",
|
|
196
|
-
|
|
202
|
+
# This directory needs to be created before we can download the file order information
|
|
203
|
+
file_sets_directory_local: "#{@aip_directory}/data/objects/metadata/files_metadata",
|
|
204
|
+
file_sets_local: "#{@aip_directory}/data/objects/metadata/files_metadata/file_order.xml",
|
|
197
205
|
# This is downloaded for processing but not saved
|
|
198
206
|
file_paths_remote: "#{object_uri}/file_paths"
|
|
199
207
|
}.freeze
|
data/lib/pushmi_pullyu/aip.rb
CHANGED
|
@@ -6,19 +6,22 @@ module PushmiPullyu::AIP
|
|
|
6
6
|
module_function
|
|
7
7
|
|
|
8
8
|
def create(entity)
|
|
9
|
-
raise EntityInvalid if entity.
|
|
10
|
-
UUID.validate(entity[:uuid])
|
|
9
|
+
raise EntityInvalid if entity.blank? ||
|
|
10
|
+
UUID.validate(entity[:uuid]).blank? ||
|
|
11
11
|
entity[:type].blank?
|
|
12
12
|
|
|
13
13
|
aip_directory = "#{PushmiPullyu.options[:workdir]}/#{entity[:uuid]}"
|
|
14
14
|
aip_filename = "#{aip_directory}.tar"
|
|
15
|
+
begin
|
|
16
|
+
PushmiPullyu::AIP::Downloader.new(entity, aip_directory).run
|
|
17
|
+
PushmiPullyu::AIP::Creator.new(entity[:uuid], aip_directory, aip_filename).run
|
|
15
18
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
19
|
+
yield aip_filename, aip_directory
|
|
20
|
+
# Here we will ensure the files are removed even if an exception comes up.
|
|
21
|
+
# You will notice there is no rescue block. We will catch exceptions in `PushmiPullyu::CLI`
|
|
22
|
+
ensure
|
|
23
|
+
FileUtils.rm_rf(aip_filename)
|
|
24
|
+
FileUtils.rm_rf(aip_directory)
|
|
25
|
+
end
|
|
23
26
|
end
|
|
24
27
|
end
|
data/lib/pushmi_pullyu/cli.rb
CHANGED
|
@@ -148,6 +148,21 @@ class PushmiPullyu::CLI
|
|
|
148
148
|
opts[:queue_name] = queue
|
|
149
149
|
end
|
|
150
150
|
|
|
151
|
+
o.on('-i', '--ingestion_prefix PREFIX',
|
|
152
|
+
'Prefix for keys used in counting the number of failed ingestion attempts') do |prefix|
|
|
153
|
+
opts[:ingestion_prefix] = prefix
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
o.on('-x', '--ingestion_attempts NUMBER', Integer,
|
|
157
|
+
'Max number of attempts to try ingesting an entity') do |ingestion_attempts|
|
|
158
|
+
opts[:ingestion_attempts] = ingestion_attempts
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
o.on('-f', '--first_failed_wait NUMBER', Integer,
|
|
162
|
+
'Time in seconds to wait after first failed deposit. Time will double every failed attempt') do |failed_wait|
|
|
163
|
+
opts[:first_failed_wait] = failed_wait
|
|
164
|
+
end
|
|
165
|
+
|
|
151
166
|
o.separator ''
|
|
152
167
|
o.separator 'Common options:'
|
|
153
168
|
|
|
@@ -182,12 +197,12 @@ class PushmiPullyu::CLI
|
|
|
182
197
|
end
|
|
183
198
|
|
|
184
199
|
def run_preservation_cycle
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
200
|
+
begin
|
|
201
|
+
entity = queue.wait_next_item
|
|
202
|
+
return unless entity && entity[:type].present? && entity[:uuid].present?
|
|
203
|
+
rescue StandardError => e
|
|
204
|
+
log_exception(e)
|
|
205
|
+
end
|
|
191
206
|
|
|
192
207
|
# add additional information about the error context to errors that occur while processing this item.
|
|
193
208
|
Rollbar.scoped(entity_uuid: entity[:uuid]) do
|
|
@@ -199,13 +214,24 @@ class PushmiPullyu::CLI
|
|
|
199
214
|
# Log successful preservation event to the log files
|
|
200
215
|
PushmiPullyu::Logging.log_preservation_event(deposited_file, aip_directory)
|
|
201
216
|
end
|
|
202
|
-
|
|
217
|
+
# An EntityInvalid exception means there is a problem with the entity information format so there is no point in
|
|
218
|
+
# re-adding it to the queue as it will always fail
|
|
219
|
+
rescue PushmiPullyu::AIP::EntityInvalid => e
|
|
220
|
+
rescue StandardError => e
|
|
221
|
+
begin
|
|
222
|
+
queue.add_entity_in_timeframe(entity)
|
|
223
|
+
rescue MaxDepositAttemptsReached => e
|
|
224
|
+
log_exception(e)
|
|
225
|
+
end
|
|
226
|
+
|
|
227
|
+
# rubocop:disable Lint/RescueException
|
|
228
|
+
# Something other than a StandardError exception means something happened which we were not expecting!
|
|
229
|
+
# Make sure we log the problem
|
|
203
230
|
rescue Exception => e
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
# rubocop:enable Lint/RescueException
|
|
231
|
+
raise e
|
|
232
|
+
# rubocop:enable Lint/RescueException
|
|
233
|
+
ensure
|
|
234
|
+
log_exception(e)
|
|
209
235
|
end
|
|
210
236
|
end
|
|
211
237
|
|
|
@@ -280,4 +306,9 @@ class PushmiPullyu::CLI
|
|
|
280
306
|
end
|
|
281
307
|
end
|
|
282
308
|
|
|
309
|
+
def log_exception(exception)
|
|
310
|
+
Rollbar.error(exception)
|
|
311
|
+
logger.error(exception)
|
|
312
|
+
end
|
|
313
|
+
|
|
283
314
|
end
|
|
@@ -47,12 +47,12 @@ module PushmiPullyu::Logging
|
|
|
47
47
|
preservation_logger = Logger.new("#{PushmiPullyu.options[:logdir]}/preservation_events.log")
|
|
48
48
|
preservation_json_logger = Logger.new("#{PushmiPullyu.options[:logdir]}/preservation_events.json")
|
|
49
49
|
|
|
50
|
-
message = "#{deposited_file.name} was successfully deposited into Swift Storage!\n"\
|
|
51
|
-
"Here are the details of this preservation event:\n"\
|
|
52
|
-
"\tUUID: '#{deposited_file.name}'\n"\
|
|
53
|
-
"\tTimestamp of Completion: '#{deposited_file.last_modified}'\n"\
|
|
54
|
-
"\tAIP Checksum: '#{deposited_file.etag}'\n"\
|
|
55
|
-
"\tMetadata: #{deposited_file.metadata}\n"\
|
|
50
|
+
message = "#{deposited_file.name} was successfully deposited into Swift Storage!\n" \
|
|
51
|
+
"Here are the details of this preservation event:\n" \
|
|
52
|
+
"\tUUID: '#{deposited_file.name}'\n" \
|
|
53
|
+
"\tTimestamp of Completion: '#{deposited_file.last_modified}'\n" \
|
|
54
|
+
"\tAIP Checksum: '#{deposited_file.etag}'\n" \
|
|
55
|
+
"\tMetadata: #{deposited_file.metadata}\n" \
|
|
56
56
|
|
|
57
57
|
file_details = file_log_details(aip_directory)
|
|
58
58
|
|
|
@@ -155,7 +155,7 @@ module PushmiPullyu::Logging
|
|
|
155
155
|
fileset_name: File.dirname(file).split('/')[-1],
|
|
156
156
|
file_name: File.basename(file),
|
|
157
157
|
file_size: File.size(file),
|
|
158
|
-
file_extension: File.extname(file).strip.downcase[1
|
|
158
|
+
file_extension: File.extname(file).strip.downcase[1..]
|
|
159
159
|
}
|
|
160
160
|
end
|
|
161
161
|
end
|
|
@@ -20,6 +20,7 @@ require 'connection_pool'
|
|
|
20
20
|
class PushmiPullyu::PreservationQueue
|
|
21
21
|
|
|
22
22
|
class ConnectionError < StandardError; end
|
|
23
|
+
class MaxDepositAttemptsReached < StandardError; end
|
|
23
24
|
|
|
24
25
|
def initialize(redis_url: 'redis://localhost:6379',
|
|
25
26
|
pool_opts: { size: 1, timeout: 5 },
|
|
@@ -50,7 +51,8 @@ class PushmiPullyu::PreservationQueue
|
|
|
50
51
|
rd.multi do |tx|
|
|
51
52
|
tx.zrem(@queue_name, element) # remove the top element transactionally
|
|
52
53
|
end
|
|
53
|
-
|
|
54
|
+
|
|
55
|
+
return JSON.parse(element, { symbolize_names: true })
|
|
54
56
|
else
|
|
55
57
|
rd.unwatch # cancel the transaction since there was nothing in the queue
|
|
56
58
|
return nil
|
|
@@ -68,6 +70,27 @@ class PushmiPullyu::PreservationQueue
|
|
|
68
70
|
end
|
|
69
71
|
end
|
|
70
72
|
|
|
73
|
+
def add_entity_in_timeframe(entity)
|
|
74
|
+
entity_attempts_key = "#{PushmiPullyu.options[:ingestion_prefix]}#{entity[:uuid]}"
|
|
75
|
+
|
|
76
|
+
@redis.with do |connection|
|
|
77
|
+
# separate information for priority information and queue
|
|
78
|
+
deposit_attempt = connection.incr entity_attempts_key
|
|
79
|
+
|
|
80
|
+
if deposit_attempt <= PushmiPullyu.options[:ingestion_attempts]
|
|
81
|
+
connection.zadd @queue_name, Time.now.to_f + self.class.extra_wait_time(deposit_attempt),
|
|
82
|
+
entity.slice(:uuid, :type).to_json
|
|
83
|
+
else
|
|
84
|
+
connection.del entity_attempts_key
|
|
85
|
+
raise MaxDepositAttemptsReached
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
def self.extra_wait_time(deposit_attempt)
|
|
91
|
+
(2**deposit_attempt) * PushmiPullyu.options[:first_failed_wait]
|
|
92
|
+
end
|
|
93
|
+
|
|
71
94
|
protected
|
|
72
95
|
|
|
73
96
|
def connected?
|
data/lib/pushmi_pullyu.rb
CHANGED
|
@@ -26,6 +26,9 @@ module PushmiPullyu
|
|
|
26
26
|
workdir: 'tmp/work',
|
|
27
27
|
process_name: 'pushmi_pullyu',
|
|
28
28
|
queue_name: 'dev:pmpy_queue',
|
|
29
|
+
ingestion_prefix: 'prod:pmpy_ingest_attempt:',
|
|
30
|
+
ingestion_attempts: 15,
|
|
31
|
+
first_failed_wait: 10,
|
|
29
32
|
redis: {
|
|
30
33
|
url: 'redis://localhost:6379'
|
|
31
34
|
},
|
data/pushmi_pullyu.gemspec
CHANGED
|
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
|
|
|
19
19
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
20
20
|
spec.require_paths = ['lib']
|
|
21
21
|
|
|
22
|
-
spec.required_ruby_version = '>= 2.
|
|
22
|
+
spec.required_ruby_version = '>= 2.7'
|
|
23
23
|
|
|
24
24
|
spec.add_runtime_dependency 'activesupport', '>= 5', '< 8'
|
|
25
25
|
spec.add_runtime_dependency 'bagit', '~> 0.4'
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: pushmi_pullyu
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.0.
|
|
4
|
+
version: 2.0.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Shane Murnaghan
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: exe
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date:
|
|
12
|
+
date: 2023-02-28 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: activesupport
|
|
@@ -409,6 +409,7 @@ files:
|
|
|
409
409
|
- CHANGELOG.md
|
|
410
410
|
- Dangerfile
|
|
411
411
|
- Gemfile
|
|
412
|
+
- Gemfile.lock
|
|
412
413
|
- LICENSE.txt
|
|
413
414
|
- README.md
|
|
414
415
|
- Rakefile
|
|
@@ -446,7 +447,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
446
447
|
requirements:
|
|
447
448
|
- - ">="
|
|
448
449
|
- !ruby/object:Gem::Version
|
|
449
|
-
version: '2.
|
|
450
|
+
version: '2.7'
|
|
450
451
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
451
452
|
requirements:
|
|
452
453
|
- - ">="
|