pushmi_pullyu 2.0.4 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aabca31fdc4036e26f6fbd682b2e9aece8179973fc1509e7eada08b7bcdaf796
4
- data.tar.gz: '0678d634ac98e9a4aeefa0bfb5d6df5a4869c0db3ba1b784be38a65caf916d75'
3
+ metadata.gz: 25051393a56976db185a30c9cd498afd7997854d2a6931f3b6190132cec76a97
4
+ data.tar.gz: 731b8dbcf8a3d9124f44d57001bda6952bb2da07df874a1f97f52d7e7176c065
5
5
  SHA512:
6
- metadata.gz: fb769198d6bf7e609fbbe7eec02498756f185e338439983a1ad14391392e9d3f66dd8368438791e237066137edd9bbf0afe355d6ef84cb61a11b5a00acc3e48a
7
- data.tar.gz: b921457537461cd5512fec7ac072f301c701a98a40ee210236c39cb098b04b9c1206ca2b2a0c759ab6e9fce4f2a4768329ded84a104c1fd8ec51e74e4b7b395b
6
+ metadata.gz: 82b083e9b991fbc0b7c95507415d312bc29b29a4f16fdcfe8689efc2dc4c69542a4c15010ce9b5841cebebe73920de7499512bf1eaab57bb379816bd1dba75ae
7
+ data.tar.gz: 2439db5f7058ddc7b042894538a592d076c467b34c7211c262d15e3a2d6d436f8a3d785f6b53c3bd68e93566832fed18bbde6b32efa4f0f4e8e08eceacd3ed57
data/CHANGELOG.md CHANGED
@@ -8,6 +8,13 @@ and releases in PushmiPullyu adheres to [Semantic Versioning](https://semver.org
8
8
 
9
9
  ## [Unreleased]
10
10
 
11
+ ## [2.0.5] - 2023-02-17
12
+
13
+ - Add rescue block to catch exceptions while waiting for next item [#280](https://github.com/ualbertalib/pushmi_pullyu/issues/280)
14
+ - Add logic to fetch new community and collection information from jupiter and create their AIPS. [#255](https://github.com/ualbertalib/pushmi_pullyu/issues/255)
15
+ - Add delay to re-ingestion attempts to allow for problems to be fixed [#297](https://github.com/ualbertalib/pushmi_pullyu/issues/297)
16
+ - Bump git from 1.9.1 to 1.13.0
17
+
11
18
  ## [2.0.4] - 2022-11-22
12
19
 
13
20
  - Fix issue with temporary work files not being deleted after a failed swift deposit [#242](https://github.com/ualbertalib/pushmi_pullyu/issues/242)
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- pushmi_pullyu (2.0.4)
4
+ pushmi_pullyu (2.0.5)
5
5
  activesupport (>= 5, < 8)
6
6
  bagit (~> 0.4)
7
7
  connection_pool (~> 2.2)
@@ -18,7 +18,7 @@ PATH
18
18
  GEM
19
19
  remote: https://rubygems.org/
20
20
  specs:
21
- activesupport (7.0.2.4)
21
+ activesupport (7.0.4.2)
22
22
  concurrent-ruby (~> 1.0, >= 1.0.2)
23
23
  i18n (>= 1.6, < 2)
24
24
  minitest (>= 5.1)
@@ -39,8 +39,8 @@ GEM
39
39
  open4 (~> 1.3)
40
40
  coderay (1.1.3)
41
41
  colored2 (3.1.2)
42
- concurrent-ruby (1.1.10)
43
- connection_pool (2.2.5)
42
+ concurrent-ruby (1.2.0)
43
+ connection_pool (2.3.0)
44
44
  cork (0.3.0)
45
45
  colored2 (~> 3.1)
46
46
  crack (0.4.5)
@@ -91,22 +91,23 @@ GEM
91
91
  faraday-net_http_persistent (1.2.0)
92
92
  faraday-patron (1.0.0)
93
93
  faraday-rack (1.0.0)
94
- git (1.9.1)
94
+ git (1.13.0)
95
+ addressable (~> 2.8)
95
96
  rchardet (~> 1.8)
96
97
  hashdiff (1.0.1)
97
98
  htmlentities (4.3.4)
98
99
  http-accept (1.7.0)
99
- http-cookie (1.0.4)
100
+ http-cookie (1.0.5)
100
101
  domain_name (~> 0.5)
101
- i18n (1.10.0)
102
+ i18n (1.12.0)
102
103
  concurrent-ruby (~> 1.0)
103
- json (2.6.1)
104
+ json (2.6.3)
104
105
  kramdown (2.3.1)
105
106
  rexml
106
107
  kramdown-parser-gfm (1.1.0)
107
108
  kramdown (~> 2.0)
108
109
  link_header (0.0.8)
109
- logger (1.5.1)
110
+ logger (1.5.3)
110
111
  macaddr (1.7.2)
111
112
  systemu (~> 2.6.5)
112
113
  matrix (0.4.2)
@@ -115,7 +116,7 @@ GEM
115
116
  mime-types-data (~> 3.2015)
116
117
  mime-types-data (3.2022.0105)
117
118
  minitar (0.9)
118
- minitest (5.15.0)
119
+ minitest (5.17.0)
119
120
  multipart-post (2.1.1)
120
121
  nap (1.1.0)
121
122
  net-http-persistent (4.0.1)
@@ -141,9 +142,9 @@ GEM
141
142
  rainbow (3.1.1)
142
143
  rake (13.0.6)
143
144
  rchardet (1.8.0)
144
- rdf (3.2.7)
145
+ rdf (3.2.9)
145
146
  link_header (~> 0.0, >= 0.0.8)
146
- rdf-aggregate-repo (3.2.0)
147
+ rdf-aggregate-repo (3.2.1)
147
148
  rdf (~> 3.2)
148
149
  rdf-n3 (3.2.1)
149
150
  ebnf (~> 2.2)
@@ -153,7 +154,7 @@ GEM
153
154
  rdf-xsd (3.2.1)
154
155
  rdf (~> 3.2)
155
156
  rexml (~> 3.2)
156
- redis (4.6.0)
157
+ redis (4.8.1)
157
158
  regexp_parser (2.6.0)
158
159
  rest-client (2.1.0)
159
160
  http-accept (>= 1.7.0, < 2.0)
@@ -161,7 +162,7 @@ GEM
161
162
  mime-types (>= 1.16, < 4.0)
162
163
  netrc (~> 0.8)
163
164
  rexml (3.2.5)
164
- rollbar (3.3.0)
165
+ rollbar (3.4.0)
165
166
  rspec (3.12.0)
166
167
  rspec-core (~> 3.12.0)
167
168
  rspec-expectations (~> 3.12.0)
@@ -194,32 +195,32 @@ GEM
194
195
  addressable (>= 2.3.5)
195
196
  faraday (> 0.8, < 2.0)
196
197
  scanf (1.0.0)
197
- sparql (3.2.1)
198
+ sparql (3.2.5)
198
199
  builder (~> 3.2)
199
- ebnf (~> 2.2)
200
- logger (~> 1.4)
201
- rdf (~> 3.2, >= 3.2.3)
200
+ ebnf (~> 2.2, >= 2.3.1)
201
+ logger (~> 1.5)
202
+ rdf (~> 3.2, >= 3.2.8)
202
203
  rdf-aggregate-repo (~> 3.2)
203
204
  rdf-xsd (~> 3.2)
204
- sparql-client (~> 3.2)
205
- sxp (~> 1.2, >= 1.2.1)
205
+ sparql-client (~> 3.2, >= 3.2.1)
206
+ sxp (~> 1.2, >= 1.2.2)
206
207
  sparql-client (3.2.1)
207
208
  net-http-persistent (~> 4.0, >= 4.0.1)
208
209
  rdf (~> 3.2, >= 3.2.6)
209
- sxp (1.2.2)
210
- matrix
210
+ sxp (1.2.3)
211
+ matrix (~> 0.4)
211
212
  rdf (~> 3.2)
212
213
  systemu (2.6.5)
213
214
  terminal-table (3.0.2)
214
215
  unicode-display_width (>= 1.1.1, < 3)
215
- timecop (0.9.5)
216
- tzinfo (2.0.4)
216
+ timecop (0.9.6)
217
+ tzinfo (2.0.6)
217
218
  concurrent-ruby (~> 1.0)
218
219
  unf (0.1.4)
219
220
  unf_ext
220
- unf_ext (0.0.8.1)
221
+ unf_ext (0.0.8.2)
221
222
  unicode-display_width (2.3.0)
222
- unicode-types (1.7.0)
223
+ unicode-types (1.8.0)
223
224
  uuid (2.3.9)
224
225
  macaddr (~> 1.0)
225
226
  validatable (1.6.7)
@@ -247,4 +248,4 @@ DEPENDENCIES
247
248
  webmock (~> 3.3)
248
249
 
249
250
  BUNDLED WITH
250
- 2.3.12
251
+ 2.3.19
data/README.md CHANGED
@@ -66,7 +66,10 @@ Specific options:
66
66
  -W, --workdir PATH Path for directory where AIP creation work takes place in
67
67
  -N, --process_name NAME Name of the application process
68
68
  -m, --monitor Start monitor process for a daemon
69
- -q, --queue NAME Name of the queue to read from
69
+ -q, --queue NAME Name of the queue to read from
70
+ -i, --ingestion_prefix PREFIX Prefix for keys used in counting the number of failed ingestion attempts
71
+ -x, --ingestion_attempts NUMBER Max number of attempts to try ingesting an entity
72
+ -f, --first_failed_wait NUMBER Time in seconds to wait after first failed entity deposit. This time will double every failed attempt
70
73
 
71
74
  Common options:
72
75
  -v, --version Show version
@@ -144,7 +147,7 @@ This will cut a tag version, builds the gem, and pushes the gem up to Rubygems
144
147
 
145
148
  Note: You may need permission to push a gem up to Rubygems!
146
149
  You will first need to create an account on rubygems.org.
147
- Once you have an account, bug Shane or Matt to [add you as an owner](http://guides.rubygems.org/command-reference/#gem-owner) to pushmi_pullyu Rubygem. Once you are an owner you should be able to push new versions of pushmi_pullyu up to Rubygems.
150
+ Once you have an account, bug @pgwillia (Tricia Jenkins), @lagoan (Omar Rodriguez-Arenas), or @henryzhang87 (Henry Zhang) to [add you as an owner](http://guides.rubygems.org/command-reference/#gem-owner) to pushmi_pullyu Rubygem. Once you are an owner you should be able to push new versions of pushmi_pullyu up to Rubygems.
148
151
 
149
152
  ## Deployment
150
153
 
@@ -16,6 +16,9 @@ piddir: tmp/pids
16
16
  workdir: tmp/work
17
17
  process_name: pushmi_pullyu
18
18
  queue_name: dev:pmpy_queue
19
+ ingestion_prefix: "'prod:pmpy_ingest_attempt:'"
20
+ ingestion_attempts: 15
21
+ first_failed_wait: 10
19
22
  minimum_age: 0
20
23
 
21
24
  redis:
@@ -35,6 +35,11 @@ class PushmiPullyu::AIP::Downloader
35
35
  # Main object metadata
36
36
  download_and_log(object_aip_paths[:main_object_remote],
37
37
  object_aip_paths[:main_object_local])
38
+
39
+ # Communities and collections do not have their own files.
40
+ return unless can_have_files?
41
+
42
+ FileUtils.mkdir_p(object_aip_paths[:file_sets_directory_local])
38
43
  download_and_log(object_aip_paths[:file_sets_remote],
39
44
  object_aip_paths[:file_sets_local])
40
45
 
@@ -139,25 +144,26 @@ class PushmiPullyu::AIP::Downloader
139
144
  PushmiPullyu::Logging.log_aip_activity(@aip_directory, message)
140
145
  end
141
146
 
147
+ def can_have_files?
148
+ @entity[:type] == 'items' || @entity[:type] == 'theses'
149
+ end
150
+
142
151
  ### Directories
143
152
 
144
153
  def aip_dirs
145
154
  @aip_dirs ||= {
146
155
  objects: "#{@aip_directory}/data/objects",
147
156
  metadata: "#{@aip_directory}/data/objects/metadata",
148
- files: "#{@aip_directory}/data/objects/files",
149
- files_metadata: "#{@aip_directory}/data/objects/metadata/files_metadata",
150
- logs: "#{@aip_directory}/data/logs",
151
- file_logs: "#{@aip_directory}/data/logs/files_logs"
157
+ logs: "#{@aip_directory}/data/logs"
152
158
  }
153
159
  end
154
160
 
155
161
  def file_set_dirs(file_set_uuid)
156
162
  @file_set_dirs ||= {}
157
163
  @file_set_dirs[file_set_uuid] ||= {
158
- metadata: "#{aip_dirs[:files_metadata]}/#{file_set_uuid}",
159
- files: "#{aip_dirs[:files]}/#{file_set_uuid}",
160
- logs: "#{aip_dirs[:file_logs]}/#{file_set_uuid}"
164
+ files: "#{@aip_directory}/data/objects/files/#{file_set_uuid}",
165
+ logs: "#{@aip_directory}/data/logs/files_logs/#{file_set_uuid}",
166
+ metadata: "#{@aip_directory}/data/objects/metadata/files_metadata/#{file_set_uuid}"
161
167
  }
162
168
  end
163
169
 
@@ -193,7 +199,9 @@ class PushmiPullyu::AIP::Downloader
193
199
  main_object_remote: object_uri,
194
200
  main_object_local: "#{aip_dirs[:metadata]}/object_metadata.n3",
195
201
  file_sets_remote: "#{object_uri}/filesets",
196
- file_sets_local: "#{aip_dirs[:files_metadata]}/file_order.xml",
202
+ # This directory needs to be created before we can download the file order information
203
+ file_sets_directory_local: "#{@aip_directory}/data/objects/metadata/files_metadata",
204
+ file_sets_local: "#{@aip_directory}/data/objects/metadata/files_metadata/file_order.xml",
197
205
  # This is downloaded for processing but not saved
198
206
  file_paths_remote: "#{object_uri}/file_paths"
199
207
  }.freeze
@@ -148,6 +148,21 @@ class PushmiPullyu::CLI
148
148
  opts[:queue_name] = queue
149
149
  end
150
150
 
151
+ o.on('-i', '--ingestion_prefix PREFIX',
152
+ 'Prefix for keys used in counting the number of failed ingestion attempts') do |prefix|
153
+ opts[:ingestion_prefix] = prefix
154
+ end
155
+
156
+ o.on('-x', '--ingestion_attempts NUMBER', Integer,
157
+ 'Max number of attempts to try ingesting an entity') do |ingestion_attempts|
158
+ opts[:ingestion_attempts] = ingestion_attempts
159
+ end
160
+
161
+ o.on('-f', '--first_failed_wait NUMBER', Integer,
162
+ 'Time in seconds to wait after first failed deposit. Time will double every failed attempt') do |failed_wait|
163
+ opts[:first_failed_wait] = failed_wait
164
+ end
165
+
151
166
  o.separator ''
152
167
  o.separator 'Common options:'
153
168
 
@@ -182,11 +197,12 @@ class PushmiPullyu::CLI
182
197
  end
183
198
 
184
199
  def run_preservation_cycle
185
- entity_json = queue.wait_next_item
186
- # jupiter is submitting the entries to reddis in a hash format using fat arrows. We need to change them to colons in
187
- # order to parse them correctly from json
188
- entity = JSON.parse(entity_json.gsub('=>', ':'), { symbolize_names: true })
189
- return unless entity[:type].present? && entity[:uuid].present?
200
+ begin
201
+ entity = queue.wait_next_item
202
+ return unless entity && entity[:type].present? && entity[:uuid].present?
203
+ rescue StandardError => e
204
+ log_exception(e)
205
+ end
190
206
 
191
207
  # add additional information about the error context to errors that occur while processing this item.
192
208
  Rollbar.scoped(entity_uuid: entity[:uuid]) do
@@ -202,7 +218,11 @@ class PushmiPullyu::CLI
202
218
  # readding it to the queue as it will always fail
203
219
  rescue PushmiPullyu::AIP::EntityInvalid => e
204
220
  rescue StandardError => e
205
- queue.add_entity_json(entity_json)
221
+ begin
222
+ queue.add_entity_in_timeframe(entity)
223
+ rescue MaxDepositAttemptsReached => e
224
+ log_exception(e)
225
+ end
206
226
 
207
227
  # rubocop:disable Lint/RescueException
208
228
  # Something other than a StandardError exception means something happened which we were not expecting!
@@ -211,8 +231,7 @@ class PushmiPullyu::CLI
211
231
  raise e
212
232
  # rubocop:enable Lint/RescueException
213
233
  ensure
214
- Rollbar.error(e)
215
- logger.error(e)
234
+ log_exception(e)
216
235
  end
217
236
  end
218
237
 
@@ -287,4 +306,9 @@ class PushmiPullyu::CLI
287
306
  end
288
307
  end
289
308
 
309
+ def log_exception(exception)
310
+ Rollbar.error(exception)
311
+ logger.error(exception)
312
+ end
313
+
290
314
  end
@@ -20,6 +20,7 @@ require 'connection_pool'
20
20
  class PushmiPullyu::PreservationQueue
21
21
 
22
22
  class ConnectionError < StandardError; end
23
+ class MaxDepositAttemptsReached < StandardError; end
23
24
 
24
25
  def initialize(redis_url: 'redis://localhost:6379',
25
26
  pool_opts: { size: 1, timeout: 5 },
@@ -50,7 +51,8 @@ class PushmiPullyu::PreservationQueue
50
51
  rd.multi do |tx|
51
52
  tx.zrem(@queue_name, element) # remove the top element transactionally
52
53
  end
53
- return element
54
+
55
+ return JSON.parse(element, { symbolize_names: true })
54
56
  else
55
57
  rd.unwatch # cancel the transaction since there was nothing in the queue
56
58
  return nil
@@ -68,12 +70,27 @@ class PushmiPullyu::PreservationQueue
68
70
  end
69
71
  end
70
72
 
71
- def add_entity_json(entity_json)
73
+ def add_entity_in_timeframe(entity)
74
+ entity_attempts_key = "#{PushmiPullyu.options[:ingestion_prefix]}#{entity[:uuid]}"
75
+
72
76
  @redis.with do |connection|
73
- connection.zadd @queue_name, Time.now.to_f, entity_json
77
+ # separate information for priority information and queue
78
+ deposit_attempt = connection.incr entity_attempts_key
79
+
80
+ if deposit_attempt <= PushmiPullyu.options[:ingestion_attempts]
81
+ connection.zadd @queue_name, Time.now.to_f + self.class.extra_wait_time(deposit_attempt),
82
+ entity.slice(:uuid, :type).to_json
83
+ else
84
+ connection.del entity_attempts_key
85
+ raise MaxDepositAttemptsReached
86
+ end
74
87
  end
75
88
  end
76
89
 
90
+ def self.extra_wait_time(deposit_attempt)
91
+ (2**deposit_attempt) * PushmiPullyu.options[:first_failed_wait]
92
+ end
93
+
77
94
  protected
78
95
 
79
96
  def connected?
@@ -1,3 +1,3 @@
1
1
  module PushmiPullyu
2
- VERSION = '2.0.4'.freeze
2
+ VERSION = '2.0.5'.freeze
3
3
  end
data/lib/pushmi_pullyu.rb CHANGED
@@ -26,6 +26,9 @@ module PushmiPullyu
26
26
  workdir: 'tmp/work',
27
27
  process_name: 'pushmi_pullyu',
28
28
  queue_name: 'dev:pmpy_queue',
29
+ ingestion_prefix: 'prod:pmpy_ingest_attempt:',
30
+ ingestion_attempts: 15,
31
+ first_failed_wait: 10,
29
32
  redis: {
30
33
  url: 'redis://localhost:6379'
31
34
  },
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pushmi_pullyu
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.4
4
+ version: 2.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shane Murnaghan
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2022-11-22 00:00:00.000000000 Z
12
+ date: 2023-02-28 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: activesupport