pushmi_pullyu 2.0.4 → 2.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aabca31fdc4036e26f6fbd682b2e9aece8179973fc1509e7eada08b7bcdaf796
4
- data.tar.gz: '0678d634ac98e9a4aeefa0bfb5d6df5a4869c0db3ba1b784be38a65caf916d75'
3
+ metadata.gz: 128bfb644445954d7b9c5eb3700dd1f3e4d3c92ea1e8448ab98054d750b2d53d
4
+ data.tar.gz: 2fc44c64692367c9c4b254ec32b8241c7ecdc8fb1a33384561cbfcd34096a085
5
5
  SHA512:
6
- metadata.gz: fb769198d6bf7e609fbbe7eec02498756f185e338439983a1ad14391392e9d3f66dd8368438791e237066137edd9bbf0afe355d6ef84cb61a11b5a00acc3e48a
7
- data.tar.gz: b921457537461cd5512fec7ac072f301c701a98a40ee210236c39cb098b04b9c1206ca2b2a0c759ab6e9fce4f2a4768329ded84a104c1fd8ec51e74e4b7b395b
6
+ metadata.gz: daaaf67bda17bdea14310b41a87a1c51190f41337d8f0d467887dea5ed9cc700b2b05822bddd6038a4c7746927439d2576fd8a7c3fb2a878d7f59a8ac84e45be
7
+ data.tar.gz: 33197ef8eb83869cea35ea5af258d83c16c56e77cbb0716fc61a6fa344b3d5cdf75e25a0fd72d6e2ab71148ddae9648351d29c5f2b53c9569af9f04869538ba0
data/CHANGELOG.md CHANGED
@@ -8,6 +8,17 @@ and releases in PushmiPullyu adheres to [Semantic Versioning](https://semver.org
8
8
 
9
9
  ## [Unreleased]
10
10
 
11
+ ## [2.0.6] - 2023-03-17
12
+
13
+ - Fix URI concatenation for jupiter's base url. [#309](https://github.com/ualbertalib/pushmi_pullyu/issues/309)
14
+
15
+ ## [2.0.5] - 2023-02-17
16
+
17
+ - Add rescue block to catch exceptions while waiting for next item [#280](https://github.com/ualbertalib/pushmi_pullyu/issues/280)
18
+ - Add logic to fetch new community and collection information from jupiter and create their AIPS. [#255](https://github.com/ualbertalib/pushmi_pullyu/issues/255)
19
+ - Add delay to re-ingestion attempts to allow for problems to be fixed [#297](https://github.com/ualbertalib/pushmi_pullyu/issues/297)
20
+ - Bump git from 1.9.1 to 1.13.0
21
+
11
22
  ## [2.0.4] - 2022-11-22
12
23
 
13
24
  - Fix issue with temporary work files not being deleted after a failed swift deposit [#242](https://github.com/ualbertalib/pushmi_pullyu/issues/242)
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- pushmi_pullyu (2.0.4)
4
+ pushmi_pullyu (2.0.6)
5
5
  activesupport (>= 5, < 8)
6
6
  bagit (~> 0.4)
7
7
  connection_pool (~> 2.2)
@@ -18,7 +18,7 @@ PATH
18
18
  GEM
19
19
  remote: https://rubygems.org/
20
20
  specs:
21
- activesupport (7.0.2.4)
21
+ activesupport (7.0.4.2)
22
22
  concurrent-ruby (~> 1.0, >= 1.0.2)
23
23
  i18n (>= 1.6, < 2)
24
24
  minitest (>= 5.1)
@@ -39,8 +39,8 @@ GEM
39
39
  open4 (~> 1.3)
40
40
  coderay (1.1.3)
41
41
  colored2 (3.1.2)
42
- concurrent-ruby (1.1.10)
43
- connection_pool (2.2.5)
42
+ concurrent-ruby (1.2.0)
43
+ connection_pool (2.3.0)
44
44
  cork (0.3.0)
45
45
  colored2 (~> 3.1)
46
46
  crack (0.4.5)
@@ -91,22 +91,23 @@ GEM
91
91
  faraday-net_http_persistent (1.2.0)
92
92
  faraday-patron (1.0.0)
93
93
  faraday-rack (1.0.0)
94
- git (1.9.1)
94
+ git (1.13.0)
95
+ addressable (~> 2.8)
95
96
  rchardet (~> 1.8)
96
97
  hashdiff (1.0.1)
97
98
  htmlentities (4.3.4)
98
99
  http-accept (1.7.0)
99
- http-cookie (1.0.4)
100
+ http-cookie (1.0.5)
100
101
  domain_name (~> 0.5)
101
- i18n (1.10.0)
102
+ i18n (1.12.0)
102
103
  concurrent-ruby (~> 1.0)
103
- json (2.6.1)
104
+ json (2.6.3)
104
105
  kramdown (2.3.1)
105
106
  rexml
106
107
  kramdown-parser-gfm (1.1.0)
107
108
  kramdown (~> 2.0)
108
109
  link_header (0.0.8)
109
- logger (1.5.1)
110
+ logger (1.5.3)
110
111
  macaddr (1.7.2)
111
112
  systemu (~> 2.6.5)
112
113
  matrix (0.4.2)
@@ -115,7 +116,7 @@ GEM
115
116
  mime-types-data (~> 3.2015)
116
117
  mime-types-data (3.2022.0105)
117
118
  minitar (0.9)
118
- minitest (5.15.0)
119
+ minitest (5.17.0)
119
120
  multipart-post (2.1.1)
120
121
  nap (1.1.0)
121
122
  net-http-persistent (4.0.1)
@@ -141,9 +142,9 @@ GEM
141
142
  rainbow (3.1.1)
142
143
  rake (13.0.6)
143
144
  rchardet (1.8.0)
144
- rdf (3.2.7)
145
+ rdf (3.2.9)
145
146
  link_header (~> 0.0, >= 0.0.8)
146
- rdf-aggregate-repo (3.2.0)
147
+ rdf-aggregate-repo (3.2.1)
147
148
  rdf (~> 3.2)
148
149
  rdf-n3 (3.2.1)
149
150
  ebnf (~> 2.2)
@@ -153,7 +154,7 @@ GEM
153
154
  rdf-xsd (3.2.1)
154
155
  rdf (~> 3.2)
155
156
  rexml (~> 3.2)
156
- redis (4.6.0)
157
+ redis (4.8.1)
157
158
  regexp_parser (2.6.0)
158
159
  rest-client (2.1.0)
159
160
  http-accept (>= 1.7.0, < 2.0)
@@ -161,7 +162,7 @@ GEM
161
162
  mime-types (>= 1.16, < 4.0)
162
163
  netrc (~> 0.8)
163
164
  rexml (3.2.5)
164
- rollbar (3.3.0)
165
+ rollbar (3.4.0)
165
166
  rspec (3.12.0)
166
167
  rspec-core (~> 3.12.0)
167
168
  rspec-expectations (~> 3.12.0)
@@ -194,32 +195,32 @@ GEM
194
195
  addressable (>= 2.3.5)
195
196
  faraday (> 0.8, < 2.0)
196
197
  scanf (1.0.0)
197
- sparql (3.2.1)
198
+ sparql (3.2.5)
198
199
  builder (~> 3.2)
199
- ebnf (~> 2.2)
200
- logger (~> 1.4)
201
- rdf (~> 3.2, >= 3.2.3)
200
+ ebnf (~> 2.2, >= 2.3.1)
201
+ logger (~> 1.5)
202
+ rdf (~> 3.2, >= 3.2.8)
202
203
  rdf-aggregate-repo (~> 3.2)
203
204
  rdf-xsd (~> 3.2)
204
- sparql-client (~> 3.2)
205
- sxp (~> 1.2, >= 1.2.1)
205
+ sparql-client (~> 3.2, >= 3.2.1)
206
+ sxp (~> 1.2, >= 1.2.2)
206
207
  sparql-client (3.2.1)
207
208
  net-http-persistent (~> 4.0, >= 4.0.1)
208
209
  rdf (~> 3.2, >= 3.2.6)
209
- sxp (1.2.2)
210
- matrix
210
+ sxp (1.2.3)
211
+ matrix (~> 0.4)
211
212
  rdf (~> 3.2)
212
213
  systemu (2.6.5)
213
214
  terminal-table (3.0.2)
214
215
  unicode-display_width (>= 1.1.1, < 3)
215
- timecop (0.9.5)
216
- tzinfo (2.0.4)
216
+ timecop (0.9.6)
217
+ tzinfo (2.0.6)
217
218
  concurrent-ruby (~> 1.0)
218
219
  unf (0.1.4)
219
220
  unf_ext
220
- unf_ext (0.0.8.1)
221
+ unf_ext (0.0.8.2)
221
222
  unicode-display_width (2.3.0)
222
- unicode-types (1.7.0)
223
+ unicode-types (1.8.0)
223
224
  uuid (2.3.9)
224
225
  macaddr (~> 1.0)
225
226
  validatable (1.6.7)
data/README.md CHANGED
@@ -66,7 +66,10 @@ Specific options:
66
66
  -W, --workdir PATH Path for directory where AIP creation work takes place in
67
67
  -N, --process_name NAME Name of the application process
68
68
  -m, --monitor Start monitor process for a daemon
69
- -q, --queue NAME Name of the queue to read from
69
+ -q, --queue NAME Name of the queue to read from
70
+ -i, --ingestion_prefix PREFIX Prefix for keys used in counting the number of failed ingestion attempts
71
+ -x, --ingestion_attempts NUMBER Max number of attempts to try ingesting an entity
72
+ -f, --first_failed_wait NUMBER Time in seconds to wait after first failed entity deposit. This time will double every failed attempt
70
73
 
71
74
  Common options:
72
75
  -v, --version Show version
@@ -101,6 +104,13 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
101
104
 
102
105
  To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
103
106
 
107
+ You'll need to set these two env vars; otherwise you'll likely run into an error (Errno::ECONNRESET: Connection reset by peer):
108
+
109
+ ```bash
110
+ export JUPITER_API_KEY=3eeb395e-63b7-11ea-bc55-0242ac130003
111
+ export JUPITER_USER=ditech@ualberta.ca
112
+ ```
113
+
104
114
  ## Testing
105
115
 
106
116
  Pre-requisites:
@@ -144,7 +154,7 @@ This will cut a tag version, builds the gem, and pushes the gem up to Rubygems
144
154
 
145
155
  Note: You may need permission to push a gem up to Rubygems!
146
156
  You will first need to create an account on rubygems.org.
147
- Once you have an account, bug Shane or Matt to [add you as an owner](http://guides.rubygems.org/command-reference/#gem-owner) to pushmi_pullyu Rubygem. Once you are an owner you should be able to push new versions of pushmi_pullyu up to Rubygems.
157
+ Once you have an account, bug @pgwillia (Tricia Jenkins), @lagoan (Omar Rodriguez-Arenas), or @henryzhang87 (Henry Zhang) to [add you as an owner](http://guides.rubygems.org/command-reference/#gem-owner) to pushmi_pullyu Rubygem. Once you are an owner you should be able to push new versions of pushmi_pullyu up to Rubygems.
148
158
 
149
159
  ## Deployment
150
160
 
@@ -16,6 +16,9 @@ piddir: tmp/pids
16
16
  workdir: tmp/work
17
17
  process_name: pushmi_pullyu
18
18
  queue_name: dev:pmpy_queue
19
+ ingestion_prefix: "'prod:pmpy_ingest_attempt:'"
20
+ ingestion_attempts: 15
21
+ first_failed_wait: 10
19
22
  minimum_age: 0
20
23
 
21
24
  redis:
@@ -35,6 +35,11 @@ class PushmiPullyu::AIP::Downloader
35
35
  # Main object metadata
36
36
  download_and_log(object_aip_paths[:main_object_remote],
37
37
  object_aip_paths[:main_object_local])
38
+
39
+ # Communities and collections do not have their own files.
40
+ return unless can_have_files?
41
+
42
+ FileUtils.mkdir_p(object_aip_paths[:file_sets_directory_local])
38
43
  download_and_log(object_aip_paths[:file_sets_remote],
39
44
  object_aip_paths[:file_sets_local])
40
45
 
@@ -81,7 +86,7 @@ class PushmiPullyu::AIP::Downloader
81
86
  @uri = URI.parse(PushmiPullyu.options[:jupiter][:jupiter_url])
82
87
  @http = Net::HTTP.new(@uri.host, @uri.port)
83
88
  @http.use_ssl = true if @uri.instance_of? URI::HTTPS
84
- request = Net::HTTP::Post.new("#{@uri.request_uri}auth/system")
89
+ request = Net::HTTP::Post.new(URI.join(@uri, '/auth/system'))
85
90
  request.set_form_data(
86
91
  email: PushmiPullyu.options[:jupiter][:user],
87
92
  api_key: PushmiPullyu.options[:jupiter][:api_key]
@@ -97,7 +102,7 @@ class PushmiPullyu::AIP::Downloader
97
102
  log_downloading(remote, local)
98
103
 
99
104
  @uri = URI.parse(PushmiPullyu.options[:jupiter][:jupiter_url])
100
- request = Net::HTTP::Get.new(@uri.request_uri + remote)
105
+ request = Net::HTTP::Get.new(URI.join(@uri, remote))
101
106
  # add previously stored cookies
102
107
  request['Cookie'] = @cookies
103
108
 
@@ -113,7 +118,7 @@ class PushmiPullyu::AIP::Downloader
113
118
  end
114
119
 
115
120
  def get_file_paths(url)
116
- request = Net::HTTP::Get.new(@uri.request_uri + url)
121
+ request = Net::HTTP::Get.new(URI.join(@uri, url))
117
122
  # add previously stored cookies
118
123
  request['Cookie'] = @cookies
119
124
 
@@ -139,25 +144,26 @@ class PushmiPullyu::AIP::Downloader
139
144
  PushmiPullyu::Logging.log_aip_activity(@aip_directory, message)
140
145
  end
141
146
 
147
+ def can_have_files?
148
+ @entity[:type] == 'items' || @entity[:type] == 'theses'
149
+ end
150
+
142
151
  ### Directories
143
152
 
144
153
  def aip_dirs
145
154
  @aip_dirs ||= {
146
155
  objects: "#{@aip_directory}/data/objects",
147
156
  metadata: "#{@aip_directory}/data/objects/metadata",
148
- files: "#{@aip_directory}/data/objects/files",
149
- files_metadata: "#{@aip_directory}/data/objects/metadata/files_metadata",
150
- logs: "#{@aip_directory}/data/logs",
151
- file_logs: "#{@aip_directory}/data/logs/files_logs"
157
+ logs: "#{@aip_directory}/data/logs"
152
158
  }
153
159
  end
154
160
 
155
161
  def file_set_dirs(file_set_uuid)
156
162
  @file_set_dirs ||= {}
157
163
  @file_set_dirs[file_set_uuid] ||= {
158
- metadata: "#{aip_dirs[:files_metadata]}/#{file_set_uuid}",
159
- files: "#{aip_dirs[:files]}/#{file_set_uuid}",
160
- logs: "#{aip_dirs[:file_logs]}/#{file_set_uuid}"
164
+ files: "#{@aip_directory}/data/objects/files/#{file_set_uuid}",
165
+ logs: "#{@aip_directory}/data/logs/files_logs/#{file_set_uuid}",
166
+ metadata: "#{@aip_directory}/data/objects/metadata/files_metadata/#{file_set_uuid}"
161
167
  }
162
168
  end
163
169
 
@@ -193,7 +199,9 @@ class PushmiPullyu::AIP::Downloader
193
199
  main_object_remote: object_uri,
194
200
  main_object_local: "#{aip_dirs[:metadata]}/object_metadata.n3",
195
201
  file_sets_remote: "#{object_uri}/filesets",
196
- file_sets_local: "#{aip_dirs[:files_metadata]}/file_order.xml",
202
+ # This directory needs to be created before we can download the file order information
203
+ file_sets_directory_local: "#{@aip_directory}/data/objects/metadata/files_metadata",
204
+ file_sets_local: "#{@aip_directory}/data/objects/metadata/files_metadata/file_order.xml",
197
205
  # This is downloaded for processing but not saved
198
206
  file_paths_remote: "#{object_uri}/file_paths"
199
207
  }.freeze
@@ -148,6 +148,21 @@ class PushmiPullyu::CLI
148
148
  opts[:queue_name] = queue
149
149
  end
150
150
 
151
+ o.on('-i', '--ingestion_prefix PREFIX',
152
+ 'Prefix for keys used in counting the number of failed ingestion attempts') do |prefix|
153
+ opts[:ingestion_prefix] = prefix
154
+ end
155
+
156
+ o.on('-x', '--ingestion_attempts NUMBER', Integer,
157
+ 'Max number of attempts to try ingesting an entity') do |ingestion_attempts|
158
+ opts[:ingestion_attempts] = ingestion_attempts
159
+ end
160
+
161
+ o.on('-f', '--first_failed_wait NUMBER', Integer,
162
+ 'Time in seconds to wait after first failed deposit. Time will double every failed attempt') do |failed_wait|
163
+ opts[:first_failed_wait] = failed_wait
164
+ end
165
+
151
166
  o.separator ''
152
167
  o.separator 'Common options:'
153
168
 
@@ -182,11 +197,12 @@ class PushmiPullyu::CLI
182
197
  end
183
198
 
184
199
  def run_preservation_cycle
185
- entity_json = queue.wait_next_item
186
- # jupiter is submitting the entries to reddis in a hash format using fat arrows. We need to change them to colons in
187
- # order to parse them correctly from json
188
- entity = JSON.parse(entity_json.gsub('=>', ':'), { symbolize_names: true })
189
- return unless entity[:type].present? && entity[:uuid].present?
200
+ begin
201
+ entity = queue.wait_next_item
202
+ return unless entity && entity[:type].present? && entity[:uuid].present?
203
+ rescue StandardError => e
204
+ log_exception(e)
205
+ end
190
206
 
191
207
  # add additional information about the error context to errors that occur while processing this item.
192
208
  Rollbar.scoped(entity_uuid: entity[:uuid]) do
@@ -202,7 +218,11 @@ class PushmiPullyu::CLI
202
218
  # readding it to the queue as it will always fail
203
219
  rescue PushmiPullyu::AIP::EntityInvalid => e
204
220
  rescue StandardError => e
205
- queue.add_entity_json(entity_json)
221
+ begin
222
+ queue.add_entity_in_timeframe(entity)
223
+ rescue PushmiPullyu::PreservationQueue::MaxDepositAttemptsReached => e
224
+ log_exception(e)
225
+ end
206
226
 
207
227
  # rubocop:disable Lint/RescueException
208
228
  # Something other than a StandardError exception means something happened which we were not expecting!
@@ -211,8 +231,7 @@ class PushmiPullyu::CLI
211
231
  raise e
212
232
  # rubocop:enable Lint/RescueException
213
233
  ensure
214
- Rollbar.error(e)
215
- logger.error(e)
234
+ log_exception(e)
216
235
  end
217
236
  end
218
237
 
@@ -287,4 +306,9 @@ class PushmiPullyu::CLI
287
306
  end
288
307
  end
289
308
 
309
+ def log_exception(exception)
310
+ Rollbar.error(exception)
311
+ logger.error(exception)
312
+ end
313
+
290
314
  end
@@ -20,6 +20,7 @@ require 'connection_pool'
20
20
  class PushmiPullyu::PreservationQueue
21
21
 
22
22
  class ConnectionError < StandardError; end
23
+ class MaxDepositAttemptsReached < StandardError; end
23
24
 
24
25
  def initialize(redis_url: 'redis://localhost:6379',
25
26
  pool_opts: { size: 1, timeout: 5 },
@@ -50,7 +51,8 @@ class PushmiPullyu::PreservationQueue
50
51
  rd.multi do |tx|
51
52
  tx.zrem(@queue_name, element) # remove the top element transactionally
52
53
  end
53
- return element
54
+
55
+ return JSON.parse(element, { symbolize_names: true })
54
56
  else
55
57
  rd.unwatch # cancel the transaction since there was nothing in the queue
56
58
  return nil
@@ -68,12 +70,27 @@ class PushmiPullyu::PreservationQueue
68
70
  end
69
71
  end
70
72
 
71
- def add_entity_json(entity_json)
73
+ def add_entity_in_timeframe(entity)
74
+ entity_attempts_key = "#{PushmiPullyu.options[:ingestion_prefix]}#{entity[:uuid]}"
75
+
72
76
  @redis.with do |connection|
73
- connection.zadd @queue_name, Time.now.to_f, entity_json
77
+ # separate information for priority information and queue
78
+ deposit_attempt = connection.incr entity_attempts_key
79
+
80
+ if deposit_attempt <= PushmiPullyu.options[:ingestion_attempts]
81
+ connection.zadd @queue_name, Time.now.to_f + self.class.extra_wait_time(deposit_attempt),
82
+ entity.slice(:uuid, :type).to_json
83
+ else
84
+ connection.del entity_attempts_key
85
+ raise MaxDepositAttemptsReached
86
+ end
74
87
  end
75
88
  end
76
89
 
90
+ def self.extra_wait_time(deposit_attempt)
91
+ (2**deposit_attempt) * PushmiPullyu.options[:first_failed_wait]
92
+ end
93
+
77
94
  protected
78
95
 
79
96
  def connected?
@@ -1,3 +1,3 @@
1
1
  module PushmiPullyu
2
- VERSION = '2.0.4'.freeze
2
+ VERSION = '2.0.6'.freeze
3
3
  end
data/lib/pushmi_pullyu.rb CHANGED
@@ -26,6 +26,9 @@ module PushmiPullyu
26
26
  workdir: 'tmp/work',
27
27
  process_name: 'pushmi_pullyu',
28
28
  queue_name: 'dev:pmpy_queue',
29
+ ingestion_prefix: 'prod:pmpy_ingest_attempt:',
30
+ ingestion_attempts: 15,
31
+ first_failed_wait: 10,
29
32
  redis: {
30
33
  url: 'redis://localhost:6379'
31
34
  },
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pushmi_pullyu
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.4
4
+ version: 2.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shane Murnaghan
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2022-11-22 00:00:00.000000000 Z
12
+ date: 2023-03-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: activesupport