pushmi_pullyu 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,70 @@
1
+ require 'logger'
2
+
3
# PushmiPullyu::Logging is a standard Ruby logger wrapper.
#
# It memoizes one application-wide Logger and offers helpers that write a message to that logger and to an
# additional, purpose-specific log file. The instance-level #logger lets classes that include this module
# reach the same shared logger.
module PushmiPullyu::Logging
  # Simple formatter which only displays the message.
  # Taken from ActiveSupport
  class SimpleFormatter < Logger::Formatter

    # This method is invoked when a log event occurs.
    # Strings are emitted as-is, anything else via #inspect; a newline is always appended.
    def call(_severity, _timestamp, _program_name, msg)
      "#{msg.is_a?(String) ? msg : msg.inspect}\n"
    end

  end

  # Builds the application logger at INFO level, stores it as the shared logger and returns it.
  #
  # @param log_target [IO, String] anything Logger.new accepts (defaults to STDOUT)
  # @return [Logger]
  def self.initialize_logger(log_target = STDOUT)
    @logger = Logger.new(log_target)
    @logger.level = Logger::INFO
    @logger
  end

  # @return [Logger] the shared application logger, created on first use
  def self.logger
    @logger ||= initialize_logger
  end

  # Logs +message+ to both the application log and the log file that gets archived in the AIP
  # (<aip_directory>/data/logs/aipcreation.log).
  #
  # @param aip_directory [String] root directory of the AIP being built
  # @param message [String] text to log at INFO level
  def self.log_aip_activity(aip_directory, message)
    aip_logger = Logger.new("#{aip_directory}/data/logs/aipcreation.log")
    aip_logger.level = logger.level

    logger.info(message)
    aip_logger.info(message)
    nil
  ensure
    # Release the file handle even when one of the log calls above raised.
    aip_logger.close if aip_logger
  end

  # Logs the details of a successful deposit to both the application log and
  # <logdir>/preservation_events.log, where logdir comes from PushmiPullyu.options[:logdir].
  #
  # @param deposited_file [#name, #last_modified, #etag, #metadata] the object that was deposited
  def self.log_preservation_event(deposited_file)
    preservation_logger = Logger.new("#{PushmiPullyu.options[:logdir]}/preservation_events.log")

    message = "#{deposited_file.name} was successfully deposited into Swift Storage! \n"\
              "Here are the details of this preservation event: \n"\
              "\t NOID: '#{deposited_file.name}' \n"\
              "\t Timestamp of Completion: '#{deposited_file.last_modified}' \n"\
              "\t AIP Checksum: '#{deposited_file.etag}' \n"\
              "\t Metadata: #{deposited_file.metadata} \n"

    logger.info(message)
    preservation_logger.info(message)
    nil
  ensure
    # Release the file handle even when building or writing the message raised.
    preservation_logger.close if preservation_logger
  end

  # Replaces the shared application logger.
  def self.logger=(log)
    @logger = log
  end

  # Reopens the shared logger's log device, or creates the logger if none has been set up yet.
  def self.reopen
    if @logger
      @logger.reopen
    else
      @logger = initialize_logger
    end
  end

  # Instance-level accessor for the shared application logger.
  def logger
    PushmiPullyu::Logging.logger
  end
end
@@ -0,0 +1,76 @@
1
+ require 'redis'
2
+ require 'connection_pool'
3
+
4
+ # Quick and dirty take on sorted sets
5
+
6
+ # 1) Create a sorted set in Redis (https://redis.io/topics/data-types). Call it preservation_queue
7
+ #
8
+ # 2) In GenericFile add an after_save that:
9
+ # - determines a monotonically increasing "score". Obvious scores would be either the time in seconds/milliseconds
10
+ # or using something like redis INCR to create an atomic, increasing counter. It doesn't matter if 2 different
11
+ # noids ever have the same score; it only matters that scores generally increase over time.
12
+ # - zadd preservation_queue score "noid" adds the noid and gives it the score from above.
13
+ #
14
+ # 3) Pushmi-pullyu pops elements out of the sorted set, lowest score to highest.
15
+ #
16
+ # A sorted set will only ever contain a noid once, with whatever score it was last given. Because preservation_queue
17
+ # is sorted lowest score to highest, and because scores increase over time, a cascade of jobs/updates will cause a noid
18
+ # to keep "moving back" in the queue until it becomes the least recently updated noid in the queue, at which point it
19
+ # will be popped and preserved. Any further updates will trigger a new AIP build.
20
# Redis-backed queue of noids awaiting preservation, stored as a sorted set ordered by score.
class PushmiPullyu::PreservationQueue

  class ConnectionError < StandardError; end

  # @param connection [Hash] options handed to Redis.new; host and port default to localhost:6379
  # @param pool_opts [Hash] options handed to ConnectionPool.new
  # @param poll_interval [Numeric] seconds #wait_next_item sleeps between attempts
  # @param age_at_least [Numeric] minimum age in seconds (relative to the element's score) before it is handed out
  # @param queue_name [String] key of the sorted set in Redis
  # @raise [ConnectionError] if the initial ping is not answered with PONG
  def initialize(connection: {}, pool_opts: { size: 1, timeout: 5 }, poll_interval: 10, age_at_least: 0,
                 queue_name: 'dev:pmpy_queue')
    # Caller-supplied settings win over the defaults. Merging into a fresh hash leaves the caller's hash untouched.
    redis_options = { host: 'localhost', port: 6379 }.merge(connection)

    # we use a connection pool even though we're not (currently) threading
    # as it transparently provides for repairing connections if they are closed after long periods of inactivity
    @redis = ConnectionPool.new(pool_opts) do
      Redis.new(redis_options)
    end

    raise ConnectionError unless connected?

    @poll_interval = poll_interval
    @age_at_least = age_at_least
    @queue_name = queue_name
  end

  # Removes and returns the lowest-scored element once it is at least @age_at_least seconds old.
  #
  # @return [String, nil] the element, or nil when the queue is empty, the head is still too young, or the
  #   removal was aborted because the set changed concurrently (the caller can simply ask again)
  # @raise [ConnectionError] if the ping is not answered with PONG
  def next_item
    raise ConnectionError unless connected?

    @redis.with do |conn|
      conn.watch(@queue_name) do |rd| # transactional mutation of the set is dependent on the set key
        element, score = rd.zrange(@queue_name, 0, 0, with_scores: true).first

        if element && ((Time.now.to_f - @age_at_least) >= score)
          committed = rd.multi do |tx|
            tx.zrem(@queue_name, element) # remove the top element transactionally
          end
          # multi yields nil when the watched key changed and EXEC was aborted. The element was not removed
          # in that case, so it must not be handed out.
          return committed ? element : nil
        else
          rd.unwatch # cancel the transaction since there was nothing in the queue
          return nil
        end
      end
    end
  end

  # Polls #next_item every @poll_interval seconds until an element is available.
  #
  # @return [String, nil] the element, or nil once PushmiPullyu.continue_polling? turns false
  def wait_next_item
    while PushmiPullyu.continue_polling?
      element = next_item
      return element if element.present?
      sleep @poll_interval
    end
  end

  protected

  # @return [Boolean] whether Redis answers a ping with PONG
  def connected?
    @redis.with do |conn|
      conn.ping == 'PONG'
    end
  end

end
@@ -0,0 +1,51 @@
1
+ require 'digest/md5'
2
+ require 'openstack'
3
+
4
# Deposits files into an OpenStack Swift container.
class PushmiPullyu::SwiftDepositer

  attr_reader :swift_connection

  # Opens the Swift (object-store) connection using password authentication.
  #
  # @param connection [Hash] reads :username, :password, :endpoint, :auth_version and :tenant
  def initialize(connection)
    @swift_connection = OpenStack::Connection.create(
      username: connection[:username],
      api_key: connection[:password],
      auth_method: 'password',
      auth_url: "#{connection[:endpoint]}/auth/#{connection[:auth_version]}",
      authtenant_name: connection[:tenant],
      service_type: 'object-store'
    )
  end

  # Writes +file_name+ into +swift_container+ under its base name (extension stripped), creating the object
  # if it does not exist yet and overwriting it otherwise.
  #
  # @param file_name [String] path of the file to deposit
  # @param swift_container [String] name of the target container
  # @return the Swift object that was written
  def deposit_file(file_name, swift_container)
    file_base_name = File.basename(file_name, '.*')

    checksum = Digest::MD5.file(file_name).hexdigest

    era_container = swift_connection.container(swift_container)

    deposited_file = if era_container.object_exists?(file_base_name)
                       era_container.object(file_base_name)
                     else
                       era_container.create_object(file_base_name)
                     end

    # Add swift metadata in accordance with the AIP spec:
    # https://docs.google.com/document/d/154BqhDPAdGW-I9enrqLpBYbhkF9exX9lV3kMaijuwPg/edit#
    metadata = {
      project: 'ERA',
      project_id: file_base_name,
      promise: 'bronze',
      aip_version: '1.0'
    }

    # ruby-openstack wants all keys of the metadata to be named like "X-Object-Meta-{{Key}}", so update them
    metadata.transform_keys! { |key| "X-Object-Meta-#{key}" }

    headers = { 'etag' => checksum,
                'content-type' => 'application/x-tar' }.merge(metadata)

    # Block form closes the file handle once the upload finishes or fails.
    File.open(file_name) do |file|
      deposited_file.write(file, headers)
    end

    deposited_file
  end

end
@@ -0,0 +1,3 @@
1
# Top-level namespace of the pushmi_pullyu gem.
module PushmiPullyu
  # Version of this gem release; frozen so it cannot be altered at runtime.
  VERSION = '0.2.0'.freeze
end
@@ -0,0 +1,48 @@
1
# coding: utf-8

# Put lib/ on the load path so the version constant can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'pushmi_pullyu/version'

Gem::Specification.new do |gem|
  gem.name    = 'pushmi_pullyu'
  gem.version = PushmiPullyu::VERSION
  gem.authors = ['Shane Murnaghan']
  gem.email   = ['murnagha@ualberta.ca']

  gem.summary  = 'Ruby application to manage flow of content from Fedora into Swift for preservation'
  gem.homepage = 'https://github.com/ualbertalib/pushmi_pullyu'
  gem.license  = 'MIT'

  # Ship every git-tracked file except the test suites.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  gem.bindir        = 'exe'
  gem.executables   = gem.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  gem.require_paths = ['lib']

  gem.required_ruby_version = '>= 2.3.1'

  # Runtime dependencies
  gem.add_runtime_dependency('activesupport', '~> 5.0')
  gem.add_runtime_dependency('bagit', '~> 0.4')
  gem.add_runtime_dependency('connection_pool', '~> 2.2')
  gem.add_runtime_dependency('daemons', '~> 1.2', '>= 1.2.4')
  gem.add_runtime_dependency('minitar', '~> 0.6')
  gem.add_runtime_dependency('openstack', '~> 3.3', '>= 3.3.10')
  gem.add_runtime_dependency('rdf', '~> 1.99')
  gem.add_runtime_dependency('rdf-n3', '~> 1.99')
  gem.add_runtime_dependency('redis', '~> 3.3')
  gem.add_runtime_dependency('rollbar', '~> 2.14')

  # Development and test dependencies
  gem.add_development_dependency('bundler', '~> 1.14')
  gem.add_development_dependency('coveralls', '~> 0.8')
  gem.add_development_dependency('danger', '~> 5.2')
  gem.add_development_dependency('rake', '~> 12.0')
  gem.add_development_dependency('rspec', '~> 3.0')
  gem.add_development_dependency('rubocop', '~> 0.45')
  gem.add_development_dependency('rubocop-rspec', '~> 1.10')
  gem.add_development_dependency('pry', '~> 0.10', '>= 0.10.4')
  gem.add_development_dependency('timecop', '~> 0.8')
  gem.add_development_dependency('vcr', '~> 3.0')
  gem.add_development_dependency('webmock', '~> 2.1')
end
metadata ADDED
@@ -0,0 +1,391 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pushmi_pullyu
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Shane Murnaghan
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-06-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activesupport
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '5.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '5.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bagit
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.4'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.4'
41
+ - !ruby/object:Gem::Dependency
42
+ name: connection_pool
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.2'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.2'
55
+ - !ruby/object:Gem::Dependency
56
+ name: daemons
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.2'
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: 1.2.4
65
+ type: :runtime
66
+ prerelease: false
67
+ version_requirements: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - "~>"
70
+ - !ruby/object:Gem::Version
71
+ version: '1.2'
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: 1.2.4
75
+ - !ruby/object:Gem::Dependency
76
+ name: minitar
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '0.6'
82
+ type: :runtime
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '0.6'
89
+ - !ruby/object:Gem::Dependency
90
+ name: openstack
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - "~>"
94
+ - !ruby/object:Gem::Version
95
+ version: '3.3'
96
+ - - ">="
97
+ - !ruby/object:Gem::Version
98
+ version: 3.3.10
99
+ type: :runtime
100
+ prerelease: false
101
+ version_requirements: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - "~>"
104
+ - !ruby/object:Gem::Version
105
+ version: '3.3'
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ version: 3.3.10
109
+ - !ruby/object:Gem::Dependency
110
+ name: rdf
111
+ requirement: !ruby/object:Gem::Requirement
112
+ requirements:
113
+ - - "~>"
114
+ - !ruby/object:Gem::Version
115
+ version: '1.99'
116
+ type: :runtime
117
+ prerelease: false
118
+ version_requirements: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - "~>"
121
+ - !ruby/object:Gem::Version
122
+ version: '1.99'
123
+ - !ruby/object:Gem::Dependency
124
+ name: rdf-n3
125
+ requirement: !ruby/object:Gem::Requirement
126
+ requirements:
127
+ - - "~>"
128
+ - !ruby/object:Gem::Version
129
+ version: '1.99'
130
+ type: :runtime
131
+ prerelease: false
132
+ version_requirements: !ruby/object:Gem::Requirement
133
+ requirements:
134
+ - - "~>"
135
+ - !ruby/object:Gem::Version
136
+ version: '1.99'
137
+ - !ruby/object:Gem::Dependency
138
+ name: redis
139
+ requirement: !ruby/object:Gem::Requirement
140
+ requirements:
141
+ - - "~>"
142
+ - !ruby/object:Gem::Version
143
+ version: '3.3'
144
+ type: :runtime
145
+ prerelease: false
146
+ version_requirements: !ruby/object:Gem::Requirement
147
+ requirements:
148
+ - - "~>"
149
+ - !ruby/object:Gem::Version
150
+ version: '3.3'
151
+ - !ruby/object:Gem::Dependency
152
+ name: rollbar
153
+ requirement: !ruby/object:Gem::Requirement
154
+ requirements:
155
+ - - "~>"
156
+ - !ruby/object:Gem::Version
157
+ version: '2.14'
158
+ type: :runtime
159
+ prerelease: false
160
+ version_requirements: !ruby/object:Gem::Requirement
161
+ requirements:
162
+ - - "~>"
163
+ - !ruby/object:Gem::Version
164
+ version: '2.14'
165
+ - !ruby/object:Gem::Dependency
166
+ name: bundler
167
+ requirement: !ruby/object:Gem::Requirement
168
+ requirements:
169
+ - - "~>"
170
+ - !ruby/object:Gem::Version
171
+ version: '1.14'
172
+ type: :development
173
+ prerelease: false
174
+ version_requirements: !ruby/object:Gem::Requirement
175
+ requirements:
176
+ - - "~>"
177
+ - !ruby/object:Gem::Version
178
+ version: '1.14'
179
+ - !ruby/object:Gem::Dependency
180
+ name: coveralls
181
+ requirement: !ruby/object:Gem::Requirement
182
+ requirements:
183
+ - - "~>"
184
+ - !ruby/object:Gem::Version
185
+ version: '0.8'
186
+ type: :development
187
+ prerelease: false
188
+ version_requirements: !ruby/object:Gem::Requirement
189
+ requirements:
190
+ - - "~>"
191
+ - !ruby/object:Gem::Version
192
+ version: '0.8'
193
+ - !ruby/object:Gem::Dependency
194
+ name: danger
195
+ requirement: !ruby/object:Gem::Requirement
196
+ requirements:
197
+ - - "~>"
198
+ - !ruby/object:Gem::Version
199
+ version: '5.2'
200
+ type: :development
201
+ prerelease: false
202
+ version_requirements: !ruby/object:Gem::Requirement
203
+ requirements:
204
+ - - "~>"
205
+ - !ruby/object:Gem::Version
206
+ version: '5.2'
207
+ - !ruby/object:Gem::Dependency
208
+ name: rake
209
+ requirement: !ruby/object:Gem::Requirement
210
+ requirements:
211
+ - - "~>"
212
+ - !ruby/object:Gem::Version
213
+ version: '12.0'
214
+ type: :development
215
+ prerelease: false
216
+ version_requirements: !ruby/object:Gem::Requirement
217
+ requirements:
218
+ - - "~>"
219
+ - !ruby/object:Gem::Version
220
+ version: '12.0'
221
+ - !ruby/object:Gem::Dependency
222
+ name: rspec
223
+ requirement: !ruby/object:Gem::Requirement
224
+ requirements:
225
+ - - "~>"
226
+ - !ruby/object:Gem::Version
227
+ version: '3.0'
228
+ type: :development
229
+ prerelease: false
230
+ version_requirements: !ruby/object:Gem::Requirement
231
+ requirements:
232
+ - - "~>"
233
+ - !ruby/object:Gem::Version
234
+ version: '3.0'
235
+ - !ruby/object:Gem::Dependency
236
+ name: rubocop
237
+ requirement: !ruby/object:Gem::Requirement
238
+ requirements:
239
+ - - "~>"
240
+ - !ruby/object:Gem::Version
241
+ version: '0.45'
242
+ type: :development
243
+ prerelease: false
244
+ version_requirements: !ruby/object:Gem::Requirement
245
+ requirements:
246
+ - - "~>"
247
+ - !ruby/object:Gem::Version
248
+ version: '0.45'
249
+ - !ruby/object:Gem::Dependency
250
+ name: rubocop-rspec
251
+ requirement: !ruby/object:Gem::Requirement
252
+ requirements:
253
+ - - "~>"
254
+ - !ruby/object:Gem::Version
255
+ version: '1.10'
256
+ type: :development
257
+ prerelease: false
258
+ version_requirements: !ruby/object:Gem::Requirement
259
+ requirements:
260
+ - - "~>"
261
+ - !ruby/object:Gem::Version
262
+ version: '1.10'
263
+ - !ruby/object:Gem::Dependency
264
+ name: pry
265
+ requirement: !ruby/object:Gem::Requirement
266
+ requirements:
267
+ - - "~>"
268
+ - !ruby/object:Gem::Version
269
+ version: '0.10'
270
+ - - ">="
271
+ - !ruby/object:Gem::Version
272
+ version: 0.10.4
273
+ type: :development
274
+ prerelease: false
275
+ version_requirements: !ruby/object:Gem::Requirement
276
+ requirements:
277
+ - - "~>"
278
+ - !ruby/object:Gem::Version
279
+ version: '0.10'
280
+ - - ">="
281
+ - !ruby/object:Gem::Version
282
+ version: 0.10.4
283
+ - !ruby/object:Gem::Dependency
284
+ name: timecop
285
+ requirement: !ruby/object:Gem::Requirement
286
+ requirements:
287
+ - - "~>"
288
+ - !ruby/object:Gem::Version
289
+ version: '0.8'
290
+ type: :development
291
+ prerelease: false
292
+ version_requirements: !ruby/object:Gem::Requirement
293
+ requirements:
294
+ - - "~>"
295
+ - !ruby/object:Gem::Version
296
+ version: '0.8'
297
+ - !ruby/object:Gem::Dependency
298
+ name: vcr
299
+ requirement: !ruby/object:Gem::Requirement
300
+ requirements:
301
+ - - "~>"
302
+ - !ruby/object:Gem::Version
303
+ version: '3.0'
304
+ type: :development
305
+ prerelease: false
306
+ version_requirements: !ruby/object:Gem::Requirement
307
+ requirements:
308
+ - - "~>"
309
+ - !ruby/object:Gem::Version
310
+ version: '3.0'
311
+ - !ruby/object:Gem::Dependency
312
+ name: webmock
313
+ requirement: !ruby/object:Gem::Requirement
314
+ requirements:
315
+ - - "~>"
316
+ - !ruby/object:Gem::Version
317
+ version: '2.1'
318
+ type: :development
319
+ prerelease: false
320
+ version_requirements: !ruby/object:Gem::Requirement
321
+ requirements:
322
+ - - "~>"
323
+ - !ruby/object:Gem::Version
324
+ version: '2.1'
325
+ description:
326
+ email:
327
+ - murnagha@ualberta.ca
328
+ executables:
329
+ - pushmi_pullyu
330
+ extensions: []
331
+ extra_rdoc_files: []
332
+ files:
333
+ - ".coveralls.yml"
334
+ - ".editorconfig"
335
+ - ".gitignore"
336
+ - ".hound.yml"
337
+ - ".rspec"
338
+ - ".rubocop.yml"
339
+ - ".travis.yml"
340
+ - Dangerfile
341
+ - Gemfile
342
+ - LICENSE.txt
343
+ - README.md
344
+ - Rakefile
345
+ - bin/console
346
+ - bin/setup
347
+ - config/.gitkeep
348
+ - docs/images/pushmi-pullyu.png
349
+ - docs/images/system-infrastructure-diagram.png
350
+ - examples/pushmi_pullyu.yml
351
+ - exe/pushmi_pullyu
352
+ - lib/pushmi_pullyu.rb
353
+ - lib/pushmi_pullyu/aip.rb
354
+ - lib/pushmi_pullyu/aip/creator.rb
355
+ - lib/pushmi_pullyu/aip/downloader.rb
356
+ - lib/pushmi_pullyu/aip/fedora_fetcher.rb
357
+ - lib/pushmi_pullyu/aip/solr_fetcher.rb
358
+ - lib/pushmi_pullyu/cli.rb
359
+ - lib/pushmi_pullyu/logging.rb
360
+ - lib/pushmi_pullyu/preservation_queue.rb
361
+ - lib/pushmi_pullyu/swift_depositer.rb
362
+ - lib/pushmi_pullyu/version.rb
363
+ - log/.gitkeep
364
+ - pushmi_pullyu.gemspec
365
+ - tmp/pids/.gitkeep
366
+ - tmp/work/.gitkeep
367
+ homepage: https://github.com/ualbertalib/pushmi_pullyu
368
+ licenses:
369
+ - MIT
370
+ metadata: {}
371
+ post_install_message:
372
+ rdoc_options: []
373
+ require_paths:
374
+ - lib
375
+ required_ruby_version: !ruby/object:Gem::Requirement
376
+ requirements:
377
+ - - ">="
378
+ - !ruby/object:Gem::Version
379
+ version: 2.3.1
380
+ required_rubygems_version: !ruby/object:Gem::Requirement
381
+ requirements:
382
+ - - ">="
383
+ - !ruby/object:Gem::Version
384
+ version: '0'
385
+ requirements: []
386
+ rubyforge_project:
387
+ rubygems_version: 2.5.2
388
+ signing_key:
389
+ specification_version: 4
390
+ summary: Ruby application to manage flow of content from Fedora into Swift for preservation
391
+ test_files: []