s3-object-processor 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
# Fetch gems over HTTPS rather than plain HTTP.
source "https://rubygems.org"

# Runtime dependencies (required by lib/s3-object-processor/cli.rb).
gem "cli", "~> 1.3"
gem "right_aws", "~> 3.0"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "rspec", "~> 2.8.0"
  gem "rdoc", "~> 3.12"
  gem "bundler", "~> 1.0"
  gem "jeweler", "~> 1.8.7"
end
@@ -0,0 +1,69 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ addressable (2.3.5)
5
+ builder (3.2.2)
6
+ cli (1.3.1)
7
+ diff-lcs (1.1.3)
8
+ faraday (0.8.8)
9
+ multipart-post (~> 1.2.0)
10
+ git (1.2.6)
11
+ github_api (0.10.1)
12
+ addressable
13
+ faraday (~> 0.8.1)
14
+ hashie (>= 1.2)
15
+ multi_json (~> 1.4)
16
+ nokogiri (~> 1.5.2)
17
+ oauth2
18
+ hashie (2.0.5)
19
+ highline (1.6.20)
20
+ httpauth (0.2.0)
21
+ jeweler (1.8.8)
22
+ builder
23
+ bundler (~> 1.0)
24
+ git (>= 1.2.5)
25
+ github_api (= 0.10.1)
26
+ highline (>= 1.6.15)
27
+ nokogiri (= 1.5.10)
28
+ rake
29
+ rdoc
30
+ json (1.8.1)
31
+ jwt (0.1.8)
32
+ multi_json (>= 1.5)
33
+ multi_json (1.8.2)
34
+ multi_xml (0.5.5)
35
+ multipart-post (1.2.0)
36
+ nokogiri (1.5.10)
37
+ oauth2 (0.9.2)
38
+ faraday (~> 0.8)
39
+ httpauth (~> 0.2)
40
+ jwt (~> 0.1.4)
41
+ multi_json (~> 1.0)
42
+ multi_xml (~> 0.5)
43
+ rack (~> 1.2)
44
+ rack (1.5.2)
45
+ rake (10.1.0)
46
+ rdoc (3.12.2)
47
+ json (~> 1.4)
48
+ right_aws (3.1.0)
49
+ right_http_connection (>= 1.2.5)
50
+ right_http_connection (1.4.0)
51
+ rspec (2.8.0)
52
+ rspec-core (~> 2.8.0)
53
+ rspec-expectations (~> 2.8.0)
54
+ rspec-mocks (~> 2.8.0)
55
+ rspec-core (2.8.0)
56
+ rspec-expectations (2.8.0)
57
+ diff-lcs (~> 1.1.2)
58
+ rspec-mocks (2.8.0)
59
+
60
+ PLATFORMS
61
+ ruby
62
+
63
+ DEPENDENCIES
64
+ bundler (~> 1.0)
65
+ cli (~> 1.3)
66
+ jeweler (~> 1.8.7)
67
+ rdoc (~> 3.12)
68
+ right_aws (~> 3.0)
69
+ rspec (~> 2.8.0)
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2013 Jakub Pastuszek
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,105 @@
1
+ # s3-object-processor
2
+
3
+ This library helps with developing CLI programs that process every object stored in a given S3 bucket, or only the objects named in a list file.
4
+
5
+ It uses a multi-threaded worker model so that objects can be processed in parallel.
6
+
7
+ # Example usage
8
+
9
+ This example program can be used to call one of two HTTP endpoints for each object that matches a regexp and some other criteria.
10
+ In addition, the total size of the handled objects is counted, and the time spent fetching S3 object data and the time spent on API calls are measured; all of these are reported at the end of the run.
11
+
12
+ ```ruby
13
+ require 's3-object-processor/cli'
14
+ require 'httpclient'
15
+
16
S3ObjectProcessor::CLI.new do
  # Extra command line options, declared in addition to the built-in ones.
  cli do
    option :endpoint,
      description: 'API endpoint URI for JPEG uploads',
      default: '/iss/v2/pictures'
    option :endpoint_as_is,
      description: 'API endpoint URI for as-is uploads',
      default: '/iss/v2/images'
    switch :as_is,
      description: 'upload images without conversion to JPEG'
    option :httpimagestore,
      description: 'URL to HTTP Image Store',
      default: 'http://localhost:3000'
  end

  # Hook for post-processing the parsed command line (nothing to do in this example).
  cli_process do |settings|
  end

  # Custom reports: name and initial value, then label, format and optional value conversion.
  report :input_object_size, 0 do
    report "total input object size [KiB]", "%d" do |value|
      (value.to_f / 1024).round
    end
  end
  report :s3_body_get_time, 0.0 do
    report "total S3 get body time [s]", "%.3f"
  end
  report :httpimagestore_time, 0.0 do
    report "total ISS request time [s]", "%.3f"
  end

  # Called once for every key taken from the bucket (or from the list file).
  processor do |bucket, key, settings, log, reporter|
    # Expected key shape: <dir>/<16 hex digits><optional /name>.<3-4 char extension>.
    # The hex class is spelled out: a range like [0-f] would also accept
    # punctuation and every upper-case letter.
    unless key.to_s =~ %r{(^|.*?/)([0-9a-f]{16})(|/.*)\.(.{3,4})$}
      log.warn "skipping bad format: #{key}"
      reporter.report :skipped_key, key
      next
    end

    dir = $1
    hash = $2
    name = $3
    extension = $4

    # Derived variants (thumbnails etc.) are skipped; only originals are uploaded.
    if name =~ /-(search|original|search_thumb|brochure|brochure_thumb|admin|admin_thumb|treatment_thumb|staff_member_thumb|consultation|clinic_google_map_thumb)$/
      log.debug "skipping not original: #{key}"
      reporter.report :skipped_key, key
      next
    end

    log.debug "processing original dir: '#{dir}' hash: '#{hash}' name: '#{name}' extension: '#{extension}'"

    # reporter.time does not hand back the block's value, so capture it in a local.
    data = nil
    reporter.time :s3_body_get_time do
      data = key.data
    end
    fail "no data for key; key not found?!" unless data
    reporter.report :input_object_size, data.length

    if settings.noop
      reporter.report :noop_key, key
      next
    end

    reporter.time :httpimagestore_time do
      if settings.as_is
        response = HTTPClient.put(settings.httpimagestore + settings.endpoint_as_is + "/#{hash}.#{extension}", data)
      else
        response = HTTPClient.put(settings.httpimagestore + settings.endpoint + "/#{hash}.jpg", data)
      end
      fail "bad HTTP Image Store response: #{response.status}: #{response.body}" if response.status != 200
    end
    reporter.report :handled_key, key
  end
end
89
+ ```
90
+
91
+ ## Contributing to s3-object-processor
92
+
93
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
94
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
95
+ * Fork the project.
96
+ * Start a feature/bugfix branch.
97
+ * Commit and push until you are happy with your contribution.
98
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
99
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or if it is otherwise necessary, that is fine, but please isolate the change in its own commit so I can cherry-pick around it.
100
+
101
+ ## Copyright
102
+
103
+ Copyright (c) 2013 Jakub Pastuszek. See LICENSE.txt for
104
+ further details.
105
+
@@ -0,0 +1,53 @@
1
+ # encoding: utf-8
2
+
3
require 'rubygems'
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "s3-object-processor"
  gem.homepage = "http://github.com/jpastuszek/s3-object-processor"
  gem.license = "MIT"
  gem.summary = "S3 key-by-key processor builder"
  gem.description = "DSL tools for building programs that can process S3 object key-by-key using threaded worker pool"
  gem.email = "jpastuszek@gmail.com"
  gem.authors = ["Jakub Pastuszek"]
  # dependencies defined in Gemfile

  # Keep IDE project files out of the gem. The dot is escaped so that only
  # paths really starting with ".idea" are excluded.
  gem.files.select{|f| f =~ /\A\.idea/}.each do |file|
    gem.files.exclude file
  end
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

RSpec::Core::RakeTask.new(:rcov) do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

task :default => :spec

require 'rdoc/task'
Rake::RDocTask.new do |rdoc|
  # strip the trailing newline of the VERSION file so it does not end up in the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "s3-object-processor #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,360 @@
1
require 'logger'
require 'thread'
2
+ Thread.abort_on_exception = true
3
+
4
# Base class for objects that perform their work on a dedicated thread and
# fire registered callbacks once that work has ended.
class Runnable
  # Registers a callback to call after the work block has ended, whether it
  # finished normally, was interrupted, or raised. May be called several
  # times; callbacks fire in registration order.
  #
  # Returns self so that calls can be chained.
  def on_finish(&callback)
    (@on_finish ||= []) << callback
    self
  end

  # Starts a new thread that yields to the given block.
  #
  # An Interrupt raised by the block is swallowed. Any other exception leaves
  # the thread after the on_finish callbacks have been called.
  #
  # Returns self.
  def run
    @thread = Thread.new do
      begin
        yield
      rescue Interrupt
        # ignored: an interrupt simply ends the work early
      ensure
        run_finish_callbacks
      end
    end
    self
  end

  # Waits for the thread started by #run; does nothing if #run was never called.
  def join
    @thread.join if @thread
  end

  private

  # Calls every on_finish callback. Each callback is isolated so that one
  # failing callback cannot prevent the remaining ones from running; errors
  # are deliberately ignored (best-effort cleanup).
  def run_finish_callbacks
    (@on_finish || []).each do |callback|
      begin
        callback.call
      rescue
        # ignored
      end
    end
  end
end
31
+
32
# Pushes the keys of a bucket onto a queue, fetching the listing page by page
# on its own thread.
class Lister < Runnable
  # bucket     - object whose #keys(options) returns one page of keys
  # key_queue  - queue that receives every listed key
  # fetch_size - value sent as 'max-keys' with each listing request
  # max_keys   - optional cap on the number of keys queued (nil means no cap)
  def initialize(bucket, key_queue, fetch_size, max_keys = nil)
    @bucket, @key_queue = bucket, key_queue
    @fetch_size, @max_keys = fetch_size, max_keys
  end

  # Sets the callback that is handed each fetched page of keys before the
  # keys are queued. Returns self.
  def on_keys_chunk(&callback)
    @on_keys_chunk = callback
    self
  end

  # Starts listing on a new thread. Pages are requested until an empty page
  # comes back or max_keys keys have been queued. The name of the last key
  # of a page is used as the marker for the next request.
  def run(prefix = nil)
    super() do
      marker = ''
      queued = 0
      limit_reached = false

      loop do
        page = @bucket.keys(prefix: prefix, 'max-keys' => @fetch_size, marker: marker)
        break if page.empty?
        @on_keys_chunk.call(page) if @on_keys_chunk

        page.each do |key|
          if @max_keys && queued >= @max_keys
            limit_reached = true
            break
          end
          @key_queue << key
          queued += 1
        end
        break if limit_reached

        marker = page.last.name
      end
    end
  end
end
65
+
66
# Pushes bucket keys for an explicit list of key names onto a queue, on its
# own thread.
class ListLister < Runnable
  # bucket    - object whose #key(name) returns the key object for a name
  # key_queue - queue that receives the key objects
  # max_keys  - optional cap on the number of keys queued (nil means no cap)
  def initialize(bucket, key_queue, max_keys = nil)
    @bucket = bucket
    @key_queue = key_queue
    @max_keys = max_keys
  end

  # Sets the callback that is handed the whole list before any key is
  # queued. Returns self.
  def on_keys_chunk(&callback)
    @on_keys_chunk = callback
    self
  end

  # Starts queueing on a new thread.
  #
  # list - enumerable of key names
  #
  # The max_keys limit is checked BEFORE a key is queued, so at most max_keys
  # keys are queued (checking after the push let one extra key through).
  def run(list)
    super() do
      queued = 0
      @on_keys_chunk.call(list) if @on_keys_chunk
      list.each do |key|
        break if @max_keys && queued >= @max_keys
        @key_queue << @bucket.key(key)
        queued += 1
      end
    end
  end
end
90
+
91
# Takes keys off a queue on its own thread and hands each one to the
# processing block until the :end marker is popped.
class Worker < Runnable
  # no          - worker number (stored only)
  # key_queue   - queue to pop keys from
  # process_key - block called with every key
  def initialize(no, key_queue, &process_key)
    @no, @key_queue, @process_key = no, key_queue, process_key
  end

  # Sets the callback called with (key, error) when processing a key raises
  # a StandardError. Returns self.
  def on_error(&callback)
    @on_error = callback
    self
  end

  # Starts the worker thread; it stops once :end is popped from the queue.
  def run
    super do
      key = @key_queue.pop
      until key == :end
        process(key)
        key = @key_queue.pop
      end
    end
  end

  private

  # Processes one key; a StandardError goes to the on_error callback (or is
  # dropped when none is set) so that the worker keeps going.
  def process(key)
    @process_key.call(key)
  rescue => error
    @on_error.call(key, error) if @on_error
  end
end
115
+
116
# Collects [key, value] report events from any thread through a bounded queue
# and hands them, one by one, to a sink block on its own thread.
class Reporter < Runnable
  # One accumulated statistic: a running value, plus how to update and
  # print it.
  class Report
    # Configuration DSL evaluated by Report.new.
    class DSL
      attr_reader :update_callback, :description, :value_pattern, :value_processor

      # Evaluates the optional setup block in the context of this object.
      # Defaults: values are accumulated with +, printed unprocessed, and
      # formatted with '%s' (so a report that never calls #report can still
      # be rendered instead of failing on a nil pattern).
      def initialize(&setup)
        @update_callback = ->(t,v){t + v}
        @value_processor = ->(v){v}
        @value_pattern = '%s'
        instance_eval(&setup) if setup
      end

      # Replaces the accumulator; the block gets (current_total, new_value)
      # and returns the new total.
      def update(&callback)
        @update_callback = callback
      end

      # Sets the label and the format pattern used to print the value; the
      # optional block converts the raw total before it is formatted.
      def report(description, value_pattern, &value_processor)
        @description = description
        @value_pattern = value_pattern
        @value_processor = value_processor if value_processor
      end
    end

    # init_value - starting total
    # setup      - DSL block (see DSL)
    def initialize(init_value, &setup)
      @dsl = DSL.new(&setup)
      @value = init_value
    end

    # Folds a reported value into the running total.
    def update(value)
      @value = @dsl.update_callback.call(@value, value)
    end

    # The processed total formatted with the configured pattern.
    def value
      @dsl.value_pattern % [@dsl.value_processor.call(@value)]
    end

    def description
      @dsl.description
    end

    # Compact form used in periodic summary lines.
    def to_s
      "#{description}: #{value.rjust(6)}"
    end

    # Aligned form used in the final summary.
    def final
      "#{description}: ".ljust(40) + value
    end
  end

  # queue_size - maximum number of pending report events
  # callback   - block called with this reporter at the start of the reporter
  #              thread; it is expected to register the sink with #each
  def initialize(queue_size, &callback)
    @report_queue = SizedQueue.new(queue_size)
    @processor = callback

    on_finish do
      # flush thread waiting on queue: raise the limit so producers blocked
      # on a full queue can continue once the consumer is gone
      @report_queue.max = 999999
    end
  end

  # Starts the reporter thread: calls the setup block, then passes every
  # queued event to the sink until :end is popped.
  def run
    super do
      @processor.call(self) if @processor
      until (report = @report_queue.pop) == :end
        @sink.call(*report) if @sink
      end
    end
  end

  # Registers the sink block, called with (key, value) for every event.
  def each(&callback)
    @sink = callback
  end

  # Queues one report event (blocks while the queue is full).
  def report(key, value)
    @report_queue << [key, value]
  end

  # Runs the block and reports the wall-clock seconds it took under key.
  # Nothing is reported when the block raises. The block's own value is not
  # returned.
  def time(key)
    time = Time.now.to_f
    yield
    report key, Time.now.to_f - time
  end

  # Queues the :end marker and waits for the reporter thread to drain the queue.
  def join
    @report_queue << :end
    super
  end
end
202
+
203
# Wires a key lister, a pool of workers and a reporter together so that a
# block is called for every key of an S3 bucket.
class BucketProcessor
  # key_id, key_secret - AWS credentials handed to RightAws::S3
  # bucket             - bucket name
  # options            - Hash of optional settings:
  #   :no_https                       use http on port 80 instead of https on 443
  #   :log                            logger (default: Logger on STDERR)
  #   :workers                        number of worker threads (default 10)
  #   :lister_fetch_size              keys per listing request (default 200)
  #   :lister_backlog                 key queue capacity (default 1000)
  #   :max_keys                       cap on the number of keys queued (default: none)
  #   :reporter_backlog               report queue capacity (default 1000)
  #   :reporter_summary_interval      log a summary every N processed keys (default 100)
  #   :reporter_average_contribution  weight of the newest rate sample in the
  #                                   printed op/s average (default 0.10)
  #   :reports                        Hash of name => Reporter::Report (default: empty)
  #   :key_list                       Array of key names to process instead of
  #                                   listing the bucket
  #
  # The block is called on a worker thread with (bucket, key, reporter) for each key.
  def initialize(key_id, key_secret, bucket, options = {}, &callback)
    protocol = options[:no_https] ? 'http' : 'https'
    port = options[:no_https] ? 80 : 443
    @log = options[:log] || Logger.new(STDERR)
    workers = options[:workers] || 10
    lister_fetch_size = options[:lister_fetch_size] || 200
    lister_backlog = options[:lister_backlog] || 1000
    max_keys = options[:max_keys]
    reporter_backlog = options[:reporter_backlog] || 1000
    reporter_summary_interval = options[:reporter_summary_interval] || 100
    reporter_average_contribution = options[:reporter_average_contribution] || 0.10
    # must be a Hash: it is looked up by report name and walked with each_value
    custom_reports = options[:reports] || {}
    @key_list = options[:key_list]

    s3 = RightAws::S3.new(key_id, key_secret, multi_thread: true, logger: @log, protocol: protocol, port: port)
    bucket = s3.bucket(bucket)

    @key_queue = SizedQueue.new(lister_backlog)

    @reporter = Reporter.new(reporter_backlog) do |reports|
      total_listed_keys = 0
      total_processed_keys = 0
      total_succeeded_keys = 0
      total_failed_keys = 0
      total_handled_keys = 0
      total_skipped_keys = 0
      total_nooped_keys = 0

      processed_avg = 0.0
      last_time = nil
      last_total = 0

      reports.each do |key, value|
        case key
        when :new_keys_count
          total_listed_keys += value
        when :processed_key
          total_processed_keys += 1
          if total_processed_keys % reporter_summary_interval == 0
            if last_time
              # exponential moving average of the processing rate
              contribution = reporter_average_contribution
              current_rate = (total_processed_keys - last_total).to_f / (Time.now.to_f - last_time)
              processed_avg = processed_avg * (1.0 - contribution) + current_rate * contribution
            end
            last_time = Time.now.to_f
            last_total = total_processed_keys

            log_line = "-[%s]- processed %6d: failed: %6d (%6.2f %%) handled: %6d skipped: %6d (%6.2f %%)" % [
              value.to_s[0...2].ljust(2),
              total_processed_keys,
              total_failed_keys,
              total_failed_keys.to_f / total_processed_keys * 100,
              total_handled_keys,
              total_skipped_keys,
              total_skipped_keys.to_f / total_processed_keys * 100
            ]
            log_line << custom_reports.each_value.map{|v| ' ' + v.to_s}.join
            log_line << " [backlog: %4d] @ %.1f op/s" % [
              @key_queue.size,
              processed_avg
            ]

            @log.info log_line
          end
        when :succeeded_key
          total_succeeded_keys += 1
        when :failed_key
          failed_key, error = *value
          @log.error "Key processing failed: #{failed_key}: #{error.class.name}, #{error.message}"
          total_failed_keys += 1
        when :handled_key
          total_handled_keys += 1
        when :skipped_key
          total_skipped_keys += 1
        when :noop_key
          total_nooped_keys += 1
        else
          # anything else must be one of the caller-declared custom reports
          custom_report = custom_reports[key]
          if custom_report
            custom_report.update(value)
          else
            # an undeclared key must not take the reporter thread down
            @log.warn "ignoring report for undeclared key: #{key}"
          end
        end
      end

      reports.on_finish do
        @log.info("total listed keys: #{total_listed_keys}")
        @log.info("total processed keys: #{total_processed_keys}")
        @log.info("total succeeded keys: #{total_succeeded_keys}")
        @log.info("total failed keys: #{total_failed_keys}")
        @log.info("total handled keys: #{total_handled_keys}")
        @log.info("total skipped keys: #{total_skipped_keys}")
        @log.info("total nooped keys: #{total_nooped_keys}")
        custom_reports.each_value do |report|
          @log.info report.final
        end
      end
    end

    # create lister
    @lister = if @key_list
      @log.info "processing #{@key_list.length} keys from list file"
      ListLister.new(bucket, @key_queue, max_keys)
    else
      Lister.new(bucket, @key_queue, lister_fetch_size, max_keys)
    end
    @lister.on_keys_chunk do |keys_chunk|
      @log.debug "Got #{keys_chunk.length} new keys"
      @reporter.report(:new_keys_count, keys_chunk.length)
    end
    @lister.on_finish do
      @log.debug "Done listing keys"
      # notify all workers that no more messages will be posted
      workers.times{ @key_queue << :end }
    end

    # create workers
    @log.info "Launching #{workers} workers"
    @workers = (1..workers).map do |worker_no|
      worker = Worker.new(worker_no, @key_queue) do |key|
        @log.debug "Worker[#{worker_no}]: Processing key #{key}"
        yield bucket, key, @reporter
        @reporter.report :processed_key, key
        @reporter.report :succeeded_key, key
      end
      worker.on_error do |key, error|
        @reporter.report :processed_key, key
        @reporter.report :failed_key, [key, error]
      end
      worker.on_finish do
        @log.debug "Worker #{worker_no} done"
      end
    end
  end

  # Starts the reporter, the lister and the workers, then waits for all of
  # them to finish.
  #
  # prefix - optional key prefix used when listing the bucket (ignored when a
  #          key list was given)
  #
  # On Interrupt the key queue limit is raised so that threads blocked on it
  # can finish, and the reporter is joined.
  def run(prefix = nil)
    @reporter.run
    if @key_list
      @lister.run(@key_list)
    else
      @lister.run(prefix)
    end
    @workers.each(&:run)

    # wait for all to finish
    @workers.each(&:join)
    @log.info "All workers done"

    @lister.join
    @reporter.join
  rescue Interrupt
    @log.warn 'Interrupted'
    # flush thread waiting on queues
    @key_queue.max = 999999
    @reporter.join
  end
end
@@ -0,0 +1,137 @@
1
+ require 's3-object-processor'
2
+ require 'cli'
3
+ require 'logger'
4
+ require 'right_aws'
5
+ require 'time'
6
+
7
module S3ObjectProcessor
  # Builds and immediately runs a command line program that calls a
  # user-supplied processor block for every key of an S3 bucket.
  #
  # The configuration block is instance_eval'ed on this object and may call
  # #cli, #cli_process, #report and #processor.
  class CLI
    # Evaluates the configuration block, parses the command line and runs a
    # BucketProcessor with the resulting settings.
    def initialize(&config)
      @reports = {}
      instance_eval &config

      # copied to locals because the ::CLI DSL block below may be evaluated
      # with a different self
      cli_setup = @cli_setup
      cli_process_setup = @cli_process_setup

      settings = ::CLI.new do
        # NOTE(review): this description looks copied from a specific tool - confirm
        description 'Set header of S3 object'

        option :key_id,
          short: :i,
          description: 'AWS access key ID',
          default_label: 'AWS_ACCESS_KEY_ID environment variable',
          default: ENV['AWS_ACCESS_KEY_ID'],
          required: true
        option :key_secret,
          short: :s,
          description: 'AWS access key secret',
          default_label: 'AWS_SECRET_ACCESS_KEY environment variable',
          default: ENV['AWS_SECRET_ACCESS_KEY'],
          required: true
        switch :no_https,
          description: 'use plain HTTP S3 connections'

        option :bucket,
          short: :b,
          description: 'bucket to process',
          required: true
        option :prefix,
          short: :p,
          description: 'process only objects of key starting with given prefix'

        option :lister_fetch_size,
          description: 'fetch no more than that number of keys per request',
          cast: Integer,
          default: 200
        option :lister_backlog,
          description: 'maximum length of to be processed key queue',
          cast: Integer,
          default: 1000
        option :key_list,
          description: 'file with keys to process (one per line)',
          default_label: 'process all keys in S3 bucket',
          cast: Pathname

        option :reporter_backlog,
          description: 'maximum length of to be processed report queue',
          cast: Integer,
          default: 1000
        option :reporter_summary_interval,
          description: 'print summary every some number of processed objects',
          cast: Integer,
          default: 100
        option :reporter_average_contribution,
          description: 'how much does last average calculation contribute in the printed value - less => more stable',
          cast: Float,
          default: 0.10

        option :workers,
          short: :t,
          description: 'number of processing threads to start',
          cast: Integer,
          default: 10

        switch :noop,
          short: :n,
          description: 'do not change any object; just say what would be done'

        switch :debug,
          short: :d,
          description: 'log at DEBUG level'

        option :max_keys,
          description: 'stop after processing this amount of keys',
          cast: Integer

        # user-declared options and switches
        instance_eval &cli_setup if cli_setup
      end.parse! do |parsed|
        # instance_eval would pass the receiver as the block argument;
        # instance_exec hands the parsed settings to the user's block
        instance_exec(parsed, &cli_process_setup) if cli_process_setup
      end

      log = Logger.new(STDERR)
      log.level = settings.debug ? Logger::DEBUG : Logger::INFO

      log.debug(settings.inspect)

      # dump the backtrace of every thread on SIGQUIT
      trap 'QUIT' do
        Thread.list.each do |thread|
          STDERR.puts "Thread-#{thread.object_id.to_s(36)}"
          STDERR.puts thread.backtrace.join("\n \\_ ")
        end
      end

      # one key per line; blank lines are dropped so they do not become empty keys
      key_list = settings.key_list && settings.key_list.readlines.map(&:strip).reject(&:empty?)

      BucketProcessor.new(settings.key_id, settings.key_secret, settings.bucket,
        no_https: settings.no_https,
        log: log,
        workers: settings.workers,
        max_keys: settings.max_keys,
        lister_fetch_size: settings.lister_fetch_size,
        lister_backlog: settings.lister_backlog,
        key_list: key_list,
        reporter_backlog: settings.reporter_backlog,
        reporter_summary_interval: settings.reporter_summary_interval,
        reporter_average_contribution: settings.reporter_average_contribution,
        reports: @reports
      ) do |bucket, key, reporter|
        @processor.call(bucket, key, settings, log, reporter)
      end
      .run(settings.prefix)
    end

    # Stores a block that declares additional command line options.
    def cli(&setup)
      @cli_setup = setup
    end

    # Stores a block that is called with the parsed settings.
    def cli_process(&setup)
      @cli_process_setup = setup
    end

    # Declares a custom report under name, starting at init_value; the block
    # is the Reporter::Report DSL.
    def report(name, init_value, &setup)
      @reports[name] = Reporter::Report.new(init_value, &setup)
    end

    # Stores the block called with (bucket, key, settings, log, reporter) for
    # every key.
    def processor(&callback)
      @processor = callback
    end
  end
end
@@ -0,0 +1,67 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
# NOTE: this file is generated by jeweler; the summary below must also be
# corrected in the Rakefile's Jeweler::Tasks block or `rake gemspec` will
# bring the typo back.
Gem::Specification.new do |s|
  s.name = "s3-object-processor"
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Jakub Pastuszek"]
  s.date = "2013-10-29"
  s.description = "DSL tools for building programs that can process S3 object key-by-key using threaded worker pool"
  s.email = "jpastuszek@gmail.com"
  s.extra_rdoc_files = [
    "LICENSE.txt",
    "README.md"
  ]
  s.files = [
    ".document",
    ".rspec",
    "Gemfile",
    "Gemfile.lock",
    "LICENSE.txt",
    "README.md",
    "Rakefile",
    "VERSION",
    "lib/s3-object-processor.rb",
    "lib/s3-object-processor/cli.rb",
    "s3-object-processor.gemspec",
    "spec/s3-object-processor_spec.rb",
    "spec/spec_helper.rb"
  ]
  s.homepage = "http://github.com/jpastuszek/s3-object-processor"
  s.licenses = ["MIT"]
  s.require_paths = ["lib"]
  s.rubygems_version = "1.8.25"
  s.summary = "S3 key-by-key processor builder"

  if s.respond_to? :specification_version then
    s.specification_version = 3

    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
      s.add_runtime_dependency(%q<cli>, ["~> 1.3"])
      s.add_runtime_dependency(%q<right_aws>, ["~> 3.0"])
      s.add_development_dependency(%q<rspec>, ["~> 2.8.0"])
      s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
      s.add_development_dependency(%q<bundler>, ["~> 1.0"])
      s.add_development_dependency(%q<jeweler>, ["~> 1.8.7"])
    else
      s.add_dependency(%q<cli>, ["~> 1.3"])
      s.add_dependency(%q<right_aws>, ["~> 3.0"])
      s.add_dependency(%q<rspec>, ["~> 2.8.0"])
      s.add_dependency(%q<rdoc>, ["~> 3.12"])
      s.add_dependency(%q<bundler>, ["~> 1.0"])
      s.add_dependency(%q<jeweler>, ["~> 1.8.7"])
    end
  else
    s.add_dependency(%q<cli>, ["~> 1.3"])
    s.add_dependency(%q<right_aws>, ["~> 3.0"])
    s.add_dependency(%q<rspec>, ["~> 2.8.0"])
    s.add_dependency(%q<rdoc>, ["~> 3.12"])
    s.add_dependency(%q<bundler>, ["~> 1.0"])
    s.add_dependency(%q<jeweler>, ["~> 1.8.7"])
  end
end
67
+
@@ -0,0 +1,7 @@
1
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')

# Replaces the generated placeholder example, which failed unconditionally
# and therefore broke the default rake task.
describe "Runnable" do
  it "runs the block on a thread and then calls the on_finish callbacks" do
    events = []

    Runnable.new
      .on_finish { events << :finished }
      .run { events << :ran }
      .join

    events.should == [:ran, :finished]
  end

  it "does nothing when joined before being run" do
    Runnable.new.join.should be_nil
  end
end
@@ -0,0 +1,12 @@
1
# Make lib/ and spec/ requirable from the specs.
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
require 'rspec'
# the library file is lib/s3-object-processor.rb (dashes, not underscores)
require 's3-object-processor'

# Requires supporting files with custom matchers and macros, etc,
# in ./support/ and its subdirectories.
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}

RSpec.configure do |config|

end
metadata ADDED
@@ -0,0 +1,160 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: s3-object-processor
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Jakub Pastuszek
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-10-29 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: cli
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.3'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.3'
30
+ - !ruby/object:Gem::Dependency
31
+ name: right_aws
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '3.0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '3.0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 2.8.0
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 2.8.0
62
+ - !ruby/object:Gem::Dependency
63
+ name: rdoc
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: '3.12'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: '3.12'
78
+ - !ruby/object:Gem::Dependency
79
+ name: bundler
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ~>
84
+ - !ruby/object:Gem::Version
85
+ version: '1.0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ~>
92
+ - !ruby/object:Gem::Version
93
+ version: '1.0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: jeweler
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ~>
100
+ - !ruby/object:Gem::Version
101
+ version: 1.8.7
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ~>
108
+ - !ruby/object:Gem::Version
109
+ version: 1.8.7
110
+ description: DSL tools for building programs that can process S3 object key-by-key
111
+ using threaded worker pool
112
+ email: jpastuszek@gmail.com
113
+ executables: []
114
+ extensions: []
115
+ extra_rdoc_files:
116
+ - LICENSE.txt
117
+ - README.md
118
+ files:
119
+ - .document
120
+ - .rspec
121
+ - Gemfile
122
+ - Gemfile.lock
123
+ - LICENSE.txt
124
+ - README.md
125
+ - Rakefile
126
+ - VERSION
127
+ - lib/s3-object-processor.rb
128
+ - lib/s3-object-processor/cli.rb
129
+ - s3-object-processor.gemspec
130
+ - spec/s3-object-processor_spec.rb
131
+ - spec/spec_helper.rb
132
+ homepage: http://github.com/jpastuszek/s3-object-processor
133
+ licenses:
134
+ - MIT
135
+ post_install_message:
136
+ rdoc_options: []
137
+ require_paths:
138
+ - lib
139
+ required_ruby_version: !ruby/object:Gem::Requirement
140
+ none: false
141
+ requirements:
142
+ - - ! '>='
143
+ - !ruby/object:Gem::Version
144
+ version: '0'
145
+ segments:
146
+ - 0
147
+ hash: 3595802382691813695
148
+ required_rubygems_version: !ruby/object:Gem::Requirement
149
+ none: false
150
+ requirements:
151
+ - - ! '>='
152
+ - !ruby/object:Gem::Version
153
+ version: '0'
154
+ requirements: []
155
+ rubyforge_project:
156
+ rubygems_version: 1.8.25
157
+ signing_key:
158
+ specification_version: 3
159
+ summary: S3 key-by-key processor builder
160
+ test_files: []