concurrent_downloader 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format nested
2
+ --color
3
+ --profile
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in concurrent_downloader.gemspec
4
+ gemspec
5
+ # The gems below are required only by the spec suite (see spec/spec_helper.rb); lib/ does not load them.
6
+ gem 'puma'
7
+ gem 'rspec'
8
+ gem 'yajl-ruby'
9
+
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Oliver Clarke
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,56 @@
1
+ # ConcurrentDownloader
2
+
3
+ Concurrently download HTTP requests in a synchronous environment. This is essentially an em-http-request wrapper that slots into a normal Ruby script.
4
+
5
+ This is useful inside a Rails application or a Rake task.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ gem 'concurrent_downloader'
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install concurrent_downloader
20
+
21
+ ## Usage
22
+
23
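+ See spec/lib/concurrent_downloader_spec.rb for more usage examples. Here is one. Note that the block's return value matters: a false or nil result counts as a failure, and failed items are re-queued until `:error_limit` failures (default 0) have occurred, after which a `DownloadError` is raised: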
24
+
25
+ queue = []
26
+ 100.times do |i|
27
+ queue << {
28
+ :method => "post",
29
+ :path => "/test"
30
+ }
31
+ end
32
+
33
+ responses = []
34
+ ConcurrentDownloader.process_queue!(queue, :host => "http://localhost", :concurrent_downloads => 10) do |queue_item, response|
35
+ if response.status == 200
36
+ responses << response
37
+ end
38
+ end
39
+
40
+ # if all responses are 200
41
+ responses.size.should == 100
42
+
43
+ The test suite passes on both Ruby 1.8.7 and 1.9.3.
44
+
45
+ ## Contributing
46
+
47
+ 1. Fork it
48
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
49
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
50
+ 4. Push to the branch (`git push origin my-new-feature`)
51
+ 5. Create new Pull Request
52
+
53
+ ## Contact
54
+
55
+ Oliver Clarke <ollie@clarketus.net>
56
+
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/concurrent_downloader/version', __FILE__)
3
+
4
+ Gem::Specification.new do |gem| # Packaging metadata for the concurrent_downloader gem.
5
+ gem.authors = ["Oliver Clarke"]
6
+ gem.email = ["ollie@clarketus.net"]
7
+ gem.description = %q{Concurrently download http requests in a synchronous environment.}
8
+ gem.summary = %q{A basic em-http-request wrapper that slots into a normal ruby script.}
9
+ gem.homepage = "https://github.com/clarketus/concurrent_downloader"
10
+ # NOTE(review): no gem.license is declared although the package ships an MIT LICENSE file; consider declaring it.
11
+ gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) } # file lists are produced by shelling out to git
12
+ gem.files = `git ls-files`.split("\n")
13
+ gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
14
+ gem.name = "concurrent_downloader"
15
+ gem.require_paths = ["lib"]
16
+ gem.version = ConcurrentDownloader::VERSION # constant loaded by the require at the top of this file
17
+ # Runtime dependencies; no version constraints are declared for either gem.
18
+ gem.add_dependency('eventmachine')
19
+ gem.add_dependency('em-http-request')
20
+ end
21
+
@@ -0,0 +1,53 @@
1
+ require 'logger'
2
+
3
+ require 'eventmachine'
4
+ require 'em-http-request'
5
+
6
+ require 'concurrent_downloader/response'
7
+ require 'concurrent_downloader/processor'
8
+
9
+ module ConcurrentDownloader
10
+ # Public entry points for the gem, plus the error classes that the Processor reports.
11
+ class << self
12
+ def process_queue!(queue, options={}, &block) # thin delegate: the same arguments and block go to Processor.process_queue!
13
+ Processor.process_queue!(queue, options, &block)
14
+ end
15
+ # Module-wide logger, created on first use; by default it is opened on '/dev/null'.
16
+ def logger
17
+ @logger ||= Logger.new('/dev/null') # NOTE(review): '/dev/null' is a Unix path; confirm behaviour on Windows
18
+ end
19
+ # Replaces the module-wide logger with the given object.
20
+ def logger=(logger)
21
+ @logger = logger
22
+ end
23
+ end
24
+ # Built by the Processor when a request's errback fires; carries the queue item and the downloader id.
25
+ class ConnectionError < StandardError
26
+ attr_reader \
27
+ :queue_item,
28
+ :downloader_id
29
+
30
+ def initialize(queue_item, downloader_id)
31
+ @queue_item = queue_item
32
+ @downloader_id = downloader_id
33
+ # The message needs queue_item[:method] to respond to upcase (the Processor defaults it to "get").
34
+ super "There was a connection error: #{@queue_item[:method].upcase} #{@queue_item[:path]}"
35
+ end
36
+ end
37
+ # Built by the Processor when the caller's block returns a falsy value; additionally carries the Response.
38
+ class DownloadError < StandardError
39
+ attr_reader \
40
+ :queue_item,
41
+ :response,
42
+ :downloader_id
43
+
44
+ def initialize(queue_item, response, downloader_id)
45
+ @queue_item = queue_item
46
+ @response = response
47
+ @downloader_id = downloader_id
48
+ # Same message shape as ConnectionError, with the response status appended.
49
+ super "There was a download error: #{@queue_item[:method].upcase} #{@queue_item[:path]}: #{@response.status}"
50
+ end
51
+ end
52
+ end
53
+
@@ -0,0 +1,103 @@
1
+ module ConcurrentDownloader
2
+ class Processor # Works through a queue of requests inside an EventMachine loop, using a fixed number of downloaders.
3
+
4
+ class << self
5
+ def process_queue!(queue, options={}, &block) # convenience: builds a fresh Processor for every call
6
+ new.process_queue!(queue, options, &block)
7
+ end
8
+ end
9
+ # Options read: :host, :error_limit (default 0), :concurrent_downloads (1), :connect_timeout (5), :inactivity_timeout (10).
10
+ def process_queue!(queue, options={}, &block)
11
+ @queue = queue || [] # nil is treated as an empty queue; a given array is popped in place
12
+ @host = options[:host]
13
+
14
+ @error_limit = options[:error_limit] || 0
15
+ @concurrent_downloads = options[:concurrent_downloads] || 1
16
+ @connect_timeout = options[:connect_timeout] || 5
17
+ @inactivity_timeout = options[:inactivity_timeout] || 10
18
+
19
+ @error_count = 0
20
+ @error_limit_passed = false
21
+ @response_block = block # NOTE(review): nil when no block is given, yet the success callback calls it unconditionally
22
+ # NOTE(review): with :concurrent_downloads => 0 nothing below reaches EM.stop, so EM.run would presumably never return; confirm.
23
+ EM.run do
24
+ @concurrent_downloads.times do |downloader_id|
25
+ recursive_download(downloader_id)
26
+ end
27
+ end
28
+ end
29
+
30
+ private
31
+ # Pops the next queue item and issues its request, re-invoking itself when the request finishes; retires the downloader once the queue is empty.
32
+ def recursive_download(downloader_id)
33
+ if queue_item = @queue.pop # items are taken from the end of the queue
34
+ if queue_item.is_a?(String) # a bare String is shorthand for {:path => string}
35
+ queue_item = {:path => queue_item}
36
+ end
37
+
38
+ queue_item[:method] ||= "get"
39
+
40
+ method = queue_item[:method]
41
+ path = queue_item[:path]
42
+ body = queue_item[:body]
43
+ head = queue_item[:head] || {}
44
+
45
+ head = head.merge(:downloader_id => downloader_id) # merge builds a new hash, so the item's own :head is not modified
46
+
47
+ ConcurrentDownloader.logger.info "#{downloader_id} => #{method} #{path}"
48
+
49
+ connection = EM::HttpRequest.new @host,
50
+ :connect_timeout => @connect_timeout,
51
+ :inactivity_timeout => @inactivity_timeout
52
+
53
+ request = connection.send method,
54
+ :path => path,
55
+ :body => body,
56
+ :head => head
57
+ # Success path: wrap the reply in a Response and pass it to the caller's block; a falsy return value is handled as a DownloadError.
58
+ request.callback do |request|
59
+ response = Response.new \
60
+ :status => request.response_header.status,
61
+ :headers => Hash[request.response_header],
62
+ :body => request.response
63
+
64
+ if !@response_block.call(queue_item, response)
65
+ handle_error DownloadError.new(queue_item, response, downloader_id)
66
+ end
67
+ end
68
+ # Failure path: the request's errback is reported as a ConnectionError.
69
+ request.errback do |request|
70
+ handle_error ConnectionError.new(queue_item, downloader_id)
71
+ end
72
+ # Registered after the handlers above, for both outcomes: start this downloader's next request (presumably relies on callbacks firing in registration order).
73
+ [:callback, :errback].each do |meth|
74
+ request.send(meth) do
75
+ recursive_download(downloader_id)
76
+ end
77
+ end
78
+ else # queue is empty: this downloader is finished
79
+ @concurrent_downloads -= 1 # the option value doubles as the count of downloaders still running
80
+ if @concurrent_downloads == 0
81
+ EM.stop
82
+ # The last recorded error is raised only when the error limit was exceeded.
83
+ if @error_limit_passed
84
+ raise @last_error
85
+ end
86
+ end
87
+ end
88
+ end
89
+ # Logs the error, then either re-queues its item (while under the error limit) or flags the limit as passed and empties the queue.
90
+ def handle_error(error)
91
+ ConcurrentDownloader.logger.info "#{error.downloader_id} => #{error.class}: #{error.message}"
92
+ @last_error = error
93
+
94
+ if @error_count < @error_limit
95
+ @error_count += 1
96
+ @queue << error.queue_item # retry: appended at the end, which is where the next pop takes from
97
+ else
98
+ @error_limit_passed = true
99
+ @queue = [] # rebinds to a new array; the array object passed in by the caller is not cleared
100
+ end
101
+ end
102
+ end
103
+ end
@@ -0,0 +1,14 @@
1
+ module ConcurrentDownloader
2
+ class Response # Holds the body, status and headers of one reply; exposes readers only.
3
+ attr_reader \
4
+ :body,
5
+ :status,
6
+ :headers
7
+ # data: Hash read at the keys :body, :status and :headers; a missing key leaves that attribute nil.
8
+ def initialize(data={})
9
+ @body = data[:body]
10
+ @status = data[:status]
11
+ @headers = data[:headers]
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,3 @@
1
+ module ConcurrentDownloader
2
+ VERSION = "0.0.1" # gem version; read by concurrent_downloader.gemspec
3
+ end
@@ -0,0 +1,253 @@
1
+ require 'spec_helper'
2
+
3
+ describe ConcurrentDownloader do
4
+
5
+ context "Basic operation" do
6
+ it "should process a basic queue" do # a bare String item is sent as a GET; the mock server echoes the request back as JSON
7
+ queue = []
8
+ queue << "/test"
9
+
10
+ responses = []
11
+ ConcurrentDownloader.process_queue!(queue, :host => $mock_host_uri) do |queue_item, response|
12
+ responses << response
13
+ end
14
+
15
+ responses.size.should == 1
16
+ response = responses.first
17
+
18
+ response.should be_a(ConcurrentDownloader::Response)
19
+ response.status.should == 200
20
+ response.headers.should == {"CONNECTION"=>"close", "CONTENT_LENGTH"=>"82"}
21
+ Yajl.load(response.body).should == {
22
+ "path" => "/test",
23
+ "body" => nil,
24
+ "downloader_id" => "0", # echoed from the downloader_id request header added by the Processor
25
+ "method" => "GET",
26
+ "test_header" => nil
27
+ }
28
+ end
29
+
30
+ it "should allow setting of specific request options" do # Hash items may set :method, :path, :body and :head
31
+ queue = []
32
+ queue << {
33
+ :method => "post",
34
+ :path => "/test",
35
+ :body => {"test_param_key" => "test_param_value"},
36
+ :head => {"test_header_key" => "test_header_value"}
37
+ }
38
+
39
+ responses = []
40
+ ConcurrentDownloader.process_queue!(queue, :host => $mock_host_uri) do |queue_item, response|
41
+ responses << response
42
+ end
43
+
44
+ responses.size.should == 1
45
+ response = responses.first
46
+
47
+ response.status.should == 200
48
+ response.headers.should == {"CONNECTION"=>"close", "CONTENT_LENGTH"=>"127"}
49
+ Yajl.load(response.body).should == {
50
+ "path" => "/test",
51
+ "body" => "test_param_key=test_param_value",
52
+ "downloader_id" => "0",
53
+ "method" => "POST",
54
+ "test_header" => "test_header_value"
55
+ }
56
+ end
57
+ end
58
+
59
+ context "large queues" do
60
+ it "should process a large queue" do # 100 items, default of a single downloader
61
+ queue = []
62
+ 100.times do |i|
63
+ queue << {
64
+ :method => "post",
65
+ :path => "/test",
66
+ :body => {"request_number" => "%03d" % i},
67
+ }
68
+ end
69
+
70
+ responses = []
71
+ ConcurrentDownloader.process_queue!(queue, :host => $mock_host_uri) do |queue_item, response|
72
+ responses << response
73
+ end
74
+
75
+ responses.size.should == 100
76
+
77
+ responses.reverse.each_with_index do |response, index| # the Processor pops from the end of the queue, so responses are in reverse submission order
78
+ response.status.should == 200
79
+ response.headers.should == {"CONNECTION"=>"close", "CONTENT_LENGTH"=>"99"}
80
+ Yajl.load(response.body).should == {
81
+ "path" => "/test",
82
+ "body" => "request_number=#{"%03d" % index}",
83
+ "downloader_id" => "0",
84
+ "method" => "POST",
85
+ "test_header" => nil
86
+ }
87
+ end
88
+ end
89
+
90
+ it "should allow concurrent requests" do # 100 items spread over 10 downloaders
91
+ queue = []
92
+ 100.times do |i|
93
+ queue << {
94
+ :method => "post",
95
+ :path => "/test"
96
+ }
97
+ end
98
+
99
+ responses = []
100
+ ConcurrentDownloader.process_queue!(queue, :host => $mock_host_uri, :concurrent_downloads => 10) do |queue_item, response|
101
+ responses << response
102
+ end
103
+
104
+ downloader_ids = []
105
+
106
+ responses.size.should == 100
107
+ responses.each do |response|
108
+ response.status.should == 200
109
+ response.headers.should == {"CONNECTION"=>"close", "CONTENT_LENGTH"=>"81"}
110
+ body = Yajl.load(response.body)
111
+ body["path"].should == "/test"
112
+ body["method"].should == "POST"
113
+
114
+ downloader_ids << body["downloader_id"].to_i
115
+ end
116
+
117
+ (0..9).each{|i| downloader_ids.should include(i) } # each of the 10 downloader ids appears at least once
118
+ end
119
+ end
120
+
121
+ context "Error handling" do
122
+ it "should raise when there is a connection error" do
123
+ port = find_available_port # the helper closes its probe socket before returning, so nothing should be listening here
124
+
125
+ queue = []
126
+ queue << "/test"
127
+
128
+ exception = nil
129
+ begin
130
+ ConcurrentDownloader.process_queue!(queue, :host => "http://127.0.0.1:#{port}")
131
+ rescue => e
132
+ exception = e
133
+ end
134
+
135
+ exception.should_not be_nil
136
+ exception.should be_a(ConcurrentDownloader::ConnectionError)
137
+ exception.message.should == "There was a connection error: GET /test"
138
+ exception.downloader_id.should == 0
139
+ exception.queue_item.should == {
140
+ :path => "/test",
141
+ :method => "get"
142
+ }
143
+ end
144
+
145
+ it "should raise when there is an inactivity timeout" do
146
+ begin
147
+ server = TCPServer.new('127.0.0.1', 0) # bound but never accepted from in this test, so the request gets no reply
148
+ port = server.addr[1]
149
+
150
+ queue = []
151
+ queue << "/test"
152
+
153
+ exception = nil
154
+ begin
155
+ ConcurrentDownloader.process_queue!(queue, :host => "http://127.0.0.1:#{port}", :inactivity_timeout => 1)
156
+ rescue => e
157
+ exception = e
158
+ end
159
+
160
+ exception.should_not be_nil
161
+ exception.should be_a(ConcurrentDownloader::ConnectionError)
162
+ exception.message.should == "There was a connection error: GET /test"
163
+ ensure
164
+ server.close if server
165
+ end
166
+ end
167
+
168
+ it "should raise a download error when request block returns false" do # default :error_limit is 0, so the first failure is raised
169
+ queue = []
170
+ queue << "/test"
171
+
172
+ exception = nil
173
+
174
+ begin
175
+ ConcurrentDownloader.process_queue!(queue, :host => $mock_host_uri) do |queue_item, response|
176
+ false
177
+ end
178
+ rescue => e
179
+ exception = e
180
+ end
181
+
182
+ exception.should_not be_nil
183
+ exception.should be_a(ConcurrentDownloader::DownloadError)
184
+ exception.message.should == "There was a download error: GET /test: 200"
185
+ exception.response.should be_a(ConcurrentDownloader::Response)
186
+ exception.downloader_id.should == 0
187
+ exception.queue_item.should == {
188
+ :path => "/test",
189
+ :method => "get"
190
+ }
191
+ end
192
+
193
+ it "should retry downloading if an error occurs and the limit is not reached" do
194
+ queue = []
195
+ 10.times do
196
+ queue << "/test"
197
+ end
198
+
199
+ responses = []
200
+
201
+ count = 0
202
+ ConcurrentDownloader.process_queue!(queue, :host => $mock_host_uri, :error_limit => 20) do |queue_item, response| # block calls 1-20 return false and are re-queued; calls 21-30 succeed
203
+ count += 1
204
+ if count > 20
205
+ responses << response
206
+ end
207
+
208
+ count > 20
209
+ end
210
+
211
+ count.should == 30 # 30 total requests
212
+ responses.size.should == 10 # 10 succeeded
213
+ responses.each do |response|
214
+ response.status.should == 200
215
+ end
216
+ end
217
+
218
+ it "should raise an error if the limit is reached and cancel all downloads in the queue" do
219
+ queue = []
220
+ 10.times do
221
+ queue << "/test"
222
+ end
223
+
224
+ responses = []
225
+ exception = nil
226
+
227
+ begin
228
+ count = 0
229
+ ConcurrentDownloader.process_queue!(queue, :host => $mock_host_uri, :error_limit => 20) do |queue_item, response| # calls 1-4 succeed; calls 5-24 fail and are re-queued; call 25 exceeds the limit
230
+ count += 1
231
+ if count < 5
232
+ responses << response
233
+ end
234
+
235
+ count < 5
236
+ end
237
+ rescue => e
238
+ exception = e
239
+ end
240
+
241
+ exception.should_not be_nil
242
+ exception.should be_a(ConcurrentDownloader::DownloadError)
243
+ exception.message.should == "There was a download error: GET /test: 200"
244
+
245
+ count.should == 25 # 25 total requests
246
+ responses.size.should == 4 # 4 succeeded
247
+ responses.each do |response|
248
+ response.status.should == 200
249
+ end
250
+ end
251
+ end
252
+ end
253
+
@@ -0,0 +1,26 @@
1
+ require 'concurrent_downloader'
2
+ require 'puma'
3
+ require 'yajl'
4
+
5
+ Dir[File.join(File.dirname(__FILE__), "support/**/*.rb")].each {|f| require f}
6
+
7
+ include MockServerHelper
8
+
9
+ RSpec.configure do |config|
10
+ config.before :suite do # starts one mock server for the whole suite; its URI is shared through $mock_host_uri
11
+ # ConcurrentDownloader.logger = Logger.new($stdout)
12
+ # The handler below echoes selected parts of each request back as a JSON body.
13
+ $mock_host_uri = start_mock_server do |request|
14
+ body = {
15
+ :path => request.path,
16
+ :body => (request.post? ? request.body.read : nil), # the request body is read only for POST requests
17
+ :method => request.request_method,
18
+ :test_header => request.env["HTTP_TEST_HEADER_KEY"],
19
+ :downloader_id => request.env["HTTP_DOWNLOADER_ID"]
20
+ }
21
+ # Response triple: status 200, an empty header list, and the JSON-encoded echo as the only body chunk.
22
+ [200, [], [Yajl.dump(body)]]
23
+ end
24
+ end
25
+ end
26
+
@@ -0,0 +1,26 @@
1
+ module MockServerHelper
2
+ def start_mock_server(&block) # Runs a Puma server on a local port in a new thread and returns its base URI; the block receives each request as a Rack::Request.
3
+ app = lambda {|env|
4
+ request = Rack::Request.new(env)
5
+ block.call(request)
6
+ }
7
+
8
+ port = find_available_port
9
+ # NOTE(review): this method returns without waiting for the thread, so the server may not be listening yet; confirm there is no startup race.
10
+ Thread.new do
11
+ Puma::Server.new(app).tap do |s|
12
+ s.add_tcp_listener '127.0.0.1', port
13
+ end.run.join
14
+ end
15
+
16
+ URI.parse "http://127.0.0.1:#{port}"
17
+ end
18
+ # Binds a TCPServer to port 0, returns the port number it was given, and closes the socket. NOTE(review): the port could be taken by another process before it is reused.
19
+ def find_available_port
20
+ server = TCPServer.new('127.0.0.1', 0)
21
+ server.addr[1]
22
+ ensure
23
+ server.close if server
24
+ end
25
+ end
26
+
metadata ADDED
@@ -0,0 +1,94 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: concurrent_downloader
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Oliver Clarke
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-04-08 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: eventmachine
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: em-http-request
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ description: Concurrently download http requests in a synchronous environment.
47
+ email:
48
+ - ollie@clarketus.net
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - .gitignore
54
+ - .rspec
55
+ - Gemfile
56
+ - LICENSE
57
+ - README.md
58
+ - Rakefile
59
+ - concurrent_downloader.gemspec
60
+ - lib/concurrent_downloader.rb
61
+ - lib/concurrent_downloader/processor.rb
62
+ - lib/concurrent_downloader/response.rb
63
+ - lib/concurrent_downloader/version.rb
64
+ - spec/lib/concurrent_downloader_spec.rb
65
+ - spec/spec_helper.rb
66
+ - spec/support/mock_server_helper.rb
67
+ homepage: https://github.com/clarketus/concurrent_downloader
68
+ licenses: []
69
+ post_install_message:
70
+ rdoc_options: []
71
+ require_paths:
72
+ - lib
73
+ required_ruby_version: !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ! '>='
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ! '>='
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ requirements: []
86
+ rubyforge_project:
87
+ rubygems_version: 1.8.21
88
+ signing_key:
89
+ specification_version: 3
90
+ summary: A basic em-http-request wrapper that slots into a normal ruby script.
91
+ test_files:
92
+ - spec/lib/concurrent_downloader_spec.rb
93
+ - spec/spec_helper.rb
94
+ - spec/support/mock_server_helper.rb