concurrent_downloader 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format nested
2
+ --color
3
+ --profile
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
source "https://rubygems.org"

# Runtime dependencies are declared in concurrent_downloader.gemspec.
gemspec

# Additional gems listed directly in this Gemfile (they are required by the
# spec helper and the spec suite).
gem "puma"
gem "rspec"
gem "yajl-ruby"
9
+
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Oliver Clarke
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,56 @@
1
+ # ConcurrentDownloader
2
+
3
+ Run HTTP requests concurrently from within a synchronous environment. This is a thin em-http-request wrapper that slots into a normal Ruby script.
4
+
5
+ This is useful inside a Rails application or a Rake task.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ gem 'concurrent_downloader'
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install concurrent_downloader
20
+
21
+ ## Usage
22
+
23
+ Check out spec/lib/concurrent_downloader_spec.rb for lots of usage examples. Here is one:
24
+
25
+ queue = []
26
+ 100.times do |i|
27
+ queue << {
28
+ :method => "post",
29
+ :path => "/test"
30
+ }
31
+ end
32
+
33
+ responses = []
34
+ ConcurrentDownloader.process_queue!(queue, :host => "http://localhost", :concurrent_downloads => 10) do |queue_item, response|
35
+ if response.status == 200
36
+ responses << response
37
+ end
38
+ end
39
+
40
+ # if all responses are 200
41
+ responses.size.should == 100
42
+
43
+ The test suite passes on both Ruby 1.8.7 and 1.9.3.
44
+
45
+ ## Contributing
46
+
47
+ 1. Fork it
48
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
49
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
50
+ 4. Push to the branch (`git push origin my-new-feature`)
51
+ 5. Create new Pull Request
52
+
53
+ ## Contact
54
+
55
+ Oliver Clarke <ollie@clarketus.net>
56
+
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/concurrent_downloader/version', __FILE__)
3
+
4
# Packaging metadata for the gem. The file lists are taken from whatever git
# currently tracks, so the gem must be built from inside a git checkout.
Gem::Specification.new do |gem|
  gem.name          = "concurrent_downloader"
  gem.version       = ConcurrentDownloader::VERSION
  gem.authors       = ["Oliver Clarke"]
  gem.email         = ["ollie@clarketus.net"]
  gem.description   = %q{Concurrently download http requests in a synchronous environment.}
  gem.summary       = %q{A basic em-http-request wrapper that slots into a normal ruby script.}
  gem.homepage      = "https://github.com/clarketus/concurrent_downloader"
  # Declare the license explicitly so it shows up in the built gem's metadata
  # instead of an empty license list.
  gem.license       = "MIT"

  gem.files         = `git ls-files`.split("\n")
  gem.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  # Runtime dependencies (no version constraints).
  gem.add_dependency('eventmachine')
  gem.add_dependency('em-http-request')
end
21
+
@@ -0,0 +1,53 @@
1
+ require 'logger'
2
+
3
+ require 'eventmachine'
4
+ require 'em-http-request'
5
+
6
+ require 'concurrent_downloader/response'
7
+ require 'concurrent_downloader/processor'
8
+
9
# Public entry point of the gem, plus the error classes raised while a queue
# is being processed.
module ConcurrentDownloader

  class << self
    # Processes +queue+ by delegating to Processor.process_queue! with the
    # same arguments and block.
    def process_queue!(queue, options={}, &block)
      Processor.process_queue!(queue, options, &block)
    end

    # Logger used by the gem. Unless one has been assigned it is a Logger
    # without a log device, which discards every message. (Passing nil rather
    # than '/dev/null' keeps this working on platforms without /dev/null.)
    def logger
      @logger ||= Logger.new(nil)
    end

    # Replaces the logger used by the gem.
    def logger=(logger)
      @logger = logger
    end
  end

  # Raised for a queue item whose request failed at the connection level.
  # Carries the queue item and the id of the downloader that handled it.
  class ConnectionError < StandardError
    attr_reader :queue_item, :downloader_id

    def initialize(queue_item, downloader_id)
      @queue_item    = queue_item
      @downloader_id = downloader_id

      # to_s guards the message against a missing or non-String :method, so
      # building the error can never raise a NoMethodError of its own.
      super "There was a connection error: #{@queue_item[:method].to_s.upcase} #{@queue_item[:path]}"
    end
  end

  # Raised for a queue item whose response was rejected. Carries the queue
  # item, the rejected response and the id of the downloader that handled it.
  class DownloadError < StandardError
    attr_reader :queue_item, :response, :downloader_id

    def initialize(queue_item, response, downloader_id)
      @queue_item    = queue_item
      @response      = response
      @downloader_id = downloader_id

      # to_s: same guard as in ConnectionError.
      super "There was a download error: #{@queue_item[:method].to_s.upcase} #{@queue_item[:path]}: #{@response.status}"
    end
  end
end
53
+
@@ -0,0 +1,103 @@
1
module ConcurrentDownloader
  # Drains a queue of HTTP requests through a fixed number of "downloaders"
  # running inside an EventMachine reactor. Each downloader takes one item,
  # issues the request, and takes the next item when that request finishes.
  class Processor

    class << self
      # Convenience wrapper: builds a Processor and calls #process_queue! on it.
      def process_queue!(queue, options={}, &block)
        new.process_queue!(queue, options, &block)
      end
    end

    # Processes every item in +queue+ and returns once all downloaders are done.
    #
    # queue   - Array of items (nil is treated as an empty queue). An item is
    #           either a path String or a Hash with :path and optionally
    #           :method (defaults to "get"), :body and :head. Items are taken
    #           with Array#pop, so the array is consumed from its end and is
    #           modified in place.
    # options - :host                 - passed to EM::HttpRequest.new
    #           :error_limit          - number of errors tolerated (default 0)
    #           :concurrent_downloads - number of downloaders (default 1)
    #           :connect_timeout      - passed to EM::HttpRequest (default 5)
    #           :inactivity_timeout   - passed to EM::HttpRequest (default 10)
    # block   - optional; called with (queue_item, response) for every completed
    #           request. A false/nil return value marks the item as failed.
    #           Without a block every completed request counts as a success.
    #
    # A failed item is put back on the queue as long as fewer than
    # :error_limit errors have been counted. The first error beyond that
    # clears the queue, and the most recent error is raised once the reactor
    # has stopped.
    #
    # Raises ArgumentError if :concurrent_downloads is not a positive Integer
    # (the reactor would otherwise never be stopped).
    # Raises ConnectionError or DownloadError when the error limit is exceeded.
    def process_queue!(queue, options={}, &block)
      @queue = queue || []
      @host  = options[:host]

      @error_limit          = options[:error_limit] || 0
      @concurrent_downloads = options[:concurrent_downloads] || 1
      @connect_timeout      = options[:connect_timeout] || 5
      @inactivity_timeout   = options[:inactivity_timeout] || 10

      unless @concurrent_downloads.is_a?(Integer) && @concurrent_downloads > 0
        raise ArgumentError,
          ":concurrent_downloads must be a positive integer (got #{@concurrent_downloads.inspect})"
      end

      @error_count        = 0
      @error_limit_passed = false
      @last_error         = nil
      @response_block     = block
      # Counts downloaders that have not yet run out of work.
      @active_downloaders = @concurrent_downloads

      EM.run do
        @concurrent_downloads.times do |downloader_id|
          recursive_download(downloader_id)
        end
      end

      # Raised here, after the reactor has shut down, rather than from inside
      # a reactor callback.
      raise @last_error if @error_limit_passed
    end

    private

    # Takes the next queue item for +downloader_id+ and issues its request; the
    # downloader calls this method again when the request succeeds or fails.
    # When the queue is empty the downloader is retired instead.
    def recursive_download(downloader_id)
      queue_item = @queue.pop
      return retire_downloader unless queue_item

      queue_item = {:path => queue_item} if queue_item.is_a?(String)
      queue_item[:method] ||= "get"

      ConcurrentDownloader.logger.info "#{downloader_id} => #{queue_item[:method]} #{queue_item[:path]}"

      request = start_request(queue_item, downloader_id)

      request.callback do |completed|
        response = Response.new \
          :status  => completed.response_header.status,
          :headers => Hash[completed.response_header],
          :body    => completed.response

        unless response_accepted?(queue_item, response)
          handle_error DownloadError.new(queue_item, response, downloader_id)
        end
      end

      request.errback do
        handle_error ConnectionError.new(queue_item, downloader_id)
      end

      # Registered after the handlers above so that any re-queueing done by
      # handle_error has happened before the next item is taken.
      [:callback, :errback].each do |hook|
        request.send(hook) { recursive_download(downloader_id) }
      end
    end

    # Builds the connection for @host and issues the request described by
    # +queue_item+. The downloader id is added to the request headers.
    def start_request(queue_item, downloader_id)
      head = (queue_item[:head] || {}).merge(:downloader_id => downloader_id)

      connection = EM::HttpRequest.new @host,
        :connect_timeout    => @connect_timeout,
        :inactivity_timeout => @inactivity_timeout

      connection.send queue_item[:method],
        :path => queue_item[:path],
        :body => queue_item[:body],
        :head => head
    end

    # True when no response block was given, or when the block returns a
    # truthy value for this response.
    def response_accepted?(queue_item, response)
      @response_block.nil? || @response_block.call(queue_item, response)
    end

    # Marks one downloader as finished and stops the reactor after the last one.
    def retire_downloader
      @active_downloaders -= 1
      EM.stop if @active_downloaders == 0
    end

    # Records +error+ as the most recent one. While the error limit has not
    # been reached the failed item goes back on the queue; otherwise the queue
    # is cleared so that the downloaders run out of work.
    def handle_error(error)
      ConcurrentDownloader.logger.info "#{error.downloader_id} => #{error.class}: #{error.message}"
      @last_error = error

      if @error_count < @error_limit
        @error_count += 1
        @queue << error.queue_item
      else
        @error_limit_passed = true
        @queue = []
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
module ConcurrentDownloader
  # Read-only holder for the status, headers and body of one response.
  class Response
    attr_reader :body, :status, :headers

    # data - Hash-like object read with the keys :body, :status and :headers;
    #        keys that are absent leave the matching attribute nil.
    def initialize(data={})
      @body, @status, @headers = data[:body], data[:status], data[:headers]
    end
  end
end
@@ -0,0 +1,3 @@
1
module ConcurrentDownloader
  # Version string of the gem; the gemspec reads it as the gem version.
  VERSION = '0.0.1'
end
@@ -0,0 +1,253 @@
1
+ require 'spec_helper'
2
+
3
describe ConcurrentDownloader do

  # Processes +queue+ against the mock server and returns every response that
  # was yielded, in the order it was yielded.
  def download_all(queue, options={})
    responses = []
    ConcurrentDownloader.process_queue!(queue, {:host => $mock_host_uri}.merge(options)) do |queue_item, response|
      responses << response
    end
    responses
  end

  # Runs the block and returns the StandardError it raised, or nil.
  def capture_error
    yield
    nil
  rescue => e
    e
  end

  context "Basic operation" do
    it "should process a basic queue" do
      responses = download_all(["/test"])

      responses.size.should == 1
      response = responses.first

      response.should be_a(ConcurrentDownloader::Response)
      response.status.should == 200
      response.headers.should == {"CONNECTION"=>"close", "CONTENT_LENGTH"=>"82"}
      Yajl.load(response.body).should == {
        "path"          => "/test",
        "body"          => nil,
        "downloader_id" => "0",
        "method"        => "GET",
        "test_header"   => nil
      }
    end

    it "should allow setting of specific request options" do
      queue = [{
        :method => "post",
        :path   => "/test",
        :body   => {"test_param_key" => "test_param_value"},
        :head   => {"test_header_key" => "test_header_value"}
      }]

      responses = download_all(queue)

      responses.size.should == 1
      response = responses.first

      response.status.should == 200
      response.headers.should == {"CONNECTION"=>"close", "CONTENT_LENGTH"=>"127"}
      Yajl.load(response.body).should == {
        "path"          => "/test",
        "body"          => "test_param_key=test_param_value",
        "downloader_id" => "0",
        "method"        => "POST",
        "test_header"   => "test_header_value"
      }
    end
  end

  context "large queues" do
    it "should process a large queue" do
      queue = Array.new(100) do |i|
        {
          :method => "post",
          :path   => "/test",
          :body   => {"request_number" => "%03d" % i}
        }
      end

      responses = download_all(queue)

      responses.size.should == 100

      # The queue is consumed from its end, so the responses arrive in reverse.
      responses.reverse.each_with_index do |response, index|
        response.status.should == 200
        response.headers.should == {"CONNECTION"=>"close", "CONTENT_LENGTH"=>"99"}
        Yajl.load(response.body).should == {
          "path"          => "/test",
          "body"          => "request_number=#{"%03d" % index}",
          "downloader_id" => "0",
          "method"        => "POST",
          "test_header"   => nil
        }
      end
    end

    it "should allow concurrent requests" do
      queue = Array.new(100) { {:method => "post", :path => "/test"} }

      responses = download_all(queue, :concurrent_downloads => 10)

      responses.size.should == 100
      downloader_ids = responses.map do |response|
        response.status.should == 200
        response.headers.should == {"CONNECTION"=>"close", "CONTENT_LENGTH"=>"81"}
        body = Yajl.load(response.body)
        body["path"].should == "/test"
        body["method"].should == "POST"

        body["downloader_id"].to_i
      end

      (0..9).each { |i| downloader_ids.should include(i) }
    end
  end

  context "Error handling" do
    it "should raise when there is a connection error" do
      port = find_available_port

      exception = capture_error do
        ConcurrentDownloader.process_queue!(["/test"], :host => "http://127.0.0.1:#{port}")
      end

      exception.should_not be_nil
      exception.should be_a(ConcurrentDownloader::ConnectionError)
      exception.message.should == "There was a connection error: GET /test"
      exception.downloader_id.should == 0
      exception.queue_item.should == {
        :path   => "/test",
        :method => "get"
      }
    end

    it "should raise when there is an inactivity timeout" do
      begin
        # A listening socket that never answers.
        server = TCPServer.new('127.0.0.1', 0)
        port = server.addr[1]

        exception = capture_error do
          ConcurrentDownloader.process_queue!(["/test"], :host => "http://127.0.0.1:#{port}", :inactivity_timeout => 1)
        end

        exception.should_not be_nil
        exception.should be_a(ConcurrentDownloader::ConnectionError)
        exception.message.should == "There was a connection error: GET /test"
      ensure
        server.close if server
      end
    end

    it "should raise a download error when request block returns false" do
      exception = capture_error do
        ConcurrentDownloader.process_queue!(["/test"], :host => $mock_host_uri) do |queue_item, response|
          false
        end
      end

      exception.should_not be_nil
      exception.should be_a(ConcurrentDownloader::DownloadError)
      exception.message.should == "There was a download error: GET /test: 200"
      exception.response.should be_a(ConcurrentDownloader::Response)
      exception.downloader_id.should == 0
      exception.queue_item.should == {
        :path   => "/test",
        :method => "get"
      }
    end

    it "should retry downloading if an error occurs and the limit is not reached" do
      queue = Array.new(10) { "/test" }

      responses = []
      count = 0

      ConcurrentDownloader.process_queue!(queue, :host => $mock_host_uri, :error_limit => 20) do |queue_item, response|
        count += 1
        succeeded = count > 20
        responses << response if succeeded
        succeeded
      end

      count.should == 30 # 30 total requests
      responses.size.should == 10 # 10 succeeded
      responses.each do |response|
        response.status.should == 200
      end
    end

    it "should raise an error if the limit is reached and cancel all downloads in the queue" do
      queue = Array.new(10) { "/test" }

      responses = []
      count = 0

      exception = capture_error do
        ConcurrentDownloader.process_queue!(queue, :host => $mock_host_uri, :error_limit => 20) do |queue_item, response|
          count += 1
          succeeded = count < 5
          responses << response if succeeded
          succeeded
        end
      end

      exception.should_not be_nil
      exception.should be_a(ConcurrentDownloader::DownloadError)
      exception.message.should == "There was a download error: GET /test: 200"

      count.should == 25 # 25 total requests
      responses.size.should == 4 # 4 succeeded
      responses.each do |response|
        response.status.should == 200
      end
    end
  end
end
253
+
@@ -0,0 +1,26 @@
1
+ require 'concurrent_downloader'
2
+ require 'puma'
3
+ require 'yajl'
4
+
5
# Load every helper under spec/support before the suite is configured.
support_glob = File.join(File.dirname(__FILE__), "support/**/*.rb")
Dir[support_glob].each { |support_file| require support_file }

include MockServerHelper

RSpec.configure do |config|
  config.before(:suite) do
    # ConcurrentDownloader.logger = Logger.new($stdout)

    # The mock server answers every request with a JSON document describing
    # the request it received.
    $mock_host_uri = start_mock_server do |request|
      posted_body = request.post? ? request.body.read : nil

      echo = {
        :path          => request.path,
        :body          => posted_body,
        :method        => request.request_method,
        :test_header   => request.env["HTTP_TEST_HEADER_KEY"],
        :downloader_id => request.env["HTTP_DOWNLOADER_ID"]
      }

      [200, [], [Yajl.dump(echo)]]
    end
  end
end
26
+
@@ -0,0 +1,26 @@
1
require 'socket'
require 'uri'

# Spec support: starts a throwaway HTTP server on the loopback interface.
module MockServerHelper
  # Starts a Puma server on a free loopback port and returns its base URI.
  #
  # block - called with a Rack::Request for every incoming request; its return
  #         value is handed back to the server as the Rack response.
  #
  # The listener is bound on the calling thread, so the port is already
  # accepting connections when this method returns and a bind failure is
  # raised to the caller instead of being lost in a background thread.
  # NOTE(review): relies on Puma::Server#run serving from its own background
  # thread by default - confirm against the Puma version in use.
  def start_mock_server(&block)
    app = lambda { |env| block.call(Rack::Request.new(env)) }

    port = find_available_port

    server = Puma::Server.new(app)
    server.add_tcp_listener '127.0.0.1', port
    server.run

    URI.parse "http://127.0.0.1:#{port}"
  end

  # Returns a loopback port number that was free a moment ago: binds to port 0,
  # reads the port that was assigned, and closes the socket again.
  def find_available_port
    probe = TCPServer.new('127.0.0.1', 0)
    probe.addr[1]
  ensure
    probe.close if probe
  end
end
26
+
metadata ADDED
@@ -0,0 +1,94 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: concurrent_downloader
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Oliver Clarke
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-04-08 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: eventmachine
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: em-http-request
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ description: Concurrently download http requests in a synchronous environment.
47
+ email:
48
+ - ollie@clarketus.net
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - .gitignore
54
+ - .rspec
55
+ - Gemfile
56
+ - LICENSE
57
+ - README.md
58
+ - Rakefile
59
+ - concurrent_downloader.gemspec
60
+ - lib/concurrent_downloader.rb
61
+ - lib/concurrent_downloader/processor.rb
62
+ - lib/concurrent_downloader/response.rb
63
+ - lib/concurrent_downloader/version.rb
64
+ - spec/lib/concurrent_downloader_spec.rb
65
+ - spec/spec_helper.rb
66
+ - spec/support/mock_server_helper.rb
67
+ homepage: https://github.com/clarketus/concurrent_downloader
68
+ licenses: []
69
+ post_install_message:
70
+ rdoc_options: []
71
+ require_paths:
72
+ - lib
73
+ required_ruby_version: !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ! '>='
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ! '>='
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ requirements: []
86
+ rubyforge_project:
87
+ rubygems_version: 1.8.21
88
+ signing_key:
89
+ specification_version: 3
90
+ summary: A basic em-http-request wrapper that slots into a normal ruby script.
91
+ test_files:
92
+ - spec/lib/concurrent_downloader_spec.rb
93
+ - spec/spec_helper.rb
94
+ - spec/support/mock_server_helper.rb