em-http-fetcher 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc ADDED
@@ -0,0 +1,89 @@
1
+ = em-http-fetcher
2
+
3
+ HTTP fetch client based on Ruby EventMachine and EM-HTTP-Request
4
+ that has configurable concurrency regardless of EM's thread pool.
5
+
6
+ == Example
7
+
8
+ EM.run do
9
+ trap(:INT) { EM.stop }
10
+ fetcher = EM::HttpFetcher.new
11
+ fetcher.callback do |req| # req is HttpRequest instance
12
+ # This is the global callback block; it is called for every request
13
+ p "Fetch success! #{req.last_effective_url} (#{req.response.size} bytes)"
14
+ end
15
+
16
+ %w(
17
+ http://www.google.com/
18
+ http://heroku.com/
19
+ http://sourceforge.net/
20
+ http://github.com/
21
+ ).each do |url|
22
+ fetcher.request url
23
+ end
24
+
25
+ req = fetcher.request 'http://www.ruby-lang.org/'
26
+ req.callback do
27
+ # This is an additional callback block for this request only.
28
+ # The global callback block will also be called.
29
+ puts "Hello Ruby!"
30
+ end
31
+ end
32
+
33
+ == Install
34
+
35
+ Once em-http-request *1.0.4* *or* *above* is released, you can install em-http-fetcher with gem.
36
+ Until then, em-http-fetcher intentionally depends on em-http-request (> 1.0.3) so that it cannot be installed against an older release.
37
+
38
+ $ gem install em-http-fetcher
39
+
40
+ === Workaround with bundler
41
+
42
+ gem install bundler
43
+
44
+ Then create a Gemfile that fetches the development version of em-http-request.
45
+
46
+ source "http://rubygems.org"
47
+ gem 'em-http-request', :git => 'git://github.com/igrigorik/em-http-request.git'
48
+
49
+ Then run bundle to install gems.
50
+
51
+ $ bundle
52
+
53
+ Finally run your script with "bundle exec".
54
+
55
+ $ bundle exec YOUR_SCRIPT
56
+
57
+ == Usage
58
+
59
+ === Options for HttpFetcher.new
60
+
61
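+ [:concurrency] Maximum number of concurrent requests overall (default: 40).
62
+ [:host_concurrency] Maximum number of concurrent requests per host (default: 2).
63
+ [:host_request_wait] Seconds to wait after each request before that host's connection slot is reused (default: 0.2).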
64
+ [(all other keys)] Pass through for HttpRequest.new
65
+
66
+ === Options for HttpFetcher#request
67
+
68
+ [:uri] Target URI (String or URI object)
69
+ [:method] Request method (get/head/put...) (default=:get)
70
+ [(all other keys)] Pass through for HttpRequest#(get/head/put...)
71
+
72
+ If the first argument is not a hash, it is treated as :uri, and an optional second hash argument supplies the remaining options.
73
+
74
+ == Limitations
75
+
76
+ * :host_concurrency is checked only for the initial URI.
77
+ When a request is redirected, the number of parallel requests for
78
+ one host may exceed host_concurrency.
79
+ * Redirections will not work until issue #230 of em-http-request
80
+ is resolved; https://github.com/igrigorik/em-http-request/pull/230
81
+
82
+ == License
83
+
84
+ Same as Ruby 2.0 (2-clause BSDL or Ruby original license)
85
+
86
+ == See Also
87
+
88
+ EventMachine:: http://rubyeventmachine.com/
89
+ EM-HTTP-Request:: https://github.com/igrigorik/em-http-request
@@ -0,0 +1,31 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "em-http-fetcher"
4
+
5
# Gem specification for em-http-fetcher.
#
# The version is read from EventMachine::HttpFetcher::VERSION, so the library
# must already be loaded when this block is evaluated.
Gem::Specification.new do |s|
  s.name        = "em-http-fetcher"
  s.version     = EventMachine::HttpFetcher::VERSION

  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Tatsuki Sugiura"]
  s.email       = ["sugi@nemui.org"]
  s.homepage    = "http://github.com/sugi/em-http-fetcher"
  s.summary     = "HTTP fetch client based on ruby EventMachine and EM-HTTP-Request"
  s.description = "HTTP fetch client based on ruby EventMachine and EM-HTTP-Request that has configurable concurrency regardless of EM's thread pool."
  # The README states "Same as Ruby 2.0 (2-clause BSDL or Ruby original license)".
  s.licenses    = ["Ruby", "BSD-2-Clause"]

  # s.rubyforge_project = ""

  s.required_ruby_version = '>= 1.9.0'

  s.add_dependency "addressable", ">= 2.2.3"
  # Strictly greater than 1.0.3: the README explains this is intentional, so
  # the gem cannot be installed against em-http-request 1.0.3 or older.
  s.add_dependency "em-http-request", "> 1.0.3"

  # s.add_development_dependency "rspec"
  # s.add_development_dependency "rake"

  # The file lists come from git, so the gem must be built inside a checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]
end
31
+
@@ -0,0 +1 @@
1
+ require 'em/http-fetcher/fetcher.rb'
@@ -0,0 +1,163 @@
1
+ #!/usr/bin/env ruby
2
+ require 'eventmachine'
3
+ require 'em/pool'
4
+ require 'em-http-request'
5
+ require 'addressable/uri'
6
+
7
module EventMachine
  # HTTP fetch client built on EM-HTTP-Request that limits the number of
  # requests in flight, both overall and per host.
  #
  # Callbacks registered with #callback / #errback are attached to every
  # request issued through #request.
  class HttpFetcher
    VERSION = "0.1.0"

    # Option keys consumed by HttpFetcher itself; every other key given to
    # HttpFetcher.new is handed to the request pool as connection options.
    FETCHER_OPTION_KEYS = [:concurrency, :host_concurrency, :host_request_wait].freeze

    # Builds the key that identifies one host's connection pool and that is
    # also used as the base URI of its connections.
    #
    # uri:: object responding to +scheme+, +host+ and +port+
    #       (an Addressable::URI in normal use).
    #
    # Returns "scheme://host", with ":port" appended when the URI carries an
    # explicit port, so requests to a non-default port are not silently sent
    # to the scheme's default port.
    def self.host_key(uri)
      key = "#{uri.scheme}://#{uri.host}"
      uri.port ? "#{key}:#{uri.port}" : key
    end

    # Hands out connections while enforcing a global limit (a token queue of
    # +total_size+ entries) and a per-host limit (one EM::Pool per host key,
    # filled with +host_resource_size+ connections).
    class RequestPool
      # Seconds between runs of the idle host pool cleanup.
      CLEANUP_INTERVAL = 10
      # A host pool unused for this many seconds may be dropped by the cleanup.
      HOST_POOL_TTL = 5 * 60

      # total_size::         number of tokens in the global queue.
      # host_resource_size:: number of connections created per host.
      # host_reuse_wait::    seconds to wait before a used connection is put
      #                      back into its host pool (0 = immediately).
      # opts::               options passed to EM::HttpRequest.new for every
      #                      connection this pool creates.
      #
      # Starts the periodic cleanup timer (see #run).
      def initialize(total_size, host_resource_size, host_reuse_wait = 0, opts = {})
        @total_size = total_size
        @host_reuse_wait = host_reuse_wait
        @host_resource_size = host_resource_size
        @conn_opts = opts || {}

        @total_queue = EM::Queue.new
        total_size.times { @total_queue.push true }

        # Host pools are created lazily on first access to a host key.
        @host_pools = Hash.new { |pools, host|
          pool = EM::Pool.new
          # Re-adding a connection must also clear its "removed" mark,
          # because #perform marks a connection removed while it is in use.
          def pool.add(item)
            super
            @removed.delete item
          end
          host_resource_size.times { pool.add new_connection(host) }
          pools[host] = { :pool => pool, :last_used => Time.now }
        }
        run
      end

      # Runs the given block with a connection for +host+ once the host pool
      # and the global token queue both grant a slot.
      #
      # host:: host key as produced by HttpFetcher.host_key.
      # b::    block receiving the connection; it must return a deferrable
      #        (its callback and errback are used to release the slot).
      #
      # NOTE(review): the deferrable is assigned inside the token queue
      # callback; if no global token is available at that moment the inner
      # block has not run yet and nil is handed back to EM::Pool — confirm
      # how EM::Pool reacts to that before relying on it under heavy load.
      def perform(host, &b)
        @host_pools[host][:pool].perform do |conn|
          df = nil
          @total_queue.pop do |tqi|
            @host_pools[host][:last_used] = Time.now
            @host_pools[host][:pool].remove conn

            # Shared completion handler for success and failure: give the
            # global token back and return a connection to the host pool.
            rq = proc { |req|
              @total_queue.push tqi
              lurl = req.last_effective_url
              unless HttpFetcher.host_key(lurl) == host
                # The request ended on another server (redirect), so the
                # old connection is replaced by a fresh one for this host.
                conn = new_connection(host)
              end

              if @host_reuse_wait > 0
                EM.add_timer(@host_reuse_wait) {
                  @host_pools[host][:pool].add conn
                }
              else
                @host_pools[host][:pool].add conn
              end
            }

            work = EM::Callback(&b)
            df = work.call(conn)
            df.callback(&rq)
            df.errback(&rq)
            df
          end
          df
        end
      end

      # Starts a periodic timer that drops host pools which have all their
      # connections queued (none checked out) and which have not been used
      # for HOST_POOL_TTL seconds.
      def run
        EM.add_periodic_timer(CLEANUP_INTERVAL) do
          full_size = @host_resource_size
          # delete_if instead of deleting inside #each, so the hash is not
          # modified behind the iterator's back.
          @host_pools.delete_if do |_host, info|
            queued = info[:pool].instance_eval { @resources.size }
            queued >= full_size &&
              info[:last_used].to_i <= Time.now.to_i - HOST_POOL_TTL
          end
        end
      end

      private

      # Creates a connection for +host+ using the connection options given
      # to the pool.
      def new_connection(host)
        EM::HttpRequest.new(host, @conn_opts)
      end
    end

    # opts:: :concurrency       - global limit (default 40)
    #        :host_concurrency  - per-host limit (default 2)
    #        :host_request_wait - seconds before a host connection is
    #                             reused (default 0.2)
    #        All other keys are kept as connection options for the pool.
    def initialize(opts = {})
      @concurrency = opts[:concurrency] || 40
      @host_concurrency = opts[:host_concurrency] || 2
      @host_request_wait = opts[:host_request_wait] || 0.2
      @request_pool = nil
      @default_callbacks = []
      @default_errbacks = []
      @req_opts = {}.merge(opts)
      FETCHER_OPTION_KEYS.each { |key| @req_opts.delete key }
    end

    # Returns the RequestPool, creating it on first use. Creation touches
    # EventMachine queues and timers, which is why it is deferred until the
    # first request.
    def request_pool
      @request_pool ||= RequestPool.new(@concurrency, @host_concurrency, @host_request_wait, @req_opts)
    end

    # Registers a block that is attached as callback to every later request.
    def callback(&block)
      @default_callbacks << block
    end

    # Registers a block that is attached as errback to every later request.
    def errback(&block)
      @default_errbacks << block
    end

    # Issues a request through the pool.
    #
    # Call forms:
    #   request(uri)                  # String or Addressable::URI
    #   request(uri, opts)
    #   request(:uri => uri, ...)
    #
    # Recognised options: :uri (hash form only) and :method (default :get).
    # The remaining options are passed to the connection's request method;
    # :keepalive, :redirects and :path get defaults, and :query is set from
    # the URI when it has a query string. The caller's hash is not modified.
    #
    # Returns the object returned by the connection's request method, or nil
    # when the pool has not run the request block by the time this method
    # returns.
    #
    # Raises ArgumentError when no URI is given.
    def request(*args)
      if args.first.kind_of? Hash
        # Work on a copy so that :uri is not removed from the caller's hash.
        opts = args.first.dup
        uri = opts.delete(:uri)
      else
        uri = args.first
        opts = args[1].kind_of?(Hash) ? args[1] : {}
      end
      raise ArgumentError, "uri is required" if uri.nil?

      uri = Addressable::URI.parse(uri.to_s) unless uri.kind_of?(Addressable::URI)

      # An absent path is an empty string rather than nil, so test for
      # emptiness to apply the "/" default.
      path = uri.path.to_s
      path = '/' if path.empty?

      opts = {
        :keepalive => true,
        :redirects => 20,
        :path      => path,
      }.merge(opts)
      method = opts.delete(:method) || :get
      opts[:query] = uri.query if uri.query

      df = nil
      request_pool.perform(HttpFetcher.host_key(uri)) do |conn|
        df = req = conn.__send__(method, opts)
        @default_callbacks.each { |cb| req.callback(&cb) }
        @default_errbacks.each { |cb| req.errback(&cb) }
        req
      end
      df
    end
  end
end
139
+
140
+
141
# Demo driver, run only when this file is executed directly: reads one URL
# per line from ARGF (files named on the command line, or standard input),
# fetches each through an HttpFetcher and prints the outcome.
if __FILE__ == $0
  trap(:INT) { EM.stop }
  EM.run do
    r = EM::HttpFetcher.new
    r.callback do |req|
      p [:success, req.last_effective_url, req.response.size]
    end
    r.errback do |req|
      p [:err, req.last_effective_url, req.response.size]
    end

    ARGF.each { |line|
      line.chomp!
      # Skip blank lines; an empty string would be submitted as a request.
      line.empty? and next
      req = r.request(line)
      # r.request returns nil when the request could not be started
      # right away, so guard before attaching the extra callback.
      if req && line == 'http://www.yahoo.co.jp/'
        req.callback do
          p :yahoo!
        end
      end
    }
  end
end
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: em-http-fetcher
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Tatsuki Sugiura
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-05-11 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: addressable
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 2.2.3
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 2.2.3
30
+ - !ruby/object:Gem::Dependency
31
+ name: em-http-request
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>'
36
+ - !ruby/object:Gem::Version
37
+ version: 1.0.3
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>'
44
+ - !ruby/object:Gem::Version
45
+ version: 1.0.3
46
+ description: HTTP fetch client based on ruby EventMachne and EM-HTTP-Request that
47
+ has configureable concurrency regardless of EM's thread pool.
48
+ email:
49
+ - sugi@nemui.org
50
+ executables: []
51
+ extensions: []
52
+ extra_rdoc_files: []
53
+ files:
54
+ - README.rdoc
55
+ - em-http-fetcher.gemspec
56
+ - lib/em-http-fetcher.rb
57
+ - lib/em/http-fetcher/fetcher.rb
58
+ homepage: http://github.com/sugi/em-http-fetcher
59
+ licenses: []
60
+ post_install_message:
61
+ rdoc_options: []
62
+ require_paths:
63
+ - lib
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: 1.9.0
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ requirements: []
77
+ rubyforge_project:
78
+ rubygems_version: 1.8.23
79
+ signing_key:
80
+ specification_version: 3
81
+ summary: HTTP fetch client based on ruby EventMachne and EM-HTTP-Request
82
+ test_files: []