em-http-fetcher 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc ADDED
@@ -0,0 +1,89 @@
1
+ = em-http-fetcher
2
+
3
+ HTTP fetch client based on Ruby EventMachine and EM-HTTP-Request
4
+ that has configurable concurrency regardless of EM's thread pool.
5
+
6
+ == Example
7
+
8
+ EM.run do
9
+ trap(:INT) { EM.stop }
10
+ fetcher = EM::HttpFetcher.new
11
+ fetcher.callback do |req| # req is HttpRequest instance
12
+ # Here is the global callback block, called for all requests
13
+ p "Fetch success! #{req.last_effective_url} (#{req.response.size} bytes)"
14
+ end
15
+
16
+ %w(
17
+ http://www.google.com/
18
+ http://heroku.com/
19
+ http://sourceforge.net/
20
+ http://github.com/
21
+ ).each do |url|
22
+ fetcher.request url
23
+ end
24
+
25
+ req = fetcher.request 'http://www.ruby-lang.org/'
26
+ req.callback do
27
+ # Here is an additional callback block for this request only.
28
+ # The global callback block will also be called.
29
+ puts "Hello Ruby!"
30
+ end
31
+ end
32
+
33
+ == Install
34
+
35
+ em-http-fetcher requires em-http-request *1.0.4* *or* *above*. Once that version is released, you can install with gem.
36
+ Until then, the gemspec dependency (> 1.0.3) intentionally prevents installation.
37
+
38
+ $ gem install em-http-fetcher
39
+
40
+ === Workaround with bundler
41
+
42
+ gem install bundler
43
+
44
+ Then create a Gemfile that fetches the development version of em-http-request.
45
+
46
+ source "http://rubygems.org"
47
+ gem 'em-http-request', :git => 'git://github.com/igrigorik/em-http-request.git'
48
+
49
+ Then run bundle to install gems.
50
+
51
+ $ bundle
52
+
53
+ Finally run your script with "bundle exec".
54
+
55
+ $ bundle exec YOUR_SCRIPT
56
+
57
+ == Usage
58
+
59
+ === Options for HttpFetcher.new
60
+
61
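+ [:concurrency] Maximum number of concurrent requests in total (default: 40).
62
+ [:host_concurrency] Maximum number of concurrent requests per host (default: 2).
63
+ [:host_request_wait] Seconds to wait after each request before its per-host connection is reused (default: 0.2).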
64
+ [(all other keys)] Pass through for HttpRequest.new
65
+
66
+ === Options for HttpFetcher#request
67
+
68
+ [:uri] Target URI (String or URI object)
69
+ [:method] Request method (get/head/put...) (default=:get)
70
+ [(all other keys)] Passed through to HttpRequest#(get/head/put...)
71
+
72
+ If the first argument is not a Hash, it is treated as :uri; an optional second Hash argument supplies the other options.
73
+
74
+ == Limitations
75
+
76
+ * :host_concurrency is checked only for the initial URI.
77
+ When a request is redirected, the number of parallel requests to
78
+ one host may exceed host_concurrency.
79
+ * Redirections will not work until issue #230 of em-http-request
80
+ is resolved; https://github.com/igrigorik/em-http-request/pull/230
81
+
82
+ == License
83
+
84
+ Same as Ruby 2.0 (2-clause BSDL or Ruby original license)
85
+
86
+ == See Also
87
+
88
+ EventMachine:: http://rubyeventmachine.com/
89
+ EM-HTTP-Request:: https://github.com/igrigorik/em-http-request
@@ -0,0 +1,31 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "em-http-fetcher"
4
+
5
+ Gem::Specification.new do |s| # Gem packaging metadata for em-http-fetcher.
6
+ s.name = "em-http-fetcher"
7
+ s.version = EventMachine::HttpFetcher::VERSION # read from the library itself, which is required above
8
+
9
+ s.platform = Gem::Platform::RUBY
10
+ s.authors = ["Tatsuki Sugiura"]
11
+ s.email = ["sugi@nemui.org"]
12
+ s.homepage = "http://github.com/sugi/em-http-fetcher"
13
+ s.summary = "HTTP fetch client based on ruby EventMachne and EM-HTTP-Request"
14
+ s.description = "HTTP fetch client based on ruby EventMachne and EM-HTTP-Request that has configureable concurrency regardless of EM's thread pool."
15
+
16
+ # s.rubyforge_project = ""
17
+
18
+ s.required_ruby_version = '>= 1.9.0'
19
+
20
+ s.add_dependency "addressable", ">= 2.2.3"
21
+ s.add_dependency "em-http-request", "> 1.0.3" # strictly newer than 1.0.3
22
+
23
+ # s.add_development_dependency "rspec"
24
+ # s.add_development_dependency "rake"
25
+
26
+ s.files = `git ls-files`.split("\n") # file lists below are produced by shelling out to `git ls-files`
27
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
28
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) } # executable names only, without the bin/ directory
29
+ s.require_paths = ["lib"]
30
+ end
31
+
@@ -0,0 +1 @@
1
+ require 'em/http-fetcher/fetcher.rb'
@@ -0,0 +1,163 @@
1
+ #!/usr/bin/env ruby
2
+ require 'eventmachine'
3
+ require 'em/pool'
4
+ require 'em-http-request'
5
+ require 'addressable/uri'
6
+
7
+ module EventMachine
8
+ class HttpFetcher
9
+ VERSION = "0.1.0"
10
+
11
+ class RequestPool # Gates requests through a global token queue plus one EM::Pool of connections per host.
12
+ def initialize(total_size, host_resource_size, host_reuse_wait = 0, opts = {}) # NOTE(review): opts is never read in this class
13
+ super()
14
+ @total_size = total_size
15
+ @host_reuse_wait = host_reuse_wait # seconds to delay before a connection is put back into its host pool
16
+ @host_resource_size = host_resource_size # number of connections created for each host pool
17
+ # One `true` token per allowed in-flight request; perform pops a token and pushes it back when the request finishes.
18
+ @total_queue = EM::Queue.new
19
+ total_size.times { @total_queue.push true }
20
+ @host_pools = Hash.new {|h, k| # the first lookup of a host key builds and stores that host's pool
21
+ pool = EM::Pool.new
22
+ def pool.add item # per-pool override: after the normal add, also delete item from @removed
23
+ super
24
+ @removed.delete item
25
+ end
26
+ host_resource_size.times {
27
+ pool.add EM::HttpRequest.new(k)
28
+ }
29
+ h[k] = { pool: pool, last_used: Time.now }
30
+ }
31
+ run # registers the periodic cleanup timer
32
+ end
33
+ # Calls block b with a connection for host after taking a connection from the host pool and a token from the total queue.
34
+ def perform(host, &b)
35
+ @host_pools[host][:pool].perform do |conn|
36
+ df = nil # NOTE(review): still nil when returned to EM::Pool if the pop below does not run synchronously - confirm
37
+ @total_queue.pop do |tqi|
38
+ @host_pools[host][:last_used] = Time.now
39
+ @host_pools[host][:pool].remove conn # presumably stops the pool from re-queueing conn by itself; rq adds it back - confirm against EM::Pool
40
+ rq = proc { |req| # attached below as both callback and errback
41
+ @total_queue.push tqi # hand the global token back
42
+ lurl = req.last_effective_url
43
+ unless "#{lurl.scheme}://#{lurl.host}" == host
44
+ # The request ended on a different scheme/host than the pool key (redirect).
45
+ # Replace the connection with a fresh one for the original host.
46
+ conn = EM::HttpRequest.new(host)
47
+ end
48
+ # Put the connection back into the host pool, after @host_reuse_wait seconds when that is positive.
49
+ if @host_reuse_wait > 0
50
+ EM.add_timer(@host_reuse_wait) {
51
+ @host_pools[host][:pool].add conn
52
+ }
53
+ else
54
+ @host_pools[host][:pool].add conn
55
+ end
56
+ }
57
+ work = EM::Callback(&b)
58
+ df = work.call(conn) # the caller's block must return an object responding to callback/errback
59
+ df.callback(&rq)
60
+ df.errback(&rq)
61
+ df
62
+ end
63
+ df
64
+ end
65
+ end
66
+ # Registers a timer that removes entries from @host_pools.
67
+ def run
68
+ # Every 10 seconds, delete each host pool that passes both checks below.
69
+ EM.add_periodic_timer(10) do
70
+ hrsize = @host_resource_size # local copy, because instance_eval below changes self
71
+ @host_pools.each do |host, info|
72
+ info[:pool].instance_eval { @resources.size < hrsize } and next # keep pools holding fewer than hrsize resources (presumably some are in use - confirm)
73
+ info[:last_used].to_i > Time.now.to_i - 5 * 60 and next # keep hosts used within the last 5 minutes
74
+ @host_pools.delete host
75
+ end
76
+ end
77
+ end
78
+ end
79
+
80
+ def initialize(opts = {}) # Reads the fetcher's own limits from opts and keeps the remaining keys in @req_opts.
81
+ @concurrency = opts[:concurrency] || 40 # passed to RequestPool as total_size
82
+ @host_concurrency = opts[:host_concurrency] || 2 # passed to RequestPool as host_resource_size
83
+ @host_request_wait = opts[:host_request_wait] || 0.2 # passed to RequestPool as host_reuse_wait (seconds)
84
+ @request_pool = nil # built on first call to request_pool
85
+ @default_callbacks = []
86
+ @default_errbacks = []
87
+ @req_opts = {}.merge(opts) # copy, so the deletes below leave the caller's hash untouched
88
+ @req_opts.delete :concurrency
89
+ @req_opts.delete :host_concurrency
90
+ @req_opts.delete :host_request_wait
91
+ end
92
+
93
+ def request_pool # Returns the RequestPool, creating it on first use (its constructor registers an EM periodic timer).
94
+ @request_pool ||= RequestPool.new(@concurrency, @host_concurrency, @host_request_wait, @req_opts)
95
+ end
96
+
97
+ def callback(&block) # Stores a block that #request attaches as a callback to each request it starts.
98
+ @default_callbacks << block
99
+ end
100
+
101
+ def errback(&block) # Stores a block that #request attaches as an errback to each request it starts.
102
+ @default_errbacks << block
103
+ end
104
+
105
+ def request(*args)
106
+ if args.first.kind_of? Hash
107
+ opts = args[0]
108
+ uri = opts.delete(:uri)
109
+ else
110
+ uri = args.first
111
+ opts = args[1].kind_of?(Hash) ? args[1] : {}
112
+ end
113
+
114
+ uri.kind_of?(Addressable::URI) or
115
+ uri = Addressable::URI.parse(uri.to_s)
116
+ opts = {
117
+ :keepalive => true,
118
+ :redirects => 20,
119
+ :path => uri.path || '/',
120
+ }.merge(opts)
121
+ method = opts.delete(:method) || :get
122
+ uri.query and otps[:query] = uri.query
123
+
124
+ df = nil
125
+ request_pool.perform("#{uri.scheme}://#{uri.host}") do |conn|
126
+ df = req = conn.__send__(method, opts)
127
+ @default_callbacks.each do |cb|
128
+ req.callback(&cb)
129
+ end
130
+ @default_errbacks.each do |cb|
131
+ req.errback(&cb)
132
+ end
133
+ req
134
+ end
135
+ df
136
+ end
137
+ end
138
+ end
139
+
140
+
141
+ if __FILE__ == $0
142
+ trap(:INT) { EM.stop }
143
+ EM.run do
144
+ r = EM::HttpFetcher.new
145
+ r.callback do |req|
146
+ p [:success, req.last_effective_url, req.response.size]
147
+ end
148
+ r.errback do |req|
149
+ p [:err, req.last_effective_url, req.response.size]
150
+ end
151
+
152
+ ARGF.each { |line|
153
+ line.chomp!
154
+ line or next
155
+ req = r.request(line)
156
+ if line == 'http://www.yahoo.co.jp/'
157
+ req.callback do
158
+ p :yahoo!
159
+ end
160
+ end
161
+ }
162
+ end
163
+ end
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: em-http-fetcher
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Tatsuki Sugiura
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-05-11 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: addressable
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 2.2.3
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 2.2.3
30
+ - !ruby/object:Gem::Dependency
31
+ name: em-http-request
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>'
36
+ - !ruby/object:Gem::Version
37
+ version: 1.0.3
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>'
44
+ - !ruby/object:Gem::Version
45
+ version: 1.0.3
46
+ description: HTTP fetch client based on ruby EventMachne and EM-HTTP-Request that
47
+ has configureable concurrency regardless of EM's thread pool.
48
+ email:
49
+ - sugi@nemui.org
50
+ executables: []
51
+ extensions: []
52
+ extra_rdoc_files: []
53
+ files:
54
+ - README.rdoc
55
+ - em-http-fetcher.gemspec
56
+ - lib/em-http-fetcher.rb
57
+ - lib/em/http-fetcher/fetcher.rb
58
+ homepage: http://github.com/sugi/em-http-fetcher
59
+ licenses: []
60
+ post_install_message:
61
+ rdoc_options: []
62
+ require_paths:
63
+ - lib
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: 1.9.0
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ requirements: []
77
+ rubyforge_project:
78
+ rubygems_version: 1.8.23
79
+ signing_key:
80
+ specification_version: 3
81
+ summary: HTTP fetch client based on ruby EventMachne and EM-HTTP-Request
82
+ test_files: []