gogetter 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +30 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +28 -0
- data/Rakefile +35 -0
- data/VERSION +1 -0
- data/gogetter.gemspec +69 -0
- data/lib/go_getter/go_getter.rb +76 -0
- data/lib/go_getter/response.rb +16 -0
- data/lib/go_getter/utils.rb +17 -0
- data/lib/go_getter.rb +3 -0
- data/lib/gogetter.rb +2 -0
- data/spec/fixtures/google.html +3 -0
- data/spec/fixtures/google.redirect.html +6 -0
- data/spec/go_getter/go_getter_spec.rb +151 -0
- data/spec/rspec.opts +3 -0
- data/spec/spec_helper.rb +21 -0
- metadata +124 -0
data/.document
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
diff-lcs (1.1.2)
|
5
|
+
fakeweb (1.3.0)
|
6
|
+
git (1.2.5)
|
7
|
+
jeweler (1.6.4)
|
8
|
+
bundler (~> 1.0)
|
9
|
+
git (>= 1.2.5)
|
10
|
+
rake
|
11
|
+
rake (0.9.2)
|
12
|
+
rspec (2.6.0)
|
13
|
+
rspec-core (~> 2.6.0)
|
14
|
+
rspec-expectations (~> 2.6.0)
|
15
|
+
rspec-mocks (~> 2.6.0)
|
16
|
+
rspec-core (2.6.4)
|
17
|
+
rspec-expectations (2.6.0)
|
18
|
+
diff-lcs (~> 1.1.2)
|
19
|
+
rspec-mocks (2.6.0)
|
20
|
+
yard (0.7.2)
|
21
|
+
|
22
|
+
PLATFORMS
|
23
|
+
ruby
|
24
|
+
|
25
|
+
DEPENDENCIES
|
26
|
+
bundler (~> 1.0.0)
|
27
|
+
fakeweb (~> 1.3.0)
|
28
|
+
jeweler (~> 1.6.4)
|
29
|
+
rspec (~> 2.6.0)
|
30
|
+
yard (~> 0.7.2)
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2011 Elad Kehat
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
= GoGetter
|
2
|
+
|
3
|
+
Easily send HTTP GET requests, with a little more sophistication than Net::HTTP.get.
|
4
|
+
|
5
|
+
Unlike HTTParty, which is a great gem if your class works against a specific website, GoGetter fits the use case where
|
6
|
+
you need to send a bunch of HTTP GETs to several domains, and don't want to wrap each one in its own class.
|
7
|
+
|
8
|
+
Think of it as an alternative to open-uri that doesn't create any temporary files.
|
9
|
+
|
10
|
+
It handles proxies, basic authentication, and HTTP redirects.
|
11
|
+
|
12
|
+
Before releasing this code I used it extensively in a proprietary web crawler that sent around a billion GET requests
|
13
|
+
so far, so you could say that it's quite robust :)
|
14
|
+
|
15
|
+
== Contributing to gogetter
|
16
|
+
|
17
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
18
|
+
* Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
|
19
|
+
* Fork the project
|
20
|
+
* Start a feature/bugfix branch
|
21
|
+
* Commit and push until you are happy with your contribution
|
22
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
23
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or if a change is otherwise
|
24
|
+
necessary, that is fine, but please isolate it in its own commit so I can cherry-pick around it.
|
25
|
+
|
26
|
+
== Copyright
|
27
|
+
|
28
|
+
Copyright (c) 2011 Elad Kehat. See LICENSE.txt for further details.
|
data/Rakefile
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
Jeweler::Tasks.new do |gem|
|
16
|
+
gem.name = "gogetter"
|
17
|
+
gem.homepage = "http://github.com/eladkehat/gogetter"
|
18
|
+
gem.license = "MIT"
|
19
|
+
gem.summary = %Q{Go get something over HTTP}
|
20
|
+
gem.email = "eladkehat@gmail.com"
|
21
|
+
gem.authors = ["Elad Kehat"]
|
22
|
+
end
|
23
|
+
Jeweler::RubygemsDotOrgTasks.new
|
24
|
+
|
25
|
+
require 'rspec/core'
|
26
|
+
require 'rspec/core/rake_task'
|
27
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
28
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
29
|
+
spec.rspec_opts = ['--options', 'spec/rspec.opts']
|
30
|
+
end
|
31
|
+
|
32
|
+
task :default => :spec
|
33
|
+
|
34
|
+
require 'yard'
|
35
|
+
YARD::Rake::YardocTask.new
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/gogetter.gemspec
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{gogetter}
|
8
|
+
s.version = "0.1.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Elad Kehat"]
|
12
|
+
s.date = %q{2011-07-25}
|
13
|
+
s.email = %q{eladkehat@gmail.com}
|
14
|
+
s.extra_rdoc_files = [
|
15
|
+
"LICENSE.txt",
|
16
|
+
"README.rdoc"
|
17
|
+
]
|
18
|
+
s.files = [
|
19
|
+
".document",
|
20
|
+
".rspec",
|
21
|
+
"Gemfile",
|
22
|
+
"Gemfile.lock",
|
23
|
+
"LICENSE.txt",
|
24
|
+
"README.rdoc",
|
25
|
+
"Rakefile",
|
26
|
+
"VERSION",
|
27
|
+
"gogetter.gemspec",
|
28
|
+
"lib/go_getter.rb",
|
29
|
+
"lib/go_getter/go_getter.rb",
|
30
|
+
"lib/go_getter/response.rb",
|
31
|
+
"lib/go_getter/utils.rb",
|
32
|
+
"lib/gogetter.rb",
|
33
|
+
"spec/fixtures/google.html",
|
34
|
+
"spec/fixtures/google.redirect.html",
|
35
|
+
"spec/go_getter/go_getter_spec.rb",
|
36
|
+
"spec/rspec.opts",
|
37
|
+
"spec/spec_helper.rb"
|
38
|
+
]
|
39
|
+
s.homepage = %q{http://github.com/eladkehat/gogetter}
|
40
|
+
s.licenses = ["MIT"]
|
41
|
+
s.require_paths = ["lib"]
|
42
|
+
s.rubygems_version = %q{1.7.2}
|
43
|
+
s.summary = %q{Go get something over HTTP}
|
44
|
+
|
45
|
+
if s.respond_to? :specification_version then
|
46
|
+
s.specification_version = 3
|
47
|
+
|
48
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
49
|
+
s.add_development_dependency(%q<rspec>, ["~> 2.6.0"])
|
50
|
+
s.add_development_dependency(%q<yard>, ["~> 0.7.2"])
|
51
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
52
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
|
53
|
+
s.add_development_dependency(%q<fakeweb>, ["~> 1.3.0"])
|
54
|
+
else
|
55
|
+
s.add_dependency(%q<rspec>, ["~> 2.6.0"])
|
56
|
+
s.add_dependency(%q<yard>, ["~> 0.7.2"])
|
57
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
58
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
59
|
+
s.add_dependency(%q<fakeweb>, ["~> 1.3.0"])
|
60
|
+
end
|
61
|
+
else
|
62
|
+
s.add_dependency(%q<rspec>, ["~> 2.6.0"])
|
63
|
+
s.add_dependency(%q<yard>, ["~> 0.7.2"])
|
64
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
65
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
66
|
+
s.add_dependency(%q<fakeweb>, ["~> 1.3.0"])
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# Extension (monkey-patch) of Net::HTTPResponse.
#
# Adds a +final_uri+ attribute. GoGetter uses it while handling HTTP
# redirections to record the ultimate URI that the response was retrieved from.
class Net::HTTPResponse
  # Reader and writer are declared separately; together they are equivalent
  # to a single read/write accessor.
  attr_reader :final_uri
  attr_writer :final_uri
end
|
7
|
+
|
8
|
+
|
9
|
+
module GoGetter

  # Send an HTTP GET request and return the response.
  #
  # @param uri [URI, #to_s] the target; anything that is not a URI is converted
  #   with #to_s and run through GoGetter.parse_url
  # @param http_headers [Hash] header name => value pairs added to the request
  # @param params [Hash] options, all optional:
  #   :auth_user / :auth_pass   - basic authentication (used only when both are set)
  #   :proxy_host / :proxy_port - use a proxy (used only when both are set)
  #   :proxy_user / :proxy_pass - proxy credentials
  #   :read_timeout             - read timeout passed to Net::HTTP (default 600)
  #   :max_redirects            - how many redirections to follow (default 1)
  #   The hash is never modified; redirect bookkeeping is kept in a private copy.
  # @return [Net::HTTPResponse] the response; for a non-redirect response its
  #   final_uri is set to the URI that was requested
  def GoGetter.get(uri, http_headers = {}, params = {})
    uri = parse_url(uri.to_s) unless uri.is_a? URI
    # Work on a copy so a caller can reuse one params hash for many requests.
    params = params.dup

    # Build the request path as a NEW string. Appending to uri.path in place
    # would corrupt the URI object (and the hash of URIs held in :uris_seen).
    base = uri.path.empty? ? "/" : uri.path
    path = uri.query ? "#{base}?#{uri.query}" : base
    request = Net::HTTP::Get.new(path)
    http_headers.each { |key, value| request.add_field key, value }

    # basic authentication
    if params[:auth_user] && params[:auth_pass]
      request.basic_auth(params[:auth_user], params[:auth_pass])
    end

    # proxy
    klass =
      if params[:proxy_host] && params[:proxy_port]
        Net::HTTP::Proxy(params[:proxy_host], params[:proxy_port], params[:proxy_user], params[:proxy_pass])
      else
        Net::HTTP
      end

    response = klass.start(uri.host, uri.port) do |http|
      http.read_timeout = params.fetch(:read_timeout, 600)
      http.request(request)
    end

    if response.is_a?(Net::HTTPRedirection)
      # allow for a single redirection by default
      params[:max_redirects] = 1 unless params.has_key?(:max_redirects)
      response = handle_redirection(uri, response, http_headers, params)
    else
      response.final_uri = uri
    end

    response
  end

  # Given a URL string, which may not be formatted properly, build a URI.
  # "http://" is prepended when the string has no http/https scheme, and the
  # path is set to "/" when the URL has neither a path nor a query.
  #
  # @param url [String]
  # @return [URI]
  def GoGetter.parse_url(url)
    url = "http://#{url}" unless url =~ %r{\Ahttps?://}i
    uri = URI.parse url
    uri.path = "/" if uri.path.length == 0 && uri.query.nil?
    uri
  end

  # Follow a redirect response, if the redirect budget allows it.
  #
  # @param from_uri [URI] the URI whose request produced +response+
  # @param response [Net::HTTPResponse] the redirect response
  # @param http_headers [Hash] headers to resend with the follow-up request
  # @param params [Hash] as for GoGetter.get; :max_redirects is the number of
  #   redirections still allowed and :uris_seen (a Set, optional) holds the URIs
  #   already requested in this chain. Neither the hash nor the Set is modified.
  # @return [Net::HTTPResponse] the response of the follow-up request, or
  #   +response+ itself when the budget is exhausted, there is no Location
  #   header, or the target was already requested in this chain
  def GoGetter.handle_redirection(from_uri, response, http_headers, params)
    remaining = params.fetch(:max_redirects, 0)
    location = response['Location']
    # :max_redirects alone is the budget; it is decremented on every hop.
    return response unless remaining > 0 && location

    seen = Set.new(params[:uris_seen] || [])
    seen << from_uri

    new_uri = URI.parse(location)
    # new uri may be just the path, w/o host and port; if so, copy from old
    unless new_uri.host
      new_uri.host = from_uri.host
      new_uri.port = from_uri.port
    end
    new_uri.scheme = from_uri.scheme unless new_uri.scheme

    # avoid infinite redirect loops
    return response if seen.member?(new_uri)

    # request the new location just as we did the old one.
    next_params = params.merge(:max_redirects => remaining - 1, :uris_seen => seen)
    GoGetter.get(new_uri, http_headers, next_params)
  end

end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# Optional monkey-patch for Net::HTTPResponse.
# The gem does not load this file by default; require it explicitly in your
# own code if you want gzip-encoded bodies to be decompressed transparently.
class Net::HTTPResponse

  # Keep the stock reader reachable as #body_asis. The guard matters: if this
  # file were loaded twice, a second alias would point #body_asis at the
  # patched #body below and every call would recurse forever.
  alias_method :body_asis, :body unless method_defined?(:body_asis)

  # Replacement for #body that unzips a gzipped body before returning it.
  # To receive gzipped responses, call GoGetter.get with
  # "Accept-Encoding" => "gzip" in http_headers.
  #
  # @return [String, nil] the decompressed body when the Content-Encoding
  #   header is gzip (compared case-insensitively); otherwise the body
  #   exactly as #body_asis returns it (including nil or an empty string)
  def body
    raw = body_asis
    # Nothing to decompress; avoids feeding nil/"" to StringIO/GzipReader.
    return raw if raw.nil? || raw.empty?

    gzipped = key?("Content-Encoding") &&
              fetch("Content-Encoding").to_s.strip.downcase == "gzip"
    return raw unless gzipped

    Zlib::GzipReader.new(StringIO.new(raw)).read
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module GoGetter

  # A small catalogue of browser user-agent strings, for websites that change
  # their behavior according to the requesting browser.
  # To use one, add it to http_headers: "User-Agent" => USER_AGENTS[:chrome10_linux]
  # More user agent strings can be found at
  # http://www.useragentstring.com/pages/useragentstring.php
  USER_AGENTS = {
    chrome10_win:
      "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.638.0 Safari/534.16",
    chrome10_linux:
      "Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.648.0 Chrome/10.0.648.0 Safari/534.16",
    firefox36_win:
      "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8 ( .NET CLR 3.5.30729; .NET4.0C)",
    firefox36_linux:
      "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.8) Gecko/20100804 Gentoo Firefox/3.6.8",
    ie8:
      "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; Media Center PC 6.0; InfoPath.2; MS-RTC LM 8)",
    ie7:
      "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; SLCC2; .NET CLR 2.0.50727; InfoPath.3; .NET4.0C; .NET4.0E; .NET CLR 3.5.30729; .NET CLR 3.0.30729; MS-RTC LM 8)",
    opera11_win:
      "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.7.39 Version/11.00",
    safari5_mac:
      "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_3; en-us) AppleWebKit/534.1+ (KHTML, like Gecko) Version/5.0 Safari/533.16"
  }

end
|
data/lib/go_getter.rb
ADDED
data/lib/gogetter.rb
ADDED
@@ -0,0 +1,3 @@
|
|
1
|
+
<html><head><meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"><title>Google</title><style>body,td,a,p,.h{font-family:arial,sans-serif}.h{color:#36c;font-size:20px}.q{color:#00c}.ts td{padding:0}.ts{border-collapse:collapse}#gbar{height:22px;padding-left:2px}.gbh,.gbd{border-top:1px solid #c9d7f1;font-size:1px}.gbh{height:0;position:absolute;top:24px;width:100%}#gbi,#gbs{background:#fff;left:0;position:absolute;top:24px;visibility:hidden;z-index:1000}#gbi{border:1px solid;border-color:#c9d7f1 #36c #36c #a2bae7;z-index:1001}#guser{padding-bottom:7px !important}#gbar,#guser{font-size:13px;padding-top:1px !important}@media all{.gb1,.gb3{height:22px;margin-right:.73em;vertical-align:top}#gbar{float:left}}.gb2{display:block;padding:.2em .5em}a.gb1,a.gb2,a.gb3{color:#00c !important}.gb2,.gb3{text-decoration:none}a.gb2:hover{background:#36c;color:#fff !important}</style><script>window.google={kEI:"Zuk6ScOkLKHCMrrttckF",kEXPI:"17259,19124,19314",kHL:"en"};
|
2
|
+
google.y={};google.x=function(e,g){google.y[e.id]=[e,g];return false};function sf(){document.f.q.focus()}
|
3
|
+
window.gbar={};(function(){var b=window.gbar,f,h;b.qs=function(a){var c=window.encodeURIComponent&&(document.forms[0].q||"").value;if(c)a.href=a.href.replace(/([?&])q=[^&]*|$/,function(i,g){return(g||"&")+"q="+encodeURIComponent(c)})};function j(a,c){a.visibility=h?"hidden":"visible";a.left=c+"px"}b.tg=function(a){a=a||window.event;var c=0,i,g=window.navExtra,d=document.getElementById("gbi"),e=a.target||a.srcElement;a.cancelBubble=true;if(!f){f=document.createElement(Array.every||window.createPopup?"iframe":"div");f.frameBorder="0";f.src="#";d.parentNode.appendChild(f).id="gbs";if(g)for(i in g)d.insertBefore(g[i],d.firstChild).className="gb2";document.onclick=b.close}if(e.className!="gb3")e=e.parentNode;do c+=e.offsetLeft;while(e=e.offsetParent);j(d.style,c);f.style.width=d.offsetWidth+"px";f.style.height=d.offsetHeight+"px";j(f.style,c);h=!h};b.close=function(a){h&&b.tg(a)}})();</script></head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onload="sf();if(document.images)new Image().src='/images/nav_logo3.png'" topmargin=3 marginheight=3><div id=gbar><nobr><b class=gb1>Web</b> <a href="http://images.google.com/imghp?hl=en&tab=wi" onclick=gbar.qs(this) class=gb1>Images</a> <a href="http://maps.google.com/maps?hl=en&tab=wl" onclick=gbar.qs(this) class=gb1>Maps</a> <a href="http://news.google.com/nwshp?hl=en&tab=wn" onclick=gbar.qs(this) class=gb1>News</a> <a href="http://www.google.com/prdhp?hl=en&tab=wf" onclick=gbar.qs(this) class=gb1>Shopping</a> <a href="http://mail.google.com/mail/?hl=en&tab=wm" class=gb1>Gmail</a> <a href="http://www.google.com/intl/en/options/" onclick="this.blur();gbar.tg(event);return !1" class=gb3><u>more</u> <small>▼</small></a><div id=gbi> <a href="http://video.google.com/?hl=en&tab=wv" onclick=gbar.qs(this) class=gb2>Video</a> <a href="http://groups.google.com/grphp?hl=en&tab=wg" onclick=gbar.qs(this) class=gb2>Groups</a> <a href="http://books.google.com/bkshp?hl=en&tab=wp" onclick=gbar.qs(this) 
class=gb2>Books</a> <a href="http://scholar.google.com/schhp?hl=en&tab=ws" onclick=gbar.qs(this) class=gb2>Scholar</a> <a href="http://finance.google.com/finance?hl=en&tab=we" onclick=gbar.qs(this) class=gb2>Finance</a> <a href="http://blogsearch.google.com/?hl=en&tab=wb" onclick=gbar.qs(this) class=gb2>Blogs</a> <div class=gb2><div class=gbd></div></div> <a href="http://www.youtube.com/?hl=en&tab=w1" onclick=gbar.qs(this) class=gb2>YouTube</a> <a href="http://www.google.com/calendar/render?hl=en&tab=wc" class=gb2>Calendar</a> <a href="http://picasaweb.google.com/home?hl=en&tab=wq" onclick=gbar.qs(this) class=gb2>Photos</a> <a href="http://docs.google.com/?hl=en&tab=wo" class=gb2>Documents</a> <a href="http://www.google.com/reader/view/?hl=en&tab=wy" class=gb2>Reader</a> <a href="http://sites.google.com/?hl=en&tab=w3" class=gb2>Sites</a> <div class=gb2><div class=gbd></div></div> <a href="http://www.google.com/intl/en/options/" class=gb2>even more »</a></div> </nobr></div><div class=gbh style=left:0></div><div class=gbh style=right:0></div><div align=right id=guser style="font-size:84%;padding:0 0 4px" width=100%><nobr><a href="/url?sa=p&pref=ig&pval=3&q=http://www.google.com/ig%3Fhl%3Den%26source%3Diglk&usg=AFQjCNFA18XPfgb7dKnXfKz7x7g1GDH1tg">iGoogle</a> | <a href="https://www.google.com/accounts/Login?continue=http://www.google.com/&hl=en">Sign in</a></nobr></div><center><br clear=all id=lgpd><img alt="Google" height=110 src="/intl/en_ALL/images/logo.gif" width=276><br><br><form action="/search" name=f><table cellpadding=0 cellspacing=0><tr valign=top><td width=25%> </td><td align=center nowrap><input name=hl type=hidden value=en><input type=hidden name=ie value="ISO-8859-1"><input autocomplete="off" maxlength=2048 name=q size=55 title="Google Search" value=""><br><input name=btnG type=submit value="Google Search"><input name=btnI type=submit value="I'm Feeling Lucky"></td><td nowrap width=25%><font size=-2> <a href=/advanced_search?hl=en>Advanced Search</a><br> 
<a href=/preferences?hl=en>Preferences</a><br> <a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><br><font size=-1><a href="/intl/en/ads/">Advertising Programs</a> - <a href="/services/">Business Solutions</a> - <a href="/intl/en/about.html">About Google</a></font><p><font size=-2>©2008 - <a href="/intl/en/privacy.html">Privacy</a></font></p></center></body><script>if(google.y)google.y.first=[];window.setTimeout(function(){var xjs=document.createElement('script');xjs.src='/extern_js/f/CgJlbhICdXMgACswCjgMLCswDjgCLCswGDgDLA/8MIofMT_4o8.js';document.getElementsByTagName('head')[0].appendChild(xjs)},0);google.y.first.push(function(){google.ac.i(document.f,document.f.q,'','')})</script></html>
|
@@ -0,0 +1,151 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
+
|
3
|
+
describe "GoGetter" do
|
4
|
+
|
5
|
+
describe "#parse_url" do
|
6
|
+
it "returns a URI instance, given a URL string" do
|
7
|
+
uri = GoGetter.parse_url('http://www.google.com/')
|
8
|
+
uri.scheme.should == 'http'
|
9
|
+
uri.host.should == 'www.google.com'
|
10
|
+
uri.port.should == 80
|
11
|
+
uri.path.should == '/'
|
12
|
+
end
|
13
|
+
|
14
|
+
context "when the URL has no scheme" do
|
15
|
+
it "prepends http" do
|
16
|
+
uri = GoGetter.parse_url('www.google.com')
|
17
|
+
uri.scheme.should == 'http'
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
context "when the URL has no path" do
|
22
|
+
it "appends '/'" do
|
23
|
+
uri = GoGetter.parse_url('www.google.com')
|
24
|
+
uri.path.should == '/'
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
context "when the URL has a path" do
|
29
|
+
it "retains the path" do
|
30
|
+
uri = GoGetter.parse_url('www.google.com/search')
|
31
|
+
uri.path.should == '/search'
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
context "when the URL has a query part" do
|
36
|
+
it "retains the query" do
|
37
|
+
uri = GoGetter.parse_url('www.google.com/search?q=gogetter&hl=en')
|
38
|
+
uri.query.should == 'q=gogetter&hl=en'
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
describe "#handle_redirection" do
|
44
|
+
context "when the max_redirects param is 0" do
|
45
|
+
it "should not send another request" do
|
46
|
+
GoGetter.should_not_receive(:get)
|
47
|
+
response = Net::HTTPRedirection.new('1.1', '302', 'Found')
|
48
|
+
response['Location'] = 'http://www.google.co.il/'
|
49
|
+
GoGetter.handle_redirection(URI.parse('http://www.google.com/'), response, {}, {max_redirects: 0}).should == response
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
context "when the max_redirects param > 0" do
|
54
|
+
it "should send another get request to the new location" do
|
55
|
+
response = Net::HTTPRedirection.new('1.1', '302', 'Found')
|
56
|
+
response['Location'] = 'http://www.google.co.il/'
|
57
|
+
GoGetter.should_receive(:get).with(URI.parse('http://www.google.co.il'), {}, hash_including(:max_redirects=>0))
|
58
|
+
GoGetter.handle_redirection(URI.parse('http://www.google.com/'), response, {}, {max_redirects: 1})
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
context "when the response location is only a path (no host)" do
|
63
|
+
it "uses the host from the original URI" do
|
64
|
+
response = Net::HTTPRedirection.new('1.1', '302', 'Found')
|
65
|
+
response['Location'] = '/new_target'
|
66
|
+
exp_uri = URI.parse('/new_target')
|
67
|
+
exp_uri.host = 'www.google.com'; exp_uri.port = 80; exp_uri.scheme = 'http'
|
68
|
+
GoGetter.should_receive(:get).with(exp_uri, {}, hash_including(:max_redirects=>0))
|
69
|
+
GoGetter.handle_redirection(URI.parse('http://www.google.com/'), response, {}, {max_redirects: 1})
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
context "when the response redirects to a location that had already been got in the recursive chain" do
|
74
|
+
it "should not send another request" do
|
75
|
+
GoGetter.should_not_receive(:get)
|
76
|
+
response = Net::HTTPRedirection.new('1.1', '302', 'Found')
|
77
|
+
response['Location'] = 'http://www.google.co.il/'
|
78
|
+
GoGetter.handle_redirection(
|
79
|
+
URI.parse('http://www.google.com/'),
|
80
|
+
response, {},
|
81
|
+
{:max_redirects=>4,
|
82
|
+
:uris_seen => Set.new([URI.parse('http://www.google.co.il/'), URI.parse('http://www.yahoo.com/')])}
|
83
|
+
).should == response
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
describe "#get" do
|
89
|
+
context "when given a URL" do
|
90
|
+
it "should get it" do
|
91
|
+
url = "http://google.html/"
|
92
|
+
body = file_fixture('google.html')
|
93
|
+
FakeWeb.register_uri(:get, url, :status => ['200', 'OK'], :body => body)
|
94
|
+
response = GoGetter.get url
|
95
|
+
response.should be_a(Net::HTTPOK)
|
96
|
+
response.body.should == body
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
context "when given a URI" do
|
101
|
+
it "should get that too" do
|
102
|
+
url = "http://google.html/"
|
103
|
+
body = file_fixture('google.html')
|
104
|
+
FakeWeb.register_uri(:get, url, :status => ['200', 'OK'], :body => body)
|
105
|
+
response = GoGetter.get URI.parse(url)
|
106
|
+
response.should be_a(Net::HTTPOK)
|
107
|
+
response.body.should == body
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
context "when given basic auth params" do
|
112
|
+
it "should do basic authentication" do
|
113
|
+
url = 'http://example.com/secret'
|
114
|
+
url_auth = 'http://user:pass@example.com/secret'
|
115
|
+
FakeWeb.register_uri(:get, url, :body => "Unauthorized", :status => ["401", "Unauthorized"])
|
116
|
+
FakeWeb.register_uri(:get, url_auth, :status => ['200', 'OK'], :body => "Authorized")
|
117
|
+
GoGetter.get(url).should be_a(Net::HTTPUnauthorized)
|
118
|
+
GoGetter.get(url, {}, {:auth_user => 'user', :auth_pass => 'pass'}).should be_a(Net::HTTPOK)
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
context "when given proxy params" do
|
123
|
+
it "should use a proxy" do
|
124
|
+
url = "http://google.html/"
|
125
|
+
#proxy:
|
126
|
+
host = 'proxy.example.com'; port = '8080'
|
127
|
+
user = 'user'; pass = 'pass'
|
128
|
+
#proxy_class = Net::HTTP::Proxy(proxy_host, proxy_port, proxy_user, proxy_pass)
|
129
|
+
FakeWeb.register_uri(:get, url, :status => ['200', 'OK'], :body => file_fixture('google.html'))
|
130
|
+
Net::HTTP.should_receive(:Proxy).with(host, port, user, pass)
|
131
|
+
# I was unable to mock proxy behavior properly and #get keeps raising errors
|
132
|
+
# However, this code still tests that a proxy class is created, which is the whole point
|
133
|
+
expect {
|
134
|
+
GoGetter.get(url, {}, {proxy_host: host,proxy_port: port,proxy_user: user,proxy_pass: pass})
|
135
|
+
}.to raise_error
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
context "when the response is a redirect" do
|
140
|
+
it "does redirection" do
|
141
|
+
url1 = "http://www.google.com/"
|
142
|
+
url2 = "http://www.google.co.il/"
|
143
|
+
body = file_fixture('google.redirect.html')
|
144
|
+
FakeWeb.register_uri(:get, url1, :status => ['302','Found'], :headers => {'Location'=>url2},:body => body)
|
145
|
+
params = {max_redirects: 1}
|
146
|
+
GoGetter.should_receive(:handle_redirection).with(URI.parse(url1), an_instance_of(Net::HTTPFound), {}, params)
|
147
|
+
GoGetter.get(url1, {}, params)
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
data/spec/rspec.opts
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
3
|
+
require 'rspec'
|
4
|
+
require 'gogetter'
|
5
|
+
require 'fakeweb'
|
6
|
+
|
7
|
+
# Return the contents of a fixture file as a String.
#
# @param filename [#to_s] name of a file inside the "fixtures" directory that
#   sits next to this helper file
# @return [String] the file's contents
# @raise [Errno::ENOENT] if the fixture does not exist
def file_fixture(filename)
  # File.read opens, reads and closes in one step; the previous
  # open(...).read left the file handle open until garbage collection.
  File.read(File.join(File.dirname(__FILE__), 'fixtures', filename.to_s))
end
|
10
|
+
# Requires supporting files with custom matchers and macros, etc,
|
11
|
+
# in ./support/ and its subdirectories.
|
12
|
+
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
|
13
|
+
|
14
|
+
RSpec.configure do |config|
|
15
|
+
config.before(:suite) do
|
16
|
+
FakeWeb.allow_net_connect = false
|
17
|
+
end
|
18
|
+
config.after(:suite) do
|
19
|
+
FakeWeb.allow_net_connect = true
|
20
|
+
end
|
21
|
+
end
|
metadata
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: gogetter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Elad Kehat
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-07-25 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: &74419330 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 2.6.0
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *74419330
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: yard
|
27
|
+
requirement: &74418990 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ~>
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 0.7.2
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *74418990
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: bundler
|
38
|
+
requirement: &74418500 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ~>
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 1.0.0
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *74418500
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: jeweler
|
49
|
+
requirement: &74417980 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.6.4
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *74417980
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: fakeweb
|
60
|
+
requirement: &74417470 !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ~>
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: 1.3.0
|
66
|
+
type: :development
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *74417470
|
69
|
+
description:
|
70
|
+
email: eladkehat@gmail.com
|
71
|
+
executables: []
|
72
|
+
extensions: []
|
73
|
+
extra_rdoc_files:
|
74
|
+
- LICENSE.txt
|
75
|
+
- README.rdoc
|
76
|
+
files:
|
77
|
+
- .document
|
78
|
+
- .rspec
|
79
|
+
- Gemfile
|
80
|
+
- Gemfile.lock
|
81
|
+
- LICENSE.txt
|
82
|
+
- README.rdoc
|
83
|
+
- Rakefile
|
84
|
+
- VERSION
|
85
|
+
- gogetter.gemspec
|
86
|
+
- lib/go_getter.rb
|
87
|
+
- lib/go_getter/go_getter.rb
|
88
|
+
- lib/go_getter/response.rb
|
89
|
+
- lib/go_getter/utils.rb
|
90
|
+
- lib/gogetter.rb
|
91
|
+
- spec/fixtures/google.html
|
92
|
+
- spec/fixtures/google.redirect.html
|
93
|
+
- spec/go_getter/go_getter_spec.rb
|
94
|
+
- spec/rspec.opts
|
95
|
+
- spec/spec_helper.rb
|
96
|
+
homepage: http://github.com/eladkehat/gogetter
|
97
|
+
licenses:
|
98
|
+
- MIT
|
99
|
+
post_install_message:
|
100
|
+
rdoc_options: []
|
101
|
+
require_paths:
|
102
|
+
- lib
|
103
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
104
|
+
none: false
|
105
|
+
requirements:
|
106
|
+
- - ! '>='
|
107
|
+
- !ruby/object:Gem::Version
|
108
|
+
version: '0'
|
109
|
+
segments:
|
110
|
+
- 0
|
111
|
+
hash: 740502813
|
112
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
requirements: []
|
119
|
+
rubyforge_project:
|
120
|
+
rubygems_version: 1.7.2
|
121
|
+
signing_key:
|
122
|
+
specification_version: 3
|
123
|
+
summary: Go get something over HTTP
|
124
|
+
test_files: []
|