gogetter 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ source "http://rubygems.org"
2
+
3
+ group :development, :test do
4
+ gem "rspec", "~> 2.6.0"
5
+ gem "yard", "~> 0.7.2"
6
+ gem "bundler", "~> 1.0.0"
7
+ gem "jeweler", "~> 1.6.4"
8
+ gem "fakeweb", "~> 1.3.0"
9
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,30 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.1.2)
5
+ fakeweb (1.3.0)
6
+ git (1.2.5)
7
+ jeweler (1.6.4)
8
+ bundler (~> 1.0)
9
+ git (>= 1.2.5)
10
+ rake
11
+ rake (0.9.2)
12
+ rspec (2.6.0)
13
+ rspec-core (~> 2.6.0)
14
+ rspec-expectations (~> 2.6.0)
15
+ rspec-mocks (~> 2.6.0)
16
+ rspec-core (2.6.4)
17
+ rspec-expectations (2.6.0)
18
+ diff-lcs (~> 1.1.2)
19
+ rspec-mocks (2.6.0)
20
+ yard (0.7.2)
21
+
22
+ PLATFORMS
23
+ ruby
24
+
25
+ DEPENDENCIES
26
+ bundler (~> 1.0.0)
27
+ fakeweb (~> 1.3.0)
28
+ jeweler (~> 1.6.4)
29
+ rspec (~> 2.6.0)
30
+ yard (~> 0.7.2)
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Elad Kehat
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,28 @@
1
+ = GoGetter
2
+
3
+ Easily send GET requests, with a little more sophistication than Net::HTTP.get
4
+
5
+ Unlike HTTParty, which is a great gem if your class works against a specific website, GoGetter fits the use case where
6
+ you need to send a bunch of HTTP GETs to several domains, and don't want to wrap each one in its own class.
7
+
8
+ Think of it as an alternative to open-uri that doesn't create any temporary files.
9
+
10
+ It handles proxies, basic authentication, and HTTP redirects.
11
+
12
+ Before releasing this code I used it extensively in a proprietary web crawler that sent around a billion GET requests
13
+ so far, so you could say that it's quite robust :)
14
+
15
+ == Contributing to gogetter
16
+
17
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
18
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
19
+ * Fork the project
20
+ * Start a feature/bugfix branch
21
+ * Commit and push until you are happy with your contribution
22
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
23
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise
24
+ necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
25
+
26
+ == Copyright
27
+
28
+ Copyright (c) 2011 Elad Kehat. See LICENSE.txt for further details.
data/Rakefile ADDED
@@ -0,0 +1,35 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ gem.name = "gogetter"
17
+ gem.homepage = "http://github.com/eladkehat/gogetter"
18
+ gem.license = "MIT"
19
+ gem.summary = %Q{Go get something over HTTP}
20
+ gem.email = "eladkehat@gmail.com"
21
+ gem.authors = ["Elad Kehat"]
22
+ end
23
+ Jeweler::RubygemsDotOrgTasks.new
24
+
25
+ require 'rspec/core'
26
+ require 'rspec/core/rake_task'
27
+ RSpec::Core::RakeTask.new(:spec) do |spec|
28
+ spec.pattern = FileList['spec/**/*_spec.rb']
29
+ spec.rspec_opts = ['--options', 'spec/rspec.opts']
30
+ end
31
+
32
+ task :default => :spec
33
+
34
+ require 'yard'
35
+ YARD::Rake::YardocTask.new
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
data/gogetter.gemspec ADDED
@@ -0,0 +1,69 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{gogetter}
8
+ s.version = "0.1.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Elad Kehat"]
12
+ s.date = %q{2011-07-25}
13
+ s.email = %q{eladkehat@gmail.com}
14
+ s.extra_rdoc_files = [
15
+ "LICENSE.txt",
16
+ "README.rdoc"
17
+ ]
18
+ s.files = [
19
+ ".document",
20
+ ".rspec",
21
+ "Gemfile",
22
+ "Gemfile.lock",
23
+ "LICENSE.txt",
24
+ "README.rdoc",
25
+ "Rakefile",
26
+ "VERSION",
27
+ "gogetter.gemspec",
28
+ "lib/go_getter.rb",
29
+ "lib/go_getter/go_getter.rb",
30
+ "lib/go_getter/response.rb",
31
+ "lib/go_getter/utils.rb",
32
+ "lib/gogetter.rb",
33
+ "spec/fixtures/google.html",
34
+ "spec/fixtures/google.redirect.html",
35
+ "spec/go_getter/go_getter_spec.rb",
36
+ "spec/rspec.opts",
37
+ "spec/spec_helper.rb"
38
+ ]
39
+ s.homepage = %q{http://github.com/eladkehat/gogetter}
40
+ s.licenses = ["MIT"]
41
+ s.require_paths = ["lib"]
42
+ s.rubygems_version = %q{1.7.2}
43
+ s.summary = %q{Go get something over HTTP}
44
+
45
+ if s.respond_to? :specification_version then
46
+ s.specification_version = 3
47
+
48
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
49
+ s.add_development_dependency(%q<rspec>, ["~> 2.6.0"])
50
+ s.add_development_dependency(%q<yard>, ["~> 0.7.2"])
51
+ s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
52
+ s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
53
+ s.add_development_dependency(%q<fakeweb>, ["~> 1.3.0"])
54
+ else
55
+ s.add_dependency(%q<rspec>, ["~> 2.6.0"])
56
+ s.add_dependency(%q<yard>, ["~> 0.7.2"])
57
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
58
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
59
+ s.add_dependency(%q<fakeweb>, ["~> 1.3.0"])
60
+ end
61
+ else
62
+ s.add_dependency(%q<rspec>, ["~> 2.6.0"])
63
+ s.add_dependency(%q<yard>, ["~> 0.7.2"])
64
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
65
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
66
+ s.add_dependency(%q<fakeweb>, ["~> 1.3.0"])
67
+ end
68
+ end
69
+
@@ -0,0 +1,76 @@
1
+ # Monkey-patch Net::HTTPResponse
2
+ # Add a final_uri attribute, which we use in handling HTTP redirections to record
3
+ # the ultimate URI that the response was retrieved from
4
+ class Net::HTTPResponse
5
+ attr_accessor :final_uri
6
+ end
7
+
8
+
9
module GoGetter

  # Sends an HTTP GET request and returns the response, optionally following
  # redirects.
  #
  # @param uri [URI, #to_s] the target; anything that is not a URI is run
  #   through {GoGetter.parse_url} first
  # @param http_headers [Hash] header name => value pairs added to the request
  # @param params [Hash] options; the hash passed in is never modified
  # @option params [String] :auth_user, :auth_pass basic-auth credentials
  #   (both must be present)
  # @option params [String] :proxy_host, :proxy_port proxy endpoint (both must
  #   be present); :proxy_user and :proxy_pass are optional
  # @option params [Numeric] :read_timeout seconds, defaults to 600
  # @option params [Integer] :max_redirects redirects to follow, defaults to 1
  # @return [Net::HTTPResponse] for non-redirect responses +final_uri+ is set
  #   to the URI that was actually fetched
  def GoGetter.get(uri, http_headers = {}, params = {})
    uri = parse_url(uri.to_s) unless uri.is_a? URI

    # Build the request target in a fresh string. Appending to uri.path
    # directly would mutate the URI, leaking the query into final_uri.path and
    # into the redirect-loop bookkeeping.
    request_path = uri.path.to_s.empty? ? '/' : uri.path
    request_path = "#{request_path}?#{uri.query}" if uri.query
    request = Net::HTTP::Get.new(request_path)
    http_headers.each { |key, value| request.add_field key, value }

    # basic authentication
    if params[:auth_user] && params[:auth_pass]
      request.basic_auth(params[:auth_user], params[:auth_pass])
    end

    # proxy
    klass =
      if params[:proxy_host] && params[:proxy_port]
        Net::HTTP::Proxy(params[:proxy_host], params[:proxy_port], params[:proxy_user], params[:proxy_pass])
      else
        Net::HTTP
      end

    # parse_url accepts https:// URLs, so the connection has to negotiate TLS
    # for them instead of speaking plain HTTP to the TLS port.
    response = klass.start(uri.host, uri.port, :use_ssl => (uri.scheme == 'https')) do |http|
      http.read_timeout = params.fetch(:read_timeout, 600)
      http.request(request)
    end

    if response.is_a?(Net::HTTPRedirection)
      # allow for a single redirection by default, without touching the
      # caller's hash
      redirect_params = params.key?(:max_redirects) ? params : params.merge(:max_redirects => 1)
      handle_redirection(uri, response, http_headers, redirect_params)
    else
      response.final_uri = uri
      response
    end
  end

  # Given a URL, which may not be formatted properly, parse a URI.
  # A missing scheme defaults to http, and a URL with neither path nor query
  # gets the path "/".
  #
  # @param url [String]
  # @return [URI]
  # @raise [URI::InvalidURIError] if the URL cannot be parsed
  def GoGetter.parse_url(url)
    url = "http://#{url}" unless url =~ %r{\Ahttps?://}i
    uri = URI.parse(url)
    uri.path = '/' if uri.path.empty? && uri.query.nil?
    uri
  end

  # Follows a redirect response if the remaining redirect budget allows it.
  #
  # @param from_uri [URI] the URI whose request produced +response+
  # @param response [Net::HTTPResponse] the redirect response
  # @param http_headers [Hash] headers to resend to the new location
  # @param params [Hash] options as for {GoGetter.get}; +:max_redirects+ is the
  #   remaining budget and +:uris_seen+ (optional) the URIs already visited.
  #   Neither the hash nor the set passed in is modified.
  # @return [Net::HTTPResponse] the response from the new location, or
  #   +response+ itself when the budget is exhausted, there is no Location
  #   header, or the target was already visited
  def GoGetter.handle_redirection(from_uri, response, http_headers, params)
    remaining = params.fetch(:max_redirects, 0)
    location = response['Location']
    return response unless remaining > 0 && location

    # Work on a copy so a caller-supplied set is left alone.
    seen = Set.new(params[:uris_seen])
    seen << from_uri

    new_uri = URI.parse(location)
    # new uri may be just the path, w/o host and port; if so, copy from old
    unless new_uri.host
      new_uri.host = from_uri.host
      new_uri.port = from_uri.port
    end
    new_uri.scheme = from_uri.scheme unless new_uri.scheme

    # avoid infinite redirect loops
    return response if seen.member?(new_uri)

    # The budget is enforced solely by counting :max_redirects down; comparing
    # it against seen.size as well would halve the number of hops allowed.
    next_params = params.merge(:max_redirects => remaining - 1, :uris_seen => seen)
    GoGetter.get(new_uri, http_headers, next_params)
  end

end
@@ -0,0 +1,16 @@
1
# Monkey-patch for Net::HTTPResponse
# This file isn't required by the gem by default, so require it in your code
require 'zlib'
require 'stringio'

class Net::HTTPResponse

  # Keep the stock implementation reachable. The guard stops a second load of
  # this file from aliasing body_asis to the patched #body (which would then
  # recurse forever).
  alias :body_asis :body unless method_defined?(:body_asis)

  # New version of #body unzips a gzipped body before returning it.
  # Call GoGetter.get with the following in http_headers: "Accept-Encoding" => "gzip"
  #
  # @return [String, nil] the decompressed body when Content-Encoding is gzip
  #   (or x-gzip, compared case-insensitively); otherwise the body as received.
  #   A nil or empty body is returned unchanged.
  def body
    raw = body_asis
    encoding = self['Content-Encoding'].to_s.strip.downcase
    return raw if raw.nil? || raw.empty? || !%w[gzip x-gzip].include?(encoding)

    reader = Zlib::GzipReader.new(StringIO.new(raw))
    begin
      reader.read
    ensure
      reader.close
    end
  end
end
@@ -0,0 +1,17 @@
1
+ module GoGetter
2
+
3
+ # User-Agent strings for websites that change their behavior according to the requesting browser.
4
+ # Pass one in http_headers, e.g.: "User-Agent" => GoGetter::USER_AGENTS[:chrome10_linux]
5
+ # See http://www.useragentstring.com/pages/useragentstring.php for more user agent strings
6
+ USER_AGENTS = {
7
+ :chrome10_win => "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.638.0 Safari/534.16",
8
+ :chrome10_linux => "Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.648.0 Chrome/10.0.648.0 Safari/534.16",
9
+ :firefox36_win => "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8 ( .NET CLR 3.5.30729; .NET4.0C)",
10
+ :firefox36_linux => "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.8) Gecko/20100804 Gentoo Firefox/3.6.8",
11
+ :ie8 => "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; Media Center PC 6.0; InfoPath.2; MS-RTC LM 8)",
12
+ :ie7 => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; SLCC2; .NET CLR 2.0.50727; InfoPath.3; .NET4.0C; .NET4.0E; .NET CLR 3.5.30729; .NET CLR 3.0.30729; MS-RTC LM 8)",
13
+ :opera11_win => "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.7.39 Version/11.00",
14
+ :safari5_mac => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_3; en-us) AppleWebKit/534.1+ (KHTML, like Gecko) Version/5.0 Safari/533.16",
15
+ }
16
+
17
+ end
data/lib/go_getter.rb ADDED
@@ -0,0 +1,3 @@
1
+ require 'net/http'
2
+ require 'set'
3
+ require 'go_getter/go_getter'
data/lib/gogetter.rb ADDED
@@ -0,0 +1,2 @@
1
+ # So you can require "gogetter" instead of "go_getter". Picked this up from fakeweb.
2
+ require 'go_getter'
@@ -0,0 +1,3 @@
1
+ <html><head><meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"><title>Google</title><style>body,td,a,p,.h{font-family:arial,sans-serif}.h{color:#36c;font-size:20px}.q{color:#00c}.ts td{padding:0}.ts{border-collapse:collapse}#gbar{height:22px;padding-left:2px}.gbh,.gbd{border-top:1px solid #c9d7f1;font-size:1px}.gbh{height:0;position:absolute;top:24px;width:100%}#gbi,#gbs{background:#fff;left:0;position:absolute;top:24px;visibility:hidden;z-index:1000}#gbi{border:1px solid;border-color:#c9d7f1 #36c #36c #a2bae7;z-index:1001}#guser{padding-bottom:7px !important}#gbar,#guser{font-size:13px;padding-top:1px !important}@media all{.gb1,.gb3{height:22px;margin-right:.73em;vertical-align:top}#gbar{float:left}}.gb2{display:block;padding:.2em .5em}a.gb1,a.gb2,a.gb3{color:#00c !important}.gb2,.gb3{text-decoration:none}a.gb2:hover{background:#36c;color:#fff !important}</style><script>window.google={kEI:"Zuk6ScOkLKHCMrrttckF",kEXPI:"17259,19124,19314",kHL:"en"};
2
+ google.y={};google.x=function(e,g){google.y[e.id]=[e,g];return false};function sf(){document.f.q.focus()}
3
+ window.gbar={};(function(){var b=window.gbar,f,h;b.qs=function(a){var c=window.encodeURIComponent&&(document.forms[0].q||"").value;if(c)a.href=a.href.replace(/([?&])q=[^&]*|$/,function(i,g){return(g||"&")+"q="+encodeURIComponent(c)})};function j(a,c){a.visibility=h?"hidden":"visible";a.left=c+"px"}b.tg=function(a){a=a||window.event;var c=0,i,g=window.navExtra,d=document.getElementById("gbi"),e=a.target||a.srcElement;a.cancelBubble=true;if(!f){f=document.createElement(Array.every||window.createPopup?"iframe":"div");f.frameBorder="0";f.src="#";d.parentNode.appendChild(f).id="gbs";if(g)for(i in g)d.insertBefore(g[i],d.firstChild).className="gb2";document.onclick=b.close}if(e.className!="gb3")e=e.parentNode;do c+=e.offsetLeft;while(e=e.offsetParent);j(d.style,c);f.style.width=d.offsetWidth+"px";f.style.height=d.offsetHeight+"px";j(f.style,c);h=!h};b.close=function(a){h&&b.tg(a)}})();</script></head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onload="sf();if(document.images)new Image().src='/images/nav_logo3.png'" topmargin=3 marginheight=3><div id=gbar><nobr><b class=gb1>Web</b> <a href="http://images.google.com/imghp?hl=en&tab=wi" onclick=gbar.qs(this) class=gb1>Images</a> <a href="http://maps.google.com/maps?hl=en&tab=wl" onclick=gbar.qs(this) class=gb1>Maps</a> <a href="http://news.google.com/nwshp?hl=en&tab=wn" onclick=gbar.qs(this) class=gb1>News</a> <a href="http://www.google.com/prdhp?hl=en&tab=wf" onclick=gbar.qs(this) class=gb1>Shopping</a> <a href="http://mail.google.com/mail/?hl=en&tab=wm" class=gb1>Gmail</a> <a href="http://www.google.com/intl/en/options/" onclick="this.blur();gbar.tg(event);return !1" class=gb3><u>more</u> <small>&#9660;</small></a><div id=gbi> <a href="http://video.google.com/?hl=en&tab=wv" onclick=gbar.qs(this) class=gb2>Video</a> <a href="http://groups.google.com/grphp?hl=en&tab=wg" onclick=gbar.qs(this) class=gb2>Groups</a> <a href="http://books.google.com/bkshp?hl=en&tab=wp" onclick=gbar.qs(this) 
class=gb2>Books</a> <a href="http://scholar.google.com/schhp?hl=en&tab=ws" onclick=gbar.qs(this) class=gb2>Scholar</a> <a href="http://finance.google.com/finance?hl=en&tab=we" onclick=gbar.qs(this) class=gb2>Finance</a> <a href="http://blogsearch.google.com/?hl=en&tab=wb" onclick=gbar.qs(this) class=gb2>Blogs</a> <div class=gb2><div class=gbd></div></div> <a href="http://www.youtube.com/?hl=en&tab=w1" onclick=gbar.qs(this) class=gb2>YouTube</a> <a href="http://www.google.com/calendar/render?hl=en&tab=wc" class=gb2>Calendar</a> <a href="http://picasaweb.google.com/home?hl=en&tab=wq" onclick=gbar.qs(this) class=gb2>Photos</a> <a href="http://docs.google.com/?hl=en&tab=wo" class=gb2>Documents</a> <a href="http://www.google.com/reader/view/?hl=en&tab=wy" class=gb2>Reader</a> <a href="http://sites.google.com/?hl=en&tab=w3" class=gb2>Sites</a> <div class=gb2><div class=gbd></div></div> <a href="http://www.google.com/intl/en/options/" class=gb2>even more &raquo;</a></div> </nobr></div><div class=gbh style=left:0></div><div class=gbh style=right:0></div><div align=right id=guser style="font-size:84%;padding:0 0 4px" width=100%><nobr><a href="/url?sa=p&pref=ig&pval=3&q=http://www.google.com/ig%3Fhl%3Den%26source%3Diglk&usg=AFQjCNFA18XPfgb7dKnXfKz7x7g1GDH1tg">iGoogle</a> | <a href="https://www.google.com/accounts/Login?continue=http://www.google.com/&hl=en">Sign in</a></nobr></div><center><br clear=all id=lgpd><img alt="Google" height=110 src="/intl/en_ALL/images/logo.gif" width=276><br><br><form action="/search" name=f><table cellpadding=0 cellspacing=0><tr valign=top><td width=25%>&nbsp;</td><td align=center nowrap><input name=hl type=hidden value=en><input type=hidden name=ie value="ISO-8859-1"><input autocomplete="off" maxlength=2048 name=q size=55 title="Google Search" value=""><br><input name=btnG type=submit value="Google Search"><input name=btnI type=submit value="I'm Feeling Lucky"></td><td nowrap width=25%><font size=-2>&nbsp;&nbsp;<a 
href=/advanced_search?hl=en>Advanced Search</a><br>&nbsp;&nbsp;<a href=/preferences?hl=en>Preferences</a><br>&nbsp;&nbsp;<a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><br><font size=-1><a href="/intl/en/ads/">Advertising&nbsp;Programs</a> - <a href="/services/">Business Solutions</a> - <a href="/intl/en/about.html">About Google</a></font><p><font size=-2>&copy;2008 - <a href="/intl/en/privacy.html">Privacy</a></font></p></center></body><script>if(google.y)google.y.first=[];window.setTimeout(function(){var xjs=document.createElement('script');xjs.src='/extern_js/f/CgJlbhICdXMgACswCjgMLCswDjgCLCswGDgDLA/8MIofMT_4o8.js';document.getElementsByTagName('head')[0].appendChild(xjs)},0);google.y.first.push(function(){google.ac.i(document.f,document.f.q,'','')})</script></html>
@@ -0,0 +1,6 @@
1
+ <HTML><HEAD><meta http-equiv="content-type" content="text/html;charset=utf-8">
2
+ <TITLE>302 Moved</TITLE></HEAD><BODY>
3
+ <H1>302 Moved</H1>
4
+ The document has moved
5
+ <A HREF="http://www.google.co.il/">here</A>.
6
+ </BODY></HTML>
@@ -0,0 +1,151 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe "GoGetter" do
4
+
5
+ describe "#parse_url" do
6
+ it "returns a URI instance, given a URL string" do
7
+ uri = GoGetter.parse_url('http://www.google.com/')
8
+ uri.scheme.should == 'http'
9
+ uri.host.should == 'www.google.com'
10
+ uri.port.should == 80
11
+ uri.path.should == '/'
12
+ end
13
+
14
+ context "when the URL has no scheme" do
15
+ it "prepends http" do
16
+ uri = GoGetter.parse_url('www.google.com')
17
+ uri.scheme.should == 'http'
18
+ end
19
+ end
20
+
21
+ context "when the URL has no path" do
22
+ it "appends '/'" do
23
+ uri = GoGetter.parse_url('www.google.com')
24
+ uri.path.should == '/'
25
+ end
26
+ end
27
+
28
+ context "when the URL has a path" do
29
+ it "retains the path" do
30
+ uri = GoGetter.parse_url('www.google.com/search')
31
+ uri.path.should == '/search'
32
+ end
33
+ end
34
+
35
+ context "when the URL has a query part" do
36
+ it "retains the query" do
37
+ uri = GoGetter.parse_url('www.google.com/search?q=gogetter&hl=en')
38
+ uri.query.should == 'q=gogetter&hl=en'
39
+ end
40
+ end
41
+ end
42
+
43
+ describe "#handle_redirection" do
44
+ context "when the max_redirects param is 0" do
45
+ it "should not send another request" do
46
+ GoGetter.should_not_receive(:get)
47
+ response = Net::HTTPRedirection.new('1.1', '302', 'Found')
48
+ response['Location'] = 'http://www.google.co.il/'
49
+ GoGetter.handle_redirection(URI.parse('http://www.google.com/'), response, {}, {max_redirects: 0}).should == response
50
+ end
51
+ end
52
+
53
+ context "when the max_redirects param > 0" do
54
+ it "should send another get request to the new location" do
55
+ response = Net::HTTPRedirection.new('1.1', '302', 'Found')
56
+ response['Location'] = 'http://www.google.co.il/'
57
+ GoGetter.should_receive(:get).with(URI.parse('http://www.google.co.il'), {}, hash_including(:max_redirects=>0))
58
+ GoGetter.handle_redirection(URI.parse('http://www.google.com/'), response, {}, {max_redirects: 1})
59
+ end
60
+ end
61
+
62
+ context "when the response location is only a path (no host)" do
63
+ it "uses the host from the original URI" do
64
+ response = Net::HTTPRedirection.new('1.1', '302', 'Found')
65
+ response['Location'] = '/new_target'
66
+ exp_uri = URI.parse('/new_target')
67
+ exp_uri.host = 'www.google.com'; exp_uri.port = 80; exp_uri.scheme = 'http'
68
+ GoGetter.should_receive(:get).with(exp_uri, {}, hash_including(:max_redirects=>0))
69
+ GoGetter.handle_redirection(URI.parse('http://www.google.com/'), response, {}, {max_redirects: 1})
70
+ end
71
+ end
72
+
73
+ context "when the response redirects to a location that had already been got in the recursive chain" do
74
+ it "should not send another request" do
75
+ GoGetter.should_not_receive(:get)
76
+ response = Net::HTTPRedirection.new('1.1', '302', 'Found')
77
+ response['Location'] = 'http://www.google.co.il/'
78
+ GoGetter.handle_redirection(
79
+ URI.parse('http://www.google.com/'),
80
+ response, {},
81
+ {:max_redirects=>4,
82
+ :uris_seen => Set.new([URI.parse('http://www.google.co.il/'), URI.parse('http://www.yahoo.com/')])}
83
+ ).should == response
84
+ end
85
+ end
86
+ end
87
+
88
+ describe "#get" do
89
+ context "when given a URL" do
90
+ it "should get it" do
91
+ url = "http://google.html/"
92
+ body = file_fixture('google.html')
93
+ FakeWeb.register_uri(:get, url, :status => ['200', 'OK'], :body => body)
94
+ response = GoGetter.get url
95
+ response.should be_a(Net::HTTPOK)
96
+ response.body.should == body
97
+ end
98
+ end
99
+
100
+ context "when given a URI" do
101
+ it "should get that too" do
102
+ url = "http://google.html/"
103
+ body = file_fixture('google.html')
104
+ FakeWeb.register_uri(:get, url, :status => ['200', 'OK'], :body => body)
105
+ response = GoGetter.get URI.parse(url)
106
+ response.should be_a(Net::HTTPOK)
107
+ response.body.should == body
108
+ end
109
+ end
110
+
111
+ context "when given basic auth params" do
112
+ it "should do basic authentication" do
113
+ url = 'http://example.com/secret'
114
+ url_auth = 'http://user:pass@example.com/secret'
115
+ FakeWeb.register_uri(:get, url, :body => "Unauthorized", :status => ["401", "Unauthorized"])
116
+ FakeWeb.register_uri(:get, url_auth, :status => ['200', 'OK'], :body => "Authorized")
117
+ GoGetter.get(url).should be_a(Net::HTTPUnauthorized)
118
+ GoGetter.get(url, {}, {:auth_user => 'user', :auth_pass => 'pass'}).should be_a(Net::HTTPOK)
119
+ end
120
+ end
121
+
122
+ context "when given proxy params" do
123
+ it "should use a proxy" do
124
+ url = "http://google.html/"
125
+ #proxy:
126
+ host = 'proxy.example.com'; port = '8080'
127
+ user = 'user'; pass = 'pass'
128
+ #proxy_class = Net::HTTP::Proxy(proxy_host, proxy_port, proxy_user, proxy_pass)
129
+ FakeWeb.register_uri(:get, url, :status => ['200', 'OK'], :body => file_fixture('google.html'))
130
+ Net::HTTP.should_receive(:Proxy).with(host, port, user, pass)
131
+ # I was unable to mock proxy behavior properly and #get keeps raising errors
132
+ # However, this code still tests that a proxy class is created, which is the whole point
133
+ expect {
134
+ GoGetter.get(url, {}, {proxy_host: host,proxy_port: port,proxy_user: user,proxy_pass: pass})
135
+ }.to raise_error
136
+ end
137
+ end
138
+
139
+ context "when the response is a redirect" do
140
+ it "does redirection" do
141
+ url1 = "http://www.google.com/"
142
+ url2 = "http://www.google.co.il/"
143
+ body = file_fixture('google.redirect.html')
144
+ FakeWeb.register_uri(:get, url1, :status => ['302','Found'], :headers => {'Location'=>url2},:body => body)
145
+ params = {max_redirects: 1}
146
+ GoGetter.should_receive(:handle_redirection).with(URI.parse(url1), an_instance_of(Net::HTTPFound), {}, params)
147
+ GoGetter.get(url1, {}, params)
148
+ end
149
+ end
150
+ end
151
+ end
data/spec/rspec.opts ADDED
@@ -0,0 +1,3 @@
1
+ --colour
2
+ --format documentation
3
+ --backtrace
@@ -0,0 +1,21 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+ require 'rspec'
4
+ require 'gogetter'
5
+ require 'fakeweb'
6
+
7
# Returns the contents of a file under spec/fixtures (the "fixtures" directory
# next to this file).
#
# File.read is used instead of Kernel#open(...).read so the file handle is
# closed immediately rather than lingering until garbage collection.
#
# @param filename [#to_s] fixture name relative to the fixtures directory
# @return [String] the file's contents
# @raise [Errno::ENOENT] if the fixture does not exist
def file_fixture(filename)
  File.read(File.join(File.dirname(__FILE__), 'fixtures', filename.to_s))
end
10
+ # Requires supporting files with custom matchers and macros, etc,
11
+ # in ./support/ and its subdirectories.
12
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
13
+
14
+ RSpec.configure do |config|
15
+ config.before(:suite) do
16
+ FakeWeb.allow_net_connect = false
17
+ end
18
+ config.after(:suite) do
19
+ FakeWeb.allow_net_connect = true
20
+ end
21
+ end
metadata ADDED
@@ -0,0 +1,124 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gogetter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Elad Kehat
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-07-25 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: &74419330 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 2.6.0
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *74419330
25
+ - !ruby/object:Gem::Dependency
26
+ name: yard
27
+ requirement: &74418990 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: 0.7.2
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *74418990
36
+ - !ruby/object:Gem::Dependency
37
+ name: bundler
38
+ requirement: &74418500 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ~>
42
+ - !ruby/object:Gem::Version
43
+ version: 1.0.0
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *74418500
47
+ - !ruby/object:Gem::Dependency
48
+ name: jeweler
49
+ requirement: &74417980 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 1.6.4
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *74417980
58
+ - !ruby/object:Gem::Dependency
59
+ name: fakeweb
60
+ requirement: &74417470 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ~>
64
+ - !ruby/object:Gem::Version
65
+ version: 1.3.0
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *74417470
69
+ description:
70
+ email: eladkehat@gmail.com
71
+ executables: []
72
+ extensions: []
73
+ extra_rdoc_files:
74
+ - LICENSE.txt
75
+ - README.rdoc
76
+ files:
77
+ - .document
78
+ - .rspec
79
+ - Gemfile
80
+ - Gemfile.lock
81
+ - LICENSE.txt
82
+ - README.rdoc
83
+ - Rakefile
84
+ - VERSION
85
+ - gogetter.gemspec
86
+ - lib/go_getter.rb
87
+ - lib/go_getter/go_getter.rb
88
+ - lib/go_getter/response.rb
89
+ - lib/go_getter/utils.rb
90
+ - lib/gogetter.rb
91
+ - spec/fixtures/google.html
92
+ - spec/fixtures/google.redirect.html
93
+ - spec/go_getter/go_getter_spec.rb
94
+ - spec/rspec.opts
95
+ - spec/spec_helper.rb
96
+ homepage: http://github.com/eladkehat/gogetter
97
+ licenses:
98
+ - MIT
99
+ post_install_message:
100
+ rdoc_options: []
101
+ require_paths:
102
+ - lib
103
+ required_ruby_version: !ruby/object:Gem::Requirement
104
+ none: false
105
+ requirements:
106
+ - - ! '>='
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ segments:
110
+ - 0
111
+ hash: 740502813
112
+ required_rubygems_version: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ requirements: []
119
+ rubyforge_project:
120
+ rubygems_version: 1.7.2
121
+ signing_key:
122
+ specification_version: 3
123
+ summary: Go get something over HTTP
124
+ test_files: []