gogetter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ source "http://rubygems.org"
2
+
3
+ group :development, :test do
4
+ gem "rspec", "~> 2.6.0"
5
+ gem "yard", "~> 0.7.2"
6
+ gem "bundler", "~> 1.0.0"
7
+ gem "jeweler", "~> 1.6.4"
8
+ gem "fakeweb", "~> 1.3.0"
9
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,30 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.1.2)
5
+ fakeweb (1.3.0)
6
+ git (1.2.5)
7
+ jeweler (1.6.4)
8
+ bundler (~> 1.0)
9
+ git (>= 1.2.5)
10
+ rake
11
+ rake (0.9.2)
12
+ rspec (2.6.0)
13
+ rspec-core (~> 2.6.0)
14
+ rspec-expectations (~> 2.6.0)
15
+ rspec-mocks (~> 2.6.0)
16
+ rspec-core (2.6.4)
17
+ rspec-expectations (2.6.0)
18
+ diff-lcs (~> 1.1.2)
19
+ rspec-mocks (2.6.0)
20
+ yard (0.7.2)
21
+
22
+ PLATFORMS
23
+ ruby
24
+
25
+ DEPENDENCIES
26
+ bundler (~> 1.0.0)
27
+ fakeweb (~> 1.3.0)
28
+ jeweler (~> 1.6.4)
29
+ rspec (~> 2.6.0)
30
+ yard (~> 0.7.2)
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Elad Kehat
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,28 @@
1
+ = GoGetter
2
+
3
+ Easily send HTTP GET requests, with a little more sophistication than Net::HTTP.get
4
+
5
+ Unlike HTTParty, which is a great gem if your class works against a specific website, GoGetter fits the use case where
6
+ you need to send a bunch of HTTP GETs to several domains, and don't want to wrap each one in its own class.
7
+
8
+ Think of it as an alternative to open-uri that doesn't create any temporary files.
9
+
10
+ It handles proxies, basic authentication, and HTTP redirects.
11
+
12
+ Before releasing this code I used it extensively in a proprietary web crawler that sent around a billion GET requests
13
+ so far, so you could say that it's quite robust :)
14
+
15
+ == Contributing to gogetter
16
+
17
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
18
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
19
+ * Fork the project
20
+ * Start a feature/bugfix branch
21
+ * Commit and push until you are happy with your contribution
22
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
23
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or changing them is otherwise
24
+ necessary, that is fine, but please isolate the change to its own commit so I can cherry-pick around it.
25
+
26
+ == Copyright
27
+
28
+ Copyright (c) 2011 Elad Kehat. See LICENSE.txt for further details.
data/Rakefile ADDED
@@ -0,0 +1,35 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ gem.name = "gogetter"
17
+ gem.homepage = "http://github.com/eladkehat/gogetter"
18
+ gem.license = "MIT"
19
+ gem.summary = %Q{Go get something over HTTP}
20
+ gem.email = "eladkehat@gmail.com"
21
+ gem.authors = ["Elad Kehat"]
22
+ end
23
+ Jeweler::RubygemsDotOrgTasks.new
24
+
25
+ require 'rspec/core'
26
+ require 'rspec/core/rake_task'
27
+ RSpec::Core::RakeTask.new(:spec) do |spec|
28
+ spec.pattern = FileList['spec/**/*_spec.rb']
29
+ spec.rspec_opts = ['--options', 'spec/rspec.opts']
30
+ end
31
+
32
+ task :default => :spec
33
+
34
+ require 'yard'
35
+ YARD::Rake::YardocTask.new
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
data/gogetter.gemspec ADDED
@@ -0,0 +1,69 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{gogetter}
8
+ s.version = "0.1.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Elad Kehat"]
12
+ s.date = %q{2011-07-25}
13
+ s.email = %q{eladkehat@gmail.com}
14
+ s.extra_rdoc_files = [
15
+ "LICENSE.txt",
16
+ "README.rdoc"
17
+ ]
18
+ s.files = [
19
+ ".document",
20
+ ".rspec",
21
+ "Gemfile",
22
+ "Gemfile.lock",
23
+ "LICENSE.txt",
24
+ "README.rdoc",
25
+ "Rakefile",
26
+ "VERSION",
27
+ "gogetter.gemspec",
28
+ "lib/go_getter.rb",
29
+ "lib/go_getter/go_getter.rb",
30
+ "lib/go_getter/response.rb",
31
+ "lib/go_getter/utils.rb",
32
+ "lib/gogetter.rb",
33
+ "spec/fixtures/google.html",
34
+ "spec/fixtures/google.redirect.html",
35
+ "spec/go_getter/go_getter_spec.rb",
36
+ "spec/rspec.opts",
37
+ "spec/spec_helper.rb"
38
+ ]
39
+ s.homepage = %q{http://github.com/eladkehat/gogetter}
40
+ s.licenses = ["MIT"]
41
+ s.require_paths = ["lib"]
42
+ s.rubygems_version = %q{1.7.2}
43
+ s.summary = %q{Go get something over HTTP}
44
+
45
+ if s.respond_to? :specification_version then
46
+ s.specification_version = 3
47
+
48
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
49
+ s.add_development_dependency(%q<rspec>, ["~> 2.6.0"])
50
+ s.add_development_dependency(%q<yard>, ["~> 0.7.2"])
51
+ s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
52
+ s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
53
+ s.add_development_dependency(%q<fakeweb>, ["~> 1.3.0"])
54
+ else
55
+ s.add_dependency(%q<rspec>, ["~> 2.6.0"])
56
+ s.add_dependency(%q<yard>, ["~> 0.7.2"])
57
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
58
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
59
+ s.add_dependency(%q<fakeweb>, ["~> 1.3.0"])
60
+ end
61
+ else
62
+ s.add_dependency(%q<rspec>, ["~> 2.6.0"])
63
+ s.add_dependency(%q<yard>, ["~> 0.7.2"])
64
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
65
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
66
+ s.add_dependency(%q<fakeweb>, ["~> 1.3.0"])
67
+ end
68
+ end
69
+
@@ -0,0 +1,76 @@
1
+ # Monkey-patch Net::HTTPResponse
2
+ # Add a final_url attribute, which we use in handling HTTP redirections to determine
3
+ # the ultimate URI that the response was retrieve from
4
+ class Net::HTTPResponse
5
+ attr_accessor :final_uri
6
+ end
7
+
8
+
9
+ module GoGetter
10
+
11
+ def GoGetter.get(uri, http_headers = {}, params = {})
12
+ uri = parse_url(uri.to_s) unless uri.is_a? URI
13
+ path = uri.path
14
+ path << "?#{uri.query}" if uri.query
15
+ request = Net::HTTP::Get.new(path)
16
+ http_headers.each {|key, value| request.add_field key, value }
17
+
18
+ # basic authentication
19
+ request.basic_auth(params[:auth_user], params[:auth_pass]) if params[:auth_user] and params[:auth_pass]
20
+
21
+ # proxy
22
+ klass = (params[:proxy_host] and params[:proxy_port]) ?
23
+ Net::HTTP::Proxy(params[:proxy_host], params[:proxy_port], params[:proxy_user], params[:proxy_pass]) : Net::HTTP
24
+
25
+ response = klass.start(uri.host, uri.port) do |http|
26
+ http.read_timeout = params.fetch(:read_timeout, 600)
27
+ http.request(request)
28
+ end
29
+
30
+ if response.is_a?(Net::HTTPRedirection) # Redirect
31
+ # allow for a single redirection by default
32
+ params[:max_redirects] = 1 unless params.has_key?(:max_redirects)
33
+ response = handle_redirection(uri, response, http_headers, params)
34
+ else
35
+ response.final_uri = uri
36
+ end
37
+
38
+ return response
39
+ end
40
+
41
+ # Given a URL, which may not be formatted properly, parse a URI
42
+ def GoGetter.parse_url(url)
43
+ unless (url =~ %r{^https?://}mi) == 0
44
+ url = "http://#{url}"
45
+ end
46
+ uri = URI.parse url
47
+ if uri.path.length == 0 and uri.query.nil?
48
+ uri.path = "/"
49
+ end
50
+ uri
51
+ end
52
+
53
+ def GoGetter.handle_redirection(from_uri, response, http_headers, params)
54
+ if params.fetch(:max_redirects, 0) > 0
55
+ params[:uris_seen] = Set.new unless params[:uris_seen]
56
+ if params[:uris_seen].size < params.fetch(:max_redirects, 0) && response['Location']
57
+ params[:uris_seen] << from_uri
58
+ new_uri = URI.parse(response['Location'])
59
+ # new uri may be just the path, w/o host and port; if so, copy from old
60
+ unless new_uri.host
61
+ new_uri.host = from_uri.host
62
+ new_uri.port = from_uri.port
63
+ end
64
+ new_uri.scheme = from_uri.scheme unless new_uri.scheme
65
+ # avoid infinite redirect loops
66
+ unless params[:uris_seen].member? new_uri
67
+ # request the new location just as we did the old one.
68
+ params[:max_redirects] -= 1
69
+ response = GoGetter.get(new_uri, http_headers, params)
70
+ end
71
+ end
72
+ end
73
+ response
74
+ end
75
+
76
+ end
@@ -0,0 +1,16 @@
1
+ # Monkey-patch for Net::HTTPResponse
2
+ # This file isn't required by the gem by default, so require it in your code
3
+ class Net::HTTPResponse
4
+
5
+ alias :body_asis :body
6
+ # New version of #body unzips a gzipped body before returning it
7
+ # Call GoGetter.get with the following in http_headers: "Accept-Encoding" => "gzip")
8
+ def body
9
+ if key?("Content-Encoding") and fetch("Content-Encoding") == "gzip"
10
+ body_io = StringIO.new(body_asis)
11
+ Zlib::GzipReader.new(body_io).read
12
+ else
13
+ body_asis
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,17 @@
1
+ module GoGetter
2
+
3
+ # Some browser User-Agent strings, for use with websites that change their behavior according to your browser
4
+ # Set one by adding to http_headers: "User-Agent" => GoGetter::USER_AGENTS[:chrome10_linux]
5
+ # Use http://www.useragentstring.com/pages/useragentstring.php to find more user agent strings
6
+ USER_AGENTS = {
7
+ :chrome10_win => "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.638.0 Safari/534.16",
8
+ :chrome10_linux => "Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.648.0 Chrome/10.0.648.0 Safari/534.16",
9
+ :firefox36_win => "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8 ( .NET CLR 3.5.30729; .NET4.0C)",
10
+ :firefox36_linux => "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.8) Gecko/20100804 Gentoo Firefox/3.6.8",
11
+ :ie8 => "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; Media Center PC 6.0; InfoPath.2; MS-RTC LM 8)",
12
+ :ie7 => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; SLCC2; .NET CLR 2.0.50727; InfoPath.3; .NET4.0C; .NET4.0E; .NET CLR 3.5.30729; .NET CLR 3.0.30729; MS-RTC LM 8)",
13
+ :opera11_win => "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.7.39 Version/11.00",
14
+ :safari5_mac => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_3; en-us) AppleWebKit/534.1+ (KHTML, like Gecko) Version/5.0 Safari/533.16",
15
+ }
16
+
17
+ end
data/lib/go_getter.rb ADDED
@@ -0,0 +1,3 @@
1
+ require 'net/http'
2
+ require 'set'
3
+ require 'go_getter/go_getter'
data/lib/gogetter.rb ADDED
@@ -0,0 +1,2 @@
1
+ # So you can require "gogetter" instead of "go_getter". Picked this up from fakeweb.
2
+ require 'go_getter'
@@ -0,0 +1,3 @@
1
+ <html><head><meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"><title>Google</title><style>body,td,a,p,.h{font-family:arial,sans-serif}.h{color:#36c;font-size:20px}.q{color:#00c}.ts td{padding:0}.ts{border-collapse:collapse}#gbar{height:22px;padding-left:2px}.gbh,.gbd{border-top:1px solid #c9d7f1;font-size:1px}.gbh{height:0;position:absolute;top:24px;width:100%}#gbi,#gbs{background:#fff;left:0;position:absolute;top:24px;visibility:hidden;z-index:1000}#gbi{border:1px solid;border-color:#c9d7f1 #36c #36c #a2bae7;z-index:1001}#guser{padding-bottom:7px !important}#gbar,#guser{font-size:13px;padding-top:1px !important}@media all{.gb1,.gb3{height:22px;margin-right:.73em;vertical-align:top}#gbar{float:left}}.gb2{display:block;padding:.2em .5em}a.gb1,a.gb2,a.gb3{color:#00c !important}.gb2,.gb3{text-decoration:none}a.gb2:hover{background:#36c;color:#fff !important}</style><script>window.google={kEI:"Zuk6ScOkLKHCMrrttckF",kEXPI:"17259,19124,19314",kHL:"en"};
2
+ google.y={};google.x=function(e,g){google.y[e.id]=[e,g];return false};function sf(){document.f.q.focus()}
3
+ window.gbar={};(function(){var b=window.gbar,f,h;b.qs=function(a){var c=window.encodeURIComponent&&(document.forms[0].q||"").value;if(c)a.href=a.href.replace(/([?&])q=[^&]*|$/,function(i,g){return(g||"&")+"q="+encodeURIComponent(c)})};function j(a,c){a.visibility=h?"hidden":"visible";a.left=c+"px"}b.tg=function(a){a=a||window.event;var c=0,i,g=window.navExtra,d=document.getElementById("gbi"),e=a.target||a.srcElement;a.cancelBubble=true;if(!f){f=document.createElement(Array.every||window.createPopup?"iframe":"div");f.frameBorder="0";f.src="#";d.parentNode.appendChild(f).id="gbs";if(g)for(i in g)d.insertBefore(g[i],d.firstChild).className="gb2";document.onclick=b.close}if(e.className!="gb3")e=e.parentNode;do c+=e.offsetLeft;while(e=e.offsetParent);j(d.style,c);f.style.width=d.offsetWidth+"px";f.style.height=d.offsetHeight+"px";j(f.style,c);h=!h};b.close=function(a){h&&b.tg(a)}})();</script></head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onload="sf();if(document.images)new Image().src='/images/nav_logo3.png'" topmargin=3 marginheight=3><div id=gbar><nobr><b class=gb1>Web</b> <a href="http://images.google.com/imghp?hl=en&tab=wi" onclick=gbar.qs(this) class=gb1>Images</a> <a href="http://maps.google.com/maps?hl=en&tab=wl" onclick=gbar.qs(this) class=gb1>Maps</a> <a href="http://news.google.com/nwshp?hl=en&tab=wn" onclick=gbar.qs(this) class=gb1>News</a> <a href="http://www.google.com/prdhp?hl=en&tab=wf" onclick=gbar.qs(this) class=gb1>Shopping</a> <a href="http://mail.google.com/mail/?hl=en&tab=wm" class=gb1>Gmail</a> <a href="http://www.google.com/intl/en/options/" onclick="this.blur();gbar.tg(event);return !1" class=gb3><u>more</u> <small>&#9660;</small></a><div id=gbi> <a href="http://video.google.com/?hl=en&tab=wv" onclick=gbar.qs(this) class=gb2>Video</a> <a href="http://groups.google.com/grphp?hl=en&tab=wg" onclick=gbar.qs(this) class=gb2>Groups</a> <a href="http://books.google.com/bkshp?hl=en&tab=wp" onclick=gbar.qs(this) 
class=gb2>Books</a> <a href="http://scholar.google.com/schhp?hl=en&tab=ws" onclick=gbar.qs(this) class=gb2>Scholar</a> <a href="http://finance.google.com/finance?hl=en&tab=we" onclick=gbar.qs(this) class=gb2>Finance</a> <a href="http://blogsearch.google.com/?hl=en&tab=wb" onclick=gbar.qs(this) class=gb2>Blogs</a> <div class=gb2><div class=gbd></div></div> <a href="http://www.youtube.com/?hl=en&tab=w1" onclick=gbar.qs(this) class=gb2>YouTube</a> <a href="http://www.google.com/calendar/render?hl=en&tab=wc" class=gb2>Calendar</a> <a href="http://picasaweb.google.com/home?hl=en&tab=wq" onclick=gbar.qs(this) class=gb2>Photos</a> <a href="http://docs.google.com/?hl=en&tab=wo" class=gb2>Documents</a> <a href="http://www.google.com/reader/view/?hl=en&tab=wy" class=gb2>Reader</a> <a href="http://sites.google.com/?hl=en&tab=w3" class=gb2>Sites</a> <div class=gb2><div class=gbd></div></div> <a href="http://www.google.com/intl/en/options/" class=gb2>even more &raquo;</a></div> </nobr></div><div class=gbh style=left:0></div><div class=gbh style=right:0></div><div align=right id=guser style="font-size:84%;padding:0 0 4px" width=100%><nobr><a href="/url?sa=p&pref=ig&pval=3&q=http://www.google.com/ig%3Fhl%3Den%26source%3Diglk&usg=AFQjCNFA18XPfgb7dKnXfKz7x7g1GDH1tg">iGoogle</a> | <a href="https://www.google.com/accounts/Login?continue=http://www.google.com/&hl=en">Sign in</a></nobr></div><center><br clear=all id=lgpd><img alt="Google" height=110 src="/intl/en_ALL/images/logo.gif" width=276><br><br><form action="/search" name=f><table cellpadding=0 cellspacing=0><tr valign=top><td width=25%>&nbsp;</td><td align=center nowrap><input name=hl type=hidden value=en><input type=hidden name=ie value="ISO-8859-1"><input autocomplete="off" maxlength=2048 name=q size=55 title="Google Search" value=""><br><input name=btnG type=submit value="Google Search"><input name=btnI type=submit value="I'm Feeling Lucky"></td><td nowrap width=25%><font size=-2>&nbsp;&nbsp;<a 
href=/advanced_search?hl=en>Advanced Search</a><br>&nbsp;&nbsp;<a href=/preferences?hl=en>Preferences</a><br>&nbsp;&nbsp;<a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><br><font size=-1><a href="/intl/en/ads/">Advertising&nbsp;Programs</a> - <a href="/services/">Business Solutions</a> - <a href="/intl/en/about.html">About Google</a></font><p><font size=-2>&copy;2008 - <a href="/intl/en/privacy.html">Privacy</a></font></p></center></body><script>if(google.y)google.y.first=[];window.setTimeout(function(){var xjs=document.createElement('script');xjs.src='/extern_js/f/CgJlbhICdXMgACswCjgMLCswDjgCLCswGDgDLA/8MIofMT_4o8.js';document.getElementsByTagName('head')[0].appendChild(xjs)},0);google.y.first.push(function(){google.ac.i(document.f,document.f.q,'','')})</script></html>
@@ -0,0 +1,6 @@
1
+ <HTML><HEAD><meta http-equiv="content-type" content="text/html;charset=utf-8">
2
+ <TITLE>302 Moved</TITLE></HEAD><BODY>
3
+ <H1>302 Moved</H1>
4
+ The document has moved
5
+ <A HREF="http://www.google.co.il/">here</A>.
6
+ </BODY></HTML>
@@ -0,0 +1,151 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe "GoGetter" do
4
+
5
+ describe "#parse_url" do
6
+ it "returns a URI instance, given a URL string" do
7
+ uri = GoGetter.parse_url('http://www.google.com/')
8
+ uri.scheme.should == 'http'
9
+ uri.host.should == 'www.google.com'
10
+ uri.port.should == 80
11
+ uri.path.should == '/'
12
+ end
13
+
14
+ context "when the URL has no scheme" do
15
+ it "prepends http" do
16
+ uri = GoGetter.parse_url('www.google.com')
17
+ uri.scheme.should == 'http'
18
+ end
19
+ end
20
+
21
+ context "when the URL has no path" do
22
+ it "appends '/'" do
23
+ uri = GoGetter.parse_url('www.google.com')
24
+ uri.path.should == '/'
25
+ end
26
+ end
27
+
28
+ context "when the URL has a path" do
29
+ it "retains the path" do
30
+ uri = GoGetter.parse_url('www.google.com/search')
31
+ uri.path.should == '/search'
32
+ end
33
+ end
34
+
35
+ context "when the URL has a query part" do
36
+ it "retains the query" do
37
+ uri = GoGetter.parse_url('www.google.com/search?q=gogetter&hl=en')
38
+ uri.query.should == 'q=gogetter&hl=en'
39
+ end
40
+ end
41
+ end
42
+
43
+ describe "#handle_redirection" do
44
+ context "when the max_redirects param is 0" do
45
+ it "should not send another request" do
46
+ GoGetter.should_not_receive(:get)
47
+ response = Net::HTTPRedirection.new('1.1', '302', 'Found')
48
+ response['Location'] = 'http://www.google.co.il/'
49
+ GoGetter.handle_redirection(URI.parse('http://www.google.com/'), response, {}, {max_redirects: 0}).should == response
50
+ end
51
+ end
52
+
53
+ context "when the max_redirects param > 0" do
54
+ it "should send another get request to the new location" do
55
+ response = Net::HTTPRedirection.new('1.1', '302', 'Found')
56
+ response['Location'] = 'http://www.google.co.il/'
57
+ GoGetter.should_receive(:get).with(URI.parse('http://www.google.co.il'), {}, hash_including(:max_redirects=>0))
58
+ GoGetter.handle_redirection(URI.parse('http://www.google.com/'), response, {}, {max_redirects: 1})
59
+ end
60
+ end
61
+
62
+ context "when the response location is only a path (no host)" do
63
+ it "uses the host from the original URI" do
64
+ response = Net::HTTPRedirection.new('1.1', '302', 'Found')
65
+ response['Location'] = '/new_target'
66
+ exp_uri = URI.parse('/new_target')
67
+ exp_uri.host = 'www.google.com'; exp_uri.port = 80; exp_uri.scheme = 'http'
68
+ GoGetter.should_receive(:get).with(exp_uri, {}, hash_including(:max_redirects=>0))
69
+ GoGetter.handle_redirection(URI.parse('http://www.google.com/'), response, {}, {max_redirects: 1})
70
+ end
71
+ end
72
+
73
+ context "when the response redirects to a location that had already been got in the recursive chain" do
74
+ it "should not send another request" do
75
+ GoGetter.should_not_receive(:get)
76
+ response = Net::HTTPRedirection.new('1.1', '302', 'Found')
77
+ response['Location'] = 'http://www.google.co.il/'
78
+ GoGetter.handle_redirection(
79
+ URI.parse('http://www.google.com/'),
80
+ response, {},
81
+ {:max_redirects=>4,
82
+ :uris_seen => Set.new([URI.parse('http://www.google.co.il/'), URI.parse('http://www.yahoo.com/')])}
83
+ ).should == response
84
+ end
85
+ end
86
+ end
87
+
88
+ describe "#get" do
89
+ context "when given a URL" do
90
+ it "should get it" do
91
+ url = "http://google.html/"
92
+ body = file_fixture('google.html')
93
+ FakeWeb.register_uri(:get, url, :status => ['200', 'OK'], :body => body)
94
+ response = GoGetter.get url
95
+ response.should be_a(Net::HTTPOK)
96
+ response.body.should == body
97
+ end
98
+ end
99
+
100
+ context "when given a URI" do
101
+ it "should get that too" do
102
+ url = "http://google.html/"
103
+ body = file_fixture('google.html')
104
+ FakeWeb.register_uri(:get, url, :status => ['200', 'OK'], :body => body)
105
+ response = GoGetter.get URI.parse(url)
106
+ response.should be_a(Net::HTTPOK)
107
+ response.body.should == body
108
+ end
109
+ end
110
+
111
+ context "when given basic auth params" do
112
+ it "should do basic authentication" do
113
+ url = 'http://example.com/secret'
114
+ url_auth = 'http://user:pass@example.com/secret'
115
+ FakeWeb.register_uri(:get, url, :body => "Unauthorized", :status => ["401", "Unauthorized"])
116
+ FakeWeb.register_uri(:get, url_auth, :status => ['200', 'OK'], :body => "Authorized")
117
+ GoGetter.get(url).should be_a(Net::HTTPUnauthorized)
118
+ GoGetter.get(url, {}, {:auth_user => 'user', :auth_pass => 'pass'}).should be_a(Net::HTTPOK)
119
+ end
120
+ end
121
+
122
+ context "when given proxy params" do
123
+ it "should use a proxy" do
124
+ url = "http://google.html/"
125
+ #proxy:
126
+ host = 'proxy.example.com'; port = '8080'
127
+ user = 'user'; pass = 'pass'
128
+ #proxy_class = Net::HTTP::Proxy(proxy_host, proxy_port, proxy_user, proxy_pass)
129
+ FakeWeb.register_uri(:get, url, :status => ['200', 'OK'], :body => file_fixture('google.html'))
130
+ Net::HTTP.should_receive(:Proxy).with(host, port, user, pass)
131
+ # I was unable to mock proxy behavior properly and #get keeps raising errors
132
+ # However, this code still tests that a proxy class is created, which is the whole point
133
+ expect {
134
+ GoGetter.get(url, {}, {proxy_host: host,proxy_port: port,proxy_user: user,proxy_pass: pass})
135
+ }.to raise_error
136
+ end
137
+ end
138
+
139
+ context "when the response is a redirect" do
140
+ it "does redirection" do
141
+ url1 = "http://www.google.com/"
142
+ url2 = "http://www.google.co.il/"
143
+ body = file_fixture('google.redirect.html')
144
+ FakeWeb.register_uri(:get, url1, :status => ['302','Found'], :headers => {'Location'=>url2},:body => body)
145
+ params = {max_redirects: 1}
146
+ GoGetter.should_receive(:handle_redirection).with(URI.parse(url1), an_instance_of(Net::HTTPFound), {}, params)
147
+ GoGetter.get(url1, {}, params)
148
+ end
149
+ end
150
+ end
151
+ end
data/spec/rspec.opts ADDED
@@ -0,0 +1,3 @@
1
+ --colour
2
+ --format documentation
3
+ --backtrace
@@ -0,0 +1,21 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+ require 'rspec'
4
+ require 'gogetter'
5
+ require 'fakeweb'
6
+
7
+ def file_fixture(filename)
8
+ open(File.join(File.dirname(__FILE__), 'fixtures', "#{filename}")).read
9
+ end
10
+ # Requires supporting files with custom matchers and macros, etc,
11
+ # in ./support/ and its subdirectories.
12
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
13
+
14
+ RSpec.configure do |config|
15
+ config.before(:suite) do
16
+ FakeWeb.allow_net_connect = false
17
+ end
18
+ config.after(:suite) do
19
+ FakeWeb.allow_net_connect = true
20
+ end
21
+ end
metadata ADDED
@@ -0,0 +1,124 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gogetter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Elad Kehat
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-07-25 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: &74419330 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 2.6.0
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *74419330
25
+ - !ruby/object:Gem::Dependency
26
+ name: yard
27
+ requirement: &74418990 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: 0.7.2
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *74418990
36
+ - !ruby/object:Gem::Dependency
37
+ name: bundler
38
+ requirement: &74418500 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ~>
42
+ - !ruby/object:Gem::Version
43
+ version: 1.0.0
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *74418500
47
+ - !ruby/object:Gem::Dependency
48
+ name: jeweler
49
+ requirement: &74417980 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 1.6.4
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *74417980
58
+ - !ruby/object:Gem::Dependency
59
+ name: fakeweb
60
+ requirement: &74417470 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ~>
64
+ - !ruby/object:Gem::Version
65
+ version: 1.3.0
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *74417470
69
+ description:
70
+ email: eladkehat@gmail.com
71
+ executables: []
72
+ extensions: []
73
+ extra_rdoc_files:
74
+ - LICENSE.txt
75
+ - README.rdoc
76
+ files:
77
+ - .document
78
+ - .rspec
79
+ - Gemfile
80
+ - Gemfile.lock
81
+ - LICENSE.txt
82
+ - README.rdoc
83
+ - Rakefile
84
+ - VERSION
85
+ - gogetter.gemspec
86
+ - lib/go_getter.rb
87
+ - lib/go_getter/go_getter.rb
88
+ - lib/go_getter/response.rb
89
+ - lib/go_getter/utils.rb
90
+ - lib/gogetter.rb
91
+ - spec/fixtures/google.html
92
+ - spec/fixtures/google.redirect.html
93
+ - spec/go_getter/go_getter_spec.rb
94
+ - spec/rspec.opts
95
+ - spec/spec_helper.rb
96
+ homepage: http://github.com/eladkehat/gogetter
97
+ licenses:
98
+ - MIT
99
+ post_install_message:
100
+ rdoc_options: []
101
+ require_paths:
102
+ - lib
103
+ required_ruby_version: !ruby/object:Gem::Requirement
104
+ none: false
105
+ requirements:
106
+ - - ! '>='
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ segments:
110
+ - 0
111
+ hash: 740502813
112
+ required_rubygems_version: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ requirements: []
119
+ rubyforge_project:
120
+ rubygems_version: 1.7.2
121
+ signing_key:
122
+ specification_version: 3
123
+ summary: Go get something over HTTP
124
+ test_files: []