linkr 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ .rvmrc
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Fetch gems from the official RubyGems index over HTTPS. The bare
# :rubygems symbol source is deprecated by Bundler.
source 'https://rubygems.org'

# Runtime and development dependencies are declared in linkr.gemspec.
gemspec
3
+
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2011, Duncan Robertson, British Broadcasting Corporation
2
+
3
+ Permission is hereby granted, free of charge, to any person
4
+ obtaining a copy of this software and associated documentation
5
+ files (the "Software"), to deal in the Software without
6
+ restriction, including without limitation the rights to use,
7
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the
9
+ Software is furnished to do so, subject to the following
10
+ conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
data/README.org ADDED
@@ -0,0 +1,36 @@
1
+ * Linkr
2
+
3
+ ** Overview
4
+
5
+ A Ruby library for following links. Net::HTTP does not automatically follow
6
+ redirects (such as 301s) returned by web pages. Also, with the number of URL-shortening
7
+ services out there, it is common to want to resolve URLs to the canonical version.
8
+
9
+ Linkr fixes this problem through a simple interface.
10
+
11
+ ** Dependencies
12
+
13
+ - Uses Net::HTTP under the hood.
14
+ - Linkr requires the addressable gem. This will be installed as a dependency
15
+ when you install Linkr.
16
+
17
+ ** Usage
18
+
19
+ : require 'linkr'
20
+ :
21
+ : # easy url resolving
22
+ : Linkr.resolve("http://bbc.in/pdTHqe") => http://www.bbc.co.uk
23
+ :
24
+ : # if you need more info
25
+ : link = Linkr.new("http://bbc.in/pdTHqe")
26
+ : link.body => "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Tr
27
+ : ansitional//EN\" \"http://www.w3.org/TR/xh".......</body></html>"
28
+
29
+ ** Contributing
30
+ - Fork the project
31
+ - Send a pull request
32
+ - Don't touch the .gemspec, I'll do that when I release a new version
33
+
34
+ ** Author
35
+
36
+ [[http://whomwah.com][Duncan Robertson]] - BBC R&D
data/Rakefile ADDED
@@ -0,0 +1,14 @@
1
+ require 'rake/testtask'
2
+ require 'bundler'
3
+
4
# Install the gem-related Rake tasks provided by Bundler's GemHelper.
Bundler::GemHelper.install_tasks

# Test task: puts lib/ and test/ on the load path and runs every
# test/test_*.rb file verbosely.
desc "Run tests"
Rake::TestTask.new do |test_task|
  test_task.libs.push("lib", "test")
  test_task.verbose = true
  test_task.test_files = FileList['test/test_*.rb']
end

# Running plain `rake` runs the tests.
task :default => :test
@@ -0,0 +1,3 @@
1
# Linkr is defined as a class in lib/linkr.rb, so the version constant must
# reopen it as a class. Declaring it as a module here makes Ruby raise
# TypeError ("Linkr is not a class") as soon as both files are loaded in the
# same process (e.g. the gemspec followed by the library).
class Linkr
  # Version of the linkr gem.
  VERSION = "0.1.0"
end
data/lib/linkr.rb ADDED
@@ -0,0 +1,88 @@
1
+ require 'ostruct'
2
+ require 'net/http'
3
+ require 'addressable/uri'
4
+
5
# Resolves a URL to its final destination by following HTTP redirects, taking
# the target from the Location header or, failing that, from the first link in
# the response body.
#
#   Linkr.resolve("http://bbc.in/pdTHqe")   # => "http://www.bbc.co.uk"
#
#   link = Linkr.new("http://bbc.in/pdTHqe", :redirect_limit => 3)
#   link.url      # final URL
#   link.body     # body of the final response
#   link.response # final Net::HTTPResponse
class Linkr
  # Raised when more than +redirect_limit+ redirects are encountered.
  class TooManyRedirects < StandardError; end
  # Raised when the original URL or a redirect target fails validation.
  class InValidUrl < StandardError; end

  # Accepts the empty string (left for Net::HTTP to reject with an
  # ArgumentError) or an http(s) URL with a dotted host, an optional
  # ":port" and an optional path. Anchored with \A/\z so that multi-line
  # input cannot slip an arbitrary first line past validation.
  URL_PATTERN = %r{\A\z|\Ahttps?://[a-z0-9]+(?:[\-.][a-z0-9]+)*\.[a-z]{2,5}(?::[0-9]{1,5})?(?:/.*)?\z}i

  # First <a ... href="..."> in a redirect body; used when the response
  # carries no Location header.
  REDIRECT_LINK_PATTERN = /<a\s[^>]*?href=["']([^"']+)["']/i

  attr_accessor :original_url, :redirect_limit, :timeout
  attr_writer :url, :response

  # original_url - String URL to resolve.
  # opts         - Hash of options:
  #                :redirect_limit - maximum redirects to follow (default 5)
  #                :timeout        - open/read timeout in seconds (default 5)
  #
  # An HTTP proxy is picked up from ENV['http_proxy'] when it is set.
  def initialize(original_url, opts={})
    opts = { :redirect_limit => 5, :timeout => 5 }.merge(opts)

    @original_url   = original_url
    @redirect_limit = opts[:redirect_limit]
    @timeout        = opts[:timeout]

    proxy = ENV['http_proxy']
    @proxy = (proxy.nil? || proxy.empty?) ? nil : Addressable::URI.parse(proxy)
    @base_uri = nil
  end

  # Returns the resolved URL, resolving on first access.
  def url
    resolve unless @url
    @url
  end

  # Returns the body of the final response.
  def body
    response.body
  end

  # Returns the final response, resolving on first access.
  def response
    resolve unless @response
    @response
  end

  # Convenience: builds a Linkr with the given arguments and returns the
  # resolved URL.
  def self.resolve(*args)
    self.new(*args).url
  end

  private

  # Requests the current URL and keeps following redirects until a
  # non-redirect response arrives.
  #
  # Raises TooManyRedirects once more than redirect_limit redirects have been
  # seen, and InValidUrl when a URL fails validation.
  def resolve
    # Count down on a local so the public redirect_limit keeps the value
    # the caller configured.
    remaining = @redirect_limit

    loop do
      raise TooManyRedirects if remaining < 0

      self.url = original_url unless @url
      raise InValidUrl if @url.nil?
      @uri = Addressable::URI.parse(@url).normalize

      absolutize_url if @uri.relative? && @base_uri

      raise InValidUrl unless valid?

      self.response = fetch
      break unless @response.kind_of?(Net::HTTPRedirection)

      @base_uri = @uri
      self.url = redirect_url
      remaining -= 1
    end
  end

  # Performs a GET for @uri, through the proxy when one is configured.
  def fetch
    secure = @uri.scheme == 'https'
    proxy_host = @proxy && @proxy.host
    proxy_port = @proxy && @proxy.port
    port = @uri.port || (secure ? 443 : 80)

    http = Net::HTTP::Proxy(proxy_host, proxy_port).new(@uri.host, port)
    # Without this, https URLs are spoken to in plain HTTP.
    http.use_ssl = true if secure
    http.read_timeout = http.open_timeout = @timeout

    # The fragment is client-side only and must not be sent to the server.
    path = @uri.omit(:scheme, :authority, :fragment).to_s
    http.request(Net::HTTP::Get.new(path))
  end

  # Turns a relative redirect target into an absolute URL by resolving it
  # against the URL that issued the redirect.
  def absolutize_url
    @uri = @base_uri.join(@uri).normalize
    @url = @uri.to_s
    @base_uri = nil
  end

  # Returns the redirect target: the Location header when present, otherwise
  # the first link found in the response body.
  #
  # Raises InValidUrl when neither is available.
  def redirect_url
    location = response['location']
    return location unless location.nil?

    link = REDIRECT_LINK_PATTERN.match(response.body.to_s)
    raise InValidUrl, "redirect response has no Location header or link in its body" unless link
    link[1]
  end

  # Returns true when the current URL string matches URL_PATTERN.
  def valid?
    @url.is_a?(String) && !(URL_PATTERN =~ @url).nil?
  end
end
data/linkr.gemspec ADDED
@@ -0,0 +1,28 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "linkr/version"
4
+
5
# Gem specification for linkr. The version comes from Linkr::VERSION, which
# is loaded by the require at the top of this file.
Gem::Specification.new do |s|
  s.name        = "linkr"
  s.version     = Linkr::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Duncan Robertson"]
  s.email       = ["duncan.robertson@bbc.co.uk"]
  s.homepage    = ""
  s.summary     = %q{Resolves urls to the canonical version}
  s.description = %q{Resolves urls to the canonical version. It does this by following redirects in the headers or body of the destination url.}
  # The bundled LICENSE file carries the MIT licence text.
  s.licenses    = ["MIT"]

  s.required_rubygems_version = Gem::Requirement.new('>= 1.3.6')

  s.add_dependency "addressable"

  s.add_development_dependency "rake"
  s.add_development_dependency "fakeweb"
  s.add_development_dependency("bundler", ">= 1.0.0")

  # NOTE: the deprecated `has_rdoc=` setting has been dropped; RubyGems
  # offers no replacement for it.

  # The file lists are taken from git, so the gem must be built from a
  # git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
data/test/helper.rb ADDED
@@ -0,0 +1,4 @@
1
+ require 'test/unit'
2
+ require 'fakeweb'
3
+
4
+ require_relative '../lib/linkr.rb'
@@ -0,0 +1,68 @@
1
+ require 'helper'
2
+
3
# Tests for Linkr. All HTTP traffic is faked with FakeWeb.
class TestLinkr < Test::Unit::TestCase
  # Drop every registered fake URI so registrations from one test cannot
  # leak into the next (several tests register the same URL).
  def teardown
    FakeWeb.clean_registry
  end

  def test_basics
    FakeWeb.register_uri(:get, "http://bbc.in/pdTHqe", :location => "http://www.bbc.co.uk", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://www.bbc.co.uk", :status => ["200", "OK"], :body => "Hello World")

    l = Linkr.new("http://bbc.in/pdTHqe", {
      :redirect_limit => 10,
      :timeout => 10
    })
    # assert_equal takes (expected, actual); keeping that order makes the
    # failure messages read correctly.
    assert_instance_of Linkr, l
    assert_equal "http://bbc.in/pdTHqe", l.original_url
    assert_equal 10, l.redirect_limit
    assert_equal 10, l.timeout
    assert_equal 'Hello World', l.body
    assert_instance_of Net::HTTPOK, l.response
  end

  def test_some_invalid_urls
    # These are rejected by Linkr's own URL validation, which exists
    # because Net/HTTP bails on uris that URI and Addressable deem fine
    ['http','xxx','whomwah.com','0','123','http://foo'].each do |link|
      assert_raise(Linkr::InValidUrl) {
        Linkr.resolve(link)
      }
    end
  end

  def test_empty_args
    assert_raise(ArgumentError) {
      Linkr.resolve('')
    }
  end

  def test_simple_resolve
    FakeWeb.register_uri(:get, "http://bbc.in/pdTHqe", :location => "http://www.bbc.co.uk", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://www.bbc.co.uk", :status => ["200", "OK"])
    assert_equal "http://www.bbc.co.uk", Linkr.resolve("http://bbc.in/pdTHqe")
  end

  def test_too_many_redirects
    # Seven chained redirects exceed the default limit of five.
    FakeWeb.register_uri(:get, "http://bbc.in/pdTHqe", :location => "http://url1.com", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://url1.com", :location => "http://url2.com", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://url2.com", :location => "http://url3.com", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://url3.com", :location => "http://url4.com", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://url4.com", :location => "http://url5.com", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://url5.com", :location => "http://url6.com", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://url6.com", :location => "http://url7.com", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://url7.com", :status => ["200", "OK"])
    assert_raise(Linkr::TooManyRedirects) {
      Linkr.resolve("http://bbc.in/pdTHqe")
    }
  end

  def test_relative_urls_in_the_redirect
    FakeWeb.register_uri(:get, "http://foo.in/duncan", :location => "/fred", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://foo.in/fred", :status => ["200", "OK"])
    assert_equal "http://foo.in/fred", Linkr.resolve("http://foo.in/duncan")
  end

  def test_redirect_with_location_in_body
    FakeWeb.register_uri(:get, "http://foo.in/duncan", :body => "<p><a href='http://bar.in/fred'>Redirecting...</a>", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://bar.in/fred", :status => ["200", "OK"])
    assert_equal "http://bar.in/fred", Linkr.resolve("http://foo.in/duncan")
  end
end
+ end
metadata ADDED
@@ -0,0 +1,102 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: linkr
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Duncan Robertson
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-08-03 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: addressable
16
+ requirement: &70317977716740 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70317977716740
25
+ - !ruby/object:Gem::Dependency
26
+ name: rake
27
+ requirement: &70317977716280 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *70317977716280
36
+ - !ruby/object:Gem::Dependency
37
+ name: fakeweb
38
+ requirement: &70317977715860 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *70317977715860
47
+ - !ruby/object:Gem::Dependency
48
+ name: bundler
49
+ requirement: &70317977715360 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 1.0.0
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *70317977715360
58
+ description: Resolves urls to the canonical version. It does this by following redirects
59
+ in the headers or body of the destination url.
60
+ email:
61
+ - duncan.robertson@bbc.co.uk
62
+ executables: []
63
+ extensions: []
64
+ extra_rdoc_files: []
65
+ files:
66
+ - .gitignore
67
+ - Gemfile
68
+ - LICENSE
69
+ - README.org
70
+ - Rakefile
71
+ - lib/linkr.rb
72
+ - lib/linkr/version.rb
73
+ - linkr.gemspec
74
+ - test/helper.rb
75
+ - test/test_resolve.rb
76
+ homepage: ''
77
+ licenses: []
78
+ post_install_message:
79
+ rdoc_options: []
80
+ require_paths:
81
+ - lib
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: 1.3.6
94
+ requirements: []
95
+ rubyforge_project:
96
+ rubygems_version: 1.8.6
97
+ signing_key:
98
+ specification_version: 3
99
+ summary: Resolves urls to the canonical version
100
+ test_files:
101
+ - test/helper.rb
102
+ - test/test_resolve.rb