linkr 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ .rvmrc
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Fetch gems over HTTPS. The old `source :rubygems` shorthand resolves to a
# plain-HTTP endpoint and is deprecated by Bundler.
source "https://rubygems.org"

# Take the runtime and development dependencies from linkr.gemspec.
gemspec
3
+
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2011, Duncan Robertson, British Broadcasting Corporation
2
+
3
+ Permission is hereby granted, free of charge, to any person
4
+ obtaining a copy of this software and associated documentation
5
+ files (the "Software"), to deal in the Software without
6
+ restriction, including without limitation the rights to use,
7
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the
9
+ Software is furnished to do so, subject to the following
10
+ conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
data/README.org ADDED
@@ -0,0 +1,36 @@
1
+ * Linkr
2
+
3
+ ** Overview
4
+
5
+ A Ruby library for following links. Net::HTTP does not automatically follow
6
+ redirects such as 301 responses. Also, with the number of URL shortening services
7
+ out there, it is common to want to resolve URLs to the canonical version.
8
+
9
+ Linkr fixes this problem through a simple interface.
10
+
11
+ ** Dependencies
12
+
13
+ - Uses Net::HTTP under the hood.
14
+ - Linkr requires the addressable gem. This will be installed as a dependency
15
+ when you install Linkr
16
+
17
+ ** Usage
18
+
19
+ : require 'linkr'
20
+ :
21
+ : # easy url resolving
22
+ : Linkr.resolve("http://bbc.in/pdTHqe") => http://www.bbc.co.uk
23
+ :
24
+ : # if you need more info
25
+ : link = Linkr.new("http://bbc.in/pdTHqe")
26
+ : link.body => "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Tr
27
+ : ansitional//EN\" \"http://www.w3.org/TR/xh".......</body></html>"
28
+
29
+ ** Contributing
30
+ - Fork the project
31
+ - Send a pull request
32
+ - Don't touch the .gemspec, I'll do that when I release a new version
33
+
34
+ ** Author
35
+
36
+ [[http://whomwah.com][Duncan Robertson]] - BBC R&D
data/Rakefile ADDED
@@ -0,0 +1,14 @@
1
require 'bundler'
require 'rake/testtask'

# Adds Bundler's gem packaging tasks to this Rakefile.
Bundler::GemHelper.install_tasks

# `rake test` runs every test/test_*.rb file with lib/ and test/ on the load path.
desc "Run tests"
Rake::TestTask.new do |test_task|
  test_task.libs.push("lib", "test")
  test_task.test_files = FileList['test/test_*.rb']
  test_task.verbose = true
end

# Running `rake` with no arguments runs the test suite.
task :default => [:test]
@@ -0,0 +1,3 @@
1
# Version constant for the linkr gem.
#
# Linkr is declared as a *class* by the library itself, so it must be opened as
# a class here too. Declaring it as a module would make Ruby raise TypeError
# ("Linkr is not a class" / "Linkr is not a module") as soon as both files are
# loaded in the same process, e.g. when the gemspec and the library are both
# required.
class Linkr
  VERSION = "0.1.0"
end
data/lib/linkr.rb ADDED
@@ -0,0 +1,88 @@
1
+ require 'ostruct'
2
+ require 'net/http'
3
+ require 'addressable/uri'
4
+
5
# Resolves a URL to its final destination by following HTTP redirects.
#
#   Linkr.resolve("http://bbc.in/pdTHqe")      # => final URL as a String
#   Linkr.new("http://bbc.in/pdTHqe").body     # => body of the final response
#
# Redirect targets are taken from the Location header, or, when that header is
# absent, from the first <a href="..."> link in the response body.
class Linkr
  # Raised when more redirects are encountered than the redirect limit allows.
  class TooManyRedirects < StandardError; end

  # Raised when the URL to fetch does not look like an http(s) URL, or when a
  # redirect response names no target at all.
  class InValidUrl < StandardError; end

  # Accepted URLs: http/https, a dotted host name ending in an alphabetic TLD,
  # an optional ":port" and an optional path. Anchored with \A and \z (not ^/$)
  # so that a multi-line string cannot pass just because one line matches.
  URL_PATTERN = /\Ahttps?:\/\/[a-z0-9]+([\-\.][a-z0-9]+)*\.[a-z]{2,63}(:[0-9]{1,5})?(\/.*)?\z/i

  # First anchor tag in a body; capture 2 is the quoted href value.
  LINK_PATTERN = /<a\s[^>]*?href\s*=\s*(["'])(.*?)\1/i

  attr_accessor :original_url, :redirect_limit, :timeout
  attr_writer :url, :response

  # original_url - the URL to resolve (String).
  # opts         - :redirect_limit  maximum redirects to follow (default 5)
  #                :timeout         open/read timeout passed to Net::HTTP (default 5)
  #
  # An HTTP proxy is picked up from the http_proxy environment variable.
  def initialize(original_url, opts={})
    opts = { :redirect_limit => 5, :timeout => 5 }.merge(opts)

    @original_url   = original_url
    @redirect_limit = opts[:redirect_limit]
    @timeout        = opts[:timeout]
    # An empty OpenStruct answers nil for host/port, i.e. "no proxy".
    @proxy          = ENV['http_proxy'] ? Addressable::URI.parse(ENV['http_proxy']) : OpenStruct.new
    @link_cache     = nil
  end

  # The resolved URL. Triggers resolution on first use.
  def url
    resolve unless @url
    @url
  end

  # Body of the final response.
  def body
    response.body
  end

  # The final Net::HTTP response. Triggers resolution on first use.
  def response
    resolve unless @response
    @response
  end

  # Shortcut for Linkr.new(*args).url.
  def self.resolve(*args)
    self.new(*args).url
  end

  private

  # Fetches the current URL and recurses through #redirect while the server
  # keeps answering with a redirection.
  #
  # Raises TooManyRedirects once the redirect budget is used up, ArgumentError
  # for an empty/nil URL and InValidUrl for anything URL_PATTERN rejects.
  def resolve
    raise TooManyRedirects if @redirect_limit < 0

    self.url = original_url unless @url
    # Fail early and explicitly instead of relying on Net::HTTP to choke on an
    # empty request path further down.
    raise ArgumentError, "url must not be empty" if @url.to_s.empty?

    @uri = Addressable::URI.parse(@url).normalize

    # A redirect target without scheme/host is relative to the previous site.
    fix_relative_url if !@uri.normalized_site && @link_cache

    raise InValidUrl unless valid?

    # inferred_port falls back to the scheme's default (80/443); plain #port is
    # nil for default ports, which made every https URL go to port 80.
    http = Net::HTTP::Proxy(@proxy.host, @proxy.port).new(@uri.host, @uri.inferred_port)
    # NOTE(review): very old Rubies need `require 'net/https'` for TLS support;
    # newer ones load OpenSSL on demand - confirm for the targeted versions.
    http.use_ssl = true if @uri.scheme == 'https'
    http.read_timeout = http.open_timeout = @timeout
    request = Net::HTTP::Get.new(@uri.omit(:scheme, :authority).to_s)
    self.response = http.request(request)

    redirect if response.kind_of?(Net::HTTPRedirection)
  end

  # Moves on to the redirect target. The current site is remembered so that a
  # relative target can be completed, and one unit of the redirect budget is
  # spent (redirect_limit is decremented in place).
  def redirect
    @link_cache = @uri.normalized_site
    self.url = redirect_url
    @redirect_limit -= 1
    resolve
  end

  # Rebuilds an absolute URL from the remembered site and the relative target.
  def fix_relative_url
    @url = File.join(@link_cache, @uri.omit(:scheme, :authority).to_s)
    @uri = Addressable::URI.parse(@url).normalize
    @link_cache = nil
  end

  # Target of the current redirect response: the Location header when present,
  # otherwise the href of the first link in the body.
  #
  # Raises InValidUrl when neither is available.
  def redirect_url
    location = response['location']
    return location unless location.nil?

    # to_s guards against a nil body.
    target = response.body.to_s[LINK_PATTERN, 2]
    if target.nil?
      raise InValidUrl, "redirect response has neither a Location header nor a link in its body"
    end
    target
  end

  # True when the current URL is a String that matches URL_PATTERN.
  def valid?
    candidate = url
    candidate.is_a?(String) && !(candidate =~ URL_PATTERN).nil?
  end
end
data/linkr.gemspec ADDED
@@ -0,0 +1,28 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for linkr. The version is read from lib/linkr/version.rb
# and the file lists are taken from git.
$:.push File.expand_path("../lib", __FILE__)
require "linkr/version"

Gem::Specification.new do |s|
  s.name        = "linkr"
  s.version     = Linkr::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Duncan Robertson"]
  s.email       = ["duncan.robertson@bbc.co.uk"]
  # TODO: fill in the project homepage; it is left empty here.
  s.homepage    = ""
  s.summary     = %q{Resolves urls to the canonical version}
  s.description = %q{Resolves urls to the canonical version. It does this by following redirects in the headers or body of the destination url.}
  # The bundled LICENSE file carries the MIT license text.
  s.licenses    = ["MIT"]

  s.required_rubygems_version = Gem::Requirement.new('>= 1.3.6')

  s.add_dependency "addressable"

  s.add_development_dependency "rake"
  s.add_development_dependency "fakeweb"
  s.add_development_dependency("bundler", ">= 1.0.0")

  # `s.has_rdoc = false` used to be set here. The attribute is deprecated and
  # no longer exists in current RubyGems, where assigning it aborts loading of
  # the spec, so it is intentionally omitted.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
data/test/helper.rb ADDED
@@ -0,0 +1,4 @@
1
+ require 'test/unit'
2
+ require 'fakeweb'
3
+
4
+ require_relative '../lib/linkr.rb'
@@ -0,0 +1,68 @@
1
+ require 'helper'
2
+
3
# Exercises Linkr against HTTP responses stubbed with FakeWeb.
#
# assert_equal is always called as (expected, actual) so that failure messages
# report the two values the right way round.
class TestLinkr < Test::Unit::TestCase
  # Start each test from an empty registry so stubs cannot leak between tests.
  def setup
    FakeWeb.clean_registry
  end

  def test_basics
    stub_redirect("http://bbc.in/pdTHqe", "http://www.bbc.co.uk")
    stub_ok("http://www.bbc.co.uk", :body => "Hello World")

    l = Linkr.new("http://bbc.in/pdTHqe", {
      :redirect_limit => 10,
      :timeout => 10
    })
    assert_equal Linkr, l.class
    assert_equal "http://bbc.in/pdTHqe", l.original_url
    assert_equal 10, l.redirect_limit
    assert_equal 10, l.timeout
    assert_equal 'Hello World', l.body
    assert_equal Net::HTTPOK, l.response.class
  end

  def test_some_invalid_urls
    # These are rejected by Linkr's own URL validation, which exists because
    # Net/HTTP bails on uris that URI and Addressable deem fine.
    ['http', 'xxx', 'whomwah.com', '0', '123', 'http://foo'].each do |link|
      assert_raise(Linkr::InValidUrl) {
        Linkr.resolve(link)
      }
    end
  end

  def test_empty_args
    assert_raise(ArgumentError) {
      Linkr.resolve('')
    }
  end

  def test_simple_resolve
    stub_redirect("http://bbc.in/pdTHqe", "http://www.bbc.co.uk")
    stub_ok("http://www.bbc.co.uk")
    assert_equal "http://www.bbc.co.uk", Linkr.resolve("http://bbc.in/pdTHqe")
  end

  def test_too_many_redirects
    # bbc.in -> url1 -> ... -> url7 is seven redirects, more than the default limit.
    chain = ["http://bbc.in/pdTHqe"] + (1..7).map { |i| "http://url#{i}.com" }
    chain.each_cons(2) { |from, to| stub_redirect(from, to) }
    stub_ok(chain.last)
    assert_raise(Linkr::TooManyRedirects) {
      Linkr.resolve("http://bbc.in/pdTHqe")
    }
  end

  def test_relative_urls_in_the_redirect
    stub_redirect("http://foo.in/duncan", "/fred")
    stub_ok("http://foo.in/fred")
    assert_equal "http://foo.in/fred", Linkr.resolve("http://foo.in/duncan")
  end

  def test_redirect_with_location_in_body
    # A 301 without a Location header: the target has to come from the body.
    FakeWeb.register_uri(:get, "http://foo.in/duncan", :body => "<p><a href='http://bar.in/fred'>Redirecting...</a>", :status => ["301", "Moved permanently"])
    stub_ok("http://bar.in/fred")
    assert_equal "http://bar.in/fred", Linkr.resolve("http://foo.in/duncan")
  end

  private

  # Registers a GET stub answering with a 301 that points at +to+.
  def stub_redirect(from, to)
    FakeWeb.register_uri(:get, from, :location => to, :status => ["301", "Moved permanently"])
  end

  # Registers a GET stub answering 200 OK; +extra+ is merged into the options.
  def stub_ok(url, extra={})
    FakeWeb.register_uri(:get, url, { :status => ["200", "OK"] }.merge(extra))
  end
end
metadata ADDED
@@ -0,0 +1,102 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: linkr
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Duncan Robertson
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-08-03 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: addressable
16
+ requirement: &70317977716740 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70317977716740
25
+ - !ruby/object:Gem::Dependency
26
+ name: rake
27
+ requirement: &70317977716280 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *70317977716280
36
+ - !ruby/object:Gem::Dependency
37
+ name: fakeweb
38
+ requirement: &70317977715860 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *70317977715860
47
+ - !ruby/object:Gem::Dependency
48
+ name: bundler
49
+ requirement: &70317977715360 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 1.0.0
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *70317977715360
58
+ description: Resolves urls to the canonical version. It does this by following redirects
59
+ in the headers or body of the destination url.
60
+ email:
61
+ - duncan.robertson@bbc.co.uk
62
+ executables: []
63
+ extensions: []
64
+ extra_rdoc_files: []
65
+ files:
66
+ - .gitignore
67
+ - Gemfile
68
+ - LICENSE
69
+ - README.org
70
+ - Rakefile
71
+ - lib/linkr.rb
72
+ - lib/linkr/version.rb
73
+ - linkr.gemspec
74
+ - test/helper.rb
75
+ - test/test_resolve.rb
76
+ homepage: ''
77
+ licenses: []
78
+ post_install_message:
79
+ rdoc_options: []
80
+ require_paths:
81
+ - lib
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: 1.3.6
94
+ requirements: []
95
+ rubyforge_project:
96
+ rubygems_version: 1.8.6
97
+ signing_key:
98
+ specification_version: 3
99
+ summary: Resolves urls to the canonical version
100
+ test_files:
101
+ - test/helper.rb
102
+ - test/test_resolve.rb