linkr 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +5 -0
- data/Gemfile +3 -0
- data/LICENSE +22 -0
- data/README.org +36 -0
- data/Rakefile +14 -0
- data/lib/linkr/version.rb +3 -0
- data/lib/linkr.rb +88 -0
- data/linkr.gemspec +28 -0
- data/test/helper.rb +4 -0
- data/test/test_resolve.rb +68 -0
- metadata +102 -0
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2011, Duncan Robertson, British Broadcasting Corporation
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person
|
4
|
+
obtaining a copy of this software and associated documentation
|
5
|
+
files (the "Software"), to deal in the Software without
|
6
|
+
restriction, including without limitation the rights to use,
|
7
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
copies of the Software, and to permit persons to whom the
|
9
|
+
Software is furnished to do so, subject to the following
|
10
|
+
conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
17
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
19
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
20
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
21
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README.org
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
* Linkr
|
2
|
+
|
3
|
+
** Overview
|
4
|
+
|
5
|
+
A Ruby library for following links. Net::HTTP does not auto-redirect when webpages
|
6
|
+
return 301 redirects. Also, with the number of URL-shortening services out there,
|
7
|
+
it is common to want to resolve URLs to their canonical version.
|
8
|
+
|
9
|
+
Linkr fixes this problem through a simple interface.
|
10
|
+
|
11
|
+
** Dependencies
|
12
|
+
|
13
|
+
- Uses Net::HTTP under the hood.
|
14
|
+
- Linkr requires the addressable Gem. This will be installed as a dependency
|
15
|
+
when you install Linkr
|
16
|
+
|
17
|
+
** Usage
|
18
|
+
|
19
|
+
: require 'linkr'
|
20
|
+
:
|
21
|
+
: # easy url resolving
|
22
|
+
: Linkr.resolve("http://bbc.in/pdTHqe") => http://www.bbc.co.uk
|
23
|
+
:
|
24
|
+
: # if you need more info
|
25
|
+
: link = Linkr.new("http://bbc.in/pdTHqe")
|
26
|
+
: link.body => "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Tr
|
27
|
+
: ansitional//EN\" \"http://www.w3.org/TR/xh".......</body></html>"
|
28
|
+
|
29
|
+
** Contributing
|
30
|
+
- Fork the project
|
31
|
+
- Send a pull request
|
32
|
+
- Don't touch the .gemspec, I'll do that when I release a new version
|
33
|
+
|
34
|
+
** Author
|
35
|
+
|
36
|
+
[[http://whomwah.com][Duncan Robertson]] - BBC R&D
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'rake/testtask'
|
2
|
+
require 'bundler'
|
3
|
+
|
4
|
+
# Register the gem tasks provided by Bundler::GemHelper.
Bundler::GemHelper.install_tasks

# `rake test` runs every test/test_*.rb with lib/ and test/ on the load path.
desc "Run tests"
Rake::TestTask.new do |test_task|
  test_task.libs.push("lib", "test")
  test_task.test_files = FileList['test/test_*.rb']
  test_task.verbose = true
end

# Plain `rake` runs the test suite.
task :default => :test
|
data/lib/linkr.rb
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
require 'net/http'
|
3
|
+
require 'addressable/uri'
|
4
|
+
|
5
|
+
# Resolves a URL to its final destination by following HTTP redirects.
#
#   Linkr.resolve("http://bbc.in/pdTHqe")   # => "http://www.bbc.co.uk"
#
#   link = Linkr.new("http://bbc.in/pdTHqe", :redirect_limit => 10)
#   link.url        # final URL
#   link.response   # Net::HTTP response of the last request
#   link.body       # body of that response
class Linkr
  # Raised when more than +redirect_limit+ redirects would have to be followed.
  class TooManyRedirects < StandardError; end
  # Raised when a URL fails validation or a redirect carries no usable target.
  class InValidUrl < StandardError; end

  # Accepted URLs: the empty string (kept from the original pattern; such a
  # URL fails later, when the request is built) or an http/https URL with a
  # dotted host name. Anchored with \A and \z so that one valid-looking line
  # inside a multi-line string cannot make the whole string pass.
  VALID_URL = /\A\z|\A(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(([0-9]{1,5})?\/.*)?\z/i

  # First quoted href of an <a> tag, used when a redirect response has no
  # Location header. Other attributes may surround the href.
  BODY_LINK = /<a\s[^>]*?href=["']([^"'>]+)["']/i

  attr_accessor :original_url, :redirect_limit, :timeout
  attr_writer :url, :response

  # original_url - the URL to resolve.
  # opts         - :redirect_limit (default 5): redirects to follow before
  #                raising TooManyRedirects;
  #                :timeout (default 5): value assigned to both the open and
  #                read timeouts of each request.
  #
  # The proxy is read from ENV['http_proxy'] when that variable is set.
  def initialize(original_url, opts={})
    opts = {
      :redirect_limit => 5,
      :timeout => 5
    }.merge(opts)

    @original_url = original_url
    @redirect_limit = opts[:redirect_limit]
    @timeout = opts[:timeout]
    @proxy = ENV['http_proxy'] ? Addressable::URI.parse(ENV['http_proxy']) : OpenStruct.new
    @link_cache = nil
  end

  # The resolved URL; triggers resolution on first use.
  def url
    resolve unless @url
    @url
  end

  # Body of the final response.
  def body
    response.body
  end

  # The final response; triggers resolution on first use.
  def response
    resolve unless @response
    @response
  end

  # Shortcut for Linkr.new(*args).url.
  def self.resolve(*args)
    self.new(*args).url
  end

  private

  # Requests the current URL and keeps following redirects until a
  # non-redirect response arrives.
  #
  # Raises TooManyRedirects once the redirect budget is used up and
  # InValidUrl when a URL does not pass valid?.
  def resolve
    # Count down on a local copy so the public redirect_limit attribute
    # still holds the configured value afterwards.
    redirects_left = @redirect_limit

    loop do
      raise TooManyRedirects if redirects_left < 0

      self.url = original_url unless @url
      @uri = Addressable::URI.parse(@url).normalize

      # A redirect target without scheme/host is relative to the site that
      # issued the redirect.
      fix_relative_url if !@uri.normalized_site && @link_cache

      raise InValidUrl unless valid?

      self.response = fetch(@uri)
      break unless @response.kind_of?(Net::HTTPRedirection)

      @link_cache = @uri.normalized_site
      self.url = redirect_url
      redirects_left -= 1
    end
  end

  # Performs one GET request for +uri+ (through the configured proxy, if any)
  # and returns the response.
  def fetch(uri)
    # inferred_port yields the scheme's default port when none is given, so
    # https URLs go to 443 rather than Net::HTTP's default of 80.
    http = Net::HTTP::Proxy(@proxy.host, @proxy.port).new(uri.host, uri.inferred_port)
    if uri.scheme == 'https'
      require 'net/https' # loaded lazily: only needed for TLS requests
      http.use_ssl = true
    end
    http.read_timeout = http.open_timeout = @timeout
    http.request(Net::HTTP::Get.new(uri.omit(:scheme, :authority).to_s))
  end

  # Rebuilds an absolute URL from the remembered site and the relative
  # redirect target currently held in @uri.
  def fix_relative_url
    @url = File.join(@link_cache, @uri.omit(:scheme,:authority).to_s)
    @uri = Addressable::URI.parse(@url).normalize
    @link_cache = nil
  end

  # Target of the current redirect response: the Location header, or failing
  # that the first link found in the response body.
  #
  # Raises InValidUrl when neither is present.
  def redirect_url
    location = @response['location']
    return location unless location.nil?

    link = @response.body.to_s.match(BODY_LINK)
    raise InValidUrl, "redirect response has no Location header and no link in its body" unless link
    link[1]
  end

  # True when the current URL matches VALID_URL, false otherwise.
  def valid?
    !!(@url && @url =~ VALID_URL)
  end
end
|
data/linkr.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "linkr/version"
|
4
|
+
|
5
|
+
# Gem definition for linkr. The version comes from Linkr::VERSION and the
# packaged file lists are taken from `git ls-files`, so the gem has to be
# built from inside a git checkout.
Gem::Specification.new do |s|
  s.name        = "linkr"
  s.version     = Linkr::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Duncan Robertson"]
  s.email       = ["duncan.robertson@bbc.co.uk"]
  s.homepage    = ""
  s.summary     = %q{Resolves urls to the canonical version}
  s.description = %q{Resolves urls to the canonical version. It does this by following redirects in the headers or body of the destination url.}
  # Matches the MIT-style terms in the bundled LICENSE file.
  s.licenses    = ["MIT"]

  s.required_rubygems_version = Gem::Requirement.new('>= 1.3.6')

  s.add_dependency "addressable"

  s.add_development_dependency "rake"
  s.add_development_dependency "fakeweb"
  s.add_development_dependency("bundler", ">= 1.0.0")

  s.has_rdoc      = false
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Exercises Linkr against responses faked with FakeWeb, so no real network
# traffic is needed. assert_equal is called as (expected, actual) so that
# failure messages label the two values correctly.
class TestLinkr < Test::Unit::TestCase
  # Options are stored as given, and body/response come from the final hop.
  def test_basics
    FakeWeb.register_uri(:get, "http://bbc.in/pdTHqe", :location => "http://www.bbc.co.uk", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://www.bbc.co.uk", :status => ["200", "OK"], :body => "Hello World")

    l = Linkr.new("http://bbc.in/pdTHqe", {
      :redirect_limit => 10,
      :timeout => 10
    })
    assert_equal Linkr, l.class
    assert_equal "http://bbc.in/pdTHqe", l.original_url
    assert_equal 10, l.redirect_limit
    assert_equal 10, l.timeout
    assert_equal 'Hello World', l.body
    assert_equal Net::HTTPOK, l.response.class
  end

  def test_some_invalid_urls
    # These are rejected by the URL pattern in Linkr#valid? (lib/linkr.rb),
    # which exists because Net/HTTP bails on uris that URI and Addressable
    # deem fine
    ['http','xxx','whomwah.com','0','123','http://foo'].each do |link|
      assert_raise(Linkr::InValidUrl) {
        Linkr.resolve(link)
      }
    end
  end

  def test_empty_args
    assert_raise(ArgumentError) {
      Linkr.resolve('')
    }
  end

  def test_simple_resolve
    FakeWeb.register_uri(:get, "http://bbc.in/pdTHqe", :location => "http://www.bbc.co.uk", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://www.bbc.co.uk", :status => ["200", "OK"])
    assert_equal "http://www.bbc.co.uk", Linkr.resolve("http://bbc.in/pdTHqe")
  end

  # Seven chained redirects exceed the default redirect limit of 5.
  def test_too_many_redirects
    FakeWeb.register_uri(:get, "http://bbc.in/pdTHqe", :location => "http://url1.com", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://url1.com", :location => "http://url2.com", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://url2.com", :location => "http://url3.com", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://url3.com", :location => "http://url4.com", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://url4.com", :location => "http://url5.com", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://url5.com", :location => "http://url6.com", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://url6.com", :location => "http://url7.com", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://url7.com", :status => ["200", "OK"])
    assert_raise(Linkr::TooManyRedirects) {
      Linkr.resolve("http://bbc.in/pdTHqe")
    }
  end

  def test_relative_urls_in_the_redirect
    FakeWeb.register_uri(:get, "http://foo.in/duncan", :location => "/fred", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://foo.in/fred", :status => ["200", "OK"])
    assert_equal "http://foo.in/fred", Linkr.resolve("http://foo.in/duncan")
  end

  # No Location header: the target is taken from the link in the body.
  def test_redirect_with_location_in_body
    FakeWeb.register_uri(:get, "http://foo.in/duncan", :body => "<p><a href='http://bar.in/fred'>Redirecting...</a>", :status => ["301", "Moved permanently"])
    FakeWeb.register_uri(:get, "http://bar.in/fred", :status => ["200", "OK"])
    assert_equal "http://bar.in/fred", Linkr.resolve("http://foo.in/duncan")
  end
end
|
metadata
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: linkr
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Duncan Robertson
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-08-03 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: addressable
|
16
|
+
requirement: &70317977716740 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70317977716740
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rake
|
27
|
+
requirement: &70317977716280 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70317977716280
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: fakeweb
|
38
|
+
requirement: &70317977715860 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *70317977715860
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: bundler
|
49
|
+
requirement: &70317977715360 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.0.0
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *70317977715360
|
58
|
+
description: Resolves urls to the canonical version. It does this by following redirects
|
59
|
+
in the headers or body of the destination url.
|
60
|
+
email:
|
61
|
+
- duncan.robertson@bbc.co.uk
|
62
|
+
executables: []
|
63
|
+
extensions: []
|
64
|
+
extra_rdoc_files: []
|
65
|
+
files:
|
66
|
+
- .gitignore
|
67
|
+
- Gemfile
|
68
|
+
- LICENSE
|
69
|
+
- README.org
|
70
|
+
- Rakefile
|
71
|
+
- lib/linkr.rb
|
72
|
+
- lib/linkr/version.rb
|
73
|
+
- linkr.gemspec
|
74
|
+
- test/helper.rb
|
75
|
+
- test/test_resolve.rb
|
76
|
+
homepage: ''
|
77
|
+
licenses: []
|
78
|
+
post_install_message:
|
79
|
+
rdoc_options: []
|
80
|
+
require_paths:
|
81
|
+
- lib
|
82
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ! '>='
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: '0'
|
88
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: 1.3.6
|
94
|
+
requirements: []
|
95
|
+
rubyforge_project:
|
96
|
+
rubygems_version: 1.8.6
|
97
|
+
signing_key:
|
98
|
+
specification_version: 3
|
99
|
+
summary: Resolves urls to the canonical version
|
100
|
+
test_files:
|
101
|
+
- test/helper.rb
|
102
|
+
- test/test_resolve.rb
|