unshorten 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/unshorten +10 -0
- data/lib/unshorten.rb +86 -0
- data/test/test_unshorten.rb +16 -0
- metadata +50 -0
data/bin/unshorten
ADDED
data/lib/unshorten.rb
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'uri'
|
3
|
+
|
4
|
+
# Get original URLs from shortened ones.
#
# A URL is resolved by sending HEAD requests and following the +Location+
# header of every redirect response until a non-redirect response is
# received, a request fails, or the +:max_level+ limit is reached.
class Unshorten

  # Cache entities limit
  CACHE_SIZE_LIMIT = 1024

  # Default options for unshorten
  DEFAULT_OPTIONS = {
    :max_level => 10,
    :timeout => 2,
    :use_cache => true,
    :add_missing_http => true
  }

  # Maps a requested URL to the URL it finally resolved to.
  @@cache = { }

  class << self

    # Unshorten a URL
    #
    # @param url [String] A shortened URL
    # @param options [Hash] A set of options; the hash is not modified
    # @option options [Integer] :max_level Max redirect times
    # @option options [Integer] :timeout Timeout in seconds, for every request
    # @option options [Boolean] :use_cache Use cached result if available
    # @option options [Boolean] :add_missing_http add 'http://' if missing
    # @see DEFAULT_OPTIONS
    #
    # @return Original url, a url that does not redirect
    # @raise [URI::InvalidURIError] if the url (or a redirect target)
    #   cannot be parsed
    def unshorten(url, options = {})
      # Merge into a copy so the caller's hash is left untouched.
      follow(url, mix_options(DEFAULT_OPTIONS, options))
    end

    alias :'[]' :unshorten

    private

    # Drop every cached result.
    def expire_cache #:nodoc:
      @@cache = { }
    end

    # Return a copy of +old+ overlaid with each hash in +new+, in order.
    def mix_options(old, *new) #:nodoc:
      options = old.dup
      new.each { |n| n.each { |k, v| options[k] = v } }
      options
    end

    # Prefix +url+ with 'http://' unless it already starts with an
    # http: or https: scheme (case-insensitive).
    def add_missing_http(url) #:nodoc:
      # \A anchors at the start of the string; ^ would also match after
      # an embedded newline.
      if url =~ /\Ahttps?:/i
        url
      else
        "http://#{url}"
      end
    end

    # Send a HEAD request for +url+.
    #
    # Returns the Net::HTTPResponse, or nil when the request fails for
    # any reason (connection refused, timeout, unsupported scheme, ...).
    # A url that cannot be parsed raises URI::InvalidURIError.
    def head_request(url, timeout) #:nodoc:
      uri = URI.parse(url)

      begin
        http = Net::HTTP.new(uri.host, uri.port)
        # Without this an https:// URL would be spoken to in plain HTTP.
        http.use_ssl = true if uri.is_a?(URI::HTTPS)
        # :timeout is documented as "for every request", so it has to
        # bound connecting as well as reading.
        http.open_timeout = timeout
        http.read_timeout = timeout
        # request_uri keeps the query string and yields '/' for an empty
        # path; uri.path alone would drop '?key=value'.
        http.request_head(uri.request_uri)
      rescue StandardError
        nil
      end
    end

    # Resolve +url+ by following redirects recursively.
    #
    # +level+ counts the redirects followed so far; once it reaches
    # options[:max_level] the current url is returned as-is.
    #
    # NOTE(review): a chain cut short by :max_level is still cached, so a
    # later call with a larger :max_level may get the truncated result.
    def follow(url, options = DEFAULT_OPTIONS, level = 0) #:nodoc:
      # Normalize first: results are stored under the normalized url, so
      # the lookup must use the same key.
      url = add_missing_http(url) if options[:add_missing_http]

      return @@cache[url] if options[:use_cache] && @@cache[url]
      return url if level >= options[:max_level]

      response = head_request(url, options[:timeout])
      location = response['location'] if response.is_a?(Net::HTTPRedirection)
      return url unless location

      # Location may be relative to the url that was just requested.
      target = URI.join(url, location).to_s
      resolved = follow(target, options, level + 1)

      # Check right before storing so the cache never exceeds the limit.
      expire_cache if @@cache.size >= CACHE_SIZE_LIMIT
      @@cache[url] = resolved
    end

  end

end
|
84
|
+
|
85
|
+
# vim:et:ts=2 sw=2
|
86
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'unshorten'
|
3
|
+
|
4
|
+
# Tests for Unshorten.
#
# NOTE(review): test_unshorten_alias resolves SHORTENED_URL with the
# default redirect limit, so it presumably needs network access and the
# remote redirect to still point at ORIGINAL_URL -- confirm before
# relying on it in CI.
class UnshortenTest < Test::Unit::TestCase
  # Expected final destination of SHORTENED_URL.
  ORIGINAL_URL = 'http://dir.yahoo.com/Reference/Libraries/Library_and_Information_Science/Metadata/URIs___Universal_Resource_Identifiers/URLs___Uniform_Resource_Locators/URL_Shortening/'
  # Short link used as input by every test.
  SHORTENED_URL = 'http://tinyurl.com/j'

  # Unshorten[] resolves the short link to its destination, bypassing
  # the cache.
  def test_unshorten_alias
    resolved = Unshorten[SHORTENED_URL, :use_cache => false]

    assert_equal(ORIGINAL_URL, resolved)
  end

  # With :max_level => 0 no redirect is followed, so the input url is
  # returned unchanged.
  def test_option_max_level
    options = { :max_level => 0, :use_cache => false }
    resolved = Unshorten.unshorten(SHORTENED_URL, options)

    assert_equal(SHORTENED_URL, resolved)
  end
end
|
metadata
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: unshorten
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Wu Jun
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-12-10 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Get original URLs from shortened ones
|
15
|
+
email: quark@lihdd.net
|
16
|
+
executables:
|
17
|
+
- unshorten
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- lib/unshorten.rb
|
22
|
+
- test/test_unshorten.rb
|
23
|
+
- bin/unshorten
|
24
|
+
homepage: https://github.com/quark-zju/unshorten
|
25
|
+
licenses: []
|
26
|
+
post_install_message:
|
27
|
+
rdoc_options: []
|
28
|
+
require_paths:
|
29
|
+
- lib
|
30
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
31
|
+
none: false
|
32
|
+
requirements:
|
33
|
+
- - ! '>='
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '0'
|
36
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ! '>='
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
requirements: []
|
43
|
+
rubyforge_project:
|
44
|
+
rubygems_version: 1.8.11
|
45
|
+
signing_key:
|
46
|
+
specification_version: 3
|
47
|
+
summary: Unshorten URLs
|
48
|
+
test_files:
|
49
|
+
- test/test_unshorten.rb
|
50
|
+
has_rdoc:
|