url_expander 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +17 -0
- data/Gemfile.lock +28 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +131 -0
- data/Rakefile +70 -0
- data/VERSION +1 -0
- data/lib/url_expander/expanders/api/4sq.rb +12 -0
- data/lib/url_expander/expanders/api/bitly.rb +61 -0
- data/lib/url_expander/expanders/api/budurl.rb +41 -0
- data/lib/url_expander/expanders/api/cligs.rb +42 -0
- data/lib/url_expander/expanders/api/decenturl.rb +42 -0
- data/lib/url_expander/expanders/api/fxnws.rb +12 -0
- data/lib/url_expander/expanders/api/googl.rb +43 -0
- data/lib/url_expander/expanders/api/isgd.rb +41 -0
- data/lib/url_expander/expanders/api/nytims.rb +12 -0
- data/lib/url_expander/expanders/api/tcrnch.rb +12 -0
- data/lib/url_expander/expanders/api/xrlus.rb +40 -0
- data/lib/url_expander/expanders/api.rb +21 -0
- data/lib/url_expander/expanders/basic/adjix.rb +29 -0
- data/lib/url_expander/expanders/basic/digbig.rb +30 -0
- data/lib/url_expander/expanders/basic/doiop.rb +30 -0
- data/lib/url_expander/expanders/basic/easyurljp.rb +23 -0
- data/lib/url_expander/expanders/basic/justas.rb +23 -0
- data/lib/url_expander/expanders/basic/moourl.rb +29 -0
- data/lib/url_expander/expanders/basic/notlong.rb +29 -0
- data/lib/url_expander/expanders/basic/nutshellurl.rb +30 -0
- data/lib/url_expander/expanders/basic/owly.rb +23 -0
- data/lib/url_expander/expanders/basic/shrtst.rb +23 -0
- data/lib/url_expander/expanders/basic/snipurl.rb +26 -0
- data/lib/url_expander/expanders/basic/tco.rb +23 -0
- data/lib/url_expander/expanders/basic/tighturl.rb +23 -0
- data/lib/url_expander/expanders/basic/tinycc.rb +29 -0
- data/lib/url_expander/expanders/basic/tinyurl.rb +25 -0
- data/lib/url_expander/expanders/basic/twurlnl.rb +34 -0
- data/lib/url_expander/expanders/basic/urlie.rb +23 -0
- data/lib/url_expander/expanders/basic/youtube.rb +24 -0
- data/lib/url_expander/expanders/basic.rb +62 -0
- data/lib/url_expander/expanders/expanders.rb +49 -0
- data/lib/url_expander/expanders/scrape/qsrli.rb +30 -0
- data/lib/url_expander/expanders/scrape/shorl.rb +30 -0
- data/lib/url_expander/expanders/scrape/simurl.rb +34 -0
- data/lib/url_expander/expanders/scrape.rb +71 -0
- data/lib/url_expander.rb +62 -0
- data/test/helper.rb +18 -0
- data/test/test_url_expander.rb +7 -0
- metadata +216 -0
@@ -0,0 +1,34 @@
|
|
1
|
+
module UrlExpander
  module Expanders
    #
    # Expand http://simurl.com/ URLs.
    # Usage:
    # UrlExpander::Client.expand("http://simurl.com/fendaz")
    #
    class Simurl < UrlExpander::Expanders::Scrape
      # Group 1 captures the whole short url, group 2 the path part that the
      # Scrape base class passes to Request.get.
      PATTERN = %r'(http://simurl\.com(/[\w/]+))'
      attr_reader :parent_klass

      def initialize(short_url="", options={})
        # Must be assigned before calling super: Scrape#initialize reads
        # parent_klass::PATTERN to validate the short url.
        @parent_klass = self.class
        super(short_url, options)
      end

      # How to scrape the url from a simurl.com html document.
      #
      # Returns everything after the first "=" in the content attribute of the
      # first <meta> element (presumably a meta-refresh tag of the form
      # "0;url=..." -- TODO confirm against a live page).
      #
      # Raises UrlExpander::Error (code 404) when the document has no <meta>
      # element, the element has no content attribute, the attribute contains
      # no "=", or the extracted target is "/index.php".
      def self.scrape_url(html)
        doc = Hpricot(html)
        meta = doc.at("meta")
        content = meta && meta.attributes['content']
        separator = content && content.index("=")

        # Without a usable meta tag there is nothing to expand; report it the
        # same way as an unknown key instead of failing with NoMethodError.
        raise UrlExpander::Error.new('page not found',404) if separator.nil?

        url = content[(separator + 1)..-1]
        # NOTE(review): "/index.php" appears to be where simurl sends unknown
        # keys -- confirm.
        raise UrlExpander::Error.new('page not found',404) if url == "/index.php"
        url
      end

      # HTTParty client bound to the simurl.com host; used by Scrape#fetch_url.
      class Request
        include HTTParty
        base_uri 'http://simurl.com'
      end
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'hpricot'
|
2
|
+
|
3
|
+
module UrlExpander
  module Expanders

    # Some websites don't follow the coding standards. They don't provide an
    # api and they don't provide a 301 redirect. The only way to get the long
    # url is by parsing the returned html doc.
    #
    # To use the Scrape class, define your class inside the scrape folder. Your
    # class must provide the following:
    #   def initialize(short_url="", options={})
    #   def self.scrape_url(html)
    #   class Request
    #
    # Example:
    #   class Qsrli < UrlExpander::Expanders::Scrape
    #     PATTERN = %r'(http://qsr\.li(/[\w/]+))'
    #     attr_reader :parent_klass, :xpath
    #
    #     def initialize(short_url="", options={})
    #       @parent_klass = self.class
    #       super(short_url, options)
    #     end
    #
    #     def self.scrape_url(html)
    #       doc = Hpricot(html)
    #       doc.at('//*[@id="framecontent"]').attributes["src"]
    #     end
    #
    #     class Request
    #       include HTTParty
    #       base_uri 'http://qsr.li'
    #     end
    #   end
    class Scrape
      attr_accessor :long_url
      # NOTE: :parttern is a misspelling of "pattern" and is never assigned in
      # this class (it always reads nil); it is kept only so existing callers
      # do not break.
      attr_reader :parttern, :parent_klass

      # Validates +short_url+ against the subclass PATTERN and resolves it.
      #
      # short_url - the shortened url; capture group 2 of PATTERN must be the
      #             path to request.
      # options   - accepted for interface compatibility; not used here.
      #
      # The subclass must have set @parent_klass before calling super.
      #
      # Raises RuntimeError ('invalid pattern') when the url does not match and
      # UrlExpander::Error (code 404) when no long url can be determined.
      def initialize(short_url="",options={})
        match = short_url.match(parent_klass::PATTERN)
        raise 'invalid pattern' if match.nil?

        @long_url = fetch_url(match[2])
      end


      protected

      # Requests +path+ through the subclass' Request class without following
      # redirects and returns the long url.
      #
      # 301/302 answers yield their Location header; a 200 answer is handed to
      # the subclass' scrape_url. Anything else raises UrlExpander::Error (404).
      def fetch_url(path)
        result = parent_klass::Request.get(path, :follow_redirects => false)
        response = result.response

        url = case response
              when Net::HTTPMovedPermanently, Net::HTTPFound
                # Read the header from the underlying Net::HTTP response (the
                # lookup is case-insensitive). Indexing the HTTParty wrapper
                # itself may be answered by the parsed body instead of the
                # headers -- NOTE(review): depends on the HTTParty version.
                response['location']
              when Net::HTTPOK
                parent_klass.scrape_url(response.body)
              end

        raise UrlExpander::Error.new('page not found',404) if url.nil?
        url
      end

    end
  end
end
|
data/lib/url_expander.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'httparty'
|
3
|
+
|
4
|
+
$:.unshift(File.dirname(__FILE__))
|
5
|
+
|
6
|
+
require 'url_expander/expanders/expanders'
|
7
|
+
|
8
|
+
module UrlExpander
  class Client
    # Expand a given url.
    #
    # url     - the short url to expand.
    # options - Hash of settings (the caller's hash is never modified):
    #           :nested_shortening - keep expanding while the result is itself
    #                                a known short url (default: true).
    #           :limit             - maximum number of nested expansions
    #                                (default: 10, only set when nesting is on).
    #           :config_file       - credentials file handed to the expanders
    #                                (default: '~/url_expander_credentials.yml').
    #           :is_redirection    - internal flag set on recursive calls so an
    #                                unrecognised url is returned, not rejected.
    #
    # Returns the expanded url as reported by the matching expander(s).
    #
    # Raises ArgumentError when the url is nil/empty, when no expander knows
    # the url, or when the nesting limit is exhausted. Expanders may raise
    # their own errors.
    def self.expand(url="",options = {})
      # Work on a copy: the bookkeeping below (:limit countdown and
      # :is_redirection) must not leak into a hash the caller may reuse.
      options = options.dup

      # Setup the default options
      options[:nested_shortening] = true unless options.has_key?(:nested_shortening)
      if options[:nested_shortening]
        options[:limit] = 10 unless options.has_key?(:limit)
      end
      options[:config_file] ||= '~/url_expander_credentials.yml'

      # We reached the maximum number of redirections, quit.
      raise ArgumentError, 'HTTP redirect too deep' if options[:nested_shortening] && options[:limit] == 0

      # Make sure we have a url
      raise ArgumentError, 'Expander requires a short url' if url.nil? || url.empty?

      # Abstract base classes; they carry no PATTERN of their own.
      exclude_klasses = ['UrlExpander::Expanders::Basic', 'UrlExpander::Expanders::API', 'UrlExpander::Expanders::Scrape']

      # Collect every concrete expander class defined under Expanders.
      mod = UrlExpander::Expanders
      expanders = mod.constants.map { |c| mod.const_get(c) }.select do |c|
        c.class == Class && !exclude_klasses.include?(c.to_s)
      end

      # Find the correct expander. When several patterns match, the last one
      # in constant order is used.
      expander_klass = expanders.select { |exp| exp::PATTERN.match(url) }.last
      expander = expander_klass && expander_klass.new(url, options)

      if expander.nil?
        # An unknown url is only acceptable as the end of a redirect chain.
        raise ArgumentError, 'Unknow url' unless options[:is_redirection]
        return url
      end

      return expander.long_url unless options[:nested_shortening]

      # The result may itself be a short url: expand again with one hop less.
      options[:limit] -= 1
      options[:is_redirection] = true
      UrlExpander::Client.expand(expander.long_url, options)
    end
  end

  # Error raised by expanders; carries a numeric +code+ (e.g. 404) and a
  # message of the form "<msg> - '<code>'".
  class Error < StandardError
    attr_reader :code
    alias :msg :message

    def initialize(msg, code)
      @code = code
      super("#{msg} - '#{code}'")
    end
  end
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Test bootstrap: activates the bundled gems, loads the test libraries and
# puts the gem's lib directory and this test directory on the load path.
require 'rubygems'
require 'bundler'

begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  # Tell the developer how to recover, then exit with Bundler's status code.
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'test/unit'
require 'shoulda'

# Unshifted in this order, so the test directory ends up ahead of lib.
[File.join(File.dirname(__FILE__), '..', 'lib'), File.dirname(__FILE__)].each do |dir|
  $LOAD_PATH.unshift(dir)
end
require 'url_expander'

# Reopened as a hook for shared test helpers; intentionally empty.
class Test::Unit::TestCase
end
|
metadata
ADDED
@@ -0,0 +1,216 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: url_expander
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 25
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 1
|
10
|
+
version: 0.1.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Moski
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-07-16 00:00:00 +03:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
prerelease: false
|
23
|
+
type: :runtime
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 5
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
- 6
|
33
|
+
- 1
|
34
|
+
version: 0.6.1
|
35
|
+
name: httparty
|
36
|
+
version_requirements: *id001
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
prerelease: false
|
39
|
+
type: :runtime
|
40
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
hash: 3
|
46
|
+
segments:
|
47
|
+
- 0
|
48
|
+
version: "0"
|
49
|
+
name: json
|
50
|
+
version_requirements: *id002
|
51
|
+
- !ruby/object:Gem::Dependency
|
52
|
+
prerelease: false
|
53
|
+
type: :runtime
|
54
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
55
|
+
none: false
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
hash: 3
|
60
|
+
segments:
|
61
|
+
- 0
|
62
|
+
version: "0"
|
63
|
+
name: hpricot
|
64
|
+
version_requirements: *id003
|
65
|
+
- !ruby/object:Gem::Dependency
|
66
|
+
prerelease: false
|
67
|
+
type: :development
|
68
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
69
|
+
none: false
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
hash: 3
|
74
|
+
segments:
|
75
|
+
- 0
|
76
|
+
version: "0"
|
77
|
+
name: shoulda
|
78
|
+
version_requirements: *id004
|
79
|
+
- !ruby/object:Gem::Dependency
|
80
|
+
prerelease: false
|
81
|
+
type: :development
|
82
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ~>
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
hash: 23
|
88
|
+
segments:
|
89
|
+
- 1
|
90
|
+
- 0
|
91
|
+
- 0
|
92
|
+
version: 1.0.0
|
93
|
+
name: bundler
|
94
|
+
version_requirements: *id005
|
95
|
+
- !ruby/object:Gem::Dependency
|
96
|
+
prerelease: false
|
97
|
+
type: :development
|
98
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
99
|
+
none: false
|
100
|
+
requirements:
|
101
|
+
- - ~>
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
hash: 7
|
104
|
+
segments:
|
105
|
+
- 1
|
106
|
+
- 6
|
107
|
+
- 4
|
108
|
+
version: 1.6.4
|
109
|
+
name: jeweler
|
110
|
+
version_requirements: *id006
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
prerelease: false
|
113
|
+
type: :development
|
114
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
115
|
+
none: false
|
116
|
+
requirements:
|
117
|
+
- - ">="
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
hash: 3
|
120
|
+
segments:
|
121
|
+
- 0
|
122
|
+
version: "0"
|
123
|
+
name: rcov
|
124
|
+
version_requirements: *id007
|
125
|
+
description: Expand short urls from shortning services shuch as bitly and tinyurl
|
126
|
+
email: abushaikh@gmail.com
|
127
|
+
executables: []
|
128
|
+
|
129
|
+
extensions: []
|
130
|
+
|
131
|
+
extra_rdoc_files:
|
132
|
+
- LICENSE.txt
|
133
|
+
- README.rdoc
|
134
|
+
files:
|
135
|
+
- .document
|
136
|
+
- Gemfile
|
137
|
+
- Gemfile.lock
|
138
|
+
- LICENSE.txt
|
139
|
+
- README.rdoc
|
140
|
+
- Rakefile
|
141
|
+
- VERSION
|
142
|
+
- lib/url_expander.rb
|
143
|
+
- lib/url_expander/expanders/api.rb
|
144
|
+
- lib/url_expander/expanders/api/4sq.rb
|
145
|
+
- lib/url_expander/expanders/api/bitly.rb
|
146
|
+
- lib/url_expander/expanders/api/budurl.rb
|
147
|
+
- lib/url_expander/expanders/api/cligs.rb
|
148
|
+
- lib/url_expander/expanders/api/decenturl.rb
|
149
|
+
- lib/url_expander/expanders/api/fxnws.rb
|
150
|
+
- lib/url_expander/expanders/api/googl.rb
|
151
|
+
- lib/url_expander/expanders/api/isgd.rb
|
152
|
+
- lib/url_expander/expanders/api/nytims.rb
|
153
|
+
- lib/url_expander/expanders/api/tcrnch.rb
|
154
|
+
- lib/url_expander/expanders/api/xrlus.rb
|
155
|
+
- lib/url_expander/expanders/basic.rb
|
156
|
+
- lib/url_expander/expanders/basic/adjix.rb
|
157
|
+
- lib/url_expander/expanders/basic/digbig.rb
|
158
|
+
- lib/url_expander/expanders/basic/doiop.rb
|
159
|
+
- lib/url_expander/expanders/basic/easyurljp.rb
|
160
|
+
- lib/url_expander/expanders/basic/justas.rb
|
161
|
+
- lib/url_expander/expanders/basic/moourl.rb
|
162
|
+
- lib/url_expander/expanders/basic/notlong.rb
|
163
|
+
- lib/url_expander/expanders/basic/nutshellurl.rb
|
164
|
+
- lib/url_expander/expanders/basic/owly.rb
|
165
|
+
- lib/url_expander/expanders/basic/shrtst.rb
|
166
|
+
- lib/url_expander/expanders/basic/snipurl.rb
|
167
|
+
- lib/url_expander/expanders/basic/tco.rb
|
168
|
+
- lib/url_expander/expanders/basic/tighturl.rb
|
169
|
+
- lib/url_expander/expanders/basic/tinycc.rb
|
170
|
+
- lib/url_expander/expanders/basic/tinyurl.rb
|
171
|
+
- lib/url_expander/expanders/basic/twurlnl.rb
|
172
|
+
- lib/url_expander/expanders/basic/urlie.rb
|
173
|
+
- lib/url_expander/expanders/basic/youtube.rb
|
174
|
+
- lib/url_expander/expanders/expanders.rb
|
175
|
+
- lib/url_expander/expanders/scrape.rb
|
176
|
+
- lib/url_expander/expanders/scrape/qsrli.rb
|
177
|
+
- lib/url_expander/expanders/scrape/shorl.rb
|
178
|
+
- lib/url_expander/expanders/scrape/simurl.rb
|
179
|
+
- test/helper.rb
|
180
|
+
- test/test_url_expander.rb
|
181
|
+
has_rdoc: true
|
182
|
+
homepage: http://github.com/moski/url_expander
|
183
|
+
licenses:
|
184
|
+
- MIT
|
185
|
+
post_install_message:
|
186
|
+
rdoc_options: []
|
187
|
+
|
188
|
+
require_paths:
|
189
|
+
- lib
|
190
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
191
|
+
none: false
|
192
|
+
requirements:
|
193
|
+
- - ">="
|
194
|
+
- !ruby/object:Gem::Version
|
195
|
+
hash: 3
|
196
|
+
segments:
|
197
|
+
- 0
|
198
|
+
version: "0"
|
199
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
200
|
+
none: false
|
201
|
+
requirements:
|
202
|
+
- - ">="
|
203
|
+
- !ruby/object:Gem::Version
|
204
|
+
hash: 3
|
205
|
+
segments:
|
206
|
+
- 0
|
207
|
+
version: "0"
|
208
|
+
requirements: []
|
209
|
+
|
210
|
+
rubyforge_project:
|
211
|
+
rubygems_version: 1.6.2
|
212
|
+
signing_key:
|
213
|
+
specification_version: 3
|
214
|
+
summary: Expand short url from different services
|
215
|
+
test_files: []
|
216
|
+
|