postrank-uri 1.0.5 → 1.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/postrank-uri.rb +17 -1
- data/lib/postrank-uri/version.rb +1 -1
- data/spec/postrank-uri_spec.rb +23 -0
- metadata +3 -3
data/lib/postrank-uri.rb
CHANGED
@@ -132,16 +132,32 @@ module PostRank
|
|
132
132
|
|
133
133
|
# Canonicalizes a URI.
#
# Steps, in order:
#   1. parse the input and unwrap an embedded redirect target (see `embedded`);
#   2. drop query parameters listed in C18N[:global], plus those listed in
#      C18N[:hosts] under a pattern that matches the URI's host;
#   3. on twitter.com, turn a hashbang fragment ("#!/user") into the path.
#
# uri - anything accepted by `parse`
#
# Returns the (modified) URI object.
def c18n(uri)
  u = embedded(parse(uri))

  # query_values is falsy when there is nothing to filter; it is written back
  # unchanged in that case.
  q = u.query_values(:notation => :flat_array)
  if q
    q.delete_if do |k, _v|
      C18N[:global].include?(k) ||
        C18N[:hosts].any? { |pattern, params| u.host =~ pattern && params.include?(k) }
    end
  end
  u.query_values = q

  # A URI without a fragment reports nil, so check for presence before
  # matching; calling match on nil would raise NoMethodError for every plain
  # twitter.com link.
  hashbang = u.host == 'twitter.com' && u.fragment && u.fragment.match(/^!(.*)/)
  if hashbang
    u.fragment = nil
    u.path = hashbang[1]
  end

  u
end
|
144
150
|
|
151
|
+
# Unwraps a destination URL carried inside a known redirector link.
#
# Google News redirects (host news.google.com, path /news/url) and any
# xfruits.com link are inspected for a `url` query parameter. When present,
# its value is passed through `clean(value, false)` and that result is
# returned; in every other case the given URI is returned untouched.
#
# uri - object responding to host, path and query_values
def embedded(uri)
  redirector = (uri.host == 'news.google.com' && uri.path == '/news/url') ||
               uri.host == 'xfruits.com'
  return uri unless redirector

  # query_values can be nil (c18n guards for the same case), e.g. for a
  # redirector link with no query string, so check before indexing into it.
  params = uri.query_values
  target = params && params['url']
  target ? clean(target, false) : uri
end
|
160
|
+
|
145
161
|
def parse(uri)
|
146
162
|
return uri if uri.is_a? Addressable::URI
|
147
163
|
|
data/lib/postrank-uri/version.rb
CHANGED
data/spec/postrank-uri_spec.rb
CHANGED
@@ -112,7 +112,26 @@ describe PostRank::URI do
|
|
112
112
|
c('igvita.com/?id=a&utm_source=a&awesm=b').should == 'http://igvita.com/?id=a'
|
113
113
|
c('igvita.com/?id=a&sms_ss=a').should == 'http://igvita.com/?id=a'
|
114
114
|
end
|
115
|
+
end
|
115
116
|
|
117
|
+
# Hashbang ("#!") fragments: twitter.com links become plain paths, links on
# other hosts keep their fragment.
context "hashbang" do
  it "should rewrite twitter links to crawlable versions" do
    [
      ['http://twitter.com/#!/igrigorik', 'http://twitter.com/igrigorik'],
      ['http://twitter.com/#!/a/statuses/1', 'http://twitter.com/a/statuses/1'],
      ['http://nontwitter.com/#!/a/statuses/1', 'http://nontwitter.com/#!/a/statuses/1']
    ].each do |given, expected|
      c(given).should == expected
    end
  end
end
|
124
|
+
|
125
|
+
# Redirector links that carry the real destination in a `url` query parameter.
context "embedded links" do
  it "should extract embedded redirects from Google News" do
    c('http://news.google.com/news/url?sa=t&fd=R&&url=http://www.ctv.ca/CTVNews/Politics/20110111/').should ==
      'http://www.ctv.ca/CTVNews/Politics/20110111/'
  end

  it "should extract embedded redirects from xfruits.com" do
    c('http://xfruits.com/MrGroar/?url=http%3A%2F%2Faap.lesroyaumes.com%2Fdepeches%2Fdepeche351820908.html').should ==
      'http://aap.lesroyaumes.com/depeches/depeche351820908.html'
  end
end
|
117
136
|
end
|
118
137
|
|
@@ -171,6 +190,10 @@ describe PostRank::URI do
|
|
171
190
|
end
|
172
191
|
end
|
173
192
|
|
193
|
+
# Extraction should yield the rewritten (hashbang-free) twitter URL.
it "should extract twitter links with hashbangs" do
  links = e('test http://twitter.com/#!/igrigorik')
  links.should include('http://twitter.com/igrigorik')
end
|
196
|
+
|
174
197
|
it "should handle a URL that comes after text without a space" do
|
175
198
|
e("text:http://spn.tw/tfnLT").should include("http://spn.tw/tfnLT")
|
176
199
|
e("text;http://spn.tw/tfnLT").should include("http://spn.tw/tfnLT")
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 1
|
7
7
|
- 0
|
8
|
-
- 5
|
9
|
-
version: 1.0.5
|
8
|
+
- 6
|
9
|
+
version: 1.0.6
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Ilya Grigorik
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-02-
|
17
|
+
date: 2011-02-22 00:00:00 -05:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|