postrank-uri 1.0.5 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/postrank-uri.rb +17 -1
- data/lib/postrank-uri/version.rb +1 -1
- data/spec/postrank-uri_spec.rb +23 -0
- metadata +3 -3
data/lib/postrank-uri.rb
CHANGED
@@ -132,16 +132,32 @@ module PostRank
|
|
132
132
|
|
133
133
|
def c18n(uri)
|
134
134
|
u = parse(uri)
|
135
|
+
u = embedded(u)
|
135
136
|
|
136
137
|
if q = u.query_values(:notation => :flat_array)
|
137
138
|
q.delete_if { |k,v| C18N[:global].include?(k) }
|
138
139
|
q.delete_if { |k,v| C18N[:hosts].find {|r,p| u.host =~ r && p.include?(k) } }
|
139
140
|
end
|
140
|
-
|
141
141
|
u.query_values = q
|
142
|
+
|
143
|
+
if u.host == 'twitter.com' && u.fragment.match(/^!(.*)/)
|
144
|
+
u.fragment = nil
|
145
|
+
u.path = $1
|
146
|
+
end
|
147
|
+
|
142
148
|
u
|
143
149
|
end
|
144
150
|
|
151
|
+
def embedded(uri)
|
152
|
+
if uri.host == 'news.google.com' && uri.path == '/news/url' \
|
153
|
+
|| uri.host == 'xfruits.com'
|
154
|
+
|
155
|
+
embedded = uri.query_values['url']
|
156
|
+
uri = clean(embedded, false) if embedded
|
157
|
+
end
|
158
|
+
uri
|
159
|
+
end
|
160
|
+
|
145
161
|
def parse(uri)
|
146
162
|
return uri if uri.is_a? Addressable::URI
|
147
163
|
|
data/lib/postrank-uri/version.rb
CHANGED
data/spec/postrank-uri_spec.rb
CHANGED
@@ -112,7 +112,26 @@ describe PostRank::URI do
|
|
112
112
|
c('igvita.com/?id=a&utm_source=a&awesm=b').should == 'http://igvita.com/?id=a'
|
113
113
|
c('igvita.com/?id=a&sms_ss=a').should == 'http://igvita.com/?id=a'
|
114
114
|
end
|
115
|
+
end
|
115
116
|
|
117
|
+
context "hashbang" do
|
118
|
+
it "should rewrite twitter links to crawlable versions" do
|
119
|
+
c('http://twitter.com/#!/igrigorik').should == 'http://twitter.com/igrigorik'
|
120
|
+
c('http://twitter.com/#!/a/statuses/1').should == 'http://twitter.com/a/statuses/1'
|
121
|
+
c('http://nontwitter.com/#!/a/statuses/1').should == 'http://nontwitter.com/#!/a/statuses/1'
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
context "embedded links" do
|
126
|
+
it "should extract embedded redirects from Google News" do
|
127
|
+
u = c('http://news.google.com/news/url?sa=t&fd=R&&url=http://www.ctv.ca/CTVNews/Politics/20110111/')
|
128
|
+
u.should == 'http://www.ctv.ca/CTVNews/Politics/20110111/'
|
129
|
+
end
|
130
|
+
|
131
|
+
it "should extract embedded redirects from xfruits.com" do
|
132
|
+
u = c('http://xfruits.com/MrGroar/?url=http%3A%2F%2Faap.lesroyaumes.com%2Fdepeches%2Fdepeche351820908.html')
|
133
|
+
u.should == 'http://aap.lesroyaumes.com/depeches/depeche351820908.html'
|
134
|
+
end
|
116
135
|
end
|
117
136
|
end
|
118
137
|
|
@@ -171,6 +190,10 @@ describe PostRank::URI do
|
|
171
190
|
end
|
172
191
|
end
|
173
192
|
|
193
|
+
it "should extract twitter links with hashbangs" do
|
194
|
+
e('test http://twitter.com/#!/igrigorik').should include('http://twitter.com/igrigorik')
|
195
|
+
end
|
196
|
+
|
174
197
|
it "should handle a URL that comes after text without a space" do
|
175
198
|
e("text:http://spn.tw/tfnLT").should include("http://spn.tw/tfnLT")
|
176
199
|
e("text;http://spn.tw/tfnLT").should include("http://spn.tw/tfnLT")
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 1
|
7
7
|
- 0
|
8
|
-
- 5
|
9
|
-
version: 1.0.5
|
8
|
+
- 6
|
9
|
+
version: 1.0.6
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Ilya Grigorik
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-02-
|
17
|
+
date: 2011-02-22 00:00:00 -05:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|