postrank-uri 1.0.8 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/postrank-uri.rb +21 -6
- data/lib/postrank-uri/version.rb +1 -1
- data/spec/postrank-uri_spec.rb +35 -3
- metadata +2 -2
data/lib/postrank-uri.rb
CHANGED
@@ -6,6 +6,21 @@ require 'digest/md5'
|
|
6
6
|
require 'nokogiri'
|
7
7
|
require 'yaml'
|
8
8
|
|
9
|
+
module Addressable
|
10
|
+
class URI
|
11
|
+
def domain
|
12
|
+
begin
|
13
|
+
dp = Domainatrix.parse(self)
|
14
|
+
rescue
|
15
|
+
return nil
|
16
|
+
end
|
17
|
+
|
18
|
+
dom = dp.public_suffix
|
19
|
+
dom = dp.domain.downcase + "." + dom unless dp.domain.empty?
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
9
24
|
module PostRank
|
10
25
|
module URI
|
11
26
|
|
@@ -87,7 +102,7 @@ module PostRank
|
|
87
102
|
urls = []
|
88
103
|
Nokogiri.HTML(text).search('a').each do |a|
|
89
104
|
begin
|
90
|
-
url = clean(a.attr('href'),
|
105
|
+
url = clean(a.attr('href'), :raw => true)
|
91
106
|
if url.host.empty?
|
92
107
|
next if host.nil?
|
93
108
|
url.host = host
|
@@ -113,13 +128,13 @@ module PostRank
|
|
113
128
|
end
|
114
129
|
end
|
115
130
|
|
116
|
-
def clean(uri,
|
131
|
+
def clean(uri, opts = {})
|
117
132
|
uri = normalize(c18n(unescape(uri)))
|
118
|
-
|
133
|
+
opts[:raw] ? uri : uri.to_s
|
119
134
|
end
|
120
135
|
|
121
|
-
def hash(uri)
|
122
|
-
Digest::MD5.hexdigest(clean(uri))
|
136
|
+
def hash(uri, opts = {})
|
137
|
+
Digest::MD5.hexdigest(opts[:skip_clean] ? uri : clean(uri))
|
123
138
|
end
|
124
139
|
|
125
140
|
def normalize(uri)
|
@@ -158,7 +173,7 @@ module PostRank
|
|
158
173
|
embedded = uri.query_values['u']
|
159
174
|
end
|
160
175
|
|
161
|
-
uri = clean(embedded,
|
176
|
+
uri = clean(embedded, :raw => true) if embedded
|
162
177
|
uri
|
163
178
|
end
|
164
179
|
|
data/lib/postrank-uri/version.rb
CHANGED
data/spec/postrank-uri_spec.rb
CHANGED
@@ -177,8 +177,8 @@ describe PostRank::URI do
|
|
177
177
|
end
|
178
178
|
|
179
179
|
context "hash" do
|
180
|
-
def h(uri)
|
181
|
-
PostRank::URI.hash(uri)
|
180
|
+
def h(uri, opts = {})
|
181
|
+
PostRank::URI.hash(uri, opts)
|
182
182
|
end
|
183
183
|
|
184
184
|
it "should compute MD5 hash of the normalized URI" do
|
@@ -189,6 +189,13 @@ describe PostRank::URI do
|
|
189
189
|
h('everburning.com/feed/post/1').should == hash
|
190
190
|
h('everburning.com/feed/post/1/').should == hash
|
191
191
|
end
|
192
|
+
|
193
|
+
it "should not clean the URI if requested" do
|
194
|
+
hash = '55fae8910d312b7878a3201ed653b881'
|
195
|
+
|
196
|
+
h('http://everburning.com/feed/post/1', :skip_clean => true).should == hash
|
197
|
+
h('everburning.com/feed/post/1', :skip_clean => true).should_not == hash
|
198
|
+
end
|
192
199
|
end
|
193
200
|
|
194
201
|
context "extract" do
|
@@ -265,6 +272,31 @@ describe PostRank::URI do
|
|
265
272
|
i.last.should == 'link to stuff'
|
266
273
|
end
|
267
274
|
end
|
268
|
-
end
|
269
275
|
|
276
|
+
context 'domain extraction' do
|
277
|
+
url_list = {"http://alex.pages.example.com" => "example.com",
|
278
|
+
"alex.pages.example.com" => "example.com",
|
279
|
+
"http://example.com/2011/04/01/blah" => "example.com",
|
280
|
+
"http://example.com" => "example.com",
|
281
|
+
"example.com" => "example.com",
|
282
|
+
"ExampLe.com" => "example.com",
|
283
|
+
"ExampLe.com:3000" => "example.com",
|
284
|
+
"http://alex.pages.example.COM" => "example.com",
|
285
|
+
"http://www.example.ag.it/2011/04/01/blah" => "example.ag.it",
|
286
|
+
"ftp://www.example.com/2011/04/01/blah" => nil,
|
287
|
+
"http://com" => nil,
|
288
|
+
"http://alex.pages.examplecom" => nil,
|
289
|
+
"example" => nil,
|
290
|
+
"http://127.0.0.1" => nil,
|
291
|
+
"localhost" => nil
|
292
|
+
}
|
293
|
+
|
294
|
+
url_list.each_pair do |url, expected_result|
|
295
|
+
it "should extract #{expected_result.inspect} from #{url}" do
|
296
|
+
u = PostRank::URI.clean(url, :raw => true)
|
297
|
+
u.domain.should == expected_result
|
298
|
+
end
|
299
|
+
end
|
300
|
+
end
|
301
|
+
end
|
270
302
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: postrank-uri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 1.0.8
|
5
|
+
version: 1.0.9
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Ilya Grigorik
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-
|
13
|
+
date: 2011-04-08 00:00:00 -04:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|