postrank-uri 1.0.8 → 1.0.9

Sign up to get free protection for your applications and to get access to all the features.
data/lib/postrank-uri.rb CHANGED
@@ -6,6 +6,21 @@ require 'digest/md5'
6
6
  require 'nokogiri'
7
7
  require 'yaml'
8
8
 
9
+ module Addressable
10
+ class URI
11
+ def domain
12
+ begin
13
+ dp = Domainatrix.parse(self)
14
+ rescue
15
+ return nil
16
+ end
17
+
18
+ dom = dp.public_suffix
19
+ dom = dp.domain.downcase + "." + dom unless dp.domain.empty?
20
+ end
21
+ end
22
+ end
23
+
9
24
  module PostRank
10
25
  module URI
11
26
 
@@ -87,7 +102,7 @@ module PostRank
87
102
  urls = []
88
103
  Nokogiri.HTML(text).search('a').each do |a|
89
104
  begin
90
- url = clean(a.attr('href'), false)
105
+ url = clean(a.attr('href'), :raw => true)
91
106
  if url.host.empty?
92
107
  next if host.nil?
93
108
  url.host = host
@@ -113,13 +128,13 @@ module PostRank
113
128
  end
114
129
  end
115
130
 
116
- def clean(uri, string = true)
131
+ def clean(uri, opts = {})
117
132
  uri = normalize(c18n(unescape(uri)))
118
- string ? uri.to_s : uri
133
+ opts[:raw] ? uri : uri.to_s
119
134
  end
120
135
 
121
- def hash(uri)
122
- Digest::MD5.hexdigest(clean(uri))
136
+ def hash(uri, opts = {})
137
+ Digest::MD5.hexdigest(opts[:skip_clean] ? uri : clean(uri))
123
138
  end
124
139
 
125
140
  def normalize(uri)
@@ -158,7 +173,7 @@ module PostRank
158
173
  embedded = uri.query_values['u']
159
174
  end
160
175
 
161
- uri = clean(embedded, false) if embedded
176
+ uri = clean(embedded, :raw => true) if embedded
162
177
  uri
163
178
  end
164
179
 
@@ -1,5 +1,5 @@
1
1
  module PostRank
2
2
  module URI
3
- VERSION = "1.0.8"
3
+ VERSION = "1.0.9"
4
4
  end
5
5
  end
@@ -177,8 +177,8 @@ describe PostRank::URI do
177
177
  end
178
178
 
179
179
  context "hash" do
180
- def h(uri)
181
- PostRank::URI.hash(uri)
180
+ def h(uri, opts = {})
181
+ PostRank::URI.hash(uri, opts)
182
182
  end
183
183
 
184
184
  it "should compute MD5 hash of the normalized URI" do
@@ -189,6 +189,13 @@ describe PostRank::URI do
189
189
  h('everburning.com/feed/post/1').should == hash
190
190
  h('everburning.com/feed/post/1/').should == hash
191
191
  end
192
+
193
+ it "should not clean the URI if requested" do
194
+ hash = '55fae8910d312b7878a3201ed653b881'
195
+
196
+ h('http://everburning.com/feed/post/1', :skip_clean => true).should == hash
197
+ h('everburning.com/feed/post/1', :skip_clean => true).should_not == hash
198
+ end
192
199
  end
193
200
 
194
201
  context "extract" do
@@ -265,6 +272,31 @@ describe PostRank::URI do
265
272
  i.last.should == 'link to stuff'
266
273
  end
267
274
  end
268
- end
269
275
 
276
+ context 'domain extraction' do
277
+ url_list = {"http://alex.pages.example.com" => "example.com",
278
+ "alex.pages.example.com" => "example.com",
279
+ "http://example.com/2011/04/01/blah" => "example.com",
280
+ "http://example.com" => "example.com",
281
+ "example.com" => "example.com",
282
+ "ExampLe.com" => "example.com",
283
+ "ExampLe.com:3000" => "example.com",
284
+ "http://alex.pages.example.COM" => "example.com",
285
+ "http://www.example.ag.it/2011/04/01/blah" => "example.ag.it",
286
+ "ftp://www.example.com/2011/04/01/blah" => nil,
287
+ "http://com" => nil,
288
+ "http://alex.pages.examplecom" => nil,
289
+ "example" => nil,
290
+ "http://127.0.0.1" => nil,
291
+ "localhost" => nil
292
+ }
293
+
294
+ url_list.each_pair do |url, expected_result|
295
+ it "should extract #{expected_result.inspect} from #{url}" do
296
+ u = PostRank::URI.clean(url, :raw => true)
297
+ u.domain.should == expected_result
298
+ end
299
+ end
300
+ end
301
+ end
270
302
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: postrank-uri
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.0.8
5
+ version: 1.0.9
6
6
  platform: ruby
7
7
  authors:
8
8
  - Ilya Grigorik
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-03-31 00:00:00 -04:00
13
+ date: 2011-04-08 00:00:00 -04:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency