postrank-uri 1.0.7 → 1.0.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -11,9 +11,11 @@
11
11
  :hosts:
12
12
  nytimes.com:
13
13
  - partner
14
+ - pagewanted
14
15
  - emc
15
16
  - _r
16
17
  - ref
18
+ - src
17
19
  diepresse.com:
18
20
  - _vl_backlink
19
21
  washingtonpost.com:
@@ -41,4 +43,8 @@
41
43
  cnet.com:
42
44
  - part
43
45
  - subj
44
- - tag
46
+ - tag
47
+ wsj.com:
48
+ - mod
49
+ allthingsd.com:
50
+ - mod
@@ -1,5 +1,5 @@
1
1
  module PostRank
2
2
  module URI
3
- VERSION = "1.0.7"
3
+ VERSION = "1.0.8"
4
4
  end
5
5
  end
data/lib/postrank-uri.rb CHANGED
@@ -125,6 +125,7 @@ module PostRank
125
125
  def normalize(uri)
126
126
  u = parse(uri)
127
127
  u.path = u.path.squeeze('/')
128
+ u.path = u.path.chomp('/') if u.path.size != 1
128
129
  u.query = nil if u.query && u.query.empty?
129
130
  u.fragment = nil
130
131
  u
data/spec/c18n_hosts.yml CHANGED
@@ -5,11 +5,23 @@
5
5
  - - http://www.nytimes.com/2011/02/20/magazine/20FOB-Medium-t.html?ref=magazine
6
6
  - http://www.nytimes.com/2011/02/20/magazine/20FOB-Medium-t.html
7
7
 
8
+ - - http://www.nytimes.com/2011/03/13/business/13hire.html?pagewanted=1&_r=1&ref=technology
9
+ - http://www.nytimes.com/2011/03/13/business/13hire.html
10
+
11
+ - - http://www.nytimes.com/2011/03/15/business/media/15adco.html?_r=2&src=recg
12
+ - http://www.nytimes.com/2011/03/15/business/media/15adco.html
13
+
14
+ - - http://networkeffect.allthingsd.com/20110308/googles-approach-to-social/?mod=tweet
15
+ - http://networkeffect.allthingsd.com/20110308/googles-approach-to-social
16
+
17
+ - - http://online.wsj.com/article/SB10001424052748704657704576150191661959856.html?mod=WSJ_hp_LEFTWhatsNewsCollection
18
+ - http://online.wsj.com/article/SB10001424052748704657704576150191661959856.html
19
+
8
20
  - - http://diepresse.com/home/wirtschaft/636448/Griechenland_Drachme-als-letzte-Rettung?_vl_backlink=%2Fhome
9
21
  - http://diepresse.com/home/wirtschaft/636448/Griechenland_Drachme-als-letzte-Rettung
10
22
 
11
23
  - - http://dotearth.blogs.nytimes.com/2010/12/14/beyond-political-science/?partner=rss&emc=rss
12
- - http://dotearth.blogs.nytimes.com/2010/12/14/beyond-political-science/
24
+ - http://dotearth.blogs.nytimes.com/2010/12/14/beyond-political-science
13
25
 
14
26
  - - http://www.washingtonpost.com/wp-dyn/content/article/2010/12/14/AR2010121406045.html?nav=rss_email/components
15
27
  - http://www.washingtonpost.com/wp-dyn/content/article/2010/12/14/AR2010121406045.html
@@ -36,7 +48,7 @@
36
48
  - http://www.dw-world.de/dw/article/0,,6330472,00.html
37
49
 
38
50
  - - http://www.repubblica.it/rubriche/il-caso-del-giorno/2010/12/13/news/riscossa_aeffe-10153565/?rss
39
- - http://www.repubblica.it/rubriche/il-caso-del-giorno/2010/12/13/news/riscossa_aeffe-10153565/
51
+ - http://www.repubblica.it/rubriche/il-caso-del-giorno/2010/12/13/news/riscossa_aeffe-10153565
40
52
 
41
53
  - - http://www.welt.de/sport/Der-Hoellenritt-des-Fussball-Profis-Jean-Marc-Bosman.html?wtmc=RSS.Sport.Fussball
42
54
  - http://www.welt.de/sport/Der-Hoellenritt-des-Fussball-Profis-Jean-Marc-Bosman.html
@@ -86,6 +86,16 @@ describe PostRank::URI do
86
86
  n('IGVITA.COM/ABC').should == (igvita + "ABC")
87
87
  end
88
88
 
89
+ it "should remove trailing slash on paths" do
90
+ n('http://igvita.com/').should == 'http://igvita.com/'
91
+
92
+ n('http://igvita.com/a').should == 'http://igvita.com/a'
93
+ n('http://igvita.com/a/').should == 'http://igvita.com/a'
94
+
95
+ n('http://igvita.com/a/b').should == 'http://igvita.com/a/b'
96
+ n('http://igvita.com/a/b/').should == 'http://igvita.com/a/b'
97
+ end
98
+
89
99
  end
90
100
 
91
101
  context "canonicalization" do
@@ -125,7 +135,7 @@ describe PostRank::URI do
125
135
  context "embedded links" do
126
136
  it "should extract embedded redirects from Google News" do
127
137
  u = c('http://news.google.com/news/url?sa=t&fd=R&&url=http://www.ctv.ca/CTVNews/Politics/20110111/')
128
- u.should == 'http://www.ctv.ca/CTVNews/Politics/20110111/'
138
+ u.should == 'http://www.ctv.ca/CTVNews/Politics/20110111'
129
139
  end
130
140
 
131
141
  it "should extract embedded redirects from xfruits.com" do
@@ -135,7 +145,7 @@ describe PostRank::URI do
135
145
 
136
146
  it "should extract embedded redirects from MySpace" do
137
147
  u = c('http://www.myspace.com/Modules/PostTo/Pages/?u=http%3A%2F%2Fghanaian-chronicle.com%2Fnews%2Fother-news%2Fcanadian-high-commissioner-urges-media%2F&t=Canadian%20High%20Commissioner%20urges%20media')
138
- u.should == 'http://ghanaian-chronicle.com/news/other-news/canadian-high-commissioner-urges-media/'
148
+ u.should == 'http://ghanaian-chronicle.com/news/other-news/canadian-high-commissioner-urges-media'
139
149
  end
140
150
  end
141
151
  end
@@ -172,11 +182,12 @@ describe PostRank::URI do
172
182
  end
173
183
 
174
184
  it "should compute MD5 hash of the normalized URI" do
175
- hash = '021a1032b1ea631a7c33d1a0ccc562bf'
185
+ hash = '55fae8910d312b7878a3201ed653b881'
176
186
 
177
- h('http://EverBurnign.Com/feed/post/1').should == hash
178
- h('Everburnign.com/feed/post/1').should == hash
179
- h('everburnign.com/feed/post/1').should == hash
187
+ h('http://EverBurning.Com/feed/post/1').should == hash
188
+ h('Everburning.com/feed/post/1').should == hash
189
+ h('everburning.com/feed/post/1').should == hash
190
+ h('everburning.com/feed/post/1/').should == hash
180
191
  end
181
192
  end
182
193
 
metadata CHANGED
@@ -1,12 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: postrank-uri
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 1
7
- - 0
8
- - 7
9
- version: 1.0.7
4
+ prerelease:
5
+ version: 1.0.8
10
6
  platform: ruby
11
7
  authors:
12
8
  - Ilya Grigorik
@@ -14,7 +10,7 @@ autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
12
 
17
- date: 2011-02-22 00:00:00 -05:00
13
+ date: 2011-03-31 00:00:00 -04:00
18
14
  default_executable:
19
15
  dependencies:
20
16
  - !ruby/object:Gem::Dependency
@@ -25,10 +21,6 @@ dependencies:
25
21
  requirements:
26
22
  - - ">="
27
23
  - !ruby/object:Gem::Version
28
- segments:
29
- - 2
30
- - 2
31
- - 3
32
24
  version: 2.2.3
33
25
  type: :runtime
34
26
  version_requirements: *id001
@@ -40,8 +32,6 @@ dependencies:
40
32
  requirements:
41
33
  - - ">="
42
34
  - !ruby/object:Gem::Version
43
- segments:
44
- - 0
45
35
  version: "0"
46
36
  type: :runtime
47
37
  version_requirements: *id002
@@ -53,8 +43,6 @@ dependencies:
53
43
  requirements:
54
44
  - - ">="
55
45
  - !ruby/object:Gem::Version
56
- segments:
57
- - 0
58
46
  version: "0"
59
47
  type: :runtime
60
48
  version_requirements: *id003
@@ -66,8 +54,6 @@ dependencies:
66
54
  requirements:
67
55
  - - ">="
68
56
  - !ruby/object:Gem::Version
69
- segments:
70
- - 0
71
57
  version: "0"
72
58
  type: :development
73
59
  version_requirements: *id004
@@ -107,21 +93,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
107
93
  requirements:
108
94
  - - ">="
109
95
  - !ruby/object:Gem::Version
110
- segments:
111
- - 0
112
96
  version: "0"
113
97
  required_rubygems_version: !ruby/object:Gem::Requirement
114
98
  none: false
115
99
  requirements:
116
100
  - - ">="
117
101
  - !ruby/object:Gem::Version
118
- segments:
119
- - 0
120
102
  version: "0"
121
103
  requirements: []
122
104
 
123
105
  rubyforge_project: postrank-uri
124
- rubygems_version: 1.3.7
106
+ rubygems_version: 1.6.2
125
107
  signing_key:
126
108
  specification_version: 3
127
109
  summary: URI normalization, c18n, escaping, and extraction