postrank-uri 1.0.7 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,9 +11,11 @@
11
11
  :hosts:
12
12
  nytimes.com:
13
13
  - partner
14
+ - pagewanted
14
15
  - emc
15
16
  - _r
16
17
  - ref
18
+ - src
17
19
  diepresse.com:
18
20
  - _vl_backlink
19
21
  washingtonpost.com:
@@ -41,4 +43,8 @@
41
43
  cnet.com:
42
44
  - part
43
45
  - subj
44
- - tag
46
+ - tag
47
+ wsj.com:
48
+ - mod
49
+ allthingsd.com:
50
+ - mod
@@ -1,5 +1,5 @@
1
1
  module PostRank
2
2
  module URI
3
- VERSION = "1.0.7"
3
+ VERSION = "1.0.8"
4
4
  end
5
5
  end
data/lib/postrank-uri.rb CHANGED
@@ -125,6 +125,7 @@ module PostRank
125
125
  def normalize(uri)
126
126
  u = parse(uri)
127
127
  u.path = u.path.squeeze('/')
128
+ u.path = u.path.chomp('/') if u.path.size != 1
128
129
  u.query = nil if u.query && u.query.empty?
129
130
  u.fragment = nil
130
131
  u
data/spec/c18n_hosts.yml CHANGED
@@ -5,11 +5,23 @@
5
5
  - - http://www.nytimes.com/2011/02/20/magazine/20FOB-Medium-t.html?ref=magazine
6
6
  - http://www.nytimes.com/2011/02/20/magazine/20FOB-Medium-t.html
7
7
 
8
+ - - http://www.nytimes.com/2011/03/13/business/13hire.html?pagewanted=1&_r=1&ref=technology
9
+ - http://www.nytimes.com/2011/03/13/business/13hire.html
10
+
11
+ - - http://www.nytimes.com/2011/03/15/business/media/15adco.html?_r=2&src=recg
12
+ - http://www.nytimes.com/2011/03/15/business/media/15adco.html
13
+
14
+ - - http://networkeffect.allthingsd.com/20110308/googles-approach-to-social/?mod=tweet
15
+ - http://networkeffect.allthingsd.com/20110308/googles-approach-to-social
16
+
17
+ - - http://online.wsj.com/article/SB10001424052748704657704576150191661959856.html?mod=WSJ_hp_LEFTWhatsNewsCollection
18
+ - http://online.wsj.com/article/SB10001424052748704657704576150191661959856.html
19
+
8
20
  - - http://diepresse.com/home/wirtschaft/636448/Griechenland_Drachme-als-letzte-Rettung?_vl_backlink=%2Fhome
9
21
  - http://diepresse.com/home/wirtschaft/636448/Griechenland_Drachme-als-letzte-Rettung
10
22
 
11
23
  - - http://dotearth.blogs.nytimes.com/2010/12/14/beyond-political-science/?partner=rss&emc=rss
12
- - http://dotearth.blogs.nytimes.com/2010/12/14/beyond-political-science/
24
+ - http://dotearth.blogs.nytimes.com/2010/12/14/beyond-political-science
13
25
 
14
26
  - - http://www.washingtonpost.com/wp-dyn/content/article/2010/12/14/AR2010121406045.html?nav=rss_email/components
15
27
  - http://www.washingtonpost.com/wp-dyn/content/article/2010/12/14/AR2010121406045.html
@@ -36,7 +48,7 @@
36
48
  - http://www.dw-world.de/dw/article/0,,6330472,00.html
37
49
 
38
50
  - - http://www.repubblica.it/rubriche/il-caso-del-giorno/2010/12/13/news/riscossa_aeffe-10153565/?rss
39
- - http://www.repubblica.it/rubriche/il-caso-del-giorno/2010/12/13/news/riscossa_aeffe-10153565/
51
+ - http://www.repubblica.it/rubriche/il-caso-del-giorno/2010/12/13/news/riscossa_aeffe-10153565
40
52
 
41
53
  - - http://www.welt.de/sport/Der-Hoellenritt-des-Fussball-Profis-Jean-Marc-Bosman.html?wtmc=RSS.Sport.Fussball
42
54
  - http://www.welt.de/sport/Der-Hoellenritt-des-Fussball-Profis-Jean-Marc-Bosman.html
@@ -86,6 +86,16 @@ describe PostRank::URI do
86
86
  n('IGVITA.COM/ABC').should == (igvita + "ABC")
87
87
  end
88
88
 
89
+ it "should remove trailing slash on paths" do
90
+ n('http://igvita.com/').should == 'http://igvita.com/'
91
+
92
+ n('http://igvita.com/a').should == 'http://igvita.com/a'
93
+ n('http://igvita.com/a/').should == 'http://igvita.com/a'
94
+
95
+ n('http://igvita.com/a/b').should == 'http://igvita.com/a/b'
96
+ n('http://igvita.com/a/b/').should == 'http://igvita.com/a/b'
97
+ end
98
+
89
99
  end
90
100
 
91
101
  context "canonicalization" do
@@ -125,7 +135,7 @@ describe PostRank::URI do
125
135
  context "embedded links" do
126
136
  it "should extract embedded redirects from Google News" do
127
137
  u = c('http://news.google.com/news/url?sa=t&fd=R&&url=http://www.ctv.ca/CTVNews/Politics/20110111/')
128
- u.should == 'http://www.ctv.ca/CTVNews/Politics/20110111/'
138
+ u.should == 'http://www.ctv.ca/CTVNews/Politics/20110111'
129
139
  end
130
140
 
131
141
  it "should extract embedded redirects from xfruits.com" do
@@ -135,7 +145,7 @@ describe PostRank::URI do
135
145
 
136
146
  it "should extract embedded redirects from MySpace" do
137
147
  u = c('http://www.myspace.com/Modules/PostTo/Pages/?u=http%3A%2F%2Fghanaian-chronicle.com%2Fnews%2Fother-news%2Fcanadian-high-commissioner-urges-media%2F&t=Canadian%20High%20Commissioner%20urges%20media')
138
- u.should == 'http://ghanaian-chronicle.com/news/other-news/canadian-high-commissioner-urges-media/'
148
+ u.should == 'http://ghanaian-chronicle.com/news/other-news/canadian-high-commissioner-urges-media'
139
149
  end
140
150
  end
141
151
  end
@@ -172,11 +182,12 @@ describe PostRank::URI do
172
182
  end
173
183
 
174
184
  it "should compute MD5 hash of the normalized URI" do
175
- hash = '021a1032b1ea631a7c33d1a0ccc562bf'
185
+ hash = '55fae8910d312b7878a3201ed653b881'
176
186
 
177
- h('http://EverBurnign.Com/feed/post/1').should == hash
178
- h('Everburnign.com/feed/post/1').should == hash
179
- h('everburnign.com/feed/post/1').should == hash
187
+ h('http://EverBurning.Com/feed/post/1').should == hash
188
+ h('Everburning.com/feed/post/1').should == hash
189
+ h('everburning.com/feed/post/1').should == hash
190
+ h('everburning.com/feed/post/1/').should == hash
180
191
  end
181
192
  end
182
193
 
metadata CHANGED
@@ -1,12 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: postrank-uri
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 1
7
- - 0
8
- - 7
9
- version: 1.0.7
4
+ prerelease:
5
+ version: 1.0.8
10
6
  platform: ruby
11
7
  authors:
12
8
  - Ilya Grigorik
@@ -14,7 +10,7 @@ autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
12
 
17
- date: 2011-02-22 00:00:00 -05:00
13
+ date: 2011-03-31 00:00:00 -04:00
18
14
  default_executable:
19
15
  dependencies:
20
16
  - !ruby/object:Gem::Dependency
@@ -25,10 +21,6 @@ dependencies:
25
21
  requirements:
26
22
  - - ">="
27
23
  - !ruby/object:Gem::Version
28
- segments:
29
- - 2
30
- - 2
31
- - 3
32
24
  version: 2.2.3
33
25
  type: :runtime
34
26
  version_requirements: *id001
@@ -40,8 +32,6 @@ dependencies:
40
32
  requirements:
41
33
  - - ">="
42
34
  - !ruby/object:Gem::Version
43
- segments:
44
- - 0
45
35
  version: "0"
46
36
  type: :runtime
47
37
  version_requirements: *id002
@@ -53,8 +43,6 @@ dependencies:
53
43
  requirements:
54
44
  - - ">="
55
45
  - !ruby/object:Gem::Version
56
- segments:
57
- - 0
58
46
  version: "0"
59
47
  type: :runtime
60
48
  version_requirements: *id003
@@ -66,8 +54,6 @@ dependencies:
66
54
  requirements:
67
55
  - - ">="
68
56
  - !ruby/object:Gem::Version
69
- segments:
70
- - 0
71
57
  version: "0"
72
58
  type: :development
73
59
  version_requirements: *id004
@@ -107,21 +93,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
107
93
  requirements:
108
94
  - - ">="
109
95
  - !ruby/object:Gem::Version
110
- segments:
111
- - 0
112
96
  version: "0"
113
97
  required_rubygems_version: !ruby/object:Gem::Requirement
114
98
  none: false
115
99
  requirements:
116
100
  - - ">="
117
101
  - !ruby/object:Gem::Version
118
- segments:
119
- - 0
120
102
  version: "0"
121
103
  requirements: []
122
104
 
123
105
  rubyforge_project: postrank-uri
124
- rubygems_version: 1.3.7
106
+ rubygems_version: 1.6.2
125
107
  signing_key:
126
108
  specification_version: 3
127
109
  summary: URI normalization, c18n, escaping, and extraction