url_parser 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,481 +1,187 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe UrlParser do
4
-
5
- let(:parser) { UrlParser.new(link, clean: true) }
3
+ RSpec.describe UrlParser do
6
4
 
7
5
  it "must be defined" do
8
6
  expect(UrlParser::VERSION).not_to be_nil
9
7
  end
10
8
 
11
- context "::SCHEMES" do
12
-
13
- it { expect( UrlParser::SCHEMES).to be_an Array }
14
-
15
- end
16
-
17
- context "::DEFAULT_SCHEMES" do
18
-
19
- it { expect( UrlParser::DEFAULT_SCHEMES).to be_an Array }
20
-
21
- end
22
-
23
- context "::call" do
24
-
25
- let(:link) { 'http://example.com/' }
26
- let(:text) { "there is a #{link} in here" }
27
- let(:extractor) { UrlParser.call(text, clean: true) }
28
-
29
- it "extracts urls from text into an array" do
30
- expect(extractor.collect(&:url).collect(&:to_s))
31
- .to include link
32
- end
33
-
34
- it "initializes each url with the parser" do
35
- expect(extractor.first).to be_a UrlParser::Base
36
- end
37
-
38
- end
39
-
40
- context "::new" do
41
-
42
- let(:link) { 'http://example.com/path' }
9
+ context "configuration" do
43
10
 
44
- it "initializes a parser with a url" do
45
- expect(parser.to_s).to eq link
46
- end
47
-
48
- it "adds http by default" do
49
- expect(UrlParser.new('example.com/path').to_s).to eq link
50
- end
51
-
52
- it "adds http to protocol-less urls" do
53
- expect(UrlParser.new('//example.com/path').to_s).to eq link
54
- end
55
-
56
- it "cannot initialize invalid urls" do
57
- expect(UrlParser.new('http:||bra.ziz').url).to be_nil
58
- end
59
-
60
- it "catches errors from invalid urls" do
61
- expect(UrlParser.new('http:||bra.ziz').errors).not_to be_empty
62
- end
63
-
64
- context "options" do
65
-
66
- context ":clean" do
67
-
68
- let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
69
-
70
- it "when true cleans the url" do
71
- expect(parser.to_s).not_to eq parser.original_url
72
- end
73
-
74
- it "when true it normalizes the url" do
75
- [
76
- 'http://igvita.com/',
77
- 'http://igvita.com///',
78
- 'http://igvita.com/../?#',
79
- 'http://igvita.com/a/../?',
80
- 'http://igvita.com/a/../?utm_source%3Danalytics'
81
- ].each do |url|
82
- expect(UrlParser.new(url, clean: true).to_s)
83
- .to eq 'http://igvita.com/'
84
- end
85
- end
86
-
87
- it "does not clean the url by default" do
88
- expect(UrlParser.new(link).to_s)
89
- .to eq PostRank::URI.parse(parser.original_url).to_s
90
- end
11
+ context ":embedded_params" do
91
12
 
13
+ it "sets the unembed param keys" do
14
+ described_class.configuration.embedded_params = [ 'ref' ]
15
+ uri = UrlParser.unembed('https://www.upwork.com/leaving?ref=https%3A%2F%2Fwww.example.com')
16
+ expect(uri.to_s).to eq 'https://www.example.com/'
17
+ described_class.configuration.reset
92
18
  end
93
19
 
94
- context ":raise_errors" do
95
-
96
- it "raises instead of catching errors" do
97
- expect{
98
- UrlParser.new('http:||bra.ziz', raise_errors: true)
99
- }.to raise_error
100
- end
20
+ end
101
21
 
102
- it "any errors raised inherit from UrlParser::Error" do
103
- expect{
104
- UrlParser.new('http:||bra.ziz', raise_errors: true)
105
- }.to raise_error UrlParser::Error
106
- end
22
+ context ":default_scheme" do
107
23
 
24
+ it "sets a default scheme if one is not present" do
25
+ described_class.configuration.default_scheme = 'https'
26
+ uri = UrlParser.parse('example.com')
27
+ expect(uri.to_s).to eq 'https://example.com/'
28
+ described_class.configuration.reset
108
29
  end
109
30
 
110
31
  end
111
32
 
112
- end
33
+ context ":scheme_map" do
113
34
 
114
- context "#original_url" do
115
-
116
- let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
35
+ it "replaces scheme keys in the map with the corresponding value" do
36
+ described_class.configuration.scheme_map = { 'feed' => 'http' }
37
+ uri = UrlParser.parse('feed://feeds.feedburner.com/YourBlog')
38
+ expect(uri.to_s).to eq 'http://feeds.feedburner.com/YourBlog'
39
+ described_class.configuration.reset
40
+ end
117
41
 
118
- it "preserves the url input" do
119
- expect(parser.original_url).to eq link
120
42
  end
121
43
 
122
44
  end
123
45
 
124
- context "#url" do
125
-
126
- let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
46
+ context ".tag_errors" do
127
47
 
128
- it "returns a url" do
129
- expect(parser.url).to be_a Addressable::URI
48
+ it "tags StandardError exceptions" do
49
+ expect{
50
+ described_class.tag_errors{ raise StandardError }
51
+ }.to raise_error UrlParser::Error
130
52
  end
131
53
 
132
- end
133
-
134
- context "#schemes" do
135
-
136
- it "returns an array of allowed schemes" do
137
- parser = UrlParser.new('telnet://some.com', schemes: 'telnet')
138
- expect(parser.schemes).to be_an Array
54
+ it "does not tag errors that do not inherit from StandardError", :disable_raise_error_warning do
55
+ expect{
56
+ described_class.tag_errors{ raise Exception }
57
+ }.not_to raise_error UrlParser::Error
139
58
  end
140
59
 
141
60
  end
142
61
 
143
- context "#parse" do
62
+ context ".new" do
144
63
 
145
- let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
146
-
147
- it "calls postrank-uri's parse function" do
148
- expect(PostRank::URI).to receive :parse
149
- UrlParser.new(link, clean: false)
64
+ it "is deprecated" do
65
+ expect(described_class).to receive(:warn)
66
+ described_class.new('http://example.com')
150
67
  end
151
68
 
152
- it "tags errors when set to raise errors" do
153
- parser = UrlParser.new(link, clean: true, raise_errors: true)
154
- expect(PostRank::URI).to receive(:parse).and_raise(StandardError)
155
- expect{ parser.send(:parse, link) }.to raise_error UrlParser::Error
69
+ it "calls .parse" do
70
+ expect(described_class).to receive(:warn)
71
+ expect(described_class).to receive(:parse)
72
+ described_class.new('http://example.com')
156
73
  end
157
74
 
158
75
  end
159
76
 
160
- context "#clean" do
161
-
162
- let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
77
+ context ".escape" do
163
78
 
164
- it "calls postrank-uri's clean function" do
165
- expect(PostRank::URI).to receive :clean
166
- UrlParser.new(link, clean: true)
79
+ it "encodes a string" do
80
+ expect(described_class.escape('id=1')).to eq 'id%3D1'
167
81
  end
168
82
 
169
- it "tags errors" do
170
- parser = UrlParser.new(link, clean: false, raise_errors: true)
171
- expect(PostRank::URI).to receive(:clean).and_raise(StandardError)
172
- expect{ parser.send(:clean, link) }.to raise_error UrlParser::Error
83
+ it "escapes spaces as %20" do
84
+ expect(described_class.escape('id= 1')).to eq 'id%3D%201'
173
85
  end
174
86
 
175
87
  end
176
88
 
177
- context "#parser" do
89
+ context ".unescape" do
178
90
 
179
- let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
180
-
181
- it "calls postrank-uri's clean function" do
182
- expect(Domainatrix).to receive(:parse).with(parser.to_s)
183
- UrlParser.new(link, clean: true)
91
+ it "decodes a string" do
92
+ expect(described_class.unescape('id%3D1')).to eq 'id=1'
184
93
  end
185
94
 
186
- it "tags errors" do
187
- expect(Domainatrix).to receive(:parse).and_raise(StandardError)
188
- expect{
189
- UrlParser.new(link, clean: false, raise_errors: true)
190
- }.to raise_error UrlParser::Error
95
+ it "unescapes spaces" do
96
+ expect(described_class.unescape('id%3D%201')).to eq 'id= 1'
191
97
  end
192
98
 
193
- end
194
-
195
- context "#clean!" do
196
-
197
- let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
198
- let(:parser) { UrlParser.new(link) }
99
+ context "accept improperly encoded strings" do
199
100
 
200
- it "normalizes the url" do
201
- parser.clean!
202
- expect(parser.to_s).to eq 'http://link.to/?a=b'
203
- end
204
-
205
- it "resets the parser" do
206
- expect{
207
- parser.clean!
208
- }.to change{
209
- parser.parser
210
- }
211
- end
101
+ it "by unencoding spaces in the query encoded as '+'" do
102
+ expect(described_class.unescape('?id=+1')).to eq '?id= 1'
103
+ end
212
104
 
213
- end
105
+ it "by unencoding spaces in the query encoded as '+'" do
106
+ expect(described_class.unescape('?id%3D+1')).to eq '?id= 1'
107
+ end
214
108
 
215
- context "#to_s" do
109
+ it "by unencoding spaces in the query encoded as '%20'" do
110
+ expect(described_class.unescape('?id=%201')).to eq '?id= 1'
111
+ end
216
112
 
217
- let(:link) { 'http://example.com/' }
113
+ it "but does not unencode '+' to spaces in paths" do
114
+ expect(described_class.unescape('/foo+bar?id=foo+bar')).to eq '/foo+bar?id=foo bar'
115
+ end
218
116
 
219
- it "returns a string representation of the url" do
220
- expect(parser.to_s).to eq 'http://example.com/'
221
117
  end
222
118
 
223
119
  end
224
120
 
225
- context "#hash" do
121
+ context ".parse" do
226
122
 
227
- let(:link) { 'http://example.com/' }
228
-
229
- it "hashes the url string" do
230
- expect(parser.hash).to eq Digest::SHA1.hexdigest(link)
123
+ it "returns an instance of UrlParser::URI" do
124
+ expect(described_class.parse('http://example.com')).to be_a UrlParser::URI
231
125
  end
232
126
 
233
127
  end
234
128
 
235
- context "#valid?" do
129
+ context ".unembed" do
236
130
 
237
- it "returns true if there are no errors" do
238
- expect(UrlParser.new('http://example.com')).to be_valid
131
+ it "returns an instance of UrlParser::URI" do
132
+ expect(described_class.unembed('http://example.com')).to be_a UrlParser::URI
239
133
  end
240
134
 
241
- it "returns false if there are errors" do
242
- expect(UrlParser.new('http:||bra.ziz')).not_to be_valid
135
+ it "parses the URI with the :unembed option enabled" do
136
+ expect(UrlParser::URI).to receive(:new).with('#', hash_including(unembed: true))
137
+ described_class.unembed('#')
243
138
  end
244
139
 
245
140
  end
246
141
 
247
- # Thanks to http://stackoverflow.com/a/4864170
248
- #
249
- context "#join" do
250
-
251
- let(:link) { 'http://foo.com/zee/zaw/zoom.html' }
252
-
253
- it "properly combines a url and and relative url" do
254
- {
255
- 'http://zork.com/' => 'http://zork.com/',
256
- 'http://zork.com/#id' => 'http://zork.com/#id',
257
- 'http://zork.com/bar' => 'http://zork.com/bar',
258
- 'http://zork.com/bar#id' => 'http://zork.com/bar#id',
259
- 'http://zork.com/bar/' => 'http://zork.com/bar/',
260
- 'http://zork.com/bar/#id' => 'http://zork.com/bar/#id',
261
- 'http://zork.com/bar/jim.html' => 'http://zork.com/bar/jim.html',
262
- 'http://zork.com/bar/jim.html#id' => 'http://zork.com/bar/jim.html#id',
263
- '/bar' => 'http://foo.com/bar',
264
- '/bar#id' => 'http://foo.com/bar#id',
265
- '/bar/' => 'http://foo.com/bar/',
266
- '/bar/#id' => 'http://foo.com/bar/#id',
267
- '/bar/jim.html' => 'http://foo.com/bar/jim.html',
268
- '/bar/jim.html#id' => 'http://foo.com/bar/jim.html#id',
269
- 'jim.html' => 'http://foo.com/zee/zaw/jim.html',
270
- 'jim.html#id' => 'http://foo.com/zee/zaw/jim.html#id',
271
- '../jim.html' => 'http://foo.com/zee/jim.html',
272
- '../jim.html#id' => 'http://foo.com/zee/jim.html#id',
273
- '../' => 'http://foo.com/zee/',
274
- '../#id' => 'http://foo.com/zee/#id',
275
- '#id' => 'http://foo.com/zee/zaw/zoom.html#id'
276
- }.each do |relative_url, expected_result|
277
- expect(parser.join(relative_url).to_s).to eq expected_result
278
- end
142
+ context ".canonicalize" do
279
143
 
144
+ it "returns an instance of UrlParser::URI" do
145
+ expect(described_class.canonicalize('http://example.com')).to be_a UrlParser::URI
280
146
  end
281
147
 
282
- end
283
-
284
- # http://medialize.github.io/URI.js/about-uris.html
285
- #
286
- context "uri components" do
287
-
288
- let(:parser) { UrlParser.new(link, clean: false) }
289
-
290
- context "when all are present" do
291
-
292
- let(:link) do
293
- 'https://username:password@ww2.foo.bar.example.com:123/hello/world/there.html?name=ferret#foo'
294
- end
295
-
296
- it { expect(parser.errors).to be_empty }
297
- it { expect(parser).to be_valid }
298
- it { expect(parser.scheme).to eq 'https' }
299
- it { expect(parser.username).to eq 'username' }
300
- it { expect(parser.password).to eq 'password' }
301
- it { expect(parser.userinfo).to eq 'username:password' }
302
- it { expect(parser.www).to eq 'ww2' }
303
- it { expect(parser.subdomain).to eq 'foo.bar' }
304
- it { expect(parser.subdomains).to eq 'ww2.foo.bar' }
305
- it { expect(parser.domain_name).to eq 'example' }
306
- it { expect(parser.domain).to eq 'example.com' }
307
- it { expect(parser.tld).to eq 'com' }
308
- it { expect(parser.hostname).to eq 'ww2.foo.bar.example.com' }
309
- it { expect(parser.port).to eq 123 }
310
- it { expect(parser.host).to eq 'ww2.foo.bar.example.com:123' }
311
- it { expect(parser.origin).to eq 'https://ww2.foo.bar.example.com:123' }
312
- it { expect(parser.authority).to eq 'username:password@ww2.foo.bar.example.com:123' }
313
- it { expect(parser.site).to eq 'https://username:password@ww2.foo.bar.example.com:123' }
314
- it { expect(parser.directory).to eq '/hello/world' }
315
- it { expect(parser.path).to eq '/hello/world/there.html' }
316
- it { expect(parser.segment).to eq 'there.html' }
317
- it { expect(parser.filename).to eq 'there.html' }
318
- it { expect(parser.suffix).to eq 'html' }
319
- it { expect(parser.query).to eq 'name=ferret' }
320
- it { expect(parser.query_values['name']).to eq 'ferret' }
321
- it { expect(parser.fragment).to eq 'foo' }
322
- it { expect(parser.resource).to eq 'there.html?name=ferret#foo' }
323
- end
324
-
325
- context "when none are present" do
326
-
327
- let(:link) { '/' }
328
-
329
- it { expect(parser.errors).to be_empty }
330
- it { expect(parser.scheme).to be_nil }
331
- it { expect(parser.username).to be_nil }
332
- it { expect(parser.password).to be_nil }
333
- it { expect(parser.userinfo).to be_nil }
334
- it { expect(parser.www).to be_nil }
335
- it { expect(parser.subdomain).to be_nil }
336
- it { expect(parser.subdomains).to be_nil }
337
- it { expect(parser.domain_name).to be_nil }
338
- it { expect(parser.domain).to be_nil }
339
- it { expect(parser.tld).to be_nil }
340
- it { expect(parser.hostname).to be_nil }
341
- it { expect(parser.port).to be_nil }
342
- it { expect(parser.host).to be_nil }
343
- it { expect(parser.origin).to be_nil }
344
- it { expect(parser.authority).to be_nil }
345
- it { expect(parser.site).to be_nil }
346
- it { expect(parser.directory).to eq '/' }
347
- it { expect(parser.path).to eq '/' }
348
- it { expect(parser.segment).to be_nil }
349
- it { expect(parser.filename).to eq 'index.html' }
350
- it { expect(parser.suffix).to be_nil }
351
- it { expect(parser.query).to be_nil }
352
- it { expect(parser.query_values['name']).to be_nil }
353
- it { expect(parser.fragment).to be_nil }
354
- it { expect(parser.resource).to be_nil }
355
-
356
- end
357
-
358
- context "when empty" do
359
-
360
- let(:link) { '' }
361
-
362
- it { expect(parser.errors).to be_empty }
363
- it { expect(parser.scheme).to be_nil }
364
- it { expect(parser.username).to be_nil }
365
- it { expect(parser.password).to be_nil }
366
- it { expect(parser.userinfo).to be_nil }
367
- it { expect(parser.www).to be_nil }
368
- it { expect(parser.subdomain).to be_nil }
369
- it { expect(parser.subdomains).to be_nil }
370
- it { expect(parser.domain_name).to be_nil }
371
- it { expect(parser.domain).to be_nil }
372
- it { expect(parser.tld).to be_nil }
373
- it { expect(parser.hostname).to be_nil }
374
- it { expect(parser.port).to be_nil }
375
- it { expect(parser.host).to be_nil }
376
- it { expect(parser.origin).to be_nil }
377
- it { expect(parser.authority).to be_nil }
378
- it { expect(parser.site).to be_nil }
379
- it { expect(parser.directory).to eq '/' }
380
- it { expect(parser.path).to eq '' }
381
- it { expect(parser.segment).to be_nil }
382
- it { expect(parser.filename).to eq 'index.html' }
383
- it { expect(parser.suffix).to be_nil }
384
- it { expect(parser.query).to be_nil }
385
- it { expect(parser.query_values['name']).to be_nil }
386
- it { expect(parser.fragment).to be_nil }
387
- it { expect(parser.resource).to be_nil }
388
-
389
- end
390
-
391
- context "when invalid" do
392
-
393
- let(:link) { 'http://#content-zone' }
394
-
395
- it { expect(parser.errors).not_to be_empty }
396
- it { expect(parser.scheme).to be_nil }
397
- it { expect(parser.username).to be_nil }
398
- it { expect(parser.password).to be_nil }
399
- it { expect(parser.userinfo).to be_nil }
400
- it { expect(parser.www).to be_nil }
401
- it { expect(parser.subdomain).to be_nil }
402
- it { expect(parser.subdomains).to be_nil }
403
- it { expect(parser.domain_name).to be_nil }
404
- it { expect(parser.domain).to be_nil }
405
- it { expect(parser.tld).to be_nil }
406
- it { expect(parser.hostname).to be_nil }
407
- it { expect(parser.port).to be_nil }
408
- it { expect(parser.host).to be_nil }
409
- it { expect(parser.origin).to be_nil }
410
- it { expect(parser.authority).to be_nil }
411
- it { expect(parser.site).to be_nil }
412
- it { expect(parser.directory).to be_nil }
413
- it { expect(parser.path).to be_nil }
414
- it { expect(parser.segment).to be_nil }
415
- it { expect(parser.filename).to be_nil }
416
- it { expect(parser.suffix).to be_nil }
417
- it { expect(parser.query).to be_nil }
418
- it { expect(parser.query_values['name']).to be_nil }
419
- it { expect(parser.fragment).to be_nil }
420
- it { expect(parser.resource).to be_nil }
421
-
148
+ it "parses the URI with the :canonicalize option enabled" do
149
+ expect(UrlParser::URI).to receive(:new).with('#', hash_including(canonicalize: true))
150
+ described_class.canonicalize('#')
422
151
  end
423
152
 
424
153
  end
425
154
 
426
- context "localhost?" do
155
+ context ".normalize" do
427
156
 
428
- let(:link) { 'localhost:5000' }
429
-
430
- it "returns true for localhost" do
431
- expect(parser).to be_localhost
157
+ it "returns an instance of UrlParser::URI" do
158
+ expect(described_class.normalize('http://example.com')).to be_a UrlParser::URI
432
159
  end
433
160
 
434
- end
435
-
436
- context "#domain_name" do
437
-
438
- let(:link) { 'https://github.com/pauldix/domainatrix' }
439
-
440
- it "returns the domain name without the suffix" do
441
- expect(parser.domain_name).to eq 'github'
161
+ it "parses the URI with the :normalize option enabled" do
162
+ expect(UrlParser::URI).to receive(:new).with('#', hash_including(normalize: true))
163
+ described_class.normalize('#')
442
164
  end
443
165
 
444
166
  end
445
167
 
446
- context "#domain" do
447
-
448
- let(:link) { 'https://github.com/pauldix/domainatrix' }
168
+ context ".clean" do
449
169
 
450
- it "returns the domain name with suffix" do
451
- expect(parser.domain).to eq 'github.com'
170
+ it "returns an instance of UrlParser::URI" do
171
+ expect(described_class.clean('http://example.com')).to be_a UrlParser::URI
452
172
  end
453
173
 
454
- end
455
-
456
- context "#subdomain" do
457
-
458
- let(:link) { 'http://foo.bar.pauldix.co.uk/asdf.html?q=arg' }
459
-
460
- it "returns all subdomains" do
461
- expect(parser.subdomain).to eq 'foo.bar'
174
+ it "parses the URI with the :clean option enabled" do
175
+ expect(UrlParser::URI).to receive(:new).with('#', hash_including(clean: true))
176
+ described_class.clean('#')
462
177
  end
463
178
 
464
- it "returns nil if there is no subdomain" do
465
- url = UrlParser.new('https://github.com/')
466
- expect(url.subdomain).to be_nil
467
- end
179
+ end
468
180
 
469
- it "does not include www as part of the subdomain" do
470
- parser = UrlParser.new("http://www.energy.ca.gov/")
471
- expect(parser.subdomain).to eq 'energy'
472
- end
181
+ context ".wrap" do
473
182
 
474
- it "does not include any variation of www as part of the subdomain" do
475
- [ 'ww2', 'www2', 'ww23', 'www23' ].each do |www|
476
- parser = UrlParser.new("http://#{www}.energy.ca.gov/")
477
- expect(parser.subdomain).to eq 'energy'
478
- end
183
+ it "converts nil to an array" do
184
+ expect(described_class.wrap(nil)).to eq([])
479
185
  end
480
186
 
481
187
  end