url_parser 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,570 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe UrlParser::URI do
4
+
5
+ let(:ipv4) { described_class.new('http://192.168.1.1') }
6
+ let(:ipv6) { described_class.new('http://ff02::1') }
7
+ let(:localhost) { described_class.new('http://localhost:5000/some/path') }
8
+ let(:relative_uri) { described_class.new('/some/path/to.html?name=example') }
9
+
10
+ let(:instance) do
11
+ described_class.new('foo://username:password@ww2.foo.bar.example.com:123/hello/world/there.html?name=ferret#foo')
12
+ end
13
+
14
+ context ".new" do
15
+
16
+ it "does not accept the :raw option" do
17
+ instance = described_class.new('http://example.com', raw: true)
18
+ expect(instance.uri).to be_an Addressable::URI
19
+ end
20
+
21
+ it "requires a uri" do
22
+ expect{ described_class.new }.to raise_error ArgumentError
23
+ end
24
+
25
+ it "sets the input uri from the first argument" do
26
+ instance = described_class.new('http://example.com')
27
+ expect(instance.input).to eq 'http://example.com'
28
+ end
29
+
30
+ context "uri" do
31
+
32
+ it "requires an argument" do
33
+ expect{ described_class.new }.to raise_error ArgumentError
34
+ end
35
+
36
+ it "parses a string into an Addressable::URI" do
37
+ uri = 'http://example.com'
38
+ expect(described_class.new(uri).uri).to be_an Addressable::URI
39
+ end
40
+
41
+ it "parses a URI into an Addressable::URI" do
42
+ uri = URI('http://example.com')
43
+ expect(described_class.new(uri).uri).to be_an Addressable::URI
44
+ end
45
+
46
+ it "does not parse an object that is an existing Addressable::URI" do
47
+ uri = Addressable::URI.parse 'http://example.com'
48
+ expect(described_class.new(uri).uri).to eq uri
49
+ end
50
+
51
+ end
52
+
53
+ context "options" do
54
+
55
+ it "are not required" do
56
+ instance = described_class.new('http://example.com')
57
+ expect(instance.options).to be_empty
58
+ end
59
+
60
+ it "sets any included options" do
61
+ instance = described_class.new('http://example.com', some: 'option')
62
+ expect(instance.options).to eq({ some: 'option' })
63
+ end
64
+
65
+ context ":clean" do
66
+
67
+ it "when true cleans the url" do
68
+ instance = described_class.new('link.to?a=b&utm_source=FeedBurner#stuff', clean: true)
69
+ expect(instance.to_s).to eq 'http://link.to/?a=b'
70
+ end
71
+
72
+ it "when true it normalizes the url" do
73
+ [
74
+ 'http://example.com/',
75
+ 'http://example.com///',
76
+ 'http://example.com/../?#',
77
+ 'http://example.com/a/../?',
78
+ 'http://example.com/a/../?utm_source%3Danalytics'
79
+ ].each do |url|
80
+ expect(described_class.new(url, clean: true).to_s)
81
+ .to eq 'http://example.com/'
82
+ end
83
+ end
84
+
85
+ it "does not clean the url by default" do
86
+ expect(described_class.new('link.to/?a=b&utm_source=FeedBurner#stuff').to_s)
87
+ .to eq 'http://link.to/?a=b&utm_source=FeedBurner#stuff'
88
+ end
89
+
90
+ end
91
+
92
+ end
93
+
94
+ context "unescaped?" do
95
+
96
+ it "is false by default" do
97
+ instance = described_class.new('http://example.com/path?id%3D1')
98
+ expect(instance).not_to be_unescaped
99
+ expect(instance.to_s).to eq 'http://example.com/path?id%3D1'
100
+ end
101
+
102
+ it "returns true if the :unescape option is enabled" do
103
+ instance = described_class.new('http://example.com/path?id%3D1', unescape: true)
104
+ expect(instance).to be_unescaped
105
+ expect(instance.to_s).to eq 'http://example.com/path?id=1'
106
+ end
107
+
108
+ end
109
+
110
+ context "parsed?" do
111
+
112
+ it "is true by default" do
113
+ instance = described_class.new('http://example.com/')
114
+ expect(instance).to be_parsed
115
+ end
116
+
117
+ it "cannot be set to false" do
118
+ instance = described_class.new('http://example.com/', parse: false)
119
+ expect(instance).to be_parsed
120
+ end
121
+
122
+ end
123
+
124
+ context "unembedded?" do
125
+
126
+ it "is false by default" do
127
+ instance = described_class.new('http://energy.gov/exit?url=https%3A//twitter.com/energy')
128
+ expect(instance).not_to be_unembedded
129
+ expect(instance.uri.to_s).to eq 'http://energy.gov/exit?url=https%3A//twitter.com/energy'
130
+ end
131
+
132
+ it "returns true if the :unembed option is enabled" do
133
+ instance = described_class.new('http://energy.gov/exit?url=https%3A//twitter.com/energy', unembed: true)
134
+ expect(instance).to be_unembedded
135
+ expect(instance.uri.to_s).to eq 'https://twitter.com/energy'
136
+ end
137
+
138
+ end
139
+
140
+ context "canonicalized?" do
141
+
142
+ it "is false by default" do
143
+ instance = described_class.new('https://wikipedia.org/?source=ABCD&utm_source=EFGH')
144
+ expect(instance).not_to be_canonicalized
145
+ expect(instance.to_s).to eq 'https://wikipedia.org/?source=ABCD&utm_source=EFGH'
146
+ end
147
+
148
+ it "returns true if the :canonicalize option is enabled" do
149
+ instance = described_class.new('https://wikipedia.org/?source=ABCD&utm_source=EFGH', canonicalize: true)
150
+ expect(instance).to be_canonicalized
151
+ expect(instance.to_s).to eq 'https://wikipedia.org/?'
152
+ end
153
+
154
+ end
155
+
156
+ context "normalized?" do
157
+
158
+ it "is false by default" do
159
+ instance = described_class.new('http://example.com/#test')
160
+ expect(instance).not_to be_normalized
161
+ expect(instance.to_s).to eq 'http://example.com/#test'
162
+ end
163
+
164
+ it "returns true if the :canonicalize option is enabled" do
165
+ instance = described_class.new('http://example.com/#test', normalize: true)
166
+ expect(instance).to be_normalized
167
+ expect(instance.to_s).to eq 'http://example.com/'
168
+ end
169
+
170
+ end
171
+
172
+ context "cleaned?" do
173
+
174
+ it "is false by default" do
175
+ instance = described_class.new('http://example.com/?utm_source=google')
176
+ expect(instance).not_to be_cleaned
177
+ expect(instance.uri.to_s).to eq 'http://example.com/?utm_source=google'
178
+ end
179
+
180
+ it "returns true if the :clean option is enabled" do
181
+ instance = described_class.new('http://example.com/?utm_source=google', clean: true)
182
+ expect(instance).to be_cleaned
183
+ expect(instance.uri.to_s).to eq 'http://example.com/'
184
+ end
185
+
186
+ end
187
+
188
+ {
189
+ scheme: 'foo',
190
+ username: 'username',
191
+ user: 'username',
192
+ password: 'password',
193
+ userinfo: 'username:password',
194
+ hostname: 'ww2.foo.bar.example.com',
195
+ naked_hostname: 'foo.bar.example.com',
196
+ port: 123,
197
+ host: 'ww2.foo.bar.example.com:123',
198
+ www: 'ww2',
199
+ tld: 'com',
200
+ top_level_domain: 'com',
201
+ extension: 'com',
202
+ sld: 'example',
203
+ second_level_domain: 'example',
204
+ domain_name: 'example',
205
+ trd: 'ww2.foo.bar',
206
+ third_level_domain: 'ww2.foo.bar',
207
+ subdomains: 'ww2.foo.bar',
208
+ naked_trd: 'foo.bar',
209
+ naked_subdomain: 'foo.bar',
210
+ domain: 'example.com',
211
+ subdomain: 'ww2.foo.bar.example.com',
212
+ origin: 'foo://ww2.foo.bar.example.com:123',
213
+ authority: 'username:password@ww2.foo.bar.example.com:123',
214
+ site: 'foo://username:password@ww2.foo.bar.example.com:123',
215
+ path: '/hello/world/there.html',
216
+ segment: 'there.html',
217
+ directory: '/hello/world',
218
+ filename: 'there.html',
219
+ suffix: 'html',
220
+ query: 'name=ferret',
221
+ query_values: { 'name' => 'ferret' },
222
+ fragment: 'foo',
223
+ resource: 'there.html?name=ferret#foo',
224
+ location: '/hello/world/there.html?name=ferret#foo'
225
+ }.each do |method, expected_value|
226
+
227
+ it "delegates ##{method} to the model instance" do
228
+ expect(instance.send(method)).to eq UrlParser::Model.new(instance.uri).send(method)
229
+ expect(instance.send(method)).to eq expected_value # Sanity check
230
+ end
231
+
232
+ end
233
+
234
+ it "delegates #labels to the model instance's parsed_domain" do
235
+ expect(instance.labels).to eq [ "com", "example", "bar", "foo", "ww2" ]
236
+ end
237
+
238
+ end
239
+
240
+ context "clean" do
241
+
242
+ it "returns the raw URI if the URI was cleaned on initialization" do
243
+ instance = described_class.new('http://example.com/?utm_source=google', clean: true)
244
+ expect(instance.clean).to eq 'http://example.com/'
245
+ end
246
+
247
+ it "reparses the original URI if it was not cleaned" do
248
+ instance = described_class.new('http://example.com/?utm_source=google')
249
+ expect(instance.clean).to eq 'http://example.com/'
250
+ end
251
+
252
+ end
253
+
254
+ context "#clean?" do
255
+
256
+ it "returns true if the URI was cleaned on initialization" do
257
+ instance = described_class.new('http://example.com/?utm_source=google', clean: true)
258
+ expect(instance).to be_clean
259
+ end
260
+
261
+ it "returns true if the URI was already 'clean'" do
262
+ instance = described_class.new('http://example.com/')
263
+ expect(instance).to be_clean
264
+ end
265
+
266
+ it "returns false if the URI is not clean" do
267
+ instance = described_class.new('http://example.com/?utm_source=google')
268
+ expect(instance).not_to be_clean
269
+ end
270
+
271
+ end
272
+
273
+ # Thanks to http://stackoverflow.com/a/4864170
274
+ #
275
+ context "#+" do
276
+
277
+ let(:link) { 'http://foo.com/zee/zaw/zoom.html' }
278
+
279
+ it "properly combines a url and and relative url" do
280
+ {
281
+ 'http://zork.com/' => 'http://zork.com/',
282
+ 'http://zork.com/#id' => 'http://zork.com/#id',
283
+ 'http://zork.com/bar' => 'http://zork.com/bar',
284
+ 'http://zork.com/bar#id' => 'http://zork.com/bar#id',
285
+ 'http://zork.com/bar/' => 'http://zork.com/bar/',
286
+ 'http://zork.com/bar/#id' => 'http://zork.com/bar/#id',
287
+ 'http://zork.com/bar/jim.html' => 'http://zork.com/bar/jim.html',
288
+ 'http://zork.com/bar/jim.html#id' => 'http://zork.com/bar/jim.html#id',
289
+ '/bar' => 'http://foo.com/bar',
290
+ '/bar#id' => 'http://foo.com/bar#id',
291
+ '/bar/' => 'http://foo.com/bar/',
292
+ '/bar/#id' => 'http://foo.com/bar/#id',
293
+ '/bar/jim.html' => 'http://foo.com/bar/jim.html',
294
+ '/bar/jim.html#id' => 'http://foo.com/bar/jim.html#id',
295
+ 'jim.html' => 'http://foo.com/zee/zaw/jim.html',
296
+ 'jim.html#id' => 'http://foo.com/zee/zaw/jim.html#id',
297
+ '../jim.html' => 'http://foo.com/zee/jim.html',
298
+ '../jim.html#id' => 'http://foo.com/zee/jim.html#id',
299
+ '../' => 'http://foo.com/zee/',
300
+ '../#id' => 'http://foo.com/zee/#id',
301
+ '#id' => 'http://foo.com/zee/zaw/zoom.html#id'
302
+ }.each do |relative_url, expected_result|
303
+ instance = described_class.new(link)
304
+ expect((instance + relative_url).to_s).to eq expected_result
305
+ end
306
+ end
307
+
308
+ it "returns an instance of UrlParser::URI" do
309
+ instance = described_class.new(link)
310
+ expect(instance + '#').to be_a described_class
311
+ end
312
+
313
+ it "is aliased to #join" do
314
+ instance = described_class.new(link)
315
+ expect(instance.method(:join)).to eq instance.method(:+)
316
+ end
317
+
318
+ end
319
+
320
+ context "#raw" do
321
+
322
+ it "is alised to #to_s" do
323
+ instance = described_class.new('http://example.com/')
324
+ expect(instance.method(:to_s)).to eq instance.method(:raw)
325
+ end
326
+
327
+ it "returns a string of the URI" do
328
+ instance = described_class.new('http://example.com/')
329
+ expect(instance.raw).to be_a String
330
+ end
331
+
332
+ end
333
+
334
+ context "#sha1" do
335
+
336
+ let(:instance) { described_class.new('http://example.com/') }
337
+
338
+ it "is aliased to #hash" do
339
+ expect(instance.method(:sha1)).to eq instance.method(:hash)
340
+ end
341
+
342
+ it "returns a SHA1 hash representation of the raw uri" do
343
+ expect(instance.sha1).to eq "9c17e047f58f9220a7008d4f18152fee4d111d14"
344
+ end
345
+
346
+ end
347
+
348
+ context "#canonical" do
349
+
350
+ it "cleans the uri" do
351
+ instance = described_class.new('http://example.com/?utm_source%3Danalytics')
352
+ expect(instance.canonical).to eq '//example.com/'
353
+ end
354
+
355
+ it "strips the scheme" do
356
+ instance = described_class.new('https://example.com/')
357
+ expect(instance.canonical).to eq '//example.com/'
358
+ end
359
+
360
+ it "normalizes the uri" do
361
+ instance = described_class.new('http://example.com/../')
362
+ expect(instance.canonical).to eq '//example.com/'
363
+ end
364
+
365
+ it "converts it into a naked domain" do
366
+ instance = described_class.new('http://www.example.com/')
367
+ expect(instance.canonical).to eq '//example.com/'
368
+ end
369
+
370
+ it "preserves the scheme" do
371
+ instance = described_class.new('https://www.example.com/')
372
+ expect(instance.canonical).to eq '//example.com/'
373
+ end
374
+
375
+ end
376
+
377
+ context "#relative?" do
378
+
379
+ it "returns true for relative URIs" do
380
+ expect(relative_uri).to be_relative
381
+ end
382
+
383
+ it "returns false for absolute URIs" do
384
+ expect(instance).not_to be_relative
385
+ end
386
+
387
+ end
388
+
389
+ context "#absolute?" do
390
+
391
+ it "returns true for absolute URIs" do
392
+ expect(instance).to be_absolute
393
+ end
394
+
395
+ it "returns false for relative URIs" do
396
+ expect(relative_uri).not_to be_absolute
397
+ end
398
+
399
+ end
400
+
401
+ context "#naked?" do
402
+
403
+ it "is always false for localhost addresses" do
404
+ expect(localhost).not_to be_naked
405
+ end
406
+
407
+ it "is false for uris with a ww? third level domain" do
408
+ instance = described_class.new('http://www.example.com')
409
+ expect(instance).not_to be_naked
410
+ end
411
+
412
+ it "is true for uris without a ww? third level domain" do
413
+ instance = described_class.new('http://example.com')
414
+ expect(instance).to be_naked
415
+ end
416
+
417
+ end
418
+
419
+ context "#localhost?" do
420
+
421
+ it "returns true for localhost addresses" do
422
+ expect(localhost).to be_localhost
423
+ end
424
+
425
+ it "returns false for non-localhost addresses" do
426
+ expect(instance).not_to be_localhost
427
+ end
428
+
429
+ end
430
+
431
+ context "#ipv4" do
432
+
433
+ it "returns the value of the ipv4 address if present" do
434
+ expect(ipv4.ipv4).to eq '192.168.1.1'
435
+ end
436
+
437
+ it "returns nil if an ipv4 address is not present" do
438
+ expect(localhost.ipv4).to be_nil
439
+ end
440
+
441
+ end
442
+
443
+ context "#ipv4?" do
444
+
445
+ it "returns true for ipv4 addresses" do
446
+ expect(ipv4).to be_ipv4
447
+ end
448
+
449
+ it "returns false for non-ipv4 addresses" do
450
+ expect(ipv6).not_to be_ipv4
451
+ end
452
+
453
+ end
454
+
455
+ context "#ipv6" do
456
+
457
+ it "returns the value of the ipv6 address if present" do
458
+ expect(ipv6.ipv6).to eq 'ff02::1'
459
+ end
460
+
461
+ it "returns nil if an ipv6 address is not present" do
462
+ expect(localhost.ipv6).to be_nil
463
+ end
464
+
465
+ end
466
+
467
+ context "#ipv6?" do
468
+
469
+ it "returns true for ipv6 addresses" do
470
+ expect(ipv6).to be_ipv6
471
+ end
472
+
473
+ it "returns false for non-ipv6 addresses" do
474
+ expect(ipv4).not_to be_ipv6
475
+ end
476
+
477
+ end
478
+
479
+ context "#ip_address?" do
480
+
481
+ it "returns true for ipv4 addresses" do
482
+ expect(ipv4).to be_ip_address
483
+ end
484
+
485
+ it "returns true for ipv6 addresses" do
486
+ expect(ipv6).to be_ip_address
487
+ end
488
+
489
+ it "returns false for URIs that are not ip addresses" do
490
+ expect(instance).not_to be_ip_address
491
+ end
492
+
493
+ end
494
+
495
+ context "#==" do
496
+
497
+ it "is true if two URIs have the same SHA1" do
498
+ expect(
499
+ described_class.new('http://example.com/') == 'http://example.com'
500
+ ).to be true
501
+ end
502
+
503
+ it "is false if two URIs do not have the same SHA1" do
504
+ expect(
505
+ described_class.new('http://example.com/') == 'http://example.org'
506
+ ).to be false
507
+ end
508
+
509
+ it "cleans both URIs before comparing" do
510
+ expect(
511
+ described_class.new('http://example.com/?utm_source=google') ==
512
+ 'http://example.com/?utm_source=yahoo'
513
+ ).to be true
514
+ end
515
+
516
+ it "compares two URIs with the :raw option enabled" do
517
+ expect(
518
+ described_class.new('http://example.com/?utm_source=google', raw: true) ==
519
+ 'http://example.com/?utm_source=yahoo'
520
+ ).to be true
521
+ end
522
+
523
+ it "does not ignore scheme" do
524
+ expect(
525
+ described_class.new('http://example.com/') == 'https://example.com'
526
+ ).to be false
527
+ end
528
+
529
+ end
530
+
531
+ context "#=~" do
532
+
533
+ it "ignores scheme with the :ignore_scheme option" do
534
+ expect(
535
+ described_class.new('http://example.com/') =~ 'https://example.com'
536
+ ).to be true
537
+ end
538
+
539
+ end
540
+
541
+ context "#valid?" do
542
+
543
+ context "by default" do
544
+
545
+ it "is true for absolute URIs" do
546
+ expect(instance).to be_valid
547
+ end
548
+
549
+ it "is false for relative URIs" do
550
+ expect(relative_uri).not_to be_valid
551
+ end
552
+
553
+ it "is true for IPv4 addresses" do
554
+ expect(ipv4).to be_valid
555
+ end
556
+
557
+ it "is true for IPv6 addresses" do
558
+ expect(ipv6).to be_valid
559
+ end
560
+
561
+ it "is false with a domain not on the public suffix list" do
562
+ instance = described_class.new('http://example.qqq')
563
+ expect(instance).not_to be_valid
564
+ end
565
+
566
+ end
567
+
568
+ end
569
+
570
+ end