url_parser 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,515 @@
require 'spec_helper'

# Specs for UrlParser::Parser.
#
# Covers construction and its options, the class-level .call/.parse entry
# point, every transformation (parse, unescape, unembed, normalize,
# canonicalize, raw) together with its in-place "bang" counterpart, the
# clean! pipeline, SHA1 hashing and equality.
RSpec.describe UrlParser::Parser do
  let(:url) { 'http://example.com/path' }

  describe ".new" do
    it "sets #uri" do
      expect(described_class.new('#').uri).to eq '#'
    end

    it "sets options" do
      # Passed as a positional hash on purpose so the very same object can be
      # compared against #options.
      opts = { host: 'localhost' }
      expect(described_class.new('#', opts).options).to eq opts
    end

    context "by default" do
      it "uses the library configured embedded_params" do
        expect(described_class.new('#').embedded_params)
          .to eq UrlParser.configuration.embedded_params
      end

      it "does not return the raw uri" do
        expect(described_class.new('#')).not_to be_raw
      end
    end

    context "options" do
      it "accepts a :base_uri option" do
        parser = described_class.new('#', base_uri: 'http://example.com')
        expect(parser.base_uri).to eq 'http://example.com'
      end

      it "accepts a :raw option" do
        expect(described_class.new('#', raw: true)).to be_raw
      end

      it "accepts an :embedded_params option" do
        # A single key is expected to be wrapped into an array.
        parser = described_class.new('#', embedded_params: 'ref')
        expect(parser.embedded_params).to eq ['ref']
      end
    end
  end

  describe ".call" do
    it "is aliased to .parse" do
      expect(described_class.method(:call)).to eq described_class.method(:parse)
    end

    it "returns an Addressable::URI" do
      expect(described_class.call('#id')).to be_an Addressable::URI
    end

    it "returns nil if the uri argument is nil" do
      expect(described_class.call(nil)).to be_nil
    end

    it "uses the default scheme if only a host is present" do
      parsed = described_class.call('//example.com', default_scheme: 'https')
      expect(parsed.scheme).to eq 'https'
    end

    it "does not fail with host labels that exceed size limitations" do
      # A DNS label may be at most 63 octets long, so 64 is one over the limit.
      oversized_label = 'a' * 64
      expect(described_class.call("#{oversized_label}.ca").host).to be_nil
    end

    # One example group per scheme; the scheme is part of the group name so
    # the three generated groups do not share an identical description.
    %w[javascript mailto xmpp].each do |scheme|
      context "with the host-less #{scheme} scheme" do
        let(:instance) { described_class.call("#{scheme}:void(0);") }

        it "sets the scheme for #{scheme} links" do
          expect(instance.scheme).to eq scheme
        end

        it "sets the path for #{scheme} links" do
          expect(instance.path).to eq 'void(0);'
        end
      end
    end

    it "accepts a custom host" do
      expect(described_class.call('/path', host: 'localhost').to_s).to eq 'http://localhost/path'
    end

    it "accepts the :raw option" do
      expect(described_class.call('https://twitter.com/energy', raw: true))
        .to eq 'https://twitter.com/energy'
    end

    context "with a block" do
      it "can call parser methods to modify the uri" do
        unembed = ->(uri) { uri.unembed! }
        result  = described_class.call('http://energy.gov/exit?url=https%3A//twitter.com/energy', &unembed)
        expect(result).to eq described_class.call('https://twitter.com/energy')
      end
    end
  end

  describe "#parse" do
    let(:instance) { described_class.new(url) }

    it "returns a parsed Addressable::URI" do
      expect(instance.parse).to be_an Addressable::URI
    end

    it "joins URIs with a :base_uri option" do
      parser = described_class.new('/bar#id', base_uri: 'http://foo.com/zee/zaw/zoom.html')
      expect(parser.parse).to eq described_class.call('http://foo.com/bar#id')
    end

    it "does not change the value of #uri" do
      expect { instance.parse }.not_to change { instance.uri }
    end
  end

  describe "#parse!" do
    let(:instance) { described_class.new(url) }

    it "updates #uri with the parsed Addressable::URI" do
      expect { instance.parse! }.to change { instance.uri }
    end

    it "is idempotent" do
      instance.parse!
      expect { instance.parse! }.not_to change { instance.uri }
    end
  end

  describe "#unescape" do
    let(:instance) { described_class.new('http://example.com/path?id%3D1') }

    it "returns an unescaped string" do
      expect(instance.unescape).to eq 'http://example.com/path?id=1'
    end

    it "does not change the value of #uri" do
      expect { instance.unescape }.not_to change { instance.uri }
    end
  end

  describe "#unescape!" do
    let(:instance) { described_class.new('http://example.com/path?id%3D1') }

    it "updates #uri with the unescaped string" do
      expect { instance.unescape! }.to change { instance.uri }
    end

    it "is idempotent" do
      instance.unescape!
      expect { instance.unescape! }.not_to change { instance.uri }
    end
  end

  describe "#unembed" do
    # Redirect-style link whose real destination is carried in the "ref"
    # query parameter.
    let(:referral_url) do
      'https://www.upwork.com/leaving?ref=https%3A%2F%2Fwww.solaraccreditation.com.au' +
        '%2Fconsumers%2Ffind-an-installer.html'
    end
    let(:referral_target) do
      described_class.call('https://www.solaraccreditation.com.au/consumers/find-an-installer.html')
    end

    it "extracts an embedded url from a 'u' param" do
      wrapped = 'http://www.myspace.com/Modules/PostTo/Pages/?u=http%3A%2F%2Fexample.com%2Fnews'
      expect(described_class.new(wrapped).unembed).to eq described_class.call('http://example.com/news')
    end

    it "extracts an embedded url from a 'url' param" do
      wrapped = 'http://energy.gov/exit?url=https%3A//twitter.com/energy'
      expect(described_class.new(wrapped).unembed).to eq described_class.call('https://twitter.com/energy')
    end

    it "accepts a custom embedded param key" do
      parser = described_class.new(referral_url, embedded_params: 'ref')
      expect(parser.unembed).to eq referral_target
    end

    it "accepts custom embedded param keys" do
      parser = described_class.new(referral_url, embedded_params: ['u', 'url', 'ref'])
      expect(parser.unembed).to eq referral_target
    end
  end

  describe "#unembed!" do
    let(:instance) { described_class.new('http://energy.gov/exit?url=https%3A//twitter.com/energy') }

    it "updates #uri with the unembedded URI" do
      expect { instance.unembed! }.to change { instance.uri }
    end

    it "is idempotent" do
      instance.unembed!
      expect { instance.unembed! }.not_to change { instance.uri }
    end
  end

  describe "#normalize" do
    # Every variant below is expected to collapse to this URI (or to a path
    # joined onto it with +).
    let(:example) { described_class.call('http://example.com/') }

    it "normalizes paths" do
      expect(described_class.new('http://example.com/').normalize).to eq example
      expect(described_class.new('http://example.com').normalize).to eq example
      expect(described_class.new('http://example.com///').normalize).to eq example
      expect(described_class.new('http://example.com/../').normalize).to eq example
      expect(described_class.new('http://example.com/a/b/../../').normalize).to eq example
      expect(described_class.new('http://example.com/a/b/../..').normalize).to eq example
    end

    it "normalizes query strings" do
      expect(described_class.new('http://example.com/?').normalize).to eq example
      expect(described_class.new('http://example.com?').normalize).to eq example
      expect(described_class.new('http://example.com/a/../?').normalize).to eq example
    end

    it "normalizes anchors" do
      expect(described_class.new('http://example.com#test').normalize).to eq example
      expect(described_class.new('http://example.com#test#test').normalize).to eq example
      expect(described_class.new('http://example.com/a/../?#test').normalize).to eq example
    end

    it "cleans whitespace" do
      expect(described_class.new('http://example.com/a/../? ').normalize).to eq example
      expect(described_class.new('http://example.com/a/../? #test').normalize).to eq example
      expect(described_class.new('http://example.com/ /../').normalize).to eq example
    end

    it "normalizes the hostname" do
      expect(described_class.new('EXAMPLE.COM').normalize).to eq example
      expect(described_class.new('EXAMPLE.COM/ABC').normalize).to eq(example + 'ABC')
      # Internationalised host: expected to match its Punycode (xn--) form.
      expect(described_class.new("💩.la").normalize).to eq described_class.call("xn--ls8h.la")
    end

    it "defaults to http scheme if missing" do
      expect(described_class.new('example.com').normalize).to eq example
      expect(described_class.new('https://example.com/').normalize)
        .to eq described_class.call('https://example.com/')
    end

    it "removes trailing slashes on paths" do
      expect(described_class.new('http://example.com/').normalize).to eq example
      expect(described_class.new('http://example.com/a').normalize).to eq(example + 'a')
      expect(described_class.new('http://example.com/a/').normalize).to eq(example + 'a')
      expect(described_class.new('http://example.com/a/b').normalize).to eq(example + 'a/b')
      expect(described_class.new('http://example.com/a/b/').normalize).to eq(example + 'a/b')
    end
  end

  describe "#normalize!" do
    let(:instance) { described_class.new('http://example.com///') }

    it "updates #uri with the normalized URI" do
      expect { instance.normalize! }.to change { instance.uri }
    end

    it "is idempotent" do
      instance.normalize!
      expect { instance.normalize! }.not_to change { instance.uri }
    end
  end

  describe "#canonicalize" do
    let(:instance) { described_class.new('https://wikipedia.org/?source=ABCD&utm_source=EFGH') }

    it "is aliased to #c14n" do
      expect(instance.method(:canonicalize)).to eq instance.method(:c14n)
    end

    it "returns a canonicalized Addressable::URI" do
      # Both the source and utm_source parameters are expected to be dropped.
      expect(instance.canonicalize).to eq Addressable::URI.parse('https://wikipedia.org/')
    end

    it "does not change the value of #uri" do
      expect { instance.canonicalize }.not_to change { instance.uri }
    end
  end

  describe "#canonicalize!" do
    let(:instance) { described_class.new('https://wikipedia.org/?source=ABCD&utm_source=EFGH') }

    it "is aliased to #c14n!" do
      expect(instance.method(:canonicalize!)).to eq instance.method(:c14n!)
    end

    it "updates #uri with the canonicalized URI" do
      expect { instance.canonicalize! }.to change { instance.uri }
    end

    it "is idempotent" do
      instance.canonicalize!
      expect { instance.canonicalize! }.not_to change { instance.uri }
    end
  end

  describe "#raw" do
    let(:instance) { described_class.new('https://example.com') }

    it "returns a string" do
      # Parse first so that #uri no longer holds the original input string.
      instance.parse!
      expect(instance.raw).to eq 'https://example.com/'
    end

    it "does not change the value of #uri" do
      expect { instance.raw }.not_to change { instance.uri }
    end
  end

  describe "#raw!" do
    let(:instance) { described_class.new('https://example.com') }

    # Start every example from the parsed state so raw! has something to convert.
    before { instance.parse! }

    it "updates #uri with the raw string" do
      expect { instance.raw! }.to change { instance.uri }
    end

    it "is idempotent" do
      instance.raw!
      expect { instance.raw! }.not_to change { instance.uri }
    end
  end

  describe "#clean!" do
    let(:instance) { described_class.new('#') }

    # Each example sets a message expectation for one step on the instance and
    # then runs clean!; the example passes only if that step was invoked.
    it "unescapes the URI" do
      expect(instance).to receive(:unescape!)
      instance.clean!
    end

    it "parses the URI" do
      expect(instance).to receive(:parse!)
      instance.clean!
    end

    it "unembeds the URI" do
      expect(instance).to receive(:unembed!)
      instance.clean!
    end

    it "canonicalizes the URI" do
      expect(instance).to receive(:canonicalize!)
      instance.clean!
    end

    it "normalizes the URI" do
      expect(instance).to receive(:normalize!)
      instance.clean!
    end

    it "does not convert the URI to a string by default" do
      expect(instance).not_to receive(:raw!)
      instance.clean!
    end

    it "returns a string with the :raw option enabled" do
      raw_parser = described_class.new('#', raw: true)
      expect(raw_parser).to receive(:raw!)
      raw_parser.clean!
    end
  end

  describe "#sha1" do
    let(:instance) { described_class.new('http://example.com') }

    it "is aliased to #hash" do
      expect(instance.method(:sha1)).to eq instance.method(:hash)
    end

    it "returns a SHA1 hash representation of the raw uri" do
      expect(instance.sha1).to eq "89dce6a446a69d6b9bdc01ac75251e4c322bcdff"
    end
  end

  describe "#==" do
    it "is true if two URIs have the same SHA1" do
      expect(
        described_class.new('http://example.com/') == 'http://example.com'
      ).to be true
    end

    it "is false if two URIs do not have the same SHA1" do
      expect(
        described_class.new('http://example.com/') == 'http://example.org'
      ).to be false
    end

    it "cleans both URIs before comparing" do
      # The two sides differ only in their utm_source value.
      expect(
        described_class.new('http://example.com/?utm_source=google') ==
          'http://example.com/?utm_source=yahoo'
      ).to be true
    end

    it "compares two URIs with the :raw option enabled" do
      expect(
        described_class.new('http://example.com/?utm_source=google', raw: true) ==
          'http://example.com/?utm_source=yahoo'
      ).to be true
    end
  end
end