url_parser 0.4.0 → 0.5.0

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -1,481 +1,187 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe UrlParser do
4
-
5
- let(:parser) { UrlParser.new(link, clean: true) }
3
+ RSpec.describe UrlParser do
6
4
 
7
5
  it "must be defined" do
8
6
  expect(UrlParser::VERSION).not_to be_nil
9
7
  end
10
8
 
11
- context "::SCHEMES" do
12
-
13
- it { expect( UrlParser::SCHEMES).to be_an Array }
14
-
15
- end
16
-
17
- context "::DEFAULT_SCHEMES" do
18
-
19
- it { expect( UrlParser::DEFAULT_SCHEMES).to be_an Array }
20
-
21
- end
22
-
23
- context "::call" do
24
-
25
- let(:link) { 'http://example.com/' }
26
- let(:text) { "there is a #{link} in here" }
27
- let(:extractor) { UrlParser.call(text, clean: true) }
28
-
29
- it "extracts urls from text into an array" do
30
- expect(extractor.collect(&:url).collect(&:to_s))
31
- .to include link
32
- end
33
-
34
- it "initializes each url with the parser" do
35
- expect(extractor.first).to be_a UrlParser::Base
36
- end
37
-
38
- end
39
-
40
- context "::new" do
41
-
42
- let(:link) { 'http://example.com/path' }
9
+ context "configuration" do
43
10
 
44
- it "initializes a parser with a url" do
45
- expect(parser.to_s).to eq link
46
- end
47
-
48
- it "adds http by default" do
49
- expect(UrlParser.new('example.com/path').to_s).to eq link
50
- end
51
-
52
- it "adds http to protocol-less urls" do
53
- expect(UrlParser.new('//example.com/path').to_s).to eq link
54
- end
55
-
56
- it "cannot initialize invalid urls" do
57
- expect(UrlParser.new('http:||bra.ziz').url).to be_nil
58
- end
59
-
60
- it "catches errors from invalid urls" do
61
- expect(UrlParser.new('http:||bra.ziz').errors).not_to be_empty
62
- end
63
-
64
- context "options" do
65
-
66
- context ":clean" do
67
-
68
- let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
69
-
70
- it "when true cleans the url" do
71
- expect(parser.to_s).not_to eq parser.original_url
72
- end
73
-
74
- it "when true it normalizes the url" do
75
- [
76
- 'http://igvita.com/',
77
- 'http://igvita.com///',
78
- 'http://igvita.com/../?#',
79
- 'http://igvita.com/a/../?',
80
- 'http://igvita.com/a/../?utm_source%3Danalytics'
81
- ].each do |url|
82
- expect(UrlParser.new(url, clean: true).to_s)
83
- .to eq 'http://igvita.com/'
84
- end
85
- end
86
-
87
- it "does not clean the url by default" do
88
- expect(UrlParser.new(link).to_s)
89
- .to eq PostRank::URI.parse(parser.original_url).to_s
90
- end
11
+ context ":embedded_params" do
91
12
 
13
+ it "sets the unembed param keys" do
14
+ described_class.configuration.embedded_params = [ 'ref' ]
15
+ uri = UrlParser.unembed('https://www.upwork.com/leaving?ref=https%3A%2F%2Fwww.example.com')
16
+ expect(uri.to_s).to eq 'https://www.example.com/'
17
+ described_class.configuration.reset
92
18
  end
93
19
 
94
- context ":raise_errors" do
95
-
96
- it "raises instead of catching errors" do
97
- expect{
98
- UrlParser.new('http:||bra.ziz', raise_errors: true)
99
- }.to raise_error
100
- end
20
+ end
101
21
 
102
- it "any errors raised inherit from UrlParser::Error" do
103
- expect{
104
- UrlParser.new('http:||bra.ziz', raise_errors: true)
105
- }.to raise_error UrlParser::Error
106
- end
22
+ context ":default_scheme" do
107
23
 
24
+ it "sets a default scheme if one is not present" do
25
+ described_class.configuration.default_scheme = 'https'
26
+ uri = UrlParser.parse('example.com')
27
+ expect(uri.to_s).to eq 'https://example.com/'
28
+ described_class.configuration.reset
108
29
  end
109
30
 
110
31
  end
111
32
 
112
- end
33
+ context ":scheme_map" do
113
34
 
114
- context "#original_url" do
115
-
116
- let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
35
+ it "replaces scheme keys in the map with the corresponding value" do
36
+ described_class.configuration.scheme_map = { 'feed' => 'http' }
37
+ uri = UrlParser.parse('feed://feeds.feedburner.com/YourBlog')
38
+ expect(uri.to_s).to eq 'http://feeds.feedburner.com/YourBlog'
39
+ described_class.configuration.reset
40
+ end
117
41
 
118
- it "preserves the url input" do
119
- expect(parser.original_url).to eq link
120
42
  end
121
43
 
122
44
  end
123
45
 
124
- context "#url" do
125
-
126
- let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
46
+ context ".tag_errors" do
127
47
 
128
- it "returns a url" do
129
- expect(parser.url).to be_a Addressable::URI
48
+ it "tags StandardError exceptions" do
49
+ expect{
50
+ described_class.tag_errors{ raise StandardError }
51
+ }.to raise_error UrlParser::Error
130
52
  end
131
53
 
132
- end
133
-
134
- context "#schemes" do
135
-
136
- it "returns an array of allowed schemes" do
137
- parser = UrlParser.new('telnet://some.com', schemes: 'telnet')
138
- expect(parser.schemes).to be_an Array
54
+ it "does not tag errors that do not inherit from StandardError", :disable_raise_error_warning do
55
+ expect{
56
+ described_class.tag_errors{ raise Exception }
57
+ }.not_to raise_error UrlParser::Error
139
58
  end
140
59
 
141
60
  end
142
61
 
143
- context "#parse" do
62
+ context ".new" do
144
63
 
145
- let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
146
-
147
- it "calls postrank-uri's parse function" do
148
- expect(PostRank::URI).to receive :parse
149
- UrlParser.new(link, clean: false)
64
+ it "is deprecated" do
65
+ expect(described_class).to receive(:warn)
66
+ described_class.new('http://example.com')
150
67
  end
151
68
 
152
- it "tags errors when set to raise errors" do
153
- parser = UrlParser.new(link, clean: true, raise_errors: true)
154
- expect(PostRank::URI).to receive(:parse).and_raise(StandardError)
155
- expect{ parser.send(:parse, link) }.to raise_error UrlParser::Error
69
+ it "calls .parse" do
70
+ expect(described_class).to receive(:warn)
71
+ expect(described_class).to receive(:parse)
72
+ described_class.new('http://example.com')
156
73
  end
157
74
 
158
75
  end
159
76
 
160
- context "#clean" do
161
-
162
- let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
77
+ context ".escape" do
163
78
 
164
- it "calls postrank-uri's clean function" do
165
- expect(PostRank::URI).to receive :clean
166
- UrlParser.new(link, clean: true)
79
+ it "encodes a string" do
80
+ expect(described_class.escape('id=1')).to eq 'id%3D1'
167
81
  end
168
82
 
169
- it "tags errors" do
170
- parser = UrlParser.new(link, clean: false, raise_errors: true)
171
- expect(PostRank::URI).to receive(:clean).and_raise(StandardError)
172
- expect{ parser.send(:clean, link) }.to raise_error UrlParser::Error
83
+ it "escapes spaces as %20" do
84
+ expect(described_class.escape('id= 1')).to eq 'id%3D%201'
173
85
  end
174
86
 
175
87
  end
176
88
 
177
- context "#parser" do
89
+ context ".unescape" do
178
90
 
179
- let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
180
-
181
- it "calls postrank-uri's clean function" do
182
- expect(Domainatrix).to receive(:parse).with(parser.to_s)
183
- UrlParser.new(link, clean: true)
91
+ it "decodes a string" do
92
+ expect(described_class.unescape('id%3D1')).to eq 'id=1'
184
93
  end
185
94
 
186
- it "tags errors" do
187
- expect(Domainatrix).to receive(:parse).and_raise(StandardError)
188
- expect{
189
- UrlParser.new(link, clean: false, raise_errors: true)
190
- }.to raise_error UrlParser::Error
95
+ it "unescapes spaces" do
96
+ expect(described_class.unescape('id%3D%201')).to eq 'id= 1'
191
97
  end
192
98
 
193
- end
194
-
195
- context "#clean!" do
196
-
197
- let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
198
- let(:parser) { UrlParser.new(link) }
99
+ context "accept improperly encoded strings" do
199
100
 
200
- it "normalizes the url" do
201
- parser.clean!
202
- expect(parser.to_s).to eq 'http://link.to/?a=b'
203
- end
204
-
205
- it "resets the parser" do
206
- expect{
207
- parser.clean!
208
- }.to change{
209
- parser.parser
210
- }
211
- end
101
+ it "by unencoding spaces in the query encoded as '+'" do
102
+ expect(described_class.unescape('?id=+1')).to eq '?id= 1'
103
+ end
212
104
 
213
- end
105
+ it "by unencoding spaces in the query encoded as '+'" do
106
+ expect(described_class.unescape('?id%3D+1')).to eq '?id= 1'
107
+ end
214
108
 
215
- context "#to_s" do
109
+ it "by unencoding spaces in the query encoded as '%20'" do
110
+ expect(described_class.unescape('?id=%201')).to eq '?id= 1'
111
+ end
216
112
 
217
- let(:link) { 'http://example.com/' }
113
+ it "but does not unencode '+' to spaces in paths" do
114
+ expect(described_class.unescape('/foo+bar?id=foo+bar')).to eq '/foo+bar?id=foo bar'
115
+ end
218
116
 
219
- it "returns a string representation of the url" do
220
- expect(parser.to_s).to eq 'http://example.com/'
221
117
  end
222
118
 
223
119
  end
224
120
 
225
- context "#hash" do
121
+ context ".parse" do
226
122
 
227
- let(:link) { 'http://example.com/' }
228
-
229
- it "hashes the url string" do
230
- expect(parser.hash).to eq Digest::SHA1.hexdigest(link)
123
+ it "returns an instance of UrlParser::URI" do
124
+ expect(described_class.parse('http://example.com')).to be_a UrlParser::URI
231
125
  end
232
126
 
233
127
  end
234
128
 
235
- context "#valid?" do
129
+ context ".unembed" do
236
130
 
237
- it "returns true if there are no errors" do
238
- expect(UrlParser.new('http://example.com')).to be_valid
131
+ it "returns an instance of UrlParser::URI" do
132
+ expect(described_class.unembed('http://example.com')).to be_a UrlParser::URI
239
133
  end
240
134
 
241
- it "returns false if there are errors" do
242
- expect(UrlParser.new('http:||bra.ziz')).not_to be_valid
135
+ it "parses the URI with the :unembed option enabled" do
136
+ expect(UrlParser::URI).to receive(:new).with('#', hash_including(unembed: true))
137
+ described_class.unembed('#')
243
138
  end
244
139
 
245
140
  end
246
141
 
247
- # Thanks to http://stackoverflow.com/a/4864170
248
- #
249
- context "#join" do
250
-
251
- let(:link) { 'http://foo.com/zee/zaw/zoom.html' }
252
-
253
- it "properly combines a url and and relative url" do
254
- {
255
- 'http://zork.com/' => 'http://zork.com/',
256
- 'http://zork.com/#id' => 'http://zork.com/#id',
257
- 'http://zork.com/bar' => 'http://zork.com/bar',
258
- 'http://zork.com/bar#id' => 'http://zork.com/bar#id',
259
- 'http://zork.com/bar/' => 'http://zork.com/bar/',
260
- 'http://zork.com/bar/#id' => 'http://zork.com/bar/#id',
261
- 'http://zork.com/bar/jim.html' => 'http://zork.com/bar/jim.html',
262
- 'http://zork.com/bar/jim.html#id' => 'http://zork.com/bar/jim.html#id',
263
- '/bar' => 'http://foo.com/bar',
264
- '/bar#id' => 'http://foo.com/bar#id',
265
- '/bar/' => 'http://foo.com/bar/',
266
- '/bar/#id' => 'http://foo.com/bar/#id',
267
- '/bar/jim.html' => 'http://foo.com/bar/jim.html',
268
- '/bar/jim.html#id' => 'http://foo.com/bar/jim.html#id',
269
- 'jim.html' => 'http://foo.com/zee/zaw/jim.html',
270
- 'jim.html#id' => 'http://foo.com/zee/zaw/jim.html#id',
271
- '../jim.html' => 'http://foo.com/zee/jim.html',
272
- '../jim.html#id' => 'http://foo.com/zee/jim.html#id',
273
- '../' => 'http://foo.com/zee/',
274
- '../#id' => 'http://foo.com/zee/#id',
275
- '#id' => 'http://foo.com/zee/zaw/zoom.html#id'
276
- }.each do |relative_url, expected_result|
277
- expect(parser.join(relative_url).to_s).to eq expected_result
278
- end
142
+ context ".canonicalize" do
279
143
 
144
+ it "returns an instance of UrlParser::URI" do
145
+ expect(described_class.canonicalize('http://example.com')).to be_a UrlParser::URI
280
146
  end
281
147
 
282
- end
283
-
284
- # http://medialize.github.io/URI.js/about-uris.html
285
- #
286
- context "uri components" do
287
-
288
- let(:parser) { UrlParser.new(link, clean: false) }
289
-
290
- context "when all are present" do
291
-
292
- let(:link) do
293
- 'https://username:password@ww2.foo.bar.example.com:123/hello/world/there.html?name=ferret#foo'
294
- end
295
-
296
- it { expect(parser.errors).to be_empty }
297
- it { expect(parser).to be_valid }
298
- it { expect(parser.scheme).to eq 'https' }
299
- it { expect(parser.username).to eq 'username' }
300
- it { expect(parser.password).to eq 'password' }
301
- it { expect(parser.userinfo).to eq 'username:password' }
302
- it { expect(parser.www).to eq 'ww2' }
303
- it { expect(parser.subdomain).to eq 'foo.bar' }
304
- it { expect(parser.subdomains).to eq 'ww2.foo.bar' }
305
- it { expect(parser.domain_name).to eq 'example' }
306
- it { expect(parser.domain).to eq 'example.com' }
307
- it { expect(parser.tld).to eq 'com' }
308
- it { expect(parser.hostname).to eq 'ww2.foo.bar.example.com' }
309
- it { expect(parser.port).to eq 123 }
310
- it { expect(parser.host).to eq 'ww2.foo.bar.example.com:123' }
311
- it { expect(parser.origin).to eq 'https://ww2.foo.bar.example.com:123' }
312
- it { expect(parser.authority).to eq 'username:password@ww2.foo.bar.example.com:123' }
313
- it { expect(parser.site).to eq 'https://username:password@ww2.foo.bar.example.com:123' }
314
- it { expect(parser.directory).to eq '/hello/world' }
315
- it { expect(parser.path).to eq '/hello/world/there.html' }
316
- it { expect(parser.segment).to eq 'there.html' }
317
- it { expect(parser.filename).to eq 'there.html' }
318
- it { expect(parser.suffix).to eq 'html' }
319
- it { expect(parser.query).to eq 'name=ferret' }
320
- it { expect(parser.query_values['name']).to eq 'ferret' }
321
- it { expect(parser.fragment).to eq 'foo' }
322
- it { expect(parser.resource).to eq 'there.html?name=ferret#foo' }
323
- end
324
-
325
- context "when none are present" do
326
-
327
- let(:link) { '/' }
328
-
329
- it { expect(parser.errors).to be_empty }
330
- it { expect(parser.scheme).to be_nil }
331
- it { expect(parser.username).to be_nil }
332
- it { expect(parser.password).to be_nil }
333
- it { expect(parser.userinfo).to be_nil }
334
- it { expect(parser.www).to be_nil }
335
- it { expect(parser.subdomain).to be_nil }
336
- it { expect(parser.subdomains).to be_nil }
337
- it { expect(parser.domain_name).to be_nil }
338
- it { expect(parser.domain).to be_nil }
339
- it { expect(parser.tld).to be_nil }
340
- it { expect(parser.hostname).to be_nil }
341
- it { expect(parser.port).to be_nil }
342
- it { expect(parser.host).to be_nil }
343
- it { expect(parser.origin).to be_nil }
344
- it { expect(parser.authority).to be_nil }
345
- it { expect(parser.site).to be_nil }
346
- it { expect(parser.directory).to eq '/' }
347
- it { expect(parser.path).to eq '/' }
348
- it { expect(parser.segment).to be_nil }
349
- it { expect(parser.filename).to eq 'index.html' }
350
- it { expect(parser.suffix).to be_nil }
351
- it { expect(parser.query).to be_nil }
352
- it { expect(parser.query_values['name']).to be_nil }
353
- it { expect(parser.fragment).to be_nil }
354
- it { expect(parser.resource).to be_nil }
355
-
356
- end
357
-
358
- context "when empty" do
359
-
360
- let(:link) { '' }
361
-
362
- it { expect(parser.errors).to be_empty }
363
- it { expect(parser.scheme).to be_nil }
364
- it { expect(parser.username).to be_nil }
365
- it { expect(parser.password).to be_nil }
366
- it { expect(parser.userinfo).to be_nil }
367
- it { expect(parser.www).to be_nil }
368
- it { expect(parser.subdomain).to be_nil }
369
- it { expect(parser.subdomains).to be_nil }
370
- it { expect(parser.domain_name).to be_nil }
371
- it { expect(parser.domain).to be_nil }
372
- it { expect(parser.tld).to be_nil }
373
- it { expect(parser.hostname).to be_nil }
374
- it { expect(parser.port).to be_nil }
375
- it { expect(parser.host).to be_nil }
376
- it { expect(parser.origin).to be_nil }
377
- it { expect(parser.authority).to be_nil }
378
- it { expect(parser.site).to be_nil }
379
- it { expect(parser.directory).to eq '/' }
380
- it { expect(parser.path).to eq '' }
381
- it { expect(parser.segment).to be_nil }
382
- it { expect(parser.filename).to eq 'index.html' }
383
- it { expect(parser.suffix).to be_nil }
384
- it { expect(parser.query).to be_nil }
385
- it { expect(parser.query_values['name']).to be_nil }
386
- it { expect(parser.fragment).to be_nil }
387
- it { expect(parser.resource).to be_nil }
388
-
389
- end
390
-
391
- context "when invalid" do
392
-
393
- let(:link) { 'http://#content-zone' }
394
-
395
- it { expect(parser.errors).not_to be_empty }
396
- it { expect(parser.scheme).to be_nil }
397
- it { expect(parser.username).to be_nil }
398
- it { expect(parser.password).to be_nil }
399
- it { expect(parser.userinfo).to be_nil }
400
- it { expect(parser.www).to be_nil }
401
- it { expect(parser.subdomain).to be_nil }
402
- it { expect(parser.subdomains).to be_nil }
403
- it { expect(parser.domain_name).to be_nil }
404
- it { expect(parser.domain).to be_nil }
405
- it { expect(parser.tld).to be_nil }
406
- it { expect(parser.hostname).to be_nil }
407
- it { expect(parser.port).to be_nil }
408
- it { expect(parser.host).to be_nil }
409
- it { expect(parser.origin).to be_nil }
410
- it { expect(parser.authority).to be_nil }
411
- it { expect(parser.site).to be_nil }
412
- it { expect(parser.directory).to be_nil }
413
- it { expect(parser.path).to be_nil }
414
- it { expect(parser.segment).to be_nil }
415
- it { expect(parser.filename).to be_nil }
416
- it { expect(parser.suffix).to be_nil }
417
- it { expect(parser.query).to be_nil }
418
- it { expect(parser.query_values['name']).to be_nil }
419
- it { expect(parser.fragment).to be_nil }
420
- it { expect(parser.resource).to be_nil }
421
-
148
+ it "parses the URI with the :canonicalize option enabled" do
149
+ expect(UrlParser::URI).to receive(:new).with('#', hash_including(canonicalize: true))
150
+ described_class.canonicalize('#')
422
151
  end
423
152
 
424
153
  end
425
154
 
426
- context "localhost?" do
155
+ context ".normalize" do
427
156
 
428
- let(:link) { 'localhost:5000' }
429
-
430
- it "returns true for localhost" do
431
- expect(parser).to be_localhost
157
+ it "returns an instance of UrlParser::URI" do
158
+ expect(described_class.normalize('http://example.com')).to be_a UrlParser::URI
432
159
  end
433
160
 
434
- end
435
-
436
- context "#domain_name" do
437
-
438
- let(:link) { 'https://github.com/pauldix/domainatrix' }
439
-
440
- it "returns the domain name without the suffix" do
441
- expect(parser.domain_name).to eq 'github'
161
+ it "parses the URI with the :normalize option enabled" do
162
+ expect(UrlParser::URI).to receive(:new).with('#', hash_including(normalize: true))
163
+ described_class.normalize('#')
442
164
  end
443
165
 
444
166
  end
445
167
 
446
- context "#domain" do
447
-
448
- let(:link) { 'https://github.com/pauldix/domainatrix' }
168
+ context ".clean" do
449
169
 
450
- it "returns the domain name with suffix" do
451
- expect(parser.domain).to eq 'github.com'
170
+ it "returns an instance of UrlParser::URI" do
171
+ expect(described_class.clean('http://example.com')).to be_a UrlParser::URI
452
172
  end
453
173
 
454
- end
455
-
456
- context "#subdomain" do
457
-
458
- let(:link) { 'http://foo.bar.pauldix.co.uk/asdf.html?q=arg' }
459
-
460
- it "returns all subdomains" do
461
- expect(parser.subdomain).to eq 'foo.bar'
174
+ it "parses the URI with the :clean option enabled" do
175
+ expect(UrlParser::URI).to receive(:new).with('#', hash_including(clean: true))
176
+ described_class.clean('#')
462
177
  end
463
178
 
464
- it "returns nil if there is no subdomain" do
465
- url = UrlParser.new('https://github.com/')
466
- expect(url.subdomain).to be_nil
467
- end
179
+ end
468
180
 
469
- it "does not include www as part of the subdomain" do
470
- parser = UrlParser.new("http://www.energy.ca.gov/")
471
- expect(parser.subdomain).to eq 'energy'
472
- end
181
+ context ".wrap" do
473
182
 
474
- it "does not include any variation of www as part of the subdomain" do
475
- [ 'ww2', 'www2', 'ww23', 'www23' ].each do |www|
476
- parser = UrlParser.new("http://#{www}.energy.ca.gov/")
477
- expect(parser.subdomain).to eq 'energy'
478
- end
183
+ it "converts nil to an array" do
184
+ expect(described_class.wrap(nil)).to eq([])
479
185
  end
480
186
 
481
187
  end