url_parser 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +7 -0
- data/CHANGELOG.md +20 -0
- data/Gemfile +4 -0
- data/Guardfile +40 -7
- data/LICENSE.txt +1 -1
- data/README.md +301 -5
- data/Rakefile +5 -0
- data/lib/url_parser.rb +93 -286
- data/lib/url_parser/db.yml +77 -0
- data/lib/url_parser/domain.rb +102 -0
- data/lib/url_parser/model.rb +233 -0
- data/lib/url_parser/option_setter.rb +47 -0
- data/lib/url_parser/parser.rb +206 -0
- data/lib/url_parser/uri.rb +206 -0
- data/lib/url_parser/version.rb +1 -1
- data/spec/spec_helper.rb +83 -6
- data/spec/support/.gitkeep +0 -0
- data/spec/support/helpers.rb +7 -0
- data/spec/url_parser/domain_spec.rb +163 -0
- data/spec/url_parser/model_spec.rb +426 -0
- data/spec/url_parser/option_setter_spec.rb +71 -0
- data/spec/url_parser/parser_spec.rb +515 -0
- data/spec/url_parser/uri_spec.rb +570 -0
- data/spec/url_parser_spec.rb +93 -387
- data/url_parser.gemspec +5 -6
- metadata +39 -29
@@ -0,0 +1,426 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for UrlParser::Model.
#
# Covers three areas:
#   * constructor argument validation and the #parsed_uri / #parsed_domain
#     readers,
#   * every accessor of the DSL, exercised against a fully populated URI,
#   * the value each accessor yields for a bare root path and for an empty URI.
RSpec.describe UrlParser::Model do

  # Fixture URIs shared by the examples below. `let` builds them lazily, so
  # fixtures an example never references are never parsed.
  let(:empty_uri) { Addressable::URI.parse '' }
  let(:root_path) { Addressable::URI.parse '/' }
  let(:path_uri) { Addressable::URI.parse '/path/to/segment' }
  let(:subdomain_uri) { Addressable::URI.parse 'https://some.subdomain.example.com' }
  let(:example) { Addressable::URI.parse 'http://example.com' }
  let(:relative_uri) { Addressable::URI.parse '/some/path/to.html?name=example' }
  let(:absolute_uri) { Addressable::URI.parse 'foo://username:password@ww2.foo.bar.example.com:123/hello/world/there.html?name=ferret#foo' }
  let(:ip_address) { Addressable::URI.parse 'http://127.0.0.1:80' }
  let(:ip_address_instance) { described_class.new(ip_address) }

  context ".new" do

    it "requires an Addressable::URI" do
      expect { described_class.new('http://example.com') }
        .to raise_error UrlParser::RequiresAddressableURI
    end

    it "does not accept a Ruby URI" do
      expect { described_class.new(URI('http://example.com')) }
        .to raise_error UrlParser::RequiresAddressableURI
    end

    it "requires a UrlParser::Domain as the second argument" do
      uri = Addressable::URI.parse('http://example.com/')
      expect { described_class.new(uri, 'example.com') }
        .to raise_error UrlParser::RequiresUrlParserDomain
    end

    context "parsed_uri" do

      it "sets the argument as #parsed_uri" do
        uri = Addressable::URI.parse('http://example.com/')
        expect(described_class.new(uri).parsed_uri).to eq uri
      end

    end

    context "parsed_domain" do

      let(:uri) { Addressable::URI.parse('http://example.com/') }
      let(:domain) { UrlParser::Domain.new(uri.hostname) }

      it "sets the second argument as #parsed_domain" do
        expect(described_class.new(uri, domain).parsed_domain).to eq domain
      end

      # The two examples below build the model from the URI alone. Passing the
      # shared `domain` fixture would hand the model a Domain built from
      # example.com (a `let` block resolves `uri` to the `let(:uri)` helper,
      # not to a local variable of the example), which would make the
      # assertion pass no matter which URI is under test.
      it "fails silently if the domain is invalid" do
        invalid_uri = Addressable::URI.parse('http://example.bullshit')
        expect(described_class.new(invalid_uri).parsed_domain).to be_a UrlParser::Domain
      end

      it "fails silently if the domain is not present" do
        hostless_uri = Addressable::URI.parse('#')
        expect(described_class.new(hostless_uri).parsed_domain).to be_a UrlParser::Domain
      end

    end

  end

  # Accessor-by-accessor expectations against `absolute_uri`, which carries a
  # scheme, userinfo, ww? prefix, subdomains, port, path, query and fragment.
  context "DSL" do

    let(:instance) { described_class.new(absolute_uri) }

    context "#scheme" do

      it "returns the top level URI protocol" do
        expect(instance.scheme).to eq 'foo'
      end

    end

    context "#username" do

      it "returns the username portion of the userinfo" do
        expect(instance.username).to eq 'username'
      end

      it "is aliased to #user" do
        expect(instance.method(:user)).to eq instance.method(:username)
      end

    end

    context "#password" do

      it "returns the password portion of the userinfo" do
        expect(instance.password).to eq 'password'
      end

    end

    context "#userinfo" do

      it "returns the URI username and password string for authentication" do
        expect(instance.userinfo).to eq 'username:password'
      end

    end

    context "#hostname" do

      it "returns the fully qualified domain name" do
        expect(instance.hostname).to eq 'ww2.foo.bar.example.com'
      end

      it "returns the fully qualified IP address" do
        expect(ip_address_instance.hostname).to eq '127.0.0.1'
      end

    end

    context "#naked_hostname" do

      it "returns the fully qualified domain name without any ww? prefix" do
        expect(instance.naked_hostname).to eq 'foo.bar.example.com'
      end

      it "returns the fully qualified IP address" do
        expect(ip_address_instance.naked_hostname).to eq '127.0.0.1'
      end

    end

    context "#port" do

      it "returns the port number" do
        expect(instance.port).to eq 123
      end

    end

    context "#host" do

      it "returns the hostname and port" do
        expect(instance.host).to eq 'ww2.foo.bar.example.com:123'
      end

    end

    context "#www" do

      it "returns the ww? portion of the subdomain" do
        expect(instance.www).to eq 'ww2'
      end

    end

    context "#tld" do

      it "returns the top level domain portion" do
        expect(instance.tld).to eq 'com'
      end

      it "is aliased to #top_level_domain" do
        expect(instance.method(:top_level_domain)).to eq instance.method(:tld)
      end

      it "is aliased to #extension" do
        expect(instance.method(:extension)).to eq instance.method(:tld)
      end

    end

    context "#sld" do

      it "returns the second level domain portion" do
        expect(instance.sld).to eq 'example'
      end

      it "is aliased to #second_level_domain" do
        expect(instance.method(:second_level_domain)).to eq instance.method(:sld)
      end

      it "is aliased to #domain_name" do
        expect(instance.method(:domain_name)).to eq instance.method(:sld)
      end

    end

    context "#trd" do

      it "returns the third level domain part" do
        expect(instance.trd).to eq 'ww2.foo.bar'
      end

      it "is aliased to #third_level_domain" do
        expect(instance.method(:third_level_domain)).to eq instance.method(:trd)
      end

      it "is aliased to #subdomains" do
        expect(instance.method(:subdomains)).to eq instance.method(:trd)
      end

    end

    context "#naked_trd" do

      it "returns any non-ww? subdomains" do
        expect(instance.naked_trd).to eq 'foo.bar'
      end

      it "is aliased to #naked_subdomain" do
        expect(instance.method(:naked_subdomain)).to eq instance.method(:naked_trd)
      end

      it "returns non-ww? subdomains when there is no ww? present" do
        subdomain_instance = described_class.new(subdomain_uri)
        expect(subdomain_instance.naked_trd).to eq 'some.subdomain'
      end

    end

    context "#domain" do

      it "returns the domain name with the tld" do
        expect(instance.domain).to eq 'example.com'
      end

    end

    context "#subdomain" do

      it "returns all subdomains including ww?" do
        expect(instance.subdomain).to eq 'ww2.foo.bar.example.com'
      end

    end

    context "#origin" do

      it "returns the scheme and host" do
        expect(instance.origin).to eq 'foo://ww2.foo.bar.example.com:123'
      end

    end

    context "#authority" do

      it "returns the userinfo and host" do
        expect(instance.authority).to eq 'username:password@ww2.foo.bar.example.com:123'
      end

    end

    context "#site" do

      it "returns the scheme, userinfo, and host" do
        expect(instance.site).to eq 'foo://username:password@ww2.foo.bar.example.com:123'
      end

    end

    context "#path" do

      it "returns the directory and segment" do
        expect(instance.path).to eq '/hello/world/there.html'
      end

    end

    context "#segment" do

      it "returns the last portion of the path" do
        expect(instance.segment).to eq 'there.html'
      end

    end

    context "#directory" do

      it "returns any directories following the site within the URI" do
        expect(instance.directory).to eq '/hello/world'
      end

    end

    context "#filename" do

      it "returns the segment if a file extension is present" do
        expect(instance.filename).to eq 'there.html'
      end

      it "returns nil if a file extension is not present" do
        extensionless = described_class.new(path_uri)
        expect(extensionless.filename).to be_nil
      end

    end

    context "#suffix" do

      it "returns the file extension of the filename" do
        expect(instance.suffix).to eq 'html'
      end

      it "returns nil if a file extension is not present" do
        extensionless = described_class.new(path_uri)
        expect(extensionless.suffix).to be_nil
      end

    end

    context "#query" do

      it "returns the params and values as a string" do
        expect(instance.query).to eq 'name=ferret'
      end

    end

    context "#query_values" do

      it "returns a hash of params and values" do
        expect(instance.query_values).to eq({ 'name' => 'ferret' })
      end

    end

    context "#fragment" do

      it "returns the fragment identifier" do
        expect(instance.fragment).to eq 'foo'
      end

    end

    context "#resource" do

      # The expected value starts at the last path segment, not at the full
      # path ('/hello/world/there.html'), hence "segment" in the description.
      it "returns the segment, query, and fragment" do
        expect(instance.resource).to eq 'there.html?name=ferret#foo'
      end

    end

    context "#location" do

      it "returns the directory and resource, constituting everything after the site" do
        expect(instance.location).to eq '/hello/world/there.html?name=ferret#foo'
      end

      it "handles query only locations" do
        uri = Addressable::URI.parse 'http://example.com/?utm_source%3Danalytics'
        query_only = described_class.new(uri)
        expect(query_only.location).to eq '/?utm_source%3Danalytics'
      end

    end

  end

  # A lone '/' has no authority component: only the path-derived accessors
  # return a value, everything else is nil (or an empty hash for query_values).
  context "with a root path" do

    let(:instance) { described_class.new(root_path) }

    specify { expect(instance.scheme).to be_nil }
    specify { expect(instance.username).to be_nil }
    specify { expect(instance.password).to be_nil }
    specify { expect(instance.userinfo).to be_nil }
    specify { expect(instance.hostname).to be_nil }
    specify { expect(instance.naked_hostname).to be_nil }
    specify { expect(instance.host).to be_nil }
    specify { expect(instance.port).to be_nil }
    specify { expect(instance.www).to be_nil }
    specify { expect(instance.tld).to be_nil }
    specify { expect(instance.sld).to be_nil }
    specify { expect(instance.trd).to be_nil }
    specify { expect(instance.naked_trd).to be_nil }
    specify { expect(instance.domain).to be_nil }
    specify { expect(instance.subdomain).to be_nil }
    specify { expect(instance.origin).to be_nil }
    specify { expect(instance.authority).to be_nil }
    specify { expect(instance.site).to be_nil }
    specify { expect(instance.path).to eq '/' }
    specify { expect(instance.segment).to be_nil }
    specify { expect(instance.directory).to eq '/' }
    specify { expect(instance.filename).to be_nil }
    specify { expect(instance.suffix).to be_nil }
    specify { expect(instance.query).to be_nil }
    specify { expect(instance.query_values).to eq({}) }
    specify { expect(instance.fragment).to be_nil }
    specify { expect(instance.resource).to be_nil }
    specify { expect(instance.location).to eq '/' }

  end

  # An empty string differs from the root path above in three accessors:
  # #path is '' and both #directory and #location are nil.
  context "with empty input" do

    let(:instance) { described_class.new(empty_uri) }

    specify { expect(instance.scheme).to be_nil }
    specify { expect(instance.username).to be_nil }
    specify { expect(instance.password).to be_nil }
    specify { expect(instance.userinfo).to be_nil }
    specify { expect(instance.hostname).to be_nil }
    specify { expect(instance.naked_hostname).to be_nil }
    specify { expect(instance.host).to be_nil }
    specify { expect(instance.port).to be_nil }
    specify { expect(instance.www).to be_nil }
    specify { expect(instance.tld).to be_nil }
    specify { expect(instance.sld).to be_nil }
    specify { expect(instance.trd).to be_nil }
    specify { expect(instance.naked_trd).to be_nil }
    specify { expect(instance.domain).to be_nil }
    specify { expect(instance.subdomain).to be_nil }
    specify { expect(instance.origin).to be_nil }
    specify { expect(instance.authority).to be_nil }
    specify { expect(instance.site).to be_nil }
    specify { expect(instance.path).to eq '' }
    specify { expect(instance.segment).to be_nil }
    specify { expect(instance.directory).to be_nil }
    specify { expect(instance.filename).to be_nil }
    specify { expect(instance.suffix).to be_nil }
    specify { expect(instance.query).to be_nil }
    specify { expect(instance.query_values).to eq({}) }
    specify { expect(instance.fragment).to be_nil }
    specify { expect(instance.resource).to be_nil }
    specify { expect(instance.location).to be_nil }

  end

end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for UrlParser::OptionSetter#to_hash: the hash it returns for the
# options given to the constructor, and how bang-method calls made on the
# object handed to the constructor's block show up in that hash.
RSpec.describe UrlParser::OptionSetter do

  context "to_hash" do

    it "returns an empty hash if there were no options or block" do
      expect(described_class.new.to_hash).to eq({})
    end

    it "bases the hash results on the original options" do
      setter = described_class.new(unescape: false)
      expect(setter.to_hash).to eq(unescape: false)
    end

    # The block's call takes precedence over the explicit `unescape: false`.
    it "overwrites option settings if a method was called" do
      callback = lambda { |uri| uri.unescape! }
      setter = described_class.new(unescape: false, &callback)
      expect(setter.to_hash).to eq(unescape: true)
    end

    it "converts an #unescape! call to an unescape: true setting" do
      callback = lambda { |uri| uri.unescape! }
      setter = described_class.new(&callback)
      expect(setter.to_hash).to eq(unescape: true)
    end

    it "converts an #unembed! call to an unembed: true setting" do
      callback = lambda { |uri| uri.unembed! }
      setter = described_class.new(&callback)
      expect(setter.to_hash).to eq(unembed: true)
    end

    it "converts a #canonicalize! call to a canonicalize: true setting" do
      callback = lambda { |uri| uri.canonicalize! }
      setter = described_class.new(&callback)
      expect(setter.to_hash).to eq(canonicalize: true)
    end

    it "converts a #normalize! call to a normalize: true setting" do
      callback = lambda { |uri| uri.normalize! }
      setter = described_class.new(&callback)
      expect(setter.to_hash).to eq(normalize: true)
    end

    # Message expectations must be registered before #to_hash runs; they are
    # verified when the example finishes.
    it "converts a #clean! call to all true settings" do
      callback = lambda { |uri| uri.clean! }
      setter = described_class.new(&callback)
      [:unescape!, :unembed!, :canonicalize!, :normalize!].each do |message|
        expect(setter).to receive message
      end
      setter.to_hash
    end

    it "ignores undefined method calls" do
      callback = lambda { |uri| uri.parse! }
      setter = described_class.new(&callback)
      expect { setter.to_hash }.not_to raise_error
    end

  end

end
|