url_parser 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/url_parser/version.rb +1 -1
- data/lib/url_parser.rb +149 -52
- data/spec/spec_helper.rb +1 -1
- data/spec/url_parser_spec.rb +206 -79
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: db620a681d6197369f31a483156df4163d2576fd
+  data.tar.gz: 00d0b29ded1f94326953bd5a1fece2003093d2fc
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 083dda35526897fae462b70cec4f84709dddd617a1c7f5d1f4d1dd830aac23d5cf59241bee008fe54ed231510b23c63f3e60d9125620b03e0b34c5757a6f4669
+  data.tar.gz: 92c747d882b57cb14c7e499d5d97d2b676df95d30147e49e15ee58ba99bb7c057c2e092cf4ed56ba9534037429e584fae770fe18283e4ceba1443d9ca7a75787
data/lib/url_parser/version.rb
CHANGED
data/lib/url_parser.rb
CHANGED
@@ -2,6 +2,7 @@ require "url_parser/version"
 require "domainatrix"
 require "postrank-uri"
 require "addressable/uri"
+require "digest/sha1"
 
 class Array
 
@@ -19,12 +20,13 @@ end
 
 module UrlParser
 
+
   module Error; end
 
-  def self.call(text)
+  def self.call(text, options = {})
     urls = []
     PostRank::URI.extract(text).each do |url|
-      urls << new(url)
+      urls << new(url, options)
     end
     urls
   end
@@ -54,107 +56,202 @@ module UrlParser
   attr_reader :url, :original_url
 
   def initialize(url, options = {})
+    @schemes = options.fetch(:schemes) { DEFAULT_SCHEMES }
+    @clean = options.fetch(:clean) { false }
+    @original_url = url
+    @url = @clean ? clean(url) : parse(url)
+  end
+
+  def schemes
+    Array.wrap(@schemes)
+  end
+
+  def parse(url)
+    tag_errors do
+      PostRank::URI.parse(url, raw: true)
+    end
+  end
+
+  def clean(url)
+    tag_errors do
+      PostRank::URI.clean(url, raw: true)
+    end
+  end
+
+  def parser
     tag_errors do
-      @
-      @preserve = !!options[:preserve]
-      @original_url = url
-      @url = @preserve ? url : PostRank::URI.clean(url)
+      @parser ||= Domainatrix.parse(to_s)
     end
   end
 
   def clean!
-    @preserve = false
     @parser = nil
-    @
-    @
+    @url = clean(url)
+    @clean = true
     self
   end
 
   def to_s
-    url
+    url.to_s
   end
 
-  def
-
+  def hash(options = {})
+    clean = options.fetch(:clean) { nil }
+    if clean.nil?
+      Digest::SHA1.hexdigest(url.to_s)
+    else
+      Digest::SHA1.hexdigest(
+        clean ? clean(original_url) : parse(original_url)
+      )
+    end
   end
 
-  def
-
-
-
+  def valid?
+    return true if localhost?
+    return false unless schemes.include?(scheme)
+    return false unless hostname =~ /\./
+    true
   end
 
+  def join(relative_path)
+    UrlParser.new(
+      Addressable::URI.join(url, relative_path).to_s
+    )
+  end
+
+  # URI Components
+
   def scheme
-
+    url.scheme
   end
 
-  def
-
+  def username
+    url.user
   end
+  alias_method :user, :username
 
   def password
-
+    url.password
   end
 
-  def
-
+  def userinfo
+    url.userinfo
+  end
+
+  def www
+    return parser.subdomain if parser.subdomain.empty?
+    parts = slice_domain.split('.')
+    parts.first =~ /www?\d*/ ? parts.shift : ""
+  end
+
+  def subdomain
+    return parser.subdomain if parser.subdomain.empty?
+    parts = slice_domain.split('.')
+    parts.shift if parts.first =~ /www?\d*/
+    parts.compact.join('.')
+  end
+
+  def subdomains
+    [ www, subdomain ].compact.join('.')
+  end
+
+  def domain_name
+    parser.domain
+  end
+
+  def domain
+    parser.domain_with_public_suffix
+  end
+
+  def tld
+    parser.public_suffix
+  end
+
+  def hostname
+    url.host
   end
 
   def port
-
+    url.port
+  end
+
+  def host
+    [ hostname, port ].compact.join(':')
+  end
+
+  def origin
+    url.origin
+  end
+
+  def authority
+    url.authority
+  end
+
+  def site
+    url.site
+  end
+
+  def directory
+    parts = path.split('/')
+    parts.pop unless segment.empty?
+    parts.unshift('') unless parts.first.empty?
+    parts.compact.join('/')
   end
 
   def path
-
+    url.path
   end
 
-  def
-
+  def segment
+    path =~ /\/\z/ ? '' : path.split('/').last
   end
 
-  def
-
+  def filename
+    return 'index.html' if segment.empty?
+    return '' if suffix.empty?
+    segment
+  end
+
+  def suffix
+    ext = File.extname(path)
+    ext[0] = '' if ext[0] == '.'
+    ext
+  end
+
+  def query
+    url.query
   end
 
   def query_values
-
+    url.query_values.to_h
   end
 
-  def
-
-    return false if uri.nil?
-    return false unless schemes.include?(scheme)
-    return false unless host =~ /\./
-    true
+  def fragment
+    url.fragment
   end
 
-  def
-
-    @parser ||= Domainatrix.parse(url)
-  end
+  def resource
+    [ [ segment, query ].compact.join('?'), fragment ].compact.join('#')
   end
 
-  def
-
+  def relative?
+    url.relative?
   end
 
-  def
-
-      parts = parser.subdomain.tap{ |s| s.slice!(domain) }.split('.')
-      parts.shift if parts.first =~ /www?\d*/
-      (parts << domain).join('.')
-    else
-      domain
-    end
+  def absolute?
+    url.absolute?
   end
 
-  def
-
-    UrlParser.new(joined_url, preserve: true)
+  def localhost?
+    !!(hostname =~ /(\A|\.)localhost\z/)
   end
 
   private
 
+  def slice_domain
+    parser.subdomain.tap{ |s| s.slice!(domain) }
+  end
+
   def tag_errors
     yield
   rescue Exception => error
data/spec/spec_helper.rb
CHANGED
@@ -14,7 +14,7 @@ require "url_parser"
 RSpec.configure do |config|
   config.run_all_when_everything_filtered = true
   config.filter_run :focus
-
+  config.raise_errors_for_deprecations!
   # Run specs in random order to surface order dependencies. If you find an
   # order dependency and want to debug it, you can fix the order by providing
   # the seed, which is printed after each run.
data/spec/url_parser_spec.rb
CHANGED
@@ -2,7 +2,7 @@ require 'spec_helper'
 
 describe UrlParser do
 
-  let(:parser) { UrlParser.new(link) }
+  let(:parser) { UrlParser.new(link, clean: true) }
 
   it "must be defined" do
     expect(UrlParser::VERSION).not_to be_nil
@@ -12,10 +12,11 @@ describe UrlParser do
 
     let(:link) { 'http://example.com/' }
     let(:text) { "there is a #{link} in here" }
-    let(:extractor) { UrlParser.call(text) }
+    let(:extractor) { UrlParser.call(text, clean: true) }
 
     it "extracts urls from text into an array" do
-      expect(extractor.collect(&:url))
+      expect(extractor.collect(&:url).collect(&:to_s))
+        .to include link
     end
 
     it "initializes each url with the parser" do
@@ -26,10 +27,10 @@ describe UrlParser do
 
   context "::new" do
 
-    let(:link) { 'http://example.com/' }
+    let(:link) { 'http://example.com/path' }
 
     it "initializes a parser with a url" do
-      expect(parser.
+      expect(parser.to_s).to eq link
     end
 
     it "cannot initialize invalid urls" do
@@ -37,11 +38,11 @@ describe UrlParser do
     end
 
     it "adds http by default" do
-      expect(UrlParser.new('example.com').
+      expect(UrlParser.new('example.com/path').to_s).to eq link
     end
 
     it "adds http to protocol-less urls" do
-      expect(UrlParser.new('//example.com').
+      expect(UrlParser.new('//example.com/path').to_s).to eq link
     end
 
     it "any errors raised inherit from UrlParser::Error" do
@@ -52,17 +53,30 @@ describe UrlParser do
 
   context "options" do
 
-    context ":
+    context ":clean" do
 
       let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
 
-      it "
-        expect(parser.
+      it "when true cleans the url" do
+        expect(parser.to_s).not_to eq parser.original_url
+      end
+
+      it "when true it normalizes the url" do
+        [
+          'http://igvita.com/',
+          'http://igvita.com///',
+          'http://igvita.com/../?#',
+          'http://igvita.com/a/../?',
+          'http://igvita.com/a/../?utm_source%3Danalytics'
+        ].each do |url|
+          expect(UrlParser.new(url, clean: true).to_s)
+            .to eq 'http://igvita.com/'
+        end
       end
 
-      it "does not clean the url
-
-
+      it "does not clean the url by default" do
+        expect(UrlParser.new(link).to_s)
+          .to eq PostRank::URI.parse(parser.original_url).to_s
       end
 
     end
@@ -71,129 +85,147 @@ describe UrlParser do
 
   end
 
-  context "#
+  context "#original_url" do
 
     let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
-    let(:parser) { UrlParser.new(link, preserve: true) }
 
-
-
-    it "normalizes the url" do
-      expect(parser.url).to eq 'http://link.to/?a=b'
+    it "preserves the url input" do
+      expect(parser.original_url).to eq link
     end
 
-
-      expect(parser.instance_variable_get(:@uri)).to be_nil
-    end
+  end
 
-
-
+  context "#url" do
+
+    let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
+
+    it "returns a url" do
+      expect(parser.url).to be_a Addressable::URI
     end
 
   end
 
-  context "#
+  context "#schemes" do
+
+    it "returns an array of allowed schemes" do
+      parser = UrlParser.new('telnet://some.com', schemes: 'telnet')
+      expect(parser.schemes).to be_an Array
+    end
 
   end
 
-  context "#
+  context "#parse" do
+
+    let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
 
-    it "
-      expect(
+    it "calls postrank-uri's parse function" do
+      expect(PostRank::URI).to receive :parse
+      UrlParser.new(link, clean: false)
+    end
+
+    it "tags errors" do
+      parser = UrlParser.new(link, clean: true)
+      expect(PostRank::URI).to receive(:parse).and_raise(StandardError)
+      expect{ parser.parse(link) }.to raise_error UrlParser::Error
     end
 
   end
 
-  context "#
+  context "#clean" do
 
-
-      expect(UrlParser.new('bullshit')).not_to be_valid
-    end
+    let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
 
-    it "
-      expect(
+    it "calls postrank-uri's clean function" do
+      expect(PostRank::URI).to receive :clean
+      UrlParser.new(link, clean: true)
     end
 
-    it "
-
+    it "tags errors" do
+      parser = UrlParser.new(link, clean: false)
+      expect(PostRank::URI).to receive(:clean).and_raise(StandardError)
+      expect{ parser.clean(link) }.to raise_error UrlParser::Error
     end
 
-
-
+  end
+
+  context "#parser" do
+
+    let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
+
+    it "calls postrank-uri's clean function" do
+      parser = UrlParser.new(link, clean: true)
+      expect(Domainatrix).to receive(:parse).with(parser.to_s)
+      parser.parser
    end
 
-    it "
-
+    it "tags errors" do
+      parser = UrlParser.new(link, clean: false)
+      expect(Domainatrix).to receive(:parse).and_raise(StandardError)
+      expect{ parser.parser }.to raise_error UrlParser::Error
     end
 
   end
 
-  context "#
+  context "#clean!" do
 
     let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
+    let(:parser) { UrlParser.new(link) }
 
-    it "
-
+    it "normalizes the url" do
+      parser.clean!
+      expect(parser.to_s).to eq 'http://link.to/?a=b'
+    end
+
+    it "resets the parser" do
+      expect{
+        parser.clean!
+      }.to change{
+        parser.parser
+      }
     end
 
   end
 
-  context "#
+  context "#to_s" do
 
-    let(:link) { '
+    let(:link) { 'http://example.com/' }
 
-    it "returns a url" do
-      expect(parser.
-    end
-
-    it "attempts to clean and normalize urls" do
-      [
-        'http://igvita.com/',
-        'http://igvita.com///',
-        'http://igvita.com/../?#',
-        'http://igvita.com/a/../?',
-        'http://igvita.com/a/../?utm_source%3Danalytics'
-      ].each do |url|
-        expect(UrlParser.new(url).url)
-          .to eq 'http://igvita.com/'
-      end
+    it "returns a string representation of the url" do
+      expect(parser.to_s).to eq 'http://example.com/'
     end
 
   end
 
-  context "#
+  context "#hash" do
 
-    let(:link) { '
+    let(:link) { 'http://example.com/' }
 
-    it "
-      expect(parser.
+    it "hashes the url string" do
+      expect(parser.hash).to eq Digest::SHA1.hexdigest(link)
     end
 
   end
 
-  context "#
+  context "#valid?" do
 
-
+    it "returns false if the url is invalid" do
+      expect(UrlParser.new('bullshit')).not_to be_valid
+    end
 
-    it "returns
-      expect(
+    it "returns false if the url scheme is not in the options" do
+      expect(UrlParser.new('telnet://some.com')).not_to be_valid
     end
 
-    it "returns
-
-      expect(url.subdomain).to eq 'github.com'
+    it "returns true if the url scheme is in the options" do
+      expect(UrlParser.new('telnet://some.com', schemes: ['telnet'])).to be_valid
     end
 
-    it "
-
-      expect(parser.subdomain).to eq 'energy.ca.gov'
+    it "returns true if the url is valid" do
+      expect(UrlParser.new('http://example.com/')).to be_valid
     end
 
-    it "
-
-      parser = UrlParser.new("http://#{www}.energy.ca.gov/")
-      expect(parser.subdomain).to eq 'energy.ca.gov'
-    end
+    it "returns true for localhost" do
+      expect(UrlParser.new('localhost:5000')).to be_valid
    end
 
   end
@@ -235,4 +267,99 @@ describe UrlParser do
 
   end
 
+  # http://medialize.github.io/URI.js/about-uris.html
+  #
+  context "uri components" do
+
+    let(:link) do
+      'foo://username:password@ww2.foo.bar.example.com:123/hello/world/there.html?name=ferret#foo'
+    end
+
+    let(:parser) { UrlParser.new(link, clean: false) }
+
+    it { expect(parser.scheme).to eq 'foo' }
+    it { expect(parser.username).to eq 'username' }
+    it { expect(parser.password).to eq 'password' }
+    it { expect(parser.userinfo).to eq 'username:password' }
+    it { expect(parser.www).to eq 'ww2' }
+    it { expect(parser.subdomain).to eq 'foo.bar' }
+    it { expect(parser.subdomains).to eq 'ww2.foo.bar' }
+    it { expect(parser.domain_name).to eq 'example' }
+    it { expect(parser.domain).to eq 'example.com' }
+    it { expect(parser.tld).to eq 'com' }
+    it { expect(parser.hostname).to eq 'ww2.foo.bar.example.com' }
+    it { expect(parser.port).to eq 123 }
+    it { expect(parser.host).to eq 'ww2.foo.bar.example.com:123' }
+    it { expect(parser.origin).to eq 'foo://ww2.foo.bar.example.com:123' }
+    it { expect(parser.authority).to eq 'username:password@ww2.foo.bar.example.com:123' }
+    it { expect(parser.site).to eq 'foo://username:password@ww2.foo.bar.example.com:123' }
+    it { expect(parser.directory).to eq '/hello/world' }
+    it { expect(parser.path).to eq '/hello/world/there.html' }
+    it { expect(parser.segment).to eq 'there.html' }
+    it { expect(parser.filename).to eq 'there.html' }
+    it { expect(parser.suffix).to eq 'html' }
+    it { expect(parser.query).to eq 'name=ferret' }
+    it { expect(parser.query_values['name']).to eq 'ferret' }
+    it { expect(parser.fragment).to eq 'foo' }
+    it { expect(parser.resource).to eq 'there.html?name=ferret#foo' }
+
+  end
+
+  context "localhost?" do
+
+    let(:link) { 'localhost:5000' }
+
+    it "returns true for localhost" do
+      expect(parser).to be_localhost
+    end
+
+  end
+
+  context "#domain_name" do
+
+    let(:link) { 'https://github.com/pauldix/domainatrix' }
+
+    it "returns the domain name without the suffix" do
+      expect(parser.domain_name).to eq 'github'
+    end
+
+  end
+
+  context "#domain" do
+
+    let(:link) { 'https://github.com/pauldix/domainatrix' }
+
+    it "returns the domain name with suffix" do
+      expect(parser.domain).to eq 'github.com'
+    end
+
+  end
+
+  context "#subdomain" do
+
+    let(:link) { 'http://foo.bar.pauldix.co.uk/asdf.html?q=arg' }
+
+    it "returns all subdomains" do
+      expect(parser.subdomain).to eq 'foo.bar'
+    end
+
+    it "returns an empty string if there is no subdomain" do
+      url = UrlParser.new('https://github.com/')
+      expect(url.subdomain).to eq ''
+    end
+
+    it "does not include www as part of the subdomain" do
+      parser = UrlParser.new("http://www.energy.ca.gov/")
+      expect(parser.subdomain).to eq 'energy'
+    end
+
+    it "does not include any variation of www as part of the subdomain" do
+      [ 'ww2', 'www2', 'ww23', 'www23' ].each do |www|
+        parser = UrlParser.new("http://#{www}.energy.ca.gov/")
+        expect(parser.subdomain).to eq 'energy'
+      end
+    end
+
+  end
+
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: url_parser
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.3.0
 platform: ruby
 authors:
 - Matt Solt
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-08-
+date: 2014-08-06 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler