mdurl-rb 1.0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +104 -0
- data/lib/mdurl-rb/decode.rb +145 -0
- data/lib/mdurl-rb/encode.rb +100 -0
- data/lib/mdurl-rb/format.rb +28 -0
- data/lib/mdurl-rb/parse.rb +304 -0
- data/lib/mdurl-rb/version.rb +5 -0
- data/lib/mdurl-rb.rb +18 -0
- data/spec/mdurl-rb/decode_spec.rb +112 -0
- data/spec/mdurl-rb/encode_spec.rb +74 -0
- data/spec/mdurl-rb/fixtures/url_spec.rb +704 -0
- data/spec/mdurl-rb/format_spec.rb +9 -0
- data/spec/mdurl-rb/parse_spec.rb +15 -0
- data/spec/spec_helper.rb +4 -0
- metadata +78 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: bfcbf24da2273d2580682fa690c821fc69b23c53
|
4
|
+
data.tar.gz: 87074f454da478a788227b689d89a6d6be676a60
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 347a0b6b36a1802ba392b3a8d01047b94517162fc418162f62b6040126d681e18b12797a6ef8d3a309ba23a38aec5498429174017c0c4aa222000d1efa4d6d1a
|
7
|
+
data.tar.gz: ce81db1bde29553a0776ae967523657e94c38fafec405f76fe8f6d3b0b66a9f6cedfd2f5dcdb91c54671892f6d2f3c2c98acf862853274ef65602ba33919897e
|
data/README.md
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
# mdurl
|
2
|
+
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/mdurl-rb.svg)](http://badge.fury.io/rb/mdurl-rb)
|
4
|
+
|
5
|
+
URL utilities for [motion-markdown-it](https://github.com/digitalmoksha/motion-markdown-it) parser, for both Ruby and RubyMotion
|
6
|
+
|
7
|
+
This gem is a port of the [mdurl javascript package](https://github.com/markdown-it/mdurl) by Vitaly Puzrin and Alex Kocharin, that is used for the [markdown-it](https://github.com/markdown-it/markdown-it) package
|
8
|
+
|
9
|
+
## API
|
10
|
+
|
11
|
+
_As this gem was ported from the JavaScript version, there may still be some mixture of JavaScript terminology below._
|
12
|
+
|
13
|
+
### MDUrl::Encode.encode(str [, exclude, keepEncoded]) -> String
|
14
|
+
|
15
|
+
Percent-encode a string, avoiding double encoding. Don't touch `/a-zA-Z0-9/` +
|
16
|
+
excluded chars + `/%[a-fA-F0-9]{2}/` (if not disabled). Broken surrogates are
|
17
|
+
replaced with `U+FFFD`.
|
18
|
+
|
19
|
+
Params:
|
20
|
+
|
21
|
+
- __str__ - input string.
|
22
|
+
- __exclude__ - optional, `;/?:@&=+$,-_.!~*'()#`. Additional chars to keep intact
|
23
|
+
(except `/a-zA-Z0-9/`).
|
24
|
+
- __keepEncoded__ - optional, `true`. By default it skips already encoded sequences
|
25
|
+
(`/%[a-fA-F0-9]{2}/`). If set to `false`, `%` will be encoded.
|
26
|
+
|
27
|
+
|
28
|
+
### MDUrl::Encode::DEFAULT_CHARACTERS, MDUrl::Encode::COMPONENT_CHARACTERS
|
29
|
+
|
30
|
+
You can use these constants as second argument to `encode` function.
|
31
|
+
|
32
|
+
- `DEFAULT_CHARACTERS` is the same exclude set as in the standard `encodeURI()` function
|
33
|
+
- `COMPONENT_CHARACTERS` is the same exclude set as in the `encodeURIComponent()` function
|
34
|
+
|
35
|
+
For example, `MDUrl::Encode.encode('something', MDUrl::Encode::COMPONENT_CHARACTERS, true)` is roughly the equivalent of
|
36
|
+
the `encodeURIComponent()` function in Javascript (except `encode()` doesn't throw).
|
37
|
+
|
38
|
+
|
39
|
+
### MDUrl::Decode.decode(str [, exclude]) -> String
|
40
|
+
|
41
|
+
Decode percent-encoded string. Invalid percent-encoded sequences (e.g. `%2G`)
|
42
|
+
are left as is. Invalid UTF-8 characters are replaced with `U+FFFD`.
|
43
|
+
|
44
|
+
|
45
|
+
Params:
|
46
|
+
|
47
|
+
- __str__ - input string.
|
48
|
+
- __exclude__ - set of characters to leave encoded, optional, `;/?:@&=+$,#`.
|
49
|
+
|
50
|
+
|
51
|
+
### MDUrl::Decode::DEFTAULT_CHARS, MDUrl::Decode::COMPONENT_CHARS
|
52
|
+
|
53
|
+
You can use these constants as second argument to `decode` function.
|
54
|
+
|
55
|
+
- `DEFTAULT_CHARS` is the same exclude set as in the standard `decodeURI()` function
|
56
|
+
- `COMPONENT_CHARS` is the same exclude set as in the `decodeURIComponent()` function
|
57
|
+
|
58
|
+
For example, `MDUrl::Decode.decode('something', MDUrl::Decode::DEFTAULT_CHARS)` has the same behavior as
|
59
|
+
`decodeURI('something')` in javascript on a correctly encoded input.
|
60
|
+
|
61
|
+
|
62
|
+
### MDUrl::Url.urlParse(url, slashesDenoteHost) -> urlObj
|
63
|
+
|
64
|
+
Parse url string. Similar to node's [url.parse](http://nodejs.org/api/url.html#url_url_parse_urlstr_parsequerystring_slashesdenotehost), but without any
|
65
|
+
normalizations and query string parse.
|
66
|
+
|
67
|
+
- __url__ - input url (string)
|
68
|
+
- __slashesDenoteHost__ - if url starts with `//`, expect a hostname after it. Optional, `false`.
|
69
|
+
|
70
|
+
Result (an `MDUrl::Url` object with the following attributes):
|
71
|
+
|
72
|
+
- protocol
|
73
|
+
- slashes
|
74
|
+
- auth
|
75
|
+
- port
|
76
|
+
- hostname
|
77
|
+
- hash
|
78
|
+
- search
|
79
|
+
- pathname
|
80
|
+
|
81
|
+
Difference with node's `url`:
|
82
|
+
|
83
|
+
1. No leading slash in paths, e.g. in `url.parse('http://foo?bar')` pathname is
|
84
|
+
``, not `/`
|
85
|
+
2. Backslashes are not replaced with slashes, so `http:\\example.org\` is
|
86
|
+
treated like a relative path
|
87
|
+
3. Trailing colon is treated like a part of the path, i.e. in
|
88
|
+
`http://example.org:foo` pathname is `:foo`
|
89
|
+
4. Nothing is URL-encoded in the resulting object, (in joyent/node some chars
|
90
|
+
in auth and paths are encoded)
|
91
|
+
5. `url.parse()` does not have `parseQueryString` argument
|
92
|
+
6. Removed extraneous result properties: `host`, `path`, `query`, etc.,
|
93
|
+
which can be constructed using other parts of the url.
|
94
|
+
|
95
|
+
|
96
|
+
### MDUrl::Format.format(urlObject)
|
97
|
+
|
98
|
+
Format an object previously obtained with `.parse()` function. Similar to node's
|
99
|
+
[url.format](http://nodejs.org/api/url.html#url_url_format_urlobj).
|
100
|
+
|
101
|
+
|
102
|
+
## License
|
103
|
+
|
104
|
+
[MIT](https://github.com/markdown-it/mdurl/blob/master/LICENSE)
|
@@ -0,0 +1,145 @@
|
|
1
|
+
module MDUrl
  # Percent-decoding helpers (port of the `decode` part of the mdurl
  # JavaScript package).
  module Decode

    # Characters that stay percent-encoded by default.
    # The misspelled name is the historical public constant and is kept for
    # backward compatibility; DEFAULT_CHARS is the correctly spelled alias.
    DEFTAULT_CHARS  = ';/?:@&=+$,#'
    DEFAULT_CHARS   = DEFTAULT_CHARS
    COMPONENT_CHARS = ''

    # U+FFFD, emitted once for every byte of an undecodable sequence.
    REPLACEMENT = "\ufffd"

    # One row per multi-byte UTF-8 form:
    #   [lead-byte mask, lead-byte pattern, continuation bytes, smallest
    #    code point that legitimately needs this form]
    MULTIBYTE_FORMS = [
      [0xE0, 0xC0, 1, 0x80],    # 110xxxxx 10xxxxxx
      [0xF0, 0xE0, 2, 0x800],   # 1110xxxx 10xxxxxx 10xxxxxx
      [0xF8, 0xF0, 3, 0x10000]  # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    ].freeze
    private_constant :REPLACEMENT, :MULTIBYTE_FORMS

    # Lookup tables keyed by the `exclude` string they were built for.
    @decode_cache = {}

    # Build (or fetch) the 128-entry table mapping an ASCII byte value to the
    # text it decodes to. Bytes listed in `exclude` map to their upper-case
    # `%XX` form, i.e. they stay encoded.
    #------------------------------------------------------------------------------
    def self.getDecodeCache(exclude)
      @decode_cache[exclude] ||= begin
        table = (0...128).map(&:chr)
        exclude.each_char do |ch|
          code = ch.ord
          table[code] = '%%%02X' % code
        end
        table
      end
    end

    # Decode percent-encoded string.
    #
    # - string  - input string
    # - exclude - characters to leave encoded (default: DEFTAULT_CHARS);
    #             any non-String value selects the default
    #
    # Sequences that are not `%` + two hex digits are left untouched. Bytes
    # that do not form valid UTF-8 (stray continuation bytes, truncated or
    # overlong forms, encoded surrogates, values above U+10FFFF) are replaced
    # with one U+FFFD per byte.
    #------------------------------------------------------------------------------
    def self.decode(string, exclude = nil)
      exclude = DEFTAULT_CHARS unless exclude.is_a?(String)
      cache   = getDecodeCache(exclude)

      string.gsub(/(%[a-f0-9]{2})+/i) do |seq|
        # "%E2%82%AC" -> [0xE2, 0x82, 0xAC]
        bytes   = [seq.delete('%')].pack('H*').bytes
        decoded = ''.dup
        i       = 0

        while i < bytes.length
          lead = bytes[i]

          if lead < 0x80
            decoded << cache[lead]
            i += 1
            next
          end

          # Unless a complete, well-formed multi-byte form starts here, only
          # the lead byte is consumed and replaced.
          consumed = 1
          piece    = REPLACEMENT

          form = MULTIBYTE_FORMS.find { |mask, pattern| (lead & mask) == pattern }
          if form
            mask, _pattern, tail_len, minimum = form
            tail = bytes[i + 1, tail_len]

            if tail.length == tail_len && tail.all? { |b| (b & 0xC0) == 0x80 }
              code = tail.inject(lead & ~mask & 0xFF) { |acc, b| (acc << 6) | (b & 0x3F) }
              consumed = tail_len + 1

              overlong  = code < minimum
              surrogate = code >= 0xD800 && code <= 0xDFFF
              too_large = code > 0x10FFFF

              # Ruby strings hold code points directly, so characters above
              # U+FFFF are emitted as-is; building UTF-16 surrogate halves
              # with Integer#chr raises RangeError.
              piece = if overlong || surrogate || too_large
                        REPLACEMENT * consumed
                      else
                        code.chr(Encoding::UTF_8)
                      end
            end
          end

          decoded << piece
          i += consumed
        end

        decoded
      end
    end

  end
end
|
127
|
+
|
128
|
+
# https://gist.github.com/kreeger/4480326
|
129
|
+
# class Fixnum
|
130
|
+
# def to_surrogate_pair
|
131
|
+
# if self >= 0x10000 && self <= 0x10FFFF
|
132
|
+
# high = ((self - 0x10000) / 0x400).floor + 0xD800
|
133
|
+
# low = ((self - 0x10000) % 0x400) + 0xDC00
|
134
|
+
# end
|
135
|
+
# '\U' + [high, low].map { |x| x.to_s(16) }.join('\U').upcase
|
136
|
+
# end
|
137
|
+
#
|
138
|
+
# end
|
139
|
+
#
|
140
|
+
# class String
|
141
|
+
# def to_hex
|
142
|
+
# self.gsub('\U000', '0x').to_i(16)
|
143
|
+
# end
|
144
|
+
# end
|
145
|
+
#
|
@@ -0,0 +1,100 @@
|
|
1
|
+
module MDUrl
  # Percent-encoding helpers (port of the `encode` part of the mdurl
  # JavaScript package).
  module Encode

    DEFAULT_CHARACTERS   = ";/?:@&=+$,-_.!~*'()#"
    COMPONENT_CHARACTERS = "-_.!~*'()"

    # Percent-encoded U+FFFD, emitted for input that cannot be encoded.
    REPLACEMENT = '%EF%BF%BD'
    private_constant :REPLACEMENT

    # Lookup tables keyed by the `exclude` string they were built for.
    @encode_cache = {}

    # Create a lookup array where anything but characters in `exclude` string
    # and alphanumeric chars is percent-encoded.
    #------------------------------------------------------------------------------
    def self.getEncodeCache(exclude)
      @encode_cache[exclude] ||= begin
        table = (0...128).map do |code|
          ch = code.chr
          # always allow unencoded alphanumeric characters
          ch =~ /\A[0-9a-z]\z/i ? ch : '%%%02X' % code
        end
        exclude.each_char { |ch| table[ch.ord] = ch }
        table
      end
    end

    # Encode unsafe characters with percent-encoding, skipping already
    # encoded sequences.
    #
    #  - string       - string to encode
    #  - exclude      - list of characters to ignore (in addition to a-zA-Z0-9)
    #  - keepEscaped  - don't encode '%' in a correct escape sequence (default: true)
    #
    # `encode(string, keepEscaped)` is also accepted: a non-String second
    # argument is taken as `keepEscaped` and the default exclude set is used.
    # Characters whose bytes are not valid in the string's encoding are
    # replaced with an encoded U+FFFD.
    #------------------------------------------------------------------------------
    def self.encode(string, exclude = nil, keepEscaped = nil)
      unless exclude.is_a?(String)
        # encode(string, keepEscaped); an explicit third argument is kept
        # when the second one is simply nil.
        keepEscaped = exclude unless exclude.nil?
        exclude     = DEFAULT_CHARACTERS
      end
      keepEscaped = true if keepEscaped.nil?

      cache  = getEncodeCache(exclude)
      # Split once: indexing a multibyte String by character is linear.
      chars  = string.chars
      total  = chars.length
      result = ''.dup
      i      = 0

      while i < total
        ch = chars[i]

        unless ch.valid_encoding?
          result << REPLACEMENT
          i += 1
          next
        end

        code = ch.ord

        # keep an existing, well-formed %XX escape untouched
        if keepEscaped && code == 0x25 && i + 2 < total &&
           hex_digit?(chars[i + 1]) && hex_digit?(chars[i + 2])
          result << ch << chars[i + 1] << chars[i + 2]
          i += 3
          next
        end

        if code < 128
          result << cache[code]
          i += 1
          next
        end

        # NOTE(review): kept from the JavaScript original. Presumably only
        # reachable on runtimes whose strings expose UTF-16 code units
        # (e.g. RubyMotion) - TODO confirm.
        if code >= 0xD800 && code <= 0xDFFF
          if code <= 0xDBFF && i + 1 < total && chars[i + 1].valid_encoding?
            next_code = chars[i + 1].ord
            if next_code >= 0xDC00 && next_code <= 0xDFFF
              result << percent_encode(ch + chars[i + 1])
              i += 2
              next
            end
          end
          result << REPLACEMENT
          i += 1
          next
        end

        result << percent_encode(ch)
        i += 1
      end

      result
    end

    # True when `ch` is a single hexadecimal digit.
    def self.hex_digit?(ch)
      ch.valid_encoding? && !(ch =~ /\A[0-9a-f]\z/i).nil?
    end

    # Upper-case `%XX` for every byte of `text`. Used instead of CGI.escape
    # so this file does not depend on `cgi` having been required elsewhere.
    def self.percent_encode(text)
      text.bytes.map { |byte| '%%%02X' % byte }.join
    end

    private_class_method :hex_digit?, :percent_encode

  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module MDUrl
  # Serialises a parsed URL object back into a string.
  module Format

    # Concatenate the parts of `url` in URL order:
    # protocol, `//`, auth + `@`, hostname, `:` + port, pathname, search, hash.
    #
    # - url - any object responding to `protocol`, `slashes`, `auth`,
    #         `hostname`, `port`, `pathname`, `search` and `hash`
    #
    # Missing (nil) parts are skipped. A hostname containing `:` is treated as
    # an IPv6 address and wrapped in brackets. `auth` and `port` are
    # interpolated, so non-String values such as an Integer port are accepted.
    #------------------------------------------------------------------------------
    def self.format(url)
      hostname = url.hostname || ''
      # ipv6 address
      hostname = "[#{hostname}]" if hostname.include?(':')

      [
        url.protocol || '',
        url.slashes ? '//' : '',
        url.auth ? "#{url.auth}@" : '',
        hostname,
        url.port ? ":#{url.port}" : '',
        url.pathname || '',
        url.search || '',
        url.hash || ''
      ].join
    end

  end
end
|
@@ -0,0 +1,304 @@
|
|
1
|
+
# Copyright Joyent, Inc. and other Node contributors.
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
4
|
+
# copy of this software and associated documentation files (the
|
5
|
+
# "Software"), to deal in the Software without restriction, including
|
6
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
# distribute, sublicense, and/or sell copies of the Software, and to permit
|
8
|
+
# persons to whom the Software is furnished to do so, subject to the
|
9
|
+
# following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be included
|
12
|
+
# in all copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
15
|
+
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
|
17
|
+
# NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
18
|
+
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
19
|
+
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
20
|
+
# USE OR OTHER DEALINGS IN THE SOFTWARE.
|
21
|
+
|
22
|
+
#
|
23
|
+
# Changes from joyent/node:
|
24
|
+
#
|
25
|
+
# 1. No leading slash in paths,
|
26
|
+
# e.g. in `url.parse('http://foo?bar')` pathname is ``, not `/`
|
27
|
+
#
|
28
|
+
# 2. Backslashes are not replaced with slashes,
|
29
|
+
# so `http:\\example.org\` is treated like a relative path
|
30
|
+
#
|
31
|
+
# 3. Trailing colon is treated like a part of the path,
|
32
|
+
# i.e. in `http://example.org:foo` pathname is `:foo`
|
33
|
+
#
|
34
|
+
# 4. Nothing is URL-encoded in the resulting object,
|
35
|
+
# (in joyent/node some chars in auth and paths are encoded)
|
36
|
+
#
|
37
|
+
# 5. `url.parse()` does not have `parseQueryString` argument
|
38
|
+
#
|
39
|
+
# 6. Removed extraneous result properties: `host`, `path`, `query`, etc.,
|
40
|
+
# which can be constructed using other parts of the url.
|
41
|
+
#
|
42
|
+
|
43
|
+
module MDUrl
  # A URL split into its components, without any normalisation or decoding.
  # Attributes that are absent from the parsed string stay nil.
  class Url

    attr_accessor :protocol, :slashes, :hostname, :pathname, :auth, :port, :search, :hash

    # Reference: RFC 3986, RFC 1808, RFC 2396

    # define these here so at least they only have to be
    # compiled once on the first module load.
    #
    # All patterns are anchored to the whole string (\A / \z). Ruby's ^ and $
    # match at every line break, which is not what the JavaScript original
    # means by them.
    PROTOCOL_PATTERN = /\A([a-z0-9.+-]+:)/i
    PORT_PATTERN     = /:[0-9]*\z/

    # Special case for a simple path URL
    SIMPLE_PATH_PATTERN = /\A(\/\/?(?!\/)[^\?\s]*)(\?[^\s]*)?\z/

    # RFC 2396: characters reserved for delimiting URLs.
    # We actually just auto-escape these.
    # The control characters need double quotes: '\r' would be a backslash
    # followed by the letter r.
    DELIMS = ['<', '>', '"', '`', ' ', "\r", "\n", "\t"].freeze

    # RFC 2396: characters not allowed for various reasons.
    UNWISE = (['{', '}', '|', '\\', '^', '`'] + DELIMS).freeze

    # Allowed by RFCs, but cause of XSS attacks.  Always escape these.
    AUTO_ESCAPE = (['\''] + UNWISE).freeze

    # Characters that are never ever allowed in a hostname.
    # Note that any invalid chars are also handled, but these
    # are the ones that are *expected* to be seen, so we fast-path
    # them.
    NON_HOST_CHARS        = (['%', '/', '?', ';', '#'] + AUTO_ESCAPE).freeze
    HOST_ENDING_CHARS     = ['/', '?', '#'].freeze
    HOSTNAME_MAX_LEN      = 255
    HOSTNAME_PART_PATTERN = /\A[+a-z0-9A-Z_-]{0,63}\z/
    HOSTNAME_PART_START   = /\A([+a-z0-9A-Z_-]{0,63})(.*)\z/

    # protocols that never have a hostname.
    HOSTLESS_PROTOCOL = {
      'javascript'  => true,
      'javascript:' => true
    }

    # protocols that always contain a // bit.
    SLASHED_PROTOCOL = {
      'http'    => true,
      'https'   => true,
      'ftp'     => true,
      'gopher'  => true,
      'file'    => true,
      'http:'   => true,
      'https:'  => true,
      'ftp:'    => true,
      'gopher:' => true,
      'file:'   => true
    }

    # Parse `url` into a new Url. A value that already is a Url is returned
    # unchanged.
    #
    # - url               - String to parse (or a Url)
    # - slashesDenoteHost - when true, a leading `//` is followed by a host
    #------------------------------------------------------------------------------
    def self.urlParse(url, slashesDenoteHost = false)
      return url if url.is_a?(Url)

      Url.new.parse(url, slashesDenoteHost)
    end

    # Fill this object from the String `url` and return self.
    #------------------------------------------------------------------------------
    def parse(url, slashesDenoteHost = false)
      # trim before proceeding.
      # This is to support parse stuff like "  http://foo.com  \n"
      rest = url.strip

      # `include?` rather than `split('#').length == 1`: Ruby's split drops
      # trailing empty fields, so "/foo#" would look fragment-free.
      if !slashesDenoteHost && !url.include?('#')
        # Try fast path regexp
        simple_path = SIMPLE_PATH_PATTERN.match(rest)
        if simple_path
          @pathname = simple_path[1]
          @search   = simple_path[2] if simple_path[2]
          return self
        end
      end

      lower_proto = nil
      proto = PROTOCOL_PATTERN.match(rest)
      if proto
        proto       = proto[0]
        lower_proto = proto.downcase
        @protocol   = proto
        rest        = rest[proto.length..-1]
      end

      # figure out if it's got a host
      # user@server is *always* interpreted as a hostname, and url
      # resolution will treat //foo/bar as host=foo,path=bar because that's
      # how the browser resolves relative URLs.
      slashes = false
      if slashesDenoteHost || proto || rest.match(/\A\/\/[^@\/]+@[^@\/]+/)
        slashes = rest.start_with?('//')
        if slashes && !(proto && HOSTLESS_PROTOCOL[proto])
          rest     = rest[2..-1]
          @slashes = true
        end
      end

      if !HOSTLESS_PROTOCOL[proto] && (slashes || (proto && !SLASHED_PROTOCOL[proto]))
        # there's a hostname.
        # the first instance of /, ?, ;, or # ends the host.
        #
        # If there is an @ in the hostname, then non-host chars *are* allowed
        # to the left of the last @ sign, unless some host-ending character
        # comes *before* the @-sign.
        # URLs are obnoxious.
        #
        # ex:
        # http://a@b@c/ => user:a@b host:c
        # http://a@b?@c => user:a host:c path:/?@c

        # v0.12 TODO(isaacs): This is not quite how Chrome does things.
        # Review our test case against browsers more comprehensively.

        # at this point, either we have an explicit point where the
        # auth portion cannot go past, or the last @ char is the decider.
        # http://a@b/c@d => host:b auth:a path:/c@d
        host_end = earliest_index(rest, HOST_ENDING_CHARS)
        at_sign  = host_end ? rest.rindex('@', host_end) : rest.rindex('@')

        # Now we have a portion which is definitely the auth.
        # Pull that off.
        if at_sign
          @auth = rest[0...at_sign]
          rest  = rest[(at_sign + 1)..-1]
        end

        # the host is the remaining to the left of the first non-host char;
        # if there is none, the entire thing is a host.
        host_end = earliest_index(rest, NON_HOST_CHARS) || rest.length

        # A colon right before the end belongs to the path. The `> 0` guard
        # matters: rest[-1] is the LAST character in Ruby, not "nothing".
        host_end -= 1 if host_end > 0 && rest[host_end - 1] == ':'
        host = rest[0...host_end]
        rest = rest[host_end..-1]

        # pull out port.
        parseHost(host)

        # we've indicated that there is a hostname,
        # so even if it's empty, it has to be present.
        @hostname ||= ''

        # if hostname begins with [ and ends with ]
        # assume that it's an IPv6 address.
        ipv6_hostname = @hostname.start_with?('[') && @hostname.end_with?(']')

        # validate a little.
        unless ipv6_hostname
          # limit -1 keeps trailing empty labels ("foo." => ["foo", ""]) so a
          # trailing dot is not lost when the host is split below.
          hostparts = @hostname.split('.', -1)
          hostparts.each_with_index do |part, i|
            next if part =~ HOSTNAME_PART_PATTERN

            # we replace non-ASCII char with a temporary placeholder
            # we need this to make sure size of hostname is not
            # broken by replacing non-ASCII by nothing
            ascii_part = part.each_char.map { |ch| ch.ord > 127 ? 'x' : ch }.join

            # we test again with ASCII char only
            next if ascii_part =~ HOSTNAME_PART_PATTERN

            # Everything from the first offending character on is handed
            # back to the rest of the URL.
            valid_parts = hostparts[0...i]
            not_host    = hostparts[(i + 1)..-1]
            bit = HOSTNAME_PART_START.match(part)
            if bit
              valid_parts.push(bit[1])
              not_host.unshift(bit[2])
            end
            rest      = not_host.join('.') + rest
            @hostname = valid_parts.join('.')
            break
          end
        end

        @hostname = '' if @hostname.length > HOSTNAME_MAX_LEN

        # strip [ and ] from the hostname
        # (`to_s` covers the hostname that was just emptied for being too long)
        @hostname = @hostname[1...-1].to_s if ipv6_hostname
      end

      # chop off from the tail first.
      hash_index = rest.index('#')
      if hash_index
        # got a fragment string.
        @hash = rest[hash_index..-1]
        rest  = rest[0...hash_index]
      end

      query_index = rest.index('?')
      if query_index
        @search = rest[query_index..-1]
        rest    = rest[0...query_index]
      end

      @pathname = rest unless rest.nil? || rest.empty?
      @pathname = '' if SLASHED_PROTOCOL[lower_proto] && @hostname && !@pathname

      self
    end

    # Split a trailing `:port` off `host`; stores @port (digits only, left
    # untouched when the colon has no digits after it) and @hostname.
    #------------------------------------------------------------------------------
    def parseHost(host)
      port = PORT_PATTERN.match(host)
      if port
        port  = port[0]
        @port = port[1..-1] if port != ':'
        host  = host[0, host.length - port.length]
      end
      @hostname = host if host
    end

    private

    # Smallest index at which any of `chars` occurs in `text`, or nil.
    def earliest_index(text, chars)
      chars.map { |ch| text.index(ch) }.compact.min
    end

  end
end
|