mdurl-rb 1.0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: bfcbf24da2273d2580682fa690c821fc69b23c53
4
+ data.tar.gz: 87074f454da478a788227b689d89a6d6be676a60
5
+ SHA512:
6
+ metadata.gz: 347a0b6b36a1802ba392b3a8d01047b94517162fc418162f62b6040126d681e18b12797a6ef8d3a309ba23a38aec5498429174017c0c4aa222000d1efa4d6d1a
7
+ data.tar.gz: ce81db1bde29553a0776ae967523657e94c38fafec405f76fe8f6d3b0b66a9f6cedfd2f5dcdb91c54671892f6d2f3c2c98acf862853274ef65602ba33919897e
data/README.md ADDED
@@ -0,0 +1,104 @@
1
+ # mdurl
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/mdurl-rb.svg)](http://badge.fury.io/rb/mdurl-rb)
4
+
5
+ URL utilities for [motion-markdown-it](https://github.com/digitalmoksha/motion-markdown-it) parser, for both Ruby and RubyMotion
6
+
7
+ This gem is a port of the [mdurl javascript package](https://github.com/markdown-it/mdurl) by Vitaly Puzrin and Alex Kocharin, that is used for the [markdown-it](https://github.com/markdown-it/markdown-it) package
8
+
9
+ ## API
10
+
11
+ _As this gem was ported from the Javascript version, there may still be some mixture of Javascript terminology below._
12
+
13
+ ### MDUrl::Encode.encode(str [, exclude, keepEncoded]) -> String
14
+
15
+ Percent-encode a string, avoiding double encoding. Don't touch `/a-zA-Z0-9/` +
16
+ excluded chars + `/%[a-fA-F0-9]{2}/` (if not disabled). Broken surrogates are
17
+ replaced with `U+FFFD`.
18
+
19
+ Params:
20
+
21
+ - __str__ - input string.
22
+ - __exclude__ - optional, `;/?:@&=+$,-_.!~*'()#`. Additional chars to keep intact
23
+ (except `/a-zA-Z0-9/`).
24
+ - __keepEncoded__ - optional, `true`. By default it skips already encoded sequences
25
+ (`/%[a-fA-F0-9]{2}/`). If set to `false`, `%` will be encoded.
26
+
27
+
28
+ ### MDUrl::Encode::DEFAULT_CHARACTERS, MDUrl::Encode::COMPONENT_CHARACTERS
29
+
30
+ You can use these constants as second argument to `encode` function.
31
+
32
+ - `DEFAULT_CHARACTERS` is the same exclude set as in the standard `encodeURI()` function
33
+ - `COMPONENT_CHARACTERS` is the same exclude set as in the `encodeURIComponent()` function
34
+
35
+ For example, `MDUrl::Encode.encode('something', MDUrl::Encode::COMPONENT_CHARACTERS, true)` is roughly the equivalent of
36
+ the `encodeURIComponent()` function in Javascript (except `encode()` doesn't throw).
37
+
38
+
39
+ ### MDUrl::Decode.decode(str [, exclude]) -> String
40
+
41
+ Decode percent-encoded string. Invalid percent-encoded sequences (e.g. `%2G`)
42
+ are left as is. Invalid UTF-8 characters are replaced with `U+FFFD`.
43
+
44
+
45
+ Params:
46
+
47
+ - __str__ - input string.
48
+ - __exclude__ - set of characters to leave encoded, optional, `;/?:@&=+$,#`.
49
+
50
+
51
+ ### MDUrl::Decode::DEFTAULT_CHARS, MDUrl::Decode::COMPONENT_CHARS
52
+
53
+ You can use these constants as second argument to `decode` function.
54
+
55
+ - `DEFTAULT_CHARS` is the same exclude set as in the standard `decodeURI()` function
56
+ - `COMPONENT_CHARS` is the same exclude set as in the `decodeURIComponent()` function
57
+
58
+ For example, `MDUrl::Decode.decode('something', MDUrl::Decode::DEFTAULT_CHARS)` has the same behavior as
59
+ `decodeURI('something')` in javascript on a correctly encoded input.
60
+
61
+
62
+ ### MDUrl::Url.urlParse(url, slashesDenoteHost) -> urlObj
63
+
64
+ Parse url string. Similar to node's [url.parse](http://nodejs.org/api/url.html#url_url_parse_urlstr_parsequerystring_slashesdenotehost), but without any
65
+ normalizations and query string parse.
66
+
67
+ - __url__ - input url (string)
68
+ - __slashesDenoteHost__ - if url starts with `//`, expect a hostname after it. Optional, `false`.
69
+
70
+ Result (a `MDUrl::Url` object with the following attributes):
71
+
72
+ - protocol
73
+ - slashes
74
+ - auth
75
+ - port
76
+ - hostname
77
+ - hash
78
+ - search
79
+ - pathname
80
+
81
+ Difference with node's `url`:
82
+
83
+ 1. No leading slash in paths, e.g. in `url.parse('http://foo?bar')` pathname is
84
+ ``, not `/`
85
+ 2. Backslashes are not replaced with slashes, so `http:\\example.org\` is
86
+ treated like a relative path
87
+ 3. Trailing colon is treated like a part of the path, i.e. in
88
+ `http://example.org:foo` pathname is `:foo`
89
+ 4. Nothing is URL-encoded in the resulting object, (in joyent/node some chars
90
+ in auth and paths are encoded)
91
+ 5. `url.parse()` does not have `parseQueryString` argument
92
+ 6. Removed extraneous result properties: `host`, `path`, `query`, etc.,
93
+ which can be constructed using other parts of the url.
94
+
95
+
96
+ ### MDUrl::Format.format(urlObject)
97
+
98
+ Format an object previously obtained with `.parse()` function. Similar to node's
99
+ [url.format](http://nodejs.org/api/url.html#url_url_format_urlobj).
100
+
101
+
102
+ ## License
103
+
104
+ [MIT](https://github.com/markdown-it/mdurl/blob/master/LICENSE)
@@ -0,0 +1,145 @@
1
module MDUrl
  # Percent-decoding of URL strings.
  module Decode

    # Lookup tables built by getDecodeCache, keyed by the `exclude` string.
    @@decodeCache = {}

    # Characters that stay percent-encoded unless the caller passes another set.
    DEFTAULT_CHARS  = ';/?:@&=+$,#'
    # Correctly spelled alias; DEFTAULT_CHARS is kept for existing callers.
    DEFAULT_CHARS   = DEFTAULT_CHARS
    # Empty exclude set: every valid escape is decoded.
    COMPONENT_CHARS = ''


    # Build (and memoise) a 128-entry table that maps an ASCII byte value to
    # the text it decodes to. Bytes whose character appears in `exclude` map
    # back to their upper-cased `%XX` escape so they remain encoded.
    #
    # - exclude - string of characters that must stay percent-encoded
    #
    # Returns the Array used as lookup table.
    #------------------------------------------------------------------------------
    def self.getDecodeCache(exclude)
      cache = @@decodeCache[exclude]
      return cache if cache

      cache = (0...128).map { |code| code.chr }

      exclude.each_char do |ch|
        code = ch.ord
        cache[code] = '%' + ('0' + code.to_s(16).upcase).slice(-2, 2)
      end

      @@decodeCache[exclude] = cache
    end


    # Decode percent-encoded string.
    #
    # - string  - input string
    # - exclude - characters to leave encoded (default: DEFTAULT_CHARS); any
    #             non-String value selects the default
    #
    # Escapes that are not two hex digits (e.g. `%2G`) are left untouched.
    # Bytes that do not form a well-formed UTF-8 sequence (truncated, overlong,
    # surrogate range, above U+10FFFF) are replaced with U+FFFD.
    #------------------------------------------------------------------------------
    def self.decode(string, exclude = nil)
      exclude = DEFTAULT_CHARS unless exclude.is_a?(String)
      cache   = getDecodeCache(exclude)

      # Each match is a run of consecutive %XX escapes, so its length is a
      # multiple of 3 and escape number k starts at offset 3 * k.
      string.gsub(/(%[a-f0-9]{2})+/i) do |seq|
        result = ''

        i = 0
        l = seq.length
        while i < l
          b1 = seq.slice((i + 1)...(i + 3)).to_i(16)

          if b1 < 0x80
            result += cache[b1]
            i += 3
            next
          end

          if (b1 & 0xE0) == 0xC0 && (i + 3 < l)
            # 110xxxxx 10xxxxxx
            b2 = seq.slice((i + 4)...(i + 6)).to_i(16)

            if (b2 & 0xC0) == 0x80
              char = ((b1 << 6) & 0x7C0) | (b2 & 0x3F)

              if char < 0x80
                # overlong encoding
                result += "\ufffd\ufffd"
              else
                result += char.chr(Encoding::UTF_8)
              end

              i += 6
              next
            end
          end

          if (b1 & 0xF0) == 0xE0 && (i + 6 < l)
            # 1110xxxx 10xxxxxx 10xxxxxx
            b2 = seq.slice((i + 4)...(i + 6)).to_i(16)
            b3 = seq.slice((i + 7)...(i + 9)).to_i(16)

            if (b2 & 0xC0) == 0x80 && (b3 & 0xC0) == 0x80
              char = ((b1 << 12) & 0xF000) | ((b2 << 6) & 0xFC0) | (b3 & 0x3F)

              if char < 0x800 || (char >= 0xD800 && char <= 0xDFFF)
                # overlong encoding or an encoded surrogate
                result += "\ufffd\ufffd\ufffd"
              else
                result += char.chr(Encoding::UTF_8)
              end

              i += 9
              next
            end
          end

          if (b1 & 0xF8) == 0xF0 && (i + 9 < l)
            # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            b2 = seq.slice((i + 4)...(i + 6)).to_i(16)
            b3 = seq.slice((i + 7)...(i + 9)).to_i(16)
            b4 = seq.slice((i + 10)...(i + 12)).to_i(16)

            if (b2 & 0xC0) == 0x80 && (b3 & 0xC0) == 0x80 && (b4 & 0xC0) == 0x80
              char = ((b1 << 18) & 0x1C0000) | ((b2 << 12) & 0x3F000) | ((b3 << 6) & 0xFC0) | (b4 & 0x3F)

              if char < 0x10000 || char > 0x10FFFF
                result += "\ufffd\ufffd\ufffd\ufffd"
              else
                # Ruby strings hold the code point itself. Splitting it into a
                # UTF-16 surrogate pair (as the JavaScript original must) makes
                # Integer#chr raise RangeError, because 0xD800..0xDFFF are not
                # valid UTF-8 code points.
                result += char.chr(Encoding::UTF_8)
              end

              i += 12
              next
            end
          end

          # Stray continuation byte, truncated sequence or invalid lead byte.
          result += "\ufffd"
          i += 3
        end

        result
      end
    end

  end
end
127
+
128
+ # https://gist.github.com/kreeger/4480326
129
+ # class Fixnum
130
+ # def to_surrogate_pair
131
+ # if self >= 0x10000 && self <= 0x10FFFF
132
+ # high = ((self - 0x10000) / 0x400).floor + 0xD800
133
+ # low = ((self - 0x10000) % 0x400) + 0xDC00
134
+ # end
135
+ # '\U' + [high, low].map { |x| x.to_s(16) }.join('\U').upcase
136
+ # end
137
+ #
138
+ # end
139
+ #
140
+ # class String
141
+ # def to_hex
142
+ # self.gsub('\U000', '0x').to_i(16)
143
+ # end
144
+ # end
145
+ #
@@ -0,0 +1,100 @@
1
module MDUrl
  # Percent-encoding of URL strings that avoids double encoding.
  module Encode

    # Default set of characters (besides a-zA-Z0-9) that are left unencoded.
    DEFAULT_CHARACTERS   = ";/?:@&=+$,-_.!~*'()#"
    # Narrower set intended for encoding a single URL component.
    COMPONENT_CHARACTERS = "-_.!~*'()"

    # Lookup tables built by getEncodeCache, keyed by the `exclude` string.
    @@encodeCache = {}


    # Create a lookup array where anything but characters in `exclude` string
    # and alphanumeric chars is percent-encoded.
    #
    # - exclude - string of characters to pass through unchanged
    #
    # Returns an Array indexed by ASCII code (0..127) holding either the
    # character itself or its upper-cased `%XX` escape.
    #------------------------------------------------------------------------------
    def self.getEncodeCache(exclude)
      cache = @@encodeCache[exclude]
      return cache if cache

      cache = (0...128).map do |code|
        ch = code.chr
        if ch =~ /\A[0-9a-z]\z/i
          # always allow unencoded alphanumeric characters
          ch
        else
          '%' + ('0' + code.to_s(16).upcase).slice(-2, 2)
        end
      end

      exclude.each_char { |ch| cache[ch.ord] = ch }

      @@encodeCache[exclude] = cache
    end


    # Encode unsafe characters with percent-encoding, skipping already
    # encoded sequences.
    #
    # - string      - string to encode
    # - exclude     - list of characters to ignore (in addition to a-zA-Z0-9);
    #                 a non-String value selects DEFAULT_CHARACTERS
    # - keepEscaped - don't encode '%' in a correct escape sequence (default: true)
    #
    # The two-argument shorthand `encode(string, keepEscaped)` is supported: a
    # non-String, non-nil second argument is taken as `keepEscaped`. A nil
    # second argument leaves an explicitly passed third argument untouched.
    #
    # Non-ASCII characters are emitted as the percent-encoded bytes of the
    # character. Characters that are not valid in the string's encoding are
    # emitted as the escape of U+FFFD (`%EF%BF%BD`).
    #------------------------------------------------------------------------------
    def self.encode(string, exclude = nil, keepEscaped = nil)
      unless exclude.is_a?(String)
        # encode(string, keepEscaped)
        keepEscaped = exclude unless exclude.nil?
        exclude     = DEFAULT_CHARACTERS
      end
      keepEscaped = true if keepEscaped.nil?

      cache  = getEncodeCache(exclude)
      # Split once: indexing a multibyte String by character position is not
      # constant time, which made the original index loop quadratic.
      chars  = string.chars
      count  = chars.length
      result = ''.dup

      i = 0
      while i < count
        ch = chars[i]

        unless ch.valid_encoding?
          # malformed byte sequence -> U+FFFD
          result << '%EF%BF%BD'
          i += 1
          next
        end

        code = ch.ord

        if keepEscaped && code == 0x25 && i + 2 < count # %
          pair = chars[i + 1] + chars[i + 2]
          # validity is checked first because matching a regexp against a
          # broken string raises
          if pair.valid_encoding? && pair =~ /\A[0-9a-f]{2}\z/i
            result << ch << pair
            i += 3
            next
          end
        end

        if code < 128
          result << cache[code]
        else
          # one %XX escape per byte of the character
          ch.each_byte { |byte| result << format('%%%02X', byte) }
        end
        i += 1
      end

      result
    end
  end
end
@@ -0,0 +1,28 @@
1
module MDUrl
  # Serialises a parsed URL back into a string.
  module Format

    # Concatenate the parts of a URL object into a string.
    #
    # - url - object responding to `protocol`, `slashes`, `auth`, `hostname`,
    #         `port`, `pathname`, `search` and `hash`; parts that are nil are
    #         skipped
    #
    # A hostname containing ':' is treated as an IPv6 address and wrapped in
    # square brackets. The port may be a String or an Integer.
    #
    # Returns the assembled String.
    #------------------------------------------------------------------------------
    def self.format(url)
      result = ''

      result += url.protocol || ''
      result += url.slashes ? '//' : ''
      result += url.auth ? url.auth + '@' : ''

      if url.hostname && url.hostname.index(':')
        # ipv6 address
        result += '[' + url.hostname + ']'
      else
        result += url.hostname || ''
      end

      # interpolation, so that a port assigned as an Integer does not raise
      result += url.port ? ":#{url.port}" : ''
      result += url.pathname || ''
      result += url.search || ''
      result += url.hash || ''

      result
    end

  end
end
@@ -0,0 +1,304 @@
1
+ # Copyright Joyent, Inc. and other Node contributors.
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a
4
+ # copy of this software and associated documentation files (the
5
+ # "Software"), to deal in the Software without restriction, including
6
+ # without limitation the rights to use, copy, modify, merge, publish,
7
+ # distribute, sublicense, and/or sell copies of the Software, and to permit
8
+ # persons to whom the Software is furnished to do so, subject to the
9
+ # following conditions:
10
+ #
11
+ # The above copyright notice and this permission notice shall be included
12
+ # in all copies or substantial portions of the Software.
13
+ #
14
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15
+ # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
17
+ # NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
18
+ # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19
+ # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20
+ # USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
22
+ #
23
+ # Changes from joyent/node:
24
+ #
25
+ # 1. No leading slash in paths,
26
+ # e.g. in `url.parse('http://foo?bar')` pathname is ``, not `/`
27
+ #
28
+ # 2. Backslashes are not replaced with slashes,
29
+ # so `http:\\example.org\` is treated like a relative path
30
+ #
31
+ # 3. Trailing colon is treated like a part of the path,
32
+ # i.e. in `http://example.org:foo` pathname is `:foo`
33
+ #
34
+ # 4. Nothing is URL-encoded in the resulting object,
35
+ # (in joyent/node some chars in auth and paths are encoded)
36
+ #
37
+ # 5. `url.parse()` does not have `parseQueryString` argument
38
+ #
39
+ # 6. Removed extraneous result properties: `host`, `path`, `query`, etc.,
40
+ # which can be constructed using other parts of the url.
41
+ #
42
+
43
+ module MDUrl
44
+ class Url
45
+
46
+ attr_accessor :protocol, :slashes, :hostname, :pathname, :auth, :port, :search, :hash
47
+
48
+ # Reference: RFC 3986, RFC 1808, RFC 2396
49
+
50
+ # define these here so at least they only have to be
51
+ # compiled once on the first module load.
52
+ PROTOCOL_PATTERN = /^([a-z0-9.+-]+:)/i
53
+ PORT_PATTERN = /:[0-9]*$/
54
+
55
+ # Special case for a simple path URL
56
+ SIMPLE_PATH_PATTERN = /^(\/\/?(?!\/)[^\?\s]*)(\?[^\s]*)?$/
57
+
58
+ # RFC 2396: characters reserved for delimiting URLs.
59
+ # We actually just auto-escape these.
60
+ DELIMS = [ '<', '>', '"', '`', ' ', '\r', '\n', '\t' ]
61
+
62
+ # RFC 2396: characters not allowed for various reasons.
63
+ UNWISE = [ '{', '}', '|', '\\', '^', '`' ].concat(DELIMS)
64
+
65
+ # Allowed by RFCs, but cause of XSS attacks. Always escape these.
66
+ AUTO_ESCAPE = [ '\'' ].concat(UNWISE)
67
+ # Characters that are never ever allowed in a hostname.
68
+ # Note that any invalid chars are also handled, but these
69
+ # are the ones that are *expected* to be seen, so we fast-path
70
+ # them.
71
+ NON_HOST_CHARS = [ '%', '/', '?', ';', '#' ].concat(AUTO_ESCAPE)
72
+ HOST_ENDING_CHARS = [ '/', '?', '#' ]
73
+ HOSTNAME_MAX_LEN = 255
74
+ HOSTNAME_PART_PATTERN = /^[+a-z0-9A-Z_-]{0,63}$/
75
+ HOSTNAME_PART_START = /^([+a-z0-9A-Z_-]{0,63})(.*)$/
76
+ # protocols that can allow "unsafe" and "unwise" chars.
77
+ # protocols that never have a hostname.
78
+ HOSTLESS_PROTOCOL = {
79
+ 'javascript' => true,
80
+ 'javascript:' => true
81
+ }
82
+ # protocols that always contain a # bit.
83
+ SLASHED_PROTOCOL = {
84
+ 'http' => true,
85
+ 'https' => true,
86
+ 'ftp' => true,
87
+ 'gopher' => true,
88
+ 'file' => true,
89
+ 'http:' => true,
90
+ 'https:' => true,
91
+ 'ftp:' => true,
92
+ 'gopher:' => true,
93
+ 'file:' => true
94
+ }
95
+
96
+ #------------------------------------------------------------------------------
97
+ def self.urlParse(url, slashesDenoteHost = false)
98
+ return url if (url && url.is_a?(Url))
99
+
100
+ u = Url.new
101
+ u.parse(url, slashesDenoteHost)
102
+ return u
103
+ end
104
+
105
+ #------------------------------------------------------------------------------
106
+ def parse(url, slashesDenoteHost = false)
107
+ rest = url
108
+
109
+ # trim before proceeding.
110
+ # This is to support parse stuff like " http://foo.com \n"
111
+ rest = rest.strip
112
+
113
+ if (!slashesDenoteHost && url.split('#').length == 1)
114
+ # Try fast path regexp
115
+ simplePath = SIMPLE_PATH_PATTERN.match(rest)
116
+ if (simplePath)
117
+ @pathname = simplePath[1]
118
+ if (simplePath[2])
119
+ @search = simplePath[2]
120
+ end
121
+ return self
122
+ end
123
+ end
124
+
125
+ proto = PROTOCOL_PATTERN.match(rest)
126
+ if (proto)
127
+ proto = proto[0]
128
+ lowerProto = proto.downcase
129
+ @protocol = proto
130
+ rest = rest[proto.length..-1]
131
+ end
132
+
133
+ # figure out if it's got a host
134
+ # user@server is *always* interpreted as a hostname, and url
135
+ # resolution will treat //foo/bar as host=foo,path=bar because that's
136
+ # how the browser resolves relative URLs.
137
+ if (slashesDenoteHost || proto || rest.match(/^\/\/[^@\/]+@[^@\/]+/))
138
+ slashes = rest[0...2] == '//'
139
+ if (slashes && !(proto && HOSTLESS_PROTOCOL[proto]))
140
+ rest = rest[2..-1]
141
+ @slashes = true
142
+ end
143
+ end
144
+
145
+ if (!HOSTLESS_PROTOCOL[proto] &&
146
+ (slashes || (proto && !SLASHED_PROTOCOL[proto])))
147
+
148
+ # there's a hostname.
149
+ # the first instance of /, ?, ;, or # ends the host.
150
+ #
151
+ # If there is an @ in the hostname, then non-host chars *are* allowed
152
+ # to the left of the last @ sign, unless some host-ending character
153
+ # comes *before* the @-sign.
154
+ # URLs are obnoxious.
155
+ #
156
+ # ex:
157
+ # http://a@b@c/ => user:a@b host:c
158
+ # http://a@b?@c => user:a host:c path:/?@c
159
+
160
+ # v0.12 TODO(isaacs): This is not quite how Chrome does things.
161
+ # Review our test case against browsers more comprehensively.
162
+
163
+ # find the first instance of any HOST_ENDING_CHARS
164
+ hostEnd = -1
165
+ (0...HOST_ENDING_CHARS.length).each do |i|
166
+ hec = rest.index(HOST_ENDING_CHARS[i])
167
+ if (hec != nil && (hostEnd == -1 || hec < hostEnd))
168
+ hostEnd = hec
169
+ end
170
+ end
171
+
172
+ # at this point, either we have an explicit point where the
173
+ # auth portion cannot go past, or the last @ char is the decider.
174
+ if (hostEnd == -1)
175
+ # atSign can be anywhere.
176
+ atSign = rest.rindex('@')
177
+ else
178
+ # atSign must be in auth portion.
179
+ # http://a@b/c@d => host:b auth:a path:/c@d
180
+ # atSign = rest.lastIndexOf('@', hostEnd);
181
+ atSign = rest[0..hostEnd].rindex('@')
182
+ end
183
+
184
+ # Now we have a portion which is definitely the auth.
185
+ # Pull that off.
186
+ if (atSign != nil)
187
+ auth = rest.slice(0...atSign)
188
+ rest = rest.slice((atSign + 1)..-1)
189
+ @auth = auth
190
+ end
191
+
192
+ # the host is the remaining to the left of the first non-host char
193
+ hostEnd = -1
194
+ (0...NON_HOST_CHARS.length).each do |i|
195
+ hec = rest.index(NON_HOST_CHARS[i])
196
+ if (hec != nil && (hostEnd == -1 || hec < hostEnd))
197
+ hostEnd = hec
198
+ end
199
+ end
200
+ # if we still have not hit it, then the entire thing is a host.
201
+ if (hostEnd === -1)
202
+ hostEnd = rest.length
203
+ end
204
+
205
+ hostEnd -= 1 if (rest[hostEnd - 1] == ':')
206
+ host = rest.slice(0...hostEnd)
207
+ rest = rest.slice(hostEnd..-1)
208
+
209
+ # pull out port.
210
+ self.parseHost(host)
211
+
212
+ # we've indicated that there is a hostname,
213
+ # so even if it's empty, it has to be present.
214
+ @hostname = @hostname || ''
215
+
216
+ # if hostname begins with [ and ends with ]
217
+ # assume that it's an IPv6 address.
218
+ ipv6Hostname = @hostname[0] == '[' &&
219
+ @hostname[@hostname.length - 1] == ']'
220
+
221
+ # validate a little.
222
+ if (!ipv6Hostname)
223
+ hostparts = @hostname.split(/\./)
224
+ (0...hostparts.length).each do |i|
225
+ part = hostparts[i]
226
+ next if (!part)
227
+ if (!part.match(HOSTNAME_PART_PATTERN))
228
+ newpart = ''
229
+ (0...part.length).each do |j|
230
+ if (part[j].ord > 127)
231
+ # we replace non-ASCII char with a temporary placeholder
232
+ # we need this to make sure size of hostname is not
233
+ # broken by replacing non-ASCII by nothing
234
+ newpart += 'x'
235
+ else
236
+ newpart += part[j]
237
+ end
238
+ end
239
+ # we test again with ASCII char only
240
+ if (!newpart.match(HOSTNAME_PART_PATTERN))
241
+ validParts = hostparts.slice(0...i)
242
+ notHost = hostparts.slice((i + 1)..-1)
243
+ bit = part.match(HOSTNAME_PART_START)
244
+ if (bit)
245
+ validParts.push(bit[1])
246
+ notHost.unshift(bit[2])
247
+ end
248
+ if (notHost.length)
249
+ rest = notHost.join('.') + rest
250
+ end
251
+ @hostname = validParts.join('.')
252
+ break
253
+ end
254
+ end
255
+ end
256
+ end
257
+
258
+ if (@hostname.length > HOSTNAME_MAX_LEN)
259
+ @hostname = ''
260
+ end
261
+
262
+ # strip [ and ] from the hostname
263
+ # the host field still retains them, though
264
+ if (ipv6Hostname)
265
+ @hostname = @hostname[1, @hostname.length - 2]
266
+ end
267
+ end
268
+
269
+ # chop off from the tail first.
270
+ hash = rest.index('#')
271
+ if (hash != nil)
272
+ # got a fragment string.
273
+ @hash = rest.slice(hash..-1)
274
+ rest = rest.slice(0...hash)
275
+ end
276
+ qm = rest.index('?')
277
+ if (qm != nil)
278
+ @search = rest.slice(qm..-1)
279
+ rest = rest.slice(0...qm)
280
+ end
281
+ @pathname = rest if !rest.nil? && rest != ''
282
+ if (SLASHED_PROTOCOL[lowerProto] &&
283
+ @hostname && !@pathname)
284
+ @pathname = ''
285
+ end
286
+
287
+ return self
288
+ end
289
+
290
+ #------------------------------------------------------------------------------
291
+ def parseHost(host)
292
+ port = PORT_PATTERN.match(host)
293
+ if (port)
294
+ port = port[0]
295
+ if (port != ':')
296
+ @port = port.slice(1..-1)
297
+ end
298
+ host = host[0, host.length - port.length]
299
+ end
300
+ @hostname = host if (host)
301
+ end
302
+
303
+ end
304
+ end
@@ -0,0 +1,5 @@
1
module MDUrl

  # Version of the mdurl-rb gem; frozen so the shared constant cannot be
  # modified in place.
  VERSION = '1.0.0.1'.freeze

end