mdurl-rb 1.0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: bfcbf24da2273d2580682fa690c821fc69b23c53
4
+ data.tar.gz: 87074f454da478a788227b689d89a6d6be676a60
5
+ SHA512:
6
+ metadata.gz: 347a0b6b36a1802ba392b3a8d01047b94517162fc418162f62b6040126d681e18b12797a6ef8d3a309ba23a38aec5498429174017c0c4aa222000d1efa4d6d1a
7
+ data.tar.gz: ce81db1bde29553a0776ae967523657e94c38fafec405f76fe8f6d3b0b66a9f6cedfd2f5dcdb91c54671892f6d2f3c2c98acf862853274ef65602ba33919897e
data/README.md ADDED
@@ -0,0 +1,104 @@
1
+ # mdurl
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/mdurl-rb.svg)](http://badge.fury.io/rb/mdurl-rb)
4
+
5
+ URL utilities for [motion-markdown-it](https://github.com/digitalmoksha/motion-markdown-it) parser, for both Ruby and RubyMotion
6
+
7
+ This gem is a port of the [mdurl javascript package](https://github.com/markdown-it/mdurl) by Vitaly Puzrin and Alex Kocharin, that is used for the [markdown-it](https://github.com/markdown-it/markdown-it) package
8
+
9
+ ## API
10
+
11
+ _As this gem was ported from the Javascript version, there may still be some mixture of Javascript terminology below._
12
+
13
+ ### MDUrl::Encode.encode(str [, exclude, keepEncoded]) -> String
14
+
15
+ Percent-encode a string, avoiding double encoding. Don't touch `/a-zA-Z0-9/` +
16
+ excluded chars + `/%[a-fA-F0-9]{2}/` (if not disabled). Broken surrogates are
17
+ replaced with `U+FFFD`.
18
+
19
+ Params:
20
+
21
+ - __str__ - input string.
22
+ - __exclude__ - optional, `;/?:@&=+$,-_.!~*'()#`. Additional chars to keep intact
23
+ (except `/a-zA-Z0-9/`).
24
+ - __keepEncoded__ - optional, `true`. By default it skips already encoded sequences
25
+ (`/%[a-fA-F0-9]{2}/`). If set to `false`, `%` will be encoded.
26
+
27
+
28
+ ### MDUrl::Encode::DEFAULT_CHARACTERS, MDUrl::Encode::COMPONENT_CHARACTERS
29
+
30
+ You can use these constants as second argument to `encode` function.
31
+
32
+ - `DEFAULT_CHARACTERS` is the same exclude set as in the standard `encodeURI()` function
33
+ - `COMPONENT_CHARACTERS` is the same exclude set as in the `encodeURIComponent()` function
34
+
35
+ For example, `MDUrl::Encode.encode('something', MDUrl::Encode::COMPONENT_CHARACTERS, true)` is roughly the equivalent of
36
+ the `encodeURIComponent()` function in Javascript (except `encode()` doesn't throw).
37
+
38
+
39
+ ### MDUrl::Decode.decode(str [, exclude]) -> String
40
+
41
+ Decode percent-encoded string. Invalid percent-encoded sequences (e.g. `%2G`)
42
+ are left as is. Invalid UTF-8 characters are replaced with `U+FFFD`.
43
+
44
+
45
+ Params:
46
+
47
+ - __str__ - input string.
48
+ - __exclude__ - set of characters to leave encoded, optional, `;/?:@&=+$,#`.
49
+
50
+
51
+ ### MDUrl::Decode::DEFTAULT_CHARS, MDUrl::Decode::COMPONENT_CHARS
52
+
53
+ You can use these constants as second argument to `decode` function.
54
+
55
+ - `DEFTAULT_CHARS` is the same exclude set as in the standard `decodeURI()` function
56
+ - `COMPONENT_CHARS` is the same exclude set as in the `decodeURIComponent()` function
57
+
58
+ For example, `MDUrl::Decode.decode('something', MDUrl::Decode::DEFTAULT_CHARS)` has the same behavior as
59
+ `decodeURI('something')` in javascript on a correctly encoded input.
60
+
61
+
62
+ ### MDUrl::Url.urlParse(url [, slashesDenoteHost]) -> MDUrl::Url
63
+
64
+ Parse url string. Similar to node's [url.parse](http://nodejs.org/api/url.html#url_url_parse_urlstr_parsequerystring_slashesdenotehost), but without any
65
+ normalizations and query string parse.
66
+
67
+ - __url__ - input url (string)
68
+ - __slashesDenoteHost__ - if url starts with `//`, expect a hostname after it. Optional, `false`.
69
+
70
+ Result (an `MDUrl::Url` object with the following attributes):
71
+
72
+ - protocol
73
+ - slashes
74
+ - auth
75
+ - port
76
+ - hostname
77
+ - hash
78
+ - search
79
+ - pathname
80
+
81
+ Difference with node's `url`:
82
+
83
+ 1. No leading slash in paths, e.g. in `url.parse('http://foo?bar')` pathname is
84
+ ``, not `/`
85
+ 2. Backslashes are not replaced with slashes, so `http:\\example.org\` is
86
+ treated like a relative path
87
+ 3. Trailing colon is treated like a part of the path, i.e. in
88
+ `http://example.org:foo` pathname is `:foo`
89
+ 4. Nothing is URL-encoded in the resulting object, (in joyent/node some chars
90
+ in auth and paths are encoded)
91
+ 5. `url.parse()` does not have `parseQueryString` argument
92
+ 6. Removed extraneous result properties: `host`, `path`, `query`, etc.,
93
+ which can be constructed using other parts of the url.
94
+
95
+
96
+ ### MDUrl::Format.format(urlObject)
97
+
98
+ Format an object previously obtained with `.parse()` function. Similar to node's
99
+ [url.format](http://nodejs.org/api/url.html#url_url_format_urlobj).
100
+
101
+
102
+ ## License
103
+
104
+ [MIT](https://github.com/markdown-it/mdurl/blob/master/LICENSE)
@@ -0,0 +1,145 @@
1
module MDUrl
  # Percent-decoding of URL strings, ported from the JavaScript `mdurl`
  # package.
  module Decode

    # Lookup tables keyed by the `exclude` string; see getDecodeCache.
    @@decodeCache = {}

    # Characters that stay percent-encoded by default.
    # (The misspelled name is part of the public API and is kept as is.)
    DEFTAULT_CHARS  = ';/?:@&=+$,#'
    # Empty exclude set: every valid escape sequence is decoded.
    COMPONENT_CHARS = ''
    # Correctly spelled alias of DEFTAULT_CHARS.
    DEFAULT_CHARS   = DEFTAULT_CHARS

    # Build (and memoize) the table used for single-byte escapes.
    #
    # Index N of the returned array holds the text that `%NN` (N < 128)
    # decodes to: the character itself, or - for characters listed in
    # +exclude+ - the upper-cased escape sequence, so they stay encoded.
    #------------------------------------------------------------------------------
    def self.getDecodeCache(exclude)
      cached = @@decodeCache[exclude]
      return cached if cached

      table = (0...128).map { |code| code.chr }

      exclude.each_char do |ch|
        code = ch.ord
        table[code] = '%' + ('0' + code.to_s(16).upcase).slice(-2, 2)
      end

      @@decodeCache[exclude] = table
    end


    # Decode percent-encoded string.
    #
    # - string  - input string
    # - exclude - String of characters to leave encoded; anything that is
    #             not a String selects DEFTAULT_CHARS
    #
    # Runs of `%XX` escapes are interpreted as UTF-8. Bytes that do not form
    # a valid sequence (bad lead/continuation byte, overlong form, encoded
    # surrogate, value above U+10FFFF, truncated sequence) are replaced by
    # U+FFFD, one replacement character per consumed byte. Text that is not
    # a `%XX` escape is returned untouched.
    #------------------------------------------------------------------------------
    def self.decode(string, exclude = nil)
      exclude = DEFTAULT_CHARS unless exclude.is_a?(String)

      cache = getDecodeCache(exclude)

      string.gsub(/(%[a-f0-9]{2})+/i) do |seq|
        result = ''.dup

        # Every escape is exactly three characters ("%XX"), so byte k of the
        # run starts at offset 3 * k and its hex digits at 3 * k + 1.
        i = 0
        l = seq.length
        while i < l
          b1 = seq.slice((i + 1)...(i + 3)).to_i(16)

          if b1 < 0x80
            # ASCII byte: decoded character, or its escape when excluded
            result << cache[b1]
            i += 3
            next
          end

          if (b1 & 0xE0) == 0xC0 && (i + 3 < l)
            # 110xxxxx 10xxxxxx
            b2 = seq.slice((i + 4)...(i + 6)).to_i(16)

            if (b2 & 0xC0) == 0x80
              char = ((b1 << 6) & 0x7C0) | (b2 & 0x3F)

              if char < 0x80
                # overlong encoding of an ASCII character
                result << "\ufffd\ufffd"
              else
                result << char.chr(Encoding::UTF_8)
              end

              i += 6
              next
            end
          end

          if (b1 & 0xF0) == 0xE0 && (i + 6 < l)
            # 1110xxxx 10xxxxxx 10xxxxxx
            b2 = seq.slice((i + 4)...(i + 6)).to_i(16)
            b3 = seq.slice((i + 7)...(i + 9)).to_i(16)

            if (b2 & 0xC0) == 0x80 && (b3 & 0xC0) == 0x80
              char = ((b1 << 12) & 0xF000) | ((b2 << 6) & 0xFC0) | (b3 & 0x3F)

              if char < 0x800 || (char >= 0xD800 && char <= 0xDFFF)
                # overlong encoding or an encoded UTF-16 surrogate
                result << "\ufffd\ufffd\ufffd"
              else
                result << char.chr(Encoding::UTF_8)
              end

              i += 9
              next
            end
          end

          if (b1 & 0xF8) == 0xF0 && (i + 9 < l)
            # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            b2 = seq.slice((i + 4)...(i + 6)).to_i(16)
            b3 = seq.slice((i + 7)...(i + 9)).to_i(16)
            b4 = seq.slice((i + 10)...(i + 12)).to_i(16)

            if (b2 & 0xC0) == 0x80 && (b3 & 0xC0) == 0x80 && (b4 & 0xC0) == 0x80
              char = ((b1 << 18) & 0x1C0000) | ((b2 << 12) & 0x3F000) | ((b3 << 6) & 0xFC0) | (b4 & 0x3F)

              if char < 0x10000 || char > 0x10FFFF
                result << "\ufffd\ufffd\ufffd\ufffd"
              else
                # Emit the code point itself. The JavaScript original builds
                # a UTF-16 surrogate pair here, but surrogate halves are not
                # valid UTF-8 code points and Integer#chr raises RangeError
                # for them.
                result << char.chr(Encoding::UTF_8)
              end

              i += 12
              next
            end
          end

          # lone continuation byte, invalid lead byte or truncated sequence
          result << "\ufffd"
          i += 3
        end

        result
      end
    end

  end
end
127
+
128
+ # https://gist.github.com/kreeger/4480326
129
+ # class Fixnum
130
+ # def to_surrogate_pair
131
+ # if self >= 0x10000 && self <= 0x10FFFF
132
+ # high = ((self - 0x10000) / 0x400).floor + 0xD800
133
+ # low = ((self - 0x10000) % 0x400) + 0xDC00
134
+ # end
135
+ # '\U' + [high, low].map { |x| x.to_s(16) }.join('\U').upcase
136
+ # end
137
+ #
138
+ # end
139
+ #
140
+ # class String
141
+ # def to_hex
142
+ # self.gsub('\U000', '0x').to_i(16)
143
+ # end
144
+ # end
145
+ #
@@ -0,0 +1,100 @@
1
+ module MDUrl
2
+ module Encode
3
+
4
+ DEFAULT_CHARACTERS = ";/?:@&=+$,-_.!~*'()#"
5
+ COMPONENT_CHARACTERS = "-_.!~*'()"
6
+
7
+ @@encodeCache = {}
8
+
9
+
10
+ # Create a lookup array where anything but characters in `chars` string
11
+ # and alphanumeric chars is percent-encoded.
12
+ #------------------------------------------------------------------------------
13
+ def self.getEncodeCache(exclude)
14
+ cache = @@encodeCache[exclude]
15
+ return cache if (cache)
16
+
17
+ cache = @@encodeCache[exclude] = []
18
+
19
+ (0...128).each do |i|
20
+ ch = i.chr
21
+
22
+ if (/^[0-9a-z]$/i =~ ch)
23
+ # always allow unencoded alphanumeric characters
24
+ cache.push(ch)
25
+ else
26
+ cache.push('%' + ('0' + i.to_s(16).upcase).slice(-2, 2))
27
+ end
28
+ end
29
+
30
+ (0...exclude.length).each do |i|
31
+ cache[exclude[i].ord] = exclude[i]
32
+ end
33
+
34
+ return cache
35
+ end
36
+
37
+
38
+ # Encode unsafe characters with percent-encoding, skipping already
39
+ # encoded sequences.
40
+ #
41
+ # - string - string to encode
42
+ # - exclude - list of characters to ignore (in addition to a-zA-Z0-9)
43
+ # - keepEscaped - don't encode '%' in a correct escape sequence (default: true)
44
+ #------------------------------------------------------------------------------
45
+ def self.encode(string, exclude = nil, keepEscaped = nil)
46
+ result = ''
47
+
48
+ if !exclude.is_a? String
49
+ # encode(string, keepEscaped)
50
+ keepEscaped = exclude
51
+ exclude = DEFAULT_CHARACTERS
52
+ end
53
+
54
+ if keepEscaped == nil
55
+ keepEscaped = true
56
+ end
57
+
58
+ cache = getEncodeCache(exclude)
59
+
60
+ i = 0
61
+ l = string.length
62
+ while i < l
63
+ code = string[i].ord
64
+
65
+ if (keepEscaped && code == 0x25 && i + 2 < l) # %
66
+ if (/^[0-9a-f]{2}$/i =~ (string.slice((i + 1)...(i + 3))))
67
+ result += string.slice(i...(i + 3))
68
+ i += 3
69
+ next
70
+ end
71
+ end
72
+
73
+ if (code < 128)
74
+ result += cache[code]
75
+ i += 1
76
+ next
77
+ end
78
+
79
+ if (code >= 0xD800 && code <= 0xDFFF)
80
+ if (code >= 0xD800 && code <= 0xDBFF && i + 1 < l)
81
+ nextCode = string[i + 1].ord
82
+ if (nextCode >= 0xDC00 && nextCode <= 0xDFFF)
83
+ result += CGI::escape(string[i] + string[i + 1])
84
+ i += 2
85
+ next
86
+ end
87
+ end
88
+ result += '%EF%BF%BD'
89
+ i += 1
90
+ next
91
+ end
92
+
93
+ result += CGI::escape(string[i])
94
+ i += 1
95
+ end
96
+
97
+ return result
98
+ end
99
+ end
100
+ end
@@ -0,0 +1,28 @@
1
module MDUrl
  # Re-assembles a URL string from its parsed parts.
  module Format

    # Build a URL string from +url+, an object responding to `protocol`,
    # `slashes`, `auth`, `hostname`, `port`, `pathname`, `search` and `hash`.
    #
    # Parts that are nil are skipped. A hostname containing ':' is treated as
    # an IPv6 address and wrapped in brackets. `port` may be a String or an
    # Integer. Nothing is escaped or normalized.
    #------------------------------------------------------------------------------
    def self.format(url)
      host = url.hostname || ''
      # ipv6 address
      host = "[#{host}]" if host.include?(':')

      [
        url.protocol || '',
        url.slashes ? '//' : '',
        url.auth ? "#{url.auth}@" : '',
        host,
        # interpolation (rather than String#+) also accepts Integer ports
        url.port ? ":#{url.port}" : '',
        url.pathname || '',
        url.search || '',
        url.hash || ''
      ].join
    end

  end
end
@@ -0,0 +1,304 @@
1
+ # Copyright Joyent, Inc. and other Node contributors.
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a
4
+ # copy of this software and associated documentation files (the
5
+ # "Software"), to deal in the Software without restriction, including
6
+ # without limitation the rights to use, copy, modify, merge, publish,
7
+ # distribute, sublicense, and/or sell copies of the Software, and to permit
8
+ # persons to whom the Software is furnished to do so, subject to the
9
+ # following conditions:
10
+ #
11
+ # The above copyright notice and this permission notice shall be included
12
+ # in all copies or substantial portions of the Software.
13
+ #
14
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15
+ # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
17
+ # NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
18
+ # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19
+ # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20
+ # USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
22
+ #
23
+ # Changes from joyent/node:
24
+ #
25
+ # 1. No leading slash in paths,
26
+ # e.g. in `url.parse('http://foo?bar')` pathname is ``, not `/`
27
+ #
28
+ # 2. Backslashes are not replaced with slashes,
29
+ # so `http:\\example.org\` is treated like a relative path
30
+ #
31
+ # 3. Trailing colon is treated like a part of the path,
32
+ # i.e. in `http://example.org:foo` pathname is `:foo`
33
+ #
34
+ # 4. Nothing is URL-encoded in the resulting object,
35
+ # (in joyent/node some chars in auth and paths are encoded)
36
+ #
37
+ # 5. `url.parse()` does not have `parseQueryString` argument
38
+ #
39
+ # 6. Removed extraneous result properties: `host`, `path`, `query`, etc.,
40
+ # which can be constructed using other parts of the url.
41
+ #
42
+
43
module MDUrl
  # URL parser ported from node's `url.parse` (see the list of differences in
  # the header comment of this file). A parsed URL is a Url instance whose
  # parts are exposed through the accessors below; parts that are absent stay
  # nil.
  class Url

    # NOTE: the `hash` accessor (URL fragment, including the leading '#')
    # replaces Object#hash on Url instances, so they are not suitable as
    # Hash keys.
    attr_accessor :protocol, :slashes, :hostname, :pathname, :auth, :port, :search, :hash

    # Reference: RFC 3986, RFC 1808, RFC 2396

    # define these here so at least they only have to be
    # compiled once on the first module load.
    #
    # \A / \z are used instead of ^ / $: in Ruby the latter match at every
    # line boundary, which would let e.g. a protocol be picked up from the
    # second line of the input.
    PROTOCOL_PATTERN      = /\A([a-z0-9.+-]+:)/i
    PORT_PATTERN          = /:[0-9]*\z/

    # Special case for a simple path URL
    SIMPLE_PATH_PATTERN   = /\A(\/\/?(?!\/)[^\?\s]*)(\?[^\s]*)?\z/

    # RFC 2396: characters reserved for delimiting URLs.
    # We actually just auto-escape these.
    # (CR, LF and TAB need double quotes - single-quoted '\r' is a backslash
    # followed by the letter r.)
    DELIMS                = [ '<', '>', '"', '`', ' ', "\r", "\n", "\t" ]

    # RFC 2396: characters not allowed for various reasons.
    UNWISE                = [ '{', '}', '|', '\\', '^', '`' ] + DELIMS

    # Allowed by RFCs, but cause of XSS attacks.  Always escape these.
    AUTO_ESCAPE           = [ '\'' ] + UNWISE

    # Characters that are never ever allowed in a hostname.
    # Note that any invalid chars are also handled, but these
    # are the ones that are *expected* to be seen, so we fast-path
    # them.
    NON_HOST_CHARS        = [ '%', '/', '?', ';', '#' ] + AUTO_ESCAPE
    HOST_ENDING_CHARS     = [ '/', '?', '#' ]
    HOSTNAME_MAX_LEN      = 255
    HOSTNAME_PART_PATTERN = /\A[+a-z0-9A-Z_-]{0,63}\z/
    HOSTNAME_PART_START   = /\A([+a-z0-9A-Z_-]{0,63})(.*)\z/

    # protocols that never have a hostname.
    HOSTLESS_PROTOCOL = {
      'javascript'  => true,
      'javascript:' => true
    }

    # protocols that always contain a // bit.
    SLASHED_PROTOCOL = {
      'http'    => true,
      'https'   => true,
      'ftp'     => true,
      'gopher'  => true,
      'file'    => true,
      'http:'   => true,
      'https:'  => true,
      'ftp:'    => true,
      'gopher:' => true,
      'file:'   => true
    }

    # Parse +url+ into a new Url. A value that already is a Url is returned
    # unchanged.
    #------------------------------------------------------------------------------
    def self.urlParse(url, slashesDenoteHost = false)
      return url if (url && url.is_a?(Url))

      u = Url.new
      u.parse(url, slashesDenoteHost)
      return u
    end

    # Parse the string +url+ into this object and return self.
    #
    # - url               - input url (String)
    # - slashesDenoteHost - when true, a leading `//` is followed by a
    #                       hostname even if there is no protocol
    #------------------------------------------------------------------------------
    def parse(url, slashesDenoteHost = false)
      # trim before proceeding.
      # This is to support parse stuff like "  http://foo.com  \n"
      rest = url.strip

      # `include?` rather than `split('#').length == 1`: String#split drops
      # trailing empty fields, so "/foo#" used to take the fast path and kept
      # the '#' inside the pathname.
      if !slashesDenoteHost && !url.include?('#')
        # Try fast path regexp
        simplePath = SIMPLE_PATH_PATTERN.match(rest)
        if simplePath
          @pathname = simplePath[1]
          @search   = simplePath[2] if simplePath[2]
          return self
        end
      end

      proto      = nil
      lowerProto = nil
      slashes    = false

      protoMatch = PROTOCOL_PATTERN.match(rest)
      if protoMatch
        proto      = protoMatch[0]
        lowerProto = proto.downcase
        @protocol  = proto
        rest       = rest[proto.length..-1]
      end

      # figure out if it's got a host
      # user@server is *always* interpreted as a hostname, and url
      # resolution will treat //foo/bar as host=foo,path=bar because that's
      # how the browser resolves relative URLs.
      if slashesDenoteHost || proto || rest.match(/\A\/\/[^@\/]+@[^@\/]+/)
        slashes = rest[0...2] == '//'
        if slashes && !(proto && HOSTLESS_PROTOCOL[proto])
          rest     = rest[2..-1]
          @slashes = true
        end
      end

      if !HOSTLESS_PROTOCOL[proto] &&
          (slashes || (proto && !SLASHED_PROTOCOL[proto]))

        # there's a hostname.
        # the first instance of /, ?, ;, or # ends the host.
        #
        # If there is an @ in the hostname, then non-host chars *are* allowed
        # to the left of the last @ sign, unless some host-ending character
        # comes *before* the @-sign.
        # URLs are obnoxious.
        #
        # ex:
        # http://a@b@c/ => user:a@b host:c
        # http://a@b?@c => user:a host:c path:/?@c

        # v0.12 TODO(isaacs): This is not quite how Chrome does things.
        # Review our test case against browsers more comprehensively.

        # find the first instance of any HOST_ENDING_CHARS
        hostEnd = firstIndexOfAny(rest, HOST_ENDING_CHARS)

        # at this point, either we have an explicit point where the
        # auth portion cannot go past, or the last @ char is the decider.
        if hostEnd.nil?
          # atSign can be anywhere.
          atSign = rest.rindex('@')
        else
          # atSign must be in auth portion.
          # http://a@b/c@d => host:b auth:a path:/c@d
          atSign = rest[0..hostEnd].rindex('@')
        end

        # Now we have a portion which is definitely the auth.
        # Pull that off.
        unless atSign.nil?
          @auth = rest.slice(0...atSign)
          rest  = rest.slice((atSign + 1)..-1)
        end

        # the host is the remaining to the left of the first non-host char;
        # if there is none, then the entire thing is a host.
        hostEnd = firstIndexOfAny(rest, NON_HOST_CHARS) || rest.length

        # A colon right before the end of the host belongs to the path.
        # The `hostEnd > 0` guard matters: rest[-1] would be the *last*
        # character of the string, not "nothing" as in JavaScript.
        hostEnd -= 1 if hostEnd > 0 && rest[hostEnd - 1] == ':'
        host = rest.slice(0...hostEnd)
        rest = rest.slice(hostEnd..-1)

        # pull out port.
        self.parseHost(host)

        # we've indicated that there is a hostname,
        # so even if it's empty, it has to be present.
        @hostname = @hostname || ''

        # if hostname begins with [ and ends with ]
        # assume that it's an IPv6 address.
        ipv6Hostname = @hostname[0] == '[' &&
            @hostname[@hostname.length - 1] == ']'

        # validate a little.
        if !ipv6Hostname
          # limit -1 keeps trailing empty labels ("a.b." => ["a", "b", ""]),
          # so a trailing dot is not lost when labels are moved back to `rest`
          hostparts = @hostname.split('.', -1)
          hostparts.each_with_index do |part, i|
            next if part.match(HOSTNAME_PART_PATTERN)

            # we replace non-ASCII char with a temporary placeholder
            # we need this to make sure size of hostname is not
            # broken by replacing non-ASCII by nothing
            newpart = part.each_char.map { |c| c.ord > 127 ? 'x' : c }.join

            # we test again with ASCII char only
            next if newpart.match(HOSTNAME_PART_PATTERN)

            # First invalid label: its valid prefix stays in the hostname,
            # everything after that goes back to the front of `rest`.
            validParts = hostparts.slice(0...i)
            notHost    = hostparts.slice((i + 1)..-1)
            bit        = part.match(HOSTNAME_PART_START)
            if bit
              validParts.push(bit[1])
              notHost.unshift(bit[2])
            end
            rest = notHost.join('.') + rest unless notHost.empty?
            @hostname = validParts.join('.')
            break
          end
        end

        if @hostname.length > HOSTNAME_MAX_LEN
          @hostname = ''
        end

        # strip [ and ] from the hostname
        if ipv6Hostname
          @hostname = @hostname[1, @hostname.length - 2]
        end
      end

      # chop off from the tail first.
      hashIndex = rest.index('#')
      unless hashIndex.nil?
        # got a fragment string.
        @hash = rest.slice(hashIndex..-1)
        rest  = rest.slice(0...hashIndex)
      end
      qm = rest.index('?')
      unless qm.nil?
        @search = rest.slice(qm..-1)
        rest    = rest.slice(0...qm)
      end
      @pathname = rest if !rest.nil? && rest != ''
      if SLASHED_PROTOCOL[lowerProto] &&
          @hostname && !@pathname
        @pathname = ''
      end

      return self
    end

    # Split an optional trailing `:port` off +host+ and store the results in
    # @port (digits only, left untouched when the port is empty) and
    # @hostname.
    #------------------------------------------------------------------------------
    def parseHost(host)
      port = PORT_PATTERN.match(host)
      if port
        port = port[0]
        if port != ':'
          @port = port.slice(1..-1)
        end
        host = host[0, host.length - port.length]
      end
      @hostname = host if host
    end

    private

    # Smallest index at which any of +chars+ occurs in +str+, or nil when
    # none of them occurs.
    #------------------------------------------------------------------------------
    def firstIndexOfAny(str, chars)
      chars.map { |c| str.index(c) }.compact.min
    end

  end
end
@@ -0,0 +1,5 @@
1
module MDUrl
  # Version string of the mdurl-rb gem.
  VERSION = '1.0.0.1'
end