mdurl-rb 1.0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +104 -0
- data/lib/mdurl-rb/decode.rb +145 -0
- data/lib/mdurl-rb/encode.rb +100 -0
- data/lib/mdurl-rb/format.rb +28 -0
- data/lib/mdurl-rb/parse.rb +304 -0
- data/lib/mdurl-rb/version.rb +5 -0
- data/lib/mdurl-rb.rb +18 -0
- data/spec/mdurl-rb/decode_spec.rb +112 -0
- data/spec/mdurl-rb/encode_spec.rb +74 -0
- data/spec/mdurl-rb/fixtures/url_spec.rb +704 -0
- data/spec/mdurl-rb/format_spec.rb +9 -0
- data/spec/mdurl-rb/parse_spec.rb +15 -0
- data/spec/spec_helper.rb +4 -0
- metadata +78 -0
data/lib/mdurl-rb.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Gem entry point.
#
# When Motion::Project::Config is defined (presumably a RubyMotion build --
# confirm), every library source file is put at the front of the app's file
# list and motion-support is required. Otherwise the four library components
# are loaded with ordinary requires, in the order parse, format, encode, decode.
if defined?(Motion::Project::Config)
  gem_lib_root = File.dirname(File.expand_path(__FILE__))

  Motion::Project::App.setup do |app|
    library_sources = Dir.glob(File.join(gem_lib_root, "mdurl-rb/**/*.rb"))
    app.files.unshift(library_sources)
  end

  require 'motion-support'
else
  %w[parse format encode decode].each do |component|
    require "mdurl-rb/#{component}"
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
# Converts a string of binary digits into a percent-encoded byte sequence.
#
# Whitespace is stripped first, so bytes may be separated for readability
# ('11000111 10101010' => '%c7%aa'). Digits are grouped into bytes counting
# from the right; a leading group shorter than eight digits is read as if it
# were padded with leading zeros ('1' => '%01').
#
# @param str [String] binary digits, optionally separated by whitespace
# @return [String] one lowercase '%xx' escape per byte; '' for blank input
def encodeBinary(str)
  bits = str.gsub(/\s+/, '')
  return '' if bits.empty?

  # Pad on the left to a whole number of bytes. Without this, a partial
  # leading group made `slice(-8..-1)` return nil and the call blew up.
  byte_count = (bits.length + 7) / 8
  padded = bits.rjust(byte_count * 8, '0')

  padded.scan(/.{8}/).map { |byte| format('%%%02x', byte.to_i(2)) }.join
end
|
12
|
+
|
13
|
+
# Fixture table for the UTF-8 decoding specs.
#
# Each key is a byte sequence written as whitespace-separated binary octets;
# the value says whether the sequence is expected to decode cleanly (true) or
# to produce a replacement character (false).
samples = {
  # single-byte characters
  '00000000' => true,
  '01010101' => true,
  '01111111' => true,

  # continuation bytes are invalid as the first byte
  '10000000' => false,
  '10111111' => false,

  # invalid sequences: the second byte should be >= 0x80
  '11000111 01010101' => false,
  '11100011 01010101' => false,
  '11110001 01010101' => false,

  # invalid sequences: the second byte should be < 0xc0
  '11000111 11000000' => false,
  '11100011 11000000' => false,
  '11110001 11000000' => false,

  # invalid third byte
  '11100011 10010101 01010101' => false,
  '11110001 10010101 01010101' => false,

  # invalid fourth byte
  '11110001 10010101 10010101 01010101' => false,

  # valid sequences
  '11000111 10101010' => true,
  '11100011 10101010 10101010' => true,
  # '11110001 10101010 10101010 10101010' => true, # TODO don't know how to handle surrogate pairs

  # smallest characters of a given length
  '11000010 10000000' => true,
  '11100000 10100000 10000000' => true,

  # impossible sequences
  '11000001 10111111' => false,
  '11100000 10011111 10111111' => false,
  '11000001 10000000' => false,
  '11100000 10010000 10000000' => false,

  # largest characters of a given length
  '11011111 10111111' => true,
  '11101111 10111111 10111111' => true,

  # '11110000 10010000 10000000 10000000' => true, # TODO don't know how to handle surrogate pairs
  # '11110000 10010000 10001111 10001111' => true, # TODO don't know how to handle surrogate pairs
  # '11110100 10001111 10110000 10000000' => true, # TODO don't know how to handle surrogate pairs
  # '11110100 10001111 10111111 10111111' => true, # TODO don't know how to handle surrogate pairs

  # four-byte sequence that is too low
  '11110000 10001111 10111111 10111111' => false,

  # four-byte sequences that are too high
  '11110100 10010000 10000000 10000000' => false,
  '11110100 10011111 10111111 10111111' => false,

  # around the surrogate range
  '11101101 10011111 10111111' => true,
  '11101101 10100000 10000000' => false,
  '11101101 10111111 10111111' => false,
  '11101110 10000000 10000000' => true
}
|
76
|
+
|
77
|
+
# Specs for MDUrl::Decode.decode: plain %xx decoding, malformed escapes, the
# optional second argument listing characters that must stay escaped, and
# UTF-8 validation driven by the `samples` table.
#
# NOTE(review): CGI is used below without a require in this file; presumably
# it is loaded elsewhere (e.g. spec_helper) -- confirm.
describe 'decode' do
  it 'should decode %xx' do
    expect(MDUrl::Decode.decode('x%20xx%20%2520')).to eq 'x xx %20'
  end

  it 'should not decode invalid sequences' do
    expect(MDUrl::Decode.decode('%2g%z1%%')).to eq '%2g%z1%%'
  end

  it 'should not decode reservedSet' do
    expect(MDUrl::Decode.decode('%20%25%20', '%')).to eq ' %25 '
    expect(MDUrl::Decode.decode('%20%25%20', ' ')).to eq '%20%%20'
    expect(MDUrl::Decode.decode('%20%25%20', ' %')).to eq '%20%25%20'
  end

  describe 'utf8' do
    samples.each_pair do |bits, valid|
      it bits do
        escaped = encodeBinary(bits)

        if valid
          # A valid sequence must decode exactly as CGI.unescape does and
          # must not contain the replacement character.
          reference = CGI.unescape(escaped, Encoding::UTF_8)
          decoded = MDUrl::Decode.decode(escaped)
          expect(reference).to eq decoded
          expect(decoded.index("\ufffd")).to be_nil
        else
          # An invalid sequence must yield at least one replacement character.
          decoded = MDUrl::Decode.decode(escaped)
          expect(decoded.index("\ufffd")).not_to be_nil
        end
      end
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# Specs for MDUrl::Encode.encode: default escaping rules, plus the optional
# arguments (a set of characters to leave unescaped and a flag controlling
# whether existing %xx escapes are kept).
describe 'encode' do
  it 'should encode percent' do
    expect(MDUrl::Encode.encode("%%%")).to eq '%25%25%25'
  end

  it 'should encode control chars' do
    expect(MDUrl::Encode.encode("\r\n")).to eq '%0D%0A'
  end

  it 'should not encode parts of an url' do
    expect(MDUrl::Encode.encode('?#')).to eq '?#'
  end

  # The title used to say "should not encode", which contradicted the
  # expectation below: these characters are percent-encoded.
  it 'should encode []^ - commonmark tests' do
    expect(MDUrl::Encode.encode('[]^')).to eq '%5B%5D%5E'
  end

  it 'should encode spaces' do
    expect(MDUrl::Encode.encode('my url')).to eq 'my%20url'
  end

  it 'should encode unicode' do
    expect(MDUrl::Encode.encode('φου')).to eq '%CF%86%CE%BF%CF%85'
  end

  it 'should encode % if it doesn\'t start a valid escape seq' do
    expect(MDUrl::Encode.encode('%FG')).to eq '%25FG'
  end

  it 'should preserve non-utf8 encoded characters' do
    expect(MDUrl::Encode.encode('%00%FF')).to eq '%00%FF'
  end

  # it 'should encode characters on the cache borders' do
  #   # protects against off-by-one in cache implementation
  #   expect(MDUrl::Encode.encode("\x00\x7F\x80")).to eq '%00%7F%C2%80'
  # end

  describe 'arguments' do
    it 'encode(string, unescapedSet)' do
      expect(MDUrl::Encode.encode('!@#$', '@$')).to eq '%21@%23$'
    end

    it 'encode(string, keepEscaped=true)' do
      expect(MDUrl::Encode.encode('%20%2G', true)).to eq '%20%252G'
    end

    it 'encode(string, keepEscaped=false)' do
      expect(MDUrl::Encode.encode('%20%2G', false)).to eq '%2520%252G'
    end

    it 'encode(string, unescapedSet, keepEscaped)' do
      expect(MDUrl::Encode.encode('!@%25', '@', false)).to eq '%21@%2525'
    end
  end

  # TODO don't know how to fix utf8 issue yet
  # describe 'surrogates' do
  #   it 'bad surrogates (high)' do
  #     expect(MDUrl::Encode.encode("\uD800foo")).to eq '%EF%BF%BDfoo'
  #     expect(MDUrl::Encode.encode("foo\uD800")).to eq 'foo%EF%BF%BD'
  #   end
  #
  #   it 'bad surrogates (low)' do
  #     expect(MDUrl::Encode.encode("\uDD00foo")).to eq '%EF%BF%BDfoo'
  #     expect(MDUrl::Encode.encode("foo\uDD00")).to eq 'foo%EF%BF%BD'
  #   end
  #
  #   it 'valid one' do
  #     expect(MDUrl::Encode.encode("\uD800\uDD00")).to eq '%F0%90%84%80'
  #   end
  # end
end
|