mdurl-rb 1.0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/mdurl-rb.rb ADDED
@@ -0,0 +1,18 @@
1
# encoding: utf-8

# Entry point for the mdurl-rb library.
#
# When Motion::Project::Config is defined (presumably a RubyMotion build --
# confirm), the list of every .rb file below this directory's mdurl-rb/
# folder is unshifted onto the app's file list and motion-support is
# required. Otherwise the four library components are required directly.
if defined?(Motion::Project::Config)
  gem_lib_root = File.dirname(File.expand_path(__FILE__))

  Motion::Project::App.setup do |app|
    library_sources = Dir.glob(File.join(gem_lib_root, "mdurl-rb/**/*.rb"))
    app.files.unshift(library_sources)
  end

  require 'motion-support'
else
  # Load order matches the original explicit require list.
  %w[parse format encode decode].each do |component|
    require "mdurl-rb/#{component}"
  end
end
@@ -0,0 +1,112 @@
1
# Converts a string of binary digits into a percent-encoded byte sequence.
#
# All whitespace is removed first, then the digits are consumed eight at a
# time from the right-hand end; every group becomes "%xx" with two lowercase
# hex digits. A leftover leading group shorter than eight digits is encoded
# as its own zero-padded byte rather than failing on a nil slice.
#
# @param str [String] binary digits, optionally separated by whitespace
# @return [String] percent-encoded bytes, e.g. '11000111 10101010' => '%c7%aa'
def encodeBinary(str)
  bits = str.gsub(/\s+/, '')
  encoded = []

  until bits.empty?
    # String#[] yields nil when fewer than eight characters remain, so fall
    # back to whatever is left.
    octet = bits[-8..-1] || bits
    hex = octet.to_i(2).to_s(16)
    # Left-pad with '0' and keep the last two characters so single-digit
    # values still produce a two-character escape.
    encoded.unshift("%#{('0' + hex)[-2, 2]}")
    bits = bits[0...-octet.length]
  end

  encoded.join
end
12
+
13
# Byte sequences written as whitespace-separated binary octets, mapped to
# whether the sequence is expected to decode as valid UTF-8 (true) or not
# (false). The 'utf8' examples iterate over this table in insertion order.
samples = {
  # single bytes below 0x80
  '00000000' => true,
  '01010101' => true,
  '01111111' => true,

  # continuation bytes are invalid as the first byte
  '10000000' => false,
  '10111111' => false,

  # invalid sequences: the 2nd byte should be >= 0x80
  '11000111 01010101' => false,
  '11100011 01010101' => false,
  '11110001 01010101' => false,

  # invalid sequences: the 2nd byte should be < 0xc0
  '11000111 11000000' => false,
  '11100011 11000000' => false,
  '11110001 11000000' => false,

  # invalid 3rd byte
  '11100011 10010101 01010101' => false,
  '11110001 10010101 01010101' => false,

  # invalid 4th byte
  '11110001 10010101 10010101 01010101' => false,

  # valid sequences
  '11000111 10101010' => true,
  '11100011 10101010 10101010' => true,
  # '11110001 10101010 10101010 10101010' => true, # TODO don't know how to handle surrogate pairs

  # minimal chars with given length
  '11000010 10000000' => true,
  '11100000 10100000 10000000' => true,

  # impossible sequences
  '11000001 10111111' => false,
  '11100000 10011111 10111111' => false,
  '11000001 10000000' => false,
  '11100000 10010000 10000000' => false,

  # maximum chars with given length
  '11011111 10111111' => true,
  '11101111 10111111 10111111' => true,

  # '11110000 10010000 10000000 10000000' => true, # TODO don't know how to handle surrogate pairs
  # '11110000 10010000 10001111 10001111' => true, # TODO don't know how to handle surrogate pairs
  # '11110100 10001111 10110000 10000000' => true, # TODO don't know how to handle surrogate pairs
  # '11110100 10001111 10111111 10111111' => true, # TODO don't know how to handle surrogate pairs

  # too low
  '11110000 10001111 10111111 10111111' => false,

  # too high
  '11110100 10010000 10000000 10000000' => false,
  '11110100 10011111 10111111 10111111' => false,

  # surrogate range
  '11101101 10011111 10111111' => true,
  '11101101 10100000 10000000' => false,
  '11101101 10111111 10111111' => false,
  '11101110 10000000 10000000' => true
}
76
+
77
# Specs for MDUrl::Decode.decode: plain %xx decoding, malformed escapes,
# the optional set of characters that must stay escaped, and the UTF-8
# validity table held in `samples`.
describe 'decode' do
  it 'should decode %xx' do
    expect(MDUrl::Decode.decode('x%20xx%20%2520')).to eq 'x xx %20'
  end

  it 'should not decode invalid sequences' do
    expect(MDUrl::Decode.decode('%2g%z1%%')).to eq '%2g%z1%%'
  end

  it 'should not decode reservedSet' do
    expect(MDUrl::Decode.decode('%20%25%20', '%')).to eq ' %25 '
    expect(MDUrl::Decode.decode('%20%25%20', ' ')).to eq '%20%%20'
    expect(MDUrl::Decode.decode('%20%25%20', ' %')).to eq '%20%25%20'
  end

  describe 'utf8' do
    # One example per table entry; the example name is the binary octet string.
    samples.each_pair do |bits, valid|
      it bits do
        escaped = encodeBinary(bits)

        if valid
          # Valid input must decode exactly like CGI.unescape and must not
          # contain the U+FFFD replacement character.
          reference = CGI.unescape(escaped, Encoding::UTF_8)
          decoded = MDUrl::Decode.decode(escaped)
          expect(reference).to eq decoded
          expect(decoded.index("\ufffd")).to be_nil
        else
          # Invalid input must surface at least one U+FFFD.
          decoded = MDUrl::Decode.decode(escaped)
          expect(decoded.index("\ufffd")).not_to be_nil
        end
      end
    end
  end
end
@@ -0,0 +1,74 @@
1
# Specs for MDUrl::Encode.encode: default escaping rules, handling of
# existing %xx escapes, and the optional unescapedSet / keepEscaped arguments.
describe 'encode' do
  it 'should encode percent' do
    expect(MDUrl::Encode.encode("%%%")).to eq '%25%25%25'
  end

  it 'should encode control chars' do
    expect(MDUrl::Encode.encode("\r\n")).to eq '%0D%0A'
  end

  it 'should not encode parts of an url' do
    expect(MDUrl::Encode.encode('?#')).to eq '?#'
  end

  # The description previously read "should not encode", contradicting the
  # assertion below, which expects all three characters to be escaped.
  it 'should encode []^ - commonmark tests' do
    expect(MDUrl::Encode.encode('[]^')).to eq '%5B%5D%5E'
  end

  it 'should encode spaces' do
    expect(MDUrl::Encode.encode('my url')).to eq 'my%20url'
  end

  it 'should encode unicode' do
    expect(MDUrl::Encode.encode('φου')).to eq '%CF%86%CE%BF%CF%85'
  end

  it "should encode % if it doesn't start a valid escape seq" do
    expect(MDUrl::Encode.encode('%FG')).to eq '%25FG'
  end

  it 'should preserve non-utf8 encoded characters' do
    expect(MDUrl::Encode.encode('%00%FF')).to eq '%00%FF'
  end

  # it 'should encode characters on the cache borders' do
  #   # protects against off-by-one in cache implementation
  #   expect(MDUrl::Encode.encode("\x00\x7F\x80")).to eq '%00%7F%C2%80'
  # end

  describe 'arguments' do
    it 'encode(string, unescapedSet)' do
      expect(MDUrl::Encode.encode('!@#$', '@$')).to eq '%21@%23$'
    end

    it 'encode(string, keepEscaped=true)' do
      expect(MDUrl::Encode.encode('%20%2G', true)).to eq '%20%252G'
    end

    it 'encode(string, keepEscaped=false)' do
      expect(MDUrl::Encode.encode('%20%2G', false)).to eq '%2520%252G'
    end

    it 'encode(string, unescapedSet, keepEscaped)' do
      expect(MDUrl::Encode.encode('!@%25', '@', false)).to eq '%21@%2525'
    end
  end

  # TODO: don't know how to fix utf8 issue yet
  # describe 'surrogates' do
  #   it 'bad surrogates (high)' do
  #     expect(MDUrl::Encode.encode("\uD800foo")).to eq '%EF%BF%BDfoo'
  #     expect(MDUrl::Encode.encode("foo\uD800")).to eq 'foo%EF%BF%BD'
  #   end
  #
  #   it 'bad surrogates (low)' do
  #     expect(MDUrl::Encode.encode("\uDD00foo")).to eq '%EF%BF%BDfoo'
  #     expect(MDUrl::Encode.encode("foo\uDD00")).to eq 'foo%EF%BF%BD'
  #   end
  #
  #   it 'valid one' do
  #     expect(MDUrl::Encode.encode("\uD800\uDD00")).to eq '%F0%90%84%80'
  #   end
  # end
end