escape_utils 0.2.4 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -1
- data/.travis.yml +13 -0
- data/CHANGELOG.md +7 -0
- data/MIT-LICENSE +1 -1
- data/Rakefile +5 -18
- data/benchmark/html_escape.rb +9 -2
- data/benchmark/xml_escape.rb +29 -0
- data/escape_utils.gemspec +2 -3
- data/ext/escape_utils/buffer.c +181 -160
- data/ext/escape_utils/buffer.h +90 -68
- data/ext/escape_utils/escape_utils.c +77 -39
- data/ext/escape_utils/extconf.rb +1 -1
- data/ext/escape_utils/houdini.h +37 -8
- data/ext/escape_utils/houdini_href_e.c +115 -0
- data/ext/escape_utils/houdini_html_e.c +90 -0
- data/ext/escape_utils/houdini_html_u.c +122 -0
- data/ext/escape_utils/{houdini_js.c → houdini_js_e.c} +17 -75
- data/ext/escape_utils/houdini_js_u.c +60 -0
- data/ext/escape_utils/{uri_escape.h → houdini_uri_e.c} +68 -2
- data/ext/escape_utils/houdini_uri_u.c +65 -0
- data/ext/escape_utils/houdini_xml_e.c +136 -0
- data/lib/escape_utils/version.rb +1 -1
- data/lib/escape_utils/xml/builder.rb +8 -0
- data/test/helper.rb +14 -0
- data/test/html/escape_test.rb +61 -0
- data/test/html/unescape_test.rb +48 -0
- data/test/html_safety_test.rb +46 -0
- data/test/javascript/escape_test.rb +42 -0
- data/test/javascript/unescape_test.rb +46 -0
- data/test/query/escape_test.rb +50 -0
- data/test/query/unescape_test.rb +52 -0
- data/test/uri/escape_test.rb +50 -0
- data/test/uri/unescape_test.rb +55 -0
- data/test/url/escape_test.rb +58 -0
- data/test/url/unescape_test.rb +60 -0
- data/test/xml/escape_test.rb +67 -0
- metadata +136 -152
- data/.rspec +0 -2
- data/ext/escape_utils/houdini_html.c +0 -214
- data/ext/escape_utils/houdini_uri.c +0 -130
- data/spec/html/escape_spec.rb +0 -42
- data/spec/html/unescape_spec.rb +0 -37
- data/spec/html_safety_spec.rb +0 -48
- data/spec/javascript/escape_spec.rb +0 -34
- data/spec/javascript/unescape_spec.rb +0 -37
- data/spec/query/escape_spec.rb +0 -44
- data/spec/query/unescape_spec.rb +0 -46
- data/spec/rcov.opts +0 -3
- data/spec/spec_helper.rb +0 -5
- data/spec/uri/escape_spec.rb +0 -43
- data/spec/uri/unescape_spec.rb +0 -57
- data/spec/url/escape_spec.rb +0 -52
- data/spec/url/unescape_spec.rb +0 -57
@@ -0,0 +1,67 @@
|
|
1
|
+
require File.expand_path("../../helper", __FILE__)
|
2
|
+
|
3
|
+
class XmlEscapeTest < MiniTest::Unit::TestCase
|
4
|
+
def test_basic_xml
|
5
|
+
assert_equal "&lt;some_tag/&gt;", EscapeUtils.escape_xml("<some_tag/>")
|
6
|
+
end
|
7
|
+
|
8
|
+
def test_double_quotes
|
9
|
+
assert_equal "&lt;some_tag some_attr=&quot;some value&quot;/&gt;", EscapeUtils.escape_xml("<some_tag some_attr=\"some value\"/>")
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_single_quotes
|
13
|
+
assert_equal "&lt;some_tag some_attr=&apos;some value&apos;/&gt;", EscapeUtils.escape_xml("<some_tag some_attr='some value'/>")
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_ampersand
|
17
|
+
assert_equal "&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;", EscapeUtils.escape_xml("<b>Bourbon & Branch</b>")
|
18
|
+
end
|
19
|
+
|
20
|
+
# See http://www.w3.org/TR/REC-xml/#charsets for details.
|
21
|
+
VALID = [
|
22
|
+
(0x9..0xA), 0xD,
|
23
|
+
(0x20..0xD7FF),
|
24
|
+
(0xE000..0xFFFD),
|
25
|
+
(0x10000..0x10FFFF)
|
26
|
+
]
|
27
|
+
|
28
|
+
REPLACEMENT_CHAR = "?".unpack('U*').first
|
29
|
+
|
30
|
+
def test_invalid_characters
|
31
|
+
VALID.each do |range|
|
32
|
+
if range.kind_of? Range
|
33
|
+
start = range.begin
|
34
|
+
last = range.end
|
35
|
+
last -= 1 if range.exclude_end?
|
36
|
+
else
|
37
|
+
start = last = range
|
38
|
+
end
|
39
|
+
input = [start.pred, start, last, last.next].pack('U*')
|
40
|
+
expect = [REPLACEMENT_CHAR, start, last, REPLACEMENT_CHAR].pack('U*')
|
41
|
+
assert_equal expect, EscapeUtils.escape_xml(input)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
if RUBY_VERSION =~ /^1.9/
|
46
|
+
def test_input_must_be_utf8_or_ascii
|
47
|
+
str = "<some_tag/>"
|
48
|
+
|
49
|
+
str.force_encoding 'ISO-8859-1'
|
50
|
+
assert_raises Encoding::CompatibilityError do
|
51
|
+
EscapeUtils.escape_xml(str)
|
52
|
+
end
|
53
|
+
|
54
|
+
str.force_encoding 'UTF-8'
|
55
|
+
begin
|
56
|
+
EscapeUtils.escape_xml(str)
|
57
|
+
rescue Encoding::CompatibilityError => e
|
58
|
+
assert_nil e, "#{e.class.name} raised, expected not to"
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def test_return_value_is_tagged_as_utf8
|
63
|
+
str = "<some_tag/>"
|
64
|
+
assert_equal Encoding.find('UTF-8'), EscapeUtils.escape_url(str).encoding
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
metadata
CHANGED
@@ -1,136 +1,137 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: escape_utils
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 2
|
9
|
-
- 4
|
10
|
-
version: 0.2.4
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Brian Lopez
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
dependencies:
|
21
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2013-02-26 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
22
15
|
name: rake-compiler
|
23
|
-
|
24
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
25
17
|
none: false
|
26
|
-
requirements:
|
27
|
-
- -
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
hash: 9
|
30
|
-
segments:
|
31
|
-
- 0
|
32
|
-
- 7
|
33
|
-
- 5
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
34
21
|
version: 0.7.5
|
35
22
|
type: :development
|
36
|
-
version_requirements: *id001
|
37
|
-
- !ruby/object:Gem::Dependency
|
38
|
-
name: rspec
|
39
23
|
prerelease: false
|
40
|
-
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 0.7.5
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: minitest
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
41
33
|
none: false
|
42
|
-
requirements:
|
43
|
-
- -
|
44
|
-
- !ruby/object:Gem::Version
|
45
|
-
|
46
|
-
segments:
|
47
|
-
- 2
|
48
|
-
- 0
|
49
|
-
- 0
|
50
|
-
version: 2.0.0
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
51
38
|
type: :development
|
52
|
-
version_requirements: *id002
|
53
|
-
- !ruby/object:Gem::Dependency
|
54
|
-
name: rack
|
55
39
|
prerelease: false
|
56
|
-
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rack
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
57
49
|
none: false
|
58
|
-
requirements:
|
59
|
-
- -
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
|
62
|
-
segments:
|
63
|
-
- 0
|
64
|
-
version: "0"
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
65
54
|
type: :development
|
66
|
-
version_requirements: *id003
|
67
|
-
- !ruby/object:Gem::Dependency
|
68
|
-
name: haml
|
69
55
|
prerelease: false
|
70
|
-
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
57
|
none: false
|
72
|
-
requirements:
|
73
|
-
- -
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: haml
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
79
70
|
type: :development
|
80
|
-
version_requirements: *id004
|
81
|
-
- !ruby/object:Gem::Dependency
|
82
|
-
name: fast_xs
|
83
71
|
prerelease: false
|
84
|
-
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
73
|
none: false
|
86
|
-
requirements:
|
87
|
-
- -
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: fast_xs
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
93
86
|
type: :development
|
94
|
-
version_requirements: *id005
|
95
|
-
- !ruby/object:Gem::Dependency
|
96
|
-
name: actionpack
|
97
87
|
prerelease: false
|
98
|
-
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: actionpack
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
99
97
|
none: false
|
100
|
-
requirements:
|
101
|
-
- -
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
|
104
|
-
segments:
|
105
|
-
- 0
|
106
|
-
version: "0"
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
107
102
|
type: :development
|
108
|
-
version_requirements: *id006
|
109
|
-
- !ruby/object:Gem::Dependency
|
110
|
-
name: url_escape
|
111
103
|
prerelease: false
|
112
|
-
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: url_escape
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
113
|
none: false
|
114
|
-
requirements:
|
115
|
-
- -
|
116
|
-
- !ruby/object:Gem::Version
|
117
|
-
|
118
|
-
segments:
|
119
|
-
- 0
|
120
|
-
version: "0"
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
121
118
|
type: :development
|
122
|
-
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ! '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
123
126
|
description:
|
124
127
|
email: seniorlopez@gmail.com
|
125
128
|
executables: []
|
126
|
-
|
127
|
-
extensions:
|
129
|
+
extensions:
|
128
130
|
- ext/escape_utils/extconf.rb
|
129
131
|
extra_rdoc_files: []
|
130
|
-
|
131
|
-
files:
|
132
|
+
files:
|
132
133
|
- .gitignore
|
133
|
-
- .
|
134
|
+
- .travis.yml
|
134
135
|
- CHANGELOG.md
|
135
136
|
- Gemfile
|
136
137
|
- MIT-LICENSE
|
@@ -142,17 +143,22 @@ files:
|
|
142
143
|
- benchmark/javascript_unescape.rb
|
143
144
|
- benchmark/url_escape.rb
|
144
145
|
- benchmark/url_unescape.rb
|
146
|
+
- benchmark/xml_escape.rb
|
145
147
|
- escape_utils.gemspec
|
146
148
|
- ext/escape_utils/buffer.c
|
147
149
|
- ext/escape_utils/buffer.h
|
148
150
|
- ext/escape_utils/escape_utils.c
|
149
151
|
- ext/escape_utils/extconf.rb
|
150
152
|
- ext/escape_utils/houdini.h
|
151
|
-
- ext/escape_utils/
|
152
|
-
- ext/escape_utils/
|
153
|
-
- ext/escape_utils/
|
153
|
+
- ext/escape_utils/houdini_href_e.c
|
154
|
+
- ext/escape_utils/houdini_html_e.c
|
155
|
+
- ext/escape_utils/houdini_html_u.c
|
156
|
+
- ext/escape_utils/houdini_js_e.c
|
157
|
+
- ext/escape_utils/houdini_js_u.c
|
158
|
+
- ext/escape_utils/houdini_uri_e.c
|
159
|
+
- ext/escape_utils/houdini_uri_u.c
|
160
|
+
- ext/escape_utils/houdini_xml_e.c
|
154
161
|
- ext/escape_utils/html_unescape.h
|
155
|
-
- ext/escape_utils/uri_escape.h
|
156
162
|
- lib/escape_utils.rb
|
157
163
|
- lib/escape_utils/html/cgi.rb
|
158
164
|
- lib/escape_utils/html/erb.rb
|
@@ -165,65 +171,43 @@ files:
|
|
165
171
|
- lib/escape_utils/url/rack.rb
|
166
172
|
- lib/escape_utils/url/uri.rb
|
167
173
|
- lib/escape_utils/version.rb
|
168
|
-
-
|
169
|
-
-
|
170
|
-
-
|
171
|
-
-
|
172
|
-
-
|
173
|
-
-
|
174
|
-
-
|
175
|
-
-
|
176
|
-
-
|
177
|
-
-
|
178
|
-
-
|
179
|
-
-
|
180
|
-
-
|
181
|
-
|
174
|
+
- lib/escape_utils/xml/builder.rb
|
175
|
+
- test/helper.rb
|
176
|
+
- test/html/escape_test.rb
|
177
|
+
- test/html/unescape_test.rb
|
178
|
+
- test/html_safety_test.rb
|
179
|
+
- test/javascript/escape_test.rb
|
180
|
+
- test/javascript/unescape_test.rb
|
181
|
+
- test/query/escape_test.rb
|
182
|
+
- test/query/unescape_test.rb
|
183
|
+
- test/uri/escape_test.rb
|
184
|
+
- test/uri/unescape_test.rb
|
185
|
+
- test/url/escape_test.rb
|
186
|
+
- test/url/unescape_test.rb
|
187
|
+
- test/xml/escape_test.rb
|
182
188
|
homepage: http://github.com/brianmario/escape_utils
|
183
189
|
licenses: []
|
184
|
-
|
185
190
|
post_install_message:
|
186
|
-
rdoc_options:
|
191
|
+
rdoc_options:
|
187
192
|
- --charset=UTF-8
|
188
|
-
require_paths:
|
193
|
+
require_paths:
|
189
194
|
- lib
|
190
|
-
|
191
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
195
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
192
196
|
none: false
|
193
|
-
requirements:
|
194
|
-
- -
|
195
|
-
- !ruby/object:Gem::Version
|
196
|
-
|
197
|
-
|
198
|
-
- 0
|
199
|
-
version: "0"
|
200
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
197
|
+
requirements:
|
198
|
+
- - ! '>='
|
199
|
+
- !ruby/object:Gem::Version
|
200
|
+
version: '0'
|
201
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
201
202
|
none: false
|
202
|
-
requirements:
|
203
|
-
- -
|
204
|
-
- !ruby/object:Gem::Version
|
205
|
-
|
206
|
-
segments:
|
207
|
-
- 0
|
208
|
-
version: "0"
|
203
|
+
requirements:
|
204
|
+
- - ! '>='
|
205
|
+
- !ruby/object:Gem::Version
|
206
|
+
version: '0'
|
209
207
|
requirements: []
|
210
|
-
|
211
208
|
rubyforge_project:
|
212
|
-
rubygems_version: 1.
|
209
|
+
rubygems_version: 1.8.23
|
213
210
|
signing_key:
|
214
211
|
specification_version: 3
|
215
212
|
summary: Faster string escaping routines for your web apps
|
216
|
-
test_files:
|
217
|
-
- spec/html/escape_spec.rb
|
218
|
-
- spec/html/unescape_spec.rb
|
219
|
-
- spec/html_safety_spec.rb
|
220
|
-
- spec/javascript/escape_spec.rb
|
221
|
-
- spec/javascript/unescape_spec.rb
|
222
|
-
- spec/query/escape_spec.rb
|
223
|
-
- spec/query/unescape_spec.rb
|
224
|
-
- spec/rcov.opts
|
225
|
-
- spec/spec_helper.rb
|
226
|
-
- spec/uri/escape_spec.rb
|
227
|
-
- spec/uri/unescape_spec.rb
|
228
|
-
- spec/url/escape_spec.rb
|
229
|
-
- spec/url/unescape_spec.rb
|
213
|
+
test_files: []
|
data/.rspec
DELETED
@@ -1,214 +0,0 @@
|
|
1
|
-
#include <assert.h>
|
2
|
-
#include <stdio.h>
|
3
|
-
#include <string.h>
|
4
|
-
|
5
|
-
#include "houdini.h"
|
6
|
-
#include "html_unescape.h"
|
7
|
-
|
8
|
-
#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* this is very scientific, yes */
|
9
|
-
#define UNESCAPE_GROW_FACTOR(x) (x) /* unescaping shouldn't grow our buffer */
|
10
|
-
|
11
|
-
/* Helper _isdigit methods -- do not trust the current locale */
|
12
|
-
int _isxdigit(int c)
|
13
|
-
{
|
14
|
-
return strchr("0123456789ABCDEFabcdef", c) != NULL;
|
15
|
-
}
|
16
|
-
|
17
|
-
int _isdigit(int c)
|
18
|
-
{
|
19
|
-
return (c >= '0' && c <= '9');
|
20
|
-
}
|
21
|
-
|
22
|
-
|
23
|
-
/**
|
24
|
-
* According to the OWASP rules:
|
25
|
-
*
|
26
|
-
* & --> &amp;
|
27
|
-
* < --> &lt;
|
28
|
-
* > --> &gt;
|
29
|
-
* " --> &quot;
|
30
|
-
* ' --> &#x27; &apos; is not recommended
|
31
|
-
* / --> &#x2F; forward slash is included as it helps end an HTML entity
|
32
|
-
*
|
33
|
-
*/
|
34
|
-
static const char HTML_ESCAPE_TABLE[] = {
|
35
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
36
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
37
|
-
0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
|
38
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
|
39
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
40
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
41
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
42
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
43
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
44
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
45
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
46
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
47
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
48
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
49
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
50
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
51
|
-
};
|
52
|
-
|
53
|
-
static const char *HTML_ESCAPES[] = {
|
54
|
-
"",
|
55
|
-
"&quot;",
|
56
|
-
"&amp;",
|
57
|
-
"&#39;",
|
58
|
-
"&#47;",
|
59
|
-
"&lt;",
|
60
|
-
"&gt;"
|
61
|
-
};
|
62
|
-
|
63
|
-
void
|
64
|
-
houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size, int secure)
|
65
|
-
{
|
66
|
-
size_t i = 0, org, esc;
|
67
|
-
|
68
|
-
bufgrow(ob, ESCAPE_GROW_FACTOR(size));
|
69
|
-
|
70
|
-
while (i < size) {
|
71
|
-
org = i;
|
72
|
-
while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
|
73
|
-
i++;
|
74
|
-
|
75
|
-
if (i > org)
|
76
|
-
bufput(ob, src + org, i - org);
|
77
|
-
|
78
|
-
/* escaping */
|
79
|
-
if (i >= size)
|
80
|
-
break;
|
81
|
-
|
82
|
-
/* The forward slash is only escaped in secure mode */
|
83
|
-
if (src[i] == '/' && !secure) {
|
84
|
-
bufputc(ob, '/');
|
85
|
-
} else {
|
86
|
-
bufputs(ob, HTML_ESCAPES[esc]);
|
87
|
-
}
|
88
|
-
|
89
|
-
i++;
|
90
|
-
}
|
91
|
-
}
|
92
|
-
|
93
|
-
static inline void
|
94
|
-
bufput_utf8(struct buf *ob, int c)
|
95
|
-
{
|
96
|
-
unsigned char unichar[4];
|
97
|
-
|
98
|
-
if (c < 0x80) {
|
99
|
-
bufputc(ob, c);
|
100
|
-
}
|
101
|
-
else if (c < 0x800) {
|
102
|
-
unichar[0] = 192 + (c / 64);
|
103
|
-
unichar[1] = 128 + (c % 64);
|
104
|
-
bufput(ob, unichar, 2);
|
105
|
-
}
|
106
|
-
else if (c - 0xd800u < 0x800) {
|
107
|
-
bufputc(ob, '?');
|
108
|
-
}
|
109
|
-
else if (c < 0x10000) {
|
110
|
-
unichar[0] = 224 + (c / 4096);
|
111
|
-
unichar[1] = 128 + (c / 64) % 64;
|
112
|
-
unichar[2] = 128 + (c % 64);
|
113
|
-
bufput(ob, unichar, 3);
|
114
|
-
}
|
115
|
-
else if (c < 0x110000) {
|
116
|
-
unichar[0] = 240 + (c / 262144);
|
117
|
-
unichar[1] = 128 + (c / 4096) % 64;
|
118
|
-
unichar[2] = 128 + (c / 64) % 64;
|
119
|
-
unichar[3] = 128 + (c % 64);
|
120
|
-
bufput(ob, unichar, 4);
|
121
|
-
}
|
122
|
-
else {
|
123
|
-
bufputc(ob, '?');
|
124
|
-
}
|
125
|
-
}
|
126
|
-
|
127
|
-
static size_t
|
128
|
-
unescape_ent(struct buf *ob, const uint8_t *src, size_t size)
|
129
|
-
{
|
130
|
-
size_t i = 0;
|
131
|
-
|
132
|
-
if (size > 3 && src[0] == '#') {
|
133
|
-
int codepoint = 0;
|
134
|
-
|
135
|
-
if (_isdigit(src[1])) {
|
136
|
-
for (i = 1; i < size && _isdigit(src[i]); ++i)
|
137
|
-
codepoint = (codepoint * 10) + (src[i] - '0');
|
138
|
-
}
|
139
|
-
|
140
|
-
else if (src[1] == 'x' || src[1] == 'X') {
|
141
|
-
for (i = 2; i < size && _isxdigit(src[i]); ++i)
|
142
|
-
codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
|
143
|
-
}
|
144
|
-
|
145
|
-
if (i < size && src[i] == ';') {
|
146
|
-
bufput_utf8(ob, codepoint);
|
147
|
-
return i + 1;
|
148
|
-
}
|
149
|
-
}
|
150
|
-
|
151
|
-
else {
|
152
|
-
if (size > MAX_WORD_LENGTH)
|
153
|
-
size = MAX_WORD_LENGTH;
|
154
|
-
|
155
|
-
for (i = MIN_WORD_LENGTH; i < size; ++i) {
|
156
|
-
if (src[i] == ' ')
|
157
|
-
break;
|
158
|
-
|
159
|
-
if (src[i] == ';') {
|
160
|
-
const struct html_ent *entity = find_entity((char *)src, i);
|
161
|
-
|
162
|
-
if (entity != NULL) {
|
163
|
-
bufput(ob, entity->utf8, entity->utf8_len);
|
164
|
-
return i + 1;
|
165
|
-
}
|
166
|
-
|
167
|
-
break;
|
168
|
-
}
|
169
|
-
}
|
170
|
-
}
|
171
|
-
|
172
|
-
bufputc(ob, '&');
|
173
|
-
return 0;
|
174
|
-
}
|
175
|
-
|
176
|
-
void
|
177
|
-
houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size)
|
178
|
-
{
|
179
|
-
size_t i = 0, org;
|
180
|
-
|
181
|
-
bufgrow(ob, UNESCAPE_GROW_FACTOR(size));
|
182
|
-
|
183
|
-
while (i < size) {
|
184
|
-
org = i;
|
185
|
-
while (i < size && src[i] != '&')
|
186
|
-
i++;
|
187
|
-
|
188
|
-
if (i > org)
|
189
|
-
bufput(ob, src + org, i - org);
|
190
|
-
|
191
|
-
/* escaping */
|
192
|
-
if (i >= size)
|
193
|
-
break;
|
194
|
-
|
195
|
-
i++;
|
196
|
-
i += unescape_ent(ob, src + i, size - i);
|
197
|
-
}
|
198
|
-
}
|
199
|
-
|
200
|
-
#ifdef TEST
|
201
|
-
|
202
|
-
int main()
|
203
|
-
{
|
204
|
-
const char TEST_STRING[] = "This &#x2663; is &amp; just &quot;an example&#9830;&quot;";
|
205
|
-
struct buf *buffer;
|
206
|
-
|
207
|
-
buffer = bufnew(128);
|
208
|
-
houdini_unescape_html(buffer, TEST_STRING, strlen(TEST_STRING));
|
209
|
-
printf("Result: %.*s\n", (int)buffer->size, buffer->data);
|
210
|
-
bufrelease(buffer);
|
211
|
-
return 0;
|
212
|
-
}
|
213
|
-
#endif
|
214
|
-
|