escape_utils 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/ci.yml +43 -0
- data/.gitignore +0 -1
- data/CHANGELOG.md +23 -0
- data/Gemfile +15 -0
- data/README.md +48 -91
- data/Rakefile +4 -2
- data/benchmark/html_escape_once.rb +25 -0
- data/benchmark/javascript_escape.rb +1 -1
- data/benchmark/javascript_unescape.rb +1 -1
- data/benchmark/url_decode.rb +28 -0
- data/benchmark/url_encode.rb +37 -0
- data/benchmark/xml_escape.rb +7 -11
- data/bin/console +8 -0
- data/escape_utils.gemspec +1 -12
- data/ext/escape_utils/escape_utils.c +8 -115
- data/ext/escape_utils/houdini.h +3 -5
- data/ext/escape_utils/houdini_html_e.c +52 -24
- data/ext/escape_utils/houdini_js_e.c +15 -3
- data/ext/escape_utils/houdini_uri_e.c +7 -18
- data/ext/escape_utils/houdini_uri_u.c +5 -15
- data/ext/escape_utils/houdini_xml_e.c +15 -1
- data/lib/escape_utils/html/cgi.rb +10 -8
- data/lib/escape_utils/html/erb.rb +1 -10
- data/lib/escape_utils/html/haml.rb +1 -7
- data/lib/escape_utils/html/rack.rb +3 -3
- data/lib/escape_utils/html_safety.rb +13 -0
- data/lib/escape_utils/url/cgi.rb +0 -8
- data/lib/escape_utils/url/erb.rb +1 -1
- data/lib/escape_utils/url/rack.rb +0 -12
- data/lib/escape_utils/url/uri.rb +11 -7
- data/lib/escape_utils/version.rb +1 -1
- data/lib/escape_utils/xml/builder.rb +2 -2
- data/lib/escape_utils.rb +61 -9
- data/test/helper.rb +16 -3
- data/test/html/escape_test.rb +66 -42
- data/test/html/unescape_test.rb +3 -21
- data/test/html_safety_test.rb +1 -27
- data/test/javascript/escape_test.rb +53 -20
- data/test/javascript/unescape_test.rb +16 -18
- data/test/query/escape_test.rb +3 -21
- data/test/query/unescape_test.rb +5 -23
- data/test/uri/escape_test.rb +16 -18
- data/test/uri/unescape_test.rb +17 -19
- data/test/uri_component/escape_test.rb +15 -17
- data/test/uri_component/unescape_test.rb +17 -19
- data/test/url/escape_test.rb +3 -21
- data/test/url/unescape_test.rb +5 -23
- data/test/xml/escape_test.rb +15 -17
- metadata +14 -127
- data/.travis.yml +0 -7
- data/benchmark/html_escape.rb +0 -68
- data/benchmark/html_unescape.rb +0 -35
- data/benchmark/url_escape.rb +0 -56
- data/benchmark/url_unescape.rb +0 -50
- data/ext/escape_utils/houdini_html_u.c +0 -122
@@ -9,11 +9,6 @@
|
|
9
9
|
|
10
10
|
static VALUE rb_eEncodingCompatibilityError;
|
11
11
|
|
12
|
-
static VALUE eu_new_str(const char *str, size_t len)
|
13
|
-
{
|
14
|
-
return rb_enc_str_new(str, len, rb_utf8_encoding());
|
15
|
-
}
|
16
|
-
|
17
12
|
static void check_utf8_encoding(VALUE str)
|
18
13
|
{
|
19
14
|
static rb_encoding *_cached[3] = {NULL, NULL, NULL};
|
@@ -34,41 +29,6 @@ static void check_utf8_encoding(VALUE str)
|
|
34
29
|
|
35
30
|
typedef int (*houdini_cb)(gh_buf *, const uint8_t *, size_t);
|
36
31
|
|
37
|
-
static VALUE rb_mEscapeUtils;
|
38
|
-
static ID ID_at_html_safe, ID_new;
|
39
|
-
|
40
|
-
/**
|
41
|
-
* html_secure instance variable
|
42
|
-
*/
|
43
|
-
static int g_html_secure = 1;
|
44
|
-
|
45
|
-
static VALUE rb_eu_set_html_secure(VALUE self, VALUE val)
|
46
|
-
{
|
47
|
-
g_html_secure = RTEST(val);
|
48
|
-
rb_ivar_set(self, rb_intern("@html_secure"), val);
|
49
|
-
return val;
|
50
|
-
}
|
51
|
-
|
52
|
-
/**
|
53
|
-
* html_safe_string_class instance variable
|
54
|
-
*/
|
55
|
-
static VALUE rb_html_safe_string_class;
|
56
|
-
static VALUE rb_html_safe_string_template_object;
|
57
|
-
|
58
|
-
static VALUE rb_eu_set_html_safe_string_class(VALUE self, VALUE val)
|
59
|
-
{
|
60
|
-
Check_Type(val, T_CLASS);
|
61
|
-
|
62
|
-
if (rb_funcall(val, rb_intern("<="), 1, rb_cString) == Qnil)
|
63
|
-
rb_raise(rb_eArgError, "%s must be a descendent of String", rb_class2name(val));
|
64
|
-
|
65
|
-
rb_html_safe_string_class = val;
|
66
|
-
rb_html_safe_string_template_object = rb_class_new_instance(0, NULL, rb_html_safe_string_class);
|
67
|
-
OBJ_FREEZE(rb_html_safe_string_template_object);
|
68
|
-
rb_ivar_set(self, rb_intern("@html_safe_string_class"), val);
|
69
|
-
return val;
|
70
|
-
}
|
71
|
-
|
72
32
|
/**
|
73
33
|
* Generic template
|
74
34
|
*/
|
@@ -78,13 +38,13 @@ rb_eu__generic(VALUE str, houdini_cb do_escape)
|
|
78
38
|
gh_buf buf = GH_BUF_INIT;
|
79
39
|
|
80
40
|
if (NIL_P(str))
|
81
|
-
return
|
41
|
+
return rb_utf8_str_new("", 0);
|
82
42
|
|
83
43
|
Check_Type(str, T_STRING);
|
84
44
|
check_utf8_encoding(str);
|
85
45
|
|
86
46
|
if (do_escape(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str))) {
|
87
|
-
VALUE result =
|
47
|
+
VALUE result = rb_utf8_str_new(buf.ptr, buf.size);
|
88
48
|
gh_buf_free(&buf);
|
89
49
|
return result;
|
90
50
|
}
|
@@ -96,49 +56,15 @@ rb_eu__generic(VALUE str, houdini_cb do_escape)
|
|
96
56
|
/**
|
97
57
|
* HTML methods
|
98
58
|
*/
|
99
|
-
static VALUE new_html_safe_string(const char *ptr, size_t len)
|
100
|
-
{
|
101
|
-
return rb_str_new_with_class(rb_html_safe_string_template_object, ptr, len);
|
102
|
-
}
|
103
|
-
|
104
|
-
static VALUE rb_eu_escape_html_as_html_safe(VALUE self, VALUE str)
|
105
|
-
{
|
106
|
-
VALUE result;
|
107
|
-
int secure = g_html_secure;
|
108
|
-
gh_buf buf = GH_BUF_INIT;
|
109
|
-
|
110
|
-
Check_Type(str, T_STRING);
|
111
|
-
check_utf8_encoding(str);
|
112
59
|
|
113
|
-
|
114
|
-
result = new_html_safe_string(buf.ptr, buf.size);
|
115
|
-
gh_buf_free(&buf);
|
116
|
-
} else {
|
117
|
-
result = new_html_safe_string(RSTRING_PTR(str), RSTRING_LEN(str));
|
118
|
-
}
|
119
|
-
|
120
|
-
rb_ivar_set(result, ID_at_html_safe, Qtrue);
|
121
|
-
|
122
|
-
return result;
|
123
|
-
}
|
124
|
-
|
125
|
-
static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
|
60
|
+
static VALUE rb_eu_escape_html_once(VALUE self, VALUE str)
|
126
61
|
{
|
127
|
-
VALUE str, rb_secure;
|
128
62
|
gh_buf buf = GH_BUF_INIT;
|
129
|
-
int secure = g_html_secure;
|
130
|
-
|
131
|
-
if (rb_scan_args(argc, argv, "11", &str, &rb_secure) == 2) {
|
132
|
-
if (rb_secure == Qfalse) {
|
133
|
-
secure = 0;
|
134
|
-
}
|
135
|
-
}
|
136
|
-
|
137
63
|
Check_Type(str, T_STRING);
|
138
64
|
check_utf8_encoding(str);
|
139
65
|
|
140
|
-
if (
|
141
|
-
VALUE result =
|
66
|
+
if (houdini_escape_html_once(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str))) {
|
67
|
+
VALUE result = rb_utf8_str_new(buf.ptr, buf.size);
|
142
68
|
gh_buf_free(&buf);
|
143
69
|
return result;
|
144
70
|
}
|
@@ -146,11 +72,6 @@ static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
|
|
146
72
|
return str;
|
147
73
|
}
|
148
74
|
|
149
|
-
static VALUE rb_eu_unescape_html(VALUE self, VALUE str)
|
150
|
-
{
|
151
|
-
return rb_eu__generic(str, &houdini_unescape_html);
|
152
|
-
}
|
153
|
-
|
154
75
|
|
155
76
|
/**
|
156
77
|
* XML methods
|
@@ -166,7 +87,7 @@ static VALUE rb_eu_escape_xml(VALUE self, VALUE str)
|
|
166
87
|
*/
|
167
88
|
static VALUE rb_eu_escape_js(VALUE self, VALUE str)
|
168
89
|
{
|
169
|
-
return rb_eu__generic(str, &houdini_escape_js);
|
90
|
+
return rb_eu__generic(rb_obj_as_string(str), &houdini_escape_js);
|
170
91
|
}
|
171
92
|
|
172
93
|
static VALUE rb_eu_unescape_js(VALUE self, VALUE str)
|
@@ -174,21 +95,6 @@ static VALUE rb_eu_unescape_js(VALUE self, VALUE str)
|
|
174
95
|
return rb_eu__generic(str, &houdini_unescape_js);
|
175
96
|
}
|
176
97
|
|
177
|
-
|
178
|
-
/**
|
179
|
-
* URL methods
|
180
|
-
*/
|
181
|
-
static VALUE rb_eu_escape_url(VALUE self, VALUE str)
|
182
|
-
{
|
183
|
-
return rb_eu__generic(str, &houdini_escape_url);
|
184
|
-
}
|
185
|
-
|
186
|
-
static VALUE rb_eu_unescape_url(VALUE self, VALUE str)
|
187
|
-
{
|
188
|
-
return rb_eu__generic(str, &houdini_unescape_url);
|
189
|
-
}
|
190
|
-
|
191
|
-
|
192
98
|
/**
|
193
99
|
* URI methods
|
194
100
|
*/
|
@@ -215,7 +121,6 @@ static VALUE rb_eu_unescape_uri_component(VALUE self, VALUE str)
|
|
215
121
|
return rb_eu__generic(str, &houdini_unescape_uri_component);
|
216
122
|
}
|
217
123
|
|
218
|
-
|
219
124
|
/**
|
220
125
|
* Ruby Extension initializer
|
221
126
|
*/
|
@@ -224,26 +129,14 @@ void Init_escape_utils()
|
|
224
129
|
{
|
225
130
|
rb_eEncodingCompatibilityError = rb_const_get(rb_cEncoding, rb_intern("CompatibilityError"));
|
226
131
|
|
227
|
-
|
228
|
-
|
229
|
-
rb_global_variable(&rb_html_safe_string_class);
|
230
|
-
rb_global_variable(&rb_html_safe_string_template_object);
|
231
|
-
|
232
|
-
rb_mEscapeUtils = rb_define_module("EscapeUtils");
|
233
|
-
rb_define_method(rb_mEscapeUtils, "escape_html_as_html_safe", rb_eu_escape_html_as_html_safe, 1);
|
234
|
-
rb_define_method(rb_mEscapeUtils, "escape_html", rb_eu_escape_html, -1);
|
235
|
-
rb_define_method(rb_mEscapeUtils, "unescape_html", rb_eu_unescape_html, 1);
|
132
|
+
VALUE rb_mEscapeUtils = rb_define_module("EscapeUtils");
|
133
|
+
rb_define_method(rb_mEscapeUtils, "escape_html_once", rb_eu_escape_html_once, 1);
|
236
134
|
rb_define_method(rb_mEscapeUtils, "escape_xml", rb_eu_escape_xml, 1);
|
237
135
|
rb_define_method(rb_mEscapeUtils, "escape_javascript", rb_eu_escape_js, 1);
|
238
136
|
rb_define_method(rb_mEscapeUtils, "unescape_javascript", rb_eu_unescape_js, 1);
|
239
|
-
rb_define_method(rb_mEscapeUtils, "escape_url", rb_eu_escape_url, 1);
|
240
|
-
rb_define_method(rb_mEscapeUtils, "unescape_url", rb_eu_unescape_url, 1);
|
241
137
|
rb_define_method(rb_mEscapeUtils, "escape_uri", rb_eu_escape_uri, 1);
|
242
138
|
rb_define_method(rb_mEscapeUtils, "unescape_uri", rb_eu_unescape_uri, 1);
|
243
139
|
rb_define_method(rb_mEscapeUtils, "escape_uri_component", rb_eu_escape_uri_component, 1);
|
244
140
|
rb_define_method(rb_mEscapeUtils, "unescape_uri_component", rb_eu_unescape_uri_component, 1);
|
245
|
-
|
246
|
-
rb_define_singleton_method(rb_mEscapeUtils, "html_secure=", rb_eu_set_html_secure, 1);
|
247
|
-
rb_define_singleton_method(rb_mEscapeUtils, "html_safe_string_class=", rb_eu_set_html_safe_string_class, 1);
|
248
141
|
}
|
249
142
|
|
data/ext/escape_utils/houdini.h
CHANGED
@@ -22,20 +22,18 @@ extern "C" {
|
|
22
22
|
# define _isdigit(c) ((c) >= '0' && (c) <= '9')
|
23
23
|
#endif
|
24
24
|
|
25
|
+
#define _isasciialpha(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
|
26
|
+
|
25
27
|
#define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
|
26
28
|
#define HOUDINI_UNESCAPED_SIZE(x) (x)
|
27
29
|
|
28
|
-
extern int
|
29
|
-
extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure);
|
30
|
-
extern int houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size);
|
30
|
+
extern int houdini_escape_html_once(gh_buf *ob, const uint8_t *src, size_t size);
|
31
31
|
extern int houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size);
|
32
32
|
extern int houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size);
|
33
33
|
extern int houdini_escape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
|
34
|
-
extern int houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size);
|
35
34
|
extern int houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size);
|
36
35
|
extern int houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size);
|
37
36
|
extern int houdini_unescape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
|
38
|
-
extern int houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size);
|
39
37
|
extern int houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size);
|
40
38
|
extern int houdini_unescape_js(gh_buf *ob, const uint8_t *src, size_t size);
|
41
39
|
|
@@ -18,8 +18,8 @@
|
|
18
18
|
static const char HTML_ESCAPE_TABLE[] = {
|
19
19
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
20
20
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
21
|
-
0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0,
|
22
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
21
|
+
0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0,
|
22
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5, 0,
|
23
23
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
24
24
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
25
25
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
@@ -35,24 +35,64 @@ static const char HTML_ESCAPE_TABLE[] = {
|
|
35
35
|
};
|
36
36
|
|
37
37
|
static const char *HTML_ESCAPES[] = {
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
"&gt;"
|
38
|
+
"",
|
39
|
+
"&quot;",
|
40
|
+
"&amp;",
|
41
|
+
"&#39;",
|
42
|
+
"&lt;",
|
43
|
+
"&gt;"
|
45
44
|
};
|
46
45
|
|
46
|
+
static const int HTML_ESCAPES_LENGTHS[] = {
|
47
|
+
0,
|
48
|
+
6,
|
49
|
+
5,
|
50
|
+
5,
|
51
|
+
4,
|
52
|
+
4
|
53
|
+
};
|
54
|
+
|
55
|
+
static int
|
56
|
+
is_entity(const uint8_t *src, size_t size)
|
57
|
+
{
|
58
|
+
size_t i = 0;
|
59
|
+
|
60
|
+
if (size == 0 || src[0] != '&')
|
61
|
+
return false;
|
62
|
+
|
63
|
+
if (size > 16)
|
64
|
+
size = 16;
|
65
|
+
|
66
|
+
if (size >= 4 && src[1] == '#') {
|
67
|
+
if (_isdigit(src[2])) {
|
68
|
+
for (i = 3; i < size && _isdigit(src[i]); ++i);
|
69
|
+
}
|
70
|
+
else if ((src[2] == 'x' || src[2] == 'X') && _isxdigit(src[3])) {
|
71
|
+
for (i = 4; i < size && _isxdigit(src[i]); ++i);
|
72
|
+
}
|
73
|
+
else return false;
|
74
|
+
}
|
75
|
+
else {
|
76
|
+
for (i = 1; i < size && _isasciialpha(src[i]); ++i);
|
77
|
+
if (i == 1) return false;
|
78
|
+
}
|
79
|
+
|
80
|
+
return i < size && src[i] == ';';
|
81
|
+
}
|
82
|
+
|
47
83
|
int
|
48
|
-
|
84
|
+
houdini_escape_html_once(gh_buf *ob, const uint8_t *src, size_t size)
|
49
85
|
{
|
50
86
|
size_t i = 0, org, esc = 0;
|
51
87
|
|
52
88
|
while (i < size) {
|
53
89
|
org = i;
|
54
|
-
while (i < size
|
90
|
+
while (i < size) {
|
91
|
+
esc = HTML_ESCAPE_TABLE[src[i]];
|
92
|
+
if (unlikely(esc != 0) && !is_entity(src + i, size - i))
|
93
|
+
break;
|
55
94
|
i++;
|
95
|
+
}
|
56
96
|
|
57
97
|
if (i > org) {
|
58
98
|
if (unlikely(org == 0)) {
|
@@ -69,22 +109,10 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
|
|
69
109
|
if (unlikely(i >= size))
|
70
110
|
break;
|
71
111
|
|
72
|
-
|
73
|
-
if (src[i] == '/' && !secure) {
|
74
|
-
gh_buf_putc(ob, '/');
|
75
|
-
} else {
|
76
|
-
gh_buf_puts(ob, HTML_ESCAPES[esc]);
|
77
|
-
}
|
112
|
+
gh_buf_put(ob, HTML_ESCAPES[esc], HTML_ESCAPES_LENGTHS[esc]);
|
78
113
|
|
79
114
|
i++;
|
80
115
|
}
|
81
116
|
|
82
117
|
return 1;
|
83
118
|
}
|
84
|
-
|
85
|
-
int
|
86
|
-
houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size)
|
87
|
-
{
|
88
|
-
return houdini_escape_html0(ob, src, size, 1);
|
89
|
-
}
|
90
|
-
|
@@ -7,10 +7,11 @@
|
|
7
7
|
static const char JS_ESCAPE[] = {
|
8
8
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
|
9
9
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
10
|
-
0, 0, 1, 0,
|
10
|
+
0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
|
11
11
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
12
12
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
13
13
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
14
|
+
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
14
15
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
15
16
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
16
17
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
@@ -18,8 +19,7 @@ static const char JS_ESCAPE[] = {
|
|
18
19
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
19
20
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
20
21
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
21
|
-
0, 0,
|
22
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
22
|
+
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
23
23
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
24
24
|
};
|
25
25
|
|
@@ -51,6 +51,18 @@ houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size)
|
|
51
51
|
ch = src[i];
|
52
52
|
|
53
53
|
switch (ch) {
|
54
|
+
case 226:
|
55
|
+
if (i + 2 < size && src[i + 1] == 128) {
|
56
|
+
if (src[i + 2] == 168) {
|
57
|
+
gh_buf_put(ob, "&#x2028;", 8);
|
58
|
+
i += 2;
|
59
|
+
} else if (src[i + 2] == 169) {
|
60
|
+
gh_buf_put(ob, "&#x2029;", 8);
|
61
|
+
i += 2;
|
62
|
+
}
|
63
|
+
}
|
64
|
+
break;
|
65
|
+
|
54
66
|
case '/':
|
55
67
|
/*
|
56
68
|
* Escape only if preceded by a lt
|
@@ -12,7 +12,7 @@ static const char URL_SAFE[] = {
|
|
12
12
|
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
13
13
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
|
14
14
|
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
15
|
-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
|
15
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
|
16
16
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
17
17
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
18
18
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
@@ -44,7 +44,7 @@ static const char URI_SAFE[] = {
|
|
44
44
|
|
45
45
|
static int
|
46
46
|
escape(gh_buf *ob, const uint8_t *src, size_t size,
|
47
|
-
const char *safe_table
|
47
|
+
const char *safe_table)
|
48
48
|
{
|
49
49
|
static const uint8_t hex_chars[] = "0123456789ABCDEF";
|
50
50
|
|
@@ -73,13 +73,9 @@ escape(gh_buf *ob, const uint8_t *src, size_t size,
|
|
73
73
|
if (i >= size)
|
74
74
|
break;
|
75
75
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
|
80
|
-
hex_str[2] = hex_chars[src[i] & 0xF];
|
81
|
-
gh_buf_put(ob, hex_str, 3);
|
82
|
-
}
|
76
|
+
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
|
77
|
+
hex_str[2] = hex_chars[src[i] & 0xF];
|
78
|
+
gh_buf_put(ob, hex_str, 3);
|
83
79
|
|
84
80
|
i++;
|
85
81
|
}
|
@@ -90,18 +86,11 @@ escape(gh_buf *ob, const uint8_t *src, size_t size,
|
|
90
86
|
int
|
91
87
|
houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size)
|
92
88
|
{
|
93
|
-
return escape(ob, src, size, URI_SAFE
|
89
|
+
return escape(ob, src, size, URI_SAFE);
|
94
90
|
}
|
95
91
|
|
96
92
|
int
|
97
93
|
houdini_escape_uri_component(gh_buf *ob, const uint8_t *src, size_t size)
|
98
94
|
{
|
99
|
-
return escape(ob, src, size, URL_SAFE
|
95
|
+
return escape(ob, src, size, URL_SAFE);
|
100
96
|
}
|
101
|
-
|
102
|
-
int
|
103
|
-
houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size)
|
104
|
-
{
|
105
|
-
return escape(ob, src, size, URL_SAFE, true);
|
106
|
-
}
|
107
|
-
|
@@ -7,13 +7,13 @@
|
|
7
7
|
#define hex2c(c) ((c | 32) % 39 - 9)
|
8
8
|
|
9
9
|
static int
|
10
|
-
unescape(gh_buf *ob, const uint8_t *src, size_t size
|
10
|
+
unescape(gh_buf *ob, const uint8_t *src, size_t size)
|
11
11
|
{
|
12
12
|
size_t i = 0, org;
|
13
13
|
|
14
14
|
while (i < size) {
|
15
15
|
org = i;
|
16
|
-
while (i < size && src[i] != '%'
|
16
|
+
while (i < size && src[i] != '%')
|
17
17
|
i++;
|
18
18
|
|
19
19
|
if (likely(i > org)) {
|
@@ -31,11 +31,7 @@ unescape(gh_buf *ob, const uint8_t *src, size_t size, bool unescape_plus)
|
|
31
31
|
if (i >= size)
|
32
32
|
break;
|
33
33
|
|
34
|
-
|
35
|
-
gh_buf_putc(ob, unescape_plus ? ' ' : '+');
|
36
|
-
continue;
|
37
|
-
}
|
38
|
-
|
34
|
+
i++;
|
39
35
|
if (i + 1 < size && _isxdigit(src[i]) && _isxdigit(src[i + 1])) {
|
40
36
|
unsigned char new_char = (hex2c(src[i]) << 4) + hex2c(src[i + 1]);
|
41
37
|
gh_buf_putc(ob, new_char);
|
@@ -51,18 +47,12 @@ unescape(gh_buf *ob, const uint8_t *src, size_t size, bool unescape_plus)
|
|
51
47
|
int
|
52
48
|
houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size)
|
53
49
|
{
|
54
|
-
return unescape(ob, src, size
|
50
|
+
return unescape(ob, src, size);
|
55
51
|
}
|
56
52
|
|
57
53
|
int
|
58
54
|
houdini_unescape_uri_component(gh_buf *ob, const uint8_t *src, size_t size)
|
59
55
|
{
|
60
|
-
return unescape(ob, src, size
|
61
|
-
}
|
62
|
-
|
63
|
-
int
|
64
|
-
houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size)
|
65
|
-
{
|
66
|
-
return unescape(ob, src, size, true);
|
56
|
+
return unescape(ob, src, size);
|
67
57
|
}
|
68
58
|
|
@@ -25,6 +25,20 @@ static const char *LOOKUP_CODES[] = {
|
|
25
25
|
"&gt;"
|
26
26
|
};
|
27
27
|
|
28
|
+
static const int LOOKUP_CODES_LENGTHS[] = {
|
29
|
+
0,
|
30
|
+
0,
|
31
|
+
0,
|
32
|
+
0,
|
33
|
+
0,
|
34
|
+
1,
|
35
|
+
6,
|
36
|
+
5,
|
37
|
+
6,
|
38
|
+
4,
|
39
|
+
4
|
40
|
+
};
|
41
|
+
|
28
42
|
static const char CODE_INVALID = 5;
|
29
43
|
|
30
44
|
static const char XML_LOOKUP_TABLE[] = {
|
@@ -129,7 +143,7 @@ houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size)
|
|
129
143
|
if (end >= size)
|
130
144
|
break;
|
131
145
|
|
132
|
-
|
146
|
+
gh_buf_put(ob, LOOKUP_CODES[code], LOOKUP_CODES_LENGTHS[code]);
|
133
147
|
}
|
134
148
|
|
135
149
|
return 1;
|
@@ -1,11 +1,13 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
1
|
+
module EscapeUtils
|
2
|
+
module CGIHtmlSafety
|
3
|
+
def escapeHTML(html)
|
4
|
+
::EscapeUtils::HtmlSafety.escape_once(html) { |s| super(s) }
|
5
|
+
end
|
6
6
|
|
7
|
-
def unescapeHTML(
|
8
|
-
|
7
|
+
def unescapeHTML(html)
|
8
|
+
super(html.to_s)
|
9
9
|
end
|
10
10
|
end
|
11
|
-
end
|
11
|
+
end
|
12
|
+
|
13
|
+
CGI.singleton_class.prepend(EscapeUtils::CGIHtmlSafety)
|
@@ -1,6 +1,15 @@
|
|
1
1
|
module EscapeUtils
|
2
2
|
module HtmlSafety
|
3
3
|
if "".respond_to? :html_safe?
|
4
|
+
def self.escape_once(s)
|
5
|
+
s = s.to_s
|
6
|
+
if s.html_safe?
|
7
|
+
s.html_safe
|
8
|
+
else
|
9
|
+
yield(s).html_safe
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
4
13
|
def _escape_html(s)
|
5
14
|
if s.html_safe?
|
6
15
|
s.to_s.html_safe
|
@@ -9,6 +18,10 @@ module EscapeUtils
|
|
9
18
|
end
|
10
19
|
end
|
11
20
|
else
|
21
|
+
def self.escape_once(s)
|
22
|
+
yield s.to_s
|
23
|
+
end
|
24
|
+
|
12
25
|
def _escape_html(s)
|
13
26
|
EscapeUtils.escape_html(s.to_s)
|
14
27
|
end
|
data/lib/escape_utils/url/cgi.rb
CHANGED
data/lib/escape_utils/url/erb.rb
CHANGED
data/lib/escape_utils/url/uri.rb
CHANGED
@@ -1,8 +1,12 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
if URI.respond_to?(:escape) # Was removed in Ruby 3.0. Let's not bring it back
|
4
|
+
module URI
|
5
|
+
def self.escape(s, unsafe=nil)
|
6
|
+
EscapeUtils.escape_uri(s.to_s)
|
7
|
+
end
|
8
|
+
def self.unescape(s)
|
9
|
+
EscapeUtils.unescape_uri(s.to_s)
|
10
|
+
end
|
4
11
|
end
|
5
|
-
|
6
|
-
EscapeUtils.unescape_uri(s.to_s)
|
7
|
-
end
|
8
|
-
end
|
12
|
+
end
|
data/lib/escape_utils/version.rb
CHANGED