escape_utils 1.2.0 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/workflows/ci.yml +43 -0
- data/.gitignore +0 -1
- data/CHANGELOG.md +23 -0
- data/Gemfile +15 -0
- data/README.md +48 -91
- data/Rakefile +4 -2
- data/benchmark/html_escape_once.rb +25 -0
- data/benchmark/javascript_escape.rb +1 -1
- data/benchmark/javascript_unescape.rb +1 -1
- data/benchmark/url_decode.rb +28 -0
- data/benchmark/url_encode.rb +37 -0
- data/benchmark/xml_escape.rb +7 -11
- data/bin/console +8 -0
- data/escape_utils.gemspec +1 -12
- data/ext/escape_utils/escape_utils.c +8 -115
- data/ext/escape_utils/houdini.h +3 -5
- data/ext/escape_utils/houdini_html_e.c +52 -24
- data/ext/escape_utils/houdini_js_e.c +15 -3
- data/ext/escape_utils/houdini_uri_e.c +7 -18
- data/ext/escape_utils/houdini_uri_u.c +5 -15
- data/ext/escape_utils/houdini_xml_e.c +15 -1
- data/lib/escape_utils/html/cgi.rb +10 -8
- data/lib/escape_utils/html/erb.rb +1 -10
- data/lib/escape_utils/html/haml.rb +1 -7
- data/lib/escape_utils/html/rack.rb +3 -3
- data/lib/escape_utils/html_safety.rb +13 -0
- data/lib/escape_utils/url/cgi.rb +0 -8
- data/lib/escape_utils/url/erb.rb +1 -1
- data/lib/escape_utils/url/rack.rb +0 -12
- data/lib/escape_utils/url/uri.rb +11 -7
- data/lib/escape_utils/version.rb +1 -1
- data/lib/escape_utils/xml/builder.rb +2 -2
- data/lib/escape_utils.rb +61 -9
- data/test/helper.rb +16 -3
- data/test/html/escape_test.rb +66 -42
- data/test/html/unescape_test.rb +3 -21
- data/test/html_safety_test.rb +1 -27
- data/test/javascript/escape_test.rb +53 -20
- data/test/javascript/unescape_test.rb +16 -18
- data/test/query/escape_test.rb +3 -21
- data/test/query/unescape_test.rb +5 -23
- data/test/uri/escape_test.rb +16 -18
- data/test/uri/unescape_test.rb +17 -19
- data/test/uri_component/escape_test.rb +15 -17
- data/test/uri_component/unescape_test.rb +17 -19
- data/test/url/escape_test.rb +3 -21
- data/test/url/unescape_test.rb +5 -23
- data/test/xml/escape_test.rb +15 -17
- metadata +14 -127
- data/.travis.yml +0 -7
- data/benchmark/html_escape.rb +0 -68
- data/benchmark/html_unescape.rb +0 -35
- data/benchmark/url_escape.rb +0 -56
- data/benchmark/url_unescape.rb +0 -50
- data/ext/escape_utils/houdini_html_u.c +0 -122
@@ -1,122 +0,0 @@
|
|
1
|
-
#include <assert.h>
|
2
|
-
#include <stdio.h>
|
3
|
-
#include <string.h>
|
4
|
-
|
5
|
-
#include "houdini.h"
|
6
|
-
#include "html_unescape.h"
|
7
|
-
|
8
|
-
static inline void
|
9
|
-
gh_buf_put_utf8(gh_buf *ob, int c)
|
10
|
-
{
|
11
|
-
unsigned char unichar[4];
|
12
|
-
|
13
|
-
if (c < 0x80) {
|
14
|
-
gh_buf_putc(ob, c);
|
15
|
-
}
|
16
|
-
else if (c < 0x800) {
|
17
|
-
unichar[0] = 192 + (c / 64);
|
18
|
-
unichar[1] = 128 + (c % 64);
|
19
|
-
gh_buf_put(ob, unichar, 2);
|
20
|
-
}
|
21
|
-
else if (c - 0xd800u < 0x800) {
|
22
|
-
gh_buf_putc(ob, '?');
|
23
|
-
}
|
24
|
-
else if (c < 0x10000) {
|
25
|
-
unichar[0] = 224 + (c / 4096);
|
26
|
-
unichar[1] = 128 + (c / 64) % 64;
|
27
|
-
unichar[2] = 128 + (c % 64);
|
28
|
-
gh_buf_put(ob, unichar, 3);
|
29
|
-
}
|
30
|
-
else if (c < 0x110000) {
|
31
|
-
unichar[0] = 240 + (c / 262144);
|
32
|
-
unichar[1] = 128 + (c / 4096) % 64;
|
33
|
-
unichar[2] = 128 + (c / 64) % 64;
|
34
|
-
unichar[3] = 128 + (c % 64);
|
35
|
-
gh_buf_put(ob, unichar, 4);
|
36
|
-
}
|
37
|
-
else {
|
38
|
-
gh_buf_putc(ob, '?');
|
39
|
-
}
|
40
|
-
}
|
41
|
-
|
42
|
-
static size_t
|
43
|
-
unescape_ent(gh_buf *ob, const uint8_t *src, size_t size)
|
44
|
-
{
|
45
|
-
size_t i = 0;
|
46
|
-
|
47
|
-
if (size > 3 && src[0] == '#') {
|
48
|
-
int codepoint = 0;
|
49
|
-
|
50
|
-
if (_isdigit(src[1])) {
|
51
|
-
for (i = 1; i < size && _isdigit(src[i]); ++i)
|
52
|
-
codepoint = (codepoint * 10) + (src[i] - '0');
|
53
|
-
}
|
54
|
-
|
55
|
-
else if (src[1] == 'x' || src[1] == 'X') {
|
56
|
-
for (i = 2; i < size && _isxdigit(src[i]); ++i)
|
57
|
-
codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
|
58
|
-
}
|
59
|
-
|
60
|
-
if (i < size && src[i] == ';' && codepoint) {
|
61
|
-
gh_buf_put_utf8(ob, codepoint);
|
62
|
-
return i + 1;
|
63
|
-
}
|
64
|
-
}
|
65
|
-
|
66
|
-
else {
|
67
|
-
if (size > MAX_WORD_LENGTH)
|
68
|
-
size = MAX_WORD_LENGTH;
|
69
|
-
|
70
|
-
for (i = MIN_WORD_LENGTH; i < size; ++i) {
|
71
|
-
if (src[i] == ' ')
|
72
|
-
break;
|
73
|
-
|
74
|
-
if (src[i] == ';') {
|
75
|
-
const struct html_ent *entity = find_entity((char *)src, i);
|
76
|
-
|
77
|
-
if (entity != NULL) {
|
78
|
-
gh_buf_put(ob, entity->utf8, entity->utf8_len);
|
79
|
-
return i + 1;
|
80
|
-
}
|
81
|
-
|
82
|
-
break;
|
83
|
-
}
|
84
|
-
}
|
85
|
-
}
|
86
|
-
|
87
|
-
gh_buf_putc(ob, '&');
|
88
|
-
return 0;
|
89
|
-
}
|
90
|
-
|
91
|
-
int
|
92
|
-
houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size)
|
93
|
-
{
|
94
|
-
size_t i = 0, org;
|
95
|
-
|
96
|
-
while (i < size) {
|
97
|
-
org = i;
|
98
|
-
while (i < size && src[i] != '&')
|
99
|
-
i++;
|
100
|
-
|
101
|
-
if (likely(i > org)) {
|
102
|
-
if (unlikely(org == 0)) {
|
103
|
-
if (i >= size)
|
104
|
-
return 0;
|
105
|
-
|
106
|
-
gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
|
107
|
-
}
|
108
|
-
|
109
|
-
gh_buf_put(ob, src + org, i - org);
|
110
|
-
}
|
111
|
-
|
112
|
-
/* escaping */
|
113
|
-
if (i >= size)
|
114
|
-
break;
|
115
|
-
|
116
|
-
i++;
|
117
|
-
i += unescape_ent(ob, src + i, size - i);
|
118
|
-
}
|
119
|
-
|
120
|
-
return 1;
|
121
|
-
}
|
122
|
-
|