hamlit 2.5.0 → 2.6.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,68 +0,0 @@
1
- #include <assert.h>
2
- #include <stdio.h>
3
- #include <string.h>
4
-
5
- #include "houdini.h"
6
-
7
/*
 * Convert one ASCII hex digit ('0'-'9', 'a'-'f', 'A'-'F') to its value 0-15.
 * `| 32` folds upper-case letters onto lower-case and leaves digits unchanged;
 * `% 39 - 9` then maps '0'..'9' to 0..9 and 'a'..'f' to 10..15.
 * The result is only meaningful for hex digits, so validate the input first.
 * The argument is parenthesized so that any expression can be passed safely;
 * it is evaluated exactly once.
 */
#define hex2c(c) ((((c) | 32) % 39) - 9)
8
-
9
- static int
10
- unescape(gh_buf *ob, const uint8_t *src, size_t size, bool unescape_plus)
11
- {
12
- size_t i = 0, org;
13
-
14
- while (i < size) {
15
- org = i;
16
- while (i < size && src[i] != '%' && src[i] != '+')
17
- i++;
18
-
19
- if (likely(i > org)) {
20
- if (unlikely(org == 0)) {
21
- if (i >= size)
22
- return 0;
23
-
24
- gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
25
- }
26
-
27
- gh_buf_put(ob, src + org, i - org);
28
- }
29
-
30
- /* escaping */
31
- if (i >= size)
32
- break;
33
-
34
- if (src[i++] == '+') {
35
- gh_buf_putc(ob, unescape_plus ? ' ' : '+');
36
- continue;
37
- }
38
-
39
- if (i + 1 < size && _isxdigit(src[i]) && _isxdigit(src[i + 1])) {
40
- unsigned char new_char = (hex2c(src[i]) << 4) + hex2c(src[i + 1]);
41
- gh_buf_putc(ob, new_char);
42
- i += 2;
43
- } else {
44
- gh_buf_putc(ob, '%');
45
- }
46
- }
47
-
48
- return 1;
49
- }
50
-
51
- int
52
- houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size)
53
- {
54
- return unescape(ob, src, size, false);
55
- }
56
-
57
- int
58
- houdini_unescape_uri_component(gh_buf *ob, const uint8_t *src, size_t size)
59
- {
60
- return unescape(ob, src, size, false);
61
- }
62
-
63
- int
64
- houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size)
65
- {
66
- return unescape(ob, src, size, true);
67
- }
68
-
@@ -1,136 +0,0 @@
1
- #include <assert.h>
2
- #include <stdio.h>
3
- #include <string.h>
4
-
5
- #include "houdini.h"
6
-
7
/**
 * Replacement strings, indexed by the per-byte codes in XML_LOOKUP_TABLE.
 *
 * Escapes produced:
 *   "  -->  &quot;
 *   &  -->  &amp;
 *   '  -->  &apos;
 *   <  -->  &lt;
 *   >  -->  &gt;
 *
 * Codes 0-4 are classification values only (literal byte / multi-byte
 * sequence lengths) and therefore map to empty strings.
 */
static const char *LOOKUP_CODES[] = {
	[0]  = "",        /* reserved: use literal single character */
	[1]  = "",        /* unused */
	[2]  = "",        /* reserved: 2 character UTF-8 */
	[3]  = "",        /* reserved: 3 character UTF-8 */
	[4]  = "",        /* reserved: 4 character UTF-8 */
	[5]  = "?",       /* invalid UTF-8 character */
	[6]  = "&quot;",
	[7]  = "&amp;",
	[8]  = "&apos;",
	[9]  = "&lt;",
	[10] = "&gt;"
};
27
-
28
/* Code for a byte or sequence that must be replaced by "?". */
static const char CODE_INVALID = 5;

/*
 * Per-byte classification, indexed by the byte value (256 entries):
 *   0      copy the byte literally
 *   2..4   lead byte of a 2/3/4-byte UTF-8 sequence
 *   5      invalid (CODE_INVALID)
 *   6..10  index of the replacement entity in LOOKUP_CODES
 */
static const char XML_LOOKUP_TABLE[] = {
	/* 0x00-0x0F: control characters; only TAB, LF and CR are literal */
	5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 5, 5, 0, 5, 5,
	/* 0x10-0x1F: control characters */
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	/* 0x20-0x2F: '"' = 6, '&' = 7, '\'' = 8 */
	0, 0, 6, 0, 0, 0, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x30-0x3F: '<' = 9, '>' = 10 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 10, 0,
	/* 0x40-0x7F: literal ASCII */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

	/* 0x80-0xBF (10xxxxxx): continuation bytes are invalid as a start */
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,

	/* 0xC0-0xDF (110xxxxx): 2-byte lead */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

	/* 0xE0-0xEF (1110xxxx): 3-byte lead */
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,

	/* 0xF0-0xF7 (11110xxx): 4-byte lead */
	4, 4, 4, 4, 4, 4, 4, 4,

	/* 0xF8-0xFF (11111xxx): never valid in UTF-8 */
	5, 5, 5, 5, 5, 5, 5, 5,
};
62
-
63
- int
64
- houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size)
65
- {
66
- size_t i = 0;
67
- unsigned char code = 0;
68
-
69
- gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
70
-
71
- while (i < size) {
72
- size_t start, end;
73
-
74
- start = end = i;
75
-
76
- while (i < size) {
77
- unsigned int byte;
78
-
79
- byte = src[i++];
80
- code = XML_LOOKUP_TABLE[byte];
81
-
82
- if (!code) {
83
- /* single character used literally */
84
- } else if (code >= CODE_INVALID) {
85
- break; /* insert lookup code string */
86
- } else if (code > size - end) {
87
- code = CODE_INVALID; /* truncated UTF-8 character */
88
- break;
89
- } else {
90
- unsigned int chr = byte & (0xff >> code);
91
-
92
- while (--code) {
93
- byte = src[i++];
94
- if ((byte & 0xc0) != 0x80) {
95
- code = CODE_INVALID;
96
- break;
97
- }
98
- chr = (chr << 6) + (byte & 0x3f);
99
- }
100
-
101
- switch (i - end) {
102
- case 2:
103
- if (chr < 0x80)
104
- code = CODE_INVALID;
105
- break;
106
- case 3:
107
- if (chr < 0x800 ||
108
- (chr > 0xd7ff && chr < 0xe000) ||
109
- chr > 0xfffd)
110
- code = CODE_INVALID;
111
- break;
112
- case 4:
113
- if (chr < 0x10000 || chr > 0x10ffff)
114
- code = CODE_INVALID;
115
- break;
116
- default:
117
- break;
118
- }
119
- if (code == CODE_INVALID)
120
- break;
121
- }
122
- end = i;
123
- }
124
-
125
- if (end > start)
126
- gh_buf_put(ob, src + start, end - start);
127
-
128
- /* escaping */
129
- if (end >= size)
130
- break;
131
-
132
- gh_buf_puts(ob, LOOKUP_CODES[code]);
133
- }
134
-
135
- return 1;
136
- }
@@ -1,258 +0,0 @@
1
struct html_ent {
	/* Entity name as listed in the keyword table, e.g. "amp" (no '&' or ';'). */
	const char *entity;
	/* Number of bytes of utf8[] that are in use (1 to 3 in the table). */
	unsigned char utf8_len;
	/* UTF-8 encoding of the character the entity stands for. */
	unsigned char utf8[3];
};
6
- %%
7
- "quot", 1, { 0x22 }
8
- "amp", 1, { 0x26 }
9
- "apos", 1, { 0x27 }
10
- "lt", 1, { 0x3C }
11
- "gt", 1, { 0x3E }
12
- "nbsp", 2, { 0xC2, 0xA0 }
13
- "iexcl", 2, { 0xC2, 0xA1 }
14
- "cent", 2, { 0xC2, 0xA2 }
15
- "pound", 2, { 0xC2, 0xA3 }
16
- "curren", 2, { 0xC2, 0xA4 }
17
- "yen", 2, { 0xC2, 0xA5 }
18
- "brvbar", 2, { 0xC2, 0xA6 }
19
- "sect", 2, { 0xC2, 0xA7 }
20
- "uml", 2, { 0xC2, 0xA8 }
21
- "copy", 2, { 0xC2, 0xA9 }
22
- "ordf", 2, { 0xC2, 0xAA }
23
- "laquo", 2, { 0xC2, 0xAB }
24
- "not", 2, { 0xC2, 0xAC }
25
- "shy", 2, { 0xC2, 0xAD }
26
- "reg", 2, { 0xC2, 0xAE }
27
- "macr", 2, { 0xC2, 0xAF }
28
- "deg", 2, { 0xC2, 0xB0 }
29
- "plusmn", 2, { 0xC2, 0xB1 }
30
- "sup2", 2, { 0xC2, 0xB2 }
31
- "sup3", 2, { 0xC2, 0xB3 }
32
- "acute", 2, { 0xC2, 0xB4 }
33
- "micro", 2, { 0xC2, 0xB5 }
34
- "para", 2, { 0xC2, 0xB6 }
35
- "middot", 2, { 0xC2, 0xB7 }
36
- "cedil", 2, { 0xC2, 0xB8 }
37
- "sup1", 2, { 0xC2, 0xB9 }
38
- "ordm", 2, { 0xC2, 0xBA }
39
- "raquo", 2, { 0xC2, 0xBB }
40
- "frac14", 2, { 0xC2, 0xBC }
41
- "frac12", 2, { 0xC2, 0xBD }
42
- "frac34", 2, { 0xC2, 0xBE }
43
- "iquest", 2, { 0xC2, 0xBF }
44
- "Agrave", 2, { 0xC3, 0x80 }
45
- "Aacute", 2, { 0xC3, 0x81 }
46
- "Acirc", 2, { 0xC3, 0x82 }
47
- "Atilde", 2, { 0xC3, 0x83 }
48
- "Auml", 2, { 0xC3, 0x84 }
49
- "Aring", 2, { 0xC3, 0x85 }
50
- "AElig", 2, { 0xC3, 0x86 }
51
- "Ccedil", 2, { 0xC3, 0x87 }
52
- "Egrave", 2, { 0xC3, 0x88 }
53
- "Eacute", 2, { 0xC3, 0x89 }
54
- "Ecirc", 2, { 0xC3, 0x8A }
55
- "Euml", 2, { 0xC3, 0x8B }
56
- "Igrave", 2, { 0xC3, 0x8C }
57
- "Iacute", 2, { 0xC3, 0x8D }
58
- "Icirc", 2, { 0xC3, 0x8E }
59
- "Iuml", 2, { 0xC3, 0x8F }
60
- "ETH", 2, { 0xC3, 0x90 }
61
- "Ntilde", 2, { 0xC3, 0x91 }
62
- "Ograve", 2, { 0xC3, 0x92 }
63
- "Oacute", 2, { 0xC3, 0x93 }
64
- "Ocirc", 2, { 0xC3, 0x94 }
65
- "Otilde", 2, { 0xC3, 0x95 }
66
- "Ouml", 2, { 0xC3, 0x96 }
67
- "times", 2, { 0xC3, 0x97 }
68
- "Oslash", 2, { 0xC3, 0x98 }
69
- "Ugrave", 2, { 0xC3, 0x99 }
70
- "Uacute", 2, { 0xC3, 0x9A }
71
- "Ucirc", 2, { 0xC3, 0x9B }
72
- "Uuml", 2, { 0xC3, 0x9C }
73
- "Yacute", 2, { 0xC3, 0x9D }
74
- "THORN", 2, { 0xC3, 0x9E }
75
- "szlig", 2, { 0xC3, 0x9F }
76
- "agrave", 2, { 0xC3, 0xA0 }
77
- "aacute", 2, { 0xC3, 0xA1 }
78
- "acirc", 2, { 0xC3, 0xA2 }
79
- "atilde", 2, { 0xC3, 0xA3 }
80
- "auml", 2, { 0xC3, 0xA4 }
81
- "aring", 2, { 0xC3, 0xA5 }
82
- "aelig", 2, { 0xC3, 0xA6 }
83
- "ccedil", 2, { 0xC3, 0xA7 }
84
- "egrave", 2, { 0xC3, 0xA8 }
85
- "eacute", 2, { 0xC3, 0xA9 }
86
- "ecirc", 2, { 0xC3, 0xAA }
87
- "euml", 2, { 0xC3, 0xAB }
88
- "igrave", 2, { 0xC3, 0xAC }
89
- "iacute", 2, { 0xC3, 0xAD }
90
- "icirc", 2, { 0xC3, 0xAE }
91
- "iuml", 2, { 0xC3, 0xAF }
92
- "eth", 2, { 0xC3, 0xB0 }
93
- "ntilde", 2, { 0xC3, 0xB1 }
94
- "ograve", 2, { 0xC3, 0xB2 }
95
- "oacute", 2, { 0xC3, 0xB3 }
96
- "ocirc", 2, { 0xC3, 0xB4 }
97
- "otilde", 2, { 0xC3, 0xB5 }
98
- "ouml", 2, { 0xC3, 0xB6 }
99
- "divide", 2, { 0xC3, 0xB7 }
100
- "oslash", 2, { 0xC3, 0xB8 }
101
- "ugrave", 2, { 0xC3, 0xB9 }
102
- "uacute", 2, { 0xC3, 0xBA }
103
- "ucirc", 2, { 0xC3, 0xBB }
104
- "uuml", 2, { 0xC3, 0xBC }
105
- "yacute", 2, { 0xC3, 0xBD }
106
- "thorn", 2, { 0xC3, 0xBE }
107
- "yuml", 2, { 0xC3, 0xBF }
108
- "OElig", 2, { 0xC5, 0x92 }
109
- "oelig", 2, { 0xC5, 0x93 }
110
- "Scaron", 2, { 0xC5, 0xA0 }
111
- "scaron", 2, { 0xC5, 0xA1 }
112
- "Yuml", 2, { 0xC5, 0xB8 }
113
- "fnof", 2, { 0xC6, 0x92 }
114
- "circ", 2, { 0xCB, 0x86 }
115
- "tilde", 2, { 0xCB, 0x9C }
116
- "Alpha", 2, { 0xCE, 0x91 }
117
- "Beta", 2, { 0xCE, 0x92 }
118
- "Gamma", 2, { 0xCE, 0x93 }
119
- "Delta", 2, { 0xCE, 0x94 }
120
- "Epsilon", 2, { 0xCE, 0x95 }
121
- "Zeta", 2, { 0xCE, 0x96 }
122
- "Eta", 2, { 0xCE, 0x97 }
123
- "Theta", 2, { 0xCE, 0x98 }
124
- "Iota", 2, { 0xCE, 0x99 }
125
- "Kappa", 2, { 0xCE, 0x9A }
126
- "Lambda", 2, { 0xCE, 0x9B }
127
- "Mu", 2, { 0xCE, 0x9C }
128
- "Nu", 2, { 0xCE, 0x9D }
129
- "Xi", 2, { 0xCE, 0x9E }
130
- "Omicron", 2, { 0xCE, 0x9F }
131
- "Pi", 2, { 0xCE, 0xA0 }
132
- "Rho", 2, { 0xCE, 0xA1 }
133
- "Sigma", 2, { 0xCE, 0xA3 }
134
- "Tau", 2, { 0xCE, 0xA4 }
135
- "Upsilon", 2, { 0xCE, 0xA5 }
136
- "Phi", 2, { 0xCE, 0xA6 }
137
- "Chi", 2, { 0xCE, 0xA7 }
138
- "Psi", 2, { 0xCE, 0xA8 }
139
- "Omega", 2, { 0xCE, 0xA9 }
140
- "alpha", 2, { 0xCE, 0xB1 }
141
- "beta", 2, { 0xCE, 0xB2 }
142
- "gamma", 2, { 0xCE, 0xB3 }
143
- "delta", 2, { 0xCE, 0xB4 }
144
- "epsilon", 2, { 0xCE, 0xB5 }
145
- "zeta", 2, { 0xCE, 0xB6 }
146
- "eta", 2, { 0xCE, 0xB7 }
147
- "theta", 2, { 0xCE, 0xB8 }
148
- "iota", 2, { 0xCE, 0xB9 }
149
- "kappa", 2, { 0xCE, 0xBA }
150
- "lambda", 2, { 0xCE, 0xBB }
151
- "mu", 2, { 0xCE, 0xBC }
152
- "nu", 2, { 0xCE, 0xBD }
153
- "xi", 2, { 0xCE, 0xBE }
154
- "omicron", 2, { 0xCE, 0xBF }
155
- "pi", 2, { 0xCF, 0x80 }
156
- "rho", 2, { 0xCF, 0x81 }
157
- "sigmaf", 2, { 0xCF, 0x82 }
158
- "sigma", 2, { 0xCF, 0x83 }
159
- "tau", 2, { 0xCF, 0x84 }
160
- "upsilon", 2, { 0xCF, 0x85 }
161
- "phi", 2, { 0xCF, 0x86 }
162
- "chi", 2, { 0xCF, 0x87 }
163
- "psi", 2, { 0xCF, 0x88 }
164
- "omega", 2, { 0xCF, 0x89 }
165
- "thetasym", 2, { 0xCF, 0x91 }
166
- "piv", 2, { 0xCF, 0x96 }
167
- "ensp", 3, { 0xE2, 0x80, 0x82 }
168
- "emsp", 3, { 0xE2, 0x80, 0x83 }
169
- "thinsp", 3, { 0xE2, 0x80, 0x89 }
170
- "zwnj", 3, { 0xE2, 0x80, 0x8C }
171
- "zwj", 3, { 0xE2, 0x80, 0x8D }
172
- "lrm", 3, { 0xE2, 0x80, 0x8E }
173
- "rlm", 3, { 0xE2, 0x80, 0x8F }
174
- "ndash", 3, { 0xE2, 0x80, 0x93 }
175
- "mdash", 3, { 0xE2, 0x80, 0x94 }
176
- "lsquo", 3, { 0xE2, 0x80, 0x98 }
177
- "rsquo", 3, { 0xE2, 0x80, 0x99 }
178
- "sbquo", 3, { 0xE2, 0x80, 0x9A }
179
- "ldquo", 3, { 0xE2, 0x80, 0x9C }
180
- "rdquo", 3, { 0xE2, 0x80, 0x9D }
181
- "bdquo", 3, { 0xE2, 0x80, 0x9E }
182
- "dagger", 3, { 0xE2, 0x80, 0xA0 }
183
- "Dagger", 3, { 0xE2, 0x80, 0xA1 }
184
- "bull", 3, { 0xE2, 0x80, 0xA2 }
185
- "hellip", 3, { 0xE2, 0x80, 0xA6 }
186
- "permil", 3, { 0xE2, 0x80, 0xB0 }
187
- "prime", 3, { 0xE2, 0x80, 0xB2 }
188
- "Prime", 3, { 0xE2, 0x80, 0xB3 }
189
- "lsaquo", 3, { 0xE2, 0x80, 0xB9 }
190
- "rsaquo", 3, { 0xE2, 0x80, 0xBA }
191
- "oline", 3, { 0xE2, 0x80, 0xBE }
192
- "frasl", 3, { 0xE2, 0x81, 0x84 }
193
- "euro", 3, { 0xE2, 0x82, 0xAC }
194
- "image", 3, { 0xE2, 0x84, 0x91 }
195
- "weierp", 3, { 0xE2, 0x84, 0x98 }
196
- "real", 3, { 0xE2, 0x84, 0x9C }
197
- "trade", 3, { 0xE2, 0x84, 0xA2 }
198
- "alefsym", 3, { 0xE2, 0x84, 0xB5 }
199
- "larr", 3, { 0xE2, 0x86, 0x90 }
200
- "uarr", 3, { 0xE2, 0x86, 0x91 }
201
- "rarr", 3, { 0xE2, 0x86, 0x92 }
202
- "darr", 3, { 0xE2, 0x86, 0x93 }
203
- "harr", 3, { 0xE2, 0x86, 0x94 }
204
- "crarr", 3, { 0xE2, 0x86, 0xB5 }
205
- "lArr", 3, { 0xE2, 0x87, 0x90 }
206
- "uArr", 3, { 0xE2, 0x87, 0x91 }
207
- "rArr", 3, { 0xE2, 0x87, 0x92 }
208
- "dArr", 3, { 0xE2, 0x87, 0x93 }
209
- "hArr", 3, { 0xE2, 0x87, 0x94 }
210
- "forall", 3, { 0xE2, 0x88, 0x80 }
211
- "part", 3, { 0xE2, 0x88, 0x82 }
212
- "exist", 3, { 0xE2, 0x88, 0x83 }
213
- "empty", 3, { 0xE2, 0x88, 0x85 }
214
- "nabla", 3, { 0xE2, 0x88, 0x87 }
215
- "isin", 3, { 0xE2, 0x88, 0x88 }
216
- "notin", 3, { 0xE2, 0x88, 0x89 }
217
- "ni", 3, { 0xE2, 0x88, 0x8B }
218
- "prod", 3, { 0xE2, 0x88, 0x8F }
219
- "sum", 3, { 0xE2, 0x88, 0x91 }
220
- "minus", 3, { 0xE2, 0x88, 0x92 }
221
- "lowast", 3, { 0xE2, 0x88, 0x97 }
222
- "radic", 3, { 0xE2, 0x88, 0x9A }
223
- "prop", 3, { 0xE2, 0x88, 0x9D }
224
- "infin", 3, { 0xE2, 0x88, 0x9E }
225
- "ang", 3, { 0xE2, 0x88, 0xA0 }
226
- "and", 3, { 0xE2, 0x88, 0xA7 }
227
- "or", 3, { 0xE2, 0x88, 0xA8 }
228
- "cap", 3, { 0xE2, 0x88, 0xA9 }
229
- "cup", 3, { 0xE2, 0x88, 0xAA }
230
- "int", 3, { 0xE2, 0x88, 0xAB }
231
- "there4", 3, { 0xE2, 0x88, 0xB4 }
232
- "sim", 3, { 0xE2, 0x88, 0xBC }
233
- "cong", 3, { 0xE2, 0x89, 0x85 }
234
- "asymp", 3, { 0xE2, 0x89, 0x88 }
235
- "ne", 3, { 0xE2, 0x89, 0xA0 }
236
- "equiv", 3, { 0xE2, 0x89, 0xA1 }
237
- "le", 3, { 0xE2, 0x89, 0xA4 }
238
- "ge", 3, { 0xE2, 0x89, 0xA5 }
239
- "sub", 3, { 0xE2, 0x8A, 0x82 }
240
- "sup", 3, { 0xE2, 0x8A, 0x83 }
241
- "nsub", 3, { 0xE2, 0x8A, 0x84 }
242
- "sube", 3, { 0xE2, 0x8A, 0x86 }
243
- "supe", 3, { 0xE2, 0x8A, 0x87 }
244
- "oplus", 3, { 0xE2, 0x8A, 0x95 }
245
- "otimes", 3, { 0xE2, 0x8A, 0x97 }
246
- "perp", 3, { 0xE2, 0x8A, 0xA5 }
247
- "sdot", 3, { 0xE2, 0x8B, 0x85 }
248
- "lceil", 3, { 0xE2, 0x8C, 0x88 }
249
- "rceil", 3, { 0xE2, 0x8C, 0x89 }
250
- "lfloor", 3, { 0xE2, 0x8C, 0x8A }
251
- "rfloor", 3, { 0xE2, 0x8C, 0x8B }
252
- "lang", 3, { 0xE2, 0x9F, 0xA8 }
253
- "rang", 3, { 0xE2, 0x9F, 0xA9 }
254
- "loz", 3, { 0xE2, 0x97, 0x8A }
255
- "spades", 3, { 0xE2, 0x99, 0xA0 }
256
- "clubs", 3, { 0xE2, 0x99, 0xA3 }
257
- "hearts", 3, { 0xE2, 0x99, 0xA5 }
258
- "diams", 3, { 0xE2, 0x99, 0xA6 }