walters 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,115 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
+ /*
8
+ * The following characters will not be escaped:
9
+ *
10
+ * -_.+!*'(),%#@?=;:/,+&$ alphanum
11
+ *
12
+ * Note that this character set is the addition of:
13
+ *
14
+ * - The characters which are safe to be in an URL
15
+ * - The characters which are *not* safe to be in
16
+ * an URL because they are RESERVED characters.
17
+ *
18
+ * We assume (lazily) that any RESERVED char that
19
+ * appears inside an URL is actually meant to
20
+ * have its native function (i.e. as an URL
21
+ * component/separator) and hence needs no escaping.
22
+ *
23
+ * There are two exceptions: the characters & (amp)
24
+ * and ' (single quote) do not appear in the table.
25
+ * They are meant to appear in the URL as components,
26
+ * yet they require special HTML-entity escaping
27
+ * to generate valid HTML markup.
28
+ *
29
+ * All other characters will be escaped to %XX.
30
+ *
31
+ */
32
/*
 * Lookup table indexed by byte value: a non-zero entry means the byte is
 * copied into an href unchanged, a zero entry means it must be escaped.
 * Each row covers 16 consecutive byte values; the row's first index is
 * given in the leading comment.
 */
static const char HREF_SAFE[256] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
	/* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
50
+
51
+ int
52
+ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
53
+ {
54
+ static const uint8_t hex_chars[] = "0123456789ABCDEF";
55
+ size_t i = 0, org;
56
+ uint8_t hex_str[3];
57
+
58
+ hex_str[0] = '%';
59
+
60
+ while (i < size) {
61
+ org = i;
62
+ while (i < size && HREF_SAFE[src[i]] != 0)
63
+ i++;
64
+
65
+ if (likely(i > org)) {
66
+ if (unlikely(org == 0)) {
67
+ if (i >= size)
68
+ return 0;
69
+
70
+ gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
71
+ }
72
+
73
+ gh_buf_put(ob, src + org, i - org);
74
+ }
75
+
76
+ /* escaping */
77
+ if (i >= size)
78
+ break;
79
+
80
+ switch (src[i]) {
81
+ /* amp appears all the time in URLs, but needs
82
+ * HTML-entity escaping to be inside an href */
83
+ case '&':
84
+ gh_buf_PUTS(ob, "&amp;");
85
+ break;
86
+
87
+ /* the single quote is a valid URL character
88
+ * according to the standard; it needs HTML
89
+ * entity escaping too */
90
+ case '\'':
91
+ gh_buf_PUTS(ob, "&#x27;");
92
+ break;
93
+
94
+ /* the space can be escaped to %20 or a plus
95
+ * sign. we're going with the generic escape
96
+ * for now. the plus thing is more commonly seen
97
+ * when building GET strings */
98
+ #if 0
99
+ case ' ':
100
+ gh_buf_putc(ob, '+');
101
+ break;
102
+ #endif
103
+
104
+ /* every other character goes with a %XX escaping */
105
+ default:
106
+ hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
107
+ hex_str[2] = hex_chars[src[i] & 0xF];
108
+ gh_buf_put(ob, hex_str, 3);
109
+ }
110
+
111
+ i++;
112
+ }
113
+
114
+ return 1;
115
+ }
@@ -0,0 +1,90 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
+ /**
8
+ * According to the OWASP rules:
9
+ *
10
+ * & --> &amp;
11
+ * < --> &lt;
12
+ * > --> &gt;
13
+ * " --> &quot;
14
+ * ' --> &#39; (decimal form of &#x27;); &apos; is not recommended
15
+ * / --> &#47; (decimal form of &#x2F;); forward slash is included as it helps end an HTML entity
16
+ *
17
+ */
18
/*
 * Maps a byte value to an index into HTML_ESCAPES. Every byte not listed
 * here maps to 0, meaning "copy through unchanged".
 */
static const char HTML_ESCAPE_TABLE[256] = {
	['"']  = 1,
	['&']  = 2,
	['\''] = 3,
	['/']  = 4,
	['<']  = 5,
	['>']  = 6,
};
36
+
37
/*
 * Replacement text for each index stored in HTML_ESCAPE_TABLE.
 * Slot 0 is the "no escape" placeholder.
 */
static const char *HTML_ESCAPES[] = {
	[0] = "",
	[1] = "&quot;",  /* double quote */
	[2] = "&amp;",   /* ampersand */
	[3] = "&#39;",   /* single quote */
	[4] = "&#47;",   /* forward slash */
	[5] = "&lt;",    /* less-than */
	[6] = "&gt;"     /* greater-than */
};
46
+
47
+ int
48
+ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
49
+ {
50
+ size_t i = 0, org, esc = 0;
51
+
52
+ while (i < size) {
53
+ org = i;
54
+ while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
55
+ i++;
56
+
57
+ if (i > org) {
58
+ if (unlikely(org == 0)) {
59
+ if (i >= size)
60
+ return 0;
61
+
62
+ gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
63
+ }
64
+
65
+ gh_buf_put(ob, src + org, i - org);
66
+ }
67
+
68
+ /* escaping */
69
+ if (unlikely(i >= size))
70
+ break;
71
+
72
+ /* The forward slash is only escaped in secure mode */
73
+ if (src[i] == '/' && !secure) {
74
+ gh_buf_putc(ob, '/');
75
+ } else {
76
+ gh_buf_puts(ob, HTML_ESCAPES[esc]);
77
+ }
78
+
79
+ i++;
80
+ }
81
+
82
+ return 1;
83
+ }
84
+
85
+ int
86
+ houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size)
87
+ {
88
+ return houdini_escape_html0(ob, src, size, 1);
89
+ }
90
+
@@ -0,0 +1,122 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+ #include "html_unescape.h"
7
+
8
+ static inline void
9
+ gh_buf_put_utf8(gh_buf *ob, int c)
10
+ {
11
+ unsigned char unichar[4];
12
+
13
+ if (c < 0x80) {
14
+ gh_buf_putc(ob, c);
15
+ }
16
+ else if (c < 0x800) {
17
+ unichar[0] = 192 + (c / 64);
18
+ unichar[1] = 128 + (c % 64);
19
+ gh_buf_put(ob, unichar, 2);
20
+ }
21
+ else if (c - 0xd800u < 0x800) {
22
+ gh_buf_putc(ob, '?');
23
+ }
24
+ else if (c < 0x10000) {
25
+ unichar[0] = 224 + (c / 4096);
26
+ unichar[1] = 128 + (c / 64) % 64;
27
+ unichar[2] = 128 + (c % 64);
28
+ gh_buf_put(ob, unichar, 3);
29
+ }
30
+ else if (c < 0x110000) {
31
+ unichar[0] = 240 + (c / 262144);
32
+ unichar[1] = 128 + (c / 4096) % 64;
33
+ unichar[2] = 128 + (c / 64) % 64;
34
+ unichar[3] = 128 + (c % 64);
35
+ gh_buf_put(ob, unichar, 4);
36
+ }
37
+ else {
38
+ gh_buf_putc(ob, '?');
39
+ }
40
+ }
41
+
42
+ static size_t
43
+ unescape_ent(gh_buf *ob, const uint8_t *src, size_t size)
44
+ {
45
+ size_t i = 0;
46
+
47
+ if (size > 3 && src[0] == '#') {
48
+ int codepoint = 0;
49
+
50
+ if (_isdigit(src[1])) {
51
+ for (i = 1; i < size && _isdigit(src[i]); ++i)
52
+ codepoint = (codepoint * 10) + (src[i] - '0');
53
+ }
54
+
55
+ else if (src[1] == 'x' || src[1] == 'X') {
56
+ for (i = 2; i < size && _isxdigit(src[i]); ++i)
57
+ codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
58
+ }
59
+
60
+ if (i < size && src[i] == ';') {
61
+ gh_buf_put_utf8(ob, codepoint);
62
+ return i + 1;
63
+ }
64
+ }
65
+
66
+ else {
67
+ if (size > MAX_WORD_LENGTH)
68
+ size = MAX_WORD_LENGTH;
69
+
70
+ for (i = MIN_WORD_LENGTH; i < size; ++i) {
71
+ if (src[i] == ' ')
72
+ break;
73
+
74
+ if (src[i] == ';') {
75
+ const struct html_ent *entity = find_entity((char *)src, i);
76
+
77
+ if (entity != NULL) {
78
+ gh_buf_put(ob, entity->utf8, entity->utf8_len);
79
+ return i + 1;
80
+ }
81
+
82
+ break;
83
+ }
84
+ }
85
+ }
86
+
87
+ gh_buf_putc(ob, '&');
88
+ return 0;
89
+ }
90
+
91
+ int
92
+ houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size)
93
+ {
94
+ size_t i = 0, org;
95
+
96
+ while (i < size) {
97
+ org = i;
98
+ while (i < size && src[i] != '&')
99
+ i++;
100
+
101
+ if (likely(i > org)) {
102
+ if (unlikely(org == 0)) {
103
+ if (i >= size)
104
+ return 0;
105
+
106
+ gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
107
+ }
108
+
109
+ gh_buf_put(ob, src + org, i - org);
110
+ }
111
+
112
+ /* escaping */
113
+ if (i >= size)
114
+ break;
115
+
116
+ i++;
117
+ i += unescape_ent(ob, src + i, size - i);
118
+ }
119
+
120
+ return 1;
121
+ }
122
+
@@ -0,0 +1,90 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
/*
 * Bytes that houdini_escape_js must handle specially are flagged with 1;
 * every other byte value is 0 and gets copied through unchanged.
 */
static const char JS_ESCAPE[256] = {
	['\n'] = 1,
	['\r'] = 1,
	['"']  = 1,
	['\''] = 1,
	['/']  = 1,
	['\\'] = 1,
};
25
+
26
+ int
27
+ houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size)
28
+ {
29
+ size_t i = 0, org, ch;
30
+
31
+ while (i < size) {
32
+ org = i;
33
+ while (i < size && JS_ESCAPE[src[i]] == 0)
34
+ i++;
35
+
36
+ if (likely(i > org)) {
37
+ if (unlikely(org == 0)) {
38
+ if (i >= size)
39
+ return 0;
40
+
41
+ gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
42
+ }
43
+
44
+ gh_buf_put(ob, src + org, i - org);
45
+ }
46
+
47
+ /* escaping */
48
+ if (i >= size)
49
+ break;
50
+
51
+ ch = src[i];
52
+
53
+ switch (ch) {
54
+ case '/':
55
+ /*
56
+ * Escape only if preceded by a lt
57
+ */
58
+ if (i && src[i - 1] == '<')
59
+ gh_buf_putc(ob, '\\');
60
+
61
+ gh_buf_putc(ob, ch);
62
+ break;
63
+
64
+ case '\r':
65
+ /*
66
+ * Escape as \n, and skip the next \n if it's there
67
+ */
68
+ if (i + 1 < size && src[i + 1] == '\n') i++;
69
+
70
+ case '\n':
71
+ /*
72
+ * Escape actually as '\','n', not as '\', '\n'
73
+ */
74
+ ch = 'n';
75
+
76
+ default:
77
+ /*
78
+ * Normal escaping
79
+ */
80
+ gh_buf_putc(ob, '\\');
81
+ gh_buf_putc(ob, ch);
82
+ break;
83
+ }
84
+
85
+ i++;
86
+ }
87
+
88
+ return 1;
89
+ }
90
+
@@ -0,0 +1,60 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
+ int
8
+ houdini_unescape_js(gh_buf *ob, const uint8_t *src, size_t size)
9
+ {
10
+ size_t i = 0, org, ch;
11
+
12
+ while (i < size) {
13
+ org = i;
14
+ while (i < size && src[i] != '\\')
15
+ i++;
16
+
17
+ if (likely(i > org)) {
18
+ if (unlikely(org == 0)) {
19
+ if (i >= size)
20
+ return 0;
21
+
22
+ gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
23
+ }
24
+
25
+ gh_buf_put(ob, src + org, i - org);
26
+ }
27
+
28
+ /* escaping */
29
+ if (i == size)
30
+ break;
31
+
32
+ if (++i == size) {
33
+ gh_buf_putc(ob, '\\');
34
+ break;
35
+ }
36
+
37
+ ch = src[i];
38
+
39
+ switch (ch) {
40
+ case 'n':
41
+ ch = '\n';
42
+ /* pass through */
43
+
44
+ case '\\':
45
+ case '\'':
46
+ case '\"':
47
+ case '/':
48
+ gh_buf_putc(ob, ch);
49
+ i++;
50
+ break;
51
+
52
+ default:
53
+ gh_buf_putc(ob, '\\');
54
+ break;
55
+ }
56
+ }
57
+
58
+ return 1;
59
+ }
60
+