walters 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,115 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
+ /*
8
+ * The following characters will not be escaped:
9
+ *
10
+ * -_.+!*'(),%#@?=;:/,+&$ alphanum
11
+ *
12
+ * Note that this character set is the addition of:
13
+ *
14
+ * - The characters which are safe to be in an URL
15
+ * - The characters which are *not* safe to be in
16
+ * an URL because they are RESERVED characters.
17
+ *
18
+ * We assume (lazily) that any RESERVED char that
19
+ * appears inside an URL is actually meant to
20
+ * have its native function (i.e. as an URL
21
+ * component/separator) and hence needs no escaping.
22
+ *
23
+ * There are two exceptions: the characters & (amp)
24
+ * and ' (single quote) do not appear in the table.
25
+ * They are meant to appear in the URL as components,
26
+ * yet they require special HTML-entity escaping
27
+ * to generate valid HTML markup.
28
+ *
29
+ * All other characters will be escaped to %XX.
30
+ *
31
+ */
32
/*
 * Lookup table indexed by byte value. A non-zero entry marks a byte that
 * houdini_escape_href() copies to the output unchanged; a zero entry
 * sends the byte through one of that function's escaping branches.
 */
static const char HREF_SAFE[256] = {
	/* 0x00 - 0x0F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 - 0x1F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* ' ' .. '/'  */ 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
	/* '0' .. '?'  */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
	/* '@' .. 'O'  */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 'P' .. '_'  */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
	/* '`' .. 'o'  */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 'p' .. DEL  */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x80 - 0x8F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 - 0xAF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 - 0xBF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 - 0xCF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 - 0xDF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 - 0xEF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 - 0xFF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
50
+
51
+ int
52
+ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
53
+ {
54
+ static const uint8_t hex_chars[] = "0123456789ABCDEF";
55
+ size_t i = 0, org;
56
+ uint8_t hex_str[3];
57
+
58
+ hex_str[0] = '%';
59
+
60
+ while (i < size) {
61
+ org = i;
62
+ while (i < size && HREF_SAFE[src[i]] != 0)
63
+ i++;
64
+
65
+ if (likely(i > org)) {
66
+ if (unlikely(org == 0)) {
67
+ if (i >= size)
68
+ return 0;
69
+
70
+ gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
71
+ }
72
+
73
+ gh_buf_put(ob, src + org, i - org);
74
+ }
75
+
76
+ /* escaping */
77
+ if (i >= size)
78
+ break;
79
+
80
+ switch (src[i]) {
81
+ /* amp appears all the time in URLs, but needs
82
+ * HTML-entity escaping to be inside an href */
83
+ case '&':
84
+ gh_buf_PUTS(ob, "&amp;");
85
+ break;
86
+
87
+ /* the single quote is a valid URL character
88
+ * according to the standard; it needs HTML
89
+ * entity escaping too */
90
+ case '\'':
91
+ gh_buf_PUTS(ob, "&#x27;");
92
+ break;
93
+
94
+ /* the space can be escaped to %20 or a plus
95
+ * sign. we're going with the generic escape
96
+ * for now. the plus thing is more commonly seen
97
+ * when building GET strings */
98
+ #if 0
99
+ case ' ':
100
+ gh_buf_putc(ob, '+');
101
+ break;
102
+ #endif
103
+
104
+ /* every other character goes with a %XX escaping */
105
+ default:
106
+ hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
107
+ hex_str[2] = hex_chars[src[i] & 0xF];
108
+ gh_buf_put(ob, hex_str, 3);
109
+ }
110
+
111
+ i++;
112
+ }
113
+
114
+ return 1;
115
+ }
@@ -0,0 +1,90 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
+ /**
8
+ * According to the OWASP rules:
9
+ *
10
+ * & --> &amp;
11
+ * < --> &lt;
12
+ * > --> &gt;
13
+ * " --> &quot;
14
+ * ' --> &#x27; &apos; is not recommended (emitted below as the equivalent decimal form &#39;)
15
+ * / --> &#x2F; forward slash is included as it helps end an HTML entity (emitted below as &#47;)
16
+ *
17
+ */
18
/*
 * Per-byte lookup: 0 means the byte needs no escaping, any other value
 * is the index of its replacement string in HTML_ESCAPES. Entries not
 * listed here are zero.
 */
static const char HTML_ESCAPE_TABLE[256] = {
	[0x22] = 1, /* '"'  */
	[0x26] = 2, /* '&'  */
	[0x27] = 3, /* '\'' */
	[0x2F] = 4, /* '/'  */
	[0x3C] = 5, /* '<'  */
	[0x3E] = 6, /* '>'  */
};
36
+
37
/*
 * Replacement strings, indexed by the values stored in
 * HTML_ESCAPE_TABLE. Slot 0 is an empty placeholder because a table
 * value of 0 means "no escaping needed".
 *
 * Both the strings and the pointer array are const, so the whole table
 * is read-only.
 */
static const char *const HTML_ESCAPES[] = {
	"",
	"&quot;",
	"&amp;",
	"&#39;",
	"&#47;",
	"&lt;",
	"&gt;"
};
46
+
47
+ int
48
+ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
49
+ {
50
+ size_t i = 0, org, esc = 0;
51
+
52
+ while (i < size) {
53
+ org = i;
54
+ while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
55
+ i++;
56
+
57
+ if (i > org) {
58
+ if (unlikely(org == 0)) {
59
+ if (i >= size)
60
+ return 0;
61
+
62
+ gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
63
+ }
64
+
65
+ gh_buf_put(ob, src + org, i - org);
66
+ }
67
+
68
+ /* escaping */
69
+ if (unlikely(i >= size))
70
+ break;
71
+
72
+ /* The forward slash is only escaped in secure mode */
73
+ if (src[i] == '/' && !secure) {
74
+ gh_buf_putc(ob, '/');
75
+ } else {
76
+ gh_buf_puts(ob, HTML_ESCAPES[esc]);
77
+ }
78
+
79
+ i++;
80
+ }
81
+
82
+ return 1;
83
+ }
84
+
85
+ int
86
+ houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size)
87
+ {
88
+ return houdini_escape_html0(ob, src, size, 1);
89
+ }
90
+
@@ -0,0 +1,122 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+ #include "html_unescape.h"
7
+
8
+ static inline void
9
+ gh_buf_put_utf8(gh_buf *ob, int c)
10
+ {
11
+ unsigned char unichar[4];
12
+
13
+ if (c < 0x80) {
14
+ gh_buf_putc(ob, c);
15
+ }
16
+ else if (c < 0x800) {
17
+ unichar[0] = 192 + (c / 64);
18
+ unichar[1] = 128 + (c % 64);
19
+ gh_buf_put(ob, unichar, 2);
20
+ }
21
+ else if (c - 0xd800u < 0x800) {
22
+ gh_buf_putc(ob, '?');
23
+ }
24
+ else if (c < 0x10000) {
25
+ unichar[0] = 224 + (c / 4096);
26
+ unichar[1] = 128 + (c / 64) % 64;
27
+ unichar[2] = 128 + (c % 64);
28
+ gh_buf_put(ob, unichar, 3);
29
+ }
30
+ else if (c < 0x110000) {
31
+ unichar[0] = 240 + (c / 262144);
32
+ unichar[1] = 128 + (c / 4096) % 64;
33
+ unichar[2] = 128 + (c / 64) % 64;
34
+ unichar[3] = 128 + (c % 64);
35
+ gh_buf_put(ob, unichar, 4);
36
+ }
37
+ else {
38
+ gh_buf_putc(ob, '?');
39
+ }
40
+ }
41
+
42
+ static size_t
43
+ unescape_ent(gh_buf *ob, const uint8_t *src, size_t size)
44
+ {
45
+ size_t i = 0;
46
+
47
+ if (size > 3 && src[0] == '#') {
48
+ int codepoint = 0;
49
+
50
+ if (_isdigit(src[1])) {
51
+ for (i = 1; i < size && _isdigit(src[i]); ++i)
52
+ codepoint = (codepoint * 10) + (src[i] - '0');
53
+ }
54
+
55
+ else if (src[1] == 'x' || src[1] == 'X') {
56
+ for (i = 2; i < size && _isxdigit(src[i]); ++i)
57
+ codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
58
+ }
59
+
60
+ if (i < size && src[i] == ';') {
61
+ gh_buf_put_utf8(ob, codepoint);
62
+ return i + 1;
63
+ }
64
+ }
65
+
66
+ else {
67
+ if (size > MAX_WORD_LENGTH)
68
+ size = MAX_WORD_LENGTH;
69
+
70
+ for (i = MIN_WORD_LENGTH; i < size; ++i) {
71
+ if (src[i] == ' ')
72
+ break;
73
+
74
+ if (src[i] == ';') {
75
+ const struct html_ent *entity = find_entity((char *)src, i);
76
+
77
+ if (entity != NULL) {
78
+ gh_buf_put(ob, entity->utf8, entity->utf8_len);
79
+ return i + 1;
80
+ }
81
+
82
+ break;
83
+ }
84
+ }
85
+ }
86
+
87
+ gh_buf_putc(ob, '&');
88
+ return 0;
89
+ }
90
+
91
+ int
92
+ houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size)
93
+ {
94
+ size_t i = 0, org;
95
+
96
+ while (i < size) {
97
+ org = i;
98
+ while (i < size && src[i] != '&')
99
+ i++;
100
+
101
+ if (likely(i > org)) {
102
+ if (unlikely(org == 0)) {
103
+ if (i >= size)
104
+ return 0;
105
+
106
+ gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
107
+ }
108
+
109
+ gh_buf_put(ob, src + org, i - org);
110
+ }
111
+
112
+ /* escaping */
113
+ if (i >= size)
114
+ break;
115
+
116
+ i++;
117
+ i += unescape_ent(ob, src + i, size - i);
118
+ }
119
+
120
+ return 1;
121
+ }
122
+
@@ -0,0 +1,90 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
/*
 * Per-byte lookup: a non-zero entry marks a byte that
 * houdini_escape_js() handles specially instead of copying it through.
 * Entries not listed here are zero.
 */
static const char JS_ESCAPE[256] = {
	[0x0A] = 1, /* '\n' */
	[0x0D] = 1, /* '\r' */
	[0x22] = 1, /* '"'  */
	[0x27] = 1, /* '\'' */
	[0x2F] = 1, /* '/'  */
	[0x5C] = 1, /* '\\' */
};
25
+
26
+ int
27
+ houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size)
28
+ {
29
+ size_t i = 0, org, ch;
30
+
31
+ while (i < size) {
32
+ org = i;
33
+ while (i < size && JS_ESCAPE[src[i]] == 0)
34
+ i++;
35
+
36
+ if (likely(i > org)) {
37
+ if (unlikely(org == 0)) {
38
+ if (i >= size)
39
+ return 0;
40
+
41
+ gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
42
+ }
43
+
44
+ gh_buf_put(ob, src + org, i - org);
45
+ }
46
+
47
+ /* escaping */
48
+ if (i >= size)
49
+ break;
50
+
51
+ ch = src[i];
52
+
53
+ switch (ch) {
54
+ case '/':
55
+ /*
56
+ * Escape only if preceded by a lt
57
+ */
58
+ if (i && src[i - 1] == '<')
59
+ gh_buf_putc(ob, '\\');
60
+
61
+ gh_buf_putc(ob, ch);
62
+ break;
63
+
64
+ case '\r':
65
+ /*
66
+ * Escape as \n, and skip the next \n if it's there
67
+ */
68
+ if (i + 1 < size && src[i + 1] == '\n') i++;
69
+
70
+ case '\n':
71
+ /*
72
+ * Escape actually as '\','n', not as '\', '\n'
73
+ */
74
+ ch = 'n';
75
+
76
+ default:
77
+ /*
78
+ * Normal escaping
79
+ */
80
+ gh_buf_putc(ob, '\\');
81
+ gh_buf_putc(ob, ch);
82
+ break;
83
+ }
84
+
85
+ i++;
86
+ }
87
+
88
+ return 1;
89
+ }
90
+
@@ -0,0 +1,60 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
+ int
8
+ houdini_unescape_js(gh_buf *ob, const uint8_t *src, size_t size)
9
+ {
10
+ size_t i = 0, org, ch;
11
+
12
+ while (i < size) {
13
+ org = i;
14
+ while (i < size && src[i] != '\\')
15
+ i++;
16
+
17
+ if (likely(i > org)) {
18
+ if (unlikely(org == 0)) {
19
+ if (i >= size)
20
+ return 0;
21
+
22
+ gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
23
+ }
24
+
25
+ gh_buf_put(ob, src + org, i - org);
26
+ }
27
+
28
+ /* escaping */
29
+ if (i == size)
30
+ break;
31
+
32
+ if (++i == size) {
33
+ gh_buf_putc(ob, '\\');
34
+ break;
35
+ }
36
+
37
+ ch = src[i];
38
+
39
+ switch (ch) {
40
+ case 'n':
41
+ ch = '\n';
42
+ /* pass through */
43
+
44
+ case '\\':
45
+ case '\'':
46
+ case '\"':
47
+ case '/':
48
+ gh_buf_putc(ob, ch);
49
+ i++;
50
+ break;
51
+
52
+ default:
53
+ gh_buf_putc(ob, '\\');
54
+ break;
55
+ }
56
+ }
57
+
58
+ return 1;
59
+ }
60
+