escape_utils 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. data/.gitignore +2 -1
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +206 -0
  4. data/benchmark/html_escape.rb +1 -0
  5. data/benchmark/html_unescape.rb +1 -0
  6. data/benchmark/javascript_escape.rb +1 -0
  7. data/benchmark/javascript_unescape.rb +1 -0
  8. data/benchmark/url_escape.rb +1 -0
  9. data/benchmark/url_unescape.rb +1 -0
  10. data/escape_utils.gemspec +0 -3
  11. data/ext/escape_utils/buffer.c +228 -0
  12. data/ext/escape_utils/buffer.h +91 -0
  13. data/ext/escape_utils/escape_utils.c +111 -531
  14. data/ext/escape_utils/houdini.h +15 -0
  15. data/ext/escape_utils/houdini_html.c +214 -0
  16. data/ext/escape_utils/houdini_js.c +148 -0
  17. data/ext/escape_utils/houdini_uri.c +130 -0
  18. data/ext/escape_utils/html_unescape.h +754 -0
  19. data/ext/escape_utils/uri_escape.h +35 -0
  20. data/lib/escape_utils.rb +2 -2
  21. data/lib/escape_utils/html/cgi.rb +0 -2
  22. data/lib/escape_utils/html/erb.rb +0 -2
  23. data/lib/escape_utils/html/haml.rb +0 -2
  24. data/lib/escape_utils/html/rack.rb +0 -2
  25. data/lib/escape_utils/html_safety.rb +0 -2
  26. data/lib/escape_utils/javascript/action_view.rb +0 -2
  27. data/lib/escape_utils/url/cgi.rb +0 -2
  28. data/lib/escape_utils/url/erb.rb +0 -2
  29. data/lib/escape_utils/url/rack.rb +0 -2
  30. data/lib/escape_utils/url/uri.rb +0 -2
  31. data/lib/escape_utils/version.rb +1 -1
  32. data/spec/html/escape_spec.rb +0 -1
  33. data/spec/html/unescape_spec.rb +0 -1
  34. data/spec/html_safety_spec.rb +0 -1
  35. data/spec/javascript/escape_spec.rb +0 -1
  36. data/spec/javascript/unescape_spec.rb +0 -1
  37. data/spec/query/escape_spec.rb +0 -1
  38. data/spec/query/unescape_spec.rb +1 -0
  39. data/spec/spec_helper.rb +0 -1
  40. data/spec/uri/escape_spec.rb +0 -1
  41. data/spec/uri/unescape_spec.rb +1 -0
  42. data/spec/url/escape_spec.rb +0 -1
  43. data/spec/url/unescape_spec.rb +1 -0
  44. metadata +16 -8
  45. data/README.rdoc +0 -146
@@ -0,0 +1,15 @@
1
+ #ifndef __HOUDINI_H__
2
+ #define __HOUDINI_H__
3
+
4
+ #include "buffer.h"
5
+
6
+ extern void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size, int secure);
7
+ extern void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size);
8
+ extern void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size);
9
+ extern void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size);
10
+ extern void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size);
11
+ extern void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size);
12
+ extern void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size);
13
+ extern void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size);
14
+
15
+ #endif
@@ -0,0 +1,214 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+ #include "html_unescape.h"
7
+
8
/* Size hint handed to bufgrow() before escaping: 12/10 of the input length. */
#define ESCAPE_GROW_FACTOR(x) ((12 * (x)) / 10)

/* Size hint handed to bufgrow() before unescaping: the input length itself. */
#define UNESCAPE_GROW_FACTOR(x) (x)
10
+
11
+ /* Helper _isdigit methods -- do not trust the current locale */
12
/*
 * Locale-independent test for an ASCII hexadecimal digit.
 *
 * Returns 1 for '0'-'9', 'A'-'F' and 'a'-'f', and 0 for anything else.
 *
 * Explicit range checks are used instead of strchr() on a digit string:
 * strchr() also matches the string's terminating NUL (so a 0 byte was
 * reported as a hex digit) and converts its argument to char (so values
 * above 255 could alias a real digit).
 */
int _isxdigit(int c)
{
	return (c >= '0' && c <= '9') ||
	       (c >= 'A' && c <= 'F') ||
	       (c >= 'a' && c <= 'f');
}
16
+
17
/*
 * Locale-independent test for an ASCII decimal digit.
 * Returns 1 when `c` lies in '0'..'9', otherwise 0.
 */
int _isdigit(int c)
{
	if (c < '0')
		return 0;

	return c <= '9';
}
21
+
22
+
23
+ /**
24
+ * According to the OWASP rules:
25
+ *
26
+ * & --> &amp;
27
+ * < --> &lt;
28
+ * > --> &gt;
29
+ * " --> &quot;
30
+ * ' --> &#39; &apos; is not recommended
31
+ * / --> &#47; forward slash is included as it helps end an HTML entity
32
+ *
33
+ */
34
/*
 * Byte -> index into HTML_ESCAPES.  An entry of 0 means the byte is copied
 * through unchanged; every byte not listed here is 0.
 */
static const char HTML_ESCAPE_TABLE[256] = {
	['"']  = 1,
	['&']  = 2,
	['\''] = 3,
	['/']  = 4,
	['<']  = 5,
	['>']  = 6,
};
52
+
53
/*
 * Replacement text for each non-zero code stored in HTML_ESCAPE_TABLE.
 * Slot 0 is an unused placeholder.
 */
static const char *HTML_ESCAPES[] = {
	[0] = "",
	[1] = "&quot;",
	[2] = "&amp;",
	[3] = "&#39;",
	[4] = "&#47;",
	[5] = "&lt;",
	[6] = "&gt;"
};
62
+
63
+ void
64
+ houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size, int secure)
65
+ {
66
+ size_t i = 0, org, esc;
67
+
68
+ bufgrow(ob, ESCAPE_GROW_FACTOR(size));
69
+
70
+ while (i < size) {
71
+ org = i;
72
+ while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
73
+ i++;
74
+
75
+ if (i > org)
76
+ bufput(ob, src + org, i - org);
77
+
78
+ /* escaping */
79
+ if (i >= size)
80
+ break;
81
+
82
+ /* The forward slash is only escaped in secure mode */
83
+ if (src[i] == '/' && !secure) {
84
+ bufputc(ob, '/');
85
+ } else {
86
+ bufputs(ob, HTML_ESCAPES[esc]);
87
+ }
88
+
89
+ i++;
90
+ }
91
+ }
92
+
93
/*
 * Write code point `c` to `ob` encoded as UTF-8 (1 to 4 bytes).
 *
 * A single '?' is written instead when `c` is not encodable: negative
 * values, the surrogate range 0xD800..0xDFFF, and anything at or above
 * 0x110000.  Negative values are rejected explicitly; they would otherwise
 * satisfy the `c < 0x80` test and be emitted as a raw byte.
 */
static inline void
bufput_utf8(struct buf *ob, int c)
{
	unsigned char unichar[4];

	if (c < 0) {
		bufputc(ob, '?');
	}
	else if (c < 0x80) {
		bufputc(ob, c);
	}
	else if (c < 0x800) {
		unichar[0] = 192 + (c / 64);
		unichar[1] = 128 + (c % 64);
		bufput(ob, unichar, 2);
	}
	else if (c - 0xd800u < 0x800) {
		/* UTF-16 surrogate halves are not valid scalar values */
		bufputc(ob, '?');
	}
	else if (c < 0x10000) {
		unichar[0] = 224 + (c / 4096);
		unichar[1] = 128 + (c / 64) % 64;
		unichar[2] = 128 + (c % 64);
		bufput(ob, unichar, 3);
	}
	else if (c < 0x110000) {
		unichar[0] = 240 + (c / 262144);
		unichar[1] = 128 + (c / 4096) % 64;
		unichar[2] = 128 + (c / 64) % 64;
		unichar[3] = 128 + (c % 64);
		bufput(ob, unichar, 4);
	}
	else {
		bufputc(ob, '?');
	}
}
126
+
127
+ static size_t
128
+ unescape_ent(struct buf *ob, const uint8_t *src, size_t size)
129
+ {
130
+ size_t i = 0;
131
+
132
+ if (size > 3 && src[0] == '#') {
133
+ int codepoint = 0;
134
+
135
+ if (_isdigit(src[1])) {
136
+ for (i = 1; i < size && _isdigit(src[i]); ++i)
137
+ codepoint = (codepoint * 10) + (src[i] - '0');
138
+ }
139
+
140
+ else if (src[1] == 'x' || src[1] == 'X') {
141
+ for (i = 2; i < size && _isxdigit(src[i]); ++i)
142
+ codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
143
+ }
144
+
145
+ if (i < size && src[i] == ';') {
146
+ bufput_utf8(ob, codepoint);
147
+ return i + 1;
148
+ }
149
+ }
150
+
151
+ else {
152
+ if (size > MAX_WORD_LENGTH)
153
+ size = MAX_WORD_LENGTH;
154
+
155
+ for (i = MIN_WORD_LENGTH; i < size; ++i) {
156
+ if (src[i] == ' ')
157
+ break;
158
+
159
+ if (src[i] == ';') {
160
+ const struct html_ent *entity = find_entity((char *)src, i);
161
+
162
+ if (entity != NULL) {
163
+ bufput(ob, entity->utf8, entity->utf8_len);
164
+ return i + 1;
165
+ }
166
+
167
+ break;
168
+ }
169
+ }
170
+ }
171
+
172
+ bufputc(ob, '&');
173
+ return 0;
174
+ }
175
+
176
/*
 * Decode HTML character references in `size` bytes of `src` into `ob`.
 *
 * Text between ampersands is copied in runs; each '&' is handed to
 * unescape_ent(), which writes either the decoded text or a literal '&'
 * and reports how many bytes after the '&' it consumed.
 */
void
houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size)
{
	size_t pos = 0;

	bufgrow(ob, UNESCAPE_GROW_FACTOR(size));

	while (pos < size) {
		const uint8_t *amp = memchr(src + pos, '&', size - pos);
		size_t run_len = (amp != NULL) ? (size_t)(amp - (src + pos)) : size - pos;

		if (run_len > 0)
			bufput(ob, src + pos, run_len);

		pos += run_len;

		/* No ampersand left in the input. */
		if (pos >= size)
			break;

		pos++; /* step over the '&' */
		pos += unescape_ent(ob, src + pos, size - pos);
	}
}
199
+
200
#ifdef TEST

/*
 * Stand-alone smoke test (built only with -DTEST): unescape a sample
 * string and print the result.
 */
int main(void)
{
	const char TEST_STRING[] = "This &#x2663; is & just &quot;an example&diams;&quot;";
	struct buf *buffer;

	buffer = bufnew(128);
	/* houdini takes raw bytes (const uint8_t *), so the char array is cast explicitly */
	houdini_unescape_html(buffer, (const uint8_t *)TEST_STRING, strlen(TEST_STRING));
	printf("Result: %.*s\n", (int)buffer->size, buffer->data);
	bufrelease(buffer);
	return 0;
}
#endif
214
+
@@ -0,0 +1,148 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
/* Size hint handed to bufgrow() before escaping: 12/10 of the input length. */
#define ESCAPE_GROW_FACTOR(x) ((12 * (x)) / 10)

/* Size hint handed to bufgrow() before unescaping: the input length itself. */
#define UNESCAPE_GROW_FACTOR(x) (x)
9
+
10
/*
 * Undo JavaScript backslash escaping of `size` bytes of `src` into `ob`.
 *
 * Recognised sequences: "\n" becomes a newline; "\\", "\'", "\"" and "\/"
 * become the character after the backslash.  For any other sequence only
 * the backslash is written here and the following byte is handled as
 * ordinary text on the next pass.  A trailing lone backslash is kept.
 */
void
houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size)
{
	size_t pos = 0;

	bufgrow(ob, UNESCAPE_GROW_FACTOR(size));

	while (pos < size) {
		const uint8_t *slash = memchr(src + pos, '\\', size - pos);
		size_t run_len = (slash != NULL) ? (size_t)(slash - (src + pos)) : size - pos;
		size_t next;

		if (run_len > 0)
			bufput(ob, src + pos, run_len);

		pos += run_len;

		/* No backslash left in the input. */
		if (pos == size)
			break;

		pos++; /* step over the backslash */

		if (pos == size) {
			/* backslash was the very last byte */
			bufputc(ob, '\\');
			break;
		}

		next = src[pos];

		if (next == 'n') {
			bufputc(ob, '\n');
			pos++;
		} else if (next == '\\' || next == '\'' || next == '\"' || next == '/') {
			bufputc(ob, next);
			pos++;
		} else {
			bufputc(ob, '\\');
		}
	}
}
55
+
56
/*
 * Non-zero for every byte houdini_escape_js() treats specially:
 * newline, carriage return, both quote characters, '/' and the backslash.
 * Every byte not listed here is 0.
 */
static const char JS_ESCAPE[256] = {
	['\n'] = 1,
	['\r'] = 1,
	['"']  = 1,
	['\''] = 1,
	['/']  = 1,
	['\\'] = 1,
};
74
+
75
+ void
76
+ houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size)
77
+ {
78
+ size_t i = 0, org, ch;
79
+
80
+ bufgrow(ob, ESCAPE_GROW_FACTOR(size));
81
+
82
+ while (i < size) {
83
+ org = i;
84
+ while (i < size && JS_ESCAPE[src[i]] == 0)
85
+ i++;
86
+
87
+ if (i > org)
88
+ bufput(ob, src + org, i - org);
89
+
90
+ /* escaping */
91
+ if (i >= size)
92
+ break;
93
+
94
+ ch = src[i];
95
+
96
+ switch (ch) {
97
+ case '/':
98
+ /*
99
+ * Escape only if preceded by a lt
100
+ */
101
+ if (i && src[i - 1] == '<')
102
+ bufputc(ob, '\\');
103
+
104
+ bufputc(ob, ch);
105
+ break;
106
+
107
+ case '\r':
108
+ /*
109
+ * Escape as \n, and skip the next \n if it's there
110
+ */
111
+ if (i + 1 < size && src[i + 1] == '\n') i++;
112
+
113
+ case '\n':
114
+ /*
115
+ * Escape actually as '\','n', not as '\', '\n'
116
+ */
117
+ ch = 'n';
118
+
119
+ default:
120
+ /*
121
+ * Normal escaping
122
+ */
123
+ bufputc(ob, '\\');
124
+ bufputc(ob, ch);
125
+ break;
126
+ }
127
+
128
+ i++;
129
+ }
130
+ }
131
+
132
+
133
+ //#define TEST
134
#ifdef TEST

/*
 * Stand-alone smoke test (built only with -DTEST): escape a sample string
 * with the JavaScript escaper defined in this file and print the result.
 */
int main(void)
{
	const char TEST_STRING[] = "</script> it's \"a\" \\ test\r\nsecond line";
	struct buf *buffer;

	buffer = bufnew(128);
	/* houdini takes raw bytes (const uint8_t *), so the char array is cast explicitly */
	houdini_escape_js(buffer, (const uint8_t *)TEST_STRING, strlen(TEST_STRING));
	printf("Result: %.*s\n", (int)buffer->size, buffer->data);
	bufrelease(buffer);
	return 0;
}
#endif
148
+
@@ -0,0 +1,130 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+ #include "uri_escape.h"
7
+
8
/* Size hint handed to bufgrow() before escaping: 12/10 of the input length. */
#define ESCAPE_GROW_FACTOR(x) ((12 * (x)) / 10)

/* Size hint handed to bufgrow() before unescaping: the input length itself. */
#define UNESCAPE_GROW_FACTOR(x) (x)
10
+
11
+ extern int _isxdigit(int c);
12
+
13
+ static void
14
+ escape(struct buf *ob, const uint8_t *src, size_t size, int is_url)
15
+ {
16
+ static const char hex_chars[] = "0123456789ABCDEF";
17
+ const char *safe_table = is_url ? URL_SAFE : URI_SAFE;
18
+
19
+ size_t i = 0, org;
20
+ char hex_str[3];
21
+
22
+ bufgrow(ob, ESCAPE_GROW_FACTOR(size));
23
+ hex_str[0] = '%';
24
+
25
+ while (i < size) {
26
+ org = i;
27
+ while (i < size && safe_table[src[i]] != 0)
28
+ i++;
29
+
30
+ if (i > org)
31
+ bufput(ob, src + org, i - org);
32
+
33
+ /* escaping */
34
+ if (i >= size)
35
+ break;
36
+
37
+ if (src[i] == ' ' && is_url) {
38
+ bufputc(ob, '+');
39
+ } else {
40
+ hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
41
+ hex_str[2] = hex_chars[src[i] & 0xF];
42
+ bufput(ob, hex_str, 3);
43
+ }
44
+
45
+ i++;
46
+ }
47
+ }
48
+
49
/*
 * Value of an ASCII hex digit: '0'-'9' -> 0-9, 'a'-'f' / 'A'-'F' -> 10-15.
 * Only meaningful for bytes already accepted by _isxdigit().
 */
#define hex2c(c) ((((c) | 32) % 39) - 9)

/*
 * Percent-decode `size` bytes of `src` into `ob`.
 *
 * "%XX" with two hex digits becomes the corresponding byte; a '%' that is
 * not followed by two hex digits is kept literally.  When `is_url` is
 * non-zero a literal '+' in the input becomes a space.
 *
 * The '+' translation is done while scanning the input rather than as a
 * pass over the output buffer afterwards, so that
 *   - a '+' produced by decoding "%2B" stays a '+',
 *   - data that was already in `ob` before the call is not modified,
 *   - input containing NUL bytes is handled in full.
 */
static void
unescape(struct buf *ob, const uint8_t *src, size_t size, int is_url)
{
	size_t i = 0, org;

	bufgrow(ob, UNESCAPE_GROW_FACTOR(size));

	while (i < size) {
		org = i;
		while (i < size && src[i] != '%' && !(is_url && src[i] == '+'))
			i++;

		if (i > org)
			bufput(ob, src + org, i - org);

		/* escaping */
		if (i >= size)
			break;

		/* the scan stops on '+' only in URL mode */
		if (src[i] == '+') {
			bufputc(ob, ' ');
			i++;
			continue;
		}

		i++; /* step over the '%' */

		if (i + 1 < size && _isxdigit(src[i]) && _isxdigit(src[i + 1])) {
			unsigned char new_char = (hex2c(src[i]) << 4) + hex2c(src[i + 1]);
			bufputc(ob, new_char);
			i += 2;
		} else {
			bufputc(ob, '%');
		}
	}
}
87
+
88
+
89
/*
 * Percent-encode `src` into `ob` using the URI safe table
 * (escape() with is_url = 0; a space is written as "%20").
 *
 * Plain call rather than `return escape(...)`: C does not allow a return
 * statement with an expression in a function returning void.
 */
void
houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size)
{
	escape(ob, src, size, 0);
}
94
+
95
/*
 * Percent-encode `src` into `ob` using the URL safe table
 * (escape() with is_url = 1; a space is written as '+').
 *
 * Plain call rather than `return escape(...)`: C does not allow a return
 * statement with an expression in a function returning void.
 */
void
houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size)
{
	escape(ob, src, size, 1);
}
100
+
101
/*
 * Percent-decode `src` into `ob` (unescape() with is_url = 0; '+' is left
 * untouched).
 *
 * Plain call rather than `return unescape(...)`: C does not allow a return
 * statement with an expression in a function returning void.
 */
void
houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size)
{
	unescape(ob, src, size, 0);
}
106
+
107
/*
 * Percent-decode `src` into `ob` in URL mode (unescape() with is_url = 1,
 * which additionally turns '+' into a space).
 *
 * Plain call rather than `return unescape(...)`: C does not allow a return
 * statement with an expression in a function returning void.
 */
void
houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size)
{
	unescape(ob, src, size, 1);
}
112
+
113
+
114
+
115
+ //#define TEST
116
#ifdef TEST

/*
 * Stand-alone smoke test (built only with -DTEST): percent-encode a sample
 * string containing a '%', spaces and a non-ASCII byte, then print it.
 */
int main(void)
{
	const char TEST_STRING[] = "http% this \200 is a test";
	struct buf *buffer;

	buffer = bufnew(128);
	/* houdini takes raw bytes (const uint8_t *), so the char array is cast explicitly */
	houdini_escape_uri(buffer, (const uint8_t *)TEST_STRING, strlen(TEST_STRING));
	printf("Result: %.*s\n", (int)buffer->size, buffer->data);
	bufrelease(buffer);
	return 0;
}
#endif
130
+