escape_utils 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -1
- data/CHANGELOG.md +4 -0
- data/README.md +206 -0
- data/benchmark/html_escape.rb +1 -0
- data/benchmark/html_unescape.rb +1 -0
- data/benchmark/javascript_escape.rb +1 -0
- data/benchmark/javascript_unescape.rb +1 -0
- data/benchmark/url_escape.rb +1 -0
- data/benchmark/url_unescape.rb +1 -0
- data/escape_utils.gemspec +0 -3
- data/ext/escape_utils/buffer.c +228 -0
- data/ext/escape_utils/buffer.h +91 -0
- data/ext/escape_utils/escape_utils.c +111 -531
- data/ext/escape_utils/houdini.h +15 -0
- data/ext/escape_utils/houdini_html.c +214 -0
- data/ext/escape_utils/houdini_js.c +148 -0
- data/ext/escape_utils/houdini_uri.c +130 -0
- data/ext/escape_utils/html_unescape.h +754 -0
- data/ext/escape_utils/uri_escape.h +35 -0
- data/lib/escape_utils.rb +2 -2
- data/lib/escape_utils/html/cgi.rb +0 -2
- data/lib/escape_utils/html/erb.rb +0 -2
- data/lib/escape_utils/html/haml.rb +0 -2
- data/lib/escape_utils/html/rack.rb +0 -2
- data/lib/escape_utils/html_safety.rb +0 -2
- data/lib/escape_utils/javascript/action_view.rb +0 -2
- data/lib/escape_utils/url/cgi.rb +0 -2
- data/lib/escape_utils/url/erb.rb +0 -2
- data/lib/escape_utils/url/rack.rb +0 -2
- data/lib/escape_utils/url/uri.rb +0 -2
- data/lib/escape_utils/version.rb +1 -1
- data/spec/html/escape_spec.rb +0 -1
- data/spec/html/unescape_spec.rb +0 -1
- data/spec/html_safety_spec.rb +0 -1
- data/spec/javascript/escape_spec.rb +0 -1
- data/spec/javascript/unescape_spec.rb +0 -1
- data/spec/query/escape_spec.rb +0 -1
- data/spec/query/unescape_spec.rb +1 -0
- data/spec/spec_helper.rb +0 -1
- data/spec/uri/escape_spec.rb +0 -1
- data/spec/uri/unescape_spec.rb +1 -0
- data/spec/url/escape_spec.rb +0 -1
- data/spec/url/unescape_spec.rb +1 -0
- metadata +16 -8
- data/README.rdoc +0 -146
@@ -0,0 +1,91 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2008, Natacha Porté
|
3
|
+
* Copyright (c) 2011, Vicent Martí
|
4
|
+
*
|
5
|
+
* Permission to use, copy, modify, and distribute this software for any
|
6
|
+
* purpose with or without fee is hereby granted, provided that the above
|
7
|
+
* copyright notice and this permission notice appear in all copies.
|
8
|
+
*
|
9
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
10
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
11
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
12
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
13
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
14
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
15
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
16
|
+
*/
|
17
|
+
|
18
|
+
#ifndef __GEN_BUFFER_H__
|
19
|
+
#define __GEN_BUFFER_H__
|
20
|
+
|
21
|
+
#include <stddef.h>
|
22
|
+
#include <stdarg.h>
|
23
|
+
#include <stdint.h>
|
24
|
+
|
25
|
+
#if defined(_MSC_VER)
|
26
|
+
#define __attribute__(x)
|
27
|
+
#define inline
|
28
|
+
#endif
|
29
|
+
|
30
|
+
typedef enum {
|
31
|
+
BUF_OK = 0,
|
32
|
+
BUF_ENOMEM = -1,
|
33
|
+
} buferror_t;
|
34
|
+
|
35
|
+
/* struct buf: character array buffer */
|
36
|
+
struct buf {
|
37
|
+
uint8_t *data; /* actual character data */
|
38
|
+
size_t size; /* size of the string */
|
39
|
+
size_t asize; /* allocated size (0 = volatile buffer) */
|
40
|
+
size_t unit; /* reallocation unit size (0 = read-only buffer) */
|
41
|
+
};
|
42
|
+
|
43
|
+
/* BUF_STATIC: global buffer from a string literal */
|
44
|
+
#define BUF_STATIC(string) \
|
45
|
+
{ (uint8_t *)string, sizeof string -1, sizeof string, 0, 0 }
|
46
|
+
|
47
|
+
/* BUF_VOLATILE: macro for creating a volatile buffer on the stack */
|
48
|
+
#define BUF_VOLATILE(strname) \
|
49
|
+
{ (uint8_t *)strname, strlen(strname), 0, 0, 0 }
|
50
|
+
|
51
|
+
/* BUFPUTSL: optimized bufputs of a string literal */
|
52
|
+
#define BUFPUTSL(output, literal) \
|
53
|
+
bufput(output, literal, sizeof literal - 1)
|
54
|
+
|
55
|
+
/* bufgrow: increasing the allocated size to the given value */
|
56
|
+
int bufgrow(struct buf *, size_t);
|
57
|
+
|
58
|
+
/* bufnew: allocation of a new buffer */
|
59
|
+
struct buf *bufnew(size_t) __attribute__ ((malloc));
|
60
|
+
|
61
|
+
/* bufcstr: NUL-termination of the string array (making a C-string) */
|
62
|
+
const char *bufcstr(struct buf *);
|
63
|
+
|
64
|
+
/* bufprefix: compare the beginning of a buffer with a string */
|
65
|
+
int bufprefix(const struct buf *buf, const char *prefix);
|
66
|
+
|
67
|
+
/* bufput: appends raw data to a buffer */
|
68
|
+
void bufput(struct buf *, const void *, size_t);
|
69
|
+
|
70
|
+
/* bufputs: appends a NUL-terminated string to a buffer */
|
71
|
+
void bufputs(struct buf *, const char *);
|
72
|
+
|
73
|
+
/* bufputc: appends a single char to a buffer */
|
74
|
+
void bufputc(struct buf *, int);
|
75
|
+
|
76
|
+
/* bufrelease: releases the buffer (struct buf carries no reference count, so this presumably frees it outright -- confirm in buffer.c) */
|
77
|
+
void bufrelease(struct buf *);
|
78
|
+
|
79
|
+
/* bufreset: frees internal data of the buffer */
|
80
|
+
void bufreset(struct buf *);
|
81
|
+
|
82
|
+
/* bufslurp: removes a given number of bytes from the head of the array */
|
83
|
+
void bufslurp(struct buf *, size_t);
|
84
|
+
|
85
|
+
/* bufprintf: formatted printing to a buffer */
|
86
|
+
void bufprintf(struct buf *, const char *, ...) __attribute__ ((format (printf, 2, 3)));
|
87
|
+
|
88
|
+
/* vbufprintf: stdarg variant of formatted printing into a buffer */
|
89
|
+
void vbufprintf(struct buf *, const char * , va_list);
|
90
|
+
|
91
|
+
#endif
|
@@ -1,3 +1,8 @@
|
|
1
|
+
// tell rbx not to use its caching compat layer
|
2
|
+
// by doing this we're making a promise to RBX that
|
3
|
+
// we'll never modify the pointers we get back from RSTRING_PTR
|
4
|
+
#define RSTRING_NOT_MODIFIED
|
5
|
+
|
1
6
|
#include <ruby.h>
|
2
7
|
#if RB_CVAR_SET_ARITY == 4
|
3
8
|
# define rb_cvar_set(a,b,c) rb_cvar_set(a,b,c,0)
|
@@ -6,582 +11,157 @@
|
|
6
11
|
#include <ruby/encoding.h>
|
7
12
|
#endif
|
8
13
|
|
9
|
-
|
10
|
-
static ID rb_html_secure;
|
11
|
-
static int html_secure = 1;
|
12
|
-
|
13
|
-
#define IS_HEX(c) ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'))
|
14
|
-
#define UNHEX(c) (c >= '0' && c <= '9' ? c - '0' : c >= 'A' && c <= 'F' ? c - 'A' + 10 : c - 'a' + 10)
|
15
|
-
|
16
|
-
#define ALPHANUM(c) ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9'))
|
17
|
-
#define URL_SAFE(c) (ALPHANUM(c) || c == '-' || c == '_' || c == '.')
|
18
|
-
|
19
|
-
/* from uri/common.rb */
|
20
|
-
#define UNRESERVED(c) (ALPHANUM(c) || c == '-' || c == '_' || c == '.' || c == '!' || c == '~' || c == '*' || c == '\'' || c == '(' || c == ')')
|
21
|
-
#define RESERVED(c) (c == ';' || c == '/' || c == '?' || c == ':' || c == '@' || c== '&' || c == '=' || c == '+' || c == '$' || c == ',' || c == '[' || c == ']')
|
22
|
-
#define URI_SAFE(c) (URL_SAFE(c) || UNRESERVED(c) || RESERVED(c))
|
23
|
-
|
24
|
-
static size_t escape_html(unsigned char *out, const unsigned char *in, size_t in_len, int secure) {
|
25
|
-
size_t total = 0;
|
26
|
-
unsigned char curChar;
|
27
|
-
|
28
|
-
total = in_len;
|
29
|
-
while (in_len) {
|
30
|
-
curChar = *in++;
|
31
|
-
|
32
|
-
if (curChar == '<') {
|
33
|
-
*out++ = '&'; *out++ = 'l'; *out++ = 't'; *out++ = ';';
|
34
|
-
total += 3;
|
35
|
-
} else if (curChar == '>') {
|
36
|
-
*out++ = '&'; *out++ = 'g'; *out++ = 't'; *out++ = ';';
|
37
|
-
total += 3;
|
38
|
-
} else if (curChar == '&') {
|
39
|
-
*out++ = '&'; *out++ = 'a'; *out++ = 'm'; *out++ = 'p'; *out++ = ';';
|
40
|
-
total += 4;
|
41
|
-
} else if (curChar == '\'') {
|
42
|
-
*out++ = '&'; *out++ = '#'; *out++ = '3'; *out++ = '9'; *out++ = ';';
|
43
|
-
total += 4;
|
44
|
-
} else if (curChar == '\"') {
|
45
|
-
*out++ = '&'; *out++ = 'q'; *out++ = 'u'; *out++ = 'o'; *out++ = 't'; *out++ = ';';
|
46
|
-
total += 5;
|
47
|
-
} else if (secure && curChar == '/') {
|
48
|
-
*out++ = '&'; *out++ = '#'; *out++ = '4'; *out++ = '7'; *out++ = ';';
|
49
|
-
total += 4;
|
50
|
-
} else {
|
51
|
-
*out++ = curChar;
|
52
|
-
}
|
53
|
-
in_len--;
|
54
|
-
}
|
55
|
-
|
56
|
-
return total;
|
57
|
-
}
|
14
|
+
#include "houdini.h"
|
58
15
|
|
59
|
-
|
60
|
-
size_t total = 0, len = in_len;
|
61
|
-
unsigned char curChar, *start;
|
62
|
-
|
63
|
-
start = (unsigned char *)&in[0];
|
64
|
-
total = in_len;
|
65
|
-
while (len) {
|
66
|
-
curChar = *in++;
|
67
|
-
if (curChar == '&') {
|
68
|
-
if (*in == 'l' && *(in+1) == 't' && *(in+2) == ';') {
|
69
|
-
*out++ = '<';
|
70
|
-
total-=3;
|
71
|
-
in+=3;
|
72
|
-
len-=3;
|
73
|
-
} else if (*in == 'g' && *(in+1) == 't' && *(in+2) == ';') {
|
74
|
-
*out++ = '>';
|
75
|
-
total-=3;
|
76
|
-
in+=3;
|
77
|
-
len-=3;
|
78
|
-
} else if (*in == 'a' && *(in+1) == 'm' && *(in+2) == 'p' && *(in+3) == ';') {
|
79
|
-
*out++ = '&';
|
80
|
-
total-=4;
|
81
|
-
in+=4;
|
82
|
-
len-=4;
|
83
|
-
} else if (*in == '#' && *(in+1) == '3' && *(in+2) == '9' && *(in+3) == ';') {
|
84
|
-
*out++ = '\'';
|
85
|
-
total-=4;
|
86
|
-
in+=4;
|
87
|
-
len-=4;
|
88
|
-
} else if (*in == '#' && *(in+1) == '4' && *(in+2) == '7' && *(in+3) == ';') {
|
89
|
-
*out++ = '/';
|
90
|
-
total-=4;
|
91
|
-
in+=4;
|
92
|
-
len-=4;
|
93
|
-
} else if (*in == 'q' && *(in+1) == 'u' && *(in+2) == 'o' && *(in+3) == 't' && *(in+4) == ';') {
|
94
|
-
*out++ = '\"';
|
95
|
-
total-=5;
|
96
|
-
in+=5;
|
97
|
-
len-=5;
|
98
|
-
} else {
|
99
|
-
/* incomplete tag, pass it through */
|
100
|
-
*out++ = curChar;
|
101
|
-
}
|
102
|
-
} else {
|
103
|
-
*out++ = curChar;
|
104
|
-
}
|
105
|
-
len--;
|
106
|
-
}
|
107
|
-
|
108
|
-
return total;
|
109
|
-
}
|
16
|
+
typedef void (*houdini_cb)(struct buf *, const uint8_t *, size_t);
|
110
17
|
|
111
|
-
static
|
112
|
-
size_t total = 0;
|
113
|
-
unsigned char curChar;
|
114
|
-
|
115
|
-
total = in_len;
|
116
|
-
while (in_len) {
|
117
|
-
curChar = *in++;
|
118
|
-
switch (curChar) {
|
119
|
-
case '\\':
|
120
|
-
*out++ = '\\'; *out++ = '\\';
|
121
|
-
total++;
|
122
|
-
break;
|
123
|
-
case '<':
|
124
|
-
*out++ = '<';
|
125
|
-
if (*in == '/') {
|
126
|
-
*out++ = '\\'; *out++ = '/';
|
127
|
-
in++; in_len--;
|
128
|
-
total++;
|
129
|
-
}
|
130
|
-
break;
|
131
|
-
case '\r':
|
132
|
-
if (*in == '\n') {
|
133
|
-
*out++ = '\\'; *out++ = 'n';
|
134
|
-
in++; in_len--;
|
135
|
-
} else {
|
136
|
-
*out++ = '\\'; *out++ = 'n';
|
137
|
-
total++;
|
138
|
-
}
|
139
|
-
break;
|
140
|
-
case '\n':
|
141
|
-
*out++ = '\\'; *out++ = 'n';
|
142
|
-
total++;
|
143
|
-
break;
|
144
|
-
case '\'':
|
145
|
-
*out++ = '\\'; *out++ = '\'';
|
146
|
-
total++;
|
147
|
-
break;
|
148
|
-
case '\"':
|
149
|
-
*out++ = '\\'; *out++ = '\"';
|
150
|
-
total++;
|
151
|
-
break;
|
152
|
-
default:
|
153
|
-
*out++ = curChar;
|
154
|
-
break;
|
155
|
-
}
|
156
|
-
in_len--;
|
157
|
-
}
|
158
|
-
|
159
|
-
return total;
|
160
|
-
}
|
161
|
-
|
162
|
-
static size_t unescape_javascript(unsigned char *out, const unsigned char *in, size_t in_len) {
|
163
|
-
size_t total = 0;
|
164
|
-
unsigned char curChar;
|
165
|
-
|
166
|
-
total = in_len;
|
167
|
-
while (in_len) {
|
168
|
-
curChar = *in++;
|
169
|
-
if (curChar == '\\') {
|
170
|
-
if (*in == 'n') {
|
171
|
-
*out++ = '\n';
|
172
|
-
total--;
|
173
|
-
} else if (*in == '\\') {
|
174
|
-
*out++ = '\\';
|
175
|
-
total--;
|
176
|
-
} else if (*in == '\'') {
|
177
|
-
*out++ = '\'';
|
178
|
-
total--;
|
179
|
-
} else if (*in == '\"') {
|
180
|
-
*out++ = '\"';
|
181
|
-
total--;
|
182
|
-
} else if (*in == '/') {
|
183
|
-
*out++ = '/';
|
184
|
-
total--;
|
185
|
-
} else {
|
186
|
-
/* incomplete escape, pass it through */
|
187
|
-
*out++ = curChar;
|
188
|
-
continue;
|
189
|
-
}
|
190
|
-
in++; in_len--;
|
191
|
-
} else {
|
192
|
-
*out++ = curChar;
|
193
|
-
}
|
194
|
-
in_len--;
|
195
|
-
}
|
196
|
-
|
197
|
-
return total;
|
198
|
-
}
|
199
|
-
|
200
|
-
static size_t escape_url(unsigned char *out, const unsigned char *in, size_t in_len) {
|
201
|
-
size_t total = 0;
|
202
|
-
unsigned char curChar, hex[2];
|
203
|
-
const unsigned char hexChars[16] = "0123456789ABCDEF";
|
204
|
-
|
205
|
-
total = in_len;
|
206
|
-
while (in_len) {
|
207
|
-
curChar = *in++;
|
208
|
-
if (curChar == ' ') {
|
209
|
-
*out++ = '+';
|
210
|
-
} else if (URL_SAFE(curChar)) {
|
211
|
-
*out++ = curChar;
|
212
|
-
} else {
|
213
|
-
hex[1] = hexChars[curChar & 0x0f];
|
214
|
-
hex[0] = hexChars[(curChar >> 4) & 0x0f];
|
215
|
-
*out++ = '%'; *out++ = hex[0]; *out++ = hex[1];
|
216
|
-
total += 2;
|
217
|
-
}
|
218
|
-
in_len--;
|
219
|
-
}
|
220
|
-
|
221
|
-
return total;
|
222
|
-
}
|
18
|
+
static VALUE rb_mEscapeUtils;
|
223
19
|
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
total = in_len;
|
230
|
-
while (len) {
|
231
|
-
curChar = *in++;
|
232
|
-
if (curChar == '%') {
|
233
|
-
if (IS_HEX(*in) && IS_HEX(*(in+1))) {
|
234
|
-
*out++ = (UNHEX(*in) << 4) + UNHEX(*(in+1));
|
235
|
-
in+=2;
|
236
|
-
len-=2;
|
237
|
-
total-=2;
|
238
|
-
} else {
|
239
|
-
/* incomplete escape, pass it through */
|
240
|
-
*out++ = curChar;
|
241
|
-
}
|
242
|
-
} else if (curChar == '+') {
|
243
|
-
*out++ = ' ';
|
244
|
-
} else {
|
245
|
-
*out++ = curChar;
|
246
|
-
}
|
247
|
-
len--;
|
248
|
-
}
|
249
|
-
|
250
|
-
return total;
|
251
|
-
}
|
20
|
+
/**
|
21
|
+
* html_secure instance variable
|
22
|
+
*/
|
23
|
+
static ID rb_html_secure;
|
24
|
+
static int g_html_secure = 1;
|
252
25
|
|
253
|
-
static
|
254
|
-
|
255
|
-
|
256
|
-
const unsigned char hexChars[16] = "0123456789ABCDEF";
|
257
|
-
|
258
|
-
total = in_len;
|
259
|
-
while (in_len) {
|
260
|
-
curChar = *in++;
|
261
|
-
if (URI_SAFE(curChar)) {
|
262
|
-
*out++ = curChar;
|
263
|
-
} else {
|
264
|
-
hex[1] = hexChars[curChar & 0x0f];
|
265
|
-
hex[0] = hexChars[(curChar >> 4) & 0x0f];
|
266
|
-
*out++ = '%'; *out++ = hex[0]; *out++ = hex[1];
|
267
|
-
total += 2;
|
268
|
-
}
|
269
|
-
in_len--;
|
270
|
-
}
|
271
|
-
|
272
|
-
return total;
|
26
|
+
static VALUE rb_eu_get_html_secure(VALUE self)
|
27
|
+
{
|
28
|
+
return rb_cvar_get(self, rb_html_secure);
|
273
29
|
}
|
274
30
|
|
275
|
-
static
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
total = in_len;
|
281
|
-
while (len) {
|
282
|
-
curChar = *in++;
|
283
|
-
if (curChar == '%') {
|
284
|
-
if (IS_HEX(*in) && IS_HEX(*(in+1))) {
|
285
|
-
*out++ = (UNHEX(*in) << 4) + UNHEX(*(in+1));
|
286
|
-
in+=2;
|
287
|
-
len-=2;
|
288
|
-
total-=2;
|
289
|
-
} else {
|
290
|
-
/* incomplete escape, pass it through */
|
291
|
-
*out++ = curChar;
|
292
|
-
}
|
293
|
-
} else {
|
294
|
-
*out++ = curChar;
|
295
|
-
}
|
296
|
-
len--;
|
297
|
-
}
|
298
|
-
|
299
|
-
return total;
|
31
|
+
static VALUE rb_eu_set_html_secure(VALUE self, VALUE val)
|
32
|
+
{
|
33
|
+
g_html_secure = RTEST(val);
|
34
|
+
rb_cvar_set(self, rb_html_secure, val);
|
35
|
+
return val;
|
300
36
|
}
|
301
37
|
|
302
|
-
static VALUE rb_escape_html(int argc, VALUE * argv, VALUE self) {
|
303
|
-
VALUE str, rb_secure;
|
304
|
-
int secure = html_secure;
|
305
|
-
VALUE rb_output_buf;
|
306
|
-
unsigned char *inBuf, *outBuf;
|
307
|
-
size_t len, new_len;
|
308
|
-
|
309
|
-
if (rb_scan_args(argc, argv, "11", &str, &rb_secure) == 2) {
|
310
|
-
if (rb_secure == Qfalse) {
|
311
|
-
secure = 0;
|
312
|
-
}
|
313
|
-
}
|
314
|
-
|
315
|
-
Check_Type(str, T_STRING);
|
316
38
|
|
317
|
-
|
318
|
-
|
39
|
+
/**
|
40
|
+
* Generic template
|
41
|
+
*/
|
42
|
+
static VALUE
|
43
|
+
rb_eu__generic(
|
44
|
+
VALUE self, VALUE str,
|
45
|
+
houdini_cb callback,
|
46
|
+
size_t chunk_size)
|
47
|
+
{
|
48
|
+
VALUE result;
|
49
|
+
struct buf *out_buf;
|
319
50
|
|
320
|
-
|
321
|
-
|
322
|
-
new_len = sizeof(unsigned char)*(len*5);
|
51
|
+
if (NIL_P(str))
|
52
|
+
return rb_str_new2("");
|
323
53
|
|
324
|
-
|
325
|
-
|
326
|
-
outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
|
54
|
+
Check_Type(str, T_STRING);
|
55
|
+
out_buf = bufnew(chunk_size);
|
327
56
|
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
// shrink our new ruby string
|
332
|
-
rb_str_resize(rb_output_buf, new_len);
|
57
|
+
callback(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str));
|
58
|
+
result = rb_str_new((char *)out_buf->data, out_buf->size);
|
59
|
+
bufrelease(out_buf);
|
333
60
|
|
334
61
|
#ifdef HAVE_RUBY_ENCODING_H
|
335
|
-
|
62
|
+
rb_enc_copy(result, str);
|
336
63
|
#endif
|
337
|
-
return rb_output_buf;
|
338
|
-
}
|
339
|
-
|
340
|
-
static VALUE rb_unescape_html(VALUE self, VALUE str) {
|
341
|
-
VALUE rb_output_buf;
|
342
|
-
unsigned char *inBuf, *outBuf;
|
343
|
-
size_t len, new_len;
|
344
|
-
|
345
|
-
Check_Type(str, T_STRING);
|
346
|
-
inBuf = (unsigned char*)RSTRING_PTR(str);
|
347
|
-
len = RSTRING_LEN(str);
|
348
64
|
|
349
|
-
|
350
|
-
// TODO: we could be more intelligent about this, but probably not
|
351
|
-
new_len = sizeof(unsigned char) * len;
|
352
|
-
|
353
|
-
// create our new ruby string
|
354
|
-
rb_output_buf = rb_str_new(NULL, new_len);
|
355
|
-
outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
|
356
|
-
|
357
|
-
// perform our escape, returning the new string's length
|
358
|
-
new_len = unescape_html(outBuf, inBuf, len);
|
359
|
-
|
360
|
-
// shrink our new ruby string
|
361
|
-
rb_str_resize(rb_output_buf, new_len);
|
362
|
-
|
363
|
-
#ifdef HAVE_RUBY_ENCODING_H
|
364
|
-
rb_enc_copy(rb_output_buf, str);
|
365
|
-
#endif
|
366
|
-
return rb_output_buf;
|
65
|
+
return result;
|
367
66
|
}
|
368
67
|
|
369
|
-
static VALUE rb_escape_javascript(VALUE self, VALUE str) {
|
370
|
-
VALUE rb_output_buf;
|
371
|
-
unsigned char *inBuf, *outBuf;
|
372
|
-
size_t len, new_len;
|
373
|
-
|
374
|
-
if (str == Qnil) {
|
375
|
-
return rb_str_new2("");
|
376
|
-
}
|
377
68
|
|
378
|
-
|
69
|
+
/**
|
70
|
+
* HTML methods
|
71
|
+
*/
|
72
|
+
static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
|
73
|
+
{
|
74
|
+
VALUE rb_out_buf, str, rb_secure;
|
75
|
+
struct buf *out_buf;
|
76
|
+
int secure = g_html_secure;
|
379
77
|
|
380
|
-
|
381
|
-
|
78
|
+
if (rb_scan_args(argc, argv, "11", &str, &rb_secure) == 2) {
|
79
|
+
if (rb_secure == Qfalse) {
|
80
|
+
secure = 0;
|
81
|
+
}
|
82
|
+
}
|
382
83
|
|
383
|
-
|
384
|
-
|
385
|
-
new_len = sizeof(unsigned char)*(len*2);
|
84
|
+
Check_Type(str, T_STRING);
|
85
|
+
out_buf = bufnew(128);
|
386
86
|
|
387
|
-
|
388
|
-
rb_output_buf = rb_str_new(NULL, new_len);
|
389
|
-
outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
|
87
|
+
houdini_escape_html(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure);
|
390
88
|
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
// shrink our new ruby string
|
395
|
-
rb_str_resize(rb_output_buf, new_len);
|
89
|
+
rb_out_buf = rb_str_new((char *)out_buf->data, out_buf->size);
|
90
|
+
bufrelease(out_buf);
|
396
91
|
|
397
92
|
#ifdef HAVE_RUBY_ENCODING_H
|
398
|
-
|
93
|
+
rb_enc_copy(rb_out_buf, str);
|
399
94
|
#endif
|
400
|
-
return rb_output_buf;
|
401
|
-
}
|
402
|
-
|
403
|
-
static VALUE rb_unescape_javascript(VALUE self, VALUE str) {
|
404
|
-
VALUE rb_output_buf;
|
405
|
-
unsigned char *inBuf, *outBuf;
|
406
|
-
size_t len, new_len;
|
407
|
-
|
408
|
-
if (str == Qnil) {
|
409
|
-
return rb_str_new2("");
|
410
|
-
}
|
411
|
-
|
412
|
-
Check_Type(str, T_STRING);
|
413
|
-
|
414
|
-
inBuf = (unsigned char*)RSTRING_PTR(str);
|
415
|
-
len = RSTRING_LEN(str);
|
416
|
-
|
417
|
-
// this is the max size the string could be
|
418
|
-
// TODO: we could be more intelligent about this, but probably not
|
419
|
-
new_len = sizeof(unsigned char) * len;
|
420
|
-
|
421
|
-
// create our new ruby string
|
422
|
-
rb_output_buf = rb_str_new(NULL, new_len);
|
423
|
-
outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
|
424
|
-
|
425
|
-
// perform our escape, returning the new string's length
|
426
|
-
new_len = unescape_javascript(outBuf, inBuf, len);
|
427
|
-
|
428
|
-
// shrink our new ruby string
|
429
|
-
rb_str_resize(rb_output_buf, new_len);
|
430
95
|
|
431
|
-
|
432
|
-
rb_enc_copy(rb_output_buf, str);
|
433
|
-
#endif
|
434
|
-
return rb_output_buf;
|
96
|
+
return rb_out_buf;
|
435
97
|
}
|
436
98
|
|
437
|
-
static VALUE
|
438
|
-
|
439
|
-
|
440
|
-
size_t len, new_len;
|
441
|
-
|
442
|
-
Check_Type(str, T_STRING);
|
443
|
-
|
444
|
-
inBuf = (unsigned char*)RSTRING_PTR(str);
|
445
|
-
len = RSTRING_LEN(str);
|
446
|
-
|
447
|
-
// this is the max size the string could be
|
448
|
-
// TODO: we should try to be more intelligent about this
|
449
|
-
new_len = sizeof(unsigned char)*(len*3);
|
450
|
-
|
451
|
-
// create our new ruby string
|
452
|
-
rb_output_buf = rb_str_new(NULL, new_len);
|
453
|
-
outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
|
454
|
-
|
455
|
-
// perform our escape, returning the new string's length
|
456
|
-
new_len = escape_url(outBuf, inBuf, len);
|
457
|
-
|
458
|
-
// shrink our new ruby string
|
459
|
-
rb_str_resize(rb_output_buf, new_len);
|
460
|
-
|
461
|
-
#ifdef HAVE_RUBY_ENCODING_H
|
462
|
-
rb_enc_copy(rb_output_buf, str);
|
463
|
-
#endif
|
464
|
-
return rb_output_buf;
|
99
|
+
static VALUE rb_eu_unescape_html(VALUE self, VALUE str)
|
100
|
+
{
|
101
|
+
return rb_eu__generic(self, str, &houdini_unescape_html, 128);
|
465
102
|
}
|
466
103
|
|
467
|
-
static VALUE rb_unescape_url(VALUE self, VALUE str) {
|
468
|
-
Check_Type(str, T_STRING);
|
469
|
-
|
470
|
-
VALUE rb_output_buf;
|
471
|
-
unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
|
472
|
-
size_t len = RSTRING_LEN(str);
|
473
|
-
|
474
|
-
// this is the max size the string could be
|
475
|
-
// TODO: we could be more intelligent about this, but probably not
|
476
|
-
size_t new_len = sizeof(unsigned char) * len;
|
477
|
-
unsigned char *outBuf;
|
478
104
|
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
// shrink our new ruby string
|
487
|
-
rb_str_resize(rb_output_buf, new_len);
|
488
|
-
|
489
|
-
#ifdef HAVE_RUBY_ENCODING_H
|
490
|
-
rb_enc_copy(rb_output_buf, str);
|
491
|
-
#endif
|
492
|
-
return rb_output_buf;
|
105
|
+
/**
|
106
|
+
* JavaScript methods
|
107
|
+
*/
|
108
|
+
static VALUE rb_eu_escape_js(VALUE self, VALUE str)
|
109
|
+
{
|
110
|
+
return rb_eu__generic(self, str, &houdini_escape_js, 128);
|
493
111
|
}
|
494
112
|
|
495
|
-
static VALUE
|
496
|
-
|
497
|
-
|
498
|
-
VALUE rb_output_buf;
|
499
|
-
unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
|
500
|
-
size_t len = RSTRING_LEN(str);
|
501
|
-
unsigned char *outBuf;
|
502
|
-
|
503
|
-
// this is the max size the string could be
|
504
|
-
// TODO: we should try to be more intelligent about this
|
505
|
-
size_t new_len = sizeof(unsigned char)*(len*3);
|
506
|
-
|
507
|
-
// create our new ruby string
|
508
|
-
rb_output_buf = rb_str_new(NULL, new_len);
|
509
|
-
outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
|
510
|
-
|
511
|
-
// perform our escape, returning the new string's length
|
512
|
-
new_len = escape_uri(outBuf, inBuf, len);
|
513
|
-
|
514
|
-
// shrink our new ruby string
|
515
|
-
rb_str_resize(rb_output_buf, new_len);
|
516
|
-
|
517
|
-
#ifdef HAVE_RUBY_ENCODING_H
|
518
|
-
rb_enc_copy(rb_output_buf, str);
|
519
|
-
#endif
|
520
|
-
return rb_output_buf;
|
113
|
+
static VALUE rb_eu_unescape_js(VALUE self, VALUE str)
|
114
|
+
{
|
115
|
+
return rb_eu__generic(self, str, &houdini_unescape_js, 128);
|
521
116
|
}
|
522
117
|
|
523
|
-
static VALUE rb_unescape_uri(VALUE self, VALUE str) {
|
524
|
-
Check_Type(str, T_STRING);
|
525
118
|
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
size_t new_len = sizeof(unsigned char)*len;
|
534
|
-
|
535
|
-
// create our new ruby string
|
536
|
-
rb_output_buf = rb_str_new(NULL, new_len);
|
537
|
-
outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
|
119
|
+
/**
|
120
|
+
* URL methods
|
121
|
+
*/
|
122
|
+
static VALUE rb_eu_escape_url(VALUE self, VALUE str)
|
123
|
+
{
|
124
|
+
return rb_eu__generic(self, str, &houdini_escape_url, 32);
|
125
|
+
}
|
538
126
|
|
539
|
-
|
540
|
-
|
127
|
+
static VALUE rb_eu_unescape_url(VALUE self, VALUE str)
|
128
|
+
{
|
129
|
+
return rb_eu__generic(self, str, &houdini_unescape_url, 32);
|
130
|
+
}
|
541
131
|
|
542
|
-
// shrink our new ruby string
|
543
|
-
rb_str_resize(rb_output_buf, new_len);
|
544
132
|
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
133
|
+
/**
|
134
|
+
* URI methods
|
135
|
+
*/
|
136
|
+
static VALUE rb_eu_escape_uri(VALUE self, VALUE str)
|
137
|
+
{
|
138
|
+
return rb_eu__generic(self, str, &houdini_escape_uri, 32);
|
549
139
|
}
|
550
140
|
|
551
|
-
static VALUE
|
552
|
-
|
141
|
+
static VALUE rb_eu_unescape_uri(VALUE self, VALUE str)
|
142
|
+
{
|
143
|
+
return rb_eu__generic(self, str, &houdini_unescape_uri, 32);
|
553
144
|
}
|
554
145
|
|
555
|
-
static VALUE rb_s_set_html_secure(VALUE self, VALUE val) {
|
556
|
-
html_secure = RTEST(val);
|
557
|
-
rb_cvar_set(self, rb_html_secure, val);
|
558
146
|
|
559
|
-
|
560
|
-
|
147
|
+
/**
|
148
|
+
* Ruby Extension initializer
|
149
|
+
*/
|
150
|
+
void Init_escape_utils()
|
151
|
+
{
|
152
|
+
rb_mEscapeUtils = rb_define_module("EscapeUtils");
|
153
|
+
rb_define_method(rb_mEscapeUtils, "escape_html", rb_eu_escape_html, -1);
|
154
|
+
rb_define_method(rb_mEscapeUtils, "unescape_html", rb_eu_unescape_html, 1);
|
155
|
+
rb_define_method(rb_mEscapeUtils, "escape_javascript", rb_eu_escape_js, 1);
|
156
|
+
rb_define_method(rb_mEscapeUtils, "unescape_javascript", rb_eu_unescape_js, 1);
|
157
|
+
rb_define_method(rb_mEscapeUtils, "escape_url", rb_eu_escape_url, 1);
|
158
|
+
rb_define_method(rb_mEscapeUtils, "unescape_url", rb_eu_unescape_url, 1);
|
159
|
+
rb_define_method(rb_mEscapeUtils, "escape_uri", rb_eu_escape_uri, 1);
|
160
|
+
rb_define_method(rb_mEscapeUtils, "unescape_uri", rb_eu_unescape_uri, 1);
|
161
|
+
|
162
|
+
rb_define_singleton_method(rb_mEscapeUtils, "html_secure", rb_eu_get_html_secure, 0);
|
163
|
+
rb_define_singleton_method(rb_mEscapeUtils, "html_secure=", rb_eu_set_html_secure, 1);
|
561
164
|
|
562
|
-
|
563
|
-
void Init_escape_utils() {
|
564
|
-
mEscapeUtils = rb_define_module("EscapeUtils");
|
565
|
-
rb_define_method(mEscapeUtils, "escape_html", rb_escape_html, -1);
|
566
|
-
rb_define_module_function(mEscapeUtils, "escape_html", rb_escape_html, -1);
|
567
|
-
rb_define_method(mEscapeUtils, "unescape_html", rb_unescape_html, 1);
|
568
|
-
rb_define_module_function(mEscapeUtils, "unescape_html", rb_unescape_html, 1);
|
569
|
-
rb_define_method(mEscapeUtils, "escape_javascript", rb_escape_javascript, 1);
|
570
|
-
rb_define_module_function(mEscapeUtils, "escape_javascript", rb_escape_javascript, 1);
|
571
|
-
rb_define_method(mEscapeUtils, "unescape_javascript", rb_unescape_javascript, 1);
|
572
|
-
rb_define_module_function(mEscapeUtils, "unescape_javascript", rb_unescape_javascript, 1);
|
573
|
-
rb_define_method(mEscapeUtils, "escape_url", rb_escape_url, 1);
|
574
|
-
rb_define_module_function(mEscapeUtils, "escape_url", rb_escape_url, 1);
|
575
|
-
rb_define_method(mEscapeUtils, "unescape_url", rb_unescape_url, 1);
|
576
|
-
rb_define_module_function(mEscapeUtils, "unescape_url", rb_unescape_url, 1);
|
577
|
-
rb_define_method(mEscapeUtils, "escape_uri", rb_escape_uri, 1);
|
578
|
-
rb_define_module_function(mEscapeUtils, "escape_uri", rb_escape_uri, 1);
|
579
|
-
rb_define_method(mEscapeUtils, "unescape_uri", rb_unescape_uri, 1);
|
580
|
-
rb_define_module_function(mEscapeUtils, "unescape_uri", rb_unescape_uri, 1);
|
581
|
-
|
582
|
-
rb_define_singleton_method(mEscapeUtils, "html_secure", rb_s_get_html_secure, 0);
|
583
|
-
rb_define_singleton_method(mEscapeUtils, "html_secure=", rb_s_set_html_secure, 1);
|
584
|
-
|
585
|
-
rb_html_secure = rb_intern("@@html_secure");
|
165
|
+
rb_html_secure = rb_intern("@@html_secure");
|
586
166
|
}
|
587
167
|
|