escape_utils 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -1
- data/CHANGELOG.md +4 -0
- data/README.md +206 -0
- data/benchmark/html_escape.rb +1 -0
- data/benchmark/html_unescape.rb +1 -0
- data/benchmark/javascript_escape.rb +1 -0
- data/benchmark/javascript_unescape.rb +1 -0
- data/benchmark/url_escape.rb +1 -0
- data/benchmark/url_unescape.rb +1 -0
- data/escape_utils.gemspec +0 -3
- data/ext/escape_utils/buffer.c +228 -0
- data/ext/escape_utils/buffer.h +91 -0
- data/ext/escape_utils/escape_utils.c +111 -531
- data/ext/escape_utils/houdini.h +15 -0
- data/ext/escape_utils/houdini_html.c +214 -0
- data/ext/escape_utils/houdini_js.c +148 -0
- data/ext/escape_utils/houdini_uri.c +130 -0
- data/ext/escape_utils/html_unescape.h +754 -0
- data/ext/escape_utils/uri_escape.h +35 -0
- data/lib/escape_utils.rb +2 -2
- data/lib/escape_utils/html/cgi.rb +0 -2
- data/lib/escape_utils/html/erb.rb +0 -2
- data/lib/escape_utils/html/haml.rb +0 -2
- data/lib/escape_utils/html/rack.rb +0 -2
- data/lib/escape_utils/html_safety.rb +0 -2
- data/lib/escape_utils/javascript/action_view.rb +0 -2
- data/lib/escape_utils/url/cgi.rb +0 -2
- data/lib/escape_utils/url/erb.rb +0 -2
- data/lib/escape_utils/url/rack.rb +0 -2
- data/lib/escape_utils/url/uri.rb +0 -2
- data/lib/escape_utils/version.rb +1 -1
- data/spec/html/escape_spec.rb +0 -1
- data/spec/html/unescape_spec.rb +0 -1
- data/spec/html_safety_spec.rb +0 -1
- data/spec/javascript/escape_spec.rb +0 -1
- data/spec/javascript/unescape_spec.rb +0 -1
- data/spec/query/escape_spec.rb +0 -1
- data/spec/query/unescape_spec.rb +1 -0
- data/spec/spec_helper.rb +0 -1
- data/spec/uri/escape_spec.rb +0 -1
- data/spec/uri/unescape_spec.rb +1 -0
- data/spec/url/escape_spec.rb +0 -1
- data/spec/url/unescape_spec.rb +1 -0
- metadata +16 -8
- data/README.rdoc +0 -146
@@ -0,0 +1,91 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2008, Natacha Porté
|
3
|
+
* Copyright (c) 2011, Vicent Martí
|
4
|
+
*
|
5
|
+
* Permission to use, copy, modify, and distribute this software for any
|
6
|
+
* purpose with or without fee is hereby granted, provided that the above
|
7
|
+
* copyright notice and this permission notice appear in all copies.
|
8
|
+
*
|
9
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
10
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
11
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
12
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
13
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
14
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
15
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
16
|
+
*/
|
17
|
+
|
18
|
+
#ifndef __GEN_BUFFER_H__
|
19
|
+
#define __GEN_BUFFER_H__
|
20
|
+
|
21
|
+
#include <stddef.h>
|
22
|
+
#include <stdarg.h>
|
23
|
+
#include <stdint.h>
|
24
|
+
|
25
|
+
#if defined(_MSC_VER)
|
26
|
+
#define __attribute__(x)
|
27
|
+
#define inline
|
28
|
+
#endif
|
29
|
+
|
30
|
+
typedef enum {
|
31
|
+
BUF_OK = 0,
|
32
|
+
BUF_ENOMEM = -1,
|
33
|
+
} buferror_t;
|
34
|
+
|
35
|
+
/* struct buf: character array buffer */
|
36
|
+
struct buf {
|
37
|
+
uint8_t *data; /* actual character data */
|
38
|
+
size_t size; /* size of the string */
|
39
|
+
size_t asize; /* allocated size (0 = volatile buffer) */
|
40
|
+
size_t unit; /* reallocation unit size (0 = read-only buffer) */
|
41
|
+
};
|
42
|
+
|
43
|
+
/* BUF_STATIC: global buffer from a string literal */
|
44
|
+
#define BUF_STATIC(string) \
|
45
|
+
{ (uint8_t *)string, sizeof string -1, sizeof string, 0, 0 }
|
46
|
+
|
47
|
+
/* BUF_VOLATILE: macro for creating a volatile buffer on the stack */
|
48
|
+
#define BUF_VOLATILE(strname) \
|
49
|
+
{ (uint8_t *)strname, strlen(strname), 0, 0, 0 }
|
50
|
+
|
51
|
+
/* BUFPUTSL: optimized bufputs of a string literal */
|
52
|
+
#define BUFPUTSL(output, literal) \
|
53
|
+
bufput(output, literal, sizeof literal - 1)
|
54
|
+
|
55
|
+
/* bufgrow: increasing the allocated size to the given value */
|
56
|
+
int bufgrow(struct buf *, size_t);
|
57
|
+
|
58
|
+
/* bufnew: allocation of a new buffer */
|
59
|
+
struct buf *bufnew(size_t) __attribute__ ((malloc));
|
60
|
+
|
61
|
+
/* bufcstr: NUL-termination of the string array (making a C-string) */
|
62
|
+
const char *bufcstr(struct buf *);
|
63
|
+
|
64
|
+
/* bufprefix: compare the beginning of a buffer with a string */
|
65
|
+
int bufprefix(const struct buf *buf, const char *prefix);
|
66
|
+
|
67
|
+
/* bufput: appends raw data to a buffer */
|
68
|
+
void bufput(struct buf *, const void *, size_t);
|
69
|
+
|
70
|
+
/* bufputs: appends a NUL-terminated string to a buffer */
|
71
|
+
void bufputs(struct buf *, const char *);
|
72
|
+
|
73
|
+
/* bufputc: appends a single char to a buffer */
|
74
|
+
void bufputc(struct buf *, int);
|
75
|
+
|
76
|
+
/* bufrelease: decrease the reference count and free the buffer if needed */
|
77
|
+
void bufrelease(struct buf *);
|
78
|
+
|
79
|
+
/* bufreset: frees internal data of the buffer */
|
80
|
+
void bufreset(struct buf *);
|
81
|
+
|
82
|
+
/* bufslurp: removes a given number of bytes from the head of the array */
|
83
|
+
void bufslurp(struct buf *, size_t);
|
84
|
+
|
85
|
+
/* bufprintf: formatted printing to a buffer */
|
86
|
+
void bufprintf(struct buf *, const char *, ...) __attribute__ ((format (printf, 2, 3)));
|
87
|
+
|
88
|
+
/* vbufprintf: stdarg variant of formatted printing into a buffer */
|
89
|
+
void vbufprintf(struct buf *, const char * , va_list);
|
90
|
+
|
91
|
+
#endif
|
@@ -1,3 +1,8 @@
|
|
1
|
+
// tell rbx not to use its caching compat layer
|
2
|
+
// by doing this we're making a promise to RBX that
|
3
|
+
// we'll never modify the pointers we get back from RSTRING_PTR
|
4
|
+
#define RSTRING_NOT_MODIFIED
|
5
|
+
|
1
6
|
#include <ruby.h>
|
2
7
|
#if RB_CVAR_SET_ARITY == 4
|
3
8
|
# define rb_cvar_set(a,b,c) rb_cvar_set(a,b,c,0)
|
@@ -6,582 +11,157 @@
|
|
6
11
|
#include <ruby/encoding.h>
|
7
12
|
#endif
|
8
13
|
|
9
|
-
|
10
|
-
static ID rb_html_secure;
|
11
|
-
static int html_secure = 1;
|
12
|
-
|
13
|
-
#define IS_HEX(c) ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'))
|
14
|
-
#define UNHEX(c) (c >= '0' && c <= '9' ? c - '0' : c >= 'A' && c <= 'F' ? c - 'A' + 10 : c - 'a' + 10)
|
15
|
-
|
16
|
-
#define ALPHANUM(c) ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9'))
|
17
|
-
#define URL_SAFE(c) (ALPHANUM(c) || c == '-' || c == '_' || c == '.')
|
18
|
-
|
19
|
-
/* from uri/common.rb */
|
20
|
-
#define UNRESERVED(c) (ALPHANUM(c) || c == '-' || c == '_' || c == '.' || c == '!' || c == '~' || c == '*' || c == '\'' || c == '(' || c == ')')
|
21
|
-
#define RESERVED(c) (c == ';' || c == '/' || c == '?' || c == ':' || c == '@' || c== '&' || c == '=' || c == '+' || c == '$' || c == ',' || c == '[' || c == ']')
|
22
|
-
#define URI_SAFE(c) (URL_SAFE(c) || UNRESERVED(c) || RESERVED(c))
|
23
|
-
|
24
|
-
static size_t escape_html(unsigned char *out, const unsigned char *in, size_t in_len, int secure) {
|
25
|
-
size_t total = 0;
|
26
|
-
unsigned char curChar;
|
27
|
-
|
28
|
-
total = in_len;
|
29
|
-
while (in_len) {
|
30
|
-
curChar = *in++;
|
31
|
-
|
32
|
-
if (curChar == '<') {
|
33
|
-
*out++ = '&'; *out++ = 'l'; *out++ = 't'; *out++ = ';';
|
34
|
-
total += 3;
|
35
|
-
} else if (curChar == '>') {
|
36
|
-
*out++ = '&'; *out++ = 'g'; *out++ = 't'; *out++ = ';';
|
37
|
-
total += 3;
|
38
|
-
} else if (curChar == '&') {
|
39
|
-
*out++ = '&'; *out++ = 'a'; *out++ = 'm'; *out++ = 'p'; *out++ = ';';
|
40
|
-
total += 4;
|
41
|
-
} else if (curChar == '\'') {
|
42
|
-
*out++ = '&'; *out++ = '#'; *out++ = '3'; *out++ = '9'; *out++ = ';';
|
43
|
-
total += 4;
|
44
|
-
} else if (curChar == '\"') {
|
45
|
-
*out++ = '&'; *out++ = 'q'; *out++ = 'u'; *out++ = 'o'; *out++ = 't'; *out++ = ';';
|
46
|
-
total += 5;
|
47
|
-
} else if (secure && curChar == '/') {
|
48
|
-
*out++ = '&'; *out++ = '#'; *out++ = '4'; *out++ = '7'; *out++ = ';';
|
49
|
-
total += 4;
|
50
|
-
} else {
|
51
|
-
*out++ = curChar;
|
52
|
-
}
|
53
|
-
in_len--;
|
54
|
-
}
|
55
|
-
|
56
|
-
return total;
|
57
|
-
}
|
14
|
+
#include "houdini.h"
|
58
15
|
|
59
|
-
|
60
|
-
size_t total = 0, len = in_len;
|
61
|
-
unsigned char curChar, *start;
|
62
|
-
|
63
|
-
start = (unsigned char *)&in[0];
|
64
|
-
total = in_len;
|
65
|
-
while (len) {
|
66
|
-
curChar = *in++;
|
67
|
-
if (curChar == '&') {
|
68
|
-
if (*in == 'l' && *(in+1) == 't' && *(in+2) == ';') {
|
69
|
-
*out++ = '<';
|
70
|
-
total-=3;
|
71
|
-
in+=3;
|
72
|
-
len-=3;
|
73
|
-
} else if (*in == 'g' && *(in+1) == 't' && *(in+2) == ';') {
|
74
|
-
*out++ = '>';
|
75
|
-
total-=3;
|
76
|
-
in+=3;
|
77
|
-
len-=3;
|
78
|
-
} else if (*in == 'a' && *(in+1) == 'm' && *(in+2) == 'p' && *(in+3) == ';') {
|
79
|
-
*out++ = '&';
|
80
|
-
total-=4;
|
81
|
-
in+=4;
|
82
|
-
len-=4;
|
83
|
-
} else if (*in == '#' && *(in+1) == '3' && *(in+2) == '9' && *(in+3) == ';') {
|
84
|
-
*out++ = '\'';
|
85
|
-
total-=4;
|
86
|
-
in+=4;
|
87
|
-
len-=4;
|
88
|
-
} else if (*in == '#' && *(in+1) == '4' && *(in+2) == '7' && *(in+3) == ';') {
|
89
|
-
*out++ = '/';
|
90
|
-
total-=4;
|
91
|
-
in+=4;
|
92
|
-
len-=4;
|
93
|
-
} else if (*in == 'q' && *(in+1) == 'u' && *(in+2) == 'o' && *(in+3) == 't' && *(in+4) == ';') {
|
94
|
-
*out++ = '\"';
|
95
|
-
total-=5;
|
96
|
-
in+=5;
|
97
|
-
len-=5;
|
98
|
-
} else {
|
99
|
-
/* incomplete tag, pass it through */
|
100
|
-
*out++ = curChar;
|
101
|
-
}
|
102
|
-
} else {
|
103
|
-
*out++ = curChar;
|
104
|
-
}
|
105
|
-
len--;
|
106
|
-
}
|
107
|
-
|
108
|
-
return total;
|
109
|
-
}
|
16
|
+
typedef void (*houdini_cb)(struct buf *, const uint8_t *, size_t);
|
110
17
|
|
111
|
-
static
|
112
|
-
size_t total = 0;
|
113
|
-
unsigned char curChar;
|
114
|
-
|
115
|
-
total = in_len;
|
116
|
-
while (in_len) {
|
117
|
-
curChar = *in++;
|
118
|
-
switch (curChar) {
|
119
|
-
case '\\':
|
120
|
-
*out++ = '\\'; *out++ = '\\';
|
121
|
-
total++;
|
122
|
-
break;
|
123
|
-
case '<':
|
124
|
-
*out++ = '<';
|
125
|
-
if (*in == '/') {
|
126
|
-
*out++ = '\\'; *out++ = '/';
|
127
|
-
in++; in_len--;
|
128
|
-
total++;
|
129
|
-
}
|
130
|
-
break;
|
131
|
-
case '\r':
|
132
|
-
if (*in == '\n') {
|
133
|
-
*out++ = '\\'; *out++ = 'n';
|
134
|
-
in++; in_len--;
|
135
|
-
} else {
|
136
|
-
*out++ = '\\'; *out++ = 'n';
|
137
|
-
total++;
|
138
|
-
}
|
139
|
-
break;
|
140
|
-
case '\n':
|
141
|
-
*out++ = '\\'; *out++ = 'n';
|
142
|
-
total++;
|
143
|
-
break;
|
144
|
-
case '\'':
|
145
|
-
*out++ = '\\'; *out++ = '\'';
|
146
|
-
total++;
|
147
|
-
break;
|
148
|
-
case '\"':
|
149
|
-
*out++ = '\\'; *out++ = '\"';
|
150
|
-
total++;
|
151
|
-
break;
|
152
|
-
default:
|
153
|
-
*out++ = curChar;
|
154
|
-
break;
|
155
|
-
}
|
156
|
-
in_len--;
|
157
|
-
}
|
158
|
-
|
159
|
-
return total;
|
160
|
-
}
|
161
|
-
|
162
|
-
static size_t unescape_javascript(unsigned char *out, const unsigned char *in, size_t in_len) {
|
163
|
-
size_t total = 0;
|
164
|
-
unsigned char curChar;
|
165
|
-
|
166
|
-
total = in_len;
|
167
|
-
while (in_len) {
|
168
|
-
curChar = *in++;
|
169
|
-
if (curChar == '\\') {
|
170
|
-
if (*in == 'n') {
|
171
|
-
*out++ = '\n';
|
172
|
-
total--;
|
173
|
-
} else if (*in == '\\') {
|
174
|
-
*out++ = '\\';
|
175
|
-
total--;
|
176
|
-
} else if (*in == '\'') {
|
177
|
-
*out++ = '\'';
|
178
|
-
total--;
|
179
|
-
} else if (*in == '\"') {
|
180
|
-
*out++ = '\"';
|
181
|
-
total--;
|
182
|
-
} else if (*in == '/') {
|
183
|
-
*out++ = '/';
|
184
|
-
total--;
|
185
|
-
} else {
|
186
|
-
/* incomplete escape, pass it through */
|
187
|
-
*out++ = curChar;
|
188
|
-
continue;
|
189
|
-
}
|
190
|
-
in++; in_len--;
|
191
|
-
} else {
|
192
|
-
*out++ = curChar;
|
193
|
-
}
|
194
|
-
in_len--;
|
195
|
-
}
|
196
|
-
|
197
|
-
return total;
|
198
|
-
}
|
199
|
-
|
200
|
-
static size_t escape_url(unsigned char *out, const unsigned char *in, size_t in_len) {
|
201
|
-
size_t total = 0;
|
202
|
-
unsigned char curChar, hex[2];
|
203
|
-
const unsigned char hexChars[16] = "0123456789ABCDEF";
|
204
|
-
|
205
|
-
total = in_len;
|
206
|
-
while (in_len) {
|
207
|
-
curChar = *in++;
|
208
|
-
if (curChar == ' ') {
|
209
|
-
*out++ = '+';
|
210
|
-
} else if (URL_SAFE(curChar)) {
|
211
|
-
*out++ = curChar;
|
212
|
-
} else {
|
213
|
-
hex[1] = hexChars[curChar & 0x0f];
|
214
|
-
hex[0] = hexChars[(curChar >> 4) & 0x0f];
|
215
|
-
*out++ = '%'; *out++ = hex[0]; *out++ = hex[1];
|
216
|
-
total += 2;
|
217
|
-
}
|
218
|
-
in_len--;
|
219
|
-
}
|
220
|
-
|
221
|
-
return total;
|
222
|
-
}
|
18
|
+
static VALUE rb_mEscapeUtils;
|
223
19
|
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
total = in_len;
|
230
|
-
while (len) {
|
231
|
-
curChar = *in++;
|
232
|
-
if (curChar == '%') {
|
233
|
-
if (IS_HEX(*in) && IS_HEX(*(in+1))) {
|
234
|
-
*out++ = (UNHEX(*in) << 4) + UNHEX(*(in+1));
|
235
|
-
in+=2;
|
236
|
-
len-=2;
|
237
|
-
total-=2;
|
238
|
-
} else {
|
239
|
-
/* incomplete escape, pass it through */
|
240
|
-
*out++ = curChar;
|
241
|
-
}
|
242
|
-
} else if (curChar == '+') {
|
243
|
-
*out++ = ' ';
|
244
|
-
} else {
|
245
|
-
*out++ = curChar;
|
246
|
-
}
|
247
|
-
len--;
|
248
|
-
}
|
249
|
-
|
250
|
-
return total;
|
251
|
-
}
|
20
|
+
/**
|
21
|
+
* html_secure class variable (@@html_secure)
|
22
|
+
*/
|
23
|
+
static ID rb_html_secure;
|
24
|
+
static int g_html_secure = 1;
|
252
25
|
|
253
|
-
static
|
254
|
-
|
255
|
-
|
256
|
-
const unsigned char hexChars[16] = "0123456789ABCDEF";
|
257
|
-
|
258
|
-
total = in_len;
|
259
|
-
while (in_len) {
|
260
|
-
curChar = *in++;
|
261
|
-
if (URI_SAFE(curChar)) {
|
262
|
-
*out++ = curChar;
|
263
|
-
} else {
|
264
|
-
hex[1] = hexChars[curChar & 0x0f];
|
265
|
-
hex[0] = hexChars[(curChar >> 4) & 0x0f];
|
266
|
-
*out++ = '%'; *out++ = hex[0]; *out++ = hex[1];
|
267
|
-
total += 2;
|
268
|
-
}
|
269
|
-
in_len--;
|
270
|
-
}
|
271
|
-
|
272
|
-
return total;
|
26
|
+
static VALUE rb_eu_get_html_secure(VALUE self)
|
27
|
+
{
|
28
|
+
return rb_cvar_get(self, rb_html_secure);
|
273
29
|
}
|
274
30
|
|
275
|
-
static
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
total = in_len;
|
281
|
-
while (len) {
|
282
|
-
curChar = *in++;
|
283
|
-
if (curChar == '%') {
|
284
|
-
if (IS_HEX(*in) && IS_HEX(*(in+1))) {
|
285
|
-
*out++ = (UNHEX(*in) << 4) + UNHEX(*(in+1));
|
286
|
-
in+=2;
|
287
|
-
len-=2;
|
288
|
-
total-=2;
|
289
|
-
} else {
|
290
|
-
/* incomplete escape, pass it through */
|
291
|
-
*out++ = curChar;
|
292
|
-
}
|
293
|
-
} else {
|
294
|
-
*out++ = curChar;
|
295
|
-
}
|
296
|
-
len--;
|
297
|
-
}
|
298
|
-
|
299
|
-
return total;
|
31
|
+
static VALUE rb_eu_set_html_secure(VALUE self, VALUE val)
|
32
|
+
{
|
33
|
+
g_html_secure = RTEST(val);
|
34
|
+
rb_cvar_set(self, rb_html_secure, val);
|
35
|
+
return val;
|
300
36
|
}
|
301
37
|
|
302
|
-
static VALUE rb_escape_html(int argc, VALUE * argv, VALUE self) {
|
303
|
-
VALUE str, rb_secure;
|
304
|
-
int secure = html_secure;
|
305
|
-
VALUE rb_output_buf;
|
306
|
-
unsigned char *inBuf, *outBuf;
|
307
|
-
size_t len, new_len;
|
308
|
-
|
309
|
-
if (rb_scan_args(argc, argv, "11", &str, &rb_secure) == 2) {
|
310
|
-
if (rb_secure == Qfalse) {
|
311
|
-
secure = 0;
|
312
|
-
}
|
313
|
-
}
|
314
|
-
|
315
|
-
Check_Type(str, T_STRING);
|
316
38
|
|
317
|
-
|
318
|
-
|
39
|
+
/**
|
40
|
+
* Generic template
|
41
|
+
*/
|
42
|
+
static VALUE
|
43
|
+
rb_eu__generic(
|
44
|
+
VALUE self, VALUE str,
|
45
|
+
houdini_cb callback,
|
46
|
+
size_t chunk_size)
|
47
|
+
{
|
48
|
+
VALUE result;
|
49
|
+
struct buf *out_buf;
|
319
50
|
|
320
|
-
|
321
|
-
|
322
|
-
new_len = sizeof(unsigned char)*(len*5);
|
51
|
+
if (NIL_P(str))
|
52
|
+
return rb_str_new2("");
|
323
53
|
|
324
|
-
|
325
|
-
|
326
|
-
outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
|
54
|
+
Check_Type(str, T_STRING);
|
55
|
+
out_buf = bufnew(chunk_size);
|
327
56
|
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
// shrink our new ruby string
|
332
|
-
rb_str_resize(rb_output_buf, new_len);
|
57
|
+
callback(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str));
|
58
|
+
result = rb_str_new((char *)out_buf->data, out_buf->size);
|
59
|
+
bufrelease(out_buf);
|
333
60
|
|
334
61
|
#ifdef HAVE_RUBY_ENCODING_H
|
335
|
-
|
62
|
+
rb_enc_copy(result, str);
|
336
63
|
#endif
|
337
|
-
return rb_output_buf;
|
338
|
-
}
|
339
|
-
|
340
|
-
static VALUE rb_unescape_html(VALUE self, VALUE str) {
|
341
|
-
VALUE rb_output_buf;
|
342
|
-
unsigned char *inBuf, *outBuf;
|
343
|
-
size_t len, new_len;
|
344
|
-
|
345
|
-
Check_Type(str, T_STRING);
|
346
|
-
inBuf = (unsigned char*)RSTRING_PTR(str);
|
347
|
-
len = RSTRING_LEN(str);
|
348
64
|
|
349
|
-
|
350
|
-
// TODO: we could be more intelligent about this, but probably not
|
351
|
-
new_len = sizeof(unsigned char) * len;
|
352
|
-
|
353
|
-
// create our new ruby string
|
354
|
-
rb_output_buf = rb_str_new(NULL, new_len);
|
355
|
-
outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
|
356
|
-
|
357
|
-
// perform our escape, returning the new string's length
|
358
|
-
new_len = unescape_html(outBuf, inBuf, len);
|
359
|
-
|
360
|
-
// shrink our new ruby string
|
361
|
-
rb_str_resize(rb_output_buf, new_len);
|
362
|
-
|
363
|
-
#ifdef HAVE_RUBY_ENCODING_H
|
364
|
-
rb_enc_copy(rb_output_buf, str);
|
365
|
-
#endif
|
366
|
-
return rb_output_buf;
|
65
|
+
return result;
|
367
66
|
}
|
368
67
|
|
369
|
-
static VALUE rb_escape_javascript(VALUE self, VALUE str) {
|
370
|
-
VALUE rb_output_buf;
|
371
|
-
unsigned char *inBuf, *outBuf;
|
372
|
-
size_t len, new_len;
|
373
|
-
|
374
|
-
if (str == Qnil) {
|
375
|
-
return rb_str_new2("");
|
376
|
-
}
|
377
68
|
|
378
|
-
|
69
|
+
/**
|
70
|
+
* HTML methods
|
71
|
+
*/
|
72
|
+
static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
|
73
|
+
{
|
74
|
+
VALUE rb_out_buf, str, rb_secure;
|
75
|
+
struct buf *out_buf;
|
76
|
+
int secure = g_html_secure;
|
379
77
|
|
380
|
-
|
381
|
-
|
78
|
+
if (rb_scan_args(argc, argv, "11", &str, &rb_secure) == 2) {
|
79
|
+
if (rb_secure == Qfalse) {
|
80
|
+
secure = 0;
|
81
|
+
}
|
82
|
+
}
|
382
83
|
|
383
|
-
|
384
|
-
|
385
|
-
new_len = sizeof(unsigned char)*(len*2);
|
84
|
+
Check_Type(str, T_STRING);
|
85
|
+
out_buf = bufnew(128);
|
386
86
|
|
387
|
-
|
388
|
-
rb_output_buf = rb_str_new(NULL, new_len);
|
389
|
-
outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
|
87
|
+
houdini_escape_html(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure);
|
390
88
|
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
// shrink our new ruby string
|
395
|
-
rb_str_resize(rb_output_buf, new_len);
|
89
|
+
rb_out_buf = rb_str_new((char *)out_buf->data, out_buf->size);
|
90
|
+
bufrelease(out_buf);
|
396
91
|
|
397
92
|
#ifdef HAVE_RUBY_ENCODING_H
|
398
|
-
|
93
|
+
rb_enc_copy(rb_out_buf, str);
|
399
94
|
#endif
|
400
|
-
return rb_output_buf;
|
401
|
-
}
|
402
|
-
|
403
|
-
static VALUE rb_unescape_javascript(VALUE self, VALUE str) {
|
404
|
-
VALUE rb_output_buf;
|
405
|
-
unsigned char *inBuf, *outBuf;
|
406
|
-
size_t len, new_len;
|
407
|
-
|
408
|
-
if (str == Qnil) {
|
409
|
-
return rb_str_new2("");
|
410
|
-
}
|
411
|
-
|
412
|
-
Check_Type(str, T_STRING);
|
413
|
-
|
414
|
-
inBuf = (unsigned char*)RSTRING_PTR(str);
|
415
|
-
len = RSTRING_LEN(str);
|
416
|
-
|
417
|
-
// this is the max size the string could be
|
418
|
-
// TODO: we could be more intelligent about this, but probably not
|
419
|
-
new_len = sizeof(unsigned char) * len;
|
420
|
-
|
421
|
-
// create our new ruby string
|
422
|
-
rb_output_buf = rb_str_new(NULL, new_len);
|
423
|
-
outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
|
424
|
-
|
425
|
-
// perform our escape, returning the new string's length
|
426
|
-
new_len = unescape_javascript(outBuf, inBuf, len);
|
427
|
-
|
428
|
-
// shrink our new ruby string
|
429
|
-
rb_str_resize(rb_output_buf, new_len);
|
430
95
|
|
431
|
-
|
432
|
-
rb_enc_copy(rb_output_buf, str);
|
433
|
-
#endif
|
434
|
-
return rb_output_buf;
|
96
|
+
return rb_out_buf;
|
435
97
|
}
|
436
98
|
|
437
|
-
static VALUE
|
438
|
-
|
439
|
-
|
440
|
-
size_t len, new_len;
|
441
|
-
|
442
|
-
Check_Type(str, T_STRING);
|
443
|
-
|
444
|
-
inBuf = (unsigned char*)RSTRING_PTR(str);
|
445
|
-
len = RSTRING_LEN(str);
|
446
|
-
|
447
|
-
// this is the max size the string could be
|
448
|
-
// TODO: we should try to be more intelligent about this
|
449
|
-
new_len = sizeof(unsigned char)*(len*3);
|
450
|
-
|
451
|
-
// create our new ruby string
|
452
|
-
rb_output_buf = rb_str_new(NULL, new_len);
|
453
|
-
outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
|
454
|
-
|
455
|
-
// perform our escape, returning the new string's length
|
456
|
-
new_len = escape_url(outBuf, inBuf, len);
|
457
|
-
|
458
|
-
// shrink our new ruby string
|
459
|
-
rb_str_resize(rb_output_buf, new_len);
|
460
|
-
|
461
|
-
#ifdef HAVE_RUBY_ENCODING_H
|
462
|
-
rb_enc_copy(rb_output_buf, str);
|
463
|
-
#endif
|
464
|
-
return rb_output_buf;
|
99
|
+
static VALUE rb_eu_unescape_html(VALUE self, VALUE str)
|
100
|
+
{
|
101
|
+
return rb_eu__generic(self, str, &houdini_unescape_html, 128);
|
465
102
|
}
|
466
103
|
|
467
|
-
static VALUE rb_unescape_url(VALUE self, VALUE str) {
|
468
|
-
Check_Type(str, T_STRING);
|
469
|
-
|
470
|
-
VALUE rb_output_buf;
|
471
|
-
unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
|
472
|
-
size_t len = RSTRING_LEN(str);
|
473
|
-
|
474
|
-
// this is the max size the string could be
|
475
|
-
// TODO: we could be more intelligent about this, but probably not
|
476
|
-
size_t new_len = sizeof(unsigned char) * len;
|
477
|
-
unsigned char *outBuf;
|
478
104
|
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
// shrink our new ruby string
|
487
|
-
rb_str_resize(rb_output_buf, new_len);
|
488
|
-
|
489
|
-
#ifdef HAVE_RUBY_ENCODING_H
|
490
|
-
rb_enc_copy(rb_output_buf, str);
|
491
|
-
#endif
|
492
|
-
return rb_output_buf;
|
105
|
+
/**
|
106
|
+
* JavaScript methods
|
107
|
+
*/
|
108
|
+
static VALUE rb_eu_escape_js(VALUE self, VALUE str)
|
109
|
+
{
|
110
|
+
return rb_eu__generic(self, str, &houdini_escape_js, 128);
|
493
111
|
}
|
494
112
|
|
495
|
-
static VALUE
|
496
|
-
|
497
|
-
|
498
|
-
VALUE rb_output_buf;
|
499
|
-
unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
|
500
|
-
size_t len = RSTRING_LEN(str);
|
501
|
-
unsigned char *outBuf;
|
502
|
-
|
503
|
-
// this is the max size the string could be
|
504
|
-
// TODO: we should try to be more intelligent about this
|
505
|
-
size_t new_len = sizeof(unsigned char)*(len*3);
|
506
|
-
|
507
|
-
// create our new ruby string
|
508
|
-
rb_output_buf = rb_str_new(NULL, new_len);
|
509
|
-
outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
|
510
|
-
|
511
|
-
// perform our escape, returning the new string's length
|
512
|
-
new_len = escape_uri(outBuf, inBuf, len);
|
513
|
-
|
514
|
-
// shrink our new ruby string
|
515
|
-
rb_str_resize(rb_output_buf, new_len);
|
516
|
-
|
517
|
-
#ifdef HAVE_RUBY_ENCODING_H
|
518
|
-
rb_enc_copy(rb_output_buf, str);
|
519
|
-
#endif
|
520
|
-
return rb_output_buf;
|
113
|
+
static VALUE rb_eu_unescape_js(VALUE self, VALUE str)
|
114
|
+
{
|
115
|
+
return rb_eu__generic(self, str, &houdini_unescape_js, 128);
|
521
116
|
}
|
522
117
|
|
523
|
-
static VALUE rb_unescape_uri(VALUE self, VALUE str) {
|
524
|
-
Check_Type(str, T_STRING);
|
525
118
|
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
size_t new_len = sizeof(unsigned char)*len;
|
534
|
-
|
535
|
-
// create our new ruby string
|
536
|
-
rb_output_buf = rb_str_new(NULL, new_len);
|
537
|
-
outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
|
119
|
+
/**
|
120
|
+
* URL methods
|
121
|
+
*/
|
122
|
+
static VALUE rb_eu_escape_url(VALUE self, VALUE str)
|
123
|
+
{
|
124
|
+
return rb_eu__generic(self, str, &houdini_escape_url, 32);
|
125
|
+
}
|
538
126
|
|
539
|
-
|
540
|
-
|
127
|
+
static VALUE rb_eu_unescape_url(VALUE self, VALUE str)
|
128
|
+
{
|
129
|
+
return rb_eu__generic(self, str, &houdini_unescape_url, 32);
|
130
|
+
}
|
541
131
|
|
542
|
-
// shrink our new ruby string
|
543
|
-
rb_str_resize(rb_output_buf, new_len);
|
544
132
|
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
133
|
+
/**
|
134
|
+
* URI methods
|
135
|
+
*/
|
136
|
+
static VALUE rb_eu_escape_uri(VALUE self, VALUE str)
|
137
|
+
{
|
138
|
+
return rb_eu__generic(self, str, &houdini_escape_uri, 32);
|
549
139
|
}
|
550
140
|
|
551
|
-
static VALUE
|
552
|
-
|
141
|
+
static VALUE rb_eu_unescape_uri(VALUE self, VALUE str)
|
142
|
+
{
|
143
|
+
return rb_eu__generic(self, str, &houdini_unescape_uri, 32);
|
553
144
|
}
|
554
145
|
|
555
|
-
static VALUE rb_s_set_html_secure(VALUE self, VALUE val) {
|
556
|
-
html_secure = RTEST(val);
|
557
|
-
rb_cvar_set(self, rb_html_secure, val);
|
558
146
|
|
559
|
-
|
560
|
-
|
147
|
+
/**
|
148
|
+
* Ruby Extension initializer
|
149
|
+
*/
|
150
|
+
void Init_escape_utils()
|
151
|
+
{
|
152
|
+
rb_mEscapeUtils = rb_define_module("EscapeUtils");
|
153
|
+
rb_define_method(rb_mEscapeUtils, "escape_html", rb_eu_escape_html, -1);
|
154
|
+
rb_define_method(rb_mEscapeUtils, "unescape_html", rb_eu_unescape_html, 1);
|
155
|
+
rb_define_method(rb_mEscapeUtils, "escape_javascript", rb_eu_escape_js, 1);
|
156
|
+
rb_define_method(rb_mEscapeUtils, "unescape_javascript", rb_eu_unescape_js, 1);
|
157
|
+
rb_define_method(rb_mEscapeUtils, "escape_url", rb_eu_escape_url, 1);
|
158
|
+
rb_define_method(rb_mEscapeUtils, "unescape_url", rb_eu_unescape_url, 1);
|
159
|
+
rb_define_method(rb_mEscapeUtils, "escape_uri", rb_eu_escape_uri, 1);
|
160
|
+
rb_define_method(rb_mEscapeUtils, "unescape_uri", rb_eu_unescape_uri, 1);
|
161
|
+
|
162
|
+
rb_define_singleton_method(rb_mEscapeUtils, "html_secure", rb_eu_get_html_secure, 0);
|
163
|
+
rb_define_singleton_method(rb_mEscapeUtils, "html_secure=", rb_eu_set_html_secure, 1);
|
561
164
|
|
562
|
-
|
563
|
-
void Init_escape_utils() {
|
564
|
-
mEscapeUtils = rb_define_module("EscapeUtils");
|
565
|
-
rb_define_method(mEscapeUtils, "escape_html", rb_escape_html, -1);
|
566
|
-
rb_define_module_function(mEscapeUtils, "escape_html", rb_escape_html, -1);
|
567
|
-
rb_define_method(mEscapeUtils, "unescape_html", rb_unescape_html, 1);
|
568
|
-
rb_define_module_function(mEscapeUtils, "unescape_html", rb_unescape_html, 1);
|
569
|
-
rb_define_method(mEscapeUtils, "escape_javascript", rb_escape_javascript, 1);
|
570
|
-
rb_define_module_function(mEscapeUtils, "escape_javascript", rb_escape_javascript, 1);
|
571
|
-
rb_define_method(mEscapeUtils, "unescape_javascript", rb_unescape_javascript, 1);
|
572
|
-
rb_define_module_function(mEscapeUtils, "unescape_javascript", rb_unescape_javascript, 1);
|
573
|
-
rb_define_method(mEscapeUtils, "escape_url", rb_escape_url, 1);
|
574
|
-
rb_define_module_function(mEscapeUtils, "escape_url", rb_escape_url, 1);
|
575
|
-
rb_define_method(mEscapeUtils, "unescape_url", rb_unescape_url, 1);
|
576
|
-
rb_define_module_function(mEscapeUtils, "unescape_url", rb_unescape_url, 1);
|
577
|
-
rb_define_method(mEscapeUtils, "escape_uri", rb_escape_uri, 1);
|
578
|
-
rb_define_module_function(mEscapeUtils, "escape_uri", rb_escape_uri, 1);
|
579
|
-
rb_define_method(mEscapeUtils, "unescape_uri", rb_unescape_uri, 1);
|
580
|
-
rb_define_module_function(mEscapeUtils, "unescape_uri", rb_unescape_uri, 1);
|
581
|
-
|
582
|
-
rb_define_singleton_method(mEscapeUtils, "html_secure", rb_s_get_html_secure, 0);
|
583
|
-
rb_define_singleton_method(mEscapeUtils, "html_secure=", rb_s_set_html_secure, 1);
|
584
|
-
|
585
|
-
rb_html_secure = rb_intern("@@html_secure");
|
165
|
+
rb_html_secure = rb_intern("@@html_secure");
|
586
166
|
}
|
587
167
|
|