escape_utils 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/.gitignore +2 -1
  2. data/.travis.yml +13 -0
  3. data/CHANGELOG.md +7 -0
  4. data/MIT-LICENSE +1 -1
  5. data/Rakefile +5 -18
  6. data/benchmark/html_escape.rb +9 -2
  7. data/benchmark/xml_escape.rb +29 -0
  8. data/escape_utils.gemspec +2 -3
  9. data/ext/escape_utils/buffer.c +181 -160
  10. data/ext/escape_utils/buffer.h +90 -68
  11. data/ext/escape_utils/escape_utils.c +77 -39
  12. data/ext/escape_utils/extconf.rb +1 -1
  13. data/ext/escape_utils/houdini.h +37 -8
  14. data/ext/escape_utils/houdini_href_e.c +115 -0
  15. data/ext/escape_utils/houdini_html_e.c +90 -0
  16. data/ext/escape_utils/houdini_html_u.c +122 -0
  17. data/ext/escape_utils/{houdini_js.c → houdini_js_e.c} +17 -75
  18. data/ext/escape_utils/houdini_js_u.c +60 -0
  19. data/ext/escape_utils/{uri_escape.h → houdini_uri_e.c} +68 -2
  20. data/ext/escape_utils/houdini_uri_u.c +65 -0
  21. data/ext/escape_utils/houdini_xml_e.c +136 -0
  22. data/lib/escape_utils/version.rb +1 -1
  23. data/lib/escape_utils/xml/builder.rb +8 -0
  24. data/test/helper.rb +14 -0
  25. data/test/html/escape_test.rb +61 -0
  26. data/test/html/unescape_test.rb +48 -0
  27. data/test/html_safety_test.rb +46 -0
  28. data/test/javascript/escape_test.rb +42 -0
  29. data/test/javascript/unescape_test.rb +46 -0
  30. data/test/query/escape_test.rb +50 -0
  31. data/test/query/unescape_test.rb +52 -0
  32. data/test/uri/escape_test.rb +50 -0
  33. data/test/uri/unescape_test.rb +55 -0
  34. data/test/url/escape_test.rb +58 -0
  35. data/test/url/unescape_test.rb +60 -0
  36. data/test/xml/escape_test.rb +67 -0
  37. metadata +136 -152
  38. data/.rspec +0 -2
  39. data/ext/escape_utils/houdini_html.c +0 -214
  40. data/ext/escape_utils/houdini_uri.c +0 -130
  41. data/spec/html/escape_spec.rb +0 -42
  42. data/spec/html/unescape_spec.rb +0 -37
  43. data/spec/html_safety_spec.rb +0 -48
  44. data/spec/javascript/escape_spec.rb +0 -34
  45. data/spec/javascript/unescape_spec.rb +0 -37
  46. data/spec/query/escape_spec.rb +0 -44
  47. data/spec/query/unescape_spec.rb +0 -46
  48. data/spec/rcov.opts +0 -3
  49. data/spec/spec_helper.rb +0 -5
  50. data/spec/uri/escape_spec.rb +0 -43
  51. data/spec/uri/unescape_spec.rb +0 -57
  52. data/spec/url/escape_spec.rb +0 -52
  53. data/spec/url/unescape_spec.rb +0 -57
@@ -1,91 +1,113 @@
1
1
  /*
2
- * Copyright (c) 2008, Natacha Porté
3
- * Copyright (c) 2011, Vicent Martí
2
+ * Copyright (C) the libgit2 contributors. All rights reserved.
4
3
  *
5
- * Permission to use, copy, modify, and distribute this software for any
6
- * purpose with or without fee is hereby granted, provided that the above
7
- * copyright notice and this permission notice appear in all copies.
8
- *
9
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
4
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
5
+ * a Linking Exception. For full terms see the included COPYING file.
16
6
  */
7
+ #ifndef INCLUDE_buffer_h__
8
+ #define INCLUDE_buffer_h__
17
9
 
18
- #ifndef __GEN_BUFFER_H__
19
- #define __GEN_BUFFER_H__
20
-
10
+ #include <stdbool.h>
21
11
  #include <stddef.h>
22
12
  #include <stdarg.h>
13
+ #include <sys/types.h>
23
14
  #include <stdint.h>
24
15
 
25
- #if defined(_MSC_VER)
26
- #define __attribute__(x)
27
- #define inline
28
- #endif
29
-
30
- typedef enum {
31
- BUF_OK = 0,
32
- BUF_ENOMEM = -1,
33
- } buferror_t;
34
-
35
- /* struct buf: character array buffer */
36
- struct buf {
37
- uint8_t *data; /* actual character data */
38
- size_t size; /* size of the string */
39
- size_t asize; /* allocated size (0 = volatile buffer) */
40
- size_t unit; /* reallocation unit size (0 = read-only buffer) */
41
- };
16
+ typedef struct {
17
+ char *ptr;
18
+ size_t asize, size;
19
+ } gh_buf;
42
20
 
43
- /* CONST_BUF: global buffer from a string litteral */
44
- #define BUF_STATIC(string) \
45
- { (uint8_t *)string, sizeof string -1, sizeof string, 0, 0 }
21
+ extern char gh_buf__initbuf[];
22
+ extern char gh_buf__oom[];
46
23
 
47
- /* VOLATILE_BUF: macro for creating a volatile buffer on the stack */
48
- #define BUF_VOLATILE(strname) \
49
- { (uint8_t *)strname, strlen(strname), 0, 0, 0 }
24
+ #define GH_BUF_INIT { gh_buf__initbuf, 0, 0 }
50
25
 
51
- /* BUFPUTSL: optimized bufputs of a string litteral */
52
- #define BUFPUTSL(output, literal) \
53
- bufput(output, literal, sizeof literal - 1)
54
-
55
- /* bufgrow: increasing the allocated size to the given value */
56
- int bufgrow(struct buf *, size_t);
57
-
58
- /* bufnew: allocation of a new buffer */
59
- struct buf *bufnew(size_t) __attribute__ ((malloc));
26
+ /**
27
+ * Initialize a gh_buf structure.
28
+ *
29
+ * For the cases where GH_BUF_INIT cannot be used to do static
30
+ * initialization.
31
+ */
32
+ extern void gh_buf_init(gh_buf *buf, size_t initial_size);
60
33
 
61
- /* bufnullterm: NUL-termination of the string array (making a C-string) */
62
- const char *bufcstr(struct buf *);
34
+ /**
35
+ * Attempt to grow the buffer to hold at least `target_size` bytes.
36
+ *
37
+ * If the allocation fails, this will return an error. If mark_oom is true,
38
+ * this will mark the buffer as invalid for future operations; if false,
39
+ * existing buffer content will be preserved, but calling code must handle
40
+ * the fact that the buffer was not expanded.
41
+ */
42
+ extern int gh_buf_try_grow(gh_buf *buf, size_t target_size, bool mark_oom);
63
43
 
64
- /* bufprefix: compare the beginning of a buffer with a string */
65
- int bufprefix(const struct buf *buf, const char *prefix);
44
+ /**
45
+ * Grow the buffer to hold at least `target_size` bytes.
46
+ *
47
+ * If the allocation fails, this will return an error and the buffer will be
48
+ * marked as invalid for future operations, invalidating its contents.
49
+ *
50
+ * @return 0 on success or -1 on failure
51
+ */
52
+ static inline int gh_buf_grow(gh_buf *buf, size_t target_size)
53
+ {
54
+ return gh_buf_try_grow(buf, target_size, true);
55
+ }
66
56
 
67
- /* bufput: appends raw data to a buffer */
68
- void bufput(struct buf *, const void *, size_t);
57
+ extern void gh_buf_free(gh_buf *buf);
58
+ extern void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b);
69
59
 
70
- /* bufputs: appends a NUL-terminated string to a buffer */
71
- void bufputs(struct buf *, const char *);
60
+ /**
61
+ * Test if there have been any reallocation failures with this gh_buf.
62
+ *
63
+ * Any function that writes to a gh_buf can fail due to memory allocation
64
+ * issues. If one fails, the gh_buf will be marked with an OOM error and
65
+ * further calls to modify the buffer will fail. Check gh_buf_oom() at the
66
+ * end of your sequence and it will be true if you ran out of memory at any
67
+ * point with that buffer.
68
+ *
69
+ * @return false if no error, true if allocation error
70
+ */
71
+ static inline bool gh_buf_oom(const gh_buf *buf)
72
+ {
73
+ return (buf->ptr == gh_buf__oom);
74
+ }
72
75
 
73
- /* bufputc: appends a single char to a buffer */
74
- void bufputc(struct buf *, int);
75
76
 
76
- /* bufrelease: decrease the reference count and free the buffer if needed */
77
- void bufrelease(struct buf *);
77
+ static inline size_t gh_buf_len(const gh_buf *buf)
78
+ {
79
+ return buf->size;
80
+ }
78
81
 
79
- /* bufreset: frees internal data of the buffer */
80
- void bufreset(struct buf *);
82
+ extern int gh_buf_cmp(const gh_buf *a, const gh_buf *b);
81
83
 
82
- /* bufslurp: removes a given number of bytes from the head of the array */
83
- void bufslurp(struct buf *, size_t);
84
+ extern void gh_buf_attach(gh_buf *buf, char *ptr, size_t asize);
85
+ extern char *gh_buf_detach(gh_buf *buf);
86
+ extern void gh_buf_copy_cstr(char *data, size_t datasize, const gh_buf *buf);
84
87
 
85
- /* bufprintf: formatted printing to a buffer */
86
- void bufprintf(struct buf *, const char *, ...) __attribute__ ((format (printf, 2, 3)));
88
+ static inline const char *gh_buf_cstr(const gh_buf *buf)
89
+ {
90
+ return buf->ptr;
91
+ }
87
92
 
88
- /* vbufprintf: stdarg variant of formatted printing into a buffer */
89
- void vbufprintf(struct buf *, const char * , va_list);
93
+ /*
94
+ * Functions below that return int value error codes will return 0 on
95
+ * success or -1 on failure (which generally means an allocation failed).
96
+ * Using a gh_buf where the allocation has failed will result in -1 from
97
+ * all further calls using that buffer. As a result, you can ignore the
98
+ * return code of these functions and call them in a series then just call
99
+ * gh_buf_oom at the end.
100
+ */
101
+ extern int gh_buf_set(gh_buf *buf, const char *data, size_t len);
102
+ extern int gh_buf_sets(gh_buf *buf, const char *string);
103
+ extern int gh_buf_putc(gh_buf *buf, char c);
104
+ extern int gh_buf_put(gh_buf *buf, const void *data, size_t len);
105
+ extern int gh_buf_puts(gh_buf *buf, const char *string);
106
+ extern int gh_buf_printf(gh_buf *buf, const char *format, ...)
107
+ __attribute__((format (printf, 2, 3)));
108
+ extern int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap);
109
+ extern void gh_buf_clear(gh_buf *buf);
110
+
111
+ #define gh_buf_PUTS(buf, str) gh_buf_put(buf, str, sizeof(str) - 1)
90
112
 
91
113
  #endif
@@ -4,16 +4,48 @@
4
4
  #define RSTRING_NOT_MODIFIED
5
5
 
6
6
  #include <ruby.h>
7
+ #include "houdini.h"
8
+
7
9
  #if RB_CVAR_SET_ARITY == 4
8
10
  # define rb_cvar_set(a,b,c) rb_cvar_set(a,b,c,0)
9
11
  #endif
12
+
10
13
  #ifdef HAVE_RUBY_ENCODING_H
11
14
  #include <ruby/encoding.h>
12
- #endif
15
+ static VALUE rb_eEncodingCompatibilityError;
13
16
 
14
- #include "houdini.h"
17
+ static VALUE eu_new_str(const char *str, size_t len)
18
+ {
19
+ return rb_enc_str_new(str, len, rb_utf8_encoding());
20
+ }
15
21
 
16
- typedef void (*houdini_cb)(struct buf *, const uint8_t *, size_t);
22
+ static void check_utf8_encoding(VALUE str)
23
+ {
24
+ static rb_encoding *_cached[3] = {NULL, NULL, NULL};
25
+ rb_encoding *enc;
26
+
27
+ if (_cached[0] == NULL) {
28
+ _cached[0] = rb_utf8_encoding();
29
+ _cached[1] = rb_usascii_encoding();
30
+ _cached[2] = rb_ascii8bit_encoding();
31
+ }
32
+
33
+ enc = rb_enc_get(str);
34
+ if (enc != _cached[0] && enc != _cached[1] && enc != _cached[2]) {
35
+ rb_raise(rb_eEncodingCompatibilityError,
36
+ "Input must be UTF-8 or US-ASCII, %s given", rb_enc_name(enc));
37
+ }
38
+ }
39
+ #else
40
+ static VALUE eu_new_str(const char *str, size_t len)
41
+ {
42
+ return rb_str_new(str, len);
43
+ }
44
+
45
+ static void check_utf8_encoding(VALUE str) {}
46
+ #endif
47
+
48
+ typedef int (*houdini_cb)(gh_buf *, const uint8_t *, size_t);
17
49
 
18
50
  static VALUE rb_mEscapeUtils;
19
51
 
@@ -35,34 +67,27 @@ static VALUE rb_eu_set_html_secure(VALUE self, VALUE val)
35
67
  return val;
36
68
  }
37
69
 
38
-
39
70
  /**
40
71
  * Generic template
41
72
  */
42
73
  static VALUE
43
- rb_eu__generic(
44
- VALUE self, VALUE str,
45
- houdini_cb callback,
46
- size_t chunk_size)
74
+ rb_eu__generic(VALUE str, houdini_cb do_escape)
47
75
  {
48
- VALUE result;
49
- struct buf *out_buf;
76
+ gh_buf buf = GH_BUF_INIT;
50
77
 
51
78
  if (NIL_P(str))
52
- return rb_str_new2("");
79
+ return eu_new_str("", 0);
53
80
 
54
81
  Check_Type(str, T_STRING);
55
- out_buf = bufnew(chunk_size);
82
+ check_utf8_encoding(str);
56
83
 
57
- callback(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str));
58
- result = rb_str_new((char *)out_buf->data, out_buf->size);
59
- bufrelease(out_buf);
60
-
61
- #ifdef HAVE_RUBY_ENCODING_H
62
- rb_enc_copy(result, str);
63
- #endif
84
+ if (do_escape(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str))) {
85
+ VALUE result = eu_new_str(buf.ptr, buf.size);
86
+ gh_buf_free(&buf);
87
+ return result;
88
+ }
64
89
 
65
- return result;
90
+ return str;
66
91
  }
67
92
 
68
93
 
@@ -71,8 +96,8 @@ rb_eu__generic(
71
96
  */
72
97
  static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
73
98
  {
74
- VALUE rb_out_buf, str, rb_secure;
75
- struct buf *out_buf;
99
+ VALUE str, rb_secure;
100
+ gh_buf buf = GH_BUF_INIT;
76
101
  int secure = g_html_secure;
77
102
 
78
103
  if (rb_scan_args(argc, argv, "11", &str, &rb_secure) == 2) {
@@ -82,23 +107,29 @@ static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
82
107
  }
83
108
 
84
109
  Check_Type(str, T_STRING);
85
- out_buf = bufnew(128);
86
-
87
- houdini_escape_html(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure);
110
+ check_utf8_encoding(str);
88
111
 
89
- rb_out_buf = rb_str_new((char *)out_buf->data, out_buf->size);
90
- bufrelease(out_buf);
91
-
92
- #ifdef HAVE_RUBY_ENCODING_H
93
- rb_enc_copy(rb_out_buf, str);
94
- #endif
112
+ if (houdini_escape_html0(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure)) {
113
+ VALUE result = eu_new_str(buf.ptr, buf.size);
114
+ gh_buf_free(&buf);
115
+ return result;
116
+ }
95
117
 
96
- return rb_out_buf;
118
+ return str;
97
119
  }
98
120
 
99
121
  static VALUE rb_eu_unescape_html(VALUE self, VALUE str)
100
122
  {
101
- return rb_eu__generic(self, str, &houdini_unescape_html, 128);
123
+ return rb_eu__generic(str, &houdini_unescape_html);
124
+ }
125
+
126
+
127
+ /**
128
+ * XML methods
129
+ */
130
+ static VALUE rb_eu_escape_xml(VALUE self, VALUE str)
131
+ {
132
+ return rb_eu__generic(str, &houdini_escape_xml);
102
133
  }
103
134
 
104
135
 
@@ -107,12 +138,12 @@ static VALUE rb_eu_unescape_html(VALUE self, VALUE str)
107
138
  */
108
139
  static VALUE rb_eu_escape_js(VALUE self, VALUE str)
109
140
  {
110
- return rb_eu__generic(self, str, &houdini_escape_js, 128);
141
+ return rb_eu__generic(str, &houdini_escape_js);
111
142
  }
112
143
 
113
144
  static VALUE rb_eu_unescape_js(VALUE self, VALUE str)
114
145
  {
115
- return rb_eu__generic(self, str, &houdini_unescape_js, 128);
146
+ return rb_eu__generic(str, &houdini_unescape_js);
116
147
  }
117
148
 
118
149
 
@@ -121,12 +152,12 @@ static VALUE rb_eu_unescape_js(VALUE self, VALUE str)
121
152
  */
122
153
  static VALUE rb_eu_escape_url(VALUE self, VALUE str)
123
154
  {
124
- return rb_eu__generic(self, str, &houdini_escape_url, 32);
155
+ return rb_eu__generic(str, &houdini_escape_url);
125
156
  }
126
157
 
127
158
  static VALUE rb_eu_unescape_url(VALUE self, VALUE str)
128
159
  {
129
- return rb_eu__generic(self, str, &houdini_unescape_url, 32);
160
+ return rb_eu__generic(str, &houdini_unescape_url);
130
161
  }
131
162
 
132
163
 
@@ -135,12 +166,12 @@ static VALUE rb_eu_unescape_url(VALUE self, VALUE str)
135
166
  */
136
167
  static VALUE rb_eu_escape_uri(VALUE self, VALUE str)
137
168
  {
138
- return rb_eu__generic(self, str, &houdini_escape_uri, 32);
169
+ return rb_eu__generic(str, &houdini_escape_uri);
139
170
  }
140
171
 
141
172
  static VALUE rb_eu_unescape_uri(VALUE self, VALUE str)
142
173
  {
143
- return rb_eu__generic(self, str, &houdini_unescape_uri, 32);
174
+ return rb_eu__generic(str, &houdini_unescape_uri);
144
175
  }
145
176
 
146
177
 
@@ -149,9 +180,16 @@ static VALUE rb_eu_unescape_uri(VALUE self, VALUE str)
149
180
  */
150
181
  void Init_escape_utils()
151
182
  {
183
+ #ifdef HAVE_RUBY_ENCODING_H
184
+ VALUE rb_cEncoding = rb_const_get(rb_cObject, rb_intern("Encoding"));
185
+ rb_eEncodingCompatibilityError = rb_const_get(rb_cEncoding, rb_intern("CompatibilityError"));
186
+ #endif
187
+
152
188
  rb_mEscapeUtils = rb_define_module("EscapeUtils");
189
+
153
190
  rb_define_method(rb_mEscapeUtils, "escape_html", rb_eu_escape_html, -1);
154
191
  rb_define_method(rb_mEscapeUtils, "unescape_html", rb_eu_unescape_html, 1);
192
+ rb_define_method(rb_mEscapeUtils, "escape_xml", rb_eu_escape_xml, 1);
155
193
  rb_define_method(rb_mEscapeUtils, "escape_javascript", rb_eu_escape_js, 1);
156
194
  rb_define_method(rb_mEscapeUtils, "unescape_javascript", rb_eu_unescape_js, 1);
157
195
  rb_define_method(rb_mEscapeUtils, "escape_url", rb_eu_escape_url, 1);
@@ -14,4 +14,4 @@ else
14
14
  $CFLAGS << " -DRB_CVAR_SET_ARITY=4 "
15
15
  end
16
16
 
17
- create_makefile("escape_utils")
17
+ create_makefile("escape_utils/escape_utils")
@@ -1,15 +1,44 @@
1
1
  #ifndef __HOUDINI_H__
2
2
  #define __HOUDINI_H__
3
3
 
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ #include <stdint.h>
4
9
  #include "buffer.h"
5
10
 
6
- extern void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size, int secure);
7
- extern void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size);
8
- extern void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size);
9
- extern void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size);
10
- extern void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size);
11
- extern void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size);
12
- extern void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size);
13
- extern void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size);
11
+ #define likely(x) __builtin_expect((x),1)
12
+ #define unlikely(x) __builtin_expect((x),0)
13
+
14
+ #ifdef HOUDINI_USE_LOCALE
15
+ # define _isxdigit(c) isxdigit(c)
16
+ # define _isdigit(c) isdigit(c)
17
+ #else
18
+ /*
19
+ * Helper _isdigit methods -- do not trust the current locale
20
+ * */
21
+ # define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL)
22
+ # define _isdigit(c) ((c) >= '0' && (c) <= '9')
23
+ #endif
24
+
25
+ #define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
26
+ #define HOUDINI_UNESCAPED_SIZE(x) (x)
27
+
28
+ extern int houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size);
29
+ extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure);
30
+ extern int houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size);
31
+ extern int houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size);
32
+ extern int houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size);
33
+ extern int houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size);
34
+ extern int houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size);
35
+ extern int houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size);
36
+ extern int houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size);
37
+ extern int houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size);
38
+ extern int houdini_unescape_js(gh_buf *ob, const uint8_t *src, size_t size);
39
+
40
+ #ifdef __cplusplus
41
+ }
42
+ #endif
14
43
 
15
44
  #endif