escape_utils 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. data/.gitignore +2 -1
  2. data/.travis.yml +13 -0
  3. data/CHANGELOG.md +7 -0
  4. data/MIT-LICENSE +1 -1
  5. data/Rakefile +5 -18
  6. data/benchmark/html_escape.rb +9 -2
  7. data/benchmark/xml_escape.rb +29 -0
  8. data/escape_utils.gemspec +2 -3
  9. data/ext/escape_utils/buffer.c +181 -160
  10. data/ext/escape_utils/buffer.h +90 -68
  11. data/ext/escape_utils/escape_utils.c +77 -39
  12. data/ext/escape_utils/extconf.rb +1 -1
  13. data/ext/escape_utils/houdini.h +37 -8
  14. data/ext/escape_utils/houdini_href_e.c +115 -0
  15. data/ext/escape_utils/houdini_html_e.c +90 -0
  16. data/ext/escape_utils/houdini_html_u.c +122 -0
  17. data/ext/escape_utils/{houdini_js.c → houdini_js_e.c} +17 -75
  18. data/ext/escape_utils/houdini_js_u.c +60 -0
  19. data/ext/escape_utils/{uri_escape.h → houdini_uri_e.c} +68 -2
  20. data/ext/escape_utils/houdini_uri_u.c +65 -0
  21. data/ext/escape_utils/houdini_xml_e.c +136 -0
  22. data/lib/escape_utils/version.rb +1 -1
  23. data/lib/escape_utils/xml/builder.rb +8 -0
  24. data/test/helper.rb +14 -0
  25. data/test/html/escape_test.rb +61 -0
  26. data/test/html/unescape_test.rb +48 -0
  27. data/test/html_safety_test.rb +46 -0
  28. data/test/javascript/escape_test.rb +42 -0
  29. data/test/javascript/unescape_test.rb +46 -0
  30. data/test/query/escape_test.rb +50 -0
  31. data/test/query/unescape_test.rb +52 -0
  32. data/test/uri/escape_test.rb +50 -0
  33. data/test/uri/unescape_test.rb +55 -0
  34. data/test/url/escape_test.rb +58 -0
  35. data/test/url/unescape_test.rb +60 -0
  36. data/test/xml/escape_test.rb +67 -0
  37. metadata +136 -152
  38. data/.rspec +0 -2
  39. data/ext/escape_utils/houdini_html.c +0 -214
  40. data/ext/escape_utils/houdini_uri.c +0 -130
  41. data/spec/html/escape_spec.rb +0 -42
  42. data/spec/html/unescape_spec.rb +0 -37
  43. data/spec/html_safety_spec.rb +0 -48
  44. data/spec/javascript/escape_spec.rb +0 -34
  45. data/spec/javascript/unescape_spec.rb +0 -37
  46. data/spec/query/escape_spec.rb +0 -44
  47. data/spec/query/unescape_spec.rb +0 -46
  48. data/spec/rcov.opts +0 -3
  49. data/spec/spec_helper.rb +0 -5
  50. data/spec/uri/escape_spec.rb +0 -43
  51. data/spec/uri/unescape_spec.rb +0 -57
  52. data/spec/url/escape_spec.rb +0 -52
  53. data/spec/url/unescape_spec.rb +0 -57
@@ -1,91 +1,113 @@
1
1
  /*
2
- * Copyright (c) 2008, Natacha Porté
3
- * Copyright (c) 2011, Vicent Martí
2
+ * Copyright (C) the libgit2 contributors. All rights reserved.
4
3
  *
5
- * Permission to use, copy, modify, and distribute this software for any
6
- * purpose with or without fee is hereby granted, provided that the above
7
- * copyright notice and this permission notice appear in all copies.
8
- *
9
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
4
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
5
+ * a Linking Exception. For full terms see the included COPYING file.
16
6
  */
7
+ #ifndef INCLUDE_buffer_h__
8
+ #define INCLUDE_buffer_h__
17
9
 
18
- #ifndef __GEN_BUFFER_H__
19
- #define __GEN_BUFFER_H__
20
-
10
+ #include <stdbool.h>
21
11
  #include <stddef.h>
22
12
  #include <stdarg.h>
13
+ #include <sys/types.h>
23
14
  #include <stdint.h>
24
15
 
25
- #if defined(_MSC_VER)
26
- #define __attribute__(x)
27
- #define inline
28
- #endif
29
-
30
- typedef enum {
31
- BUF_OK = 0,
32
- BUF_ENOMEM = -1,
33
- } buferror_t;
34
-
35
- /* struct buf: character array buffer */
36
- struct buf {
37
- uint8_t *data; /* actual character data */
38
- size_t size; /* size of the string */
39
- size_t asize; /* allocated size (0 = volatile buffer) */
40
- size_t unit; /* reallocation unit size (0 = read-only buffer) */
41
- };
16
+ typedef struct {
17
+ char *ptr;
18
+ size_t asize, size;
19
+ } gh_buf;
42
20
 
43
- /* CONST_BUF: global buffer from a string litteral */
44
- #define BUF_STATIC(string) \
45
- { (uint8_t *)string, sizeof string -1, sizeof string, 0, 0 }
21
+ extern char gh_buf__initbuf[];
22
+ extern char gh_buf__oom[];
46
23
 
47
- /* VOLATILE_BUF: macro for creating a volatile buffer on the stack */
48
- #define BUF_VOLATILE(strname) \
49
- { (uint8_t *)strname, strlen(strname), 0, 0, 0 }
24
+ #define GH_BUF_INIT { gh_buf__initbuf, 0, 0 }
50
25
 
51
- /* BUFPUTSL: optimized bufputs of a string litteral */
52
- #define BUFPUTSL(output, literal) \
53
- bufput(output, literal, sizeof literal - 1)
54
-
55
- /* bufgrow: increasing the allocated size to the given value */
56
- int bufgrow(struct buf *, size_t);
57
-
58
- /* bufnew: allocation of a new buffer */
59
- struct buf *bufnew(size_t) __attribute__ ((malloc));
26
+ /**
27
+ * Initialize a gh_buf structure.
28
+ *
29
+ * For the cases where GH_BUF_INIT cannot be used to do static
30
+ * initialization.
31
+ */
32
+ extern void gh_buf_init(gh_buf *buf, size_t initial_size);
60
33
 
61
- /* bufnullterm: NUL-termination of the string array (making a C-string) */
62
- const char *bufcstr(struct buf *);
34
+ /**
35
+ * Attempt to grow the buffer to hold at least `target_size` bytes.
36
+ *
37
+ * If the allocation fails, this will return an error. If mark_oom is true,
38
+ * this will mark the buffer as invalid for future operations; if false,
39
+ * existing buffer content will be preserved, but calling code must handle
40
+ * the case where the buffer was not expanded.
41
+ */
42
+ extern int gh_buf_try_grow(gh_buf *buf, size_t target_size, bool mark_oom);
63
43
 
64
- /* bufprefix: compare the beginning of a buffer with a string */
65
- int bufprefix(const struct buf *buf, const char *prefix);
44
+ /**
45
+ * Grow the buffer to hold at least `target_size` bytes.
46
+ *
47
+ * If the allocation fails, this will return an error and the buffer will be
48
+ * marked as invalid for future operations, invalidating its contents.
49
+ *
50
+ * @return 0 on success or -1 on failure
51
+ */
52
+ static inline int gh_buf_grow(gh_buf *buf, size_t target_size)
53
+ {
54
+ return gh_buf_try_grow(buf, target_size, true);
55
+ }
66
56
 
67
- /* bufput: appends raw data to a buffer */
68
- void bufput(struct buf *, const void *, size_t);
57
+ extern void gh_buf_free(gh_buf *buf);
58
+ extern void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b);
69
59
 
70
- /* bufputs: appends a NUL-terminated string to a buffer */
71
- void bufputs(struct buf *, const char *);
60
+ /**
61
+ * Test if there have been any reallocation failures with this gh_buf.
62
+ *
63
+ * Any function that writes to a gh_buf can fail due to memory allocation
64
+ * issues. If one fails, the gh_buf will be marked with an OOM error and
65
+ * further calls to modify the buffer will fail. Check gh_buf_oom() at the
66
+ * end of your sequence and it will be true if you ran out of memory at any
67
+ * point with that buffer.
68
+ *
69
+ * @return false if no error, true if allocation error
70
+ */
71
+ static inline bool gh_buf_oom(const gh_buf *buf)
72
+ {
73
+ return (buf->ptr == gh_buf__oom);
74
+ }
72
75
 
73
- /* bufputc: appends a single char to a buffer */
74
- void bufputc(struct buf *, int);
75
76
 
76
- /* bufrelease: decrease the reference count and free the buffer if needed */
77
- void bufrelease(struct buf *);
77
+ static inline size_t gh_buf_len(const gh_buf *buf)
78
+ {
79
+ return buf->size;
80
+ }
78
81
 
79
- /* bufreset: frees internal data of the buffer */
80
- void bufreset(struct buf *);
82
+ extern int gh_buf_cmp(const gh_buf *a, const gh_buf *b);
81
83
 
82
- /* bufslurp: removes a given number of bytes from the head of the array */
83
- void bufslurp(struct buf *, size_t);
84
+ extern void gh_buf_attach(gh_buf *buf, char *ptr, size_t asize);
85
+ extern char *gh_buf_detach(gh_buf *buf);
86
+ extern void gh_buf_copy_cstr(char *data, size_t datasize, const gh_buf *buf);
84
87
 
85
- /* bufprintf: formatted printing to a buffer */
86
- void bufprintf(struct buf *, const char *, ...) __attribute__ ((format (printf, 2, 3)));
88
+ static inline const char *gh_buf_cstr(const gh_buf *buf)
89
+ {
90
+ return buf->ptr;
91
+ }
87
92
 
88
- /* vbufprintf: stdarg variant of formatted printing into a buffer */
89
- void vbufprintf(struct buf *, const char * , va_list);
93
+ /*
94
+ * Functions below that return int value error codes will return 0 on
95
+ * success or -1 on failure (which generally means an allocation failed).
96
+ * Using a gh_buf where the allocation has failed will result in -1 from
97
+ * all further calls using that buffer. As a result, you can ignore the
98
+ * return code of these functions and call them in a series then just call
99
+ * gh_buf_oom at the end.
100
+ */
101
+ extern int gh_buf_set(gh_buf *buf, const char *data, size_t len);
102
+ extern int gh_buf_sets(gh_buf *buf, const char *string);
103
+ extern int gh_buf_putc(gh_buf *buf, char c);
104
+ extern int gh_buf_put(gh_buf *buf, const void *data, size_t len);
105
+ extern int gh_buf_puts(gh_buf *buf, const char *string);
106
+ extern int gh_buf_printf(gh_buf *buf, const char *format, ...)
107
+ __attribute__((format (printf, 2, 3)));
108
+ extern int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap);
109
+ extern void gh_buf_clear(gh_buf *buf);
110
+
111
+ #define gh_buf_PUTS(buf, str) gh_buf_put(buf, str, sizeof(str) - 1)
90
112
 
91
113
  #endif
@@ -4,16 +4,48 @@
4
4
  #define RSTRING_NOT_MODIFIED
5
5
 
6
6
  #include <ruby.h>
7
+ #include "houdini.h"
8
+
7
9
  #if RB_CVAR_SET_ARITY == 4
8
10
  # define rb_cvar_set(a,b,c) rb_cvar_set(a,b,c,0)
9
11
  #endif
12
+
10
13
  #ifdef HAVE_RUBY_ENCODING_H
11
14
  #include <ruby/encoding.h>
12
- #endif
15
+ static VALUE rb_eEncodingCompatibilityError;
13
16
 
14
- #include "houdini.h"
17
+ static VALUE eu_new_str(const char *str, size_t len)
18
+ {
19
+ return rb_enc_str_new(str, len, rb_utf8_encoding());
20
+ }
15
21
 
16
- typedef void (*houdini_cb)(struct buf *, const uint8_t *, size_t);
22
+ static void check_utf8_encoding(VALUE str)
23
+ {
24
+ static rb_encoding *_cached[3] = {NULL, NULL, NULL};
25
+ rb_encoding *enc;
26
+
27
+ if (_cached[0] == NULL) {
28
+ _cached[0] = rb_utf8_encoding();
29
+ _cached[1] = rb_usascii_encoding();
30
+ _cached[2] = rb_ascii8bit_encoding();
31
+ }
32
+
33
+ enc = rb_enc_get(str);
34
+ if (enc != _cached[0] && enc != _cached[1] && enc != _cached[2]) {
35
+ rb_raise(rb_eEncodingCompatibilityError,
36
+ "Input must be UTF-8 or US-ASCII, %s given", rb_enc_name(enc));
37
+ }
38
+ }
39
+ #else
40
+ static VALUE eu_new_str(const char *str, size_t len)
41
+ {
42
+ return rb_str_new(str, len);
43
+ }
44
+
45
+ static void check_utf8_encoding(VALUE str) {}
46
+ #endif
47
+
48
+ typedef int (*houdini_cb)(gh_buf *, const uint8_t *, size_t);
17
49
 
18
50
  static VALUE rb_mEscapeUtils;
19
51
 
@@ -35,34 +67,27 @@ static VALUE rb_eu_set_html_secure(VALUE self, VALUE val)
35
67
  return val;
36
68
  }
37
69
 
38
-
39
70
  /**
40
71
  * Generic template
41
72
  */
42
73
  static VALUE
43
- rb_eu__generic(
44
- VALUE self, VALUE str,
45
- houdini_cb callback,
46
- size_t chunk_size)
74
+ rb_eu__generic(VALUE str, houdini_cb do_escape)
47
75
  {
48
- VALUE result;
49
- struct buf *out_buf;
76
+ gh_buf buf = GH_BUF_INIT;
50
77
 
51
78
  if (NIL_P(str))
52
- return rb_str_new2("");
79
+ return eu_new_str("", 0);
53
80
 
54
81
  Check_Type(str, T_STRING);
55
- out_buf = bufnew(chunk_size);
82
+ check_utf8_encoding(str);
56
83
 
57
- callback(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str));
58
- result = rb_str_new((char *)out_buf->data, out_buf->size);
59
- bufrelease(out_buf);
60
-
61
- #ifdef HAVE_RUBY_ENCODING_H
62
- rb_enc_copy(result, str);
63
- #endif
84
+ if (do_escape(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str))) {
85
+ VALUE result = eu_new_str(buf.ptr, buf.size);
86
+ gh_buf_free(&buf);
87
+ return result;
88
+ }
64
89
 
65
- return result;
90
+ return str;
66
91
  }
67
92
 
68
93
 
@@ -71,8 +96,8 @@ rb_eu__generic(
71
96
  */
72
97
  static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
73
98
  {
74
- VALUE rb_out_buf, str, rb_secure;
75
- struct buf *out_buf;
99
+ VALUE str, rb_secure;
100
+ gh_buf buf = GH_BUF_INIT;
76
101
  int secure = g_html_secure;
77
102
 
78
103
  if (rb_scan_args(argc, argv, "11", &str, &rb_secure) == 2) {
@@ -82,23 +107,29 @@ static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
82
107
  }
83
108
 
84
109
  Check_Type(str, T_STRING);
85
- out_buf = bufnew(128);
86
-
87
- houdini_escape_html(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure);
110
+ check_utf8_encoding(str);
88
111
 
89
- rb_out_buf = rb_str_new((char *)out_buf->data, out_buf->size);
90
- bufrelease(out_buf);
91
-
92
- #ifdef HAVE_RUBY_ENCODING_H
93
- rb_enc_copy(rb_out_buf, str);
94
- #endif
112
+ if (houdini_escape_html0(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure)) {
113
+ VALUE result = eu_new_str(buf.ptr, buf.size);
114
+ gh_buf_free(&buf);
115
+ return result;
116
+ }
95
117
 
96
- return rb_out_buf;
118
+ return str;
97
119
  }
98
120
 
99
121
  static VALUE rb_eu_unescape_html(VALUE self, VALUE str)
100
122
  {
101
- return rb_eu__generic(self, str, &houdini_unescape_html, 128);
123
+ return rb_eu__generic(str, &houdini_unescape_html);
124
+ }
125
+
126
+
127
+ /**
128
+ * XML methods
129
+ */
130
+ static VALUE rb_eu_escape_xml(VALUE self, VALUE str)
131
+ {
132
+ return rb_eu__generic(str, &houdini_escape_xml);
102
133
  }
103
134
 
104
135
 
@@ -107,12 +138,12 @@ static VALUE rb_eu_unescape_html(VALUE self, VALUE str)
107
138
  */
108
139
  static VALUE rb_eu_escape_js(VALUE self, VALUE str)
109
140
  {
110
- return rb_eu__generic(self, str, &houdini_escape_js, 128);
141
+ return rb_eu__generic(str, &houdini_escape_js);
111
142
  }
112
143
 
113
144
  static VALUE rb_eu_unescape_js(VALUE self, VALUE str)
114
145
  {
115
- return rb_eu__generic(self, str, &houdini_unescape_js, 128);
146
+ return rb_eu__generic(str, &houdini_unescape_js);
116
147
  }
117
148
 
118
149
 
@@ -121,12 +152,12 @@ static VALUE rb_eu_unescape_js(VALUE self, VALUE str)
121
152
  */
122
153
  static VALUE rb_eu_escape_url(VALUE self, VALUE str)
123
154
  {
124
- return rb_eu__generic(self, str, &houdini_escape_url, 32);
155
+ return rb_eu__generic(str, &houdini_escape_url);
125
156
  }
126
157
 
127
158
  static VALUE rb_eu_unescape_url(VALUE self, VALUE str)
128
159
  {
129
- return rb_eu__generic(self, str, &houdini_unescape_url, 32);
160
+ return rb_eu__generic(str, &houdini_unescape_url);
130
161
  }
131
162
 
132
163
 
@@ -135,12 +166,12 @@ static VALUE rb_eu_unescape_url(VALUE self, VALUE str)
135
166
  */
136
167
  static VALUE rb_eu_escape_uri(VALUE self, VALUE str)
137
168
  {
138
- return rb_eu__generic(self, str, &houdini_escape_uri, 32);
169
+ return rb_eu__generic(str, &houdini_escape_uri);
139
170
  }
140
171
 
141
172
  static VALUE rb_eu_unescape_uri(VALUE self, VALUE str)
142
173
  {
143
- return rb_eu__generic(self, str, &houdini_unescape_uri, 32);
174
+ return rb_eu__generic(str, &houdini_unescape_uri);
144
175
  }
145
176
 
146
177
 
@@ -149,9 +180,16 @@ static VALUE rb_eu_unescape_uri(VALUE self, VALUE str)
149
180
  */
150
181
  void Init_escape_utils()
151
182
  {
183
+ #ifdef HAVE_RUBY_ENCODING_H
184
+ VALUE rb_cEncoding = rb_const_get(rb_cObject, rb_intern("Encoding"));
185
+ rb_eEncodingCompatibilityError = rb_const_get(rb_cEncoding, rb_intern("CompatibilityError"));
186
+ #endif
187
+
152
188
  rb_mEscapeUtils = rb_define_module("EscapeUtils");
189
+
153
190
  rb_define_method(rb_mEscapeUtils, "escape_html", rb_eu_escape_html, -1);
154
191
  rb_define_method(rb_mEscapeUtils, "unescape_html", rb_eu_unescape_html, 1);
192
+ rb_define_method(rb_mEscapeUtils, "escape_xml", rb_eu_escape_xml, 1);
155
193
  rb_define_method(rb_mEscapeUtils, "escape_javascript", rb_eu_escape_js, 1);
156
194
  rb_define_method(rb_mEscapeUtils, "unescape_javascript", rb_eu_unescape_js, 1);
157
195
  rb_define_method(rb_mEscapeUtils, "escape_url", rb_eu_escape_url, 1);
@@ -14,4 +14,4 @@ else
14
14
  $CFLAGS << " -DRB_CVAR_SET_ARITY=4 "
15
15
  end
16
16
 
17
- create_makefile("escape_utils")
17
+ create_makefile("escape_utils/escape_utils")
@@ -1,15 +1,44 @@
1
1
  #ifndef __HOUDINI_H__
2
2
  #define __HOUDINI_H__
3
3
 
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ #include <stdint.h>
4
9
  #include "buffer.h"
5
10
 
6
- extern void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size, int secure);
7
- extern void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size);
8
- extern void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size);
9
- extern void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size);
10
- extern void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size);
11
- extern void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size);
12
- extern void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size);
13
- extern void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size);
11
+ #define likely(x) __builtin_expect((x),1)
12
+ #define unlikely(x) __builtin_expect((x),0)
13
+
14
+ #ifdef HOUDINI_USE_LOCALE
15
+ # define _isxdigit(c) isxdigit(c)
16
+ # define _isdigit(c) isdigit(c)
17
+ #else
18
+ /*
19
+ * Helper _isdigit methods -- do not trust the current locale
20
+ * */
21
+ # define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL)
22
+ # define _isdigit(c) ((c) >= '0' && (c) <= '9')
23
+ #endif
24
+
25
+ #define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
26
+ #define HOUDINI_UNESCAPED_SIZE(x) (x)
27
+
28
+ extern int houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size);
29
+ extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure);
30
+ extern int houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size);
31
+ extern int houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size);
32
+ extern int houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size);
33
+ extern int houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size);
34
+ extern int houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size);
35
+ extern int houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size);
36
+ extern int houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size);
37
+ extern int houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size);
38
+ extern int houdini_unescape_js(gh_buf *ob, const uint8_t *src, size_t size);
39
+
40
+ #ifdef __cplusplus
41
+ }
42
+ #endif
14
43
 
15
44
  #endif