escape_utils 1.2.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/ci.yml +43 -0
  3. data/.gitignore +0 -1
  4. data/CHANGELOG.md +23 -0
  5. data/Gemfile +15 -0
  6. data/README.md +48 -91
  7. data/Rakefile +4 -2
  8. data/benchmark/html_escape_once.rb +25 -0
  9. data/benchmark/javascript_escape.rb +1 -1
  10. data/benchmark/javascript_unescape.rb +1 -1
  11. data/benchmark/url_decode.rb +28 -0
  12. data/benchmark/url_encode.rb +37 -0
  13. data/benchmark/xml_escape.rb +7 -11
  14. data/bin/console +8 -0
  15. data/escape_utils.gemspec +1 -12
  16. data/ext/escape_utils/escape_utils.c +8 -115
  17. data/ext/escape_utils/houdini.h +3 -5
  18. data/ext/escape_utils/houdini_html_e.c +52 -24
  19. data/ext/escape_utils/houdini_js_e.c +15 -3
  20. data/ext/escape_utils/houdini_uri_e.c +7 -18
  21. data/ext/escape_utils/houdini_uri_u.c +5 -15
  22. data/ext/escape_utils/houdini_xml_e.c +15 -1
  23. data/lib/escape_utils/html/cgi.rb +10 -8
  24. data/lib/escape_utils/html/erb.rb +1 -10
  25. data/lib/escape_utils/html/haml.rb +1 -7
  26. data/lib/escape_utils/html/rack.rb +3 -3
  27. data/lib/escape_utils/html_safety.rb +13 -0
  28. data/lib/escape_utils/url/cgi.rb +0 -8
  29. data/lib/escape_utils/url/erb.rb +1 -1
  30. data/lib/escape_utils/url/rack.rb +0 -12
  31. data/lib/escape_utils/url/uri.rb +11 -7
  32. data/lib/escape_utils/version.rb +1 -1
  33. data/lib/escape_utils/xml/builder.rb +2 -2
  34. data/lib/escape_utils.rb +61 -9
  35. data/test/helper.rb +16 -3
  36. data/test/html/escape_test.rb +66 -42
  37. data/test/html/unescape_test.rb +3 -21
  38. data/test/html_safety_test.rb +1 -27
  39. data/test/javascript/escape_test.rb +53 -20
  40. data/test/javascript/unescape_test.rb +16 -18
  41. data/test/query/escape_test.rb +3 -21
  42. data/test/query/unescape_test.rb +5 -23
  43. data/test/uri/escape_test.rb +16 -18
  44. data/test/uri/unescape_test.rb +17 -19
  45. data/test/uri_component/escape_test.rb +15 -17
  46. data/test/uri_component/unescape_test.rb +17 -19
  47. data/test/url/escape_test.rb +3 -21
  48. data/test/url/unescape_test.rb +5 -23
  49. data/test/xml/escape_test.rb +15 -17
  50. metadata +14 -127
  51. data/.travis.yml +0 -7
  52. data/benchmark/html_escape.rb +0 -68
  53. data/benchmark/html_unescape.rb +0 -35
  54. data/benchmark/url_escape.rb +0 -56
  55. data/benchmark/url_unescape.rb +0 -50
  56. data/ext/escape_utils/houdini_html_u.c +0 -122
@@ -9,11 +9,6 @@
9
9
 
10
10
  static VALUE rb_eEncodingCompatibilityError;
11
11
 
12
- static VALUE eu_new_str(const char *str, size_t len)
13
- {
14
- return rb_enc_str_new(str, len, rb_utf8_encoding());
15
- }
16
-
17
12
  static void check_utf8_encoding(VALUE str)
18
13
  {
19
14
  static rb_encoding *_cached[3] = {NULL, NULL, NULL};
@@ -34,41 +29,6 @@ static void check_utf8_encoding(VALUE str)
34
29
 
35
30
  typedef int (*houdini_cb)(gh_buf *, const uint8_t *, size_t);
36
31
 
37
- static VALUE rb_mEscapeUtils;
38
- static ID ID_at_html_safe, ID_new;
39
-
40
- /**
41
- * html_secure instance variable
42
- */
43
- static int g_html_secure = 1;
44
-
45
- static VALUE rb_eu_set_html_secure(VALUE self, VALUE val)
46
- {
47
- g_html_secure = RTEST(val);
48
- rb_ivar_set(self, rb_intern("@html_secure"), val);
49
- return val;
50
- }
51
-
52
- /**
53
- * html_safe_string_class instance variable
54
- */
55
- static VALUE rb_html_safe_string_class;
56
- static VALUE rb_html_safe_string_template_object;
57
-
58
- static VALUE rb_eu_set_html_safe_string_class(VALUE self, VALUE val)
59
- {
60
- Check_Type(val, T_CLASS);
61
-
62
- if (rb_funcall(val, rb_intern("<="), 1, rb_cString) == Qnil)
63
- rb_raise(rb_eArgError, "%s must be a descendent of String", rb_class2name(val));
64
-
65
- rb_html_safe_string_class = val;
66
- rb_html_safe_string_template_object = rb_class_new_instance(0, NULL, rb_html_safe_string_class);
67
- OBJ_FREEZE(rb_html_safe_string_template_object);
68
- rb_ivar_set(self, rb_intern("@html_safe_string_class"), val);
69
- return val;
70
- }
71
-
72
32
  /**
73
33
  * Generic template
74
34
  */
@@ -78,13 +38,13 @@ rb_eu__generic(VALUE str, houdini_cb do_escape)
78
38
  gh_buf buf = GH_BUF_INIT;
79
39
 
80
40
  if (NIL_P(str))
81
- return eu_new_str("", 0);
41
+ return rb_utf8_str_new("", 0);
82
42
 
83
43
  Check_Type(str, T_STRING);
84
44
  check_utf8_encoding(str);
85
45
 
86
46
  if (do_escape(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str))) {
87
- VALUE result = eu_new_str(buf.ptr, buf.size);
47
+ VALUE result = rb_utf8_str_new(buf.ptr, buf.size);
88
48
  gh_buf_free(&buf);
89
49
  return result;
90
50
  }
@@ -96,49 +56,15 @@ rb_eu__generic(VALUE str, houdini_cb do_escape)
96
56
  /**
97
57
  * HTML methods
98
58
  */
99
- static VALUE new_html_safe_string(const char *ptr, size_t len)
100
- {
101
- return rb_str_new_with_class(rb_html_safe_string_template_object, ptr, len);
102
- }
103
-
104
- static VALUE rb_eu_escape_html_as_html_safe(VALUE self, VALUE str)
105
- {
106
- VALUE result;
107
- int secure = g_html_secure;
108
- gh_buf buf = GH_BUF_INIT;
109
-
110
- Check_Type(str, T_STRING);
111
- check_utf8_encoding(str);
112
59
 
113
- if (houdini_escape_html0(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure)) {
114
- result = new_html_safe_string(buf.ptr, buf.size);
115
- gh_buf_free(&buf);
116
- } else {
117
- result = new_html_safe_string(RSTRING_PTR(str), RSTRING_LEN(str));
118
- }
119
-
120
- rb_ivar_set(result, ID_at_html_safe, Qtrue);
121
-
122
- return result;
123
- }
124
-
125
- static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
60
+ static VALUE rb_eu_escape_html_once(VALUE self, VALUE str)
126
61
  {
127
- VALUE str, rb_secure;
128
62
  gh_buf buf = GH_BUF_INIT;
129
- int secure = g_html_secure;
130
-
131
- if (rb_scan_args(argc, argv, "11", &str, &rb_secure) == 2) {
132
- if (rb_secure == Qfalse) {
133
- secure = 0;
134
- }
135
- }
136
-
137
63
  Check_Type(str, T_STRING);
138
64
  check_utf8_encoding(str);
139
65
 
140
- if (houdini_escape_html0(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure)) {
141
- VALUE result = eu_new_str(buf.ptr, buf.size);
66
+ if (houdini_escape_html_once(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str))) {
67
+ VALUE result = rb_utf8_str_new(buf.ptr, buf.size);
142
68
  gh_buf_free(&buf);
143
69
  return result;
144
70
  }
@@ -146,11 +72,6 @@ static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
146
72
  return str;
147
73
  }
148
74
 
149
- static VALUE rb_eu_unescape_html(VALUE self, VALUE str)
150
- {
151
- return rb_eu__generic(str, &houdini_unescape_html);
152
- }
153
-
154
75
 
155
76
  /**
156
77
  * XML methods
@@ -166,7 +87,7 @@ static VALUE rb_eu_escape_xml(VALUE self, VALUE str)
166
87
  */
167
88
  static VALUE rb_eu_escape_js(VALUE self, VALUE str)
168
89
  {
169
- return rb_eu__generic(str, &houdini_escape_js);
90
+ return rb_eu__generic(rb_obj_as_string(str), &houdini_escape_js);
170
91
  }
171
92
 
172
93
  static VALUE rb_eu_unescape_js(VALUE self, VALUE str)
@@ -174,21 +95,6 @@ static VALUE rb_eu_unescape_js(VALUE self, VALUE str)
174
95
  return rb_eu__generic(str, &houdini_unescape_js);
175
96
  }
176
97
 
177
-
178
- /**
179
- * URL methods
180
- */
181
- static VALUE rb_eu_escape_url(VALUE self, VALUE str)
182
- {
183
- return rb_eu__generic(str, &houdini_escape_url);
184
- }
185
-
186
- static VALUE rb_eu_unescape_url(VALUE self, VALUE str)
187
- {
188
- return rb_eu__generic(str, &houdini_unescape_url);
189
- }
190
-
191
-
192
98
  /**
193
99
  * URI methods
194
100
  */
@@ -215,7 +121,6 @@ static VALUE rb_eu_unescape_uri_component(VALUE self, VALUE str)
215
121
  return rb_eu__generic(str, &houdini_unescape_uri_component);
216
122
  }
217
123
 
218
-
219
124
  /**
220
125
  * Ruby Extension initializer
221
126
  */
@@ -224,26 +129,14 @@ void Init_escape_utils()
224
129
  {
225
130
  rb_eEncodingCompatibilityError = rb_const_get(rb_cEncoding, rb_intern("CompatibilityError"));
226
131
 
227
- ID_new = rb_intern("new");
228
- ID_at_html_safe = rb_intern("@html_safe");
229
- rb_global_variable(&rb_html_safe_string_class);
230
- rb_global_variable(&rb_html_safe_string_template_object);
231
-
232
- rb_mEscapeUtils = rb_define_module("EscapeUtils");
233
- rb_define_method(rb_mEscapeUtils, "escape_html_as_html_safe", rb_eu_escape_html_as_html_safe, 1);
234
- rb_define_method(rb_mEscapeUtils, "escape_html", rb_eu_escape_html, -1);
235
- rb_define_method(rb_mEscapeUtils, "unescape_html", rb_eu_unescape_html, 1);
132
+ VALUE rb_mEscapeUtils = rb_define_module("EscapeUtils");
133
+ rb_define_method(rb_mEscapeUtils, "escape_html_once", rb_eu_escape_html_once, 1);
236
134
  rb_define_method(rb_mEscapeUtils, "escape_xml", rb_eu_escape_xml, 1);
237
135
  rb_define_method(rb_mEscapeUtils, "escape_javascript", rb_eu_escape_js, 1);
238
136
  rb_define_method(rb_mEscapeUtils, "unescape_javascript", rb_eu_unescape_js, 1);
239
- rb_define_method(rb_mEscapeUtils, "escape_url", rb_eu_escape_url, 1);
240
- rb_define_method(rb_mEscapeUtils, "unescape_url", rb_eu_unescape_url, 1);
241
137
  rb_define_method(rb_mEscapeUtils, "escape_uri", rb_eu_escape_uri, 1);
242
138
  rb_define_method(rb_mEscapeUtils, "unescape_uri", rb_eu_unescape_uri, 1);
243
139
  rb_define_method(rb_mEscapeUtils, "escape_uri_component", rb_eu_escape_uri_component, 1);
244
140
  rb_define_method(rb_mEscapeUtils, "unescape_uri_component", rb_eu_unescape_uri_component, 1);
245
-
246
- rb_define_singleton_method(rb_mEscapeUtils, "html_secure=", rb_eu_set_html_secure, 1);
247
- rb_define_singleton_method(rb_mEscapeUtils, "html_safe_string_class=", rb_eu_set_html_safe_string_class, 1);
248
141
  }
249
142
 
@@ -22,20 +22,18 @@ extern "C" {
22
22
  # define _isdigit(c) ((c) >= '0' && (c) <= '9')
23
23
  #endif
24
24
 
25
+ #define _isasciialpha(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
26
+
25
27
  #define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
26
28
  #define HOUDINI_UNESCAPED_SIZE(x) (x)
27
29
 
28
- extern int houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size);
29
- extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure);
30
- extern int houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size);
30
+ extern int houdini_escape_html_once(gh_buf *ob, const uint8_t *src, size_t size);
31
31
  extern int houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size);
32
32
  extern int houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size);
33
33
  extern int houdini_escape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
34
- extern int houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size);
35
34
  extern int houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size);
36
35
  extern int houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size);
37
36
  extern int houdini_unescape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
38
- extern int houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size);
39
37
  extern int houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size);
40
38
  extern int houdini_unescape_js(gh_buf *ob, const uint8_t *src, size_t size);
41
39
 
@@ -18,8 +18,8 @@
18
18
  static const char HTML_ESCAPE_TABLE[] = {
19
19
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
20
20
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
21
- 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
22
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
21
+ 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0,
22
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5, 0,
23
23
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
24
24
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
25
25
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -35,24 +35,64 @@ static const char HTML_ESCAPE_TABLE[] = {
35
35
  };
36
36
 
37
37
  static const char *HTML_ESCAPES[] = {
38
- "",
39
- "&quot;",
40
- "&amp;",
41
- "&#39;",
42
- "&#47;",
43
- "&lt;",
44
- "&gt;"
38
+ "",
39
+ "&quot;",
40
+ "&amp;",
41
+ "&#39;",
42
+ "&lt;",
43
+ "&gt;"
45
44
  };
46
45
 
46
+ static const int HTML_ESCAPES_LENGTHS[] = {
47
+ 0,
48
+ 6,
49
+ 5,
50
+ 5,
51
+ 4,
52
+ 4
53
+ };
54
+
55
+ static int
56
+ is_entity(const uint8_t *src, size_t size)
57
+ {
58
+ size_t i = 0;
59
+
60
+ if (size == 0 || src[0] != '&')
61
+ return false;
62
+
63
+ if (size > 16)
64
+ size = 16;
65
+
66
+ if (size >= 4 && src[1] == '#') {
67
+ if (_isdigit(src[2])) {
68
+ for (i = 3; i < size && _isdigit(src[i]); ++i);
69
+ }
70
+ else if ((src[2] == 'x' || src[2] == 'X') && _isxdigit(src[3])) {
71
+ for (i = 4; i < size && _isxdigit(src[i]); ++i);
72
+ }
73
+ else return false;
74
+ }
75
+ else {
76
+ for (i = 1; i < size && _isasciialpha(src[i]); ++i);
77
+ if (i == 1) return false;
78
+ }
79
+
80
+ return i < size && src[i] == ';';
81
+ }
82
+
47
83
  int
48
- houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
84
+ houdini_escape_html_once(gh_buf *ob, const uint8_t *src, size_t size)
49
85
  {
50
86
  size_t i = 0, org, esc = 0;
51
87
 
52
88
  while (i < size) {
53
89
  org = i;
54
- while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
90
+ while (i < size) {
91
+ esc = HTML_ESCAPE_TABLE[src[i]];
92
+ if (unlikely(esc != 0) && !is_entity(src + i, size - i))
93
+ break;
55
94
  i++;
95
+ }
56
96
 
57
97
  if (i > org) {
58
98
  if (unlikely(org == 0)) {
@@ -69,22 +109,10 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
69
109
  if (unlikely(i >= size))
70
110
  break;
71
111
 
72
- /* The forward slash is only escaped in secure mode */
73
- if (src[i] == '/' && !secure) {
74
- gh_buf_putc(ob, '/');
75
- } else {
76
- gh_buf_puts(ob, HTML_ESCAPES[esc]);
77
- }
112
+ gh_buf_put(ob, HTML_ESCAPES[esc], HTML_ESCAPES_LENGTHS[esc]);
78
113
 
79
114
  i++;
80
115
  }
81
116
 
82
117
  return 1;
83
118
  }
84
-
85
- int
86
- houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size)
87
- {
88
- return houdini_escape_html0(ob, src, size, 1);
89
- }
90
-
@@ -7,10 +7,11 @@
7
7
  static const char JS_ESCAPE[] = {
8
8
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
9
9
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
10
- 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
10
+ 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
11
11
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12
12
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13
13
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
14
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14
15
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
15
16
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16
17
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -18,8 +19,7 @@ static const char JS_ESCAPE[] = {
18
19
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
19
20
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
20
21
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
21
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
22
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
22
+ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
23
23
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
24
24
  };
25
25
 
@@ -51,6 +51,18 @@ houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size)
51
51
  ch = src[i];
52
52
 
53
53
  switch (ch) {
54
+ case 226:
55
+ if (i + 2 < size && src[i + 1] == 128) {
56
+ if (src[i + 2] == 168) {
57
+ gh_buf_put(ob, "&#x2028;", 8);
58
+ i += 2;
59
+ } else if (src[i + 2] == 169) {
60
+ gh_buf_put(ob, "&#x2029;", 8);
61
+ i += 2;
62
+ }
63
+ }
64
+ break;
65
+
54
66
  case '/':
55
67
  /*
56
68
  * Escape only if preceded by a lt
@@ -12,7 +12,7 @@ static const char URL_SAFE[] = {
12
12
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13
13
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
14
14
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
15
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
15
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
16
16
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17
17
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18
18
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -44,7 +44,7 @@ static const char URI_SAFE[] = {
44
44
 
45
45
  static int
46
46
  escape(gh_buf *ob, const uint8_t *src, size_t size,
47
- const char *safe_table, bool escape_plus)
47
+ const char *safe_table)
48
48
  {
49
49
  static const uint8_t hex_chars[] = "0123456789ABCDEF";
50
50
 
@@ -73,13 +73,9 @@ escape(gh_buf *ob, const uint8_t *src, size_t size,
73
73
  if (i >= size)
74
74
  break;
75
75
 
76
- if (src[i] == ' ' && escape_plus) {
77
- gh_buf_putc(ob, '+');
78
- } else {
79
- hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
80
- hex_str[2] = hex_chars[src[i] & 0xF];
81
- gh_buf_put(ob, hex_str, 3);
82
- }
76
+ hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
77
+ hex_str[2] = hex_chars[src[i] & 0xF];
78
+ gh_buf_put(ob, hex_str, 3);
83
79
 
84
80
  i++;
85
81
  }
@@ -90,18 +86,11 @@ escape(gh_buf *ob, const uint8_t *src, size_t size,
90
86
  int
91
87
  houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size)
92
88
  {
93
- return escape(ob, src, size, URI_SAFE, false);
89
+ return escape(ob, src, size, URI_SAFE);
94
90
  }
95
91
 
96
92
  int
97
93
  houdini_escape_uri_component(gh_buf *ob, const uint8_t *src, size_t size)
98
94
  {
99
- return escape(ob, src, size, URL_SAFE, false);
95
+ return escape(ob, src, size, URL_SAFE);
100
96
  }
101
-
102
- int
103
- houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size)
104
- {
105
- return escape(ob, src, size, URL_SAFE, true);
106
- }
107
-
@@ -7,13 +7,13 @@
7
7
  #define hex2c(c) ((c | 32) % 39 - 9)
8
8
 
9
9
  static int
10
- unescape(gh_buf *ob, const uint8_t *src, size_t size, bool unescape_plus)
10
+ unescape(gh_buf *ob, const uint8_t *src, size_t size)
11
11
  {
12
12
  size_t i = 0, org;
13
13
 
14
14
  while (i < size) {
15
15
  org = i;
16
- while (i < size && src[i] != '%' && src[i] != '+')
16
+ while (i < size && src[i] != '%')
17
17
  i++;
18
18
 
19
19
  if (likely(i > org)) {
@@ -31,11 +31,7 @@ unescape(gh_buf *ob, const uint8_t *src, size_t size, bool unescape_plus)
31
31
  if (i >= size)
32
32
  break;
33
33
 
34
- if (src[i++] == '+') {
35
- gh_buf_putc(ob, unescape_plus ? ' ' : '+');
36
- continue;
37
- }
38
-
34
+ i++;
39
35
  if (i + 1 < size && _isxdigit(src[i]) && _isxdigit(src[i + 1])) {
40
36
  unsigned char new_char = (hex2c(src[i]) << 4) + hex2c(src[i + 1]);
41
37
  gh_buf_putc(ob, new_char);
@@ -51,18 +47,12 @@ unescape(gh_buf *ob, const uint8_t *src, size_t size, bool unescape_plus)
51
47
  int
52
48
  houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size)
53
49
  {
54
- return unescape(ob, src, size, false);
50
+ return unescape(ob, src, size);
55
51
  }
56
52
 
57
53
  int
58
54
  houdini_unescape_uri_component(gh_buf *ob, const uint8_t *src, size_t size)
59
55
  {
60
- return unescape(ob, src, size, false);
61
- }
62
-
63
- int
64
- houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size)
65
- {
66
- return unescape(ob, src, size, true);
56
+ return unescape(ob, src, size);
67
57
  }
68
58
 
@@ -25,6 +25,20 @@ static const char *LOOKUP_CODES[] = {
25
25
  "&gt;"
26
26
  };
27
27
 
28
+ static const int LOOKUP_CODES_LENGTHS[] = {
29
+ 0,
30
+ 0,
31
+ 0,
32
+ 0,
33
+ 0,
34
+ 1,
35
+ 6,
36
+ 5,
37
+ 6,
38
+ 4,
39
+ 4
40
+ };
41
+
28
42
  static const char CODE_INVALID = 5;
29
43
 
30
44
  static const char XML_LOOKUP_TABLE[] = {
@@ -129,7 +143,7 @@ houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size)
129
143
  if (end >= size)
130
144
  break;
131
145
 
132
- gh_buf_puts(ob, LOOKUP_CODES[code]);
146
+ gh_buf_put(ob, LOOKUP_CODES[code], LOOKUP_CODES_LENGTHS[code]);
133
147
  }
134
148
 
135
149
  return 1;
@@ -1,11 +1,13 @@
1
- class CGI
2
- extend ::EscapeUtils::HtmlSafety
3
-
4
- class << self
5
- alias escapeHTML _escape_html
1
+ module EscapeUtils
2
+ module CGIHtmlSafety
3
+ def escapeHTML(html)
4
+ ::EscapeUtils::HtmlSafety.escape_once(html) { |s| super(s) }
5
+ end
6
6
 
7
- def unescapeHTML(s)
8
- EscapeUtils.unescape_html(s.to_s)
7
+ def unescapeHTML(html)
8
+ super(html.to_s)
9
9
  end
10
10
  end
11
- end
11
+ end
12
+
13
+ CGI.singleton_class.prepend(EscapeUtils::CGIHtmlSafety)
@@ -1,10 +1 @@
1
- class ERB
2
- module Util
3
- include ::EscapeUtils::HtmlSafety
4
-
5
- alias html_escape _escape_html
6
- alias h html_escape
7
- module_function :h
8
- module_function :html_escape
9
- end
10
- end
1
+ require 'escape_utils/html/cgi' # ERB delegates to EscapeUtils.escapeHTML
@@ -1,7 +1 @@
1
- module Haml
2
- module Helpers
3
- include ::EscapeUtils::HtmlSafety
4
-
5
- alias html_escape _escape_html
6
- end
7
- end
1
+ require 'escape_utils/html/cgi' # HAML delegates to EscapeUtils.escapeHTML
@@ -1,8 +1,8 @@
1
1
  module Rack
2
2
  module Utils
3
- include ::EscapeUtils::HtmlSafety
4
-
5
- alias escape_html _escape_html
3
+ def escape_html(html)
4
+ ::EscapeUtils::HtmlSafety.escape_once(html) { |s| CGI.escapeHTML(s) }
5
+ end
6
6
  module_function :escape_html
7
7
  end
8
8
  end
@@ -1,6 +1,15 @@
1
1
  module EscapeUtils
2
2
  module HtmlSafety
3
3
  if "".respond_to? :html_safe?
4
+ def self.escape_once(s)
5
+ s = s.to_s
6
+ if s.html_safe?
7
+ s.html_safe
8
+ else
9
+ yield(s).html_safe
10
+ end
11
+ end
12
+
4
13
  def _escape_html(s)
5
14
  if s.html_safe?
6
15
  s.to_s.html_safe
@@ -9,6 +18,10 @@ module EscapeUtils
9
18
  end
10
19
  end
11
20
  else
21
+ def self.escape_once(s)
22
+ yield s.to_s
23
+ end
24
+
12
25
  def _escape_html(s)
13
26
  EscapeUtils.escape_html(s.to_s)
14
27
  end
@@ -1,8 +0,0 @@
1
- class CGI
2
- def self.escape(s)
3
- EscapeUtils.escape_url(s.to_s)
4
- end
5
- def self.unescape(s)
6
- EscapeUtils.unescape_url(s.to_s)
7
- end
8
- end
@@ -1,7 +1,7 @@
1
1
  class ERB
2
2
  module Util
3
3
  def url_encode(s)
4
- EscapeUtils.escape_url(s.to_s)
4
+ EscapeUtils.escape_uri(s.to_s)
5
5
  end
6
6
  alias u url_encode
7
7
  module_function :u
@@ -1,12 +0,0 @@
1
- module Rack
2
- module Utils
3
- def escape(url)
4
- EscapeUtils.escape_url(url.to_s)
5
- end
6
- def unescape(url)
7
- EscapeUtils.unescape_url(url.to_s)
8
- end
9
- module_function :escape
10
- module_function :unescape
11
- end
12
- end
@@ -1,8 +1,12 @@
1
- module URI
2
- def self.escape(s, unsafe=nil)
3
- EscapeUtils.escape_uri(s.to_s)
1
+ require 'uri'
2
+
3
+ if URI.respond_to?(:escape) # Was removed in Ruby 3.0. Let's not bring it back
4
+ module URI
5
+ def self.escape(s, unsafe=nil)
6
+ EscapeUtils.escape_uri(s.to_s)
7
+ end
8
+ def self.unescape(s)
9
+ EscapeUtils.unescape_uri(s.to_s)
10
+ end
4
11
  end
5
- def self.unescape(s)
6
- EscapeUtils.unescape_uri(s.to_s)
7
- end
8
- end
12
+ end
@@ -1,3 +1,3 @@
1
1
  module EscapeUtils
2
- VERSION = "1.2.0"
2
+ VERSION = "1.3.0"
3
3
  end
@@ -1,8 +1,8 @@
1
1
  module Builder
2
- class XmlBase < BlankSlate
2
+ class XmlBase
3
3
  private
4
4
  def _escape(text)
5
- EscapeUtils.escape_xml(text.to_s)
5
+ ::EscapeUtils.escape_xml(text.to_s)
6
6
  end
7
7
  end
8
8
  end