escape_utils 1.2.0 → 1.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
Files changed (56)
  1. checksums.yaml +5 -5
  2. data/.github/workflows/ci.yml +43 -0
  3. data/.gitignore +0 -1
  4. data/CHANGELOG.md +23 -0
  5. data/Gemfile +15 -0
  6. data/README.md +48 -91
  7. data/Rakefile +4 -2
  8. data/benchmark/html_escape_once.rb +25 -0
  9. data/benchmark/javascript_escape.rb +1 -1
  10. data/benchmark/javascript_unescape.rb +1 -1
  11. data/benchmark/url_decode.rb +28 -0
  12. data/benchmark/url_encode.rb +37 -0
  13. data/benchmark/xml_escape.rb +7 -11
  14. data/bin/console +8 -0
  15. data/escape_utils.gemspec +1 -12
  16. data/ext/escape_utils/escape_utils.c +8 -115
  17. data/ext/escape_utils/houdini.h +3 -5
  18. data/ext/escape_utils/houdini_html_e.c +52 -24
  19. data/ext/escape_utils/houdini_js_e.c +15 -3
  20. data/ext/escape_utils/houdini_uri_e.c +7 -18
  21. data/ext/escape_utils/houdini_uri_u.c +5 -15
  22. data/ext/escape_utils/houdini_xml_e.c +15 -1
  23. data/lib/escape_utils/html/cgi.rb +10 -8
  24. data/lib/escape_utils/html/erb.rb +1 -10
  25. data/lib/escape_utils/html/haml.rb +1 -7
  26. data/lib/escape_utils/html/rack.rb +3 -3
  27. data/lib/escape_utils/html_safety.rb +13 -0
  28. data/lib/escape_utils/url/cgi.rb +0 -8
  29. data/lib/escape_utils/url/erb.rb +1 -1
  30. data/lib/escape_utils/url/rack.rb +0 -12
  31. data/lib/escape_utils/url/uri.rb +11 -7
  32. data/lib/escape_utils/version.rb +1 -1
  33. data/lib/escape_utils/xml/builder.rb +2 -2
  34. data/lib/escape_utils.rb +61 -9
  35. data/test/helper.rb +16 -3
  36. data/test/html/escape_test.rb +66 -42
  37. data/test/html/unescape_test.rb +3 -21
  38. data/test/html_safety_test.rb +1 -27
  39. data/test/javascript/escape_test.rb +53 -20
  40. data/test/javascript/unescape_test.rb +16 -18
  41. data/test/query/escape_test.rb +3 -21
  42. data/test/query/unescape_test.rb +5 -23
  43. data/test/uri/escape_test.rb +16 -18
  44. data/test/uri/unescape_test.rb +17 -19
  45. data/test/uri_component/escape_test.rb +15 -17
  46. data/test/uri_component/unescape_test.rb +17 -19
  47. data/test/url/escape_test.rb +3 -21
  48. data/test/url/unescape_test.rb +5 -23
  49. data/test/xml/escape_test.rb +15 -17
  50. metadata +14 -127
  51. data/.travis.yml +0 -7
  52. data/benchmark/html_escape.rb +0 -68
  53. data/benchmark/html_unescape.rb +0 -35
  54. data/benchmark/url_escape.rb +0 -56
  55. data/benchmark/url_unescape.rb +0 -50
  56. data/ext/escape_utils/houdini_html_u.c +0 -122
@@ -9,11 +9,6 @@
9
9
 
10
10
  static VALUE rb_eEncodingCompatibilityError;
11
11
 
12
- static VALUE eu_new_str(const char *str, size_t len)
13
- {
14
- return rb_enc_str_new(str, len, rb_utf8_encoding());
15
- }
16
-
17
12
  static void check_utf8_encoding(VALUE str)
18
13
  {
19
14
  static rb_encoding *_cached[3] = {NULL, NULL, NULL};
@@ -34,41 +29,6 @@ static void check_utf8_encoding(VALUE str)
34
29
 
35
30
  typedef int (*houdini_cb)(gh_buf *, const uint8_t *, size_t);
36
31
 
37
- static VALUE rb_mEscapeUtils;
38
- static ID ID_at_html_safe, ID_new;
39
-
40
- /**
41
- * html_secure instance variable
42
- */
43
- static int g_html_secure = 1;
44
-
45
- static VALUE rb_eu_set_html_secure(VALUE self, VALUE val)
46
- {
47
- g_html_secure = RTEST(val);
48
- rb_ivar_set(self, rb_intern("@html_secure"), val);
49
- return val;
50
- }
51
-
52
- /**
53
- * html_safe_string_class instance variable
54
- */
55
- static VALUE rb_html_safe_string_class;
56
- static VALUE rb_html_safe_string_template_object;
57
-
58
- static VALUE rb_eu_set_html_safe_string_class(VALUE self, VALUE val)
59
- {
60
- Check_Type(val, T_CLASS);
61
-
62
- if (rb_funcall(val, rb_intern("<="), 1, rb_cString) == Qnil)
63
- rb_raise(rb_eArgError, "%s must be a descendent of String", rb_class2name(val));
64
-
65
- rb_html_safe_string_class = val;
66
- rb_html_safe_string_template_object = rb_class_new_instance(0, NULL, rb_html_safe_string_class);
67
- OBJ_FREEZE(rb_html_safe_string_template_object);
68
- rb_ivar_set(self, rb_intern("@html_safe_string_class"), val);
69
- return val;
70
- }
71
-
72
32
  /**
73
33
  * Generic template
74
34
  */
@@ -78,13 +38,13 @@ rb_eu__generic(VALUE str, houdini_cb do_escape)
78
38
  gh_buf buf = GH_BUF_INIT;
79
39
 
80
40
  if (NIL_P(str))
81
- return eu_new_str("", 0);
41
+ return rb_utf8_str_new("", 0);
82
42
 
83
43
  Check_Type(str, T_STRING);
84
44
  check_utf8_encoding(str);
85
45
 
86
46
  if (do_escape(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str))) {
87
- VALUE result = eu_new_str(buf.ptr, buf.size);
47
+ VALUE result = rb_utf8_str_new(buf.ptr, buf.size);
88
48
  gh_buf_free(&buf);
89
49
  return result;
90
50
  }
@@ -96,49 +56,15 @@ rb_eu__generic(VALUE str, houdini_cb do_escape)
96
56
  /**
97
57
  * HTML methods
98
58
  */
99
- static VALUE new_html_safe_string(const char *ptr, size_t len)
100
- {
101
- return rb_str_new_with_class(rb_html_safe_string_template_object, ptr, len);
102
- }
103
-
104
- static VALUE rb_eu_escape_html_as_html_safe(VALUE self, VALUE str)
105
- {
106
- VALUE result;
107
- int secure = g_html_secure;
108
- gh_buf buf = GH_BUF_INIT;
109
-
110
- Check_Type(str, T_STRING);
111
- check_utf8_encoding(str);
112
59
 
113
- if (houdini_escape_html0(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure)) {
114
- result = new_html_safe_string(buf.ptr, buf.size);
115
- gh_buf_free(&buf);
116
- } else {
117
- result = new_html_safe_string(RSTRING_PTR(str), RSTRING_LEN(str));
118
- }
119
-
120
- rb_ivar_set(result, ID_at_html_safe, Qtrue);
121
-
122
- return result;
123
- }
124
-
125
- static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
60
+ static VALUE rb_eu_escape_html_once(VALUE self, VALUE str)
126
61
  {
127
- VALUE str, rb_secure;
128
62
  gh_buf buf = GH_BUF_INIT;
129
- int secure = g_html_secure;
130
-
131
- if (rb_scan_args(argc, argv, "11", &str, &rb_secure) == 2) {
132
- if (rb_secure == Qfalse) {
133
- secure = 0;
134
- }
135
- }
136
-
137
63
  Check_Type(str, T_STRING);
138
64
  check_utf8_encoding(str);
139
65
 
140
- if (houdini_escape_html0(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure)) {
141
- VALUE result = eu_new_str(buf.ptr, buf.size);
66
+ if (houdini_escape_html_once(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str))) {
67
+ VALUE result = rb_utf8_str_new(buf.ptr, buf.size);
142
68
  gh_buf_free(&buf);
143
69
  return result;
144
70
  }
@@ -146,11 +72,6 @@ static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
146
72
  return str;
147
73
  }
148
74
 
149
- static VALUE rb_eu_unescape_html(VALUE self, VALUE str)
150
- {
151
- return rb_eu__generic(str, &houdini_unescape_html);
152
- }
153
-
154
75
 
155
76
  /**
156
77
  * XML methods
@@ -166,7 +87,7 @@ static VALUE rb_eu_escape_xml(VALUE self, VALUE str)
166
87
  */
167
88
  static VALUE rb_eu_escape_js(VALUE self, VALUE str)
168
89
  {
169
- return rb_eu__generic(str, &houdini_escape_js);
90
+ return rb_eu__generic(rb_obj_as_string(str), &houdini_escape_js);
170
91
  }
171
92
 
172
93
  static VALUE rb_eu_unescape_js(VALUE self, VALUE str)
@@ -174,21 +95,6 @@ static VALUE rb_eu_unescape_js(VALUE self, VALUE str)
174
95
  return rb_eu__generic(str, &houdini_unescape_js);
175
96
  }
176
97
 
177
-
178
- /**
179
- * URL methods
180
- */
181
- static VALUE rb_eu_escape_url(VALUE self, VALUE str)
182
- {
183
- return rb_eu__generic(str, &houdini_escape_url);
184
- }
185
-
186
- static VALUE rb_eu_unescape_url(VALUE self, VALUE str)
187
- {
188
- return rb_eu__generic(str, &houdini_unescape_url);
189
- }
190
-
191
-
192
98
  /**
193
99
  * URI methods
194
100
  */
@@ -215,7 +121,6 @@ static VALUE rb_eu_unescape_uri_component(VALUE self, VALUE str)
215
121
  return rb_eu__generic(str, &houdini_unescape_uri_component);
216
122
  }
217
123
 
218
-
219
124
  /**
220
125
  * Ruby Extension initializer
221
126
  */
@@ -224,26 +129,14 @@ void Init_escape_utils()
224
129
  {
225
130
  rb_eEncodingCompatibilityError = rb_const_get(rb_cEncoding, rb_intern("CompatibilityError"));
226
131
 
227
- ID_new = rb_intern("new");
228
- ID_at_html_safe = rb_intern("@html_safe");
229
- rb_global_variable(&rb_html_safe_string_class);
230
- rb_global_variable(&rb_html_safe_string_template_object);
231
-
232
- rb_mEscapeUtils = rb_define_module("EscapeUtils");
233
- rb_define_method(rb_mEscapeUtils, "escape_html_as_html_safe", rb_eu_escape_html_as_html_safe, 1);
234
- rb_define_method(rb_mEscapeUtils, "escape_html", rb_eu_escape_html, -1);
235
- rb_define_method(rb_mEscapeUtils, "unescape_html", rb_eu_unescape_html, 1);
132
+ VALUE rb_mEscapeUtils = rb_define_module("EscapeUtils");
133
+ rb_define_method(rb_mEscapeUtils, "escape_html_once", rb_eu_escape_html_once, 1);
236
134
  rb_define_method(rb_mEscapeUtils, "escape_xml", rb_eu_escape_xml, 1);
237
135
  rb_define_method(rb_mEscapeUtils, "escape_javascript", rb_eu_escape_js, 1);
238
136
  rb_define_method(rb_mEscapeUtils, "unescape_javascript", rb_eu_unescape_js, 1);
239
- rb_define_method(rb_mEscapeUtils, "escape_url", rb_eu_escape_url, 1);
240
- rb_define_method(rb_mEscapeUtils, "unescape_url", rb_eu_unescape_url, 1);
241
137
  rb_define_method(rb_mEscapeUtils, "escape_uri", rb_eu_escape_uri, 1);
242
138
  rb_define_method(rb_mEscapeUtils, "unescape_uri", rb_eu_unescape_uri, 1);
243
139
  rb_define_method(rb_mEscapeUtils, "escape_uri_component", rb_eu_escape_uri_component, 1);
244
140
  rb_define_method(rb_mEscapeUtils, "unescape_uri_component", rb_eu_unescape_uri_component, 1);
245
-
246
- rb_define_singleton_method(rb_mEscapeUtils, "html_secure=", rb_eu_set_html_secure, 1);
247
- rb_define_singleton_method(rb_mEscapeUtils, "html_safe_string_class=", rb_eu_set_html_safe_string_class, 1);
248
141
  }
249
142
 
@@ -22,20 +22,18 @@ extern "C" {
22
22
  # define _isdigit(c) ((c) >= '0' && (c) <= '9')
23
23
  #endif
24
24
 
25
+ #define _isasciialpha(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
26
+
25
27
  #define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
26
28
  #define HOUDINI_UNESCAPED_SIZE(x) (x)
27
29
 
28
- extern int houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size);
29
- extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure);
30
- extern int houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size);
30
+ extern int houdini_escape_html_once(gh_buf *ob, const uint8_t *src, size_t size);
31
31
  extern int houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size);
32
32
  extern int houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size);
33
33
  extern int houdini_escape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
34
- extern int houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size);
35
34
  extern int houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size);
36
35
  extern int houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size);
37
36
  extern int houdini_unescape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
38
- extern int houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size);
39
37
  extern int houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size);
40
38
  extern int houdini_unescape_js(gh_buf *ob, const uint8_t *src, size_t size);
41
39
 
@@ -18,8 +18,8 @@
18
18
  static const char HTML_ESCAPE_TABLE[] = {
19
19
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
20
20
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
21
- 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
22
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
21
+ 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0,
22
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5, 0,
23
23
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
24
24
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
25
25
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -35,24 +35,64 @@ static const char HTML_ESCAPE_TABLE[] = {
35
35
  };
36
36
 
37
37
  static const char *HTML_ESCAPES[] = {
38
- "",
39
- "&quot;",
40
- "&amp;",
41
- "&#39;",
42
- "&#47;",
43
- "&lt;",
44
- "&gt;"
38
+ "",
39
+ "&quot;",
40
+ "&amp;",
41
+ "&#39;",
42
+ "&lt;",
43
+ "&gt;"
45
44
  };
46
45
 
46
+ static const int HTML_ESCAPES_LENGTHS[] = {
47
+ 0,
48
+ 6,
49
+ 5,
50
+ 5,
51
+ 4,
52
+ 4
53
+ };
54
+
55
+ static int
56
+ is_entity(const uint8_t *src, size_t size)
57
+ {
58
+ size_t i = 0;
59
+
60
+ if (size == 0 || src[0] != '&')
61
+ return false;
62
+
63
+ if (size > 16)
64
+ size = 16;
65
+
66
+ if (size >= 4 && src[1] == '#') {
67
+ if (_isdigit(src[2])) {
68
+ for (i = 3; i < size && _isdigit(src[i]); ++i);
69
+ }
70
+ else if ((src[2] == 'x' || src[2] == 'X') && _isxdigit(src[3])) {
71
+ for (i = 4; i < size && _isxdigit(src[i]); ++i);
72
+ }
73
+ else return false;
74
+ }
75
+ else {
76
+ for (i = 1; i < size && _isasciialpha(src[i]); ++i);
77
+ if (i == 1) return false;
78
+ }
79
+
80
+ return i < size && src[i] == ';';
81
+ }
82
+
47
83
  int
48
- houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
84
+ houdini_escape_html_once(gh_buf *ob, const uint8_t *src, size_t size)
49
85
  {
50
86
  size_t i = 0, org, esc = 0;
51
87
 
52
88
  while (i < size) {
53
89
  org = i;
54
- while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
90
+ while (i < size) {
91
+ esc = HTML_ESCAPE_TABLE[src[i]];
92
+ if (unlikely(esc != 0) && !is_entity(src + i, size - i))
93
+ break;
55
94
  i++;
95
+ }
56
96
 
57
97
  if (i > org) {
58
98
  if (unlikely(org == 0)) {
@@ -69,22 +109,10 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
69
109
  if (unlikely(i >= size))
70
110
  break;
71
111
 
72
- /* The forward slash is only escaped in secure mode */
73
- if (src[i] == '/' && !secure) {
74
- gh_buf_putc(ob, '/');
75
- } else {
76
- gh_buf_puts(ob, HTML_ESCAPES[esc]);
77
- }
112
+ gh_buf_put(ob, HTML_ESCAPES[esc], HTML_ESCAPES_LENGTHS[esc]);
78
113
 
79
114
  i++;
80
115
  }
81
116
 
82
117
  return 1;
83
118
  }
84
-
85
- int
86
- houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size)
87
- {
88
- return houdini_escape_html0(ob, src, size, 1);
89
- }
90
-
@@ -7,10 +7,11 @@
7
7
  static const char JS_ESCAPE[] = {
8
8
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
9
9
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
10
- 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
10
+ 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
11
11
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12
12
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13
13
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
14
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14
15
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
15
16
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16
17
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -18,8 +19,7 @@ static const char JS_ESCAPE[] = {
18
19
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
19
20
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
20
21
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
21
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
22
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
22
+ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
23
23
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
24
24
  };
25
25
 
@@ -51,6 +51,18 @@ houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size)
51
51
  ch = src[i];
52
52
 
53
53
  switch (ch) {
54
+ case 226:
55
+ if (i + 2 < size && src[i + 1] == 128) {
56
+ if (src[i + 2] == 168) {
57
+ gh_buf_put(ob, "&#x2028;", 8);
58
+ i += 2;
59
+ } else if (src[i + 2] == 169) {
60
+ gh_buf_put(ob, "&#x2029;", 8);
61
+ i += 2;
62
+ }
63
+ }
64
+ break;
65
+
54
66
  case '/':
55
67
  /*
56
68
  * Escape only if preceded by a lt
@@ -12,7 +12,7 @@ static const char URL_SAFE[] = {
12
12
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13
13
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
14
14
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
15
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
15
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
16
16
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17
17
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18
18
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -44,7 +44,7 @@ static const char URI_SAFE[] = {
44
44
 
45
45
  static int
46
46
  escape(gh_buf *ob, const uint8_t *src, size_t size,
47
- const char *safe_table, bool escape_plus)
47
+ const char *safe_table)
48
48
  {
49
49
  static const uint8_t hex_chars[] = "0123456789ABCDEF";
50
50
 
@@ -73,13 +73,9 @@ escape(gh_buf *ob, const uint8_t *src, size_t size,
73
73
  if (i >= size)
74
74
  break;
75
75
 
76
- if (src[i] == ' ' && escape_plus) {
77
- gh_buf_putc(ob, '+');
78
- } else {
79
- hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
80
- hex_str[2] = hex_chars[src[i] & 0xF];
81
- gh_buf_put(ob, hex_str, 3);
82
- }
76
+ hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
77
+ hex_str[2] = hex_chars[src[i] & 0xF];
78
+ gh_buf_put(ob, hex_str, 3);
83
79
 
84
80
  i++;
85
81
  }
@@ -90,18 +86,11 @@ escape(gh_buf *ob, const uint8_t *src, size_t size,
90
86
  int
91
87
  houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size)
92
88
  {
93
- return escape(ob, src, size, URI_SAFE, false);
89
+ return escape(ob, src, size, URI_SAFE);
94
90
  }
95
91
 
96
92
  int
97
93
  houdini_escape_uri_component(gh_buf *ob, const uint8_t *src, size_t size)
98
94
  {
99
- return escape(ob, src, size, URL_SAFE, false);
95
+ return escape(ob, src, size, URL_SAFE);
100
96
  }
101
-
102
- int
103
- houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size)
104
- {
105
- return escape(ob, src, size, URL_SAFE, true);
106
- }
107
-
@@ -7,13 +7,13 @@
7
7
  #define hex2c(c) ((c | 32) % 39 - 9)
8
8
 
9
9
  static int
10
- unescape(gh_buf *ob, const uint8_t *src, size_t size, bool unescape_plus)
10
+ unescape(gh_buf *ob, const uint8_t *src, size_t size)
11
11
  {
12
12
  size_t i = 0, org;
13
13
 
14
14
  while (i < size) {
15
15
  org = i;
16
- while (i < size && src[i] != '%' && src[i] != '+')
16
+ while (i < size && src[i] != '%')
17
17
  i++;
18
18
 
19
19
  if (likely(i > org)) {
@@ -31,11 +31,7 @@ unescape(gh_buf *ob, const uint8_t *src, size_t size, bool unescape_plus)
31
31
  if (i >= size)
32
32
  break;
33
33
 
34
- if (src[i++] == '+') {
35
- gh_buf_putc(ob, unescape_plus ? ' ' : '+');
36
- continue;
37
- }
38
-
34
+ i++;
39
35
  if (i + 1 < size && _isxdigit(src[i]) && _isxdigit(src[i + 1])) {
40
36
  unsigned char new_char = (hex2c(src[i]) << 4) + hex2c(src[i + 1]);
41
37
  gh_buf_putc(ob, new_char);
@@ -51,18 +47,12 @@ unescape(gh_buf *ob, const uint8_t *src, size_t size, bool unescape_plus)
51
47
  int
52
48
  houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size)
53
49
  {
54
- return unescape(ob, src, size, false);
50
+ return unescape(ob, src, size);
55
51
  }
56
52
 
57
53
  int
58
54
  houdini_unescape_uri_component(gh_buf *ob, const uint8_t *src, size_t size)
59
55
  {
60
- return unescape(ob, src, size, false);
61
- }
62
-
63
- int
64
- houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size)
65
- {
66
- return unescape(ob, src, size, true);
56
+ return unescape(ob, src, size);
67
57
  }
68
58
 
@@ -25,6 +25,20 @@ static const char *LOOKUP_CODES[] = {
25
25
  "&gt;"
26
26
  };
27
27
 
28
+ static const int LOOKUP_CODES_LENGTHS[] = {
29
+ 0,
30
+ 0,
31
+ 0,
32
+ 0,
33
+ 0,
34
+ 1,
35
+ 6,
36
+ 5,
37
+ 6,
38
+ 4,
39
+ 4
40
+ };
41
+
28
42
  static const char CODE_INVALID = 5;
29
43
 
30
44
  static const char XML_LOOKUP_TABLE[] = {
@@ -129,7 +143,7 @@ houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size)
129
143
  if (end >= size)
130
144
  break;
131
145
 
132
- gh_buf_puts(ob, LOOKUP_CODES[code]);
146
+ gh_buf_put(ob, LOOKUP_CODES[code], LOOKUP_CODES_LENGTHS[code]);
133
147
  }
134
148
 
135
149
  return 1;
@@ -1,11 +1,13 @@
1
- class CGI
2
- extend ::EscapeUtils::HtmlSafety
3
-
4
- class << self
5
- alias escapeHTML _escape_html
1
+ module EscapeUtils
2
+ module CGIHtmlSafety
3
+ def escapeHTML(html)
4
+ ::EscapeUtils::HtmlSafety.escape_once(html) { |s| super(s) }
5
+ end
6
6
 
7
- def unescapeHTML(s)
8
- EscapeUtils.unescape_html(s.to_s)
7
+ def unescapeHTML(html)
8
+ super(html.to_s)
9
9
  end
10
10
  end
11
- end
11
+ end
12
+
13
+ CGI.singleton_class.prepend(EscapeUtils::CGIHtmlSafety)
@@ -1,10 +1 @@
1
- class ERB
2
- module Util
3
- include ::EscapeUtils::HtmlSafety
4
-
5
- alias html_escape _escape_html
6
- alias h html_escape
7
- module_function :h
8
- module_function :html_escape
9
- end
10
- end
1
+ require 'escape_utils/html/cgi' # ERB delegates to EscapeUtils.escapeHTML
@@ -1,7 +1 @@
1
- module Haml
2
- module Helpers
3
- include ::EscapeUtils::HtmlSafety
4
-
5
- alias html_escape _escape_html
6
- end
7
- end
1
+ require 'escape_utils/html/cgi' # HAML delegates to EscapeUtils.escapeHTML
@@ -1,8 +1,8 @@
1
1
  module Rack
2
2
  module Utils
3
- include ::EscapeUtils::HtmlSafety
4
-
5
- alias escape_html _escape_html
3
+ def escape_html(html)
4
+ ::EscapeUtils::HtmlSafety.escape_once(html) { |s| CGI.escapeHTML(s) }
5
+ end
6
6
  module_function :escape_html
7
7
  end
8
8
  end
@@ -1,6 +1,15 @@
1
1
  module EscapeUtils
2
2
  module HtmlSafety
3
3
  if "".respond_to? :html_safe?
4
+ def self.escape_once(s)
5
+ s = s.to_s
6
+ if s.html_safe?
7
+ s.html_safe
8
+ else
9
+ yield(s).html_safe
10
+ end
11
+ end
12
+
4
13
  def _escape_html(s)
5
14
  if s.html_safe?
6
15
  s.to_s.html_safe
@@ -9,6 +18,10 @@ module EscapeUtils
9
18
  end
10
19
  end
11
20
  else
21
+ def self.escape_once(s)
22
+ yield s.to_s
23
+ end
24
+
12
25
  def _escape_html(s)
13
26
  EscapeUtils.escape_html(s.to_s)
14
27
  end
@@ -1,8 +0,0 @@
1
- class CGI
2
- def self.escape(s)
3
- EscapeUtils.escape_url(s.to_s)
4
- end
5
- def self.unescape(s)
6
- EscapeUtils.unescape_url(s.to_s)
7
- end
8
- end
@@ -1,7 +1,7 @@
1
1
  class ERB
2
2
  module Util
3
3
  def url_encode(s)
4
- EscapeUtils.escape_url(s.to_s)
4
+ EscapeUtils.escape_uri(s.to_s)
5
5
  end
6
6
  alias u url_encode
7
7
  module_function :u
@@ -1,12 +0,0 @@
1
- module Rack
2
- module Utils
3
- def escape(url)
4
- EscapeUtils.escape_url(url.to_s)
5
- end
6
- def unescape(url)
7
- EscapeUtils.unescape_url(url.to_s)
8
- end
9
- module_function :escape
10
- module_function :unescape
11
- end
12
- end
@@ -1,8 +1,12 @@
1
- module URI
2
- def self.escape(s, unsafe=nil)
3
- EscapeUtils.escape_uri(s.to_s)
1
+ require 'uri'
2
+
3
+ if URI.respond_to?(:escape) # Was removed in Ruby 3.0. Let's not bring it back
4
+ module URI
5
+ def self.escape(s, unsafe=nil)
6
+ EscapeUtils.escape_uri(s.to_s)
7
+ end
8
+ def self.unescape(s)
9
+ EscapeUtils.unescape_uri(s.to_s)
10
+ end
4
11
  end
5
- def self.unescape(s)
6
- EscapeUtils.unescape_uri(s.to_s)
7
- end
8
- end
12
+ end
@@ -1,3 +1,3 @@
1
1
  module EscapeUtils
2
- VERSION = "1.2.0"
2
+ VERSION = "1.3.0"
3
3
  end
@@ -1,8 +1,8 @@
1
1
  module Builder
2
- class XmlBase < BlankSlate
2
+ class XmlBase
3
3
  private
4
4
  def _escape(text)
5
- EscapeUtils.escape_xml(text.to_s)
5
+ ::EscapeUtils.escape_xml(text.to_s)
6
6
  end
7
7
  end
8
8
  end