escape_utils 1.2.2 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/README.md +47 -88
  4. data/benchmark/html_escape_once.rb +25 -0
  5. data/benchmark/javascript_escape.rb +1 -1
  6. data/benchmark/javascript_unescape.rb +1 -1
  7. data/benchmark/url_decode.rb +28 -0
  8. data/benchmark/url_encode.rb +37 -0
  9. data/benchmark/xml_escape.rb +7 -11
  10. data/ext/escape_utils/escape_utils.c +7 -115
  11. data/ext/escape_utils/houdini.h +3 -5
  12. data/ext/escape_utils/houdini_html_e.c +52 -24
  13. data/ext/escape_utils/houdini_uri_e.c +6 -17
  14. data/ext/escape_utils/houdini_uri_u.c +5 -15
  15. data/ext/escape_utils/houdini_xml_e.c +15 -1
  16. data/lib/escape_utils/html/cgi.rb +10 -8
  17. data/lib/escape_utils/html/erb.rb +1 -10
  18. data/lib/escape_utils/html/haml.rb +1 -7
  19. data/lib/escape_utils/html/rack.rb +3 -3
  20. data/lib/escape_utils/html_safety.rb +13 -0
  21. data/lib/escape_utils/url/cgi.rb +0 -8
  22. data/lib/escape_utils/url/erb.rb +1 -1
  23. data/lib/escape_utils/url/uri.rb +11 -7
  24. data/lib/escape_utils/version.rb +1 -1
  25. data/lib/escape_utils.rb +61 -9
  26. data/test/helper.rb +16 -3
  27. data/test/html/escape_test.rb +41 -39
  28. data/test/html/unescape_test.rb +0 -16
  29. data/test/html_safety_test.rb +1 -27
  30. data/test/javascript/escape_test.rb +0 -5
  31. data/test/query/escape_test.rb +0 -16
  32. data/test/query/unescape_test.rb +2 -18
  33. data/test/uri/unescape_test.rb +2 -2
  34. data/test/uri_component/unescape_test.rb +2 -2
  35. data/test/url/escape_test.rb +0 -16
  36. data/test/url/unescape_test.rb +2 -18
  37. metadata +6 -8
  38. data/benchmark/html_escape.rb +0 -68
  39. data/benchmark/html_unescape.rb +0 -35
  40. data/benchmark/url_escape.rb +0 -56
  41. data/benchmark/url_unescape.rb +0 -50
  42. data/ext/escape_utils/houdini_html_u.c +0 -122
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6f26c8a24ba01ecb130e04ef9af6802ab2deb56455527ccd551ae93839736604
4
- data.tar.gz: 4fc6f51b66cd0c773cf3aea923023ccb66e31cd865e9b89da81a9119540cae05
3
+ metadata.gz: 7b4b256a1ceb8ed2f7e673d5e2081daafb36761d93d3ebe38c6a1b51017f7ed5
4
+ data.tar.gz: 23de0f72e4df0b9ddf1d6ca0063fcac74696dc7da429f4b8eb5200fb90ba6435
5
5
  SHA512:
6
- metadata.gz: f0f6556bd83c1068189d5a3183b2ad1df88bd4a79f46f119ae7fcfdff4339544d6e1654379fe0b64cd6f07865c67eafa83b9caa236e0ac6b23de1814fd808b2e
7
- data.tar.gz: dd5e1b3baf9c594d48f0580f6c71c766e7e341fce332dba8fbd7f0a36d8611bb70e043ef3808c95e9d6a774a71d088e1136ff8025c2ae43e1588f0500d8d04ad
6
+ metadata.gz: 0d009060659e31a0d82073d8d6f870b174fc7647c34686d1168ac65bb6abf066043fb568c161850fb90a00ca0f0e55151874e0509089d62e92fb7c97d2187534
7
+ data.tar.gz: 6395d5b453930debba5b6eee4f5cc2440f2fc72f01d27c45987ecdcb461ff0edfd6ae15935f8ec9f50b010f5f4afcd4e419ee10a2bcdb995652b8be5688a316a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  # Unreleased
2
2
 
3
+ # 1.3.0
4
+
5
+ - Deprecate `EscapeUtils.escape_url` and `EscapeUtils.unescape_url` given that Ruby 2.5 provides an optimized `CGI.escape` and `CGI.unescape` with mostly similar performance.
6
+ - Don't patch `URI.escape` and `URI.unescape` if they don't already exist.
7
+ - Add `EscapeUtils.escape_html_once` and `EscapeUtils.escape_html_once_as_html_safe` as faster implementations of the Rails `escape_once` helper.
8
+ - Deprecate `escape_html` and `escape_html_as_html_safe` given that Ruby 2.5 optimized `CGI.escapeHTML` to be twice as fast as the `EscapeUtils` implementation.
9
+ - Deprecate `unescape_html` given that Ruby 2.5 optimized `CGI.unescapeHTML` to be only 40% slower than the `EscapeUtils` implementation.
10
+ - Deprecate `escape_html_as_html_safe` as well.
11
+ - Deprecate `EscapeUtils.html_secure`; there's no reason to escape slashes (`/`) in 2022.
12
+
3
13
  # 1.2.2
4
14
 
5
15
  - Update EscapeUtils.escape_javascript to match Rails `escape_javascript`
data/README.md CHANGED
@@ -1,12 +1,12 @@
1
1
  # escape_utils
2
2
 
3
- Being as though we're all html escaping everything these days, why not make it faster?
3
+ `EscapeUtils` used to provide optimized escaping functions to replace the slow methods
4
+ provided by Ruby. Since Ruby 2.5, the various `CGI` escape methods have been heavily optimized
5
+ and most `EscapeUtils` methods became irrelevant and were deprecated.
4
6
 
5
- For character encoding, the output string's encoding is copied from the input string.
7
+ It does, however, still provide fast escaping and unescaping methods for URL (RFC 3986), Javascript, XML, as well as an "escape HTML once" method.
6
8
 
7
- It has monkey-patches for Rack::Utils, CGI, URI, ERB::Util and Haml and ActionView so you can drop this in and have your app start escaping fast as balls in no time
8
-
9
- It supports HTML, URL, URI and Javascript escaping/unescaping.
9
+ It has monkey-patches for Rack::Utils, URI and ERB::Util so you can drop this in and have your app start escaping fast as balls in no time
10
10
 
11
11
  ## Installing
12
12
 
@@ -22,70 +22,62 @@ escape_utils assumes all input is encoded as valid UTF-8. If you are dealing wit
22
22
 
23
23
 
24
24
  ``` ruby
25
- utf8_string = non_utf8_string.encode('UTF-8')
25
+ utf8_string = non_utf8_string.encode(Encoding::UTF_8)
26
26
  ```
27
27
 
28
28
  ## Usage
29
29
 
30
30
  ### HTML
31
31
 
32
- #### Escaping
33
-
34
- ``` ruby
35
- html = `curl -s http://maps.google.com`
36
- escaped_html = EscapeUtils.escape_html(html)
37
- ```
32
+ As of `escape_utils 1.3.0`, regular HTML escaping methods are deprecated. Ruby 2.5 introduced C implementations for `CGI.escapeHTML` and `CGI.unescapeHTML` which are respectively faster and almost as fast as `EscapeUtils`. Use those instead.
38
33
 
39
- By default escape_utils will escape `/` characters with `/`, but you can disable that by setting `EscapeUtils.html_secure = false`
40
- or per-call by passing `false` as the second parameter to `escape_html` like `EscapeUtils.escape_html(html, false)`
41
-
42
- For more information check out: http://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet#RULE_.231_-_HTML_Escape_Before_Inserting_Untrusted_Data_into_HTML_Element_Content
43
-
44
- #### Unescaping
45
-
46
- ``` ruby
47
- html = `curl -s http://maps.google.com`
48
- escaped_html = EscapeUtils.escape_html(html)
49
- html = EscapeUtils.unescape_html(escaped_html)
50
- ```
34
+ To avoid double-escaping HTML entities, use `EscapeUtils.escape_html_once`.
51
35
 
52
36
  #### Monkey Patches
53
37
 
38
+ Since the `HTML` monkey patches historically changed the return value for `ActiveSupport::SafeBuffer` instances, they are retained for that purpose only, but they should be considered deprecated as well.
39
+
54
40
  ``` ruby
55
- require 'escape_utils/html/erb' # to patch ERB::Util
56
41
  require 'escape_utils/html/cgi' # to patch CGI
57
- require 'escape_utils/html/haml' # to patch Haml::Helpers
58
42
  ```
59
43
 
60
44
  ### URL
61
45
 
62
- Use (un)escape_uri to get RFC-compliant escaping (like PHP rawurlencode).
46
+ Use `escape_uri` and `unescape_uri` to get RFC 3986 compliant escaping (like PHP `rawurlencode` or `ERB::Util.url_encode`).
63
47
 
64
- Use (un)escape_url to get CGI escaping (where space is +).
48
+ The difference with `CGI.escape` is that spaces (` `) are encoded as `%20` instead of `+`.
65
49
 
66
50
  #### Escaping
67
51
 
68
52
  ``` ruby
69
53
  url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mcEA~!!#*YH*>@!U"
70
- escaped_url = EscapeUtils.escape_url(url)
54
+ escaped_url = EscapeUtils.escape_uri(url)
71
55
  ```
72
56
 
73
57
  #### Unescaping
74
58
 
75
59
  ``` ruby
76
60
  url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mcEA~!!#*YH*>@!U"
77
- escaped_url = EscapeUtils.escape_url(url)
78
- EscapeUtils.unescape_url(escaped_url) == url # => true
61
+ escaped_url = EscapeUtils.escape_uri(url)
62
+ EscapeUtils.unescape_uri(escaped_url) == url # => true
79
63
  ```
80
64
 
81
65
  #### Monkey Patches
82
66
 
83
67
  ``` ruby
84
- require 'escape_utils/url/cgi' # to patch CGI
85
68
  require 'escape_utils/url/erb' # to patch ERB::Util
86
69
  require 'escape_utils/url/uri' # to patch URI
87
70
  ```
88
71
 
72
+ Note that `URI.escape` and `URI.unescape` were removed in Ruby 3.0. `'escape_utils/url/uri'` is a noop on Ruby 3+.
73
+
74
+ ### XML
75
+
76
+ ```ruby
77
+ xml = `curl -s 'https://raw.githubusercontent.com/darcyliu/google-styleguide/master/cppguide.xml'`
78
+ escaped_xml = EscapeUtils.escape_xml(xml)
79
+ ```
80
+
89
81
  ### Javascript
90
82
 
91
83
  #### Escaping
@@ -111,87 +103,54 @@ require 'escape_utils/javascript/action_view' # to patch ActionView::Helpers::Ja
111
103
 
112
104
  ## Benchmarks
113
105
 
114
- In my testing, escaping html is around 10-30x faster than the pure ruby implementations in wide use today.
115
- While unescaping html is around 40-100x faster than CGI.unescapeHTML which is also pure ruby.
116
- Escaping Javascript is around 16-30x faster.
106
+ Escaping URLs following RFC 3986 is 13-32x faster than the methods provided by Ruby.
107
+
108
+ Escaping Javascript is around 13x faster than Rails `escape_javascript`.
109
+
110
+ `EscapeUtils.escape_html_once` is about 17x faster than Rails `escape_once`.
117
111
 
118
112
  This output is from my laptop using the benchmark scripts in the benchmarks folder.
119
113
 
120
- ### HTML
114
+ ### Javascript
121
115
 
122
116
  #### Escaping
123
117
 
124
118
  ```
125
- Rack::Utils.escape_html
126
- 9.650000 0.090000 9.740000 ( 9.750756)
127
- Haml::Helpers.html_escape
128
- 9.310000 0.110000 9.420000 ( 9.417317)
129
- ERB::Util.html_escape
130
- 5.330000 0.390000 5.720000 ( 5.748394)
131
- CGI.escapeHTML
132
- 5.370000 0.380000 5.750000 ( 5.791344)
133
- FasterHTMLEscape.html_escape
134
- 0.520000 0.010000 0.530000 ( 0.539485)
135
- fast_xs_extra#fast_xs_html
136
- 0.310000 0.030000 0.340000 ( 0.336734)
137
- EscapeUtils.escape_html
138
- 0.200000 0.050000 0.250000 ( 0.258839)
119
+ EscapeUtils.escape_javascript: 1567.5 i/s
120
+ ActionView::Helpers::JavaScriptHelper#escape_javascript: 116.8 i/s - 13.42x (± 0.00) slower
139
121
  ```
140
122
 
141
123
  #### Unescaping
142
124
 
143
125
  ```
144
- CGI.unescapeHTML
145
- 16.520000 0.080000 16.600000 ( 16.853888)
146
- EscapeUtils.unescape_html
147
- 0.120000 0.040000 0.160000 ( 0.162696)
126
+ EscapeUtils.escape_javascript: 2.089k (± 3.0%) i/s - 10.530k in 5.044615s
148
127
  ```
149
128
 
150
- ### Javascript
129
+ I didn't look that hard, but I'm not aware of another ruby library that does Javascript unescaping to benchmark against. Anyone know of any?
130
+
131
+ ### URL
151
132
 
152
133
  #### Escaping
153
134
 
154
135
  ```
155
- ActionView::Helpers::JavaScriptHelper#escape_javascript
156
- 3.810000 0.100000 3.910000 ( 3.925557)
157
- EscapeUtils.escape_javascript
158
- 0.200000 0.040000 0.240000 ( 0.236692)
136
+ EscapeUtils.escape_uri: 4019359.2 i/s
137
+ fast_xs_extra#fast_xs_url: 2435949.2 i/s - 1.65x (± 0.00) slower
138
+ URI::DEFAULT_PARSER.escape: 288800.8 i/s - 13.92x (± 0.00) slower
139
+ ERB::Util.url_encode: 122373.5 i/s - 32.85x (± 0.00) slower
159
140
  ```
160
141
 
161
142
  #### Unescaping
162
143
 
163
- I didn't look that hard, but I'm not aware of another ruby library that does Javascript unescaping to benchmark against. Anyone know of any?
164
-
165
- ### URL
166
-
167
- #### Escaping
168
-
169
144
  ```
170
- ERB::Util.url_encode
171
- 0.520000 0.010000 0.530000 ( 0.529277)
172
- Rack::Utils.escape
173
- 0.460000 0.010000 0.470000 ( 0.466962)
174
- CGI.escape
175
- 0.440000 0.000000 0.440000 ( 0.443017)
176
- URLEscape#escape
177
- 0.040000 0.000000 0.040000 ( 0.045661)
178
- fast_xs_extra#fast_xs_url
179
- 0.010000 0.000000 0.010000 ( 0.015429)
180
- EscapeUtils.escape_url
181
- 0.010000 0.000000 0.010000 ( 0.010843)
145
+ EscapeUtils.unescape_uri: 3866774.5 i/s
146
+ fast_xs_extra#fast_uxs_url: 2438900.7 i/s - 1.59x (± 0.00) slower
182
147
  ```
183
148
 
184
- #### Unescaping
149
+ ### HTML
150
+
151
+ #### Escape once
185
152
 
186
153
  ```
187
- Rack::Utils.unescape
188
- 0.250000 0.010000 0.260000 ( 0.257558)
189
- CGI.unescape
190
- 0.250000 0.000000 0.250000 ( 0.257837)
191
- URLEscape#unescape
192
- 0.040000 0.000000 0.040000 ( 0.031548)
193
- fast_xs_extra#fast_uxs_cgi
194
- 0.010000 0.000000 0.010000 ( 0.006062)
195
- EscapeUtils.unescape_url
196
- 0.000000 0.000000 0.000000 ( 0.005679)
154
+ EscapeUtils.escape_html_once: 2831.5 i/s
155
+ ActionView::Helpers::TagHelper#escape_once: 161.4 i/s - 17.55x (± 0.00) slower
197
156
  ```
@@ -0,0 +1,25 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler/setup'
5
+ require 'benchmark/ips'
6
+
7
+ require 'escape_utils'
8
+ require 'active_support/core_ext/string/output_safety'
9
+
10
+ url = "https://en.wikipedia.org/wiki/Succession_to_the_British_throne"
11
+ html = `curl -s #{url}`
12
+ html = html.force_encoding('utf-8')
13
+ puts "Escaping #{html.bytesize} bytes of html from #{url}"
14
+
15
+ Benchmark.ips do |x|
16
+ x.report "EscapeUtils.escape_html_once" do
17
+ EscapeUtils.escape_html_once(html)
18
+ end
19
+
20
+ x.report "ActionView::Helpers::TagHelper#escape_once" do # Rails expose it as ERB::Util.html_escape_once
21
+ ERB::Util.html_escape_once(html)
22
+ end
23
+
24
+ x.compare!(order: :baseline)
25
+ end
@@ -13,7 +13,7 @@ end
13
13
 
14
14
  url = "http://ajax.googleapis.com/ajax/libs/dojo/1.4.3/dojo/dojo.xd.js.uncompressed.js"
15
15
  javascript = `curl -s #{url}`
16
- javascript = javascript.force_encoding('utf-8') if javascript.respond_to?(:force_encoding)
16
+ javascript = javascript.force_encoding('utf-8')
17
17
  puts "Escaping #{javascript.bytesize} bytes of javascript, from #{url}"
18
18
 
19
19
  Benchmark.ips do |x|
@@ -8,7 +8,7 @@ require 'escape_utils'
8
8
 
9
9
  url = "http://ajax.googleapis.com/ajax/libs/dojo/1.4.3/dojo/dojo.xd.js.uncompressed.js"
10
10
  javascript = `curl -s #{url}`
11
- javascript = javascript.force_encoding('utf-8') if javascript.respond_to?(:force_encoding)
11
+ javascript = javascript.force_encoding('utf-8')
12
12
  escaped_javascript = EscapeUtils.escape_javascript(javascript)
13
13
  puts "Escaping #{escaped_javascript.bytesize} bytes of javascript, from #{url}"
14
14
 
@@ -0,0 +1,28 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler/setup'
5
+ require 'benchmark/ips'
6
+
7
+ require 'rack'
8
+ require 'cgi'
9
+ require 'url_escape'
10
+ require 'fast_xs_extra'
11
+ require 'escape_utils'
12
+
13
+ url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
14
+ url = url.force_encoding('us-ascii')
15
+ escaped_url = EscapeUtils.escape_uri(url)
16
+ puts "Escaping a #{url.bytesize} byte URL"
17
+
18
+ Benchmark.ips do |x|
19
+ x.report "EscapeUtils.unescape_uri" do
20
+ EscapeUtils.unescape_uri(escaped_url)
21
+ end
22
+
23
+ x.report "fast_xs_extra#fast_uxs_url" do
24
+ url.fast_xs_url
25
+ end
26
+
27
+ x.compare!(order: :baseline)
28
+ end
@@ -0,0 +1,37 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler/setup'
5
+ require 'benchmark/ips'
6
+
7
+ require 'rack'
8
+ require 'erb'
9
+ require 'cgi'
10
+ require 'url_escape'
11
+ require 'fast_xs_extra'
12
+ require 'escape_utils'
13
+
14
+ url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
15
+ puts "Escaping a #{url.bytesize} byte URL times"
16
+
17
+ Benchmark.ips do |x|
18
+ x.report "EscapeUtils.escape_uri" do
19
+ EscapeUtils.escape_uri(url)
20
+ end
21
+
22
+ x.report " URI::DEFAULT_PARSER.escape" do
23
+ URI::DEFAULT_PARSER.escape(url)
24
+ end
25
+
26
+ x.report "ERB::Util.url_encode" do |times|
27
+ times.times do
28
+ ERB::Util.url_encode(url)
29
+ end
30
+ end
31
+
32
+ x.report "fast_xs_extra#fast_xs_url" do
33
+ url.fast_xs_url
34
+ end
35
+
36
+ x.compare!(order: :baseline)
37
+ end
@@ -7,23 +7,19 @@ require 'benchmark/ips'
7
7
  require 'fast_xs'
8
8
  require 'escape_utils'
9
9
 
10
- url = "http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml"
10
+ url = "https://raw.githubusercontent.com/darcyliu/google-styleguide/master/cppguide.xml"
11
11
  xml = `curl -s #{url}`
12
- xml = xml.force_encoding('binary') if xml.respond_to?(:force_encoding)
12
+ xml = xml.force_encoding('binary')
13
13
  puts "Escaping #{xml.bytesize} bytes of xml, from #{url}"
14
14
 
15
15
  Benchmark.ips do |x|
16
- x.report "fast_xs" do |times|
17
- times.times do
18
- xml.fast_xs
19
- end
16
+ x.report "EscapeUtils.escape_xml" do
17
+ EscapeUtils.escape_xml(xml)
20
18
  end
21
19
 
22
- x.report "EscapeUtils.escape_xml" do |times|
23
- times.times do
24
- EscapeUtils.escape_xml(xml)
25
- end
20
+ x.report "fast_xs" do
21
+ xml.fast_xs
26
22
  end
27
23
 
28
- x.compare!
24
+ x.compare!(order: :baseline)
29
25
  end
@@ -9,11 +9,6 @@
9
9
 
10
10
  static VALUE rb_eEncodingCompatibilityError;
11
11
 
12
- static VALUE eu_new_str(const char *str, size_t len)
13
- {
14
- return rb_enc_str_new(str, len, rb_utf8_encoding());
15
- }
16
-
17
12
  static void check_utf8_encoding(VALUE str)
18
13
  {
19
14
  static rb_encoding *_cached[3] = {NULL, NULL, NULL};
@@ -34,41 +29,6 @@ static void check_utf8_encoding(VALUE str)
34
29
 
35
30
  typedef int (*houdini_cb)(gh_buf *, const uint8_t *, size_t);
36
31
 
37
- static VALUE rb_mEscapeUtils;
38
- static ID ID_at_html_safe, ID_new;
39
-
40
- /**
41
- * html_secure instance variable
42
- */
43
- static int g_html_secure = 1;
44
-
45
- static VALUE rb_eu_set_html_secure(VALUE self, VALUE val)
46
- {
47
- g_html_secure = RTEST(val);
48
- rb_ivar_set(self, rb_intern("@html_secure"), val);
49
- return val;
50
- }
51
-
52
- /**
53
- * html_safe_string_class instance variable
54
- */
55
- static VALUE rb_html_safe_string_class;
56
- static VALUE rb_html_safe_string_template_object;
57
-
58
- static VALUE rb_eu_set_html_safe_string_class(VALUE self, VALUE val)
59
- {
60
- Check_Type(val, T_CLASS);
61
-
62
- if (rb_funcall(val, rb_intern("<="), 1, rb_cString) == Qnil)
63
- rb_raise(rb_eArgError, "%s must be a descendent of String", rb_class2name(val));
64
-
65
- rb_html_safe_string_class = val;
66
- rb_html_safe_string_template_object = rb_class_new_instance(0, NULL, rb_html_safe_string_class);
67
- OBJ_FREEZE(rb_html_safe_string_template_object);
68
- rb_ivar_set(self, rb_intern("@html_safe_string_class"), val);
69
- return val;
70
- }
71
-
72
32
  /**
73
33
  * Generic template
74
34
  */
@@ -78,13 +38,13 @@ rb_eu__generic(VALUE str, houdini_cb do_escape)
78
38
  gh_buf buf = GH_BUF_INIT;
79
39
 
80
40
  if (NIL_P(str))
81
- return eu_new_str("", 0);
41
+ return rb_utf8_str_new("", 0);
82
42
 
83
43
  Check_Type(str, T_STRING);
84
44
  check_utf8_encoding(str);
85
45
 
86
46
  if (do_escape(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str))) {
87
- VALUE result = eu_new_str(buf.ptr, buf.size);
47
+ VALUE result = rb_utf8_str_new(buf.ptr, buf.size);
88
48
  gh_buf_free(&buf);
89
49
  return result;
90
50
  }
@@ -96,50 +56,15 @@ rb_eu__generic(VALUE str, houdini_cb do_escape)
96
56
  /**
97
57
  * HTML methods
98
58
  */
99
- static VALUE new_html_safe_string(const char *ptr, size_t len)
100
- {
101
- return rb_str_new_with_class(rb_html_safe_string_template_object, ptr, len);
102
- }
103
-
104
- static VALUE rb_eu_escape_html_as_html_safe(VALUE self, VALUE str)
105
- {
106
- VALUE result;
107
- int secure = g_html_secure;
108
- gh_buf buf = GH_BUF_INIT;
109
-
110
- Check_Type(str, T_STRING);
111
- check_utf8_encoding(str);
112
59
 
113
- if (houdini_escape_html0(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure)) {
114
- result = new_html_safe_string(buf.ptr, buf.size);
115
- gh_buf_free(&buf);
116
- } else {
117
- result = new_html_safe_string(RSTRING_PTR(str), RSTRING_LEN(str));
118
- }
119
-
120
- rb_ivar_set(result, ID_at_html_safe, Qtrue);
121
- rb_enc_associate(result, rb_enc_get(str));
122
-
123
- return result;
124
- }
125
-
126
- static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
60
+ static VALUE rb_eu_escape_html_once(VALUE self, VALUE str)
127
61
  {
128
- VALUE str, rb_secure;
129
62
  gh_buf buf = GH_BUF_INIT;
130
- int secure = g_html_secure;
131
-
132
- if (rb_scan_args(argc, argv, "11", &str, &rb_secure) == 2) {
133
- if (rb_secure == Qfalse) {
134
- secure = 0;
135
- }
136
- }
137
-
138
63
  Check_Type(str, T_STRING);
139
64
  check_utf8_encoding(str);
140
65
 
141
- if (houdini_escape_html0(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure)) {
142
- VALUE result = eu_new_str(buf.ptr, buf.size);
66
+ if (houdini_escape_html_once(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str))) {
67
+ VALUE result = rb_utf8_str_new(buf.ptr, buf.size);
143
68
  gh_buf_free(&buf);
144
69
  return result;
145
70
  }
@@ -147,11 +72,6 @@ static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
147
72
  return str;
148
73
  }
149
74
 
150
- static VALUE rb_eu_unescape_html(VALUE self, VALUE str)
151
- {
152
- return rb_eu__generic(str, &houdini_unescape_html);
153
- }
154
-
155
75
 
156
76
  /**
157
77
  * XML methods
@@ -175,21 +95,6 @@ static VALUE rb_eu_unescape_js(VALUE self, VALUE str)
175
95
  return rb_eu__generic(str, &houdini_unescape_js);
176
96
  }
177
97
 
178
-
179
- /**
180
- * URL methods
181
- */
182
- static VALUE rb_eu_escape_url(VALUE self, VALUE str)
183
- {
184
- return rb_eu__generic(str, &houdini_escape_url);
185
- }
186
-
187
- static VALUE rb_eu_unescape_url(VALUE self, VALUE str)
188
- {
189
- return rb_eu__generic(str, &houdini_unescape_url);
190
- }
191
-
192
-
193
98
  /**
194
99
  * URI methods
195
100
  */
@@ -216,7 +121,6 @@ static VALUE rb_eu_unescape_uri_component(VALUE self, VALUE str)
216
121
  return rb_eu__generic(str, &houdini_unescape_uri_component);
217
122
  }
218
123
 
219
-
220
124
  /**
221
125
  * Ruby Extension initializer
222
126
  */
@@ -225,26 +129,14 @@ void Init_escape_utils()
225
129
  {
226
130
  rb_eEncodingCompatibilityError = rb_const_get(rb_cEncoding, rb_intern("CompatibilityError"));
227
131
 
228
- ID_new = rb_intern("new");
229
- ID_at_html_safe = rb_intern("@html_safe");
230
- rb_global_variable(&rb_html_safe_string_class);
231
- rb_global_variable(&rb_html_safe_string_template_object);
232
-
233
- rb_mEscapeUtils = rb_define_module("EscapeUtils");
234
- rb_define_method(rb_mEscapeUtils, "escape_html_as_html_safe", rb_eu_escape_html_as_html_safe, 1);
235
- rb_define_method(rb_mEscapeUtils, "escape_html", rb_eu_escape_html, -1);
236
- rb_define_method(rb_mEscapeUtils, "unescape_html", rb_eu_unescape_html, 1);
132
+ VALUE rb_mEscapeUtils = rb_define_module("EscapeUtils");
133
+ rb_define_method(rb_mEscapeUtils, "escape_html_once", rb_eu_escape_html_once, 1);
237
134
  rb_define_method(rb_mEscapeUtils, "escape_xml", rb_eu_escape_xml, 1);
238
135
  rb_define_method(rb_mEscapeUtils, "escape_javascript", rb_eu_escape_js, 1);
239
136
  rb_define_method(rb_mEscapeUtils, "unescape_javascript", rb_eu_unescape_js, 1);
240
- rb_define_method(rb_mEscapeUtils, "escape_url", rb_eu_escape_url, 1);
241
- rb_define_method(rb_mEscapeUtils, "unescape_url", rb_eu_unescape_url, 1);
242
137
  rb_define_method(rb_mEscapeUtils, "escape_uri", rb_eu_escape_uri, 1);
243
138
  rb_define_method(rb_mEscapeUtils, "unescape_uri", rb_eu_unescape_uri, 1);
244
139
  rb_define_method(rb_mEscapeUtils, "escape_uri_component", rb_eu_escape_uri_component, 1);
245
140
  rb_define_method(rb_mEscapeUtils, "unescape_uri_component", rb_eu_unescape_uri_component, 1);
246
-
247
- rb_define_singleton_method(rb_mEscapeUtils, "html_secure=", rb_eu_set_html_secure, 1);
248
- rb_define_singleton_method(rb_mEscapeUtils, "html_safe_string_class=", rb_eu_set_html_safe_string_class, 1);
249
141
  }
250
142
 
@@ -22,20 +22,18 @@ extern "C" {
22
22
  # define _isdigit(c) ((c) >= '0' && (c) <= '9')
23
23
  #endif
24
24
 
25
+ #define _isasciialpha(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
26
+
25
27
  #define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
26
28
  #define HOUDINI_UNESCAPED_SIZE(x) (x)
27
29
 
28
- extern int houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size);
29
- extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure);
30
- extern int houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size);
30
+ extern int houdini_escape_html_once(gh_buf *ob, const uint8_t *src, size_t size);
31
31
  extern int houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size);
32
32
  extern int houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size);
33
33
  extern int houdini_escape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
34
- extern int houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size);
35
34
  extern int houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size);
36
35
  extern int houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size);
37
36
  extern int houdini_unescape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
38
- extern int houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size);
39
37
  extern int houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size);
40
38
  extern int houdini_unescape_js(gh_buf *ob, const uint8_t *src, size_t size);
41
39