escape_utils 1.2.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/README.md +47 -88
  4. data/benchmark/html_escape_once.rb +25 -0
  5. data/benchmark/javascript_escape.rb +1 -1
  6. data/benchmark/javascript_unescape.rb +1 -1
  7. data/benchmark/url_decode.rb +28 -0
  8. data/benchmark/url_encode.rb +37 -0
  9. data/benchmark/xml_escape.rb +7 -11
  10. data/ext/escape_utils/escape_utils.c +7 -115
  11. data/ext/escape_utils/houdini.h +3 -5
  12. data/ext/escape_utils/houdini_html_e.c +52 -24
  13. data/ext/escape_utils/houdini_uri_e.c +6 -17
  14. data/ext/escape_utils/houdini_uri_u.c +5 -15
  15. data/ext/escape_utils/houdini_xml_e.c +15 -1
  16. data/lib/escape_utils/html/cgi.rb +10 -8
  17. data/lib/escape_utils/html/erb.rb +1 -10
  18. data/lib/escape_utils/html/haml.rb +1 -7
  19. data/lib/escape_utils/html/rack.rb +3 -3
  20. data/lib/escape_utils/html_safety.rb +13 -0
  21. data/lib/escape_utils/url/cgi.rb +0 -8
  22. data/lib/escape_utils/url/erb.rb +1 -1
  23. data/lib/escape_utils/url/uri.rb +11 -7
  24. data/lib/escape_utils/version.rb +1 -1
  25. data/lib/escape_utils.rb +61 -9
  26. data/test/helper.rb +16 -3
  27. data/test/html/escape_test.rb +41 -39
  28. data/test/html/unescape_test.rb +0 -16
  29. data/test/html_safety_test.rb +1 -27
  30. data/test/javascript/escape_test.rb +0 -5
  31. data/test/query/escape_test.rb +0 -16
  32. data/test/query/unescape_test.rb +2 -18
  33. data/test/uri/unescape_test.rb +2 -2
  34. data/test/uri_component/unescape_test.rb +2 -2
  35. data/test/url/escape_test.rb +0 -16
  36. data/test/url/unescape_test.rb +2 -18
  37. metadata +6 -8
  38. data/benchmark/html_escape.rb +0 -68
  39. data/benchmark/html_unescape.rb +0 -35
  40. data/benchmark/url_escape.rb +0 -56
  41. data/benchmark/url_unescape.rb +0 -50
  42. data/ext/escape_utils/houdini_html_u.c +0 -122
@@ -1,10 +1,5 @@
1
1
  require File.expand_path("../../helper", __FILE__)
2
2
 
3
- require 'active_support'
4
- require 'active_support/json'
5
- require 'action_view'
6
- require 'action_view/helpers'
7
-
8
3
  class JavascriptEscapeTest < Minitest::Test
9
4
  ActiveSupport.escape_html_entities_in_json = true
10
5
 
@@ -25,22 +25,6 @@ class QueryEscapeTest < Minitest::Test
25
25
  assert_equal '%E3%81%BE%E3%81%A4+%E3%82%82%E3%81%A8', EscapeUtils.escape_url(matz_name_sep)
26
26
  end
27
27
 
28
- def test_input_must_be_utf8_or_ascii
29
- str = "a space"
30
-
31
- str.force_encoding 'ISO-8859-1'
32
- assert_raises Encoding::CompatibilityError do
33
- EscapeUtils.escape_url(str)
34
- end
35
-
36
- str.force_encoding 'UTF-8'
37
- begin
38
- EscapeUtils.escape_url(str)
39
- rescue Encoding::CompatibilityError => e
40
- assert_nil e, "#{e.class.name} raised, expected not to"
41
- end
42
- end
43
-
44
28
  def test_return_value_is_tagged_as_utf8
45
29
  str = "a+space"
46
30
  assert_equal Encoding.find('UTF-8'), EscapeUtils.escape_url(str).encoding
@@ -20,29 +20,13 @@ class QueryUnescapeTest < Minitest::Test
20
20
 
21
21
  def test_url_containing_multibyte_characters
22
22
  matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
23
- matz_name.force_encoding('UTF-8') if matz_name.respond_to?(:force_encoding)
23
+ matz_name.force_encoding('UTF-8')
24
24
  assert_equal matz_name, EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
25
25
  matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
26
- matz_name_sep.force_encoding('UTF-8') if matz_name_sep.respond_to?(:force_encoding)
26
+ matz_name_sep.force_encoding('UTF-8')
27
27
  assert_equal matz_name_sep, EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4+%E3%82%82%E3%81%A8')
28
28
  end
29
29
 
30
- def test_input_must_be_valid_utf8_or_ascii
31
- escaped = EscapeUtils.unescape_url("a+space")
32
-
33
- escaped.force_encoding 'ISO-8859-1'
34
- assert_raises Encoding::CompatibilityError do
35
- EscapeUtils.unescape_url(escaped)
36
- end
37
-
38
- escaped.force_encoding 'UTF-8'
39
- begin
40
- EscapeUtils.unescape_url(escaped)
41
- rescue Encoding::CompatibilityError => e
42
- assert_nil e, "#{e.class.name} raised, expected not to"
43
- end
44
- end
45
-
46
30
  def test_return_value_is_tagged_as_utf8
47
31
  escaped = EscapeUtils.escape_url("a space")
48
32
  assert_equal Encoding.find('UTF-8'), EscapeUtils.unescape_url(escaped).encoding
@@ -23,10 +23,10 @@ class UriUnescapeTest < Minitest::Test
23
23
 
24
24
  def test_uri_containing_multibyte_charactes
25
25
  matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
26
- matz_name.force_encoding('UTF-8') if matz_name.respond_to?(:force_encoding)
26
+ matz_name.force_encoding('UTF-8')
27
27
  assert_equal matz_name, EscapeUtils.unescape_uri('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
28
28
  matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
29
- matz_name_sep.force_encoding('UTF-8') if matz_name_sep.respond_to?(:force_encoding)
29
+ matz_name_sep.force_encoding('UTF-8')
30
30
  assert_equal matz_name_sep, EscapeUtils.unescape_uri('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8')
31
31
  end
32
32
 
@@ -39,10 +39,10 @@ class UriComponentUnescapeTest < Minitest::Test
39
39
 
40
40
  def test_multibyte_characters
41
41
  matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
42
- matz_name.force_encoding('UTF-8') if matz_name.respond_to?(:force_encoding)
42
+ matz_name.force_encoding('UTF-8')
43
43
  assert_equal matz_name, EscapeUtils.unescape_uri_component('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
44
44
  matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
45
- matz_name_sep.force_encoding('UTF-8') if matz_name_sep.respond_to?(:force_encoding)
45
+ matz_name_sep.force_encoding('UTF-8')
46
46
  assert_equal matz_name_sep, EscapeUtils.unescape_uri_component('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8')
47
47
  end
48
48
 
@@ -41,22 +41,6 @@ class UrlEscapeTest < Minitest::Test
41
41
  assert_equal "a%2Fslash", EscapeUtils.escape_url("a/slash")
42
42
  end
43
43
 
44
- def test_input_must_be_utf8_or_ascii
45
- str = "fo<o>bar"
46
-
47
- str.force_encoding 'ISO-8859-1'
48
- assert_raises Encoding::CompatibilityError do
49
- EscapeUtils.escape_url(str)
50
- end
51
-
52
- str.force_encoding 'UTF-8'
53
- begin
54
- EscapeUtils.escape_url(str)
55
- rescue Encoding::CompatibilityError => e
56
- assert_nil e, "#{e.class.name} raised, expected not to"
57
- end
58
- end
59
-
60
44
  def test_return_value_is_tagged_as_utf8
61
45
  str = "fo<o>bar"
62
46
  assert_equal Encoding.find('UTF-8'), EscapeUtils.escape_url(str).encoding
@@ -28,10 +28,10 @@ class UrlUnescapeTest < Minitest::Test
28
28
 
29
29
  def test_multibyte_characters
30
30
  matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
31
- matz_name.force_encoding('UTF-8') if matz_name.respond_to?(:force_encoding)
31
+ matz_name.force_encoding('UTF-8')
32
32
  assert_equal matz_name, EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
33
33
  matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
34
- matz_name_sep.force_encoding('UTF-8') if matz_name_sep.respond_to?(:force_encoding)
34
+ matz_name_sep.force_encoding('UTF-8')
35
35
  assert_equal matz_name_sep, EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8')
36
36
  end
37
37
 
@@ -46,22 +46,6 @@ class UrlUnescapeTest < Minitest::Test
46
46
  end
47
47
  end
48
48
 
49
- def test_input_must_be_valid_utf8_or_ascii
50
- escaped = EscapeUtils.escape_url("fo<o>bar")
51
-
52
- escaped.force_encoding 'ISO-8859-1'
53
- assert_raises Encoding::CompatibilityError do
54
- EscapeUtils.unescape_url(escaped)
55
- end
56
-
57
- escaped.force_encoding 'UTF-8'
58
- begin
59
- EscapeUtils.unescape_url(escaped)
60
- rescue Encoding::CompatibilityError => e
61
- assert_nil e, "#{e.class.name} raised, expected not to"
62
- end
63
- end
64
-
65
49
  def test_return_value_is_tagged_as_utf8
66
50
  escaped = EscapeUtils.escape_url("fo<o>bar")
67
51
  assert_equal Encoding.find('UTF-8'), EscapeUtils.unescape_url(escaped).encoding
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: escape_utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.2
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Lopez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-07 00:00:00.000000000 Z
11
+ date: 2022-06-16 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Quickly perform HTML, URL, URI and Javascript escaping/unescaping
14
14
  email: seniorlopez@gmail.com
@@ -24,12 +24,11 @@ files:
24
24
  - LICENSE
25
25
  - README.md
26
26
  - Rakefile
27
- - benchmark/html_escape.rb
28
- - benchmark/html_unescape.rb
27
+ - benchmark/html_escape_once.rb
29
28
  - benchmark/javascript_escape.rb
30
29
  - benchmark/javascript_unescape.rb
31
- - benchmark/url_escape.rb
32
- - benchmark/url_unescape.rb
30
+ - benchmark/url_decode.rb
31
+ - benchmark/url_encode.rb
33
32
  - benchmark/xml_escape.rb
34
33
  - bin/console
35
34
  - escape_utils.gemspec
@@ -40,7 +39,6 @@ files:
40
39
  - ext/escape_utils/houdini.h
41
40
  - ext/escape_utils/houdini_href_e.c
42
41
  - ext/escape_utils/houdini_html_e.c
43
- - ext/escape_utils/houdini_html_u.c
44
42
  - ext/escape_utils/houdini_js_e.c
45
43
  - ext/escape_utils/houdini_js_u.c
46
44
  - ext/escape_utils/houdini_uri_e.c
@@ -97,7 +95,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
97
95
  - !ruby/object:Gem::Version
98
96
  version: '0'
99
97
  requirements: []
100
- rubygems_version: 3.3.7
98
+ rubygems_version: 3.1.2
101
99
  signing_key:
102
100
  specification_version: 4
103
101
  summary: Faster string escaping routines for your web apps
@@ -1,68 +0,0 @@
1
- # encoding: utf-8
2
-
3
- require 'rubygems'
4
- require 'bundler/setup'
5
- require 'benchmark/ips'
6
-
7
- require 'rack'
8
- require 'erb'
9
- require 'cgi'
10
- require 'haml'
11
- require 'fast_xs_extra'
12
- require 'escape_utils'
13
-
14
- module HamlBench
15
- extend Haml::Helpers
16
- end
17
-
18
- url = "https://en.wikipedia.org/wiki/Succession_to_the_British_throne"
19
- html = `curl -s #{url}`
20
- html = html.force_encoding('utf-8') if html.respond_to?(:force_encoding)
21
- puts "Escaping #{html.bytesize} bytes of html from #{url}"
22
-
23
- Benchmark.ips do |x|
24
- x.report "Rack::Utils.escape_html" do |times|
25
- times.times do
26
- Rack::Utils.escape_html(html)
27
- end
28
- end
29
-
30
- x.report "Haml::Helpers.html_escape" do |times|
31
- times.times do
32
- HamlBench.html_escape(html)
33
- end
34
- end
35
-
36
- x.report "ERB::Util.html_escape" do |times|
37
- times.times do
38
- ERB::Util.html_escape(html)
39
- end
40
- end
41
-
42
- x.report "CGI.escapeHTML" do |times|
43
- times.times do
44
- CGI.escapeHTML(html)
45
- end
46
- end
47
-
48
- x.report "String#gsub" do |times|
49
- html_escape = { '&' => '&amp;', '>' => '&gt;', '<' => '&lt;', '"' => '&quot;', "'" => '&#39;' }
50
- times.times do
51
- html.gsub(/[&"'><]/, html_escape)
52
- end
53
- end
54
-
55
- x.report "fast_xs_extra#fast_xs_html" do |times|
56
- times.times do
57
- html.fast_xs_html
58
- end
59
- end
60
-
61
- x.report "EscapeUtils.escape_html" do |times|
62
- times.times do
63
- EscapeUtils.escape_html(html)
64
- end
65
- end
66
-
67
- x.compare!
68
- end
@@ -1,35 +0,0 @@
1
- # encoding: utf-8
2
-
3
- require 'rubygems'
4
- require 'bundler/setup'
5
- require 'benchmark/ips'
6
-
7
- require 'cgi'
8
- require 'haml'
9
- require 'escape_utils'
10
-
11
- module HamlBench
12
- extend Haml::Helpers
13
- end
14
-
15
- url = "https://en.wikipedia.org/wiki/Succession_to_the_British_throne"
16
- html = `curl -s #{url}`
17
- html = html.force_encoding('binary') if html.respond_to?(:force_encoding)
18
- escaped_html = EscapeUtils.escape_html(html)
19
- puts "Unescaping #{escaped_html.bytesize} bytes of escaped html, from #{url}"
20
-
21
- Benchmark.ips do |x|
22
- x.report "CGI.unescapeHTML" do |times|
23
- times.times do
24
- CGI.unescapeHTML(escaped_html)
25
- end
26
- end
27
-
28
- x.report "EscapeUtils.unescape_html" do |times|
29
- times.times do
30
- EscapeUtils.unescape_html(escaped_html)
31
- end
32
- end
33
-
34
- x.compare!
35
- end
@@ -1,56 +0,0 @@
1
- # encoding: utf-8
2
-
3
- require 'rubygems'
4
- require 'bundler/setup'
5
- require 'benchmark/ips'
6
-
7
- require 'rack'
8
- require 'erb'
9
- require 'cgi'
10
- require 'url_escape'
11
- require 'fast_xs_extra'
12
- require 'escape_utils'
13
-
14
- url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
15
- url = url.force_encoding('us-ascii') if url.respond_to?(:force_encoding)
16
- puts "Escaping a #{url.bytesize} byte URL times"
17
-
18
- Benchmark.ips do |x|
19
- x.report "ERB::Util.url_encode" do |times|
20
- times.times do
21
- ERB::Util.url_encode(url)
22
- end
23
- end
24
-
25
- x.report "Rack::Utils.escape" do |times|
26
- times.times do
27
- Rack::Utils.escape(url)
28
- end
29
- end
30
-
31
- x.report "CGI.escape" do |times|
32
- times.times do
33
- CGI.escape(url)
34
- end
35
- end
36
-
37
- x.report "URLEscape#escape" do |times|
38
- times.times do
39
- URLEscape.escape(url)
40
- end
41
- end
42
-
43
- x.report "fast_xs_extra#fast_xs_url" do |times|
44
- times.times do
45
- url.fast_xs_url
46
- end
47
- end
48
-
49
- x.report "EscapeUtils.escape_url" do |times|
50
- times.times do
51
- EscapeUtils.escape_url(url)
52
- end
53
- end
54
-
55
- x.compare!
56
- end
@@ -1,50 +0,0 @@
1
- # encoding: utf-8
2
-
3
- require 'rubygems'
4
- require 'bundler/setup'
5
- require 'benchmark/ips'
6
-
7
- require 'rack'
8
- require 'cgi'
9
- require 'url_escape'
10
- require 'fast_xs_extra'
11
- require 'escape_utils'
12
-
13
- url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
14
- url = url.force_encoding('us-ascii') if url.respond_to?(:force_encoding)
15
- escaped_url = EscapeUtils.escape_url(url)
16
- puts "Escaping a #{url.bytesize} byte URL"
17
-
18
- Benchmark.ips do |x|
19
- x.report "Rack::Utils.unescape" do |times|
20
- times.times do
21
- Rack::Utils.unescape(escaped_url)
22
- end
23
- end
24
-
25
- x.report "CGI.unescape" do |times|
26
- times.times do
27
- CGI.unescape(escaped_url)
28
- end
29
- end
30
-
31
- x.report "URLEscape#unescape" do |times|
32
- times.times do
33
- URLEscape.unescape(escaped_url)
34
- end
35
- end
36
-
37
- x.report "fast_xs_extra#fast_uxs_cgi" do |times|
38
- times.times do
39
- url.fast_uxs_cgi
40
- end
41
- end
42
-
43
- x.report "EscapeUtils.unescape_url" do |times|
44
- times.times do
45
- EscapeUtils.unescape_url(escaped_url)
46
- end
47
- end
48
-
49
- x.compare!
50
- end
@@ -1,122 +0,0 @@
1
- #include <assert.h>
2
- #include <stdio.h>
3
- #include <string.h>
4
-
5
- #include "houdini.h"
6
- #include "html_unescape.h"
7
-
8
- static inline void
9
- gh_buf_put_utf8(gh_buf *ob, int c)
10
- {
11
- unsigned char unichar[4];
12
-
13
- if (c < 0x80) {
14
- gh_buf_putc(ob, c);
15
- }
16
- else if (c < 0x800) {
17
- unichar[0] = 192 + (c / 64);
18
- unichar[1] = 128 + (c % 64);
19
- gh_buf_put(ob, unichar, 2);
20
- }
21
- else if (c - 0xd800u < 0x800) {
22
- gh_buf_putc(ob, '?');
23
- }
24
- else if (c < 0x10000) {
25
- unichar[0] = 224 + (c / 4096);
26
- unichar[1] = 128 + (c / 64) % 64;
27
- unichar[2] = 128 + (c % 64);
28
- gh_buf_put(ob, unichar, 3);
29
- }
30
- else if (c < 0x110000) {
31
- unichar[0] = 240 + (c / 262144);
32
- unichar[1] = 128 + (c / 4096) % 64;
33
- unichar[2] = 128 + (c / 64) % 64;
34
- unichar[3] = 128 + (c % 64);
35
- gh_buf_put(ob, unichar, 4);
36
- }
37
- else {
38
- gh_buf_putc(ob, '?');
39
- }
40
- }
41
-
42
- static size_t
43
- unescape_ent(gh_buf *ob, const uint8_t *src, size_t size)
44
- {
45
- size_t i = 0;
46
-
47
- if (size > 3 && src[0] == '#') {
48
- int codepoint = 0;
49
-
50
- if (_isdigit(src[1])) {
51
- for (i = 1; i < size && _isdigit(src[i]); ++i)
52
- codepoint = (codepoint * 10) + (src[i] - '0');
53
- }
54
-
55
- else if (src[1] == 'x' || src[1] == 'X') {
56
- for (i = 2; i < size && _isxdigit(src[i]); ++i)
57
- codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
58
- }
59
-
60
- if (i < size && src[i] == ';' && codepoint) {
61
- gh_buf_put_utf8(ob, codepoint);
62
- return i + 1;
63
- }
64
- }
65
-
66
- else {
67
- if (size > MAX_WORD_LENGTH)
68
- size = MAX_WORD_LENGTH;
69
-
70
- for (i = MIN_WORD_LENGTH; i < size; ++i) {
71
- if (src[i] == ' ')
72
- break;
73
-
74
- if (src[i] == ';') {
75
- const struct html_ent *entity = find_entity((char *)src, i);
76
-
77
- if (entity != NULL) {
78
- gh_buf_put(ob, entity->utf8, entity->utf8_len);
79
- return i + 1;
80
- }
81
-
82
- break;
83
- }
84
- }
85
- }
86
-
87
- gh_buf_putc(ob, '&');
88
- return 0;
89
- }
90
-
91
- int
92
- houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size)
93
- {
94
- size_t i = 0, org;
95
-
96
- while (i < size) {
97
- org = i;
98
- while (i < size && src[i] != '&')
99
- i++;
100
-
101
- if (likely(i > org)) {
102
- if (unlikely(org == 0)) {
103
- if (i >= size)
104
- return 0;
105
-
106
- gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
107
- }
108
-
109
- gh_buf_put(ob, src + org, i - org);
110
- }
111
-
112
- /* escaping */
113
- if (i >= size)
114
- break;
115
-
116
- i++;
117
- i += unescape_ent(ob, src + i, size - i);
118
- }
119
-
120
- return 1;
121
- }
122
-