RubyGems - escape_utils - Versions diffs - 1.2.2 → 1.3.0 - Mend

escape_utils 1.2.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

checksums.yaml +4 -4
data/CHANGELOG.md +10 -0
data/README.md +47 -88
data/benchmark/html_escape_once.rb +25 -0
data/benchmark/javascript_escape.rb +1 -1
data/benchmark/javascript_unescape.rb +1 -1
data/benchmark/url_decode.rb +28 -0
data/benchmark/url_encode.rb +37 -0
data/benchmark/xml_escape.rb +7 -11
data/ext/escape_utils/escape_utils.c +7 -115
data/ext/escape_utils/houdini.h +3 -5
data/ext/escape_utils/houdini_html_e.c +52 -24
data/ext/escape_utils/houdini_uri_e.c +6 -17
data/ext/escape_utils/houdini_uri_u.c +5 -15
data/ext/escape_utils/houdini_xml_e.c +15 -1
data/lib/escape_utils/html/cgi.rb +10 -8
data/lib/escape_utils/html/erb.rb +1 -10
data/lib/escape_utils/html/haml.rb +1 -7
data/lib/escape_utils/html/rack.rb +3 -3
data/lib/escape_utils/html_safety.rb +13 -0
data/lib/escape_utils/url/cgi.rb +0 -8
data/lib/escape_utils/url/erb.rb +1 -1
data/lib/escape_utils/url/uri.rb +11 -7
data/lib/escape_utils/version.rb +1 -1
data/lib/escape_utils.rb +61 -9
data/test/helper.rb +16 -3
data/test/html/escape_test.rb +41 -39
data/test/html/unescape_test.rb +0 -16
data/test/html_safety_test.rb +1 -27
data/test/javascript/escape_test.rb +0 -5
data/test/query/escape_test.rb +0 -16
data/test/query/unescape_test.rb +2 -18
data/test/uri/unescape_test.rb +2 -2
data/test/uri_component/unescape_test.rb +2 -2
data/test/url/escape_test.rb +0 -16
data/test/url/unescape_test.rb +2 -18
metadata +6 -8
data/benchmark/html_escape.rb +0 -68
data/benchmark/html_unescape.rb +0 -35
data/benchmark/url_escape.rb +0 -56
data/benchmark/url_unescape.rb +0 -50
data/ext/escape_utils/houdini_html_u.c +0 -122

data/test/javascript/escape_test.rb CHANGED Viewed

@@ -1,10 +1,5 @@
 require File.expand_path("../../helper", __FILE__)
-require 'active_support'
-require 'active_support/json'
-require 'action_view'
-require 'action_view/helpers'
 class JavascriptEscapeTest < Minitest::Test
   ActiveSupport.escape_html_entities_in_json = true

data/test/query/escape_test.rb CHANGED Viewed

@@ -25,22 +25,6 @@ class QueryEscapeTest < Minitest::Test
     assert_equal '%E3%81%BE%E3%81%A4+%E3%82%82%E3%81%A8', EscapeUtils.escape_url(matz_name_sep)
   end
-  def test_input_must_be_utf8_or_ascii
-    str = "a space"
-    str.force_encoding 'ISO-8859-1'
-    assert_raises Encoding::CompatibilityError do
-      EscapeUtils.escape_url(str)
-    end
-    str.force_encoding 'UTF-8'
-    begin
-      EscapeUtils.escape_url(str)
-    rescue Encoding::CompatibilityError => e
-      assert_nil e, "#{e.class.name} raised, expected not to"
-    end
-  end
   def test_return_value_is_tagged_as_utf8
     str = "a+space"
     assert_equal Encoding.find('UTF-8'), EscapeUtils.escape_url(str).encoding

data/test/query/unescape_test.rb CHANGED Viewed

@@ -20,29 +20,13 @@ class QueryUnescapeTest < Minitest::Test
   def test_url_containing_multibyte_characters
     matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
-    matz_name.force_encoding('UTF-8') if matz_name.respond_to?(:force_encoding)
+    matz_name.force_encoding('UTF-8')
     assert_equal matz_name, EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
     matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
-    matz_name_sep.force_encoding('UTF-8') if matz_name_sep.respond_to?(:force_encoding)
+    matz_name_sep.force_encoding('UTF-8')
     assert_equal matz_name_sep, EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4+%E3%82%82%E3%81%A8')
   end
-  def test_input_must_be_valid_utf8_or_ascii
-    escaped = EscapeUtils.unescape_url("a+space")
-    escaped.force_encoding 'ISO-8859-1'
-    assert_raises Encoding::CompatibilityError do
-      EscapeUtils.unescape_url(escaped)
-    end
-    escaped.force_encoding 'UTF-8'
-    begin
-      EscapeUtils.unescape_url(escaped)
-    rescue Encoding::CompatibilityError => e
-      assert_nil e, "#{e.class.name} raised, expected not to"
-    end
-  end
   def test_return_value_is_tagged_as_utf8
     escaped = EscapeUtils.escape_url("a space")
     assert_equal Encoding.find('UTF-8'), EscapeUtils.unescape_url(escaped).encoding

data/test/uri/unescape_test.rb CHANGED Viewed

@@ -23,10 +23,10 @@ class UriUnescapeTest < Minitest::Test
   def test_uri_containing_multibyte_charactes
     matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
-    matz_name.force_encoding('UTF-8') if matz_name.respond_to?(:force_encoding)
+    matz_name.force_encoding('UTF-8')
     assert_equal matz_name, EscapeUtils.unescape_uri('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
     matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
-    matz_name_sep.force_encoding('UTF-8') if matz_name_sep.respond_to?(:force_encoding)
+    matz_name_sep.force_encoding('UTF-8')
     assert_equal matz_name_sep, EscapeUtils.unescape_uri('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8')
   end

data/test/uri_component/unescape_test.rb CHANGED Viewed

@@ -39,10 +39,10 @@ class UriComponentUnescapeTest < Minitest::Test
   def test_multibyte_characters
     matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
-    matz_name.force_encoding('UTF-8') if matz_name.respond_to?(:force_encoding)
+    matz_name.force_encoding('UTF-8')
     assert_equal matz_name, EscapeUtils.unescape_uri_component('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
     matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
-    matz_name_sep.force_encoding('UTF-8') if matz_name_sep.respond_to?(:force_encoding)
+    matz_name_sep.force_encoding('UTF-8')
     assert_equal matz_name_sep, EscapeUtils.unescape_uri_component('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8')
   end

data/test/url/escape_test.rb CHANGED Viewed

@@ -41,22 +41,6 @@ class UrlEscapeTest < Minitest::Test
     assert_equal "a%2Fslash", EscapeUtils.escape_url("a/slash")
   end
-  def test_input_must_be_utf8_or_ascii
-    str = "fo<o>bar"
-    str.force_encoding 'ISO-8859-1'
-    assert_raises Encoding::CompatibilityError do
-      EscapeUtils.escape_url(str)
-    end
-    str.force_encoding 'UTF-8'
-    begin
-      EscapeUtils.escape_url(str)
-    rescue Encoding::CompatibilityError => e
-      assert_nil e, "#{e.class.name} raised, expected not to"
-    end
-  end
   def test_return_value_is_tagged_as_utf8
     str = "fo<o>bar"
     assert_equal Encoding.find('UTF-8'), EscapeUtils.escape_url(str).encoding

data/test/url/unescape_test.rb CHANGED Viewed

@@ -28,10 +28,10 @@ class UrlUnescapeTest < Minitest::Test
   def test_multibyte_characters
     matz_name = "\xE3\x81\xBE\xE3\x81\xA4\xE3\x82\x82\xE3\x81\xA8" # Matsumoto
-    matz_name.force_encoding('UTF-8') if matz_name.respond_to?(:force_encoding)
+    matz_name.force_encoding('UTF-8')
     assert_equal matz_name, EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4%E3%82%82%E3%81%A8')
     matz_name_sep = "\xE3\x81\xBE\xE3\x81\xA4 \xE3\x82\x82\xE3\x81\xA8" # Matsu moto
-    matz_name_sep.force_encoding('UTF-8') if matz_name_sep.respond_to?(:force_encoding)
+    matz_name_sep.force_encoding('UTF-8')
     assert_equal matz_name_sep, EscapeUtils.unescape_url('%E3%81%BE%E3%81%A4%20%E3%82%82%E3%81%A8')
   end
@@ -46,22 +46,6 @@ class UrlUnescapeTest < Minitest::Test
     end
   end
-  def test_input_must_be_valid_utf8_or_ascii
-    escaped = EscapeUtils.escape_url("fo<o>bar")
-    escaped.force_encoding 'ISO-8859-1'
-    assert_raises Encoding::CompatibilityError do
-      EscapeUtils.unescape_url(escaped)
-    end
-    escaped.force_encoding 'UTF-8'
-    begin
-      EscapeUtils.unescape_url(escaped)
-    rescue Encoding::CompatibilityError => e
-      assert_nil e, "#{e.class.name} raised, expected not to"
-    end
-  end
   def test_return_value_is_tagged_as_utf8
     escaped = EscapeUtils.escape_url("fo<o>bar")
     assert_equal Encoding.find('UTF-8'), EscapeUtils.unescape_url(escaped).encoding

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: escape_utils
 version: !ruby/object:Gem::Version
-  version: 1.2.2
+  version: 1.3.0
 platform: ruby
 authors:
 - Brian Lopez
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-06-07 00:00:00.000000000 Z
+date: 2022-06-16 00:00:00.000000000 Z
 dependencies: []
 description: Quickly perform HTML, URL, URI and Javascript escaping/unescaping
 email: seniorlopez@gmail.com
@@ -24,12 +24,11 @@ files:
 - LICENSE
 - README.md
 - Rakefile
-- benchmark/html_escape.rb
-- benchmark/html_unescape.rb
+- benchmark/html_escape_once.rb
 - benchmark/javascript_escape.rb
 - benchmark/javascript_unescape.rb
-- benchmark/url_escape.rb
-- benchmark/url_unescape.rb
+- benchmark/url_decode.rb
+- benchmark/url_encode.rb
 - benchmark/xml_escape.rb
 - bin/console
 - escape_utils.gemspec
@@ -40,7 +39,6 @@ files:
 - ext/escape_utils/houdini.h
 - ext/escape_utils/houdini_href_e.c
 - ext/escape_utils/houdini_html_e.c
-- ext/escape_utils/houdini_html_u.c
 - ext/escape_utils/houdini_js_e.c
 - ext/escape_utils/houdini_js_u.c
 - ext/escape_utils/houdini_uri_e.c
@@ -97,7 +95,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.3.7
+rubygems_version: 3.1.2
 signing_key:
 specification_version: 4
 summary: Faster string escaping routines for your web apps

data/benchmark/html_escape.rb DELETED Viewed

@@ -1,68 +0,0 @@
-# encoding: utf-8
-require 'rubygems'
-require 'bundler/setup'
-require 'benchmark/ips'
-require 'rack'
-require 'erb'
-require 'cgi'
-require 'haml'
-require 'fast_xs_extra'
-require 'escape_utils'
-module HamlBench
-  extend Haml::Helpers
-end
-url = "https://en.wikipedia.org/wiki/Succession_to_the_British_throne"
-html = `curl -s #{url}`
-html = html.force_encoding('utf-8') if html.respond_to?(:force_encoding)
-puts "Escaping #{html.bytesize} bytes of html from #{url}"
-Benchmark.ips do |x|
-  x.report "Rack::Utils.escape_html" do |times|
-    times.times do
-      Rack::Utils.escape_html(html)
-    end
-  end
-  x.report "Haml::Helpers.html_escape" do |times|
-    times.times do
-      HamlBench.html_escape(html)
-    end
-  end
-  x.report "ERB::Util.html_escape" do |times|
-    times.times do
-      ERB::Util.html_escape(html)
-    end
-  end
-  x.report "CGI.escapeHTML" do |times|
-    times.times do
-      CGI.escapeHTML(html)
-    end
-  end
-  x.report "String#gsub" do |times|
-    html_escape = { '&' => '&amp;',  '>' => '&gt;',   '<' => '&lt;', '"' => '&quot;', "'" => '&#39;' }
-    times.times do
-      html.gsub(/[&"'><]/, html_escape)
-    end
-  end
-  x.report "fast_xs_extra#fast_xs_html" do |times|
-    times.times do
-      html.fast_xs_html
-    end
-  end
-  x.report "EscapeUtils.escape_html" do |times|
-    times.times do
-      EscapeUtils.escape_html(html)
-    end
-  end
-  x.compare!
-end

data/benchmark/html_unescape.rb DELETED Viewed

@@ -1,35 +0,0 @@
-# encoding: utf-8
-require 'rubygems'
-require 'bundler/setup'
-require 'benchmark/ips'
-require 'cgi'
-require 'haml'
-require 'escape_utils'
-module HamlBench
-  extend Haml::Helpers
-end
-url = "https://en.wikipedia.org/wiki/Succession_to_the_British_throne"
-html = `curl -s #{url}`
-html = html.force_encoding('binary') if html.respond_to?(:force_encoding)
-escaped_html = EscapeUtils.escape_html(html)
-puts "Unescaping #{escaped_html.bytesize} bytes of escaped html, from #{url}"
-Benchmark.ips do |x|
-  x.report "CGI.unescapeHTML" do |times|
-    times.times do
-      CGI.unescapeHTML(escaped_html)
-    end
-  end
-  x.report "EscapeUtils.unescape_html" do |times|
-    times.times do
-      EscapeUtils.unescape_html(escaped_html)
-    end
-  end
-  x.compare!
-end

data/benchmark/url_escape.rb DELETED Viewed

@@ -1,56 +0,0 @@
-# encoding: utf-8
-require 'rubygems'
-require 'bundler/setup'
-require 'benchmark/ips'
-require 'rack'
-require 'erb'
-require 'cgi'
-require 'url_escape'
-require 'fast_xs_extra'
-require 'escape_utils'
-url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA  dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
-url = url.force_encoding('us-ascii') if url.respond_to?(:force_encoding)
-puts "Escaping a #{url.bytesize} byte URL times"
-Benchmark.ips do |x|
-  x.report "ERB::Util.url_encode" do |times|
-    times.times do
-      ERB::Util.url_encode(url)
-    end
-  end
-  x.report "Rack::Utils.escape" do |times|
-    times.times do
-      Rack::Utils.escape(url)
-    end
-  end
-  x.report "CGI.escape" do |times|
-    times.times do
-      CGI.escape(url)
-    end
-  end
-  x.report "URLEscape#escape" do |times|
-    times.times do
-      URLEscape.escape(url)
-    end
-  end
-  x.report "fast_xs_extra#fast_xs_url" do |times|
-    times.times do
-      url.fast_xs_url
-    end
-  end
-  x.report "EscapeUtils.escape_url" do |times|
-    times.times do
-      EscapeUtils.escape_url(url)
-    end
-  end
-  x.compare!
-end

data/benchmark/url_unescape.rb DELETED Viewed

@@ -1,50 +0,0 @@
-# encoding: utf-8
-require 'rubygems'
-require 'bundler/setup'
-require 'benchmark/ips'
-require 'rack'
-require 'cgi'
-require 'url_escape'
-require 'fast_xs_extra'
-require 'escape_utils'
-url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA  dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
-url = url.force_encoding('us-ascii') if url.respond_to?(:force_encoding)
-escaped_url = EscapeUtils.escape_url(url)
-puts "Escaping a #{url.bytesize} byte URL"
-Benchmark.ips do |x|
-  x.report "Rack::Utils.unescape" do |times|
-    times.times do
-      Rack::Utils.unescape(escaped_url)
-    end
-  end
-  x.report "CGI.unescape" do |times|
-    times.times do
-      CGI.unescape(escaped_url)
-    end
-  end
-  x.report "URLEscape#unescape" do |times|
-    times.times do
-      URLEscape.unescape(escaped_url)
-    end
-  end
-  x.report "fast_xs_extra#fast_uxs_cgi" do |times|
-    times.times do
-      url.fast_uxs_cgi
-    end
-  end
-  x.report "EscapeUtils.unescape_url" do |times|
-    times.times do
-      EscapeUtils.unescape_url(escaped_url)
-    end
-  end
-  x.compare!
-end

data/ext/escape_utils/houdini_html_u.c DELETED Viewed

@@ -1,122 +0,0 @@
-#include <assert.h>
-#include <stdio.h>
-#include <string.h>
-#include "houdini.h"
-#include "html_unescape.h"
-static inline void
-gh_buf_put_utf8(gh_buf *ob, int c)
-{
-	unsigned char unichar[4];
-	if (c < 0x80) {
-		gh_buf_putc(ob, c);
-	}
-	else if (c < 0x800) {
-		unichar[0] = 192 + (c / 64);
-		unichar[1] = 128 + (c % 64);
-		gh_buf_put(ob, unichar, 2);
-	}
-	else if (c - 0xd800u < 0x800) {
-		gh_buf_putc(ob, '?');
-	}
-	else if (c < 0x10000) {
-		unichar[0] = 224 + (c / 4096);
-		unichar[1] = 128 + (c / 64) % 64;
-		unichar[2] = 128 + (c % 64);
-		gh_buf_put(ob, unichar, 3);
-	}
-	else if (c < 0x110000) {
-		unichar[0] = 240 + (c / 262144);
-		unichar[1] = 128 + (c / 4096) % 64;
-		unichar[2] = 128 + (c / 64) % 64;
-		unichar[3] = 128 + (c % 64);
-		gh_buf_put(ob, unichar, 4);
-	}
-	else {
-		gh_buf_putc(ob, '?');
-	}
-}
-static size_t
-unescape_ent(gh_buf *ob, const uint8_t *src, size_t size)
-{
-	size_t i = 0;
-	if (size > 3 && src[0] == '#') {
-		int codepoint = 0;
-		if (_isdigit(src[1])) {
-			for (i = 1; i < size && _isdigit(src[i]); ++i)
-				codepoint = (codepoint * 10) + (src[i] - '0');
-		}
-		else if (src[1] == 'x' || src[1] == 'X') {
-			for (i = 2; i < size && _isxdigit(src[i]); ++i)
-				codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
-		}
-		if (i < size && src[i] == ';' && codepoint) {
-			gh_buf_put_utf8(ob, codepoint);
-			return i + 1;
-		}
-	}
-	else {
-		if (size > MAX_WORD_LENGTH)
-			size = MAX_WORD_LENGTH;
-		for (i = MIN_WORD_LENGTH; i < size; ++i) {
-			if (src[i] == ' ')
-				break;
-			if (src[i] == ';') {
-				const struct html_ent *entity = find_entity((char *)src, i);
-				if (entity != NULL) {
-					gh_buf_put(ob, entity->utf8, entity->utf8_len);
-					return i + 1;
-				}
-				break;
-			}
-		}
-	}
-	gh_buf_putc(ob, '&');
-	return 0;
-}
-int
-houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size)
-{
-	size_t  i = 0, org;
-	while (i < size) {
-		org = i;
-		while (i < size && src[i] != '&')
-			i++;
-		if (likely(i > org)) {
-			if (unlikely(org == 0)) {
-				if (i >= size)
-					return 0;
-				gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
-			}
-			gh_buf_put(ob, src + org, i - org);
-		}
-		/* escaping */
-		if (i >= size)
-			break;
-		i++;
-		i += unescape_ent(ob, src + i, size - i);
-	}
-	return 1;
-}