RubyGems - escape_escape_escape - Versions diffs - 0.1.0 - Mend

escape_escape_escape 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

checksums.yaml +7 -0
data/.gitignore +25 -0
data/Gemfile +3 -0
data/LICENSE +23 -0
data/LICENSE.txt +23 -0
data/README.md +14 -0
data/VERSION +1 -0
data/escape_escape_escape.gemspec +32 -0
data/lib/beta.rb +270 -0
data/lib/e_e_e.js +258 -0
data/lib/escape_escape_escape.rb +60 -0
data/package.json +31 -0
data/specs/as_json/0001-html.json +23 -0
data/specs/as_json/0010-text.json +29 -0
data/specs/escape_escape_escape.rb +35 -0
data/specs/helpers.rb +4 -0
data/test/sanitize_attrs.js +132 -0
data/test/sanitize_html.js +57 -0
data/test/sanitize_un_escape.js +41 -0
metadata +163 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: f8e9caa2b123a6e7258a68b096860d2d81f6ad48
+  data.tar.gz: 04cb5b7fb058a88dbc3be52cd933f10b34970c3f
+SHA512:
+  metadata.gz: 6181a9de7665728466ac4bbd72375dad6ad632938af5b2c53faaf300717cde58a0cd5ddec57780974608cbdb2a2feb4094fe519cb8e65f7682e9714726e926dc
+  data.tar.gz: 87e14c445c08f606959fa353b7d373d093c9658de519890cdfadcebdc6424a54da442a87ecb2a253c91ea099268f71f6143576cd5cef61b3be4ee06de9013b24

data/.gitignore ADDED Viewed

@@ -0,0 +1,25 @@
+/pids/
+/logs/
+/results/
+/npm-debug.log/
+/node_modules/
+/npm-debug.log
+*.gem
+*.rbc
+.bundle
+.config
+.yardoc
+Gemfile.lock
+InstalledFiles
+_yardoc
+coverage
+doc/
+lib/bundler/man
+pkg
+rdoc
+spec/reports
+test/tmp
+test/version_tmp
+tmp

data/Gemfile ADDED Viewed

@@ -0,0 +1,3 @@
+source 'https://rubygems.org'
+gemspec

data/LICENSE ADDED Viewed

@@ -0,0 +1,23 @@
+Copyright (c) 2013 da99
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.

data/LICENSE.txt ADDED Viewed

@@ -0,0 +1,23 @@
+Copyright (c) 2014 da99
+MIT License
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.md ADDED Viewed

@@ -0,0 +1,14 @@
+What is it?
+====================
+My way of escaping and sanitizing HTML.
+Use:
+=====================
+    // npm install escape_escape_escape
+    var E = require("escape_escape_escape").Sanitize.html;
+    E("The <strong>brave</strong> and the <b>bold</b>.");

data/VERSION ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0.1.0

data/escape_escape_escape.gemspec ADDED Viewed

@@ -0,0 +1,32 @@
+# coding: utf-8
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+Gem::Specification.new do |spec|
+  spec.name          = "escape_escape_escape"
+  spec.version       = `cat VERSION`
+  spec.authors       = ["da99"]
+  spec.email         = ["i-hate-spam-1234567@mailinator.com"]
+  spec.summary       = %q{My way of escaping/encoding HTML.}
+  spec.description   = %q{
+    My way of escaping/encoding HTML with the proper entities.
+  }
+  spec.homepage      = "https://github.com/da99/escape_escape_escape"
+  spec.license       = "MIT"
+  spec.files         = `git ls-files -z`.split("\x0").reject { |file|
+    file.index('bin/') == 0 && file != "bin/#{File.basename Dir.pwd}"
+  }
+  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+  spec.require_paths = ["lib"]
+  spec.add_dependency "sanitize"     , "~> 3.0"
+  spec.add_development_dependency "pry"           , "~> 0.9"
+  spec.add_development_dependency "rake"          , "~> 10.3"
+  spec.add_development_dependency "bundler"       , "~> 1.5"
+  spec.add_development_dependency "bacon"         , "~> 1.0"
+  spec.add_development_dependency "Bacon_Colored" , "~> 0.1"
+  spec.add_development_dependency "multi_json"    , "~> 1.10"
+end

data/lib/beta.rb ADDED Viewed

@@ -0,0 +1,270 @@
+# === Important Gems ===
+require 'cgi' # Don't use URI.escape because it does not escape all invalid characters.
+require 'htmlentities'
+require 'loofah'
+require "addressable/uri"
+require "escape_utils"
+require "htmlentities"
+require "uri"
+def Escape_Escape_Escape s
+  Escape_Escape_Escape.escape(s)
+end
+class Escape_Escape_Escape
+  Coder = HTMLEntities.new(:xhtml1)
+  ENCODING_OPTIONS_CLEAN_UTF8 = {
+    :invalid => :replace, # Replace invalid byte sequences
+    :undef => :replace, # Replace anything not defined in ASCII
+    :replace => '' # Use a blank for those replacements
+    # :newline => :universal
+    # :universal_newline => true # Always break lines with \n, not \r\n
+  }
+  opts = Regexp::FIXEDENCODING | Regexp::IGNORECASE
+  # tabs, etc.
+  Control = Regexp.new("[[:cntrl:]]".force_encoding('utf-8'), opts) # unicode whitespaces, like 160 codepoint
+  # From:
+  # http://www.rubyinside.com/the-split-is-not-enough-whitespace-shenigans-for-rubyists-5980.html
+  White_Space = Regexp.new("[[:space:]]".force_encoding('utf-8'), opts)
+  REPEATING_DOTS = /\.{1,}/
+  INVALID_FILE_NAME_CHARS = /[^a-z0-9\_\.]{1,}/i
+  # HTML_ESCAPE_TABLE is used after text is escaped to
+  # further escape text more. This is why th semi-colon (&#59;) was left out
+  # from HTML_ESCAPE_TABLE. It would conflict with already escaped text.
+  # For more entities: http://www.w3.org/MarkUp/html3/latin1.html
+  # or go to: http://www.mountaindragon.com/html/iso.htm
+  HTML_ESCAPE_TABLE = {
+    '&laquo;' => "&lt;",
+    '&raquo;' => "&gt;",
+    "&lsquo;" => "&apos;",
+    "&rsquo;" => "&apos;",
+    "&sbquo;" => "&apos;",
+    "&lsquo;" => "&apos;",
+    "&rsquo;" => "&apos;",
+    "&ldquo;" => "&quot;",
+    "&rdquo;" => "&quot;",
+    "&bdquo;" => "&quot;",
+    "&lsaquo;" => "&lt;",
+    "&rsaquo;" => "&gt;",
+    "&acute;" => "&apos;",
+    "&uml;" => "&quot;",
+    '\\' => "&#92;",
+    # '/' => "&#47;",
+    # '%' => "&#37;",
+    # ':' => '&#58;',
+    # '=' => '&#61;',
+    # '?' => '&#63;',
+    # '@' => '&#64;',
+    "\`" => '&apos;',
+    '‘' => "&apos;",
+    '’' => "&apos;",
+    '“' => '&quot;',
+    '”' => '&quot;',
+    # "$" => "&#36;",
+    # '#' => '&#35;', # Don't use this or else it will ruin all other entities.
+    # '&' => # Don't use this " " " " " "
+    # ';' => # Don't use this " " " " " "
+    '|' => '&brvbar;',
+    '~' => '&sim;'
+    # '!' => '&#33;',
+    # '*' => '&lowast;', # Don't use this. '*' is used by text formating, ie RedCloth, etc.
+    # '{' => '&#123;',
+    # '}' => '&#125;',
+    # '(' => '&#40;',
+    # ')' => '&#41;',
+    # "\n" => '<br />'
+  }
+  def new_regexp str
+    Regexp.new(clean_utf8(str), Regexp::FIXEDENCODING | Regexp::IGNORECASE)
+  end
+  class << self # ======================================================
+    # From:
+    # http://stackoverflow.com/questions/1268289/how-to-get-rid-of-non-ascii-characters-in-ruby
+    #
+    # Test:
+    # [160, 160,64, 116, 119, 101, 108, 108, 121, 109, 101, 160, 102, 105, 108, 109].
+    # inject('', :<<)
+    #
+    def clean_utf8 s
+      s.
+        encode(Encoding.find('utf-8'), ENCODING_OPTIONS_CLEAN_UTF8).
+        gsub(self::Control, "\n").
+        gsub(self::White_Space, " ")
+    end
+    def un_escape raw
+      EscapeUtils.unescape_html clean_utf8(raw)
+    end
+    def uri str
+      uri = Addressable::URI.parse(str)
+      if ["http","https","ftp"].include?(uri.scheme) || uri.path.index('/') == 0
+        str
+      else
+        nil
+      end
+    rescue Addressable::URI::InvalidURIError
+      fail "Invalid: address: #{str.inspect}"
+    end
+    def escape o
+      case o
+      when String
+        Coder.encode(un_escape(o), :named, :hexadecimal)
+      else
+        fail "Unknown type: #{o.inspect}"
+      end
+    end # === def
+    # ===============================================
+    # Raises: TZInfo::InvalidTimezoneIdentifier.
+    # ===============================================
+    def validate_timezone(timezone)
+      TZInfo::Timezone.get( timezone.to_s.strip ).identifier
+    end
+    # =========================================================
+    # Takes out any periods and back slashes in a String.
+    # Single periods surround text are allowed on the last substring
+    # past the last slash because they are assumed to be filenames
+    # with extensions.
+    # =========================================================
+    def path( raw_path )
+      clean_crumbs = raw_path.split('/').map { |crumb| filename(crumb) }
+      File.join( *clean_crumbs )
+    end
+    # ====================================================================
+    # Returns a String where all characters except:
+    # letters numbers underscores dashes
+    # are replaced with a dash.
+    # It also delets any non-alphanumeric characters at the end
+    # of the String.
+    # ====================================================================
+    def filename( raw_filename )
+      plaintext( raw_filename ).
+        downcase.
+        gsub(REPEATING_DOTS, '.').
+        gsub(INVALID_FILE_NAME_CHARS, '-').
+        to_s
+    end
+    # ===============================================
+    # This method is not meant to be called directly. Instead, call
+    # <Wash.parse_tags>.
+    # Returns: String with
+    # * all spaces and underscores turned into dashes.
+    # * all non-alphanumeric characters, underscores, dashes, and periods
+    # turned into dashes.
+    # * non-alphanumeric characters at the beginning and end stripped out.
+    # ===============================================
+    def tag( raw_tag )
+      # raw_tag.strip.downcase.gsub( /[^a-z0-9\.]{1,}/,'-').gsub(/^[^a-z0-9]{1,}|[^a-z0-9]{1,}$/i, '').gsub(/\.{1,}/, '.')
+      raw_tag.strip.downcase.gsub(/^[\,\.]{1,}|[\"]{1,}|[\,\.]{1,}$/, '').gsub(/\ /, '-')
+    end
+    # ===============================================
+    # A better alternative than "Rack::Utils.escape_html". Escapes
+    # various characters (including '&', '<', '>', and both quotation mark types)
+    # to HTML decimal entities. Also escapes the characters from
+    # SWISS::HTML_ESCAPE_TABLE.
+    #
+    # Text has to be UTF-8 before encoding, according to HTMLEntities gem.
+    # Therefore, all text is run through <Wash.plaintext> before encoding.
+    # ===============================================
+    def html( raw_text )
+      # Turn string into UTF8. (This also takes out control characters
+      # which is good or else they too will be escaped into HTML too.
+      # Strip it after conversion.
+      # return Dryopteris.sanitize(utf8_text)
+      # Now encode it.
+      normalized_encoded_text = escape( plaintext(raw_text).strip, :named )
+      sanitized_text = Loofah.scrub_fragment( normalized_encoded_text, :prune ).to_s
+    end # === def html
+    # ===============================================
+    # Returns: A string that is:
+    # * normalized to :KC
+    # * "\r\n" changed to "\n"
+    # * all control characters stripped except for "\n"
+    # and end.
+    # Options:
+    # :tabs
+    # :spaces
+    #
+    # ===============================================
+    def plaintext( raw_str, *opts)
+      # Check options.
+      @plaintext_allowed_options ||= [ :spaces, :tabs ]
+      invalid_opts = opts - @plaintext_allowed_options
+      raise(ArgumentError, "INVALID OPTION: #{invalid_opts.inspect}" ) if !invalid_opts.empty?
+      # Save tabs if requested.
+      raw_str = raw_str.gsub("\t", "&#09;") if opts.include?(:tabs)
+      # First: Normalize characters.
+      # Second: Strip out control characters.
+      # Note: Must be normalized first, then strip.
+      # See: http://msdn.microsoft.com/en-us/library/ms776393(VS.85).aspx
+      final_str = raw_str.
+        split("\n").
+        map { |line|
+          # Don't use "\x20" because that is the space character.
+          line.chars.normalize.gsub( /[[:cntrl:]\x00-\x1f]*/, '' )
+        }.
+        join("\n")
+      # Save whitespace or strip.
+      if !opts.include?(:spaces)
+        final_str = final_str.strip
+      end
+      # Normalize quotations and other characters through HTML entity encoding/decoding.
+      final_str = coder.decode( normalised_str Coder.encode(final_str, :named) )
+      # Put back tabs by request.
+      if opts.include?(:tabs)
+          final_str = final_str.gsub("&#09;", "\t")
+      end
+      final_str
+    end # self.plaintext
+    # Encode a few other symbols.
+    # This also normalizes certain quotation and apostrophe HTML entities.
+    def normalize_encoded_string s
+      HTML_ESCAPE_TABLE.inject(s) do |m, kv|
+         m.gsub( kv.first, kv.last)
+      end
+    end
+  end # === class self ===
+end # === class Escape_Escape_Escape ===

data/lib/e_e_e.js ADDED Viewed

@@ -0,0 +1,258 @@
+var _       = require('underscore')
+  , _s      = require('underscore.string')
+  , special = require('special-html')
+  , HTML_E  = require('entities')
+  , URI_js  = require('uri-js')
+;
+var NL             = "\n";
+var SPACES         = /\ +/g;
+var VALID_HTML_ID  = /^[0-9a-zA-Z_]+$/;
+var VALID_HTML_TAG = /^[0-9a-zA-Z_]+$/;
+var IS_ERROR       = function (o) { return (_.isObject(o) && o.constructor == Error); };
+var funcs_scope    = this;
+var INSPECT        = function (v) { return JSON.stringify(v); };
+var E = exports.Sanitize = {};
+// ****************************************************************
+// ****************** Sanitize Tag Attributes and content *********
+// ****************************************************************
+E.name   = function (v) { return E.id(v  , "name"); };
+E.href   = function (v) { return E.uri(v , "href"); }
+E.action = function (v) { return E.uri(v , 'action'); };
+E.string = function (raw, name) {
+  name = (name) ? (name + ': ') : '';
+  if (_.isString(raw))
+    return (raw.trim());
+  return new Error(name + "String expected, instead got: " + JSON.stringify(raw));
+};
+E.uri = function (raw, name) {
+  name = (name) ? name : 'uri';
+  var val = E.string(raw, name);
+  if (E.is_error(val))
+    return val;
+  var url   = HTML_E.decode(val, 2);
+  var parse = URI_js.parse(url);
+  if (parse.errors.length)
+    return new Error(name + ": " + parse.errors[0] + ': ' + val);
+  return URI_js.normalize(url);
+};
+E.tag = function (raw, name) {
+  name = (name) ? name : "tag";
+  var val = E.string(raw, name);
+  if (E.is_error(val))
+    return val;
+  if (!val.match(VALID_HTML_TAG))
+    return new Error(name + ": invalid characters: " + JSON.stringify(val));
+  return val;
+};
+E.id   = function (raw_val, name) {
+  name = (name) ? name : "id";
+  var val = E.string(raw_val, name);
+  if (val.message)
+    return val;
+  if (!val.match(VALID_HTML_ID))
+    return new Error(name + ": invalid characters: " + JSON.stringify(val));
+  return val;
+};
+E.num_of_lines = function (raw_val, name) {
+  name = (name) ? name : 'num_of_lines';
+  if (!_.isNumber(raw_val) || _.isNaN(raw_val))
+    return new Error(name + ": Must be a number: " + JSON.stringify(raw_val));
+  if (raw_val < 1 || raw_val > 250)
+    return new Error(name + ": Number out of range: " + JSON.stringify(raw_val));
+  return raw_val;
+};
+E.string_in_array = function (unk, name) {
+  name = (name) ? name : 'string_in_array';
+  if (_.isArray(unk) && unk.length === 1 && _.isString(unk[0]))
+    return unk;
+  return new Error(name + ": Must be a string within an array: " + JSON.stringify(unk));
+};
+var temp = null;
+E.attr_funcs = [];
+for (temp in E) {
+  if (_.isFunction(E[temp])) {
+    E.attr_funcs.push(temp);
+  }
+}
+E.opt = function (func, name) {
+  return function (v) {
+    if (v === undefined || v === null)
+      return null;
+    return func(v, name);
+  };
+};
+E.is = function (func) {
+  return function (v) {
+    var result = func(v);
+    if (result && result.message)
+      return false;
+    return !!result;
+  };
+};
+_.each(E.attr_funcs, function (name, i) {
+  E["opt_" + name] = E.opt(E[name], name);
+  E["is_" + name] = E.is(E[name]);
+});
+// ****************************************************************
+// ****************** End of Sanitize Attr Checkers ***************
+// ****************************************************************
+E.is_error = function (obj) {
+  if (!_.isObject(obj))
+    return false;
+  return obj.constructor === Error;
+};
+E.html = function (str) {
+  if (_.isArray(str)) {
+    return _.map(str, function (v, i) {
+      return E.html(v);
+    });
+  }
+  if (_.isObject(str)) {
+    var new_o = {};
+    _.each(str, function (v, k) {
+      new_o[E.html(k)] = E.html(v);
+    });
+    return new_o;
+  }
+  if (!_.isString(str))
+    return str;
+  return special( _s.escapeHTML( E.un_escape(str) ) );
+};
+E.un_escape = function (str) {
+  return _s.unescapeHTML( HTML_E.decode( str , 2) )
+};
+E.attr = function (k, v, tag) {
+  if (_.contains(['id', 'name', 'type'], k) && !v.match(VALID_HTML_ID))
+    return new Error("Invalid chars in " + tag + " " + k + ": " + v);
+  if (!k.match(VALID_HTML_ID))
+    return new Error("Invalid chars in " + tag + " attribute name: " + k);
+  var safe_name = Ok.escape(k).trim();
+  if (_.contains(['href', 'action'], k)) {
+    var safe_val  = Ok.escape_uri(v);
+    if (!safe_val)
+      return new Error('Invalid link address: ' + v);
+  } else {
+    var safe_val  = Ok.escape(v);
+  }
+  return [safe_name, safe_val];
+};
+E.attrs = function (raw_attrs, tag) {
+  var sanitized = {};
+  var err       = null;
+  _.find(raw_attrs, function (v, k) {
+    var pair = Ok.escape_attr(k, v, tag);
+    if (IS_ERROR(pair)) {
+      err = pair;
+      return pair;
+    }
+    sanitized[pair[0]] = pair[1];
+  });
+  if (err)
+    return err;
+  return sanitized;
+};
+E.to_func_calls = function (arr) {
+  var next  = null;
+  var final = [];
+  var line  = null;
+  while (arr.length) {
+    line = [arr.shift()];
+    while(arr.length && !_.isString(arr[0])) {
+      line.push(arr.shift());
+    }
+    final.push(line);
+  }
+  return final;
+};
+E.to_applet_func_calls = function (arr) {
+  var next  = null;
+  var final = [];
+  while (arr.length) {
+    var name = arr.shift();
+    if (!_.isString(name))
+      return new Error("No function specfied for arg: " + JSON.stringify(name));
+    name = name.trim();
+    var attrs = null;
+    var body  = null;
+    next = arr[0];
+    if (!_.isString(next)) {
+      if (_.isObject(next) && !_.isArray(next))
+        attrs = arr.shift();
+      next = arr[0];
+      if (_.isArray(next))
+        body  = arr.shift();
+      next = arr[0];
+      if (next && _.isArray(next))
+        return new Error(name + ": extra array argument: " + JSON.stringify(next));
+      if (next && _.isObject(next))
+        return new Error(name + ": extra attr object argument: " + JSON.stringify(next));
+      if (next && !_.isString(next))
+        return new Error(name + ": invalid argument: " + JSON.stringify(next));
+    }
+    final.push([name, attrs, body]);
+  }
+  return final;
+};

data/lib/escape_escape_escape.rb ADDED Viewed

@@ -0,0 +1,60 @@
+require "sanitize"
+def Escape_Escape_Escape s
+  Escape_Escape_Escape.html(s)
+end
+class Escape_Escape_Escape
+  REPEATING_DOTS          = /\.{1,}\//
+  INVALID_FILE_NAME_CHARS = /[^a-z0-9\_\.]{1,}/i
+  UN_PRINT_ABLE           = /[^[:print:]\n]/
+  CR                      = "\r"
+  TABS                    = "\t"
+  CONTROL_CHARS           = /[[:cntrl:]\x00-\x1f]/  # Don't use "\x20" because that is the space character.
+  WHITE_SPACE             = /[[:space:]]&&[^\n]/            # http://www.rubyinside.com/the-split-is-not-enough-whitespace-shenigans-for-rubyists-5980.html
+  ENCODING_OPTIONS_CLEAN_UTF8 = {
+    :invalid           => :replace, # Replace invalid byte sequences
+    :undef             => :replace, # Replace anything not defined in ASCII
+    :replace           => '' # Use a blank for those replacements
+    # :newline         => :universal
+    # :universal_newline => true # Always break lines with \n, not \r\n
+  }
+  class << self # ======================================================
+    # From:
+    # http://stackoverflow.com/questions/1268289/how-to-get-rid-of-non-ascii-characters-in-ruby
+    #
+    # Test:
+    # [160, 160,64, 116, 119, 101, 108, 108, 121, 109, 101, 160, 102, 105, 108, 109].
+    # inject('', :<<)
+    #
+    def clean_utf8 s
+      s.
+        encode(Encoding.find('utf-8') , ENCODING_OPTIONS_CLEAN_UTF8).
+        gsub(TABS                     , "  ").
+        gsub(CR                       , "").
+        gsub(UN_PRINT_ABLE            , '').
+        gsub(CONTROL_CHARS            , "\n" ).
+        gsub(WHITE_SPACE              , " ")
+    end
+    def text s
+      clean_utf8 s
+    end
+    def html s
+      Sanitize.fragment( clean_utf8(s), Sanitize::Config::RELAXED )
+    end
+  end # === class self ===
+end # === class Escape_Escape_Escape ===

data/package.json ADDED Viewed

@@ -0,0 +1,31 @@
+{
+  "name": "escape_escape_escape",
+  "version": "0.0.5",
+  "description": "My way of escaping HTML.",
+  "main": "lib/e_e_e.js",
+  "directories": {
+    "test": "test"
+  },
+  "scripts": {
+    "test": "mocha"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git://github.com/da99/escape_escape_escape.git"
+  },
+  "keywords": [
+    "da99"
+  ],
+  "dependencies": {
+    "underscore": "1.x.x",
+    "unhtml": "x.x.x",
+    "special-html": "x.x.x",
+    "underscore.string": "x.x.x",
+    "entities": "x.x.x",
+    "uri-js": "x.x.x"
+  },
+  "author": "da99",
+  "license": "MIT",
+  "readmeFilename": "README.md",
+  "gitHead": "d7addccc1aea361d29d060720a54e34ec6dac499"
+}

data/specs/as_json/0001-html.json ADDED Viewed

@@ -0,0 +1,23 @@
+[
+  {
+    "it" : "does not re-escape already escaped text",
+    "input"  : "<p>Hello &amp; GoodBye</p>",
+    "output" : "<p>Hello &amp; GoodBye</p>"
+  },
+  {
+    "it" : "removes invalid attributes",
+    "input"  : "<a ignoreme=\"blah\">Hello GoodBye</a>",
+    "output" : "<a>Hello GoodBye</a>"
+  },
+  {
+    "it" : "removes \"javascript:\" protocol in \"href\" attributes",
+    "input" : "<a href=\"javascript:alert()\">hello</a>",
+    "output" : "<a>hello</a>"
+  }
+]

data/specs/as_json/0010-text.json ADDED Viewed

@@ -0,0 +1,29 @@
+[
+  {
+    "it" : "replaces tabs with 2 spaces",
+    "input"  : "<p>hello\tagain</p>",
+    "output" : "<p>hello  again</p>"
+  },
+  {
+    "it" : "removes \\r",
+    "input" : "hi \r\r again",
+    "output" : "hi  again"
+  },
+  {
+    "it" : "does not remove \\n",
+    "input"  : "<p>hello\nagain</p>",
+    "output" : "<p>hello\nagain</p>"
+  },
+  {
+    "it" : "does not remove multiple \\n",
+    "input"  : "<p>hello\n \nagain</p>",
+    "output" : "<p>hello\n \nagain</p>"
+  }
+]

data/specs/escape_escape_escape.rb ADDED Viewed

@@ -0,0 +1,35 @@
+require "multi_json"
+require "escape_escape_escape"
+Dir.glob("specs/as_json/*.json").sort.each { |f|
+  contents = MultiJson.load(File.read f)
+  method_name = File.basename(f).gsub(/\A\d+-|\.json\Z/, '')
+  describe ":#{method_name}" do
+    contents.each { |t|
+      it t["it"] do
+        i      = t["input"]
+        o      = t["output"]
+        actual = Escape_Escape_Escape.send(method_name, i)
+        case o
+        when String
+          actual.should == o
+        when Array
+          target = o.pop
+          begin
+            if o[1].is_a?(Array)
+              meth = o.shift
+              args = o.shift
+              actual = actual.send(meth, *args)
+            else
+              fail "Unknown method: #{o[0].inspect}"
+            end
+          end while !o.empty?
+          actual.should == target
+        end # === case
+      end # === it
+    }
+  end
+}

data/specs/helpers.rb ADDED Viewed

@@ -0,0 +1,4 @@
+require 'Bacon_Colored'
+require 'escape_escape_escape'
+require 'pry'

data/test/sanitize_attrs.js ADDED Viewed

@@ -0,0 +1,132 @@
+var _     = require('underscore')
+, assert  = require('assert')
+, E       = require('../lib/e_e_e').Sanitize
+;
+describe( 'Sanitize attrs:', function () {
+  // What if the value is null? undefined?
+  _.each(E.attr_funcs, function (name) {
+    describe( name, function () {
+      it( 'returns error if value is null', function () {
+        assert.equal(E[name](null).constructor, Error);
+      });
+      it( 'returns error if value is undefined', function () {
+        assert.equal(E[name](undefined).constructor, Error);
+      });
+      if ( !_.contains("name href action".split(' '), name ) )
+        it( 'adds specified name to error', function () {
+          var result = E[name](null, 'my_name').message;
+          if ( result.indexOf('my_name: ') !== 0)
+            assert.fail(result, 'my_name', 'E.' + name + ' is not adding name to error message.');
+        });
+    }); // === end desc
+  }); // end _.each
+  describe( 'string', function () {
+    it( 'returns value if string', function () {
+      assert.equal(E.string("s"), "s");
+    });
+    it( 'returns error if value is number', function () {
+      assert.equal(E.string(1).constructor, Error);
+    });
+  }); // === end desc
+  describe( 'string_in_array', function () {
+    it( 'returns value if string in array: [ my_string ]', function () {
+      var val = ["This is a string."];
+      assert.equal(E.string_in_array(val), val);
+    });
+  }); // === end desc
+  describe( 'tag', function () {
+    it( 'returns value if valid string', function () {
+      assert.equal(E.tag("button"), "button");
+    });
+    it( 'returns error if string contains invalid chars', function () {
+      assert.equal(E.tag("my-tag").message, "tag: invalid characters: \"my-tag\"");
+    });
+  }); // === end desc
+  describe( 'name', function () {
+    it( 'returns value if valid string', function () {
+      assert.equal(E.name("some_name"), "some_name");
+    });
+  }); // === end desc
+  _.each( ['href', 'action', 'uri'] , function (name) {
+    describe( 'url: ' + name, function () {
+      it( 'returns error if url is not valid', function () {
+        assert.equal(E[name]("http://wwwtome<").message, name + ": URI is not strictly valid.: http://wwwtome<");
+      });
+    }); // === end desc
+  });
+  describe( 'uri', function () {
+    it( 'normalizes address', function () {
+      var s = "hTTp://wWw.test.com/";
+      assert.equal(E.uri(s), s.toLowerCase());
+    });
+    it( 'returns an Error if path contains: <', function () {
+      var s = "http://www.test.com/<something/";
+      assert.equal(E.uri(s).constructor, Error);
+    });
+    it( 'returns an Error if path contains HTML entities', function () {
+      var s = "http://6&#9;6.000146.0x7.147/";
+      assert.equal(E.uri(s).constructor, Error);
+    });
+    it( 'returns an Error if path contains HTML entities', function () {
+      var s = "http://www.test.com/&nbsp;s/";
+      assert.equal(E.uri(s).constructor, Error);
+    });
+    it( 'returns an Error if query string contains HTML entities', function () {
+      var s = "http://www.test.com/s/test?t&nbsp;test";
+      assert.equal(E.uri(s).constructor, Error);
+    });
+  }); // === end desc
+  // ****************************************************************
+  // ****************** END of Sanitize Attrs ***********************
+  // ****************************************************************
+  describe( '.opt(func)', function () {
+    it( 'returns a function where null returns null', function () {
+      assert.equal(E.opt(E.string)(null), null);
+    });
+    it( 'returns a function where undefined returns null', function () {
+      assert.equal(E.opt(E.string)(undefined), null);
+    });
+    it( 'returns a function that passes false to underlying function', function () {
+      assert.equal(E.opt(E.string)(false).constructor, Error);
+    });
+    it( 'returns a function that passes any Number to underlying function', function () {
+      assert.equal(E.opt(E.string)(1).constructor, Error);
+    });
+    it( 'returns a function that passes any String to underlying function', function () {
+      assert.equal(E.opt(E.string)("a"), "a");
+    });
+  }); // === end desc
+}); // === end desc

data/test/sanitize_html.js ADDED Viewed

@@ -0,0 +1,57 @@
+var _      = require('underscore')
+, _s       = require('underscore.string')
+, unhtml   = require('unhtml')
+, special  = require('special-html')
+, assert   = require('assert')
+, Sanitize = require('../lib/e_e_e').Sanitize
+, E        = Sanitize.html
+;
+var BRACKET = " < %3C &lt &lt; &LT &LT; &#60 &#060 &#0060  \
+&#00060 &#000060 &#0000060 &#60; &#060; &#0060; &#00060;  \
+&#000060; &#0000060; &#x3c &#x03c &#x003c &#x0003c &#x00003c  \
+&#x000003c &#x3c; &#x03c; &#x003c; &#x0003c; &#x00003c;  \
+&#x000003c; &#X3c &#X03c &#X003c &#X0003c &#X00003c &#X000003c  \
+&#X3c; &#X03c; &#X003c; &#X0003c; &#X00003c; &#X000003c;  \
+&#x3C &#x03C &#x003C &#x0003C &#x00003C &#x000003C &#x3C; &#x03C;  \
+&#x003C; &#x0003C; &#x00003C; &#x000003C; &#X3C &#X03C  \
+&#X003C &#X0003C &#X00003C &#X000003C &#X3C; &#X03C; &#X003C; &#X0003C;  \
+&#X00003C; &#X000003C; \x3c \x3C \u003c \u003C ";
+describe( 'Sanitize', function () {
+  it( 'does not re-escape already escaped text mixed with HTML', function () {
+    var h = "<p>Hi</p>";
+    var e = _s.escapeHTML(h);
+    var o = e + h;
+    assert.equal(E(o), _s.escapeHTML(h + h));
+  });
+  it( 'escapes special chars: "Hello ©®∆"', function () {
+    var s = "Hello & World ©®∆";
+    var t = "Hello &amp; World &#169;&#174;&#8710;";
+    assert.equal(E(s), t);
+  });
+  it( 'escapes all 70 different combos of "<"', function () {
+    assert.equal(_.uniq(E(BRACKET.trim()).split(/\s+/)).join(' '), "&lt; %3C");
+  });
+  it( 'escapes all keys in nested objects', function () {
+    var HTML = "<b>test</b>";
+    assert.deepEqual(E({" a >":{" a >": HTML}}), {" a &gt;": {" a &gt;": _s.escapeHTML(HTML)}});
+  });
+  it( 'escapes all values in nested objects', function () {
+    var HTML = "<b>test</b>";
+    assert.deepEqual(E({name:{name: HTML}}), {name: {name: _s.escapeHTML(HTML)}});
+  });
+  it( 'escapes all values in nested arrays', function () {
+    var HTML = "<b>test</b>";
+    assert.deepEqual(E([{name:{name: HTML}}]), [{name: {name: _s.escapeHTML(HTML)}}]);
+  });
+}); // === end desc

data/test/sanitize_un_escape.js ADDED Viewed

@@ -0,0 +1,41 @@
+var _      = require('underscore')
+, _s       = require('underscore.string')
+, unhtml   = require('unhtml')
+, special  = require('special-html')
+, assert   = require('assert')
+, Sanitize = require('../lib/e_e_e').Sanitize
+, E        = Sanitize.html
+, U        = Sanitize.un_escape
+;
+var BRACKET = " < %3C &lt &lt; &LT &LT; &#60 &#060 &#0060  \
+&#00060 &#000060 &#0000060 &#60; &#060; &#0060; &#00060;  \
+&#000060; &#0000060; &#x3c &#x03c &#x003c &#x0003c &#x00003c  \
+&#x000003c &#x3c; &#x03c; &#x003c; &#x0003c; &#x00003c;  \
+&#x000003c; &#X3c &#X03c &#X003c &#X0003c &#X00003c &#X000003c  \
+&#X3c; &#X03c; &#X003c; &#X0003c; &#X00003c; &#X000003c;  \
+&#x3C &#x03C &#x003C &#x0003C &#x00003C &#x000003C &#x3C; &#x03C;  \
+&#x003C; &#x0003C; &#x00003C; &#x000003C; &#X3C &#X03C  \
+&#X003C &#X0003C &#X00003C &#X000003C &#X3C; &#X03C; &#X003C; &#X0003C;  \
+&#X00003C; &#X000003C; \x3c \x3C \u003c \u003C "; // Fixture: 70 whitespace-separated spellings of "<" (literal, %3C, named/decimal/hex entities with and without ";"); the trailing \x3c and \u003c escapes are already a literal "<" once this string is parsed, and each backslash at end of line is a string line continuation.
+describe( 'Sanitize', function () { // Test suite for U (Sanitize.un_escape): decoding entities back to characters.
+  it( 'un-escapes escaped text mixed with HTML', function () {
+    var s = "<p>Hi&amp;</p>";
+    assert.equal(U(s), "<p>Hi&</p>"); // the entity is decoded while the surrounding tags are expected unchanged
+  });
+  it( 'un-escapes special chars: "Hello ©®∆"', function () {
+    var s = "Hello &amp; World &#169;&#174;&#8710;";
+    var t = "Hello & World ©®∆"; // named and decimal numeric entities expected back as literal characters
+    assert.equal(U(s), t);
+  });
+  it( 'un-escapes all 70 different combos of "<"', function () {
+    assert.equal(_.uniq(U(BRACKET.trim()).split(/\s+/)).join(' '), "< %3C"); // after de-duplication every entity form must have become a literal "<"; the URL-encoded "%3C" is expected to stay untouched
+  });
+}); // === end desc

metadata ADDED Viewed

@@ -0,0 +1,163 @@
+--- !ruby/object:Gem::Specification
+name: escape_escape_escape
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- da99
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2014-06-23 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: sanitize
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '3.0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '3.0'
+- !ruby/object:Gem::Dependency
+  name: pry
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.9'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.9'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '10.3'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '10.3'
+- !ruby/object:Gem::Dependency
+  name: bundler
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.5'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.5'
+- !ruby/object:Gem::Dependency
+  name: bacon
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.0'
+- !ruby/object:Gem::Dependency
+  name: Bacon_Colored
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.1'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.1'
+- !ruby/object:Gem::Dependency
+  name: multi_json
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.10'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.10'
+description: "\n    My way of escaping/encoding HTML with the proper entities.\n  "
+email:
+- i-hate-spam-1234567@mailinator.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- ".gitignore"
+- Gemfile
+- LICENSE
+- LICENSE.txt
+- README.md
+- VERSION
+- escape_escape_escape.gemspec
+- lib/beta.rb
+- lib/e_e_e.js
+- lib/escape_escape_escape.rb
+- package.json
+- specs/as_json/0001-html.json
+- specs/as_json/0010-text.json
+- specs/escape_escape_escape.rb
+- specs/helpers.rb
+- test/sanitize_attrs.js
+- test/sanitize_html.js
+- test/sanitize_un_escape.js
+homepage: https://github.com/da99/escape_escape_escape
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.3.0
+signing_key:
+specification_version: 4
+summary: My way of escaping/encoding HTML.
+test_files:
+- test/sanitize_attrs.js
+- test/sanitize_html.js
+- test/sanitize_un_escape.js