RubyGems - escape_escape_escape - Versions diffs - 0.3.0 → 1.1.0 - Mend

escape_escape_escape 0.3.0 → 1.1.0

Files changed (28) hide show

checksums.yaml +4 -4
data/README.md +6 -11
data/VERSION +1 -1
data/escape_escape_escape.gemspec +5 -2
data/lib/escape_escape_escape.rb +219 -47
data/specs/as_ruby/0001-html.rb +60 -0
data/specs/as_ruby/0002-decode_html.rb +13 -0
data/specs/as_ruby/0003-css_attr.rb +10 -0
data/specs/as_ruby/0003-css_selector.rb +12 -0
data/specs/as_ruby/0003-css_value.rb +53 -0
data/specs/as_ruby/0004-==.rb +5 -0
data/specs/as_ruby/0020-href.rb +118 -0
data/specs/as_ruby/0030-clean_utf8.rb +34 -0
data/specs/as_ruby/0040-escape.rb +41 -0
data/specs/escape_escape_escape.rb +133 -21
data/specs/lib/helpers.rb +1 -0
metadata +61 -23
data/LICENSE.txt +0 -23
data/lib/beta.rb +0 -270
data/lib/e_e_e.js +0 -258
data/package.json +0 -31
data/specs/as_json/0001-html.json +0 -23
data/specs/as_json/0002-inner_html.json +0 -16
data/specs/as_json/0010-text.json +0 -29
data/specs/helpers.rb +0 -4
data/test/sanitize_attrs.js +0 -132
data/test/sanitize_html.js +0 -57
data/test/sanitize_un_escape.js +0 -41

data/LICENSE.txt DELETED

@@ -1,23 +0,0 @@
-Copyright (c) 2014 da99
-MIT License
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/lib/beta.rb DELETED

@@ -1,270 +0,0 @@
-# === Important Gems ===
-require 'cgi' # Don't use URI.escape because it does not escape all invalid characters.
-require 'htmlentities'
-require 'loofah'
-require "addressable/uri"
-require "escape_utils"
-require "htmlentities"
-require "uri"
-def Escape_Escape_Escape s
-  Escape_Escape_Escape.escape(s)
-end
-class Escape_Escape_Escape
-  Coder = HTMLEntities.new(:xhtml1)
-  ENCODING_OPTIONS_CLEAN_UTF8 = {
-    :invalid => :replace, # Replace invalid byte sequences
-    :undef => :replace, # Replace anything not defined in ASCII
-    :replace => '' # Use a blank for those replacements
-    # :newline => :universal
-    # :universal_newline => true # Always break lines with \n, not \r\n
-  }
-  opts = Regexp::FIXEDENCODING | Regexp::IGNORECASE
-  # tabs, etc.
-  Control = Regexp.new("[[:cntrl:]]".force_encoding('utf-8'), opts) # unicode whitespaces, like 160 codepoint
-  # From:
-  # http://www.rubyinside.com/the-split-is-not-enough-whitespace-shenigans-for-rubyists-5980.html
-  White_Space = Regexp.new("[[:space:]]".force_encoding('utf-8'), opts)
-  REPEATING_DOTS = /\.{1,}/
-  INVALID_FILE_NAME_CHARS = /[^a-z0-9\_\.]{1,}/i
-  # HTML_ESCAPE_TABLE is applied after text has already been escaped, to
-  # escape it further. This is why the semi-colon (&#59;) was left out
-  # of HTML_ESCAPE_TABLE: it would conflict with already-escaped text.
-  # For more entities: http://www.w3.org/MarkUp/html3/latin1.html
-  # or go to: http://www.mountaindragon.com/html/iso.htm
-  HTML_ESCAPE_TABLE = {
-    '&laquo;' => "&lt;",
-    '&raquo;' => "&gt;",
-    "&lsquo;" => "&apos;",
-    "&rsquo;" => "&apos;",
-    "&sbquo;" => "&apos;",
-    "&lsquo;" => "&apos;",
-    "&rsquo;" => "&apos;",
-    "&ldquo;" => "&quot;",
-    "&rdquo;" => "&quot;",
-    "&bdquo;" => "&quot;",
-    "&lsaquo;" => "&lt;",
-    "&rsaquo;" => "&gt;",
-    "&acute;" => "&apos;",
-    "&uml;" => "&quot;",
-    '\\' => "&#92;",
-    # '/' => "&#47;",
-    # '%' => "&#37;",
-    # ':' => '&#58;',
-    # '=' => '&#61;',
-    # '?' => '&#63;',
-    # '@' => '&#64;',
-    "\`" => '&apos;',
-    '‘' => "&apos;",
-    '’' => "&apos;",
-    '“' => '&quot;',
-    '”' => '&quot;',
-    # "$" => "&#36;",
-    # '#' => '&#35;', # Don't use this or else it will ruin all other entities.
-    # '&' => # Don't use this " " " " " "
-    # ';' => # Don't use this " " " " " "
-    '|' => '&brvbar;',
-    '~' => '&sim;'
-    # '!' => '&#33;',
-    # '*' => '&lowast;', # Don't use this. '*' is used by text formatting, i.e. RedCloth, etc.
-    # '{' => '&#123;',
-    # '}' => '&#125;',
-    # '(' => '&#40;',
-    # ')' => '&#41;',
-    # "\n" => '<br />'
-  }
-  def new_regexp str
-    Regexp.new(clean_utf8(str), Regexp::FIXEDENCODING | Regexp::IGNORECASE)
-  end
-  class << self # ======================================================
-    # From:
-    # http://stackoverflow.com/questions/1268289/how-to-get-rid-of-non-ascii-characters-in-ruby
-    #
-    # Test:
-    # [160, 160,64, 116, 119, 101, 108, 108, 121, 109, 101, 160, 102, 105, 108, 109].
-    # inject('', :<<)
-    #
-    def clean_utf8 s
-      s.
-        encode(Encoding.find('utf-8'), ENCODING_OPTIONS_CLEAN_UTF8).
-        gsub(self::Control, "\n").
-        gsub(self::White_Space, " ")
-    end
-    def un_escape raw
-      EscapeUtils.unescape_html clean_utf8(raw)
-    end
-    def uri str
-      uri = Addressable::URI.parse(str)
-      if ["http","https","ftp"].include?(uri.scheme) || uri.path.index('/') == 0
-        str
-      else
-        nil
-      end
-    rescue Addressable::URI::InvalidURIError
-      fail "Invalid: address: #{str.inspect}"
-    end
-    def escape o
-      case o
-      when String
-        Coder.encode(un_escape(o), :named, :hexadecimal)
-      else
-        fail "Unknown type: #{o.inspect}"
-      end
-    end # === def
-    # ===============================================
-    # Raises: TZInfo::InvalidTimezoneIdentifier.
-    # ===============================================
-    def validate_timezone(timezone)
-      TZInfo::Timezone.get( timezone.to_s.strip ).identifier
-    end
-    # =========================================================
-    # Takes out any periods and back slashes in a String.
-    # Single periods surrounded by text are allowed in the last substring
-    # past the last slash because it is assumed to be a filename
-    # with an extension.
-    # =========================================================
-    def path( raw_path )
-      clean_crumbs = raw_path.split('/').map { |crumb| filename(crumb) }
-      File.join( *clean_crumbs )
-    end
-    # ====================================================================
-    # Returns a String where all characters except
-    # letters, numbers, underscores, and dashes
-    # are replaced with a dash.
-    # It also deletes any non-alphanumeric characters at the end
-    # of the String.
-    # ====================================================================
-    def filename( raw_filename )
-      plaintext( raw_filename ).
-        downcase.
-        gsub(REPEATING_DOTS, '.').
-        gsub(INVALID_FILE_NAME_CHARS, '-').
-        to_s
-    end
-    # ===============================================
-    # This method is not meant to be called directly. Instead, call
-    # <Wash.parse_tags>.
-    # Returns: String with
-    # * all spaces and underscores turned into dashes.
-    # * all non-alphanumeric characters, underscores, dashes, and periods
-    # turned into dashes.
-    # * non-alphanumeric characters at the beginning and end stripped out.
-    # ===============================================
-    def tag( raw_tag )
-      # raw_tag.strip.downcase.gsub( /[^a-z0-9\.]{1,}/,'-').gsub(/^[^a-z0-9]{1,}|[^a-z0-9]{1,}$/i, '').gsub(/\.{1,}/, '.')
-      raw_tag.strip.downcase.gsub(/^[\,\.]{1,}|[\"]{1,}|[\,\.]{1,}$/, '').gsub(/\ /, '-')
-    end
-    # ===============================================
-    # A better alternative than "Rack::Utils.escape_html". Escapes
-    # various characters (including '&', '<', '>', and both quotation mark types)
-    # to HTML decimal entities. Also escapes the characters from
-    # SWISS::HTML_ESCAPE_TABLE.
-    #
-    # Text has to be UTF-8 before encoding, according to HTMLEntities gem.
-    # Therefore, all text is run through <Wash.plaintext> before encoding.
-    # ===============================================
-    def html( raw_text )
-      # Turn string into UTF8. (This also takes out control characters,
-      # which is good, or else they would be escaped into HTML too.)
-      # Strip it after conversion.
-      # return Dryopteris.sanitize(utf8_text)
-      # Now encode it.
-      normalized_encoded_text = escape( plaintext(raw_text).strip, :named )
-      sanitized_text = Loofah.scrub_fragment( normalized_encoded_text, :prune ).to_s
-    end # === def html
-    # ===============================================
-    # Returns: A string that is:
-    # * normalized to :KC
-    # * "\r\n" changed to "\n"
-    # * stripped of all control characters except for "\n"
-    # * stripped at the beginning and end (unless :spaces is given).
-    # Options:
-    # :tabs
-    # :spaces
-    #
-    # ===============================================
-    def plaintext( raw_str, *opts)
-      # Check options.
-      @plaintext_allowed_options ||= [ :spaces, :tabs ]
-      invalid_opts = opts - @plaintext_allowed_options
-      raise(ArgumentError, "INVALID OPTION: #{invalid_opts.inspect}" ) if !invalid_opts.empty?
-      # Save tabs if requested.
-      raw_str = raw_str.gsub("\t", "&#09;") if opts.include?(:tabs)
-      # First: Normalize characters.
-      # Second: Strip out control characters.
-      # Note: Must be normalized first, then strip.
-      # See: http://msdn.microsoft.com/en-us/library/ms776393(VS.85).aspx
-      final_str = raw_str.
-        split("\n").
-        map { |line|
-          # Don't use "\x20" because that is the space character.
-          line.chars.normalize.gsub( /[[:cntrl:]\x00-\x1f]*/, '' )
-        }.
-        join("\n")
-      # Save whitespace or strip.
-      if !opts.include?(:spaces)
-        final_str = final_str.strip
-      end
-      # Normalize quotations and other characters through HTML entity encoding/decoding.
-      final_str = coder.decode( normalised_str Coder.encode(final_str, :named) )
-      # Put back tabs by request.
-      if opts.include?(:tabs)
-          final_str = final_str.gsub("&#09;", "\t")
-      end
-      final_str
-    end # self.plaintext
-    # Encode a few other symbols.
-    # This also normalizes certain quotation and apostrophe HTML entities.
-    def normalize_encoded_string s
-      HTML_ESCAPE_TABLE.inject(s) do |m, kv|
-         m.gsub( kv.first, kv.last)
-      end
-    end
-  end # === class self ===
-end # === class Escape_Escape_Escape ===

data/lib/e_e_e.js DELETED

@@ -1,258 +0,0 @@
-var _       = require('underscore')
-  , _s      = require('underscore.string')
-  , special = require('special-html')
-  , HTML_E  = require('entities')
-  , URI_js  = require('uri-js')
-;
-var NL             = "\n";
-var SPACES         = /\ +/g;
-var VALID_HTML_ID  = /^[0-9a-zA-Z_]+$/;
-var VALID_HTML_TAG = /^[0-9a-zA-Z_]+$/;
-var IS_ERROR       = function (o) { return (_.isObject(o) && o.constructor == Error); };
-var funcs_scope    = this;
-var INSPECT        = function (v) { return JSON.stringify(v); };
-var E = exports.Sanitize = {};
-// ****************************************************************
-// ****************** Sanitize Tag Attributes and content *********
-// ****************************************************************
-E.name   = function (v) { return E.id(v  , "name"); };
-E.href   = function (v) { return E.uri(v , "href"); }
-E.action = function (v) { return E.uri(v , 'action'); };
-E.string = function (raw, name) {
-  name = (name) ? (name + ': ') : '';
-  if (_.isString(raw))
-    return (raw.trim());
-  return new Error(name + "String expected, instead got: " + JSON.stringify(raw));
-};
-E.uri = function (raw, name) {
-  name = (name) ? name : 'uri';
-  var val = E.string(raw, name);
-  if (E.is_error(val))
-    return val;
-  var url   = HTML_E.decode(val, 2);
-  var parse = URI_js.parse(url);
-  if (parse.errors.length)
-    return new Error(name + ": " + parse.errors[0] + ': ' + val);
-  return URI_js.normalize(url);
-};
-E.tag = function (raw, name) {
-  name = (name) ? name : "tag";
-  var val = E.string(raw, name);
-  if (E.is_error(val))
-    return val;
-  if (!val.match(VALID_HTML_TAG))
-    return new Error(name + ": invalid characters: " + JSON.stringify(val));
-  return val;
-};
-E.id   = function (raw_val, name) {
-  name = (name) ? name : "id";
-  var val = E.string(raw_val, name);
-  if (val.message)
-    return val;
-  if (!val.match(VALID_HTML_ID))
-    return new Error(name + ": invalid characters: " + JSON.stringify(val));
-  return val;
-};
-E.num_of_lines = function (raw_val, name) {
-  name = (name) ? name : 'num_of_lines';
-  if (!_.isNumber(raw_val) || _.isNaN(raw_val))
-    return new Error(name + ": Must be a number: " + JSON.stringify(raw_val));
-  if (raw_val < 1 || raw_val > 250)
-    return new Error(name + ": Number out of range: " + JSON.stringify(raw_val));
-  return raw_val;
-};
-E.string_in_array = function (unk, name) {
-  name = (name) ? name : 'string_in_array';
-  if (_.isArray(unk) && unk.length === 1 && _.isString(unk[0]))
-    return unk;
-  return new Error(name + ": Must be a string within an array: " + JSON.stringify(unk));
-};
-var temp = null;
-E.attr_funcs = [];
-for (temp in E) {
-  if (_.isFunction(E[temp])) {
-    E.attr_funcs.push(temp);
-  }
-}
-E.opt = function (func, name) {
-  return function (v) {
-    if (v === undefined || v === null)
-      return null;
-    return func(v, name);
-  };
-};
-E.is = function (func) {
-  return function (v) {
-    var result = func(v);
-    if (result && result.message)
-      return false;
-    return !!result;
-  };
-};
-_.each(E.attr_funcs, function (name, i) {
-  E["opt_" + name] = E.opt(E[name], name);
-  E["is_" + name] = E.is(E[name]);
-});
-// ****************************************************************
-// ****************** End of Sanitize Attr Checkers ***************
-// ****************************************************************
-E.is_error = function (obj) {
-  if (!_.isObject(obj))
-    return false;
-  return obj.constructor === Error;
-};
-E.html = function (str) {
-  if (_.isArray(str)) {
-    return _.map(str, function (v, i) {
-      return E.html(v);
-    });
-  }
-  if (_.isObject(str)) {
-    var new_o = {};
-    _.each(str, function (v, k) {
-      new_o[E.html(k)] = E.html(v);
-    });
-    return new_o;
-  }
-  if (!_.isString(str))
-    return str;
-  return special( _s.escapeHTML( E.un_escape(str) ) );
-};
-E.un_escape = function (str) {
-  return _s.unescapeHTML( HTML_E.decode( str , 2) )
-};
-E.attr = function (k, v, tag) {
-  if (_.contains(['id', 'name', 'type'], k) && !v.match(VALID_HTML_ID))
-    return new Error("Invalid chars in " + tag + " " + k + ": " + v);
-  if (!k.match(VALID_HTML_ID))
-    return new Error("Invalid chars in " + tag + " attribute name: " + k);
-  var safe_name = Ok.escape(k).trim();
-  if (_.contains(['href', 'action'], k)) {
-    var safe_val  = Ok.escape_uri(v);
-    if (!safe_val)
-      return new Error('Invalid link address: ' + v);
-  } else {
-    var safe_val  = Ok.escape(v);
-  }
-  return [safe_name, safe_val];
-};
-E.attrs = function (raw_attrs, tag) {
-  var sanitized = {};
-  var err       = null;
-  _.find(raw_attrs, function (v, k) {
-    var pair = Ok.escape_attr(k, v, tag);
-    if (IS_ERROR(pair)) {
-      err = pair;
-      return pair;
-    }
-    sanitized[pair[0]] = pair[1];
-  });
-  if (err)
-    return err;
-  return sanitized;
-};
-E.to_func_calls = function (arr) {
-  var next  = null;
-  var final = [];
-  var line  = null;
-  while (arr.length) {
-    line = [arr.shift()];
-    while(arr.length && !_.isString(arr[0])) {
-      line.push(arr.shift());
-    }
-    final.push(line);
-  }
-  return final;
-};
-E.to_applet_func_calls = function (arr) {
-  var next  = null;
-  var final = [];
-  while (arr.length) {
-    var name = arr.shift();
-    if (!_.isString(name))
-      return new Error("No function specfied for arg: " + JSON.stringify(name));
-    name = name.trim();
-    var attrs = null;
-    var body  = null;
-    next = arr[0];
-    if (!_.isString(next)) {
-      if (_.isObject(next) && !_.isArray(next))
-        attrs = arr.shift();
-      next = arr[0];
-      if (_.isArray(next))
-        body  = arr.shift();
-      next = arr[0];
-      if (next && _.isArray(next))
-        return new Error(name + ": extra array argument: " + JSON.stringify(next));
-      if (next && _.isObject(next))
-        return new Error(name + ": extra attr object argument: " + JSON.stringify(next));
-      if (next && !_.isString(next))
-        return new Error(name + ": invalid argument: " + JSON.stringify(next));
-    }
-    final.push([name, attrs, body]);
-  }
-  return final;
-};