RubyGems - htmlentities - Versions diffs - 3.0.0 → 3.0.1 - Mend

htmlentities 3.0.0 → 3.0.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (4)

data/CHANGES CHANGED

@@ -1,29 +1,32 @@
+== 3.0.1 (2005-04-08)
+* Improved documentation.
 == 3.0.0 (2005-04-08)
 * Changed licence to MIT due to confusion with previous 'Fair' licence (my
   intention was to be liberal, not obscure).
 * Moved basic functionality out of String class; for previous behaviour,
-  require 'htmlentities/string'
-* Changed version numbering scheme
-* Now available as a Gem
+  require 'htmlentities/string'.
+* Changed version numbering scheme.
+* Now available as a Gem.
 == 2.2 (2005-11-07)
-* Important bug fixes -- thanks to Moonwolf
+* Important bug fixes -- thanks to Moonwolf.
 * Decoding hexadecimal entities now accepts 'f' as a hex digit. (D'oh!)
-* Decimal decoding edge cases addressed
-* Test cases added
+* Decimal decoding edge cases addressed.
+* Test cases added.
 == 2.1 (2005-10-31)
-* Removed some unnecessary code in basic entity encoding
+* Removed some unnecessary code in basic entity encoding.
 * Improved handling of encoding: commands are now automatically sorted, so the
-  user doesn't have to worry about their order
-* Now using setup.rb
-* Tests moved to separate file
+  user doesn't have to worry about their order.
+* Now using setup.rb.
+* Tests moved to separate file.
 == 2.0 (2005-08-23)
-* Added encoding to entities
-* Decoding interface unchanged
-* Fixed a bug with handling high codepoints
+* Added encoding to entities.
+* Decoding interface unchanged.
+* Fixed a bug with handling high codepoints.
 == 1.0 (2005-08-03)
-* Initial release
-* Decoding only
+* Initial release.
+* Decoding only.

data/lib/htmlentities.rb CHANGED

@@ -2,85 +2,84 @@
 # HTML entity encoding and decoding for Ruby
 #
-module HTMLEntities # :nodoc:
+module HTMLEntities
   class InstructionError < RuntimeError
   end
-  #
-  # MAP is a hash of all the HTML entities I could discover, as taken
-  # from the w3schools page on the subject:
-  # http://www.w3schools.com/html/html_entitiesref.asp
-  # The format is 'entity name' => codepoint where entity name is given
-  # without the surrounding ampersand and semicolon.
-  #
-  MAP = {
-    'quot'      => 34,        'apos'      => 39,        'amp'       => 38,
-    'lt'        => 60,        'gt'        => 62,        'nbsp'      => 160,
-    'iexcl'     => 161,       'curren'    => 164,       'cent'      => 162,
-    'pound'     => 163,       'yen'       => 165,       'brvbar'    => 166,
-    'sect'      => 167,       'uml'       => 168,       'copy'      => 169,
-    'ordf'      => 170,       'laquo'     => 171,       'not'       => 172,
-    'shy'       => 173,       'reg'       => 174,       'trade'     => 8482,
-    'macr'      => 175,       'deg'       => 176,       'plusmn'    => 177,
-    'sup2'      => 178,       'sup3'      => 179,       'acute'     => 180,
-    'micro'     => 181,       'para'      => 182,       'middot'    => 183,
-    'cedil'     => 184,       'sup1'      => 185,       'ordm'      => 186,
-    'raquo'     => 187,       'frac14'    => 188,       'frac12'    => 189,
-    'frac34'    => 190,       'iquest'    => 191,       'times'     => 215,
-    'divide'    => 247,       'Agrave'    => 192,       'Aacute'    => 193,
-    'Acirc'     => 194,       'Atilde'    => 195,       'Auml'      => 196,
-    'Aring'     => 197,       'AElig'     => 198,       'Ccedil'    => 199,
-    'Egrave'    => 200,       'Eacute'    => 201,       'Ecirc'     => 202,
-    'Euml'      => 203,       'Igrave'    => 204,       'Iacute'    => 205,
-    'Icirc'     => 206,       'Iuml'      => 207,       'ETH'       => 208,
-    'Ntilde'    => 209,       'Ograve'    => 210,       'Oacute'    => 211,
-    'Ocirc'     => 212,       'Otilde'    => 213,       'Ouml'      => 214,
-    'Oslash'    => 216,       'Ugrave'    => 217,       'Uacute'    => 218,
-    'Ucirc'     => 219,       'Uuml'      => 220,       'Yacute'    => 221,
-    'THORN'     => 222,       'szlig'     => 223,       'agrave'    => 224,
-    'aacute'    => 225,       'acirc'     => 226,       'atilde'    => 227,
-    'auml'      => 228,       'aring'     => 229,       'aelig'     => 230,
-    'ccedil'    => 231,       'egrave'    => 232,       'eacute'    => 233,
-    'ecirc'     => 234,       'euml'      => 235,       'igrave'    => 236,
-    'iacute'    => 237,       'icirc'     => 238,       'iuml'      => 239,
-    'eth'       => 240,       'ntilde'    => 241,       'ograve'    => 242,
-    'oacute'    => 243,       'ocirc'     => 244,       'otilde'    => 245,
-    'ouml'      => 246,       'oslash'    => 248,       'ugrave'    => 249,
-    'uacute'    => 250,       'ucirc'     => 251,       'uuml'      => 252,
-    'yacute'    => 253,       'thorn'     => 254,       'yuml'      => 255,
-    'OElig'     => 338,       'oelig'     => 339,       'Scaron'    => 352,
-    'scaron'    => 353,       'Yuml'      => 376,       'circ'      => 710,
-    'tilde'     => 732,       'ensp'      => 8194,      'emsp'      => 8195,
-    'thinsp'    => 8201,      'zwnj'      => 8204,      'zwj'       => 8205,
-    'lrm'       => 8206,      'rlm'       => 8207,      'ndash'     => 8211,
-    'mdash'     => 8212,      'lsquo'     => 8216,      'rsquo'     => 8217,
-    'sbquo'     => 8218,      'ldquo'     => 8220,      'rdquo'     => 8221,
-    'bdquo'     => 8222,      'dagger'    => 8224,      'Dagger'    => 8225,
-    'hellip'    => 8230,      'permil'    => 8240,      'lsaquo'    => 8249,
-    'rsaquo'    => 8250,      'euro'      => 8364
-  }
+  module Data #:nodoc:
-  MIN_LENGTH = MAP.keys.map{ |a| a.length }.min
-  MAX_LENGTH = MAP.keys.map{ |a| a.length }.max
-  # Precompile the regexp
-  NAMED_ENTITY_REGEXP =
-    /&([a-z]{#{HTMLEntities::MIN_LENGTH},#{HTMLEntities::MAX_LENGTH}});/i
+    #
+    # MAP is a hash of all the HTML entities I could discover, as taken
+    # from the w3schools page on the subject:
+    # http://www.w3schools.com/html/html_entitiesref.asp
+    # The format is 'entity name' => codepoint where entity name is given
+    # without the surrounding ampersand and semicolon.
+    #
+    MAP = {
+      'quot'      => 34,        'apos'      => 39,        'amp'       => 38,
+      'lt'        => 60,        'gt'        => 62,        'nbsp'      => 160,
+      'iexcl'     => 161,       'curren'    => 164,       'cent'      => 162,
+      'pound'     => 163,       'yen'       => 165,       'brvbar'    => 166,
+      'sect'      => 167,       'uml'       => 168,       'copy'      => 169,
+      'ordf'      => 170,       'laquo'     => 171,       'not'       => 172,
+      'shy'       => 173,       'reg'       => 174,       'trade'     => 8482,
+      'macr'      => 175,       'deg'       => 176,       'plusmn'    => 177,
+      'sup2'      => 178,       'sup3'      => 179,       'acute'     => 180,
+      'micro'     => 181,       'para'      => 182,       'middot'    => 183,
+      'cedil'     => 184,       'sup1'      => 185,       'ordm'      => 186,
+      'raquo'     => 187,       'frac14'    => 188,       'frac12'    => 189,
+      'frac34'    => 190,       'iquest'    => 191,       'times'     => 215,
+      'divide'    => 247,       'Agrave'    => 192,       'Aacute'    => 193,
+      'Acirc'     => 194,       'Atilde'    => 195,       'Auml'      => 196,
+      'Aring'     => 197,       'AElig'     => 198,       'Ccedil'    => 199,
+      'Egrave'    => 200,       'Eacute'    => 201,       'Ecirc'     => 202,
+      'Euml'      => 203,       'Igrave'    => 204,       'Iacute'    => 205,
+      'Icirc'     => 206,       'Iuml'      => 207,       'ETH'       => 208,
+      'Ntilde'    => 209,       'Ograve'    => 210,       'Oacute'    => 211,
+      'Ocirc'     => 212,       'Otilde'    => 213,       'Ouml'      => 214,
+      'Oslash'    => 216,       'Ugrave'    => 217,       'Uacute'    => 218,
+      'Ucirc'     => 219,       'Uuml'      => 220,       'Yacute'    => 221,
+      'THORN'     => 222,       'szlig'     => 223,       'agrave'    => 224,
+      'aacute'    => 225,       'acirc'     => 226,       'atilde'    => 227,
+      'auml'      => 228,       'aring'     => 229,       'aelig'     => 230,
+      'ccedil'    => 231,       'egrave'    => 232,       'eacute'    => 233,
+      'ecirc'     => 234,       'euml'      => 235,       'igrave'    => 236,
+      'iacute'    => 237,       'icirc'     => 238,       'iuml'      => 239,
+      'eth'       => 240,       'ntilde'    => 241,       'ograve'    => 242,
+      'oacute'    => 243,       'ocirc'     => 244,       'otilde'    => 245,
+      'ouml'      => 246,       'oslash'    => 248,       'ugrave'    => 249,
+      'uacute'    => 250,       'ucirc'     => 251,       'uuml'      => 252,
+      'yacute'    => 253,       'thorn'     => 254,       'yuml'      => 255,
+      'OElig'     => 338,       'oelig'     => 339,       'Scaron'    => 352,
+      'scaron'    => 353,       'Yuml'      => 376,       'circ'      => 710,
+      'tilde'     => 732,       'ensp'      => 8194,      'emsp'      => 8195,
+      'thinsp'    => 8201,      'zwnj'      => 8204,      'zwj'       => 8205,
+      'lrm'       => 8206,      'rlm'       => 8207,      'ndash'     => 8211,
+      'mdash'     => 8212,      'lsquo'     => 8216,      'rsquo'     => 8217,
+      'sbquo'     => 8218,      'ldquo'     => 8220,      'rdquo'     => 8221,
+      'bdquo'     => 8222,      'dagger'    => 8224,      'Dagger'    => 8225,
+      'hellip'    => 8230,      'permil'    => 8240,      'lsaquo'    => 8249,
+      'rsaquo'    => 8250,      'euro'      => 8364
+    }
-  # Reverse map for converting characters to named entities
-  REVERSE_MAP = MAP.invert
+    MIN_LENGTH = MAP.keys.map{ |a| a.length }.min
+    MAX_LENGTH = MAP.keys.map{ |a| a.length }.max
+    NAMED_ENTITY_REGEXP = /&([a-z]{#{MIN_LENGTH},#{MAX_LENGTH}});/i
+    REVERSE_MAP = MAP.invert
-  BASIC_ENTITY_REGEXP = /[<>'"&]/
+    BASIC_ENTITY_REGEXP = /[<>'"&]/
-  UTF8_NON_ASCII_REGEXP = /[\x00-\x1f]|[\xc0-\xfd][\x80-\xbf]+/
+    UTF8_NON_ASCII_REGEXP = /[\x00-\x1f]|[\xc0-\xfd][\x80-\xbf]+/
-  ENCODE_ENTITIES_COMMAND_ORDER = {
-    :basic => 0,
-    :named => 1,
-    :decimal => 2,
-    :hexadecimal => 3
-  }
+    ENCODE_ENTITIES_COMMAND_ORDER = {
+      :basic => 0,
+      :named => 1,
+      :decimal => 2,
+      :hexadecimal => 3
+    }
+  end
   #
   # Decode XML and HTML 4.01 entities in a string into their UTF-8
@@ -91,8 +90,8 @@ module HTMLEntities # :nodoc:
   # Unknown named entities are not converted
   #
   def decode_entities(string)
-    return string.gsub(NAMED_ENTITY_REGEXP) {
-      (cp = MAP[$1]) ? [cp].pack('U') : $&
+    return string.gsub(Data::NAMED_ENTITY_REGEXP) {
+      (cp = Data::MAP[$1]) ? [cp].pack('U') : $&
     }.gsub(/&#([0-9]{1,7});|&#x([0-9a-f]{1,6});/i) {
       $1 ? [$1.to_i].pack('U') : [$2.to_i(16)].pack('U')
     }
@@ -129,7 +128,7 @@ module HTMLEntities # :nodoc:
       instructions = [:basic]
     else
       instructions = instructions.sort_by { |instruction|
-        ENCODE_ENTITIES_COMMAND_ORDER[instruction] ||
+        Data::ENCODE_ENTITIES_COMMAND_ORDER[instruction] ||
         (raise InstructionError, "unknown encode_entities command `#{instruction.inspect}'")
       }
     end
@@ -137,23 +136,23 @@ module HTMLEntities # :nodoc:
       case instruction
       when :basic
         # Handled as basic ASCII
-        output = (output || string).gsub(BASIC_ENTITY_REGEXP) {
+        output = (output || string).gsub(Data::BASIC_ENTITY_REGEXP) {
           # It's safe to use the simpler [0] here because we know
           # that the basic entities are ASCII.
-          '&' << REVERSE_MAP[$&[0]] << ';'
+          '&' << Data::REVERSE_MAP[$&[0]] << ';'
         }
       when :named
         # Test everything except printable ASCII
-        output = (output || string).gsub(UTF8_NON_ASCII_REGEXP) {
+        output = (output || string).gsub(Data::UTF8_NON_ASCII_REGEXP) {
           cp = $&.unpack('U')[0]
-          (e = REVERSE_MAP[cp]) ?  "&#{e};" : $&
+          (e = Data::REVERSE_MAP[cp]) ?  "&#{e};" : $&
         }
       when :decimal
-        output = (output || string).gsub(UTF8_NON_ASCII_REGEXP) {
+        output = (output || string).gsub(Data::UTF8_NON_ASCII_REGEXP) {
           "&##{$&.unpack('U')[0]};"
         }
       when :hexadecimal
-        output = (output || string).gsub(UTF8_NON_ASCII_REGEXP) {
+        output = (output || string).gsub(Data::UTF8_NON_ASCII_REGEXP) {
           "&#x#{$&.unpack('U')[0].to_s(16)};"
         }
       end

data/lib/htmlentities/string.rb CHANGED

@@ -1,15 +1,24 @@
 require 'htmlentities'
 #
-# This library extends the String class with methods to allow encoding and decoding of
+# This file extends the String class with methods to allow encoding and decoding of
 # HTML/XML entities from/to their corresponding UTF-8 codepoints.
 #
 class String
+  #
+  # Decode XML and HTML 4.01 entities in a string into their UTF-8
+  # equivalents.
+  #
   def decode_entities
     return HTMLEntities.decode_entities(self)
   end
+  #
+  # Encode codepoints in a string into their corresponding entities. See
+  # the documentation of HTMLEntities.encode_entities for a list of possible
+  # instructions.
+  #
   def encode_entities(*instructions)
     return HTMLEntities.encode_entities(self, *instructions)
   end

metadata CHANGED

@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
 specification_version: 1
 name: htmlentities
 version: !ruby/object:Gem::Version
-  version: 3.0.0
-date: 2006-04-08 00:00:00 +01:00
+  version: 3.0.1
+date: 2006-04-09 00:00:00 +01:00
 summary: A module for encoding and decoding of HTML/XML entities from/to their corresponding UTF-8 codepoints. Optional String class extension for same.
 require_paths:
 - lib