RubyGems - unicode_utils - Versions diffs - 1.2.2 → 1.3.0 - Mend

unicode_utils 1.2.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

data/CHANGES.txt +14 -0
data/LICENSE.txt +1 -1
data/cdata/canonical_decomposition_map +1 -1
data/cdata/case_ignorable_set +1 -1
data/cdata/casefold_c_map +1 -1
data/cdata/combining_class_map +1 -1
data/cdata/compatibility_decomposition_map +1 -1
data/cdata/composition_exclusion_set +1 -1
data/cdata/east_asian_width_property_per_cp +1 -1
data/cdata/east_asian_width_property_ranges +1 -1
data/cdata/general_category_per_cp +1 -1
data/cdata/general_category_ranges +1 -1
data/cdata/grapheme_break_property +1 -1
data/cdata/name_aliases +1 -0
data/cdata/names +731 -0
data/cdata/prop_set_lowercase +1 -1
data/cdata/prop_set_uppercase +1 -1
data/cdata/simple_lc_map +1 -1
data/cdata/simple_tc_map +1 -1
data/cdata/simple_uc_map +1 -1
data/cdata/word_break_property +1 -1
data/lib/unicode_utils.rb +6 -3
data/lib/unicode_utils/canonical_decomposition.rb +2 -2
data/lib/unicode_utils/case_ignorable_char_q.rb +1 -1
data/lib/unicode_utils/char_display_width.rb +2 -2
data/lib/unicode_utils/char_name.rb +13 -3
data/lib/unicode_utils/char_type.rb +1 -1
data/lib/unicode_utils/code_point_type.rb +70 -0
data/lib/unicode_utils/codepoint.rb +5 -5
data/lib/unicode_utils/compatibility_decomposition.rb +1 -1
data/lib/unicode_utils/debug.rb +5 -5
data/lib/unicode_utils/default_ignorable_char_q.rb +2 -2
data/lib/unicode_utils/display_width.rb +3 -3
data/lib/unicode_utils/each_grapheme.rb +2 -2
data/lib/unicode_utils/each_word.rb +1 -1
data/lib/unicode_utils/east_asian_width.rb +2 -2
data/lib/unicode_utils/gc.rb +1 -1
data/lib/unicode_utils/general_category.rb +1 -1
data/lib/unicode_utils/lowercase_char_q.rb +1 -1
data/lib/unicode_utils/name_alias.rb +46 -0
data/lib/unicode_utils/name_aliases.rb +29 -0
data/lib/unicode_utils/nfc.rb +3 -3
data/lib/unicode_utils/read_cdata.rb +36 -4
data/lib/unicode_utils/sid.rb +63 -0
data/lib/unicode_utils/simple_casefold.rb +2 -2
data/lib/unicode_utils/simple_downcase.rb +2 -2
data/lib/unicode_utils/simple_upcase.rb +2 -2
data/lib/unicode_utils/soft_dotted_char_q.rb +1 -1
data/lib/unicode_utils/titlecase.rb +1 -1
data/lib/unicode_utils/titlecase_char_q.rb +1 -1
data/lib/unicode_utils/uppercase_char_q.rb +1 -1
data/lib/unicode_utils/version.rb +10 -3
data/test/test_unicode_utils.rb +109 -5
metadata +26 -39

data/lib/unicode_utils/name_aliases.rb ADDED

@@ -0,0 +1,29 @@
+# -*- encoding: utf-8 -*-
+require "unicode_utils/name_alias"
+require "unicode_utils/read_cdata"
+module UnicodeUtils
+  NAME_ALIASES_MAP = Impl.read_name_aliases("name_aliases") # :nodoc:
+  NAME_ALIASES_MAP.default = [].freeze
+  # Get an Enumerable of formal name aliases of the given character. Returns an
+  # empty Enumerable if the character doesn't have an alias.
+  #
+  # The aliases are instances of UnicodeUtils::NameAlias, the order of the
+  # aliases in the returned Enumerable is preserved from NameAliases.txt in the
+  # Unicode Character Database.
+  #
+  # Example:
+  #
+  #   require "unicode_utils/name_aliases"
+  #   UnicodeUtils.name_aliases("\n").map(&:name) # => ["LINE FEED", "NEW LINE", "END OF LINE", "LF", "NL", "EOL"]
+  #
+  # See also: UnicodeUtils.char_name
+  def name_aliases(char)
+    NAME_ALIASES_MAP[char.ord]
+  end
+  module_function :name_aliases
+end

data/lib/unicode_utils/nfc.rb CHANGED

@@ -9,7 +9,7 @@ module UnicodeUtils
   module Impl # :nodoc:all
     COMPOSITION_EXCLUSION_SET =
-      Impl.read_codepoint_set("composition_exclusion_set")
+      Impl.read_code_point_set("composition_exclusion_set")
     CANONICAL_COMPOSITION_MAP = Hash.new.tap do |m|
       CANONICAL_DECOMPOSITION_MAP.each_pair { |comp, decomp|
@@ -124,8 +124,8 @@ module UnicodeUtils
   # Get +str+ in Normalization Form C.
   #
   # The Unicode standard has multiple representations for some
-  # characters. One representation as a single codepoint and other
-  # representation(s) as a combination of multiple codepoints. This
+  # characters. One representation as a single code point and other
+  # representation(s) as a combination of multiple code points. This
   # function "composes" these characters into the former
   # representation.
   #

data/lib/unicode_utils/read_cdata.rb CHANGED

@@ -16,11 +16,19 @@ module UnicodeUtils
       5 => :Narrow
     }.freeze
+    NAME_ALIAS_TYPE_TO_SYMBOL_MAP = {
+      1 => :correction,
+      2 => :control,
+      3 => :alternate,
+      4 => :figment,
+      5 => :abbreviation
+    }.freeze
     def self.open_cdata_file(filename, &block)
       File.open(File.join(CDATA_DIR, filename), "r:US-ASCII:-", &block)
     end
-    def self.read_codepoint_set(filename)
+    def self.read_code_point_set(filename)
       Hash.new.tap { |set|
         open_cdata_file(filename) do |input|
           buffer = "x" * 6
@@ -32,7 +40,7 @@ module UnicodeUtils
       }
     end
-    def self.read_codepoint_map(filename)
+    def self.read_code_point_map(filename)
       Hash.new.tap { |map|
         open_cdata_file(filename) do |input|
           buffer = "x" * 6
@@ -104,7 +112,7 @@ module UnicodeUtils
       }
     end
-    # Read a map whose keys are codepoints (6 hexgdigits, converted to
+    # Read a map whose keys are code points (6 hexdigits, converted to
     # integer) and whose values are single hexdigits (converted to
     # integer).
     def self.read_hexdigit_map(filename)
@@ -122,7 +130,7 @@ module UnicodeUtils
     end
     # Returns a list (array) of pairs (two element Arrays) of Range
-    # (codepoints) and associated integer value.
+    # (code points) and associated integer value.
     def self.read_range_to_hexdigit_list(filename)
       Array.new.tap { |list|
         open_cdata_file(filename) do |input|
@@ -208,6 +216,30 @@ module UnicodeUtils
       }
     end
+    def self.read_name_aliases(filename)
+      Hash.new.tap { |map|
+        open_cdata_file(filename) do |input|
+          cp_buffer = "x" * 6
+          cp_buffer.force_encoding(Encoding::US_ASCII)
+          ac_buffer = "x" * 1
+          ac_buffer.force_encoding(Encoding::US_ASCII)
+          at_buffer = "x" * 1
+          at_buffer.force_encoding(Encoding::US_ASCII)
+          al_buffer = "x" * 2
+          al_buffer.force_encoding(Encoding::US_ASCII)
+          while input.read(6, cp_buffer)
+            aliases = Array.new(input.read(1, ac_buffer).to_i(16))
+            0.upto(aliases.length - 1) { |i|
+              type = NAME_ALIAS_TYPE_TO_SYMBOL_MAP[input.read(1, at_buffer).to_i(16)]
+              name = input.read(input.read(2, al_buffer).to_i(16))
+              aliases[i] = NameAlias.new(name.freeze, type)
+            }
+            map[cp_buffer.to_i(16)] = aliases.freeze
+          end
+        end
+      }
+    end
   end
 end

data/lib/unicode_utils/sid.rb ADDED

@@ -0,0 +1,63 @@
+# -*- encoding: utf-8 -*-
+require "unicode_utils/name_aliases"
+require "unicode_utils/code_point_type"
+module UnicodeUtils
+  CP_PREFERRED_ALIAS_STRING_MAP = Hash.new.tap do |map|
+    NAME_ALIASES_MAP.each { |cp, aliases|
+      al =
+        (aliases.find { |al| al.type == :correction } ||
+         aliases.find { |al| al.type == :control } ||
+         aliases.find { |al| al.type == :figment } ||
+         aliases.find { |al| al.type == :alternate })
+      map[cp] = al.name if al
+    }
+  end #:nodoc:
+  # Returns a unique string identifier for every code point. Returns
+  # nil if +code_point+ is not in the Unicode codespace. +code_point+
+  # must be an Integer.
+  #
+  # The returned string identifier is either the non-empty Name
+  # property value of +code_point+, a non-empty Name_Alias string
+  # property value of +code_point+, or the code point label as
+  # described by section "Code Point Labels" in chapter 4.8 "Name" of
+  # the Unicode standard.
+  #
+  # If the returned identifier starts with "<", it is a code point
+  # label and it ends with ">". Otherwise it is the normative name or
+  # a formal alias string.
+  #
+  # The exact name/alias/label selection algorithm may change even in
+  # minor UnicodeUtils releases, but overall behaviour will stay the
+  # same in spirit.
+  #
+  # The selection process in this version of UnicodeUtils is:
+  # 1. Use an alias of type :correction, :control, :figment or
+  #    :alternate (with listed precedence) if available
+  # 2. Use the Unicode Name property value if it is not empty
+  # 3. Construct a code point label in angle brackets.
+  #
+  # Examples:
+  #
+  #     require "unicode_utils/sid"
+  #
+  #     U.sid 0xa     # => "LINE FEED"
+  #     U.sid 0x0     # => "NULL"
+  #     U.sid 0xfeff  # => "BYTE ORDER MARK"
+  #     U.sid 0xe000  # => "<private-use-E000>"
+  #     U.sid 0x61    # => "LATIN SMALL LETTER A"
+  #     U.sid -1      # => nil
+  def sid(code_point)
+    s = CP_PREFERRED_ALIAS_STRING_MAP[code_point] and return s
+    cn = UnicodeUtils.char_name(code_point)
+    return cn if cn && cn !~ /\A(\<|\z)/
+    ct = UnicodeUtils.code_point_type(code_point) or return nil
+    ts = ct.to_s.downcase.gsub('_', '-')
+    "<#{ts}-#{code_point.to_s(16).upcase.rjust(4, '0')}>"
+  end
+  module_function :sid
+end

data/lib/unicode_utils/simple_casefold.rb CHANGED

@@ -4,9 +4,9 @@ require "unicode_utils/read_cdata"
 module UnicodeUtils
-  CASEFOLD_C_MAP = Impl.read_codepoint_map("casefold_c_map") # :nodoc:
+  CASEFOLD_C_MAP = Impl.read_code_point_map("casefold_c_map") # :nodoc:
-  CASEFOLD_S_MAP = Impl.read_codepoint_map("casefold_s_map") # :nodoc:
+  CASEFOLD_S_MAP = Impl.read_code_point_map("casefold_s_map") # :nodoc:
   # Perform simple case folding. Contrary to full case folding, this
   # uses only one to one mappings, so that the length of the returned

data/lib/unicode_utils/simple_downcase.rb CHANGED

@@ -4,9 +4,9 @@ require "unicode_utils/read_cdata"
 module UnicodeUtils
-  SIMPLE_DOWNCASE_MAP = Impl.read_codepoint_map("simple_lc_map") # :nodoc:
+  SIMPLE_DOWNCASE_MAP = Impl.read_code_point_map("simple_lc_map") # :nodoc:
-  # Map each codepoint in +str+ that has a single codepoint
+  # Map each code point in +str+ that has a single code point
   # lowercase-mapping to that lowercase mapping. The returned string
   # has the same length as the original string.
   #

data/lib/unicode_utils/simple_upcase.rb CHANGED

@@ -4,9 +4,9 @@ require "unicode_utils/read_cdata"
 module UnicodeUtils
-  SIMPLE_UPCASE_MAP = Impl.read_codepoint_map("simple_uc_map") # :nodoc:
+  SIMPLE_UPCASE_MAP = Impl.read_code_point_map("simple_uc_map") # :nodoc:
-  # Map each codepoint in +str+ that has a single codepoint
+  # Map each code point in +str+ that has a single code point
   # uppercase-mapping to that uppercase mapping. The returned string
   # has the same length as the original string.
   #

data/lib/unicode_utils/soft_dotted_char_q.rb CHANGED

@@ -4,7 +4,7 @@ require "unicode_utils/read_cdata"
 module UnicodeUtils
-  SOFT_DOTTED_SET = Impl.read_codepoint_set("soft_dotted_set") # :nodoc:
+  SOFT_DOTTED_SET = Impl.read_code_point_set("soft_dotted_set") # :nodoc:
   # Returns true if the given character has the Unicode property
   # Soft_Dotted.

data/lib/unicode_utils/titlecase.rb CHANGED

@@ -8,7 +8,7 @@ require "unicode_utils/downcase"
 module UnicodeUtils
-  SIMPLE_TITLECASE_MAP = Impl.read_codepoint_map("simple_tc_map") # :nodoc:
+  SIMPLE_TITLECASE_MAP = Impl.read_code_point_map("simple_tc_map") # :nodoc:
   SPECIAL_TITLECASE_MAP = Impl.read_multivalued_map("special_tc_map") # :nodoc:
   # Convert the first cased character after each word boundary to

data/lib/unicode_utils/titlecase_char_q.rb CHANGED

@@ -4,7 +4,7 @@ require "unicode_utils/read_cdata"
 module UnicodeUtils
-  TITLECASE_LETTER_SET = Impl.read_codepoint_set("cat_set_titlecase") # :nodoc:
+  TITLECASE_LETTER_SET = Impl.read_code_point_set("cat_set_titlecase") # :nodoc:
   # True if the given character has the General_Category
   # Titlecase_Letter (Lt).

data/lib/unicode_utils/uppercase_char_q.rb CHANGED

@@ -4,7 +4,7 @@ require "unicode_utils/read_cdata"
 module UnicodeUtils
-  PROP_UPPERCASE_SET = Impl.read_codepoint_set("prop_set_uppercase") # :nodoc:
+  PROP_UPPERCASE_SET = Impl.read_code_point_set("prop_set_uppercase") # :nodoc:
   # True if the given character has the Unicode property Uppercase.
   def uppercase_char?(char)

data/lib/unicode_utils/version.rb CHANGED

@@ -4,13 +4,20 @@ module UnicodeUtils
   # Corresponds to the unicode_utils gem version.
   #
+  # Conforms to Semantic Versioning as documented at semver.org.
+  #
+  # Summary:
   # MAJOR.MINOR.PATCHLEVEL
   # - A backwards incompatible change causes a change in MAJOR
  # - New features or non-bugfix improvements cause a change in MINOR
   # - Bugfixes increase only PATCHLEVEL.
+  # - Pre-release versions append more info after a dash.
+  VERSION = "1.3.0"
+  # The version of Unicode implemented by this version of UnicodeUtils.
   #
-  # A release always has an even PATCHLEVEL. PATCHLEVEL is uneven
-  # during development.
-  VERSION = "1.2.2"
+  #   require "unicode_utils/version"
+  #   puts "Unicode #{UnicodeUtils::UNICODE_VERSION}"
+  UNICODE_VERSION = "6.1.0"
 end

data/test/test_unicode_utils.rb CHANGED

@@ -8,6 +8,10 @@ require "unicode_utils"
 # Fast tests for almost all UnicodeUtils functions.
 class TestUnicodeUtils < Test::Unit::TestCase
+  def test_unicode_version
+    assert_match /\A\d+\.\d+\.\d+\z/, UnicodeUtils::UNICODE_VERSION
+  end
   def test_name
     assert_equal "LATIN SMALL LETTER F", UnicodeUtils.char_name("f")
     assert_equal Encoding::US_ASCII, UnicodeUtils.char_name("f").encoding
@@ -421,19 +425,119 @@ class TestUnicodeUtils < Test::Unit::TestCase
     io = StringIO.new
     UnicodeUtils.debug("", io: io)
     assert_equal <<-'EOF', io.string
- Char | Ordinal | Name | General Category | UTF-8
-------+---------+------+------------------+-------
+ Char | Ordinal | Sid | General Category | UTF-8
+------+---------+-----+------------------+-------
     EOF
     io = StringIO.new
     UnicodeUtils.debug("一 \u{100000}\n", io: io)
     assert_equal <<-'EOF', io.string
- Char | Ordinal | Name                       | General Category | UTF-8
+ Char | Ordinal | Sid                        | General Category | UTF-8
 ------+---------+----------------------------+------------------+-------------
  "一" |    4E00 | CJK UNIFIED IDEOGRAPH-4E00 | Other_Letter     | E4 B8 80
  " "  |      20 | SPACE                      | Space_Separator  | 20
- N/A  |  100000 | N/A                        | Private_Use      | F4 80 80 80
- "\n" |       A | <control>                  | Control          | 0A
+ N/A  |  100000 | <private-use-100000>       | Private_Use      | F4 80 80 80
+ "\n" |       A | LINE FEED                  | Control          | 0A
     EOF
   end
+  def test_code_point_type
+    assert_equal :Graphic, UnicodeUtils.code_point_type("A")
+    assert_equal :Graphic, UnicodeUtils.code_point_type("a")
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0x1cb)
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0x2b5)
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0x10923)
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0x5a0)
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0x93f)
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0x20dd)
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0xa901)
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0x10144)
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0x10917)
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0x5f)
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0x2011)
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0x2329)
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0xfe38)
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0x201c)
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0x201d)
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0x2e10)
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0xff0b)
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0xa3)
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0x2c2)
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0x60f)
+    assert_equal :Graphic, UnicodeUtils.code_point_type(0x2001)
+    assert_equal :Format, UnicodeUtils.code_point_type(0x2028)
+    assert_equal :Format, UnicodeUtils.code_point_type(0x2029)
+    assert_equal :Control, UnicodeUtils.code_point_type(0x0)
+    assert_equal :Format, UnicodeUtils.code_point_type(0x70f)
+    assert_equal :Surrogate, UnicodeUtils.code_point_type(0xdb82)
+    assert_equal :Private_Use, UnicodeUtils.code_point_type(0xf1020)
+    assert_equal :Private_Use, UnicodeUtils.code_point_type(0x10fffd)
+    assert_equal :Noncharacter, UnicodeUtils.code_point_type(0x10ffff)
+    assert_equal :Noncharacter, UnicodeUtils.code_point_type(0xfffe)
+    assert_equal :Noncharacter, UnicodeUtils.code_point_type(0xffff)
+    assert_equal :Noncharacter, UnicodeUtils.code_point_type(0xbfffe)
+    assert_equal :Noncharacter, UnicodeUtils.code_point_type(0xbffff)
+    assert_equal :Reserved, UnicodeUtils.code_point_type(0x380)
+    assert_equal :Reserved, UnicodeUtils.code_point_type(0xeeb)
+    assert_equal :Reserved, UnicodeUtils.code_point_type(0xfff)
+    assert_equal :Reserved, UnicodeUtils.code_point_type(0x7fffd)
+    assert_equal :Reserved, UnicodeUtils.code_point_type(0xeffef)
+    ### above is at least one assertion for every general category ###
+    assert_equal nil, UnicodeUtils.code_point_type(-1)
+    assert_equal nil, UnicodeUtils.code_point_type(0x110000)
+  end
+  def test_name_aliases
+    assert_equal [UnicodeUtils::NameAlias.new("NULL", :control),
+                  UnicodeUtils::NameAlias.new("NUL", :abbreviation)],
+                 UnicodeUtils.name_aliases(0x0)
+    assert_equal [UnicodeUtils::NameAlias.new("LATIN CAPITAL LETTER GHA", :correction)],
+                 UnicodeUtils.name_aliases(0x1a2)
+    assert_equal [UnicodeUtils::NameAlias.new("BYTE ORDER MARK", :alternate),
+                  UnicodeUtils::NameAlias.new("BOM", :abbreviation),
+                  UnicodeUtils::NameAlias.new("ZWNBSP", :abbreviation)],
+                 UnicodeUtils.name_aliases(0xfeff)
+    assert_equal [UnicodeUtils::NameAlias.new("PADDING CHARACTER", :figment),
+                  UnicodeUtils::NameAlias.new("PAD", :abbreviation)],
+                 UnicodeUtils.name_aliases(0x80)
+    assert_equal [UnicodeUtils::NameAlias.new("VS256", :abbreviation)],
+                 UnicodeUtils.name_aliases(0xe01ef)
+    assert_equal [UnicodeUtils::NameAlias.new("LINE FEED", :control),
+                  UnicodeUtils::NameAlias.new("NEW LINE", :control),
+                  UnicodeUtils::NameAlias.new("END OF LINE", :control),
+                  UnicodeUtils::NameAlias.new("LF", :abbreviation),
+                  UnicodeUtils::NameAlias.new("NL", :abbreviation),
+                  UnicodeUtils::NameAlias.new("EOL", :abbreviation)],
+                 UnicodeUtils.name_aliases(0xa)
+    assert_equal [UnicodeUtils::NameAlias.new("CHARACTER TABULATION", :control),
+                  UnicodeUtils::NameAlias.new("HORIZONTAL TABULATION", :control),
+                  UnicodeUtils::NameAlias.new("HT", :abbreviation),
+                  UnicodeUtils::NameAlias.new("TAB", :abbreviation)],
+                 UnicodeUtils.name_aliases("\t")
+    assert_equal [],
+                 UnicodeUtils.name_aliases("a")
+  end
+  def test_sid
+    assert_equal nil, UnicodeUtils.sid(-1)
+    assert_equal "NULL", UnicodeUtils.sid(0x0)
+    assert_equal "LATIN CAPITAL LETTER GHA", UnicodeUtils.sid(0x1a2)
+    assert_equal "LINE FEED", UnicodeUtils.sid(0xa)
+    assert_equal "PADDING CHARACTER", UnicodeUtils.sid(0x80)
+    assert_equal "BYTE ORDER MARK", UnicodeUtils.sid(0xfeff)
+    assert_equal "SPACE", UnicodeUtils.sid(0x20)
+    assert_equal "<reserved-0380>", UnicodeUtils.sid(0x380)
+    assert_equal "<surrogate-D800>", UnicodeUtils.sid(0xd800)
+    assert_equal "<private-use-F0000>", UnicodeUtils.sid(0xf0000)
+    assert_equal "<private-use-10FFFD>", UnicodeUtils.sid(0x10fffd)
+    assert_equal "<noncharacter-10FFFF>", UnicodeUtils.sid(UnicodeUtils::Codepoint::RANGE.end)
+    assert_equal nil, UnicodeUtils.sid(UnicodeUtils::Codepoint::RANGE.end + 1)
+  end
 end

metadata CHANGED

@@ -1,34 +1,25 @@
---- !ruby/object:Gem::Specification
+--- !ruby/object:Gem::Specification
 name: unicode_utils
-version: !ruby/object:Gem::Version
-  prerelease: false
-  segments:
-  - 1
-  - 2
-  - 2
-  version: 1.2.2
+version: !ruby/object:Gem::Version
+  version: 1.3.0
+  prerelease:
 platform: ruby
-authors:
+authors:
 - Stefan Lang
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-11-27 00:00:00 +01:00
-default_executable:
+date: 2012-03-07 00:00:00.000000000 Z
 dependencies: []
 description:
 email: langstefan@gmx.at
 executables: []
 extensions: []
-extra_rdoc_files:
+extra_rdoc_files:
 - README.txt
 - INSTALL.txt
 - CHANGES.txt
-files:
+files:
 - lib/unicode_utils.rb
 - lib/unicode_utils/conditional_casing.rb
 - lib/unicode_utils/version.rb
@@ -43,8 +34,12 @@ files:
 - lib/unicode_utils/general_category.rb
 - lib/unicode_utils/uppercase_char_q.rb
 - lib/unicode_utils/upcase.rb
+- lib/unicode_utils/sid.rb
 - lib/unicode_utils/u.rb
+- lib/unicode_utils/code_point_type.rb
 - lib/unicode_utils/hangul_syllable_decomposition.rb
+- lib/unicode_utils/name_aliases.rb
+- lib/unicode_utils/name_alias.rb
 - lib/unicode_utils/soft_dotted_char_q.rb
 - lib/unicode_utils/lowercase_char_q.rb
 - lib/unicode_utils/read_cdata.rb
@@ -87,6 +82,7 @@ files:
 - cdata/general_category_aliases
 - cdata/canonical_decomposition_map
 - cdata/cat_set_titlecase
+- cdata/name_aliases
 - cdata/casefold_f_map
 - cdata/special_uc_map
 - cdata/special_tc_map
@@ -107,40 +103,31 @@ files:
 - INSTALL.txt
 - LICENSE.txt
 - CHANGES.txt
-has_rdoc: true
 homepage: http://github.com/lang/unicode_utils
 licenses: []
 post_install_message:
-rdoc_options:
+rdoc_options:
 - --main=README.txt
 - --charset=UTF-8
-require_paths:
+require_paths:
 - lib
-required_ruby_version: !ruby/object:Gem::Requirement
+required_ruby_version: !ruby/object:Gem::Requirement
   none: false
-  requirements:
-  - - ">="
-    - !ruby/object:Gem::Version
-      segments:
-      - 1
-      - 9
-      - 1
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
       version: 1.9.1
-required_rubygems_version: !ruby/object:Gem::Requirement
+required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
-  requirements:
-  - - ">="
-    - !ruby/object:Gem::Version
-      segments:
-      - 0
-      version: "0"
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
 requirements: []
 rubyforge_project: unicode-utils
-rubygems_version: 1.3.7
+rubygems_version: 1.8.11
 signing_key:
 specification_version: 3
 summary: additional Unicode aware functions for Ruby 1.9
-test_files:
+test_files:
 - test/test_unicode_utils.rb