RubyGems - ting - Versions diffs - 0.3.0 → 0.9.0 - Mend

ting 0.3.0 → 0.9.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (44)

checksums.yaml +7 -0
data/.gitignore +5 -0
data/.travis.yml +13 -0
data/Gemfile +9 -0
data/Gemfile.lock +235 -0
data/LICENSE.txt +674 -0
data/{README.rdoc → README.md} +43 -35
data/Rakefile +28 -15
data/TODO +16 -15
data/examples/hello.rb +12 -12
data/lib/ting.rb +36 -61
data/lib/ting/conversion.rb +6 -5
data/lib/ting/conversions.rb +88 -80
data/lib/ting/conversions/hanyu.rb +5 -9
data/lib/ting/converter.rb +30 -0
data/lib/ting/data/comparison.csv +410 -410
data/lib/ting/data/final.csv +12 -10
data/lib/ting/data/initial.csv +8 -7
data/lib/ting/data/paladiy.txt +421 -421
data/lib/ting/data/rules.yaml +38 -27
data/lib/ting/data/valid_pinyin.yaml +454 -453
data/lib/ting/exception.rb +14 -17
data/lib/ting/groundwork.rb +181 -177
data/lib/ting/procable.rb +7 -0
data/lib/ting/reader.rb +27 -0
data/lib/ting/string.rb +0 -15
data/lib/ting/tones.rb +65 -65
data/lib/ting/tones/accents.rb +75 -69
data/lib/ting/tones/ipa.rb +1 -1
data/lib/ting/tones/no_tones.rb +7 -7
data/lib/ting/tones/numbers.rb +25 -25
data/lib/ting/tones/supernum.rb +1 -1
data/lib/ting/version.rb +1 -1
data/lib/ting/writer.rb +23 -0
data/spec/jruby_csv_spec.rb +78 -0
data/spec/spec_helper.rb +3 -0
data/spec/ting_spec.rb +19 -0
data/test/test_comparison.rb +43 -35
data/test/test_hanyu_coverage.rb +42 -37
data/ting.gemspec +23 -0
metadata +95 -71
data/examples/cgiform/cgiform.rb +0 -24
data/examples/cgiform/template.rhtml +0 -69
data/lib/ting/support.rb +0 -19

data/lib/ting/tones/accents.rb CHANGED

@@ -1,69 +1,75 @@
-# coding: utf-8
-module Ting
-  module Tones
-    class Accents < Tone
-      class <<self
-      UNICODE_TONE_GLYPHS={
-        :a=>[97, 257, 225, 462, 224],
-        :e=>[101, 275, 233, 283, 232],
-        :i=>[105, 299, 237, 464, 236],
-        :o=>[111, 333, 243, 466, 242],
-        :u=>[117, 363, 250, 468, 249],
-        :v=>[252, 470, 472, 474, 476]
-      }
-      def tone_glyph(letter,tone)
-        if (u=UNICODE_TONE_GLYPHS[letter.to_sym][tone%MAX_TONE])
-          [u].pack('U')
-        end
-      end
-      def add_tone(syll, tone)
-        syll.gsub!('ü','v')
-        tone %= MAX_TONE
-        case syll
-        when /a/
-          syll.sub(/a/, tone_glyph(:a,tone))
-        when /e/
-          syll.sub(/e/, tone_glyph(:e,tone))
-        when /o/
-          syll.sub(/o/, tone_glyph(:o,tone))
-        when /(i|u|v)/
-          syll.sub($1, tone_glyph($1,tone))
-        else
-          syll
-        end
-      end
-      def peek_tone(syll)
-        unpacked = syll.unpack('U*')
-        each_tone_glyph do |vowel, tones|
-          tone_glyph=unpacked.find {|t| tones.include?(t)}
-          normalize( tones.index(tone_glyph) ) if tone_glyph
-        end
-      end
-      def pop_tone(syll)
-        unpacked = syll.unpack('U*')
-        each_tone_glyph do |vowel, tones|
-          if tone_glyph = unpacked.find {|t| tones.include?(t)}
-            unpacked[unpacked.index(tone_glyph)]=vowel.to_s[0]
-            break [normalize(tones.index(tone_glyph)), unpacked.pack('U*')]
-          end
-        end
-      end
-      private
-        def each_tone_glyph
-          [:a,:e,:i,:o,:u,:v].each do |v|  #Order is significant
-            vowel, tones = v, UNICODE_TONE_GLYPHS[v]
-            yield vowel,tones
-          end
-        end
-      end
-    end
-  end
-end
+# coding: utf-8
+module Ting
+  module Tones
+    class Accents < Tone
+      class << self
+      UNICODE_TONE_GLYPHS={
+        :a=>[97, 257, 225, 462, 224],
+        :e=>[101, 275, 233, 283, 232],
+        :i=>[105, 299, 237, 464, 236],
+        :o=>[111, 333, 243, 466, 242],
+        :u=>[117, 363, 250, 468, 249],
+        :v=>[252, 470, 472, 474, 476]
+      }
+      def tone_glyph(letter,tone)
+        if (u=UNICODE_TONE_GLYPHS[letter.to_sym][tone%MAX_TONE])
+          [u].pack('U')
+        end
+      end
+      def add_tone(syll, tone)
+        syll = syll.sub('ü','v')
+        tone %= MAX_TONE
+        case syll
+        when /a/
+          syll.sub(/a/, tone_glyph(:a,tone))
+        when /e/
+          syll.sub(/e/, tone_glyph(:e,tone))
+        when /o/
+          syll.sub(/o/, tone_glyph(:o,tone))
+        when /(i|u|v)\z/
+          syll.sub($1, tone_glyph($1,tone)).sub('v', 'ü')
+        when /(i|u|v)/
+          syll.sub($1, tone_glyph($1,tone)).sub('v', 'ü')
+        else
+          syll
+        end
+      end
+      def peek_tone(syll)
+        unpacked = syll.unpack('U*')
+        each_tone_glyph do |vowel, tones|
+          tone_glyph=unpacked.find {|t| tones.include?(t)}
+          normalize( tones.index(tone_glyph) ) if tone_glyph
+        end
+      end
+      # returns [ tone number, syllable without tone ]
+      # e.g. ni3 => [ 3, 'ni' ]
+      def pop_tone(syll)
+        unpacked = syll.unpack('U*')
+        each_tone_glyph do |vowel, tones|
+          if tone_glyph = unpacked.find {|t| tones.include?(t)}
+            unpacked[unpacked.index(tone_glyph)] = vowel.to_s.unpack('U').first
+            break [normalize(tones.index(tone_glyph)), unpacked.pack('U*').sub('v', 'ü')]
+          end
+        end
+      end
+      private
+        def each_tone_glyph
+          [:a,:e,:i,:o,:u,:v].each do |v|  #Order is significant
+            vowel, tones = v, UNICODE_TONE_GLYPHS[v]
+            yield vowel,tones
+          end
+        end
+      end
+    end
+  end
+end

data/lib/ting/tones/ipa.rb CHANGED

@@ -12,7 +12,7 @@ module Ting
         end
         def peek_tone(syll)
-          if t = GLYPHS.index(syll.uchars[-1])
+          if t = GLYPHS.index(syll.chars.last)
             return t
           end
           return NEUTRAL_TONE

data/lib/ting/tones/no_tones.rb CHANGED

@@ -1,7 +1,7 @@
-module Ting
-  module Tones
-    class NoTones < Tone
-    end
-  end
-end
+module Ting
+  module Tones
+    class NoTones < Tone
+    end
+  end
+end

data/lib/ting/tones/numbers.rb CHANGED

@@ -1,25 +1,25 @@
-module Ting
-  module Tones
-    class Numbers < Tone
-      class <<self
-      def add_tone(syll, tone)
-        syll + normalize(tone).to_s
-      end
-      def peek_tone(syll)
-        if syll =~ /(\d)\Z/
-          normalize Integer($1)
-        else
-          NEUTRAL_TONE
-        end
-      end
-      def pop_tone(syll)
-        [ peek_tone(syll), syll[/\A\D+/] ]
-      end
-      end
-    end
-  end
-end
+module Ting
+  module Tones
+    class Numbers < Tone
+      class <<self
+      def add_tone(syll, tone)
+        syll + normalize(tone).to_s
+      end
+      def peek_tone(syll)
+        if syll =~ /(\d)\Z/
+          normalize Integer($1)
+        else
+          NEUTRAL_TONE
+        end
+      end
+      def pop_tone(syll)
+        [ peek_tone(syll), syll[/\A\D+/] ]
+      end
+      end
+    end
+  end
+end

data/lib/ting/tones/supernum.rb CHANGED

@@ -12,7 +12,7 @@ module Ting
         end
         def peek_tone(syll)
-          if t = GLYPHS.index(syll.uchars[-1])
+          if t = GLYPHS.index(syll.chars.last)
             return t
           end
           return NEUTRAL_TONE

data/lib/ting/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Ting
-  VERSION = '0.3.0'
+  VERSION = '0.9.0'
 end

data/lib/ting/writer.rb ADDED

@@ -0,0 +1,23 @@
+module Ting
+  class Writer
+    include Procable
+    def initialize(conv, tone)
+      @conv = conv.to_s
+      @tone = Tones.const_get Ting.camelize(tone.to_s)
+    end
+    def generate(syll)
+      Array(syll).map do |s|
+        syllable = Conversions.unparse(@conv, s)
+        str = @tone.add_tone(syllable, s.tone)
+        str.capitalize! if s.capitalized?
+        str
+      end.join(' ')
+    end
+    alias :<< :generate
+    alias :unparse :generate
+    alias :call :generate
+  end
+end

data/spec/jruby_csv_spec.rb ADDED

@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+require 'csv'
+require 'rspec/autorun'
+# Describes a problem with CSV parsing on JRuby, see output at the bottom.
+#
+# Version:
+#   jruby 1.7.2 (1.9.3p327) 2013-01-04 302c706 on Java HotSpot(TM) Server VM 1.7.0_15-b03 [linux-i386]
+# Has since been fixed, verified with 1.7.11
+describe "a problem with jruby?" do
+  let(:csv_full_contents) {
+'"zhuyin","wadegiles","mps2","yale","tongyong","hanyu","gwoyeu1","gwoyeu2","gwoyeu3","gwoyeu4"
+"ㄚ","a","a","a","a","a","a","ar","aa","ah"
+"ㄞ","ai","ai","ai","ai","ai","ai","air","ae","ay"
+"ㄢ","an","an","an","an","an","an","arn","aan","ann"
+"ㄤ","ang","ang","ang","ang","ang","ang","arng","aang","anq"
+"ㄠ","ao","au","au","ao","ao","au","aur","ao","aw"
+"ㄅㄚ","pa","ba","ba","ba","ba","ba","bar","baa","bah"
+"ㄅㄞ","pai","bai","bai","bai","bai","bai","bair","bae","bay"
+"ㄅㄢ","pan","ban","ban","ban","ban","ban","barn","baan","bann"
+"ㄅㄤ","pang","bang","bang","bang","bang","bang","barng","baang","banq"
+"ㄅㄠ","pao","bau","bau","bao","bao","bau","baur","bao","baw"
+"ㄅㄟ","pei","bei","bei","bei","bei","bei","beir","beei","bey"
+"ㄅㄣ","pen","ben","ben","ben","ben","ben","bern","been","benn"
+"ㄅㄥ","peng","beng","beng","beng","beng","beng","berng","beeng","benq"
+"ㄅㄧ","pi","bi","bi","bi","bi","bi","byi","bii","bih"
+"ㄅㄧㄢ","pien","bian","byan","bian","bian","bian","byan","bean","biann"
+"ㄅㄧㄠ","piao","biau","byau","biao","biao","biau","byau","beau","biaw"
+"ㄅㄧㄝ","pieh","bie","bye","bie","bie","bie","bye","biee","bieh"
+"ㄅㄧㄣ","pin","bin","bin","bin","bin","bin","byn","biin","binn"
+"ㄅㄧㄥ","ping","bing","bing","bing","bing","bing","byng","biing","binq"'
+}
+  def lines(range)
+    csv_full_contents.split("\n")[range].join("\n")
+  end
+  it "this actually does raise an exception, so this spec fails" do
+    expect{ CSV.parse(csv_full_contents) }.to_not raise_exception
+  end
+  it "using the first 15 lines still works ok" do
+    expect{ CSV.parse(lines(0..15))}.to_not raise_exception
+  end
+  it "from line 16 on there's a problem" do
+    expect{ CSV.parse(lines(0..16))}.to_not raise_exception
+  end
+  it "but line 16 itself isn't the culprit" do
+    expect{ CSV.parse(lines(3..18))}.to_not raise_exception
+  end
+end
+#   1) a problem with jruby? this actually does raise an exception, so this spec fails
+#      Failure/Error: expect{ CSV.parse(csv_full_contents) }.to_not raise_exception
+#        expected no Exception, got #<ArgumentError: invalid byte sequence in UTF-8> with backtrace:
+#          # ./spec/jruby_csv_spec.rb:38:in `(root)'
+#          # ./spec/jruby_csv_spec.rb:38:in `(root)'
+#      # ./spec/jruby_csv_spec.rb:38:in `(root)'
+#   2) a problem with jruby? from line 16 on there's a problem
+#      Failure/Error: expect{ CSV.parse(lines(0..16))}.to_not raise_exception
+#        expected no Exception, got #<ArgumentError: invalid byte sequence in UTF-8> with backtrace:
+#          # ./spec/jruby_csv_spec.rb:46:in `(root)'
+#          # ./spec/jruby_csv_spec.rb:46:in `(root)'
+#      # ./spec/jruby_csv_spec.rb:46:in `(root)'
+# Finished in 0.111 seconds
+# 4 examples, 2 failures
+# Failed examples:
+# rspec ./spec/jruby_csv_spec.rb:37 # a problem with jruby? this actually does raise an exception, so this spec fails
+# rspec ./spec/jruby_csv_spec.rb:45 # a problem with jruby? from line 16 on there's a problem

data/spec/spec_helper.rb ADDED

@@ -0,0 +1,3 @@
+$:.unshift( File.expand_path('../lib', __FILE__) )
+require 'ting'

data/spec/ting_spec.rb ADDED

@@ -0,0 +1,19 @@
+# -*- coding: utf-8 -*-
+require 'spec_helper'
+describe Ting do
+  let(:pinyin)   { 'dao4 ke3 dao4 fei1 chang2 dao4'.force_encoding('UTF-8') }
+  let(:bopomofo) { 'ㄉㄠˋ ㄎㄜˇ ㄉㄠˋ ㄈㄟ ㄔㄤˊ ㄉㄠˋ'.force_encoding('UTF-8') }
+  it 'should convert from Hany Pinyin to Bopomofo' do
+    Ting.from(:hanyu, :numbers).to(:zhuyin, :marks).convert(pinyin).should == bopomofo
+  end
+  it "should parse" do
+    Ting::Reader.new(:hanyu, :numbers).parse('Bei3').first.should == Ting::Syllable.new( Ting::Initial::Bo, Ting::Final::Ei, 3, true )
+  end
+  it 'should respect capitalization' do
+    Ting.from(:hanyu, :numbers).to(:hanyu, :accents).convert('Bei3 jing1').should == 'Běi jīng'
+  end
+end

data/test/test_comparison.rb CHANGED

@@ -1,35 +1,43 @@
-require 'ting'
-require 'test/unit'
-require 'csv'
-# This test uses the chart from piyin.info to compare all implemted conversion types
-# Since I can't find another reference of the hanyu pinyin 'lo', I have removed it from the table
-class TestCompare < Test::Unit::TestCase
-  CHART=CSV.parse(IO.read(File.dirname(__FILE__)+'/../lib/ting/data/comparison.csv'))
-  COMPARE=[:hanyu, :wadegiles, :zhuyin, :tongyong]
-  # Test all combinations, included parsing/unparsing the same type
-  def test_do_comparisons
-    COMPARE.each do |from|
-      COMPARE.each do |to|
-        compare(from,to)
-      end
-    end
-  end
-  def compare(from, to)
-    reader = Ting.reader(from, :no_tones)
-    writer = Ting.writer(to, :no_tones)
-    ifrom = CHART[0].index from.to_s
-    ito   = CHART[0].index to.to_s
-    CHART[1..-1].each do |vals|
-      assert_equal(vals[ito].strip, writer << (reader << vals[ifrom].strip), "Converting `#{vals[ifrom]}' from #{from} to #{to} value #{vals[ito]}")
-    end
-  end
-end
+require 'ting'
+require 'test/unit'
+# This test uses the chart from piyin.info to compare all implemented conversion types
+# Since I can't find another reference of the hanyu pinyin 'lo', I have removed it from the table
+class TestCompare < Test::Unit::TestCase
+  CHART_FILE = File.expand_path('../../lib/ting/data/comparison.csv', __FILE__)
+  COMPARE=[:hanyu, :wadegiles, :zhuyin, :tongyong]
+  # Both Rubinius and JRuby are having trouble parsing our otherwise valid UTF-8 CSV file.
+  # See https://github.com/jruby/jruby/issues/563 for the JRuby issue that logs the issue.
+  # So we do our own naive CSV parsing here.
+  CHART = begin
+            File.open(CHART_FILE, 'r:UTF-8').each_line.map do |line|
+              line.strip.split(',').map{|entry| entry[/\A"(.*)"\z/, 1]}
+            end
+          end
+  # Test all combinations, included parsing/unparsing the same type
+  def test_do_comparisons
+    COMPARE.each do |from|
+      COMPARE.each do |to|
+        compare(from,to)
+      end
+    end
+  end
+  def compare(from, to)
+    reader = Ting.reader(from, :no_tones)
+    writer = Ting.writer(to, :no_tones)
+    ifrom = CHART[0].index from.to_s
+    ito   = CHART[0].index to.to_s
+    CHART[1..-1].each do |vals|
+      assert_equal(vals[ito].strip, writer << (reader << vals[ifrom].strip), "Converting `#{vals[ifrom]}' from #{from} to #{to} value #{vals[ito]}")
+    end
+  end
+end

data/test/test_hanyu_coverage.rb CHANGED

@@ -1,37 +1,42 @@
-require 'test/unit'
-require 'ting'
-require 'yaml'
-if RUBY_VERSION =~ /^1.8/
-  $KCODE='u'
-end
-module HanyuCoverage
-  grid=YAML.load(IO.read(File.dirname(__FILE__)+'/../lib/ting/data/valid_pinyin.yaml'))
-  grid.each do |fname, row|
-    row.each do |iname, hanyu|
-      eval %[
-        class Test_#{hanyu} < Test::Unit::TestCase
-          include Ting
-          def initialize(s)
-            super(s)
-            @reader = Ting.reader(:hanyu, :no_tones)
-            @writer = Ting.writer(:hanyu, :no_tones)
-          end
-          def test_parse_#{hanyu}
-            assert_equal('#{hanyu}', @writer.unparse(Syllable.new(Initial::#{iname}, Final::#{fname}, Tones::NEUTRAL_TONE)), 'Wrong hanyu for Initial::#{iname}+Final::#{fname}, expected `#{hanyu}` ')
-          end
-          def test_unparse_#{hanyu}
-            ts=@reader.parse('#{hanyu}').first
-            assert_not_nil(ts, 'Reader<:hanyu, :no_tone>#parse("#{hanyu}") returned nil')
-            assert_equal(Initial::#{iname}, ts.initial, 'Wrong initial for `#{hanyu}`, expected Initial::#{iname}')
-            assert_equal(Final::#{fname}, ts.final, 'Wrong final for `#{hanyu}`, expected Final::#{fname}')
-          end
-        end
-      ]
-    end
-  end
-end
+# -*- coding: utf-8 -*-
+require 'test/unit'
+require 'ting'
+require 'yaml'
+module HanyuCoverage
+  class Test_ParseUnparse < Test::Unit::TestCase
+    include Ting
+    def initialize(s)
+      super(s)
+      @reader = Ting.reader(:hanyu, :no_tones)
+      @writer = Ting.writer(:hanyu, :no_tones)
+    end
+    grid=YAML.load(File.open(File.expand_path('../../lib/ting/data/valid_pinyin.yaml', __FILE__), 'r:UTF-8').read)
+    grid.each do |fname, row|
+      row.each do |iname, hanyu|
+        hanyu=hanyu.force_encoding('UTF-8')
+        safe_hanyu = hanyu.gsub('ü','v').gsub('ê','_e')
+        define_method :"test_unparse_#{safe_hanyu}" do
+          assert_equal(
+            hanyu,
+            @writer.unparse(
+              Syllable.new(Initial.const_get(iname), Final.const_get(fname), Tones::NEUTRAL_TONE)
+            ),
+            "Wrong hanyu for Initial::#{iname}+Final::#{fname}, expected `#{hanyu}` "
+          )
+        end
+        define_method :"test_parse_#{safe_hanyu}" do
+          ts=@reader.parse(hanyu).first
+          assert_not_nil(ts, "Reader<:hanyu, :no_tone>#parse('#{hanyu}') returned nil")
+          assert_equal(Initial.const_get(iname), ts.initial, "Wrong initial for `#{hanyu}`, expected Initial::#{iname}")
+          assert_equal(Final.const_get(fname), ts.final, "Wrong final for `#{hanyu}`, expected Final::#{fname}")
+        end
+      end
+    end
+  end
+end