ting 0.3.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,69 +1,75 @@
1
- # coding: utf-8
2
-
3
- module Ting
4
- module Tones
5
- class Accents < Tone
6
- class <<self
7
-
8
- UNICODE_TONE_GLYPHS={
9
- :a=>[97, 257, 225, 462, 224],
10
- :e=>[101, 275, 233, 283, 232],
11
- :i=>[105, 299, 237, 464, 236],
12
- :o=>[111, 333, 243, 466, 242],
13
- :u=>[117, 363, 250, 468, 249],
14
- :v=>[252, 470, 472, 474, 476]
15
- }
16
-
17
- def tone_glyph(letter,tone)
18
- if (u=UNICODE_TONE_GLYPHS[letter.to_sym][tone%MAX_TONE])
19
- [u].pack('U')
20
- end
21
- end
22
-
23
- def add_tone(syll, tone)
24
- syll.gsub!('ü','v')
25
- tone %= MAX_TONE
26
- case syll
27
- when /a/
28
- syll.sub(/a/, tone_glyph(:a,tone))
29
- when /e/
30
- syll.sub(/e/, tone_glyph(:e,tone))
31
- when /o/
32
- syll.sub(/o/, tone_glyph(:o,tone))
33
- when /(i|u|v)/
34
- syll.sub($1, tone_glyph($1,tone))
35
- else
36
- syll
37
- end
38
- end
39
-
40
- def peek_tone(syll)
41
- unpacked = syll.unpack('U*')
42
- each_tone_glyph do |vowel, tones|
43
- tone_glyph=unpacked.find {|t| tones.include?(t)}
44
- normalize( tones.index(tone_glyph) ) if tone_glyph
45
- end
46
- end
47
-
48
- def pop_tone(syll)
49
- unpacked = syll.unpack('U*')
50
- each_tone_glyph do |vowel, tones|
51
- if tone_glyph = unpacked.find {|t| tones.include?(t)}
52
- unpacked[unpacked.index(tone_glyph)]=vowel.to_s[0]
53
- break [normalize(tones.index(tone_glyph)), unpacked.pack('U*')]
54
- end
55
- end
56
- end
57
-
58
- private
59
- def each_tone_glyph
60
- [:a,:e,:i,:o,:u,:v].each do |v| #Order is significant
61
- vowel, tones = v, UNICODE_TONE_GLYPHS[v]
62
- yield vowel,tones
63
- end
64
- end
65
-
66
- end
67
- end
68
- end
69
- end
1
+ # coding: utf-8
2
+
3
+ module Ting
4
+ module Tones
5
+ class Accents < Tone
6
+ class << self
7
+
8
+ UNICODE_TONE_GLYPHS={
9
+ :a=>[97, 257, 225, 462, 224],
10
+ :e=>[101, 275, 233, 283, 232],
11
+ :i=>[105, 299, 237, 464, 236],
12
+ :o=>[111, 333, 243, 466, 242],
13
+ :u=>[117, 363, 250, 468, 249],
14
+ :v=>[252, 470, 472, 474, 476]
15
+ }
16
+
17
# Looks up the glyph for +letter+ carrying +tone+ in UNICODE_TONE_GLYPHS.
#
# letter - String or Symbol naming a table row (converted with to_sym).
# tone   - Integer; reduced modulo MAX_TONE before indexing the row.
#
# Returns a one-character String packed from the code point, or nil when
# the row has no entry at that index.
def tone_glyph(letter, tone)
  codepoint = UNICODE_TONE_GLYPHS[letter.to_sym][tone % MAX_TONE]
  return unless codepoint

  [codepoint].pack('U')
end
22
+
23
# Puts the accent for +tone+ on the vowel of +syll+ that should carry it.
#
# The vowel is chosen in this order: 'a', then 'e', then 'o'; otherwise a
# trailing 'i'/'u'/'ü'; otherwise the first 'i'/'u'/'ü'. A syllable with
# none of these letters is returned without an accent.
#
# syll - String syllable; 'ü' may also be written as 'v'. Not modified.
# tone - Integer tone; reduced modulo MAX_TONE before the glyph lookup.
#
# Returns a new String.
def add_tone(syll, tone)
  # Work on 'v' internally so every vowel is a single ASCII letter that
  # maps straight onto a glyph-table key.
  syll = syll.sub('ü', 'v')
  tone %= MAX_TONE
  marked =
    case syll
    when /a/
      syll.sub(/a/, tone_glyph(:a, tone))
    when /e/
      syll.sub(/e/, tone_glyph(:e, tone))
    when /o/
      syll.sub(/o/, tone_glyph(:o, tone))
    when /(i|u|v)\z/, /(i|u|v)/
      # The patterns are tried left to right, so $1 is the trailing vowel
      # when there is one and the first i/u/v otherwise.
      syll.sub($1, tone_glyph($1, tone))
    else
      syll
    end
  # Restore 'ü' on every path. Previously only the i/u/v branches did this,
  # so e.g. "lüe" with tone 4 came out as "lvè". When the accent went onto
  # the 'v' itself there is no 'v' left and this is a no-op.
  marked.sub('v', 'ü')
end
41
+
42
# Reads the tone of an accent-marked syllable without changing it.
#
# syll - String syllable, e.g. "liǔ".
#
# Returns normalize(index), where index is the position of the accented
# glyph in its vowel's glyph row, or normalize(0) when the syllable has no
# accented vowel.
#
# Two defects of the previous version are fixed here:
# * the block's value was discarded, so the method returned whatever
#   each_tone_glyph returned instead of the tone;
# * every glyph row starts with the plain vowel (index 0), so a plain 'i'
#   in "liǔ" was found before the accented 'ǔ'. Only indices above 0 count
#   as a tone mark now.
def peek_tone(syll)
  codepoints = syll.unpack('U*')
  each_tone_glyph do |_vowel, tones|
    marked = codepoints.find { |cp| tones.index(cp).to_i > 0 }
    return normalize(tones.index(marked)) if marked
  end
  normalize(0)
end
49
+
50
# Splits an accent-marked syllable into its tone and its bare spelling.
#
# syll - String syllable, e.g. "nǐ". Not modified.
#
# Returns [ tone, syllable without tone mark ], e.g. "nǐ" => [ 3, "ni" ].
# The tone is normalize(index) of the accented glyph within its vowel's
# glyph row; a syllable without any accented vowel yields normalize(0).
# A 'v' in the result is written back as 'ü'.
#
# Every glyph row starts with the plain vowel (index 0). The previous
# version accepted that entry as a match, so the plain 'i' of "liǔ" won
# over the accented 'ǔ' and the accent was never removed. Only indices
# above 0 count as a tone mark now. Syllables containing none of the table
# vowels also get a proper [ tone, syllable ] pair instead of the return
# value of each_tone_glyph.
def pop_tone(syll)
  codepoints = syll.unpack('U*')
  each_tone_glyph do |vowel, tones|
    position = codepoints.index { |cp| tones.index(cp).to_i > 0 }
    next unless position

    tone = tones.index(codepoints[position])
    # Swap the accented glyph for the plain letter that names its row.
    codepoints[position] = vowel.to_s.unpack('U').first
    return [normalize(tone), codepoints.pack('U*').sub('v', 'ü')]
  end
  [normalize(0), syll.sub('v', 'ü')]
end
63
+
64
+ private
65
# Yields every base vowel together with its row of UNICODE_TONE_GLYPHS.
#
# Yields the vowel Symbol and the Array of code points stored for it.
# Returns the Array of vowel Symbols that was iterated.
def each_tone_glyph
  [:a, :e, :i, :o, :u, :v].each do |vowel| # Order is significant
    yield vowel, UNICODE_TONE_GLYPHS[vowel]
  end
end
71
+
72
+ end
73
+ end
74
+ end
75
+ end
@@ -12,7 +12,7 @@ module Ting
12
12
  end
13
13
 
14
14
  def peek_tone(syll)
15
- if t = GLYPHS.index(syll.uchars[-1])
15
+ if t = GLYPHS.index(syll.chars.last)
16
16
  return t
17
17
  end
18
18
  return NEUTRAL_TONE
@@ -1,7 +1,7 @@
1
- module Ting
2
- module Tones
3
- class NoTones < Tone
4
- end
5
- end
6
- end
7
-
1
+ module Ting
2
+ module Tones
3
+ class NoTones < Tone
4
+ end
5
+ end
6
+ end
7
+
@@ -1,25 +1,25 @@
1
- module Ting
2
- module Tones
3
- class Numbers < Tone
4
- class <<self
5
-
6
- def add_tone(syll, tone)
7
- syll + normalize(tone).to_s
8
- end
9
-
10
- def peek_tone(syll)
11
- if syll =~ /(\d)\Z/
12
- normalize Integer($1)
13
- else
14
- NEUTRAL_TONE
15
- end
16
- end
17
-
18
- def pop_tone(syll)
19
- [ peek_tone(syll), syll[/\A\D+/] ]
20
- end
21
-
22
- end
23
- end
24
- end
25
- end
1
module Ting
  module Tones
    # Tone notation that writes the tone as a digit after the syllable.
    class Numbers < Tone
      class << self

        # Returns +syll+ followed by the normalized +tone+ as text.
        def add_tone(syll, tone)
          suffix = normalize(tone).to_s
          syll + suffix
        end

        # Returns the normalized value of the digit that ends +syll+, or
        # NEUTRAL_TONE when +syll+ does not end in a digit.
        def peek_tone(syll)
          return NEUTRAL_TONE unless syll =~ /(\d)\Z/

          normalize Integer($1)
        end

        # Returns [ tone, leading run of non-digit characters of +syll+ ].
        def pop_tone(syll)
          tone = peek_tone(syll)
          bare = syll[/\A\D+/]
          [ tone, bare ]
        end

      end
    end
  end
end
@@ -12,7 +12,7 @@ module Ting
12
12
  end
13
13
 
14
14
  def peek_tone(syll)
15
- if t = GLYPHS.index(syll.uchars[-1])
15
+ if t = GLYPHS.index(syll.chars.last)
16
16
  return t
17
17
  end
18
18
  return NEUTRAL_TONE
@@ -1,3 +1,3 @@
1
1
  module Ting
2
- VERSION = '0.3.0'
2
+ VERSION = '0.9.0'
3
3
  end
@@ -0,0 +1,23 @@
1
module Ting
  # Renders syllables as text in one transcription and one tone notation.
  class Writer
    include Procable

    # conv - name of the transcription; stored as a String and handed to
    #        Conversions.unparse.
    # tone - name of the tone notation; camelized and looked up as a
    #        constant in Tones.
    def initialize(conv, tone)
      @conv = conv.to_s
      @tone = Tones.const_get Ting.camelize(tone.to_s)
    end

    # Renders one syllable or a list of syllables.
    #
    # syll - a syllable or an Array of syllables; each must respond to
    #        #tone and #capitalized?.
    #
    # Returns the rendered syllables joined by single spaces.
    def generate(syll)
      Array(syll).map do |s|
        syllable = Conversions.unparse(@conv, s)
        str = @tone.add_tone(syllable, s.tone)
        # Non-destructive capitalize: the tone class may hand back the very
        # object it received (NOTE(review): not visible from here), and
        # capitalize! would then alter that string in place.
        s.capitalized? ? str.capitalize : str
      end.join(' ')
    end

    alias :<< :generate
    alias :unparse :generate
    alias :call :generate
  end
end
@@ -0,0 +1,78 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'csv'
4
+ require 'rspec/autorun'
5
+
6
+ # Describes a problem with CSV parsing on JRuby, see output at the bottom.
7
+ #
8
+ # Version:
9
+ # jruby 1.7.2 (1.9.3p327) 2013-01-04 302c706 on Java HotSpot(TM) Server VM 1.7.0_15-b03 [linux-i386]
10
+ # Has since been fixed, verified with 1.7.11
11
+
12
+ describe "a problem with jruby?" do
13
+ let(:csv_full_contents) {
14
+ '"zhuyin","wadegiles","mps2","yale","tongyong","hanyu","gwoyeu1","gwoyeu2","gwoyeu3","gwoyeu4"
15
+ "ㄚ","a","a","a","a","a","a","ar","aa","ah"
16
+ "ㄞ","ai","ai","ai","ai","ai","ai","air","ae","ay"
17
+ "ㄢ","an","an","an","an","an","an","arn","aan","ann"
18
+ "ㄤ","ang","ang","ang","ang","ang","ang","arng","aang","anq"
19
+ "ㄠ","ao","au","au","ao","ao","au","aur","ao","aw"
20
+ "ㄅㄚ","pa","ba","ba","ba","ba","ba","bar","baa","bah"
21
+ "ㄅㄞ","pai","bai","bai","bai","bai","bai","bair","bae","bay"
22
+ "ㄅㄢ","pan","ban","ban","ban","ban","ban","barn","baan","bann"
23
+ "ㄅㄤ","pang","bang","bang","bang","bang","bang","barng","baang","banq"
24
+ "ㄅㄠ","pao","bau","bau","bao","bao","bau","baur","bao","baw"
25
+ "ㄅㄟ","pei","bei","bei","bei","bei","bei","beir","beei","bey"
26
+ "ㄅㄣ","pen","ben","ben","ben","ben","ben","bern","been","benn"
27
+ "ㄅㄥ","peng","beng","beng","beng","beng","beng","berng","beeng","benq"
28
+ "ㄅㄧ","pi","bi","bi","bi","bi","bi","byi","bii","bih"
29
+ "ㄅㄧㄢ","pien","bian","byan","bian","bian","bian","byan","bean","biann"
30
+ "ㄅㄧㄠ","piao","biau","byau","biao","biao","biau","byau","beau","biaw"
31
+ "ㄅㄧㄝ","pieh","bie","bye","bie","bie","bie","bye","biee","bieh"
32
+ "ㄅㄧㄣ","pin","bin","bin","bin","bin","bin","byn","biin","binn"
33
+ "ㄅㄧㄥ","ping","bing","bing","bing","bing","bing","byng","biing","binq"'
34
+ }
35
+
36
+ def lines(range)
37
+ csv_full_contents.split("\n")[range].join("\n")
38
+ end
39
+
40
+ it "this actually does raise an exception, so this spec fails" do
41
+ expect{ CSV.parse(csv_full_contents) }.to_not raise_exception
42
+ end
43
+
44
+ it "using the first 15 lines still works ok" do
45
+ expect{ CSV.parse(lines(0..15))}.to_not raise_exception
46
+ end
47
+
48
+ it "from line 16 on there's a problem" do
49
+ expect{ CSV.parse(lines(0..16))}.to_not raise_exception
50
+ end
51
+
52
+ it "but line 16 itself isn't the culprit" do
53
+ expect{ CSV.parse(lines(3..18))}.to_not raise_exception
54
+ end
55
+ end
56
+
57
+
58
+ # 1) a problem with jruby? this actually does raise an exception, so this spec fails
59
+ # Failure/Error: expect{ CSV.parse(csv_full_contents) }.to_not raise_exception
60
+ # expected no Exception, got #<ArgumentError: invalid byte sequence in UTF-8> with backtrace:
61
+ # # ./spec/jruby_csv_spec.rb:38:in `(root)'
62
+ # # ./spec/jruby_csv_spec.rb:38:in `(root)'
63
+ # # ./spec/jruby_csv_spec.rb:38:in `(root)'
64
+
65
+ # 2) a problem with jruby? from line 16 on there's a problem
66
+ # Failure/Error: expect{ CSV.parse(lines(0..16))}.to_not raise_exception
67
+ # expected no Exception, got #<ArgumentError: invalid byte sequence in UTF-8> with backtrace:
68
+ # # ./spec/jruby_csv_spec.rb:46:in `(root)'
69
+ # # ./spec/jruby_csv_spec.rb:46:in `(root)'
70
+ # # ./spec/jruby_csv_spec.rb:46:in `(root)'
71
+
72
+ # Finished in 0.111 seconds
73
+ # 4 examples, 2 failures
74
+
75
+ # Failed examples:
76
+
77
+ # rspec ./spec/jruby_csv_spec.rb:37 # a problem with jruby? this actually does raise an exception, so this spec fails
78
+ # rspec ./spec/jruby_csv_spec.rb:45 # a problem with jruby? from line 16 on there's a problem
@@ -0,0 +1,3 @@
1
+ $:.unshift( File.expand_path('../lib', __FILE__) )
2
+
3
+ require 'ting'
@@ -0,0 +1,19 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'spec_helper'
3
+
4
# End-to-end examples for the public Ting conversion API.
describe Ting do
  let(:pinyin) do
    'dao4 ke3 dao4 fei1 chang2 dao4'.force_encoding('UTF-8')
  end

  let(:bopomofo) do
    'ㄉㄠˋ ㄎㄜˇ ㄉㄠˋ ㄈㄟ ㄔㄤˊ ㄉㄠˋ'.force_encoding('UTF-8')
  end

  it 'should convert from Hany Pinyin to Bopomofo' do
    converter = Ting.from(:hanyu, :numbers).to(:zhuyin, :marks)
    converter.convert(pinyin).should == bopomofo
  end

  it "should parse" do
    reader = Ting::Reader.new(:hanyu, :numbers)
    expected = Ting::Syllable.new( Ting::Initial::Bo, Ting::Final::Ei, 3, true )
    reader.parse('Bei3').first.should == expected
  end

  it 'should respect capitalization' do
    converter = Ting.from(:hanyu, :numbers).to(:hanyu, :accents)
    converter.convert('Bei3 jing1').should == 'Běi jīng'
  end
end
@@ -1,35 +1,43 @@
1
- require 'ting'
2
- require 'test/unit'
3
- require 'csv'
4
-
5
-
6
- # This test uses the chart from piyin.info to compare all implemted conversion types
7
- # Since I can't find another reference of the hanyu pinyin 'lo', I have removed it from the table
8
-
9
- class TestCompare < Test::Unit::TestCase
10
- CHART=CSV.parse(IO.read(File.dirname(__FILE__)+'/../lib/ting/data/comparison.csv'))
11
- COMPARE=[:hanyu, :wadegiles, :zhuyin, :tongyong]
12
-
13
-
14
- # Test all combinations, included parsing/unparsing the same type
15
-
16
- def test_do_comparisons
17
- COMPARE.each do |from|
18
- COMPARE.each do |to|
19
- compare(from,to)
20
- end
21
- end
22
- end
23
-
24
- def compare(from, to)
25
- reader = Ting.reader(from, :no_tones)
26
- writer = Ting.writer(to, :no_tones)
27
-
28
- ifrom = CHART[0].index from.to_s
29
- ito = CHART[0].index to.to_s
30
-
31
- CHART[1..-1].each do |vals|
32
- assert_equal(vals[ito].strip, writer << (reader << vals[ifrom].strip), "Converting `#{vals[ifrom]}' from #{from} to #{to} value #{vals[ito]}")
33
- end
34
- end
35
- end
1
+ require 'ting'
2
+ require 'test/unit'
3
+
4
+ # This test uses the chart from pinyin.info to compare all implemented conversion types
5
+ # Since I can't find another reference of the hanyu pinyin 'lo', I have removed it from the table
6
+
7
# Converts every entry of the comparison chart between each pair of
# transcriptions listed in COMPARE and checks the result against the chart.
class TestCompare < Test::Unit::TestCase
  CHART_FILE = File.expand_path('../../lib/ting/data/comparison.csv', __FILE__)
  COMPARE = [:hanyu, :wadegiles, :zhuyin, :tongyong]

  # Both Rubinius and JRuby have trouble parsing our otherwise valid UTF-8 CSV file.
  # See https://github.com/jruby/jruby/issues/563 for the JRuby report.
  # So we do our own naive CSV parsing here: split each line on commas and
  # take the text between the double quotes of every entry (an entry that is
  # not wrapped in double quotes becomes nil).
  # The block form of File.open closes the file once the chart is loaded.
  CHART = File.open(CHART_FILE, 'r:UTF-8') do |file|
    file.each_line.map do |line|
      line.strip.split(',').map { |entry| entry[/\A"(.*)"\z/, 1] }
    end
  end

  # Test all combinations, including parsing/unparsing the same type
  def test_do_comparisons
    COMPARE.each do |from|
      COMPARE.each do |to|
        compare(from, to)
      end
    end
  end

  # Checks every chart row: the +from+ column, read and written back as
  # +to+ (both without tones), must equal the +to+ column.
  def compare(from, to)
    reader = Ting.reader(from, :no_tones)
    writer = Ting.writer(to, :no_tones)

    # The first chart row holds the column names.
    ifrom = CHART[0].index from.to_s
    ito   = CHART[0].index to.to_s

    CHART[1..-1].each do |vals|
      assert_equal(vals[ito].strip, writer << (reader << vals[ifrom].strip), "Converting `#{vals[ifrom]}' from #{from} to #{to} value #{vals[ito]}")
    end
  end
end
@@ -1,37 +1,42 @@
1
- require 'test/unit'
2
- require 'ting'
3
- require 'yaml'
4
-
5
- if RUBY_VERSION =~ /^1.8/
6
- $KCODE='u'
7
- end
8
-
9
- module HanyuCoverage
10
- grid=YAML.load(IO.read(File.dirname(__FILE__)+'/../lib/ting/data/valid_pinyin.yaml'))
11
- grid.each do |fname, row|
12
- row.each do |iname, hanyu|
13
- eval %[
14
- class Test_#{hanyu} < Test::Unit::TestCase
15
- include Ting
16
- def initialize(s)
17
- super(s)
18
- @reader = Ting.reader(:hanyu, :no_tones)
19
- @writer = Ting.writer(:hanyu, :no_tones)
20
- end
21
-
22
- def test_parse_#{hanyu}
23
- assert_equal('#{hanyu}', @writer.unparse(Syllable.new(Initial::#{iname}, Final::#{fname}, Tones::NEUTRAL_TONE)), 'Wrong hanyu for Initial::#{iname}+Final::#{fname}, expected `#{hanyu}` ')
24
- end
25
-
26
- def test_unparse_#{hanyu}
27
- ts=@reader.parse('#{hanyu}').first
28
- assert_not_nil(ts, 'Reader<:hanyu, :no_tone>#parse("#{hanyu}") returned nil')
29
- assert_equal(Initial::#{iname}, ts.initial, 'Wrong initial for `#{hanyu}`, expected Initial::#{iname}')
30
- assert_equal(Final::#{fname}, ts.final, 'Wrong final for `#{hanyu}`, expected Final::#{fname}')
31
- end
32
- end
33
- ]
34
- end
35
- end
36
-
37
- end
1
+ # -*- coding: utf-8 -*-
2
+ require 'test/unit'
3
+ require 'ting'
4
+ require 'yaml'
5
+
6
module HanyuCoverage

  # Defines one parse test and one unparse test for every syllable listed in
  # valid_pinyin.yaml.
  class Test_ParseUnparse < Test::Unit::TestCase
    include Ting
    def initialize(s)
      super(s)
      @reader = Ting.reader(:hanyu, :no_tones)
      @writer = Ting.writer(:hanyu, :no_tones)
    end

    # Read the grid through the block form of File.open so the handle is
    # closed again; the previous File.open(...).read left it open.
    grid_file = File.expand_path('../../lib/ting/data/valid_pinyin.yaml', __FILE__)
    grid = YAML.load(File.open(grid_file, 'r:UTF-8') { |file| file.read })

    # The grid maps a final's name to a hash of initial name => hanyu spelling.
    grid.each do |fname, row|
      row.each do |iname, hanyu|
        hanyu = hanyu.force_encoding('UTF-8')
        # Spelling with 'ü' and 'ê' replaced, used only in the generated
        # test method names.
        safe_hanyu = hanyu.gsub('ü','v').gsub('ê','_e')

        define_method :"test_unparse_#{safe_hanyu}" do
          assert_equal(
            hanyu,
            @writer.unparse(
              Syllable.new(Initial.const_get(iname), Final.const_get(fname), Tones::NEUTRAL_TONE)
            ),
            "Wrong hanyu for Initial::#{iname}+Final::#{fname}, expected `#{hanyu}` "
          )
        end

        define_method :"test_parse_#{safe_hanyu}" do
          ts = @reader.parse(hanyu).first
          assert_not_nil(ts, "Reader<:hanyu, :no_tone>#parse('#{hanyu}') returned nil")
          assert_equal(Initial.const_get(iname), ts.initial, "Wrong initial for `#{hanyu}`, expected Initial::#{iname}")
          assert_equal(Final.const_get(fname), ts.final, "Wrong final for `#{hanyu}`, expected Final::#{fname}")
        end

      end
    end
  end
end