ting 0.3.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,69 +1,75 @@
1
- # coding: utf-8
2
-
3
- module Ting
4
- module Tones
5
- class Accents < Tone
6
- class <<self
7
-
8
- UNICODE_TONE_GLYPHS={
9
- :a=>[97, 257, 225, 462, 224],
10
- :e=>[101, 275, 233, 283, 232],
11
- :i=>[105, 299, 237, 464, 236],
12
- :o=>[111, 333, 243, 466, 242],
13
- :u=>[117, 363, 250, 468, 249],
14
- :v=>[252, 470, 472, 474, 476]
15
- }
16
-
17
- def tone_glyph(letter,tone)
18
- if (u=UNICODE_TONE_GLYPHS[letter.to_sym][tone%MAX_TONE])
19
- [u].pack('U')
20
- end
21
- end
22
-
23
- def add_tone(syll, tone)
24
- syll.gsub!('ü','v')
25
- tone %= MAX_TONE
26
- case syll
27
- when /a/
28
- syll.sub(/a/, tone_glyph(:a,tone))
29
- when /e/
30
- syll.sub(/e/, tone_glyph(:e,tone))
31
- when /o/
32
- syll.sub(/o/, tone_glyph(:o,tone))
33
- when /(i|u|v)/
34
- syll.sub($1, tone_glyph($1,tone))
35
- else
36
- syll
37
- end
38
- end
39
-
40
- def peek_tone(syll)
41
- unpacked = syll.unpack('U*')
42
- each_tone_glyph do |vowel, tones|
43
- tone_glyph=unpacked.find {|t| tones.include?(t)}
44
- normalize( tones.index(tone_glyph) ) if tone_glyph
45
- end
46
- end
47
-
48
- def pop_tone(syll)
49
- unpacked = syll.unpack('U*')
50
- each_tone_glyph do |vowel, tones|
51
- if tone_glyph = unpacked.find {|t| tones.include?(t)}
52
- unpacked[unpacked.index(tone_glyph)]=vowel.to_s[0]
53
- break [normalize(tones.index(tone_glyph)), unpacked.pack('U*')]
54
- end
55
- end
56
- end
57
-
58
- private
59
- def each_tone_glyph
60
- [:a,:e,:i,:o,:u,:v].each do |v| #Order is significant
61
- vowel, tones = v, UNICODE_TONE_GLYPHS[v]
62
- yield vowel,tones
63
- end
64
- end
65
-
66
- end
67
- end
68
- end
69
- end
1
+ # coding: utf-8
2
+
3
+ module Ting
4
+ module Tones
5
+ class Accents < Tone
6
+ class << self
7
+
8
+ UNICODE_TONE_GLYPHS={
9
+ :a=>[97, 257, 225, 462, 224],
10
+ :e=>[101, 275, 233, 283, 232],
11
+ :i=>[105, 299, 237, 464, 236],
12
+ :o=>[111, 333, 243, 466, 242],
13
+ :u=>[117, 363, 250, 468, 249],
14
+ :v=>[252, 470, 472, 474, 476]
15
+ }
16
+
17
+ def tone_glyph(letter,tone)
18
+ if (u=UNICODE_TONE_GLYPHS[letter.to_sym][tone%MAX_TONE])
19
+ [u].pack('U')
20
+ end
21
+ end
22
+
23
+ def add_tone(syll, tone)
24
+ syll = syll.sub('ü','v')
25
+ tone %= MAX_TONE
26
+ case syll
27
+ when /a/
28
+ syll.sub(/a/, tone_glyph(:a,tone))
29
+ when /e/
30
+ syll.sub(/e/, tone_glyph(:e,tone))
31
+ when /o/
32
+ syll.sub(/o/, tone_glyph(:o,tone))
33
+ when /(i|u|v)\z/
34
+ syll.sub($1, tone_glyph($1,tone)).sub('v', 'ü')
35
+ when /(i|u|v)/
36
+ syll.sub($1, tone_glyph($1,tone)).sub('v', 'ü')
37
+ else
38
+ syll
39
+ end
40
+ end
41
+
42
+ def peek_tone(syll)
43
+ unpacked = syll.unpack('U*')
44
+ each_tone_glyph do |vowel, tones|
45
+ tone_glyph=unpacked.find {|t| tones.include?(t)}
46
+ normalize( tones.index(tone_glyph) ) if tone_glyph
47
+ end
48
+ end
49
+
50
+ # returns [ tone number, syllable without tone ]
51
+ # e.g. nǐ => [ 3, 'ni' ]
52
+ def pop_tone(syll)
53
+ unpacked = syll.unpack('U*')
54
+ each_tone_glyph do |vowel, tones|
55
+
56
+ if tone_glyph = unpacked.find {|t| tones.include?(t)}
57
+ unpacked[unpacked.index(tone_glyph)] = vowel.to_s.unpack('U').first
58
+ break [normalize(tones.index(tone_glyph)), unpacked.pack('U*').sub('v', 'ü')]
59
+ end
60
+
61
+ end
62
+ end
63
+
64
+ private
65
+ def each_tone_glyph
66
+ [:a,:e,:i,:o,:u,:v].each do |v| #Order is significant
67
+ vowel, tones = v, UNICODE_TONE_GLYPHS[v]
68
+ yield vowel,tones
69
+ end
70
+ end
71
+
72
+ end
73
+ end
74
+ end
75
+ end
@@ -12,7 +12,7 @@ module Ting
12
12
  end
13
13
 
14
14
  def peek_tone(syll)
15
- if t = GLYPHS.index(syll.uchars[-1])
15
+ if t = GLYPHS.index(syll.chars.last)
16
16
  return t
17
17
  end
18
18
  return NEUTRAL_TONE
@@ -1,7 +1,7 @@
1
- module Ting
2
- module Tones
3
- class NoTones < Tone
4
- end
5
- end
6
- end
7
-
1
+ module Ting
2
+ module Tones
3
+ class NoTones < Tone
4
+ end
5
+ end
6
+ end
7
+
@@ -1,25 +1,25 @@
1
- module Ting
2
- module Tones
3
- class Numbers < Tone
4
- class <<self
5
-
6
- def add_tone(syll, tone)
7
- syll + normalize(tone).to_s
8
- end
9
-
10
- def peek_tone(syll)
11
- if syll =~ /(\d)\Z/
12
- normalize Integer($1)
13
- else
14
- NEUTRAL_TONE
15
- end
16
- end
17
-
18
- def pop_tone(syll)
19
- [ peek_tone(syll), syll[/\A\D+/] ]
20
- end
21
-
22
- end
23
- end
24
- end
25
- end
1
+ module Ting
2
+ module Tones
3
+ class Numbers < Tone
4
+ class <<self
5
+
6
+ def add_tone(syll, tone)
7
+ syll + normalize(tone).to_s
8
+ end
9
+
10
+ def peek_tone(syll)
11
+ if syll =~ /(\d)\Z/
12
+ normalize Integer($1)
13
+ else
14
+ NEUTRAL_TONE
15
+ end
16
+ end
17
+
18
+ def pop_tone(syll)
19
+ [ peek_tone(syll), syll[/\A\D+/] ]
20
+ end
21
+
22
+ end
23
+ end
24
+ end
25
+ end
@@ -12,7 +12,7 @@ module Ting
12
12
  end
13
13
 
14
14
  def peek_tone(syll)
15
- if t = GLYPHS.index(syll.uchars[-1])
15
+ if t = GLYPHS.index(syll.chars.last)
16
16
  return t
17
17
  end
18
18
  return NEUTRAL_TONE
@@ -1,3 +1,3 @@
1
1
  module Ting
2
- VERSION = '0.3.0'
2
+ VERSION = '0.9.0'
3
3
  end
@@ -0,0 +1,23 @@
1
+ module Ting
2
+ class Writer
3
+ include Procable
4
+
5
+ def initialize(conv, tone)
6
+ @conv = conv.to_s
7
+ @tone = Tones.const_get Ting.camelize(tone.to_s)
8
+ end
9
+
10
+ def generate(syll)
11
+ Array(syll).map do |s|
12
+ syllable = Conversions.unparse(@conv, s)
13
+ str = @tone.add_tone(syllable, s.tone)
14
+ str.capitalize! if s.capitalized?
15
+ str
16
+ end.join(' ')
17
+ end
18
+
19
+ alias :<< :generate
20
+ alias :unparse :generate
21
+ alias :call :generate
22
+ end
23
+ end
@@ -0,0 +1,78 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'csv'
4
+ require 'rspec/autorun'
5
+
6
+ # Describes a problem with CSV parsing on JRuby, see output at the bottom.
7
+ #
8
+ # Version:
9
+ # jruby 1.7.2 (1.9.3p327) 2013-01-04 302c706 on Java HotSpot(TM) Server VM 1.7.0_15-b03 [linux-i386]
10
+ # Has since been fixed, verified with 1.7.11
11
+
12
+ describe "a problem with jruby?" do
13
+ let(:csv_full_contents) {
14
+ '"zhuyin","wadegiles","mps2","yale","tongyong","hanyu","gwoyeu1","gwoyeu2","gwoyeu3","gwoyeu4"
15
+ "ㄚ","a","a","a","a","a","a","ar","aa","ah"
16
+ "ㄞ","ai","ai","ai","ai","ai","ai","air","ae","ay"
17
+ "ㄢ","an","an","an","an","an","an","arn","aan","ann"
18
+ "ㄤ","ang","ang","ang","ang","ang","ang","arng","aang","anq"
19
+ "ㄠ","ao","au","au","ao","ao","au","aur","ao","aw"
20
+ "ㄅㄚ","pa","ba","ba","ba","ba","ba","bar","baa","bah"
21
+ "ㄅㄞ","pai","bai","bai","bai","bai","bai","bair","bae","bay"
22
+ "ㄅㄢ","pan","ban","ban","ban","ban","ban","barn","baan","bann"
23
+ "ㄅㄤ","pang","bang","bang","bang","bang","bang","barng","baang","banq"
24
+ "ㄅㄠ","pao","bau","bau","bao","bao","bau","baur","bao","baw"
25
+ "ㄅㄟ","pei","bei","bei","bei","bei","bei","beir","beei","bey"
26
+ "ㄅㄣ","pen","ben","ben","ben","ben","ben","bern","been","benn"
27
+ "ㄅㄥ","peng","beng","beng","beng","beng","beng","berng","beeng","benq"
28
+ "ㄅㄧ","pi","bi","bi","bi","bi","bi","byi","bii","bih"
29
+ "ㄅㄧㄢ","pien","bian","byan","bian","bian","bian","byan","bean","biann"
30
+ "ㄅㄧㄠ","piao","biau","byau","biao","biao","biau","byau","beau","biaw"
31
+ "ㄅㄧㄝ","pieh","bie","bye","bie","bie","bie","bye","biee","bieh"
32
+ "ㄅㄧㄣ","pin","bin","bin","bin","bin","bin","byn","biin","binn"
33
+ "ㄅㄧㄥ","ping","bing","bing","bing","bing","bing","byng","biing","binq"'
34
+ }
35
+
36
+ def lines(range)
37
+ csv_full_contents.split("\n")[range].join("\n")
38
+ end
39
+
40
+ it "this actually does raise an exception, so this spec fails" do
41
+ expect{ CSV.parse(csv_full_contents) }.to_not raise_exception
42
+ end
43
+
44
+ it "using the first 15 lines still works ok" do
45
+ expect{ CSV.parse(lines(0..15))}.to_not raise_exception
46
+ end
47
+
48
+ it "from line 16 on there's a problem" do
49
+ expect{ CSV.parse(lines(0..16))}.to_not raise_exception
50
+ end
51
+
52
+ it "but line 16 itself isn't the culprit" do
53
+ expect{ CSV.parse(lines(3..18))}.to_not raise_exception
54
+ end
55
+ end
56
+
57
+
58
+ # 1) a problem with jruby? this actually does raise an exception, so this spec fails
59
+ # Failure/Error: expect{ CSV.parse(csv_full_contents) }.to_not raise_exception
60
+ # expected no Exception, got #<ArgumentError: invalid byte sequence in UTF-8> with backtrace:
61
+ # # ./spec/jruby_csv_spec.rb:38:in `(root)'
62
+ # # ./spec/jruby_csv_spec.rb:38:in `(root)'
63
+ # # ./spec/jruby_csv_spec.rb:38:in `(root)'
64
+
65
+ # 2) a problem with jruby? from line 16 on there's a problem
66
+ # Failure/Error: expect{ CSV.parse(lines(0..16))}.to_not raise_exception
67
+ # expected no Exception, got #<ArgumentError: invalid byte sequence in UTF-8> with backtrace:
68
+ # # ./spec/jruby_csv_spec.rb:46:in `(root)'
69
+ # # ./spec/jruby_csv_spec.rb:46:in `(root)'
70
+ # # ./spec/jruby_csv_spec.rb:46:in `(root)'
71
+
72
+ # Finished in 0.111 seconds
73
+ # 4 examples, 2 failures
74
+
75
+ # Failed examples:
76
+
77
+ # rspec ./spec/jruby_csv_spec.rb:37 # a problem with jruby? this actually does raise an exception, so this spec fails
78
+ # rspec ./spec/jruby_csv_spec.rb:45 # a problem with jruby? from line 16 on there's a problem
@@ -0,0 +1,3 @@
1
+ $:.unshift( File.expand_path('../lib', __FILE__) )
2
+
3
+ require 'ting'
@@ -0,0 +1,19 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'spec_helper'
3
+
4
+ describe Ting do
5
+ let(:pinyin) { 'dao4 ke3 dao4 fei1 chang2 dao4'.force_encoding('UTF-8') }
6
+ let(:bopomofo) { 'ㄉㄠˋ ㄎㄜˇ ㄉㄠˋ ㄈㄟ ㄔㄤˊ ㄉㄠˋ'.force_encoding('UTF-8') }
7
+
8
+ it 'should convert from Hany Pinyin to Bopomofo' do
9
+ Ting.from(:hanyu, :numbers).to(:zhuyin, :marks).convert(pinyin).should == bopomofo
10
+ end
11
+
12
+ it "should parse" do
13
+ Ting::Reader.new(:hanyu, :numbers).parse('Bei3').first.should == Ting::Syllable.new( Ting::Initial::Bo, Ting::Final::Ei, 3, true )
14
+ end
15
+
16
+ it 'should respect capitalization' do
17
+ Ting.from(:hanyu, :numbers).to(:hanyu, :accents).convert('Bei3 jing1').should == 'Běi jīng'
18
+ end
19
+ end
@@ -1,35 +1,43 @@
1
- require 'ting'
2
- require 'test/unit'
3
- require 'csv'
4
-
5
-
6
- # This test uses the chart from piyin.info to compare all implemted conversion types
7
- # Since I can't find another reference of the hanyu pinyin 'lo', I have removed it from the table
8
-
9
- class TestCompare < Test::Unit::TestCase
10
- CHART=CSV.parse(IO.read(File.dirname(__FILE__)+'/../lib/ting/data/comparison.csv'))
11
- COMPARE=[:hanyu, :wadegiles, :zhuyin, :tongyong]
12
-
13
-
14
- # Test all combinations, included parsing/unparsing the same type
15
-
16
- def test_do_comparisons
17
- COMPARE.each do |from|
18
- COMPARE.each do |to|
19
- compare(from,to)
20
- end
21
- end
22
- end
23
-
24
- def compare(from, to)
25
- reader = Ting.reader(from, :no_tones)
26
- writer = Ting.writer(to, :no_tones)
27
-
28
- ifrom = CHART[0].index from.to_s
29
- ito = CHART[0].index to.to_s
30
-
31
- CHART[1..-1].each do |vals|
32
- assert_equal(vals[ito].strip, writer << (reader << vals[ifrom].strip), "Converting `#{vals[ifrom]}' from #{from} to #{to} value #{vals[ito]}")
33
- end
34
- end
35
- end
1
+ require 'ting'
2
+ require 'test/unit'
3
+
4
+ # This test uses the chart from pinyin.info to compare all implemented conversion types
5
+ # Since I can't find another reference of the hanyu pinyin 'lo', I have removed it from the table
6
+
7
+ class TestCompare < Test::Unit::TestCase
8
+ CHART_FILE = File.expand_path('../../lib/ting/data/comparison.csv', __FILE__)
9
+ COMPARE=[:hanyu, :wadegiles, :zhuyin, :tongyong]
10
+
11
+ # Both Rubinius and JRuby are having trouble parsing our otherwise valid UTF-8 CSV file.
12
+ # See https://github.com/jruby/jruby/issues/563 for the corresponding JRuby bug report.
13
+ # So we do our own naive CSV parsing here.
14
+ CHART = begin
15
+ File.open(CHART_FILE, 'r:UTF-8').each_line.map do |line|
16
+ line.strip.split(',').map{|entry| entry[/\A"(.*)"\z/, 1]}
17
+ end
18
+ end
19
+
20
+
21
+
22
+ # Test all combinations, including parsing/unparsing the same type
23
+
24
+ def test_do_comparisons
25
+ COMPARE.each do |from|
26
+ COMPARE.each do |to|
27
+ compare(from,to)
28
+ end
29
+ end
30
+ end
31
+
32
+ def compare(from, to)
33
+ reader = Ting.reader(from, :no_tones)
34
+ writer = Ting.writer(to, :no_tones)
35
+
36
+ ifrom = CHART[0].index from.to_s
37
+ ito = CHART[0].index to.to_s
38
+
39
+ CHART[1..-1].each do |vals|
40
+ assert_equal(vals[ito].strip, writer << (reader << vals[ifrom].strip), "Converting `#{vals[ifrom]}' from #{from} to #{to} value #{vals[ito]}")
41
+ end
42
+ end
43
+ end
@@ -1,37 +1,42 @@
1
- require 'test/unit'
2
- require 'ting'
3
- require 'yaml'
4
-
5
- if RUBY_VERSION =~ /^1.8/
6
- $KCODE='u'
7
- end
8
-
9
- module HanyuCoverage
10
- grid=YAML.load(IO.read(File.dirname(__FILE__)+'/../lib/ting/data/valid_pinyin.yaml'))
11
- grid.each do |fname, row|
12
- row.each do |iname, hanyu|
13
- eval %[
14
- class Test_#{hanyu} < Test::Unit::TestCase
15
- include Ting
16
- def initialize(s)
17
- super(s)
18
- @reader = Ting.reader(:hanyu, :no_tones)
19
- @writer = Ting.writer(:hanyu, :no_tones)
20
- end
21
-
22
- def test_parse_#{hanyu}
23
- assert_equal('#{hanyu}', @writer.unparse(Syllable.new(Initial::#{iname}, Final::#{fname}, Tones::NEUTRAL_TONE)), 'Wrong hanyu for Initial::#{iname}+Final::#{fname}, expected `#{hanyu}` ')
24
- end
25
-
26
- def test_unparse_#{hanyu}
27
- ts=@reader.parse('#{hanyu}').first
28
- assert_not_nil(ts, 'Reader<:hanyu, :no_tone>#parse("#{hanyu}") returned nil')
29
- assert_equal(Initial::#{iname}, ts.initial, 'Wrong initial for `#{hanyu}`, expected Initial::#{iname}')
30
- assert_equal(Final::#{fname}, ts.final, 'Wrong final for `#{hanyu}`, expected Final::#{fname}')
31
- end
32
- end
33
- ]
34
- end
35
- end
36
-
37
- end
1
+ # -*- coding: utf-8 -*-
2
+ require 'test/unit'
3
+ require 'ting'
4
+ require 'yaml'
5
+
6
+ module HanyuCoverage
7
+
8
+ class Test_ParseUnparse < Test::Unit::TestCase
9
+ include Ting
10
+ def initialize(s)
11
+ super(s)
12
+ @reader = Ting.reader(:hanyu, :no_tones)
13
+ @writer = Ting.writer(:hanyu, :no_tones)
14
+ end
15
+
16
+ grid=YAML.load(File.open(File.expand_path('../../lib/ting/data/valid_pinyin.yaml', __FILE__), 'r:UTF-8').read)
17
+ grid.each do |fname, row|
18
+ row.each do |iname, hanyu|
19
+ hanyu=hanyu.force_encoding('UTF-8')
20
+ safe_hanyu = hanyu.gsub('ü','v').gsub('ê','_e')
21
+
22
+ define_method :"test_unparse_#{safe_hanyu}" do
23
+ assert_equal(
24
+ hanyu,
25
+ @writer.unparse(
26
+ Syllable.new(Initial.const_get(iname), Final.const_get(fname), Tones::NEUTRAL_TONE)
27
+ ),
28
+ "Wrong hanyu for Initial::#{iname}+Final::#{fname}, expected `#{hanyu}` "
29
+ )
30
+ end
31
+
32
+ define_method :"test_parse_#{safe_hanyu}" do
33
+ ts=@reader.parse(hanyu).first
34
+ assert_not_nil(ts, "Reader<:hanyu, :no_tone>#parse('#{hanyu}') returned nil")
35
+ assert_equal(Initial.const_get(iname), ts.initial, "Wrong initial for `#{hanyu}`, expected Initial::#{iname}")
36
+ assert_equal(Final.const_get(fname), ts.final, "Wrong final for `#{hanyu}`, expected Final::#{fname}")
37
+ end
38
+
39
+ end
40
+ end
41
+ end
42
+ end