arnebrasseur-pinyin 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,14 @@
1
module Pinyin
  # All exceptions arising from this module inherit from Pinyin::Error
  Error = Class.new StandardError

  # Error that carries the input being processed and a position within it.
  #
  # input    - the object that was being parsed (exposed via #input)
  # position - the position associated with the failure (exposed via #position)
  class ParseError < Error
    attr_reader :input, :position

    def initialize(input, position)
      @input = input
      @position = position
      # Hand a real message to StandardError; without this call the message
      # would just be the class name.
      super("unable to parse #{input.inspect} at position #{position}")
    end
  end
end
14
+
@@ -0,0 +1,183 @@
1
+ # Classes and constants used throughout the module
2
+ # * Initial
3
+ # * Final
4
+ # * TonelessSyllable
5
+ # * Syllable
6
+ # * ILLEGAL_COMBINATIONS
7
+
8
module Pinyin

  #
  # A Chinese initial (start of a syllable)
  #
  # One instance is created per name in the list below while the class body
  # is evaluated, and stored in a constant of the same name (Initial::Bo,
  # Initial::Zhi, ...). After that +new+ is made private.
  #
  class Initial
    attr_reader :name

    def initialize(n)
      @name = n
    end

    # Every initial, in declaration order.
    All = %w(
      Empty Bo Po Mo Fo De Te Ne Le Ge Ke He
      Ji Qi Xi Zhi Chi Shi Ri Zi Ci Si
    ).map { |c| const_set(c, new(c)) }.freeze

    # Must come after All has been built.
    private_class_method :new

    # The initials partitioned into groups; each group is also reachable
    # through its own Group_N constant.
    Groups = [
      Group_0 = [Empty].freeze,
      Group_1 = [Bo, Po, Mo, Fo].freeze,      # Bilabial and Labio-dental
      Group_2 = [De, Te, Ne, Le].freeze,      # Plosive, nasal and lateral approximant alveolar
      Group_3 = [Ge, Ke, He].freeze,          # Velar
      Group_4 = [Ji, Qi, Xi].freeze,          # Alveolo-palatal
      Group_5 = [Zhi, Chi, Shi, Ri].freeze,   # Retroflex
      Group_6 = [Zi, Ci, Si].freeze           # Fricative and affricate alveolar
    ].freeze

    # Combines this initial with the final +f+ into a TonelessSyllable.
    def +(f)
      TonelessSyllable.new(self, f)
    end

    def inspect
      "<#{self.class.name}::#{@name}>"
    end
  end

  #
  # A Chinese final (end of a syllable)
  #
  # Built the same way as Initial: one constant per name, then +new+ is
  # made private.
  #
  class Final
    attr_reader :name

    def initialize(n)
      @name = n
    end

    # Every final, in declaration order.
    All = %w(
      Empty A O E Ee Ai Ei Ao Ou An En Ang Eng Ong Er
      I Ia Io Ie Iai Iao Iu Ian In Iang Ing
      U Ua Uo Uai Ui Uan Un Uang Ueng V Ue Van Vn Iong
    ).map { |c| const_set(c, new(c)) }.freeze

    # Must come after All has been built.
    private_class_method :new

    # The finals partitioned into groups; each group is also reachable
    # through its own Group_X constant.
    Groups = [
      Group_0 = [Empty].freeze,
      Group_A = [A, O, E, Ee, Ai, Ei, Ao, Ou, An, En, Ang, Eng, Ong, Er].freeze,
      Group_I = [I, Ia, Io, Ie, Iai, Iao, Iu, Ian, In, Iang, Ing].freeze,
      Group_U = [U, Ua, Uo, Uai, Ui, Uan, Un, Uang, Ueng].freeze,
      Group_V = [V, Ue, Van, Vn, Iong].freeze
    ].freeze

    def inspect
      "<#{self.class.name}::#{name}>"
    end
  end


  #
  # Combination of an initial and a final
  # Not to be confused with a syllable that has the neutral tone
  #
  class TonelessSyllable
    attr_accessor :initial, :final

    def initialize(initial, final)
      self.initial = initial
      self.final = final
    end

    # Adds a tone, producing a Syllable with the same initial and final.
    def +(tone)
      Syllable.new(initial, final, tone)
    end

    def inspect
      "<#{self.class.name} <initial=#{initial.name}, final=#{final.name}>>"
    end

    # True when some ILLEGAL_COMBINATIONS entry lists initial +i+ together
    # with final +f+.
    def self.illegal?(i, f)
      ILLEGAL_COMBINATIONS.any? { |in_gr, fin_gr| in_gr.include?(i) && fin_gr.include?(f) }
    end

    alias :to_s :inspect
  end


  #
  # Syllable : initial, final and tone
  #
  class Syllable < TonelessSyllable
    attr_accessor :tone

    def initialize(initial, final, tone)
      super(initial, final)
      self.tone = tone
    end

    def inspect
      "<#{self.class.name} <initial=#{initial.name}, final=#{final.name}, tone=#{tone}>>"
    end

    # Re-aliased on purpose: the alias inherited from TonelessSyllable is
    # bound to the parent's #inspect, not to the override above.
    alias :to_s :inspect
  end

  #
  # Some groups of initials and finals may not be combined
  # This list is not exhaustive but is sufficient to resolve ambiguity
  #
  # Each entry is a pair [initials, finals]; any initial from the first
  # list together with any final from the second list is illegal.
  #
  ILLEGAL_COMBINATIONS =
    [
      [Initial::Group_0, Final::Group_0],
      [Initial::Group_1, Final::Group_0],
      [Initial::Group_2, Final::Group_0],
      [Initial::Group_3, Final::Group_0],
      [Initial::Group_4, Final::Group_0],

      [Initial::Group_4, Final::Group_U],
      [Initial::Group_4, Final::Group_A],

      [Initial::Group_3, Final::Group_I],
      [Initial::Group_5, Final::Group_I],
      [Initial::Group_6, Final::Group_I],

      [Initial::Group_1, Final::Group_V],
      [Initial::Group_3, Final::Group_V],

      #2008.05.26 lo is also valid!
      #[Initial::Group_2, [Final::O]], #Only bo, po, mo and fo are valid -o combinations
      [Initial::Group_3, [Final::O]],
      [Initial::Group_4, [Final::O]],
      [Initial::Group_5, [Final::O]],
      [Initial::Group_6, [Final::O]],

      [[Initial::Empty], [Final::Ong]]
      # TODO: Ong is actually the same as Ueng, in Hanyu Pinyin : -ong or weng
    ].freeze

  class << self

    #
    # Yields a block for any valid initial/final pair
    #
    # The pairs are read from data/valid_pinyin.yaml next to this file. The
    # file is iterated as final name => entries, and the first element of
    # each entry is used as the initial name; both names are resolved to the
    # constants defined on Final and Initial.
    #
    # Without a block an Enumerator over the same pairs is returned instead
    # of raising LocalJumpError.
    #
    def valid_combinations
      return enum_for(:valid_combinations) unless block_given?

      require 'yaml' # only needed by this method, so it is loaded lazily
      path = File.join(File.dirname(__FILE__), 'data', 'valid_pinyin.yaml')
      YAML.load(File.read(path)).each do |final_name, initials|
        final = Final.const_get(final_name)
        initials.each do |initial_name, _pinyin|
          yield(Initial.const_get(initial_name), final)
        end
      end
    end
  end

end
@@ -0,0 +1,16 @@
1
class String
  # Memoises pretty_tones results, keyed by the down-cased numbered syllable.
  PINYIN_CACHE = {}

  # One to five letters (including ü/Ü) followed by a single digit.
  PINYIN_NUMBERED_SYLLABLE = /[A-Za-züÜ]{1,5}\d/

  # Returns a copy of the string in which "u:" is replaced by "ü" and every
  # letters-plus-digit run is replaced by whatever
  # Pinyin.HanyuWriter(:accents) produces for it. Text that does not match
  # is left untouched. Results are cached in PINYIN_CACHE.
  def pretty_tones
    gsub('u:', 'ü').gsub(PINYIN_NUMBERED_SYLLABLE) do |m|
      key = m.downcase
      PINYIN_CACHE[key] ||= (Pinyin.HanyuWriter(:accents) << Pinyin.HanyuReader(:numbers).parse(key))
    end
  end

  # Collects every letters-plus-digit run in the string (after replacing
  # "u:" by "ü"), passes each one through Pinyin.HanyuReader(:numbers) and
  # Pinyin.ZhuyinWriter(:marks), and joins the results with single spaces.
  # Anything that does not match is dropped.
  def bpmf
    gsub('u:', 'ü').scan(PINYIN_NUMBERED_SYLLABLE).map do |m|
      Pinyin.ZhuyinWriter(:marks) << (Pinyin.HanyuReader(:numbers) << m.downcase)
    end.join(' ')
  end
end
@@ -0,0 +1,12 @@
1
class String
  # Fallback String#chars returning an Array of one-character strings.
  #
  # It is installed only when the running Ruby does not already provide a
  # String#chars that returns an Array. The fallback decodes the string as
  # UTF-8 via unpack('U*'), so unconditionally overriding the built-in would
  # make #chars raise on strings that are not valid UTF-8.
  unless ''.respond_to?(:chars) && ''.chars.is_a?(Array)
    def chars
      unpack('U*').map { |c| [c].pack('U') }
    end
  end
end
6
+
7
module Kernel
  # Yields +s+ to the block and then returns +s+ itself; the block's own
  # return value is ignored.
  #
  #   returning([]) { |list| list << 1 }   # => [1]
  def returning(s)
    yield(s)
    s
  end
end
@@ -0,0 +1,47 @@
1
module Pinyin
  #
  # Base class for Tone classes
  #
  # The class-level methods below are the defaults: they never write a tone
  # and always report the neutral tone.
  #
  class Tone
    VALID_TONES = 1..5
    MAX_TONE = NEUTRAL_TONE = 5

    class << self
      # Returns the syllable +s+ unchanged; the tone +t+ is ignored.
      def add_tone(s, t)
        s
      end

      # Always reports NEUTRAL_TONE.
      def peek_tone(s)
        NEUTRAL_TONE
      end

      # Returns [NEUTRAL_TONE, s]: the tone and the syllable without it.
      def pop_tone(s)
        [NEUTRAL_TONE, s]
      end

      private

      # Maps any integer onto VALID_TONES. Values already in range come back
      # unchanged; others are wrapped modulo MAX_TONE, with a remainder of 0
      # meaning NEUTRAL_TONE.
      def normalize(t)
        return t if VALID_TONES === t

        t %= MAX_TONE
        # Return the wrapped value explicitly: a trailing
        # `t = NEUTRAL_TONE if t == 0` evaluates to nil whenever t != 0.
        t == 0 ? NEUTRAL_TONE : t
      end
    end
  end
end
35
+
36
+ require "pinyin/tones/marks"
37
+ require "pinyin/tones/numbers"
38
+ require "pinyin/tones/accents"
39
+ require "pinyin/tones/no_tones"
40
+
41
module Pinyin
  module Tones
    # Every tone notation class, in the order listed here.
    All = [Numbers, Marks, Accents, NoTones].freeze

    # Same values as the constants on Pinyin::Tone, re-exported in this
    # namespace and taken from Tone so the two sets cannot drift apart.
    MAX_TONE = Tone::MAX_TONE
    NEUTRAL_TONE = Tone::NEUTRAL_TONE
    VALID_TONES = Tone::VALID_TONES
  end
end
@@ -0,0 +1,62 @@
1
module Pinyin
  module Tones
    #
    # Tone notation that puts an accent on one vowel of the syllable.
    # Internally 'v' stands in for 'ü'.
    #
    class Accents < Tone
      class << self

        # Unicode code points per vowel. Index 0 is the plain letter,
        # indexes 1-4 are that letter carrying tones 1-4.
        UNICODE_TONE_GLYPHS = {
          :a => [97, 257, 225, 462, 224],
          :e => [101, 275, 233, 283, 232],
          :i => [105, 299, 237, 464, 236],
          :o => [111, 333, 243, 466, 242],
          :u => [117, 363, 250, 468, 249],
          :v => [252, 470, 472, 474, 476]
        }.freeze

        # Returns the one-character string for +letter+ (anything responding
        # to to_sym: a, e, i, o, u or v) carrying +tone+. The tone is taken
        # modulo MAX_TONE, so the neutral tone selects the plain letter.
        def tone_glyph(letter, tone)
          u = UNICODE_TONE_GLYPHS[letter.to_sym][tone % MAX_TONE]
          [u].pack('U') if u
        end

        # Returns a copy of +syll+ with the accent for +tone+ applied; the
        # argument itself is not modified.
        #
        # The accent goes on 'a' if present, otherwise 'e', otherwise 'o',
        # otherwise on the last of i/u/ü (so "liu" gives "liú" and "gui"
        # gives "guì"). A syllable without any of these letters is returned
        # as is. Any 'v' placeholder left over is turned back into 'ü', so
        # "lüe" gives "lüè" rather than "lvè".
        def add_tone(syll, tone)
          syll = syll.gsub('ü', 'v') # work on a copy, with 'v' standing in for 'ü'
          tone %= MAX_TONE
          marked =
            case syll
            when /a/ then syll.sub(/a/, tone_glyph(:a, tone))
            when /e/ then syll.sub(/e/, tone_glyph(:e, tone))
            when /o/ then syll.sub(/o/, tone_glyph(:o, tone))
            else
              pos = syll.rindex(/[iuv]/)
              if pos
                syll[0...pos] + tone_glyph(syll[pos, 1], tone) + syll[(pos + 1)..-1]
              else
                syll
              end
            end
          marked.gsub('v', 'ü')
        end

        # Returns the tone written on +syll+ without changing it. A syllable
        # with no accented vowel reports NEUTRAL_TONE.
        def peek_tone(syll)
          found = locate_tone_glyph(syll.unpack('U*'))
          found ? normalize(found[1]) : NEUTRAL_TONE
        end

        # Returns [tone, syllable], where the accented vowel has been
        # replaced by its plain letter ('v' for an accented or plain ü that
        # was matched). A syllable containing none of the known vowels comes
        # back unchanged with NEUTRAL_TONE.
        def pop_tone(syll)
          unpacked = syll.unpack('U*')
          vowel, tone_index, position = locate_tone_glyph(unpacked)
          return [NEUTRAL_TONE, syll] unless vowel

          # Store a code point, not a String: the array is re-packed below.
          unpacked[position] = vowel.to_s.unpack('U').first
          [normalize(tone_index), unpacked.pack('U*')]
        end

        private

        # Yields each vowel symbol with its row of code points.
        def each_tone_glyph
          [:a, :e, :i, :o, :u, :v].each do |v| #Order is significant
            vowel, tones = v, UNICODE_TONE_GLYPHS[v]
            yield vowel, tones
          end
        end

        # Finds the code point in +codepoints+ that determines the tone.
        # Accented glyphs are searched first across all vowels; only when
        # none is present does a plain vowel (tone index 0) count. Searching
        # both at once would let the plain 'i' of "liú" hide the accented 'ú'.
        #
        # Returns [vowel, tone_index, position] or nil when nothing matches.
        def locate_tone_glyph(codepoints)
          [true, false].each do |accented|
            each_tone_glyph do |vowel, tones|
              candidates = accented ? tones[1..-1] : tones[0, 1]
              position = codepoints.index { |c| candidates.include?(c) }
              return [vowel, tones.index(codepoints[position]), position] if position
            end
          end
          nil
        end

      end
    end
  end
end
@@ -0,0 +1,30 @@
1
module Pinyin
  module Tones
    #
    # Tone notation that appends a spacing tone mark to the syllable.
    #
    class Marks < Tone
      class << self

        # Mark per tone, indexed by tone modulo 5: index 0 is the neutral
        # tone (5), index 1 (first tone) has no mark.
        GLYPHS = ['˙', '', 'ˊ', 'ˇ', 'ˋ'].freeze

        # A run of characters containing none of the marks. Built once
        # instead of on every pop_tone call.
        UNMARKED = /[^#{GLYPHS.join}]+/

        # Returns +syll+ with the mark for +tone+ appended.
        def add_tone(syll, tone)
          syll + GLYPHS[tone % GLYPHS.size]
        end

        # Returns the tone indicated by the mark found in +syll+; a syllable
        # without any mark is reported as first tone.
        def peek_tone(syll)
          case syll
          when /ˊ/ then 2
          when /ˇ/ then 3
          when /ˋ/ then 4
          when /˙/ then NEUTRAL_TONE
          else
            1
          end
        end

        # Returns [tone, syllable without mark]. The syllable part is the
        # first run of non-mark characters, so a mark written in front of
        # the syllable is handled as well; it is nil when there is no such
        # run.
        def pop_tone(syll)
          [peek_tone(syll), syll[UNMARKED]]
        end

      end
    end
  end
end
@@ -0,0 +1,6 @@
1
module Pinyin
  module Tones
    # NoTones is the Tone base class itself, whose class methods leave
    # syllables unchanged and always report the neutral tone.
    NoTones = Pinyin::Tone
  end
end
6
+
@@ -0,0 +1,25 @@
1
module Pinyin
  module Tones
    #
    # Tone notation that writes the tone as a trailing digit.
    #
    class Numbers < Tone
      class << self

        # Returns +syll+ followed by the normalized tone number.
        def add_tone(syll, tone)
          syll + normalize(tone).to_s
        end

        # Returns the normalized value of the trailing digit of +syll+, or
        # NEUTRAL_TONE when the syllable does not end in a digit.
        def peek_tone(syll)
          if syll =~ /(\d)\Z/
            normalize Integer($1)
          else
            NEUTRAL_TONE
          end
        end

        # Returns [tone, leading run of non-digit characters]; the second
        # element is nil when the syllable starts with a digit or is empty.
        def pop_tone(syll)
          [peek_tone(syll), syll[/\A\D+/]]
        end

      end
    end
  end
end
@@ -0,0 +1,22 @@
1
require 'rubygems'
require 'hoe'

# Make the library in ./lib loadable so Pinyin::VERSION can be read below.
$:.unshift './lib'

require 'pinyin'

# Gem specification and packaging tasks, declared through Hoe.
Hoe.new('pinyin', Pinyin::VERSION) do |p|
  p.rubyforge_name = 'pinyin'
  p.summary = 'A conversion library for Chinese transcription methods like Hanyu Pinyin, Bopomofo and Wade-Giles.'
  # Description and change log are lifted from the project's text files.
  p.description = p.paragraphs_of('README.txt', 2).join
  p.url = 'http://rubyforge.org/projects/pinyin'
  p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
  p.email = 'pinyin@arnebrasseur.net'
  p.author = 'Arne Brasseur'
  p.extra_deps << ['facets', '>= 2.4.0']
  p.spec_extras = {
    :extra_rdoc_files => ['README.txt', 'History.txt'],
    :rdoc_options => ['--main', 'README.txt'],
    :platform => Gem::Platform::RUBY
  }
end
@@ -0,0 +1,4 @@
1
# Rebuild the gem from the working tree and reinstall it.
# Removing the old version is allowed to fail (nothing may be installed yet).
sudo gem uninstall pinyin
# Each step runs only if the previous one succeeded, so a failed build
# never leads to installing a stale package.
rake clean &&
  rake package &&
  sudo gem install pkg/pinyin -y
@@ -0,0 +1,35 @@
1
+ require 'pinyin'
2
+ require 'test/unit'
3
+ require 'csv'
4
+
5
+
6
+ # This test uses the chart from pinyin.info to compare all implemented conversion types
7
+ # Since I can't find another reference of the hanyu pinyin 'lo', I have removed it from the table
8
+
9
# Converts every row of the comparison chart between every pair of
# transcription systems and checks the result against the chart.
class TestCompare < Test::Unit::TestCase
  # Parsed comparison chart; the first row holds the system names that label
  # the columns.
  CHART = CSV.parse(File.read(File.join(File.dirname(__FILE__), '..', 'lib', 'pinyin', 'data', 'comparison.csv')))
  COMPARE = [:hanyu, :wadegiles, :zhuyin, :tongyong]


  # Test all combinations, including parsing/unparsing the same type

  def test_do_comparisons
    COMPARE.each do |from|
      COMPARE.each do |to|
        compare(from, to)
      end
    end
  end

  # Reads every chart value of the +from+ column with a toneless reader,
  # writes it with a toneless +to+ writer, and asserts that the result
  # equals the chart value in the +to+ column.
  def compare(from, to)
    reader = Pinyin::Reader.new(from, :no_tones)
    writer = Pinyin::Writer.new(to, :no_tones)

    header = CHART[0]
    ifrom = header.index(from.to_s)
    ito = header.index(to.to_s)
    # Fail with a readable message instead of a TypeError on vals[nil].
    assert(ifrom && ito, "comparison.csv has no column for #{from} or #{to}")

    CHART[1..-1].each do |vals|
      source = vals[ifrom].strip
      expected = vals[ito].strip
      # assert_equal already reports the expected value; name the source
      # value so a failing row can be found in the chart.
      assert_equal(expected, writer << (reader << source), "Converting from #{from} to #{to} value #{source}")
    end
  end
end