pinyin 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,59 @@
1
module Pinyin
  module Tones
    # Tone handling for Hanyu Pinyin written with diacritics ("ma" + 1 => "mā").
    #
    # Bare syllables are expected to use ASCII vowels, with "v" standing in
    # for u-umlaut. Tones are integers; MAX_TONE, NEUTRAL_TONE and
    # Tones.normalize come from the enclosing Tones module.
    class Accents

      # Unicode codepoints for every vowel, indexed by tone modulo MAX_TONE:
      # index 0 is the unmarked (neutral) form, 1..4 carry the tone accent.
      UNICODE_TONE_GLYPHS = {
        :a => [97, 257, 225, 462, 224],
        :e => [101, 275, 233, 283, 232],
        :i => [105, 299, 237, 464, 236],
        :o => [111, 333, 243, 466, 242],
        :u => [117, 363, 250, 468, 249],
        :v => [252, 470, 472, 474, 476]
      }.freeze

      # Returns the UTF-8 glyph for +letter+ (String or Symbol) carrying
      # +tone+, or nil when +letter+ is not one of the known vowels.
      def self.tone_glyph(letter, tone)
        glyphs = UNICODE_TONE_GLYPHS[letter.to_sym]
        [glyphs[tone % MAX_TONE]].pack('U') if glyphs
      end

      # Returns a copy of +syll+ with the accent for +tone+ placed on the
      # vowel that carries it; +syll+ is returned untouched when it contains
      # no vowel.
      #
      # Placement: the first "a", else the first "e", else the first "o";
      # failing those, the LAST of i/u/v wherever it sits, so that "lin",
      # "jun" or "ying" get their accent as well as "liu" and "gui".
      def self.add_tone(syll, tone)
        pos = syll.index('a') || syll.index('e') || syll.index('o') || syll.rindex(/[iuv]/)
        return syll unless pos

        marked = syll.dup
        marked[pos, 1] = tone_glyph(syll[pos, 1], tone)
        marked
      end

      # Returns the tone (1..4, or NEUTRAL_TONE) encoded in +syll+ without
      # modifying it. Syllables without any accent are neutral.
      def self.peek_tone(syll)
        _vowel, tone, _pos = find_accent(syll.unpack('U*'))
        Tones.normalize(tone || 0)
      end

      # Returns [tone, bare_syllable], where bare_syllable is +syll+ with the
      # accented glyph replaced by its plain vowel ("v" for u-umlaut).
      def self.pop_tone(syll)
        codepoints = syll.unpack('U*')
        vowel, tone, pos = find_accent(codepoints)
        # unpack('U') yields an Integer codepoint on every Ruby version,
        # unlike String#[] which returns a String since 1.9.
        codepoints[pos] = vowel.to_s.unpack('U').first if pos
        [Tones.normalize(tone || 0), codepoints.pack('U*')]
      end

      # Yields each vowel symbol together with its list of tone codepoints.
      # It stays publicly callable: the bare `private` that used to precede
      # it never applied to `def self.` methods.
      def self.each_tone_glyph
        [:a, :e, :i, :o, :u, :v].each do |vowel|
          yield vowel, UNICODE_TONE_GLYPHS[vowel]
        end
      end

      # Locates the glyph that encodes the tone in +codepoints+.
      # Returns [vowel, tone_index, position], or nil when nothing is found.
      # Accented glyphs (index 1..4) always win over plain vowels, so "liú"
      # reports the accent on "ú" instead of stopping at the plain "i".
      # An unmarked u-umlaut is remembered as a fallback so that pop_tone can
      # still turn it back into "v".
      def self.find_accent(codepoints)
        fallback = nil
        codepoints.each_with_index do |codepoint, pos|
          each_tone_glyph do |vowel, tones|
            tone = tones.index(codepoint)
            next unless tone
            return [vowel, tone, pos] if tone > 0
            fallback ||= [vowel, tone, pos] if vowel == :v
          end
        end
        fallback
      end
      private_class_method :find_accent

    end
  end
end
@@ -0,0 +1,25 @@
1
module Pinyin
  module Tones
    # Tone handling with spacing tone marks appended to the syllable.
    # The first tone is written without any mark.
    class Marks
      # Mark for each tone, indexed by the normalized tone modulo 5, so the
      # neutral tone (5) lands on index 0.
      GLYPHS = ['˙', '', 'ˊ', 'ˇ', 'ˋ']

      # Leading run of characters that are not tone marks. Built once here
      # instead of being re-interpolated on every pop_tone call.
      UNMARKED_PREFIX = /\A[^#{GLYPHS.join}]+/

      # Returns +syll+ followed by the mark for +tone+.
      def self.add_tone(syll, tone)
        syll + GLYPHS[Tones.normalize(tone) % 5]
      end

      # Returns the tone encoded by the mark found in +syll+; a syllable
      # without any mark is first tone.
      def self.peek_tone(syll)
        # `when ... then` replaces the `when ... :` form, which is a syntax
        # error on Ruby 1.9 and later.
        case syll
        when /ˊ/ then 2
        when /ˇ/ then 3
        when /ˋ/ then 4
        when /˙/ then NEUTRAL_TONE
        else 1
        end
      end

      # Returns [tone, syllable_without_mark]. The second element is nil when
      # +syll+ starts with a tone mark or is empty.
      def self.pop_tone(syll)
        [peek_tone(syll), syll[UNMARKED_PREFIX]]
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
module Pinyin
  module Tones
    # Tone handler for text that carries no tone information at all:
    # nothing is ever written, and every syllable reads as neutral.
    module NoTones
      class << self
        # Returns the syllable unchanged; the tone is ignored.
        def add_tone(s, t)
          s
        end

        # Always reports the neutral tone.
        def peek_tone(s)
          NEUTRAL_TONE
        end

        # Returns the neutral tone paired with the untouched syllable.
        def pop_tone(s)
          [NEUTRAL_TONE, s]
        end
      end
    end
  end
end
16
+
@@ -0,0 +1,24 @@
1
module Pinyin
  module Tones
    # Tone handling with a trailing digit, e.g. "ma3".
    class Numbers
      class << self
        # Returns +syll+ followed by the normalized tone number.
        def add_tone(syll, tone)
          suffix = Tones.normalize(tone).to_s
          syll + suffix
        end

        # Returns the normalized tone given by the final digit of +syll+,
        # or the neutral tone when +syll+ does not end in a digit.
        def peek_tone(syll)
          trailing_digit = /(\d)\Z/.match(syll)
          return NEUTRAL_TONE unless trailing_digit

          Tones.normalize(Integer(trailing_digit[1]))
        end

        # Returns [tone, leading run of non-digit characters of +syll+].
        def pop_tone(syll)
          tone = peek_tone(syll)
          [tone, syll.slice(/\A\D+/)]
        end
      end
    end
  end
end
data/lib/tones.rb ADDED
@@ -0,0 +1,19 @@
1
+
2
module Pinyin
  module Tones
    # Names of the tone handlers shipped in the tones/ directory next to this
    # file ("accents", "marks", ...). File.basename is used instead of a
    # greedy regexp on the full path, which captured extra path segments
    # whenever an ancestor directory was itself called "tones". The list is
    # sorted because glob order is platform dependent on older Rubies.
    All = Dir[File.join(File.dirname(__FILE__), 'tones', '*.rb')].sort.map { |path| File.basename(path, '.rb') }

    # The neutral tone is numbered 5, which is also the highest tone number.
    MAX_TONE = NEUTRAL_TONE = 5
    VALID_TONES = 1..5
    # Values that remain out of range after the modulo, mapped to their tone.
    CONVERSIONS = {0 => NEUTRAL_TONE}

    # Maps any integer onto a valid tone (1..5).
    # Values already inside VALID_TONES are returned as they are; everything
    # else is reduced modulo MAX_TONE, with a remainder of 0 becoming the
    # neutral tone (so 0 => 5, 6 => 1, 10 => 5).
    def self.normalize(t)
      return t if VALID_TONES === t

      remainder = t % MAX_TONE
      CONVERSIONS[remainder] || remainder
    end
  end
end
19
+
data/rakefile ADDED
@@ -0,0 +1,39 @@
1
require 'rubygems'
require 'rake'
require 'rake/testtask'
# Gem::PackageTask replaces Rake::GemPackageTask (rake/gempackagetask), which
# is no longer shipped with Rake. The former Gem::manage_gems call has been
# dropped as well: it was removed from RubyGems and is not needed.
require 'rubygems/package_task'


# Plain `rake` runs the unit tests. This used to point at :test_units, a task
# that is not defined anywhere in this file.
task :default => ['test:pinyin']

namespace "test" do
  Rake::TestTask.new("pinyin") do |t|
    # Put lib/ on the load path of the test run itself instead of mutating
    # the load path of the rake process.
    t.libs << File.join(File.dirname(__FILE__), 'lib')
    t.pattern = 'test/*.rb'
    t.verbose = true
    t.warning = true
  end
end

spec = Gem::Specification.new do |s|
  s.name = "pinyin"
  s.version = "0.0.1"
  s.author = "Arne Brasseur"
  s.email = "pinyin at menolikespam arnebrasseur dot net"
  s.homepage = "http://svn.arnebrasseur.net/pinyin"
  s.platform = Gem::Platform::RUBY
  s.summary = "A conversion library for Chinese transcription methods in Ruby"
  s.files = FileList["**/*"]
  #FileList["{bin,docs,lib,test}/**/*"].exclude("rdoc").to_a
  s.require_path = "lib/"
  # not sure yet what these are
  # s.autorequire = "pinyin"
  # s.test_file = "test/runtest.rb"
  # s.has_rdoc = true
  # s.extra_rdoc_files = ['README']
end

# Defines the package/gem tasks; need_tar also builds a .tgz archive.
Gem::PackageTask.new(spec) do |pkg|
  pkg.need_tar = true
end
@@ -0,0 +1,35 @@
1
+ require 'pinyin'
2
+ require 'test/unit'
3
+ require 'csv'
4
+
5
+
6
+ # This test uses the chart from pinyin.info to compare all implemented conversion types
7
+ # Since I can't find another reference of the hanyu pinyin 'lo', I have removed it from the table
8
+
9
# Cross-checks every supported transcription against every other one, using
# the rows of lib/data/comparison.csv as the expected values.
class TestCompare < Test::Unit::TestCase
  # Parsed comparison table; the first row holds the transcription names.
  CHART = CSV.parse(IO.read(File.dirname(__FILE__)+'/../lib/data/comparison.csv'))
  # Transcription systems that take part in the comparison.
  COMPARE = [:hanyu, :wadegiles, :zhuyin, :tongyong]

  # Runs the comparison for every ordered pair of systems, including a
  # system paired with itself (parse, then write back the same type).
  def test_do_comparisons
    COMPARE.product(COMPARE).each do |from, to|
      compare(from, to)
    end
  end

  # Converts each chart entry of column +from+ into system +to+ and asserts
  # that the result equals the chart's own entry for +to+.
  def compare(from, to)
    reader = Pinyin::Reader.new(from, :no_tones)
    writer = Pinyin::Writer.new(to, :no_tones)

    header = CHART[0]
    from_col = header.index(from.to_s)
    to_col = header.index(to.to_s)

    # Every row after the header is one syllable in all systems.
    CHART[1..-1].each do |row|
      expected = row[to_col].strip
      converted = writer << (reader << row[from_col].strip)
      assert_equal(expected, converted, "Converting from #{from} to #{to} value #{row[to_col]}")
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'test/unit'
2
+ require 'pinyin.rb'
3
+ require 'yaml'
4
# $KCODE only has an effect on Ruby 1.8; later versions ignore it and warn.
$KCODE = 'u' if RUBY_VERSION < '1.9'

# Generates one Test::Unit test case class per entry of
# lib/data/valid_pinyin.yaml. The file is read as a two-level mapping:
# outer key (fname) => { inner key (iname) => hanyu syllable }, where fname
# and iname are used as constant names under Final:: and Initial::.
module HanyuCoverage
  grid = YAML.load(IO.read(File.dirname(__FILE__)+'/../lib/data/valid_pinyin.yaml'))
  grid.each do |fname, row|
    row.each do |iname, hanyu|
      # NOTE(review): the class source is assembled from the YAML values and
      # passed to eval; this is only acceptable because the fixture ships
      # with the gem. Never point it at data from an untrusted source.
      eval %[
        class Test_#{hanyu} < Test::Unit::TestCase
          include Pinyin
          def initialize(s)
            super(s)
            @reader = Reader.new(:hanyu, :no_tones)
            @writer = Writer.new(:hanyu, :no_tones)
          end

          def test_parse_#{hanyu}
            assert_equal('#{hanyu}', @writer.unparse(Syllable.new(Initial::#{iname}, Final::#{fname}, Tones::NEUTRAL_TONE)), 'Wrong hanyu for Initial::#{iname}+Final::#{fname}, expected `#{hanyu}` ')
          end

          def test_unparse_#{hanyu}
            parsed = @reader.parse('#{hanyu}')
            ts = parsed.is_a?(Array) ? parsed.first : parsed
            assert_not_nil(ts, 'Reader<:hanyu, :no_tone>#parse("#{hanyu}") returned nil')
            assert_equal(Initial::#{iname}, ts.initial, 'Wrong initial for `#{hanyu}`, expected Initial::#{iname}')
            assert_equal(Final::#{fname}, ts.final, 'Wrong final for `#{hanyu}`, expected Final::#{fname}')
          end
        end
      ]
    end
  end

end
metadata ADDED
@@ -0,0 +1,74 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.0
3
+ specification_version: 1
4
+ name: pinyin
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.0.1
7
+ date: 2007-07-26 00:00:00 +02:00
8
+ summary: A conversion library for Chinese transcription methods in Ruby
9
+ require_paths:
10
+ - lib/
11
+ email: pinyin at menolikespam arnebrasseur dot net
12
+ homepage: http://svn.arnebrasseur.net/pinyin
13
+ rubyforge_project:
14
+ description:
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: false
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Arne Brasseur
31
+ files:
32
+ - examples
33
+ - lib
34
+ - rakefile
35
+ - test
36
+ - TODO
37
+ - examples/cgiform
38
+ - examples/hello.rb
39
+ - examples/cgiform/cgiform.rb
40
+ - examples/cgiform/template.rhtml
41
+ - lib/conversions.rb
42
+ - lib/data
43
+ - lib/exception.rb
44
+ - lib/groundwork.rb
45
+ - lib/pinyin.rb
46
+ - lib/support.rb
47
+ - lib/tones
48
+ - lib/tones.rb
49
+ - lib/data/comparison.csv
50
+ - lib/data/final.csv
51
+ - lib/data/initial.csv
52
+ - lib/data/paladiy.txt
53
+ - lib/data/rules.yaml
54
+ - lib/data/valid_pinyin.yaml
55
+ - lib/tones/accents.rb
56
+ - lib/tones/marks.rb
57
+ - lib/tones/no_tones.rb
58
+ - lib/tones/numbers.rb
59
+ - test/comparison_test.rb
60
+ - test/hanyu_coverage.rb
61
+ test_files: []
62
+
63
+ rdoc_options: []
64
+
65
+ extra_rdoc_files: []
66
+
67
+ executables: []
68
+
69
+ extensions: []
70
+
71
+ requirements: []
72
+
73
+ dependencies: []
74
+