pinyin 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,59 @@
1
module Pinyin
  module Tones
    # Tone notation that writes the tone as a diacritic on one vowel of the
    # syllable (e.g. "ma" + tone 3 => "mǎ").
    #
    # Relies on MAX_TONE, NEUTRAL_TONE and Tones.normalize from the enclosing
    # Tones module.
    class Accents

      # Unicode code points per base vowel, indexed by (tone % MAX_TONE):
      # index 0 is the unaccented glyph, indexes 1..4 carry the four tone
      # diacritics. The :v row holds "ü" (252) and its accented forms.
      UNICODE_TONE_GLYPHS={
        :a=>[97, 257, 225, 462, 224],
        :e=>[101, 275, 233, 283, 232],
        :i=>[105, 299, 237, 464, 236],
        :o=>[111, 333, 243, 466, 242],
        :u=>[117, 363, 250, 468, 249],
        :v=>[252, 470, 472, 474, 476]
      }

      # Last i/u/v of a syllable plus whatever non-i/u/v letters follow it.
      LAST_CLOSE_VOWEL = /([iuv])([^iuv]*)\z/

      # Returns the UTF-8 string for +letter+ carrying +tone+.
      #
      # letter - String or Symbol; must be a key of UNICODE_TONE_GLYPHS.
      # tone   - Integer; reduced modulo MAX_TONE before the lookup.
      def self.tone_glyph(letter,tone)
        if (u=UNICODE_TONE_GLYPHS[letter.to_sym][tone%MAX_TONE])
          [u].pack('U')
        end
      end

      # Returns a copy of +syll+ with the tone diacritic placed on a vowel.
      #
      # Placement: the first "a" if there is one, else the first "e", else
      # the first "o", else the last "i", "u" or "v" in the syllable (so
      # "lin" and "lun" get a mark even though the vowel is not the final
      # letter). A syllable without any of these letters is returned as is.
      def self.add_tone(syll, tone)
        tone %= MAX_TONE
        case syll
        when /a/ then syll.sub(/a/, tone_glyph(:a,tone))
        when /e/ then syll.sub(/e/, tone_glyph(:e,tone))
        when /o/ then syll.sub(/o/, tone_glyph(:o,tone))
        when LAST_CLOSE_VOWEL
          # Block form so the captures of this substitution are used.
          syll.sub(LAST_CLOSE_VOWEL) { tone_glyph($1, tone) + $2 }
        else
          syll
        end
      end

      # Returns the normalized tone encoded in +syll+ without modifying it.
      # NEUTRAL_TONE is returned when no a/e/i/o/u glyph is present.
      def self.peek_tone(syll)
        found = find_tone_glyph(syll.unpack('U*'))
        found ? Tones.normalize(found[2]) : NEUTRAL_TONE
      end

      # Returns [tone, syllable_without_diacritic].
      # When no a/e/i/o/u glyph is present the result is [NEUTRAL_TONE, syll].
      def self.pop_tone(syll)
        unpacked = syll.unpack('U*')
        found = find_tone_glyph(unpacked)
        return [NEUTRAL_TONE, syll] unless found

        position, vowel, tone_index = found
        # unpack('U') yields the code point on every Ruby version, whereas
        # String#[] returns an Integer on 1.8 but a String on 1.9+.
        unpacked[position] = vowel.to_s.unpack('U').first
        [Tones.normalize(tone_index), unpacked.pack('U*')]
      end

      # Yields each base vowel symbol together with its glyph row.
      # NOTE(review): :v is not iterated, so accented "ü" glyphs are not
      # recognised by peek_tone/pop_tone — confirm whether that is intended.
      # The original `private` keyword did not apply to `def self.` methods,
      # so this method has always been publicly callable; it is left public.
      def self.each_tone_glyph
        [:a,:e,:i,:o,:u].each do |v| #Order is significant
          vowel, tones = v, UNICODE_TONE_GLYPHS[v]
          yield vowel,tones
        end
      end

      # Locates the glyph that carries the tone in +codepoints+.
      # Returns [position, vowel_symbol, tone_index] or nil.
      # An accented glyph (index > 0) always wins over a plain vowel, so a
      # plain "i" in "liú" does not hide the accented "ú".
      def self.find_tone_glyph(codepoints)
        plain = nil
        each_tone_glyph do |vowel, tones|
          codepoints.each_with_index do |codepoint, position|
            tone_index = tones.index(codepoint)
            next unless tone_index
            return [position, vowel, tone_index] if tone_index > 0
            plain ||= [position, vowel, tone_index]
          end
        end
        plain
      end
      private_class_method :find_tone_glyph

    end
  end
end
@@ -0,0 +1,25 @@
1
module Pinyin
  module Tones
    # Tone notation that appends a spacing tone mark to the syllable.
    #
    # Relies on NEUTRAL_TONE and Tones.normalize from the enclosing module.
    class Marks
      # Indexed by (normalized tone % 5): index 0 is the neutral-tone dot,
      # index 1 is empty (tone 1 is written without a mark), 2..4 are the
      # marks for tones 2..4.
      GLYPHS=['˙', '', 'ˊ', 'ˇ', 'ˋ']

      # Returns +syll+ with the mark for +tone+ appended.
      def self.add_tone(syll,tone)
        syll + GLYPHS[Tones.normalize(tone) % 5]
      end

      # Returns the tone indicated by a mark anywhere in +syll+; a syllable
      # without any mark is reported as tone 1.
      def self.peek_tone(syll)
        # `then` replaces the `when … :` form, which Ruby 1.9+ rejects.
        case syll
        when /ˊ/ then 2
        when /ˇ/ then 3
        when /ˋ/ then 4
        when /˙/ then NEUTRAL_TONE
        else
          1
        end
      end

      # Returns [tone, leading part of +syll+ up to the first tone mark].
      # The second element is nil when +syll+ is empty or starts with a mark.
      def self.pop_tone(syll)
        [ peek_tone(syll), syll[/\A[^#{GLYPHS.join}]+/] ]
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
module Pinyin
  module Tones
    # Tone handler for text that carries no tone information: syllables pass
    # through untouched and every syllable is reported as NEUTRAL_TONE.
    module NoTones
      class << self
        # Returns +s+ unchanged; +t+ is ignored.
        def add_tone(s, t)
          s
        end

        # Always NEUTRAL_TONE, whatever +s+ is.
        def peek_tone(s)
          NEUTRAL_TONE
        end

        # Returns [NEUTRAL_TONE, s] with +s+ unchanged.
        def pop_tone(s)
          [NEUTRAL_TONE, s]
        end
      end
    end
  end
end
16
+
@@ -0,0 +1,24 @@
1
module Pinyin
  module Tones
    # Tone notation that writes the tone as a trailing digit ("ma3").
    #
    # Relies on NEUTRAL_TONE and Tones.normalize from the enclosing module.
    class Numbers
      class << self
        # Returns +syll+ followed by the normalized +tone+ as a digit.
        def add_tone(syll, tone)
          suffix = Tones.normalize(tone).to_s
          syll + suffix
        end

        # Returns the normalized tone given by a final digit of +syll+,
        # or NEUTRAL_TONE when +syll+ does not end in a digit.
        def peek_tone(syll)
          return NEUTRAL_TONE unless syll =~ /(\d)\Z/

          Tones.normalize(Integer($1))
        end

        # Returns [tone, leading run of non-digit characters of +syll+].
        def pop_tone(syll)
          tone = peek_tone(syll)
          base = syll[/\A\D+/]
          [tone, base]
        end
      end
    end
  end
end
data/lib/tones.rb ADDED
@@ -0,0 +1,19 @@
1
+
2
module Pinyin
  module Tones
    # Base names (no directory, no ".rb") of the files in the tones/
    # directory next to this file. File.basename is used instead of a
    # regexp on the full path, which captured too much whenever an ancestor
    # directory was itself called "tones". Sorted so the order does not
    # depend on the file system.
    All=Dir[File.join(File.dirname(__FILE__),'tones','*.rb')].sort.map{|s| File.basename(s, '.rb')}
    MAX_TONE = NEUTRAL_TONE = 5
    VALID_TONES = 1..5
    # Replacements applied after the modulo step in normalize: 0 becomes
    # the neutral tone.
    CONVERSIONS = {0 => NEUTRAL_TONE}

    # Maps any integer tone onto 1..5.
    #
    # Values already in VALID_TONES are returned as they are; anything else
    # is reduced modulo MAX_TONE, and a remainder of 0 becomes NEUTRAL_TONE
    # (so 0 => 5, 6 => 1, 10 => 5).
    def self.normalize(t)
      if VALID_TONES === t
        t
      else
        t %= MAX_TONE
        CONVERSIONS[t] || t
      end
    end
  end
end
19
+
data/rakefile ADDED
@@ -0,0 +1,39 @@
1
require 'rake'
require 'rake/testtask'
require 'rubygems'

# Gem.manage_gems is only present in old RubyGems releases; skip it elsewhere.
Gem.manage_gems if Gem.respond_to?(:manage_gems)

# Prefer the package task shipped with RubyGems and fall back to the one
# bundled with old Rake versions.
begin
  require 'rubygems/package_task'
  package_task = Gem::PackageTask
rescue LoadError
  require 'rake/gempackagetask'
  package_task = Rake::GemPackageTask
end


# The default task used to depend on :test_units, which is not defined
# anywhere in this file; the test task defined below is test:pinyin.
task :default => ['test:pinyin']

namespace "test" do
  # Runs every Ruby file under test/ with verbose output and warnings on.
  Rake::TestTask.new("pinyin") do |t|
    $: << File.dirname(__FILE__) + '/lib'
    t.pattern = 'test/*.rb'
    t.verbose = true
    t.warning = true
  end
end

# Gem description used by the packaging task below.
spec = Gem::Specification.new do |s|
  s.name = "pinyin"
  s.version = "0.0.1"
  s.author = "Arne Brasseur"
  s.email = "pinyin at menolikespam arnebrasseur dot net"
  s.homepage = "http://svn.arnebrasseur.net/pinyin"
  s.platform = Gem::Platform::RUBY
  s.summary = "A conversion library for Chinese transcription methods in Ruby"
  s.files = FileList["**/*"]
  #FileList["{bin,docs,lib,test}/**/*"].exclude("rdoc").to_a
  s.require_path = "lib/"
  # not sure yet what these are
  # s.autorequire = "pinyin"
  # s.test_file = "test/runtest.rb"
  # s.has_rdoc = true
  # s.extra_rdoc_files = ['README']
end

# Defines the gem packaging tasks; a tarball is requested as well.
package_task.new(spec) do |pkg|
  pkg.need_tar = true
end
@@ -0,0 +1,35 @@
1
+ require 'pinyin'
2
+ require 'test/unit'
3
+ require 'csv'
4
+
5
+
6
+ # This test uses the chart from pinyin.info to compare all implemented conversion types
7
+ # Since I can't find another reference of the hanyu pinyin 'lo', I have removed it from the table
8
+
9
# Checks every pair of transcription systems against the comparison chart.
class TestCompare < Test::Unit::TestCase
  # Parsed CSV chart; row 0 is the header that names each column's system.
  CHART=CSV.parse(IO.read(File.dirname(__FILE__)+'/../lib/data/comparison.csv'))
  # Systems to compare; each name must appear in the chart's header row.
  COMPARE=[:hanyu, :wadegiles, :zhuyin, :tongyong]

  # Runs compare for every ordered pair, including a system with itself.
  def test_do_comparisons
    COMPARE.each { |from| COMPARE.each { |to| compare(from, to) } }
  end

  # Asserts, for every data row of the chart, that feeding the +from+ column
  # through a reader and then a writer yields the +to+ column.
  # NOTE(review): the exact semantics of Reader#<< and Writer#<< are defined
  # outside this file — presumably parse and unparse; confirm.
  def compare(from, to)
    header = CHART[0]
    source_col = header.index(from.to_s)
    target_col = header.index(to.to_s)

    reader = Pinyin::Reader.new(from, :no_tones)
    writer = Pinyin::Writer.new(to, :no_tones)

    CHART[1..-1].each do |row|
      expected = row[target_col].strip
      converted = writer << (reader << row[source_col].strip)
      assert_equal(expected, converted, "Converting from #{from} to #{to} value #{row[target_col]}")
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'test/unit'
2
+ require 'pinyin.rb'
3
+ require 'yaml'
4
+ $KCODE='u'
5
# Generates one Test::Unit::TestCase subclass per entry of valid_pinyin.yaml.
#
# The YAML file is read as a two-level mapping: the outer key is used as a
# constant name under Final, the inner key as a constant name under Initial,
# and the value is the expected hanyu spelling.
#
# NOTE(review): in each generated class, test_parse_* exercises
# Writer#unparse and test_unparse_* exercises Reader#parse; the names look
# swapped but are kept so existing test selectors keep working.
module HanyuCoverage
  grid=YAML.load(IO.read(File.dirname(__FILE__)+'/../lib/data/valid_pinyin.yaml'))
  grid.each do |fname, row|
    row.each do |iname, hanyu|
      # File and line are handed to eval so failures point into this template.
      template_line = __LINE__ + 1
      source = %[
        class Test_#{hanyu} < Test::Unit::TestCase
          include Pinyin
          def initialize(s)
            super(s)
            @reader = Reader.new(:hanyu, :no_tones)
            @writer = Writer.new(:hanyu, :no_tones)
          end

          def test_parse_#{hanyu}
            assert_equal('#{hanyu}', @writer.unparse(Syllable.new(Initial::#{iname}, Final::#{fname}, Tones::NEUTRAL_TONE)), 'Wrong hanyu for Initial::#{iname}+Final::#{fname}, expected `#{hanyu}` ')
          end

          def test_unparse_#{hanyu}
            ts=@reader.parse('#{hanyu}')
            ts=ts.first if ts.is_a?(Array)
            assert_not_nil(ts, 'Reader<:hanyu, :no_tones>#parse("#{hanyu}") returned nil')
            assert_equal(Initial::#{iname}, ts.initial, 'Wrong initial for `#{hanyu}`, expected Initial::#{iname}')
            assert_equal(Final::#{fname}, ts.final, 'Wrong final for `#{hanyu}`, expected Final::#{fname}')
          end
        end
      ]
      # The generated code used `ts=*parse(...)`, which unwraps a one-element
      # array only on Ruby 1.8; the explicit `first` above behaves the same
      # on every Ruby version and also accepts a bare syllable.
      eval(source, binding, __FILE__, template_line)
    end
  end

end
metadata ADDED
@@ -0,0 +1,74 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.0
3
+ specification_version: 1
4
+ name: pinyin
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.0.1
7
+ date: 2007-07-26 00:00:00 +02:00
8
+ summary: A conversion library for Chinese transcription methods in Ruby
9
+ require_paths:
10
+ - lib/
11
+ email: pinyin at menolikespam arnebrasseur dot net
12
+ homepage: http://svn.arnebrasseur.net/pinyin
13
+ rubyforge_project:
14
+ description:
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: false
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Arne Brasseur
31
+ files:
32
+ - examples
33
+ - lib
34
+ - rakefile
35
+ - test
36
+ - TODO
37
+ - examples/cgiform
38
+ - examples/hello.rb
39
+ - examples/cgiform/cgiform.rb
40
+ - examples/cgiform/template.rhtml
41
+ - lib/conversions.rb
42
+ - lib/data
43
+ - lib/exception.rb
44
+ - lib/groundwork.rb
45
+ - lib/pinyin.rb
46
+ - lib/support.rb
47
+ - lib/tones
48
+ - lib/tones.rb
49
+ - lib/data/comparison.csv
50
+ - lib/data/final.csv
51
+ - lib/data/initial.csv
52
+ - lib/data/paladiy.txt
53
+ - lib/data/rules.yaml
54
+ - lib/data/valid_pinyin.yaml
55
+ - lib/tones/accents.rb
56
+ - lib/tones/marks.rb
57
+ - lib/tones/no_tones.rb
58
+ - lib/tones/numbers.rb
59
+ - test/comparison_test.rb
60
+ - test/hanyu_coverage.rb
61
+ test_files: []
62
+
63
+ rdoc_options: []
64
+
65
+ extra_rdoc_files: []
66
+
67
+ executables: []
68
+
69
+ extensions: []
70
+
71
+ requirements: []
72
+
73
+ dependencies: []
74
+