pinyin 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/TODO +22 -0
- data/examples/cgiform/cgiform.rb +24 -0
- data/examples/cgiform/template.rhtml +69 -0
- data/examples/hello.rb +12 -0
- data/lib/conversions.rb +74 -0
- data/lib/data/comparison.csv +410 -0
- data/lib/data/final.csv +10 -0
- data/lib/data/initial.csv +7 -0
- data/lib/data/paladiy.txt +421 -0
- data/lib/data/rules.yaml +24 -0
- data/lib/data/valid_pinyin.yaml +455 -0
- data/lib/exception.rb +14 -0
- data/lib/groundwork.rb +148 -0
- data/lib/pinyin.rb +71 -0
- data/lib/support.rb +16 -0
- data/lib/tones/accents.rb +59 -0
- data/lib/tones/marks.rb +25 -0
- data/lib/tones/no_tones.rb +16 -0
- data/lib/tones/numbers.rb +24 -0
- data/lib/tones.rb +19 -0
- data/rakefile +39 -0
- data/test/comparison_test.rb +35 -0
- data/test/hanyu_coverage.rb +33 -0
- metadata +74 -0
@@ -0,0 +1,59 @@
|
|
1
|
+
module Pinyin
  module Tones
    # Tone style that writes the tone as a diacritic on one vowel of the
    # syllable, and reads it back from such a diacritic.
    #
    # Relies on MAX_TONE, NEUTRAL_TONE and Tones.normalize, which are
    # defined on the enclosing Tones module (not in this file).
    class Accents

      # Unicode code points for each vowel. Index 0 is the glyph without a
      # tone mark, indexes 1-4 carry the marks for tones 1-4. The :v row is
      # u-with-diaeresis (code point 252) and its four toned variants.
      UNICODE_TONE_GLYPHS={
        :a=>[97, 257, 225, 462, 224],
        :e=>[101, 275, 233, 283, 232],
        :i=>[105, 299, 237, 464, 236],
        :o=>[111, 333, 243, 466, 242],
        :u=>[117, 363, 250, 468, 249],
        :v=>[252, 470, 472, 474, 476]
      }

      # Scan order used when reading a tone back. The order only decides
      # which unaccented vowel is picked when no accented glyph is present.
      VOWEL_ORDER = [:a, :e, :i, :o, :u, :v]

      # Returns the glyph (as a UTF-8 string) for +letter+ carrying +tone+.
      # +tone+ is reduced modulo MAX_TONE, so the neutral tone maps to the
      # unaccented glyph at index 0. Returns nil for an unknown letter.
      def self.tone_glyph(letter,tone)
        glyphs = UNICODE_TONE_GLYPHS[letter.to_sym]
        codepoint = glyphs && glyphs[tone % MAX_TONE]
        [codepoint].pack('U') if codepoint
      end

      # Returns a copy of +syll+ with the tone mark placed on a vowel:
      # the first "a", else the first "e", else the first "o", else a
      # trailing i/u/v. A syllable matching none of these is returned as is.
      def self.add_tone(syll, tone)
        tone %= MAX_TONE
        case syll
        when /a/ then syll.sub(/a/, tone_glyph(:a, tone))
        when /e/ then syll.sub(/e/, tone_glyph(:e, tone))
        when /o/ then syll.sub(/o/, tone_glyph(:o, tone))
        when /(i|o|u|v)\Z/
          letter = $1
          # Anchor the substitution so the matched final vowel is the one
          # that receives the mark.
          syll.sub(/#{letter}\Z/, tone_glyph(letter, tone))
        else
          syll
        end
      end

      # Returns the tone encoded in +syll+ without modifying it.
      # A syllable without any accented vowel yields NEUTRAL_TONE.
      def self.peek_tone(syll)
        found = locate_tone_glyph(syll.unpack('U*'))
        found ? Tones.normalize(found[1]) : NEUTRAL_TONE
      end

      # Returns [tone, syllable_without_tone_mark]. The glyph carrying the
      # tone is replaced by its table key letter (so u-diaeresis becomes "v").
      def self.pop_tone(syll)
        codepoints = syll.unpack('U*')
        vowel, tone_index, position = locate_tone_glyph(codepoints)
        return [NEUTRAL_TONE, syll] unless vowel

        # unpack('U') yields an Integer code point on every Ruby version,
        # unlike String#[] which returns a String from Ruby 1.9 on.
        codepoints[position] = vowel.to_s.unpack('U').first
        [Tones.normalize(tone_index), codepoints.pack('U*')]
      end

      # Yields each vowel key with its glyph table, in VOWEL_ORDER.
      def self.each_tone_glyph
        VOWEL_ORDER.each do |vowel|
          yield vowel, UNICODE_TONE_GLYPHS[vowel]
        end
      end

      # Finds the glyph that determines the tone of a syllable given as an
      # array of code points. Returns [vowel, tone_index, position] or nil.
      # An accented glyph (tone_index > 0) always wins; otherwise the first
      # unaccented table glyph in VOWEL_ORDER is reported with index 0.
      def self.locate_tone_glyph(codepoints)
        fallback = nil
        each_tone_glyph do |vowel, glyphs|
          codepoints.each_with_index do |codepoint, position|
            tone_index = glyphs.index(codepoint)
            next unless tone_index
            return [vowel, tone_index, position] if tone_index > 0
            fallback ||= [vowel, tone_index, position]
          end
        end
        fallback
      end

      # A bare `private` does not apply to `def self.` methods, so the
      # helpers are hidden explicitly.
      private_class_method :each_tone_glyph, :locate_tone_glyph

    end
  end
end
|
data/lib/tones/marks.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
module Pinyin
  module Tones
    # Tone style that appends a standalone tone mark after the syllable.
    #
    # Relies on NEUTRAL_TONE and Tones.normalize from the enclosing Tones
    # module (not defined in this file).
    class Marks
      # Mark per tone, indexed by (normalized tone % GLYPHS.size): the
      # neutral tone lands on index 0, tone 1 is written without a mark.
      GLYPHS=['˙', '', 'ˊ', 'ˇ', 'ˋ']

      # Leading run of characters that are not tone marks; built once
      # instead of on every pop_tone call.
      TONELESS_PREFIX = /\A[^#{GLYPHS.join}]+/

      # Returns +syll+ followed by the mark for +tone+.
      def self.add_tone(syll,tone)
        syll + GLYPHS[Tones.normalize(tone) % GLYPHS.size]
      end

      # Returns the tone indicated by a mark anywhere in +syll+;
      # a syllable without any mark is read as tone 1.
      def self.peek_tone(syll)
        case syll
        when /ˊ/ then 2
        when /ˇ/ then 3
        when /ˋ/ then 4
        when /˙/ then NEUTRAL_TONE
        else
          1
        end
      end

      # Returns [tone, syllable_without_mark]. The second element is nil
      # when +syll+ starts with a tone mark or is empty.
      def self.pop_tone(syll)
        [ peek_tone(syll), syll[TONELESS_PREFIX] ]
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Pinyin
  module Tones
    # Tone style that writes the tone as a digit after the syllable.
    #
    # Uses NEUTRAL_TONE and Tones.normalize from the enclosing Tones module.
    class Numbers
      class << self
        # Returns +syll+ with the normalized tone number appended.
        def add_tone(syll, tone)
          suffix = Tones.normalize(tone).to_s
          syll + suffix
        end

        # Returns the tone given by a trailing digit, normalized;
        # NEUTRAL_TONE when +syll+ does not end in a digit.
        def peek_tone(syll)
          trailing = /(\d)\Z/.match(syll)
          return NEUTRAL_TONE unless trailing

          Tones.normalize(Integer(trailing[1]))
        end

        # Returns [tone, leading_non_digit_part_of_syll].
        def pop_tone(syll)
          tone = peek_tone(syll)
          bare = syll[/\A\D+/]
          [tone, bare]
        end
      end
    end
  end
end
|
data/lib/tones.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
|
2
|
+
module Pinyin
  # Shared tone constants and normalization used by the tone styles.
  module Tones
    # Names of the tone style files found in the tones/ directory next to
    # this file (file name without ".rb"). File.basename is used so that a
    # parent directory that happens to be called "tones" cannot leak into
    # the names, and the list is sorted because glob order is not
    # guaranteed on every Ruby version.
    All=Dir[File.join(File.dirname(__FILE__),'tones','*.rb')].map{|path| File.basename(path, '.rb')}.sort

    # The neutral tone is numbered 5, which is also the highest tone number.
    MAX_TONE = NEUTRAL_TONE = 5
    VALID_TONES = 1..5
    # Values (after reduction modulo MAX_TONE) that map to another tone.
    CONVERSIONS = {0 => NEUTRAL_TONE}

    # Maps any integer tone onto VALID_TONES. Values already in range are
    # returned unchanged; others are reduced modulo MAX_TONE, with a
    # remainder of 0 becoming NEUTRAL_TONE.
    def self.normalize(t)
      return t if VALID_TONES === t

      reduced = t % MAX_TONE
      CONVERSIONS[reduced] || reduced
    end
  end
end
|
19
|
+
|
data/rakefile
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# Rake tasks for the pinyin gem: running the tests and building the package.
require 'rake'
require 'rake/testtask'
require 'rubygems'

# Old RubyGems needed this call before building gems; skip it where the
# method is not defined.
Gem.manage_gems if Gem.respond_to?(:manage_gems)

# Prefer the package task shipped with RubyGems and fall back to the one
# that older Rake versions provide.
gem_package_task =
  begin
    require 'rubygems/package_task'
    Gem::PackageTask
  rescue LoadError
    require 'rake/gempackagetask'
    Rake::GemPackageTask
  end

# The only test task defined below is test:pinyin, so that is what the
# default task has to depend on.
task :default => ['test:pinyin']

namespace "test" do
  Rake::TestTask.new("pinyin") do |t|
    # The tests run in a separate ruby process, so the library directory is
    # passed through t.libs rather than by changing this process' $:.
    t.libs << File.join(File.dirname(__FILE__), 'lib')
    t.pattern = 'test/*.rb'
    t.verbose = true
    t.warning = true
  end
end

spec = Gem::Specification.new do |s|
  s.name = "pinyin"
  s.version = "0.0.1"
  s.author = "Arne Brasseur"
  s.email = "pinyin at menolikespam arnebrasseur dot net"
  s.homepage = "http://svn.arnebrasseur.net/pinyin"
  s.platform = Gem::Platform::RUBY
  s.summary = "A conversion library for Chinese transcription methods in Ruby"
  s.files = FileList["**/*"]
  #FileList["{bin,docs,lib,test}/**/*"].exclude("rdoc").to_a
  s.require_path = "lib/"
  # Left disabled by the original author, who was unsure what they do:
  # s.autorequire = "pinyin"
  # s.test_file = "test/runtest.rb"
  # s.has_rdoc = true
  # s.extra_rdoc_files = ['README']
end

gem_package_task.new(spec) do |pkg|
  pkg.need_tar = true
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'pinyin'
|
2
|
+
require 'test/unit'
|
3
|
+
require 'csv'
|
4
|
+
|
5
|
+
|
6
|
+
# This test uses the chart from pinyin.info to compare all implemented conversion types
|
7
|
+
# Since I can't find another reference for the hanyu pinyin 'lo', I have removed it from the table
|
8
|
+
|
9
|
+
# Converts every syllable of the comparison chart between each pair of
# transcription systems and checks the result against the chart.
class TestCompare < Test::Unit::TestCase
  # Parsed comparison chart; the first row holds the system names.
  CHART=CSV.parse(IO.read(File.dirname(__FILE__)+'/../lib/data/comparison.csv'))
  # Systems to compare; each name must appear in the chart's first row.
  COMPARE=[:hanyu, :wadegiles, :zhuyin, :tongyong]

  # Tests all combinations, including reading and writing the same system.
  def test_do_comparisons
    COMPARE.each { |from| COMPARE.each { |to| compare(from, to) } }
  end

  # Asserts that every chart entry of system +from+, read without tones,
  # is written out in system +to+ exactly as the chart lists it.
  def compare(from, to)
    reader = Pinyin::Reader.new(from, :no_tones)
    writer = Pinyin::Writer.new(to, :no_tones)

    header = CHART[0]
    source_column = header.index(from.to_s)
    target_column = header.index(to.to_s)

    CHART[1..-1].each do |row|
      expected = row[target_column].strip
      converted = writer << (reader << row[source_column].strip)
      assert_equal(expected, converted, "Converting from #{from} to #{to} value #{row[target_column]}")
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'pinyin.rb'
|
3
|
+
require 'yaml'
|
4
|
+
$KCODE='u'
|
5
|
+
# Generates one Test::Unit test case class per entry of valid_pinyin.yaml.
# The YAML file is read as a two-level mapping: outer keys are used as
# Final constant names, inner keys as Initial constant names, and the
# values are the expected hanyu pinyin spellings.
module HanyuCoverage
  grid=YAML.load(IO.read(File.dirname(__FILE__)+'/../lib/data/valid_pinyin.yaml'))
  grid.each do |fname, row|
    row.each do |iname, hanyu|
      # The class body is built as source text because the constant names
      # (Initial::X, Final::Y) and method names come from the data file.
      # NOTE(review): this evals file content; only safe while the YAML
      # file is a trusted part of the project.
      eval %[
        class Test_#{hanyu} < Test::Unit::TestCase
          include Pinyin
          def initialize(s)
            super(s)
            @reader = Reader.new(:hanyu, :no_tones)
            @writer = Writer.new(:hanyu, :no_tones)
          end

          # Syllable -> text: exercises Writer#unparse.
          def test_unparse_#{hanyu}
            assert_equal('#{hanyu}', @writer.unparse(Syllable.new(Initial::#{iname}, Final::#{fname}, Tones::NEUTRAL_TONE)), 'Wrong hanyu for Initial::#{iname}+Final::#{fname}, expected `#{hanyu}` ')
          end

          # Text -> syllable: exercises Reader#parse.
          def test_parse_#{hanyu}
            parsed = @reader.parse('#{hanyu}')
            # `ts = *parsed` only unwraps a one-element array on Ruby 1.8;
            # do the unwrapping explicitly so it works on every version.
            ts = (parsed.is_a?(Array) && parsed.size <= 1) ? parsed.first : parsed
            assert_not_nil(ts, 'Reader<:hanyu, :no_tone>#parse("#{hanyu}") returned nil')
            assert_equal(Initial::#{iname}, ts.initial, 'Wrong initial for `#{hanyu}`, expected Initial::#{iname}')
            assert_equal(Final::#{fname}, ts.final, 'Wrong final for `#{hanyu}`, expected Final::#{fname}')
          end
        end
      ]
    end
  end

end
|
metadata
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.0
|
3
|
+
specification_version: 1
|
4
|
+
name: pinyin
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.0.1
|
7
|
+
date: 2007-07-26 00:00:00 +02:00
|
8
|
+
summary: A conversion library for Chinese transcription methods in Ruby
|
9
|
+
require_paths:
|
10
|
+
- lib/
|
11
|
+
email: pinyin at menolikespam arnebrasseur dot net
|
12
|
+
homepage: http://svn.arnebrasseur.net/pinyin
|
13
|
+
rubyforge_project:
|
14
|
+
description:
|
15
|
+
autorequire:
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: false
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Arne Brasseur
|
31
|
+
files:
|
32
|
+
- examples
|
33
|
+
- lib
|
34
|
+
- rakefile
|
35
|
+
- test
|
36
|
+
- TODO
|
37
|
+
- examples/cgiform
|
38
|
+
- examples/hello.rb
|
39
|
+
- examples/cgiform/cgiform.rb
|
40
|
+
- examples/cgiform/template.rhtml
|
41
|
+
- lib/conversions.rb
|
42
|
+
- lib/data
|
43
|
+
- lib/exception.rb
|
44
|
+
- lib/groundwork.rb
|
45
|
+
- lib/pinyin.rb
|
46
|
+
- lib/support.rb
|
47
|
+
- lib/tones
|
48
|
+
- lib/tones.rb
|
49
|
+
- lib/data/comparison.csv
|
50
|
+
- lib/data/final.csv
|
51
|
+
- lib/data/initial.csv
|
52
|
+
- lib/data/paladiy.txt
|
53
|
+
- lib/data/rules.yaml
|
54
|
+
- lib/data/valid_pinyin.yaml
|
55
|
+
- lib/tones/accents.rb
|
56
|
+
- lib/tones/marks.rb
|
57
|
+
- lib/tones/no_tones.rb
|
58
|
+
- lib/tones/numbers.rb
|
59
|
+
- test/comparison_test.rb
|
60
|
+
- test/hanyu_coverage.rb
|
61
|
+
test_files: []
|
62
|
+
|
63
|
+
rdoc_options: []
|
64
|
+
|
65
|
+
extra_rdoc_files: []
|
66
|
+
|
67
|
+
executables: []
|
68
|
+
|
69
|
+
extensions: []
|
70
|
+
|
71
|
+
requirements: []
|
72
|
+
|
73
|
+
dependencies: []
|
74
|
+
|