pinyin 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/TODO +22 -0
- data/examples/cgiform/cgiform.rb +24 -0
- data/examples/cgiform/template.rhtml +69 -0
- data/examples/hello.rb +12 -0
- data/lib/conversions.rb +74 -0
- data/lib/data/comparison.csv +410 -0
- data/lib/data/final.csv +10 -0
- data/lib/data/initial.csv +7 -0
- data/lib/data/paladiy.txt +421 -0
- data/lib/data/rules.yaml +24 -0
- data/lib/data/valid_pinyin.yaml +455 -0
- data/lib/exception.rb +14 -0
- data/lib/groundwork.rb +148 -0
- data/lib/pinyin.rb +71 -0
- data/lib/support.rb +16 -0
- data/lib/tones/accents.rb +59 -0
- data/lib/tones/marks.rb +25 -0
- data/lib/tones/no_tones.rb +16 -0
- data/lib/tones/numbers.rb +24 -0
- data/lib/tones.rb +19 -0
- data/rakefile +39 -0
- data/test/comparison_test.rb +35 -0
- data/test/hanyu_coverage.rb +33 -0
- metadata +74 -0
@@ -0,0 +1,59 @@
|
|
1
|
+
module Pinyin
  module Tones
    # Tone notation that puts a diacritic on one vowel of the syllable.
    #
    # MAX_TONE and Tones.normalize are not defined here; they are looked up
    # in the enclosing Pinyin::Tones module when the methods run.
    class Accents

      # Unicode code points per vowel, indexed by (tone % MAX_TONE):
      # index 0 is the unaccented glyph (used for the neutral tone) and
      # indexes 1..4 are the accented glyphs for tones 1..4.
      # The :v row holds the u-umlaut glyphs.
      UNICODE_TONE_GLYPHS={
        :a=>[97, 257, 225, 462, 224],
        :e=>[101, 275, 233, 283, 232],
        :i=>[105, 299, 237, 464, 236],
        :o=>[111, 333, 243, 466, 242],
        :u=>[117, 363, 250, 468, 249],
        :v=>[252, 470, 472, 474, 476]
      }

      # Unaccented u-umlaut: the index-0 glyph of the :v row.
      U_UMLAUT = [UNICODE_TONE_GLYPHS[:v][0]].pack('U')

      # Captures the last i, u, v or u-umlaut of a syllable ($1) and whatever
      # follows it ($2), so the accent also lands correctly when a consonant
      # ends the syllable.
      LAST_CLOSE_VOWEL = /([iuv#{U_UMLAUT}])([^iuv#{U_UMLAUT}]*)\z/u

      # Returns the UTF-8 glyph for +letter+ (String or Symbol key of
      # UNICODE_TONE_GLYPHS) carrying +tone+, or nil when the table has no
      # entry at that index.
      def self.tone_glyph(letter,tone)
        codepoint = UNICODE_TONE_GLYPHS[letter.to_sym][tone % MAX_TONE]
        [codepoint].pack('U') if codepoint
      end

      # Returns +syll+ with the accent for +tone+ applied.
      #
      # The accent goes on the first "a" if there is one, otherwise on the
      # first "e", otherwise on the first "o", otherwise on the last
      # i / u / v / u-umlaut. A "v" chosen this way is written as u-umlaut.
      # A syllable containing none of these is returned unchanged.
      def self.add_tone(syll, tone)
        tone %= MAX_TONE
        case syll
        when /a/ then syll.sub(/a/, tone_glyph(:a,tone))
        when /e/ then syll.sub(/e/, tone_glyph(:e,tone))
        when /o/ then syll.sub(/o/, tone_glyph(:o,tone))
        when LAST_CLOSE_VOWEL
          syll.sub(LAST_CLOSE_VOWEL) do
            letter = ($1 == U_UMLAUT) ? :v : $1
            tone_glyph(letter, tone) + $2
          end
        else syll
        end
      end

      # Returns the normalized tone encoded in +syll+ without modifying it.
      # A syllable without any accented vowel yields Tones.normalize(0).
      def self.peek_tone(syll)
        _position, tone, _plain = locate_tone(syll.unpack('U*'))
        Tones.normalize(tone || 0)
      end

      # Returns [tone, syllable] where the accented vowel of +syll+ has been
      # replaced by the unaccented glyph of the same table row. An accented
      # u-umlaut therefore becomes a plain u-umlaut, not the letter "v".
      # A syllable without any accented vowel comes back as
      # [Tones.normalize(0), copy of syll].
      def self.pop_tone(syll)
        codepoints = syll.unpack('U*')
        position, tone, plain = locate_tone(codepoints)
        codepoints[position] = plain if position
        [Tones.normalize(tone || 0), codepoints.pack('U*')]
      end

      # Yields each vowel key of UNICODE_TONE_GLYPHS with its glyph row.
      # NOTE: the original `private` keyword had no effect on `def self.`
      # methods, so this helper has always been callable from outside and
      # is left that way.
      def self.each_tone_glyph
        [:a,:e,:i,:o,:u,:v].each do |v|
          yield v, UNICODE_TONE_GLYPHS[v]
        end
      end

      # Scans +codepoints+ left to right for the first glyph that sits at
      # index 1..4 of any table row. Unaccented vowels (index 0) are skipped
      # so they cannot hide an accent later in the syllable.
      # Returns [position, tone_index, unaccented_codepoint] or nil.
      def self.locate_tone(codepoints)
        codepoints.each_with_index do |codepoint, position|
          each_tone_glyph do |_vowel, glyphs|
            tone = glyphs.index(codepoint)
            return [position, tone, glyphs.first] if tone && tone > 0
          end
        end
        nil
      end
      private_class_method :locate_tone

    end
  end
end
|
data/lib/tones/marks.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
module Pinyin
  module Tones
    # Tone notation that appends a spacing tone mark to the syllable.
    #
    # NEUTRAL_TONE and Tones.normalize are looked up in the enclosing
    # Pinyin::Tones module when the methods run.
    class Marks
      # Mark appended for each value of (normalized tone % GLYPHS.size):
      # index 0 is the neutral-tone dot, index 1 (first tone) is unmarked.
      GLYPHS=['˙', '', 'ˊ', 'ˇ', 'ˋ']

      # Leading run of characters that are not tone marks; built once
      # instead of on every pop_tone call.
      TONELESS_PREFIX = /\A[^#{GLYPHS.join}]+/

      # Returns +syll+ followed by the mark for +tone+.
      def self.add_tone(syll,tone)
        syll + GLYPHS[Tones.normalize(tone) % GLYPHS.size]
      end

      # Returns the tone indicated by the first mark kind found in +syll+,
      # checked in the order 2, 3, 4, neutral; 1 when no mark is present.
      def self.peek_tone(syll)
        case syll
        when /ˊ/ then 2
        when /ˇ/ then 3
        when /ˋ/ then 4
        when /˙/ then NEUTRAL_TONE
        else
          1
        end
      end

      # Returns [tone, syllable up to the first tone mark]. The second
      # element is nil when +syll+ starts with a tone mark or is empty.
      def self.pop_tone(syll)
        [ peek_tone(syll), syll[TONELESS_PREFIX] ]
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Pinyin
  module Tones
    # Tone notation that writes the tone as a digit after the syllable.
    #
    # NEUTRAL_TONE and Tones.normalize are looked up in the enclosing
    # Pinyin::Tones module when the methods run.
    class Numbers
      class << self
        # Returns +syll+ with the normalized +tone+ appended as a digit.
        def add_tone(syll, tone)
          digit = Tones.normalize(tone).to_s
          syll + digit
        end

        # Returns the normalized value of the trailing digit of +syll+,
        # or NEUTRAL_TONE when +syll+ does not end in a digit.
        def peek_tone(syll)
          return NEUTRAL_TONE unless syll =~ /(\d)\Z/

          Tones.normalize(Integer($1))
        end

        # Returns [tone, leading non-digit part of syll].
        def pop_tone(syll)
          tone = peek_tone(syll)
          base = syll[/\A\D+/]
          [tone, base]
        end
      end
    end
  end
end
|
data/lib/tones.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
|
2
|
+
module Pinyin
  module Tones
    # Returns the sorted base names (without ".rb") of the Ruby files that
    # sit directly inside +dir+. File.basename is used so that a parent
    # directory that is also called "tones" cannot leak into the names.
    def self.names_in(dir)
      Dir[File.join(dir, '*.rb')].map { |path| File.basename(path, '.rb') }.sort
    end

    # Names of the files found in the "tones" directory next to this file.
    All = names_in(File.join(File.dirname(__FILE__), 'tones'))
    MAX_TONE = NEUTRAL_TONE = 5
    VALID_TONES = 1..5
    # Values that (t % MAX_TONE) is mapped to after the modulo step.
    CONVERSIONS = {0 => NEUTRAL_TONE}

    # Maps +t+ into VALID_TONES: values already in range are returned
    # as-is, anything else is taken modulo MAX_TONE, with a result of 0
    # becoming NEUTRAL_TONE.
    def self.normalize(t)
      return t if VALID_TONES === t

      t %= MAX_TONE
      CONVERSIONS[t] || t
    end
  end
end
|
19
|
+
|
data/rakefile
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'rake'
require 'rake/testtask'
require 'rubygems'

# Gem.manage_gems only exists in old RubyGems releases; skip it elsewhere.
Gem.manage_gems if Gem.respond_to?(:manage_gems)

# Current RubyGems provides the packaging task as Gem::PackageTask; old Rake
# releases provide it as Rake::GemPackageTask. Use whichever can be loaded.
begin
  require 'rubygems/package_task'
  package_task = Gem::PackageTask
rescue LoadError
  require 'rake/gempackagetask'
  package_task = Rake::GemPackageTask
end

# The only test task defined in this file is test:pinyin, so a bare `rake`
# runs that one (the former :test_units prerequisite was never defined).
task :default => ['test:pinyin']

namespace "test" do
  Rake::TestTask.new("pinyin") do |t|
    # Put lib/ on the load path of the test run itself.
    t.libs << File.join(File.dirname(__FILE__), 'lib')
    t.pattern = 'test/*.rb'
    t.verbose = true
    t.warning = true
  end
end

spec = Gem::Specification.new do |s|
  s.name = "pinyin"
  s.version = "0.0.1"
  s.author = "Arne Brasseur"
  s.email = "pinyin at menolikespam arnebrasseur dot net"
  s.homepage = "http://svn.arnebrasseur.net/pinyin"
  s.platform = Gem::Platform::RUBY
  s.summary = "A conversion library for Chinese transcription methods in Ruby"
  # Everything in the project except pkg/, where the package task writes
  # its output; otherwise earlier builds would be packed into the next gem.
  s.files = FileList["**/*"].exclude(/\Apkg(\/|\z)/).to_a
  #FileList["{bin,docs,lib,test}/**/*"].exclude("rdoc").to_a
  s.require_path = "lib/"
  # not sure yet what these are
  # s.autorequire = "pinyin"
  # s.test_file = "test/runtest.rb"
  # s.has_rdoc = true
  # s.extra_rdoc_files = ['README']
end

package_task.new(spec) do |pkg|
  pkg.need_tar = true
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'pinyin'
|
2
|
+
require 'test/unit'
|
3
|
+
require 'csv'
|
4
|
+
|
5
|
+
|
6
|
+
# This test uses the chart from pinyin.info to compare all implemented conversion types
|
7
|
+
# Since I can't find another reference of the hanyu pinyin 'lo', I have removed it from the table
|
8
|
+
|
9
|
+
class TestCompare < Test::Unit::TestCase
  # Parsed comparison chart. Its first row is searched for the name of each
  # transcription system to find that system's column.
  CHART=CSV.parse(IO.read(File.dirname(__FILE__)+'/../lib/data/comparison.csv'))
  COMPARE=[:hanyu, :wadegiles, :zhuyin, :tongyong]

  # Runs every ordered pair of systems, including a system against itself.
  def test_do_comparisons
    COMPARE.product(COMPARE).each { |source, target| compare(source, target) }
  end

  # Reads each chart row in the +from+ system, writes it in the +to+ system
  # and asserts that the result equals the chart's +to+ column.
  def compare(from, to)
    reader = Pinyin::Reader.new(from, :no_tones)
    writer = Pinyin::Writer.new(to, :no_tones)

    header = CHART[0]
    from_column = header.index(from.to_s)
    to_column = header.index(to.to_s)

    CHART[1..-1].each do |row|
      expected = row[to_column].strip
      actual = writer << (reader << row[from_column].strip)
      assert_equal(expected, actual, "Converting from #{from} to #{to} value #{row[to_column]}")
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'pinyin.rb'
|
3
|
+
require 'yaml'
|
4
|
+
$KCODE='u'
|
5
|
+
# Generates one Test::Unit test case class per entry of valid_pinyin.yaml.
# The YAML is read as a two-level mapping: outer key -> inner key -> hanyu
# string. The outer key is used as a constant name under Final and the inner
# key as a constant name under Initial.
module HanyuCoverage
  grid=YAML.load(IO.read(File.dirname(__FILE__)+'/../lib/data/valid_pinyin.yaml'))
  grid.each do |fname, row|
    row.each do |iname, hanyu|
      # NOTE(review): the class and method bodies are built by string
      # interpolation and eval'ed, so the YAML file must stay trusted,
      # project-local data.
      # NOTE(review): test_parse_* exercises Writer#unparse and
      # test_unparse_* exercises Reader#parse; the names look swapped but
      # are kept so existing test names do not change.
      #
      # The parse result used to be unwrapped with `ts=*...`, which yields
      # the single element only on Ruby 1.8; on later versions it always
      # yields an Array. The explicit unwrap below gives the 1.8 result on
      # every version (presumably parse returns an array of syllables;
      # confirm against Reader#parse).
      eval %[
        class Test_#{hanyu} < Test::Unit::TestCase
          include Pinyin
          def initialize(s)
            super(s)
            @reader = Reader.new(:hanyu, :no_tones)
            @writer = Writer.new(:hanyu, :no_tones)
          end

          def test_parse_#{hanyu}
            assert_equal('#{hanyu}', @writer.unparse(Syllable.new(Initial::#{iname}, Final::#{fname}, Tones::NEUTRAL_TONE)), 'Wrong hanyu for Initial::#{iname}+Final::#{fname}, expected `#{hanyu}` ')
          end

          def test_unparse_#{hanyu}
            parsed=@reader.parse('#{hanyu}')
            ts=(parsed.is_a?(Array) && parsed.size <= 1) ? parsed.first : parsed
            assert_not_nil(ts, 'Reader<:hanyu, :no_tone>#parse("#{hanyu}") returned nil')
            assert_equal(Initial::#{iname}, ts.initial, 'Wrong initial for `#{hanyu}`, expected Initial::#{iname}')
            assert_equal(Final::#{fname}, ts.final, 'Wrong final for `#{hanyu}`, expected Final::#{fname}')
          end
        end
      ]
    end
  end

end
|
metadata
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.0
|
3
|
+
specification_version: 1
|
4
|
+
name: pinyin
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.0.1
|
7
|
+
date: 2007-07-26 00:00:00 +02:00
|
8
|
+
summary: A conversion library for Chinese transcription methods in Ruby
|
9
|
+
require_paths:
|
10
|
+
- lib/
|
11
|
+
email: pinyin at menolikespam arnebrasseur dot net
|
12
|
+
homepage: http://svn.arnebrasseur.net/pinyin
|
13
|
+
rubyforge_project:
|
14
|
+
description:
|
15
|
+
autorequire:
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: false
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Arne Brasseur
|
31
|
+
files:
|
32
|
+
- examples
|
33
|
+
- lib
|
34
|
+
- rakefile
|
35
|
+
- test
|
36
|
+
- TODO
|
37
|
+
- examples/cgiform
|
38
|
+
- examples/hello.rb
|
39
|
+
- examples/cgiform/cgiform.rb
|
40
|
+
- examples/cgiform/template.rhtml
|
41
|
+
- lib/conversions.rb
|
42
|
+
- lib/data
|
43
|
+
- lib/exception.rb
|
44
|
+
- lib/groundwork.rb
|
45
|
+
- lib/pinyin.rb
|
46
|
+
- lib/support.rb
|
47
|
+
- lib/tones
|
48
|
+
- lib/tones.rb
|
49
|
+
- lib/data/comparison.csv
|
50
|
+
- lib/data/final.csv
|
51
|
+
- lib/data/initial.csv
|
52
|
+
- lib/data/paladiy.txt
|
53
|
+
- lib/data/rules.yaml
|
54
|
+
- lib/data/valid_pinyin.yaml
|
55
|
+
- lib/tones/accents.rb
|
56
|
+
- lib/tones/marks.rb
|
57
|
+
- lib/tones/no_tones.rb
|
58
|
+
- lib/tones/numbers.rb
|
59
|
+
- test/comparison_test.rb
|
60
|
+
- test/hanyu_coverage.rb
|
61
|
+
test_files: []
|
62
|
+
|
63
|
+
rdoc_options: []
|
64
|
+
|
65
|
+
extra_rdoc_files: []
|
66
|
+
|
67
|
+
executables: []
|
68
|
+
|
69
|
+
extensions: []
|
70
|
+
|
71
|
+
requirements: []
|
72
|
+
|
73
|
+
dependencies: []
|
74
|
+
|