pinyin 0.0.1 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +12 -0
- data/Manifest.txt +31 -0
- data/README.txt +725 -0
- data/Rakefile +36 -0
- data/TODO +2 -1
- data/examples/cgiform/cgiform.rb +0 -0
- data/examples/cgiform/template.rhtml +0 -0
- data/examples/hello.rb +0 -0
- data/lib/pinyin.rb +31 -12
- data/lib/pinyin/conversion.rb +51 -0
- data/lib/{conversions.rb → pinyin/conversions.rb} +7 -6
- data/lib/pinyin/conversions/hanyu.rb +77 -0
- data/lib/{data → pinyin/data}/comparison.csv +0 -0
- data/lib/{data → pinyin/data}/final.csv +0 -0
- data/lib/{data → pinyin/data}/initial.csv +0 -0
- data/lib/{data → pinyin/data}/paladiy.txt +0 -0
- data/lib/{data → pinyin/data}/rules.yaml +0 -0
- data/lib/{data → pinyin/data}/valid_pinyin.yaml +2 -3
- data/lib/{exception.rb → pinyin/exception.rb} +0 -0
- data/lib/{groundwork.rb → pinyin/groundwork.rb} +40 -5
- data/lib/pinyin/string.rb +14 -0
- data/lib/{support.rb → pinyin/support.rb} +1 -5
- data/lib/pinyin/tones.rb +47 -0
- data/lib/{tones → pinyin/tones}/accents.rb +14 -11
- data/lib/{tones → pinyin/tones}/marks.rb +10 -5
- data/lib/pinyin/tones/no_tones.rb +6 -0
- data/lib/pinyin/tones/numbers.rb +25 -0
- data/rakefile +17 -34
- data/script/update +4 -0
- data/test/{comparison_test.rb → test_comparison.rb} +1 -1
- data/test/{hanyu_coverage.rb → test_hanyu_coverage.rb} +3 -1
- metadata +92 -61
- data/lib/tones.rb +0 -19
- data/lib/tones/no_tones.rb +0 -16
- data/lib/tones/numbers.rb +0 -24
data/lib/pinyin/tones.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
module Pinyin
  #
  # Base class for Tone classes.
  #
  # The class-level methods below are the defaults for a notation that
  # carries no tone information: syllables pass through untouched and every
  # syllable is reported as having the neutral tone.
  #
  class Tone
    # Range of tone numbers accepted without adjustment.
    VALID_TONES = 1..5
    # The neutral tone is numbered 5, which is also the highest tone number.
    MAX_TONE = NEUTRAL_TONE = 5

    class <<self
      # Returns the syllable +s+ unchanged; the tone +t+ is ignored.
      def add_tone(s,t)
        s
      end

      # Always reports NEUTRAL_TONE, whatever +s+ contains.
      def peek_tone(s)
        NEUTRAL_TONE
      end

      # Returns the pair [NEUTRAL_TONE, s]; +s+ is returned as given.
      def pop_tone(s)
        [NEUTRAL_TONE, s]
      end

      private

      # Maps an arbitrary integer tone +t+ onto VALID_TONES.
      #
      # Values already inside VALID_TONES are returned as they are. Anything
      # else is wrapped modulo MAX_TONE, and a wrapped value of 0 becomes
      # NEUTRAL_TONE.
      #
      # The wrapped value is returned explicitly in both cases: a trailing
      # `t = NEUTRAL_TONE if t == 0` would evaluate to nil whenever the
      # wrapped value is non-zero.
      def normalize(t)
        return t if VALID_TONES === t

        wrapped = t % MAX_TONE
        wrapped == 0 ? NEUTRAL_TONE : wrapped
      end

    end
  end
end
|
35
|
+
|
36
|
+
require "pinyin/tones/marks"
|
37
|
+
require "pinyin/tones/numbers"
|
38
|
+
require "pinyin/tones/accents"
|
39
|
+
require "pinyin/tones/no_tones"
|
40
|
+
|
41
|
+
module Pinyin
  # Constants shared by the tone notation classes.
  module Tones
    # The four tone notation classes, in a fixed order.
    All = [Numbers, Marks, Accents, NoTones]

    # The neutral tone is numbered 5, which is also the highest tone number.
    NEUTRAL_TONE = 5
    MAX_TONE = NEUTRAL_TONE

    # Range of tone numbers that need no adjustment.
    VALID_TONES = (1..5)
  end
end
|
@@ -1,7 +1,8 @@
|
|
1
1
|
module Pinyin
|
2
2
|
module Tones
|
3
|
-
class Accents
|
4
|
-
|
3
|
+
class Accents < Tone
|
4
|
+
class <<self
|
5
|
+
|
5
6
|
UNICODE_TONE_GLYPHS={
|
6
7
|
:a=>[97, 257, 225, 462, 224],
|
7
8
|
:e=>[101, 275, 233, 283, 232],
|
@@ -11,49 +12,51 @@ module Pinyin
|
|
11
12
|
:v=>[252, 470, 472, 474, 476]
|
12
13
|
}
|
13
14
|
|
14
|
-
def
|
15
|
+
def tone_glyph(letter,tone)
|
15
16
|
if (u=UNICODE_TONE_GLYPHS[letter.to_sym][tone%MAX_TONE])
|
16
17
|
[u].pack('U')
|
17
18
|
end
|
18
19
|
end
|
19
20
|
|
20
|
-
def
|
21
|
+
def add_tone(syll, tone)
|
22
|
+
syll.gsub!('ü','v')
|
21
23
|
tone %= MAX_TONE
|
22
24
|
case syll
|
23
25
|
when /a/ : syll.sub(/a/, tone_glyph(:a,tone))
|
24
26
|
when /e/ : syll.sub(/e/, tone_glyph(:e,tone))
|
25
27
|
when /o/ : syll.sub(/o/, tone_glyph(:o,tone))
|
26
|
-
when /(i|
|
28
|
+
when /(i|u|v)/ : syll.sub($1, tone_glyph($1,tone))
|
27
29
|
else syll
|
28
30
|
end
|
29
31
|
end
|
30
32
|
|
31
|
-
def
|
33
|
+
def peek_tone(syll)
|
32
34
|
unpacked = syll.unpack('U*')
|
33
35
|
each_tone_glyph do |vowel, tones|
|
34
36
|
tone_glyph=unpacked.find {|t| tones.include?(t)}
|
35
|
-
|
37
|
+
normalize( tones.index(tone_glyph) ) if tone_glyph
|
36
38
|
end
|
37
39
|
end
|
38
40
|
|
39
|
-
def
|
41
|
+
def pop_tone(syll)
|
40
42
|
unpacked = syll.unpack('U*')
|
41
43
|
each_tone_glyph do |vowel, tones|
|
42
44
|
if tone_glyph = unpacked.find {|t| tones.include?(t)}
|
43
45
|
unpacked[unpacked.index(tone_glyph)]=vowel.to_s[0]
|
44
|
-
break [
|
46
|
+
break [normalize(tones.index(tone_glyph)), unpacked.pack('U*')]
|
45
47
|
end
|
46
48
|
end
|
47
49
|
end
|
48
50
|
|
49
51
|
private
|
50
|
-
def
|
51
|
-
[:a,:e,:i,:o,:u].each do |v| #Order is significant
|
52
|
+
def each_tone_glyph
|
53
|
+
[:a,:e,:i,:o,:u,:v].each do |v| #Order is significant
|
52
54
|
vowel, tones = v, UNICODE_TONE_GLYPHS[v]
|
53
55
|
yield vowel,tones
|
54
56
|
end
|
55
57
|
end
|
56
58
|
|
57
59
|
end
|
60
|
+
end
|
58
61
|
end
|
59
62
|
end
|
@@ -1,12 +1,15 @@
|
|
1
1
|
module Pinyin
|
2
2
|
module Tones
|
3
|
-
class Marks
|
3
|
+
class Marks < Tone
|
4
|
+
class <<self
|
5
|
+
|
4
6
|
GLYPHS=['˙', '', 'ˊ', 'ˇ', 'ˋ']
|
5
|
-
|
6
|
-
|
7
|
+
|
8
|
+
def add_tone(syll,tone)
|
9
|
+
syll + GLYPHS[normalize(tone) % 5]
|
7
10
|
end
|
8
11
|
|
9
|
-
def
|
12
|
+
def peek_tone(syll)
|
10
13
|
case syll
|
11
14
|
when /ˊ/ : 2
|
12
15
|
when /ˇ/ : 3
|
@@ -17,9 +20,11 @@ module Pinyin
|
|
17
20
|
end
|
18
21
|
end
|
19
22
|
|
20
|
-
def
|
23
|
+
def pop_tone(syll)
|
21
24
|
[ peek_tone(syll), syll[/\A[^#{GLYPHS.join}]+/] ]
|
22
25
|
end
|
26
|
+
|
27
|
+
end
|
23
28
|
end
|
24
29
|
end
|
25
30
|
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Pinyin
  module Tones
    # Tone notation that writes the tone as a digit appended to the
    # syllable.
    class Numbers < Tone
      class <<self

        # Appends the normalized +tone+ number, as text, to +syll+.
        def add_tone(syll, tone)
          suffix = normalize(tone).to_s
          syll + suffix
        end

        # Reads the tone from a digit at the end of +syll+ without modifying
        # it. Falls back to NEUTRAL_TONE when +syll+ does not end in a digit.
        def peek_tone(syll)
          return NEUTRAL_TONE unless syll =~ /(\d)\Z/

          normalize(Integer($1))
        end

        # Returns the pair [tone, syllable], where the syllable part is the
        # leading run of non-digit characters of +syll+.
        def pop_tone(syll)
          tone = peek_tone(syll)
          bare = syll[/\A\D+/]
          [ tone, bare ]
        end

      end
    end
  end
end
|
data/rakefile
CHANGED
@@ -1,39 +1,22 @@
|
|
1
|
-
require 'rake'
|
2
|
-
require 'rake/testtask'
|
3
1
|
require 'rubygems'
|
4
|
-
|
5
|
-
require 'rake/gempackagetask'
|
2
|
+
require 'hoe'
|
6
3
|
|
4
|
+
$:.unshift './lib'
|
7
5
|
|
8
|
-
|
6
|
+
require 'pinyin'
|
9
7
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
s.homepage = "http://svn.arnebrasseur.net/pinyin"
|
25
|
-
s.platform = Gem::Platform::RUBY
|
26
|
-
s.summary = "A conversion library for Chinese transcription methods in Ruby"
|
27
|
-
s.files = FileList["**/*"]
|
28
|
-
#FileList["{bin,docs,lib,test}/**/*"].exclude("rdoc").to_a
|
29
|
-
s.require_path = "lib/"
|
30
|
-
# not sure yet what these are
|
31
|
-
# s.autorequire = "pinyin"
|
32
|
-
# s.test_file = "test/runtest.rb"
|
33
|
-
# s.has_rdoc = true
|
34
|
-
# s.extra_rdoc_files = ['README']
|
35
|
-
end
|
36
|
-
|
37
|
-
Rake::GemPackageTask.new(spec) do |pkg|
|
38
|
-
pkg.need_tar = true
|
8
|
+
# Gem packaging configuration, handled by Hoe.
Hoe.new('pinyin', Pinyin::VERSION) do |hoe|
  hoe.rubyforge_name = 'pinyin'
  hoe.summary = 'A conversion library for Chinese transcription methods like Hanyu Pinyin, Bopomofo and Wade-Giles.'
  # Description and change log are pulled from paragraphs of the project's text files.
  hoe.description = hoe.paragraphs_of('README.txt', 2).join
  hoe.url = 'http://rubyforge.org/projects/pinyin'
  hoe.changes = hoe.paragraphs_of('History.txt', 0..1).join("\n\n")
  hoe.email = 'pinyin@arnebrasseur.net'
  hoe.author = 'Arne Brasseur'
  # Runtime dependency on facets.
  hoe.extra_deps << ['facets', '>= 2.4.0']
  # Additional gemspec fields.
  hoe.spec_extras = {
    :extra_rdoc_files => ['README.txt', 'History.txt'],
    :rdoc_options => ['--main', 'README.txt'],
    :platform => Gem::Platform::RUBY
  }
end
|
data/script/update
ADDED
@@ -7,7 +7,7 @@ require 'csv'
|
|
7
7
|
# Since I can't find another reference of the hanyu pinyin 'lo', I have removed it from the table
|
8
8
|
|
9
9
|
class TestCompare < Test::Unit::TestCase
|
10
|
-
CHART=CSV.parse(IO.read(File.dirname(__FILE__)+'/../lib/data/comparison.csv'))
|
10
|
+
CHART=CSV.parse(IO.read(File.dirname(__FILE__)+'/../lib/pinyin/data/comparison.csv'))
|
11
11
|
COMPARE=[:hanyu, :wadegiles, :zhuyin, :tongyong]
|
12
12
|
|
13
13
|
|
@@ -1,9 +1,11 @@
|
|
1
1
|
require 'test/unit'
|
2
2
|
require 'pinyin.rb'
|
3
3
|
require 'yaml'
|
4
|
+
|
4
5
|
$KCODE='u'
|
6
|
+
|
5
7
|
module HanyuCoverage
|
6
|
-
grid=YAML.load(IO.read(File.dirname(__FILE__)+'/../lib/data/valid_pinyin.yaml'))
|
8
|
+
grid=YAML.load(IO.read(File.dirname(__FILE__)+'/../lib/pinyin/data/valid_pinyin.yaml'))
|
7
9
|
grid.each do |fname, row|
|
8
10
|
row.each do |iname, hanyu|
|
9
11
|
eval %[
|
metadata
CHANGED
@@ -1,74 +1,105 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.0
|
3
|
-
specification_version: 1
|
4
2
|
name: pinyin
|
5
3
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.
|
7
|
-
date: 2007-07-26 00:00:00 +02:00
|
8
|
-
summary: A conversion library for Chinese transcription methods in Ruby
|
9
|
-
require_paths:
|
10
|
-
- lib/
|
11
|
-
email: pinyin at menolikespam arnebrasseur dot net
|
12
|
-
homepage: http://svn.arnebrasseur.net/pinyin
|
13
|
-
rubyforge_project:
|
14
|
-
description:
|
15
|
-
autorequire:
|
16
|
-
default_executable:
|
17
|
-
bindir: bin
|
18
|
-
has_rdoc: false
|
19
|
-
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
-
requirements:
|
21
|
-
- - ">"
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: 0.0.0
|
24
|
-
version:
|
4
|
+
version: 0.1.4
|
25
5
|
platform: ruby
|
26
|
-
signing_key:
|
27
|
-
cert_chain:
|
28
|
-
post_install_message:
|
29
6
|
authors:
|
30
7
|
- Arne Brasseur
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
- rakefile
|
35
|
-
- test
|
36
|
-
- TODO
|
37
|
-
- examples/cgiform
|
38
|
-
- examples/hello.rb
|
39
|
-
- examples/cgiform/cgiform.rb
|
40
|
-
- examples/cgiform/template.rhtml
|
41
|
-
- lib/conversions.rb
|
42
|
-
- lib/data
|
43
|
-
- lib/exception.rb
|
44
|
-
- lib/groundwork.rb
|
45
|
-
- lib/pinyin.rb
|
46
|
-
- lib/support.rb
|
47
|
-
- lib/tones
|
48
|
-
- lib/tones.rb
|
49
|
-
- lib/data/comparison.csv
|
50
|
-
- lib/data/final.csv
|
51
|
-
- lib/data/initial.csv
|
52
|
-
- lib/data/paladiy.txt
|
53
|
-
- lib/data/rules.yaml
|
54
|
-
- lib/data/valid_pinyin.yaml
|
55
|
-
- lib/tones/accents.rb
|
56
|
-
- lib/tones/marks.rb
|
57
|
-
- lib/tones/no_tones.rb
|
58
|
-
- lib/tones/numbers.rb
|
59
|
-
- test/comparison_test.rb
|
60
|
-
- test/hanyu_coverage.rb
|
61
|
-
test_files: []
|
62
|
-
|
63
|
-
rdoc_options: []
|
64
|
-
|
65
|
-
extra_rdoc_files: []
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
66
11
|
|
12
|
+
date: 2008-07-18 00:00:00 +02:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: facets
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 2.4.0
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: hoe
|
27
|
+
type: :development
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.7.0
|
34
|
+
version:
|
35
|
+
description: Pinyin can convert between various systems for phonetically writing Mandarin Chinese. It can also handle various representation of tones, so it can be used to convert pinyin with numbers to pinyin with tones.
|
36
|
+
email: pinyin@arnebrasseur.net
|
67
37
|
executables: []
|
68
38
|
|
69
39
|
extensions: []
|
70
40
|
|
41
|
+
extra_rdoc_files:
|
42
|
+
- README.txt
|
43
|
+
- History.txt
|
44
|
+
files:
|
45
|
+
- History.txt
|
46
|
+
- Manifest.txt
|
47
|
+
- README.txt
|
48
|
+
- Rakefile
|
49
|
+
- TODO
|
50
|
+
- examples/cgiform/cgiform.rb
|
51
|
+
- examples/cgiform/template.rhtml
|
52
|
+
- examples/hello.rb
|
53
|
+
- lib/pinyin.rb
|
54
|
+
- lib/pinyin/conversion.rb
|
55
|
+
- lib/pinyin/conversions.rb
|
56
|
+
- lib/pinyin/conversions/hanyu.rb
|
57
|
+
- lib/pinyin/data/comparison.csv
|
58
|
+
- lib/pinyin/data/final.csv
|
59
|
+
- lib/pinyin/data/initial.csv
|
60
|
+
- lib/pinyin/data/paladiy.txt
|
61
|
+
- lib/pinyin/data/rules.yaml
|
62
|
+
- lib/pinyin/data/valid_pinyin.yaml
|
63
|
+
- lib/pinyin/exception.rb
|
64
|
+
- lib/pinyin/groundwork.rb
|
65
|
+
- lib/pinyin/string.rb
|
66
|
+
- lib/pinyin/support.rb
|
67
|
+
- lib/pinyin/tones.rb
|
68
|
+
- lib/pinyin/tones/accents.rb
|
69
|
+
- lib/pinyin/tones/marks.rb
|
70
|
+
- lib/pinyin/tones/no_tones.rb
|
71
|
+
- lib/pinyin/tones/numbers.rb
|
72
|
+
- rakefile
|
73
|
+
- script/update
|
74
|
+
- test/test_comparison.rb
|
75
|
+
- test/test_hanyu_coverage.rb
|
76
|
+
has_rdoc: true
|
77
|
+
homepage: http://rubyforge.org/projects/pinyin
|
78
|
+
post_install_message:
|
79
|
+
rdoc_options:
|
80
|
+
- --main
|
81
|
+
- README.txt
|
82
|
+
require_paths:
|
83
|
+
- lib
|
84
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: "0"
|
89
|
+
version:
|
90
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
91
|
+
requirements:
|
92
|
+
- - ">="
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
version: "0"
|
95
|
+
version:
|
71
96
|
requirements: []
|
72
97
|
|
73
|
-
|
74
|
-
|
98
|
+
rubyforge_project: pinyin
|
99
|
+
rubygems_version: 1.2.0
|
100
|
+
signing_key:
|
101
|
+
specification_version: 2
|
102
|
+
summary: A conversion library for Chinese transcription methods like Hanyu Pinyin, Bopomofo and Wade-Giles.
|
103
|
+
test_files:
|
104
|
+
- test/test_comparison.rb
|
105
|
+
- test/test_hanyu_coverage.rb
|
data/lib/tones.rb
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
|
2
|
-
module Pinyin
  # Tone constants and normalization.
  module Tones
    # Base names of the Ruby files found in the "tones" directory next to
    # this file.
    All = Dir[File.join(File.dirname(__FILE__), 'tones', '*.rb')].map do |path|
      path[/tones\/(.*)\.rb/, 1]
    end

    # The neutral tone is numbered 5, which is also the highest tone number.
    MAX_TONE = NEUTRAL_TONE = 5
    # Range of tone numbers that need no adjustment.
    VALID_TONES = 1..5
    # Replacements applied after wrapping: a wrapped value of 0 means neutral.
    CONVERSIONS = {0 => NEUTRAL_TONE}

    class << self
      # Maps an arbitrary integer tone +t+ onto VALID_TONES. In-range values
      # are returned as they are; anything else is wrapped modulo MAX_TONE and
      # then passed through CONVERSIONS.
      def normalize(t)
        return t if VALID_TONES === t

        wrapped = t % MAX_TONE
        CONVERSIONS[wrapped] || wrapped
      end
    end
  end
end
|
19
|
-
|