ting 0.3.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +5 -0
- data/.travis.yml +13 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +235 -0
- data/LICENSE.txt +674 -0
- data/{README.rdoc → README.md} +43 -35
- data/Rakefile +28 -15
- data/TODO +16 -15
- data/examples/hello.rb +12 -12
- data/lib/ting.rb +36 -61
- data/lib/ting/conversion.rb +6 -5
- data/lib/ting/conversions.rb +88 -80
- data/lib/ting/conversions/hanyu.rb +5 -9
- data/lib/ting/converter.rb +30 -0
- data/lib/ting/data/comparison.csv +410 -410
- data/lib/ting/data/final.csv +12 -10
- data/lib/ting/data/initial.csv +8 -7
- data/lib/ting/data/paladiy.txt +421 -421
- data/lib/ting/data/rules.yaml +38 -27
- data/lib/ting/data/valid_pinyin.yaml +454 -453
- data/lib/ting/exception.rb +14 -17
- data/lib/ting/groundwork.rb +181 -177
- data/lib/ting/procable.rb +7 -0
- data/lib/ting/reader.rb +27 -0
- data/lib/ting/string.rb +0 -15
- data/lib/ting/tones.rb +65 -65
- data/lib/ting/tones/accents.rb +75 -69
- data/lib/ting/tones/ipa.rb +1 -1
- data/lib/ting/tones/no_tones.rb +7 -7
- data/lib/ting/tones/numbers.rb +25 -25
- data/lib/ting/tones/supernum.rb +1 -1
- data/lib/ting/version.rb +1 -1
- data/lib/ting/writer.rb +23 -0
- data/spec/jruby_csv_spec.rb +78 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/ting_spec.rb +19 -0
- data/test/test_comparison.rb +43 -35
- data/test/test_hanyu_coverage.rb +42 -37
- data/ting.gemspec +23 -0
- metadata +95 -71
- data/examples/cgiform/cgiform.rb +0 -24
- data/examples/cgiform/template.rhtml +0 -69
- data/lib/ting/support.rb +0 -19
data/{README.rdoc → README.md}
RENAMED
@@ -1,4 +1,6 @@
|
|
1
|
-
|
1
|
+
[![Build Status](https://travis-ci.org/arnebrasseur/ting.png)](https://travis-ci.org/arnebrasseur/ting) [![Code Climate](https://codeclimate.com/github/arnebrasseur/ting.png)](https://codeclimate.com/github/arnebrasseur/ting)
|
2
|
+
|
3
|
+
# Ting
|
2
4
|
|
3
5
|
Ting can convert between various systems for phonetically
|
4
6
|
writing Mandarin Chinese. It can also handle various representation
|
@@ -8,88 +10,94 @@ to pinyin with tones.
|
|
8
10
|
Hanyu Pinyin, Bopomofo, Wade-Giles, Tongyong Pinyin
|
9
11
|
and International Phonetic Alphabet (IPA) are supported.
|
10
12
|
|
11
|
-
|
13
|
+
## SYNOPSIS
|
12
14
|
|
13
|
-
To parse your strings create a
|
15
|
+
To parse your strings create a `Reader` object. Ting.reader() takes two
|
14
16
|
parameters : the transliteration format, and the way that tones are represented.
|
15
17
|
|
16
18
|
To some extent these can be mixed and matched.
|
17
19
|
|
18
|
-
To generate pinyin/wade-giles/etc. create a
|
20
|
+
To generate pinyin/wade-giles/etc. create a `Writer` object. Use Ting.writer()
|
19
21
|
|
20
|
-
|
22
|
+
### Formats
|
21
23
|
|
22
|
-
*
|
23
|
-
*
|
24
|
-
*
|
25
|
-
*
|
26
|
-
*
|
24
|
+
* `:hanyu` Hanyu Pinyin
|
25
|
+
* `:zhuyin` Zhuyin Fuhao (a.k.a. Bopomofo)
|
26
|
+
* `:wadegiles` Wade Giles
|
27
|
+
* `:ipa` International Phonetic Alphabet
|
28
|
+
* `:tongyong` Tongyong Pinyin
|
27
29
|
|
28
|
-
|
30
|
+
### Tones
|
29
31
|
|
30
|
-
*
|
31
|
-
*
|
32
|
-
*
|
33
|
-
*
|
34
|
-
*
|
35
|
-
*
|
32
|
+
* `:numbers` Simply put a number after the syllable, easy to type
|
33
|
+
* `:accents` Use diacritics, follows the Hanyu Pinyin rules, there needs to be at least one vowel to apply this to, not usable with IPA or Bopomofo
|
34
|
+
* `:supernum` Superscript numerals, typically used for Wade-Giles
|
35
|
+
* `:marks` Tone mark after the syllable, typically used for Bopomofo
|
36
|
+
* `:ipa` IPA tone marks
|
37
|
+
* `:no_tones` Use no tones
|
36
38
|
|
37
|
-
|
39
|
+
## Examples
|
38
40
|
|
39
41
|
Parse Hanyu Pinyin
|
40
42
|
|
43
|
+
````ruby
|
41
44
|
require 'ting'
|
42
45
|
|
43
46
|
reader = Ting.reader(:hanyu, :numbers)
|
44
|
-
reader
|
45
|
-
# => [<Ting::Syllable <initial=Empty, final=Uo, tone=3>>,
|
46
|
-
# <Ting::Syllable <initial=Empty, final=Ai, tone=4>>,
|
47
|
+
reader.( "wo3 ai4 ni3" )
|
48
|
+
# => [<Ting::Syllable <initial=Empty, final=Uo, tone=3>>,
|
49
|
+
# <Ting::Syllable <initial=Empty, final=Ai, tone=4>>,
|
47
50
|
# <Ting::Syllable <initial=Ne, final=I, tone=3>>]
|
51
|
+
````
|
48
52
|
|
49
53
|
Generate Bopomofo
|
50
54
|
|
55
|
+
````ruby
|
51
56
|
zhuyin = Ting.writer(:zhuyin, :marks)
|
52
|
-
zhuyin
|
57
|
+
zhuyin.(reader.("wo3 ai4 ni3"))
|
53
58
|
# => "ㄨㄛˇ ㄞˋ ㄋㄧˇ"
|
59
|
+
````
|
54
60
|
|
55
61
|
Generate Wade-Giles
|
56
62
|
|
63
|
+
````ruby
|
57
64
|
wadegiles = Ting.writer(:wadegiles, :supernum)
|
58
|
-
wadegiles
|
65
|
+
wadegiles.(reader.("qing2 kuang4 ru2 he2"))
|
59
66
|
# => "ch`ing² k`uang⁴ ju² ho²"
|
67
|
+
````
|
60
68
|
|
61
69
|
Generate IPA
|
62
70
|
|
71
|
+
````ruby
|
63
72
|
ipa = Ting.writer(:ipa, :ipa)
|
64
|
-
ipa
|
73
|
+
ipa.(reader.("you3 peng2 zi4 yuan2 fang1 lai2"))
|
65
74
|
# => "iou˧˩˧ pʰeŋ˧˥ ts˥˩ yɛn˧˥ faŋ˥˥ lai˧˥"
|
75
|
+
````
|
66
76
|
|
67
77
|
Since this is such a common use case, there is a convenience method to add diacritics to pinyin.
|
68
78
|
|
69
|
-
|
70
|
-
|
71
|
-
"wo3 ai4 ni3".pretty_tones
|
79
|
+
````ruby
|
80
|
+
Ting.pretty_tones "wo3 ai4 ni3"
|
72
81
|
# => "wǒ ài nǐ"
|
82
|
+
````
|
73
83
|
|
74
84
|
Note that syllables need to be separated by spaces, feeding "peng2you3" to the parser
|
75
|
-
does not work. The String#pretty_tones method does handle these things a bit more gracefully.
|
85
|
+
does not work. The `Ting.pretty_tones` method does handle these things a bit more gracefully.
|
76
86
|
|
77
87
|
If you need to parse input that does not conform, consider using a regexp to scan for valid
|
78
|
-
syllables, then feed the syllables to the parser one by one. Have a look at
|
88
|
+
syllables, then feed the syllables to the parser one by one. Have a look at `#pretty_tones` for
|
79
89
|
an example of how to do this.
|
80
90
|
|
81
|
-
|
91
|
+
## REQUIREMENTS
|
82
92
|
|
83
93
|
* none, Ting uses nothing but Ruby
|
84
|
-
* $KCODE should probably be set to "UTF8", or you might run into problems.
|
85
94
|
|
86
|
-
|
95
|
+
## INSTALL
|
87
96
|
|
88
97
|
* gem install ting
|
89
98
|
|
90
|
-
|
99
|
+
## LICENSE
|
91
100
|
|
92
|
-
Copyright (c)
|
101
|
+
Copyright (c) 2007-2014, Arne Brasseur. (http://www.arnebrasseur.net)
|
93
102
|
|
94
103
|
Available as Free Software under the GPLv3 License, see LICENSE.txt for details
|
95
|
-
|
data/Rakefile
CHANGED
@@ -1,15 +1,28 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
|
3
|
-
|
4
|
-
require 'rake
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
1
|
+
require 'rubygems'
|
2
|
+
require 'pathname'
|
3
|
+
|
4
|
+
require 'rake'
|
5
|
+
require 'rake/testtask'
|
6
|
+
require 'rspec/core/rake_task'
|
7
|
+
require 'rubygems/package_task'
|
8
|
+
task :default => :all_tests
|
9
|
+
task :all_tests => [:test, :spec]
|
10
|
+
|
11
|
+
Rake::TestTask.new(:test) do |t|
|
12
|
+
$: << File.dirname(__FILE__) + '/lib'
|
13
|
+
t.pattern = 'test/*.rb'
|
14
|
+
#t.verbose = true
|
15
|
+
#t.warning = true
|
16
|
+
end
|
17
|
+
|
18
|
+
RSpec::Core::RakeTask.new(:spec)
|
19
|
+
|
20
|
+
spec = Gem::Specification.load(Pathname.glob('*.gemspec').first.to_s)
|
21
|
+
Gem::PackageTask.new(spec).define
|
22
|
+
|
23
|
+
desc "Push gem to rubygems.org"
|
24
|
+
task :push => :gem do
|
25
|
+
sh "git tag v#{Ting::VERSION}"
|
26
|
+
sh "git push --tags"
|
27
|
+
sh "gem push pkg/hexp-#{Ting::VERSION}.gem"
|
28
|
+
end
|
data/TODO
CHANGED
@@ -1,15 +1,16 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
-
|
8
|
-
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
1
|
+
This used to be a checklist style TODO, but with the pace that this thing evolves, I think it's more interesting to add some reflections.
|
2
|
+
|
3
|
+
I wrote this Gem back in 2006. When I look at the code now there are plenty of things I would do differently. The code's biggest vice is that it's too 'clever', i.e. opaque. But it gets the job done.
|
4
|
+
|
5
|
+
- Additional transcription systems that could be added
|
6
|
+
- MPS2 (or how do you call that)
|
7
|
+
- Gwoyueh
|
8
|
+
- Yale
|
9
|
+
|
10
|
+
One of the most interesting things about this undertaking has been learning the fine details of Mandarin romanization, and phonology.
|
11
|
+
|
12
|
+
E.g. is 'lo' valid pinyin, or should it be 'luo'? CEDICT has entries for both, with the first being used only for exclamations. But most tables on pinyin won't list 'lo'. How about 'ng'? 'mm'? ...
|
13
|
+
|
14
|
+
The core lib basically does translation on the syllable level. It can handle strings with syllables nicely separated by spaces. Successive layers should make it possible to convert a sentence with interpunction into a different system. It should be possible to write compound words together in Hanyu, and have the syllables separated by dashes when converting to WG. For instance:
|
15
|
+
|
16
|
+
Wǒ de péngyǒu, shì dàifu. => Wǒ te p`éng-yǔ, shih tài-fu.
|
data/examples/hello.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
|
-
$: << File.join(File.dirname(__FILE__), '../lib')
|
2
|
-
|
3
|
-
require 'pinyin'
|
4
|
-
|
5
|
-
conv1 = Pinyin::Converter.new(:hanyu, :numbers, :wadegiles, :accents)
|
6
|
-
conv2 = Pinyin::Converter.new(:wadegiles, :accents, :zhuyin, :marks)
|
7
|
-
|
8
|
-
pinyin = 'wo3 de2 peng2 you3 shi4 dai4 fu'
|
9
|
-
wadegiles = conv1 << pinyin
|
10
|
-
zhuyin = conv2 << wadegiles
|
11
|
-
|
12
|
-
puts pinyin, wadegiles, zhuyin
|
1
|
+
$: << File.join(File.dirname(__FILE__), '../lib')
|
2
|
+
|
3
|
+
require 'pinyin'
|
4
|
+
|
5
|
+
conv1 = Pinyin::Converter.new(:hanyu, :numbers, :wadegiles, :accents)
|
6
|
+
conv2 = Pinyin::Converter.new(:wadegiles, :accents, :zhuyin, :marks)
|
7
|
+
|
8
|
+
pinyin = 'wo3 de2 peng2 you3 shi4 dai4 fu'
|
9
|
+
wadegiles = conv1 << pinyin
|
10
|
+
zhuyin = conv2 << wadegiles
|
11
|
+
|
12
|
+
puts pinyin, wadegiles, zhuyin
|
data/lib/ting.rb
CHANGED
@@ -1,13 +1,12 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
1
3
|
# Handle several romanization systems for Mandarin Chinese
|
2
4
|
#
|
3
5
|
# Author:: Arne Brasseur (arne@arnebrasseur.net)
|
4
|
-
# Copyright:: Copyright (c) 2007-
|
6
|
+
# Copyright:: Copyright (c) 2007-2014, Arne Brasseur
|
5
7
|
# Licence:: GNU General Public License, v3
|
6
8
|
|
7
|
-
$: << File.dirname(__FILE__)
|
8
|
-
|
9
9
|
require 'ting/version'
|
10
|
-
require 'ting/support'
|
11
10
|
require 'ting/groundwork'
|
12
11
|
require 'ting/exception'
|
13
12
|
|
@@ -16,77 +15,53 @@ require 'ting/conversion'
|
|
16
15
|
require 'ting/conversions'
|
17
16
|
require 'ting/conversions/hanyu'
|
18
17
|
|
18
|
+
require 'ting/procable'
|
19
|
+
require 'ting/reader'
|
20
|
+
require 'ting/writer'
|
21
|
+
require 'ting/converter'
|
22
|
+
|
19
23
|
module Ting
|
20
|
-
class
|
21
|
-
def initialize(conv, tone)
|
22
|
-
@conv = conv.to_s
|
23
|
-
@tone = Tones.const_get tone.to_s.camelcase
|
24
|
-
@cache = {}
|
25
|
-
end
|
24
|
+
class << self
|
26
25
|
|
27
|
-
def
|
28
|
-
|
29
|
-
tone,syll = @tone.pop_tone(s)
|
30
|
-
tsyll = Conversions.parse(@conv,syll)
|
31
|
-
ini, fin = tsyll.initial, tsyll.final
|
32
|
-
unless tone && fin && ini
|
33
|
-
raise ParseError.new(s,pos),"Illegal syllable <#{s}> in input <#{str}> at position #{pos}."
|
34
|
-
end
|
35
|
-
Syllable.new(ini, fin, tone)
|
36
|
-
end
|
37
|
-
rescue Object => e
|
38
|
-
raise ParseError.new(str,0), "Parsing of #{str.inspect} failed : #{e}"
|
26
|
+
def reader(format, tones)
|
27
|
+
Reader.new(format,tones)
|
39
28
|
end
|
40
29
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
class Writer
|
45
|
-
def initialize(conv, tone)
|
46
|
-
@conv = conv.to_s
|
47
|
-
@tone = Tones.const_get tone.to_s.camelcase
|
48
|
-
@cache = {}
|
30
|
+
def writer(format, tones)
|
31
|
+
Writer.new(format,tones)
|
49
32
|
end
|
50
33
|
|
51
|
-
def
|
52
|
-
|
53
|
-
return @cache[py] ||= if py.respond_to? :map
|
54
|
-
py.map(&conv).join(' ')
|
55
|
-
else
|
56
|
-
conv.call(py)
|
57
|
-
end
|
34
|
+
def from(from, from_tone)
|
35
|
+
Converter.new(from, from_tone, nil, nil)
|
58
36
|
end
|
59
37
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
def initialize(from, from_tone, to, to_tone)
|
66
|
-
@reader = Reader.new(from, from_tone)
|
67
|
-
@writer = Writer.new(to, to_tone)
|
38
|
+
def camelize(str)
|
39
|
+
str = str.dup
|
40
|
+
str.gsub!(/(?:_+|-+)([a-z])/){ $1.upcase }
|
41
|
+
str.gsub!(/(\A|\s)([a-z])/){ $1 + $2.upcase }
|
42
|
+
str
|
68
43
|
end
|
69
44
|
|
70
|
-
|
71
|
-
|
45
|
+
SYLLABLE_CACHE = Hash.new do |hsh, syll|
|
46
|
+
hsh[syll] = Ting.writer(:hanyu, :accents).(
|
47
|
+
Ting.reader(:hanyu, :numbers).(syll.downcase)
|
48
|
+
)
|
72
49
|
end
|
73
50
|
|
74
|
-
alias :<< :convert
|
75
|
-
end
|
76
|
-
|
77
|
-
class <<self
|
78
|
-
READERS={}
|
79
|
-
WRITERS={}
|
80
51
|
|
81
|
-
def
|
82
|
-
|
52
|
+
def pretty_tones(string)
|
53
|
+
string.gsub('u:','ü').gsub(/[A-Za-züÜ]{1,5}\d/) do |syll|
|
54
|
+
SYLLABLE_CACHE[syll]
|
55
|
+
end
|
83
56
|
end
|
84
|
-
|
85
|
-
|
57
|
+
|
58
|
+
def bpmf(string)
|
59
|
+
string.gsub('u:','ü').scan(/[A-Za-züÜ]{1,5}\d/).map do |m|
|
60
|
+
Ting.writer(:zhuyin, :marks).(
|
61
|
+
Ting.reader(:hanyu, :numbers).(m.downcase)
|
62
|
+
)
|
63
|
+
end.join(' ')
|
86
64
|
end
|
87
|
-
end
|
88
65
|
|
66
|
+
end
|
89
67
|
end
|
90
|
-
|
91
|
-
|
92
|
-
Pinyin = Ting #legacy support
|
data/lib/ting/conversion.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
1
3
|
module Ting
|
2
|
-
|
4
|
+
|
3
5
|
#
|
4
|
-
# Base class for conversions like Hanyu pinyin,
|
6
|
+
# Base class for conversions like Hanyu pinyin,
|
5
7
|
# Wade-Giles, etc.
|
6
8
|
#
|
7
9
|
class Conversion
|
@@ -10,7 +12,7 @@ module Ting
|
|
10
12
|
# For Wade-Giles this is a dash, Hanyu pinyin
|
11
13
|
# uses a single quote in certain situations
|
12
14
|
attr_reader :syllable_separator
|
13
|
-
|
15
|
+
|
14
16
|
# The tone handling object
|
15
17
|
attr_reader :tones
|
16
18
|
|
@@ -30,7 +32,7 @@ module Ting
|
|
30
32
|
if Tone === tone
|
31
33
|
@tone = tone
|
32
34
|
else
|
33
|
-
@tone = Ting::Tones.const_get(tone.to_s
|
35
|
+
@tone = Ting::Tones.const_get(Ting.camelize(tone.to_s))
|
34
36
|
end
|
35
37
|
|
36
38
|
@name = self.class.name.underscore
|
@@ -48,4 +50,3 @@ module Ting
|
|
48
50
|
|
49
51
|
end
|
50
52
|
end
|
51
|
-
|
data/lib/ting/conversions.rb
CHANGED
@@ -1,80 +1,88 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
require 'yaml'
|
5
|
+
|
6
|
+
module Ting
|
7
|
+
module Conversions
|
8
|
+
All=[]
|
9
|
+
|
10
|
+
DATA_DIR=File.dirname(__FILE__)+'/data/'
|
11
|
+
|
12
|
+
#Load various representations for initials and finals
|
13
|
+
%w(Initial Final).each do |c|
|
14
|
+
klazz=Ting.const_get c
|
15
|
+
begin
|
16
|
+
CSV.open(DATA_DIR+c.downcase+'.csv', 'r:utf-8').each do |name, *values|
|
17
|
+
next if name == "name"
|
18
|
+
All << name.to_s unless All.include?(name) || name =~ /standalone/i
|
19
|
+
klazz.class_eval {attr_accessor name.to_sym}
|
20
|
+
values.each_with_index do |v,i|
|
21
|
+
klazz::All[i].send(name+'=', v && v.force_encoding('UTF-8'))
|
22
|
+
end
|
23
|
+
end
|
24
|
+
rescue
|
25
|
+
STDERR << "Bad data in #{c.downcase}.csv : #{$!}"
|
26
|
+
raise
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
30
|
+
|
31
|
+
#Substitution rules
|
32
|
+
@@rules=YAML::load(IO.read(DATA_DIR+'rules.yaml'))
|
33
|
+
|
34
|
+
def self.parse(type, string)
|
35
|
+
capitalized = (string.downcase != string && string.downcase.capitalize == string)
|
36
|
+
string = string.to_s.downcase
|
37
|
+
if (final = Final::All.find {|f| f.respond_to?("#{type}_standalone") && f.send("#{type}_standalone") == string})
|
38
|
+
Syllable.new(Initial::Empty, final, nil, capitalized)
|
39
|
+
else
|
40
|
+
finals = Final::All.dup
|
41
|
+
finals.unshift(finals.delete(Final::Uo)) #hack : move Uo to the front
|
42
|
+
#otherwise wadegiles parses 'lo' as Le+O rather than Le+Uo
|
43
|
+
#probably better to add a hardcoded 'overrule' table for these cases
|
44
|
+
Initial.each do |ini|
|
45
|
+
finals.each do |fin|
|
46
|
+
next if Syllable.illegal?(ini,fin)
|
47
|
+
if string == apply_rules(type, (ini.send(type)||'') + (fin.send(type)||''))
|
48
|
+
return Syllable.new(ini, fin, nil, capitalized)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
raise "Can't parse `#{string.inspect}'"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
def self.unparse(type, tsyll)
|
57
|
+
str = if tsyll.initial.send(type)
|
58
|
+
apply_rules(type, tsyll.initial.send(type) + (tsyll.final.send(type) || ''))
|
59
|
+
elsif tsyll.final.respond_to?(type.to_s+'_standalone') && standalone = tsyll.final.send(type.to_s+'_standalone')
|
60
|
+
standalone
|
61
|
+
else
|
62
|
+
apply_rules(type, tsyll.final.send(type))
|
63
|
+
end
|
64
|
+
(tsyll.capitalized? ? str.capitalize : str).force_encoding('UTF-8')
|
65
|
+
end
|
66
|
+
|
67
|
+
def self.tokenize(str)
|
68
|
+
[].tap do |tokens|
|
69
|
+
str,pos = str.dup, 0
|
70
|
+
while str && token = str[/[^' ]*/]
|
71
|
+
tokens << [token.strip, pos]
|
72
|
+
pos += token.length
|
73
|
+
str = str[/[' ]+(.*)/, 1]
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
private
|
79
|
+
def self.apply_rules(type, string)
|
80
|
+
string.dup.tap do |s|
|
81
|
+
@@rules[type] && @@rules[type].each do |rule|
|
82
|
+
s.gsub!(Regexp.new(rule['match']), rule['subst'])
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
end
|
88
|
+
end
|