pinyin 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/TODO ADDED
@@ -0,0 +1,22 @@
1
+ !Core
2
+
3
+ - Additional tone systems
4
+ - Superscript numbers (for wade giles)
5
+ - IPA tone notation
6
+
7
+ - Additional transcription systems
8
+ - MPS2 (Mandarin Phonetic Symbols II)
9
+ - Palladiy (To make things interesting)
10
+ - Gwoyeu Romatzyh
11
+ - Yale
12
+
13
+ - Research some rare pinyin syllables: lo, yo, ê, yai
14
+ - Add a general README as rdoc start page
15
+ - Add a README to the data/ directory with info on sources, contents and purposes
16
+ - More tests
17
+ - Add remembering of parameters to cgiform example, other examples
18
+
19
+ !More
20
+ The core lib basically does translation on the syllable level. It can handle strings with syllables nicely separated by spaces. Successive layers should make it possible to convert a sentence with punctuation into a different system. It should be possible to write compound words together in Hanyu Pinyin, and have the syllables separated by dashes when converting to Wade-Giles (WG). For instance:
21
+
22
+ Wǒ de péngyǒu, shì dàifu. => Wǒ te p`éng-yǔ, shih tài-fu.
@@ -0,0 +1,24 @@
1
#!/usr/bin/ruby -w
#
# CGI form example for the Pinyin library.
#
# Reads the request parameters `pinyin`, `from`, `from_tone`, `to` and
# `to_tone`, converts the submitted text with a Pinyin::Reader feeding a
# Pinyin::Writer, and renders template.rhtml (located next to this script).
#
# Exactly one HTTP response is written per request: either the rendered
# template, or a plain-text error report when the conversion raised.

require 'cgi'
require 'erb'

$: << File.dirname(__FILE__)+'/../../lib'
require 'pinyin'

cgi = CGI.new("xhtml1")

# `params` and `@converted` are read by template.rhtml through `binding`,
# so both names must stay as they are.
params = cgi.params
@converted = nil
failure = nil

begin
  # NOTE(review): params['from'], params['to'] and the tone parameters are
  # passed on exactly as cgi.params returns them — confirm that
  # Pinyin::Reader / Pinyin::Writer accept that form.
  text = Array(params['pinyin']).first
  if text && text != ''
    writer = Pinyin::Writer.new(params['to'], params['to_tone'])
    reader = Pinyin::Reader.new(params['from'], params['from_tone'])
    @converted = writer << (reader << text)
  end
rescue => e
  # Collect the report instead of calling cgi.out here: every cgi.out call
  # writes a complete header + body, and the page below would add another.
  failure = "#{e}\n#{params['pinyin'].inspect}"
end

if failure
  # text/plain so that the echoed user input is never interpreted as markup.
  cgi.out("text/plain; charset=utf-8") { failure }
else
  cgi.out("text/html; charset=utf-8") do
    # Resolve the template relative to this script, not the working directory.
    template = File.join(File.dirname(__FILE__), 'template.rhtml')
    ERB.new(File.read(template)).result(binding)
  end
end
@@ -0,0 +1,69 @@
1
<!doctype html>
<%#
  Form page of the Pinyin CGI example. Rendered with the binding of the CGI
  script, from which it reads:
    params      - the request parameters (each value is a list)
    @converted  - the converted text, or nil when nothing was converted
  Everything that originates from the request is passed through
  CGI.escapeHTML before being written into the page.
%>
<html>
<head>
<title>Ruby Pinyin CGIForm example</title>
<style type='text/css'>
body {
  font-family: sans-serif;
}

div#wrap {
  width: 40%;
  margin: 0 auto;
}

table {
  width: 100%;
}
div#converted_text {
  border: 1px dotted #000;
}

textarea {
  width: 100%;
  height: 10em;
  margin: 0 auto;
}
</style>
</head>
<body>
<div id='wrap'>
<h2>Pinyin example application</h2>
<h3>Enter some pinyin text and choose your format</h3>
<%# The form wraps the table: a form element may not be a direct child of a table. %>
<form method='post'>
<table>
  <tr>
    <td colspan='2'>
      <textarea name='pinyin'><%= CGI.escapeHTML(params['pinyin'].first.to_s) %></textarea>
    </td>
  </tr>

  <tr><td>From</td><td>To</td></tr>
  <% Pinyin::Conversions::All.each do |f| %>
  <tr>
    <td><label><input type='radio' name='from' value='<%=f%>'> <%=f.capitalize%></label></td>
    <td><label><input type='radio' name='to' value='<%=f%>'> <%=f.capitalize%></label></td>
  </tr>
  <% end %>
  <tr><td>From tone</td><td>To tone</td></tr>
  <% Pinyin::Tones::All.each do |f| %>
  <tr>
    <td><label><input type='radio' name='from_tone' value='<%=f%>'> <%=f.capitalize%></label></td>
    <td><label><input type='radio' name='to_tone' value='<%=f%>'> <%=f.capitalize%></label></td>
  </tr>
  <% end %>
  <tr>
    <td><input type='submit'></td>
    <td>&nbsp;</td>
  </tr>
</table>
</form>
<% if @converted %>
<h2>Converted:</h2>
<div id='converted_text'>
  <%= CGI.escapeHTML(@converted.to_s) %>
</div>
<% end %>
</div>
</body>
</html>
data/examples/hello.rb ADDED
@@ -0,0 +1,12 @@
1
# Minimal Pinyin example: a numbered Hanyu Pinyin phrase is converted to
# Wade-Giles with accents, and that result is converted on to Zhuyin with
# marks. The three spellings are printed one per line.
$:.push(File.join(File.dirname(__FILE__), '../lib'))

require 'pinyin'

source_text = 'wo3 de2 peng2 you3 shi4 dai4 fu'

# Converter arguments: source system, source tones, target system, target tones.
hanyu_to_wadegiles  = Pinyin::Converter.new(:hanyu, :numbers, :wadegiles, :accents)
wadegiles_to_zhuyin = Pinyin::Converter.new(:wadegiles, :accents, :zhuyin, :marks)

wadegiles_text = hanyu_to_wadegiles << source_text
zhuyin_text    = wadegiles_to_zhuyin << wadegiles_text

puts(source_text, wadegiles_text, zhuyin_text)
@@ -0,0 +1,74 @@
1
+ require 'csv'
2
+ require 'yaml'
3
+
4
module Pinyin
  # Spelling tables and helpers for converting toneless syllables between
  # transcription systems.
  #
  # Loading this module has side effects: it reads initial.csv, final.csv and
  # rules.yaml from DATA_DIR, declares one accessor per CSV row on
  # Pinyin::Initial / Pinyin::Final, and fills those accessors on the objects
  # in Initial::All / Final::All.
  module Conversions
    # Row names collected from the CSV files, in order of first appearance.
    # Rows whose name matches /name|standalone/i are not listed.
    All=[]

    DATA_DIR=File.dirname(__FILE__)+'/data/'

    # Load the various representations for initials and finals. Each CSV row
    # is `name, value_0, value_1, ...`; value i is assigned to klazz::All[i]
    # through the accessor called `name`.
    %w(Initial Final).each do |c|
      klazz = Pinyin.const_get(c)
      begin
        # CSV.foreach closes the file once iteration is done.
        CSV.foreach(DATA_DIR + c.downcase + '.csv') do |name, *values|
          # Parenthesised on purpose: `All.index name || name =~ ...` parses
          # as `All.index(name || ...)`, so the regexp filter never applied.
          All << name unless All.index(name) || name =~ /name|standalone/i
          klazz.class_eval { attr_accessor name.to_sym }
          values.each_with_index do |v, i|
            klazz::All[i].send(name + '=', v)
          end
        end
      rescue
        # Interpolate the exception: `String + $!` needs an implicit String
        # conversion and otherwise raises TypeError, hiding the real error.
        puts "Bad data in #{c.downcase}.csv : #{$!}"
        raise
      end
    end

    # Substitution rules: indexed by conversion type, each entry a list of
    # hashes with a 'match' (regexp source) and a 'subst' (replacement) key.
    @@rules=YAML::load(IO.read(DATA_DIR+'rules.yaml'))

    # Find the syllable that is spelled +string+ in the system +type+.
    #
    # Standalone spellings of finals are tried first. Otherwise every
    # initial/final pair that TonelessSyllable does not reject as illegal is
    # spelled out, run through the substitution rules and compared.
    #
    # Returns a TonelessSyllable, or nil when nothing matches.
    def self.parse(type, string)
      standalone_reader = "#{type}_standalone"
      lone_final = Final::All.find do |f|
        f.respond_to?(standalone_reader) && f.send(standalone_reader) == string
      end
      return TonelessSyllable.new(Initial::Empty, lone_final) if lone_final

      Initial::All.each do |ini|
        Final::All.each do |fin|
          next if TonelessSyllable.illegal?(ini, fin)
          spelling = apply_rules(type, (ini.send(type) || '') + (fin.send(type) || ''))
          return TonelessSyllable.new(ini, fin) if spelling == string
        end
      end
      nil
    end

    # Spell the toneless syllable +tsyll+ in the system +type+.
    #
    # With an initial: initial + final, run through the substitution rules.
    # Without one: the final's standalone spelling when it has one (returned
    # as is, without applying rules), else the final's regular spelling run
    # through the rules.
    def self.unparse(type, tsyll)
      initial = tsyll.initial.send(type)
      return apply_rules(type, initial + (tsyll.final.send(type) || '')) if initial

      standalone_reader = type.to_s + '_standalone'
      if tsyll.final.respond_to?(standalone_reader)
        standalone = tsyll.final.send(standalone_reader)
        return standalone if standalone
      end

      apply_rules(type, tsyll.final.send(type))
    end

    # Split +str+ on single spaces and apostrophes.
    #
    # Returns an array of [token, pos] pairs; +str+ itself is not modified.
    #
    # NOTE(review): pos only accumulates token lengths, so the separators that
    # were consumed are not counted, and scanning stops at the first empty
    # token (two separators in a row, or a leading separator). Kept as is
    # because callers are not visible from here — confirm what pos should mean.
    def self.tokenize(str)
      tokens = []
      rest, pos = str.dup, 0
      while (s = rest.slice!(/[^' ]*/)) && s != ""
        tokens << [s.strip, pos]
        pos += s.length
        rest.slice!(/[' ]/)
      end
      tokens
    end

    # Internal helper: returns a copy of +string+ with every substitution rule
    # registered for +type+ applied in order. Types without rules leave the
    # copy untouched.
    #
    # The bare `private` that used to precede this method was dropped: it has
    # no effect on `def self.` methods, so the method was and remains public.
    def self.apply_rules(type, string)
      result = string.dup
      (@@rules[type] || []).each do |rule|
        result.gsub!(Regexp.new(rule['match']), rule['subst'])
      end
      result
    end
  end
end