formosa 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/License.txt +20 -0
- data/Manifest.txt +23 -0
- data/README.txt +18 -0
- data/Rakefile +127 -0
- data/lib/ext/native_syllable_composer/HoloVowels.h +242 -0
- data/lib/ext/native_syllable_composer/LibHolo.h +1223 -0
- data/lib/ext/native_syllable_composer/compose.cpp +67 -0
- data/lib/ext/native_syllable_composer/compose.h +11 -0
- data/lib/ext/native_syllable_composer/extconf.rb +7 -0
- data/lib/ext/native_syllable_composer/native_syllable_composer.c +34 -0
- data/lib/formosa/syllable_utility.rb +86 -0
- data/lib/formosa/version.rb +9 -0
- data/lib/formosa.rb +31 -0
- data/scripts/txt2html +67 -0
- data/setup.rb +1585 -0
- data/test/test_formosa.rb +11 -0
- data/test/test_helper.rb +2 -0
- data/website/index.html +92 -0
- data/website/index.txt +38 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +138 -0
- data/website/template.rhtml +48 -0
- metadata +74 -0
data/History.txt
ADDED
data/License.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2007 Lukhnos D. Liu
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Manifest.txt
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
History.txt
|
2
|
+
License.txt
|
3
|
+
Manifest.txt
|
4
|
+
README.txt
|
5
|
+
Rakefile
|
6
|
+
lib/formosa.rb
|
7
|
+
lib/formosa/version.rb
|
8
|
+
lib/formosa/syllable_utility.rb
|
9
|
+
lib/ext/native_syllable_composer/HoloVowels.h
|
10
|
+
lib/ext/native_syllable_composer/LibHolo.h
|
11
|
+
lib/ext/native_syllable_composer/compose.cpp
|
12
|
+
lib/ext/native_syllable_composer/compose.h
|
13
|
+
lib/ext/native_syllable_composer/extconf.rb
|
14
|
+
lib/ext/native_syllable_composer/native_syllable_composer.c
|
15
|
+
scripts/txt2html
|
16
|
+
setup.rb
|
17
|
+
test/test_formosa.rb
|
18
|
+
test/test_helper.rb
|
19
|
+
website/index.html
|
20
|
+
website/index.txt
|
21
|
+
website/javascripts/rounded_corners_lite.inc.js
|
22
|
+
website/stylesheets/screen.css
|
23
|
+
website/template.rhtml
|
data/README.txt
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
Formosa is a Ruby library for processing Taiwanese languages. Major languages
|
2
|
+
spoken in Taiwan include Holo, Hakka, Mandarin and those of the indigenous
|
3
|
+
people. Formosa is the Ruby branch of the lib-formosa project
|
4
|
+
(http://code.google.com/p/lib-formosa/).
|
5
|
+
|
6
|
+
Currently, we focus on the processing of the Holo (Southern Min) language,
|
7
|
+
with necessary tools such as SyllableComposer available for general use.
|
8
|
+
|
9
|
+
The following example shows how to use Formosa:
|
10
|
+
|
11
|
+
$KCODE="u" # set the Ruby environment to use UTF-8
|
12
|
+
require "rubygems"
|
13
|
+
require "formosa"
|
14
|
+
include Formosa::Holo
|
15
|
+
poj = SyllableType::POJ
|
16
|
+
tl = SyllableType::TL
|
17
|
+
SyllableUtility.compose_syllable(poj, tl, "goa2") # => guá
|
18
|
+
SyllableUtility.compose_syllable(tl, poj, "gua2") # => goá
|
data/Rakefile
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'rake/clean'
|
4
|
+
require 'rake/testtask'
|
5
|
+
require 'rake/packagetask'
|
6
|
+
require 'rake/gempackagetask'
|
7
|
+
require 'rake/rdoctask'
|
8
|
+
require 'rake/contrib/rubyforgepublisher'
|
9
|
+
require 'fileutils'
|
10
|
+
require 'hoe'
|
11
|
+
|
12
|
+
include FileUtils
|
13
|
+
require File.join(File.dirname(__FILE__), 'lib', 'formosa', 'version')
|
14
|
+
|
15
|
+
AUTHOR = 'Lukhnos D. Liu' # can also be an array of Authors
|
16
|
+
EMAIL = "lukhnos@gmail.com"
|
17
|
+
DESCRIPTION = "A collection of libraries for Taiwanese languages processing"
|
18
|
+
GEM_NAME = 'formosa' # what ppl will type to install your gem
|
19
|
+
|
20
|
+
@config_file = "~/.rubyforge/user-config.yml"
|
21
|
+
@config = nil
|
22
|
+
# Returns the RubyForge account name stored under the "username" key of the
# YAML file named by @config_file.
#
# The parsed file is cached in @config and the name in @rubyforge_username,
# so the file is read at most once. When the file cannot be read or parsed,
# an explanation is written to stderr and the process exits with status 1
# (raising SystemExit), so that callers and shells see the failure.
def rubyforge_username
  unless @config
    begin
      @config = YAML.load(File.read(File.expand_path(@config_file)))
    rescue
      # Bare rescue: any StandardError while reading/parsing means "no usable config".
      $stderr.puts <<-EOS
ERROR: No rubyforge config file found: #{@config_file}
Run 'rubyforge setup' to prepare your env for access to Rubyforge
- See http://newgem.rubyforge.org/rubyforge.html for more details
      EOS
      exit 1
    end
  end
  @rubyforge_username ||= @config["username"]
end
|
37
|
+
|
38
|
+
RUBYFORGE_PROJECT = 'formosa' # The unix name for your project
|
39
|
+
HOMEPATH = "http://#{RUBYFORGE_PROJECT}.rubyforge.org"
|
40
|
+
DOWNLOAD_PATH = "http://rubyforge.org/projects/#{RUBYFORGE_PROJECT}"
|
41
|
+
|
42
|
+
NAME = "formosa"
|
43
|
+
REV = nil
|
44
|
+
# UNCOMMENT IF REQUIRED:
|
45
|
+
# REV = `svn info`.each {|line| if line =~ /^Revision:/ then k,v = line.split(': '); break v.chomp; else next; end} rescue nil
|
46
|
+
VERS = Formosa::VERSION::STRING + (REV ? ".#{REV}" : "")
|
47
|
+
CLEAN.include ['**/.*.sw?', '*.gem', '.config', '**/.DS_Store']
|
48
|
+
RDOC_OPTS = ['--quiet', '--title', 'formosa documentation',
|
49
|
+
"--opname", "index.html",
|
50
|
+
"--line-numbers",
|
51
|
+
"--main", "README",
|
52
|
+
"--inline-source"]
|
53
|
+
|
54
|
+
# Reopens Hoe to filter its own name out of the extra dependency list.
class Hoe
  # Returns @extra_deps without the entries named 'hoe'. An entry may be a
  # bare name or an array whose first element is the name.
  def extra_deps
    @extra_deps.select { |dependency| Array(dependency).first != 'hoe' }
  end
end
|
59
|
+
|
60
|
+
# Build the Hoe specification for this gem, which generates the Rake tasks.
# Run 'rake -T' from the gem root directory to list the generated tasks.
hoe = Hoe.new(GEM_NAME, VERS) do |spec|
  # -- Required metadata
  spec.author = AUTHOR
  spec.description = DESCRIPTION
  spec.email = EMAIL
  spec.summary = DESCRIPTION
  spec.url = HOMEPATH
  spec.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
  spec.test_globs = ["test/**/test_*.rb"]
  spec.clean_globs |= CLEAN # extra file patterns to delete on clean

  # -- Optional settings
  spec.changes = spec.paragraphs_of("History.txt", 0..1).join("\n\n")
  # spec.extra_deps = [] # rubygem dependencies as [name, version] pairs,
  #                      # e.g. [ ['active_support', '>= 1.3.1'] ]

  # Passed through to the gem specification: the extconf.rb of the native extension.
  spec.spec_extras = {
    :extensions => ['lib/ext/native_syllable_composer/extconf.rb'],
  }
end

# First two paragraphs of the history file, reused in the deploy message.
CHANGES = hoe.paragraphs_of('History.txt', 0..1).join("\n\n")

# Project path on RubyForge: the project name alone when the gem shares it,
# otherwise "project/gem".
PATH =
  if RUBYFORGE_PROJECT == GEM_NAME
    RUBYFORGE_PROJECT
  else
    "#{RUBYFORGE_PROJECT}/#{GEM_NAME}"
  end

# PATH with the leading project name stripped, followed by 'rdoc'.
rdoc_parent = PATH.gsub(/^#{RUBYFORGE_PROJECT}\/?/,'')
hoe.remote_rdoc_dir = File.join(rdoc_parent, 'rdoc')
|
85
|
+
|
86
|
+
desc 'Generate website files'
task :website_generate do
  # Run scripts/txt2html on every .txt file under website/ and redirect the
  # output to a file of the same name with the trailing "txt" replaced by "html".
  Dir['website/**/*.txt'].each do |source|
    target = source.gsub(/txt$/,'html')
    sh %{ ruby scripts/txt2html #{source} > #{target} }
  end
end
|
92
|
+
|
93
|
+
desc 'Upload website files to rubyforge'
task :website_upload do
  # rsync the contents of the local website/ directory into the project's
  # directory on rubyforge.org, logging in as the configured RubyForge user.
  destination = "#{rubyforge_username}@rubyforge.org:/var/www/gforge-projects/#{PATH}/"
  sh %{rsync -aCv website/ #{destination}}
end
|
100
|
+
|
101
|
+
desc 'Generate and upload website files'
# Aggregate task with no body of its own: runs its prerequisites in order.
task :website => [
  :website_generate,
  :website_upload,
  :publish_docs
]
|
103
|
+
|
104
|
+
desc 'Release the website and new gem version'
task :deploy => [:check_version, :website, :release] do
  # After the prerequisites have run, print the svn command that tags this
  # release, plus a suggested commit comment.
  svn_root = "svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}"
  puts "Remember to create SVN tag:"
  puts "svn copy #{svn_root}/trunk #{svn_root}/tags/REL-#{VERS} "
  puts "Suggested comment:"
  puts "Tagging release #{CHANGES}"
end
|
112
|
+
|
113
|
+
desc 'Runs tasks website_generate and install_gem as a local deployment of the gem'
# Aggregate task with no body of its own: runs its prerequisites in order.
task :local_deploy => [
  :website_generate,
  :install_gem
]
|
115
|
+
|
116
|
+
# Guard task (a prerequisite of :deploy): stops the run unless the VERSION
# environment variable is set and equal to VERS.
#
# On failure the reason is written to stderr and the process exits with
# status 1, so a rejected release is reported as a failure to the caller
# instead of looking like a successful run.
task :check_version do
  requested = ENV['VERSION']
  unless requested
    $stderr.puts 'Must pass a VERSION=x.y.z release version'
    exit 1
  end
  unless requested == VERS
    $stderr.puts "Please update your version.rb to match the release version, currently #{VERS}"
    exit 1
  end
end
|
126
|
+
|
127
|
+
|
@@ -0,0 +1,242 @@
|
|
1
|
+
// HoloVowels.h: Holo Vowel character composer for the Holo dialects
|
2
|
+
//
|
3
|
+
// Copyright (c) 2007 The OpenVanilla Project (http://openvanilla.org)
|
4
|
+
// All rights reserved.
|
5
|
+
//
|
6
|
+
// Redistribution and use in source and binary forms, with or without
|
7
|
+
// modification, are permitted provided that the following conditions
|
8
|
+
// are met:
|
9
|
+
//
|
10
|
+
// 1. Redistributions of source code must retain the above copyright
|
11
|
+
// notice, this list of conditions and the following disclaimer.
|
12
|
+
// 2. Redistributions in binary form must reproduce the above copyright
|
13
|
+
// notice, this list of conditions and the following disclaimer in the
|
14
|
+
// documentation and/or other materials provided with the distribution.
|
15
|
+
// 3. Neither the name of OpenVanilla nor the names of its contributors
|
16
|
+
// may be used to endorse or promote products derived from this software
|
17
|
+
// without specific prior written permission.
|
18
|
+
//
|
19
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
20
|
+
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
21
|
+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
22
|
+
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
23
|
+
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
24
|
+
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
25
|
+
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
26
|
+
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
27
|
+
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
28
|
+
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
29
|
+
// POSSIBILITY OF SUCH DAMAGE.
|
30
|
+
//
|
31
|
+
// This module follows the POJ/TL convention of tone notation.
|
32
|
+
// The tone of any DT input must be re-mapped before feeding into
|
33
|
+
// the ComposeHoloVowel() function
|
34
|
+
//
|
35
|
+
// TL does not compose nn and ou (TL uses oo--there's no ou),
|
36
|
+
// use the flag "composePOJOnlySymbols=false" when composing TL vowels.
|
37
|
+
//
|
38
|
+
// If you are feeding the ending N (in POJ) to this function, you
|
39
|
+
// must explicitly convert it into "nn" (lower case only). This is because
|
40
|
+
// "n" can mean other things elsewhere.
|
41
|
+
|
42
|
+
#ifndef __HoloVowels_h
|
43
|
+
#define __HoloVowels_h
|
44
|
+
|
45
|
+
#include <string>
|
46
|
+
|
47
|
+
namespace LibHolo {
|
48
|
+
|
49
|
+
using namespace std;
|
50
|
+
|
51
|
+
string ComposeHoloVowel(const string& vowel, unsigned int tone, bool composePOJOnlySymbols=true, bool useMiddleDotFallback=false);
|
52
|
+
|
53
|
+
string GetToneASCIIRepresentation(unsigned int tone);
|
54
|
+
bool IsDiacriticSymbol(char c);
|
55
|
+
};
|
56
|
+
|
57
|
+
namespace LibHolo {
|
58
|
+
|
59
|
+
const char *holoVowelLookupTable="aeimnoquAEIMNOQU";
|
60
|
+
const unsigned int holoVowelRows = 16;
|
61
|
+
|
62
|
+
#ifndef _MSC_VER
|
63
|
+
|
64
|
+
static const char *holoNasel="ⁿ";
|
65
|
+
|
66
|
+
// ou = o + U+0358 (default)
|
67
|
+
static const char *holoVowels[holoVowelRows * 9]= {
|
68
|
+
"a", "á", "à", "a", "â", "ǎ", "ā", "a̍", "a̋",
|
69
|
+
"e", "é", "è", "e", "ê", "ě", "ē", "e̍", "e̋",
|
70
|
+
"i", "í", "ì", "i", "î", "ǐ", "ī", "i̍", "i̋",
|
71
|
+
"m", "ḿ", "m̀", "m", "m̂", "m̌", "m̄", "m̍", "m̋",
|
72
|
+
"n", "ń", "ǹ", "n", "n̂", "ň", "n̄", "n̍", "n̋",
|
73
|
+
"o", "ó", "ò", "o", "ô", "ǒ", "ō", "o̍", "ő",
|
74
|
+
"o͘", "ó͘", "ò͘", "o͘", "ô͘", "ǒ͘", "ō͘", "o̍͘", "ő͘",
|
75
|
+
"u", "ú", "ù", "u", "û", "ǔ", "ū", "u̍", "ű",
|
76
|
+
// the 4th column repeats the bare base letter, as in every other row
"A", "Á", "À", "A", "Â", "Ǎ", "Ā", "A̍", "A̋",
|
77
|
+
"E", "É", "È", "E", "Ê", "Ě", "Ē", "E̍", "E̋",
|
78
|
+
"I", "Í", "Ì", "I", "Î", "Ǐ", "Ī", "I̍", "I̋",
|
79
|
+
"M", "Ḿ", "M̀", "M", "M̂", "M̌", "M̄", "M̍", "M̋",
|
80
|
+
"N", "Ń", "Ǹ", "N", "N̂", "Ň", "N̄", "N̍", "N̋",
|
81
|
+
"O", "Ó", "Ò", "O", "Ô", "Ǒ", "Ō", "O̍", "Ő",
|
82
|
+
"O͘", "Ó͘", "Ò͘", "O͘", "Ô͘", "Ǒ͘", "Ō͘", "O̍͘", "Ő͘",
|
83
|
+
"U", "Ú", "Ù", "U", "Û", "Ǔ", "Ū", "U̍", "Ű"
|
84
|
+
};
|
85
|
+
|
86
|
+
// Does not use U+0358 (fallback)
|
87
|
+
static const char *holoVowelsUsingFallback[holoVowelRows * 9]= {
|
88
|
+
"a", "á", "à", "a", "â", "ǎ", "ā", "a̍", "a̋",
|
89
|
+
"e", "é", "è", "e", "ê", "ě", "ē", "e̍", "e̋",
|
90
|
+
"i", "í", "ì", "i", "î", "ǐ", "ī", "i̍", "i̋",
|
91
|
+
"m", "ḿ", "m̀", "m", "m̂", "m̌", "m̄", "m̍", "m̋",
|
92
|
+
"n", "ń", "ǹ", "n", "n̂", "ň", "n̄", "n̍", "n̋",
|
93
|
+
"o·","ó·","ò·","o·","ô·","ǒ·","ō·","o̍·", "ő·",
|
94
|
+
"u", "ú", "ù", "u", "û", "ǔ", "ū", "u̍", "ű",
|
95
|
+
// the 4th column repeats the bare base letter, as in every other row
"A", "Á", "À", "A", "Â", "Ǎ", "Ā", "A̍", "A̋",
|
96
|
+
"E", "É", "È", "E", "Ê", "Ě", "Ē", "E̍", "E̋",
|
97
|
+
"I", "Í", "Ì", "I", "Î", "Ǐ", "Ī", "I̍", "I̋",
|
98
|
+
"M", "Ḿ", "M̀", "M", "M̂", "M̌", "M̄", "M̍", "M̋",
|
99
|
+
"N", "Ń", "Ǹ", "N", "N̂", "Ň", "N̄", "N̍", "N̋",
|
100
|
+
"O", "Ó", "Ò", "O", "Ô", "Ǒ", "Ō", "O̍", "Ő",
|
101
|
+
"O·","Ó·","Ò·","O·","Ô·","Ǒ·","Ō·","O̍·","Ő·",
|
102
|
+
"U", "Ú", "Ù", "U", "Û", "Ǔ", "Ū", "U̍", "Ű"
|
103
|
+
};
|
104
|
+
#endif
|
105
|
+
|
106
|
+
// we don't use #ifndef ... #else, otherwise MSC will try to
|
107
|
+
// peek into the above section, causing compiler to stop!
|
108
|
+
|
109
|
+
#ifdef _MSC_VER
|
110
|
+
static const char *holoNasel="\xe2\x81\xbf";
|
111
|
+
|
112
|
+
// ou = o + U+0358 (default)
|
113
|
+
static const char *holoVowels[holoVowelRows * 9]= {
|
114
|
+
"a", "\xc3\xa1", "\xc3\xa0", "a", "\xc3\xa2", "\xc7\x8e", "\xc4\x81", "a\xcc\x8d", "a\xcc\x8b",
|
115
|
+
"e", "\xc3\xa9", "\xc3\xa8", "e", "\xc3\xaa", "\xc4\x9b", "\xc4\x93", "e\xcc\x8d", "e\xcc\x8b",
|
116
|
+
"i", "\xc3\xad", "\xc3\xac", "i", "\xc3\xae", "\xc7\x90", "\xc4\xab", "i\xcc\x8d", "i\xcc\x8b",
|
117
|
+
"m", "\xe1\xb8\xbf", "m\xcc\x80", "m", "m\xcc\x82", "m\xcc\x8c", "m\xcc\x84", "m\xcc\x8d", "m\xcc\x8b",
|
118
|
+
"n", "\xc5\x84", "\xc7\xb9", "n", "n\xcc\x82", "n\xcc\x8c", "n\xcc\x84", "n\xcc\x8d", "n\xcc\x8b",
|
119
|
+
"o", "\xc3\xb3", "\xc3\xb2", "o", "\xc3\xb4", "\xc7\x92", "\xc5\x8d", "o\xcc\x8d", "\xc5\x91",
|
120
|
+
"o\xcd\x98", "\xc3\xb3\xcd\x98", "\xc3\xb2\xcd\x98", "o\xcd\x98", "\xc3\xb4\xcd\x98", "\xc7\x92\xcd\x98", "\xc5\x8d\xcd\x98", "o\xcc\x8d\xcd\x98", "o\xcc\x8b\xcd\x98",
|
121
|
+
"u", "\xc3\xba", "\xc3\xb9", "u", "\xc3\xbb", "\xc7\x94", "\xc5\xab", "u\xcc\x8d", "\xc5\xb1",
|
122
|
+
// the 4th column repeats the bare base letter, as in every other row
"A", "\xc3\x81", "\xc3\x80", "A", "\xc3\x82", "\xc7\x8d", "\xc4\x80", "A\xcc\x8d", "A\xcc\x8b",
|
123
|
+
"E", "\xc3\x89", "\xc3\x88", "E", "\xc3\x8a", "\xc4\x9a", "\xc4\x92", "E\xcc\x8d", "E\xcc\x8b",
|
124
|
+
"I", "\xc3\x8d", "\xc3\x8c", "I", "\xc3\x8e", "\xc7\x8f", "\xc4\xaa", "I\xcc\x8d", "I\xcc\x8b",
|
125
|
+
"M", "\xe1\xb8\xbe", "M\xcc\x80", "M", "M\xcc\x82", "M\xcc\x8c", "M\xcc\x84", "M\xcc\x8d", "M\xcc\x8b",
|
126
|
+
"N", "\xc5\x83", "\xc7\xb8", "N", "N\xcc\x82", "N\xcc\x8c", "N\xcc\x84", "N\xcc\x8d", "N\xcc\x8b",
|
127
|
+
"O", "\xc3\x93", "\xc3\x92", "O", "\xc3\x94", "\xc7\x91", "\xc5\x8c", "O\xcc\x8d", "\xc5\x90",
|
128
|
+
"O\xcd\x98", "\xc3\x93\xcd\x98", "\xc3\x92\xcd\x98", "O\xcd\x98", "\xc3\x94\xcd\x98", "\xc7\x91\xcd\x98", "\xc5\x8c\xcd\x98", "O\xcc\x8d\xcd\x98", "O\xcc\x8b\xcd\x98",
|
129
|
+
"U", "\xc3\x9a", "\xc3\x99", "U", "\xc3\x9b", "\xc7\x93", "\xc5\xaa", "U\xcc\x8d", "\xc5\xb0"
|
130
|
+
};
|
131
|
+
|
132
|
+
// Does not use U+0358 (fallback)
|
133
|
+
static const char *holoVowelsUsingFallback[holoVowelRows * 9]= {
|
134
|
+
"a", "\xc3\xa1", "\xc3\xa0", "a", "\xc3\xa2", "\xc7\x8e", "\xc4\x81", "a\xcc\x8d", "a\xcc\x8b",
|
135
|
+
"e", "\xc3\xa9", "\xc3\xa8", "e", "\xc3\xaa", "\xc4\x9b", "\xc4\x93", "e\xcc\x8d", "e\xcc\x8b",
|
136
|
+
"i", "\xc3\xad", "\xc3\xac", "i", "\xc3\xae", "\xc7\x90", "\xc4\xab", "i\xcc\x8d", "i\xcc\x8b",
|
137
|
+
"m", "\xe1\xb8\xbf", "m\xcc\x80", "m", "m\xcc\x82", "m\xcc\x8c", "m\xcc\x84", "m\xcc\x8d", "m\xcc\x8b",
|
138
|
+
"n", "\xc5\x84", "\xc7\xb9", "n", "n\xcc\x82", "n\xcc\x8c", "n\xcc\x84", "n\xcc\x8d", "n\xcc\x8b",
|
139
|
+
"o\xc2\xb7","\xc3\xb3\xc2\xb7","\xc3\xb2\xc2\xb7","o\xc2\xb7","\xc3\xb4\xc2\xb7","\xc7\x92\xc2\xb7","\xc5\x8d\xc2\xb7","o\xcc\x8d\xc2\xb7", "o\xcc\x8b\xc2\xb7",
|
140
|
+
"u", "\xc3\xba", "\xc3\xb9", "u", "\xc3\xbb", "\xc7\x94", "\xc5\xab", "u\xcc\x8d", "\xc5\xb1",
|
141
|
+
// the 4th column repeats the bare base letter, as in every other row
"A", "\xc3\x81", "\xc3\x80", "A", "\xc3\x82", "\xc7\x8d", "\xc4\x80", "A\xcc\x8d", "A\xcc\x8b",
|
142
|
+
"E", "\xc3\x89", "\xc3\x88", "E", "\xc3\x8a", "\xc4\x9a", "\xc4\x92", "E\xcc\x8d", "E\xcc\x8b",
|
143
|
+
"I", "\xc3\x8d", "\xc3\x8c", "I", "\xc3\x8e", "\xc7\x8f", "\xc4\xaa", "I\xcc\x8d", "I\xcc\x8b",
|
144
|
+
"M", "\xe1\xb8\xbe", "M\xcc\x80", "M", "M\xcc\x82", "M\xcc\x8c", "M\xcc\x84", "M\xcc\x8d", "M\xcc\x8b",
|
145
|
+
"N", "\xc5\x83", "\xc7\xb8", "N", "N\xcc\x82", "N\xcc\x8c", "N\xcc\x84", "N\xcc\x8d", "N\xcc\x8b",
|
146
|
+
"O", "\xc3\x93", "\xc3\x92", "O", "\xc3\x94", "\xc7\x91", "\xc5\x8c", "O\xcc\x8d", "\xc5\x90",
|
147
|
+
"O\xc2\xb7","\xc3\x93\xc2\xb7","\xc3\x92\xc2\xb7","O\xc2\xb7","\xc3\x94\xc2\xb7","\xc7\x91\xc2\xb7","\xc5\x8c\xc2\xb7","O\xcc\x8d\xc2\xb7","O\xcc\x8b\xc2\xb7",
|
148
|
+
"U", "\xc3\x9a", "\xc3\x99", "U", "\xc3\x9b", "\xc7\x93", "\xc5\xaa", "U\xcc\x8d", "\xc5\xb0"
|
149
|
+
};
|
150
|
+
#endif
|
151
|
+
|
152
|
+
|
153
|
+
// Composes a Holo vowel with its tone diacritic, using POJ/TL tone numbers.
//
//   vowel                 ASCII spelling. Its first character selects the
//                         table row; the remaining characters are appended
//                         unchanged after the composed letter. "nn" in any
//                         letter case is the nasal marker, and "ou"/"oo" in
//                         any letter case spell the dotted o.
//   tone                  0..9; 0 is treated as tone 1.
//   composePOJOnlySymbols true: "nn" becomes the superscript n (holoNasel)
//                         and "ou"/"oo" select the dotted-o rows.
//                         false: "nn" is returned as typed and "ou"/"oo" are
//                         handled like any other letter sequence.
//   useMiddleDotFallback  true: use the table that writes the dotted o with a
//                         middle dot instead of U+0358.
//
// Returns the composed string, or an empty string when tone > 9, when vowel
// is empty or is exactly "q"/"Q", or when its first character has no row in
// holoVowelLookupTable.
string ComposeHoloVowel(const string& vowel, unsigned int tone, bool composePOJOnlySymbols, bool useMiddleDotFallback)
{
    string composed;

    // if tone is out of range, return
    if (tone > 9) return composed;

    // tone 0 is treated as tone 1; subtracting 1 gives the zero-based table column
    unsigned int column = ((tone == 0) ? 1 : tone) - 1;

    if (vowel.empty()) return composed;

    // 'q'/'Q' only exist as internal row keys for the dotted o; reject them as input
    if (vowel == "q" || vowel == "Q") return composed;

    // the nasal marker is handled on its own, in all four letter-case spellings
    if (vowel == "nn" || vowel == "nN" || vowel == "Nn" || vowel == "NN") {
        composed = composePOJOnlySymbols ? holoNasel : vowel;
        return composed;
    }

    // pick up the table to use
    const char **vowels = useMiddleDotFallback ? holoVowelsUsingFallback : holoVowels;

    // the first character is the look-up key, the rest is carried over as the coda
    char lookupChar = vowel[0];
    string coda = vowel.substr(1);

    if (composePOJOnlySymbols)
    {
        // the two-letter spellings of the dotted o map onto the q/Q rows and
        // are consumed whole, so only in these cases is the coda dropped
        if (vowel == "OU" || vowel == "Ou" || vowel == "OO" || vowel == "Oo") { lookupChar = 'Q'; coda.clear(); }
        else if (vowel == "ou" || vowel == "oU" || vowel == "oo" || vowel == "oO") { lookupChar = 'q'; coda.clear(); }
    }

    // find the row; bounded by holoVowelRows, which also bounds the table index below
    unsigned int row;
    for (row = 0; row < holoVowelRows; row++) if (holoVowelLookupTable[row] == lookupChar) break;

    // if not found, row has reached the end
    if (row == holoVowelRows) return composed;

    // now we pick up the correct composed form (9 tone columns per row)
    composed = vowels[row*9 + column];
    return composed + coda;
}
|
202
|
+
|
203
|
+
// Returns the one-character ASCII mark used to type the given tone, or an
// empty string for tones without a mark (any value other than 2, 3, 5, 6,
// 7, 8 and 9).
string GetToneASCIIRepresentation(unsigned int tone)
{
    // indexed by tone number; empty entries are the unmarked tones
    static const char *const marks[10] = { "", "", "'", "`", "", "^", "/", "=", "|", "\"" };

    if (tone > 9) return string();
    return string(marks[tone]);
}
|
218
|
+
|
219
|
+
// Returns true when c is one of the ASCII tone marks: ' ` | ^ / = "
bool IsDiacriticSymbol(char c)
{
    switch (c)
    {
        case '\'':
        case '`':
        case '|':
        case '^':
        case '/':
        case '=':
        case '\"':
            return true;
        default:
            return false;
    }
}
|
224
|
+
|
225
|
+
// Maps an ASCII tone mark back to its tone number:
//   '  -> 2,  `  -> 3,  ^  -> 5,  /  -> 6,  =  -> 7,  |  -> 8,  "  -> 9
// Any other character yields 0.
unsigned int ToneFromDiacriticSymbol(char c)
{
    if (c == '\'') return 2;
    if (c == '`') return 3;
    if (c == '^') return 5;
    if (c == '/') return 6;
    if (c == '=') return 7;
    if (c == '|') return 8;
    if (c == '\"') return 9;
    return 0;
}
|
239
|
+
}
|
240
|
+
|
241
|
+
#endif // of #ifndef __HoloVowels_h
|
242
|
+
|