formosa 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/License.txt +20 -0
- data/Manifest.txt +23 -0
- data/README.txt +18 -0
- data/Rakefile +127 -0
- data/lib/ext/native_syllable_composer/HoloVowels.h +242 -0
- data/lib/ext/native_syllable_composer/LibHolo.h +1223 -0
- data/lib/ext/native_syllable_composer/compose.cpp +67 -0
- data/lib/ext/native_syllable_composer/compose.h +11 -0
- data/lib/ext/native_syllable_composer/extconf.rb +7 -0
- data/lib/ext/native_syllable_composer/native_syllable_composer.c +34 -0
- data/lib/formosa/syllable_utility.rb +86 -0
- data/lib/formosa/version.rb +9 -0
- data/lib/formosa.rb +31 -0
- data/scripts/txt2html +67 -0
- data/setup.rb +1585 -0
- data/test/test_formosa.rb +11 -0
- data/test/test_helper.rb +2 -0
- data/website/index.html +92 -0
- data/website/index.txt +38 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +138 -0
- data/website/template.rhtml +48 -0
- metadata +74 -0
data/History.txt
ADDED
data/License.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2007 FIXME full name
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Manifest.txt
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
History.txt
|
2
|
+
License.txt
|
3
|
+
Manifest.txt
|
4
|
+
README.txt
|
5
|
+
Rakefile
|
6
|
+
lib/formosa.rb
|
7
|
+
lib/formosa/version.rb
|
8
|
+
lib/formosa/syllable_utility.rb
|
9
|
+
lib/ext/native_syllable_composer/HoloVowels.h
|
10
|
+
lib/ext/native_syllable_composer/LibHolo.h
|
11
|
+
lib/ext/native_syllable_composer/compose.cpp
|
12
|
+
lib/ext/native_syllable_composer/compose.h
|
13
|
+
lib/ext/native_syllable_composer/extconf.rb
|
14
|
+
lib/ext/native_syllable_composer/native_syllable_composer.c
|
15
|
+
scripts/txt2html
|
16
|
+
setup.rb
|
17
|
+
test/test_formosa.rb
|
18
|
+
test/test_helper.rb
|
19
|
+
website/index.html
|
20
|
+
website/index.txt
|
21
|
+
website/javascripts/rounded_corners_lite.inc.js
|
22
|
+
website/stylesheets/screen.css
|
23
|
+
website/template.rhtml
|
data/README.txt
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
Formosa is a Ruby library for processing Taiwanese languages. Major languages
|
2
|
+
spoken in Taiwan include Holo, Hakka, Mandarin and those of the indigenous
|
3
|
+
people. Formosa is the Ruby branch of the lib-formosa project
|
4
|
+
(http://code.google.com/p/lib-formosa/).
|
5
|
+
|
6
|
+
Currently, we focus on the processing of the Holo (Southern Min) language,
|
7
|
+
with necessary tools such as SyllableComposer available for general use.
|
8
|
+
|
9
|
+
The following example shows how to use Formosa:
|
10
|
+
|
11
|
+
$KCODE="u" # set the Ruby environment to use UTF-8
|
12
|
+
require "rubygems"
|
13
|
+
require "formosa"
|
14
|
+
include Formosa::Holo
|
15
|
+
poj = SyllableType::POJ
|
16
|
+
tl = SyllableType::TL
|
17
|
+
SyllableUtility.compose_syllable(poj, tl, "goa2") # => guá
|
18
|
+
SyllableUtility.compose_syllable(tl, poj, "gua2") # => goá
|
data/Rakefile
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'rake/clean'
|
4
|
+
require 'rake/testtask'
|
5
|
+
require 'rake/packagetask'
|
6
|
+
require 'rake/gempackagetask'
|
7
|
+
require 'rake/rdoctask'
|
8
|
+
require 'rake/contrib/rubyforgepublisher'
|
9
|
+
require 'fileutils'
|
10
|
+
require 'hoe'
|
11
|
+
|
12
|
+
include FileUtils
|
13
|
+
require File.join(File.dirname(__FILE__), 'lib', 'formosa', 'version')
|
14
|
+
|
15
|
+
AUTHOR = 'Lukhnos D. Liu' # can also be an array of Authors
|
16
|
+
EMAIL = "lukhnos@gmail.com"
|
17
|
+
DESCRIPTION = "A collection of libraries for Taiwanese languages processing"
|
18
|
+
GEM_NAME = 'formosa' # what ppl will type to install your gem
|
19
|
+
|
20
|
+
@config_file = "~/.rubyforge/user-config.yml"
|
21
|
+
@config = nil
|
22
|
+
# Returns the "username" entry of the RubyForge user configuration.
#
# The YAML file named by @config_file is read and parsed on first use and
# the parsed result is cached in @config; the resolved name is cached in
# @rubyforge_username. If the file cannot be read or parsed, setup
# instructions are printed and the process exits with a non-zero status so
# that calling tasks and scripts see the failure.
def rubyforge_username
  unless @config
    # YAML is not required explicitly in this file's require list.
    require 'yaml'
    begin
      @config = YAML.load(File.read(File.expand_path(@config_file)))
    rescue
      puts <<-EOS
ERROR: No rubyforge config file found: #{@config_file}
Run 'rubyforge setup' to prepare your env for access to Rubyforge
- See http://newgem.rubyforge.org/rubyforge.html for more details
      EOS
      # A bare `exit` would report success (status 0) for this error path.
      exit 1
    end
  end
  @rubyforge_username ||= @config["username"]
end
|
37
|
+
|
38
|
+
RUBYFORGE_PROJECT = 'formosa' # The unix name for your project
|
39
|
+
HOMEPATH = "http://#{RUBYFORGE_PROJECT}.rubyforge.org"
|
40
|
+
DOWNLOAD_PATH = "http://rubyforge.org/projects/#{RUBYFORGE_PROJECT}"
|
41
|
+
|
42
|
+
NAME = "formosa"
|
43
|
+
REV = nil
|
44
|
+
# UNCOMMENT IF REQUIRED:
|
45
|
+
# REV = `svn info`.each {|line| if line =~ /^Revision:/ then k,v = line.split(': '); break v.chomp; else next; end} rescue nil
|
46
|
+
VERS = Formosa::VERSION::STRING + (REV ? ".#{REV}" : "")
|
47
|
+
CLEAN.include ['**/.*.sw?', '*.gem', '.config', '**/.DS_Store']
|
48
|
+
RDOC_OPTS = ['--quiet', '--title', 'formosa documentation',
|
49
|
+
"--opname", "index.html",
|
50
|
+
"--line-numbers",
|
51
|
+
"--main", "README",
|
52
|
+
"--inline-source"]
|
53
|
+
|
54
|
+
class Hoe
|
55
|
+
def extra_deps
|
56
|
+
@extra_deps.reject { |x| Array(x).first == 'hoe' }
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
# Generate all the Rake tasks
|
61
|
+
# Run 'rake -T' to see list of generated tasks (from gem root directory)
|
62
|
+
hoe = Hoe.new(GEM_NAME, VERS) do |p|
|
63
|
+
p.author = AUTHOR
|
64
|
+
p.description = DESCRIPTION
|
65
|
+
p.email = EMAIL
|
66
|
+
p.summary = DESCRIPTION
|
67
|
+
p.url = HOMEPATH
|
68
|
+
p.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
|
69
|
+
p.test_globs = ["test/**/test_*.rb"]
|
70
|
+
p.clean_globs |= CLEAN #An array of file patterns to delete on clean.
|
71
|
+
|
72
|
+
# == Optional
|
73
|
+
p.changes = p.paragraphs_of("History.txt", 0..1).join("\n\n")
|
74
|
+
#p.extra_deps = [] # An array of rubygem dependencies [name, version], e.g. [ ['active_support', '>= 1.3.1'] ]
|
75
|
+
# p.spec_extras = []
|
76
|
+
p.spec_extras = {
|
77
|
+
:extensions => ['lib/ext/native_syllable_composer/extconf.rb'],
|
78
|
+
}
|
79
|
+
|
80
|
+
end
|
81
|
+
|
82
|
+
CHANGES = hoe.paragraphs_of('History.txt', 0..1).join("\n\n")
|
83
|
+
PATH = (RUBYFORGE_PROJECT == GEM_NAME) ? RUBYFORGE_PROJECT : "#{RUBYFORGE_PROJECT}/#{GEM_NAME}"
|
84
|
+
hoe.remote_rdoc_dir = File.join(PATH.gsub(/^#{RUBYFORGE_PROJECT}\/?/,''), 'rdoc')
|
85
|
+
|
86
|
+
desc 'Generate website files'
|
87
|
+
task :website_generate do
|
88
|
+
Dir['website/**/*.txt'].each do |txt|
|
89
|
+
sh %{ ruby scripts/txt2html #{txt} > #{txt.gsub(/txt$/,'html')} }
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
desc 'Upload website files to rubyforge'
|
94
|
+
task :website_upload do
|
95
|
+
host = "#{rubyforge_username}@rubyforge.org"
|
96
|
+
remote_dir = "/var/www/gforge-projects/#{PATH}/"
|
97
|
+
local_dir = 'website'
|
98
|
+
sh %{rsync -aCv #{local_dir}/ #{host}:#{remote_dir}}
|
99
|
+
end
|
100
|
+
|
101
|
+
desc 'Generate and upload website files'
|
102
|
+
task :website => [:website_generate, :website_upload, :publish_docs]
|
103
|
+
|
104
|
+
desc 'Release the website and new gem version'
|
105
|
+
task :deploy => [:check_version, :website, :release] do
|
106
|
+
puts "Remember to create SVN tag:"
|
107
|
+
puts "svn copy svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}/trunk " +
|
108
|
+
"svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}/tags/REL-#{VERS} "
|
109
|
+
puts "Suggested comment:"
|
110
|
+
puts "Tagging release #{CHANGES}"
|
111
|
+
end
|
112
|
+
|
113
|
+
desc 'Runs tasks website_generate and install_gem as a local deployment of the gem'
|
114
|
+
task :local_deploy => [:website_generate, :install_gem]
|
115
|
+
|
116
|
+
# Guard task for releases: requires a VERSION=x.y.z environment variable
# that matches VERS (the version string built from version.rb). On either
# failure the message is printed and rake stops with a non-zero exit status
# instead of reporting success.
task :check_version do
  unless ENV['VERSION']
    puts 'Must pass a VERSION=x.y.z release version'
    exit 1
  end
  unless ENV['VERSION'] == VERS
    puts "Please update your version.rb to match the release version, currently #{VERS}"
    exit 1
  end
end
|
126
|
+
|
127
|
+
|
@@ -0,0 +1,242 @@
|
|
1
|
+
// HoloVowels.h: Holo Vowel character composer for the Holo dialects
|
2
|
+
//
|
3
|
+
// Copyright (c) 2007 The OpenVanilla Project (http://openvanilla.org)
|
4
|
+
// All rights reserved.
|
5
|
+
//
|
6
|
+
// Redistribution and use in source and binary forms, with or without
|
7
|
+
// modification, are permitted provided that the following conditions
|
8
|
+
// are met:
|
9
|
+
//
|
10
|
+
// 1. Redistributions of source code must retain the above copyright
|
11
|
+
// notice, this list of conditions and the following disclaimer.
|
12
|
+
// 2. Redistributions in binary form must reproduce the above copyright
|
13
|
+
// notice, this list of conditions and the following disclaimer in the
|
14
|
+
// documentation and/or other materials provided with the distribution.
|
15
|
+
// 3. Neither the name of OpenVanilla nor the names of its contributors
|
16
|
+
// may be used to endorse or promote products derived from this software
|
17
|
+
// without specific prior written permission.
|
18
|
+
//
|
19
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
20
|
+
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
21
|
+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
22
|
+
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
23
|
+
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
24
|
+
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
25
|
+
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
26
|
+
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
27
|
+
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
28
|
+
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
29
|
+
// POSSIBILITY OF SUCH DAMAGE.
|
30
|
+
//
|
31
|
+
// This module follows the POJ/TL convention of tone notation.
|
32
|
+
// The tone of any DT input must be re-mapped before feeding into
|
33
|
+
// the ComposeHoloVowel() function
|
34
|
+
//
|
35
|
+
// TL does not compose nn and ou (TL uses oo--there's no ou),
|
36
|
+
// use the flag "composePOJOnlySymbols=false" when composing TL vowels.
|
37
|
+
//
|
38
|
+
// If you are feeding the ending N (in POJ) to this function, you
|
39
|
+
// must explicitly convert it into "nn" (lower case only). This is because
|
40
|
+
// "n" can mean other things elsewhere.
|
41
|
+
|
42
|
+
#ifndef __HoloVowels_h
|
43
|
+
#define __HoloVowels_h
|
44
|
+
|
45
|
+
#include <string>
|
46
|
+
|
47
|
+
namespace LibHolo {

    // Kept for backward compatibility: code including this header may rely
    // on unqualified std names inside LibHolo.
    using namespace std;

    // Composes a vowel with the diacritic for the given tone (POJ/TL tone
    // numbering).
    //
    //   vowel                 - vowel string; its first character selects the
    //                           table row, the remaining characters are
    //                           treated as a coda. "nn" denotes the nasal mark.
    //   tone                  - 0..9; tone 0 is treated as tone 1.
    //   composePOJOnlySymbols - when true, POJ-only symbols (the nasal mark
    //                           and the ou/oo vowel) are composed; pass false
    //                           for TL.
    //   useMiddleDotFallback  - when true, the ou vowel is written with a
    //                           middle dot instead of U+0358.
    //
    // Returns an empty string when tone > 9, when vowel is empty, when vowel
    // is "q" or "Q", or when the first character is not a known vowel.
    string ComposeHoloVowel(const string& vowel, unsigned int tone, bool composePOJOnlySymbols=true, bool useMiddleDotFallback=false);

    // Returns the one-character ASCII symbol for tones 2, 3, 5, 6, 7, 8 and 9,
    // or an empty string for any other tone.
    string GetToneASCIIRepresentation(unsigned int tone);

    // Returns true if c is one of the ASCII tone symbols ' ` | ^ / = ".
    bool IsDiacriticSymbol(char c);

    // Inverse of GetToneASCIIRepresentation: returns the tone for an ASCII
    // tone symbol, or 0 if c is not a tone symbol.
    unsigned int ToneFromDiacriticSymbol(char c);
}
|
56
|
+
|
57
|
+
namespace LibHolo {

    // One character per table row, in row order. 'q'/'Q' are internal
    // stand-ins for the POJ vowel ou/oo (o with U+0358 or a middle dot).
    const char *holoVowelLookupTable="aeimnoquAEIMNOQU";
    const unsigned int holoVowelRows = 16;

    // Each table below has holoVowelRows rows of 9 columns; column t-1 holds
    // the composed form for tone t. Every row of holoVowelLookupTable must
    // have a matching row here, in the same order.

    #ifndef _MSC_VER

    static const char *holoNasel="ⁿ";

    // ou = o + U+0358 (default)
    static const char *holoVowels[holoVowelRows * 9]= {
        "a", "á", "à", "a", "â", "ǎ", "ā", "a̍", "a̋",
        "e", "é", "è", "e", "ê", "ě", "ē", "e̍", "e̋",
        "i", "í", "ì", "i", "î", "ǐ", "ī", "i̍", "i̋",
        "m", "ḿ", "m̀", "m", "m̂", "m̌", "m̄", "m̍", "m̋",
        "n", "ń", "ǹ", "n", "n̂", "ň", "n̄", "n̍", "n̋",
        "o", "ó", "ò", "o", "ô", "ǒ", "ō", "o̍", "ő",
        "o͘", "ó͘", "ò͘", "o͘", "ô͘", "ǒ͘", "ō͘", "o̍͘", "ő͘",
        "u", "ú", "ù", "u", "û", "ǔ", "ū", "u̍", "ű",
        "A", "Á", "À", "A", "Â", "Ǎ", "Ā", "A̍", "A̋",
        "E", "É", "È", "E", "Ê", "Ě", "Ē", "E̍", "E̋",
        "I", "Í", "Ì", "I", "Î", "Ǐ", "Ī", "I̍", "I̋",
        "M", "Ḿ", "M̀", "M", "M̂", "M̌", "M̄", "M̍", "M̋",
        "N", "Ń", "Ǹ", "N", "N̂", "Ň", "N̄", "N̍", "N̋",
        "O", "Ó", "Ò", "O", "Ô", "Ǒ", "Ō", "O̍", "Ő",
        "O͘", "Ó͘", "Ò͘", "O͘", "Ô͘", "Ǒ͘", "Ō͘", "O̍͘", "Ő͘",
        "U", "Ú", "Ù", "U", "Û", "Ǔ", "Ū", "U̍", "Ű"
    };

    // Does not use U+0358 (fallback)
    static const char *holoVowelsUsingFallback[holoVowelRows * 9]= {
        "a", "á", "à", "a", "â", "ǎ", "ā", "a̍", "a̋",
        "e", "é", "è", "e", "ê", "ě", "ē", "e̍", "e̋",
        "i", "í", "ì", "i", "î", "ǐ", "ī", "i̍", "i̋",
        "m", "ḿ", "m̀", "m", "m̂", "m̌", "m̄", "m̍", "m̋",
        "n", "ń", "ǹ", "n", "n̂", "ň", "n̄", "n̍", "n̋",
        "o", "ó", "ò", "o", "ô", "ǒ", "ō", "o̍", "ő",
        "o·","ó·","ò·","o·","ô·","ǒ·","ō·","o̍·", "ő·",
        "u", "ú", "ù", "u", "û", "ǔ", "ū", "u̍", "ű",
        "A", "Á", "À", "A", "Â", "Ǎ", "Ā", "A̍", "A̋",
        "E", "É", "È", "E", "Ê", "Ě", "Ē", "E̍", "E̋",
        "I", "Í", "Ì", "I", "Î", "Ǐ", "Ī", "I̍", "I̋",
        "M", "Ḿ", "M̀", "M", "M̂", "M̌", "M̄", "M̍", "M̋",
        "N", "Ń", "Ǹ", "N", "N̂", "Ň", "N̄", "N̍", "N̋",
        "O", "Ó", "Ò", "O", "Ô", "Ǒ", "Ō", "O̍", "Ő",
        "O·","Ó·","Ò·","O·","Ô·","Ǒ·","Ō·","O̍·","Ő·",
        "U", "Ú", "Ù", "U", "Û", "Ǔ", "Ū", "U̍", "Ű"
    };
    #endif

    // we don't use #ifndef ... #else, otherwise MSC will try to
    // peek into the above section, causing compiler to stop!

    #ifdef _MSC_VER
    static const char *holoNasel="\xe2\x81\xbf";

    // ou = o + U+0358 (default)
    static const char *holoVowels[holoVowelRows * 9]= {
        "a", "\xc3\xa1", "\xc3\xa0", "a", "\xc3\xa2", "\xc7\x8e", "\xc4\x81", "a\xcc\x8d", "a\xcc\x8b",
        "e", "\xc3\xa9", "\xc3\xa8", "e", "\xc3\xaa", "\xc4\x9b", "\xc4\x93", "e\xcc\x8d", "e\xcc\x8b",
        "i", "\xc3\xad", "\xc3\xac", "i", "\xc3\xae", "\xc7\x90", "\xc4\xab", "i\xcc\x8d", "i\xcc\x8b",
        "m", "\xe1\xb8\xbf", "m\xcc\x80", "m", "m\xcc\x82", "m\xcc\x8c", "m\xcc\x84", "m\xcc\x8d", "m\xcc\x8b",
        "n", "\xc5\x84", "\xc7\xb9", "n", "n\xcc\x82", "n\xcc\x8c", "n\xcc\x84", "n\xcc\x8d", "n\xcc\x8b",
        "o", "\xc3\xb3", "\xc3\xb2", "o", "\xc3\xb4", "\xc7\x92", "\xc5\x8d", "o\xcc\x8d", "\xc5\x91",
        "o\xcd\x98", "\xc3\xb3\xcd\x98", "\xc3\xb2\xcd\x98", "o\xcd\x98", "\xc3\xb4\xcd\x98", "\xc7\x92\xcd\x98", "\xc5\x8d\xcd\x98", "o\xcc\x8d\xcd\x98", "o\xcc\x8b\xcd\x98",
        "u", "\xc3\xba", "\xc3\xb9", "u", "\xc3\xbb", "\xc7\x94", "\xc5\xab", "u\xcc\x8d", "\xc5\xb1",
        "A", "\xc3\x81", "\xc3\x80", "A", "\xc3\x82", "\xc7\x8d", "\xc4\x80", "A\xcc\x8d", "A\xcc\x8b",
        "E", "\xc3\x89", "\xc3\x88", "E", "\xc3\x8a", "\xc4\x9a", "\xc4\x92", "E\xcc\x8d", "E\xcc\x8b",
        "I", "\xc3\x8d", "\xc3\x8c", "I", "\xc3\x8e", "\xc7\x8f", "\xc4\xaa", "I\xcc\x8d", "I\xcc\x8b",
        "M", "\xe1\xb8\xbe", "M\xcc\x80", "M", "M\xcc\x82", "M\xcc\x8c", "M\xcc\x84", "M\xcc\x8d", "M\xcc\x8b",
        "N", "\xc5\x83", "\xc7\xb8", "N", "N\xcc\x82", "N\xcc\x8c", "N\xcc\x84", "N\xcc\x8d", "N\xcc\x8b",
        "O", "\xc3\x93", "\xc3\x92", "O", "\xc3\x94", "\xc7\x91", "\xc5\x8c", "O\xcc\x8d", "\xc5\x90",
        "O\xcd\x98", "\xc3\x93\xcd\x98", "\xc3\x92\xcd\x98", "O\xcd\x98", "\xc3\x94\xcd\x98", "\xc7\x91\xcd\x98", "\xc5\x8c\xcd\x98", "O\xcc\x8d\xcd\x98", "O\xcc\x8b\xcd\x98",
        "U", "\xc3\x9a", "\xc3\x99", "U", "\xc3\x9b", "\xc7\x93", "\xc5\xaa", "U\xcc\x8d", "\xc5\xb0"
    };

    // Does not use U+0358 (fallback)
    static const char *holoVowelsUsingFallback[holoVowelRows * 9]= {
        "a", "\xc3\xa1", "\xc3\xa0", "a", "\xc3\xa2", "\xc7\x8e", "\xc4\x81", "a\xcc\x8d", "a\xcc\x8b",
        "e", "\xc3\xa9", "\xc3\xa8", "e", "\xc3\xaa", "\xc4\x9b", "\xc4\x93", "e\xcc\x8d", "e\xcc\x8b",
        "i", "\xc3\xad", "\xc3\xac", "i", "\xc3\xae", "\xc7\x90", "\xc4\xab", "i\xcc\x8d", "i\xcc\x8b",
        "m", "\xe1\xb8\xbf", "m\xcc\x80", "m", "m\xcc\x82", "m\xcc\x8c", "m\xcc\x84", "m\xcc\x8d", "m\xcc\x8b",
        "n", "\xc5\x84", "\xc7\xb9", "n", "n\xcc\x82", "n\xcc\x8c", "n\xcc\x84", "n\xcc\x8d", "n\xcc\x8b",
        "o", "\xc3\xb3", "\xc3\xb2", "o", "\xc3\xb4", "\xc7\x92", "\xc5\x8d", "o\xcc\x8d", "\xc5\x91",
        "o\xc2\xb7","\xc3\xb3\xc2\xb7","\xc3\xb2\xc2\xb7","o\xc2\xb7","\xc3\xb4\xc2\xb7","\xc7\x92\xc2\xb7","\xc5\x8d\xc2\xb7","o\xcc\x8d\xc2\xb7", "o\xcc\x8b\xc2\xb7",
        "u", "\xc3\xba", "\xc3\xb9", "u", "\xc3\xbb", "\xc7\x94", "\xc5\xab", "u\xcc\x8d", "\xc5\xb1",
        "A", "\xc3\x81", "\xc3\x80", "A", "\xc3\x82", "\xc7\x8d", "\xc4\x80", "A\xcc\x8d", "A\xcc\x8b",
        "E", "\xc3\x89", "\xc3\x88", "E", "\xc3\x8a", "\xc4\x9a", "\xc4\x92", "E\xcc\x8d", "E\xcc\x8b",
        "I", "\xc3\x8d", "\xc3\x8c", "I", "\xc3\x8e", "\xc7\x8f", "\xc4\xaa", "I\xcc\x8d", "I\xcc\x8b",
        "M", "\xe1\xb8\xbe", "M\xcc\x80", "M", "M\xcc\x82", "M\xcc\x8c", "M\xcc\x84", "M\xcc\x8d", "M\xcc\x8b",
        "N", "\xc5\x83", "\xc7\xb8", "N", "N\xcc\x82", "N\xcc\x8c", "N\xcc\x84", "N\xcc\x8d", "N\xcc\x8b",
        "O", "\xc3\x93", "\xc3\x92", "O", "\xc3\x94", "\xc7\x91", "\xc5\x8c", "O\xcc\x8d", "\xc5\x90",
        "O\xc2\xb7","\xc3\x93\xc2\xb7","\xc3\x92\xc2\xb7","O\xc2\xb7","\xc3\x94\xc2\xb7","\xc7\x91\xc2\xb7","\xc5\x8c\xc2\xb7","O\xcc\x8d\xc2\xb7","O\xcc\x8b\xc2\xb7",
        "U", "\xc3\x9a", "\xc3\x99", "U", "\xc3\x9b", "\xc7\x93", "\xc5\xaa", "U\xcc\x8d", "\xc5\xb0"
    };
    #endif


    // Composes `vowel` with the diacritic for `tone` and returns the result
    // followed by any coda (the characters after the first one).
    //
    // Returns an empty string when tone > 9, when vowel is empty, when vowel
    // is exactly "q" or "Q", or when the first character of vowel is not in
    // holoVowelLookupTable. Tone 0 is treated as tone 1. "nn" in any letter
    // case yields the nasal mark when composePOJOnlySymbols is true and the
    // input itself otherwise.
    std::string ComposeHoloVowel(const std::string& vowel, unsigned int tone, bool composePOJOnlySymbols, bool useMiddleDotFallback)
    {
        std::string composed;
        std::string coda;

        // if tone is out of range, return
        if (tone > 9) return composed;

        // tone 0 is treated as tone 1; then subtract 1 to get the
        // zero-based column index into the tables
        unsigned int realtone = (tone == 0) ? 1 : tone;
        realtone--;

        if (!vowel.length()) return composed;

        // we reject "q", only accept the real ou
        if (vowel=="q" || vowel=="Q") return composed;

        // all four letter-case variants of the nasal marker
        if (vowel=="nn" || vowel=="nN" || vowel=="Nn" || vowel=="NN") {
            composed = composePOJOnlySymbols ? holoNasel : vowel;
            return composed;
        }

        // pick up the table to use
        const char **vowels = useMiddleDotFallback ? holoVowelsUsingFallback : holoVowels;

        // use the first character in the vowel string as the look-up character
        char lookupChar = vowel[0];
        coda = vowel.substr(1, vowel.length()-1);

        if (composePOJOnlySymbols)
        {
            // NOTE(review): the coda is cleared for every vowel on this path,
            // not only for ou/oo. That looks like a misplaced brace, but the
            // behavior is kept as is -- confirm against the callers.
            if (vowel=="OU" || vowel=="Ou" || vowel=="OO" || vowel=="Oo") { lookupChar = 'Q'; coda = ""; }
            else { if (vowel=="ou" || vowel=="oU" || vowel=="oo" || vowel=="oO") { lookupChar = 'q'; } coda = ""; }
        }

        // find the table row for the look-up character
        unsigned int row;
        for (row=0; row<holoVowelRows; row++) if (holoVowelLookupTable[row] == lookupChar) break;

        // if not found, row reaches the end
        if (row==holoVowelRows) return composed;

        // now we pick up the correct composed form; an unfilled table slot
        // is treated as "not found" rather than dereferenced
        const char *entry = vowels[row*9 + realtone];
        if (!entry) return composed;

        composed = entry;
        return composed + coda;
    }

    // Returns the ASCII symbol used to type the given tone, or an empty
    // string for tones that have no symbol (including 0, 1 and 4).
    std::string GetToneASCIIRepresentation(unsigned int tone)
    {
        switch(tone)
        {
            case 2: return std::string("'");
            case 3: return std::string("`");
            case 5: return std::string("^");
            case 6: return std::string("/");
            case 7: return std::string("=");
            case 8: return std::string("|");
            case 9: return std::string("\"");
        }

        return std::string();
    }

    // Returns true if c is one of the ASCII tone symbols.
    bool IsDiacriticSymbol(char c)
    {
        if (c=='\'' || c=='`' || c=='|' || c=='^' || c=='/' || c=='=' || c=='\"') return true;
        return false;
    }

    // Returns the tone denoted by an ASCII tone symbol, or 0 if c is not a
    // tone symbol.
    unsigned int ToneFromDiacriticSymbol(char c)
    {
        switch(c)
        {
            case '\'': return 2;
            case '`': return 3;
            case '^': return 5;
            case '/': return 6;
            case '=': return 7;
            case '|': return 8;
            case '\"': return 9;
        }
        return 0;
    }
}
|
240
|
+
|
241
|
+
#endif // of #ifndef __HoloVowels_h
|
242
|
+
|