formosa 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/License.txt +20 -0
- data/Manifest.txt +23 -0
- data/README.txt +18 -0
- data/Rakefile +127 -0
- data/lib/ext/native_syllable_composer/HoloVowels.h +242 -0
- data/lib/ext/native_syllable_composer/LibHolo.h +1223 -0
- data/lib/ext/native_syllable_composer/compose.cpp +67 -0
- data/lib/ext/native_syllable_composer/compose.h +11 -0
- data/lib/ext/native_syllable_composer/extconf.rb +7 -0
- data/lib/ext/native_syllable_composer/native_syllable_composer.c +34 -0
- data/lib/formosa/syllable_utility.rb +86 -0
- data/lib/formosa/version.rb +9 -0
- data/lib/formosa.rb +31 -0
- data/scripts/txt2html +67 -0
- data/setup.rb +1585 -0
- data/test/test_formosa.rb +11 -0
- data/test/test_helper.rb +2 -0
- data/website/index.html +92 -0
- data/website/index.txt +38 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +138 -0
- data/website/template.rhtml +48 -0
- metadata +74 -0
@@ -0,0 +1,1223 @@
|
|
1
|
+
// LibHolo.h: LibFormosa's Holo Processing Library
|
2
|
+
//
|
3
|
+
// Copyright (c) 2007 The OpenVanilla Project (http://openvanilla.org)
|
4
|
+
// All rights reserved.
|
5
|
+
//
|
6
|
+
// Redistribution and use in source and binary forms, with or without
|
7
|
+
// modification, are permitted provided that the following conditions
|
8
|
+
// are met:
|
9
|
+
//
|
10
|
+
// 1. Redistributions of source code must retain the above copyright
|
11
|
+
// notice, this list of conditions and the following disclaimer.
|
12
|
+
// 2. Redistributions in binary form must reproduce the above copyright
|
13
|
+
// notice, this list of conditions and the following disclaimer in the
|
14
|
+
// documentation and/or other materials provided with the distribution.
|
15
|
+
// 3. Neither the name of OpenVanilla nor the names of its contributors
|
16
|
+
// may be used to endorse or promote products derived from this software
|
17
|
+
// without specific prior written permission.
|
18
|
+
//
|
19
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
20
|
+
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
21
|
+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
22
|
+
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
23
|
+
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
24
|
+
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
25
|
+
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
26
|
+
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
27
|
+
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
28
|
+
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
29
|
+
// POSSIBILITY OF SUCH DAMAGE.
|
30
|
+
|
31
|
+
#include <vector>
|
32
|
+
#include <string>
|
33
|
+
#include <iostream>
|
34
|
+
#include <ctype.h>
|
35
|
+
|
36
|
+
#include "HoloVowels.h"
|
37
|
+
|
38
|
+
namespace LibHolo
|
39
|
+
{
|
40
|
+
using namespace std;
|
41
|
+
|
42
|
+
// Abstract interface implemented by the editable text units in this
// library (string buffers and syllables).
class Composable
{
public:
    virtual ~Composable() {}

    // Removes all content from the object.
    virtual void clear() = 0;

    // Returns true when the object holds no content.
    virtual bool empty() = 0;

    // Returns how many units the implementation currently holds.
    virtual unsigned int numberOfCodepoints() = 0;

    // Returns the content rendered as a single string.
    virtual std::string composedForm() = 0;
};
|
51
|
+
|
52
|
+
// A Composable backed by a vector of strings (one entry per inserted
// character) plus a cursor position.
//
// The base class is inherited publicly: `class X : Composable` defaults to
// private inheritance, which would prevent callers from handling a buffer
// through a Composable pointer or reference.
class ComposableStringBuffer : public Composable
{
public:
    ComposableStringBuffer() : _cursor(0)
    {
    }

    // Returns the current cursor position.
    virtual unsigned int cursor()
    {
        return _cursor;
    }

    // Moves the cursor to c when c <= numberOfCodepoints(); an out-of-range
    // request leaves the cursor untouched. Returns the resulting position.
    virtual unsigned int setCursor(unsigned int c)
    {
        if (c <= numberOfCodepoints()) _cursor = c;
        return _cursor;
    }

    // Empties the buffer and resets the cursor to the beginning.
    virtual void clear()
    {
        _cursor = 0;
        strvec.clear();
    }

    virtual bool empty()
    {
        return strvec.empty();
    }

    // Number of entries stored in the buffer.
    virtual unsigned int numberOfCodepoints()
    {
        return static_cast<unsigned int>(strvec.size());
    }

    // Inserts c as a new entry before position i. Returns false (and does
    // nothing) when i is past the end. The cursor is not moved.
    virtual bool insertCharacterAt(unsigned int i, char c)
    {
        if (i > numberOfCodepoints()) return false;
        strvec.insert(strvec.begin() + i, std::string(1, c));
        return true;
    }

    // Removes the entry at position i. Returns false (and does nothing)
    // when i does not name an existing entry. The cursor is not moved.
    virtual bool removeCodepointAt(unsigned int i)
    {
        if (i >= numberOfCodepoints()) return false;
        strvec.erase(strvec.begin() + i);
        return true;
    }

    // The composed form of a plain buffer is simply its internal form.
    virtual std::string composedForm()
    {
        return internalForm();
    }

    // Concatenates every entry, in order, into one string.
    virtual std::string internalForm()
    {
        std::string joined;
        const unsigned int count = numberOfCodepoints();
        for (unsigned int i = 0; i < count; i++) joined += strvec[i];
        return joined;
    }

protected:
    std::vector<std::string> strvec;
    unsigned int _cursor;
};
|
116
|
+
|
117
|
+
|
118
|
+
// Romanization scheme a symbol or syllable is written in.
// NOTE(review): the abbreviations presumably stand for Peh-oe-ji (POJ),
// Tai-lo (TL), TLPA and Daighi Tongiong (DT) -- confirm with the project docs.
enum SyllableType
{
    POJSyllable  = 0,
    TLSyllable   = 1,
    TLPASyllable = 2,
    DTSyllable   = 3
};
|
125
|
+
|
126
|
+
// Whether the user types a tone diacritic key before or after the vowel
// it applies to.
enum DiacriticInputOption
{
    DiacriticGivenBeforeVowel = 0,
    DiacriticGivenAfterVowel  = 1
};
|
131
|
+
|
132
|
+
class HoloSymbol
|
133
|
+
{
|
134
|
+
public:
|
135
|
+
HoloSymbol() : _tone(0), _type(POJSyllable)
|
136
|
+
{
|
137
|
+
}
|
138
|
+
|
139
|
+
HoloSymbol(const string &s, SyllableType t) : _tone(0), _type(t), _symbol(s)
|
140
|
+
{
|
141
|
+
}
|
142
|
+
|
143
|
+
HoloSymbol(const HoloSymbol &s) : _tone(s._tone), _type(s._type), _symbol(s._symbol)
|
144
|
+
{
|
145
|
+
}
|
146
|
+
|
147
|
+
void setType(SyllableType t)
|
148
|
+
{
|
149
|
+
_type = t;
|
150
|
+
}
|
151
|
+
|
152
|
+
const HoloSymbol& operator=(const HoloSymbol &s)
|
153
|
+
{
|
154
|
+
_symbol = s._symbol;
|
155
|
+
_tone = s._tone;
|
156
|
+
_type = s._type;
|
157
|
+
return *this;
|
158
|
+
}
|
159
|
+
|
160
|
+
string symbol()
|
161
|
+
{
|
162
|
+
return string(_symbol);
|
163
|
+
}
|
164
|
+
|
165
|
+
string symbolInLowerCase()
|
166
|
+
{
|
167
|
+
string lower;
|
168
|
+
unsigned int s=_symbol.length();
|
169
|
+
for (unsigned int i=0; i<s; i++) lower+=tolower(_symbol[i]);
|
170
|
+
return lower;
|
171
|
+
}
|
172
|
+
|
173
|
+
string setSymbol(const string& s)
|
174
|
+
{
|
175
|
+
return (_symbol = s);
|
176
|
+
}
|
177
|
+
|
178
|
+
string composedForm(bool forcePOJStyle=false)
|
179
|
+
{
|
180
|
+
string composed = ComposeHoloVowel(_symbol, _tone, ((_type==POJSyllable) || forcePOJStyle) ? true : false);
|
181
|
+
if (!composed.length()) return _symbol;
|
182
|
+
return composed;
|
183
|
+
}
|
184
|
+
|
185
|
+
unsigned int composedLength()
|
186
|
+
{
|
187
|
+
string composed = composedForm();
|
188
|
+
unsigned int len = 0, clen = composed.length();
|
189
|
+
for (unsigned int i=0; i<clen; )
|
190
|
+
{
|
191
|
+
if (!(composed[i] & 0x80)) {
|
192
|
+
len++;
|
193
|
+
i++;
|
194
|
+
}
|
195
|
+
else if ((composed[i] & 0xe0) == 0xc0) {
|
196
|
+
len++;
|
197
|
+
i+=2;
|
198
|
+
}
|
199
|
+
else if ((composed[i] & 0xf0) == 0xe0) {
|
200
|
+
len++;
|
201
|
+
i+=3;
|
202
|
+
}
|
203
|
+
else {
|
204
|
+
len++;
|
205
|
+
i+=4;
|
206
|
+
}
|
207
|
+
}
|
208
|
+
|
209
|
+
// fprintf (stderr, "composed=%s, strlen=%d, calculated len=%d\n", composed.c_str(), clen, len);
|
210
|
+
|
211
|
+
return len;
|
212
|
+
}
|
213
|
+
|
214
|
+
unsigned int tone()
|
215
|
+
{
|
216
|
+
return _tone;
|
217
|
+
}
|
218
|
+
|
219
|
+
unsigned int setTone(unsigned int t)
|
220
|
+
{
|
221
|
+
_tone = t > 9 ? _tone : t;
|
222
|
+
return _tone;
|
223
|
+
}
|
224
|
+
|
225
|
+
bool isUpperCase()
|
226
|
+
{
|
227
|
+
if (!_symbol.length()) return false;
|
228
|
+
return toupper(_symbol[0]) == _symbol[0];
|
229
|
+
}
|
230
|
+
|
231
|
+
|
232
|
+
protected:
|
233
|
+
unsigned int _tone;
|
234
|
+
SyllableType _type;
|
235
|
+
string _symbol;
|
236
|
+
};
|
237
|
+
|
238
|
+
|
239
|
+
// the _cursor in HoloSyllable really means "the internal symbol cursor",
|
240
|
+
// as for the display cursor (called by the input method context),
|
241
|
+
// we need to recalculate
|
242
|
+
class HoloSyllable : public Composable
|
243
|
+
{
|
244
|
+
public:
|
245
|
+
HoloSyllable() : _inputType(POJSyllable), _inputOption(DiacriticGivenBeforeVowel),
|
246
|
+
_forcePOJStyle(false),
|
247
|
+
_cursor(0), _preparedTone(0)
|
248
|
+
{
|
249
|
+
}
|
250
|
+
|
251
|
+
HoloSyllable(const HoloSyllable &s) : _inputType(s._inputType),
|
252
|
+
_inputOption(s._inputOption),
|
253
|
+
_forcePOJStyle(s._forcePOJStyle),
|
254
|
+
_symvec(s._symvec),
|
255
|
+
_cursor(s._cursor), _preparedTone(s._preparedTone)
|
256
|
+
{
|
257
|
+
}
|
258
|
+
|
259
|
+
const HoloSyllable& operator=(const HoloSyllable &s)
|
260
|
+
{
|
261
|
+
_inputType = s._inputType;
|
262
|
+
_inputOption = s._inputOption;
|
263
|
+
_forcePOJStyle = s._forcePOJStyle;
|
264
|
+
_symvec = s._symvec;
|
265
|
+
_cursor = s._cursor;
|
266
|
+
_preparedTone = s._preparedTone;
|
267
|
+
return *this;
|
268
|
+
}
|
269
|
+
|
270
|
+
virtual void setInputType(SyllableType t)
|
271
|
+
{
|
272
|
+
_inputType = t;
|
273
|
+
}
|
274
|
+
|
275
|
+
virtual void setInputOption(DiacriticInputOption o)
|
276
|
+
{
|
277
|
+
if (o != _inputOption) clearPreparedTone();
|
278
|
+
_inputOption = o;
|
279
|
+
}
|
280
|
+
|
281
|
+
virtual void setForcePOJStyle(bool p)
|
282
|
+
{
|
283
|
+
_forcePOJStyle = p;
|
284
|
+
}
|
285
|
+
|
286
|
+
virtual void clear()
|
287
|
+
{
|
288
|
+
_symvec.clear();
|
289
|
+
_cursor = 0;
|
290
|
+
_preparedTone = 0;
|
291
|
+
}
|
292
|
+
|
293
|
+
virtual bool empty()
|
294
|
+
{
|
295
|
+
return _symvec.empty();
|
296
|
+
}
|
297
|
+
|
298
|
+
virtual unsigned int numberOfCodepoints()
|
299
|
+
{
|
300
|
+
return _symvec.size();
|
301
|
+
}
|
302
|
+
|
303
|
+
virtual string composedForm()
|
304
|
+
{
|
305
|
+
unsigned int s = _symvec.size();
|
306
|
+
string composed;
|
307
|
+
unsigned int i;
|
308
|
+
|
309
|
+
if (_preparedTone) _cursor--;
|
310
|
+
|
311
|
+
for (i=0; i<_cursor; i++)
|
312
|
+
{
|
313
|
+
composed += _symvec[i].composedForm(_forcePOJStyle);
|
314
|
+
// fprintf(stderr, "%d, symbol=%s, composed=%s, composd form=%s\n", i, _symvec[i].symbol().c_str(), _symvec[i].composedForm().c_str(), composed.c_str());
|
315
|
+
}
|
316
|
+
|
317
|
+
composed += GetToneASCIIRepresentation(_preparedTone);
|
318
|
+
// fprintf(stderr, "composd form=%s\n", composed.c_str());
|
319
|
+
|
320
|
+
for (; i<s; i++)
|
321
|
+
{
|
322
|
+
composed += _symvec[i].composedForm(_forcePOJStyle);
|
323
|
+
// fprintf(stderr, "composd form=%s\n", composed.c_str());
|
324
|
+
}
|
325
|
+
|
326
|
+
if (_preparedTone) _cursor++;
|
327
|
+
|
328
|
+
return composed;
|
329
|
+
}
|
330
|
+
|
331
|
+
void setCursor(unsigned int c)
|
332
|
+
{
|
333
|
+
clearPreparedTone();
|
334
|
+
_cursor = c;
|
335
|
+
}
|
336
|
+
|
337
|
+
unsigned int cursor()
|
338
|
+
{
|
339
|
+
unsigned int realcursor = _preparedTone ? _cursor-1 : _cursor;
|
340
|
+
unsigned codepointCursor=0;
|
341
|
+
for (unsigned int i=0; i<realcursor; i++) codepointCursor+=_symvec[i].composedLength();
|
342
|
+
|
343
|
+
if (_preparedTone) codepointCursor++;
|
344
|
+
|
345
|
+
return codepointCursor;
|
346
|
+
}
|
347
|
+
|
348
|
+
bool cursorHome()
|
349
|
+
{
|
350
|
+
clearPreparedTone();
|
351
|
+
if (_cursor==0) return false;
|
352
|
+
_cursor=0;
|
353
|
+
return true;
|
354
|
+
}
|
355
|
+
|
356
|
+
bool cursorEnd()
|
357
|
+
{
|
358
|
+
clearPreparedTone();
|
359
|
+
unsigned int len = numberOfCodepoints();
|
360
|
+
if (_cursor == len) return false;
|
361
|
+
_cursor = len;
|
362
|
+
return true;
|
363
|
+
}
|
364
|
+
|
365
|
+
bool cursorLeft()
|
366
|
+
{
|
367
|
+
clearPreparedTone();
|
368
|
+
if (_cursor==0) return false;
|
369
|
+
_cursor--;
|
370
|
+
return true;
|
371
|
+
}
|
372
|
+
|
373
|
+
bool cursorRight()
|
374
|
+
{
|
375
|
+
clearPreparedTone();
|
376
|
+
if (_cursor == numberOfCodepoints()) return false;
|
377
|
+
_cursor++;
|
378
|
+
return true;
|
379
|
+
}
|
380
|
+
|
381
|
+
bool insertSymbolAtCursor(const HoloSymbol &s)
|
382
|
+
{
|
383
|
+
clearPreparedTone();
|
384
|
+
HoloSymbol newsym(s);
|
385
|
+
newsym.setType(_inputType);
|
386
|
+
_symvec.insert(_symvec.begin() + _cursor, newsym);
|
387
|
+
_cursor++;
|
388
|
+
|
389
|
+
return true;
|
390
|
+
}
|
391
|
+
|
392
|
+
// if there is a prepared tone, the given tone parameter will be ignored
|
393
|
+
bool insertCharacterAtCursor(char c, unsigned int tone=0)
|
394
|
+
{
|
395
|
+
// fprintf(stderr, "insert char %d ('%c'), cursor=%d\n", c, c, _cursor);
|
396
|
+
if (IsDiacriticSymbol(c))
|
397
|
+
{
|
398
|
+
unsigned int tone = ToneFromDiacriticSymbol(c);
|
399
|
+
|
400
|
+
// if there's already a prepared tone, we replace it with the current one
|
401
|
+
if (_preparedTone) {
|
402
|
+
_preparedTone = tone;
|
403
|
+
return true;
|
404
|
+
}
|
405
|
+
|
406
|
+
if (_inputOption==DiacriticGivenBeforeVowel) {
|
407
|
+
_preparedTone = tone;
|
408
|
+
_cursor++;
|
409
|
+
}
|
410
|
+
else {
|
411
|
+
// diacritic given after vowel
|
412
|
+
if (hasPreviousSymbolAtCursor()) previousSymbolAtCursor().setTone(tone);
|
413
|
+
}
|
414
|
+
return true;
|
415
|
+
}
|
416
|
+
|
417
|
+
// if it's not a diacritic symbol, it's POJ^W^W^W, and it's n or u or g,
|
418
|
+
// (and if there's no prepared tone!)
|
419
|
+
// we need to do something special...
|
420
|
+
if (!IsDiacriticSymbol(c) && !_preparedTone /* && _inputType==POJSyllable */)
|
421
|
+
{
|
422
|
+
if (hasPreviousSymbolAtCursor())
|
423
|
+
{
|
424
|
+
string prev = previousSymbolAtCursor().symbolInLowerCase();
|
425
|
+
|
426
|
+
// N -> nn only works if the first character of the syllable is not an
|
427
|
+
// all uppercase symbol
|
428
|
+
if (c=='N' && ((prev != "n") && (prev != "nn")) && _inputType == POJSyllable
|
429
|
+
&& (_symvec.size() > 0 && !_symvec[0].isUpperCase()))
|
430
|
+
{
|
431
|
+
// insert two n's in a row
|
432
|
+
_symvec.insert(_symvec.begin() + _cursor, HoloSymbol(string("nn"), _inputType));
|
433
|
+
_cursor++;
|
434
|
+
return true;
|
435
|
+
}
|
436
|
+
else if (tolower(c)=='n' && prev=="n") {
|
437
|
+
previousSymbolAtCursor().setSymbol(previousSymbolAtCursor().symbol() + string(1, c));
|
438
|
+
return true;
|
439
|
+
}
|
440
|
+
else if (_inputType == POJSyllable && tolower(c)=='u' && prev=="o") {
|
441
|
+
previousSymbolAtCursor().setSymbol(previousSymbolAtCursor().symbol() + string(1, c));
|
442
|
+
return true;
|
443
|
+
}
|
444
|
+
else if (_inputType == TLSyllable && tolower(c)=='o' && prev=="o") {
|
445
|
+
previousSymbolAtCursor().setSymbol(previousSymbolAtCursor().symbol() + string(1, c));
|
446
|
+
return true;
|
447
|
+
}
|
448
|
+
else if (tolower(c)=='g' && prev=="nn") {
|
449
|
+
// we need to break them up!
|
450
|
+
string before = previousSymbolAtCursor().symbol();
|
451
|
+
|
452
|
+
// and the tone of the previous symbol (when it's combined into nn) will be retained
|
453
|
+
previousSymbolAtCursor().setSymbol(before.substr(0, 1));
|
454
|
+
|
455
|
+
// insert one n and one g
|
456
|
+
_symvec.insert(_symvec.begin() + _cursor, HoloSymbol(before.substr(1,1), _inputType));
|
457
|
+
_cursor++;
|
458
|
+
_symvec.insert(_symvec.begin() + _cursor, HoloSymbol(string(1, c), _inputType));
|
459
|
+
_cursor++;
|
460
|
+
return true;
|
461
|
+
}
|
462
|
+
}
|
463
|
+
}
|
464
|
+
|
465
|
+
|
466
|
+
HoloSymbol s(string(1, c), _inputType);
|
467
|
+
if (_preparedTone)
|
468
|
+
{
|
469
|
+
_cursor--;
|
470
|
+
s.setTone(_preparedTone);
|
471
|
+
_preparedTone = 0;
|
472
|
+
}
|
473
|
+
else if (tone > 1)
|
474
|
+
{
|
475
|
+
s.setTone(tone);
|
476
|
+
}
|
477
|
+
|
478
|
+
_symvec.insert(_symvec.begin() + _cursor, s);
|
479
|
+
_cursor++;
|
480
|
+
|
481
|
+
return true;
|
482
|
+
}
|
483
|
+
|
484
|
+
bool removeCharacterAtRightOfCursor() // backspace
|
485
|
+
{
|
486
|
+
if (_preparedTone)
|
487
|
+
{
|
488
|
+
clearPreparedTone();
|
489
|
+
return true;
|
490
|
+
}
|
491
|
+
|
492
|
+
if (atBeginning()) return false;
|
493
|
+
_cursor--;
|
494
|
+
_symvec.erase(_symvec.begin() + _cursor);
|
495
|
+
return true;
|
496
|
+
}
|
497
|
+
|
498
|
+
bool removeCharacterAtLeftOfCursor() // delete
|
499
|
+
{
|
500
|
+
// we do some tightrope trick here: if we have _preparedTone ready,
|
501
|
+
// we "push back" the real _cursor position, do the delete thing,
|
502
|
+
// then push it back
|
503
|
+
bool retval=true;
|
504
|
+
if (_preparedTone) _cursor--;
|
505
|
+
if (atEnd()) retval=false; else _symvec.erase(_symvec.begin() + _cursor);
|
506
|
+
if (_preparedTone) _cursor++;
|
507
|
+
return retval;
|
508
|
+
}
|
509
|
+
|
510
|
+
// returns a normalized string that represents the "internal form" for querying the database
|
511
|
+
// implies normalization
|
512
|
+
string normalizedQueryData(unsigned int finalTone=0)
|
513
|
+
{
|
514
|
+
HoloSyllable s(*this);
|
515
|
+
s.normalize(finalTone);
|
516
|
+
string query;
|
517
|
+
|
518
|
+
unsigned int size=s._symvec.size();
|
519
|
+
unsigned int loudest = 0;
|
520
|
+
|
521
|
+
for (unsigned int i=0; i<size; i++) {
|
522
|
+
query = query + s._symvec[i].symbol();
|
523
|
+
if (s._symvec[i].tone() > 1) loudest = s._symvec[i].tone();
|
524
|
+
// fprintf (stderr, "combining query data %s, tone %d\n", s._symvec[i].symbol().c_str(), s._symvec[i].tone());
|
525
|
+
}
|
526
|
+
|
527
|
+
if (loudest > 1) query = query + string(1, loudest+'0');
|
528
|
+
return query;
|
529
|
+
}
|
530
|
+
|
531
|
+
// normalization is an "identpotent" function, ie. the result should
|
532
|
+
// be the same no matter how many times you call it--this being a very
|
533
|
+
// important linguistic characteristic of this function
|
534
|
+
void normalize(unsigned int finalTone=0)
|
535
|
+
{
|
536
|
+
// fprintf (stderr, "input finalTone=%d\n", finalTone);
|
537
|
+
unsigned int end = _symvec.size();
|
538
|
+
|
539
|
+
// if it's empty, just return
|
540
|
+
if (!end) return;
|
541
|
+
|
542
|
+
unsigned int loudestVowel = end;
|
543
|
+
unsigned int loudestTone = 0;
|
544
|
+
unsigned int p;
|
545
|
+
|
546
|
+
// find the loudest vowel
|
547
|
+
#define FLV(x) ((p=findSymbol(x)) != end)
|
548
|
+
#define SETLOUDEST(v) do { loudestVowel = v; if (_symvec[loudestVowel].tone()>1) { loudestTone = _symvec[loudestVowel].tone(); } } while(0)
|
549
|
+
|
550
|
+
|
551
|
+
if (end==1 && _symvec[0].symbolInLowerCase()=="m") SETLOUDEST(0);
|
552
|
+
if (FLV("n")) SETLOUDEST(p);
|
553
|
+
if (FLV("m")) SETLOUDEST(p);
|
554
|
+
|
555
|
+
// see if it's ng
|
556
|
+
if ((p=findSymbolPair("n", "g")) != end)
|
557
|
+
SETLOUDEST(p);
|
558
|
+
|
559
|
+
if (FLV("u")) SETLOUDEST(p);
|
560
|
+
if (FLV("i")) SETLOUDEST(p);
|
561
|
+
if (FLV("o")) SETLOUDEST(p);
|
562
|
+
if (FLV("e")) SETLOUDEST(p);
|
563
|
+
if (FLV("ou")) SETLOUDEST(p);
|
564
|
+
if (FLV("oo")) SETLOUDEST(p);
|
565
|
+
if (FLV("a")) SETLOUDEST(p);
|
566
|
+
|
567
|
+
// the last "ere" override
|
568
|
+
if (end >= 3) {
|
569
|
+
if (_symvec[end-1].symbolInLowerCase() == "e" && _symvec[end-2].symbolInLowerCase() == "r" && _symvec[end-3].symbolInLowerCase() == "e")
|
570
|
+
{
|
571
|
+
SETLOUDEST(end-1);
|
572
|
+
}
|
573
|
+
}
|
574
|
+
|
575
|
+
if (loudestVowel==end) return;
|
576
|
+
// fprintf(stderr, "found loudest vowel=%d (%s), loudest tone=%d\n", loudestVowel, _symvec[loudestVowel].symbol().c_str(), loudestTone);
|
577
|
+
|
578
|
+
// finalTone overrides
|
579
|
+
if (finalTone > 1) loudestTone = finalTone;
|
580
|
+
|
581
|
+
for (unsigned int i=0; i<end; i++) _symvec[i].setTone(0);
|
582
|
+
|
583
|
+
string lastSymbolStr = _symvec[end-1].symbolInLowerCase();
|
584
|
+
|
585
|
+
// if the symbol is "i", and there's a next "u", we shift
|
586
|
+
// the vowel to "u"
|
587
|
+
|
588
|
+
if (_symvec[loudestVowel].symbolInLowerCase()=="i")
|
589
|
+
{
|
590
|
+
if (loudestVowel+1 < end)
|
591
|
+
{
|
592
|
+
if (_symvec[loudestVowel+1].symbolInLowerCase()=="u") loudestVowel++;
|
593
|
+
}
|
594
|
+
}
|
595
|
+
|
596
|
+
if (loudestTone==4 || /* loudestTone==6 || */ loudestTone <= 1) {
|
597
|
+
// ignore the 4th, 6th and 1th (or no tone), so everything is set to 0 now
|
598
|
+
return;
|
599
|
+
}
|
600
|
+
|
601
|
+
if (lastSymbolStr=="t" || lastSymbolStr=="p" || lastSymbolStr=="k" || lastSymbolStr=="h") {
|
602
|
+
// only when the ending is t, p, k, h is the tone set -- and only when the tone is 8
|
603
|
+
if (loudestTone==8) _symvec[loudestVowel].setTone(loudestTone);
|
604
|
+
return;
|
605
|
+
}
|
606
|
+
else {
|
607
|
+
// if not t,p,k,h, we need to override the loudest tone--back to tone 1 !
|
608
|
+
if (loudestTone==8) {
|
609
|
+
_symvec[loudestVowel].setTone(0);
|
610
|
+
return;
|
611
|
+
}
|
612
|
+
}
|
613
|
+
|
614
|
+
_symvec[loudestVowel].setTone(loudestTone);
|
615
|
+
|
616
|
+
#undef FLV
|
617
|
+
#undef SETTONE
|
618
|
+
}
|
619
|
+
|
620
|
+
HoloSyllable convertToPOJSyllable()
|
621
|
+
{
|
622
|
+
HoloSyllable syl = *this;
|
623
|
+
syl.clearPreparedTone();
|
624
|
+
syl.setCursor(0);
|
625
|
+
if (_inputType==POJSyllable) return syl;
|
626
|
+
|
627
|
+
syl.setInputType(POJSyllable);
|
628
|
+
syl.clear();
|
629
|
+
|
630
|
+
// begin TL->POJ conversion
|
631
|
+
unsigned int size=_symvec.size();
|
632
|
+
unsigned int i;
|
633
|
+
|
634
|
+
for (i=0; i<size; i++)
|
635
|
+
{
|
636
|
+
HoloSymbol sym1 = _symvec[i];
|
637
|
+
string str1 = sym1.symbol();
|
638
|
+
|
639
|
+
// fprintf (stderr, "converting to POJ: %s\n", str1.c_str());
|
640
|
+
|
641
|
+
string lowstr1 = sym1.symbolInLowerCase();
|
642
|
+
|
643
|
+
// oo -> ou
|
644
|
+
if (lowstr1=="oo")
|
645
|
+
{
|
646
|
+
// detect case
|
647
|
+
if (str1[0] == tolower(str1[0])) {
|
648
|
+
syl.insertCharacterAtCursor('o', sym1.tone());
|
649
|
+
syl.insertCharacterAtCursor('u');
|
650
|
+
}
|
651
|
+
else
|
652
|
+
{
|
653
|
+
syl.insertCharacterAtCursor('O', sym1.tone());
|
654
|
+
syl.insertCharacterAtCursor('U');
|
655
|
+
}
|
656
|
+
continue;
|
657
|
+
}
|
658
|
+
|
659
|
+
|
660
|
+
if (hasNextSymbol(i)) {
|
661
|
+
HoloSymbol sym2 = _symvec[i+1];
|
662
|
+
string str2 = sym2.symbol();
|
663
|
+
string lowstr2 = sym2.symbolInLowerCase();
|
664
|
+
|
665
|
+
// ts -> ch with case detection
|
666
|
+
if (lowstr1=="t" && lowstr2=="s") {
|
667
|
+
// detect case
|
668
|
+
if (str1[0] == tolower(str1[0])) {
|
669
|
+
syl.insertCharacterAtCursor('c');
|
670
|
+
syl.insertCharacterAtCursor('h');
|
671
|
+
}
|
672
|
+
else {
|
673
|
+
syl.insertCharacterAtCursor('C');
|
674
|
+
syl.insertCharacterAtCursor('H');
|
675
|
+
}
|
676
|
+
|
677
|
+
i++;
|
678
|
+
continue;
|
679
|
+
}
|
680
|
+
|
681
|
+
// ue -> oe
|
682
|
+
if (lowstr1=="u" && lowstr2=="e") {
|
683
|
+
// detect case
|
684
|
+
if (str1[0] == tolower(str1[0])) {
|
685
|
+
syl.insertCharacterAtCursor('o', sym1.tone());
|
686
|
+
syl.insertCharacterAtCursor('e', sym2.tone());
|
687
|
+
}
|
688
|
+
else {
|
689
|
+
syl.insertCharacterAtCursor('O', sym1.tone());
|
690
|
+
syl.insertCharacterAtCursor('E', sym2.tone());
|
691
|
+
}
|
692
|
+
|
693
|
+
i++;
|
694
|
+
continue;
|
695
|
+
}
|
696
|
+
|
697
|
+
// ua -> oa
|
698
|
+
if (lowstr1=="u" && lowstr2=="a") {
|
699
|
+
// detect case
|
700
|
+
if (str1[0] == tolower(str1[0])) {
|
701
|
+
syl.insertCharacterAtCursor('o', sym1.tone());
|
702
|
+
syl.insertCharacterAtCursor('a', sym2.tone());
|
703
|
+
}
|
704
|
+
else {
|
705
|
+
syl.insertCharacterAtCursor('O', sym1.tone());
|
706
|
+
syl.insertCharacterAtCursor('A', sym2.tone());
|
707
|
+
}
|
708
|
+
|
709
|
+
i++;
|
710
|
+
continue;
|
711
|
+
}
|
712
|
+
|
713
|
+
// ik -> ek (at ending)
|
714
|
+
if (lowstr1=="i" && lowstr2=="k" && (i+2)==size) {
|
715
|
+
// detect case
|
716
|
+
if (str1[0] == tolower(str1[0])) {
|
717
|
+
syl.insertCharacterAtCursor('e', sym1.tone());
|
718
|
+
syl.insertCharacterAtCursor('k', sym2.tone());
|
719
|
+
}
|
720
|
+
else {
|
721
|
+
syl.insertCharacterAtCursor('E', sym1.tone());
|
722
|
+
syl.insertCharacterAtCursor('K', sym2.tone());
|
723
|
+
}
|
724
|
+
|
725
|
+
i++;
|
726
|
+
continue;
|
727
|
+
}
|
728
|
+
|
729
|
+
if (hasNextNextSymbol(i) && (i+3)==size) {
|
730
|
+
HoloSymbol sym3 = _symvec[i+2];
|
731
|
+
string str3 = sym3.symbol();
|
732
|
+
string lowstr3 = sym3.symbolInLowerCase();
|
733
|
+
|
734
|
+
// ing -> eng (must be ending)
|
735
|
+
if (lowstr1=="i" && lowstr2=="n" && lowstr3=="g") {
|
736
|
+
// detect case
|
737
|
+
if (str1[0] == tolower(str1[0])) {
|
738
|
+
syl.insertCharacterAtCursor('e', sym1.tone());
|
739
|
+
syl.insertCharacterAtCursor('n', sym2.tone());
|
740
|
+
syl.insertCharacterAtCursor('g', sym3.tone());
|
741
|
+
}
|
742
|
+
else {
|
743
|
+
syl.insertCharacterAtCursor('E', sym1.tone());
|
744
|
+
syl.insertCharacterAtCursor('N', sym2.tone());
|
745
|
+
syl.insertCharacterAtCursor('G', sym3.tone());
|
746
|
+
}
|
747
|
+
|
748
|
+
i+=2;
|
749
|
+
continue;
|
750
|
+
}
|
751
|
+
|
752
|
+
// ouh -> oh (ending)
|
753
|
+
if (lowstr1=="o" && lowstr2=="u" && lowstr3=="h") {
|
754
|
+
// detect case
|
755
|
+
if (str2[0] == tolower(str2[0])) {
|
756
|
+
syl.insertCharacterAtCursor('o', sym1.tone());
|
757
|
+
syl.insertCharacterAtCursor('h', sym2.tone());
|
758
|
+
}
|
759
|
+
else {
|
760
|
+
syl.insertCharacterAtCursor('O', sym1.tone());
|
761
|
+
syl.insertCharacterAtCursor('H', sym2.tone());
|
762
|
+
}
|
763
|
+
|
764
|
+
i+=2;
|
765
|
+
continue;
|
766
|
+
}
|
767
|
+
}
|
768
|
+
}
|
769
|
+
|
770
|
+
syl.insertSymbolAtCursor(sym1);
|
771
|
+
}
|
772
|
+
|
773
|
+
return syl;
|
774
|
+
}
|
775
|
+
|
776
|
+
HoloSyllable convertToTLSyllable()
|
777
|
+
{
|
778
|
+
HoloSyllable syl = *this;
|
779
|
+
syl.clearPreparedTone();
|
780
|
+
syl.setCursor(0);
|
781
|
+
if (_inputType==TLSyllable) return syl;
|
782
|
+
|
783
|
+
syl.setInputType(TLSyllable);
|
784
|
+
syl.clear();
|
785
|
+
|
786
|
+
// begin POJ->TL conversion
|
787
|
+
unsigned int size=_symvec.size();
|
788
|
+
unsigned int i;
|
789
|
+
|
790
|
+
for (i=0; i<size; i++)
|
791
|
+
{
|
792
|
+
HoloSymbol sym1 = _symvec[i];
|
793
|
+
string str1 = sym1.symbol();
|
794
|
+
string lowstr1 = sym1.symbolInLowerCase();
|
795
|
+
|
796
|
+
// ou -> oo
|
797
|
+
if (lowstr1=="ou")
|
798
|
+
{
|
799
|
+
// detect case
|
800
|
+
if (str1[0] == tolower(str1[0])) {
|
801
|
+
syl.insertCharacterAtCursor('o', sym1.tone());
|
802
|
+
syl.insertCharacterAtCursor('o');
|
803
|
+
}
|
804
|
+
else
|
805
|
+
{
|
806
|
+
syl.insertCharacterAtCursor('O', sym1.tone());
|
807
|
+
syl.insertCharacterAtCursor('O');
|
808
|
+
}
|
809
|
+
continue;
|
810
|
+
}
|
811
|
+
|
812
|
+
|
813
|
+
if (hasNextSymbol(i)) {
|
814
|
+
HoloSymbol sym2 = _symvec[i+1];
|
815
|
+
string str2 = sym2.symbol();
|
816
|
+
string lowstr2 = sym2.symbolInLowerCase();
|
817
|
+
|
818
|
+
// ch -> ts with case detection
|
819
|
+
if (lowstr1=="c" && lowstr2=="h") {
|
820
|
+
// detect case
|
821
|
+
if (str1[0] == tolower(str1[0])) {
|
822
|
+
syl.insertCharacterAtCursor('t');
|
823
|
+
syl.insertCharacterAtCursor('s');
|
824
|
+
}
|
825
|
+
else {
|
826
|
+
syl.insertCharacterAtCursor('T');
|
827
|
+
syl.insertCharacterAtCursor('S');
|
828
|
+
}
|
829
|
+
|
830
|
+
i++;
|
831
|
+
continue;
|
832
|
+
}
|
833
|
+
|
834
|
+
// oe -> ue
|
835
|
+
if (lowstr1=="o" && lowstr2=="e") {
|
836
|
+
// detect case
|
837
|
+
if (str1[0] == tolower(str1[0])) {
|
838
|
+
syl.insertCharacterAtCursor('u', sym1.tone());
|
839
|
+
syl.insertCharacterAtCursor('e', sym2.tone());
|
840
|
+
}
|
841
|
+
else {
|
842
|
+
syl.insertCharacterAtCursor('U', sym1.tone());
|
843
|
+
syl.insertCharacterAtCursor('E', sym2.tone());
|
844
|
+
}
|
845
|
+
|
846
|
+
i++;
|
847
|
+
continue;
|
848
|
+
}
|
849
|
+
|
850
|
+
// oa -> ua
|
851
|
+
if (lowstr1=="o" && lowstr2=="a") {
|
852
|
+
// detect case
|
853
|
+
if (str1[0] == tolower(str1[0])) {
|
854
|
+
syl.insertCharacterAtCursor('u', sym1.tone());
|
855
|
+
syl.insertCharacterAtCursor('a', sym2.tone());
|
856
|
+
}
|
857
|
+
else {
|
858
|
+
syl.insertCharacterAtCursor('U', sym1.tone());
|
859
|
+
syl.insertCharacterAtCursor('A', sym2.tone());
|
860
|
+
}
|
861
|
+
|
862
|
+
i++;
|
863
|
+
continue;
|
864
|
+
}
|
865
|
+
|
866
|
+
// ek -> ik (at ending)
|
867
|
+
if (lowstr1=="e" && lowstr2=="k" && (i+2)==size) {
|
868
|
+
// detect case
|
869
|
+
if (str1[0] == tolower(str1[0])) {
|
870
|
+
syl.insertCharacterAtCursor('i', sym1.tone());
|
871
|
+
syl.insertCharacterAtCursor('k', sym2.tone());
|
872
|
+
}
|
873
|
+
else {
|
874
|
+
syl.insertCharacterAtCursor('I', sym1.tone());
|
875
|
+
syl.insertCharacterAtCursor('K', sym2.tone());
|
876
|
+
}
|
877
|
+
|
878
|
+
i++;
|
879
|
+
continue;
|
880
|
+
}
|
881
|
+
|
882
|
+
if (hasNextNextSymbol(i) && (i+3)==size) {
|
883
|
+
HoloSymbol sym3 = _symvec[i+2];
|
884
|
+
string str3 = sym3.symbol();
|
885
|
+
string lowstr3 = sym3.symbolInLowerCase();
|
886
|
+
|
887
|
+
// ing -> eng (must be ending)
|
888
|
+
if (lowstr1=="e" && lowstr2=="n" && lowstr3=="g") {
|
889
|
+
// detect case
|
890
|
+
if (str1[0] == tolower(str1[0])) {
|
891
|
+
syl.insertCharacterAtCursor('i', sym1.tone());
|
892
|
+
syl.insertCharacterAtCursor('n', sym2.tone());
|
893
|
+
syl.insertCharacterAtCursor('g', sym3.tone());
|
894
|
+
}
|
895
|
+
else {
|
896
|
+
syl.insertCharacterAtCursor('I', sym1.tone());
|
897
|
+
syl.insertCharacterAtCursor('N', sym2.tone());
|
898
|
+
syl.insertCharacterAtCursor('G', sym3.tone());
|
899
|
+
}
|
900
|
+
|
901
|
+
i+=2;
|
902
|
+
continue;
|
903
|
+
}
|
904
|
+
}
|
905
|
+
}
|
906
|
+
|
907
|
+
syl.insertSymbolAtCursor(sym1);
|
908
|
+
}
|
909
|
+
return syl;
|
910
|
+
}
|
911
|
+
|
912
|
+
|
913
|
+
protected:
|
914
|
+
bool atBeginning()
|
915
|
+
{
|
916
|
+
return _cursor == 0;
|
917
|
+
}
|
918
|
+
|
919
|
+
bool atEnd()
|
920
|
+
{
|
921
|
+
return _cursor == numberOfCodepoints();
|
922
|
+
}
|
923
|
+
|
924
|
+
void clearPreparedTone()
|
925
|
+
{
|
926
|
+
if (!_preparedTone) return;
|
927
|
+
_preparedTone = 0;
|
928
|
+
_cursor--;
|
929
|
+
}
|
930
|
+
|
931
|
+
bool hasPreviousSymbolAtCursor()
|
932
|
+
{
|
933
|
+
unsigned int realcursor = _preparedTone ? _cursor-1 : _cursor;
|
934
|
+
return realcursor > 0;
|
935
|
+
}
|
936
|
+
|
937
|
+
bool hasNextSymbol(unsigned int pos)
|
938
|
+
{
|
939
|
+
if (pos+1 >= _symvec.size()) return false;
|
940
|
+
return true;
|
941
|
+
}
|
942
|
+
|
943
|
+
bool hasNextNextSymbol(unsigned int pos)
|
944
|
+
{
|
945
|
+
if (pos+2 >= _symvec.size()) return false;
|
946
|
+
return true;
|
947
|
+
}
|
948
|
+
|
949
|
+
|
950
|
+
// the result of this function is unpredictable if there's no
|
951
|
+
// previous symbol--always check with hasPreviousSymbolAtCursor() !
|
952
|
+
HoloSymbol& previousSymbolAtCursor()
|
953
|
+
{
|
954
|
+
unsigned int realcursor = _preparedTone ? _cursor-1 : _cursor;
|
955
|
+
return _symvec[realcursor-1];
|
956
|
+
}
|
957
|
+
|
958
|
+
// always assumes that the given input is in all lower case
|
959
|
+
unsigned int findSymbol(const char *s)
|
960
|
+
{
|
961
|
+
string cpps(s);
|
962
|
+
unsigned int size = _symvec.size();
|
963
|
+
unsigned int i;
|
964
|
+
for (i = 0; i < size; i++) {
|
965
|
+
if (_symvec[i].symbolInLowerCase() == cpps) break;
|
966
|
+
}
|
967
|
+
return i;
|
968
|
+
}
|
969
|
+
|
970
|
+
unsigned int findSymbolPair(const char *s1, const char *s2)
|
971
|
+
{
|
972
|
+
string cpps1(s1), cpps2(s2);
|
973
|
+
|
974
|
+
unsigned int size = _symvec.size();
|
975
|
+
if (size < 2) return size;
|
976
|
+
|
977
|
+
unsigned int i;
|
978
|
+
for (i = 0; i < size-1; i++) {
|
979
|
+
if (_symvec[i].symbolInLowerCase()==cpps1 && _symvec[i+1].symbolInLowerCase()==cpps2) return i;
|
980
|
+
}
|
981
|
+
|
982
|
+
return size;
|
983
|
+
}
|
984
|
+
|
985
|
+
// Syllable type of this object's input.
// NOTE(review): how it is consumed is not visible in this section -- confirm.
SyllableType _inputType;
|
986
|
+
// Diacritic input option.
// NOTE(review): semantics are defined elsewhere in the file -- confirm.
DiacriticInputOption _inputOption;
|
987
|
+
// Presumably forces POJ-style handling when true; usage is not visible
// in this section -- TODO confirm.
bool _forcePOJStyle;
|
988
|
+
|
989
|
+
// The stored symbols; findSymbol() and findSymbolPair() scan this vector
// and report positions as indices into it.
vector<HoloSymbol> _symvec;
|
990
|
+
// Cursor position. While _preparedTone is non-zero,
// hasPreviousSymbolAtCursor() and previousSymbolAtCursor() use _cursor-1
// as the effective position.
unsigned int _cursor;
|
991
|
+
// Non-zero while a tone is prepared; clearPreparedTone() resets it to 0
// and moves _cursor back by one.
unsigned int _preparedTone;
|
992
|
+
};
|
993
|
+
|
994
|
+
class FreeFormSyllable : public ComposableStringBuffer
|
995
|
+
{
|
996
|
+
public:
|
997
|
+
HoloSyllable convertToTLFromTLPA(unsigned int finalTone=0)
|
998
|
+
{
|
999
|
+
string rep=internalForm();
|
1000
|
+
HoloSyllable syl;
|
1001
|
+
syl.setInputType(TLSyllable);
|
1002
|
+
|
1003
|
+
unsigned int size=rep.length();
|
1004
|
+
for (unsigned int i=0; i<size; i++)
|
1005
|
+
{
|
1006
|
+
if (rep[i]=='c') {
|
1007
|
+
syl.insertCharacterAtCursor('t');
|
1008
|
+
syl.insertCharacterAtCursor('s');
|
1009
|
+
}
|
1010
|
+
else if (rep[i]=='C')
|
1011
|
+
{
|
1012
|
+
syl.insertCharacterAtCursor('T');
|
1013
|
+
syl.insertCharacterAtCursor('S');
|
1014
|
+
}
|
1015
|
+
else syl.insertCharacterAtCursor(rep[i]);
|
1016
|
+
}
|
1017
|
+
|
1018
|
+
syl.normalize(finalTone);
|
1019
|
+
return syl;
|
1020
|
+
}
|
1021
|
+
|
1022
|
+
|
1023
|
+
HoloSyllable convertToTLFromDT(unsigned int finalTone=0)
|
1024
|
+
{
|
1025
|
+
string rep=internalForm();
|
1026
|
+
HoloSyllable syl;
|
1027
|
+
syl.setInputType(TLSyllable);
|
1028
|
+
|
1029
|
+
unsigned int size=rep.length();
|
1030
|
+
for (unsigned int i=0; i<size; i++)
|
1031
|
+
{
|
1032
|
+
char dt1 = rep[i];
|
1033
|
+
char lowdt1 = tolower(dt1);
|
1034
|
+
|
1035
|
+
// r -> j (beginning)
|
1036
|
+
if (i==0 && lowdt1=='r') {
|
1037
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('j', dt1));
|
1038
|
+
continue;
|
1039
|
+
}
|
1040
|
+
|
1041
|
+
// replaces the two-character combinations
|
1042
|
+
if (i+1 < size) {
|
1043
|
+
string part=rep.substr(i, 2);
|
1044
|
+
string lower=toLowerString(part);
|
1045
|
+
|
1046
|
+
// or -> o
|
1047
|
+
if (lower=="or") {
|
1048
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('o', part));
|
1049
|
+
i++;
|
1050
|
+
continue;
|
1051
|
+
}
|
1052
|
+
|
1053
|
+
// en -> ian
|
1054
|
+
if (lower=="en") {
|
1055
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('i', part));
|
1056
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('a', part));
|
1057
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('n', part));
|
1058
|
+
i++;
|
1059
|
+
continue;
|
1060
|
+
}
|
1061
|
+
|
1062
|
+
// et -> iat
|
1063
|
+
if (lower=="et") {
|
1064
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('i', part));
|
1065
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('a', part));
|
1066
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('t', part));
|
1067
|
+
i++;
|
1068
|
+
continue;
|
1069
|
+
}
|
1070
|
+
|
1071
|
+
|
1072
|
+
// bh -> b (beginning)
|
1073
|
+
if (i==0 && lower=="bh") {
|
1074
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('b', part));
|
1075
|
+
i++;
|
1076
|
+
continue;
|
1077
|
+
}
|
1078
|
+
|
1079
|
+
// gh -> g (beginning)
|
1080
|
+
if (i==0 && lower=="gh") {
|
1081
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('g', part));
|
1082
|
+
i++;
|
1083
|
+
continue;
|
1084
|
+
}
|
1085
|
+
|
1086
|
+
// wa -> ua (beginning)
|
1087
|
+
if (lower=="wa" && i==0) {
|
1088
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('u', part));
|
1089
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('a', part));
|
1090
|
+
i++;
|
1091
|
+
continue;
|
1092
|
+
}
|
1093
|
+
|
1094
|
+
// we -> ue (beginning)
|
1095
|
+
if (lower=="we" && i==0) {
|
1096
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('u', part));
|
1097
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('e', part));
|
1098
|
+
i++;
|
1099
|
+
continue;
|
1100
|
+
}
|
1101
|
+
|
1102
|
+
// wi -> ui (beginning)
|
1103
|
+
if (lower=="wi" && i==0) {
|
1104
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('u', part));
|
1105
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('i', part));
|
1106
|
+
i++;
|
1107
|
+
continue;
|
1108
|
+
}
|
1109
|
+
|
1110
|
+
// yo -> io (beginning)
|
1111
|
+
if (lower=="yo" && i==0) {
|
1112
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('i', part));
|
1113
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('o', part));
|
1114
|
+
i++;
|
1115
|
+
continue;
|
1116
|
+
}
|
1117
|
+
|
1118
|
+
// yi -> i (beginning)
|
1119
|
+
if (lower=="yi" && i==0) {
|
1120
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('i', part));
|
1121
|
+
i++;
|
1122
|
+
continue;
|
1123
|
+
}
|
1124
|
+
}
|
1125
|
+
|
1126
|
+
// o -> oo
|
1127
|
+
if (lowdt1=='o') {
|
1128
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('o', dt1));
|
1129
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('o', dt1));
|
1130
|
+
continue;
|
1131
|
+
}
|
1132
|
+
|
1133
|
+
// b -> p (beginning)
|
1134
|
+
if (i==0 && lowdt1=='b') {
|
1135
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('p', dt1));
|
1136
|
+
continue;
|
1137
|
+
}
|
1138
|
+
|
1139
|
+
// p -> ph (beginning)
|
1140
|
+
if (i==0 && lowdt1=='p') {
|
1141
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('p', dt1));
|
1142
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('h', dt1));
|
1143
|
+
continue;
|
1144
|
+
}
|
1145
|
+
|
1146
|
+
// k -> kh (beginning)
|
1147
|
+
if (i==0 && lowdt1=='k') {
|
1148
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('k', dt1));
|
1149
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('h', dt1));
|
1150
|
+
continue;
|
1151
|
+
}
|
1152
|
+
|
1153
|
+
// g -> k (beginning)
|
1154
|
+
if (i==0 && lowdt1=='g') {
|
1155
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('k', dt1));
|
1156
|
+
continue;
|
1157
|
+
}
|
1158
|
+
|
1159
|
+
// d -> t (beginning)
|
1160
|
+
if (i==0 && lowdt1=='d') {
|
1161
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('t', dt1));
|
1162
|
+
continue;
|
1163
|
+
}
|
1164
|
+
|
1165
|
+
// t -> th (beginning)
|
1166
|
+
if (i==0 && lowdt1=='t') {
|
1167
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('t', dt1));
|
1168
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('h', dt1));
|
1169
|
+
continue;
|
1170
|
+
}
|
1171
|
+
|
1172
|
+
// z -> ts (beginning)
|
1173
|
+
if (i==0 && lowdt1=='z') {
|
1174
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('t', dt1));
|
1175
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('s', dt1));
|
1176
|
+
continue;
|
1177
|
+
}
|
1178
|
+
|
1179
|
+
// c -> tsh (beginning)
|
1180
|
+
if (i==0 && lowdt1=='c') {
|
1181
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('t', dt1));
|
1182
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('s', dt1));
|
1183
|
+
syl.insertCharacterAtCursor(charWithCaseAccordingTo('h', dt1));
|
1184
|
+
continue;
|
1185
|
+
}
|
1186
|
+
|
1187
|
+
// else ...
|
1188
|
+
syl.insertCharacterAtCursor(dt1);
|
1189
|
+
}
|
1190
|
+
|
1191
|
+
// remap the final tone
|
1192
|
+
unsigned int tltone=finalTone;
|
1193
|
+
|
1194
|
+
syl.normalize(tltone);
|
1195
|
+
return syl;
|
1196
|
+
|
1197
|
+
}
|
1198
|
+
|
1199
|
+
protected:
|
1200
|
+
char charWithCaseAccordingTo(char c, char ref)
|
1201
|
+
{
|
1202
|
+
if (tolower(ref) == ref) return tolower(c);
|
1203
|
+
return toupper(c);
|
1204
|
+
}
|
1205
|
+
|
1206
|
+
char charWithCaseAccordingTo(char c, const string &r)
|
1207
|
+
{
|
1208
|
+
if (tolower(r[0]) == r[0]) return tolower(c);
|
1209
|
+
return toupper(c);
|
1210
|
+
}
|
1211
|
+
|
1212
|
+
string toLowerString(const string &s)
|
1213
|
+
{
|
1214
|
+
unsigned int size=s.length();
|
1215
|
+
string lower;
|
1216
|
+
unsigned int i;
|
1217
|
+
for (i=0;i<size;i++) lower+=string(1, tolower(s[i]));
|
1218
|
+
return lower;
|
1219
|
+
}
|
1220
|
+
};
|
1221
|
+
};
|
1222
|
+
|
1223
|
+
|