glaemscribe 1.1.14 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +5 -5
  2. data/bin/glaemscribe +19 -15
  3. data/glaemresources/charsets/cirth_ds.cst +205 -0
  4. data/glaemresources/charsets/sarati_eldamar.cst +256 -0
  5. data/glaemresources/charsets/tengwar_ds_annatar.cst +546 -0
  6. data/glaemresources/charsets/tengwar_ds_eldamar.cst +535 -0
  7. data/glaemresources/charsets/tengwar_ds_elfica.cst +551 -0
  8. data/glaemresources/charsets/tengwar_ds_parmaite.cst +534 -0
  9. data/glaemresources/charsets/tengwar_ds_sindarin.cst +531 -0
  10. data/glaemresources/charsets/tengwar_freemono.cst +217 -0
  11. data/glaemresources/charsets/tengwar_guni_annatar.cst +628 -0
  12. data/glaemresources/charsets/tengwar_guni_eldamar.cst +618 -0
  13. data/glaemresources/charsets/tengwar_guni_elfica.cst +620 -0
  14. data/glaemresources/charsets/tengwar_guni_parmaite.cst +621 -0
  15. data/glaemresources/charsets/tengwar_guni_sindarin.cst +617 -0
  16. data/glaemresources/charsets/tengwar_telcontar.cst +218 -0
  17. data/glaemresources/charsets/unicode_gothic.cst +64 -0
  18. data/glaemresources/charsets/unicode_runes.cst +121 -0
  19. data/glaemresources/modes/{adunaic.glaem → adunaic-tengwar-glaemscrafu.glaem} +14 -2
  20. data/glaemresources/modes/{blackspeech.glaem → blackspeech-tengwar-general_use.glaem} +12 -2
  21. data/glaemresources/modes/japanese-tengwar.glaem +771 -0
  22. data/glaemresources/modes/{khuzdul.glaem → khuzdul-cirth-moria.glaem} +4 -1
  23. data/glaemresources/modes/{futhorc.glaem → old_english-futhorc.glaem} +0 -0
  24. data/glaemresources/modes/{mercian.glaem → old_english-tengwar-mercian.glaem} +22 -12
  25. data/glaemresources/modes/{westsaxon.glaem → old_english-tengwar-westsaxon.glaem} +20 -11
  26. data/glaemresources/modes/{futhark-runicus.glaem → old_norse-futhark-runicus.glaem} +0 -0
  27. data/glaemresources/modes/{futhark-younger.glaem → old_norse-futhark-younger.glaem} +0 -0
  28. data/glaemresources/modes/{quenya.glaem → quenya-tengwar-classical.glaem} +32 -50
  29. data/glaemresources/modes/raw-tengwar.glaem +46 -23
  30. data/glaemresources/modes/{rlyehian.glaem → rlyehian-tengwar.glaem} +14 -3
  31. data/glaemresources/modes/{sindarin-daeron.glaem → sindarin-cirth-daeron.glaem} +55 -14
  32. data/glaemresources/modes/{sindarin-beleriand.glaem → sindarin-tengwar-beleriand.glaem} +154 -28
  33. data/glaemresources/modes/{sindarin.glaem → sindarin-tengwar-general_use.glaem} +86 -25
  34. data/glaemresources/modes/{telerin.glaem → telerin-tengwar-glaemscrafu.glaem} +16 -6
  35. data/glaemresources/modes/{westron.glaem → westron-tengwar-glaemscrafu.glaem} +18 -8
  36. data/lib/api/charset.rb +67 -7
  37. data/lib/api/charset_parser.rb +7 -0
  38. data/lib/api/constants.rb +3 -4
  39. data/lib/api/fragment.rb +26 -5
  40. data/lib/api/if_tree.rb +70 -8
  41. data/lib/api/macro.rb +40 -0
  42. data/lib/api/mode.rb +35 -13
  43. data/lib/api/mode_parser.rb +106 -12
  44. data/lib/api/object_additions.rb +23 -1
  45. data/lib/api/option.rb +17 -2
  46. data/lib/api/post_processor/resolve_virtuals.rb +25 -9
  47. data/lib/api/resource_manager.rb +1 -0
  48. data/lib/api/rule_group.rb +170 -26
  49. data/lib/api/sheaf_chain_iterator.rb +1 -1
  50. data/lib/api/transcription_processor.rb +3 -3
  51. data/lib/api/tts.rb +51 -0
  52. data/lib/glaemscribe.rb +34 -31
  53. data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +21 -0
  54. data/lib_espeak/glaemscribe_tts.js +365 -0
  55. metadata +67 -21
@@ -0,0 +1,771 @@
1
+ \**
2
+
3
+ Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ the transcription of texts between writing systems, and more
5
+ specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ invented languages to some of his devised writing systems.
7
+
8
+ Copyright (C) 2015 Benjamin Babut (Talagan).
9
+
10
+ This program is free software: you can redistribute it and/or modify
11
+ it under the terms of the GNU Affero General Public License as published by
12
+ the Free Software Foundation, either version 3 of the License, or
13
+ any later version.
14
+
15
+ This program is distributed in the hope that it will be useful,
16
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ GNU Affero General Public License for more details.
19
+
20
+ You should have received a copy of the GNU Affero General Public License
21
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ **\
24
+
25
+ \language "Japanese"
26
+ \writing "Tengwar"
27
+ \mode "Japanese Tengwar - G*"
28
+ \authors "Talagan (Benjamin Babut)"
29
+ \version "0.0.1"
30
+
31
+ \world primary
32
+ \invention experimental
33
+
34
+ \raw_mode "raw-tengwar"
35
+
36
+ \charset tengwar_ds_sindarin false
37
+ \charset tengwar_ds_parmaite false
38
+ \charset tengwar_ds_eldamar false
39
+ \charset tengwar_ds_annatar true
40
+ \charset tengwar_ds_elfica false
41
+
42
+ \charset tengwar_guni_sindarin false
43
+ \charset tengwar_guni_parmaite false
44
+ \charset tengwar_guni_eldamar false
45
+ \charset tengwar_guni_annatar false
46
+ \charset tengwar_guni_elfica false
47
+
48
+ \charset tengwar_freemono false
49
+ \charset tengwar_telcontar false
50
+
51
+ \beg options
52
+
53
+ \** Vowel options **\
54
+ \beg option choon_long_vowels CHOON_CONFLATE
55
+ \value CHOON_CONFLATE 0
56
+ \value CHOON_KEEP_SEPARATE 1
57
+ \end
58
+ \** Yoon options **\
59
+ \beg option palatal_sign PALATAL_SIGN_UNUTIXE
60
+ \value PALATAL_SIGN_UNUTIXE 0
61
+ \value PALATAL_SIGN_DOUBLE_UNUTIXE 1
62
+ \end
63
+
64
+ \** Palatal **\
65
+ \beg option gojuon_y GOJUON_Y_USE_YANTA
66
+ \value GOJUON_Y_USE_TELCO 0
67
+ \value GOJUON_Y_USE_YANTA 1
68
+ \end
69
+
70
+ \** labial **\
71
+ \beg option gojuon_w GOJUON_W_USE_URE
72
+ \value GOJUON_W_USE_TELCO 0
73
+ \value GOJUON_W_USE_URE 1
74
+ \end
75
+
76
+ \** Consonant options **\
77
+ \beg option isolated_n ISOLATED_N_USE_NOLDO
78
+ \value ISOLATED_N_USE_NOLDO 0
79
+ \value ISOLATED_N_USE_MODIFIER 1
80
+ \end
81
+
82
+ \beg option geminates GEMINATES_USE_HALLA
83
+ \value GEMINATES_USE_HALLA 0
84
+ \value GEMINATES_USE_MODIFIER 1
85
+ \value GEMINATES_DOUBLE_TENGWA 2
86
+ \end
87
+
88
+ \beg option dakuten_h_p_b DAKUTEN_THK
89
+ \value DAKUTEN_THK 0 \** JP STYLE **\
90
+ \value DAKUTEN_TPK 1 \** ELVISH STYLE **\
91
+ \end
92
+
93
+ \** Foreign options **\
94
+ \beg option foreign_labialized FOREIGN_LABIALIZED_DOWN_TEHTA
95
+ \value FOREIGN_LABIALIZED_DOWN_TEHTA 0
96
+ \value FOREIGN_LABIALIZED_QUESSETEMA 1
97
+ \end
98
+
99
+ \beg option foreign_f_v FOREIGN_F_V_XTD
100
+ \value FOREIGN_F_V_XTD 0
101
+ \value FOREIGN_F_V_OVERSTEM 1
102
+ \end
103
+
104
+ \** Graphical style options **\
105
+ \beg option consonant_modification_style CONSONANT_MODIFICATION_STYLE_WAVE
106
+ \value CONSONANT_MODIFICATION_STYLE_WAVE 0
107
+ \value CONSONANT_MODIFICATION_STYLE_BAR 1
108
+ \end
109
+
110
+ \end
111
+
112
+ \beg preprocessor
113
+
114
+ \downcase
115
+
116
+ \** Normalize explicitly written long vowels **\
117
+ \rxsubstitute "[āâàá]" "aー"
118
+ \rxsubstitute "[īîíì]" "iー"
119
+ \rxsubstitute "[ūûúù]" "uー"
120
+ \rxsubstitute "[ēêéè]" "eー"
121
+ \rxsubstitute "[ōôóò]" "oー"
122
+
123
+ \** Normalize elvish bizarrerie **\
124
+ \substitute "ä" "a"
125
+ \substitute "ï" "i"
126
+ \substitute "ü" "u"
127
+ \substitute "ë" "e"
128
+ \substitute "ö" "o"
129
+
130
+ \if "choon_long_vowels == CHOON_CONFLATE"
131
+
132
+ \** ================== **\
133
+ \** Gojûon **\
134
+ \** ================== **\
135
+
136
+ \rxsubstitute "([あアカかさサたタなナはハまマらラがガざザだダばバぱパやヤわワ])[あアー]" "\\1ー"
137
+ \rxsubstitute "([いイきキしシちチにニひヒみミりリぎギじジぢヂびビぴピゐヰ])[いイー]" "\\1ー"
138
+ \rxsubstitute "([うウくクすスつツぬヌふフむムるルぐグずズづヅぶブぷプゆユ])[うウー]" "\\1ー"
139
+ \rxsubstitute "([えエけケせセてテねネへヘめメれレげゲぜゼでデべベぺペゑヱ])[いイー]" "\\1ー"
140
+ \rxsubstitute "([おオこコそソとトのノほホもモろロごゴぞゾどドぼボぽポよヨをヲ])[うウー]" "\\1ー"
141
+
142
+ \** too, oo . E.G. ookami, too **\
143
+ \rxsubstitute "([とトおオ])[おオ]" "\\1ー"
144
+ \** nee, ee . E.G. oneesan, nee, ee **\
145
+ \rxsubstitute "([ねネえエ])[えエ]" "\\1ー"
146
+
147
+ \** ================== **\
148
+ \** Yôon **\
149
+ \** ================== **\
150
+
151
+ \** Normalization of long vowels : yoon .yâ, .yû, .yô **\
152
+ \rxsubstitute "([ゃャ])[あアー]" "\\1ー"
153
+ \rxsubstitute "([ゅュ])[うウー]" "\\1ー"
154
+ \rxsubstitute "([ょョ])[うウー]" "\\1ー"
155
+
156
+ \** ================== **\
157
+ \** Rômaji equivalents **\
158
+ \** ================== **\
159
+
160
+ \**
161
+ a ka sa ta na ha ma ra ga za da ba pa ya wa + a = .â (k|s|t|n|h|m|r|g|z|d|b|p|y|w)
162
+ i ki shi chi ni hi mi ri gi ji dji bi pi - wi + i = .î (k|sh|ch|n|h|m|r|g|j|dj|b|p|w)
163
+ u ku su tsu nu fu mu ru gu ju dju bu pu yu - + u = .û (k|s|ts|n|f|m|r|g|j|dj|b|p|y)
164
+ e ke se te ne he me re ge ze de be pe - we + i = .ê (k|s|t|n|h|m|r|g|z|d|b|p|w)
165
+ o ko so to no ho mo ro go zo do bo po yo wo + u = .ô (k|s|t|n|h|m|r|g|z|d|b|p|y|w)
166
+ **\
167
+
168
+ \** Note : this additionally does handle the Yôon through y **\
169
+ \rxsubstitute "aa" "aー"
170
+ \rxsubstitute "ii" "iー"
171
+ \rxsubstitute "uu" "uー"
172
+ \rxsubstitute "ei" "eー"
173
+ \rxsubstitute "ou" "oー"
174
+
175
+ \** nee, ee . E.G. oneesan, nee, ee. Avoid breaking things like kee. **\
176
+ \rxsubstitute "(k|s|t|h|m|r|g|z|d|b|p|w)ee" "\\1e'e" \** Removed n for nee **\
177
+ \rxsubstitute "(k|s|n|h|m|r|g|z|d|b|p|y|w)oo" "\\1o'o" \** Removed t for too **\
178
+ \rxsubstitute "oo" "oー"
179
+ \rxsubstitute "ee" "eー"
180
+
181
+ \** Foreign additional stuff should always use macrons or accents to denote long vowels **\
182
+ \endif
183
+
184
+ \** Normalization of geminates. Simply introduce っ in romanisation ;) **\
185
+ \rxsubstitute "([tpkcs])\\1" "っ\\1"
186
+ \rxsubstitute "tch" "っch"
187
+
188
+ \** Far less common, maybe even impossible **\
189
+ \rxsubstitute "([h])\\1" "っ\\1"
190
+ \rxsubstitute "([dbgz])\\1" "っ\\1"
191
+ \rxsubstitute "([fvl])\\1" "っ\\1"
192
+ \rxsubstitute "([rwy])\\1" "っ\\1"
193
+
194
+ \** Normalization of isolated nasalizer n (&m). Simply introduce ん in romanisation :) **\
195
+ \rxsubstitute "(n|m)([^aeiouy]|$)" "ん\\2"
196
+
197
+ \end
198
+
199
+ \beg processor
200
+ \beg rules litteral
201
+
202
+ \** ================================ **\
203
+ \** PREREQUISITES **\
204
+ \** ================================ **\
205
+
206
+ \if "consonant_modification_style == CONSONANT_MODIFICATION_STYLE_WAVE"
207
+ {GEMINATE} === GEMINATE_SIGN_TILD
208
+ {NASAL} === NASALIZE_SIGN_TILD
209
+ \else
210
+ {GEMINATE} === GEMINATE_SIGN
211
+ {NASAL} === NASALIZE_SIGN
212
+ \endif
213
+
214
+ \** Dakuten / Handakuten resolution **\
215
+ \if "dakuten_h_p_b == DAKUTEN_THK"
216
+ {_P_TENGWA_} === VALA
217
+ {_H_TENGWA_} === PARMA
218
+ \else
219
+ {_P_TENGWA_} === PARMA
220
+ {_H_TENGWA_} === VALA
221
+ \endif
222
+
223
+ {MORAIC_NASAL} === (ん,ン)
224
+ {SOKUON} === (っ,ッ)
225
+
226
+ {_A_} === A_TEHTA
227
+ {_I_} === I_TEHTA
228
+ {_U_} === U_TEHTA
229
+ {_E_} === E_TEHTA
230
+ {_O_} === O_TEHTA
231
+
232
+ {_AA_} === A_TEHTA_REVERSED
233
+ {_II_} === I_TEHTA_DOUBLE
234
+ {_UU_} === U_TEHTA_DOUBLE
235
+ {_EE_} === E_TEHTA_DOUBLE
236
+ {_OO_} === O_TEHTA_DOUBLE
237
+
238
+ {_SHORT_VOWELS_} === [{_A_} * {_I_} * {_U_} * {_E_} * {_O_}]
239
+ {_LONG_VOWELS_} === [{_AA_} * {_II_} * {_UU_} * {_EE_} * {_OO_}]
240
+
241
+ {_SHORT_VOWELS_YON_} === [{_A_} * {_U_} * {_O_}]
242
+ {_LONG_VOWELS_YON_} === [{_AA_} * {_UU_} * {_OO_}]
243
+
244
+ {NASAL_OR_NOT} === [{NULL} * {MORAIC_NASAL}]
245
+ {GEMINATE_OR_NOT} === [{NULL} * {SOKUON}]
246
+ {MODS} === {NASAL_OR_NOT} {GEMINATE_OR_NOT}
247
+
248
+ \if "palatal_sign == PALATAL_SIGN_UNUTIXE"
249
+ {_PALATAL_} === UNUTIXE
250
+ \else
251
+ {_PALATAL_} === I_TEHTA_DOUBLE_INF
252
+ \endif
253
+
254
+ {_NASAL_} === {NASAL}
255
+ {_GEMINATE_} === {GEMINATE}
256
+ {_LABIAL_} === U_TEHTA_INF
257
+ {_CANCELLER_} === THINNAS
258
+ {_SIBILANT_} === SARINCE
259
+ {_CANCEL_THEN_PALATAL_} === CIRC_TEHTA_INF
260
+
261
+ \** General macro for Japanese, deployed for almost every serie below. It emits two rules for {ARG_SERIE}: one using the short vowels and one, whose source ends with the long vowel mark ー, using the long vowels. Both sources are prefixed with {MODS}. The isolated_n and geminates options decide what is written before ({_PREMODS_}) and after ({_POSTMODS_}) the main tengwa; {__X__} is the cross rule schema, identity unless overridden below. **\
262
+ \beg macro serie ARG_SERIE _ARG_MAIN_TENGWA_ _ARG_SERIE_MODIFIERS_ _ARG_SHORT_VOWELS_ _ARG_LONG_VOWELS_ _ARG_SIB_
263
+ {_PREMODS_} === {NULL}
264
+ {_POSTMODS_} === {NULL}
265
+ {__X__} === identity
266
+ \if "isolated_n == ISOLATED_N_USE_NOLDO"
267
+ \if "geminates == GEMINATES_USE_HALLA"
268
+ {_PREMODS_} === [{NULL} * NOLDO][{NULL} * HALLA]
269
+ \elsif "geminates == GEMINATES_DOUBLE_TENGWA"
270
+ {_PREMODS_} === [{NULL} * NOLDO][{NULL} * {_ARG_MAIN_TENGWA_}]
271
+ \else
272
+ {_PREMODS_} === [{NULL} * NOLDO]
273
+ {_POSTMODS_} === [{NULL} * {_GEMINATE_}]
274
+ \endif
275
+ \else
276
+ \if "geminates == GEMINATES_USE_HALLA"
277
+ {_PREMODS_} === [{NULL} * HALLA]
278
+ {_POSTMODS_} === [{NULL} * {_NASAL_}]
279
+ \** In the source the nasal mark comes before the geminate, but their positions are swapped in the output, so use a cross rule here **\
280
+ {__X__} === 2,1,3
281
+ \elsif "geminates == GEMINATES_DOUBLE_TENGWA"
282
+ \** This case is the most complex because we want : (Teng Nasal) Teng **\
283
+ \** But it can be factorized by cheating because : **\
284
+ \** (Teng Nasal) Teng = Teng (Nasal Teng) **\
285
+ {_POSTMODS_} === [{NULL} * {_NASAL_}][{NULL} * {_ARG_MAIN_TENGWA_}]
286
+ \else
287
+ {_POSTMODS_} === [{NULL} * {_NASAL_}][{NULL} * {_GEMINATE_}]
288
+ \endif
289
+ \endif
290
+ {MODS}{ARG_SERIE} --> {__X__} --> {_PREMODS_} {_ARG_MAIN_TENGWA_} {_POSTMODS_} {_ARG_SERIE_MODIFIERS_} {_ARG_SHORT_VOWELS_} {_ARG_SIB_}
291
+ {MODS}{ARG_SERIE}ー --> {__X__} --> {_PREMODS_} {_ARG_MAIN_TENGWA_} {_POSTMODS_} {_ARG_SERIE_MODIFIERS_} {_ARG_LONG_VOWELS_} {_ARG_SIB_}
292
+ \end
293
+
294
+ \** ================================ **\
295
+ \** START OF RULES **\
296
+ \** ================================ **\
297
+
298
+ \** Isolated vowels **\
299
+ {GOJUON} === [(あ,ア,a) * (い,イ,i) * (う,ウ,u) * (え,エ,e) * (お,オ,o)]
300
+ {YOON} === [(や,ヤ,ya) * (ゆ,ユ,yu) * (よ,ヨ,yo)]
301
+ {GOJUON} --> TELCO {_SHORT_VOWELS_}
302
+ {GOJUON}ー --> TELCO {_LONG_VOWELS_}
303
+
304
+ \** Isolated Y serie **\
305
+ \if "gojuon_y == GOJUON_Y_USE_YANTA"
306
+ {TENGWA} === YANTA
307
+ \else
308
+ {TENGWA} === TELCO {_PALATAL_}
309
+ \endif
310
+ {YOON} --> {TENGWA} {_SHORT_VOWELS_YON_}
311
+ {YOON}ー --> {TENGWA} {_LONG_VOWELS_YON_}
312
+
313
+ \** T serie **\
314
+ {GOJUON} === [(た,タ,ta) * (ち,チ,chi) * (つ,ツ,tsu) * (て,テ,te) * (と,ト,to)]
315
+ {YOON} === [(ちゃ,チャ,cha) * (ちゅ,チュ,chu) * (ちょ,チョ,cho)]
316
+ {TENGWA} === TINCO
317
+
318
+ \deploy serie {GOJUON} {TENGWA} {NULL} {_SHORT_VOWELS_} {_LONG_VOWELS_} {NULL}
319
+ \deploy serie {YOON} {TENGWA} {_PALATAL_} {_SHORT_VOWELS_YON_} {_LONG_VOWELS_YON_} {NULL}
320
+
321
+ \** P serie **\
322
+ {GOJUON} === [(ぱ,パ,pa) * (ぴ,ピ,pi) * (ぷ,プ,pu) * (ぺ,ペ,pe) * (ぽ,ポ,po)]
323
+ {YOON} === [(ぴゃ,ピャ,pya) * (ぴゅ,ピュ,pyu) * (ぴょ,ピョ,pyo)]
324
+ {TENGWA} === {_P_TENGWA_}
325
+
326
+ \deploy serie {GOJUON} {TENGWA} {NULL} {_SHORT_VOWELS_} {_LONG_VOWELS_} {NULL}
327
+ \deploy serie {YOON} {TENGWA} {_PALATAL_} {_SHORT_VOWELS_YON_} {_LONG_VOWELS_YON_} {NULL}
328
+
329
+ \** K serie **\
330
+ {GOJUON} === [(か,カ,ka) * (き,キ,ki) * (く,ク,ku) * (け,ケ,ke) * (こ,コ,ko)]
331
+ {YOON} === [(きゃ,キャ,kya) * (きゅ,キュ,kyu) * (きょ,キョ,kyo)]
332
+ {TENGWA} === CALMA
333
+
334
+ \deploy serie {GOJUON} {TENGWA} {NULL} {_SHORT_VOWELS_} {_LONG_VOWELS_} {NULL}
335
+ \deploy serie {YOON} {TENGWA} {_PALATAL_} {_SHORT_VOWELS_YON_} {_LONG_VOWELS_YON_} {NULL}
336
+
337
+ \** D serie **\
338
+ {GOJUON} === [(だ,ダ,da) * (ぢ,ヂ,dji) * (づ,ヅ,dzu) * (で,デ,de) * (ど,ド,do)]
339
+ {YOON} === [(ぢゃ,ヂャ,dja) * (ぢゅ,ヂュ,dju) * (ぢょ,ヂョ,djo)]
340
+ {TENGWA} === ANDO
341
+
342
+ \deploy serie {GOJUON} {TENGWA} {NULL} {_SHORT_VOWELS_} {_LONG_VOWELS_} {NULL}
343
+ \deploy serie {YOON} {TENGWA} {_PALATAL_} {_SHORT_VOWELS_YON_} {_LONG_VOWELS_YON_} {NULL}
344
+
345
+ \** B serie **\
346
+ {GOJUON} === [(ば,バ,ba) * (び,ビ,bi) * (ぶ,ブ,bu) * (べ,ベ,be) * (ぼ,ボ,bo)]
347
+ {YOON} === [(びゃ,ビャ,bya) * (びゅ,ビュ,byu) * (びょ,ビョ,byo)]
348
+ {TENGWA} === UMBAR
349
+
350
+ \deploy serie {GOJUON} {TENGWA} {NULL} {_SHORT_VOWELS_} {_LONG_VOWELS_} {NULL}
351
+ \deploy serie {YOON} {TENGWA} {_PALATAL_} {_SHORT_VOWELS_YON_} {_LONG_VOWELS_YON_} {NULL}
352
+
353
+ \** G Serie **\
354
+ {GOJUON} === [(が,ガ,ga) * (ぎ,ギ,gi) * (ぐ,グ,gu) * (げ,ゲ,ge) * (ご,ゴ,go)]
355
+ {YOON} === [(ぎゃ,ギャ,gya) * (ぎゅ,ギュ,gyu) * (ぎょ,ギョ,gyo)]
356
+ {TENGWA} === ANGA
357
+
358
+ \deploy serie {GOJUON} {TENGWA} {NULL} {_SHORT_VOWELS_} {_LONG_VOWELS_} {NULL}
359
+ \deploy serie {YOON} {TENGWA} {_PALATAL_} {_SHORT_VOWELS_YON_} {_LONG_VOWELS_YON_} {NULL}
360
+
361
+ \** N serie **\
362
+ {GOJUON} === [(な,ナ,na) * (に,ニ,ni) * (ぬ,ヌ,nu) * (ね,ネ,ne) * (の,ノ,no)]
363
+ {YOON} === [(にゃ,ニャ,nya) * (にゅ,ニュ,nyu) * (にょ,ニョ,nyo)]
364
+ {TENGWA} === NUMEN
365
+
366
+ \deploy serie {GOJUON} {TENGWA} {NULL} {_SHORT_VOWELS_} {_LONG_VOWELS_} {NULL}
367
+ \deploy serie {YOON} {TENGWA} {_PALATAL_} {_SHORT_VOWELS_YON_} {_LONG_VOWELS_YON_} {NULL}
368
+ {MORAIC_NASAL} --> NOLDO
369
+
370
+ \** M serie **\
371
+ {GOJUON} === [(ま,マ,ma) * (み,ミ,mi) * (む,ム,mu) * (め,メ,me) * (も,モ,mo)]
372
+ {YOON} === [(みゃ,ミャ,mya) * (みゅ,ミュ,myu) * (みょ,ミョ,myo)]
373
+ {TENGWA} === MALTA
374
+
375
+ \deploy serie {GOJUON} {TENGWA} {NULL} {_SHORT_VOWELS_} {_LONG_VOWELS_} {NULL}
376
+ \deploy serie {YOON} {TENGWA} {_PALATAL_} {_SHORT_VOWELS_YON_} {_LONG_VOWELS_YON_} {NULL}
377
+
378
+ \** H serie **\
379
+ {GOJUON} === [(は,ハ,ha) * (ひ,ヒ,hi) * (ふ,フ,fu) * (へ,ヘ,he) * (ほ,ホ,ho)]
380
+ {YOON} === [(ひゃ,ヒャ,hya) * (ひゅ,ヒュ,hyu) * (ひょ,ヒョ,hyo)]
381
+ {TENGWA} === {_H_TENGWA_}
382
+
383
+ \deploy serie {GOJUON} {TENGWA} {NULL} {_SHORT_VOWELS_} {_LONG_VOWELS_} {NULL}
384
+ \deploy serie {YOON} {TENGWA} {_PALATAL_} {_SHORT_VOWELS_YON_} {_LONG_VOWELS_YON_} {NULL}
385
+
386
+ \** S serie **\
387
+ {GOJUON} === [(さ,サ,sa) * (し,シ,shi) * (す,ス,su) * (せ,セ,se) * (そ,ソ,so)]
388
+ {YOON} === [(しゃ,シャ,sha) * (しゅ,シュ,shu) * (しょ,ショ,sho)]
389
+ {TENGWA} === SILME_NUQUERNA
390
+
391
+ \deploy serie {GOJUON} {TENGWA} {NULL} {_SHORT_VOWELS_} {_LONG_VOWELS_} {NULL}
392
+ \deploy serie {YOON} {TENGWA} {_PALATAL_} {_SHORT_VOWELS_YON_} {_LONG_VOWELS_YON_} {NULL}
393
+
394
+ \** Z serie **\
395
+ {GOJUON} === [(ざ,ザ,za) * (じ,ジ,ji) * (ず,ズ,zu) * (ぜ,ゼ,ze) * (ぞ,ゾ,zo)]
396
+ {YOON} === [(じゃ,ジャ,ja) * (じゅ,ジュ,ju) * (じょ,ジョ,jo)]
397
+ {TENGWA} === ESSE_NUQUERNA
398
+
399
+ \deploy serie {GOJUON} {TENGWA} {NULL} {_SHORT_VOWELS_} {_LONG_VOWELS_} {NULL}
400
+ \deploy serie {YOON} {TENGWA} {_PALATAL_} {_SHORT_VOWELS_YON_} {_LONG_VOWELS_YON_} {NULL}
401
+
402
+ \** R serie **\
403
+ {GOJUON} === [(ら,ラ,ra) * (り,リ,ri) * (る,ル,ru) * (れ,レ,re) * (ろ,ロ,ro)]
404
+ {YOON} === [(りゃ,リャ,rya) * (りゅ,リュ,ryu) * (りょ,リョ,ryo)]
405
+ {TENGWA} === ROMEN
406
+
407
+ \deploy serie {GOJUON} {TENGWA} {NULL} {_SHORT_VOWELS_} {_LONG_VOWELS_} {NULL}
408
+ \deploy serie {YOON} {TENGWA} {_PALATAL_} {_SHORT_VOWELS_YON_} {_LONG_VOWELS_YON_} {NULL}
409
+
410
+ \** W serie **\
411
+ {GOJUON} === [(わ,ワ,wa) * (ゐ,ヰ,wi) * (ゑ,ヱ,we) ]
412
+ \if "gojuon_w == GOJUON_W_USE_URE"
413
+ {TENGWA} === URE
414
+ \else
415
+ {TENGWA} === TELCO {_LABIAL_}
416
+ \endif
417
+
418
+ {__SV__} === [{_A_} * {_I_} * {_E_}]
419
+ {__LV__} === [{_AA_} * {_II_} * {_EE_}]
420
+ \deploy serie {GOJUON} {TENGWA} {NULL} {__SV__} {__LV__} {NULL}
421
+
422
+ \** SPECIALS **\
423
+ (へ,he,e){ASTERISK} --> NUM_12
424
+ (は,ha,wa){ASTERISK} --> HWESTA_SINDARINWA
425
+ (を,ヲ,wo,を{ASTERISK},ヲ{ASTERISK},wo{ASTERISK},o{ASTERISK}) --> VAIA
426
+ {ASTERISK} --> {NULL}
427
+
428
+ \** ***************** **\
429
+ \** Extended katakana **\
430
+ \** ***************** **\
431
+
432
+ \if "foreign_f_v == FOREIGN_F_V_XTD"
433
+ {_FOREIGN_F_} === TW_EXT_12
434
+ {_FOREIGN_V_} === TW_EXT_22
435
+ \else
436
+ {_FOREIGN_F_} === FORMEN
437
+ {_FOREIGN_V_} === AMPA
438
+ \endif
439
+
440
+ {SERIE} === [(イィ,yi) * (イェ,ye)]
441
+ \if "gojuon_y == GOJUON_Y_USE_YANTA"
442
+ {TENGWA} === YANTA
443
+ \else
444
+ {TENGWA} === TELCO {_PALATAL_}
445
+ \endif
446
+ {SERIE} --> {TENGWA} [{_I_} * {_E_}]
447
+ {SERIE}ー --> {TENGWA} [{_II_} * {_EE_}]
448
+
449
+ \** wa, wi, wu, we, wo **\
450
+ \** wa, wi, we - these latin combinations were already defined above **\
451
+ {SERIE} === [(ウァ,wa) * (ウィ,wi) * (ウゥ,wu) * (ウェ,we) * (ウォ,wo)]
452
+ \if "gojuon_w == GOJUON_W_USE_URE"
453
+ {TENGWA} === URE
454
+ \else
455
+ {TENGWA} === TELCO {_LABIAL_}
456
+ \endif
457
+
458
+ \deploy serie {SERIE} {TENGWA} {NULL} {_SHORT_VOWELS_} {_LONG_VOWELS_} {NULL}
459
+
460
+ {SERIE} === [(ウュ,wyu)]
461
+ {TENGWA} === URE \** only possibility : palatal + labial interfere so telco + labial + palatal will not work **\
462
+ {__SV__} === [{_U_}]
463
+ {__LV__} === [{_UU_}]
464
+ \deploy serie {SERIE} {TENGWA} {_PALATAL_} {__SV__} {__LV__} {NULL}
465
+
466
+ \** va vi vu ve vo **\
467
+ {SERIE} === [(ヴァ,ヷ,va) * (ヴィ,ヸ,vi) * (ヴ,vu) * (ヴェ,ヹ,ve) * (ヴォ,ヺ,vo)]
468
+ {TENGWA} === {_FOREIGN_V_}
469
+ \deploy serie {SERIE} {TENGWA} {NULL} {_SHORT_VOWELS_} {_LONG_VOWELS_} {NULL}
470
+
471
+
472
+ \** v yon : vya vyu vye vyo **\
473
+ {SERIE} === [(ヴャ,vya) * (ヴュ,vyu) * (ヴィェ,vye) * (ヴョ,vyo)]
474
+ {TENGWA} === {_FOREIGN_V_}
475
+ {__SV__} === [{_A_} * {_U_} * {_E_} * {_O_}]
476
+ {__LV__} === [{_AA_} * {_UU_} * {_EE_} * {_OO_}]
477
+ \deploy serie {SERIE} {TENGWA} {_PALATAL_} {__SV__} {__LV__} {NULL}
478
+
479
+ \** kye **\
480
+ {SERIE} === [(キェ,kye)]
481
+ {TENGWA} === CALMA
482
+ {__SV__} === [{_E_}]
483
+ {__LV__} === [{_EE_}]
484
+ \deploy serie {SERIE} {TENGWA} {_PALATAL_} {__SV__} {__LV__} {NULL}
485
+
486
+ \** gye **\
487
+ {SERIE} === [(ギェ,gye)]
488
+ {TENGWA} === ANGA
489
+ {__SV__} === [{_E_}]
490
+ {__LV__} === [{_EE_}]
491
+ \deploy serie {SERIE} {TENGWA} {_PALATAL_} {__SV__} {__LV__} {NULL}
492
+
493
+ \if "foreign_labialized == FOREIGN_LABIALIZED_DOWN_TEHTA"
494
+ {_FOREIGN_KW_} === CALMA
495
+ {_FOREIGN_GW_} === ANGA
496
+ {_FOREIGN_LABIALIZED_DIACRITIC_} === {_LABIAL_}
497
+ \else
498
+ {_FOREIGN_KW_} === QUESSE
499
+ {_FOREIGN_GW_} === UNGWE
500
+ {_FOREIGN_LABIALIZED_DIACRITIC_} === {NULL}
501
+ \endif
502
+
503
+ \** kwa, kWa, kwi, kwe, kwo **\
504
+ {SERIE} === [(クァ,クヮ,kwa) * (クィ,kwi) * (クェ,kwe) * (クォ,kwo)]
505
+ {TENGWA} === {_FOREIGN_KW_}
506
+ {__SV__} === [{_A_} * {_I_} * {_E_} * {_O_}]
507
+ {__LV__} === [{_AA_} * {_II_} * {_EE_} * {_OO_}]
508
+ \deploy serie {SERIE} {TENGWA} {_FOREIGN_LABIALIZED_DIACRITIC_} {__SV__} {__LV__} {NULL}
509
+
510
+
511
+ \** gwa, gWa, gwi, gwe, gwo **\
512
+ {SERIE} === [(グァ,グヮ,gwa) * (グィ,gwi) * (グェ,gwe) * (グォ,gwo)]
513
+ {TENGWA} === {_FOREIGN_GW_}
514
+ {__SV__} === [{_A_} * {_I_} * {_E_} * {_O_}]
515
+ {__LV__} === [{_AA_} * {_II_} * {_EE_} * {_OO_}]
516
+ \deploy serie {SERIE} {TENGWA} {_FOREIGN_LABIALIZED_DIACRITIC_} {__SV__} {__LV__} {NULL}
517
+
518
+ \** she **\
519
+ {SERIE} === [(シェ,she)]
520
+ {TENGWA} === SILME_NUQUERNA
521
+ {__SV__} === [{_E_}]
522
+ {__LV__} === [{_EE_}]
523
+ \deploy serie {SERIE} {TENGWA} {_PALATAL_} {__SV__} {__LV__} {NULL}
524
+
525
+
526
+ \** je **\
527
+ {SERIE} === [(ジェ,je)]
528
+ {TENGWA} === ESSE_NUQUERNA
529
+ {__SV__} === [{_E_}]
530
+ {__LV__} === [{_EE_}]
531
+ \deploy serie {SERIE} {TENGWA} {_PALATAL_} {__SV__} {__LV__} {NULL}
532
+
533
+ \** si **\
534
+ {SERIE} === [(スィ,si)]
535
+ {TENGWA} === SILME_NUQUERNA
536
+ {__SV__} === [{_I_}]
537
+ {__LV__} === [{_II_}]
538
+ \deploy serie {SERIE} {TENGWA} {_CANCELLER_} {__SV__} {__LV__} {NULL}
539
+
540
+ \** zi **\
541
+ {SERIE} === [(ズィ,zi)]
542
+ {TENGWA} === ESSE_NUQUERNA
543
+ {__SV__} === [{_I_}]
544
+ {__LV__} === [{_II_}]
545
+ \deploy serie {SERIE} {TENGWA} {_CANCELLER_} {__SV__} {__LV__} {NULL}
546
+
547
+ \** che **\
548
+ {SERIE} === [(チェ,che)]
549
+ {TENGWA} === TINCO
550
+ {__SV__} === [{_E_}]
551
+ {__LV__} === [{_EE_}]
552
+ \deploy serie {SERIE} {TENGWA} {_PALATAL_} {__SV__} {__LV__} {NULL}
553
+
554
+ \** tsa, tsi, tse, tso **\
555
+ {SERIE} === [(ツァ,tsa) * (ツィ,tsi) * (ツェ,tse) * (ツォ,tso)]
556
+ {TENGWA} === TINCO
557
+ {__SV__} === [{_A_} * {_I_} * {_E_} * {_O_}]
558
+ {__LV__} === [{_AA_} * {_II_} * {_EE_} * {_OO_}]
559
+ \deploy serie {SERIE} {TENGWA} {NULL} {__SV__} {__LV__} {_SIBILANT_}
560
+
561
+ \** tsyu = t sibilant + palatal + u **\
562
+ {SERIE} === [(ツュ,tsyu)]
563
+ {TENGWA} === TINCO
564
+ {__SV__} === [{_U_}]
565
+ {__LV__} === [{_UU_}]
566
+ \deploy serie {SERIE} {TENGWA} {_PALATAL_} {__SV__} {__LV__} {_SIBILANT_}
567
+
568
+
569
+ \** ti, tu **\
570
+ {SERIE} === [(ティ,ti) * (トゥ,tu)]
571
+ {TENGWA} === TINCO
572
+ {__SV__} === [{_I_} * {_U_}]
573
+ {__LV__} === [{_II_} * {_UU_}]
574
+ \deploy serie {SERIE} {TENGWA} {_CANCELLER_} {__SV__} {__LV__} {NULL}
575
+
576
+ \** tyu **\
577
+ {SERIE} === [(テュ,tyu)]
578
+ {TENGWA} === TINCO
579
+ {__SV__} === [{_U_}]
580
+ {__LV__} === [{_UU_}]
581
+ \deploy serie {SERIE} {TENGWA} {_CANCEL_THEN_PALATAL_} {__SV__} {__LV__} {NULL}
582
+
583
+ \** di, du **\
584
+ {SERIE} === [(ディ,di) * (ドゥ,du)]
585
+ {TENGWA} === ANDO
586
+ {__SV__} === [{_I_} * {_U_}]
587
+ {__LV__} === [{_II_} * {_UU_}]
588
+ \deploy serie {SERIE} {TENGWA} {_CANCELLER_} {__SV__} {__LV__} {NULL}
589
+
590
+ \** dyu **\
591
+ {SERIE} === [(デュ,dyu)]
592
+ {TENGWA} === ANDO
593
+ {__SV__} === [{_U_}]
594
+ {__LV__} === [{_UU_}]
595
+ \deploy serie {SERIE} {TENGWA} {_CANCEL_THEN_PALATAL_} {__SV__} {__LV__} {NULL}
596
+
597
+ \** nye **\
598
+ {SERIE} === [(ニェ,nye)]
599
+ {TENGWA} === NUMEN
600
+ {__SV__} === [{_E_}]
601
+ {__LV__} === [{_EE_}]
602
+ \deploy serie {SERIE} {TENGWA} {_PALATAL_} {__SV__} {__LV__} {NULL}
603
+
604
+ \** hye **\
605
+ {SERIE} === [(ヒェ,hye)]
606
+ {TENGWA} === {_H_TENGWA_}
607
+ {__SV__} === [{_E_}]
608
+ {__LV__} === [{_EE_}]
609
+ \deploy serie {SERIE} {TENGWA} {_PALATAL_} {__SV__} {__LV__} {NULL}
610
+
611
+ \** bye **\
612
+ {SERIE} === [(ビェ,bye)]
613
+ {TENGWA} === UMBAR
614
+ {__SV__} === [{_E_}]
615
+ {__LV__} === [{_EE_}]
616
+ \deploy serie {SERIE} {TENGWA} {_PALATAL_} {__SV__} {__LV__} {NULL}
617
+
618
+ \** pye **\
619
+ {SERIE} === [(ピェ,pye)]
620
+ {TENGWA} === {_P_TENGWA_}
621
+ {__SV__} === [{_E_}]
622
+ {__LV__} === [{_EE_}]
623
+ \deploy serie {SERIE} {TENGWA} {_PALATAL_} {__SV__} {__LV__} {NULL}
624
+
625
+ \** fa, fi, fu, fe, fo **\
626
+ {SERIE} === [(ファ,fa) * (フィ,fi) * (フェ,fe) * (フォ,fo)]
627
+ {TENGWA} === {_FOREIGN_F_}
628
+ {__SV__} === [{_A_} * {_I_} * {_E_} * {_O_}]
629
+ {__LV__} === [{_AA_} * {_II_} * {_EE_} * {_OO_}]
630
+ \deploy serie {SERIE} {TENGWA} {NULL} {__SV__} {__LV__} {NULL}
631
+
632
+ \** fya, fyu, fye, fyo **\
633
+ {SERIE} === [(フャ,fya) * (フュ,fyu) * (フィェ,fye) * (フョ,fyo)]
634
+ {TENGWA} === {_FOREIGN_F_}
635
+ {__SV__} === [{_A_} * {_U_} * {_E_} * {_O_}]
636
+ {__LV__} === [{_AA_} * {_UU_} * {_EE_} * {_OO_}]
637
+ \deploy serie {SERIE} {TENGWA} {_PALATAL_} {__SV__} {__LV__} {NULL}
638
+
639
+ \** hu **\
640
+ {SERIE} === [(ホゥ,hu)]
641
+ {TENGWA} === {_H_TENGWA_}
642
+ {__SV__} === [{_U_}]
643
+ {__LV__} === [{_UU_}]
644
+ \deploy serie {SERIE} {TENGWA} {_CANCELLER_} {__SV__} {__LV__} {NULL}
645
+
646
+ \** mye **\
647
+ {SERIE} === [(ミェ,mye)]
648
+ {TENGWA} === MALTA
649
+ {__SV__} === [{_E_}]
650
+ {__LV__} === [{_EE_}]
651
+ \deploy serie {SERIE} {TENGWA} {_PALATAL_} {__SV__} {__LV__} {NULL}
652
+
653
+ \** rye **\
654
+ {SERIE} === [(リェ,rye)]
655
+ {TENGWA} === ROMEN
656
+ {__SV__} === [{_E_}]
657
+ {__LV__} === [{_EE_}]
658
+ \deploy serie {SERIE} {TENGWA} {_PALATAL_} {__SV__} {__LV__} {NULL}
659
+
660
+ \** la, li, lu, le, lo / sons of the patriots **\
661
+ {SERIE} === [(ラ゜,la) * (リ゜,li) * (ル゜ ,lu) * (レ゜,le) * (ロ゜,lo)]
662
+ {TENGWA} === LAMBE
663
+ \deploy serie {SERIE} {TENGWA} {NULL} {_SHORT_VOWELS_} {_LONG_VOWELS_} {NULL}
664
+
665
+ \end
666
+
667
+ \beg rules punctuation
668
+ . --> PUNCT_DDOT
669
+ 。 --> PUNCT_DDOT
670
+ .. --> PUNCT_DOT PUNCT_DDOT PUNCT_DOT
671
+ … --> PUNCT_TILD
672
+ ‥ --> PUNCT_TILD \** JP tensen **\
673
+ ... --> PUNCT_TILD
674
+ .... --> PUNCT_TILD
675
+ ..... --> PUNCT_TILD
676
+ ...... --> PUNCT_TILD
677
+ ....... --> PUNCT_TILD
678
+
679
+ , --> PUNCT_DOT
680
+ 、 --> PUNCT_DOT
681
+ : --> PUNCT_DOT
682
+ ; --> PUNCT_DOT
683
+ ! --> PUNCT_EXCLAM
684
+ ? --> PUNCT_INTERR
685
+ · --> PUNCT_DOT
686
+ ・ --> {NULL} \** JP middle dot **\
687
+ ゠ --> {NULL} \** JP daburu haifun **\
688
+
689
+ \** Apostrophe **\
690
+
691
+ ' --> {NULL}
692
+ ’ --> {NULL}
693
+
694
+ \** NBSP **\
695
+ {NBSP} --> NBSP
696
+
697
+ \** Quotes **\
698
+
699
+ “ --> DQUOT_OPEN
700
+ ” --> DQUOT_CLOSE
701
+ « --> DQUOT_OPEN
702
+ » --> DQUOT_CLOSE
703
+
704
+ \** JP Quotes **\
705
+ 「 --> DQUOT_OPEN
706
+ 」 --> DQUOT_CLOSE
707
+
708
+ - --> {NULL}
709
+ – --> PUNCT_TILD
710
+ — --> PUNCT_TILD
711
+
712
+ 〜 --> PUNCT_TILD \** JP Tild **\
713
+ ~ --> PUNCT_TILD \** JP Tild **\
714
+
715
+ [ --> PUNCT_PAREN_L
716
+ ] --> PUNCT_PAREN_R
717
+ ( --> PUNCT_PAREN_L
718
+ ) --> PUNCT_PAREN_R
719
+ { --> PUNCT_PAREN_L
720
+ } --> PUNCT_PAREN_R
721
+ < --> PUNCT_PAREN_L
722
+ > --> PUNCT_PAREN_R
723
+
724
+ \** JP Paren **\
725
+ ( --> PUNCT_PAREN_L
726
+ ) --> PUNCT_PAREN_R
727
+ ([,〔,{,⦅,〈,《,【,〖,〘,〚) --> PUNCT_PAREN_L
728
+ (],〕,},⦆,〉,》,】,〗,〙,〛) --> PUNCT_PAREN_R
729
+
730
+ \** Not universal between fonts ... **\
731
+ $ --> ELVISH_PAREN
732
+ ≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
733
+ ≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
734
+
735
+ \end
736
+
737
+ \beg rules "numbers"
738
+ (〇,零) --> NUM_0
739
+ 一 --> NUM_1
740
+ 二 --> NUM_2
741
+ 三 --> NUM_3
742
+ 四 --> NUM_4
743
+ 五 --> NUM_5
744
+ 六 --> NUM_6
745
+ 七 --> NUM_7
746
+ 八 --> NUM_8
747
+ 九 --> NUM_9
748
+ 十 --> NUM_1 CIRC_TEHTA_INF
749
+ 百 --> NUM_2 CIRC_TEHTA_INF
750
+ 千 --> NUM_3 CIRC_TEHTA_INF
751
+ 万 --> NUM_4 CIRC_TEHTA_INF
752
+ 億 --> NUM_9 CIRC_TEHTA_INF
753
+ 0 --> NUM_0
754
+ 1 --> NUM_1
755
+ 2 --> NUM_2
756
+ 3 --> NUM_3
757
+ 4 --> NUM_4
758
+ 5 --> NUM_5
759
+ 6 --> NUM_6
760
+ 7 --> NUM_7
761
+ 8 --> NUM_8
762
+ 9 --> NUM_9
763
+ \end
764
+
765
+ \end
766
+
767
+
768
+
769
+ \beg postprocessor
770
+ \resolve_virtuals
771
+ \end