glaemscribe 1.2.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/bin/glaemscribe +2 -2
  3. data/glaemresources/charsets/cirth_ds.cst +514 -179
  4. data/glaemresources/charsets/eldamar.cst +210 -0
  5. data/glaemresources/charsets/tengwar_ds_annatar.cst +2452 -130
  6. data/glaemresources/charsets/tengwar_ds_eldamar.cst +2319 -125
  7. data/glaemresources/charsets/tengwar_ds_elfica.cst +2317 -126
  8. data/glaemresources/charsets/tengwar_ds_parmaite.cst +2319 -127
  9. data/glaemresources/charsets/tengwar_ds_sindarin.cst +2318 -127
  10. data/glaemresources/charsets/tengwar_freemono.cst +1 -1
  11. data/glaemresources/charsets/tengwar_guni_annatar.cst +2451 -131
  12. data/glaemresources/charsets/tengwar_guni_eldamar.cst +2317 -126
  13. data/glaemresources/charsets/tengwar_guni_elfica.cst +2316 -127
  14. data/glaemresources/charsets/tengwar_guni_parmaite.cst +2319 -127
  15. data/glaemresources/charsets/tengwar_guni_sindarin.cst +2317 -126
  16. data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
  17. data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
  18. data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
  19. data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
  20. data/glaemresources/modes/japanese-tengwar.glaem +9 -4
  21. data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
  22. data/glaemresources/modes/raw-cirth.glaem +154 -0
  23. data/lib/api/charset_parser.rb +7 -1
  24. data/lib/api/mode.rb +35 -10
  25. data/lib/api/mode_parser.rb +21 -12
  26. data/lib/api/post_processor/outspace.rb +44 -0
  27. data/lib/api/rule_group.rb +1 -1
  28. data/lib/api/transcription_pre_post_processor.rb +8 -5
  29. data/lib/api/transcription_processor.rb +12 -9
  30. data/lib/glaemscribe.rb +2 -0
  31. data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
  32. data/lib_espeak/glaemscribe_tts.js +363 -223
  33. metadata +12 -6
@@ -0,0 +1,814 @@
1
+ \**
2
+
3
+ Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ the transcription of texts between writing systems, and more
5
+ specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ invented languages to some of his devised writing systems.
7
+
8
+ Copyright (C) 2015 Benjamin Babut (Talagan).
9
+
10
+ This program is free software: you can redistribute it and/or modify
11
+ it under the terms of the GNU Affero General Public License as published by
12
+ the Free Software Foundation, either version 3 of the License, or
13
+ any later version.
14
+
15
+ This program is distributed in the hope that it will be useful,
16
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ GNU Affero General Public License for more details.
19
+
20
+ You should have received a copy of the GNU Affero General Public License
21
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ **\
24
+
25
+ \beg changelog
26
+ \entry "0.0.1" "First version."
27
+ \end
28
+
29
+ \language "English"
30
+ \writing "Tengwar"
31
+ \mode "English Tengwar - General Use"
32
+ \version "0.0.1"
33
+ \authors "J.R.R. Tolkien, impl. Talagan (Benjamin Babut), advis. Corchalad (Bertrand Bellet)"
34
+
35
+ \world primary_related_to_arda
36
+ \invention jrrt
37
+
38
+ \raw_mode "raw-tengwar"
39
+
40
+ \charset tengwar_ds_sindarin false
41
+ \charset tengwar_ds_parmaite false
42
+ \charset tengwar_ds_eldamar false
43
+ \charset tengwar_ds_annatar true
44
+ \charset tengwar_ds_elfica false
45
+
46
+ \charset tengwar_guni_sindarin false
47
+ \charset tengwar_guni_parmaite false
48
+ \charset tengwar_guni_eldamar false
49
+ \charset tengwar_guni_annatar false
50
+ \charset tengwar_guni_elfica false
51
+
52
+ \charset tengwar_freemono false
53
+ \charset tengwar_telcontar false
54
+
55
+ \beg options
56
+
57
+ \** ENGLISH accent/dialect/variant. It also controls espeak behaviour. **\
58
+ \beg option espeak_voice ESPEAK_VOICE_EN_TENGWAR
59
+ \value ESPEAK_VOICE_EN_TENGWAR 0
60
+ \value ESPEAK_VOICE_EN_TENGWAR_GB 1
61
+ \value ESPEAK_VOICE_EN_TENGWAR_RP 2
62
+ \value ESPEAK_VOICE_EN_TENGWAR_US 3
63
+ \end
64
+
65
+ \** 'the' word **\
66
+ \beg option english_the ENGLISH_THE_EXTENDED_TENGWAR
67
+ \value ENGLISH_THE_EXTENDED_TENGWAR 0
68
+ \value ENGLISH_THE_SEPARATE 1
69
+ \end
70
+
71
+ \** 'of' word **\
72
+ \beg option english_of ENGLISH_OF_EXTENDED_TENGWAR
73
+ \value ENGLISH_OF_EXTENDED_TENGWAR 0
74
+ \value ENGLISH_OF_SEPARATE 1
75
+ \end
76
+
77
+ \** 'to' word (the word 'to' may have its vowel reduced to a schwa) **\
78
+ \beg option schwa_of_to SCHWA_OF_TO_U
79
+ \value SCHWA_OF_TO_U 0
80
+ \value SCHWA_OF_TO_SCHWA 1
81
+ \end
82
+
83
+ \** 'wh' in old accents/US. Sometimes called 'wine/whine' merger. **\
84
+ \beg option ancient_voiceless_labiovelar_fricative_wh WH_VLVF_HWESTA_SINDARINWA
85
+ \value WH_VLVF_HWESTA_SINDARINWA 0
86
+ \value WH_VLVF_WHINE_MERGER 1
87
+ \end
88
+
89
+ \** SARINCE option when consonants are oriented left **\
90
+ \beg option s_consonants_l SCONSL_SARINCE_ALWAYS
91
+ \value SCONSL_SARINCE_NEVER 0
92
+ \value SCONSL_SARINCE_ALWAYS 1
93
+ \end
94
+
95
+ \** SARINCE option when consonants are oriented right **\
96
+ \beg option s_consonants_r SCONSR_SARINCE_END_OF_WORD
97
+ \value SCONSR_SARINCE_NEVER 0
98
+ \value SCONSR_SARINCE_ALWAYS 1
99
+ \value SCONSR_SARINCE_END_OF_WORD 2
100
+ \end
101
+
102
+ \** Re-establishment of linking r in non-rhotic accent, ex : 'better life' vs 'betteR answer' **\
103
+ \beg option linking_r true
104
+ \visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR_RP || espeak_voice == ESPEAK_VOICE_EN_TENGWAR_GB"
105
+ \end
106
+
107
+ \** Intrusive r, like in vanillaR ice **\
108
+ \beg option intrusive_r true
109
+ \visible_when "espeak_voice != ESPEAK_VOICE_EN_TENGWAR_US"
110
+ \end
111
+
112
+ \beg option pre_consonant_n_with_same_articulation_point PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK
113
+ \value PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_SEPARATE 0
114
+ \value PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK 1
115
+ \end
116
+
117
+ \** Pre-velar n ('n‿' in the phonetic input) : written as ŋ when assimilable, kept as n otherwise **\
118
+ \beg option pre_velar_n PRE_VELAR_N_ASSIMILABLE
119
+ \value PRE_VELAR_N_NON_ASSIMILABLE 0
120
+ \value PRE_VELAR_N_ASSIMILABLE 1
121
+ \end
122
+
123
+ \** Common elvish / tengwar option **\
124
+ \beg option consonant_modification_style CONSONANT_MODIFICATION_STYLE_WAVE
125
+ \value CONSONANT_MODIFICATION_STYLE_WAVE 0
126
+ \value CONSONANT_MODIFICATION_STYLE_BAR 1
127
+ \end
128
+
129
+ \** Long a like in 'palm' **\
130
+ \beg option long_back_a LONG_BACK_A_IMPLICIT_CARRIER
131
+ \radio
132
+ \value LONG_BACK_A_IMPLICIT_CARRIER 0
133
+ \value LONG_BACK_A_WITH_CARRIER 1
134
+ \end
135
+
136
+ \** DISABLED : it's always long **\
137
+ \beg option long_front_e LONG_FRONT_E_DOUBLE_TEHTA
138
+ \visible_when false
139
+ \radio
140
+ \value LONG_FRONT_E_DOUBLE_TEHTA 0
141
+ \value LONG_FRONT_E_WITH_CARRIER 1
142
+ \end
143
+
144
+ \** DISABLED : it's always long **\
145
+ \beg option long_back_e LONG_BACK_E_DOUBLE_TEHTA
146
+ \visible_when false
147
+ \radio
148
+ \value LONG_BACK_E_DOUBLE_TEHTA 0
149
+ \value LONG_BACK_E_WITH_CARRIER 1
150
+ \end
151
+
152
+ \** Long i like in 'fleece' **\
153
+ \beg option long_i LONG_I_DOUBLE_TEHTA
154
+ \radio
155
+ \value LONG_I_DOUBLE_TEHTA 0
156
+ \value LONG_I_WITH_CARRIER 1
157
+ \value LONG_I_AS_DIPHTONG 2
158
+ \end
159
+
160
+ \** long o like in 'thought' **\
161
+ \beg option long_o LONG_O_DOUBLE_TEHTA
162
+ \radio
163
+ \value LONG_O_DOUBLE_TEHTA 0
164
+ \value LONG_O_WITH_CARRIER 1
165
+ \end
166
+
167
+ \** long u like in 'goose' **\
168
+ \beg option long_u LONG_U_DOUBLE_TEHTA
169
+ \radio
170
+ \value LONG_U_DOUBLE_TEHTA 0
171
+ \value LONG_U_WITH_CARRIER 1
172
+ \value LONG_U_AS_DIPHTONG 2
173
+ \end
174
+
175
+ \** 'cure', 'cute' diphthong **\
176
+ \beg option ju_diphthong JU_DIPHTHONG_SEPARATE
177
+ \radio
178
+ \value JU_DIPHTHONG_SEPARATE 0
179
+ \value JU_DIPHTHONG_LIKE_IW 1
180
+ \end
181
+
182
+ \** Horse / Hoarse vowel distinction (only JRRT/US accents) **\
183
+ \beg option horse_hoarse_merger HORSE_HOARSE_SEPARATE
184
+ \visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR || espeak_voice == ESPEAK_VOICE_EN_TENGWAR_US"
185
+ \value HORSE_HOARSE_MERGE 0
186
+ \value HORSE_HOARSE_SEPARATE 1
187
+ \end
188
+
189
+ \** Cot / Coat vowel distinction (only applies to the US accent) **\
190
+ \beg option cot_coat_merger COT_COAT_SEPARATE
191
+ \value COT_COAT_MERGE 0
192
+ \value COT_COAT_SEPARATE 1
193
+ \visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR_US"
194
+ \end
195
+
196
+ \** Remove unnecessary, natural schwa marks **\
197
+ \beg option implicit_schwa IMPLICIT_SCHWA_NO
198
+ \value IMPLICIT_SCHWA_NO 0
199
+ \value IMPLICIT_SCHWA_YES 1
200
+ \end
201
+
202
+ \** when implicit schwa is on, how to mark non-reducible schwas **\
203
+ \beg option implicit_schwa_non_reducible IMPLICIT_SCHWA_NON_REDUCIBLE_UNUTIXE_IF_POSSIBLE
204
+ \value IMPLICIT_SCHWA_NON_REDUCIBLE_UNUTIXE_IF_POSSIBLE 0
205
+ \value IMPLICIT_SCHWA_NON_REDUCIBLE_ALWAYS_TELCO 1
206
+ \visible_when "implicit_schwa == IMPLICIT_SCHWA_YES"
207
+ \end
208
+
209
+ \** Schwi, in US/JRRT **\
210
+ \beg option schwi SCHWI_LIKE_I
211
+ \radio
212
+ \value SCHWI_LIKE_I 0
213
+ \value SCHWI_LIKE_SCHWA 1
214
+ \visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR || espeak_voice == ESPEAK_VOICE_EN_TENGWAR_US"
215
+ \end
216
+
217
+ \** 'strut' vowel special case **\
218
+ \beg option open_mid_back_unrounded OMBU_THINNAS
219
+ \radio
220
+ \value OMBU_THINNAS 0
221
+ \value OMBU_GRAVE 1
222
+ \value OMBU_LIKE_SCHWA 2
223
+ \end
224
+
225
+ \** Common elvish / tengwar option **\
226
+ \beg option reverse_o_u_tehtar U_UP_O_DOWN
227
+ \value O_UP_U_DOWN 1
228
+ \value U_UP_O_DOWN 2
229
+ \end
230
+
231
+ \** Use english standard by default **\
232
+ \option reverse_numbers false
233
+ \beg option numbers_base BASE_10
234
+ \value BASE_10 10
235
+ \value BASE_12 12
236
+ \end
237
+
238
+ \option auto_spacing true
239
+
240
+ \end
241
+
242
+ \beg preprocessor
243
+ \downcase
244
+
245
+ \** Remove phonetics accentuation marks **\
246
+ \rxsubstitute "[ˈˌ]" ""
247
+
248
+ \** foreign words nasal a, split to "an" (ex: croissant) **\
249
+ \rxsubstitute "ɑ̃" "ɑn"
250
+
251
+ \** Non rhotic schwa simplification **\
252
+ \rxsubstitute "ɐ" "ə"
253
+
254
+ \if linking_r
255
+ \rxsubstitute "ɹ‿" "ɹ"
256
+ \else
257
+ \rxsubstitute "ɹ‿" ""
258
+ \endif
259
+
260
+ \if intrusive_r
261
+ \rxsubstitute "ɹ̩‿" "ɹ"
262
+ \else
263
+ \rxsubstitute "ɹ̩‿" ""
264
+ \endif
265
+
266
+ \if "schwa_of_to == SCHWA_OF_TO_U"
267
+ \substitute "ʊ̟" "ʊ"
268
+ \else
269
+ \substitute "ʊ̟" "ə"
270
+ \endif
271
+
272
+ \if "pre_velar_n == PRE_VELAR_N_ASSIMILABLE"
273
+ \rxsubstitute "n‿" "ŋ"
274
+ \else
275
+ \rxsubstitute "n‿" "n"
276
+ \endif
277
+
278
+ \** IMPORTANT NOTE : in all following regexps **\
279
+ \** (^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]) stands for 'word boundary' **\
280
+
281
+ \** 'the' variations **\
282
+ \** that the **\
283
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(ð[aæ]t)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
284
+
285
+ \** of the **\
286
+ \if "english_the == ENGLISH_THE_EXTENDED_TENGWAR && english_of == ENGLISH_OF_EXTENDED_TENGWAR"
287
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])([ɒʌ]v)ð([əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1OFTH\\3\\4"
288
+ \else
289
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])([ɒʌ]v)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
290
+ \endif
291
+
292
+ \** for the **\
293
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(f[ɚə])(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
294
+ \** with the **\
295
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(wɪð)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
296
+ \** in the **\
297
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(ɪn)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
298
+ \** on the **\
299
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])([ɒɔ]n)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
300
+ \** from the **\
301
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(fɹʌm)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
302
+ \** was the **\
303
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(wʌz)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
304
+
305
+ \** Beware of the order of COT/COAT merger and horse/hoarse merger **\
306
+ \if "cot_coat_merger == COT_COAT_MERGE && espeak_voice == ESPEAK_VOICE_EN_TENGWAR_US"
307
+ \substitute "oʊ" "ɑː"
308
+ \endif
309
+
310
+ \if "horse_hoarse_merger == HORSE_HOARSE_SEPARATE"
311
+ \** Re-establish former diphthong **\
312
+ \substitute "oːɹ" "oʊɹ"
313
+ \endif
314
+
315
+
316
+ \** If treated as diphthong, change long i to i + schwi **\
317
+ \if "long_i == LONG_I_AS_DIPHTONG"
318
+ \substitute "iː" "iɪ"
319
+ \endif
320
+
321
+ \** Experimental, don't affect ju: at beginning of words/after consonant **\
322
+ \if "ju_diphthong == JU_DIPHTHONG_LIKE_IW"
323
+ \rxsubstitute "(juː|jʊ)" "iw"
324
+ \endif
325
+
326
+ \if "long_u == LONG_U_AS_DIPHTONG"
327
+ \substitute "uː" "uʊ"
328
+ \endif
329
+
330
+ \** ! Beware of the order of the following rules **\
331
+ \** ! Rhotic schwa : remove 1 level of length when superfluous and always add explicit mark **\
332
+ \rxsubstitute "[ɜɚ]ː?" "ɜɹ"
333
+
334
+ \** ! Potentially remove superfluous added rhotic marks **\
335
+ \rxsubstitute "ɹ+" "ɹ"
336
+
337
+ \** ! Disambiguate ɹ + vowel : ORE/ROMEN **\
338
+ \rxsubstitute "ɹ([ɑæaeɛʌɐəɜɚiɪᵻoɒɔuʊʘ])" "r\\1"
339
+
340
+ \if "implicit_schwa == IMPLICIT_SCHWA_YES"
341
+ \** All schwas at beginning of words cannot reduce **\
342
+ \** or after vowels (== not consonant) **\
343
+ \** beware of ɪ as it can appear as consonant (lawyer) **\
344
+ \** same for ʊ for sour **\
345
+ \** Mark non reducing schwa as ʘ **\
346
+ \rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([əɐɜɚ])" "\\1ʘ"
347
+ \rxsubstitute "([əɐɜɚ])r" "ʘr"
348
+
349
+ \if "schwi == SCHWI_LIKE_SCHWA"
350
+ \** Don't forget to mark schwis too **\
351
+ \rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([ᵻ])" "\\1ʘ"
352
+ \rxsubstitute "ᵻr" "ʘr"
353
+ \endif
354
+
355
+ \if "open_mid_back_unrounded == OMBU_LIKE_SCHWA"
356
+ \** Don't forget to mark the ombus too **\
357
+ \rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([ʌ])" "\\1ʘ"
358
+ \rxsubstitute "ʌr" "ʘr"
359
+ \endif
360
+ \endif
361
+
362
+ \elvish_numbers "\\eval numbers_base" "\\eval reverse_numbers"
363
+
364
+ \if "auto_spacing == true"
365
+ \rxsubstitute "([^\\s])([.,;:!?])" "\\1 \\2"
366
+ \rxsubstitute "([.,;:!?])([^\\s])" "\\1 \\2"
367
+ \endif
368
+ \end
369
+
370
+
371
+ \beg processor
372
+
373
+ \beg rules litteral
374
+
375
+ \if "consonant_modification_style == CONSONANT_MODIFICATION_STYLE_WAVE"
376
+ {GEMINATE} === GEMINATE_SIGN_TILD
377
+ {NASAL} === NASALIZE_SIGN_TILD
378
+ \else
379
+ {GEMINATE} === GEMINATE_SIGN
380
+ {NASAL} === NASALIZE_SIGN
381
+ \endif
382
+
383
+ \** sa-rinci for left-oriented tengwar **\
384
+ \if "s_consonants_l == SCONSL_SARINCE_ALWAYS"
385
+ {LWS} === [{NULL} * (s,z)]
386
+ {_LWS_} === [{NULL} * SARINCE]
387
+ {__LWSX__} === 2,1,3
388
+ \else
389
+ {LWS} === {NULL}
390
+ {_LWS_} === {NULL}
391
+ {__LWSX__} === 2,1
392
+ \endif
393
+
394
+ \** sa-rinci for right-oriented tengwar **\
395
+ \if "s_consonants_r == SCONSR_SARINCE_ALWAYS"
396
+ {RWS} === [{NULL} * (s,z)]
397
+ {_RWS_} === [{NULL} * SARINCE]
398
+ {__RWSX__} === 2,1,3
399
+ \elsif "s_consonants_r == SCONSR_SARINCE_END_OF_WORD"
400
+ {RWS} === [{NULL} * (s_,z_)]
401
+ {_RWS_} === [{NULL} * SARINCE]
402
+ {__RWSX__} === 2,1,3
403
+ \else
404
+ {RWS} === {NULL}
405
+ {_RWS_} === {NULL}
406
+ {__RWSX__} === 2,1
407
+ \endif
408
+
409
+ \if "reverse_o_u_tehtar == U_UP_O_DOWN"
410
+ {O_LOOP} === O_TEHTA
411
+ {O_LOOP_DOUBLE} === O_TEHTA_DOUBLE
412
+ {U_LOOP} === U_TEHTA
413
+ {U_LOOP_DOUBLE} === U_TEHTA_DOUBLE
414
+ \else
415
+ {O_LOOP} === U_TEHTA
416
+ {O_LOOP_DOUBLE} === U_TEHTA_DOUBLE
417
+ {U_LOOP} === O_TEHTA
418
+ {U_LOOP_DOUBLE} === O_TEHTA_DOUBLE
419
+ \endif
420
+
421
+ \** schwas : ɐ,ə **\
422
+ \** rhotic schwa : ɚ **\
423
+ \** schwi : ᵻ **\
424
+ \** schwu : ʌ **\
425
+
426
+ {IGROUP} === i,ɪ
427
+ {UGROUP} === u,ʊ
428
+ {EBGROUP} === ə,ɐ \** REDUCIBLE **\
429
+
430
+ {SCHWA_NON_REDUCIBLE} === ʘ \** NON REDUCIBLE **\
431
+ {ESCHWA} === (ə,ʘ) \** REDUCIBLE & NON REDUCIBLE E SCHWA **\
432
+
433
+ \if "schwi == SCHWI_LIKE_I"
434
+ {IGROUP} === {IGROUP},ᵻ
435
+ \else
436
+ {EBGROUP} === {EBGROUP},ᵻ
437
+ \endif
438
+
439
+ {W_OMBU_GROUP} === {NULL}
440
+ {_W_OMBU_GROUP_} === {NULL}
441
+ \if "open_mid_back_unrounded == OMBU_GRAVE"
442
+ {W_OMBU_GROUP} === * (ʌ)
443
+ {_W_OMBU_GROUP_} === * E_TEHTA_GRAVE
444
+ \elsif "open_mid_back_unrounded == OMBU_THINNAS"
445
+ {W_OMBU_GROUP} === * (ʌ)
446
+ {_W_OMBU_GROUP_} === * THINNAS
447
+ \else
448
+ {EBGROUP} === {EBGROUP},ʌ
449
+ \endif
450
+
451
+ {A_FRONT} === (æ,a) \** Always short **\
452
+ {A_BACK} === (ɑ) \** Always long **\
453
+ {E_FRONT} === (e,ɛ)
454
+ {E_BACK} === ({EBGROUP})
455
+ {E_BACK_RHOTIC} === (ɚ,ɜ) \** Rhotic schwas are treated independently **\
456
+ {I} === ({IGROUP})
457
+ {O} === (o,ɒ,ɔ) \** force, mock, lord **\
458
+ {U} === ({UGROUP})
459
+
460
+
461
+ {AA_FRONT} === {A_FRONT}ː \** long front a probably does not exist **\
462
+ {AA_BACK} === {A_BACK}ː
463
+ {EE_FRONT} === {E_FRONT}ː
464
+ {EE_BACK} === {E_BACK}ː \** long back e probably does not exist when not rhotic **\
465
+ {EE_BACK_RHOTIC} === {E_BACK_RHOTIC}ː
466
+ {II} === {I}ː
467
+ {OO} === {O}ː
468
+ {UU} === {U}ː
469
+
470
+ {W_SCHWA_NON_REDUCIBLE} === {NULL}
471
+ {_W_SCHWA_NON_REDUCIBLE_} === {NULL}
472
+
473
+ \if "implicit_schwa == IMPLICIT_SCHWA_YES"
474
+ {_IMPLICIT_SCHWA_} === {NULL}
475
+ \if "implicit_schwa_non_reducible == IMPLICIT_SCHWA_NON_REDUCIBLE_UNUTIXE_IF_POSSIBLE"
476
+ {W_SCHWA_NON_REDUCIBLE} === * {SCHWA_NON_REDUCIBLE}
477
+ {_W_SCHWA_NON_REDUCIBLE_} === * UNUTIXE
478
+ \endif
479
+ \else
480
+ {_IMPLICIT_SCHWA_} === UNUTIXE
481
+ \endif
482
+
483
+ \** GB DIPHTHONGS **\
484
+ \** +dˈeɪ +skˈaɪ +bˈɔɪ +bˈiə +bˈeə +tˈʊə +ɡˌəʊ +kˈaʊ **\
485
+ \** US DIPHTHONGS **\
486
+ \** =dˈeɪ =skˈaɪ =bˈɔɪ -bˈɪɹ -bˈɛɹ -tˈʊɹ +ɡˌoʊ =kˈaʊ **\
487
+
488
+ \** U Diphthongs **\
489
+ {AW} === aʊ \** cow **\
490
+ {OW} === oʊ \** US most / mˈoʊst **\
491
+ {EW} === {ESCHWA}ʊ \** GB go **\
492
+ {UW} === uʊ \** goose if pronounced with labializing accent ... we don't have this in our pronunciations **\
493
+
494
+ \** I Diphthongs : eɪ (day) / aɪ (sky) / ɔɪ (boy) **\
495
+ {AJ} === aɪ \** nine / nˈaɪn **\
496
+ {EJ} === eɪ \** game / ɡˈeɪm **\
497
+ {OJ} === ɔɪ \** boy **\
498
+ {IJ} === iɪ \** fleece if pronounced with palatalising accent **\
499
+
500
+ \** ə diphthongs : iə (GB: beer) / eə (GB: bear) / ʊə (GB: tour) **\
501
+ {IER} === i{ESCHWA} \** GB Beer **\
502
+ {EAR} === e{ESCHWA} \** GB Bear **\
503
+ {UER} === ʊ{ESCHWA} \** GB Tour **\
504
+
505
+ {VOWELS} === {A_BACK} * {A_FRONT} * {E_FRONT} * {E_BACK} * {E_BACK_RHOTIC} * {IER} * {EAR} * {UER} * {I} * {O} * {U} {W_SCHWA_NON_REDUCIBLE} {W_OMBU_GROUP}
506
+ {TEHTAR} === A_TEHTA * A_TEHTA_REVERSED * E_TEHTA * {_IMPLICIT_SCHWA_} * {_IMPLICIT_SCHWA_} * UNUTIXE I_TEHTA * UNUTIXE E_TEHTA * UNUTIXE {U_LOOP} * I_TEHTA * {O_LOOP} * {U_LOOP} {_W_SCHWA_NON_REDUCIBLE_} {_W_OMBU_GROUP_}
507
+
508
+ {LVOWELS} === {AA_BACK} * {AA_FRONT} * {EE_FRONT} * {EE_BACK} * {EE_BACK_RHOTIC} * {II} * {OO} * {UU}
509
+
510
+ {DIPHTHONGS_R} === {AW} * {OW} * {EW} * {UW} * {AJ} * {EJ} * {OJ} * {IJ}
511
+ {_DIPHTHONGS_R_} === VALA A_TEHTA * VALA {O_LOOP} * VALA UNUTIXE * VALA {U_LOOP} * ANNA A_TEHTA * ANNA E_TEHTA * ANNA {O_LOOP} * ANNA I_TEHTA
512
+
513
+ {DIPHTHONGS} === {DIPHTHONGS_R}
514
+ {_DIPHTHONGS_} === {_DIPHTHONGS_R_}
515
+
516
+ {WLONG} === {NULL} \** long vowels that can be used as tehtar **\
517
+ {_WLONG_} === {NULL} \** tehtar of long vowels that can be used as tehtar **\
518
+
519
+ \** LV : Initialization step 1 **\
520
+ {_LONG_A_BACK_} === ARA A_TEHTA
521
+ {_LONG_A_FRONT_} === ARA A_TEHTA_REVERSED \** Should not be possible in English **\
522
+ {_LONG_E_FRONT_} === ARA E_TEHTA
523
+ {_LONG_E_BACK_} === ARA UNUTIXE \** PROBLEM (solved) : ara and unutixe don't work together. But this case will not appear : long back e is not possible when not rhotic. **\
524
+ {_LONG_E_BACK_RHOTIC_} === ARA UNUTIXE \** PROBLEM (solved) : ara and unutixe don't work together. But this case will not appear : simplified by prepro **\
525
+ {_LONG_I_} === ARA I_TEHTA
526
+ {_LONG_O_} === ARA {O_LOOP}
527
+ {_LONG_U_} === ARA {U_LOOP}
528
+
529
+ \** LV : Initialization step 2 **\
530
+ {_LONE_LONG_A_BACK_} === {_LONG_A_BACK_}
531
+ {_LONE_LONG_A_FRONT_} === {_LONG_A_FRONT_}
532
+ {_LONE_LONG_E_FRONT_} === {_LONG_E_FRONT_}
533
+ {_LONE_LONG_E_BACK_} === {_LONG_E_BACK_}
534
+ {_LONE_LONG_E_BACK_RHOTIC_} === {_LONG_E_BACK_RHOTIC_}
535
+ {_LONE_LONG_I_} === {_LONG_I_}
536
+ {_LONE_LONG_O_} === {_LONG_O_}
537
+ {_LONE_LONG_U_} === {_LONG_U_}
538
+
539
+ \if "long_back_a == LONG_BACK_A_IMPLICIT_CARRIER"
540
+ \** Remove carrier and use A_TEHTA as if it was a double tehta. The lone form is carried by TELCO, as for the other long vowels written without ARA. **\
541
+ {_LONG_A_BACK_} === A_TEHTA
542
+ {_LONE_LONG_A_BACK_} === TELCO {_LONG_A_BACK_}
543
+ {WLONG} === {WLONG} * {AA_BACK}
544
+ {_WLONG_} === {_WLONG_} * {_LONG_A_BACK_}
545
+ \endif
546
+
547
+ \if "long_front_e == LONG_FRONT_E_DOUBLE_TEHTA"
548
+ \** Does not exist in standard accents **\
549
+ {_LONG_E_FRONT_} === E_TEHTA_DOUBLE
550
+ {_LONE_LONG_E_FRONT_} === TELCO {_LONG_E_FRONT_}
551
+ {WLONG} === {WLONG} * {EE_FRONT}
552
+ {_WLONG_} === {_WLONG_} * {_LONG_E_FRONT_}
553
+ \endif
554
+
555
+ \if "long_back_e == LONG_BACK_E_DOUBLE_TEHTA"
556
+ \** This case should not be possible when not rhotic. **\
557
+ {_LONG_E_BACK_} === I_TEHTA_DOUBLE_INF
558
+ {_LONE_LONG_E_BACK_} === TELCO {_LONG_E_BACK_}
559
+ {WLONG} === {WLONG} * {EE_BACK}
560
+ {_WLONG_} === {_WLONG_} * {_LONG_E_BACK_}
561
+ \endif
562
+
563
+ \if "long_i == LONG_I_DOUBLE_TEHTA"
564
+ {_LONG_I_} === I_TEHTA_DOUBLE
565
+ {_LONE_LONG_I_} === TELCO {_LONG_I_}
566
+ {WLONG} === {WLONG} * {II}
567
+ {_WLONG_} === {_WLONG_} * {_LONG_I_}
568
+ \endif
569
+
570
+ \if "long_o == LONG_O_DOUBLE_TEHTA"
571
+ {_LONG_O_} === {O_LOOP_DOUBLE}
572
+ {_LONE_LONG_O_} === TELCO {_LONG_O_}
573
+ {WLONG} === {WLONG} * {OO}
574
+ {_WLONG_} === {_WLONG_} * {_LONG_O_}
575
+ \endif
576
+
577
+ \if "long_u == LONG_U_DOUBLE_TEHTA"
578
+ {_LONG_U_} === {U_LOOP_DOUBLE}
579
+ {_LONE_LONG_U_} === TELCO {_LONG_U_}
580
+ {WLONG} === {WLONG} * {UU}
581
+ {_WLONG_} === {_WLONG_} * {_LONG_U_}
582
+ \endif
583
+
584
+ \** Define a variable for the images of all long vowels **\
585
+ {_LONE_LONG_VOWELS_} === {_LONE_LONG_A_BACK_} * {_LONE_LONG_A_FRONT_} * {_LONE_LONG_E_FRONT_} * {_LONE_LONG_E_BACK_} * {_LONE_LONG_E_BACK_RHOTIC_} * {_LONE_LONG_I_} * {_LONE_LONG_O_} * {_LONE_LONG_U_}
586
+
587
+ {V_D} === [ {VOWELS} {WLONG} ]
588
+ {V_D_WN} === [ {VOWELS} {WLONG} * {NULL} ]
589
+
590
+ {_V_D_} === [ {TEHTAR} {_WLONG_} ]
591
+ {_V_D_WN_} === [ {TEHTAR} {_WLONG_} * {NULL} ]
592
+
593
+ \** Vowel rules **\
594
+ [{VOWELS}] --> TELCO [{TEHTAR}] \** Replace isolated short vowels **\
595
+ [{DIPHTHONGS_R}]{RWS} --> [{_DIPHTHONGS_R_}]{_RWS_} \** Replace diphthongs **\
596
+
597
+ \if "implicit_schwa_non_reducible == IMPLICIT_SCHWA_NON_REDUCIBLE_ALWAYS_TELCO"
598
+ ʘ --> TELCO
599
+ \endif
600
+
601
+ \** LONE LONG VOWELS **\
602
+ [{LVOWELS}] --> [{_LONE_LONG_VOWELS_}]
603
+
604
+ {_WH_} === HWESTA_SINDARINWA
605
+ \if "ancient_voiceless_labiovelar_fricative_wh == WH_VLVF_WHINE_MERGER"
606
+ {_WH_} === VALA
607
+ \endif
608
+
609
+ \beg macro serie_l ARG_SL _ARG_SL_
610
+ {V_D_WN}[{ARG_SL}]{LWS} --> {__LWSX__} --> [{_ARG_SL_}]{_V_D_WN_}{_LWS_}
611
+ \end
612
+ \beg macro serie_ln ARG_SLN _ARG_SLN_
613
+ {V_D_WN}[{ARG_SLN}]{LWS} --> {__LWSX__} --> [{_ARG_SLN_}]{NASAL}{_V_D_WN_}{_LWS_}
614
+ \end
615
+ \beg macro serie_r ARG_SR _ARG_SR_
616
+ {V_D_WN}[{ARG_SR}]{RWS} --> {__RWSX__} --> [{_ARG_SR_}]{_V_D_WN_}{_RWS_}
617
+ \end
618
+ \beg macro serie_rn ARG_SRN _ARG_SRN_
619
+ {V_D_WN}[{ARG_SRN}]{RWS} --> {__RWSX__} --> [{_ARG_SRN_}]{NASAL}{_V_D_WN_}{_RWS_}
620
+ \end
621
+
622
+ \** Nasal + Conditional macro **\
623
+ \beg macro serie_lnc ARG_SLN_COND _ARG_SLN_COND_
624
+ \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
625
+ \deploy serie_ln {ARG_SLN_COND} {_ARG_SLN_COND_}
626
+ \endif
627
+ \end
628
+
629
+ \** Nasal + Conditional macro **\
630
+ \beg macro serie_rnc ARG_SRN_COND _ARG_SRN_COND_
631
+ \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
632
+ \deploy serie_rn {ARG_SRN_COND} {_ARG_SRN_COND_}
633
+ \endif
634
+ \end
635
+
636
+ \beg macro serie ARG_L ARG_R ARG_LN ARG_RN _ARG_L_ _ARG_R_
637
+ \deploy serie_l {ARG_L} {_ARG_L_}
638
+ \deploy serie_r {ARG_R} {_ARG_R_}
639
+ \deploy serie_lnc {ARG_LN} {_ARG_L_}
640
+ \deploy serie_rnc {ARG_RN} {_ARG_R_}
641
+ \end
642
+
643
+ \** ----------------------------------------------------------- **\
644
+ {L1R} === (t,ɾ,ʔ) * p
645
+ {L1L} === tʃ * k
646
+ {L1R_NASAL} === n(t,ɾ,ʔ) * mp
647
+ {L1L_NASAL} === ntʃ * ŋk
648
+ {_L1R_} === TINCO * PARMA
649
+ {_L1L_} === CALMA * QUESSE
650
+
651
+ \deploy serie {L1L} {L1R} {L1L_NASAL} {L1R_NASAL} {_L1L_} {_L1R_}
652
+
653
+ \** ----------------------------------------------------------- **\
654
+ {L2R} === d * b
655
+ {L2L} === dʒ * (ɡ,g)
656
+ {L2R_NASAL} === nd * mb
657
+ {L2L_NASAL} === ndʒ * ŋ(ɡ,g)
658
+ {_L2R_} === ANDO * UMBAR
659
+ {_L2L_} === ANGA * UNGWE
660
+
661
+ \deploy serie {L2L} {L2R} {L2L_NASAL} {L2R_NASAL} {_L2L_} {_L2R_}
662
+
663
+ \** ----------------------------------------------------------- **\
664
+ {L3R} === θ * f * ʃ * x
665
+ {L3R_NASAL} === nθ * mf * nʃ * ŋx
666
+ {_L3R_} === SULE * FORMEN * AHA * HWESTA
667
+
668
+ \deploy serie_r {L3R} {_L3R_}
669
+ \deploy serie_rnc {L3R_NASAL} {_L3R_}
670
+
671
+ \** ----------------------------------------------------------- **\
672
+ {L4R} === ð * v * ʒ * ɣ
673
+ {L4R_NASAL} === nð * mv * nʒ * ŋɣ
674
+ {_L4R_} === ANTO * AMPA * ANCA * UNQUE
675
+
676
+ \deploy serie_r {L4R} {_L4R_}
677
+ \deploy serie_rnc {L4R_NASAL} {_L4R_}
678
+
679
+ \** ----------------------------------------------------------- **\
680
+ {L5R} === (n,n̩) * m * n(j,J) * ŋ
681
+ {_L5R_} === NUMEN * MALTA * NOLDO * NWALME
682
+
683
+ \** no nasals for this serie **\
684
+ \deploy serie_r {L5R} {_L5R_}
685
+
686
+ \** ----------------------------------------------------------- **\
687
+ {L6R} === w
688
+ {L6R_NASAL} === nw
689
+ {_L6R_} === VALA
690
+
691
+ {L6R_NN} === (j,J)
692
+ {_L6R_NN_} === ANNA \** ORE for rhoticized schwas **\
693
+
694
+ \deploy serie_r {L6R} {_L6R_}
695
+ \deploy serie_r {L6R_NN} {_L6R_NN_}
696
+ \deploy serie_rnc {L6R_NASAL} {_L6R_}
697
+
698
+ \** ----------------------------------------------------------- **\
699
+ {L7R} === r * ɹ * l
700
+ {_L7R_} === ROMEN * ORE * LAMBE \** ARDA / ALDA **\
701
+
702
+ \deploy serie_r {L7R} {_L7R_}
703
+
704
+ \** ----------------------------------------------------------- **\
705
+ {L8} === s * z
706
+ {L8_NASAL} === ns * nz
707
+ {_L8_} === SILME_NUQUERNA * ESSE_NUQUERNA
708
+
709
+ {V_D_WN}[{L8}] --> 2,1 --> [{_L8_}]{_V_D_WN_}
710
+ \if "s_consonants_r != SCONSR_SARINCE_ALWAYS && pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
711
+ \** Avoid clash between nasal sign and sa rince **\
712
+ {V_D_WN}[{L8_NASAL}] --> 2,1 --> [{_L8_}]{NASAL}{_V_D_WN_}
713
+ \endif
714
+
715
+ \** Single s/z : overload **\
716
+ s --> SILME
717
+ z --> ESSE
718
+
719
+ ns --> SILME_NUQUERNA {NASAL} \** Explicitly redefined for clarity (already defined in the nasal rule above) **\
720
+ nz --> ESSE_NUQUERNA {NASAL} \** Explicitly redefined for clarity (already defined in the nasal rule above) **\
721
+
722
+ \** ----------------------------------------------------------- **\
723
+ {L9} === h * ʍ
724
+ {_L9_} === HYARMEN * {_WH_} \** YANTA / URE **\
725
+
726
+ {V_D_WN}[{L9}] --> 2,1 --> [{_L9_}]{_V_D_WN_}
727
+
728
+ \** -- SPECIAL TOKENS **\
729
+
730
+ \if "english_the == ENGLISH_THE_EXTENDED_TENGWAR"
731
+ _ð{ESCHWA}_ --> TW_EXT_21
732
+ _ðɪ_ --> TW_EXT_21 I_TEHTA
733
+ \endif
734
+
735
+ \if "english_of == ENGLISH_OF_EXTENDED_TENGWAR"
736
+ _(ɒ,ʌ)v_ --> TW_EXT_22
737
+ \endif
738
+
739
+ \if "english_the == ENGLISH_THE_EXTENDED_TENGWAR && english_of == ENGLISH_OF_EXTENDED_TENGWAR"
740
+ _OFTH{ESCHWA}_ --> TW_EXT_22 {GEMINATE}
741
+ _OFTHɪ_ --> TW_EXT_22 {GEMINATE} I_TEHTA
742
+ \endif
743
+ \end
744
+
745
+ \beg rules punctuation
746
+ . --> PUNCT_DDOT
747
+ .. --> PUNCT_DOT PUNCT_DDOT PUNCT_DOT
748
+ ... --> PUNCT_TILD
749
+ … --> PUNCT_TILD
750
+ .... --> PUNCT_TILD
751
+ ..... --> PUNCT_TILD
752
+ ...... --> PUNCT_TILD
753
+ ....... --> PUNCT_TILD
754
+
755
+ , --> PUNCT_DOT
756
+ : --> PUNCT_DOT
757
+ ; --> PUNCT_DOT
758
+ ! --> PUNCT_EXCLAM
759
+ ? --> PUNCT_INTERR
760
+ · --> {NULL}
761
+
762
+ - --> {NULL}
763
+ – --> PUNCT_TILD
764
+ — --> PUNCT_TILD
765
+
766
+ \** Apostrophe **\
767
+
768
+ ' --> {NULL}
769
+ ’ --> {NULL}
770
+
771
+ \** NBSP **\
772
+ {NBSP} --> NBSP
773
+
774
+ \** Quotes **\
775
+
776
+ “ --> DQUOT_OPEN
777
+ ” --> DQUOT_CLOSE
778
+ « --> DQUOT_OPEN
779
+ » --> DQUOT_CLOSE
780
+
781
+ [ --> PUNCT_PAREN_L
782
+ ] --> PUNCT_PAREN_R
783
+ ( --> PUNCT_PAREN_L
784
+ ) --> PUNCT_PAREN_R
785
+ { --> PUNCT_PAREN_L
786
+ } --> PUNCT_PAREN_R
787
+ < --> PUNCT_PAREN_L
788
+ > --> PUNCT_PAREN_R
789
+
790
+ \** Not universal between fonts ... **\
791
+ $ --> BOOKMARK_SIGN
792
+ ≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
793
+ ≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
794
+ \end
795
+
796
+ \beg rules numbers
797
+ 0 --> NUM_0
798
+ 1 --> NUM_1
799
+ 2 --> NUM_2
800
+ 3 --> NUM_3
801
+ 4 --> NUM_4
802
+ 5 --> NUM_5
803
+ 6 --> NUM_6
804
+ 7 --> NUM_7
805
+ 8 --> NUM_8
806
+ 9 --> NUM_9
807
+ A --> NUM_10
808
+ B --> NUM_11
809
+ \end
810
+ \end
811
+
812
+ \beg postprocessor
813
+ \resolve_virtuals
814
+ \end