glaemscribe 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/bin/glaemscribe +2 -2
  3. data/glaemresources/charsets/cirth_ds.cst +514 -179
  4. data/glaemresources/charsets/eldamar.cst +210 -0
  5. data/glaemresources/charsets/tengwar_ds_annatar.cst +2452 -130
  6. data/glaemresources/charsets/tengwar_ds_eldamar.cst +2319 -125
  7. data/glaemresources/charsets/tengwar_ds_elfica.cst +2317 -126
  8. data/glaemresources/charsets/tengwar_ds_parmaite.cst +2319 -127
  9. data/glaemresources/charsets/tengwar_ds_sindarin.cst +2318 -127
  10. data/glaemresources/charsets/tengwar_freemono.cst +1 -1
  11. data/glaemresources/charsets/tengwar_guni_annatar.cst +2451 -131
  12. data/glaemresources/charsets/tengwar_guni_eldamar.cst +2317 -126
  13. data/glaemresources/charsets/tengwar_guni_elfica.cst +2316 -127
  14. data/glaemresources/charsets/tengwar_guni_parmaite.cst +2319 -127
  15. data/glaemresources/charsets/tengwar_guni_sindarin.cst +2317 -126
  16. data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
  17. data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
  18. data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
  19. data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
  20. data/glaemresources/modes/japanese-tengwar.glaem +9 -4
  21. data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
  22. data/glaemresources/modes/raw-cirth.glaem +154 -0
  23. data/lib/api/charset_parser.rb +7 -1
  24. data/lib/api/mode.rb +35 -10
  25. data/lib/api/mode_parser.rb +21 -12
  26. data/lib/api/post_processor/outspace.rb +44 -0
  27. data/lib/api/rule_group.rb +1 -1
  28. data/lib/api/transcription_pre_post_processor.rb +8 -5
  29. data/lib/api/transcription_processor.rb +12 -9
  30. data/lib/glaemscribe.rb +2 -0
  31. data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
  32. data/lib_espeak/glaemscribe_tts.js +363 -223
  33. metadata +12 -6
@@ -0,0 +1,814 @@
1
+ \**
2
+
3
+ Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ the transcription of texts between writing systems, and more
5
+ specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ invented languages to some of his devised writing systems.
7
+
8
+ Copyright (C) 2015 Benjamin Babut (Talagan).
9
+
10
+ This program is free software: you can redistribute it and/or modify
11
+ it under the terms of the GNU Affero General Public License as published by
12
+ the Free Software Foundation, either version 3 of the License, or
13
+ any later version.
14
+
15
+ This program is distributed in the hope that it will be useful,
16
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ GNU Affero General Public License for more details.
19
+
20
+ You should have received a copy of the GNU Affero General Public License
21
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ **\
24
+
25
+ \beg changelog
26
+ \entry "0.0.1" "First version."
27
+ \end
28
+
29
+ \language "English"
30
+ \writing "Tengwar"
31
+ \mode "English Tengwar - General Use"
32
+ \version "0.0.1"
33
+ \authors "J.R.R. Tolkien, impl. Talagan (Benjamin Babut), advis. Corchalad (Bertrand Bellet)"
34
+
35
+ \world primary_related_to_arda
36
+ \invention jrrt
37
+
38
+ \raw_mode "raw-tengwar"
39
+
40
+ \charset tengwar_ds_sindarin false
41
+ \charset tengwar_ds_parmaite false
42
+ \charset tengwar_ds_eldamar false
43
+ \charset tengwar_ds_annatar true
44
+ \charset tengwar_ds_elfica false
45
+
46
+ \charset tengwar_guni_sindarin false
47
+ \charset tengwar_guni_parmaite false
48
+ \charset tengwar_guni_eldamar false
49
+ \charset tengwar_guni_annatar false
50
+ \charset tengwar_guni_elfica false
51
+
52
+ \charset tengwar_freemono false
53
+ \charset tengwar_telcontar false
54
+
55
+ \beg options
56
+
57
+ \** ENGLISH accent/dialect/variant. It also controls espeak behaviour. **\
58
+ \beg option espeak_voice ESPEAK_VOICE_EN_TENGWAR
59
+ \value ESPEAK_VOICE_EN_TENGWAR 0
60
+ \value ESPEAK_VOICE_EN_TENGWAR_GB 1
61
+ \value ESPEAK_VOICE_EN_TENGWAR_RP 2
62
+ \value ESPEAK_VOICE_EN_TENGWAR_US 3
63
+ \end
64
+
65
+ \** 'the' word **\
66
+ \beg option english_the ENGLISH_THE_EXTENDED_TENGWAR
67
+ \value ENGLISH_THE_EXTENDED_TENGWAR 0
68
+ \value ENGLISH_THE_SEPARATE 1
69
+ \end
70
+
71
+ \** 'of' word **\
72
+ \beg option english_of ENGLISH_OF_EXTENDED_TENGWAR
73
+ \value ENGLISH_OF_EXTENDED_TENGWAR 0
74
+ \value ENGLISH_OF_SEPARATE 1
75
+ \end
76
+
77
+ \** 'to' word (the word 'to' may have its vowel reduced to a schwa) **\
78
+ \beg option schwa_of_to SCHWA_OF_TO_U
79
+ \value SCHWA_OF_TO_U 0
80
+ \value SCHWA_OF_TO_SCHWA 1
81
+ \end
82
+
83
+ \** 'wh' in old accents/US. Sometimes called 'wine/whine' merger. **\
84
+ \beg option ancient_voiceless_labiovelar_fricative_wh WH_VLVF_HWESTA_SINDARINWA
85
+ \value WH_VLVF_HWESTA_SINDARINWA 0
86
+ \value WH_VLVF_WHINE_MERGER 1
87
+ \end
88
+
89
+ \** SARINCE option when consonants are oriented left **\
90
+ \beg option s_consonants_l SCONSL_SARINCE_ALWAYS
91
+ \value SCONSL_SARINCE_NEVER 0
92
+ \value SCONSL_SARINCE_ALWAYS 1
93
+ \end
94
+
95
+ \** SARINCE option when consonants are oriented right **\
96
+ \beg option s_consonants_r SCONSR_SARINCE_END_OF_WORD
97
+ \value SCONSR_SARINCE_NEVER 0
98
+ \value SCONSR_SARINCE_ALWAYS 1
99
+ \value SCONSR_SARINCE_END_OF_WORD 2
100
+ \end
101
+
102
+ \** Re-establishment of linking r in non-rhotic accent, ex : 'better life' vs 'betteR answer' **\
103
+ \beg option linking_r true
104
+ \visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR_RP || espeak_voice == ESPEAK_VOICE_EN_TENGWAR_GB"
105
+ \end
106
+
107
+ \** Intrusive r, like in vanillaR ice **\
108
+ \beg option intrusive_r true
109
+ \visible_when "espeak_voice != ESPEAK_VOICE_EN_TENGWAR_US"
110
+ \end
111
+
112
+ \beg option pre_consonant_n_with_same_articulation_point PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK
113
+ \value PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_SEPARATE 0
114
+ \value PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK 1
115
+ \end
116
+
117
+ \** Pre-velar n (espeak 'n‿') : written as ŋ when assimilable, kept as n otherwise **\
118
+ \beg option pre_velar_n PRE_VELAR_N_ASSIMILABLE
119
+ \value PRE_VELAR_N_NON_ASSIMILABLE 0
120
+ \value PRE_VELAR_N_ASSIMILABLE 1
121
+ \end
122
+
123
+ \** Common elvish / tengwar option **\
124
+ \beg option consonant_modification_style CONSONANT_MODIFICATION_STYLE_WAVE
125
+ \value CONSONANT_MODIFICATION_STYLE_WAVE 0
126
+ \value CONSONANT_MODIFICATION_STYLE_BAR 1
127
+ \end
128
+
129
+ \** Long a like in 'palm' **\
130
+ \beg option long_back_a LONG_BACK_A_IMPLICIT_CARRIER
131
+ \radio
132
+ \value LONG_BACK_A_IMPLICIT_CARRIER 0
133
+ \value LONG_BACK_A_WITH_CARRIER 1
134
+ \end
135
+
136
+ \** DISABLED : it's always long **\
137
+ \beg option long_front_e LONG_FRONT_E_DOUBLE_TEHTA
138
+ \visible_when false
139
+ \radio
140
+ \value LONG_FRONT_E_DOUBLE_TEHTA 0
141
+ \value LONG_FRONT_E_WITH_CARRIER 1
142
+ \end
143
+
144
+ \** DISABLED : it's always long **\
145
+ \beg option long_back_e LONG_BACK_E_DOUBLE_TEHTA
146
+ \visible_when false
147
+ \radio
148
+ \value LONG_BACK_E_DOUBLE_TEHTA 0
149
+ \value LONG_BACK_E_WITH_CARRIER 1
150
+ \end
151
+
152
+ \** Long i like in 'fleece' **\
153
+ \beg option long_i LONG_I_DOUBLE_TEHTA
154
+ \radio
155
+ \value LONG_I_DOUBLE_TEHTA 0
156
+ \value LONG_I_WITH_CARRIER 1
157
+ \value LONG_I_AS_DIPHTONG 2
158
+ \end
159
+
160
+ \** long o like in 'thought' **\
161
+ \beg option long_o LONG_O_DOUBLE_TEHTA
162
+ \radio
163
+ \value LONG_O_DOUBLE_TEHTA 0
164
+ \value LONG_O_WITH_CARRIER 1
165
+ \end
166
+
167
+ \** long u like in 'goose' **\
168
+ \beg option long_u LONG_U_DOUBLE_TEHTA
169
+ \radio
170
+ \value LONG_U_DOUBLE_TEHTA 0
171
+ \value LONG_U_WITH_CARRIER 1
172
+ \value LONG_U_AS_DIPHTONG 2
173
+ \end
174
+
175
+ \** 'cure', 'cute' diphthong **\
176
+ \beg option ju_diphthong JU_DIPHTHONG_SEPARATE
177
+ \radio
178
+ \value JU_DIPHTHONG_SEPARATE 0
179
+ \value JU_DIPHTHONG_LIKE_IW 1
180
+ \end
181
+
182
+ \** Horse / Hoarse vowel distinction (only JRRT/US accents) **\
183
+ \beg option horse_hoarse_merger HORSE_HOARSE_SEPARATE
184
+ \visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR || espeak_voice == ESPEAK_VOICE_EN_TENGWAR_US"
185
+ \value HORSE_HOARSE_MERGE 0
186
+ \value HORSE_HOARSE_SEPARATE 1
187
+ \end
188
+
189
+ \** Cot / Coat vowel distinction (only offered and applied with the US accent) **\
190
+ \beg option cot_coat_merger COT_COAT_SEPARATE
191
+ \value COT_COAT_MERGE 0
192
+ \value COT_COAT_SEPARATE 1
193
+ \visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR_US"
194
+ \end
195
+
196
+ \** Remove unnecessary, natural schwa marks **\
197
+ \beg option implicit_schwa IMPLICIT_SCHWA_NO
198
+ \value IMPLICIT_SCHWA_NO 0
199
+ \value IMPLICIT_SCHWA_YES 1
200
+ \end
201
+
202
+ \** when implicit schwa is on, how to mark non-reducible schwas **\
203
+ \beg option implicit_schwa_non_reducible IMPLICIT_SCHWA_NON_REDUCIBLE_UNUTIXE_IF_POSSIBLE
204
+ \value IMPLICIT_SCHWA_NON_REDUCIBLE_UNUTIXE_IF_POSSIBLE 0
205
+ \value IMPLICIT_SCHWA_NON_REDUCIBLE_ALWAYS_TELCO 1
206
+ \visible_when "implicit_schwa == IMPLICIT_SCHWA_YES"
207
+ \end
208
+
209
+ \** Schwi, in US/JRRT **\
210
+ \beg option schwi SCHWI_LIKE_I
211
+ \radio
212
+ \value SCHWI_LIKE_I 0
213
+ \value SCHWI_LIKE_SCHWA 1
214
+ \visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR || espeak_voice == ESPEAK_VOICE_EN_TENGWAR_US"
215
+ \end
216
+
217
+ \** 'strut' vowel special case **\
218
+ \beg option open_mid_back_unrounded OMBU_THINNAS
219
+ \radio
220
+ \value OMBU_THINNAS 0
221
+ \value OMBU_GRAVE 1
222
+ \value OMBU_LIKE_SCHWA 2
223
+ \end
224
+
225
+ \** Common elvish / tengwar option **\
226
+ \beg option reverse_o_u_tehtar U_UP_O_DOWN
227
+ \value O_UP_U_DOWN 1
228
+ \value U_UP_O_DOWN 2
229
+ \end
230
+
231
+ \** Use english standard by default **\
232
+ \option reverse_numbers false
233
+ \beg option numbers_base BASE_10
234
+ \value BASE_10 10
235
+ \value BASE_12 12
236
+ \end
237
+
238
+ \option auto_spacing true
239
+
240
+ \end
241
+
242
+ \beg preprocessor
243
+ \downcase
244
+
245
+ \** Remove phonetics accentuation marks **\
246
+ \rxsubstitute "[ˈˌ]" ""
247
+
248
+ \** foreign words nasal a, split to "an" (ex: croissant) **\
249
+ \rxsubstitute "ɑ̃" "ɑn"
250
+
251
+ \** Non rhotic schwa simplification **\
252
+ \rxsubstitute "ɐ" "ə"
253
+
254
+ \if linking_r
255
+ \rxsubstitute "ɹ‿" "ɹ"
256
+ \else
257
+ \rxsubstitute "ɹ‿" ""
258
+ \endif
259
+
260
+ \if intrusive_r
261
+ \rxsubstitute "ɹ̩‿" "ɹ"
262
+ \else
263
+ \rxsubstitute "ɹ̩‿" ""
264
+ \endif
265
+
266
+ \if "schwa_of_to == SCHWA_OF_TO_U"
267
+ \substitute "ʊ̟" "ʊ"
268
+ \else
269
+ \substitute "ʊ̟" "ə"
270
+ \endif
271
+
272
+ \if "pre_velar_n == PRE_VELAR_N_ASSIMILABLE"
273
+ \rxsubstitute "n‿" "ŋ"
274
+ \else
275
+ \rxsubstitute "n‿" "n"
276
+ \endif
277
+
278
+ \** IMPORTANT NOTE : in all following regexps **\
279
+ \** (^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]) stands for 'word boundary' **\
280
+
281
+ \** 'the' variations **\
282
+ \** that the **\
283
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(ð[aæ]t)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
284
+
285
+ \** of the **\
286
+ \if "english_the == ENGLISH_THE_EXTENDED_TENGWAR && english_of == ENGLISH_OF_EXTENDED_TENGWAR"
287
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])([ɒʌ]v)ð([əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1OFTH\\3\\4"
288
+ \else
289
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])([ɒʌ]v)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
290
+ \endif
291
+
292
+ \** for the **\
293
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(f[ɚə])(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
294
+ \** with the **\
295
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(wɪð)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
296
+ \** in the **\
297
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(ɪn)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
298
+ \** on the **\
299
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])([ɒɔ]n)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
300
+ \** from the **\
301
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(fɹʌm)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
302
+ \** was the **\
303
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(wʌz)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
304
+
305
+ \** Beware of the order of COT/COAT merger and horse/hoarse merger **\
306
+ \if "cot_coat_merger == COT_COAT_MERGE && espeak_voice == ESPEAK_VOICE_EN_TENGWAR_US"
307
+ \substitute "oʊ" "ɑː"
308
+ \endif
309
+
310
+ \if "horse_hoarse_merger == HORSE_HOARSE_SEPARATE"
311
+ \** Re-establish former diphthong **\
312
+ \substitute "oːɹ" "oʊɹ"
313
+ \endif
314
+
315
+
316
+ \** If treated as diphthong, change long i to i + schwi **\
317
+ \if "long_i == LONG_I_AS_DIPHTONG"
318
+ \substitute "iː" "iɪ"
319
+ \endif
320
+
321
+ \** Experimental, don't affect ju: at beginning of words/after consonant **\
322
+ \if "ju_diphthong == JU_DIPHTHONG_LIKE_IW"
323
+ \rxsubstitute "(juː|jʊ)" "iw"
324
+ \endif
325
+
326
+ \if "long_u == LONG_U_AS_DIPHTONG"
327
+ \substitute "uː" "uʊ"
328
+ \endif
329
+
330
+ \** ! Beware of the order of the following rules **\
331
+ \** ! Rhotic schwa : remove 1 level of length when superfluous and always add explicit mark **\
332
+ \rxsubstitute "[ɜɚ]ː?" "ɜɹ"
333
+
334
+ \** ! Potentially remove superfluous added rhotic marks **\
335
+ \rxsubstitute "ɹ+" "ɹ"
336
+
337
+ \** ! Disambiguate ɹ + vowel : ORE/ROMEN **\
338
+ \rxsubstitute "ɹ([ɑæaeɛʌɐəɜɚiɪᵻoɒɔuʊʘ])" "r\\1"
339
+
340
+ \if "implicit_schwa == IMPLICIT_SCHWA_YES"
341
+ \** All schwas at beginning of words cannot reduce **\
342
+ \** or after vowels (== not consonant) **\
343
+ \** beware of ɪ as it can appear as consonant (lawyer) **\
344
+ \** same for ʊ for sour **\
345
+ \** Mark non reducing schwa as ʘ **\
346
+ \rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([əɐɜɚ])" "\\1ʘ"
347
+ \rxsubstitute "([əɐɜɚ])r" "ʘr"
348
+
349
+ \if "schwi == SCHWI_LIKE_SCHWA"
350
+ \** Don't forget to mark schwis too **\
351
+ \rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([ᵻ])" "\\1ʘ"
352
+ \rxsubstitute "ᵻr" "ʘr"
353
+ \endif
354
+
355
+ \if "open_mid_back_unrounded == OMBU_LIKE_SCHWA"
356
+ \** Don't forget to mark the ombus too **\
357
+ \rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([ʌ])" "\\1ʘ"
358
+ \rxsubstitute "ʌr" "ʘr"
359
+ \endif
360
+ \endif
361
+
362
+ \elvish_numbers "\\eval numbers_base" "\\eval reverse_numbers"
363
+
364
+ \if "auto_spacing == true"
365
+ \rxsubstitute "([^\\s])([.,;:!?])" "\\1 \\2"
366
+ \rxsubstitute "([.,;:!?])([^\\s])" "\\1 \\2"
367
+ \endif
368
+ \end
369
+
370
+
371
+ \beg processor
372
+
373
+ \beg rules litteral
374
+
375
+ \if "consonant_modification_style == CONSONANT_MODIFICATION_STYLE_WAVE"
376
+ {GEMINATE} === GEMINATE_SIGN_TILD
377
+ {NASAL} === NASALIZE_SIGN_TILD
378
+ \else
379
+ {GEMINATE} === GEMINATE_SIGN
380
+ {NASAL} === NASALIZE_SIGN
381
+ \endif
382
+
383
+ \** sa-rinci for left-oriented tengwar **\
384
+ \if "s_consonants_l == SCONSL_SARINCE_ALWAYS"
385
+ {LWS} === [{NULL} * (s,z)]
386
+ {_LWS_} === [{NULL} * SARINCE]
387
+ {__LWSX__} === 2,1,3
388
+ \else
389
+ {LWS} === {NULL}
390
+ {_LWS_} === {NULL}
391
+ {__LWSX__} === 2,1
392
+ \endif
393
+
394
+ \** sa-rinci for right-oriented tengwar **\
395
+ \if "s_consonants_r == SCONSR_SARINCE_ALWAYS"
396
+ {RWS} === [{NULL} * (s,z)]
397
+ {_RWS_} === [{NULL} * SARINCE]
398
+ {__RWSX__} === 2,1,3
399
+ \elsif "s_consonants_r == SCONSR_SARINCE_END_OF_WORD"
400
+ {RWS} === [{NULL} * (s_,z_)]
401
+ {_RWS_} === [{NULL} * SARINCE]
402
+ {__RWSX__} === 2,1,3
403
+ \else
404
+ {RWS} === {NULL}
405
+ {_RWS_} === {NULL}
406
+ {__RWSX__} === 2,1
407
+ \endif
408
+
409
+ \if "reverse_o_u_tehtar == U_UP_O_DOWN"
410
+ {O_LOOP} === O_TEHTA
411
+ {O_LOOP_DOUBLE} === O_TEHTA_DOUBLE
412
+ {U_LOOP} === U_TEHTA
413
+ {U_LOOP_DOUBLE} === U_TEHTA_DOUBLE
414
+ \else
415
+ {O_LOOP} === U_TEHTA
416
+ {O_LOOP_DOUBLE} === U_TEHTA_DOUBLE
417
+ {U_LOOP} === O_TEHTA
418
+ {U_LOOP_DOUBLE} === O_TEHTA_DOUBLE
419
+ \endif
420
+
421
+ \** schwas : ɐ,ə **\
422
+ \** rhotic schwa : ɚ **\
423
+ \** schwi : ᵻ **\
424
+ \** schwu : ʌ **\
425
+
426
+ {IGROUP} === i,ɪ
427
+ {UGROUP} === u,ʊ
428
+ {EBGROUP} === ə,ɐ \** REDUCIBLE **\
429
+
430
+ {SCHWA_NON_REDUCIBLE} === ʘ \** NON REDUCIBLE **\
431
+ {ESCHWA} === (ə,ʘ) \** REDUCIBLE & NON REDUCIBLE E SCHWA **\
432
+
433
+ \if "schwi == SCHWI_LIKE_I"
434
+ {IGROUP} === {IGROUP},ᵻ
435
+ \else
436
+ {EBGROUP} === {EBGROUP},ᵻ
437
+ \endif
438
+
439
+ {W_OMBU_GROUP} === {NULL}
440
+ {_W_OMBU_GROUP_} === {NULL}
441
+ \if "open_mid_back_unrounded == OMBU_GRAVE"
442
+ {W_OMBU_GROUP} === * (ʌ)
443
+ {_W_OMBU_GROUP_} === * E_TEHTA_GRAVE
444
+ \elsif "open_mid_back_unrounded == OMBU_THINNAS"
445
+ {W_OMBU_GROUP} === * (ʌ)
446
+ {_W_OMBU_GROUP_} === * THINNAS
447
+ \else
448
+ {EBGROUP} === {EBGROUP},ʌ
449
+ \endif
450
+
451
+ {A_FRONT} === (æ,a) \** Always short **\
452
+ {A_BACK} === (ɑ) \** Always long **\
453
+ {E_FRONT} === (e,ɛ)
454
+ {E_BACK} === ({EBGROUP})
455
+ {E_BACK_RHOTIC} === (ɚ,ɜ) \** Rhotic schwas are treated independently **\
456
+ {I} === ({IGROUP})
457
+ {O} === (o,ɒ,ɔ) \** force, mock, lord **\
458
+ {U} === ({UGROUP})
459
+
460
+
461
+ {AA_FRONT} === {A_FRONT}ː \** long front a probably does not exist **\
462
+ {AA_BACK} === {A_BACK}ː
463
+ {EE_FRONT} === {E_FRONT}ː
464
+ {EE_BACK} === {E_BACK}ː \** long back e probably does not exist when not rhotic **\
465
+ {EE_BACK_RHOTIC} === {E_BACK_RHOTIC}ː
466
+ {II} === {I}ː
467
+ {OO} === {O}ː
468
+ {UU} === {U}ː
469
+
470
+ {W_SCHWA_NON_REDUCIBLE} === {NULL}
471
+ {_W_SCHWA_NON_REDUCIBLE_} === {NULL}
472
+
473
+ \if "implicit_schwa == IMPLICIT_SCHWA_YES"
474
+ {_IMPLICIT_SCHWA_} === {NULL}
475
+ \if "implicit_schwa_non_reducible == IMPLICIT_SCHWA_NON_REDUCIBLE_UNUTIXE_IF_POSSIBLE"
476
+ {W_SCHWA_NON_REDUCIBLE} === * {SCHWA_NON_REDUCIBLE}
477
+ {_W_SCHWA_NON_REDUCIBLE_} === * UNUTIXE
478
+ \endif
479
+ \else
480
+ {_IMPLICIT_SCHWA_} === UNUTIXE
481
+ \endif
482
+
483
+ \** GB DIPHTHONGS **\
484
+ \** +dˈeɪ +skˈaɪ +bˈɔɪ +bˈiə +bˈeə +tˈʊə +ɡˌəʊ +kˈaʊ **\
485
+ \** US DIPHTHONGS **\
486
+ \** =dˈeɪ =skˈaɪ =bˈɔɪ -bˈɪɹ -bˈɛɹ -tˈʊɹ +ɡˌoʊ =kˈaʊ **\
487
+
488
+ \** U Diphthongs **\
489
+ {AW} === aʊ \** cow **\
490
+ {OW} === oʊ \** US most / mˈoʊst **\
491
+ {EW} === {ESCHWA}ʊ \** GB go **\
492
+ {UW} === uʊ \** goose if pronounced with labializing accent ... we don't have this in our pronunciations **\
493
+
494
+ \** I Diphthongs : eɪ (day) / aɪ (sky) / ɔɪ (boy) **\
495
+ {AJ} === aɪ \** nine / nˈaɪn **\
496
+ {EJ} === eɪ \** game / ɡˈeɪm **\
497
+ {OJ} === ɔɪ \** boy **\
498
+ {IJ} === iɪ \** fleece if pronounced with palatalising accent **\
499
+
500
+ \** ə diphthongs : iə (GB : beer) / eə (GB: bear) / ʊə (GB: tour) **\
501
+ {IER} === i{ESCHWA} \** GB Beer **\
502
+ {EAR} === e{ESCHWA} \** GB Bear **\
503
+ {UER} === ʊ{ESCHWA} \** GB Tour **\
504
+
505
+ {VOWELS} === {A_BACK} * {A_FRONT} * {E_FRONT} * {E_BACK} * {E_BACK_RHOTIC} * {IER} * {EAR} * {UER} * {I} * {O} * {U} {W_SCHWA_NON_REDUCIBLE} {W_OMBU_GROUP}
506
+ {TEHTAR} === A_TEHTA * A_TEHTA_REVERSED * E_TEHTA * {_IMPLICIT_SCHWA_} * {_IMPLICIT_SCHWA_} * UNUTIXE I_TEHTA * UNUTIXE E_TEHTA * UNUTIXE {U_LOOP} * I_TEHTA * {O_LOOP} * {U_LOOP} {_W_SCHWA_NON_REDUCIBLE_} {_W_OMBU_GROUP_}
507
+
508
+ {LVOWELS} === {AA_BACK} * {AA_FRONT} * {EE_FRONT} * {EE_BACK} * {EE_BACK_RHOTIC} * {II} * {OO} * {UU}
509
+
510
+ {DIPHTHONGS_R} === {AW} * {OW} * {EW} * {UW} * {AJ} * {EJ} * {OJ} * {IJ}
511
+ {_DIPHTHONGS_R_} === VALA A_TEHTA * VALA {O_LOOP} * VALA UNUTIXE * VALA {U_LOOP} * ANNA A_TEHTA * ANNA E_TEHTA * ANNA {O_LOOP} * ANNA I_TEHTA
512
+
513
+ {DIPHTHONGS} === {DIPHTHONGS_R}
514
+ {_DIPHTHONGS_} === {_DIPHTHONGS_R_}
515
+
516
+ {WLONG} === {NULL} \** long vowels that can be used as tehtar **\
517
+ {_WLONG_} === {NULL} \** tehtar of long vowels that can be used as tehtar **\
518
+
519
+ \** LV : Initialization step 1 **\
520
+ {_LONG_A_BACK_} === ARA A_TEHTA
521
+ {_LONG_A_FRONT_} === ARA A_TEHTA_REVERSED \** Should not be possible in English **\
522
+ {_LONG_E_FRONT_} === ARA E_TEHTA
523
+ {_LONG_E_BACK_} === ARA UNUTIXE \** PROBLEM (solved) : ara and unutixe don't work together. But this case will not appear : long back e is not possible when not rhotic. **\
524
+ {_LONG_E_BACK_RHOTIC_} === ARA UNUTIXE \** PROBLEM (solved) : ara and unutixe don't work together. But this case will not appear : simplified by prepro **\
525
+ {_LONG_I_} === ARA I_TEHTA
526
+ {_LONG_O_} === ARA {O_LOOP}
527
+ {_LONG_U_} === ARA {U_LOOP}
528
+
529
+ \** LV : Initialization step 2 **\
530
+ {_LONE_LONG_A_BACK_} === {_LONG_A_BACK_}
531
+ {_LONE_LONG_A_FRONT_} === {_LONG_A_FRONT_}
532
+ {_LONE_LONG_E_FRONT_} === {_LONG_E_FRONT_}
533
+ {_LONE_LONG_E_BACK_} === {_LONG_E_BACK_}
534
+ {_LONE_LONG_E_BACK_RHOTIC_} === {_LONG_E_BACK_RHOTIC_}
535
+ {_LONE_LONG_I_} === {_LONG_I_}
536
+ {_LONE_LONG_O_} === {_LONG_O_}
537
+ {_LONE_LONG_U_} === {_LONG_U_}
538
+
539
+ \if "long_back_a == LONG_BACK_A_IMPLICIT_CARRIER"
540
+ \** Remove carrier and use A_TEHTA as if it was a double tehta; a lone long back a is then carried by TELCO instead of ARA **\
541
+ {_LONG_A_BACK_} === A_TEHTA
542
+ {_LONE_LONG_A_BACK_} === TELCO {_LONG_A_BACK_} \** Override the lone image of long back a itself (not the long front e one) **\
543
+ {WLONG} === {WLONG} * {AA_BACK} \** Long back a can now be used as a tehta **\
544
+ {_WLONG_} === {_WLONG_} * {_LONG_A_BACK_}
545
+ \endif
546
+
547
+ \if "long_front_e == LONG_FRONT_E_DOUBLE_TEHTA"
548
+ \** Does not exist in standard accents **\
549
+ {_LONG_E_FRONT_} === E_TEHTA_DOUBLE
550
+ {_LONE_LONG_E_FRONT_} === TELCO {_LONG_E_FRONT_}
551
+ {WLONG} === {WLONG} * {EE_FRONT}
552
+ {_WLONG_} === {_WLONG_} * {_LONG_E_FRONT_}
553
+ \endif
554
+
555
+ \if "long_back_e == LONG_BACK_E_DOUBLE_TEHTA"
556
+ \** This case should not be possible when not rhotic. **\
557
+ {_LONG_E_BACK_} === I_TEHTA_DOUBLE_INF
558
+ {_LONE_LONG_E_BACK_} === TELCO {_LONG_E_BACK_}
559
+ {WLONG} === {WLONG} * {EE_BACK}
560
+ {_WLONG_} === {_WLONG_} * {_LONG_E_BACK_}
561
+ \endif
562
+
563
+ \if "long_i == LONG_I_DOUBLE_TEHTA"
564
+ {_LONG_I_} === I_TEHTA_DOUBLE
565
+ {_LONE_LONG_I_} === TELCO {_LONG_I_}
566
+ {WLONG} === {WLONG} * {II}
567
+ {_WLONG_} === {_WLONG_} * {_LONG_I_}
568
+ \endif
569
+
570
+ \if "long_o == LONG_O_DOUBLE_TEHTA"
571
+ {_LONG_O_} === {O_LOOP_DOUBLE}
572
+ {_LONE_LONG_O_} === TELCO {_LONG_O_}
573
+ {WLONG} === {WLONG} * {OO}
574
+ {_WLONG_} === {_WLONG_} * {_LONG_O_}
575
+ \endif
576
+
577
+ \if "long_u == LONG_U_DOUBLE_TEHTA"
578
+ {_LONG_U_} === {U_LOOP_DOUBLE}
579
+ {_LONE_LONG_U_} === TELCO {_LONG_U_}
580
+ {WLONG} === {WLONG} * {UU}
581
+ {_WLONG_} === {_WLONG_} * {_LONG_U_}
582
+ \endif
583
+
584
+ \** Define a variable for the images of all long vowels **\
585
+ {_LONE_LONG_VOWELS_} === {_LONE_LONG_A_BACK_} * {_LONE_LONG_A_FRONT_} * {_LONE_LONG_E_FRONT_} * {_LONE_LONG_E_BACK_} * {_LONE_LONG_E_BACK_RHOTIC_} * {_LONE_LONG_I_} * {_LONE_LONG_O_} * {_LONE_LONG_U_}
586
+
587
+ {V_D} === [ {VOWELS} {WLONG} ]
588
+ {V_D_WN} === [ {VOWELS} {WLONG} * {NULL} ]
589
+
590
+ {_V_D_} === [ {TEHTAR} {_WLONG_} ]
591
+ {_V_D_WN_} === [ {TEHTAR} {_WLONG_} * {NULL} ]
592
+
593
+ \** Vowel rules **\
594
+ [{VOWELS}] --> TELCO [{TEHTAR}] \** Replace isolated short vowels **\
595
+ [{DIPHTHONGS_R}]{RWS} --> [{_DIPHTHONGS_R_}]{_RWS_} \** Replace diphthongs **\
596
+
597
+ \if "implicit_schwa_non_reducible == IMPLICIT_SCHWA_NON_REDUCIBLE_ALWAYS_TELCO"
598
+ ʘ --> TELCO
599
+ \endif
600
+
601
+ \** LONE LONG VOWELS **\
602
+ [{LVOWELS}] --> [{_LONE_LONG_VOWELS_}]
603
+
604
+ {_WH_} === HWESTA_SINDARINWA
605
+ \if "ancient_voiceless_labiovelar_fricative_wh == WH_VLVF_WHINE_MERGER"
606
+ {_WH_} === VALA
607
+ \endif
608
+
609
+ \beg macro serie_l ARG_SL _ARG_SL_
610
+ {V_D_WN}[{ARG_SL}]{LWS} --> {__LWSX__} --> [{_ARG_SL_}]{_V_D_WN_}{_LWS_}
611
+ \end
612
+ \beg macro serie_ln ARG_SLN _ARG_SLN_
613
+ {V_D_WN}[{ARG_SLN}]{LWS} --> {__LWSX__} --> [{_ARG_SLN_}]{NASAL}{_V_D_WN_}{_LWS_}
614
+ \end
615
+ \beg macro serie_r ARG_SR _ARG_SR_
616
+ {V_D_WN}[{ARG_SR}]{RWS} --> {__RWSX__} --> [{_ARG_SR_}]{_V_D_WN_}{_RWS_}
617
+ \end
618
+ \beg macro serie_rn ARG_SRN _ARG_SRN_
619
+ {V_D_WN}[{ARG_SRN}]{RWS} --> {__RWSX__} --> [{_ARG_SRN_}]{NASAL}{_V_D_WN_}{_RWS_}
620
+ \end
621
+
622
+ \** Nasal + Conditional macro **\
623
+ \beg macro serie_lnc ARG_SLN_COND _ARG_SLN_COND_
624
+ \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
625
+ \deploy serie_ln {ARG_SLN_COND} {_ARG_SLN_COND_}
626
+ \endif
627
+ \end
628
+
629
+ \** Nasal + Conditional macro **\
630
+ \beg macro serie_rnc ARG_SRN_COND _ARG_SRN_COND_
631
+ \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
632
+ \deploy serie_rn {ARG_SRN_COND} {_ARG_SRN_COND_}
633
+ \endif
634
+ \end
635
+
636
+ \beg macro serie ARG_L ARG_R ARG_LN ARG_RN _ARG_L_ _ARG_R_
637
+ \deploy serie_l {ARG_L} {_ARG_L_}
638
+ \deploy serie_r {ARG_R} {_ARG_R_}
639
+ \deploy serie_lnc {ARG_LN} {_ARG_L_}
640
+ \deploy serie_rnc {ARG_RN} {_ARG_R_}
641
+ \end
642
+
643
+ \** ----------------------------------------------------------- **\
644
+ {L1R} === (t,ɾ,ʔ) * p
645
+ {L1L} === tʃ * k
646
+ {L1R_NASAL} === n(t,ɾ,ʔ) * mp
647
+ {L1L_NASAL} === ntʃ * ŋk
648
+ {_L1R_} === TINCO * PARMA
649
+ {_L1L_} === CALMA * QUESSE
650
+
651
+ \deploy serie {L1L} {L1R} {L1L_NASAL} {L1R_NASAL} {_L1L_} {_L1R_}
652
+
653
+ \** ----------------------------------------------------------- **\
654
+ {L2R} === d * b
655
+ {L2L} === dʒ * (ɡ,g)
656
+ {L2R_NASAL} === nd * mb
657
+ {L2L_NASAL} === ndʒ * ŋ(ɡ,g)
658
+ {_L2R_} === ANDO * UMBAR
659
+ {_L2L_} === ANGA * UNGWE
660
+
661
+ \deploy serie {L2L} {L2R} {L2L_NASAL} {L2R_NASAL} {_L2L_} {_L2R_}
662
+
663
+ \** ----------------------------------------------------------- **\
664
+ {L3R} === θ * f * ʃ * x
665
+ {L3R_NASAL} === nθ * mf * nʃ * ŋx
666
+ {_L3R_} === SULE * FORMEN * AHA * HWESTA
667
+
668
+ \deploy serie_r {L3R} {_L3R_}
669
+ \deploy serie_rnc {L3R_NASAL} {_L3R_}
670
+
671
+ \** ----------------------------------------------------------- **\
672
+ {L4R} === ð * v * ʒ * ɣ
673
+ {L4R_NASAL} === nð * mv * nʒ * ŋɣ
674
+ {_L4R_} === ANTO * AMPA * ANCA * UNQUE
675
+
676
+ \deploy serie_r {L4R} {_L4R_}
677
+ \deploy serie_rnc {L4R_NASAL} {_L4R_}
678
+
679
+ \** ----------------------------------------------------------- **\
680
+ {L5R} === (n,n̩) * m * n(j,J) * ŋ
681
+ {_L5R_} === NUMEN * MALTA * NOLDO * NWALME
682
+
683
+ \** no nasals for this serie **\
684
+ \deploy serie_r {L5R} {_L5R_}
685
+
686
+ \** ----------------------------------------------------------- **\
687
+ {L6R} === w
688
+ {L6R_NASAL} === nw
689
+ {_L6R_} === VALA
690
+
691
+ {L6R_NN} === (j,J)
692
+ {_L6R_NN_} === ANNA \** ORE for rhoticized schwas **\
693
+
694
+ \deploy serie_r {L6R} {_L6R_}
695
+ \deploy serie_r {L6R_NN} {_L6R_NN_}
696
+ \deploy serie_rnc {L6R_NASAL} {_L6R_}
697
+
698
+ \** ----------------------------------------------------------- **\
699
+ {L7R} === r * ɹ * l
700
+ {_L7R_} === ROMEN * ORE * LAMBE \** ARDA / ALDA **\
701
+
702
+ \deploy serie_r {L7R} {_L7R_}
703
+
704
+ \** ----------------------------------------------------------- **\
705
+ {L8} === s * z
706
+ {L8_NASAL} === ns * nz
707
+ {_L8_} === SILME_NUQUERNA * ESSE_NUQUERNA
708
+
709
+ {V_D_WN}[{L8}] --> 2,1 --> [{_L8_}]{_V_D_WN_}
710
+ \if "s_consonants_r != SCONSR_SARINCE_ALWAYS && pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
711
+ \** Avoid clash between nasal sign and sa rince **\
712
+ {V_D_WN}[{L8_NASAL}] --> 2,1 --> [{_L8_}]{NASAL}{_V_D_WN_}
713
+ \endif
714
+
715
+ \** Single s/z : overload **\
716
+ s --> SILME
717
+ z --> ESSE
718
+
719
+ ns --> SILME_NUQUERNA {NASAL} \** Explicitly redefined for clarity (already defined in the nasal rule above) **\
720
+ nz --> ESSE_NUQUERNA {NASAL} \** Explicitly redefined for clarity (already defined in the nasal rule above) **\
721
+
722
+ \** ----------------------------------------------------------- **\
723
+ {L9} === h * ʍ
724
+ {_L9_} === HYARMEN * {_WH_} \** YANTA / URE **\
725
+
726
+ {V_D_WN}[{L9}] --> 2,1 --> [{_L9_}]{_V_D_WN_}
727
+
728
+ \** -- SPECIAL TOKENS **\
729
+
730
+ \if "english_the == ENGLISH_THE_EXTENDED_TENGWAR"
731
+ _ð{ESCHWA}_ --> TW_EXT_21
732
+ _ðɪ_ --> TW_EXT_21 I_TEHTA
733
+ \endif
734
+
735
+ \if "english_of == ENGLISH_OF_EXTENDED_TENGWAR"
736
+ _(ɒ,ʌ)v_ --> TW_EXT_22
737
+ \endif
738
+
739
+ \if "english_the == ENGLISH_THE_EXTENDED_TENGWAR && english_of == ENGLISH_OF_EXTENDED_TENGWAR"
740
+ _OFTH{ESCHWA}_ --> TW_EXT_22 {GEMINATE}
741
+ _OFTHɪ_ --> TW_EXT_22 {GEMINATE} I_TEHTA
742
+ \endif
743
+ \end
744
+
745
+ \beg rules punctuation
746
+ . --> PUNCT_DDOT
747
+ .. --> PUNCT_DOT PUNCT_DDOT PUNCT_DOT
748
+ ... --> PUNCT_TILD
749
+ … --> PUNCT_TILD
750
+ .... --> PUNCT_TILD
751
+ ..... --> PUNCT_TILD
752
+ ...... --> PUNCT_TILD
753
+ ....... --> PUNCT_TILD
754
+
755
+ , --> PUNCT_DOT
756
+ : --> PUNCT_DOT
757
+ ; --> PUNCT_DOT
758
+ ! --> PUNCT_EXCLAM
759
+ ? --> PUNCT_INTERR
760
+ · --> {NULL}
761
+
762
+ - --> {NULL}
763
+ – --> PUNCT_TILD
764
+ — --> PUNCT_TILD
765
+
766
+ \** Apostrophe **\
767
+
768
+ ' --> {NULL}
769
+ ’ --> {NULL}
770
+
771
+ \** NBSP **\
772
+ {NBSP} --> NBSP
773
+
774
+ \** Quotes **\
775
+
776
+ “ --> DQUOT_OPEN
777
+ ” --> DQUOT_CLOSE
778
+ « --> DQUOT_OPEN
779
+ » --> DQUOT_CLOSE
780
+
781
+ [ --> PUNCT_PAREN_L
782
+ ] --> PUNCT_PAREN_R
783
+ ( --> PUNCT_PAREN_L
784
+ ) --> PUNCT_PAREN_R
785
+ { --> PUNCT_PAREN_L
786
+ } --> PUNCT_PAREN_R
787
+ < --> PUNCT_PAREN_L
788
+ > --> PUNCT_PAREN_R
789
+
790
+ \** Not universal between fonts ... **\
791
+ $ --> BOOKMARK_SIGN
792
+ ≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
793
+ ≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
794
+ \end
795
+
796
+ \beg rules numbers
797
+ 0 --> NUM_0
798
+ 1 --> NUM_1
799
+ 2 --> NUM_2
800
+ 3 --> NUM_3
801
+ 4 --> NUM_4
802
+ 5 --> NUM_5
803
+ 6 --> NUM_6
804
+ 7 --> NUM_7
805
+ 8 --> NUM_8
806
+ 9 --> NUM_9
807
+ A --> NUM_10
808
+ B --> NUM_11
809
+ \end
810
+ \end
811
+
812
+ \beg postprocessor
813
+ \resolve_virtuals
814
+ \end