glaemscribe 1.2.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/glaemscribe +2 -2
- data/glaemresources/charsets/cirth_ds.cst +514 -179
- data/glaemresources/charsets/eldamar.cst +210 -0
- data/glaemresources/charsets/tengwar_ds_annatar.cst +2776 -348
- data/glaemresources/charsets/tengwar_ds_eldamar.cst +2648 -351
- data/glaemresources/charsets/tengwar_ds_elfica.cst +2639 -346
- data/glaemresources/charsets/tengwar_ds_parmaite.cst +2648 -351
- data/glaemresources/charsets/tengwar_ds_sindarin.cst +2642 -348
- data/glaemresources/charsets/tengwar_freemono.cst +1 -1
- data/glaemresources/charsets/tengwar_guni_annatar.cst +2725 -300
- data/glaemresources/charsets/tengwar_guni_eldamar.cst +2589 -295
- data/glaemresources/charsets/tengwar_guni_elfica.cst +2592 -298
- data/glaemresources/charsets/tengwar_guni_parmaite.cst +2592 -297
- data/glaemresources/charsets/tengwar_guni_sindarin.cst +2591 -297
- data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
- data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
- data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
- data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
- data/glaemresources/modes/japanese-tengwar.glaem +9 -4
- data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
- data/glaemresources/modes/raw-cirth.glaem +154 -0
- data/lib/api/charset.rb +124 -57
- data/lib/api/charset_parser.rb +39 -26
- data/lib/api/mode.rb +35 -10
- data/lib/api/mode_parser.rb +21 -12
- data/lib/api/post_processor/outspace.rb +44 -0
- data/lib/api/post_processor/resolve_virtuals.rb +41 -19
- data/lib/api/rule_group.rb +1 -1
- data/lib/api/transcription_pre_post_processor.rb +51 -45
- data/lib/api/transcription_processor.rb +12 -9
- data/lib/glaemscribe.rb +2 -0
- data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
- data/lib_espeak/glaemscribe_tts.js +363 -223
- metadata +12 -6
@@ -0,0 +1,814 @@
|
|
1
|
+
\**
|
2
|
+
|
3
|
+
Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
the transcription of texts between writing systems, and more
|
5
|
+
specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
invented languages to some of his devised writing systems.
|
7
|
+
|
8
|
+
Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
|
10
|
+
This program is free software: you can redistribute it and/or modify
|
11
|
+
it under the terms of the GNU Affero General Public License as published by
|
12
|
+
the Free Software Foundation, either version 3 of the License, or
|
13
|
+
any later version.
|
14
|
+
|
15
|
+
This program is distributed in the hope that it will be useful,
|
16
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
GNU Affero General Public License for more details.
|
19
|
+
|
20
|
+
You should have received a copy of the GNU Affero General Public License
|
21
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
**\
|
24
|
+
|
25
|
+
\beg changelog
|
26
|
+
\entry "0.0.1" "First version."
|
27
|
+
\end
|
28
|
+
|
29
|
+
\language "English"
|
30
|
+
\writing "Tengwar"
|
31
|
+
\mode "English Tengwar - General Use"
|
32
|
+
\version "0.0.1"
|
33
|
+
\authors "J.R.R. Tolkien, impl. Talagan (Benjamin Babut), advis. Corchalad (Bertrand Bellet)"
|
34
|
+
|
35
|
+
\world primary_related_to_arda
|
36
|
+
\invention jrrt
|
37
|
+
|
38
|
+
\raw_mode "raw-tengwar"
|
39
|
+
|
40
|
+
\charset tengwar_ds_sindarin false
|
41
|
+
\charset tengwar_ds_parmaite false
|
42
|
+
\charset tengwar_ds_eldamar false
|
43
|
+
\charset tengwar_ds_annatar true
|
44
|
+
\charset tengwar_ds_elfica false
|
45
|
+
|
46
|
+
\charset tengwar_guni_sindarin false
|
47
|
+
\charset tengwar_guni_parmaite false
|
48
|
+
\charset tengwar_guni_eldamar false
|
49
|
+
\charset tengwar_guni_annatar false
|
50
|
+
\charset tengwar_guni_elfica false
|
51
|
+
|
52
|
+
\charset tengwar_freemono false
|
53
|
+
\charset tengwar_telcontar false
|
54
|
+
|
55
|
+
\beg options
|
56
|
+
|
57
|
+
\** ENGLISH accent/dialect/variant. It also controls espeak behaviour. **\
|
58
|
+
\beg option espeak_voice ESPEAK_VOICE_EN_TENGWAR
|
59
|
+
\value ESPEAK_VOICE_EN_TENGWAR 0
|
60
|
+
\value ESPEAK_VOICE_EN_TENGWAR_GB 1
|
61
|
+
\value ESPEAK_VOICE_EN_TENGWAR_RP 2
|
62
|
+
\value ESPEAK_VOICE_EN_TENGWAR_US 3
|
63
|
+
\end
|
64
|
+
|
65
|
+
\** 'the' word **\
|
66
|
+
\beg option english_the ENGLISH_THE_EXTENDED_TENGWAR
|
67
|
+
\value ENGLISH_THE_EXTENDED_TENGWAR 0
|
68
|
+
\value ENGLISH_THE_SEPARATE 1
|
69
|
+
\end
|
70
|
+
|
71
|
+
\** 'of' word **\
|
72
|
+
\beg option english_of ENGLISH_OF_EXTENDED_TENGWAR
|
73
|
+
\value ENGLISH_OF_EXTENDED_TENGWAR 0
|
74
|
+
\value ENGLISH_OF_SEPARATE 1
|
75
|
+
\end
|
76
|
+
|
77
|
+
\** 'to' word (the word 'to' may have its vowel reduced to a schwa) **\
|
78
|
+
\beg option schwa_of_to SCHWA_OF_TO_U
|
79
|
+
\value SCHWA_OF_TO_U 0
|
80
|
+
\value SCHWA_OF_TO_SCHWA 1
|
81
|
+
\end
|
82
|
+
|
83
|
+
\** 'wh' in old accents/US. Sometimes called 'wine/whine' merger. **\
|
84
|
+
\beg option ancient_voiceless_labiovelar_fricative_wh WH_VLVF_HWESTA_SINDARINWA
|
85
|
+
\value WH_VLVF_HWESTA_SINDARINWA 0
|
86
|
+
\value WH_VLVF_WHINE_MERGER 1
|
87
|
+
\end
|
88
|
+
|
89
|
+
\** SARINCE option when consonants are oriented left **\
|
90
|
+
\beg option s_consonants_l SCONSL_SARINCE_ALWAYS
|
91
|
+
\value SCONSL_SARINCE_NEVER 0
|
92
|
+
\value SCONSL_SARINCE_ALWAYS 1
|
93
|
+
\end
|
94
|
+
|
95
|
+
\** SARINCE option when consonants are oriented right **\
|
96
|
+
\beg option s_consonants_r SCONSR_SARINCE_END_OF_WORD
|
97
|
+
\value SCONSR_SARINCE_NEVER 0
|
98
|
+
\value SCONSR_SARINCE_ALWAYS 1
|
99
|
+
\value SCONSR_SARINCE_END_OF_WORD 2
|
100
|
+
\end
|
101
|
+
|
102
|
+
\** Re-establishment of linking r in non-rhotic accent, ex : 'better life' vs 'betteR answer' **\
|
103
|
+
\beg option linking_r true
|
104
|
+
\visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR_RP || espeak_voice == ESPEAK_VOICE_EN_TENGWAR_GB"
|
105
|
+
\end
|
106
|
+
|
107
|
+
\** Intrusive r, like in vanillaR ice **\
|
108
|
+
\beg option intrusive_r true
|
109
|
+
\visible_when "espeak_voice != ESPEAK_VOICE_EN_TENGWAR_US"
|
110
|
+
\end
|
111
|
+
|
112
|
+
\beg option pre_consonant_n_with_same_articulation_point PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK
|
113
|
+
\value PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_SEPARATE 0
|
114
|
+
\value PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK 1
|
115
|
+
\end
|
116
|
+
|
117
|
+
\** 'n' before a velar consonant : assimilate it to 'ŋ' or keep it as 'n' **\
|
118
|
+
\beg option pre_velar_n PRE_VELAR_N_ASSIMILABLE
|
119
|
+
\value PRE_VELAR_N_NON_ASSIMILABLE 0
|
120
|
+
\value PRE_VELAR_N_ASSIMILABLE 1
|
121
|
+
\end
|
122
|
+
|
123
|
+
\** Common elvish / tengwar option **\
|
124
|
+
\beg option consonant_modification_style CONSONANT_MODIFICATION_STYLE_WAVE
|
125
|
+
\value CONSONANT_MODIFICATION_STYLE_WAVE 0
|
126
|
+
\value CONSONANT_MODIFICATION_STYLE_BAR 1
|
127
|
+
\end
|
128
|
+
|
129
|
+
\** Long a like in 'palm' **\
|
130
|
+
\beg option long_back_a LONG_BACK_A_IMPLICIT_CARRIER
|
131
|
+
\radio
|
132
|
+
\value LONG_BACK_A_IMPLICIT_CARRIER 0
|
133
|
+
\value LONG_BACK_A_WITH_CARRIER 1
|
134
|
+
\end
|
135
|
+
|
136
|
+
\** DISABLED : it's always long **\
|
137
|
+
\beg option long_front_e LONG_FRONT_E_DOUBLE_TEHTA
|
138
|
+
\visible_when false
|
139
|
+
\radio
|
140
|
+
\value LONG_FRONT_E_DOUBLE_TEHTA 0
|
141
|
+
\value LONG_FRONT_E_WITH_CARRIER 1
|
142
|
+
\end
|
143
|
+
|
144
|
+
\** DISABLED : it's always long **\
|
145
|
+
\beg option long_back_e LONG_BACK_E_DOUBLE_TEHTA
|
146
|
+
\visible_when false
|
147
|
+
\radio
|
148
|
+
\value LONG_BACK_E_DOUBLE_TEHTA 0
|
149
|
+
\value LONG_BACK_E_WITH_CARRIER 1
|
150
|
+
\end
|
151
|
+
|
152
|
+
\** Long i like in 'fleece' **\
|
153
|
+
\beg option long_i LONG_I_DOUBLE_TEHTA
|
154
|
+
\radio
|
155
|
+
\value LONG_I_DOUBLE_TEHTA 0
|
156
|
+
\value LONG_I_WITH_CARRIER 1
|
157
|
+
\value LONG_I_AS_DIPHTONG 2
|
158
|
+
\end
|
159
|
+
|
160
|
+
\** long o like in 'thought' **\
|
161
|
+
\beg option long_o LONG_O_DOUBLE_TEHTA
|
162
|
+
\radio
|
163
|
+
\value LONG_O_DOUBLE_TEHTA 0
|
164
|
+
\value LONG_O_WITH_CARRIER 1
|
165
|
+
\end
|
166
|
+
|
167
|
+
\** long u like in 'goose' **\
|
168
|
+
\beg option long_u LONG_U_DOUBLE_TEHTA
|
169
|
+
\radio
|
170
|
+
\value LONG_U_DOUBLE_TEHTA 0
|
171
|
+
\value LONG_U_WITH_CARRIER 1
|
172
|
+
\value LONG_U_AS_DIPHTONG 2
|
173
|
+
\end
|
174
|
+
|
175
|
+
\** 'cure', 'cute' diphthong **\
|
176
|
+
\beg option ju_diphthong JU_DIPHTHONG_SEPARATE
|
177
|
+
\radio
|
178
|
+
\value JU_DIPHTHONG_SEPARATE 0
|
179
|
+
\value JU_DIPHTHONG_LIKE_IW 1
|
180
|
+
\end
|
181
|
+
|
182
|
+
\** Horse / Hoarse vowel distinction (only JRRT/US accents) **\
|
183
|
+
\beg option horse_hoarse_merger HORSE_HOARSE_SEPARATE
|
184
|
+
\visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR || espeak_voice == ESPEAK_VOICE_EN_TENGWAR_US"
|
185
|
+
\value HORSE_HOARSE_MERGE 0
|
186
|
+
\value HORSE_HOARSE_SEPARATE 1
|
187
|
+
\end
|
188
|
+
|
189
|
+
\** Cot / Coat vowel distinction (only US accent) **\
|
190
|
+
\beg option cot_coat_merger COT_COAT_SEPARATE
|
191
|
+
\value COT_COAT_MERGE 0
|
192
|
+
\value COT_COAT_SEPARATE 1
|
193
|
+
\visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR_US"
|
194
|
+
\end
|
195
|
+
|
196
|
+
\** Remove unnecessary, natural schwa marks **\
|
197
|
+
\beg option implicit_schwa IMPLICIT_SCHWA_NO
|
198
|
+
\value IMPLICIT_SCHWA_NO 0
|
199
|
+
\value IMPLICIT_SCHWA_YES 1
|
200
|
+
\end
|
201
|
+
|
202
|
+
\** when implicit schwa is on, how to mark non-reducible schwas **\
|
203
|
+
\beg option implicit_schwa_non_reducible IMPLICIT_SCHWA_NON_REDUCIBLE_UNUTIXE_IF_POSSIBLE
|
204
|
+
\value IMPLICIT_SCHWA_NON_REDUCIBLE_UNUTIXE_IF_POSSIBLE 0
|
205
|
+
\value IMPLICIT_SCHWA_NON_REDUCIBLE_ALWAYS_TELCO 1
|
206
|
+
\visible_when "implicit_schwa == IMPLICIT_SCHWA_YES"
|
207
|
+
\end
|
208
|
+
|
209
|
+
\** Schwi, in US/JRRT **\
|
210
|
+
\beg option schwi SCHWI_LIKE_I
|
211
|
+
\radio
|
212
|
+
\value SCHWI_LIKE_I 0
|
213
|
+
\value SCHWI_LIKE_SCHWA 1
|
214
|
+
\visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR || espeak_voice == ESPEAK_VOICE_EN_TENGWAR_US"
|
215
|
+
\end
|
216
|
+
|
217
|
+
\** 'strut' vowel special case **\
|
218
|
+
\beg option open_mid_back_unrounded OMBU_THINNAS
|
219
|
+
\radio
|
220
|
+
\value OMBU_THINNAS 0
|
221
|
+
\value OMBU_GRAVE 1
|
222
|
+
\value OMBU_LIKE_SCHWA 2
|
223
|
+
\end
|
224
|
+
|
225
|
+
\** Common elvish / tengwar option **\
|
226
|
+
\beg option reverse_o_u_tehtar U_UP_O_DOWN
|
227
|
+
\value O_UP_U_DOWN 1
|
228
|
+
\value U_UP_O_DOWN 2
|
229
|
+
\end
|
230
|
+
|
231
|
+
\** Use english standard by default **\
|
232
|
+
\option reverse_numbers false
|
233
|
+
\beg option numbers_base BASE_10
|
234
|
+
\value BASE_10 10
|
235
|
+
\value BASE_12 12
|
236
|
+
\end
|
237
|
+
|
238
|
+
\option auto_spacing true
|
239
|
+
|
240
|
+
\end
|
241
|
+
|
242
|
+
\beg preprocessor
|
243
|
+
\downcase
|
244
|
+
|
245
|
+
\** Remove phonetics accentuation marks **\
|
246
|
+
\rxsubstitute "[ˈˌ]" ""
|
247
|
+
|
248
|
+
\** foreign words nasal a, split to "an" (ex: croissant) **\
|
249
|
+
\rxsubstitute "ɑ̃" "ɑn"
|
250
|
+
|
251
|
+
\** Non rhotic schwa simplification **\
|
252
|
+
\rxsubstitute "ɐ" "ə"
|
253
|
+
|
254
|
+
\if linking_r
|
255
|
+
\rxsubstitute "ɹ‿" "ɹ"
|
256
|
+
\else
|
257
|
+
\rxsubstitute "ɹ‿" ""
|
258
|
+
\endif
|
259
|
+
|
260
|
+
\if intrusive_r
|
261
|
+
\rxsubstitute "ɹ̩‿" "ɹ"
|
262
|
+
\else
|
263
|
+
\rxsubstitute "ɹ̩‿" ""
|
264
|
+
\endif
|
265
|
+
|
266
|
+
\if "schwa_of_to == SCHWA_OF_TO_U"
|
267
|
+
\substitute "ʊ̟" "ʊ"
|
268
|
+
\else
|
269
|
+
\substitute "ʊ̟" "ə"
|
270
|
+
\endif
|
271
|
+
|
272
|
+
\if "pre_velar_n == PRE_VELAR_N_ASSIMILABLE"
|
273
|
+
\rxsubstitute "n‿" "ŋ"
|
274
|
+
\else
|
275
|
+
\rxsubstitute "n‿" "n"
|
276
|
+
\endif
|
277
|
+
|
278
|
+
\** IMPORTANT NOTE : in all following regexps **\
|
279
|
+
\** (^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]) stands for 'word boundary' **\
|
280
|
+
|
281
|
+
\** 'the' variations **\
|
282
|
+
\** that the **\
|
283
|
+
\rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(ð[aæ]t)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
|
284
|
+
|
285
|
+
\** of the **\
|
286
|
+
\if "english_the == ENGLISH_THE_EXTENDED_TENGWAR && english_of == ENGLISH_OF_EXTENDED_TENGWAR"
|
287
|
+
\rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])([ɒʌ]v)ð([əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1OFTH\\3\\4"
|
288
|
+
\else
|
289
|
+
\rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])([ɒʌ]v)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
|
290
|
+
\endif
|
291
|
+
|
292
|
+
\** for the **\
|
293
|
+
\rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(f[ɚə])(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
|
294
|
+
\** with the **\
|
295
|
+
\rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(wɪð)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
|
296
|
+
\** in the **\
|
297
|
+
\rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(ɪn)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
|
298
|
+
\** on the **\
|
299
|
+
\rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])([ɒɔ]n)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
|
300
|
+
\** from the **\
|
301
|
+
\rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(fɹʌm)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
|
302
|
+
\** was the **\
|
303
|
+
\rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(wʌz)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
|
304
|
+
|
305
|
+
\** Beware of the order of COT/COAT merger and horse/hoarse merger **\
|
306
|
+
\if "cot_coat_merger == COT_COAT_MERGE && espeak_voice == ESPEAK_VOICE_EN_TENGWAR_US"
|
307
|
+
\substitute "oʊ" "ɑː"
|
308
|
+
\endif
|
309
|
+
|
310
|
+
\if "horse_hoarse_merger == HORSE_HOARSE_SEPARATE"
|
311
|
+
\** Re-establish former diphthong **\
|
312
|
+
\substitute "oːɹ" "oʊɹ"
|
313
|
+
\endif
|
314
|
+
|
315
|
+
|
316
|
+
\** If treated as diphthong, change long i to i + schwi **\
|
317
|
+
\if "long_i == LONG_I_AS_DIPHTONG"
|
318
|
+
\substitute "iː" "iɪ"
|
319
|
+
\endif
|
320
|
+
|
321
|
+
\** Experimental, don't affect ju: at beginning of words/after consonant **\
|
322
|
+
\if "ju_diphthong == JU_DIPHTHONG_LIKE_IW"
|
323
|
+
\rxsubstitute "(juː|jʊ)" "iw"
|
324
|
+
\endif
|
325
|
+
|
326
|
+
\if "long_u == LONG_U_AS_DIPHTONG"
|
327
|
+
\substitute "uː" "uʊ"
|
328
|
+
\endif
|
329
|
+
|
330
|
+
\** ! Beware of the order of the following rules **\
|
331
|
+
\** ! Rhotic schwa : remove 1 level of length when superfluous and always add explicit mark **\
|
332
|
+
\rxsubstitute "[ɜɚ]ː?" "ɜɹ"
|
333
|
+
|
334
|
+
\** ! Potentially remove superfluous added rhotic marks **\
|
335
|
+
\rxsubstitute "ɹ+" "ɹ"
|
336
|
+
|
337
|
+
\** ! Disambiguate ɹ + vowel : ORE/ROMEN **\
|
338
|
+
\rxsubstitute "ɹ([ɑæaeɛʌɐəɜɚiɪᵻoɒɔuʊʘ])" "r\\1"
|
339
|
+
|
340
|
+
\if "implicit_schwa == IMPLICIT_SCHWA_YES"
|
341
|
+
\** All schwas at beginning of words cannot reduce **\
|
342
|
+
\** or after vowels (== not consonant) **\
|
343
|
+
\** beware of ɪ as it can appear as consonant (lawyer) **\
|
344
|
+
\** same for ʊ for sour **\
|
345
|
+
\** Mark non reducing schwa as ʘ **\
|
346
|
+
\rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([əɐɜɚ])" "\\1ʘ"
|
347
|
+
\rxsubstitute "([əɐɜɚ])r" "ʘr"
|
348
|
+
|
349
|
+
\if "schwi == SCHWI_LIKE_SCHWA"
|
350
|
+
\** Don't forget to mark schwis too **\
|
351
|
+
\rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([ᵻ])" "\\1ʘ"
|
352
|
+
\rxsubstitute "ᵻr" "ʘr"
|
353
|
+
\endif
|
354
|
+
|
355
|
+
\if "open_mid_back_unrounded == OMBU_LIKE_SCHWA"
|
356
|
+
\** Don't forget to mark the ombus too **\
|
357
|
+
\rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([ʌ])" "\\1ʘ"
|
358
|
+
\rxsubstitute "ʌr" "ʘr"
|
359
|
+
\endif
|
360
|
+
\endif
|
361
|
+
|
362
|
+
\elvish_numbers "\\eval numbers_base" "\\eval reverse_numbers"
|
363
|
+
|
364
|
+
\if "auto_spacing == true"
|
365
|
+
\rxsubstitute "([^\\s])([.,;:!?])" "\\1 \\2"
|
366
|
+
\rxsubstitute "([.,;:!?])([^\\s])" "\\1 \\2"
|
367
|
+
\endif
|
368
|
+
\end
|
369
|
+
|
370
|
+
|
371
|
+
\beg processor
|
372
|
+
|
373
|
+
\beg rules litteral
|
374
|
+
|
375
|
+
\if "consonant_modification_style == CONSONANT_MODIFICATION_STYLE_WAVE"
|
376
|
+
{GEMINATE} === GEMINATE_SIGN_TILD
|
377
|
+
{NASAL} === NASALIZE_SIGN_TILD
|
378
|
+
\else
|
379
|
+
{GEMINATE} === GEMINATE_SIGN
|
380
|
+
{NASAL} === NASALIZE_SIGN
|
381
|
+
\endif
|
382
|
+
|
383
|
+
\** sa-rinci for left-oriented tengwar **\
|
384
|
+
\if "s_consonants_l == SCONSL_SARINCE_ALWAYS"
|
385
|
+
{LWS} === [{NULL} * (s,z)]
|
386
|
+
{_LWS_} === [{NULL} * SARINCE]
|
387
|
+
{__LWSX__} === 2,1,3
|
388
|
+
\else
|
389
|
+
{LWS} === {NULL}
|
390
|
+
{_LWS_} === {NULL}
|
391
|
+
{__LWSX__} === 2,1
|
392
|
+
\endif
|
393
|
+
|
394
|
+
\** sa-rinci for right-oriented tengwar **\
|
395
|
+
\if "s_consonants_r == SCONSR_SARINCE_ALWAYS"
|
396
|
+
{RWS} === [{NULL} * (s,z)]
|
397
|
+
{_RWS_} === [{NULL} * SARINCE]
|
398
|
+
{__RWSX__} === 2,1,3
|
399
|
+
\elsif "s_consonants_r == SCONSR_SARINCE_END_OF_WORD"
|
400
|
+
{RWS} === [{NULL} * (s_,z_)]
|
401
|
+
{_RWS_} === [{NULL} * SARINCE]
|
402
|
+
{__RWSX__} === 2,1,3
|
403
|
+
\else
|
404
|
+
{RWS} === {NULL}
|
405
|
+
{_RWS_} === {NULL}
|
406
|
+
{__RWSX__} === 2,1
|
407
|
+
\endif
|
408
|
+
|
409
|
+
\if "reverse_o_u_tehtar == U_UP_O_DOWN"
|
410
|
+
{O_LOOP} === O_TEHTA
|
411
|
+
{O_LOOP_DOUBLE} === O_TEHTA_DOUBLE
|
412
|
+
{U_LOOP} === U_TEHTA
|
413
|
+
{U_LOOP_DOUBLE} === U_TEHTA_DOUBLE
|
414
|
+
\else
|
415
|
+
{O_LOOP} === U_TEHTA
|
416
|
+
{O_LOOP_DOUBLE} === U_TEHTA_DOUBLE
|
417
|
+
{U_LOOP} === O_TEHTA
|
418
|
+
{U_LOOP_DOUBLE} === O_TEHTA_DOUBLE
|
419
|
+
\endif
|
420
|
+
|
421
|
+
\** schwas : ɐ,ə **\
|
422
|
+
\** rhotic schwa : ɚ **\
|
423
|
+
\** schwi : ᵻ **\
|
424
|
+
\** schwu : ʌ **\
|
425
|
+
|
426
|
+
{IGROUP} === i,ɪ
|
427
|
+
{UGROUP} === u,ʊ
|
428
|
+
{EBGROUP} === ə,ɐ \** REDUCIBLE **\
|
429
|
+
|
430
|
+
{SCHWA_NON_REDUCIBLE} === ʘ \** NON REDUCIBLE **\
|
431
|
+
{ESCHWA} === (ə,ʘ) \** REDUCIBLE & NON REDUCIBLE E SCHWA **\
|
432
|
+
|
433
|
+
\if "schwi == SCHWI_LIKE_I"
|
434
|
+
{IGROUP} === {IGROUP},ᵻ
|
435
|
+
\else
|
436
|
+
{EBGROUP} === {EBGROUP},ᵻ
|
437
|
+
\endif
|
438
|
+
|
439
|
+
{W_OMBU_GROUP} === {NULL}
|
440
|
+
{_W_OMBU_GROUP_} === {NULL}
|
441
|
+
\if "open_mid_back_unrounded == OMBU_GRAVE"
|
442
|
+
{W_OMBU_GROUP} === * (ʌ)
|
443
|
+
{_W_OMBU_GROUP_} === * E_TEHTA_GRAVE
|
444
|
+
\elsif "open_mid_back_unrounded == OMBU_THINNAS"
|
445
|
+
{W_OMBU_GROUP} === * (ʌ)
|
446
|
+
{_W_OMBU_GROUP_} === * THINNAS
|
447
|
+
\else
|
448
|
+
{EBGROUP} === {EBGROUP},ʌ
|
449
|
+
\endif
|
450
|
+
|
451
|
+
{A_FRONT} === (æ,a) \** Always short **\
|
452
|
+
{A_BACK} === (ɑ) \** Always long **\
|
453
|
+
{E_FRONT} === (e,ɛ)
|
454
|
+
{E_BACK} === ({EBGROUP})
|
455
|
+
{E_BACK_RHOTIC} === (ɚ,ɜ) \** Rhotic schwas are treated independently **\
|
456
|
+
{I} === ({IGROUP})
|
457
|
+
{O} === (o,ɒ,ɔ) \** force, mock, lord **\
|
458
|
+
{U} === ({UGROUP})
|
459
|
+
|
460
|
+
|
461
|
+
{AA_FRONT} === {A_FRONT}ː \** long front a probably does not exist **\
|
462
|
+
{AA_BACK} === {A_BACK}ː
|
463
|
+
{EE_FRONT} === {E_FRONT}ː
|
464
|
+
{EE_BACK} === {E_BACK}ː \** long back e probably does not exist when not rhotic **\
|
465
|
+
{EE_BACK_RHOTIC} === {E_BACK_RHOTIC}ː
|
466
|
+
{II} === {I}ː
|
467
|
+
{OO} === {O}ː
|
468
|
+
{UU} === {U}ː
|
469
|
+
|
470
|
+
{W_SCHWA_NON_REDUCIBLE} === {NULL}
|
471
|
+
{_W_SCHWA_NON_REDUCIBLE_} === {NULL}
|
472
|
+
|
473
|
+
\if "implicit_schwa == IMPLICIT_SCHWA_YES"
|
474
|
+
{_IMPLICIT_SCHWA_} === {NULL}
|
475
|
+
\if "implicit_schwa_non_reducible == IMPLICIT_SCHWA_NON_REDUCIBLE_UNUTIXE_IF_POSSIBLE"
|
476
|
+
{W_SCHWA_NON_REDUCIBLE} === * {SCHWA_NON_REDUCIBLE}
|
477
|
+
{_W_SCHWA_NON_REDUCIBLE_} === * UNUTIXE
|
478
|
+
\endif
|
479
|
+
\else
|
480
|
+
{_IMPLICIT_SCHWA_} === UNUTIXE
|
481
|
+
\endif
|
482
|
+
|
483
|
+
\** GB DIPHTHONGS **\
|
484
|
+
\** +dˈeɪ +skˈaɪ +bˈɔɪ +bˈiə +bˈeə +tˈʊə +ɡˌəʊ +kˈaʊ **\
|
485
|
+
\** US DIPHTHONGS **\
|
486
|
+
\** =dˈeɪ =skˈaɪ =bˈɔɪ -bˈɪɹ -bˈɛɹ -tˈʊɹ +ɡˌoʊ =kˈaʊ **\
|
487
|
+
|
488
|
+
\** U Diphthongs **\
|
489
|
+
{AW} === aʊ \** cow **\
|
490
|
+
{OW} === oʊ \** US most / mˈoʊst **\
|
491
|
+
{EW} === {ESCHWA}ʊ \** GB go **\
|
492
|
+
{UW} === uʊ \** goose if pronounced with labializing accent ... we don't have this in our pronunciations ; the preprocessor produces it from 'uː' when long_u == LONG_U_AS_DIPHTONG **\
|
493
|
+
|
494
|
+
\** I Diphthongs : eɪ (day) / aɪ (sky) / ɔɪ (boy) **\
|
495
|
+
{AJ} === aɪ \** nine / nˈaɪn **\
|
496
|
+
{EJ} === eɪ \** game / ɡˈeɪm **\
|
497
|
+
{OJ} === ɔɪ \** boy **\
|
498
|
+
{IJ} === iɪ \** fleece if pronounced with palatalising accent ; the preprocessor produces it from 'iː' when long_i == LONG_I_AS_DIPHTONG **\
|
499
|
+
|
500
|
+
\** ə diphthongs : iə (GB : beer) / eə (GB: bear) / ʊə (GB: tour) **\
|
501
|
+
{IER} === i{ESCHWA} \** GB Beer **\
|
502
|
+
{EAR} === e{ESCHWA} \** GB Bear **\
|
503
|
+
{UER} === ʊ{ESCHWA} \** GB Tour **\
|
504
|
+
|
505
|
+
{VOWELS} === {A_BACK} * {A_FRONT} * {E_FRONT} * {E_BACK} * {E_BACK_RHOTIC} * {IER} * {EAR} * {UER} * {I} * {O} * {U} {W_SCHWA_NON_REDUCIBLE} {W_OMBU_GROUP}
|
506
|
+
{TEHTAR} === A_TEHTA * A_TEHTA_REVERSED * E_TEHTA * {_IMPLICIT_SCHWA_} * {_IMPLICIT_SCHWA_} * UNUTIXE I_TEHTA * UNUTIXE E_TEHTA * UNUTIXE {U_LOOP} * I_TEHTA * {O_LOOP} * {U_LOOP} {_W_SCHWA_NON_REDUCIBLE_} {_W_OMBU_GROUP_}
|
507
|
+
|
508
|
+
{LVOWELS} === {AA_BACK} * {AA_FRONT} * {EE_FRONT} * {EE_BACK} * {EE_BACK_RHOTIC} * {II} * {OO} * {UU}
|
509
|
+
|
510
|
+
{DIPHTHONGS_R} === {AW} * {OW} * {EW} * {UW} * {AJ} * {EJ} * {OJ} * {IJ}
|
511
|
+
{_DIPHTHONGS_R_} === VALA A_TEHTA * VALA {O_LOOP} * VALA UNUTIXE * VALA {U_LOOP} * ANNA A_TEHTA * ANNA E_TEHTA * ANNA {O_LOOP} * ANNA I_TEHTA
|
512
|
+
|
513
|
+
{DIPHTHONGS} === {DIPHTHONGS_R}
|
514
|
+
{_DIPHTHONGS_} === {_DIPHTHONGS_R_}
|
515
|
+
|
516
|
+
{WLONG} === {NULL} \** long vowels that can be used as tehtar **\
|
517
|
+
{_WLONG_} === {NULL} \** tehtar of long vowels that can be used as tehtar **\
|
518
|
+
|
519
|
+
\** LV : Initialization step 1 **\
|
520
|
+
{_LONG_A_BACK_} === ARA A_TEHTA
|
521
|
+
{_LONG_A_FRONT_} === ARA A_TEHTA_REVERSED \** Should not be possible in English **\
|
522
|
+
{_LONG_E_FRONT_} === ARA E_TEHTA
|
523
|
+
{_LONG_E_BACK_} === ARA UNUTIXE \** PROBLEM (solved) : ara and unutixe don't work together. But this case will not appear : long back e is not possible when not rhotic. **\
|
524
|
+
{_LONG_E_BACK_RHOTIC_} === ARA UNUTIXE \** PROBLEM (solved) : ara and unutixe don't work together. But this case will not appear : simplified by prepro **\
|
525
|
+
{_LONG_I_} === ARA I_TEHTA
|
526
|
+
{_LONG_O_} === ARA {O_LOOP}
|
527
|
+
{_LONG_U_} === ARA {U_LOOP}
|
528
|
+
|
529
|
+
\** LV : Initialization step 2 **\
|
530
|
+
{_LONE_LONG_A_BACK_} === {_LONG_A_BACK_}
|
531
|
+
{_LONE_LONG_A_FRONT_} === {_LONG_A_FRONT_}
|
532
|
+
{_LONE_LONG_E_FRONT_} === {_LONG_E_FRONT_}
|
533
|
+
{_LONE_LONG_E_BACK_} === {_LONG_E_BACK_}
|
534
|
+
{_LONE_LONG_E_BACK_RHOTIC_} === {_LONG_E_BACK_RHOTIC_}
|
535
|
+
{_LONE_LONG_I_} === {_LONG_I_}
|
536
|
+
{_LONE_LONG_O_} === {_LONG_O_}
|
537
|
+
{_LONE_LONG_U_} === {_LONG_U_}
|
538
|
+
|
539
|
+
\if "long_back_a == LONG_BACK_A_IMPLICIT_CARRIER"
|
540
|
+
\** Remove carrier and use A_TEHTA as if it was a double tehta **\
|
541
|
+
{_LONG_A_BACK_} === A_TEHTA
|
542
|
+
{_LONE_LONG_A_BACK_} === TELCO {_LONG_A_BACK_} \** Lone long back a : short carrier + tehta, same pattern as the other long vowel blocks. Must target the A_BACK image, not the E_FRONT one. **\
|
543
|
+
{WLONG} === {WLONG} * {AA_BACK}
|
544
|
+
{_WLONG_} === {_WLONG_} * {_LONG_A_BACK_}
|
545
|
+
\endif
|
546
|
+
|
547
|
+
\if "long_front_e == LONG_FRONT_E_DOUBLE_TEHTA"
|
548
|
+
\** Does not exist in standard accents **\
|
549
|
+
{_LONG_E_FRONT_} === E_TEHTA_DOUBLE
|
550
|
+
{_LONE_LONG_E_FRONT_} === TELCO {_LONG_E_FRONT_}
|
551
|
+
{WLONG} === {WLONG} * {EE_FRONT}
|
552
|
+
{_WLONG_} === {_WLONG_} * {_LONG_E_FRONT_}
|
553
|
+
\endif
|
554
|
+
|
555
|
+
\if "long_back_e == LONG_BACK_E_DOUBLE_TEHTA"
|
556
|
+
\** This case should not be possible when not rhotic. **\
|
557
|
+
{_LONG_E_BACK_} === I_TEHTA_DOUBLE_INF
|
558
|
+
{_LONE_LONG_E_BACK_} === TELCO {_LONG_E_BACK_}
|
559
|
+
{WLONG} === {WLONG} * {EE_BACK}
|
560
|
+
{_WLONG_} === {_WLONG_} * {_LONG_E_BACK_}
|
561
|
+
\endif
|
562
|
+
|
563
|
+
\if "long_i == LONG_I_DOUBLE_TEHTA"
|
564
|
+
{_LONG_I_} === I_TEHTA_DOUBLE
|
565
|
+
{_LONE_LONG_I_} === TELCO {_LONG_I_}
|
566
|
+
{WLONG} === {WLONG} * {II}
|
567
|
+
{_WLONG_} === {_WLONG_} * {_LONG_I_}
|
568
|
+
\endif
|
569
|
+
|
570
|
+
\if "long_o == LONG_O_DOUBLE_TEHTA"
|
571
|
+
{_LONG_O_} === {O_LOOP_DOUBLE}
|
572
|
+
{_LONE_LONG_O_} === TELCO {_LONG_O_}
|
573
|
+
{WLONG} === {WLONG} * {OO}
|
574
|
+
{_WLONG_} === {_WLONG_} * {_LONG_O_}
|
575
|
+
\endif
|
576
|
+
|
577
|
+
\if "long_u == LONG_U_DOUBLE_TEHTA"
|
578
|
+
{_LONG_U_} === {U_LOOP_DOUBLE}
|
579
|
+
{_LONE_LONG_U_} === TELCO {_LONG_U_}
|
580
|
+
{WLONG} === {WLONG} * {UU}
|
581
|
+
{_WLONG_} === {_WLONG_} * {_LONG_U_}
|
582
|
+
\endif
|
583
|
+
|
584
|
+
\** Define a variable for the images of all long vowels **\
|
585
|
+
{_LONE_LONG_VOWELS_} === {_LONE_LONG_A_BACK_} * {_LONE_LONG_A_FRONT_} * {_LONE_LONG_E_FRONT_} * {_LONE_LONG_E_BACK_} * {_LONE_LONG_E_BACK_RHOTIC_} * {_LONE_LONG_I_} * {_LONE_LONG_O_} * {_LONE_LONG_U_}
|
586
|
+
|
587
|
+
{V_D} === [ {VOWELS} {WLONG} ]
|
588
|
+
{V_D_WN} === [ {VOWELS} {WLONG} * {NULL} ]
|
589
|
+
|
590
|
+
{_V_D_} === [ {TEHTAR} {_WLONG_} ]
|
591
|
+
{_V_D_WN_} === [ {TEHTAR} {_WLONG_} * {NULL} ]
|
592
|
+
|
593
|
+
\** Vowel rules **\
|
594
|
+
[{VOWELS}] --> TELCO [{TEHTAR}] \** Replace isolated short vowels **\
|
595
|
+
[{DIPHTHONGS_R}]{RWS} --> [{_DIPHTHONGS_R_}]{_RWS_} \** Replace diphthongs **\
|
596
|
+
|
597
|
+
\if "implicit_schwa_non_reducible == IMPLICIT_SCHWA_NON_REDUCIBLE_ALWAYS_TELCO"
|
598
|
+
ʘ --> TELCO
|
599
|
+
\endif
|
600
|
+
|
601
|
+
\** LONE LONG VOWELS **\
|
602
|
+
[{LVOWELS}] --> [{_LONE_LONG_VOWELS_}]
|
603
|
+
|
604
|
+
{_WH_} === HWESTA_SINDARINWA
|
605
|
+
\if "ancient_voiceless_labiovelar_fricative_wh == WH_VLVF_WHINE_MERGER"
|
606
|
+
{_WH_} === VALA
|
607
|
+
\endif
|
608
|
+
|
609
|
+
\beg macro serie_l ARG_SL _ARG_SL_
|
610
|
+
{V_D_WN}[{ARG_SL}]{LWS} --> {__LWSX__} --> [{_ARG_SL_}]{_V_D_WN_}{_LWS_}
|
611
|
+
\end
|
612
|
+
\beg macro serie_ln ARG_SLN _ARG_SLN_
|
613
|
+
{V_D_WN}[{ARG_SLN}]{LWS} --> {__LWSX__} --> [{_ARG_SLN_}]{NASAL}{_V_D_WN_}{_LWS_}
|
614
|
+
\end
|
615
|
+
\beg macro serie_r ARG_SR _ARG_SR_
|
616
|
+
{V_D_WN}[{ARG_SR}]{RWS} --> {__RWSX__} --> [{_ARG_SR_}]{_V_D_WN_}{_RWS_}
|
617
|
+
\end
|
618
|
+
\beg macro serie_rn ARG_SRN _ARG_SRN_
|
619
|
+
{V_D_WN}[{ARG_SRN}]{RWS} --> {__RWSX__} --> [{_ARG_SRN_}]{NASAL}{_V_D_WN_}{_RWS_}
|
620
|
+
\end
|
621
|
+
|
622
|
+
\** Nasal + Conditional macro **\
|
623
|
+
\beg macro serie_lnc ARG_SLN_COND _ARG_SLN_COND_
|
624
|
+
\if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
|
625
|
+
\deploy serie_ln {ARG_SLN_COND} {_ARG_SLN_COND_}
|
626
|
+
\endif
|
627
|
+
\end
|
628
|
+
|
629
|
+
\** Nasal + Conditional macro **\
|
630
|
+
\beg macro serie_rnc ARG_SRN_COND _ARG_SRN_COND_
|
631
|
+
\if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
|
632
|
+
\deploy serie_rn {ARG_SRN_COND} {_ARG_SRN_COND_}
|
633
|
+
\endif
|
634
|
+
\end
|
635
|
+
|
636
|
+
\beg macro serie ARG_L ARG_R ARG_LN ARG_RN _ARG_L_ _ARG_R_
|
637
|
+
\deploy serie_l {ARG_L} {_ARG_L_}
|
638
|
+
\deploy serie_r {ARG_R} {_ARG_R_}
|
639
|
+
\deploy serie_lnc {ARG_LN} {_ARG_L_}
|
640
|
+
\deploy serie_rnc {ARG_RN} {_ARG_R_}
|
641
|
+
\end
|
642
|
+
|
643
|
+
\** ----------------------------------------------------------- **\
|
644
|
+
{L1R} === (t,ɾ,ʔ) * p
|
645
|
+
{L1L} === tʃ * k
|
646
|
+
{L1R_NASAL} === n(t,ɾ,ʔ) * mp
|
647
|
+
{L1L_NASAL} === ntʃ * ŋk
|
648
|
+
{_L1R_} === TINCO * PARMA
|
649
|
+
{_L1L_} === CALMA * QUESSE
|
650
|
+
|
651
|
+
\deploy serie {L1L} {L1R} {L1L_NASAL} {L1R_NASAL} {_L1L_} {_L1R_}
|
652
|
+
|
653
|
+
\** ----------------------------------------------------------- **\
|
654
|
+
{L2R} === d * b
|
655
|
+
{L2L} === dʒ * (ɡ,g)
|
656
|
+
{L2R_NASAL} === nd * mb
|
657
|
+
{L2L_NASAL} === ndʒ * ŋ(ɡ,g)
|
658
|
+
{_L2R_} === ANDO * UMBAR
|
659
|
+
{_L2L_} === ANGA * UNGWE
|
660
|
+
|
661
|
+
\deploy serie {L2L} {L2R} {L2L_NASAL} {L2R_NASAL} {_L2L_} {_L2R_}
|
662
|
+
|
663
|
+
\** ----------------------------------------------------------- **\
|
664
|
+
{L3R} === θ * f * ʃ * x
|
665
|
+
{L3R_NASAL} === nθ * mf * nʃ * ŋx
|
666
|
+
{_L3R_} === SULE * FORMEN * AHA * HWESTA
|
667
|
+
|
668
|
+
\deploy serie_r {L3R} {_L3R_}
|
669
|
+
\deploy serie_rnc {L3R_NASAL} {_L3R_}
|
670
|
+
|
671
|
+
\** ----------------------------------------------------------- **\
|
672
|
+
{L4R} === ð * v * ʒ * ɣ
|
673
|
+
{L4R_NASAL} === nð * mv * nʒ * ŋɣ
|
674
|
+
{_L4R_} === ANTO * AMPA * ANCA * UNQUE
|
675
|
+
|
676
|
+
\deploy serie_r {L4R} {_L4R_}
|
677
|
+
\deploy serie_rnc {L4R_NASAL} {_L4R_}
|
678
|
+
|
679
|
+
\** ----------------------------------------------------------- **\
|
680
|
+
{L5R} === (n,n̩) * m * n(j,J) * ŋ
|
681
|
+
{_L5R_} === NUMEN * MALTA * NOLDO * NWALME
|
682
|
+
|
683
|
+
\** no nasals for this serie **\
|
684
|
+
\deploy serie_r {L5R} {_L5R_}
|
685
|
+
|
686
|
+
\** ----------------------------------------------------------- **\
|
687
|
+
{L6R} === w
|
688
|
+
{L6R_NASAL} === nw
|
689
|
+
{_L6R_} === VALA
|
690
|
+
|
691
|
+
{L6R_NN} === (j,J)
|
692
|
+
{_L6R_NN_} === ANNA \** ORE for rhoticized schwas **\
|
693
|
+
|
694
|
+
\deploy serie_r {L6R} {_L6R_}
|
695
|
+
\deploy serie_r {L6R_NN} {_L6R_NN_}
|
696
|
+
\deploy serie_rnc {L6R_NASAL} {_L6R_}
|
697
|
+
|
698
|
+
\** ----------------------------------------------------------- **\
|
699
|
+
{L7R} === r * ɹ * l
|
700
|
+
{_L7R_} === ROMEN * ORE * LAMBE \** ARDA / ALDA **\
|
701
|
+
|
702
|
+
\deploy serie_r {L7R} {_L7R_}
|
703
|
+
|
704
|
+
\** ----------------------------------------------------------- **\
|
705
|
+
{L8} === s * z
|
706
|
+
{L8_NASAL} === ns * nz
|
707
|
+
{_L8_} === SILME_NUQUERNA * ESSE_NUQUERNA
|
708
|
+
|
709
|
+
{V_D_WN}[{L8}] --> 2,1 --> [{_L8_}]{_V_D_WN_}
|
710
|
+
\if "s_consonants_r != SCONSR_SARINCE_ALWAYS && pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
|
711
|
+
\** Avoid clash between nasal sign and sa rince **\
|
712
|
+
{V_D_WN}[{L8_NASAL}] --> 2,1 --> [{_L8_}]{NASAL}{_V_D_WN_}
|
713
|
+
\endif
|
714
|
+
|
715
|
+
\** Single s/z : overload **\
|
716
|
+
s --> SILME
|
717
|
+
z --> ESSE
|
718
|
+
|
719
|
+
ns --> SILME_NUQUERNA {NASAL} \** Explicit override for 'ns'. NOTE(review): unlike the L8_NASAL rule above, which is guarded by an \if on s_consonants_r and pre_consonant_n_with_same_articulation_point, this rule is unconditional - confirm it should also apply when the nasal mark option is SEPARATE or when sa-rince is ALWAYS **\
|
720
|
+
nz --> ESSE_NUQUERNA {NASAL} \** Explicit override for 'nz' ; unconditional like the 'ns' rule, same review note applies **\
|
721
|
+
|
722
|
+
\** ----------------------------------------------------------- **\
|
723
|
+
{L9} === h * ʍ
|
724
|
+
{_L9_} === HYARMEN * {_WH_} \** YANTA / URE **\
|
725
|
+
|
726
|
+
{V_D_WN}[{L9}] --> 2,1 --> [{_L9_}]{_V_D_WN_}
|
727
|
+
|
728
|
+
\** -- SPECIAL TOKENS **\
|
729
|
+
|
730
|
+
\if "english_the == ENGLISH_THE_EXTENDED_TENGWAR"
|
731
|
+
_ð{ESCHWA}_ --> TW_EXT_21
|
732
|
+
_ðɪ_ --> TW_EXT_21 I_TEHTA
|
733
|
+
\endif
|
734
|
+
|
735
|
+
\if "english_of == ENGLISH_OF_EXTENDED_TENGWAR"
|
736
|
+
_(ɒ,ʌ)v_ --> TW_EXT_22
|
737
|
+
\endif
|
738
|
+
|
739
|
+
\if "english_the == ENGLISH_THE_EXTENDED_TENGWAR && english_of == ENGLISH_OF_EXTENDED_TENGWAR"
|
740
|
+
_OFTH{ESCHWA}_ --> TW_EXT_22 {GEMINATE}
|
741
|
+
_OFTHɪ_ --> TW_EXT_22 {GEMINATE} I_TEHTA
|
742
|
+
\endif
|
743
|
+
\end
|
744
|
+
|
745
|
+
\beg rules punctuation
|
746
|
+
. --> PUNCT_DDOT
|
747
|
+
.. --> PUNCT_DOT PUNCT_DDOT PUNCT_DOT
|
748
|
+
... --> PUNCT_TILD
|
749
|
+
… --> PUNCT_TILD
|
750
|
+
.... --> PUNCT_TILD
|
751
|
+
..... --> PUNCT_TILD
|
752
|
+
...... --> PUNCT_TILD
|
753
|
+
....... --> PUNCT_TILD
|
754
|
+
|
755
|
+
, --> PUNCT_DOT
|
756
|
+
: --> PUNCT_DOT
|
757
|
+
; --> PUNCT_DOT
|
758
|
+
! --> PUNCT_EXCLAM
|
759
|
+
? --> PUNCT_INTERR
|
760
|
+
· --> {NULL}
|
761
|
+
|
762
|
+
- --> {NULL}
|
763
|
+
– --> PUNCT_TILD
|
764
|
+
— --> PUNCT_TILD
|
765
|
+
|
766
|
+
\** Apostrophe **\
|
767
|
+
|
768
|
+
' --> {NULL}
|
769
|
+
’ --> {NULL}
|
770
|
+
|
771
|
+
\** NBSP **\
|
772
|
+
{NBSP} --> NBSP
|
773
|
+
|
774
|
+
\** Quotes **\
|
775
|
+
|
776
|
+
“ --> DQUOT_OPEN
|
777
|
+
” --> DQUOT_CLOSE
|
778
|
+
« --> DQUOT_OPEN
|
779
|
+
» --> DQUOT_CLOSE
|
780
|
+
|
781
|
+
[ --> PUNCT_PAREN_L
|
782
|
+
] --> PUNCT_PAREN_R
|
783
|
+
( --> PUNCT_PAREN_L
|
784
|
+
) --> PUNCT_PAREN_R
|
785
|
+
{ --> PUNCT_PAREN_L
|
786
|
+
} --> PUNCT_PAREN_R
|
787
|
+
< --> PUNCT_PAREN_L
|
788
|
+
> --> PUNCT_PAREN_R
|
789
|
+
|
790
|
+
\** Not universal between fonts ... **\
|
791
|
+
$ --> BOOKMARK_SIGN
|
792
|
+
≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
|
793
|
+
≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
|
794
|
+
\end
|
795
|
+
|
796
|
+
\beg rules numbers
|
797
|
+
0 --> NUM_0
|
798
|
+
1 --> NUM_1
|
799
|
+
2 --> NUM_2
|
800
|
+
3 --> NUM_3
|
801
|
+
4 --> NUM_4
|
802
|
+
5 --> NUM_5
|
803
|
+
6 --> NUM_6
|
804
|
+
7 --> NUM_7
|
805
|
+
8 --> NUM_8
|
806
|
+
9 --> NUM_9
|
807
|
+
A --> NUM_10
|
808
|
+
B --> NUM_11
|
809
|
+
\end
|
810
|
+
\end
|
811
|
+
|
812
|
+
\beg postprocessor
|
813
|
+
\resolve_virtuals
|
814
|
+
\end
|