glaemscribe 1.2.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/bin/glaemscribe +2 -2
  3. data/glaemresources/charsets/cirth_ds.cst +514 -179
  4. data/glaemresources/charsets/eldamar.cst +210 -0
  5. data/glaemresources/charsets/tengwar_ds_annatar.cst +2452 -130
  6. data/glaemresources/charsets/tengwar_ds_eldamar.cst +2319 -125
  7. data/glaemresources/charsets/tengwar_ds_elfica.cst +2317 -126
  8. data/glaemresources/charsets/tengwar_ds_parmaite.cst +2319 -127
  9. data/glaemresources/charsets/tengwar_ds_sindarin.cst +2318 -127
  10. data/glaemresources/charsets/tengwar_freemono.cst +1 -1
  11. data/glaemresources/charsets/tengwar_guni_annatar.cst +2451 -131
  12. data/glaemresources/charsets/tengwar_guni_eldamar.cst +2317 -126
  13. data/glaemresources/charsets/tengwar_guni_elfica.cst +2316 -127
  14. data/glaemresources/charsets/tengwar_guni_parmaite.cst +2319 -127
  15. data/glaemresources/charsets/tengwar_guni_sindarin.cst +2317 -126
  16. data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
  17. data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
  18. data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
  19. data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
  20. data/glaemresources/modes/japanese-tengwar.glaem +9 -4
  21. data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
  22. data/glaemresources/modes/raw-cirth.glaem +154 -0
  23. data/lib/api/charset_parser.rb +7 -1
  24. data/lib/api/mode.rb +35 -10
  25. data/lib/api/mode_parser.rb +21 -12
  26. data/lib/api/post_processor/outspace.rb +44 -0
  27. data/lib/api/rule_group.rb +1 -1
  28. data/lib/api/transcription_pre_post_processor.rb +8 -5
  29. data/lib/api/transcription_processor.rb +12 -9
  30. data/lib/glaemscribe.rb +2 -0
  31. data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
  32. data/lib_espeak/glaemscribe_tts.js +363 -223
  33. metadata +12 -6
@@ -0,0 +1,687 @@
1
+ \**
2
+
3
+ Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ the transcription of texts between writing systems, and more
5
+ specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ invented languages to some of his devised writing systems.
7
+
8
+ Copyright (C) 2015 Benjamin Babut (Talagan).
9
+
10
+ This program is free software: you can redistribute it and/or modify
11
+ it under the terms of the GNU Affero General Public License as published by
12
+ the Free Software Foundation, either version 3 of the License, or
13
+ any later version.
14
+
15
+ This program is distributed in the hope that it will be useful,
16
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ GNU Affero General Public License for more details.
19
+
20
+ You should have received a copy of the GNU Affero General Public License
21
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ **\
24
+
25
+ \beg changelog
26
+ \entry "0.0.1" "First version."
27
+ \end
28
+
29
+ \language "English"
30
+ \writing "Cirth"
31
+ \mode "English Angerthas based on Angerthas Daeron"
32
+ \version "0.0.1"
33
+ \authors "J.R.R. Tolkien, impl. Talagan (Benjamin Babut) with extrapolations"
34
+
35
+ \world primary_related_to_arda
36
+ \invention jrrt
37
+
38
+ \charset cirth_ds true
39
+
40
+ \raw_mode "raw-cirth"
41
+
42
+ \outspace CIRTH_SPACE
43
+
44
+ \beg options
45
+
46
+ \** ENGLISH accent/dialect/variant. It also controls espeak behaviour. **\
47
+ \beg option espeak_voice ESPEAK_VOICE_EN_TENGWAR
48
+ \value ESPEAK_VOICE_EN_TENGWAR 0
49
+ \value ESPEAK_VOICE_EN_TENGWAR_GB 1
50
+ \value ESPEAK_VOICE_EN_TENGWAR_RP 2
51
+ \value ESPEAK_VOICE_EN_TENGWAR_US 3
52
+ \end
53
+
54
+ \** ----------Special words ---------- **\
55
+
56
+ \** 'the' word **\
57
+ \beg option english_the ENGLISH_THE_EXTENDED_CIRTH
58
+ \value ENGLISH_THE_EXTENDED_CIRTH 0
59
+ \value ENGLISH_THE_FULL_WRITING 1
60
+ \radio
61
+ \end
62
+
63
+ \** 'and' word may be represented by a special cirth **\
64
+ \beg option english_and ENGLISH_AND_EXTENDED_CIRTH
65
+ \value ENGLISH_AND_EXTENDED_CIRTH 0
66
+ \value ENGLISH_AND_FULL_WRITING 1
67
+ \radio
68
+ \end
69
+
70
+ \** 'to' word (the word 'to' may have its vowel reduced to a schwa) **\
71
+ \beg option schwa_of_to SCHWA_OF_TO_U
72
+ \value SCHWA_OF_TO_U 0
73
+ \value SCHWA_OF_TO_SCHWA 1
74
+ \end
75
+
76
+
77
+ \** ---------- Vowel options ---------- **\
78
+
79
+ \** Long i like in 'fleece' **\
80
+ \beg option long_i LONG_I_AS_DIPHTONG
81
+ \radio
82
+ \value LONG_I_DOUBLE_CIRTH 0
83
+ \value LONG_I_AS_DIPHTONG 1
84
+ \end
85
+
86
+ \** long u like in 'goose' **\
87
+ \** We render it either as the long vowel given in the Angerthas table **\
88
+ \** Or (extrapolation) we use an optional Cirth_45_alt that could look like **\
89
+ \** a /u+w/ because it's visually a mix of u and w **\
90
+ \beg option long_u LONG_U_AS_LONG_VOWEL
91
+ \radio
92
+ \value LONG_U_AS_LONG_VOWEL 0
93
+ \value LONG_U_AS_DIPHTONG 1
94
+ \end
95
+
96
+ \** ---------- Schwa options -------------- **\
97
+
98
+ \** Remove unnecessary, natural schwa marks **\
99
+ \option implicit_schwa false
100
+
101
+ \beg option non_implicit_schwa_method NON_IMPLICIT_SCHWA_DIFFERENCIATE_REDUCIBLE
102
+ \value NON_IMPLICIT_SCHWA_DIFFERENCIATE_REDUCIBLE 0
103
+ \value NON_IMPLICIT_SCHWA_ALL_WITH_VERTICAL_BAR 1
104
+ \value NON_IMPLICIT_SCHWA_ALL_AS_ACCENTS 2
105
+ \visible_when "implicit_schwa == false"
106
+ \end
107
+
108
+ \beg option non_reducible_schwa_remaining NON_REDUCIBLE_SCHWA_REMAINING_AS_ACCENTS
109
+ \visible_when implicit_schwa
110
+ \value NON_REDUCIBLE_SCHWA_REMAINING_AS_VERTICAL_BARS 0
111
+ \value NON_REDUCIBLE_SCHWA_REMAINING_AS_ACCENTS 1
112
+ \end
113
+
114
+ \** Schwi, in US/JRRT **\
115
+ \beg option schwi SCHWI_SMALL_BAR
116
+ \radio
117
+ \value SCHWI_SMALL_BAR 0
118
+ \value SCHWI_LIKE_I 1
119
+ \value SCHWI_LIKE_SCHWA 2
120
+ \visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR || espeak_voice == ESPEAK_VOICE_EN_TENGWAR_US"
121
+ \end
122
+
123
+ \** 'strut' vowel special case **\
124
+ \beg option open_mid_back_unrounded OMBU_USE_LEFT_ORIENTED_CIRTH
125
+ \radio
126
+ \value OMBU_USE_LEFT_ORIENTED_CIRTH 0
127
+ \value OMBU_LIKE_SCHWA 1
128
+ \end
129
+
130
+ \** ---------- Diphthong options ---------- **\
131
+
132
+ \** 'cure', 'cute' diphthong **\
133
+ \beg option ju_diphthong JU_DIPHTHONG_SEPARATE
134
+ \radio
135
+ \value JU_DIPHTHONG_SEPARATE 0
136
+ \value JU_DIPHTHONG_LIKE_IW 1
137
+ \end
138
+
139
+ \** Horse / Hoarse vowel distinction (only JRRT/US accents) **\
140
+ \beg option horse_hoarse_merger HORSE_HOARSE_SEPARATE
141
+ \visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR || espeak_voice == ESPEAK_VOICE_EN_TENGWAR_US"
142
+ \value HORSE_HOARSE_MERGE 0
143
+ \value HORSE_HOARSE_SEPARATE 1
144
+ \end
145
+
146
+ \** ---------- Consonant options ---------- **\
147
+
148
+ \** 'wh' in old accents/US. Sometimes called 'wine/whine' merger. **\
149
+ \beg option ancient_voiceless_labiovelar_fricative_wh WH_VLVF_AS_IN_SINDARIN
150
+ \value WH_VLVF_AS_IN_SINDARIN 0
151
+ \value WH_VLVF_WHINE_MERGER 1
152
+ \end
153
+
154
+ \** Re-establishment of linking r in non-rhotic accent, ex : 'better life' vs 'betteR answer' **\
155
+ \beg option linking_r true
156
+ \visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR_RP || espeak_voice == ESPEAK_VOICE_EN_TENGWAR_GB"
157
+ \end
158
+
159
+ \** Intrusive r, like in vanillaR ice **\
160
+ \beg option intrusive_r true
161
+ \visible_when "espeak_voice != ESPEAK_VOICE_EN_TENGWAR_US"
162
+ \end
163
+
164
+ \beg option certh_for_y USE_CERTH_40
165
+ \value USE_CERTH_39 0
166
+ \value USE_CERTH_40 1
167
+ \radio
168
+ \end
169
+
170
+ \beg option certh_for_s USE_CERTH_34
171
+ \value USE_CERTH_34 0
172
+ \value USE_CERTH_35 1
173
+ \radio
174
+ \end
175
+
176
+ \beg option pre_consonant_n_with_same_articulation_point PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK
177
+ \value PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_SEPARATE 0
178
+ \value PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK 1
179
+ \end
180
+
181
+ \beg option pre_velar_n PRE_VELAR_N_ASSIMILABLE
182
+ \value PRE_VELAR_N_NON_ASSIMILABLE 0
183
+ \value PRE_VELAR_N_ASSIMILABLE 1
184
+ \end
185
+
186
+
187
+ \beg option numeral_system PENTADIC_SYSTEM
188
+ \value QUINARY_SYSTEM 0
189
+ \value PENTADIC_SYSTEM 1
190
+ \end
191
+
192
+ \** ---------- Styling options ---------- **\
193
+
194
+ \beg option space_character USE_NON_BREAKING_SPACE_SMALL
195
+ \value USE_NORMAL_SPACE 0
196
+ \value USE_NON_BREAKING_SPACE_SMALL 1
197
+ \value USE_NON_BREAKING_SPACE_BIG 2
198
+ \value USE_MIDDLE_DOT 3
199
+ \end
200
+
201
+ \option auto_spacing true
202
+
203
+ \end
204
+
205
+ \beg preprocessor
206
+ \downcase
207
+
208
+ \** Remove phonetics accentuation marks **\
209
+ \rxsubstitute "[ˈˌ]" ""
210
+
211
+ \** foreign words nasal a, split to "an" (ex: croissant) **\
212
+ \rxsubstitute "ɑ̃" "ɑn"
213
+
214
+ \** Non rhotic schwa simplification **\
215
+ \rxsubstitute "ɐ" "ə"
216
+
217
+ \if linking_r
218
+ \rxsubstitute "ɹ‿" "ɹ"
219
+ \else
220
+ \rxsubstitute "ɹ‿" ""
221
+ \endif
222
+
223
+ \if intrusive_r
224
+ \rxsubstitute "ɹ̩‿" "ɹ"
225
+ \else
226
+ \rxsubstitute "ɹ̩‿" ""
227
+ \endif
228
+
229
+ \if "schwa_of_to == SCHWA_OF_TO_U"
230
+ \substitute "ʊ̟" "ʊ"
231
+ \else
232
+ \substitute "ʊ̟" "ə"
233
+ \endif
234
+
235
+ \if "pre_velar_n == PRE_VELAR_N_ASSIMILABLE"
236
+ \rxsubstitute "n‿" "ŋ"
237
+ \else
238
+ \rxsubstitute "n‿" "n"
239
+ \endif
240
+
241
+ \** IMPORTANT NOTE : in all following regexps **\
242
+ \** since we cannot rely on \b because using IPA **\
243
+ \** [a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃] stands for "any possible letter" **\
244
+ \** (^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]) ... stands for 'word beginning' **\
245
+ \** ... ($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]) stands for 'word ending' **\
246
+
247
+ \** 'the' variations **\
248
+ \** that the **\
249
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(ð[aæ]t)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
250
+ \** of the **\
251
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])([ɒʌ]v)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
252
+ \** for the **\
253
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(f[ɚə])(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
254
+ \** with the **\
255
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(wɪð)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
256
+ \** in the **\
257
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(ɪn)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
258
+ \** on the **\
259
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])([ɒɔ]n)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
260
+ \** from the **\
261
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(fɹʌm)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
262
+ \** was the **\
263
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(wʌz)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
264
+
265
+ \if "horse_hoarse_merger == HORSE_HOARSE_SEPARATE"
266
+ \** Re-establish former diphthong **\
267
+ \substitute "oːɹ" "oʊɹ"
268
+ \endif
269
+
270
+ \** If treated as diphthong, change long i to i + schwi **\
271
+ \if "long_i == LONG_I_AS_DIPHTONG"
272
+ \substitute "iː" "iɪ"
273
+ \endif
274
+
275
+ \** Experimental, don't affect ju: at beginning of words/after consonant **\
276
+ \if "ju_diphthong == JU_DIPHTHONG_LIKE_IW"
277
+ \rxsubstitute "(juː|jʊ)" "iw"
278
+ \endif
279
+
280
+ \if "long_u == LONG_U_AS_DIPHTONG"
281
+ \substitute "uː" "uʊ"
282
+ \endif
283
+
284
+ \** ! Beware of the order of the following rules **\
285
+ \** ! Rhotic schwa : remove 1 level of length when superfluous and always add explicit mark **\
286
+ \rxsubstitute "[ɜɚ]ː?" "ɜɹ"
287
+
288
+ \** ! Potentially remove superfluous added rhotic marks **\
289
+ \rxsubstitute "ɹ+" "ɹ"
290
+ \** ! Disambiguate ɹ + vowel : ORE/ROMEN **\
291
+ \rxsubstitute "ɹ([ɑæaeɛʌɐəɜɚiɪᵻoɒɔuʊʘ])" "r\\1"
292
+
293
+ \** Convention : for non reducing schwas we will use ɤ̞ for ʌ and ʘ for all other cases **\
294
+
295
+ \** All schwas at beginning or end of words cannot reduce **\
296
+ \** or after vowels (== not consonant) **\
297
+ \** beware of ɪ as it can appear as consonant (lawyer) **\
298
+ \** same for ʊ for sour **\
299
+ \** Handling is not exactly the same as in the tengwar mode **\
300
+ \** Because letters are treated independently, not with a VC pattern **\
301
+ \** We thus need to add ending of words, that would be handled by telco otherwise **\
302
+
303
+ \** Mark non reducing schwa as ʘ **\
304
+ \rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([əɐɜɚ])" "\\1ʘ" \** beginning of words **\
305
+ \rxsubstitute "([əɐɜɚ])([^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]|r|$)" "ʘ\\2" \** ending of words **\
306
+
307
+ \if "schwi == SCHWI_LIKE_SCHWA"
308
+ \** Don't forget to mark schwis too (same word-edge handling as the schwa rules just above) **\
309
+ \rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([ᵻ])" "\\1ʘ" \** beginning of words **\
310
+ \rxsubstitute "ᵻ([^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]|r|$)" "ʘ\\1" \** ending of words ; the pattern has a single group, which holds the delimiter to put back **\
311
+ \endif
312
+
313
+ \** Don't forget to mark the ombus too. NOTE(review): the else branch emits ʘ (not ɤ̞) for ʌ, which seems at odds with the convention stated above ; confirm this is intended **\
314
+ \if "open_mid_back_unrounded == OMBU_LIKE_SCHWA"
315
+ \rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([ʌ])" "\\1ɤ̞" \** beginning of words **\
316
+ \rxsubstitute "ʌ([^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]|r|$)" "ɤ̞\\1" \** ending of words ; single group holds the delimiter to put back **\
317
+ \else
318
+ \rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([ʌ])" "\\1ʘ" \** beginning of words **\
319
+ \rxsubstitute "ʌ([^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]|r|$)" "ʘ\\1" \** ending of words ; single group holds the delimiter to put back **\
320
+ \endif
321
+
322
+ \if "auto_spacing == true"
323
+ \rxsubstitute "([^\\s])([.,;:!?])" "\\1 \\2"
324
+ \rxsubstitute "([.,;:!?])([^\\s])" "\\1 \\2"
325
+ \endif
326
+
327
+ \if "numeral_system == QUINARY_SYSTEM"
328
+ \elvish_numbers 5 false
329
+ \endif
330
+ \end
331
+
332
+ \beg processor
333
+ \beg rules litteral
334
+
335
+ {SCHWA_NON_REDUCIBLE} === ʘ \** NON REDUCIBLE **\
336
+ {SCHWU_NON_REDUCIBLE} === ɤ̞ \** NON REDUCIBLE **\
337
+
338
+ \** Very long logic here for schwa/schwu reducible/non reducible **\
339
+ \** Could be shorter ? **\
340
+ \if implicit_schwa
341
+ {_REDUCIBLE_SCHWA_} === {NULL}
342
+ {_REDUCIBLE_SCHWU_} === {NULL}
343
+
344
+ \if "non_reducible_schwa_remaining == NON_REDUCIBLE_SCHWA_REMAINING_AS_VERTICAL_BARS"
345
+ {_NON_REDUCIBLE_SCHWA_} === CIRTH_55
346
+ {_NON_REDUCIBLE_SCHWU_} === {_NON_REDUCIBLE_SCHWA_}
347
+
348
+ \if "open_mid_back_unrounded != OMBU_LIKE_SCHWA"
349
+ {_NON_REDUCIBLE_SCHWU_} === CIRTH_56
350
+ \endif
351
+ \else
352
+ {_NON_REDUCIBLE_SCHWA_} === CIRTH_55_ALT
353
+ {_NON_REDUCIBLE_SCHWU_} === {_NON_REDUCIBLE_SCHWA_}
354
+
355
+ \if "open_mid_back_unrounded != OMBU_LIKE_SCHWA"
356
+ {_NON_REDUCIBLE_SCHWU_} === CIRTH_56_ALT
357
+ \endif
358
+ \endif
359
+ \else
360
+ \if "non_implicit_schwa_method == NON_IMPLICIT_SCHWA_DIFFERENCIATE_REDUCIBLE"
361
+ {_NON_REDUCIBLE_SCHWA_} === CIRTH_55
362
+ {_REDUCIBLE_SCHWA_} === CIRTH_55_ALT
363
+ {_NON_REDUCIBLE_SCHWU_} === {_NON_REDUCIBLE_SCHWA_}
364
+ {_REDUCIBLE_SCHWU_} === {_REDUCIBLE_SCHWA_}
365
+ \if "open_mid_back_unrounded != OMBU_LIKE_SCHWA"
366
+ {_NON_REDUCIBLE_SCHWU_} === CIRTH_56
367
+ {_REDUCIBLE_SCHWU_} === CIRTH_56_ALT
368
+ \endif
369
+ \elsif "non_implicit_schwa_method == NON_IMPLICIT_SCHWA_ALL_WITH_VERTICAL_BAR"
370
+ {_NON_REDUCIBLE_SCHWA_} === CIRTH_55
371
+ {_REDUCIBLE_SCHWA_} === CIRTH_55
372
+ {_NON_REDUCIBLE_SCHWU_} === {_NON_REDUCIBLE_SCHWA_}
373
+ {_REDUCIBLE_SCHWU_} === {_REDUCIBLE_SCHWA_}
374
+ \if "open_mid_back_unrounded != OMBU_LIKE_SCHWA"
375
+ {_NON_REDUCIBLE_SCHWU_} === CIRTH_56
376
+ {_REDUCIBLE_SCHWU_} === CIRTH_56
377
+ \endif
378
+ \else
379
+ {_NON_REDUCIBLE_SCHWA_} === CIRTH_55_ALT
380
+ {_REDUCIBLE_SCHWA_} === CIRTH_55_ALT
381
+ {_NON_REDUCIBLE_SCHWU_} === {_NON_REDUCIBLE_SCHWA_}
382
+ {_REDUCIBLE_SCHWU_} === {_REDUCIBLE_SCHWA_}
383
+ \if "open_mid_back_unrounded != OMBU_LIKE_SCHWA"
384
+ {_NON_REDUCIBLE_SCHWU_} === CIRTH_56_ALT
385
+ {_REDUCIBLE_SCHWU_} === CIRTH_56_ALT
386
+ \endif
387
+ \endif
388
+ \endif
389
+
390
+
391
+ {IGROUP} === i,ɪ
392
+ {UGROUP} === u,ʊ
393
+ {EBGROUP} === ə,ɐ \** REDUCIBLE **\
394
+ {ESCHWA} === ə \** REDUCIBLE E SCHWA **\
395
+ {OMBU} === ʌ
396
+
397
+ {SCHWA_NON_REDUCIBLE} === ʘ
398
+ {SCHWU_NON_REDUCIBLE} === ɤ̞
399
+ {ALL_ESCHWA} === (ə,ʘ)
400
+
401
+ {W_INDEPENDENT_SCHWI} === {NULL}
402
+ {_W_INDEPENDENT_SCHWI_} === {NULL}
403
+
404
+
405
+ \if "schwi == SCHWI_LIKE_I"
406
+ {IGROUP} === {IGROUP},ᵻ
407
+ \elsif "schwi == SCHWI_LIKE_SCHWA"
408
+ {EBGROUP} === {EBGROUP},ᵻ
409
+ \else
410
+ {W_INDEPENDENT_SCHWI} === * ᵻ
411
+ {_W_INDEPENDENT_SCHWI_} === * CIRTH_59
412
+ \endif
413
+
414
+ {A_FRONT} === (æ,a) \** Always short **\
415
+ {A_BACK} === (ɑ) \** Always long **\
416
+ {E_FRONT} === (e,ɛ)
417
+ {E_BACK} === ({EBGROUP})
418
+ {E_BACK_RHOTIC} === (ɚ,ɜ) \** Rhotic schwas are treated independently **\
419
+ {I} === ({IGROUP})
420
+ {O} === (o,ɒ,ɔ) \** force, mock, lord **\
421
+ {U} === ({UGROUP})
422
+
423
+ {AA_FRONT} === {A_FRONT}ː \** long front a probably does not exist **\
424
+ {AA_BACK} === {A_BACK}ː
425
+ {EE_FRONT} === {E_FRONT}ː \** long front e probably does not exist **\
426
+ {EE_BACK} === {E_BACK}ː \** long back e probably does not exist when not rhotic **\
427
+ {EE_BACK_RHOTIC} === {E_BACK_RHOTIC}ː
428
+ {II} === {I}ː
429
+ {OO} === {O}ː
430
+ {UU} === {U}ː
431
+
432
+ \** GB DIPHTHONGS **\
433
+ \** +dˈeɪ +skˈaɪ +bˈɔɪ +bˈiə +bˈeə +tˈʊə +ɡˌəʊ +kˈaʊ **\
434
+ \** US DIPHTHONGS **\
435
+ \** =dˈeɪ =skˈaɪ =bˈɔɪ -bˈɪɹ -bˈɛɹ -tˈʊɹ +ɡˌoʊ =kˈaʊ **\
436
+
437
+ \** U Diphthongs **\
438
+ {AW} === aʊ \** cow **\
439
+ {OW} === oʊ \** US most / mˈoʊst **\
440
+ {EW} === {ALL_ESCHWA}ʊ \** GB go **\
441
+ {UW} === uʊ \** goose if pronounced with labializing accent ... we do not have this in our pronunciations **\
442
+
443
+ \** I diphthongs : eɪ (day) / aɪ (sky) / ɔɪ (boy) **\
444
+ {AJ} === aɪ \** nine / nˈaɪn **\
445
+ {EJ} === eɪ \** game / ɡˈeɪm **\
446
+ {OJ} === ɔɪ \** boy **\
447
+ {IJ} === iɪ \** fleece if pronounced with palatalising accent **\
448
+
449
+ \** ə diphthongs : iə (GB: beer) / eə (GB: bear) / ʊə (GB: tour) **\
450
+ {IER} === i{ALL_ESCHWA} \** GB Beer **\
451
+ {EAR} === e{ALL_ESCHWA} \** GB Bear **\
452
+ {UER} === ʊ{ALL_ESCHWA} \** GB Tour **\
453
+
454
+ {VOWELS} === {A_BACK} * {A_FRONT} * {E_FRONT} * {E_BACK} * {E_BACK_RHOTIC} * {IER} * {EAR} * {UER} * {I} * {O} * {U} * {OMBU} * {ESCHWA} * {W_INDEPENDENT_SCHWI}
455
+ {_VOWELS_} === CIRTH_49 * CIRTH_48 * CIRTH_46 * {_REDUCIBLE_SCHWA_} * {_REDUCIBLE_SCHWA_} * CIRTH_39 {_NON_REDUCIBLE_SCHWA_} * CIRTH_46 {_NON_REDUCIBLE_SCHWA_} * CIRTH_42 {_NON_REDUCIBLE_SCHWA_} * CIRTH_39 * CIRTH_50 * CIRTH_42 * {_REDUCIBLE_SCHWU_} * {_REDUCIBLE_SCHWA_} * {_W_INDEPENDENT_SCHWI_}
456
+
457
+ \** Since in English front /a/ (trap) is always short and back /a/ (calm) is always long **\
458
+ \** we reuse the same cirth without risking a clash. **\
459
+ \** Long back e is probably not possible **\
460
+
461
+ \** Cirth 47 is used twice, but long front E is not present in english **\
462
+
463
+ {LVOWELS} === {AA_BACK} * {AA_FRONT} * {EE_FRONT} * {EE_BACK} * {EE_BACK_RHOTIC} * {II} * {OO} * {UU}
464
+ {_LVOWELS_} === CIRTH_49 * CIRTH_48 * CIRTH_47 * {_REDUCIBLE_SCHWA_} * {_REDUCIBLE_SCHWA_} * CIRTH_39 CIRTH_39 * CIRTH_51 * CIRTH_43
465
+
466
+ {DIPHTHONGS_R} === {AW} * {OW} * {EW} * {UW} * {AJ} * {EJ} * {OJ} * {IJ}
467
+ {_DIPHTHONGS_R_} === CIRTH_EREB_5 * CIRTH_38 * CIRTH_EREB_1 * CIRTH_45_ALT * CIRTH_EREB_4 * CIRTH_47 * CIRTH_52_ALT * CIRTH_39 CIRTH_59
468
+
469
+ {DIPHTHONGS} === {DIPHTHONGS_R}
470
+ {_DIPHTHONGS_} === {_DIPHTHONGS_R_}
471
+
472
+ {SCHWA_NON_REDUCIBLE} --> {_NON_REDUCIBLE_SCHWA_}
473
+ {SCHWU_NON_REDUCIBLE} --> {_NON_REDUCIBLE_SCHWU_}
474
+
475
+ {VOWELS} --> {_VOWELS_}
476
+ {LVOWELS} --> {_LVOWELS_}
477
+ {DIPHTHONGS} --> {_DIPHTHONGS_}
478
+
479
+ {L1} === (t,ɾ,ʔ) * p * tʃ * k
480
+ {_L1_} === CIRTH_8 * CIRTH_1 * CIRTH_13 * CIRTH_18
481
+ {L1} --> {_L1_}
482
+
483
+ \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
484
+ {L1_NASAL} === n(t,ɾ,ʔ) * mp * ntʃ * ŋk
485
+ [{L1_NASAL}] --> [{_L1_}] TEHTA_CIRCUM
486
+ \endif
487
+
488
+ \** ------------- **\
489
+
490
+ {L2} === d * b * dʒ * ɡ
491
+ {_L2_} === CIRTH_9 * CIRTH_2 * CIRTH_14 * CIRTH_19
492
+ {L2} --> {_L2_}
493
+ \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
494
+ {L2_NASAL} === nd * mb * ndʒ * ŋɡ
495
+ [{L2_NASAL}] --> [{_L2_}] TEHTA_CIRCUM
496
+ \endif
497
+
498
+ \** ------------- **\
499
+
500
+ {L3} === θ * f * ʃ * x
501
+ {_L3_} === CIRTH_10 * CIRTH_3 * CIRTH_15 * CIRTH_20
502
+ {L3} --> {_L3_}
503
+ \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
504
+ {L3_NASAL} === nθ * mf * nʃ * ŋx
505
+ [{L3_NASAL}] --> [{_L3_}] TEHTA_CIRCUM
506
+ \endif
507
+
508
+ \** ------------- **\
509
+
510
+ {L4} === ð * v * ʒ * ɣ
511
+ {_L4_} === CIRTH_11 * CIRTH_4 * CIRTH_16 * CIRTH_21
512
+ {L4} --> {_L4_}
513
+ \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
514
+ {L4_NASAL} === nð * mv * nʒ * ŋɣ
515
+ [{L4_NASAL}] --> [{_L4_}] TEHTA_CIRCUM
516
+ \endif
517
+
518
+ \** ------------- **\
519
+
520
+ {L5} === (n,n̩) * m * n(j,J) * ŋ
521
+ {_L5_} === CIRTH_12 * CIRTH_6 * CIRTH_17 * CIRTH_22
522
+ {L5} --> {_L5_}
523
+
524
+ \** ------------- **\
525
+
526
+ {L6} === w
527
+ {_L6_} === CIRTH_44
528
+ {L6} --> {_L6_}
529
+
530
+ {L6_NASAL} === nw
531
+ \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
532
+ [{L6_NASAL}] --> [{_L6_}] TEHTA_CIRCUM
533
+ \endif
534
+
535
+ {L6_NN} === (j,J)
536
+ \if "certh_for_y == USE_CERTH_39"
537
+ {_L6_NN_} === CIRTH_39
538
+ \else
539
+ {_L6_NN_} === CIRTH_40
540
+ \endif
541
+ {L6_NN} --> {_L6_NN_}
542
+
543
+ \** ------------- **\
544
+
545
+ \** CIRTH_30 (rh in angerthas daeron) is not used and is a good choice for ɹ **\
546
+ \** since it is an alternate r and graphically reversed **\
547
+ {L7} === r * ɹ * l
548
+ {_L7_} === CIRTH_29 * CIRTH_30 * CIRTH_31
549
+ {L7} --> {_L7_}
550
+
551
+ \** ------------- **\
552
+
553
+ \if "certh_for_s == USE_CERTH_34"
554
+ {_S_} === CIRTH_34
555
+ \else
556
+ {_S_} === CIRTH_35
557
+ \endif
558
+
559
+ {L8} === s * z
560
+ {_L8_} === {_S_} * CIRTH_36
561
+ {L8} --> {_L8_}
562
+ \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
563
+ {L8_NASAL} === ns * nz
564
+ [{L8_NASAL}] --> [{_L8_}] TEHTA_CIRCUM
565
+ \endif
566
+
567
+ \** ------------- **\
568
+
569
+ \** Use same character as in sindarin **\
570
+ {_WH_} === CIRTH_5
571
+ \if "ancient_voiceless_labiovelar_fricative_wh == WH_VLVF_WHINE_MERGER"
572
+ {_WH_} === CIRTH_44
573
+ \endif
574
+
575
+ {L9} === h * ʍ
576
+ {_L9_} === CIRTH_54 * {_WH_}
577
+ {L9} --> {_L9_}
578
+
579
+ \** ------------- **\
580
+ \** -- SPECIAL TOKENS **\
581
+
582
+ \if "english_the == ENGLISH_THE_EXTENDED_CIRTH"
583
+ _ð{ALL_ESCHWA}_ --> CIRTH_EREB_3
584
+ _ðɪ_ --> CIRTH_EREB_3 CIRTH_59 \** or CIRTH_39 (long vertical bar) **\
585
+ \endif
586
+
587
+ \if "english_and == ENGLISH_AND_EXTENDED_CIRTH"
588
+ _{A_FRONT}nd_ --> CIRTH_60
589
+ \endif
590
+
591
+ \end
592
+
593
+ \beg rules punctuation
594
+ . --> CIRTH_PUNCT_THREE_DOTS
595
+ .. --> CIRTH_PUNCT_THREE_DOTS
596
+ ... --> CIRTH_PUNCT_THREE_DOTS
597
+ … --> CIRTH_PUNCT_THREE_DOTS
598
+ .... --> CIRTH_PUNCT_FOUR_DOTS
599
+ ..... --> CIRTH_PUNCT_FOUR_DOTS
600
+ ...... --> CIRTH_PUNCT_FOUR_DOTS
601
+ ....... --> CIRTH_PUNCT_FOUR_DOTS
602
+
603
+ , --> CIRTH_PUNCT_MID_DOT
604
+ : --> CIRTH_PUNCT_TWO_DOTS
605
+ ; --> CIRTH_PUNCT_TWO_DOTS
606
+ ! --> CIRTH_PUNCT_THREE_DOTS
607
+ ? --> CIRTH_PUNCT_THREE_DOTS
608
+ · --> {NULL}
609
+
610
+ - --> {NULL}
611
+ – --> CIRTH_PUNCT_TWO_DOTS
612
+ — --> CIRTH_PUNCT_TWO_DOTS
613
+
614
+ \** Apostrophe **\
615
+
616
+ ' --> {NULL}
617
+ ’ --> {NULL}
618
+
619
+ \** NBSP **\
620
+ {NBSP} --> NBSP
621
+
622
+ \** Quotes **\
623
+
624
+ “ --> CIRTH_PUNCT_DOUBLE_VBAR
625
+ ” --> CIRTH_PUNCT_DOUBLE_VBAR
626
+ « --> CIRTH_PUNCT_DOUBLE_VBAR
627
+ » --> CIRTH_PUNCT_DOUBLE_VBAR
628
+
629
+ [ --> CIRTH_PUNCT_DOUBLE_VBAR
630
+ ] --> CIRTH_PUNCT_DOUBLE_VBAR
631
+ ( --> CIRTH_PUNCT_DOUBLE_VBAR
632
+ ) --> CIRTH_PUNCT_DOUBLE_VBAR
633
+ { --> CIRTH_PUNCT_DOUBLE_VBAR
634
+ } --> CIRTH_PUNCT_DOUBLE_VBAR
635
+ ⟨ --> CIRTH_PUNCT_DOUBLE_VBAR
636
+ ⟩ --> CIRTH_PUNCT_DOUBLE_VBAR
637
+ < --> CIRTH_PUNCT_DOUBLE_VBAR
638
+ > --> CIRTH_PUNCT_DOUBLE_VBAR
639
+
640
+ \** Not universal between fonts ... **\
641
+ $ --> CIRTH_PUNCT_STAR
642
+
643
+ \end
644
+
645
+ \beg rules numbers
646
+ \** Completely invented pentimal system based on the number of strokes **\
647
+
648
+ \if "numeral_system == QUINARY_SYSTEM"
649
+ 0 --> CIRTH_37 TEHTA_SUB_DOT
650
+
651
+ 1 --> CIRTH_NUMERAL_1 TEHTA_SUB_DOT
652
+ 2 --> CIRTH_NUMERAL_2 TEHTA_SUB_DOT
653
+ 3 --> CIRTH_NUMERAL_3 TEHTA_SUB_DOT
654
+ 4 --> CIRTH_NUMERAL_4 TEHTA_SUB_DOT
655
+ 5 --> CIRTH_NUMERAL_5 TEHTA_SUB_DOT
656
+ \else
657
+ 0 --> CIRTH_31 TEHTA_SUB_DOT
658
+
659
+ 1 --> CIRTH_10 TEHTA_SUB_DOT
660
+ 2 --> CIRTH_3 TEHTA_SUB_DOT
661
+ 3 --> CIRTH_4 TEHTA_SUB_DOT
662
+ 4 --> CIRTH_7 TEHTA_SUB_DOT
663
+
664
+ 5 --> CIRTH_39 TEHTA_SUB_DOT
665
+
666
+ 6 --> CIRTH_8 TEHTA_SUB_DOT
667
+ 7 --> CIRTH_1 TEHTA_SUB_DOT
668
+ 8 --> CIRTH_2 TEHTA_SUB_DOT
669
+ 9 --> CIRTH_6 TEHTA_SUB_DOT
670
+ \endif
671
+ \end
672
+
673
+ \end
674
+
675
+ \beg postprocessor
676
+ \if "space_character == USE_NON_BREAKING_SPACE_SMALL"
677
+ \outspace CIRTH_SPACE
678
+ \elsif "space_character == USE_NON_BREAKING_SPACE_BIG"
679
+ \outspace CIRTH_SPACE_BIG
680
+ \elsif "space_character == USE_MIDDLE_DOT"
681
+ \outspace "CIRTH_SPACE CIRTH_PUNCT_MID_DOT CIRTH_SPACE"
682
+ \else
683
+ \outspace SPACE
684
+ \endif
685
+
686
+ \resolve_virtuals
687
+ \end