glaemscribe 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/bin/glaemscribe +2 -2
  3. data/glaemresources/charsets/cirth_ds.cst +514 -179
  4. data/glaemresources/charsets/eldamar.cst +210 -0
  5. data/glaemresources/charsets/tengwar_ds_annatar.cst +2452 -130
  6. data/glaemresources/charsets/tengwar_ds_eldamar.cst +2319 -125
  7. data/glaemresources/charsets/tengwar_ds_elfica.cst +2317 -126
  8. data/glaemresources/charsets/tengwar_ds_parmaite.cst +2319 -127
  9. data/glaemresources/charsets/tengwar_ds_sindarin.cst +2318 -127
  10. data/glaemresources/charsets/tengwar_freemono.cst +1 -1
  11. data/glaemresources/charsets/tengwar_guni_annatar.cst +2451 -131
  12. data/glaemresources/charsets/tengwar_guni_eldamar.cst +2317 -126
  13. data/glaemresources/charsets/tengwar_guni_elfica.cst +2316 -127
  14. data/glaemresources/charsets/tengwar_guni_parmaite.cst +2319 -127
  15. data/glaemresources/charsets/tengwar_guni_sindarin.cst +2317 -126
  16. data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
  17. data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
  18. data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
  19. data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
  20. data/glaemresources/modes/japanese-tengwar.glaem +9 -4
  21. data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
  22. data/glaemresources/modes/raw-cirth.glaem +154 -0
  23. data/lib/api/charset_parser.rb +7 -1
  24. data/lib/api/mode.rb +35 -10
  25. data/lib/api/mode_parser.rb +21 -12
  26. data/lib/api/post_processor/outspace.rb +44 -0
  27. data/lib/api/rule_group.rb +1 -1
  28. data/lib/api/transcription_pre_post_processor.rb +8 -5
  29. data/lib/api/transcription_processor.rb +12 -9
  30. data/lib/glaemscribe.rb +2 -0
  31. data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
  32. data/lib_espeak/glaemscribe_tts.js +363 -223
  33. metadata +12 -6
@@ -0,0 +1,687 @@
1
+ \**
2
+
3
+ Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ the transcription of texts between writing systems, and more
5
+ specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ invented languages to some of his devised writing systems.
7
+
8
+ Copyright (C) 2015 Benjamin Babut (Talagan).
9
+
10
+ This program is free software: you can redistribute it and/or modify
11
+ it under the terms of the GNU Affero General Public License as published by
12
+ the Free Software Foundation, either version 3 of the License, or
13
+ any later version.
14
+
15
+ This program is distributed in the hope that it will be useful,
16
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ GNU Affero General Public License for more details.
19
+
20
+ You should have received a copy of the GNU Affero General Public License
21
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ **\
24
+
25
+ \beg changelog
26
+ \entry "0.0.1" "First version."
27
+ \end
28
+
29
+ \language "English"
30
+ \writing "Cirth"
31
+ \mode "English Angerthas based on Angerthas Daeron"
32
+ \version "0.0.1"
33
+ \authors "J.R.R. Tolkien, impl. Talagan (Benjamin Babut) with extrapolations"
34
+
35
+ \world primary_related_to_arda
36
+ \invention jrrt
37
+
38
+ \charset cirth_ds true
39
+
40
+ \raw_mode "raw-cirth"
41
+
42
+ \outspace CIRTH_SPACE
43
+
44
+ \beg options
45
+
46
+ \** ENGLISH accent/dialect/variant. It also controls espeak behaviour. **\
47
+ \beg option espeak_voice ESPEAK_VOICE_EN_TENGWAR
48
+ \value ESPEAK_VOICE_EN_TENGWAR 0
49
+ \value ESPEAK_VOICE_EN_TENGWAR_GB 1
50
+ \value ESPEAK_VOICE_EN_TENGWAR_RP 2
51
+ \value ESPEAK_VOICE_EN_TENGWAR_US 3
52
+ \end
53
+
54
+ \** ----------Special words ---------- **\
55
+
56
+ \** 'the' word **\
57
+ \beg option english_the ENGLISH_THE_EXTENDED_CIRTH
58
+ \value ENGLISH_THE_EXTENDED_CIRTH 0
59
+ \value ENGLISH_THE_FULL_WRITING 1
60
+ \radio
61
+ \end
62
+
63
+ \** 'and' word may be represented by a special cirth **\
64
+ \beg option english_and ENGLISH_AND_EXTENDED_CIRTH
65
+ \value ENGLISH_AND_EXTENDED_CIRTH 0
66
+ \value ENGLISH_AND_FULL_WRITING 1
67
+ \radio
68
+ \end
69
+
70
+ \** 'to' word (the word 'to' may have its vowel reduced to a schwa) **\
71
+ \beg option schwa_of_to SCHWA_OF_TO_U
72
+ \value SCHWA_OF_TO_U 0
73
+ \value SCHWA_OF_TO_SCHWA 1
74
+ \end
75
+
76
+
77
+ \** ---------- Vowel options ---------- **\
78
+
79
+ \** Long i like in 'fleece' **\
80
+ \beg option long_i LONG_I_AS_DIPHTONG
81
+ \radio
82
+ \value LONG_I_DOUBLE_CIRTH 0
83
+ \value LONG_I_AS_DIPHTONG 1
84
+ \end
85
+
86
+ \** long u like in 'goose' **\
87
+ \** We render it either as the long vowel given in the Angerthas table **\
88
+ \** Or (extrapolation) we use an optional Cirth_45_alt that could look like **\
89
+ \** a /u+w/ because it's visually a mix of u and w **\
90
+ \beg option long_u LONG_U_AS_LONG_VOWEL
91
+ \radio
92
+ \value LONG_U_AS_LONG_VOWEL 0
93
+ \value LONG_U_AS_DIPHTONG 1
94
+ \end
95
+
96
+ \** ---------- Schwa options -------------- **\
97
+
98
+ \** Remove unnecessary, natural schwa marks **\
99
+ \option implicit_schwa false
100
+
101
+ \beg option non_implicit_schwa_method NON_IMPLICIT_SCHWA_DIFFERENCIATE_REDUCIBLE
102
+ \value NON_IMPLICIT_SCHWA_DIFFERENCIATE_REDUCIBLE 0
103
+ \value NON_IMPLICIT_SCHWA_ALL_WITH_VERTICAL_BAR 1
104
+ \value NON_IMPLICIT_SCHWA_ALL_AS_ACCENTS 2
105
+ \visible_when "implicit_schwa == false"
106
+ \end
107
+
108
+ \beg option non_reducible_schwa_remaining NON_REDUCIBLE_SCHWA_REMAINING_AS_ACCENTS
109
+ \visible_when implicit_schwa
110
+ \value NON_REDUCIBLE_SCHWA_REMAINING_AS_VERTICAL_BARS 0
111
+ \value NON_REDUCIBLE_SCHWA_REMAINING_AS_ACCENTS 1
112
+ \end
113
+
114
+ \** Schwi, in US/JRRT **\
115
+ \beg option schwi SCHWI_SMALL_BAR
116
+ \radio
117
+ \value SCHWI_SMALL_BAR 0
118
+ \value SCHWI_LIKE_I 1
119
+ \value SCHWI_LIKE_SCHWA 2
120
+ \visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR || espeak_voice == ESPEAK_VOICE_EN_TENGWAR_US"
121
+ \end
122
+
123
+ \** 'strut' vowel special case **\
124
+ \beg option open_mid_back_unrounded OMBU_USE_LEFT_ORIENTED_CIRTH
125
+ \radio
126
+ \value OMBU_USE_LEFT_ORIENTED_CIRTH 0
127
+ \value OMBU_LIKE_SCHWA 1
128
+ \end
129
+
130
+ \** ---------- Diphthong options ---------- **\
131
+
132
+ \** 'cure', 'cute' diphthong **\
133
+ \beg option ju_diphthong JU_DIPHTHONG_SEPARATE
134
+ \radio
135
+ \value JU_DIPHTHONG_SEPARATE 0
136
+ \value JU_DIPHTHONG_LIKE_IW 1
137
+ \end
138
+
139
+ \** Horse / Hoarse vowel distinction (only JRRT/US accents) **\
140
+ \beg option horse_hoarse_merger HORSE_HOARSE_SEPARATE
141
+ \visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR || espeak_voice == ESPEAK_VOICE_EN_TENGWAR_US"
142
+ \value HORSE_HOARSE_MERGE 0
143
+ \value HORSE_HOARSE_SEPARATE 1
144
+ \end
145
+
146
+ \** ---------- Consonant options ---------- **\
147
+
148
+ \** 'wh' in old accents/US. Sometimes called 'wine/whine' merger. **\
149
+ \beg option ancient_voiceless_labiovelar_fricative_wh WH_VLVF_AS_IN_SINDARIN
150
+ \value WH_VLVF_AS_IN_SINDARIN 0
151
+ \value WH_VLVF_WHINE_MERGER 1
152
+ \end
153
+
154
+ \** Re-establishment of linking r in non-rhotic accent, ex : 'better life' vs 'betteR answer' **\
155
+ \beg option linking_r true
156
+ \visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR_RP || espeak_voice == ESPEAK_VOICE_EN_TENGWAR_GB"
157
+ \end
158
+
159
+ \** Intrusive r, like in vanillaR ice **\
160
+ \beg option intrusive_r true
161
+ \visible_when "espeak_voice != ESPEAK_VOICE_EN_TENGWAR_US"
162
+ \end
163
+
164
+ \beg option certh_for_y USE_CERTH_40
165
+ \value USE_CERTH_39 0
166
+ \value USE_CERTH_40 1
167
+ \radio
168
+ \end
169
+
170
+ \beg option certh_for_s USE_CERTH_34
171
+ \value USE_CERTH_34 0
172
+ \value USE_CERTH_35 1
173
+ \radio
174
+ \end
175
+
176
+ \beg option pre_consonant_n_with_same_articulation_point PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK
177
+ \value PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_SEPARATE 0
178
+ \value PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK 1
179
+ \end
180
+
181
+ \beg option pre_velar_n PRE_VELAR_N_ASSIMILABLE
182
+ \value PRE_VELAR_N_NON_ASSIMILABLE 0
183
+ \value PRE_VELAR_N_ASSIMILABLE 1
184
+ \end
185
+
186
+
187
+ \beg option numeral_system PENTADIC_SYSTEM
188
+ \value QUINARY_SYSTEM 0
189
+ \value PENTADIC_SYSTEM 1
190
+ \end
191
+
192
+ \** ---------- Styling options ---------- **\
193
+
194
+ \beg option space_character USE_NON_BREAKING_SPACE_SMALL
195
+ \value USE_NORMAL_SPACE 0
196
+ \value USE_NON_BREAKING_SPACE_SMALL 1
197
+ \value USE_NON_BREAKING_SPACE_BIG 2
198
+ \value USE_MIDDLE_DOT 3
199
+ \end
200
+
201
+ \option auto_spacing true
202
+
203
+ \end
204
+
205
+ \beg preprocessor
206
+ \downcase
207
+
208
+ \** Remove phonetics accentuation marks **\
209
+ \rxsubstitute "[ˈˌ]" ""
210
+
211
+ \** foreign words nasal a, split to "an" (ex: croissant) **\
212
+ \rxsubstitute "ɑ̃" "ɑn"
213
+
214
+ \** Non rhotic schwa simplification **\
215
+ \rxsubstitute "ɐ" "ə"
216
+
217
+ \if linking_r
218
+ \rxsubstitute "ɹ‿" "ɹ"
219
+ \else
220
+ \rxsubstitute "ɹ‿" ""
221
+ \endif
222
+
223
+ \if intrusive_r
224
+ \rxsubstitute "ɹ̩‿" "ɹ"
225
+ \else
226
+ \rxsubstitute "ɹ̩‿" ""
227
+ \endif
228
+
229
+ \if "schwa_of_to == SCHWA_OF_TO_U"
230
+ \substitute "ʊ̟" "ʊ"
231
+ \else
232
+ \substitute "ʊ̟" "ə"
233
+ \endif
234
+
235
+ \if "pre_velar_n == PRE_VELAR_N_ASSIMILABLE"
236
+ \rxsubstitute "n‿" "ŋ"
237
+ \else
238
+ \rxsubstitute "n‿" "n"
239
+ \endif
240
+
241
+ \** IMPORTANT NOTE : in all following regexps **\
242
+ \** since we cannot rely on \b because using IPA **\
243
+ \** [a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃] stands for "any possible letter" **\
244
+ \** (^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]) ... stands for 'word beginning' **\
245
+ \** ... ($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]) stands for 'word ending' **\
246
+
247
+ \** 'the' variations **\
248
+ \** that the **\
249
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(ð[aæ]t)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
250
+ \** of the **\
251
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])([ɒʌ]v)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
252
+ \** for the **\
253
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(f[ɚə])(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
254
+ \** with the **\
255
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(wɪð)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
256
+ \** in the **\
257
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(ɪn)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
258
+ \** on the **\
259
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])([ɒɔ]n)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
260
+ \** from the **\
261
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(fɹʌm)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
262
+ \** was the **\
263
+ \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(wʌz)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
264
+
265
+ \if "horse_hoarse_merger == HORSE_HOARSE_SEPARATE"
266
+ \** Re-establish former diphthong **\
267
+ \substitute "oːɹ" "oʊɹ"
268
+ \endif
269
+
270
+ \** If treated as diphthong, change long i to i + schwi **\
271
+ \if "long_i == LONG_I_AS_DIPHTONG"
272
+ \substitute "iː" "iɪ"
273
+ \endif
274
+
275
+ \** Experimental, don't affect ju: at beginning of words/after consonant **\
276
+ \if "ju_diphthong == JU_DIPHTHONG_LIKE_IW"
277
+ \rxsubstitute "(juː|jʊ)" "iw"
278
+ \endif
279
+
280
+ \if "long_u == LONG_U_AS_DIPHTONG"
281
+ \substitute "uː" "uʊ"
282
+ \endif
283
+
284
+ \** ! Beware of the order of the following rules **\
285
+ \** ! Rhotic schwa : remove 1 level of length when superfluous and always add explicit mark **\
286
+ \rxsubstitute "[ɜɚ]ː?" "ɜɹ"
287
+
288
+ \** ! Potentially remove superfluous added rhotic marks **\
289
+ \rxsubstitute "ɹ+" "ɹ"
290
+ \** ! Disambiguate ɹ + vowel : ORE/ROMEN **\
291
+ \rxsubstitute "ɹ([ɑæaeɛʌɐəɜɚiɪᵻoɒɔuʊʘ])" "r\\1"
292
+
293
+ \** Convention : for non reducing schwas we will use ɤ̞ for ʌ and ʘ for all other cases **\
294
+
295
+ \** All schwas at beginning or end of words cannot reduce **\
296
+ \** or after vowels (== not consonant) **\
297
+ \** beware of ɪ as it can appear as consonant (lawyer) **\
298
+ \** same for ʊ for sour **\
299
+ \** Handling is not exactly the same as in the tengwar mode **\
300
+ \** Because letters are treated independently, not with a VC pattern **\
301
+ \** We thus need to add ending of words, that would be handled by telco otherwise **\
302
+
303
+ \** Mark non reducing schwa as ʘ **\
304
+ \rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([əɐɜɚ])" "\\1ʘ" \** beginning of words **\
305
+ \rxsubstitute "([əɐɜɚ])([^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]|r|$)" "ʘ\\2" \** ending of words **\
306
+
307
+ \if "schwi == SCHWI_LIKE_SCHWA"
308
+ \** Don't forget to mark schwis too : with this option they are handled like schwas, so the non reducing ones get the same marker **\
309
+ \rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([ᵻ])" "\\1ʘ" \** beginning of words **\
310
+ \rxsubstitute "ᵻ([^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]|r|$)" "ʘ\\1" \** ending of words : the only capture group is the terminator, put it back after the marker **\
311
+ \endif
312
+
313
+ \** Don't forget to mark the ombus too. The 'ending of words' patterns have a single capture group (the terminator) which the replacement must put back. NOTE(review): the else branch marks the ombu with the schwa marker and not with the schwu marker, confirm this is intended. **\
314
+ \if "open_mid_back_unrounded == OMBU_LIKE_SCHWA"
315
+ \rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([ʌ])" "\\1ɤ̞" \** beginning of words **\
316
+ \rxsubstitute "ʌ([^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]|r|$)" "ɤ̞\\1" \** ending of words **\
317
+ \else
318
+ \rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([ʌ])" "\\1ʘ" \** beginning of words **\
319
+ \rxsubstitute "ʌ([^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]|r|$)" "ʘ\\1" \** ending of words **\
320
+ \endif
321
+
322
+ \if "auto_spacing == true"
323
+ \rxsubstitute "([^\\s])([.,;:!?])" "\\1 \\2"
324
+ \rxsubstitute "([.,;:!?])([^\\s])" "\\1 \\2"
325
+ \endif
326
+
327
+ \if "numeral_system == QUINARY_SYSTEM"
328
+ \elvish_numbers 5 false
329
+ \endif
330
+ \end
331
+
332
+ \beg processor
333
+ \beg rules litteral
334
+
335
+ {SCHWA_NON_REDUCIBLE} === ʘ \** NON REDUCIBLE **\
336
+ {SCHWU_NON_REDUCIBLE} === ɤ̞ \** NON REDUCIBLE **\
337
+
338
+ \** Very long logic here for schwa/schwu reducible/non reducible **\
339
+ \** Could be shorter ? **\
340
+ \if implicit_schwa
341
+ {_REDUCIBLE_SCHWA_} === {NULL}
342
+ {_REDUCIBLE_SCHWU_} === {NULL}
343
+
344
+ \if "non_reducible_schwa_remaining == NON_REDUCIBLE_SCHWA_REMAINING_AS_VERTICAL_BARS"
345
+ {_NON_REDUCIBLE_SCHWA_} === CIRTH_55
346
+ {_NON_REDUCIBLE_SCHWU_} === {_NON_REDUCIBLE_SCHWA_}
347
+
348
+ \if "open_mid_back_unrounded != OMBU_LIKE_SCHWA"
349
+ {_NON_REDUCIBLE_SCHWU_} === CIRTH_56
350
+ \endif
351
+ \else
352
+ {_NON_REDUCIBLE_SCHWA_} === CIRTH_55_ALT
353
+ {_NON_REDUCIBLE_SCHWU_} === {_NON_REDUCIBLE_SCHWA_}
354
+
355
+ \if "open_mid_back_unrounded != OMBU_LIKE_SCHWA"
356
+ {_NON_REDUCIBLE_SCHWU_} === CIRTH_56_ALT
357
+ \endif
358
+ \endif
359
+ \else
360
+ \if "non_implicit_schwa_method == NON_IMPLICIT_SCHWA_DIFFERENCIATE_REDUCIBLE"
361
+ {_NON_REDUCIBLE_SCHWA_} === CIRTH_55
362
+ {_REDUCIBLE_SCHWA_} === CIRTH_55_ALT
363
+ {_NON_REDUCIBLE_SCHWU_} === {_NON_REDUCIBLE_SCHWA_}
364
+ {_REDUCIBLE_SCHWU_} === {_REDUCIBLE_SCHWA_}
365
+ \if "open_mid_back_unrounded != OMBU_LIKE_SCHWA"
366
+ {_NON_REDUCIBLE_SCHWU_} === CIRTH_56
367
+ {_REDUCIBLE_SCHWU_} === CIRTH_56_ALT
368
+ \endif
369
+ \elsif "non_implicit_schwa_method == NON_IMPLICIT_SCHWA_ALL_WITH_VERTICAL_BAR"
370
+ {_NON_REDUCIBLE_SCHWA_} === CIRTH_55
371
+ {_REDUCIBLE_SCHWA_} === CIRTH_55
372
+ {_NON_REDUCIBLE_SCHWU_} === {_NON_REDUCIBLE_SCHWA_}
373
+ {_REDUCIBLE_SCHWU_} === {_REDUCIBLE_SCHWA_}
374
+ \if "open_mid_back_unrounded != OMBU_LIKE_SCHWA"
375
+ {_NON_REDUCIBLE_SCHWU_} === CIRTH_56
376
+ {_REDUCIBLE_SCHWU_} === CIRTH_56
377
+ \endif
378
+ \else
379
+ {_NON_REDUCIBLE_SCHWA_} === CIRTH_55_ALT
380
+ {_REDUCIBLE_SCHWA_} === CIRTH_55_ALT
381
+ {_NON_REDUCIBLE_SCHWU_} === {_NON_REDUCIBLE_SCHWA_}
382
+ {_REDUCIBLE_SCHWU_} === {_REDUCIBLE_SCHWA_}
383
+ \if "open_mid_back_unrounded != OMBU_LIKE_SCHWA"
384
+ {_NON_REDUCIBLE_SCHWU_} === CIRTH_56_ALT
385
+ {_REDUCIBLE_SCHWU_} === CIRTH_56_ALT
386
+ \endif
387
+ \endif
388
+ \endif
389
+
390
+
391
+ {IGROUP} === i,ɪ
392
+ {UGROUP} === u,ʊ
393
+ {EBGROUP} === ə,ɐ \** REDUCIBLE **\
394
+ {ESCHWA} === ə \** REDUCIBLE E SCHWA **\
395
+ {OMBU} === ʌ
396
+
397
+ {SCHWA_NON_REDUCIBLE} === ʘ
398
+ {SCHWU_NON_REDUCIBLE} === ɤ̞
399
+ {ALL_ESCHWA} === (ə,ʘ)
400
+
401
+ {W_INDEPENDENT_SCHWI} === {NULL}
402
+ {_W_INDEPENDENT_SCHWI_} === {NULL}
403
+
404
+
405
+ \if "schwi == SCHWI_LIKE_I"
406
+ {IGROUP} === {IGROUP},ᵻ
407
+ \elsif "schwi == SCHWI_LIKE_SCHWA"
408
+ {EBGROUP} === {EBGROUP},ᵻ
409
+ \else
410
+ {W_INDEPENDENT_SCHWI} === * ᵻ
411
+ {_W_INDEPENDENT_SCHWI_} === * CIRTH_59
412
+ \endif
413
+
414
+ {A_FRONT} === (æ,a) \** Always short **\
415
+ {A_BACK} === (ɑ) \** Always long **\
416
+ {E_FRONT} === (e,ɛ)
417
+ {E_BACK} === ({EBGROUP})
418
+ {E_BACK_RHOTIC} === (ɚ,ɜ) \** Rhotic schwas are treated independently **\
419
+ {I} === ({IGROUP})
420
+ {O} === (o,ɒ,ɔ) \** force, mock, lord **\
421
+ {U} === ({UGROUP})
422
+
423
+ {AA_FRONT} === {A_FRONT}ː \** long front a probably does not exist **\
424
+ {AA_BACK} === {A_BACK}ː
425
+ {EE_FRONT} === {E_FRONT}ː \** long front e probably does not exist **\
426
+ {EE_BACK} === {E_BACK}ː \** long back e probably does not exist when not rhotic **\
427
+ {EE_BACK_RHOTIC} === {E_BACK_RHOTIC}ː
428
+ {II} === {I}ː
429
+ {OO} === {O}ː
430
+ {UU} === {U}ː
431
+
432
+ \** GB DIPHTONGS **\
433
+ \** +dˈeɪ +skˈaɪ +bˈɔɪ +bˈiə +bˈeə +tˈʊə +ɡˌəʊ +kˈaʊ **\
434
+ \** US DIPHTONGS **\
435
+ \** =dˈeɪ =skˈaɪ =bˈɔɪ -bˈɪɹ -bˈɛɹ -tˈʊɹ +ɡˌoʊ =kˈaʊ **\
436
+
437
+ \** U Diphthongs **\
438
+ {AW} === aʊ \** cow **\
439
+ {OW} === oʊ \** US most / mˈoʊst **\
440
+ {EW} === {ALL_ESCHWA}ʊ \** GB go **\
441
+ {UW} === uʊ \** goose if pronounced with labializing accent ... we do not have this in our pronunciations **\
442
+
443
+ \** I Diphtongues : eɪ (day) / aɪ (sky) / ɔɪ (boy) **\
444
+ {AJ} === aɪ \** nine / nˈaɪn **\
445
+ {EJ} === eɪ \** game / ɡˈeɪm **\
446
+ {OJ} === ɔɪ \** boy **\
447
+ {IJ} === iɪ \** fleece if pronounced with palatalising accent **\
448
+
449
+ \** ə diphthongs : iə (GB : beer) / eə (GB: bear) / ʊə (US: tour) **\
450
+ {IER} === i{ALL_ESCHWA} \** GB Beer **\
451
+ {EAR} === e{ALL_ESCHWA} \** GB Bear **\
452
+ {UER} === ʊ{ALL_ESCHWA} \** GB Tour **\
453
+
454
+ {VOWELS} === {A_BACK} * {A_FRONT} * {E_FRONT} * {E_BACK} * {E_BACK_RHOTIC} * {IER} * {EAR} * {UER} * {I} * {O} * {U} * {OMBU} * {ESCHWA} * {W_INDEPENDENT_SCHWI}
455
+ {_VOWELS_} === CIRTH_49 * CIRTH_48 * CIRTH_46 * {_REDUCIBLE_SCHWA_} * {_REDUCIBLE_SCHWA_} * CIRTH_39 {_NON_REDUCIBLE_SCHWA_} * CIRTH_46 {_NON_REDUCIBLE_SCHWA_} * CIRTH_42 {_NON_REDUCIBLE_SCHWA_} * CIRTH_39 * CIRTH_50 * CIRTH_42 * {_REDUCIBLE_SCHWU_} * {_REDUCIBLE_SCHWA_} * {_W_INDEPENDENT_SCHWI_}
456
+
457
+ \** Since in english front /a/ (trap) is always short and back /a/ (calm) is always long **\
458
+ \** we reuse the same cirth without risking a clash. **\
459
+ \** Long back e is probably not possible **\
460
+
461
+ \** Cirth 47 is used twice, but long front E is not present in english **\
462
+
463
+ {LVOWELS} === {AA_BACK} * {AA_FRONT} * {EE_FRONT} * {EE_BACK} * {EE_BACK_RHOTIC} * {II} * {OO} * {UU}
464
+ {_LVOWELS_} === CIRTH_49 * CIRTH_48 * CIRTH_47 * {_REDUCIBLE_SCHWA_} * {_REDUCIBLE_SCHWA_} * CIRTH_39 CIRTH_39 * CIRTH_51 * CIRTH_43
465
+
466
+ {DIPHTHONGS_R} === {AW} * {OW} * {EW} * {UW} * {AJ} * {EJ} * {OJ} * {IJ}
467
+ {_DIPHTHONGS_R_} === CIRTH_EREB_5 * CIRTH_38 * CIRTH_EREB_1 * CIRTH_45_ALT * CIRTH_EREB_4 * CIRTH_47 * CIRTH_52_ALT * CIRTH_39 CIRTH_59
468
+
469
+ {DIPHTHONGS} === {DIPHTHONGS_R}
470
+ {_DIPHTHONGS_} === {_DIPHTHONGS_R_}
471
+
472
+ {SCHWA_NON_REDUCIBLE} --> {_NON_REDUCIBLE_SCHWA_}
473
+ {SCHWU_NON_REDUCIBLE} --> {_NON_REDUCIBLE_SCHWU_}
474
+
475
+ {VOWELS} --> {_VOWELS_}
476
+ {LVOWELS} --> {_LVOWELS_}
477
+ {DIPHTHONGS} --> {_DIPHTHONGS_}
478
+
479
+ {L1} === (t,ɾ,ʔ) * p * tʃ * k
480
+ {_L1_} === CIRTH_8 * CIRTH_1 * CIRTH_13 * CIRTH_18
481
+ {L1} --> {_L1_}
482
+
483
+ \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
484
+ {L1_NASAL} === n(t,ɾ,ʔ) * mp * ntʃ * ŋk
485
+ [{L1_NASAL}] --> [{_L1_}] TEHTA_CIRCUM
486
+ \endif
487
+
488
+ \** ------------- **\
489
+
490
+ {L2} === d * b * dʒ * ɡ
491
+ {_L2_} === CIRTH_9 * CIRTH_2 * CIRTH_14 * CIRTH_19
492
+ {L2} --> {_L2_}
493
+ \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
494
+ {L2_NASAL} === nd * mb * ndʒ * ŋɡ
495
+ [{L2_NASAL}] --> [{_L2_}] TEHTA_CIRCUM
496
+ \endif
497
+
498
+ \** ------------- **\
499
+
500
+ {L3} === θ * f * ʃ * x
501
+ {_L3_} === CIRTH_10 * CIRTH_3 * CIRTH_15 * CIRTH_20
502
+ {L3} --> {_L3_}
503
+ \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
504
+ {L3_NASAL} === nθ * mf * nʃ * ŋx
505
+ [{L3_NASAL}] --> [{_L3_}] TEHTA_CIRCUM
506
+ \endif
507
+
508
+ \** ------------- **\
509
+
510
+ {L4} === ð * v * ʒ * ɣ
511
+ {_L4_} === CIRTH_11 * CIRTH_4 * CIRTH_16 * CIRTH_21
512
+ {L4} --> {_L4_}
513
+ \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
514
+ {L4_NASAL} === nð * mv * nʒ * ŋɣ
515
+ [{L4_NASAL}] --> [{_L4_}] TEHTA_CIRCUM
516
+ \endif
517
+
518
+ \** ------------- **\
519
+
520
+ {L5} === (n,n̩) * m * n(j,J) * ŋ
521
+ {_L5_} === CIRTH_12 * CIRTH_6 * CIRTH_17 * CIRTH_22
522
+ {L5} --> {_L5_}
523
+
524
+ \** ------------- **\
525
+
526
+ {L6} === w
527
+ {_L6_} === CIRTH_44
528
+ {L6} --> {_L6_}
529
+
530
+ {L6_NASAL} === nw
531
+ \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
532
+ [{L6_NASAL}] --> [{_L6_}] TEHTA_CIRCUM
533
+ \endif
534
+
535
+ {L6_NN} === (j,J)
536
+ \if "certh_for_y == USE_CERTH_39"
537
+ {_L6_NN_} === CIRTH_39
538
+ \else
539
+ {_L6_NN_} === CIRTH_40
540
+ \endif
541
+ {L6_NN} --> {_L6_NN_}
542
+
543
+ \** ------------- **\
544
+
545
+ \** CIRTH_30 (rh in angerthas daeron) is not used and is a good choice for ɹ **\
546
+ \** since it is an alternate r and graphically reversed **\
547
+ {L7} === r * ɹ * l
548
+ {_L7_} === CIRTH_29 * CIRTH_30 * CIRTH_31
549
+ {L7} --> {_L7_}
550
+
551
+ \** ------------- **\
552
+
553
+ \if "certh_for_s == USE_CERTH_34"
554
+ {_S_} === CIRTH_34
555
+ \else
556
+ {_S_} === CIRTH_35
557
+ \endif
558
+
559
+ {L8} === s * z
560
+ {_L8_} === {_S_} * CIRTH_36
561
+ {L8} --> {_L8_}
562
+ \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
563
+ {L8_NASAL} === ns * nz
564
+ [{L8_NASAL}] --> [{_L8_}] TEHTA_CIRCUM
565
+ \endif
566
+
567
+ \** ------------- **\
568
+
569
+ \** Use same character as in sindarin **\
570
+ {_WH_} === CIRTH_5
571
+ \if "ancient_voiceless_labiovelar_fricative_wh == WH_VLVF_WHINE_MERGER"
572
+ {_WH_} === CIRTH_44
573
+ \endif
574
+
575
+ {L9} === h * ʍ
576
+ {_L9_} === CIRTH_54 * {_WH_}
577
+ {L9} --> {_L9_}
578
+
579
+ \** ------------- **\
580
+ \** -- SPECIAL TOKENS **\
581
+
582
+ \if "english_the == ENGLISH_THE_EXTENDED_CIRTH"
583
+ _ð{ALL_ESCHWA}_ --> CIRTH_EREB_3
584
+ _ðɪ_ --> CIRTH_EREB_3 CIRTH_59 \** or CIRTH_39 (long vertical bar) **\
585
+ \endif
586
+
587
+ \if "english_and == ENGLISH_AND_EXTENDED_CIRTH"
588
+ _{A_FRONT}nd_ --> CIRTH_60
589
+ \endif
590
+
591
+ \end
592
+
593
+ \beg rules punctuation
594
+ . --> CIRTH_PUNCT_THREE_DOTS
595
+ .. --> CIRTH_PUNCT_THREE_DOTS
596
+ ... --> CIRTH_PUNCT_THREE_DOTS
597
+ … --> CIRTH_PUNCT_THREE_DOTS
598
+ .... --> CIRTH_PUNCT_FOUR_DOTS
599
+ ..... --> CIRTH_PUNCT_FOUR_DOTS
600
+ ...... --> CIRTH_PUNCT_FOUR_DOTS
601
+ ....... --> CIRTH_PUNCT_FOUR_DOTS
602
+
603
+ , --> CIRTH_PUNCT_MID_DOT
604
+ : --> CIRTH_PUNCT_TWO_DOTS
605
+ ; --> CIRTH_PUNCT_TWO_DOTS
606
+ ! --> CIRTH_PUNCT_THREE_DOTS
607
+ ? --> CIRTH_PUNCT_THREE_DOTS
608
+ · --> {NULL}
609
+
610
+ - --> {NULL}
611
+ – --> CIRTH_PUNCT_TWO_DOTS
612
+ — --> CIRTH_PUNCT_TWO_DOTS
613
+
614
+ \** Apostrophe **\
615
+
616
+ ' --> {NULL}
617
+ ’ --> {NULL}
618
+
619
+ \** NBSP **\
620
+ {NBSP} --> NBSP
621
+
622
+ \** Quotes **\
623
+
624
+ “ --> CIRTH_PUNCT_DOUBLE_VBAR
625
+ ” --> CIRTH_PUNCT_DOUBLE_VBAR
626
+ « --> CIRTH_PUNCT_DOUBLE_VBAR
627
+ » --> CIRTH_PUNCT_DOUBLE_VBAR
628
+
629
+ [ --> CIRTH_PUNCT_DOUBLE_VBAR
630
+ ] --> CIRTH_PUNCT_DOUBLE_VBAR
631
+ ( --> CIRTH_PUNCT_DOUBLE_VBAR
632
+ ) --> CIRTH_PUNCT_DOUBLE_VBAR
633
+ { --> CIRTH_PUNCT_DOUBLE_VBAR
634
+ } --> CIRTH_PUNCT_DOUBLE_VBAR
635
+ ⟨ --> CIRTH_PUNCT_DOUBLE_VBAR
636
+ ⟩ --> CIRTH_PUNCT_DOUBLE_VBAR
637
+ < --> CIRTH_PUNCT_DOUBLE_VBAR
638
+ > --> CIRTH_PUNCT_DOUBLE_VBAR
639
+
640
+ \** Not universal between fonts ... **\
641
+ $ --> CIRTH_PUNCT_STAR
642
+
643
+ \end
644
+
645
+ \beg rules numbers
646
+ \** Completely invented pentimal system based on the number of strokes **\
647
+
648
+ \if "numeral_system == QUINARY_SYSTEM"
649
+ 0 --> CIRTH_37 TEHTA_SUB_DOT
650
+
651
+ 1 --> CIRTH_NUMERAL_1 TEHTA_SUB_DOT
652
+ 2 --> CIRTH_NUMERAL_2 TEHTA_SUB_DOT
653
+ 3 --> CIRTH_NUMERAL_3 TEHTA_SUB_DOT
654
+ 4 --> CIRTH_NUMERAL_4 TEHTA_SUB_DOT
655
+ 5 --> CIRTH_NUMERAL_5 TEHTA_SUB_DOT
656
+ \else
657
+ 0 --> CIRTH_31 TEHTA_SUB_DOT
658
+
659
+ 1 --> CIRTH_10 TEHTA_SUB_DOT
660
+ 2 --> CIRTH_3 TEHTA_SUB_DOT
661
+ 3 --> CIRTH_4 TEHTA_SUB_DOT
662
+ 4 --> CIRTH_7 TEHTA_SUB_DOT
663
+
664
+ 5 --> CIRTH_39 TEHTA_SUB_DOT
665
+
666
+ 6 --> CIRTH_8 TEHTA_SUB_DOT
667
+ 7 --> CIRTH_1 TEHTA_SUB_DOT
668
+ 8 --> CIRTH_2 TEHTA_SUB_DOT
669
+ 9 --> CIRTH_6 TEHTA_SUB_DOT
670
+ \endif
671
+ \end
672
+
673
+ \end
674
+
675
+ \beg postprocessor
676
+ \if "space_character == USE_NON_BREAKING_SPACE_SMALL"
677
+ \outspace CIRTH_SPACE
678
+ \elsif "space_character == USE_NON_BREAKING_SPACE_BIG"
679
+ \outspace CIRTH_SPACE_BIG
680
+ \elsif "space_character == USE_MIDDLE_DOT"
681
+ \outspace "CIRTH_SPACE CIRTH_PUNCT_MID_DOT CIRTH_SPACE"
682
+ \else
683
+ \outspace SPACE
684
+ \endif
685
+
686
+ \resolve_virtuals
687
+ \end