RubyGems - glaemscribe - Versions diffs - 1.2.0 → 1.3.0 - Mend

glaemscribe 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

checksums.yaml +4 -4
data/bin/glaemscribe +2 -2
data/glaemresources/charsets/cirth_ds.cst +514 -179
data/glaemresources/charsets/eldamar.cst +210 -0
data/glaemresources/charsets/tengwar_ds_annatar.cst +2452 -130
data/glaemresources/charsets/tengwar_ds_eldamar.cst +2319 -125
data/glaemresources/charsets/tengwar_ds_elfica.cst +2317 -126
data/glaemresources/charsets/tengwar_ds_parmaite.cst +2319 -127
data/glaemresources/charsets/tengwar_ds_sindarin.cst +2318 -127
data/glaemresources/charsets/tengwar_freemono.cst +1 -1
data/glaemresources/charsets/tengwar_guni_annatar.cst +2451 -131
data/glaemresources/charsets/tengwar_guni_eldamar.cst +2317 -126
data/glaemresources/charsets/tengwar_guni_elfica.cst +2316 -127
data/glaemresources/charsets/tengwar_guni_parmaite.cst +2319 -127
data/glaemresources/charsets/tengwar_guni_sindarin.cst +2317 -126
data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
data/glaemresources/modes/japanese-tengwar.glaem +9 -4
data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
data/glaemresources/modes/raw-cirth.glaem +154 -0
data/lib/api/charset_parser.rb +7 -1
data/lib/api/mode.rb +35 -10
data/lib/api/mode_parser.rb +21 -12
data/lib/api/post_processor/outspace.rb +44 -0
data/lib/api/rule_group.rb +1 -1
data/lib/api/transcription_pre_post_processor.rb +8 -5
data/lib/api/transcription_processor.rb +12 -9
data/lib/glaemscribe.rb +2 -0
data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
data/lib_espeak/glaemscribe_tts.js +363 -223
metadata +12 -6

data/glaemresources/modes/english-cirth-espeak.glaem ADDED Viewed

@@ -0,0 +1,687 @@
+\**
+Glǽmscribe (also written Glaemscribe) is a software dedicated to
+the transcription of texts between writing systems, and more
+specifically dedicated to the transcription of J.R.R. Tolkien's
+invented languages to some of his devised writing systems.
+Copyright (C) 2015 Benjamin Babut (Talagan).
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+**\
+\beg changelog
+  \entry "0.0.1" "First version."
+\end
+\language "English"
+\writing  "Cirth"
+\mode     "English Angerthas based on Angerthas Daeron"
+\version  "0.0.1"
+\authors  "J.R.R. Tolkien, impl. Talagan (Benjamin Babut) with extrapolations"
+\world      primary_related_to_arda
+\invention  jrrt
+\charset  cirth_ds true
+\raw_mode "raw-cirth"
+\outspace CIRTH_SPACE
+\beg options
+  \** ENGLISH accent/dialect/variant. It also controls espeak behaviour. **\
+  \beg option espeak_voice ESPEAK_VOICE_EN_TENGWAR
+    \value ESPEAK_VOICE_EN_TENGWAR          0
+    \value ESPEAK_VOICE_EN_TENGWAR_GB       1
+    \value ESPEAK_VOICE_EN_TENGWAR_RP       2
+    \value ESPEAK_VOICE_EN_TENGWAR_US       3
+  \end
+  \** ----------Special words ---------- **\
+  \** 'the' word **\
+  \beg option english_the ENGLISH_THE_EXTENDED_CIRTH
+    \value ENGLISH_THE_EXTENDED_CIRTH 0
+    \value ENGLISH_THE_FULL_WRITING   1
+    \radio
+  \end
+  \** 'and' word may be represented by a special cirth **\
+  \beg option english_and ENGLISH_AND_EXTENDED_CIRTH
+    \value ENGLISH_AND_EXTENDED_CIRTH 0
+    \value ENGLISH_AND_FULL_WRITING   1
+    \radio
+  \end
+  \** 'to' word (the word 'to' may have its vowel reduced to a schwa) **\
+  \beg option schwa_of_to SCHWA_OF_TO_U
+    \value SCHWA_OF_TO_U 0
+    \value SCHWA_OF_TO_SCHWA 1
+  \end
+  \** ---------- Vowel options ---------- **\
+  \** Long i like in 'fleece' **\
+  \beg option long_i LONG_I_AS_DIPHTONG
+    \radio
+    \value LONG_I_DOUBLE_CIRTH 0
+    \value LONG_I_AS_DIPHTONG  1
+  \end
+  \** long u like in 'goose' **\
+  \** We render it either as the long vowel given in the Angerthas table **\
+  \** Or (extrapolation) we use an optional Cirth_45_alt that could look like **\
+  \** a /u+w/ because it's visually a mix of u and w **\
+  \beg option long_u LONG_U_AS_LONG_VOWEL
+    \radio
+    \value LONG_U_AS_LONG_VOWEL 0
+    \value LONG_U_AS_DIPHTONG   1
+  \end
+  \** ---------- Schwa options -------------- **\
+  \** Remove unnecessary, natural schwa marks **\
+  \option implicit_schwa false
+  \beg option non_implicit_schwa_method NON_IMPLICIT_SCHWA_DIFFERENCIATE_REDUCIBLE
+    \value NON_IMPLICIT_SCHWA_DIFFERENCIATE_REDUCIBLE 0
+    \value NON_IMPLICIT_SCHWA_ALL_WITH_VERTICAL_BAR   1
+    \value NON_IMPLICIT_SCHWA_ALL_AS_ACCENTS          2
+    \visible_when "implicit_schwa == false"
+  \end
+  \beg option non_reducible_schwa_remaining NON_REDUCIBLE_SCHWA_REMAINING_AS_ACCENTS
+    \visible_when implicit_schwa
+    \value NON_REDUCIBLE_SCHWA_REMAINING_AS_VERTICAL_BARS 0
+    \value NON_REDUCIBLE_SCHWA_REMAINING_AS_ACCENTS 1
+  \end
+  \** Schwi, in US/JRRT **\
+  \beg option schwi SCHWI_SMALL_BAR
+    \radio
+    \value SCHWI_SMALL_BAR  0
+    \value SCHWI_LIKE_I     1
+    \value SCHWI_LIKE_SCHWA 2
+    \visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR || espeak_voice == ESPEAK_VOICE_EN_TENGWAR_US"
+  \end
+  \** 'strut' vowel special case **\
+  \beg option open_mid_back_unrounded OMBU_USE_LEFT_ORIENTED_CIRTH
+    \radio
+    \value OMBU_USE_LEFT_ORIENTED_CIRTH    0
+    \value OMBU_LIKE_SCHWA                 1
+  \end
+  \** ---------- Diphthong options ---------- **\
+  \** 'cure', 'cute' diphthong **\
+  \beg option ju_diphthong JU_DIPHTHONG_SEPARATE
+    \radio
+    \value JU_DIPHTHONG_SEPARATE 0
+    \value JU_DIPHTHONG_LIKE_IW  1
+  \end
+  \** Horse / Hoarse vowel distinction (only JRRT/US accents) **\
+  \beg option horse_hoarse_merger HORSE_HOARSE_SEPARATE
+    \visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR || espeak_voice == ESPEAK_VOICE_EN_TENGWAR_US"
+    \value HORSE_HOARSE_MERGE 0
+    \value HORSE_HOARSE_SEPARATE 1
+  \end
+  \** ---------- Consonant options ---------- **\
+  \** 'wh' in old accents/US. Sometimes called 'wine/whine' merger. **\
+  \beg option ancient_voiceless_labiovelar_fricative_wh WH_VLVF_AS_IN_SINDARIN
+    \value WH_VLVF_AS_IN_SINDARIN 0
+    \value WH_VLVF_WHINE_MERGER 1
+  \end
+  \** Re-establishment of linking r in non-rhotic accent, ex : 'better life' vs 'betteR answer' **\
+  \beg option linking_r true
+    \visible_when "espeak_voice == ESPEAK_VOICE_EN_TENGWAR_RP || espeak_voice == ESPEAK_VOICE_EN_TENGWAR_GB"
+  \end
+  \** Intrusive r, like in vanillaR ice **\
+  \beg option intrusive_r true
+    \visible_when "espeak_voice != ESPEAK_VOICE_EN_TENGWAR_US"
+  \end
+  \beg option certh_for_y USE_CERTH_40
+    \value USE_CERTH_39 0
+    \value USE_CERTH_40 1
+    \radio
+  \end
+  \beg option certh_for_s USE_CERTH_34
+    \value USE_CERTH_34 0
+    \value USE_CERTH_35 1
+    \radio
+  \end
+  \beg option pre_consonant_n_with_same_articulation_point PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK
+    \value PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_SEPARATE 0
+    \value PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK 1
+  \end
+  \beg option pre_velar_n PRE_VELAR_N_ASSIMILABLE
+    \value PRE_VELAR_N_NON_ASSIMILABLE 0
+    \value PRE_VELAR_N_ASSIMILABLE 1
+  \end
+  \beg option numeral_system PENTADIC_SYSTEM
+    \value QUINARY_SYSTEM   0
+    \value PENTADIC_SYSTEM  1
+  \end
+  \** ---------- Styling options ---------- **\
+  \beg option space_character USE_NON_BREAKING_SPACE_SMALL
+    \value USE_NORMAL_SPACE 0
+    \value USE_NON_BREAKING_SPACE_SMALL 1
+    \value USE_NON_BREAKING_SPACE_BIG 2
+    \value USE_MIDDLE_DOT 3
+  \end
+  \option auto_spacing true
+\end
+\beg  preprocessor
+  \downcase
+  \** Remove phonetics accentuation marks **\
+  \rxsubstitute "[ˈˌ]" ""
+  \** foreign words nasal a, split to "an" (ex: croissant) **\
+  \rxsubstitute "ɑ̃" "ɑn"
+  \** Non rhotic schwa simplification **\
+  \rxsubstitute "ɐ" "ə"
+  \if linking_r
+    \rxsubstitute "ɹ‿" "ɹ"
+  \else
+    \rxsubstitute "ɹ‿" ""
+  \endif
+  \if intrusive_r
+    \rxsubstitute "ɹ̩‿" "ɹ"
+  \else
+    \rxsubstitute "ɹ̩‿" ""
+  \endif
+  \if "schwa_of_to == SCHWA_OF_TO_U"
+    \substitute "ʊ̟" "ʊ"
+  \else
+    \substitute "ʊ̟" "ə"
+  \endif
+  \if "pre_velar_n == PRE_VELAR_N_ASSIMILABLE"
+    \rxsubstitute "n‿" "ŋ"
+  \else
+    \rxsubstitute "n‿" "n"
+  \endif
+  \** IMPORTANT NOTE : in all following regexps **\
+  \** since we cannot rely on \b because using IPA **\
+  \** [a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃] stands for "any possible letter" **\
+  \** (^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]) ... stands for 'word beginning' **\
+  \** ... ($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]) stands for 'word ending' **\
+  \** 'the' variations **\
+  \** that the **\
+  \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(ð[aæ]t)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
+  \** of the **\
+  \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])([ɒʌ]v)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
+  \** for the **\
+  \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(f[ɚə])(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
+  \** with the **\
+  \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(wɪð)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
+  \** in the **\
+  \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(ɪn)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
+  \** on the **\
+  \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])([ɒɔ]n)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
+  \** from the **\
+  \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(fɹʌm)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
+  \** was the **\
+  \rxsubstitute "(^|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])(wʌz)(ð[əɪ])($|[^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃])" "\\1\\2 \\3\\4"
+  \if "horse_hoarse_merger == HORSE_HOARSE_SEPARATE"
+    \** Re-establish former diphthong **\
+    \substitute "oːɹ" "oʊɹ"
+  \endif
+  \** If treated as diphthong, change long i to i + schwi **\
+  \if "long_i == LONG_I_AS_DIPHTONG"
+    \substitute "iː" "iɪ"
+  \endif
+  \** Experimental, don't affect ju: at beginning of words/after consonant **\
+  \if "ju_diphthong == JU_DIPHTHONG_LIKE_IW"
+    \rxsubstitute "(juː|jʊ)" "iw"
+  \endif
+  \if "long_u == LONG_U_AS_DIPHTONG"
+    \substitute "uː" "uʊ"
+  \endif
+  \** ! Beware of the order of the following rules **\
+  \** ! Rhotic schwa : remove 1 level of length when superfluous and always add explicit mark **\
+  \rxsubstitute "[ɜɚ]ː?" "ɜɹ"
+  \** ! Potentially remove superfluous added rhotic marks **\
+  \rxsubstitute "ɹ+" "ɹ"
+  \** ! Disambiguate ɹ + vowel : ORE/ROMEN **\
+  \rxsubstitute "ɹ([ɑæaeɛʌɐəɜɚiɪᵻoɒɔuʊʘ])" "r\\1"
+  \** Convention : for non reducing schwas we will use ɤ̞ for ʌ and ʘ for all other cases  **\
+  \** All schwas at beginning or end of words cannot reduce **\
+  \** or after vowels (== not consonant) **\
+  \** beware of ɪ as it can appear as consonant (lawyer) **\
+  \** same for ʊ for sour **\
+  \** Handling is not exactly the same as in the tengwar mode **\
+  \** Because letters are treated independently, not with a VC pattern **\
+  \** We thus need to add ending of words, that would be handled by telco otherwise **\
+  \** Mark non reducing schwa as ʘ **\
+  \rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([əɐɜɚ])" "\\1ʘ" \** beginning of words **\
+  \rxsubstitute "([əɐɜɚ])([^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]|r|$)" "ʘ\\2" \** ending of words **\
+  \** Mark word-initial / word-final schwi and 'strut' vowels as non reducible. **\
+  \** In every "ending of words" rule the vowel is captured as group 1, so that **\
+  \** group 2 is the word boundary (or the r) that must be written back, exactly **\
+  \** like the schwa ending rule just above. Without that first group the **\
+  \** replacement referenced a non-existent group and the boundary / r was lost. **\
+  \if "schwi == SCHWI_LIKE_SCHWA"
+    \** Don't forget to mark schwis too **\
+    \rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([ᵻ])" "\\1ʘ"  \** beginning of words **\
+    \rxsubstitute "([ᵻ])([^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]|r|$)" "ʘ\\2"  \** ending of words **\
+  \endif
+  \** Don't forget to mark the ombus too **\
+  \** NOTE(review): the processor gives the non reducible schwu its own certh only when **\
+  \** open_mid_back_unrounded != OMBU_LIKE_SCHWA, yet ɤ̞ is emitted here only when it **\
+  \** == OMBU_LIKE_SCHWA. Confirm that the two branches below are not swapped. **\
+  \if "open_mid_back_unrounded == OMBU_LIKE_SCHWA"
+    \rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([ʌ])" "\\1ɤ̞"  \** beginning of words **\
+    \rxsubstitute "([ʌ])([^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]|r|$)" "ɤ̞\\2"  \** ending of words **\
+  \else
+    \rxsubstitute "(^|[^bcdfghjklmnpqrstvwxyzðŋɡɣɹɾʃʍʒʔθɪʊ])([ʌ])" "\\1ʘ"  \** beginning of words **\
+    \rxsubstitute "([ʌ])([^a-zA-Zæǽɑɒɐəɚɛɜɪᵻʊʌɔðŋɡɣɹɾʃʍʒʔθː̟̩̃]|r|$)" "ʘ\\2"  \** ending of words **\
+  \endif
+  \if "auto_spacing == true"
+    \rxsubstitute "([^\\s])([.,;:!?])" "\\1 \\2"
+    \rxsubstitute "([.,;:!?])([^\\s])" "\\1 \\2"
+  \endif
+  \if "numeral_system == QUINARY_SYSTEM"
+    \elvish_numbers 5 false
+  \endif
+\end
+\beg processor
+  \beg rules litteral
+    {SCHWA_NON_REDUCIBLE}  === ʘ       \** NON REDUCIBLE **\
+    {SCHWU_NON_REDUCIBLE}  === ɤ̞       \** NON REDUCIBLE **\
+    \** Very long logic here for schwa/schwu reducible/non reducible **\
+    \** Could be shorter ? **\
+    \if implicit_schwa
+      {_REDUCIBLE_SCHWA_} === {NULL}
+      {_REDUCIBLE_SCHWU_} === {NULL}
+      \if "non_reducible_schwa_remaining == NON_REDUCIBLE_SCHWA_REMAINING_AS_VERTICAL_BARS"
+        {_NON_REDUCIBLE_SCHWA_} === CIRTH_55
+        {_NON_REDUCIBLE_SCHWU_} === {_NON_REDUCIBLE_SCHWA_}
+        \if "open_mid_back_unrounded != OMBU_LIKE_SCHWA"
+          {_NON_REDUCIBLE_SCHWU_} === CIRTH_56
+        \endif
+      \else
+        {_NON_REDUCIBLE_SCHWA_} === CIRTH_55_ALT
+        {_NON_REDUCIBLE_SCHWU_} === {_NON_REDUCIBLE_SCHWA_}
+        \if "open_mid_back_unrounded != OMBU_LIKE_SCHWA"
+          {_NON_REDUCIBLE_SCHWU_} === CIRTH_56_ALT
+        \endif
+      \endif
+    \else
+      \if "non_implicit_schwa_method == NON_IMPLICIT_SCHWA_DIFFERENCIATE_REDUCIBLE"
+        {_NON_REDUCIBLE_SCHWA_} === CIRTH_55
+        {_REDUCIBLE_SCHWA_}     === CIRTH_55_ALT
+        {_NON_REDUCIBLE_SCHWU_} === {_NON_REDUCIBLE_SCHWA_}
+        {_REDUCIBLE_SCHWU_}     === {_REDUCIBLE_SCHWA_}
+        \if "open_mid_back_unrounded != OMBU_LIKE_SCHWA"
+          {_NON_REDUCIBLE_SCHWU_} === CIRTH_56
+          {_REDUCIBLE_SCHWU_}     === CIRTH_56_ALT
+        \endif
+      \elsif "non_implicit_schwa_method == NON_IMPLICIT_SCHWA_ALL_WITH_VERTICAL_BAR"
+        {_NON_REDUCIBLE_SCHWA_} === CIRTH_55
+        {_REDUCIBLE_SCHWA_}     === CIRTH_55
+        {_NON_REDUCIBLE_SCHWU_} === {_NON_REDUCIBLE_SCHWA_}
+        {_REDUCIBLE_SCHWU_}     === {_REDUCIBLE_SCHWA_}
+        \if "open_mid_back_unrounded != OMBU_LIKE_SCHWA"
+          {_NON_REDUCIBLE_SCHWU_} === CIRTH_56
+          {_REDUCIBLE_SCHWU_}     === CIRTH_56
+        \endif
+      \else
+        {_NON_REDUCIBLE_SCHWA_} === CIRTH_55_ALT
+        {_REDUCIBLE_SCHWA_}     === CIRTH_55_ALT
+        {_NON_REDUCIBLE_SCHWU_} === {_NON_REDUCIBLE_SCHWA_}
+        {_REDUCIBLE_SCHWU_}     === {_REDUCIBLE_SCHWA_}
+        \if "open_mid_back_unrounded != OMBU_LIKE_SCHWA"
+          {_NON_REDUCIBLE_SCHWU_} === CIRTH_56_ALT
+          {_REDUCIBLE_SCHWU_}     === CIRTH_56_ALT
+        \endif
+      \endif
+    \endif
+    {IGROUP}               === i,ɪ
+    {UGROUP}               === u,ʊ
+    {EBGROUP}              === ə,ɐ     \** REDUCIBLE **\
+    {ESCHWA}               === ə       \** REDUCIBLE E SCHWA **\
+    {OMBU}                 === ʌ
+    {SCHWA_NON_REDUCIBLE}  === ʘ
+    {SCHWU_NON_REDUCIBLE}  === ɤ̞
+    {ALL_ESCHWA}           === (ə,ʘ)
+    {W_INDEPENDENT_SCHWI}    === {NULL}
+    {_W_INDEPENDENT_SCHWI_}  === {NULL}
+    \if "schwi == SCHWI_LIKE_I"
+      {IGROUP} === {IGROUP},ᵻ
+    \elsif "schwi == SCHWI_LIKE_SCHWA"
+      {EBGROUP} === {EBGROUP},ᵻ
+    \else
+      {W_INDEPENDENT_SCHWI}    === * ᵻ
+      {_W_INDEPENDENT_SCHWI_}  === * CIRTH_59
+    \endif
+    {A_FRONT}             === (æ,a)  \** Always short **\
+    {A_BACK}              === (ɑ)    \** Always long **\
+    {E_FRONT}             === (e,ɛ)
+    {E_BACK}              === ({EBGROUP})
+    {E_BACK_RHOTIC}       === (ɚ,ɜ)       \** Rhotic schwas are treated independently **\
+    {I}                   === ({IGROUP})
+    {O}                   === (o,ɒ,ɔ)     \** force, mock, lord **\
+    {U}                   === ({UGROUP})
+    {AA_FRONT}        === {A_FRONT}ː    \** long front a probably does not exist **\
+    {AA_BACK}         === {A_BACK}ː
+    {EE_FRONT}        === {E_FRONT}ː    \** long front e probably does not exist **\
+    {EE_BACK}         === {E_BACK}ː     \** long back  e probably does not exist when not rhotic **\
+    {EE_BACK_RHOTIC}  === {E_BACK_RHOTIC}ː
+    {II}              === {I}ː
+    {OO}              === {O}ː
+    {UU}              === {U}ː
+    \** GB DIPHTHONGS **\
+    \** +dˈeɪ +skˈaɪ +bˈɔɪ +bˈiə +bˈeə +tˈʊə +ɡˌəʊ +kˈaʊ **\
+    \** US DIPHTHONGS **\
+    \** =dˈeɪ =skˈaɪ =bˈɔɪ -bˈɪɹ -bˈɛɹ -tˈʊɹ +ɡˌoʊ =kˈaʊ **\
+    \** U Diphthongs **\
+    {AW} === aʊ        \** cow **\
+    {OW} === oʊ        \** US most / mˈoʊst **\
+    {EW} === {ALL_ESCHWA}ʊ \** GB go **\
+    {UW} === uʊ        \** goose if pronounced with labializing accent ... we do not have this in our pronunciations **\
+    \** I Diphthongs :  eɪ (day) / aɪ (sky) / ɔɪ (boy)  **\
+    {AJ} === aɪ \** nine / nˈaɪn **\
+    {EJ} === eɪ \** game / ɡˈeɪm **\
+    {OJ} === ɔɪ \** boy **\
+    {IJ} === iɪ \** fleece if pronounced with palatalising accent **\
+    \** ə diphthongs : iə (GB : beer) / eə (GB: bear) / ʊə (GB: tour) **\
+    {IER} === i{ALL_ESCHWA} \** GB Beer **\
+    {EAR} === e{ALL_ESCHWA} \** GB Bear **\
+    {UER} === ʊ{ALL_ESCHWA} \** GB Tour **\
+    {VOWELS}        === {A_BACK}  * {A_FRONT}     * {E_FRONT}     * {E_BACK}                * {E_BACK_RHOTIC}     * {IER}                            * {EAR}                             * {UER}                             * {I}         * {O}           * {U}      * {OMBU}              * {ESCHWA}            * {W_INDEPENDENT_SCHWI}
+    {_VOWELS_}      === CIRTH_49  * CIRTH_48      * CIRTH_46      * {_REDUCIBLE_SCHWA_}     * {_REDUCIBLE_SCHWA_} * CIRTH_39 {_NON_REDUCIBLE_SCHWA_} * CIRTH_46 {_NON_REDUCIBLE_SCHWA_}  * CIRTH_42 {_NON_REDUCIBLE_SCHWA_}  * CIRTH_39    * CIRTH_50      * CIRTH_42 * {_REDUCIBLE_SCHWU_} * {_REDUCIBLE_SCHWA_} * {_W_INDEPENDENT_SCHWI_}
+    \** Since in English front /a/ (trap) is always short and back /a/ (calm) is always long **\
+    \** we reuse the same cirth without risking a clash. **\
+    \** Long back e is probably not possible **\
+    \** Cirth 47 is used twice, but long front E is not present in english **\
+    {LVOWELS}       === {AA_BACK} * {AA_FRONT}    * {EE_FRONT}    * {EE_BACK}               * {EE_BACK_RHOTIC}    * {II}               * {OO}     * {UU}
+    {_LVOWELS_}     === CIRTH_49  * CIRTH_48      * CIRTH_47      * {_REDUCIBLE_SCHWA_}     * {_REDUCIBLE_SCHWA_} * CIRTH_39 CIRTH_39  * CIRTH_51 * CIRTH_43
+    {DIPHTHONGS_R}    === {AW}         * {OW}     * {EW}          * {UW}         * {AJ}         * {EJ}              * {OJ}              * {IJ}
+    {_DIPHTHONGS_R_}  === CIRTH_EREB_5 * CIRTH_38 * CIRTH_EREB_1  * CIRTH_45_ALT * CIRTH_EREB_4 * CIRTH_47          * CIRTH_52_ALT      * CIRTH_39 CIRTH_59
+    {DIPHTHONGS}    === {DIPHTHONGS_R}
+    {_DIPHTHONGS_}  === {_DIPHTHONGS_R_}
+    {SCHWA_NON_REDUCIBLE} --> {_NON_REDUCIBLE_SCHWA_}
+    {SCHWU_NON_REDUCIBLE} --> {_NON_REDUCIBLE_SCHWU_}
+    {VOWELS} --> {_VOWELS_}
+    {LVOWELS} --> {_LVOWELS_}
+    {DIPHTHONGS} --> {_DIPHTHONGS_}
+    {L1}   === (t,ɾ,ʔ) * p * tʃ * k
+    {_L1_} === CIRTH_8 * CIRTH_1 * CIRTH_13 * CIRTH_18
+    {L1}   --> {_L1_}
+    \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
+      {L1_NASAL} === n(t,ɾ,ʔ) * mp * ntʃ * ŋk
+      [{L1_NASAL}] --> [{_L1_}] TEHTA_CIRCUM
+    \endif
+    \** ------------- **\
+    {L2}   === d * b * dʒ * ɡ
+    {_L2_} === CIRTH_9 * CIRTH_2 * CIRTH_14 * CIRTH_19
+    {L2}   --> {_L2_}
+    \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
+      {L2_NASAL}   === nd    * mb * ndʒ   * ŋɡ
+      [{L2_NASAL}] --> [{_L2_}] TEHTA_CIRCUM
+    \endif
+    \** ------------- **\
+    {L3}   === θ        * f       * ʃ         * x
+    {_L3_} === CIRTH_10 * CIRTH_3 * CIRTH_15  * CIRTH_20
+    {L3}   --> {_L3_}
+    \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
+      {L3_NASAL} === nθ      * mf     * nʃ      * ŋx
+      [{L3_NASAL}] --> [{_L3_}] TEHTA_CIRCUM
+    \endif
+    \** ------------- **\
+    {L4}     === ð        * v       * ʒ         * ɣ
+    {_L4_}   === CIRTH_11 * CIRTH_4 * CIRTH_16  * CIRTH_21
+    {L4}     --> {_L4_}
+    \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
+      {L4_NASAL}   === nð    * mv    * nʒ    * ŋɣ
+      [{L4_NASAL}] --> [{_L4_}] TEHTA_CIRCUM
+    \endif
+    \** ------------- **\
+    {L5}     === (n,n̩)    * m       * n(j,J)     * ŋ
+    {_L5_}   === CIRTH_12 * CIRTH_6 * CIRTH_17   * CIRTH_22
+    {L5}     --> {_L5_}
+    \** ------------- **\
+    {L6}       === w
+    {_L6_}     === CIRTH_44
+    {L6}       --> {_L6_}
+    {L6_NASAL}   === nw
+    \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
+      [{L6_NASAL}]  --> [{_L6_}] TEHTA_CIRCUM
+    \endif
+    {L6_NN}   === (j,J)
+    \if "certh_for_y == USE_CERTH_39"
+      {_L6_NN_} === CIRTH_39
+    \else
+      {_L6_NN_} === CIRTH_40
+    \endif
+    {L6_NN}   --> {_L6_NN_}
+    \** ------------- **\
+    \** CIRTH_30 (rh in angerthas daeron) is not used and is a good choice for ɹ **\
+    \** since it is an alternate r and graphically reversed **\
+    {L7}    === r        * ɹ        * l
+    {_L7_}  === CIRTH_29 * CIRTH_30 * CIRTH_31
+    {L7}    --> {_L7_}
+    \** ------------- **\
+    \if "certh_for_s == USE_CERTH_34"
+      {_S_} === CIRTH_34
+    \else
+      {_S_} === CIRTH_35
+    \endif
+    {L8}   === s * z
+    {_L8_} === {_S_} * CIRTH_36
+    {L8}   --> {_L8_}
+    \if "pre_consonant_n_with_same_articulation_point == PRE_CONSONANT_N_WITH_SAME_ARTICULATION_POINT_MARK"
+      {L8_NASAL}   === ns * nz
+      [{L8_NASAL}] --> [{_L8_}] TEHTA_CIRCUM
+    \endif
+    \** ------------- **\
+    \** Use same character as in sindarin **\
+    {_WH_} === CIRTH_5
+    \if "ancient_voiceless_labiovelar_fricative_wh == WH_VLVF_WHINE_MERGER"
+      {_WH_} === CIRTH_44
+    \endif
+    {L9}    === h        * ʍ
+    {_L9_}  === CIRTH_54 * {_WH_}
+    {L9} --> {_L9_}
+    \** ------------- **\
+    \** -- SPECIAL TOKENS **\
+    \if "english_the == ENGLISH_THE_EXTENDED_CIRTH"
+      _ð{ALL_ESCHWA}_  --> CIRTH_EREB_3
+      _ðɪ_             --> CIRTH_EREB_3 CIRTH_59 \** or CIRTH_39 (long vertical bar) **\
+    \endif
+    \if "english_and == ENGLISH_AND_EXTENDED_CIRTH"
+      _{A_FRONT}nd_ --> CIRTH_60
+    \endif
+  \end
+  \beg rules punctuation
+    . --> CIRTH_PUNCT_THREE_DOTS
+    .. --> CIRTH_PUNCT_THREE_DOTS
+    ... --> CIRTH_PUNCT_THREE_DOTS
+    …   --> CIRTH_PUNCT_THREE_DOTS
+    .... --> CIRTH_PUNCT_FOUR_DOTS
+    ..... --> CIRTH_PUNCT_FOUR_DOTS
+    ...... --> CIRTH_PUNCT_FOUR_DOTS
+    ....... --> CIRTH_PUNCT_FOUR_DOTS
+    , --> CIRTH_PUNCT_MID_DOT
+    : --> CIRTH_PUNCT_TWO_DOTS
+    ; --> CIRTH_PUNCT_TWO_DOTS
+    ! --> CIRTH_PUNCT_THREE_DOTS
+    ? --> CIRTH_PUNCT_THREE_DOTS
+    · --> {NULL}
+    - --> {NULL}
+    – --> CIRTH_PUNCT_TWO_DOTS
+    — --> CIRTH_PUNCT_TWO_DOTS
+    \** Apostrophe **\
+    ' --> {NULL}
+    ’ --> {NULL}
+    \** NBSP **\
+    {NBSP} --> NBSP
+    \** Quotes **\
+    “ --> CIRTH_PUNCT_DOUBLE_VBAR
+    ” --> CIRTH_PUNCT_DOUBLE_VBAR
+    « --> CIRTH_PUNCT_DOUBLE_VBAR
+    » --> CIRTH_PUNCT_DOUBLE_VBAR
+    [ --> CIRTH_PUNCT_DOUBLE_VBAR
+    ] --> CIRTH_PUNCT_DOUBLE_VBAR
+    ( --> CIRTH_PUNCT_DOUBLE_VBAR
+    ) --> CIRTH_PUNCT_DOUBLE_VBAR
+    { --> CIRTH_PUNCT_DOUBLE_VBAR
+    } --> CIRTH_PUNCT_DOUBLE_VBAR
+    ⟨ --> CIRTH_PUNCT_DOUBLE_VBAR
+    ⟩ --> CIRTH_PUNCT_DOUBLE_VBAR
+    < --> CIRTH_PUNCT_DOUBLE_VBAR
+    > --> CIRTH_PUNCT_DOUBLE_VBAR
+    \** Not universal between fonts ... **\
+    $ --> CIRTH_PUNCT_STAR
+  \end
+  \beg rules numbers
+    \** Completely invented pentimal system based on the number of strokes **\
+    \if "numeral_system == QUINARY_SYSTEM"
+      0 --> CIRTH_37 TEHTA_SUB_DOT
+      1 --> CIRTH_NUMERAL_1 TEHTA_SUB_DOT
+      2 --> CIRTH_NUMERAL_2 TEHTA_SUB_DOT
+      3 --> CIRTH_NUMERAL_3 TEHTA_SUB_DOT
+      4 --> CIRTH_NUMERAL_4 TEHTA_SUB_DOT
+      5 --> CIRTH_NUMERAL_5 TEHTA_SUB_DOT
+    \else
+      0 --> CIRTH_31 TEHTA_SUB_DOT
+      1 --> CIRTH_10 TEHTA_SUB_DOT
+      2 --> CIRTH_3 TEHTA_SUB_DOT
+      3 --> CIRTH_4 TEHTA_SUB_DOT
+      4 --> CIRTH_7 TEHTA_SUB_DOT
+      5 --> CIRTH_39 TEHTA_SUB_DOT
+      6 --> CIRTH_8 TEHTA_SUB_DOT
+      7 --> CIRTH_1 TEHTA_SUB_DOT
+      8 --> CIRTH_2 TEHTA_SUB_DOT
+      9 --> CIRTH_6 TEHTA_SUB_DOT
+    \endif
+  \end
+\end
+\** Postprocessor: selects the outspace argument from the space_character option, **\
+\** then runs resolve_virtuals. **\
+\** NOTE(review): presumably outspace sets the token emitted for spaces in the output and **\
+\** overrides the header-level outspace CIRTH_SPACE declared at the top of the mode - confirm. **\
+\beg  postprocessor
+  \if "space_character == USE_NON_BREAKING_SPACE_SMALL"
+    \** Default value of the space_character option **\
+    \outspace CIRTH_SPACE
+  \elsif "space_character == USE_NON_BREAKING_SPACE_BIG"
+    \outspace CIRTH_SPACE_BIG
+  \elsif "space_character == USE_MIDDLE_DOT"
+    \** Three-token sequence : a mid dot with a CIRTH_SPACE on each side **\
+    \outspace "CIRTH_SPACE CIRTH_PUNCT_MID_DOT CIRTH_SPACE"
+  \else
+    \** Only remaining declared value : USE_NORMAL_SPACE **\
+    \outspace SPACE
+  \endif
+  \** NOTE(review): presumably must stay last so that virtual characters are resolved on the final token stream - confirm **\
+  \resolve_virtuals
+\end