RubyGems - glaemscribe - Versions diffs - 1.1.14 → 1.3.0 - Mend

glaemscribe 1.1.14 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

checksums.yaml +5 -5
data/bin/glaemscribe +21 -17
data/glaemresources/charsets/cirth_ds.cst +540 -0
data/glaemresources/charsets/eldamar.cst +210 -0
data/glaemresources/charsets/sarati_eldamar.cst +256 -0
data/glaemresources/charsets/tengwar_ds_annatar.cst +2868 -0
data/glaemresources/charsets/tengwar_ds_eldamar.cst +2729 -0
data/glaemresources/charsets/tengwar_ds_elfica.cst +2742 -0
data/glaemresources/charsets/tengwar_ds_parmaite.cst +2726 -0
data/glaemresources/charsets/tengwar_ds_sindarin.cst +2722 -0
data/glaemresources/charsets/tengwar_freemono.cst +217 -0
data/glaemresources/charsets/tengwar_guni_annatar.cst +2948 -0
data/glaemresources/charsets/tengwar_guni_eldamar.cst +2809 -0
data/glaemresources/charsets/tengwar_guni_elfica.cst +2809 -0
data/glaemresources/charsets/tengwar_guni_parmaite.cst +2813 -0
data/glaemresources/charsets/tengwar_guni_sindarin.cst +2808 -0
data/glaemresources/charsets/tengwar_telcontar.cst +225 -0
data/glaemresources/charsets/unicode_gothic.cst +64 -0
data/glaemresources/charsets/unicode_runes.cst +121 -0
data/glaemresources/modes/{adunaic.glaem → adunaic-tengwar-glaemscrafu.glaem} +14 -2
data/glaemresources/modes/{blackspeech.glaem → blackspeech-tengwar-general_use.glaem} +13 -3
data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
data/glaemresources/modes/japanese-tengwar.glaem +776 -0
data/glaemresources/modes/{khuzdul.glaem → khuzdul-cirth-moria.glaem} +4 -1
data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
data/glaemresources/modes/{futhorc.glaem → old_english-futhorc.glaem} +0 -0
data/glaemresources/modes/{mercian.glaem → old_english-tengwar-mercian.glaem} +22 -12
data/glaemresources/modes/{westsaxon.glaem → old_english-tengwar-westsaxon.glaem} +20 -11
data/glaemresources/modes/{futhark-runicus.glaem → old_norse-futhark-runicus.glaem} +0 -0
data/glaemresources/modes/{futhark-younger.glaem → old_norse-futhark-younger.glaem} +0 -0
data/glaemresources/modes/{quenya.glaem → quenya-tengwar-classical.glaem} +32 -50
data/glaemresources/modes/raw-cirth.glaem +154 -0
data/glaemresources/modes/raw-tengwar.glaem +46 -23
data/glaemresources/modes/{rlyehian.glaem → rlyehian-tengwar.glaem} +14 -3
data/glaemresources/modes/{sindarin-daeron.glaem → sindarin-cirth-daeron.glaem} +55 -14
data/glaemresources/modes/{sindarin-beleriand.glaem → sindarin-tengwar-beleriand.glaem} +154 -28
data/glaemresources/modes/{sindarin.glaem → sindarin-tengwar-general_use.glaem} +86 -25
data/glaemresources/modes/{telerin.glaem → telerin-tengwar-glaemscrafu.glaem} +16 -6
data/glaemresources/modes/{westron.glaem → westron-tengwar-glaemscrafu.glaem} +18 -8
data/lib/api/charset.rb +67 -7
data/lib/api/charset_parser.rb +14 -1
data/lib/api/constants.rb +3 -4
data/lib/api/fragment.rb +26 -5
data/lib/api/if_tree.rb +70 -8
data/lib/api/macro.rb +40 -0
data/lib/api/mode.rb +66 -19
data/lib/api/mode_parser.rb +117 -14
data/lib/api/object_additions.rb +23 -1
data/lib/api/option.rb +17 -2
data/lib/api/post_processor/outspace.rb +44 -0
data/lib/api/post_processor/resolve_virtuals.rb +25 -9
data/lib/api/resource_manager.rb +1 -0
data/lib/api/rule_group.rb +170 -26
data/lib/api/sheaf_chain_iterator.rb +1 -1
data/lib/api/transcription_pre_post_processor.rb +8 -5
data/lib/api/transcription_processor.rb +15 -12
data/lib/api/tts.rb +51 -0
data/lib/glaemscribe.rb +36 -31
data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +35 -0
data/lib_espeak/glaemscribe_tts.js +505 -0
metadata +76 -24

data/glaemresources/modes/raw-cirth.glaem ADDED Viewed

@@ -0,0 +1,154 @@
+\**
+Glǽmscribe (also written Glaemscribe) is a software dedicated to
+the transcription of texts between writing systems, and more
+specifically dedicated to the transcription of J.R.R. Tolkien's
+invented languages to some of his devised writing systems.
+Copyright (C) 2015 Benjamin Babut (Talagan).
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+**\
+\** Very limited mode, mainly used for writing Glaemscribe documentation. **\
+\beg changelog
+  \entry "0.0.1" "Initial version."
+\end
+\language "Raw Cirth"
+\writing  "Cirth"
+\mode     "Raw Cirth"
+\version  "0.0.1"
+\authors  "Talagan (Benjamin Babut)"
+\world      arda
+\invention  experimental
+\metamode true
+\charset  cirth_ds true
+\beg      preprocessor
+  \** Work exclusively downcase **\
+  \downcase
+\end
+\beg processor
+  \beg    rules litteral
+    _c1_ --> CIRTH_1
+    _c2_ --> CIRTH_2
+    _c3_ --> CIRTH_3
+    _c4_ --> CIRTH_4
+    _c5_ --> CIRTH_5
+    _c6_ --> CIRTH_6
+    _c7_ --> CIRTH_7
+    _c8_ --> CIRTH_8
+    _c9_ --> CIRTH_9
+    _c10_ --> CIRTH_10
+    _c11_ --> CIRTH_11
+    _c12_ --> CIRTH_12
+    _c13_ --> CIRTH_13
+    _c14_ --> CIRTH_14
+    _c15_ --> CIRTH_15
+    _c16_ --> CIRTH_16
+    _c17_ --> CIRTH_17
+    _c18_ --> CIRTH_18
+    _c19_ --> CIRTH_19
+    _c20_ --> CIRTH_20
+    _c21_ --> CIRTH_21
+    _c22_ --> CIRTH_22
+    _c23_ --> CIRTH_23
+    _c24_ --> CIRTH_24
+    _c25_ --> CIRTH_25
+    _c26_ --> CIRTH_26
+    _c27_ --> CIRTH_27
+    _c28_ --> CIRTH_28
+    _c29_ --> CIRTH_29
+    _c30_ --> CIRTH_30
+    _c31_ --> CIRTH_31
+    _c32_ --> CIRTH_32
+    _c33_ --> CIRTH_33
+    _c34_ --> CIRTH_34
+    _c35_ --> CIRTH_35
+    _c36_ --> CIRTH_36
+    _c37_ --> CIRTH_37
+    _c38_ --> CIRTH_38
+    _c39_ --> CIRTH_39
+    _c40_ --> CIRTH_40
+    _c41_ --> CIRTH_41
+    _c42_ --> CIRTH_42
+    _c43_ --> CIRTH_43
+    _c44_ --> CIRTH_44
+    _c45_ --> CIRTH_45
+    _c46_ --> CIRTH_46
+    _c47_ --> CIRTH_47
+    _c48_ --> CIRTH_48
+    _c49_ --> CIRTH_49
+    _c50_ --> CIRTH_50
+    _c51_ --> CIRTH_51
+    _c52_ --> CIRTH_52
+    _c53_ --> CIRTH_53
+    _c54_ --> CIRTH_54
+    _c55_ --> CIRTH_55
+    _c56_ --> CIRTH_56
+    _c57_ --> CIRTH_57
+    _c58_ --> CIRTH_58
+    _c59_ --> CIRTH_59
+    _c60_ --> CIRTH_60
+    _c38alt_ --> CIRTH_38_ALT
+    _c45alt_ --> CIRTH_45_ALT
+    _c51alt_ --> CIRTH_51_ALT
+    _c52alt_ --> CIRTH_52_ALT
+    _c55alt_ --> CIRTH_55_ALT
+    _c56alt_ --> CIRTH_56_ALT
+    _ce1_ --> CIRTH_EREB_1
+    _ce2_ --> CIRTH_EREB_2
+    _ce3_ --> CIRTH_EREB_3
+    _ce4_ --> CIRTH_EREB_4
+    _ce5_ --> CIRTH_EREB_5
+    _ce6_ --> CIRTH_EREB_6
+    _ce7_ --> CIRTH_EREB_7
+    _1_ --> CIRTH_NUMERAL_1
+    _2_ --> CIRTH_NUMERAL_2
+    _3_ --> CIRTH_NUMERAL_3
+    _4_ --> CIRTH_NUMERAL_4
+    _5_ --> CIRTH_NUMERAL_5
+    _{UNDERSCORE}_   --> TEHTA_UNDERLINE
+    _sdot_           --> TEHTA_SUB_DOT
+    _(^,circ)_       --> TEHTA_CIRCUM
+    {NBSP}           --> NBSP
+  \end
+  \beg rules punctuation
+    , --> CIRTH_PUNCT_DOT
+    . --> CIRTH_PUNCT_MID_DOT
+    (..,:) --> CIRTH_PUNCT_TWO_DOTS
+    ... --> CIRTH_PUNCT_THREE_DOTS
+    (....,::) --> CIRTH_PUNCT_FOUR_DOTS
+  \end
+\end
+\beg postprocessor
+  \resolve_virtuals
+\end

data/glaemresources/modes/raw-tengwar.glaem CHANGED Viewed

@@ -25,13 +25,16 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 \beg changelog
   \entry "0.0.1" "Initial version."
   \entry "0.0.2" "Added missing extended tengwar."
-  \entry "0.0.3" "Added support for non-breaking spaces"
+  \entry "0.0.3" "Added support for non-breaking spaces"
+  \entry "0.0.4" "Added support for new unicode charsets"
+  \entry "0.0.5" "Added support for the Tengwar Telcontar font"
+  \entry "0.0.6" "Added support for Bombadil W/HW"
 \end
 \language "Raw Tengwar"
 \writing  "Tengwar"
 \mode     "Raw Tengwar"
-\version  "0.0.3"
+\version  "0.0.6"
 \authors  "Talagan (Benjamin Babut)"
 \world      arda
@@ -44,7 +47,15 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 \charset  tengwar_ds_eldamar  false
 \charset  tengwar_ds_annatar  false
 \charset  tengwar_ds_elfica   false
+\charset  tengwar_guni_sindarin false
+\charset  tengwar_guni_parmaite false
+\charset  tengwar_guni_eldamar  false
+\charset  tengwar_guni_annatar  false
+\charset  tengwar_guni_elfica   false
 \charset  tengwar_freemono    false
+\charset  tengwar_telcontar   false
 \beg      options
@@ -164,6 +175,8 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
     _(mh)_                --> TW_MH
     _(mhbeleriandic,mhb)_ --> TW_MH_BELERIANDIC
     _(hwlowdham,hwl)_     --> TW_HW_LOWDHAM
+    _(hwbombadil,hwbom)_  --> BOMBADIL_HW
+    _(wbombadil,wbom)_    --> BOMBADIL_W
     \** ligatures **\
     _(harmasilme,ahasilme,silmeharma,silmeaha)_ --> ANCA_CLOSED
@@ -179,7 +192,11 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
     _(geminate)_  --> {GEMINATE}
     _(nasal)_     --> {NASAL}
     _(palatal)_   --> PALATAL_SIGN
-    _(labial)_    --> SEV_TEHTA
+    _(labial)_    --> WA_TEHTA
+    \** Sa-rince support to be enhanced ... **\
+    _(sarince)_           --> SARINCE
+    _(arrince,sarincef)_  --> SARINCE_FLOURISHED
     _0_ --> NUM_0
     _1_ --> NUM_1
@@ -193,32 +210,42 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
     _9_ --> NUM_9
     _10_ --> NUM_10
     _11_ --> NUM_11
+    _12_ --> NUM_12
     _(lsd)_ --> CIRC_TEHTA_INF
   \end
   \beg    rules punctuation
+    \** Allow / as word breaker **\
     / --> {NULL}
+    - --> {NULL}
+    ·   --> PUNCT_DOT
+    ,   --> PUNCT_DOT
+    :   --> PUNCT_DOT
+    ;   --> PUNCT_DOT
-    . --> PUNCT_DDOT
-    .. --> PUNCT_DOT PUNCT_DDOT PUNCT_DOT
+    .   --> PUNCT_DDOT
+    ..  --> PUNCT_DDOT
+    ...   --> PUSTA_3
+    ....  --> PUSTA_4
+    ::    --> PUSTA_4_SQUARED
+    \** PUSTA_4_HALFED ? **\
+    ..... --> PUSTA_5
     …  --> PUNCT_TILD
-    ... --> PUNCT_TILD
-    .... --> PUNCT_TILD
-    ..... --> PUNCT_TILD
-    ...... --> PUNCT_TILD
-    ....... --> PUNCT_TILD
-    , --> PUNCT_DOT
-    : --> PUNCT_DOT
-    ; --> PUNCT_DOT
+    ~  --> PUNCT_TILD
+    –  --> PUNCT_TILD
+    —  --> PUNCT_TILD
     ! --> PUNCT_EXCLAM
     ? --> PUNCT_INTERR
-    · --> PUNCT_DOT
-    :: --> PUSTA_3
     \** Apostrophe **\
     ' --> {NULL}
@@ -234,10 +261,6 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
     « --> DQUOT_OPEN
     » --> DQUOT_CLOSE
-    - --> {NULL}
-    – --> PUNCT_TILD
-    — --> PUNCT_TILD
     [ --> PUNCT_PAREN_L
     ] --> PUNCT_PAREN_R
     ( --> PUNCT_PAREN_L
@@ -248,7 +271,7 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 \**    > --> PUNCT_PAREN_R : cannot be used since it is already used as a letter in group litteral **\
     \** Not universal between fonts ... **\
-    $ --> BOOKMARK_SIGN
+    $ --> ELVISH_PAREN
     ≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
     ≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\

data/glaemresources/modes/{rlyehian.glaem → rlyehian-tengwar.glaem} RENAMED Viewed

@@ -35,12 +35,14 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 	\entry "0.0.2" "Ported to virtual chars"
 	\entry "0.0.3" "Ported to various charsets"
   \entry "0.1.1" "Added support for inlined raw tengwar"
+  \entry "0.1.2" "Added support for new unicode charsets"
+  \entry "0.1.3" "Added support for the Tengwar Telcontar font"
 \end
 \language "R'lyehian"
 \writing  "Tengwar"
 \mode     "R'lyehian Tengwar - G*"
-\version  "0.1.1"
+\version  "0.1.3"
 \authors  "H.P.Lovecraft & The Great Ancient Gods, impl. Fthalagn"
 \world      other_world
@@ -53,7 +55,16 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 \charset  tengwar_ds_eldamar  false
 \charset  tengwar_ds_annatar  false
 \charset  tengwar_ds_elfica   false
+\charset  tengwar_guni_sindarin false
+\charset  tengwar_guni_parmaite false
+\charset  tengwar_guni_eldamar  false
+\charset  tengwar_guni_annatar  false
+\charset  tengwar_guni_elfica   false
 \charset  tengwar_freemono    false
+\charset  tengwar_telcontar   false
 \beg      options
 \end
@@ -178,7 +189,7 @@ y palatal semi vowel ?
     {O_LOOP}        === O_TEHTA
     {U_LOOP}        === U_TEHTA
-    {TEHTAR}            === A_TEHTA      * E_TEHTA      * I_TEHTA    * O_TEHTA     * U_TEHTA * SEV_TEHTA
+    {TEHTAR}            === A_TEHTA      * E_TEHTA      * I_TEHTA    * O_TEHTA     * U_TEHTA * WA_TEHTA
     [{VOWELS}]          --> TELCO [{TEHTAR}]  \** Replace isolated short vowels **\
@@ -268,7 +279,7 @@ y palatal semi vowel ?
     > --> PUNCT_PAREN_R
     \** Not universal between fonts ... **\
-    $ --> BOOKMARK_SIGN
+    $ --> ELVISH_PAREN
     ≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
     ≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
   \end

data/glaemresources/modes/{sindarin-daeron.glaem → sindarin-cirth-daeron.glaem} RENAMED Viewed

@@ -28,12 +28,13 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
   \entry 0.0.2 "Added thorn as equivalent for th"
   \entry 0.0.3 "Moved out space to general element"
   \entry 0.0.4 "Fixed wrong ch, hw, h"
+  \entry 0.0.5 "Added disambiguations from the tengwar modes. Reworked median point behaviour, and ng."
 \end
 \language "Sindarin"
 \writing  "Cirth"
 \mode     "Sindarin Cirth - Angerthas Daeron"
-\version  "0.0.4"
+\version  "0.0.5"
 \authors  "J.R.R. Tolkien, impl. Talagan (Benjamin Babut)"
 \world      arda
@@ -44,10 +45,29 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 \** We redefine the output space to have something beautiful, especially with erebor1 and erebor2 **\
 \outspace CIRTH_SPACE_BIG
+\beg options
+  \beg option hyphen HYPHEN_WORD_BREAKER
+    \value HYPHEN_WORD_BREAKER  0
+    \value HYPHEN_WORD_JOINER   1
+  \end
+\end
 \beg      preprocessor
   \** Work exclusively downcase **\
   \downcase
+  \if "hyphen == HYPHEN_WORD_JOINER"
+    \** Replace hyphen by median point **\
+    \substitute "-" "·"
+  \else
+    \** Replace hyphen by glaemscribe's word breaker **\
+    \substitute "-" "|"
+  \endif
+  \** Add keyboard friendly word joiner **\
+  \substitute "*" "·"
   \** Simplify trema vowels **\
   \substitute ä a
   \substitute ë e
@@ -63,6 +83,25 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
   \rxsubstitute "(ō|ô|oo)" "ó"
   \rxsubstitute "(ū|û|uu)" "ú"
   \rxsubstitute "(ȳ|ŷ|yy)" "ý"
+  \** Special case of starting 'i' before vowels, replace i by j **\
+  \rxsubstitute "\\bi([aeouyáāâéēêíīîóōôúūûýȳŷ])" "j\\1"
+  \** Special case of diphthong aw. Before vowels, do not treat 'aw' as a diphthong,
+  since it seems more logical that aw would behave as a semi vowel **\
+  \rxsubstitute "aw([aeouyáāâéēêíīîóōôúūûýȳŷ])" "a|w\\1"
+  \** Special case for ng : before the vast majority of consonants, treat as ŋ **\
+  \** Don't include r / l / lh / w **\
+  \rxsubstitute "ng([tpckbdfðvnmhs])" "ŋ\\1"
+  \** Prevent mutated ng from being treated as a strong mid-word n|g (ex : i·ngelaidh [iŋɛlaið] ) **\
+  \substitute "·ng" "·ŋ"
+  \** But do not lose the strong g in the nasal mutation of g (ex : in·Gelydh [iŋgɛlyð] ) **\
+  \substitute "n·g" "·ŋg"
+  \** Use median dot as word joiner **\
+  \substitute "·" ""
 \end
 \beg      processor
@@ -115,16 +154,11 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
     (k,c)     --> CIRTH_18
     (kh,ch)   --> CIRTH_20
-    ghw   --> CIRTH_26
-    gw    --> CIRTH_24
     h     --> CIRTH_54 \**  13 is more eng. ch like in chin and 15 is more eng. sh like in shoe **\
-    hw    --> CIRTH_5
     j     --> CIRTH_14
-    khw   --> CIRTH_25
-    kw    --> CIRTH_23
     l     --> CIRTH_31
     lh    --> CIRTH_32
     m     --> CIRTH_6
@@ -133,14 +167,11 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
     n     --> CIRTH_12
     nc_   --> CIRTH_22 CIRTH_18 \** equals ŋc **\
     nd    --> CIRTH_38
-    ng    --> CIRTH_33
-    _ng   --> CIRTH_22
-    ng_   --> CIRTH_22
-    ŋ     --> CIRTH_22
+    \** Normalisation of ng **\
+    (ng,ngg,ŋg,ñg)  --> CIRTH_33 \** strong **\
+    (ng_,_ng,ŋ,ñ)   --> CIRTH_22 \** weak **\
-    nw    --> CIRTH_28
-    ngw   --> CIRTH_27
     nj    --> CIRTH_17
     r     --> CIRTH_29
     rh    --> CIRTH_30
@@ -148,8 +179,18 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
     sh    --> CIRTH_15
     ss    --> CIRTH_36
     (þ,th) --> CIRTH_10
-    w     --> CIRTH_44
     zh    --> CIRTH_16
+    \** Labials **\
+    hw                --> CIRTH_5
+    ghw               --> CIRTH_26
+    gw                --> CIRTH_24
+    (ng,ngg,ŋg,ñg)w   --> CIRTH_27     \** STRONG NG + W **\
+    khw               --> CIRTH_25
+    kw                --> CIRTH_23
+    nw                --> CIRTH_28
+    w                 --> CIRTH_44
   \end
   \beg    rules punctuation