RubyGems - glaemscribe - Versions diffs - 1.0.0 - Mend

glaemscribe 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

checksums.yaml +7 -0
data/LICENSE.txt +19 -0
data/bin/glaemscribe +307 -0
data/glaemresources/charsets/cirth_ds.cst +205 -0
data/glaemresources/charsets/sarati_eldamar.cst +256 -0
data/glaemresources/charsets/tengwar_ds.cst +318 -0
data/glaemresources/charsets/unicode_gothic.cst +64 -0
data/glaemresources/charsets/unicode_runes.cst +120 -0
data/glaemresources/modes/adunaic.glaem +251 -0
data/glaemresources/modes/blackspeech-annatar.glaem +318 -0
data/glaemresources/modes/blackspeech.glaem +260 -0
data/glaemresources/modes/gothic.glaem +78 -0
data/glaemresources/modes/khuzdul.glaem +141 -0
data/glaemresources/modes/mercian.glaem +419 -0
data/glaemresources/modes/oldnorse-medieval.glaem +127 -0
data/glaemresources/modes/quenya-sarati.glaem +320 -0
data/glaemresources/modes/quenya.glaem +307 -0
data/glaemresources/modes/sindarin-beleriand.glaem +285 -0
data/glaemresources/modes/sindarin-classical.glaem +276 -0
data/glaemresources/modes/sindarin-daeron.glaem +182 -0
data/glaemresources/modes/telerin.glaem +302 -0
data/glaemresources/modes/valarin-sarati.glaem +210 -0
data/glaemresources/modes/westron.glaem +340 -0
data/glaemresources/modes/westsaxon.glaem +342 -0
data/lib/api/charset.rb +84 -0
data/lib/api/charset_parser.rb +55 -0
data/lib/api/constants.rb +29 -0
data/lib/api/debug.rb +36 -0
data/lib/api/eval.rb +268 -0
data/lib/api/fragment.rb +113 -0
data/lib/api/glaeml.rb +200 -0
data/lib/api/if_tree.rb +96 -0
data/lib/api/mode.rb +112 -0
data/lib/api/mode_parser.rb +314 -0
data/lib/api/option.rb +64 -0
data/lib/api/post_processor/reverse.rb +36 -0
data/lib/api/pre_processor/downcase.rb +35 -0
data/lib/api/pre_processor/elvish_numbers.rb +47 -0
data/lib/api/pre_processor/rxsubstitute.rb +40 -0
data/lib/api/pre_processor/substitute.rb +38 -0
data/lib/api/pre_processor/up_down_tehta_split.rb +138 -0
data/lib/api/resource_manager.rb +130 -0
data/lib/api/rule.rb +99 -0
data/lib/api/rule_group.rb +159 -0
data/lib/api/sheaf.rb +70 -0
data/lib/api/sheaf_chain.rb +86 -0
data/lib/api/sheaf_chain_iterator.rb +108 -0
data/lib/api/sub_rule.rb +40 -0
data/lib/api/transcription_pre_post_processor.rb +118 -0
data/lib/api/transcription_processor.rb +137 -0
data/lib/api/transcription_tree_node.rb +91 -0
data/lib/glaemscribe.rb +70 -0
metadata +112 -0

data/glaemresources/modes/sindarin-daeron.glaem ADDED Viewed

@@ -0,0 +1,182 @@
+\**
+Glǽmscribe (also written Glaemscribe) is a software dedicated to
+the transcription of texts between writing systems, and more
+specifically dedicated to the transcription of J.R.R. Tolkien's
+invented languages to some of his devised writing systems.
+Copyright (C) 2015 Benjamin Babut (Talagan).
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+**\
+\** Sindarin Angerthas Daeron mode for glaemscribe **\
+\language "Sindarin"
+\writing  "Cirth"
+\mode     "Angerthas Daeron"
+\version  "0.0.1"
+\authors  "Talagan (Benjamin Babut)"
+\charset  cirth_ds true
+\** Input normalisation: lowercases the text, removes diaereses from vowels, then rewrites each long-vowel spelling to its acute-accented form. **\
+\beg      preprocessor
+  \** Work exclusively downcase **\
+  \downcase
+  \** Simplify trema vowels **\
+  \substitute ä a
+  \substitute ë e
+  \substitute ï i
+  \substitute ö o
+  \substitute ü u
+  \substitute ÿ y
+  \** Dis-ambiguate long vowels **\
+  \** Each pattern accepts the macron, circumflex or doubled spelling and replaces it with the acute form. **\
+  \rxsubstitute "(ā|â|aa)" "á"
+  \rxsubstitute "(ē|ê|ee)" "é"
+  \rxsubstitute "(ī|î|ii)" "í"
+  \rxsubstitute "(ō|ô|oo)" "ó"
+  \rxsubstitute "(ū|û|uu)" "ú"
+  \** NOTE(review): this mode's litteral rules define no entry for "ý" - confirm how a long y is expected to render. **\
+  \rxsubstitute "(ȳ|ŷ|yy)" "ý"
+\end
+\beg      processor
+  \** We redefine the output space to have something beautiful, especially with erebor1 and erebor2 **\
+  \outspace CIRTH_SPACE_BIG
+  \** Letter rules: each line maps a source spelling (left of "-->") to one or more CIRTH_* charset names. **\
+  \** Several spellings deliberately share a target: c/k, bh/v, mb/mh, dh/đ/ð/ðh. **\
+  \** NOTE(review): a leading or trailing "_" (ff_, nc_, _ng, ng_) presumably anchors the rule at a word boundary - confirm against the engine. **\
+  \beg    rules litteral
+    a     --> CIRTH_48
+    á     --> CIRTH_49
+    b     --> CIRTH_2
+    bh    --> CIRTH_4
+    c     --> CIRTH_18
+    ch    --> CIRTH_13
+    d     --> CIRTH_9
+    dh    --> CIRTH_11
+    đ     --> CIRTH_11
+    ð     --> CIRTH_11
+    ðh    --> CIRTH_11
+    e     --> CIRTH_46
+    é     --> CIRTH_47
+    f     --> CIRTH_3
+    ff_   --> CIRTH_3
+    g     --> CIRTH_19
+    gh    --> CIRTH_21
+    ghw   --> CIRTH_26
+    gw    --> CIRTH_24
+    h     --> CIRTH_5 \** . is another one??, V is +h (should be used in combinations, maybe todo) **\
+    i     --> CIRTH_39
+    \** Long i has no dedicated glyph here: it is written as CIRTH_39 twice. **\
+    í     --> CIRTH_39 CIRTH_39
+    j     --> CIRTH_14
+    k     --> CIRTH_18
+    kh    --> CIRTH_20
+    khw   --> CIRTH_25
+    kw    --> CIRTH_23
+    l     --> CIRTH_31
+    lh    --> CIRTH_32
+    m     --> CIRTH_6
+    mb    --> CIRTH_7
+    mh    --> CIRTH_7
+    n     --> CIRTH_12
+    nc_   --> CIRTH_22 CIRTH_18 \** equals ŋc **\
+    nd    --> CIRTH_38
+    ng    --> CIRTH_33
+    _ng   --> CIRTH_22
+    ng_   --> CIRTH_22
+    ŋ     --> CIRTH_22
+    nw    --> CIRTH_28
+    ngw   --> CIRTH_27
+    nj    --> CIRTH_17
+    o     --> CIRTH_50
+    ó     --> CIRTH_51 \** Can use CIRTH_51_ALT **\
+    \** NOTE(review): the preprocessor substitutes ö with o, so this ö rule looks unreachable - confirm. **\
+    ö     --> CIRTH_52 \** Can use CIRTH_52_ALT **\
+    œ     --> CIRTH_52
+    p     --> CIRTH_1
+    r     --> CIRTH_29
+    rh    --> CIRTH_30
+    s     --> CIRTH_34 \** Can use CIRTH_35 **\
+    sh    --> CIRTH_15
+    ss    --> CIRTH_36
+    t     --> CIRTH_8
+    th    --> CIRTH_10
+    u     --> CIRTH_42
+    ú     --> CIRTH_43
+    \** NOTE(review): the preprocessor substitutes ü with u, so this ü rule looks unreachable - confirm. **\
+    ü     --> CIRTH_45_ALT \** Can use CIRTH_45 **\
+    y     --> CIRTH_45_ALT \** Can use CIRTH_45 **\
+    v     --> CIRTH_4
+    w     --> CIRTH_44
+    zh    --> CIRTH_16
+  \end
+  \** Punctuation rules: maps Latin punctuation to CIRTH_PUNCT_* charset names, or to {NULL}. **\
+  \** Runs of one to seven dots, the ellipsis character, "!" and "?" all share CIRTH_PUNCT_THREE_DOTS. **\
+  \beg    rules punctuation
+    . --> CIRTH_PUNCT_THREE_DOTS
+    .. --> CIRTH_PUNCT_THREE_DOTS
+    ... --> CIRTH_PUNCT_THREE_DOTS
+    …   --> CIRTH_PUNCT_THREE_DOTS
+    .... --> CIRTH_PUNCT_THREE_DOTS
+    ..... --> CIRTH_PUNCT_THREE_DOTS
+    ...... --> CIRTH_PUNCT_THREE_DOTS
+    ....... --> CIRTH_PUNCT_THREE_DOTS
+    , --> CIRTH_PUNCT_MID_DOT
+    : --> CIRTH_PUNCT_TWO_DOTS
+    ; --> CIRTH_PUNCT_TWO_DOTS
+    ! --> CIRTH_PUNCT_THREE_DOTS
+    ? --> CIRTH_PUNCT_THREE_DOTS
+    \** {NULL} targets presumably produce no output, i.e. the character is dropped - confirm against the engine. **\
+    · --> {NULL}
+    - --> {NULL}
+    – --> CIRTH_PUNCT_TWO_DOTS
+    — --> CIRTH_PUNCT_TWO_DOTS
+    \** Apostrophe **\
+    ' --> {NULL}
+    ’ --> {NULL}
+    \** Quotes **\
+    “ --> {NULL}
+    ” --> {NULL}
+    « --> {NULL}
+    » --> {NULL}
+    \** NOTE(review): opening and closing brackets all map to the same CIRTH_PUNCT_THREE_DOTS_L name - confirm no distinct closing glyph is intended. **\
+    [ --> CIRTH_PUNCT_THREE_DOTS_L
+    ] --> CIRTH_PUNCT_THREE_DOTS_L
+    ( --> CIRTH_PUNCT_THREE_DOTS_L
+    ) --> CIRTH_PUNCT_THREE_DOTS_L
+    { --> CIRTH_PUNCT_THREE_DOTS_L
+    } --> CIRTH_PUNCT_THREE_DOTS_L
+    < --> CIRTH_PUNCT_THREE_DOTS_L
+    > --> CIRTH_PUNCT_THREE_DOTS_L
+    / --> CIRTH_PUNCT_FOUR_DOTS
+  \end
+\end

data/glaemresources/modes/telerin.glaem ADDED Viewed

@@ -0,0 +1,302 @@
+\**
+Glǽmscribe (also written Glaemscribe) is a software dedicated to
+the transcription of texts between writing systems, and more
+specifically dedicated to the transcription of J.R.R. Tolkien's
+invented languages to some of his devised writing systems.
+Copyright (C) 2015 Benjamin Babut (Talagan).
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+**\
+\** Telerin mode for glaemscribe (MAY BE INCOMPLETE) - Derived from Quenya **\
+\language "Telerin"
+\writing  "Tengwar"
+\mode     "Glaemscrafu"
+\version  "0.0.1"
+\authors  "Talagan (Benjamin Babut)"
+\charset  tengwar_ds true
+\** Mode options; both are read by the \elvish_numbers preprocessor step through "\\eval <option name>". **\
+\beg      options
+  \** NOTE(review): the trailing value (true here, BASE_12 below) is presumably the option's default - confirm. **\
+  \option reverse_numbers true
+  \beg option numbers_base BASE_12
+    \** Named choices and the numeric value each one stands for. **\
+    \value    BASE_10 10
+    \value    BASE_12 12
+  \end
+\end
+\** Input normalisation: lowercases the text, removes diaereses, rewrites long-vowel spellings to the acute form, collapses "qu" to "q", then handles numbers. **\
+\beg      preprocessor
+  \** Work exclusively downcase **\
+  \downcase
+  \** Simplify trema vowels **\
+  \substitute ä a
+  \substitute ë e
+  \substitute ï i
+  \substitute ö o
+  \substitute ü u
+  \substitute ÿ y
+  \** Dis-ambiguate long vowels **\
+  \** Each pattern accepts the macron, circumflex or doubled spelling and replaces it with the acute form. **\
+  \rxsubstitute "(ā|â|aa)" "á"
+  \rxsubstitute "(ē|ê|ee)" "é"
+  \rxsubstitute "(ī|î|ii)" "í"
+  \rxsubstitute "(ō|ô|oo)" "ó"
+  \rxsubstitute "(ū|û|uu)" "ú"
+  \rxsubstitute "(ȳ|ŷ|yy)" "ý"
+  \substitute   "qu" "q" \** Dis-ambiguate qu **\
+  \** Passes the evaluated numbers_base and reverse_numbers options to the number conversion step. **\
+  \** NOTE(review): presumably this rewrites digit runs into the chosen base (yielding the A and B digits handled by the numbers rules) - confirm. **\
+  \elvish_numbers "\\eval numbers_base" "\\eval reverse_numbers"
+\end
+\beg processor
+  \** Main transcription rules. "===" lines define reusable variables ({NAME}); "-->" lines map source text to tengwar charset names. **\
+  \** Variables are referenced only after their definition in this block, so the order of the lines should be preserved. **\
+  \beg rules litteral
+    {A}                 === a
+    {AA}                === á
+    {E}                 === e
+    {EE}                === é
+    {I}                 === i
+    {II}                === í
+    {O}                 === o
+    {OO}                === ó
+    {U}                 === u
+    {UU}                === ú
+    {AI}                === {A}{I}
+    {AU}                === {A}{U}
+    {EU}                === {E}{U}
+    {IU}                === {I}{U}
+    {OI}                === {O}{I}
+    {UI}                === {U}{I}
+    \** Alternative spellings treated as the same sound. **\
+    {K}                 === (c,k)
+    {W}                 === (v,w)
+    {SS}                === (z,ss)
+    \** {MB}                === (b,mb) **\
+    \** {SS}                === (z,ss) **\
+    \** Source vowel lists and the tehta name lists they pair with; entries are listed in the same a/e/i/o/u order on both sides. **\
+    {VOWELS}            === {A}           * {E}         * {I}           * {O}           * {U}
+    {LVOWELS}           === {AA}          * {EE}        * {II}          * {OO}          * {UU}
+    {TEHTA_XS}          === A_TEHTA_XS      * E_TEHTA_XS      * I_TEHTA_XS     * O_TEHTA_XS     * U_TEHTA_XS
+    {TEHTA__S}          === A_TEHTA_S       * E_TEHTA_S       * I_TEHTA_S      * O_TEHTA_S      * U_TEHTA_S
+    {TEHTA__L}          === A_TEHTA_L       * E_TEHTA_L       * I_TEHTA_L      * O_TEHTA_L      * U_TEHTA_L
+    {TEHTA_XL}          === A_TEHTA_XL      * E_TEHTA_XL      * I_TEHTA_XL     * O_TEHTA_XL     * U_TEHTA_XL
+    \** DIPHTHONGS is the source side, DIPHTHENGS the matching output side (same ai/au/eu/iu/oi/ui order). **\
+    {DIPHTHONGS}         === {AI}              * {AU}              * {EU}              *  {IU}              * {OI}              * {UI}
+    {DIPHTHENGS}         === YANTA A_TEHTA_L    * URE A_TEHTA_L     * URE E_TEHTA_L     * URE I_TEHTA_L     * YANTA O_TEHTA_L   * YANTA U_TEHTA_L
+    {V_D_KER}           === [ {VOWELS}   * {DIPHTHONGS} ]
+    {V_D_IMG_XS}        === [ {TEHTA_XS} * {DIPHTHENGS} ]
+    \** NOTE(review): {V_D_IMG__S} is built from {TEHTA__L} and {V_D_IMG__L} from {TEHTA__S} (same for the _WN variants below), so the S/L suffixes are crossed relative to the tehta sets. **\
+    \** Confirm whether this is deliberate (cf. the "L is probably better" remarks further down) or an accidental swap. **\
+    {V_D_IMG__S}        === [ {TEHTA__L} * {DIPHTHENGS} ]
+    {V_D_IMG__L}        === [ {TEHTA__S} * {DIPHTHENGS} ]
+    {V_D_IMG_XL}        === [ {TEHTA_XL} * {DIPHTHENGS} ]
+    \** _WN variants append {NULL} to both sides, presumably so that a consonant with no following vowel also matches - confirm. **\
+    {V_D_KER_WN}        === [ {VOWELS} * {DIPHTHONGS} * {NULL} ]
+    {V_D_IMG_XS_WN}     === [ {TEHTA_XS} * {DIPHTHENGS} * {NULL} ]
+    {V_D_IMG__S_WN}     === [ {TEHTA__L} * {DIPHTHENGS} * {NULL} ]
+    {V_D_IMG__L_WN}     === [ {TEHTA__S} * {DIPHTHENGS} * {NULL} ]
+    {V_D_IMG_XL_WN}     === [ {TEHTA_XL} * {DIPHTHENGS} * {NULL} ]
+    \** VOWEL RULES **\
+    [{VOWELS}]        -->   TELCO[{TEHTA_XS}] \** Replace isolated short vowels **\
+    [{LVOWELS}]       -->   ARA[{TEHTA_XS}] \**  Replace long vowels **\
+    [{DIPHTHONGS}]     -->  [{DIPHTHENGS}]  \**  Replace diphthongs **\
+    \** TELERIN: changed v/w, removed all y rules **\
+    \** ===================== **\
+    \** 1ST LINE RULES **\
+    \** ===================== **\
+    {L1_KER_1}        === t                   * p
+    {L1_IMG_1}        === TINCO               * PARMA
+    {L1_KER_2}        === {K}                 * q
+    {L1_IMG_2}        === CALMA               * QUESSE
+    {L1_KER_1_GEMS}   === tt                  * pp
+    {L1_IMG_1_GEMS}   === TINCO DASH_INF_S    * PARMA DASH_INF_S
+    \** NORMAL **\
+    [ {L1_KER_1} ] {V_D_KER_WN}        --> [ {L1_IMG_1} ] {V_D_IMG__S_WN}
+    [ {L1_KER_2} ] {V_D_KER_WN}        --> [ {L1_IMG_2} ] {V_D_IMG__S_WN}
+    \** GEMINATED **\
+    [ {L1_KER_1_GEMS} ] {V_D_KER_WN}   --> [ {L1_IMG_1_GEMS} ] {V_D_IMG__S_WN} \** Tengscribe uses S but L is probably better  **\
+    {K}{K}{V_D_KER_WN}                 --> CALMA DASH_INF_S {V_D_IMG__S_WN}
+    ts{V_D_KER_WN}_         --> TINCO SHOOK_RIGHT_L {V_D_IMG__S_WN}
+    ps{V_D_KER_WN}_         --> PARMA SHOOK_RIGHT_L {V_D_IMG__S_WN}
+    x{V_D_KER_WN}           --> CALMA SHOOK_LEFT_L {V_D_IMG__S_WN}   \** render ks for x **\
+    \** ===================== **\
+    \** 2ND LINE RULES **\
+    \** ===================== **\
+    {L2_KER}        === nd      * mb        * ng      *  ngw
+    {L2_IMG}        === ANDO    * UMBAR     * ANGA    *  UNGWE
+    \** STANDARD **\
+    [{L2_KER}]{V_D_KER_WN}  --> [{L2_IMG}]{V_D_IMG_XL_WN}
+    {L2_KER_2}        === d      * b        * g
+    {L2_IMG_2}        === ORE    * VALA     * ANNA
+    \** STANDARD **\
+    [{L2_KER_2}]{V_D_KER_WN}  --> [{L2_IMG_2}]{V_D_IMG__S_WN}
+    \** ===================== **\
+    \** 3RD LINE RULES **\
+    \** ===================== **\
+    {L3_KER_1} === th     * f
+    {L3_IMG_1} === SULE   * FORMEN
+    {L3_KER_2} === h      * hw
+    {L3_IMG_2} === AHA    * HWESTA
+    \** NORMAL **\
+    [{L3_KER_1}]{V_D_KER_WN}  --> [{L3_IMG_1}]{V_D_IMG__S_WN}
+    [{L3_KER_2}]{V_D_KER_WN}  --> [{L3_IMG_2}]{V_D_IMG__S_WN} \**  Tengscribe uses S but L is probably better  **\
+    \** Override h with vowels (descendent) **\
+    _h{V_D_KER}                     --> HYARMEN {V_D_IMG__L}
+    h[{LVOWELS}]                    --> HYARMEN ARA [{TEHTA_XS}]
+    h                               --> AHA
+    \** ===================== **\
+    \** 4TH LINE RULES **\
+    \** ===================== **\
+    {L4_KER}  === nt    * mp    * nc    * nq      \** Not nqu, due to preprocessor **\
+    {L4_IMG}  === ANTO  * AMPA  * ANCA  * UNQUE
+    \** NORMAL **\
+    [{L4_KER}]{V_D_KER_WN}    --> [{L4_IMG}]{V_D_IMG_XL_WN}
+    \** ===================== **\
+    \** 5TH LINE RULES **\
+    \** ===================== **\
+    {L5_KER}  === n     * m     * ñ     * ñw      * _nw
+    {L5_IMG}  === NUMEN * MALTA * NOLDO * NWALME  * NWALME
+    [{L5_KER}]{V_D_KER_WN}  --> [{L5_IMG}]{V_D_IMG_XL_WN}
+    nn{V_D_KER_WN}          --> NUMEN DASH_INF_L {V_D_IMG_XL_WN}
+    mm{V_D_KER_WN}          --> MALTA DASH_INF_L {V_D_IMG_XL_WN}
+    \** ===================== **\
+    \** 6TH LINE RULES **\
+    \** ===================== **\
+    {L6_KER}        === r     * {W}
+    {L6_IMG}        === ROMEN * VILYA
+    [{L6_KER}]{V_D_KER_WN} --> [{L6_IMG}]{V_D_IMG__S_WN}
+    rr{V_D_KER_WN}        --> ROMEN DASH_INF_S {V_D_IMG__S_WN}
+    rd{V_D_KER_WN}        --> ARDA {V_D_IMG__S_WN}
+    \** ===================== **\
+    \** L   LINE RULES **\
+    \** ===================== **\
+    {LINE_L_KER}          === l     * ld      * ll
+    {LINE_L_IMG}          === LAMBE * ALDA    * LAMBE LAMBE_MARK_TILD
+    [{LINE_L_KER}]{V_D_KER_WN}    --> [{LINE_L_IMG}]{V_D_IMG__S_WN}
+    hl{V_D_KER_WN}                --> HALLA LAMBE {V_D_IMG__S_WN}
+    hr{V_D_KER_WN}                --> HALLA ROMEN {V_D_IMG__S_WN}
+    \** ===================== **\
+    \** S/Z LINE RULES **\
+    \** ===================== **\
+    {L8_KER}        === s               * {SS}
+    {L8_IMG}        === SILME_NUQUERNA  * ESSE_NUQUERNA
+    [{L8_KER}]{V_D_KER_WN} --> [{L8_IMG}]{V_D_IMG__S_WN}
+    \** Override lonely s / ss / before consonant **\
+    s               --> SILME
+    s[{LVOWELS}]    --> SILME ARA [{TEHTA_XS}]
+    {SS}            --> ESSE
+    {SS}[{LVOWELS}] --> ESSE ARA [{TEHTA_XS}]
+  \end
+  \** Punctuation rules: maps Latin punctuation to tengwar charset names, or to {NULL}. **\
+  \** Runs of three to seven dots and the ellipsis character share PUNCT_TILD; comma, colon, semicolon and middle dot share PUNCT_DOT. **\
+  \beg rules punctuation
+    . --> PUNCT_DDOT
+    .. --> PUNCT_DOT PUNCT_DDOT PUNCT_DOT
+    …  --> PUNCT_TILD
+    ... --> PUNCT_TILD
+    .... --> PUNCT_TILD
+    ..... --> PUNCT_TILD
+    ...... --> PUNCT_TILD
+    ....... --> PUNCT_TILD
+    , --> PUNCT_DOT
+    : --> PUNCT_DOT
+    ; --> PUNCT_DOT
+    ! --> PUNCT_EXCLAM
+    ? --> PUNCT_INTERR
+    · --> PUNCT_DOT
+    \** Apostrophe **\
+    \** {NULL} targets presumably produce no output, i.e. the character is dropped - confirm against the engine. **\
+    ' --> {NULL}
+    ’ --> {NULL}
+    \** Quotes **\
+    “ --> DQUOT_OPEN
+    ” --> DQUOT_CLOSE
+    « --> DQUOT_OPEN
+    » --> DQUOT_CLOSE
+    - --> {NULL}
+    – --> PUNCT_TILD
+    — --> PUNCT_TILD
+    \** Every bracket style is rendered with the same left/right parenthesis pair. **\
+    [ --> PUNCT_PAREN_L
+    ] --> PUNCT_PAREN_R
+    ( --> PUNCT_PAREN_L
+    ) --> PUNCT_PAREN_R
+    { --> PUNCT_PAREN_L
+    } --> PUNCT_PAREN_R
+    < --> PUNCT_PAREN_L
+    > --> PUNCT_PAREN_R
+    \** Not universal between fonts ... **\
+    $ --> BOOKMARK_SIGN
+    ≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
+    ≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
+  \end
+  \** Digit rules: one charset name per digit, NUM_0 through NUM_11. **\
+  \** NOTE(review): A and B are presumably the two extra digits emitted by \elvish_numbers when numbers_base is BASE_12 - confirm. **\
+  \beg rules numbers
+    0 --> NUM_0
+    1 --> NUM_1
+    2 --> NUM_2
+    3 --> NUM_3
+    4 --> NUM_4
+    5 --> NUM_5
+    6 --> NUM_6
+    7 --> NUM_7
+    8 --> NUM_8
+    9 --> NUM_9
+    A --> NUM_10
+    B --> NUM_11
+  \end
+\end