glaemscribe 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +19 -0
- data/bin/glaemscribe +307 -0
- data/glaemresources/charsets/cirth_ds.cst +205 -0
- data/glaemresources/charsets/sarati_eldamar.cst +256 -0
- data/glaemresources/charsets/tengwar_ds.cst +318 -0
- data/glaemresources/charsets/unicode_gothic.cst +64 -0
- data/glaemresources/charsets/unicode_runes.cst +120 -0
- data/glaemresources/modes/adunaic.glaem +251 -0
- data/glaemresources/modes/blackspeech-annatar.glaem +318 -0
- data/glaemresources/modes/blackspeech.glaem +260 -0
- data/glaemresources/modes/gothic.glaem +78 -0
- data/glaemresources/modes/khuzdul.glaem +141 -0
- data/glaemresources/modes/mercian.glaem +419 -0
- data/glaemresources/modes/oldnorse-medieval.glaem +127 -0
- data/glaemresources/modes/quenya-sarati.glaem +320 -0
- data/glaemresources/modes/quenya.glaem +307 -0
- data/glaemresources/modes/sindarin-beleriand.glaem +285 -0
- data/glaemresources/modes/sindarin-classical.glaem +276 -0
- data/glaemresources/modes/sindarin-daeron.glaem +182 -0
- data/glaemresources/modes/telerin.glaem +302 -0
- data/glaemresources/modes/valarin-sarati.glaem +210 -0
- data/glaemresources/modes/westron.glaem +340 -0
- data/glaemresources/modes/westsaxon.glaem +342 -0
- data/lib/api/charset.rb +84 -0
- data/lib/api/charset_parser.rb +55 -0
- data/lib/api/constants.rb +29 -0
- data/lib/api/debug.rb +36 -0
- data/lib/api/eval.rb +268 -0
- data/lib/api/fragment.rb +113 -0
- data/lib/api/glaeml.rb +200 -0
- data/lib/api/if_tree.rb +96 -0
- data/lib/api/mode.rb +112 -0
- data/lib/api/mode_parser.rb +314 -0
- data/lib/api/option.rb +64 -0
- data/lib/api/post_processor/reverse.rb +36 -0
- data/lib/api/pre_processor/downcase.rb +35 -0
- data/lib/api/pre_processor/elvish_numbers.rb +47 -0
- data/lib/api/pre_processor/rxsubstitute.rb +40 -0
- data/lib/api/pre_processor/substitute.rb +38 -0
- data/lib/api/pre_processor/up_down_tehta_split.rb +138 -0
- data/lib/api/resource_manager.rb +130 -0
- data/lib/api/rule.rb +99 -0
- data/lib/api/rule_group.rb +159 -0
- data/lib/api/sheaf.rb +70 -0
- data/lib/api/sheaf_chain.rb +86 -0
- data/lib/api/sheaf_chain_iterator.rb +108 -0
- data/lib/api/sub_rule.rb +40 -0
- data/lib/api/transcription_pre_post_processor.rb +118 -0
- data/lib/api/transcription_processor.rb +137 -0
- data/lib/api/transcription_tree_node.rb +91 -0
- data/lib/glaemscribe.rb +70 -0
- metadata +112 -0
@@ -0,0 +1,342 @@
|
|
1
|
+
\**
|
2
|
+
|
3
|
+
Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
the transcription of texts between writing systems, and more
|
5
|
+
specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
invented languages to some of his devised writing systems.
|
7
|
+
|
8
|
+
Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
|
10
|
+
This program is free software: you can redistribute it and/or modify
|
11
|
+
it under the terms of the GNU Affero General Public License as published by
|
12
|
+
the Free Software Foundation, either version 3 of the License, or
|
13
|
+
any later version.
|
14
|
+
|
15
|
+
This program is distributed in the hope that it will be useful,
|
16
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
GNU Affero General Public License for more details.
|
19
|
+
|
20
|
+
You should have received a copy of the GNU Affero General Public License
|
21
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
**\
|
24
|
+
|
25
|
+
\language "Old English"
|
26
|
+
\writing "Tengwar"
|
27
|
+
\mode "West Saxon"
|
28
|
+
\version "0.0.1"
|
29
|
+
\authors "Talagan (Benjamin Babut)"
|
30
|
+
|
31
|
+
\charset tengwar_ds true
|
32
|
+
|
33
|
+
\beg preprocessor
|
34
|
+
\** Work exclusively in lowercase **\
|
35
|
+
\downcase
|
36
|
+
|
37
|
+
\** Simplify trema vowels **\
|
38
|
+
\substitute ä a
|
39
|
+
\substitute ë e
|
40
|
+
\substitute ï i
|
41
|
+
\substitute ö o
|
42
|
+
\substitute ü u
|
43
|
+
\substitute ÿ y
|
44
|
+
|
45
|
+
\substitute "ae" "æ"
|
46
|
+
\substitute "ea" "æa"
|
47
|
+
\substitute "éa" "ǽa"
|
48
|
+
\substitute "7" "⁊"
|
49
|
+
|
50
|
+
\** Disambiguate long vowels **\
|
51
|
+
\rxsubstitute "(ā|â|aa)" "á"
|
52
|
+
\rxsubstitute "(ē|ê|ee)" "é"
|
53
|
+
\rxsubstitute "(ī|î|ii)" "í"
|
54
|
+
\rxsubstitute "(ō|ô|oo)" "ó"
|
55
|
+
\rxsubstitute "(ū|û|uu)" "ú"
|
56
|
+
\rxsubstitute "(ȳ|ŷ|yy)" "ý"
|
57
|
+
|
58
|
+
\up_down_tehta_split "æ,ǽ,a,ä,á,e,ë,é,i,ï,í,o,ö,ó,u,ü,ú,y,ÿ,ý,ø,ǿ,œ,œ́" "t,p,ċ,c,d,b,ġ,g,þ,f,ç,χ,ħ,ð,v,j,ȝ,n,m,r,ĭ,w,l,ld,s,z,h,x,sċ,hw,çt,χt,ħt"
|
59
|
+
\end
|
60
|
+
|
61
|
+
\beg processor
|
62
|
+
|
63
|
+
\beg rules litteral
|
64
|
+
{A} === a
|
65
|
+
{AA} === á
|
66
|
+
{E} === e
|
67
|
+
{EE} === é
|
68
|
+
{I} === i
|
69
|
+
{II} === í
|
70
|
+
{O} === o
|
71
|
+
{OO} === ó
|
72
|
+
{U} === u
|
73
|
+
{UU} === ú
|
74
|
+
{Y} === y
|
75
|
+
{YY} === ý
|
76
|
+
|
77
|
+
{AE} === (æ,ae)
|
78
|
+
{AEAE} === (ǽ,ǣ)
|
79
|
+
|
80
|
+
{OE} === (ø,œ)
|
81
|
+
{OEOE} === (ǿ,œ́)
|
82
|
+
|
83
|
+
\** Diphthongs are always split, so we consider that there are none. **\
|
84
|
+
\** @ is the phantom vowel **\
|
85
|
+
|
86
|
+
{S_VOWELS_NP} === {A} * {AE} * {OE} * {E} * {I} * {O} * {U} * {Y} * {A}_
|
87
|
+
{S_VOWELS} === {S_VOWELS_NP} * @
|
88
|
+
|
89
|
+
\** UP TEHTAS **\
|
90
|
+
{UTEHTA_NP_XS} === E_TEHTA_XS * A_TEHTA_XS * E_TEHTA_DOUBLE_XS * Y_TEHTA_XS * I_TEHTA_XS * O_TEHTA_XS * U_TEHTA_XS * THSUP_SEV_XS * VILYA
|
91
|
+
{UTEHTA_NP__S} === E_TEHTA_S * A_TEHTA_S * E_TEHTA_DOUBLE_S * Y_TEHTA_S * I_TEHTA_S * O_TEHTA_S * U_TEHTA_S * THSUP_SEV_S * VILYA
|
92
|
+
{UTEHTA_NP__L} === E_TEHTA_L * A_TEHTA_L * E_TEHTA_DOUBLE_L * Y_TEHTA_L * I_TEHTA_L * O_TEHTA_L * U_TEHTA_L * THSUP_SEV_L * VILYA
|
93
|
+
{UTEHTA_NP_XL} === E_TEHTA_XL * A_TEHTA_XL * E_TEHTA_DOUBLE_XL * Y_TEHTA_XL * I_TEHTA_XL * O_TEHTA_XL * U_TEHTA_XL * THSUP_SEV_XL * VILYA
|
94
|
+
{UTEHTA_XS} === {UTEHTA_NP_XS} * {NULL}
|
95
|
+
{UTEHTA__S} === {UTEHTA_NP__S} * {NULL}
|
96
|
+
{UTEHTA__L} === {UTEHTA_NP__L} * {NULL}
|
97
|
+
{UTEHTA_XL} === {UTEHTA_NP_XL} * {NULL}
|
98
|
+
|
99
|
+
\** FOR LONG VOWELS **\
|
100
|
+
{L_VOWELS} === {AA} * {AEAE} * {OEOE} * {EE} * {II} * {OO} * {UU} * {YY}
|
101
|
+
{L_PTEHTAS} === ARA E_TEHTA_XS * ARA A_TEHTA_XS * ARA E_TEHTA_DOUBLE_XS * ARA Y_TEHTA_XS * ARA I_TEHTA_XS * ARA O_TEHTA_XS * ARA U_TEHTA_XS * ARA THSUP_SEV_XS
|
102
|
+
|
103
|
+
\** DOWN TEHTAS **\
|
104
|
+
{DTEHTA_XS} === THINF_ACCENT_XS * THINF_TDOT_XS * TH_SUB_CIRC_XS * THINF_DDOT_XS * THINF_DOT_XS * TELCO O_TEHTA_XS * TELCO U_TEHTA_XS * THINF_STROKE_XS * VILYA * {NULL}
|
105
|
+
{DTEHTA__S} === THINF_ACCENT_S * THINF_TDOT_S * TH_SUB_CIRC_S * THINF_DDOT_S * THINF_DOT_S * TELCO O_TEHTA_XS * TELCO U_TEHTA_XS * THINF_STROKE_S * VILYA * {NULL}
|
106
|
+
{DTEHTA__L} === THINF_ACCENT_L * THINF_TDOT_L * TH_SUB_CIRC_L * THINF_DDOT_L * THINF_DOT_L * TELCO O_TEHTA_XS * TELCO U_TEHTA_XS * THINF_STROKE_L * VILYA * {NULL}
|
107
|
+
{DTEHTA_XL} === THINF_ACCENT_XL * THINF_TDOT_XL * TH_SUB_CIRC_XL * THINF_DDOT_XL * THINF_DOT_XL * TELCO O_TEHTA_XS * TELCO U_TEHTA_XS * THINF_STROKE_XL * VILYA * {NULL}
|
108
|
+
|
109
|
+
{S_VOWELS_NP_KER} === [ {S_VOWELS_NP} ]
|
110
|
+
{S_VOWELS_NP_KER_WN} === [ {S_VOWELS_NP} * {NULL} ]
|
111
|
+
{S_VOWELS_KER} === [ {S_VOWELS} ]
|
112
|
+
{S_VOWELS_KER_WN} === [ {S_VOWELS} * {NULL} ]
|
113
|
+
|
114
|
+
\** Img bundles for all vowels **\
|
115
|
+
{SU_VOWELS_IMG_XS} === [ {UTEHTA_XS} ]
|
116
|
+
{SU_VOWELS_IMG__S} === [ {UTEHTA__S} ]
|
117
|
+
{SU_VOWELS_IMG__L} === [ {UTEHTA__L} ]
|
118
|
+
{SU_VOWELS_IMG_XL} === [ {UTEHTA_XL} ]
|
119
|
+
{SD_VOWELS_IMG_XS} === [ {DTEHTA_XS} ]
|
120
|
+
{SD_VOWELS_IMG__S} === [ {DTEHTA__S} ]
|
121
|
+
{SD_VOWELS_IMG__L} === [ {DTEHTA__L} ]
|
122
|
+
{SD_VOWELS_IMG_XL} === [ {DTEHTA_XL} ]
|
123
|
+
|
124
|
+
{SU_VOWELS_IMG_XS_WN} === [ {UTEHTA_XS} * {NULL} ]
|
125
|
+
{SU_VOWELS_IMG__S_WN} === [ {UTEHTA__S} * {NULL} ]
|
126
|
+
{SU_VOWELS_IMG__L_WN} === [ {UTEHTA__L} * {NULL} ]
|
127
|
+
{SU_VOWELS_IMG_XL_WN} === [ {UTEHTA_XL} * {NULL} ]
|
128
|
+
{SD_VOWELS_IMG_XS_WN} === [ {DTEHTA_XS} * {NULL} ]
|
129
|
+
{SD_VOWELS_IMG__S_WN} === [ {DTEHTA__S} * {NULL} ]
|
130
|
+
{SD_VOWELS_IMG__L_WN} === [ {DTEHTA__L} * {NULL} ]
|
131
|
+
{SD_VOWELS_IMG_XL_WN} === [ {DTEHTA_XL} * {NULL} ]
|
132
|
+
|
133
|
+
\** Rule for long vowels **\
|
134
|
+
[ {L_VOWELS} ] --> [ {L_PTEHTAS} ]
|
135
|
+
{UU} --> VALA U_TEHTA_L \** # Special treatment **\
|
136
|
+
|
137
|
+
\** Fallback rule for short vowels **\
|
138
|
+
@ --> {NULL} \** # If found alone, put nothing **\
|
139
|
+
[ {S_VOWELS_NP} ] --> TELCO [ {UTEHTA_NP_XS} ]
|
140
|
+
{A}_ --> VILYA \** # We don't want a short carrier for a_ **\
|
141
|
+
(w,u)_ --> VALA \** # Only resolved after vowels **\
|
142
|
+
|
143
|
+
\** ############# **\
|
144
|
+
\** CONSONANTS # **\
|
145
|
+
\** ############# **\
|
146
|
+
|
147
|
+
\** ## 1st Line (Voiceless occlusives) **\
|
148
|
+
\** ## Short upper dash for nasalisation **\
|
149
|
+
{L1_KER_1} === t * p
|
150
|
+
{L1_IMG_1} === TINCO * PARMA
|
151
|
+
{L1_KER_2} === ċ * c * k
|
152
|
+
{L1_IMG_2} === CALMA * QUESSE * QUESSE
|
153
|
+
|
154
|
+
{L1_KER_1_GEMS} === tt * pp
|
155
|
+
{L1_IMG_1_GEMS} === TINCO THINF_DSTROKE_XS * PARMA THINF_DSTROKE_XS
|
156
|
+
{L1_KER_2_GEMS} === ċċ * cc * kk
|
157
|
+
{L1_IMG_2_GEMS} === CALMA THINF_DSTROKE_XL * QUESSE THINF_DSTROKE_XL * QUESSE THINF_DSTROKE_XL
|
158
|
+
|
159
|
+
|
160
|
+
{S_VOWELS_KER_WN}[{L1_KER_1}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L1_IMG_1}]{SU_VOWELS_IMG__L_WN}{SD_VOWELS_IMG__S_WN}
|
161
|
+
{S_VOWELS_KER_WN}[{L1_KER_1_GEMS}] --> 2,1 --> [{L1_IMG_1_GEMS}]{SU_VOWELS_IMG__L_WN}
|
162
|
+
{S_VOWELS_KER_WN}[{L1_KER_2}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L1_IMG_2}]{SU_VOWELS_IMG__L_WN}{SD_VOWELS_IMG_XL_WN}
|
163
|
+
{S_VOWELS_KER_WN}[{L1_KER_2_GEMS}] --> 2,1 --> [{L1_IMG_2_GEMS}]{SU_VOWELS_IMG__L_WN}
|
164
|
+
|
165
|
+
{S_VOWELS_KER_WN}[ nt * mp ]{S_VOWELS_KER_WN} --> 2,1,3 --> [ TINCO TILD_SUP_S * PARMA TILD_SUP_S ]{SU_VOWELS_IMG__L_WN}{SD_VOWELS_IMG__S_WN}
|
166
|
+
{S_VOWELS_KER_WN}[ nċ * nc ]{S_VOWELS_KER_WN} --> 2,1,3 --> [ CALMA TILD_SUP_S * QUESSE TILD_SUP_S ]{SU_VOWELS_IMG__L_WN}{SD_VOWELS_IMG_XL_WN}
|
167
|
+
|
168
|
+
|
169
|
+
\** ## 2nd Line (Voiced occlusives) **\
|
170
|
+
\** ## Long upper dash for nasalisation **\
|
171
|
+
{L2_KER} === d * b * ġ * g
|
172
|
+
{L2_IMG} === ANDO * UMBAR * ANGA * UNGWE
|
173
|
+
|
174
|
+
{L2_KER_GEMS} === dd * bb * (ċġ,ġġ) * (cg,gg)
|
175
|
+
{L2_IMG_GEMS} === ANDO THINF_DSTROKE_L * UMBAR THINF_DSTROKE_L * ANGA THINF_DSTROKE_L * UNGWE THINF_DSTROKE_L
|
176
|
+
|
177
|
+
|
178
|
+
{S_VOWELS_KER_WN}[{L2_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L2_IMG}]{SU_VOWELS_IMG_XL_WN}{SD_VOWELS_IMG_XL_WN}
|
179
|
+
{S_VOWELS_KER_WN}[{L2_KER_GEMS}] --> 2,1 --> [{L2_IMG_GEMS}]{SU_VOWELS_IMG_XL_WN}
|
180
|
+
|
181
|
+
{S_VOWELS_KER_WN}[ nd * mb * nġ * ng ]{S_VOWELS_KER_WN} --> 2,1,3 --> [ ANDO TILD_SUP_L * UMBAR TILD_SUP_L * ANGA TILD_SUP_L * UNGWE TILD_SUP_L ]{SU_VOWELS_IMG_XL_WN}{SD_VOWELS_IMG_XL_WN}
|
182
|
+
|
183
|
+
|
184
|
+
\** ## 3rd Line (Voiceless fricatives) **\
|
185
|
+
\** ## Short upper dash for nasalisation **\
|
186
|
+
{L3_KER_1} === þ * f
|
187
|
+
{L3_IMG_1} === SULE * FORMEN
|
188
|
+
{L3_KER_2} === ç * (χ,ħ)
|
189
|
+
{L3_IMG_2} === AHA * HWESTA
|
190
|
+
|
191
|
+
{L3_KER_1_GEMS} === þþ * ff
|
192
|
+
{L3_IMG_1_GEMS} === SULE THINF_DSTROKE_XS * FORMEN THINF_DSTROKE_XS
|
193
|
+
{L3_KER_2_GEMS} === çç * (χχ,ħħ)
|
194
|
+
{L3_IMG_2_GEMS} === AHA THINF_DSTROKE_XS * HWESTA THINF_DSTROKE_XS
|
195
|
+
|
196
|
+
{S_VOWELS_KER_WN}[{L3_KER_1}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L3_IMG_1}]{SU_VOWELS_IMG__S_WN}{SD_VOWELS_IMG__L_WN}
|
197
|
+
{S_VOWELS_KER_WN}[{L3_KER_1_GEMS}] --> 2,1 --> [{L3_IMG_1_GEMS}]{SU_VOWELS_IMG__S_WN}
|
198
|
+
{S_VOWELS_KER_WN}[{L3_KER_2}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L3_IMG_2}]{SU_VOWELS_IMG__L_WN}{SD_VOWELS_IMG_XL_WN}
|
199
|
+
{S_VOWELS_KER_WN}[{L3_KER_2_GEMS}] --> 2,1 --> [{L3_IMG_2_GEMS}]{SU_VOWELS_IMG__L_WN}
|
200
|
+
|
201
|
+
|
202
|
+
\** ## 4th Line (Voiced fricatives) **\
|
203
|
+
\** ## Long upper dash for nasalisation **\
|
204
|
+
|
205
|
+
{L4_KER} === ð * v * j * ȝ
|
206
|
+
{L4_IMG} === ANTO * AMPA * ANCA * UNQUE
|
207
|
+
|
208
|
+
{L4_KER_GEMS} === ðð * vv * jj * ȝȝ
|
209
|
+
{L4_IMG_GEMS} === ANTO THINF_DSTROKE_L * AMPA THINF_DSTROKE_L * ANCA THINF_DSTROKE_L * UNQUE THINF_DSTROKE_L
|
210
|
+
|
211
|
+
|
212
|
+
{S_VOWELS_KER_WN}[{L4_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L4_IMG}]{SU_VOWELS_IMG_XL_WN}{SD_VOWELS_IMG_XL_WN}
|
213
|
+
{S_VOWELS_KER_WN}[{L4_KER_GEMS}] --> 2,1 --> [{L4_IMG_GEMS}]{SU_VOWELS_IMG_XL_WN}
|
214
|
+
|
215
|
+
|
216
|
+
\** ## 5th Line (Nasals) **\
|
217
|
+
\** ## Long upper dash for nasalisation (wins on gemination) **\
|
218
|
+
|
219
|
+
{L5_KER} === n * m
|
220
|
+
{L5_IMG} === NUMEN * MALTA
|
221
|
+
|
222
|
+
{S_VOWELS_KER_WN}[{L5_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L5_IMG}]{SU_VOWELS_IMG_XL_WN}{SD_VOWELS_IMG_XL_WN}
|
223
|
+
{S_VOWELS_KER_WN}[ nn * mm ]{S_VOWELS_KER_WN} --> 2,1,3 --> [ NUMEN TILD_SUP_L * MALTA TILD_SUP_L ]{SU_VOWELS_IMG_XL_WN}{SD_VOWELS_IMG_XL_WN}
|
224
|
+
|
225
|
+
|
226
|
+
\** ## 6th Line (Approximants == fr : Spirantes) **\
|
227
|
+
\** ## Short upper dash for nasalisation **\
|
228
|
+
|
229
|
+
{L6_KER} === r * ĭ
|
230
|
+
{L6_IMG} === ORE * ANNA
|
231
|
+
{L6_KER_GEMS} === rr * ĭĭ
|
232
|
+
{L6_IMG_GEMS} === ORE THINF_DSTROKE_XS * ANNA THINF_DSTROKE_XS
|
233
|
+
|
234
|
+
{S_VOWELS_KER_WN}[{L6_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L6_IMG}]{SU_VOWELS_IMG__S_WN}{SD_VOWELS_IMG__L_WN}
|
235
|
+
{S_VOWELS_KER_WN}[{L6_KER_GEMS}] --> 2,1 --> [{L6_IMG_GEMS}]{SU_VOWELS_IMG__S_WN}
|
236
|
+
|
237
|
+
|
238
|
+
\** ## Liquids **\
|
239
|
+
\** ## **\
|
240
|
+
|
241
|
+
{S_VOWELS_KER_WN}w{S_VOWELS_KER_WN} --> 2,1,3 --> ROMEN {SU_VOWELS_IMG__L_WN}{SD_VOWELS_IMG_XS_WN}
|
242
|
+
{S_VOWELS_KER_WN}ww --> 2,1 --> ROMEN THINF_DSTROKE_XS {SU_VOWELS_IMG__L_WN}
|
243
|
+
{S_VOWELS_KER_WN}[l * ll] --> 2,1 --> [LAMBE * LAMBE THINF_DSTROKE_FOR_LAMBE]{SU_VOWELS_IMG__L_WN}
|
244
|
+
{S_VOWELS_KER_WN}ld --> 2,1 --> ALDA {SU_VOWELS_IMG__L_WN}
|
245
|
+
|
246
|
+
|
247
|
+
\** ## Alveolar (sifflantes) **\
|
248
|
+
\** ## **\
|
249
|
+
{L8_KER} === s * z
|
250
|
+
{L8_IMG} === SILME_NUQUERNA * ESSE_NUQUERNA
|
251
|
+
{L8_KER_GEMS} === ss * zz
|
252
|
+
{L8_IMG_GEMS} === SILME_NUQUERNA THINF_DSTROKE_XS * ESSE_NUQUERNA THINF_DSTROKE_L
|
253
|
+
|
254
|
+
{S_VOWELS_KER_WN}[{L8_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L8_IMG}]{SU_VOWELS_IMG__S_WN}{SD_VOWELS_IMG__S_WN}
|
255
|
+
{S_VOWELS_KER_WN}[{L8_KER_GEMS}] --> 2,1 --> [{L8_IMG_GEMS}]{SU_VOWELS_IMG__S_WN}
|
256
|
+
|
257
|
+
|
258
|
+
\** ## FINAL S (Challenging!) **\
|
259
|
+
{S_VOWELS_KER_WN}s_ --> SHOOK_BEAUTIFUL {SU_VOWELS_IMG_XL_WN} \** # Final rule for s **\
|
260
|
+
_{S_VOWELS_KER_WN}s_ --> SILME_NUQUERNA {SU_VOWELS_IMG__S_WN} \** # Rule _es_ **\
|
261
|
+
[{L_VOWELS} * _ ]{S_VOWELS_KER_WN}s_ --> 2,1,3 --> [{L_PTEHTAS} * {NULL} ] SILME_NUQUERNA {SU_VOWELS_IMG__S_WN} \** # Rule for éis_, és_ **\
|
262
|
+
{NULL}[ {S_VOWELS_NP} ]{S_VOWELS_KER}s_ --> 1,3,2,4 --> TELCO [ {UTEHTA_NP_XS} ] SILME_NUQUERNA {SU_VOWELS_IMG__S} \** # Rule for ies_ **\
|
263
|
+
|
264
|
+
s --> SILME \** # Overload lonely s **\
|
265
|
+
z --> ESSE \** # Overload lonely z **\
|
266
|
+
|
267
|
+
\** ## Ligatures **\
|
268
|
+
\** ## **\
|
269
|
+
{LINE_VARIOUS_1_KER} === sċ
|
270
|
+
{LINE_VARIOUS_1_IMG} === ANCA_CLOSED
|
271
|
+
{LINE_VARIOUS_2_KER} === hw * çt * (χt,ħt)
|
272
|
+
{LINE_VARIOUS_2_IMG} === HARP_SHAPED * AHA_TINCO * HWESTA_TINCO
|
273
|
+
|
274
|
+
{S_VOWELS_KER_WN}[{LINE_VARIOUS_1_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{LINE_VARIOUS_1_IMG}]{SU_VOWELS_IMG_XL_WN}{SD_VOWELS_IMG_XL_WN}
|
275
|
+
{S_VOWELS_KER_WN}[{LINE_VARIOUS_2_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{LINE_VARIOUS_2_IMG}]{SU_VOWELS_IMG__S_WN}{SD_VOWELS_IMG__S_WN}
|
276
|
+
|
277
|
+
|
278
|
+
\** ## Various **\
|
279
|
+
\** ## **\
|
280
|
+
{S_VOWELS_KER_WN}h{S_VOWELS_KER_WN} --> 2,1,3 --> HYARMEN {SU_VOWELS_IMG_XS_WN}{SD_VOWELS_IMG__L_WN}
|
281
|
+
{S_VOWELS_KER_WN}hh --> 2,1 --> HYARMEN THINF_DSTROKE_XL {SU_VOWELS_IMG_XS_WN}
|
282
|
+
|
283
|
+
|
284
|
+
\** ## X **\
|
285
|
+
\** ## For x, due to the cedilla, we cannot put tehtas under the tengwa. **\
|
286
|
+
{S_VOWELS_KER_WN}x --> 2,1 --> QUESSE SHOOK_LEFT_L {SU_VOWELS_IMG__S_WN}
|
287
|
+
{S_VOWELS_KER_WN}xx --> 2,1 --> QUESSE SHOOK_LEFT_L THINF_DSTROKE_XL {SU_VOWELS_IMG__S_WN}
|
288
|
+
{S_VOWELS_KER_WN}nx --> 2,1 --> QUESSE SHOOK_LEFT_L TILD_SUP_S {SU_VOWELS_IMG__S_WN}
|
289
|
+
\end
|
290
|
+
|
291
|
+
\beg rules punctuation
|
292
|
+
⁊ --> OLD_ENGLISH_AND
|
293
|
+
|
294
|
+
. --> PUNCT_DDOT
|
295
|
+
.. --> PUNCT_DOT PUNCT_DDOT PUNCT_DOT
|
296
|
+
… --> PUNCT_TILD
|
297
|
+
... --> PUNCT_TILD
|
298
|
+
.... --> PUNCT_TILD
|
299
|
+
..... --> PUNCT_TILD
|
300
|
+
...... --> PUNCT_TILD
|
301
|
+
....... --> PUNCT_TILD
|
302
|
+
|
303
|
+
, --> PUNCT_DOT
|
304
|
+
: --> PUNCT_DOT
|
305
|
+
; --> PUNCT_DOT
|
306
|
+
! --> PUNCT_EXCLAM
|
307
|
+
? --> PUNCT_INTERR
|
308
|
+
· --> PUNCT_DOT
|
309
|
+
|
310
|
+
\** Apostrophe **\
|
311
|
+
|
312
|
+
' --> {NULL}
|
313
|
+
’ --> {NULL}
|
314
|
+
|
315
|
+
\** Quotes **\
|
316
|
+
|
317
|
+
“ --> DQUOT_OPEN
|
318
|
+
” --> DQUOT_CLOSE
|
319
|
+
« --> DQUOT_OPEN
|
320
|
+
» --> DQUOT_CLOSE
|
321
|
+
|
322
|
+
- --> {NULL}
|
323
|
+
– --> PUNCT_TILD
|
324
|
+
— --> PUNCT_TILD
|
325
|
+
|
326
|
+
[ --> PUNCT_PAREN_L
|
327
|
+
] --> PUNCT_PAREN_R
|
328
|
+
( --> PUNCT_PAREN_L_ALT \** TODO : Remove alt ? **\
|
329
|
+
) --> PUNCT_PAREN_R_ALT \** TODO : Remove alt ? **\
|
330
|
+
{ --> PUNCT_PAREN_L
|
331
|
+
} --> PUNCT_PAREN_R
|
332
|
+
< --> PUNCT_PAREN_L
|
333
|
+
> --> PUNCT_PAREN_R
|
334
|
+
|
335
|
+
\** Not universal between fonts ... **\
|
336
|
+
$ --> BOOKMARK_SIGN
|
337
|
+
≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
|
338
|
+
≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
|
339
|
+
|
340
|
+
\end
|
341
|
+
|
342
|
+
\end
|
data/lib/api/charset.rb
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
module Glaemscribe
|
24
|
+
module API
|
25
|
+
# A named table of characters.
#
# Characters are registered one by one with #add_char. Calling #finalize then
# indexes every registered character under each of its names, so that it can
# be retrieved with #[].
class Charset
  # Name given to the charset at construction time.
  attr_reader :name

  # Errors gathered by the last call to #finalize. Writable by callers.
  attr_accessor :errors

  # Every Char registered through #add_char, in registration order.
  attr_reader :chars

  # A single character of the charset.
  class Char
    # Value passed as `line` to Charset#add_char
    # (presumably the line of the definition in the charset file - TODO confirm).
    attr_accessor :line
    # Integer code of the character.
    attr_accessor :code
    # Array of names under which the character can be looked up.
    attr_accessor :names
    # UTF-8 string built from `code`.
    attr_accessor :str

    def initialize
      # An Array, like the value Charset#add_char assigns later on.
      @names = []
    end
  end

  # name - the charset name, exposed through #name.
  def initialize(name)
    @name         = name
    @chars        = []
    @errors       = []
    # Created here so that #[] answers nil (instead of raising on nil)
    # when it is called before #finalize.
    @lookup_table = {}
  end

  # Registers a character.
  #
  # line  - stored on the Char and reported by #finalize on duplicate names.
  # code  - Integer code of the character, converted with Integer#chr('UTF-8').
  # names - Array of Strings naming the character.
  #
  # Characters without any name, or having "?" among their names, are ignored.
  def add_char(line, code, names)
    return if names.empty? || names.include?("?")

    char       = Char.new
    char.line  = line
    char.code  = code
    char.names = names
    char.str   = code.chr('UTF-8')
    @chars << char
  end

  # Rebuilds the name => Char lookup table from the registered characters.
  #
  # Resets #errors, then records one Glaeml::Error for each name that is
  # already taken; the first character registered under a name keeps it.
  def finalize
    @errors       = []
    @lookup_table = {}

    @chars.each do |char|
      char.names.each do |cname|
        if @lookup_table.key?(cname)
          @errors << Glaeml::Error.new(char.line, "Character #{cname} found twice.")
        else
          @lookup_table[cname] = char
        end
      end
    end

    API::Debug::log("Finalized charset '#{@name}', #{@lookup_table.size} symbols loaded.")
  end

  # Returns the Char indexed under `symbol` by the last #finalize,
  # or nil when there is none (including when #finalize was never called).
  def [](symbol)
    @lookup_table[symbol]
  end
end
|
83
|
+
end
|
84
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
module Glaemscribe
|
24
|
+
module API
|
25
|
+
|
26
|
+
# Builds a Charset from a charset definition file.
class CharsetParser
  def initialize
    # Charset produced by the last call to #parse.
    @charset = nil
  end

  # Reads the file at `file_path`, parses it with Glaeml::Parser and returns
  # the resulting Charset.
  #
  # The charset is named after
  # ResourceManager::charset_name_from_file_path(file_path).
  # When the parsed document reports errors, they are copied to the charset's
  # errors and the charset is returned without any character and without
  # being finalized.
  # Otherwise each "char" element of the document is registered with
  # Charset#add_char - first argument read as the hexadecimal code, remaining
  # arguments stripped and used as names once blank ones are dropped - and
  # the charset is finalized.
  def parse(file_path)
    @charset = Charset.new(ResourceManager::charset_name_from_file_path(file_path))

    # Block form so that the file handle is closed as soon as the content is read.
    raw = File.open(file_path, "rb:utf-8") { |file| file.read }
    doc = Glaeml::Parser.new.parse(raw)

    if doc.errors.any?
      @charset.errors = doc.errors
      return @charset
    end

    doc.root_node.gpath("char").each do |char_element|
      # NOTE(review): a "char" element without arguments would raise here -
      # confirm that the Glaeml parser always provides at least one.
      code  = char_element.args[0].hex
      names = char_element.args[1..-1].map { |cname| cname.strip }.reject { |cname| cname.empty? }
      @charset.add_char(char_element.line, code, names)
    end
    @charset.finalize

    @charset
  end
end
|
54
|
+
end
|
55
|
+
end
|