glaemscribe 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +19 -0
- data/bin/glaemscribe +307 -0
- data/glaemresources/charsets/cirth_ds.cst +205 -0
- data/glaemresources/charsets/sarati_eldamar.cst +256 -0
- data/glaemresources/charsets/tengwar_ds.cst +318 -0
- data/glaemresources/charsets/unicode_gothic.cst +64 -0
- data/glaemresources/charsets/unicode_runes.cst +120 -0
- data/glaemresources/modes/adunaic.glaem +251 -0
- data/glaemresources/modes/blackspeech-annatar.glaem +318 -0
- data/glaemresources/modes/blackspeech.glaem +260 -0
- data/glaemresources/modes/gothic.glaem +78 -0
- data/glaemresources/modes/khuzdul.glaem +141 -0
- data/glaemresources/modes/mercian.glaem +419 -0
- data/glaemresources/modes/oldnorse-medieval.glaem +127 -0
- data/glaemresources/modes/quenya-sarati.glaem +320 -0
- data/glaemresources/modes/quenya.glaem +307 -0
- data/glaemresources/modes/sindarin-beleriand.glaem +285 -0
- data/glaemresources/modes/sindarin-classical.glaem +276 -0
- data/glaemresources/modes/sindarin-daeron.glaem +182 -0
- data/glaemresources/modes/telerin.glaem +302 -0
- data/glaemresources/modes/valarin-sarati.glaem +210 -0
- data/glaemresources/modes/westron.glaem +340 -0
- data/glaemresources/modes/westsaxon.glaem +342 -0
- data/lib/api/charset.rb +84 -0
- data/lib/api/charset_parser.rb +55 -0
- data/lib/api/constants.rb +29 -0
- data/lib/api/debug.rb +36 -0
- data/lib/api/eval.rb +268 -0
- data/lib/api/fragment.rb +113 -0
- data/lib/api/glaeml.rb +200 -0
- data/lib/api/if_tree.rb +96 -0
- data/lib/api/mode.rb +112 -0
- data/lib/api/mode_parser.rb +314 -0
- data/lib/api/option.rb +64 -0
- data/lib/api/post_processor/reverse.rb +36 -0
- data/lib/api/pre_processor/downcase.rb +35 -0
- data/lib/api/pre_processor/elvish_numbers.rb +47 -0
- data/lib/api/pre_processor/rxsubstitute.rb +40 -0
- data/lib/api/pre_processor/substitute.rb +38 -0
- data/lib/api/pre_processor/up_down_tehta_split.rb +138 -0
- data/lib/api/resource_manager.rb +130 -0
- data/lib/api/rule.rb +99 -0
- data/lib/api/rule_group.rb +159 -0
- data/lib/api/sheaf.rb +70 -0
- data/lib/api/sheaf_chain.rb +86 -0
- data/lib/api/sheaf_chain_iterator.rb +108 -0
- data/lib/api/sub_rule.rb +40 -0
- data/lib/api/transcription_pre_post_processor.rb +118 -0
- data/lib/api/transcription_processor.rb +137 -0
- data/lib/api/transcription_tree_node.rb +91 -0
- data/lib/glaemscribe.rb +70 -0
- metadata +112 -0
@@ -0,0 +1,320 @@
|
|
1
|
+
\**
|
2
|
+
|
3
|
+
Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
the transcription of texts between writing systems, and more
|
5
|
+
specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
invented languages to some of his devised writing systems.
|
7
|
+
|
8
|
+
Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
|
10
|
+
This program is free software: you can redistribute it and/or modify
|
11
|
+
it under the terms of the GNU Affero General Public License as published by
|
12
|
+
the Free Software Foundation, either version 3 of the License, or
|
13
|
+
any later version.
|
14
|
+
|
15
|
+
This program is distributed in the hope that it will be useful,
|
16
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
GNU Affero General Public License for more details.
|
19
|
+
|
20
|
+
You should have received a copy of the GNU Affero General Public License
|
21
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
**\
|
24
|
+
|
25
|
+
\language "Quenya"
|
26
|
+
\writing "Sarati"
|
27
|
+
\mode "Quenya Usage"
|
28
|
+
\version "0.0.1"
|
29
|
+
\authors "Talagan (Benjamin Babut)"
|
30
|
+
|
31
|
+
\charset sarati_eldamar true
|
32
|
+
|
33
|
+
\beg preprocessor
|
34
|
+
\** Work exclusively downcase **\
|
35
|
+
\downcase
|
36
|
+
|
37
|
+
\** Simplify trema vowels **\
|
38
|
+
\** Removed ï and ü, not quite sure how to treat them for qenya : ex : oïkta diphthong or not ? **\
|
39
|
+
\substitute ä a
|
40
|
+
\substitute ë e
|
41
|
+
\substitute ö o
|
42
|
+
\substitute ÿ y
|
43
|
+
|
44
|
+
\** Disambiguate long vowels **\
|
45
|
+
\rxsubstitute "(ā|â|aa)" "á"
|
46
|
+
\rxsubstitute "(ē|ê)" "é" \** ee is allowed in qenya **\
|
47
|
+
\rxsubstitute "(ī|î|iï|ïi|ïï|ii)" "í"
|
48
|
+
\rxsubstitute "(ō|ô|oo)" "ó"
|
49
|
+
\rxsubstitute "(ū|û|uü|üu|üü|uu)" "ú"
|
50
|
+
\rxsubstitute "(ȳ|ŷ|yy)" "ý"
|
51
|
+
|
52
|
+
\substitute "iu" "iw"
|
53
|
+
\substitute "eu" "ew"
|
54
|
+
\substitute "au" "aw"
|
55
|
+
\substitute "ai" "ay"
|
56
|
+
\substitute "oi" "oy"
|
57
|
+
\substitute "ui" "uy"
|
58
|
+
|
59
|
+
\substitute "á" "aa" \** Split long a **\
|
60
|
+
\substitute "ā" "aa" \** '' **\
|
61
|
+
\substitute "â" "aa" \** '' **\
|
62
|
+
|
63
|
+
\substitute "qu" "q" \** Dis-ambiguate qu **\
|
64
|
+
\end
|
65
|
+
|
66
|
+
\beg processor
|
67
|
+
|
68
|
+
\outspace SARATI_SPACE
|
69
|
+
|
70
|
+
\beg rules litteral
|
71
|
+
{A} === a
|
72
|
+
{AA} === aa
|
73
|
+
{E} === e
|
74
|
+
{EE} === é
|
75
|
+
{I} === (i,ï)
|
76
|
+
{II} === í
|
77
|
+
{O} === o
|
78
|
+
{OO} === ó
|
79
|
+
{U} === (u,ü)
|
80
|
+
{UU} === ú
|
81
|
+
|
82
|
+
{K} === (c,k)
|
83
|
+
{MB} === (b,mb)
|
84
|
+
{SS} === (z,ss)
|
85
|
+
|
86
|
+
{VOWELS} === {A} * {E} * {I} * {O} * {U}
|
87
|
+
{LVOWELS} === {AA} * {EE} * {II} * {OO} * {UU}
|
88
|
+
{TEHTAS_} === SARATI_QUENYA_A * SARATI_QUENYA_E * SARATI_QUENYA_I * SARATI_QUENYA_O * SARATI_QUENYA_U
|
89
|
+
{STEHTAS} === {NULL} * SARATI_QUENYA_E * SARATI_QUENYA_I * SARATI_QUENYA_O * SARATI_QUENYA_U
|
90
|
+
{LTEHTAS} === SARATI_QUENYA_A SARATI_DASH_U * SARATI_QUENYA_E SARATI_DASH_U * SARATI_QUENYA_I SARATI_DASH_U * SARATI_QUENYA_O SARATI_DASH_U * SARATI_QUENYA_U SARATI_DASH_U
|
91
|
+
{LTEHTAS_FOR_CONSONANTS} === SARATI_QUENYA_A * SARATI_QUENYA_E SARATI_DASH_U * SARATI_QUENYA_I SARATI_DASH_U * SARATI_QUENYA_O SARATI_DASH_U * SARATI_QUENYA_U SARATI_DASH_U
|
92
|
+
|
93
|
+
{V_L_KER_WN} === [ {VOWELS} * {LVOWELS} * {NULL} ]
|
94
|
+
{V_IMG_FOR_CONSONNANTS_WN} === [ {STEHTAS} * {LTEHTAS_FOR_CONSONANTS} * SARATI_DOT_D ] \** No vowel == dot below **\
|
95
|
+
|
96
|
+
\** RULES **\
|
97
|
+
|
98
|
+
[{VOWELS}] --> [{TEHTAS_}] SARATI_QUENYA_LONG_VOWEL_CARRIER \** Isolated vowels: tehta followed by a carrier (order is reversed for RTL). NOTE(review): the original note said "short carrier" but this rule emits the LONG vowel carrier, same as the long-vowel rule below - confirm which carrier is intended **\
|
99
|
+
[{LVOWELS}] --> [{LTEHTAS}] SARATI_QUENYA_LONG_VOWEL_CARRIER \** Long vowels: carrier + dash + tehta **\
|
100
|
+
|
101
|
+
\** FIRST LINE **\
|
102
|
+
|
103
|
+
{L1_KER} === t * p * {K} * q * tt * pp * {K}{K}
|
104
|
+
{L1_IMG} === SARATI_T * SARATI_P * SARATI_QUENYA_C * SARATI_QUENYA_QU * SARATI_DASH_D SARATI_T * SARATI_DASH_D SARATI_P * SARATI_DASH_D SARATI_QUENYA_C
|
105
|
+
|
106
|
+
[{L1_KER}]{V_L_KER_WN} --> 2,1 --> {V_IMG_FOR_CONSONNANTS_WN}[{L1_IMG}]
|
107
|
+
|
108
|
+
ty{V_L_KER_WN} --> {V_IMG_FOR_CONSONNANTS_WN} SARATI_QUENYA_TY
|
109
|
+
ts{V_L_KER_WN} --> {V_IMG_FOR_CONSONNANTS_WN} SARATI_QUENYA_TS
|
110
|
+
|
111
|
+
\** Missing py, ps? **\
|
112
|
+
|
113
|
+
\** SECOND LINE **\
|
114
|
+
|
115
|
+
{L2_KER} === nd * {MB} * ng * ngw
|
116
|
+
{L2_IMG} === SARATI_QUENYA_ND * SARATI_QUENYA_MB * SARATI_NG * SARATI_QUENYA_NGW
|
117
|
+
|
118
|
+
[{L2_KER}]{V_L_KER_WN} --> 2,1 --> {V_IMG_FOR_CONSONNANTS_WN}[{L2_IMG}]
|
119
|
+
|
120
|
+
ndy{V_L_KER_WN} --> {V_IMG_FOR_CONSONNANTS_WN} SARATI_QUENYA_NDY
|
121
|
+
|
122
|
+
\** ########### **\
|
123
|
+
\** THIRD LINE **\
|
124
|
+
|
125
|
+
{L3_KER} === th * f * h * hw
|
126
|
+
{L3_IMG} === SARATI_QUENYA_S * SARATI_QUENYA_F_ALT * SARATI_H * SARATI_QUENYA_HW
|
127
|
+
|
128
|
+
[{L3_KER}]{V_L_KER_WN} --> 2,1 --> {V_IMG_FOR_CONSONNANTS_WN}[{L3_IMG}]
|
129
|
+
|
130
|
+
hy{V_L_KER_WN} --> {V_IMG_FOR_CONSONNANTS_WN} SARATI_QUENYA_HY
|
131
|
+
|
132
|
+
\** # The two following are not treated the same way in tengwar **\
|
133
|
+
ht{V_L_KER_WN} --> {V_IMG_FOR_CONSONNANTS_WN} SARATI_QUENYA_HT_ALT_1
|
134
|
+
hty{V_L_KER_WN} --> {V_IMG_FOR_CONSONNANTS_WN} SARATI_QUENYA_HTY
|
135
|
+
|
136
|
+
\** ########### **\
|
137
|
+
\** FOURTH LINE **\
|
138
|
+
|
139
|
+
{LINE_4TH_KER} === nt * mp * nc * nq \** # Not nqu, due to preprocessor **\
|
140
|
+
{LINE_4TH_IMG} === SARATI_QUENYA_NT * SARATI_QUENYA_MP * SARATI_QUENYA_NC * SARATI_QUENYA_NQU
|
141
|
+
|
142
|
+
[{LINE_4TH_KER}]{V_L_KER_WN} --> 2,1 --> {V_IMG_FOR_CONSONNANTS_WN}[{LINE_4TH_IMG}]
|
143
|
+
nty{V_L_KER_WN} --> {V_IMG_FOR_CONSONNANTS_WN} SARATI_QUENYA_NTY
|
144
|
+
|
145
|
+
\** ########### **\
|
146
|
+
\** FIFTH LINE **\
|
147
|
+
|
148
|
+
{LINE_5TH_KER} === n * m * ñ * ñw * _nw * nn * mm
|
149
|
+
{LINE_5TH_IMG} === SARATI_N * SARATI_M * SARATI_QUENYA_VELAR_NASAL * SARATI_QUENYA_NW * SARATI_QUENYA_NW * SARATI_DASH_D SARATI_N * SARATI_DASH_D SARATI_M
|
150
|
+
|
151
|
+
[{LINE_5TH_KER}]{V_L_KER_WN} --> 2,1 --> {V_IMG_FOR_CONSONNANTS_WN}[{LINE_5TH_IMG}]
|
152
|
+
ny{V_L_KER_WN} --> {V_IMG_FOR_CONSONNANTS_WN} SARATI_QUENYA_NY
|
153
|
+
|
154
|
+
\** Missing my ? **\
|
155
|
+
|
156
|
+
\** ########### **\
|
157
|
+
\** SIXTH LINE **\
|
158
|
+
|
159
|
+
{LINE_6TH_KER} === r * v * y * w * rr
|
160
|
+
{LINE_6TH_IMG} === SARATI_R * SARATI_QUENYA_V_ALT * SARATI_QUENYA_Y * SARATI_W * SARATI_DASH_D SARATI_R
|
161
|
+
|
162
|
+
[{LINE_6TH_KER}]{V_L_KER_WN} --> 2,1 --> {V_IMG_FOR_CONSONNANTS_WN}[{LINE_6TH_IMG}]
|
163
|
+
|
164
|
+
\** Weak r is not distinguished **\
|
165
|
+
\** Missing ry? rd? **\
|
166
|
+
|
167
|
+
\** ########### **\
|
168
|
+
\** L Line **\
|
169
|
+
|
170
|
+
{LINE_L_KER} === l * ll * d
|
171
|
+
{LINE_L_IMG} === SARATI_L * SARATI_DASH_D SARATI_L * SARATI_D
|
172
|
+
|
173
|
+
[{LINE_L_KER}]{V_L_KER_WN} --> 2,1 --> {V_IMG_FOR_CONSONNANTS_WN}[{LINE_L_IMG}]
|
174
|
+
|
175
|
+
\** Missing ld, ly, hl, hr ? **\
|
176
|
+
|
177
|
+
\** ########### **\
|
178
|
+
\** S/Z line **\
|
179
|
+
|
180
|
+
\** st v **\
|
181
|
+
\** sty … **\
|
182
|
+
\** ss ¦ or w or i **\
|
183
|
+
|
184
|
+
\** For s, use the same sarat as for th **\
|
185
|
+
|
186
|
+
{LINE_8TH_KER} === s * {SS}
|
187
|
+
{LINE_8TH_IMG} === SARATI_QUENYA_S * SARATI_QUENYA_SS_ALT_1
|
188
|
+
|
189
|
+
[{LINE_8TH_KER}]{V_L_KER_WN} --> 2,1 --> {V_IMG_FOR_CONSONNANTS_WN}[{LINE_8TH_IMG}]
|
190
|
+
|
191
|
+
{LINE_8PTH_KER} === st * sty
|
192
|
+
{LINE_8PTH_IMG} === SARATI_QUENYA_ST * SARATI_QUENYA_STY
|
193
|
+
|
194
|
+
[{LINE_8PTH_KER}]{V_L_KER_WN} --> 2,1 --> {V_IMG_FOR_CONSONNANTS_WN}[{LINE_8PTH_IMG}]
|
195
|
+
|
196
|
+
\** Override lonely s / ss / before consonant: TODO!!!! **\
|
197
|
+
\** s -> 8 **\
|
198
|
+
\** {SS} -> , **\
|
199
|
+
|
200
|
+
\** ############ **\
|
201
|
+
\** OTHERS **\
|
202
|
+
|
203
|
+
x {V_L_KER_WN} --> {V_IMG_FOR_CONSONNANTS_WN} SARATI_QUENYA_X
|
204
|
+
\end
|
205
|
+
|
206
|
+
\beg rules punctuation
|
207
|
+
· --> {NULL}
|
208
|
+
, --> {NULL}
|
209
|
+
; --> {NULL}
|
210
|
+
: --> {NULL}
|
211
|
+
. --> SARATI_SPACE
|
212
|
+
- --> SARATI_SPACE
|
213
|
+
– --> SARATI_SPACE
|
214
|
+
! --> {NULL}
|
215
|
+
? --> {NULL}
|
216
|
+
' --> {NULL}
|
217
|
+
[ --> {NULL}
|
218
|
+
] --> {NULL}
|
219
|
+
‘ --> {NULL}
|
220
|
+
’ --> {NULL}
|
221
|
+
“ --> {NULL}
|
222
|
+
” --> {NULL}
|
223
|
+
\end
|
224
|
+
|
225
|
+
\end
|
226
|
+
|
227
|
+
\beg postprocessor
|
228
|
+
\reverse
|
229
|
+
\end
|
230
|
+
|
231
|
+
\** Punctuation **\
|
232
|
+
|
233
|
+
|
234
|
+
|
235
|
+
\** ############### **\
|
236
|
+
\** Helpers, transcribed from amanye tenceli **\
|
237
|
+
|
238
|
+
\** ########### **\
|
239
|
+
\** # p Z **\
|
240
|
+
\** # t " **\
|
241
|
+
\** # c # **\
|
242
|
+
\** # q p **\
|
243
|
+
|
244
|
+
\** # ty ± **\
|
245
|
+
\** # ts g **\
|
246
|
+
|
247
|
+
\** ########### **\
|
248
|
+
\** # mb _ **\
|
249
|
+
\** # nd € **\
|
250
|
+
\** # ndy ³ **\
|
251
|
+
\** # ng & **\
|
252
|
+
\** # ngw s **\
|
253
|
+
|
254
|
+
\** ########### **\
|
255
|
+
|
256
|
+
\** # f \ or [ ## Aside or below **\
|
257
|
+
\** # s (th) Ÿ **\
|
258
|
+
\** # hy ½ **\
|
259
|
+
\** # h Ë **\
|
260
|
+
\** # hw º **\
|
261
|
+
|
262
|
+
\** # ht ² or ‚ **\
|
263
|
+
\** # hty Œ **\
|
264
|
+
|
265
|
+
\** ########### **\
|
266
|
+
|
267
|
+
\** # mp d **\
|
268
|
+
\** # nt ª **\
|
269
|
+
\** # nty „ **\
|
270
|
+
\** # nc — **\
|
271
|
+
\** # nq ˜ **\
|
272
|
+
|
273
|
+
\** ########### **\
|
274
|
+
|
275
|
+
\** # m P **\
|
276
|
+
\** # n À **\
|
277
|
+
\** # ny ‰ **\
|
278
|
+
\** # ñ + **\
|
279
|
+
\** # nw , **\
|
280
|
+
|
281
|
+
\** ########### **\
|
282
|
+
|
283
|
+
\** # v a or ` ## Aside or below **\
|
284
|
+
\** # r F **\
|
285
|
+
\** # y » **\
|
286
|
+
\** # w ¹ **\
|
287
|
+
|
288
|
+
\** ########### **\
|
289
|
+
|
290
|
+
\** # l ? **\
|
291
|
+
|
292
|
+
\** ########### **\
|
293
|
+
|
294
|
+
\** # st v **\
|
295
|
+
\** # sty … **\
|
296
|
+
\** # ss ¦ or w or i **\
|
297
|
+
|
298
|
+
\** ################ **\
|
299
|
+
|
300
|
+
\** # x (ks) y **\
|
301
|
+
|
302
|
+
|
303
|
+
\** # GEMINATION -> ó **\
|
304
|
+
\** # PRECEDING S -> ý **\
|
305
|
+
\** # SHORT CARRIER -> È **\
|
306
|
+
\** # **\
|
307
|
+
\** # Vowels : **\
|
308
|
+
\** # Stop Vowel: Ó **\
|
309
|
+
\** # i Ò **\
|
310
|
+
\** # e è or Ô **\
|
311
|
+
\** # a Ö **\
|
312
|
+
\** # o Ü **\
|
313
|
+
\** # u Þ **\
|
314
|
+
\** # **\
|
315
|
+
\** # Long Vowels: **\
|
316
|
+
\** # Carrier dash : ò **\
|
317
|
+
\** # Always use carrier dashes EXCEPT for a **\
|
318
|
+
|
319
|
+
|
320
|
+
|
@@ -0,0 +1,307 @@
|
|
1
|
+
\**
|
2
|
+
|
3
|
+
Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
the transcription of texts between writing systems, and more
|
5
|
+
specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
invented languages to some of his devised writing systems.
|
7
|
+
|
8
|
+
Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
|
10
|
+
This program is free software: you can redistribute it and/or modify
|
11
|
+
it under the terms of the GNU Affero General Public License as published by
|
12
|
+
the Free Software Foundation, either version 3 of the License, or
|
13
|
+
any later version.
|
14
|
+
|
15
|
+
This program is distributed in the hope that it will be useful,
|
16
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
GNU Affero General Public License for more details.
|
19
|
+
|
20
|
+
You should have received a copy of the GNU Affero General Public License
|
21
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
**\
|
24
|
+
|
25
|
+
\language "Quenya"
|
26
|
+
\writing "Tengwar"
|
27
|
+
\mode "Classical"
|
28
|
+
\version "0.0.1"
|
29
|
+
\authors "Talagan (Benjamin Babut)"
|
30
|
+
|
31
|
+
\charset tengwar_ds true
|
32
|
+
|
33
|
+
\beg options
|
34
|
+
\option split_diphthongs false
|
35
|
+
\option always_use_romen_for_r false
|
36
|
+
\option reverse_numbers true
|
37
|
+
\beg option numbers_base BASE_12
|
38
|
+
\value BASE_10 10
|
39
|
+
\value BASE_12 12
|
40
|
+
\end
|
41
|
+
\end
|
42
|
+
|
43
|
+
\beg preprocessor
|
44
|
+
\** Work exclusively downcase **\
|
45
|
+
\downcase
|
46
|
+
|
47
|
+
\** Simplify trema vowels **\
|
48
|
+
\substitute ä a
|
49
|
+
\substitute ë e
|
50
|
+
\substitute ï i
|
51
|
+
\substitute ö o
|
52
|
+
\substitute ü u
|
53
|
+
\substitute ÿ y
|
54
|
+
|
55
|
+
\** Disambiguate long vowels **\
|
56
|
+
\rxsubstitute "(ā|â|aa)" "á"
|
57
|
+
\rxsubstitute "(ē|ê|ee)" "é"
|
58
|
+
\rxsubstitute "(ī|î|ii)" "í"
|
59
|
+
\rxsubstitute "(ō|ô|oo)" "ó"
|
60
|
+
\rxsubstitute "(ū|û|uu)" "ú"
|
61
|
+
\rxsubstitute "(ȳ|ŷ|yy)" "ý"
|
62
|
+
|
63
|
+
\substitute "qu" "q" \** Dis-ambiguate qu **\
|
64
|
+
|
65
|
+
\elvish_numbers "\\eval numbers_base" "\\eval reverse_numbers"
|
66
|
+
\end
|
67
|
+
|
68
|
+
\beg processor
|
69
|
+
|
70
|
+
\beg rules litteral
|
71
|
+
|
72
|
+
{K} === (c,k)
|
73
|
+
{MB} === (b,mb)
|
74
|
+
{SS} === (z,ss)
|
75
|
+
|
76
|
+
{VOWELS} === a * e * i * o * u
|
77
|
+
{LVOWELS} === á * é * í * ó * ú
|
78
|
+
|
79
|
+
{TEHTA_XS} === A_TEHTA_XS * E_TEHTA_XS * I_TEHTA_XS * O_TEHTA_XS * U_TEHTA_XS
|
80
|
+
{TEHTA__S} === A_TEHTA_S * E_TEHTA_S * I_TEHTA_S * O_TEHTA_S * U_TEHTA_S
|
81
|
+
{TEHTA__L} === A_TEHTA_L * E_TEHTA_L * I_TEHTA_L * O_TEHTA_L * U_TEHTA_L
|
82
|
+
{TEHTA_XL} === A_TEHTA_XL * E_TEHTA_XL * I_TEHTA_XL * O_TEHTA_XL * U_TEHTA_XL
|
83
|
+
|
84
|
+
\if split_diphthongs
|
85
|
+
{WDIPHTHONGS} === {NULL}
|
86
|
+
{WDIPHTHENGS} === {NULL}
|
87
|
+
\else
|
88
|
+
{DIPHTHONGS} === ai * au * eu * iu * oi * ui
|
89
|
+
{DIPHTHENGS} === YANTA A_TEHTA_L * URE A_TEHTA_L * URE E_TEHTA_L * URE I_TEHTA_L * YANTA O_TEHTA_L * YANTA U_TEHTA_L
|
90
|
+
{WDIPHTHONGS} === * {DIPHTHONGS} \** groovy! **\
|
91
|
+
{WDIPHTHENGS} === * {DIPHTHENGS} \** same thing **\
|
92
|
+
\endif
|
93
|
+
|
94
|
+
{V_D_KER} === [ {VOWELS} {WDIPHTHONGS} ]
|
95
|
+
{V_D_KER_WN} === [ {VOWELS} {WDIPHTHONGS} * {NULL} ]
|
96
|
+
|
97
|
+
{V_D_IMG_XS} === [ {TEHTA_XS} {WDIPHTHENGS} ]
|
98
|
+
{V_D_IMG__S} === [ {TEHTA__L} {WDIPHTHENGS} ] \** NOTE(review): the __S image is built from the _L tehta set, and the __L image from the _S set (same in the _WN variants) - presumably deliberate, confirm **\
|
99
|
+
{V_D_IMG__L} === [ {TEHTA__S} {WDIPHTHENGS} ]
|
100
|
+
{V_D_IMG_XL} === [ {TEHTA_XL} {WDIPHTHENGS} ]
|
101
|
+
{V_D_IMG_XS_WN} === [ {TEHTA_XS} {WDIPHTHENGS} * {NULL} ]
|
102
|
+
{V_D_IMG__S_WN} === [ {TEHTA__L} {WDIPHTHENGS} * {NULL} ]
|
103
|
+
{V_D_IMG__L_WN} === [ {TEHTA__S} {WDIPHTHENGS} * {NULL} ]
|
104
|
+
{V_D_IMG_XL_WN} === [ {TEHTA_XL} {WDIPHTHENGS} * {NULL} ]
|
105
|
+
|
106
|
+
\** VOWEL RULES **\
|
107
|
+
[{VOWELS}] --> TELCO [{TEHTA_XS}] \** Replace isolated short vowels **\
|
108
|
+
[{LVOWELS}] --> ARA [{TEHTA_XS}] \** Replace long vowels **\
|
109
|
+
|
110
|
+
\if !split_diphthongs
|
111
|
+
[{DIPHTHONGS}] --> [{DIPHTHENGS}] \** Replace diphthongs **\
|
112
|
+
\endif
|
113
|
+
|
114
|
+
\** ===================== **\
|
115
|
+
\** 1ST LINE RULES **\
|
116
|
+
\** ===================== **\
|
117
|
+
{L1_KER_1} === t * p
|
118
|
+
{L1_IMG_1} === TINCO * PARMA
|
119
|
+
{L1_KER_2} === {K} * q
|
120
|
+
{L1_IMG_2} === CALMA * QUESSE
|
121
|
+
{L1_KER_1_GEMS} === tt * pp
|
122
|
+
{L1_IMG_1_GEMS} === TINCO DASH_INF_S * PARMA DASH_INF_S
|
123
|
+
|
124
|
+
\** NORMAL **\
|
125
|
+
[ {L1_KER_1} ] {V_D_KER_WN} --> [ {L1_IMG_1} ] {V_D_IMG__S_WN}
|
126
|
+
[ {L1_KER_2} ] {V_D_KER_WN} --> [ {L1_IMG_2} ] {V_D_IMG__S_WN}
|
127
|
+
|
128
|
+
\** GEMINATED **\
|
129
|
+
[ {L1_KER_1_GEMS} ] {V_D_KER_WN} --> [ {L1_IMG_1_GEMS} ] {V_D_IMG__S_WN} \** Tengscribe uses S but L is probably better **\
|
130
|
+
{K}{K}{V_D_KER_WN} --> CALMA DASH_INF_S {V_D_IMG__S_WN}
|
131
|
+
|
132
|
+
\** OTHERS **\
|
133
|
+
ty{V_D_KER_WN} --> TINCO THINF_DDOT_L {V_D_IMG__S_WN}
|
134
|
+
py{V_D_KER_WN} --> PARMA THINF_DDOT_L {V_D_IMG__S_WN}
|
135
|
+
|
136
|
+
ts{V_D_KER_WN}_ --> TINCO SHOOK_RIGHT_L {V_D_IMG_XL_WN}
|
137
|
+
ps{V_D_KER_WN}_ --> PARMA SHOOK_RIGHT_L {V_D_IMG_XL_WN}
|
138
|
+
x{V_D_KER_WN} --> CALMA SHOOK_LEFT_L {V_D_IMG__S_WN} \** render ks for x; uses the same vowel image variable as the other CALMA rules **\
|
139
|
+
|
140
|
+
\** ===================== **\
|
141
|
+
\** 2ND LINE RULES **\
|
142
|
+
\** ===================== **\
|
143
|
+
{L2_KER} === nd * {MB} * ng * ngw
|
144
|
+
{L2_IMG} === ANDO * UMBAR * ANGA * UNGWE
|
145
|
+
|
146
|
+
\** STANDARD **\
|
147
|
+
[{L2_KER}]{V_D_KER_WN} --> [{L2_IMG}]{V_D_IMG_XL_WN}
|
148
|
+
\** OTHERS **\
|
149
|
+
ndy{V_D_KER_WN} --> ANDO THINF_DDOT_XL {V_D_IMG_XL_WN}
|
150
|
+
|
151
|
+
\** ===================== **\
|
152
|
+
\** 3RD LINE RULES **\
|
153
|
+
\** ===================== **\
|
154
|
+
{L3_KER_1} === (th,þ) * f
|
155
|
+
{L3_IMG_1} === SULE * FORMEN
|
156
|
+
{L3_KER_2} === h * hw
|
157
|
+
{L3_IMG_2} === AHA * HWESTA
|
158
|
+
|
159
|
+
\** NORMAL **\
|
160
|
+
[{L3_KER_1}]{V_D_KER_WN} --> [{L3_IMG_1}]{V_D_IMG__S_WN}
|
161
|
+
[{L3_KER_2}]{V_D_KER_WN} --> [{L3_IMG_2}]{V_D_IMG__S_WN} \** Tengscribe uses S but L is probably better **\
|
162
|
+
|
163
|
+
\** OTHERS **\
|
164
|
+
hy{V_D_KER_WN} --> HYARMEN THINF_DDOT_L {V_D_IMG__L_WN}
|
165
|
+
|
166
|
+
\** Override h with vowels (descendent of hy) **\
|
167
|
+
_h{V_D_KER} --> HYARMEN {V_D_IMG__L}
|
168
|
+
h[{LVOWELS}] --> HYARMEN ARA [{TEHTA_XS}]
|
169
|
+
h --> AHA
|
170
|
+
|
171
|
+
\** ===================== **\
|
172
|
+
\** 4TH LINE RULES **\
|
173
|
+
\** ===================== **\
|
174
|
+
{L4_KER} === nt * mp * nc * nq \** Not nqu, due to preprocessor **\
|
175
|
+
{L4_IMG} === ANTO * AMPA * ANCA * UNQUE
|
176
|
+
|
177
|
+
\** NORMAL **\
|
178
|
+
[{L4_KER}]{V_D_KER_WN} --> [{L4_IMG}]{V_D_IMG_XL_WN}
|
179
|
+
\** OTHERS **\
|
180
|
+
nty{V_D_KER_WN} --> ANTO THINF_DDOT_XL {V_D_IMG_XL_WN}
|
181
|
+
|
182
|
+
\** ===================== **\
|
183
|
+
\** 5TH LINE RULES **\
|
184
|
+
\** ===================== **\
|
185
|
+
{L5_KER} === n * m * ñ * ñw * _nw
|
186
|
+
{L5_IMG} === NUMEN * MALTA * NOLDO * NWALME * NWALME
|
187
|
+
|
188
|
+
[{L5_KER}]{V_D_KER_WN} --> [{L5_IMG}]{V_D_IMG_XL_WN}
|
189
|
+
|
190
|
+
ny{V_D_KER_WN} --> NUMEN THINF_DDOT_XL {V_D_IMG_XL_WN}
|
191
|
+
nn{V_D_KER_WN} --> NUMEN DASH_INF_L {V_D_IMG_XL_WN}
|
192
|
+
my{V_D_KER_WN} --> MALTA THINF_DDOT_XL {V_D_IMG_XL_WN}
|
193
|
+
mm{V_D_KER_WN} --> MALTA DASH_INF_L {V_D_IMG_XL_WN}
|
194
|
+
|
195
|
+
\** ===================== **\
|
196
|
+
\** 6TH LINE RULES **\
|
197
|
+
\** ===================== **\
|
198
|
+
{L6_KER} === r * v * y * w
|
199
|
+
{L6_IMG} === ROMEN * VALA * ANNA THINF_DDOT_L * VILYA
|
200
|
+
|
201
|
+
[{L6_KER}]{V_D_KER_WN} --> [{L6_IMG}]{V_D_IMG__S_WN}
|
202
|
+
|
203
|
+
\** r before long vowels is voiced **\
|
204
|
+
r[{LVOWELS}] --> ROMEN ARA [{TEHTA_XS}]
|
205
|
+
|
206
|
+
\if always_use_romen_for_r
|
207
|
+
r --> ROMEN
|
208
|
+
\else
|
209
|
+
r --> ORE \** lonely r is not voiced, so override rule **\
|
210
|
+
\endif
|
211
|
+
|
212
|
+
rr{V_D_KER_WN} --> ROMEN DASH_INF_S {V_D_IMG__S_WN}
|
213
|
+
ry{V_D_KER_WN} --> ROMEN THINF_DDOT_L {V_D_IMG__S_WN}
|
214
|
+
rd{V_D_KER_WN} --> ARDA {V_D_IMG__S_WN}
|
215
|
+
|
216
|
+
\** ===================== **\
|
217
|
+
\** L LINE RULES **\
|
218
|
+
\** ===================== **\
|
219
|
+
{LINE_L_KER} === l * ld * ll
|
220
|
+
{LINE_L_IMG} === LAMBE * ALDA * LAMBE LAMBE_MARK_TILD
|
221
|
+
|
222
|
+
[{LINE_L_KER}]{V_D_KER_WN} --> [{LINE_L_IMG}]{V_D_IMG__S_WN}
|
223
|
+
ly{V_D_KER_WN} --> LAMBE LAMBE_MARK_DDOT {V_D_IMG__S_WN}
|
224
|
+
hl{V_D_KER_WN} --> HALLA LAMBE {V_D_IMG__S_WN}
|
225
|
+
hr{V_D_KER_WN} --> HALLA ROMEN {V_D_IMG__S_WN}
|
226
|
+
|
227
|
+
\** ===================== **\
|
228
|
+
\** S/Z LINE RULES **\
|
229
|
+
\** ===================== **\
|
230
|
+
{L8_KER} === s * {SS}
|
231
|
+
{L8_IMG} === SILME_NUQUERNA * ESSE_NUQUERNA
|
232
|
+
|
233
|
+
[{L8_KER}]{V_D_KER_WN} --> [{L8_IMG}]{V_D_IMG__S_WN}
|
234
|
+
|
235
|
+
\** Override lonely s / ss / before consonant **\
|
236
|
+
s --> SILME
|
237
|
+
s[{LVOWELS}] --> SILME ARA [{TEHTA_XS}]
|
238
|
+
{SS} --> ESSE
|
239
|
+
{SS}[{LVOWELS}] --> ESSE ARA [{TEHTA_XS}]
|
240
|
+
|
241
|
+
\end
|
242
|
+
|
243
|
+
\beg rules punctuation
|
244
|
+
. --> PUNCT_DDOT
|
245
|
+
.. --> PUNCT_DOT PUNCT_DDOT PUNCT_DOT
|
246
|
+
… --> PUNCT_TILD
|
247
|
+
... --> PUNCT_TILD
|
248
|
+
.... --> PUNCT_TILD
|
249
|
+
..... --> PUNCT_TILD
|
250
|
+
...... --> PUNCT_TILD
|
251
|
+
....... --> PUNCT_TILD
|
252
|
+
|
253
|
+
, --> PUNCT_DOT
|
254
|
+
: --> PUNCT_DOT
|
255
|
+
; --> PUNCT_DOT
|
256
|
+
! --> PUNCT_EXCLAM
|
257
|
+
? --> PUNCT_INTERR
|
258
|
+
· --> PUNCT_DOT
|
259
|
+
|
260
|
+
\** Apostrophe **\
|
261
|
+
|
262
|
+
' --> {NULL}
|
263
|
+
’ --> {NULL}
|
264
|
+
|
265
|
+
\** Quotes **\
|
266
|
+
|
267
|
+
“ --> DQUOT_OPEN
|
268
|
+
” --> DQUOT_CLOSE
|
269
|
+
« --> DQUOT_OPEN
|
270
|
+
» --> DQUOT_CLOSE
|
271
|
+
|
272
|
+
- --> {NULL}
|
273
|
+
– --> PUNCT_TILD
|
274
|
+
— --> PUNCT_TILD
|
275
|
+
|
276
|
+
[ --> PUNCT_PAREN_L
|
277
|
+
] --> PUNCT_PAREN_R
|
278
|
+
( --> PUNCT_PAREN_L
|
279
|
+
) --> PUNCT_PAREN_R
|
280
|
+
{ --> PUNCT_PAREN_L
|
281
|
+
} --> PUNCT_PAREN_R
|
282
|
+
< --> PUNCT_PAREN_L
|
283
|
+
> --> PUNCT_PAREN_R
|
284
|
+
|
285
|
+
\** Not universal between fonts ... **\
|
286
|
+
$ --> BOOKMARK_SIGN
|
287
|
+
≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
|
288
|
+
≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
|
289
|
+
|
290
|
+
\end
|
291
|
+
|
292
|
+
\beg rules numbers
|
293
|
+
0 --> NUM_0
|
294
|
+
1 --> NUM_1
|
295
|
+
2 --> NUM_2
|
296
|
+
3 --> NUM_3
|
297
|
+
4 --> NUM_4
|
298
|
+
5 --> NUM_5
|
299
|
+
6 --> NUM_6
|
300
|
+
7 --> NUM_7
|
301
|
+
8 --> NUM_8
|
302
|
+
9 --> NUM_9
|
303
|
+
A --> NUM_10
|
304
|
+
B --> NUM_11
|
305
|
+
\end
|
306
|
+
|
307
|
+
\end
|