glaemscribe 1.1.14 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/bin/glaemscribe +19 -15
- data/glaemresources/charsets/cirth_ds.cst +205 -0
- data/glaemresources/charsets/sarati_eldamar.cst +256 -0
- data/glaemresources/charsets/tengwar_ds_annatar.cst +546 -0
- data/glaemresources/charsets/tengwar_ds_eldamar.cst +535 -0
- data/glaemresources/charsets/tengwar_ds_elfica.cst +551 -0
- data/glaemresources/charsets/tengwar_ds_parmaite.cst +534 -0
- data/glaemresources/charsets/tengwar_ds_sindarin.cst +531 -0
- data/glaemresources/charsets/tengwar_freemono.cst +217 -0
- data/glaemresources/charsets/tengwar_guni_annatar.cst +628 -0
- data/glaemresources/charsets/tengwar_guni_eldamar.cst +618 -0
- data/glaemresources/charsets/tengwar_guni_elfica.cst +620 -0
- data/glaemresources/charsets/tengwar_guni_parmaite.cst +621 -0
- data/glaemresources/charsets/tengwar_guni_sindarin.cst +617 -0
- data/glaemresources/charsets/tengwar_telcontar.cst +218 -0
- data/glaemresources/charsets/unicode_gothic.cst +64 -0
- data/glaemresources/charsets/unicode_runes.cst +121 -0
- data/glaemresources/modes/{adunaic.glaem → adunaic-tengwar-glaemscrafu.glaem} +14 -2
- data/glaemresources/modes/{blackspeech.glaem → blackspeech-tengwar-general_use.glaem} +12 -2
- data/glaemresources/modes/japanese-tengwar.glaem +771 -0
- data/glaemresources/modes/{khuzdul.glaem → khuzdul-cirth-moria.glaem} +4 -1
- data/glaemresources/modes/{futhorc.glaem → old_english-futhorc.glaem} +0 -0
- data/glaemresources/modes/{mercian.glaem → old_english-tengwar-mercian.glaem} +22 -12
- data/glaemresources/modes/{westsaxon.glaem → old_english-tengwar-westsaxon.glaem} +20 -11
- data/glaemresources/modes/{futhark-runicus.glaem → old_norse-futhark-runicus.glaem} +0 -0
- data/glaemresources/modes/{futhark-younger.glaem → old_norse-futhark-younger.glaem} +0 -0
- data/glaemresources/modes/{quenya.glaem → quenya-tengwar-classical.glaem} +32 -50
- data/glaemresources/modes/raw-tengwar.glaem +46 -23
- data/glaemresources/modes/{rlyehian.glaem → rlyehian-tengwar.glaem} +14 -3
- data/glaemresources/modes/{sindarin-daeron.glaem → sindarin-cirth-daeron.glaem} +55 -14
- data/glaemresources/modes/{sindarin-beleriand.glaem → sindarin-tengwar-beleriand.glaem} +154 -28
- data/glaemresources/modes/{sindarin.glaem → sindarin-tengwar-general_use.glaem} +86 -25
- data/glaemresources/modes/{telerin.glaem → telerin-tengwar-glaemscrafu.glaem} +16 -6
- data/glaemresources/modes/{westron.glaem → westron-tengwar-glaemscrafu.glaem} +18 -8
- data/lib/api/charset.rb +67 -7
- data/lib/api/charset_parser.rb +7 -0
- data/lib/api/constants.rb +3 -4
- data/lib/api/fragment.rb +26 -5
- data/lib/api/if_tree.rb +70 -8
- data/lib/api/macro.rb +40 -0
- data/lib/api/mode.rb +35 -13
- data/lib/api/mode_parser.rb +106 -12
- data/lib/api/object_additions.rb +23 -1
- data/lib/api/option.rb +17 -2
- data/lib/api/post_processor/resolve_virtuals.rb +25 -9
- data/lib/api/resource_manager.rb +1 -0
- data/lib/api/rule_group.rb +170 -26
- data/lib/api/sheaf_chain_iterator.rb +1 -1
- data/lib/api/transcription_processor.rb +3 -3
- data/lib/api/tts.rb +51 -0
- data/lib/glaemscribe.rb +34 -31
- data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +21 -0
- data/lib_espeak/glaemscribe_tts.js +365 -0
- metadata +67 -21
@@ -35,12 +35,16 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
35
35
|
\entry "0.1.0" "Added support for the Tengwar Elfica font"
|
36
36
|
\entry "0.1.1" "Added support for inlined raw tengwar"
|
37
37
|
\entry "0.1.2" "Added support for non-breaking spaces"
|
38
|
+
\entry "0.1.3" "Added support for new unicode charsets"
|
39
|
+
\entry "0.1.4" "Added support for the Tengwar Telcontar font"
|
40
|
+
\entry "0.1.5" "Added a few labial exotic combinations. Reworked median point behaviour, and ng."
|
41
|
+
\entry "0.1.6" "Added gasdil handling."
|
38
42
|
\end
|
39
43
|
|
40
44
|
\language "Sindarin"
|
41
45
|
\writing "Tengwar"
|
42
46
|
\mode "Sindarin Tengwar - General Use"
|
43
|
-
\version "0.1.
|
47
|
+
\version "0.1.6"
|
44
48
|
\authors "J.R.R. Tolkien, impl. Talagan (Benjamin Babut)"
|
45
49
|
|
46
50
|
\world arda
|
@@ -51,7 +55,15 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
51
55
|
\charset tengwar_ds_eldamar false
|
52
56
|
\charset tengwar_ds_annatar false
|
53
57
|
\charset tengwar_ds_elfica false
|
58
|
+
|
59
|
+
\charset tengwar_guni_sindarin false
|
60
|
+
\charset tengwar_guni_parmaite false
|
61
|
+
\charset tengwar_guni_eldamar false
|
62
|
+
\charset tengwar_guni_annatar false
|
63
|
+
\charset tengwar_guni_elfica false
|
64
|
+
|
54
65
|
\charset tengwar_freemono false
|
66
|
+
\charset tengwar_telcontar false
|
55
67
|
|
56
68
|
\raw_mode "raw-tengwar"
|
57
69
|
|
@@ -62,6 +74,16 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
62
74
|
\value U_UP_O_DOWN 2
|
63
75
|
\end
|
64
76
|
|
77
|
+
\beg option apostrophe APOSTROPHE_IGNORED
|
78
|
+
\value APOSTROPHE_IGNORED 0
|
79
|
+
\value APOSTROPHE_GASDIL 1
|
80
|
+
\end
|
81
|
+
|
82
|
+
\beg option hyphen HYPHEN_WORD_BREAKER
|
83
|
+
\value HYPHEN_WORD_BREAKER 0
|
84
|
+
\value HYPHEN_WORD_JOINER 1
|
85
|
+
\end
|
86
|
+
|
65
87
|
\beg option consonant_modification_style CONSONANT_MODIFICATION_STYLE_BAR
|
66
88
|
\value CONSONANT_MODIFICATION_STYLE_WAVE 0
|
67
89
|
\value CONSONANT_MODIFICATION_STYLE_BAR 1
|
@@ -85,6 +107,17 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
85
107
|
\** Work exclusively downcase **\
|
86
108
|
\downcase
|
87
109
|
|
110
|
+
\if "hyphen == HYPHEN_WORD_JOINER"
|
111
|
+
\** Replace hyphen by median point **\
|
112
|
+
\substitute "-" "·"
|
113
|
+
\else
|
114
|
+
\** Replace hyphen by glaemscribe's word breaker **\
|
115
|
+
\substitute "-" "|"
|
116
|
+
\endif
|
117
|
+
|
118
|
+
\** Add keyboard friendly word joiner **\
|
119
|
+
\substitute "*" "·"
|
120
|
+
|
88
121
|
\** Simplify trema vowels **\
|
89
122
|
\substitute ä a
|
90
123
|
\substitute ë e
|
@@ -107,6 +140,17 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
107
140
|
\** Special case of starting 'i' before vowels, replace i by j **\
|
108
141
|
\rxsubstitute "\\bi([aeouyáāâéēêíīîóōôúūûýȳŷ])" "j\\1"
|
109
142
|
|
143
|
+
\** Special case for ng : before the vast majority of consonants, treat as ŋ **\
|
144
|
+
\** Don't include r / l / lh / w **\
|
145
|
+
\rxsubstitute "ng([tpckbdfðvnmhs])" "ŋ\\1"
|
146
|
+
|
147
|
+
\** Prevent mutated ng from being treated as a strong mid-word n|g (ex : i·ngelaidh [iŋɛlaið] ) **\
|
148
|
+
\substitute "·ng" "·ŋ"
|
149
|
+
\** But avoid losing the strong g in nasal mutation of g (ex : in·Gelydh [iŋgɛlyð] ) **\
|
150
|
+
\substitute "n·g" "·ŋg"
|
151
|
+
\** Use median dot as word joiner **\
|
152
|
+
\substitute "·" ""
|
153
|
+
|
110
154
|
\** Preprocess numbers **\
|
111
155
|
\elvish_numbers "\\eval numbers_base" "\\eval reverse_numbers"
|
112
156
|
\end
|
@@ -189,8 +233,12 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
189
233
|
{V_D_WN}n{K} --> CALMA {NASAL} {_V_D_WN_}
|
190
234
|
|
191
235
|
\** 2ND LINE **\
|
192
|
-
|
193
|
-
|
236
|
+
\**
|
237
|
+
/ŋg/ : this is ng in middle of words + might be found at word start.
|
238
|
+
See also final/initial ng_ / _ng below
|
239
|
+
**\
|
240
|
+
{L2} === d * b * g * (ng,ngg,ŋg,ñg)
|
241
|
+
{_L2_} === ANDO * UMBAR * UNGWE * UNGWE {NASAL}
|
194
242
|
|
195
243
|
{V_D_WN}[{L2}] --> 2,1 --> [{_L2_}]{_V_D_WN_}
|
196
244
|
|
@@ -210,13 +258,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
210
258
|
|
211
259
|
\** 4TH LINE **\
|
212
260
|
{L4} === (đ,ð,ðh,dh) * (v,bh,f_) \** Some noldorin variants here ... **\
|
213
|
-
{_L4_}
|
261
|
+
{_L4_} === ANTO * AMPA
|
214
262
|
|
215
263
|
{V_D_WN}[{L4}] --> 2,1 --> [{_L4_}]{_V_D_WN_}
|
216
264
|
|
217
265
|
\** 5TH LINE **\
|
218
|
-
{L5} === n
|
219
|
-
{_L5_} === NUMEN *
|
266
|
+
{L5} === n * m * (_ng,ng_,ŋ,ñ) * _mh \** weak ng at initial and final **\
|
267
|
+
{_L5_} === NUMEN * MALTA * NWALME * MALTA_W_HOOK
|
220
268
|
|
221
269
|
{V_D_WN}[{L5}] --> 2,1 --> [{_L5_}]{_V_D_WN_}
|
222
270
|
|
@@ -226,7 +274,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
226
274
|
\** 6TH LINE **\
|
227
275
|
|
228
276
|
\** 7TH LINE **\
|
229
|
-
{L7} === r_ * r * l * ll
|
277
|
+
{L7} === r_ * r * l * ll * w
|
230
278
|
{_L7_} === ORE * ROMEN * LAMBE * LAMBE {GEMINATE} * VALA
|
231
279
|
|
232
280
|
{V_D_WN}[{L7}] --> 2,1 --> [{_L7_}]{_V_D_WN_}
|
@@ -252,29 +300,47 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
252
300
|
|
253
301
|
\**
|
254
302
|
Ok here come the labialized consonants which are really tricky
|
255
|
-
The fonts generally do not handle well the
|
303
|
+
The fonts generally do not handle the wa-tehta curl + tehtar well; this should be one more argument for
|
256
304
|
adopting open type anchors with which we can stack diacritics (see the sarati modes).
|
257
305
|
For here, we cheat. Either we don't have any tehta on the tengwa, and it's easy.
|
258
306
|
Or, we put the two signs in their small versions, side by side.
|
259
307
|
We give an option not to use that trick, if the option is not set, we simply do not use
|
260
|
-
the
|
308
|
+
the wa-tehta curl at all when there's a tehta on the tengwa.
|
261
309
|
**\
|
262
310
|
|
263
311
|
\if "labialized_consonants_u_curl == LABIALIZED_U_CURL_NO_TEHTAR || labialized_consonants_u_curl == LABIALIZED_U_CURL_ALWAYS"
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
312
|
+
bw --> UMBAR WA_TEHTA
|
313
|
+
dw --> ANDO WA_TEHTA
|
314
|
+
gw --> UNGWE WA_TEHTA
|
315
|
+
lw --> LAMBE WA_TEHTA
|
316
|
+
nw --> NUMEN WA_TEHTA
|
317
|
+
rw --> ROMEN WA_TEHTA
|
318
|
+
(ng,ngg,ŋg,ñg)w --> UNGWE {NASAL} WA_TEHTA
|
319
|
+
(_ng,ng_,ŋ,ñ)w --> NWALME WA_TEHTA
|
269
320
|
\endif
|
270
321
|
|
271
322
|
\if "labialized_consonants_u_curl == LABIALIZED_U_CURL_ALWAYS"
|
272
|
-
{V_D}
|
273
|
-
{V_D}
|
274
|
-
{V_D}
|
275
|
-
{V_D}
|
276
|
-
{V_D}
|
323
|
+
{V_D}bw --> UMBAR WA_TEHTA {_V_D_}
|
324
|
+
{V_D}dw --> ANDO WA_TEHTA {_V_D_}
|
325
|
+
{V_D}gw --> UNGWE WA_TEHTA {_V_D_}
|
326
|
+
{V_D}lw --> LAMBE WA_TEHTA {_V_D_}
|
327
|
+
{V_D}nw --> NUMEN WA_TEHTA {_V_D_}
|
328
|
+
{V_D}rw --> ROMEN WA_TEHTA {_V_D_}
|
329
|
+
{V_D}(ng,ngg,ŋg,ñg)w --> UNGWE {NASAL} WA_TEHTA {_V_D_}
|
330
|
+
{V_D}(_ng,ng_,ŋ,ñ)w --> NWALME WA_TEHTA {_V_D_}
|
277
331
|
\endif
|
332
|
+
|
333
|
+
\if "apostrophe == APOSTROPHE_IGNORED"
|
334
|
+
' --> {NULL}
|
335
|
+
’ --> {NULL}
|
336
|
+
\else
|
337
|
+
\** use gasdil **\
|
338
|
+
' --> HALLA
|
339
|
+
’ --> HALLA
|
340
|
+
\endif
|
341
|
+
|
342
|
+
\** Forced gasdil **\
|
343
|
+
° --> HALLA
|
278
344
|
\end
|
279
345
|
|
280
346
|
\beg rules punctuation
|
@@ -297,11 +363,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
297
363
|
- --> {NULL}
|
298
364
|
– --> PUNCT_TILD
|
299
365
|
— --> PUNCT_TILD
|
300
|
-
|
301
|
-
\** Apostrophe **\
|
302
|
-
|
303
|
-
' --> {NULL}
|
304
|
-
’ --> {NULL}
|
305
366
|
|
306
367
|
\** NBSP **\
|
307
368
|
{NBSP} --> NBSP
|
@@ -323,7 +384,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
323
384
|
> --> PUNCT_PAREN_R
|
324
385
|
|
325
386
|
\** Not universal between fonts ... **\
|
326
|
-
$ -->
|
387
|
+
$ --> ELVISH_PAREN
|
327
388
|
≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
|
328
389
|
≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
|
329
390
|
\end
|
@@ -34,12 +34,14 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
34
34
|
\entry "0.1.1" "Added support for inlined raw tengwar"
|
35
35
|
\entry "0.1.2" "Added support for non-breaking spaces"
|
36
36
|
\entry "0.1.3" "Correcting visibility options to conform to new glaeml args strict syntax"
|
37
|
+
\entry "0.1.4" "Added support for new unicode charsets"
|
38
|
+
\entry "0.1.5" "Added support for the Tengwar Telcontar font"
|
37
39
|
\end
|
38
40
|
|
39
41
|
\language "Telerin"
|
40
42
|
\writing "Tengwar"
|
41
43
|
\mode "Telerin Tengwar - G*"
|
42
|
-
\version "0.1.
|
44
|
+
\version "0.1.5"
|
43
45
|
\authors "Talagan (Benjamin Babut), based on J.R.R Tolkien"
|
44
46
|
|
45
47
|
\world arda
|
@@ -50,7 +52,15 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
50
52
|
\charset tengwar_ds_eldamar false
|
51
53
|
\charset tengwar_ds_annatar false
|
52
54
|
\charset tengwar_ds_elfica false
|
55
|
+
|
56
|
+
\charset tengwar_guni_sindarin false
|
57
|
+
\charset tengwar_guni_parmaite false
|
58
|
+
\charset tengwar_guni_eldamar false
|
59
|
+
\charset tengwar_guni_annatar false
|
60
|
+
\charset tengwar_guni_elfica false
|
61
|
+
|
53
62
|
\charset tengwar_freemono false
|
63
|
+
\charset tengwar_telcontar false
|
54
64
|
|
55
65
|
\raw_mode "raw-tengwar"
|
56
66
|
|
@@ -237,10 +247,10 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
237
247
|
[ {L1} ] {V_D_WN} --> [ {_L1_} ] {_V_D_WN_}
|
238
248
|
[ {L1_GEMS} ] {V_D_WN} --> [ {_L1_GEMS_} ] {_V_D_WN_}
|
239
249
|
|
240
|
-
ts{V_D_WN} --> TINCO
|
241
|
-
ps{V_D_WN} --> PARMA
|
242
|
-
{K}s{V_D_WN} --> CALMA
|
243
|
-
x{V_D_WN} --> CALMA
|
250
|
+
ts{V_D_WN} --> TINCO SARINCE {_V_D_WN_}
|
251
|
+
ps{V_D_WN} --> PARMA SARINCE {_V_D_WN_}
|
252
|
+
{K}s{V_D_WN} --> CALMA SARINCE {_V_D_WN_}
|
253
|
+
x{V_D_WN} --> CALMA SARINCE {_V_D_WN_} \** render ks for x **\
|
244
254
|
|
245
255
|
\** ===================== **\
|
246
256
|
\** 2ND LINE RULES **\
|
@@ -369,7 +379,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
369
379
|
> --> PUNCT_PAREN_R
|
370
380
|
|
371
381
|
\** Not universal between fonts ... **\
|
372
|
-
$ -->
|
382
|
+
$ --> ELVISH_PAREN
|
373
383
|
≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
|
374
384
|
≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
|
375
385
|
|
@@ -30,13 +30,15 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
30
30
|
\entry "0.1.0" "Added support for the Tengwar Elfica font"
|
31
31
|
\entry "0.1.1" "Added support for inlined raw tengwar"
|
32
32
|
\entry "0.1.2" "Added support for non-breaking spaces"
|
33
|
+
\entry "0.1.3" "Added support for new unicode charsets"
|
34
|
+
\entry "0.1.4" "Added support for the Tengwar Telcontar font"
|
33
35
|
\end
|
34
36
|
|
35
37
|
\** Westron mode for glaemscribe (MAY BE INCOMPLETE) **\
|
36
38
|
\language Westron
|
37
39
|
\writing Tengwar
|
38
40
|
\mode "Westron Tengwar - G*"
|
39
|
-
\version "0.1.
|
41
|
+
\version "0.1.4"
|
40
42
|
\authors "Talagan (Benjamin Babut), based on J.R.R. Tolkien"
|
41
43
|
|
42
44
|
\world arda
|
@@ -49,7 +51,15 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
49
51
|
\charset tengwar_ds_eldamar false
|
50
52
|
\charset tengwar_ds_annatar false
|
51
53
|
\charset tengwar_ds_elfica false
|
54
|
+
|
55
|
+
\charset tengwar_guni_sindarin false
|
56
|
+
\charset tengwar_guni_parmaite false
|
57
|
+
\charset tengwar_guni_eldamar false
|
58
|
+
\charset tengwar_guni_annatar false
|
59
|
+
\charset tengwar_guni_elfica false
|
60
|
+
|
52
61
|
\charset tengwar_freemono false
|
62
|
+
\charset tengwar_telcontar false
|
53
63
|
|
54
64
|
\beg options
|
55
65
|
|
@@ -293,13 +303,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
293
303
|
nz{V_D} --> ESSE_NUQUERNA {NASAL} {_V_D_}
|
294
304
|
nz --> ESSE_NUQUERNA {NASAL}
|
295
305
|
|
296
|
-
ts --> TINCO
|
297
|
-
ps --> PARMA
|
298
|
-
(ks,cs,x) --> QUESSE
|
306
|
+
ts --> TINCO SARINCE
|
307
|
+
ps --> PARMA SARINCE
|
308
|
+
(ks,cs,x) --> QUESSE SARINCE
|
299
309
|
|
300
|
-
ts{V_D} --> TINCO
|
301
|
-
ps{V_D} --> PARMA
|
302
|
-
(ks,cs,x){V_D} --> QUESSE
|
310
|
+
ts{V_D} --> TINCO SARINCE {_V_D_}
|
311
|
+
ps{V_D} --> PARMA SARINCE {_V_D_}
|
312
|
+
(ks,cs,x){V_D} --> QUESSE SARINCE {_V_D_}
|
303
313
|
|
304
314
|
h{V_D} --> HYARMEN {_V_D_}
|
305
315
|
h --> HYARMEN
|
@@ -366,7 +376,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
366
376
|
> --> PUNCT_PAREN_R
|
367
377
|
|
368
378
|
\** Not universal between fonts ... **\
|
369
|
-
$ -->
|
379
|
+
$ --> ELVISH_PAREN
|
370
380
|
≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
|
371
381
|
≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
|
372
382
|
\end
|
data/lib/api/charset.rb
CHANGED
@@ -30,11 +30,11 @@ module Glaemscribe
|
|
30
30
|
attr_reader :virtual_chars
|
31
31
|
|
32
32
|
class Char
|
33
|
-
attr_accessor :line
|
34
|
-
attr_accessor :code
|
35
|
-
attr_accessor :names
|
36
|
-
attr_accessor :str
|
37
|
-
attr_accessor :charset
|
33
|
+
attr_accessor :line # Line num in the sourcecode
|
34
|
+
attr_accessor :code # Position in unicode
|
35
|
+
attr_accessor :names # Names
|
36
|
+
attr_accessor :str # How does this char resolve as a string
|
37
|
+
attr_accessor :charset # Pointer to parent charset
|
38
38
|
|
39
39
|
def initialize
|
40
40
|
@names = {}
|
@@ -43,9 +43,13 @@ module Glaemscribe
|
|
43
43
|
def virtual?
|
44
44
|
false
|
45
45
|
end
|
46
|
+
|
47
|
+
def sequence?
|
48
|
+
false
|
49
|
+
end
|
46
50
|
end
|
47
51
|
|
48
|
-
class VirtualChar
|
52
|
+
class VirtualChar # Could have had inheritance here ...
|
49
53
|
attr_accessor :line
|
50
54
|
attr_accessor :names
|
51
55
|
attr_accessor :classes
|
@@ -121,6 +125,45 @@ module Glaemscribe
|
|
121
125
|
def virtual?
|
122
126
|
true
|
123
127
|
end
|
128
|
+
|
129
|
+
def sequence?
|
130
|
+
false
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
class SequenceChar
|
135
|
+
attr_accessor :line # Line of code
|
136
|
+
attr_accessor :names # Names
|
137
|
+
attr_accessor :sequence # The sequence of chars
|
138
|
+
attr_accessor :charset # Pointer to parent charset
|
139
|
+
|
140
|
+
def virtual?
|
141
|
+
false
|
142
|
+
end
|
143
|
+
|
144
|
+
def sequence?
|
145
|
+
true
|
146
|
+
end
|
147
|
+
|
148
|
+
def str
|
149
|
+
# A sequence char should never arrive unreplaced
|
150
|
+
VIRTUAL_CHAR_OUTPUT
|
151
|
+
end
|
152
|
+
|
153
|
+
def finalize
|
154
|
+
if @sequence.count == 0
|
155
|
+
@charset.errors << Glaeml::Error.new(@line, "Sequence for sequence char is empty.")
|
156
|
+
end
|
157
|
+
|
158
|
+
@sequence.each{ |symbol|
|
159
|
+
# Check that the sequence is correct
|
160
|
+
found = @charset[symbol]
|
161
|
+
if !found
|
162
|
+
@charset.errors << Glaeml::Error.new(@line, "Sequence char #{symbol} cannot be found in the charset.")
|
163
|
+
end
|
164
|
+
}
|
165
|
+
end
|
166
|
+
|
124
167
|
end
|
125
168
|
|
126
169
|
def initialize(name)
|
@@ -156,10 +199,21 @@ module Glaemscribe
|
|
156
199
|
@chars << c
|
157
200
|
end
|
158
201
|
|
202
|
+
def add_sequence_char(line, names, seq)
|
203
|
+
return if names.empty? || names.include?("?") # Ignore characters with '?'
|
204
|
+
|
205
|
+
c = SequenceChar.new
|
206
|
+
c.line = line
|
207
|
+
c.names = names
|
208
|
+
c.sequence = seq.split.reject{|token| token.empty? }
|
209
|
+
c.charset = self
|
210
|
+
@chars << c
|
211
|
+
end
|
212
|
+
|
159
213
|
def finalize
|
160
214
|
@errors = []
|
161
215
|
@lookup_table = {}
|
162
|
-
@virtual_chars = []
|
216
|
+
@virtual_chars = [] # A convenient filtered array
|
163
217
|
|
164
218
|
@chars.each { |c|
|
165
219
|
c.names.each { |cname|
|
@@ -179,6 +233,12 @@ module Glaemscribe
|
|
179
233
|
end
|
180
234
|
}
|
181
235
|
|
236
|
+
@chars.each{|c|
|
237
|
+
if c.class == SequenceChar
|
238
|
+
c.finalize
|
239
|
+
end
|
240
|
+
}
|
241
|
+
|
182
242
|
API::Debug::log("Finalized charset '#{@name}', #{@lookup_table.count} symbols loaded.")
|
183
243
|
end
|
184
244
|
|
data/lib/api/charset_parser.rb
CHANGED
@@ -47,6 +47,13 @@ module Glaemscribe
|
|
47
47
|
names = char_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
|
48
48
|
@charset.add_char(char_element.line,code,names)
|
49
49
|
}
|
50
|
+
|
51
|
+
doc.root_node.gpath("seq").each{ |seq_elemnt|
|
52
|
+
names = seq_elemnt.args
|
53
|
+
child_node = seq_elemnt.children.first
|
54
|
+
seq = (child_node && child_node.text?)?(child_node.args.first):("")
|
55
|
+
@charset.add_sequence_char(seq_elemnt.line,names,seq)
|
56
|
+
}
|
50
57
|
|
51
58
|
doc.root_node.gpath("virtual").each { |virtual_element|
|
52
59
|
names = virtual_element.args
|
data/lib/api/constants.rb
CHANGED
@@ -23,11 +23,10 @@
|
|
23
23
|
module Glaemscribe
|
24
24
|
module API
|
25
25
|
WORD_BREAKER = "|"
|
26
|
-
WORD_BOUNDARY = "_"
|
27
|
-
|
28
|
-
SPECIAL_CHAR_UNDERSCORE = '➊'
|
29
|
-
SPECIAL_CHAR_NBSP = '➋'
|
30
26
|
|
27
|
+
WORD_BOUNDARY_LANG = "_"
|
28
|
+
WORD_BOUNDARY_TREE = "\u0000"
|
29
|
+
|
31
30
|
UNKNOWN_CHAR_OUTPUT = "☠"
|
32
31
|
VIRTUAL_CHAR_OUTPUT = "☢" # When transcribing a virtual char...
|
33
32
|
end
|