glaemscribe 1.1.14 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/bin/glaemscribe +19 -15
- data/glaemresources/charsets/cirth_ds.cst +205 -0
- data/glaemresources/charsets/sarati_eldamar.cst +256 -0
- data/glaemresources/charsets/tengwar_ds_annatar.cst +546 -0
- data/glaemresources/charsets/tengwar_ds_eldamar.cst +535 -0
- data/glaemresources/charsets/tengwar_ds_elfica.cst +551 -0
- data/glaemresources/charsets/tengwar_ds_parmaite.cst +534 -0
- data/glaemresources/charsets/tengwar_ds_sindarin.cst +531 -0
- data/glaemresources/charsets/tengwar_freemono.cst +217 -0
- data/glaemresources/charsets/tengwar_guni_annatar.cst +628 -0
- data/glaemresources/charsets/tengwar_guni_eldamar.cst +618 -0
- data/glaemresources/charsets/tengwar_guni_elfica.cst +620 -0
- data/glaemresources/charsets/tengwar_guni_parmaite.cst +621 -0
- data/glaemresources/charsets/tengwar_guni_sindarin.cst +617 -0
- data/glaemresources/charsets/tengwar_telcontar.cst +218 -0
- data/glaemresources/charsets/unicode_gothic.cst +64 -0
- data/glaemresources/charsets/unicode_runes.cst +121 -0
- data/glaemresources/modes/{adunaic.glaem → adunaic-tengwar-glaemscrafu.glaem} +14 -2
- data/glaemresources/modes/{blackspeech.glaem → blackspeech-tengwar-general_use.glaem} +12 -2
- data/glaemresources/modes/japanese-tengwar.glaem +771 -0
- data/glaemresources/modes/{khuzdul.glaem → khuzdul-cirth-moria.glaem} +4 -1
- data/glaemresources/modes/{futhorc.glaem → old_english-futhorc.glaem} +0 -0
- data/glaemresources/modes/{mercian.glaem → old_english-tengwar-mercian.glaem} +22 -12
- data/glaemresources/modes/{westsaxon.glaem → old_english-tengwar-westsaxon.glaem} +20 -11
- data/glaemresources/modes/{futhark-runicus.glaem → old_norse-futhark-runicus.glaem} +0 -0
- data/glaemresources/modes/{futhark-younger.glaem → old_norse-futhark-younger.glaem} +0 -0
- data/glaemresources/modes/{quenya.glaem → quenya-tengwar-classical.glaem} +32 -50
- data/glaemresources/modes/raw-tengwar.glaem +46 -23
- data/glaemresources/modes/{rlyehian.glaem → rlyehian-tengwar.glaem} +14 -3
- data/glaemresources/modes/{sindarin-daeron.glaem → sindarin-cirth-daeron.glaem} +55 -14
- data/glaemresources/modes/{sindarin-beleriand.glaem → sindarin-tengwar-beleriand.glaem} +154 -28
- data/glaemresources/modes/{sindarin.glaem → sindarin-tengwar-general_use.glaem} +86 -25
- data/glaemresources/modes/{telerin.glaem → telerin-tengwar-glaemscrafu.glaem} +16 -6
- data/glaemresources/modes/{westron.glaem → westron-tengwar-glaemscrafu.glaem} +18 -8
- data/lib/api/charset.rb +67 -7
- data/lib/api/charset_parser.rb +7 -0
- data/lib/api/constants.rb +3 -4
- data/lib/api/fragment.rb +26 -5
- data/lib/api/if_tree.rb +70 -8
- data/lib/api/macro.rb +40 -0
- data/lib/api/mode.rb +35 -13
- data/lib/api/mode_parser.rb +106 -12
- data/lib/api/object_additions.rb +23 -1
- data/lib/api/option.rb +17 -2
- data/lib/api/post_processor/resolve_virtuals.rb +25 -9
- data/lib/api/resource_manager.rb +1 -0
- data/lib/api/rule_group.rb +170 -26
- data/lib/api/sheaf_chain_iterator.rb +1 -1
- data/lib/api/transcription_processor.rb +3 -3
- data/lib/api/tts.rb +51 -0
- data/lib/glaemscribe.rb +34 -31
- data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +21 -0
- data/lib_espeak/glaemscribe_tts.js +365 -0
- metadata +67 -21
@@ -35,12 +35,16 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
35
35
|
\entry "0.1.0" "Added support for the Tengwar Elfica font"
|
36
36
|
\entry "0.1.1" "Added support for inlined raw tengwar"
|
37
37
|
\entry "0.1.2" "Added support for non-breaking spaces"
|
38
|
+
\entry "0.1.3" "Added support for new unicode charsets"
|
39
|
+
\entry "0.1.4" "Added support for the Tengwar Telcontar font"
|
40
|
+
\entry "0.1.5" "Added a few labial exotic combinations. Reworked median point behaviour, and ng."
|
41
|
+
\entry "0.1.6" "Added gasdil handling."
|
38
42
|
\end
|
39
43
|
|
40
44
|
\language "Sindarin"
|
41
45
|
\writing "Tengwar"
|
42
46
|
\mode "Sindarin Tengwar - General Use"
|
43
|
-
\version "0.1.
|
47
|
+
\version "0.1.6"
|
44
48
|
\authors "J.R.R. Tolkien, impl. Talagan (Benjamin Babut)"
|
45
49
|
|
46
50
|
\world arda
|
@@ -51,7 +55,15 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
51
55
|
\charset tengwar_ds_eldamar false
|
52
56
|
\charset tengwar_ds_annatar false
|
53
57
|
\charset tengwar_ds_elfica false
|
58
|
+
|
59
|
+
\charset tengwar_guni_sindarin false
|
60
|
+
\charset tengwar_guni_parmaite false
|
61
|
+
\charset tengwar_guni_eldamar false
|
62
|
+
\charset tengwar_guni_annatar false
|
63
|
+
\charset tengwar_guni_elfica false
|
64
|
+
|
54
65
|
\charset tengwar_freemono false
|
66
|
+
\charset tengwar_telcontar false
|
55
67
|
|
56
68
|
\raw_mode "raw-tengwar"
|
57
69
|
|
@@ -62,6 +74,16 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
62
74
|
\value U_UP_O_DOWN 2
|
63
75
|
\end
|
64
76
|
|
77
|
+
\beg option apostrophe APOSTROPHE_IGNORED
|
78
|
+
\value APOSTROPHE_IGNORED 0
|
79
|
+
\value APOSTROPHE_GASDIL 1
|
80
|
+
\end
|
81
|
+
|
82
|
+
\beg option hyphen HYPHEN_WORD_BREAKER
|
83
|
+
\value HYPHEN_WORD_BREAKER 0
|
84
|
+
\value HYPHEN_WORD_JOINER 1
|
85
|
+
\end
|
86
|
+
|
65
87
|
\beg option consonant_modification_style CONSONANT_MODIFICATION_STYLE_BAR
|
66
88
|
\value CONSONANT_MODIFICATION_STYLE_WAVE 0
|
67
89
|
\value CONSONANT_MODIFICATION_STYLE_BAR 1
|
@@ -85,6 +107,17 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
85
107
|
\** Work exclusively downcase **\
|
86
108
|
\downcase
|
87
109
|
|
110
|
+
\if "hyphen == HYPHEN_WORD_JOINER"
|
111
|
+
\** Replace hyphen by median point **\
|
112
|
+
\substitute "-" "·"
|
113
|
+
\else
|
114
|
+
\** Replace hyphen by glaemscribe's word breaker **\
|
115
|
+
\substitute "-" "|"
|
116
|
+
\endif
|
117
|
+
|
118
|
+
\** Add keyboard friendly word joiner **\
|
119
|
+
\substitute "*" "·"
|
120
|
+
|
88
121
|
\** Simplify trema vowels **\
|
89
122
|
\substitute ä a
|
90
123
|
\substitute ë e
|
@@ -107,6 +140,17 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
107
140
|
\** Special case of starting 'i' before vowels, replace i by j **\
|
108
141
|
\rxsubstitute "\\bi([aeouyáāâéēêíīîóōôúūûýȳŷ])" "j\\1"
|
109
142
|
|
143
|
+
\** Special case for ng : before the vast majority of consonants, treat as ŋ **\
|
144
|
+
\** Don't include r / l / lh / w **\
|
145
|
+
\rxsubstitute "ng([tpckbdfðvnmhs])" "ŋ\\1"
|
146
|
+
|
147
|
+
\** Prevent mutated ng from being treated as strong middle word n|g (ex : i·ngelaidh [iŋɛlaið] ) **\
|
148
|
+
\substitute "·ng" "·ŋ"
|
149
|
+
\** But avoid losing the strong g in nasal mutation of g (ex : in·Gelydh [iŋgɛlyð] ) **\
|
150
|
+
\substitute "n·g" "·ŋg"
|
151
|
+
\** Use median dot as word joiner **\
|
152
|
+
\substitute "·" ""
|
153
|
+
|
110
154
|
\** Preprocess numbers **\
|
111
155
|
\elvish_numbers "\\eval numbers_base" "\\eval reverse_numbers"
|
112
156
|
\end
|
@@ -189,8 +233,12 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
189
233
|
{V_D_WN}n{K} --> CALMA {NASAL} {_V_D_WN_}
|
190
234
|
|
191
235
|
\** 2ND LINE **\
|
192
|
-
|
193
|
-
|
236
|
+
\**
|
237
|
+
/ŋg/ : this is ng in middle of words + might be found at word start.
|
238
|
+
See also final/initial ng_ / _ng below
|
239
|
+
**\
|
240
|
+
{L2} === d * b * g * (ng,ngg,ŋg,ñg)
|
241
|
+
{_L2_} === ANDO * UMBAR * UNGWE * UNGWE {NASAL}
|
194
242
|
|
195
243
|
{V_D_WN}[{L2}] --> 2,1 --> [{_L2_}]{_V_D_WN_}
|
196
244
|
|
@@ -210,13 +258,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
210
258
|
|
211
259
|
\** 4TH LINE **\
|
212
260
|
{L4} === (đ,ð,ðh,dh) * (v,bh,f_) \** Some noldorin variants here ... **\
|
213
|
-
{_L4_}
|
261
|
+
{_L4_} === ANTO * AMPA
|
214
262
|
|
215
263
|
{V_D_WN}[{L4}] --> 2,1 --> [{_L4_}]{_V_D_WN_}
|
216
264
|
|
217
265
|
\** 5TH LINE **\
|
218
|
-
{L5} === n
|
219
|
-
{_L5_} === NUMEN *
|
266
|
+
{L5} === n * m * (_ng,ng_,ŋ,ñ) * _mh \** weak ng at initial and final **\
|
267
|
+
{_L5_} === NUMEN * MALTA * NWALME * MALTA_W_HOOK
|
220
268
|
|
221
269
|
{V_D_WN}[{L5}] --> 2,1 --> [{_L5_}]{_V_D_WN_}
|
222
270
|
|
@@ -226,7 +274,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
226
274
|
\** 6TH LINE **\
|
227
275
|
|
228
276
|
\** 7TH LINE **\
|
229
|
-
{L7} === r_ * r * l * ll
|
277
|
+
{L7} === r_ * r * l * ll * w
|
230
278
|
{_L7_} === ORE * ROMEN * LAMBE * LAMBE {GEMINATE} * VALA
|
231
279
|
|
232
280
|
{V_D_WN}[{L7}] --> 2,1 --> [{_L7_}]{_V_D_WN_}
|
@@ -252,29 +300,47 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
252
300
|
|
253
301
|
\**
|
254
302
|
Ok here come the labialized consonants which are really tricky
|
255
|
-
The fonts generally do not handle well the
|
303
|
+
The fonts generally do not handle the wa-tehta curl + tehtar well; this should be one more argument for
|
256
304
|
adopting open type anchors with which we can stack diacritics (see the sarati modes).
|
257
305
|
For here, we cheat. Either we don't have any tehta on the tengwa, and it's easy.
|
258
306
|
Or, we put the two signs in their small versions, side by side.
|
259
307
|
We give an option not to use that trick, if the option is not set, we simply do not use
|
260
|
-
the
|
308
|
+
the wa-tehta curl at all when there's a tehta on the tengwa.
|
261
309
|
**\
|
262
310
|
|
263
311
|
\if "labialized_consonants_u_curl == LABIALIZED_U_CURL_NO_TEHTAR || labialized_consonants_u_curl == LABIALIZED_U_CURL_ALWAYS"
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
312
|
+
bw --> UMBAR WA_TEHTA
|
313
|
+
dw --> ANDO WA_TEHTA
|
314
|
+
gw --> UNGWE WA_TEHTA
|
315
|
+
lw --> LAMBE WA_TEHTA
|
316
|
+
nw --> NUMEN WA_TEHTA
|
317
|
+
rw --> ROMEN WA_TEHTA
|
318
|
+
(ng,ngg,ŋg,ñg)w --> UNGWE {NASAL} WA_TEHTA
|
319
|
+
(_ng,ng_,ŋ,ñ)w --> NWALME WA_TEHTA
|
269
320
|
\endif
|
270
321
|
|
271
322
|
\if "labialized_consonants_u_curl == LABIALIZED_U_CURL_ALWAYS"
|
272
|
-
{V_D}
|
273
|
-
{V_D}
|
274
|
-
{V_D}
|
275
|
-
{V_D}
|
276
|
-
{V_D}
|
323
|
+
{V_D}bw --> UMBAR WA_TEHTA {_V_D_}
|
324
|
+
{V_D}dw --> ANDO WA_TEHTA {_V_D_}
|
325
|
+
{V_D}gw --> UNGWE WA_TEHTA {_V_D_}
|
326
|
+
{V_D}lw --> LAMBE WA_TEHTA {_V_D_}
|
327
|
+
{V_D}nw --> NUMEN WA_TEHTA {_V_D_}
|
328
|
+
{V_D}rw --> ROMEN WA_TEHTA {_V_D_}
|
329
|
+
{V_D}(ng,ngg,ŋg,ñg)w --> UNGWE {NASAL} WA_TEHTA {_V_D_}
|
330
|
+
{V_D}(_ng,ng_,ŋ,ñ)w --> NWALME WA_TEHTA {_V_D_}
|
277
331
|
\endif
|
332
|
+
|
333
|
+
\if "apostrophe == APOSTROPHE_IGNORED"
|
334
|
+
' --> {NULL}
|
335
|
+
’ --> {NULL}
|
336
|
+
\else
|
337
|
+
\** use gasdil **\
|
338
|
+
' --> HALLA
|
339
|
+
’ --> HALLA
|
340
|
+
\endif
|
341
|
+
|
342
|
+
\** Forced gasdil **\
|
343
|
+
° --> HALLA
|
278
344
|
\end
|
279
345
|
|
280
346
|
\beg rules punctuation
|
@@ -297,11 +363,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
297
363
|
- --> {NULL}
|
298
364
|
– --> PUNCT_TILD
|
299
365
|
— --> PUNCT_TILD
|
300
|
-
|
301
|
-
\** Apostrophe **\
|
302
|
-
|
303
|
-
' --> {NULL}
|
304
|
-
’ --> {NULL}
|
305
366
|
|
306
367
|
\** NBSP **\
|
307
368
|
{NBSP} --> NBSP
|
@@ -323,7 +384,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
323
384
|
> --> PUNCT_PAREN_R
|
324
385
|
|
325
386
|
\** Not universal between fonts ... **\
|
326
|
-
$ -->
|
387
|
+
$ --> ELVISH_PAREN
|
327
388
|
≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
|
328
389
|
≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
|
329
390
|
\end
|
@@ -34,12 +34,14 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
34
34
|
\entry "0.1.1" "Added support for inlined raw tengwar"
|
35
35
|
\entry "0.1.2" "Added support for non-breaking spaces"
|
36
36
|
\entry "0.1.3" "Correcting visibility options to conform to new glaeml args strict syntax"
|
37
|
+
\entry "0.1.4" "Added support for new unicode charsets"
|
38
|
+
\entry "0.1.5" "Added support for the Tengwar Telcontar font"
|
37
39
|
\end
|
38
40
|
|
39
41
|
\language "Telerin"
|
40
42
|
\writing "Tengwar"
|
41
43
|
\mode "Telerin Tengwar - G*"
|
42
|
-
\version "0.1.
|
44
|
+
\version "0.1.5"
|
43
45
|
\authors "Talagan (Benjamin Babut), based on J.R.R Tolkien"
|
44
46
|
|
45
47
|
\world arda
|
@@ -50,7 +52,15 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
50
52
|
\charset tengwar_ds_eldamar false
|
51
53
|
\charset tengwar_ds_annatar false
|
52
54
|
\charset tengwar_ds_elfica false
|
55
|
+
|
56
|
+
\charset tengwar_guni_sindarin false
|
57
|
+
\charset tengwar_guni_parmaite false
|
58
|
+
\charset tengwar_guni_eldamar false
|
59
|
+
\charset tengwar_guni_annatar false
|
60
|
+
\charset tengwar_guni_elfica false
|
61
|
+
|
53
62
|
\charset tengwar_freemono false
|
63
|
+
\charset tengwar_telcontar false
|
54
64
|
|
55
65
|
\raw_mode "raw-tengwar"
|
56
66
|
|
@@ -237,10 +247,10 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
237
247
|
[ {L1} ] {V_D_WN} --> [ {_L1_} ] {_V_D_WN_}
|
238
248
|
[ {L1_GEMS} ] {V_D_WN} --> [ {_L1_GEMS_} ] {_V_D_WN_}
|
239
249
|
|
240
|
-
ts{V_D_WN} --> TINCO
|
241
|
-
ps{V_D_WN} --> PARMA
|
242
|
-
{K}s{V_D_WN} --> CALMA
|
243
|
-
x{V_D_WN} --> CALMA
|
250
|
+
ts{V_D_WN} --> TINCO SARINCE {_V_D_WN_}
|
251
|
+
ps{V_D_WN} --> PARMA SARINCE {_V_D_WN_}
|
252
|
+
{K}s{V_D_WN} --> CALMA SARINCE {_V_D_WN_}
|
253
|
+
x{V_D_WN} --> CALMA SARINCE {_V_D_WN_} \** render ks for x **\
|
244
254
|
|
245
255
|
\** ===================== **\
|
246
256
|
\** 2ND LINE RULES **\
|
@@ -369,7 +379,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
369
379
|
> --> PUNCT_PAREN_R
|
370
380
|
|
371
381
|
\** Not universal between fonts ... **\
|
372
|
-
$ -->
|
382
|
+
$ --> ELVISH_PAREN
|
373
383
|
≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
|
374
384
|
≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
|
375
385
|
|
@@ -30,13 +30,15 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
30
30
|
\entry "0.1.0" "Added support for the Tengwar Elfica font"
|
31
31
|
\entry "0.1.1" "Added support for inlined raw tengwar"
|
32
32
|
\entry "0.1.2" "Added support for non-breaking spaces"
|
33
|
+
\entry "0.1.3" "Added support for new unicode charsets"
|
34
|
+
\entry "0.1.4" "Added support for the Tengwar Telcontar font"
|
33
35
|
\end
|
34
36
|
|
35
37
|
\** Westron mode for glaemscribe (MAY BE INCOMPLETE) **\
|
36
38
|
\language Westron
|
37
39
|
\writing Tengwar
|
38
40
|
\mode "Westron Tengwar - G*"
|
39
|
-
\version "0.1.
|
41
|
+
\version "0.1.4"
|
40
42
|
\authors "Talagan (Benjamin Babut), based on J.R.R. Tolkien"
|
41
43
|
|
42
44
|
\world arda
|
@@ -49,7 +51,15 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
49
51
|
\charset tengwar_ds_eldamar false
|
50
52
|
\charset tengwar_ds_annatar false
|
51
53
|
\charset tengwar_ds_elfica false
|
54
|
+
|
55
|
+
\charset tengwar_guni_sindarin false
|
56
|
+
\charset tengwar_guni_parmaite false
|
57
|
+
\charset tengwar_guni_eldamar false
|
58
|
+
\charset tengwar_guni_annatar false
|
59
|
+
\charset tengwar_guni_elfica false
|
60
|
+
|
52
61
|
\charset tengwar_freemono false
|
62
|
+
\charset tengwar_telcontar false
|
53
63
|
|
54
64
|
\beg options
|
55
65
|
|
@@ -293,13 +303,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
293
303
|
nz{V_D} --> ESSE_NUQUERNA {NASAL} {_V_D_}
|
294
304
|
nz --> ESSE_NUQUERNA {NASAL}
|
295
305
|
|
296
|
-
ts --> TINCO
|
297
|
-
ps --> PARMA
|
298
|
-
(ks,cs,x) --> QUESSE
|
306
|
+
ts --> TINCO SARINCE
|
307
|
+
ps --> PARMA SARINCE
|
308
|
+
(ks,cs,x) --> QUESSE SARINCE
|
299
309
|
|
300
|
-
ts{V_D} --> TINCO
|
301
|
-
ps{V_D} --> PARMA
|
302
|
-
(ks,cs,x){V_D} --> QUESSE
|
310
|
+
ts{V_D} --> TINCO SARINCE {_V_D_}
|
311
|
+
ps{V_D} --> PARMA SARINCE {_V_D_}
|
312
|
+
(ks,cs,x){V_D} --> QUESSE SARINCE {_V_D_}
|
303
313
|
|
304
314
|
h{V_D} --> HYARMEN {_V_D_}
|
305
315
|
h --> HYARMEN
|
@@ -366,7 +376,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
366
376
|
> --> PUNCT_PAREN_R
|
367
377
|
|
368
378
|
\** Not universal between fonts ... **\
|
369
|
-
$ -->
|
379
|
+
$ --> ELVISH_PAREN
|
370
380
|
≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
|
371
381
|
≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
|
372
382
|
\end
|
data/lib/api/charset.rb
CHANGED
@@ -30,11 +30,11 @@ module Glaemscribe
|
|
30
30
|
attr_reader :virtual_chars
|
31
31
|
|
32
32
|
class Char
|
33
|
-
attr_accessor :line
|
34
|
-
attr_accessor :code
|
35
|
-
attr_accessor :names
|
36
|
-
attr_accessor :str
|
37
|
-
attr_accessor :charset
|
33
|
+
attr_accessor :line # Line num in the sourcecode
|
34
|
+
attr_accessor :code # Position in unicode
|
35
|
+
attr_accessor :names # Names
|
36
|
+
attr_accessor :str # How does this char resolve as a string
|
37
|
+
attr_accessor :charset # Pointer to parent charset
|
38
38
|
|
39
39
|
def initialize
|
40
40
|
@names = {}
|
@@ -43,9 +43,13 @@ module Glaemscribe
|
|
43
43
|
def virtual?
|
44
44
|
false
|
45
45
|
end
|
46
|
+
|
47
|
+
def sequence?
|
48
|
+
false
|
49
|
+
end
|
46
50
|
end
|
47
51
|
|
48
|
-
class VirtualChar
|
52
|
+
class VirtualChar # Could have had inheritance here ...
|
49
53
|
attr_accessor :line
|
50
54
|
attr_accessor :names
|
51
55
|
attr_accessor :classes
|
@@ -121,6 +125,45 @@ module Glaemscribe
|
|
121
125
|
def virtual?
|
122
126
|
true
|
123
127
|
end
|
128
|
+
|
129
|
+
def sequence?
|
130
|
+
false
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
class SequenceChar
|
135
|
+
attr_accessor :line # Line of code
|
136
|
+
attr_accessor :names # Names
|
137
|
+
attr_accessor :sequence # The sequence of chars
|
138
|
+
attr_accessor :charset # Pointer to parent charset
|
139
|
+
|
140
|
+
def virtual?
|
141
|
+
false
|
142
|
+
end
|
143
|
+
|
144
|
+
def sequence?
|
145
|
+
true
|
146
|
+
end
|
147
|
+
|
148
|
+
def str
|
149
|
+
# A sequence char should never arrive unreplaced
|
150
|
+
VIRTUAL_CHAR_OUTPUT
|
151
|
+
end
|
152
|
+
|
153
|
+
def finalize
|
154
|
+
if @sequence.count == 0
|
155
|
+
@charset.errors << Glaeml::Error.new(@line, "Sequence for sequence char is empty.")
|
156
|
+
end
|
157
|
+
|
158
|
+
@sequence.each{ |symbol|
|
159
|
+
# Check that the sequence is correct
|
160
|
+
found = @charset[symbol]
|
161
|
+
if !found
|
162
|
+
@charset.errors << Glaeml::Error.new(@line, "Sequence char #{symbol} cannot be found in the charset.")
|
163
|
+
end
|
164
|
+
}
|
165
|
+
end
|
166
|
+
|
124
167
|
end
|
125
168
|
|
126
169
|
def initialize(name)
|
@@ -156,10 +199,21 @@ module Glaemscribe
|
|
156
199
|
@chars << c
|
157
200
|
end
|
158
201
|
|
202
|
+
def add_sequence_char(line, names, seq)
|
203
|
+
return if names.empty? || names.include?("?") # Ignore characters with '?'
|
204
|
+
|
205
|
+
c = SequenceChar.new
|
206
|
+
c.line = line
|
207
|
+
c.names = names
|
208
|
+
c.sequence = seq.split.reject{|token| token.empty? }
|
209
|
+
c.charset = self
|
210
|
+
@chars << c
|
211
|
+
end
|
212
|
+
|
159
213
|
def finalize
|
160
214
|
@errors = []
|
161
215
|
@lookup_table = {}
|
162
|
-
@virtual_chars = []
|
216
|
+
@virtual_chars = [] # A convenient filtered array
|
163
217
|
|
164
218
|
@chars.each { |c|
|
165
219
|
c.names.each { |cname|
|
@@ -179,6 +233,12 @@ module Glaemscribe
|
|
179
233
|
end
|
180
234
|
}
|
181
235
|
|
236
|
+
@chars.each{|c|
|
237
|
+
if c.class == SequenceChar
|
238
|
+
c.finalize
|
239
|
+
end
|
240
|
+
}
|
241
|
+
|
182
242
|
API::Debug::log("Finalized charset '#{@name}', #{@lookup_table.count} symbols loaded.")
|
183
243
|
end
|
184
244
|
|
data/lib/api/charset_parser.rb
CHANGED
@@ -47,6 +47,13 @@ module Glaemscribe
|
|
47
47
|
names = char_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
|
48
48
|
@charset.add_char(char_element.line,code,names)
|
49
49
|
}
|
50
|
+
|
51
|
+
doc.root_node.gpath("seq").each{ |seq_elemnt|
|
52
|
+
names = seq_elemnt.args
|
53
|
+
child_node = seq_elemnt.children.first
|
54
|
+
seq = (child_node && child_node.text?)?(child_node.args.first):("")
|
55
|
+
@charset.add_sequence_char(seq_elemnt.line,names,seq)
|
56
|
+
}
|
50
57
|
|
51
58
|
doc.root_node.gpath("virtual").each { |virtual_element|
|
52
59
|
names = virtual_element.args
|
data/lib/api/constants.rb
CHANGED
@@ -23,11 +23,10 @@
|
|
23
23
|
module Glaemscribe
|
24
24
|
module API
|
25
25
|
WORD_BREAKER = "|"
|
26
|
-
WORD_BOUNDARY = "_"
|
27
|
-
|
28
|
-
SPECIAL_CHAR_UNDERSCORE = '➊'
|
29
|
-
SPECIAL_CHAR_NBSP = '➋'
|
30
26
|
|
27
|
+
WORD_BOUNDARY_LANG = "_"
|
28
|
+
WORD_BOUNDARY_TREE = "\u0000"
|
29
|
+
|
31
30
|
UNKNOWN_CHAR_OUTPUT = "☠"
|
32
31
|
VIRTUAL_CHAR_OUTPUT = "☢" # When transcribing a virtual char...
|
33
32
|
end
|