glaemscribe 1.1.14 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +5 -5
  2. data/bin/glaemscribe +19 -15
  3. data/glaemresources/charsets/cirth_ds.cst +205 -0
  4. data/glaemresources/charsets/sarati_eldamar.cst +256 -0
  5. data/glaemresources/charsets/tengwar_ds_annatar.cst +546 -0
  6. data/glaemresources/charsets/tengwar_ds_eldamar.cst +535 -0
  7. data/glaemresources/charsets/tengwar_ds_elfica.cst +551 -0
  8. data/glaemresources/charsets/tengwar_ds_parmaite.cst +534 -0
  9. data/glaemresources/charsets/tengwar_ds_sindarin.cst +531 -0
  10. data/glaemresources/charsets/tengwar_freemono.cst +217 -0
  11. data/glaemresources/charsets/tengwar_guni_annatar.cst +628 -0
  12. data/glaemresources/charsets/tengwar_guni_eldamar.cst +618 -0
  13. data/glaemresources/charsets/tengwar_guni_elfica.cst +620 -0
  14. data/glaemresources/charsets/tengwar_guni_parmaite.cst +621 -0
  15. data/glaemresources/charsets/tengwar_guni_sindarin.cst +617 -0
  16. data/glaemresources/charsets/tengwar_telcontar.cst +218 -0
  17. data/glaemresources/charsets/unicode_gothic.cst +64 -0
  18. data/glaemresources/charsets/unicode_runes.cst +121 -0
  19. data/glaemresources/modes/{adunaic.glaem → adunaic-tengwar-glaemscrafu.glaem} +14 -2
  20. data/glaemresources/modes/{blackspeech.glaem → blackspeech-tengwar-general_use.glaem} +12 -2
  21. data/glaemresources/modes/japanese-tengwar.glaem +771 -0
  22. data/glaemresources/modes/{khuzdul.glaem → khuzdul-cirth-moria.glaem} +4 -1
  23. data/glaemresources/modes/{futhorc.glaem → old_english-futhorc.glaem} +0 -0
  24. data/glaemresources/modes/{mercian.glaem → old_english-tengwar-mercian.glaem} +22 -12
  25. data/glaemresources/modes/{westsaxon.glaem → old_english-tengwar-westsaxon.glaem} +20 -11
  26. data/glaemresources/modes/{futhark-runicus.glaem → old_norse-futhark-runicus.glaem} +0 -0
  27. data/glaemresources/modes/{futhark-younger.glaem → old_norse-futhark-younger.glaem} +0 -0
  28. data/glaemresources/modes/{quenya.glaem → quenya-tengwar-classical.glaem} +32 -50
  29. data/glaemresources/modes/raw-tengwar.glaem +46 -23
  30. data/glaemresources/modes/{rlyehian.glaem → rlyehian-tengwar.glaem} +14 -3
  31. data/glaemresources/modes/{sindarin-daeron.glaem → sindarin-cirth-daeron.glaem} +55 -14
  32. data/glaemresources/modes/{sindarin-beleriand.glaem → sindarin-tengwar-beleriand.glaem} +154 -28
  33. data/glaemresources/modes/{sindarin.glaem → sindarin-tengwar-general_use.glaem} +86 -25
  34. data/glaemresources/modes/{telerin.glaem → telerin-tengwar-glaemscrafu.glaem} +16 -6
  35. data/glaemresources/modes/{westron.glaem → westron-tengwar-glaemscrafu.glaem} +18 -8
  36. data/lib/api/charset.rb +67 -7
  37. data/lib/api/charset_parser.rb +7 -0
  38. data/lib/api/constants.rb +3 -4
  39. data/lib/api/fragment.rb +26 -5
  40. data/lib/api/if_tree.rb +70 -8
  41. data/lib/api/macro.rb +40 -0
  42. data/lib/api/mode.rb +35 -13
  43. data/lib/api/mode_parser.rb +106 -12
  44. data/lib/api/object_additions.rb +23 -1
  45. data/lib/api/option.rb +17 -2
  46. data/lib/api/post_processor/resolve_virtuals.rb +25 -9
  47. data/lib/api/resource_manager.rb +1 -0
  48. data/lib/api/rule_group.rb +170 -26
  49. data/lib/api/sheaf_chain_iterator.rb +1 -1
  50. data/lib/api/transcription_processor.rb +3 -3
  51. data/lib/api/tts.rb +51 -0
  52. data/lib/glaemscribe.rb +34 -31
  53. data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +21 -0
  54. data/lib_espeak/glaemscribe_tts.js +365 -0
  55. metadata +67 -21
@@ -35,12 +35,16 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
35
35
  \entry "0.1.0" "Added support for the Tengwar Elfica font"
36
36
  \entry "0.1.1" "Added support for inlined raw tengwar"
37
37
  \entry "0.1.2" "Added support for non-breaking spaces"
38
+ \entry "0.1.3" "Added support for new unicode charsets"
39
+ \entry "0.1.4" "Added support for the Tengwar Telcontar font"
40
+ \entry "0.1.5" "Added a few labial exotic combinations. Reworked median point behaviour, and ng."
41
+ \entry "0.1.6" "Added gasdil handling."
38
42
  \end
39
43
 
40
44
  \language "Sindarin"
41
45
  \writing "Tengwar"
42
46
  \mode "Sindarin Tengwar - General Use"
43
- \version "0.1.2"
47
+ \version "0.1.6"
44
48
  \authors "J.R.R. Tolkien, impl. Talagan (Benjamin Babut)"
45
49
 
46
50
  \world arda
@@ -51,7 +55,15 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
51
55
  \charset tengwar_ds_eldamar false
52
56
  \charset tengwar_ds_annatar false
53
57
  \charset tengwar_ds_elfica false
58
+
59
+ \charset tengwar_guni_sindarin false
60
+ \charset tengwar_guni_parmaite false
61
+ \charset tengwar_guni_eldamar false
62
+ \charset tengwar_guni_annatar false
63
+ \charset tengwar_guni_elfica false
64
+
54
65
  \charset tengwar_freemono false
66
+ \charset tengwar_telcontar false
55
67
 
56
68
  \raw_mode "raw-tengwar"
57
69
 
@@ -62,6 +74,16 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
62
74
  \value U_UP_O_DOWN 2
63
75
  \end
64
76
 
77
+ \beg option apostrophe APOSTROPHE_IGNORED
78
+ \value APOSTROPHE_IGNORED 0
79
+ \value APOSTROPHE_GASDIL 1
80
+ \end
81
+
82
+ \beg option hyphen HYPHEN_WORD_BREAKER
83
+ \value HYPHEN_WORD_BREAKER 0
84
+ \value HYPHEN_WORD_JOINER 1
85
+ \end
86
+
65
87
  \beg option consonant_modification_style CONSONANT_MODIFICATION_STYLE_BAR
66
88
  \value CONSONANT_MODIFICATION_STYLE_WAVE 0
67
89
  \value CONSONANT_MODIFICATION_STYLE_BAR 1
@@ -85,6 +107,17 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
85
107
  \** Work exclusively downcase **\
86
108
  \downcase
87
109
 
110
+ \if "hyphen == HYPHEN_WORD_JOINER"
111
+ \** Replace hyphen by median point **\
112
+ \substitute "-" "·"
113
+ \else
114
+ \** Replace hyphen by glaemscribe's word breaker **\
115
+ \substitute "-" "|"
116
+ \endif
117
+
118
+ \** Add keyboard friendly word joiner **\
119
+ \substitute "*" "·"
120
+
88
121
  \** Simplify trema vowels **\
89
122
  \substitute ä a
90
123
  \substitute ë e
@@ -107,6 +140,17 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
107
140
  \** Special case of starting 'i' before vowels, replace i by j **\
108
141
  \rxsubstitute "\\bi([aeouyáāâéēêíīîóōôúūûýȳŷ])" "j\\1"
109
142
 
143
+ \** Special case for ng : before the vast majority of consonants, treat as ŋ **\
144
+ \** Don't include r / l / lh / w **\
145
+ \rxsubstitute "ng([tpckbdfðvnmhs])" "ŋ\\1"
146
+
147
+ \** Prevent a mutated ng from being treated as a strong mid-word n|g (ex : i·ngelaidh [iŋɛlaið] ) **\
148
+ \substitute "·ng" "·ŋ"
149
+ \** But avoid losing the strong g in nasal mutation of g (ex : in·Gelydh [iŋgɛlyð] ) **\
150
+ \substitute "n·g" "·ŋg"
151
+ \** Use median dot as word joiner **\
152
+ \substitute "·" ""
153
+
110
154
  \** Preprocess numbers **\
111
155
  \elvish_numbers "\\eval numbers_base" "\\eval reverse_numbers"
112
156
  \end
@@ -189,8 +233,12 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
189
233
  {V_D_WN}n{K} --> CALMA {NASAL} {_V_D_WN_}
190
234
 
191
235
  \** 2ND LINE **\
192
- {L2} === d * b * g * ng \** * g **\
193
- {_L2_} === ANDO * UMBAR * UNGWE * UNGWE {NASAL} \** * s **\
236
+ \**
237
+ /ŋg/ : this is ng in middle of words + might be found at word start.
238
+ See also final/initial ng_ / _ng below
239
+ **\
240
+ {L2} === d * b * g * (ng,ngg,ŋg,ñg)
241
+ {_L2_} === ANDO * UMBAR * UNGWE * UNGWE {NASAL}
194
242
 
195
243
  {V_D_WN}[{L2}] --> 2,1 --> [{_L2_}]{_V_D_WN_}
196
244
 
@@ -210,13 +258,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
210
258
 
211
259
  \** 4TH LINE **\
212
260
  {L4} === (đ,ð,ðh,dh) * (v,bh,f_) \** Some noldorin variants here ... **\
213
- {_L4_} === ANTO * AMPA
261
+ {_L4_} === ANTO * AMPA
214
262
 
215
263
  {V_D_WN}[{L4}] --> 2,1 --> [{_L4_}]{_V_D_WN_}
216
264
 
217
265
  \** 5TH LINE **\
218
- {L5} === n * m * _ng * _mh
219
- {_L5_} === NUMEN * MALTA * NWALME * MALTA_W_HOOK
266
+ {L5} === n * m * (_ng,ng_,ŋ,ñ) * _mh \** weak ng at initial and final **\
267
+ {_L5_} === NUMEN * MALTA * NWALME * MALTA_W_HOOK
220
268
 
221
269
  {V_D_WN}[{L5}] --> 2,1 --> [{_L5_}]{_V_D_WN_}
222
270
 
@@ -226,7 +274,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
226
274
  \** 6TH LINE **\
227
275
 
228
276
  \** 7TH LINE **\
229
- {L7} === r_ * r * l * ll * w
277
+ {L7} === r_ * r * l * ll * w
230
278
  {_L7_} === ORE * ROMEN * LAMBE * LAMBE {GEMINATE} * VALA
231
279
 
232
280
  {V_D_WN}[{L7}] --> 2,1 --> [{_L7_}]{_V_D_WN_}
@@ -252,29 +300,47 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
252
300
 
253
301
  \**
254
302
  Ok here come the labialized consonants which are really tricky
255
- The fonts generally do not handle well the u curl + tehtar, this should be one more argument for
303
+ The fonts generally do not handle well the wa-tehta curl + tehtar, this should be one more argument for
256
304
  adopting open type anchors with which we can stack diacritics (see the sarati modes).
257
305
  For here, we cheat. Either we don't have any tehta on the tengwa, and it's easy.
258
306
  Or, we put the two signs in their small versions, side by side.
259
307
  We give an option not to use that trick, if the option is not set, we simply do not use
260
- the u-curl at all when there's a tehta on the tengwa.
308
+ the wa-tehta curl at all when there's a tehta on the tengwa.
261
309
  **\
262
310
 
263
311
  \if "labialized_consonants_u_curl == LABIALIZED_U_CURL_NO_TEHTAR || labialized_consonants_u_curl == LABIALIZED_U_CURL_ALWAYS"
264
- dw --> ANDO SEV_TEHTA
265
- gw --> UNGWE SEV_TEHTA
266
- lw --> LAMBE SEV_TEHTA
267
- nw --> NUMEN SEV_TEHTA
268
- rw --> ROMEN SEV_TEHTA
312
+ bw --> UMBAR WA_TEHTA
313
+ dw --> ANDO WA_TEHTA
314
+ gw --> UNGWE WA_TEHTA
315
+ lw --> LAMBE WA_TEHTA
316
+ nw --> NUMEN WA_TEHTA
317
+ rw --> ROMEN WA_TEHTA
318
+ (ng,ngg,ŋg,ñg)w --> UNGWE {NASAL} WA_TEHTA
319
+ (_ng,ng_,ŋ,ñ)w --> NWALME WA_TEHTA
269
320
  \endif
270
321
 
271
322
  \if "labialized_consonants_u_curl == LABIALIZED_U_CURL_ALWAYS"
272
- {V_D}dw --> ANDO SEV_TEHTA {_V_D_}
273
- {V_D}gw --> UNGWE SEV_TEHTA {_V_D_}
274
- {V_D}lw --> LAMBE SEV_TEHTA {_V_D_}
275
- {V_D}nw --> NUMEN SEV_TEHTA {_V_D_}
276
- {V_D}rw --> ROMEN SEV_TEHTA {_V_D_}
323
+ {V_D}bw --> UMBAR WA_TEHTA {_V_D_}
324
+ {V_D}dw --> ANDO WA_TEHTA {_V_D_}
325
+ {V_D}gw --> UNGWE WA_TEHTA {_V_D_}
326
+ {V_D}lw --> LAMBE WA_TEHTA {_V_D_}
327
+ {V_D}nw --> NUMEN WA_TEHTA {_V_D_}
328
+ {V_D}rw --> ROMEN WA_TEHTA {_V_D_}
329
+ {V_D}(ng,ngg,ŋg,ñg)w --> UNGWE {NASAL} WA_TEHTA {_V_D_}
330
+ {V_D}(_ng,ng_,ŋ,ñ)w --> NWALME WA_TEHTA {_V_D_}
277
331
  \endif
332
+
333
+ \if "apostrophe == APOSTROPHE_IGNORED"
334
+ ' --> {NULL}
335
+ ’ --> {NULL}
336
+ \else
337
+ \** use gasdil **\
338
+ ' --> HALLA
339
+ ’ --> HALLA
340
+ \endif
341
+
342
+ \** Forced gasdil **\
343
+ ° --> HALLA
278
344
  \end
279
345
 
280
346
  \beg rules punctuation
@@ -297,11 +363,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
297
363
  - --> {NULL}
298
364
  – --> PUNCT_TILD
299
365
  — --> PUNCT_TILD
300
-
301
- \** Apostrophe **\
302
-
303
- ' --> {NULL}
304
- ’ --> {NULL}
305
366
 
306
367
  \** NBSP **\
307
368
  {NBSP} --> NBSP
@@ -323,7 +384,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
323
384
  > --> PUNCT_PAREN_R
324
385
 
325
386
  \** Not universal between fonts ... **\
326
- $ --> BOOKMARK_SIGN
387
+ $ --> ELVISH_PAREN
327
388
  ≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
328
389
  ≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
329
390
  \end
@@ -34,12 +34,14 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
34
34
  \entry "0.1.1" "Added support for inlined raw tengwar"
35
35
  \entry "0.1.2" "Added support for non-breaking spaces"
36
36
  \entry "0.1.3" "Correcting visibility options to conform to new glaeml args strict syntax"
37
+ \entry "0.1.4" "Added support for new unicode charsets"
38
+ \entry "0.1.5" "Added support for the Tengwar Telcontar font"
37
39
  \end
38
40
 
39
41
  \language "Telerin"
40
42
  \writing "Tengwar"
41
43
  \mode "Telerin Tengwar - G*"
42
- \version "0.1.3"
44
+ \version "0.1.5"
43
45
  \authors "Talagan (Benjamin Babut), based on J.R.R Tolkien"
44
46
 
45
47
  \world arda
@@ -50,7 +52,15 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
50
52
  \charset tengwar_ds_eldamar false
51
53
  \charset tengwar_ds_annatar false
52
54
  \charset tengwar_ds_elfica false
55
+
56
+ \charset tengwar_guni_sindarin false
57
+ \charset tengwar_guni_parmaite false
58
+ \charset tengwar_guni_eldamar false
59
+ \charset tengwar_guni_annatar false
60
+ \charset tengwar_guni_elfica false
61
+
53
62
  \charset tengwar_freemono false
63
+ \charset tengwar_telcontar false
54
64
 
55
65
  \raw_mode "raw-tengwar"
56
66
 
@@ -237,10 +247,10 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
237
247
  [ {L1} ] {V_D_WN} --> [ {_L1_} ] {_V_D_WN_}
238
248
  [ {L1_GEMS} ] {V_D_WN} --> [ {_L1_GEMS_} ] {_V_D_WN_}
239
249
 
240
- ts{V_D_WN} --> TINCO ALVEOLAR_SIGN {_V_D_WN_}
241
- ps{V_D_WN} --> PARMA ALVEOLAR_SIGN {_V_D_WN_}
242
- {K}s{V_D_WN} --> CALMA ALVEOLAR_SIGN {_V_D_WN_}
243
- x{V_D_WN} --> CALMA ALVEOLAR_SIGN {_V_D_WN_} \** render ks for x **\
250
+ ts{V_D_WN} --> TINCO SARINCE {_V_D_WN_}
251
+ ps{V_D_WN} --> PARMA SARINCE {_V_D_WN_}
252
+ {K}s{V_D_WN} --> CALMA SARINCE {_V_D_WN_}
253
+ x{V_D_WN} --> CALMA SARINCE {_V_D_WN_} \** render ks for x **\
244
254
 
245
255
  \** ===================== **\
246
256
  \** 2ND LINE RULES **\
@@ -369,7 +379,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
369
379
  > --> PUNCT_PAREN_R
370
380
 
371
381
  \** Not universal between fonts ... **\
372
- $ --> BOOKMARK_SIGN
382
+ $ --> ELVISH_PAREN
373
383
  ≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
374
384
  ≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
375
385
 
@@ -30,13 +30,15 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
30
30
  \entry "0.1.0" "Added support for the Tengwar Elfica font"
31
31
  \entry "0.1.1" "Added support for inlined raw tengwar"
32
32
  \entry "0.1.2" "Added support for non-breaking spaces"
33
+ \entry "0.1.3" "Added support for new unicode charsets"
34
+ \entry "0.1.4" "Added support for the Tengwar Telcontar font"
33
35
  \end
34
36
 
35
37
  \** Westron mode for glaemscribe (MAY BE INCOMPLETE) **\
36
38
  \language Westron
37
39
  \writing Tengwar
38
40
  \mode "Westron Tengwar - G*"
39
- \version "0.1.2"
41
+ \version "0.1.4"
40
42
  \authors "Talagan (Benjamin Babut), based on J.R.R. Tolkien"
41
43
 
42
44
  \world arda
@@ -49,7 +51,15 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
49
51
  \charset tengwar_ds_eldamar false
50
52
  \charset tengwar_ds_annatar false
51
53
  \charset tengwar_ds_elfica false
54
+
55
+ \charset tengwar_guni_sindarin false
56
+ \charset tengwar_guni_parmaite false
57
+ \charset tengwar_guni_eldamar false
58
+ \charset tengwar_guni_annatar false
59
+ \charset tengwar_guni_elfica false
60
+
52
61
  \charset tengwar_freemono false
62
+ \charset tengwar_telcontar false
53
63
 
54
64
  \beg options
55
65
 
@@ -293,13 +303,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
293
303
  nz{V_D} --> ESSE_NUQUERNA {NASAL} {_V_D_}
294
304
  nz --> ESSE_NUQUERNA {NASAL}
295
305
 
296
- ts --> TINCO ALVEOLAR_SIGN
297
- ps --> PARMA ALVEOLAR_SIGN
298
- (ks,cs,x) --> QUESSE ALVEOLAR_SIGN
306
+ ts --> TINCO SARINCE
307
+ ps --> PARMA SARINCE
308
+ (ks,cs,x) --> QUESSE SARINCE
299
309
 
300
- ts{V_D} --> TINCO ALVEOLAR_SIGN {_V_D_}
301
- ps{V_D} --> PARMA ALVEOLAR_SIGN {_V_D_}
302
- (ks,cs,x){V_D} --> QUESSE ALVEOLAR_SIGN {_V_D_}
310
+ ts{V_D} --> TINCO SARINCE {_V_D_}
311
+ ps{V_D} --> PARMA SARINCE {_V_D_}
312
+ (ks,cs,x){V_D} --> QUESSE SARINCE {_V_D_}
303
313
 
304
314
  h{V_D} --> HYARMEN {_V_D_}
305
315
  h --> HYARMEN
@@ -366,7 +376,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
366
376
  > --> PUNCT_PAREN_R
367
377
 
368
378
  \** Not universal between fonts ... **\
369
- $ --> BOOKMARK_SIGN
379
+ $ --> ELVISH_PAREN
370
380
  ≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
371
381
  ≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
372
382
  \end
@@ -30,11 +30,11 @@ module Glaemscribe
30
30
  attr_reader :virtual_chars
31
31
 
32
32
  class Char
33
- attr_accessor :line
34
- attr_accessor :code
35
- attr_accessor :names
36
- attr_accessor :str
37
- attr_accessor :charset
33
+ attr_accessor :line # Line num in the sourcecode
34
+ attr_accessor :code # Position in unicode
35
+ attr_accessor :names # Names
36
+ attr_accessor :str # How does this char resolve as a string
37
+ attr_accessor :charset # Pointer to parent charset
38
38
 
39
39
  def initialize
40
40
  @names = {}
@@ -43,9 +43,13 @@ module Glaemscribe
43
43
  def virtual?
44
44
  false
45
45
  end
46
+
47
+ def sequence?
48
+ false
49
+ end
46
50
  end
47
51
 
48
- class VirtualChar
52
+ class VirtualChar # Could have had inheritance here ...
49
53
  attr_accessor :line
50
54
  attr_accessor :names
51
55
  attr_accessor :classes
@@ -121,6 +125,45 @@ module Glaemscribe
121
125
  def virtual?
122
126
  true
123
127
  end
128
+
129
+ def sequence?
130
+ false
131
+ end
132
+ end
133
+
134
+ class SequenceChar
135
+ attr_accessor :line # Line of code
136
+ attr_accessor :names # Names
137
+ attr_accessor :sequence # The sequence of chars
138
+ attr_accessor :charset # Pointer to parent charset
139
+
140
+ def virtual?
141
+ false
142
+ end
143
+
144
+ def sequence?
145
+ true
146
+ end
147
+
148
+ def str
149
+ # A sequence char should never arrive unreplaced
150
+ VIRTUAL_CHAR_OUTPUT
151
+ end
152
+
153
+ def finalize
154
+ if @sequence.count == 0
155
+ @charset.errors << Glaeml::Error.new(@line, "Sequence for sequence char is empty.")
156
+ end
157
+
158
+ @sequence.each{ |symbol|
159
+ # Check that the sequence is correct
160
+ found = @charset[symbol]
161
+ if !found
162
+ @charset.errors << Glaeml::Error.new(@line, "Sequence char #{symbol} cannot be found in the charset.")
163
+ end
164
+ }
165
+ end
166
+
124
167
  end
125
168
 
126
169
  def initialize(name)
@@ -156,10 +199,21 @@ module Glaemscribe
156
199
  @chars << c
157
200
  end
158
201
 
202
+ def add_sequence_char(line, names, seq)
203
+ return if names.empty? || names.include?("?") # Ignore characters with '?'
204
+
205
+ c = SequenceChar.new
206
+ c.line = line
207
+ c.names = names
208
+ c.sequence = seq.split.reject{|token| token.empty? }
209
+ c.charset = self
210
+ @chars << c
211
+ end
212
+
159
213
  def finalize
160
214
  @errors = []
161
215
  @lookup_table = {}
162
- @virtual_chars = []
216
+ @virtual_chars = [] # A convenient filtered array
163
217
 
164
218
  @chars.each { |c|
165
219
  c.names.each { |cname|
@@ -179,6 +233,12 @@ module Glaemscribe
179
233
  end
180
234
  }
181
235
 
236
+ @chars.each{|c|
237
+ if c.class == SequenceChar
238
+ c.finalize
239
+ end
240
+ }
241
+
182
242
  API::Debug::log("Finalized charset '#{@name}', #{@lookup_table.count} symbols loaded.")
183
243
  end
184
244
 
@@ -47,6 +47,13 @@ module Glaemscribe
47
47
  names = char_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
48
48
  @charset.add_char(char_element.line,code,names)
49
49
  }
50
+
51
+ doc.root_node.gpath("seq").each{ |seq_elemnt|
52
+ names = seq_elemnt.args
53
+ child_node = seq_elemnt.children.first
54
+ seq = (child_node && child_node.text?)?(child_node.args.first):("")
55
+ @charset.add_sequence_char(seq_elemnt.line,names,seq)
56
+ }
50
57
 
51
58
  doc.root_node.gpath("virtual").each { |virtual_element|
52
59
  names = virtual_element.args
@@ -23,11 +23,10 @@
23
23
  module Glaemscribe
24
24
  module API
25
25
  WORD_BREAKER = "|"
26
- WORD_BOUNDARY = "_"
27
-
28
- SPECIAL_CHAR_UNDERSCORE = '➊'
29
- SPECIAL_CHAR_NBSP = '➋'
30
26
 
27
+ WORD_BOUNDARY_LANG = "_"
28
+ WORD_BOUNDARY_TREE = "\u0000"
29
+
31
30
  UNKNOWN_CHAR_OUTPUT = "☠"
32
31
  VIRTUAL_CHAR_OUTPUT = "☢" # When transcribing a virtual char...
33
32
  end