glaemscribe 1.0.15 → 1.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/glaemscribe +10 -3
- data/glaemresources/charsets/tengwar_ds.cst +84 -1
- data/glaemresources/modes/adunaic.glaem +51 -64
- data/glaemresources/modes/quenya.glaem +24 -33
- data/lib/api/charset.rb +99 -10
- data/lib/api/charset_parser.rb +13 -0
- data/lib/api/constants.rb +1 -0
- data/lib/api/glaeml.rb +7 -0
- data/lib/api/mode_parser.rb +1 -1
- data/lib/api/post_processor/resolve_virtuals.rb +75 -0
- data/lib/api/post_processor/reverse.rb +1 -1
- data/lib/api/pre_processor/elvish_numbers.rb +2 -2
- data/lib/api/pre_processor/rxsubstitute.rb +2 -2
- data/lib/api/pre_processor/substitute.rb +2 -2
- data/lib/api/pre_processor/up_down_tehta_split.rb +4 -4
- data/lib/api/transcription_pre_post_processor.rb +14 -9
- data/lib/glaemscribe.rb +1 -1
- metadata +3 -3
- data/lib/api/post_processor/csub.rb +0 -64
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5aa0d9e7fab6fa5fe2c50f84c90fdcfb94bd423b
|
4
|
+
data.tar.gz: 5fffb9618dd05644bee52429073a44d0d40af0d5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 764a4736e61caa7a8cf485741f65a24be731d9db2a834160cf25321741a9f27cbb7c3f7d1b9c9bf7f5945891fe4827527a38709ae2f85ac422f2030f67612c80
|
7
|
+
data.tar.gz: 0b83ebe3596cb8313c2c7b5e2f2df5c9ce7b7c5912f0c4a32d3c6fd4156eaa3e977623a31c494581d0342fe2aae342cdf36644d5dbfffc588277315ce91638b5
|
data/bin/glaemscribe
CHANGED
@@ -107,7 +107,7 @@ command :transcribe do |c|
|
|
107
107
|
|
108
108
|
c.syntax = 'glaemscribe transcribe file [options]'
|
109
109
|
c.summary = 'Transcribes a file (default command)'
|
110
|
-
c.description = "Transcribes a file with the given options. You can use '
|
110
|
+
c.description = "Transcribes a file with the given options. You can use 'STDIN' instead of a file name to work with stdin."
|
111
111
|
|
112
112
|
c.option '-m', '--mode mode', String, "The name of the embedded mode to use. See the 'list' command to get a list of available modes names."
|
113
113
|
c.option '-c', '--charset charset', String, 'The name of the charset to use. If not given, glaemscribe will load and use the default charset defined in the mode.'
|
@@ -224,7 +224,7 @@ command :transcribe do |c|
|
|
224
224
|
|
225
225
|
# Ready for transcription ...
|
226
226
|
|
227
|
-
if(filename == "
|
227
|
+
if(filename == "STDIN")
|
228
228
|
perxit "Opened in stdin mode, waiting for input..."
|
229
229
|
begin
|
230
230
|
while to_transcribe = STDIN.gets
|
@@ -302,7 +302,14 @@ command :info do |c|
|
|
302
302
|
puts "#{$terminal.color "Human Name" , :bold} : #{mode.human_name}"
|
303
303
|
puts "#{$terminal.color "Authors " , :bold} : #{mode.authors}"
|
304
304
|
puts "#{$terminal.color "Version " , :bold} : #{mode.version}"
|
305
|
-
|
305
|
+
puts ""
|
306
|
+
puts "#{$terminal.color "Options " , :bold} :"
|
307
|
+
mode.options.each { |name,option|
|
308
|
+
puts " #{$terminal.color name, :bold} (#{option.type}) [#{option.default_value_name}]"
|
309
|
+
option.values.each{ |oname,ovalue|
|
310
|
+
puts " #{$terminal.color oname, :bold}"
|
311
|
+
}
|
312
|
+
}
|
306
313
|
end
|
307
314
|
|
308
315
|
end
|
@@ -31,7 +31,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
31
31
|
mainly, tehtar variants. These characters are only virtual, they do not really exist in DS based fonts.
|
32
32
|
**\
|
33
33
|
|
34
|
-
\** ☢
|
34
|
+
\** ☢
|
35
|
+
\char 2622 A_TEHTA A_TEHTA_CIRCUM E_TEHTA I_TEHTA O_TEHTA U_TEHTA THSUP_TICK_INV THSUP_LAMBDA THSUP_TICK THINF_CURL THSUP_SEV THINF_DOT THINF_DDOT THINF_TDOT THINF_STROKE THINF_DSTROKE DASH_INF SHOOK_LEFT SHOOK_RIGHT
|
36
|
+
**\
|
35
37
|
|
36
38
|
\** **\ \char 20 SPACE
|
37
39
|
\** ! **\ \char 21 TW_EXT_11
|
@@ -324,4 +326,85 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
324
326
|
\** 倦 **\ \char 5026 ?
|
325
327
|
\** 倰 **\ \char 5030 ?
|
326
328
|
\** 倹 **\ \char 5039 ?
|
329
|
+
|
330
|
+
|
331
|
+
\** The following virtual chars are used to handle tehtar (& the like) multiple version choosing **\
|
332
|
+
\** It could be avoided with modern fonts with gsub/gpos tables for ligatures and diacritics **\
|
333
|
+
\** placement **\
|
334
|
+
|
335
|
+
\** TODO : Move FORMEN to S, move HYARMEN to XS **\
|
336
|
+
|
337
|
+
\beg virtual A_TEHTA
|
338
|
+
\class A_TEHTA_XS TELCO ARA
|
339
|
+
\class A_TEHTA_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN
|
340
|
+
\class A_TEHTA_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN
|
341
|
+
\class A_TEHTA_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME
|
342
|
+
\end
|
343
|
+
|
344
|
+
\beg virtual A_TEHTA_CIRCUM
|
345
|
+
\class A_TEHTA_CIRCUM_XS TELCO ARA
|
346
|
+
\class A_TEHTA_CIRCUM_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN
|
347
|
+
\class A_TEHTA_CIRCUM_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN
|
348
|
+
\class A_TEHTA_CIRCUM_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME
|
349
|
+
\end
|
350
|
+
|
351
|
+
\beg virtual E_TEHTA
|
352
|
+
\class E_TEHTA_XS TELCO ARA
|
353
|
+
\class E_TEHTA_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN
|
354
|
+
\class E_TEHTA_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN
|
355
|
+
\class E_TEHTA_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME
|
356
|
+
\end
|
357
|
+
|
358
|
+
\beg virtual I_TEHTA
|
359
|
+
\class I_TEHTA_XS TELCO ARA
|
360
|
+
\class I_TEHTA_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN
|
361
|
+
\class I_TEHTA_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN
|
362
|
+
\class I_TEHTA_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME
|
363
|
+
\end
|
364
|
+
|
365
|
+
\beg virtual O_TEHTA
|
366
|
+
\class O_TEHTA_XS TELCO ARA
|
367
|
+
\class O_TEHTA_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN
|
368
|
+
\class O_TEHTA_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN
|
369
|
+
\class O_TEHTA_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME
|
370
|
+
\end
|
371
|
+
|
372
|
+
\beg virtual U_TEHTA
|
373
|
+
\class U_TEHTA_XS TELCO ARA
|
374
|
+
\class U_TEHTA_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN
|
375
|
+
\class U_TEHTA_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN
|
376
|
+
\class U_TEHTA_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME
|
377
|
+
\end
|
378
|
+
|
379
|
+
\** no vowel mark **\
|
380
|
+
\beg virtual NO_VOWEL_DOT
|
381
|
+
\class THINF_DOT_XS TELCO ROMEN ARDA
|
382
|
+
\class THINF_DOT_L ORE TW_EXT_11 TW_EXT_12 TINCO PARMA SULE FORMEN VALA ANNA VILYA SILME ESSE AHA HWESTA HYARMEN YANTA URE
|
383
|
+
\class THINF_DOT_XL QUESSE CALMA TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME
|
384
|
+
\class LAMBE_MARK_DOT LAMBE ALDA
|
385
|
+
\end
|
386
|
+
|
387
|
+
\** palatalisation **\
|
388
|
+
\beg virtual PALATAL_SIGN
|
389
|
+
\class THINF_DDOT_XS ROMEN
|
390
|
+
\class THINF_DDOT_S
|
391
|
+
\class THINF_DDOT_L TINCO PARMA HYARMEN ANNA
|
392
|
+
\class THINF_DDOT_XL ANDO ANTO NUMEN MALTA
|
393
|
+
\class LAMBE_MARK_DDOT LAMBE
|
394
|
+
\end
|
395
|
+
|
396
|
+
\** gemination **\
|
397
|
+
\beg virtual GEMINATE_SIGN
|
398
|
+
\class DASH_INF_XS
|
399
|
+
\class DASH_INF_S TINCO PARMA CALMA ROMEN HWESTA SULE
|
400
|
+
\class DASH_INF_L NUMEN MALTA UNGWE ANDO
|
401
|
+
\class DASH_INF_XL
|
402
|
+
\class LAMBE_MARK_TILD LAMBE
|
403
|
+
\end
|
404
|
+
|
405
|
+
\beg virtual ALVEOLAR_SIGN
|
406
|
+
\class SHOOK_LEFT_L CALMA
|
407
|
+
\class SHOOK_RIGHT_L TINCO PARMA
|
408
|
+
\end
|
409
|
+
|
327
410
|
|
@@ -22,14 +22,16 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
22
22
|
|
23
23
|
**\
|
24
24
|
|
25
|
-
|
26
|
-
|
25
|
+
\beg changelog
|
26
|
+
\entry "0.0.2", "Added option for o/u tehtar loop orientation"
|
27
|
+
\entry "0.0.3", "Normalizing to virtual chars"
|
28
|
+
\end
|
27
29
|
|
28
30
|
\** Adunaic mode for glaemscribe (MAY BE INCOMPLETE) **\
|
29
31
|
\language Adûnaic
|
30
32
|
\writing Tengwar
|
31
33
|
\mode Glaemscrafu
|
32
|
-
\version 0.0.
|
34
|
+
\version 0.0.3
|
33
35
|
\authors "Talagan (Benjamin Babut)"
|
34
36
|
|
35
37
|
\charset tengwar_ds true
|
@@ -97,50 +99,32 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
97
99
|
{OOU} === {OO}{U} \** ôu **\
|
98
100
|
|
99
101
|
\if "reverse_o_u_tehtar == U_UP_O_DOWN"
|
100
|
-
|
101
|
-
|
102
|
-
{O_LOOP_L} === O_TEHTA_L
|
103
|
-
{O_LOOP_XL} === O_TEHTA_XL
|
104
|
-
{U_LOOP_XS} === U_TEHTA_XS
|
105
|
-
{U_LOOP_S} === U_TEHTA_S
|
106
|
-
{U_LOOP_L} === U_TEHTA_L
|
107
|
-
{U_LOOP_XL} === U_TEHTA_XL
|
102
|
+
{O_LOOP} === O_TEHTA
|
103
|
+
{U_LOOP} === U_TEHTA
|
108
104
|
\else
|
109
|
-
|
110
|
-
|
111
|
-
{O_LOOP_L} === U_TEHTA_L
|
112
|
-
{O_LOOP_XL} === U_TEHTA_XL
|
113
|
-
{U_LOOP_XS} === O_TEHTA_XS
|
114
|
-
{U_LOOP_S} === O_TEHTA_S
|
115
|
-
{U_LOOP_L} === O_TEHTA_L
|
116
|
-
{U_LOOP_XL} === O_TEHTA_XL
|
105
|
+
{O_LOOP} === U_TEHTA
|
106
|
+
{U_LOOP} === O_TEHTA
|
117
107
|
\endif
|
118
108
|
|
119
|
-
{SDIPHTHONGS} === {AI}
|
120
|
-
{SDIPHTHENGS} === YANTA
|
109
|
+
{SDIPHTHONGS} === {AI} * {AU}
|
110
|
+
{SDIPHTHENGS} === YANTA A_TEHTA * URE A_TEHTA
|
121
111
|
|
122
|
-
{LDIPHTHONGS} === {AAI}
|
123
|
-
{LDIPHTHENGS} === ARA
|
112
|
+
{LDIPHTHONGS} === {AAI} * {AAU} * {EEI} * {EEU} * {OOI} * {OOU}
|
113
|
+
{LDIPHTHENGS} === ARA A_TEHTA YANTA * ARA A_TEHTA URE * ARA E_TEHTA YANTA * ARA E_TEHTA URE * ARA {O_LOOP} YANTA * ARA {O_LOOP} URE
|
124
114
|
|
125
|
-
{VOWELS} === {A}
|
126
|
-
{
|
127
|
-
{TEHTA__S} === A_TEHTA_S * E_TEHTA_S * I_TEHTA_S * {O_LOOP_S} * {U_LOOP_S}
|
128
|
-
{TEHTA__L} === A_TEHTA_L * E_TEHTA_L * I_TEHTA_L * {O_LOOP_L} * {U_LOOP_L}
|
129
|
-
{TEHTA_XL} === A_TEHTA_XL * E_TEHTA_XL * I_TEHTA_XL * {O_LOOP_XL} * {U_LOOP_XL}
|
115
|
+
{VOWELS} === {A} * {E} * {I} * {O} * {U}
|
116
|
+
{_TEHTAR_} === A_TEHTA * E_TEHTA * I_TEHTA * {O_LOOP} * {U_LOOP}
|
130
117
|
|
131
|
-
{LVOWELS} === {AA}
|
132
|
-
{LVOWTNG} === ARA
|
118
|
+
{LVOWELS} === {AA} * {EE} * {II} * {OO} * {UU}
|
119
|
+
{LVOWTNG} === ARA A_TEHTA * ARA E_TEHTA * ARA I_TEHTA * ARA {O_LOOP} * ARA {U_LOOP}
|
133
120
|
|
134
121
|
\** Let's put all vowels/diphthongs in the same basket **\
|
135
122
|
{V_D} === [ {VOWELS} * {LVOWELS} * {SDIPHTHONGS} * {LDIPHTHONGS} ]
|
136
123
|
\** And their images... **\
|
137
|
-
{
|
138
|
-
|
139
|
-
{
|
140
|
-
{
|
141
|
-
|
142
|
-
[{VOWELS}] --> TELCO [{TEHTA_XS}] \** Replace isolated short vowels **\
|
143
|
-
[{LVOWELS}] --> [{LVOWTNG}] \** Replace long vowels **\
|
124
|
+
{_V_D_} === [ {_TEHTAR_} * {LVOWTNG} * {SDIPHTHENGS} * {LDIPHTHENGS} ]
|
125
|
+
|
126
|
+
[{VOWELS}] --> TELCO [{_TEHTAR_}] \** Replace isolated short vowels **\
|
127
|
+
[{LVOWELS}] --> [{LVOWTNG}] \** Replace long vowels **\
|
144
128
|
[{SDIPHTHONGS}] --> [{SDIPHTHENGS}] \** Replace short diphthongs **\
|
145
129
|
[{LDIPHTHONGS}] --> [{LDIPHTHENGS}] \** Replace long diphthongs **\
|
146
130
|
|
@@ -150,64 +134,64 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
150
134
|
{K} === (c,k)
|
151
135
|
{V} === (v,w)
|
152
136
|
|
153
|
-
{L1_S} === {K} * p * t * {K}{K}
|
154
|
-
{L1_T} === QUESSE * PARMA * TINCO * CALMA
|
137
|
+
{L1_S} === {K} * p * t * {K}{K} * pp * tt
|
138
|
+
{L1_T} === QUESSE * PARMA * TINCO * CALMA GEMINATE_SIGN * PARMA GEMINATE_SIGN * TINCO GEMINATE_SIGN
|
155
139
|
|
156
140
|
[{L1_S}] --> [ {L1_T} ]
|
157
|
-
[{L1_S}]{V_D} --> [ {L1_T} ]{
|
141
|
+
[{L1_S}]{V_D} --> [ {L1_T} ]{_V_D_}
|
158
142
|
|
159
|
-
{L2_S} === d * b * g * dd
|
160
|
-
{L2_T} === ANDO * UMBAR * UNGWE * ANDO
|
143
|
+
{L2_S} === d * b * g * dd * bb * gg
|
144
|
+
{L2_T} === ANDO * UMBAR * UNGWE * ANDO GEMINATE_SIGN * UMBAR GEMINATE_SIGN * UNGWE GEMINATE_SIGN
|
161
145
|
[{L2_S}] --> [{L2_T}]
|
162
|
-
[{L2_S}]{V_D} --> [{L2_T}]{
|
146
|
+
[{L2_S}]{V_D} --> [{L2_T}]{_V_D_}
|
163
147
|
|
164
148
|
\** Alignment of tehta is not the same in the font **\
|
165
149
|
\** So we need to split the third line unfortunately **\
|
166
|
-
{L3_1_S} === th * ph * (t,th)th
|
167
|
-
{L3_1_T} === SULE * FORMEN * SULE
|
150
|
+
{L3_1_S} === th * ph * (t,th)th * (p,ph)ph * (t,th)ph * (k,kh)ph * (p,ph)th * (k,kh)th
|
151
|
+
{L3_1_T} === SULE * FORMEN * SULE GEMINATE_SIGN * FORMEN GEMINATE_SIGN * SULE FORMEN * HWESTA FORMEN * FORMEN SULE * HWESTA SULE
|
168
152
|
|
169
|
-
{L3_2_S} === sh * kh * (k,kh)kh
|
170
|
-
{L3_2_T} === AHA * HWESTA * HWESTA
|
153
|
+
{L3_2_S} === sh * kh * (k,kh)kh * (p,ph)kh * (t,th)kh
|
154
|
+
{L3_2_T} === AHA * HWESTA * HWESTA GEMINATE_SIGN * FORMEN HWESTA * SULE HWESTA
|
171
155
|
|
172
156
|
[{L3_1_S}] --> [{L3_1_T}]
|
173
|
-
[{L3_1_S}]{V_D} --> [{L3_1_T}]{
|
157
|
+
[{L3_1_S}]{V_D} --> [{L3_1_T}]{_V_D_}
|
174
158
|
[{L3_2_S}] --> [{L3_2_T}]
|
175
|
-
[{L3_2_S}]{V_D} --> [{L3_2_T}]{
|
159
|
+
[{L3_2_S}]{V_D} --> [{L3_2_T}]{_V_D_}
|
176
160
|
|
177
161
|
{L4_S} === nd * mb * ng
|
178
162
|
{L4_T} === ANTO * AMPA * UNQUE
|
179
163
|
[{L4_S}] --> [{L4_T}]
|
180
|
-
[{L4_S}]{V_D} --> [{L4_T}]{
|
164
|
+
[{L4_S}]{V_D} --> [{L4_T}]{_V_D_}
|
181
165
|
|
182
|
-
{L5_S} === n * m * nn
|
183
|
-
{L5_T} === NUMEN * MALTA * NUMEN
|
166
|
+
{L5_S} === n * m * nn * mm
|
167
|
+
{L5_T} === NUMEN * MALTA * NUMEN GEMINATE_SIGN * MALTA GEMINATE_SIGN
|
184
168
|
[{L5_S}] --> [{L5_T}]
|
185
|
-
[{L5_S}]{V_D} --> [{L5_T}]{
|
169
|
+
[{L5_S}]{V_D} --> [{L5_T}]{_V_D_}
|
186
170
|
|
187
|
-
{L6_S} === {V} * y * rr
|
188
|
-
{L6_T} === VALA * ANNA * ROMEN
|
171
|
+
{L6_S} === {V} * y * rr * {V}{V} * yy
|
172
|
+
{L6_T} === VALA * ANNA * ROMEN GEMINATE_SIGN * VALA GEMINATE_SIGN * ANNA GEMINATE_SIGN
|
189
173
|
[r * {L6_S}] --> [ ORE * {L6_T}]
|
190
|
-
[r * {L6_S}]{V_D} --> [ ROMEN * {L6_T}]{
|
174
|
+
[r * {L6_S}]{V_D} --> [ ROMEN * {L6_T}]{_V_D_}
|
191
175
|
|
192
176
|
\** This one is not useful (redundant with higher) **\
|
193
177
|
\** Keep it for clarity of mind **\
|
194
178
|
r_ --> ORE
|
195
179
|
|
196
|
-
s{V_D} --> SILME_NUQUERNA {
|
180
|
+
s{V_D} --> SILME_NUQUERNA {_V_D_} \** Before a vowel goes down **\
|
197
181
|
s --> SILME \** Any other pos, up **\
|
198
|
-
z{V_D} --> ESSE_NUQUERNA {
|
182
|
+
z{V_D} --> ESSE_NUQUERNA {_V_D_} \** Before a vowel goes down **\
|
199
183
|
z --> ESSE \** Any other pos, up **\
|
200
184
|
|
201
|
-
h{V_D} --> HYARMEN {
|
185
|
+
h{V_D} --> HYARMEN {_V_D_}
|
202
186
|
h --> HYARMEN
|
203
|
-
hh{V_D} --> HYARMEN
|
204
|
-
hh --> HYARMEN
|
187
|
+
hh{V_D} --> HYARMEN GEMINATE_SIGN {_V_D_}
|
188
|
+
hh --> HYARMEN GEMINATE_SIGN
|
205
189
|
|
206
|
-
l{V_D} --> LAMBE {
|
190
|
+
l{V_D} --> LAMBE {_V_D_}
|
207
191
|
l --> LAMBE
|
208
192
|
|
209
|
-
ll{V_D} --> LAMBE
|
210
|
-
ll --> LAMBE
|
193
|
+
ll{V_D} --> LAMBE GEMINATE_SIGN {_V_D_}
|
194
|
+
ll --> LAMBE GEMINATE_SIGN
|
211
195
|
|
212
196
|
\end
|
213
197
|
|
@@ -276,3 +260,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
276
260
|
|
277
261
|
\end
|
278
262
|
|
263
|
+
\beg postprocessor
|
264
|
+
\resolve_virtuals
|
265
|
+
\end
|
@@ -22,8 +22,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
22
22
|
|
23
23
|
**\
|
24
24
|
|
25
|
-
\** Changelog **\
|
26
|
-
\**
|
27
25
|
\beg changelog
|
28
26
|
\entry "0.0.2", "added χ for the word χarina, correcting ts/ps sequences to work better with eldamar"
|
29
27
|
\entry "0.0.3", "added o/u curl option"
|
@@ -37,11 +35,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
37
35
|
\entry "0.1.1", "Added default option for voiced plosives : use mb, nd, ng, ngw"
|
38
36
|
\entry "0.1.2", "Added a tehta shape selection"
|
39
37
|
\entry "0.1.3", "Fixing ks, ps, ts. Fixing dot under ore, romen in implicit a mode."
|
38
|
+
\entry "0.1.4", "Conforming to the new csub format. Cleaning with new csub classes."
|
39
|
+
\entry "0.1.5", "csub removed. Now using virtual chars defined in charsets."
|
40
40
|
\end
|
41
|
-
**\
|
42
41
|
|
43
42
|
\**
|
44
|
-
TODO : Use the new csub directive for all diacritics/signs that could have variants (e.g. THINF_DDOT (LAMBE_MARK_DDOT for LAMBE), DASH_INF (LAMBE_MARK_TILD for LAMBE), SHOOK_LEFT)
|
45
43
|
TODO : Option for dot or not in 'a implicit' option before long vowels ?
|
46
44
|
TODO : bb, dd etc ? (for noobs)
|
47
45
|
**\
|
@@ -49,7 +47,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
49
47
|
\language "Quenya"
|
50
48
|
\writing "Tengwar"
|
51
49
|
\mode "Classical"
|
52
|
-
\version "0.1.
|
50
|
+
\version "0.1.5"
|
53
51
|
\authors "Talagan (Benjamin Babut)"
|
54
52
|
|
55
53
|
\charset tengwar_ds true
|
@@ -134,7 +132,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
134
132
|
|
135
133
|
\if implicit_a
|
136
134
|
{_A_} === {NULL}
|
137
|
-
{_NVOWEL_} ===
|
135
|
+
{_NVOWEL_} === NO_VOWEL
|
138
136
|
\else
|
139
137
|
{_A_} === {A_SHAPE}
|
140
138
|
{_NVOWEL_} === {NULL}
|
@@ -192,20 +190,20 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
192
190
|
{_L1_} === TINCO * PARMA * CALMA * QUESSE
|
193
191
|
|
194
192
|
\** GEMINATED **\
|
195
|
-
{L1_1_GEMS} === tt * pp
|
196
|
-
{_L1_1_GEMS_} === TINCO
|
193
|
+
{L1_1_GEMS} === tt * pp * {K}{K}
|
194
|
+
{_L1_1_GEMS_} === TINCO GEMINATE_SIGN * PARMA GEMINATE_SIGN * CALMA GEMINATE_SIGN
|
197
195
|
|
198
196
|
\** NORMAL **\
|
199
197
|
[ {L1} * {L1_1_GEMS} ] {V_D_WN} --> [ {_L1_} * {_L1_1_GEMS_} ] {_V_D_WN_}
|
200
198
|
|
201
199
|
\** OTHERS **\
|
202
|
-
ty{V_D_WN} --> TINCO
|
203
|
-
py{V_D_WN} --> PARMA
|
200
|
+
ty{V_D_WN} --> TINCO PALATAL_SIGN {_V_D_WN_}
|
201
|
+
py{V_D_WN} --> PARMA PALATAL_SIGN {_V_D_WN_}
|
204
202
|
|
205
|
-
ts{V_D_WN} --> TINCO {_V_D_WN_}
|
206
|
-
ps{V_D_WN} --> PARMA {_V_D_WN_}
|
207
|
-
{K}s{V_D_WN} --> CALMA
|
208
|
-
x{V_D_WN} --> CALMA
|
203
|
+
ts{V_D_WN} --> TINCO {_V_D_WN_} ALVEOLAR_SIGN
|
204
|
+
ps{V_D_WN} --> PARMA {_V_D_WN_} ALVEOLAR_SIGN
|
205
|
+
{K}s{V_D_WN} --> CALMA ALVEOLAR_SIGN {_V_D_WN_}
|
206
|
+
x{V_D_WN} --> CALMA ALVEOLAR_SIGN {_V_D_WN_} \** render ks for x **\
|
209
207
|
|
210
208
|
\** ===================== **\
|
211
209
|
\** 2ND LINE RULES **\
|
@@ -217,7 +215,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
217
215
|
[{L2}]{V_D_WN} --> [{_L2_}]{_V_D_WN_}
|
218
216
|
|
219
217
|
\** Palatalized **\
|
220
|
-
ndy{V_D_WN} --> ANDO
|
218
|
+
ndy{V_D_WN} --> ANDO PALATAL_SIGN {_V_D_WN_}
|
221
219
|
|
222
220
|
\** Have some rules for d,b,g,gw although they are not theoretically possible; aldudénie, e.g., needs it **\
|
223
221
|
{L2_UN} === d * b * g * gw
|
@@ -248,7 +246,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
248
246
|
[{L3}]{V_D_WN} --> [{_L3_}]{_V_D_WN_}
|
249
247
|
|
250
248
|
\** OTHERS **\
|
251
|
-
hy{V_D_WN} --> HYARMEN
|
249
|
+
hy{V_D_WN} --> HYARMEN PALATAL_SIGN {_V_D_WN_}
|
252
250
|
|
253
251
|
\** Override h with vowels (descendent of hy) **\
|
254
252
|
_h{V_D} --> HYARMEN {_V_D_}
|
@@ -265,7 +263,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
265
263
|
\** NORMAL **\
|
266
264
|
[{L4}]{V_D_WN} --> [{_L4_}]{_V_D_WN_}
|
267
265
|
\** OTHERS **\
|
268
|
-
nty{V_D_WN} --> ANTO
|
266
|
+
nty{V_D_WN} --> ANTO PALATAL_SIGN {_V_D_WN_}
|
269
267
|
|
270
268
|
\** ===================== **\
|
271
269
|
\** 5TH LINE RULES **\
|
@@ -275,10 +273,10 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
275
273
|
|
276
274
|
[{L5}]{V_D_WN} --> [{_L5_}]{_V_D_WN_}
|
277
275
|
|
278
|
-
ny{V_D_WN} --> NUMEN
|
279
|
-
nn{V_D_WN} --> NUMEN
|
280
|
-
my{V_D_WN} --> MALTA
|
281
|
-
mm{V_D_WN} --> MALTA
|
276
|
+
ny{V_D_WN} --> NUMEN PALATAL_SIGN {_V_D_WN_}
|
277
|
+
nn{V_D_WN} --> NUMEN GEMINATE_SIGN {_V_D_WN_}
|
278
|
+
my{V_D_WN} --> MALTA PALATAL_SIGN {_V_D_WN_}
|
279
|
+
mm{V_D_WN} --> MALTA GEMINATE_SIGN {_V_D_WN_}
|
282
280
|
|
283
281
|
\** ===================== **\
|
284
282
|
\** 6TH LINE RULES **\
|
@@ -291,7 +289,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
291
289
|
\endif
|
292
290
|
|
293
291
|
{L6} === r * v * y * w
|
294
|
-
{_L6_} === ROMEN * VALA * ANNA
|
292
|
+
{_L6_} === ROMEN * VALA * ANNA PALATAL_SIGN * VILYA
|
295
293
|
|
296
294
|
[{L6}]{V_D_WN} --> [{_L6_}]{_V_D_WN_}
|
297
295
|
|
@@ -301,8 +299,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
301
299
|
\** Override rule r + null **\
|
302
300
|
r --> {_LONE_R_} {_NVOWEL_}
|
303
301
|
|
304
|
-
rr{V_D_WN} --> ROMEN
|
305
|
-
ry{V_D_WN} --> ROMEN
|
302
|
+
rr{V_D_WN} --> ROMEN GEMINATE_SIGN {_V_D_WN_}
|
303
|
+
ry{V_D_WN} --> ROMEN PALATAL_SIGN {_V_D_WN_}
|
306
304
|
rd{V_D_WN} --> ARDA {_V_D_WN_}
|
307
305
|
|
308
306
|
\** ===================== **\
|
@@ -313,7 +311,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
313
311
|
|
314
312
|
[{LINE_L}]{V_D_WN} --> [{_LINE_L_}]{_V_D_WN_}
|
315
313
|
|
316
|
-
ly{V_D_WN} --> LAMBE
|
314
|
+
ly{V_D_WN} --> LAMBE PALATAL_SIGN {_V_D_WN_}
|
317
315
|
hl{V_D_WN} --> HALLA LAMBE {_V_D_WN_}
|
318
316
|
hr{V_D_WN} --> HALLA ROMEN {_V_D_WN_}
|
319
317
|
|
@@ -405,13 +403,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
405
403
|
\end
|
406
404
|
|
407
405
|
\beg postprocessor
|
408
|
-
|
409
|
-
\csub A_TEHTA "A_TEHTA_XS TELCO ARA" "A_TEHTA_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN" "A_TEHTA_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN" "A_TEHTA_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME"
|
410
|
-
\csub A_TEHTA_CIRCUM "A_TEHTA_CIRCUM_XS TELCO ARA" "A_TEHTA_CIRCUM_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN" "A_TEHTA_CIRCUM_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN" "A_TEHTA_CIRCUM_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME"
|
411
|
-
\csub E_TEHTA "E_TEHTA_XS TELCO ARA" "E_TEHTA_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN" "E_TEHTA_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN" "E_TEHTA_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME"
|
412
|
-
\csub I_TEHTA "I_TEHTA_XS TELCO ARA" "I_TEHTA_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN" "I_TEHTA_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN" "I_TEHTA_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME"
|
413
|
-
\csub O_TEHTA "O_TEHTA_XS TELCO ARA" "O_TEHTA_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN" "O_TEHTA_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN" "O_TEHTA_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME"
|
414
|
-
\csub U_TEHTA "U_TEHTA_XS TELCO ARA" "U_TEHTA_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN" "U_TEHTA_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN" "U_TEHTA_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME"
|
415
|
-
\csub THINF_DOT "THINF_DOT_XS TELCO ROMEN ARDA" "THINF_DOT_L ORE TW_EXT_11 TW_EXT_12 TINCO PARMA SULE FORMEN VALA ANNA VILYA SILME ESSE AHA HWESTA HYARMEN YANTA URE" "THINF_DOT_XL QUESSE CALMA TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME" "LAMBE_MARK_DOT LAMBE ALDA"
|
406
|
+
\resolve_virtuals
|
416
407
|
\end
|
417
408
|
|
data/lib/api/charset.rb
CHANGED
@@ -27,39 +27,121 @@ module Glaemscribe
|
|
27
27
|
|
28
28
|
attr_accessor :errors
|
29
29
|
attr_reader :chars
|
30
|
+
attr_reader :virtual_chars
|
30
31
|
|
31
32
|
class Char
|
32
33
|
attr_accessor :line
|
33
34
|
attr_accessor :code
|
34
35
|
attr_accessor :names
|
35
36
|
attr_accessor :str
|
37
|
+
attr_accessor :charset
|
36
38
|
|
37
39
|
def initialize
|
38
40
|
@names = {}
|
39
41
|
end
|
42
|
+
|
43
|
+
def virtual?
|
44
|
+
false
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
class VirtualChar
|
49
|
+
attr_accessor :line
|
50
|
+
attr_accessor :names
|
51
|
+
attr_accessor :classes
|
52
|
+
attr_accessor :charset
|
53
|
+
|
54
|
+
class VirtualClass
|
55
|
+
attr_accessor :target
|
56
|
+
attr_accessor :triggers
|
57
|
+
end
|
58
|
+
|
59
|
+
def initialize
|
60
|
+
@classes = {} # result_char_1 => [trigger_char_1, trigger_char_2 ...] , result_char_1 => ...
|
61
|
+
@lookup_table = {}
|
62
|
+
end
|
63
|
+
|
64
|
+
def str
|
65
|
+
VIRTUAL_CHAR_OUTPUT
|
66
|
+
end
|
67
|
+
|
68
|
+
def finalize
|
69
|
+
@lookup_table = {}
|
70
|
+
@classes.each{ |vc|
|
71
|
+
|
72
|
+
result_char = vc.target
|
73
|
+
trigger_chars = vc.triggers
|
74
|
+
|
75
|
+
trigger_chars.each{ |trigger_char|
|
76
|
+
found = @lookup_table[trigger_char]
|
77
|
+
if found
|
78
|
+
@charset.errors << Glaeml::Error.new(@line, "Trigger char #{trigger_char} found twice in virtual char.")
|
79
|
+
else
|
80
|
+
rc = @charset[result_char]
|
81
|
+
tc = @charset[trigger_char]
|
82
|
+
|
83
|
+
if rc.nil?
|
84
|
+
@charset.errors << Glaeml::Error.new(@line, "Trigger char #{trigger_char} points to unknown result char #{result_char}.")
|
85
|
+
elsif tc.nil?
|
86
|
+
@charset.errors << Glaeml::Error.new(@line, "Unknown trigger char #{trigger_char}.")
|
87
|
+
elsif tc.class == VirtualChar
|
88
|
+
@charset.errors << Glaeml::Error.new(@line, "Trigger char #{trigger_char} is virtual. This is not supported!")
|
89
|
+
elsif rc.class == VirtualChar
|
90
|
+
@charset.errors << Glaeml::Error.new(@line, "Trigger char #{trigger_char} points to another virtual char #{result_char}. This is not supported!")
|
91
|
+
else
|
92
|
+
tc.names.each{|trigger_char_name| # Don't forget to match all name variants for that trigger char!
|
93
|
+
@lookup_table[trigger_char_name] = rc
|
94
|
+
}
|
95
|
+
end
|
96
|
+
end
|
97
|
+
}
|
98
|
+
}
|
99
|
+
end
|
100
|
+
|
101
|
+
def [](trigger_char_name)
|
102
|
+
@lookup_table[trigger_char_name]
|
103
|
+
end
|
104
|
+
|
105
|
+
def virtual?
|
106
|
+
true
|
107
|
+
end
|
40
108
|
end
|
41
109
|
|
42
110
|
def initialize(name)
|
43
|
-
@name
|
44
|
-
@chars
|
45
|
-
@errors
|
111
|
+
@name = name
|
112
|
+
@chars = []
|
113
|
+
@errors = []
|
114
|
+
@virtual_chars = []
|
46
115
|
end
|
47
116
|
|
48
117
|
# Pass integer (utf8 num) and array (of strings)
|
49
118
|
def add_char(line, code, names)
|
50
119
|
return if names.empty? || names.include?("?") # Ignore characters with '?'
|
51
120
|
|
52
|
-
c
|
53
|
-
c.line
|
54
|
-
c.code
|
55
|
-
c.names
|
56
|
-
c.str
|
121
|
+
c = Char.new
|
122
|
+
c.line = line
|
123
|
+
c.code = code
|
124
|
+
c.names = names
|
125
|
+
c.str = code.chr('UTF-8')
|
126
|
+
c.charset = self
|
57
127
|
@chars << c
|
58
128
|
end
|
59
129
|
|
130
|
+
def add_virtual_char(line, classes, names)
|
131
|
+
return if names.empty? || names.include?("?") # Ignore characters with '?'
|
132
|
+
|
133
|
+
c = VirtualChar.new
|
134
|
+
c.line = line
|
135
|
+
c.names = names
|
136
|
+
c.classes = classes # We'll check errors in finalize
|
137
|
+
c.charset = self
|
138
|
+
@chars << c
|
139
|
+
end
|
140
|
+
|
60
141
|
def finalize
|
61
|
-
@errors
|
62
|
-
@lookup_table
|
142
|
+
@errors = []
|
143
|
+
@lookup_table = {}
|
144
|
+
@virtual_chars = []
|
63
145
|
|
64
146
|
@chars.each { |c|
|
65
147
|
c.names.each { |cname|
|
@@ -72,6 +154,13 @@ module Glaemscribe
|
|
72
154
|
}
|
73
155
|
}
|
74
156
|
|
157
|
+
@chars.each{ |c|
|
158
|
+
if c.class == VirtualChar
|
159
|
+
c.finalize
|
160
|
+
@virtual_chars << c
|
161
|
+
end
|
162
|
+
}
|
163
|
+
|
75
164
|
API::Debug::log("Finalized charset '#{@name}', #{@lookup_table.count} symbols loaded.")
|
76
165
|
end
|
77
166
|
|
data/lib/api/charset_parser.rb
CHANGED
@@ -45,6 +45,19 @@ module Glaemscribe
|
|
45
45
|
names = char_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
|
46
46
|
@charset.add_char(char_element.line,code,names)
|
47
47
|
}
|
48
|
+
|
49
|
+
doc.root_node.gpath("virtual").each { |virtual_element|
|
50
|
+
names = virtual_element.args
|
51
|
+
classes = []
|
52
|
+
virtual_element.gpath("class").each { |class_element|
|
53
|
+
vc = Charset::VirtualChar::VirtualClass.new
|
54
|
+
vc.target = class_element.args[0]
|
55
|
+
vc.triggers = class_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
|
56
|
+
classes << vc
|
57
|
+
}
|
58
|
+
@charset.add_virtual_char(virtual_element.line,classes,names)
|
59
|
+
}
|
60
|
+
|
48
61
|
@charset.finalize
|
49
62
|
|
50
63
|
@charset
|
data/lib/api/constants.rb
CHANGED
data/lib/api/glaeml.rb
CHANGED
@@ -67,6 +67,13 @@ module Glaemscribe
|
|
67
67
|
@children = []
|
68
68
|
end
|
69
69
|
|
70
|
+
# Make our object clonable
|
71
|
+
def initialize_copy(other)
|
72
|
+
super
|
73
|
+
@args = other.args.clone
|
74
|
+
@children = other.children.map{|c| c.clone}
|
75
|
+
end
|
76
|
+
|
70
77
|
def pathfind_crawl(apath, found)
|
71
78
|
|
72
79
|
children.each{ |c|
|
data/lib/api/mode_parser.rb
CHANGED
@@ -181,7 +181,7 @@ module Glaemscribe
|
|
181
181
|
if !operator_class
|
182
182
|
@mode.errors << Glaeml::Error.new(element.line,"Operator #{operator_name} is unknown.")
|
183
183
|
else
|
184
|
-
term.operators << operator_class.new(element.
|
184
|
+
term.operators << operator_class.new(element.clone)
|
185
185
|
end
|
186
186
|
}
|
187
187
|
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
module Glaemscribe
|
24
|
+
module API
|
25
|
+
|
26
|
+
class ResolveVirtualsPostProcessorOperator < PostProcessorOperator
|
27
|
+
|
28
|
+
def finalize(trans_options)
|
29
|
+
super(trans_options)
|
30
|
+
@last_triggers = {} # Allocate the lookup here to optimize
|
31
|
+
end
|
32
|
+
|
33
|
+
def reset_trigger_states(charset)
|
34
|
+
# For each virtual char in charset, maintain a state.
|
35
|
+
charset.virtual_chars.each{ |vc|
|
36
|
+
@last_triggers[vc] = nil # Clear the state
|
37
|
+
}
|
38
|
+
end
|
39
|
+
|
40
|
+
def apply(tokens,charset)
|
41
|
+
|
42
|
+
reset_trigger_states(charset)
|
43
|
+
|
44
|
+
tokens.each_with_index{ |token,idx|
|
45
|
+
|
46
|
+
if token == '*SPACE'
|
47
|
+
reset_trigger_states(charset)
|
48
|
+
next
|
49
|
+
end
|
50
|
+
|
51
|
+
# Check if token is a virtual char
|
52
|
+
c = charset[token]
|
53
|
+
next if c.nil? # May happen for empty tokens
|
54
|
+
if c.virtual?
|
55
|
+
# Try to replace
|
56
|
+
last_trigger = @last_triggers[c]
|
57
|
+
if last_trigger != nil
|
58
|
+
tokens[idx] = last_trigger.names.first # Take the first name of the non-virtual replacement.
|
59
|
+
end
|
60
|
+
else
|
61
|
+
# Update states of virtual classes
|
62
|
+
charset.virtual_chars.each{|vc|
|
63
|
+
rc = vc[token]
|
64
|
+
@last_triggers[vc] = rc if rc != nil
|
65
|
+
}
|
66
|
+
end
|
67
|
+
}
|
68
|
+
tokens
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
ResourceManager::register_post_processor_class("resolve_virtuals", ResolveVirtualsPostProcessorOperator)
|
73
|
+
|
74
|
+
end
|
75
|
+
end
|
@@ -26,10 +26,10 @@ module Glaemscribe
|
|
26
26
|
class ElvishNumbersPreProcessorOperator < PreProcessorOperator
|
27
27
|
|
28
28
|
def apply(l)
|
29
|
-
base = args[0]
|
29
|
+
base = finalized_glaeml_element.args[0]
|
30
30
|
base = (base)?(base.to_i):(12)
|
31
31
|
|
32
|
-
reverse = args[1]
|
32
|
+
reverse = finalized_glaeml_element.args[1]
|
33
33
|
reverse = (reverse != nil)?(reverse == "true" || reverse == true):(true)
|
34
34
|
|
35
35
|
l.gsub(/\d+/) { |f|
|
@@ -27,8 +27,8 @@ module Glaemscribe
|
|
27
27
|
class RxSubstitutePreProcessorOperator < PreProcessorOperator
|
28
28
|
|
29
29
|
def apply(l)
|
30
|
-
what = /#{
|
31
|
-
with =
|
30
|
+
what = /#{finalized_glaeml_element.args[0]}/
|
31
|
+
with = finalized_glaeml_element.args[1]
|
32
32
|
l.gsub(what, with)
|
33
33
|
end
|
34
34
|
|
@@ -26,8 +26,8 @@ module Glaemscribe
|
|
26
26
|
class SubstitutePreProcessorOperator < PreProcessorOperator
|
27
27
|
|
28
28
|
def apply(l)
|
29
|
-
what =
|
30
|
-
with =
|
29
|
+
what = finalized_glaeml_element.args[0]
|
30
|
+
with = finalized_glaeml_element.args[1]
|
31
31
|
l.gsub(what, with)
|
32
32
|
end
|
33
33
|
end
|
@@ -26,11 +26,11 @@ module Glaemscribe
|
|
26
26
|
class UpDownTehtaSplitPreProcessorOperator < PreProcessorOperator
|
27
27
|
|
28
28
|
attr_reader :vowel_list, :consonant_list
|
29
|
-
def
|
30
|
-
super(
|
29
|
+
def finalize(trans_options)
|
30
|
+
super(trans_options)
|
31
31
|
|
32
|
-
vowel_list = args[0]
|
33
|
-
consonant_list = args[1]
|
32
|
+
vowel_list = finalized_glaeml_element.args[0]
|
33
|
+
consonant_list = finalized_glaeml_element.args[1]
|
34
34
|
|
35
35
|
vowel_list = vowel_list.split(/,/).map{|s| s.strip}
|
36
36
|
consonant_list = consonant_list.split(/,/).map{|s| s.strip}
|
@@ -24,11 +24,11 @@ module Glaemscribe
|
|
24
24
|
module API
|
25
25
|
|
26
26
|
class PrePostProcessorOperator
|
27
|
-
attr_reader :
|
28
|
-
attr_reader :
|
27
|
+
attr_reader :glaeml_element
|
28
|
+
attr_reader :finalized_glaeml_element
|
29
29
|
|
30
|
-
def initialize(
|
31
|
-
@
|
30
|
+
def initialize(glaeml_element)
|
31
|
+
@glaeml_element = glaeml_element
|
32
32
|
end
|
33
33
|
|
34
34
|
def eval_arg(arg, trans_options)
|
@@ -40,11 +40,16 @@ module Glaemscribe
|
|
40
40
|
return arg
|
41
41
|
end
|
42
42
|
|
43
|
-
def
|
44
|
-
|
45
|
-
|
46
|
-
|
43
|
+
def finalize_glaeml_element(ge, trans_options)
|
44
|
+
ge.args.map! { |arg| eval_arg(arg, trans_options) }
|
45
|
+
ge.children.each{ |child|
|
46
|
+
finalize_glaeml_element(child, trans_options)
|
47
47
|
}
|
48
|
+
ge
|
49
|
+
end
|
50
|
+
|
51
|
+
def finalize(trans_options)
|
52
|
+
@finalized_glaeml_element = finalize_glaeml_element(@glaeml_element.clone, trans_options)
|
48
53
|
end
|
49
54
|
|
50
55
|
def apply
|
@@ -122,7 +127,7 @@ module Glaemscribe
|
|
122
127
|
|
123
128
|
# Apply filters
|
124
129
|
@operators.each{ |operator|
|
125
|
-
tokens = operator.apply(tokens)
|
130
|
+
tokens = operator.apply(tokens,out_charset)
|
126
131
|
}
|
127
132
|
|
128
133
|
# Convert output
|
data/lib/glaemscribe.rb
CHANGED
@@ -65,7 +65,7 @@ module Glaemscribe
|
|
65
65
|
require API_PATH + "/api/pre_processor/rxsubstitute.rb"
|
66
66
|
require API_PATH + "/api/pre_processor/up_down_tehta_split.rb"
|
67
67
|
require API_PATH + "/api/post_processor/reverse.rb"
|
68
|
-
require API_PATH + "/api/post_processor/
|
68
|
+
require API_PATH + "/api/post_processor/resolve_virtuals.rb"
|
69
69
|
|
70
70
|
end
|
71
71
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: glaemscribe
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.16
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Benjamin 'Talagan' Babut
|
@@ -72,7 +72,7 @@ files:
|
|
72
72
|
- lib/api/mode.rb
|
73
73
|
- lib/api/mode_parser.rb
|
74
74
|
- lib/api/option.rb
|
75
|
-
- lib/api/post_processor/
|
75
|
+
- lib/api/post_processor/resolve_virtuals.rb
|
76
76
|
- lib/api/post_processor/reverse.rb
|
77
77
|
- lib/api/pre_processor/downcase.rb
|
78
78
|
- lib/api/pre_processor/elvish_numbers.rb
|
@@ -110,7 +110,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
110
110
|
version: '0'
|
111
111
|
requirements: []
|
112
112
|
rubyforge_project:
|
113
|
-
rubygems_version: 2.
|
113
|
+
rubygems_version: 2.4.8
|
114
114
|
signing_key:
|
115
115
|
specification_version: 4
|
116
116
|
summary: Glǽmscribe
|
@@ -1,64 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
#
|
3
|
-
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
-
# the transcription of texts between writing systems, and more
|
5
|
-
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
-
# invented languages to some of his devised writing systems.
|
7
|
-
#
|
8
|
-
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
-
#
|
10
|
-
# This program is free software: you can redistribute it and/or modify
|
11
|
-
# it under the terms of the GNU Affero General Public License as published by
|
12
|
-
# the Free Software Foundation, either version 3 of the License, or
|
13
|
-
# any later version.
|
14
|
-
#
|
15
|
-
# This program is distributed in the hope that it will be useful,
|
16
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
-
# GNU Affero General Public License for more details.
|
19
|
-
#
|
20
|
-
# You should have received a copy of the GNU Affero General Public License
|
21
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
-
|
23
|
-
module Glaemscribe
|
24
|
-
module API
|
25
|
-
|
26
|
-
class CSubPostProcessorOperator < PostProcessorOperator
|
27
|
-
attr_reader :matcher
|
28
|
-
attr_reader :triggers
|
29
|
-
|
30
|
-
def initialize(args)
|
31
|
-
super(args)
|
32
|
-
|
33
|
-
# Build our operator
|
34
|
-
@matcher = self.raw_args[0]
|
35
|
-
@triggers = Hash.new
|
36
|
-
|
37
|
-
self.raw_args.each{ |arg|
|
38
|
-
|
39
|
-
splitted = arg.split()
|
40
|
-
replacer = splitted.shift()
|
41
|
-
|
42
|
-
splitted.each{ |token|
|
43
|
-
@triggers[token] = replacer
|
44
|
-
}
|
45
|
-
}
|
46
|
-
end
|
47
|
-
|
48
|
-
def apply(tokens)
|
49
|
-
last_trigger_replacer = nil
|
50
|
-
tokens.each_with_index{ |token,idx|
|
51
|
-
if token == @matcher && last_trigger_replacer != nil
|
52
|
-
tokens[idx] = last_trigger_replacer
|
53
|
-
elsif @triggers[token] != nil
|
54
|
-
last_trigger_replacer = @triggers[token]
|
55
|
-
end
|
56
|
-
}
|
57
|
-
tokens
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
ResourceManager::register_post_processor_class("csub", CSubPostProcessorOperator)
|
62
|
-
|
63
|
-
end
|
64
|
-
end
|