glaemscribe 1.0.15 → 1.0.16
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/glaemscribe +10 -3
- data/glaemresources/charsets/tengwar_ds.cst +84 -1
- data/glaemresources/modes/adunaic.glaem +51 -64
- data/glaemresources/modes/quenya.glaem +24 -33
- data/lib/api/charset.rb +99 -10
- data/lib/api/charset_parser.rb +13 -0
- data/lib/api/constants.rb +1 -0
- data/lib/api/glaeml.rb +7 -0
- data/lib/api/mode_parser.rb +1 -1
- data/lib/api/post_processor/resolve_virtuals.rb +75 -0
- data/lib/api/post_processor/reverse.rb +1 -1
- data/lib/api/pre_processor/elvish_numbers.rb +2 -2
- data/lib/api/pre_processor/rxsubstitute.rb +2 -2
- data/lib/api/pre_processor/substitute.rb +2 -2
- data/lib/api/pre_processor/up_down_tehta_split.rb +4 -4
- data/lib/api/transcription_pre_post_processor.rb +14 -9
- data/lib/glaemscribe.rb +1 -1
- metadata +3 -3
- data/lib/api/post_processor/csub.rb +0 -64
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5aa0d9e7fab6fa5fe2c50f84c90fdcfb94bd423b
|
4
|
+
data.tar.gz: 5fffb9618dd05644bee52429073a44d0d40af0d5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 764a4736e61caa7a8cf485741f65a24be731d9db2a834160cf25321741a9f27cbb7c3f7d1b9c9bf7f5945891fe4827527a38709ae2f85ac422f2030f67612c80
|
7
|
+
data.tar.gz: 0b83ebe3596cb8313c2c7b5e2f2df5c9ce7b7c5912f0c4a32d3c6fd4156eaa3e977623a31c494581d0342fe2aae342cdf36644d5dbfffc588277315ce91638b5
|
data/bin/glaemscribe
CHANGED
@@ -107,7 +107,7 @@ command :transcribe do |c|
|
|
107
107
|
|
108
108
|
c.syntax = 'glaemscribe transcribe file [options]'
|
109
109
|
c.summary = 'Transcribes a file (default command)'
|
110
|
-
c.description = "Transcribes a file with the given options. You can use '
|
110
|
+
c.description = "Transcribes a file with the given options. You can use 'STDIN' instead of a file name to work with stdin."
|
111
111
|
|
112
112
|
c.option '-m', '--mode mode', String, "The name of the embedded mode to use. See the 'list' command to get a list of available modes names."
|
113
113
|
c.option '-c', '--charset charset', String, 'The name of the charset to use. If not given, glaemscribe will load and use the default charset defined in the mode.'
|
@@ -224,7 +224,7 @@ command :transcribe do |c|
|
|
224
224
|
|
225
225
|
# Ready for transcription ...
|
226
226
|
|
227
|
-
if(filename == "
|
227
|
+
if(filename == "STDIN")
|
228
228
|
perxit "Opened in stdin mode, waiting for input..."
|
229
229
|
begin
|
230
230
|
while to_transcribe = STDIN.gets
|
@@ -302,7 +302,14 @@ command :info do |c|
|
|
302
302
|
puts "#{$terminal.color "Human Name" , :bold} : #{mode.human_name}"
|
303
303
|
puts "#{$terminal.color "Authors " , :bold} : #{mode.authors}"
|
304
304
|
puts "#{$terminal.color "Version " , :bold} : #{mode.version}"
|
305
|
-
|
305
|
+
puts ""
|
306
|
+
puts "#{$terminal.color "Options " , :bold} :"
|
307
|
+
mode.options.each { |name,option|
|
308
|
+
puts " #{$terminal.color name, :bold} (#{option.type}) [#{option.default_value_name}]"
|
309
|
+
option.values.each{ |oname,ovalue|
|
310
|
+
puts " #{$terminal.color oname, :bold}"
|
311
|
+
}
|
312
|
+
}
|
306
313
|
end
|
307
314
|
|
308
315
|
end
|
@@ -31,7 +31,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
31
31
|
mainly, tehtar variants. These characters are only virtual, they do not really exist in DS based fonts.
|
32
32
|
**\
|
33
33
|
|
34
|
-
\** ☢
|
34
|
+
\** ☢
|
35
|
+
\char 2622 A_TEHTA A_TEHTA_CIRCUM E_TEHTA I_TEHTA O_TEHTA U_TEHTA THSUP_TICK_INV THSUP_LAMBDA THSUP_TICK THINF_CURL THSUP_SEV THINF_DOT THINF_DDOT THINF_TDOT THINF_STROKE THINF_DSTROKE DASH_INF SHOOK_LEFT SHOOK_RIGHT
|
36
|
+
**\
|
35
37
|
|
36
38
|
\** **\ \char 20 SPACE
|
37
39
|
\** ! **\ \char 21 TW_EXT_11
|
@@ -324,4 +326,85 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
324
326
|
\** 倦 **\ \char 5026 ?
|
325
327
|
\** 倰 **\ \char 5030 ?
|
326
328
|
\** 倹 **\ \char 5039 ?
|
329
|
+
|
330
|
+
|
331
|
+
\** The following virtual chars are used to handle choosing between multiple versions of tehtar (& the like) **\
|
332
|
+
\** It could be avoided with modern fonts with gsub/gpos tables for ligatures and diacritics **\
|
333
|
+
\** placement **\
|
334
|
+
|
335
|
+
\** TODO : Move FORMEN to S, move HYARMEN to XS **\
|
336
|
+
|
337
|
+
\beg virtual A_TEHTA
|
338
|
+
\class A_TEHTA_XS TELCO ARA
|
339
|
+
\class A_TEHTA_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN
|
340
|
+
\class A_TEHTA_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN
|
341
|
+
\class A_TEHTA_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME
|
342
|
+
\end
|
343
|
+
|
344
|
+
\beg virtual A_TEHTA_CIRCUM
|
345
|
+
\class A_TEHTA_CIRCUM_XS TELCO ARA
|
346
|
+
\class A_TEHTA_CIRCUM_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN
|
347
|
+
\class A_TEHTA_CIRCUM_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN
|
348
|
+
\class A_TEHTA_CIRCUM_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME
|
349
|
+
\end
|
350
|
+
|
351
|
+
\beg virtual E_TEHTA
|
352
|
+
\class E_TEHTA_XS TELCO ARA
|
353
|
+
\class E_TEHTA_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN
|
354
|
+
\class E_TEHTA_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN
|
355
|
+
\class E_TEHTA_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME
|
356
|
+
\end
|
357
|
+
|
358
|
+
\beg virtual I_TEHTA
|
359
|
+
\class I_TEHTA_XS TELCO ARA
|
360
|
+
\class I_TEHTA_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN
|
361
|
+
\class I_TEHTA_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN
|
362
|
+
\class I_TEHTA_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME
|
363
|
+
\end
|
364
|
+
|
365
|
+
\beg virtual O_TEHTA
|
366
|
+
\class O_TEHTA_XS TELCO ARA
|
367
|
+
\class O_TEHTA_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN
|
368
|
+
\class O_TEHTA_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN
|
369
|
+
\class O_TEHTA_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME
|
370
|
+
\end
|
371
|
+
|
372
|
+
\beg virtual U_TEHTA
|
373
|
+
\class U_TEHTA_XS TELCO ARA
|
374
|
+
\class U_TEHTA_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN
|
375
|
+
\class U_TEHTA_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN
|
376
|
+
\class U_TEHTA_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME
|
377
|
+
\end
|
378
|
+
|
379
|
+
\** no vowel mark **\
|
380
|
+
\beg virtual NO_VOWEL_DOT
|
381
|
+
\class THINF_DOT_XS TELCO ROMEN ARDA
|
382
|
+
\class THINF_DOT_L ORE TW_EXT_11 TW_EXT_12 TINCO PARMA SULE FORMEN VALA ANNA VILYA SILME ESSE AHA HWESTA HYARMEN YANTA URE
|
383
|
+
\class THINF_DOT_XL QUESSE CALMA TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME
|
384
|
+
\class LAMBE_MARK_DOT LAMBE ALDA
|
385
|
+
\end
|
386
|
+
|
387
|
+
\** palatalisation **\
|
388
|
+
\beg virtual PALATAL_SIGN
|
389
|
+
\class THINF_DDOT_XS ROMEN
|
390
|
+
\class THINF_DDOT_S
|
391
|
+
\class THINF_DDOT_L TINCO PARMA HYARMEN ANNA
|
392
|
+
\class THINF_DDOT_XL ANDO ANTO NUMEN MALTA
|
393
|
+
\class LAMBE_MARK_DDOT LAMBE
|
394
|
+
\end
|
395
|
+
|
396
|
+
\** gemination **\
|
397
|
+
\beg virtual GEMINATE_SIGN
|
398
|
+
\class DASH_INF_XS
|
399
|
+
\class DASH_INF_S TINCO PARMA CALMA ROMEN HWESTA SULE
|
400
|
+
\class DASH_INF_L NUMEN MALTA UNGWE ANDO
|
401
|
+
\class DASH_INF_XL
|
402
|
+
\class LAMBE_MARK_TILD LAMBE
|
403
|
+
\end
|
404
|
+
|
405
|
+
\beg virtual ALVEOLAR_SIGN
|
406
|
+
\class SHOOK_LEFT_L CALMA
|
407
|
+
\class SHOOK_RIGHT_L TINCO PARMA
|
408
|
+
\end
|
409
|
+
|
327
410
|
|
@@ -22,14 +22,16 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
22
22
|
|
23
23
|
**\
|
24
24
|
|
25
|
-
|
26
|
-
|
25
|
+
\beg changelog
|
26
|
+
\entry "0.0.2", "Added option for o/u tehtar loop orientation"
|
27
|
+
\entry "0.0.3", "Normalizing to virtual chars"
|
28
|
+
\end
|
27
29
|
|
28
30
|
\** Adunaic mode for glaemscribe (MAY BE INCOMPLETE) **\
|
29
31
|
\language Adûnaic
|
30
32
|
\writing Tengwar
|
31
33
|
\mode Glaemscrafu
|
32
|
-
\version 0.0.
|
34
|
+
\version 0.0.3
|
33
35
|
\authors "Talagan (Benjamin Babut)"
|
34
36
|
|
35
37
|
\charset tengwar_ds true
|
@@ -97,50 +99,32 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
97
99
|
{OOU} === {OO}{U} \** ôu **\
|
98
100
|
|
99
101
|
\if "reverse_o_u_tehtar == U_UP_O_DOWN"
|
100
|
-
|
101
|
-
|
102
|
-
{O_LOOP_L} === O_TEHTA_L
|
103
|
-
{O_LOOP_XL} === O_TEHTA_XL
|
104
|
-
{U_LOOP_XS} === U_TEHTA_XS
|
105
|
-
{U_LOOP_S} === U_TEHTA_S
|
106
|
-
{U_LOOP_L} === U_TEHTA_L
|
107
|
-
{U_LOOP_XL} === U_TEHTA_XL
|
102
|
+
{O_LOOP} === O_TEHTA
|
103
|
+
{U_LOOP} === U_TEHTA
|
108
104
|
\else
|
109
|
-
|
110
|
-
|
111
|
-
{O_LOOP_L} === U_TEHTA_L
|
112
|
-
{O_LOOP_XL} === U_TEHTA_XL
|
113
|
-
{U_LOOP_XS} === O_TEHTA_XS
|
114
|
-
{U_LOOP_S} === O_TEHTA_S
|
115
|
-
{U_LOOP_L} === O_TEHTA_L
|
116
|
-
{U_LOOP_XL} === O_TEHTA_XL
|
105
|
+
{O_LOOP} === U_TEHTA
|
106
|
+
{U_LOOP} === O_TEHTA
|
117
107
|
\endif
|
118
108
|
|
119
|
-
{SDIPHTHONGS} === {AI}
|
120
|
-
{SDIPHTHENGS} === YANTA
|
109
|
+
{SDIPHTHONGS} === {AI} * {AU}
|
110
|
+
{SDIPHTHENGS} === YANTA A_TEHTA * URE A_TEHTA
|
121
111
|
|
122
|
-
{LDIPHTHONGS} === {AAI}
|
123
|
-
{LDIPHTHENGS} === ARA
|
112
|
+
{LDIPHTHONGS} === {AAI} * {AAU} * {EEI} * {EEU} * {OOI} * {OOU}
|
113
|
+
{LDIPHTHENGS} === ARA A_TEHTA YANTA * ARA A_TEHTA URE * ARA E_TEHTA YANTA * ARA E_TEHTA URE * ARA {O_LOOP} YANTA * ARA {O_LOOP} URE
|
124
114
|
|
125
|
-
{VOWELS} === {A}
|
126
|
-
{
|
127
|
-
{TEHTA__S} === A_TEHTA_S * E_TEHTA_S * I_TEHTA_S * {O_LOOP_S} * {U_LOOP_S}
|
128
|
-
{TEHTA__L} === A_TEHTA_L * E_TEHTA_L * I_TEHTA_L * {O_LOOP_L} * {U_LOOP_L}
|
129
|
-
{TEHTA_XL} === A_TEHTA_XL * E_TEHTA_XL * I_TEHTA_XL * {O_LOOP_XL} * {U_LOOP_XL}
|
115
|
+
{VOWELS} === {A} * {E} * {I} * {O} * {U}
|
116
|
+
{_TEHTAR_} === A_TEHTA * E_TEHTA * I_TEHTA * {O_LOOP} * {U_LOOP}
|
130
117
|
|
131
|
-
{LVOWELS} === {AA}
|
132
|
-
{LVOWTNG} === ARA
|
118
|
+
{LVOWELS} === {AA} * {EE} * {II} * {OO} * {UU}
|
119
|
+
{LVOWTNG} === ARA A_TEHTA * ARA E_TEHTA * ARA I_TEHTA * ARA {O_LOOP} * ARA {U_LOOP}
|
133
120
|
|
134
121
|
\** Let's put all vowels/diphthongs in the same basket **\
|
135
122
|
{V_D} === [ {VOWELS} * {LVOWELS} * {SDIPHTHONGS} * {LDIPHTHONGS} ]
|
136
123
|
\** And their images... **\
|
137
|
-
{
|
138
|
-
|
139
|
-
{
|
140
|
-
{
|
141
|
-
|
142
|
-
[{VOWELS}] --> TELCO [{TEHTA_XS}] \** Replace isolated short vowels **\
|
143
|
-
[{LVOWELS}] --> [{LVOWTNG}] \** Replace long vowels **\
|
124
|
+
{_V_D_} === [ {_TEHTAR_} * {LVOWTNG} * {SDIPHTHENGS} * {LDIPHTHENGS} ]
|
125
|
+
|
126
|
+
[{VOWELS}] --> TELCO [{_TEHTAR_}] \** Replace isolated short vowels **\
|
127
|
+
[{LVOWELS}] --> [{LVOWTNG}] \** Replace long vowels **\
|
144
128
|
[{SDIPHTHONGS}] --> [{SDIPHTHENGS}] \** Replace short diphthongs **\
|
145
129
|
[{LDIPHTHONGS}] --> [{LDIPHTHENGS}] \** Replace long diphthongs **\
|
146
130
|
|
@@ -150,64 +134,64 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
150
134
|
{K} === (c,k)
|
151
135
|
{V} === (v,w)
|
152
136
|
|
153
|
-
{L1_S} === {K} * p * t * {K}{K}
|
154
|
-
{L1_T} === QUESSE * PARMA * TINCO * CALMA
|
137
|
+
{L1_S} === {K} * p * t * {K}{K} * pp * tt
|
138
|
+
{L1_T} === QUESSE * PARMA * TINCO * CALMA GEMINATE_SIGN * PARMA GEMINATE_SIGN * TINCO GEMINATE_SIGN
|
155
139
|
|
156
140
|
[{L1_S}] --> [ {L1_T} ]
|
157
|
-
[{L1_S}]{V_D} --> [ {L1_T} ]{
|
141
|
+
[{L1_S}]{V_D} --> [ {L1_T} ]{_V_D_}
|
158
142
|
|
159
|
-
{L2_S} === d * b * g * dd
|
160
|
-
{L2_T} === ANDO * UMBAR * UNGWE * ANDO
|
143
|
+
{L2_S} === d * b * g * dd * bb * gg
|
144
|
+
{L2_T} === ANDO * UMBAR * UNGWE * ANDO GEMINATE_SIGN * UMBAR GEMINATE_SIGN * UNGWE GEMINATE_SIGN
|
161
145
|
[{L2_S}] --> [{L2_T}]
|
162
|
-
[{L2_S}]{V_D} --> [{L2_T}]{
|
146
|
+
[{L2_S}]{V_D} --> [{L2_T}]{_V_D_}
|
163
147
|
|
164
148
|
\** Alignment of tehta is not the same in the font **\
|
165
149
|
\** So we need to split the third line unfortunately **\
|
166
|
-
{L3_1_S} === th * ph * (t,th)th
|
167
|
-
{L3_1_T} === SULE * FORMEN * SULE
|
150
|
+
{L3_1_S} === th * ph * (t,th)th * (p,ph)ph * (t,th)ph * (k,kh)ph * (p,ph)th * (k,kh)th
|
151
|
+
{L3_1_T} === SULE * FORMEN * SULE GEMINATE_SIGN * FORMEN GEMINATE_SIGN * SULE FORMEN * HWESTA FORMEN * FORMEN SULE * HWESTA SULE
|
168
152
|
|
169
|
-
{L3_2_S} === sh * kh * (k,kh)kh
|
170
|
-
{L3_2_T} === AHA * HWESTA * HWESTA
|
153
|
+
{L3_2_S} === sh * kh * (k,kh)kh * (p,ph)kh * (t,th)kh
|
154
|
+
{L3_2_T} === AHA * HWESTA * HWESTA GEMINATE_SIGN * FORMEN HWESTA * SULE HWESTA
|
171
155
|
|
172
156
|
[{L3_1_S}] --> [{L3_1_T}]
|
173
|
-
[{L3_1_S}]{V_D} --> [{L3_1_T}]{
|
157
|
+
[{L3_1_S}]{V_D} --> [{L3_1_T}]{_V_D_}
|
174
158
|
[{L3_2_S}] --> [{L3_2_T}]
|
175
|
-
[{L3_2_S}]{V_D} --> [{L3_2_T}]{
|
159
|
+
[{L3_2_S}]{V_D} --> [{L3_2_T}]{_V_D_}
|
176
160
|
|
177
161
|
{L4_S} === nd * mb * ng
|
178
162
|
{L4_T} === ANTO * AMPA * UNQUE
|
179
163
|
[{L4_S}] --> [{L4_T}]
|
180
|
-
[{L4_S}]{V_D} --> [{L4_T}]{
|
164
|
+
[{L4_S}]{V_D} --> [{L4_T}]{_V_D_}
|
181
165
|
|
182
|
-
{L5_S} === n * m * nn
|
183
|
-
{L5_T} === NUMEN * MALTA * NUMEN
|
166
|
+
{L5_S} === n * m * nn * mm
|
167
|
+
{L5_T} === NUMEN * MALTA * NUMEN GEMINATE_SIGN * MALTA GEMINATE_SIGN
|
184
168
|
[{L5_S}] --> [{L5_T}]
|
185
|
-
[{L5_S}]{V_D} --> [{L5_T}]{
|
169
|
+
[{L5_S}]{V_D} --> [{L5_T}]{_V_D_}
|
186
170
|
|
187
|
-
{L6_S} === {V} * y * rr
|
188
|
-
{L6_T} === VALA * ANNA * ROMEN
|
171
|
+
{L6_S} === {V} * y * rr * {V}{V} * yy
|
172
|
+
{L6_T} === VALA * ANNA * ROMEN GEMINATE_SIGN * VALA GEMINATE_SIGN * ANNA GEMINATE_SIGN
|
189
173
|
[r * {L6_S}] --> [ ORE * {L6_T}]
|
190
|
-
[r * {L6_S}]{V_D} --> [ ROMEN * {L6_T}]{
|
174
|
+
[r * {L6_S}]{V_D} --> [ ROMEN * {L6_T}]{_V_D_}
|
191
175
|
|
192
176
|
\** This one is not useful (redundant with higher) **\
|
193
177
|
\** Keep it for clarity of mind **\
|
194
178
|
r_ --> ORE
|
195
179
|
|
196
|
-
s{V_D} --> SILME_NUQUERNA {
|
180
|
+
s{V_D} --> SILME_NUQUERNA {_V_D_} \** Before a vowel goes down **\
|
197
181
|
s --> SILME \** Any other pos, up **\
|
198
|
-
z{V_D} --> ESSE_NUQUERNA {
|
182
|
+
z{V_D} --> ESSE_NUQUERNA {_V_D_} \** Before a vowel goes down **\
|
199
183
|
z --> ESSE \** Any other pos, up **\
|
200
184
|
|
201
|
-
h{V_D} --> HYARMEN {
|
185
|
+
h{V_D} --> HYARMEN {_V_D_}
|
202
186
|
h --> HYARMEN
|
203
|
-
hh{V_D} --> HYARMEN
|
204
|
-
hh --> HYARMEN
|
187
|
+
hh{V_D} --> HYARMEN GEMINATE_SIGN {_V_D_}
|
188
|
+
hh --> HYARMEN GEMINATE_SIGN
|
205
189
|
|
206
|
-
l{V_D} --> LAMBE {
|
190
|
+
l{V_D} --> LAMBE {_V_D_}
|
207
191
|
l --> LAMBE
|
208
192
|
|
209
|
-
ll{V_D} --> LAMBE
|
210
|
-
ll --> LAMBE
|
193
|
+
ll{V_D} --> LAMBE GEMINATE_SIGN {_V_D_}
|
194
|
+
ll --> LAMBE GEMINATE_SIGN
|
211
195
|
|
212
196
|
\end
|
213
197
|
|
@@ -276,3 +260,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
276
260
|
|
277
261
|
\end
|
278
262
|
|
263
|
+
\beg postprocessor
|
264
|
+
\resolve_virtuals
|
265
|
+
\end
|
@@ -22,8 +22,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
22
22
|
|
23
23
|
**\
|
24
24
|
|
25
|
-
\** Changelog **\
|
26
|
-
\**
|
27
25
|
\beg changelog
|
28
26
|
\entry "0.0.2", "added χ for the word χarina, correcting ts/ps sequences to work better with eldamar"
|
29
27
|
\entry "0.0.3", "added o/u curl option"
|
@@ -37,11 +35,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
37
35
|
\entry "0.1.1", "Added default option for voiced plosives : use mb, nd, ng, ngw"
|
38
36
|
\entry "0.1.2", "Added a tehta shape selection"
|
39
37
|
\entry "0.1.3", "Fixing ks, ps, ts. Fixing dot under ore, romen in implicit a mode."
|
38
|
+
\entry "0.1.4", "Conforming to the new csub format. Cleaning with new csub classes."
|
39
|
+
\entry "0.1.5", "csub removed. Now using virtual chars defined in charsets."
|
40
40
|
\end
|
41
|
-
**\
|
42
41
|
|
43
42
|
\**
|
44
|
-
TODO : Use the new csub directive for all diacritics/signs that could have variants (e.g. THINF_DDOT (LAMBE_MARK_DDOT for LAMBE), DASH_INF (LAMBE_MARK_TILD for LAMBE), SHOOK_LEFT)
|
45
43
|
TODO : Option for dot or not in 'a implicit' option before long vowels ?
|
46
44
|
TODO : bb, dd etc ? (for noobs)
|
47
45
|
**\
|
@@ -49,7 +47,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
49
47
|
\language "Quenya"
|
50
48
|
\writing "Tengwar"
|
51
49
|
\mode "Classical"
|
52
|
-
\version "0.1.
|
50
|
+
\version "0.1.5"
|
53
51
|
\authors "Talagan (Benjamin Babut)"
|
54
52
|
|
55
53
|
\charset tengwar_ds true
|
@@ -134,7 +132,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
134
132
|
|
135
133
|
\if implicit_a
|
136
134
|
{_A_} === {NULL}
|
137
|
-
{_NVOWEL_} ===
|
135
|
+
{_NVOWEL_} === NO_VOWEL
|
138
136
|
\else
|
139
137
|
{_A_} === {A_SHAPE}
|
140
138
|
{_NVOWEL_} === {NULL}
|
@@ -192,20 +190,20 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
192
190
|
{_L1_} === TINCO * PARMA * CALMA * QUESSE
|
193
191
|
|
194
192
|
\** GEMINATED **\
|
195
|
-
{L1_1_GEMS} === tt * pp
|
196
|
-
{_L1_1_GEMS_} === TINCO
|
193
|
+
{L1_1_GEMS} === tt * pp * {K}{K}
|
194
|
+
{_L1_1_GEMS_} === TINCO GEMINATE_SIGN * PARMA GEMINATE_SIGN * CALMA GEMINATE_SIGN
|
197
195
|
|
198
196
|
\** NORMAL **\
|
199
197
|
[ {L1} * {L1_1_GEMS} ] {V_D_WN} --> [ {_L1_} * {_L1_1_GEMS_} ] {_V_D_WN_}
|
200
198
|
|
201
199
|
\** OTHERS **\
|
202
|
-
ty{V_D_WN} --> TINCO
|
203
|
-
py{V_D_WN} --> PARMA
|
200
|
+
ty{V_D_WN} --> TINCO PALATAL_SIGN {_V_D_WN_}
|
201
|
+
py{V_D_WN} --> PARMA PALATAL_SIGN {_V_D_WN_}
|
204
202
|
|
205
|
-
ts{V_D_WN} --> TINCO {_V_D_WN_}
|
206
|
-
ps{V_D_WN} --> PARMA {_V_D_WN_}
|
207
|
-
{K}s{V_D_WN} --> CALMA
|
208
|
-
x{V_D_WN} --> CALMA
|
203
|
+
ts{V_D_WN} --> TINCO {_V_D_WN_} ALVEOLAR_SIGN
|
204
|
+
ps{V_D_WN} --> PARMA {_V_D_WN_} ALVEOLAR_SIGN
|
205
|
+
{K}s{V_D_WN} --> CALMA ALVEOLAR_SIGN {_V_D_WN_}
|
206
|
+
x{V_D_WN} --> CALMA ALVEOLAR_SIGN {_V_D_WN_} \** render ks for x **\
|
209
207
|
|
210
208
|
\** ===================== **\
|
211
209
|
\** 2ND LINE RULES **\
|
@@ -217,7 +215,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
217
215
|
[{L2}]{V_D_WN} --> [{_L2_}]{_V_D_WN_}
|
218
216
|
|
219
217
|
\** Palatalized **\
|
220
|
-
ndy{V_D_WN} --> ANDO
|
218
|
+
ndy{V_D_WN} --> ANDO PALATAL_SIGN {_V_D_WN_}
|
221
219
|
|
222
220
|
\** Have some rules for d,b,g,gw although they are not theoretically possible, e.g. aldudénie needs it **\
|
223
221
|
{L2_UN} === d * b * g * gw
|
@@ -248,7 +246,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
248
246
|
[{L3}]{V_D_WN} --> [{_L3_}]{_V_D_WN_}
|
249
247
|
|
250
248
|
\** OTHERS **\
|
251
|
-
hy{V_D_WN} --> HYARMEN
|
249
|
+
hy{V_D_WN} --> HYARMEN PALATAL_SIGN {_V_D_WN_}
|
252
250
|
|
253
251
|
\** Override h with vowels (descendent of hy) **\
|
254
252
|
_h{V_D} --> HYARMEN {_V_D_}
|
@@ -265,7 +263,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
265
263
|
\** NORMAL **\
|
266
264
|
[{L4}]{V_D_WN} --> [{_L4_}]{_V_D_WN_}
|
267
265
|
\** OTHERS **\
|
268
|
-
nty{V_D_WN} --> ANTO
|
266
|
+
nty{V_D_WN} --> ANTO PALATAL_SIGN {_V_D_WN_}
|
269
267
|
|
270
268
|
\** ===================== **\
|
271
269
|
\** 5TH LINE RULES **\
|
@@ -275,10 +273,10 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
275
273
|
|
276
274
|
[{L5}]{V_D_WN} --> [{_L5_}]{_V_D_WN_}
|
277
275
|
|
278
|
-
ny{V_D_WN} --> NUMEN
|
279
|
-
nn{V_D_WN} --> NUMEN
|
280
|
-
my{V_D_WN} --> MALTA
|
281
|
-
mm{V_D_WN} --> MALTA
|
276
|
+
ny{V_D_WN} --> NUMEN PALATAL_SIGN {_V_D_WN_}
|
277
|
+
nn{V_D_WN} --> NUMEN GEMINATE_SIGN {_V_D_WN_}
|
278
|
+
my{V_D_WN} --> MALTA PALATAL_SIGN {_V_D_WN_}
|
279
|
+
mm{V_D_WN} --> MALTA GEMINATE_SIGN {_V_D_WN_}
|
282
280
|
|
283
281
|
\** ===================== **\
|
284
282
|
\** 6TH LINE RULES **\
|
@@ -291,7 +289,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
291
289
|
\endif
|
292
290
|
|
293
291
|
{L6} === r * v * y * w
|
294
|
-
{_L6_} === ROMEN * VALA * ANNA
|
292
|
+
{_L6_} === ROMEN * VALA * ANNA PALATAL_SIGN * VILYA
|
295
293
|
|
296
294
|
[{L6}]{V_D_WN} --> [{_L6_}]{_V_D_WN_}
|
297
295
|
|
@@ -301,8 +299,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
301
299
|
\** Override rule r + null **\
|
302
300
|
r --> {_LONE_R_} {_NVOWEL_}
|
303
301
|
|
304
|
-
rr{V_D_WN} --> ROMEN
|
305
|
-
ry{V_D_WN} --> ROMEN
|
302
|
+
rr{V_D_WN} --> ROMEN GEMINATE_SIGN {_V_D_WN_}
|
303
|
+
ry{V_D_WN} --> ROMEN PALATAL_SIGN {_V_D_WN_}
|
306
304
|
rd{V_D_WN} --> ARDA {_V_D_WN_}
|
307
305
|
|
308
306
|
\** ===================== **\
|
@@ -313,7 +311,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
313
311
|
|
314
312
|
[{LINE_L}]{V_D_WN} --> [{_LINE_L_}]{_V_D_WN_}
|
315
313
|
|
316
|
-
ly{V_D_WN} --> LAMBE
|
314
|
+
ly{V_D_WN} --> LAMBE PALATAL_SIGN {_V_D_WN_}
|
317
315
|
hl{V_D_WN} --> HALLA LAMBE {_V_D_WN_}
|
318
316
|
hr{V_D_WN} --> HALLA ROMEN {_V_D_WN_}
|
319
317
|
|
@@ -405,13 +403,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
405
403
|
\end
|
406
404
|
|
407
405
|
\beg postprocessor
|
408
|
-
|
409
|
-
\csub A_TEHTA "A_TEHTA_XS TELCO ARA" "A_TEHTA_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN" "A_TEHTA_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN" "A_TEHTA_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME"
|
410
|
-
\csub A_TEHTA_CIRCUM "A_TEHTA_CIRCUM_XS TELCO ARA" "A_TEHTA_CIRCUM_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN" "A_TEHTA_CIRCUM_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN" "A_TEHTA_CIRCUM_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME"
|
411
|
-
\csub E_TEHTA "E_TEHTA_XS TELCO ARA" "E_TEHTA_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN" "E_TEHTA_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN" "E_TEHTA_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME"
|
412
|
-
\csub I_TEHTA "I_TEHTA_XS TELCO ARA" "I_TEHTA_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN" "I_TEHTA_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN" "I_TEHTA_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME"
|
413
|
-
\csub O_TEHTA "O_TEHTA_XS TELCO ARA" "O_TEHTA_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN" "O_TEHTA_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN" "O_TEHTA_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME"
|
414
|
-
\csub U_TEHTA "U_TEHTA_XS TELCO ARA" "U_TEHTA_S ORE TW_EXT_11 TW_EXT_12 SULE HYARMEN" "U_TEHTA_L TINCO CALMA PARMA QUESSE AHA HWESTA LAMBE ALDA YANTA URE ROMEN ARDA SILME_NUQUERNA ESSE_NUQUERNA VALA ANNA VILYA FORMEN" "U_TEHTA_XL TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME"
|
415
|
-
\csub THINF_DOT "THINF_DOT_XS TELCO ROMEN ARDA" "THINF_DOT_L ORE TW_EXT_11 TW_EXT_12 TINCO PARMA SULE FORMEN VALA ANNA VILYA SILME ESSE AHA HWESTA HYARMEN YANTA URE" "THINF_DOT_XL QUESSE CALMA TW_EXT_13 TW_EXT_14 TW_EXT_21 TW_EXT_22 TW_EXT_23 TW_EXT_24 ANDO UMBAR ANGA UNGWE ANTO AMPA ANCA UNQUE NUMEN MALTA NOLDO NWALME" "LAMBE_MARK_DOT LAMBE ALDA"
|
406
|
+
\resolve_virtuals
|
416
407
|
\end
|
417
408
|
|
data/lib/api/charset.rb
CHANGED
@@ -27,39 +27,121 @@ module Glaemscribe
|
|
27
27
|
|
28
28
|
attr_accessor :errors
|
29
29
|
attr_reader :chars
|
30
|
+
attr_reader :virtual_chars
|
30
31
|
|
31
32
|
class Char
|
32
33
|
attr_accessor :line
|
33
34
|
attr_accessor :code
|
34
35
|
attr_accessor :names
|
35
36
|
attr_accessor :str
|
37
|
+
attr_accessor :charset
|
36
38
|
|
37
39
|
def initialize
|
38
40
|
@names = {}
|
39
41
|
end
|
42
|
+
|
43
|
+
def virtual?
|
44
|
+
false
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
class VirtualChar
|
49
|
+
attr_accessor :line
|
50
|
+
attr_accessor :names
|
51
|
+
attr_accessor :classes
|
52
|
+
attr_accessor :charset
|
53
|
+
|
54
|
+
class VirtualClass
|
55
|
+
attr_accessor :target
|
56
|
+
attr_accessor :triggers
|
57
|
+
end
|
58
|
+
|
59
|
+
def initialize
|
60
|
+
@classes = {} # result_char_1 => [trigger_char_1, trigger_char_2 ...] , result_char_1 => ...
|
61
|
+
@lookup_table = {}
|
62
|
+
end
|
63
|
+
|
64
|
+
def str
|
65
|
+
VIRTUAL_CHAR_OUTPUT
|
66
|
+
end
|
67
|
+
|
68
|
+
def finalize
|
69
|
+
@lookup_table = {}
|
70
|
+
@classes.each{ |vc|
|
71
|
+
|
72
|
+
result_char = vc.target
|
73
|
+
trigger_chars = vc.triggers
|
74
|
+
|
75
|
+
trigger_chars.each{ |trigger_char|
|
76
|
+
found = @lookup_table[trigger_char]
|
77
|
+
if found
|
78
|
+
@charset.errors << Glaeml::Error.new(@line, "Trigger char #{trigger_char} found twice in virtual char.")
|
79
|
+
else
|
80
|
+
rc = @charset[result_char]
|
81
|
+
tc = @charset[trigger_char]
|
82
|
+
|
83
|
+
if rc.nil?
|
84
|
+
@charset.errors << Glaeml::Error.new(@line, "Trigger char #{trigger_char} points to unknown result char #{result_char}.")
|
85
|
+
elsif tc.nil?
|
86
|
+
@charset.errors << Glaeml::Error.new(@line, "Unknown trigger char #{trigger_char}.")
|
87
|
+
elsif tc.class == VirtualChar
|
88
|
+
@charset.errors << Glaeml::Error.new(@line, "Trigger char #{trigger_char} is virtual. This is not supported!")
|
89
|
+
elsif rc.class == VirtualChar
|
90
|
+
@charset.errors << Glaeml::Error.new(@line, "Trigger char #{trigger_char} points to another virtual char #{result_char}. This is not supported!")
|
91
|
+
else
|
92
|
+
tc.names.each{|trigger_char_name| # Don't forget to match all name variants for that trigger char!
|
93
|
+
@lookup_table[trigger_char_name] = rc
|
94
|
+
}
|
95
|
+
end
|
96
|
+
end
|
97
|
+
}
|
98
|
+
}
|
99
|
+
end
|
100
|
+
|
101
|
+
def [](trigger_char_name)
|
102
|
+
@lookup_table[trigger_char_name]
|
103
|
+
end
|
104
|
+
|
105
|
+
def virtual?
|
106
|
+
true
|
107
|
+
end
|
40
108
|
end
|
41
109
|
|
42
110
|
def initialize(name)
|
43
|
-
@name
|
44
|
-
@chars
|
45
|
-
@errors
|
111
|
+
@name = name
|
112
|
+
@chars = []
|
113
|
+
@errors = []
|
114
|
+
@virtual_chars = []
|
46
115
|
end
|
47
116
|
|
48
117
|
# Pass integer (utf8 num) and array (of strings)
|
49
118
|
def add_char(line, code, names)
|
50
119
|
return if names.empty? || names.include?("?") # Ignore characters with '?'
|
51
120
|
|
52
|
-
c
|
53
|
-
c.line
|
54
|
-
c.code
|
55
|
-
c.names
|
56
|
-
c.str
|
121
|
+
c = Char.new
|
122
|
+
c.line = line
|
123
|
+
c.code = code
|
124
|
+
c.names = names
|
125
|
+
c.str = code.chr('UTF-8')
|
126
|
+
c.charset = self
|
57
127
|
@chars << c
|
58
128
|
end
|
59
129
|
|
130
|
+
def add_virtual_char(line, classes, names)
|
131
|
+
return if names.empty? || names.include?("?") # Ignore characters with '?'
|
132
|
+
|
133
|
+
c = VirtualChar.new
|
134
|
+
c.line = line
|
135
|
+
c.names = names
|
136
|
+
c.classes = classes # We'll check errors in finalize
|
137
|
+
c.charset = self
|
138
|
+
@chars << c
|
139
|
+
end
|
140
|
+
|
60
141
|
def finalize
|
61
|
-
@errors
|
62
|
-
@lookup_table
|
142
|
+
@errors = []
|
143
|
+
@lookup_table = {}
|
144
|
+
@virtual_chars = []
|
63
145
|
|
64
146
|
@chars.each { |c|
|
65
147
|
c.names.each { |cname|
|
@@ -72,6 +154,13 @@ module Glaemscribe
|
|
72
154
|
}
|
73
155
|
}
|
74
156
|
|
157
|
+
@chars.each{ |c|
|
158
|
+
if c.class == VirtualChar
|
159
|
+
c.finalize
|
160
|
+
@virtual_chars << c
|
161
|
+
end
|
162
|
+
}
|
163
|
+
|
75
164
|
API::Debug::log("Finalized charset '#{@name}', #{@lookup_table.count} symbols loaded.")
|
76
165
|
end
|
77
166
|
|
data/lib/api/charset_parser.rb
CHANGED
@@ -45,6 +45,19 @@ module Glaemscribe
|
|
45
45
|
names = char_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
|
46
46
|
@charset.add_char(char_element.line,code,names)
|
47
47
|
}
|
48
|
+
|
49
|
+
doc.root_node.gpath("virtual").each { |virtual_element|
|
50
|
+
names = virtual_element.args
|
51
|
+
classes = []
|
52
|
+
virtual_element.gpath("class").each { |class_element|
|
53
|
+
vc = Charset::VirtualChar::VirtualClass.new
|
54
|
+
vc.target = class_element.args[0]
|
55
|
+
vc.triggers = class_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
|
56
|
+
classes << vc
|
57
|
+
}
|
58
|
+
@charset.add_virtual_char(virtual_element.line,classes,names)
|
59
|
+
}
|
60
|
+
|
48
61
|
@charset.finalize
|
49
62
|
|
50
63
|
@charset
|
data/lib/api/constants.rb
CHANGED
data/lib/api/glaeml.rb
CHANGED
@@ -67,6 +67,13 @@ module Glaemscribe
|
|
67
67
|
@children = []
|
68
68
|
end
|
69
69
|
|
70
|
+
# Make our object clonable
|
71
|
+
def initialize_copy(other)
|
72
|
+
super
|
73
|
+
@args = other.args.clone
|
74
|
+
@children = other.children.map{|c| c.clone}
|
75
|
+
end
|
76
|
+
|
70
77
|
def pathfind_crawl(apath, found)
|
71
78
|
|
72
79
|
children.each{ |c|
|
data/lib/api/mode_parser.rb
CHANGED
@@ -181,7 +181,7 @@ module Glaemscribe
|
|
181
181
|
if !operator_class
|
182
182
|
@mode.errors << Glaeml::Error.new(element.line,"Operator #{operator_name} is unknown.")
|
183
183
|
else
|
184
|
-
term.operators << operator_class.new(element.
|
184
|
+
term.operators << operator_class.new(element.clone)
|
185
185
|
end
|
186
186
|
}
|
187
187
|
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
module Glaemscribe
|
24
|
+
module API
|
25
|
+
|
26
|
+
class ResolveVirtualsPostProcessorOperator < PostProcessorOperator
|
27
|
+
|
28
|
+
def finalize(trans_options)
|
29
|
+
super(trans_options)
|
30
|
+
@last_triggers = {} # Allocate the lookup here to optimize
|
31
|
+
end
|
32
|
+
|
33
|
+
def reset_trigger_states(charset)
|
34
|
+
# For each virtual char in charset, maintain a state.
|
35
|
+
charset.virtual_chars.each{ |vc|
|
36
|
+
@last_triggers[vc] = nil # Clear the state
|
37
|
+
}
|
38
|
+
end
|
39
|
+
|
40
|
+
def apply(tokens,charset)
|
41
|
+
|
42
|
+
reset_trigger_states(charset)
|
43
|
+
|
44
|
+
tokens.each_with_index{ |token,idx|
|
45
|
+
|
46
|
+
if token == '*SPACE'
|
47
|
+
reset_trigger_states(charset)
|
48
|
+
next
|
49
|
+
end
|
50
|
+
|
51
|
+
# Check if token is a virtual char
|
52
|
+
c = charset[token]
|
53
|
+
next if c.nil? # May happen for empty tokens
|
54
|
+
if c.virtual?
|
55
|
+
# Try to replace
|
56
|
+
last_trigger = @last_triggers[c]
|
57
|
+
if last_trigger != nil
|
58
|
+
tokens[idx] = last_trigger.names.first # Take the first name of the non-virtual replacement.
|
59
|
+
end
|
60
|
+
else
|
61
|
+
# Update states of virtual classes
|
62
|
+
charset.virtual_chars.each{|vc|
|
63
|
+
rc = vc[token]
|
64
|
+
@last_triggers[vc] = rc if rc != nil
|
65
|
+
}
|
66
|
+
end
|
67
|
+
}
|
68
|
+
tokens
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
ResourceManager::register_post_processor_class("resolve_virtuals", ResolveVirtualsPostProcessorOperator)
|
73
|
+
|
74
|
+
end
|
75
|
+
end
|
@@ -26,10 +26,10 @@ module Glaemscribe
|
|
26
26
|
class ElvishNumbersPreProcessorOperator < PreProcessorOperator
|
27
27
|
|
28
28
|
def apply(l)
|
29
|
-
base = args[0]
|
29
|
+
base = finalized_glaeml_element.args[0]
|
30
30
|
base = (base)?(base.to_i):(12)
|
31
31
|
|
32
|
-
reverse = args[1]
|
32
|
+
reverse = finalized_glaeml_element.args[1]
|
33
33
|
reverse = (reverse != nil)?(reverse == "true" || reverse == true):(true)
|
34
34
|
|
35
35
|
l.gsub(/\d+/) { |f|
|
@@ -27,8 +27,8 @@ module Glaemscribe
|
|
27
27
|
class RxSubstitutePreProcessorOperator < PreProcessorOperator
|
28
28
|
|
29
29
|
def apply(l)
|
30
|
-
what = /#{
|
31
|
-
with =
|
30
|
+
what = /#{finalized_glaeml_element.args[0]}/
|
31
|
+
with = finalized_glaeml_element.args[1]
|
32
32
|
l.gsub(what, with)
|
33
33
|
end
|
34
34
|
|
@@ -26,8 +26,8 @@ module Glaemscribe
|
|
26
26
|
class SubstitutePreProcessorOperator < PreProcessorOperator
|
27
27
|
|
28
28
|
def apply(l)
|
29
|
-
what =
|
30
|
-
with =
|
29
|
+
what = finalized_glaeml_element.args[0]
|
30
|
+
with = finalized_glaeml_element.args[1]
|
31
31
|
l.gsub(what, with)
|
32
32
|
end
|
33
33
|
end
|
@@ -26,11 +26,11 @@ module Glaemscribe
|
|
26
26
|
class UpDownTehtaSplitPreProcessorOperator < PreProcessorOperator
|
27
27
|
|
28
28
|
attr_reader :vowel_list, :consonant_list
|
29
|
-
def
|
30
|
-
super(
|
29
|
+
def finalize(trans_options)
|
30
|
+
super(trans_options)
|
31
31
|
|
32
|
-
vowel_list = args[0]
|
33
|
-
consonant_list = args[1]
|
32
|
+
vowel_list = finalized_glaeml_element.args[0]
|
33
|
+
consonant_list = finalized_glaeml_element.args[1]
|
34
34
|
|
35
35
|
vowel_list = vowel_list.split(/,/).map{|s| s.strip}
|
36
36
|
consonant_list = consonant_list.split(/,/).map{|s| s.strip}
|
@@ -24,11 +24,11 @@ module Glaemscribe
|
|
24
24
|
module API
|
25
25
|
|
26
26
|
class PrePostProcessorOperator
|
27
|
-
attr_reader :
|
28
|
-
attr_reader :
|
27
|
+
attr_reader :glaeml_element
|
28
|
+
attr_reader :finalized_glaeml_element
|
29
29
|
|
30
|
-
def initialize(
|
31
|
-
@
|
30
|
+
def initialize(glaeml_element)
|
31
|
+
@glaeml_element = glaeml_element
|
32
32
|
end
|
33
33
|
|
34
34
|
def eval_arg(arg, trans_options)
|
@@ -40,11 +40,16 @@ module Glaemscribe
|
|
40
40
|
return arg
|
41
41
|
end
|
42
42
|
|
43
|
-
def
|
44
|
-
|
45
|
-
|
46
|
-
|
43
|
+
def finalize_glaeml_element(ge, trans_options)
|
44
|
+
ge.args.map! { |arg| eval_arg(arg, trans_options) }
|
45
|
+
ge.children.each{ |child|
|
46
|
+
finalize_glaeml_element(child, trans_options)
|
47
47
|
}
|
48
|
+
ge
|
49
|
+
end
|
50
|
+
|
51
|
+
def finalize(trans_options)
|
52
|
+
@finalized_glaeml_element = finalize_glaeml_element(@glaeml_element.clone, trans_options)
|
48
53
|
end
|
49
54
|
|
50
55
|
def apply
|
@@ -122,7 +127,7 @@ module Glaemscribe
|
|
122
127
|
|
123
128
|
# Apply filters
|
124
129
|
@operators.each{ |operator|
|
125
|
-
tokens = operator.apply(tokens)
|
130
|
+
tokens = operator.apply(tokens,out_charset)
|
126
131
|
}
|
127
132
|
|
128
133
|
# Convert output
|
data/lib/glaemscribe.rb
CHANGED
@@ -65,7 +65,7 @@ module Glaemscribe
|
|
65
65
|
require API_PATH + "/api/pre_processor/rxsubstitute.rb"
|
66
66
|
require API_PATH + "/api/pre_processor/up_down_tehta_split.rb"
|
67
67
|
require API_PATH + "/api/post_processor/reverse.rb"
|
68
|
-
require API_PATH + "/api/post_processor/csub.rb"
|
68
|
+
require API_PATH + "/api/post_processor/resolve_virtuals.rb"
|
69
69
|
|
70
70
|
end
|
71
71
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: glaemscribe
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.15
|
4
|
+
version: 1.0.16
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Benjamin 'Talagan' Babut
|
@@ -72,7 +72,7 @@ files:
|
|
72
72
|
- lib/api/mode.rb
|
73
73
|
- lib/api/mode_parser.rb
|
74
74
|
- lib/api/option.rb
|
75
|
-
- lib/api/post_processor/csub.rb
|
75
|
+
- lib/api/post_processor/resolve_virtuals.rb
|
76
76
|
- lib/api/post_processor/reverse.rb
|
77
77
|
- lib/api/pre_processor/downcase.rb
|
78
78
|
- lib/api/pre_processor/elvish_numbers.rb
|
@@ -110,7 +110,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
110
110
|
version: '0'
|
111
111
|
requirements: []
|
112
112
|
rubyforge_project:
|
113
|
-
rubygems_version: 2.
|
113
|
+
rubygems_version: 2.4.8
|
114
114
|
signing_key:
|
115
115
|
specification_version: 4
|
116
116
|
summary: Glǽmscribe
|
@@ -1,64 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
#
|
3
|
-
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
-
# the transcription of texts between writing systems, and more
|
5
|
-
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
-
# invented languages to some of his devised writing systems.
|
7
|
-
#
|
8
|
-
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
-
#
|
10
|
-
# This program is free software: you can redistribute it and/or modify
|
11
|
-
# it under the terms of the GNU Affero General Public License as published by
|
12
|
-
# the Free Software Foundation, either version 3 of the License, or
|
13
|
-
# any later version.
|
14
|
-
#
|
15
|
-
# This program is distributed in the hope that it will be useful,
|
16
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
-
# GNU Affero General Public License for more details.
|
19
|
-
#
|
20
|
-
# You should have received a copy of the GNU Affero General Public License
|
21
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
-
|
23
|
-
module Glaemscribe
|
24
|
-
module API
|
25
|
-
|
26
|
-
class CSubPostProcessorOperator < PostProcessorOperator
|
27
|
-
attr_reader :matcher
|
28
|
-
attr_reader :triggers
|
29
|
-
|
30
|
-
def initialize(args)
|
31
|
-
super(args)
|
32
|
-
|
33
|
-
# Build our operator
|
34
|
-
@matcher = self.raw_args[0]
|
35
|
-
@triggers = Hash.new
|
36
|
-
|
37
|
-
self.raw_args.each{ |arg|
|
38
|
-
|
39
|
-
splitted = arg.split()
|
40
|
-
replacer = splitted.shift()
|
41
|
-
|
42
|
-
splitted.each{ |token|
|
43
|
-
@triggers[token] = replacer
|
44
|
-
}
|
45
|
-
}
|
46
|
-
end
|
47
|
-
|
48
|
-
def apply(tokens)
|
49
|
-
last_trigger_replacer = nil
|
50
|
-
tokens.each_with_index{ |token,idx|
|
51
|
-
if token == @matcher && last_trigger_replacer != nil
|
52
|
-
tokens[idx] = last_trigger_replacer
|
53
|
-
elsif @triggers[token] != nil
|
54
|
-
last_trigger_replacer = @triggers[token]
|
55
|
-
end
|
56
|
-
}
|
57
|
-
tokens
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
ResourceManager::register_post_processor_class("csub", CSubPostProcessorOperator)
|
62
|
-
|
63
|
-
end
|
64
|
-
end
|