glaemscribe 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/glaemscribe +2 -2
- data/glaemresources/charsets/cirth_ds.cst +514 -179
- data/glaemresources/charsets/eldamar.cst +210 -0
- data/glaemresources/charsets/tengwar_ds_annatar.cst +2452 -130
- data/glaemresources/charsets/tengwar_ds_eldamar.cst +2319 -125
- data/glaemresources/charsets/tengwar_ds_elfica.cst +2317 -126
- data/glaemresources/charsets/tengwar_ds_parmaite.cst +2319 -127
- data/glaemresources/charsets/tengwar_ds_sindarin.cst +2318 -127
- data/glaemresources/charsets/tengwar_freemono.cst +1 -1
- data/glaemresources/charsets/tengwar_guni_annatar.cst +2451 -131
- data/glaemresources/charsets/tengwar_guni_eldamar.cst +2317 -126
- data/glaemresources/charsets/tengwar_guni_elfica.cst +2316 -127
- data/glaemresources/charsets/tengwar_guni_parmaite.cst +2319 -127
- data/glaemresources/charsets/tengwar_guni_sindarin.cst +2317 -126
- data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
- data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
- data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
- data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
- data/glaemresources/modes/japanese-tengwar.glaem +9 -4
- data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
- data/glaemresources/modes/raw-cirth.glaem +154 -0
- data/lib/api/charset_parser.rb +7 -1
- data/lib/api/mode.rb +35 -10
- data/lib/api/mode_parser.rb +21 -12
- data/lib/api/post_processor/outspace.rb +44 -0
- data/lib/api/rule_group.rb +1 -1
- data/lib/api/transcription_pre_post_processor.rb +8 -5
- data/lib/api/transcription_processor.rb +12 -9
- data/lib/glaemscribe.rb +2 -0
- data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
- data/lib_espeak/glaemscribe_tts.js +363 -223
- metadata +12 -6
@@ -22,11 +22,16 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
22
22
|
|
23
23
|
**\
|
24
24
|
|
25
|
+
\beg changelog
|
26
|
+
\entry "0.0.1" "Initial version"
|
27
|
+
\entry "0.0.2" "Corrected 億 to 10^8, added support for セィ and ゼィ. Thanks to Roman Rausch for this feedback!"
|
28
|
+
\end
|
29
|
+
|
25
30
|
\language "Japanese"
|
26
31
|
\writing "Tengwar"
|
27
32
|
\mode "Japanese Tengwar - G*"
|
28
33
|
\authors "Talagan (Benjamin Babut)"
|
29
|
-
\version "0.0.1"
|
34
|
+
\version "0.0.2"
|
30
35
|
|
31
36
|
\world primary
|
32
37
|
\invention experimental
|
@@ -531,14 +536,14 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
531
536
|
\deploy serie {SERIE} {TENGWA} {_PALATAL_} {__SV__} {__LV__} {NULL}
|
532
537
|
|
533
538
|
\** si **\
|
534
|
-
{SERIE} === [(
|
539
|
+
{SERIE} === [(スィ,セィ,si)]
|
535
540
|
{TENGWA} === SILME_NUQUERNA
|
536
541
|
{__SV__} === [{_I_}]
|
537
542
|
{__LV__} === [{_II_}]
|
538
543
|
\deploy serie {SERIE} {TENGWA} {_CANCELLER_} {__SV__} {__LV__} {NULL}
|
539
544
|
|
540
545
|
\** zi **\
|
541
|
-
{SERIE} === [(
|
546
|
+
{SERIE} === [(ズィ,ゼィ,zi)]
|
542
547
|
{TENGWA} === ESSE_NUQUERNA
|
543
548
|
{__SV__} === [{_I_}]
|
544
549
|
{__LV__} === [{_II_}]
|
@@ -749,7 +754,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
749
754
|
百 --> NUM_2 CIRC_TEHTA_INF
|
750
755
|
千 --> NUM_3 CIRC_TEHTA_INF
|
751
756
|
万 --> NUM_4 CIRC_TEHTA_INF
|
752
|
-
億 -->
|
757
|
+
億 --> NUM_8 CIRC_TEHTA_INF
|
753
758
|
0 --> NUM_0
|
754
759
|
1 --> NUM_1
|
755
760
|
2 --> NUM_2
|
@@ -0,0 +1,248 @@
|
|
1
|
+
\**
|
2
|
+
|
3
|
+
Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
the transcription of texts between writing systems, and more
|
5
|
+
specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
invented languages to some of his devised writing systems.
|
7
|
+
|
8
|
+
Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
|
10
|
+
This program is free software: you can redistribute it and/or modify
|
11
|
+
it under the terms of the GNU Affero General Public License as published by
|
12
|
+
the Free Software Foundation, either version 3 of the License, or
|
13
|
+
any later version.
|
14
|
+
|
15
|
+
This program is distributed in the hope that it will be useful,
|
16
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
GNU Affero General Public License for more details.
|
19
|
+
|
20
|
+
You should have received a copy of the GNU Affero General Public License
|
21
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
**\
|
24
|
+
|
25
|
+
\beg changelog
|
26
|
+
\entry "0.0.1" "Ported from Westron"
|
27
|
+
\entry "0.0.2" "Simplified"
|
28
|
+
\entry "0.0.3" "Yaterash poetic forms"
|
29
|
+
\entry "0.0.4" "English numeric defaults, use lsd for nasal, poetic finals"
|
30
|
+
\entry "0.0.5" "Bikang option: beacon final hints"
|
31
|
+
\entry "0.0.6" "Lowng & kuwendi options"
|
32
|
+
\entry "0.1.0" "clean up pass"
|
33
|
+
\end
|
34
|
+
\version "0.1.0"
|
35
|
+
|
36
|
+
\** Lang Belta mode for glaemscribe **\
|
37
|
+
\language "Lang Belta"
|
38
|
+
\writing Tengwar
|
39
|
+
\mode "Lang Belta (The Expanse) Tengwar - G*"
|
40
|
+
\authors "Da Def, based on Talagan's Westron"
|
41
|
+
|
42
|
+
\world other_world
|
43
|
+
\invention experimental
|
44
|
+
|
45
|
+
\raw_mode "raw-tengwar"
|
46
|
+
|
47
|
+
\charset tengwar_ds_sindarin false
|
48
|
+
\charset tengwar_ds_parmaite false
|
49
|
+
\charset tengwar_ds_eldamar false
|
50
|
+
\charset tengwar_ds_annatar false
|
51
|
+
\charset tengwar_ds_elfica false
|
52
|
+
|
53
|
+
\charset tengwar_guni_sindarin false
|
54
|
+
\charset tengwar_guni_parmaite false
|
55
|
+
\charset tengwar_guni_eldamar false
|
56
|
+
\charset tengwar_guni_annatar true
|
57
|
+
\charset tengwar_guni_elfica false
|
58
|
+
|
59
|
+
\charset tengwar_freemono false
|
60
|
+
\charset tengwar_telcontar false
|
61
|
+
|
62
|
+
\beg options
|
63
|
+
\option wit_yaterash false
|
64
|
+
|
65
|
+
\beg option bikang true
|
66
|
+
\visible_when "wit_yaterash == true"
|
67
|
+
\end
|
68
|
+
|
69
|
+
\beg option lowng false
|
70
|
+
\visible_when "wit_yaterash == true"
|
71
|
+
\end
|
72
|
+
|
73
|
+
\beg option kuwendi false
|
74
|
+
\visible_when "wit_yaterash == true && lowng == false"
|
75
|
+
\end
|
76
|
+
|
77
|
+
\option implicit_a false
|
78
|
+
\option reverse_numbers false
|
79
|
+
\beg option numbers_base BASE_10
|
80
|
+
\value BASE_10 10
|
81
|
+
\value BASE_12 12
|
82
|
+
\end
|
83
|
+
\end
|
84
|
+
|
85
|
+
\beg preprocessor
|
86
|
+
\** Work exclusively downcase **\
|
87
|
+
\downcase
|
88
|
+
|
89
|
+
\** protect NOLDO and NWALME **\
|
90
|
+
\rxsubstitute "(ny)" "ɲ"
|
91
|
+
\rxsubstitute "(ng)" "ŋ"
|
92
|
+
|
93
|
+
\** Preprocess numbers **\
|
94
|
+
\elvish_numbers "\\eval numbers_base" "\\eval reverse_numbers"
|
95
|
+
\end
|
96
|
+
|
97
|
+
\beg processor
|
98
|
+
\beg rules literal
|
99
|
+
|
100
|
+
\if implicit_a
|
101
|
+
{_A_} === {NULL}
|
102
|
+
{_X_} === NO_VOWEL_DOT
|
103
|
+
\else
|
104
|
+
{_A_} === A_TEHTA
|
105
|
+
{_X_} === {NULL}
|
106
|
+
\endif
|
107
|
+
|
108
|
+
{PENNAR} === a * e * i * ow * o * u
|
109
|
+
{TEHTAR} === {_A_} * E_TEHTA * I_TEHTA * A_TEHTA_REVERSED * O_TEHTA * U_TEHTA
|
110
|
+
|
111
|
+
\** For now, shãsa is a hapax **\
|
112
|
+
{PENNAR} === {PENNAR} * ã
|
113
|
+
{TEHTAR} === {TEHTAR} * E_TEHTA_GRAVE
|
114
|
+
|
115
|
+
\** tentative
|
116
|
+
{PENNAR} === {PENNAR} * ẽ * ĩ * õw * õ * ũ
|
117
|
+
{TEHTAR} === {TEHTAR} * E_TEHTA_DOUBLE * I_TEHTA_DOUBLE * WA_TEHTA * O_TEHTA_DOUBLE * U_TEHTA_DOUBLE
|
118
|
+
**\
|
119
|
+
|
120
|
+
{L1_S} === t * p * ch * k
|
121
|
+
{L1_T} === TINCO * PARMA * CALMA * QUESSE
|
122
|
+
{L2_S} === d * b * dzh * g
|
123
|
+
{L2_T} === ANDO * UMBAR * ANGA * UNGWE
|
124
|
+
{L3_S} === f * sh * x
|
125
|
+
{L3_T} === FORMEN * AHA * HWESTA
|
126
|
+
{L4_S} === v
|
127
|
+
{L4_T} === AMPA
|
128
|
+
{L5_S} === n * m * ɲ * ŋ
|
129
|
+
{L5_T} === NUMEN * MALTA * NOLDO * NWALME
|
130
|
+
{L6_S} === w * y
|
131
|
+
{L6_T} === VALA * ANNA
|
132
|
+
{IR_S} === r * l * s * z
|
133
|
+
{IR_T} === ROMEN * LAMBE * SILME_NUQUERNA * ESSE_NUQUERNA
|
134
|
+
|
135
|
+
{TAPTAR} === {L1_S} * {L2_S} * {L3_S} * {L4_S} * {L5_S} * {L6_S} * {IR_S}
|
136
|
+
{TENGWAR} === {L1_T} * {L2_T} * {L3_T} * {L4_T} * {L5_T} * {L6_T} * {IR_T}
|
137
|
+
|
138
|
+
[{TAPTAR}] --> [{TENGWAR}] {_X_}
|
139
|
+
[{TAPTAR}][{PENNAR}] --> [{TENGWAR}][{TEHTAR}]
|
140
|
+
[{PENNAR}] --> TELCO [{TEHTAR}]
|
141
|
+
|
142
|
+
\if wit_yaterash
|
143
|
+
|
144
|
+
\if lowng
|
145
|
+
{TAPTARN} === nt * nd * mp * mb * nsh * ns
|
146
|
+
{TENGWARN} === TINCO_EXT * ANDO_EXT * PARMA_EXT * UMBAR_EXT * AHA_EXT * TW_HW_LOWDHAM
|
147
|
+
|
148
|
+
tn[{PENNAR}] --> TW_MH [{TEHTAR}]
|
149
|
+
lt[{PENNAR}] --> ALDA [{TEHTAR}]
|
150
|
+
|
151
|
+
[{TAPTARN}][{PENNAR}] --> [{TENGWARN}][{TEHTAR}]
|
152
|
+
\else
|
153
|
+
tn[{PENNAR}] --> NUMEN THINNAS [{TEHTAR}]
|
154
|
+
l[{TAPTAR}][{PENNAR}] --> [{TENGWAR}] PALATAL_SIGN [{TEHTAR}]
|
155
|
+
|
156
|
+
\if kuwendi
|
157
|
+
{NASAL} === NASALIZE_SIGN_TILD
|
158
|
+
\else
|
159
|
+
{NASAL} === CIRC_TEHTA_INF
|
160
|
+
\endif
|
161
|
+
|
162
|
+
(m,n)[{TAPTAR}][{PENNAR}] --> [{TENGWAR}] {NASAL} [{TEHTAR}]
|
163
|
+
\endif
|
164
|
+
|
165
|
+
\if bikang
|
166
|
+
(m,t)_ --> O_TEHTA_INF
|
167
|
+
(ŋ)_ --> E_TEHTA_DOUBLE_INF
|
168
|
+
(f,k,p)_ --> U_TEHTA_INF
|
169
|
+
(l,x,sh)_ --> E_TEHTA_INF
|
170
|
+
\else
|
171
|
+
(t,k,p,f,x,sh,m,ŋ,l)_ --> GEMINATE_SIGN_TILD
|
172
|
+
\endif
|
173
|
+
|
174
|
+
\endif
|
175
|
+
|
176
|
+
\end
|
177
|
+
|
178
|
+
\beg rules punctutation
|
179
|
+
. --> PUNCT_DDOT
|
180
|
+
.. --> PUNCT_DOT PUNCT_DDOT PUNCT_DOT
|
181
|
+
… --> PUNCT_TILD
|
182
|
+
... --> PUNCT_TILD
|
183
|
+
.... --> PUNCT_TILD
|
184
|
+
..... --> PUNCT_TILD
|
185
|
+
...... --> PUNCT_TILD
|
186
|
+
....... --> PUNCT_TILD
|
187
|
+
|
188
|
+
, --> PUNCT_DOT
|
189
|
+
: --> PUNCT_DOT
|
190
|
+
; --> PUNCT_DOT
|
191
|
+
! --> PUNCT_EXCLAM
|
192
|
+
? --> PUNCT_INTERR
|
193
|
+
· --> PUNCT_DOT
|
194
|
+
|
195
|
+
\** Apostrophe **\
|
196
|
+
|
197
|
+
' --> {NULL}
|
198
|
+
’ --> {NULL}
|
199
|
+
|
200
|
+
\** NBSP **\
|
201
|
+
{NBSP} --> NBSP
|
202
|
+
|
203
|
+
\** Quotes **\
|
204
|
+
|
205
|
+
“ --> DQUOT_OPEN
|
206
|
+
” --> DQUOT_CLOSE
|
207
|
+
« --> DQUOT_OPEN
|
208
|
+
» --> DQUOT_CLOSE
|
209
|
+
|
210
|
+
- --> PUNCT_DOT
|
211
|
+
– --> PUNCT_TILD
|
212
|
+
— --> PUNCT_DTILD
|
213
|
+
|
214
|
+
[ --> PUNCT_PAREN_L
|
215
|
+
] --> PUNCT_PAREN_R
|
216
|
+
( --> PUNCT_PAREN_L
|
217
|
+
) --> PUNCT_PAREN_R
|
218
|
+
{ --> PUNCT_PAREN_L
|
219
|
+
} --> PUNCT_PAREN_R
|
220
|
+
< --> PUNCT_PAREN_L
|
221
|
+
> --> PUNCT_PAREN_R
|
222
|
+
|
223
|
+
\** Not universal between fonts ... **\
|
224
|
+
$ --> ELVISH_PAREN
|
225
|
+
≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
|
226
|
+
≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
|
227
|
+
\end
|
228
|
+
|
229
|
+
\beg rules numbers
|
230
|
+
0 --> NUM_0
|
231
|
+
1 --> NUM_1
|
232
|
+
2 --> NUM_2
|
233
|
+
3 --> NUM_3
|
234
|
+
4 --> NUM_4
|
235
|
+
5 --> NUM_5
|
236
|
+
6 --> NUM_6
|
237
|
+
7 --> NUM_7
|
238
|
+
8 --> NUM_8
|
239
|
+
9 --> NUM_9
|
240
|
+
A --> NUM_10
|
241
|
+
B --> NUM_11
|
242
|
+
\end
|
243
|
+
|
244
|
+
\end
|
245
|
+
|
246
|
+
\beg postprocessor
|
247
|
+
\resolve_virtuals
|
248
|
+
\end
|
@@ -0,0 +1,154 @@
|
|
1
|
+
\**
|
2
|
+
|
3
|
+
Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
the transcription of texts between writing systems, and more
|
5
|
+
specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
invented languages to some of his devised writing systems.
|
7
|
+
|
8
|
+
Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
|
10
|
+
This program is free software: you can redistribute it and/or modify
|
11
|
+
it under the terms of the GNU Affero General Public License as published by
|
12
|
+
the Free Software Foundation, either version 3 of the License, or
|
13
|
+
any later version.
|
14
|
+
|
15
|
+
This program is distributed in the hope that it will be useful,
|
16
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
GNU Affero General Public License for more details.
|
19
|
+
|
20
|
+
You should have received a copy of the GNU Affero General Public License
|
21
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
**\
|
24
|
+
|
25
|
+
\** very limited mode, mainly used for writing some doc for glaemscribe. **\
|
26
|
+
|
27
|
+
\beg changelog
|
28
|
+
\entry "0.0.1" "Initial version."
|
29
|
+
\end
|
30
|
+
|
31
|
+
\language "Raw Cirth"
|
32
|
+
\writing "Cirth"
|
33
|
+
\mode "Raw Cirth"
|
34
|
+
\version "0.0.1"
|
35
|
+
\authors "Talagan (Benjamin Babut)"
|
36
|
+
|
37
|
+
\world arda
|
38
|
+
\invention experimental
|
39
|
+
|
40
|
+
\metamode true
|
41
|
+
|
42
|
+
\charset cirth_ds true
|
43
|
+
|
44
|
+
\beg preprocessor
|
45
|
+
\** Work exclusively downcase **\
|
46
|
+
\downcase
|
47
|
+
\end
|
48
|
+
|
49
|
+
\beg processor
|
50
|
+
|
51
|
+
\beg rules litteral
|
52
|
+
_c1_ --> CIRTH_1
|
53
|
+
_c2_ --> CIRTH_2
|
54
|
+
_c3_ --> CIRTH_3
|
55
|
+
_c4_ --> CIRTH_4
|
56
|
+
_c5_ --> CIRTH_5
|
57
|
+
_c6_ --> CIRTH_6
|
58
|
+
_c7_ --> CIRTH_7
|
59
|
+
_c8_ --> CIRTH_8
|
60
|
+
_c9_ --> CIRTH_9
|
61
|
+
_c10_ --> CIRTH_10
|
62
|
+
_c11_ --> CIRTH_11
|
63
|
+
_c12_ --> CIRTH_12
|
64
|
+
_c13_ --> CIRTH_13
|
65
|
+
_c14_ --> CIRTH_14
|
66
|
+
_c15_ --> CIRTH_15
|
67
|
+
_c16_ --> CIRTH_16
|
68
|
+
_c17_ --> CIRTH_17
|
69
|
+
_c18_ --> CIRTH_18
|
70
|
+
_c19_ --> CIRTH_19
|
71
|
+
_c20_ --> CIRTH_20
|
72
|
+
_c21_ --> CIRTH_21
|
73
|
+
_c22_ --> CIRTH_22
|
74
|
+
_c23_ --> CIRTH_23
|
75
|
+
_c24_ --> CIRTH_24
|
76
|
+
_c25_ --> CIRTH_25
|
77
|
+
_c26_ --> CIRTH_26
|
78
|
+
_c27_ --> CIRTH_27
|
79
|
+
_c28_ --> CIRTH_28
|
80
|
+
_c29_ --> CIRTH_29
|
81
|
+
_c30_ --> CIRTH_30
|
82
|
+
_c31_ --> CIRTH_31
|
83
|
+
_c32_ --> CIRTH_32
|
84
|
+
_c33_ --> CIRTH_33
|
85
|
+
_c34_ --> CIRTH_34
|
86
|
+
_c35_ --> CIRTH_35
|
87
|
+
_c36_ --> CIRTH_36
|
88
|
+
_c37_ --> CIRTH_37
|
89
|
+
_c38_ --> CIRTH_38
|
90
|
+
_c39_ --> CIRTH_39
|
91
|
+
_c40_ --> CIRTH_40
|
92
|
+
_c41_ --> CIRTH_41
|
93
|
+
_c42_ --> CIRTH_42
|
94
|
+
_c43_ --> CIRTH_43
|
95
|
+
_c44_ --> CIRTH_44
|
96
|
+
_c45_ --> CIRTH_45
|
97
|
+
_c46_ --> CIRTH_46
|
98
|
+
_c47_ --> CIRTH_47
|
99
|
+
_c48_ --> CIRTH_48
|
100
|
+
_c49_ --> CIRTH_49
|
101
|
+
_c50_ --> CIRTH_50
|
102
|
+
_c51_ --> CIRTH_51
|
103
|
+
_c52_ --> CIRTH_52
|
104
|
+
_c53_ --> CIRTH_53
|
105
|
+
_c54_ --> CIRTH_54
|
106
|
+
_c55_ --> CIRTH_55
|
107
|
+
_c56_ --> CIRTH_56
|
108
|
+
_c57_ --> CIRTH_57
|
109
|
+
_c58_ --> CIRTH_58
|
110
|
+
_c59_ --> CIRTH_59
|
111
|
+
_c60_ --> CIRTH_60
|
112
|
+
|
113
|
+
_c38alt_ --> CIRTH_38_ALT
|
114
|
+
_c45alt_ --> CIRTH_45_ALT
|
115
|
+
_c51alt_ --> CIRTH_51_ALT
|
116
|
+
_c52alt_ --> CIRTH_52_ALT
|
117
|
+
_c55alt_ --> CIRTH_55_ALT
|
118
|
+
_c56alt_ --> CIRTH_56_ALT
|
119
|
+
|
120
|
+
_ce1_ --> CIRTH_EREB_1
|
121
|
+
_ce2_ --> CIRTH_EREB_2
|
122
|
+
_ce3_ --> CIRTH_EREB_3
|
123
|
+
_ce4_ --> CIRTH_EREB_4
|
124
|
+
_ce5_ --> CIRTH_EREB_5
|
125
|
+
_ce6_ --> CIRTH_EREB_6
|
126
|
+
_ce7_ --> CIRTH_EREB_7
|
127
|
+
|
128
|
+
_1_ --> CIRTH_NUMERAL_1
|
129
|
+
_2_ --> CIRTH_NUMERAL_2
|
130
|
+
_3_ --> CIRTH_NUMERAL_3
|
131
|
+
_4_ --> CIRTH_NUMERAL_4
|
132
|
+
_5_ --> CIRTH_NUMERAL_5
|
133
|
+
|
134
|
+
_{UNDERSCORE}_ --> TEHTA_UNDERLINE
|
135
|
+
_sdot_ --> TEHTA_SUB_DOT
|
136
|
+
_(^,circ)_ --> TEHTA_CIRCUM
|
137
|
+
|
138
|
+
{NBSP} --> NBSP
|
139
|
+
\end
|
140
|
+
|
141
|
+
\beg rules punctuation
|
142
|
+
, --> CIRTH_PUNCT_DOT
|
143
|
+
. --> CIRTH_PUNCT_MID_DOT
|
144
|
+
(..,:) --> CIRTH_PUNCT_TWO_DOTS
|
145
|
+
... --> CIRTH_PUNCT_THREE_DOTS
|
146
|
+
(....,::) --> CIRTH_PUNCT_FOUR_DOTS
|
147
|
+
\end
|
148
|
+
|
149
|
+
\end
|
150
|
+
|
151
|
+
\beg postprocessor
|
152
|
+
\resolve_virtuals
|
153
|
+
\end
|
154
|
+
|
data/lib/api/charset_parser.rb
CHANGED
@@ -64,7 +64,13 @@ module Glaemscribe
|
|
64
64
|
virtual_element.gpath("class").each { |class_element|
|
65
65
|
vc = Charset::VirtualChar::VirtualClass.new
|
66
66
|
vc.target = class_element.args[0]
|
67
|
-
vc.triggers = class_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
|
67
|
+
vc.triggers = class_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
|
68
|
+
|
69
|
+
# Allow triggers to be defined inside the body of the class element
|
70
|
+
text_lines = class_element.children.select { |c| c.text? }.map{ |c| c.args.first}
|
71
|
+
inner_triggers = text_lines.join(" ").split(/\s/).select{ |e| e != '' }
|
72
|
+
vc.triggers += inner_triggers
|
73
|
+
|
68
74
|
classes << vc
|
69
75
|
}
|
70
76
|
virtual_element.gpath("reversed").each { |reversed_element|
|
data/lib/api/mode.rb
CHANGED
@@ -22,6 +22,23 @@
|
|
22
22
|
|
23
23
|
module Glaemscribe
|
24
24
|
module API
|
25
|
+
|
26
|
+
class ModeDebugContext
|
27
|
+
attr_accessor :preprocessor_output,
|
28
|
+
:processor_pathes,
|
29
|
+
:processor_output,
|
30
|
+
:postprocessor_output,
|
31
|
+
:tts_output
|
32
|
+
|
33
|
+
def initialize
|
34
|
+
@preprocessor_output = ""
|
35
|
+
@processor_pathes = []
|
36
|
+
@processor_output = []
|
37
|
+
@postprocessor_output = ""
|
38
|
+
@tts_output = ""
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
25
42
|
class Mode
|
26
43
|
|
27
44
|
attr_accessor :errors
|
@@ -46,8 +63,6 @@ module Glaemscribe
|
|
46
63
|
|
47
64
|
attr_reader :latest_option_values
|
48
65
|
|
49
|
-
|
50
|
-
|
51
66
|
def initialize(name)
|
52
67
|
@name = name
|
53
68
|
@errors = []
|
@@ -141,13 +156,14 @@ module Glaemscribe
|
|
141
156
|
@raw_mode = loaded_raw_mode.deep_clone
|
142
157
|
end
|
143
158
|
|
144
|
-
def strict_transcribe(content, charset
|
159
|
+
def strict_transcribe(content, charset, debug_context)
|
145
160
|
charset = default_charset if !charset
|
146
161
|
return false, "*** No charset usable for transcription. Failed!" if !charset
|
147
162
|
|
148
163
|
if has_tts
|
149
164
|
begin
|
150
165
|
content = TTS.ipa(content, @current_tts_voice, (raw_mode != nil) )['ipa']
|
166
|
+
debug_context.tts_output += content
|
151
167
|
rescue StandardError => e
|
152
168
|
return false, "TTS pre-transcription failed : #{e}."
|
153
169
|
end
|
@@ -160,9 +176,16 @@ module Glaemscribe
|
|
160
176
|
l[-1] = ""
|
161
177
|
restore_lf = true
|
162
178
|
end
|
179
|
+
|
163
180
|
l = @pre_processor.apply(l)
|
164
|
-
l
|
181
|
+
debug_context.preprocessor_output += l + "\n"
|
182
|
+
|
183
|
+
l = @processor.apply(l, debug_context)
|
184
|
+
debug_context.processor_output += l
|
185
|
+
|
165
186
|
l = @post_processor.apply(l, charset)
|
187
|
+
debug_context.postprocessor_output += l + "\n"
|
188
|
+
|
166
189
|
l += "\n" if restore_lf
|
167
190
|
l
|
168
191
|
}.join
|
@@ -170,32 +193,34 @@ module Glaemscribe
|
|
170
193
|
end
|
171
194
|
|
172
195
|
def transcribe(content, charset = nil)
|
196
|
+
debug_context = ModeDebugContext.new
|
173
197
|
if raw_mode
|
174
198
|
chunks = content.split(/({{.*?}})/m)
|
175
199
|
ret = ''
|
176
200
|
res = true
|
177
201
|
chunks.each{ |c|
|
178
202
|
if c =~ /{{(.*?)}}/m
|
179
|
-
succ, r = raw_mode.strict_transcribe($1,charset)
|
203
|
+
succ, r = raw_mode.strict_transcribe($1, charset, debug_context)
|
180
204
|
|
181
205
|
if !succ
|
182
|
-
return false, r # Propagate error
|
206
|
+
return false, r, debug_context # Propagate error
|
183
207
|
end
|
184
208
|
|
185
209
|
ret += r
|
186
210
|
else
|
187
|
-
succ, r = strict_transcribe(c,charset)
|
211
|
+
succ, r = strict_transcribe(c,charset,debug_context)
|
188
212
|
|
189
213
|
if !succ
|
190
|
-
return false, r # Propagate error
|
214
|
+
return false, r, debug_context # Propagate error
|
191
215
|
end
|
192
216
|
|
193
217
|
ret += r
|
194
218
|
end
|
195
219
|
}
|
196
|
-
return res,ret
|
220
|
+
return res, ret, debug_context
|
197
221
|
else
|
198
|
-
strict_transcribe(content,charset)
|
222
|
+
succ, r = strict_transcribe(content, charset, debug_context)
|
223
|
+
return succ, r, debug_context
|
199
224
|
end
|
200
225
|
end
|
201
226
|
|
data/lib/api/mode_parser.rb
CHANGED
@@ -83,7 +83,11 @@ module Glaemscribe
|
|
83
83
|
doc.root_node.gpath("preprocessor.if").each{ |e| validate_presence_of_args(e, 1) }
|
84
84
|
doc.root_node.gpath("preprocessor.elsif").each{ |e| validate_presence_of_args(e, 1) }
|
85
85
|
doc.root_node.gpath("postprocessor.if").each{ |e| validate_presence_of_args(e, 1) }
|
86
|
-
doc.root_node.gpath("postprocessor.elsif").each{ |e| validate_presence_of_args(e, 1) }
|
86
|
+
doc.root_node.gpath("postprocessor.elsif").each{ |e| validate_presence_of_args(e, 1) }
|
87
|
+
|
88
|
+
doc.root_node.children.each { |c|
|
89
|
+
@mode.errors << Glaeml::Error.new(c.line, "'if' conditions are not allowed in that scope.") if c.name == 'if'
|
90
|
+
}
|
87
91
|
end
|
88
92
|
|
89
93
|
def create_if_cond_for_if_term(line, if_term, cond)
|
@@ -245,7 +249,7 @@ module Glaemscribe
|
|
245
249
|
if !operator_class
|
246
250
|
@mode.errors << Glaeml::Error.new(element.line,"Operator #{operator_name} is unknown.")
|
247
251
|
else
|
248
|
-
term.operators << operator_class.new(element.clone)
|
252
|
+
term.operators << operator_class.new(@mode, element.clone)
|
249
253
|
end
|
250
254
|
}
|
251
255
|
|
@@ -391,22 +395,27 @@ module Glaemscribe
|
|
391
395
|
}
|
392
396
|
traverse_if_tree(processor_context, text_procedure, element_procedure )
|
393
397
|
}
|
394
|
-
|
395
|
-
|
398
|
+
|
396
399
|
espeak_option = @mode.options['espeak_voice']
|
397
400
|
if espeak_option
|
398
401
|
# Singleton lazy load the TTS engine
|
399
402
|
# If the mode relies on espeak
|
400
|
-
TTS::load_engine
|
401
403
|
@mode.has_tts = true
|
402
404
|
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
if
|
407
|
-
|
408
|
-
|
409
|
-
|
405
|
+
begin
|
406
|
+
TTS::load_engine
|
407
|
+
|
408
|
+
# Check if all voices are supported
|
409
|
+
espeak_option.values.keys.each { |vname|
|
410
|
+
voice = TTS::option_name_to_voice(vname)
|
411
|
+
if !(TTS::voice_list.include? voice)
|
412
|
+
@mode.errors << Glaeml::Error.new(espeak_option.line, "Option has unhandled voice #{voice}.")
|
413
|
+
end
|
414
|
+
}
|
415
|
+
rescue
|
416
|
+
@mode.errors << Glaeml::Error.new(espeak_option.line, "Failed to load TTS engine.")
|
417
|
+
end
|
418
|
+
|
410
419
|
end
|
411
420
|
|
412
421
|
@mode.finalize(mode_options) if !@mode.errors.any?
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
# A post processor operator to replace the out_space on the fly.
|
24
|
+
# This has the same effect as the \outspace parameter
|
25
|
+
# But can be included in the postprocessor and benefit from the if/then logic
|
26
|
+
|
27
|
+
module Glaemscribe
|
28
|
+
module API
|
29
|
+
|
30
|
+
class OutspacePostProcessorOperator < PostProcessorOperator
|
31
|
+
def initialize(mode, glaeml_element)
|
32
|
+
super(mode, glaeml_element)
|
33
|
+
@out_space = @mode.post_processor.out_space = glaeml_element.args[0].split.reject{|token| token.empty? }
|
34
|
+
end
|
35
|
+
|
36
|
+
def apply(tokens, charset)
|
37
|
+
@mode.post_processor.out_space = @out_space
|
38
|
+
tokens
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
ResourceManager::register_post_processor_class("outspace", OutspacePostProcessorOperator)
|
43
|
+
end
|
44
|
+
end
|
data/lib/api/rule_group.rb
CHANGED
@@ -138,7 +138,7 @@ module Glaemscribe
|
|
138
138
|
var_value = apply_vars(term.line, var_value_ex, true)
|
139
139
|
|
140
140
|
if !var_value
|
141
|
-
@mode.errors << Glaeml::Error.new(term.line, "Thus, variable {#{
|
141
|
+
@mode.errors << Glaeml::Error.new(term.line, "Thus, variable {#{arg_name}} could not be declared.")
|
142
142
|
end
|
143
143
|
end
|
144
144
|
|