glaemscribe 1.1.14 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/bin/glaemscribe +21 -17
- data/glaemresources/charsets/cirth_ds.cst +540 -0
- data/glaemresources/charsets/eldamar.cst +210 -0
- data/glaemresources/charsets/sarati_eldamar.cst +256 -0
- data/glaemresources/charsets/tengwar_ds_annatar.cst +2868 -0
- data/glaemresources/charsets/tengwar_ds_eldamar.cst +2729 -0
- data/glaemresources/charsets/tengwar_ds_elfica.cst +2742 -0
- data/glaemresources/charsets/tengwar_ds_parmaite.cst +2726 -0
- data/glaemresources/charsets/tengwar_ds_sindarin.cst +2722 -0
- data/glaemresources/charsets/tengwar_freemono.cst +217 -0
- data/glaemresources/charsets/tengwar_guni_annatar.cst +2948 -0
- data/glaemresources/charsets/tengwar_guni_eldamar.cst +2809 -0
- data/glaemresources/charsets/tengwar_guni_elfica.cst +2809 -0
- data/glaemresources/charsets/tengwar_guni_parmaite.cst +2813 -0
- data/glaemresources/charsets/tengwar_guni_sindarin.cst +2808 -0
- data/glaemresources/charsets/tengwar_telcontar.cst +225 -0
- data/glaemresources/charsets/unicode_gothic.cst +64 -0
- data/glaemresources/charsets/unicode_runes.cst +121 -0
- data/glaemresources/modes/{adunaic.glaem → adunaic-tengwar-glaemscrafu.glaem} +14 -2
- data/glaemresources/modes/{blackspeech.glaem → blackspeech-tengwar-general_use.glaem} +13 -3
- data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
- data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
- data/glaemresources/modes/japanese-tengwar.glaem +776 -0
- data/glaemresources/modes/{khuzdul.glaem → khuzdul-cirth-moria.glaem} +4 -1
- data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
- data/glaemresources/modes/{futhorc.glaem → old_english-futhorc.glaem} +0 -0
- data/glaemresources/modes/{mercian.glaem → old_english-tengwar-mercian.glaem} +22 -12
- data/glaemresources/modes/{westsaxon.glaem → old_english-tengwar-westsaxon.glaem} +20 -11
- data/glaemresources/modes/{futhark-runicus.glaem → old_norse-futhark-runicus.glaem} +0 -0
- data/glaemresources/modes/{futhark-younger.glaem → old_norse-futhark-younger.glaem} +0 -0
- data/glaemresources/modes/{quenya.glaem → quenya-tengwar-classical.glaem} +32 -50
- data/glaemresources/modes/raw-cirth.glaem +154 -0
- data/glaemresources/modes/raw-tengwar.glaem +46 -23
- data/glaemresources/modes/{rlyehian.glaem → rlyehian-tengwar.glaem} +14 -3
- data/glaemresources/modes/{sindarin-daeron.glaem → sindarin-cirth-daeron.glaem} +55 -14
- data/glaemresources/modes/{sindarin-beleriand.glaem → sindarin-tengwar-beleriand.glaem} +154 -28
- data/glaemresources/modes/{sindarin.glaem → sindarin-tengwar-general_use.glaem} +86 -25
- data/glaemresources/modes/{telerin.glaem → telerin-tengwar-glaemscrafu.glaem} +16 -6
- data/glaemresources/modes/{westron.glaem → westron-tengwar-glaemscrafu.glaem} +18 -8
- data/lib/api/charset.rb +67 -7
- data/lib/api/charset_parser.rb +14 -1
- data/lib/api/constants.rb +3 -4
- data/lib/api/fragment.rb +26 -5
- data/lib/api/if_tree.rb +70 -8
- data/lib/api/macro.rb +40 -0
- data/lib/api/mode.rb +66 -19
- data/lib/api/mode_parser.rb +117 -14
- data/lib/api/object_additions.rb +23 -1
- data/lib/api/option.rb +17 -2
- data/lib/api/post_processor/outspace.rb +44 -0
- data/lib/api/post_processor/resolve_virtuals.rb +25 -9
- data/lib/api/resource_manager.rb +1 -0
- data/lib/api/rule_group.rb +170 -26
- data/lib/api/sheaf_chain_iterator.rb +1 -1
- data/lib/api/transcription_pre_post_processor.rb +8 -5
- data/lib/api/transcription_processor.rb +15 -12
- data/lib/api/tts.rb +51 -0
- data/lib/glaemscribe.rb +36 -31
- data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +35 -0
- data/lib_espeak/glaemscribe_tts.js +505 -0
- metadata +76 -24
@@ -0,0 +1,505 @@
|
|
1
|
+
/*
|
2
|
+
|
3
|
+
Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
the transcription of texts between writing systems, and more
|
5
|
+
specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
invented languages to some of his devised writing systems.
|
7
|
+
|
8
|
+
Copyright (C) 2015-2020 Benjamin Babut (Talagan).
|
9
|
+
|
10
|
+
This program is free software: you can redistribute it and/or modify
|
11
|
+
it under the terms of the GNU Affero General Public License as published by
|
12
|
+
the Free Software Foundation, either version 3 of the License, or
|
13
|
+
any later version.
|
14
|
+
|
15
|
+
This program is distributed in the hope that it will be useful,
|
16
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
GNU Affero General Public License for more details.
|
19
|
+
|
20
|
+
You should have received a copy of the GNU Affero General Public License
|
21
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
*/
|
24
|
+
|
25
|
+
|
26
|
+
// A wrapper around espeak to perform various TTS tasks,
|
27
|
+
// and generate IPA and/or WAV while keeping punctuation signs or cleaning them up.
|
28
|
+
//
|
29
|
+
// Espeak does not have this feature, so this is a significantly dirty hack.
|
30
|
+
//
|
31
|
+
// Additionally we perform a few glaemscribe-specific tasks, such as preserving raw tengwar
|
32
|
+
// or numbers which are treated independently.
|
33
|
+
|
34
|
+
// For the ruby loader, define the Glaemscribe module.
|
35
|
+
Glaemscribe = (typeof(Glaemscribe) === 'undefined')?({}):(Glaemscribe);
|
36
|
+
|
37
|
+
Glaemscribe.TTS = function() {
|
38
|
+
|
39
|
+
var client = this;
|
40
|
+
client.proxy = new ESpeakNGGlue();
|
41
|
+
}
|
42
|
+
|
43
|
+
Glaemscribe.TTS.ipa_configurations = {
|
44
|
+
'en-tengwar': {
|
45
|
+
|
46
|
+
punct_token: '', // Invariant, for punctuation
|
47
|
+
block_token: '', // Invariant, for special blocks (nums / raw tengwar)
|
48
|
+
|
49
|
+
// Replace by special token AND KEEP when calculating ipa
|
50
|
+
clauseaffecting_punctuation: "!.,;:!?–—",
|
51
|
+
// Replace by special token but do not keep when calculating ipa
|
52
|
+
// For those signs : '’ : apostrophes should stay in the original text !!! Don't break liz's bag !!
|
53
|
+
// Apostrophes shouldn't trigger a pause in the pronunciation (e.g. genitives, I've, don't etc)
|
54
|
+
// But apostrophe and single quote are the same thing.
|
55
|
+
// It's necessary to document that single quotes should then be avoided.
|
56
|
+
clauseunaffecting_punctuation: "·“”«»-[](){}⟨⟩<>≤≥$|\""
|
57
|
+
}
|
58
|
+
}
|
59
|
+
|
60
|
+
// Register the accent-specific voices as aliases of the base 'en-tengwar'
// configuration. Every alias points to the very same configuration object
// (no copy is made), so the aliases always stay in sync with the base entry.
// The base entry itself is already defined in the literal above and does not
// need to be re-assigned.
Glaemscribe.TTS.ipa_configurations['en-tengwar-rp'] = Glaemscribe.TTS.ipa_configurations['en-tengwar'];
Glaemscribe.TTS.ipa_configurations['en-tengwar-gb'] = Glaemscribe.TTS.ipa_configurations['en-tengwar'];
Glaemscribe.TTS.ipa_configurations['en-tengwar-us'] = Glaemscribe.TTS.ipa_configurations['en-tengwar'];
|
64
|
+
|
65
|
+
|
66
|
+
Glaemscribe.TTS.voice_list = function(voice) {
|
67
|
+
return Object.keys(Glaemscribe.TTS.ipa_configurations);
|
68
|
+
}
|
69
|
+
|
70
|
+
// Static helper. To be used in pure js (not ruby).
|
71
|
+
Glaemscribe.TTS.option_name_to_voice = function(oname) {
|
72
|
+
|
73
|
+
if(!oname)
|
74
|
+
return null;
|
75
|
+
|
76
|
+
return oname.toLowerCase().replace(/^espeak_voice_/,'').replace(/_/g,'-');
|
77
|
+
}
|
78
|
+
|
79
|
+
|
80
|
+
Glaemscribe.TTS.prototype.make_char_checker = function(string){
|
81
|
+
var cc = {};
|
82
|
+
for(var i=0;i<string.length;i++)
|
83
|
+
{
|
84
|
+
cc[string[i]] = string[i];
|
85
|
+
}
|
86
|
+
return cc;
|
87
|
+
}
|
88
|
+
|
89
|
+
Glaemscribe.TTS.prototype.isSpace = function(a) {
|
90
|
+
return (a == ' ' || a == '\t');
|
91
|
+
}
|
92
|
+
|
93
|
+
Glaemscribe.TTS.prototype.read_cap_token = function(text, starti, cap_checker) {
|
94
|
+
|
95
|
+
var client = this
|
96
|
+
var i = starti;
|
97
|
+
var tok = ""
|
98
|
+
|
99
|
+
if(cap_checker[text[i]] == null)
|
100
|
+
return null;
|
101
|
+
|
102
|
+
i++;
|
103
|
+
|
104
|
+
// Advance the sequence
|
105
|
+
for(; i<text.length; i++) {
|
106
|
+
if( (cap_checker[text[i]] == null) && !client.isSpace(text[i])) {
|
107
|
+
break;
|
108
|
+
}
|
109
|
+
}
|
110
|
+
|
111
|
+
// Rewind trailing spaces
|
112
|
+
var toklen = i - starti;
|
113
|
+
|
114
|
+
for(i = starti + toklen - 1; i>=starti ; i--) {
|
115
|
+
if(client.isSpace(text[i]))
|
116
|
+
toklen--;
|
117
|
+
else
|
118
|
+
break;
|
119
|
+
}
|
120
|
+
|
121
|
+
return text.substring(starti,starti+toklen);
|
122
|
+
};
|
123
|
+
|
124
|
+
Glaemscribe.TTS.prototype.preceded_by_space = function(text,i) {
|
125
|
+
var client = this;
|
126
|
+
|
127
|
+
if(i <= 0)
|
128
|
+
return false;
|
129
|
+
else
|
130
|
+
return client.isSpace(text[i-1]);
|
131
|
+
}
|
132
|
+
|
133
|
+
Glaemscribe.TTS.prototype.succeeded_by_space = function(text,i) {
|
134
|
+
var client = this;
|
135
|
+
|
136
|
+
if(i >= text.length-1)
|
137
|
+
return false;
|
138
|
+
else
|
139
|
+
return client.isSpace(text[i+1]);
|
140
|
+
}
|
141
|
+
|
142
|
+
// Escapes raw mode AND numbers
|
143
|
+
Glaemscribe.TTS.prototype.escape_special_blocks = function(voice, entry, for_ipa) {
|
144
|
+
|
145
|
+
var config = Glaemscribe.TTS.ipa_configurations[voice];
|
146
|
+
|
147
|
+
// TODO : make this configurable
|
148
|
+
|
149
|
+
// Tokenize raw_mode escaping + numbers, we don't want them to be converted to IPA
|
150
|
+
// Also, keep numbers in the writing, to prevent espeak from pronouncing them
|
151
|
+
var ipaexpr = /(\s*)({{[\s\S]*?}}|\b[0-9][0-9\s]*\b)(\s*)/g;
|
152
|
+
var wavexpr = /(\s*)({{[\s\S]*?}})(\s*)/g;
|
153
|
+
var rawgexp = (for_ipa)?(ipaexpr):(wavexpr);
|
154
|
+
|
155
|
+
var captured = [];
|
156
|
+
|
157
|
+
var ret = entry.replace(rawgexp, function(match,p1,p2,p3) {
|
158
|
+
captured.push(match);
|
159
|
+
if(!for_ipa)
|
160
|
+
return ' '; // For wav, just replace by empty space and do not pronunce.
|
161
|
+
else {
|
162
|
+
return p1 + config['block_token'] + p3; // For IPA, replace by dummy token.
|
163
|
+
}
|
164
|
+
});
|
165
|
+
|
166
|
+
return [ret, captured];
|
167
|
+
}
|
168
|
+
|
169
|
+
|
170
|
+
Glaemscribe.TTS.prototype.ipa_instrument_punct = function(voice, text) {
|
171
|
+
|
172
|
+
var client = this;
|
173
|
+
var config = Glaemscribe.TTS.ipa_configurations[voice];
|
174
|
+
|
175
|
+
var cap = client.make_char_checker(config['clauseaffecting_punctuation']);
|
176
|
+
var cup = client.make_char_checker(config['clauseunaffecting_punctuation']);
|
177
|
+
|
178
|
+
var accum = "";
|
179
|
+
var kept_signs = [];
|
180
|
+
|
181
|
+
var rescap = null;
|
182
|
+
|
183
|
+
for(var i=0;i<text.length;i++)
|
184
|
+
{
|
185
|
+
if(text[i] == "\n")
|
186
|
+
{
|
187
|
+
accum += config['punct_token'];
|
188
|
+
kept_signs.push(text[i]);
|
189
|
+
}
|
190
|
+
else if(cup[text[i]] != null)
|
191
|
+
{
|
192
|
+
// This sign does not affect clause analysis by espeak.
|
193
|
+
// Replace the sign by a special "word" / token AND REMOVE the sign
|
194
|
+
// We will restore it after IPA calculation.
|
195
|
+
accum += " " + config['punct_token'] + " " ;
|
196
|
+
kept_signs.push(
|
197
|
+
((client.preceded_by_space(text,i))?(" "):("")) +
|
198
|
+
text[i] +
|
199
|
+
((client.succeeded_by_space(text,i))?(" "):(""))
|
200
|
+
);
|
201
|
+
}
|
202
|
+
else if(rescap = client.read_cap_token(text,i,cap))
|
203
|
+
{
|
204
|
+
// This punctuation sign affects clause analysis.
|
205
|
+
// Replace the sign by a special "word" / token AND keep the sign
|
206
|
+
// Always insert spaces, but remember how they were placed
|
207
|
+
accum += " " + text[i] + " " + config['punct_token'] + " " ;
|
208
|
+
kept_signs.push(
|
209
|
+
((client.preceded_by_space(text, i))?(" "):("")) +
|
210
|
+
rescap +
|
211
|
+
((client.succeeded_by_space(text, i + rescap.length - 1))?(" "):(""))
|
212
|
+
);
|
213
|
+
i += rescap.length - 1;
|
214
|
+
}
|
215
|
+
else
|
216
|
+
{
|
217
|
+
accum += text[i];
|
218
|
+
}
|
219
|
+
}
|
220
|
+
|
221
|
+
return [accum, kept_signs];
|
222
|
+
}
|
223
|
+
|
224
|
+
Glaemscribe.TTS.prototype.wav_instrument_punct = function(voice, text) {
|
225
|
+
|
226
|
+
var client = this;
|
227
|
+
var config = Glaemscribe.TTS.ipa_configurations[voice];
|
228
|
+
var cap = client.make_char_checker(config['clauseaffecting_punctuation']);
|
229
|
+
var accum = "";
|
230
|
+
var rescap = null;
|
231
|
+
|
232
|
+
for(var i=0;i<text.length;i++)
|
233
|
+
{
|
234
|
+
if(rescap = client.read_cap_token(text,i,cap))
|
235
|
+
{
|
236
|
+
accum += text[i]; // Just keep the first sign, ignore the others
|
237
|
+
i += rescap.length - 1;
|
238
|
+
}
|
239
|
+
else
|
240
|
+
{
|
241
|
+
accum += text[i];
|
242
|
+
}
|
243
|
+
}
|
244
|
+
|
245
|
+
return accum;
|
246
|
+
}
|
247
|
+
|
248
|
+
Glaemscribe.TTS.prototype.ipa_instrument_blocks = function(voice, text)
|
249
|
+
{
|
250
|
+
var client = this;
|
251
|
+
var config = Glaemscribe.TTS.ipa_configurations[voice];
|
252
|
+
|
253
|
+
return this.escape_special_blocks(voice, text, true);
|
254
|
+
}
|
255
|
+
|
256
|
+
// Puts back, in order, the original signs/blocks that were replaced by a
// placeholder token before the IPA computation.
//
// text        : the string in which placeholders have to be restored.
// token       : the placeholder, treated as a literal string (not as a regexp).
// kept_tokens : the original contents, in order of appearance; the n-th
//               occurrence of the placeholder (together with the whitespace
//               surrounding it) is replaced by kept_tokens[n].
//
// Returns the restored string.
Glaemscribe.TTS.prototype.ipa_restore_tokens = function(text, token, kept_tokens) {

  // An empty placeholder would make the pattern below match at every
  // position of the text; there is nothing meaningful to restore then.
  if(!token)
    return text;

  // The placeholder is a literal : neutralize regexp metacharacters.
  var escaped = String(token).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  var rx      = new RegExp("\\s*(" + escaped + ")\\s*", "g");
  var kept    = kept_tokens || [];
  var nth     = -1;

  return text.replace(rx, function(match) {
    nth += 1;
    // If there are more placeholders than kept contents, leave the match
    // untouched instead of inserting the string "undefined".
    return (nth < kept.length) ? (kept[nth]) : (match);
  });
}
|
268
|
+
|
269
|
+
Glaemscribe.TTS.prototype.post_ipa = function(voice, ipa, pre_ipa_res) {
|
270
|
+
|
271
|
+
var client = this;
|
272
|
+
var config = Glaemscribe.TTS.ipa_configurations[voice];
|
273
|
+
ipa = ipa.replace(/\n/g, " ");
|
274
|
+
|
275
|
+
ipa = client.ipa_restore_tokens(ipa, config.punct_token, pre_ipa_res.punct_tokens);
|
276
|
+
ipa = client.ipa_restore_tokens(ipa, config.block_token, pre_ipa_res.block_tokens);
|
277
|
+
|
278
|
+
// Post-treatment of anti 'dot' pronunciation hack
|
279
|
+
if(ipa[ipa.length-1] === "\n")
|
280
|
+
ipa = ipa.slice(0,-1);
|
281
|
+
|
282
|
+
return ipa
|
283
|
+
}
|
284
|
+
|
285
|
+
|
286
|
+
Glaemscribe.TTS.prototype.pre_ipa = function(args, voice, text) {
|
287
|
+
|
288
|
+
var client = this;
|
289
|
+
var config = Glaemscribe.TTS.ipa_configurations[voice];
|
290
|
+
|
291
|
+
if(!config)
|
292
|
+
throw "Trying to use unsupported voice '" + voice + "'!";
|
293
|
+
|
294
|
+
// Normalize all tabs by spaces
|
295
|
+
text = text.replace(/\t/g," ");
|
296
|
+
|
297
|
+
// Small hack to prevent espeak from pronouncing last dot
|
298
|
+
// since our tokenization may isolate it.
|
299
|
+
text += "\n";
|
300
|
+
|
301
|
+
// Instrument blocks first (they may contain punctuation)
|
302
|
+
var bi = client.ipa_instrument_blocks(voice,text);
|
303
|
+
text = bi[0];
|
304
|
+
|
305
|
+
// Instrument punctuation, then
|
306
|
+
var pi = client.ipa_instrument_punct(voice,text);
|
307
|
+
text = pi[0];
|
308
|
+
|
309
|
+
// Small hack to always have a capital after a dot.
|
310
|
+
// And prevent espeak from transcribing/pronouncing "dot"
|
311
|
+
text = text.replace(/(\.\s+.)/g, function(match,p1) {
|
312
|
+
return p1.toUpperCase()
|
313
|
+
});
|
314
|
+
|
315
|
+
return {
|
316
|
+
text: text,
|
317
|
+
block_tokens: bi[1],
|
318
|
+
punct_tokens: pi[1]
|
319
|
+
}
|
320
|
+
}
|
321
|
+
|
322
|
+
Glaemscribe.TTS.prototype.pre_wav = function(args, voice, text) {
|
323
|
+
var client = this;
|
324
|
+
var config = Glaemscribe.TTS.ipa_configurations[voice];
|
325
|
+
|
326
|
+
if(!config)
|
327
|
+
throw "Trying to use unsupported voice '" + voice + "'!";
|
328
|
+
|
329
|
+
// First, escape the special blocks. Just ignore them.
|
330
|
+
if(args.has_raw_mode) {
|
331
|
+
var pre_raw_res = this.escape_special_blocks(voice, text, false);
|
332
|
+
text = pre_raw_res[0];
|
333
|
+
}
|
334
|
+
|
335
|
+
// Now simplify the punctuation to avoid problems.
|
336
|
+
text = this.wav_instrument_punct(voice, text);
|
337
|
+
|
338
|
+
return {
|
339
|
+
text: text
|
340
|
+
}
|
341
|
+
}
|
342
|
+
|
343
|
+
//////////////////
|
344
|
+
// SYNTHESIZE //
|
345
|
+
//////////////////
|
346
|
+
|
347
|
+
|
348
|
+
Glaemscribe.TTS.prototype.synthesize_ipa = function(text, args, onended) {
|
349
|
+
|
350
|
+
var client = this;
|
351
|
+
args = args || {};
|
352
|
+
var voice = args.voice || 'en-tengwar'
|
353
|
+
|
354
|
+
// Pre parse text and find raw mode things {{ ... }}
|
355
|
+
// Cache them. This will also do the pre-instrumentation
|
356
|
+
// To treat each block as one word
|
357
|
+
var pipa = client.pre_ipa(args, voice, text);
|
358
|
+
text = pipa['text'];
|
359
|
+
|
360
|
+
// Now the IPA is instrumented.
|
361
|
+
// Prepare client
|
362
|
+
client.proxy.set_voice(voice);
|
363
|
+
|
364
|
+
var ts = new Date();
|
365
|
+
var ret = {};
|
366
|
+
client.proxy.synthesize(text, false, true, true, function(result) {
|
367
|
+
// Post parse ipa
|
368
|
+
result.ipa = client.post_ipa(voice, result.pho, pipa);
|
369
|
+
|
370
|
+
var te = new Date();
|
371
|
+
result.synthesis_time = (te - ts);
|
372
|
+
delete result.pho;
|
373
|
+
|
374
|
+
if(onended)
|
375
|
+
onended(result);
|
376
|
+
|
377
|
+
ret = result;
|
378
|
+
});
|
379
|
+
|
380
|
+
return ret;
|
381
|
+
}
|
382
|
+
|
383
|
+
// Should be kept separated from IPA, because we do not work on the same text
|
384
|
+
Glaemscribe.TTS.prototype.synthesize_wav = function(text, args, onended) {
|
385
|
+
|
386
|
+
var client = this;
|
387
|
+
args = args || {}
|
388
|
+
var voice = args.voice || 'en-tengwar'
|
389
|
+
|
390
|
+
// Pre-transform text
|
391
|
+
var pwav = client.pre_wav(args, voice, text);
|
392
|
+
text = pwav['text'];
|
393
|
+
|
394
|
+
// Prepare client
|
395
|
+
client.proxy.set_rate(args.rate || 120);
|
396
|
+
client.proxy.set_pitch(args.pitch || 5);
|
397
|
+
client.proxy.set_voice(voice);
|
398
|
+
|
399
|
+
var ts = new Date();
|
400
|
+
var ret = {};
|
401
|
+
client.proxy.synthesize(text, true, false, false, function(result) {
|
402
|
+
var te = new Date();
|
403
|
+
result.synthesis_time = (te - ts);
|
404
|
+
delete result.pho;
|
405
|
+
|
406
|
+
// Uint8Array > Array conversion, for ruby?
|
407
|
+
// ret.wav = [].slice.call(ret.wav);
|
408
|
+
|
409
|
+
if(onended)
|
410
|
+
onended(result);
|
411
|
+
|
412
|
+
ret = result;
|
413
|
+
});
|
414
|
+
|
415
|
+
return ret;
|
416
|
+
}
|
417
|
+
|
418
|
+
|
419
|
+
// Below is an experiment of a parsing tool for orthographic modes.
|
420
|
+
// Not finished and probably not usable.
|
421
|
+
Glaemscribe.TTS.TokenType = {};
|
422
|
+
Glaemscribe.TTS.TokenType.WORD = 'WORD';
|
423
|
+
Glaemscribe.TTS.TokenType.NON_WORD = 'NON_WORD';
|
424
|
+
Glaemscribe.TTS.TokenType.NUM = 'NUM';
|
425
|
+
Glaemscribe.TTS.TokenType.SPACE = 'SPACE';
|
426
|
+
Glaemscribe.TTS.TokenType.PUNCT = 'PUNCT';
|
427
|
+
|
428
|
+
// Experimental : splits an english text into WORD / NON_WORD tokens and
// attaches to each WORD token the IPA computed by espeak for it.
//
// text : the text to analyze.
//
// Returns the array of tokens. Each token has a `type` (a
// Glaemscribe.TTS.TokenType value), a `content` (the source substring), a
// `num` (its index in the returned array) and, for words for which an IPA
// chunk was available, an `ipa` string.
//
// NOTE : the callback given to proxy.synthesize is expected to be invoked
// before synthesize returns, since its result is read right after the call.
Glaemscribe.TTS.prototype.orthographic_disambiguator_en = function(text) {

  var client = this;

  // Splitting on a capturing group keeps the words in the result :
  // separators land on even indexes, words on odd indexes.
  var uwmatcher = /(\p{L}+)/u;
  var spl       = text.split(uwmatcher);

  var tokens = spl.map(function(s) {
    var t       = {};
    var is_word = s.match(uwmatcher);

    t.type    = (is_word)?(Glaemscribe.TTS.TokenType.WORD):(Glaemscribe.TTS.TokenType.NON_WORD);
    t.content = s;
    return t;
  });

  var tokens2 = [];

  // Handle apostrophe : word + ' + word is merged into one single word.
  for(var i=0;i<tokens.length;i++) {
    if( i == 0 || i == tokens.length-1 || tokens[i].type == Glaemscribe.TTS.TokenType.WORD ) {
      tokens2.push(tokens[i]);
      continue;
    }

    if(tokens[i].content == "'" &&
      tokens[i-1].type == Glaemscribe.TTS.TokenType.WORD &&
      tokens[i+1].type == Glaemscribe.TTS.TokenType.WORD )
    {
      // Merge with the token that was actually emitted last : it may already
      // be the result of a previous merge (e.g. rock'n'roll), in which case
      // rebuilding from tokens[i-1] would drop its beginning.
      var prev = tokens2.pop();
      var tok  = {};
      tok.type    = Glaemscribe.TTS.TokenType.WORD;
      tok.content = prev.content + tokens[i].content + tokens[i+1].content;
      tokens2.push(tok);
      i += 1; // The following word has been consumed by the merge.
    }
    else {
      tokens2.push(tokens[i]);
    }
  }
  tokens = tokens2;

  // Numerize tokens
  tokens.forEach(function(t, index) {
    t.num = index;
  });

  // Remove non-speakable tokens
  var stokens = tokens.filter(function(t) {
    return (t.type == Glaemscribe.TTS.TokenType.WORD);
  });

  // Join speakable tokens
  var speakable = stokens.map(function(t) { return t.content }).join(' ');

  var voice = 'en-tengwar';
  var pho   = "";

  client.proxy.set_voice(voice);
  client.proxy.synthesize(speakable, false, true, true, function(result) {
    pho = result.pho;
  });

  // One IPA chunk per whitespace-separated word (splitting on '' would
  // yield single characters and overflow the word list).
  var ipa_words = String(pho || "").split(/\s+/).filter(function(w) {
    return w.length > 0;
  });

  // espeak may return more or fewer chunks than there are words : only
  // annotate the pairs that exist on both sides.
  var count = Math.min(ipa_words.length, stokens.length);
  for(var j=0;j<count;j++) {
    tokens[stokens[j].num].ipa = ipa_words[j];
  }

  return tokens;
}
|
502
|
+
|
503
|
+
Glaemscribe.TTS.is_engine_loaded = function() {
|
504
|
+
return (typeof(ESpeakNGGlue) !== 'undefined');
|
505
|
+
};
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: glaemscribe
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Benjamin 'Talagan' Babut
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-09-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: commander
|
@@ -16,14 +16,40 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '4.
|
19
|
+
version: '4.4'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 4.4.7
|
20
23
|
type: :runtime
|
21
24
|
prerelease: false
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
24
27
|
- - "~>"
|
25
28
|
- !ruby/object:Gem::Version
|
26
|
-
version: '4.
|
29
|
+
version: '4.4'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 4.4.7
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: mini_racer
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0.2'
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 0.2.4
|
43
|
+
type: :runtime
|
44
|
+
prerelease: false
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0.2'
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 0.2.4
|
27
53
|
description: Glǽmscribe (also written Glaemscribe) is a software dedicated to the
|
28
54
|
transcription of texts between writing systems, and more specifically dedicated
|
29
55
|
to the transcription of J.R.R. Tolkien's invented languages to some of his devised
|
@@ -36,25 +62,47 @@ extra_rdoc_files: []
|
|
36
62
|
files:
|
37
63
|
- LICENSE.txt
|
38
64
|
- bin/glaemscribe
|
39
|
-
- glaemresources/
|
40
|
-
- glaemresources/
|
41
|
-
- glaemresources/
|
42
|
-
- glaemresources/
|
43
|
-
- glaemresources/
|
65
|
+
- glaemresources/charsets/cirth_ds.cst
|
66
|
+
- glaemresources/charsets/eldamar.cst
|
67
|
+
- glaemresources/charsets/sarati_eldamar.cst
|
68
|
+
- glaemresources/charsets/tengwar_ds_annatar.cst
|
69
|
+
- glaemresources/charsets/tengwar_ds_eldamar.cst
|
70
|
+
- glaemresources/charsets/tengwar_ds_elfica.cst
|
71
|
+
- glaemresources/charsets/tengwar_ds_parmaite.cst
|
72
|
+
- glaemresources/charsets/tengwar_ds_sindarin.cst
|
73
|
+
- glaemresources/charsets/tengwar_freemono.cst
|
74
|
+
- glaemresources/charsets/tengwar_guni_annatar.cst
|
75
|
+
- glaemresources/charsets/tengwar_guni_eldamar.cst
|
76
|
+
- glaemresources/charsets/tengwar_guni_elfica.cst
|
77
|
+
- glaemresources/charsets/tengwar_guni_parmaite.cst
|
78
|
+
- glaemresources/charsets/tengwar_guni_sindarin.cst
|
79
|
+
- glaemresources/charsets/tengwar_telcontar.cst
|
80
|
+
- glaemresources/charsets/unicode_gothic.cst
|
81
|
+
- glaemresources/charsets/unicode_runes.cst
|
82
|
+
- glaemresources/modes/adunaic-tengwar-glaemscrafu.glaem
|
83
|
+
- glaemresources/modes/blackspeech-tengwar-general_use.glaem
|
84
|
+
- glaemresources/modes/english-cirth-espeak.glaem
|
85
|
+
- glaemresources/modes/english-tengwar-espeak.glaem
|
44
86
|
- glaemresources/modes/gothic.glaem
|
45
|
-
- glaemresources/modes/
|
46
|
-
- glaemresources/modes/
|
87
|
+
- glaemresources/modes/japanese-tengwar.glaem
|
88
|
+
- glaemresources/modes/khuzdul-cirth-moria.glaem
|
89
|
+
- glaemresources/modes/lang_belta-tengwar-dadef.glaem
|
90
|
+
- glaemresources/modes/old_english-futhorc.glaem
|
91
|
+
- glaemresources/modes/old_english-tengwar-mercian.glaem
|
92
|
+
- glaemresources/modes/old_english-tengwar-westsaxon.glaem
|
93
|
+
- glaemresources/modes/old_norse-futhark-runicus.glaem
|
94
|
+
- glaemresources/modes/old_norse-futhark-younger.glaem
|
47
95
|
- glaemresources/modes/quenya-sarati.glaem
|
48
|
-
- glaemresources/modes/quenya.glaem
|
96
|
+
- glaemresources/modes/quenya-tengwar-classical.glaem
|
97
|
+
- glaemresources/modes/raw-cirth.glaem
|
49
98
|
- glaemresources/modes/raw-tengwar.glaem
|
50
|
-
- glaemresources/modes/rlyehian.glaem
|
51
|
-
- glaemresources/modes/sindarin-
|
52
|
-
- glaemresources/modes/sindarin-
|
53
|
-
- glaemresources/modes/sindarin.glaem
|
54
|
-
- glaemresources/modes/telerin.glaem
|
99
|
+
- glaemresources/modes/rlyehian-tengwar.glaem
|
100
|
+
- glaemresources/modes/sindarin-cirth-daeron.glaem
|
101
|
+
- glaemresources/modes/sindarin-tengwar-beleriand.glaem
|
102
|
+
- glaemresources/modes/sindarin-tengwar-general_use.glaem
|
103
|
+
- glaemresources/modes/telerin-tengwar-glaemscrafu.glaem
|
55
104
|
- glaemresources/modes/valarin-sarati.glaem
|
56
|
-
- glaemresources/modes/westron.glaem
|
57
|
-
- glaemresources/modes/westsaxon.glaem
|
105
|
+
- glaemresources/modes/westron-tengwar-glaemscrafu.glaem
|
58
106
|
- lib/api/charset.rb
|
59
107
|
- lib/api/charset_parser.rb
|
60
108
|
- lib/api/constants.rb
|
@@ -64,10 +112,12 @@ files:
|
|
64
112
|
- lib/api/glaeml.rb
|
65
113
|
- lib/api/glaeml_shellwords.rb
|
66
114
|
- lib/api/if_tree.rb
|
115
|
+
- lib/api/macro.rb
|
67
116
|
- lib/api/mode.rb
|
68
117
|
- lib/api/mode_parser.rb
|
69
118
|
- lib/api/object_additions.rb
|
70
119
|
- lib/api/option.rb
|
120
|
+
- lib/api/post_processor/outspace.rb
|
71
121
|
- lib/api/post_processor/resolve_virtuals.rb
|
72
122
|
- lib/api/post_processor/reverse.rb
|
73
123
|
- lib/api/pre_processor/downcase.rb
|
@@ -85,12 +135,15 @@ files:
|
|
85
135
|
- lib/api/transcription_pre_post_processor.rb
|
86
136
|
- lib/api/transcription_processor.rb
|
87
137
|
- lib/api/transcription_tree_node.rb
|
138
|
+
- lib/api/tts.rb
|
88
139
|
- lib/glaemscribe.rb
|
140
|
+
- lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js
|
141
|
+
- lib_espeak/glaemscribe_tts.js
|
89
142
|
homepage: https://jrrvf.com/~glaemscrafu/english/glaemscribe.html
|
90
143
|
licenses:
|
91
144
|
- AGPL-3.0
|
92
145
|
metadata: {}
|
93
|
-
post_install_message:
|
146
|
+
post_install_message:
|
94
147
|
rdoc_options: []
|
95
148
|
require_paths:
|
96
149
|
- lib
|
@@ -105,9 +158,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
105
158
|
- !ruby/object:Gem::Version
|
106
159
|
version: '0'
|
107
160
|
requirements: []
|
108
|
-
|
109
|
-
|
110
|
-
signing_key:
|
161
|
+
rubygems_version: 3.3.7
|
162
|
+
signing_key:
|
111
163
|
specification_version: 4
|
112
164
|
summary: Glǽmscribe
|
113
165
|
test_files: []
|