glaemscribe 1.1.14 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/bin/glaemscribe +21 -17
- data/glaemresources/charsets/cirth_ds.cst +540 -0
- data/glaemresources/charsets/eldamar.cst +210 -0
- data/glaemresources/charsets/sarati_eldamar.cst +256 -0
- data/glaemresources/charsets/tengwar_ds_annatar.cst +2868 -0
- data/glaemresources/charsets/tengwar_ds_eldamar.cst +2729 -0
- data/glaemresources/charsets/tengwar_ds_elfica.cst +2742 -0
- data/glaemresources/charsets/tengwar_ds_parmaite.cst +2726 -0
- data/glaemresources/charsets/tengwar_ds_sindarin.cst +2722 -0
- data/glaemresources/charsets/tengwar_freemono.cst +217 -0
- data/glaemresources/charsets/tengwar_guni_annatar.cst +2948 -0
- data/glaemresources/charsets/tengwar_guni_eldamar.cst +2809 -0
- data/glaemresources/charsets/tengwar_guni_elfica.cst +2809 -0
- data/glaemresources/charsets/tengwar_guni_parmaite.cst +2813 -0
- data/glaemresources/charsets/tengwar_guni_sindarin.cst +2808 -0
- data/glaemresources/charsets/tengwar_telcontar.cst +225 -0
- data/glaemresources/charsets/unicode_gothic.cst +64 -0
- data/glaemresources/charsets/unicode_runes.cst +121 -0
- data/glaemresources/modes/{adunaic.glaem → adunaic-tengwar-glaemscrafu.glaem} +14 -2
- data/glaemresources/modes/{blackspeech.glaem → blackspeech-tengwar-general_use.glaem} +13 -3
- data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
- data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
- data/glaemresources/modes/japanese-tengwar.glaem +776 -0
- data/glaemresources/modes/{khuzdul.glaem → khuzdul-cirth-moria.glaem} +4 -1
- data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
- data/glaemresources/modes/{futhorc.glaem → old_english-futhorc.glaem} +0 -0
- data/glaemresources/modes/{mercian.glaem → old_english-tengwar-mercian.glaem} +22 -12
- data/glaemresources/modes/{westsaxon.glaem → old_english-tengwar-westsaxon.glaem} +20 -11
- data/glaemresources/modes/{futhark-runicus.glaem → old_norse-futhark-runicus.glaem} +0 -0
- data/glaemresources/modes/{futhark-younger.glaem → old_norse-futhark-younger.glaem} +0 -0
- data/glaemresources/modes/{quenya.glaem → quenya-tengwar-classical.glaem} +32 -50
- data/glaemresources/modes/raw-cirth.glaem +154 -0
- data/glaemresources/modes/raw-tengwar.glaem +46 -23
- data/glaemresources/modes/{rlyehian.glaem → rlyehian-tengwar.glaem} +14 -3
- data/glaemresources/modes/{sindarin-daeron.glaem → sindarin-cirth-daeron.glaem} +55 -14
- data/glaemresources/modes/{sindarin-beleriand.glaem → sindarin-tengwar-beleriand.glaem} +154 -28
- data/glaemresources/modes/{sindarin.glaem → sindarin-tengwar-general_use.glaem} +86 -25
- data/glaemresources/modes/{telerin.glaem → telerin-tengwar-glaemscrafu.glaem} +16 -6
- data/glaemresources/modes/{westron.glaem → westron-tengwar-glaemscrafu.glaem} +18 -8
- data/lib/api/charset.rb +67 -7
- data/lib/api/charset_parser.rb +14 -1
- data/lib/api/constants.rb +3 -4
- data/lib/api/fragment.rb +26 -5
- data/lib/api/if_tree.rb +70 -8
- data/lib/api/macro.rb +40 -0
- data/lib/api/mode.rb +66 -19
- data/lib/api/mode_parser.rb +117 -14
- data/lib/api/object_additions.rb +23 -1
- data/lib/api/option.rb +17 -2
- data/lib/api/post_processor/outspace.rb +44 -0
- data/lib/api/post_processor/resolve_virtuals.rb +25 -9
- data/lib/api/resource_manager.rb +1 -0
- data/lib/api/rule_group.rb +170 -26
- data/lib/api/sheaf_chain_iterator.rb +1 -1
- data/lib/api/transcription_pre_post_processor.rb +8 -5
- data/lib/api/transcription_processor.rb +15 -12
- data/lib/api/tts.rb +51 -0
- data/lib/glaemscribe.rb +36 -31
- data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +35 -0
- data/lib_espeak/glaemscribe_tts.js +505 -0
- metadata +76 -24
@@ -0,0 +1,505 @@
|
|
1
|
+
/*
|
2
|
+
|
3
|
+
Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
the transcription of texts between writing systems, and more
|
5
|
+
specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
invented languages to some of his devised writing systems.
|
7
|
+
|
8
|
+
Copyright (C) 2015-2020 Benjamin Babut (Talagan).
|
9
|
+
|
10
|
+
This program is free software: you can redistribute it and/or modify
|
11
|
+
it under the terms of the GNU Affero General Public License as published by
|
12
|
+
the Free Software Foundation, either version 3 of the License, or
|
13
|
+
any later version.
|
14
|
+
|
15
|
+
This program is distributed in the hope that it will be useful,
|
16
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
GNU Affero General Public License for more details.
|
19
|
+
|
20
|
+
You should have received a copy of the GNU Affero General Public License
|
21
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
*/
|
24
|
+
|
25
|
+
|
26
|
+
// A wrapper around espeak to perform various TTS tasks,
|
27
|
+
// and generate IPA and/or WAV while keeping punctuation signs or cleaning them up.
|
28
|
+
//
|
29
|
+
// Espeak does not have this feature, so this is a significantly dirty hack.
|
30
|
+
//
|
31
|
+
// Additionally we perform a few glaemscribe-specific tasks, such as preserving raw tengwar
|
32
|
+
// or numbers which are treated independently.
|
33
|
+
|
34
|
+
// For the ruby loader, define the Glaemscribe module.
|
35
|
+
Glaemscribe = (typeof Glaemscribe !== 'undefined') ? Glaemscribe : {};

// Constructor of the TTS wrapper.
// Creates one ESpeakNGGlue instance and stores it as `proxy`; the other
// methods of this object drive the engine through that proxy.
// ESpeakNGGlue is not defined in this file and must already be loaded
// (Glaemscribe.TTS.is_engine_loaded() tests for its presence).
Glaemscribe.TTS = function() {
  this.proxy = new ESpeakNGGlue();
};
|
42
|
+
|
43
|
+
// Per-voice configuration used when instrumenting text before synthesis.
// Keys are voice names; every voice listed here is considered supported
// (pre_ipa / pre_wav reject any voice without an entry).
Glaemscribe.TTS.ipa_configurations = {
  'en-tengwar': {

    // Placeholder tokens substituted into the text before IPA calculation and
    // swapped back afterwards.
    // NOTE(review): both tokens read as empty strings here; since they are
    // later searched for in the IPA output, confirm that the intended values
    // (possibly invisible characters) were not lost.
    punct_token: '', // Invariant, for punctuation
    block_token: '', // Invariant, for special blocks (nums / raw tengwar)

    // Replace by special token AND KEEP when calculating ipa
    clauseaffecting_punctuation: "!.,;:!?–—",
    // Replace by special token but do not keep when calculating ipa
    // For those signs : '’ : apostrophes should stay in the original text !!! Don't break liz's bag !!
    // Apostrophes shouldn't trigger a pause in the pronunciation (e.g. genitives, I've, don't etc)
    // But apostrophe and single quote are the same thing.
    // It's necessary to document that single quotes should then be avoided.
    clauseunaffecting_punctuation: "·“”«»-[](){}⟨⟩<>≤≥$|\""
  }
};

// The regional variants all share the very same configuration object as
// 'en-tengwar' (same reference, not a copy).
['en-tengwar-rp', 'en-tengwar-gb', 'en-tengwar-us'].forEach(function(alias) {
  Glaemscribe.TTS.ipa_configurations[alias] = Glaemscribe.TTS.ipa_configurations['en-tengwar'];
});
|
64
|
+
|
65
|
+
|
66
|
+
// Returns the names of all configured voices, i.e. the keys of
// Glaemscribe.TTS.ipa_configurations.
// The `voice` parameter is not used.
Glaemscribe.TTS.voice_list = function(voice) {
  var configurations = Glaemscribe.TTS.ipa_configurations;
  return Object.keys(configurations);
};
|
69
|
+
|
70
|
+
// Static helper. To be used in pure js (not ruby).
|
71
|
+
// Converts an option name into a voice name: the name is lowercased, a
// leading "espeak_voice_" is removed and every underscore becomes a dash
// (e.g. "ESPEAK_VOICE_EN_TENGWAR_GB" gives "en-tengwar-gb").
// Returns null when `oname` is falsy.
Glaemscribe.TTS.option_name_to_voice = function(oname) {
  if (oname) {
    var lowered = oname.toLowerCase();
    var unprefixed = lowered.replace(/^espeak_voice_/,'');
    return unprefixed.replace(/_/g,'-');
  }

  return null;
};
|
78
|
+
|
79
|
+
|
80
|
+
// Builds a lookup table from a string: each element string[i] becomes a key
// mapped to itself, so that `table[c] != null` tells whether `c` occurs in
// the string.
Glaemscribe.TTS.prototype.make_char_checker = function(string) {
  var lookup = {};
  var pos = 0;

  while (pos < string.length) {
    var sign = string[pos];
    lookup[sign] = sign;
    pos += 1;
  }

  return lookup;
};
|
88
|
+
|
89
|
+
// Tells whether `a` is a blank, i.e. a space or a tab.
// Newlines are deliberately not covered by this test.
Glaemscribe.TTS.prototype.isSpace = function(a) {
  if (a == ' ') {
    return true;
  }
  return a == '\t';
};
|
92
|
+
|
93
|
+
// Reads a run of clause-affecting punctuation starting at `starti`.
//
// text        : the string being scanned
// starti      : index where the run must begin
// cap_checker : lookup table as built by make_char_checker
//
// Returns null when text[starti] is not in `cap_checker`. Otherwise returns
// the substring beginning at `starti` that covers all following signs found
// in `cap_checker`, including blanks lying between them, but without any
// trailing blanks.
Glaemscribe.TTS.prototype.read_cap_token = function(text, starti, cap_checker) {

  var client = this;

  // The run has to open on a known sign.
  if (cap_checker[text[starti]] == null) {
    return null;
  }

  // Move past every following sign or blank.
  var end = starti + 1;
  while (end < text.length &&
         (cap_checker[text[end]] != null || client.isSpace(text[end]))) {
    end += 1;
  }

  // Give back the blanks swallowed at the end of the run.
  while (end > starti && client.isSpace(text[end - 1])) {
    end -= 1;
  }

  return text.substring(starti, end);
};
|
123
|
+
|
124
|
+
// Tells whether the character just before index `i` is a blank (see isSpace).
// Always false at the very beginning of the text.
Glaemscribe.TTS.prototype.preceded_by_space = function(text,i) {
  return (i > 0) ? this.isSpace(text[i-1]) : false;
};
|
132
|
+
|
133
|
+
// Tells whether the character just after index `i` is a blank (see isSpace).
// Always false when `i` is the last index of the text or beyond.
Glaemscribe.TTS.prototype.succeeded_by_space = function(text,i) {
  return (i < text.length-1) ? this.isSpace(text[i+1]) : false;
};
|
141
|
+
|
142
|
+
// Escapes raw mode AND numbers
|
143
|
+
// Takes the special blocks out of `entry` so that espeak does not process them.
//
// voice   : voice name, used to look up the block token in ipa_configurations
// entry   : the text to process
// for_ipa : truthy when preparing text for IPA, falsy when preparing for wav
//
// Special blocks are raw mode sections {{ ... }} and, for IPA only, numbers
// (a digit followed by digits/whitespace, between word boundaries).
// For IPA each block is replaced by the configured block token, and the
// whitespace around it is left in place. For wav each block, together with
// the whitespace around it, is replaced by one single space.
//
// Returns [new_text, captured] where `captured` lists every full match
// (surrounding whitespace included) in order of appearance.
Glaemscribe.TTS.prototype.escape_special_blocks = function(voice, entry, for_ipa) {

  var config = Glaemscribe.TTS.ipa_configurations[voice];

  // TODO : make this configurable

  // Tokenize raw mode escaping + numbers, we don't want them to be converted to IPA.
  // Also, keep numbers in the writing, to prevent espeak from pronouncing them.
  var pattern;
  if (for_ipa) {
    pattern = /(\s*)({{[\s\S]*?}}|\b[0-9][0-9\s]*\b)(\s*)/g;
  } else {
    pattern = /(\s*)({{[\s\S]*?}})(\s*)/g;
  }

  var captured = [];

  var escaped = entry.replace(pattern, function(match, leading, block, trailing) {
    captured.push(match);

    if (for_ipa) {
      // For IPA, replace by dummy token.
      return leading + config['block_token'] + trailing;
    }

    // For wav, just replace by empty space and do not pronounce.
    return ' ';
  });

  return [escaped, captured];
};
|
168
|
+
|
169
|
+
|
170
|
+
// Replaces the punctuation of `text` by the configured punctuation token,
// remembering the original signs so that they can be put back after the IPA
// calculation.
//
// voice : voice name, used to look up the punctuation sets and the token
// text  : the text to instrument
//
// Three cases are handled, in this order:
//  - a newline becomes the bare token; the newline itself is remembered;
//  - a sign of `clauseunaffecting_punctuation` is removed and becomes the
//    token surrounded by spaces;
//  - a run of `clauseaffecting_punctuation` (see read_cap_token) becomes its
//    first sign followed by the token, everything separated by spaces.
// For the last two cases the remembered string is the original sign(s) plus
// one space on each side where the original text had a blank there.
// Any other character is copied unchanged.
//
// Returns [instrumented_text, kept_signs].
Glaemscribe.TTS.prototype.ipa_instrument_punct = function(voice, text) {

  var client = this;
  var config = Glaemscribe.TTS.ipa_configurations[voice];

  var affecting = client.make_char_checker(config['clauseaffecting_punctuation']);
  var unaffecting = client.make_char_checker(config['clauseunaffecting_punctuation']);

  // One space when the flag is set, nothing otherwise.
  var pad = function(flag) {
    return flag ? " " : "";
  };

  var accum = "";
  var kept_signs = [];
  var pos = 0;

  while (pos < text.length) {
    var sign = text[pos];
    var run = null;

    if (sign == "\n") {
      accum += config['punct_token'];
      kept_signs.push(sign);
    } else if (unaffecting[sign] != null) {
      // This sign does not affect clause analysis by espeak.
      // Put a special "word" / token in its place AND REMOVE the sign.
      // It will be restored after IPA calculation.
      accum += " " + config['punct_token'] + " ";
      kept_signs.push(
        pad(client.preceded_by_space(text, pos)) +
        sign +
        pad(client.succeeded_by_space(text, pos))
      );
    } else if ((run = client.read_cap_token(text, pos, affecting))) {
      // This punctuation affects clause analysis.
      // Put a special "word" / token after the sign AND keep the sign.
      // Spaces are always inserted, but their original placement is remembered.
      var last = pos + run.length - 1;
      accum += " " + sign + " " + config['punct_token'] + " ";
      kept_signs.push(
        pad(client.preceded_by_space(text, pos)) +
        run +
        pad(client.succeeded_by_space(text, last))
      );
      // Jump to the end of the run; the increment below steps past it.
      pos = last;
    } else {
      accum += sign;
    }

    pos += 1;
  }

  return [accum, kept_signs];
};
|
223
|
+
|
224
|
+
// Simplifies the punctuation of `text` before wav synthesis: every run of
// clause-affecting punctuation (as delimited by read_cap_token) is reduced
// to its first sign. All other characters are copied unchanged.
//
// voice : voice name, used to look up `clauseaffecting_punctuation`
// text  : the text to simplify
//
// Returns the simplified text.
Glaemscribe.TTS.prototype.wav_instrument_punct = function(voice, text) {

  var client = this;
  var config = Glaemscribe.TTS.ipa_configurations[voice];
  var affecting = client.make_char_checker(config['clauseaffecting_punctuation']);

  var accum = "";
  var pos = 0;

  while (pos < text.length) {
    var run = client.read_cap_token(text, pos, affecting);

    // The current character is always kept, whether it opens a run or not.
    accum += text[pos];

    // When it opens a run, skip the remainder of that run.
    if (run) {
      pos += run.length - 1;
    }

    pos += 1;
  }

  return accum;
};
|
247
|
+
|
248
|
+
// Escapes the special blocks of `text` for IPA calculation.
// Simply delegates to escape_special_blocks in IPA mode and returns its
// result, i.e. [instrumented_text, captured_blocks].
Glaemscribe.TTS.prototype.ipa_instrument_blocks = function(voice, text)
{
  var for_ipa = true;
  return this.escape_special_blocks(voice, text, for_ipa);
};
|
255
|
+
|
256
|
+
// Puts remembered strings back in place of a placeholder token.
//
// text        : the text in which to look for the token
// token       : the placeholder, matched literally
// kept_tokens : the strings to restore; the n-th occurrence of the token
//               (together with the whitespace around it) is replaced by
//               kept_tokens[n]
//
// Returns the restored text.
Glaemscribe.TTS.prototype.ipa_restore_tokens = function(text, token, kept_tokens) {

  // An empty token would turn the pattern below into one that matches at
  // every position of the text; there is nothing meaningful to restore then.
  if (!token) {
    return text;
  }

  // The token is data, not a pattern: neutralize regexp metacharacters.
  var literal = String(token).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  var rx = new RegExp("\\s*(" + literal + ")\\s*","g");

  var nth = -1;
  return text.replace(rx, function(match) {
    nth += 1;

    // If there are more occurrences than remembered strings, leave the match
    // untouched rather than let replace() insert the text "undefined".
    // NOTE(review): a count mismatch presumably means the token was altered
    // or duplicated upstream - confirm whether it should be reported instead.
    if (!kept_tokens || nth >= kept_tokens.length) {
      return match;
    }

    return kept_tokens[nth];
  });
};
|
268
|
+
|
269
|
+
// Finalizes the IPA produced by the engine.
//
// voice       : voice name, used to look up both placeholder tokens
// ipa         : the raw IPA string
// pre_ipa_res : the object returned by pre_ipa (its punct_tokens and
//               block_tokens are read here)
//
// Newlines of the raw IPA are turned into spaces, then the punctuation and
// the special blocks are restored in place of their tokens, in that order.
// Returns the resulting string, minus one trailing newline if there is one.
Glaemscribe.TTS.prototype.post_ipa = function(voice, ipa, pre_ipa_res) {

  var config = Glaemscribe.TTS.ipa_configurations[voice];

  var restored = ipa.replace(/\n/g, " ");
  restored = this.ipa_restore_tokens(restored, config.punct_token, pre_ipa_res.punct_tokens);
  restored = this.ipa_restore_tokens(restored, config.block_token, pre_ipa_res.block_tokens);

  // Post-treatment of the anti 'dot' pronunciation hack:
  // pre_ipa appends a newline to the text, take it away again.
  var lastIndex = restored.length - 1;
  return (restored[lastIndex] === "\n") ? restored.slice(0,-1) : restored;
};
|
284
|
+
|
285
|
+
|
286
|
+
// Prepares `text` for IPA calculation.
//
// args  : not used by this function
// voice : voice name; must have an entry in ipa_configurations
// text  : the text to prepare
//
// Throws a string when the voice has no configuration.
// Returns an object with:
//   text         : the instrumented text to hand over to the engine
//   block_tokens : the special blocks taken out of the text
//   punct_tokens : the punctuation signs taken out of the text
Glaemscribe.TTS.prototype.pre_ipa = function(args, voice, text) {

  var client = this;

  if (!Glaemscribe.TTS.ipa_configurations[voice]) {
    throw "Trying to use unsupported voice '" + voice + "'!";
  }

  // Turn all tabs into spaces, then append a newline: a small hack that
  // prevents espeak from pronouncing the last dot, which our tokenization
  // may isolate.
  var prepared = text.replace(/\t/g," ") + "\n";

  // Blocks go first (they may contain punctuation), punctuation comes next.
  var blocks = client.ipa_instrument_blocks(voice, prepared);
  var puncts = client.ipa_instrument_punct(voice, blocks[0]);

  // Small hack to always have a capital after a dot,
  // and prevent espeak from transcribing/pronouncing "dot".
  var capitalized = puncts[0].replace(/(\.\s+.)/g, function(match, p1) {
    return p1.toUpperCase();
  });

  return {
    text: capitalized,
    block_tokens: blocks[1],
    punct_tokens: puncts[1]
  };
};
|
321
|
+
|
322
|
+
// Prepares `text` for wav synthesis.
//
// args  : options; only `has_raw_mode` is read here
// voice : voice name; must have an entry in ipa_configurations
// text  : the text to prepare
//
// Throws a string when the voice has no configuration.
// Returns an object whose `text` property holds the prepared text.
Glaemscribe.TTS.prototype.pre_wav = function(args, voice, text) {

  if (!Glaemscribe.TTS.ipa_configurations[voice]) {
    throw "Trying to use unsupported voice '" + voice + "'!";
  }

  // Raw mode blocks are only stripped when the caller asks for it;
  // they are simply dropped, not pronounced.
  var speakable = args.has_raw_mode
    ? this.escape_special_blocks(voice, text, false)[0]
    : text;

  // Simplify the punctuation to avoid problems.
  return {
    text: this.wav_instrument_punct(voice, speakable)
  };
};
|
342
|
+
|
343
|
+
//////////////////
|
344
|
+
// SYNTHESIZE //
|
345
|
+
//////////////////
|
346
|
+
|
347
|
+
|
348
|
+
// Calculates the IPA transcription of `text`.
//
// text    : the text to transcribe
// args    : optional options; `voice` defaults to 'en-tengwar'
// onended : optional callback, called with the result object
//
// The result object is the one handed over by the engine, with `ipa`
// (post-processed transcription) and `synthesis_time` (milliseconds) added
// and `pho` removed.
// Returns that same object if the engine calls back before synthesize()
// returns, an empty object otherwise.
// NOTE(review): presumably the engine is synchronous - confirm against the glue.
Glaemscribe.TTS.prototype.synthesize_ipa = function(text, args, onended) {

  var client = this;
  var options = args || {};
  var voice = options.voice || 'en-tengwar';

  // Pre parse the text: raw mode things {{ ... }}, numbers and punctuation
  // are set aside and replaced by tokens, so that each of them is treated
  // as one word by the engine.
  var pipa = client.pre_ipa(options, voice, text);

  // Prepare client
  client.proxy.set_voice(voice);

  var started = new Date();
  var ret = {};

  client.proxy.synthesize(pipa['text'], false, true, true, function(result) {
    // Post parse ipa
    result.ipa = client.post_ipa(voice, result.pho, pipa);

    result.synthesis_time = (new Date() - started);
    delete result.pho;

    if (onended) {
      onended(result);
    }

    ret = result;
  });

  return ret;
};
|
382
|
+
|
383
|
+
// Should be kept separated from IPA, because we do not work on the same text
|
384
|
+
// Synthesizes `text` through the engine with the wav settings.
//
// text    : the text to speak
// args    : optional options; `voice` defaults to 'en-tengwar', `rate` to
//           120 and `pitch` to 5; `has_raw_mode` is forwarded to pre_wav
// onended : optional callback, called with the result object
//
// The result object is the one handed over by the engine, with
// `synthesis_time` (milliseconds) added and `pho` removed.
// Returns that same object if the engine calls back before synthesize()
// returns, an empty object otherwise.
// NOTE(review): presumably the engine is synchronous - confirm against the glue.
Glaemscribe.TTS.prototype.synthesize_wav = function(text, args, onended) {

  var client = this;
  var options = args || {};
  var voice = options.voice || 'en-tengwar';

  // Pre-transform text
  var pwav = client.pre_wav(options, voice, text);

  // Prepare client
  client.proxy.set_rate(options.rate || 120);
  client.proxy.set_pitch(options.pitch || 5);
  client.proxy.set_voice(voice);

  var started = new Date();
  var ret = {};

  client.proxy.synthesize(pwav['text'], true, false, false, function(result) {
    result.synthesis_time = (new Date() - started);
    delete result.pho;

    // Uint8Array > Array conversion, for ruby?
    // ret.wav = [].slice.call(ret.wav);

    if (onended) {
      onended(result);
    }

    ret = result;
  });

  return ret;
};
|
417
|
+
|
418
|
+
|
419
|
+
// Below is an experiment with a parsing tool for orthographic modes.
|
420
|
+
// Not finished and probably not usable.
|
421
|
+
// Kinds of tokens known to the orthographic parsing experiment.
// Each constant is a string equal to its own name.
Glaemscribe.TTS.TokenType = {
  WORD: 'WORD',
  NON_WORD: 'NON_WORD',
  NUM: 'NUM',
  SPACE: 'SPACE',
  PUNCT: 'PUNCT'
};
|
427
|
+
|
428
|
+
// Experimental: splits `text` into word / non-word tokens, asks the engine
// for the phonemes of the words and attaches them to the tokens as `ipa`.
//
// text : the text to analyze
//
// Returns the array of tokens; each token has `type` (a TokenType value),
// `content` (the matching piece of text) and `num` (its index in the array).
// Tokens that received a transcription also have `ipa`.
Glaemscribe.TTS.prototype.orthographic_disambiguator_en = function(text) {

  var client = this;

  // Words are runs of unicode letters. Splitting on a capturing group keeps
  // the words in the result, in between the non-word pieces.
  var uwmatcher = /(\p{L}+)/u;
  var spl = text.split(uwmatcher);

  var tokens = spl.map(function(s) {
    var t = {};
    var is_word = s.match(uwmatcher)

    t.type = (is_word)?(Glaemscribe.TTS.TokenType.WORD):(Glaemscribe.TTS.TokenType.NON_WORD);
    t.content = s;
    return t;
  });

  var tokens2 = [];

  // Handle apostrophe : a lone ' standing between two words is merged with
  // them into one single word (e.g. don't).
  // NOTE(review): the left part is read from the unmerged array (tokens[i-1])
  // while the pop() removes the last merged token, so chained apostrophes
  // (rock'n'roll) seem to lose their first part - to be confirmed.
  for(var i=0;i<tokens.length;i++) {
    // First token, last token and words are copied as they are.
    if( i == 0 || i == tokens.length-1 || tokens[i].type == Glaemscribe.TTS.TokenType.WORD ) {
      tokens2.push(tokens[i]);
      continue;
    }

    if(tokens[i].content == "'" &&
      tokens[i-1].type == Glaemscribe.TTS.TokenType.WORD &&
      tokens[i+1].type == Glaemscribe.TTS.TokenType.WORD )
    {
      // Replace the word already pushed by the merged word.
      tokens2.pop();
      var tok = {};
      tok.type = Glaemscribe.TTS.TokenType.WORD;
      tok.content = tokens[i-1].content + tokens[i].content + tokens[i+1].content;
      tokens2.push(tok);
      // Skip the right-hand word, it is now part of the merged token.
      i += 1;
    }
    else {
      tokens2.push(tokens[i]);
    }
  }
  tokens = tokens2;

  // Numerize tokens
  var i = 0;
  tokens.forEach(function(t) {
    t.num = i;
    i += 1;
  });

  // Remove non-speakable tokens
  var stokens = tokens.filter(function(t) {
    return (t.type == Glaemscribe.TTS.TokenType.WORD);
  });

  // Join speakable tokens
  var r = stokens.map(function(t) { return t.content}).join(' ');

  // `args` is a fresh empty object, so the voice is always 'en-tengwar'.
  var args = {};
  var voice = args.voice || 'en-tengwar';

  client.proxy.set_voice(voice);
  // `r` only receives the phonemes if the engine calls back before
  // synthesize() returns; otherwise it still holds the joined words.
  client.proxy.synthesize(r, false, true, true, function(result) {
    r = result.pho;
  });
  // NOTE(review): split('') cuts the string into single characters, not
  // into words - confirm the intended separator.
  r = r.split('').map(function(t) { return t.trim() });

  // Attach the j-th piece to the j-th word.
  // NOTE(review): when `r` has more entries than `stokens`, stokens[j] is
  // undefined and this throws - to be confirmed along with the split above.
  var j = 0;
  r.forEach(function(w) {
    tokens[stokens[j].num].ipa = r[j];
    j += 1;
  });

  return tokens;
}
|
502
|
+
|
503
|
+
// Tells whether the espeak glue is available, i.e. whether the name
// ESpeakNGGlue is defined at the time of the call.
Glaemscribe.TTS.is_engine_loaded = function() {
  var engineType = typeof ESpeakNGGlue;
  return engineType !== 'undefined';
};
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: glaemscribe
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Benjamin 'Talagan' Babut
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-09-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: commander
|
@@ -16,14 +16,40 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '4.
|
19
|
+
version: '4.4'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 4.4.7
|
20
23
|
type: :runtime
|
21
24
|
prerelease: false
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
24
27
|
- - "~>"
|
25
28
|
- !ruby/object:Gem::Version
|
26
|
-
version: '4.
|
29
|
+
version: '4.4'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 4.4.7
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: mini_racer
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0.2'
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 0.2.4
|
43
|
+
type: :runtime
|
44
|
+
prerelease: false
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0.2'
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 0.2.4
|
27
53
|
description: Glǽmscribe (also written Glaemscribe) is a software dedicated to the
|
28
54
|
transcription of texts between writing systems, and more specifically dedicated
|
29
55
|
to the transcription of J.R.R. Tolkien's invented languages to some of his devised
|
@@ -36,25 +62,47 @@ extra_rdoc_files: []
|
|
36
62
|
files:
|
37
63
|
- LICENSE.txt
|
38
64
|
- bin/glaemscribe
|
39
|
-
- glaemresources/
|
40
|
-
- glaemresources/
|
41
|
-
- glaemresources/
|
42
|
-
- glaemresources/
|
43
|
-
- glaemresources/
|
65
|
+
- glaemresources/charsets/cirth_ds.cst
|
66
|
+
- glaemresources/charsets/eldamar.cst
|
67
|
+
- glaemresources/charsets/sarati_eldamar.cst
|
68
|
+
- glaemresources/charsets/tengwar_ds_annatar.cst
|
69
|
+
- glaemresources/charsets/tengwar_ds_eldamar.cst
|
70
|
+
- glaemresources/charsets/tengwar_ds_elfica.cst
|
71
|
+
- glaemresources/charsets/tengwar_ds_parmaite.cst
|
72
|
+
- glaemresources/charsets/tengwar_ds_sindarin.cst
|
73
|
+
- glaemresources/charsets/tengwar_freemono.cst
|
74
|
+
- glaemresources/charsets/tengwar_guni_annatar.cst
|
75
|
+
- glaemresources/charsets/tengwar_guni_eldamar.cst
|
76
|
+
- glaemresources/charsets/tengwar_guni_elfica.cst
|
77
|
+
- glaemresources/charsets/tengwar_guni_parmaite.cst
|
78
|
+
- glaemresources/charsets/tengwar_guni_sindarin.cst
|
79
|
+
- glaemresources/charsets/tengwar_telcontar.cst
|
80
|
+
- glaemresources/charsets/unicode_gothic.cst
|
81
|
+
- glaemresources/charsets/unicode_runes.cst
|
82
|
+
- glaemresources/modes/adunaic-tengwar-glaemscrafu.glaem
|
83
|
+
- glaemresources/modes/blackspeech-tengwar-general_use.glaem
|
84
|
+
- glaemresources/modes/english-cirth-espeak.glaem
|
85
|
+
- glaemresources/modes/english-tengwar-espeak.glaem
|
44
86
|
- glaemresources/modes/gothic.glaem
|
45
|
-
- glaemresources/modes/
|
46
|
-
- glaemresources/modes/
|
87
|
+
- glaemresources/modes/japanese-tengwar.glaem
|
88
|
+
- glaemresources/modes/khuzdul-cirth-moria.glaem
|
89
|
+
- glaemresources/modes/lang_belta-tengwar-dadef.glaem
|
90
|
+
- glaemresources/modes/old_english-futhorc.glaem
|
91
|
+
- glaemresources/modes/old_english-tengwar-mercian.glaem
|
92
|
+
- glaemresources/modes/old_english-tengwar-westsaxon.glaem
|
93
|
+
- glaemresources/modes/old_norse-futhark-runicus.glaem
|
94
|
+
- glaemresources/modes/old_norse-futhark-younger.glaem
|
47
95
|
- glaemresources/modes/quenya-sarati.glaem
|
48
|
-
- glaemresources/modes/quenya.glaem
|
96
|
+
- glaemresources/modes/quenya-tengwar-classical.glaem
|
97
|
+
- glaemresources/modes/raw-cirth.glaem
|
49
98
|
- glaemresources/modes/raw-tengwar.glaem
|
50
|
-
- glaemresources/modes/rlyehian.glaem
|
51
|
-
- glaemresources/modes/sindarin-
|
52
|
-
- glaemresources/modes/sindarin-
|
53
|
-
- glaemresources/modes/sindarin.glaem
|
54
|
-
- glaemresources/modes/telerin.glaem
|
99
|
+
- glaemresources/modes/rlyehian-tengwar.glaem
|
100
|
+
- glaemresources/modes/sindarin-cirth-daeron.glaem
|
101
|
+
- glaemresources/modes/sindarin-tengwar-beleriand.glaem
|
102
|
+
- glaemresources/modes/sindarin-tengwar-general_use.glaem
|
103
|
+
- glaemresources/modes/telerin-tengwar-glaemscrafu.glaem
|
55
104
|
- glaemresources/modes/valarin-sarati.glaem
|
56
|
-
- glaemresources/modes/westron.glaem
|
57
|
-
- glaemresources/modes/westsaxon.glaem
|
105
|
+
- glaemresources/modes/westron-tengwar-glaemscrafu.glaem
|
58
106
|
- lib/api/charset.rb
|
59
107
|
- lib/api/charset_parser.rb
|
60
108
|
- lib/api/constants.rb
|
@@ -64,10 +112,12 @@ files:
|
|
64
112
|
- lib/api/glaeml.rb
|
65
113
|
- lib/api/glaeml_shellwords.rb
|
66
114
|
- lib/api/if_tree.rb
|
115
|
+
- lib/api/macro.rb
|
67
116
|
- lib/api/mode.rb
|
68
117
|
- lib/api/mode_parser.rb
|
69
118
|
- lib/api/object_additions.rb
|
70
119
|
- lib/api/option.rb
|
120
|
+
- lib/api/post_processor/outspace.rb
|
71
121
|
- lib/api/post_processor/resolve_virtuals.rb
|
72
122
|
- lib/api/post_processor/reverse.rb
|
73
123
|
- lib/api/pre_processor/downcase.rb
|
@@ -85,12 +135,15 @@ files:
|
|
85
135
|
- lib/api/transcription_pre_post_processor.rb
|
86
136
|
- lib/api/transcription_processor.rb
|
87
137
|
- lib/api/transcription_tree_node.rb
|
138
|
+
- lib/api/tts.rb
|
88
139
|
- lib/glaemscribe.rb
|
140
|
+
- lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js
|
141
|
+
- lib_espeak/glaemscribe_tts.js
|
89
142
|
homepage: https://jrrvf.com/~glaemscrafu/english/glaemscribe.html
|
90
143
|
licenses:
|
91
144
|
- AGPL-3.0
|
92
145
|
metadata: {}
|
93
|
-
post_install_message:
|
146
|
+
post_install_message:
|
94
147
|
rdoc_options: []
|
95
148
|
require_paths:
|
96
149
|
- lib
|
@@ -105,9 +158,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
105
158
|
- !ruby/object:Gem::Version
|
106
159
|
version: '0'
|
107
160
|
requirements: []
|
108
|
-
|
109
|
-
|
110
|
-
signing_key:
|
161
|
+
rubygems_version: 3.3.7
|
162
|
+
signing_key:
|
111
163
|
specification_version: 4
|
112
164
|
summary: Glǽmscribe
|
113
165
|
test_files: []
|