glaemscribe 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. checksums.yaml +4 -4
  2. data/bin/glaemscribe +2 -2
  3. data/glaemresources/charsets/cirth_ds.cst +514 -179
  4. data/glaemresources/charsets/eldamar.cst +210 -0
  5. data/glaemresources/charsets/tengwar_ds_annatar.cst +2452 -130
  6. data/glaemresources/charsets/tengwar_ds_eldamar.cst +2319 -125
  7. data/glaemresources/charsets/tengwar_ds_elfica.cst +2317 -126
  8. data/glaemresources/charsets/tengwar_ds_parmaite.cst +2319 -127
  9. data/glaemresources/charsets/tengwar_ds_sindarin.cst +2318 -127
  10. data/glaemresources/charsets/tengwar_freemono.cst +1 -1
  11. data/glaemresources/charsets/tengwar_guni_annatar.cst +2451 -131
  12. data/glaemresources/charsets/tengwar_guni_eldamar.cst +2317 -126
  13. data/glaemresources/charsets/tengwar_guni_elfica.cst +2316 -127
  14. data/glaemresources/charsets/tengwar_guni_parmaite.cst +2319 -127
  15. data/glaemresources/charsets/tengwar_guni_sindarin.cst +2317 -126
  16. data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
  17. data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
  18. data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
  19. data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
  20. data/glaemresources/modes/japanese-tengwar.glaem +9 -4
  21. data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
  22. data/glaemresources/modes/raw-cirth.glaem +154 -0
  23. data/lib/api/charset_parser.rb +7 -1
  24. data/lib/api/mode.rb +35 -10
  25. data/lib/api/mode_parser.rb +21 -12
  26. data/lib/api/post_processor/outspace.rb +44 -0
  27. data/lib/api/rule_group.rb +1 -1
  28. data/lib/api/transcription_pre_post_processor.rb +8 -5
  29. data/lib/api/transcription_processor.rb +12 -9
  30. data/lib/glaemscribe.rb +2 -0
  31. data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
  32. data/lib_espeak/glaemscribe_tts.js +363 -223
  33. metadata +12 -6
@@ -1,11 +1,11 @@
1
1
  /*
2
2
 
3
3
  Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
- the transcription of texts between writing systems, and more
5
- specifically dedicated to the transcription of J.R.R. Tolkien's
4
+ the transcription of texts between writing systems, and more
5
+ specifically dedicated to the transcription of J.R.R. Tolkien's
6
6
  invented languages to some of his devised writing systems.
7
7
 
8
- Copyright (C) 2015 Benjamin Babut (Talagan).
8
+ Copyright (C) 2015-2020 Benjamin Babut (Talagan).
9
9
 
10
10
  This program is free software: you can redistribute it and/or modify
11
11
  it under the terms of the GNU Affero General Public License as published by
@@ -23,71 +23,44 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
23
23
  */
24
24
 
25
25
 
26
- // A wrapper around espeak to perform handle various TTS tasks,
27
- // and generate IPA and/or WAV while keeping punctuations.
26
+ // A wrapper around espeak to perform various TTS tasks,
27
+ // and generate IPA and/or WAV while keeping punctuation signs or cleaning them up.
28
28
  //
29
29
  // Espeak does not have this feature, so this is a significantly dirty hack.
30
- //
30
+ //
31
31
  // Additionally we perform a few glaemscribe-specific tasks, such as preserving raw tengwar
32
-
32
+ // or numbers which are treated independently.
33
33
 
34
34
  // For the ruby loader, define the Glaemscribe module.
35
35
  Glaemscribe = (typeof(Glaemscribe) === 'undefined')?({}):(Glaemscribe);
36
36
 
37
- Glaemscribe.TTS = function() {
38
-
37
+ Glaemscribe.TTS = function() {
38
+
39
39
  var client = this;
40
40
  client.proxy = new ESpeakNGGlue();
41
41
  }
42
42
 
43
43
  Glaemscribe.TTS.ipa_configurations = {
44
- 'en': {
45
- special_token_ncn: '', // no space / sign / no space
46
- special_token_ncs: '', // no space / sign / space
47
- special_token_scn: '', // space / sign / no space
48
- special_token_scs: '', // space / sign / space
49
-
50
- special_token_ipa_ncn: '',
51
- special_token_ipa_ncs: '',
52
- special_token_ipa_scn: '',
53
- special_token_ipa_scs: '',
54
- // Replace by special token AND KEEP when calculating ipa
55
- clauseaffecting_punctuation: "!.,;:!?–—",
56
- // Replace by special token but do not keep when calculating ipa
57
- // '’ : apostrophes should stay in the original text !!! Don't break liz's bag !!
58
- // This is because apostrophes shouldn't trigger a pause in the prononciation (e.g. genitives)
59
- clauseunaffecting_punctuation: "·“”«»-[](){}<>≤≥$|\""
60
- },
61
- 'fr': {
62
- special_token_ncn: '', // no space / sign / no space
63
- special_token_ncs: '', // no space / sign / space
64
- special_token_scn: '', // space / sign / no space
65
- special_token_scs: '', // space / sign / space
66
-
67
- special_token_ipa_ncn: '',
68
- special_token_ipa_ncs: '',
69
- special_token_ipa_scn: '',
70
- special_token_ipa_scs: '',
44
+ 'en-tengwar': {
45
+
46
+ punct_token: '', // Invariant, for punctuation
47
+ block_token: '', // Invariant, for special blocks (nums / raw tengwar)
48
+
71
49
  // Replace by special token AND KEEP when calculating ipa
72
- clauseaffecting_punctuation: "!.,;:!?–—",
50
+ clauseaffecting_punctuation: "!.,;:!?–—",
73
51
  // Replace by special token but do not keep when calculating ipa
74
- // '’ : apostrophes should stay in the original text, let espeak eat them
75
- // This is because apostrophes shouldn't trigger a pause in the prononciation (e.g. genitives)
76
- clauseunaffecting_punctuation: "·“”«»-[](){}<>≤≥$|\"",
77
- // Callback before reconstituing markers.
78
- pre_reconsitute_markers_callback: function(text) {
79
- // Long vowel back replacement.
80
- return text.replace(/-/g,"ː");
81
- }
52
+ // For those signs : '’ : apostrophes should stay in the original text !!! Don't break liz's bag !!
53
+ // Apostrophes shouldn't trigger a pause in the pronunciation (e.g. genitives, I've, don't, etc.)
54
+ // But apostrophe and single quote are the same thing.
55
+ // It's necessary to document that single quotes should then be avoided.
56
+ clauseunaffecting_punctuation: "·“”«»-[](){}⟨⟩<>≤≥$|\""
82
57
  }
83
58
  }
84
59
 
85
- Glaemscribe.TTS.ipa_configurations['en-us'] = Glaemscribe.TTS.ipa_configurations['en'];
86
- Glaemscribe.TTS.ipa_configurations['en-gb'] = Glaemscribe.TTS.ipa_configurations['en'];
87
- Glaemscribe.TTS.ipa_configurations['en-tengwar-zlegacy'] = Glaemscribe.TTS.ipa_configurations['en'];
88
- Glaemscribe.TTS.ipa_configurations['en-tengwar'] = Glaemscribe.TTS.ipa_configurations['en'];
89
- Glaemscribe.TTS.ipa_configurations['en-tengwar-gb'] = Glaemscribe.TTS.ipa_configurations['en'];
90
- Glaemscribe.TTS.ipa_configurations['en-tengwar-us'] = Glaemscribe.TTS.ipa_configurations['en'];
60
+ Glaemscribe.TTS.ipa_configurations['en-tengwar'] = Glaemscribe.TTS.ipa_configurations['en-tengwar'];
61
+ Glaemscribe.TTS.ipa_configurations['en-tengwar-rp'] = Glaemscribe.TTS.ipa_configurations['en-tengwar'];
62
+ Glaemscribe.TTS.ipa_configurations['en-tengwar-gb'] = Glaemscribe.TTS.ipa_configurations['en-tengwar'];
63
+ Glaemscribe.TTS.ipa_configurations['en-tengwar-us'] = Glaemscribe.TTS.ipa_configurations['en-tengwar'];
91
64
 
92
65
 
93
66
  Glaemscribe.TTS.voice_list = function(voice) {
@@ -96,6 +69,10 @@ Glaemscribe.TTS.voice_list = function(voice) {
96
69
 
97
70
  // Static helper. To be used in pure js (not ruby).
98
71
  Glaemscribe.TTS.option_name_to_voice = function(oname) {
72
+
73
+ if(!oname)
74
+ return null;
75
+
99
76
  return oname.toLowerCase().replace(/^espeak_voice_/,'').replace(/_/g,'-');
100
77
  }
101
78
 
@@ -109,257 +86,420 @@ Glaemscribe.TTS.prototype.make_char_checker = function(string){
109
86
  return cc;
110
87
  }
111
88
 
112
- Glaemscribe.TTS.prototype.escape_raw_mode = function(entry,full_remove) {
113
-
114
- var rawgexp = /({{[\s\S]*?}})/g;
115
- var captured = [];
116
-
117
- var ret = entry.replace(rawgexp, function(match,p1) {
118
-
119
- captured.push(match);
120
- if(full_remove)
121
- return ' ';
89
+ Glaemscribe.TTS.prototype.isSpace = function(a) {
90
+ return (a == ' ' || a == '\t');
91
+ }
92
+
93
+ Glaemscribe.TTS.prototype.read_cap_token = function(text, starti, cap_checker) {
94
+
95
+ var client = this
96
+ var i = starti;
97
+ var tok = ""
98
+
99
+ if(cap_checker[text[i]] == null)
100
+ return null;
101
+
102
+ i++;
103
+
104
+ // Advance the sequence
105
+ for(; i<text.length; i++) {
106
+ if( (cap_checker[text[i]] == null) && !client.isSpace(text[i])) {
107
+ break;
108
+ }
109
+ }
110
+
111
+ // Rewind trailing spaces
112
+ var toklen = i - starti;
113
+
114
+ for(i = starti + toklen - 1; i>=starti ; i--) {
115
+ if(client.isSpace(text[i]))
116
+ toklen--;
122
117
  else
123
- return '∰∰';
118
+ break;
119
+ }
120
+
121
+ return text.substring(starti,starti+toklen);
122
+ };
123
+
124
+ Glaemscribe.TTS.prototype.preceded_by_space = function(text,i) {
125
+ var client = this;
126
+
127
+ if(i <= 0)
128
+ return false;
129
+ else
130
+ return client.isSpace(text[i-1]);
131
+ }
132
+
133
+ Glaemscribe.TTS.prototype.succeeded_by_space = function(text,i) {
134
+ var client = this;
135
+
136
+ if(i >= text.length-1)
137
+ return false;
138
+ else
139
+ return client.isSpace(text[i+1]);
140
+ }
141
+
142
+ // Escapes raw mode AND numbers
143
+ Glaemscribe.TTS.prototype.escape_special_blocks = function(voice, entry, for_ipa) {
144
+
145
+ var config = Glaemscribe.TTS.ipa_configurations[voice];
146
+
147
+ // TODO : make this configurable
148
+
149
+ // Tokenize raw_mode escaping + numbers, we don't want them to be converted to IPA
150
+ // Also, keep numbers in the writing, to prevent espeak from pronouncing them
151
+ var ipaexpr = /(\s*)({{[\s\S]*?}}|\b[0-9][0-9\s]*\b)(\s*)/g;
152
+ var wavexpr = /(\s*)({{[\s\S]*?}})(\s*)/g;
153
+ var rawgexp = (for_ipa)?(ipaexpr):(wavexpr);
154
+
155
+ var captured = [];
156
+
157
+ var ret = entry.replace(rawgexp, function(match,p1,p2,p3) {
158
+ captured.push(match);
159
+ if(!for_ipa)
160
+ return ' '; // For wav, just replace by empty space and do not pronounce.
161
+ else {
162
+ return p1 + config['block_token'] + p3; // For IPA, replace by dummy token.
163
+ }
124
164
  });
125
-
165
+
126
166
  return [ret, captured];
127
167
  }
128
168
 
129
- Glaemscribe.TTS.prototype.pre_ipa = function(voice,text) {
130
-
169
+
170
+ Glaemscribe.TTS.prototype.ipa_instrument_punct = function(voice, text) {
171
+
131
172
  var client = this;
132
173
  var config = Glaemscribe.TTS.ipa_configurations[voice];
133
-
134
- // Normalize all tabs by spaces
135
- text = text.replace(/\t/g," ");
136
- // Small hack to prevent espeak from pronouncing last dot
137
- // since our tokenization may isolate it.
138
- text += "\n";
139
-
174
+
140
175
  var cap = client.make_char_checker(config['clauseaffecting_punctuation']);
141
176
  var cup = client.make_char_checker(config['clauseunaffecting_punctuation']);
142
-
177
+
143
178
  var accum = "";
144
179
  var kept_signs = [];
145
-
146
- var prec_is_space = false;
147
- var next_is_space = false;
148
- for(var i=0;i<text.length;i++)
180
+
181
+ var rescap = null;
182
+
183
+ for(var i=0;i<text.length;i++)
149
184
  {
150
- // Is precedent char a space ?
151
- if(i == 0)
152
- prec_is_space = false;
153
- else
154
- prec_is_space = (text[i-1] == " ");
155
-
156
- // Is precedent char a space ?
157
- if(i == text.length-1)
158
- next_is_space = false;
159
- else
160
- next_is_space = (text[i+1] == " ");
161
-
162
185
  if(text[i] == "\n")
163
186
  {
164
- accum += config['special_token_ncn'];
187
+ accum += config['punct_token'];
165
188
  kept_signs.push(text[i]);
166
189
  }
167
- else if(cap[text[i]] != null)
190
+ else if(cup[text[i]] != null)
168
191
  {
169
- if(!prec_is_space && !next_is_space)
170
- {
171
- // Always insert spaces, but remember how they were placed
172
- accum += " " + config['special_token_ncn'] + " " + text[i] + " ";
173
- kept_signs.push(text[i]);
174
- }
175
- if(!prec_is_space && next_is_space)
176
- {
177
- // Always insert spaces, but remember how they were placed
178
- accum += " " + config['special_token_ncs'] + " " + text[i] + " ";
179
- kept_signs.push(text[i] + " ");
180
- }
181
- if(prec_is_space && !next_is_space)
182
- {
183
- // Always insert spaces, but remember how they were placed
184
- accum += " " + config['special_token_scn'] + " " + text[i] + " ";
185
- kept_signs.push(" " + text[i]);
186
- }
187
- if(prec_is_space && next_is_space)
188
- {
189
- // Always insert spaces, but remember how they were placed
190
- accum += " " + config['special_token_scs'] + " " + text[i] + " ";
191
- kept_signs.push(" " + text[i] + " ");
192
- }
192
+ // This sign does not affect clause analysis by espeak.
193
+ // Replace the sign by a special "word" / token AND REMOVE the sign
194
+ // We will restore it after IPA calculation.
195
+ accum += " " + config['punct_token'] + " " ;
196
+ kept_signs.push(
197
+ ((client.preceded_by_space(text,i))?(" "):("")) +
198
+ text[i] +
199
+ ((client.succeeded_by_space(text,i))?(" "):(""))
200
+ );
193
201
  }
194
- else if(cup[text[i]] != null)
202
+ else if(rescap = client.read_cap_token(text,i,cap))
203
+ {
204
+ // This punctuation sign affects clause analysis.
205
+ // Replace the sign by a special "word" / token AND keep the sign
206
+ // Always insert spaces, but remember how they were placed
207
+ accum += " " + text[i] + " " + config['punct_token'] + " " ;
208
+ kept_signs.push(
209
+ ((client.preceded_by_space(text, i))?(" "):("")) +
210
+ rescap +
211
+ ((client.succeeded_by_space(text, i + rescap.length - 1))?(" "):(""))
212
+ );
213
+ i += rescap.length - 1;
214
+ }
215
+ else
216
+ {
217
+ accum += text[i];
218
+ }
219
+ }
220
+
221
+ return [accum, kept_signs];
222
+ }
223
+
224
+ Glaemscribe.TTS.prototype.wav_instrument_punct = function(voice, text) {
225
+
226
+ var client = this;
227
+ var config = Glaemscribe.TTS.ipa_configurations[voice];
228
+ var cap = client.make_char_checker(config['clauseaffecting_punctuation']);
229
+ var accum = "";
230
+ var rescap = null;
231
+
232
+ for(var i=0;i<text.length;i++)
233
+ {
234
+ if(rescap = client.read_cap_token(text,i,cap))
195
235
  {
196
- // The difference is that we don't keep the sign before calculating ipa.
197
- // Just remove them to avoid espeak spell them
198
- if(!prec_is_space && !next_is_space)
199
- {
200
- accum += " " + config['special_token_ncn'] + " " ;
201
- kept_signs.push(text[i]);
202
- }
203
- if(!prec_is_space && next_is_space)
204
- {
205
- accum += " " + config['special_token_ncs'] + " " ;
206
- kept_signs.push(text[i] + " ");
207
- }
208
- if(prec_is_space && !next_is_space)
209
- {
210
- accum += " " + config['special_token_scn'] + " " ;
211
- kept_signs.push(" " + text[i]);
212
- }
213
- if(prec_is_space && next_is_space)
214
- {
215
- accum += " " + config['special_token_scs'] + " " ;
216
- kept_signs.push(" " + text[i] + " ");
217
- }
236
+ accum += text[i]; // Just keep the first sign, ignore the others
237
+ i += rescap.length - 1;
218
238
  }
219
239
  else
220
240
  {
221
241
  accum += text[i];
222
242
  }
223
243
  }
224
-
225
- //console.log(accum);
226
- //console.log(kept_signs)
227
- return [accum,kept_signs];
244
+
245
+ return accum;
228
246
  }
229
247
 
230
- Glaemscribe.TTS.prototype.post_ipa = function(voice, ipa, kept_tokens) {
231
-
248
+ Glaemscribe.TTS.prototype.ipa_instrument_blocks = function(voice, text)
249
+ {
250
+ var client = this;
232
251
  var config = Glaemscribe.TTS.ipa_configurations[voice];
233
- ipa = ipa.replace(/\n/g," ");
234
- if(config['pre_reconsitute_markers_callback'])
235
- ipa = config['pre_reconsitute_markers_callback'](ipa);
236
-
237
- var ncnr = new RegExp("\\s*(" + config['special_token_ipa_ncn'] + ")\\s*","g");
238
- var scnr = new RegExp("\\s*(" + config['special_token_ipa_scn'] + ")\\s*","g");
239
- var ncsr = new RegExp("\\s*(" + config['special_token_ipa_ncs'] + ")\\s*","g");
240
- var scsr = new RegExp("\\s*(" + config['special_token_ipa_scs'] + ")\\s*","g");
241
-
242
- // console.log("=====")
243
- // console.log(ipa)
244
- // console.log(config)
245
- // console.log(ncsr)
246
-
247
- // Tokens have been accumulated linearly
248
- ipa = ipa.replace(ncnr, function(match, contents, offset, s) {return '∰∰'; });
249
- ipa = ipa.replace(ncsr, function(match, contents, offset, s) {return '∰∰'; });
250
- ipa = ipa.replace(scnr, function(match, contents, offset, s) {return '∰∰'; });
251
- ipa = ipa.replace(scsr, function(match, contents, offset, s) {return '∰∰'; });
252
-
253
- // console.log("=====")
254
- // console.log(ipa)
252
+
253
+ return this.escape_special_blocks(voice, text, true);
254
+ }
255
+
256
+ Glaemscribe.TTS.prototype.ipa_restore_tokens = function(text, token, kept_tokens) {
257
+
258
+ var rx = new RegExp("\\s*(" + token + ")\\s*","g");
259
+
255
260
  var nth = -1;
256
- ipa = ipa.replace(/∰∰/g,function(match, contents, offset, s) {
261
+ text = text.replace(rx,function(match, contents, offset, s) {
257
262
  nth += 1;
258
263
  return kept_tokens[nth];
259
264
  });
260
-
265
+
266
+ return text;
267
+ }
268
+
269
+ Glaemscribe.TTS.prototype.post_ipa = function(voice, ipa, pre_ipa_res) {
270
+
271
+ var client = this;
272
+ var config = Glaemscribe.TTS.ipa_configurations[voice];
273
+ ipa = ipa.replace(/\n/g, " ");
274
+
275
+ ipa = client.ipa_restore_tokens(ipa, config.punct_token, pre_ipa_res.punct_tokens);
276
+ ipa = client.ipa_restore_tokens(ipa, config.block_token, pre_ipa_res.block_tokens);
277
+
261
278
  // Post-treatment of anti 'dot' pronunciation hack
262
- if(ipa[ipa.length-1] === "\n")
279
+ if(ipa[ipa.length-1] === "\n")
263
280
  ipa = ipa.slice(0,-1);
264
-
265
- // console.log("=====")
266
- // console.log(ipa)
281
+
267
282
  return ipa
268
283
  }
269
284
 
270
285
 
271
- Glaemscribe.TTS.prototype.synthesize_ipa = function(text, args, onended) {
272
-
286
+ Glaemscribe.TTS.prototype.pre_ipa = function(args, voice, text) {
287
+
273
288
  var client = this;
274
-
275
- args = args || {}
276
- var voice = args.voice || 'en'
277
-
278
- var ts = new Date();
279
- var tp = ts;
280
-
281
- // Cache raw things
282
- var pre_raw_tokens = [];
289
+ var config = Glaemscribe.TTS.ipa_configurations[voice];
290
+
291
+ if(!config)
292
+ throw "Trying to use unsupported voice '" + voice + "'!";
293
+
294
+ // Normalize all tabs by spaces
295
+ text = text.replace(/\t/g," ");
296
+
297
+ // Small hack to prevent espeak from pronouncing last dot
298
+ // since our tokenization may isolate it.
299
+ text += "\n";
300
+
301
+ // Instrument blocks first (they may contain punctuation)
302
+ var bi = client.ipa_instrument_blocks(voice,text);
303
+ text = bi[0];
304
+
305
+ // Instrument punctuation, then
306
+ var pi = client.ipa_instrument_punct(voice,text);
307
+ text = pi[0];
308
+
309
+ // Small hack to always have a capital after a dot.
310
+ // And prevent espeak from transcribing/pronouncing "dot"
311
+ text = text.replace(/(\.\s+.)/g, function(match,p1) {
312
+ return p1.toUpperCase()
313
+ });
314
+
315
+ return {
316
+ text: text,
317
+ block_tokens: bi[1],
318
+ punct_tokens: pi[1]
319
+ }
320
+ }
321
+
322
+ Glaemscribe.TTS.prototype.pre_wav = function(args, voice, text) {
323
+ var client = this;
324
+ var config = Glaemscribe.TTS.ipa_configurations[voice];
325
+
326
+ if(!config)
327
+ throw "Trying to use unsupported voice '" + voice + "'!";
328
+
329
+ // First, escape the special blocks. Just ignore them.
283
330
  if(args.has_raw_mode) {
284
- var pre_raw_res = this.escape_raw_mode(text,false);
331
+ var pre_raw_res = this.escape_special_blocks(voice, text, false);
285
332
  text = pre_raw_res[0];
286
- pre_raw_tokens = pre_raw_res[1];
287
- }
288
-
289
- // Pre parse ipa
290
- var pre_ipa_tokens = [];
291
- var pre_ipa_res = client.pre_ipa(voice,text);
292
- text = pre_ipa_res[0];
293
- pre_ipa_tokens = pre_ipa_res[1];
294
-
295
- // Restitute raw things
296
- if(args.has_raw_mode) {
297
- var nth = -1;
298
- text = text.replace(/∰∰/g,function(match, contents, offset, s) {
299
- nth += 1;
300
- return pre_raw_tokens[nth];
301
- });
302
333
  }
303
-
304
- args = args || {}
305
- client.proxy.set_voice(args.voice || 'en');
334
+
335
+ // Now simplify the punctuation to avoid problems.
336
+ text = this.wav_instrument_punct(voice, text);
337
+
338
+ return {
339
+ text: text
340
+ }
341
+ }
342
+
343
+ //////////////////
344
+ // SYNTHESIZE //
345
+ //////////////////
346
+
347
+
348
+ Glaemscribe.TTS.prototype.synthesize_ipa = function(text, args, onended) {
349
+
350
+ var client = this;
351
+ args = args || {};
352
+ var voice = args.voice || 'en-tengwar'
353
+
354
+ // Pre parse text and find raw mode things {{ ... }}
355
+ // Cache them. This also performs the pre-instrumentation
356
+ // To treat each block as one word
357
+ var pipa = client.pre_ipa(args, voice, text);
358
+ text = pipa['text'];
359
+
360
+ // Now the IPA is instrumented.
361
+ // Prepare client
362
+ client.proxy.set_voice(voice);
306
363
 
307
364
  var ts = new Date();
308
365
  var ret = {};
309
366
  client.proxy.synthesize(text, false, true, true, function(result) {
310
-
311
367
  // Post parse ipa
312
- result.ipa = client.post_ipa(voice, result.pho, pre_ipa_tokens);
368
+ result.ipa = client.post_ipa(voice, result.pho, pipa);
313
369
 
314
370
  var te = new Date();
315
371
  result.synthesis_time = (te - ts);
316
372
  delete result.pho;
317
-
373
+
318
374
  if(onended)
319
375
  onended(result);
320
376
 
321
377
  ret = result;
322
378
  });
379
+
323
380
  return ret;
324
381
  }
325
382
 
326
383
  // Should be kept separated from IPA, because we do not work on the same text
327
384
  Glaemscribe.TTS.prototype.synthesize_wav = function(text, args, onended) {
328
-
329
- var client = this;
330
-
385
+
386
+ var client = this;
331
387
  args = args || {}
332
- var voice = args.voice || 'en'
388
+ var voice = args.voice || 'en-tengwar'
389
+
390
+ // Pre-transform text
391
+ var pwav = client.pre_wav(args, voice, text);
392
+ text = pwav['text'];
333
393
 
334
- args = args || {}
394
+ // Prepare client
335
395
  client.proxy.set_rate(args.rate || 120);
336
396
  client.proxy.set_pitch(args.pitch || 5);
337
- client.proxy.set_voice(args.voice || 'en');
338
-
339
- if(args.has_raw_mode) {
340
- var pre_raw_res = this.escape_raw_mode(text,true);
341
- text = pre_raw_res[0];
342
- }
397
+ client.proxy.set_voice(voice);
343
398
 
344
- var ret = {};
345
399
  var ts = new Date();
400
+ var ret = {};
346
401
  client.proxy.synthesize(text, true, false, false, function(result) {
347
402
  var te = new Date();
348
403
  result.synthesis_time = (te - ts);
349
404
  delete result.pho;
350
-
405
+
351
406
  // Uint8Array > Array conversion, for ruby?
352
- // ret.wav = [].slice.call(ret.wav);
353
-
407
+ // ret.wav = [].slice.call(ret.wav);
408
+
354
409
  if(onended)
355
410
  onended(result);
356
-
411
+
357
412
  ret = result;
358
413
  });
359
-
414
+
360
415
  return ret;
361
416
  }
362
417
 
418
+
419
+ // Below is an experiment with a parsing tool for orthographic modes.
420
+ // Not finished and probably not usable.
421
+ Glaemscribe.TTS.TokenType = {};
422
+ Glaemscribe.TTS.TokenType.WORD = 'WORD';
423
+ Glaemscribe.TTS.TokenType.NON_WORD = 'NON_WORD';
424
+ Glaemscribe.TTS.TokenType.NUM = 'NUM';
425
+ Glaemscribe.TTS.TokenType.SPACE = 'SPACE';
426
+ Glaemscribe.TTS.TokenType.PUNCT = 'PUNCT';
427
+
428
+ Glaemscribe.TTS.prototype.orthographic_disambiguator_en = function(text) {
429
+
430
+ var client = this;
431
+
432
+ var uwmatcher = /(\p{L}+)/u;
433
+ var spl = text.split(uwmatcher);
434
+
435
+ var tokens = spl.map(function(s) {
436
+ var t = {};
437
+ var is_word = s.match(uwmatcher)
438
+
439
+ t.type = (is_word)?(Glaemscribe.TTS.TokenType.WORD):(Glaemscribe.TTS.TokenType.NON_WORD);
440
+ t.content = s;
441
+ return t;
442
+ });
443
+
444
+ var tokens2 = [];
445
+
446
+ // Handle apostrophe
447
+ for(var i=0;i<tokens.length;i++) {
448
+ if( i == 0 || i == tokens.length-1 || tokens[i].type == Glaemscribe.TTS.TokenType.WORD ) {
449
+ tokens2.push(tokens[i]);
450
+ continue;
451
+ }
452
+
453
+ if(tokens[i].content == "'" &&
454
+ tokens[i-1].type == Glaemscribe.TTS.TokenType.WORD &&
455
+ tokens[i+1].type == Glaemscribe.TTS.TokenType.WORD )
456
+ {
457
+ tokens2.pop();
458
+ var tok = {};
459
+ tok.type = Glaemscribe.TTS.TokenType.WORD;
460
+ tok.content = tokens[i-1].content + tokens[i].content + tokens[i+1].content;
461
+ tokens2.push(tok);
462
+ i += 1;
463
+ }
464
+ else {
465
+ tokens2.push(tokens[i]);
466
+ }
467
+ }
468
+ tokens = tokens2;
469
+
470
+ // Numerize tokens
471
+ var i = 0;
472
+ tokens.forEach(function(t) {
473
+ t.num = i;
474
+ i += 1;
475
+ });
476
+
477
+ // Remove non-speakable tokens
478
+ var stokens = tokens.filter(function(t) {
479
+ return (t.type == Glaemscribe.TTS.TokenType.WORD);
480
+ });
481
+
482
+ // Join speakable tokens
483
+ var r = stokens.map(function(t) { return t.content}).join('  ');
484
+
485
+ var args = {};
486
+ var voice = args.voice || 'en-tengwar';
487
+
488
+ client.proxy.set_voice(voice);
489
+ client.proxy.synthesize(r, false, true, true, function(result) {
490
+ r = result.pho;
491
+ });
492
+ r = r.split('').map(function(t) { return t.trim() });
493
+
494
+ var j = 0;
495
+ r.forEach(function(w) {
496
+ tokens[stokens[j].num].ipa = r[j];
497
+ j += 1;
498
+ });
499
+
500
+ return tokens;
501
+ }
502
+
363
503
  Glaemscribe.TTS.is_engine_loaded = function() {
364
504
  return (typeof(ESpeakNGGlue) !== 'undefined');
365
505
  };