RubyGems - glaemscribe - Versions diffs - 1.2.0 → 1.3.0 - Mend

glaemscribe 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

checksums.yaml +4 -4
data/bin/glaemscribe +2 -2
data/glaemresources/charsets/cirth_ds.cst +514 -179
data/glaemresources/charsets/eldamar.cst +210 -0
data/glaemresources/charsets/tengwar_ds_annatar.cst +2452 -130
data/glaemresources/charsets/tengwar_ds_eldamar.cst +2319 -125
data/glaemresources/charsets/tengwar_ds_elfica.cst +2317 -126
data/glaemresources/charsets/tengwar_ds_parmaite.cst +2319 -127
data/glaemresources/charsets/tengwar_ds_sindarin.cst +2318 -127
data/glaemresources/charsets/tengwar_freemono.cst +1 -1
data/glaemresources/charsets/tengwar_guni_annatar.cst +2451 -131
data/glaemresources/charsets/tengwar_guni_eldamar.cst +2317 -126
data/glaemresources/charsets/tengwar_guni_elfica.cst +2316 -127
data/glaemresources/charsets/tengwar_guni_parmaite.cst +2319 -127
data/glaemresources/charsets/tengwar_guni_sindarin.cst +2317 -126
data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
data/glaemresources/modes/japanese-tengwar.glaem +9 -4
data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
data/glaemresources/modes/raw-cirth.glaem +154 -0
data/lib/api/charset_parser.rb +7 -1
data/lib/api/mode.rb +35 -10
data/lib/api/mode_parser.rb +21 -12
data/lib/api/post_processor/outspace.rb +44 -0
data/lib/api/rule_group.rb +1 -1
data/lib/api/transcription_pre_post_processor.rb +8 -5
data/lib/api/transcription_processor.rb +12 -9
data/lib/glaemscribe.rb +2 -0
data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
data/lib_espeak/glaemscribe_tts.js +363 -223
metadata +12 -6

data/lib_espeak/glaemscribe_tts.js CHANGED Viewed

@@ -1,11 +1,11 @@
 /*
 Glǽmscribe (also written Glaemscribe) is a software dedicated to
-the transcription of texts between writing systems, and more
-specifically dedicated to the transcription of J.R.R. Tolkien's
+the transcription of texts between writing systems, and more
+specifically dedicated to the transcription of J.R.R. Tolkien's
 invented languages to some of his devised writing systems.
-Copyright (C) 2015 Benjamin Babut (Talagan).
+Copyright (C) 2015-2020 Benjamin Babut (Talagan).
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as published by
@@ -23,71 +23,44 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
-// A wrapper around espeak to perform handle various TTS tasks,
-// and generate IPA and/or WAV while keeping punctuations.
+// A wrapper around espeak to perform various TTS tasks,
+// and generate IPA and/or WAV while keeping punctuation signs or cleaning them up.
 //
 // Espeak does not have this feature, so this is a significantly dirty hack.
-//
+//
 // Additionally we perform a few glaemscribe-specific tasks, such as preserving raw tengwar
+// or numbers which are treated independently.
 // For the ruby loader, define the Glaemscribe module.
 Glaemscribe = (typeof(Glaemscribe) === 'undefined')?({}):(Glaemscribe);
-Glaemscribe.TTS = function() {
+Glaemscribe.TTS = function() {
   var client = this;
   client.proxy = new ESpeakNGGlue();
 }
 Glaemscribe.TTS.ipa_configurations = {
-  'en': {
-    special_token_ncn: '', // no space / sign / no space
-    special_token_ncs: '', // no space / sign / space
-    special_token_scn: '', // space / sign / no space
-    special_token_scs: '', // space / sign / space
-    special_token_ipa_ncn: '',
-    special_token_ipa_ncs: '',
-    special_token_ipa_scn: '',
-    special_token_ipa_scs: '',
-    // Replace by special token AND KEEP when calculating ipa
-    clauseaffecting_punctuation: "!.,;:!?–—",
-    // Replace by special token but do not keep when calculating ipa
-    // '’ : apostrophes should stay in the original text !!! Don't break liz's bag !!
-    // This is because apostrophes shouldn't trigger a pause in the prononciation (e.g. genitives)
-    clauseunaffecting_punctuation: "·“”«»-[](){}<>≤≥$|\""
-  },
-  'fr': {
-    special_token_ncn: '', // no space / sign / no space
-    special_token_ncs: '', // no space / sign / space
-    special_token_scn: '', // space / sign / no space
-    special_token_scs: '', // space / sign / space
-    special_token_ipa_ncn: '',
-    special_token_ipa_ncs: '',
-    special_token_ipa_scn: '',
-    special_token_ipa_scs: '',
+  'en-tengwar': {
+    punct_token: '', // Invariant, for punctuation
+    block_token: '', // Invariant, for special blocks (nums / raw tengwar)
     // Replace by special token AND KEEP when calculating ipa
-    clauseaffecting_punctuation: "!.,;:!?–—",
+    clauseaffecting_punctuation: "!.,;:!?–—",
     // Replace by special token but do not keep when calculating ipa
-    // '’ : apostrophes should stay in the original text, let espeak eat them
-    // This is because apostrophes shouldn't trigger a pause in the prononciation (e.g. genitives)
-    clauseunaffecting_punctuation: "·“”«»-[](){}<>≤≥$|\"",
-    // Callback before reconstituing markers.
-    pre_reconsitute_markers_callback: function(text) {
-      // Long vowel back replacement.
-      return text.replace(/-/g,"ː");
-    }
+    // For those signs : '’ : apostrophes should stay in the original text !!! Don't break liz's bag !!
+    // Apostrophes shouldn't trigger a pause in the pronunciation (e.g. genitives, I've, don't etc)
+    // But apostrophe and single quote are the same thing.
+    // It's necessary to document that single quotes should then be avoided.
+    clauseunaffecting_punctuation: "·“”«»-[](){}⟨⟩<>≤≥$|\""
   }
 }
-Glaemscribe.TTS.ipa_configurations['en-us']              = Glaemscribe.TTS.ipa_configurations['en'];
-Glaemscribe.TTS.ipa_configurations['en-gb']              = Glaemscribe.TTS.ipa_configurations['en'];
-Glaemscribe.TTS.ipa_configurations['en-tengwar-zlegacy'] = Glaemscribe.TTS.ipa_configurations['en'];
-Glaemscribe.TTS.ipa_configurations['en-tengwar']         = Glaemscribe.TTS.ipa_configurations['en'];
-Glaemscribe.TTS.ipa_configurations['en-tengwar-gb']      = Glaemscribe.TTS.ipa_configurations['en'];
-Glaemscribe.TTS.ipa_configurations['en-tengwar-us']      = Glaemscribe.TTS.ipa_configurations['en'];
+Glaemscribe.TTS.ipa_configurations['en-tengwar']         = Glaemscribe.TTS.ipa_configurations['en-tengwar'];
+Glaemscribe.TTS.ipa_configurations['en-tengwar-rp']      = Glaemscribe.TTS.ipa_configurations['en-tengwar'];
+Glaemscribe.TTS.ipa_configurations['en-tengwar-gb']      = Glaemscribe.TTS.ipa_configurations['en-tengwar'];
+Glaemscribe.TTS.ipa_configurations['en-tengwar-us']      = Glaemscribe.TTS.ipa_configurations['en-tengwar'];
 Glaemscribe.TTS.voice_list = function(voice) {
@@ -96,6 +69,10 @@ Glaemscribe.TTS.voice_list = function(voice) {
 // Static helper. To be used in pure js (not ruby).
 Glaemscribe.TTS.option_name_to_voice = function(oname) {
+  if(!oname)
+    return null;
   return oname.toLowerCase().replace(/^espeak_voice_/,'').replace(/_/g,'-');
 }
@@ -109,257 +86,420 @@ Glaemscribe.TTS.prototype.make_char_checker = function(string){
   return cc;
 }
-Glaemscribe.TTS.prototype.escape_raw_mode = function(entry,full_remove) {
-  var rawgexp   = /({{[\s\S]*?}})/g;
-  var captured  = [];
-  var ret = entry.replace(rawgexp, function(match,p1) {
-    captured.push(match);
-    if(full_remove)
-      return ' ';
+Glaemscribe.TTS.prototype.isSpace = function(a) {
+  return (a == ' ' || a == '\t');
+}
+Glaemscribe.TTS.prototype.read_cap_token = function(text, starti, cap_checker) {
+  var client = this
+  var i   = starti;
+  var tok = ""
+  if(cap_checker[text[i]] == null)
+    return null;
+  i++;
+  // Advance the sequence
+  for(; i<text.length; i++) {
+    if( (cap_checker[text[i]] == null) && !client.isSpace(text[i])) {
+      break;
+    }
+  }
+  // Rewind trailing spaces
+  var toklen = i - starti;
+  for(i = starti + toklen - 1; i>=starti ; i--) {
+    if(client.isSpace(text[i]))
+      toklen--;
     else
-      return '∰∰';
+      break;
+  }
+  return text.substring(starti,starti+toklen);
+};
+Glaemscribe.TTS.prototype.preceded_by_space = function(text,i) {
+  var client = this;
+  if(i <= 0)
+    return false;
+  else
+    return client.isSpace(text[i-1]);
+}
+Glaemscribe.TTS.prototype.succeeded_by_space = function(text,i) {
+  var client = this;
+  if(i >= text.length-1)
+    return false;
+  else
+    return client.isSpace(text[i+1]);
+}
+// Escapes raw mode AND numbers
+Glaemscribe.TTS.prototype.escape_special_blocks = function(voice, entry, for_ipa) {
+  var config  = Glaemscribe.TTS.ipa_configurations[voice];
+  // TODO : make this configurable
+  // Tokenize raw_mode escaping + numbers, we don't want them to be converted to IPA
+  // Also, keep numbers in the writing, to prevent espeak from pronouncing them
+  var ipaexpr = /(\s*)({{[\s\S]*?}}|\b[0-9][0-9\s]*\b)(\s*)/g;
+  var wavexpr = /(\s*)({{[\s\S]*?}})(\s*)/g;
+  var rawgexp = (for_ipa)?(ipaexpr):(wavexpr);
+  var captured = [];
+  var ret = entry.replace(rawgexp, function(match,p1,p2,p3) {
+    captured.push(match);
+    if(!for_ipa)
+      return ' '; // For wav, just replace by empty space and do not pronunce.
+    else {
+      return p1 + config['block_token'] + p3; // For IPA, replace by dummy token.
+    }
   });
   return [ret, captured];
 }
-Glaemscribe.TTS.prototype.pre_ipa = function(voice,text) {
+Glaemscribe.TTS.prototype.ipa_instrument_punct = function(voice, text) {
   var client = this;
   var config = Glaemscribe.TTS.ipa_configurations[voice];
-  // Normalize all tabs by spaces
-  text = text.replace(/\t/g," ");
-  // Small hack to prevent espeak from pronouncing last dot
-  // since our tokenization may isolate it.
-  text += "\n";
   var cap = client.make_char_checker(config['clauseaffecting_punctuation']);
   var cup = client.make_char_checker(config['clauseunaffecting_punctuation']);
   var accum = "";
   var kept_signs = [];
-  var prec_is_space = false;
-  var next_is_space = false;
-  for(var i=0;i<text.length;i++)
+  var rescap = null;
+	for(var i=0;i<text.length;i++)
   {
-    // Is precedent char a space ?
-    if(i == 0)
-      prec_is_space = false;
-    else
-      prec_is_space = (text[i-1] == " ");
-    // Is precedent char a space ?
-    if(i == text.length-1)
-      next_is_space = false;
-    else
-      next_is_space = (text[i+1] == " ");
     if(text[i] == "\n")
     {
-      accum += config['special_token_ncn'];
+      accum += config['punct_token'];
       kept_signs.push(text[i]);
     }
-    else if(cap[text[i]] != null)
+    else if(cup[text[i]] != null)
     {
-      if(!prec_is_space && !next_is_space)
-      {
-        // Always insert spaces, but remember how they were placed
-        accum += " " + config['special_token_ncn'] + " " + text[i] + " ";
-        kept_signs.push(text[i]);
-      }
-      if(!prec_is_space && next_is_space)
-      {
-        // Always insert spaces, but remember how they were placed
-        accum += " " + config['special_token_ncs'] + " " + text[i] + " ";
-        kept_signs.push(text[i] + " ");
-      }
-       if(prec_is_space && !next_is_space)
-      {
-        // Always insert spaces, but remember how they were placed
-        accum += " " + config['special_token_scn'] + " " + text[i] + " ";
-        kept_signs.push(" " + text[i]);
-      }
-      if(prec_is_space && next_is_space)
-      {
-        // Always insert spaces, but remember how they were placed
-        accum += " " + config['special_token_scs'] + " " + text[i] + " ";
-        kept_signs.push(" " + text[i] + " ");
-      }
+      // This sign does not affect clause analysis by espeak.
+      // Replace the sign by a special "word" / token AND REMOVE the sign
+      // We will restore it after IPA calculation.
+      accum += " " + config['punct_token'] + " " ;
+      kept_signs.push(
+        ((client.preceded_by_space(text,i))?(" "):("")) +
+        text[i] +
+        ((client.succeeded_by_space(text,i))?(" "):(""))
+      );
     }
-    else if(cup[text[i]] != null)
+    else if(rescap = client.read_cap_token(text,i,cap))
+    {
+      // This punctuation sign affects clause analysis.
+      // Replace the sign by a special "word" / token AND keep the sign
+      // Always insert spaces, but remember how they were placed
+      accum += " " + text[i] + " " + config['punct_token'] + " " ;
+      kept_signs.push(
+        ((client.preceded_by_space(text, i))?(" "):("")) +
+        rescap +
+        ((client.succeeded_by_space(text, i + rescap.length - 1))?(" "):(""))
+      );
+      i += rescap.length - 1;
+    }
+    else
+    {
+      accum += text[i];
+    }
+  }
+  return [accum, kept_signs];
+}
+Glaemscribe.TTS.prototype.wav_instrument_punct = function(voice, text) {
+  var client  = this;
+  var config  = Glaemscribe.TTS.ipa_configurations[voice];
+  var cap     =  client.make_char_checker(config['clauseaffecting_punctuation']);
+  var accum   = "";
+  var rescap  = null;
+	for(var i=0;i<text.length;i++)
+  {
+    if(rescap = client.read_cap_token(text,i,cap))
     {
-        // The difference is that we don't keep the sign before calculating ipa.
-        // Just remove them to avoid espeak spell them
-      if(!prec_is_space && !next_is_space)
-      {
-        accum += " " + config['special_token_ncn'] + " " ;
-        kept_signs.push(text[i]);
-      }
-      if(!prec_is_space && next_is_space)
-      {
-        accum += " " + config['special_token_ncs'] + " " ;
-        kept_signs.push(text[i] + " ");
-      }
-       if(prec_is_space && !next_is_space)
-      {
-        accum += " " + config['special_token_scn'] + " " ;
-        kept_signs.push(" " + text[i]);
-      }
-      if(prec_is_space && next_is_space)
-      {
-        accum += " " + config['special_token_scs'] + " " ;
-        kept_signs.push(" " + text[i] + " ");
-      }
+      accum += text[i]; // Just keep the first sign, ignore the others
+      i += rescap.length - 1;
     }
     else
     {
       accum += text[i];
     }
   }
-  //console.log(accum);
-  //console.log(kept_signs)
-  return [accum,kept_signs];
+  return accum;
 }
-Glaemscribe.TTS.prototype.post_ipa = function(voice, ipa, kept_tokens) {
+Glaemscribe.TTS.prototype.ipa_instrument_blocks = function(voice, text)
+{
+  var client = this;
   var config = Glaemscribe.TTS.ipa_configurations[voice];
-  ipa = ipa.replace(/\n/g," ");
-  if(config['pre_reconsitute_markers_callback'])
-    ipa = config['pre_reconsitute_markers_callback'](ipa);
-  var ncnr = new RegExp("\\s*(" + config['special_token_ipa_ncn'] + ")\\s*","g");
-  var scnr = new RegExp("\\s*(" + config['special_token_ipa_scn'] + ")\\s*","g");
-  var ncsr = new RegExp("\\s*(" + config['special_token_ipa_ncs'] + ")\\s*","g");
-  var scsr = new RegExp("\\s*(" + config['special_token_ipa_scs'] + ")\\s*","g");
-  // console.log("=====")
-  // console.log(ipa)
-  // console.log(config)
-  // console.log(ncsr)
-  // Tokens have been accumulated linearly
-  ipa = ipa.replace(ncnr, function(match, contents, offset, s) {return '∰∰'; });
-  ipa = ipa.replace(ncsr, function(match, contents, offset, s) {return '∰∰'; });
-  ipa = ipa.replace(scnr, function(match, contents, offset, s) {return '∰∰'; });
-  ipa = ipa.replace(scsr, function(match, contents, offset, s) {return '∰∰'; });
-  // console.log("=====")
-  // console.log(ipa)
+  return this.escape_special_blocks(voice, text, true);
+}
+Glaemscribe.TTS.prototype.ipa_restore_tokens = function(text, token, kept_tokens) {
+  var rx = new RegExp("\\s*(" + token + ")\\s*","g");
   var nth = -1;
-  ipa = ipa.replace(/∰∰/g,function(match, contents, offset, s) {
+  text = text.replace(rx,function(match, contents, offset, s) {
     nth += 1;
     return kept_tokens[nth];
   });
+  return text;
+}
+Glaemscribe.TTS.prototype.post_ipa = function(voice, ipa, pre_ipa_res) {
+  var client = this;
+  var config = Glaemscribe.TTS.ipa_configurations[voice];
+  ipa = ipa.replace(/\n/g, " ");
+  ipa = client.ipa_restore_tokens(ipa, config.punct_token, pre_ipa_res.punct_tokens);
+  ipa = client.ipa_restore_tokens(ipa, config.block_token, pre_ipa_res.block_tokens);
   // Post-treatment of anti 'dot' pronunciation hack
-  if(ipa[ipa.length-1] === "\n")
+  if(ipa[ipa.length-1] === "\n")
     ipa = ipa.slice(0,-1);
-  // console.log("=====")
-  // console.log(ipa)
   return ipa
 }
-Glaemscribe.TTS.prototype.synthesize_ipa = function(text, args, onended) {
+Glaemscribe.TTS.prototype.pre_ipa = function(args, voice, text) {
   var client = this;
-  args            = args || {}
-  var voice       = args.voice  || 'en'
-  var ts = new Date();
-  var tp = ts;
-  // Cache raw things
-  var pre_raw_tokens = [];
+  var config = Glaemscribe.TTS.ipa_configurations[voice];
+  if(!config)
+    throw "Trying to use unsupported voice '" + voice + "'!";
+  // Normalize all tabs by spaces
+  text = text.replace(/\t/g," ");
+  // Small hack to prevent espeak from pronouncing last dot
+  // since our tokenization may isolate it.
+  text += "\n";
+  // Instrument blocks first (they may contain punctuation)
+  var bi            = client.ipa_instrument_blocks(voice,text);
+  text              = bi[0];
+  // Instrument punctuation, then
+  var pi            = client.ipa_instrument_punct(voice,text);
+  text              = pi[0];
+  // Small hack to always have a capital after a dot.
+  // And prevent espeak from transcribing/pronouncing "dot"
+  text = text.replace(/(\.\s+.)/g, function(match,p1) {
+    return p1.toUpperCase()
+  });
+  return {
+    text: text,
+    block_tokens: bi[1],
+    punct_tokens: pi[1]
+  }
+}
+Glaemscribe.TTS.prototype.pre_wav = function(args, voice, text) {
+  var client = this;
+  var config = Glaemscribe.TTS.ipa_configurations[voice];
+  if(!config)
+    throw "Trying to use unsupported voice '" + voice + "'!";
+  // First, escape the special blocks. Just ignore them.
   if(args.has_raw_mode) {
-    var pre_raw_res    = this.escape_raw_mode(text,false);
+    var pre_raw_res    = this.escape_special_blocks(voice, text, false);
     text               = pre_raw_res[0];
-    pre_raw_tokens     = pre_raw_res[1];
-  }
-  // Pre parse ipa
-  var pre_ipa_tokens  = [];
-  var pre_ipa_res     = client.pre_ipa(voice,text);
-  text                = pre_ipa_res[0];
-  pre_ipa_tokens      = pre_ipa_res[1];
-  // Restitute raw things
-  if(args.has_raw_mode) {
-    var nth = -1;
-    text = text.replace(/∰∰/g,function(match, contents, offset, s) {
-      nth += 1;
-      return pre_raw_tokens[nth];
-    });
   }
-  args = args || {}
-  client.proxy.set_voice(args.voice  || 'en');
+  // Now simplify the punctuation to avoid problems.
+  text = this.wav_instrument_punct(voice, text);
+  return {
+    text: text
+  }
+}
+//////////////////
+//  SYNTHESIZE  //
+//////////////////
+Glaemscribe.TTS.prototype.synthesize_ipa = function(text, args, onended) {
+  var client      = this;
+  args            = args || {};
+  var voice       = args.voice  || 'en-tengwar'
+  // Pre parse text and find raw mode things {{ ... }}
+  // Cache them. This will also perform the pre-instrumentation
+  // To treat each block as one word
+  var pipa = client.pre_ipa(args, voice, text);
+  text     = pipa['text'];
+  // Now the IPA is instrumented.
+  // Prepare client
+  client.proxy.set_voice(voice);
   var ts = new Date();
   var ret = {};
   client.proxy.synthesize(text, false, true, true, function(result) {
     // Post parse ipa
-    result.ipa            = client.post_ipa(voice, result.pho, pre_ipa_tokens);
+    result.ipa            = client.post_ipa(voice, result.pho, pipa);
     var te = new Date();
     result.synthesis_time = (te - ts);
     delete result.pho;
     if(onended)
       onended(result);
     ret = result;
   });
   return ret;
 }
 // Should be kept separated from IPA, because we do not work on the same text
 Glaemscribe.TTS.prototype.synthesize_wav = function(text, args, onended) {
-  var client = this;
+  var client      = this;
   args            = args || {}
-  var voice       = args.voice  || 'en'
+  var voice       = args.voice  || 'en-tengwar'
+  // Pre-transform text
+  var pwav = client.pre_wav(args, voice, text);
+  text = pwav['text'];
-  args = args || {}
+  // Prepare client
   client.proxy.set_rate(args.rate    || 120);
   client.proxy.set_pitch(args.pitch  || 5);
-  client.proxy.set_voice(args.voice  || 'en');
-  if(args.has_raw_mode) {
-    var pre_raw_res    = this.escape_raw_mode(text,true);
-    text               = pre_raw_res[0];
-  }
+  client.proxy.set_voice(voice);
-  var ret = {};
   var ts = new Date();
+  var ret = {};
   client.proxy.synthesize(text, true, false, false, function(result) {
     var te = new Date();
     result.synthesis_time = (te - ts);
     delete result.pho;
     // Uint8Array > Array conversion, for ruby?
-    // ret.wav = [].slice.call(ret.wav);
+    // ret.wav = [].slice.call(ret.wav);
     if(onended)
       onended(result);
     ret = result;
   });
   return ret;
 }
+// Below is an experiment of a parsing tool for orthographic modes.
+// Not finished and probably not usable.
+Glaemscribe.TTS.TokenType = {};
+Glaemscribe.TTS.TokenType.WORD      = 'WORD';
+Glaemscribe.TTS.TokenType.NON_WORD  = 'NON_WORD';
+Glaemscribe.TTS.TokenType.NUM       = 'NUM';
+Glaemscribe.TTS.TokenType.SPACE     = 'SPACE';
+Glaemscribe.TTS.TokenType.PUNCT     = 'PUNCT';
+Glaemscribe.TTS.prototype.orthographic_disambiguator_en = function(text) {
+  var client = this;
+  var uwmatcher = /(\p{L}+)/u;
+  var spl       = text.split(uwmatcher);
+  var tokens = spl.map(function(s) {
+    var t       = {};
+    var is_word = s.match(uwmatcher)
+    t.type    = (is_word)?(Glaemscribe.TTS.TokenType.WORD):(Glaemscribe.TTS.TokenType.NON_WORD);
+    t.content = s;
+    return t;
+  });
+  var tokens2 = [];
+  // Handle apostrophe
+  for(var i=0;i<tokens.length;i++) {
+    if( i == 0 || i == tokens.length-1 || tokens[i].type == Glaemscribe.TTS.TokenType.WORD ) {
+      tokens2.push(tokens[i]);
+      continue;
+    }
+    if(tokens[i].content == "'" &&
+      tokens[i-1].type == Glaemscribe.TTS.TokenType.WORD &&
+      tokens[i+1].type == Glaemscribe.TTS.TokenType.WORD )
+    {
+      tokens2.pop();
+      var tok     = {};
+      tok.type    = Glaemscribe.TTS.TokenType.WORD;
+      tok.content = tokens[i-1].content + tokens[i].content + tokens[i+1].content;
+      tokens2.push(tok);
+      i += 1;
+    }
+    else {
+      tokens2.push(tokens[i]);
+    }
+  }
+  tokens = tokens2;
+  // Numerize tokens
+  var i = 0;
+  tokens.forEach(function(t) {
+    t.num = i;
+    i += 1;
+  });
+  // Remove non-speakable tokens
+  var stokens = tokens.filter(function(t) {
+    return (t.type == Glaemscribe.TTS.TokenType.WORD);
+  });
+  // Join speakable tokens
+  var r = stokens.map(function(t) { return t.content}).join('  ');
+  var args  = {};
+  var voice = args.voice  || 'en-tengwar';
+  client.proxy.set_voice(voice);
+  client.proxy.synthesize(r, false, true, true, function(result) {
+    r = result.pho;
+  });
+  r = r.split('').map(function(t) { return t.trim() });
+  var j = 0;
+  r.forEach(function(w) {
+    tokens[stokens[j].num].ipa = r[j];
+    j += 1;
+  });
+  return tokens;
+}
 Glaemscribe.TTS.is_engine_loaded = function() {
   return (typeof(ESpeakNGGlue) !== 'undefined');
 };