glaemscribe 1.2.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/bin/glaemscribe +2 -2
 - data/glaemresources/charsets/cirth_ds.cst +514 -179
 - data/glaemresources/charsets/eldamar.cst +210 -0
 - data/glaemresources/charsets/tengwar_ds_annatar.cst +2776 -348
 - data/glaemresources/charsets/tengwar_ds_eldamar.cst +2648 -351
 - data/glaemresources/charsets/tengwar_ds_elfica.cst +2639 -346
 - data/glaemresources/charsets/tengwar_ds_parmaite.cst +2648 -351
 - data/glaemresources/charsets/tengwar_ds_sindarin.cst +2642 -348
 - data/glaemresources/charsets/tengwar_freemono.cst +1 -1
 - data/glaemresources/charsets/tengwar_guni_annatar.cst +2725 -300
 - data/glaemresources/charsets/tengwar_guni_eldamar.cst +2589 -295
 - data/glaemresources/charsets/tengwar_guni_elfica.cst +2592 -298
 - data/glaemresources/charsets/tengwar_guni_parmaite.cst +2592 -297
 - data/glaemresources/charsets/tengwar_guni_sindarin.cst +2591 -297
 - data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
 - data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
 - data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
 - data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
 - data/glaemresources/modes/japanese-tengwar.glaem +9 -4
 - data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
 - data/glaemresources/modes/raw-cirth.glaem +154 -0
 - data/lib/api/charset.rb +124 -57
 - data/lib/api/charset_parser.rb +39 -26
 - data/lib/api/mode.rb +35 -10
 - data/lib/api/mode_parser.rb +21 -12
 - data/lib/api/post_processor/outspace.rb +44 -0
 - data/lib/api/post_processor/resolve_virtuals.rb +41 -19
 - data/lib/api/rule_group.rb +1 -1
 - data/lib/api/transcription_pre_post_processor.rb +51 -45
 - data/lib/api/transcription_processor.rb +12 -9
 - data/lib/glaemscribe.rb +2 -0
 - data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
 - data/lib_espeak/glaemscribe_tts.js +363 -223
 - metadata +12 -6
 
| 
         @@ -1,11 +1,11 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            /*
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            Glǽmscribe (also written Glaemscribe) is a software dedicated to
         
     | 
| 
       4 
     | 
    
         
            -
            the transcription of texts between writing systems, and more 
     | 
| 
       5 
     | 
    
         
            -
            specifically dedicated to the transcription of J.R.R. Tolkien's 
     | 
| 
      
 4 
     | 
    
         
            +
            the transcription of texts between writing systems, and more
         
     | 
| 
      
 5 
     | 
    
         
            +
            specifically dedicated to the transcription of J.R.R. Tolkien's
         
     | 
| 
       6 
6 
     | 
    
         
             
            invented languages to some of his devised writing systems.
         
     | 
| 
       7 
7 
     | 
    
         | 
| 
       8 
     | 
    
         
            -
            Copyright (C) 2015 Benjamin Babut (Talagan).
         
     | 
| 
      
 8 
     | 
    
         
            +
            Copyright (C) 2015-2020 Benjamin Babut (Talagan).
         
     | 
| 
       9 
9 
     | 
    
         | 
| 
       10 
10 
     | 
    
         
             
            This program is free software: you can redistribute it and/or modify
         
     | 
| 
       11 
11 
     | 
    
         
             
            it under the terms of the GNU Affero General Public License as published by
         
     | 
| 
         @@ -23,71 +23,44 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>. 
     | 
|
| 
       23 
23 
     | 
    
         
             
            */
         
     | 
| 
       24 
24 
     | 
    
         | 
| 
       25 
25 
     | 
    
         | 
| 
       26 
     | 
    
         
            -
            // A wrapper around espeak to perform  
     | 
| 
       27 
     | 
    
         
            -
            // and generate IPA and/or WAV while keeping  
     | 
| 
      
 26 
     | 
    
         
            +
            // A wrapper around espeak to perform various TTS tasks,
         
     | 
| 
      
 27 
     | 
    
         
            +
            // and generate IPA and/or WAV while keeping punctuation signs or cleaning them up.
         
     | 
| 
       28 
28 
     | 
    
         
             
            //
         
     | 
| 
       29 
29 
     | 
    
         
             
            // Espeak does not have this feature, so this is a significantly dirty hack.
         
     | 
| 
       30 
     | 
    
         
            -
            // 
     | 
| 
      
 30 
     | 
    
         
            +
            //
         
     | 
| 
       31 
31 
     | 
    
         
             
            // Additionally we perform a few glaemscribe-specific tasks, such as preserving raw tengwar
         
     | 
| 
       32 
     | 
    
         
            -
             
     | 
| 
      
 32 
     | 
    
         
            +
            // or numbers which are treated independently.
         
     | 
| 
       33 
33 
     | 
    
         | 
| 
       34 
34 
     | 
    
         
             
            // For the ruby loader, define the Glaemscribe module.
         
     | 
| 
       35 
35 
     | 
    
         
             
            Glaemscribe = (typeof(Glaemscribe) === 'undefined')?({}):(Glaemscribe);
         
     | 
| 
       36 
36 
     | 
    
         | 
| 
       37 
     | 
    
         
            -
            Glaemscribe.TTS = function() { 
     | 
| 
       38 
     | 
    
         
            -
             
     | 
| 
      
 37 
     | 
    
         
            +
            Glaemscribe.TTS = function() {
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
       39 
39 
     | 
    
         
             
              var client = this;
         
     | 
| 
       40 
40 
     | 
    
         
             
              client.proxy = new ESpeakNGGlue();
         
     | 
| 
       41 
41 
     | 
    
         
             
            }
         
     | 
| 
       42 
42 
     | 
    
         | 
| 
       43 
43 
     | 
    
         
             
            Glaemscribe.TTS.ipa_configurations = {
         
     | 
| 
       44 
     | 
    
         
            -
              'en': {
         
     | 
| 
       45 
     | 
    
         
            -
             
     | 
| 
       46 
     | 
    
         
            -
                 
     | 
| 
       47 
     | 
    
         
            -
                 
     | 
| 
       48 
     | 
    
         
            -
             
     | 
| 
       49 
     | 
    
         
            -
                
         
     | 
| 
       50 
     | 
    
         
            -
                special_token_ipa_ncn: '',
         
     | 
| 
       51 
     | 
    
         
            -
                special_token_ipa_ncs: '',
         
     | 
| 
       52 
     | 
    
         
            -
                special_token_ipa_scn: '',
         
     | 
| 
       53 
     | 
    
         
            -
                special_token_ipa_scs: '',
         
     | 
| 
       54 
     | 
    
         
            -
                // Replace by special token AND KEEP when calculating ipa
         
     | 
| 
       55 
     | 
    
         
            -
                clauseaffecting_punctuation: "!.,;:!?–—", 
         
     | 
| 
       56 
     | 
    
         
            -
                // Replace by special token but do not keep when calculating ipa
         
     | 
| 
       57 
     | 
    
         
            -
                // '’ : apostrophes should stay in the original text !!! Don't break liz's bag !!
         
     | 
| 
       58 
     | 
    
         
            -
                // This is because apostrophes shouldn't trigger a pause in the prononciation (e.g. genitives) 
         
     | 
| 
       59 
     | 
    
         
            -
                clauseunaffecting_punctuation: "·“”«»-[](){}<>≤≥$|\"" 
         
     | 
| 
       60 
     | 
    
         
            -
              },  
         
     | 
| 
       61 
     | 
    
         
            -
              'fr': {
         
     | 
| 
       62 
     | 
    
         
            -
                special_token_ncn: '', // no space / sign / no space
         
     | 
| 
       63 
     | 
    
         
            -
                special_token_ncs: '', // no space / sign / space
         
     | 
| 
       64 
     | 
    
         
            -
                special_token_scn: '', // space / sign / no space
         
     | 
| 
       65 
     | 
    
         
            -
                special_token_scs: '', // space / sign / space  
         
     | 
| 
       66 
     | 
    
         
            -
                
         
     | 
| 
       67 
     | 
    
         
            -
                special_token_ipa_ncn: '',
         
     | 
| 
       68 
     | 
    
         
            -
                special_token_ipa_ncs: '',
         
     | 
| 
       69 
     | 
    
         
            -
                special_token_ipa_scn: '',
         
     | 
| 
       70 
     | 
    
         
            -
                special_token_ipa_scs: '',
         
     | 
| 
      
 44 
     | 
    
         
            +
              'en-tengwar': {
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
      
 46 
     | 
    
         
            +
                punct_token: '', // Invariant, for punctuation
         
     | 
| 
      
 47 
     | 
    
         
            +
                block_token: '', // Invariant, for special blocks (nums / raw tengwar)
         
     | 
| 
      
 48 
     | 
    
         
            +
             
     | 
| 
       71 
49 
     | 
    
         
             
                // Replace by special token AND KEEP when calculating ipa
         
     | 
| 
       72 
     | 
    
         
            -
                clauseaffecting_punctuation: "!.,;:!?–—", 
     | 
| 
      
 50 
     | 
    
         
            +
                clauseaffecting_punctuation: "!.,;:!?–—",
         
     | 
| 
       73 
51 
     | 
    
         
             
                // Replace by special token but do not keep when calculating ipa
         
     | 
| 
       74 
     | 
    
         
            -
                // '’ : apostrophes should stay in the original text 
     | 
| 
       75 
     | 
    
         
            -
                //  
     | 
| 
       76 
     | 
    
         
            -
                 
     | 
| 
       77 
     | 
    
         
            -
                //  
     | 
| 
       78 
     | 
    
         
            -
                 
     | 
| 
       79 
     | 
    
         
            -
                  // Long vowel back replacement.
         
     | 
| 
       80 
     | 
    
         
            -
                  return text.replace(/-/g,"ː");
         
     | 
| 
       81 
     | 
    
         
            -
                }
         
     | 
| 
      
 52 
     | 
    
         
            +
                // For those signs : '’ : apostrophes should stay in the original text !!! Don't break liz's bag !!
         
     | 
| 
      
 53 
     | 
    
         
            +
                // Apostrophes shouldn't trigger a pause in the pronunciation (e.g. genitives, I've, don't etc)
         
     | 
| 
      
 54 
     | 
    
         
            +
                // But apostrophe and single quote are the same thing.
         
     | 
| 
      
 55 
     | 
    
         
            +
                // It's necessary to document that single quotes should then be avoided.
         
     | 
| 
      
 56 
     | 
    
         
            +
                clauseunaffecting_punctuation: "·“”«»-[](){}⟨⟩<>≤≥$|\""
         
     | 
| 
       82 
57 
     | 
    
         
             
              }
         
     | 
| 
       83 
58 
     | 
    
         
             
            }
         
     | 
| 
       84 
59 
     | 
    
         | 
| 
       85 
     | 
    
         
            -
            Glaemscribe.TTS.ipa_configurations['en- 
     | 
| 
       86 
     | 
    
         
            -
            Glaemscribe.TTS.ipa_configurations['en- 
     | 
| 
       87 
     | 
    
         
            -
            Glaemscribe.TTS.ipa_configurations['en-tengwar- 
     | 
| 
       88 
     | 
    
         
            -
            Glaemscribe.TTS.ipa_configurations['en-tengwar'] 
     | 
| 
       89 
     | 
    
         
            -
            Glaemscribe.TTS.ipa_configurations['en-tengwar-gb']      = Glaemscribe.TTS.ipa_configurations['en'];
         
     | 
| 
       90 
     | 
    
         
            -
            Glaemscribe.TTS.ipa_configurations['en-tengwar-us']      = Glaemscribe.TTS.ipa_configurations['en'];
         
     | 
| 
      
 60 
     | 
    
         
            +
            Glaemscribe.TTS.ipa_configurations['en-tengwar']         = Glaemscribe.TTS.ipa_configurations['en-tengwar'];
         
     | 
| 
      
 61 
     | 
    
         
            +
            Glaemscribe.TTS.ipa_configurations['en-tengwar-rp']      = Glaemscribe.TTS.ipa_configurations['en-tengwar'];
         
     | 
| 
      
 62 
     | 
    
         
            +
            Glaemscribe.TTS.ipa_configurations['en-tengwar-gb']      = Glaemscribe.TTS.ipa_configurations['en-tengwar'];
         
     | 
| 
      
 63 
     | 
    
         
            +
            Glaemscribe.TTS.ipa_configurations['en-tengwar-us']      = Glaemscribe.TTS.ipa_configurations['en-tengwar'];
         
     | 
| 
       91 
64 
     | 
    
         | 
| 
       92 
65 
     | 
    
         | 
| 
       93 
66 
     | 
    
         
             
            Glaemscribe.TTS.voice_list = function(voice) {
         
     | 
| 
         @@ -96,6 +69,10 @@ Glaemscribe.TTS.voice_list = function(voice) { 
     | 
|
| 
       96 
69 
     | 
    
         | 
| 
       97 
70 
     | 
    
         
             
            // Static helper. To be used in pure js (not ruby).
         
     | 
| 
       98 
71 
     | 
    
         
             
            Glaemscribe.TTS.option_name_to_voice = function(oname) {
         
     | 
| 
      
 72 
     | 
    
         
            +
             
     | 
| 
      
 73 
     | 
    
         
            +
              if(!oname)
         
     | 
| 
      
 74 
     | 
    
         
            +
                return null;
         
     | 
| 
      
 75 
     | 
    
         
            +
             
     | 
| 
       99 
76 
     | 
    
         
             
              return oname.toLowerCase().replace(/^espeak_voice_/,'').replace(/_/g,'-');
         
     | 
| 
       100 
77 
     | 
    
         
             
            }
         
     | 
| 
       101 
78 
     | 
    
         | 
| 
         @@ -109,257 +86,420 @@ Glaemscribe.TTS.prototype.make_char_checker = function(string){ 
     | 
|
| 
       109 
86 
     | 
    
         
             
              return cc;
         
     | 
| 
       110 
87 
     | 
    
         
             
            }
         
     | 
| 
       111 
88 
     | 
    
         | 
| 
       112 
     | 
    
         
            -
            Glaemscribe.TTS.prototype. 
     | 
| 
       113 
     | 
    
         
            -
              
         
     | 
| 
       114 
     | 
    
         
            -
             
     | 
| 
       115 
     | 
    
         
            -
             
     | 
| 
       116 
     | 
    
         
            -
             
     | 
| 
       117 
     | 
    
         
            -
             
     | 
| 
       118 
     | 
    
         
            -
             
     | 
| 
       119 
     | 
    
         
            -
             
     | 
| 
       120 
     | 
    
         
            -
             
     | 
| 
       121 
     | 
    
         
            -
             
     | 
| 
      
 89 
     | 
    
         
            +
            Glaemscribe.TTS.prototype.isSpace = function(a) {
         
     | 
| 
      
 90 
     | 
    
         
            +
              return (a == ' ' || a == '\t');
         
     | 
| 
      
 91 
     | 
    
         
            +
            }
         
     | 
| 
      
 92 
     | 
    
         
            +
             
     | 
| 
      
 93 
     | 
    
         
            +
            Glaemscribe.TTS.prototype.read_cap_token = function(text, starti, cap_checker) {
         
     | 
| 
      
 94 
     | 
    
         
            +
             
     | 
| 
      
 95 
     | 
    
         
            +
              var client = this
         
     | 
| 
      
 96 
     | 
    
         
            +
              var i   = starti;
         
     | 
| 
      
 97 
     | 
    
         
            +
              var tok = ""
         
     | 
| 
      
 98 
     | 
    
         
            +
             
     | 
| 
      
 99 
     | 
    
         
            +
              if(cap_checker[text[i]] == null)
         
     | 
| 
      
 100 
     | 
    
         
            +
                return null;
         
     | 
| 
      
 101 
     | 
    
         
            +
             
     | 
| 
      
 102 
     | 
    
         
            +
              i++;
         
     | 
| 
      
 103 
     | 
    
         
            +
             
     | 
| 
      
 104 
     | 
    
         
            +
              // Advance the sequence
         
     | 
| 
      
 105 
     | 
    
         
            +
              for(; i<text.length; i++) {
         
     | 
| 
      
 106 
     | 
    
         
            +
                if( (cap_checker[text[i]] == null) && !client.isSpace(text[i])) {
         
     | 
| 
      
 107 
     | 
    
         
            +
                  break;
         
     | 
| 
      
 108 
     | 
    
         
            +
                }
         
     | 
| 
      
 109 
     | 
    
         
            +
              }
         
     | 
| 
      
 110 
     | 
    
         
            +
             
     | 
| 
      
 111 
     | 
    
         
            +
              // Rewind trailing spaces
         
     | 
| 
      
 112 
     | 
    
         
            +
              var toklen = i - starti;
         
     | 
| 
      
 113 
     | 
    
         
            +
             
     | 
| 
      
 114 
     | 
    
         
            +
              for(i = starti + toklen - 1; i>=starti ; i--) {
         
     | 
| 
      
 115 
     | 
    
         
            +
                if(client.isSpace(text[i]))
         
     | 
| 
      
 116 
     | 
    
         
            +
                  toklen--;
         
     | 
| 
       122 
117 
     | 
    
         
             
                else
         
     | 
| 
       123 
     | 
    
         
            -
                   
     | 
| 
      
 118 
     | 
    
         
            +
                  break;
         
     | 
| 
      
 119 
     | 
    
         
            +
              }
         
     | 
| 
      
 120 
     | 
    
         
            +
             
     | 
| 
      
 121 
     | 
    
         
            +
              return text.substring(starti,starti+toklen);
         
     | 
| 
      
 122 
     | 
    
         
            +
            };
         
     | 
| 
      
 123 
     | 
    
         
            +
             
     | 
| 
      
 124 
     | 
    
         
            +
            Glaemscribe.TTS.prototype.preceded_by_space = function(text,i) {
         
     | 
| 
      
 125 
     | 
    
         
            +
              var client = this;
         
     | 
| 
      
 126 
     | 
    
         
            +
             
     | 
| 
      
 127 
     | 
    
         
            +
              if(i <= 0)
         
     | 
| 
      
 128 
     | 
    
         
            +
                return false;
         
     | 
| 
      
 129 
     | 
    
         
            +
              else
         
     | 
| 
      
 130 
     | 
    
         
            +
                return client.isSpace(text[i-1]);
         
     | 
| 
      
 131 
     | 
    
         
            +
            }
         
     | 
| 
      
 132 
     | 
    
         
            +
             
     | 
| 
      
 133 
     | 
    
         
            +
            Glaemscribe.TTS.prototype.succeeded_by_space = function(text,i) {
         
     | 
| 
      
 134 
     | 
    
         
            +
              var client = this;
         
     | 
| 
      
 135 
     | 
    
         
            +
             
     | 
| 
      
 136 
     | 
    
         
            +
              if(i >= text.length-1)
         
     | 
| 
      
 137 
     | 
    
         
            +
                return false;
         
     | 
| 
      
 138 
     | 
    
         
            +
              else
         
     | 
| 
      
 139 
     | 
    
         
            +
                return client.isSpace(text[i+1]);
         
     | 
| 
      
 140 
     | 
    
         
            +
            }
         
     | 
| 
      
 141 
     | 
    
         
            +
             
     | 
| 
      
 142 
     | 
    
         
            +
            // Escapes raw mode AND numbers
         
     | 
| 
      
 143 
     | 
    
         
            +
            Glaemscribe.TTS.prototype.escape_special_blocks = function(voice, entry, for_ipa) {
         
     | 
| 
      
 144 
     | 
    
         
            +
             
     | 
| 
      
 145 
     | 
    
         
            +
              var config  = Glaemscribe.TTS.ipa_configurations[voice];
         
     | 
| 
      
 146 
     | 
    
         
            +
             
     | 
| 
      
 147 
     | 
    
         
            +
              // TODO : make this configurable
         
     | 
| 
      
 148 
     | 
    
         
            +
             
     | 
| 
      
 149 
     | 
    
         
            +
              // Tokenize raw_mode escaping + numbers, we don't want them to be converted to IPA
         
     | 
| 
      
 150 
     | 
    
         
            +
              // Also, keep numbers in the writing, to prevent espeak from pronouncing them
         
     | 
| 
      
 151 
     | 
    
         
            +
              var ipaexpr = /(\s*)({{[\s\S]*?}}|\b[0-9][0-9\s]*\b)(\s*)/g;
         
     | 
| 
      
 152 
     | 
    
         
            +
              var wavexpr = /(\s*)({{[\s\S]*?}})(\s*)/g;
         
     | 
| 
      
 153 
     | 
    
         
            +
              var rawgexp = (for_ipa)?(ipaexpr):(wavexpr);
         
     | 
| 
      
 154 
     | 
    
         
            +
             
     | 
| 
      
 155 
     | 
    
         
            +
              var captured = [];
         
     | 
| 
      
 156 
     | 
    
         
            +
             
     | 
| 
      
 157 
     | 
    
         
            +
              var ret = entry.replace(rawgexp, function(match,p1,p2,p3) {
         
     | 
| 
      
 158 
     | 
    
         
            +
                captured.push(match);
         
     | 
| 
      
 159 
     | 
    
         
            +
                if(!for_ipa)
         
     | 
| 
      
 160 
     | 
    
         
            +
                  return ' '; // For wav, just replace by a single space and do not pronounce.
         
     | 
| 
      
 161 
     | 
    
         
            +
                else {
         
     | 
| 
      
 162 
     | 
    
         
            +
                  return p1 + config['block_token'] + p3; // For IPA, replace by dummy token.
         
     | 
| 
      
 163 
     | 
    
         
            +
                }
         
     | 
| 
       124 
164 
     | 
    
         
             
              });
         
     | 
| 
       125 
     | 
    
         
            -
             
     | 
| 
      
 165 
     | 
    
         
            +
             
     | 
| 
       126 
166 
     | 
    
         
             
              return [ret, captured];
         
     | 
| 
       127 
167 
     | 
    
         
             
            }
         
     | 
| 
       128 
168 
     | 
    
         | 
| 
       129 
     | 
    
         
            -
             
     | 
| 
       130 
     | 
    
         
            -
             
     | 
| 
      
 169 
     | 
    
         
            +
             
     | 
| 
      
 170 
     | 
    
         
            +
            Glaemscribe.TTS.prototype.ipa_instrument_punct = function(voice, text) {
         
     | 
| 
      
 171 
     | 
    
         
            +
             
     | 
| 
       131 
172 
     | 
    
         
             
              var client = this;
         
     | 
| 
       132 
173 
     | 
    
         
             
              var config = Glaemscribe.TTS.ipa_configurations[voice];
         
     | 
| 
       133 
     | 
    
         
            -
             
     | 
| 
       134 
     | 
    
         
            -
              // Normalize all tabs by spaces
         
     | 
| 
       135 
     | 
    
         
            -
              text = text.replace(/\t/g," ");
         
     | 
| 
       136 
     | 
    
         
            -
              // Small hack to prevent espeak from pronouncing last dot 
         
     | 
| 
       137 
     | 
    
         
            -
              // since our tokenization may isolate it.
         
     | 
| 
       138 
     | 
    
         
            -
              text += "\n";
         
     | 
| 
       139 
     | 
    
         
            -
              
         
     | 
| 
      
 174 
     | 
    
         
            +
             
     | 
| 
       140 
175 
     | 
    
         
             
              var cap = client.make_char_checker(config['clauseaffecting_punctuation']);
         
     | 
| 
       141 
176 
     | 
    
         
             
              var cup = client.make_char_checker(config['clauseunaffecting_punctuation']);
         
     | 
| 
       142 
     | 
    
         
            -
             
     | 
| 
      
 177 
     | 
    
         
            +
             
     | 
| 
       143 
178 
     | 
    
         
             
              var accum = "";
         
     | 
| 
       144 
179 
     | 
    
         
             
              var kept_signs = [];
         
     | 
| 
       145 
     | 
    
         
            -
             
     | 
| 
       146 
     | 
    
         
            -
              var  
     | 
| 
       147 
     | 
    
         
            -
             
     | 
| 
       148 
     | 
    
         
            -
             
     | 
| 
      
 180 
     | 
    
         
            +
             
     | 
| 
      
 181 
     | 
    
         
            +
              var rescap = null;
         
     | 
| 
      
 182 
     | 
    
         
            +
             
     | 
| 
      
 183 
     | 
    
         
            +
            	for(var i=0;i<text.length;i++)
         
     | 
| 
       149 
184 
     | 
    
         
             
              {
         
     | 
| 
       150 
     | 
    
         
            -
                // Is precedent char a space ?
         
     | 
| 
       151 
     | 
    
         
            -
                if(i == 0)
         
     | 
| 
       152 
     | 
    
         
            -
                  prec_is_space = false;
         
     | 
| 
       153 
     | 
    
         
            -
                else
         
     | 
| 
       154 
     | 
    
         
            -
                  prec_is_space = (text[i-1] == " ");
         
     | 
| 
       155 
     | 
    
         
            -
                
         
     | 
| 
       156 
     | 
    
         
            -
                // Is precedent char a space ?
         
     | 
| 
       157 
     | 
    
         
            -
                if(i == text.length-1)
         
     | 
| 
       158 
     | 
    
         
            -
                  next_is_space = false;
         
     | 
| 
       159 
     | 
    
         
            -
                else
         
     | 
| 
       160 
     | 
    
         
            -
                  next_is_space = (text[i+1] == " ");
         
     | 
| 
       161 
     | 
    
         
            -
                
         
     | 
| 
       162 
185 
     | 
    
         
             
                if(text[i] == "\n")
         
     | 
| 
       163 
186 
     | 
    
         
             
                {
         
     | 
| 
       164 
     | 
    
         
            -
                  accum += config[' 
     | 
| 
      
 187 
     | 
    
         
            +
                  accum += config['punct_token'];
         
     | 
| 
       165 
188 
     | 
    
         
             
                  kept_signs.push(text[i]);
         
     | 
| 
       166 
189 
     | 
    
         
             
                }
         
     | 
| 
       167 
     | 
    
         
            -
                else if( 
     | 
| 
      
 190 
     | 
    
         
            +
                else if(cup[text[i]] != null)
         
     | 
| 
       168 
191 
     | 
    
         
             
                {
         
     | 
| 
       169 
     | 
    
         
            -
                   
     | 
| 
       170 
     | 
    
         
            -
                   
     | 
| 
       171 
     | 
    
         
            -
             
     | 
| 
       172 
     | 
    
         
            -
             
     | 
| 
       173 
     | 
    
         
            -
             
     | 
| 
       174 
     | 
    
         
            -
             
     | 
| 
       175 
     | 
    
         
            -
             
     | 
| 
       176 
     | 
    
         
            -
             
     | 
| 
       177 
     | 
    
         
            -
             
     | 
| 
       178 
     | 
    
         
            -
                    accum += " " + config['special_token_ncs'] + " " + text[i] + " "; 
         
     | 
| 
       179 
     | 
    
         
            -
                    kept_signs.push(text[i] + " ");
         
     | 
| 
       180 
     | 
    
         
            -
                  }
         
     | 
| 
       181 
     | 
    
         
            -
                   if(prec_is_space && !next_is_space)
         
     | 
| 
       182 
     | 
    
         
            -
                  {
         
     | 
| 
       183 
     | 
    
         
            -
                    // Always insert spaces, but remember how they were placed
         
     | 
| 
       184 
     | 
    
         
            -
                    accum += " " + config['special_token_scn'] + " " + text[i] + " "; 
         
     | 
| 
       185 
     | 
    
         
            -
                    kept_signs.push(" " + text[i]);
         
     | 
| 
       186 
     | 
    
         
            -
                  }
         
     | 
| 
       187 
     | 
    
         
            -
                  if(prec_is_space && next_is_space)
         
     | 
| 
       188 
     | 
    
         
            -
                  {
         
     | 
| 
       189 
     | 
    
         
            -
                    // Always insert spaces, but remember how they were placed
         
     | 
| 
       190 
     | 
    
         
            -
                    accum += " " + config['special_token_scs'] + " " + text[i] + " "; 
         
     | 
| 
       191 
     | 
    
         
            -
                    kept_signs.push(" " + text[i] + " ");
         
     | 
| 
       192 
     | 
    
         
            -
                  }         
         
     | 
| 
      
 192 
     | 
    
         
            +
                  // This sign does not affect clause analysis by espeak.
         
     | 
| 
      
 193 
     | 
    
         
            +
                  // Replace the sign by a special "word" / token AND REMOVE the sign
         
     | 
| 
      
 194 
     | 
    
         
            +
                  // We will restore it after IPA calculation.
         
     | 
| 
      
 195 
     | 
    
         
            +
                  accum += " " + config['punct_token'] + " " ;
         
     | 
| 
      
 196 
     | 
    
         
            +
                  kept_signs.push(
         
     | 
| 
      
 197 
     | 
    
         
            +
                    ((client.preceded_by_space(text,i))?(" "):("")) +
         
     | 
| 
      
 198 
     | 
    
         
            +
                    text[i] +
         
     | 
| 
      
 199 
     | 
    
         
            +
                    ((client.succeeded_by_space(text,i))?(" "):(""))
         
     | 
| 
      
 200 
     | 
    
         
            +
                  );
         
     | 
| 
       193 
201 
     | 
    
         
             
                }
         
     | 
| 
       194 
     | 
    
         
            -
                else if( 
     | 
| 
      
 202 
     | 
    
         
            +
                else if(rescap = client.read_cap_token(text,i,cap))
         
     | 
| 
      
 203 
     | 
    
         
            +
                {
         
     | 
| 
      
 204 
     | 
    
         
            +
                  // This punctuation sign affects clause analysis.
         
     | 
| 
      
 205 
     | 
    
         
            +
                  // Replace the sign by a special "word" / token AND keep the sign
         
     | 
| 
      
 206 
     | 
    
         
            +
                  // Always insert spaces, but remember how they were placed
         
     | 
| 
      
 207 
     | 
    
         
            +
                  accum += " " + text[i] + " " + config['punct_token'] + " " ;
         
     | 
| 
      
 208 
     | 
    
         
            +
                  kept_signs.push(
         
     | 
| 
      
 209 
     | 
    
         
            +
                    ((client.preceded_by_space(text, i))?(" "):("")) +
         
     | 
| 
      
 210 
     | 
    
         
            +
                    rescap +
         
     | 
| 
      
 211 
     | 
    
         
            +
                    ((client.succeeded_by_space(text, i + rescap.length - 1))?(" "):(""))
         
     | 
| 
      
 212 
     | 
    
         
            +
                  );
         
     | 
| 
      
 213 
     | 
    
         
            +
                  i += rescap.length - 1;
         
     | 
| 
      
 214 
     | 
    
         
            +
                }
         
     | 
| 
      
 215 
     | 
    
         
            +
                else
         
     | 
| 
      
 216 
     | 
    
         
            +
                {
         
     | 
| 
      
 217 
     | 
    
         
            +
                  accum += text[i];
         
     | 
| 
      
 218 
     | 
    
         
            +
                }
         
     | 
| 
      
 219 
     | 
    
         
            +
              }
         
     | 
| 
      
 220 
     | 
    
         
            +
             
     | 
| 
      
 221 
     | 
    
         
            +
              return [accum, kept_signs];
         
     | 
| 
      
 222 
     | 
    
         
            +
            }
         
     | 
| 
      
 223 
     | 
    
         
            +
             
     | 
| 
      
 224 
     | 
    
         
            +
            // Simplifies runs of clause-affecting punctuation before WAV synthesis.
            //
            // voice : key into Glaemscribe.TTS.ipa_configurations; its
            //         'clauseaffecting_punctuation' entry feeds make_char_checker.
            // text  : the text to simplify.
            //
            // Returns the text where, for every token reported by read_cap_token,
            // only the character at the token's start position is kept and the
            // remaining token.length - 1 characters are skipped. All other
            // characters are copied unchanged.
            Glaemscribe.TTS.prototype.wav_instrument_punct = function(voice, text) {
              var tts     = this;
              var conf    = Glaemscribe.TTS.ipa_configurations[voice];
              var checker = tts.make_char_checker(conf['clauseaffecting_punctuation']);
              var out     = "";
              var pos     = 0;

              while(pos < text.length)
              {
                // A truthy token means a punctuation run starts at pos.
                var token = tts.read_cap_token(text, pos, checker);

                // The character at pos is always emitted, token or not.
                out += text[pos];

                // Jump over the whole token, or advance by a single character.
                pos += token ? token.length : 1;
              }

              return out;
            };
         
     | 
| 
       229 
247 
     | 
    
         | 
| 
       230 
     | 
    
         
            -
            Glaemscribe.TTS.prototype. 
     | 
| 
       231 
     | 
    
         
            -
             
     | 
| 
      
 248 
     | 
    
         
            +
            // Instruments the special blocks of a text for IPA synthesis.
            //
            // Thin wrapper: delegates to escape_special_blocks with its third
            // argument set to true.
            //
            // voice : voice name, forwarded as is.
            // text  : text to instrument, forwarded as is.
            //
            // Returns whatever escape_special_blocks returns; pre_ipa reads index 0
            // as the instrumented text and index 1 as the kept block tokens.
            Glaemscribe.TTS.prototype.ipa_instrument_blocks = function(voice, text)
            {
              // The former 'client' and 'config' locals were never read; dropped.
              return this.escape_special_blocks(voice, text, true);
            };
         
     | 
| 
      
 255 
     | 
    
         
            +
             
     | 
| 
      
 256 
     | 
    
         
            +
            // Puts back, in order, the strings that were replaced by a placeholder
            // token before synthesis.
            //
            // text        : string containing zero or more occurrences of token.
            // token       : placeholder; interpolated as is into a regular expression.
            // kept_tokens : array of replacement strings; the nth occurrence of the
            //               token (with its surrounding whitespace) is replaced by
            //               kept_tokens[nth].
            //
            // Returns the text with the placeholders substituted. Occurrences for
            // which no saved string exists are left untouched instead of being
            // replaced by the literal text "undefined".
            Glaemscribe.TTS.prototype.ipa_restore_tokens = function(text, token, kept_tokens) {

              var saved = kept_tokens || [];

              // NOTE(review): token is not regex-escaped; this assumes configured
              // tokens contain no regex metacharacters -- confirm against the
              // voice configurations.
              var rx  = new RegExp("\\s*(" + token + ")\\s*","g");
              var nth = -1;

              return text.replace(rx, function(match) {
                nth += 1;
                // Ran out of saved strings: keep what was matched.
                return (nth < saved.length) ? saved[nth] : match;
              });
            };
         
     | 
| 
      
 268 
     | 
    
         
            +
             
     | 
| 
      
 269 
     | 
    
         
            +
            // Post-processes the raw IPA output of the synthesizer.
            //
            // voice       : key into Glaemscribe.TTS.ipa_configurations; the entry's
            //               punct_token and block_token are the placeholders to restore.
            // ipa         : raw IPA string.
            // pre_ipa_res : object carrying punct_tokens and block_tokens, the saved
            //               strings to put back (as returned by pre_ipa).
            //
            // Returns the IPA string with newlines turned into spaces and the
            // punctuation and block placeholders restored.
            Glaemscribe.TTS.prototype.post_ipa = function(voice, ipa, pre_ipa_res) {

              var client = this;
              var config = Glaemscribe.TTS.ipa_configurations[voice];

              // Post-treatment of the anti 'dot' pronunciation hack: drop the final
              // newline. This must happen BEFORE newlines are turned into spaces,
              // otherwise the check can never match and a trailing space is left.
              if(ipa[ipa.length-1] === "\n")
                ipa = ipa.slice(0,-1);

              ipa = ipa.replace(/\n/g, " ");

              // Punctuation first, then blocks.
              ipa = client.ipa_restore_tokens(ipa, config.punct_token, pre_ipa_res.punct_tokens);
              ipa = client.ipa_restore_tokens(ipa, config.block_token, pre_ipa_res.block_tokens);

              // Kept from the previous implementation: a restored string could
              // itself end with a newline.
              if(ipa[ipa.length-1] === "\n")
                ipa = ipa.slice(0,-1);

              return ipa;
            };
         
     | 
| 
       269 
284 
     | 
    
         | 
| 
       270 
285 
     | 
    
         | 
| 
       271 
     | 
    
         
            -
            Glaemscribe.TTS.prototype. 
     | 
| 
       272 
     | 
    
         
            -
             
     | 
| 
      
 286 
     | 
    
         
            +
            // Prepares a text for IPA synthesis.
            //
            // args  : options object; not read by this function.
            // voice : key into Glaemscribe.TTS.ipa_configurations.
            // text  : raw input text.
            //
            // Returns { text, block_tokens, punct_tokens }: the instrumented text and
            // the strings saved by the block and punctuation passes.
            // Throws a string when no configuration exists for the voice.
            Glaemscribe.TTS.prototype.pre_ipa = function(args, voice, text) {

              var tts = this;

              if(!Glaemscribe.TTS.ipa_configurations[voice])
              {
                throw "Trying to use unsupported voice '" + voice + "'!";
              }

              // Tabs become spaces. The appended newline is a small hack to prevent
              // espeak from pronouncing a final dot that tokenization may isolate.
              var prepared = text.replace(/\t/g," ") + "\n";

              // Blocks go first because they may contain punctuation.
              var block_pass = tts.ipa_instrument_blocks(voice, prepared);

              // Punctuation is instrumented on the block-instrumented text.
              var punct_pass = tts.ipa_instrument_punct(voice, block_pass[0]);

              // Small hack: always have a capital after a dot, which also prevents
              // espeak from transcribing/pronouncing "dot".
              var instrumented = punct_pass[0].replace(/(\.\s+.)/g, function(dot_run) {
                return dot_run.toUpperCase();
              });

              return {
                text:         instrumented,
                block_tokens: block_pass[1],
                punct_tokens: punct_pass[1]
              };
            };
         
     | 
| 
      
 321 
     | 
    
         
            +
             
     | 
| 
      
 322 
     | 
    
         
            +
            // Prepares a text for WAV synthesis.
            //
            // args  : options object; only has_raw_mode is read.
            // voice : key into Glaemscribe.TTS.ipa_configurations.
            // text  : raw input text.
            //
            // Returns { text } holding the transformed text.
            // Throws a string when no configuration exists for the voice.
            Glaemscribe.TTS.prototype.pre_wav = function(args, voice, text) {
              if(!Glaemscribe.TTS.ipa_configurations[voice])
              {
                throw "Trying to use unsupported voice '" + voice + "'!";
              }

              var tts = this;

              // In raw mode the special blocks are escaped first; only the resulting
              // text (index 0) is kept, the rest of the result is ignored.
              var without_blocks = args.has_raw_mode
                ? tts.escape_special_blocks(voice, text, false)[0]
                : text;

              // Then the punctuation is simplified to avoid problems.
              return {
                text: tts.wav_instrument_punct(voice, without_blocks)
              };
            };
         
     | 
| 
      
 342 
     | 
    
         
            +
             
     | 
| 
      
 343 
     | 
    
         
            +
            //////////////////
         
     | 
| 
      
 344 
     | 
    
         
            +
            //  SYNTHESIZE  //
         
     | 
| 
      
 345 
     | 
    
         
            +
            //////////////////
         
     | 
| 
      
 346 
     | 
    
         
            +
             
     | 
| 
      
 347 
     | 
    
         
            +
             
     | 
| 
      
 348 
     | 
    
         
            +
            // Synthesizes the IPA transcription of a text.
            //
            // text    : text to transcribe.
            // args    : optional options; voice defaults to 'en-tengwar'. The object
            //           is also forwarded to pre_ipa.
            // onended : optional callback invoked with the result object.
            //
            // The result object gets an 'ipa' field (post-processed from result.pho),
            // a 'synthesis_time' field (difference of two Date objects) and loses
            // its 'pho' field.
            //
            // Returns the result object if the proxy invoked its callback before
            // synthesize() returned, otherwise an empty object.
            Glaemscribe.TTS.prototype.synthesize_ipa = function(text, args, onended) {

              var tts     = this;
              var options = args || {};
              var voice   = options.voice || 'en-tengwar';

              // Pre-parse the text: raw mode blocks {{ ... }} and punctuation are
              // replaced by placeholder tokens and cached, so that each block is
              // treated as one word.
              var instrumented = tts.pre_ipa(options, voice, text);

              // The text is now instrumented; prepare the proxy.
              tts.proxy.set_voice(voice);

              var started = new Date();
              var outcome = {};

              tts.proxy.synthesize(instrumented['text'], false, true, true, function(result) {
                // Post parse ipa: restore the cached tokens.
                result.ipa            = tts.post_ipa(voice, result.pho, instrumented);

                result.synthesis_time = (new Date() - started);
                delete result.pho;

                if(onended)
                {
                  onended(result);
                }

                outcome = result;
              });

              return outcome;
            };
         
     | 
| 
       325 
382 
     | 
    
         | 
| 
       326 
383 
     | 
    
         
             
            // Should be kept separated from IPA, because we do not work on the same text
         
     | 
| 
       327 
384 
     | 
    
         
             
            Glaemscribe.TTS.prototype.synthesize_wav = function(text, args, onended) {
         
     | 
| 
       328 
     | 
    
         
            -
             
     | 
| 
       329 
     | 
    
         
            -
              var client 
     | 
| 
       330 
     | 
    
         
            -
              
         
     | 
| 
      
 385 
     | 
    
         
            +
             
     | 
| 
      
 386 
     | 
    
         
            +
              var client      = this;
         
     | 
| 
       331 
387 
     | 
    
         
             
              args            = args || {}
         
     | 
| 
       332 
     | 
    
         
            -
              var voice       = args.voice  || 'en'
         
     | 
| 
      
 388 
     | 
    
         
            +
              var voice       = args.voice  || 'en-tengwar'
         
     | 
| 
      
 389 
     | 
    
         
            +
             
     | 
| 
      
 390 
     | 
    
         
            +
              // Pre-trandform text
         
     | 
| 
      
 391 
     | 
    
         
            +
              var pwav = client.pre_wav(args, voice, text);
         
     | 
| 
      
 392 
     | 
    
         
            +
              text = pwav['text'];
         
     | 
| 
       333 
393 
     | 
    
         | 
| 
       334 
     | 
    
         
            -
               
     | 
| 
      
 394 
     | 
    
         
            +
              // Prepare client
         
     | 
| 
       335 
395 
     | 
    
         
             
              client.proxy.set_rate(args.rate    || 120);
         
     | 
| 
       336 
396 
     | 
    
         
             
              client.proxy.set_pitch(args.pitch  || 5);
         
     | 
| 
       337 
     | 
    
         
            -
              client.proxy.set_voice( 
     | 
| 
       338 
     | 
    
         
            -
              
         
     | 
| 
       339 
     | 
    
         
            -
              if(args.has_raw_mode) {
         
     | 
| 
       340 
     | 
    
         
            -
                var pre_raw_res    = this.escape_raw_mode(text,true);
         
     | 
| 
       341 
     | 
    
         
            -
                text               = pre_raw_res[0];
         
     | 
| 
       342 
     | 
    
         
            -
              } 
         
     | 
| 
      
 397 
     | 
    
         
            +
              client.proxy.set_voice(voice);
         
     | 
| 
       343 
398 
     | 
    
         | 
| 
       344 
     | 
    
         
            -
              var ret = {};
         
     | 
| 
       345 
399 
     | 
    
         
             
              var ts = new Date();
         
     | 
| 
      
 400 
     | 
    
         
            +
              var ret = {};
         
     | 
| 
       346 
401 
     | 
    
         
             
              client.proxy.synthesize(text, true, false, false, function(result) {
         
     | 
| 
       347 
402 
     | 
    
         
             
                var te = new Date();
         
     | 
| 
       348 
403 
     | 
    
         
             
                result.synthesis_time = (te - ts);
         
     | 
| 
       349 
404 
     | 
    
         
             
                delete result.pho;
         
     | 
| 
       350 
     | 
    
         
            -
             
     | 
| 
      
 405 
     | 
    
         
            +
             
     | 
| 
       351 
406 
     | 
    
         
             
                // Uint8Array > Array conversion, for ruby?
         
     | 
| 
       352 
     | 
    
         
            -
                // ret.wav = [].slice.call(ret.wav); 
     | 
| 
       353 
     | 
    
         
            -
             
     | 
| 
      
 407 
     | 
    
         
            +
                // ret.wav = [].slice.call(ret.wav);
         
     | 
| 
      
 408 
     | 
    
         
            +
             
     | 
| 
       354 
409 
     | 
    
         
             
                if(onended)
         
     | 
| 
       355 
410 
     | 
    
         
             
                  onended(result);
         
     | 
| 
       356 
     | 
    
         
            -
             
     | 
| 
      
 411 
     | 
    
         
            +
             
     | 
| 
       357 
412 
     | 
    
         
             
                ret = result;
         
     | 
| 
       358 
413 
     | 
    
         
             
              });
         
     | 
| 
       359 
     | 
    
         
            -
             
     | 
| 
      
 414 
     | 
    
         
            +
             
     | 
| 
       360 
415 
     | 
    
         
             
              return ret;
         
     | 
| 
       361 
416 
     | 
    
         
             
            }
         
     | 
| 
       362 
417 
     | 
    
         | 
| 
      
 418 
     | 
    
         
            +
             
     | 
| 
      
 419 
     | 
    
         
            +
            // Below is an experiment of a parsing tool for orthographic modes.
         
     | 
| 
      
 420 
     | 
    
         
            +
            // Not finished and probably not usable.
         
     | 
| 
      
 421 
     | 
    
         
            +
            Glaemscribe.TTS.TokenType = {};
         
     | 
| 
      
 422 
     | 
    
         
            +
            Glaemscribe.TTS.TokenType.WORD      = 'WORD';
         
     | 
| 
      
 423 
     | 
    
         
            +
            Glaemscribe.TTS.TokenType.NON_WORD  = 'NON_WORD';
         
     | 
| 
      
 424 
     | 
    
         
            +
            Glaemscribe.TTS.TokenType.NUM       = 'NUM';
         
     | 
| 
      
 425 
     | 
    
         
            +
            Glaemscribe.TTS.TokenType.SPACE     = 'SPACE';
         
     | 
| 
      
 426 
     | 
    
         
            +
            Glaemscribe.TTS.TokenType.PUNCT     = 'PUNCT';
         
     | 
| 
      
 427 
     | 
    
         
            +
             
     | 
| 
      
 428 
     | 
    
         
            +
            Glaemscribe.TTS.prototype.orthographic_disambiguator_en = function(text) {
         
     | 
| 
      
 429 
     | 
    
         
            +
             
     | 
| 
      
 430 
     | 
    
         
            +
              var client = this;
         
     | 
| 
      
 431 
     | 
    
         
            +
             
     | 
| 
      
 432 
     | 
    
         
            +
              var uwmatcher = /(\p{L}+)/u;
         
     | 
| 
      
 433 
     | 
    
         
            +
              var spl       = text.split(uwmatcher);
         
     | 
| 
      
 434 
     | 
    
         
            +
             
     | 
| 
      
 435 
     | 
    
         
            +
              var tokens = spl.map(function(s) {
         
     | 
| 
      
 436 
     | 
    
         
            +
                var t       = {};
         
     | 
| 
      
 437 
     | 
    
         
            +
                var is_word = s.match(uwmatcher)
         
     | 
| 
      
 438 
     | 
    
         
            +
             
     | 
| 
      
 439 
     | 
    
         
            +
                t.type    = (is_word)?(Glaemscribe.TTS.TokenType.WORD):(Glaemscribe.TTS.TokenType.NON_WORD);
         
     | 
| 
      
 440 
     | 
    
         
            +
                t.content = s;
         
     | 
| 
      
 441 
     | 
    
         
            +
                return t;
         
     | 
| 
      
 442 
     | 
    
         
            +
              });
         
     | 
| 
      
 443 
     | 
    
         
            +
             
     | 
| 
      
 444 
     | 
    
         
            +
              var tokens2 = [];
         
     | 
| 
      
 445 
     | 
    
         
            +
             
     | 
| 
      
 446 
     | 
    
         
            +
              // Handle apostrophe
         
     | 
| 
      
 447 
     | 
    
         
            +
              for(var i=0;i<tokens.length;i++) {
         
     | 
| 
      
 448 
     | 
    
         
            +
                if( i == 0 || i == tokens.length-1 || tokens[i].type == Glaemscribe.TTS.TokenType.WORD ) {
         
     | 
| 
      
 449 
     | 
    
         
            +
                  tokens2.push(tokens[i]);
         
     | 
| 
      
 450 
     | 
    
         
            +
                  continue;
         
     | 
| 
      
 451 
     | 
    
         
            +
                }
         
     | 
| 
      
 452 
     | 
    
         
            +
             
     | 
| 
      
 453 
     | 
    
         
            +
                if(tokens[i].content == "'" &&
         
     | 
| 
      
 454 
     | 
    
         
            +
                  tokens[i-1].type == Glaemscribe.TTS.TokenType.WORD &&
         
     | 
| 
      
 455 
     | 
    
         
            +
                  tokens[i+1].type == Glaemscribe.TTS.TokenType.WORD )
         
     | 
| 
      
 456 
     | 
    
         
            +
                {
         
     | 
| 
      
 457 
     | 
    
         
            +
                  tokens2.pop();
         
     | 
| 
      
 458 
     | 
    
         
            +
                  var tok     = {};
         
     | 
| 
      
 459 
     | 
    
         
            +
                  tok.type    = Glaemscribe.TTS.TokenType.WORD;
         
     | 
| 
      
 460 
     | 
    
         
            +
                  tok.content = tokens[i-1].content + tokens[i].content + tokens[i+1].content;
         
     | 
| 
      
 461 
     | 
    
         
            +
                  tokens2.push(tok);
         
     | 
| 
      
 462 
     | 
    
         
            +
                  i += 1;
         
     | 
| 
      
 463 
     | 
    
         
            +
                }
         
     | 
| 
      
 464 
     | 
    
         
            +
                else {
         
     | 
| 
      
 465 
     | 
    
         
            +
                  tokens2.push(tokens[i]);
         
     | 
| 
      
 466 
     | 
    
         
            +
                }
         
     | 
| 
      
 467 
     | 
    
         
            +
              }
         
     | 
| 
      
 468 
     | 
    
         
            +
              tokens = tokens2;
         
     | 
| 
      
 469 
     | 
    
         
            +
             
     | 
| 
      
 470 
     | 
    
         
            +
              // Numerize tokens
         
     | 
| 
      
 471 
     | 
    
         
            +
              var i = 0;
         
     | 
| 
      
 472 
     | 
    
         
            +
              tokens.forEach(function(t) {
         
     | 
| 
      
 473 
     | 
    
         
            +
                t.num = i;
         
     | 
| 
      
 474 
     | 
    
         
            +
                i += 1;
         
     | 
| 
      
 475 
     | 
    
         
            +
              });
         
     | 
| 
      
 476 
     | 
    
         
            +
             
     | 
| 
      
 477 
     | 
    
         
            +
              // Remove non-speakable tokens
         
     | 
| 
      
 478 
     | 
    
         
            +
              var stokens = tokens.filter(function(t) {
         
     | 
| 
      
 479 
     | 
    
         
            +
                return (t.type == Glaemscribe.TTS.TokenType.WORD);
         
     | 
| 
      
 480 
     | 
    
         
            +
              });
         
     | 
| 
      
 481 
     | 
    
         
            +
             
     | 
| 
      
 482 
     | 
    
         
            +
              // Join speakable tokens
         
     | 
| 
      
 483 
     | 
    
         
            +
              var r = stokens.map(function(t) { return t.content}).join('  ');
         
     | 
| 
      
 484 
     | 
    
         
            +
             
     | 
| 
      
 485 
     | 
    
         
            +
              var args  = {};
         
     | 
| 
      
 486 
     | 
    
         
            +
              var voice = args.voice  || 'en-tengwar';
         
     | 
| 
      
 487 
     | 
    
         
            +
             
     | 
| 
      
 488 
     | 
    
         
            +
              client.proxy.set_voice(voice);
         
     | 
| 
      
 489 
     | 
    
         
            +
              client.proxy.synthesize(r, false, true, true, function(result) {
         
     | 
| 
      
 490 
     | 
    
         
            +
                r = result.pho;
         
     | 
| 
      
 491 
     | 
    
         
            +
              });
         
     | 
| 
      
 492 
     | 
    
         
            +
              r = r.split('').map(function(t) { return t.trim() });
         
     | 
| 
      
 493 
     | 
    
         
            +
             
     | 
| 
      
 494 
     | 
    
         
            +
              var j = 0;
         
     | 
| 
      
 495 
     | 
    
         
            +
              r.forEach(function(w) {
         
     | 
| 
      
 496 
     | 
    
         
            +
                tokens[stokens[j].num].ipa = r[j];
         
     | 
| 
      
 497 
     | 
    
         
            +
                j += 1;
         
     | 
| 
      
 498 
     | 
    
         
            +
              });
         
     | 
| 
      
 499 
     | 
    
         
            +
             
     | 
| 
      
 500 
     | 
    
         
            +
              return tokens;
         
     | 
| 
      
 501 
     | 
    
         
            +
            }
         
     | 
| 
      
 502 
     | 
    
         
            +
             
     | 
| 
       363 
503 
     | 
    
         
             
            Glaemscribe.TTS.is_engine_loaded = function() {
         
     | 
| 
       364 
504 
     | 
    
         
             
              return (typeof(ESpeakNGGlue) !== 'undefined');
         
     | 
| 
       365 
505 
     | 
    
         
             
            };
         
     |