RubyGems - glaemscribe - Versions diffs - 1.2.0 → 1.3.0 - Mend

glaemscribe 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

checksums.yaml +4 -4
data/bin/glaemscribe +2 -2
data/glaemresources/charsets/cirth_ds.cst +514 -179
data/glaemresources/charsets/eldamar.cst +210 -0
data/glaemresources/charsets/tengwar_ds_annatar.cst +2452 -130
data/glaemresources/charsets/tengwar_ds_eldamar.cst +2319 -125
data/glaemresources/charsets/tengwar_ds_elfica.cst +2317 -126
data/glaemresources/charsets/tengwar_ds_parmaite.cst +2319 -127
data/glaemresources/charsets/tengwar_ds_sindarin.cst +2318 -127
data/glaemresources/charsets/tengwar_freemono.cst +1 -1
data/glaemresources/charsets/tengwar_guni_annatar.cst +2451 -131
data/glaemresources/charsets/tengwar_guni_eldamar.cst +2317 -126
data/glaemresources/charsets/tengwar_guni_elfica.cst +2316 -127
data/glaemresources/charsets/tengwar_guni_parmaite.cst +2319 -127
data/glaemresources/charsets/tengwar_guni_sindarin.cst +2317 -126
data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
data/glaemresources/modes/japanese-tengwar.glaem +9 -4
data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
data/glaemresources/modes/raw-cirth.glaem +154 -0
data/lib/api/charset_parser.rb +7 -1
data/lib/api/mode.rb +35 -10
data/lib/api/mode_parser.rb +21 -12
data/lib/api/post_processor/outspace.rb +44 -0
data/lib/api/rule_group.rb +1 -1
data/lib/api/transcription_pre_post_processor.rb +8 -5
data/lib/api/transcription_processor.rb +12 -9
data/lib/glaemscribe.rb +2 -0
data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
data/lib_espeak/glaemscribe_tts.js +363 -223
metadata +12 -6

data/lib/api/transcription_pre_post_processor.rb CHANGED Viewed

@@ -27,7 +27,8 @@ module Glaemscribe
       attr_reader :glaeml_element
       attr_reader :finalized_glaeml_element
-      def initialize(glaeml_element)
+      def initialize(mode, glaeml_element)
+        @mode           = mode
         @glaeml_element = glaeml_element
       end
@@ -121,15 +122,17 @@ module Glaemscribe
       attr_accessor :out_space
       def apply(tokens, out_charset)
-        out_space_str     = " "
-        out_space_str     = @out_space.map{ |token| out_charset[token].str }.join("") if @out_space
         # Apply filters
         @operators.each{ |operator|
           tokens = operator.apply(tokens,out_charset)
         }
+        out_space_str     = " "
+        out_space_str     = @out_space.map{ |token|
+          out_charset[token]&.str || UNKNOWN_CHAR_OUTPUT
+        }.join("") if @out_space
         # Convert output
         ret = ""
         tokens.each{ |token|

data/lib/api/transcription_processor.rb CHANGED Viewed

@@ -73,7 +73,7 @@ module Glaemscribe
         }
       end
-      def apply(l)
+      def apply(l, debug_context)
         ret = []
         current_group     = nil
         accumulated_word  = ""
@@ -81,14 +81,14 @@ module Glaemscribe
         l.split("").each{ |c|
           case c
           when " ", "\t"
-            ret += transcribe_word(accumulated_word)
+            ret += transcribe_word(accumulated_word, debug_context)
             ret += ["*SPACE"]
             accumulated_word = ""
           when "\r"
             # Ignore
           when "\n"
-            ret += transcribe_word(accumulated_word)
+            ret += transcribe_word(accumulated_word, debug_context)
             ret += ["*LF"]
             accumulated_word = ""
@@ -97,24 +97,27 @@ module Glaemscribe
             if c_group == current_group
               accumulated_word += c
             else
-              ret += transcribe_word(accumulated_word)
+              ret += transcribe_word(accumulated_word, debug_context)
               current_group    = c_group
               accumulated_word = c
             end
           end
         }
         # Just in case
-        ret += transcribe_word(accumulated_word)
+        ret += transcribe_word(accumulated_word, debug_context)
         ret
       end
-      def transcribe_word(word)
+      def transcribe_word(word, debug_context)
         res = []
         word = WORD_BOUNDARY_TREE + word + WORD_BOUNDARY_TREE
         while word.length != 0
-          r, len = @transcription_tree.transcribe(word)
-          word = word[len..-1]
-          res += r
+          tokens, len = @transcription_tree.transcribe(word)
+          word        = word[len..-1]
+          eaten       = word[0..len-1]
+          res         += tokens
+          debug_context.processor_pathes << [eaten, tokens, tokens]
         end
         # Return token list
         res

data/lib/glaemscribe.rb CHANGED Viewed

@@ -67,6 +67,8 @@ module Glaemscribe
     require API_PATH + "pre_processor/substitute.rb"
     require API_PATH + "pre_processor/rxsubstitute.rb"
     require API_PATH + "pre_processor/up_down_tehta_split.rb"
+    require API_PATH + "post_processor/outspace.rb"
     require API_PATH + "post_processor/reverse.rb"
     require API_PATH + "post_processor/resolve_virtuals.rb"