RubyGems - glaemscribe - Versions diffs - 1.2.0 → 1.3.1 - Mend

glaemscribe 1.2.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35)

checksums.yaml +4 -4
data/bin/glaemscribe +2 -2
data/glaemresources/charsets/cirth_ds.cst +514 -179
data/glaemresources/charsets/eldamar.cst +210 -0
data/glaemresources/charsets/tengwar_ds_annatar.cst +2776 -348
data/glaemresources/charsets/tengwar_ds_eldamar.cst +2648 -351
data/glaemresources/charsets/tengwar_ds_elfica.cst +2639 -346
data/glaemresources/charsets/tengwar_ds_parmaite.cst +2648 -351
data/glaemresources/charsets/tengwar_ds_sindarin.cst +2642 -348
data/glaemresources/charsets/tengwar_freemono.cst +1 -1
data/glaemresources/charsets/tengwar_guni_annatar.cst +2725 -300
data/glaemresources/charsets/tengwar_guni_eldamar.cst +2589 -295
data/glaemresources/charsets/tengwar_guni_elfica.cst +2592 -298
data/glaemresources/charsets/tengwar_guni_parmaite.cst +2592 -297
data/glaemresources/charsets/tengwar_guni_sindarin.cst +2591 -297
data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
data/glaemresources/modes/japanese-tengwar.glaem +9 -4
data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
data/glaemresources/modes/raw-cirth.glaem +154 -0
data/lib/api/charset.rb +124 -57
data/lib/api/charset_parser.rb +39 -26
data/lib/api/mode.rb +35 -10
data/lib/api/mode_parser.rb +21 -12
data/lib/api/post_processor/outspace.rb +44 -0
data/lib/api/post_processor/resolve_virtuals.rb +41 -19
data/lib/api/rule_group.rb +1 -1
data/lib/api/transcription_pre_post_processor.rb +51 -45
data/lib/api/transcription_processor.rb +12 -9
data/lib/glaemscribe.rb +2 -0
data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
data/lib_espeak/glaemscribe_tts.js +363 -223
metadata +12 -6

data/lib/api/post_processor/outspace.rb ADDED Viewed

@@ -0,0 +1,44 @@
+# encoding: UTF-8
+#
+# Glǽmscribe (also written Glaemscribe) is a software dedicated to
+# the transcription of texts between writing systems, and more
+# specifically dedicated to the transcription of J.R.R. Tolkien's
+# invented languages to some of his devised writing systems.
+#
+# Copyright (C) 2015 Benjamin Babut (Talagan).
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+# A post processor operator to replace the out_space on the fly.
+# This has the same effect as the \outspace parameter
+# But can be included in the postprocessor and benefit from the if/then logic
+module Glaemscribe
+  module API
+    class OutspacePostProcessorOperator < PostProcessorOperator
+      def initialize(mode, glaeml_element)
+        super(mode, glaeml_element)
+        @out_space = @mode.post_processor.out_space  = glaeml_element.args[0].split.reject{|token| token.empty? }
+      end
+      def apply(tokens, charset)
+        @mode.post_processor.out_space = @out_space
+        tokens
+      end
+    end
+    ResourceManager::register_post_processor_class("outspace", OutspacePostProcessorOperator)
+  end
+end

data/lib/api/post_processor/resolve_virtuals.rb CHANGED Viewed

@@ -1,22 +1,22 @@
 # encoding: UTF-8
 #
 # Glǽmscribe (also written Glaemscribe) is a software dedicated to
-# the transcription of texts between writing systems, and more
-# specifically dedicated to the transcription of J.R.R. Tolkien's
+# the transcription of texts between writing systems, and more
+# specifically dedicated to the transcription of J.R.R. Tolkien's
 # invented languages to some of his devised writing systems.
-#
+#
 # Copyright (C) 2015 Benjamin Babut (Talagan).
-#
+#
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # any later version.
-#
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU Affero General Public License for more details.
-#
+#
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
@@ -24,25 +24,25 @@ module Glaemscribe
   module API
     class ResolveVirtualsPostProcessorOperator < PostProcessorOperator
       def finalize(trans_options)
         super(trans_options)
         @last_triggers = {} # Allocate the lookup here to optimize
       end
       def reset_trigger_states(charset)
         # For each virtual char in charset, maintain a state.
         charset.virtual_chars.each{ |vc|
           @last_triggers[vc] = nil # Clear the state
         }
       end
       def apply_loop(charset, tokens, new_tokens, reversed, token, idx)
         if token == '*SPACE' || token =='*LF'
           reset_trigger_states(charset)
           return
         end
         # Check if token is a virtual char
         c = charset[token]
         return if c.nil? # May happen for empty tokens
@@ -54,14 +54,14 @@ module Glaemscribe
             token           = new_tokens[idx]           # Consider the token replaced, being itself a potential trigger for further virtuals (cascading virtuals)
           end
         end
         # Update states of virtual classes
         charset.virtual_chars.each{|vc|
           rc                  = vc[token]
-          @last_triggers[vc]  = rc if rc != nil
+          @last_triggers[vc]  = rc if rc != nil
         }
       end
       def apply_sequences(charset,tokens)
         ret = []
         tokens.each { |token|
@@ -74,21 +74,43 @@ module Glaemscribe
         }
         ret
       end
+      def apply_swaps(charset, tokens)
+        idx = 0
+        while idx < tokens.length - 1
+          tok = tokens[idx]
+          tgt = tokens[idx+1]
+          trig = charset.swap_for_trigger(tok)
+          if trig && trig.has_target?(tgt)
+            tokens[idx+1] = tok
+            tokens[idx]   = tgt
+          end
+          idx += 1
+        end
+        tokens
+      end
       def apply(tokens,charset)
         # Apply sequence chars
         tokens = apply_sequences(charset,tokens)
+        tokens = apply_swaps(charset, tokens)
         # Clone the tokens so that we can perform ligatures AND diacritics without interferences
         new_tokens = tokens.clone
         # Handle l to r virtuals (diacritics ?)
-        reset_trigger_states(charset)
+        reset_trigger_states(charset)
         tokens.each_with_index{ |token,idx|
           apply_loop(charset,tokens,new_tokens,false,token,idx)
         }
         # Handle r to l virtuals (ligatures ?)
-        reset_trigger_states(charset)
+        reset_trigger_states(charset)
         tokens.reverse_each.with_index{ |token,idx|
           apply_loop(charset,tokens,new_tokens,true,token,tokens.count - 1 - idx)
         }
@@ -96,7 +118,7 @@ module Glaemscribe
       end
     end
-    ResourceManager::register_post_processor_class("resolve_virtuals", ResolveVirtualsPostProcessorOperator)
+    ResourceManager::register_post_processor_class("resolve_virtuals", ResolveVirtualsPostProcessorOperator)
   end
 end

data/lib/api/rule_group.rb CHANGED Viewed

@@ -138,7 +138,7 @@ module Glaemscribe
                 var_value     = apply_vars(term.line, var_value_ex, true)
                 if !var_value
-                  @mode.errors << Glaeml::Error.new(term.line, "Thus, variable {#{var_name}} could not be declared.")
+                  @mode.errors << Glaeml::Error.new(term.line, "Thus, variable {#{arg_name}} could not be declared.")
                 end
               end

data/lib/api/transcription_pre_post_processor.rb CHANGED Viewed

@@ -1,45 +1,46 @@
 # encoding: UTF-8
 #
 # Glǽmscribe (also written Glaemscribe) is a software dedicated to
-# the transcription of texts between writing systems, and more
-# specifically dedicated to the transcription of J.R.R. Tolkien's
+# the transcription of texts between writing systems, and more
+# specifically dedicated to the transcription of J.R.R. Tolkien's
 # invented languages to some of his devised writing systems.
-#
+#
 # Copyright (C) 2015 Benjamin Babut (Talagan).
-#
+#
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # any later version.
-#
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU Affero General Public License for more details.
-#
+#
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 module Glaemscribe
   module API
     class PrePostProcessorOperator
       attr_reader :glaeml_element
       attr_reader :finalized_glaeml_element
-      def initialize(glaeml_element)
+      def initialize(mode, glaeml_element)
+        @mode           = mode
         @glaeml_element = glaeml_element
       end
       def eval_arg(arg, trans_options)
         return nil if arg.nil?
         if arg =~ /^\\eval\s/
           to_eval = $'
           return Eval::Parser.new().parse(to_eval, trans_options)
         end
-        return arg
+        return arg
       end
       def finalize_glaeml_element(ge, trans_options)
         ge.args.map! { |arg| eval_arg(arg, trans_options) }
         ge.children.each{ |child|
@@ -47,37 +48,37 @@ module Glaemscribe
         }
         ge
       end
       def finalize(trans_options)
         @finalized_glaeml_element = finalize_glaeml_element(@glaeml_element.clone, trans_options)
       end
       def apply
         raise "Pure virtual method, should be overloaded."
       end
     end
     class TranscriptionPrePostProcessor
       attr_reader :root_code_block
       attr_reader :operators
       def initialize(mode)
         @mode             = mode
-        @root_code_block  = IfTree::CodeBlock.new
+        @root_code_block  = IfTree::CodeBlock.new
       end
       def descend_if_tree(code_block, trans_options)
-        code_block.terms.each{ |term|
+        code_block.terms.each{ |term|
           if(term.is_pre_post_processor_operators?)
             term.operators.each{ |operator|
               @operators << operator
-            }
+            }
           else
             term.if_conds.each{ |if_cond|
               if_eval = Eval::Parser.new()
               if(if_eval.parse(if_cond.expression, trans_options) == true)
                 descend_if_tree(if_cond.child_code_block, trans_options)
                 break
@@ -86,7 +87,7 @@ module Glaemscribe
           end
         }
       end
       def finalize(trans_options)
         @operators = []
         # Select operators depending on conditions
@@ -98,42 +99,47 @@ module Glaemscribe
       end
     end
-    class PreProcessorOperator < PrePostProcessorOperator
+    class PreProcessorOperator < PrePostProcessorOperator
     end
     class PostProcessorOperator < PrePostProcessorOperator
     end
-    class TranscriptionPreProcessor < TranscriptionPrePostProcessor
+    class TranscriptionPreProcessor < TranscriptionPrePostProcessor
       # Apply all preprocessor rules consecutively
       def apply(l)
         ret = l
         @operators.each{ |operator|
           ret = operator.apply(ret)
-        }
+        }
         ret
       end
     end
     class TranscriptionPostProcessor < TranscriptionPrePostProcessor
       attr_accessor :out_space
       def apply(tokens, out_charset)
-        out_space_str     = " "
-        out_space_str     = @out_space.map{ |token| out_charset[token].str }.join("") if @out_space
+        # Cleanup the output of the chain by removing empty tokens
+        tokens.select!{ |tok| tok != "" }
         # Apply filters
         @operators.each{ |operator|
           tokens = operator.apply(tokens,out_charset)
-        }
+        }
+        out_space_str     = " "
+        out_space_str     = @out_space.map{ |token|
+          out_charset[token]&.str || UNKNOWN_CHAR_OUTPUT
+        }.join("") if @out_space
         # Convert output
         ret = ""
         tokens.each{ |token|
-          case token
+          case token
             when ""
             when "*UNKNOWN"
                ret += UNKNOWN_CHAR_OUTPUT
@@ -142,13 +148,13 @@ module Glaemscribe
             when "*LF"
                ret += "\n"
             else
-              c = out_charset[token]
+              c = out_charset[token]
               ret += (c.nil?)?(UNKNOWN_CHAR_OUTPUT):c.str
-          end
+          end
         }
         ret
-      end
-    end
+      end
+    end
   end
 end

data/lib/api/transcription_processor.rb CHANGED Viewed

@@ -73,7 +73,7 @@ module Glaemscribe
         }
       end
-      def apply(l)
+      def apply(l, debug_context)
         ret = []
         current_group     = nil
         accumulated_word  = ""
@@ -81,14 +81,14 @@ module Glaemscribe
         l.split("").each{ |c|
           case c
           when " ", "\t"
-            ret += transcribe_word(accumulated_word)
+            ret += transcribe_word(accumulated_word, debug_context)
             ret += ["*SPACE"]
             accumulated_word = ""
           when "\r"
             # Ignore
           when "\n"
-            ret += transcribe_word(accumulated_word)
+            ret += transcribe_word(accumulated_word, debug_context)
             ret += ["*LF"]
             accumulated_word = ""
@@ -97,24 +97,27 @@ module Glaemscribe
             if c_group == current_group
               accumulated_word += c
             else
-              ret += transcribe_word(accumulated_word)
+              ret += transcribe_word(accumulated_word, debug_context)
               current_group    = c_group
               accumulated_word = c
             end
           end
         }
         # Just in case
-        ret += transcribe_word(accumulated_word)
+        ret += transcribe_word(accumulated_word, debug_context)
         ret
       end
-      def transcribe_word(word)
+      def transcribe_word(word, debug_context)
         res = []
         word = WORD_BOUNDARY_TREE + word + WORD_BOUNDARY_TREE
         while word.length != 0
-          r, len = @transcription_tree.transcribe(word)
-          word = word[len..-1]
-          res += r
+          tokens, len = @transcription_tree.transcribe(word)
+          word        = word[len..-1]
+          eaten       = word[0..len-1]
+          res         += tokens
+          debug_context.processor_pathes << [eaten, tokens, tokens]
         end
         # Return token list
         res

data/lib/glaemscribe.rb CHANGED Viewed

@@ -67,6 +67,8 @@ module Glaemscribe
     require API_PATH + "pre_processor/substitute.rb"
     require API_PATH + "pre_processor/rxsubstitute.rb"
     require API_PATH + "pre_processor/up_down_tehta_split.rb"
+    require API_PATH + "post_processor/outspace.rb"
     require API_PATH + "post_processor/reverse.rb"
     require API_PATH + "post_processor/resolve_virtuals.rb"