RubyGems - glaemscribe - Versions diffs - 1.1.14 → 1.3.0 - Mend

glaemscribe 1.1.14 → 1.3.0

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (62)

checksums.yaml +5 -5
data/bin/glaemscribe +21 -17
data/glaemresources/charsets/cirth_ds.cst +540 -0
data/glaemresources/charsets/eldamar.cst +210 -0
data/glaemresources/charsets/sarati_eldamar.cst +256 -0
data/glaemresources/charsets/tengwar_ds_annatar.cst +2868 -0
data/glaemresources/charsets/tengwar_ds_eldamar.cst +2729 -0
data/glaemresources/charsets/tengwar_ds_elfica.cst +2742 -0
data/glaemresources/charsets/tengwar_ds_parmaite.cst +2726 -0
data/glaemresources/charsets/tengwar_ds_sindarin.cst +2722 -0
data/glaemresources/charsets/tengwar_freemono.cst +217 -0
data/glaemresources/charsets/tengwar_guni_annatar.cst +2948 -0
data/glaemresources/charsets/tengwar_guni_eldamar.cst +2809 -0
data/glaemresources/charsets/tengwar_guni_elfica.cst +2809 -0
data/glaemresources/charsets/tengwar_guni_parmaite.cst +2813 -0
data/glaemresources/charsets/tengwar_guni_sindarin.cst +2808 -0
data/glaemresources/charsets/tengwar_telcontar.cst +225 -0
data/glaemresources/charsets/unicode_gothic.cst +64 -0
data/glaemresources/charsets/unicode_runes.cst +121 -0
data/glaemresources/modes/{adunaic.glaem → adunaic-tengwar-glaemscrafu.glaem} +14 -2
data/glaemresources/modes/{blackspeech.glaem → blackspeech-tengwar-general_use.glaem} +13 -3
data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
data/glaemresources/modes/japanese-tengwar.glaem +776 -0
data/glaemresources/modes/{khuzdul.glaem → khuzdul-cirth-moria.glaem} +4 -1
data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
data/glaemresources/modes/{futhorc.glaem → old_english-futhorc.glaem} +0 -0
data/glaemresources/modes/{mercian.glaem → old_english-tengwar-mercian.glaem} +22 -12
data/glaemresources/modes/{westsaxon.glaem → old_english-tengwar-westsaxon.glaem} +20 -11
data/glaemresources/modes/{futhark-runicus.glaem → old_norse-futhark-runicus.glaem} +0 -0
data/glaemresources/modes/{futhark-younger.glaem → old_norse-futhark-younger.glaem} +0 -0
data/glaemresources/modes/{quenya.glaem → quenya-tengwar-classical.glaem} +32 -50
data/glaemresources/modes/raw-cirth.glaem +154 -0
data/glaemresources/modes/raw-tengwar.glaem +46 -23
data/glaemresources/modes/{rlyehian.glaem → rlyehian-tengwar.glaem} +14 -3
data/glaemresources/modes/{sindarin-daeron.glaem → sindarin-cirth-daeron.glaem} +55 -14
data/glaemresources/modes/{sindarin-beleriand.glaem → sindarin-tengwar-beleriand.glaem} +154 -28
data/glaemresources/modes/{sindarin.glaem → sindarin-tengwar-general_use.glaem} +86 -25
data/glaemresources/modes/{telerin.glaem → telerin-tengwar-glaemscrafu.glaem} +16 -6
data/glaemresources/modes/{westron.glaem → westron-tengwar-glaemscrafu.glaem} +18 -8
data/lib/api/charset.rb +67 -7
data/lib/api/charset_parser.rb +14 -1
data/lib/api/constants.rb +3 -4
data/lib/api/fragment.rb +26 -5
data/lib/api/if_tree.rb +70 -8
data/lib/api/macro.rb +40 -0
data/lib/api/mode.rb +66 -19
data/lib/api/mode_parser.rb +117 -14
data/lib/api/object_additions.rb +23 -1
data/lib/api/option.rb +17 -2
data/lib/api/post_processor/outspace.rb +44 -0
data/lib/api/post_processor/resolve_virtuals.rb +25 -9
data/lib/api/resource_manager.rb +1 -0
data/lib/api/rule_group.rb +170 -26
data/lib/api/sheaf_chain_iterator.rb +1 -1
data/lib/api/transcription_pre_post_processor.rb +8 -5
data/lib/api/transcription_processor.rb +15 -12
data/lib/api/tts.rb +51 -0
data/lib/glaemscribe.rb +36 -31
data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +35 -0
data/lib_espeak/glaemscribe_tts.js +505 -0
metadata +76 -24

data/lib/api/charset.rb CHANGED Viewed

@@ -30,11 +30,11 @@ module Glaemscribe
       attr_reader   :virtual_chars
       class Char
-        attr_accessor :line
-        attr_accessor :code
-        attr_accessor :names
-        attr_accessor :str
-        attr_accessor :charset
+        attr_accessor :line     # Line num in the sourcecode
+        attr_accessor :code     # Position in unicode
+        attr_accessor :names    # Names
+        attr_accessor :str      # How does this char resolve as a string
+        attr_accessor :charset  # Pointer to parent charset
         def initialize
           @names = {}
@@ -43,9 +43,13 @@ module Glaemscribe
         def virtual?
           false
         end
+        def sequence?
+          false
+        end
       end
-      class VirtualChar
+      class VirtualChar # Could have had inheritance here ...
         attr_accessor :line
         attr_accessor :names
         attr_accessor :classes
@@ -121,6 +125,45 @@ module Glaemscribe
         def virtual?
           true
         end
+        def sequence?
+          false
+        end
+      end
+      class SequenceChar
+        attr_accessor :line     # Line of code
+        attr_accessor :names    # Names
+        attr_accessor :sequence # The sequence of chars
+        attr_accessor :charset  # Pointer to parent charset
+        def virtual?
+          false
+        end
+        def sequence?
+          true
+        end
+        def str
+          # A sequence char should never arrive unreplaced
+          VIRTUAL_CHAR_OUTPUT
+        end
+        def finalize
+          if @sequence.count == 0
+            @charset.errors << Glaeml::Error.new(@line, "Sequence for sequence char is empty.")
+          end
+          @sequence.each{ |symbol|
+            # Check that the sequence is correct
+            found = @charset[symbol]
+            if !found
+              @charset.errors << Glaeml::Error.new(@line, "Sequence char #{symbol} cannot be found in the charset.")
+            end
+          }
+        end
       end
       def initialize(name)
@@ -156,10 +199,21 @@ module Glaemscribe
         @chars << c
       end
+      def add_sequence_char(line, names, seq)
+        return if names.empty? || names.include?("?") # Ignore characters with '?'
+        c             = SequenceChar.new
+        c.line        = line
+        c.names       = names
+        c.sequence    = seq.split.reject{|token| token.empty? }
+        c.charset     = self
+        @chars << c
+      end
       def finalize
         @errors         = []
         @lookup_table   = {}
-        @virtual_chars  = []
+        @virtual_chars  = [] # A convenient filtered array
         @chars.each { |c|
           c.names.each { |cname|
@@ -179,6 +233,12 @@ module Glaemscribe
           end
         }
+        @chars.each{|c|
+          if c.class == SequenceChar
+            c.finalize
+          end
+        }
         API::Debug::log("Finalized charset '#{@name}', #{@lookup_table.count} symbols loaded.")
       end

data/lib/api/charset_parser.rb CHANGED Viewed

@@ -47,6 +47,13 @@ module Glaemscribe
           names  = char_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
           @charset.add_char(char_element.line,code,names)
         }
+        doc.root_node.gpath("seq").each{ |seq_elemnt|
+          names       = seq_elemnt.args
+          child_node  = seq_elemnt.children.first
+          seq         = (child_node && child_node.text?)?(child_node.args.first):("")
+          @charset.add_sequence_char(seq_elemnt.line,names,seq)
+        }
         doc.root_node.gpath("virtual").each { |virtual_element|
           names     = virtual_element.args
@@ -57,7 +64,13 @@ module Glaemscribe
           virtual_element.gpath("class").each { |class_element|
             vc =  Charset::VirtualChar::VirtualClass.new
             vc.target    = class_element.args[0]
-            vc.triggers  = class_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
+            vc.triggers  = class_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
+            # Allow triggers to be defined inside the body of the class element
+            text_lines      = class_element.children.select { |c| c.text? }.map{ |c| c.args.first}
+            inner_triggers  = text_lines.join(" ").split(/\s/).select{ |e| e != '' }
+            vc.triggers    += inner_triggers
             classes << vc
           }
           virtual_element.gpath("reversed").each { |reversed_element|

data/lib/api/constants.rb CHANGED Viewed

@@ -23,11 +23,10 @@
 module Glaemscribe
   module API
     WORD_BREAKER        = "|"
-    WORD_BOUNDARY       = "_"
-    SPECIAL_CHAR_UNDERSCORE = '➊'
-    SPECIAL_CHAR_NBSP       = '➋'
+    WORD_BOUNDARY_LANG  = "_"
+    WORD_BOUNDARY_TREE  = "\u0000"
     UNKNOWN_CHAR_OUTPUT = "☠"
     VIRTUAL_CHAR_OUTPUT = "☢" # When transcribing a virtual char...
   end

data/lib/api/fragment.rb CHANGED Viewed

@@ -41,7 +41,7 @@ module Glaemscribe
       EQUIVALENCE_RX_OUT    = /(\(.*?\))/
       EQUIVALENCE_RX_IN     = /\((.*?)\)/
-      # Should pass a fragment expression, e.g. : "h(a|ä)(i|ï)"
+      # Should pass a fragment expression, e.g. : "h(a,ä)(i,ï)"
       def initialize(sheaf, expression)
         @sheaf      = sheaf
         @mode       = sheaf.mode
@@ -49,16 +49,16 @@ module Glaemscribe
         @expression = expression
         # Split the fragment, turn it into an array of arrays, e.g. [[h],[a,ä],[i,ï]]
-        equivalences = expression.split(EQUIVALENCE_RX_OUT).map{ |eq| eq.strip }
+        equivalences = expression.split(EQUIVALENCE_RX_OUT).map{ |eq| eq.strip }.reject{ |eq| eq == '' }
         equivalences = equivalences.map{ |eq|
           eq =~ EQUIVALENCE_RX_IN
           if $1
             eq = $1.split(EQUIVALENCE_SEPARATOR,-1).map{ |elt|
               elt = elt.strip
-              elt.split(/\s/)
-            }
+              elt.split(/\s/).map{ |leaf| finalize_fragment_leaf(leaf) }
+            }
           else
-            eq = [eq.split(/\s/)] # This equivalence has only one possibility
+            eq = [eq.split(/\s/).map{ |leaf| finalize_fragment_leaf(leaf) }] # This equivalence has only one possibility
           end
         }
@@ -87,6 +87,7 @@ module Glaemscribe
         # Calculate all combinations for this fragment (productize the array of arrays)
         res = equivalences[0]
+        # ((eq0 x eq1) x eq2) x eq3 ) ... )))))
         (equivalences.length-1).times { |i|
           prod  = res.product(equivalences[i+1]).map{ |x,y| x+y}
           res   = prod
@@ -95,6 +96,26 @@ module Glaemscribe
         @combinations = res
       end
+      def finalize_fragment_leaf(leaf)
+        if src?
+          # Replace {UNI_XXXX} by its value to allow any unicode char to be found in the transcription tree
+          leaf = leaf.gsub(RuleGroup::UNICODE_VAR_NAME_REGEXP_OUT) { |cap_var|
+            unival = $1
+            new_char = [unival.hex].pack("U")
+            new_char = "\u0001" if new_char == '_'
+            new_char
+          }
+          # Replace '_' (word boundary) by '\u0000' to allow
+          # the real underscore to be used in the transcription tree
+          # (Do it after replacing the uni_xxx vars because they have underscores inside)
+          leaf = leaf.gsub(WORD_BOUNDARY_LANG, WORD_BOUNDARY_TREE)
+          leaf = leaf.gsub("\u0001","_")
+        end
+        leaf
+      end
       def p
         ret = "---- " + @expression + "\n"

data/lib/api/if_tree.rb CHANGED Viewed

@@ -24,14 +24,36 @@ module Glaemscribe
   module API
     module IfTree
+      # A branching if condition
       class IfCond
         attr_accessor :line, :expression, :parent_if_term, :child_code_block
         def initialize(line, parent_if_term, expression)
           @parent_if_term     = parent_if_term
           @expression         = expression
         end
+        def offset
+          parent_if_term.offset + " "
+        end
+        def prefix
+          offset + "|-"
+        end
+        def inspect
+          "#{prefix} IF #{expression}\n" +
+          "#{child_code_block.inspect}"
+        end
+      end
+      # A line of code
+      class CodeLine
+        attr_accessor :expression, :line
+        def initialize(expression, line)
+          @expression   = expression
+          @line         = line
+        end
       end
+      # A node (code lines / preprocessor operators / ... )
+      # A node may have children or not depending on their nature
       class Term
         attr_accessor :parent_code_block
         def initialize(parent_code_block)
@@ -43,24 +65,30 @@ module Glaemscribe
         def is_pre_post_processor_operators?
           false
         end
+        def is_macro_deploy?
+          false
+        end
+        def offset
+          parent_code_block.offset + " "
+        end
+        def prefix
+          offset + "|- "
+        end
       end
+      # A ifterm may have multiple ifconds (if,elsif,elsif,...,else)
       class IfTerm < Term
         attr_accessor :if_conds
         def initialize(parent_code_block)
           super(parent_code_block)
           @if_conds = []
         end
-      end
-      class CodeLine
-        attr_accessor :expression, :line
-        def initialize(expression, line)
-          @expression   = expression
-          @line         = line
+        def inspect
+          "#{prefix} CONDITIONAL BLOCK\n" +
+            @if_conds.map{ |c| c.inspect }.join("\n")
         end
       end
       class PrePostProcessorOperatorsTerm < Term
         attr_accessor :operators
         def initialize(parent_code_block)
@@ -70,6 +98,9 @@ module Glaemscribe
         def is_pre_post_processor_operators?
           true
         end
+        def inspect
+          "#{prefix} OPERATORS (#{@operators.count})"
+        end
       end
       class CodeLinesTerm < Term
@@ -81,6 +112,25 @@ module Glaemscribe
         def is_code_lines?
           true
         end
+        def inspect
+          "#{prefix} CODE LINES (#{@code_lines.count})"
+        end
+      end
+      class MacroDeployTerm < Term
+        attr_accessor :macro, :line, :arg_value_expressions
+        def initialize(macro, line, parent_code_block, arg_value_expressions)
+          super(parent_code_block)
+          @line                   = line
+          @macro                  = macro
+          @arg_value_expressions  = arg_value_expressions
+        end
+        def is_macro_deploy?
+          true
+        end
+        def inspect
+          "#{prefix} MACRO DEPLOY (#{macro.name})"
+        end
       end
       class CodeBlock
@@ -89,6 +139,18 @@ module Glaemscribe
           @parent_if_cond = parent_if_cond
           @terms          = []
         end
+        def offset
+          ((parent_if_cond)?(parent_if_cond.offset):("")) + " "
+        end
+        def prefix
+          offset + "|- "
+        end
+        def inspect
+          ret = ""
+          ret += "|-ROOT\n" if !parent_if_cond
+          ret += "#{prefix} Code block\n" +
+          @terms.map{|t| t.inspect}.join("\n")
+        end
       end
     end

data/lib/api/macro.rb ADDED Viewed

@@ -0,0 +1,40 @@
+# encoding: UTF-8
+#
+# Glǽmscribe (also written Glaemscribe) is a software dedicated to
+# the transcription of texts between writing systems, and more
+# specifically dedicated to the transcription of J.R.R. Tolkien's
+# invented languages to some of his devised writing systems.
+#
+# Copyright (C) 2015 Benjamin Babut (Talagan).
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+module Glaemscribe
+  module API
+    class Macro
+      attr_reader :name, :rule_group, :mode, :arg_names
+      attr_reader :root_code_block
+      def initialize(rule_group,name,arg_names)
+        @rule_group       = rule_group
+        @mode             = rule_group.mode
+        @name             = name
+        @arg_names        = arg_names
+        @root_code_block  = IfTree::CodeBlock.new
+      end
+    end
+  end
+end

data/lib/api/mode.rb CHANGED Viewed

@@ -22,6 +22,23 @@
 module Glaemscribe
   module API
+    class ModeDebugContext
+      attr_accessor :preprocessor_output,
+        :processor_pathes,
+        :processor_output,
+        :postprocessor_output,
+        :tts_output
+      def initialize
+        @preprocessor_output  = ""
+        @processor_pathes     = []
+        @processor_output     = []
+        @postprocessor_output = ""
+        @tts_output = ""
+      end
+    end
     class Mode
       attr_accessor :errors
@@ -41,6 +58,9 @@ module Glaemscribe
       attr_accessor :world, :invention
+      attr_accessor :has_tts
+      attr_reader   :current_tts_voice
       attr_reader   :latest_option_values
       def initialize(name)
@@ -50,6 +70,8 @@ module Glaemscribe
         @supported_charsets = {}
         @options            = {}
         @last_raw_options   = nil
+        @has_tts            = false
+        @current_tts_voice  = nil
         @pre_processor    = TranscriptionPreProcessor.new(self)
         @processor        = TranscriptionProcessor.new(self)
@@ -95,7 +117,7 @@ module Glaemscribe
         trans_options_converted = {}
-        # Do a conversion to values space
+        # Do a conversion from names to values space
         trans_options.each{ |oname,valname|
           trans_options_converted[oname] = @options[oname].value_for_value_name(valname)
         }
@@ -117,7 +139,13 @@ module Glaemscribe
         @processor.finalize(@latest_option_values)
         raw_mode.finalize options if raw_mode
+        # Update the current espeak voice
+        if @has_tts
+          espeak_option       = @options['espeak_voice'].value_name_for_value(@latest_option_values['espeak_voice'])
+          @current_tts_voice  = TTS.option_name_to_voice(espeak_option)
+        end
         self
       end
@@ -128,16 +156,19 @@ module Glaemscribe
         @raw_mode = loaded_raw_mode.deep_clone
       end
-      def replace_specials(l)
-        l.
-          gsub("_",SPECIAL_CHAR_UNDERSCORE).
-          gsub("\u00a0",SPECIAL_CHAR_NBSP)
-      end
-      def strict_transcribe(content, charset = nil)
+      def strict_transcribe(content, charset, debug_context)
         charset = default_charset if !charset
         return false, "*** No charset usable for transcription. Failed!" if !charset
+        if has_tts
+          begin
+            content = TTS.ipa(content, @current_tts_voice, (raw_mode != nil) )['ipa']
+            debug_context.tts_output += content
+          rescue StandardError => e
+            return false, "TTS pre-transcription failed : #{e}."
+          end
+        end
         # Parser works line by line
         ret = content.lines.map{ |l|
           restore_lf = false
@@ -145,10 +176,16 @@ module Glaemscribe
             l[-1] = ""
             restore_lf = true
           end
           l = @pre_processor.apply(l)
-          l = replace_specials(l)
-          l = @processor.apply(l)
+          debug_context.preprocessor_output += l + "\n"
+          l = @processor.apply(l, debug_context)
+          debug_context.processor_output += l
           l = @post_processor.apply(l, charset)
+          debug_context.postprocessor_output += l + "\n"
           l += "\n" if restore_lf
           l
         }.join
@@ -156,24 +193,34 @@ module Glaemscribe
       end
       def transcribe(content, charset = nil)
+        debug_context = ModeDebugContext.new
         if raw_mode
           chunks = content.split(/({{.*?}})/m)
           ret = ''
           res = true
           chunks.each{ |c|
             if c =~ /{{(.*?)}}/m
-              succ, r = raw_mode.strict_transcribe($1,charset)
-              res = res && succ
-              ret += r if succ
+              succ, r = raw_mode.strict_transcribe($1, charset, debug_context)
+              if !succ
+                return false, r, debug_context # Propagate error
+              end
+              ret += r
             else
-              succ, r = strict_transcribe(c,charset)
-              res = res && succ
-              ret += r if succ
+              succ, r = strict_transcribe(c,charset,debug_context)
+              if !succ
+                return false, r, debug_context # Propagate error
+              end
+              ret += r
             end
           }
-          return res,ret
+          return res, ret, debug_context
         else
-          strict_transcribe(content,charset)
+          succ, r = strict_transcribe(content, charset, debug_context)
+          return succ, r, debug_context
         end
       end