RubyGems - glaemscribe - Versions diffs - 1.0.0 - Mend

glaemscribe 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

checksums.yaml +7 -0
data/LICENSE.txt +19 -0
data/bin/glaemscribe +307 -0
data/glaemresources/charsets/cirth_ds.cst +205 -0
data/glaemresources/charsets/sarati_eldamar.cst +256 -0
data/glaemresources/charsets/tengwar_ds.cst +318 -0
data/glaemresources/charsets/unicode_gothic.cst +64 -0
data/glaemresources/charsets/unicode_runes.cst +120 -0
data/glaemresources/modes/adunaic.glaem +251 -0
data/glaemresources/modes/blackspeech-annatar.glaem +318 -0
data/glaemresources/modes/blackspeech.glaem +260 -0
data/glaemresources/modes/gothic.glaem +78 -0
data/glaemresources/modes/khuzdul.glaem +141 -0
data/glaemresources/modes/mercian.glaem +419 -0
data/glaemresources/modes/oldnorse-medieval.glaem +127 -0
data/glaemresources/modes/quenya-sarati.glaem +320 -0
data/glaemresources/modes/quenya.glaem +307 -0
data/glaemresources/modes/sindarin-beleriand.glaem +285 -0
data/glaemresources/modes/sindarin-classical.glaem +276 -0
data/glaemresources/modes/sindarin-daeron.glaem +182 -0
data/glaemresources/modes/telerin.glaem +302 -0
data/glaemresources/modes/valarin-sarati.glaem +210 -0
data/glaemresources/modes/westron.glaem +340 -0
data/glaemresources/modes/westsaxon.glaem +342 -0
data/lib/api/charset.rb +84 -0
data/lib/api/charset_parser.rb +55 -0
data/lib/api/constants.rb +29 -0
data/lib/api/debug.rb +36 -0
data/lib/api/eval.rb +268 -0
data/lib/api/fragment.rb +113 -0
data/lib/api/glaeml.rb +200 -0
data/lib/api/if_tree.rb +96 -0
data/lib/api/mode.rb +112 -0
data/lib/api/mode_parser.rb +314 -0
data/lib/api/option.rb +64 -0
data/lib/api/post_processor/reverse.rb +36 -0
data/lib/api/pre_processor/downcase.rb +35 -0
data/lib/api/pre_processor/elvish_numbers.rb +47 -0
data/lib/api/pre_processor/rxsubstitute.rb +40 -0
data/lib/api/pre_processor/substitute.rb +38 -0
data/lib/api/pre_processor/up_down_tehta_split.rb +138 -0
data/lib/api/resource_manager.rb +130 -0
data/lib/api/rule.rb +99 -0
data/lib/api/rule_group.rb +159 -0
data/lib/api/sheaf.rb +70 -0
data/lib/api/sheaf_chain.rb +86 -0
data/lib/api/sheaf_chain_iterator.rb +108 -0
data/lib/api/sub_rule.rb +40 -0
data/lib/api/transcription_pre_post_processor.rb +118 -0
data/lib/api/transcription_processor.rb +137 -0
data/lib/api/transcription_tree_node.rb +91 -0
data/lib/glaemscribe.rb +70 -0
metadata +112 -0

data/lib/api/option.rb ADDED Viewed

@@ -0,0 +1,64 @@
+# encoding: UTF-8
+#
+# Glǽmscribe (also written Glaemscribe) is a software dedicated to
+# the transcription of texts between writing systems, and more
+# specifically dedicated to the transcription of J.R.R. Tolkien's
+# invented languages to some of his devised writing systems.
+#
+# Copyright (C) 2015 Benjamin Babut (Talagan).
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+module Glaemscribe
+  module API
+    # A named option with a default value.
+    #
+    # An option is a boolean switch when it is built with an empty values
+    # hash, and an enumeration (value name => value) otherwise.
+    class Option
+      attr_reader   :name
+      attr_reader   :type
+      attr_reader   :default_value_name
+      attr_reader   :values
+      # String tags identifying the two kinds of option.
+      class Type
+        ENUM = "ENUM"
+        BOOL = "BOOL"
+      end
+      # name               - identifier of the option.
+      # default_value_name - name of the value to use by default; for a
+      #                      boolean option 'true' / 'false' (or true / false).
+      # values             - hash mapping value names to values; an empty
+      #                      hash makes the option a boolean one.
+      def initialize(name, default_value_name, values)
+        @name               = name
+        @default_value_name = default_value_name
+        @type               = values.empty? ? Type::BOOL : Type::ENUM
+        @values             = values
+      end
+      # Returns the default value: true or false for a boolean option,
+      # otherwise the entry stored in the values hash under the default
+      # value name (nil when there is no such entry).
+      def default_value
+        if @type == Type::BOOL
+          # Go through value_for_value_name so that a default supplied as the
+          # boolean true is honoured exactly like the string 'true'. Any
+          # unrecognised name still gives false.
+          value_for_value_name(@default_value_name) == true
+        else
+          @values[@default_value_name]
+        end
+      end
+      # Translates a value name into a value.
+      #
+      # Boolean option: 'true' / true give true, 'false' / false give false,
+      # anything else gives nil. Enumeration: plain lookup in the values hash.
+      def value_for_value_name(val_name)
+        return @values[val_name] unless @type == Type::BOOL
+        return true  if val_name == 'true' || val_name == true
+        return false if val_name == 'false' || val_name == false
+        nil
+      end
+    end
+  end
+end

data/lib/api/post_processor/reverse.rb ADDED Viewed

@@ -0,0 +1,36 @@
+# encoding: UTF-8
+#
+# Glǽmscribe (also written Glaemscribe) is a software dedicated to
+# the transcription of texts between writing systems, and more
+# specifically dedicated to the transcription of J.R.R. Tolkien's
+# invented languages to some of his devised writing systems.
+#
+# Copyright (C) 2015 Benjamin Babut (Talagan).
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+module Glaemscribe
+  module API
+    # Post-processor operator registered under the name "reverse".
+    class ReversePostProcessorOperator < PostProcessorOperator
+      # Returns whatever calling #reverse on the given object yields.
+      def apply(l)
+        l.reverse
+      end
+    end
+    # Make the operator reachable by name through the resource manager.
+    ResourceManager.register_post_processor_class("reverse", ReversePostProcessorOperator)
+  end
+end

data/lib/api/pre_processor/downcase.rb ADDED Viewed

@@ -0,0 +1,35 @@
+# encoding: UTF-8
+#
+# Glǽmscribe (also written Glaemscribe) is a software dedicated to
+# the transcription of texts between writing systems, and more
+# specifically dedicated to the transcription of J.R.R. Tolkien's
+# invented languages to some of his devised writing systems.
+#
+# Copyright (C) 2015 Benjamin Babut (Talagan).
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+module Glaemscribe
+  module API
+    # Pre-processor operator registered under the name "downcase".
+    class DowncasePreProcessorOperator < PreProcessorOperator
+      # Returns the result of UnicodeUtils.downcase applied to the input.
+      def apply(l)
+        UnicodeUtils.downcase(l)
+      end
+    end
+    # Make the operator reachable by name through the resource manager.
+    ResourceManager.register_pre_processor_class("downcase", DowncasePreProcessorOperator)
+  end
+end

data/lib/api/pre_processor/elvish_numbers.rb ADDED Viewed

@@ -0,0 +1,47 @@
+# encoding: UTF-8
+#
+# Glǽmscribe (also written Glaemscribe) is a software dedicated to
+# the transcription of texts between writing systems, and more
+# specifically dedicated to the transcription of J.R.R. Tolkien's
+# invented languages to some of his devised writing systems.
+#
+# Copyright (C) 2015 Benjamin Babut (Talagan).
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+module Glaemscribe
+  module API
+    # Pre-processor operator rewriting every run of decimal digits found in
+    # the input as the same number written in another base (upper-case
+    # digits).
+    #
+    # Operator arguments:
+    #   @args[0] - target base, converted with to_i; 12 when absent.
+    #   @args[1] - "true" / true to emit the converted digits in reverse
+    #              order; reversal is enabled when the argument is absent.
+    class ElvishNumbersPreProcessorOperator < PreProcessorOperator
+      # Returns a copy of l in which each match of /\d+/ has been converted.
+      def apply(l)
+        # Arguments are read from @args, the instance variable the
+        # substitute / rxsubstitute operators rely on as well.
+        base_arg    = @args[0]
+        reverse_arg = @args[1]
+        base        = base_arg ? base_arg.to_i : 12
+        reverse     = reverse_arg.nil? ? true : (reverse_arg == "true" || reverse_arg == true)
+        l.gsub(/\d+/) do |digits|
+          converted = digits.to_i.to_s(base).upcase
+          reverse ? converted.reverse : converted
+        end
+      end
+    end
+    # Registered at namespace level, once the class is fully defined.
+    ResourceManager::register_pre_processor_class("elvish_numbers", ElvishNumbersPreProcessorOperator)
+  end
+end

data/lib/api/pre_processor/rxsubstitute.rb ADDED Viewed

@@ -0,0 +1,40 @@
+# encoding: UTF-8
+#
+# Glǽmscribe (also written Glaemscribe) is a software dedicated to
+# the transcription of texts between writing systems, and more
+# specifically dedicated to the transcription of J.R.R. Tolkien's
+# invented languages to some of his devised writing systems.
+#
+# Copyright (C) 2015 Benjamin Babut (Talagan).
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+module Glaemscribe
+  module API
+    # Pre-processor operator registered under the name "rxsubstitute".
+    #
+    # @args[0] is interpolated into a regular expression and @args[1] is
+    # the replacement handed to gsub.
+    class RxSubstitutePreProcessorOperator < PreProcessorOperator
+      # Returns l with every match of the pattern replaced.
+      def apply(l)
+        l.gsub(/#{@args[0]}/, @args[1])
+      end
+      # Make the operator reachable by name through the resource manager.
+      ResourceManager.register_pre_processor_class("rxsubstitute", RxSubstitutePreProcessorOperator)
+    end
+  end
+end

data/lib/api/pre_processor/substitute.rb ADDED Viewed

@@ -0,0 +1,38 @@
+# encoding: UTF-8
+#
+# Glǽmscribe (also written Glaemscribe) is a software dedicated to
+# the transcription of texts between writing systems, and more
+# specifically dedicated to the transcription of J.R.R. Tolkien's
+# invented languages to some of his devised writing systems.
+#
+# Copyright (C) 2015 Benjamin Babut (Talagan).
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+module Glaemscribe
+  module API
+    # Pre-processor operator registered under the name "substitute".
+    #
+    # @args[0] is the pattern and @args[1] the replacement, both handed to
+    # gsub unchanged.
+    class SubstitutePreProcessorOperator < PreProcessorOperator
+      # Returns l with every occurrence of the pattern replaced.
+      def apply(l)
+        l.gsub(@args[0], @args[1])
+      end
+    end
+    # Make the operator reachable by name through the resource manager.
+    ResourceManager.register_pre_processor_class("substitute", SubstitutePreProcessorOperator)
+  end
+end

data/lib/api/pre_processor/up_down_tehta_split.rb ADDED Viewed

@@ -0,0 +1,138 @@
+# encoding: UTF-8
+#
+# Glǽmscribe (also written Glaemscribe) is a software dedicated to
+# the transcription of texts between writing systems, and more
+# specifically dedicated to the transcription of J.R.R. Tolkien's
+# invented languages to some of his devised writing systems.
+#
+# Copyright (C) 2015 Benjamin Babut (Talagan).
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+module Glaemscribe
+  module API
+    # Pre-processor operator that inserts a phantom vowel ("@") into every
+    # consonant / vowel / consonant token sequence of a word, so that CVC
+    # becomes C@VC.
+    #
+    # Operator arguments:
+    #   args[0] - comma-separated list of vowel tokens.
+    #   args[1] - comma-separated list of consonant tokens.
+    class UpDownTehtaSplitPreProcessorOperator < PreProcessorOperator
+      # Parsed token lists: arrays of stripped strings.
+      attr_reader :vowel_list, :consonant_list
+      def initialize(args)
+        super(args)
+        # Kept in instance variables so that the attr_readers above return
+        # the parsed lists (plain locals would leave both readers nil).
+        @vowel_list         = args[0].split(/,/).map { |s| s.strip }
+        @consonant_list     = args[1].split(/,/).map { |s| s.strip }
+        @vowel_map          = {} # Recognize vowel tokens
+        @consonant_map      = {} # Recognize consonant tokens
+        @splitter_tree      = TranscriptionTreeNode.new(nil,nil) # Recognize tokens
+        @word_split_map     = {}
+        # The word split map will help to recognize words
+        # The splitter tree will help to split words into tokens
+        @vowel_list.each     { |v| @splitter_tree.add_subpath(v, v); @vowel_map[v] = v }
+        @consonant_list.each { |c| @splitter_tree.add_subpath(c, c); @consonant_map[c] = c }
+        # Every single character used by any token counts as a word character.
+        all_letters = (@vowel_list + @consonant_list).join("").split(//).sort.uniq
+        all_letters.each { |l| @word_split_map[l] = l }
+      end
+      # Classifies a token: "V" for a vowel, "C" for a consonant, "X" for
+      # anything else.
+      def type_of(token)
+        return "V" if @vowel_map[token]
+        return "C" if @consonant_map[token]
+        "X"
+      end
+      # Splits the word w into tokens, then returns the word rebuilt with
+      # "@" inserted after the leading consonant of every CVC sequence.
+      def apply_to_word(w)
+        res = []
+        if w.strip.empty?
+          res << w
+        else
+          while w.length != 0
+            # transcribe presumably returns the recognised token together
+            # with the number of characters it consumed - confirm against
+            # TranscriptionTreeNode.
+            r, len = @splitter_tree.transcribe(w)
+            if r != [UNKNOWN_CHAR_OUTPUT]
+              res << r
+            else
+              # Unknown input: keep the first character as it is.
+              res << w[0..0]
+            end
+            w = w[len..-1]
+          end
+        end
+        res_modified = []
+        # We replace the pattern CVC by CvVC where v is a phantom vowel.
+        # This makes the pattern CVC not possible.
+        i = 0
+        while i < res.count - 2
+          res_modified << res[i]
+          if type_of(res[i]) == "C" && type_of(res[i+1]) == "V" && type_of(res[i+2]) == "C"
+            res_modified << "@"
+            res_modified << res[i+1]
+            # Resume on the closing consonant: it may open the next CVC.
+            i += 2
+          else
+            i += 1
+          end
+        end
+        # Add the remaining stuff
+        res_modified.concat(res.drop(i))
+        res_modified.join("")
+      end
+      # Applies apply_to_word to every maximal run of word characters found
+      # in content; all other characters are copied through unchanged.
+      def apply(content)
+        accumulated_word = ""
+        ret = ""
+        content.split(//).each { |letter|
+          if @word_split_map[letter]
+            accumulated_word += letter
+          else
+            # A non-word character closes the current word.
+            ret += apply_to_word(accumulated_word)
+            ret += letter
+            accumulated_word = ""
+          end
+        }
+        # Flush the word still pending at the end of the input.
+        ret += apply_to_word(accumulated_word)
+        ret
+      end
+    end
+    ResourceManager::register_pre_processor_class("up_down_tehta_split", UpDownTehtaSplitPreProcessorOperator)
+  end
+end

data/lib/api/resource_manager.rb ADDED Viewed

@@ -0,0 +1,130 @@
+# encoding: UTF-8
+#
+# Glǽmscribe (also written Glaemscribe) is a software dedicated to
+# the transcription of texts between writing systems, and more
+# specifically dedicated to the transcription of J.R.R. Tolkien's
+# invented languages to some of his devised writing systems.
+#
+# Copyright (C) 2015 Benjamin Babut (Talagan).
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+module Glaemscribe
+  module API
+    # Module-level registry holding the loaded modes and charsets as well as
+    # the pre / post processor operator classes, all keyed by name.
+    module ResourceManager
+      MODE_PATH     = File.dirname(__FILE__) + "/../../glaemresources/modes/"
+      MODE_EXT      = "glaem"
+      CHARSET_PATH  = File.dirname(__FILE__) + "/../../glaemresources/charsets/"
+      CHARSET_EXT   = "cst"
+      # Default selector of the load_* methods: load everything.
+      ALL           = ["*"]
+      @loaded_modes                     = {}
+      @loaded_charsets                  = {}
+      @pre_processor_operator_classes   = {}
+      @post_processor_operator_classes  = {}
+      class << self
+        # Names (file base names without extension) of the mode files found
+        # in MODE_PATH.
+        def available_mode_names
+          Dir.glob(MODE_PATH + "*.#{MODE_EXT}").map { |mode_file| mode_name_from_file_path(mode_file) }
+        end
+        # Hash of the modes loaded so far.
+        def loaded_modes
+          @loaded_modes
+        end
+        # Hash of the charsets loaded so far.
+        def loaded_charsets
+          @loaded_charsets
+        end
+        # Associates a pre-processor operator class with a name.
+        def register_pre_processor_class(operator_name, operator_class)
+          @pre_processor_operator_classes[operator_name] = operator_class
+        end
+        # Associates a post-processor operator class with a name.
+        def register_post_processor_class(operator_name, operator_class)
+          @post_processor_operator_classes[operator_name] = operator_class
+        end
+        # Pre-processor operator class registered under the name, or nil.
+        def class_for_pre_processor_operator_name(operator_name)
+          @pre_processor_operator_classes[operator_name]
+        end
+        # Post-processor operator class registered under the name, or nil.
+        def class_for_post_processor_operator_name(operator_name)
+          @post_processor_operator_classes[operator_name]
+        end
+        # Debug helper: prints both operator registries.
+        def p
+          puts @pre_processor_operator_classes.inspect
+          puts @post_processor_operator_classes.inspect
+        end
+        # Mode name for a file path: the base name without its extension.
+        def mode_name_from_file_path(file_path)
+          File.basename(file_path,".*")
+        end
+        # Charset name for a file path: the base name without its extension.
+        def charset_name_from_file_path(file_path)
+          File.basename(file_path,".*")
+        end
+        # Parses the mode files of MODE_PATH and records each parsed mode
+        # under its own name. which_ones is ALL (the default), a single mode
+        # name, or a list of mode names.
+        def load_modes(which_ones = ALL)
+          which_ones = [which_ones] if which_ones.is_a?(String)
+          Dir.glob(MODE_PATH + "*.#{MODE_EXT}") do |mode_file|
+            mode_name = mode_name_from_file_path(mode_file)
+            next unless which_ones == ALL || which_ones.include?(mode_name)
+            next if @loaded_modes.include?(mode_name) # Don't load a mode twice
+            banner = "*" * 20
+            API::Debug::log(banner)
+            API::Debug::log("Parsing Mode : #{mode_name}")
+            API::Debug::log(banner)
+            mode = API::ModeParser.new().parse(mode_file)
+            @loaded_modes[mode.name] = mode if mode
+          end
+        end
+        # Parses the charset files of CHARSET_PATH and records each parsed
+        # charset under its own name. which_ones is ALL (the default), a
+        # single charset name, or a list of charset names.
+        def load_charsets(which_ones = ALL)
+          which_ones = [which_ones] if which_ones.is_a?(String)
+          Dir.glob(CHARSET_PATH + "*.#{CHARSET_EXT}") do |charset_file|
+            charset_name = charset_name_from_file_path(charset_file)
+            next unless which_ones == ALL || which_ones.include?(charset_name)
+            next if @loaded_charsets.include?(charset_name) # Don't load a charset twice
+            banner = "*" * 20
+            API::Debug::log(banner)
+            API::Debug::log("Parsing Charset : #{charset_name}")
+            API::Debug::log(banner)
+            charset = API::CharsetParser.new().parse(charset_file)
+            @loaded_charsets[charset.name] = charset if charset
+          end
+        end
+        # Loaded charset with the given name, or nil.
+        def charset(name)
+          @loaded_charsets[name]
+        end
+      end
+    end
+  end
+end