RubyGems - hierogloss - Versions diffs - 0.0.1 → 0.0.2 - Mend

hierogloss 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

checksums.yaml +4 -4
data/Gemfile +4 -0
data/README.md +7 -3
data/examples/disjunction.md +1 -1
data/hierogloss.gemspec +1 -0
data/lib/hierogloss/dictionary.rb +36 -18
data/lib/hierogloss/gloss.rb +12 -4
data/lib/hierogloss/mdc.rb +144 -0
data/lib/hierogloss/metrics/data.rb +1084 -0
data/lib/hierogloss/metrics.rb +47 -0
data/lib/hierogloss/version.rb +1 -1
data/lib/hierogloss.rb +2 -0
data/src/dump_metrics.rb +45 -0
data/test/test_dictionary.rb +2 -9
data/test/test_gloss.rb +4 -2
data/test/test_mdc.rb +55 -0
data/test/test_metrics.rb +29 -0
metadata +24 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 95e96759e2925f55b6fbb05fe67628680de08e7a
-  data.tar.gz: 7ead6a34ffe4281f10ec7cebaa94a14bca713009
+  metadata.gz: e0ddc0d217ce2338569073c4d5da2259a595b62b
+  data.tar.gz: a7f2c0564b348abcc6c864796ad69873896079a4
 SHA512:
-  metadata.gz: 2f6d2686c6ba86ce7cc215652c8f6534fdaa3543a52adb6dee4eecdd32d3fb6b9d446d4ebd408e0bb7aad5faa61043059af9ecfbdc573068ad93cae0a22f74d4
-  data.tar.gz: 5fbf2d371331598c34df3b5786ef9ba954f5f769d43d54fed97e1dc6be8125a8b20ad4605a498ebcdd8fc5c363254820eb0c7851a97c85a75fc698ae27571844
+  metadata.gz: 740be2d088f038a3a2d5ab40fc898799890ccba066047d293124532a62043372c6eebe28f75f9f23c7ab3d238b498a1c40e3e018ba9cbe27ad3e82a734ef77af
+  data.tar.gz: 0faa6a5486c3b58c9ac54e6c65028534ffe42a9472dbd782452887d525c439dbda57f009bf450d33e445798209d79c689bcd712712fe714062490522983b324c

data/Gemfile CHANGED Viewed

@@ -2,3 +2,7 @@ source 'https://rubygems.org'
 # Specify your gem's dependencies in hierogloss.gemspec
 gemspec
+# Temporary until Prawn supports ttfunk 1.1.  It's in the current master
+# branch; it's just not released yet.  Enable this to run dump_metrics.rb.
+gem "ttfunk", git: "https://github.com/prawnpdf/ttfunk.git", ref: "56be4cbb7c72"

data/README.md CHANGED Viewed

@@ -2,15 +2,17 @@
 **WORK IN PROGRESS. Future releases may change how things work.**
-Hierogloss allows you to mix glossed hieroglyphic texts with Markdown-style
-formatting.  For example, you can write:
+Hierogloss is a set of extensions for the [Kramdown][] gem for people
+working with hieroglyphs.  Hierogloss allows you to mix glossed
+hieroglyphic texts with Markdown-style formatting.  For example, you can
+write:
     # Disjunction in Middle Egyptian
     This example is based on one in Allen's excellent [Middle Egyptian: An
     Introduction to the Language and Culture of Hieroglyphs][allen].
-    H: 𓊃𓀀𓏤 | 𓊃𓏏𓁐 | 𓂋𓏤𓊪𓅱
+    H: z:A1*Z1 | 𓊃:𓏏*𓁐 | 𓂋:𓏤-𓊪:𓅱
     L: s | s.t | r-pw
     G: man | woman | whichever
     T: either [a] man or [a] woman
@@ -70,3 +72,5 @@ but pass `input: 'hierogloss'` as an argument:
 3. Commit your changes (`git commit -am 'Add some feature'`)
 4. Push to the branch (`git push origin my-new-feature`)
 5. Create new Pull Request
+[kramdown]: http://kramdown.gettalong.org/

data/examples/disjunction.md CHANGED Viewed

@@ -3,7 +3,7 @@
 This example is based on one in Allen's excellent [Middle Egyptian: An
 Introduction to the Language and Culture of Hieroglyphs][allen].
-H: 𓊃𓀀𓏤 | 𓊃𓏏𓁐 | 𓂋𓏤𓊪𓅱
+H: z:A1*Z1 | 𓊃:𓏏*𓁐 | 𓂋:𓏤-𓊪:𓅱
 L: s | s.t | r-pw
 G: man | woman | whichever
 T: either [a] man or [a] woman

data/hierogloss.gemspec CHANGED Viewed

@@ -19,6 +19,7 @@ Gem::Specification.new do |spec|
   spec.require_paths = ["lib"]
   spec.add_dependency "kramdown", "~> 1.3"
+  spec.add_dependency "parslet", "~> 1.4"
   spec.add_development_dependency "prawn", "~> 0.14.0"
   spec.add_development_dependency "bundler", "~> 1.3"
   spec.add_development_dependency "rake"

data/lib/hierogloss/dictionary.rb CHANGED Viewed

@@ -6,34 +6,52 @@ module Hierogloss
     DATA_DIR = File.join(File.dirname(__FILE__), '..', '..', 'data')
     MDC_MAPPING_PATH = File.join(DATA_DIR, "Unicode-MdC-Mapping-v1.utf8")
-    GARDINER = {}
+    SIGN_TO_GARDINER = {}
+    MDC_TO_SIGN = {}
+    SIGN_TO_MDC = {}
     File.open(MDC_MAPPING_PATH, "r:bom|utf-8") do |f|
       f.each_line do |l|
         l.chomp!
         sign, hex, codes, remarks = l.split(/\t/, 4)
         for code in codes.split(/ /)
-          next unless code =~ /\A[A-Z][0-9]+\z/
-          GARDINER[sign] = code
+          MDC_TO_SIGN[code] = sign
+          # Uniliterals.
+          SIGN_TO_MDC[sign] = code if code.length == 1
+          # Gardiner codes, and composite signs starting with Gardiner codes.
+          next unless code =~ /\A[A-Z][0-9]+([-:*].*)?\z/
+          SIGN_TO_GARDINER[sign] = code
+          SIGN_TO_MDC[sign] ||= code
         end
       end
     end
-    "𓄿𓇋𓏭𓂝𓅱𓏲𓃀𓊪𓆑𓅓𓈖𓂋𓉔𓎛𓐍𓄡𓊃𓋴𓈙𓈎𓎡𓎼𓏏𓍿𓂧𓆓".each_char do |c|
-      GARDINER.delete(c)
-    end
-    # Try to kick things into shape for hierogl.ch.
-    def self.headword(word)
-      hw = word
-      hw.gsub!(/[()]/, '')
-      hw.sub!(/=.*\z/, '')
-      hw.sub!(/\.w?t\z/, 't')
-      hw.sub!(/\..*\z/, '')
-      hw
-    end
+    class << self
+      # Try to kick things into shape for hierogl.ch.
+      def headword(word)
+        hw = word
+        hw.gsub!(/[()]/, '')
+        hw.sub!(/=.*\z/, '')
+        hw.sub!(/\.w?t\z/, 't')
+        hw.sub!(/\..*\z/, '')
+        hw
+      end
-    # Given a Unicode hieroglyph, get the corresponding Gardiner sign.
-    def self.gardiner(sign)
-      GARDINER[sign]
+      # Given a Unicode hieroglyph, get the corresponding Gardiner sign.
+      def sign_to_gardiner(sign)
+        SIGN_TO_GARDINER[sign]
+      end
+      # Convert a Manuel de Codage transliteration to the corresponding Unicode
+      # sign.
+      def mdc_to_sign(mdc)
+        MDC_TO_SIGN[mdc]
+      end
+      # Convert a Unicode hieroglyph to a reasonable MdC representation.
+      def sign_to_mdc(sign)
+        SIGN_TO_MDC[sign]
+      end
     end
   end
 end

data/lib/hierogloss/gloss.rb CHANGED Viewed

@@ -7,6 +7,7 @@ module Hierogloss
   #:nodoc:
   class Row
     attr_reader :raw_cells
+    alias :cells :raw_cells
     def initialize(row_text)
       @raw_cells = row_text.split(/\|/).map {|c| c.strip }
@@ -29,7 +30,7 @@ module Hierogloss
     def to_kramdown
       attrs = attributes
       tr = Kramdown::Element.new(:tr, nil, attrs)
-      raw_cells.each do |c|
+      cells.each do |c|
         td = Kramdown::Element.new(:td)
         children = cell_to_kramdown(c)
         if children.kind_of?(Array)
@@ -59,14 +60,21 @@ module Hierogloss
   #:nodoc:
   class HieroglyphRow < Row
+    UNLINKED = {}
+    "𓄿𓇋𓏭𓂝𓅱𓏲𓃀𓊪𓆑𓅓𓈖𓂋𓉔𓎛𓐍𓄡𓊃𓋴𓈙𓈎𓎡𓎼𓏏𓍿𓂧𓆓".each_char {|c| UNLINKED[c] = true }
     def class_attr
       'hgls-h'
     end
+    def cells
+      @cells ||= raw_cells.map {|c| Hierogloss::MdC.parse(c) }
+    end
     def cell_to_kramdown(cell)
-      cell.chars.map do |c|
-        gardiner = Dictionary.gardiner(c)
-        if !gardiner.nil?
+      cell.to_linear_hieroglyphs.chars.map do |c|
+        gardiner = Dictionary.sign_to_gardiner(c)
+        unless gardiner.nil? || UNLINKED[c]
           search_link("Signe:#{gardiner}", c)
         else
           Kramdown::Element.new(:text, c)

data/lib/hierogloss/mdc.rb ADDED Viewed

@@ -0,0 +1,144 @@
+require 'parslet'
+module Hierogloss
+  #:nodoc: Our parser for the Manuel de Codage format.
+  module MdC
+    class Block
+    end
+    class Sign < Block
+      attr_reader :name
+      def initialize(name)
+        @name = name
+      end
+      def to_unicode
+        Hierogloss::Dictionary.mdc_to_sign(name) || name
+      end
+      def to_debug
+        name
+      end
+      def to_linear_hieroglyphs
+        to_unicode
+      end
+      def to_mdc(precedence)
+        mdc = Hierogloss::Dictionary.sign_to_mdc(name) || name
+        # Wrap composite signs in parens.
+        return "(#{mdc})" if mdc =~ /[-:*]/
+        mdc
+      end
+    end
+    class Group < Block
+      attr_reader :blocks
+      def initialize(blocks)
+        @blocks = blocks
+      end
+      def to_debug
+        blocks.map {|b| b.to_debug }
+      end
+      def to_linear_hieroglyphs
+        blocks.map {|b| b.to_linear_hieroglyphs }
+      end
+      protected
+      # This whole precedence business may need more test cases and further work.
+      def maybe_parens(current, context, str)
+        if current < context
+          "(#{str})"
+        else
+          str
+        end
+      end
+    end
+    class Sequence < Group
+      def to_mdc(precedence)
+        maybe_parens(2, precedence, blocks.map {|b| b.to_mdc(2) }.join("*"))
+      end
+    end
+    class Stack < Group
+      def to_debug
+        [:stack].concat(super)
+      end
+      def to_mdc(precedence)
+        maybe_parens(1, precedence, blocks.map {|b| b.to_mdc(1) }.join(":"))
+      end
+    end
+    class Quadrats < Group
+      # Actually render to a string here.
+      def to_linear_hieroglyphs
+        super.flatten.join
+      end
+      def to_mdc
+        blocks.map {|b| b.to_mdc(0) }.join("-")
+      end
+    end
+    class Parser < Parslet::Parser
+      # Whitespace and equivalent delimiters.
+      rule(:space) { match('[-_ ]').repeat(1) }
+      rule(:space?) { space.maybe }
+      # Signs.
+      rule(:alpha_sign) { match('[A-Za-z0-9]').repeat(1) }
+      rule(:unicode_sign) { match('[\u{13000}-\u{1342F}]') }
+      rule(:sign) { (alpha_sign | unicode_sign).as(:sign) >> space? }
+      # Parenthesized blocks.
+      rule(:parens) { str('(') >> space? >> sequence >> str(')') >> space? }
+      # "Terminal" chunks in our expression grammar, which will match
+      # an actual, concrete symbol in the first position.
+      rule(:atomic) { sign | parens }
+      # A list of items with separators between them.
+      def separated(item, separator)
+        (item.as(:head) >> (separator >> item).repeat.as(:rest))
+      end
+      # Nested lists of signs separated by "*".
+      rule(:juxtaposed) { separated(atomic, str('*')).as(:juxtaposed) }
+      # Stacks of signs separated by ":".
+      rule(:stack) { separated(juxtaposed, str(':')).as(:stack) }
+      rule(:sequence) { stack.repeat }
+      root(:sequence)
+    end
+    class Transform < Parslet::Transform
+      # If we only have one item, we don't need to build an extra wrapper
+      # class; we can just pass it up.
+      def self.lists_as(klass, list)
+        if list.length == 1
+          list.first
+        else
+          klass.new(list)
+        end
+      end
+      rule(head: subtree(:head), rest: sequence(:rest)) { [head].concat(rest) }
+      rule(sign: simple(:sign)) { Sign.new(sign.to_s) }
+      rule(stack: subtree(:list)) {|d| lists_as(Stack, d[:list]) }
+      rule(juxtaposed: subtree(:list)) {|d| lists_as(Sequence, d[:list]) }
+    end
+    def self.parse(input)
+      parsed = Parser.new.parse(input)
+      Quadrats.new(Transform.new.apply(parsed))
+    end
+  end
+end