RubyGems - gullah - Versions diffs - 0.0.0 - Mend

gullah 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

checksums.yaml +7 -0
data/.gitignore +1 -0
data/LICENSE +21 -0
data/README.md +87 -0
data/Rakefile +11 -0
data/TODO.md +2 -0
data/examples/hat.rb +27 -0
data/examples/trash.rb +42 -0
data/examples/xml.rb +45 -0
data/gullah.gemspec +31 -0
data/lib/gullah/atom.rb +132 -0
data/lib/gullah/boundary.rb +11 -0
data/lib/gullah/dotifier.rb +127 -0
data/lib/gullah/error.rb +7 -0
data/lib/gullah/hopper.rb +142 -0
data/lib/gullah/iterator.rb +67 -0
data/lib/gullah/leaf.rb +24 -0
data/lib/gullah/node.rb +553 -0
data/lib/gullah/parse.rb +233 -0
data/lib/gullah/picker.rb +56 -0
data/lib/gullah/rule.rb +90 -0
data/lib/gullah/segment.rb +92 -0
data/lib/gullah/trash.rb +15 -0
data/lib/gullah/version.rb +7 -0
data/lib/gullah.rb +777 -0
data/test/basic_test.rb +451 -0
data/test/big_tree_test.rb +26 -0
data/test/boundary_test.rb +29 -0
data/test/date_test.rb +111 -0
data/test/error_test.rb +245 -0
data/test/json_test.rb +124 -0
data/test/parse_demo_test.rb +33 -0
data/test/precondition_test.rb +68 -0
data/test/tests_per_subrule_test.rb +49 -0
data/test/tree_walking_test.rb +88 -0
metadata +157 -0

data/test/boundary_test.rb ADDED Viewed

@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+require 'minitest/autorun'
+require 'gullah'
+require 'byebug'
+require 'date'
+# :stopdoc:
+# a test to make sure boundary rules work
+class BoundaryTest < Minitest::Test
+  class Bounded
+    extend Gullah
+    rule :S, 'word+'
+    leaf :word, /\w+/
+    boundary :term, /[.!?](?=\s*\z|\s+"?\p{Lu})|[:;]/
+  end
+  def test_example
+    parses = Bounded.parse 'One sentence. Another sentence.'
+    assert_equal 1, parses.length, 'Got one parse.'
+    parse = parses.first
+    assert_equal 5, parse.length, 'One node per sentence plus one per boundary plus one space.'
+    assert_equal 2, parse.nodes.count(&:boundary?), 'There are two boundary nodes.'
+  end
+end

data/test/date_test.rb ADDED Viewed

@@ -0,0 +1,111 @@
+# frozen_string_literal: true
+require 'minitest/autorun'
+require 'gullah'
+require 'byebug'
+require 'date'
+# :stopdoc:
+class DateTest < Minitest::Test
+  class DateGrammar
+    extend Gullah
+    rule :iso, 'year "/" month "/" day', tests: %i[sane]
+    rule :american, 'month "/" day "/" year', tests: %i[sane]
+    rule :euro, 'day "/" month "/" year', tests: %i[sane]
+    leaf :day, /\b\d{1,2}\b/, tests: %i[day], process: :to_i
+    leaf :month, /\b\d{1,2}\b/, tests: %i[month], process: :to_i
+    # to confirm that we can pass a proc as a processor
+    leaf :year, /\b\d+\b/, process: ->(n) { n.atts[:value] = n.text.to_i }
+    def to_i(n)
+      n.atts[:value] = n.text.to_i
+    end
+    def month(root, n)
+      if root == n.parent
+        month = n.atts[:value]
+        if month < 1
+          [:fail, 'month must be greater than 0']
+        elsif month > 12
+          [:fail, 'month cannot be greater than 12']
+        else
+          :pass
+        end
+      end
+    end
+    def day(root, n)
+      if root == n.parent
+        day = n.atts[:value]
+        if day < 1
+          [:fail, 'day must be greater than 0']
+        elsif day > 31
+          [:fail, 'day cannot be greater than 31']
+        else
+          :pass
+        end
+      end
+    end
+    def sane(n)
+      day = n.descendants.find { |o| o.name == :day }
+      month = n.descendants.find { |o| o.name == :month }
+      year = n.descendants.find { |o| o.name == :year }
+      if day && month && year
+        begin
+          Date.new year.atts[:value], month.atts[:value], day.atts[:value]
+        rescue ArgumentError
+          return [
+            :fail,
+            "month #{month.text} does not have a day #{day.text} in #{year.text}"
+          ]
+        end
+        :pass
+      else
+        [:fail, "we don't have all parts of a date"]
+      end
+    end
+  end
+  def test_iso
+    parses = DateGrammar.parse '2010/5/6'
+    assert_equal 1, parses.length, 'one parse'
+    parse = parses.first
+    assert_equal 1, parse.roots.length, 'one root node'
+    root = parse.roots.first
+    assert_equal :iso, root.name, 'got an iso date'
+  end
+  def test_american
+    parses = DateGrammar.parse '10/31/2021'
+    assert_equal 1, parses.length, 'one parse'
+    parse = parses.first
+    assert_equal 1, parse.roots.length, 'one root node'
+    root = parse.roots.first
+    assert_equal :american, root.name, 'got an American date'
+  end
+  def test_euro
+    parses = DateGrammar.parse '31/10/2021'
+    assert_equal 1, parses.length, 'one parse'
+    parse = parses.first
+    assert_equal 1, parse.roots.length, 'one root node'
+    root = parse.roots.first
+    assert_equal :euro, root.name, 'got a euro date'
+  end
+  def test_ambiguous
+    parses = DateGrammar.parse '5/6/1969'
+    assert_equal 2, parses.length, 'two parses'
+    options = %i[euro american]
+    parses.each do |p|
+      assert_equal 1, p.roots.length
+      options -= [p.roots.first.name]
+    end
+    assert_equal [], options, 'one is american and one euro'
+  end
+end

data/test/error_test.rb ADDED Viewed

@@ -0,0 +1,245 @@
+# frozen_string_literal: true
+require 'minitest/autorun'
+require 'gullah'
+require 'byebug'
+# :stopdoc:
+# Checks that each kind of grammar misuse raises Gullah::Error and that the
+# error message contains the expected text.
+class ErrorTest < Minitest::Test
+  # a grammar with neither rules nor leaves
+  class NoLeaf
+    extend Gullah
+  end
+  def test_no_leaves
+    e = assert_raises Gullah::Error, 'leaves required' do
+      NoLeaf.parse 'foo'
+    end
+    assert_match(/no leaves/, e.message, 'expected no-leaf message')
+  end
+  # a rule but no leaves at all; the message asserted below is the no-leaf one
+  class UndefinedRules1
+    extend Gullah
+    rule :foo, 'bar'
+  end
+  def test_undefined_rules_1
+    e = assert_raises Gullah::Error, 'some rules undefined' do
+      UndefinedRules1.parse 'bar'
+    end
+    assert_match(/no leaves/, e.message, 'expected no-leaf message')
+  end
+  # the rule refers to baz, which is never defined
+  class UndefinedRules2
+    extend Gullah
+    rule :foo, 'bar baz'
+    leaf :bar, /bar/
+  end
+  def test_undefined_rules_2
+    e = assert_raises Gullah::Error, 'some rules undefined' do
+      UndefinedRules2.parse 'bar'
+    end
+    assert_match(/remain undefined/, e.message, 'remain undefined')
+  end
+  # a complete grammar, used to try adding a rule after parsing
+  class AddAfterParse1
+    extend Gullah
+    rule :foo, 'bar baz'
+    leaf :bar, /bar/
+    leaf :baz, /baz/
+  end
+  def test_add_after_parse_1
+    # parse outside the block so only the late definition can satisfy assert_raises
+    AddAfterParse1.parse 'bar baz'
+    e = assert_raises Gullah::Error, 'definition after parse' do
+      AddAfterParse1.rule :plugh, 'plugh'
+    end
+    assert_match(/must be defined before parsing/, e.message, 'cannot define rule after parsing')
+  end
+  # a complete grammar, used to try adding a leaf after parsing
+  class AddAfterParse2
+    extend Gullah
+    rule :foo, 'bar baz'
+    leaf :bar, /bar/
+    leaf :baz, /baz/
+  end
+  def test_add_after_parse_2
+    # parse outside the block so only the late definition can satisfy assert_raises
+    AddAfterParse2.parse 'bar baz'
+    e = assert_raises Gullah::Error, 'definition after parse' do
+      AddAfterParse2.leaf :plugh, /plugh/
+    end
+    assert_match(/must be defined before parsing/, e.message, 'cannot define leaf after parsing')
+  end
+  # names a test method that the class does not define
+  class UndefinedTest
+    extend Gullah
+    rule :foo, 'bar baz', tests: %i[undefined]
+    leaf :bar, /bar/
+    leaf :baz, /baz/
+  end
+  def test_undefined_test
+    e = assert_raises Gullah::Error, 'undefined test' do
+      UndefinedTest.parse 'bar baz'
+    end
+    assert_match(/is not defined/, e.message, 'must define tests')
+  end
+  # names a processor method that the class does not define
+  class UndefinedProcessor
+    extend Gullah
+    rule :foo, 'bar baz', process: :undefined
+    leaf :bar, /bar/
+    leaf :baz, /baz/
+  end
+  def test_undefined_processor
+    e = assert_raises Gullah::Error, 'undefined processor' do
+      UndefinedProcessor.parse 'bar baz'
+    end
+    assert_match(/is not defined/, e.message, 'must define processors')
+  end
+  # names a precondition method that the class does not define
+  class UndefinedPrecondition
+    extend Gullah
+    rule :foo, 'bar baz', preconditions: [:undefined]
+    leaf :bar, /bar/
+    leaf :baz, /baz/
+  end
+  def test_undefined_precondition
+    e = assert_raises Gullah::Error, 'undefined precondition' do
+      UndefinedPrecondition.parse 'bar baz'
+    end
+    assert_match(/is not defined/, e.message, 'must define preconditions')
+  end
+  # the test method takes no arguments
+  class BadTest
+    extend Gullah
+    rule :foo, 'bar baz', tests: %i[bad]
+    leaf :bar, /bar/
+    leaf :baz, /baz/
+    def bad
+      puts 'I have no arguments at all!'
+    end
+  end
+  def test_zero_arity
+    e = assert_raises Gullah::Error, 'arity 0' do
+      BadTest.parse 'bar baz'
+    end
+    assert_match(/must take either one or two arguments/, e.message, 'needs one arg')
+  end
+  # the test method takes four arguments
+  class AlsoBadTest
+    extend Gullah
+    rule :foo, 'bar baz', tests: %i[bad]
+    leaf :bar, /bar/
+    leaf :baz, /baz/
+    def bad(_root, _node, _other, _things)
+      puts 'I have too many arguments!'
+    end
+  end
+  def test_excessive_arity
+    e = assert_raises Gullah::Error, 'arity many' do
+      AlsoBadTest.parse 'bar baz'
+    end
+    assert_match(/must take either one or two arguments/, e.message, 'no more than 2 args')
+  end
+  # has a leaf whose name contains "@"; the rule added in the test refers to it
+  class MisnamedRule
+    extend Gullah
+    leaf :'bar@', /bar/
+    leaf :baz, /baz/
+  end
+  def test_misnamed_rule
+    e = assert_raises Gullah::Error, 'rule name' do
+      MisnamedRule.rule :foo, 'bar@ baz'
+    end
+    assert_match(/cannot parse/, e.message, 'bad rule name')
+  end
+  # leaves only; the test adds a rule whose {2,1} suffix puts the larger number first
+  class BadSuffix
+    extend Gullah
+    leaf :bar, /bar/
+    leaf :baz, /baz/
+  end
+  def test_bad_suffix_rule
+    e = assert_raises Gullah::Error, 'rule suffix' do
+      # use the grammar defined for this test rather than MisnamedRule
+      BadSuffix.rule :foo, 'bar{2,1} baz'
+    end
+    assert_match(/is greater than/, e.message, 'bad suffix')
+  end
+  # a well-formed grammar, parsed with a filter name the test expects to be rejected
+  class Decent
+    extend Gullah
+    rule :foo, 'bar baz'
+    leaf :bar, /bar/
+    leaf :baz, /baz/
+  end
+  def test_filters
+    e = assert_raises Gullah::Error, 'unknown filter' do
+      Decent.parse 'bar baz', filters: %i[foo]
+    end
+    assert_match(/unknown filter/, e.message)
+  end
+  # the one-argument test returns :foo, which the test expects to be rejected
+  class BadTestReturnValue
+    extend Gullah
+    rule :foo, 'bar baz', tests: %i[foo]
+    leaf :bar, /bar/
+    leaf :baz, /baz/
+    def foo(_n)
+      :foo
+    end
+  end
+  def test_test_return_value
+    e = assert_raises Gullah::Error, 'bad test return value' do
+      BadTestReturnValue.parse 'bar baz'
+    end
+    assert_match(/unexpected value/, e.message)
+  end
+  # the two-argument test on a leaf returns :foo, which the test expects to be rejected
+  class BadAncestorTestReturnValue
+    extend Gullah
+    rule :foo, 'bar baz'
+    leaf :bar, /bar/, tests: %i[foo]
+    leaf :baz, /baz/
+    def foo(_root, _n)
+      :foo
+    end
+  end
+  def test_ancestor_test_return_value
+    e = assert_raises Gullah::Error, 'bad ancestor test return value' do
+      BadAncestorTestReturnValue.parse 'bar baz'
+    end
+    assert_match(/unexpected value/, e.message)
+  end
+end

data/test/json_test.rb ADDED Viewed

@@ -0,0 +1,124 @@
+# frozen_string_literal: true
+require 'minitest/autorun'
+require 'gullah'
+require 'byebug'
+require 'json'
+# :stopdoc:
+# a proof of concept JSON parser
+class JsonTest < Minitest::Test
+  class Gson
+    extend Gullah
+    # NOTE: these rules have processors to simplify testing
+    # this is *not* the most efficient way to deserialize the JSON string
+    # better would be to convert the AST after parsing
+    rule :object, '"{" key_value_pair* last_pair | empty_object', process: :objectify
+    rule :last_pair, 'key ":" json following_brace', process: :inherit_json_value
+    rule :key_value_pair, 'key ":" json ","', process: :inherit_json_value
+    rule :array, '"[" array_item* json? "]"', process: :arrayify
+    rule :json, 'complex | simple',           process: :inherit_value
+    rule :complex, 'array | object',          process: :inherit_value
+    rule :array_item, 'json ","',             process: :inherit_value
+    rule :simple, 'string | null | integer | si | float | boolean', process: :inherit_value
+    leaf :boolean, /\b(true|false)\b/, process: ->(n) { n.atts[:value] = n.text == 'true' }
+    leaf :string, /'(?:[^'\\]|\\.)*'(?!\s*:)/, process: :clean_string
+    leaf :string, /"(?:[^"\\]|\\.)*"(?!\s*:)/, process: :clean_string
+    leaf :null, /\bnull\b/,                process: ->(n) { n.atts[:value] = nil }
+    leaf :si, /\b\d\.\d+e[1-9]\d*\b/,      process: ->(n) { n.atts[:value] = n.text.to_f }
+    leaf :float, /\b\d+\.\d+\b/,           process: ->(n) { n.atts[:value] = n.text.to_f }
+    leaf :integer, /\b[1-9]\d*\b(?!\.\d)/, process: ->(n) { n.atts[:value] = n.text.to_i }
+    # terrible, horrible, no good, very bad hacks to reduce backtracking
+    leaf :following_brace, /}/
+    leaf :empty_object, /\{\s*\}/
+    leaf :key, /'(?:[^'\\]|\\.)*'(?=\s*:)/, process: :clean_string
+    leaf :key, /"(?:[^"\\]|\\.)*"(?=\s*:)/, process: :clean_string
+    def inherit_json_value(node)
+      node.atts[:value] = node.children.find { |n| n.name == :json }.atts[:value]
+    end
+    def inherit_value(node)
+      node.atts[:value] = node.children.first.atts[:value]
+    end
+    def clean_string(node)
+      text = node.text
+      node.atts[:value] = text[1...(text.length - 1)].gsub(/\\(.)/, '\1')
+    end
+    def arrayify(node)
+      node.atts[:value] = node.children.reject(&:leaf?).map do |n|
+        n.subtree.find { |c| c.name == :json }.atts[:value]
+      end
+    end
+    def objectify(node)
+      node.atts[:value] = if node.children.first.name == :empty_object
+                            {}
+                          else
+                            node.children.reject(&:leaf?).map do |pair|
+                              key, _, value = pair.children
+                              [key.atts[:value], value.atts[:value]]
+                            end.to_h
+                          end
+    end
+  end
+  def test_various
+    [
+      [],
+      {},
+      1,
+      1.1,
+      1.2345678901e10,
+      'string',
+      '"string"',
+      [1],
+      { 'a' => 1 },
+      { 'a' => 1, 'b' => 2 },
+      ['2', { 'a' => false }],
+      [1, nil, '2', { 'a' => false }]
+    ].each do |val|
+      json = JSON.unparse(val)
+      parses = clock(json) { Gson.parse json }
+      assert_equal 1, parses.length, "unambiguous: #{json}"
+      parse = parses.first
+      root = parse.roots.first
+      assert_equal val, root.atts[:value], "parsed value correctly: #{json}"
+    end
+  end
+  # more complex patterns
+  def test_monsters
+    [
+      [[[[1, 2, 3]]]],
+      { 'foo' => { 'foo' => { 'foo' => { 'foo' => { 'foo' => { 'foo' => { 'foo' => { 'foo' => { 'foo' => 1 } } } } } } } } },
+      { 'foo' => [1, 2, true], 'bar' => ['baz'], 'baz' => { 'v1' => nil, 'v2' => [], 'v3' => 'corge' } },
+      [{ 'foo bar' => 1, 'baz' => ['a string with a lot of spaces in it'] }]
+    ].each do |val|
+      json = JSON.unparse(val)
+      parse = clock(json) { Gson.first json }
+      root = parse.roots.first
+      assert_equal val, root.atts[:value], "parsed value correctly: #{json}"
+    end
+  end
+  private
+  # for catching really slow stuff
+  def clock(id)
+    t1 = Time.now
+    value = yield
+    t2 = Time.now
+    delta = t2.to_f - t1.to_f
+    puts "\n#{id}: #{delta} seconds" if delta > 1
+    value
+  end
+end