RubyGems - dendroid - Versions diffs - 0.0.10 → 0.0.11 - Mend

dendroid 0.0.10 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +12 -0
data/lib/dendroid/recognizer/chart.rb +53 -0
data/lib/dendroid/recognizer/e_item.rb +48 -0
data/lib/dendroid/recognizer/item_set.rb +37 -0
data/lib/dendroid/recognizer/recognizer.rb +282 -0
data/spec/dendroid/grm_analysis/grm_analyzer_spec.rb +1 -72
data/spec/dendroid/recognizer/chart_spec.rb +2 -0
data/spec/dendroid/recognizer/e_item_spec.rb +55 -0
data/spec/dendroid/recognizer/item_set_spec.rb +63 -0
data/spec/dendroid/recognizer/recognizer_spec.rb +186 -0
data/spec/dendroid/support/sample_grammars.rb +76 -0
data/version.txt +1 -1
metadata +11 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 270fc74811d70652e19c4ed42cd11138a1fe9fc413e9b1856b982edfa28c5d51
-  data.tar.gz: 280351b252bd5c4a63f3082375053ea7d3bf9a9d0d32acc055dc33cce91ed628
+  metadata.gz: 2564f1269225e08732a9f995b10ebbbbf4710b0a1b0aea73e7fe4b486c34a1aa
+  data.tar.gz: db15f965e9365276ffc576435d514cd6c9170a8727c7fafe1425a9de7ed3e0cd
 SHA512:
-  metadata.gz: 7a34047f56f1f488377afd88c4049b935d03d8a0a902cd44f8ffba3d58578c212c5ef7f0b1229192a7f4606b1d683d70ca479273d45d716d98154a38663f233f
-  data.tar.gz: 36578ffb40a0463a2e411000b24fa8005166c1ede8f6a856293c0122e44fdbb46d3758159042db0c9c4ccacf9c1bf071e49cfb86a64792b98fac8bb89447a85a
+  metadata.gz: 2517fd57cca364571e19ddd183d53fcd4fd642f0cf83ecc58ef0f62e5c7512c343cc3db4f31ee621dad5009386db0161e7c2f67944820dd36cb2a253a4d7af80
+  data.tar.gz: 24b77e7c0c5e97df315102c3434dddd251eacab96efaa3d194006c3874f6d260aeafa076ec5b6bd1bed296fbc675bef00e2fcf49a1e4516c20cc6b3e3b0aefdb

data/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,18 @@
 ## [Unreleased]
+## [0.0.11] - 2023-11-02
+Added Earley recognizer and its ancillary classes.
+### Added
+- Class `Chart` and its spec file
+- Class `EItem` and its spec file
+- Class `ItemSet` and its spec file
+- Class `Recognizer` and its spec file
+### Changed
+- RSpec tests: moved module `SampleGrammars` to separate file in folder `support`
 ## [0.0.10] - 2023-11-01
 Added missing class and method documentation, fixed some `Rubocop` offenses.

data/lib/dendroid/recognizer/chart.rb ADDED Viewed

@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+require_relative 'item_set'
+module Dendroid
+  module Recognizer
+    # Also called a parse table.
+    # Assuming that n == number of input tokens,
+    # then the chart is an array with n + 1 entry sets.
+    class Chart
+      extend Forwardable
+      # @return [Array<Recognizer::ItemSet>] The array of item sets
+      attr_reader :item_sets
+      # @return [Boolean] Indicates whether the recognizer successfully processed the whole input
+      attr_writer :success
+      # @return [StandardError] The exception class in case of an error found by the recognizer
+      attr_accessor :failure_class
+      # @return [String] The error message
+      attr_accessor :failure_reason
+      def_delegators :@item_sets, :[], :last, :size
+      # Constructor
+      # Initialize the chart with one empty item set.
+      def initialize
+        @item_sets = []
+        @success = false
+        append_new_set
+      end
+      # Add a new empty item set at the end of the array of item sets
+      def append_new_set()
+        item_sets << ItemSet.new
+      end
+      # Add an EItem to the last item set
+      # @param e_item [EItem]
+      def seed_last_set(e_item)
+        item_sets.last.add_item(e_item)
+      end
+      # Return true if the input text is valid according to the grammar.
+      # @return [Boolean]
+      def successful?
+        @success
+      end
+    end # class
+  end # module
+end # module

data/lib/dendroid/recognizer/e_item.rb ADDED Viewed

@@ -0,0 +1,48 @@
+# frozen_string_literal: true
+require 'forwardable'
+module Dendroid
+  module Recognizer
+    # An Earley item is essentially a pair consisting of a dotted item and the rank of a token.
+    # It helps to keep track the progress of an Earley recognizer.
+    class EItem
+      extend Forwardable
+      # @return [Dendroid::GrmAnalysis::DottedItem]
+      attr_reader :dotted_item
+      # @return [Integer] the rank of the token that correspond to the start of the rule.
+      attr_reader :origin
+      def_delegators :@dotted_item, :completed?, :expecting?, :next_symbol, :pre_scan?
+      # @param aDottedItem [Dendroid::GrmAnalysis::DottedItem]
+      # @param origin [Integer]
+      def initialize(aDottedItem, origin)
+        @dotted_item = aDottedItem
+        @origin = origin
+      end
+      # @return [Dendroid::Syntax::NonTerminal] the head of the production rule
+      def lhs
+        dotted_item.rule.lhs
+      end
+      # Equality test.
+      # @return [Boolean] true iff dotted items and origins are equal
+      def ==(other)
+        return true if eql?(other)
+        di = dotted_item
+        (origin == other.origin) && (di == other.dotted_item)
+      end
+      # @return [String] the text representation of the Earley item
+      def to_s
+        "#{dotted_item} @ #{origin}"
+      end
+    end # class
+  end # module
+end # module

data/lib/dendroid/recognizer/item_set.rb ADDED Viewed

@@ -0,0 +1,37 @@
+# frozen_string_literal: true
+module Dendroid
+  module Recognizer
+    # Holds the EItem identified by the recognizer when processing at token at given rank.
+    class ItemSet
+      extend Forwardable
+      # @return [Recognizer::EItem]
+      attr_reader :items
+      def_delegators :@items, :clear, :each, :empty?, :select, :size
+      def initialize
+        @items = []
+      end
+      # Add an Early item to the set
+      # @param anItem [Recognizer::EItem]
+      def add_item(anItem)
+        @items << anItem unless items.include? anItem
+      end
+      # Find the items that expect a given grammar symbol
+      # @param aSymbol [Denroid::Syntax::GrmSymbol]
+      # @return [void]
+      def items_expecting(aSymbol)
+        items.select { |itm| itm.expecting?(aSymbol) }
+      end
+      # Return a text representation of the item set
+      # @return [String]
+      def to_s
+        items.join("\n")
+      end
+    end # class
+  end # module
+end # module

data/lib/dendroid/recognizer/recognizer.rb ADDED Viewed

@@ -0,0 +1,282 @@
+# frozen_string_literal: true
+require 'set'
+require_relative '../grm_analysis/grm_analyzer'
+require_relative 'e_item'
+require_relative 'chart'
+module Dendroid
+  # This module hosts the classes needed to implement an Earley recognizer
+  module Recognizer
+    # A recognizer determines whether the input text complies with the grammar (syntax) rules.
+    # This class implements the Earley recognition algorithm.
+    class Recognizer
+      # @return [GrmAnalysis::GrmAnalyzer]
+      attr_reader :grm_analysis
+      # @return [Object] the tokenizer; it must respond to `input=` and `next_token`
+      attr_reader :tokenizer
+      # @param grammar [Object] the grammar handed over to the grammar analyzer
+      # @param tokenizer [Object] the object that splits the source text into tokens
+      def initialize(grammar, tokenizer)
+        @grm_analysis = GrmAnalysis::GrmAnalyzer.new(grammar)
+        @tokenizer = tokenizer
+      end
+      # Try to recognize the given source text.
+      # @param source [String] the text to analyze
+      # @return [Recognizer::Chart] the resulting chart; query it with `successful?`
+      def run(source)
+        tokenizer.input = source
+        tok = tokenizer.next_token
+        if tok.nil? && !grm_analysis.grammar.start_symbol.nullable?
+          chart = new_chart
+          chart.failure_class = StandardError
+          chart.failure_reason = 'Error: Input may not be empty nor blank.'
+          chart
+        else
+          earley_parse(tok)
+        end
+      end
+      # Run the Earley algorithm, starting with the given token.
+      # @param initial_token [Object, nil] first token of the input; nil when the input is empty
+      # @return [Recognizer::Chart]
+      def earley_parse(initial_token)
+        chart = new_chart
+        # No placeholder entry for an empty input: a nil "token" can be neither scanned nor reported
+        tokens = [initial_token].compact
+        predicted_symbols = [Set.new]
+        eos_reached = initial_token.nil?
+        rank = 0
+        loop do
+          eos_reached = advance_next_token(tokens, predicted_symbols) unless eos_reached
+          advance = false
+          # The current set may grow while being iterated: entries added on the way are visited too
+          chart[rank].each do |entry|
+            # For each entry, do either completer, scanner or predictor action
+            tick = do_entry_action(chart, entry, rank, tokens, :genuine, predicted_symbols)
+            advance ||= tick
+          end
+          # Without a successful scan there is no next item set to process
+          break unless advance
+          rank += 1
+        end
+        determine_outcome(chart, tokens)
+        chart
+      end
+      private
+      # Create a chart whose first item set holds the initial items of the start symbol.
+      # @return [Recognizer::Chart]
+      def new_chart
+        chart = Chart.new
+        initial_items.each { |e_item| chart.seed_last_set(e_item) }
+        chart
+      end
+      # Build the Earley items (origin 0) predicted for the rules of the start symbol.
+      # @return [Array<EItem>]
+      def initial_items
+        top_symbol = grm_analysis.grammar.start_symbol
+        # Reminder: there might be multiple rules for the start symbol
+        prods = grm_analysis.grammar.nonterm2productions[top_symbol]
+        prods.flat_map do |prd|
+          prd.predicted_items.map { |item| EItem.new(item, 0) }
+        end
+      end
+      # Fetch the next token, if any, and make room for the symbols predicted at its rank.
+      # @param tokens [Array] the tokens read so far; the new token is appended to it
+      # @param predicted_symbols [Array<Set>] one set of already predicted symbols per rank
+      # @return [Boolean] true when the end of the input was reached
+      def advance_next_token(tokens, predicted_symbols)
+        tok = tokenizer.next_token
+        return true unless tok
+        tokens << tok
+        predicted_symbols << Set.new
+        false
+      end
+      # Dispatch the given entry to the completer, the scanner or the predictor.
+      # @return [Boolean] true when the scanner consumed the current token
+      def do_entry_action(chart, entry, rank, tokens, mode, predicted_symbols)
+        if entry.completed?
+          completer(chart, entry, rank, tokens, mode)
+          false
+        elsif entry.next_symbol.terminal?
+          scanner(chart, entry, rank, tokens)
+        else
+          predictor(chart, entry, rank, tokens, mode, predicted_symbols)
+          false
+        end
+      end
+      # procedure PREDICTOR((A → α•Bβ, j), k)
+      #     for each (B → γ) in GRAMMAR_RULES_FOR(B) do
+      #         ADD_TO_SET((B → •γ, k), S[k])
+      #     end
+      #   Assuming next symbol is a non-terminal
+      #   Error case: next actual token matches none of the expected tokens.
+      # In :genuine mode, a symbol is predicted once per rank and the items that
+      # begin with a terminal differing from the next token are left out.
+      def predictor(chart, item, rank, tokens, mode, predicted_symbols)
+        next_symbol = item.next_symbol
+        if mode == :genuine
+          predicted_symbols << Set.new if rank == predicted_symbols.size
+          predicted = predicted_symbols[rank]
+          return if predicted.include?(next_symbol)
+          predicted.add(next_symbol)
+        end
+        prods = grm_analysis.symbol2productions[next_symbol]
+        curr_set = chart[rank]
+        next_token = tokens[rank]
+        prods.each do |prd|
+          entry_items = prd.predicted_items
+          entry_items.each do |entry|
+            member = entry.next_symbol
+            if member&.terminal?
+              next unless next_token
+              next if (member.name != next_token.terminal) && mode == :genuine
+            end
+            new_item = EItem.new(entry, rank)
+            curr_set.add_item(new_item)
+          end
+        end
+        # Use trick from paper John Aycock and R. Nigel Horspool: "Practical Earley Parsing"
+        if next_symbol.nullable?
+          next_item = grm_analysis.next_item(item.dotted_item)
+          if next_item
+            new_item = EItem.new(next_item, item.origin)
+            curr_set.add_item(new_item)
+          end
+        end
+      end
+      # procedure SCANNER((A → α•aβ, j), k, words)
+      #     if j < LENGTH(words) and a ⊂ PARTS_OF_SPEECH(words[k]) then
+      #         ADD_TO_SET((A → αa•β, j), S[k+1])
+      #     end
+      # Assuming next symbol is a terminal
+      # @return [Boolean] true when the token at given rank matched the expected terminal
+      def scanner(chart, scan_item, rank, tokens)
+        dit = scan_item.dotted_item
+        return false unless rank < tokens.size && dit.next_symbol.name == tokens[rank].terminal
+        new_rank = rank + 1
+        chart.append_new_set if chart[new_rank].nil?
+        next_dotted_item = grm_analysis.next_item(dit)
+        new_item = EItem.new(next_dotted_item, scan_item.origin)
+        chart[new_rank].add_item(new_item)
+        true
+      end
+      # procedure COMPLETER((B → γ•, x), k)
+      #     for each (A → α•Bβ, j) in S[x] do
+      #         ADD_TO_SET((A → αB•β, j), S[k])
+      #     end
+      # In :genuine mode, the items expecting a terminal other than the next token are left out.
+      def completer(chart, item, rank, tokens, mode)
+        origin = item.origin
+        curr_set = chart[rank]
+        set_at_origin = chart[origin]
+        next_token = tokens[rank]
+        callers = set_at_origin.items_expecting(item.lhs)
+        callers.each do |call_item|
+          return_item = grm_analysis.next_item(call_item.dotted_item)
+          next unless return_item
+          member = return_item.next_symbol
+          if member&.terminal? && (mode == :genuine)
+            next unless next_token
+            next if member.name != next_token.terminal
+          end
+          new_item = EItem.new(return_item, call_item.origin)
+          curr_set.add_item(new_item)
+        end
+      end
+      # Empty the item set at given rank and fill it again with its seed items:
+      # the initial items for rank 0, otherwise the scan entries of the previous set
+      # with their dot moved one position further.
+      def seed_set(chart, rank)
+        curr_set = chart[rank]
+        if rank.zero?
+          # There is no previous set here: chart[-1] would wrap around to the last set
+          curr_set.clear
+          initial_items.each { |e_item| curr_set.add_item(e_item) }
+          return
+        end
+        previous_set = chart[rank - 1]
+        curr_set.clear
+        scan_entries = previous_set.select { |ent| ent.dotted_item.next_symbol&.terminal? }
+        scan_entries.each do |ent|
+          new_item = grm_analysis.next_item(ent.dotted_item)
+          curr_set.add_item(EItem.new(new_item, ent.origin))
+        end
+      end
+      # Decide whether the recognition succeeded and record the verdict in the chart.
+      # On failure, the chart also receives a failure class and a failure reason.
+      # @return [Boolean] true in case of success
+      def determine_outcome(chart, tokens)
+        success = false
+        if chart.size == tokens.size + 1
+          top_symbol = grm_analysis.grammar.start_symbol
+          top_rules = grm_analysis.grammar.nonterm2productions[top_symbol]
+          final_items = top_rules.flat_map(&:reduce_items)
+          # Success requires a completed rule of the start symbol that spans the whole input
+          success = chart.last.items.any? do |entry|
+            entry.origin.zero? && final_items.include?(entry.dotted_item)
+          end
+        end
+        report_failure(chart, tokens) unless success
+        chart.success = success
+      end
+      # Record in the chart why the recognition failed.
+      def report_failure(chart, tokens)
+        replay_last_set(chart, tokens)
+        full_size = tokens.size + 1
+        return if chart.size > full_size
+        expectation = expectation_text(chart.last)
+        chart.failure_class = StandardError
+        chart.failure_reason =
+          if chart.size < full_size
+            # Recognizer stopped prematurely...
+            syntax_error_message(tokens[chart.size - 1], expectation)
+          else
+            # EOS unexpected...
+            premature_end_message(tokens.last, expectation)
+          end
+      end
+      # Describe the terminals that the given item set is waiting for.
+      # @return [String]
+      def expectation_text(item_set)
+        terminals = item_set.items.select(&:pre_scan?).map(&:next_symbol).uniq
+        return terminals.first.name.to_s if terminals.size == 1
+        "one of: [#{terminals.map(&:name).join(', ')}]"
+      end
+      # @return [String] the message for a token that none of the items could scan
+      def syntax_error_message(offending_token, expectation)
+        pos = offending_token.position
+        prefix = "Syntax error at or near token line #{pos.lineno}, column #{pos.column} >>>#{offending_token.source}<<<"
+        "#{prefix} Expected #{expectation}, found a #{offending_token.terminal} instead."
+      end
+      # @return [String] the message for an input that ends too early
+      def premature_end_message(last_token, expectation)
+        # Without any token, there is no position to report
+        return "Premature end of input, expected: #{expectation}." if last_token.nil?
+        pos = last_token.position
+        prefix = "Line #{pos.lineno}, column #{pos.column}: Premature end of input after '#{last_token.source}'"
+        "#{prefix}, expected: #{expectation}."
+      end
+      # Rebuild the last item set in :error mode, that is, without discarding
+      # the items that expect another terminal than the next token.
+      def replay_last_set(chart, tokens)
+        rank = chart.size - 1
+        seed_set(chart, rank) # Re-initialize last set with scan entries
+        # Replay in full the actions for last set
+        chart[rank].each do |entry|
+          do_entry_action(chart, entry, rank, tokens, :error, [Set.new])
+        end
+      end
+    end # class
+  end # module
+end # module

data/spec/dendroid/grm_analysis/grm_analyzer_spec.rb CHANGED Viewed

@@ -1,80 +1,9 @@
 # frozen_string_literal: true
 require_relative '../../spec_helper'
-require_relative '../../../lib/dendroid/grm_dsl/base_grm_builder'
+require_relative '../support/sample_grammars'
 require_relative '../../../lib/dendroid/grm_analysis/grm_analyzer'
-module SampleGrammars
-  def grammar_l1
-    builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
-      # Grammar inspired from Wikipedia entry on Earley parsing
-      declare_terminals('PLUS', 'STAR', 'INTEGER')
-      rule('p' => 's')
-      rule('s' => ['s PLUS m', 'm'])
-      rule('m' => ['m STAR t', 't'])
-      rule('t' => 'INTEGER')
-    end
-    builder.grammar
-  end
-  def tokenizer_l1
-    Utils::BaseTokenizer.new do
-      map_verbatim2terminal({ '+' => :PLUS, '*' => :STAR })
-      scan_verbatim(['+', '*'])
-      scan_value(/\d+/, :INTEGER, ->(txt) { txt.to_i })
-    end
-  end
-  def grammar_l2
-    builder = GrmDSL::BaseGrmBuilder.new do
-      # Grammar inspired from Loup Vaillant's example
-      # https://loup-vaillant.fr/tutorials/earley-parsing/recogniser
-      declare_terminals('PLUS', 'MINUS',  'STAR', 'SLASH')
-      declare_terminals('LPAREN', 'RPAREN', 'NUMBER')
-      rule('p' => 'sum')
-      rule('sum' => ['sum PLUS product', 'sum MINUS product', 'product'])
-      rule('product' => ['product STAR factor', 'product SLASH factor', 'factor'])
-      rule('factor' => ['LPAREN sum RPAREN', 'NUMBER'])
-    end
-    builder.grammar
-  end
-  def tokenizer_l2
-    Utils::BaseTokenizer.new do
-      map_verbatim2terminal({
-                              '+' => :PLUS,
-                              '-' => :MINUS,
-                              '*' => :STAR,
-                              '/' => :SLASH,
-                              '(' => :LPAREN,
-                              ')' => :RPAREN
-                            })
-      scan_verbatim(['+', '-', '*', '/', '(', ')'])
-      scan_value(/\d+/, :NUMBER, ->(txt) { txt.to_i })
-    end
-  end
-  def grammar_l3
-    builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
-      # Grammar inspired from Andrew Appel's example
-      # Modern Compiler Implementation in Java
-      declare_terminals('a', 'c', 'd')
-      rule('Z' => ['d', 'X Y Z'])
-      rule('Y' => ['', 'c'])
-      rule('X' => %w[Y a])
-    end
-    builder.grammar
-  end
-end # module
 describe Dendroid::GrmAnalysis::GrmAnalyzer do
   include SampleGrammars
   let(:grammar) { grammar_l1 }

data/spec/dendroid/recognizer/chart_spec.rb ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ # frozen_string_literal: true
2	+

data/spec/dendroid/recognizer/e_item_spec.rb ADDED Viewed

@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+require_relative '../../spec_helper'
+require_relative '../../../lib/dendroid/syntax/terminal'
+require_relative '../../../lib/dendroid/syntax/non_terminal'
+require_relative '../../../lib/dendroid/syntax/symbol_seq'
+require_relative '../../../lib/dendroid/syntax/production'
+require_relative '../../../lib/dendroid/grm_analysis/dotted_item'
+require_relative '../../../lib/dendroid/recognizer/e_item'
+describe Dendroid::Recognizer::EItem do
+  let(:num_symb) { Dendroid::Syntax::Terminal.new('NUMBER') }
+  let(:plus_symb) { Dendroid::Syntax::Terminal.new('PLUS') }
+  let(:expr_symb) { Dendroid::Syntax::NonTerminal.new('expression') }
+  let(:rhs) { Dendroid::Syntax::SymbolSeq.new([num_symb, plus_symb, num_symb]) }
+  let(:empty_body) { Dendroid::Syntax::SymbolSeq.new([]) }
+  let(:prod) { Dendroid::Syntax::Production.new(expr_symb, rhs) }
+  let(:empty_prod) { Dendroid::Syntax::Production.new(expr_symb, empty_body) }
+  let(:sample_dotted) { Dendroid::GrmAnalysis::DottedItem.new(prod, 1) }
+  let(:other_dotted) { Dendroid::GrmAnalysis::DottedItem.new(empty_prod, 0) }
+  let(:sample_origin) { 3 }
+  subject { described_class.new(sample_dotted, sample_origin) }
+  context 'Initialization:' do
+    it 'is initialized with a dotted item and an origin position' do
+      expect { described_class.new(sample_dotted, sample_origin) }.not_to raise_error
+    end
+    it 'knows its related dotted item' do
+      expect(subject.dotted_item).to eq(sample_dotted)
+    end
+    it 'knows its origin value' do
+      expect(subject.origin).to eq(sample_origin)
+    end
+  end # context
+  context 'Provided service:' do
+    it 'knows the lhs of related production' do
+      expect(subject.lhs).to eq(expr_symb)
+    end
+    it 'can compare with another EItem' do
+      expect(subject == subject).to be_truthy
+      expect(subject == described_class.new(sample_dotted, sample_origin)).to be_truthy
+      expect(subject == described_class.new(sample_dotted, 2)).to be_falsey
+      expect(subject == described_class.new(other_dotted, sample_origin)).to be_falsey
+    end
+    it 'can renders a String representation of itself' do
+      expect(subject.to_s).to eq("#{sample_dotted} @ #{sample_origin}")
+    end
+  end # context
+end # describe

data/spec/dendroid/recognizer/item_set_spec.rb ADDED Viewed

@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+require_relative '../../spec_helper'
+require_relative '../../../lib/dendroid/syntax/terminal'
+require_relative '../../../lib/dendroid/syntax/non_terminal'
+require_relative '../../../lib/dendroid/syntax/symbol_seq'
+require_relative '../../../lib/dendroid/syntax/production'
+require_relative '../../../lib/dendroid/grm_analysis/dotted_item'
+require_relative '../../../lib/dendroid/recognizer/e_item'
+require_relative '../../../lib/dendroid/recognizer/item_set'
+describe Dendroid::Recognizer::ItemSet do
+  let(:num_symb) { Dendroid::Syntax::Terminal.new('NUMBER') }
+  let(:plus_symb) { Dendroid::Syntax::Terminal.new('PLUS') }
+  let(:expr_symb) { Dendroid::Syntax::NonTerminal.new('expression') }
+  let(:rhs) { Dendroid::Syntax::SymbolSeq.new([num_symb, plus_symb, num_symb]) }
+  let(:empty_body) { Dendroid::Syntax::SymbolSeq.new([]) }
+  let(:prod) { Dendroid::Syntax::Production.new(expr_symb, rhs) }
+  let(:empty_prod) { Dendroid::Syntax::Production.new(expr_symb, empty_body) }
+  let(:sample_dotted) { Dendroid::GrmAnalysis::DottedItem.new(prod, 1) }
+  let(:sample_origin) { 3 }
+  let(:other_dotted) { Dendroid::GrmAnalysis::DottedItem.new(empty_prod, 0) }
+  let(:first_element) { Dendroid::Recognizer::EItem.new(sample_dotted, sample_origin) }
+  let(:second_element) { Dendroid::Recognizer::EItem.new(other_dotted, 5) }
+  subject { described_class.new }
+  context 'Initialization:' do
+    it 'is initialized without argument' do
+      expect { described_class.new }.not_to raise_error
+    end
+    it 'is empty at creation' do
+      expect(subject).to be_empty
+    end
+  end # context
+  context 'Provided services:' do
+    it 'adds a new element' do
+      subject.add_item(first_element)
+      expect(subject.size).to eq(1)
+      # Trying a second time, doesn't change the set
+      subject.add_item(first_element)
+      expect(subject.size).to eq(1)
+      subject.add_item(second_element)
+      expect(subject.size).to eq(2)
+    end
+    it 'can render a String representation of itself' do
+      subject.add_item(first_element)
+      subject.add_item(second_element)
+      expectations = [
+        'expression => NUMBER . PLUS NUMBER @ 3',
+        'expression => . @ 5'
+      ].join("\n")
+      expect(subject.to_s).to eq(expectations)
+    end
+  end # context
+end # describe

data/spec/dendroid/recognizer/recognizer_spec.rb ADDED Viewed

@@ -0,0 +1,186 @@
+# frozen_string_literal: true
+require_relative '../../spec_helper'
+require_relative '../support/sample_grammars'
+require_relative '../../../lib/dendroid/recognizer/recognizer'
+describe Dendroid::Recognizer::Recognizer do
+  include SampleGrammars
+  let(:grammar1) { grammar_l1 }
+  # Recognizer under test, built from the L1 sample grammar and its tokenizer
+  subject { described_class.new(grammar1, tokenizer_l1) }
+  context 'Initialization:' do
+    it 'is initialized with a grammar' do
+      expect { described_class.new(grammar1, tokenizer_l1) }.not_to raise_error
+    end
+    it 'knows its grammar analyzer' do
+      expect(subject.grm_analysis).to be_kind_of(Dendroid::GrmAnalysis::GrmAnalyzer)
+      expect(subject.grm_analysis.grammar).to eq(grammar1)
+    end
+    it 'knows its tokenizer' do
+      expect(subject.grm_analysis).to be_kind_of(Dendroid::GrmAnalysis::GrmAnalyzer)
+      expect(subject.grm_analysis.grammar).to eq(grammar1)
+    end
+  end # context
+  context 'Recognizer at work:' do
+    it 'can recognize example from Wikipedia' do
+      chart = subject.run('2 + 3 * 4')
+      expect(chart).to be_successful
+      set0 = [ # . 2 + 3 * 4'
+        'p => . s @ 0',
+        's => . s PLUS m @ 0',
+        's => . m @ 0',
+        'm => . m STAR t @ 0',
+        'm => . t @ 0',
+        't => . INTEGER @ 0'
+      ]
+      set1 = [ # 2 . + 3 * 4'
+        't => INTEGER . @ 0',
+        'm => t . @ 0',
+        's => m . @ 0',
+        #'m => m . STAR t @ 0',
+        'p => s . @ 0',
+        's => s . PLUS m @ 0'
+      ]
+      set2 = [ # 2 + . 3 * 4'
+        's => s PLUS . m @ 0',
+        'm => . m STAR t @ 2',
+        'm => . t @ 2',
+        't => . INTEGER @ 2'
+      ]
+      set3 = [ # 2 + 3 . * 4'
+        't => INTEGER . @ 2',
+        'm => t . @ 2',
+        's => s PLUS m . @ 0',
+        'm => m . STAR t @ 2',
+        'p => s . @ 0',
+      # 's => s . PLUS m @ 0'
+      ]
+      set4 = [ # 2 + 3 * . 4'
+        'm => m STAR . t @ 2',
+        't => . INTEGER @ 4'
+      ]
+      set5 = [ # 2 + 3 * 4 .'
+        't => INTEGER . @ 4',
+        'm => m STAR t . @ 2',
+        's => s PLUS m . @ 0',
+        # 'm => m . STAR t @ 2',
+        'p => s . @ 0'
+      # 's => s . PLUS m @ 0'
+      ]
+      [set0, set1, set2, set3, set4, set5].each_with_index do |set, rank|
+        expect(chart[rank].to_s).to eq(set.join("\n"))
+      end
+    end
+    it 'can recognize example for L2 language' do
+      recognizer = described_class.new(grammar_l2, tokenizer_l2)
+      chart = recognizer.run('1 + (2 * 3 - 4)')
+      expect(chart).to be_successful
+      set0 = [ # . 1 + (2 * 3 - 4)
+        'p => . sum @ 0',
+        'sum => . sum PLUS product @ 0',
+        'sum => . sum MINUS product @ 0',
+        'sum => . product @ 0',
+        'product => . product STAR factor @ 0',
+        'product => . product SLASH factor @ 0',
+        'product => . factor @ 0',
+        # 'factor => . LPAREN sum RPAREN @ 0',
+        'factor => . NUMBER @ 0'
+      ]
+      set1 = [ # 1 . + (2 * 3 - 4)
+        'factor => NUMBER . @ 0',
+        'product => factor . @ 0',
+        'sum => product . @ 0',
+        # 'product => product . STAR factor @ 0',
+        # 'product => product . SLASH factor @ 0',
+        'p => sum . @ 0',
+        'sum => sum . PLUS product @ 0',
+      # 'sum => sum . MINUS product @ 0'
+      ]
+      set2 = [ # 1 + . (2 * 3 - 4)
+        'sum => sum PLUS . product @ 0',
+        'product => . product STAR factor @ 2',
+        'product => . product SLASH factor @ 2',
+        'product => . factor @ 2',
+        'factor => . LPAREN sum RPAREN @ 2',
+      # 'factor => . NUMBER @ 2'
+      ]
+      set3 = [ # 1 + (. 2 * 3 - 4)
+        'factor => LPAREN . sum RPAREN @ 2',
+        'sum => . sum PLUS product @ 3',
+        'sum => . sum MINUS product @ 3',
+        'sum => . product @ 3',
+        'product => . product STAR factor @ 3',
+        'product => . product SLASH factor @ 3',
+        'product => . factor @ 3',
+        # 'factor => . LPAREN sum RPAREN @ 3',
+        'factor => . NUMBER @ 3'
+      ]
+      set4 = [ # 1 + (2 . * 3 - 4)
+        'factor => NUMBER . @ 3',
+        'product => factor . @ 3',
+        'sum => product . @ 3',
+        'product => product . STAR factor @ 3',
+      # 'product => product . SLASH factor @ 3',
+      # 'factor => LPAREN sum . RPAREN @ 2',
+      # 'sum => sum . PLUS product @ 3',
+      # 'sum => sum . MINUS product @ 3'
+      ]
+      set5 = [ # 1 + (2 * . 3 - 4)
+        'product => product STAR . factor @ 3',
+        # 'factor => . LPAREN sum RPAREN @ 5',
+        'factor => . NUMBER @ 5'
+      ]
+      set6 = [ # 1 + (2 * 3 . - 4)
+        'factor => NUMBER . @ 5',
+        'product => product STAR factor . @ 3',
+        'sum => product . @ 3',
+        # 'product => product . STAR factor @ 3',
+        # 'product => product . SLASH factor @ 3',
+        # 'factor => LPAREN sum . RPAREN @ 2',
+        # 'sum => sum . PLUS product @ 3',
+        'sum => sum . MINUS product @ 3'
+      ]
+      set7 = [ # 1 + (2 * 3  - . 4)
+        'sum => sum MINUS . product @ 3',
+        'product => . product STAR factor @ 7',
+        'product => . product SLASH factor @ 7',
+        'product => . factor @ 7',
+        # 'factor => . LPAREN sum RPAREN @ 7',
+        'factor => . NUMBER @ 7'
+      ]
+      set8 = [ # 1 + (2 * 3 - 4 .)
+        'factor => NUMBER . @ 7',
+        'product => factor . @ 7',
+        'sum => sum MINUS product . @ 3',
+        # 'product => product . STAR factor @ 7',
+        # 'product => product . SLASH factor @ 7',
+        'factor => LPAREN sum . RPAREN @ 2',
+      # 'sum => sum . PLUS product @ 3',
+      # 'sum => sum . MINUS product @ 3'
+      ]
+      set9 = [ # 1 + (2 * 3 - 4 ).
+        'factor => LPAREN sum RPAREN . @ 2',
+        'product => factor . @ 2',
+        'sum => sum PLUS product . @ 0',
+        # 'product => product . STAR factor @ 2',
+        # 'product => product . SLASH factor @ 2',
+        'p => sum . @ 0',
+      # 'sum => sum . PLUS product @ 0',
+      # 'sum => sum . MINUS product @ 0'
+      ]
+      expectations = [set0, set1, set2, set3, set4, set5, set6, set7, set8, set9]
+      expectations.each_with_index do |set, rank|
+        expect(chart[rank].to_s).to eq(set.join("\n"))
+      end
+    end
+  end # context
+end # describe

data/spec/dendroid/support/sample_grammars.rb ADDED Viewed

@@ -0,0 +1,76 @@
+# frozen_string_literal: true
+require_relative '../../../lib/dendroid/grm_dsl/base_grm_builder'
+require_relative '../../../lib/dendroid/utils/base_tokenizer'
+module SampleGrammars
+  def grammar_l1
+    builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
+      # Grammar inspired from Wikipedia entry on Earley parsing
+      declare_terminals('PLUS', 'STAR', 'INTEGER')
+      rule('p' => 's')
+      rule('s' => ['s PLUS m', 'm'])
+      # rule('s' => 'm')
+      rule('m' => ['m STAR t', 't'])
+      # rule('m' => 't')
+      rule('t' => 'INTEGER')
+    end
+    builder.grammar
+  end
+  def tokenizer_l1
+    Dendroid::Utils::BaseTokenizer.new do
+      map_verbatim2terminal({ '+' => :PLUS, '*' => :STAR })
+      scan_verbatim(['+', '*'])
+      scan_value(/\d+/, :INTEGER, ->(txt) { txt.to_i })
+    end
+  end
+  def grammar_l2
+    builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
+      # Grammar inspired from Loup Vaillant's example
+      # https://loup-vaillant.fr/tutorials/earley-parsing/recogniser
+      declare_terminals('PLUS', 'MINUS',  'STAR', 'SLASH')
+      declare_terminals('LPAREN', 'RPAREN', 'NUMBER')
+      rule('p' => 'sum')
+      rule('sum' => ['sum PLUS product', 'sum MINUS product', 'product'])
+      rule('product' => ['product STAR factor', 'product SLASH factor', 'factor'])
+      rule('factor' => ['LPAREN sum RPAREN', 'NUMBER'])
+    end
+    builder.grammar
+  end
+  def tokenizer_l2
+    Dendroid::Utils::BaseTokenizer.new do
+      map_verbatim2terminal({
+                              '+' => :PLUS,
+                              '-' => :MINUS,
+                              '*' => :STAR,
+                              '/' => :SLASH,
+                              '(' => :LPAREN,
+                              ')' => :RPAREN })
+      scan_verbatim(['+', '-', '*', '/', '(', ')'])
+      scan_value(/\d+/, :NUMBER, ->(txt) { txt.to_i })
+    end
+  end
+  def grammar_l3
+    builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
+      # Grammar inspired from Andrew Appel's example
+      # Modern Compiler Implementation in Java
+      declare_terminals('a', 'c', 'd')
+      rule('Z' => ['d', 'X Y Z'])
+      rule('Y' => ['', 'c'])
+      rule('X' => ['Y', 'a'])
+    end
+    builder.grammar
+  end
+end # module

data/version.txt CHANGED Viewed

	@@ -1 +1 @@
1	- 0.0.10
1	+ 0.0.11

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: dendroid
 version: !ruby/object:Gem::Version
-  version: 0.0.10
+  version: 0.0.11
 platform: ruby
 authors:
 - Dimitri Geshef
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-11-01 00:00:00.000000000 Z
+date: 2023-11-02 00:00:00.000000000 Z
 dependencies: []
 description: WIP. A Ruby implementation of an Earley parser
 email: famished.tiger@yahoo.com
@@ -33,6 +33,10 @@ files:
 - lib/dendroid/lexical/literal.rb
 - lib/dendroid/lexical/token.rb
 - lib/dendroid/lexical/token_position.rb
+- lib/dendroid/recognizer/chart.rb
+- lib/dendroid/recognizer/e_item.rb
+- lib/dendroid/recognizer/item_set.rb
+- lib/dendroid/recognizer/recognizer.rb
 - lib/dendroid/syntax/choice.rb
 - lib/dendroid/syntax/grammar.rb
 - lib/dendroid/syntax/grm_symbol.rb
@@ -51,6 +55,11 @@ files:
 - spec/dendroid/lexical/literal_spec.rb
 - spec/dendroid/lexical/token_position_spec.rb
 - spec/dendroid/lexical/token_spec.rb
+- spec/dendroid/recognizer/chart_spec.rb
+- spec/dendroid/recognizer/e_item_spec.rb
+- spec/dendroid/recognizer/item_set_spec.rb
+- spec/dendroid/recognizer/recognizer_spec.rb
+- spec/dendroid/support/sample_grammars.rb
 - spec/dendroid/syntax/choice_spec.rb
 - spec/dendroid/syntax/grammar_spec.rb
 - spec/dendroid/syntax/grm_symbol_spec.rb