RubyGems - rley - Versions diffs - 0.8.03 → 0.8.05 - Mend

rley 0.8.03 → 0.8.05

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

checksums.yaml +4 -4
data/.rubocop.yml +28 -8
data/CHANGELOG.md +10 -0
data/examples/data_formats/JSON/README.md +34 -0
data/examples/data_formats/JSON/sample01.json +3 -0
data/examples/data_formats/JSON/sample01.svg +36 -0
data/examples/data_formats/JSON/sample02.json +6 -0
data/examples/data_formats/JSON/sample02.svg +128 -0
data/examples/data_formats/JSON/sample03.json +88 -0
data/examples/general/calc_iter1/README.md +26 -0
data/examples/general/calc_iter2/README.md +55 -0
data/examples/general/general_examples.md +37 -0
data/examples/tokenizer/README.md +46 -0
data/examples/tokenizer/loxxy_raw_scanner.rex +98 -0
data/examples/tokenizer/loxxy_raw_scanner.rex.rb +256 -0
data/examples/tokenizer/loxxy_tokenizer.rb +94 -0
data/examples/tokenizer/run_tokenizer.rb +29 -0
data/lib/rley/constants.rb +1 -1
data/lib/rley/lexical/literal.rb +29 -0
data/lib/rley/lexical/token.rb +7 -4
data/lib/rley/syntax/base_grammar_builder.rb +0 -2
data/lib/rley.rb +1 -1
data/spec/rley/lexical/literal_spec.rb +33 -0
data/spec/rley/lexical/token_spec.rb +15 -4
data/spec/rley/notation/grammar_builder_spec.rb +2 -2
data/spec/rley/parser/dangling_else_spec.rb +5 -7
data/spec/rley/parser/gfg_chart_spec.rb +0 -1
data/spec/rley/parser/gfg_earley_parser_spec.rb +131 -134
data/spec/rley/parser/gfg_parsing_spec.rb +1 -2
data/spec/rley/syntax/base_grammar_builder_spec.rb +7 -7
data/spec/rley/syntax/grammar_spec.rb +6 -9
metadata +19 -9
data/lib/rley/parser/parse_tracer.rb +0 -103
data/lib/rley/syntax/literal.rb +0 -20
data/lib/rley/syntax/verbatim_symbol.rb +0 -27
data/spec/rley/syntax/literal_spec.rb +0 -31
data/spec/rley/syntax/verbatim_symbol_spec.rb +0 -38

data/examples/tokenizer/loxxy_raw_scanner.rex ADDED Viewed

@@ -0,0 +1,98 @@
+# rubocop: disable Style/MutableConstant
+# rubocop: disable Layout/SpaceBeforeSemicolon
+# rubocop: disable Style/Alias
+# rubocop: disable Style/AndOr
+# rubocop: disable Style/MultilineIfModifier
+# rubocop: disable Style/StringLiterals
+# rubocop: disable Style/MethodDefParentheses
+# rubocop: disable Security/Open
+# rubocop: disable Style/TrailingCommaInArrayLiteral
+# rubocop: disable Layout/EmptyLinesAroundMethodBody
+# rubocop: disable Style/WhileUntilDo
+# rubocop: disable Style/MultilineWhenThen
+# rubocop: disable Layout/ExtraSpacing
+# rubocop: disable Layout/SpaceInsideRangeLiteral
+# rubocop: disable Style/CaseEquality
+# rubocop: disable Style/EmptyCaseCondition
+# rubocop: disable Style/SymbolArray
+# rubocop: disable Lint/DuplicateBranch
+# rubocop: disable Layout/EmptyLineBetweenDefs
+# rubocop: disable Layout/IndentationConsistency
+class LoxxyRawScanner
+option
+  lineno
+  column
+macro
+  DIGIT /\d/
+  ALPHA /[a-zA-Z_]/
+rule
+    # Delimiters, punctuators, operators
+    /[ \t]+/
+    /\/\/[^\r\n]*/
+    /\r|\n/                        newline
+    /[!=<>]=?/                     { [:SPECIAL, text] }
+    /[(){},;.\-+\/*]/              { [:SPECIAL, text] }
+    # Literals & identifiers
+    /#{DIGIT}+(\.#{DIGIT}+)?/      { [:NUMBER, text] }
+    # The \b anchors keep identifiers that merely start with a keyword literal
+    # (e.g. `nilable`, `trueish`) from being split into a literal + identifier.
+    /nil\b/                        { [:NIL, text] }
+    /false\b/                      { [:FALSE, text] }
+    /true\b/                       { [:TRUE, text] }
+    /#{ALPHA}(#{ALPHA}|#{DIGIT})*/ { [:IDENTIFIER, text] }
+    /""/                           { [:STRING, '""'] }
+    /"/                            :IN_STRING
+  :IN_STRING  /[^"]+/              { [:STRING, "\"#{text}\""] }
+  :IN_STRING  /"/                  nil
+inner
+  def do_parse
+    tokens = []
+    while (tok = next_token) do
+      (type, lexeme) = tok
+      if type == :state
+        self.state = lexeme
+        next
+      else
+        tokens << [type, lexeme, lineno, column] # raw token: kind, text, line, 0-based column
+      end
+    end
+    tokens
+  end
+  def newline(txt)
+    if txt == "\r" # double quotes required: '\r' is a backslash plus the letter r
+      ss.skip(/\n/) # CR LF sequence
+      self.lineno += 1
+      self.start_of_current_line_pos = ss.pos # scanner already sits on first char of next line
+    end
+    nil
+  end
+end
+# rubocop: enable Style/MutableConstant
+# rubocop: enable Layout/SpaceBeforeSemicolon
+# rubocop: enable Style/Alias
+# rubocop: enable Style/AndOr
+# rubocop: enable Style/MultilineIfModifier
+# rubocop: enable Style/StringLiterals
+# rubocop: enable Style/MethodDefParentheses
+# rubocop: enable Security/Open
+# rubocop: enable Style/TrailingCommaInArrayLiteral
+# rubocop: enable Layout/EmptyLinesAroundMethodBody
+# rubocop: enable Style/WhileUntilDo
+# rubocop: enable Style/MultilineWhenThen
+# rubocop: enable Layout/ExtraSpacing
+# rubocop: enable Layout/SpaceInsideRangeLiteral
+# rubocop: enable Style/CaseEquality
+# rubocop: enable Style/EmptyCaseCondition
+# rubocop: enable Style/SymbolArray
+# rubocop: enable Lint/DuplicateBranch
+# rubocop: enable Layout/EmptyLineBetweenDefs
+# rubocop: enable Layout/IndentationConsistency

data/examples/tokenizer/loxxy_raw_scanner.rex.rb ADDED Viewed

@@ -0,0 +1,256 @@
+# frozen_string_literal: true
+# encoding: UTF-8
+#--
+# This file is automatically generated. Do not modify it.
+# Generated by: oedipus_lex version 2.5.3.
+# Source: loxxy_raw_scanner.rex
+#++
+# rubocop: disable Style/MutableConstant
+# rubocop: disable Layout/SpaceBeforeSemicolon
+# rubocop: disable Style/Alias
+# rubocop: disable Style/AndOr
+# rubocop: disable Style/MultilineIfModifier
+# rubocop: disable Style/StringLiterals
+# rubocop: disable Style/MethodDefParentheses
+# rubocop: disable Security/Open
+# rubocop: disable Style/TrailingCommaInArrayLiteral
+# rubocop: disable Layout/EmptyLinesAroundMethodBody
+# rubocop: disable Style/WhileUntilDo
+# rubocop: disable Style/MultilineWhenThen
+# rubocop: disable Layout/ExtraSpacing
+# rubocop: disable Layout/SpaceInsideRangeLiteral
+# rubocop: disable Style/CaseEquality
+# rubocop: disable Style/EmptyCaseCondition
+# rubocop: disable Style/SymbolArray
+# rubocop: disable Lint/DuplicateBranch
+# rubocop: disable Layout/EmptyLineBetweenDefs
+# rubocop: disable Layout/IndentationConsistency
+##
+# The generated lexer LoxxyRawScanner
+class LoxxyRawScanner
+  require 'strscan'
+  # :stopdoc:
+  DIGIT = /\d/
+  ALPHA = /[a-zA-Z_]/
+  # :startdoc:
+  # :stopdoc:
+  class LexerError < StandardError ; end
+  class ScanError < LexerError ; end
+  # :startdoc:
+  ##
+  # The current line number.
+  attr_accessor :lineno
+  ##
+  # The file name / path
+  attr_accessor :filename
+  ##
+  # The StringScanner for this lexer.
+  attr_accessor :ss
+  ##
+  # The current lexical state.
+  attr_accessor :state
+  alias :match :ss
+  ##
+  # The match groups for the current scan.
+  def matches
+    m = (1..9).map { |i| ss[i] }
+    m.pop until m[-1] or m.empty?
+    m
+  end
+  ##
+  # Yields on the current action.
+  def action
+    yield
+  end
+  ##
+  # The previous position. Only available if the :column option is on.
+  attr_accessor :old_pos
+  ##
+  # The position of the start of the current line. Only available if the
+  # :column option is on.
+  attr_accessor :start_of_current_line_pos
+  ##
+  # The current column, starting at 0. Only available if the
+  # :column option is on.
+  def column
+    old_pos - start_of_current_line_pos
+  end
+  ##
+  # The current scanner class. Must be overridden in subclasses.
+  def scanner_class
+    StringScanner
+  end unless instance_methods(false).map(&:to_s).include?("scanner_class")
+  ##
+  # Parse the given string.
+  def parse str
+    self.ss     = scanner_class.new str
+    self.lineno = 1
+    self.start_of_current_line_pos = 0
+    self.state  ||= nil
+    do_parse
+  end
+  ##
+  # Read in and parse the file at +path+.
+  def parse_file path
+    self.filename = path
+    open path do |f|
+      parse f.read
+    end
+  end
+  ##
+  # The current location in the parse.
+  def location
+    [
+      (filename || "<input>"),
+      lineno,
+      column,
+    ].compact.join(":")
+  end
+  ##
+  # Lex the next token.
+  def next_token
+    # Returns the next [type, text] pair (or a [:state, new_state] pair when a
+    # rule switches the lexical state), or nil once the input is exhausted.
+    # Raises ScanError when no rule matches at the current position and
+    # LexerError when a rule yields something other than nil or an Array of
+    # at least two elements.
+    token = nil
+    # Rules that yield nil (whitespace, comments, line endings) simply loop again
+    until ss.eos? or token do
+      if ss.peek(1) == "\n"
+        self.lineno += 1
+        # line starts 1 position after the newline
+        self.start_of_current_line_pos = ss.pos + 1
+      end
+      # Remember where the upcoming token starts: `column` is derived from it
+      self.old_pos = ss.pos
+      token =
+        case state
+        when nil then
+          case
+          when ss.skip(/[ \t]+/) then
+            # do nothing
+          when ss.skip(/\/\/[^\r\n]*/) then
+            # do nothing
+          when text = ss.scan(/\r|\n/) then
+            newline text
+          when text = ss.scan(/[!=<>]=?/) then
+            action { [:SPECIAL, text] }
+          when text = ss.scan(/[(){},;.\-+\/*]/) then
+            action { [:SPECIAL, text] }
+          when text = ss.scan(/#{DIGIT}+(\.#{DIGIT}+)?/) then
+            action { [:NUMBER, text] }
+          # The \b anchors keep identifiers that merely start with a keyword
+          # literal (e.g. `nilable`) from being split into literal + identifier
+          when text = ss.scan(/nil\b/) then
+            action { [:NIL, text] }
+          when text = ss.scan(/false\b/) then
+            action { [:FALSE, text] }
+          when text = ss.scan(/true\b/) then
+            action { [:TRUE, text] }
+          when text = ss.scan(/#{ALPHA}(#{ALPHA}|#{DIGIT})*/) then
+            action { [:IDENTIFIER, text] }
+          when ss.skip(/""/) then
+            action { [:STRING, '""'] }
+          when ss.skip(/"/) then
+            [:state, :IN_STRING]
+          else
+            text = ss.string[ss.pos .. -1]
+            raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'"
+          end
+        when :IN_STRING then
+          case
+          when text = ss.scan(/[^"]+/) then
+            action { [:STRING, "\"#{text}\""] }
+          when ss.skip(/"/) then
+            [:state, nil]
+          else
+            text = ss.string[ss.pos .. -1]
+            raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'"
+          end
+        else
+          raise ScanError, "undefined state at #{location}: '#{state}'"
+        end # token = case state
+      next unless token # allow functions to trigger redo w/ nil
+    end # while
+    raise LexerError, "bad lexical result at #{location}: #{token.inspect}" unless
+      token.nil? || (Array === token && token.size >= 2)
+    # auto-switch state
+    self.state = token.last if token && token.first == :state
+    token
+  end # def next_token
+    def do_parse
+      tokens = []
+      while (tok = next_token) do
+        (type, lexeme) = tok
+        if type == :state
+          self.state = lexeme
+          next
+        else
+          tokens << [type, lexeme, lineno, column]
+        end
+      end
+      tokens
+    end
+    def newline(txt)
+      if txt == '\r'
+        ss.skip(/\n/) # CR LF sequence
+        self.lineno += 1
+        self.start_of_current_line_pos = ss.pos + 1
+      end
+      nil
+    end
+end # class
+  # rubocop: enable Style/MutableConstant
+  # rubocop: enable Layout/SpaceBeforeSemicolon
+  # rubocop: enable Style/Alias
+  # rubocop: enable Style/AndOr
+  # rubocop: enable Style/MultilineIfModifier
+  # rubocop: enable Style/StringLiterals
+  # rubocop: enable Style/MethodDefParentheses
+  # rubocop: enable Security/Open
+  # rubocop: enable Style/TrailingCommaInArrayLiteral
+  # rubocop: enable Layout/EmptyLinesAroundMethodBody
+  # rubocop: enable Style/WhileUntilDo
+  # rubocop: enable Style/MultilineWhenThen
+  # rubocop: enable Layout/ExtraSpacing
+  # rubocop: enable Layout/SpaceInsideRangeLiteral
+  # rubocop: enable Style/CaseEquality
+  # rubocop: enable Style/EmptyCaseCondition
+  # rubocop: enable Style/SymbolArray
+  # rubocop: enable Lint/DuplicateBranch
+  # rubocop: enable Layout/EmptyLineBetweenDefs
+  # rubocop: enable Layout/IndentationConsistency

data/examples/tokenizer/loxxy_tokenizer.rb ADDED Viewed

@@ -0,0 +1,94 @@
+# frozen_string_literal: true
+require 'rley'
+require_relative 'loxxy_raw_scanner.rex'
+class LoxxyTokenizer
+  # @return [LoxxyRawScanner] Scanner generated by `oedipus_lex`gem.
+  attr_reader :scanner
+   # @return [String] Input text to tokenize
+  attr_reader :input
+  Keyword2name = begin
+    lookup = %w[
+      and class else false fun for if nil or
+      print return super this true var while
+    ].map { |x| [x, x.upcase] }.to_h
+    lookup.default = 'IDENTIFIER'
+    lookup.freeze
+  end
+  Special2name = {
+    '(' => 'LEFT_PAREN',
+    ')' => 'RIGHT_PAREN',
+    '{' => 'LEFT_BRACE',
+    '}' => 'RIGHT_BRACE',
+    ',' => 'COMMA',
+    '.' => 'DOT',
+    '-' => 'MINUS',
+    '+' => 'PLUS',
+    ';' => 'SEMICOLON',
+    '/' => 'SLASH',
+    '*' => 'STAR',
+    '!' => 'BANG',
+    '!=' => 'BANG_EQUAL',
+    '=' => 'EQUAL',
+    '==' => 'EQUAL_EQUAL',
+    '>' => 'GREATER',
+    '>=' => 'GREATER_EQUAL',
+    '<' => 'LESS',
+    '<=' => 'LESS_EQUAL'
+  }.freeze
+  def initialize(source = nil)
+    @scanner = LoxxyRawScanner.new
+    start_with(source)
+  end
+  def start_with(source)
+    @input = source
+  end
+  def tokens
+    raw_tokens = scanner.parse(input)
+    cooked = raw_tokens.map do |(raw_type, raw_text, line, col)|
+      pos = Rley::Lexical::Position.new(line, col + 1)
+      convert(raw_type, raw_text, pos)
+    end
+    forelast = cooked.last
+    last_col = forelast.position.column + forelast.lexeme.length
+    last_pos = Rley::Lexical::Position.new(forelast.position.line, last_col)
+    cooked << Rley::Lexical::Token.new(nil, 'EOF', last_pos)
+    cooked
+  end
+  private
+  def convert(token_kind, token_text, pos)
+    result = case token_kind
+    when :SPECIAL
+      Rley::Lexical::Token.new(token_text, Special2name[token_text])
+    when :FALSE
+      Rley::Lexical::Literal.new(false, token_text, 'FALSE')
+    when :NUMBER
+      num_val = token_text =~ /\.\d+$/ ? token_text.to_f : token_text.to_i
+      Rley::Lexical::Literal.new(num_val, token_text, 'NUMBER')
+    when :NIL
+      Rley::Lexical::Literal.new(nil, token_text, 'NIL')
+    when :STRING
+      str_val = token_text[1..-2]
+      pos.column = pos.column - 1 unless str_val.empty?
+      Rley::Lexical::Literal.new(str_val, token_text, 'STRING')
+    when :TRUE
+      Rley::Lexical::Literal.new(true, token_text, 'TRUE')
+    when :IDENTIFIER
+      Rley::Lexical::Token.new(token_text, Keyword2name[token_text])
+    else
+      raise ScanError, "Error: [line #{pos.line}:#{column}]: Unexpected token #{token_text}"
+    end
+    result.position = pos
+    result
+  end
+end # class

data/examples/tokenizer/run_tokenizer.rb ADDED Viewed

@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+require 'yaml'
+require_relative 'loxxy_tokenizer'
+# Sample Lox program used as tokenizer input
+lox_source = <<LOX_END
+class Base {
+  foo() {
+    print "Base.foo()";
+  }
+}
+class Derived < Base {
+  foo() {
+    print "Derived.foo()";
+    super.foo();
+  }
+}
+Derived().foo();
+// expect: Derived.foo()
+// expect: Base.foo()
+LOX_END
+# Tokenize the sample, then persist the resulting token objects as YAML
+loxxy_tokenizer = LoxxyTokenizer.new(lox_source)
+tokens = loxxy_tokenizer.tokens
+File.open('tokens.yaml', 'w') do |yaml_file|
+  YAML.dump(tokens, yaml_file)
+end
+puts 'Done: tokenizer results saved in YAML.'

data/lib/rley/constants.rb CHANGED Viewed

@@ -5,7 +5,7 @@
 module Rley # Module used as a namespace
   # The version number of the gem.
-  Version = '0.8.03'
+  Version = '0.8.05'
   # Brief description of the gem.
   Description = "Ruby implementation of the Earley's parsing algorithm"

data/lib/rley/lexical/literal.rb ADDED Viewed

@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+require_relative 'token'
+module Rley # This module is used as a namespace
+  module Lexical # This module is used as a namespace
+    # A literal (value) is a token that represents a data value in the parsed
+    # language. For instance, in Ruby data values such as strings, numbers,
+    # regular expressions, ... can appear directly in the source code. These are
+    # examples of literal values. One responsibility of a tokenizer/lexer is
+    # to convert the text representation into a corresponding value in a
+    # convenient format for the interpreter/compiler.
+    class Literal < Token
+      # @return [Object] The value expressed in one of the target datatypes.
+      attr_reader(:value)
+      # Constructor. Forwards lexeme, terminal and position to Token, then stores the value.
+      # @param aValue [Object] value of the token in internal representation
+      # @param theLexeme [String] the lexeme (= piece of text from input)
+      # @param aTerminal [Syntax::Terminal, String] the terminal symbol corresponding to the lexeme
+      # @param aPosition [Rley::Lexical::Position] line, column position of token (optional, nil by default)
+      def initialize(aValue, theLexeme, aTerminal, aPosition = nil)
+        super(theLexeme, aTerminal, aPosition)
+        @value = aValue
+      end
+    end # class
+  end # module
+end # module
+# End of file

data/lib/rley/lexical/token.rb CHANGED Viewed

@@ -1,7 +1,9 @@
 # frozen_string_literal: true
 module Rley # This module is used as a namespace
-  module Lexical # This module is used as a namespace
+  # This module hosts classes that a Rley parser expects
+  # as return values from a tokenizer / lexer.
+  module Lexical
     # A Position is the location of a lexeme within a source file.
     Position = Struct.new(:line, :column) do
       def to_s
@@ -28,14 +30,15 @@ module Rley # This module is used as a namespace
       # @return [String] The name of terminal symbol matching the lexeme.
       attr_reader(:terminal)
-      # @return [Position] The position of the lexeme in the source file.
-      attr_reader(:position)
+      # @return [Position] The position -in "editor" coordinates- of the lexeme in the source file.
+      attr_accessor(:position)
       # Constructor.
       # @param theLexeme [String] the lexeme (= piece of text from input)
       # @param aTerminal [Syntax::Terminal, String]
       #   The terminal symbol corresponding to the lexeme.
-      def initialize(theLexeme, aTerminal, aPosition)
+      # @param aPosition [Rley::Lexical::Position] position of the token in source file
+      def initialize(theLexeme, aTerminal, aPosition = nil)
         raise 'Internal error: nil terminal symbol detected' if aTerminal.nil?
         @lexeme = theLexeme

data/lib/rley/syntax/base_grammar_builder.rb CHANGED Viewed

@@ -3,8 +3,6 @@
 require 'set'
 require_relative 'terminal'
 require_relative 'non_terminal'
-require_relative 'literal'
-require_relative 'verbatim_symbol'
 require_relative 'production'
 require_relative 'grammar'

data/lib/rley.rb CHANGED Viewed

@@ -6,7 +6,7 @@
 require_relative './rley/constants'
 require_relative './rley/interface'
-require_relative './rley/lexical/token'
+require_relative './rley/lexical/literal'
 require_relative './rley/parser/gfg_earley_parser'
 require_relative './rley/parse_rep/ast_base_builder'
 require_relative './rley/parse_tree_visitor'

data/spec/rley/lexical/literal_spec.rb ADDED Viewed

@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+require_relative '../../spec_helper'
+require_relative '../../../lib/rley/syntax/terminal'
+# Load the class under test
+require_relative '../../../lib/rley/lexical/literal'
+module Rley # Open this namespace to avoid module qualifier prefixes
+  module Lexical # Open this namespace to avoid module qualifier prefixes
+    describe Literal do
+      let(:lexeme) { '12.34' }
+      let(:a_terminal) { Syntax::Terminal.new('NUMBER') }
+      let(:a_pos) { Position.new(3, 4) }
+      context 'Initialization:' do
+        # Default instantiation rule
+        subject { Literal.new(lexeme.to_f, lexeme, a_terminal, a_pos) }
+        it 'should be created with a value, lexeme, terminal and position' do
+          expect { Literal.new(lexeme.to_f, lexeme, a_terminal, a_pos) }.not_to raise_error
+        end
+        it 'should know its value' do
+          expect(subject.value).to eq(lexeme.to_f)
+        end
+      end # context
+    end # describe
+  end # module
+end # module
+# End of file

data/spec/rley/lexical/token_spec.rb CHANGED Viewed

@@ -13,12 +13,15 @@ module Rley # Open this namespace to avoid module qualifier prefixes
       let(:lexeme) { '"some text"' }
       let(:a_terminal) { Syntax::Terminal.new('if') }
       let(:a_pos) { Position.new(3, 4) }
+      # Default instantiation rule
+      subject { Token.new(lexeme, a_terminal, a_pos) }
       context 'Initialization:' do
-        # Default instantiation rule
-        subject { Token.new(lexeme, a_terminal, a_pos) }
+        it 'could be created with a lexeme and a terminal ' do
+          expect { Token.new(lexeme, a_terminal) }.not_to raise_error
+        end
-        it 'should be created with a lexeme, a terminal and position' do
+        it 'could be created with a lexeme, a terminal and position' do
           expect { Token.new(lexeme, a_terminal, a_pos) }.not_to raise_error
         end
@@ -30,7 +33,15 @@ module Rley # Open this namespace to avoid module qualifier prefixes
           expect(subject.terminal).to eq(a_terminal)
         end
-        it 'should know its terminal' do
+        it 'should know its position' do
+          new_pos = Position.new(5, 7)
+          subject.position = new_pos
+          expect(subject.position).to eq(new_pos)
+        end
+      end # context
+      context 'Initialization:' do
+        it 'should accept a new position' do
           expect(subject.position).to eq(a_pos)
         end
       end # context

data/spec/rley/notation/grammar_builder_spec.rb CHANGED Viewed

@@ -55,8 +55,8 @@ module Rley # Open this namespace to avoid module qualifier prefixes
         it 'should accept already built terminals' do
           a = Syntax::Terminal.new('a')
-          b = Syntax::VerbatimSymbol.new('b')
-          c = Syntax::Literal.new('c', /c/)
+          b = Syntax::Terminal.new('b')
+          c = Syntax::Terminal.new('c')
           subject.add_terminals(a, b, c)
           expect(subject.symbols.size).to eq(3)