RubyGems - lrama - Versions diffs - 0.5.11 → 0.6.0 - Mend

lrama 0.5.11 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)

checksums.yaml +4 -4
data/.github/workflows/test.yaml +2 -2
data/Gemfile +1 -1
data/LEGAL.md +1 -0
data/NEWS.md +187 -0
data/README.md +15 -4
data/Steepfile +3 -0
data/lib/lrama/grammar/code/printer_code.rb +1 -1
data/lib/lrama/grammar/code/rule_action.rb +19 -3
data/lib/lrama/grammar/code.rb +19 -7
data/lib/lrama/grammar/parameterizing_rule.rb +6 -0
data/lib/lrama/grammar/parameterizing_rule_builder.rb +34 -0
data/lib/lrama/grammar/parameterizing_rule_resolver.rb +30 -0
data/lib/lrama/grammar/parameterizing_rule_rhs_builder.rb +53 -0
data/lib/lrama/grammar/rule_builder.rb +26 -22
data/lib/lrama/grammar.rb +15 -41
data/lib/lrama/lexer/grammar_file.rb +21 -0
data/lib/lrama/lexer/location.rb +77 -2
data/lib/lrama/lexer/token/instantiate_rule.rb +18 -0
data/lib/lrama/lexer/token/user_code.rb +10 -10
data/lib/lrama/lexer/token.rb +1 -1
data/lib/lrama/lexer.rb +21 -11
data/lib/lrama/parser.rb +619 -454
data/lib/lrama/states_reporter.rb +1 -1
data/lib/lrama/version.rb +1 -1
data/parser.y +95 -30
data/sig/lrama/grammar/code/printer_code.rbs +1 -1
data/sig/lrama/grammar/code.rbs +5 -5
data/sig/lrama/grammar/parameterizing_rule.rbs +10 -0
data/sig/lrama/grammar/parameterizing_rule_builder.rbs +19 -0
data/sig/lrama/grammar/parameterizing_rule_resolver.rbs +16 -0
data/sig/lrama/grammar/parameterizing_rule_rhs_builder.rbs +18 -0
data/sig/lrama/grammar/parameterizing_rules/builder/base.rbs +5 -3
data/sig/lrama/grammar/parameterizing_rules/builder/separated_list.rbs +2 -0
data/sig/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rbs +2 -0
data/sig/lrama/grammar/parameterizing_rules/builder.rbs +4 -3
data/sig/lrama/grammar/rule_builder.rbs +2 -4
data/sig/lrama/lexer/grammar_file.rbs +15 -0
data/sig/lrama/lexer/location.rbs +13 -1
data/sig/lrama/lexer/token/instantiate_rule.rbs +12 -0
metadata +16 -6
data/doc/TODO.md +0 -59
data/lib/lrama/lexer/token/parameterizing.rb +0 -34
data/sig/lrama/lexer/token/parameterizing.rbs +0 -17

data/lib/lrama/grammar.rb CHANGED Viewed

@@ -8,6 +8,10 @@ require "lrama/grammar/printer"
 require "lrama/grammar/reference"
 require "lrama/grammar/rule"
 require "lrama/grammar/rule_builder"
+require "lrama/grammar/parameterizing_rule_builder"
+require "lrama/grammar/parameterizing_rule_resolver"
+require "lrama/grammar/parameterizing_rule_rhs_builder"
+require "lrama/grammar/parameterizing_rule"
 require "lrama/grammar/symbol"
 require "lrama/grammar/type"
 require "lrama/grammar/union"
@@ -36,6 +40,7 @@ module Lrama
       @rule_builders = []
       @rules = []
       @sym_to_rules = {}
+      @parameterizing_resolver = ParameterizingRuleResolver.new
       @empty_symbol = nil
       @eof_symbol = nil
       @error_symbol = nil
@@ -69,7 +74,7 @@ module Lrama
         return sym
       end
-      if sym = @symbols.find {|s| s.id == id }
+      if (sym = @symbols.find {|s| s.id == id })
         return sym
       end
@@ -129,6 +134,10 @@ module Lrama
       @rule_builders << builder
     end
+    # Hands the given builder to @parameterizing_resolver, which keeps track
+    # of it; nothing is stored on the grammar itself.
+    def add_parameterizing_rule_builder(builder)
+      @parameterizing_resolver.add_parameterizing_rule_builder(builder)
+    end
     def prologue_first_lineno=(prologue_first_lineno)
       @aux.prologue_first_lineno = prologue_first_lineno
     end
@@ -310,7 +319,7 @@ module Lrama
     def setup_rules
       @rule_builders.each do |builder|
-        builder.setup_rules
+        builder.setup_rules(@parameterizing_resolver)
       end
     end
@@ -350,56 +359,21 @@ module Lrama
       @accept_symbol = term
     end
-    # 1. Add $accept rule to the top of rules
-    # 2. Extract action in the middle of RHS into new Empty rule
-    # 3. Append id and extract action then create Rule
-    #
-    # Bison 3.8.2 uses different orders for symbol number and rule number
-    # when a rule has actions in the middle of a rule.
-    #
-    # For example,
-    #
-    # `program: $@1 top_compstmt`
-    #
-    # Rules are ordered like below,
-    #
-    # 1 $@1: ε
-    # 2 program: $@1 top_compstmt
-    #
-    # Symbols are ordered like below,
-    #
-    # 164 program
-    # 165 $@1
-    #
     def normalize_rules
-      # 1. Add $accept rule to the top of rules
-      accept = @accept_symbol
-      eof = @eof_symbol
+      # Add $accept rule to the top of rules
       lineno = @rule_builders.first ? @rule_builders.first.line : 0
-      @rules << Rule.new(id: @rule_counter.increment, _lhs: accept.id, _rhs: [@rule_builders.first.lhs, eof.id], token_code: nil, lineno: lineno)
+      @rules << Rule.new(id: @rule_counter.increment, _lhs: @accept_symbol.id, _rhs: [@rule_builders.first.lhs, @eof_symbol.id], token_code: nil, lineno: lineno)
       setup_rules
       @rule_builders.each do |builder|
-        # Extract actions in the middle of RHS into new rules.
-        builder.midrule_action_rules.each do |rule|
-          @rules << rule
-        end
         builder.rules.each do |rule|
-          add_nterm(id: rule._lhs)
-          @rules << rule
-        end
-        builder.parameterizing_rules.each do |rule|
           add_nterm(id: rule._lhs, tag: rule.lhs_tag)
           @rules << rule
         end
-        builder.midrule_action_rules.each do |rule|
-          add_nterm(id: rule._lhs)
-        end
       end
+      @rules.sort_by!(&:id)
     end
     # Collect symbols from rules

data/lib/lrama/lexer/grammar_file.rb ADDED Viewed

@@ -0,0 +1,21 @@
+module Lrama
+  class Lexer
+    # Pairs the path of a grammar file with the text read from it.
+    class GrammarFile
+      attr_reader :path, :text
+      # path: identifier of the grammar file; it is only stored and compared here.
+      # text: the grammar source; it must respond to split (see #lines).
+      def initialize(path, text)
+        @path = path
+        @text = text
+      end
+      # Equal when other has the same class and the same path.
+      # The text is not compared.
+      def ==(other)
+        self.class == other.class &&
+        self.path == other.path
+      end
+      # The text split on "\n", computed on first use and cached in @lines.
+      def lines
+        @lines ||= text.split("\n")
+      end
+    end
+  end
+end

data/lib/lrama/lexer/location.rb CHANGED Viewed

@@ -1,9 +1,10 @@
 module Lrama
   class Lexer
     class Location
-      attr_reader :first_line, :first_column, :last_line, :last_column
+      attr_reader :grammar_file, :first_line, :first_column, :last_line, :last_column
-      def initialize(first_line:, first_column:, last_line:, last_column:)
+      def initialize(grammar_file:, first_line:, first_column:, last_line:, last_column:)
+        @grammar_file = grammar_file
         @first_line = first_line
         @first_column = first_column
         @last_line = last_line
@@ -12,11 +13,85 @@ module Lrama
       def ==(other)
         self.class == other.class &&
+        self.grammar_file == other.grammar_file &&
         self.first_line == other.first_line &&
         self.first_column == other.first_column &&
         self.last_line == other.last_line &&
         self.last_column == other.last_column
       end
+      # Returns a new Location, in the same grammar file, for the span between
+      # the offsets left and right.
+      #
+      # Offsets are counted from this location's first column on its first
+      # line (offset 0 is first_column), and every line contributes its length
+      # plus one for the line break. The resulting columns are measured from
+      # the start of the line they fall on.
+      #
+      # A bound that matches no line keeps the placeholder value -1.
+      def partial_location(left, right)
+        offset = -first_column
+        new_first_line = -1
+        new_first_column = -1
+        new_last_line = -1
+        new_last_column = -1
+        _text.each.with_index do |line, index|
+          new_offset = offset + line.length + 1
+          # Both ends of the comparison are inclusive, so an offset sitting
+          # exactly on a line boundary also matches the following line, and
+          # that later match overwrites this one.
+          if offset <= left && left <= new_offset
+            new_first_line = first_line + index
+            new_first_column = left - offset
+          end
+          if offset <= right && right <= new_offset
+            new_last_line = first_line + index
+            new_last_column = right - offset
+          end
+          offset = new_offset
+        end
+        Location.new(
+          grammar_file: grammar_file,
+          first_line: new_first_line, first_column: new_first_column,
+          last_line: new_last_line, last_column: new_last_column
+        )
+      end
+      # Formats the location as
+      # "path (first_line,first_column)-(last_line,last_column)".
+      def to_s
+        "#{path} (#{first_line},#{first_column})-(#{last_line},#{last_column})"
+      end
+      # Builds a message whose first line is
+      # "path:first_line:first_column: error_message", followed by the output
+      # of line_with_carets.
+      #
+      # chomp removes only the heredoc's own final newline; the newline that
+      # ends line_with_carets is still present in the result.
+      def generate_error_message(error_message)
+        <<~ERROR.chomp
+          #{path}:#{first_line}:#{first_column}: #{error_message}
+          #{line_with_carets}
+        ERROR
+      end
+      # Returns the source text of this location followed by the carets line
+      # (see carets), each terminated by a newline.
+      def line_with_carets
+        <<~TEXT
+          #{text}
+          #{carets}
+        TEXT
+      end
+      private
+      # Path of the grammar file this location belongs to.
+      def path
+        grammar_file.path
+      end
+      # Padding placed in front of the carets: the first first_column
+      # characters of text with every character except tab replaced by a
+      # space, so tabs in the source line are kept as tabs.
+      # NOTE(review): String#[] with a range starting at 0 presumably never
+      # returns nil, so the raise may exist only for the type checker - confirm.
+      def blanks
+        (text[0...first_column] or raise "#{first_column} is invalid").gsub(/[^\t]/, ' ')
+      end
+      # The padding from blanks followed by (last_column - first_column) carets.
+      # NOTE(review): String#* raises ArgumentError for a negative count, which
+      # would happen if last_column < first_column (a multi-line location?) -
+      # confirm whether callers can reach this with such a location.
+      def carets
+        blanks + '^' * (last_column - first_column)
+      end
+      # The lines returned by _text joined with "\n", cached in @text.
+      def text
+        @text ||= _text.join("\n")
+      end
+      # The grammar file's lines from first_line through last_line, cached in
+      # @_text. Line numbers are 1-based and inclusive: the slice covers the
+      # array indexes first_line - 1 up to last_line - 1.
+      # Raises when the slice is nil.
+      def _text
+        @_text ||=begin
+          range = (first_line - 1)...last_line
+          grammar_file.lines[range] or raise "#{range} is invalid"
+        end
+      end
     end
   end
 end

data/lib/lrama/lexer/token/instantiate_rule.rb ADDED Viewed

@@ -0,0 +1,18 @@
+module Lrama
+  class Lexer
+    class Token
+      # A Token that additionally carries a list of arguments.
+      # NOTE(review): presumably represents a use of a parameterizing rule
+      # with its arguments - confirm against parser.y.
+      class InstantiateRule < Token
+        attr_accessor :args
+        # s_value, alias_name and location are forwarded unchanged to
+        # Token#initialize; args is stored on this object and defaults to [].
+        def initialize(s_value:, alias_name: nil, location: nil, args: [])
+          super s_value: s_value, alias_name: alias_name, location: location
+          @args = args
+        end
+        # The rule name is the token's s_value.
+        def rule_name
+          s_value
+        end
+      end
+    end
+  end
+end

data/lib/lrama/lexer/token/user_code.rb CHANGED Viewed

@@ -35,27 +35,27 @@ module Lrama
           # Wrap an identifier in brackets to use "." or "-" in it
           when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
             tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
-            return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
+            return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos)
           when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
             tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
-            return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
+            return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos)
           when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
             tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
-            return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
-          when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
+            return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
+          when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[expr.right] (named reference with brackets)
             tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
-            return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
+            return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
           # @ references
           # Wrap an identifier in brackets to use "." or "-" in it
           when scanner.scan(/@\$/) # @$
-            return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos - 1)
+            return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos)
           when scanner.scan(/@(\d+)/) # @1
-            return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos - 1)
+            return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
           when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
-            return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos - 1)
-          when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right  (named reference with brackets)
-            return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos - 1)
+            return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
+          when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right]  (named reference with brackets)
+            return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
           end
         end
       end

data/lib/lrama/lexer/token.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 require 'lrama/lexer/token/char'
 require 'lrama/lexer/token/ident'
-require 'lrama/lexer/token/parameterizing'
+require 'lrama/lexer/token/instantiate_rule'
 require 'lrama/lexer/token/tag'
 require 'lrama/lexer/token/user_code'

data/lib/lrama/lexer.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 require "strscan"
+require "lrama/lexer/grammar_file"
 require "lrama/lexer/location"
 require "lrama/lexer/token"
@@ -28,10 +29,12 @@ module Lrama
       %error-token
       %empty
       %code
+      %rule
     )
-    def initialize(text)
-      @scanner = StringScanner.new(text)
+    def initialize(grammar_file)
+      @grammar_file = grammar_file
+      @scanner = StringScanner.new(grammar_file.text)
       @head_column = @head = @scanner.pos
       @head_line = @line = 1
       @status = :initial
@@ -57,8 +60,9 @@ module Lrama
     def location
       Location.new(
+        grammar_file: @grammar_file,
         first_line: @head_line, first_column: @head_column,
-        last_line: @line, last_column: column
+        last_line: line, last_column: column
       )
     end
@@ -78,8 +82,7 @@ module Lrama
         end
       end
-      @head_line = line
-      @head_column = column
+      reset_first_position
       case
       when @scanner.eos?
@@ -117,6 +120,8 @@ module Lrama
     def lex_c_code
       nested = 0
       code = ''
+      reset_first_position
       while !@scanner.eos? do
         case
         when @scanner.scan(/{/)
@@ -140,12 +145,12 @@ module Lrama
           @line += @scanner.matched.count("\n")
         when @scanner.scan(/'.*?'/)
           code += %Q(#{@scanner.matched})
+        when @scanner.scan(/[^\"'\{\}\n]+/)
+          code += @scanner.matched
+        when @scanner.scan(/#{Regexp.escape(@end_symbol)}/)
+          code += @scanner.matched
         else
-          if @scanner.scan(/[^\"'\{\}\n#{@end_symbol}]+/)
-            code += @scanner.matched
-          else
-            code += @scanner.getch
-          end
+          code += @scanner.getch
         end
       end
       raise ParseError, "Unexpected code: #{code}."
@@ -166,9 +171,14 @@ module Lrama
       end
     end
+    # Records the current line and column in @head_line and @head_column,
+    # the values that location reports as first_line and first_column.
+    def reset_first_position
+      @head_line = line
+      @head_column = column
+    end
     def newline
       @line += 1
-      @head = @scanner.pos + 1
+      @head = @scanner.pos
     end
   end
 end