lrama 0.5.5 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,7 +28,13 @@ module Lrama
           if lhs.referred_by?(ref_name)
             '$'
           else
-            rhs.find_index {|token| token.referred_by?(ref_name) } + 1
+            index = rhs.find_index {|token| token.referred_by?(ref_name) }
+
+            if index
+              index + 1
+            else
+              raise "'#{ref_name}' is invalid name."
+            end
           end
           [ref[0], value, ref[2], ref[3], ref[4]]
         else
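
Context for this hunk: Array#find_index returns nil when no rhs token matches the named reference, so the old one-liner evaluated nil + 1 and crashed with an unhelpful NoMethodError; the rewrite surfaces the offending reference name instead. A minimal standalone sketch of the two behaviors (rhs and ref_name are illustrative stand-ins, not the real token objects):

    rhs = %w[expr PLUS expr]   # stand-ins for grammar tokens
    ref_name = "missing"

    # Before: no match means find_index returns nil, and nil + 1
    # raises NoMethodError with no hint about the cause.
    #   rhs.find_index {|token| token == ref_name } + 1

    # After: the invalid name is reported explicitly.
    index = rhs.find_index {|token| token == ref_name }
    if index
      index + 1
    else
      raise "'#{ref_name}' is invalid name."
    end
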
data/lib/lrama/lexer.rb CHANGED
@@ -1,332 +1,174 @@
 require "strscan"
-require "lrama/report/duration"
 require "lrama/lexer/token"
 
 module Lrama
-  # Lexer for parse.y
   class Lexer
-    include Lrama::Report::Duration
-
-    # States
-    #
-    # See: https://www.gnu.org/software/bison/manual/html_node/Grammar-Outline.html
-    Initial = 0
-    Prologue = 1
-    BisonDeclarations = 2
-    GrammarRules = 3
-    Epilogue = 4
-
-    # Token types
-
-    attr_reader :prologue, :bison_declarations, :grammar_rules, :epilogue,
-                :bison_declarations_tokens, :grammar_rules_tokens
+    attr_accessor :status
+    attr_accessor :end_symbol
+
+    SYMBOLS = %w(%{ %} %% { } \[ \] : \| ;)
+    PERCENT_TOKENS = %w(
+      %union
+      %token
+      %type
+      %left
+      %right
+      %nonassoc
+      %expect
+      %define
+      %require
+      %printer
+      %lex-param
+      %parse-param
+      %initial-action
+      %precedence
+      %prec
+      %error-token
+    )
 
     def initialize(text)
-      @text = text
-      @state = Initial
-      # Array of texts
-      @prologue = []
-      @bison_declarations = []
-      @grammar_rules = []
-      @epilogue = []
-
-      @bison_declarations_tokens = []
-      @grammar_rules_tokens = []
-
-      @debug = false
+      @scanner = StringScanner.new(text)
+      @head = @scanner.pos
+      @line = 1
+      @status = :initial
+      @end_symbol = nil
+    end
 
-      report_duration(:lex) do
-        lex_text
-        lex_bison_declarations_tokens
-        lex_grammar_rules_tokens
+    def next_token
+      case @status
+      when :initial
+        lex_token
+      when :c_declaration
+        lex_c_code
       end
     end
 
-    private
-
-    def create_token(type, s_value, line, column)
-      t = Token.new(type: type, s_value: s_value)
-      t.line = line
-      t.column = column
-
-      return t
+    def line
+      @line
     end
 
-    # TODO: Remove this
-    def lex_text
-      @text.each_line.with_index(1) do |string, lineno|
-        case @state
-        when Initial
-          # Skip until "%{"
-          if string == "%{\n"
-            @state = Prologue
-            @prologue << ["", lineno]
-            next
-          end
-        when Prologue
-          # Between "%{" and "%}"
-          if string == "%}\n"
-            @state = BisonDeclarations
-            @prologue << ["", lineno]
-            next
-          end
-
-          @prologue << [string, lineno]
-        when BisonDeclarations
-          if string == "%%\n"
-            @state = GrammarRules
-            next
-          end
-
-          @bison_declarations << [string, lineno]
-        when GrammarRules
-          # Between "%%" and "%%"
-          if string == "%%\n"
-            @state = Epilogue
-            next
-          end
-
-          @grammar_rules << [string, lineno]
-        when Epilogue
-          @epilogue << [string, lineno]
-        else
-          raise "Unknown state: #{@state}"
-        end
-      end
+    def column
+      @scanner.pos - @head
     end
 
-    # See:
-    # * https://www.gnu.org/software/bison/manual/html_node/Decl-Summary.html
-    # * https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
-    # * https://www.gnu.org/software/bison/manual/html_node/Empty-Rules.html
-    def lex_common(lines, tokens)
-      line = lines.first[1]
-      column = 0
-      ss = StringScanner.new(lines.map(&:first).join)
-
-      while !ss.eos? do
+    def lex_token
+      while !@scanner.eos? do
         case
-        when ss.scan(/\n/)
-          line += 1
-          column = ss.pos
-        when ss.scan(/\s+/)
-          # skip
-        when ss.scan(/;/)
-          tokens << create_token(Token::Semicolon, ss[0], line, ss.pos - column)
-        when ss.scan(/\|/)
-          tokens << create_token(Token::Bar, ss[0], line, ss.pos - column)
-        when ss.scan(/(\d+)/)
-          tokens << create_token(Token::Number, Integer(ss[0]), line, ss.pos - column)
-        when ss.scan(/(<[a-zA-Z0-9_]+>)/)
-          tokens << create_token(Token::Tag, ss[0], line, ss.pos - column)
-        when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]\s*:/)
-          tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
-          tokens << create_token(Token::Named_Ref, ss[2], line, ss.pos - column)
-        when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\s*:/)
-          tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
-        when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
-          tokens << create_token(Token::Ident, ss[0], line, ss.pos - column)
-        when ss.scan(/\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/)
-          tokens << create_token(Token::Named_Ref, ss[1], line, ss.pos - column)
-        when ss.scan(/%expect/)
-          tokens << create_token(Token::P_expect, ss[0], line, ss.pos - column)
-        when ss.scan(/%define/)
-          tokens << create_token(Token::P_define, ss[0], line, ss.pos - column)
-        when ss.scan(/%printer/)
-          tokens << create_token(Token::P_printer, ss[0], line, ss.pos - column)
-        when ss.scan(/%error-token/)
-          tokens << create_token(Token::P_error_token, ss[0], line, ss.pos - column)
-        when ss.scan(/%lex-param/)
-          tokens << create_token(Token::P_lex_param, ss[0], line, ss.pos - column)
-        when ss.scan(/%parse-param/)
-          tokens << create_token(Token::P_parse_param, ss[0], line, ss.pos - column)
-        when ss.scan(/%initial-action/)
-          tokens << create_token(Token::P_initial_action, ss[0], line, ss.pos - column)
-        when ss.scan(/%union/)
-          tokens << create_token(Token::P_union, ss[0], line, ss.pos - column)
-        when ss.scan(/%token/)
-          tokens << create_token(Token::P_token, ss[0], line, ss.pos - column)
-        when ss.scan(/%type/)
-          tokens << create_token(Token::P_type, ss[0], line, ss.pos - column)
-        when ss.scan(/%nonassoc/)
-          tokens << create_token(Token::P_nonassoc, ss[0], line, ss.pos - column)
-        when ss.scan(/%left/)
-          tokens << create_token(Token::P_left, ss[0], line, ss.pos - column)
-        when ss.scan(/%right/)
-          tokens << create_token(Token::P_right, ss[0], line, ss.pos - column)
-        when ss.scan(/%precedence/)
-          tokens << create_token(Token::P_precedence, ss[0], line, ss.pos - column)
-        when ss.scan(/%prec/)
-          tokens << create_token(Token::P_prec, ss[0], line, ss.pos - column)
-        when ss.scan(/{/)
-          token, line = lex_user_code(ss, line, ss.pos - column, lines)
-          tokens << token
-        when ss.scan(/"/)
-          string, line = lex_string(ss, "\"", line, lines)
-          token = create_token(Token::String, string, line, ss.pos - column)
-          tokens << token
-        when ss.scan(/\/\*/)
-          # TODO: Need to keep comment?
-          line = lex_comment(ss, line, lines, "")
-        when ss.scan(/\/\//)
-          line = lex_line_comment(ss, line, "")
-        when ss.scan(/'(.)'/)
-          tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
-        when ss.scan(/'\\(.)'/) # '\\', '\t'
-          tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
-        when ss.scan(/'\\(\d+)'/) # '\13'
-          tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
-        when ss.scan(/%empty/)
-          # skip
+        when @scanner.scan(/\n/)
+          newline
+        when @scanner.scan(/\s+/)
+          # noop
+        when @scanner.scan(/\/\*/)
+          lex_comment
+        when @scanner.scan(/\/\//)
+          @scanner.scan_until(/\n/)
+          newline
+        when @scanner.scan(/%empty/)
+          # noop
         else
-          l = line - lines.first[1]
-          split = ss.string.split("\n")
-          col = ss.pos - split[0...l].join("\n").length
-          raise "Parse error (unknown token): #{split[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{col})"
+          break
         end
       end
-    end
 
-    def lex_bison_declarations_tokens
-      lex_common(@bison_declarations, @bison_declarations_tokens)
+      @head_line = line
+      @head_column = column
+
+      case
+      when @scanner.eos?
+        return
+      when @scanner.scan(/#{SYMBOLS.join('|')}/)
+        return [@scanner.matched, @scanner.matched]
+      when @scanner.scan(/#{PERCENT_TOKENS.join('|')}/)
+        return [@scanner.matched, @scanner.matched]
+      when @scanner.scan(/<\w+>/)
+        return [:TAG, build_token(type: Token::Tag, s_value: @scanner.matched)]
+      when @scanner.scan(/'.'/)
+        return [:CHARACTER, build_token(type: Token::Char, s_value: @scanner.matched)]
+      when @scanner.scan(/'\\\\'|'\\b'|'\\t'|'\\f'|'\\r'|'\\n'|'\\v'|'\\13'/)
+        return [:CHARACTER, build_token(type: Token::Char, s_value: @scanner.matched)]
+      when @scanner.scan(/"/)
+        return [:STRING, %Q("#{@scanner.scan_until(/"/)})]
+      when @scanner.scan(/\d+/)
+        return [:INTEGER, Integer(@scanner.matched)]
+      when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
+        token = build_token(type: Token::Ident, s_value: @scanner.matched)
+        type =
+          if @scanner.check(/\s*(\[\s*[a-zA-Z_.][-a-zA-Z0-9_.]*\s*\])?\s*:/)
+            :IDENT_COLON
+          else
+            :IDENTIFIER
+          end
+        return [type, token]
+      else
+        raise
+      end
     end
 
-    def lex_user_code(ss, line, column, lines)
-      first_line = line
-      first_column = column
-      debug("Enter lex_user_code: #{line}")
-      brace_count = 1
-      str = "{"
-      # Array of [type, $n, tag, first column, last column]
-      # TODO: Is it better to keep string, like "$$", and use gsub?
-      references = []
-
-      while !ss.eos? do
+    def lex_c_code
+      nested = 0
+      code = ''
+      while !@scanner.eos? do
         case
-        when ss.scan(/\n/)
-          line += 1
-        when ss.scan(/"/)
-          string, line = lex_string(ss, "\"", line, lines)
-          str << string
-          next
-        when ss.scan(/'/)
-          string, line = lex_string(ss, "'", line, lines)
-          str << string
-          next
-        when ss.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
-          tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
-          references << [:dollar, "$", tag, str.length, str.length + ss[0].length - 1]
-        when ss.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
-          tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
-          references << [:dollar, Integer(ss[2]), tag, str.length, str.length + ss[0].length - 1]
-        when ss.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_.][-a-zA-Z0-9_.]*)/) # $foo, $expr, $<long>program
-          tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
-          references << [:dollar, ss[2], tag, str.length, str.length + ss[0].length - 1]
-        when ss.scan(/@\$/) # @$
-          references << [:at, "$", nil, str.length, str.length + ss[0].length - 1]
-        when ss.scan(/@(\d+)/) # @1
-          references << [:at, Integer(ss[1]), nil, str.length, str.length + ss[0].length - 1]
-        when ss.scan(/{/)
-          brace_count += 1
-        when ss.scan(/}/)
-          brace_count -= 1
-
-          debug("Return lex_user_code: #{line}")
-          if brace_count == 0
-            str << ss[0]
-            user_code = Token.new(type: Token::User_code, s_value: str.freeze)
-            user_code.line = first_line
-            user_code.column = first_column
-            user_code.references = references
-            return [user_code, line]
+        when @scanner.scan(/{/)
+          code += @scanner.matched
+          nested += 1
+        when @scanner.scan(/}/)
+          if nested == 0 && @end_symbol == '}'
+            @scanner.unscan
+            return [:C_DECLARATION, build_token(type: Token::User_code, s_value: code, references: [])]
+          else
+            code += @scanner.matched
+            nested -= 1
           end
-        when ss.scan(/\/\*/)
-          str << ss[0]
-          line = lex_comment(ss, line, lines, str)
-        when ss.scan(/\/\//)
-          str << ss[0]
-          line = lex_line_comment(ss, line, str)
+        when @scanner.check(/#{@end_symbol}/)
+          return [:C_DECLARATION, build_token(type: Token::User_code, s_value: code, references: [])]
+        when @scanner.scan(/\n/)
+          code += @scanner.matched
+          newline
+        when @scanner.scan(/"/)
+          matched = @scanner.scan_until(/"/)
+          code += %Q("#{matched})
+          @line += matched.count("\n")
+        when @scanner.scan(/'/)
+          matched = @scanner.scan_until(/'/)
+          code += %Q('#{matched})
         else
-          # noop, just consume char
-          str << ss.getch
-          next
+          code += @scanner.getch
         end
-
-        str << ss[0]
       end
-
-      # Reach to end of input but brace does not match
-      l = line - lines.first[1]
-      raise "Parse error (brace mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
+      raise
     end
 
-    def lex_string(ss, terminator, line, lines)
-      debug("Enter lex_string: #{line}")
-
-      str = terminator.dup
-
-      while (c = ss.getch) do
-        str << c
-
-        case c
-        when "\n"
-          line += 1
-        when terminator
-          debug("Return lex_string: #{line}")
-          return [str, line]
-        else
-          # noop
-        end
-      end
-
-      # Reach to end of input but quote does not match
-      l = line - lines.first[1]
-      raise "Parse error (quote mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
-    end
+    private
 
-    # /* */ style comment
-    def lex_comment(ss, line, lines, str)
-      while !ss.eos? do
+    def lex_comment
+      while !@scanner.eos? do
         case
-        when ss.scan(/\n/)
-          line += 1
-        when ss.scan(/\*\//)
-          return line
+        when @scanner.scan(/\n/)
+          @line += 1
+          @head = @scanner.pos + 1
+        when @scanner.scan(/\*\//)
+          return
         else
-          str << ss.getch
-          next
+          @scanner.getch
         end
-
-        str << ss[0]
       end
-
-      # Reach to end of input but quote does not match
-      l = line - lines.first[1]
-      raise "Parse error (comment mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
     end
 
-    # // style comment
-    def lex_line_comment(ss, line, str)
-      while !ss.eos? do
-        case
-        when ss.scan(/\n/)
-          return line + 1
-        else
-          str << ss.getch
-          next
-        end
+    def build_token(type:, s_value:, **options)
+      token = Token.new(type: type, s_value: s_value)
+      token.line = @head_line
+      token.column = @head_column
+      options.each do |attr, value|
+        token.public_send("#{attr}=", value)
       end
 
-      line # Reach to end of input
-    end
-
-    def lex_grammar_rules_tokens
-      lex_common(@grammar_rules, @grammar_rules_tokens)
+      token
     end
 
-    def debug(msg)
-      return unless @debug
-      puts "#{msg}\n"
+    def newline
+      @line += 1
+      @head = @scanner.pos + 1
     end
   end
 end
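
This rewrite replaces the batch lexer (split the file into sections up front, then tokenize each) with a pull-based tokenizer that a parser drives one token at a time, flipping status and end_symbol around embedded C code. A minimal sketch of the new calling convention, assuming the lrama 0.5.7 gem is on the load path (the grammar text is illustrative):

    require "lrama/lexer"

    lexer = Lrama::Lexer.new(<<~GRAMMAR)
      %token NUM
      %%
      program: NUM ;
    GRAMMAR

    # next_token returns [type, value] pairs and nil at end of input.
    while (pair = lexer.next_token)
      type, value = pair
      puts "#{type.inspect} #{value.inspect} (line #{lexer.line})"
    end

    # Around an embedded C block the caller switches modes before pulling,
    # then restores them afterwards:
    #   lexer.status = :c_declaration
    #   lexer.end_symbol = '}'
    #   lexer.next_token          # => [:C_DECLARATION, <User_code token>]
    #   lexer.status = :initial
    #   lexer.end_symbol = nil
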
data/lib/lrama/option_parser.rb ADDED
@@ -0,0 +1,128 @@
+require 'optparse'
+
+module Lrama
+  # Handle option parsing for the command line interface.
+  class OptionParser
+    def initialize
+      @options = Options.new
+      @trace = []
+      @report = []
+    end
+
+    def parse(argv)
+      parse_by_option_parser(argv)
+
+      @options.trace_opts = validate_trace(@trace)
+      @options.report_opts = validate_report(@report)
+      @options.grammar_file = argv.shift
+
+      if !@options.grammar_file
+        abort "File should be specified\n"
+      end
+
+      if @options.grammar_file == '-'
+        @options.grammar_file = argv.shift or abort "File name for STDIN should be specified\n"
+      else
+        @options.y = File.open(@options.grammar_file, 'r')
+      end
+
+      if !@report.empty? && @options.report_file.nil? && @options.grammar_file
+        @options.report_file = File.dirname(@options.grammar_file) + "/" + File.basename(@options.grammar_file, ".*") + ".output"
+      end
+
+      if !@options.header_file && @options.header
+        case
+        when @options.outfile
+          @options.header_file = File.dirname(@options.outfile) + "/" + File.basename(@options.outfile, ".*") + ".h"
+        when @options.grammar_file
+          @options.header_file = File.dirname(@options.grammar_file) + "/" + File.basename(@options.grammar_file, ".*") + ".h"
+        end
+      end
+
+      @options
+    end
+
+    private
+
+    def parse_by_option_parser(argv)
+      ::OptionParser.new do |o|
+        o.banner = <<~BANNER
+          Lrama is LALR (1) parser generator written by Ruby.
+
+          Usage: lrama [options] FILE
+        BANNER
+        o.separator ''
+        o.separator 'STDIN mode:'
+        o.separator 'lrama [options] - FILE  read grammar from STDIN'
+        o.separator ''
+        o.separator 'Tuning the Parser:'
+        o.on('-S', '--skeleton=FILE', 'specify the skeleton to use') {|v| @options.skeleton = v }
+        o.on('-t', 'reserved, do nothing') { }
+        o.separator ''
+        o.separator 'Output:'
+        o.on('-H', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v }
+        o.on('-h=[FILE]', 'also produce a header file named FILE (deprecated)') {|v| @options.header = true; @options.header_file = v }
+        o.on('-d', 'also produce a header file') { @options.header = true }
+        o.on('-r', '--report=THINGS', Array, 'also produce details on the automaton') {|v| @report = v }
+        o.on('--report-file=FILE', 'also produce details on the automaton output to a file named FILE') {|v| @options.report_file = v }
+        o.on('-o', '--output=FILE', 'leave output to FILE') {|v| @options.outfile = v }
+        o.on('--trace=THINGS', Array, 'also output trace logs at runtime') {|v| @trace = v }
+        o.on('-v', 'reserved, do nothing') { }
+        o.separator ''
+        o.separator 'Error Recovery:'
+        o.on('-e', 'enable error recovery') {|v| @options.error_recovery = true }
+        o.separator ''
+        o.separator 'Other options:'
+        o.on('-V', '--version', "output version information and exit") {|v| puts "lrama #{Lrama::VERSION}"; exit 0 }
+        o.on('--help', "display this help and exit") {|v| puts o; exit 0 }
+        o.separator ''
+        o.parse!(argv)
+      end
+    end
+
+    def validate_report(report)
+      bison_list = %w[states itemsets lookaheads solved counterexamples cex all none]
+      others = %w[verbose]
+      list = bison_list + others
+      not_supported = %w[cex none]
+      h = { grammar: true }
+
+      report.each do |r|
+        if list.include?(r) && !not_supported.include?(r)
+          h[r.to_sym] = true
+        else
+          raise "Invalid report option \"#{r}\"."
+        end
+      end
+
+      if h[:all]
+        (bison_list - not_supported).each do |r|
+          h[r.to_sym] = true
+        end
+
+        h.delete(:all)
+      end
+
+      return h
+    end
+
+    def validate_trace(trace)
+      list = %w[
+        none locations scan parse automaton bitsets
+        closure grammar resource sets muscles tools
+        m4-early m4 skeleton time ielr cex all
+      ]
+      h = {}
+
+      trace.each do |t|
+        if list.include?(t)
+          h[t.to_sym] = true
+        else
+          raise "Invalid trace option \"#{t}\"."
+        end
+      end
+
+      return h
+    end
+  end
+end
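
With option handling extracted into its own class, the CLI can be exercised without shelling out: parse returns the populated Options value object defined in the next file. A sketch under the assumption that parser.y exists in the current directory (parse opens the grammar file eagerly):

    require "lrama"

    options = Lrama::OptionParser.new.parse(%w[-d --report=states,solved parser.y])
    options.grammar_file   # => "parser.y"
    options.header         # => true  (-d)
    options.header_file    # => "./y.tab.h"  (derived from the default outfile "y.tab.c")
    options.report_file    # => "./parser.output"  (derived because --report was given)
    options.report_opts    # => { grammar: true, states: true, solved: true }

Note the STDIN mode: "lrama [options] - FILE" reads the grammar from STDIN but still takes a file name, which parse uses to derive the report and header paths.
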
data/lib/lrama/options.rb ADDED
@@ -0,0 +1,23 @@
+module Lrama
+  # Command line options.
+  class Options
+    attr_accessor :skeleton, :header, :header_file,
+                  :report_file, :outfile,
+                  :error_recovery, :grammar_file,
+                  :report_file, :trace_opts, :report_opts, :y
+
+    def initialize
+      @skeleton = "bison/yacc.c"
+      @header = false
+      @header_file = nil
+      @report_file = nil
+      @outfile = "y.tab.c"
+      @error_recovery = false
+      @grammar_file = nil
+      @report_file = nil
+      @trace_opts = nil
+      @report_opts = nil
+      @y = STDIN
+    end
+  end
+end