lrama 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,349 @@
1
+ require "strscan"
2
+ require "lrama/report"
3
+
4
+ module Lrama
5
+ # Lexer for parse.y
6
+ class Lexer
7
+ include Lrama::Report::Duration
8
+
9
    # s_value is semantic value
    #
    # Token is the lexer's output unit: a token type (one of the Type
    # constants defined below) plus its semantic value, with the source
    # location attached after construction.
    Token = Struct.new(:type, :s_value, keyword_init: true) do
      Type = Struct.new(:id, :name, keyword_init: true)

      # Source location; filled in by the lexer after Token.new
      attr_accessor :line, :column, :referred
      # For User_code: list of $$/$n/@$/@n references found in the code
      # block (see lex_user_code for the element layout)
      attr_accessor :references

      def to_s
        "#{super} line: #{line}, column: #{column}"
      end

      # Class-level instance variables of the Token struct class:
      # @i is the next type id, @types collects every defined Type.
      @i = 0
      @types = []

      # Defines a token type: creates a Type constant named +name+ with
      # an auto-incremented id and registers it in @types.
      def self.define_type(name)
        type = Type.new(id: @i, name: name.to_s)
        const_set(name, type)
        @types << type
        @i += 1
      end

      # Token types
      define_type(:P_expect)         # %expect
      define_type(:P_define)         # %define
      define_type(:P_printer)        # %printer
      define_type(:P_lex_param)      # %lex-param
      define_type(:P_parse_param)    # %parse-param
      define_type(:P_initial_action) # %initial-action
      define_type(:P_union)          # %union
      define_type(:P_token)          # %token
      define_type(:P_type)           # %type
      define_type(:P_nonassoc)       # %nonassoc
      define_type(:P_left)           # %left
      define_type(:P_right)          # %right
      define_type(:P_prec)           # %prec
      define_type(:User_code)        # { ... }
      define_type(:Tag)              # <int>
      define_type(:Number)           # 0
      define_type(:Ident_Colon)      # k_if:, k_if : (spaces can be there)
      define_type(:Ident)            # api.pure, tNUMBER
      define_type(:Semicolon)        # ;
      define_type(:Bar)              # |
      define_type(:String)           # "str"
      define_type(:Char)             # '+'
    end
55
+
56
    # States of the section scan performed by lex_text.
    #
    # See: https://www.gnu.org/software/bison/manual/html_node/Grammar-Outline.html
    Initial = 0
    Prologue = 1
    BisonDeclarations = 2
    GrammarRules = 3
    Epilogue = 4

    # Section texts (arrays of [line_text, lineno] pairs) and the token
    # lists produced from the declarations and grammar-rules sections
    attr_reader :prologue, :bison_declarations, :grammar_rules, :epilogue,
      :bison_declarations_tokens, :grammar_rules_tokens
69
+
70
    # Splits +text+ (the whole grammar file) into its sections and lexes
    # the declarations and grammar-rules sections into token lists.
    def initialize(text)
      @text = text
      @state = Initial
      # Each section is an array of [line_text, lineno] pairs
      @prologue = []
      @bison_declarations = []
      @grammar_rules = []
      @epilogue = []

      # Token lists filled by the two lex_*_tokens calls below
      @bison_declarations_tokens = []
      @grammar_rules_tokens = []

      @debug = false

      report_duration(:lex) do
        lex_text
        lex_bison_declarations_tokens
        lex_grammar_rules_tokens
      end
    end
91
+
92
+ private
93
+
94
+ def create_token(type, s_value, line, column)
95
+ t = Token.new(type: type, s_value: s_value)
96
+ t.line = line
97
+ t.column = column
98
+
99
+ return t
100
+ end
101
+
102
    # TODO: Remove this
    #
    # Splits @text line by line into the four grammar-file sections
    # (prologue, bison declarations, grammar rules, epilogue) using a
    # state machine driven by the "%{", "%}" and "%%" marker lines.
    # NOTE(review): markers must be the entire line including the
    # newline ("%{\n" etc.); trailing spaces or a final line without a
    # newline will not match — confirm inputs are well-formed.
    def lex_text
      @text.each_line.with_index(1) do |string, lineno|
        case @state
        when Initial
          # Skip until "%{"
          if string == "%{\n"
            @state = Prologue
            # Blank placeholder keeps line numbers aligned with the file
            @prologue << ["", lineno]
            next
          end
        when Prologue
          # Between "%{" and "%}"
          if string == "%}\n"
            @state = BisonDeclarations
            @prologue << ["", lineno]
            next
          end

          @prologue << [string, lineno]
        when BisonDeclarations
          # Between "%}" and the first "%%"
          if string == "%%\n"
            @state = GrammarRules
            next
          end

          @bison_declarations << [string, lineno]
        when GrammarRules
          # Between "%%" and "%%"
          if string == "%%\n"
            @state = Epilogue
            next
          end

          @grammar_rules << [string, lineno]
        when Epilogue
          # Everything after the second "%%"
          @epilogue << [string, lineno]
        else
          raise "Unknown state: #{@state}"
        end
      end
    end
144
+
145
+ # See:
146
+ # * https://www.gnu.org/software/bison/manual/html_node/Decl-Summary.html
147
+ # * https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
148
+ # * https://www.gnu.org/software/bison/manual/html_node/Empty-Rules.html
149
+ def lex_common(lines, tokens)
150
+ line = lines.first[1]
151
+ column = 0
152
+ ss = StringScanner.new(lines.map(&:first).join)
153
+
154
+ while !ss.eos? do
155
+ case
156
+ when ss.scan(/\n/)
157
+ line += 1
158
+ column = ss.pos
159
+ when ss.scan(/\s+/)
160
+ # skip
161
+ when ss.scan(/;/)
162
+ tokens << create_token(Token::Semicolon, ss[0], line, ss.pos - column)
163
+ when ss.scan(/\|/)
164
+ tokens << create_token(Token::Bar, ss[0], line, ss.pos - column)
165
+ when ss.scan(/(\d+)/)
166
+ tokens << create_token(Token::Number, Integer(ss[0]), line, ss.pos - column)
167
+ when ss.scan(/(<[a-zA-Z0-9_]+>)/)
168
+ tokens << create_token(Token::Tag, ss[0], line, ss.pos - column)
169
+ when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\s*:/)
170
+ tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
171
+ when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
172
+ tokens << create_token(Token::Ident, ss[0], line, ss.pos - column)
173
+ when ss.scan(/%expect/)
174
+ tokens << create_token(Token::P_expect, ss[0], line, ss.pos - column)
175
+ when ss.scan(/%define/)
176
+ tokens << create_token(Token::P_define, ss[0], line, ss.pos - column)
177
+ when ss.scan(/%printer/)
178
+ tokens << create_token(Token::P_printer, ss[0], line, ss.pos - column)
179
+ when ss.scan(/%lex-param/)
180
+ tokens << create_token(Token::P_lex_param, ss[0], line, ss.pos - column)
181
+ when ss.scan(/%parse-param/)
182
+ tokens << create_token(Token::P_parse_param, ss[0], line, ss.pos - column)
183
+ when ss.scan(/%initial-action/)
184
+ tokens << create_token(Token::P_initial_action, ss[0], line, ss.pos - column)
185
+ when ss.scan(/%union/)
186
+ tokens << create_token(Token::P_union, ss[0], line, ss.pos - column)
187
+ when ss.scan(/%token/)
188
+ tokens << create_token(Token::P_token, ss[0], line, ss.pos - column)
189
+ when ss.scan(/%type/)
190
+ tokens << create_token(Token::P_type, ss[0], line, ss.pos - column)
191
+ when ss.scan(/%nonassoc/)
192
+ tokens << create_token(Token::P_nonassoc, ss[0], line, ss.pos - column)
193
+ when ss.scan(/%left/)
194
+ tokens << create_token(Token::P_left, ss[0], line, ss.pos - column)
195
+ when ss.scan(/%right/)
196
+ tokens << create_token(Token::P_right, ss[0], line, ss.pos - column)
197
+ when ss.scan(/%prec/)
198
+ tokens << create_token(Token::P_prec, ss[0], line, ss.pos - column)
199
+ when ss.scan(/{/)
200
+ token, line = lex_user_code(ss, line, ss.pos - column, lines)
201
+ tokens << token
202
+ when ss.scan(/"/)
203
+ string, line = lex_string(ss, "\"", line, lines)
204
+ token = create_token(Token::String, string, line, ss.pos - column)
205
+ tokens << token
206
+ when ss.scan(/\/\*/)
207
+ # TODO: Need to keep comment?
208
+ line = lex_comment(ss, line, lines, "")
209
+ when ss.scan(/'(.)'/)
210
+ tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
211
+ when ss.scan(/'\\(.)'/) # '\\', '\t'
212
+ tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
213
+ when ss.scan(/'\\(\d+)'/) # '\13'
214
+ tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
215
+ when ss.scan(/%empty/)
216
+ # skip
217
+ else
218
+ l = line - lines.first[1]
219
+ split = ss.string.split("\n")
220
+ col = ss.pos - split[0...l].join("\n").length
221
+ raise "Parse error (unknow token): #{split[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{col})"
222
+ end
223
+ end
224
+ end
225
+
226
    # Lexes the %-declarations section into @bison_declarations_tokens
    def lex_bison_declarations_tokens
      lex_common(@bison_declarations, @bison_declarations_tokens)
    end
229
+
230
+ def lex_user_code(ss, line, column, lines)
231
+ first_line = line
232
+ first_column = column
233
+ debug("Enter lex_user_code: #{line}")
234
+ brace_count = 1
235
+ str = "{"
236
+ # Array of [type, $n, tag, first column, last column]
237
+ # TODO: Is it better to keep string, like "$$", and use gsub?
238
+ references = []
239
+
240
+ while !ss.eos? do
241
+ case
242
+ when ss.scan(/\n/)
243
+ line += 1
244
+ when ss.scan(/"/)
245
+ string, line = lex_string(ss, "\"", line, lines)
246
+ str << string
247
+ next
248
+ when ss.scan(/'/)
249
+ string, line = lex_string(ss, "'", line, lines)
250
+ str << string
251
+ next
252
+ when ss.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
253
+ tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
254
+ references << [:dollar, "$", tag, str.length, str.length + ss[0].length - 1]
255
+ when ss.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
256
+ tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
257
+ references << [:dollar, Integer(ss[2]), tag, str.length, str.length + ss[0].length - 1]
258
+ when ss.scan(/@\$/) # @$
259
+ references << [:at, "$", nil, str.length, str.length + ss[0].length - 1]
260
+ when ss.scan(/@(\d)+/) # @1
261
+ references << [:at, Integer(ss[1]), nil, str.length, str.length + ss[0].length - 1]
262
+ when ss.scan(/{/)
263
+ brace_count += 1
264
+ when ss.scan(/}/)
265
+ brace_count -= 1
266
+
267
+ debug("Return lex_user_code: #{line}")
268
+ if brace_count == 0
269
+ str << ss[0]
270
+ user_code = Token.new(type: Token::User_code, s_value: str.freeze)
271
+ user_code.line = first_line
272
+ user_code.column = first_column
273
+ user_code.references = references
274
+ return [user_code, line]
275
+ end
276
+ when ss.scan(/\/\*/)
277
+ str << ss[0]
278
+ line = lex_comment(ss, line, lines, str)
279
+ else
280
+ # noop, just consume char
281
+ str << ss.getch
282
+ next
283
+ end
284
+
285
+ str << ss[0]
286
+ end
287
+
288
+ # Reach to end of input but brace does not match
289
+ l = line - lines.first[1]
290
+ raise "Parse error (brace mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
291
+ end
292
+
293
+ def lex_string(ss, terminator, line, lines)
294
+ debug("Enter lex_string: #{line}")
295
+
296
+ str = terminator.dup
297
+
298
+ while (c = ss.getch) do
299
+ str << c
300
+
301
+ case c
302
+ when "\n"
303
+ line += 1
304
+ when terminator
305
+ debug("Return lex_string: #{line}")
306
+ return [str, line]
307
+ else
308
+ # noop
309
+ end
310
+ end
311
+
312
+ # Reach to end of input but quote does not match
313
+ l = line - lines.first[1]
314
+ raise "Parse error (quote mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
315
+ end
316
+
317
+ # TODO: Need to handle // style comment
318
+ #
319
+ # /* */ style comment
320
+ def lex_comment(ss, line, lines, str)
321
+ while !ss.eos? do
322
+ case
323
+ when ss.scan(/\n/)
324
+ line += 1
325
+ when ss.scan(/\*\//)
326
+ return line
327
+ else
328
+ str << ss.getch
329
+ next
330
+ end
331
+
332
+ str << ss[0]
333
+ end
334
+
335
+ # Reach to end of input but quote does not match
336
+ l = line - lines.first[1]
337
+ raise "Parse error (comment mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
338
+ end
339
+
340
    # Lexes the grammar-rules section into @grammar_rules_tokens
    def lex_grammar_rules_tokens
      lex_common(@grammar_rules, @grammar_rules_tokens)
    end
343
+
344
+ def debug(msg)
345
+ return unless @debug
346
+ puts "#{msg}\n"
347
+ end
348
+ end
349
+ end
@@ -0,0 +1,268 @@
1
+ require "erb"
2
+ require "forwardable"
3
+ require "lrama/report"
4
+
5
+ module Lrama
6
+ class Output
7
+ extend Forwardable
8
+ include Report::Duration
9
+
10
    attr_reader :grammar_file_path, :context, :grammar

    # Table sizes and sentinel values computed by the parser context
    def_delegators "@context", :yyfinal, :yylast, :yyntokens, :yynnts, :yynrules, :yynstates,
      :yymaxutok, :yypact_ninf, :yytable_ninf

    # Reserved symbols defined on the grammar
    def_delegators "@grammar", :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol
16
+
17
    # @param out [IO] destination for the generated parser source
    # @param output_file_path [String] path of the generated parser file
    # @param template_name [String] template file name under template/
    # @param grammar_file_path [String] path of the input grammar file
    # @param header_out [IO, nil] destination for the generated header;
    #   when nil and header_file_path is set, the file is written directly
    # @param header_file_path [String, nil] path of the generated header
    # @param context [Object] computed parser tables (Lrama context)
    # @param grammar [Object] the parsed grammar
    def initialize(out:, output_file_path:, template_name:, grammar_file_path:, header_out: nil, header_file_path: nil, context:, grammar:)
      @out = out
      @output_file_path = output_file_path
      @template_name = template_name
      @grammar_file_path = grammar_file_path
      @header_out = header_out
      @header_file_path = header_file_path
      @context = context
      @grammar = grammar
    end
27
+
28
    # Renders the parser template (and, when a header path is given, the
    # header template) against this output object and writes the results.
    def render
      report_duration(:render) do
        # NOTE(review): ERB.new's positional safe_level/trim_mode args
        # are deprecated since Ruby 2.6 — consider trim_mode: '-' when
        # the supported Ruby range allows it.
        erb = ERB.new(File.read(template_file), nil, '-')
        erb.filename = template_file
        tmp = erb.result_with_hash(context: @context, output: self)
        # Fill in the [@oline@]/[@ofile@] placeholders for the .c output
        tmp = replace_special_variables(tmp, @output_file_path)
        @out << tmp

        if @header_file_path
          erb = ERB.new(File.read(header_template_file), nil, '-')
          erb.filename = header_template_file
          tmp = erb.result_with_hash(context: @context, output: self)
          tmp = replace_special_variables(tmp, @header_file_path)

          # Prefer the injected IO; otherwise write the header file
          if @header_out
            @header_out << tmp
          else
            File.open(@header_file_path, "w+") do |f|
              f << tmp
            end
          end
        end
      end
    end
52
+
53
    # A part of b4_token_enums
    #
    # Renders the yytokentype enum members as "NAME = ID," lines (no
    # trailing comma on the last token, i.e. when its id == yymaxutok),
    # with the display name appended as a C comment when present.
    def token_enums
      str = ""

      @context.yytokentype.each do |s_value, token_id, display_name|
        s = sprintf("%s = %d%s", s_value, token_id, token_id == yymaxutok ? "" : ",")

        if display_name
          str << sprintf("    %-30s /* %s */\n", s, display_name)
        else
          str << sprintf("    %s\n", s)
        end
      end

      str
    end
69
+
70
    # b4_symbol_enum
    #
    # Renders the yysymbol_kind_t enum members; the last symbol number
    # determines where the trailing comma is omitted. Display names are
    # appended as C comments when present.
    def symbol_enum
      str = ""

      last_sym_number = @context.yysymbol_kind_t.last[1]
      @context.yysymbol_kind_t.each do |s_value, sym_number, display_name|
        s = sprintf("%s = %d%s", s_value, sym_number, (sym_number == last_sym_number) ? "" : ",")

        if display_name
          str << sprintf("  %-40s /* %s */\n", s, display_name)
        else
          str << sprintf("  %s\n", s)
        end
      end

      str
    end
87
+
88
    # yytranslate table rendered as C array elements
    def yytranslate
      int_array_to_string(@context.yytranslate)
    end
91
+
92
    # yyrline table (Bison's rule -> source line mapping) as C array elements
    def yyrline
      int_array_to_string(@context.yyrline)
    end
95
+
96
    # yytname table: symbol-name string literals, terminated by YY_NULLPTR
    def yytname
      string_array_to_string(@context.yytname) + " YY_NULLPTR"
    end
99
+
100
+ # b4_int_type_for
101
+ def int_type_for(ary)
102
+ min = ary.min
103
+ max = ary.max
104
+
105
+ case
106
+ when (-127 <= min && min <= 127) && (-127 <= max && max <= 127)
107
+ "yytype_int8"
108
+ when (0 <= min && min <= 255) && (0 <= max && max <= 255)
109
+ "yytype_uint8"
110
+ when (-32767 <= min && min <= 32767) && (-32767 <= max && max <= 32767)
111
+ "yytype_int16"
112
+ when (0 <= min && min <= 65535) && (0 <= max && max <= 65535)
113
+ "yytype_uint16"
114
+ else
115
+ "int"
116
+ end
117
+ end
118
+
119
    # Renders the switch cases of the %printer actions: one "case" per
    # symbol that has a printer, wrapping its translated code between
    # #line markers (the [@oline@]/[@ofile@] placeholders are filled in
    # later by replace_special_variables).
    def symbol_actions_for_printer
      str = ""

      @grammar.symbols.each do |sym|
        next unless sym.printer

        str << <<-STR
    case #{sym.enum_name}: /* #{sym.comment} */
#line #{sym.printer.lineno} "#{@grammar_file_path}"
      #{sym.printer.translated_code(sym.tag)}
#line [@oline@] [@ofile@]
      break;

      STR
      end

      str
    end
137
+
138
+ # b4_user_actions
139
+ def user_actions
140
+ str = ""
141
+
142
+ @context.states.rules.each do |rule|
143
+ next unless rule.code
144
+
145
+ rule = rule
146
+ code = rule.code
147
+ spaces = " " * (code.column - 1)
148
+
149
+ str << <<-STR
150
+ case #{rule.id + 1}: /* #{rule.as_comment} */
151
+ #line #{code.line} "#{@grammar_file_path}"
152
+ #{spaces}#{rule.translated_code}
153
+ #line [@oline@] [@ofile@]
154
+ break;
155
+
156
+ STR
157
+ end
158
+
159
+ str << <<-STR
160
+
161
+ #line [@oline@] [@ofile@]
162
+ STR
163
+
164
+ str
165
+ end
166
+
167
    # b4_parse_param
    #
    # The %parse-param declaration text with the surrounding "{}"
    # stripped. NOTE(review): raises NoMethodError when no %parse-param
    # was declared (@grammar.parse_param is nil); callers such as
    # user_formals guard for that — confirm before calling directly.
    def parse_param
      # Omit "{}"
      @grammar.parse_param[1..-2]
    end
172
+
173
+ # b4_user_formals
174
+ def user_formals
175
+ if @grammar.parse_param
176
+ ", #{parse_param}"
177
+ else
178
+ ""
179
+ end
180
+ end
181
+
182
+ # b4_table_value_equals
183
+ def table_value_equals(table, value, literal, symbol)
184
+ if literal < table.min || table.max < literal
185
+ "0"
186
+ else
187
+ "((#{value}) == #{symbol})"
188
+ end
189
+ end
190
+
191
    # Base name of the selected template (e.g. "bison/yacc.c" -> "yacc.c")
    def template_basename
      File.basename(template_file)
    end
194
+
195
    # Auxiliary data carried on the grammar (exposed to the templates)
    def aux
      @grammar.aux
    end
198
+
199
+ def int_array_to_string(ary)
200
+ last = ary.count - 1
201
+
202
+ s = ary.each_with_index.each_slice(10).map do |slice|
203
+ str = " "
204
+
205
+ slice.each do |e, i|
206
+ str << sprintf("%6d%s", e, (i == last) ? "" : ",")
207
+ end
208
+
209
+ str
210
+ end
211
+
212
+ s.join("\n")
213
+ end
214
+
215
    # b4_spec_mapped_header_file: path of the generated header, if any
    def spec_mapped_header_file
      @header_file_path
    end
218
+
219
+ def b4_cpp_guard__b4_spec_mapped_header_file
220
+ if @header_file_path
221
+ "YY_YY_" + @header_file_path.gsub(/[^a-zA-Z_0-9]+/, "_").upcase + "_INCLUDED"
222
+ else
223
+ ""
224
+ end
225
+ end
226
+
227
+ private
228
+
229
    # Absolute path of the main parser template
    def template_file
      File.join(template_dir, @template_name)
    end
232
+
233
    # Absolute path of the header template (always bison/yacc.h)
    def header_template_file
      File.join(template_dir, "bison/yacc.h")
    end
236
+
237
    # Root of the bundled template directory (<gem root>/template)
    def template_dir
      File.expand_path("../../../template", __FILE__)
    end
240
+
241
+ def string_array_to_string(ary)
242
+ str = ""
243
+ tmp = " "
244
+
245
+ ary.each do |s|
246
+ s = s.gsub('\\', '\\\\\\\\')
247
+ s = s.gsub('"', '\\"')
248
+
249
+ if (tmp + s + " \"\",").length > 75
250
+ str << tmp << "\n"
251
+ tmp = " \"#{s}\","
252
+ else
253
+ tmp << " \"#{s}\","
254
+ end
255
+ end
256
+
257
+ str << tmp
258
+ end
259
+
260
+ def replace_special_variables(str, ofile)
261
+ str.each_line.with_index(1).map do |line, i|
262
+ line.gsub!("[@oline@]", (i + 1).to_s)
263
+ line.gsub!("[@ofile@]", "\"#{ofile}\"")
264
+ line
265
+ end.join
266
+ end
267
+ end
268
+ end