RubyGems - yap-shell-parser - Versions diffs - 0.0.1 - Mend

yap-shell-parser 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

checksums.yaml +7 -0
data/.gitignore +14 -0
data/.rspec +2 -0
data/.travis.yml +10 -0
data/Gemfile +4 -0
data/LICENSE.txt +22 -0
data/README.md +33 -0
data/Rakefile +3 -0
data/bin/compile_debug_parser +3 -0
data/bin/compile_parser +3 -0
data/lib/tasks/gem.rake +60 -0
data/lib/yap/shell.rb +2 -0
data/lib/yap/shell/parser.rb +381 -0
data/lib/yap/shell/parser/grammar.y +151 -0
data/lib/yap/shell/parser/lexer.rb +311 -0
data/lib/yap/shell/parser/nodes.rb +205 -0
data/lib/yap/shell/parser/version.rb +5 -0
data/spec/spec_helper.rb +91 -0
data/spec/yap/shell/lexer_spec.rb +697 -0
data/yap-shell-parser.gemspec +25 -0
metadata +109 -0

data/lib/yap/shell/parser/grammar.y ADDED

@@ -0,0 +1,151 @@
+# $Id$
+#
+# Racc grammar for yap shell command lines: statements, conditionals, pipelines, redirections, heredocs and env assignments.
+class Yap::Shell::Parser
+  # Terminal symbols; each corresponds to a tag the lexer pushes alongside a Token.
+  token Command LiteralCommand Argument Heredoc InternalEval Separator Conditional Pipe Redirection LValue RValue
+  #
+  # prechigh
+  # #   left '**' '*' '/' '%'
+  # #   left '+' '-'
+  # #   left '&&' '||'
+  # #   left '|' '^' '&'
+  # #   # right Not
+  # left Separator
+  # left Conditional
+  # right Pipe
+  # preclow
+rule
+# A program is a Separator-delimited list of statements.
+program : stmts
+stmts : stmts Separator stmt
+    { result = StatementsNode.new(val[0], val[2]) }
+  | stmt
+    { result = StatementsNode.new(val[0]) }
+# Conditional chains are left-recursive: the node receives the operator text, the left side, then the right side.
+stmt : stmt Conditional pipeline
+    { result = ConditionalNode.new(val[1].value, val[0], val[2]) }
+  | pipeline
+pipeline : pipeline Pipe stmts2
+    { result = PipelineNode.new(val[0], val[2]) }
+  | stmts2
+# A parenthesised group yields the inner statements node unchanged.
+stmts2 : '(' stmts ')'
+    { result = val[1] }
+  | command_w_heredoc
+  | internal_eval
+# The actions below mutate the already-built command node (heredoc=, redirects <<) and return it.
+command_w_heredoc : command_w_redirects Heredoc
+    { val[0].heredoc = val[1] ; result = val[0] }
+  | command_w_redirects
+command_w_redirects : command_w_redirects Redirection
+    { val[0].redirects << RedirectionNode.new(val[1].value, val[1].attrs[:target]) ; result = val[0] }
+  | command_w_vars
+  | command
+  | vars
+# Variable assignments followed by a command are wrapped together in an EnvWrapperNode.
+command_w_vars : vars command
+  { result = EnvWrapperNode.new(val[0], val[1]) }
+vars : vars LValue RValue
+    { val[0].add_var(val[1].value, val[2].value) ; result = val[0] }
+  | LValue RValue
+    { result = EnvNode.new(val[0].value, val[1].value) }
+command : command2
+command2: Command
+    { result = CommandNode.new(val[0].value) }
+  | Command args
+    { result = CommandNode.new(val[0].value, val[1].flatten) }
+  | LiteralCommand
+    { result = CommandNode.new(val[0].value, literal:true) }
+  | LiteralCommand args
+    { result = CommandNode.new(val[0].value, val[1].flatten, literal:true) }
+# args accumulates as a nested array ([[a, b], c]); the command2 actions flatten it.
+args : Argument
+    { result = [val[0].value] }
+  | args Argument
+    { result = [val[0], val[1].value] }
+internal_eval : InternalEval
+    { result = InternalEvalNode.new(val[0].value) }
+---- inner
+  $LOAD_PATH.unshift File.dirname(__FILE__) + "/../../"
+  require 'yap/shell/parser/lexer'
+  require 'yap/shell/parser/nodes'
+  include Yap::Shell::Parser::Nodes
+  # Tokenizes +str+ with Yap::Shell::Parser::Lexer, stores the token list in @q
+  # (consumed by next_token) and invokes the Racc parsing routine on it.
+  # Returns whatever the parsing routine returns.
+  def parse(str)
+    # @yydebug = true
+    @q = Yap::Shell::Parser::Lexer.new.tokenize(str)
+    # @q.push [false, '$']   # is optional from Racc 1.3.7
+# puts @q.inspect
+# puts "---- parse tree follows ----"
+    # NOTE(review): presumably equivalent to the commented-out do_parse below — confirm against the Racc runtime.
+    __send__(Racc_Main_Parsing_Routine, _racc_setup(), false)
+    #do_parse
+  end
+  # Removes and returns the first remaining entry of the token queue built by parse
+  # (nil once the queue is empty).
+  def next_token
+    @q.shift
+  end
+---- footer
+# Ad-hoc driver: when this file is executed directly, parse each sample command
+# line in the list below and pretty-print the resulting AST.
+if $0 == __FILE__
+  $LOAD_PATH.unshift File.dirname(__FILE__) + "/../../"
+  require 'yap/shell/parser/lexer'
+  require 'yap/shell/parser/nodes'
+    # Uncomment entries to exercise additional inputs.
+    [
+    # "echo foo",
+    # "echo foo ; echo bar baz yep",
+    # "echo foo && echo bar baz yep",
+    # "echo foo && echo bar && ls foo && ls bar",
+    # "echo foo ; echo bar baz yep ; ls foo",
+    # "echo foo && echo bar ; ls baz",
+    # "echo foo && echo bar ; ls baz ; echo zach || echo gretchen",
+    # "echo foo | bar",
+    # "echo foo | bar && foo | bar",
+    # "foo && bar ; word || baz ; yep | grep -v foo",
+    # "( foo )",
+    # "( foo a b && bar c d )",
+    # "( foo a b && (bar c d | baz e f))",
+    # "((((foo))))",
+    # "foo -b -c ; (this ;that ;the; other  ;thing) && yep",
+    # "foo -b -c ; (this ;that && other  ;thing) && yep",
+    # "4 + 5",
+    # "!'hello' ; 4 - 4 && 10 + 3",
+    # "\\foo <<-EOT\nbar\nEOT",
+    # "ls | grep md | grep WISH",
+    # "(!upcase)",
+    # "echo foo > bar.txt",
+    # "ls -l > a.txt ; echo f 2> b.txt ; cat b &> c.txt ; du -sh 1>&2 1>hey.txt",
+    # "!Dir.chdir('..')",
+    # "FOO=123",
+    # "FOO=123 BAR=345",
+    # "FOO=abc bar=2314 car=14ab ls -l",
+    "FOO=abc BAR='hello world' ls -l ; CAR=f echo foo && say hi"
+    ].each do |src|
+      puts 'parsing:'
+      print src
+      puts
+      puts 'result:'
+      require 'pp'
+      ast = Yap::Shell::Parser.new.parse(src)
+      pp ast
+    end
+  # puts "---- Evaluating"
+  #   require 'pry'
+  # binding.pry
+  # Evaluator.new.evaltree(ast)
+end

data/lib/yap/shell/parser/lexer.rb ADDED

@@ -0,0 +1,311 @@
+require 'ostruct'
+module Yap::Shell
+  class Parser::Lexer
+    class Token
+      include Comparable
+      attr_reader :tag, :value, :lineno, :attrs
+      def initialize(tag, value, lineno:,attrs:{})
+        @tag = tag
+        @value = value
+        @lineno = lineno
+        @attrs = attrs
+      end
+      def <=>(other)
+        return -1 if self.class != other.class
+        return 0 if [tag, value, lineno, attrs] == [other.tag, other.value, other.lineno, other.attrs]
+        -1
+      end
+      def inspect
+        "#{tag.inspect} '#{value}' #{attrs.inspect}"
+      end
+      def to_s
+        "Token(#{tag.inspect} #{value.inspect} on #{lineno} with #{attrs.inspect})"
+      end
+      def length
+        to_s.length
+      end
+    end
+    ARG                    = /[^\s;\|\(\)\{\}\[\]\&\!\\\<][^\s;\|\(\)\{\}\[\]\&\!\>\<]*/
+    COMMAND                = /\A(#{ARG})/
+    LITERAL_COMMAND        = /\A\\(#{ARG})/
+    WHITESPACE             = /\A[^\n\S]+/
+    ARGUMENT               = /\A(#{ARG}+)/
+    LH_ASSIGNMENT          = /\A(([A-z_][\w]*)=)/
+    RH_VALUE               = /\A(\S+)/
+    STATEMENT_TERMINATOR   = /\A(;)/
+    PIPE_TERMINATOR        = /\A(\|)/
+    CONDITIONAL_TERMINATOR = /\A(&&|\|\|)/
+    HEREDOC                = /\A<<-?([A-z0-9]+)\s*^(.*)?(^\s*\1\s*$)/m
+    INTERNAL_EVAL          = /\A(?:(\!)|([0-9]+))/
+    SUBGROUP               = /\A(\(|\))/
+    REDIRECTION            = /\A(([12]?>&?[12]?)\s*(?![12]>)(#{ARG})?)/
+    REDIRECTION2           = /\A((&>|<)\s*(#{ARG}))/
+    def tokenize(str)
+      @str = str
+      @tokens = []
+      @lineno = 0
+      @looking_for_args = false
+      max = 100
+      count = 0
+      @current_position = 0
+      process_next_chunk = -> { @chunk = str.slice(@current_position..-1) ; @chunk != "" }
+      while process_next_chunk.call
+        result = subgroup_token ||
+          assignment_token ||
+          literal_command_token ||
+          command_token ||
+          whitespace_token ||
+          terminator_token ||
+          redirection_token ||
+          heredoc_token ||
+          string_argument_token ||
+          argument_token ||
+          internal_eval_token
+        count += 1
+        raise "Infinite loop detected on #{@chunk.inspect}" if count == max
+        @current_position += result.to_i
+      end
+      @tokens
+    end
+    private
+    def token(tag, value, attrs:{})
+      @tokens.push [tag, Token.new(tag, value, lineno:@lineno, attrs:attrs)]
+    end
+    def command_token
+      if !@looking_for_args && md=@chunk.match(COMMAND)
+        @looking_for_args = true
+        token :Command, md[1]
+        md[0].length
+      end
+    end
+    def literal_command_token
+      if !@looking_for_args && md=@chunk.match(LITERAL_COMMAND)
+        @looking_for_args = true
+        token :LiteralCommand, md[1]
+        md[0].length
+      end
+    end
+    # Would emit a :NumericExpr token and switch into argument mode.
+    # NOTE(review): NUMERIC_EXPR is not defined among the constants above and
+    # this matcher is not in tokenize's matcher chain; calling it as written
+    # would presumably raise NameError. Looks like leftover code — confirm
+    # before relying on it.
+    def numeric_expr_token
+      if !@looking_for_args && md=@chunk.match(NUMERIC_EXPR)
+        @looking_for_args = true
+        token :NumericExpr, md[1]
+        md[0].length
+      end
+    end
+    def heredoc_token
+      if md=@chunk.match(HEREDOC)
+        token :Heredoc, md[2]
+        md[0].length
+      end
+    end
+    def internal_eval_token
+      if md=@chunk.match(INTERNAL_EVAL)
+        consumed = 0
+        substr = if md[1]                               # begins with !
+          consumed = md[1].length
+          @chunk[consumed..-1]
+        elsif md[2]                                     # begins with a number
+          @chunk[consumed..-1]
+        end
+        result = process_internal_eval substr, consumed: consumed
+        token :InternalEval, result.str
+        return result.consumed_length
+      end
+    end
+    def redirection_token
+      if md=@chunk.match(REDIRECTION)
+        target = nil
+        target = md[3] if md[3] && md[3].length > 0
+        token :Redirection, md[2], attrs: { target: target }
+        md[0].length
+      elsif md=@chunk.match(REDIRECTION2)
+        token :Redirection, md[2], attrs: { target: md[3] }
+        md[0].length
+      end
+    end
+    def subgroup_token
+      if md=@chunk.match(SUBGROUP)
+        token md[0], md[0]
+        return md[0].length
+      end
+    end
+    # Matches and consumes non-meaningful whitespace.
+    def whitespace_token
+      return nil unless md=WHITESPACE.match(@chunk)
+      input = md.to_a[0]
+      input.length
+    end
+    # Emits one :Argument token while in argument mode, provided the chunk
+    # starts with ARGUMENT. The argument is assembled character by character
+    # so that quoted sections embedded in a bare word are joined into the same
+    # argument. Returns the number of characters consumed, or nil.
+    def argument_token
+      # md only gates entry; the text is rebuilt by the scan below.
+      if @looking_for_args && md=@chunk.match(ARGUMENT)
+        str = ''
+        i = 0
+        loop do
+          ch = @chunk[i]
+          if %w(' ").include?(ch)
+            # Quoted section: process_string returns its contents without the
+            # delimiters and the length it consumed.
+            result = process_string @chunk[i..-1], ch
+            str << result.str
+            i += result.consumed_length
+          elsif ch !~ ARGUMENT
+            # First character that cannot be part of an argument ends the scan.
+            break
+          else
+            str << ch
+            i += 1
+          end
+          break if i >= @chunk.length
+        end
+        token :Argument, str
+        i
+      end
+    end
+    # Handles NAME=value before a command: emits an :LValue token for the name
+    # and, when a value follows, an :RValue token for it. Returns the total
+    # number of characters consumed, or nil when not applicable.
+    def assignment_token
+      if !@looking_for_args && md=@chunk.match(LH_ASSIGNMENT)
+        token :LValue, md[2]
+        consumed_length = md[1].length
+        i = consumed_length
+        # @chunk is reassigned to the text after "NAME=" for the value match.
+        @chunk = @chunk[i..-1]
+        if %w(' ").include?(@chunk[0])
+          # Quoted value: delimiters are stripped by process_string.
+          result = process_string @chunk[0..-1], @chunk[0]
+          token :RValue, result.str
+          consumed_length += result.consumed_length
+        elsif md=@chunk.match(RH_VALUE)
+          token :RValue, md[1]
+          consumed_length += md[0].length
+        end
+        # NOTE(review): if neither branch matches (nothing, or whitespace,
+        # right after "="), only the :LValue token is emitted.
+        consumed_length
+      end
+    end
+    def terminator_token
+      if md=@chunk.match(CONDITIONAL_TERMINATOR)
+        @looking_for_args = false
+        token :Conditional, md[0]
+        md[0].length
+      elsif md=@chunk.match(STATEMENT_TERMINATOR)
+        @looking_for_args = false
+        token :Separator, md[0]
+        md[0].length
+      elsif md=@chunk.match(PIPE_TERMINATOR)
+        @looking_for_args = false
+        token :Pipe, md[0]
+        md[0].length
+      end
+    end
+    # Matches single and double quoted strings
+    def string_argument_token
+      if %w(' ").include?(@chunk[0])
+        result = process_string @chunk[0..-1], @chunk[0]
+        token :Argument, result.str
+        return result.consumed_length
+      end
+    end
+    def process_internal_eval(input_str, consumed:0)
+      scope = []
+      words = []
+      str = ''
+      i = 0
+      loop do
+        ch = input_str[i]
+        popped = false
+        if scope.empty? && md=input_str[i..-1].match(/\A(;|\||&&|\))/)
+          return OpenStruct.new(str:str.strip, consumed_length:i+consumed)
+        elsif (i == input_str.length)
+          return OpenStruct.new(str:str.strip, consumed_length:i+consumed)
+        else
+          if scope.last == ch
+            scope.pop
+            popped = true
+          end
+          if !popped
+            if %w(' ").include?(ch)
+              scope << ch
+            elsif ch == "{"
+              scope << "}"
+            elsif ch == "["
+              scope << "]"
+            elsif ch == "("
+              scope << ")"
+            end
+          end
+          str << ch
+        end
+        i += 1
+      end
+    end
+    def process_string(input_str, delimiter, indent=0)
+      return input_str if input_str.length == 0
+      nested_delimiter = "\\#{delimiter}"
+      i = delimiter.length  # start string matching after our delimiter
+      result_str = ''
+      loop do
+        chunk = input_str[i..-1]
+        puts "#{' '*indent}I: #{i}" if ENV["DEBUG"]
+        if i >= input_str.length
+          puts "#{' '*indent}C-yah: result:#{result_str.inspect}  length: #{input_str.length}"  if ENV["DEBUG"]
+          return OpenStruct.new(str:result_str, consumed_length: input_str.length)
+        end
+        if chunk.start_with?(nested_delimiter) # we found a nested escaped string
+          puts "#{' '*indent}A-pre: chunk:#{chunk.inspect}  nested_delimiter:#{nested_delimiter.inspect}" if ENV["DEBUG"]
+          result = process_string(chunk[0..-1], nested_delimiter, indent+2)
+          result_str << [delimiter, result.str, delimiter].join
+          puts "#{' '*indent}A-pos: result:#{result.inspect}  result_str:#{result_str.inspect}  #{nested_delimiter.length} + #{result.consumed_length} + #{nested_delimiter.length}" if ENV["DEBUG"]
+          i += result.consumed_length
+        elsif chunk.start_with?(delimiter)    # we found the end of our current nested escaped string
+          puts "#{' '*indent}B-yah: result:#{result_str.inspect}  length: #{i}" if ENV["DEBUG"]
+          return OpenStruct.new(str:result_str, consumed_length: i+delimiter.length)
+        else
+          char = input_str[i]
+          result_str << char
+          puts "#{' '*indent}D-yah: i:#{i}  char: #{char}   result_str:#{result_str.inspect}" if ENV["DEBUG"]
+          i += 1
+        end
+      end
+    end
+  end
+end