RubyGems - shell_parser - Versions diffs - 0.1.0 - Mend

shell_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

data/lib/shell_parser.rb ADDED Viewed

@@ -0,0 +1,523 @@
+# frozen_string_literal: true
+require_relative 'shell_parser/version'
# POSIX Shell Command Language Parser
# Provides a simple AST for syntax highlighting and shell execution
#
# Entry point: ShellParser.parse(text) tokenizes +text+ with Lexer and feeds
# the tokens to Parser, returning a Command, Pipeline or List node.
module ShellParser
  # Raised by Parser when the token stream does not match the grammar.
  # Subclasses RuntimeError so existing `rescue RuntimeError` / bare `rescue`
  # handlers keep working.
  class ParseError < RuntimeError; end

  # Characters that Word#to_s backslash-escapes when rendering an unquoted
  # literal: blanks, operator characters, quotes, backslash and backtick, plus
  # a '$' that the lexer would otherwise read as the start of an expansion.
  UNQUOTED_ESCAPE = /[ \t\r|&;<>()'"\\`]|\$(?=[a-zA-Z_{(0-9@*?$!#\-])/
  # Same idea for text rendered inside double quotes.
  DOUBLE_QUOTED_ESCAPE = /["\\`]|\$(?=[a-zA-Z_{(0-9@*?$!#\-])/

  # Word part types - a word is composed of these parts
  # quote_style: :none, :single, :double
  # pos/len: offset and length of the part in the lexer input
  Literal = Struct.new(:value, :pos, :len, :quote_style)
  Variable = Struct.new(:name, :pos, :len, :braced, :quote_style) # $VAR or ${VAR}
  CommandSub = Struct.new(:command, :pos, :len, :style, :quote_style) # $(cmd) or `cmd`, style: :dollar or :backtick

  # A word is a sequence of parts
  Word = Struct.new(:parts, :pos, :len) do
    # Renders the word back to shell source text.
    #
    # Literal values hold the decoded text (escapes already removed by the
    # lexer), so characters that would change how the text lexes are escaped
    # again here; literals without such characters render exactly as stored.
    def to_s
      parts.each_with_index.map { |part, index| render_part(part, index.zero?) }.join
    end

    # Renders one part; unknown part types render as nothing (nil joins as "").
    def render_part(part, leading)
      case part
      when Literal
        render_literal(part, leading)
      when Variable
        wrap_quotes(part.braced ? "${#{part.name}}" : "$#{part.name}", part.quote_style)
      when CommandSub
        text = part.style == :backtick ? "`#{part.command}`" : "$(#{part.command})"
        wrap_quotes(text, part.quote_style)
      end
    end

    # Renders a Literal according to its quote style.
    def render_literal(part, leading)
      text = part.value.to_s
      case part.quote_style
      when :single
        # A single quote cannot appear inside single quotes: close, escape, reopen.
        # Block form avoids gsub's backslash handling in replacement strings.
        "'#{text.gsub("'") { "'\\''" }}'"
      when :double
        "\"#{text.gsub(DOUBLE_QUOTED_ESCAPE) { |ch| "\\#{ch}" }}\""
      else
        escaped = text.gsub(UNQUOTED_ESCAPE) { |ch| "\\#{ch}" }
        # '#' only starts a comment at the beginning of a token.
        leading && escaped.start_with?('#') ? "\\#{escaped}" : escaped
      end
    end

    # Wraps an expansion in double quotes when it was lexed inside them.
    def wrap_quotes(text, quote_style)
      quote_style == :double ? "\"#{text}\"" : text
    end

    private :render_part, :render_literal, :wrap_quotes
  end

  # type: :in, :out, :append, :heredoc, :heredoc_strip, :clobber,
  #       :in_fd, :out_fd, :inout
  # fd: explicit file descriptor number written directly before the operator
  #     (as in 2>file), or nil
  # target: Word
  Redirect = Struct.new(:type, :fd, :target)

  Command = Struct.new(:words, :redirects) do
    # words: array of Word nodes
    # redirects: array of Redirect nodes
    def initialize(words = [], redirects = [])
      super(words, redirects)
    end
  end

  Pipeline = Struct.new(:commands, :negated) do
    # commands: array of Command nodes
    # negated: boolean (for ! pipeline)
    def initialize(commands = [], negated = false)
      super(commands, negated)
    end
  end

  # left/right: Command, Pipeline, or List (right is nil after a trailing operator)
  # op: :and, :or, :semi, :background
  List = Struct.new(:left, :op, :right)

  # Token for lexer
  Token = Struct.new(:type, :value, :pos, :len)

  # Splits shell source text into operator tokens and :word tokens whose value
  # is a Word struct, ending with a single :eof token.
  class Lexer
    OPERATORS = {
      '&&' => :and_if,
      '||' => :or_if,
      ';;' => :dsemi,
      '<<' => :dless,
      '>>' => :dgreat,
      '<&' => :lessand,
      '>&' => :greatand,
      '<>' => :lessgreat,
      '<<-' => :dlessdash,
      '>|' => :clobber,
      '|' => :pipe,
      '&' => :background,
      ';' => :semi,
      '<' => :less,
      '>' => :great,
      '(' => :lparen,
      ')' => :rparen,
      "\n" => :newline
    }.freeze

    # Operator lengths, longest first, so '<<-' wins over '<<' and '<'.
    OPERATOR_LENGTHS = OPERATORS.keys.map(&:length).uniq.sort.reverse.freeze
    BLANK = /[ \t\r]/
    NAME_CHAR = /[a-zA-Z0-9_]/
    # Single-character parameters: positional digits and @ * ? $ ! # -
    SPECIAL_PARAMETER = /[0-9@*?$!#\-]/
    # Characters that make a preceding '$' the start of an expansion.
    EXPANSION_INTRO = /[a-zA-Z_{(0-9@*?$!#\-]/
    # Characters a backslash escapes inside double quotes (newline is handled separately).
    DOUBLE_QUOTE_ESCAPABLE = /["$`\\]/

    # Collects consecutive literal characters together with the source offset
    # at which the run began, so the resulting Literal reports its source span.
    class LiteralBuffer
      def initialize(quote_style)
        @quote_style = quote_style
        @text = +''
        @start = nil
      end

      # Appends decoded +text+ that began at source offset +source_pos+.
      def add(text, source_pos)
        @start ||= source_pos
        @text << text
      end

      # Emits the pending run (if any) into +parts+; +end_pos+ is the source
      # offset just past the run.
      def flush(parts, end_pos)
        return if @text.empty?

        parts << Literal.new(@text, @start, end_pos - @start, @quote_style)
        @text = +''
        @start = nil
      end
    end

    private_constant :OPERATOR_LENGTHS, :BLANK, :NAME_CHAR, :SPECIAL_PARAMETER,
                     :EXPANSION_INTRO, :DOUBLE_QUOTE_ESCAPABLE, :LiteralBuffer

    # input: the shell source text (converted with to_s).
    def initialize(input)
      @input = input.to_s
      @pos = 0
      @tokens = []
    end

    # Scans the whole input and returns the array of Token structs.
    # Blanks and '#' comments produce no tokens; newlines are :newline tokens.
    def tokenize
      @tokens = []
      @pos = 0
      while @pos < @input.length
        if whitespace?
          skip_whitespace
        elsif comment?
          skip_comment
        elsif operator?
          scan_operator
        else
          scan_word
        end
      end
      @tokens << Token.new(:eof, nil, @pos, 0)
      @tokens
    end

    private

    def current_char
      @input[@pos]
    end

    def peek(offset = 1)
      @input[@pos + offset]
    end

    # Moves the cursor forward without running past the end of the input.
    def advance_by(count)
      @pos = [@pos + count, @input.length].min
    end

    def whitespace?
      current_char&.match?(BLANK)
    end

    def skip_whitespace
      @pos += 1 while @pos < @input.length && whitespace?
    end

    def comment?
      current_char == '#'
    end

    def skip_comment
      @pos += 1 while @pos < @input.length && current_char != "\n"
    end

    # The longest operator text starting at the cursor, or nil.
    def operator_at_cursor
      OPERATOR_LENGTHS.each do |length|
        candidate = @input[@pos, length]
        return candidate if OPERATORS.key?(candidate)
      end
      nil
    end

    def operator?
      !operator_at_cursor.nil?
    end

    def scan_operator
      text = operator_at_cursor
      return unless text

      @tokens << Token.new(OPERATORS[text], text, @pos, text.length)
      @pos += text.length
    end

    def word_boundary?
      return true if @pos >= @input.length

      # "\n" is an operator, so it is covered by operator?.
      whitespace? || operator?
    end

    # True when the cursor is on a backtick or on a '$' that begins a
    # variable, special parameter or command substitution.
    def expansion_start?
      return true if current_char == '`'

      following = peek
      current_char == '$' && !following.nil? && following.match?(EXPANSION_INTRO)
    end

    # Scans the expansion at the cursor (caller has checked expansion_start?).
    def scan_expansion_part(quote_style)
      return scan_backtick_substitution_part(quote_style) if current_char == '`'
      return scan_command_substitution_part(quote_style) if peek == '('

      scan_variable_part(quote_style)
    end

    # Scans one word and appends a :word token. Nothing is emitted when no
    # part was produced (e.g. a lone line continuation between tokens).
    def scan_word
      start_pos = @pos
      parts = []
      literal = LiteralBuffer.new(:none)
      until word_boundary?
        case current_char
        when "'"
          literal.flush(parts, @pos)
          parts << scan_single_quoted_part
        when '"'
          literal.flush(parts, @pos)
          parts.concat(scan_double_quoted_parts)
        when '\\'
          scan_unquoted_escape(literal)
        when '$', '`'
          if expansion_start?
            literal.flush(parts, @pos)
            parts << scan_expansion_part(:none)
          else
            literal.add(current_char, @pos)
            @pos += 1
          end
        else
          literal.add(current_char, @pos)
          @pos += 1
        end
      end
      literal.flush(parts, @pos)
      return if parts.empty?

      length = @pos - start_pos
      @tokens << Token.new(:word, Word.new(parts, start_pos, length), start_pos, length)
    end

    # Backslash outside quotes: escapes the next character, removes a
    # following newline (line continuation), and is dropped at end of input.
    def scan_unquoted_escape(literal)
      escaped = peek
      if escaped.nil?
        @pos += 1
      elsif escaped == "\n"
        @pos += 2
      else
        literal.add(escaped, @pos)
        @pos += 2
      end
    end

    # Returns a :single Literal whose pos/len include the quote characters.
    def scan_single_quoted_part
      start_pos = @pos
      close = skip_single_quoted
      content = @input[(start_pos + 1)...(close || @pos)]
      Literal.new(content, start_pos, @pos - start_pos, :single)
    end

    # Returns the parts found between double quotes. An empty pair of quotes
    # yields one empty :double Literal so the word still records the quoting.
    def scan_double_quoted_parts
      open_pos = @pos
      @pos += 1 # skip opening "
      parts = []
      literal = LiteralBuffer.new(:double)
      while @pos < @input.length && current_char != '"'
        case current_char
        when '\\'
          scan_double_quoted_escape(literal)
        when '$', '`'
          if expansion_start?
            literal.flush(parts, @pos)
            parts << scan_expansion_part(:double)
          else
            literal.add(current_char, @pos)
            @pos += 1
          end
        else
          literal.add(current_char, @pos)
          @pos += 1
        end
      end
      literal.flush(parts, @pos)
      @pos += 1 if current_char == '"' # skip closing "
      parts << Literal.new('', open_pos, @pos - open_pos, :double) if parts.empty?
      parts
    end

    # Backslash inside double quotes only escapes " $ ` \ and newline;
    # before anything else the backslash itself is kept.
    def scan_double_quoted_escape(literal)
      escaped = peek
      if escaped == "\n"
        @pos += 2 # line continuation
      elsif escaped&.match?(DOUBLE_QUOTE_ESCAPABLE)
        literal.add(escaped, @pos)
        @pos += 2
      else
        literal.add('\\', @pos)
        @pos += 1
      end
    end

    # Scans $NAME, ${...} or a one-character special parameter such as $1 or $?.
    def scan_variable_part(quote_style)
      start_pos = @pos
      @pos += 1 # skip $
      braced = current_char == '{'
      name =
        if braced
          @pos += 1
          scan_braced_name
        elsif current_char&.match?(SPECIAL_PARAMETER)
          @pos += 1
          @input[@pos - 1]
        else
          scan_name
        end
      Variable.new(name, start_pos, @pos - start_pos, braced, quote_style)
    end

    # Reads up to the '}' matching an already-consumed '${', keeping nested
    # '${...}' pairs inside the returned text.
    def scan_braced_name
      name = +''
      depth = 1
      while @pos < @input.length
        char = current_char
        if char == '}'
          depth -= 1
          @pos += 1
          break if depth.zero?

          name << char
        elsif char == '$' && peek == '{'
          depth += 1
          name << '${'
          @pos += 2
        else
          name << char
          @pos += 1
        end
      end
      name
    end

    def scan_name
      first = @pos
      @pos += 1 while current_char&.match?(NAME_CHAR)
      @input[first...@pos]
    end

    # Scans $( ... ); command is the raw source text between the parentheses.
    def scan_command_substitution_part(quote_style)
      start_pos = @pos
      @pos += 2 # skip $(
      body_start = @pos
      close = skip_command_body
      command = @input[body_start...(close || @pos)]
      CommandSub.new(command, start_pos, @pos - start_pos, :dollar, quote_style)
    end

    # Scans ` ... `; command is the raw source text between the backticks.
    def scan_backtick_substitution_part(quote_style)
      start_pos = @pos
      close = skip_backticks
      command = @input[(start_pos + 1)...(close || @pos)]
      CommandSub.new(command, start_pos, @pos - start_pos, :backtick, quote_style)
    end

    # Cursor is just past an opening '('. Advances past the matching ')' and
    # returns its index, or nil when the input ends first. Quoted text,
    # backslash escapes and nested parentheses do not close the group.
    def skip_command_body
      while @pos < @input.length
        case current_char
        when '\\' then advance_by(2)
        when "'" then skip_single_quoted
        when '"' then skip_double_quoted
        when '`' then skip_backticks
        when '('
          @pos += 1
          skip_command_body
        when ')'
          @pos += 1
          return @pos - 1
        else
          @pos += 1
        end
      end
      nil
    end

    # Cursor is on an opening single quote. Returns the closing quote's index or nil.
    def skip_single_quoted
      @pos += 1
      close = @input.index("'", @pos)
      @pos = close ? close + 1 : @input.length
      close
    end

    # Cursor is on an opening double quote. Returns the closing quote's index or nil.
    def skip_double_quoted
      @pos += 1
      while @pos < @input.length
        case current_char
        when '"'
          @pos += 1
          return @pos - 1
        when '\\' then advance_by(2)
        when '`' then skip_backticks
        when '$'
          if peek == '('
            @pos += 2
            skip_command_body
          else
            @pos += 1
          end
        else
          @pos += 1
        end
      end
      nil
    end

    # Cursor is on an opening backtick. Returns the closing backtick's index or nil.
    def skip_backticks
      @pos += 1
      while @pos < @input.length
        case current_char
        when '`'
          @pos += 1
          return @pos - 1
        when '\\' then advance_by(2)
        else @pos += 1
        end
      end
      nil
    end
  end

  # Builds Command / Pipeline / List nodes from the Lexer's token array.
  class Parser
    # Separator token type => List#op. A newline separates commands like ';'.
    LIST_OPERATORS = {
      and_if: :and,
      or_if: :or,
      semi: :semi,
      background: :background,
      newline: :semi
    }.freeze
    # Redirection token type => Redirect#type.
    REDIRECT_TYPES = {
      less: :in,
      great: :out,
      dgreat: :append,
      dless: :heredoc,
      dlessdash: :heredoc_strip,
      clobber: :clobber,
      lessand: :in_fd,
      greatand: :out_fd,
      lessgreat: :inout
    }.freeze
    LIST_TERMINATORS = %i[eof rparen].freeze
    private_constant :LIST_OPERATORS, :REDIRECT_TYPES, :LIST_TERMINATORS

    # tokens: array of Token structs as returned by Lexer#tokenize.
    def initialize(tokens)
      @tokens = tokens
      @pos = 0
    end

    # Parses the whole token array and returns the root node.
    # Raises ParseError when tokens remain that the grammar does not accept.
    def parse
      result = parse_list
      expect(:eof)
      result
    end

    private

    # The token at the cursor; a synthetic :eof stands in when the array is
    # exhausted so callers never see nil.
    def current_token
      @tokens[@pos] || Token.new(:eof, nil, 0, 0)
    end

    def peek_token(offset = 1)
      @tokens[@pos + offset]
    end

    def advance
      @pos += 1
    end

    def expect(type)
      tok = current_token
      unless tok.type == type
        raise ParseError, "Expected #{type}, got #{tok.type} at position #{tok.pos}"
      end
      advance
      tok
    end

    def accept(type)
      return nil unless current_token.type == type

      tok = current_token
      advance
      tok
    end

    # list: pipeline (separator pipeline)*, left-associative.
    # A trailing operator yields List(left, op, nil); trailing newlines add nothing.
    def parse_list
      skip_newlines
      left = parse_pipeline
      while (op = LIST_OPERATORS[current_token.type])
        separator = current_token.type
        advance
        skip_newlines
        if LIST_TERMINATORS.include?(current_token.type)
          left = List.new(left, op, nil) unless separator == :newline
          break
        end
        left = List.new(left, op, parse_pipeline)
      end
      left
    end

    # pipeline: ['!'] command ('|' command)*
    # Returns the bare Command when there is a single, non-negated command.
    def parse_pipeline
      skip_newlines
      negated = plain_literal(current_token) == '!'
      advance if negated
      commands = [parse_command]
      while accept(:pipe)
        skip_newlines
        commands << parse_command
      end
      return commands.first if commands.length == 1 && !negated

      Pipeline.new(commands, negated)
    end

    # command: any mix of words and redirections (possibly none at all).
    def parse_command
      skip_newlines
      words = []
      redirects = []
      loop do
        if redirect_operator?
          redirects << parse_redirect
        elsif io_number?
          fd = plain_literal(current_token).to_i
          advance
          redirects << parse_redirect(fd)
        elsif current_token.type == :word
          words << current_token.value # value is already a Word struct
          advance
        else
          break
        end
      end
      Command.new(words, redirects)
    end

    def redirect_operator?
      REDIRECT_TYPES.key?(current_token.type)
    end

    # True when the current word is an unquoted run of digits immediately
    # followed (no blank in between) by a redirection operator, e.g. 2>file.
    def io_number?
      text = plain_literal(current_token)
      return false unless text&.match?(/\A\d+\z/)

      following = peek_token
      !following.nil? && REDIRECT_TYPES.key?(following.type) &&
        following.pos == current_token.pos + current_token.len
    end

    # The text of a word made of exactly one unquoted Literal written without
    # escapes (source span equals decoded length); nil for anything else.
    def plain_literal(token)
      return nil unless token && token.type == :word && token.value.respond_to?(:parts)

      parts = token.value.parts
      return nil unless parts.length == 1

      part = parts.first
      return nil unless part.is_a?(Literal) && part.quote_style == :none

      part.len == part.value.length ? part.value : nil
    end

    # Consumes a redirection operator and its target word.
    def parse_redirect(fd = nil)
      op_token = current_token
      advance
      target = expect(:word).value # value is already a Word struct
      Redirect.new(REDIRECT_TYPES[op_token.type], fd, target)
    end

    def skip_newlines
      advance while current_token.type == :newline
    end
  end

  # Main entry point
  # Returns the AST for +input+; raises ParseError on unexpected tokens.
  def self.parse(input)
    Parser.new(Lexer.new(input).tokenize).parse
  end
end

metadata ADDED Viewed

@@ -0,0 +1,85 @@
+--- !ruby/object:Gem::Specification
+name: shell_parser
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Vidar Hokstad
+autorequire:
+bindir: exe
+cert_chain: []
+date: 2026-02-13 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: minitest
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '5.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '5.0'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '13.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '13.0'
+description: A compact Ruby parser for POSIX shell syntax with structured AST for
+  syntax highlighting and shell execution
+email:
+- vidar@hokstad.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- CHANGELOG.md
+- LICENSE.txt
+- README.md
+- examples/demo_simplified.rb
+- examples/demo_structure.rb
+- examples/examples.rb
+- examples/test.rb
+- examples/test_structure.rb
+- lib/shell_parser.rb
+- lib/shell_parser/version.rb
+homepage: https://github.com/vidarh/shell-parser
+licenses:
+- MIT
+metadata:
+  homepage_uri: https://github.com/vidarh/shell-parser
+  source_code_uri: https://github.com/vidarh/shell-parser
+  changelog_uri: https://github.com/vidarh/shell-parser/blob/master/CHANGELOG.md
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: 2.7.0
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubygems_version: 3.4.10
+signing_key:
+specification_version: 4
+summary: POSIX Shell Command Language Parser
+test_files: []