shell_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 89579eb0817c4ec11516c0de8bea58b1dc5cf573bb1a144e61eeb0b249d24a26
4
+ data.tar.gz: a59972f41df5a30a64ff6cda53a0b9f1fc106f9c91eeac18b9b52b6f593fa9b6
5
+ SHA512:
6
+ metadata.gz: 2bee7e6ba80006760f78b5de41cc2d289b8f4438ea03aa04e5f6156e9dcf12080e22c319d95a4d832df6684e73f6b41f8bd8ea4fdde2a85006763f97ceda8b48
7
+ data.tar.gz: 1d4bc6898faffe579f56710563f935c46d259120375b5933aef58fffa849a90976305e586ab03805d4eccde506acaae4be627da54814274db48eee87b48d9fd5
data/CHANGELOG.md ADDED
@@ -0,0 +1,24 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [0.1.0] - 2025-10-19
11
+
12
+ ### Added
13
+ - Initial release
14
+ - POSIX shell command language parser
15
+ - Structured AST with Word, Command, Pipeline, and List nodes
16
+ - Word parts: Literal, Variable, CommandSub with quote_style metadata
17
+ - Support for pipes, redirections, and command lists (&&, ||, ;, &)
18
+ - Proper handling of single/double quotes and expansions
19
+ - Position tracking for syntax highlighting
20
+ - Comprehensive test suite using minitest
21
+ - Examples demonstrating syntax highlighting and execution use cases
22
+
23
+ [Unreleased]: https://github.com/vidarh/shell-parser/compare/v0.1.0...HEAD
24
+ [0.1.0]: https://github.com/vidarh/shell-parser/releases/tag/v0.1.0
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Vidar Hokstad
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,330 @@
1
+ # Shell Parser
2
+
3
+ A compact Ruby parser for POSIX Shell Command Language syntax that
4
+ produces a simple AST suitable for syntax highlighting and shell
5
+ execution.
6
+
7
+ ## Goal
8
+
9
+ Remain compact and minimalist in design, while offering reasonably
10
+ complete parsing.
11
+
12
+ ## Features
13
+
14
+ - **Tokenization** - Breaks shell commands into tokens with position tracking
15
+ - **Simple AST** - Clean, easy-to-traverse abstract syntax tree
16
+ - **POSIX Compliance** - Based on POSIX Shell Command Language specification
17
+ - **Quoting Support** - Handles single quotes, double quotes, and backslash escaping
18
+ - **Expansions** - Parses variable expansions (`$VAR`, `${VAR}`) and command substitution (`$(...)`, `` `...` ``)
19
+ - **Command Structures** - Pipelines, lists (&&, ||, ;, &), and redirections
20
+ - **Compact** - ~320 lines of clean, readable Ruby code
21
+
22
+ ## Installation
23
+
24
+ Add this line to your application's Gemfile:
25
+
26
+ ```ruby
27
+ gem 'shell_parser'
28
+ ```
29
+
30
+ And then execute:
31
+
32
+ ```sh
33
+ bundle install
34
+ ```
35
+
36
+ Or install it yourself as:
37
+
38
+ ```sh
39
+ gem install shell_parser
40
+ ```
41
+
42
+ ## AST Node Types
43
+
44
+ ### Word
45
+ A word is composed of one or more parts:
46
+ ```ruby
47
+ Word = Struct.new(:parts, :pos, :len)
48
+ # parts: array of Literal, Variable, or CommandSub
49
+ # pos: character position in input
50
+ # len: total length
51
+ ```
52
+
53
+ ### Word Parts
54
+
55
+ **Literal** - Plain text (possibly quoted):
56
+ ```ruby
57
+ Literal = Struct.new(:value, :pos, :len, :quote_style)
58
+ # value: the text content
59
+ # quote_style: :none, :single, or :double
60
+ ```
61
+
62
+ **Variable** - Variable expansion:
63
+ ```ruby
64
+ Variable = Struct.new(:name, :pos, :len, :braced, :quote_style)
65
+ # name: variable name (e.g., "HOME" for $HOME)
66
+ # braced: true if ${VAR} form, false if $VAR
67
+ # quote_style: :none or :double (variables don't expand in single quotes)
68
+ ```
69
+
70
+ **CommandSub** - Command substitution:
71
+ ```ruby
72
+ CommandSub = Struct.new(:command, :pos, :len, :style, :quote_style)
73
+ # command: the command text to execute
74
+ # style: :dollar for $(cmd) or :backtick for `cmd`
75
+ # quote_style: :none or :double
76
+ ```
77
+
78
+ ### Command
79
+ A simple command with arguments and redirections:
80
+ ```ruby
81
+ Command = Struct.new(:words, :redirects)
82
+ # words: array of Word nodes
83
+ # redirects: array of Redirect nodes
84
+ ```
85
+
86
+ ### Pipeline
87
+ Commands connected by pipes (`|`):
88
+ ```ruby
89
+ Pipeline = Struct.new(:commands, :negated)
90
+ # commands: array of Command nodes
91
+ # negated: boolean (for future ! pipeline support)
92
+ ```
93
+
94
+ ### List
95
+ Commands connected by control operators:
96
+ ```ruby
97
+ List = Struct.new(:left, :op, :right)
98
+ # left/right: Command, Pipeline, or List
99
+ # op: :and (&&), :or (||), :semi (;), :background (&)
100
+ ```
101
+
102
+ ### Redirect
103
+ I/O redirection:
104
+ ```ruby
105
+ Redirect = Struct.new(:type, :fd, :target)
106
+ # type: :in (<), :out (>), :append (>>), :heredoc (<<), etc.
107
+ # fd: file descriptor number (optional)
108
+ # target: Word node
109
+ ```
110
+
111
+ ## Usage
112
+
113
+ ### Basic Parsing
114
+
115
+ ```ruby
116
+ require 'shell_parser'
117
+
118
+ # Parse a command
119
+ ast = ShellParser.parse("ls -la /tmp")
120
+ # => Command with 3 words
121
+
122
+ # Parse a pipeline
123
+ ast = ShellParser.parse("cat file.txt | grep error | wc -l")
124
+ # => Pipeline with 3 commands
125
+
126
+ # Parse command lists
127
+ ast = ShellParser.parse("make && make test || echo failed")
128
+ # => List with nested lists
129
+ ```
130
+
131
+ ### Syntax Highlighting
132
+
133
+ The parser provides detailed structure perfect for syntax highlighting:
134
+
135
+ ```ruby
136
+ ast = ShellParser.parse("echo $HOME > output.txt")
137
+
138
+ ast.words.each do |word|
139
+ word.parts.each do |part|
140
+ case part
141
+ when ShellParser::Literal
142
+ case part.quote_style
143
+ when :single then highlight_single_quoted(part.value, part.pos, part.len)
144
+ when :double then highlight_double_quoted(part.value, part.pos, part.len)
145
+ else highlight_literal(part.value, part.pos, part.len)
146
+ end
147
+ when ShellParser::Variable
148
+ highlight_variable(part.name, part.pos, part.len, part.braced)
149
+ when ShellParser::CommandSub
150
+ highlight_command_sub(part.command, part.pos, part.len, part.style)
151
+ end
152
+ end
153
+ end
154
+
155
+ ast.redirects.each do |redir|
156
+ highlight_redirection(redir.type, redir.target)
157
+ end
158
+ ```
159
+
160
+ The structured representation makes it easy to apply context-aware highlighting:
161
+ ```ruby
162
+ # "Hello $USER" is represented as:
163
+ word.parts #=> [
164
+ Literal("Hello ", quote_style: :double),
165
+ Variable("USER", quote_style: :double)
166
+ ]
167
+ ```
168
+
169
+ ### Shell Execution
170
+
171
+ The AST makes it easy to traverse and execute commands:
172
+
173
+ ```ruby
174
+ ast = ShellParser.parse("echo $HOME > output.txt")
175
+
176
+ # Expand words by processing their parts
177
+ def expand_word(word)
178
+ word.parts.map do |part|
179
+ case part
180
+ when ShellParser::Literal
181
+ part.value # Use as-is
182
+ when ShellParser::Variable
183
+ ENV[part.name] || "" # Look up variable
184
+ when ShellParser::CommandSub
185
+ `#{part.command}`.chomp # Execute command
186
+ end
187
+ end.join
188
+ end
189
+
190
+ # Execute based on AST structure
191
+ case ast
192
+ when ShellParser::Command
193
+ args = ast.words.map { |w| expand_word(w) }
194
+ execute_command(args, ast.redirects)
195
+
196
+ when ShellParser::Pipeline
197
+ setup_pipe do
198
+ ast.commands.each do |cmd|
199
+ args = cmd.words.map { |w| expand_word(w) }
200
+ execute_in_pipeline(args, cmd.redirects)
201
+ end
202
+ end
203
+
204
+ when ShellParser::List
205
+ result = execute(ast.left)
206
+ case ast.op
207
+ when :and then execute(ast.right) if result == 0
208
+ when :or then execute(ast.right) if result != 0
209
+ when :semi then execute(ast.right)
210
+ when :background then fork { execute(ast.right) }
211
+ end
212
+ end
213
+ ```
214
+
215
+ The quote_style field tells you how to handle word splitting and glob expansion:
216
+ ```ruby
217
+ part.quote_style == :none # Apply glob expansion and word splitting
218
+ part.quote_style == :single # Use literal value, no expansion
219
+ part.quote_style == :double # Expand variables/commands, but no glob/split
220
+ ```
221
+
222
+ ## Supported Syntax
223
+
224
+ ### Simple Commands
225
+ ```sh
226
+ ls -la /tmp
227
+ echo "hello world"
228
+ ```
229
+
230
+ ### Pipelines
231
+ ```sh
232
+ cat file.txt | grep pattern | wc -l
233
+ ```
234
+
235
+ ### Command Lists
236
+ ```sh
237
+ make && make test # AND - execute if previous succeeds
238
+ make || echo "failed" # OR - execute if previous fails
239
+ make ; make test # Sequential - always execute both
240
+ sleep 10 & # Background job
241
+ ```
242
+
243
+ ### Redirections
244
+ ```sh
245
+ command < input.txt # Input redirection
246
+ command > output.txt # Output redirection
247
+ command >> output.txt # Append
248
+ command 2>> error.log # Redirect stderr
249
+ ```
250
+
251
+ ### Quoting
252
+ ```sh
253
+ echo 'single quotes preserve everything literally'
254
+ echo "double quotes allow $VAR expansion"
255
+ echo escaped\ space
256
+ ```
257
+
258
+ ### Expansions
259
+ ```sh
260
+ echo $HOME # Variable expansion
261
+ echo ${USER} # Variable expansion (braced)
262
+ echo $(date) # Command substitution
263
+ echo `whoami` # Command substitution (backticks)
264
+ ```
265
+
266
+ ## Examples
267
+
268
+ See `examples.rb` for complete working examples of:
269
+ - Syntax highlighting with token positions
270
+ - Execution plan generation from AST
271
+ - Pretty-printing AST structures
272
+
273
+ Run examples:
274
+ ```sh
275
+ ruby examples.rb
276
+ ```
277
+
278
+ ## Design Goals
279
+
280
+ 1. **Simplicity** - Clean, understandable code without excessive abstraction
281
+ 2. **Compactness** - Core parser in ~320 lines
282
+ 3. **Practicality** - Focus on two main use cases:
283
+ - Syntax highlighting (needs tokens with positions)
284
+ - Shell execution (needs command structure)
285
+ 4. **POSIX Foundation** - Based on POSIX spec but simplified where practical
286
+
287
+ ## Limitations
288
+
289
+ This is a simplified parser focused on the core syntax. Not currently supported:
290
+
291
+ - Compound commands (`if`, `while`, `for`, `case`, `{...}`, `(...)`)
292
+ - Function definitions
293
+ - Arithmetic expansion `$((...))`
294
+ - Parameter expansion modifiers `${var:-default}`
295
+ - Here-documents (parsed but not fully implemented)
296
+ - Pattern matching and globbing
297
+ - Reserved words as special tokens
298
+
299
+ These can be added incrementally as needed.
300
+
301
+ ## Architecture
302
+
303
+ **Lexer** (`ShellParser::Lexer`)
304
+ - Scans input character by character
305
+ - Handles quoting, escaping, and special characters
306
+ - Produces token stream with position information
307
+ - Preserves metadata for syntax highlighting
308
+
309
+ **Parser** (`ShellParser::Parser`)
310
+ - Recursive descent parser
311
+ - Consumes tokens to build AST
312
+ - Handles operator precedence
313
+ - Simple error reporting
314
+
315
+ **AST** (Struct-based nodes)
316
+ - Lightweight node types using Ruby Structs
317
+ - Easy to pattern match and traverse
318
+ - Minimal memory overhead
319
+
320
+ ## References
321
+
322
+ - [POSIX Shell Command Language Specification](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html)
323
+
324
+ ## rsh Integration Roadmap
325
+
326
+ [rsh](https://github.com/vidarh/rsh) is a Ruby shell that currently uses an 80-line tokenizer for command parsing. The integration path with shell_parser:
327
+
328
+ 1. **Add as dependency** — Add `gem 'shell_parser'` to rsh's Gemfile and `require 'shell_parser'` in the main entry point.
329
+ 2. **Replace tokenizer** — Replace `tokenize_command` / `parse_shell_command` in `command_parser.rb` with `ShellParser.parse`, gaining proper AST-driven parsing for pipelines, lists, redirects, and quoting.
330
+ 3. **AST-driven execution** — Use the structured AST (`Command`, `Pipeline`, `List`) for execution instead of passing raw command strings to `exec`, enabling proper variable expansion, pipeline setup, and redirection handling within the Ruby process.
@@ -0,0 +1,89 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require_relative '../lib/shell_parser'
5
+
6
# Prints a header line for +word+ followed by one line per part.
#
# Literal parts show their inspected value, Variable parts show the name and
# whether the braced form was used, and CommandSub parts show the inspected
# command text and the substitution style. Any part whose quote_style is not
# :none gets a " [style]" tag after the node name.
#
# word - an object responding to #parts (array of ShellParser word parts).
#
# Returns whatever word.parts.each returns.
def show_word(word)
  # Builds the optional " [single]" / " [double]" suffix for a part.
  quote_tag = lambda do |node|
    node.quote_style == :none ? "" : " [#{node.quote_style}]"
  end

  puts " Word (#{word.parts.length} parts):"
  word.parts.each do |node|
    if node.is_a?(ShellParser::Literal)
      tag = quote_tag.call(node)
      puts " Literal#{tag}: #{node.value.inspect}"
    elsif node.is_a?(ShellParser::Variable)
      tag = quote_tag.call(node)
      form = node.braced ? "${...}" : "$..."
      puts " Variable#{tag}: #{node.name} (#{form})"
    elsif node.is_a?(ShellParser::CommandSub)
      tag = quote_tag.call(node)
      form = node.style == :dollar ? "$()" : "`...`"
      puts " CommandSub#{tag}: #{node.command.inspect} (#{form})"
    end
  end
end
24
+
25
# Parses +input+ with ShellParser.parse and prints a banner, the inspected
# input, and the word structure of the resulting AST.
#
# A Command result lists its words directly; a Pipeline result lists the
# words of each command under a numbered heading. Any other result prints
# only the banner and input.
#
# description - heading text shown between the two rule lines.
# input       - shell source string handed to the parser.
def demo(description, input)
  rule = "=" * 70
  puts rule
  puts description
  puts rule
  puts "Input: #{input.inspect}\n\n"

  ast = ShellParser.parse(input)

  if ast.is_a?(ShellParser::Command)
    puts "Command:"
    ast.words.each { |word| show_word(word) }
  elsif ast.is_a?(ShellParser::Pipeline)
    puts "Pipeline:"
    ast.commands.each.with_index(1) do |command, number|
      puts " Command #{number}:"
      command.words.each { |word| show_word(word) }
    end
  end

  puts
end
47
+
48
# Script body: print a title, run each demo case, then print a summary.
banner = "=" * 70

puts "\nSimplified AST Structure Demo"
puts banner
puts

# Each entry is [description, shell input] and is passed to demo in order.
[
  ["Command substitution - properly extracted",
   "echo $(date)"],
  ["Variable expansion - name extracted, braced flag set",
   "echo $HOME ${USER}"],
  ["Double-quoted with expansion - parts have quote_style metadata",
   'echo "Hello $USER, today is $(date)"'],
  ["Single-quoted - no expansion, literal preserved",
   "echo 'Literal $HOME text'"],
  ["Composite word - multiple parts in one word",
   'log_${DATE}_$(hostname).txt'],
  ["Mixed quoting - each part tracks its own quoting context",
   'echo "quoted"unquoted"quoted again"']
].each do |description, input|
  demo(description, input)
end

puts banner
puts "Key improvements:"
[
  "No nested DoubleQuoted/SingleQuoted wrapper nodes",
  "Quoting tracked as metadata (quote_style field)",
  "Variables extracted with name and braced flag",
  "Command substitutions extracted with command text",
  "Clean, flat structure easy to traverse"
].each do |point|
  puts " • #{point}"
end
puts banner
@@ -0,0 +1,116 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require_relative '../lib/shell_parser'
5
+
6
# Prints one line per part of +word+, each prefixed by +depth+ copies of the
# indent unit.
#
# The AST described in this gem's README is flat: a Word's parts are only
# Literal, Variable and CommandSub, and quoting is carried by each part's
# quote_style field rather than by SingleQuoted/DoubleQuoted wrapper nodes.
# This method therefore dispatches on those three classes only and shows the
# quoting as a " [single]" / " [double]" tag (omitted for :none), in the same
# format the sibling example's show_word uses.
#
# word  - an object responding to #parts (array of ShellParser word parts).
# depth - indentation level for the printed lines (default 0).
#
# Returns whatever word.parts.each returns.
def show_word_structure(word, depth = 0)
  indent = " " * depth

  # Builds the optional quoting tag; only called for the known part classes,
  # which all carry a quote_style field.
  quote_tag = lambda do |part|
    part.quote_style == :none ? "" : " [#{part.quote_style}]"
  end

  word.parts.each do |part|
    case part
    when ShellParser::Literal
      puts "#{indent}Literal#{quote_tag.call(part)}: #{part.value.inspect}"
    when ShellParser::Variable
      shown_name = part.braced ? "{#{part.name}}" : part.name
      puts "#{indent}Variable#{quote_tag.call(part)}: $#{shown_name}"
    when ShellParser::CommandSub
      puts "#{indent}CommandSub#{quote_tag.call(part)} (#{part.style}): #{part.command.inspect}"
    else
      # Report unexpected node types instead of referencing classes that may
      # not be defined (which would raise NameError) or dropping them silently.
      puts "#{indent}Unknown part: #{part.inspect}"
    end
  end
end
34
+
35
# Parses +input+ with ShellParser.parse and prints a banner, the inspected
# input, and a numbered breakdown of every word in the resulting AST.
#
# A Command result lists its words at indent level 1; a Pipeline result lists
# each command's words at indent level 2 under a numbered command heading.
# Any other result prints only the banner and input.
#
# description - heading text shown between the two rule lines.
# input       - shell source string handed to the parser.
def demo(description, input)
  rule = "=" * 70
  puts rule, description, rule
  puts "Input: #{input.inspect}\n\n"

  ast = ShellParser.parse(input)

  if ast.is_a?(ShellParser::Command)
    puts "Command with #{ast.words.length} word(s):\n\n"
    ast.words.each.with_index(1) do |word, word_number|
      puts "Word #{word_number}:"
      show_word_structure(word, 1)
      puts
    end
  elsif ast.is_a?(ShellParser::Pipeline)
    puts "Pipeline with #{ast.commands.length} command(s):\n\n"
    ast.commands.each.with_index(1) do |command, command_number|
      puts "Command #{command_number}:"
      command.words.each.with_index(1) do |word, word_number|
        puts " Word #{word_number}:"
        show_word_structure(word, 2)
      end
      puts
    end
  end

  puts
end
65
+
66
+ # Demonstrate structured parsing
67
+
68
+ demo(
69
+ "Simple command substitution",
70
+ "echo $(date)"
71
+ )
72
+
73
+ demo(
74
+ "Variable expansion (braced and unbraced)",
75
+ "echo $HOME and ${USER}"
76
+ )
77
+
78
+ demo(
79
+ "Command substitution inside double quotes",
80
+ 'echo "Today is $(date)"'
81
+ )
82
+
83
+ demo(
84
+ "Multiple expansions in one word",
85
+ 'echo $USER@$HOSTNAME'
86
+ )
87
+
88
+ demo(
89
+ "Mixed quoting and expansions",
90
+ 'echo "Hello $USER, your home is $HOME"'
91
+ )
92
+
93
+ demo(
94
+ "Nested command substitution",
95
+ 'echo $(echo $(whoami))'
96
+ )
97
+
98
+ demo(
99
+ "Complex word with multiple parts",
100
+ 'prefix_${VAR}_$(cmd)_suffix'
101
+ )
102
+
103
+ demo(
104
+ "Backtick style command substitution",
105
+ 'echo `date`'
106
+ )
107
+
108
+ demo(
109
+ "Single quotes (no expansion)",
110
+ "echo 'Literal $HOME text'"
111
+ )
112
+
113
+ demo(
114
+ "Double quotes with escaped characters",
115
+ 'echo "Quote: \" and dollar: \$"'
116
+ )