shell_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,523 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'shell_parser/version'
4
+
5
+ # POSIX Shell Command Language Parser
6
+ # Provides a simple AST for syntax highlighting and shell execution
7
+
8
+ module ShellParser
9
# The three kinds of part a word is assembled from. Every part records
# +pos+ and +len+ (where it sits in the source text) and a +quote_style+
# of :none, :single or :double.
#
# Literal    - plain text (+value+)
# Variable   - $VAR or ${VAR}; +braced+ tells the two spellings apart
# CommandSub - $(cmd) or `cmd`; +style+ is :dollar or :backtick
Literal = Struct.new(*%i[value pos len quote_style])
Variable = Struct.new(*%i[name pos len braced quote_style])
CommandSub = Struct.new(*%i[command pos len style quote_style])
14
+
15
# A word is an ordered list of parts (Literal, Variable, CommandSub)
# together with the position and length of the whole word in the source.
Word = Struct.new(:parts, :pos, :len) do
  # Rebuilds shell source text for the word.
  #
  # Quotes are re-added according to each part's quote_style, and the
  # characters that can only appear in a Literal value because they were
  # backslash-escaped in the source are escaped again, so the result lexes
  # back to the same parts (e.g. the value `a"b` inside double quotes is
  # rendered as "a\"b", not "a"b").
  #
  # @return [String]
  def to_s
    parts.map { |part| render_part(part) }.join
  end

  # Renders one part; parts of an unknown class contribute nothing.
  def render_part(part)
    case part
    when Literal
      render_literal(part)
    when Variable
      in_quotes(part.braced ? "${#{part.name}}" : "$#{part.name}", part.quote_style)
    when CommandSub
      text = part.style == :backtick ? "`#{part.command}`" : "$(#{part.command})"
      in_quotes(text, part.quote_style)
    end
  end

  # Renders literal text for its quoting context.
  def render_literal(part)
    text = part.value.to_s
    case part.quote_style
    when :single
      # Nothing can be escaped inside single quotes.
      "'#{text}'"
    when :double
      # Inside double quotes only \ " ` and an expansion-starting $ are special.
      "\"#{backslash_escape(text, /[\\"`]|\$(?=[a-zA-Z_{(])/)}\""
    else
      # Unquoted: blanks, operator characters and quote characters would end
      # or restructure the word, so they must carry a backslash.
      backslash_escape(text, /[ \t\r|&;<>()'"\\`]|\$(?=[a-zA-Z_{(])/)
    end
  end

  # Expansions are only ever wrapped in double quotes; any other
  # quote_style leaves the text bare.
  def in_quotes(text, quote_style)
    quote_style == :double ? "\"#{text}\"" : text
  end

  # Prefixes every match of +pattern+ in +text+ with a backslash.
  def backslash_escape(text, pattern)
    text.gsub(pattern) { |match| "\\#{match}" }
  end

  private :render_part, :render_literal, :in_quotes, :backslash_escape
end
42
+
43
# One redirection attached to a command.
#
# type   - :in, :out, :append, :heredoc, :clobber, :in_fd, :out_fd or :inout
# fd     - file descriptor number, or nil when none is recorded
# target - Word (or string) naming what the redirection points at
Redirect = Struct.new(:type, :fd, :target)
48
+
49
# A simple command: its words (Word nodes) and its redirections
# (Redirect nodes). Either list may be omitted and then starts out as a
# fresh empty array.
Command = Struct.new(:words, :redirects) do
  def initialize(words = [], redirects = [])
    # Bare super forwards both parameters, defaults included.
    super
  end
end
56
+
57
# Commands joined by `|`.
#
# commands - array of Command nodes (defaults to a fresh empty array)
# negated  - boolean flag for a `!` pipeline (defaults to false)
Pipeline = Struct.new(:commands, :negated) do
  def initialize(commands = [], negated = false)
    # Bare super forwards both parameters, defaults included.
    super
  end
end
64
+
65
# Two nodes joined by a list operator.
#
# left, right - Command, Pipeline or List
# op          - :and, :or, :semi or :background
List = Struct.new(*%i[left op right])
69
+
70
# Unit produced by the lexer: a +type+ symbol, the +value+ it carries, and
# the +pos+ / +len+ of the text it came from.
Token = Struct.new(*%i[type value pos len])
72
+
73
# Splits shell source text into Token structs.
#
# Operators become tokens typed by OPERATORS; everything else becomes a
# :word token whose value is a Word made of Literal, Variable and
# CommandSub parts. The token list always ends with an :eof token.
# Unterminated quotes and substitutions are not errors: they simply run
# to the end of the input.
class Lexer
  # Operator spellings and the token type each one produces.
  # NOTE: '<<-' is listed, but scan_operator only tries two- and
  # one-character spellings, so '<<-' is read as '<<' followed by a word
  # that starts with '-'.
  OPERATORS = {
    '&&' => :and_if,
    '||' => :or_if,
    ';;' => :dsemi,
    '<<' => :dless,
    '>>' => :dgreat,
    '<&' => :lessand,
    '>&' => :greatand,
    '<>' => :lessgreat,
    '<<-' => :dlessdash,
    '>|' => :clobber,
    '|' => :pipe,
    '&' => :background,
    ';' => :semi,
    '<' => :less,
    '>' => :great,
    '(' => :lparen,
    ')' => :rparen,
    "\n" => :newline
  }.freeze

  # Gathers a run of literal text together with the source span it came
  # from. The span is tracked separately from the text because escapes and
  # line continuations make the source longer than the resulting value.
  class LiteralBuffer
    def initialize(quote_style)
      @quote_style = quote_style
      reset
    end

    # Appends +text+, which occupied source positions +from+...+to+.
    def add(text, from, to)
      @from ||= from
      @to = to
      @text << text
    end

    # Pushes the pending run (if any) onto +parts+ as a Literal and
    # starts a new run.
    def flush_into(parts)
      return if @text.empty?

      parts << Literal.new(@text, @from, @to - @from, @quote_style)
      reset
    end

    private

    def reset
      @text = +''
      @from = nil
      @to = nil
    end
  end
  private_constant :LiteralBuffer

  # @param input [String] shell source text
  def initialize(input)
    @input = input
    @pos = 0
    @tokens = []
  end

  # Lexes the whole input from the start.
  #
  # @return [Array<Token>] tokens in source order, terminated by :eof
  def tokenize
    @tokens = []
    @pos = 0

    while @pos < @input.length
      if whitespace?
        skip_whitespace
      elsif comment?
        skip_comment
      elsif operator?
        scan_operator
      else
        scan_word
      end
    end

    @tokens << Token.new(:eof, nil, @pos, 0)
    @tokens
  end

  private

  def current_char
    @input[@pos]
  end

  def peek(offset = 1)
    @input[@pos + offset]
  end

  # Moves forward by +count+ characters without running past the input.
  def advance(count = 1)
    @pos = [@pos + count, @input.length].min
  end

  # Newline is deliberately absent: it is an operator token.
  def whitespace?
    current_char&.match?(/[ \t\r]/)
  end

  def skip_whitespace
    @pos += 1 while @pos < @input.length && whitespace?
  end

  # Only consulted between tokens, so a '#' inside a word stays literal.
  def comment?
    current_char == '#'
  end

  # Stops on the newline so that it is still emitted as a token.
  def skip_comment
    @pos += 1 while @pos < @input.length && current_char != "\n"
  end

  def operator?
    OPERATORS.key?(current_char) || OPERATORS.key?(@input[@pos, 2])
  end

  # Emits the longest two- or one-character operator at the cursor.
  def scan_operator
    [2, 1].each do |width|
      text = @input[@pos, width]
      # At the last character the two-character slice is only one character
      # long; it must not be recorded (or consumed) as two.
      next unless text.length == width && OPERATORS.key?(text)

      @tokens << Token.new(OPERATORS[text], text, @pos, width)
      @pos += width
      return
    end
  end

  # Scans one word and appends a :word token for it.
  def scan_word
    start_pos = @pos
    parts = []
    literal = LiteralBuffer.new(:none)
    has_content = false

    until word_boundary?
      if current_char == '\\' && peek == "\n"
        @pos += 2 # line continuation: contributes nothing to the word
        next
      end

      has_content = true
      case current_char
      when "'"
        literal.flush_into(parts)
        parts << scan_single_quoted_part
      when '"'
        literal.flush_into(parts)
        parts.concat(scan_double_quoted_parts)
      when '`'
        literal.flush_into(parts)
        parts << scan_backtick_substitution_part(:none)
      when '$'
        scan_dollar(parts, literal, :none)
      when '\\'
        if peek
          # The escaped character is kept; the backslash only counts
          # towards the literal's source span.
          literal.add(peek, @pos, @pos + 2)
          @pos += 2
        else
          @pos += 1 # lone backslash at end of input is dropped
        end
      else
        literal.add(current_char, @pos, @pos + 1)
        @pos += 1
      end
    end

    literal.flush_into(parts)
    # A "word" that was nothing but line continuations is not a word at
    # all; emitting it would add a bogus empty argument.
    return unless has_content

    length = @pos - start_pos
    @tokens << Token.new(:word, Word.new(parts, start_pos, length), start_pos, length)
  end

  def word_boundary?
    return true if @pos >= @input.length

    current_char.match?(/[ \t\r\n]/) || operator?
  end

  # Handles a '$' at the cursor: command substitution, variable, or a
  # literal dollar sign when nothing expandable follows.
  def scan_dollar(parts, literal, quote_style)
    if peek == '('
      literal.flush_into(parts)
      parts << scan_command_substitution_part(quote_style)
    elsif peek&.match?(/[a-zA-Z_{]/)
      literal.flush_into(parts)
      parts << scan_variable_part(quote_style)
    else
      literal.add('$', @pos, @pos + 1)
      @pos += 1
    end
  end

  # Returns a :single Literal whose span includes both quote characters.
  def scan_single_quoted_part
    start_pos = @pos
    closing = @input.index("'", @pos + 1)
    body_end = closing || @input.length
    content = @input[(start_pos + 1)...body_end]
    @pos = closing ? closing + 1 : body_end
    Literal.new(content, start_pos, @pos - start_pos, :single)
  end

  # Returns the parts found between a pair of double quotes. Literal spans
  # cover the source text inside the quotes, escapes included.
  def scan_double_quoted_parts
    @pos += 1 # skip opening "
    parts = []
    literal = LiteralBuffer.new(:double)

    while @pos < @input.length && current_char != '"'
      case current_char
      when '\\'
        following = peek
        if following == "\n"
          @pos += 2 # line continuation
        elsif following && '"$`\\'.include?(following)
          literal.add(following, @pos, @pos + 2)
          @pos += 2
        else
          # Backslash before any other character stays in the value.
          literal.add('\\', @pos, @pos + 1)
          @pos += 1
        end
      when '$'
        scan_dollar(parts, literal, :double)
      when '`'
        literal.flush_into(parts)
        parts << scan_backtick_substitution_part(:double)
      else
        literal.add(current_char, @pos, @pos + 1)
        @pos += 1
      end
    end

    literal.flush_into(parts)
    @pos += 1 if current_char == '"' # skip closing "
    parts
  end

  # Scans $NAME or ${...} starting at the '$'.
  def scan_variable_part(quote_style)
    start_pos = @pos
    @pos += 1 # skip $
    braced = current_char == '{'
    name = braced ? scan_braced_name : scan_plain_name
    Variable.new(name, start_pos, @pos - start_pos, braced, quote_style)
  end

  # Returns the text between '{' and its matching '}'. Nested "${" raise
  # the depth so that ${a:-${b}} is read as a single expansion.
  def scan_braced_name
    @pos += 1 # skip {
    name_start = @pos
    depth = 1

    while @pos < @input.length
      if current_char == '$' && peek == '{'
        depth += 1
        @pos += 2
      elsif current_char == '}'
        depth -= 1
        break if depth.zero?

        @pos += 1
      else
        @pos += 1
      end
    end

    name = @input[name_start...@pos]
    @pos += 1 if current_char == '}'
    name
  end

  def scan_plain_name
    name_start = @pos
    @pos += 1 while @pos < @input.length && current_char.match?(/[a-zA-Z0-9_]/)
    @input[name_start...@pos]
  end

  # Scans $( ... ) starting at the '$'. The command text is stored
  # verbatim, without the enclosing "$(" and ")".
  def scan_command_substitution_part(quote_style)
    start_pos = @pos
    @pos += 2 # skip $(
    body_start = @pos
    closing = skip_command_body
    command = @input[body_start...(closing || @pos)]
    @pos += 1 if closing
    CommandSub.new(command, start_pos, @pos - start_pos, :dollar, quote_style)
  end

  # Moves the cursor to the ')' that closes the current "$(" and returns
  # its index, or moves to the end of input and returns nil. Every '('
  # raises the depth; quoted text and escaped characters are skipped so a
  # ')' inside them cannot end the substitution.
  def skip_command_body
    depth = 1

    while @pos < @input.length
      case current_char
      when '\\'
        advance(2)
      when "'"
        skip_single_quotes
      when '"'
        skip_double_quotes
      when '('
        depth += 1
        advance
      when ')'
        depth -= 1
        return @pos if depth.zero?

        advance
      else
        advance
      end
    end

    nil
  end

  # Moves past a single-quoted string that starts at the cursor.
  def skip_single_quotes
    closing = @input.index("'", @pos + 1)
    @pos = closing ? closing + 1 : @input.length
  end

  # Moves past a double-quoted string that starts at the cursor,
  # stepping over nested $( ... ) so their quotes do not end this string.
  def skip_double_quotes
    advance # opening "

    while @pos < @input.length && current_char != '"'
      if current_char == '\\'
        advance(2)
      elsif current_char == '$' && peek == '('
        advance(2)
        advance if skip_command_body
      else
        advance
      end
    end

    advance if current_char == '"'
  end

  # Scans `...` starting at the opening backtick. Backslash escapes are
  # kept verbatim in the command text.
  def scan_backtick_substitution_part(quote_style)
    start_pos = @pos
    @pos += 1 # skip opening `
    body_start = @pos

    # advance clamps, so a trailing backslash cannot push the cursor
    # beyond the end of the input.
    advance(current_char == '\\' ? 2 : 1) while @pos < @input.length && current_char != '`'

    command = @input[body_start...@pos]
    @pos += 1 if current_char == '`' # skip closing `
    CommandSub.new(command, start_pos, @pos - start_pos, :backtick, quote_style)
  end
end
381
+
382
# Recursive-descent parser that turns the lexer's tokens into Command,
# Pipeline and List nodes.
class Parser
  # List operator token types and the List#op each one becomes.
  LIST_OPERATORS = {
    and_if: :and,
    or_if: :or,
    semi: :semi,
    background: :background
  }.freeze

  # Redirection operator token types and the Redirect#type each one becomes.
  REDIRECT_TYPES = {
    less: :in,
    great: :out,
    dgreat: :append,
    dless: :heredoc,
    clobber: :clobber,
    lessand: :in_fd,
    greatand: :out_fd,
    lessgreat: :inout
  }.freeze

  private_constant :LIST_OPERATORS, :REDIRECT_TYPES

  # @param tokens [Array<Token>] token list ending with an :eof token
  def initialize(tokens)
    @tokens = tokens
    @pos = 0
  end

  # Parses the whole token list.
  #
  # @return [Command, Pipeline, List] root of the syntax tree
  # @raise [RuntimeError] when a token other than :eof is left over, or a
  #   redirection operator is not followed by a word
  def parse
    result = parse_list
    expect(:eof)
    result
  end

  private

  def current_token
    @tokens[@pos]
  end

  def peek_token(offset = 1)
    @tokens[@pos + offset]
  end

  def advance
    @pos += 1
  end

  # Consumes and returns the current token, which must be of +type+.
  def expect(type)
    if current_token.type != type
      raise "Expected #{type}, got #{current_token.type} at position #{current_token.pos}"
    end

    tok = current_token
    advance
    tok
  end

  # Consumes and returns the current token if it is of +type+, else nil.
  def accept(type)
    return unless current_token.type == type

    tok = current_token
    advance
    tok
  end

  # Pipelines joined left-associatively by && || ; and &. When an operator
  # is followed by the end of input or by ')', the List's right side is
  # nil (this applies to every operator, not only ; and &).
  def parse_list
    skip_newlines
    left = parse_pipeline

    while (op = LIST_OPERATORS[current_token.type])
      advance
      skip_newlines

      if %i[eof rparen].include?(current_token.type)
        left = List.new(left, op, nil)
        break
      end

      left = List.new(left, op, parse_pipeline)
    end

    left
  end

  # One or more commands joined by '|'. A single command is returned
  # as-is rather than wrapped in a Pipeline.
  def parse_pipeline
    skip_newlines
    commands = [parse_command]

    while accept(:pipe)
      skip_newlines
      commands << parse_command
    end

    commands.length == 1 ? commands.first : Pipeline.new(commands)
  end

  # Collects consecutive words and redirections into a Command (which may
  # end up empty when neither is present).
  def parse_command
    skip_newlines
    words = []
    redirects = []

    loop do
      if redirect_operator?
        redirects << parse_redirect
      elsif io_number?
        # "2>file": the digits are the redirection's descriptor, not an
        # argument of the command.
        fd = Integer(current_token.value.parts.first.value, 10)
        advance
        redirects << parse_redirect(fd)
      elsif current_token.type == :word
        words << current_token.value # value is already a Word struct
        advance
      else
        break
      end
    end

    Command.new(words, redirects)
  end

  def redirect_operator?
    REDIRECT_TYPES.key?(current_token.type)
  end

  # True when the current token is a file descriptor prefix: a word that
  # is nothing but unquoted, unescaped digits and that touches the
  # redirection operator following it ("2>" yes, "2 >" no).
  def io_number?
    token = current_token
    following = peek_token
    return false unless token.type == :word && following
    return false unless REDIRECT_TYPES.key?(following.type)
    return false unless token.pos && token.len && following.pos == token.pos + token.len

    parts = token.value.parts
    return false unless parts.length == 1

    part = parts.first
    # Only Literal parts expose #value. The length comparison rejects
    # digits that were written with a backslash escape.
    part.respond_to?(:value) && part.quote_style == :none &&
      part.value.match?(/\A\d+\z/) && part.value.length == token.len
  end

  # Parses a redirection operator and its target word.
  #
  # @param fd [Integer, nil] descriptor that preceded the operator, if any
  def parse_redirect(fd = nil)
    op_token = current_token
    advance

    target = expect(:word).value # value is already a Word struct
    Redirect.new(REDIRECT_TYPES[op_token.type], fd, target)
  end

  def skip_newlines
    advance while current_token.type == :newline
  end
end
515
+
516
# Main entry point: runs the Lexer over +input+ and hands the resulting
# tokens to the Parser, returning whatever Parser#parse produces.
def self.parse(input)
  Parser.new(Lexer.new(input).tokenize).parse
end
523
+ end
metadata ADDED
@@ -0,0 +1,85 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: shell_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Vidar Hokstad
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2026-02-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: minitest
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '5.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '5.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '13.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '13.0'
41
+ description: A compact Ruby parser for POSIX shell syntax with structured AST for
42
+ syntax highlighting and shell execution
43
+ email:
44
+ - vidar@hokstad.com
45
+ executables: []
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - CHANGELOG.md
50
+ - LICENSE.txt
51
+ - README.md
52
+ - examples/demo_simplified.rb
53
+ - examples/demo_structure.rb
54
+ - examples/examples.rb
55
+ - examples/test.rb
56
+ - examples/test_structure.rb
57
+ - lib/shell_parser.rb
58
+ - lib/shell_parser/version.rb
59
+ homepage: https://github.com/vidarh/shell-parser
60
+ licenses:
61
+ - MIT
62
+ metadata:
63
+ homepage_uri: https://github.com/vidarh/shell-parser
64
+ source_code_uri: https://github.com/vidarh/shell-parser
65
+ changelog_uri: https://github.com/vidarh/shell-parser/blob/master/CHANGELOG.md
66
+ post_install_message:
67
+ rdoc_options: []
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: 2.7.0
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ requirements: []
81
+ rubygems_version: 3.4.10
82
+ signing_key:
83
+ specification_version: 4
84
+ summary: POSIX Shell Command Language Parser
85
+ test_files: []