learn_bash_from_session_data 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,623 @@
1
+ """
2
+ Bash Command Parser
3
+
4
+ Parses bash commands using shlex tokenization and regex patterns to extract
5
+ structural information like pipes, redirects, subshells, and variables.
6
+ """
7
+
8
+ import re
9
+ import shlex
10
+ from dataclasses import dataclass, field
11
+ from enum import Enum
12
+ from typing import Optional
13
+
14
+
15
class CommandCategory(Enum):
    """Categories for bash commands based on their primary purpose.

    Each member's value is the lowercase form of its name. ``UNKNOWN`` is the
    default category of a ``ParsedCommand`` and the fallback returned when no
    base command is found in ``BashParser.COMMAND_CATEGORIES``.
    """
    FILE_OPERATION = "file_operation"
    DIRECTORY = "directory"
    TEXT_PROCESSING = "text_processing"
    SEARCH = "search"
    VERSION_CONTROL = "version_control"
    PACKAGE_MANAGEMENT = "package_management"
    PROCESS_MANAGEMENT = "process_management"
    NETWORK = "network"
    SYSTEM_INFO = "system_info"
    PERMISSION = "permission"
    ARCHIVE = "archive"
    ENVIRONMENT = "environment"
    BUILD = "build"
    TESTING = "testing"
    DOCKER = "docker"
    # Fallback for commands that match no known category.
    UNKNOWN = "unknown"
33
+
34
+
35
@dataclass
class ParsedCommand:
    """Represents a fully parsed bash command with structural analysis.

    Only ``raw`` and ``description`` are required; every other field has a
    default and is filled in by ``BashParser.parse``.
    """
    # The command string exactly as it was handed to the parser.
    raw: str
    # Caller-supplied description of the command (may be empty).
    description: str
    # Unique command names found in command position.
    base_commands: list[str] = field(default_factory=list)
    # Tokens that match BashParser.FLAG_PATTERN (e.g. "-la", "--name").
    flags: list[str] = field(default_factory=list)
    # Pipeline segments; empty when the command contains no single pipe.
    pipes: list[str] = field(default_factory=list)
    # One dict per redirect with keys 'fd', 'operator', 'target', 'type'.
    redirects: list[dict] = field(default_factory=list)
    # Inner text of each $(...) and `...` command substitution.
    subshells: list[str] = field(default_factory=list)
    # Dicts with 'name' and 'type' ('assignment' or 'reference');
    # assignments also carry 'value'.
    variables: list[dict] = field(default_factory=list)
    # Occurrences of '&&' and '||' found in the command.
    logical_ops: list[str] = field(default_factory=list)
    # Caller-supplied output of the command's execution (may be empty).
    output: str = ""
    # Score computed by BashParser._calculate_complexity.
    complexity_score: int = 0
    # Category chosen by BashParser._categorize.
    category: CommandCategory = CommandCategory.UNKNOWN
    # Tokens that are neither commands, flags, operators nor redirect parts.
    arguments: list[str] = field(default_factory=list)
    # Set by BashParser.parse from the raw command text.
    is_multiline: bool = False
    # True when BashParser.HEREDOC_PATTERN matches the raw command.
    has_heredoc: bool = False
    # Messages recorded when shlex tokenization fails.
    parse_errors: list[str] = field(default_factory=list)
54
+
55
+
56
class BashParser:
    """
    Parser for bash commands that extracts structural information.

    Uses shlex for tokenization and regex patterns for detecting
    bash-specific constructs like pipes, redirects, and subshells.
    The analysis is heuristic: the regex passes run on the raw text and do
    not account for quoting.
    """

    # Command categorization mapping
    COMMAND_CATEGORIES = {
        # File operations
        'cat': CommandCategory.FILE_OPERATION,
        'head': CommandCategory.FILE_OPERATION,
        'tail': CommandCategory.FILE_OPERATION,
        'cp': CommandCategory.FILE_OPERATION,
        'mv': CommandCategory.FILE_OPERATION,
        'rm': CommandCategory.FILE_OPERATION,
        'touch': CommandCategory.FILE_OPERATION,
        'ln': CommandCategory.FILE_OPERATION,
        'file': CommandCategory.FILE_OPERATION,
        'stat': CommandCategory.FILE_OPERATION,
        'wc': CommandCategory.FILE_OPERATION,
        'diff': CommandCategory.FILE_OPERATION,
        'patch': CommandCategory.FILE_OPERATION,

        # Directory operations
        'ls': CommandCategory.DIRECTORY,
        'cd': CommandCategory.DIRECTORY,
        'pwd': CommandCategory.DIRECTORY,
        'mkdir': CommandCategory.DIRECTORY,
        'rmdir': CommandCategory.DIRECTORY,
        'tree': CommandCategory.DIRECTORY,
        'find': CommandCategory.DIRECTORY,
        'locate': CommandCategory.DIRECTORY,

        # Text processing
        'grep': CommandCategory.TEXT_PROCESSING,
        'sed': CommandCategory.TEXT_PROCESSING,
        'awk': CommandCategory.TEXT_PROCESSING,
        'cut': CommandCategory.TEXT_PROCESSING,
        'sort': CommandCategory.TEXT_PROCESSING,
        'uniq': CommandCategory.TEXT_PROCESSING,
        'tr': CommandCategory.TEXT_PROCESSING,
        'xargs': CommandCategory.TEXT_PROCESSING,
        'tee': CommandCategory.TEXT_PROCESSING,
        'paste': CommandCategory.TEXT_PROCESSING,
        'column': CommandCategory.TEXT_PROCESSING,
        'jq': CommandCategory.TEXT_PROCESSING,
        'yq': CommandCategory.TEXT_PROCESSING,

        # Search
        'rg': CommandCategory.SEARCH,
        'ag': CommandCategory.SEARCH,
        'fzf': CommandCategory.SEARCH,
        'fd': CommandCategory.SEARCH,

        # Version control
        'git': CommandCategory.VERSION_CONTROL,
        'gh': CommandCategory.VERSION_CONTROL,
        'svn': CommandCategory.VERSION_CONTROL,
        'hg': CommandCategory.VERSION_CONTROL,

        # Package management
        'npm': CommandCategory.PACKAGE_MANAGEMENT,
        'npx': CommandCategory.PACKAGE_MANAGEMENT,
        'yarn': CommandCategory.PACKAGE_MANAGEMENT,
        'pnpm': CommandCategory.PACKAGE_MANAGEMENT,
        'pip': CommandCategory.PACKAGE_MANAGEMENT,
        'pip3': CommandCategory.PACKAGE_MANAGEMENT,
        'pipx': CommandCategory.PACKAGE_MANAGEMENT,
        'apt': CommandCategory.PACKAGE_MANAGEMENT,
        'apt-get': CommandCategory.PACKAGE_MANAGEMENT,
        'brew': CommandCategory.PACKAGE_MANAGEMENT,
        'cargo': CommandCategory.PACKAGE_MANAGEMENT,
        'go': CommandCategory.PACKAGE_MANAGEMENT,

        # Process management
        'ps': CommandCategory.PROCESS_MANAGEMENT,
        'top': CommandCategory.PROCESS_MANAGEMENT,
        'htop': CommandCategory.PROCESS_MANAGEMENT,
        'kill': CommandCategory.PROCESS_MANAGEMENT,
        'pkill': CommandCategory.PROCESS_MANAGEMENT,
        'pgrep': CommandCategory.PROCESS_MANAGEMENT,
        'bg': CommandCategory.PROCESS_MANAGEMENT,
        'fg': CommandCategory.PROCESS_MANAGEMENT,
        'jobs': CommandCategory.PROCESS_MANAGEMENT,
        'nohup': CommandCategory.PROCESS_MANAGEMENT,
        'timeout': CommandCategory.PROCESS_MANAGEMENT,
        'watch': CommandCategory.PROCESS_MANAGEMENT,

        # Network
        'curl': CommandCategory.NETWORK,
        'wget': CommandCategory.NETWORK,
        'ssh': CommandCategory.NETWORK,
        'scp': CommandCategory.NETWORK,
        'rsync': CommandCategory.NETWORK,
        'ping': CommandCategory.NETWORK,
        'netstat': CommandCategory.NETWORK,
        'nc': CommandCategory.NETWORK,
        'nmap': CommandCategory.NETWORK,
        'ifconfig': CommandCategory.NETWORK,
        'ip': CommandCategory.NETWORK,

        # System info
        'uname': CommandCategory.SYSTEM_INFO,
        'whoami': CommandCategory.SYSTEM_INFO,
        'hostname': CommandCategory.SYSTEM_INFO,
        'df': CommandCategory.SYSTEM_INFO,
        'du': CommandCategory.SYSTEM_INFO,
        'free': CommandCategory.SYSTEM_INFO,
        'uptime': CommandCategory.SYSTEM_INFO,
        'date': CommandCategory.SYSTEM_INFO,
        'cal': CommandCategory.SYSTEM_INFO,
        'env': CommandCategory.SYSTEM_INFO,
        'printenv': CommandCategory.SYSTEM_INFO,
        'which': CommandCategory.SYSTEM_INFO,
        'whereis': CommandCategory.SYSTEM_INFO,
        'type': CommandCategory.SYSTEM_INFO,
        'man': CommandCategory.SYSTEM_INFO,
        'help': CommandCategory.SYSTEM_INFO,

        # Permissions
        'chmod': CommandCategory.PERMISSION,
        'chown': CommandCategory.PERMISSION,
        'chgrp': CommandCategory.PERMISSION,
        'sudo': CommandCategory.PERMISSION,
        'su': CommandCategory.PERMISSION,

        # Archive
        'tar': CommandCategory.ARCHIVE,
        'zip': CommandCategory.ARCHIVE,
        'unzip': CommandCategory.ARCHIVE,
        'gzip': CommandCategory.ARCHIVE,
        'gunzip': CommandCategory.ARCHIVE,
        'bzip2': CommandCategory.ARCHIVE,
        'xz': CommandCategory.ARCHIVE,
        '7z': CommandCategory.ARCHIVE,

        # Environment
        'export': CommandCategory.ENVIRONMENT,
        'source': CommandCategory.ENVIRONMENT,
        'alias': CommandCategory.ENVIRONMENT,
        'unalias': CommandCategory.ENVIRONMENT,
        'set': CommandCategory.ENVIRONMENT,
        'unset': CommandCategory.ENVIRONMENT,
        'eval': CommandCategory.ENVIRONMENT,

        # Build
        'make': CommandCategory.BUILD,
        'cmake': CommandCategory.BUILD,
        'gcc': CommandCategory.BUILD,
        'g++': CommandCategory.BUILD,
        'clang': CommandCategory.BUILD,
        'rustc': CommandCategory.BUILD,
        'tsc': CommandCategory.BUILD,
        'node': CommandCategory.BUILD,
        'python': CommandCategory.BUILD,
        'python3': CommandCategory.BUILD,
        'ruby': CommandCategory.BUILD,

        # Testing
        'pytest': CommandCategory.TESTING,
        'jest': CommandCategory.TESTING,
        'mocha': CommandCategory.TESTING,
        'vitest': CommandCategory.TESTING,
        'test': CommandCategory.TESTING,

        # Docker
        'docker': CommandCategory.DOCKER,
        'docker-compose': CommandCategory.DOCKER,
        'podman': CommandCategory.DOCKER,
        'kubectl': CommandCategory.DOCKER,
    }

    # Regex patterns for bash constructs
    # A single '|' that is not part of '||'.
    PIPE_PATTERN = re.compile(r'(?<![|])\|(?![|])')
    # Groups: (1) optional fd digits, (2) operator, (3) optional target word.
    # Because group 1 greedily takes the digits, "2>" is reported as
    # fd='2' + operator='>' and "2>&1" as fd='2' + operator='>&' + target='1'.
    REDIRECT_PATTERN = re.compile(
        r'(\d*)(>>|>&|&>|2>&1|2>|>|<)'
        r'\s*([^\s&|;<>]+)?'
    )
    # Neither subshell pattern handles nesting: the body stops at the first
    # closing delimiter.
    SUBSHELL_DOLLAR_PATTERN = re.compile(r'\$\(([^)]+)\)')
    SUBSHELL_BACKTICK_PATTERN = re.compile(r'`([^`]+)`')
    VARIABLE_ASSIGN_PATTERN = re.compile(r'^([A-Za-z_][A-Za-z0-9_]*)=(.*)$')
    VARIABLE_REF_PATTERN = re.compile(r'\$\{?([A-Za-z_][A-Za-z0-9_]*)\}?')
    LOGICAL_AND_PATTERN = re.compile(r'&&')
    LOGICAL_OR_PATTERN = re.compile(r'\|\|')
    HEREDOC_PATTERN = re.compile(r'<<-?\s*[\'"]?(\w+)[\'"]?')
    FLAG_PATTERN = re.compile(r'^-{1,2}[A-Za-z0-9][-A-Za-z0-9_=]*$')

    # Tokens that end one command and put the next token in command position.
    _COMMAND_SEPARATORS = frozenset(('&&', '||', '|', ';', '&'))
    # Redirect tokens whose target is the *next* token.
    _TARGETED_REDIRECTS = frozenset(('>', '>>', '<', '2>', '>&', '&>'))
    # Redirect tokens that already contain their target.
    _SELF_CONTAINED_REDIRECTS = frozenset(('2>&1',))

    def __init__(self):
        """Initialize the parser (it holds no per-instance state)."""

    def parse(self, command: str, description: str = "", output: str = "") -> ParsedCommand:
        """
        Parse a bash command into structural components.

        Args:
            command: The raw bash command string
            description: Optional description of the command
            output: Optional output from command execution

        Returns:
            ParsedCommand object with extracted structural information
        """
        result = ParsedCommand(
            raw=command,
            description=description,
            output=output
        )

        # A line continuation is a backslash followed by a newline, so the
        # newline test covers it; a lone backslash (e.g. "\;") is only an
        # escape and must not mark the command as multiline.
        result.is_multiline = '\n' in command
        result.has_heredoc = bool(self.HEREDOC_PATTERN.search(command))

        # The regex-based passes work on the raw text and are independent of
        # each other and of tokenization.
        result.subshells = self._extract_subshells(command)
        result.redirects = self._extract_redirects(command)
        result.variables = self._extract_variables(command)
        result.logical_ops = self._extract_logical_ops(command)
        result.pipes = self._extract_pipes(command)

        # Tokenize and extract base commands, flags, and arguments
        self._tokenize_and_extract(command, result)

        # Categorization and scoring read the fields populated above.
        result.category = self._categorize(result)
        result.complexity_score = self._calculate_complexity(result)

        return result

    def _extract_subshells(self, command: str) -> list[str]:
        """Return the inner text of every $(...) and `...` substitution.

        All ``$(...)`` matches come first, followed by all backtick matches.
        """
        subshells = [
            match.group(1)
            for match in self.SUBSHELL_DOLLAR_PATTERN.finditer(command)
        ]
        subshells.extend(
            match.group(1)
            for match in self.SUBSHELL_BACKTICK_PATTERN.finditer(command)
        )
        return subshells

    def _extract_redirects(self, command: str) -> list[dict]:
        """Extract redirect operations from command.

        Returns:
            One dict per redirect, in order of appearance, with keys
            'fd' (digits before the operator, or ''), 'operator',
            'target' (or '' when none follows) and 'type'
            ('stdout', 'stderr', 'both', 'stdin' or 'unknown').
        """
        redirects = []

        for match in self.REDIRECT_PATTERN.finditer(command):
            fd = match.group(1) or ''
            operator = match.group(2)
            target = match.group(3) or ''

            if operator == '<':
                # '<<' / '<<<' are here-documents / here-strings, not file
                # redirects; their delimiter is not a redirect target.
                pos = match.start(2)
                if '<' in (command[pos - 1:pos], command[pos + 1:pos + 2]):
                    continue

            redirect_type = 'unknown'
            if operator in ('>', '>>'):
                # The pattern captures the descriptor separately, so a
                # stderr redirect arrives here as fd='2' + '>' or '>>'.
                redirect_type = 'stderr' if fd == '2' else 'stdout'
            elif operator == '2>':
                redirect_type = 'stderr'
            elif operator in ('>&', '&>', '2>&1'):
                redirect_type = 'both'
            elif operator == '<':
                redirect_type = 'stdin'

            redirects.append({
                'fd': fd,
                'operator': operator,
                'target': target,
                'type': redirect_type
            })

        return redirects

    def _extract_variables(self, command: str) -> list[dict]:
        """Extract variable assignments and references from command.

        An assignment is recognised only at the start of a segment, where
        segments are produced by splitting on '|', '&' and ';'. Its value is
        the rest of that segment. A name is reported at most once per kind,
        and a name that was assigned is not also reported as a reference.

        Returns:
            Assignment dicts ('name', 'value', 'type') followed by reference
            dicts ('name', 'type').
        """
        variables = []
        seen_assignments = set()
        seen_references = set()

        for segment in re.split(r'[|&;]', command):
            match = self.VARIABLE_ASSIGN_PATTERN.match(segment.strip())
            if not match:
                continue
            var_name = match.group(1)
            if var_name in seen_assignments:
                continue
            variables.append({
                'name': var_name,
                'value': match.group(2),
                'type': 'assignment'
            })
            seen_assignments.add(var_name)

        for match in self.VARIABLE_REF_PATTERN.finditer(command):
            var_name = match.group(1)
            if var_name in seen_references or var_name in seen_assignments:
                continue
            variables.append({
                'name': var_name,
                'type': 'reference'
            })
            seen_references.add(var_name)

        return variables

    def _extract_logical_ops(self, command: str) -> list[str]:
        """Extract logical operators (&&, ||) in their order of appearance."""
        matches = list(self.LOGICAL_AND_PATTERN.finditer(command))
        matches.extend(self.LOGICAL_OR_PATTERN.finditer(command))
        # The two patterns are scanned separately; sort by position so the
        # result mirrors the command, e.g. "a || b && c" -> ['||', '&&'].
        matches.sort(key=lambda m: m.start())
        return [m.group(0) for m in matches]

    def _extract_pipes(self, command: str) -> list[str]:
        """Extract piped command segments.

        Returns:
            The stripped, non-empty segments between single pipes, or an
            empty list when the command contains no single pipe.
        """
        # Replace subshells with a placeholder so pipes inside them do not
        # split the outer pipeline.
        temp_cmd = self.SUBSHELL_DOLLAR_PATTERN.sub('__SUBSHELL__', command)
        temp_cmd = self.SUBSHELL_BACKTICK_PATTERN.sub('__SUBSHELL__', temp_cmd)

        # Split by single pipes (not ||)
        segments = self.PIPE_PATTERN.split(temp_cmd)
        if len(segments) <= 1:
            return []

        stripped = (seg.strip() for seg in segments)
        return [seg for seg in stripped if seg and seg != '__SUBSHELL__']

    def _tokenize_and_extract(self, command: str, result: ParsedCommand) -> None:
        """
        Tokenize command and extract base commands, flags, and arguments.

        Uses shlex for tokenization; if shlex raises ValueError the error is
        recorded in ``result.parse_errors`` and a plain whitespace split is
        used instead. Appends to ``result.flags`` and ``result.arguments``
        and sets ``result.base_commands`` (unique names, first-seen order).
        """
        # Remove heredoc markers and replace subshells with placeholders
        # before handing the text to shlex.
        tokenize_cmd = self.HEREDOC_PATTERN.sub('', command)
        tokenize_cmd = self.SUBSHELL_DOLLAR_PATTERN.sub('__SUBSHELL__', tokenize_cmd)
        tokenize_cmd = self.SUBSHELL_BACKTICK_PATTERN.sub('__SUBSHELL__', tokenize_cmd)

        try:
            lexer = shlex.shlex(tokenize_cmd, posix=True)
            lexer.whitespace_split = True
            lexer.commenters = ''  # Don't treat # as comment for first pass
            tokens = list(lexer)
        except ValueError as e:
            # shlex couldn't parse (unclosed quotes, etc.)
            result.parse_errors.append(f"Tokenization error: {e}")
            tokens = tokenize_cmd.split()

        # dict used as an insertion-ordered set: a plain set would make the
        # order of base_commands (and so the chosen category) vary from run
        # to run under string-hash randomisation.
        base_commands: dict[str, None] = {}
        in_command_position = True
        skip_next = False

        for token in tokens:
            if skip_next:
                # This token is the target of the preceding redirect.
                skip_next = False
                continue

            if token in self._COMMAND_SEPARATORS:
                in_command_position = True
                continue

            if token in self._SELF_CONTAINED_REDIRECTS:
                # "2>&1" names its own target; the next token (often '|')
                # must still be processed.
                continue

            if token in self._TARGETED_REDIRECTS:
                skip_next = True
                continue

            if token == '__SUBSHELL__':
                continue

            # Variable assignments (NAME=value) are neither commands nor
            # arguments.
            if '=' in token and not token.startswith('-'):
                if self.VARIABLE_ASSIGN_PATTERN.match(token):
                    continue

            if self.FLAG_PATTERN.match(token):
                result.flags.append(token)
                continue

            if in_command_position:
                # Reduce path-prefixed commands ("./run.sh", "/usr/bin/ls")
                # to their final component; fall back to the token itself
                # when nothing is left (e.g. "/").
                cmd_name = token.rstrip('/').rsplit('/', 1)[-1] or token
                base_commands.setdefault(cmd_name, None)
                in_command_position = False
            elif not token.startswith('-'):
                result.arguments.append(token)

        result.base_commands = list(base_commands)

    def _categorize(self, result: ParsedCommand) -> CommandCategory:
        """Return the category of the first base command that has one.

        Falls back to ``CommandCategory.UNKNOWN`` when no base command is
        listed in ``COMMAND_CATEGORIES``.
        """
        for cmd in result.base_commands:
            if cmd in self.COMMAND_CATEGORIES:
                return self.COMMAND_CATEGORIES[cmd]

        return CommandCategory.UNKNOWN

    def _calculate_complexity(self, result: ParsedCommand) -> int:
        """
        Calculate a complexity score for the command.

        The score is a weighted count of the parsed features, rounded to the
        nearest integer with ``round``. Higher scores indicate more complex
        commands.
        """
        score = 0

        # Base complexity
        score += len(result.base_commands)

        # Flags add complexity
        score += len(result.flags) * 0.5

        # Pipes add significant complexity
        score += len(result.pipes) * 2

        # Redirects add moderate complexity
        score += len(result.redirects) * 1.5

        # Subshells add significant complexity
        score += len(result.subshells) * 3

        # Logical operators add complexity
        score += len(result.logical_ops) * 1.5

        # Variables add some complexity
        score += len(result.variables)

        # Multiline commands are more complex
        if result.is_multiline:
            score += 2

        # Heredocs are complex
        if result.has_heredoc:
            score += 3

        # Arguments add minor complexity
        score += len(result.arguments) * 0.25

        return int(round(score))

    def parse_batch(
        self,
        commands: list[tuple[str, str, str]]
    ) -> list[ParsedCommand]:
        """
        Parse multiple commands.

        Args:
            commands: List of (command, description, output) tuples

        Returns:
            List of ParsedCommand objects, in input order
        """
        return [
            self.parse(cmd, desc, out)
            for cmd, desc, out in commands
        ]
554
+
555
+
556
def parse_command(
    command: str,
    description: str = "",
    output: str = ""
) -> ParsedCommand:
    """
    Parse one bash command with a throwaway ``BashParser``.

    Args:
        command: The raw bash command string
        description: Optional description
        output: Optional command output

    Returns:
        The ParsedCommand produced by ``BashParser.parse``
    """
    return BashParser().parse(command, description, output)
574
+
575
+
576
def parse_commands(
    commands: list[tuple[str, str, str]]
) -> list[ParsedCommand]:
    """
    Parse several bash commands with a single throwaway ``BashParser``.

    Args:
        commands: List of (command, description, output) tuples

    Returns:
        The list returned by ``BashParser.parse_batch``
    """
    return BashParser().parse_batch(commands)
590
+
591
+
592
if __name__ == "__main__":
    # Demo: parse a handful of sample commands and print a report for each.
    samples = [
        ("ls -la /tmp", "List files in tmp", ""),
        ("cat file.txt | grep 'pattern' | sort -u", "Search and sort", ""),
        ("git status && git add . && git commit -m 'test'", "Git workflow", ""),
        ("export FOO=bar && echo $FOO", "Set and use variable", "bar"),
        ("find . -name '*.py' -exec grep -l 'import' {} \\;", "Find Python imports", ""),
        ("docker run -d --name test -p 8080:80 nginx:latest", "Run Docker container", ""),
        ("curl -s https://api.example.com | jq '.data[]'", "API request with jq", ""),
        ("cat <<EOF > output.txt\nline1\nline2\nEOF", "Heredoc example", ""),
        ("VAR=$(echo 'hello' | tr 'a-z' 'A-Z')", "Command substitution", ""),
        ("npm install && npm test 2>&1 | tee test.log", "Complex build", ""),
    ]

    demo_parser = BashParser()

    for raw_cmd, summary, captured in samples:
        parsed = demo_parser.parse(raw_cmd, summary, captured)
        # Collect the report lines first, then emit them in one print; the
        # text written to stdout is the same as printing each line in turn.
        report = [
            f"\n{'='*60}",
            f"Raw: {parsed.raw}",
            f"Category: {parsed.category.value}",
            f"Base commands: {parsed.base_commands}",
            f"Flags: {parsed.flags}",
            f"Pipes: {len(parsed.pipes)} segments",
            f"Redirects: {parsed.redirects}",
            f"Subshells: {parsed.subshells}",
            f"Variables: {parsed.variables}",
            f"Logical ops: {parsed.logical_ops}",
            f"Complexity: {parsed.complexity_score}",
        ]
        if parsed.parse_errors:
            report.append(f"Parse errors: {parsed.parse_errors}")
        print("\n".join(report))