learn_bash_from_session_data 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +45 -0
- package/bin/learn-bash.js +328 -0
- package/package.json +23 -0
- package/scripts/__init__.py +34 -0
- package/scripts/analyzer.py +591 -0
- package/scripts/extractor.py +411 -0
- package/scripts/html_generator.py +2029 -0
- package/scripts/knowledge_base.py +1593 -0
- package/scripts/main.py +443 -0
- package/scripts/parser.py +623 -0
- package/scripts/quiz_generator.py +1080 -0
|
@@ -0,0 +1,623 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Bash Command Parser
|
|
3
|
+
|
|
4
|
+
Parses bash commands using shlex tokenization and regex patterns to extract
|
|
5
|
+
structural information like pipes, redirects, subshells, and variables.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
import shlex
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from enum import Enum
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class CommandCategory(Enum):
    """Purpose-based buckets that a parsed bash command can be filed under.

    Each member's value is the lowercase string form of its name. ``UNKNOWN``
    is the default category on ``ParsedCommand`` and the fallback used when no
    recognised command is found.
    """

    # Working with file contents and file metadata.
    FILE_OPERATION = "file_operation"
    DIRECTORY = "directory"
    TEXT_PROCESSING = "text_processing"
    SEARCH = "search"

    # Developer tooling.
    VERSION_CONTROL = "version_control"
    PACKAGE_MANAGEMENT = "package_management"

    # Host, process and network administration.
    PROCESS_MANAGEMENT = "process_management"
    NETWORK = "network"
    SYSTEM_INFO = "system_info"
    PERMISSION = "permission"
    ARCHIVE = "archive"
    ENVIRONMENT = "environment"

    # Build, test and container workflows.
    BUILD = "build"
    TESTING = "testing"
    DOCKER = "docker"

    # Fallback when nothing else applies.
    UNKNOWN = "unknown"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class ParsedCommand:
    """Result record produced by parsing one bash command string.

    Only ``raw`` and ``description`` are required; every other field starts
    empty/false and is filled in by ``BashParser.parse``.
    """

    # --- required inputs -------------------------------------------------
    raw: str                                              # command text exactly as given
    description: str                                      # caller-supplied description

    # --- structural findings ---------------------------------------------
    base_commands: list[str] = field(default_factory=list)   # command names found in command position
    flags: list[str] = field(default_factory=list)           # tokens matching the flag pattern
    pipes: list[str] = field(default_factory=list)           # text of each pipeline segment
    redirects: list[dict] = field(default_factory=list)      # dicts with fd / operator / target / type
    subshells: list[str] = field(default_factory=list)       # inner text of $(...) and `...`
    variables: list[dict] = field(default_factory=list)      # assignment / reference records
    logical_ops: list[str] = field(default_factory=list)     # '&&' and '||' occurrences

    # --- context and derived values --------------------------------------
    output: str = ""                                      # caller-supplied command output
    complexity_score: int = 0                             # heuristic score computed by the parser
    category: CommandCategory = CommandCategory.UNKNOWN
    arguments: list[str] = field(default_factory=list)    # remaining non-flag tokens
    is_multiline: bool = False
    has_heredoc: bool = False
    parse_errors: list[str] = field(default_factory=list)  # tokenization problems, if any
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class BashParser:
|
|
57
|
+
"""
|
|
58
|
+
Parser for bash commands that extracts structural information.
|
|
59
|
+
|
|
60
|
+
Uses shlex for tokenization and regex patterns for detecting
|
|
61
|
+
bash-specific constructs like pipes, redirects, and subshells.
|
|
62
|
+
"""
|
|
63
|
+
|
|
64
|
+
# Command categorization mapping
|
|
65
|
+
# Lookup table from a bare command name to its category. It is built from
# per-category groups so each category is spelled once; the resulting dict
# has one entry per command name, in the order the names are listed here.
COMMAND_CATEGORIES = {
    command_name: category
    for category, command_names in (
        # File operations
        (CommandCategory.FILE_OPERATION, (
            'cat', 'head', 'tail', 'cp', 'mv', 'rm', 'touch', 'ln',
            'file', 'stat', 'wc', 'diff', 'patch',
        )),
        # Directory operations
        (CommandCategory.DIRECTORY, (
            'ls', 'cd', 'pwd', 'mkdir', 'rmdir', 'tree', 'find', 'locate',
        )),
        # Text processing
        (CommandCategory.TEXT_PROCESSING, (
            'grep', 'sed', 'awk', 'cut', 'sort', 'uniq', 'tr', 'xargs',
            'tee', 'paste', 'column', 'jq', 'yq',
        )),
        # Search
        (CommandCategory.SEARCH, ('rg', 'ag', 'fzf', 'fd')),
        # Version control
        (CommandCategory.VERSION_CONTROL, ('git', 'gh', 'svn', 'hg')),
        # Package management
        (CommandCategory.PACKAGE_MANAGEMENT, (
            'npm', 'npx', 'yarn', 'pnpm', 'pip', 'pip3', 'pipx',
            'apt', 'apt-get', 'brew', 'cargo', 'go',
        )),
        # Process management
        (CommandCategory.PROCESS_MANAGEMENT, (
            'ps', 'top', 'htop', 'kill', 'pkill', 'pgrep', 'bg', 'fg',
            'jobs', 'nohup', 'timeout', 'watch',
        )),
        # Network
        (CommandCategory.NETWORK, (
            'curl', 'wget', 'ssh', 'scp', 'rsync', 'ping', 'netstat',
            'nc', 'nmap', 'ifconfig', 'ip',
        )),
        # System info
        (CommandCategory.SYSTEM_INFO, (
            'uname', 'whoami', 'hostname', 'df', 'du', 'free', 'uptime',
            'date', 'cal', 'env', 'printenv', 'which', 'whereis', 'type',
            'man', 'help',
        )),
        # Permissions
        (CommandCategory.PERMISSION, ('chmod', 'chown', 'chgrp', 'sudo', 'su')),
        # Archive
        (CommandCategory.ARCHIVE, (
            'tar', 'zip', 'unzip', 'gzip', 'gunzip', 'bzip2', 'xz', '7z',
        )),
        # Environment
        (CommandCategory.ENVIRONMENT, (
            'export', 'source', 'alias', 'unalias', 'set', 'unset', 'eval',
        )),
        # Build
        (CommandCategory.BUILD, (
            'make', 'cmake', 'gcc', 'g++', 'clang', 'rustc', 'tsc',
            'node', 'python', 'python3', 'ruby',
        )),
        # Testing
        (CommandCategory.TESTING, ('pytest', 'jest', 'mocha', 'vitest', 'test')),
        # Docker
        (CommandCategory.DOCKER, ('docker', 'docker-compose', 'podman', 'kubectl')),
    )
    for command_name in command_names
}
|
|
229
|
+
|
|
230
|
+
# Regex patterns for bash constructs

# A lone "|": the lookbehind/lookahead reject either half of "||".
PIPE_PATTERN = re.compile(r'(?<![|])\|(?![|])')

# Groups: (1) digits directly before the operator, (2) the operator,
# (3) an optional target word after optional whitespace.
# Group 1 consumes any leading digits first, so "2> file" is reported as
# fd "2" with operator ">", and "2>&1" as fd "2", operator ">&", target "1".
REDIRECT_PATTERN = re.compile(r'(\d*)(>>|>&|&>|2>&1|2>|>|<)\s*([^\s&|;<>]+)?')

# Command substitutions; group 1 is the inner text. The "$(...)" form stops
# at the first ")" because the inner class excludes it.
SUBSHELL_DOLLAR_PATTERN = re.compile(r'\$\(([^)]+)\)')
SUBSHELL_BACKTICK_PATTERN = re.compile(r'`([^`]+)`')

# NAME=value anchored at both ends of the tested string: group 1 is the
# name, group 2 everything after the first "=".
VARIABLE_ASSIGN_PATTERN = re.compile(r'^([A-Za-z_][A-Za-z0-9_]*)=(.*)$')

# $NAME or ${NAME}; group 1 is the bare name.
VARIABLE_REF_PATTERN = re.compile(r'\$\{?([A-Za-z_][A-Za-z0-9_]*)\}?')

# Logical connectives.
LOGICAL_AND_PATTERN = re.compile(r'&&')
LOGICAL_OR_PATTERN = re.compile(r'\|\|')

# Heredoc introducer ("<<" or "<<-") with an optionally quoted delimiter
# word captured in group 1.
HEREDOC_PATTERN = re.compile(r'<<-?\s*[\'"]?(\w+)[\'"]?')

# One or two dashes, an alphanumeric, then any of letters, digits, "-",
# "_" or "=" up to the end of the token.
FLAG_PATTERN = re.compile(r'^-{1,2}[A-Za-z0-9][-A-Za-z0-9_=]*$')
|
|
244
|
+
|
|
245
|
+
def __init__(self):
|
|
246
|
+
"""Initialize the parser."""
|
|
247
|
+
pass
|
|
248
|
+
|
|
249
|
+
def parse(self, command: str, description: str = "", output: str = "") -> ParsedCommand:
    """
    Parse a bash command into structural components.

    The regex-based extractors run on the raw command text first; the
    command is then tokenized to collect base commands, flags and
    arguments. Category and complexity are derived last because they read
    the fields filled in by the earlier steps.

    Args:
        command: The raw bash command string
        description: Optional description of the command
        output: Optional output from command execution

    Returns:
        ParsedCommand object with extracted structural information
    """
    result = ParsedCommand(raw=command, description=description, output=output)

    # A command is multi-line only when it really contains a newline. A
    # backslash line continuation always includes one, whereas a bare
    # backslash (for example the "\;" that ends "find -exec") is just an
    # escape and must not count as multi-line.
    result.is_multiline = '\n' in command
    result.has_heredoc = bool(self.HEREDOC_PATTERN.search(command))

    # Pattern-based extraction on the raw text (subshells are read here,
    # before tokenization replaces them with placeholders).
    result.subshells = self._extract_subshells(command)
    result.redirects = self._extract_redirects(command)
    result.variables = self._extract_variables(command)
    result.logical_ops = self._extract_logical_ops(command)
    result.pipes = self._extract_pipes(command)

    # Token-based extraction; fills base_commands, flags, arguments and
    # parse_errors on ``result`` in place.
    self._tokenize_and_extract(command, result)

    # Derived values depend on everything gathered above.
    result.category = self._categorize(result)
    result.complexity_score = self._calculate_complexity(result)

    return result
|
|
296
|
+
|
|
297
|
+
def _extract_subshells(self, command: str) -> list[str]:
|
|
298
|
+
"""Extract subshell expressions from command."""
|
|
299
|
+
subshells = []
|
|
300
|
+
|
|
301
|
+
# Find $(...) subshells
|
|
302
|
+
for match in self.SUBSHELL_DOLLAR_PATTERN.finditer(command):
|
|
303
|
+
subshells.append(match.group(1))
|
|
304
|
+
|
|
305
|
+
# Find `...` subshells
|
|
306
|
+
for match in self.SUBSHELL_BACKTICK_PATTERN.finditer(command):
|
|
307
|
+
subshells.append(match.group(1))
|
|
308
|
+
|
|
309
|
+
return subshells
|
|
310
|
+
|
|
311
|
+
def _extract_redirects(self, command: str) -> list[dict]:
|
|
312
|
+
"""Extract redirect operations from command."""
|
|
313
|
+
redirects = []
|
|
314
|
+
|
|
315
|
+
for match in self.REDIRECT_PATTERN.finditer(command):
|
|
316
|
+
fd = match.group(1) or ''
|
|
317
|
+
operator = match.group(2)
|
|
318
|
+
target = match.group(3) or ''
|
|
319
|
+
|
|
320
|
+
redirect_type = 'unknown'
|
|
321
|
+
if operator in ('>', '>>'):
|
|
322
|
+
redirect_type = 'stdout'
|
|
323
|
+
elif operator == '2>':
|
|
324
|
+
redirect_type = 'stderr'
|
|
325
|
+
elif operator in ('>&', '&>', '2>&1'):
|
|
326
|
+
redirect_type = 'both'
|
|
327
|
+
elif operator == '<':
|
|
328
|
+
redirect_type = 'stdin'
|
|
329
|
+
|
|
330
|
+
redirects.append({
|
|
331
|
+
'fd': fd,
|
|
332
|
+
'operator': operator,
|
|
333
|
+
'target': target,
|
|
334
|
+
'type': redirect_type
|
|
335
|
+
})
|
|
336
|
+
|
|
337
|
+
return redirects
|
|
338
|
+
|
|
339
|
+
def _extract_variables(self, command: str) -> list[dict]:
|
|
340
|
+
"""Extract variable assignments and references from command."""
|
|
341
|
+
variables = []
|
|
342
|
+
seen_assignments = set()
|
|
343
|
+
seen_references = set()
|
|
344
|
+
|
|
345
|
+
# Split by logical operators and pipes to find assignments
|
|
346
|
+
segments = re.split(r'[|&;]', command)
|
|
347
|
+
|
|
348
|
+
for segment in segments:
|
|
349
|
+
segment = segment.strip()
|
|
350
|
+
# Check for variable assignment at start of segment
|
|
351
|
+
match = self.VARIABLE_ASSIGN_PATTERN.match(segment)
|
|
352
|
+
if match:
|
|
353
|
+
var_name = match.group(1)
|
|
354
|
+
var_value = match.group(2)
|
|
355
|
+
if var_name not in seen_assignments:
|
|
356
|
+
variables.append({
|
|
357
|
+
'name': var_name,
|
|
358
|
+
'value': var_value,
|
|
359
|
+
'type': 'assignment'
|
|
360
|
+
})
|
|
361
|
+
seen_assignments.add(var_name)
|
|
362
|
+
|
|
363
|
+
# Find variable references
|
|
364
|
+
for match in self.VARIABLE_REF_PATTERN.finditer(command):
|
|
365
|
+
var_name = match.group(1)
|
|
366
|
+
if var_name not in seen_references and var_name not in seen_assignments:
|
|
367
|
+
variables.append({
|
|
368
|
+
'name': var_name,
|
|
369
|
+
'type': 'reference'
|
|
370
|
+
})
|
|
371
|
+
seen_references.add(var_name)
|
|
372
|
+
|
|
373
|
+
return variables
|
|
374
|
+
|
|
375
|
+
def _extract_logical_ops(self, command: str) -> list[str]:
|
|
376
|
+
"""Extract logical operators (&&, ||) from command."""
|
|
377
|
+
ops = []
|
|
378
|
+
|
|
379
|
+
for match in self.LOGICAL_AND_PATTERN.finditer(command):
|
|
380
|
+
ops.append('&&')
|
|
381
|
+
|
|
382
|
+
for match in self.LOGICAL_OR_PATTERN.finditer(command):
|
|
383
|
+
ops.append('||')
|
|
384
|
+
|
|
385
|
+
return ops
|
|
386
|
+
|
|
387
|
+
def _extract_pipes(self, command: str) -> list[str]:
|
|
388
|
+
"""Extract piped command segments."""
|
|
389
|
+
# Remove subshells temporarily to avoid false positives
|
|
390
|
+
temp_cmd = self.SUBSHELL_DOLLAR_PATTERN.sub('__SUBSHELL__', command)
|
|
391
|
+
temp_cmd = self.SUBSHELL_BACKTICK_PATTERN.sub('__SUBSHELL__', temp_cmd)
|
|
392
|
+
|
|
393
|
+
# Split by single pipes (not ||)
|
|
394
|
+
segments = self.PIPE_PATTERN.split(temp_cmd)
|
|
395
|
+
|
|
396
|
+
if len(segments) <= 1:
|
|
397
|
+
return []
|
|
398
|
+
|
|
399
|
+
# Clean up segments
|
|
400
|
+
pipes = []
|
|
401
|
+
for seg in segments:
|
|
402
|
+
seg = seg.strip()
|
|
403
|
+
if seg and seg != '__SUBSHELL__':
|
|
404
|
+
pipes.append(seg)
|
|
405
|
+
|
|
406
|
+
return pipes
|
|
407
|
+
|
|
408
|
+
def _tokenize_and_extract(self, command: str, result: ParsedCommand) -> None:
|
|
409
|
+
"""
|
|
410
|
+
Tokenize command and extract base commands, flags, and arguments.
|
|
411
|
+
|
|
412
|
+
Uses shlex for safe tokenization, with fallback for unparseable commands.
|
|
413
|
+
"""
|
|
414
|
+
# Prepare command for tokenization
|
|
415
|
+
# Remove heredocs which break shlex
|
|
416
|
+
tokenize_cmd = self.HEREDOC_PATTERN.sub('', command)
|
|
417
|
+
|
|
418
|
+
# Replace subshells with placeholders
|
|
419
|
+
tokenize_cmd = self.SUBSHELL_DOLLAR_PATTERN.sub('__SUBSHELL__', tokenize_cmd)
|
|
420
|
+
tokenize_cmd = self.SUBSHELL_BACKTICK_PATTERN.sub('__SUBSHELL__', tokenize_cmd)
|
|
421
|
+
|
|
422
|
+
try:
|
|
423
|
+
# Use shlex for tokenization
|
|
424
|
+
lexer = shlex.shlex(tokenize_cmd, posix=True)
|
|
425
|
+
lexer.whitespace_split = True
|
|
426
|
+
lexer.commenters = '' # Don't treat # as comment for first pass
|
|
427
|
+
|
|
428
|
+
tokens = list(lexer)
|
|
429
|
+
except ValueError as e:
|
|
430
|
+
# shlex couldn't parse (unclosed quotes, etc.)
|
|
431
|
+
result.parse_errors.append(f"Tokenization error: {e}")
|
|
432
|
+
# Fallback: simple split
|
|
433
|
+
tokens = tokenize_cmd.split()
|
|
434
|
+
|
|
435
|
+
# Process tokens
|
|
436
|
+
base_commands_set = set()
|
|
437
|
+
in_command_position = True
|
|
438
|
+
skip_next = False
|
|
439
|
+
|
|
440
|
+
for i, token in enumerate(tokens):
|
|
441
|
+
if skip_next:
|
|
442
|
+
skip_next = False
|
|
443
|
+
continue
|
|
444
|
+
|
|
445
|
+
# Skip operators
|
|
446
|
+
if token in ('&&', '||', '|', ';', '&'):
|
|
447
|
+
in_command_position = True
|
|
448
|
+
continue
|
|
449
|
+
|
|
450
|
+
# Skip redirects
|
|
451
|
+
if token in ('>', '>>', '<', '2>', '2>&1', '>&', '&>'):
|
|
452
|
+
skip_next = True
|
|
453
|
+
continue
|
|
454
|
+
|
|
455
|
+
# Skip redirect targets
|
|
456
|
+
if i > 0 and tokens[i-1] in ('>', '>>', '<', '2>', '>&', '&>'):
|
|
457
|
+
continue
|
|
458
|
+
|
|
459
|
+
# Skip placeholders
|
|
460
|
+
if token == '__SUBSHELL__':
|
|
461
|
+
continue
|
|
462
|
+
|
|
463
|
+
# Check for variable assignment
|
|
464
|
+
if '=' in token and not token.startswith('-'):
|
|
465
|
+
match = self.VARIABLE_ASSIGN_PATTERN.match(token)
|
|
466
|
+
if match:
|
|
467
|
+
continue
|
|
468
|
+
|
|
469
|
+
# Check if it's a flag
|
|
470
|
+
if self.FLAG_PATTERN.match(token):
|
|
471
|
+
result.flags.append(token)
|
|
472
|
+
continue
|
|
473
|
+
|
|
474
|
+
# Check if it's a base command
|
|
475
|
+
if in_command_position and not token.startswith('/'):
|
|
476
|
+
# Handle path-prefixed commands
|
|
477
|
+
cmd_name = token.split('/')[-1] if '/' in token else token
|
|
478
|
+
base_commands_set.add(cmd_name)
|
|
479
|
+
in_command_position = False
|
|
480
|
+
else:
|
|
481
|
+
# It's an argument
|
|
482
|
+
if not token.startswith('-'):
|
|
483
|
+
result.arguments.append(token)
|
|
484
|
+
|
|
485
|
+
result.base_commands = list(base_commands_set)
|
|
486
|
+
|
|
487
|
+
def _categorize(self, result: ParsedCommand) -> CommandCategory:
|
|
488
|
+
"""Determine the category of the command based on base commands."""
|
|
489
|
+
for cmd in result.base_commands:
|
|
490
|
+
if cmd in self.COMMAND_CATEGORIES:
|
|
491
|
+
return self.COMMAND_CATEGORIES[cmd]
|
|
492
|
+
|
|
493
|
+
return CommandCategory.UNKNOWN
|
|
494
|
+
|
|
495
|
+
def _calculate_complexity(self, result: ParsedCommand) -> int:
|
|
496
|
+
"""
|
|
497
|
+
Calculate a complexity score for the command.
|
|
498
|
+
|
|
499
|
+
Higher scores indicate more complex commands.
|
|
500
|
+
"""
|
|
501
|
+
score = 0
|
|
502
|
+
|
|
503
|
+
# Base complexity
|
|
504
|
+
score += len(result.base_commands)
|
|
505
|
+
|
|
506
|
+
# Flags add complexity
|
|
507
|
+
score += len(result.flags) * 0.5
|
|
508
|
+
|
|
509
|
+
# Pipes add significant complexity
|
|
510
|
+
score += len(result.pipes) * 2
|
|
511
|
+
|
|
512
|
+
# Redirects add moderate complexity
|
|
513
|
+
score += len(result.redirects) * 1.5
|
|
514
|
+
|
|
515
|
+
# Subshells add significant complexity
|
|
516
|
+
score += len(result.subshells) * 3
|
|
517
|
+
|
|
518
|
+
# Logical operators add complexity
|
|
519
|
+
score += len(result.logical_ops) * 1.5
|
|
520
|
+
|
|
521
|
+
# Variables add some complexity
|
|
522
|
+
score += len(result.variables)
|
|
523
|
+
|
|
524
|
+
# Multiline commands are more complex
|
|
525
|
+
if result.is_multiline:
|
|
526
|
+
score += 2
|
|
527
|
+
|
|
528
|
+
# Heredocs are complex
|
|
529
|
+
if result.has_heredoc:
|
|
530
|
+
score += 3
|
|
531
|
+
|
|
532
|
+
# Arguments add minor complexity
|
|
533
|
+
score += len(result.arguments) * 0.25
|
|
534
|
+
|
|
535
|
+
return int(round(score))
|
|
536
|
+
|
|
537
|
+
def parse_batch(
    self,
    commands: list[tuple[str, str, str]]
) -> list[ParsedCommand]:
    """
    Run ``parse`` over a sequence of commands.

    Args:
        commands: List of (command, description, output) tuples

    Returns:
        List of ParsedCommand objects, one per input tuple, in input order
    """
    parsed = []
    for raw_command, summary, captured_output in commands:
        parsed.append(self.parse(raw_command, summary, captured_output))
    return parsed
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
def parse_command(
    command: str,
    description: str = "",
    output: str = ""
) -> ParsedCommand:
    """
    Parse one bash command using a throwaway ``BashParser``.

    Args:
        command: The raw bash command string
        description: Optional description
        output: Optional command output

    Returns:
        ParsedCommand object
    """
    return BashParser().parse(command, description, output)
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
def parse_commands(
    commands: list[tuple[str, str, str]]
) -> list[ParsedCommand]:
    """
    Parse several bash commands using a throwaway ``BashParser``.

    Args:
        commands: List of (command, description, output) tuples

    Returns:
        List of ParsedCommand objects
    """
    return BashParser().parse_batch(commands)
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
if __name__ == "__main__":
    # Demo: parse a handful of representative commands and print a short
    # report for each one.
    sample_commands = [
        ("ls -la /tmp", "List files in tmp", ""),
        ("cat file.txt | grep 'pattern' | sort -u", "Search and sort", ""),
        ("git status && git add . && git commit -m 'test'", "Git workflow", ""),
        ("export FOO=bar && echo $FOO", "Set and use variable", "bar"),
        ("find . -name '*.py' -exec grep -l 'import' {} \\;", "Find Python imports", ""),
        ("docker run -d --name test -p 8080:80 nginx:latest", "Run Docker container", ""),
        ("curl -s https://api.example.com | jq '.data[]'", "API request with jq", ""),
        ("cat <<EOF > output.txt\nline1\nline2\nEOF", "Heredoc example", ""),
        ("VAR=$(echo 'hello' | tr 'a-z' 'A-Z')", "Command substitution", ""),
        ("npm install && npm test 2>&1 | tee test.log", "Complex build", ""),
    ]

    demo_parser = BashParser()

    for raw_command, summary, captured_output in sample_commands:
        parsed = demo_parser.parse(raw_command, summary, captured_output)

        # Collect the report lines first, then emit them in one write.
        report = [
            f"\n{'='*60}",
            f"Raw: {parsed.raw}",
            f"Category: {parsed.category.value}",
            f"Base commands: {parsed.base_commands}",
            f"Flags: {parsed.flags}",
            f"Pipes: {len(parsed.pipes)} segments",
            f"Redirects: {parsed.redirects}",
            f"Subshells: {parsed.subshells}",
            f"Variables: {parsed.variables}",
            f"Logical ops: {parsed.logical_ops}",
            f"Complexity: {parsed.complexity_score}",
        ]
        if parsed.parse_errors:
            report.append(f"Parse errors: {parsed.parse_errors}")

        print("\n".join(report))
|