mcpower-proxy 0.0.73__py3-none-any.whl → 0.0.77__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ide_tools/common/__init__.py +0 -1
- ide_tools/common/hooks/__init__.py +0 -1
- ide_tools/common/hooks/init.py +28 -24
- ide_tools/common/hooks/output.py +14 -15
- ide_tools/common/hooks/prompt_submit.py +13 -63
- ide_tools/common/hooks/read_file.py +14 -14
- ide_tools/common/hooks/shell_execution.py +140 -79
- ide_tools/common/hooks/shell_parser_bashlex.py +394 -0
- ide_tools/common/hooks/types.py +3 -4
- ide_tools/common/hooks/utils.py +18 -8
- ide_tools/cursor/router.py +1 -0
- {mcpower_proxy-0.0.73.dist-info → mcpower_proxy-0.0.77.dist-info}/METADATA +3 -2
- {mcpower_proxy-0.0.73.dist-info → mcpower_proxy-0.0.77.dist-info}/RECORD +24 -21
- modules/logs/audit_trail.py +5 -4
- modules/redaction/gitleaks_rules.py +1 -1
- modules/redaction/pii_rules.py +0 -48
- modules/utils/platform.py +23 -0
- modules/utils/string.py +17 -0
- wrapper/__version__.py +1 -1
- wrapper/middleware.py +21 -9
- {mcpower_proxy-0.0.73.dist-info → mcpower_proxy-0.0.77.dist-info}/WHEEL +0 -0
- {mcpower_proxy-0.0.73.dist-info → mcpower_proxy-0.0.77.dist-info}/entry_points.txt +0 -0
- {mcpower_proxy-0.0.73.dist-info → mcpower_proxy-0.0.77.dist-info}/licenses/LICENSE +0 -0
- {mcpower_proxy-0.0.73.dist-info → mcpower_proxy-0.0.77.dist-info}/top_level.txt +0 -0
|
@@ -4,28 +4,94 @@ Common shell execution handler - IDE-agnostic
|
|
|
4
4
|
Handles both request (before) and response (after) inspection for shell commands.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
-
import
|
|
7
|
+
import os
|
|
8
8
|
from typing import Optional, Dict, List
|
|
9
9
|
|
|
10
10
|
from modules.logs.audit_trail import AuditTrailLogger
|
|
11
11
|
from modules.logs.logger import MCPLogger
|
|
12
12
|
from modules.redaction import redact
|
|
13
13
|
from modules.utils.ids import get_session_id, read_app_uid, get_project_mcpower_dir
|
|
14
|
-
from .types import HookConfig
|
|
15
14
|
from .output import output_result, output_error
|
|
15
|
+
from .shell_parser_bashlex import parse_shell_command
|
|
16
|
+
from .types import HookConfig
|
|
16
17
|
from .utils import create_validator, inspect_and_enforce
|
|
17
18
|
|
|
18
19
|
|
|
20
|
+
def extract_and_redact_command_files(
        command: str,
        cwd: Optional[str],
        logger: MCPLogger
) -> Dict[str, str]:
    """
    Read every input file referenced by a shell command and redact its contents.

    The command is handed to ``parse_shell_command``; each file name it reports
    is opened as UTF-8 text, passed through ``redact`` and stored in the result.
    Failures are logged as warnings and never raised: a file that is missing,
    is not a regular file, is not valid UTF-8 or cannot be read is skipped.

    Args:
        command: The shell command to parse
        cwd: Current working directory (joined onto names that are not absolute)
        logger: Logger instance for info/warning messages

    Returns:
        Dictionary mapping each file name, exactly as reported by the parser,
        to its redacted content. Empty when nothing could be read.
    """
    redacted_by_name = {}

    try:
        _, input_files = parse_shell_command(command, initial_cwd=cwd)
        logger.info(f"Extracted {len(input_files)} input files from command: {input_files}")

        for filename in input_files:
            try:
                # Only names that are not already absolute get joined onto cwd
                must_join = (not os.path.isabs(filename)) and bool(cwd)
                filepath = os.path.join(cwd, filename) if must_join else filename

                # isfile() is False for missing paths as well as for directories
                if not os.path.isfile(filepath):
                    logger.warning(f"File {filename} does not exist or is not a file, skipping")
                    continue

                try:
                    with open(filepath, 'r', encoding='utf-8') as handle:
                        raw_text = handle.read()

                    # Key by the name the parser reported, not by `filepath`
                    redacted_by_name[filename] = redact(raw_text)
                    logger.info(f"Successfully read and redacted file: {filename}")
                except UnicodeDecodeError:
                    logger.warning(f"File {filename} is not a text file, skipping")
                except Exception as read_error:
                    logger.warning(f"Failed to read file {filename}: {read_error}")

            except Exception as file_error:
                # One bad entry must not stop the remaining files
                logger.warning(f"Error processing file {filename}: {file_error}")

    except Exception as parse_error:
        logger.warning(f"Failed to parse command for file extraction: {parse_error}")

    return redacted_by_name
|
|
83
|
+
|
|
84
|
+
|
|
19
85
|
async def handle_shell_execution(
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
86
|
+
logger: MCPLogger,
|
|
87
|
+
audit_logger: AuditTrailLogger,
|
|
88
|
+
stdin_input: str,
|
|
89
|
+
prompt_id: str,
|
|
90
|
+
event_id: str,
|
|
91
|
+
cwd: Optional[str],
|
|
92
|
+
config: HookConfig,
|
|
93
|
+
tool_name: str,
|
|
94
|
+
is_request: bool = True
|
|
29
95
|
):
|
|
30
96
|
"""
|
|
31
97
|
Generic shell execution handler - handles both request and response
|
|
@@ -60,20 +126,20 @@ async def handle_shell_execution(
|
|
|
60
126
|
|
|
61
127
|
|
|
62
128
|
async def _handle_shell_operation(
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
129
|
+
logger: MCPLogger,
|
|
130
|
+
audit_logger: AuditTrailLogger,
|
|
131
|
+
stdin_input: str,
|
|
132
|
+
prompt_id: str,
|
|
133
|
+
event_id: str,
|
|
134
|
+
cwd: Optional[str],
|
|
135
|
+
config: HookConfig,
|
|
136
|
+
is_request: bool,
|
|
137
|
+
required_fields: Dict[str, type],
|
|
138
|
+
redact_fields: List[str],
|
|
139
|
+
tool_name: str,
|
|
140
|
+
operation_name: str,
|
|
141
|
+
audit_event_type: str,
|
|
142
|
+
audit_forwarded_event_type: str
|
|
77
143
|
):
|
|
78
144
|
"""
|
|
79
145
|
Internal shell operation handler - shared logic for request and response
|
|
@@ -88,11 +154,11 @@ async def _handle_shell_operation(
|
|
|
88
154
|
audit_forwarded_event_type: Audit event name for forwarded operation
|
|
89
155
|
"""
|
|
90
156
|
session_id = get_session_id()
|
|
91
|
-
|
|
92
|
-
logger.info(
|
|
93
|
-
|
|
157
|
+
|
|
158
|
+
logger.info(
|
|
159
|
+
f"{tool_name} handler started (client={config.client_name}, prompt_id={prompt_id}, event_id={event_id}, cwd={cwd})")
|
|
160
|
+
|
|
94
161
|
try:
|
|
95
|
-
# Validate input
|
|
96
162
|
try:
|
|
97
163
|
validator = create_validator(required_fields=required_fields)
|
|
98
164
|
input_data = validator(stdin_input)
|
|
@@ -100,43 +166,52 @@ async def _handle_shell_operation(
|
|
|
100
166
|
logger.error(f"Input validation error: {e}")
|
|
101
167
|
output_error(logger, config.output_format, "permission", str(e))
|
|
102
168
|
return
|
|
103
|
-
|
|
169
|
+
|
|
104
170
|
app_uid = read_app_uid(logger, get_project_mcpower_dir(cwd))
|
|
105
171
|
audit_logger.set_app_uid(app_uid)
|
|
106
|
-
|
|
107
|
-
# Redact sensitive data for logging
|
|
172
|
+
|
|
108
173
|
redacted_data = {}
|
|
109
174
|
for k, v in input_data.items():
|
|
110
175
|
if k in required_fields:
|
|
111
176
|
redacted_data[k] = redact(v) if k in redact_fields else v
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
"
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
177
|
+
|
|
178
|
+
# Extract and redact input files for request inspection
|
|
179
|
+
files_dict = {}
|
|
180
|
+
if is_request and "command" in input_data:
|
|
181
|
+
command = input_data["command"]
|
|
182
|
+
files_dict = extract_and_redact_command_files(command, cwd, logger)
|
|
183
|
+
if files_dict:
|
|
184
|
+
logger.info(f"Extracted and redacted {len(files_dict)} files from command")
|
|
185
|
+
|
|
186
|
+
def get_audit_data():
|
|
187
|
+
# Use different structure for request vs response events
|
|
188
|
+
# Requests: params nested, Responses: unpacked at root
|
|
189
|
+
if is_request:
|
|
190
|
+
return {
|
|
191
|
+
"server": config.server_name,
|
|
192
|
+
"tool": tool_name,
|
|
193
|
+
"params": redacted_data,
|
|
194
|
+
"files": list(files_dict.keys()) if files_dict else None
|
|
195
|
+
}
|
|
196
|
+
else:
|
|
197
|
+
return {
|
|
198
|
+
"server": config.server_name,
|
|
199
|
+
"tool": tool_name,
|
|
200
|
+
**redacted_data
|
|
201
|
+
}
|
|
202
|
+
|
|
130
203
|
audit_logger.log_event(
|
|
131
204
|
audit_event_type,
|
|
132
|
-
|
|
133
|
-
event_id=event_id
|
|
205
|
+
get_audit_data(),
|
|
206
|
+
event_id=event_id,
|
|
207
|
+
prompt_id=prompt_id
|
|
134
208
|
)
|
|
135
|
-
|
|
136
|
-
# Build content_data with redacted fields
|
|
137
|
-
content_data = redacted_data
|
|
138
|
-
|
|
139
|
-
|
|
209
|
+
|
|
210
|
+
# Build content_data with redacted fields and files
|
|
211
|
+
content_data = redacted_data.copy()
|
|
212
|
+
if files_dict:
|
|
213
|
+
content_data["files"] = files_dict
|
|
214
|
+
|
|
140
215
|
try:
|
|
141
216
|
decision = await inspect_and_enforce(
|
|
142
217
|
is_request=is_request,
|
|
@@ -152,28 +227,14 @@ async def _handle_shell_operation(
|
|
|
152
227
|
cwd=cwd,
|
|
153
228
|
client_name=config.client_name
|
|
154
229
|
)
|
|
155
|
-
|
|
156
|
-
# Log audit event for forwarding
|
|
157
|
-
# Use different structure for request vs response
|
|
158
|
-
if is_request:
|
|
159
|
-
forwarded_data = {
|
|
160
|
-
"server": config.server_name,
|
|
161
|
-
"tool": tool_name,
|
|
162
|
-
"params": redacted_data
|
|
163
|
-
}
|
|
164
|
-
else:
|
|
165
|
-
forwarded_data = {
|
|
166
|
-
"server": config.server_name,
|
|
167
|
-
"tool": tool_name,
|
|
168
|
-
**redacted_data
|
|
169
|
-
}
|
|
170
|
-
|
|
230
|
+
|
|
171
231
|
audit_logger.log_event(
|
|
172
232
|
audit_forwarded_event_type,
|
|
173
|
-
|
|
174
|
-
event_id=event_id
|
|
233
|
+
get_audit_data(),
|
|
234
|
+
event_id=event_id,
|
|
235
|
+
prompt_id=prompt_id
|
|
175
236
|
)
|
|
176
|
-
|
|
237
|
+
|
|
177
238
|
reasons = decision.get("reasons", [])
|
|
178
239
|
user_message = f"{operation_name} approved"
|
|
179
240
|
if not reasons:
|
|
@@ -181,16 +242,16 @@ async def _handle_shell_operation(
|
|
|
181
242
|
else:
|
|
182
243
|
agent_message = f"{operation_name} approved: {'; '.join(reasons)}"
|
|
183
244
|
output_result(logger, config.output_format, "permission", True, user_message, agent_message)
|
|
184
|
-
|
|
245
|
+
|
|
185
246
|
except Exception as e:
|
|
186
247
|
# Decision enforcement failed - block
|
|
187
248
|
error_msg = str(e)
|
|
188
249
|
user_message = f"{operation_name} blocked by security policy"
|
|
189
250
|
if "User blocked" in error_msg or "User denied" in error_msg:
|
|
190
251
|
user_message = f"{operation_name} blocked by user"
|
|
191
|
-
|
|
252
|
+
|
|
192
253
|
output_result(logger, config.output_format, "permission", False, user_message, error_msg)
|
|
193
|
-
|
|
254
|
+
|
|
194
255
|
except Exception as e:
|
|
195
256
|
logger.error(f"Unexpected error in {tool_name} handler: {e}", exc_info=True)
|
|
196
257
|
output_error(logger, config.output_format, "permission", f"Unexpected error: {str(e)}")
|
|
@@ -0,0 +1,394 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Shell command parser using bashlex library.
|
|
4
|
+
Parses shell commands to extract sub-commands and file references using proper bash parsing.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import bashlex
|
|
8
|
+
import os
|
|
9
|
+
from typing import List, Tuple, Set, Optional, Dict
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def parse_shell_command(command: str, initial_cwd: Optional[str] = None) -> Tuple[List[str], List[str]]:
    """
    Parse a shell command using bashlex and extract sub-commands and input files.

    Args:
        command: A shell command string (supports pipes, redirections, etc.)
        initial_cwd: Initial working directory (defaults to the process's
            current directory)

    Returns:
        A tuple of (sub_commands, input_files) where:
        - sub_commands: Text of each simple command found, in traversal order
        - input_files: Sorted list of referenced file paths, resolved against
          the tracked working directory, excluding paths that appear as the
          target of an output redirection

        If bashlex cannot parse the command, the fallback ``([command], [])``
        is returned and a warning is logged.

    Example (illustrative, with ``initial_cwd="/work"``):
        ``"cat a.txt > /tmp/b.txt"`` is expected to yield
        ``(['cat a.txt > /tmp/b.txt'], ['/work/a.txt'])``.

    NOTE(review): because the result is ``all_files - output_files``, a path
    that is both read and used as an output-redirect target in the same
    command line is dropped from ``input_files``.
    """
    try:
        # Parse the command into a list of AST roots
        parts = bashlex.parse(command)
    except Exception as e:
        # Report through logging instead of print(): this is library code and
        # must not write diagnostics to stdout, which callers may be using
        # for their own output.
        import logging
        logging.getLogger(__name__).warning("bashlex parsing failed: %s", e)
        return ([command], [])

    sub_commands: List[str] = []
    all_files: Set[str] = set()
    output_files: Set[str] = set()

    # Shared mutable state so a `cd` seen earlier affects later path resolution.
    context = {
        'cwd': initial_cwd or os.getcwd(),
        # Not read by any code visible here; kept so the context shape is unchanged.
        'file_to_cwd': {}
    }

    for ast in parts:
        _extract_from_ast(ast, command, sub_commands, all_files, output_files, False, context)

    # Remove output-only files from the result
    input_files = sorted(all_files - output_files)

    return sub_commands, input_files
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _extract_from_ast(
|
|
64
|
+
node,
|
|
65
|
+
command: str,
|
|
66
|
+
sub_commands: List[str],
|
|
67
|
+
all_files: Set[str],
|
|
68
|
+
output_files: Set[str],
|
|
69
|
+
parent_is_pipe: bool = False,
|
|
70
|
+
context: Optional[Dict] = None
|
|
71
|
+
) -> None:
|
|
72
|
+
"""
|
|
73
|
+
Recursively extract sub-commands and files from a bashlex AST node.
|
|
74
|
+
|
|
75
|
+
Args:
|
|
76
|
+
node: bashlex AST node
|
|
77
|
+
command: Original command string (for extracting text)
|
|
78
|
+
sub_commands: List to append sub-commands to
|
|
79
|
+
all_files: Set to add all file references to
|
|
80
|
+
output_files: Set to add output-only files to
|
|
81
|
+
parent_is_pipe: True if parent node is a pipe operator
|
|
82
|
+
context: Dictionary with 'cwd' for current working directory
|
|
83
|
+
"""
|
|
84
|
+
if context is None:
|
|
85
|
+
context = {'cwd': os.getcwd()}
|
|
86
|
+
|
|
87
|
+
# Check node kind to determine type
|
|
88
|
+
node_kind = getattr(node, 'kind', None)
|
|
89
|
+
|
|
90
|
+
if node_kind == 'list':
|
|
91
|
+
# List node contains multiple parts connected by operators (&&, ||, ;)
|
|
92
|
+
# Process sequentially to track directory changes
|
|
93
|
+
if hasattr(node, 'parts'):
|
|
94
|
+
for part in node.parts:
|
|
95
|
+
_extract_from_ast(part, command, sub_commands, all_files, output_files, False, context)
|
|
96
|
+
|
|
97
|
+
elif node_kind == 'pipeline':
|
|
98
|
+
# Pipeline node - extract individual commands
|
|
99
|
+
_extract_pipeline(node, command, sub_commands, all_files, output_files, context)
|
|
100
|
+
|
|
101
|
+
elif node_kind == 'command':
|
|
102
|
+
# Command node - extract the command text and analyze its parts
|
|
103
|
+
if hasattr(node, 'pos'):
|
|
104
|
+
start, end = node.pos
|
|
105
|
+
cmd_text = command[start:end]
|
|
106
|
+
sub_commands.append(cmd_text)
|
|
107
|
+
|
|
108
|
+
# Get the command name (first word) for context
|
|
109
|
+
cmd_name = None
|
|
110
|
+
if hasattr(node, 'parts') and len(node.parts) > 0:
|
|
111
|
+
first_part = node.parts[0]
|
|
112
|
+
if hasattr(first_part, 'word'):
|
|
113
|
+
cmd_name = first_part.word
|
|
114
|
+
|
|
115
|
+
# Check if this is a cd command and update context
|
|
116
|
+
if cmd_name == 'cd' and hasattr(node, 'parts') and len(node.parts) > 1:
|
|
117
|
+
second_part = node.parts[1]
|
|
118
|
+
if hasattr(second_part, 'word'):
|
|
119
|
+
target_dir = second_part.word
|
|
120
|
+
# Resolve the new directory
|
|
121
|
+
if os.path.isabs(target_dir):
|
|
122
|
+
context['cwd'] = target_dir
|
|
123
|
+
else:
|
|
124
|
+
context['cwd'] = os.path.normpath(os.path.join(context['cwd'], target_dir))
|
|
125
|
+
|
|
126
|
+
# Extract files from command parts (arguments and redirections)
|
|
127
|
+
if hasattr(node, 'parts'):
|
|
128
|
+
for i, part in enumerate(node.parts):
|
|
129
|
+
part_kind = getattr(part, 'kind', None)
|
|
130
|
+
if part_kind == 'redirect':
|
|
131
|
+
_extract_redirect(part, command, all_files, output_files, context)
|
|
132
|
+
elif i > 0: # Skip the command name itself (index 0)
|
|
133
|
+
_extract_files_from_node(part, command, all_files, output_files, cmd_name, context)
|
|
134
|
+
|
|
135
|
+
elif node_kind == 'compound':
|
|
136
|
+
# Compound command (like if, while, for, etc.)
|
|
137
|
+
if hasattr(node, 'list'):
|
|
138
|
+
for item in node.list:
|
|
139
|
+
_extract_from_ast(item, command, sub_commands, all_files, output_files, False, context)
|
|
140
|
+
|
|
141
|
+
elif node_kind == 'operator':
|
|
142
|
+
# Operator node (like &&, ||, ;) - ignore
|
|
143
|
+
pass
|
|
144
|
+
|
|
145
|
+
elif node_kind == 'pipe':
|
|
146
|
+
# Pipe node - ignore (we handle pipes at the pipeline level)
|
|
147
|
+
pass
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _extract_pipeline(node, command: str, sub_commands: List[str], all_files: Set[str], output_files: Set[str], context: Dict) -> None:
|
|
151
|
+
"""Extract commands from a pipeline node."""
|
|
152
|
+
if hasattr(node, 'parts'):
|
|
153
|
+
for part in node.parts:
|
|
154
|
+
part_kind = getattr(part, 'kind', None)
|
|
155
|
+
# Skip pipe nodes, only process commands
|
|
156
|
+
if part_kind != 'pipe':
|
|
157
|
+
_extract_from_ast(part, command, sub_commands, all_files, output_files, True, context)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _extract_files_from_node(node, command: str, all_files: Set[str], output_files: Set[str], cmd_name: Optional[str] = None, context: Optional[Dict] = None) -> None:
|
|
161
|
+
"""Extract file references from a node.
|
|
162
|
+
|
|
163
|
+
Args:
|
|
164
|
+
node: bashlex AST node
|
|
165
|
+
command: Original command string
|
|
166
|
+
all_files: Set to add all file references to
|
|
167
|
+
output_files: Set to add output-only files to
|
|
168
|
+
cmd_name: Name of the command this node belongs to (for context)
|
|
169
|
+
context: Dictionary with 'cwd' for current working directory
|
|
170
|
+
"""
|
|
171
|
+
if context is None:
|
|
172
|
+
context = {'cwd': os.getcwd()}
|
|
173
|
+
|
|
174
|
+
node_kind = getattr(node, 'kind', None)
|
|
175
|
+
|
|
176
|
+
if node_kind == 'word':
|
|
177
|
+
# Word node - check if it's a file reference
|
|
178
|
+
word = node.word if hasattr(node, 'word') else None
|
|
179
|
+
|
|
180
|
+
if word and _looks_like_file(word, cmd_name):
|
|
181
|
+
# Resolve relative paths against current working directory
|
|
182
|
+
resolved_path = _resolve_path(word, context['cwd'])
|
|
183
|
+
all_files.add(resolved_path)
|
|
184
|
+
|
|
185
|
+
# Recursively check parts (for command substitutions, etc.)
|
|
186
|
+
if hasattr(node, 'parts'):
|
|
187
|
+
for part in node.parts:
|
|
188
|
+
_extract_files_from_node(part, command, all_files, output_files, cmd_name, context)
|
|
189
|
+
|
|
190
|
+
elif node_kind == 'commandsubstitution':
|
|
191
|
+
# Command substitution $(...) - recursively parse
|
|
192
|
+
if hasattr(node, 'command'):
|
|
193
|
+
_extract_from_ast(node.command, command, [], all_files, output_files, False, context)
|
|
194
|
+
|
|
195
|
+
elif node_kind == 'processsubstitution':
|
|
196
|
+
# Process substitution <(...) or >(...) - recursively parse
|
|
197
|
+
if hasattr(node, 'command'):
|
|
198
|
+
_extract_from_ast(node.command, command, [], all_files, output_files, False, context)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _extract_redirect(redirect, command: str, all_files: Set[str], output_files: Set[str], context: Optional[Dict] = None) -> None:
|
|
202
|
+
"""Extract file references from redirection nodes."""
|
|
203
|
+
if context is None:
|
|
204
|
+
context = {'cwd': os.getcwd()}
|
|
205
|
+
|
|
206
|
+
redirect_type = getattr(redirect, 'type', None)
|
|
207
|
+
|
|
208
|
+
# Get the target of the redirection
|
|
209
|
+
if hasattr(redirect, 'output'):
|
|
210
|
+
target = redirect.output
|
|
211
|
+
target_word = target.word if hasattr(target, 'word') else None
|
|
212
|
+
|
|
213
|
+
# Redirections always point to files, not directories
|
|
214
|
+
if target_word and _looks_like_file(target_word, None):
|
|
215
|
+
# Resolve relative paths against current working directory
|
|
216
|
+
resolved_path = _resolve_path(target_word, context['cwd'])
|
|
217
|
+
|
|
218
|
+
# Determine if it's input or output
|
|
219
|
+
if redirect_type in ('>', '>>', '>&', '>|', '&>'):
|
|
220
|
+
# Output redirection
|
|
221
|
+
output_files.add(resolved_path)
|
|
222
|
+
all_files.add(resolved_path)
|
|
223
|
+
elif redirect_type == '<':
|
|
224
|
+
# Input redirection
|
|
225
|
+
all_files.add(resolved_path)
|
|
226
|
+
else:
|
|
227
|
+
# Unknown, be conservative and include it
|
|
228
|
+
all_files.add(resolved_path)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def _resolve_path(path: str, cwd: str) -> str:
|
|
232
|
+
"""
|
|
233
|
+
Resolve a file path relative to a working directory.
|
|
234
|
+
|
|
235
|
+
Args:
|
|
236
|
+
path: File path (relative or absolute)
|
|
237
|
+
cwd: Current working directory
|
|
238
|
+
|
|
239
|
+
Returns:
|
|
240
|
+
Absolute path
|
|
241
|
+
"""
|
|
242
|
+
if os.path.isabs(path):
|
|
243
|
+
return path
|
|
244
|
+
else:
|
|
245
|
+
return os.path.normpath(os.path.join(cwd, path))
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _looks_like_file(word: str, cmd_name: Optional[str] = None) -> bool:
|
|
249
|
+
"""
|
|
250
|
+
Heuristic to determine if a word is an actual readable file path.
|
|
251
|
+
Not patterns, not variables, not directories - actual files we can open.
|
|
252
|
+
|
|
253
|
+
Args:
|
|
254
|
+
word: A word from the command
|
|
255
|
+
cmd_name: The command this word belongs to (for context)
|
|
256
|
+
|
|
257
|
+
Returns:
|
|
258
|
+
True if it looks like a file path
|
|
259
|
+
"""
|
|
260
|
+
if not word:
|
|
261
|
+
return False
|
|
262
|
+
|
|
263
|
+
# Commands that take directory arguments, not files
|
|
264
|
+
DIRECTORY_COMMANDS = {
|
|
265
|
+
'cd', 'pushd', 'popd', 'mkdir', 'rmdir', 'chdir',
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
# If this is a directory command, reject all arguments
|
|
269
|
+
if cmd_name and cmd_name in DIRECTORY_COMMANDS:
|
|
270
|
+
return False
|
|
271
|
+
|
|
272
|
+
# Exclude URLs (http://, https://, ftp://, file://, etc.)
|
|
273
|
+
if '://' in word:
|
|
274
|
+
return False
|
|
275
|
+
|
|
276
|
+
# Exclude shell meta-characters and patterns
|
|
277
|
+
if any(char in word for char in ['*', '?', '[', ']']): # Glob patterns
|
|
278
|
+
return False
|
|
279
|
+
|
|
280
|
+
if '$' in word or '`' in word: # Variables or command substitution
|
|
281
|
+
return False
|
|
282
|
+
|
|
283
|
+
# Exclude sed/awk patterns
|
|
284
|
+
if word.startswith('s/') and word.count('/') >= 2:
|
|
285
|
+
return False
|
|
286
|
+
|
|
287
|
+
# Exclude regex patterns
|
|
288
|
+
if word.startswith('^') or word.endswith('$'):
|
|
289
|
+
return False
|
|
290
|
+
|
|
291
|
+
# Exclude options
|
|
292
|
+
if word.startswith('-') or word.startswith('+'):
|
|
293
|
+
return False
|
|
294
|
+
|
|
295
|
+
# Exclude bare dots
|
|
296
|
+
if word in {'.', '..'}:
|
|
297
|
+
return False
|
|
298
|
+
|
|
299
|
+
# Exclude bare directories (but /tmp/file is OK)
|
|
300
|
+
if word in {'/', '/tmp', '/dev', '/usr', '/etc', '/var', '/opt', '/home'}:
|
|
301
|
+
return False
|
|
302
|
+
|
|
303
|
+
# --- POSITIVE CHECKS ---
|
|
304
|
+
|
|
305
|
+
# Has extension = very likely a file
|
|
306
|
+
if '.' in word and not word.startswith('.'):
|
|
307
|
+
# Get the extension
|
|
308
|
+
parts = word.rsplit('.', 1)
|
|
309
|
+
if len(parts) == 2:
|
|
310
|
+
name, ext = parts
|
|
311
|
+
# Be more permissive with extensions
|
|
312
|
+
if name and ext and ext.replace('_', '').replace('-', '').isalnum():
|
|
313
|
+
if len(ext) <= 10: # Most extensions are < 10 chars
|
|
314
|
+
return True
|
|
315
|
+
|
|
316
|
+
# Has path separator = could be a file
|
|
317
|
+
if '/' in word:
|
|
318
|
+
# Check if it's a path to something specific (not just dirs)
|
|
319
|
+
if not word.endswith('/'): # Not ending with / (directory indicator)
|
|
320
|
+
parts = word.split('/')
|
|
321
|
+
last_part = parts[-1] if parts else ''
|
|
322
|
+
|
|
323
|
+
# If last part has extension, definitely a file
|
|
324
|
+
if '.' in last_part and not last_part.startswith('.'):
|
|
325
|
+
return True
|
|
326
|
+
|
|
327
|
+
# If it's under specific directories that contain files
|
|
328
|
+
if word.startswith('/dev/') and len(word) > 5: # /dev/null, /dev/tty, etc.
|
|
329
|
+
return True
|
|
330
|
+
if word.startswith('/tmp/') and len(word) > 5: # /tmp/anything
|
|
331
|
+
return True
|
|
332
|
+
if word.startswith('/etc/') and len(word) > 5: # /etc/passwd, etc.
|
|
333
|
+
return True
|
|
334
|
+
if word.startswith('/usr/bin/') and len(word) > 9: # Executables
|
|
335
|
+
return True
|
|
336
|
+
if word.startswith('/usr/local/bin/') and len(word) > 15:
|
|
337
|
+
return True
|
|
338
|
+
|
|
339
|
+
# If last part looks like a filename (even without extension)
|
|
340
|
+
if last_part and last_part.replace('-', '').replace('_', '').isalnum():
|
|
341
|
+
# Could be an executable or script
|
|
342
|
+
return True
|
|
343
|
+
|
|
344
|
+
# Check for well-known files without extensions (case-insensitive)
|
|
345
|
+
filename_only = word.split('/')[-1].lower()
|
|
346
|
+
if filename_only in {'makefile', 'readme', 'license', 'dockerfile',
|
|
347
|
+
'gemfile', 'rakefile', 'procfile', 'vagrantfile',
|
|
348
|
+
'jenkinsfile', 'cakefile', 'gulpfile', 'gruntfile',
|
|
349
|
+
'brewfile', 'berksfile', 'guardfile', 'fastfile',
|
|
350
|
+
'cartfile', 'appfile', 'podfile', 'snapfile'}:
|
|
351
|
+
return True
|
|
352
|
+
|
|
353
|
+
# Stand-alone word without path - be conservative
|
|
354
|
+
if '/' not in word:
|
|
355
|
+
# If it has an extension, probably a file in current directory
|
|
356
|
+
if '.' in word and not word.startswith('.'):
|
|
357
|
+
return True
|
|
358
|
+
|
|
359
|
+
# Well-known executable names without extensions
|
|
360
|
+
if word in {'script', 'run', 'build', 'test', 'deploy', 'install',
|
|
361
|
+
'configure', 'setup', 'bootstrap', 'init'}:
|
|
362
|
+
return True
|
|
363
|
+
|
|
364
|
+
# Otherwise, we can't be sure it's a file (could be a command)
|
|
365
|
+
return False
|
|
366
|
+
|
|
367
|
+
return False
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
# Testing
|
|
371
|
+
if __name__ == "__main__":
    # Manual smoke run: parse a handful of representative commands and print
    # what the parser extracts from each one.
    sample_commands = (
        "cd /Users/user/src/project/server && python test.py",
        "python a.py | tee b.log",
        "cat a.txt > /tmp/b.txt",
        "grep foo file.txt | sort | uniq > output.txt",
        "cat file1.txt file2.txt | grep pattern > result.txt",
        "python script.py < input.txt > output.txt",
        "ls -la /tmp | grep '\\.txt$' | wc -l",
        "tar -xzf archive.tar.gz",
        "find . -name '*.py' | xargs grep pattern",
    )

    print("Shell Command Parser (bashlex) - Test Cases\n" + "="*60)
    for sample in sample_commands:
        # Parse first and print afterwards, so output emitted by the parser
        # itself still precedes the "Command:" line.
        try:
            sub_cmds, files = parse_shell_command(sample)
            report = [
                f"\nCommand: {sample}",
                f"Sub-commands: {sub_cmds}",
                f"Input files: {files}",
            ]
        except Exception as err:
            report = [f"\nCommand: {sample}", f"Error: {err}"]
        for line in report:
            print(line)
|