mcpower-proxy 0.0.65__py3-none-any.whl → 0.0.79__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcpower-proxy might be problematic. Click here for more details.
- ide_tools/__init__.py +12 -0
- ide_tools/common/__init__.py +5 -0
- ide_tools/common/hooks/__init__.py +5 -0
- ide_tools/common/hooks/init.py +130 -0
- ide_tools/common/hooks/output.py +63 -0
- ide_tools/common/hooks/prompt_submit.py +136 -0
- ide_tools/common/hooks/read_file.py +170 -0
- ide_tools/common/hooks/shell_execution.py +257 -0
- ide_tools/common/hooks/shell_parser_bashlex.py +394 -0
- ide_tools/common/hooks/types.py +34 -0
- ide_tools/common/hooks/utils.py +286 -0
- ide_tools/cursor/__init__.py +11 -0
- ide_tools/cursor/constants.py +77 -0
- ide_tools/cursor/format.py +35 -0
- ide_tools/cursor/router.py +107 -0
- ide_tools/router.py +48 -0
- main.py +11 -4
- {mcpower_proxy-0.0.65.dist-info → mcpower_proxy-0.0.79.dist-info}/METADATA +4 -3
- mcpower_proxy-0.0.79.dist-info/RECORD +62 -0
- {mcpower_proxy-0.0.65.dist-info → mcpower_proxy-0.0.79.dist-info}/top_level.txt +1 -0
- modules/apis/security_policy.py +11 -6
- modules/decision_handler.py +219 -0
- modules/logs/audit_trail.py +20 -18
- modules/logs/logger.py +14 -18
- modules/redaction/gitleaks_rules.py +1 -1
- modules/redaction/pii_rules.py +0 -48
- modules/redaction/redactor.py +112 -107
- modules/ui/__init__.py +1 -1
- modules/ui/confirmation.py +0 -1
- modules/utils/cli.py +36 -6
- modules/utils/ids.py +55 -10
- modules/utils/json.py +3 -3
- modules/utils/platform.py +23 -0
- modules/utils/string.py +17 -0
- wrapper/__version__.py +1 -1
- wrapper/middleware.py +144 -221
- wrapper/server.py +19 -11
- mcpower_proxy-0.0.65.dist-info/RECORD +0 -43
- {mcpower_proxy-0.0.65.dist-info → mcpower_proxy-0.0.79.dist-info}/WHEEL +0 -0
- {mcpower_proxy-0.0.65.dist-info → mcpower_proxy-0.0.79.dist-info}/entry_points.txt +0 -0
- {mcpower_proxy-0.0.65.dist-info → mcpower_proxy-0.0.79.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Common shell execution handler - IDE-agnostic
|
|
3
|
+
|
|
4
|
+
Handles both request (before) and response (after) inspection for shell commands.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
from typing import Optional, Dict, List
|
|
9
|
+
|
|
10
|
+
from modules.logs.audit_trail import AuditTrailLogger
|
|
11
|
+
from modules.logs.logger import MCPLogger
|
|
12
|
+
from modules.redaction import redact
|
|
13
|
+
from modules.utils.ids import get_session_id, read_app_uid, get_project_mcpower_dir
|
|
14
|
+
from .output import output_result, output_error
|
|
15
|
+
from .shell_parser_bashlex import parse_shell_command
|
|
16
|
+
from .types import HookConfig
|
|
17
|
+
from .utils import create_validator, inspect_and_enforce
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def extract_and_redact_command_files(
    command: str,
    cwd: Optional[str],
    logger: MCPLogger
) -> Dict[str, str]:
    """
    Collect the redacted contents of every input file a shell command refers to.

    The command is handed to ``parse_shell_command``; each file it reports is
    read as UTF-8 text, passed through ``redact`` and stored under the name the
    parser returned. Any failure (parse error, unreadable or non-text file,
    missing file) is logged as a warning and skipped - this function never
    raises.

    Args:
        command: The shell command to parse
        cwd: Current working directory (for resolving relative paths)
        logger: Logger instance for warnings/errors

    Returns:
        Dictionary mapping filename to redacted file content
        Format: {filename: redacted_content}
    """
    redacted_by_name = {}

    try:
        _, input_files = parse_shell_command(command, initial_cwd=cwd)

        logger.info(f"Extracted {len(input_files)} input files from command: {input_files}")

        for filename in input_files:
            try:
                # Absolute names (or no cwd to anchor on) are used as-is.
                if os.path.isabs(filename) or not cwd:
                    filepath = filename
                else:
                    filepath = os.path.join(cwd, filename)

                if not (os.path.exists(filepath) and os.path.isfile(filepath)):
                    logger.warning(f"File {filename} does not exist or is not a file, skipping")
                    continue

                try:
                    with open(filepath, 'r', encoding='utf-8') as handle:
                        raw_text = handle.read()

                    # Keyed by the name the parser reported, not by filepath.
                    redacted_by_name[filename] = redact(raw_text)
                    logger.info(f"Successfully read and redacted file: {filename}")
                except UnicodeDecodeError:
                    logger.warning(f"File {filename} is not a text file, skipping")
                except Exception as e:
                    logger.warning(f"Failed to read file {filename}: {e}")

            except Exception as e:
                logger.warning(f"Error processing file {filename}: {e}")

    except Exception as e:
        logger.warning(f"Failed to parse command for file extraction: {e}")

    return redacted_by_name
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
async def handle_shell_execution(
    logger: MCPLogger,
    audit_logger: AuditTrailLogger,
    stdin_input: str,
    prompt_id: str,
    event_id: str,
    cwd: Optional[str],
    config: HookConfig,
    tool_name: str,
    is_request: bool = True
):
    """
    Generic shell execution handler - handles both request and response

    Chooses the validation schema, redaction list, display name and audit
    event names for the requested direction, then delegates to
    ``_handle_shell_operation``.

    Args:
        logger: Logger instance
        audit_logger: Audit logger instance
        stdin_input: Raw input string from stdin
        prompt_id: Prompt identifier
        event_id: Event identifier
        cwd: Current working directory
        config: Hook configuration (IDE-specific)
        tool_name: IDE-specific tool name (e.g., "beforeShellExecution", "PreToolUse(Bash)")
        is_request: True for before (request), False for after (response)
    """
    if is_request:
        direction_settings = dict(
            required_fields={"command": str, "cwd": str},
            redact_fields=["command"],
            operation_name="Command",
            audit_event_type="agent_request",
            audit_forwarded_event_type="agent_request_forwarded",
        )
    else:
        direction_settings = dict(
            required_fields={"command": str, "output": str},
            redact_fields=["command", "output"],
            operation_name="Command output",
            audit_event_type="mcp_response",
            audit_forwarded_event_type="mcp_response_forwarded",
        )

    await _handle_shell_operation(
        logger=logger,
        audit_logger=audit_logger,
        stdin_input=stdin_input,
        prompt_id=prompt_id,
        event_id=event_id,
        cwd=cwd,
        config=config,
        is_request=is_request,
        tool_name=tool_name,
        **direction_settings
    )
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
async def _handle_shell_operation(
    logger: MCPLogger,
    audit_logger: AuditTrailLogger,
    stdin_input: str,
    prompt_id: str,
    event_id: str,
    cwd: Optional[str],
    config: HookConfig,
    is_request: bool,
    required_fields: Dict[str, type],
    redact_fields: List[str],
    tool_name: str,
    operation_name: str,
    audit_event_type: str,
    audit_forwarded_event_type: str
):
    """
    Internal shell operation handler - shared logic for request and response

    Flow: validate stdin -> load app uid -> redact the required fields ->
    (requests only) collect redacted contents of the command's input files ->
    write the incoming audit event -> call ``inspect_and_enforce`` -> write the
    forwarded audit event and emit an "allowed" result. Any exception raised
    after the incoming audit event is reported as a "blocked" result; any other
    unexpected exception is reported through ``output_error``.

    Args:
        is_request: True for request inspection, False for response inspection
        required_fields: Fields to validate in input
        redact_fields: Fields to redact for logging and API calls
        tool_name: Hook name (e.g., "beforeShellExecution", "afterShellExecution")
        operation_name: Display name (e.g., "Command", "Command output")
        audit_event_type: Audit event name for incoming operation
        audit_forwarded_event_type: Audit event name for forwarded operation
    """
    session_id = get_session_id()

    logger.info(
        f"{tool_name} handler started (client={config.client_name}, prompt_id={prompt_id}, event_id={event_id}, cwd={cwd})")

    try:
        try:
            validate = create_validator(required_fields=required_fields)
            input_data = validate(stdin_input)
        except ValueError as e:
            logger.error(f"Input validation error: {e}")
            output_error(logger, config.output_format, "permission", str(e))
            return

        app_uid = read_app_uid(logger, get_project_mcpower_dir(cwd))
        audit_logger.set_app_uid(app_uid)

        # Keep only the validated fields; redact the ones flagged as sensitive.
        redacted_data = {
            key: (redact(value) if key in redact_fields else value)
            for key, value in input_data.items()
            if key in required_fields
        }

        # Extract and redact input files for request inspection
        files_dict = {}
        if is_request and "command" in input_data:
            files_dict = extract_and_redact_command_files(input_data["command"], cwd, logger)
            if files_dict:
                logger.info(f"Extracted and redacted {len(files_dict)} files from command")

        def get_audit_data():
            # Requests nest the fields under "params" (plus the file names);
            # responses spread the fields at the root of the event.
            audit_payload = {
                "server": config.server_name,
                "tool": tool_name,
            }
            if is_request:
                audit_payload["params"] = redacted_data
                audit_payload["files"] = list(files_dict) if files_dict else None
            else:
                audit_payload.update(redacted_data)
            return audit_payload

        audit_logger.log_event(
            audit_event_type,
            get_audit_data(),
            event_id=event_id,
            prompt_id=prompt_id
        )

        # Content sent for inspection: redacted fields plus redacted file bodies.
        content_data = dict(redacted_data)
        if files_dict:
            content_data["files"] = files_dict

        try:
            decision = await inspect_and_enforce(
                is_request=is_request,
                session_id=session_id,
                logger=logger,
                audit_logger=audit_logger,
                app_uid=app_uid,
                event_id=event_id,
                server_name=config.server_name,
                tool_name=tool_name,
                content_data=content_data,
                prompt_id=prompt_id,
                cwd=cwd,
                client_name=config.client_name
            )

            audit_logger.log_event(
                audit_forwarded_event_type,
                get_audit_data(),
                event_id=event_id,
                prompt_id=prompt_id
            )

            reasons = decision.get("reasons", [])
            user_message = f"{operation_name} approved"
            agent_message = (
                f"{operation_name} approved: {'; '.join(reasons)}"
                if reasons
                else f"{operation_name} approved by security policy"
            )
            output_result(logger, config.output_format, "permission", True, user_message, agent_message)

        except Exception as e:
            # Decision enforcement failed - block
            error_msg = str(e)
            denied_by_user = "User blocked" in error_msg or "User denied" in error_msg
            if denied_by_user:
                user_message = f"{operation_name} blocked by user"
            else:
                user_message = f"{operation_name} blocked by security policy"

            output_result(logger, config.output_format, "permission", False, user_message, error_msg)

    except Exception as e:
        logger.error(f"Unexpected error in {tool_name} handler: {e}", exc_info=True)
        output_error(logger, config.output_format, "permission", f"Unexpected error: {str(e)}")
|
|
@@ -0,0 +1,394 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Shell command parser using bashlex library.
|
|
4
|
+
Parses shell commands to extract sub-commands and file references using proper bash parsing.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import bashlex
|
|
8
|
+
import os
|
|
9
|
+
from typing import List, Tuple, Set, Optional, Dict
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def parse_shell_command(command: str, initial_cwd: Optional[str] = None) -> Tuple[List[str], List[str]]:
    """
    Parse a shell command using bashlex and extract sub-commands and input files.

    Args:
        command: A shell command string (supports pipes, redirections, etc.)
        initial_cwd: Initial working directory (defaults to current directory)

    Returns:
        A tuple of (sub_commands, input_files) where:
        - sub_commands: Text of each command node, in the order encountered
        - input_files: Sorted list of files that are used as inputs (files that
          only appear as output redirection targets are excluded). Relative
          paths are resolved against the working directory in effect where
          they appear, so the returned paths are not the raw command words.

        If bashlex cannot parse the command, a warning is logged and
        ``([command], [])`` is returned.

    Example (illustrative, with initial_cwd="/work"):
        "cat a.txt > /tmp/b.txt"  ->  (['cat a.txt > /tmp/b.txt'], ['/work/a.txt'])
    """
    # Local import: warnings must not be written to stdout, which callers may
    # be using for their own structured output.
    import logging

    try:
        # Parse the command into an AST
        parts = bashlex.parse(command)
    except Exception as e:
        # Best-effort: an unparsable command yields no files rather than an error.
        logging.getLogger(__name__).warning("bashlex parsing failed: %s", e)
        return ([command], [])

    # Extract sub-commands and files
    sub_commands: List[str] = []
    all_files: Set[str] = set()
    output_files: Set[str] = set()

    # Track directory changes (`cd`) while walking the AST
    context = {
        'cwd': initial_cwd or os.getcwd(),
        'file_to_cwd': {}  # Map each file to the directory it was found in
    }

    for ast in parts:
        _extract_from_ast(ast, command, sub_commands, all_files, output_files, False, context)

    # Remove output-only files from the result
    input_files = sorted(all_files - output_files)

    return sub_commands, input_files
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _extract_from_ast(
|
|
64
|
+
node,
|
|
65
|
+
command: str,
|
|
66
|
+
sub_commands: List[str],
|
|
67
|
+
all_files: Set[str],
|
|
68
|
+
output_files: Set[str],
|
|
69
|
+
parent_is_pipe: bool = False,
|
|
70
|
+
context: Optional[Dict] = None
|
|
71
|
+
) -> None:
|
|
72
|
+
"""
|
|
73
|
+
Recursively extract sub-commands and files from a bashlex AST node.
|
|
74
|
+
|
|
75
|
+
Args:
|
|
76
|
+
node: bashlex AST node
|
|
77
|
+
command: Original command string (for extracting text)
|
|
78
|
+
sub_commands: List to append sub-commands to
|
|
79
|
+
all_files: Set to add all file references to
|
|
80
|
+
output_files: Set to add output-only files to
|
|
81
|
+
parent_is_pipe: True if parent node is a pipe operator
|
|
82
|
+
context: Dictionary with 'cwd' for current working directory
|
|
83
|
+
"""
|
|
84
|
+
if context is None:
|
|
85
|
+
context = {'cwd': os.getcwd()}
|
|
86
|
+
|
|
87
|
+
# Check node kind to determine type
|
|
88
|
+
node_kind = getattr(node, 'kind', None)
|
|
89
|
+
|
|
90
|
+
if node_kind == 'list':
|
|
91
|
+
# List node contains multiple parts connected by operators (&&, ||, ;)
|
|
92
|
+
# Process sequentially to track directory changes
|
|
93
|
+
if hasattr(node, 'parts'):
|
|
94
|
+
for part in node.parts:
|
|
95
|
+
_extract_from_ast(part, command, sub_commands, all_files, output_files, False, context)
|
|
96
|
+
|
|
97
|
+
elif node_kind == 'pipeline':
|
|
98
|
+
# Pipeline node - extract individual commands
|
|
99
|
+
_extract_pipeline(node, command, sub_commands, all_files, output_files, context)
|
|
100
|
+
|
|
101
|
+
elif node_kind == 'command':
|
|
102
|
+
# Command node - extract the command text and analyze its parts
|
|
103
|
+
if hasattr(node, 'pos'):
|
|
104
|
+
start, end = node.pos
|
|
105
|
+
cmd_text = command[start:end]
|
|
106
|
+
sub_commands.append(cmd_text)
|
|
107
|
+
|
|
108
|
+
# Get the command name (first word) for context
|
|
109
|
+
cmd_name = None
|
|
110
|
+
if hasattr(node, 'parts') and len(node.parts) > 0:
|
|
111
|
+
first_part = node.parts[0]
|
|
112
|
+
if hasattr(first_part, 'word'):
|
|
113
|
+
cmd_name = first_part.word
|
|
114
|
+
|
|
115
|
+
# Check if this is a cd command and update context
|
|
116
|
+
if cmd_name == 'cd' and hasattr(node, 'parts') and len(node.parts) > 1:
|
|
117
|
+
second_part = node.parts[1]
|
|
118
|
+
if hasattr(second_part, 'word'):
|
|
119
|
+
target_dir = second_part.word
|
|
120
|
+
# Resolve the new directory
|
|
121
|
+
if os.path.isabs(target_dir):
|
|
122
|
+
context['cwd'] = target_dir
|
|
123
|
+
else:
|
|
124
|
+
context['cwd'] = os.path.normpath(os.path.join(context['cwd'], target_dir))
|
|
125
|
+
|
|
126
|
+
# Extract files from command parts (arguments and redirections)
|
|
127
|
+
if hasattr(node, 'parts'):
|
|
128
|
+
for i, part in enumerate(node.parts):
|
|
129
|
+
part_kind = getattr(part, 'kind', None)
|
|
130
|
+
if part_kind == 'redirect':
|
|
131
|
+
_extract_redirect(part, command, all_files, output_files, context)
|
|
132
|
+
elif i > 0: # Skip the command name itself (index 0)
|
|
133
|
+
_extract_files_from_node(part, command, all_files, output_files, cmd_name, context)
|
|
134
|
+
|
|
135
|
+
elif node_kind == 'compound':
|
|
136
|
+
# Compound command (like if, while, for, etc.)
|
|
137
|
+
if hasattr(node, 'list'):
|
|
138
|
+
for item in node.list:
|
|
139
|
+
_extract_from_ast(item, command, sub_commands, all_files, output_files, False, context)
|
|
140
|
+
|
|
141
|
+
elif node_kind == 'operator':
|
|
142
|
+
# Operator node (like &&, ||, ;) - ignore
|
|
143
|
+
pass
|
|
144
|
+
|
|
145
|
+
elif node_kind == 'pipe':
|
|
146
|
+
# Pipe node - ignore (we handle pipes at the pipeline level)
|
|
147
|
+
pass
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _extract_pipeline(node, command: str, sub_commands: List[str], all_files: Set[str], output_files: Set[str], context: Dict) -> None:
|
|
151
|
+
"""Extract commands from a pipeline node."""
|
|
152
|
+
if hasattr(node, 'parts'):
|
|
153
|
+
for part in node.parts:
|
|
154
|
+
part_kind = getattr(part, 'kind', None)
|
|
155
|
+
# Skip pipe nodes, only process commands
|
|
156
|
+
if part_kind != 'pipe':
|
|
157
|
+
_extract_from_ast(part, command, sub_commands, all_files, output_files, True, context)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _extract_files_from_node(node, command: str, all_files: Set[str], output_files: Set[str], cmd_name: Optional[str] = None, context: Optional[Dict] = None) -> None:
|
|
161
|
+
"""Extract file references from a node.
|
|
162
|
+
|
|
163
|
+
Args:
|
|
164
|
+
node: bashlex AST node
|
|
165
|
+
command: Original command string
|
|
166
|
+
all_files: Set to add all file references to
|
|
167
|
+
output_files: Set to add output-only files to
|
|
168
|
+
cmd_name: Name of the command this node belongs to (for context)
|
|
169
|
+
context: Dictionary with 'cwd' for current working directory
|
|
170
|
+
"""
|
|
171
|
+
if context is None:
|
|
172
|
+
context = {'cwd': os.getcwd()}
|
|
173
|
+
|
|
174
|
+
node_kind = getattr(node, 'kind', None)
|
|
175
|
+
|
|
176
|
+
if node_kind == 'word':
|
|
177
|
+
# Word node - check if it's a file reference
|
|
178
|
+
word = node.word if hasattr(node, 'word') else None
|
|
179
|
+
|
|
180
|
+
if word and _looks_like_file(word, cmd_name):
|
|
181
|
+
# Resolve relative paths against current working directory
|
|
182
|
+
resolved_path = _resolve_path(word, context['cwd'])
|
|
183
|
+
all_files.add(resolved_path)
|
|
184
|
+
|
|
185
|
+
# Recursively check parts (for command substitutions, etc.)
|
|
186
|
+
if hasattr(node, 'parts'):
|
|
187
|
+
for part in node.parts:
|
|
188
|
+
_extract_files_from_node(part, command, all_files, output_files, cmd_name, context)
|
|
189
|
+
|
|
190
|
+
elif node_kind == 'commandsubstitution':
|
|
191
|
+
# Command substitution $(...) - recursively parse
|
|
192
|
+
if hasattr(node, 'command'):
|
|
193
|
+
_extract_from_ast(node.command, command, [], all_files, output_files, False, context)
|
|
194
|
+
|
|
195
|
+
elif node_kind == 'processsubstitution':
|
|
196
|
+
# Process substitution <(...) or >(...) - recursively parse
|
|
197
|
+
if hasattr(node, 'command'):
|
|
198
|
+
_extract_from_ast(node.command, command, [], all_files, output_files, False, context)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _extract_redirect(redirect, command: str, all_files: Set[str], output_files: Set[str], context: Optional[Dict] = None) -> None:
|
|
202
|
+
"""Extract file references from redirection nodes."""
|
|
203
|
+
if context is None:
|
|
204
|
+
context = {'cwd': os.getcwd()}
|
|
205
|
+
|
|
206
|
+
redirect_type = getattr(redirect, 'type', None)
|
|
207
|
+
|
|
208
|
+
# Get the target of the redirection
|
|
209
|
+
if hasattr(redirect, 'output'):
|
|
210
|
+
target = redirect.output
|
|
211
|
+
target_word = target.word if hasattr(target, 'word') else None
|
|
212
|
+
|
|
213
|
+
# Redirections always point to files, not directories
|
|
214
|
+
if target_word and _looks_like_file(target_word, None):
|
|
215
|
+
# Resolve relative paths against current working directory
|
|
216
|
+
resolved_path = _resolve_path(target_word, context['cwd'])
|
|
217
|
+
|
|
218
|
+
# Determine if it's input or output
|
|
219
|
+
if redirect_type in ('>', '>>', '>&', '>|', '&>'):
|
|
220
|
+
# Output redirection
|
|
221
|
+
output_files.add(resolved_path)
|
|
222
|
+
all_files.add(resolved_path)
|
|
223
|
+
elif redirect_type == '<':
|
|
224
|
+
# Input redirection
|
|
225
|
+
all_files.add(resolved_path)
|
|
226
|
+
else:
|
|
227
|
+
# Unknown, be conservative and include it
|
|
228
|
+
all_files.add(resolved_path)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def _resolve_path(path: str, cwd: str) -> str:
|
|
232
|
+
"""
|
|
233
|
+
Resolve a file path relative to a working directory.
|
|
234
|
+
|
|
235
|
+
Args:
|
|
236
|
+
path: File path (relative or absolute)
|
|
237
|
+
cwd: Current working directory
|
|
238
|
+
|
|
239
|
+
Returns:
|
|
240
|
+
Absolute path
|
|
241
|
+
"""
|
|
242
|
+
if os.path.isabs(path):
|
|
243
|
+
return path
|
|
244
|
+
else:
|
|
245
|
+
return os.path.normpath(os.path.join(cwd, path))
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _looks_like_file(word: str, cmd_name: Optional[str] = None) -> bool:
|
|
249
|
+
"""
|
|
250
|
+
Heuristic to determine if a word is an actual readable file path.
|
|
251
|
+
Not patterns, not variables, not directories - actual files we can open.
|
|
252
|
+
|
|
253
|
+
Args:
|
|
254
|
+
word: A word from the command
|
|
255
|
+
cmd_name: The command this word belongs to (for context)
|
|
256
|
+
|
|
257
|
+
Returns:
|
|
258
|
+
True if it looks like a file path
|
|
259
|
+
"""
|
|
260
|
+
if not word:
|
|
261
|
+
return False
|
|
262
|
+
|
|
263
|
+
# Commands that take directory arguments, not files
|
|
264
|
+
DIRECTORY_COMMANDS = {
|
|
265
|
+
'cd', 'pushd', 'popd', 'mkdir', 'rmdir', 'chdir',
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
# If this is a directory command, reject all arguments
|
|
269
|
+
if cmd_name and cmd_name in DIRECTORY_COMMANDS:
|
|
270
|
+
return False
|
|
271
|
+
|
|
272
|
+
# Exclude URLs (http://, https://, ftp://, file://, etc.)
|
|
273
|
+
if '://' in word:
|
|
274
|
+
return False
|
|
275
|
+
|
|
276
|
+
# Exclude shell meta-characters and patterns
|
|
277
|
+
if any(char in word for char in ['*', '?', '[', ']']): # Glob patterns
|
|
278
|
+
return False
|
|
279
|
+
|
|
280
|
+
if '$' in word or '`' in word: # Variables or command substitution
|
|
281
|
+
return False
|
|
282
|
+
|
|
283
|
+
# Exclude sed/awk patterns
|
|
284
|
+
if word.startswith('s/') and word.count('/') >= 2:
|
|
285
|
+
return False
|
|
286
|
+
|
|
287
|
+
# Exclude regex patterns
|
|
288
|
+
if word.startswith('^') or word.endswith('$'):
|
|
289
|
+
return False
|
|
290
|
+
|
|
291
|
+
# Exclude options
|
|
292
|
+
if word.startswith('-') or word.startswith('+'):
|
|
293
|
+
return False
|
|
294
|
+
|
|
295
|
+
# Exclude bare dots
|
|
296
|
+
if word in {'.', '..'}:
|
|
297
|
+
return False
|
|
298
|
+
|
|
299
|
+
# Exclude bare directories (but /tmp/file is OK)
|
|
300
|
+
if word in {'/', '/tmp', '/dev', '/usr', '/etc', '/var', '/opt', '/home'}:
|
|
301
|
+
return False
|
|
302
|
+
|
|
303
|
+
# --- POSITIVE CHECKS ---
|
|
304
|
+
|
|
305
|
+
# Has extension = very likely a file
|
|
306
|
+
if '.' in word and not word.startswith('.'):
|
|
307
|
+
# Get the extension
|
|
308
|
+
parts = word.rsplit('.', 1)
|
|
309
|
+
if len(parts) == 2:
|
|
310
|
+
name, ext = parts
|
|
311
|
+
# Be more permissive with extensions
|
|
312
|
+
if name and ext and ext.replace('_', '').replace('-', '').isalnum():
|
|
313
|
+
if len(ext) <= 10: # Most extensions are < 10 chars
|
|
314
|
+
return True
|
|
315
|
+
|
|
316
|
+
# Has path separator = could be a file
|
|
317
|
+
if '/' in word:
|
|
318
|
+
# Check if it's a path to something specific (not just dirs)
|
|
319
|
+
if not word.endswith('/'): # Not ending with / (directory indicator)
|
|
320
|
+
parts = word.split('/')
|
|
321
|
+
last_part = parts[-1] if parts else ''
|
|
322
|
+
|
|
323
|
+
# If last part has extension, definitely a file
|
|
324
|
+
if '.' in last_part and not last_part.startswith('.'):
|
|
325
|
+
return True
|
|
326
|
+
|
|
327
|
+
# If it's under specific directories that contain files
|
|
328
|
+
if word.startswith('/dev/') and len(word) > 5: # /dev/null, /dev/tty, etc.
|
|
329
|
+
return True
|
|
330
|
+
if word.startswith('/tmp/') and len(word) > 5: # /tmp/anything
|
|
331
|
+
return True
|
|
332
|
+
if word.startswith('/etc/') and len(word) > 5: # /etc/passwd, etc.
|
|
333
|
+
return True
|
|
334
|
+
if word.startswith('/usr/bin/') and len(word) > 9: # Executables
|
|
335
|
+
return True
|
|
336
|
+
if word.startswith('/usr/local/bin/') and len(word) > 15:
|
|
337
|
+
return True
|
|
338
|
+
|
|
339
|
+
# If last part looks like a filename (even without extension)
|
|
340
|
+
if last_part and last_part.replace('-', '').replace('_', '').isalnum():
|
|
341
|
+
# Could be an executable or script
|
|
342
|
+
return True
|
|
343
|
+
|
|
344
|
+
# Check for well-known files without extensions (case-insensitive)
|
|
345
|
+
filename_only = word.split('/')[-1].lower()
|
|
346
|
+
if filename_only in {'makefile', 'readme', 'license', 'dockerfile',
|
|
347
|
+
'gemfile', 'rakefile', 'procfile', 'vagrantfile',
|
|
348
|
+
'jenkinsfile', 'cakefile', 'gulpfile', 'gruntfile',
|
|
349
|
+
'brewfile', 'berksfile', 'guardfile', 'fastfile',
|
|
350
|
+
'cartfile', 'appfile', 'podfile', 'snapfile'}:
|
|
351
|
+
return True
|
|
352
|
+
|
|
353
|
+
# Stand-alone word without path - be conservative
|
|
354
|
+
if '/' not in word:
|
|
355
|
+
# If it has an extension, probably a file in current directory
|
|
356
|
+
if '.' in word and not word.startswith('.'):
|
|
357
|
+
return True
|
|
358
|
+
|
|
359
|
+
# Well-known executable names without extensions
|
|
360
|
+
if word in {'script', 'run', 'build', 'test', 'deploy', 'install',
|
|
361
|
+
'configure', 'setup', 'bootstrap', 'init'}:
|
|
362
|
+
return True
|
|
363
|
+
|
|
364
|
+
# Otherwise, we can't be sure it's a file (could be a command)
|
|
365
|
+
return False
|
|
366
|
+
|
|
367
|
+
return False
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
# Manual smoke test: run this module directly to print the parser's view of
# a handful of representative commands.
if __name__ == "__main__":
    test_cases = (
        "cd /Users/user/src/project/server && python test.py",
        "python a.py | tee b.log",
        "cat a.txt > /tmp/b.txt",
        "grep foo file.txt | sort | uniq > output.txt",
        "cat file1.txt file2.txt | grep pattern > result.txt",
        "python script.py < input.txt > output.txt",
        "ls -la /tmp | grep '\\.txt$' | wc -l",
        "tar -xzf archive.tar.gz",
        "find . -name '*.py' | xargs grep pattern",
    )

    print("Shell Command Parser (bashlex) - Test Cases\n" + "="*60)
    for cmd in test_cases:
        try:
            sub_cmds, files = parse_shell_command(cmd)
        except Exception as e:
            print(f"\nCommand: {cmd}")
            print(f"Error: {e}")
        else:
            print(f"\nCommand: {cmd}")
            print(f"Sub-commands: {sub_cmds}")
            print(f"Input files: {files}")
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Common types for IDE hooks - IDE-agnostic
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Callable, Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class OutputFormat:
    """
    Defines how to format hook output for a specific IDE.
    This is a generic interface - IDEs provide their own implementations.

    Bundles three exit codes with a formatter callable; how they are consumed
    is decided by the code that emits hook results (not shown here).
    """
    # Exit codes - presumably the process exit status for an allowed action,
    # a denied action and an internal error respectively (verify against the
    # output helpers that consume this object).
    allow_exit_code: int
    deny_exit_code: int
    error_exit_code: int

    # Output formatter function
    # Args: (hook_type: str, allowed: bool, user_msg: Optional[str], agent_msg: Optional[str]) -> str
    # Returns the text to emit for the hook.
    formatter: Callable[[str, bool, Optional[str], Optional[str]], str]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class HookConfig:
    """
    Configuration for a specific hook execution.
    IDE-specific modules create instances of this with their own output format.

    All four fields are required; the dataclass defines no defaults.
    """
    # How results are rendered and which exit codes are used for this IDE
    output_format: OutputFormat
    server_name: str  # IDE-specific tool server name
    client_name: str  # IDE-specific client name (e.g. "cursor", "claude-code")
    max_content_length: int  # Maximum content length before skipping API call
|