mcpower-proxy 0.0.65__py3-none-any.whl → 0.0.74__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcpower-proxy might be problematic. Click here for more details.
- ide_tools/__init__.py +12 -0
- ide_tools/common/__init__.py +5 -0
- ide_tools/common/hooks/__init__.py +5 -0
- ide_tools/common/hooks/init.py +124 -0
- ide_tools/common/hooks/output.py +63 -0
- ide_tools/common/hooks/prompt_submit.py +133 -0
- ide_tools/common/hooks/read_file.py +167 -0
- ide_tools/common/hooks/shell_execution.py +255 -0
- ide_tools/common/hooks/shell_parser_bashlex.py +277 -0
- ide_tools/common/hooks/types.py +34 -0
- ide_tools/common/hooks/utils.py +286 -0
- ide_tools/cursor/__init__.py +11 -0
- ide_tools/cursor/constants.py +58 -0
- ide_tools/cursor/format.py +35 -0
- ide_tools/cursor/router.py +100 -0
- ide_tools/router.py +48 -0
- main.py +11 -4
- {mcpower_proxy-0.0.65.dist-info → mcpower_proxy-0.0.74.dist-info}/METADATA +4 -3
- mcpower_proxy-0.0.74.dist-info/RECORD +60 -0
- {mcpower_proxy-0.0.65.dist-info → mcpower_proxy-0.0.74.dist-info}/top_level.txt +1 -0
- modules/apis/security_policy.py +11 -6
- modules/decision_handler.py +219 -0
- modules/logs/audit_trail.py +16 -15
- modules/logs/logger.py +14 -18
- modules/redaction/gitleaks_rules.py +1 -1
- modules/redaction/pii_rules.py +0 -48
- modules/redaction/redactor.py +112 -107
- modules/ui/__init__.py +1 -1
- modules/ui/confirmation.py +0 -1
- modules/utils/cli.py +36 -6
- modules/utils/ids.py +55 -10
- modules/utils/json.py +3 -3
- wrapper/__version__.py +1 -1
- wrapper/middleware.py +135 -217
- wrapper/server.py +19 -11
- mcpower_proxy-0.0.65.dist-info/RECORD +0 -43
- {mcpower_proxy-0.0.65.dist-info → mcpower_proxy-0.0.74.dist-info}/WHEEL +0 -0
- {mcpower_proxy-0.0.65.dist-info → mcpower_proxy-0.0.74.dist-info}/entry_points.txt +0 -0
- {mcpower_proxy-0.0.65.dist-info → mcpower_proxy-0.0.74.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Common shell execution handler - IDE-agnostic
|
|
3
|
+
|
|
4
|
+
Handles both request (before) and response (after) inspection for shell commands.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
from typing import Optional, Dict, List
|
|
9
|
+
|
|
10
|
+
from modules.logs.audit_trail import AuditTrailLogger
|
|
11
|
+
from modules.logs.logger import MCPLogger
|
|
12
|
+
from modules.redaction import redact
|
|
13
|
+
from modules.utils.ids import get_session_id, read_app_uid, get_project_mcpower_dir
|
|
14
|
+
from .output import output_result, output_error
|
|
15
|
+
from .shell_parser_bashlex import parse_shell_command
|
|
16
|
+
from .types import HookConfig
|
|
17
|
+
from .utils import create_validator, inspect_and_enforce
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _resolve_command_path(filename: str, cwd: Optional[str]) -> str:
    """Return the filesystem path to open for *filename* as written in a command.

    A leading ``~`` is expanded to the user's home directory, as a shell would
    do before running the command. A relative path is joined onto *cwd* when
    one is given; absolute paths, and relative paths without a *cwd*, are
    returned unchanged.
    """
    # NOTE(review): the parser may hand over words with quotes already removed,
    # in which case a quoted '~' is expanded too. That errs on the side of
    # inspecting more files - confirm this is acceptable.
    expanded = os.path.expanduser(filename)
    if cwd and not os.path.isabs(expanded):
        return os.path.join(cwd, expanded)
    return expanded


def extract_and_redact_command_files(
        command: str,
        cwd: Optional[str],
        logger: MCPLogger
) -> Dict[str, str]:
    """
    Extract input files from a shell command and return their redacted contents.

    This is best effort: any failure (parsing the command, resolving a path,
    reading or decoding a file) is logged as a warning and the affected file
    is left out. The function itself does not raise.

    Args:
        command: The shell command to parse
        cwd: Current working directory (for resolving relative paths)
        logger: Logger instance for warnings/errors

    Returns:
        Dictionary mapping filename to redacted file content
        Format: {filename: redacted_content}
        Keys are the filenames exactly as they appear in the command, not the
        resolved paths.
    """
    files_dict: Dict[str, str] = {}

    try:
        # Only the input-file list is needed here; the sub-command list is discarded.
        _, input_files = parse_shell_command(command)

        logger.info(f"Extracted {len(input_files)} input files from command: {input_files}")

        for filename in input_files:
            try:
                filepath = _resolve_command_path(filename, cwd)

                # isfile() is False for missing paths, directories and devices alike.
                if not os.path.isfile(filepath):
                    logger.warning(f"File {filename} does not exist or is not a file, skipping")
                    continue

                try:
                    with open(filepath, 'r', encoding='utf-8') as f:
                        content = f.read()

                    # Store under the original filename, not the resolved path.
                    files_dict[filename] = redact(content)
                    logger.info(f"Successfully read and redacted file: {filename}")

                except UnicodeDecodeError:
                    logger.warning(f"File {filename} is not a text file, skipping")
                except Exception as e:
                    logger.warning(f"Failed to read file {filename}: {e}")

            except Exception as e:
                logger.warning(f"Error processing file {filename}: {e}")

    except Exception as e:
        logger.warning(f"Failed to parse command for file extraction: {e}")

    return files_dict
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
async def handle_shell_execution(
        logger: MCPLogger,
        audit_logger: AuditTrailLogger,
        stdin_input: str,
        prompt_id: str,
        event_id: str,
        cwd: Optional[str],
        config: HookConfig,
        tool_name: str,
        is_request: bool = True
):
    """
    Entry point for shell hooks, covering both the "before" and "after" phase.

    Picks the phase-specific settings (fields to validate, fields to redact,
    display name and audit event names) and delegates to
    ``_handle_shell_operation``.

    Args:
        logger: Logger instance
        audit_logger: Audit logger instance
        stdin_input: Raw input string from stdin
        prompt_id: Prompt identifier
        event_id: Event identifier
        cwd: Current working directory
        config: Hook configuration (IDE-specific)
        tool_name: IDE-specific tool name (e.g., "beforeShellExecution", "PreToolUse(Bash)")
        is_request: True for before (request), False for after (response)
    """
    if is_request:
        # Before execution: the command and its cwd are validated, the command is redacted.
        phase_settings = {
            "required_fields": {"command": str, "cwd": str},
            "redact_fields": ["command"],
            "operation_name": "Command",
            "audit_event_type": "agent_request",
            "audit_forwarded_event_type": "agent_request_forwarded",
        }
    else:
        # After execution: the command and its output are validated and redacted.
        phase_settings = {
            "required_fields": {"command": str, "output": str},
            "redact_fields": ["command", "output"],
            "operation_name": "Command output",
            "audit_event_type": "mcp_response",
            "audit_forwarded_event_type": "mcp_response_forwarded",
        }

    await _handle_shell_operation(
        logger=logger,
        audit_logger=audit_logger,
        stdin_input=stdin_input,
        prompt_id=prompt_id,
        event_id=event_id,
        cwd=cwd,
        config=config,
        is_request=is_request,
        tool_name=tool_name,
        **phase_settings
    )
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
async def _handle_shell_operation(
        logger: MCPLogger,
        audit_logger: AuditTrailLogger,
        stdin_input: str,
        prompt_id: str,
        event_id: str,
        cwd: Optional[str],
        config: HookConfig,
        is_request: bool,
        required_fields: Dict[str, type],
        redact_fields: List[str],
        tool_name: str,
        operation_name: str,
        audit_event_type: str,
        audit_forwarded_event_type: str
):
    """
    Shared implementation behind the request and response shell hooks.

    Steps: validate stdin, redact the required fields, collect redacted input
    files (requests only), write the incoming audit event, run
    ``inspect_and_enforce``, then write the forwarded audit event and emit an
    "allowed" result. Any exception raised by that last stage is emitted as a
    "blocked" result; any other exception is emitted through ``output_error``.

    Args:
        is_request: True for request inspection, False for response inspection
        required_fields: Fields to validate in input
        redact_fields: Fields to redact for logging and API calls
        tool_name: Hook name (e.g., "beforeShellExecution", "afterShellExecution")
        operation_name: Display name (e.g., "Command", "Command output")
        audit_event_type: Audit event name for incoming operation
        audit_forwarded_event_type: Audit event name for forwarded operation
    """
    session_id = get_session_id()

    logger.info(
        f"{tool_name} handler started (client={config.client_name}, prompt_id={prompt_id}, event_id={event_id}, cwd={cwd})")

    try:
        try:
            validate = create_validator(required_fields=required_fields)
            input_data = validate(stdin_input)
        except ValueError as validation_error:
            logger.error(f"Input validation error: {validation_error}")
            output_error(logger, config.output_format, "permission", str(validation_error))
            return

        app_uid = read_app_uid(logger, get_project_mcpower_dir(cwd))
        audit_logger.set_app_uid(app_uid)

        # Keep only the required fields, redacting those listed in redact_fields.
        redacted_data = {
            field: (redact(value) if field in redact_fields else value)
            for field, value in input_data.items()
            if field in required_fields
        }

        # File contents are only collected before the command runs.
        files_dict = {}
        if is_request and "command" in input_data:
            files_dict = extract_and_redact_command_files(input_data["command"], cwd, logger)
            if files_dict:
                logger.info(f"Extracted and redacted {len(files_dict)} files from command")

        def get_audit_data():
            # Requests nest the fields under "params" and list the file names;
            # responses unpack the fields at the root of the event.
            payload = {"server": config.server_name, "tool": tool_name}
            if is_request:
                payload["params"] = redacted_data
                payload["files"] = list(files_dict.keys()) if files_dict else None
            else:
                payload.update(redacted_data)
            return payload

        audit_logger.log_event(audit_event_type, get_audit_data(), event_id=event_id)

        # Payload for inspection: redacted fields plus redacted file contents, if any.
        content_data = dict(redacted_data)
        if files_dict:
            content_data["files"] = files_dict

        try:
            decision = await inspect_and_enforce(
                is_request=is_request,
                session_id=session_id,
                logger=logger,
                audit_logger=audit_logger,
                app_uid=app_uid,
                event_id=event_id,
                server_name=config.server_name,
                tool_name=tool_name,
                content_data=content_data,
                prompt_id=prompt_id,
                cwd=cwd,
                client_name=config.client_name
            )

            audit_logger.log_event(audit_forwarded_event_type, get_audit_data(), event_id=event_id)

            reasons = decision.get("reasons", [])
            user_message = f"{operation_name} approved"
            if reasons:
                agent_message = f"{operation_name} approved: {'; '.join(reasons)}"
            else:
                agent_message = f"{operation_name} approved by security policy"
            output_result(logger, config.output_format, "permission", True, user_message, agent_message)

        except Exception as enforcement_error:
            # Anything raised while enforcing or reporting the decision blocks the operation.
            error_msg = str(enforcement_error)
            blocked_by_user = any(marker in error_msg for marker in ("User blocked", "User denied"))
            if blocked_by_user:
                user_message = f"{operation_name} blocked by user"
            else:
                user_message = f"{operation_name} blocked by security policy"

            output_result(logger, config.output_format, "permission", False, user_message, error_msg)

    except Exception as e:
        logger.error(f"Unexpected error in {tool_name} handler: {e}", exc_info=True)
        output_error(logger, config.output_format, "permission", f"Unexpected error: {str(e)}")
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Shell command parser using bashlex library.
|
|
4
|
+
Parses shell commands to extract sub-commands and file references using proper bash parsing.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import bashlex
|
|
8
|
+
from typing import List, Tuple, Set, Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def parse_shell_command(command: str) -> Tuple[List[str], List[str]]:
    """
    Parse a shell command using bashlex and extract sub-commands and input files.

    If bashlex cannot parse the command, a warning is written to stderr and
    the whole command is returned as the only sub-command, with no input
    files. The command is not split in that case.

    Args:
        command: A shell command string (supports pipes, redirections, etc.)

    Returns:
        A tuple of (sub_commands, input_files) where:
        - sub_commands: List of individual commands when split by pipes
        - input_files: Sorted list of files that are used as inputs (excludes output-only files)

    Examples:
        >>> parse_shell_command("python a.py | tee b.log")
        (['python a.py', 'tee b.log'], ['a.py', 'b.log'])

        >>> parse_shell_command("cat a.txt > /tmp/b.txt")
        (['cat a.txt > /tmp/b.txt'], ['a.txt'])

        >>> parse_shell_command("grep foo file.txt | sort | uniq > output.txt")
        (['grep foo file.txt', 'sort', 'uniq > output.txt'], ['file.txt'])
    """
    try:
        # Parse the command into a list of AST nodes.
        parts = bashlex.parse(command)
    except Exception as e:
        # Deliberately broad: parsing is best effort and must not break the caller.
        # The warning goes to stderr so that this library function never
        # writes to stdout, which belongs to whoever imports it.
        import sys
        print(f"Warning: bashlex parsing failed: {e}", file=sys.stderr)
        return ([command], [])

    sub_commands: List[str] = []
    all_files: Set[str] = set()
    output_files: Set[str] = set()

    for ast in parts:
        _extract_from_ast(ast, command, sub_commands, all_files, output_files)

    # Drop files recorded as outputs; sort so the result is deterministic.
    input_files = sorted(all_files - output_files)

    return sub_commands, input_files
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _extract_from_ast(
        node,
        command: str,
        sub_commands: List[str],
        all_files: Set[str],
        output_files: Set[str],
        parent_is_pipe: bool = False
) -> None:
    """
    Recursively extract sub-commands and files from a bashlex AST node.

    Nodes are inspected by their ``kind`` attribute. Besides lists, pipelines
    and simple commands, this descends into compound commands and the control
    flow nodes they contain (``if``, ``for``, ``while``, ``until``,
    ``function``), so that commands nested in e.g.
    ``if true; then cat a.txt; fi`` are also reported.

    Args:
        node: bashlex AST node
        command: Original command string (for extracting text)
        sub_commands: List to append sub-commands to
        all_files: Set to add all file references to
        output_files: Set to add output-only files to
        parent_is_pipe: True if parent node is a pipe operator. Accepted for
            interface compatibility; this function does not consult it.
    """
    node_kind = getattr(node, 'kind', None)
    # A missing or None 'parts' attribute is treated as "no children".
    parts = getattr(node, 'parts', None) or ()

    if node_kind == 'list':
        # List node contains multiple parts connected by operators
        for part in parts:
            _extract_from_ast(part, command, sub_commands, all_files, output_files, False)

    elif node_kind == 'pipeline':
        # Pipeline node - extract individual commands
        _extract_pipeline(node, command, sub_commands, all_files, output_files)

    elif node_kind == 'command':
        # Command node - record its source text, then analyze its parts
        if hasattr(node, 'pos'):
            start, end = node.pos
            sub_commands.append(command[start:end])

        # Extract files from command parts (arguments and redirections)
        for part in parts:
            if getattr(part, 'kind', None) == 'redirect':
                _extract_redirect(part, command, all_files, output_files)
            else:
                _extract_files_from_node(part, command, all_files, output_files)

    elif node_kind == 'compound':
        # Compound command: groups, subshells and control flow constructs
        for item in getattr(node, 'list', None) or ():
            _extract_from_ast(item, command, sub_commands, all_files, output_files, False)
        # Redirections applied to the whole compound command, e.g. "{ ...; } < in.txt"
        for redirect in getattr(node, 'redirects', None) or ():
            _extract_redirect(redirect, command, all_files, output_files)

    elif node_kind in ('if', 'for', 'while', 'until', 'function'):
        # Control flow node: its parts mix reserved words, words and nested commands
        for part in parts:
            _extract_from_ast(part, command, sub_commands, all_files, output_files, False)

    elif node_kind == 'word':
        # Only reached through control flow parts, e.g. the items of "for f in a.txt b.txt"
        _extract_files_from_node(node, command, all_files, output_files)

    # Operators (&&, ||, ;), pipes, reserved words and any other node kind
    # carry no command or file of their own and are ignored.
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _extract_pipeline(node, command: str, sub_commands: List[str], all_files: Set[str], output_files: Set[str]) -> None:
    """Walk each stage of a pipeline node, skipping the pipe separators between stages."""
    if not hasattr(node, 'parts'):
        return

    stages = (part for part in node.parts if getattr(part, 'kind', None) != 'pipe')
    for stage in stages:
        # Each stage is walked with parent_is_pipe=True.
        _extract_from_ast(stage, command, sub_commands, all_files, output_files, True)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _extract_files_from_node(node, command: str, all_files: Set[str], output_files: Set[str]) -> None:
    """Collect file references from a word node or a command/process substitution node.

    Nodes of any other kind are ignored.
    """
    kind = getattr(node, 'kind', None)

    if kind in ('commandsubstitution', 'processsubstitution'):
        # $(...), <(...) or >(...): walk the nested command. Its sub-commands
        # go into a throwaway list; only its file references are kept.
        if hasattr(node, 'command'):
            _extract_from_ast(node.command, command, [], all_files, output_files, False)
        return

    if kind != 'word':
        return

    text = getattr(node, 'word', None)
    if text and _looks_like_file(text):
        all_files.add(text)

    # A word may contain nested nodes (command substitutions, etc.).
    if hasattr(node, 'parts'):
        for child in node.parts:
            _extract_files_from_node(child, command, all_files, output_files)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _extract_redirect(redirect, command: str, all_files: Set[str], output_files: Set[str]) -> None:
    """Extract file references from redirection nodes.

    Only targets that may be read are recorded, and they go into *all_files*.
    Targets of output redirections (``>``, ``>>``, ``>&``, ``>|``, ``&>``) are
    not recorded in either set.

    Why: ``parse_shell_command`` computes the inputs as
    ``all_files - output_files``. Putting an output target in *output_files*
    would also remove a file that the same command reads, as in
    ``cmd data.env >> data.env``, and hide it from inspection. Leaving output
    targets out entirely keeps output-only files out of the inputs without
    that side effect.

    Args:
        redirect: bashlex redirect node
        command: Original command string (unused here)
        all_files: Set to add readable redirect targets to
        output_files: Accepted for interface compatibility; left untouched
    """
    redirect_type = getattr(redirect, 'type', None)

    # The target may be missing or a plain file descriptor (as in "2>&1");
    # neither has a 'word' attribute.
    target = getattr(redirect, 'output', None)
    target_word = getattr(target, 'word', None)

    if not target_word or not _looks_like_file(target_word):
        return

    if redirect_type in ('>', '>>', '>&', '>|', '&>'):
        # Output redirection: the target is written, not read.
        return

    # Input redirection ('<') or an unrecognized type: be conservative and include it.
    all_files.add(target_word)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _looks_like_file(word: str) -> bool:
    """
    Heuristic to determine if a word looks like a file path.

    Exclusion rules run first (globs, shell expansions, sed/regex patterns,
    options, bare directories), followed by the positive rules: ``name.ext``,
    bare dotfiles such as ``.env``, paths whose last component contains a dot,
    entries under ``/dev/`` or ``/tmp/``, and a few well-known extensionless
    names.

    Args:
        word: A word from the command

    Returns:
        True if it looks like a file path
    """
    if not word:
        return False

    # --- Filter out obvious non-files ---

    # Shell glob patterns (wildcards without actual path)
    if word.startswith('*') and '/' not in word:
        return False

    # Shell expansions
    if word.startswith('$') or '${' in word or '$(' in word:
        return False

    # sed-style substitution patterns (contain / as delimiter but are patterns)
    if word.startswith('s/') and word.count('/') >= 2:
        return False

    # Regex patterns (escaped characters or anchors)
    if '\\' in word or word.startswith('^') or word.endswith('$'):
        return False

    # Tokens that look like options
    if word.startswith(('-', '+', '!')):
        return False

    # Current/parent directory references
    if word in {'.', '..'}:
        return False

    # Common directories that are just paths (not files)
    if word in {'/tmp', '/dev', '/usr', '/etc', '/var', '/opt', '/home'}:
        return False

    # --- Positive patterns ---

    # name.ext with an alphanumeric extension of 1-4 characters.
    # Words of three characters or fewer are not matched by this rule.
    if '.' in word and not word.startswith(('.', '*')) and len(word) > 3:
        extension = word.rsplit('.', 1)[1]
        if 1 <= len(extension) <= 4 and extension.isalnum():
            return True

    # Bare dotfile such as .env, .npmrc or .env.local. The name must start
    # with a letter or underscore so that patterns like ".*" and numbers
    # like ".5" are not mistaken for files.
    if word.startswith('.') and '/' not in word:
        name = word[1:]
        if (name
                and (name[0].isalpha() or name[0] == '_')
                and all(ch.isalnum() or ch in '._-' for ch in name)):
            return True

    # Path whose last component looks like a filename
    if '/' in word:
        last_part = word.split('/')[-1]
        if '.' in last_part and not last_part.startswith('*'):
            return True

    # Entry below /dev/ or /tmp/ (not just the directory itself)
    if word.startswith(('/dev/', '/tmp/')) and len(word) > 5:
        return True

    # Well-known file names, with or without a leading path
    filename_only = word.split('/')[-1]
    if filename_only in {'Makefile', 'README', 'LICENSE', 'Dockerfile', 'Gemfile', 'Cargo.toml', 'package.json'}:
        return True

    return False
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
# Manual smoke test: running this module directly prints the parse result
# for a handful of sample commands.
if __name__ == "__main__":
    sample_commands = (
        "python a.py | tee b.log",
        "cat a.txt > /tmp/b.txt",
        "grep foo file.txt | sort | uniq > output.txt",
        "cat file1.txt file2.txt | grep pattern > result.txt",
        "python script.py < input.txt > output.txt",
        "ls -la /tmp | grep '\\.txt$' | wc -l",
        "tar -xzf archive.tar.gz",
        "find . -name '*.py' | xargs grep pattern",
    )

    banner = "Shell Command Parser (bashlex) - Test Cases\n" + "=" * 60
    print(banner)

    for sample in sample_commands:
        try:
            pieces, inputs = parse_shell_command(sample)
            print(f"\nCommand: {sample}")
            print(f"Sub-commands: {pieces}")
            print(f"Input files: {inputs}")
        except Exception as err:
            print(f"\nCommand: {sample}")
            print(f"Error: {err}")
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Common types for IDE hooks - IDE-agnostic
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Callable, Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class OutputFormat:
    """
    Defines how to format hook output for a specific IDE.
    This is a generic interface - IDEs provide their own implementations.

    No field has a default, so all four values must be supplied when an
    instance is created.
    """
    # Exit codes, one each for the allow, deny and error outcomes.
    # NOTE(review): presumably used as the hook process exit status - confirm in output.py.
    allow_exit_code: int
    deny_exit_code: int
    error_exit_code: int

    # Output formatter function
    # Args: (hook_type: str, allowed: bool, user_msg: Optional[str], agent_msg: Optional[str]) -> str
    formatter: Callable[[str, bool, Optional[str], Optional[str]], str]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class HookConfig:
    """
    Configuration for a specific hook execution.
    IDE-specific modules create instances of this with their own output format.

    No field has a default, so all four values must be supplied when an
    instance is created.
    """
    output_format: OutputFormat  # Exit codes and formatter used to emit the hook result
    server_name: str  # IDE-specific tool server name
    client_name: str  # IDE-specific client name (e.g. "cursor", "claude-code")
    max_content_length: int  # Maximum content length before skipping API call
|