code-puppy 0.0.316__py3-none-any.whl → 0.0.325__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,224 @@
1
+ """
2
+ MCP Server Log Management.
3
+
4
+ This module provides persistent log file management for MCP servers.
5
+ Logs are stored in STATE_DIR/mcp_logs/<server_name>.log
6
+ """
7
+
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import List, Optional
11
+
12
+ from code_puppy.config import STATE_DIR
13
+
14
+ # Maximum log file size in bytes (5MB)
15
+ MAX_LOG_SIZE = 5 * 1024 * 1024
16
+
17
+ # Number of rotated logs to keep
18
+ MAX_ROTATED_LOGS = 3
19
+
20
+
21
def get_mcp_logs_dir() -> Path:
    """Return the directory where MCP server logs live.

    The directory (STATE_DIR/mcp_logs) is created on demand, so callers
    never need to check for its existence.

    Returns:
        Path to the MCP logs directory (guaranteed to exist).
    """
    directory = Path(STATE_DIR) / "mcp_logs"
    directory.mkdir(parents=True, exist_ok=True)
    return directory
33
+
34
+
35
def get_log_file_path(server_name: str) -> Path:
    """Return the log file path for a specific server.

    Args:
        server_name: Name of the MCP server

    Returns:
        Path to the server's log file
    """
    # Replace anything that isn't alphanumeric, '-' or '_' so the name
    # is safe to use as a filename on common filesystems.
    sanitized = [
        ch if ch.isalnum() or ch in "-_" else "_" for ch in server_name
    ]
    return get_mcp_logs_dir() / ("".join(sanitized) + ".log")
48
+
49
+
50
def rotate_log_if_needed(server_name: str) -> None:
    """Rotate a server's log file once it grows past MAX_LOG_SIZE.

    Existing rotations shift up one slot (.log.1 -> .log.2, ...), the
    oldest slot is discarded, and the active log becomes .log.1.

    Args:
        server_name: Name of the MCP server
    """
    log_path = get_log_file_path(server_name)

    # Nothing to rotate for a missing or still-small log.
    if not log_path.exists() or log_path.stat().st_size < MAX_LOG_SIZE:
        return

    logs_dir = get_mcp_logs_dir()
    safe_name = "".join(
        c if c.isalnum() or c in "-_" else "_" for c in server_name
    )

    # Discard the oldest slot first so the shift below never collides.
    oldest = logs_dir / f"{safe_name}.log.{MAX_ROTATED_LOGS}"
    if oldest.exists():
        oldest.unlink()

    # Slide every surviving rotation up one slot, newest-numbered first.
    for index in range(MAX_ROTATED_LOGS - 1, 0, -1):
        source = logs_dir / f"{safe_name}.log.{index}"
        if source.exists():
            source.rename(logs_dir / f"{safe_name}.log.{index + 1}")

    # Finally move the active log into slot 1.
    log_path.rename(logs_dir / f"{safe_name}.log.1")
84
+
85
+
86
def write_log(server_name: str, message: str, level: str = "INFO") -> None:
    """Append one timestamped log entry for a server.

    Rotation is checked before each write so the active file never
    grows unbounded.

    Args:
        server_name: Name of the MCP server
        message: Log message to write
        level: Log level (INFO, ERROR, WARN, DEBUG)
    """
    rotate_log_if_needed(server_name)

    # Millisecond precision: trim the 6-digit microseconds to 3 digits.
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
    entry = f"[{stamp}] [{level}] {message}\n"

    with open(get_log_file_path(server_name), "a", encoding="utf-8") as handle:
        handle.write(entry)
102
+
103
+
104
def read_logs(
    server_name: str, lines: Optional[int] = None, include_rotated: bool = False
) -> List[str]:
    """Read log lines for a server.

    Args:
        server_name: Name of the MCP server
        lines: Number of lines to return (from end). None means all lines.
        include_rotated: Whether to include rotated log files

    Returns:
        List of log lines (most recent last)
    """
    collected: List[str] = []

    def _extend_from(path) -> None:
        # errors="replace" keeps reads working even if a bad byte
        # slipped into the log.
        with open(path, "r", encoding="utf-8", errors="replace") as handle:
            collected.extend(handle.read().splitlines())

    if include_rotated:
        logs_dir = get_mcp_logs_dir()
        safe_name = "".join(
            c if c.isalnum() or c in "-_" else "_" for c in server_name
        )
        # Highest rotation index is the oldest; walk down toward newest.
        for index in range(MAX_ROTATED_LOGS, 0, -1):
            candidate = logs_dir / f"{safe_name}.log.{index}"
            if candidate.exists():
                _extend_from(candidate)

    # The active log holds the most recent entries, so it comes last.
    current = get_log_file_path(server_name)
    if current.exists():
        _extend_from(current)

    if lines is None or lines <= 0:
        return collected
    return collected[-lines:]
142
+
143
+
144
def clear_logs(server_name: str, include_rotated: bool = True) -> None:
    """Delete a server's log file and, optionally, its rotations.

    Args:
        server_name: Name of the MCP server
        include_rotated: Whether to also clear rotated log files
    """
    current = get_log_file_path(server_name)
    if current.exists():
        current.unlink()

    if not include_rotated:
        return

    logs_dir = get_mcp_logs_dir()
    safe_name = "".join(
        c if c.isalnum() or c in "-_" else "_" for c in server_name
    )
    for index in range(1, MAX_ROTATED_LOGS + 1):
        rotated = logs_dir / f"{safe_name}.log.{index}"
        if rotated.exists():
            rotated.unlink()
165
+
166
+
167
def list_servers_with_logs() -> List[str]:
    """List all servers that have log files.

    Returns:
        Sorted list of server names with log files.
    """
    import re

    logs_dir = get_mcp_logs_dir()
    servers = set()

    # Match "<name>.log" and rotated "<name>.log.<n>" explicitly.
    # The previous implementation used Path.stem plus
    # rstrip(".0123456789"), which also stripped legitimate trailing
    # digits from the server name itself (e.g. "server2" -> "server").
    pattern = re.compile(r"^(?P<name>.+)\.log(?:\.\d+)?$")

    for path in logs_dir.glob("*.log*"):
        match = pattern.match(path.name)
        if match:
            servers.add(match.group("name"))

    return sorted(servers)
186
+
187
+
188
def get_log_stats(server_name: str) -> dict:
    """Collect size and line statistics for a server's logs.

    Args:
        server_name: Name of the MCP server

    Returns:
        Dict with keys: "exists", "size_bytes", "line_count",
        "rotated_count", "total_size_bytes".
    """
    log_path = get_log_file_path(server_name)
    exists = log_path.exists()

    stats = {
        "exists": exists,
        "size_bytes": 0,
        "line_count": 0,
        "rotated_count": 0,
        "total_size_bytes": 0,
    }

    if exists:
        size = log_path.stat().st_size
        stats["size_bytes"] = size
        stats["total_size_bytes"] = size
        # errors="replace" keeps the line count robust against bad bytes.
        with open(log_path, "r", encoding="utf-8", errors="replace") as handle:
            stats["line_count"] = sum(1 for _ in handle)

    # Fold in any rotated log files.
    logs_dir = get_mcp_logs_dir()
    safe_name = "".join(
        c if c.isalnum() or c in "-_" else "_" for c in server_name
    )
    for index in range(1, MAX_ROTATED_LOGS + 1):
        rotated = logs_dir / f"{safe_name}.log.{index}"
        if rotated.exists():
            stats["rotated_count"] += 1
            stats["total_size_bytes"] += rotated.stat().st_size

    return stats
@@ -29,6 +29,13 @@ Example (new):
29
29
  >>> bus.emit(TextMessage(level=MessageLevel.INFO, text="Hello"))
30
30
  """
31
31
 
32
+ # =============================================================================
33
+ # Apply Rich Markdown patches (left-justified headers)
34
+ # =============================================================================
35
+ from .markdown_patches import patch_markdown_headings
36
+
37
+ patch_markdown_headings()
38
+
32
39
  # =============================================================================
33
40
  # Legacy API (backward compatible)
34
41
  # =============================================================================
@@ -220,4 +227,6 @@ __all__ = [
220
227
  "RichConsoleRenderer",
221
228
  "DEFAULT_STYLES",
222
229
  "DIFF_STYLES",
230
+ # Markdown patches
231
+ "patch_markdown_headings",
223
232
  ]
@@ -0,0 +1,57 @@
1
+ """Patches for Rich's Markdown rendering.
2
+
3
+ This module provides customizations to Rich's default Markdown rendering,
4
+ particularly for header justification which is hardcoded to center in Rich.
5
+ """
6
+
7
+ from rich import box
8
+ from rich.markdown import Heading, Markdown
9
+ from rich.panel import Panel
10
+ from rich.text import Text
11
+
12
+
13
class LeftJustifiedHeading(Heading):
    """Heading that renders its text left-justified.

    Rich's stock Heading hardcodes `text.justify = 'center'`, which
    looks odd in a CLI context; this subclass flips only the
    justification and otherwise mirrors the default rendering.
    """

    def __rich_console__(self, console, options):
        """Yield the renderables for this heading, left-justified."""
        heading_text = self.text
        heading_text.justify = "left"  # Rich's default is 'center'

        if self.tag != "h1":
            # h2 gets a leading blank line; h2+ render as styled text
            # (same as Rich's default apart from justification).
            if self.tag == "h2":
                yield Text("")
            yield heading_text
        else:
            # h1 is framed in a heavy-bordered panel, as Rich does.
            yield Panel(
                heading_text,
                box=box.HEAVY,
                style="markdown.h1.border",
            )
38
+
39
+
40
# Guard so the patch is installed at most once per process.
_patched = False


def patch_markdown_headings():
    """Install the left-justified heading override into Rich's Markdown.

    Idempotent: every call after the first is a no-op.
    """
    global _patched
    if not _patched:
        # Swap Rich's heading renderable for our left-justified variant.
        Markdown.elements["heading_open"] = LeftJustifiedHeading
        _patched = True
55
+
56
+
57
+ __all__ = ["patch_markdown_headings", "LeftJustifiedHeading"]
@@ -108,6 +108,12 @@ def make_model_settings(
108
108
  # Handle Anthropic extended thinking settings
109
109
  # Remove top_p as Anthropic doesn't support it with extended thinking
110
110
  model_settings_dict.pop("top_p", None)
111
+
112
+ # Claude extended thinking requires temperature=1.0 (API restriction)
113
+ # Default to 1.0 if not explicitly set by user
114
+ if model_settings_dict.get("temperature") is None:
115
+ model_settings_dict["temperature"] = 1.0
116
+
111
117
  extended_thinking = effective_settings.get("extended_thinking", True)
112
118
  budget_tokens = effective_settings.get("budget_tokens", 10000)
113
119
  if extended_thinking and budget_tokens:
@@ -313,9 +319,21 @@ class ModelFactory:
313
319
  http2=http2_enabled,
314
320
  )
315
321
 
322
+ # Check if interleaved thinking is enabled for this model
323
+ # Only applies to Claude 4 models (Opus 4.5, Opus 4.1, Opus 4, Sonnet 4)
324
+ from code_puppy.config import get_effective_model_settings
325
+
326
+ effective_settings = get_effective_model_settings(model_name)
327
+ interleaved_thinking = effective_settings.get("interleaved_thinking", False)
328
+
329
+ default_headers = {}
330
+ if interleaved_thinking:
331
+ default_headers["anthropic-beta"] = "interleaved-thinking-2025-05-14"
332
+
316
333
  anthropic_client = AsyncAnthropic(
317
334
  api_key=api_key,
318
335
  http_client=client,
336
+ default_headers=default_headers if default_headers else None,
319
337
  )
320
338
 
321
339
  # Ensure cache_control is injected at the Anthropic SDK layer
@@ -345,10 +363,21 @@ class ModelFactory:
345
363
  http2=http2_enabled,
346
364
  )
347
365
 
366
+ # Check if interleaved thinking is enabled for this model
367
+ from code_puppy.config import get_effective_model_settings
368
+
369
+ effective_settings = get_effective_model_settings(model_name)
370
+ interleaved_thinking = effective_settings.get("interleaved_thinking", False)
371
+
372
+ default_headers = {}
373
+ if interleaved_thinking:
374
+ default_headers["anthropic-beta"] = "interleaved-thinking-2025-05-14"
375
+
348
376
  anthropic_client = AsyncAnthropic(
349
377
  base_url=url,
350
378
  http_client=client,
351
379
  api_key=api_key,
380
+ default_headers=default_headers if default_headers else None,
352
381
  )
353
382
 
354
383
  # Ensure cache_control is injected at the Anthropic SDK layer
@@ -364,6 +393,31 @@ class ModelFactory:
364
393
  )
365
394
  return None
366
395
 
396
+ # Check if interleaved thinking is enabled (defaults to True for OAuth models)
397
+ from code_puppy.config import get_effective_model_settings
398
+
399
+ effective_settings = get_effective_model_settings(model_name)
400
+ interleaved_thinking = effective_settings.get("interleaved_thinking", True)
401
+
402
+ # Handle anthropic-beta header based on interleaved_thinking setting
403
+ if "anthropic-beta" in headers:
404
+ beta_parts = [p.strip() for p in headers["anthropic-beta"].split(",")]
405
+ if interleaved_thinking:
406
+ # Ensure interleaved-thinking is in the header
407
+ if "interleaved-thinking-2025-05-14" not in beta_parts:
408
+ beta_parts.append("interleaved-thinking-2025-05-14")
409
+ else:
410
+ # Remove interleaved-thinking from the header
411
+ beta_parts = [
412
+ p for p in beta_parts if "interleaved-thinking" not in p
413
+ ]
414
+ headers["anthropic-beta"] = ",".join(beta_parts) if beta_parts else None
415
+ if headers.get("anthropic-beta") is None:
416
+ del headers["anthropic-beta"]
417
+ elif interleaved_thinking:
418
+ # No existing beta header, add one for interleaved thinking
419
+ headers["anthropic-beta"] = "interleaved-thinking-2025-05-14"
420
+
367
421
  # Use a dedicated client wrapper that injects cache_control on /v1/messages
368
422
  if verify is None:
369
423
  verify = get_cert_bundle_path()
code_puppy/models.json CHANGED
@@ -9,9 +9,9 @@
9
9
  "context_length": 200000,
10
10
  "supported_settings": ["temperature", "seed"]
11
11
  },
12
- "synthetic-MiniMax-M2": {
12
+ "synthetic-MiniMax-M2.1": {
13
13
  "type": "custom_openai",
14
- "name": "hf:MiniMaxAI/MiniMax-M2",
14
+ "name": "hf:MiniMaxAI/MiniMax-M2.1",
15
15
  "custom_endpoint": {
16
16
  "url": "https://api.synthetic.new/openai/v1/",
17
17
  "api_key": "$SYN_API_KEY"
@@ -81,7 +81,7 @@
81
81
  "type": "anthropic",
82
82
  "name": "claude-opus-4-5",
83
83
  "context_length": 200000,
84
- "supported_settings": ["temperature", "extended_thinking", "budget_tokens"]
84
+ "supported_settings": ["temperature", "extended_thinking", "budget_tokens", "interleaved_thinking"]
85
85
  },
86
86
  "zai-glm-4.6-coding": {
87
87
  "type": "zai_coding",
@@ -18,6 +18,9 @@ def _load_builtin_plugins(plugins_dir: Path) -> list[str]:
18
18
 
19
19
  Returns list of successfully loaded plugin names.
20
20
  """
21
+ # Import safety permission check for shell_safety plugin
22
+ from code_puppy.config import get_safety_permission_level
23
+
21
24
  loaded = []
22
25
 
23
26
  for item in plugins_dir.iterdir():
@@ -26,6 +29,15 @@ def _load_builtin_plugins(plugins_dir: Path) -> list[str]:
26
29
  callbacks_file = item / "register_callbacks.py"
27
30
 
28
31
  if callbacks_file.exists():
32
+ # Skip shell_safety plugin unless safety_permission_level is "low" or "none"
33
+ if plugin_name == "shell_safety":
34
+ safety_level = get_safety_permission_level()
35
+ if safety_level not in ("none", "low"):
36
+ logger.debug(
37
+ f"Skipping shell_safety plugin - safety_permission_level is '{safety_level}' (needs 'low' or 'none')"
38
+ )
39
+ continue
40
+
29
41
  try:
30
42
  module_name = f"code_puppy.plugins.{plugin_name}.register_callbacks"
31
43
  importlib.import_module(module_name)
@@ -368,6 +368,7 @@ def add_models_to_extra_config(models: List[str]) -> bool:
368
368
  "temperature",
369
369
  "extended_thinking",
370
370
  "budget_tokens",
371
+ "interleaved_thinking",
371
372
  ],
372
373
  }
373
374
  added += 1
@@ -5,13 +5,12 @@ It's designed to be ultra-lightweight with a concise prompt (<200 tokens) and
5
5
  uses structured output for reliable parsing.
6
6
  """
7
7
 
8
- import asyncio
9
8
  from typing import TYPE_CHECKING, List
10
9
 
11
10
  from code_puppy.agents.base_agent import BaseAgent
12
11
 
13
12
  if TYPE_CHECKING:
14
- from code_puppy.tools.command_runner import ShellSafetyAssessment
13
+ pass
15
14
 
16
15
 
17
16
  class ShellSafetyAgent(BaseAgent):
@@ -68,119 +67,3 @@ class ShellSafetyAgent(BaseAgent):
68
67
  def get_available_tools(self) -> List[str]:
69
68
  """This agent uses no tools - pure reasoning only."""
70
69
  return []
71
-
72
- async def assess_command(
73
- self, command: str, cwd: str | None = None
74
- ) -> "ShellSafetyAssessment":
75
- """Assess the safety risk of a shell command.
76
-
77
- Args:
78
- command: The shell command to assess
79
- cwd: Optional working directory context
80
-
81
- Returns:
82
- ShellSafetyAssessment with risk level and reasoning
83
-
84
- Note:
85
- On timeout or error, defaults to 'high' risk with error reasoning
86
- to fail safe. Optionally uses DBOS for durable execution tracking.
87
- """
88
- import uuid
89
-
90
- from pydantic_ai import Agent, UsageLimits
91
-
92
- from code_puppy.config import get_use_dbos
93
- from code_puppy.model_factory import ModelFactory
94
- from code_puppy.tools.command_runner import ShellSafetyAssessment
95
-
96
- try:
97
- # Build the assessment prompt
98
- prompt = f"Assess this shell command:\n\nCommand: {command}"
99
- if cwd:
100
- prompt += f"\nWorking directory: {cwd}"
101
-
102
- # Get the current model
103
- model_name = self.get_model_name()
104
- models_config = ModelFactory.load_config()
105
-
106
- if model_name not in models_config:
107
- # Fall back to high risk if model config fails
108
- return ShellSafetyAssessment(
109
- risk="high",
110
- reasoning="Model configuration unavailable - failing safe",
111
- is_fallback=True,
112
- )
113
-
114
- model = ModelFactory.get_model(model_name, models_config)
115
-
116
- # Handle claude-code models: swap instructions and prepend system prompt
117
- from code_puppy.model_utils import prepare_prompt_for_model
118
-
119
- instructions = self.get_system_prompt()
120
- prepared = prepare_prompt_for_model(model_name, instructions, prompt)
121
- instructions = prepared.instructions
122
- prompt = prepared.user_prompt
123
-
124
- from code_puppy.model_factory import make_model_settings
125
-
126
- model_settings = make_model_settings(model_name)
127
-
128
- temp_agent = Agent(
129
- model=model,
130
- system_prompt=instructions,
131
- retries=2, # Increase from 1 to 2 for better reliability
132
- output_type=ShellSafetyAssessment,
133
- model_settings=model_settings,
134
- )
135
-
136
- # Generate unique agent name and workflow ID for DBOS (if enabled)
137
- agent_name = f"shell-safety-{uuid.uuid4().hex[:8]}"
138
- workflow_id = f"shell-safety-{uuid.uuid4().hex[:8]}"
139
-
140
- # Wrap with DBOS if enabled (same pattern as agent_tools.py)
141
- if get_use_dbos():
142
- from pydantic_ai.durable_exec.dbos import DBOSAgent
143
-
144
- dbos_agent = DBOSAgent(temp_agent, name=agent_name)
145
- temp_agent = dbos_agent
146
-
147
- # Run the agent as a cancellable task
148
- # Import the shared task registry for cancellation support
149
- from code_puppy.tools.agent_tools import _active_subagent_tasks
150
-
151
- if get_use_dbos():
152
- from dbos import DBOS, SetWorkflowID
153
-
154
- with SetWorkflowID(workflow_id):
155
- task = asyncio.create_task(
156
- temp_agent.run(
157
- prompt,
158
- usage_limits=UsageLimits(request_limit=3),
159
- )
160
- )
161
- _active_subagent_tasks.add(task)
162
- else:
163
- task = asyncio.create_task(
164
- temp_agent.run(
165
- prompt,
166
- usage_limits=UsageLimits(request_limit=3),
167
- )
168
- )
169
- _active_subagent_tasks.add(task)
170
-
171
- try:
172
- result = await task
173
- finally:
174
- _active_subagent_tasks.discard(task)
175
- if task.cancelled():
176
- if get_use_dbos():
177
- DBOS.cancel_workflow(workflow_id)
178
-
179
- return result.output
180
-
181
- except Exception as e:
182
- return ShellSafetyAssessment(
183
- risk="high",
184
- reasoning=f"Safety assessment failed: {str(e)[:200]} - failing safe",
185
- is_fallback=True,
186
- )
@@ -7,12 +7,42 @@ and assesses their safety risk before execution.
7
7
  from typing import Any, Dict, Optional
8
8
 
9
9
  from code_puppy.callbacks import register_callback
10
- from code_puppy.config import get_safety_permission_level, get_yolo_mode
10
+ from code_puppy.config import (
11
+ get_global_model_name,
12
+ get_safety_permission_level,
13
+ get_yolo_mode,
14
+ )
11
15
  from code_puppy.messaging import emit_info
12
16
  from code_puppy.plugins.shell_safety.command_cache import (
13
17
  cache_assessment,
14
18
  get_cached_assessment,
15
19
  )
20
+ from code_puppy.tools.command_runner import ShellSafetyAssessment
21
+
22
+ # OAuth model prefixes - these models have their own safety mechanisms
23
+ OAUTH_MODEL_PREFIXES = (
24
+ "claude-code-", # Anthropic OAuth
25
+ "chatgpt-", # OpenAI OAuth
26
+ "gemini-oauth", # Google OAuth
27
+ )
28
+
29
+
30
+ def is_oauth_model(model_name: str | None) -> bool:
31
+ """Check if the model is an OAuth model that should skip safety checks.
32
+
33
+ OAuth models have their own built-in safety mechanisms, so we skip
34
+ the shell safety callback to avoid redundant checks and potential bugs.
35
+
36
+ Args:
37
+ model_name: The name of the current model
38
+
39
+ Returns:
40
+ True if the model is an OAuth model, False otherwise
41
+ """
42
+ if not model_name:
43
+ return False
44
+ return model_name.startswith(OAUTH_MODEL_PREFIXES)
45
+
16
46
 
17
47
  # Risk level hierarchy for numeric comparison
18
48
  # Lower numbers = safer commands, higher numbers = more dangerous
@@ -68,6 +98,11 @@ async def shell_safety_callback(
68
98
  None if command is safe to proceed
69
99
  Dict with rejection info if command should be blocked
70
100
  """
101
+ # Skip safety checks for OAuth models - they have their own safety mechanisms
102
+ current_model = get_global_model_name()
103
+ if is_oauth_model(current_model):
104
+ return None
105
+
71
106
  # Only check safety in yolo_mode - otherwise user is reviewing manually
72
107
  yolo_mode = get_yolo_mode()
73
108
  if not yolo_mode:
@@ -108,8 +143,14 @@ async def shell_safety_callback(
108
143
  # Create agent and assess command
109
144
  agent = ShellSafetyAgent()
110
145
 
111
- # Run async assessment (we're in an async callback now!)
112
- assessment = await agent.assess_command(command, cwd)
146
+ # Build the assessment prompt with optional cwd context
147
+ prompt = f"Assess this shell command:\n\nCommand: {command}"
148
+ if cwd:
149
+ prompt += f"\nWorking directory: {cwd}"
150
+
151
+ # Run async assessment with structured output type
152
+ result = await agent.run_with_mcp(prompt, output_type=ShellSafetyAssessment)
153
+ assessment = result.output
113
154
 
114
155
  # Cache the result for future use, but only if it's not a fallback assessment
115
156
  if not getattr(assessment, "is_fallback", False):