netra-zen 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_interface/__init__.py +26 -0
- agent_interface/base_agent.py +351 -0
- netra_zen-1.0.0.dist-info/METADATA +576 -0
- netra_zen-1.0.0.dist-info/RECORD +15 -0
- netra_zen-1.0.0.dist-info/WHEEL +5 -0
- netra_zen-1.0.0.dist-info/entry_points.txt +2 -0
- netra_zen-1.0.0.dist-info/licenses/LICENSE.md +1 -0
- netra_zen-1.0.0.dist-info/top_level.txt +4 -0
- token_budget/__init__.py +1 -0
- token_budget/budget_manager.py +200 -0
- token_budget/models.py +74 -0
- token_budget/visualization.py +22 -0
- token_transparency/__init__.py +20 -0
- token_transparency/claude_pricing_engine.py +327 -0
- zen_orchestrator.py +2884 -0
zen_orchestrator.py
ADDED
@@ -0,0 +1,2884 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
Usage Examples:
|
4
|
+
|
5
|
+
zen -h # Help
|
6
|
+
|
7
|
+
Direct Command Execution:
|
8
|
+
zen "/single-command-in-claude-commands" # Execute single command directly
|
9
|
+
zen "/analyze-code" --workspace ~/my-project
|
10
|
+
zen "/debug-issue" --instance-name "debug-session"
|
11
|
+
zen "/optimize-performance" --session-id "perf-1"
|
12
|
+
zen "/generate-docs" --clear-history --compact-history
|
13
|
+
|
14
|
+
Configuration File Mode:
|
15
|
+
zen --config config.json
|
16
|
+
zen --config config.json --workspace ~/my-project
|
17
|
+
|
18
|
+
Default Instances Mode:
|
19
|
+
zen --dry-run # Auto-detects workspace from project root
|
20
|
+
zen --workspace ~/my-project --dry-run # Override workspace
|
21
|
+
zen --startup-delay 2.0 # 2 second delay between launches
|
22
|
+
zen --startup-delay 0.5 # 0.5 second delay between launches
|
23
|
+
zen --max-line-length 1000 # Longer output lines
|
24
|
+
zen --status-report-interval 60 # Status reports every 60s
|
25
|
+
zen --quiet # Minimal output, errors only
|
26
|
+
|
27
|
+
Command Discovery:
|
28
|
+
zen --list-commands # Show all available commands
|
29
|
+
zen --inspect-command "/analyze-code" # Inspect specific command
|
30
|
+
|
31
|
+
Scheduling:
|
32
|
+
zen "/analyze-code" --start-at "2h" # Start 2 hours from now
|
33
|
+
zen "/debug-issue" --start-at "30m" # Start in 30 minutes
|
34
|
+
zen "/optimize" --start-at "1am" # Start at 1 AM (today or tomorrow)
|
35
|
+
zen "/review-code" --start-at "14:30" # Start at 2:30 PM (today or tomorrow)
|
36
|
+
zen "/generate-docs" --start-at "10:30pm" # Start at 10:30 PM (today or tomorrow)
|
37
|
+
|
38
|
+
Precedence Rules:
|
39
|
+
1. Direct command (highest) - zen "/command"
|
40
|
+
2. Config file (medium) - zen --config file.json # expected default usage pattern
|
41
|
+
3. Default instances (lowest) - zen
|
42
|
+
"""
|
43
|
+
|
44
|
+
import asyncio
|
45
|
+
import json
|
46
|
+
import logging
|
47
|
+
import subprocess
|
48
|
+
import sys
|
49
|
+
import time
|
50
|
+
import yaml
|
51
|
+
import shutil
|
52
|
+
import os
|
53
|
+
import platform
|
54
|
+
from dataclasses import dataclass, asdict, field
|
55
|
+
from pathlib import Path
|
56
|
+
from typing import Dict, List, Optional, Any
|
57
|
+
import argparse
|
58
|
+
from datetime import datetime, timedelta
|
59
|
+
import re
|
60
|
+
from uuid import uuid4, UUID
|
61
|
+
from enum import Enum
|
62
|
+
|
63
|
+
# Add token budget imports with proper path handling
|
64
|
+
sys.path.insert(0, str(Path(__file__).parent))
|
65
|
+
try:
|
66
|
+
from token_budget.budget_manager import TokenBudgetManager
|
67
|
+
from token_budget.visualization import render_progress_bar
|
68
|
+
except ImportError as e:
|
69
|
+
# Graceful fallback if token budget package is not available
|
70
|
+
TokenBudgetManager = None
|
71
|
+
render_progress_bar = None
|
72
|
+
# Note: logger is not yet defined here, will log warning after logger setup
|
73
|
+
|
74
|
+
# Add token transparency imports
|
75
|
+
try:
|
76
|
+
from token_transparency import ClaudePricingEngine, TokenUsageData
|
77
|
+
except ImportError as e:
|
78
|
+
# Graceful fallback if token transparency package is not available
|
79
|
+
ClaudePricingEngine = None
|
80
|
+
TokenUsageData = None
|
81
|
+
|
82
|
+
# Setup logging
|
83
|
+
logging.basicConfig(
|
84
|
+
level=logging.INFO,
|
85
|
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
86
|
+
)
|
87
|
+
logger = logging.getLogger(__name__)
|
88
|
+
|
89
|
+
class LogLevel(Enum):
    """Verbosity levels controlling how much the orchestrator logs.

    CONCISE is the default; SILENT restricts output to errors and the
    final summary, while DETAILED enables every log message.
    """

    # Only errors and the final run summary are emitted.
    SILENT = "silent"
    # Essential progress plus budget alerts (the default level).
    CONCISE = "concise"
    # Everything: the full, unfiltered log stream.
    DETAILED = "detailed"
|
94
|
+
|
95
|
+
def determine_log_level(args) -> LogLevel:
    """Resolve the effective LogLevel from parsed CLI arguments.

    Precedence: an explicit ``--log-level`` wins, then ``--quiet``,
    then ``--verbose``; otherwise the CONCISE default applies.
    Missing attributes are treated as unset for backward compatibility.
    """
    explicit = getattr(args, 'log_level', None)
    if explicit:
        # Explicit selection has the highest priority.
        return LogLevel[explicit.upper()]
    if getattr(args, 'quiet', False):
        return LogLevel.SILENT
    if getattr(args, 'verbose', False):
        return LogLevel.DETAILED
    # Nothing requested: fall back to the default level.
    return LogLevel.CONCISE
|
106
|
+
|
107
|
+
@dataclass
class InstanceConfig:
    """Configuration for a single Claude Code instance.

    Only ``command`` is required; ``__post_init__`` fills in the display
    name, the description, and the permission mode when they are left
    unset. Fields that default to ``None`` are annotated ``Optional``
    (the originals claimed non-optional types while defaulting to None).
    """
    command: str                                   # Slash command (or prompt) to execute
    name: Optional[str] = None                     # Display name; defaults to the command
    description: Optional[str] = None              # Human-readable description
    allowed_tools: Optional[List[str]] = None      # Tool allow-list passed to Claude
    permission_mode: Optional[str] = None          # Filled in by __post_init__ when None
    output_format: str = "stream-json"             # Default to stream-json for real-time output
    session_id: Optional[str] = None               # Optional Claude session id
    clear_history: bool = False                    # Prepend /clear before the command
    compact_history: bool = False                  # Prepend /compact before the command
    pre_commands: Optional[List[str]] = None       # Commands to run before main command
    max_tokens_per_command: Optional[int] = None   # Token budget for this specific command

    def __post_init__(self):
        """Set defaults that depend on other fields."""
        if self.name is None:
            self.name = self.command
        if self.description is None:
            self.description = f"Execute {self.command}"
        # Set permission mode if not explicitly set
        if self.permission_mode is None:
            # Default to bypassPermissions for all platforms to avoid approval
            # prompts. This is not OS-specific - it's a general permission
            # configuration.
            self.permission_mode = "bypassPermissions"
|
133
|
+
|
134
|
+
@dataclass
class InstanceStatus:
    """Mutable runtime status and token accounting for one instance.

    Container fields default to ``None`` and are replaced with fresh
    per-instance containers in ``__post_init__`` (annotations corrected
    to ``Optional`` to match those None defaults).
    """
    name: str
    pid: Optional[int] = None
    status: str = "pending"  # pending, running, completed, failed
    start_time: Optional[float] = None
    end_time: Optional[float] = None
    output: str = ""
    error: str = ""
    total_tokens: int = 0
    input_tokens: int = 0
    output_tokens: int = 0
    cached_tokens: int = 0  # Backward compatibility - sum of cache_read + cache_creation
    cache_read_tokens: int = 0  # Separate cache read tracking
    cache_creation_tokens: int = 0  # Separate cache creation tracking
    tool_calls: int = 0
    _last_known_total_tokens: int = 0  # For delta tracking in budget management

    # Message ID deduplication tracking (initialized as an empty set).
    processed_message_ids: Optional[set] = None

    # Authoritative cost from SDK when available.
    total_cost_usd: Optional[float] = None

    # Model tracking for transparency.
    model_used: str = "claude-sonnet-4"  # Default model

    # Tool usage details.
    tool_details: Optional[Dict[str, int]] = None  # Tool name -> usage count
    tool_tokens: Optional[Dict[str, int]] = None  # Tool name -> token usage
    tool_id_mapping: Dict[str, str] = field(default_factory=dict)  # tool_use_id -> tool name mapping

    def __post_init__(self):
        """Replace None placeholders with fresh, per-instance containers."""
        if self.processed_message_ids is None:
            self.processed_message_ids = set()
        if self.tool_details is None:
            self.tool_details = {}
        if self.tool_tokens is None:
            self.tool_tokens = {}
        # Defensive: default_factory already supplies a dict, but a caller
        # may still pass None explicitly.
        if self.tool_id_mapping is None:
            self.tool_id_mapping = {}
|
177
|
+
|
178
|
+
class ClaudeInstanceOrchestrator:
|
179
|
+
"""Orchestrator for managing multiple Claude Code instances"""
|
180
|
+
|
181
|
+
    def __init__(self, workspace_dir: Path, max_console_lines: int = 5, startup_delay: float = 1.0,
                 max_line_length: int = 500, status_report_interval: int = 30,
                 use_cloud_sql: bool = False, quiet: bool = False,
                 overall_token_budget: Optional[int] = None,
                 overall_cost_budget: Optional[float] = None,
                 budget_type: str = "tokens",
                 budget_enforcement_mode: str = "warn",
                 enable_budget_visuals: bool = True,
                 has_command_budgets: bool = False,
                 log_level: LogLevel = LogLevel.CONCISE):
        """Set up orchestration state, budget tracking, and pricing.

        Args:
            workspace_dir: Directory the instances run in (also where
                .claude/commands/ is discovered).
            max_console_lines: Max recent output lines shown per instance;
                0 acts as quiet mode elsewhere in this class.
            startup_delay: Seconds between staggered instance launches.
            max_line_length: Console line truncation limit.
            status_report_interval: Seconds between rolling status reports.
            use_cloud_sql: Legacy flag; CloudSQL support is disabled and
                only produces a warning.
            quiet: Minimal-output flag.
            overall_token_budget / overall_cost_budget: Optional global
                budgets; cost takes precedence over tokens.
            budget_type: Passed through to TokenBudgetManager.
            budget_enforcement_mode: "warn" or "block".
            enable_budget_visuals: Toggle budget progress visuals.
            has_command_budgets: Forces budget-manager creation even
                without an overall budget.
            log_level: Verbosity (see LogLevel).
        """
        self.workspace_dir = workspace_dir
        self.instances: Dict[str, InstanceConfig] = {}
        self.statuses: Dict[str, InstanceStatus] = {}
        self.processes: Dict[str, subprocess.Popen] = {}
        self.start_datetime = datetime.now()
        self.max_console_lines = max_console_lines  # Max lines to show per instance
        self.startup_delay = startup_delay  # Delay between instance launches in seconds
        self.max_line_length = max_line_length  # Max characters per line in console output
        self.status_report_interval = status_report_interval  # Seconds between status reports
        self.last_status_report = time.time()
        self.status_report_task = None  # For the rolling status report task
        self.use_cloud_sql = use_cloud_sql
        self.quiet = quiet
        self.log_level = log_level
        self.batch_id = str(uuid4())  # Generate batch ID for this orchestration run
        # NOTE(review): optimizer is initialized but never assigned in this
        # chunk - presumably set elsewhere; confirm before relying on it.
        self.optimizer = None

        # Initialize budget manager if any budget settings are provided
        needs_budget_manager = (overall_token_budget is not None) or (overall_cost_budget is not None) or has_command_budgets
        if TokenBudgetManager and needs_budget_manager:
            # Cost budget takes precedence over token budget
            if overall_cost_budget is not None:
                self.budget_manager = TokenBudgetManager(
                    overall_cost_budget=overall_cost_budget,
                    enforcement_mode=budget_enforcement_mode,
                    budget_type=budget_type
                )
                logger.info(f"šÆ COST BUDGET MANAGER initialized with ${overall_cost_budget} overall budget")
            else:
                self.budget_manager = TokenBudgetManager(
                    overall_budget=overall_token_budget,  # Can be None
                    enforcement_mode=budget_enforcement_mode,
                    budget_type=budget_type
                )
                if overall_token_budget:
                    logger.info(f"šÆ TOKEN BUDGET MANAGER initialized with {overall_token_budget} token overall budget")
        else:
            # TokenBudgetManager import failed or no budget requested.
            self.budget_manager = None
        self.enable_budget_visuals = enable_budget_visuals

        # Initialize token transparency pricing engine
        if ClaudePricingEngine:
            self.pricing_engine = ClaudePricingEngine()
            logger.info("šÆ Token transparency pricing engine enabled - Claude pricing compliance active")
        else:
            self.pricing_engine = None
            logger.debug("Token transparency pricing engine disabled (module not available)")

        # Log budget configuration status
        if self.budget_manager:
            budget_msg = f"Overall: {overall_token_budget:,} tokens" if overall_token_budget else "No overall limit"
            logger.info(f"šÆ Token budget tracking enabled - {budget_msg} | Mode: {budget_enforcement_mode.upper()}")
        else:
            logger.debug("Token budget tracking disabled (no budget specified)")

        # Configure CloudSQL if requested
        # CloudSQL functionality available with Netra Apex
        if use_cloud_sql:
            logger.warning("CloudSQL functionality has been disabled. Token metrics will be displayed locally only.")
            logger.info("For data persistence, consider upgrading to Netra Apex.")
|
251
|
+
|
252
|
+
def log_at_level(self, level: LogLevel, message: str, log_func=None):
|
253
|
+
"""Log message only if current log level permits."""
|
254
|
+
if log_func is None:
|
255
|
+
log_func = logger.info
|
256
|
+
|
257
|
+
if self.log_level == LogLevel.SILENT and log_func != logger.error:
|
258
|
+
return
|
259
|
+
elif self.log_level == LogLevel.CONCISE and level == LogLevel.DETAILED:
|
260
|
+
return
|
261
|
+
|
262
|
+
log_func(message)
|
263
|
+
|
264
|
+
def add_instance(self, config: InstanceConfig):
|
265
|
+
"""Add a new instance configuration"""
|
266
|
+
# Validate slash command exists
|
267
|
+
if not self.validate_command(config.command):
|
268
|
+
logger.warning(f"Command '{config.command}' not found in available commands")
|
269
|
+
logger.info(f"Available commands: {', '.join(self.discover_available_commands())}")
|
270
|
+
|
271
|
+
self.instances[config.name] = config
|
272
|
+
self.statuses[config.name] = InstanceStatus(name=config.name)
|
273
|
+
logger.info(f"Added instance: {config.name} - {config.description}")
|
274
|
+
|
275
|
+
    def build_claude_command(self, config: InstanceConfig) -> List[str]:
        """Build the Claude Code command for an instance.

        Assembles the prompt string (session-management commands, then
        pre-commands, then the main command, joined by "; "), locates the
        claude executable (PATH first, then platform-specific fallbacks),
        and returns the full argv list for headless execution.

        Returns:
            The argv list suitable for subprocess execution.
        """
        # Build the full command including pre-commands and session management
        full_command = []

        # Add session management commands first
        if config.clear_history:
            full_command.append("/clear")

        if config.compact_history:
            full_command.append("/compact")

        # Add any pre-commands
        if config.pre_commands:
            full_command.extend(config.pre_commands)

        # Add the main command
        full_command.append(config.command)

        # Join commands with semicolon for sequential execution
        command_string = "; ".join(full_command)

        # Find the claude executable with Mac-specific paths
        claude_cmd = shutil.which("claude")
        if not claude_cmd:
            # Try common paths on different platforms
            possible_paths = [
                "claude.cmd",  # Windows
                "claude.exe",  # Windows
                "/opt/homebrew/bin/claude",  # Mac Homebrew ARM
                "/usr/local/bin/claude",  # Mac Homebrew Intel
                "~/.local/bin/claude",  # User local install
                "/usr/bin/claude",  # System install
                "claude"  # Final fallback
            ]

            for path in possible_paths:
                # Expand user path if needed
                expanded_path = Path(path).expanduser()
                if expanded_path.exists():
                    claude_cmd = str(expanded_path)
                    logger.info(f"Found Claude executable at: {claude_cmd}")
                    break
                elif shutil.which(path):
                    claude_cmd = path
                    logger.info(f"Found Claude executable via which: {claude_cmd}")
                    break

            # Still unresolved (or only the bare-name fallback matched):
            # warn loudly but keep going with "claude" and let execution fail.
            if not claude_cmd or claude_cmd == "claude":
                logger.warning("Claude command not found in PATH or common locations")
                logger.warning("Please ensure Claude Code is installed and in your PATH")
                logger.warning("Install with: npm install -g @anthropic/claude-code")
                claude_cmd = "claude"  # Fallback

        # New approach: slash commands can be included directly in prompt
        cmd = [
            claude_cmd,
            "-p",  # headless mode
            command_string,  # Full command sequence
            f"--output-format={config.output_format}",
            f"--permission-mode={config.permission_mode}"
        ]

        # Add --verbose if using stream-json (required by Claude Code)
        if config.output_format == "stream-json":
            cmd.append("--verbose")

        if config.allowed_tools:
            cmd.append(f"--allowedTools={','.join(config.allowed_tools)}")

        if config.session_id:
            cmd.extend(["--session-id", config.session_id])

        return cmd
|
349
|
+
|
350
|
+
def discover_available_commands(self) -> List[str]:
|
351
|
+
"""Discover available slash commands from .claude/commands/"""
|
352
|
+
commands = []
|
353
|
+
commands_dir = self.workspace_dir / ".claude" / "commands"
|
354
|
+
|
355
|
+
if commands_dir.exists():
|
356
|
+
for cmd_file in commands_dir.glob("*.md"):
|
357
|
+
# Command name is filename without .md extension
|
358
|
+
cmd_name = f"/{cmd_file.stem}"
|
359
|
+
commands.append(cmd_name)
|
360
|
+
logger.debug(f"Found command: {cmd_name}")
|
361
|
+
|
362
|
+
# Add built-in commands
|
363
|
+
builtin_commands = ["/compact", "/clear", "/help"]
|
364
|
+
commands.extend(builtin_commands)
|
365
|
+
|
366
|
+
return sorted(commands)
|
367
|
+
|
368
|
+
def validate_command(self, command: str) -> bool:
|
369
|
+
"""Validate that a slash command exists"""
|
370
|
+
available_commands = self.discover_available_commands()
|
371
|
+
|
372
|
+
# Extract base command (remove arguments)
|
373
|
+
base_command = command.split()[0] if command.split() else command
|
374
|
+
|
375
|
+
return base_command in available_commands
|
376
|
+
|
377
|
+
def inspect_command(self, command_name: str) -> Dict[str, Any]:
|
378
|
+
"""Inspect a slash command file for YAML frontmatter and configuration"""
|
379
|
+
# Remove leading slash and any arguments
|
380
|
+
base_name = command_name.lstrip('/').split()[0]
|
381
|
+
command_file = self.workspace_dir / ".claude" / "commands" / f"{base_name}.md"
|
382
|
+
|
383
|
+
if not command_file.exists():
|
384
|
+
return {"exists": False}
|
385
|
+
|
386
|
+
try:
|
387
|
+
content = command_file.read_text(encoding='utf-8')
|
388
|
+
|
389
|
+
# Check for YAML frontmatter
|
390
|
+
if content.startswith('---\n'):
|
391
|
+
parts = content.split('---\n', 2)
|
392
|
+
if len(parts) >= 3:
|
393
|
+
frontmatter_text = parts[1]
|
394
|
+
try:
|
395
|
+
frontmatter = yaml.safe_load(frontmatter_text)
|
396
|
+
return {
|
397
|
+
"exists": True,
|
398
|
+
"file_path": str(command_file),
|
399
|
+
"frontmatter": frontmatter,
|
400
|
+
"content_preview": parts[2][:200] + "..." if len(parts[2]) > 200 else parts[2]
|
401
|
+
}
|
402
|
+
except yaml.YAMLError as e:
|
403
|
+
logger.warning(f"Invalid YAML frontmatter in {command_file}: {e}")
|
404
|
+
|
405
|
+
return {
|
406
|
+
"exists": True,
|
407
|
+
"file_path": str(command_file),
|
408
|
+
"frontmatter": {},
|
409
|
+
"content_preview": content[:200] + "..." if len(content) > 200 else content
|
410
|
+
}
|
411
|
+
|
412
|
+
except Exception as e:
|
413
|
+
logger.error(f"Error reading command file {command_file}: {e}")
|
414
|
+
return {"exists": False, "error": str(e)}
|
415
|
+
|
416
|
+
    async def run_instance(self, name: str) -> bool:
        """Run a single Claude Code instance asynchronously.

        Performs a pre-execution budget check (warn or block), launches
        the claude CLI as an async subprocess (with a PATH augmented for
        macOS), streams or collects its output, and records timing and
        status on the instance's InstanceStatus.

        Returns:
            True if the process exited with code 0; False when the
            instance is unknown, blocked by budget, fails, or raises.
        """
        if name not in self.instances:
            logger.error(f"Instance {name} not found")
            return False

        config = self.instances[name]
        status = self.statuses[name]

        # --- PRE-EXECUTION BUDGET CHECK ---
        if self.budget_manager:
            # V1: Use a simple placeholder or the configured max. Future versions can predict.
            estimated_tokens = config.max_tokens_per_command or 1000  # Default estimate
            # ISSUE #1348 FIX: Use consistent command matching logic for budget checking
            # Match the same logic used in _update_budget_tracking for consistency
            if config.command and config.command.strip().startswith('/'):
                # For slash commands, check if budget exists for base command vs full command
                base_command_part = config.command.split()[0] if config.command else config.command
                # Check if budget exists for base command, otherwise use full command
                if base_command_part in self.budget_manager.command_budgets:
                    budget_check_key = base_command_part
                else:
                    budget_check_key = config.command
            else:
                # For non-slash commands/prompts, always use the full command text as budget key
                # NOTE(review): both ternary arms are config.command - this is a
                # no-op conditional; presumably a leftover from an earlier edit.
                budget_check_key = config.command if config.command else config.command

            base_command = budget_check_key

            logger.info(f"šÆ Budget check for {name}: command={base_command}, estimated={estimated_tokens} tokens")

            can_run, reason = self.budget_manager.check_budget(base_command, estimated_tokens)
            if not can_run:
                message = f"Budget exceeded for instance {name}: {reason}. Skipping."
                if self.budget_manager.enforcement_mode == "block":
                    # Block mode: mark failed and never start the process.
                    logger.error(f"š« BLOCK MODE: {message}")
                    status.status = "failed"
                    status.error = f"Blocked by budget limit - {reason}"
                    return False
                else:  # warn mode
                    logger.warning(f"ā ļø WARN MODE: {message}")
            else:
                logger.info(f"ā Budget check passed for {name}: {reason}")

        try:
            logger.info(f"Starting instance: {name}")
            status.status = "running"
            status.start_time = time.time()

            cmd = self.build_claude_command(config)
            logger.info(f"Command: {' '.join(cmd)}")
            logger.info(f"Permission mode: {config.permission_mode} (Platform: {platform.system()})")

            # Create the async process with Mac-friendly environment
            env = os.environ.copy()

            # Add common Mac paths to PATH if not present
            if platform.system() == "Darwin":  # macOS
                mac_paths = [
                    "/opt/homebrew/bin",  # Homebrew ARM
                    "/usr/local/bin",  # Homebrew Intel
                    "/usr/bin",  # System binaries
                    str(Path.home() / ".local" / "bin"),  # User local
                ]
                current_path = env.get("PATH", "")
                for mac_path in mac_paths:
                    if mac_path not in current_path:
                        # Prepend each missing path so it wins over later entries.
                        env["PATH"] = f"{mac_path}:{current_path}"
                        current_path = env["PATH"]

            # Create the async process
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=self.workspace_dir,
                env=env
            )

            status.pid = process.pid
            logger.info(f"Instance {name} started with PID {process.pid}")

            # For stream-json format, stream output in parallel with process execution
            if config.output_format == "stream-json":
                # Create streaming task but don't await it yet
                stream_task = asyncio.create_task(self._stream_output_parallel(name, process))

                # Wait for process to complete
                returncode = await process.wait()

                # Now wait for streaming to complete
                await stream_task
            else:
                # For non-streaming formats, use traditional communicate
                stdout, stderr = await process.communicate()
                returncode = process.returncode

                if stdout:
                    stdout_str = stdout.decode() if isinstance(stdout, bytes) else stdout
                    status.output += stdout_str
                    # Parse token usage from final output
                    self._parse_final_output_token_usage(stdout_str, status, config.output_format, name)
                if stderr:
                    status.error += stderr.decode() if isinstance(stderr, bytes) else stderr

            status.end_time = time.time()

            # Save metrics to database if CloudSQL is enabled
            # Database persistence disabled - metrics preserved in local display only
            if False:  # CloudSQL functionality removed
                await self._save_metrics_to_database(name, config, status)

            if returncode == 0:
                status.status = "completed"
                logger.info(f"Instance {name} completed successfully")
                return True
            else:
                status.status = "failed"
                logger.error(f"Instance {name} failed with return code {returncode}")
                if status.error:
                    logger.error(f"Error output: {status.error}")
                return False

        except Exception as e:
            # Any launch/stream failure marks the instance failed rather
            # than propagating to the orchestrator loop.
            status.status = "failed"
            status.error = str(e)
            logger.error(f"Exception running instance {name}: {e}")
            return False
|
544
|
+
|
545
|
+
async def _save_metrics_to_database(self, name: str, config: InstanceConfig, status: InstanceStatus):
|
546
|
+
"""Database persistence has been removed for security. Metrics are displayed locally only."""
|
547
|
+
# CloudSQL functionality removed for security and simplicity
|
548
|
+
# Token metrics are preserved in the local display
|
549
|
+
logger.debug(f"Metrics for {name} available in local display only (database persistence disabled)")
|
550
|
+
|
551
|
+
def _calculate_cost(self, status: InstanceStatus) -> float:
|
552
|
+
"""Calculate cost with Claude pricing compliance engine and proper cache handling"""
|
553
|
+
|
554
|
+
# Use authoritative cost if available (preferred)
|
555
|
+
if status.total_cost_usd is not None:
|
556
|
+
return status.total_cost_usd
|
557
|
+
|
558
|
+
# Use pricing engine if available
|
559
|
+
if self.pricing_engine and TokenUsageData:
|
560
|
+
# Create usage data from status
|
561
|
+
usage_data = TokenUsageData(
|
562
|
+
input_tokens=status.input_tokens,
|
563
|
+
output_tokens=status.output_tokens,
|
564
|
+
cache_read_tokens=status.cache_read_tokens,
|
565
|
+
cache_creation_tokens=status.cache_creation_tokens,
|
566
|
+
total_tokens=status.total_tokens,
|
567
|
+
cache_type="5min", # Default to 5min cache
|
568
|
+
model=status.model_used # Use detected model
|
569
|
+
)
|
570
|
+
|
571
|
+
cost_breakdown = self.pricing_engine.calculate_cost(
|
572
|
+
usage_data,
|
573
|
+
status.total_cost_usd,
|
574
|
+
status.tool_tokens # Include tool token costs
|
575
|
+
)
|
576
|
+
return cost_breakdown.total_cost
|
577
|
+
|
578
|
+
# Fallback calculation with current pricing (legacy support)
|
579
|
+
# Claude 3.5 Sonnet current rates (as of 2024-2025)
|
580
|
+
input_cost = (status.input_tokens / 1_000_000) * 3.00 # $3 per M input tokens
|
581
|
+
output_cost = (status.output_tokens / 1_000_000) * 15.00 # $15 per M output tokens
|
582
|
+
|
583
|
+
# Cache costs with CORRECTED pricing based on Claude documentation
|
584
|
+
cache_read_cost = (status.cache_read_tokens / 1_000_000) * (3.00 * 0.1) # 10% of input rate
|
585
|
+
cache_creation_cost = (status.cache_creation_tokens / 1_000_000) * (3.00 * 1.25) # 5min cache: 25% premium
|
586
|
+
|
587
|
+
# Tool costs (fallback calculation)
|
588
|
+
tool_cost = 0.0
|
589
|
+
if status.tool_tokens:
|
590
|
+
for tool_name, tokens in status.tool_tokens.items():
|
591
|
+
tool_cost += (tokens / 1_000_000) * 3.00 # Tool tokens at input rate
|
592
|
+
|
593
|
+
return input_cost + output_cost + cache_read_cost + cache_creation_cost + tool_cost
|
594
|
+
|
595
|
+
async def _stream_output(self, name: str, process):
|
596
|
+
"""Stream output in real-time for stream-json format (DEPRECATED - use _stream_output_parallel)"""
|
597
|
+
status = self.statuses[name]
|
598
|
+
|
599
|
+
async def read_stream(stream, prefix):
|
600
|
+
while True:
|
601
|
+
line = await stream.readline()
|
602
|
+
if not line:
|
603
|
+
break
|
604
|
+
line_str = line.decode() if isinstance(line, bytes) else line
|
605
|
+
print(f"[{name}] {prefix}: {line_str.strip()}")
|
606
|
+
|
607
|
+
# Accumulate output
|
608
|
+
if prefix == "STDOUT":
|
609
|
+
status.output += line_str
|
610
|
+
else:
|
611
|
+
status.error += line_str
|
612
|
+
|
613
|
+
# Run both stdout and stderr reading concurrently
|
614
|
+
await asyncio.gather(
|
615
|
+
read_stream(process.stdout, "STDOUT"),
|
616
|
+
read_stream(process.stderr, "STDERR"),
|
617
|
+
return_exceptions=True
|
618
|
+
)
|
619
|
+
|
620
|
+
    async def _stream_output_parallel(self, name: str, process):
        """Stream output in real-time for stream-json format with proper
        parallel execution.

        Reads stdout and stderr concurrently, keeps a rolling buffer of
        the last ``max_console_lines`` formatted lines, prints periodic /
        important lines to the console, accumulates full output on the
        instance status, and feeds stdout lines to the token-usage parser.
        Always ends with a final per-instance summary and completion banner.
        """
        status = self.statuses[name]
        # Rolling buffer to show only recent lines (prevent console overflow)
        recent_lines_buffer = []
        line_count = 0

        def format_instance_line(content: str, prefix: str = "") -> str:
            """Format a line with clear instance separation and truncation"""
            # Truncate content to max_line_length
            if len(content) > self.max_line_length:
                content = content[:self.max_line_length-3] + "..."

            # Create clear visual separation
            instance_header = f"+=[{name}]" + "=" * (20 - len(name) - 4) if len(name) < 16 else f"+=[{name}]="
            if prefix:
                instance_header += f" {prefix} "

            return f"{instance_header}\n| {content}\n+" + "=" * (len(instance_header) - 1)

        async def read_stream(stream, prefix):
            # Shared counter across both stream readers.
            nonlocal line_count
            try:
                while True:
                    line = await stream.readline()
                    if not line:
                        break
                    line_str = line.decode() if isinstance(line, bytes) else line
                    line_count += 1

                    # Clean the line
                    clean_line = line_str.strip()

                    # Add to rolling buffer with formatted display
                    display_line = format_instance_line(clean_line, prefix)
                    recent_lines_buffer.append(display_line)

                    # Keep only the most recent lines
                    if len(recent_lines_buffer) > self.max_console_lines:
                        recent_lines_buffer.pop(0)

                    # Only show periodic updates to prevent spam
                    # Show every 10th line, or important lines (errors, completions)
                    # Respect quiet mode
                    if self.max_console_lines > 0:
                        should_display = (
                            line_count % 10 == 0 or  # Every 10th line
                            prefix == "STDERR" or  # All error lines
                            "completed" in clean_line.lower() or
                            "error" in clean_line.lower() or
                            "failed" in clean_line.lower() or
                            "success" in clean_line.lower()
                        )

                        if should_display:
                            print(f"\n{display_line}\n", flush=True)
                    elif prefix == "STDERR":
                        # In quiet mode, still show errors
                        error_display = format_instance_line(clean_line, "ERROR")
                        print(f"\n{error_display}\n", flush=True)

                    # Accumulate output in status (keep full output for saving)
                    if prefix == "STDOUT":
                        status.output += line_str
                        # Parse token usage from Claude output if present
                        self._parse_token_usage(clean_line, status, name)
                    else:
                        status.error += line_str
            except Exception as e:
                logger.error(f"Error reading {prefix} for instance {name}: {e}")

        # Create tasks for reading both streams concurrently
        stdout_task = asyncio.create_task(read_stream(process.stdout, "STDOUT"))
        stderr_task = asyncio.create_task(read_stream(process.stderr, "STDERR"))

        # Wait for both streams to be consumed
        try:
            await asyncio.gather(stdout_task, stderr_task, return_exceptions=True)
        except Exception as e:
            logger.error(f"Error in stream reading for instance {name}: {e}")
        finally:
            # Show final summary of recent lines for this instance
            if recent_lines_buffer and self.max_console_lines > 0:
                final_header = f"+=== FINAL OUTPUT [{name}] ===+"
                print(f"\n{final_header}")
                print(f"| Last {len(recent_lines_buffer)} lines of {line_count} total")
                print(f"| Status: {status.status}")
                if status.start_time:
                    duration = time.time() - status.start_time
                    print(f"| Duration: {duration:.1f}s")
                print("+" + "=" * (len(final_header) - 2) + "+\n")

            # Always show completion message with clear formatting
            completion_msg = f"š [{name}] COMPLETED - {line_count} lines processed, output saved"
            print(f"\n{'='*60}")
            print(f"{completion_msg}")
            print(f"{'='*60}\n")

            # Note: StreamReader objects in asyncio don't have .close() method
            # They are automatically closed when the process terminates
|
720
|
+
|
721
|
+
async def run_all_instances(self, timeout: int = 300) -> Dict[str, bool]:
    """Run all configured instances with a soft (staggered) startup.

    Each instance i is launched after ``i * self.startup_delay`` seconds so
    launches do not all hit the system at once.  Every instance is bounded by
    the same per-instance ``timeout`` (seconds).

    Args:
        timeout: Maximum runtime allowed for each individual instance.

    Returns:
        Mapping of instance name -> True on success, False on timeout/failure.
    """
    instance_names = list(self.instances.keys())
    logger.info(f"Starting {len(instance_names)} instances with {self.startup_delay}s delay between launches (timeout: {timeout}s each)")

    # Create tasks with staggered startup
    tasks = []
    for i, name in enumerate(instance_names):
        # Calculate delay for this instance (i * startup_delay seconds)
        delay = i * self.startup_delay
        if delay > 0:
            logger.info(f"Instance '{name}' will start in {delay}s")

        # Create a task that waits for its turn, then starts the instance
        task = asyncio.create_task(self._run_instance_with_delay(name, delay, timeout))
        tasks.append(task)

    # Start the rolling status report task if we have instances to monitor
    # NOTE: Do NOT add status reporter to main tasks list - it runs indefinitely
    # (gathering it together with the instance tasks would never return).
    if len(tasks) > 0 and not self.max_console_lines == 0:  # Don't show status in quiet mode
        self.status_report_task = asyncio.create_task(self._rolling_status_reporter())

    # Wait for all instance tasks to complete (not the status reporter).
    # return_exceptions=True so one failing instance does not cancel the rest;
    # exceptions are surfaced per-instance in the results list below.
    logger.debug(f"ā³ Waiting for {len(tasks)} instance tasks to complete...")
    results = await asyncio.gather(*tasks, return_exceptions=True)
    logger.debug("ā All instance tasks completed")

    # Stop the status reporter - CRITICAL: This prevents hanging
    # (the reporter loops forever and must be cancelled explicitly).
    if hasattr(self, 'status_report_task') and self.status_report_task and not self.status_report_task.done():
        logger.debug("š Cancelling status reporter task...")
        self.status_report_task.cancel()
        try:
            # Awaiting the cancelled task lets it run its CancelledError
            # handler (which prints a final status report) before we proceed.
            await self.status_report_task
            logger.debug("ā Status reporter task cancelled successfully")
        except asyncio.CancelledError:
            logger.debug("ā Status reporter task cancellation confirmed")
            pass
        except Exception as e:
            logger.warning(f"ā ļø Error cancelling status reporter: {e}")
    else:
        logger.debug("ā¹ļø No status reporter task to cancel")

    # Ensure all processes are cleaned up
    await self._cleanup_all_processes()

    # Translate per-task results (bool | Exception) into the final bool map.
    # NOTE: relies on dict insertion order matching the task creation order,
    # since `results` is zipped positionally against the instance names.
    final_results = {}
    for name, result in zip(self.instances.keys(), results):
        if isinstance(result, asyncio.TimeoutError):
            logger.error(f"Instance {name} timed out after {timeout}s")
            self.statuses[name].status = "failed"
            self.statuses[name].error = f"Timeout after {timeout}s"
            final_results[name] = False
        elif isinstance(result, Exception):
            logger.error(f"Instance {name} failed with exception: {result}")
            self.statuses[name].status = "failed"
            self.statuses[name].error = str(result)
            final_results[name] = False
        else:
            final_results[name] = result

    return final_results
|
782
|
+
|
783
|
+
async def _cleanup_all_processes(self):
    """Ensure all processes are properly cleaned up to prevent hanging.

    Sends SIGTERM to any instance whose status is still "running" and that
    has a recorded PID, then clears the internal process registry.  Already
    terminated processes are ignored silently.
    """
    logger.debug("š§¹ Cleaning up all processes...")

    for name, status in self.statuses.items():
        if status.pid and status.status == "running":
            try:
                import signal
                import os
                logger.debug(f"š Cleaning up hanging process for {name} (PID: {status.pid})")
                # SIGTERM only (graceful); no SIGKILL escalation here.
                os.kill(status.pid, signal.SIGTERM)
            except (OSError, ProcessLookupError):
                # Process already terminated
                pass
            except Exception as e:
                logger.warning(f"ā ļø Error cleaning up process {status.pid}: {e}")

    # Clear the processes dict (guarded: attribute may not exist if no
    # instance was ever launched).
    if hasattr(self, 'processes'):
        self.processes.clear()

    logger.debug("ā Process cleanup completed")
|
805
|
+
|
806
|
+
async def _run_instance_with_delay(self, name: str, delay: float, timeout: int) -> bool:
    """Launch one instance once its staggered-start delay has elapsed.

    Sleeps for ``delay`` seconds (if positive), then runs the instance under
    ``asyncio.wait_for`` so it cannot exceed ``timeout`` seconds.

    Returns:
        The boolean result of ``run_instance``; raises asyncio.TimeoutError
        if the timeout elapses first.
    """
    needs_stagger = delay > 0
    if needs_stagger:
        logger.info(f"Waiting {delay}s before starting instance '{name}'")
        await asyncio.sleep(delay)

    logger.info(f"Now starting instance '{name}' (after {delay}s delay)")
    instance_coro = self.run_instance(name)
    return await asyncio.wait_for(instance_coro, timeout=timeout)
|
814
|
+
|
815
|
+
async def _rolling_status_reporter(self):
    """Provide periodic status updates for all running instances.

    Loops forever, printing a status report every
    ``self.status_report_interval`` seconds.  Intended to be cancelled by
    the caller; on cancellation it prints one final report and re-raises
    CancelledError so the cancellation propagates normally.
    """
    try:
        while True:
            await asyncio.sleep(self.status_report_interval)
            await self._print_status_report()
    except asyncio.CancelledError:
        # Final status report when cancelled — must re-raise afterwards so
        # the awaiting caller observes the cancellation.
        await self._print_status_report(final=True)
        raise
    except Exception as e:
        # Reporting is best-effort; never let it take down the orchestrator.
        logger.error(f"Error in status reporter: {e}")
|
827
|
+
|
828
|
+
def _format_duration(self, seconds: float) -> str:
|
829
|
+
"""Format duration in seconds to a readable format"""
|
830
|
+
if seconds < 60:
|
831
|
+
return f"{seconds:.1f}s"
|
832
|
+
elif seconds < 3600:
|
833
|
+
minutes = int(seconds // 60)
|
834
|
+
remaining_seconds = seconds % 60
|
835
|
+
return f"{minutes}m{remaining_seconds:.0f}s"
|
836
|
+
else:
|
837
|
+
hours = int(seconds // 3600)
|
838
|
+
remaining_minutes = int((seconds % 3600) // 60)
|
839
|
+
return f"{hours}h{remaining_minutes}m"
|
840
|
+
|
841
|
+
def _format_tokens(self, tokens: int) -> str:
|
842
|
+
"""Format token count with thousands separator"""
|
843
|
+
if tokens >= 1000000:
|
844
|
+
return f"{tokens/1000000:.1f}M"
|
845
|
+
elif tokens >= 1000:
|
846
|
+
return f"{tokens/1000:.1f}K"
|
847
|
+
else:
|
848
|
+
return str(tokens)
|
849
|
+
|
850
|
+
def _get_budget_display(self, instance_name: str) -> str:
|
851
|
+
"""Get budget display string for an instance (e.g., '1.2K/5K' or '-' if no budget)"""
|
852
|
+
if not self.budget_manager:
|
853
|
+
return "-"
|
854
|
+
|
855
|
+
# Get the command for this instance
|
856
|
+
if instance_name not in self.instances:
|
857
|
+
return "-"
|
858
|
+
|
859
|
+
command = self.instances[instance_name].command
|
860
|
+
|
861
|
+
# ISSUE #1348 FIX: Use same command matching logic as _update_budget_tracking
|
862
|
+
if command and command.strip().startswith('/'):
|
863
|
+
# For slash commands, check if budget exists for base command vs full command
|
864
|
+
base_command_part = command.rstrip(';').split()[0] if command else command
|
865
|
+
# Check if budget exists for base command, otherwise use full command
|
866
|
+
if base_command_part in self.budget_manager.command_budgets:
|
867
|
+
budget_display_key = base_command_part
|
868
|
+
else:
|
869
|
+
budget_display_key = command.rstrip(';')
|
870
|
+
else:
|
871
|
+
# For non-slash commands/prompts, always use the full command text as budget key
|
872
|
+
budget_display_key = command.rstrip(';') if command else command
|
873
|
+
|
874
|
+
base_command = budget_display_key
|
875
|
+
|
876
|
+
# Check if this command has a budget
|
877
|
+
if base_command not in self.budget_manager.command_budgets:
|
878
|
+
return "-"
|
879
|
+
|
880
|
+
budget_info = self.budget_manager.command_budgets[base_command]
|
881
|
+
used_formatted = self._format_tokens(budget_info.used)
|
882
|
+
limit_formatted = self._format_tokens(budget_info.limit)
|
883
|
+
|
884
|
+
return f"{used_formatted}/{limit_formatted}"
|
885
|
+
|
886
|
+
def _calculate_token_median(self) -> float:
|
887
|
+
"""Calculate median token usage across all instances"""
|
888
|
+
token_counts = [status.total_tokens for status in self.statuses.values() if status.total_tokens > 0]
|
889
|
+
if not token_counts:
|
890
|
+
return 0
|
891
|
+
|
892
|
+
token_counts.sort()
|
893
|
+
n = len(token_counts)
|
894
|
+
if n % 2 == 0:
|
895
|
+
return (token_counts[n//2 - 1] + token_counts[n//2]) / 2
|
896
|
+
else:
|
897
|
+
return token_counts[n//2]
|
898
|
+
|
899
|
+
def _calculate_token_percentage(self, tokens: int, median: float) -> str:
|
900
|
+
"""Calculate percentage relative to median"""
|
901
|
+
if median == 0:
|
902
|
+
return "N/A"
|
903
|
+
percentage = (tokens / median) * 100
|
904
|
+
if percentage >= 150:
|
905
|
+
return f"+{percentage-100:.0f}%"
|
906
|
+
elif percentage <= 50:
|
907
|
+
return f"-{100-percentage:.0f}%"
|
908
|
+
else:
|
909
|
+
return f"{percentage-100:+.0f}%"
|
910
|
+
|
911
|
+
async def _print_status_report(self, final: bool = False):
    """Print a formatted status report of all instances.

    Emits (to stdout) a summary header, optional cost and budget sections,
    a per-instance table with separated cache metrics, and an aggregated
    tool-usage table.  No-op when there are no instances.

    Args:
        final: When True, label the report "FINAL STATUS" instead of
            the periodic "STATUS REPORT".
    """
    if not self.statuses:
        return

    current_time = time.time()
    report_type = "FINAL STATUS" if final else "STATUS REPORT"

    # Create status summary (counts per lifecycle state)
    status_counts = {"pending": 0, "running": 0, "completed": 0, "failed": 0}

    for name, status in self.statuses.items():
        status_counts[status.status] += 1

    # Calculate token median (used for the summary line)
    token_median = self._calculate_token_median()

    # Print the report header
    header = f"+=== {report_type} [{datetime.now().strftime('%H:%M:%S')}] ===+"
    print(f"\n{header}")
    print(f"| Total: {len(self.statuses)} instances")
    print(f"| Running: {status_counts['running']}, Completed: {status_counts['completed']}, Failed: {status_counts['failed']}, Pending: {status_counts['pending']}")

    # Show token usage summary across all instances
    total_tokens_all = sum(s.total_tokens for s in self.statuses.values())
    total_cached_all = sum(s.cached_tokens for s in self.statuses.values())
    total_tools_all = sum(s.tool_calls for s in self.statuses.values())
    median_str = self._format_tokens(int(token_median)) if token_median > 0 else "0"
    print(f"| Tokens: {self._format_tokens(total_tokens_all)} total, {self._format_tokens(total_cached_all)} cached | Median: {median_str} | Tools: {total_tools_all}")

    # --- ADD COST TRANSPARENCY SECTION ---
    # Only shown when a pricing engine is configured.
    if self.pricing_engine:
        total_cost = sum(self._calculate_cost(s) for s in self.statuses.values())
        avg_cost_per_instance = total_cost / len(self.statuses) if self.statuses else 0
        print(f"| š° Cost: ${total_cost:.4f} total, ${avg_cost_per_instance:.4f} avg/instance | Pricing: Claude compliant")

    # --- ADD BUDGET STATUS SECTION ---
    # Requires budget manager, visuals enabled, and the optional
    # render_progress_bar helper to be importable.
    if self.budget_manager and self.enable_budget_visuals and render_progress_bar:
        bm = self.budget_manager
        used_formatted = self._format_tokens(bm.total_usage)

        print(f"|")
        print(f"| TOKEN BUDGET STATUS |")

        if bm.overall_budget is not None:
            overall_bar = render_progress_bar(bm.total_usage, bm.overall_budget)
            total_formatted = self._format_tokens(bm.overall_budget)
            print(f"| Overall: {overall_bar} {used_formatted}/{total_formatted}")
        else:
            print(f"| Overall: [UNLIMITED] {used_formatted} used")

        if bm.command_budgets:
            print(f"| Command Budgets:")
            for name, budget_info in bm.command_budgets.items():
                bar = render_progress_bar(budget_info.used, budget_info.limit)
                limit_formatted = self._format_tokens(budget_info.limit)
                used_cmd_formatted = self._format_tokens(budget_info.used)
                print(f"|\t\t\t {name:<20} {bar} {used_cmd_formatted}/{limit_formatted}")
        else:
            print(f"| Command Budgets: None configured")

    print(f"| ")
    print(f"| š Model shows actual Claude model used (critical for accurate cost tracking)")
    print(f"| š” Tip: Model may differ from your config - Claude routes requests intelligently")
    # Print column headers with separated cache metrics
    print(f"| {'Status':<8} {'Name':<30} {'Model':<10} {'Duration':<10} {'Overall':<8} {'Tokens':<8} {'Cache Cr':<8} {'Cache Rd':<8} {'Tools':<6} {'Budget':<10}")
    print(f"| {'-'*8} {'-'*30} {'-'*10} {'-'*10} {'-'*8} {'-'*8} {'-'*8} {'-'*8} {'-'*6} {'-'*10}")


    for name, status in self.statuses.items():
        # Status emoji
        emoji_map = {
            "pending": "ā³",
            "running": "š",
            "completed": "ā",
            "failed": "ā"
        }
        emoji = emoji_map.get(status.status, "ā")

        # Calculate uptime/duration: completed instances show total runtime,
        # running ones show elapsed time so far, un-started ones "waiting".
        if status.start_time:
            if status.end_time:
                duration = status.end_time - status.start_time
                time_info = self._format_duration(duration)
            else:
                uptime = current_time - status.start_time
                time_info = self._format_duration(uptime)
        else:
            time_info = "waiting"

        # Format separated token information for user-friendly display
        # CHANGE LOG (v1.2.0): Separated cache metrics display for better cost visibility
        # - Overall: Shows complete token count (input + output + cache_read + cache_creation)
        # - Tokens: Shows only core processing tokens (input + output)
        # - Cache Cr: Shows cache creation tokens (expensive - the "golden ticket")
        # - Cache Rd: Shows cache read tokens (cheap)
        # - Formula: Overall = Tokens + Cache Cr + Cache Rd (detailed breakdown)

        # Overall = total_tokens (which includes input + output + cache_read + cache_creation)
        overall_tokens = self._format_tokens(status.total_tokens) if status.total_tokens > 0 else "0"

        # Tokens = input + output only (core processing tokens)
        core_tokens = status.input_tokens + status.output_tokens
        tokens_info = self._format_tokens(core_tokens) if core_tokens > 0 else "0"

        # Cache Creation = cache_creation_tokens (expensive "golden ticket" tokens)
        cache_creation_info = self._format_tokens(status.cache_creation_tokens) if status.cache_creation_tokens > 0 else "0"

        # Cache Read = cache_read_tokens (cheap cache hits)
        cache_read_info = self._format_tokens(status.cache_read_tokens) if status.cache_read_tokens > 0 else "0"

        tool_info = str(status.tool_calls) if status.tool_calls > 0 else "0"

        # Format model name for display (strip 'claude-' prefix and dashes)
        model_short = status.model_used.replace('claude-', '').replace('-', '') if status.model_used else "unknown"

        # Get budget information for this instance
        budget_info = self._get_budget_display(name)

        # Create detailed line with separated cache metrics
        detail = f" {emoji:<8} {name:<30} {model_short:<10} {time_info:<10} {overall_tokens:<8} {tokens_info:<8} {cache_creation_info:<8} {cache_read_info:<8} {tool_info:<6} {budget_info:<10}"

        print(f"|{detail}")

    footer = "+" + "=" * (len(header) - 2) + "+"
    print(f"{footer}")

    # --- ADD DETAILED TOOL USAGE TABLE WITH TOKENS AND COSTS ---
    # Aggregate per-tool use counts and tokens across all instances.
    all_tools = {}
    for status in self.statuses.values():
        for tool_name, count in status.tool_details.items():
            if tool_name not in all_tools:
                all_tools[tool_name] = {"count": 0, "tokens": 0, "instances": []}
            all_tools[tool_name]["count"] += count
            all_tools[tool_name]["tokens"] += status.tool_tokens.get(tool_name, 0)

            # Format instance info with tokens if available
            tool_tokens = status.tool_tokens.get(tool_name, 0)
            if tool_tokens > 0:
                all_tools[tool_name]["instances"].append(f"{status.name}({count} uses, {tool_tokens} tok)")
            else:
                all_tools[tool_name]["instances"].append(f"{status.name}({count} uses)")


    if all_tools:
        print(f"\n+=== TOOL USAGE DETAILS ===+")
        print(f"| {'Tool Name':<20} {'Uses':<8} {'Tokens':<10} {'Cost ($)':<10} {'Used By':<35}")
        print(f"| {'-'*20} {'-'*8} {'-'*10} {'-'*10} {'-'*35}")

        total_tool_uses = 0
        total_tool_tokens = 0
        total_tool_cost = 0.0

        for tool_name, details in sorted(all_tools.items()):
            tool_count = details["count"]
            tool_tokens = details["tokens"]

            # Calculate tool cost at current model rates (3.5 sonnet input rate)
            # NOTE(review): $3.00/M is hard-coded here and may diverge from the
            # pricing engine's per-model rates — confirm intent.
            tool_cost = (tool_tokens / 1_000_000) * 3.00 if tool_tokens > 0 else 0.0

            instances_str = ", ".join(details["instances"][:2])  # Show first 2 instances
            if len(details["instances"]) > 2:
                instances_str += f" +{len(details['instances'])-2} more"

            token_str = f"{tool_tokens:,}" if tool_tokens > 0 else "0"
            cost_str = f"{tool_cost:.4f}" if tool_cost > 0 else "0"

            print(f"| {tool_name:<20} {tool_count:<8} {token_str:<10} {cost_str:<10} {instances_str:<35}")

            total_tool_uses += tool_count
            total_tool_tokens += tool_tokens
            total_tool_cost += tool_cost

        print(f"| {'-'*20} {'-'*8} {'-'*10} {'-'*10} {'-'*35}")
        total_tokens_str = f"{total_tool_tokens:,}" if total_tool_tokens > 0 else "0"
        total_cost_str = f"{total_tool_cost:.4f}" if total_tool_cost > 0 else "0"
        print(f"| {'TOTAL':<20} {total_tool_uses:<8} {total_tokens_str:<10} {total_cost_str:<10}")
        print(f"+{'='*95}+")

    print()
|
1091
|
+
|
1092
|
+
def _detect_permission_error(self, line: str, status: InstanceStatus, instance_name: str) -> bool:
    """Detect permission errors and command blocking issues - Issue #1320 fix.

    Inspects a single output line.  JSON tool_result payloads flagged
    ``is_error`` are matched against known permission phrases; plain-text
    lines are matched against a second phrase list.  On a match, a banner
    is printed, the error is logged and appended to ``status.error``.

    Returns:
        True when a permission error/warning was detected, else False.
    """
    line_stripped = line.strip()
    if not line_stripped:
        return False

    # Try to parse as JSON first
    if line_stripped.startswith('{'):
        try:
            json_data = json.loads(line_stripped)

            # Check for permission errors in tool results
            if json_data.get('type') == 'user' and 'message' in json_data:
                message = json_data.get('message', {})
                if isinstance(message, dict) and 'content' in message:
                    content_list = message.get('content', [])
                    if isinstance(content_list, list):
                        for item in content_list:
                            if isinstance(item, dict) and item.get('type') == 'tool_result':
                                if item.get('is_error'):
                                    # NOTE(review): assumes tool_result 'content'
                                    # is a string; if Claude emits a content
                                    # list here, .lower() would raise — confirm
                                    # against the stream-json schema.
                                    error_content = item.get('content', '')
                                    if any(phrase in error_content.lower() for phrase in [
                                        'requires approval',
                                        'permission denied',
                                        'haven\'t granted it yet',
                                        'claude requested permissions'
                                    ]):
                                        # CRITICAL ERROR - Make it VERY visible
                                        error_msg = f"""
+============================================================================+
| šØšØšØ PERMISSION ERROR DETECTED - COMMAND BLOCKED šØšØšØ |
| Instance: {instance_name:<60}|
| Error: {error_content[:68]:<68}|
+============================================================================+
| SOLUTION: zen_orchestrator.py now uses bypassPermissions by default: |
| ⢠Default: bypassPermissions (avoids approval prompts on all platforms) |
| ⢠Users can override via permission_mode in config if needed |
| |
| Current platform: {platform.system():<58}|
| Using permission mode: {self.instances[instance_name].permission_mode:<52}|
+============================================================================+
"""
                                        print(error_msg, flush=True)
                                        logger.critical(f"PERMISSION ERROR in {instance_name}: {error_content}")
                                        status.error += f"\n[PERMISSION ERROR]: {error_content}\n"
                                        return True
        except json.JSONDecodeError:
            # Not valid JSON — fall through to text-pattern checks below.
            pass

    # Check for text-based error patterns
    line_lower = line.lower()
    if any(phrase in line_lower for phrase in [
        'this command requires approval',
        'permission denied',
        'access denied',
        'not authorized',
        'insufficient permissions'
    ]):
        error_msg = f"""
+============================================================================+
| ā ļø PERMISSION WARNING DETECTED |
| Instance: {instance_name:<60}|
| Line: {line_stripped[:70]:<70}|
+============================================================================+
"""
        print(error_msg, flush=True)
        logger.warning(f"Permission warning in {instance_name}: {line_stripped}")
        return True

    return False
|
1162
|
+
|
1163
|
+
def _parse_token_usage(self, line: str, status: InstanceStatus, instance_name: str):
    """Parse token usage information from Claude Code JSON output lines.

    Pipeline: (1) bail out if the line is a permission error, (2) attempt
    structured JSON parsing, (3) fall back to regex parsing.  Whenever the
    instance's total token count changes, budget tracking is updated.
    """
    # FIRST: Check for permission errors (Issue #1320)
    if self._detect_permission_error(line, status, instance_name):
        return  # Don't parse tokens if there's an error

    # DEBUG: Log lines with potential token information
    if line.strip() and any(keyword in line.lower() for keyword in ['token', 'usage', 'total', 'input', 'output']):
        self.log_at_level(LogLevel.DETAILED, f"š TOKEN PARSE [{instance_name}]: {line[:100]}{'...' if len(line) > 100 else ''}", logger.debug)

    # Track previous total for delta detection
    prev_total = status.total_tokens

    # First try to parse as JSON - this is the modern approach for stream-json format
    if self._try_parse_json_token_usage(line, status):
        # Check if tokens actually changed before touching budgets
        if status.total_tokens != prev_total:
            self.log_at_level(LogLevel.DETAILED, f"ā JSON PARSE SUCCESS [{instance_name}]: tokens {prev_total} ā {status.total_tokens}")
            self._update_budget_tracking(status, instance_name)
        return

    # Fallback to regex parsing for backward compatibility or non-JSON output
    self._parse_token_usage_fallback(line, status)

    # Check if tokens changed in fallback parsing
    if status.total_tokens != prev_total:
        logger.info(f"ā REGEX PARSE SUCCESS [{instance_name}]: tokens {prev_total} ā {status.total_tokens}")

    # Budget tracking runs unconditionally on the fallback path (it is a
    # no-op when the total has not advanced past the last known value).
    self._update_budget_tracking(status, instance_name)
|
1192
|
+
|
1193
|
+
def _update_budget_tracking(self, status: InstanceStatus, instance_name: str):
    """Update budget tracking with token deltas and check for runtime budget violations.

    Records only the *new* tokens since the last call (delta against
    ``status._last_known_total_tokens``) under the command's budget key,
    then triggers runtime budget enforcement.
    """
    # Use total_tokens which already includes all token types (input + output + cache_read + cache_creation)
    current_billable_tokens = status.total_tokens

    # Extract command information
    command = self.instances[instance_name].command
    # ISSUE #1348 FIX: Use full command as budget key to match config file command_budgets
    # This ensures tokens are recorded under the same key that budgets are configured with
    if command and command.strip().startswith('/'):
        # For slash commands, check if budget exists for base command vs full command
        base_command_part = command.rstrip(';').split()[0] if command else command
        # Check if budget exists for base command, otherwise use full command
        if self.budget_manager and base_command_part in self.budget_manager.command_budgets:
            budget_key = base_command_part
            logger.debug(f"šÆ SLASH COMMAND: Using base command '{base_command_part}' for budget (found in budgets)")
        else:
            budget_key = command.rstrip(';')
            logger.debug(f"šÆ SLASH COMMAND: Using full command '{budget_key}' for budget (base not found)")
    else:
        # For non-slash commands/prompts, always use the full command text as budget key
        budget_key = command.rstrip(';') if command else command
        logger.debug(f"šÆ RAW COMMAND: Using full command '{budget_key}' for budget tracking")

    # Use budget_key for all operations instead of base_command
    base_command = budget_key

    # ENHANCED DEBUG: Log budget tracking state
    logger.debug(f"š BUDGET DEBUG [{instance_name}]: command='{base_command}', current_tokens={current_billable_tokens}, last_known={status._last_known_total_tokens}")

    # Check if this command has a budget configured (debug visibility only)
    if self.budget_manager and base_command in self.budget_manager.command_budgets:
        budget_info = self.budget_manager.command_budgets[base_command]
        logger.debug(f"šÆ BUDGET FOUND [{instance_name}]: {base_command} has budget {budget_info.used}/{budget_info.limit} ({budget_info.percentage:.1f}%)")
    elif self.budget_manager:
        logger.debug(f"ā ļø NO BUDGET [{instance_name}]: command '{base_command}' not in budget keys: {list(self.budget_manager.command_budgets.keys())}")

    # Only record when the total has advanced past the last recorded value.
    if self.budget_manager and current_billable_tokens > status._last_known_total_tokens:
        new_tokens = current_billable_tokens - status._last_known_total_tokens

        self.log_at_level(LogLevel.CONCISE, f"š° BUDGET UPDATE [{instance_name}]: Recording {new_tokens} tokens for command '{base_command}'")

        # Record the usage
        self.budget_manager.record_usage(base_command, new_tokens)
        status._last_known_total_tokens = current_billable_tokens

        # Log the new budget state
        if base_command in self.budget_manager.command_budgets:
            budget_info = self.budget_manager.command_budgets[base_command]
            self.log_at_level(LogLevel.CONCISE, f"š BUDGET STATE [{instance_name}]: {base_command} now at {budget_info.used}/{budget_info.limit} tokens ({budget_info.percentage:.1f}%)")

        # RUNTIME BUDGET ENFORCEMENT - Check if we've exceeded budgets during execution
        self._check_runtime_budget_violation(status, instance_name, base_command)
    elif self.budget_manager and current_billable_tokens == 0:
        logger.warning(f"š« NO TOKENS [{instance_name}]: total_tokens is still 0 - token detection may be failing")
|
1248
|
+
|
1249
|
+
def _check_runtime_budget_violation(self, status: InstanceStatus, instance_name: str, base_command: str):
    """Check for budget violations during runtime and terminate instances if needed.

    Overall budget takes precedence over per-command budgets (only one
    violation reason is reported per call).  In "block" enforcement mode the
    instance is terminated; otherwise a prominent warning is printed.
    """
    if not self.budget_manager:
        return

    # Check if current usage violates any budget
    violation_detected = False
    violation_reason = ""

    # Check overall budget
    if (self.budget_manager.overall_budget is not None and
        self.budget_manager.total_usage > self.budget_manager.overall_budget):
        violation_detected = True
        violation_reason = f"Overall budget exceeded: {self.budget_manager.total_usage}/{self.budget_manager.overall_budget} tokens"

    # Check command budget (only if overall budget check didn't fail)
    elif (base_command in self.budget_manager.command_budgets):
        command_budget = self.budget_manager.command_budgets[base_command]
        if command_budget.used > command_budget.limit:
            violation_detected = True
            violation_reason = f"Command '{base_command}' budget exceeded: {command_budget.used}/{command_budget.limit} tokens"

    if violation_detected:
        message = f"Runtime budget violation for {instance_name}: {violation_reason}"

        if self.budget_manager.enforcement_mode == "block":
            logger.error(f"š« š“ RUNTIME TERMINATION: {message}")
            self._terminate_instance(status, instance_name, f"Terminated due to budget violation - {violation_reason}")
        else:  # warn mode
            # EXPLICIT YELLOW WARNING SYMBOLS FOR VISIBILITY
            logger.warning(f"š¶ ā ļø š” BUDGET EXCEEDED WARNING: {message}")
            print(f"\n{'='*80}")
            print(f"š¶ ā ļø š” BUDGET VIOLATION WARNING š” ā ļø š¶")
            print(f"Instance: {instance_name}")
            print(f"Reason: {violation_reason}")
            print(f"{'='*80}\n")
|
1285
|
+
|
1286
|
+
def _terminate_instance(self, status: InstanceStatus, instance_name: str, reason: str):
    """Terminate a running instance due to budget violation.

    Sends SIGTERM (graceful; no SIGKILL escalation) and marks the instance
    failed with the given reason.  All errors are caught and logged so a
    failed termination cannot crash the orchestrator.
    """
    try:
        if status.pid and status.status == "running":
            logger.info(f"Terminating instance {instance_name} (PID: {status.pid}): {reason}")

            # Try graceful termination first
            import signal
            import os
            try:
                os.kill(status.pid, signal.SIGTERM)
                logger.info(f"Sent SIGTERM to {instance_name} (PID: {status.pid})")
            except (OSError, ProcessLookupError) as e:
                logger.warning(f"Could not send SIGTERM to {status.pid}: {e}")

            # Update status regardless of whether the signal was delivered —
            # the instance is considered failed once termination is requested.
            status.status = "failed"
            status.error = reason
            status.end_time = time.time()

        else:
            logger.warning(f"Cannot terminate {instance_name}: no PID or not running (status: {status.status})")

    except Exception as e:
        logger.error(f"Failed to terminate instance {instance_name}: {e}")
|
1311
|
+
|
1312
|
+
def _extract_message_id(self, json_data: dict) -> Optional[str]:
|
1313
|
+
"""Extract message ID from JSON data for deduplication tracking"""
|
1314
|
+
# Try multiple common locations where message ID might be stored
|
1315
|
+
message_id = (
|
1316
|
+
json_data.get('id') or
|
1317
|
+
json_data.get('message_id') or
|
1318
|
+
(json_data.get('message', {}).get('id') if isinstance(json_data.get('message'), dict) else None) or
|
1319
|
+
(json_data.get('response', {}).get('id') if isinstance(json_data.get('response'), dict) else None)
|
1320
|
+
)
|
1321
|
+
return message_id
|
1322
|
+
|
1323
|
+
def _update_cache_tokens_for_compatibility(self, status: InstanceStatus):
|
1324
|
+
"""Update legacy cached_tokens field for backward compatibility"""
|
1325
|
+
# Maintain backward compatibility by updating the combined cached_tokens field
|
1326
|
+
status.cached_tokens = status.cache_read_tokens + status.cache_creation_tokens
|
1327
|
+
|
1328
|
+
    def _try_parse_json_token_usage(self, line: str, status: InstanceStatus) -> bool:
        """SDK-compliant token usage parsing with message ID deduplication.

        Interprets one output line as a JSON object and folds any token-usage
        or tool-call information it carries into ``status``.

        Returns:
            True when the line was consumed as JSON token/tool data
            (including the skip-duplicate case); False when the line is not
            JSON or contains no recognizable token fields.
        """
        line = line.strip()
        # Fast pre-filter: only object-shaped lines can carry usage JSON.
        if not line.startswith('{'):
            return False

        try:
            json_data = json.loads(line)

            # ADD DEBUG LOGGING FOR TOKEN PARSING
            logger.debug(f"š TOKEN PARSING: Analyzing JSON line with keys: {list(json_data.keys())}")

            # Special debug for tool detection
            if 'type' in json_data:
                logger.debug(f"šÆ JSON TYPE: {json_data['type']}")
                if json_data['type'] == 'assistant' and 'message' in json_data:
                    message = json_data.get('message', {})
                    if isinstance(message, dict) and 'content' in message:
                        content = message.get('content', [])
                        if isinstance(content, list):
                            tool_types = [item.get('type') for item in content if isinstance(item, dict)]
                            logger.debug(f"šÆ CONTENT TYPES: {tool_types}")

            # Check if this looks like a tool usage line
            if 'name' in json_data and ('type' in json_data and json_data['type'] in ['tool_use', 'tool_call']):
                logger.debug(f"šÆ POTENTIAL TOOL: type={json_data.get('type')}, name={json_data.get('name')}")

            # Extract message ID for deduplication
            message_id = self._extract_message_id(json_data)

            if message_id:
                # SDK Rule: Skip if already processed this message ID
                if message_id in status.processed_message_ids:
                    logger.debug(f"Skipping duplicate message ID: {message_id}")
                    return True

                # Mark as processed
                status.processed_message_ids.add(message_id)

            # DETECT AND STORE MODEL NAME
            if self.pricing_engine:
                detected_model = self.pricing_engine.detect_model_from_response(json_data)
                if detected_model != status.model_used:
                    logger.debug(f"š¤ MODEL DETECTED: {detected_model} (was {status.model_used})")
                    status.model_used = detected_model

            # Process usage data (only once per message ID).
            # Search order: top-level 'usage', nested 'message.usage',
            # structured 'tokens' dict, then bare numeric fields at the root.
            usage_data = None
            if 'usage' in json_data:
                usage_data = json_data['usage']
                self.log_at_level(LogLevel.DETAILED, f"š TOKEN DATA: Found usage data: {usage_data}")
            elif 'message' in json_data and isinstance(json_data['message'], dict) and 'usage' in json_data['message']:
                usage_data = json_data['message']['usage']
                self.log_at_level(LogLevel.DETAILED, f"š TOKEN DATA: Found nested usage data: {usage_data}")
            elif 'tokens' in json_data and isinstance(json_data['tokens'], dict):
                # Handle structured token data format
                usage_data = json_data['tokens']
                self.log_at_level(LogLevel.DETAILED, f"š TOKEN DATA: Found tokens data: {usage_data}")
            else:
                # Check for direct token fields at the top level
                direct_tokens = {}
                for key in ['input_tokens', 'output_tokens', 'total_tokens', 'input', 'output', 'total']:
                    if key in json_data and isinstance(json_data[key], (int, float)):
                        direct_tokens[key] = json_data[key]

                if direct_tokens:
                    usage_data = direct_tokens
                    self.log_at_level(LogLevel.DETAILED, f"š TOKEN DATA: Found direct token fields: {usage_data}")
                else:
                    self.log_at_level(LogLevel.DETAILED, f"ā NO TOKEN DATA: No usage fields found in JSON with keys: {list(json_data.keys())}", logger.debug)

            if usage_data and isinstance(usage_data, dict):
                # FIXED: Use cumulative addition for progressive token counts, not max()
                # NOTE(review): prev_input/prev_output are assigned but never
                # read below — confirm intent before removing.
                prev_input = status.input_tokens
                prev_output = status.output_tokens

                if 'input_tokens' in usage_data:
                    new_input = int(usage_data['input_tokens'])
                    status.input_tokens = max(status.input_tokens, new_input)  # Keep max for final totals
                elif 'input' in usage_data:  # Alternative format
                    new_input = int(usage_data['input'])
                    status.input_tokens = max(status.input_tokens, new_input)

                if 'output_tokens' in usage_data:
                    status.output_tokens = max(status.output_tokens, int(usage_data['output_tokens']))
                elif 'output' in usage_data:  # Alternative format
                    status.output_tokens = max(status.output_tokens, int(usage_data['output']))

                # Separate cache types for accurate billing
                if 'cache_read_input_tokens' in usage_data:
                    status.cache_read_tokens = max(status.cache_read_tokens, int(usage_data['cache_read_input_tokens']))
                if 'cache_creation_input_tokens' in usage_data:
                    status.cache_creation_tokens = max(status.cache_creation_tokens, int(usage_data['cache_creation_input_tokens']))

                # Handle legacy cached field
                if 'cached' in usage_data:
                    # If we don't have separate cache data, use the combined field
                    if 'cache_read_input_tokens' not in usage_data and 'cache_creation_input_tokens' not in usage_data:
                        cached_total = int(usage_data['cached'])
                        status.cache_read_tokens = max(status.cache_read_tokens, cached_total)

                # Use authoritative total when available
                if 'total_tokens' in usage_data:
                    total = int(usage_data['total_tokens'])
                    prev_total = status.total_tokens

                    # BUDGET FIX: Handle both cumulative and individual message tokens
                    # If this looks like individual message tokens (has message_id), accumulate
                    # If this looks like cumulative session tokens (no message_id), use max
                    if message_id:
                        # Individual message - accumulate if it represents new work
                        status.total_tokens += total
                        logger.debug(f"šÆ TOTAL from 'total_tokens' (individual): {prev_total} + {total} ā {status.total_tokens}")
                    else:
                        # Cumulative session total - use max to handle running totals
                        status.total_tokens = max(status.total_tokens, total)
                        logger.debug(f"šÆ TOTAL from 'total_tokens' (cumulative): {prev_total} ā {status.total_tokens}")
                elif 'total' in usage_data:  # Alternative format
                    total = int(usage_data['total'])
                    prev_total = status.total_tokens

                    # BUDGET FIX: Same logic for alternative format
                    if message_id:
                        # Individual message - accumulate
                        status.total_tokens += total
                        logger.debug(f"šÆ TOTAL from 'total' (individual): {prev_total} + {total} ā {status.total_tokens}")
                    else:
                        # Cumulative session total - use max
                        status.total_tokens = max(status.total_tokens, total)
                        logger.debug(f"šÆ TOTAL from 'total' (cumulative): {prev_total} ā {status.total_tokens}")
                else:
                    # Calculate total from components if not provided
                    calculated_total = (status.input_tokens + status.output_tokens +
                                        status.cache_read_tokens + status.cache_creation_tokens)
                    prev_total = status.total_tokens
                    status.total_tokens = max(status.total_tokens, calculated_total)
                    logger.debug(f"šÆ TOTAL calculated: {prev_total} ā {status.total_tokens} (input:{status.input_tokens} + output:{status.output_tokens} + cache_read:{status.cache_read_tokens} + cache_creation:{status.cache_creation_tokens})")

                # Store authoritative cost if available
                if 'total_cost_usd' in usage_data:
                    status.total_cost_usd = max(status.total_cost_usd or 0, float(usage_data['total_cost_usd']))

                # Update backward compatibility field
                self._update_cache_tokens_for_compatibility(status)

                # ADD DETAILED LOGGING FOR TOKEN UPDATES
                logger.debug(f"ā TOKEN UPDATE: input={status.input_tokens}, output={status.output_tokens}, "
                             f"total={status.total_tokens}, cached={status.cached_tokens}")

                return True

            # Handle tool calls with detailed tracking
            if 'type' in json_data:
                logger.debug(f"š TOOL DETECTION: Found type='{json_data['type']}', checking for tool usage...")

                if json_data['type'] in ['tool_use', 'tool_call', 'tool_execution']:
                    # Extract tool name for detailed tracking (ALWAYS track, even without message_id)
                    tool_name = json_data.get('name', json_data.get('tool_name', 'unknown_tool'))
                    status.tool_details[tool_name] = status.tool_details.get(tool_name, 0) + 1
                    status.tool_calls += 1

                    logger.debug(f"š§ TOOL FOUND: {tool_name} (message_id={message_id})")

                    # Track tool token usage if available
                    tool_tokens = 0
                    if 'usage' in json_data and isinstance(json_data['usage'], dict):
                        tool_usage = json_data['usage']
                        tool_tokens = tool_usage.get('total_tokens',
                                                     tool_usage.get('input_tokens', 0) + tool_usage.get('output_tokens', 0))
                    elif 'tokens' in json_data:
                        tool_tokens = int(json_data.get('tokens', 0))
                    elif 'token_usage' in json_data:
                        tool_tokens = int(json_data.get('token_usage', 0))

                    if tool_tokens > 0:
                        status.tool_tokens[tool_name] = status.tool_tokens.get(tool_name, 0) + tool_tokens
                        logger.debug(f"š§ TOOL TRACKED: {tool_name} (uses: {status.tool_details[tool_name]}, tokens: {status.tool_tokens[tool_name]})")
                    else:
                        logger.debug(f"š§ TOOL TRACKED: {tool_name} (uses: {status.tool_details[tool_name]}, no tokens)")
                    return True
                elif json_data['type'] == 'message' and 'tool_calls' in json_data:
                    # Count tool calls in message with token tracking
                    tool_calls = json_data['tool_calls']
                    logger.debug(f"š§ TOOL MESSAGE: Found tool_calls in message: {tool_calls}")
                    if isinstance(tool_calls, list):
                        for tool in tool_calls:
                            if isinstance(tool, dict):
                                tool_name = tool.get('name', tool.get('function', {}).get('name', 'unknown_tool'))
                                status.tool_details[tool_name] = status.tool_details.get(tool_name, 0) + 1

                                # Track tool tokens if available in tool data
                                tool_tokens = 0
                                if 'tokens' in tool:
                                    tool_tokens = int(tool['tokens'])
                                elif 'usage' in tool and isinstance(tool['usage'], dict):
                                    tool_usage = tool['usage']
                                    tool_tokens = tool_usage.get('total_tokens', 0)

                                if tool_tokens > 0:
                                    status.tool_tokens[tool_name] = status.tool_tokens.get(tool_name, 0) + tool_tokens
                                    logger.debug(f"š§ TOOL FROM MESSAGE: {tool_name} (tokens: {tool_tokens})")

                        status.tool_calls += len(tool_calls)
                    elif isinstance(tool_calls, (int, float)):
                        # When tool_calls is just a number, add generic tool entries
                        tool_count = int(tool_calls)
                        status.tool_calls += tool_count
                        # Add generic tool details so the table appears
                        generic_tool_name = "Claude_Tool"  # Generic name when specific name unavailable
                        status.tool_details[generic_tool_name] = status.tool_details.get(generic_tool_name, 0) + tool_count
                    return True
                elif json_data['type'] == 'assistant' and 'message' in json_data:
                    # Handle Claude Code format: {"type":"assistant","message":{"content":[{"type":"tool_use","name":"Task",...}]}}
                    message = json_data['message']
                    if isinstance(message, dict) and 'content' in message:
                        content = message['content']
                        if isinstance(content, list):
                            for item in content:
                                if isinstance(item, dict) and item.get('type') == 'tool_use':
                                    tool_name = item.get('name', 'unknown_tool')
                                    tool_use_id = item.get('id', '')

                                    # Store the mapping for later tool_result processing
                                    if tool_use_id:
                                        status.tool_id_mapping[tool_use_id] = tool_name

                                    status.tool_details[tool_name] = status.tool_details.get(tool_name, 0) + 1
                                    status.tool_calls += 1
                                    logger.debug(f"š§ TOOL FROM ASSISTANT CONTENT: {tool_name} (id: {tool_use_id})")
                    return True
                elif json_data['type'] == 'user' and 'message' in json_data:
                    # Handle Claude Code user messages with tool results: {"type":"user","message":{"content":[{"type":"tool_result","tool_use_id":"..."}]}}
                    message = json_data['message']
                    if isinstance(message, dict) and 'content' in message:
                        content = message['content']
                        if isinstance(content, list):
                            for item in content:
                                if isinstance(item, dict):
                                    # Tool result indicates a tool was used
                                    if item.get('type') == 'tool_result' and 'tool_use_id' in item:
                                        # Use stored mapping if available, otherwise extract from content
                                        tool_use_id = item['tool_use_id']
                                        if tool_use_id in status.tool_id_mapping:
                                            tool_name = status.tool_id_mapping[tool_use_id]
                                        else:
                                            tool_name = self._extract_tool_name_from_result(item, tool_use_id)

                                        # Don't double-count if we already counted this in tool_use
                                        if tool_use_id not in status.tool_id_mapping:
                                            status.tool_details[tool_name] = status.tool_details.get(tool_name, 0) + 1
                                            status.tool_calls += 1

                                        # Estimate tool token usage based on content size
                                        tool_tokens = self._estimate_tool_tokens(item)
                                        if tool_tokens > 0:
                                            status.tool_tokens[tool_name] = status.tool_tokens.get(tool_name, 0) + tool_tokens
                                            logger.debug(f"š§ TOOL FROM USER CONTENT: {tool_name} (tool_use_id: {tool_use_id}, estimated_tokens: {tool_tokens})")
                                        else:
                                            logger.debug(f"š§ TOOL FROM USER CONTENT: {tool_name} (tool_use_id: {tool_use_id})")
                                    # Tool use in user message (request)
                                    elif item.get('type') == 'tool_use' and 'name' in item:
                                        tool_name = item.get('name', 'unknown_tool')
                                        status.tool_details[tool_name] = status.tool_details.get(tool_name, 0) + 1
                                        status.tool_calls += 1

                                        # Estimate tool token usage for tool use (typically smaller than results)
                                        tool_tokens = self._estimate_tool_tokens(item, is_tool_use=True)
                                        if tool_tokens > 0:
                                            status.tool_tokens[tool_name] = status.tool_tokens.get(tool_name, 0) + tool_tokens
                                            logger.debug(f"š§ TOOL USE FROM USER CONTENT: {tool_name} (estimated_tokens: {tool_tokens})")
                                        else:
                                            logger.debug(f"š§ TOOL USE FROM USER CONTENT: {tool_name}")
                    return True

            # Handle direct token fields at root level (without message ID - treat as individual message tokens)
            token_fields_found = False
            if not message_id:  # Only process these if no message ID (prevents double counting)
                if 'input_tokens' in json_data:
                    # BUDGET FIX: For direct fields without message_id, accumulate as individual messages
                    new_input = int(json_data['input_tokens'])
                    status.input_tokens += new_input
                    token_fields_found = True
                    logger.debug(f"šÆ DIRECT input_tokens: +{new_input} ā {status.input_tokens}")
                if 'output_tokens' in json_data:
                    new_output = int(json_data['output_tokens'])
                    status.output_tokens += new_output
                    token_fields_found = True
                    logger.debug(f"šÆ DIRECT output_tokens: +{new_output} ā {status.output_tokens}")
                if 'cached_tokens' in json_data:
                    cached_total = int(json_data['cached_tokens'])
                    status.cache_read_tokens += cached_total  # Accumulate cache tokens too
                    self._update_cache_tokens_for_compatibility(status)
                    token_fields_found = True
                    logger.debug(f"šÆ DIRECT cached_tokens: +{cached_total} ā {status.cache_read_tokens}")
                if 'total_tokens' in json_data:
                    total = int(json_data['total_tokens'])
                    prev_total = status.total_tokens
                    status.total_tokens += total  # Accumulate total tokens
                    token_fields_found = True
                    logger.debug(f"šÆ DIRECT total_tokens: {prev_total} + {total} ā {status.total_tokens}")
                if 'tool_calls' in json_data and isinstance(json_data['tool_calls'], (int, float)):
                    status.tool_calls += int(json_data['tool_calls'])
                    token_fields_found = True

            return token_fields_found

        except (json.JSONDecodeError, ValueError, KeyError, TypeError) as e:
            # Not valid JSON or doesn't contain expected fields
            logger.debug(f"JSON parsing failed for line: {e}")
            return False
|
1638
|
+
|
1639
|
+
def _parse_token_usage_fallback(self, line: str, status: InstanceStatus):
|
1640
|
+
"""Fallback regex-based token parsing for backward compatibility"""
|
1641
|
+
line_lower = line.lower()
|
1642
|
+
|
1643
|
+
# Import regex here to avoid overhead when JSON parsing succeeds
|
1644
|
+
import re
|
1645
|
+
|
1646
|
+
# Pattern 1: "Used X tokens" or "X tokens used"
|
1647
|
+
token_match = re.search(r'(?:used|consumed)\s+(\d+)\s+tokens?|(?:(\d+)\s+tokens?\s+(?:used|consumed))', line_lower)
|
1648
|
+
if token_match:
|
1649
|
+
tokens = int(token_match.group(1) or token_match.group(2))
|
1650
|
+
status.total_tokens += tokens
|
1651
|
+
return
|
1652
|
+
|
1653
|
+
# Pattern 2: Input/Output/Cached token breakdown
|
1654
|
+
input_match = re.search(r'input[:\s]+(\d+)\s+tokens?', line_lower)
|
1655
|
+
if input_match:
|
1656
|
+
status.input_tokens += int(input_match.group(1))
|
1657
|
+
|
1658
|
+
output_match = re.search(r'output[:\s]+(\d+)\s+tokens?', line_lower)
|
1659
|
+
if output_match:
|
1660
|
+
status.output_tokens += int(output_match.group(1))
|
1661
|
+
|
1662
|
+
# Pattern 2b: Cached tokens
|
1663
|
+
cached_match = re.search(r'cached[:\s]+(\d+)\s+tokens?', line_lower)
|
1664
|
+
if cached_match:
|
1665
|
+
# Add to cache_read_tokens and update backward compatibility
|
1666
|
+
cached_tokens = int(cached_match.group(1))
|
1667
|
+
status.cache_read_tokens = max(status.cache_read_tokens, cached_tokens)
|
1668
|
+
self._update_cache_tokens_for_compatibility(status)
|
1669
|
+
|
1670
|
+
# Pattern 2c: Cache hit patterns
|
1671
|
+
cache_hit_match = re.search(r'cache\s+hit[:\s]+(\d+)\s+tokens?', line_lower)
|
1672
|
+
if cache_hit_match:
|
1673
|
+
# Add to cache_read_tokens and update backward compatibility
|
1674
|
+
cached_tokens = int(cache_hit_match.group(1))
|
1675
|
+
status.cache_read_tokens = max(status.cache_read_tokens, cached_tokens)
|
1676
|
+
self._update_cache_tokens_for_compatibility(status)
|
1677
|
+
|
1678
|
+
# Pattern 3: Total token counts "Total: X tokens"
|
1679
|
+
total_match = re.search(r'total[:\s]+(\d+)\s+tokens?', line_lower)
|
1680
|
+
if total_match:
|
1681
|
+
total_tokens = int(total_match.group(1))
|
1682
|
+
# Only update if this is larger than current total (avoid double counting)
|
1683
|
+
if total_tokens > status.total_tokens:
|
1684
|
+
status.total_tokens = total_tokens
|
1685
|
+
|
1686
|
+
# Pattern 4: Tool calls - look for tool execution indicators
|
1687
|
+
if any(phrase in line_lower for phrase in ['tool call', 'executing tool', 'calling tool', 'tool execution']):
|
1688
|
+
status.tool_calls += 1
|
1689
|
+
|
1690
|
+
def _parse_final_output_token_usage(self, output: str, status: InstanceStatus, output_format: str, instance_name: str):
|
1691
|
+
"""Parse token usage from final Claude Code output for non-streaming formats"""
|
1692
|
+
if output_format == "json":
|
1693
|
+
# For standard JSON format, try to parse the entire output as JSON
|
1694
|
+
self._parse_json_final_output(output, status, instance_name)
|
1695
|
+
else:
|
1696
|
+
# For other formats, parse line by line
|
1697
|
+
for line in output.split('\n'):
|
1698
|
+
line = line.strip()
|
1699
|
+
if line:
|
1700
|
+
self._parse_token_usage(line, status, instance_name)
|
1701
|
+
|
1702
|
+
def _parse_json_final_output(self, output: str, status: InstanceStatus, instance_name: str):
|
1703
|
+
"""Parse token usage from complete JSON output"""
|
1704
|
+
try:
|
1705
|
+
# Try to parse the entire output as JSON
|
1706
|
+
json_data = json.loads(output)
|
1707
|
+
|
1708
|
+
# Extract token information from the final JSON response
|
1709
|
+
if isinstance(json_data, dict):
|
1710
|
+
# Look for usage information in various locations
|
1711
|
+
|
1712
|
+
# Check for usage stats in root
|
1713
|
+
if 'usage' in json_data:
|
1714
|
+
self._extract_usage_stats(json_data['usage'], status)
|
1715
|
+
|
1716
|
+
# Check for usage nested in message (common Claude Code format)
|
1717
|
+
if 'message' in json_data and isinstance(json_data['message'], dict):
|
1718
|
+
message = json_data['message']
|
1719
|
+
if 'usage' in message:
|
1720
|
+
self._extract_usage_stats(message['usage'], status)
|
1721
|
+
|
1722
|
+
# Check for token info in metadata
|
1723
|
+
if 'metadata' in json_data and 'usage' in json_data['metadata']:
|
1724
|
+
self._extract_usage_stats(json_data['metadata']['usage'], status)
|
1725
|
+
|
1726
|
+
# Check for response-level token info
|
1727
|
+
if 'tokens' in json_data:
|
1728
|
+
self._extract_token_info(json_data['tokens'], status)
|
1729
|
+
|
1730
|
+
# Check for turns/conversations with token info
|
1731
|
+
if 'turns' in json_data:
|
1732
|
+
for turn in json_data['turns']:
|
1733
|
+
if isinstance(turn, dict) and 'usage' in turn:
|
1734
|
+
self._extract_usage_stats(turn['usage'], status)
|
1735
|
+
|
1736
|
+
# Check for tool calls
|
1737
|
+
if 'tool_calls' in json_data:
|
1738
|
+
tool_calls = json_data['tool_calls']
|
1739
|
+
if isinstance(tool_calls, list):
|
1740
|
+
status.tool_calls += len(tool_calls)
|
1741
|
+
elif isinstance(tool_calls, (int, float)):
|
1742
|
+
status.tool_calls += int(tool_calls)
|
1743
|
+
|
1744
|
+
logger.debug(f"Parsed JSON final output: tokens={status.total_tokens}, tools={status.tool_calls}")
|
1745
|
+
|
1746
|
+
except (json.JSONDecodeError, ValueError) as e:
|
1747
|
+
logger.debug(f"Failed to parse final output as JSON: {e}")
|
1748
|
+
# Fallback to line-by-line parsing
|
1749
|
+
for line in output.split('\n'):
|
1750
|
+
line = line.strip()
|
1751
|
+
if line:
|
1752
|
+
self._parse_token_usage(line, status, instance_name)
|
1753
|
+
|
1754
|
+
def _extract_usage_stats(self, usage_data: dict, status: InstanceStatus):
|
1755
|
+
"""Extract usage statistics from a usage object"""
|
1756
|
+
if not isinstance(usage_data, dict):
|
1757
|
+
return
|
1758
|
+
|
1759
|
+
# Standard Claude API usage fields (use max to handle same message IDs)
|
1760
|
+
if 'input_tokens' in usage_data:
|
1761
|
+
status.input_tokens = max(status.input_tokens, int(usage_data['input_tokens']))
|
1762
|
+
if 'output_tokens' in usage_data:
|
1763
|
+
status.output_tokens = max(status.output_tokens, int(usage_data['output_tokens']))
|
1764
|
+
if 'cache_read_input_tokens' in usage_data:
|
1765
|
+
status.cache_read_tokens = max(status.cache_read_tokens, int(usage_data['cache_read_input_tokens']))
|
1766
|
+
|
1767
|
+
# Handle cache_creation_input_tokens separately
|
1768
|
+
if 'cache_creation_input_tokens' in usage_data:
|
1769
|
+
status.cache_creation_tokens = max(status.cache_creation_tokens, int(usage_data['cache_creation_input_tokens']))
|
1770
|
+
|
1771
|
+
# Update backward compatibility field
|
1772
|
+
self._update_cache_tokens_for_compatibility(status)
|
1773
|
+
|
1774
|
+
# Calculate or use provided total
|
1775
|
+
if 'total_tokens' in usage_data:
|
1776
|
+
total = int(usage_data['total_tokens'])
|
1777
|
+
if total > status.total_tokens:
|
1778
|
+
status.total_tokens = total
|
1779
|
+
else:
|
1780
|
+
# Calculate total from all components including cache creation
|
1781
|
+
cache_creation = int(usage_data.get('cache_creation_input_tokens', 0))
|
1782
|
+
cache_read = int(usage_data.get('cache_read_input_tokens', 0))
|
1783
|
+
calculated_total = status.input_tokens + status.output_tokens + cache_creation + cache_read
|
1784
|
+
if calculated_total > status.total_tokens:
|
1785
|
+
status.total_tokens = calculated_total
|
1786
|
+
|
1787
|
+
def _extract_token_info(self, token_data, status: InstanceStatus):
|
1788
|
+
"""Extract token information from various token data formats"""
|
1789
|
+
if isinstance(token_data, dict):
|
1790
|
+
# Structured token data
|
1791
|
+
if 'total' in token_data:
|
1792
|
+
total = int(token_data['total'])
|
1793
|
+
if total > status.total_tokens:
|
1794
|
+
status.total_tokens = total
|
1795
|
+
if 'input' in token_data:
|
1796
|
+
status.input_tokens += int(token_data['input'])
|
1797
|
+
if 'output' in token_data:
|
1798
|
+
status.output_tokens += int(token_data['output'])
|
1799
|
+
if 'cached' in token_data:
|
1800
|
+
cached_tokens = int(token_data['cached'])
|
1801
|
+
status.cache_read_tokens = max(status.cache_read_tokens, cached_tokens)
|
1802
|
+
self._update_cache_tokens_for_compatibility(status)
|
1803
|
+
elif isinstance(token_data, (int, float)):
|
1804
|
+
# Simple token count
|
1805
|
+
status.total_tokens += int(token_data)
|
1806
|
+
|
1807
|
+
def _extract_tool_name_from_result(self, tool_result: dict, tool_use_id: str) -> str:
|
1808
|
+
"""Extract meaningful tool name from tool result using comprehensive Claude Code tool patterns"""
|
1809
|
+
try:
|
1810
|
+
content = tool_result.get('content', '')
|
1811
|
+
|
1812
|
+
if isinstance(content, str):
|
1813
|
+
# Handle empty content first (successful commands with no output)
|
1814
|
+
if content == "" or content.strip() == "":
|
1815
|
+
return 'Bash'
|
1816
|
+
|
1817
|
+
content_lower = content.lower()
|
1818
|
+
|
1819
|
+
# =============================================================================
|
1820
|
+
# PRIORITY PATTERNS - Check these FIRST before other tool patterns
|
1821
|
+
# =============================================================================
|
1822
|
+
|
1823
|
+
# Permission/MCP Tools - Check this FIRST before other patterns that might match
|
1824
|
+
if any(pattern in content_lower for pattern in [
|
1825
|
+
'claude requested permissions', 'haven\'t granted it yet',
|
1826
|
+
'but you haven\'t granted it yet'
|
1827
|
+
]):
|
1828
|
+
return 'permission_request'
|
1829
|
+
|
1830
|
+
# =============================================================================
|
1831
|
+
# CLAUDE CODE OFFICIAL TOOLS - Comprehensive Detection Patterns
|
1832
|
+
# =============================================================================
|
1833
|
+
|
1834
|
+
# Task Tool - Agent spawning and management
|
1835
|
+
if any(pattern in content_lower for pattern in [
|
1836
|
+
'agent', 'subagent', 'spawned', 'task completed', 'agent completed',
|
1837
|
+
'general-purpose', 'statusline-setup', 'output-style-setup'
|
1838
|
+
]):
|
1839
|
+
return 'Task'
|
1840
|
+
|
1841
|
+
# Bash Tool - Command execution (most comprehensive patterns)
|
1842
|
+
if (any(pattern in content_lower for pattern in [
|
1843
|
+
# Git operations
|
1844
|
+
'on branch', 'nothing to commit', 'git pull', 'working tree clean',
|
1845
|
+
'commit', 'staged', 'untracked files', 'changes not staged',
|
1846
|
+
'your branch', 'ahead of', 'behind', 'diverged',
|
1847
|
+
'file changed', 'insertions', 'deletions', 'files changed',
|
1848
|
+
'develop-', 'main-', 'feature-', 'bugfix-',
|
1849
|
+
# Command outputs
|
1850
|
+
'command', 'executed', 'permission denied', 'no such file or directory',
|
1851
|
+
'command not found', 'usage:', 'process completed', 'exit code',
|
1852
|
+
'killed', 'terminated',
|
1853
|
+
# File system outputs
|
1854
|
+
'rw-r--r--', 'drwxr-xr-x'
|
1855
|
+
]) or content.startswith('$') or
|
1856
|
+
(content.startswith('total ') and '\n-rw' in content)):
|
1857
|
+
return 'Bash'
|
1858
|
+
|
1859
|
+
# Glob Tool - File pattern matching
|
1860
|
+
if (any(pattern in content_lower for pattern in [
|
1861
|
+
'files found', 'pattern matching', 'glob', 'file pattern'
|
1862
|
+
]) or (
|
1863
|
+
# Single file path results (like "zen/zen_orchestrator.py")
|
1864
|
+
len(content.strip()) < 200 and '/' in content and content.count('\n') == 0 and
|
1865
|
+
not content.startswith('/') and any(content.endswith(ext) for ext in [
|
1866
|
+
'.py', '.js', '.ts', '.json', '.md', '.txt', '.html', '.css', '.yml', '.yaml'
|
1867
|
+
])
|
1868
|
+
) or (
|
1869
|
+
# Multiple file listings
|
1870
|
+
content.count('\n') > 5 and '/' in content and
|
1871
|
+
not content.startswith('<!DOCTYPE') and not content.startswith('<html')
|
1872
|
+
)):
|
1873
|
+
return 'Glob'
|
1874
|
+
|
1875
|
+
# Grep Tool - Search operations
|
1876
|
+
if any(pattern in content_lower for pattern in [
|
1877
|
+
'matches found', 'pattern', 'searched', 'grep', 'ripgrep', 'no matches',
|
1878
|
+
'search', 'found', 'regex'
|
1879
|
+
]):
|
1880
|
+
return 'Grep'
|
1881
|
+
|
1882
|
+
# LS Tool - Directory listings
|
1883
|
+
if (any(pattern in content_lower for pattern in [
|
1884
|
+
'list_dir', 'directory listing', 'listing files'
|
1885
|
+
]) or (content.startswith('total ') and '\n-rw' in content and 'drwx' in content)):
|
1886
|
+
return 'LS'
|
1887
|
+
|
1888
|
+
# Read Tool - File reading (comprehensive patterns)
|
1889
|
+
if (content.startswith('#!/usr/bin/env') or
|
1890
|
+
any(pattern in content for pattern in [
|
1891
|
+
'import ', 'def ', 'class ', 'function', 'const ', 'var ', 'let ',
|
1892
|
+
'export ', 'module.exports', 'require(', '#include', 'package ',
|
1893
|
+
'use ', 'fn ', 'struct ', 'impl ', 'trait '
|
1894
|
+
]) or
|
1895
|
+
(len(content) > 1000 and any(word in content_lower for word in [
|
1896
|
+
'function', 'class', 'import', 'def', 'module', 'export', 'const'
|
1897
|
+
])) or
|
1898
|
+
(len(content) > 500 and not any(pattern in content_lower for pattern in [
|
1899
|
+
'html', 'http', 'www', 'commit', 'staged', 'branch'
|
1900
|
+
]))):
|
1901
|
+
return 'Read'
|
1902
|
+
|
1903
|
+
# Edit Tool - File editing
|
1904
|
+
if (any(pattern in content_lower for pattern in [
|
1905
|
+
'file has been updated', 'result of running', 'has been updated successfully'
|
1906
|
+
]) or (
|
1907
|
+
'edit' in content_lower and any(pattern in content_lower for pattern in [
|
1908
|
+
'success', 'updated', 'modified', 'changed'
|
1909
|
+
])
|
1910
|
+
)):
|
1911
|
+
return 'Edit'
|
1912
|
+
|
1913
|
+
# MultiEdit Tool - Multiple file edits
|
1914
|
+
if any(pattern in content_lower for pattern in [
|
1915
|
+
'edits have been applied', 'multiple edits', 'multiedit'
|
1916
|
+
]) and 'edit' in content_lower:
|
1917
|
+
return 'MultiEdit'
|
1918
|
+
|
1919
|
+
# Write Tool - File creation
|
1920
|
+
if any(pattern in content_lower for pattern in [
|
1921
|
+
'file created successfully', 'file written', 'written to', 'created successfully'
|
1922
|
+
]):
|
1923
|
+
return 'Write'
|
1924
|
+
|
1925
|
+
# NotebookEdit Tool - Jupyter operations
|
1926
|
+
if any(pattern in content_lower for pattern in [
|
1927
|
+
'notebook', 'jupyter', 'ipynb'
|
1928
|
+
]) or (
|
1929
|
+
'cell' in content_lower and any(pattern in content_lower for pattern in [
|
1930
|
+
'executed', 'output', 'edit', 'code', 'markdown'
|
1931
|
+
])
|
1932
|
+
):
|
1933
|
+
return 'NotebookEdit'
|
1934
|
+
|
1935
|
+
# WebFetch Tool - Web content fetching
|
1936
|
+
if (content.startswith('<!DOCTYPE') or content.startswith('<html') or
|
1937
|
+
any(pattern in content_lower for pattern in [
|
1938
|
+
'http://', 'https://', 'web content', 'webpage', 'url', 'website'
|
1939
|
+
]) or (
|
1940
|
+
any(pattern in content_lower for pattern in ['http', 'web', 'fetch', 'url']) and
|
1941
|
+
any(pattern in content_lower for pattern in ['request', 'response', 'content', 'page'])
|
1942
|
+
)):
|
1943
|
+
return 'WebFetch'
|
1944
|
+
|
1945
|
+
# TodoWrite Tool - Task management (already has good patterns)
|
1946
|
+
if any(pattern in content_lower for pattern in [
|
1947
|
+
'todos have been modified', 'todo list', 'task list', 'progress',
|
1948
|
+
'todo', 'task', 'completed', 'in_progress', 'pending'
|
1949
|
+
]):
|
1950
|
+
return 'TodoWrite'
|
1951
|
+
|
1952
|
+
# WebSearch Tool - Web searching
|
1953
|
+
if any(pattern in content_lower for pattern in [
|
1954
|
+
'search results', 'web search', 'search query', 'internet search'
|
1955
|
+
]) and any(pattern in content_lower for pattern in ['web', 'search', 'internet', 'query']):
|
1956
|
+
return 'WebSearch'
|
1957
|
+
|
1958
|
+
# BashOutput Tool - Background shell output
|
1959
|
+
if any(pattern in content_lower for pattern in [
|
1960
|
+
'shell output', 'background', 'stdout', 'stderr', 'bash output'
|
1961
|
+
]):
|
1962
|
+
return 'BashOutput'
|
1963
|
+
|
1964
|
+
# KillBash Tool - Shell termination
|
1965
|
+
if any(pattern in content_lower for pattern in [
|
1966
|
+
'shell killed', 'terminated', 'killed shell', 'bash killed'
|
1967
|
+
]):
|
1968
|
+
return 'KillBash'
|
1969
|
+
|
1970
|
+
# ExitPlanMode Tool - Plan mode exit
|
1971
|
+
if any(pattern in content_lower for pattern in [
|
1972
|
+
'exit plan', 'plan mode', 'ready to code', 'plan', 'implementation'
|
1973
|
+
]):
|
1974
|
+
return 'ExitPlanMode'
|
1975
|
+
|
1976
|
+
# MCP Tools - Model Context Protocol tools
|
1977
|
+
if 'mcp__' in content:
|
1978
|
+
import re
|
1979
|
+
mcp_match = re.search(r'mcp__[a-zA-Z_]+__[a-zA-Z_]+', content)
|
1980
|
+
if mcp_match:
|
1981
|
+
return mcp_match.group(0)
|
1982
|
+
|
1983
|
+
|
1984
|
+
# Code execution results
|
1985
|
+
if any(pattern in content_lower for pattern in [
|
1986
|
+
'traceback', 'error:', 'exception', 'stack trace'
|
1987
|
+
]):
|
1988
|
+
return 'Execute'
|
1989
|
+
|
1990
|
+
# Error-specific tool identification
|
1991
|
+
if any(pattern in content_lower for pattern in [
|
1992
|
+
'eisdir: illegal operation on a directory', 'directory, read',
|
1993
|
+
'is a directory', 'illegal operation on a directory'
|
1994
|
+
]):
|
1995
|
+
return 'Read' # Read tool trying to read directory
|
1996
|
+
|
1997
|
+
# Command approval/permission errors (often from Task tools)
|
1998
|
+
if any(pattern in content_lower for pattern in [
|
1999
|
+
'this command requires approval', 'requires approval',
|
2000
|
+
'command contains multiple operations'
|
2001
|
+
]):
|
2002
|
+
return 'Bash'
|
2003
|
+
|
2004
|
+
# File size limit errors (Read tool)
|
2005
|
+
if any(pattern in content_lower for pattern in [
|
2006
|
+
'file content', 'exceeds maximum allowed tokens',
|
2007
|
+
'use offset and limit parameters'
|
2008
|
+
]):
|
2009
|
+
return 'Read'
|
2010
|
+
|
2011
|
+
# =============================================================================
|
2012
|
+
# DEFAULT FALLBACK - Try to infer from content characteristics
|
2013
|
+
# =============================================================================
|
2014
|
+
|
2015
|
+
# Very long text content - likely Read
|
2016
|
+
if len(content) > 3000:
|
2017
|
+
return 'Read'
|
2018
|
+
|
2019
|
+
# Medium text with code patterns - likely Read
|
2020
|
+
elif len(content) > 200 and any(pattern in content for pattern in [
|
2021
|
+
'{', '}', '[', ']', '(', ')', ';', '=', '->', '=>'
|
2022
|
+
]):
|
2023
|
+
return 'Read'
|
2024
|
+
|
2025
|
+
# Short technical content - likely command output (Bash)
|
2026
|
+
elif len(content) < 100 and any(char in content for char in ['$', '/', '-', '=']):
|
2027
|
+
return 'Bash'
|
2028
|
+
|
2029
|
+
# Check for error indicators
|
2030
|
+
if tool_result.get('is_error'):
|
2031
|
+
error_content = tool_result.get('content', '')
|
2032
|
+
if 'permission' in error_content.lower():
|
2033
|
+
return 'permission_denied'
|
2034
|
+
elif 'not found' in error_content.lower():
|
2035
|
+
return 'file_not_found'
|
2036
|
+
|
2037
|
+
except Exception as e:
|
2038
|
+
# If pattern matching fails, fall back to tool_use_id
|
2039
|
+
pass
|
2040
|
+
|
2041
|
+
# Enhanced fallback patterns for simple/minimal outputs before generic fallback
|
2042
|
+
content = tool_result.get('content', '')
|
2043
|
+
if isinstance(content, str) and content.strip():
|
2044
|
+
content_stripped = content.strip()
|
2045
|
+
content_lower = content_stripped.lower()
|
2046
|
+
|
2047
|
+
# Simple git branch names (common in GitIssueProgressor)
|
2048
|
+
if (len(content_stripped) < 50 and
|
2049
|
+
any(branch in content_lower for branch in ['develop', 'main', 'feature', 'bugfix', 'release']) and
|
2050
|
+
'-' in content_stripped and not ' ' in content_stripped):
|
2051
|
+
return 'Bash'
|
2052
|
+
|
2053
|
+
# Simple file paths or single values
|
2054
|
+
if (len(content_stripped) < 100 and
|
2055
|
+
('/' in content_stripped or '.' in content_stripped) and
|
2056
|
+
not ' ' in content_stripped and not '\n' in content_stripped):
|
2057
|
+
return 'Bash'
|
2058
|
+
|
2059
|
+
# Very short responses that are likely command outputs
|
2060
|
+
if len(content_stripped) < 20 and not any(char in content_stripped for char in ['<', '>', '{', '}']):
|
2061
|
+
return 'Bash'
|
2062
|
+
|
2063
|
+
# GitHub issue URLs or numbers (from GitIssueProgressor)
|
2064
|
+
if ('github.com' in content_lower and 'issues' in content_lower) or \
|
2065
|
+
(content_stripped.isdigit() and len(content_stripped) <= 4):
|
2066
|
+
return 'WebFetch'
|
2067
|
+
|
2068
|
+
# Date/time formats (common command outputs)
|
2069
|
+
if any(pattern in content_stripped for pattern in [
|
2070
|
+
'-', ':', 'UTC', 'GMT', 'AM', 'PM'
|
2071
|
+
]) and (len(content_stripped.split()) <= 5):
|
2072
|
+
# Simple date/time patterns
|
2073
|
+
if any(char.isdigit() for char in content_stripped):
|
2074
|
+
return 'Bash'
|
2075
|
+
|
2076
|
+
# Import/success messages (from Python imports or similar)
|
2077
|
+
if any(pattern in content_lower for pattern in [
|
2078
|
+
'import', 'successful', 'successfully', 'ā
', 'completed', 'finished'
|
2079
|
+
]):
|
2080
|
+
return 'Bash'
|
2081
|
+
|
2082
|
+
# Absolute file paths
|
2083
|
+
if content_stripped.startswith('/') and len(content_stripped.split()) == 1:
|
2084
|
+
return 'Bash'
|
2085
|
+
|
2086
|
+
# Any other single-line simple responses (catch-all for remaining cases)
|
2087
|
+
if '\n' not in content_stripped and len(content_stripped) < 100:
|
2088
|
+
return 'Bash'
|
2089
|
+
|
2090
|
+
# Fallback to generic name with partial tool_use_id for tracking
|
2091
|
+
short_id = tool_use_id[-8:] if len(tool_use_id) > 8 else tool_use_id
|
2092
|
+
return f"tool_{short_id}"
|
2093
|
+
|
2094
|
+
def _estimate_tool_tokens(self, tool_data: dict, is_tool_use: bool = False) -> int:
|
2095
|
+
"""Estimate token usage for a tool based on content size"""
|
2096
|
+
try:
|
2097
|
+
if is_tool_use:
|
2098
|
+
# For tool_use, estimate based on input parameters
|
2099
|
+
input_data = tool_data.get('input', {})
|
2100
|
+
if isinstance(input_data, dict):
|
2101
|
+
# Rough estimation: ~4 characters per token
|
2102
|
+
text_content = str(input_data)
|
2103
|
+
return max(10, len(text_content) // 4) # Minimum 10 tokens for tool invocation
|
2104
|
+
return 10 # Base cost for tool invocation
|
2105
|
+
else:
|
2106
|
+
# For tool_result, estimate based on content size
|
2107
|
+
content = tool_data.get('content', '')
|
2108
|
+
if isinstance(content, str):
|
2109
|
+
# Rough estimation: ~4 characters per token for output
|
2110
|
+
base_tokens = len(content) // 4
|
2111
|
+
|
2112
|
+
# Add overhead for tool processing
|
2113
|
+
overhead = 20 # Base overhead for tool execution
|
2114
|
+
|
2115
|
+
# Adjust based on content type
|
2116
|
+
if len(content) > 5000: # Large content (like file reads)
|
2117
|
+
overhead += 50
|
2118
|
+
elif len(content) > 1000: # Medium content (like directory listings)
|
2119
|
+
overhead += 20
|
2120
|
+
|
2121
|
+
return max(base_tokens + overhead, 25) # Minimum 25 tokens for any tool result
|
2122
|
+
|
2123
|
+
return 25 # Base tokens for tool result
|
2124
|
+
|
2125
|
+
except Exception as e:
|
2126
|
+
# Fallback to base estimation if any errors occur
|
2127
|
+
return 15 if is_tool_use else 30
|
2128
|
+
|
2129
|
+
def get_status_summary(self) -> Dict:
    """Build a JSON-serializable snapshot of every instance's status.

    Returns:
        Dict with aggregate counters ('completed', 'failed', 'running',
        'pending'), the total instance count, and a per-instance detail
        map keyed by instance name.
    """
    per_instance = {}
    summary = {
        "total_instances": len(self.instances),
        "completed": 0,
        "failed": 0,
        "running": 0,
        "pending": 0,
        "instances": per_instance,
    }

    for name, status in self.statuses.items():
        details = asdict(status)

        # Sets are not JSON-serializable; convert to a plain list.
        ids = details.get("processed_message_ids")
        if isinstance(ids, set):
            details["processed_message_ids"] = list(ids)

        # Include wall-clock duration once the instance has finished.
        if status.start_time and status.end_time:
            details["duration"] = f"{status.end_time - status.start_time:.2f}s"

        per_instance[name] = details
        summary[status.status] += 1

    return summary
|
2156
|
+
|
2157
|
+
|
2158
|
+
|
2159
|
+
|
2160
|
+
def parse_start_time(start_at_str: str) -> datetime:
    """Parse a start-time specification into a concrete datetime.

    Accepts relative offsets ('2h', '30m', '45s'), 12-hour clock times
    ('1am', '2:30pm'), and 24-hour clock times ('14:30'). Clock times that
    have already passed today roll over to tomorrow.

    Args:
        start_at_str: User-supplied schedule string; empty/None means "now".

    Returns:
        The datetime at which orchestration should begin.

    Raises:
        ValueError: If the string matches none of the supported formats.
    """
    if not start_at_str:
        return datetime.now()

    spec = start_at_str.strip().lower()
    now = datetime.now()

    # Relative offsets such as "2h", "30m", "45s" (fractional values allowed).
    rel = re.match(r'^(\d+(?:\.\d+)?)\s*([hms])$', spec)
    if rel:
        amount = float(rel.group(1))
        delta_by_unit = {
            'h': timedelta(hours=amount),
            'm': timedelta(minutes=amount),
            's': timedelta(seconds=amount),
        }
        return now + delta_by_unit[rel.group(2)]

    # Clock times: 12-hour forms ("1am", "10:30pm") and 24-hour ("14:30").
    clock_formats = [
        (r'^(\d{1,2})\s*am$', lambda h: (int(h) % 12, 0)),                     # 1am -> (1, 0)
        (r'^(\d{1,2})\s*pm$', lambda h: ((int(h) % 12) + 12, 0)),              # 1pm -> (13, 0)
        (r'^(\d{1,2}):(\d{2})\s*am$', lambda h, m: (int(h) % 12, int(m))),     # 10:30am -> (10, 30)
        (r'^(\d{1,2}):(\d{2})\s*pm$', lambda h, m: ((int(h) % 12) + 12, int(m))),  # 2:30pm -> (14, 30)
        (r'^(\d{1,2}):(\d{2})$', lambda h, m: (int(h), int(m))),               # 14:30 -> (14, 30)
    ]
    for pattern, to_hour_minute in clock_formats:
        matched = re.match(pattern, spec)
        if not matched:
            continue
        hour, minute = to_hour_minute(*matched.groups())
        candidate = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
        # A time already past (or exactly now) means the next occurrence is tomorrow.
        if candidate <= now:
            candidate += timedelta(days=1)
        return candidate

    # No recognized format.
    raise ValueError(f"Invalid start time format: '{spec}'. "
                     f"Supported formats: '2h' (2 hours), '30m' (30 minutes), '14:30' (2:30 PM), '1am', '2:30pm'")
|
2213
|
+
|
2214
|
+
def create_default_instances(output_format: str = "stream-json") -> List[InstanceConfig]:
    """Build the fallback instance set used when no config or direct command is given.

    Args:
        output_format: Output format forwarded to every default instance.

    Returns:
        Two ready-to-run InstanceConfig objects: a repository analysis pass
        and a README overview pass.
    """
    # permission_mode is intentionally omitted; it is auto-set based on platform.
    default_specs = [
        dict(
            name="analyze-repo",
            command="/analyze-repository",
            description="Analyze the repository structure and codebase",
            max_tokens_per_command=5000,
        ),
        dict(
            name="help-overview",
            command="/README",
            description="Show project README and overview information",
            max_tokens_per_command=1000,
        ),
    ]
    return [InstanceConfig(output_format=output_format, **spec) for spec in default_specs]
|
2234
|
+
|
2235
|
+
def create_direct_instance(args, workspace: Path) -> Optional[InstanceConfig]:
    """Create an InstanceConfig from direct command-line arguments.

    Unknown commands are NOT rejected: anything not found under
    .claude/commands/ is accepted as an ad-hoc command and merely logged,
    so this function never aborts on command validation.

    Args:
        args: Parsed command line arguments
        workspace: Working directory path

    Returns:
        InstanceConfig if a direct command was provided, None otherwise
    """
    if not args.command:
        return None

    # Create temporary orchestrator to validate command.
    # Note: We use minimal initialization since we only need command discovery;
    # console output is suppressed because nothing is actually executed here.
    temp_orchestrator = ClaudeInstanceOrchestrator(
        workspace,
        max_console_lines=0,  # Minimal console output for validation
        startup_delay=0,
        quiet=True  # Suppress output during validation
    )

    # Determine whether the command is predefined or will run as ad-hoc
    available_commands = temp_orchestrator.discover_available_commands()
    is_predefined_command = args.command in available_commands

    if not is_predefined_command:
        # Allow as ad-hoc command - log for transparency
        logger.info(f"Using ad-hoc command: {args.command}")
        logger.info(f"Note: This is not a predefined command from .claude/commands/")
        logger.info(f"Available predefined commands: {', '.join(sorted(available_commands))}")
        logger.info("Use 'zen --list-commands' to see all predefined commands with descriptions")
    else:
        logger.debug(f"Using predefined command: {args.command}")

    # Generate a readable instance name if not provided
    instance_name = args.instance_name
    if not instance_name:
        clean_command = args.command.strip('/')
        instance_name = f"direct-{clean_command}-{uuid4().hex[:8]}"

    # Generate a default description if not provided
    instance_description = args.instance_description
    if not instance_description:
        instance_description = f"Direct execution of {args.command}"

    # Create and return InstanceConfig
    return InstanceConfig(
        command=args.command,
        name=instance_name,
        description=instance_description,
        output_format=args.output_format,
        session_id=args.session_id,
        clear_history=args.clear_history,
        compact_history=args.compact_history,
        max_tokens_per_command=args.overall_token_budget
    )
|
2296
|
+
|
2297
|
+
async def main():
|
2298
|
+
"""Main orchestrator function"""
|
2299
|
+
parser = argparse.ArgumentParser(description="Claude Code Instance Orchestrator")
|
2300
|
+
|
2301
|
+
# Direct command argument (positional)
|
2302
|
+
parser.add_argument("command", nargs="?", help="Direct command to execute (e.g., '/analyze-code')")
|
2303
|
+
|
2304
|
+
parser.add_argument("--workspace", type=str, default=None,
|
2305
|
+
help="Workspace directory (default: auto-detect project root or current directory)")
|
2306
|
+
parser.add_argument("--config", type=Path, help="Custom instance configuration file")
|
2307
|
+
parser.add_argument("--dry-run", action="store_true", help="Show commands without running")
|
2308
|
+
parser.add_argument("--list-commands", action="store_true", help="List all available slash commands and exit")
|
2309
|
+
parser.add_argument("--inspect-command", type=str, help="Inspect a specific slash command and exit")
|
2310
|
+
parser.add_argument("--output-format", choices=["json", "stream-json"], default="stream-json",
|
2311
|
+
help="Output format for Claude instances (default: stream-json)")
|
2312
|
+
parser.add_argument("--timeout", type=int, default=10000,
|
2313
|
+
help="Timeout in seconds for each instance (default: 10000)")
|
2314
|
+
parser.add_argument("--max-console-lines", type=int, default=5,
|
2315
|
+
help="Maximum recent lines to show per instance on console (default: 5)")
|
2316
|
+
parser.add_argument("--quiet", action="store_true",
|
2317
|
+
help="Minimize console output, show only errors and final summaries")
|
2318
|
+
parser.add_argument("--log-level", choices=["silent", "concise", "detailed"], default=None,
|
2319
|
+
help="Set log level: 'silent' (errors only), 'concise' (essential progress + budget alerts, default), 'detailed' (all logging)")
|
2320
|
+
parser.add_argument("--verbose", action="store_true",
|
2321
|
+
help="Enable detailed logging (equivalent to --log-level detailed)")
|
2322
|
+
parser.add_argument("--startup-delay", type=float, default=5.0,
|
2323
|
+
help="Delay in seconds between launching each instance (default: 5.0)")
|
2324
|
+
parser.add_argument("--max-line-length", type=int, default=800,
|
2325
|
+
help="Maximum characters per line in console output (default: 500)")
|
2326
|
+
parser.add_argument("--status-report-interval", type=int, default=5,
|
2327
|
+
help="Seconds between rolling status reports (default: 5)")
|
2328
|
+
parser.add_argument("--start-at", type=str, default=None,
|
2329
|
+
help="Schedule orchestration to start at specific time. Examples: '2h' (2 hours from now), '30m' (30 minutes), '14:30' (2:30 PM today), '1am' (1 AM today/tomorrow)")
|
2330
|
+
|
2331
|
+
|
2332
|
+
# Direct command options
|
2333
|
+
parser.add_argument("--instance-name", type=str, help="Instance name for direct command execution")
|
2334
|
+
parser.add_argument("--instance-description", type=str, help="Instance description for direct command execution")
|
2335
|
+
parser.add_argument("--session-id", type=str, help="Session ID for direct command execution")
|
2336
|
+
parser.add_argument("--clear-history", action="store_true", help="Clear history before direct command execution")
|
2337
|
+
parser.add_argument("--compact-history", action="store_true", help="Compact history before direct command execution")
|
2338
|
+
|
2339
|
+
# Token budget arguments
|
2340
|
+
parser.add_argument("--overall-token-budget", type=int, default=None,
|
2341
|
+
help="Global token budget for the entire session.")
|
2342
|
+
parser.add_argument("--command-budget", action='append',
|
2343
|
+
help="Per-command budget in format: '/command_name=limit'. Can be used multiple times.")
|
2344
|
+
parser.add_argument("--budget-enforcement-mode", choices=["warn", "block"], default="warn",
|
2345
|
+
help="Action to take when a budget is exceeded: 'warn' (log and continue) or 'block' (prevent new instances).")
|
2346
|
+
parser.add_argument("--disable-budget-visuals", action="store_true",
|
2347
|
+
help="Disable budget visualization in status reports")
|
2348
|
+
|
2349
|
+
# Cost budget arguments (Issue #1347)
|
2350
|
+
parser.add_argument("--overall-cost-budget", type=float, default=None,
|
2351
|
+
help="Global cost budget for the entire session in USD (e.g., --overall-cost-budget 10.50).")
|
2352
|
+
parser.add_argument("--command-cost-budget", action='append',
|
2353
|
+
help="Per-command cost budget in format: '/command_name=cost'. Can be used multiple times (e.g., --command-cost-budget '/analyze=5.0').")
|
2354
|
+
parser.add_argument("--budget-parameter-type", choices=["tokens", "cost", "mixed"], default="tokens",
|
2355
|
+
help="Type of budget parameters to use: 'tokens' (default, backward compatible), 'cost' (USD-based), or 'mixed' (both).")
|
2356
|
+
|
2357
|
+
# New example and template commands
|
2358
|
+
parser.add_argument("--generate-example", type=str, metavar="TYPE",
|
2359
|
+
help="Generate example configuration (data_analysis, code_review, content_creation, testing_workflow, migration_workflow, debugging_workflow)")
|
2360
|
+
parser.add_argument("--list-examples", action="store_true",
|
2361
|
+
help="List all available example configurations")
|
2362
|
+
parser.add_argument("--show-prompt-template", action="store_true",
|
2363
|
+
help="Show LLM prompt template for configuration generation")
|
2364
|
+
|
2365
|
+
args = parser.parse_args()
|
2366
|
+
|
2367
|
+
# Initialize config budget settings (will be populated if config file is loaded)
|
2368
|
+
config_budget_settings = {}
|
2369
|
+
|
2370
|
+
# Determine workspace directory with auto-detection
|
2371
|
+
if args.workspace:
|
2372
|
+
workspace = Path(args.workspace).expanduser().resolve()
|
2373
|
+
else:
|
2374
|
+
# Auto-detect workspace: use parent directory of zen directory as default
|
2375
|
+
zen_script_path = Path(__file__).resolve()
|
2376
|
+
zen_dir = zen_script_path.parent
|
2377
|
+
|
2378
|
+
# Check if zen is in a subdirectory of a larger project
|
2379
|
+
potential_root = zen_dir.parent
|
2380
|
+
|
2381
|
+
# Look for common project indicators in parent directory
|
2382
|
+
project_indicators = ['.git', '.claude', 'package.json', 'setup.py', 'pyproject.toml', 'Cargo.toml']
|
2383
|
+
|
2384
|
+
if any((potential_root / indicator).exists() for indicator in project_indicators):
|
2385
|
+
workspace = potential_root
|
2386
|
+
logger.info(f"Auto-detected project root as workspace: {workspace}")
|
2387
|
+
else:
|
2388
|
+
# Fallback to current working directory if no project indicators found
|
2389
|
+
workspace = Path.cwd().resolve()
|
2390
|
+
logger.info(f"Using current working directory as workspace: {workspace}")
|
2391
|
+
|
2392
|
+
# If workspace is still the zen directory itself, use parent or current directory
|
2393
|
+
if workspace == zen_dir:
|
2394
|
+
workspace = zen_dir.parent if zen_dir.parent != zen_dir else Path.cwd().resolve()
|
2395
|
+
|
2396
|
+
# Verify workspace exists and is accessible
|
2397
|
+
if not workspace.exists():
|
2398
|
+
logger.error(f"Workspace directory does not exist: {workspace}")
|
2399
|
+
sys.exit(1)
|
2400
|
+
|
2401
|
+
if not workspace.is_dir():
|
2402
|
+
logger.error(f"Workspace path is not a directory: {workspace}")
|
2403
|
+
sys.exit(1)
|
2404
|
+
|
2405
|
+
# Check if it looks like a Claude Code workspace
|
2406
|
+
claude_dir = workspace / ".claude"
|
2407
|
+
if not claude_dir.exists():
|
2408
|
+
logger.warning(f"No .claude directory found in workspace: {workspace}")
|
2409
|
+
logger.warning("This might not be a Claude Code workspace")
|
2410
|
+
|
2411
|
+
logger.info(f"Using workspace: {workspace}")
|
2412
|
+
|
2413
|
+
# Load instance configurations with direct command precedence
|
2414
|
+
direct_instance = create_direct_instance(args, workspace)
|
2415
|
+
|
2416
|
+
if direct_instance:
|
2417
|
+
# Direct command mode - highest precedence
|
2418
|
+
instances = [direct_instance]
|
2419
|
+
logger.info(f"Executing direct command: {direct_instance.command}")
|
2420
|
+
|
2421
|
+
# Load budget settings from config file if available (for direct command mode)
|
2422
|
+
if args.config and args.config.exists():
|
2423
|
+
logger.info(f"Loading budget configuration from {args.config} (direct command mode)")
|
2424
|
+
with open(args.config) as f:
|
2425
|
+
config_data = json.load(f)
|
2426
|
+
budget_config = config_data.get("budget", {})
|
2427
|
+
if budget_config:
|
2428
|
+
config_budget_settings = budget_config
|
2429
|
+
logger.info(f"Loaded budget configuration from config file: {budget_config}")
|
2430
|
+
elif args.config and args.config.exists():
|
2431
|
+
# Config file mode - second precedence
|
2432
|
+
logger.info(f"Loading config from {args.config}")
|
2433
|
+
with open(args.config) as f:
|
2434
|
+
config_data = json.load(f)
|
2435
|
+
instances = [InstanceConfig(**inst) for inst in config_data["instances"]]
|
2436
|
+
|
2437
|
+
# Extract budget configuration from config file
|
2438
|
+
budget_config = config_data.get("budget", {})
|
2439
|
+
if budget_config:
|
2440
|
+
config_budget_settings = budget_config
|
2441
|
+
logger.info(f"Loaded budget configuration from config file: {budget_config}")
|
2442
|
+
else:
|
2443
|
+
# Default instances mode - lowest precedence
|
2444
|
+
logger.info("Using default instance configurations")
|
2445
|
+
instances = create_default_instances(args.output_format)
|
2446
|
+
|
2447
|
+
# Determine final budget settings - CLI args override config file
|
2448
|
+
final_overall_budget = args.overall_token_budget
|
2449
|
+
final_overall_cost_budget = args.overall_cost_budget
|
2450
|
+
final_budget_type = args.budget_parameter_type
|
2451
|
+
final_enforcement_mode = args.budget_enforcement_mode
|
2452
|
+
final_enable_visuals = not args.disable_budget_visuals
|
2453
|
+
|
2454
|
+
# Use config file values if CLI args weren't provided
|
2455
|
+
if final_overall_budget is None and "overall_budget" in config_budget_settings:
|
2456
|
+
final_overall_budget = config_budget_settings["overall_budget"]
|
2457
|
+
logger.info(f"Using overall token budget from config file: {final_overall_budget}")
|
2458
|
+
|
2459
|
+
if final_overall_cost_budget is None and "overall_cost_budget" in config_budget_settings:
|
2460
|
+
final_overall_cost_budget = config_budget_settings["overall_cost_budget"]
|
2461
|
+
logger.info(f"Using overall cost budget from config file: ${final_overall_cost_budget}")
|
2462
|
+
|
2463
|
+
if args.budget_parameter_type == "tokens" and "budget_type" in config_budget_settings:
|
2464
|
+
# Only use config if user didn't explicitly set CLI arg (default is "tokens")
|
2465
|
+
final_budget_type = config_budget_settings["budget_type"]
|
2466
|
+
logger.info(f"Using budget type from config file: {final_budget_type}")
|
2467
|
+
|
2468
|
+
if args.budget_enforcement_mode == "warn" and "enforcement_mode" in config_budget_settings:
|
2469
|
+
# Only use config if user didn't explicitly set CLI arg (default is "warn")
|
2470
|
+
final_enforcement_mode = config_budget_settings["enforcement_mode"]
|
2471
|
+
logger.info(f"Using enforcement mode from config file: {final_enforcement_mode}")
|
2472
|
+
|
2473
|
+
if not args.disable_budget_visuals and "disable_visuals" in config_budget_settings:
|
2474
|
+
# Only use config if user didn't explicitly disable visuals
|
2475
|
+
final_enable_visuals = not config_budget_settings["disable_visuals"]
|
2476
|
+
logger.info(f"Using budget visuals setting from config file: {final_enable_visuals}")
|
2477
|
+
|
2478
|
+
# Cost budget takes precedence over token budget if both are specified
|
2479
|
+
final_budget_for_manager = final_overall_cost_budget if final_overall_cost_budget is not None else final_overall_budget
|
2480
|
+
final_budget_type_for_manager = "cost" if final_overall_cost_budget is not None else final_budget_type
|
2481
|
+
|
2482
|
+
# Determine log level from arguments
|
2483
|
+
log_level = determine_log_level(args)
|
2484
|
+
|
2485
|
+
# Initialize orchestrator with console output settings
|
2486
|
+
max_lines = 0 if args.quiet else args.max_console_lines
|
2487
|
+
|
2488
|
+
# Check if command budgets are configured from config file or CLI args
|
2489
|
+
has_config_command_budgets = bool(config_budget_settings.get("command_budgets"))
|
2490
|
+
has_config_cost_budgets = bool(config_budget_settings.get("command_cost_budgets"))
|
2491
|
+
has_cli_command_budgets = bool(args.command_budget)
|
2492
|
+
has_cli_cost_budgets = bool(args.command_cost_budget)
|
2493
|
+
has_command_budgets = has_config_command_budgets or has_cli_command_budgets or has_config_cost_budgets or has_cli_cost_budgets
|
2494
|
+
|
2495
|
+
orchestrator = ClaudeInstanceOrchestrator(
|
2496
|
+
workspace,
|
2497
|
+
max_console_lines=max_lines,
|
2498
|
+
startup_delay=args.startup_delay,
|
2499
|
+
max_line_length=args.max_line_length,
|
2500
|
+
status_report_interval=args.status_report_interval,
|
2501
|
+
use_cloud_sql=args.use_cloud_sql,
|
2502
|
+
quiet=args.quiet,
|
2503
|
+
overall_token_budget=final_overall_budget,
|
2504
|
+
overall_cost_budget=final_overall_cost_budget,
|
2505
|
+
budget_type=final_budget_type_for_manager,
|
2506
|
+
budget_enforcement_mode=final_enforcement_mode,
|
2507
|
+
enable_budget_visuals=final_enable_visuals,
|
2508
|
+
has_command_budgets=has_command_budgets,
|
2509
|
+
log_level=log_level
|
2510
|
+
)
|
2511
|
+
|
2512
|
+
# Process per-command budgets from config file first, then CLI args (CLI overrides config)
|
2513
|
+
if orchestrator.budget_manager:
|
2514
|
+
# Load command budgets from config file
|
2515
|
+
config_command_budgets = config_budget_settings.get("command_budgets", {})
|
2516
|
+
for command_name, limit in config_command_budgets.items():
|
2517
|
+
try:
|
2518
|
+
orchestrator.budget_manager.set_command_budget(command_name, int(limit))
|
2519
|
+
logger.info(f"šÆ CONFIG BUDGET SET: {command_name} = {limit} tokens")
|
2520
|
+
except (ValueError, TypeError) as e:
|
2521
|
+
logger.error(f"Invalid command budget in config file: '{command_name}={limit}': {e}")
|
2522
|
+
|
2523
|
+
# Load command budgets from CLI args (these override config file)
|
2524
|
+
if args.command_budget:
|
2525
|
+
for budget_str in args.command_budget:
|
2526
|
+
try:
|
2527
|
+
command_name, limit = budget_str.split('=', 1)
|
2528
|
+
# Normalize command name by ensuring it starts with '/'
|
2529
|
+
command_name = command_name.strip()
|
2530
|
+
if not command_name.startswith('/'):
|
2531
|
+
command_name = '/' + command_name
|
2532
|
+
|
2533
|
+
orchestrator.budget_manager.set_command_budget(command_name, int(limit))
|
2534
|
+
logger.info(f"šÆ CLI BUDGET SET: {command_name} = {limit} tokens (overrides config)")
|
2535
|
+
|
2536
|
+
# DEBUG: Log all budget keys after setting
|
2537
|
+
logger.debug(f"š ALL BUDGET KEYS: {list(orchestrator.budget_manager.command_budgets.keys())}")
|
2538
|
+
except ValueError:
|
2539
|
+
logger.error(f"Invalid format for --command-budget: '{budget_str}'. Use '/command=limit'.")
|
2540
|
+
|
2541
|
+
# Load cost budgets from config file
|
2542
|
+
config_command_cost_budgets = config_budget_settings.get("command_cost_budgets", {})
|
2543
|
+
for command_name, limit in config_command_cost_budgets.items():
|
2544
|
+
try:
|
2545
|
+
orchestrator.budget_manager.set_command_cost_budget(command_name, float(limit))
|
2546
|
+
logger.info(f"šÆ CONFIG COST BUDGET SET: {command_name} = ${limit}")
|
2547
|
+
except (ValueError, TypeError, AttributeError) as e:
|
2548
|
+
logger.error(f"Invalid command cost budget in config file: '{command_name}=${limit}': {e}")
|
2549
|
+
|
2550
|
+
# Load cost budgets from CLI args (these override config file)
|
2551
|
+
if args.command_cost_budget:
|
2552
|
+
for budget_str in args.command_cost_budget:
|
2553
|
+
try:
|
2554
|
+
command_name, limit = budget_str.split('=', 1)
|
2555
|
+
# Normalize command name by ensuring it starts with '/'
|
2556
|
+
command_name = command_name.strip()
|
2557
|
+
if not command_name.startswith('/'):
|
2558
|
+
command_name = '/' + command_name
|
2559
|
+
|
2560
|
+
orchestrator.budget_manager.set_command_cost_budget(command_name, float(limit))
|
2561
|
+
logger.info(f"šÆ CLI COST BUDGET SET: {command_name} = ${limit} (overrides config)")
|
2562
|
+
|
2563
|
+
# DEBUG: Log all budget keys after setting
|
2564
|
+
logger.debug(f"š ALL COST BUDGET KEYS: {list(orchestrator.budget_manager.command_budgets.keys())}")
|
2565
|
+
except ValueError:
|
2566
|
+
logger.error(f"Invalid format for --command-cost-budget: '{budget_str}'. Use '/command=cost' (e.g., '/analyze=5.0').")
|
2567
|
+
except AttributeError:
|
2568
|
+
logger.error("Cost budgets require enhanced TokenBudgetManager - feature may not be available.")
|
2569
|
+
|
2570
|
+
# AUTO-BUDGET CREATION: Create command budgets from instance max_tokens_per_command
|
2571
|
+
# This ensures that max_tokens_per_command values from JSON configs automatically
|
2572
|
+
# create command budgets, solving the "None Configured" display issue
|
2573
|
+
logger.info("š AUTO-BUDGET: Scanning instances for max_tokens_per_command values...")
|
2574
|
+
auto_created_count = 0
|
2575
|
+
for instance in instances:
|
2576
|
+
if instance.max_tokens_per_command is not None:
|
2577
|
+
# ISSUE #1348 FIX: Use the SAME logic as _update_budget_tracking for consistency
|
2578
|
+
# This is critical - the budget key must match what budget tracking uses
|
2579
|
+
if instance.command and instance.command.strip().startswith('/'):
|
2580
|
+
# For slash commands, use base command for auto-budget (most common case)
|
2581
|
+
command_key = instance.command.rstrip(';').split()[0] if instance.command else instance.command
|
2582
|
+
logger.debug(f"šÆ AUTO-BUDGET: Slash command using base key: '{command_key}'")
|
2583
|
+
else:
|
2584
|
+
# For non-slash commands/prompts, always use the full command text as budget key
|
2585
|
+
command_key = instance.command.rstrip(';') if instance.command else instance.command
|
2586
|
+
logger.info(f"šÆ AUTO-BUDGET: Raw command will use full text as key: '{instance.command[:50]}...'")
|
2587
|
+
|
2588
|
+
# Only create auto-budget if no explicit budget already exists
|
2589
|
+
if command_key not in orchestrator.budget_manager.command_budgets:
|
2590
|
+
orchestrator.budget_manager.set_command_budget(command_key, instance.max_tokens_per_command)
|
2591
|
+
logger.info(f"šÆ AUTO-BUDGET CREATED: {command_key} = {instance.max_tokens_per_command} tokens (from max_tokens_per_command)")
|
2592
|
+
auto_created_count += 1
|
2593
|
+
else:
|
2594
|
+
logger.debug(f"š AUTO-BUDGET SKIPPED: {command_key} already has explicit budget")
|
2595
|
+
|
2596
|
+
if auto_created_count > 0:
|
2597
|
+
logger.info(f"ā
AUTO-BUDGET: Created {auto_created_count} automatic command budgets from max_tokens_per_command")
|
2598
|
+
else:
|
2599
|
+
logger.debug("š AUTO-BUDGET: No auto-budgets created (no max_tokens_per_command values found)")
|
2600
|
+
|
2601
|
+
# Handle command inspection modes
|
2602
|
+
if args.list_commands:
|
2603
|
+
print("Available Slash Commands:")
|
2604
|
+
print("=" * 50)
|
2605
|
+
commands = orchestrator.discover_available_commands()
|
2606
|
+
for cmd in commands:
|
2607
|
+
cmd_info = orchestrator.inspect_command(cmd)
|
2608
|
+
if cmd_info.get("exists"):
|
2609
|
+
frontmatter = cmd_info.get("frontmatter", {})
|
2610
|
+
description = frontmatter.get("description", "No description available")
|
2611
|
+
print(f"{cmd:25} - {description}")
|
2612
|
+
else:
|
2613
|
+
print(f"{cmd:25} - Built-in command")
|
2614
|
+
return
|
2615
|
+
|
2616
|
+
if args.inspect_command:
|
2617
|
+
cmd_info = orchestrator.inspect_command(args.inspect_command)
|
2618
|
+
print(f"Command: {args.inspect_command}")
|
2619
|
+
print("=" * 50)
|
2620
|
+
if cmd_info.get("exists"):
|
2621
|
+
print(f"File: {cmd_info.get('file_path')}")
|
2622
|
+
if cmd_info.get("frontmatter"):
|
2623
|
+
print("Configuration:")
|
2624
|
+
for key, value in cmd_info["frontmatter"].items():
|
2625
|
+
print(f" {key}: {value}")
|
2626
|
+
print("\nContent Preview:")
|
2627
|
+
print(cmd_info.get("content_preview", "No content available"))
|
2628
|
+
else:
|
2629
|
+
print("Command not found or is a built-in command")
|
2630
|
+
return
|
2631
|
+
|
2632
|
+
# Add instances to orchestrator
|
2633
|
+
for instance in instances:
|
2634
|
+
orchestrator.add_instance(instance)
|
2635
|
+
|
2636
|
+
if args.dry_run:
|
2637
|
+
logger.info("DRY RUN MODE - Commands that would be executed:")
|
2638
|
+
for name, config in orchestrator.instances.items():
|
2639
|
+
cmd = orchestrator.build_claude_command(config)
|
2640
|
+
print(f"{name}: {' '.join(cmd)}")
|
2641
|
+
|
2642
|
+
# Show budget configuration if enabled
|
2643
|
+
if orchestrator.budget_manager:
|
2644
|
+
from token_budget.visualization import render_progress_bar
|
2645
|
+
bm = orchestrator.budget_manager
|
2646
|
+
print(f"\n=== TOKEN BUDGET CONFIGURATION ===")
|
2647
|
+
|
2648
|
+
if bm.overall_budget:
|
2649
|
+
print(f"Overall Budget: {bm.overall_budget:,} tokens")
|
2650
|
+
else:
|
2651
|
+
print(f"Overall Budget: Unlimited")
|
2652
|
+
|
2653
|
+
print(f"Enforcement Mode: {bm.enforcement_mode.upper()}")
|
2654
|
+
|
2655
|
+
if bm.command_budgets:
|
2656
|
+
print(f"Command Budgets:")
|
2657
|
+
for name, budget_info in bm.command_budgets.items():
|
2658
|
+
print(f" {name:<30} {budget_info.limit:,} tokens")
|
2659
|
+
else:
|
2660
|
+
print(f"Command Budgets: None configured")
|
2661
|
+
|
2662
|
+
print(f"=====================================\n")
|
2663
|
+
|
2664
|
+
# Show scheduled start time if provided
|
2665
|
+
if args.start_at:
|
2666
|
+
try:
|
2667
|
+
target_time = parse_start_time(args.start_at)
|
2668
|
+
wait_seconds = (target_time - datetime.now()).total_seconds()
|
2669
|
+
logger.info(f"Orchestration would be scheduled to start at: {target_time.strftime('%Y-%m-%d %H:%M:%S')}")
|
2670
|
+
logger.info(f"Wait time would be: {wait_seconds:.1f} seconds ({wait_seconds/3600:.1f} hours)")
|
2671
|
+
except ValueError as e:
|
2672
|
+
logger.error(f"Invalid start time: {e}")
|
2673
|
+
return
|
2674
|
+
|
2675
|
+
# Handle scheduled start time
|
2676
|
+
if args.start_at:
|
2677
|
+
try:
|
2678
|
+
target_time = parse_start_time(args.start_at)
|
2679
|
+
now = datetime.now()
|
2680
|
+
wait_seconds = (target_time - now).total_seconds()
|
2681
|
+
|
2682
|
+
if wait_seconds <= 0:
|
2683
|
+
logger.warning(f"Target time {target_time.strftime('%Y-%m-%d %H:%M:%S')} is in the past, starting immediately")
|
2684
|
+
else:
|
2685
|
+
logger.info(f"Orchestration scheduled to start at: {target_time.strftime('%Y-%m-%d %H:%M:%S')}")
|
2686
|
+
logger.info(f"Waiting {wait_seconds:.1f} seconds ({wait_seconds/3600:.1f} hours) until start time...")
|
2687
|
+
|
2688
|
+
# Show countdown for long waits
|
2689
|
+
if wait_seconds > 60:
|
2690
|
+
# Show periodic countdown updates
|
2691
|
+
countdown_intervals = [3600, 1800, 900, 300, 60, 30, 10] # 1h, 30m, 15m, 5m, 1m, 30s, 10s
|
2692
|
+
|
2693
|
+
while wait_seconds > 0:
|
2694
|
+
# Find the next appropriate countdown interval
|
2695
|
+
next_update = None
|
2696
|
+
for interval in countdown_intervals:
|
2697
|
+
if wait_seconds > interval:
|
2698
|
+
next_update = interval
|
2699
|
+
break
|
2700
|
+
|
2701
|
+
if next_update:
|
2702
|
+
sleep_time = wait_seconds - next_update
|
2703
|
+
await asyncio.sleep(sleep_time)
|
2704
|
+
wait_seconds = next_update
|
2705
|
+
hours = wait_seconds // 3600
|
2706
|
+
minutes = (wait_seconds % 3600) // 60
|
2707
|
+
seconds = wait_seconds % 60
|
2708
|
+
if hours > 0:
|
2709
|
+
logger.info(f"Orchestration starts in {int(hours)}h {int(minutes)}m")
|
2710
|
+
elif minutes > 0:
|
2711
|
+
logger.info(f"Orchestration starts in {int(minutes)}m {int(seconds)}s")
|
2712
|
+
else:
|
2713
|
+
logger.info(f"Orchestration starts in {int(seconds)}s")
|
2714
|
+
else:
|
2715
|
+
# Final countdown
|
2716
|
+
await asyncio.sleep(wait_seconds)
|
2717
|
+
wait_seconds = 0
|
2718
|
+
else:
|
2719
|
+
# For short waits, just sleep
|
2720
|
+
await asyncio.sleep(wait_seconds)
|
2721
|
+
|
2722
|
+
logger.info("Scheduled start time reached - beginning orchestration")
|
2723
|
+
except ValueError as e:
|
2724
|
+
logger.error(f"Invalid start time: {e}")
|
2725
|
+
sys.exit(1)
|
2726
|
+
|
2727
|
+
# Run all instances
|
2728
|
+
logger.info("Starting Claude Code instance orchestration")
|
2729
|
+
if args.use_cloud_sql:
|
2730
|
+
logger.info(f"Batch ID: {orchestrator.batch_id}")
|
2731
|
+
logger.info("Metrics will be saved to CloudSQL")
|
2732
|
+
start_time = time.time()
|
2733
|
+
|
2734
|
+
results = await orchestrator.run_all_instances(args.timeout)
|
2735
|
+
|
2736
|
+
end_time = time.time()
|
2737
|
+
total_duration = end_time - start_time
|
2738
|
+
|
2739
|
+
# Print summary with token usage
|
2740
|
+
summary = orchestrator.get_status_summary()
|
2741
|
+
total_tokens = sum(status.total_tokens for status in orchestrator.statuses.values())
|
2742
|
+
total_cached = sum(status.cached_tokens for status in orchestrator.statuses.values())
|
2743
|
+
total_tool_calls = sum(status.tool_calls for status in orchestrator.statuses.values())
|
2744
|
+
cache_rate = round(total_cached / max(total_tokens, 1) * 100, 1) if total_tokens > 0 else 0
|
2745
|
+
|
2746
|
+
logger.info(f"Orchestration completed in {total_duration:.2f}s")
|
2747
|
+
logger.info(f"Results: {summary['completed']} completed, {summary['failed']} failed")
|
2748
|
+
logger.info(f"Token Usage: {total_tokens:,} total ({total_cached:,} cached, {cache_rate}% hit rate), {total_tool_calls} tool calls")
|
2749
|
+
|
2750
|
+
# Add cost transparency to final summary
|
2751
|
+
if orchestrator.pricing_engine:
|
2752
|
+
total_cost = sum(orchestrator._calculate_cost(status) for status in orchestrator.statuses.values())
|
2753
|
+
logger.info(f"š° Total Cost: ${total_cost:.4f} (Claude pricing compliant)")
|
2754
|
+
|
2755
|
+
# Note: ZEN provides summary only - upgrade to Apex for detailed data access
|
2756
|
+
|
2757
|
+
# Print detailed results in table format
|
2758
|
+
print("\n" + "="*120)
|
2759
|
+
print("NETRA ZEN RESULTS")
|
2760
|
+
print("="*120)
|
2761
|
+
|
2762
|
+
if orchestrator.statuses:
|
2763
|
+
# Table headers with separated cache metrics
|
2764
|
+
headers = ["Instance", "Status", "Duration", "Total Tokens", "Input", "Output", "Cache Cr", "Cache Rd", "Tools", "Cost"]
|
2765
|
+
col_widths = [20, 10, 10, 12, 8, 8, 8, 8, 6, 10]
|
2766
|
+
|
2767
|
+
# Print header
|
2768
|
+
header_row = "| " + " | ".join(h.ljust(w) for h, w in zip(headers, col_widths)) + " |"
|
2769
|
+
print("+" + "=" * (len(header_row) - 2) + "+")
|
2770
|
+
print(header_row)
|
2771
|
+
print("+" + "-" * (len(header_row) - 2) + "+")
|
2772
|
+
|
2773
|
+
# Print data rows
|
2774
|
+
for name, status in orchestrator.statuses.items():
|
2775
|
+
# Prepare row data
|
2776
|
+
instance_name = name[:19] if len(name) > 19 else name
|
2777
|
+
status_str = status.status
|
2778
|
+
duration_str = f"{status.end_time - status.start_time:.1f}s" if status.start_time and status.end_time else "N/A"
|
2779
|
+
total_tokens_str = f"{status.total_tokens:,}" if status.total_tokens > 0 else "0"
|
2780
|
+
input_tokens_str = f"{status.input_tokens:,}" if status.input_tokens > 0 else "0"
|
2781
|
+
output_tokens_str = f"{status.output_tokens:,}" if status.output_tokens > 0 else "0"
|
2782
|
+
cache_creation_str = f"{status.cache_creation_tokens:,}" if status.cache_creation_tokens > 0 else "0"
|
2783
|
+
cache_read_str = f"{status.cache_read_tokens:,}" if status.cache_read_tokens > 0 else "0"
|
2784
|
+
tools_str = str(status.tool_calls) if status.tool_calls > 0 else "0"
|
2785
|
+
# Calculate cost - use the pricing engine
|
2786
|
+
if status.total_cost_usd is not None:
|
2787
|
+
cost_str = f"${status.total_cost_usd:.4f}"
|
2788
|
+
else:
|
2789
|
+
# Calculate cost using the pricing engine
|
2790
|
+
calculated_cost = orchestrator._calculate_cost(status)
|
2791
|
+
cost_str = f"${calculated_cost:.4f}" if calculated_cost > 0 else "N/A"
|
2792
|
+
|
2793
|
+
row_data = [instance_name, status_str, duration_str, total_tokens_str, input_tokens_str,
|
2794
|
+
output_tokens_str, cache_creation_str, cache_read_str, tools_str, cost_str]
|
2795
|
+
|
2796
|
+
row = "| " + " | ".join(data.ljust(w) for data, w in zip(row_data, col_widths)) + " |"
|
2797
|
+
print(row)
|
2798
|
+
|
2799
|
+
print("+" + "=" * (len(header_row) - 2) + "+")
|
2800
|
+
|
2801
|
+
# Check for permission errors FIRST - Issue #1320
|
2802
|
+
permission_errors = []
|
2803
|
+
for name, status in orchestrator.statuses.items():
|
2804
|
+
if status.error and any(phrase in status.error.lower() for phrase in [
|
2805
|
+
'permission error', 'requires approval', 'permission denied'
|
2806
|
+
]):
|
2807
|
+
permission_errors.append((name, status))
|
2808
|
+
|
2809
|
+
# Display CRITICAL permission errors prominently
|
2810
|
+
if permission_errors:
|
2811
|
+
print(f"""
|
2812
|
+
+============================================================================================+
|
2813
|
+
| šØšØšØ CRITICAL: {len(permission_errors)} PERMISSION ERROR(S) DETECTED - COMMANDS WERE BLOCKED! šØšØšØ |
|
2814
|
+
+============================================================================================+
|
2815
|
+
| Platform: {platform.system():<80}|
|
2816
|
+
| Permission Mode Used: {orchestrator.instances[permission_errors[0][0]].permission_mode if permission_errors else 'Unknown':<68}|
|
2817
|
+
+============================================================================================+
|
2818
|
+
""")
|
2819
|
+
for name, status in permission_errors:
|
2820
|
+
error_preview = status.error.replace('\n', ' ')[:70]
|
2821
|
+
print(f"| ā {name:<20} | {error_preview:<68} |")
|
2822
|
+
print(f"""+============================================================================================+
|
2823
|
+
| SOLUTION: zen_orchestrator.py defaults to bypassPermissions to avoid approval prompts |
|
2824
|
+
| ⢠Default: bypassPermissions (works on all platforms) |
|
2825
|
+
| ⢠Users can override via permission_mode in config if needed |
|
2826
|
+
| |
|
2827
|
+
| If still seeing errors, manually set permission mode in your config or update Claude Code. |
|
2828
|
+
+============================================================================================+
|
2829
|
+
""")
|
2830
|
+
|
2831
|
+
# Print additional details if there are outputs or errors
|
2832
|
+
print("\nAdditional Details:")
|
2833
|
+
print("-" * 40)
|
2834
|
+
for name, status in orchestrator.statuses.items():
|
2835
|
+
has_details = False
|
2836
|
+
|
2837
|
+
if status.output:
|
2838
|
+
if not has_details:
|
2839
|
+
print(f"\n{name.upper()}:")
|
2840
|
+
has_details = True
|
2841
|
+
print(f" Output Preview: {status.output[:150]}...")
|
2842
|
+
|
2843
|
+
if status.error:
|
2844
|
+
if not has_details:
|
2845
|
+
print(f"\n{name.upper()}:")
|
2846
|
+
has_details = True
|
2847
|
+
# Highlight permission errors differently
|
2848
|
+
if any(phrase in status.error.lower() for phrase in ['permission error', 'requires approval']):
|
2849
|
+
print(f" ā ļø PERMISSION ERROR: {status.error[:150]}...")
|
2850
|
+
else:
|
2851
|
+
print(f" Errors: {status.error[:150]}...")
|
2852
|
+
|
2853
|
+
if status.tool_calls > 0 and status.tool_details:
|
2854
|
+
if not has_details:
|
2855
|
+
print(f"\n{name.upper()}:")
|
2856
|
+
has_details = True
|
2857
|
+
print(f" Tools Used ({status.tool_calls}): {', '.join(status.tool_details)}")
|
2858
|
+
else:
|
2859
|
+
print("No instances were processed.")
|
2860
|
+
|
2861
|
+
# For detailed data access
|
2862
|
+
print("\n" + "="*80)
|
2863
|
+
print("š Looking for more?")
|
2864
|
+
print("="*80)
|
2865
|
+
print("Explore Zen with Apex for the most effective AI Ops value for production AI.")
|
2866
|
+
print("")
|
2867
|
+
print("š Learn more: https://netrasystems.ai/")
|
2868
|
+
print("="*80)
|
2869
|
+
|
2870
|
+
# Show CloudSQL info if enabled
|
2871
|
+
if args.use_cloud_sql:
|
2872
|
+
print(f"\nš Local metrics displayed above")
|
2873
|
+
print(f" Batch ID: {orchestrator.batch_id}")
|
2874
|
+
print(f" Database persistence disabled for security")
|
2875
|
+
|
2876
|
+
# Exit with appropriate code
|
2877
|
+
sys.exit(0 if summary['failed'] == 0 else 1)
|
2878
|
+
|
2879
|
+
def run() -> None:
    """Synchronous wrapper for the main function to be used as console script entry point.

    Blocks until the async ``main()`` coroutine completes; ``asyncio.run``
    creates a fresh event loop for the call and closes it on exit, so this
    is safe to invoke from a plain (non-async) context such as the ``zen``
    console script.
    """
    asyncio.run(main())
|
2882
|
+
|
2883
|
+
# Support direct execution (`python zen_orchestrator.py`) in addition to the
# installed console-script entry point, both routed through run().
if __name__ == "__main__":
    run()
|