cade-cli 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. cade_cli-0.3.3.dist-info/METADATA +151 -0
  2. cade_cli-0.3.3.dist-info/RECORD +44 -0
  3. cade_cli-0.3.3.dist-info/WHEEL +4 -0
  4. cade_cli-0.3.3.dist-info/entry_points.txt +2 -0
  5. cadecoder/__init__.py +1 -0
  6. cadecoder/ai/__init__.py +6 -0
  7. cadecoder/ai/prompts.py +572 -0
  8. cadecoder/cli/__init__.py +0 -0
  9. cadecoder/cli/app.py +147 -0
  10. cadecoder/cli/auth.py +483 -0
  11. cadecoder/cli/commands/__init__.py +5 -0
  12. cadecoder/cli/commands/auth.py +143 -0
  13. cadecoder/cli/commands/chat.py +264 -0
  14. cadecoder/cli/commands/mcp.py +477 -0
  15. cadecoder/cli/commands/tools.py +226 -0
  16. cadecoder/core/__init__.py +12 -0
  17. cadecoder/core/config.py +380 -0
  18. cadecoder/core/constants.py +281 -0
  19. cadecoder/core/errors.py +145 -0
  20. cadecoder/core/logging.py +148 -0
  21. cadecoder/core/types.py +235 -0
  22. cadecoder/core/utils.py +279 -0
  23. cadecoder/execution/__init__.py +46 -0
  24. cadecoder/execution/context_window.py +521 -0
  25. cadecoder/execution/orchestrator.py +562 -0
  26. cadecoder/execution/parallel.py +287 -0
  27. cadecoder/providers/__init__.py +60 -0
  28. cadecoder/providers/base.py +294 -0
  29. cadecoder/providers/openai.py +251 -0
  30. cadecoder/storage/__init__.py +0 -0
  31. cadecoder/storage/threads.py +489 -0
  32. cadecoder/templates/login_failed.html +21 -0
  33. cadecoder/templates/login_success.html +21 -0
  34. cadecoder/templates/styles.css +87 -0
  35. cadecoder/tools/__init__.py +19 -0
  36. cadecoder/tools/builtin.py +644 -0
  37. cadecoder/tools/filesystem.py +315 -0
  38. cadecoder/tools/git.py +221 -0
  39. cadecoder/tools/manager.py +1635 -0
  40. cadecoder/ui/__init__.py +7 -0
  41. cadecoder/ui/display.py +338 -0
  42. cadecoder/ui/input.py +145 -0
  43. cadecoder/ui/session.py +455 -0
  44. cadecoder/ui/state.py +20 -0
@@ -0,0 +1,521 @@
1
+ """Context window management for the agent.
2
+
3
+ Provides token tracking, context compaction, and backup functionality
4
+ to manage the LLM context window effectively.
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ from dataclasses import dataclass, field
10
+ from datetime import datetime
11
+ from enum import Enum
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
# Module-level logger (avoids circular import).
# Obtained straight from the stdlib by the fixed name "cadecoder" rather than
# through a project helper module.
log = logging.getLogger("cadecoder")
17
+
18
+
19
class CompactionStrategy(str, Enum):
    """Policies available for shrinking the context once the window fills up.

    Members are also plain strings, so they compare equal to their values.
    """

    # Retain only the newest messages.
    KEEP_RECENT = "keep_recent"

    # Condense the early part of the conversation, keep the recent part.
    SUMMARIZE_EARLY = "summarize_early"

    # Retain only the final result produced by each tool.
    KEEP_TOOL_RESULTS_FINAL = "keep_tool_results_final"

    # Strip tool output bodies while leaving the message structure in place.
    DROP_TOOL_OUTPUTS = "drop_tool_outputs"
26
+
27
+
28
@dataclass
class TokenEstimate:
    """Rough token estimate for a message or a piece of text.

    The numbers come from a character-ratio heuristic
    (``characters / chars_per_token``, truncated to ``int``), not from a real
    tokenizer.
    """

    text_tokens: int  # tokens attributed to the message content
    tool_call_tokens: int  # tokens attributed to tool-call names and arguments
    total: int  # text_tokens + tool_call_tokens

    @staticmethod
    def _char_length(value: Any) -> int:
        """Return the number of characters ``value`` contributes.

        ``None`` counts as zero, strings use their length, and anything else
        is measured through its ``str()`` representation.
        """
        if value is None:
            return 0
        if isinstance(value, str):
            return len(value)
        return len(str(value))

    @classmethod
    def _content_length(cls, content: Any) -> int:
        """Return the character count of a message ``content`` value.

        Handles plain strings as well as a list of content parts; for dict
        parts that carry a string ``"text"`` field only that text is counted.
        """
        if not content:
            return 0
        if isinstance(content, (list, tuple)):
            size = 0
            for part in content:
                if isinstance(part, dict) and isinstance(part.get("text"), str):
                    size += len(part["text"])
                else:
                    size += cls._char_length(part)
            return size
        return cls._char_length(content)

    @classmethod
    def from_text(cls, text: str, chars_per_token: float = 4.0) -> "TokenEstimate":
        """Estimate tokens from text using a character ratio.

        Args:
            text: Text to measure (``None`` is treated as empty).
            chars_per_token: Average number of characters per token.

        Returns:
            Estimate whose ``tool_call_tokens`` is 0 and whose ``total``
            equals ``text_tokens``.
        """
        text_tokens = int(cls._char_length(text) / chars_per_token)
        return cls(text_tokens=text_tokens, tool_call_tokens=0, total=text_tokens)

    @classmethod
    def from_message(cls, message: dict[str, Any], chars_per_token: float = 4.0) -> "TokenEstimate":
        """Estimate tokens from a message dict.

        Args:
            message: Message dict; only ``"content"`` and ``"tool_calls"``
                are read. Each tool call is expected to be a dict with a
                ``"function"`` dict holding ``"name"`` and ``"arguments"``.
            chars_per_token: Average number of characters per token.

        Returns:
            Estimate with content and tool-call tokens counted separately.
        """
        text_tokens = int(cls._content_length(message.get("content")) / chars_per_token)

        # ``"tool_calls": None`` is common in assistant messages, so treat a
        # missing key and an explicit None the same way.
        tool_tokens = 0
        for tc in message.get("tool_calls") or []:
            func = tc.get("function") or {}
            chars = cls._char_length(func.get("name")) + cls._char_length(func.get("arguments"))
            tool_tokens += int(chars / chars_per_token)

        return cls(
            text_tokens=text_tokens,
            tool_call_tokens=tool_tokens,
            total=text_tokens + tool_tokens,
        )
65
+
66
+
67
@dataclass
class ContextBackup:
    """Snapshot of the conversation captured right before a compaction pass."""

    timestamp: datetime
    messages: list[dict[str, Any]]
    token_count: int
    compaction_reason: str

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-friendly mapping; the timestamp becomes an ISO string."""
        payload: dict[str, Any] = {"timestamp": self.timestamp.isoformat()}
        payload["messages"] = self.messages
        payload["token_count"] = self.token_count
        payload["compaction_reason"] = self.compaction_reason
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ContextBackup":
        """Rebuild a backup from a mapping shaped like the output of ``to_dict``."""
        taken_at = datetime.fromisoformat(data["timestamp"])
        history = data["messages"]
        tokens = data["token_count"]
        reason = data["compaction_reason"]
        return cls(taken_at, history, tokens, reason)
94
+
95
+
96
@dataclass
class ToolOutputCollection:
    """Accumulates tool outputs: the full history plus each tool's latest output."""

    all_outputs: list[dict[str, Any]] = field(default_factory=list)
    final_outputs: dict[str, str] = field(default_factory=dict)

    def add_output(self, tool_name: str, output: str, tool_call_id: str) -> None:
        """Record one tool output and make it that tool's latest result."""
        record = {
            "tool_name": tool_name,
            "output": output,
            "tool_call_id": tool_call_id,
            "timestamp": datetime.now().isoformat(),
        }
        self.all_outputs.append(record)
        # The newest output always replaces the previous one for this tool.
        self.final_outputs[tool_name] = output

    def get_all_outputs(self) -> list[dict[str, Any]]:
        """Return a shallow copy of every recorded output, oldest first."""
        return list(self.all_outputs)

    def get_final_outputs(self) -> dict[str, str]:
        """Return a copy of the mapping from tool name to its latest output."""
        return dict(self.final_outputs)

    def get_total_size(self) -> int:
        """Return the combined character length of all recorded outputs."""
        size = 0
        for record in self.all_outputs:
            size += len(record.get("output", ""))
        return size

    def clear(self) -> None:
        """Forget every recorded output (history and latest-per-tool)."""
        self.final_outputs.clear()
        self.all_outputs.clear()
132
+
133
+
134
class ContextWindowManager:
    """Manages the LLM context window with compaction and backup support.

    Estimates token usage for a message list, reports when the window is full
    enough to need compaction, and applies one of the ``CompactionStrategy``
    policies. Before each compaction the original messages are written to a
    JSON backup file in ``backup_dir`` (best effort: failures are logged, not
    raised).
    """

    # Model context limits (updated Jan 2026)
    # OpenAI: https://platform.openai.com/docs/models
    # Anthropic: https://docs.anthropic.com/en/docs/about-claude/models
    MODEL_CONTEXT_LIMITS = {
        # OpenAI GPT-4.1 family (1M context via API, max output 32,768)
        "gpt-4.1": 1_000_000,
        "gpt-4.1-mini": 1_000_000,
        "gpt-4.1-nano": 1_000_000,
        # OpenAI GPT-4o family
        "gpt-4o": 128_000,
        "gpt-4o-mini": 128_000,
        # OpenAI legacy models
        "gpt-4-turbo": 128_000,
        "gpt-4": 8_192,
        "gpt-3.5-turbo": 16_385,
        # Anthropic Claude 4 family (200K standard, 1M beta for Sonnet 4)
        "claude-opus-4.5": 200_000,
        "claude-sonnet-4": 200_000,
        # Anthropic Claude 3 family
        "claude-3-opus": 200_000,
        "claude-3-sonnet": 200_000,
        "claude-3-haiku": 200_000,
        "claude-3.5-sonnet": 200_000,
        "claude-3.5-haiku": 200_000,
    }

    # Max output tokens per model (for reserving response space)
    MODEL_MAX_OUTPUT = {
        "gpt-4.1": 32_768,
        "gpt-4.1-mini": 32_768,
        "gpt-4.1-nano": 32_768,
        "gpt-4o": 16_384,
        "gpt-4o-mini": 16_384,
        "gpt-4-turbo": 4_096,
        "gpt-4": 4_096,  # Legacy GPT-4 has smaller output
        "gpt-3.5-turbo": 4_096,
        "claude-opus-4.5": 8_192,
        "claude-sonnet-4": 8_192,
        "claude-3-opus": 4_096,
        "claude-3-sonnet": 4_096,
        "claude-3-haiku": 4_096,
        "claude-3.5-sonnet": 8_192,
        "claude-3.5-haiku": 8_192,
    }

    # Tokens reserved for the response when the model is not listed in
    # MODEL_MAX_OUTPUT (a fixed fallback; no percentage rule is applied).
    RESPONSE_RESERVE = 4_096

    # Compaction threshold (fraction of the effective limit used before
    # compaction is reported as needed)
    COMPACTION_THRESHOLD = 0.85

    # Replacement bodies for tool messages whose output is removed.
    _TOOL_OUTPUT_TRUNCATED = "[Tool output truncated for context management]"
    _TOOL_OUTPUT_SUPERSEDED = (
        "[Earlier tool output omitted: superseded by a later result from the same tool]"
    )

    def __init__(
        self,
        model: str = "gpt-4.1",
        backup_dir: Path | None = None,
        chars_per_token: float = 4.0,
    ) -> None:
        """Initialize context window manager.

        Args:
            model: Model name for context limit lookup (exact match against
                ``MODEL_CONTEXT_LIMITS``; unknown models fall back to 128K).
            backup_dir: Directory for storing context backups. Defaults to
                ``~/.cadecoder/context_backups``.
            chars_per_token: Character to token ratio for estimation
        """
        self.model = model
        self.chars_per_token = chars_per_token

        # Set context limit based on model (default to 128K for unknown models)
        self.context_limit = self.MODEL_CONTEXT_LIMITS.get(model, 128_000)

        # Set response reserve based on model's max output tokens
        self.response_reserve = self.MODEL_MAX_OUTPUT.get(model, self.RESPONSE_RESERVE)
        self.effective_limit = self.context_limit - self.response_reserve

        # Backup directory
        if backup_dir is None:
            backup_dir = Path.home() / ".cadecoder" / "context_backups"
        self.backup_dir = backup_dir
        try:
            self.backup_dir.mkdir(parents=True, exist_ok=True)
        except OSError as e:
            # Backups are best effort everywhere else, so an unwritable
            # location must not prevent the manager from being created.
            log.warning(f"Failed to create context backup directory {self.backup_dir}: {e}")

        # Tool output collection
        self.tool_outputs = ToolOutputCollection()

        # Current context tracking
        self._current_token_count = 0
        self._message_count = 0
        self._backups: list[ContextBackup] = []

        log.info(
            f"ContextWindowManager initialized: model={model}, "
            f"limit={self.context_limit:,}, effective={self.effective_limit:,}"
        )

    def estimate_tokens(self, messages: list[dict[str, Any]]) -> int:
        """Estimate total tokens for a list of messages."""
        return sum(
            TokenEstimate.from_message(msg, self.chars_per_token).total for msg in messages
        )

    def estimate_message_tokens(self, message: dict[str, Any]) -> TokenEstimate:
        """Estimate tokens for a single message."""
        return TokenEstimate.from_message(message, self.chars_per_token)

    def check_context_status(self, messages: list[dict[str, Any]]) -> dict[str, Any]:
        """Check current context window status.

        Returns:
            Dict with token_count, percentage_used (0-100, one decimal),
            needs_compaction, available_tokens, effective_limit and
            message_count.
        """
        token_count = self.estimate_tokens(messages)
        percentage_used = token_count / self.effective_limit
        needs_compaction = percentage_used >= self.COMPACTION_THRESHOLD

        return {
            "token_count": token_count,
            "percentage_used": round(percentage_used * 100, 1),
            "needs_compaction": needs_compaction,
            "available_tokens": self.effective_limit - token_count,
            "effective_limit": self.effective_limit,
            "message_count": len(messages),
        }

    def compact_context(
        self,
        messages: list[dict[str, Any]],
        strategy: CompactionStrategy = CompactionStrategy.KEEP_RECENT,
        target_percentage: float = 0.6,
    ) -> tuple[list[dict[str, Any]], ContextBackup]:
        """Compact context to reduce token usage.

        Args:
            messages: Current message list (not modified)
            strategy: Compaction strategy to use; a plain string holding a
                strategy value is accepted as well.
            target_percentage: Target fraction of the effective limit
                (context limit minus response reserve) after compaction

        Returns:
            Tuple of (compacted_messages, backup)

        Raises:
            ValueError: If ``strategy`` is not a valid strategy value.
        """
        strategy = CompactionStrategy(strategy)

        # Create backup before compaction
        current_tokens = self.estimate_tokens(messages)
        backup = ContextBackup(
            timestamp=datetime.now(),
            messages=list(messages),
            token_count=current_tokens,
            compaction_reason=f"strategy={strategy.value}, threshold={self.COMPACTION_THRESHOLD}",
        )

        # Save backup to disk
        self._save_backup(backup)
        self._backups.append(backup)

        target_tokens = int(self.effective_limit * target_percentage)

        log.info(
            f"Compacting context: {current_tokens:,} -> {target_tokens:,} tokens "
            f"(strategy={strategy.value})"
        )

        handlers = {
            CompactionStrategy.KEEP_RECENT: self._compact_keep_recent,
            CompactionStrategy.SUMMARIZE_EARLY: self._compact_summarize_early,
            CompactionStrategy.KEEP_TOOL_RESULTS_FINAL: self._compact_keep_final_tool_results,
            CompactionStrategy.DROP_TOOL_OUTPUTS: self._compact_drop_tool_outputs,
        }
        # Any strategy without a dedicated handler falls back to keep_recent.
        compact = handlers.get(strategy, self._compact_keep_recent)
        compacted = compact(messages, target_tokens)

        new_token_count = self.estimate_tokens(compacted)
        log.info(
            f"Compaction complete: {len(messages)} -> {len(compacted)} messages, "
            f"{current_tokens:,} -> {new_token_count:,} tokens"
        )

        return compacted, backup

    def _compact_keep_recent(
        self, messages: list[dict[str, Any]], target_tokens: int
    ) -> list[dict[str, Any]]:
        """Keep only the most recent messages within token budget.

        The first system message is always kept at the front and its tokens
        count against the budget. Remaining messages are taken from the end
        of the list until the next one would exceed ``target_tokens``.
        """
        system_msg = None
        other_msgs = []

        for msg in messages:
            # Pin only the FIRST system message. Later system messages (for
            # example the placeholder added by summarize_early) are treated as
            # ordinary history so they can never displace the real prompt.
            if system_msg is None and msg.get("role") == "system":
                system_msg = msg
            else:
                other_msgs.append(msg)

        current_tokens = 0
        if system_msg is not None:
            current_tokens = self.estimate_message_tokens(system_msg).total

        # Walk backwards from the newest message, stopping at the first one
        # that no longer fits.
        newest_first = []
        for msg in reversed(other_msgs):
            msg_tokens = self.estimate_message_tokens(msg).total
            if current_tokens + msg_tokens > target_tokens:
                break
            newest_first.append(msg)
            current_tokens += msg_tokens
        compacted = newest_first[::-1]

        # A window that starts with tool results has lost the assistant
        # message that issued those calls; such orphaned results are rejected
        # by chat-completion style APIs, so drop them.
        start = 0
        while start < len(compacted) and compacted[start].get("role") == "tool":
            start += 1
        compacted = compacted[start:]

        # Prepend system message
        if system_msg is not None:
            compacted.insert(0, system_msg)

        return compacted

    def _compact_summarize_early(
        self, messages: list[dict[str, Any]], target_tokens: int
    ) -> list[dict[str, Any]]:
        """Summarize early messages, keep recent ones in full.

        Currently no summary is generated: the messages are trimmed with
        keep_recent and a placeholder system message noting how many messages
        were dropped is inserted after the leading system message (if any).
        """
        # For now, use keep_recent with a summary placeholder
        # Full implementation would call LLM to summarize
        compacted = self._compact_keep_recent(messages, target_tokens)

        # Add summary placeholder for dropped messages
        dropped_count = len(messages) - len(compacted)
        if dropped_count > 0 and compacted:
            # Insert right after the system message when there is one
            insert_idx = 1 if compacted[0].get("role") == "system" else 0
            summary_msg = {
                "role": "system",
                "content": f"[Context compacted: {dropped_count} earlier messages summarized. "
                "Continue from here.]",
            }
            compacted.insert(insert_idx, summary_msg)

        return compacted

    @staticmethod
    def _tool_names_by_call_id(messages: list[dict[str, Any]]) -> dict[str, str]:
        """Map tool call ids to function names using the messages' ``tool_calls``."""
        names: dict[str, str] = {}
        for msg in messages:
            for call in msg.get("tool_calls") or []:
                if not isinstance(call, dict):
                    continue
                call_id = call.get("id")
                func_name = (call.get("function") or {}).get("name")
                if call_id and func_name:
                    names[call_id] = func_name
        return names

    def _compact_keep_final_tool_results(
        self, messages: list[dict[str, Any]], target_tokens: int
    ) -> list[dict[str, Any]]:
        """Keep only final tool results, replacing intermediate ones with summaries.

        For every tool the last result message keeps its content; earlier
        results of the same tool stay in place but have their content replaced
        by a short placeholder, so message order and call/result pairing are
        preserved. Falls back to keep_recent if the result is still over
        budget.
        """
        call_names = self._tool_names_by_call_id(messages)

        def result_name(msg: dict[str, Any]) -> str:
            # Prefer a name stored on the tool message itself; otherwise look
            # it up through the call id (assumes results carry "tool_call_id").
            explicit = msg.get("tool_name", msg.get("name"))
            if explicit is not None:
                return explicit
            return call_names.get(msg.get("tool_call_id"), "unknown")

        # First pass: find the position of each tool's final result
        last_index_by_name: dict[str, int] = {}
        for idx, msg in enumerate(messages):
            if msg.get("role") == "tool":
                last_index_by_name[result_name(msg)] = idx
        final_indices = set(last_index_by_name.values())

        # Second pass: stub out every superseded result in place
        compacted = []
        for idx, msg in enumerate(messages):
            if msg.get("role") == "tool" and idx not in final_indices:
                compacted.append({**msg, "content": self._TOOL_OUTPUT_SUPERSEDED})
            else:
                compacted.append(msg)

        # If still over budget, apply keep_recent
        if self.estimate_tokens(compacted) > target_tokens:
            compacted = self._compact_keep_recent(compacted, target_tokens)

        return compacted

    def _compact_drop_tool_outputs(
        self, messages: list[dict[str, Any]], target_tokens: int
    ) -> list[dict[str, Any]]:
        """Drop tool output content but keep structure.

        Every tool message is copied with its content replaced by a
        placeholder; other messages are passed through unchanged. Falls back
        to keep_recent if the result is still over budget.
        """
        compacted = [
            {**msg, "content": self._TOOL_OUTPUT_TRUNCATED} if msg.get("role") == "tool" else msg
            for msg in messages
        ]

        # If still over budget, apply keep_recent
        if self.estimate_tokens(compacted) > target_tokens:
            compacted = self._compact_keep_recent(compacted, target_tokens)

        return compacted

    def _save_backup(self, backup: ContextBackup) -> None:
        """Save backup to disk (best effort; failures are logged as warnings)."""
        try:
            # Microsecond precision keeps two compactions within the same
            # second from overwriting each other's backup file.
            stamp = backup.timestamp.strftime("%Y%m%d_%H%M%S_%f")
            backup_path = self.backup_dir / f"context_backup_{stamp}.json"

            # Recreate the directory if it went missing since construction.
            self.backup_dir.mkdir(parents=True, exist_ok=True)

            with open(backup_path, "w", encoding="utf-8") as f:
                json.dump(backup.to_dict(), f, indent=2)

            log.debug(f"Context backup saved: {backup_path}")

            # Clean old backups (keep last 10)
            self._cleanup_old_backups(keep_count=10)

        except Exception as e:
            log.warning(f"Failed to save context backup: {e}")

    def _cleanup_old_backups(self, keep_count: int = 10) -> None:
        """Remove old backup files, keeping only the most recent.

        Files are ranked by modification time; everything after the newest
        ``keep_count`` files is deleted. Failures are logged as warnings.
        """
        try:
            backup_files = sorted(
                self.backup_dir.glob("context_backup_*.json"),
                key=lambda p: p.stat().st_mtime,
                reverse=True,
            )

            for old_file in backup_files[keep_count:]:
                old_file.unlink()
                log.debug(f"Removed old context backup: {old_file.name}")

        except Exception as e:
            log.warning(f"Failed to cleanup old backups: {e}")

    def load_backup(self, backup_path: Path) -> ContextBackup | None:
        """Load a backup from disk.

        Returns:
            The parsed backup, or ``None`` (with a logged warning) if the file
            cannot be read or parsed.
        """
        try:
            with open(backup_path, encoding="utf-8") as f:
                data = json.load(f)
            return ContextBackup.from_dict(data)
        except Exception as e:
            log.warning(f"Failed to load backup {backup_path}: {e}")
            return None

    def list_backups(self) -> list[Path]:
        """List available backup files, newest (by modification time) first."""
        return sorted(
            self.backup_dir.glob("context_backup_*.json"),
            key=lambda p: p.stat().st_mtime,
            reverse=True,
        )

    def add_tool_output(self, tool_name: str, output: str, tool_call_id: str) -> None:
        """Add a tool output to the collection."""
        self.tool_outputs.add_output(tool_name, output, tool_call_id)

    def get_tool_outputs_summary(self) -> dict[str, Any]:
        """Get summary of collected tool outputs.

        Returns:
            Dict with total_outputs, unique_tools, total_size_chars and
            estimated_tokens (size divided by ``chars_per_token``).
        """
        total_size = self.tool_outputs.get_total_size()
        return {
            "total_outputs": len(self.tool_outputs.all_outputs),
            "unique_tools": len(self.tool_outputs.final_outputs),
            "total_size_chars": total_size,
            "estimated_tokens": int(total_size / self.chars_per_token),
        }

    def clear_tool_outputs(self) -> None:
        """Clear collected tool outputs."""
        self.tool_outputs.clear()
496
+
497
+
498
def create_context_manager(
    model: str = "gpt-4.1",
    backup_dir: Path | None = None,
) -> ContextWindowManager:
    """Build a context window manager for the given model.

    Args:
        model: Model name handed to the manager for its context limit lookup
        backup_dir: Directory handed to the manager for context backups

    Returns:
        A new ContextWindowManager constructed with these two settings
    """
    manager = ContextWindowManager(model=model, backup_dir=backup_dir)
    return manager
512
+
513
+
514
# Names exported by a star-import of this module.
__all__ = [
    "ContextWindowManager",
    "CompactionStrategy",
    "TokenEstimate",
    "ContextBackup",
    "ToolOutputCollection",
    "create_context_manager",
]