ripperdoc 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ripperdoc/__init__.py +1 -1
- ripperdoc/cli/commands/clear_cmd.py +1 -0
- ripperdoc/cli/commands/exit_cmd.py +1 -1
- ripperdoc/cli/commands/resume_cmd.py +71 -37
- ripperdoc/cli/ui/file_mention_completer.py +221 -0
- ripperdoc/cli/ui/helpers.py +100 -3
- ripperdoc/cli/ui/interrupt_handler.py +175 -0
- ripperdoc/cli/ui/message_display.py +249 -0
- ripperdoc/cli/ui/panels.py +60 -0
- ripperdoc/cli/ui/rich_ui.py +147 -630
- ripperdoc/cli/ui/tool_renderers.py +2 -2
- ripperdoc/core/agents.py +4 -4
- ripperdoc/core/query_utils.py +1 -1
- ripperdoc/core/tool.py +1 -1
- ripperdoc/tools/bash_tool.py +1 -1
- ripperdoc/tools/file_edit_tool.py +2 -2
- ripperdoc/tools/file_read_tool.py +1 -1
- ripperdoc/tools/multi_edit_tool.py +1 -1
- ripperdoc/utils/conversation_compaction.py +476 -0
- ripperdoc/utils/message_compaction.py +109 -154
- ripperdoc/utils/message_formatting.py +216 -0
- ripperdoc/utils/messages.py +31 -9
- ripperdoc/utils/session_history.py +19 -7
- {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.7.dist-info}/METADATA +1 -1
- {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.7.dist-info}/RECORD +29 -23
- {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.7.dist-info}/WHEEL +0 -0
- {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.7.dist-info}/entry_points.txt +0 -0
- {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.7.dist-info}/licenses/LICENSE +0 -0
- {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.7.dist-info}/top_level.txt +0 -0
ripperdoc/utils/message_compaction.py

```diff
@@ -1,11 +1,11 @@
-"""
+"""Context compaction utilities"""

 from __future__ import annotations

 import json
 import os
 from dataclasses import dataclass
-from typing import Any,
+from typing import Any, Dict, List, Optional, Sequence, Set, Union

 from ripperdoc.core.config import GlobalConfig, ModelProfile, get_global_config
 from ripperdoc.utils.log import get_logger
@@ -22,7 +22,7 @@ logger = get_logger()

 ConversationMessage = Union[UserMessage, AssistantMessage, ProgressMessage]

-#
+# Thresholds.
 MAX_TOKENS_SOFT = 20_000
 MAX_TOKENS_HARD = 40_000
 MAX_TOOL_USES_TO_PRESERVE = 3
@@ -30,23 +30,39 @@ IMAGE_TOKEN_COST = 2_000
 AUTO_COMPACT_BUFFER = 13_000
 WARNING_THRESHOLD = 20_000
 ERROR_THRESHOLD = 20_000
-
-TOOL_COMMANDS: Set[str] = {"Read", "Bash", "Grep", "Glob", "LS", "WebSearch", "WebFetch"}
+MICRO_PLACEHOLDER = "[Old tool result content cleared]"

-#
+# Context sizing.
 DEFAULT_CONTEXT_TOKENS = 200_000
 MIN_CONTEXT_TOKENS = 20_000

-#
+# Tools likely to generate large payloads.
+TOOL_COMMANDS: Set[str] = {
+    "Read",
+    "Bash",
+    "Grep",
+    "Glob",
+    "LS",
+    "WebSearch",
+    "WebFetch",
+    "BashOutput",
+    "ListMcpServers",
+    "ListMcpResources",
+    "ReadMcpResource",
+    # "FileEdit",
+    # "MultiEdit",
+    # "NotebookEdit",
+    # "FileWrite",
+}
+
+# State to avoid re-compacting the same tool results.
 _processed_tool_use_ids: Set[str] = set()
 _token_cache: Dict[str, int] = {}
-_cleanup_callbacks: List[Callable[[], None]] = []
-_is_compacting: bool = False


 @dataclass
 class ContextUsageStatus:
-    """Snapshot of
+    """Snapshot of current context usage."""

     used_tokens: int
     max_context_tokens: int
```
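The constants above drive the new micro-compaction pass: only tools in `TOOL_COMMANDS` are candidates, and `_processed_tool_use_ids` ensures each result is cleared at most once. A minimal sketch of that eligibility check, assuming dict-shaped `tool_use` blocks (the helper name `eligible_for_micro_compaction` is hypothetical, not from the diff):

```python
from typing import Any, Dict, Set

TOOL_COMMANDS: Set[str] = {"Read", "Bash", "Grep", "Glob", "LS", "WebSearch", "WebFetch"}
_processed_tool_use_ids: Set[str] = set()


def eligible_for_micro_compaction(block: Dict[str, Any]) -> bool:
    """Hypothetical guard: large-payload tools only, each tool_use id at most once."""
    return (
        block.get("type") == "tool_use"
        and block.get("name") in TOOL_COMMANDS
        and bool(block.get("id"))
        and block.get("id") not in _processed_tool_use_ids
    )


# A second pass over the same id is a no-op.
block = {"type": "tool_use", "name": "Read", "id": "toolu_01"}
assert eligible_for_micro_compaction(block)
_processed_tool_use_ids.add(block["id"])
assert not eligible_for_micro_compaction(block)
```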
```diff
@@ -59,7 +75,6 @@ class ContextUsageStatus:

     @property
     def total_tokens(self) -> int:
-        """Alias for backward compatibility."""
         return self.used_tokens

     @property
@@ -75,21 +90,9 @@ class ContextUsageStatus:
         return self.is_above_auto_compact_threshold


-@dataclass
-class CompactionResult:
-    """Result of a compaction run."""
-
-    messages: List[ConversationMessage]
-    tokens_before: int
-    tokens_after: int
-    tokens_saved: int
-    cleared_tool_ids: Set[str]
-    was_compacted: bool
-
-
 @dataclass
 class ContextBreakdown:
-    """Detailed breakdown
+    """Detailed breakdown for UI display."""

     max_context_tokens: int
     system_prompt_tokens: int
@@ -112,7 +115,6 @@ class ContextBreakdown:

     @property
     def effective_tokens(self) -> int:
-        """Tokens that count against the limit including any reserved buffer."""
         return min(self.max_context_tokens, self.reported_tokens + self.reserved_tokens)

     @property
@@ -131,8 +133,20 @@ class ContextBreakdown:
         return min(100.0, (tokens / self.max_context_tokens) * 100)


+@dataclass
+class MicroCompactionResult:
+    """Result of a micro-compaction pass."""
+
+    messages: List[ConversationMessage]
+    tokens_before: int
+    tokens_after: int
+    tokens_saved: int
+    tools_compacted: int
+    trigger_type: str
+    was_compacted: bool
+
+
 def _parse_truthy_env_value(value: Optional[str]) -> bool:
-    """Interpret common truthy environment variable values."""
     if value is None:
         return False
     normalized = value.strip().lower()
```
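`MicroCompactionResult` replaces the deleted `CompactionResult` and adds `tools_compacted` plus `trigger_type`. A hedged sketch of how a caller might report one (the `log_compaction` helper is illustrative, not part of the package):

```python
def log_compaction(result: "MicroCompactionResult") -> None:
    # Illustrative consumer of the dataclass defined in the hunk above.
    if not result.was_compacted:
        return
    pct = 100.0 * result.tokens_saved / max(result.tokens_before, 1)
    print(
        f"micro-compaction ({result.trigger_type}): {result.tools_compacted} tool results cleared, "
        f"{result.tokens_before} -> {result.tokens_after} tokens ({pct:.1f}% saved)"
    )
```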
```diff
@@ -140,12 +154,10 @@ def _parse_truthy_env_value(value: Optional[str]) -> bool:


 def estimate_tokens_from_text(text: str) -> int:
-
-    return estimate_tokens(text)
+    return estimate_tokens(text or "")


 def _stringify_content(content: Union[str, List[MessageContent], None]) -> str:
-    """Convert normalized content into plain text for estimation."""
     if content is None:
         return ""
     if isinstance(content, str):
@@ -153,39 +165,22 @@ def _stringify_content(content: Union[str, List[MessageContent], None]) -> str:
     parts: List[str] = []
     for part in content:
         if isinstance(part, dict):
-
-            text_val = part.get("text")
+            text_val = part.get("text") or part.get("content") or ""
             if text_val:
                 parts.append(str(text_val))
-
-
-
-            if isinstance(nested_content, list):
-                nested_text = _stringify_content(nested_content)
+            nested = part.get("content")
+            if isinstance(nested, list):
+                nested_text = _stringify_content(nested)
                 if nested_text:
                     parts.append(nested_text)
-
-            # Include tool payloads that otherwise don't have "text"
-            if block_type == "tool_use" and part.get("input") is not None:
-                try:
-                    parts.append(json.dumps(part.get("input"), ensure_ascii=False))
-                except (TypeError, ValueError) as exc:
-                    logger.warning(
-                        "[message_compaction] Failed to serialize tool_use input for token estimate: %s: %s",
-                        type(exc).__name__, exc,
-                    )
-                    parts.append(str(part.get("input")))
-
-            # OpenAI-style arguments blocks
             if part.get("arguments"):
                 parts.append(str(part.get("arguments")))
         elif hasattr(part, "text"):
-            text_val = getattr(part, "text", "")
+            text_val = getattr(part, "text", "") or ""
             if text_val:
-                parts.append(
+                parts.append(text_val)
         else:
             parts.append(str(part))
-    # Filter out empty strings to avoid over-counting separators
     return "\n".join([p for p in parts if p])


```
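The rewritten `_stringify_content` flattens mixed content in a fixed order: `text`, then `content`, then nested lists, then OpenAI-style `arguments`. A standalone sketch of that order, assuming dict-shaped blocks only:

```python
from typing import Any, List, Union


def flatten_content(content: Union[str, List[Any], None]) -> str:
    """Sketch of the flattening order: text -> content -> nested list -> arguments."""
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    parts: List[str] = []
    for part in content:
        if not isinstance(part, dict):
            parts.append(str(part))
            continue
        text_val = part.get("text") or part.get("content") or ""
        if isinstance(text_val, str) and text_val:
            parts.append(text_val)
        nested = part.get("content")
        if isinstance(nested, list):
            parts.append(flatten_content(nested))
        if part.get("arguments"):
            parts.append(str(part["arguments"]))
    return "\n".join(p for p in parts if p)


print(flatten_content([{"type": "text", "text": "hello"}, {"content": [{"text": "nested"}]}]))
# hello
# nested
```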
```diff
@@ -198,7 +193,6 @@ def estimate_conversation_tokens(
     for message in normalized:
         total += estimate_tokens_from_text(_stringify_content(message.get("content")))

-        # Account for OpenAI-style tool_calls payloads (arguments + name)
         tool_calls = message.get("tool_calls")
         if isinstance(tool_calls, list):
             for call in tool_calls:
@@ -219,7 +213,6 @@ def estimate_conversation_tokens(


 def _estimate_tool_schema_tokens(tools: Sequence[Any]) -> int:
-    """Estimate tokens consumed by tool schemas."""
     total = 0
     for tool in tools:
         try:
@@ -229,7 +222,8 @@ def _estimate_tool_schema_tokens(tools: Sequence[Any]) -> int:
         except (AttributeError, TypeError, KeyError, ValueError) as exc:
             logger.warning(
                 "Failed to estimate tokens for tool schema: %s: %s",
-                type(exc).__name__,
+                type(exc).__name__,
+                exc,
                 extra={"tool": getattr(tool, "name", None)},
             )
             continue
@@ -252,51 +246,37 @@ def get_model_context_limit(
     if explicit_limit and explicit_limit > 0:
         return explicit_limit

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        return 200_000
-    return 200_000
-    if "gpt-4o" in name or "gpt-4.1" in name or "gpt-4-turbo" in name:
-        return 128_000
-    if "gpt-4" in name:
-        return 32_000
-    if "gpt-3.5" in name:
-        return 16_000
-    if "deepseek" in name:
-        return 128_000
-
+    try:
+        model = getattr(model_profile, "model", None) or ""
+    except Exception:
+        model = ""
+
+    # Fallback mapping; tuned for common providers.
+    model = model.lower()
+    if "1000k" in model or "1m" in model:
+        return 1_000_000
+    if "gpt-4o" in model or "gpt4o" in model:
+        return 128_000
+    if "gpt-4" in model:
+        return 32_000
+    if "deepseek" in model:
+        return 128_000
     return DEFAULT_CONTEXT_TOKENS


 def get_remaining_context_tokens(
     model_profile: Optional[ModelProfile], explicit_limit: Optional[int] = None
 ) -> int:
-    """
+    """Context window minus configured output tokens."""
     context_limit = max(get_model_context_limit(model_profile, explicit_limit), MIN_CONTEXT_TOKENS)
     try:
-        max_output_tokens = (
-            int(getattr(model_profile, "max_tokens", 0) or 0) if model_profile else 0
-        )
+        max_output_tokens = int(getattr(model_profile, "max_tokens", 0) or 0) if model_profile else 0
     except (TypeError, ValueError):
         max_output_tokens = 0
     return max(MIN_CONTEXT_TOKENS, context_limit - max(0, max_output_tokens))


 def resolve_auto_compact_enabled(config: GlobalConfig) -> bool:
-    """Return whether auto-compaction is enabled, honoring an env override."""
     env_override = os.getenv("RIPPERDOC_AUTO_COMPACT")
     if env_override is not None:
         normalized = env_override.strip().lower()
```
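The rewritten fallback in `get_model_context_limit` infers the window from substrings of the model name, checking the most specific patterns first. A small sketch of that lookup using the mapping from the hunk above (the model names in the asserts are examples, not an exhaustive list):

```python
DEFAULT_CONTEXT_TOKENS = 200_000


def fallback_context_limit(model: str) -> int:
    # Order matters: "1m"/"1000k" variants win before the gpt-4 family.
    model = model.lower()
    if "1000k" in model or "1m" in model:
        return 1_000_000
    if "gpt-4o" in model or "gpt4o" in model:
        return 128_000
    if "gpt-4" in model:
        return 32_000
    if "deepseek" in model:
        return 128_000
    return DEFAULT_CONTEXT_TOKENS


assert fallback_context_limit("gpt-4o-mini") == 128_000
assert fallback_context_limit("deepseek-chat") == 128_000
assert fallback_context_limit("claude-sonnet-4") == 200_000  # falls through to the default
```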
```diff
@@ -309,7 +289,7 @@ def get_context_usage_status(
     max_context_tokens: Optional[int],
     auto_compact_enabled: bool,
 ) -> ContextUsageStatus:
-    """Compute
+    """Compute usage thresholds."""
     context_limit = max(max_context_tokens or DEFAULT_CONTEXT_TOKENS, MIN_CONTEXT_TOKENS)
     effective_limit = (
         max(MIN_CONTEXT_TOKENS, context_limit - AUTO_COMPACT_BUFFER)
@@ -318,9 +298,7 @@ def get_context_usage_status(
     )

     tokens_left = max(effective_limit - used_tokens, 0)
-    percent_left = (
-        0.0 if effective_limit <= 0 else min(100.0, (tokens_left / effective_limit) * 100)
-    )
+    percent_left = 0.0 if effective_limit <= 0 else min(100.0, (tokens_left / effective_limit) * 100)
     percent_used = 100.0 - percent_left

     warning_limit = max(0, effective_limit - WARNING_THRESHOLD)
```
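Worked numbers for these thresholds, using the constants from the top of the module (the `used_tokens` figure is illustrative):

```python
# Constants from the module; auto-compact enabled.
DEFAULT_CONTEXT_TOKENS = 200_000
AUTO_COMPACT_BUFFER = 13_000
WARNING_THRESHOLD = 20_000

context_limit = DEFAULT_CONTEXT_TOKENS
effective_limit = max(20_000, context_limit - AUTO_COMPACT_BUFFER)  # 187_000
used_tokens = 170_000

tokens_left = max(effective_limit - used_tokens, 0)               # 17_000
percent_left = min(100.0, (tokens_left / effective_limit) * 100)  # ~9.1%
warning_limit = max(0, effective_limit - WARNING_THRESHOLD)       # 167_000
assert used_tokens > warning_limit  # this conversation sits inside the warning band
```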
```diff
@@ -371,9 +349,7 @@ def summarize_context_usage(
     )


-def find_latest_assistant_usage_tokens(
-    messages: Sequence[ConversationMessage],
-) -> int:
+def find_latest_assistant_usage_tokens(messages: Sequence[ConversationMessage]) -> int:
     """Best-effort extraction of usage tokens from the latest assistant message."""
     for message in reversed(messages):
         if getattr(message, "type", "") != "assistant":
@@ -413,7 +389,6 @@ def estimate_used_tokens(
     protocol: str = "anthropic",
     precomputed_total_tokens: Optional[int] = None,
 ) -> int:
-    """Return usage tokens if present; otherwise fall back to an estimated total."""
     usage_tokens = find_latest_assistant_usage_tokens(messages)
     if usage_tokens > 0:
         return usage_tokens
@@ -422,29 +397,6 @@
     return estimate_conversation_tokens(messages, protocol=protocol)


-def register_cleanup_callback(callback: Callable[[], None]) -> Callable[[], None]:
-    """Register a callback that will run after a compaction pass."""
-    _cleanup_callbacks.append(callback)
-
-    def _unregister() -> None:
-        nonlocal callback
-        _cleanup_callbacks[:] = [cb for cb in _cleanup_callbacks if cb is not callback]
-
-    return _unregister
-
-
-def _run_cleanup_callbacks() -> None:
-    callbacks = list(_cleanup_callbacks)
-    for callback in callbacks:
-        try:
-            callback()
-        except (RuntimeError, TypeError, ValueError, AttributeError) as exc:
-            logger.debug(
-                "[message_compaction] Cleanup callback failed: %s: %s",
-                type(exc).__name__, exc,
-            )
-
-
 def _normalize_tool_use_id(block: Any) -> str:
     if block is None:
         return ""
@@ -454,7 +406,7 @@ def _normalize_tool_use_id(block: Any) -> str:


 def _estimate_message_tokens(content_block: Any) -> int:
-    """Estimate tokens for a single content block."""
+    """Estimate tokens for a single content block (text/image only)."""
     if content_block is None:
         return 0

@@ -467,9 +419,7 @@ def _estimate_message_tokens(content_block: Any) -> int:
     if isinstance(content, list):
         total = 0
         for part in content:
-            part_type = getattr(part, "type", None) or (
-                part.get("type") if isinstance(part, dict) else None
-            )
+            part_type = getattr(part, "type", None) or (part.get("type") if isinstance(part, dict) else None)
             if part_type == "text":
                 text_val = getattr(part, "text", None) if hasattr(part, "text") else None
                 if text_val is None and isinstance(part, dict):
@@ -493,37 +443,37 @@ def _get_cached_token_count(cache_key: str, content_block: Any) -> int:
     return estimated


-def
+def micro_compact_messages(
     messages: Sequence[ConversationMessage],
-    max_tokens: Optional[int] = None,
     *,
+    max_tokens: Optional[int] = None,
+    context_limit: Optional[int] = None,
+    auto_compact_enabled: Optional[bool] = None,
     protocol: str = "anthropic",
-
-
-
-    _is_compacting = False
-
+    trigger_type: str = "auto",
+) -> MicroCompactionResult:
+    """Micro-compaction: strip older tool_result payloads to keep context lean."""
     tokens_before = estimate_conversation_tokens(messages, protocol=protocol)

     if _parse_truthy_env_value(os.getenv("DISABLE_MICROCOMPACT")):
-        return
+        return MicroCompactionResult(
             messages=list(messages),
             tokens_before=tokens_before,
             tokens_after=tokens_before,
             tokens_saved=0,
-
+            tools_compacted=0,
+            trigger_type=trigger_type,
             was_compacted=False,
         )

-    #
+    # Legacy flag kept for parity with upstream behavior.
     _parse_truthy_env_value(os.getenv("USE_API_CONTEXT_MANAGEMENT"))

     is_max_tokens_specified = max_tokens is not None
     try:
-
+        effective_max_tokens = int(max_tokens) if max_tokens is not None else MAX_TOKENS_HARD
     except (TypeError, ValueError):
-
-        effective_max_tokens = max(base_max_tokens, MIN_CONTEXT_TOKENS)
+        effective_max_tokens = MAX_TOKENS_HARD

     tool_use_ids_to_compact: List[str] = []
     token_counts_by_tool_use_id: Dict[str, int] = {}
```
```diff
@@ -533,6 +483,7 @@ def compact_messages(
         content = getattr(getattr(message, "message", None), "content", None)
         if msg_type not in {"user", "assistant"} or not isinstance(content, list):
             continue
+
         for content_block in content:
             block_type = getattr(content_block, "type", None) or (
                 content_block.get("type") if isinstance(content_block, dict) else None
@@ -541,6 +492,7 @@ def compact_messages(
             tool_name = getattr(content_block, "name", None)
             if tool_name is None and isinstance(content_block, dict):
                 tool_name = content_block.get("name")
+
             if block_type == "tool_use" and tool_name in TOOL_COMMANDS:
                 if tool_use_id and tool_use_id not in _processed_tool_use_ids:
                     tool_use_ids_to_compact.append(tool_use_id)
@@ -549,9 +501,7 @@ def compact_messages(
                 token_counts_by_tool_use_id[tool_use_id] = token_count

     latest_tool_use_ids = (
-        tool_use_ids_to_compact[-MAX_TOOL_USES_TO_PRESERVE:]
-        if MAX_TOOL_USES_TO_PRESERVE > 0
-        else []
+        tool_use_ids_to_compact[-MAX_TOOL_USES_TO_PRESERVE:] if MAX_TOOL_USES_TO_PRESERVE > 0 else []
     )
     total_token_count = sum(token_counts_by_tool_use_id.values())

@@ -566,14 +516,16 @@ def compact_messages(
         total_tokens_removed += token_counts_by_tool_use_id.get(tool_use_id, 0)

     if not is_max_tokens_specified:
-
+        resolved_auto_compact = (
+            auto_compact_enabled
+            if auto_compact_enabled is not None
+            else resolve_auto_compact_enabled(get_global_config())
+        )
         usage_tokens = estimate_used_tokens(
             messages, protocol=protocol, precomputed_total_tokens=tokens_before
         )
         status = get_context_usage_status(
-            usage_tokens,
-            max_context_tokens=max_tokens,
-            auto_compact_enabled=auto_compact_enabled,
+            usage_tokens, max_context_tokens=context_limit, auto_compact_enabled=resolved_auto_compact
         )
         if not status.is_above_warning_threshold or total_tokens_removed < MAX_TOKENS_SOFT:
             ids_to_remove.clear()
@@ -587,12 +539,12 @@ def compact_messages(
     for message in messages:
         msg_type = getattr(message, "type", "")
         content = getattr(getattr(message, "message", None), "content", None)
+
         if msg_type not in {"user", "assistant"} or not isinstance(content, list):
             compacted_messages.append(message)
             continue

         if msg_type == "assistant" and isinstance(message, AssistantMessage):
-            # Copy content list to avoid mutating the original message.
             compacted_messages.append(
                 AssistantMessage(
                     message=message.message.model_copy(update={"content": list(content)}),
@@ -606,23 +558,21 @@ def compact_messages(

         filtered_content: List[MessageContent] = []
         modified = False
+
         for content_item in content:
             block_type = getattr(content_item, "type", None) or (
                 content_item.get("type") if isinstance(content_item, dict) else None
             )
             tool_use_id = _normalize_tool_use_id(content_item)
+
             if block_type == "tool_result" and _should_remove(tool_use_id):
                 modified = True
                 if hasattr(content_item, "model_copy"):
                     new_block = content_item.model_copy()
-                    new_block.text =
+                    new_block.text = MICRO_PLACEHOLDER
                 else:
-                    block_dict = (
-
-                        if isinstance(content_item, dict)
-                        else {"type": "tool_result"}
-                    )
-                    block_dict["text"] = COMPACT_PLACEHOLDER
+                    block_dict = dict(content_item) if isinstance(content_item, dict) else {"type": "tool_result"}
+                    block_dict["text"] = MICRO_PLACEHOLDER
                     block_dict["tool_use_id"] = tool_use_id
                     new_block = MessageContent(**block_dict)
                 filtered_content.append(new_block)
```
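Cleared `tool_result` blocks keep their `tool_use_id` but lose their payload, so the transcript stays structurally valid for the API. A dict-only sketch of the substitution (the pydantic `model_copy` branch in the hunk is the equivalent for typed blocks):

```python
MICRO_PLACEHOLDER = "[Old tool result content cleared]"


def clear_tool_result(block: dict) -> dict:
    """Return a copy of a tool_result block with its payload replaced by the placeholder."""
    cleared = dict(block)
    cleared["text"] = MICRO_PLACEHOLDER
    return cleared


old = {"type": "tool_result", "tool_use_id": "toolu_01", "text": "...thousands of tokens..."}
new = clear_tool_result(old)
assert new["tool_use_id"] == "toolu_01" and new["text"] == MICRO_PLACEHOLDER
```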
```diff
@@ -654,10 +604,8 @@ def compact_messages(
     tokens_saved = max(0, tokens_before - tokens_after)

     if ids_to_remove:
-        _is_compacting = True
-        _run_cleanup_callbacks()
         logger.debug(
-            "[message_compaction]
+            "[message_compaction] Micro-compacted conversation",
             extra={
                 "tokens_before": tokens_before,
                 "tokens_after": tokens_after,
@@ -666,11 +614,18 @@ def compact_messages(
             },
         )

-    return
+    return MicroCompactionResult(
         messages=compacted_messages,
         tokens_before=tokens_before,
         tokens_after=tokens_after,
         tokens_saved=tokens_saved,
-
+        tools_compacted=len(ids_to_remove),
+        trigger_type="manual" if is_max_tokens_specified else trigger_type,
         was_compacted=bool(ids_to_remove),
     )
+
+
+def reset_micro_compaction_state() -> None:
+    """Clear caches and processed IDs (useful for tests)."""
+    _processed_tool_use_ids.clear()
+    _token_cache.clear()
```