code-puppy 0.0.90__tar.gz → 0.0.92__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {code_puppy-0.0.90 → code_puppy-0.0.92}/PKG-INFO +1 -2
  2. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/agent.py +1 -0
  3. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/main.py +8 -3
  4. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/message_history_processor.py +7 -9
  5. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/status_display.py +12 -1
  6. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/token_utils.py +9 -10
  7. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/tools/common.py +0 -1
  8. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/tools/file_operations.py +3 -5
  9. {code_puppy-0.0.90 → code_puppy-0.0.92}/pyproject.toml +1 -2
  10. {code_puppy-0.0.90 → code_puppy-0.0.92}/.gitignore +0 -0
  11. {code_puppy-0.0.90 → code_puppy-0.0.92}/LICENSE +0 -0
  12. {code_puppy-0.0.90 → code_puppy-0.0.92}/README.md +0 -0
  13. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/__init__.py +0 -0
  14. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/agent_prompts.py +0 -0
  15. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/command_line/__init__.py +0 -0
  16. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/command_line/file_path_completion.py +0 -0
  17. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/command_line/meta_command_handler.py +0 -0
  18. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/command_line/model_picker_completion.py +0 -0
  19. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/command_line/motd.py +0 -0
  20. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/command_line/prompt_toolkit_completion.py +0 -0
  21. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/command_line/utils.py +0 -0
  22. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/config.py +0 -0
  23. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/model_factory.py +0 -0
  24. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/models.json +0 -0
  25. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/state_management.py +0 -0
  26. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/summarization_agent.py +0 -0
  27. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/tools/__init__.py +0 -0
  28. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/tools/command_runner.py +0 -0
  29. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/tools/file_modifications.py +0 -0
  30. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/tools/token_check.py +0 -0
  31. {code_puppy-0.0.90 → code_puppy-0.0.92}/code_puppy/version_checker.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: code-puppy
3
- Version: 0.0.90
3
+ Version: 0.0.92
4
4
  Summary: Code generation agent
5
5
  Author: Michael Pfaffenberger
6
6
  License: MIT
@@ -27,7 +27,6 @@ Requires-Dist: python-dotenv>=1.0.0
27
27
  Requires-Dist: rapidfuzz>=3.13.0
28
28
  Requires-Dist: rich>=13.4.2
29
29
  Requires-Dist: ruff>=0.11.11
30
- Requires-Dist: tiktoken>=0.11.0
31
30
  Requires-Dist: tree-sitter-language-pack>=0.8.0
32
31
  Requires-Dist: tree-sitter-typescript>=0.23.2
33
32
  Description-Content-Type: text/markdown
@@ -80,6 +80,7 @@ def reload_code_generation_agent():
80
80
  output_type=str,
81
81
  retries=3,
82
82
  history_processors=[message_history_accumulator],
83
+ toolsets=_load_mcp_servers()
83
84
  )
84
85
  register_all_tools(agent)
85
86
  _code_generation_agent = agent
@@ -1,7 +1,6 @@
1
1
  import argparse
2
2
  import asyncio
3
3
  import os
4
- import random
5
4
  import sys
6
5
 
7
6
  from dotenv import load_dotenv
@@ -23,11 +22,11 @@ from code_puppy.status_display import StatusDisplay
23
22
  # Initialize rich console for pretty output
24
23
  from code_puppy.tools.common import console
25
24
  from code_puppy.version_checker import fetch_latest_version
26
- from code_puppy.message_history_processor import message_history_processor, prune_interrupted_tool_calls
25
+ from code_puppy.message_history_processor import message_history_processor
27
26
 
28
27
 
29
28
  # from code_puppy.tools import * # noqa: F403
30
-
29
+ import logfire
31
30
 
32
31
  # Define a function to get the secret file path
33
32
  def get_secret_file_path():
@@ -39,7 +38,13 @@ def get_secret_file_path():
39
38
 
40
39
  async def main():
41
40
  # Ensure the config directory and puppy.cfg with name info exist (prompt user if needed)
41
+ logfire.configure(
42
+ token="pylf_v1_us_8G5nLznQtHMRsL4hsNG5v3fPWKjyXbysrMgrQ1bV1wRP",
43
+ console=False
44
+ )
45
+ logfire.instrument_pydantic_ai()
42
46
  ensure_config_exists()
47
+
43
48
  current_version = __version__
44
49
  latest_version = fetch_latest_version("code-puppy")
45
50
  console.print(f"Current version: {current_version}")
@@ -4,7 +4,6 @@ import os
4
4
  from pathlib import Path
5
5
 
6
6
  import pydantic
7
- import tiktoken
8
7
  from pydantic_ai.messages import (
9
8
  ModelMessage,
10
9
  TextPart,
@@ -16,6 +15,7 @@ from pydantic_ai.messages import (
16
15
  from code_puppy.tools.common import console
17
16
  from code_puppy.model_factory import ModelFactory
18
17
  from code_puppy.config import get_model_name
18
+ from code_puppy.token_utils import estimate_tokens
19
19
 
20
20
  # Import the status display to get token rate info
21
21
  try:
@@ -46,12 +46,12 @@ except ImportError:
46
46
  return None
47
47
 
48
48
 
49
+ # Dummy function for backward compatibility
49
50
  def get_tokenizer_for_model(model_name: str):
50
51
  """
51
- Always use cl100k_base tokenizer regardless of model type.
52
- This is a simple approach that works reasonably well for most models.
52
+ Dummy function that returns None since we're now using len/4 heuristic.
53
53
  """
54
- return tiktoken.get_encoding("cl100k_base")
54
+ return None
55
55
 
56
56
 
57
57
  def stringify_message_part(part) -> str:
@@ -96,17 +96,15 @@ def stringify_message_part(part) -> str:
96
96
 
97
97
  def estimate_tokens_for_message(message: ModelMessage) -> int:
98
98
  """
99
- Estimate the number of tokens in a message using tiktoken with cl100k_base encoding.
100
- This is more accurate than character-based estimation.
99
+ Estimate the number of tokens in a message using the len/4 heuristic.
100
+ This is a simple approximation that works reasonably well for most text.
101
101
  """
102
- tokenizer = get_tokenizer_for_model(get_model_name())
103
102
  total_tokens = 0
104
103
 
105
104
  for part in message.parts:
106
105
  part_str = stringify_message_part(part)
107
106
  if part_str:
108
- tokens = tokenizer.encode(part_str)
109
- total_tokens += len(tokens)
107
+ total_tokens += estimate_tokens(part_str)
110
108
 
111
109
  return max(1, total_tokens)
112
110
 
@@ -104,9 +104,13 @@ class StatusDisplay:
104
104
 
105
105
  def update_token_count(self, tokens: int) -> None:
106
106
  """Update the token count and recalculate the rate"""
107
+ # Reset timing if this is the first update of a new task
107
108
  if self.start_time is None:
108
109
  self.start_time = time.time()
109
110
  self.last_update_time = self.start_time
111
+ # Reset token counters for new task
112
+ self.last_token_count = 0
113
+ self.current_rate = 0.0
110
114
 
111
115
  # Allow for incremental updates (common for streaming) or absolute updates
112
116
  if tokens > self.token_count or tokens < 0:
@@ -204,6 +208,13 @@ class StatusDisplay:
204
208
  avg_rate = self.token_count / elapsed if elapsed > 0 else 0
205
209
  self.console.print(f"[dim]Completed: {self.token_count} tokens in {elapsed:.1f}s ({avg_rate:.1f} t/s avg)[/dim]")
206
210
 
207
- # Reset
211
+ # Reset state
208
212
  self.start_time = None
209
213
  self.token_count = 0
214
+ self.last_update_time = None
215
+ self.last_token_count = 0
216
+ self.current_rate = 0
217
+
218
+ # Reset global rate to 0 to avoid affecting subsequent tasks
219
+ global CURRENT_TOKEN_RATE
220
+ CURRENT_TOKEN_RATE = 0.0
@@ -1,16 +1,17 @@
1
1
  import json
2
- import tiktoken
3
2
 
4
3
  import pydantic
5
4
  from pydantic_ai.messages import ModelMessage
6
5
 
7
6
 
8
- def get_tokenizer():
7
+ def estimate_tokens(text: str) -> int:
9
8
  """
10
- Always use cl100k_base tokenizer regardless of model type.
11
- This is a simple approach that works reasonably well for most models.
9
+ Estimate the number of tokens using the len/4 heuristic.
10
+ This is a simple approximation that works reasonably well for most text.
12
11
  """
13
- return tiktoken.get_encoding("cl100k_base")
12
+ if not text:
13
+ return 0
14
+ return max(1, len(text) // 4)
14
15
 
15
16
 
16
17
  def stringify_message_part(part) -> str:
@@ -55,16 +56,14 @@ def stringify_message_part(part) -> str:
55
56
 
56
57
  def estimate_tokens_for_message(message: ModelMessage) -> int:
57
58
  """
58
- Estimate the number of tokens in a message using tiktoken with cl100k_base encoding.
59
- This is more accurate than character-based estimation.
59
+ Estimate the number of tokens in a message using the len/4 heuristic.
60
+ This is a simple approximation that works reasonably well for most text.
60
61
  """
61
- tokenizer = get_tokenizer()
62
62
  total_tokens = 0
63
63
 
64
64
  for part in message.parts:
65
65
  part_str = stringify_message_part(part)
66
66
  if part_str:
67
- tokens = tokenizer.encode(part_str)
68
- total_tokens += len(tokens)
67
+ total_tokens += estimate_tokens(part_str)
69
68
 
70
69
  return max(1, total_tokens)
@@ -2,7 +2,6 @@ import os
2
2
  import fnmatch
3
3
 
4
4
  from typing import Optional, Tuple
5
- import tiktoken
6
5
  from rapidfuzz.distance import JaroWinkler
7
6
  from rich.console import Console
8
7
 
@@ -7,7 +7,7 @@ from pydantic import BaseModel, conint
7
7
  from pydantic_ai import RunContext
8
8
 
9
9
  from code_puppy.tools.common import console
10
- from code_puppy.token_utils import get_tokenizer
10
+ from code_puppy.token_utils import estimate_tokens
11
11
  from code_puppy.tools.token_check import token_guard
12
12
  # ---------------------------------------------------------------------------
13
13
  # Module-level helper functions (exposed for unit tests _and_ used as tools)
@@ -218,8 +218,7 @@ def _read_file(context: RunContext, file_path: str, start_line: int | None = Non
218
218
  # Read the entire file
219
219
  content = f.read()
220
220
 
221
- tokenizer = get_tokenizer()
222
- num_tokens = len(tokenizer.encode(content))
221
+ num_tokens = estimate_tokens(content)
223
222
  if num_tokens > 10000:
224
223
  raise ValueError("The file is massive, greater than 10,000 tokens which is dangerous to read entirely. Please read this file in chunks.")
225
224
  token_guard(num_tokens)
@@ -313,8 +312,7 @@ def list_files(
313
312
  context: RunContext, directory: str = ".", recursive: bool = True
314
313
  ) -> ListFileOutput:
315
314
  list_files_output = _list_files(context, directory, recursive)
316
- tokenizer = get_tokenizer()
317
- num_tokens = len(tokenizer.encode(list_files_output.model_dump_json()))
315
+ num_tokens = estimate_tokens(list_files_output.model_dump_json())
318
316
  if num_tokens > 10000:
319
317
  return ListFileOutput(
320
318
  files=[],
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "code-puppy"
7
- version = "0.0.90"
7
+ version = "0.0.92"
8
8
  description = "Code generation agent"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -25,7 +25,6 @@ dependencies = [
25
25
  "json-repair>=0.46.2",
26
26
  "tree-sitter-language-pack>=0.8.0",
27
27
  "tree-sitter-typescript>=0.23.2",
28
- "tiktoken>=0.11.0",
29
28
  ]
30
29
  dev-dependencies = [
31
30
  "pytest>=8.3.4",
File without changes
File without changes
File without changes