code-puppy 0.0.91__tar.gz → 0.0.93__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {code_puppy-0.0.91 → code_puppy-0.0.93}/PKG-INFO +1 -2
  2. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/main.py +4 -1
  3. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/message_history_processor.py +7 -9
  4. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/status_display.py +12 -1
  5. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/token_utils.py +9 -10
  6. code_puppy-0.0.93/code_puppy/tools/common.py +384 -0
  7. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/tools/file_operations.py +3 -5
  8. {code_puppy-0.0.91 → code_puppy-0.0.93}/pyproject.toml +1 -2
  9. code_puppy-0.0.91/code_puppy/tools/common.py +0 -119
  10. {code_puppy-0.0.91 → code_puppy-0.0.93}/.gitignore +0 -0
  11. {code_puppy-0.0.91 → code_puppy-0.0.93}/LICENSE +0 -0
  12. {code_puppy-0.0.91 → code_puppy-0.0.93}/README.md +0 -0
  13. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/__init__.py +0 -0
  14. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/agent.py +0 -0
  15. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/agent_prompts.py +0 -0
  16. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/command_line/__init__.py +0 -0
  17. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/command_line/file_path_completion.py +0 -0
  18. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/command_line/meta_command_handler.py +0 -0
  19. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/command_line/model_picker_completion.py +0 -0
  20. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/command_line/motd.py +0 -0
  21. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/command_line/prompt_toolkit_completion.py +0 -0
  22. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/command_line/utils.py +0 -0
  23. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/config.py +0 -0
  24. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/model_factory.py +0 -0
  25. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/models.json +0 -0
  26. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/state_management.py +0 -0
  27. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/summarization_agent.py +0 -0
  28. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/tools/__init__.py +0 -0
  29. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/tools/command_runner.py +0 -0
  30. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/tools/file_modifications.py +0 -0
  31. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/tools/token_check.py +0 -0
  32. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/version_checker.py +0 -0
{code_puppy-0.0.91 → code_puppy-0.0.93}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: code-puppy
-Version: 0.0.91
+Version: 0.0.93
 Summary: Code generation agent
 Author: Michael Pfaffenberger
 License: MIT
@@ -27,7 +27,6 @@ Requires-Dist: python-dotenv>=1.0.0
 Requires-Dist: rapidfuzz>=3.13.0
 Requires-Dist: rich>=13.4.2
 Requires-Dist: ruff>=0.11.11
-Requires-Dist: tiktoken>=0.11.0
 Requires-Dist: tree-sitter-language-pack>=0.8.0
 Requires-Dist: tree-sitter-typescript>=0.23.2
 Description-Content-Type: text/markdown
{code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/main.py
@@ -38,7 +38,10 @@ def get_secret_file_path():

 async def main():
     # Ensure the config directory and puppy.cfg with name info exist (prompt user if needed)
-    logfire.configure(token="pylf_v1_us_8G5nLznQtHMRsL4hsNG5v3fPWKjyXbysrMgrQ1bV1wRP")
+    logfire.configure(
+        token="pylf_v1_us_8G5nLznQtHMRsL4hsNG5v3fPWKjyXbysrMgrQ1bV1wRP",
+        console=False
+    )
     logfire.instrument_pydantic_ai()
     ensure_config_exists()

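The only functional change in main.py is the reshaped logfire.configure call: the Logfire write token is unchanged, and a console=False argument is added, which turns off Logfire's own console output so it does not interleave with the app's Rich console. A minimal sketch of the same configuration (the token string below is a placeholder, not the project's real token):

    import logfire

    logfire.configure(
        token="<logfire-write-token>",  # placeholder for the project write token
        console=False,                  # keep Logfire from printing spans to the terminal
    )
    logfire.instrument_pydantic_ai()    # unchanged: instrument pydantic-ai calls
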
{code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/message_history_processor.py
@@ -4,7 +4,6 @@ import os
 from pathlib import Path

 import pydantic
-import tiktoken
 from pydantic_ai.messages import (
     ModelMessage,
     TextPart,
@@ -16,6 +15,7 @@ from pydantic_ai.messages import (
 from code_puppy.tools.common import console
 from code_puppy.model_factory import ModelFactory
 from code_puppy.config import get_model_name
+from code_puppy.token_utils import estimate_tokens

 # Import the status display to get token rate info
 try:
@@ -46,12 +46,12 @@ except ImportError:
         return None


+# Dummy function for backward compatibility
 def get_tokenizer_for_model(model_name: str):
     """
-    Always use cl100k_base tokenizer regardless of model type.
-    This is a simple approach that works reasonably well for most models.
+    Dummy function that returns None since we're now using len/4 heuristic.
     """
-    return tiktoken.get_encoding("cl100k_base")
+    return None


 def stringify_message_part(part) -> str:
@@ -96,17 +96,15 @@ def stringify_message_part(part) -> str:

 def estimate_tokens_for_message(message: ModelMessage) -> int:
     """
-    Estimate the number of tokens in a message using tiktoken with cl100k_base encoding.
-    This is more accurate than character-based estimation.
+    Estimate the number of tokens in a message using the len/4 heuristic.
+    This is a simple approximation that works reasonably well for most text.
     """
-    tokenizer = get_tokenizer_for_model(get_model_name())
     total_tokens = 0

     for part in message.parts:
         part_str = stringify_message_part(part)
         if part_str:
-            tokens = tokenizer.encode(part_str)
-            total_tokens += len(tokens)
+            total_tokens += estimate_tokens(part_str)

     return max(1, total_tokens)

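Taken together, the message_history_processor.py hunks drop tiktoken from the history path: the tiktoken import goes away, the shared estimate_tokens helper is imported instead, get_tokenizer_for_model survives only as a stub that returns None, and estimate_tokens_for_message now sums the len/4 estimates over the stringified message parts. A rough sketch of the resulting per-message arithmetic (plain strings stand in for the pydantic_ai message parts):

    def estimate_tokens(text: str) -> int:
        # len/4 heuristic from code_puppy.token_utils
        if not text:
            return 0
        return max(1, len(text) // 4)

    parts = ["You are a helpful coding agent.", "list_files('.')", ""]
    total = sum(estimate_tokens(p) for p in parts if p)
    print(max(1, total))  # 7 + 3 -> 10 estimated tokens for the message
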
{code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/status_display.py
@@ -104,9 +104,13 @@ class StatusDisplay:

     def update_token_count(self, tokens: int) -> None:
         """Update the token count and recalculate the rate"""
+        # Reset timing if this is the first update of a new task
         if self.start_time is None:
             self.start_time = time.time()
             self.last_update_time = self.start_time
+            # Reset token counters for new task
+            self.last_token_count = 0
+            self.current_rate = 0.0

         # Allow for incremental updates (common for streaming) or absolute updates
         if tokens > self.token_count or tokens < 0:
@@ -204,6 +208,13 @@ class StatusDisplay:
        avg_rate = self.token_count / elapsed if elapsed > 0 else 0
        self.console.print(f"[dim]Completed: {self.token_count} tokens in {elapsed:.1f}s ({avg_rate:.1f} t/s avg)[/dim]")

-        # Reset
+        # Reset state
        self.start_time = None
        self.token_count = 0
+        self.last_update_time = None
+        self.last_token_count = 0
+        self.current_rate = 0
+
+        # Reset global rate to 0 to avoid affecting subsequent tasks
+        global CURRENT_TOKEN_RATE
+        CURRENT_TOKEN_RATE = 0.0
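The status_display.py changes are about not letting one task's token rate bleed into the next: on the first update of a new task the last token count and current rate are zeroed alongside the timers, and when a task completes every per-task field plus the module-level CURRENT_TOKEN_RATE is reset. An illustrative sketch of that lifecycle (a simplified stand-in, not the actual StatusDisplay class):

    import time

    class RateTracker:
        """Simplified stand-in showing the reset-on-start / reset-on-finish pattern."""

        def __init__(self) -> None:
            self.start_time = None
            self.token_count = 0
            self.current_rate = 0.0

        def update(self, tokens: int) -> None:
            if self.start_time is None:
                # First update of a new task: start the clock and clear stale state.
                self.start_time = time.time()
                self.current_rate = 0.0
            self.token_count = tokens
            elapsed = time.time() - self.start_time
            if elapsed > 0:
                self.current_rate = self.token_count / elapsed

        def finish(self) -> None:
            # Zero everything so the next task starts clean.
            self.start_time = None
            self.token_count = 0
            self.current_rate = 0.0
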
{code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/token_utils.py
@@ -1,16 +1,17 @@
 import json
-import tiktoken

 import pydantic
 from pydantic_ai.messages import ModelMessage


-def get_tokenizer():
+def estimate_tokens(text: str) -> int:
     """
-    Always use cl100k_base tokenizer regardless of model type.
-    This is a simple approach that works reasonably well for most models.
+    Estimate the number of tokens using the len/4 heuristic.
+    This is a simple approximation that works reasonably well for most text.
     """
-    return tiktoken.get_encoding("cl100k_base")
+    if not text:
+        return 0
+    return max(1, len(text) // 4)


 def stringify_message_part(part) -> str:
@@ -55,16 +56,14 @@ def stringify_message_part(part) -> str:

 def estimate_tokens_for_message(message: ModelMessage) -> int:
     """
-    Estimate the number of tokens in a message using tiktoken with cl100k_base encoding.
-    This is more accurate than character-based estimation.
+    Estimate the number of tokens in a message using the len/4 heuristic.
+    This is a simple approximation that works reasonably well for most text.
     """
-    tokenizer = get_tokenizer()
     total_tokens = 0

     for part in message.parts:
         part_str = stringify_message_part(part)
         if part_str:
-            tokens = tokenizer.encode(part_str)
-            total_tokens += len(tokens)
+            total_tokens += estimate_tokens(part_str)

     return max(1, total_tokens)
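token_utils.py now carries the whole token-counting story: estimate_tokens replaces tiktoken's cl100k_base encoder with a characters-divided-by-four heuristic (empty text counts as zero, anything non-empty counts as at least one token), and estimate_tokens_for_message simply sums it over the message parts. The function is small enough to restate with a few worked values:

    def estimate_tokens(text: str) -> int:
        # ~4 characters per token; floor of 1 for any non-empty string.
        if not text:
            return 0
        return max(1, len(text) // 4)

    print(estimate_tokens(""))          # 0
    print(estimate_tokens("hi"))        # 1
    print(estimate_tokens("a" * 4000))  # 1000
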
code_puppy-0.0.93/code_puppy/tools/common.py
@@ -0,0 +1,384 @@
+import os
+import fnmatch
+
+from typing import Optional, Tuple
+from rapidfuzz.distance import JaroWinkler
+from rich.console import Console
+
+# get_model_context_length will be imported locally where needed to avoid circular imports
+
+NO_COLOR = bool(int(os.environ.get("CODE_PUPPY_NO_COLOR", "0")))
+console = Console(no_color=NO_COLOR)
+
+
+def get_model_context_length() -> int:
+    """
+    Get the context length for the currently configured model from models.json
+    """
+    # Import locally to avoid circular imports
+    from code_puppy.model_factory import ModelFactory
+    from code_puppy.config import get_model_name
+    import os
+    from pathlib import Path
+
+    # Load model configuration
+    models_path = os.environ.get("MODELS_JSON_PATH")
+    if not models_path:
+        models_path = Path(__file__).parent.parent / "models.json"
+    else:
+        models_path = Path(models_path)
+
+    model_configs = ModelFactory.load_config(str(models_path))
+    model_name = get_model_name()
+
+    # Get context length from model config
+    model_config = model_configs.get(model_name, {})
+    context_length = model_config.get("context_length", 128000)  # Default value
+
+    # Reserve 10% of context for response
+    return int(context_length)
+
+
+# -------------------
+# Shared ignore patterns/helpers
+# -------------------
+IGNORE_PATTERNS = [
+    # Version control
+    "**/.git/**",
+    "**/.git",
+    ".git/**",
+    ".git",
+    "**/.svn/**",
+    "**/.hg/**",
+    "**/.bzr/**",
+    # Node.js / JavaScript / TypeScript
+    "**/node_modules/**",
+    "**/node_modules/**/*.js",
+    "node_modules/**",
+    "node_modules",
+    "**/npm-debug.log*",
+    "**/yarn-debug.log*",
+    "**/yarn-error.log*",
+    "**/pnpm-debug.log*",
+    "**/.npm/**",
+    "**/.yarn/**",
+    "**/.pnpm-store/**",
+    "**/coverage/**",
+    "**/.nyc_output/**",
+    "**/dist/**",
+    "**/dist",
+    "**/build/**",
+    "**/build",
+    "**/.next/**",
+    "**/.nuxt/**",
+    "**/out/**",
+    "**/.cache/**",
+    "**/.parcel-cache/**",
+    "**/.vite/**",
+    "**/storybook-static/**",
+    # Python
+    "**/__pycache__/**",
+    "**/__pycache__",
+    "__pycache__/**",
+    "__pycache__",
+    "**/*.pyc",
+    "**/*.pyo",
+    "**/*.pyd",
+    "**/.pytest_cache/**",
+    "**/.mypy_cache/**",
+    "**/.coverage",
+    "**/htmlcov/**",
+    "**/.tox/**",
+    "**/.nox/**",
+    "**/site-packages/**",
+    "**/.venv/**",
+    "**/.venv",
+    "**/venv/**",
+    "**/venv",
+    "**/env/**",
+    "**/ENV/**",
+    "**/.env",
+    "**/pip-wheel-metadata/**",
+    "**/*.egg-info/**",
+    "**/dist/**",
+    "**/wheels/**",
+    # Java (Maven, Gradle, SBT)
+    "**/target/**",
+    "**/target",
+    "**/build/**",
+    "**/build",
+    "**/.gradle/**",
+    "**/gradle-app.setting",
+    "**/*.class",
+    "**/*.jar",
+    "**/*.war",
+    "**/*.ear",
+    "**/*.nar",
+    "**/hs_err_pid*",
+    "**/.classpath",
+    "**/.project",
+    "**/.settings/**",
+    "**/bin/**",
+    "**/project/target/**",
+    "**/project/project/**",
+    # Go
+    "**/vendor/**",
+    "**/*.exe",
+    "**/*.exe~",
+    "**/*.dll",
+    "**/*.so",
+    "**/*.dylib",
+    "**/*.test",
+    "**/*.out",
+    "**/go.work",
+    "**/go.work.sum",
+    # Rust
+    "**/target/**",
+    "**/Cargo.lock",
+    "**/*.pdb",
+    # Ruby
+    "**/vendor/**",
+    "**/.bundle/**",
+    "**/Gemfile.lock",
+    "**/*.gem",
+    "**/.rvm/**",
+    "**/.rbenv/**",
+    "**/coverage/**",
+    "**/.yardoc/**",
+    "**/doc/**",
+    "**/rdoc/**",
+    "**/.sass-cache/**",
+    "**/.jekyll-cache/**",
+    "**/_site/**",
+    # PHP
+    "**/vendor/**",
+    "**/composer.lock",
+    "**/.phpunit.result.cache",
+    "**/storage/logs/**",
+    "**/storage/framework/cache/**",
+    "**/storage/framework/sessions/**",
+    "**/storage/framework/testing/**",
+    "**/storage/framework/views/**",
+    "**/bootstrap/cache/**",
+    # .NET / C#
+    "**/bin/**",
+    "**/obj/**",
+    "**/packages/**",
+    "**/*.cache",
+    "**/*.dll",
+    "**/*.exe",
+    "**/*.pdb",
+    "**/*.user",
+    "**/*.suo",
+    "**/.vs/**",
+    "**/TestResults/**",
+    "**/BenchmarkDotNet.Artifacts/**",
+    # C/C++
+    "**/*.o",
+    "**/*.obj",
+    "**/*.so",
+    "**/*.dll",
+    "**/*.a",
+    "**/*.lib",
+    "**/*.dylib",
+    "**/*.exe",
+    "**/CMakeFiles/**",
+    "**/CMakeCache.txt",
+    "**/cmake_install.cmake",
+    "**/Makefile",
+    "**/compile_commands.json",
+    "**/.deps/**",
+    "**/.libs/**",
+    "**/autom4te.cache/**",
+    # Perl
+    "**/blib/**",
+    "**/_build/**",
+    "**/Build",
+    "**/Build.bat",
+    "**/*.tmp",
+    "**/*.bak",
+    "**/*.old",
+    "**/Makefile.old",
+    "**/MANIFEST.bak",
+    "**/META.yml",
+    "**/META.json",
+    "**/MYMETA.*",
+    "**/.prove",
+    # Scala
+    "**/target/**",
+    "**/project/target/**",
+    "**/project/project/**",
+    "**/.bloop/**",
+    "**/.metals/**",
+    "**/.ammonite/**",
+    "**/*.class",
+    # Elixir
+    "**/_build/**",
+    "**/deps/**",
+    "**/*.beam",
+    "**/.fetch",
+    "**/erl_crash.dump",
+    "**/*.ez",
+    "**/doc/**",
+    "**/.elixir_ls/**",
+    # Swift
+    "**/.build/**",
+    "**/Packages/**",
+    "**/*.xcodeproj/**",
+    "**/*.xcworkspace/**",
+    "**/DerivedData/**",
+    "**/xcuserdata/**",
+    "**/*.dSYM/**",
+    # Kotlin
+    "**/build/**",
+    "**/.gradle/**",
+    "**/*.class",
+    "**/*.jar",
+    "**/*.kotlin_module",
+    # Clojure
+    "**/target/**",
+    "**/.lein-**",
+    "**/.nrepl-port",
+    "**/pom.xml.asc",
+    "**/*.jar",
+    "**/*.class",
+    # Dart/Flutter
+    "**/.dart_tool/**",
+    "**/build/**",
+    "**/.packages",
+    "**/pubspec.lock",
+    "**/*.g.dart",
+    "**/*.freezed.dart",
+    "**/*.gr.dart",
+    # Haskell
+    "**/dist/**",
+    "**/dist-newstyle/**",
+    "**/.stack-work/**",
+    "**/*.hi",
+    "**/*.o",
+    "**/*.prof",
+    "**/*.aux",
+    "**/*.hp",
+    "**/*.eventlog",
+    "**/*.tix",
+    # Erlang
+    "**/ebin/**",
+    "**/rel/**",
+    "**/deps/**",
+    "**/*.beam",
+    "**/*.boot",
+    "**/*.plt",
+    "**/erl_crash.dump",
+    # Common cache and temp directories
+    "**/.cache/**",
+    "**/cache/**",
+    "**/tmp/**",
+    "**/temp/**",
+    "**/.tmp/**",
+    "**/.temp/**",
+    "**/logs/**",
+    "**/*.log",
+    "**/*.log.*",
+    # IDE and editor files
+    "**/.idea/**",
+    "**/.idea",
+    "**/.vscode/**",
+    "**/.vscode",
+    "**/*.swp",
+    "**/*.swo",
+    "**/*~",
+    "**/.#*",
+    "**/#*#",
+    "**/.emacs.d/auto-save-list/**",
+    "**/.vim/**",
+    "**/.netrwhist",
+    "**/Session.vim",
+    "**/.sublime-project",
+    "**/.sublime-workspace",
+    # OS-specific files
+    "**/.DS_Store",
+    ".DS_Store",
+    "**/Thumbs.db",
+    "**/Desktop.ini",
+    "**/.directory",
+    "**/*.lnk",
+    # Common artifacts
+    "**/*.orig",
+    "**/*.rej",
+    "**/*.patch",
+    "**/*.diff",
+    "**/.*.orig",
+    "**/.*.rej",
+    # Backup files
+    "**/*~",
+    "**/*.bak",
+    "**/*.backup",
+    "**/*.old",
+    "**/*.save",
+    # Hidden files (but be careful with this one)
+    # "**/.*", # Commented out as it might be too aggressive
+]
+
+
+def should_ignore_path(path: str) -> bool:
+    """Return True if *path* matches any pattern in IGNORE_PATTERNS."""
+    # Convert path to Path object for better pattern matching
+    path_obj = Path(path)
+
+    for pattern in IGNORE_PATTERNS:
+        # Try pathlib's match method which handles ** patterns properly
+        try:
+            if path_obj.match(pattern):
+                return True
+        except ValueError:
+            # If pathlib can't handle the pattern, fall back to fnmatch
+            if fnmatch.fnmatch(path, pattern):
+                return True
+
+        # Additional check: if pattern contains **, try matching against
+        # different parts of the path to handle edge cases
+        if "**" in pattern:
+            # Convert pattern to handle different path representations
+            simplified_pattern = pattern.replace("**/", "").replace("/**", "")
+
+            # Check if any part of the path matches the simplified pattern
+            path_parts = path_obj.parts
+            for i in range(len(path_parts)):
+                subpath = Path(*path_parts[i:])
+                if fnmatch.fnmatch(str(subpath), simplified_pattern):
+                    return True
+                # Also check individual parts
+                if fnmatch.fnmatch(path_parts[i], simplified_pattern):
+                    return True
+
+    return False
+
+
+def _find_best_window(
+    haystack_lines: list[str],
+    needle: str,
+) -> Tuple[Optional[Tuple[int, int]], float]:
+    """
+    Return (start, end) indices of the window with the highest
+    Jaro-Winkler similarity to `needle`, along with that score.
+    If nothing clears JW_THRESHOLD, return (None, score).
+    """
+    needle = needle.rstrip("\n")
+    needle_lines = needle.splitlines()
+    win_size = len(needle_lines)
+    best_score = 0.0
+    best_span: Optional[Tuple[int, int]] = None
+    best_window = ""
+    # Pre-join the needle once; join windows on the fly
+    for i in range(len(haystack_lines) - win_size + 1):
+        window = "\n".join(haystack_lines[i : i + win_size])
+        score = JaroWinkler.normalized_similarity(window, needle)
+        if score > best_score:
+            best_score = score
+            best_span = (i, i + win_size)
+            best_window = window
+
+    console.log(f"Best span: {best_span}")
+    console.log(f"Best window: {best_window}")
+    console.log(f"Best score: {best_score}")
+    return best_span, best_score
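The rewritten code_puppy/tools/common.py keeps the same public surface (console, IGNORE_PATTERNS, should_ignore_path, _find_best_window, get_model_context_length) but greatly expands the ignore list and makes the matching smarter: should_ignore_path now tries pathlib's match first, falls back to fnmatch, and for "**" patterns also tests a simplified pattern against each path segment. A condensed, standalone sketch of that strategy (a three-pattern subset is used here for illustration; the real function iterates the full IGNORE_PATTERNS list):

    import fnmatch
    from pathlib import Path

    PATTERNS = ["**/node_modules/**", "**/*.pyc", "**/.git/**"]  # illustrative subset

    def ignore(path: str) -> bool:
        p = Path(path)
        for pattern in PATTERNS:
            # 1) pathlib match, 2) fnmatch fallback, 3) per-segment check for "**" patterns
            try:
                if p.match(pattern):
                    return True
            except ValueError:
                if fnmatch.fnmatch(path, pattern):
                    return True
            if "**" in pattern:
                simplified = pattern.replace("**/", "").replace("/**", "")
                for i, part in enumerate(p.parts):
                    if fnmatch.fnmatch(str(Path(*p.parts[i:])), simplified):
                        return True
                    if fnmatch.fnmatch(part, simplified):
                        return True
        return False

    print(ignore("project/node_modules/lodash/index.js"))  # True ("node_modules" segment)
    print(ignore("src/main.py"))                           # False
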
{code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/tools/file_operations.py
@@ -7,7 +7,7 @@ from pydantic import BaseModel, conint
 from pydantic_ai import RunContext

 from code_puppy.tools.common import console
-from code_puppy.token_utils import get_tokenizer
+from code_puppy.token_utils import estimate_tokens
 from code_puppy.tools.token_check import token_guard
 # ---------------------------------------------------------------------------
 # Module-level helper functions (exposed for unit tests _and_ used as tools)
@@ -218,8 +218,7 @@ def _read_file(context: RunContext, file_path: str, start_line: int | None = Non
         # Read the entire file
         content = f.read()

-    tokenizer = get_tokenizer()
-    num_tokens = len(tokenizer.encode(content))
+    num_tokens = estimate_tokens(content)
     if num_tokens > 10000:
         raise ValueError("The file is massive, greater than 10,000 tokens which is dangerous to read entirely. Please read this file in chunks.")
     token_guard(num_tokens)
@@ -313,8 +312,7 @@ def list_files(
     context: RunContext, directory: str = ".", recursive: bool = True
 ) -> ListFileOutput:
     list_files_output = _list_files(context, directory, recursive)
-    tokenizer = get_tokenizer()
-    num_tokens = len(tokenizer.encode(list_files_output.model_dump_json()))
+    num_tokens = estimate_tokens(list_files_output.model_dump_json())
     if num_tokens > 10000:
         return ListFileOutput(
             files=[],
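In file_operations.py the 10,000-token guards on _read_file and list_files keep the same thresholds but now measure size with estimate_tokens, so the effective ceiling is roughly 40,000 characters of file content or JSON listing. A small sketch of the arithmetic behind the guard:

    def estimate_tokens(text: str) -> int:
        # Heuristic used by the guards: ~4 characters per token.
        if not text:
            return 0
        return max(1, len(text) // 4)

    content = "x" * 50_000
    num_tokens = estimate_tokens(content)  # 12500
    print(num_tokens > 10000)              # True -> the read would be rejected
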
{code_puppy-0.0.91 → code_puppy-0.0.93}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

 [project]
 name = "code-puppy"
-version = "0.0.91"
+version = "0.0.93"
 description = "Code generation agent"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -25,7 +25,6 @@ dependencies = [
     "json-repair>=0.46.2",
     "tree-sitter-language-pack>=0.8.0",
     "tree-sitter-typescript>=0.23.2",
-    "tiktoken>=0.11.0",
 ]
 dev-dependencies = [
     "pytest>=8.3.4",
code_puppy-0.0.91/code_puppy/tools/common.py
@@ -1,119 +0,0 @@
-import os
-import fnmatch
-
-from typing import Optional, Tuple
-import tiktoken
-from rapidfuzz.distance import JaroWinkler
-from rich.console import Console
-
-# get_model_context_length will be imported locally where needed to avoid circular imports
-
-NO_COLOR = bool(int(os.environ.get("CODE_PUPPY_NO_COLOR", "0")))
-console = Console(no_color=NO_COLOR)
-
-
-def get_model_context_length() -> int:
-    """
-    Get the context length for the currently configured model from models.json
-    """
-    # Import locally to avoid circular imports
-    from code_puppy.model_factory import ModelFactory
-    from code_puppy.config import get_model_name
-    import os
-    from pathlib import Path
-
-    # Load model configuration
-    models_path = os.environ.get("MODELS_JSON_PATH")
-    if not models_path:
-        models_path = Path(__file__).parent.parent / "models.json"
-    else:
-        models_path = Path(models_path)
-
-    model_configs = ModelFactory.load_config(str(models_path))
-    model_name = get_model_name()
-
-    # Get context length from model config
-    model_config = model_configs.get(model_name, {})
-    context_length = model_config.get("context_length", 128000)  # Default value
-
-    # Reserve 10% of context for response
-    return int(context_length)
-
-
-# -------------------
-# Shared ignore patterns/helpers
-# -------------------
-IGNORE_PATTERNS = [
-    "**/node_modules/**",
-    "**/node_modules/**/*.js",
-    "node_modules/**",
-    "node_modules",
-    "**/.git/**",
-    "**/.git",
-    ".git/**",
-    ".git",
-    "**/__pycache__/**",
-    "**/__pycache__",
-    "__pycache__/**",
-    "__pycache__",
-    "**/.DS_Store",
-    ".DS_Store",
-    "**/.env",
-    ".env",
-    "**/.venv/**",
-    "**/.venv",
-    "**/venv/**",
-    "**/venv",
-    "**/.idea/**",
-    "**/.idea",
-    "**/.vscode/**",
-    "**/.vscode",
-    "**/dist/**",
-    "**/dist",
-    "**/build/**",
-    "**/build",
-    "**/*.pyc",
-    "**/*.pyo",
-    "**/*.pyd",
-    "**/*.so",
-    "**/*.dll",
-    "**/.*",
-]
-
-
-def should_ignore_path(path: str) -> bool:
-    """Return True if *path* matches any pattern in IGNORE_PATTERNS."""
-    for pattern in IGNORE_PATTERNS:
-        if fnmatch.fnmatch(path, pattern):
-            return True
-    return False
-
-
-def _find_best_window(
-    haystack_lines: list[str],
-    needle: str,
-) -> Tuple[Optional[Tuple[int, int]], float]:
-    """
-    Return (start, end) indices of the window with the highest
-    Jaro-Winkler similarity to `needle`, along with that score.
-    If nothing clears JW_THRESHOLD, return (None, score).
-    """
-    needle = needle.rstrip("\n")
-    needle_lines = needle.splitlines()
-    win_size = len(needle_lines)
-    best_score = 0.0
-    best_span: Optional[Tuple[int, int]] = None
-    best_window = ""
-    # Pre-join the needle once; join windows on the fly
-    for i in range(len(haystack_lines) - win_size + 1):
-        window = "\n".join(haystack_lines[i : i + win_size])
-        score = JaroWinkler.normalized_similarity(window, needle)
-        if score > best_score:
-            best_score = score
-            best_span = (i, i + win_size)
-            best_window = window
-
-    console.log(f"Best span: {best_span}")
-    console.log(f"Best window: {best_window}")
-    console.log(f"Best score: {best_score}")
-    return best_span, best_score