code-puppy 0.0.91__tar.gz → 0.0.93__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {code_puppy-0.0.91 → code_puppy-0.0.93}/PKG-INFO +1 -2
  2. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/main.py +4 -1
  3. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/message_history_processor.py +7 -9
  4. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/status_display.py +12 -1
  5. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/token_utils.py +9 -10
  6. code_puppy-0.0.93/code_puppy/tools/common.py +384 -0
  7. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/tools/file_operations.py +3 -5
  8. {code_puppy-0.0.91 → code_puppy-0.0.93}/pyproject.toml +1 -2
  9. code_puppy-0.0.91/code_puppy/tools/common.py +0 -119
  10. {code_puppy-0.0.91 → code_puppy-0.0.93}/.gitignore +0 -0
  11. {code_puppy-0.0.91 → code_puppy-0.0.93}/LICENSE +0 -0
  12. {code_puppy-0.0.91 → code_puppy-0.0.93}/README.md +0 -0
  13. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/__init__.py +0 -0
  14. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/agent.py +0 -0
  15. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/agent_prompts.py +0 -0
  16. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/command_line/__init__.py +0 -0
  17. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/command_line/file_path_completion.py +0 -0
  18. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/command_line/meta_command_handler.py +0 -0
  19. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/command_line/model_picker_completion.py +0 -0
  20. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/command_line/motd.py +0 -0
  21. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/command_line/prompt_toolkit_completion.py +0 -0
  22. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/command_line/utils.py +0 -0
  23. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/config.py +0 -0
  24. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/model_factory.py +0 -0
  25. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/models.json +0 -0
  26. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/state_management.py +0 -0
  27. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/summarization_agent.py +0 -0
  28. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/tools/__init__.py +0 -0
  29. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/tools/command_runner.py +0 -0
  30. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/tools/file_modifications.py +0 -0
  31. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/tools/token_check.py +0 -0
  32. {code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/version_checker.py +0 -0
{code_puppy-0.0.91 → code_puppy-0.0.93}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: code-puppy
-Version: 0.0.91
+Version: 0.0.93
 Summary: Code generation agent
 Author: Michael Pfaffenberger
 License: MIT
@@ -27,7 +27,6 @@ Requires-Dist: python-dotenv>=1.0.0
 Requires-Dist: rapidfuzz>=3.13.0
 Requires-Dist: rich>=13.4.2
 Requires-Dist: ruff>=0.11.11
-Requires-Dist: tiktoken>=0.11.0
 Requires-Dist: tree-sitter-language-pack>=0.8.0
 Requires-Dist: tree-sitter-typescript>=0.23.2
 Description-Content-Type: text/markdown
{code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/main.py
@@ -38,7 +38,10 @@ def get_secret_file_path():

 async def main():
     # Ensure the config directory and puppy.cfg with name info exist (prompt user if needed)
-    logfire.configure(token="pylf_v1_us_8G5nLznQtHMRsL4hsNG5v3fPWKjyXbysrMgrQ1bV1wRP")
+    logfire.configure(
+        token="pylf_v1_us_8G5nLznQtHMRsL4hsNG5v3fPWKjyXbysrMgrQ1bV1wRP",
+        console=False
+    )
     logfire.instrument_pydantic_ai()
     ensure_config_exists()

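The only functional change in main.py is the reshaped logfire.configure call: the Logfire write token is unchanged, and a console=False argument is added, which turns off Logfire's own console output so it does not interleave with the app's Rich console. A minimal sketch of the same configuration (the token string below is a placeholder, not the project's real token):

    import logfire

    logfire.configure(
        token="<logfire-write-token>",  # placeholder for the project write token
        console=False,                  # keep Logfire from printing spans to the terminal
    )
    logfire.instrument_pydantic_ai()    # unchanged: instrument pydantic-ai calls
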
{code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/message_history_processor.py
@@ -4,7 +4,6 @@ import os
 from pathlib import Path

 import pydantic
-import tiktoken
 from pydantic_ai.messages import (
     ModelMessage,
     TextPart,
@@ -16,6 +15,7 @@ from pydantic_ai.messages import (
 from code_puppy.tools.common import console
 from code_puppy.model_factory import ModelFactory
 from code_puppy.config import get_model_name
+from code_puppy.token_utils import estimate_tokens

 # Import the status display to get token rate info
 try:
@@ -46,12 +46,12 @@ except ImportError:
         return None


+# Dummy function for backward compatibility
 def get_tokenizer_for_model(model_name: str):
     """
-    Always use cl100k_base tokenizer regardless of model type.
-    This is a simple approach that works reasonably well for most models.
+    Dummy function that returns None since we're now using len/4 heuristic.
     """
-    return tiktoken.get_encoding("cl100k_base")
+    return None


 def stringify_message_part(part) -> str:
@@ -96,17 +96,15 @@ def stringify_message_part(part) -> str:

 def estimate_tokens_for_message(message: ModelMessage) -> int:
     """
-    Estimate the number of tokens in a message using tiktoken with cl100k_base encoding.
-    This is more accurate than character-based estimation.
+    Estimate the number of tokens in a message using the len/4 heuristic.
+    This is a simple approximation that works reasonably well for most text.
     """
-    tokenizer = get_tokenizer_for_model(get_model_name())
     total_tokens = 0

     for part in message.parts:
         part_str = stringify_message_part(part)
         if part_str:
-            tokens = tokenizer.encode(part_str)
-            total_tokens += len(tokens)
+            total_tokens += estimate_tokens(part_str)

     return max(1, total_tokens)

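Taken together, the message_history_processor.py hunks drop tiktoken from the history path: the tiktoken import goes away, the shared estimate_tokens helper is imported instead, get_tokenizer_for_model survives only as a stub that returns None, and estimate_tokens_for_message now sums the len/4 estimates over the stringified message parts. A rough sketch of the resulting per-message arithmetic (plain strings stand in for the pydantic_ai message parts):

    def estimate_tokens(text: str) -> int:
        # len/4 heuristic from code_puppy.token_utils
        if not text:
            return 0
        return max(1, len(text) // 4)

    parts = ["You are a helpful coding agent.", "list_files('.')", ""]
    total = sum(estimate_tokens(p) for p in parts if p)
    print(max(1, total))  # 7 + 3 -> 10 estimated tokens for the message
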
{code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/status_display.py
@@ -104,9 +104,13 @@ class StatusDisplay:

     def update_token_count(self, tokens: int) -> None:
         """Update the token count and recalculate the rate"""
+        # Reset timing if this is the first update of a new task
         if self.start_time is None:
             self.start_time = time.time()
             self.last_update_time = self.start_time
+            # Reset token counters for new task
+            self.last_token_count = 0
+            self.current_rate = 0.0

         # Allow for incremental updates (common for streaming) or absolute updates
         if tokens > self.token_count or tokens < 0:
@@ -204,6 +208,13 @@ class StatusDisplay:
        avg_rate = self.token_count / elapsed if elapsed > 0 else 0
        self.console.print(f"[dim]Completed: {self.token_count} tokens in {elapsed:.1f}s ({avg_rate:.1f} t/s avg)[/dim]")

-        # Reset
+        # Reset state
        self.start_time = None
        self.token_count = 0
+        self.last_update_time = None
+        self.last_token_count = 0
+        self.current_rate = 0
+
+        # Reset global rate to 0 to avoid affecting subsequent tasks
+        global CURRENT_TOKEN_RATE
+        CURRENT_TOKEN_RATE = 0.0
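The status_display.py changes are about not letting one task's token rate bleed into the next: on the first update of a new task the last token count and current rate are zeroed alongside the timers, and when a task completes every per-task field plus the module-level CURRENT_TOKEN_RATE is reset. An illustrative sketch of that lifecycle (a simplified stand-in, not the actual StatusDisplay class):

    import time

    class RateTracker:
        """Simplified stand-in showing the reset-on-start / reset-on-finish pattern."""

        def __init__(self) -> None:
            self.start_time = None
            self.token_count = 0
            self.current_rate = 0.0

        def update(self, tokens: int) -> None:
            if self.start_time is None:
                # First update of a new task: start the clock and clear stale state.
                self.start_time = time.time()
                self.current_rate = 0.0
            self.token_count = tokens
            elapsed = time.time() - self.start_time
            if elapsed > 0:
                self.current_rate = self.token_count / elapsed

        def finish(self) -> None:
            # Zero everything so the next task starts clean.
            self.start_time = None
            self.token_count = 0
            self.current_rate = 0.0
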
{code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/token_utils.py
@@ -1,16 +1,17 @@
 import json
-import tiktoken

 import pydantic
 from pydantic_ai.messages import ModelMessage


-def get_tokenizer():
+def estimate_tokens(text: str) -> int:
     """
-    Always use cl100k_base tokenizer regardless of model type.
-    This is a simple approach that works reasonably well for most models.
+    Estimate the number of tokens using the len/4 heuristic.
+    This is a simple approximation that works reasonably well for most text.
     """
-    return tiktoken.get_encoding("cl100k_base")
+    if not text:
+        return 0
+    return max(1, len(text) // 4)


 def stringify_message_part(part) -> str:
@@ -55,16 +56,14 @@ def stringify_message_part(part) -> str:

 def estimate_tokens_for_message(message: ModelMessage) -> int:
     """
-    Estimate the number of tokens in a message using tiktoken with cl100k_base encoding.
-    This is more accurate than character-based estimation.
+    Estimate the number of tokens in a message using the len/4 heuristic.
+    This is a simple approximation that works reasonably well for most text.
     """
-    tokenizer = get_tokenizer()
     total_tokens = 0

     for part in message.parts:
         part_str = stringify_message_part(part)
         if part_str:
-            tokens = tokenizer.encode(part_str)
-            total_tokens += len(tokens)
+            total_tokens += estimate_tokens(part_str)

     return max(1, total_tokens)
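token_utils.py now carries the whole token-counting story: estimate_tokens replaces tiktoken's cl100k_base encoder with a characters-divided-by-four heuristic (empty text counts as zero, anything non-empty counts as at least one token), and estimate_tokens_for_message simply sums it over the message parts. The function is small enough to restate with a few worked values:

    def estimate_tokens(text: str) -> int:
        # ~4 characters per token; floor of 1 for any non-empty string.
        if not text:
            return 0
        return max(1, len(text) // 4)

    print(estimate_tokens(""))          # 0
    print(estimate_tokens("hi"))        # 1
    print(estimate_tokens("a" * 4000))  # 1000
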
code_puppy-0.0.93/code_puppy/tools/common.py
@@ -0,0 +1,384 @@
+import os
+import fnmatch
+
+from typing import Optional, Tuple
+from rapidfuzz.distance import JaroWinkler
+from rich.console import Console
+
+# get_model_context_length will be imported locally where needed to avoid circular imports
+
+NO_COLOR = bool(int(os.environ.get("CODE_PUPPY_NO_COLOR", "0")))
+console = Console(no_color=NO_COLOR)
+
+
+def get_model_context_length() -> int:
+    """
+    Get the context length for the currently configured model from models.json
+    """
+    # Import locally to avoid circular imports
+    from code_puppy.model_factory import ModelFactory
+    from code_puppy.config import get_model_name
+    import os
+    from pathlib import Path
+
+    # Load model configuration
+    models_path = os.environ.get("MODELS_JSON_PATH")
+    if not models_path:
+        models_path = Path(__file__).parent.parent / "models.json"
+    else:
+        models_path = Path(models_path)
+
+    model_configs = ModelFactory.load_config(str(models_path))
+    model_name = get_model_name()
+
+    # Get context length from model config
+    model_config = model_configs.get(model_name, {})
+    context_length = model_config.get("context_length", 128000)  # Default value
+
+    # Reserve 10% of context for response
+    return int(context_length)
+
+
+# -------------------
+# Shared ignore patterns/helpers
+# -------------------
+IGNORE_PATTERNS = [
+    # Version control
+    "**/.git/**",
+    "**/.git",
+    ".git/**",
+    ".git",
+    "**/.svn/**",
+    "**/.hg/**",
+    "**/.bzr/**",
+    # Node.js / JavaScript / TypeScript
+    "**/node_modules/**",
+    "**/node_modules/**/*.js",
+    "node_modules/**",
+    "node_modules",
+    "**/npm-debug.log*",
+    "**/yarn-debug.log*",
+    "**/yarn-error.log*",
+    "**/pnpm-debug.log*",
+    "**/.npm/**",
+    "**/.yarn/**",
+    "**/.pnpm-store/**",
+    "**/coverage/**",
+    "**/.nyc_output/**",
+    "**/dist/**",
+    "**/dist",
+    "**/build/**",
+    "**/build",
+    "**/.next/**",
+    "**/.nuxt/**",
+    "**/out/**",
+    "**/.cache/**",
+    "**/.parcel-cache/**",
+    "**/.vite/**",
+    "**/storybook-static/**",
+    # Python
+    "**/__pycache__/**",
+    "**/__pycache__",
+    "__pycache__/**",
+    "__pycache__",
+    "**/*.pyc",
+    "**/*.pyo",
+    "**/*.pyd",
+    "**/.pytest_cache/**",
+    "**/.mypy_cache/**",
+    "**/.coverage",
+    "**/htmlcov/**",
+    "**/.tox/**",
+    "**/.nox/**",
+    "**/site-packages/**",
+    "**/.venv/**",
+    "**/.venv",
+    "**/venv/**",
+    "**/venv",
+    "**/env/**",
+    "**/ENV/**",
+    "**/.env",
+    "**/pip-wheel-metadata/**",
+    "**/*.egg-info/**",
+    "**/dist/**",
+    "**/wheels/**",
+    # Java (Maven, Gradle, SBT)
+    "**/target/**",
+    "**/target",
+    "**/build/**",
+    "**/build",
+    "**/.gradle/**",
+    "**/gradle-app.setting",
+    "**/*.class",
+    "**/*.jar",
+    "**/*.war",
+    "**/*.ear",
+    "**/*.nar",
+    "**/hs_err_pid*",
+    "**/.classpath",
+    "**/.project",
+    "**/.settings/**",
+    "**/bin/**",
+    "**/project/target/**",
+    "**/project/project/**",
+    # Go
+    "**/vendor/**",
+    "**/*.exe",
+    "**/*.exe~",
+    "**/*.dll",
+    "**/*.so",
+    "**/*.dylib",
+    "**/*.test",
+    "**/*.out",
+    "**/go.work",
+    "**/go.work.sum",
+    # Rust
+    "**/target/**",
+    "**/Cargo.lock",
+    "**/*.pdb",
+    # Ruby
+    "**/vendor/**",
+    "**/.bundle/**",
+    "**/Gemfile.lock",
+    "**/*.gem",
+    "**/.rvm/**",
+    "**/.rbenv/**",
+    "**/coverage/**",
+    "**/.yardoc/**",
+    "**/doc/**",
+    "**/rdoc/**",
+    "**/.sass-cache/**",
+    "**/.jekyll-cache/**",
+    "**/_site/**",
+    # PHP
+    "**/vendor/**",
+    "**/composer.lock",
+    "**/.phpunit.result.cache",
+    "**/storage/logs/**",
+    "**/storage/framework/cache/**",
+    "**/storage/framework/sessions/**",
+    "**/storage/framework/testing/**",
+    "**/storage/framework/views/**",
+    "**/bootstrap/cache/**",
+    # .NET / C#
+    "**/bin/**",
+    "**/obj/**",
+    "**/packages/**",
+    "**/*.cache",
+    "**/*.dll",
+    "**/*.exe",
+    "**/*.pdb",
+    "**/*.user",
+    "**/*.suo",
+    "**/.vs/**",
+    "**/TestResults/**",
+    "**/BenchmarkDotNet.Artifacts/**",
+    # C/C++
+    "**/*.o",
+    "**/*.obj",
+    "**/*.so",
+    "**/*.dll",
+    "**/*.a",
+    "**/*.lib",
+    "**/*.dylib",
+    "**/*.exe",
+    "**/CMakeFiles/**",
+    "**/CMakeCache.txt",
+    "**/cmake_install.cmake",
+    "**/Makefile",
+    "**/compile_commands.json",
+    "**/.deps/**",
+    "**/.libs/**",
+    "**/autom4te.cache/**",
+    # Perl
+    "**/blib/**",
+    "**/_build/**",
+    "**/Build",
+    "**/Build.bat",
+    "**/*.tmp",
+    "**/*.bak",
+    "**/*.old",
+    "**/Makefile.old",
+    "**/MANIFEST.bak",
+    "**/META.yml",
+    "**/META.json",
+    "**/MYMETA.*",
+    "**/.prove",
+    # Scala
+    "**/target/**",
+    "**/project/target/**",
+    "**/project/project/**",
+    "**/.bloop/**",
+    "**/.metals/**",
+    "**/.ammonite/**",
+    "**/*.class",
+    # Elixir
+    "**/_build/**",
+    "**/deps/**",
+    "**/*.beam",
+    "**/.fetch",
+    "**/erl_crash.dump",
+    "**/*.ez",
+    "**/doc/**",
+    "**/.elixir_ls/**",
+    # Swift
+    "**/.build/**",
+    "**/Packages/**",
+    "**/*.xcodeproj/**",
+    "**/*.xcworkspace/**",
+    "**/DerivedData/**",
+    "**/xcuserdata/**",
+    "**/*.dSYM/**",
+    # Kotlin
+    "**/build/**",
+    "**/.gradle/**",
+    "**/*.class",
+    "**/*.jar",
+    "**/*.kotlin_module",
+    # Clojure
+    "**/target/**",
+    "**/.lein-**",
+    "**/.nrepl-port",
+    "**/pom.xml.asc",
+    "**/*.jar",
+    "**/*.class",
+    # Dart/Flutter
+    "**/.dart_tool/**",
+    "**/build/**",
+    "**/.packages",
+    "**/pubspec.lock",
+    "**/*.g.dart",
+    "**/*.freezed.dart",
+    "**/*.gr.dart",
+    # Haskell
+    "**/dist/**",
+    "**/dist-newstyle/**",
+    "**/.stack-work/**",
+    "**/*.hi",
+    "**/*.o",
+    "**/*.prof",
+    "**/*.aux",
+    "**/*.hp",
+    "**/*.eventlog",
+    "**/*.tix",
+    # Erlang
+    "**/ebin/**",
+    "**/rel/**",
+    "**/deps/**",
+    "**/*.beam",
+    "**/*.boot",
+    "**/*.plt",
+    "**/erl_crash.dump",
+    # Common cache and temp directories
+    "**/.cache/**",
+    "**/cache/**",
+    "**/tmp/**",
+    "**/temp/**",
+    "**/.tmp/**",
+    "**/.temp/**",
+    "**/logs/**",
+    "**/*.log",
+    "**/*.log.*",
+    # IDE and editor files
+    "**/.idea/**",
+    "**/.idea",
+    "**/.vscode/**",
+    "**/.vscode",
+    "**/*.swp",
+    "**/*.swo",
+    "**/*~",
+    "**/.#*",
+    "**/#*#",
+    "**/.emacs.d/auto-save-list/**",
+    "**/.vim/**",
+    "**/.netrwhist",
+    "**/Session.vim",
+    "**/.sublime-project",
+    "**/.sublime-workspace",
+    # OS-specific files
+    "**/.DS_Store",
+    ".DS_Store",
+    "**/Thumbs.db",
+    "**/Desktop.ini",
+    "**/.directory",
+    "**/*.lnk",
+    # Common artifacts
+    "**/*.orig",
+    "**/*.rej",
+    "**/*.patch",
+    "**/*.diff",
+    "**/.*.orig",
+    "**/.*.rej",
+    # Backup files
+    "**/*~",
+    "**/*.bak",
+    "**/*.backup",
+    "**/*.old",
+    "**/*.save",
+    # Hidden files (but be careful with this one)
+    # "**/.*", # Commented out as it might be too aggressive
+]
+
+
+def should_ignore_path(path: str) -> bool:
+    """Return True if *path* matches any pattern in IGNORE_PATTERNS."""
+    # Convert path to Path object for better pattern matching
+    path_obj = Path(path)
+
+    for pattern in IGNORE_PATTERNS:
+        # Try pathlib's match method which handles ** patterns properly
+        try:
+            if path_obj.match(pattern):
+                return True
+        except ValueError:
+            # If pathlib can't handle the pattern, fall back to fnmatch
+            if fnmatch.fnmatch(path, pattern):
+                return True
+
+        # Additional check: if pattern contains **, try matching against
+        # different parts of the path to handle edge cases
+        if "**" in pattern:
+            # Convert pattern to handle different path representations
+            simplified_pattern = pattern.replace("**/", "").replace("/**", "")
+
+            # Check if any part of the path matches the simplified pattern
+            path_parts = path_obj.parts
+            for i in range(len(path_parts)):
+                subpath = Path(*path_parts[i:])
+                if fnmatch.fnmatch(str(subpath), simplified_pattern):
+                    return True
+                # Also check individual parts
+                if fnmatch.fnmatch(path_parts[i], simplified_pattern):
+                    return True
+
+    return False
+
+
+def _find_best_window(
+    haystack_lines: list[str],
+    needle: str,
+) -> Tuple[Optional[Tuple[int, int]], float]:
+    """
+    Return (start, end) indices of the window with the highest
+    Jaro-Winkler similarity to `needle`, along with that score.
+    If nothing clears JW_THRESHOLD, return (None, score).
+    """
+    needle = needle.rstrip("\n")
+    needle_lines = needle.splitlines()
+    win_size = len(needle_lines)
+    best_score = 0.0
+    best_span: Optional[Tuple[int, int]] = None
+    best_window = ""
+    # Pre-join the needle once; join windows on the fly
+    for i in range(len(haystack_lines) - win_size + 1):
+        window = "\n".join(haystack_lines[i : i + win_size])
+        score = JaroWinkler.normalized_similarity(window, needle)
+        if score > best_score:
+            best_score = score
+            best_span = (i, i + win_size)
+            best_window = window
+
+    console.log(f"Best span: {best_span}")
+    console.log(f"Best window: {best_window}")
+    console.log(f"Best score: {best_score}")
+    return best_span, best_score
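The rewritten code_puppy/tools/common.py keeps the same public surface (console, IGNORE_PATTERNS, should_ignore_path, _find_best_window, get_model_context_length) but greatly expands the ignore list and makes the matching smarter: should_ignore_path now tries pathlib's match first, falls back to fnmatch, and for "**" patterns also tests a simplified pattern against each path segment. A condensed, standalone sketch of that strategy (a three-pattern subset is used here for illustration; the real function iterates the full IGNORE_PATTERNS list):

    import fnmatch
    from pathlib import Path

    PATTERNS = ["**/node_modules/**", "**/*.pyc", "**/.git/**"]  # illustrative subset

    def ignore(path: str) -> bool:
        p = Path(path)
        for pattern in PATTERNS:
            # 1) pathlib match, 2) fnmatch fallback, 3) per-segment check for "**" patterns
            try:
                if p.match(pattern):
                    return True
            except ValueError:
                if fnmatch.fnmatch(path, pattern):
                    return True
            if "**" in pattern:
                simplified = pattern.replace("**/", "").replace("/**", "")
                for i, part in enumerate(p.parts):
                    if fnmatch.fnmatch(str(Path(*p.parts[i:])), simplified):
                        return True
                    if fnmatch.fnmatch(part, simplified):
                        return True
        return False

    print(ignore("project/node_modules/lodash/index.js"))  # True ("node_modules" segment)
    print(ignore("src/main.py"))                           # False
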
{code_puppy-0.0.91 → code_puppy-0.0.93}/code_puppy/tools/file_operations.py
@@ -7,7 +7,7 @@ from pydantic import BaseModel, conint
 from pydantic_ai import RunContext

 from code_puppy.tools.common import console
-from code_puppy.token_utils import get_tokenizer
+from code_puppy.token_utils import estimate_tokens
 from code_puppy.tools.token_check import token_guard
 # ---------------------------------------------------------------------------
 # Module-level helper functions (exposed for unit tests _and_ used as tools)
@@ -218,8 +218,7 @@ def _read_file(context: RunContext, file_path: str, start_line: int | None = Non
         # Read the entire file
         content = f.read()

-    tokenizer = get_tokenizer()
-    num_tokens = len(tokenizer.encode(content))
+    num_tokens = estimate_tokens(content)
     if num_tokens > 10000:
         raise ValueError("The file is massive, greater than 10,000 tokens which is dangerous to read entirely. Please read this file in chunks.")
     token_guard(num_tokens)
@@ -313,8 +312,7 @@ def list_files(
     context: RunContext, directory: str = ".", recursive: bool = True
 ) -> ListFileOutput:
     list_files_output = _list_files(context, directory, recursive)
-    tokenizer = get_tokenizer()
-    num_tokens = len(tokenizer.encode(list_files_output.model_dump_json()))
+    num_tokens = estimate_tokens(list_files_output.model_dump_json())
     if num_tokens > 10000:
         return ListFileOutput(
             files=[],
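In file_operations.py the 10,000-token guards on _read_file and list_files keep the same thresholds but now measure size with estimate_tokens, so the effective ceiling is roughly 40,000 characters of file content or JSON listing. A small sketch of the arithmetic behind the guard:

    def estimate_tokens(text: str) -> int:
        # Heuristic used by the guards: ~4 characters per token.
        if not text:
            return 0
        return max(1, len(text) // 4)

    content = "x" * 50_000
    num_tokens = estimate_tokens(content)  # 12500
    print(num_tokens > 10000)              # True -> the read would be rejected
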
{code_puppy-0.0.91 → code_puppy-0.0.93}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

 [project]
 name = "code-puppy"
-version = "0.0.91"
+version = "0.0.93"
 description = "Code generation agent"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -25,7 +25,6 @@ dependencies = [
     "json-repair>=0.46.2",
     "tree-sitter-language-pack>=0.8.0",
     "tree-sitter-typescript>=0.23.2",
-    "tiktoken>=0.11.0",
 ]
 dev-dependencies = [
     "pytest>=8.3.4",
code_puppy-0.0.91/code_puppy/tools/common.py
@@ -1,119 +0,0 @@
-import os
-import fnmatch
-
-from typing import Optional, Tuple
-import tiktoken
-from rapidfuzz.distance import JaroWinkler
-from rich.console import Console
-
-# get_model_context_length will be imported locally where needed to avoid circular imports
-
-NO_COLOR = bool(int(os.environ.get("CODE_PUPPY_NO_COLOR", "0")))
-console = Console(no_color=NO_COLOR)
-
-
-def get_model_context_length() -> int:
-    """
-    Get the context length for the currently configured model from models.json
-    """
-    # Import locally to avoid circular imports
-    from code_puppy.model_factory import ModelFactory
-    from code_puppy.config import get_model_name
-    import os
-    from pathlib import Path
-
-    # Load model configuration
-    models_path = os.environ.get("MODELS_JSON_PATH")
-    if not models_path:
-        models_path = Path(__file__).parent.parent / "models.json"
-    else:
-        models_path = Path(models_path)
-
-    model_configs = ModelFactory.load_config(str(models_path))
-    model_name = get_model_name()
-
-    # Get context length from model config
-    model_config = model_configs.get(model_name, {})
-    context_length = model_config.get("context_length", 128000)  # Default value
-
-    # Reserve 10% of context for response
-    return int(context_length)
-
-
-# -------------------
-# Shared ignore patterns/helpers
-# -------------------
-IGNORE_PATTERNS = [
-    "**/node_modules/**",
-    "**/node_modules/**/*.js",
-    "node_modules/**",
-    "node_modules",
-    "**/.git/**",
-    "**/.git",
-    ".git/**",
-    ".git",
-    "**/__pycache__/**",
-    "**/__pycache__",
-    "__pycache__/**",
-    "__pycache__",
-    "**/.DS_Store",
-    ".DS_Store",
-    "**/.env",
-    ".env",
-    "**/.venv/**",
-    "**/.venv",
-    "**/venv/**",
-    "**/venv",
-    "**/.idea/**",
-    "**/.idea",
-    "**/.vscode/**",
-    "**/.vscode",
-    "**/dist/**",
-    "**/dist",
-    "**/build/**",
-    "**/build",
-    "**/*.pyc",
-    "**/*.pyo",
-    "**/*.pyd",
-    "**/*.so",
-    "**/*.dll",
-    "**/.*",
-]
-
-
-def should_ignore_path(path: str) -> bool:
-    """Return True if *path* matches any pattern in IGNORE_PATTERNS."""
-    for pattern in IGNORE_PATTERNS:
-        if fnmatch.fnmatch(path, pattern):
-            return True
-    return False
-
-
-def _find_best_window(
-    haystack_lines: list[str],
-    needle: str,
-) -> Tuple[Optional[Tuple[int, int]], float]:
-    """
-    Return (start, end) indices of the window with the highest
-    Jaro-Winkler similarity to `needle`, along with that score.
-    If nothing clears JW_THRESHOLD, return (None, score).
-    """
-    needle = needle.rstrip("\n")
-    needle_lines = needle.splitlines()
-    win_size = len(needle_lines)
-    best_score = 0.0
-    best_span: Optional[Tuple[int, int]] = None
-    best_window = ""
-    # Pre-join the needle once; join windows on the fly
-    for i in range(len(haystack_lines) - win_size + 1):
-        window = "\n".join(haystack_lines[i : i + win_size])
-        score = JaroWinkler.normalized_similarity(window, needle)
-        if score > best_score:
-            best_score = score
-            best_span = (i, i + win_size)
-            best_window = window
-
-    console.log(f"Best span: {best_span}")
-    console.log(f"Best window: {best_window}")
-    console.log(f"Best score: {best_score}")
-    return best_span, best_score