cortex-llm 1.0.10__tar.gz → 1.0.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/PKG-INFO +3 -1
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/README.md +2 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/__init__.py +1 -1
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/config.py +46 -10
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/inference_engine.py +69 -32
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/tools/fs_ops.py +60 -13
- cortex_llm-1.0.11/cortex/tools/search.py +135 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/tools/tool_runner.py +68 -8
- cortex_llm-1.0.11/cortex/ui/box_rendering.py +97 -0
- cortex_llm-1.0.11/cortex/ui/cli.py +804 -0
- cortex_llm-1.0.11/cortex/ui/cli_commands.py +61 -0
- cortex_llm-1.0.11/cortex/ui/cli_prompt.py +96 -0
- cortex_llm-1.0.11/cortex/ui/help_ui.py +66 -0
- cortex_llm-1.0.11/cortex/ui/input_box.py +205 -0
- cortex_llm-1.0.11/cortex/ui/model_ui.py +408 -0
- cortex_llm-1.0.11/cortex/ui/status_ui.py +78 -0
- cortex_llm-1.0.11/cortex/ui/tool_activity.py +82 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex_llm.egg-info/PKG-INFO +3 -1
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex_llm.egg-info/SOURCES.txt +11 -1
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/pyproject.toml +1 -1
- cortex_llm-1.0.11/tests/test_stream_normalizer.py +42 -0
- cortex_llm-1.0.11/tests/test_tools.py +163 -0
- cortex_llm-1.0.10/cortex/tools/search.py +0 -70
- cortex_llm-1.0.10/cortex/ui/cli.py +0 -1810
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/LICENSE +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/__main__.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/conversation_manager.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/fine_tuning/__init__.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/fine_tuning/dataset.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/fine_tuning/mlx_lora_trainer.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/fine_tuning/trainer.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/fine_tuning/wizard.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/gpu_validator.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/metal/__init__.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/metal/gpu_validator.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/metal/memory_pool.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/metal/mlx_accelerator.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/metal/mlx_compat.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/metal/mlx_converter.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/metal/mps_optimizer.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/metal/optimizer.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/metal/performance_profiler.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/model_downloader.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/model_manager.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/quantization/__init__.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/quantization/dynamic_quantizer.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/template_registry/__init__.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/template_registry/auto_detector.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/template_registry/config_manager.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/template_registry/interactive.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/template_registry/registry.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/template_registry/template_profiles/__init__.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/template_registry/template_profiles/base.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/template_registry/template_profiles/complex/__init__.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/template_registry/template_profiles/complex/reasoning.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/template_registry/template_profiles/standard/__init__.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/template_registry/template_profiles/standard/alpaca.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/template_registry/template_profiles/standard/chatml.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/template_registry/template_profiles/standard/gemma.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/template_registry/template_profiles/standard/llama.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/template_registry/template_profiles/standard/simple.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/tools/__init__.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/tools/errors.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/tools/protocol.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/ui/__init__.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/ui/markdown_render.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex/ui/terminal_app.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex_llm.egg-info/dependency_links.txt +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex_llm.egg-info/entry_points.txt +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex_llm.egg-info/not-zip-safe +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex_llm.egg-info/requires.txt +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/cortex_llm.egg-info/top_level.txt +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/setup.cfg +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/setup.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/tests/test_apple_silicon.py +0 -0
- {cortex_llm-1.0.10 → cortex_llm-1.0.11}/tests/test_metal_optimization.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cortex-llm
|
|
3
|
-
Version: 1.0.10
|
|
3
|
+
Version: 1.0.11
|
|
4
4
|
Summary: GPU-Accelerated LLM Terminal for Apple Silicon
|
|
5
5
|
Home-page: https://github.com/faisalmumtaz/Cortex
|
|
6
6
|
Author: Cortex Development Team
|
|
@@ -60,6 +60,8 @@ Dynamic: requires-python
|
|
|
60
60
|
|
|
61
61
|
GPU-accelerated local LLMs on Apple Silicon, built for the terminal.
|
|
62
62
|
|
|
63
|
+

|
|
64
|
+
|
|
63
65
|
Cortex is a fast, native CLI for running and fine-tuning LLMs on Apple Silicon using MLX and Metal. It automatically detects chat templates, supports multiple model formats, and keeps your workflow inside the terminal.
|
|
64
66
|
|
|
65
67
|
## Highlights
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
GPU-accelerated local LLMs on Apple Silicon, built for the terminal.
|
|
4
4
|
|
|
5
|
+

|
|
6
|
+
|
|
5
7
|
Cortex is a fast, native CLI for running and fine-tuning LLMs on Apple Silicon using MLX and Metal. It automatically detects chat templates, supports multiple model formats, and keeps your workflow inside the terminal.
|
|
6
8
|
|
|
7
9
|
## Highlights
|
|
@@ -5,7 +5,7 @@ A high-performance terminal interface for running Hugging Face LLMs locally
|
|
|
5
5
|
with exclusive GPU acceleration via Metal Performance Shaders (MPS) and MLX.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
__version__ = "1.0.10"
|
|
8
|
+
__version__ = "1.0.11"
|
|
9
9
|
__author__ = "Cortex Development Team"
|
|
10
10
|
__license__ = "MIT"
|
|
11
11
|
|
|
@@ -146,18 +146,21 @@ class DeveloperConfig(BaseModel):
|
|
|
146
146
|
|
|
147
147
|
class PathsConfig(BaseModel):
|
|
148
148
|
"""Path configuration."""
|
|
149
|
-
claude_md_path: Path = Field(default_factory=lambda: Path("./CLAUDE.md"))
|
|
150
149
|
templates_dir: Path = Field(default_factory=lambda: Path.home() / ".cortex" / "templates")
|
|
151
150
|
plugins_dir: Path = Field(default_factory=lambda: Path.home() / ".cortex" / "plugins")
|
|
152
151
|
|
|
153
152
|
class Config:
|
|
154
153
|
"""Main configuration class for Cortex."""
|
|
155
|
-
|
|
154
|
+
|
|
155
|
+
# State file for runtime state (not committed to git)
|
|
156
|
+
STATE_FILE = Path.home() / ".cortex" / "state.yaml"
|
|
157
|
+
|
|
156
158
|
def __init__(self, config_path: Optional[Path] = None):
|
|
157
159
|
"""Initialize configuration."""
|
|
158
160
|
self.config_path = config_path or Path("config.yaml")
|
|
159
161
|
self._raw_config: Dict[str, Any] = {}
|
|
160
|
-
|
|
162
|
+
self._state: Dict[str, Any] = {}
|
|
163
|
+
|
|
161
164
|
self.gpu: GPUConfig
|
|
162
165
|
self.memory: MemoryConfig
|
|
163
166
|
self.performance: PerformanceConfig
|
|
@@ -169,8 +172,9 @@ class Config:
|
|
|
169
172
|
self.system: SystemConfig
|
|
170
173
|
self.developer: DeveloperConfig
|
|
171
174
|
self.paths: PathsConfig
|
|
172
|
-
|
|
175
|
+
|
|
173
176
|
self.load()
|
|
177
|
+
self._load_state()
|
|
174
178
|
|
|
175
179
|
def load(self) -> None:
|
|
176
180
|
"""Load configuration from YAML file."""
|
|
@@ -273,7 +277,7 @@ class Config:
|
|
|
273
277
|
|
|
274
278
|
self.paths = PathsConfig(**self._get_section({
|
|
275
279
|
k: v for k, v in self._raw_config.items()
|
|
276
|
-
if k in ["claude_md_path", "templates_dir", "plugins_dir"]
|
|
280
|
+
if k in ["templates_dir", "plugins_dir"]
|
|
277
281
|
}))
|
|
278
282
|
|
|
279
283
|
except Exception as e:
|
|
@@ -303,26 +307,58 @@ class Config:
|
|
|
303
307
|
def save(self, path: Optional[Path] = None) -> None:
|
|
304
308
|
"""Save configuration to YAML file."""
|
|
305
309
|
save_path = path or self.config_path
|
|
306
|
-
|
|
310
|
+
|
|
311
|
+
# Keys that belong in state file, not config file
|
|
312
|
+
state_keys = {"last_used_model"}
|
|
313
|
+
|
|
307
314
|
# Convert Path objects to strings for YAML serialization
|
|
308
315
|
config_dict = {}
|
|
309
316
|
for section in [self.gpu, self.memory, self.performance, self.inference,
|
|
310
317
|
self.model, self.ui, self.logging, self.conversation,
|
|
311
318
|
self.system, self.developer, self.paths]:
|
|
312
319
|
section_dict = section.model_dump()
|
|
313
|
-
# Convert Path objects to strings
|
|
320
|
+
# Convert Path objects to strings and exclude state keys
|
|
314
321
|
for key, value in section_dict.items():
|
|
322
|
+
if key in state_keys:
|
|
323
|
+
continue # Skip state keys - they go in state file
|
|
315
324
|
if isinstance(value, Path):
|
|
316
325
|
section_dict[key] = str(value)
|
|
326
|
+
# Remove state keys from section_dict
|
|
327
|
+
for key in state_keys:
|
|
328
|
+
section_dict.pop(key, None)
|
|
317
329
|
config_dict.update(section_dict)
|
|
318
|
-
|
|
330
|
+
|
|
319
331
|
with open(save_path, 'w') as f:
|
|
320
332
|
yaml.dump(config_dict, f, default_flow_style=False, sort_keys=False)
|
|
321
333
|
|
|
334
|
+
def _load_state(self) -> None:
|
|
335
|
+
"""Load runtime state from state file."""
|
|
336
|
+
if self.STATE_FILE.exists():
|
|
337
|
+
try:
|
|
338
|
+
with open(self.STATE_FILE, 'r') as f:
|
|
339
|
+
self._state = yaml.safe_load(f) or {}
|
|
340
|
+
# Apply state to model config
|
|
341
|
+
if "last_used_model" in self._state:
|
|
342
|
+
self.model.last_used_model = self._state["last_used_model"]
|
|
343
|
+
except Exception as e:
|
|
344
|
+
print(f"Warning: Failed to load state from {self.STATE_FILE}: {e}")
|
|
345
|
+
self._state = {}
|
|
346
|
+
|
|
347
|
+
def _save_state(self) -> None:
|
|
348
|
+
"""Save runtime state to state file."""
|
|
349
|
+
# Ensure directory exists
|
|
350
|
+
self.STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
|
|
351
|
+
try:
|
|
352
|
+
with open(self.STATE_FILE, 'w') as f:
|
|
353
|
+
yaml.dump(self._state, f, default_flow_style=False)
|
|
354
|
+
except Exception as e:
|
|
355
|
+
print(f"Warning: Failed to save state to {self.STATE_FILE}: {e}")
|
|
356
|
+
|
|
322
357
|
def update_last_used_model(self, model_name: str) -> None:
|
|
323
|
-
"""Update the last used model and save to
|
|
358
|
+
"""Update the last used model and save to state file."""
|
|
324
359
|
self.model.last_used_model = model_name
|
|
325
|
-
self.
|
|
360
|
+
self._state["last_used_model"] = model_name
|
|
361
|
+
self._save_state()
|
|
326
362
|
|
|
327
363
|
def __repr__(self) -> str:
|
|
328
364
|
"""String representation."""
|
|
@@ -82,6 +82,62 @@ class GenerationRequest:
|
|
|
82
82
|
if self.stop_sequences is None:
|
|
83
83
|
self.stop_sequences = []
|
|
84
84
|
|
|
85
|
+
|
|
86
|
+
class StreamDeltaNormalizer:
|
|
87
|
+
"""Normalize streaming chunks to deltas, handling cumulative or overlapping output."""
|
|
88
|
+
|
|
89
|
+
def __init__(self, max_overlap: int = 4096, min_cumulative_length: int = 32) -> None:
|
|
90
|
+
self._total_text = ""
|
|
91
|
+
self._max_overlap = max_overlap
|
|
92
|
+
self._min_cumulative_length = min_cumulative_length
|
|
93
|
+
self._cumulative_mode = False
|
|
94
|
+
|
|
95
|
+
def normalize(self, chunk: Any) -> str:
|
|
96
|
+
if chunk is None:
|
|
97
|
+
return ""
|
|
98
|
+
if not isinstance(chunk, str):
|
|
99
|
+
chunk = str(chunk)
|
|
100
|
+
if not chunk:
|
|
101
|
+
return ""
|
|
102
|
+
|
|
103
|
+
if not self._total_text:
|
|
104
|
+
self._total_text = chunk
|
|
105
|
+
return chunk
|
|
106
|
+
|
|
107
|
+
if not self._cumulative_mode:
|
|
108
|
+
if len(chunk) > len(self._total_text) and chunk.startswith(self._total_text):
|
|
109
|
+
self._cumulative_mode = True
|
|
110
|
+
delta = chunk[len(self._total_text):]
|
|
111
|
+
self._total_text = chunk
|
|
112
|
+
return delta
|
|
113
|
+
if chunk == self._total_text and len(chunk) >= self._min_cumulative_length:
|
|
114
|
+
# Likely a cumulative stream repeating the full text; don't re-emit.
|
|
115
|
+
self._cumulative_mode = True
|
|
116
|
+
return ""
|
|
117
|
+
# Default to delta mode to avoid dropping legitimate repeats.
|
|
118
|
+
self._total_text += chunk
|
|
119
|
+
return chunk
|
|
120
|
+
|
|
121
|
+
# Cumulative mode: emit only new suffix.
|
|
122
|
+
if chunk.startswith(self._total_text):
|
|
123
|
+
delta = chunk[len(self._total_text):]
|
|
124
|
+
self._total_text = chunk
|
|
125
|
+
return delta
|
|
126
|
+
|
|
127
|
+
# Handle partial overlap in cumulative streams.
|
|
128
|
+
max_overlap = min(len(self._total_text), len(chunk), self._max_overlap)
|
|
129
|
+
if max_overlap > 0:
|
|
130
|
+
tail = self._total_text[-max_overlap:]
|
|
131
|
+
for i in range(max_overlap, 0, -1):
|
|
132
|
+
if tail[-i:] == chunk[:i]:
|
|
133
|
+
delta = chunk[i:]
|
|
134
|
+
self._total_text += delta
|
|
135
|
+
return delta
|
|
136
|
+
|
|
137
|
+
# Fallback: treat as fresh delta to avoid loss.
|
|
138
|
+
self._total_text += chunk
|
|
139
|
+
return chunk
|
|
140
|
+
|
|
85
141
|
class InferenceEngine:
|
|
86
142
|
"""GPU-accelerated inference engine."""
|
|
87
143
|
|
|
@@ -243,33 +299,7 @@ class InferenceEngine:
|
|
|
243
299
|
tokens_generated = 0
|
|
244
300
|
first_token_time = None
|
|
245
301
|
last_metrics_update = time.time()
|
|
246
|
-
|
|
247
|
-
stream_cumulative = False
|
|
248
|
-
|
|
249
|
-
def normalize_stream_chunk(chunk: Any) -> str:
|
|
250
|
-
"""Normalize streaming output to delta chunks when backend yields cumulative text."""
|
|
251
|
-
nonlocal stream_total_text, stream_cumulative
|
|
252
|
-
if chunk is None:
|
|
253
|
-
return ""
|
|
254
|
-
if not isinstance(chunk, str):
|
|
255
|
-
chunk = str(chunk)
|
|
256
|
-
|
|
257
|
-
if stream_cumulative:
|
|
258
|
-
if chunk.startswith(stream_total_text):
|
|
259
|
-
delta = chunk[len(stream_total_text):]
|
|
260
|
-
stream_total_text = chunk
|
|
261
|
-
return delta
|
|
262
|
-
stream_total_text += chunk
|
|
263
|
-
return chunk
|
|
264
|
-
|
|
265
|
-
if stream_total_text and len(chunk) > len(stream_total_text) and chunk.startswith(stream_total_text):
|
|
266
|
-
stream_cumulative = True
|
|
267
|
-
delta = chunk[len(stream_total_text):]
|
|
268
|
-
stream_total_text = chunk
|
|
269
|
-
return delta
|
|
270
|
-
|
|
271
|
-
stream_total_text += chunk
|
|
272
|
-
return chunk
|
|
302
|
+
normalizer = StreamDeltaNormalizer() if request.stream else None
|
|
273
303
|
|
|
274
304
|
try:
|
|
275
305
|
# Use MLX accelerator's optimized generation if available
|
|
@@ -290,7 +320,7 @@ class InferenceEngine:
|
|
|
290
320
|
self.status = InferenceStatus.CANCELLED
|
|
291
321
|
break
|
|
292
322
|
|
|
293
|
-
delta =
|
|
323
|
+
delta = normalizer.normalize(token) if normalizer else str(token)
|
|
294
324
|
if not delta:
|
|
295
325
|
continue
|
|
296
326
|
|
|
@@ -365,7 +395,7 @@ class InferenceEngine:
|
|
|
365
395
|
else:
|
|
366
396
|
token = str(response)
|
|
367
397
|
|
|
368
|
-
delta =
|
|
398
|
+
delta = normalizer.normalize(token) if normalizer else token
|
|
369
399
|
if request.stream and not delta:
|
|
370
400
|
continue
|
|
371
401
|
|
|
@@ -477,6 +507,7 @@ class InferenceEngine:
|
|
|
477
507
|
if request.stream:
|
|
478
508
|
from transformers import TextIteratorStreamer
|
|
479
509
|
|
|
510
|
+
normalizer = StreamDeltaNormalizer()
|
|
480
511
|
streamer = TextIteratorStreamer(
|
|
481
512
|
tokenizer,
|
|
482
513
|
skip_prompt=True,
|
|
@@ -499,6 +530,10 @@ class InferenceEngine:
|
|
|
499
530
|
if self._cancel_event.is_set():
|
|
500
531
|
self.status = InferenceStatus.CANCELLED
|
|
501
532
|
break
|
|
533
|
+
|
|
534
|
+
delta = normalizer.normalize(token)
|
|
535
|
+
if not delta:
|
|
536
|
+
continue
|
|
502
537
|
|
|
503
538
|
if first_token_time is None:
|
|
504
539
|
first_token_time = time.time() - start_time
|
|
@@ -523,7 +558,7 @@ class InferenceEngine:
|
|
|
523
558
|
)
|
|
524
559
|
last_metrics_update = current_time
|
|
525
560
|
|
|
526
|
-
yield token
|
|
561
|
+
yield delta
|
|
527
562
|
|
|
528
563
|
if any(stop in token for stop in request.stop_sequences):
|
|
529
564
|
break
|
|
@@ -603,6 +638,7 @@ class InferenceEngine:
|
|
|
603
638
|
)
|
|
604
639
|
|
|
605
640
|
if request.stream:
|
|
641
|
+
normalizer = StreamDeltaNormalizer()
|
|
606
642
|
# Stream tokens
|
|
607
643
|
for chunk in response:
|
|
608
644
|
if self._cancel_event.is_set():
|
|
@@ -610,11 +646,12 @@ class InferenceEngine:
|
|
|
610
646
|
|
|
611
647
|
if 'choices' in chunk and len(chunk['choices']) > 0:
|
|
612
648
|
token = chunk['choices'][0].get('text', '')
|
|
613
|
-
|
|
649
|
+
delta = normalizer.normalize(token)
|
|
650
|
+
if delta:
|
|
614
651
|
if first_token_time is None:
|
|
615
652
|
first_token_time = time.time()
|
|
616
653
|
tokens_generated += 1
|
|
617
|
-
yield token
|
|
654
|
+
yield delta
|
|
618
655
|
else:
|
|
619
656
|
# Return full response
|
|
620
657
|
if 'choices' in response and len(response['choices']) > 0:
|
|
@@ -4,6 +4,7 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import hashlib
|
|
6
6
|
import os
|
|
7
|
+
import re
|
|
7
8
|
import subprocess
|
|
8
9
|
from pathlib import Path
|
|
9
10
|
from typing import Dict, List, Optional, Tuple
|
|
@@ -20,13 +21,45 @@ class RepoFS:
|
|
|
20
21
|
def resolve_path(self, path: str) -> Path:
|
|
21
22
|
if not path or not isinstance(path, str):
|
|
22
23
|
raise ValidationError("path must be a non-empty string")
|
|
23
|
-
|
|
24
|
-
|
|
24
|
+
if "\x00" in path:
|
|
25
|
+
raise ValidationError("path contains null byte")
|
|
26
|
+
if path.startswith("~"):
|
|
27
|
+
raise ValidationError("path must be repo-relative (no ~)")
|
|
28
|
+
raw = Path(path)
|
|
29
|
+
if raw.is_absolute():
|
|
30
|
+
raise ValidationError("path must be repo-relative (no absolute paths)")
|
|
31
|
+
resolved = (self.root / raw).resolve()
|
|
25
32
|
if not resolved.is_relative_to(self.root):
|
|
26
33
|
raise ValidationError(f"path escapes repo root ({self.root}); use a relative path like '.'")
|
|
27
34
|
return resolved
|
|
28
35
|
|
|
36
|
+
def _validate_bool(self, name: str, value: object) -> None:
|
|
37
|
+
if not isinstance(value, bool):
|
|
38
|
+
raise ValidationError(f"{name} must be a bool")
|
|
39
|
+
|
|
40
|
+
def _validate_int(self, name: str, value: object, minimum: int = 0) -> int:
|
|
41
|
+
if isinstance(value, bool) or not isinstance(value, int):
|
|
42
|
+
raise ValidationError(f"{name} must be an int")
|
|
43
|
+
if value < minimum:
|
|
44
|
+
raise ValidationError(f"{name} must be >= {minimum}")
|
|
45
|
+
return value
|
|
46
|
+
|
|
47
|
+
def _validate_content(self, content: object) -> None:
|
|
48
|
+
if not isinstance(content, str):
|
|
49
|
+
raise ValidationError("content must be a string")
|
|
50
|
+
|
|
51
|
+
def _validate_sha256(self, value: object) -> str:
|
|
52
|
+
if not isinstance(value, str):
|
|
53
|
+
raise ValidationError("expected_sha256 must be a string")
|
|
54
|
+
normalized = value.lower()
|
|
55
|
+
if not re.fullmatch(r"[0-9a-f]{64}", normalized):
|
|
56
|
+
raise ValidationError("expected_sha256 must be a 64-character hex string")
|
|
57
|
+
return normalized
|
|
58
|
+
|
|
29
59
|
def list_dir(self, path: str = ".", recursive: bool = False, max_depth: int = 2, max_entries: int = 200) -> Dict[str, List[str]]:
|
|
60
|
+
self._validate_bool("recursive", recursive)
|
|
61
|
+
max_depth = self._validate_int("max_depth", max_depth, minimum=0)
|
|
62
|
+
max_entries = self._validate_int("max_entries", max_entries, minimum=1)
|
|
30
63
|
target = self.resolve_path(path)
|
|
31
64
|
if not target.is_dir():
|
|
32
65
|
raise ValidationError("path is not a directory")
|
|
@@ -59,25 +92,28 @@ class RepoFS:
|
|
|
59
92
|
return {"entries": entries}
|
|
60
93
|
|
|
61
94
|
def read_text(self, path: str, start_line: int = 1, end_line: Optional[int] = None, max_bytes: int = 2_000_000) -> Dict[str, object]:
|
|
95
|
+
start_line = self._validate_int("start_line", start_line, minimum=1)
|
|
96
|
+
if end_line is not None:
|
|
97
|
+
end_line = self._validate_int("end_line", end_line, minimum=start_line)
|
|
98
|
+
max_bytes = self._validate_int("max_bytes", max_bytes, minimum=1)
|
|
62
99
|
target = self.resolve_path(path)
|
|
63
100
|
if not target.is_file():
|
|
64
101
|
raise ValidationError("path is not a file")
|
|
65
102
|
size = target.stat().st_size
|
|
66
103
|
if size > max_bytes and start_line == 1 and end_line is None:
|
|
67
104
|
raise ToolError("file too large; specify a line range")
|
|
68
|
-
if start_line < 1:
|
|
69
|
-
raise ValidationError("start_line must be >= 1")
|
|
70
|
-
if end_line is not None and end_line < start_line:
|
|
71
|
-
raise ValidationError("end_line must be >= start_line")
|
|
72
105
|
|
|
73
106
|
lines: List[str] = []
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
107
|
+
try:
|
|
108
|
+
with target.open("r", encoding="utf-8") as handle:
|
|
109
|
+
for idx, line in enumerate(handle, start=1):
|
|
110
|
+
if idx < start_line:
|
|
111
|
+
continue
|
|
112
|
+
if end_line is not None and idx > end_line:
|
|
113
|
+
break
|
|
114
|
+
lines.append(line.rstrip("\n"))
|
|
115
|
+
except UnicodeDecodeError as e:
|
|
116
|
+
raise ToolError(f"file is not valid utf-8: {e}") from e
|
|
81
117
|
content = "\n".join(lines)
|
|
82
118
|
return {"path": str(target.relative_to(self.root)), "content": content, "start_line": start_line, "end_line": end_line}
|
|
83
119
|
|
|
@@ -91,6 +127,9 @@ class RepoFS:
|
|
|
91
127
|
raise ToolError(f"file is not valid utf-8: {e}") from e
|
|
92
128
|
|
|
93
129
|
def write_text(self, path: str, content: str, expected_sha256: Optional[str] = None) -> Dict[str, object]:
|
|
130
|
+
self._validate_content(content)
|
|
131
|
+
if expected_sha256 is not None:
|
|
132
|
+
expected_sha256 = self._validate_sha256(expected_sha256)
|
|
94
133
|
target = self.resolve_path(path)
|
|
95
134
|
if not target.exists() or not target.is_file():
|
|
96
135
|
raise ValidationError("path does not exist or is not a file")
|
|
@@ -102,7 +141,11 @@ class RepoFS:
|
|
|
102
141
|
return {"path": str(target.relative_to(self.root)), "sha256": self.sha256_text(content)}
|
|
103
142
|
|
|
104
143
|
def create_text(self, path: str, content: str, overwrite: bool = False) -> Dict[str, object]:
|
|
144
|
+
self._validate_content(content)
|
|
145
|
+
self._validate_bool("overwrite", overwrite)
|
|
105
146
|
target = self.resolve_path(path)
|
|
147
|
+
if target.exists() and target.is_dir():
|
|
148
|
+
raise ValidationError("path already exists and is a directory")
|
|
106
149
|
if target.exists() and not overwrite:
|
|
107
150
|
raise ValidationError("path already exists")
|
|
108
151
|
target.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -118,6 +161,10 @@ class RepoFS:
|
|
|
118
161
|
target.unlink()
|
|
119
162
|
return {"path": str(target.relative_to(self.root)), "deleted": True}
|
|
120
163
|
|
|
164
|
+
def is_git_tracked(self, target: Path) -> bool:
|
|
165
|
+
"""Return True if the path is tracked by git."""
|
|
166
|
+
return self._is_git_tracked(target)
|
|
167
|
+
|
|
121
168
|
def sha256_text(self, content: str) -> str:
|
|
122
169
|
return hashlib.sha256(content.encode("utf-8")).hexdigest()
|
|
123
170
|
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Search utilities for repo tools."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
import shutil
|
|
8
|
+
import subprocess
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Dict, List, Optional
|
|
11
|
+
|
|
12
|
+
from cortex.tools.errors import ToolError, ValidationError
|
|
13
|
+
from cortex.tools.fs_ops import RepoFS
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class RepoSearch:
|
|
17
|
+
"""Search helper constrained to a repo root."""
|
|
18
|
+
|
|
19
|
+
_DEFAULT_SKIP_DIRS = {
|
|
20
|
+
".git",
|
|
21
|
+
".cortex",
|
|
22
|
+
".eggs",
|
|
23
|
+
".mypy_cache",
|
|
24
|
+
".pytest_cache",
|
|
25
|
+
".ruff_cache",
|
|
26
|
+
".tox",
|
|
27
|
+
".venv",
|
|
28
|
+
"__pycache__",
|
|
29
|
+
"build",
|
|
30
|
+
"dist",
|
|
31
|
+
"node_modules",
|
|
32
|
+
"venv",
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
def __init__(self, repo_fs: RepoFS) -> None:
|
|
36
|
+
self.repo_fs = repo_fs
|
|
37
|
+
|
|
38
|
+
def search(self, query: str, path: str = ".", use_regex: bool = True, max_results: int = 100) -> Dict[str, List[Dict[str, object]]]:
|
|
39
|
+
if not isinstance(query, str) or not query:
|
|
40
|
+
raise ValidationError("query must be a non-empty string")
|
|
41
|
+
if isinstance(max_results, bool) or not isinstance(max_results, int):
|
|
42
|
+
raise ValidationError("max_results must be an int")
|
|
43
|
+
if max_results < 1:
|
|
44
|
+
raise ValidationError("max_results must be >= 1")
|
|
45
|
+
if not isinstance(use_regex, bool):
|
|
46
|
+
raise ValidationError("use_regex must be a bool")
|
|
47
|
+
target = self.repo_fs.resolve_path(path)
|
|
48
|
+
if not target.exists():
|
|
49
|
+
raise ValidationError("path does not exist")
|
|
50
|
+
|
|
51
|
+
if shutil.which("rg"):
|
|
52
|
+
return {"results": self._rg_search(query, target, use_regex, max_results)}
|
|
53
|
+
return {"results": self._python_search(query, target, use_regex, max_results)}
|
|
54
|
+
|
|
55
|
+
def _rg_search(self, query: str, target: Path, use_regex: bool, max_results: int) -> List[Dict[str, object]]:
|
|
56
|
+
args = ["rg", "--line-number", "--with-filename", "--no-heading"]
|
|
57
|
+
if not use_regex:
|
|
58
|
+
args.append("-F")
|
|
59
|
+
args.extend(["-e", query, str(target)])
|
|
60
|
+
result = subprocess.run(args, cwd=self.repo_fs.root, capture_output=True, text=True)
|
|
61
|
+
if result.returncode not in (0, 1):
|
|
62
|
+
raise ToolError(f"rg failed: {result.stderr.strip()}")
|
|
63
|
+
matches: List[Dict[str, object]] = []
|
|
64
|
+
for line in result.stdout.splitlines():
|
|
65
|
+
try:
|
|
66
|
+
file_path, line_no, text = line.split(":", 2)
|
|
67
|
+
except ValueError:
|
|
68
|
+
continue
|
|
69
|
+
matches.append({"path": file_path, "line": int(line_no), "text": text})
|
|
70
|
+
if len(matches) >= max_results:
|
|
71
|
+
break
|
|
72
|
+
return matches
|
|
73
|
+
|
|
74
|
+
def _python_search(self, query: str, target: Path, use_regex: bool, max_results: int) -> List[Dict[str, object]]:
|
|
75
|
+
pattern: Optional[re.Pattern[str]] = None
|
|
76
|
+
if use_regex:
|
|
77
|
+
try:
|
|
78
|
+
pattern = re.compile(query)
|
|
79
|
+
except re.error as e:
|
|
80
|
+
raise ValidationError(f"invalid regex: {e}") from e
|
|
81
|
+
results: List[Dict[str, object]] = []
|
|
82
|
+
|
|
83
|
+
if target.is_file():
|
|
84
|
+
if self._looks_binary(target):
|
|
85
|
+
return results
|
|
86
|
+
self._scan_file(target, pattern, query, results, max_results)
|
|
87
|
+
return results
|
|
88
|
+
|
|
89
|
+
skip_dirs = set(self._DEFAULT_SKIP_DIRS)
|
|
90
|
+
if target.name in skip_dirs:
|
|
91
|
+
skip_dirs.remove(target.name)
|
|
92
|
+
|
|
93
|
+
for dirpath, dirnames, filenames in os.walk(target):
|
|
94
|
+
dirnames[:] = [d for d in dirnames if d not in skip_dirs]
|
|
95
|
+
for name in filenames:
|
|
96
|
+
path = Path(dirpath) / name
|
|
97
|
+
if self._looks_binary(path):
|
|
98
|
+
continue
|
|
99
|
+
if self._scan_file(path, pattern, query, results, max_results):
|
|
100
|
+
return results
|
|
101
|
+
return results
|
|
102
|
+
|
|
103
|
+
def _scan_file(
|
|
104
|
+
self,
|
|
105
|
+
path: Path,
|
|
106
|
+
pattern: Optional[re.Pattern[str]],
|
|
107
|
+
query: str,
|
|
108
|
+
results: List[Dict[str, object]],
|
|
109
|
+
max_results: int,
|
|
110
|
+
) -> bool:
|
|
111
|
+
try:
|
|
112
|
+
with path.open("r", encoding="utf-8", errors="ignore") as handle:
|
|
113
|
+
for idx, line in enumerate(handle, start=1):
|
|
114
|
+
found = bool(pattern.search(line)) if pattern else (query in line)
|
|
115
|
+
if found:
|
|
116
|
+
results.append(
|
|
117
|
+
{
|
|
118
|
+
"path": str(path.relative_to(self.repo_fs.root)),
|
|
119
|
+
"line": idx,
|
|
120
|
+
"text": line.rstrip("\n"),
|
|
121
|
+
}
|
|
122
|
+
)
|
|
123
|
+
if len(results) >= max_results:
|
|
124
|
+
return True
|
|
125
|
+
except OSError:
|
|
126
|
+
return False
|
|
127
|
+
return False
|
|
128
|
+
|
|
129
|
+
def _looks_binary(self, path: Path, sniff_bytes: int = 4096) -> bool:
|
|
130
|
+
try:
|
|
131
|
+
with path.open("rb") as handle:
|
|
132
|
+
chunk = handle.read(sniff_bytes)
|
|
133
|
+
except OSError:
|
|
134
|
+
return True
|
|
135
|
+
return b"\x00" in chunk
|