cortex-llm 1.0.10__py3-none-any.whl → 1.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cortex/__init__.py CHANGED
@@ -5,7 +5,7 @@ A high-performance terminal interface for running Hugging Face LLMs locally
5
5
  with exclusive GPU acceleration via Metal Performance Shaders (MPS) and MLX.
6
6
  """
7
7
 
8
- __version__ = "1.0.10"
8
+ __version__ = "1.0.11"
9
9
  __author__ = "Cortex Development Team"
10
10
  __license__ = "MIT"
11
11
 
cortex/config.py CHANGED
@@ -146,18 +146,21 @@ class DeveloperConfig(BaseModel):
146
146
 
147
147
  class PathsConfig(BaseModel):
148
148
  """Path configuration."""
149
- claude_md_path: Path = Field(default_factory=lambda: Path("./CLAUDE.md"))
150
149
  templates_dir: Path = Field(default_factory=lambda: Path.home() / ".cortex" / "templates")
151
150
  plugins_dir: Path = Field(default_factory=lambda: Path.home() / ".cortex" / "plugins")
152
151
 
153
152
  class Config:
154
153
  """Main configuration class for Cortex."""
155
-
154
+
155
+ # State file for runtime state (not committed to git)
156
+ STATE_FILE = Path.home() / ".cortex" / "state.yaml"
157
+
156
158
  def __init__(self, config_path: Optional[Path] = None):
157
159
  """Initialize configuration."""
158
160
  self.config_path = config_path or Path("config.yaml")
159
161
  self._raw_config: Dict[str, Any] = {}
160
-
162
+ self._state: Dict[str, Any] = {}
163
+
161
164
  self.gpu: GPUConfig
162
165
  self.memory: MemoryConfig
163
166
  self.performance: PerformanceConfig
@@ -169,8 +172,9 @@ class Config:
169
172
  self.system: SystemConfig
170
173
  self.developer: DeveloperConfig
171
174
  self.paths: PathsConfig
172
-
175
+
173
176
  self.load()
177
+ self._load_state()
174
178
 
175
179
  def load(self) -> None:
176
180
  """Load configuration from YAML file."""
@@ -273,7 +277,7 @@ class Config:
273
277
 
274
278
  self.paths = PathsConfig(**self._get_section({
275
279
  k: v for k, v in self._raw_config.items()
276
- if k in ["claude_md_path", "templates_dir", "plugins_dir"]
280
+ if k in ["templates_dir", "plugins_dir"]
277
281
  }))
278
282
 
279
283
  except Exception as e:
@@ -303,26 +307,58 @@ class Config:
303
307
  def save(self, path: Optional[Path] = None) -> None:
304
308
  """Save configuration to YAML file."""
305
309
  save_path = path or self.config_path
306
-
310
+
311
+ # Keys that belong in state file, not config file
312
+ state_keys = {"last_used_model"}
313
+
307
314
  # Convert Path objects to strings for YAML serialization
308
315
  config_dict = {}
309
316
  for section in [self.gpu, self.memory, self.performance, self.inference,
310
317
  self.model, self.ui, self.logging, self.conversation,
311
318
  self.system, self.developer, self.paths]:
312
319
  section_dict = section.model_dump()
313
- # Convert Path objects to strings
320
+ # Convert Path objects to strings and exclude state keys
314
321
  for key, value in section_dict.items():
322
+ if key in state_keys:
323
+ continue # Skip state keys - they go in state file
315
324
  if isinstance(value, Path):
316
325
  section_dict[key] = str(value)
326
+ # Remove state keys from section_dict
327
+ for key in state_keys:
328
+ section_dict.pop(key, None)
317
329
  config_dict.update(section_dict)
318
-
330
+
319
331
  with open(save_path, 'w') as f:
320
332
  yaml.dump(config_dict, f, default_flow_style=False, sort_keys=False)
321
333
 
334
+ def _load_state(self) -> None:
335
+ """Load runtime state from state file."""
336
+ if self.STATE_FILE.exists():
337
+ try:
338
+ with open(self.STATE_FILE, 'r') as f:
339
+ self._state = yaml.safe_load(f) or {}
340
+ # Apply state to model config
341
+ if "last_used_model" in self._state:
342
+ self.model.last_used_model = self._state["last_used_model"]
343
+ except Exception as e:
344
+ print(f"Warning: Failed to load state from {self.STATE_FILE}: {e}")
345
+ self._state = {}
346
+
347
+ def _save_state(self) -> None:
348
+ """Save runtime state to state file."""
349
+ # Ensure directory exists
350
+ self.STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
351
+ try:
352
+ with open(self.STATE_FILE, 'w') as f:
353
+ yaml.dump(self._state, f, default_flow_style=False)
354
+ except Exception as e:
355
+ print(f"Warning: Failed to save state to {self.STATE_FILE}: {e}")
356
+
322
357
  def update_last_used_model(self, model_name: str) -> None:
323
- """Update the last used model and save to config file."""
358
+ """Update the last used model and save to state file."""
324
359
  self.model.last_used_model = model_name
325
- self.save()
360
+ self._state["last_used_model"] = model_name
361
+ self._save_state()
326
362
 
327
363
  def __repr__(self) -> str:
328
364
  """String representation."""
@@ -82,6 +82,62 @@ class GenerationRequest:
82
82
  if self.stop_sequences is None:
83
83
  self.stop_sequences = []
84
84
 
85
+
86
class StreamDeltaNormalizer:
    """Turn a stream of text chunks into incremental deltas.

    A stream may deliver only the new text per chunk, the whole text so
    far (cumulative), or a chunk that overlaps the end of what was already
    seen. ``normalize`` returns just the part that has not been emitted.
    """

    def __init__(self, max_overlap: int = 4096, min_cumulative_length: int = 32) -> None:
        """Create a normalizer.

        Args:
            max_overlap: Upper bound on the overlap length searched for
                between the seen text and an incoming chunk.
            min_cumulative_length: Minimum length an exact repeat of the
                seen text must have to be treated as a cumulative resend.
        """
        self._max_overlap = max_overlap
        self._min_cumulative_length = min_cumulative_length
        self._cumulative_mode = False
        self._total_text = ""

    def normalize(self, chunk: Any) -> str:
        """Return the not-yet-seen part of *chunk*.

        ``None`` and empty chunks yield ``""``; non-string chunks are
        converted with ``str`` first.
        """
        if chunk is None:
            return ""
        text = chunk if isinstance(chunk, str) else str(chunk)
        if text == "":
            return ""

        seen = self._total_text
        if seen == "":
            # First chunk: nothing to compare against yet.
            self._total_text = text
            return text

        extends_seen = text.startswith(seen)

        if not self._cumulative_mode:
            if extends_seen and len(text) > len(seen):
                # The chunk repeats everything seen and adds more.
                self._cumulative_mode = True
                self._total_text = text
                return text[len(seen):]
            if text == seen and len(text) >= self._min_cumulative_length:
                # Likely a cumulative stream repeating the full text; don't re-emit.
                self._cumulative_mode = True
                return ""
            # Default to delta mode to avoid dropping legitimate repeats.
            self._total_text = seen + text
            return text

        # Cumulative mode: emit only the new suffix.
        if extends_seen:
            self._total_text = text
            return text[len(seen):]

        # Partial overlap: look for the longest head of the chunk that
        # matches the tail of the seen text, longest candidate first.
        window = min(len(seen), len(text), self._max_overlap)
        for size in range(window, 0, -1):
            if seen.endswith(text[:size]):
                fresh = text[size:]
                self._total_text = seen + fresh
                return fresh

        # Fallback: treat as a fresh delta to avoid loss.
        self._total_text = seen + text
        return text
140
+
85
141
  class InferenceEngine:
86
142
  """GPU-accelerated inference engine."""
87
143
 
@@ -243,33 +299,7 @@ class InferenceEngine:
243
299
  tokens_generated = 0
244
300
  first_token_time = None
245
301
  last_metrics_update = time.time()
246
- stream_total_text = ""
247
- stream_cumulative = False
248
-
249
- def normalize_stream_chunk(chunk: Any) -> str:
250
- """Normalize streaming output to delta chunks when backend yields cumulative text."""
251
- nonlocal stream_total_text, stream_cumulative
252
- if chunk is None:
253
- return ""
254
- if not isinstance(chunk, str):
255
- chunk = str(chunk)
256
-
257
- if stream_cumulative:
258
- if chunk.startswith(stream_total_text):
259
- delta = chunk[len(stream_total_text):]
260
- stream_total_text = chunk
261
- return delta
262
- stream_total_text += chunk
263
- return chunk
264
-
265
- if stream_total_text and len(chunk) > len(stream_total_text) and chunk.startswith(stream_total_text):
266
- stream_cumulative = True
267
- delta = chunk[len(stream_total_text):]
268
- stream_total_text = chunk
269
- return delta
270
-
271
- stream_total_text += chunk
272
- return chunk
302
+ normalizer = StreamDeltaNormalizer() if request.stream else None
273
303
 
274
304
  try:
275
305
  # Use MLX accelerator's optimized generation if available
@@ -290,7 +320,7 @@ class InferenceEngine:
290
320
  self.status = InferenceStatus.CANCELLED
291
321
  break
292
322
 
293
- delta = normalize_stream_chunk(token) if request.stream else str(token)
323
+ delta = normalizer.normalize(token) if normalizer else str(token)
294
324
  if not delta:
295
325
  continue
296
326
 
@@ -365,7 +395,7 @@ class InferenceEngine:
365
395
  else:
366
396
  token = str(response)
367
397
 
368
- delta = normalize_stream_chunk(token) if request.stream else token
398
+ delta = normalizer.normalize(token) if normalizer else token
369
399
  if request.stream and not delta:
370
400
  continue
371
401
 
@@ -477,6 +507,7 @@ class InferenceEngine:
477
507
  if request.stream:
478
508
  from transformers import TextIteratorStreamer
479
509
 
510
+ normalizer = StreamDeltaNormalizer()
480
511
  streamer = TextIteratorStreamer(
481
512
  tokenizer,
482
513
  skip_prompt=True,
@@ -499,6 +530,10 @@ class InferenceEngine:
499
530
  if self._cancel_event.is_set():
500
531
  self.status = InferenceStatus.CANCELLED
501
532
  break
533
+
534
+ delta = normalizer.normalize(token)
535
+ if not delta:
536
+ continue
502
537
 
503
538
  if first_token_time is None:
504
539
  first_token_time = time.time() - start_time
@@ -523,7 +558,7 @@ class InferenceEngine:
523
558
  )
524
559
  last_metrics_update = current_time
525
560
 
526
- yield token
561
+ yield delta
527
562
 
528
563
  if any(stop in token for stop in request.stop_sequences):
529
564
  break
@@ -603,6 +638,7 @@ class InferenceEngine:
603
638
  )
604
639
 
605
640
  if request.stream:
641
+ normalizer = StreamDeltaNormalizer()
606
642
  # Stream tokens
607
643
  for chunk in response:
608
644
  if self._cancel_event.is_set():
@@ -610,11 +646,12 @@ class InferenceEngine:
610
646
 
611
647
  if 'choices' in chunk and len(chunk['choices']) > 0:
612
648
  token = chunk['choices'][0].get('text', '')
613
- if token:
649
+ delta = normalizer.normalize(token)
650
+ if delta:
614
651
  if first_token_time is None:
615
652
  first_token_time = time.time()
616
653
  tokens_generated += 1
617
- yield token
654
+ yield delta
618
655
  else:
619
656
  # Return full response
620
657
  if 'choices' in response and len(response['choices']) > 0:
cortex/tools/fs_ops.py CHANGED
@@ -4,6 +4,7 @@ from __future__ import annotations
4
4
 
5
5
  import hashlib
6
6
  import os
7
+ import re
7
8
  import subprocess
8
9
  from pathlib import Path
9
10
  from typing import Dict, List, Optional, Tuple
@@ -20,13 +21,45 @@ class RepoFS:
20
21
  def resolve_path(self, path: str) -> Path:
21
22
  if not path or not isinstance(path, str):
22
23
  raise ValidationError("path must be a non-empty string")
23
- raw = Path(path).expanduser()
24
- resolved = raw.resolve() if raw.is_absolute() else (self.root / raw).resolve()
24
+ if "\x00" in path:
25
+ raise ValidationError("path contains null byte")
26
+ if path.startswith("~"):
27
+ raise ValidationError("path must be repo-relative (no ~)")
28
+ raw = Path(path)
29
+ if raw.is_absolute():
30
+ raise ValidationError("path must be repo-relative (no absolute paths)")
31
+ resolved = (self.root / raw).resolve()
25
32
  if not resolved.is_relative_to(self.root):
26
33
  raise ValidationError(f"path escapes repo root ({self.root}); use a relative path like '.'")
27
34
  return resolved
28
35
 
36
+ def _validate_bool(self, name: str, value: object) -> None:
37
+ if not isinstance(value, bool):
38
+ raise ValidationError(f"{name} must be a bool")
39
+
40
+ def _validate_int(self, name: str, value: object, minimum: int = 0) -> int:
41
+ if isinstance(value, bool) or not isinstance(value, int):
42
+ raise ValidationError(f"{name} must be an int")
43
+ if value < minimum:
44
+ raise ValidationError(f"{name} must be >= {minimum}")
45
+ return value
46
+
47
+ def _validate_content(self, content: object) -> None:
48
+ if not isinstance(content, str):
49
+ raise ValidationError("content must be a string")
50
+
51
+ def _validate_sha256(self, value: object) -> str:
52
+ if not isinstance(value, str):
53
+ raise ValidationError("expected_sha256 must be a string")
54
+ normalized = value.lower()
55
+ if not re.fullmatch(r"[0-9a-f]{64}", normalized):
56
+ raise ValidationError("expected_sha256 must be a 64-character hex string")
57
+ return normalized
58
+
29
59
  def list_dir(self, path: str = ".", recursive: bool = False, max_depth: int = 2, max_entries: int = 200) -> Dict[str, List[str]]:
60
+ self._validate_bool("recursive", recursive)
61
+ max_depth = self._validate_int("max_depth", max_depth, minimum=0)
62
+ max_entries = self._validate_int("max_entries", max_entries, minimum=1)
30
63
  target = self.resolve_path(path)
31
64
  if not target.is_dir():
32
65
  raise ValidationError("path is not a directory")
@@ -59,25 +92,28 @@ class RepoFS:
59
92
  return {"entries": entries}
60
93
 
61
94
  def read_text(self, path: str, start_line: int = 1, end_line: Optional[int] = None, max_bytes: int = 2_000_000) -> Dict[str, object]:
95
+ start_line = self._validate_int("start_line", start_line, minimum=1)
96
+ if end_line is not None:
97
+ end_line = self._validate_int("end_line", end_line, minimum=start_line)
98
+ max_bytes = self._validate_int("max_bytes", max_bytes, minimum=1)
62
99
  target = self.resolve_path(path)
63
100
  if not target.is_file():
64
101
  raise ValidationError("path is not a file")
65
102
  size = target.stat().st_size
66
103
  if size > max_bytes and start_line == 1 and end_line is None:
67
104
  raise ToolError("file too large; specify a line range")
68
- if start_line < 1:
69
- raise ValidationError("start_line must be >= 1")
70
- if end_line is not None and end_line < start_line:
71
- raise ValidationError("end_line must be >= start_line")
72
105
 
73
106
  lines: List[str] = []
74
- with target.open("r", encoding="utf-8") as handle:
75
- for idx, line in enumerate(handle, start=1):
76
- if idx < start_line:
77
- continue
78
- if end_line is not None and idx > end_line:
79
- break
80
- lines.append(line.rstrip("\n"))
107
+ try:
108
+ with target.open("r", encoding="utf-8") as handle:
109
+ for idx, line in enumerate(handle, start=1):
110
+ if idx < start_line:
111
+ continue
112
+ if end_line is not None and idx > end_line:
113
+ break
114
+ lines.append(line.rstrip("\n"))
115
+ except UnicodeDecodeError as e:
116
+ raise ToolError(f"file is not valid utf-8: {e}") from e
81
117
  content = "\n".join(lines)
82
118
  return {"path": str(target.relative_to(self.root)), "content": content, "start_line": start_line, "end_line": end_line}
83
119
 
@@ -91,6 +127,9 @@ class RepoFS:
91
127
  raise ToolError(f"file is not valid utf-8: {e}") from e
92
128
 
93
129
  def write_text(self, path: str, content: str, expected_sha256: Optional[str] = None) -> Dict[str, object]:
130
+ self._validate_content(content)
131
+ if expected_sha256 is not None:
132
+ expected_sha256 = self._validate_sha256(expected_sha256)
94
133
  target = self.resolve_path(path)
95
134
  if not target.exists() or not target.is_file():
96
135
  raise ValidationError("path does not exist or is not a file")
@@ -102,7 +141,11 @@ class RepoFS:
102
141
  return {"path": str(target.relative_to(self.root)), "sha256": self.sha256_text(content)}
103
142
 
104
143
  def create_text(self, path: str, content: str, overwrite: bool = False) -> Dict[str, object]:
144
+ self._validate_content(content)
145
+ self._validate_bool("overwrite", overwrite)
105
146
  target = self.resolve_path(path)
147
+ if target.exists() and target.is_dir():
148
+ raise ValidationError("path already exists and is a directory")
106
149
  if target.exists() and not overwrite:
107
150
  raise ValidationError("path already exists")
108
151
  target.parent.mkdir(parents=True, exist_ok=True)
@@ -118,6 +161,10 @@ class RepoFS:
118
161
  target.unlink()
119
162
  return {"path": str(target.relative_to(self.root)), "deleted": True}
120
163
 
164
def is_git_tracked(self, target: Path) -> bool:
    """Public entry point that reports whether *target* is tracked by git.

    Delegates to ``_is_git_tracked`` and returns its result unchanged.
    """
    tracked = self._is_git_tracked(target)
    return tracked
167
+
121
168
  def sha256_text(self, content: str) -> str:
122
169
  return hashlib.sha256(content.encode("utf-8")).hexdigest()
123
170
 
cortex/tools/search.py CHANGED
@@ -7,7 +7,7 @@ import re
7
7
  import shutil
8
8
  import subprocess
9
9
  from pathlib import Path
10
- from typing import Dict, List
10
+ from typing import Dict, List, Optional
11
11
 
12
12
  from cortex.tools.errors import ToolError, ValidationError
13
13
  from cortex.tools.fs_ops import RepoFS
@@ -16,16 +16,37 @@ from cortex.tools.fs_ops import RepoFS
16
16
  class RepoSearch:
17
17
  """Search helper constrained to a repo root."""
18
18
 
19
+ _DEFAULT_SKIP_DIRS = {
20
+ ".git",
21
+ ".cortex",
22
+ ".eggs",
23
+ ".mypy_cache",
24
+ ".pytest_cache",
25
+ ".ruff_cache",
26
+ ".tox",
27
+ ".venv",
28
+ "__pycache__",
29
+ "build",
30
+ "dist",
31
+ "node_modules",
32
+ "venv",
33
+ }
34
+
19
35
  def __init__(self, repo_fs: RepoFS) -> None:
20
36
  self.repo_fs = repo_fs
21
37
 
22
38
  def search(self, query: str, path: str = ".", use_regex: bool = True, max_results: int = 100) -> Dict[str, List[Dict[str, object]]]:
23
39
  if not isinstance(query, str) or not query:
24
40
  raise ValidationError("query must be a non-empty string")
41
+ if isinstance(max_results, bool) or not isinstance(max_results, int):
42
+ raise ValidationError("max_results must be an int")
25
43
  if max_results < 1:
26
44
  raise ValidationError("max_results must be >= 1")
27
- root = self.repo_fs.root
45
+ if not isinstance(use_regex, bool):
46
+ raise ValidationError("use_regex must be a bool")
28
47
  target = self.repo_fs.resolve_path(path)
48
+ if not target.exists():
49
+ raise ValidationError("path does not exist")
29
50
 
30
51
  if shutil.which("rg"):
31
52
  return {"results": self._rg_search(query, target, use_regex, max_results)}
@@ -51,20 +72,64 @@ class RepoSearch:
51
72
  return matches
52
73
 
53
74
  def _python_search(self, query: str, target: Path, use_regex: bool, max_results: int) -> List[Dict[str, object]]:
54
- pattern = re.compile(query) if use_regex else None
75
+ pattern: Optional[re.Pattern[str]] = None
76
+ if use_regex:
77
+ try:
78
+ pattern = re.compile(query)
79
+ except re.error as e:
80
+ raise ValidationError(f"invalid regex: {e}") from e
55
81
  results: List[Dict[str, object]] = []
82
+
83
+ if target.is_file():
84
+ if self._looks_binary(target):
85
+ return results
86
+ self._scan_file(target, pattern, query, results, max_results)
87
+ return results
88
+
89
+ skip_dirs = set(self._DEFAULT_SKIP_DIRS)
90
+ if target.name in skip_dirs:
91
+ skip_dirs.remove(target.name)
92
+
56
93
  for dirpath, dirnames, filenames in os.walk(target):
57
- dirnames[:] = [d for d in dirnames if d != ".git"]
94
+ dirnames[:] = [d for d in dirnames if d not in skip_dirs]
58
95
  for name in filenames:
59
96
  path = Path(dirpath) / name
60
- try:
61
- text = path.read_text(encoding="utf-8")
62
- except Exception:
97
+ if self._looks_binary(path):
63
98
  continue
64
- for idx, line in enumerate(text.splitlines(), start=1):
99
+ if self._scan_file(path, pattern, query, results, max_results):
100
+ return results
101
+ return results
102
+
103
+ def _scan_file(
104
+ self,
105
+ path: Path,
106
+ pattern: Optional[re.Pattern[str]],
107
+ query: str,
108
+ results: List[Dict[str, object]],
109
+ max_results: int,
110
+ ) -> bool:
111
+ try:
112
+ with path.open("r", encoding="utf-8", errors="ignore") as handle:
113
+ for idx, line in enumerate(handle, start=1):
65
114
  found = bool(pattern.search(line)) if pattern else (query in line)
66
115
  if found:
67
- results.append({"path": str(path.relative_to(self.repo_fs.root)), "line": idx, "text": line})
116
+ results.append(
117
+ {
118
+ "path": str(path.relative_to(self.repo_fs.root)),
119
+ "line": idx,
120
+ "text": line.rstrip("\n"),
121
+ }
122
+ )
68
123
  if len(results) >= max_results:
69
- return results
70
- return results
124
+ return True
125
+ except OSError:
126
+ return False
127
+ return False
128
+
129
+ def _looks_binary(self, path: Path, sniff_bytes: int = 4096) -> bool:
130
+ try:
131
+ with path.open("rb") as handle:
132
+ chunk = handle.read(sniff_bytes)
133
+ except OSError:
134
+ return True
135
+ return b"\x00" in chunk
@@ -4,6 +4,8 @@ from __future__ import annotations
4
4
 
5
5
  import difflib
6
6
  import json
7
+ import os
8
+ import re
7
9
  from pathlib import Path
8
10
  from typing import Any, Callable, Dict, List, Optional
9
11
 
@@ -28,7 +30,7 @@ class ToolRunner:
28
30
 
29
31
  def tool_spec(self) -> Dict[str, Any]:
30
32
  return {
31
- "list_dir": {"args": {"path": "string", "recursive": "bool", "max_depth": "int"}},
33
+ "list_dir": {"args": {"path": "string", "recursive": "bool", "max_depth": "int", "max_entries": "int"}},
32
34
  "read_file": {"args": {"path": "string", "start_line": "int", "end_line": "int", "max_bytes": "int"}},
33
35
  "search": {"args": {"query": "string", "path": "string", "use_regex": "bool", "max_results": "int"}},
34
36
  "write_file": {"args": {"path": "string", "content": "string", "expected_sha256": "string"}},
@@ -43,11 +45,15 @@ class ToolRunner:
43
45
  spec = json.dumps(self.tool_spec(), ensure_ascii=True, indent=2)
44
46
  repo_root = str(self.fs.root)
45
47
  return (
46
- "[CORTEX_TOOL_INSTRUCTIONS v2]\n"
47
- "You have access to file tools. If a tool is required, respond ONLY with a <tool_calls> JSON block.\n"
48
+ "[CORTEX_TOOL_INSTRUCTIONS v3]\n"
49
+ "You have access to repo-scoped file tools.\n"
50
+ "Use tools ONLY when the user asks for repo file operations or when repo data is required to answer.\n"
51
+ "Never use tools for general conversation, creative writing, or questions unrelated to the repo.\n"
52
+ "If a tool is required, respond ONLY with a <tool_calls> JSON block.\n"
48
53
  "Do not include any other text when calling tools.\n"
49
54
  f"Repo root: {repo_root}\n"
50
55
  "All paths must be relative to the repo root (use '.' for root). Do not use absolute paths or ~.\n"
56
+ "For create/write/replace/insert/delete, paths must be file paths (not '.' or directories).\n"
51
57
  "If you are unsure about paths, call list_dir with path '.' first.\n"
52
58
  "Format:\n"
53
59
  "<tool_calls>{\"calls\":[{\"id\":\"call_1\",\"name\":\"tool_name\",\"arguments\":{...}}]}</tool_calls>\n"
@@ -88,23 +94,36 @@ class ToolRunner:
88
94
  return results
89
95
 
90
96
  def _write_file(self, path: str, content: str, expected_sha256: Optional[str] = None) -> Dict[str, Any]:
97
+ self._validate_str("content", content)
98
+ expected_sha256 = self._validate_sha256(expected_sha256)
91
99
  before = self.fs.read_full_text(path)
92
100
  self._confirm_change(path, before, content, "write")
93
101
  return self.fs.write_text(path, content, expected_sha256=expected_sha256)
94
102
 
95
103
  def _create_file(self, path: str, content: str, overwrite: bool = False) -> Dict[str, Any]:
96
- before = ""
104
+ target = self._validate_create_path(path)
105
+ self._validate_str("content", content)
106
+ self._validate_bool("overwrite", overwrite)
107
+ if target.exists() and not overwrite:
108
+ raise ValidationError("path already exists")
109
+ before = self.fs.read_full_text(path) if target.exists() else ""
97
110
  self._confirm_change(path, before, content, "create")
98
111
  return self.fs.create_text(path, content, overwrite=overwrite)
99
112
 
100
113
  def _delete_file(self, path: str) -> Dict[str, Any]:
114
+ target = self.fs.resolve_path(path)
115
+ if not target.exists() or not target.is_file():
116
+ raise ValidationError("path does not exist or is not a file")
117
+ if not self.fs.is_git_tracked(target):
118
+ raise ToolError("delete blocked: file is not tracked by git")
101
119
  before = self.fs.read_full_text(path)
102
120
  self._confirm_change(path, before, "", "delete")
103
121
  return self.fs.delete_file(path)
104
122
 
105
123
  def _replace_in_file(self, path: str, old: str, new: str, expected_replacements: int = 1) -> Dict[str, Any]:
106
- if not old:
107
- raise ValidationError("old must be a non-empty string")
124
+ self._validate_non_empty_str("old", old)
125
+ self._validate_str("new", new)
126
+ expected_replacements = self._validate_int("expected_replacements", expected_replacements, minimum=1)
108
127
  content = self.fs.read_full_text(path)
109
128
  count = content.count(old)
110
129
  if count != expected_replacements:
@@ -114,8 +133,9 @@ class ToolRunner:
114
133
  return self.fs.write_text(path, updated)
115
134
 
116
135
  def _insert_relative(self, path: str, anchor: str, content: str, expected_matches: int = 1, after: bool = True) -> Dict[str, Any]:
117
- if not anchor:
118
- raise ValidationError("anchor must be a non-empty string")
136
+ self._validate_non_empty_str("anchor", anchor)
137
+ self._validate_str("content", content)
138
+ expected_matches = self._validate_int("expected_matches", expected_matches, minimum=1)
119
139
  original = self.fs.read_full_text(path)
120
140
  count = original.count(anchor)
121
141
  if count != expected_matches:
@@ -142,3 +162,43 @@ class ToolRunner:
142
162
  prompt = f"Apply {action} to {path}?\n{diff}\n"
143
163
  if not self.confirm_callback(prompt):
144
164
  raise ToolError("change declined by user")
165
+
166
+ def _validate_str(self, name: str, value: object) -> None:
167
+ if not isinstance(value, str):
168
+ raise ValidationError(f"{name} must be a string")
169
+
170
+ def _validate_non_empty_str(self, name: str, value: object) -> None:
171
+ if not isinstance(value, str) or not value:
172
+ raise ValidationError(f"{name} must be a non-empty string")
173
+
174
+ def _validate_bool(self, name: str, value: object) -> None:
175
+ if not isinstance(value, bool):
176
+ raise ValidationError(f"{name} must be a bool")
177
+
178
+ def _validate_int(self, name: str, value: object, minimum: int = 0) -> int:
179
+ if isinstance(value, bool) or not isinstance(value, int):
180
+ raise ValidationError(f"{name} must be an int")
181
+ if value < minimum:
182
+ raise ValidationError(f"{name} must be >= {minimum}")
183
+ return value
184
+
185
+ def _validate_sha256(self, value: Optional[str]) -> Optional[str]:
186
+ if value is None:
187
+ return None
188
+ if not isinstance(value, str):
189
+ raise ValidationError("expected_sha256 must be a string")
190
+ normalized = value.lower()
191
+ if not re.fullmatch(r"[0-9a-f]{64}", normalized):
192
+ raise ValidationError("expected_sha256 must be a 64-character hex string")
193
+ return normalized
194
+
195
+ def _validate_create_path(self, path: str) -> Path:
196
+ self._validate_non_empty_str("path", path)
197
+ if path in {".", ""}:
198
+ raise ValidationError("path must be a file path, not a directory")
199
+ if path.endswith(("/", os.sep)):
200
+ raise ValidationError("path must be a file path, not a directory")
201
+ target = self.fs.resolve_path(path)
202
+ if target.exists() and target.is_dir():
203
+ raise ValidationError("path already exists and is a directory")
204
+ return target