delimit-cli 3.15.13 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/gateway/ai/license_core.py +2 -1
- package/gateway/ai/notify.py +8 -8
- package/gateway/ai/server.py +7 -15
- package/gateway/ai/swarm.py +2 -2
- package/gateway/core/contract_ledger.py +1 -1
- package/gateway/core/dependency_graph.py +1 -1
- package/gateway/core/dependency_manifest.py +1 -1
- package/gateway/core/event_backbone.py +2 -2
- package/gateway/core/event_schema.py +1 -1
- package/gateway/core/impact_analyzer.py +1 -1
- package/package.json +7 -1
- package/scripts/security-check.sh +50 -6
- package/gateway/ai/cross_model_audit.py +0 -600
- package/gateway/ai/github_scanner.py +0 -622
- package/gateway/ai/handoff_receipts.py +0 -409
- package/gateway/ai/reddit_scanner.py +0 -562
- package/gateway/ai/session_phoenix.py +0 -371
- package/gateway/ai/toolcard_cache.py +0 -327
|
@@ -1,371 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Session Phoenix — Cross-model session resurrection (LED-218).
|
|
3
|
-
|
|
4
|
-
When a session dies from rate limits, context overflow, or model switch,
|
|
5
|
-
the user runs `delimit revive` in any model to restore working state.
|
|
6
|
-
|
|
7
|
-
Architecture:
|
|
8
|
-
capture_soul() -> ~/.delimit/souls/{project_hash}/{timestamp}.json
|
|
9
|
-
revive() -> structured context blob any AI model can read
|
|
10
|
-
|
|
11
|
-
Complements delimit_session_handoff (ledger state) by saving the
|
|
12
|
-
working context: task, decisions, files, blockers, next steps.
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
import hashlib
|
|
16
|
-
import json
|
|
17
|
-
import os
|
|
18
|
-
import subprocess
|
|
19
|
-
import time
|
|
20
|
-
import uuid
|
|
21
|
-
from dataclasses import asdict, dataclass, field
|
|
22
|
-
from datetime import datetime, timezone
|
|
23
|
-
from pathlib import Path
|
|
24
|
-
from typing import Any, Dict, List, Optional
|
|
25
|
-
|
|
26
|
-
# Retention cap: _prune_souls keeps at most this many soul files per project.
MAX_SOULS_PER_PROJECT = 10

# Root folder that holds one sub-directory of souls per project hash.
SOULS_BASE_DIR = Path.home().joinpath(".delimit", "souls")

# Process-wide counter embedded in soul filenames so that captures made
# within the same second still sort in creation order.
_capture_counter = 0
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
@dataclass
class SessionSoul:
    """Snapshot of a working session that can be reloaded by a later session.

    Every field has a default, so ``SessionSoul()`` is a valid empty record.
    The field order is part of the interface (positional construction and
    ``asdict`` key order), so new fields should only be appended.
    """

    # --- Identity -----------------------------------------------------
    soul_id: str = ""  # short identifier; capture_soul uses 8 chars of a uuid4
    created_at: str = ""  # capture_soul stores an ISO-8601 UTC timestamp here
    source_model: str = "unknown"  # model that produced the snapshot
    project_path: str = ""  # project directory the snapshot belongs to

    # --- Task being worked on -----------------------------------------
    active_task: str = ""
    task_status: str = "in_progress"  # in_progress, blocked, almost_done

    # --- Decisions taken during the session ---------------------------
    decisions: List[str] = field(default_factory=list)

    # --- Files touched ------------------------------------------------
    files_modified: List[str] = field(default_factory=list)
    files_created: List[str] = field(default_factory=list)

    # --- Context worth carrying over ----------------------------------
    key_context: List[str] = field(default_factory=list)
    blockers: List[str] = field(default_factory=list)
    next_steps: List[str] = field(default_factory=list)

    # --- Git state at capture time ------------------------------------
    git_branch: str = ""
    git_sha: str = ""
    uncommitted_changes: int = 0

    # --- Token statistics ---------------------------------------------
    tokens_used: int = 0
    context_fullness: float = 0.0  # rendered as a percentage by _format_revival
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
def _project_hash(project_path: str) -> str:
|
|
67
|
-
"""Stable hash for a project path, used as directory name."""
|
|
68
|
-
normalized = os.path.realpath(project_path)
|
|
69
|
-
return hashlib.sha256(normalized.encode()).hexdigest()[:12]
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
def _project_dir(project_path: str) -> Path:
    """Return the directory under SOULS_BASE_DIR that stores this project's souls.

    The directory is named after the project's path hash; it is not created here.
    """
    folder_name = _project_hash(project_path)
    return SOULS_BASE_DIR.joinpath(folder_name)
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
def _run_git(args: List[str], cwd: str = "") -> str:
|
|
78
|
-
"""Run a git command and return stdout, or empty string on failure."""
|
|
79
|
-
try:
|
|
80
|
-
result = subprocess.run(
|
|
81
|
-
["git"] + args,
|
|
82
|
-
capture_output=True,
|
|
83
|
-
text=True,
|
|
84
|
-
timeout=5,
|
|
85
|
-
cwd=cwd or None,
|
|
86
|
-
)
|
|
87
|
-
if result.returncode == 0:
|
|
88
|
-
return result.stdout.strip()
|
|
89
|
-
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
|
90
|
-
pass
|
|
91
|
-
return ""
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
def _detect_git_state(project_path: str) -> Dict[str, Any]:
    """Collect branch, short SHA and working-tree changes for a project.

    Args:
        project_path: Directory to inspect; falls back to the current
            working directory when empty.

    Returns:
        A dict with ``git_branch``, ``git_sha``, ``uncommitted_changes``
        (number of non-blank ``git status --porcelain`` lines),
        ``files_modified`` (``git diff --name-only HEAD``) and
        ``files_created`` (untracked, non-ignored files). Values are empty
        when the corresponding git command fails.
    """
    workdir = project_path or os.getcwd()

    def _nonblank(output: str) -> List[str]:
        # Stripped, non-empty lines of a git command's output.
        return [entry.strip() for entry in output.splitlines() if entry.strip()]

    branch_name = _run_git(["rev-parse", "--abbrev-ref", "HEAD"], cwd=workdir)
    short_sha = _run_git(["rev-parse", "--short", "HEAD"], cwd=workdir)

    # Staged + unstaged + untracked entries all count as uncommitted.
    status_entries = _nonblank(_run_git(["status", "--porcelain"], cwd=workdir))

    # Tracked files that differ from HEAD (staged or unstaged).
    modified = _nonblank(_run_git(["diff", "--name-only", "HEAD"], cwd=workdir))

    # Untracked files that are not ignored.
    created = _nonblank(
        _run_git(["ls-files", "--others", "--exclude-standard"], cwd=workdir)
    )

    return {
        "git_branch": branch_name,
        "git_sha": short_sha,
        "uncommitted_changes": len(status_entries),
        "files_modified": modified,
        "files_created": created,
    }
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
def capture_soul(
    active_task: str = "",
    decisions: Optional[List[str]] = None,
    key_context: Optional[List[str]] = None,
    blockers: Optional[List[str]] = None,
    next_steps: Optional[List[str]] = None,
    source_model: str = "unknown",
    project_path: str = "",
    task_status: str = "in_progress",
    tokens_used: int = 0,
    context_fullness: float = 0.0,
) -> SessionSoul:
    """Build a SessionSoul for the current session and write it to disk.

    Git branch, SHA, uncommitted-change count and the modified/created file
    lists are detected from the project directory rather than passed in.

    Args:
        active_task: Description of the task in progress.
        decisions: Key decisions made so far (``None`` means none).
        key_context: Facts the next session should know.
        blockers: Things currently preventing progress.
        next_steps: Ordered follow-up actions.
        source_model: Name of the model capturing the snapshot.
        project_path: Project directory; defaults to the current directory.
        task_status: Status label stored with the task.
        tokens_used: Token count recorded with the snapshot.
        context_fullness: Context-window usage recorded with the snapshot.

    Returns:
        The stored SessionSoul.
    """
    root = project_path or os.getcwd()
    repo = _detect_git_state(root)

    fields: Dict[str, Any] = {
        "soul_id": str(uuid.uuid4())[:8],
        "created_at": datetime.now(timezone.utc).isoformat(),
        "source_model": source_model,
        "project_path": root,
        "active_task": active_task,
        "task_status": task_status,
        "decisions": decisions or [],
        "key_context": key_context or [],
        "blockers": blockers or [],
        "next_steps": next_steps or [],
        "tokens_used": tokens_used,
        "context_fullness": context_fullness,
    }
    # Technical state comes from git, never from the caller.
    for key in (
        "files_modified",
        "files_created",
        "git_branch",
        "git_sha",
        "uncommitted_changes",
    ):
        fields[key] = repo[key]

    snapshot = SessionSoul(**fields)
    _store_soul(snapshot)
    return snapshot
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
def _store_soul(soul: SessionSoul) -> Path:
    """Write a soul to its project directory and refresh ``latest.json``.

    The file name is ``<UTC timestamp>_<counter>_<soul_id>.json``; the
    process-wide counter keeps names sortable when several souls are stored
    within the same second. After writing, old souls are pruned.

    Returns:
        Path of the newly written soul file.
    """
    global _capture_counter

    target_dir = _project_dir(soul.project_path)
    target_dir.mkdir(parents=True, exist_ok=True)

    _capture_counter += 1
    stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    soul_file = target_dir / f"{stamp}_{_capture_counter:06d}_{soul.soul_id}.json"

    payload = json.dumps(asdict(soul), indent=2)
    soul_file.write_text(payload)

    # latest.json is a plain copy rather than a symlink (symlinks can be
    # fragile across systems).
    (target_dir / "latest.json").write_text(payload)

    # Enforce the per-project retention limit.
    _prune_souls(target_dir)

    return soul_file
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
def _prune_souls(proj_dir: Path) -> None:
    """Delete the oldest soul files beyond MAX_SOULS_PER_PROJECT.

    Soul files are every ``*.json`` in ``proj_dir`` except ``latest.json``.
    Age is decided by file name, which starts with the capture timestamp.
    """
    candidates = [
        entry
        for entry in proj_dir.iterdir()
        if entry.suffix == ".json" and entry.name != "latest.json"
    ]
    candidates.sort(key=lambda entry: entry.name)

    surplus = len(candidates) - MAX_SOULS_PER_PROJECT
    if surplus <= 0:
        return
    for stale in candidates[:surplus]:
        stale.unlink(missing_ok=True)
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
def _load_soul(path: Path) -> Optional[SessionSoul]:
|
|
200
|
-
"""Load a soul from a JSON file."""
|
|
201
|
-
try:
|
|
202
|
-
data = json.loads(path.read_text())
|
|
203
|
-
return SessionSoul(**{k: v for k, v in data.items() if k in SessionSoul.__dataclass_fields__})
|
|
204
|
-
except (json.JSONDecodeError, TypeError, KeyError, OSError):
|
|
205
|
-
return None
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
def list_souls(project_path: str = "") -> List[SessionSoul]:
    """Return every readable soul stored for a project, newest first.

    Args:
        project_path: Project directory; defaults to the current directory.

    Returns:
        Souls ordered by descending file name (file names start with the
        capture timestamp). Files that fail to load are skipped. An empty
        list is returned when the project has no soul directory.
    """
    storage = _project_dir(project_path or os.getcwd())
    if not storage.exists():
        return []

    soul_paths = [
        entry
        for entry in storage.iterdir()
        if entry.suffix == ".json" and entry.name != "latest.json"
    ]
    soul_paths.sort(key=lambda entry: entry.name, reverse=True)

    # filter(None, ...) drops the files _load_soul could not parse.
    return list(filter(None, map(_load_soul, soul_paths)))
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
def get_latest_soul(project_path: str = "") -> Optional[SessionSoul]:
    """Return the soul stored in the project's ``latest.json``, if any.

    Args:
        project_path: Project directory; defaults to the current directory.

    Returns:
        The most recently stored soul, or ``None`` when ``latest.json`` is
        missing or cannot be loaded.
    """
    pointer = _project_dir(project_path or os.getcwd()) / "latest.json"
    if not pointer.exists():
        return None
    return _load_soul(pointer)
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
def _format_revival(soul: SessionSoul) -> str:
|
|
238
|
-
"""Format a soul into a readable context string for any AI model."""
|
|
239
|
-
lines = []
|
|
240
|
-
lines.append("=" * 60)
|
|
241
|
-
lines.append("SESSION PHOENIX -- Revived Session Context")
|
|
242
|
-
lines.append("=" * 60)
|
|
243
|
-
lines.append("")
|
|
244
|
-
|
|
245
|
-
lines.append(f"Soul ID: {soul.soul_id}")
|
|
246
|
-
lines.append(f"Captured: {soul.created_at}")
|
|
247
|
-
lines.append(f"Source Model: {soul.source_model}")
|
|
248
|
-
lines.append(f"Project: {soul.project_path}")
|
|
249
|
-
lines.append("")
|
|
250
|
-
|
|
251
|
-
# Current task
|
|
252
|
-
lines.append("--- ACTIVE TASK ---")
|
|
253
|
-
if soul.active_task:
|
|
254
|
-
lines.append(f" {soul.active_task}")
|
|
255
|
-
lines.append(f" Status: {soul.task_status}")
|
|
256
|
-
else:
|
|
257
|
-
lines.append(" (none recorded)")
|
|
258
|
-
lines.append("")
|
|
259
|
-
|
|
260
|
-
# Decisions
|
|
261
|
-
if soul.decisions:
|
|
262
|
-
lines.append("--- KEY DECISIONS ---")
|
|
263
|
-
for d in soul.decisions:
|
|
264
|
-
lines.append(f" - {d}")
|
|
265
|
-
lines.append("")
|
|
266
|
-
|
|
267
|
-
# Files
|
|
268
|
-
if soul.files_modified or soul.files_created:
|
|
269
|
-
lines.append("--- FILES CHANGED ---")
|
|
270
|
-
for f in soul.files_modified:
|
|
271
|
-
lines.append(f" M {f}")
|
|
272
|
-
for f in soul.files_created:
|
|
273
|
-
lines.append(f" + {f}")
|
|
274
|
-
lines.append("")
|
|
275
|
-
|
|
276
|
-
# Context
|
|
277
|
-
if soul.key_context:
|
|
278
|
-
lines.append("--- KEY CONTEXT ---")
|
|
279
|
-
for c in soul.key_context:
|
|
280
|
-
lines.append(f" - {c}")
|
|
281
|
-
lines.append("")
|
|
282
|
-
|
|
283
|
-
# Blockers
|
|
284
|
-
if soul.blockers:
|
|
285
|
-
lines.append("--- BLOCKERS ---")
|
|
286
|
-
for b in soul.blockers:
|
|
287
|
-
lines.append(f" ! {b}")
|
|
288
|
-
lines.append("")
|
|
289
|
-
|
|
290
|
-
# Next steps
|
|
291
|
-
if soul.next_steps:
|
|
292
|
-
lines.append("--- NEXT STEPS ---")
|
|
293
|
-
for i, s in enumerate(soul.next_steps, 1):
|
|
294
|
-
lines.append(f" {i}. {s}")
|
|
295
|
-
lines.append("")
|
|
296
|
-
|
|
297
|
-
# Git state
|
|
298
|
-
lines.append("--- GIT STATE ---")
|
|
299
|
-
lines.append(f" Branch: {soul.git_branch or '(unknown)'}")
|
|
300
|
-
lines.append(f" SHA: {soul.git_sha or '(unknown)'}")
|
|
301
|
-
lines.append(f" Uncommitted changes: {soul.uncommitted_changes}")
|
|
302
|
-
lines.append("")
|
|
303
|
-
|
|
304
|
-
# Token stats
|
|
305
|
-
if soul.tokens_used or soul.context_fullness:
|
|
306
|
-
lines.append("--- SESSION STATS ---")
|
|
307
|
-
if soul.tokens_used:
|
|
308
|
-
lines.append(f" Tokens used: ~{soul.tokens_used:,}")
|
|
309
|
-
if soul.context_fullness:
|
|
310
|
-
lines.append(f" Context fullness: {soul.context_fullness:.0%}")
|
|
311
|
-
lines.append("")
|
|
312
|
-
|
|
313
|
-
lines.append("=" * 60)
|
|
314
|
-
return "\n".join(lines)
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
def revive(project_path: str = "", soul_id: str = "") -> Dict[str, Any]:
    """Restore a stored session soul for a project.

    Args:
        project_path: Project directory; defaults to the current directory.
        soul_id: When given, revive the soul with this ID instead of the
            latest one.

    Returns:
        On success, ``{"status": "revived", "soul": <dict>, "context": <str>}``
        where ``context`` is the formatted briefing. Otherwise a dict with
        ``status`` ``"not_found"`` (unknown ``soul_id``) or ``"no_souls"``
        (nothing stored) and an explanatory ``message``.
    """
    project_path = project_path or os.getcwd()

    if soul_id:
        # Look through every stored soul for the requested ID.
        chosen = next(
            (
                candidate
                for candidate in list_souls(project_path)
                if candidate.soul_id == soul_id
            ),
            None,
        )
        if chosen is None:
            return {
                "status": "not_found",
                "message": f"No soul with ID '{soul_id}' found for project {project_path}",
            }
    else:
        chosen = get_latest_soul(project_path)
        if not chosen:
            return {
                "status": "no_souls",
                "message": f"No session souls found for {project_path}. Nothing to revive.",
                "hint": "Use delimit_soul_capture to save session state before ending.",
            }

    return {
        "status": "revived",
        "soul": asdict(chosen),
        "context": _format_revival(chosen),
    }
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
def should_auto_capture(
    context_fullness: float = 0.0,
    session_age_minutes: int = 0,
    last_capture_minutes_ago: int = -1,
) -> bool:
    """Decide whether a soul should be captured automatically right now.

    A capture is due when either condition holds:
      - the context window is at least 70% full, or
      - the session is at least 30 minutes old and no capture happened in
        the last 15 minutes (a negative ``last_capture_minutes_ago`` means
        nothing has been captured yet).

    Explicit session end is the caller's responsibility, not this function's.
    """
    context_nearly_full = context_fullness >= 0.7
    if context_nearly_full:
        return True

    session_is_old = session_age_minutes >= 30
    never_captured = last_capture_minutes_ago < 0
    capture_is_stale = last_capture_minutes_ago >= 15
    if session_is_old and (never_captured or capture_is_stale):
        return True
    return False
|
|
@@ -1,327 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Toolcard Delta Cache — LED-219
|
|
3
|
-
|
|
4
|
-
MCP servers dump full tool definitions every session. GitHub's MCP server
|
|
5
|
-
alone sends 40K+ tokens of tool schemas. This module stores hashed tool
|
|
6
|
-
schemas and only surfaces diffs when schemas change, cutting token waste
|
|
7
|
-
on tool definitions dramatically.
|
|
8
|
-
|
|
9
|
-
This is a MEASUREMENT tool first — it shows the savings potential. The
|
|
10
|
-
actual MCP protocol optimization to send compressed schemas is a separate
|
|
11
|
-
step.
|
|
12
|
-
|
|
13
|
-
Architecture:
|
|
14
|
-
- SHA256 hash of each tool's canonical schema (name + description + parameters)
|
|
15
|
-
- Persistent JSON cache at ~/.delimit/toolcard_cache.json
|
|
16
|
-
- Per-session JSONL logs at ~/.delimit/toolcard_sessions/{date}.jsonl
|
|
17
|
-
- Thread-safe via atomic writes (write to tmp, rename)
|
|
18
|
-
- No external dependencies — stdlib only
|
|
19
|
-
"""
|
|
20
|
-
|
|
21
|
-
import hashlib
|
|
22
|
-
import json
|
|
23
|
-
import logging
|
|
24
|
-
import os
|
|
25
|
-
import tempfile
|
|
26
|
-
import time
|
|
27
|
-
from datetime import datetime, timezone
|
|
28
|
-
from pathlib import Path
|
|
29
|
-
from typing import Any, Dict, List, Optional
|
|
30
|
-
|
|
31
|
-
# Logger for this module.
logger = logging.getLogger("delimit.toolcard_cache")

# Persistent registry of hashed tool schemas.
CACHE_FILE = Path.home().joinpath(".delimit", "toolcard_cache.json")

# Directory that receives one JSONL session log per UTC date.
SESSION_DIR = Path.home().joinpath(".delimit", "toolcard_sessions")
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def _canonical_json(obj: Any) -> str:
|
|
38
|
-
"""Produce a deterministic JSON string for hashing."""
|
|
39
|
-
return json.dumps(obj, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def _hash_schema(tool: Dict[str, Any]) -> str:
    """Return the SHA-256 hex digest of a tool's canonical schema.

    Only ``name``, ``description`` and ``parameters`` take part in the hash;
    missing keys default to ``""``, ``""`` and ``{}`` respectively, and any
    other keys on the tool dict are ignored.
    """
    hashed_fields = (("name", ""), ("description", ""), ("parameters", {}))
    canonical = {key: tool.get(key, default) for key, default in hashed_fields}
    encoded = _canonical_json(canonical).encode("utf-8")
    return hashlib.sha256(encoded).hexdigest()
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
def _estimate_tokens(obj: Any) -> int:
    """Approximate the token cost of ``obj`` as canonical-JSON length / 4.

    The estimate is never lower than 1.
    """
    char_count = len(_canonical_json(obj))
    approx = char_count // 4
    return approx if approx > 1 else 1
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def _atomic_write_json(path: Path, data: Any) -> None:
|
|
58
|
-
"""Write JSON atomically: write to temp file, then rename."""
|
|
59
|
-
path.parent.mkdir(parents=True, exist_ok=True)
|
|
60
|
-
fd, tmp_path = tempfile.mkstemp(
|
|
61
|
-
dir=str(path.parent), suffix=".tmp", prefix=".toolcard_"
|
|
62
|
-
)
|
|
63
|
-
try:
|
|
64
|
-
with os.fdopen(fd, "w") as f:
|
|
65
|
-
json.dump(data, f, indent=2, default=str)
|
|
66
|
-
os.replace(tmp_path, str(path))
|
|
67
|
-
except Exception:
|
|
68
|
-
# Clean up temp file on failure
|
|
69
|
-
try:
|
|
70
|
-
os.unlink(tmp_path)
|
|
71
|
-
except OSError:
|
|
72
|
-
pass
|
|
73
|
-
raise
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
class ToolcardCache:
    """Registry of hashed tool schemas with per-session hit/miss statistics.

    Schemas are stored by tool name together with their hash, so a later
    registration can report only the tools that are new or changed and
    estimate how many tokens the unchanged ones would have cost.
    """

    def __init__(self, cache_file: Optional[Path] = None, session_dir: Optional[Path] = None):
        """Load the persisted cache and start a fresh session.

        Args:
            cache_file: Cache location; defaults to CACHE_FILE.
            session_dir: Session-log directory; defaults to SESSION_DIR.
        """
        self._cache_file = cache_file or CACHE_FILE
        self._session_dir = session_dir or SESSION_DIR
        self.cache: Dict[str, Dict[str, Any]] = self._load()
        # Per-session tracking
        self._session_start = datetime.now(timezone.utc).isoformat()
        self._session_calls: Dict[str, int] = {}  # tool_name -> call count
        self._session_registered = 0
        self._session_hits = 0
        self._session_misses = 0

    def _load(self) -> Dict[str, Dict[str, Any]]:
        """Load the cache from disk.

        Returns:
            The stored ``name -> entry`` mapping, or ``{}`` when the file is
            missing, unreadable, not valid JSON, or not a JSON object.
            Entries that are not dicts carrying a string ``hash`` are
            dropped, so the rest of the class can rely on the entry shape.
        """
        try:
            if not self._cache_file.exists():
                return {}
            with open(self._cache_file, "r") as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            logger.warning("Toolcard cache load failed: %s", e)
            return {}

        if not isinstance(data, dict):
            return {}

        valid = {
            name: entry
            for name, entry in data.items()
            if isinstance(entry, dict) and isinstance(entry.get("hash"), str)
        }
        dropped = len(data) - len(valid)
        if dropped:
            # A dropped tool is simply treated as new on its next registration.
            logger.warning("Toolcard cache: ignored %d malformed entries", dropped)
        return valid

    def _save(self) -> None:
        """Persist the cache to disk atomically."""
        _atomic_write_json(self._cache_file, self.cache)

    @staticmethod
    def _token_summary(full_tokens: int, delta_tokens: int) -> Dict[str, Any]:
        """Return the token-savings fields shared by register/estimate results."""
        saved_tokens = max(0, full_tokens - delta_tokens)
        savings_pct = round((saved_tokens / full_tokens * 100), 1) if full_tokens > 0 else 0.0
        return {
            "full_tokens": full_tokens,
            "delta_tokens": delta_tokens,
            "savings_pct": savings_pct,
            "saved_tokens": saved_tokens,
        }

    def _cached_hash(self, name: str) -> Optional[str]:
        """Return the stored hash for ``name``, or ``None`` if not usable."""
        entry = self.cache.get(name)
        if isinstance(entry, dict):
            return entry.get("hash")
        return None

    def register_tools(self, tools: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Register tool schemas and report which ones are new or changed.

        Tools without a ``name`` are ignored entirely: they are neither
        cached nor counted in the token figures.

        Args:
            tools: List of tool schema dicts, each with 'name', 'description', 'parameters'.

        Returns:
            Dict with:
                new_tools: list of tool schemas not previously cached
                changed_tools: list of tool schemas whose hash differs
                unchanged_tools: list of tool names (no schema, just names)
                full_tokens: estimated tokens if all named schemas were sent
                delta_tokens: estimated tokens for new/changed schemas plus
                    the names of unchanged tools
                savings_pct: percentage reduction
                saved_tokens: absolute token count saved
                total_registered: number of tool dicts passed in
                cache_size: number of tools in the cache afterwards
        """
        new_tools = []
        changed_tools = []
        unchanged_names = []
        now = datetime.now(timezone.utc).isoformat()
        full_tokens = 0

        for tool in tools:
            name = tool.get("name", "")
            if not name:
                continue
            full_tokens += _estimate_tokens(tool)
            h = _hash_schema(tool)
            previous = self.cache.get(name)

            if not isinstance(previous, dict):
                # New tool (or an unusable entry, which is replaced)
                new_tools.append(tool)
                self.cache[name] = {
                    "hash": h,
                    "schema": tool,
                    "first_seen": now,
                    "last_changed": now,
                }
                self._session_misses += 1
            elif previous.get("hash") != h:
                # Changed tool: keep the original first_seen timestamp
                changed_tools.append(tool)
                self.cache[name] = {
                    "hash": h,
                    "schema": tool,
                    "first_seen": previous.get("first_seen", now),
                    "last_changed": now,
                }
                self._session_misses += 1
            else:
                # Unchanged: cache hit
                unchanged_names.append(name)
                self._session_hits += 1

        self._session_registered = len(tools)
        self._save()

        # New/changed schemas are sent in full; unchanged tools still need
        # their names sent (compact summary).
        delta_tokens = sum(_estimate_tokens(t) for t in new_tools + changed_tools)
        delta_tokens += sum(len(n) // 4 + 1 for n in unchanged_names)

        return {
            "new_tools": new_tools,
            "changed_tools": changed_tools,
            "unchanged_tools": unchanged_names,
            **self._token_summary(full_tokens, delta_tokens),
            "total_registered": len(tools),
            "cache_size": len(self.cache),
        }

    def get_delta(self, tool_names: List[str]) -> Dict[str, Any]:
        """Report which of the given tool names are present in the cache.

        Only names are checked; schema hashes are not compared here. Each
        lookup also updates the session hit/miss counters.

        Args:
            tool_names: List of tool names to check against the cache.

        Returns:
            Dict with ``cached`` and ``missing`` name lists, their counts,
            and ``hit_rate`` as a percentage (0.0 for an empty input).
        """
        cached = []
        missing = []

        for name in tool_names:
            if name in self.cache:
                cached.append(name)
                self._session_hits += 1
            else:
                missing.append(name)
                self._session_misses += 1

        return {
            "cached": cached,
            "missing": missing,
            "cached_count": len(cached),
            "missing_count": len(missing),
            "hit_rate": round(len(cached) / len(tool_names) * 100, 1) if tool_names else 0.0,
        }

    def record_call(self, tool_name: str) -> None:
        """Record that a tool was called in the current session."""
        self._session_calls[tool_name] = self._session_calls.get(tool_name, 0) + 1

    def get_stats(self) -> Dict[str, Any]:
        """Return cache size, session hit rate, cached token estimate and top tools."""
        total_checks = self._session_hits + self._session_misses
        hit_rate = round(
            (self._session_hits / total_checks * 100), 1
        ) if total_checks > 0 else 0.0

        # Estimate total cached schema tokens
        cached_tokens = sum(
            _estimate_tokens(entry.get("schema", {}))
            for entry in self.cache.values()
        )

        # Ten most-called tools this session
        top_tools = sorted(
            self._session_calls.items(), key=lambda x: x[1], reverse=True
        )[:10]

        return {
            "total_cached_tools": len(self.cache),
            "session_registered": self._session_registered,
            "session_hits": self._session_hits,
            "session_misses": self._session_misses,
            "session_hit_rate": hit_rate,
            "cached_schema_tokens": cached_tokens,
            "session_tools_called": dict(top_tools),
            "session_start": self._session_start,
            "cache_file": str(self._cache_file),
        }

    def estimate_savings(self, tools: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Estimate token savings without modifying the cache or counters.

        Dry-run version of register_tools: the token figures match what
        register_tools would report for the same input, including the rule
        that tools without a ``name`` are ignored.
        """
        hits = 0
        misses = 0
        full_tokens = 0
        delta_tokens = 0

        for tool in tools:
            name = tool.get("name", "")
            if not name:
                continue
            schema_tokens = _estimate_tokens(tool)
            full_tokens += schema_tokens
            # Hash once per tool and reuse the verdict for counts and tokens.
            if self._cached_hash(name) == _hash_schema(tool):
                hits += 1
                delta_tokens += len(name) // 4 + 1  # only the name is sent
            else:
                misses += 1
                delta_tokens += schema_tokens  # full schema must be sent

        return {
            "total_tools": len(tools),
            "would_be_cached": hits,
            "would_need_sending": misses,
            **self._token_summary(full_tokens, delta_tokens),
        }

    def clear(self) -> Dict[str, Any]:
        """Clear the cache and persist the empty state.

        Returns:
            Dict with the number of entries removed and a message.
        """
        count = len(self.cache)
        self.cache = {}
        self._save()
        return {
            "cleared": count,
            "message": f"Cleared {count} cached tool schemas. Next session will send full schemas.",
        }

    def flush_session(self) -> Dict[str, Any]:
        """Append the current session stats to today's JSONL session log.

        Returns:
            Dict with the log file path and the record that was written.
        """
        self._session_dir.mkdir(parents=True, exist_ok=True)
        date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        session_file = self._session_dir / f"{date_str}.jsonl"

        record = {
            "session_start": self._session_start,
            "flushed_at": datetime.now(timezone.utc).isoformat(),
            "stats": self.get_stats(),
        }

        with open(session_file, "a") as f:
            f.write(json.dumps(record, default=str) + "\n")

        return {"written_to": str(session_file), "record": record}
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
# Module-level singleton for use by server.py
|
|
313
|
-
_cache_instance: Optional[ToolcardCache] = None
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
def get_cache() -> ToolcardCache:
    """Return the shared ToolcardCache, creating it on first use."""
    global _cache_instance
    instance = _cache_instance
    if instance is None:
        # First call since import or reset_cache(): build and remember it.
        instance = _cache_instance = ToolcardCache()
    return instance
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
def reset_cache() -> None:
    """Forget the shared cache instance so the next get_cache() rebuilds it.

    Intended for tests; the cache file on disk is left untouched.
    """
    global _cache_instance
    _cache_instance = None
|