delimit-cli 3.15.12 → 3.15.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -590,3 +590,109 @@ def check_docs_freshness(
590
590
  "stale": stale,
591
591
  "message": f"{len(findings)} doc issue(s) found" if findings else "Documentation is up to date",
592
592
  }
593
+
594
+
595
+ # ═══════════════════════════════════════════════════════════════════════
596
+ # LED-279: Self-Extending Swarm — Founder Mode
597
+ # Agents can create new MCP tools when authorized
598
+ # ═══════════════════════════════════════════════════════════════════════
599
+
600
# Root directory for agent-authored tool modules; tools are grouped into one
# subdirectory per venture beneath it.
TOOLS_DIR = Path.home().joinpath(".delimit", "swarm", "custom_tools")
601
+
602
+
603
def create_tool(
    name: str,
    code: str,
    venture: str,
    agent_id: str = "",
    description: str = "",
) -> Dict[str, Any]:
    """Write a new custom MCP tool module to disk (founder mode only).

    The module is stored at ``TOOLS_DIR/<venture>/<safe_name>.py`` and the
    creation is logged with status ``pending_review``. This function does not
    activate the tool.

    Args:
        name: Human-supplied tool name. It is lower-cased and ``-``/spaces are
            turned into ``_``; the result may only contain ``a-z``, ``0-9``
            and ``_``.
        code: Python source of the tool module.
        venture: Venture namespace. Must equal the venture recorded for the
            agent in the registry and must be a single path component.
        agent_id: Registry id of the agent requesting the creation.
        description: Free-text description; recorded in the creation log.

    Returns:
        On success a dict with ``status == "created"`` plus the tool name,
        path, venture, creator, line count and next step. On any validation,
        authorization or security failure a dict containing an ``error`` key
        (nothing is written in that case).
    """
    import re  # local import: the module's import block is not in view here

    if not name or not code:
        return {"error": "name and code are required"}

    # Both values become path components below, so anything that could climb
    # out of TOOLS_DIR (separators, "..") is rejected before touching disk.
    safe_name = name.lower().replace("-", "_").replace(" ", "_")
    if not re.fullmatch(r"[a-z0-9_]+", safe_name):
        return {
            "error": (
                f"Invalid tool name '{name}': only letters, digits, spaces, "
                "'-' and '_' are allowed"
            ),
        }
    if venture in ("", ".", "..") or any(sep in venture for sep in ("/", "\\", "\0")):
        return {"error": f"Invalid venture '{venture}': must be a single path component"}

    # Verify agent has creation authority
    registry = _load_registry()
    agent = registry.get("agents", {}).get(agent_id, {})
    role = agent.get("role", "")
    if role not in ("architect", "senior_dev"):
        return {
            "error": f"Role '{role}' cannot create tools. Only architect and senior_dev have creation authority.",
            "agent_id": agent_id,
        }

    # Verify venture namespace
    if agent.get("venture", "") != venture:
        return {"error": f"Agent '{agent_id}' cannot create tools for venture '{venture}'"}

    # Security scan — check for dangerous patterns.
    # NOTE(review): this is a plain substring denylist and is easy to bypass
    # (e.g. `subprocess.run`, `from os import system`). Treat it as a tripwire
    # only; it is not a sandbox.
    dangerous = [
        "subprocess.call", "os.system", "exec(", "eval(",
        "import socket", "import http.server",
        "__import__", "compile(",
    ]
    for pattern in dangerous:
        if pattern in code:
            return {
                "error": f"Security violation: '{pattern}' is not allowed in custom tools",
                "blocked_pattern": pattern,
            }

    # Write tool module
    venture_dir = TOOLS_DIR / venture
    venture_dir.mkdir(parents=True, exist_ok=True)
    tool_path = venture_dir / f"{safe_name}.py"
    tool_path.write_text(code, encoding="utf-8")

    line_count = len(code.split("\n"))

    # Log creation
    _log({
        "action": "tool_created",
        "tool_name": safe_name,
        "venture": venture,
        "agent_id": agent_id,
        "description": description,
        "path": str(tool_path),
        "lines": line_count,
        "status": "pending_review",
    })

    return {
        "status": "created",
        "tool_name": safe_name,
        "path": str(tool_path),
        "venture": venture,
        "created_by": agent_id,
        "lines": line_count,
        "next_step": "Reviewer agent must approve before tool is activated",
        "message": f"Tool '{safe_name}' created for {venture}. Pending reviewer approval.",
    }
675
+
676
+
677
def list_custom_tools(venture: str = "") -> Dict[str, Any]:
    """List custom tool modules found under ``TOOLS_DIR``.

    Args:
        venture: When non-empty, only ``TOOLS_DIR/<venture>`` is scanned;
            otherwise every subdirectory of ``TOOLS_DIR`` is scanned. Must be
            a single path component.

    Returns:
        A dict with ``status``, ``tools`` (one entry per ``*.py`` file with
        its name, venture, path and line count), ``total`` and
        ``venture_filter``. For an invalid ``venture`` the status is
        ``"error"``, an ``error`` message is included and ``tools`` is empty.
    """
    # The filter is joined onto TOOLS_DIR, so refuse anything that could
    # point the scan (and the file reads below) outside of it.
    if venture in (".", "..") or any(sep in venture for sep in ("/", "\\", "\0")):
        return {
            "status": "error",
            "error": f"Invalid venture '{venture}': must be a single path component",
            "tools": [],
            "total": 0,
            "venture_filter": venture,
        }

    TOOLS_DIR.mkdir(parents=True, exist_ok=True)

    if venture:
        search_dirs = [TOOLS_DIR / venture]
    else:
        # iterdir() order is arbitrary; sort so the listing is deterministic.
        search_dirs = sorted(TOOLS_DIR.iterdir())

    tools = []
    for directory in search_dirs:
        if not directory.is_dir():
            continue
        for module_path in sorted(directory.glob("*.py")):
            # errors="replace" keeps one undecodable file from failing the
            # whole listing; only the line count is derived from the text.
            source = module_path.read_text(encoding="utf-8", errors="replace")
            tools.append({
                "name": module_path.stem,
                "venture": directory.name,
                "path": str(module_path),
                "lines": len(source.split("\n")),
            })

    return {
        "status": "ok",
        "tools": tools,
        "total": len(tools),
        "venture_filter": venture or "all",
    }
@@ -14,7 +14,35 @@ Reference: Consensus 118/119/120 — Tool Segmentation Architecture.
14
14
 
15
15
  from typing import Dict, Literal
16
16
 
17
- Tier = Literal["public", "ops_pack", "internal", "experimental"]
17
+ Tier = Literal["core", "public", "ops_pack", "internal", "experimental"]
18
+
19
+ # ─────────────────────────────────────────────────────────────────────
20
+ # CORE WORKFLOWS — the 5 outcomes first users should see
21
+ #
22
+ # 1. Govern: catch breaking changes before deploy
23
+ # 2. Remember: persistent memory across models and sessions
24
+ # 3. Handoff: switch models without losing context
25
+ # 4. Lint: validate API specs for drift
26
+ # 5. Track: ledger for tasks that survive context resets
27
+ #
28
+ # Per consensus (STR-040): sell 5 workflows, not 162 tools
29
+ # ─────────────────────────────────────────────────────────────────────
30
+
31
# Names of the tools that make up the core workflows, grouped by workflow.
# NOTE(review): "delimit_init" and "delimit_gov_health" are listed here but
# are still tiered "public" in TOOL_TIERS — confirm which one is intended.
CORE_TOOLS = {
    # Govern
    "delimit_lint", "delimit_scan", "delimit_gov_health", "delimit_drift_check",
    # Remember
    "delimit_memory_store", "delimit_memory_search", "delimit_memory_recent",
    # Handoff
    "delimit_session_handoff", "delimit_session_history",
    # Lint + Diff
    "delimit_diff", "delimit_semver", "delimit_explain",
    # Track
    "delimit_ledger_context", "delimit_ledger_add", "delimit_ledger_done",
    "delimit_ledger_list", "delimit_ledger_update",
    # Setup ("delimit_scan" is already listed under Govern)
    "delimit_init", "delimit_version", "delimit_help",
}
18
46
 
19
47
  # ─────────────────────────────────────────────────────────────────────
20
48
  # TOOL_TIERS: canonical tier assignment for every registered tool.
@@ -31,12 +59,12 @@ Tier = Literal["public", "ops_pack", "internal", "experimental"]
31
59
  # ─────────────────────────────────────────────────────────────────────
32
60
 
33
61
  TOOL_TIERS: Dict[str, Tier] = {
34
- # === Govern domain (all public) ===
35
- "delimit_lint": "public",
36
- "delimit_diff": "public",
62
+ # === Govern domain ===
63
+ "delimit_lint": "core",
64
+ "delimit_diff": "core",
37
65
  "delimit_policy": "public",
38
- "delimit_semver": "public",
39
- "delimit_explain": "public",
66
+ "delimit_semver": "core",
67
+ "delimit_explain": "core",
40
68
  "delimit_zero_spec": "public",
41
69
  "delimit_init": "public",
42
70
  "delimit_gov_health": "public",
@@ -0,0 +1,327 @@
1
+ """
2
+ Toolcard Delta Cache — LED-219
3
+
4
+ MCP servers dump full tool definitions every session. GitHub's MCP server
5
+ alone sends 40K+ tokens of tool schemas. This module stores hashed tool
6
+ schemas and only surfaces diffs when schemas change, cutting token waste
7
+ on tool definitions dramatically.
8
+
9
+ This is a MEASUREMENT tool first — it shows the savings potential. The
10
+ actual MCP protocol optimization to send compressed schemas is a separate
11
+ step.
12
+
13
+ Architecture:
14
+ - SHA256 hash of each tool's canonical schema (name + description + parameters)
15
+ - Persistent JSON cache at ~/.delimit/toolcard_cache.json
16
+ - Per-session JSONL logs at ~/.delimit/toolcard_sessions/{date}.jsonl
17
+ - Atomic cache writes (write to tmp, rename); no locking between concurrent writers
18
+ - No external dependencies — stdlib only
19
+ """
20
+
21
+ import hashlib
22
+ import json
23
+ import logging
24
+ import os
25
+ import tempfile
26
+ import time
27
+ from datetime import datetime, timezone
28
+ from pathlib import Path
29
+ from typing import Any, Dict, List, Optional
30
+
31
# Module logger, registered under a fixed dotted name.
logger = logging.getLogger("delimit.toolcard_cache")

# Default on-disk locations, both inside the user's ~/.delimit directory.
CACHE_FILE = Path.home().joinpath(".delimit", "toolcard_cache.json")
SESSION_DIR = Path.home().joinpath(".delimit", "toolcard_sessions")
35
+
36
+
37
+ def _canonical_json(obj: Any) -> str:
38
+ """Produce a deterministic JSON string for hashing."""
39
+ return json.dumps(obj, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
40
+
41
+
42
def _hash_schema(tool: Dict[str, Any]) -> str:
    """Return the SHA256 hex digest of a tool's name, description and parameters.

    Only those three keys are hashed; missing ``name``/``description`` count
    as ``""`` and missing ``parameters`` as ``{}``. Any other keys on *tool*
    do not influence the digest.
    """
    hashed_fields = {key: tool.get(key, "") for key in ("name", "description")}
    hashed_fields["parameters"] = tool.get("parameters", {})

    digest = hashlib.sha256()
    digest.update(_canonical_json(hashed_fields).encode("utf-8"))
    return digest.hexdigest()
50
+
51
+
52
def _estimate_tokens(obj: Any) -> int:
    """Approximate the token cost of *obj* as canonical-JSON length // 4.

    The result is never below 1, even for very short payloads.
    """
    approx = len(_canonical_json(obj)) // 4
    return approx if approx > 1 else 1
55
+
56
+
57
+ def _atomic_write_json(path: Path, data: Any) -> None:
58
+ """Write JSON atomically: write to temp file, then rename."""
59
+ path.parent.mkdir(parents=True, exist_ok=True)
60
+ fd, tmp_path = tempfile.mkstemp(
61
+ dir=str(path.parent), suffix=".tmp", prefix=".toolcard_"
62
+ )
63
+ try:
64
+ with os.fdopen(fd, "w") as f:
65
+ json.dump(data, f, indent=2, default=str)
66
+ os.replace(tmp_path, str(path))
67
+ except Exception:
68
+ # Clean up temp file on failure
69
+ try:
70
+ os.unlink(tmp_path)
71
+ except OSError:
72
+ pass
73
+ raise
74
+
75
+
76
class ToolcardCache:
    """Persistent registry of hashed tool schemas with per-session statistics.

    Each cache entry maps a tool name to its schema hash, the full schema and
    first-seen / last-changed timestamps. Registering tools reports which
    schemas are new or changed relative to the cache and estimates the tokens
    that sending only that delta would save.
    """

    def __init__(self, cache_file: Optional[Path] = None, session_dir: Optional[Path] = None):
        """Load the cache from disk and reset the session counters.

        Args:
            cache_file: Location of the JSON cache; defaults to ``CACHE_FILE``.
            session_dir: Directory for session logs; defaults to ``SESSION_DIR``.
        """
        self._cache_file = cache_file or CACHE_FILE
        self._session_dir = session_dir or SESSION_DIR
        self.cache: Dict[str, Dict[str, Any]] = self._load()
        # Per-session tracking
        self._session_start = datetime.now(timezone.utc).isoformat()
        self._session_calls: Dict[str, int] = {}  # tool_name -> call count
        self._session_registered = 0
        self._session_hits = 0
        self._session_misses = 0

    def _load(self) -> Dict[str, Dict[str, Any]]:
        """Load the cache from disk.

        Returns:
            The cached entries, or an empty dict when the file is missing,
            unreadable, not valid JSON, or not a JSON object. Entries that
            are not dicts carrying a ``"hash"`` key are dropped so later
            lookups cannot fail on them.
        """
        try:
            if not self._cache_file.exists():
                return {}
            with open(self._cache_file, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (ValueError, OSError) as e:
            # ValueError covers both json.JSONDecodeError and
            # UnicodeDecodeError, i.e. every flavour of "corrupt file".
            logger.warning("Toolcard cache load failed: %s", e)
            return {}

        if not isinstance(data, dict):
            return {}

        entries = {
            name: entry
            for name, entry in data.items()
            if isinstance(entry, dict) and "hash" in entry
        }
        dropped = len(data) - len(entries)
        if dropped:
            logger.warning("Toolcard cache: ignored %d malformed entries", dropped)
        return entries

    def _save(self) -> None:
        """Persist cache to disk atomically."""
        _atomic_write_json(self._cache_file, self.cache)

    @staticmethod
    def _token_summary(
        full_tokens: int,
        delta_schemas: List[Dict[str, Any]],
        unchanged_names: List[str],
    ) -> Dict[str, Any]:
        """Compute the token figures shared by register_tools and estimate_savings."""
        delta_tokens = sum(_estimate_tokens(t) for t in delta_schemas)
        # Unchanged tools still need their names sent (compact summary)
        delta_tokens += sum(len(n) // 4 + 1 for n in unchanged_names)

        saved_tokens = max(0, full_tokens - delta_tokens)
        savings_pct = round(saved_tokens / full_tokens * 100, 1) if full_tokens > 0 else 0.0
        return {
            "full_tokens": full_tokens,
            "delta_tokens": delta_tokens,
            "savings_pct": savings_pct,
            "saved_tokens": saved_tokens,
        }

    def register_tools(self, tools: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Register tool schemas and report which are NEW or CHANGED.

        The cache is updated in memory and written to disk. Tools without a
        ``name`` cannot be cached; they are skipped and excluded from the
        token figures so they do not inflate the reported savings.

        Args:
            tools: List of tool schema dicts, each with 'name', 'description', 'parameters'.

        Returns:
            Dict with:
                new_tools: list of tool schemas not previously cached
                changed_tools: list of tool schemas whose hash differs
                unchanged_tools: list of tool names (no schema, just names)
                full_tokens: estimated tokens if all named schemas were sent
                delta_tokens: estimated tokens for new/changed schemas plus
                    the names of unchanged tools
                savings_pct: percentage reduction
                saved_tokens: absolute token count saved
                total_registered: number of items passed in
                cache_size: number of entries in the cache afterwards
        """
        new_tools = []
        changed_tools = []
        unchanged_names = []
        full_tokens = 0
        now = datetime.now(timezone.utc).isoformat()

        for tool in tools:
            name = tool.get("name", "")
            if not name:
                continue
            full_tokens += _estimate_tokens(tool)
            h = _hash_schema(tool)
            entry = self.cache.get(name)

            if entry is None:
                # New tool
                new_tools.append(tool)
                self.cache[name] = {
                    "hash": h,
                    "schema": tool,
                    "first_seen": now,
                    "last_changed": now,
                }
                self._session_misses += 1
            elif entry.get("hash") != h:
                # Changed tool — keep the original first_seen timestamp
                changed_tools.append(tool)
                self.cache[name] = {
                    "hash": h,
                    "schema": tool,
                    "first_seen": entry.get("first_seen", now),
                    "last_changed": now,
                }
                self._session_misses += 1
            else:
                # Unchanged — cache hit
                unchanged_names.append(name)
                self._session_hits += 1

        self._session_registered = len(tools)
        self._save()

        summary = self._token_summary(full_tokens, new_tools + changed_tools, unchanged_names)

        return {
            "new_tools": new_tools,
            "changed_tools": changed_tools,
            "unchanged_tools": unchanged_names,
            **summary,
            "total_registered": len(tools),
            "cache_size": len(self.cache),
        }

    def get_delta(self, tool_names: List[str]) -> Dict[str, Any]:
        """Split tool names into those present in the cache and those missing.

        Only name presence is checked; no schema hashes are compared. Each
        name counts as a session hit (cached) or miss (missing).

        Args:
            tool_names: List of tool names to check against the cache.

        Returns:
            Dict with cached (hit) and missing (miss) names, their counts
            and the hit rate as a percentage (0.0 for an empty input).
        """
        cached = []
        missing = []

        for name in tool_names:
            if name in self.cache:
                cached.append(name)
                self._session_hits += 1
            else:
                missing.append(name)
                self._session_misses += 1

        return {
            "cached": cached,
            "missing": missing,
            "cached_count": len(cached),
            "missing_count": len(missing),
            "hit_rate": round(len(cached) / len(tool_names) * 100, 1) if tool_names else 0.0,
        }

    def record_call(self, tool_name: str) -> None:
        """Record that a tool was called in the current session."""
        self._session_calls[tool_name] = self._session_calls.get(tool_name, 0) + 1

    def get_stats(self) -> Dict[str, Any]:
        """Return cache size, session hit/miss counters and call statistics.

        ``session_tools_called`` holds at most the ten most-called tools of
        this session; ``cached_schema_tokens`` is the estimated token size of
        all schemas currently in the cache.
        """
        total_checks = self._session_hits + self._session_misses
        hit_rate = round(
            (self._session_hits / total_checks * 100), 1
        ) if total_checks > 0 else 0.0

        # Estimate total cached schema tokens
        cached_tokens = sum(
            _estimate_tokens(entry.get("schema", {}))
            for entry in self.cache.values()
        )

        # Most called tools this session
        top_tools = sorted(
            self._session_calls.items(), key=lambda x: x[1], reverse=True
        )[:10]

        return {
            "total_cached_tools": len(self.cache),
            "session_registered": self._session_registered,
            "session_hits": self._session_hits,
            "session_misses": self._session_misses,
            "session_hit_rate": hit_rate,
            "cached_schema_tokens": cached_tokens,
            "session_tools_called": dict(top_tools),
            "session_start": self._session_start,
            "cache_file": str(self._cache_file),
        }

    def estimate_savings(self, tools: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Estimate token savings without modifying the cache or counters.

        Dry-run version of register_tools — shows what WOULD be saved, using
        the same rules: nameless tools are skipped, and a tool repeated in
        *tools* with an identical schema only needs sending once.

        Returns:
            Dict with total_tools, would_be_cached, would_need_sending,
            full_tokens, delta_tokens, savings_pct and saved_tokens.
        """
        unchanged_names = []
        to_send = []
        full_tokens = 0
        # Hashes this call would have stored, so repeats are judged the way
        # register_tools would judge them after updating the cache.
        pending_hashes: Dict[str, str] = {}

        for tool in tools:
            name = tool.get("name", "")
            if not name:
                continue
            full_tokens += _estimate_tokens(tool)
            h = _hash_schema(tool)  # hashed once per tool
            if name in pending_hashes:
                known_hash = pending_hashes[name]
            else:
                known_hash = self.cache.get(name, {}).get("hash")

            if known_hash == h:
                unchanged_names.append(name)
            else:
                to_send.append(tool)
                pending_hashes[name] = h

        summary = self._token_summary(full_tokens, to_send, unchanged_names)

        return {
            "total_tools": len(tools),
            "would_be_cached": len(unchanged_names),
            "would_need_sending": len(to_send),
            **summary,
        }

    def clear(self) -> Dict[str, Any]:
        """Empty the cache, persist the empty state and report how many entries were removed."""
        count = len(self.cache)
        self.cache = {}
        self._save()
        return {
            "cleared": count,
            "message": f"Cleared {count} cached tool schemas. Next session will send full schemas.",
        }

    def flush_session(self) -> Dict[str, Any]:
        """Append this session's stats as one JSON line to today's (UTC) log file.

        Returns:
            Dict with the path written to and the record that was appended.
        """
        self._session_dir.mkdir(parents=True, exist_ok=True)
        date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        session_file = self._session_dir / f"{date_str}.jsonl"

        record = {
            "session_start": self._session_start,
            "flushed_at": datetime.now(timezone.utc).isoformat(),
            "stats": self.get_stats(),
        }

        with open(session_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(record, default=str) + "\n")

        return {"written_to": str(session_file), "record": record}
310
+
311
+
312
+ # Module-level singleton for use by server.py
313
# Shared ToolcardCache handed out by get_cache(); stays None until the first
# request (intended for use by server.py).
_cache_instance: Optional[ToolcardCache] = None


def get_cache() -> ToolcardCache:
    """Return the module-level ToolcardCache, building it on first use."""
    global _cache_instance
    if _cache_instance is not None:
        return _cache_instance
    _cache_instance = ToolcardCache()
    return _cache_instance
322
+
323
+
324
def reset_cache() -> None:
    """Drop the module-level cache instance so the next get_cache() builds a new one.

    Intended for tests; nothing on disk is touched.
    """
    global _cache_instance
    _cache_instance = None
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "delimit-cli",
3
3
  "mcpName": "io.github.delimit-ai/delimit-mcp-server",
4
- "version": "3.15.12",
4
+ "version": "3.15.13",
5
5
  "description": "Unify Claude Code, Codex, Cursor, and Gemini CLI with persistent context, governance, and multi-model debate.",
6
6
  "main": "index.js",
7
7
  "files": [