npm - delimit-cli - Versions diffs - 4.0.0 → 4.0.2 - Mend

delimit-cli 4.0.0 → 4.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/README.md +0 -1
package/gateway/ai/cross_model_audit.py +600 -0
package/gateway/ai/github_scanner.py +622 -0
package/gateway/ai/handoff_receipts.py +409 -0
package/gateway/ai/license_core.py +1 -2
package/gateway/ai/notify.py +8 -8
package/gateway/ai/reddit_scanner.py +562 -0
package/gateway/ai/server.py +15 -7
package/gateway/ai/session_phoenix.py +371 -0
package/gateway/ai/swarm.py +2 -2
package/gateway/ai/toolcard_cache.py +327 -0
package/gateway/core/contract_ledger.py +1 -1
package/gateway/core/dependency_graph.py +1 -1
package/gateway/core/dependency_manifest.py +1 -1
package/gateway/core/event_backbone.py +2 -2
package/gateway/core/event_schema.py +1 -1
package/gateway/core/impact_analyzer.py +1 -1
package/package.json +1 -7
package/scripts/security-check.sh +6 -50

package/gateway/ai/toolcard_cache.py ADDED Viewed

@@ -0,0 +1,327 @@
+"""
+Toolcard Delta Cache — LED-219
+MCP servers dump full tool definitions every session. GitHub's MCP server
+alone sends 40K+ tokens of tool schemas. This module stores hashed tool
+schemas and only surfaces diffs when schemas change, cutting token waste
+on tool definitions dramatically.
+This is a MEASUREMENT tool first — it shows the savings potential. The
+actual MCP protocol optimization to send compressed schemas is a separate
+step.
+Architecture:
+  - SHA256 hash of each tool's canonical schema (name + description + parameters)
+  - Persistent JSON cache at ~/.delimit/toolcard_cache.json
+  - Per-session JSONL logs at ~/.delimit/toolcard_sessions/{date}.jsonl
+  - Crash-safe persistence via atomic writes (write to tmp, rename); no in-process locking
+  - No external dependencies — stdlib only
+"""
+import hashlib
+import json
+import logging
+import os
+import tempfile
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+logger = logging.getLogger("delimit.toolcard_cache")  # module logger; used for cache-load warnings
+CACHE_FILE = Path.home() / ".delimit" / "toolcard_cache.json"  # default persistent schema cache (JSON)
+SESSION_DIR = Path.home() / ".delimit" / "toolcard_sessions"  # default directory for per-day session JSONL logs
+def _canonical_json(obj: Any) -> str:
+    """Produce a deterministic JSON string for hashing."""
+    return json.dumps(obj, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
+def _hash_schema(tool: Dict[str, Any]) -> str:
+    """SHA256 hash of a tool's canonical schema (name + description + parameters)."""
+    canonical = {
+        "name": tool.get("name", ""),
+        "description": tool.get("description", ""),
+        "parameters": tool.get("parameters", {}),
+    }
+    return hashlib.sha256(_canonical_json(canonical).encode("utf-8")).hexdigest()
+def _estimate_tokens(obj: Any) -> int:
+    """Estimate token count: len(JSON) / 4 (standard approximation)."""
+    return max(1, len(_canonical_json(obj)) // 4)
+def _atomic_write_json(path: Path, data: Any) -> None:
+    """Write JSON atomically: write to temp file, then rename."""
+    path.parent.mkdir(parents=True, exist_ok=True)
+    fd, tmp_path = tempfile.mkstemp(
+        dir=str(path.parent), suffix=".tmp", prefix=".toolcard_"
+    )
+    try:
+        with os.fdopen(fd, "w") as f:
+            json.dump(data, f, indent=2, default=str)
+        os.replace(tmp_path, str(path))
+    except Exception:
+        # Clean up temp file on failure
+        try:
+            os.unlink(tmp_path)
+        except OSError:
+            pass
+        raise
+class ToolcardCache:
+    """Hashed tool schema registry. Sends full schemas on first session, diffs after."""
+    def __init__(self, cache_file: Optional[Path] = None, session_dir: Optional[Path] = None):
+        self._cache_file = cache_file or CACHE_FILE
+        self._session_dir = session_dir or SESSION_DIR
+        self.cache: Dict[str, Dict[str, Any]] = self._load()
+        # Per-session tracking
+        self._session_start = datetime.now(timezone.utc).isoformat()
+        self._session_calls: Dict[str, int] = {}  # tool_name -> call count
+        self._session_registered = 0
+        self._session_hits = 0
+        self._session_misses = 0
+    def _load(self) -> Dict[str, Dict[str, Any]]:
+        """Load cache from disk. Returns empty dict if missing or corrupt."""
+        try:
+            if self._cache_file.exists():
+                with open(self._cache_file, "r") as f:
+                    data = json.load(f)
+                if isinstance(data, dict):
+                    return data
+        except (json.JSONDecodeError, OSError) as e:
+            logger.warning("Toolcard cache load failed: %s", e)
+        return {}
+    def _save(self) -> None:
+        """Persist cache to disk atomically."""
+        _atomic_write_json(self._cache_file, self.cache)
+    def register_tools(self, tools: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """Register tool schemas. Returns only NEW or CHANGED tools.
+        Args:
+            tools: List of tool schema dicts, each with 'name', 'description', 'parameters'.
+        Returns:
+            Dict with:
+                new_tools: list of tool schemas not previously cached
+                changed_tools: list of tool schemas whose hash differs
+                unchanged_tools: list of tool names (no schema, just names)
+                full_tokens: estimated tokens if all schemas were sent
+                delta_tokens: actual tokens for just new/changed
+                savings_pct: percentage reduction
+                saved_tokens: absolute token count saved
+        """
+        new_tools = []
+        changed_tools = []
+        unchanged_names = []
+        now = datetime.now(timezone.utc).isoformat()
+        for tool in tools:
+            name = tool.get("name", "")
+            if not name:
+                continue
+            h = _hash_schema(tool)
+            if name not in self.cache:
+                # New tool
+                new_tools.append(tool)
+                self.cache[name] = {
+                    "hash": h,
+                    "schema": tool,
+                    "first_seen": now,
+                    "last_changed": now,
+                }
+                self._session_misses += 1
+            elif self.cache[name]["hash"] != h:
+                # Changed tool
+                changed_tools.append(tool)
+                self.cache[name] = {
+                    "hash": h,
+                    "schema": tool,
+                    "first_seen": self.cache[name].get("first_seen", now),
+                    "last_changed": now,
+                }
+                self._session_misses += 1
+            else:
+                # Unchanged — cache hit
+                unchanged_names.append(name)
+                self._session_hits += 1
+        self._session_registered = len(tools)
+        self._save()
+        # Token calculations
+        full_tokens = sum(_estimate_tokens(t) for t in tools)
+        delta_schemas = new_tools + changed_tools
+        delta_tokens = sum(_estimate_tokens(t) for t in delta_schemas)
+        # Unchanged tools still need their names sent (compact summary)
+        delta_tokens += sum(len(n) // 4 + 1 for n in unchanged_names)
+        saved_tokens = max(0, full_tokens - delta_tokens)
+        savings_pct = round((saved_tokens / full_tokens * 100), 1) if full_tokens > 0 else 0.0
+        return {
+            "new_tools": new_tools,
+            "changed_tools": changed_tools,
+            "unchanged_tools": unchanged_names,
+            "full_tokens": full_tokens,
+            "delta_tokens": delta_tokens,
+            "savings_pct": savings_pct,
+            "saved_tokens": saved_tokens,
+            "total_registered": len(tools),
+            "cache_size": len(self.cache),
+        }
+    def get_delta(self, tool_names: List[str]) -> Dict[str, Any]:
+        """Return only schemas that changed since last check.
+        Args:
+            tool_names: List of tool names to check against the cache.
+        Returns:
+            Dict with cached (hit) and missing (miss) tools.
+        """
+        cached = []
+        missing = []
+        for name in tool_names:
+            if name in self.cache:
+                cached.append(name)
+                self._session_hits += 1
+            else:
+                missing.append(name)
+                self._session_misses += 1
+        return {
+            "cached": cached,
+            "missing": missing,
+            "cached_count": len(cached),
+            "missing_count": len(missing),
+            "hit_rate": round(len(cached) / len(tool_names) * 100, 1) if tool_names else 0.0,
+        }
+    def record_call(self, tool_name: str) -> None:
+        """Record that a tool was called in the current session."""
+        self._session_calls[tool_name] = self._session_calls.get(tool_name, 0) + 1
+    def get_stats(self) -> Dict[str, Any]:
+        """Return cache stats: total tools, cached, cache hit rate, token savings."""
+        total_checks = self._session_hits + self._session_misses
+        hit_rate = round(
+            (self._session_hits / total_checks * 100), 1
+        ) if total_checks > 0 else 0.0
+        # Estimate total cached schema tokens
+        cached_tokens = sum(
+            _estimate_tokens(entry.get("schema", {}))
+            for entry in self.cache.values()
+        )
+        # Most called tools this session
+        top_tools = sorted(
+            self._session_calls.items(), key=lambda x: x[1], reverse=True
+        )[:10]
+        return {
+            "total_cached_tools": len(self.cache),
+            "session_registered": self._session_registered,
+            "session_hits": self._session_hits,
+            "session_misses": self._session_misses,
+            "session_hit_rate": hit_rate,
+            "cached_schema_tokens": cached_tokens,
+            "session_tools_called": dict(top_tools),
+            "session_start": self._session_start,
+            "cache_file": str(self._cache_file),
+        }
+    def estimate_savings(self, tools: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """Estimate token savings without modifying the cache.
+        Dry-run version of register_tools — shows what WOULD be saved.
+        """
+        hits = 0
+        misses = 0
+        for tool in tools:
+            name = tool.get("name", "")
+            if not name:
+                continue
+            h = _hash_schema(tool)
+            if name in self.cache and self.cache[name]["hash"] == h:
+                hits += 1
+            else:
+                misses += 1
+        full_tokens = sum(_estimate_tokens(t) for t in tools)
+        # If all cached, only names need sending
+        cached_names_tokens = sum(len(t.get("name", "")) // 4 + 1 for t in tools if t.get("name") in self.cache and self.cache.get(t.get("name"), {}).get("hash") == _hash_schema(t))
+        missed_tokens = sum(
+            _estimate_tokens(t) for t in tools
+            if t.get("name") not in self.cache or self.cache.get(t.get("name"), {}).get("hash") != _hash_schema(t)
+        )
+        delta_tokens = cached_names_tokens + missed_tokens
+        saved_tokens = max(0, full_tokens - delta_tokens)
+        savings_pct = round((saved_tokens / full_tokens * 100), 1) if full_tokens > 0 else 0.0
+        return {
+            "total_tools": len(tools),
+            "would_be_cached": hits,
+            "would_need_sending": misses,
+            "full_tokens": full_tokens,
+            "delta_tokens": delta_tokens,
+            "savings_pct": savings_pct,
+            "saved_tokens": saved_tokens,
+        }
+    def clear(self) -> Dict[str, Any]:
+        """Clear the cache. Forces full schema send next session."""
+        count = len(self.cache)
+        self.cache = {}
+        self._save()
+        return {
+            "cleared": count,
+            "message": f"Cleared {count} cached tool schemas. Next session will send full schemas.",
+        }
+    def flush_session(self) -> Dict[str, Any]:
+        """Write session stats to the per-session JSONL log."""
+        self._session_dir.mkdir(parents=True, exist_ok=True)
+        date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")
+        session_file = self._session_dir / f"{date_str}.jsonl"
+        record = {
+            "session_start": self._session_start,
+            "flushed_at": datetime.now(timezone.utc).isoformat(),
+            "stats": self.get_stats(),
+        }
+        with open(session_file, "a") as f:
+            f.write(json.dumps(record, default=str) + "\n")
+        return {"written_to": str(session_file), "record": record}
+# Module-level singleton for use by server.py
+_cache_instance: Optional[ToolcardCache] = None
+def get_cache() -> ToolcardCache:
+    """Get or create the module-level cache singleton."""
+    global _cache_instance
+    if _cache_instance is None:
+        _cache_instance = ToolcardCache()
+    return _cache_instance
+def reset_cache() -> None:
+    """Reset the singleton (for testing)."""
+    global _cache_instance
+    _cache_instance = None

package/gateway/core/contract_ledger.py CHANGED Viewed

@@ -3,7 +3,7 @@ Delimit Contract Ledger
 Reads, validates, and queries the append-only JSONL event ledger.
 Optional SQLite index for fast lookups (never required for CI).
-Per Delimit Stability Contract:
+Per Jamsons Doctrine:
 - Deterministic outputs
 - Append-only artifacts
 - SQLite index is optional, not required for CI

package/gateway/core/dependency_graph.py CHANGED Viewed

@@ -5,7 +5,7 @@ Constructs a deterministic service dependency graph from manifests.
 The graph maps each API/service to its downstream consumers,
 enabling impact analysis when an API contract changes.
-Per Delimit Stability Contract:
+Per Jamsons Doctrine:
 - Deterministic outputs (sorted, reproducible)
 - No telemetry
 - Graceful degradation when manifests are missing

package/gateway/core/dependency_manifest.py CHANGED Viewed

@@ -2,7 +2,7 @@
 Delimit Dependency Manifest
 Parses and validates .delimit/dependencies.yaml service dependency declarations.
-Per Delimit Stability Contract:
+Per Jamsons Doctrine:
 - Deterministic outputs
 - No credential discovery
 - No telemetry

package/gateway/core/event_backbone.py CHANGED Viewed

@@ -3,7 +3,7 @@ Delimit Event Backbone
 Constructs ledger events, generates SHA-256 hashes, links hash chains,
 and appends to the append-only JSONL ledger.
-Per Delimit Stability Contract:
+Per Jamsons Doctrine:
 - Deterministic outputs
 - Append-only artifacts
 - Fail-closed CI behavior (ledger failures never affect CI)
@@ -199,7 +199,7 @@ class EventBackbone:
         This is the primary API for event generation. It is best-effort:
         if the ledger write fails, the event is still returned but not persisted.
-        CRITICAL: This method NEVER raises exceptions. Per Delimit Stability Contract,
+        CRITICAL: This method NEVER raises exceptions. Per Jamsons Doctrine,
         ledger failures must not affect CI pass/fail outcome.
         Returns:

package/gateway/core/event_schema.py CHANGED Viewed

@@ -1,7 +1,7 @@
 """
 Delimit Event Schema
 Canonical event schema for API contract evolution tracking.
-Deterministic validation and serialization per Delimit Stability Contract.
+Deterministic validation and serialization per Jamsons Doctrine.
 """
 import hashlib

package/gateway/core/impact_analyzer.py CHANGED Viewed

@@ -3,7 +3,7 @@ Delimit Impact Analyzer
 Determines downstream consumers affected by an API change
 and produces informational impact summaries for CI output.
-Per Delimit Stability Contract:
+Per Jamsons Doctrine:
 - Impact analysis is INFORMATIONAL ONLY
 - NEVER affects CI pass/fail outcome
 - Deterministic outputs

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "delimit-cli",
   "mcpName": "io.github.delimit-ai/delimit-mcp-server",
-  "version": "4.0.0",
+  "version": "4.0.2",
   "description": "Unify Claude Code, Codex, Cursor, and Gemini CLI with persistent context, governance, and multi-model debate.",
   "main": "index.js",
   "files": [
@@ -14,12 +14,6 @@
     "!gateway/ai/founding_users.py",
     "!gateway/ai/inbox_daemon.py",
     "!gateway/ai/deliberation.py",
-    "!gateway/ai/reddit_scanner.py",
-    "!gateway/ai/github_scanner.py",
-    "!gateway/ai/cross_model_audit.py",
-    "!gateway/ai/session_phoenix.py",
-    "!gateway/ai/handoff_receipts.py",
-    "!gateway/ai/toolcard_cache.py",
     "scripts/",
     "server.json",
     "README.md",

package/scripts/security-check.sh CHANGED Viewed

@@ -25,7 +25,7 @@ fi
 # 2. Blocklist terms
 echo -n "  Blocklist... "
-BLOCKLIST="jamsonsholdings|Bladabah|Domainvested26|Delimit26|home/jamsons|infracore|crypttrx|\.wr_env|delimitdev|typed-on-phone|em dash.*ai tell|PAIN_CATEGORIES|VENTURE_CONFIG|VENTURE_SUBREDDITS|karma_building"
+BLOCKLIST="jamsonsholdings|Bladabah|Domainvested26|Delimit26|home/jamsons|infracore|crypttrx|\.wr_env"
 if grep -rEi "$BLOCKLIST" "$TMPDIR/package/" --include="*.py" --include="*.js" --include="*.json" 2>/dev/null; then
     echo "❌ BLOCKED TERMS FOUND"
     FAIL=1
@@ -42,17 +42,9 @@ else
     echo "✅ clean"
 fi
-# 4. Internal ticket IDs
-echo -n "  Internal ticket IDs... "
-if grep -rE "LED-[0-9]{3}|STR-[0-9]{3}" "$TMPDIR/package/" --include="*.py" --include="*.js" 2>/dev/null | grep -v "node_modules" | head -1; then
-    echo "  WARNING: Internal ticket IDs found (cosmetic, not blocking)"
-else
-    echo "clean"
-fi
-# 5. Proprietary files that shouldn't ship
+# 4. Proprietary files that shouldn't ship
 echo -n "  Proprietary files... "
-PROPRIETARY="social_target\.py|social\.py|founding_users\.py|inbox_daemon\.py|deliberation\.py|reddit_scanner\.py|github_scanner\.py|cross_model_audit\.py|session_phoenix\.py|handoff_receipts\.py|toolcard_cache\.py"
+PROPRIETARY="social_target\.py|social\.py|founding_users\.py|inbox_daemon\.py|deliberation\.py"
 if find "$TMPDIR/package/" -name "*.py" | grep -Ei "$PROPRIETARY" 2>/dev/null; then
     echo "❌ PROPRIETARY FILES IN PACKAGE"
     FAIL=1
@@ -60,51 +52,15 @@ else
     echo "✅ clean"
 fi
-# Cleanup npm tarball
+# Cleanup
 rm -rf "$TMPDIR"
-# ── PyPI dist scan (if dist/ exists) ─────────────────────────────────
-PYPI_DIST="/home/delimit/delimit-gateway/dist"
-if [ -d "$PYPI_DIST" ] && ls "$PYPI_DIST"/*.tar.gz 1>/dev/null 2>&1; then
-    echo ""
-    echo "PyPI dist scan..."
-    PYPI_TMPDIR=$(mktemp -d)
-    PYPI_TARBALL=$(ls -t "$PYPI_DIST"/*.tar.gz | head -1)
-    tar -xzf "$PYPI_TARBALL" -C "$PYPI_TMPDIR" 2>/dev/null
-    echo -n "  Credentials... "
-    if grep -rEi '(password|passwd|secret|api_key|apikey)\s*[:=]\s*["\x27][^"\x27]{4,}' "$PYPI_TMPDIR/" --include="*.py" 2>/dev/null | grep -v 'environ\|getenv\|os\.environ\|<configured\|example\|placeholder\|REDACTED'; then
-        echo "FOUND CREDENTIALS IN PYPI DIST"
-        FAIL=1
-    else
-        echo "clean"
-    fi
-    echo -n "  Blocklist... "
-    if grep -rEi "$BLOCKLIST" "$PYPI_TMPDIR/" --include="*.py" 2>/dev/null; then
-        echo "BLOCKED TERMS IN PYPI DIST"
-        FAIL=1
-    else
-        echo "clean"
-    fi
-    echo -n "  PII... "
-    if grep -rEi '[a-z0-9._%+-]+@(gmail|yahoo|hotmail|outlook|proton|jamsons|wire\.report|domainvested)' "$PYPI_TMPDIR/" --include="*.py" 2>/dev/null | grep -v "example\|placeholder\|<configured\|noreply\|e\.g\.\|docstring"; then
-        echo "PII IN PYPI DIST"
-        FAIL=1
-    else
-        echo "clean"
-    fi
-    rm -rf "$PYPI_TMPDIR"
-fi
 if [ $FAIL -ne 0 ]; then
     echo ""
-    echo "SECURITY CHECK FAILED -- do not publish"
+    echo "❌ SECURITY CHECK FAILED — do not publish"
     exit 1
 fi
 echo ""
-echo "All security checks passed"
+echo "✅ All security checks passed"
 exit 0