npm - @pentatonic-ai/ai-agent-sdk - Versions diffs - 0.6.0 → 0.7.0 - Mend

@pentatonic-ai/ai-agent-sdk 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (94) hide show

package/packages/memory-engine/engine/services/nv-embed/Dockerfile ADDED Viewed

@@ -0,0 +1,28 @@
+# Container image for the NV-Embed-v2 embedding service implemented in server.py.
+FROM python:3.11-slim
+WORKDIR /app
+# Install system deps
+# NOTE(review): build-essential is presumably needed to compile Python packages that ship
+# no prebuilt wheel; confirm it is still required. The apt lists are removed in the same
+# layer so they do not end up in the image.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+# Install Python deps
+# transformers must stay <4.50 — newer versions break NV-Embed-v2 (removed all_tied_weights_keys)
+# NOTE(review): sentence-transformers is the only requirement below without a version bound.
+RUN pip install --no-cache-dir \
+    "torch>=2.0" \
+    "transformers>=4.42,<4.50" \
+    "datasets>=2.14.0" \
+    "einops>=0.7.0" \
+    "sentence-transformers" \
+    "fastapi>=0.100.0" \
+    "uvicorn>=0.23.0"
+COPY server.py .
+# Model cache volume
+# HF_HOME relocates the Hugging Face cache to /cache/huggingface; mount a volume at /cache
+# so downloaded model files survive container restarts.
+ENV HF_HOME=/cache/huggingface
+# The exposed port matches the --port passed in CMD. The explicit --host 0.0.0.0 is needed
+# because server.py defaults to 127.0.0.1 when no --host is given.
+EXPOSE 8041
+CMD ["python", "server.py", "--host", "0.0.0.0", "--port", "8041"]

package/packages/memory-engine/engine/services/nv-embed/server.py ADDED Viewed

@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+"""
+NV-Embed-v2 Embedding Service
+Persistent FastAPI service that keeps the 7B model loaded in GPU memory.
+Exposes OpenAI-compatible /v1/embeddings endpoint.
+Uses SentenceTransformer for better transformers version compatibility.
+Port: 8041 (default)
+"""
+import argparse
+import logging
+import time
+from typing import Any, List, Union
+import torch
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+import uvicorn
+# Process-wide logging format: timestamp, level, message.
+logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
+log = logging.getLogger("nv-embed-service")
+# Model identifier handed to SentenceTransformer in load_model().
+MODEL_ID = "nvidia/NV-Embed-v2"
+# Instruction used by /v1/embeddings whenever the request carries no instruction of its own.
+INSTRUCTION = "Given a question, retrieve passages that answer the question"
+app = FastAPI(title="NV-Embed-v2 Service", version="1.1.0")
+# Global model reference
+# Both names stay None until load_model() has run; load_time then holds the load duration in seconds.
+model = None
+load_time = None
+# Request body accepted by POST /v1/embeddings.
+class EmbeddingRequest(BaseModel):
+    # One string, or a list of strings, to embed.
+    input: Union[str, List[str]]
+    # Model name supplied by the client; the handler in this file never reads it.
+    model: str = "nv-embed-v2"
+    # Instruction override; when empty the handler falls back to the module-level INSTRUCTION.
+    instruction: str = ""
+# Typed description of an embeddings response (object / data / model / usage).
+# NOTE(review): no route visible in this file references this model; create_embeddings
+# returns a plain dict with the same keys.
+class EmbeddingResponse(BaseModel):
+    object: str = "list"
+    # One dict per embedded text.
+    data: List[dict]
+    model: str = "nv-embed-v2"
+    # Token accounting for the request.
+    usage: dict
+def load_model():
+    """Load NV-Embed-v2 via SentenceTransformer to GPU."""
+    global model, load_time
+    log.info("Loading NV-Embed-v2 via SentenceTransformer...")
+    t0 = time.time()
+    from sentence_transformers import SentenceTransformer
+    # Load to CPU first, then move to GPU to avoid OOM during loading
+    model = SentenceTransformer(MODEL_ID, trust_remote_code=True, device="cpu")
+    model = model.to("cuda")
+    load_time = time.time() - t0
+    log.info(f"Model loaded in {load_time:.1f}s")
+@app.on_event("startup")
+async def startup():
+    # Runs when the application starts; load_model() is synchronous, so this returns
+    # only after the model has been loaded.
+    load_model()
+@app.post("/v1/embeddings")
+async def create_embeddings(request: EmbeddingRequest) -> dict:
+    """OpenAI-compatible embeddings endpoint."""
+    if model is None:
+        raise HTTPException(status_code=503, detail="Model not loaded yet")
+    texts = [request.input] if isinstance(request.input, str) else request.input
+    if not texts:
+        raise HTTPException(status_code=400, detail="Empty input")
+    # Prepend instruction if provided (NV-Embed-v2 uses instruction-based embedding)
+    instruction = request.instruction or INSTRUCTION
+    if instruction:
+        texts = [f"Instruct: {instruction}\nQuery: {t}" for t in texts]
+    t0 = time.time()
+    try:
+        # SentenceTransformer.encode() returns numpy array
+        with torch.no_grad():
+            embeddings = model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
+        torch.cuda.empty_cache()
+        result = []
+        for i, emb in enumerate(embeddings):
+            result.append({
+                "object": "embedding",
+                "embedding": emb.tolist(),
+                "index": i,
+            })
+        elapsed = time.time() - t0
+        log.info(f"Embedded {len(texts)} texts in {elapsed:.2f}s ({elapsed/len(texts):.2f}s/text)")
+        return {
+            "object": "list",
+            "data": result,
+            "model": "nv-embed-v2",
+            "usage": {
+                "prompt_tokens": sum(len(t.split()) for t in texts),
+                "total_tokens": sum(len(t.split()) for t in texts),
+            },
+        }
+    except Exception as e:
+        log.error(f"Embedding failed: {e}")
+        import traceback
+        traceback.print_exc()
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/v1/models")
+async def list_models():
+    return {
+        "object": "list",
+        "data": [
+            {
+                "id": "nv-embed-v2",
+                "object": "model",
+                "owned_by": "nvidia",
+                "dimensions": 4096,
+                "loaded": model is not None,
+                "load_time_s": load_time,
+            }
+        ],
+    }
+@app.get("/health")
+async def health():
+    return {
+        "status": "ok" if model is not None else "loading",
+        "model": "nv-embed-v2",
+        "dimensions": 4096,
+        "gpu_loaded": model is not None,
+    }
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--port", type=int, default=8041)
+    parser.add_argument("--host", type=str, default="127.0.0.1")
+    args = parser.parse_args()
+    uvicorn.run(app, host=args.host, port=args.port)

package/packages/memory-engine/pme_memory/__init__.py ADDED Viewed

File without changes

package/packages/memory-engine/pme_memory/__main__.py ADDED Viewed

@@ -0,0 +1,129 @@
+"""
+pme_memory CLI — Communications layer management.
+Usage:
+    python -m pme_memory health              # Check status
+    python -m pme_memory stats               # Collection stats
+    python -m pme_memory index               # Index all sources
+    python -m pme_memory index chats         # Index just chats
+    python -m pme_memory search "query"      # Search all collections
+    python -m pme_memory search "q" -c chats # Search specific collection
+    python -m pme_memory serve               # HTTP API (port 8034)
+"""
+import argparse
+import json
+import os
+import sys
+import time
+from pathlib import Path
+from .store import CommsStore
+from .indexer import index_all
+from .search import search
+from .health import health_check
+def cmd_health(args):
+    store = CommsStore()
+    h = health_check(store)
+    print(json.dumps(h, indent=2))
+def cmd_stats(args):
+    store = CommsStore()
+    h = health_check(store)
+    print(f"\nL5 Communications Layer — {h.get('status', 'unknown')}")
+    print(f"DB: {h.get('db_path', '?')}")
+    print(f"Embeddings: {'OK' if h.get('embeddings') else 'UNAVAILABLE'}")
+    print(f"\nCollections:")
+    for name, info in h.get("collections", {}).items():
+        if info["exists"]:
+            print(f"  {name}: {info['count']:,} chunks")
+        else:
+            print(f"  {name}: not created")
+    print(f"\nTotal: {h.get('total_chunks', 0):,} chunks")
+def cmd_index(args):
+    workspace = Path(os.environ.get("PME_DIR", os.path.expanduser("~/pentatonic")))
+    store = CommsStore()
+    targets = args.targets if args.targets else None
+    t0 = time.time()
+    counts = index_all(store, workspace, targets=targets)
+    elapsed = time.time() - t0
+    total = sum(counts.values())
+    print(f"\nDone: {total:,} chunks indexed in {elapsed:.1f}s")
+def cmd_search(args):
+    query = " ".join(args.query) if args.query else ""
+    if not query:
+        print("Usage: python -m pme_memory search 'your query'")
+        return
+    store = CommsStore()
+    results = search(query, store=store, collection=args.collection, limit=args.limit)
+    for i, r in enumerate(results, 1):
+        print(f"\n--- [{i}] {r['collection']} (score: {r['score']}) ---")
+        print(f"Source: {r['source']}")
+        if r["contact"]:
+            print(f"Contact: {r['contact']}")
+        if r["timestamp"]:
+            print(f"Time: {r['timestamp']}")
+        print(r["text"][:300])
+def cmd_serve(args):
+    try:
+        from fastapi import FastAPI, Query
+        import uvicorn
+    except ImportError:
+        print("Install fastapi + uvicorn: pip install fastapi uvicorn")
+        sys.exit(1)
+    api = FastAPI(title="L5 Communications Layer")
+    store = CommsStore()
+    @api.get("/health")
+    def api_health():
+        return health_check(store)
+    @api.get("/search")
+    def api_search(q: str = Query(...), collection: str = None, limit: int = 10):
+        results = search(q, store=store, collection=collection, limit=limit)
+        return {"query": q, "results": results, "count": len(results)}
+    print(f"\n  L5 Communications Layer — http://127.0.0.1:{args.port}")
+    uvicorn.run(api, host="127.0.0.1", port=args.port, log_level="warning")
+def main():
+    parser = argparse.ArgumentParser(description="L5 Communications Layer")
+    sub = parser.add_subparsers(dest="command")
+    sub.add_parser("health")
+    sub.add_parser("stats")
+    idx = sub.add_parser("index")
+    idx.add_argument("targets", nargs="*", help="chats, emails, contacts, memory")
+    srch = sub.add_parser("search")
+    srch.add_argument("query", nargs="*")
+    srch.add_argument("-c", "--collection", default=None)
+    srch.add_argument("-l", "--limit", type=int, default=10)
+    srv = sub.add_parser("serve")
+    srv.add_argument("-p", "--port", type=int, default=8034)
+    args = parser.parse_args()
+    if not args.command:
+        parser.print_help()
+        return
+    cmds = {"health": cmd_health, "stats": cmd_stats, "index": cmd_index,
+            "search": cmd_search, "serve": cmd_serve}
+    cmds[args.command](args)
+if __name__ == "__main__":
+    main()

package/packages/memory-engine/pme_memory/artifacts.py ADDED Viewed

@@ -0,0 +1,95 @@
+from __future__ import annotations
+import hashlib
+import json
+import uuid
+from dataclasses import asdict, dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+def _utc_now() -> str:
+    return datetime.now(timezone.utc).isoformat()
+def _stable_json(data: Any) -> str:
+    return json.dumps(data, sort_keys=True, separators=(",", ":"), ensure_ascii=False)
+def _sha256(data: Any) -> str:
+    return hashlib.sha256(_stable_json(data).encode("utf-8")).hexdigest()
+@dataclass
+class ArtifactEnvelope:
+    artifact_id: str
+    artifact_type: str
+    producer: str
+    payload: Dict[str, Any]
+    needs: List[str] = field(default_factory=list)
+    parents: List[str] = field(default_factory=list)
+    source_tool: Optional[str] = None
+    topic: Optional[str] = None
+    created_at: str = field(default_factory=_utc_now)
+    content_hash: str = ""
+    @classmethod
+    def create(
+        cls,
+        artifact_type: str,
+        producer: str,
+        payload: Dict[str, Any],
+        *,
+        needs: Optional[List[str]] = None,
+        parents: Optional[List[str]] = None,
+        source_tool: Optional[str] = None,
+        topic: Optional[str] = None,
+    ) -> "ArtifactEnvelope":
+        env = cls(
+            artifact_id=str(uuid.uuid4()),
+            artifact_type=artifact_type,
+            producer=producer,
+            payload=payload,
+            needs=needs or [],
+            parents=parents or [],
+            source_tool=source_tool,
+            topic=topic,
+        )
+        env.content_hash = _sha256({
+            "artifact_type": env.artifact_type,
+            "producer": env.producer,
+            "payload": env.payload,
+            "needs": env.needs,
+            "parents": env.parents,
+            "source_tool": env.source_tool,
+            "topic": env.topic,
+        })
+        return env
+    def to_dict(self) -> Dict[str, Any]:
+        return asdict(self)
+class ArtifactStore:
+    """Append-only local artifact store (JSONL)."""
+    def __init__(self, path: str | Path):
+        self.path = Path(path)
+        self.path.parent.mkdir(parents=True, exist_ok=True)
+        if not self.path.exists():
+            self.path.touch()
+    def append(self, artifact: ArtifactEnvelope) -> None:
+        with self.path.open("a", encoding="utf-8") as f:
+            f.write(_stable_json(artifact.to_dict()) + "\n")
+    def tail(self, n: int = 20) -> List[Dict[str, Any]]:
+        lines = self.path.read_text(encoding="utf-8").splitlines()
+        out: List[Dict[str, Any]] = []
+        for line in lines[-n:]:
+            try:
+                out.append(json.loads(line))
+            except json.JSONDecodeError:
+                continue
+        return out

package/packages/memory-engine/pme_memory/embed.py ADDED Viewed

@@ -0,0 +1,74 @@
+"""
+pme_memory.embed — Embedding backend
+Primary: NV-Embed-v2 service (4096-dim) on localhost:8041
+Fallback: Ollama nomic-embed-text (768-dim) on localhost:11434
+"""
+import os
+import httpx
+import logging
+log = logging.getLogger("pme_memory.embed")
+# NV-Embed-v2 (primary)
+# Endpoint is overridable via PME_NV_EMBED_URL; the backend is enabled only when
+# PME_NV_EMBED_ENABLED equals "true" (case-insensitive), which is also the default.
+NV_EMBED_URL = os.environ.get("PME_NV_EMBED_URL", "http://localhost:8041/v1/embeddings")
+NV_EMBED_ENABLED = os.environ.get("PME_NV_EMBED_ENABLED", "true").lower() == "true"
+# Ollama (fallback)
+# Note the variable names: PME_EMBED_URL / PME_EMBED_MODEL configure the Ollama backend.
+OLLAMA_URL = os.environ.get("PME_EMBED_URL", "http://localhost:11434/api/embed")
+OLLAMA_MODEL = os.environ.get("PME_EMBED_MODEL", "nomic-embed-text")
+# Legacy aliases for backward compatibility
+EMBED_URL = OLLAMA_URL
+EMBED_MODEL = OLLAMA_MODEL
+# Dimension — NV-Embed-v2 is 4096, nomic is 768
+# This value also sets the length of the zero vectors returned when embedding fails.
+EMBED_DIM = int(os.environ.get("PME_EMBED_DIM", "4096"))
+BATCH_SIZE = 100  # 100 is the sweet spot for NV-Embed-v2 on GB10 (0.02s/text vs 0.48s at batch=64)
+def _embed_nv(texts: list[str]) -> list[list[float]] | None:
+    """Batch embed via NV-Embed-v2 service (OpenAI-compatible)."""
+    try:
+        r = httpx.post(NV_EMBED_URL, json={"input": texts}, timeout=60)
+        r.raise_for_status()
+        data = r.json()["data"]
+        return [d["embedding"] for d in data]
+    except Exception as e:
+        log.warning(f"NV-Embed-v2 failed: {e}")
+        return None
+def _embed_ollama(texts: list[str]) -> list[list[float]]:
+    """Embed one-by-one via Ollama."""
+    results = []
+    for text in texts:
+        try:
+            r = httpx.post(OLLAMA_URL, json={"model": OLLAMA_MODEL, "input": text}, timeout=30)
+            r.raise_for_status()
+            data = r.json()
+            emb = data.get("embeddings", [data.get("embedding", [])])[0]
+            if isinstance(emb, list) and len(emb) > 0:
+                results.append(emb)
+            else:
+                results.append([0.0] * EMBED_DIM)
+        except Exception:
+            results.append([0.0] * EMBED_DIM)
+    return results
+def embed_texts(texts: list[str]) -> list[list[float]]:
+    """Get embeddings. Tries NV-Embed-v2 first, falls back to Ollama."""
+    if NV_EMBED_ENABLED:
+        result = _embed_nv(texts)
+        if result and len(result) == len(texts):
+            return result
+    return _embed_ollama(texts)
+def embed_query(query: str) -> list[float]:
+    """Embed a single query string."""
+    vecs = embed_texts([query])
+    return vecs[0] if vecs else [0.0] * EMBED_DIM

package/packages/memory-engine/pme_memory/health.py ADDED Viewed

@@ -0,0 +1,36 @@
+"""
+pme_memory.health — Health check for the L5 communications layer
+"""
+import httpx
+from .store import CommsStore, COLLECTIONS
+from .embed import EMBED_URL, EMBED_MODEL
+def health_check(store: CommsStore = None) -> dict:
+    """Check L5 health: Milvus connectivity, collection stats, embeddings."""
+    if store is None:
+        store = CommsStore()
+    try:
+        stats = store.collection_stats()
+        total = sum(c["count"] for c in stats.values())
+        # Check embeddings
+        embeddings_ok = False
+        try:
+            r = httpx.get("http://localhost:11434/api/tags", timeout=3)
+            models = [m["name"] for m in r.json().get("models", [])]
+            embeddings_ok = EMBED_MODEL in str(models)
+        except Exception:
+            pass
+        return {
+            "status": "ok",
+            "db_path": store.uri,
+            "collections": stats,
+            "total_chunks": total,
+            "embeddings": embeddings_ok,
+            "embed_model": EMBED_MODEL,
+        }
+    except Exception as e:
+        return {"status": "error", "error": str(e)}

package/packages/memory-engine/pme_memory/hygiene.py ADDED Viewed

@@ -0,0 +1,159 @@
+"""
+pme_memory.hygiene — DAG Hygiene (P2)
+Periodic maintenance for the artifact DAG:
+  1. Dedupe: collapse artifacts with identical content_hash
+  2. Conflict detection: flag contradicting payloads on same topic
+  3. Branch pruning: mark stale/orphaned branches
+  4. Compaction: rewrite store without pruned entries
+"""
+from __future__ import annotations
+import json
+from collections import defaultdict
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Dict, List, Set
+@dataclass
+class HygieneReport:
+    total_artifacts: int
+    duplicates_found: int
+    duplicates_removed: int
+    conflicts_detected: List[Dict[str, Any]]
+    orphans_found: int
+    orphans_pruned: int
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "total_artifacts": self.total_artifacts,
+            "duplicates_found": self.duplicates_found,
+            "duplicates_removed": self.duplicates_removed,
+            "conflicts_detected": self.conflicts_detected,
+            "orphans_found": self.orphans_found,
+            "orphans_pruned": self.orphans_pruned,
+        }
+def _load_all(store_path: Path) -> List[Dict[str, Any]]:
+    if not store_path.exists():
+        return []
+    out = []
+    for line in store_path.read_text(encoding="utf-8").splitlines():
+        try:
+            out.append(json.loads(line))
+        except json.JSONDecodeError:
+            continue
+    return out
+def _write_all(store_path: Path, artifacts: List[Dict[str, Any]]) -> None:
+    with store_path.open("w", encoding="utf-8") as f:
+        for art in artifacts:
+            f.write(json.dumps(art, sort_keys=True, separators=(",", ":")) + "\n")
+def deduplicate(artifacts: List[Dict[str, Any]]) -> tuple[List[Dict[str, Any]], int]:
+    """Remove artifacts with duplicate content_hash, keeping the earliest."""
+    seen: Dict[str, int] = {}
+    unique: List[Dict[str, Any]] = []
+    dupes = 0
+    for art in artifacts:
+        h = art.get("content_hash", "")
+        if h and h in seen:
+            dupes += 1
+            continue
+        if h:
+            seen[h] = len(unique)
+        unique.append(art)
+    return unique, dupes
+def detect_conflicts(artifacts: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Find artifacts on the same topic with contradicting payload values."""
+    by_topic: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
+    for art in artifacts:
+        topic = art.get("topic")
+        if topic:
+            by_topic[topic].append(art)
+    conflicts = []
+    for topic, arts in by_topic.items():
+        if len(arts) < 2:
+            continue
+        # Compare payload keys across artifacts in same topic
+        for i, a in enumerate(arts):
+            a_payload = a.get("payload", {})
+            for b in arts[i + 1:]:
+                b_payload = b.get("payload", {})
+                shared_keys = set(a_payload.keys()) & set(b_payload.keys())
+                for k in shared_keys:
+                    if a_payload[k] != b_payload[k]:
+                        conflicts.append({
+                            "topic": topic,
+                            "key": k,
+                            "artifact_a": a["artifact_id"][:12],
+                            "value_a": str(a_payload[k])[:80],
+                            "artifact_b": b["artifact_id"][:12],
+                            "value_b": str(b_payload[k])[:80],
+                        })
+    return conflicts
+def find_orphans(artifacts: List[Dict[str, Any]]) -> Set[str]:
+    """Find artifacts that reference parents not in the store."""
+    known_ids = {a["artifact_id"] for a in artifacts}
+    orphan_ids: Set[str] = set()
+    for art in artifacts:
+        for pid in art.get("parents", []):
+            if pid not in known_ids:
+                orphan_ids.add(art["artifact_id"])
+    return orphan_ids
+def run_hygiene(
+    store_path: str | Path,
+    prune_orphans: bool = False,
+    dry_run: bool = True,
+) -> HygieneReport:
+    """Run full DAG hygiene pass.
+    Args:
+        store_path: path to artifacts.jsonl
+        prune_orphans: if True, remove orphaned artifacts
+        dry_run: if True, don't write changes back
+    """
+    store_path = Path(store_path)
+    artifacts = _load_all(store_path)
+    total = len(artifacts)
+    # 1. Deduplicate
+    deduped, dupe_count = deduplicate(artifacts)
+    # 2. Detect conflicts
+    conflicts = detect_conflicts(deduped)
+    # 3. Find orphans
+    orphan_ids = find_orphans(deduped)
+    orphan_count = len(orphan_ids)
+    pruned_count = 0
+    if prune_orphans and orphan_ids:
+        deduped = [a for a in deduped if a["artifact_id"] not in orphan_ids]
+        pruned_count = orphan_count
+    # 4. Write back if not dry_run
+    removed = dupe_count + pruned_count
+    if not dry_run and removed > 0:
+        _write_all(store_path, deduped)
+    return HygieneReport(
+        total_artifacts=total,
+        duplicates_found=dupe_count,
+        duplicates_removed=dupe_count if not dry_run else 0,
+        conflicts_detected=conflicts,
+        orphans_found=orphan_count,
+        orphans_pruned=pruned_count if not dry_run else 0,
+    )