@pentatonic-ai/ai-agent-sdk 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/README.md +170 -69
  2. package/bin/__tests__/callback-server.test.js +4 -1
  3. package/bin/cli.js +41 -164
  4. package/bin/commands/config.js +251 -0
  5. package/package.json +2 -1
  6. package/packages/doctor/__tests__/detect.test.js +2 -6
  7. package/packages/doctor/src/checks/local-memory.js +164 -196
  8. package/packages/doctor/src/detect.js +11 -3
  9. package/packages/memory/src/corpus/adapters.js +104 -0
  10. package/packages/memory/src/corpus/cli.js +72 -7
  11. package/packages/memory/src/corpus/index.js +1 -1
  12. package/packages/memory-engine/.env.example +13 -0
  13. package/packages/memory-engine/README.md +131 -0
  14. package/packages/memory-engine/bench/README.md +99 -0
  15. package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +1115 -0
  16. package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +819 -0
  17. package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +1278 -0
  18. package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +1018 -0
  19. package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +1038 -0
  20. package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +961 -0
  21. package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +1115 -0
  22. package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +819 -0
  23. package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +1278 -0
  24. package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +1018 -0
  25. package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +1038 -0
  26. package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +937 -0
  27. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +1115 -0
  28. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +819 -0
  29. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +1278 -0
  30. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +1018 -0
  31. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +1038 -0
  32. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +961 -0
  33. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +1115 -0
  34. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +819 -0
  35. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +1278 -0
  36. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +1018 -0
  37. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +1038 -0
  38. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +883 -0
  39. package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +1115 -0
  40. package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +819 -0
  41. package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +1278 -0
  42. package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +1018 -0
  43. package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +1038 -0
  44. package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +937 -0
  45. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +1115 -0
  46. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +1115 -0
  47. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +819 -0
  48. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +542 -0
  49. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +1278 -0
  50. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +894 -0
  51. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +1018 -0
  52. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +680 -0
  53. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +1038 -0
  54. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +693 -0
  55. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +961 -0
  56. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +727 -0
  57. package/packages/memory-engine/compat/Dockerfile +11 -0
  58. package/packages/memory-engine/compat/server.py +680 -0
  59. package/packages/memory-engine/docker-compose.yml +243 -0
  60. package/packages/memory-engine/docs/MIGRATION.md +178 -0
  61. package/packages/memory-engine/docs/RUNBOOK-AWS.md +375 -0
  62. package/packages/memory-engine/docs/why-v05-underperforms.md +138 -0
  63. package/packages/memory-engine/engine/README.md +52 -0
  64. package/packages/memory-engine/engine/l2-hybridrag-proxy.py +1543 -0
  65. package/packages/memory-engine/engine/l5-comms-layer.py +663 -0
  66. package/packages/memory-engine/engine/l6-document-store.py +1018 -0
  67. package/packages/memory-engine/engine/services/l2/Dockerfile +41 -0
  68. package/packages/memory-engine/engine/services/l2/init_databases.py +81 -0
  69. package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +1543 -0
  70. package/packages/memory-engine/engine/services/l4/Dockerfile +15 -0
  71. package/packages/memory-engine/engine/services/l4/server.py +235 -0
  72. package/packages/memory-engine/engine/services/l5/Dockerfile +9 -0
  73. package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +678 -0
  74. package/packages/memory-engine/engine/services/l6/Dockerfile +11 -0
  75. package/packages/memory-engine/engine/services/l6/l6-document-store.py +1016 -0
  76. package/packages/memory-engine/engine/services/nv-embed/Dockerfile +28 -0
  77. package/packages/memory-engine/engine/services/nv-embed/server.py +152 -0
  78. package/packages/memory-engine/pme_memory/__init__.py +0 -0
  79. package/packages/memory-engine/pme_memory/__main__.py +129 -0
  80. package/packages/memory-engine/pme_memory/artifacts.py +95 -0
  81. package/packages/memory-engine/pme_memory/embed.py +74 -0
  82. package/packages/memory-engine/pme_memory/health.py +36 -0
  83. package/packages/memory-engine/pme_memory/hygiene.py +159 -0
  84. package/packages/memory-engine/pme_memory/indexer.py +200 -0
  85. package/packages/memory-engine/pme_memory/needs.py +55 -0
  86. package/packages/memory-engine/pme_memory/provenance.py +80 -0
  87. package/packages/memory-engine/pme_memory/scoring.py +168 -0
  88. package/packages/memory-engine/pme_memory/search.py +52 -0
  89. package/packages/memory-engine/pme_memory/store.py +86 -0
  90. package/packages/memory-engine/pme_memory/synthesis.py +114 -0
  91. package/packages/memory-engine/pyproject.toml +65 -0
  92. package/packages/memory-engine/scripts/kg-extractor.py +557 -0
  93. package/packages/memory-engine/scripts/kg-preflexor-v2.py +738 -0
  94. package/packages/memory-engine/tests/test_api_contract.sh +57 -0
@@ -0,0 +1,28 @@
1
FROM python:3.11-slim

WORKDIR /app

# Install system deps
# The apt package lists are deleted in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Install Python deps
# transformers must stay <4.50 — newer versions break NV-Embed-v2 (removed all_tied_weights_keys)
RUN pip install --no-cache-dir \
    "torch>=2.0" \
    "transformers>=4.42,<4.50" \
    "datasets>=2.14.0" \
    "einops>=0.7.0" \
    "sentence-transformers" \
    "fastapi>=0.100.0" \
    "uvicorn>=0.23.0"

COPY server.py .

# Model cache volume
# NOTE(review): presumably /cache is mounted as a volume by the deployment so
# downloaded model weights survive container restarts — confirm.
ENV HF_HOME=/cache/huggingface

# Port the service listens on; must match --port in CMD below.
EXPOSE 8041

# Bind to 0.0.0.0 (server.py defaults to 127.0.0.1) so the service is
# reachable from outside the container.
CMD ["python", "server.py", "--host", "0.0.0.0", "--port", "8041"]
@@ -0,0 +1,152 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ NV-Embed-v2 Embedding Service
4
+
5
+ Persistent FastAPI service that keeps the 7B model loaded in GPU memory.
6
+ Exposes OpenAI-compatible /v1/embeddings endpoint.
7
+
8
+ Uses SentenceTransformer for better transformers version compatibility.
9
+
10
+ Port: 8041 (default)
11
+ """
12
+
13
+ import argparse
14
+ import logging
15
+ import time
16
+ from typing import Any, List, Union
17
+
18
+ import torch
19
+ from fastapi import FastAPI, HTTPException
20
+ from pydantic import BaseModel
21
+ import uvicorn
22
+
23
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
log = logging.getLogger("nv-embed-service")

# Model identifier handed to SentenceTransformer in load_model().
MODEL_ID = "nvidia/NV-Embed-v2"
# Default instruction; create_embeddings() uses it whenever a request does not
# supply its own `instruction`.
INSTRUCTION = "Given a question, retrieve passages that answer the question"

app = FastAPI(title="NV-Embed-v2 Service", version="1.1.0")

# Global model reference
# Both values stay None until load_model() has run; the endpoints test
# `model is None` to report that the service is not ready yet.
model = None
load_time = None
34
+
35
+
36
class EmbeddingRequest(BaseModel):
    """Request body for POST /v1/embeddings."""

    # A single text or a list of texts to embed.
    input: Union[str, List[str]]
    # Accepted in the request body; the handler in this file does not read it.
    model: str = "nv-embed-v2"
    # Optional instruction override; when empty the handler falls back to INSTRUCTION.
    instruction: str = ""
40
+
41
+
42
class EmbeddingResponse(BaseModel):
    """Schema mirroring the dict returned by the /v1/embeddings handler.

    NOTE(review): create_embeddings() returns a plain dict and nothing in the
    visible code references this model — confirm whether it is still needed.
    """

    object: str = "list"
    # One entry per input text.
    data: List[dict]
    model: str = "nv-embed-v2"
    usage: dict
47
+
48
+
49
def load_model():
    """Instantiate NV-Embed-v2 through SentenceTransformer and place it on CUDA.

    Populates the module-level ``model`` and ``load_time`` (seconds spent
    loading). The sentence_transformers import is deferred to call time.
    """
    global model, load_time
    log.info("Loading NV-Embed-v2 via SentenceTransformer...")
    started = time.time()

    from sentence_transformers import SentenceTransformer

    # Stage the weights on the CPU and only then transfer them to the GPU;
    # loading straight onto the device can run out of memory.
    model = SentenceTransformer(MODEL_ID, trust_remote_code=True, device="cpu")
    model = model.to("cuda")

    load_time = time.time() - started
    log.info(f"Model loaded in {load_time:.1f}s")
62
+
63
+
64
@app.on_event("startup")
async def startup():
    """Load the embedding model when the application starts."""
    load_model()
67
+
68
+
69
@app.post("/v1/embeddings")
async def create_embeddings(request: EmbeddingRequest) -> dict:
    """OpenAI-compatible embeddings endpoint.

    Accepts a single string or a list of strings and returns one normalized
    embedding per input, in input order.

    Raises:
        HTTPException 503: the model has not been loaded yet.
        HTTPException 400: the input list is empty.
        HTTPException 500: encoding failed; the detail carries the error text.

    NOTE(review): the instruction prefix is applied to every input text,
    including texts that may be passages rather than queries — confirm this
    is intended.
    """
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded yet")

    texts = [request.input] if isinstance(request.input, str) else request.input
    if not texts:
        raise HTTPException(status_code=400, detail="Empty input")

    # Prepend instruction if provided (NV-Embed-v2 uses instruction-based embedding)
    instruction = request.instruction or INSTRUCTION
    if instruction:
        texts = [f"Instruct: {instruction}\nQuery: {t}" for t in texts]

    t0 = time.time()

    try:
        # SentenceTransformer.encode() returns numpy array
        with torch.no_grad():
            embeddings = model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
        torch.cuda.empty_cache()

        result = [
            {"object": "embedding", "embedding": emb.tolist(), "index": i}
            for i, emb in enumerate(embeddings)
        ]

        elapsed = time.time() - t0
        log.info(f"Embedded {len(texts)} texts in {elapsed:.2f}s ({elapsed/len(texts):.2f}s/text)")

        # Whitespace-separated word count (of the instruction-prefixed texts),
        # not a real tokenizer count. Computed once and reported in both fields.
        token_count = sum(len(t.split()) for t in texts)
        return {
            "object": "list",
            "data": result,
            "model": "nv-embed-v2",
            "usage": {
                "prompt_tokens": token_count,
                "total_tokens": token_count,
            },
        }
    except Exception as e:
        # log.exception records the traceback through the logging system
        # instead of printing it to stderr separately.
        log.exception(f"Embedding failed: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
117
+
118
+
119
@app.get("/v1/models")
async def list_models():
    """Return a one-entry model listing describing the embedding model."""
    descriptor = {
        "id": "nv-embed-v2",
        "object": "model",
        "owned_by": "nvidia",
        "dimensions": 4096,
        "loaded": model is not None,
        "load_time_s": load_time,
    }
    return {"object": "list", "data": [descriptor]}
134
+
135
+
136
@app.get("/health")
async def health():
    """Report whether the model has finished loading."""
    is_loaded = model is not None
    status = "ok" if is_loaded else "loading"
    return {
        "status": status,
        "model": "nv-embed-v2",
        "dimensions": 4096,
        "gpu_loaded": is_loaded,
    }
144
+
145
+
146
if __name__ == "__main__":
    # Script entry point: read --host/--port and start the uvicorn server.
    cli = argparse.ArgumentParser()
    cli.add_argument("--port", type=int, default=8041)
    cli.add_argument("--host", type=str, default="127.0.0.1")
    options = cli.parse_args()

    uvicorn.run(app, host=options.host, port=options.port)
File without changes
@@ -0,0 +1,129 @@
1
+ """
2
+ pme_memory CLI — Communications layer management.
3
+
4
+ Usage:
5
+ python -m pme_memory health # Check status
6
+ python -m pme_memory stats # Collection stats
7
+ python -m pme_memory index # Index all sources
8
+ python -m pme_memory index chats # Index just chats
9
+ python -m pme_memory search "query" # Search all collections
10
+ python -m pme_memory search "q" -c chats # Search specific collection
11
+ python -m pme_memory serve # HTTP API (port 8034)
12
+ """
13
+
14
+ import argparse
15
+ import json
16
+ import os
17
+ import sys
18
+ import time
19
+ from pathlib import Path
20
+
21
+ from .store import CommsStore
22
+ from .indexer import index_all
23
+ from .search import search
24
+ from .health import health_check
25
+
26
+
27
def cmd_health(args):
    """Print the health report as indented JSON."""
    report = health_check(CommsStore())
    print(json.dumps(report, indent=2))
31
+
32
+
33
def cmd_stats(args):
    """Print a human-readable summary of the health report."""
    report = health_check(CommsStore())
    embed_state = 'OK' if report.get('embeddings') else 'UNAVAILABLE'

    print(f"\nL5 Communications Layer — {report.get('status', 'unknown')}")
    print(f"DB: {report.get('db_path', '?')}")
    print(f"Embeddings: {embed_state}")
    print("\nCollections:")
    for name, info in report.get("collections", {}).items():
        if not info["exists"]:
            print(f" {name}: not created")
            continue
        print(f" {name}: {info['count']:,} chunks")
    print(f"\nTotal: {report.get('total_chunks', 0):,} chunks")
46
+
47
+
48
def cmd_index(args):
    """Index the requested targets (all when none are given) and print a total.

    The workspace root comes from the PME_DIR environment variable and
    defaults to ~/pentatonic.
    """
    root = os.environ.get("PME_DIR", os.path.expanduser("~/pentatonic"))
    workspace = Path(root)
    store = CommsStore()
    # An empty positional list means "no filter".
    targets = args.targets or None

    started = time.time()
    counts = index_all(store, workspace, targets=targets)
    elapsed = time.time() - started

    total = sum(counts.values())
    print(f"\nDone: {total:,} chunks indexed in {elapsed:.1f}s")
57
+
58
+
59
def cmd_search(args):
    """Run a search and print each hit with its metadata and a text preview."""
    query = " ".join(args.query or [])
    if not query:
        print("Usage: python -m pme_memory search 'your query'")
        return

    store = CommsStore()
    hits = search(query, store=store, collection=args.collection, limit=args.limit)
    for rank, hit in enumerate(hits, 1):
        print(f"\n--- [{rank}] {hit['collection']} (score: {hit['score']}) ---")
        print(f"Source: {hit['source']}")
        # Contact and timestamp are printed only when they hold a value.
        for label, key in (("Contact", "contact"), ("Time", "timestamp")):
            if hit[key]:
                print(f"{label}: {hit[key]}")
        print(hit["text"][:300])
74
+
75
+
76
def cmd_serve(args):
    """Expose health and search over HTTP on 127.0.0.1 at ``args.port``.

    fastapi and uvicorn are imported lazily; when either is missing an install
    hint is printed and the process exits with status 1.
    """
    try:
        from fastapi import FastAPI, Query
        import uvicorn
    except ImportError:
        print("Install fastapi + uvicorn: pip install fastapi uvicorn")
        sys.exit(1)

    api = FastAPI(title="L5 Communications Layer")
    # One store instance is shared by both routes.
    store = CommsStore()

    @api.get("/health")
    def api_health():
        return health_check(store)

    @api.get("/search")
    def api_search(q: str = Query(...), collection: str = None, limit: int = 10):
        hits = search(q, store=store, collection=collection, limit=limit)
        return {"query": q, "results": hits, "count": len(hits)}

    print(f"\n L5 Communications Layer — http://127.0.0.1:{args.port}")
    uvicorn.run(api, host="127.0.0.1", port=args.port, log_level="warning")
98
+
99
+
100
def main():
    """Parse the command line and dispatch to the matching ``cmd_*`` handler.

    Prints the help text and returns when no subcommand is given.
    """
    parser = argparse.ArgumentParser(description="L5 Communications Layer")
    subcommands = parser.add_subparsers(dest="command")

    # Subcommands without arguments of their own.
    for bare_name in ("health", "stats"):
        subcommands.add_parser(bare_name)

    index_parser = subcommands.add_parser("index")
    index_parser.add_argument("targets", nargs="*", help="chats, emails, contacts, memory")

    search_parser = subcommands.add_parser("search")
    search_parser.add_argument("query", nargs="*")
    search_parser.add_argument("-c", "--collection", default=None)
    search_parser.add_argument("-l", "--limit", type=int, default=10)

    serve_parser = subcommands.add_parser("serve")
    serve_parser.add_argument("-p", "--port", type=int, default=8034)

    args = parser.parse_args()
    if not args.command:
        parser.print_help()
        return

    handlers = {
        "health": cmd_health,
        "stats": cmd_stats,
        "index": cmd_index,
        "search": cmd_search,
        "serve": cmd_serve,
    }
    run = handlers[args.command]
    run(args)


if __name__ == "__main__":
    main()
@@ -0,0 +1,95 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import json
5
+ import uuid
6
+ from dataclasses import asdict, dataclass, field
7
+ from datetime import datetime, timezone
8
+ from pathlib import Path
9
+ from typing import Any, Dict, List, Optional
10
+
11
+
12
+ def _utc_now() -> str:
13
+ return datetime.now(timezone.utc).isoformat()
14
+
15
+
16
+ def _stable_json(data: Any) -> str:
17
+ return json.dumps(data, sort_keys=True, separators=(",", ":"), ensure_ascii=False)
18
+
19
+
20
def _sha256(data: Any) -> str:
    """Return the hex SHA-256 digest of the stable JSON encoding of *data*."""
    encoded = _stable_json(data).encode("utf-8")
    digest = hashlib.sha256(encoded)
    return digest.hexdigest()
22
+
23
+
24
@dataclass
class ArtifactEnvelope:
    """A payload plus identity, lineage and a content hash."""

    artifact_id: str
    artifact_type: str
    producer: str
    payload: Dict[str, Any]
    needs: List[str] = field(default_factory=list)
    parents: List[str] = field(default_factory=list)
    source_tool: Optional[str] = None
    topic: Optional[str] = None
    created_at: str = field(default_factory=_utc_now)
    content_hash: str = ""

    @classmethod
    def create(
        cls,
        artifact_type: str,
        producer: str,
        payload: Dict[str, Any],
        *,
        needs: Optional[List[str]] = None,
        parents: Optional[List[str]] = None,
        source_tool: Optional[str] = None,
        topic: Optional[str] = None,
    ) -> "ArtifactEnvelope":
        """Build an envelope with a fresh UUID and a computed ``content_hash``.

        The hash covers the type, producer, payload, needs, parents, source
        tool and topic. ``artifact_id`` and ``created_at`` are excluded, so
        two envelopes with the same content share a hash.
        """
        # The hashed fields are exactly the constructor arguments that
        # describe the content, so one mapping serves both purposes.
        hashed_fields = {
            "artifact_type": artifact_type,
            "producer": producer,
            "payload": payload,
            "needs": needs or [],
            "parents": parents or [],
            "source_tool": source_tool,
            "topic": topic,
        }
        return cls(
            artifact_id=str(uuid.uuid4()),
            content_hash=_sha256(hashed_fields),
            **hashed_fields,
        )

    def to_dict(self) -> Dict[str, Any]:
        """Return the envelope as a plain dict (recursive ``asdict`` copy)."""
        return asdict(self)
72
+
73
+
74
class ArtifactStore:
    """Append-only local artifact store (JSONL)."""

    def __init__(self, path: str | Path):
        """Open the store at *path*, creating parent directories and the file if missing."""
        self.path = Path(path)
        self.path.parent.mkdir(parents=True, exist_ok=True)
        if not self.path.exists():
            self.path.touch()

    def append(self, artifact: ArtifactEnvelope) -> None:
        """Append *artifact* as one stable-JSON line."""
        with self.path.open("a", encoding="utf-8") as f:
            f.write(_stable_json(artifact.to_dict()) + "\n")

    def tail(self, n: int = 20) -> List[Dict[str, Any]]:
        """Return the records parsed from the last *n* lines of the store.

        Lines that are not valid JSON are skipped, so fewer than *n* records
        may be returned. A non-positive *n* yields an empty list.
        """
        # Guard explicitly: slicing with [-0:] would return every line.
        if n <= 0:
            return []

        from collections import deque

        # Iterate the file rather than using str.splitlines(): records are
        # written with ensure_ascii=False, so a payload may contain raw
        # U+2028/U+2029/U+0085, which splitlines() treats as line breaks and
        # would cut a record in half. The bounded deque keeps only the last
        # n lines in memory.
        with self.path.open("r", encoding="utf-8") as f:
            last_lines = deque(f, maxlen=n)

        out: List[Dict[str, Any]] = []
        for line in last_lines:
            try:
                out.append(json.loads(line))
            except json.JSONDecodeError:
                continue
        return out
@@ -0,0 +1,74 @@
1
+ """
2
+ pme_memory.embed — Embedding backend
3
+
4
+ Primary: NV-Embed-v2 service (4096-dim) on localhost:8041
5
+ Fallback: Ollama nomic-embed-text (768-dim) on localhost:11434
6
+ """
7
+
8
+ import os
9
+ import httpx
10
+ import logging
11
+
12
log = logging.getLogger("pme_memory.embed")

# NV-Embed-v2 (primary)
NV_EMBED_URL = os.environ.get("PME_NV_EMBED_URL", "http://localhost:8041/v1/embeddings")
# Only the literal string "true" (any letter case) enables the primary backend.
NV_EMBED_ENABLED = os.environ.get("PME_NV_EMBED_ENABLED", "true").lower() == "true"

# Ollama (fallback)
OLLAMA_URL = os.environ.get("PME_EMBED_URL", "http://localhost:11434/api/embed")
OLLAMA_MODEL = os.environ.get("PME_EMBED_MODEL", "nomic-embed-text")

# Legacy aliases for backward compatibility
EMBED_URL = OLLAMA_URL
EMBED_MODEL = OLLAMA_MODEL

# Dimension — NV-Embed-v2 is 4096, nomic is 768
# Also the length of the zero-vector placeholders produced when embedding fails.
EMBED_DIM = int(os.environ.get("PME_EMBED_DIM", "4096"))
# NOTE(review): not referenced in this module's visible code; presumably read by callers — confirm.
BATCH_SIZE = 100  # 100 is the sweet spot for NV-Embed-v2 on GB10 (0.02s/text vs 0.48s at batch=64)
29
+
30
+
31
def _embed_nv(texts: list[str]) -> list[list[float]] | None:
    """Embed *texts* in a single request to the NV-Embed-v2 service.

    Returns one vector per entry of the response's ``data`` list, or ``None``
    when the request or response parsing fails (the failure is logged as a
    warning).
    """
    try:
        response = httpx.post(NV_EMBED_URL, json={"input": texts}, timeout=60)
        response.raise_for_status()
        return [item["embedding"] for item in response.json()["data"]]
    except Exception as e:
        log.warning(f"NV-Embed-v2 failed: {e}")
        return None
41
+
42
+
43
def _embed_ollama(texts: list[str]) -> list[list[float]]:
    """Embed one-by-one via Ollama.

    Always returns exactly one vector per input text. When a request fails or
    the response carries no usable embedding, a zero vector of length
    EMBED_DIM is substituted and the failure is logged, so it is no longer
    silent.

    NOTE(review): the placeholder length is EMBED_DIM (default 4096) while the
    comment on EMBED_DIM says the default Ollama model is 768-dimensional —
    confirm callers tolerate mixed lengths.
    """
    results = []
    for text in texts:
        try:
            r = httpx.post(OLLAMA_URL, json={"model": OLLAMA_MODEL, "input": text}, timeout=30)
            r.raise_for_status()
            data = r.json()
            # The response may carry either a batched "embeddings" list or a
            # single "embedding" vector; take the first vector either way.
            emb = data.get("embeddings", [data.get("embedding", [])])[0]
        except Exception as e:
            # Best-effort: keep going, but record why this text got a placeholder.
            log.warning(f"Ollama embedding failed: {e}")
            results.append([0.0] * EMBED_DIM)
            continue

        if isinstance(emb, list) and len(emb) > 0:
            results.append(emb)
        else:
            log.warning("Ollama returned no embedding; substituting zero vector")
            results.append([0.0] * EMBED_DIM)
    return results
59
+
60
+
61
def embed_texts(texts: list[str]) -> list[list[float]]:
    """Get embeddings. Tries NV-Embed-v2 first, falls back to Ollama.

    Returns one vector per input text. The primary result is used only when
    it has the same length as the input; otherwise every text is embedded via
    the fallback. Empty input returns an empty list without any request.
    """
    # Nothing to embed: skip the round-trip, and the warning the primary
    # backend would log for an empty request.
    if not texts:
        return []

    if NV_EMBED_ENABLED:
        result = _embed_nv(texts)
        if result and len(result) == len(texts):
            return result

    return _embed_ollama(texts)
69
+
70
+
71
def embed_query(query: str) -> list[float]:
    """Embed one query string; returns a zero vector if no embedding comes back."""
    vectors = embed_texts([query])
    if not vectors:
        return [0.0] * EMBED_DIM
    return vectors[0]
@@ -0,0 +1,36 @@
1
+ """
2
+ pme_memory.health — Health check for the L5 communications layer
3
+ """
4
+
5
+ import httpx
6
+ from .store import CommsStore, COLLECTIONS
7
+ from .embed import EMBED_URL, EMBED_MODEL
8
+
9
+
10
def health_check(store: CommsStore = None) -> dict:
    """Check L5 health: Milvus connectivity, collection stats, embeddings.

    Args:
        store: store to inspect; a new CommsStore is created when omitted.

    Returns:
        On success a dict with ``status`` "ok", ``db_path``, ``collections``,
        ``total_chunks``, ``embeddings`` (whether the Ollama server lists
        EMBED_MODEL) and ``embed_model``. On any store failure — including
        failure to construct the store — ``{"status": "error", "error": ...}``.
    """
    from urllib.parse import urlsplit, urlunsplit

    try:
        # Constructed inside the try so a connection failure is reported as an
        # error status instead of escaping as an exception.
        if store is None:
            store = CommsStore()
        stats = store.collection_stats()
        total = sum(c["count"] for c in stats.values())

        # Check embeddings
        embeddings_ok = False
        try:
            # Derive the tags endpoint from the configured embed URL rather
            # than hard-coding localhost; with the default URL this resolves
            # to http://localhost:11434/api/tags.
            parts = urlsplit(EMBED_URL)
            prefix, found, _ = parts.path.rpartition("/api/")
            tags_path = (prefix if found else "") + "/api/tags"
            tags_url = urlunsplit((parts.scheme, parts.netloc, tags_path, "", ""))

            r = httpx.get(tags_url, timeout=3)
            models = [m["name"] for m in r.json().get("models", [])]
            # Substring match per model name, so tagged names such as
            # "<model>:latest" still count.
            embeddings_ok = any(EMBED_MODEL in str(name) for name in models)
        except Exception:
            # Deliberate best-effort probe: any failure is reported as
            # embeddings=False rather than failing the whole health check.
            pass

        return {
            "status": "ok",
            "db_path": store.uri,
            "collections": stats,
            "total_chunks": total,
            "embeddings": embeddings_ok,
            "embed_model": EMBED_MODEL,
        }
    except Exception as e:
        return {"status": "error", "error": str(e)}
@@ -0,0 +1,159 @@
1
+ """
2
+ pme_memory.hygiene — DAG Hygiene (P2)
3
+
4
+ Periodic maintenance for the artifact DAG:
5
+ 1. Dedupe: collapse artifacts with identical content_hash
6
+ 2. Conflict detection: flag contradicting payloads on same topic
7
+ 3. Branch pruning: mark stale/orphaned branches
8
+ 4. Compaction: rewrite store without pruned entries
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ from collections import defaultdict
15
+ from dataclasses import dataclass
16
+ from pathlib import Path
17
+ from typing import Any, Dict, List, Set
18
+
19
+
20
@dataclass
class HygieneReport:
    """Counters and findings produced by one hygiene pass."""

    total_artifacts: int
    duplicates_found: int
    duplicates_removed: int
    conflicts_detected: List[Dict[str, Any]]
    orphans_found: int
    orphans_pruned: int

    def to_dict(self) -> Dict[str, Any]:
        """Return the report as a dict keyed by field name, in field order."""
        field_names = (
            "total_artifacts",
            "duplicates_found",
            "duplicates_removed",
            "conflicts_detected",
            "orphans_found",
            "orphans_pruned",
        )
        return {name: getattr(self, name) for name in field_names}
38
+
39
+
40
+ def _load_all(store_path: Path) -> List[Dict[str, Any]]:
41
+ if not store_path.exists():
42
+ return []
43
+ out = []
44
+ for line in store_path.read_text(encoding="utf-8").splitlines():
45
+ try:
46
+ out.append(json.loads(line))
47
+ except json.JSONDecodeError:
48
+ continue
49
+ return out
50
+
51
+
52
+ def _write_all(store_path: Path, artifacts: List[Dict[str, Any]]) -> None:
53
+ with store_path.open("w", encoding="utf-8") as f:
54
+ for art in artifacts:
55
+ f.write(json.dumps(art, sort_keys=True, separators=(",", ":")) + "\n")
56
+
57
+
58
def deduplicate(artifacts: List[Dict[str, Any]]) -> tuple[List[Dict[str, Any]], int]:
    """Drop artifacts whose ``content_hash`` was already seen, keeping the first.

    Artifacts with a missing or empty hash are always kept. Returns the
    surviving artifacts in original order and the number dropped.
    """
    seen_hashes = set()
    kept: List[Dict[str, Any]] = []
    for art in artifacts:
        digest = art.get("content_hash", "")
        if digest:
            if digest in seen_hashes:
                continue
            seen_hashes.add(digest)
        kept.append(art)
    return kept, len(artifacts) - len(kept)
72
+
73
+
74
def detect_conflicts(artifacts: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Find artifacts on the same topic with contradicting payload values.

    Every pair of artifacts sharing a topic is compared; each payload key
    present in both with different values yields one conflict record holding
    the topic, the key, the first 12 characters of both artifact ids and the
    first 80 characters of both values. Artifacts without a topic are
    ignored. Shared keys are visited in sorted order so the report is
    reproducible across runs, and a missing, null or non-dict payload is
    treated as empty.
    """
    by_topic: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
    for art in artifacts:
        topic = art.get("topic")
        if topic:
            by_topic[topic].append(art)

    def _payload_of(art: Dict[str, Any]) -> Dict[str, Any]:
        payload = art.get("payload")
        return payload if isinstance(payload, dict) else {}

    conflicts = []
    for topic, arts in by_topic.items():
        if len(arts) < 2:
            continue
        # Compare payload keys across artifacts in same topic
        for i, a in enumerate(arts):
            a_payload = _payload_of(a)
            for b in arts[i + 1:]:
                b_payload = _payload_of(b)
                shared_keys = set(a_payload.keys()) & set(b_payload.keys())
                # Set iteration order is not stable between runs; sort it.
                for k in sorted(shared_keys, key=str):
                    if a_payload[k] != b_payload[k]:
                        conflicts.append({
                            "topic": topic,
                            "key": k,
                            "artifact_a": a["artifact_id"][:12],
                            "value_a": str(a_payload[k])[:80],
                            "artifact_b": b["artifact_id"][:12],
                            "value_b": str(b_payload[k])[:80],
                        })
    return conflicts
103
+
104
+
105
def find_orphans(artifacts: List[Dict[str, Any]]) -> Set[str]:
    """Return the ids of artifacts that list a parent id absent from *artifacts*."""
    known_ids = {a["artifact_id"] for a in artifacts}
    return {
        art["artifact_id"]
        for art in artifacts
        if any(pid not in known_ids for pid in art.get("parents", []))
    }
114
+
115
+
116
def run_hygiene(
    store_path: str | Path,
    prune_orphans: bool = False,
    dry_run: bool = True,
) -> HygieneReport:
    """Run one hygiene pass over the artifact store and report what it found.

    Steps, in order: load, deduplicate, detect conflicts, find orphans,
    optionally drop the orphans, and write the result back.

    Args:
        store_path: path to artifacts.jsonl
        prune_orphans: if True, remove orphaned artifacts
        dry_run: if True, don't write changes back

    Returns:
        A HygieneReport. The ``*_removed`` / ``*_pruned`` counters are zero on
        a dry run, since nothing is written.
    """
    store_path = Path(store_path)
    loaded = _load_all(store_path)

    # Conflicts and orphans are both evaluated on the deduplicated set.
    survivors, dupe_count = deduplicate(loaded)
    conflicts = detect_conflicts(survivors)
    orphan_ids = find_orphans(survivors)

    pruned_count = 0
    if prune_orphans and orphan_ids:
        survivors = [a for a in survivors if a["artifact_id"] not in orphan_ids]
        pruned_count = len(orphan_ids)

    applied = not dry_run
    # Rewrite the store only when something would actually change.
    if applied and (dupe_count + pruned_count) > 0:
        _write_all(store_path, survivors)

    return HygieneReport(
        total_artifacts=len(loaded),
        duplicates_found=dupe_count,
        duplicates_removed=dupe_count if applied else 0,
        conflicts_detected=conflicts,
        orphans_found=len(orphan_ids),
        orphans_pruned=pruned_count if applied else 0,
    )