@pentatonic-ai/ai-agent-sdk 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +170 -69
- package/bin/__tests__/callback-server.test.js +4 -1
- package/bin/cli.js +41 -164
- package/bin/commands/config.js +251 -0
- package/package.json +2 -1
- package/packages/doctor/__tests__/detect.test.js +2 -6
- package/packages/doctor/src/checks/local-memory.js +164 -196
- package/packages/doctor/src/detect.js +11 -3
- package/packages/memory/src/corpus/adapters.js +104 -0
- package/packages/memory/src/corpus/cli.js +72 -7
- package/packages/memory/src/corpus/index.js +1 -1
- package/packages/memory-engine/.env.example +13 -0
- package/packages/memory-engine/README.md +131 -0
- package/packages/memory-engine/bench/README.md +99 -0
- package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +1115 -0
- package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +819 -0
- package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +1278 -0
- package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +1018 -0
- package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +1038 -0
- package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +961 -0
- package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +1115 -0
- package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +819 -0
- package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +1278 -0
- package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +1018 -0
- package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +1038 -0
- package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +937 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +1115 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +819 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +1278 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +1018 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +1038 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +961 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +1115 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +819 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +1278 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +1018 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +1038 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +883 -0
- package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +1115 -0
- package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +819 -0
- package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +1278 -0
- package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +1018 -0
- package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +1038 -0
- package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +937 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +1115 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +1115 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +819 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +542 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +1278 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +894 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +1018 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +680 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +1038 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +693 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +961 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +727 -0
- package/packages/memory-engine/compat/Dockerfile +11 -0
- package/packages/memory-engine/compat/server.py +680 -0
- package/packages/memory-engine/docker-compose.yml +243 -0
- package/packages/memory-engine/docs/MIGRATION.md +178 -0
- package/packages/memory-engine/docs/RUNBOOK-AWS.md +375 -0
- package/packages/memory-engine/docs/why-v05-underperforms.md +138 -0
- package/packages/memory-engine/engine/README.md +52 -0
- package/packages/memory-engine/engine/l2-hybridrag-proxy.py +1543 -0
- package/packages/memory-engine/engine/l5-comms-layer.py +663 -0
- package/packages/memory-engine/engine/l6-document-store.py +1018 -0
- package/packages/memory-engine/engine/services/l2/Dockerfile +41 -0
- package/packages/memory-engine/engine/services/l2/init_databases.py +81 -0
- package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +1543 -0
- package/packages/memory-engine/engine/services/l4/Dockerfile +15 -0
- package/packages/memory-engine/engine/services/l4/server.py +235 -0
- package/packages/memory-engine/engine/services/l5/Dockerfile +9 -0
- package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +678 -0
- package/packages/memory-engine/engine/services/l6/Dockerfile +11 -0
- package/packages/memory-engine/engine/services/l6/l6-document-store.py +1016 -0
- package/packages/memory-engine/engine/services/nv-embed/Dockerfile +28 -0
- package/packages/memory-engine/engine/services/nv-embed/server.py +152 -0
- package/packages/memory-engine/pme_memory/__init__.py +0 -0
- package/packages/memory-engine/pme_memory/__main__.py +129 -0
- package/packages/memory-engine/pme_memory/artifacts.py +95 -0
- package/packages/memory-engine/pme_memory/embed.py +74 -0
- package/packages/memory-engine/pme_memory/health.py +36 -0
- package/packages/memory-engine/pme_memory/hygiene.py +159 -0
- package/packages/memory-engine/pme_memory/indexer.py +200 -0
- package/packages/memory-engine/pme_memory/needs.py +55 -0
- package/packages/memory-engine/pme_memory/provenance.py +80 -0
- package/packages/memory-engine/pme_memory/scoring.py +168 -0
- package/packages/memory-engine/pme_memory/search.py +52 -0
- package/packages/memory-engine/pme_memory/store.py +86 -0
- package/packages/memory-engine/pme_memory/synthesis.py +114 -0
- package/packages/memory-engine/pyproject.toml +65 -0
- package/packages/memory-engine/scripts/kg-extractor.py +557 -0
- package/packages/memory-engine/scripts/kg-preflexor-v2.py +738 -0
- package/packages/memory-engine/tests/test_api_contract.sh +57 -0
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# L4 sqlite-vec sidecar image: runs server.py (FastAPI app served by uvicorn).
FROM python:3.12-slim

WORKDIR /app

# Third-party packages imported by server.py. Versions are not pinned here.
RUN pip install --no-cache-dir fastapi 'uvicorn[standard]' httpx pydantic

COPY server.py /app/server.py

# Directory that holds the SQLite file named by L4_DB_PATH.
RUN mkdir -p /data
ENV L4_DB_PATH=/data/vec.db
ENV PORT=8042

EXPOSE 8042

# No explicit --port flag: server.py defaults --port to $PORT, so overriding
# PORT at run time takes effect. A hard-coded "--port 8042" would always win
# over the environment variable and make ENV PORT above dead configuration.
CMD ["python", "server.py"]
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
"""
|
|
2
|
+
L4 sqlite-vec sidecar.

Vector index sidecar for the Pentatonic Memory Engine stack.
Exposes /health, /search, /index-batch, /refresh over HTTP.

Endpoints:
  GET  /health
  POST /search       body: {"query": "...", "limit": 10}
  POST /index-batch  body: {"records": [{"id": "...", "text": "..."}, ...]}
  POST /refresh      no-op (sqlite-vec writes are immediate)

Env:
  L4_DB_PATH         default /data/vec.db
  L4_NV_EMBED_URL    default http://nv-embed:8041/v1/embeddings
  L4_EMBED_MODEL     default nv-embed-v2
  L4_EMBED_API_KEY   default empty (no Authorization header is sent)
  L4_EMBED_DIM       default 4096
  PORT               default 8042
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import argparse
|
|
22
|
+
import hashlib
|
|
23
|
+
import os
|
|
24
|
+
import sqlite3
|
|
25
|
+
import struct
|
|
26
|
+
import time
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
from typing import Any
|
|
29
|
+
|
|
30
|
+
import httpx
|
|
31
|
+
from fastapi import FastAPI, HTTPException
|
|
32
|
+
from pydantic import BaseModel
|
|
33
|
+
|
|
34
|
+
# ----------------------------------------------------------------------
|
|
35
|
+
# Config
|
|
36
|
+
# ----------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
# Filesystem path of the SQLite database file that stores the indexed chunks.
DB_PATH = os.environ.get("L4_DB_PATH", "/data/vec.db")
# URL that embedding requests are POSTed to.
NV_EMBED_URL = os.environ.get("L4_NV_EMBED_URL", "http://nv-embed:8041/v1/embeddings")
# Embedding model name sent in /v1/embeddings request body. Defaults to
# the production NV-Embed-v2 name; override via env when pointing at a
# different OpenAI-compat endpoint (e.g. Ollama with nomic-embed-text).
EMBED_MODEL_NAME = os.environ.get("L4_EMBED_MODEL", "nv-embed-v2")
# Optional Authorization: Bearer <key> for the embedding endpoint.
# Set when calling a hosted gateway (e.g. pentatonic-ai-gateway). Empty = no auth.
EMBED_API_KEY = os.environ.get("L4_EMBED_API_KEY", "")
# Embedding dimensionality echoed back by /health. Informational only in this
# file: stored and query vectors are never checked against it. A non-integer
# value makes int() raise when the module is imported.
EMBED_DIM = int(os.environ.get("L4_EMBED_DIM", "4096"))
|
|
48
|
+
|
|
49
|
+
def _embed_headers() -> dict:
    """Build the HTTP headers attached to embedding requests.

    Returns a bearer ``Authorization`` header when ``EMBED_API_KEY`` is
    non-empty, and an empty dict when no key is configured.
    """
    if not EMBED_API_KEY:
        return {}
    return {"Authorization": f"Bearer {EMBED_API_KEY}"}
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# ----------------------------------------------------------------------
|
|
54
|
+
# DB helpers
|
|
55
|
+
# ----------------------------------------------------------------------
|
|
56
|
+
|
|
57
|
+
def _vec_to_blob(vec: list[float]) -> bytes:
|
|
58
|
+
"""Pack a list of floats as little-endian f32 bytes for sqlite-vec."""
|
|
59
|
+
return struct.pack(f"<{len(vec)}f", *vec)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _blob_to_vec(blob: bytes) -> list[float]:
|
|
63
|
+
n = len(blob) // 4
|
|
64
|
+
return list(struct.unpack(f"<{n}f", blob))
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _cosine(a: list[float], b: list[float]) -> float:
|
|
68
|
+
import math
|
|
69
|
+
dot = sum(x * y for x, y in zip(a, b))
|
|
70
|
+
na = math.sqrt(sum(x * x for x in a))
|
|
71
|
+
nb = math.sqrt(sum(y * y for y in b))
|
|
72
|
+
if na == 0 or nb == 0:
|
|
73
|
+
return 0.0
|
|
74
|
+
return dot / (na * nb)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _get_db() -> sqlite3.Connection:
    """Open the SQLite database at ``DB_PATH`` and ensure the schema exists.

    Embeddings are kept in a plain BLOB column rather than a sqlite-vec
    virtual table: sqlite-vec is an optional extension that may not be
    loadable in every container, and a plain BLOB allows a Python-side
    cosine pass as a fallback without losing correctness.

    Returns:
        An open connection. The caller is responsible for closing it.

    Raises:
        OSError: if the parent directory of ``DB_PATH`` cannot be created.
        sqlite3.Error: if connecting, enabling WAL, or creating the table
            fails. The connection is closed before the error propagates.
    """
    Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(DB_PATH, timeout=10)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("""
            CREATE TABLE IF NOT EXISTS chunks (
                id TEXT PRIMARY KEY,
                text TEXT,
                embedding BLOB,
                indexed_at REAL
            )
        """)
    except Exception:
        # Setup failed (e.g. "database is locked"): release the handle
        # instead of leaking it, then let the caller see the error.
        conn.close()
        raise
    return conn
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# ----------------------------------------------------------------------
|
|
98
|
+
# Embedding client
|
|
99
|
+
# ----------------------------------------------------------------------
|
|
100
|
+
|
|
101
|
+
# Process-wide HTTP client; stays None until _client() is first called.
_http: httpx.AsyncClient | None = None


def _client() -> httpx.AsyncClient:
    """Return the shared ``httpx.AsyncClient``, building it on first call.

    The client is created with a 120-second timeout and cached in the
    module-level ``_http`` so that later calls reuse the same instance.
    """
    global _http
    if _http is not None:
        return _http
    _http = httpx.AsyncClient(timeout=120.0)
    return _http
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
async def _embed_batch(texts: list[str]) -> list[list[float]]:
    """Embed *texts* with a single POST to the configured embeddings endpoint.

    Args:
        texts: Strings to embed. An empty list returns ``[]`` without any
            network call.

    Returns:
        One embedding per input, in input order.

    Raises:
        httpx.HTTPError: on transport failure or a non-2xx response.
        ValueError: if the response does not contain exactly one item per
            input. Callers pair results with inputs by position, so a
            short or long response would attach vectors to the wrong text.
    """
    if not texts:
        return []
    resp = await _client().post(
        NV_EMBED_URL,
        headers=_embed_headers(),
        json={"input": texts, "model": EMBED_MODEL_NAME},
        timeout=120.0,
    )
    resp.raise_for_status()
    data = resp.json()["data"]
    if len(data) != len(texts):
        raise ValueError(
            f"embedding endpoint returned {len(data)} items for {len(texts)} inputs"
        )
    # OpenAI-compatible responses tag each item with the `index` of the input
    # it belongs to. Honour it when every item carries one; otherwise keep
    # trusting the response order.
    if all(isinstance(d.get("index"), int) for d in data):
        data = sorted(data, key=lambda d: d["index"])
    return [d["embedding"] for d in data]
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# ----------------------------------------------------------------------
|
|
125
|
+
# FastAPI
|
|
126
|
+
# ----------------------------------------------------------------------
|
|
127
|
+
|
|
128
|
+
class SearchRequest(BaseModel):
    # Request body for POST /search.
    # query: text that is embedded and compared against the stored chunks;
    # an empty string makes the handler return an empty list.
    query: str
    # limit: maximum number of hits to return (10 when omitted).
    limit: int = 10
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class IndexBatchRequest(BaseModel):
    # Request body for POST /index-batch.
    # records: free-form dicts. The handler reads an optional "id" and the
    # text under "text" (or "content" as a fallback) from each record.
    records: list[dict[str, Any]]
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# ASGI application object. The script entrypoint at the bottom of this file
# hands it to uvicorn through the "server:app" import string.
app = FastAPI(title="L4 sqlite-vec sidecar (OSS)")
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
@app.get("/health")
def health():
    """Report whether the database is reachable and how many vectors it holds.

    Returns:
        On success, a dict with ``status`` "ok", the row count of ``chunks``
        as ``n_vectors``, the configured ``dim`` and ``db_path``, and the
        backend label. On any failure, ``{"status": "degraded", "error": ...}``.
        Both cases are returned as a normal response; nothing is raised.
    """
    try:
        conn = _get_db()
        try:
            n = conn.execute("SELECT COUNT(*) FROM chunks").fetchone()[0]
        finally:
            # Close even when the query raises, so repeated failing probes
            # do not accumulate open handles.
            conn.close()
    except Exception as exc:
        return {"status": "degraded", "error": str(exc)}
    return {"status": "ok", "loaded": True, "n_vectors": n,
            "dim": EMBED_DIM, "db_path": DB_PATH, "backend": "sqlite-vec-fallback"}
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
@app.post("/search")
async def search(req: SearchRequest):
    """Return the stored chunks most similar to ``req.query``.

    The query is embedded, then compared by cosine similarity against every
    stored embedding in Python. Rows with no embedding, an undecodable
    embedding, or a dimension different from the query vector are skipped.

    Args:
        req: ``query`` text and ``limit`` (maximum number of hits; values
            below zero are treated as zero).

    Returns:
        A list of dicts with ``path`` (the chunk id), ``text``, ``score``,
        ``source`` and ``layer``, best match first. Empty when the query is
        empty.

    Raises:
        HTTPException: 502 when the embedding call fails or yields nothing.
    """
    if not req.query:
        return []
    try:
        embs = await _embed_batch([req.query])
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"embed: {exc}") from exc
    # Checked outside the try block: raised inside it, this HTTPException
    # would be caught by the handler above and re-wrapped with a mangled
    # detail string.
    if not embs or embs[0] is None:
        raise HTTPException(status_code=502, detail="embed failed")
    q_vec = embs[0]

    conn = _get_db()
    try:
        rows = conn.execute("SELECT id, text, embedding FROM chunks").fetchall()
    finally:
        conn.close()

    # Cosine similarity in Python — fine for OSS / small corpora. For
    # large corpora: consider a dedicated vector DB.
    scored: list[tuple[float, str, str]] = []
    for rid, text, blob in rows:
        if not blob:
            continue
        try:
            v = _blob_to_vec(blob)
        except struct.error:
            # One corrupt row must not fail the whole search.
            continue
        if len(v) != len(q_vec):
            continue
        scored.append((_cosine(q_vec, v), rid, text))
    scored.sort(reverse=True)

    # A negative limit would otherwise slice from the end of the list.
    limit = max(req.limit, 0)
    return [
        {"path": rid, "text": text, "score": float(s),
         "source": "L4-sqlite-vec", "layer": "L4"}
        for s, rid, text in scored[:limit]
    ]
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
@app.post("/index-batch")
async def index_batch(req: IndexBatchRequest):
    """Embed a batch of records and upsert them into the ``chunks`` table.

    Each record's text is taken from ``"text"`` (or ``"content"``) and cut
    to 8192 characters before embedding. A record without an ``"id"`` gets
    one derived from the SHA-1 of its text. Records whose embedding comes
    back empty are skipped. Existing ids are replaced.

    Args:
        req: The batch of records to index.

    Returns:
        ``status``, the number of rows ``inserted``, and the embed / insert
        timings in milliseconds. An empty batch returns early with
        ``inserted`` 0 and no timings.

    Raises:
        HTTPException: 502 when the embedding call fails or returns a
            different number of embeddings than texts sent.
    """
    if not req.records:
        return {"status": "ok", "inserted": 0}
    texts = [(r.get("text") or r.get("content") or "")[:8192] for r in req.records]
    t0 = time.perf_counter()
    try:
        embs = await _embed_batch(texts)
    except Exception as exc:
        # Same mapping as /search: an upstream embedding failure is a 502,
        # not an unhandled 500.
        raise HTTPException(status_code=502, detail=f"embed: {exc}") from exc
    embed_ms = (time.perf_counter() - t0) * 1000.0
    if len(embs) != len(texts):
        # zip() below would silently drop or mis-pair records otherwise.
        raise HTTPException(
            status_code=502,
            detail=f"embed: expected {len(texts)} embeddings, got {len(embs)}",
        )

    conn = _get_db()
    try:
        t1 = time.perf_counter()
        rows = []
        for r, emb, txt in zip(req.records, embs, texts):
            if not emb:
                continue
            rid = r.get("id") or hashlib.sha1(txt.encode("utf-8")).hexdigest()[:32]
            rows.append((rid, txt, _vec_to_blob(emb), time.time()))
        if rows:
            conn.executemany(
                "INSERT OR REPLACE INTO chunks(id, text, embedding, indexed_at) "
                "VALUES (?, ?, ?, ?)", rows,
            )
            conn.commit()
        insert_ms = (time.perf_counter() - t1) * 1000.0
    finally:
        # Release the handle even when packing or the insert raises.
        conn.close()
    return {"status": "ok", "inserted": len(rows),
            "embed_ms": round(embed_ms, 1), "insert_ms": round(insert_ms, 1)}
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
@app.post("/refresh")
def refresh():
    """API-parity stub: nothing to refresh because sqlite-vec writes land immediately."""
    return dict(status="ok", noop=True)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
# ----------------------------------------------------------------------
|
|
224
|
+
# Entrypoint
|
|
225
|
+
# ----------------------------------------------------------------------
|
|
226
|
+
|
|
227
|
+
if __name__ == "__main__":
    # Script entrypoint: parse the CLI flags, then serve the app with uvicorn.
    parser = argparse.ArgumentParser()
    # --port defaults to $PORT, and to 8042 when PORT is unset.
    parser.add_argument("--port", type=int, default=int(os.environ.get("PORT", "8042")))
    # --data-dir, when given, relocates the database to <data-dir>/vec.db.
    parser.add_argument("--data-dir", default=None)
    args = parser.parse_args()
    if args.data_dir:
        # DB_PATH in this module was already bound at import time, so the
        # override is published through the environment instead.
        # NOTE(review): this appears to rely on uvicorn importing "server" as
        # a fresh module (re-reading L4_DB_PATH) because the app is passed as
        # an import string — confirm before passing the `app` object directly.
        os.environ["L4_DB_PATH"] = str(Path(args.data_dir) / "vec.db")
    # Imported only on the script path; importing this module from elsewhere
    # does not execute this import.
    import uvicorn
    # The import string requires this file to be importable as `server`.
    uvicorn.run("server:app", host="0.0.0.0", port=args.port, log_level="info")
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# Image for the L5 comms layer service.
FROM python:3.12-slim
WORKDIR /app
# NOTE(review): "setuptools<70" is installed alongside pymilvus[milvus_lite];
# presumably a compatibility pin — confirm before relaxing it.
RUN pip install --no-cache-dir fastapi "uvicorn[standard]" httpx "pymilvus[milvus_lite]" "setuptools<70" pydantic
# The service source is copied in under the generic name server.py.
COPY l5-comms-layer.py /app/server.py
RUN mkdir -p /data
ENV L5_DB_PATH=/data/comms.db
EXPOSE 8034
# NOTE(review): HOST is presumably read by the server to choose its bind
# address — verify against l5-comms-layer.py.
ENV HOST=0.0.0.0
CMD ["python", "server.py", "serve", "--port", "8034"]
|