@pentatonic-ai/ai-agent-sdk 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/README.md +170 -69
  2. package/bin/__tests__/callback-server.test.js +4 -1
  3. package/bin/cli.js +41 -164
  4. package/bin/commands/config.js +251 -0
  5. package/package.json +2 -1
  6. package/packages/doctor/__tests__/detect.test.js +2 -6
  7. package/packages/doctor/src/checks/local-memory.js +164 -196
  8. package/packages/doctor/src/detect.js +11 -3
  9. package/packages/memory/src/corpus/adapters.js +104 -0
  10. package/packages/memory/src/corpus/cli.js +72 -7
  11. package/packages/memory/src/corpus/index.js +1 -1
  12. package/packages/memory-engine/.env.example +13 -0
  13. package/packages/memory-engine/README.md +131 -0
  14. package/packages/memory-engine/bench/README.md +99 -0
  15. package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +1115 -0
  16. package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +819 -0
  17. package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +1278 -0
  18. package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +1018 -0
  19. package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +1038 -0
  20. package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +961 -0
  21. package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +1115 -0
  22. package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +819 -0
  23. package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +1278 -0
  24. package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +1018 -0
  25. package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +1038 -0
  26. package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +937 -0
  27. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +1115 -0
  28. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +819 -0
  29. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +1278 -0
  30. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +1018 -0
  31. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +1038 -0
  32. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +961 -0
  33. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +1115 -0
  34. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +819 -0
  35. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +1278 -0
  36. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +1018 -0
  37. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +1038 -0
  38. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +883 -0
  39. package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +1115 -0
  40. package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +819 -0
  41. package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +1278 -0
  42. package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +1018 -0
  43. package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +1038 -0
  44. package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +937 -0
  45. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +1115 -0
  46. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +1115 -0
  47. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +819 -0
  48. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +542 -0
  49. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +1278 -0
  50. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +894 -0
  51. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +1018 -0
  52. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +680 -0
  53. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +1038 -0
  54. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +693 -0
  55. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +961 -0
  56. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +727 -0
  57. package/packages/memory-engine/compat/Dockerfile +11 -0
  58. package/packages/memory-engine/compat/server.py +680 -0
  59. package/packages/memory-engine/docker-compose.yml +243 -0
  60. package/packages/memory-engine/docs/MIGRATION.md +178 -0
  61. package/packages/memory-engine/docs/RUNBOOK-AWS.md +375 -0
  62. package/packages/memory-engine/docs/why-v05-underperforms.md +138 -0
  63. package/packages/memory-engine/engine/README.md +52 -0
  64. package/packages/memory-engine/engine/l2-hybridrag-proxy.py +1543 -0
  65. package/packages/memory-engine/engine/l5-comms-layer.py +663 -0
  66. package/packages/memory-engine/engine/l6-document-store.py +1018 -0
  67. package/packages/memory-engine/engine/services/l2/Dockerfile +41 -0
  68. package/packages/memory-engine/engine/services/l2/init_databases.py +81 -0
  69. package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +1543 -0
  70. package/packages/memory-engine/engine/services/l4/Dockerfile +15 -0
  71. package/packages/memory-engine/engine/services/l4/server.py +235 -0
  72. package/packages/memory-engine/engine/services/l5/Dockerfile +9 -0
  73. package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +678 -0
  74. package/packages/memory-engine/engine/services/l6/Dockerfile +11 -0
  75. package/packages/memory-engine/engine/services/l6/l6-document-store.py +1016 -0
  76. package/packages/memory-engine/engine/services/nv-embed/Dockerfile +28 -0
  77. package/packages/memory-engine/engine/services/nv-embed/server.py +152 -0
  78. package/packages/memory-engine/pme_memory/__init__.py +0 -0
  79. package/packages/memory-engine/pme_memory/__main__.py +129 -0
  80. package/packages/memory-engine/pme_memory/artifacts.py +95 -0
  81. package/packages/memory-engine/pme_memory/embed.py +74 -0
  82. package/packages/memory-engine/pme_memory/health.py +36 -0
  83. package/packages/memory-engine/pme_memory/hygiene.py +159 -0
  84. package/packages/memory-engine/pme_memory/indexer.py +200 -0
  85. package/packages/memory-engine/pme_memory/needs.py +55 -0
  86. package/packages/memory-engine/pme_memory/provenance.py +80 -0
  87. package/packages/memory-engine/pme_memory/scoring.py +168 -0
  88. package/packages/memory-engine/pme_memory/search.py +52 -0
  89. package/packages/memory-engine/pme_memory/store.py +86 -0
  90. package/packages/memory-engine/pme_memory/synthesis.py +114 -0
  91. package/packages/memory-engine/pyproject.toml +65 -0
  92. package/packages/memory-engine/scripts/kg-extractor.py +557 -0
  93. package/packages/memory-engine/scripts/kg-preflexor-v2.py +738 -0
  94. package/packages/memory-engine/tests/test_api_contract.sh +57 -0
@@ -0,0 +1,11 @@
1
# Image for the compatibility shim: serves server.py with uvicorn on port 8099.
FROM python:3.12-slim

WORKDIR /app

# The extras requirement is quoted so the shell never treats "[standard]"
# as a glob pattern.
RUN pip install --no-cache-dir fastapi "uvicorn[standard]" httpx pydantic

COPY server.py /app/server.py

# Drop root: the shim only reads /app and binds an unprivileged port.
RUN useradd --system --no-create-home shim
USER shim

EXPOSE 8099

CMD ["python", "-m", "uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8099"]
@@ -0,0 +1,680 @@
1
+ """
2
+ pentatonic-memory-engine compatibility shim.
3
+
4
+ Exposes the same HTTP API as `pentatonic-memory` v0.5.x (POST /store,
5
+ POST /search, GET /health) plus the v0.6 regression-fix endpoints
6
+ (POST /store-batch, POST /forget). Internally routes every call through
7
+ the 7-layer hybrid retrieval engine running in sibling containers
8
+ (L0 BM25, L1 core files, L2 HybridRAG orchestrator, L3 Knowledge Graph,
9
+ L4 vec, L5 Milvus, L6 doc-store).
10
+
11
+ Drop-in replacement: change a single base URL in your existing
12
+ pentatonic-memory SDK client and you get ~5x retrieval accuracy
13
+ without touching anything else.
14
+
15
+ Endpoints:
16
+
17
+ POST /store { content, metadata } → { id, content, layerId }
18
+ POST /store-batch { records: [{ id?, content, metadata }], arena? } → { status, inserted, ids[], engine, duration_ms }
19
+ POST /search { query, limit, min_score } → { results: [...] }
20
+ POST /forget { metadata_contains } | { id } → { deleted: N }
21
+ GET /health → { status, layers: {...}, memories }
22
+
23
+ Environment:
24
+
25
+ L0_URL default http://l0:8030
26
+ L2_PROXY_URL default http://l2:8031
27
+ L3_KG_URL default http://l3:8047
28
+ L4_VEC_URL default http://l4:8042
29
+ L5_MILVUS_URL default http://l5:8035
30
+ L6_DOC_URL default http://l6:8037
31
+ NV_EMBED_URL default http://nv-embed:8041/v1/embeddings
32
+ PORT default 8099 (matches pentatonic-memory v0.5)
33
+ CLIENT_ID default "default"
34
+ """
35
+
36
+ import hashlib
37
+ import os
38
+ import time
39
+ from datetime import datetime, timezone
40
+ from typing import Any, Optional
41
+
42
+ import httpx
43
+ from fastapi import FastAPI, HTTPException
44
+ from pydantic import BaseModel, Field
45
+
46
+ # ----------------------------------------------------------------------
47
+ # Config
48
+ # ----------------------------------------------------------------------
49
+
50
# Base URL of each sibling layer service. Every value can be overridden
# through the environment variable of the same name.
L0_URL = os.environ.get("L0_URL", "http://l0:8030")
L2_PROXY_URL = os.environ.get("L2_PROXY_URL", "http://l2:8031")
L3_KG_URL = os.environ.get("L3_KG_URL", "http://l3:8047")
L4_VEC_URL = os.environ.get("L4_VEC_URL", "http://l4:8042")
L5_MILVUS_URL = os.environ.get("L5_MILVUS_URL", "http://l5:8035")
L6_DOC_URL = os.environ.get("L6_DOC_URL", "http://l6:8037")
NV_EMBED_URL = os.environ.get("NV_EMBED_URL", "http://nv-embed:8041/v1/embeddings")

# Listening port used when this module is executed directly.
PORT = int(os.environ.get("PORT", "8099"))
CLIENT_ID = os.environ.get("CLIENT_ID", "default")

# Test/isolated mode: bypass the L2 HybridRAG orchestrator and query L6 directly.
# Useful for bench harnesses where you want to validate the ingest+search
# round-trip against an isolated test L6 instance, without the L2 proxy
# pulling in production data from other layers.
# The flag is matched case-insensitively and ignoring surrounding whitespace,
# so "TRUE" or " yes " enable it just like "1".
BYPASS_L2 = os.environ.get("BYPASS_L2_PROXY", "0").strip().lower() in ("1", "true", "yes")

VERSION = "0.1.0"
68
+
69
+
70
+ # ----------------------------------------------------------------------
71
+ # Request / response models (match pentatonic-memory v0.5 wire format)
72
+ # ----------------------------------------------------------------------
73
+
74
class StoreRequest(BaseModel):
    """Body of POST /store: one piece of content plus optional metadata."""

    content: str
    metadata: dict[str, Any] | None = None
77
+
78
+
79
class StoreBatchRequest(BaseModel):
    """Body of POST /store-batch: a list of raw record dicts and an arena name."""

    # Records are left as plain dicts; the handler normalises them.
    records: list[dict[str, Any]] = Field(default_factory=list)
    arena: str | None = "general"
82
+
83
+
84
class SearchRequest(BaseModel):
    """Body of POST /search: the query text, a result cap and a score floor."""

    query: str
    limit: int | None = 10
    min_score: float | None = 0.001
88
+
89
+
90
class ForgetRequest(BaseModel):
    """Body of POST /forget: select records by metadata filter or by id."""

    metadata_contains: dict[str, Any] | None = None
    id: str | None = None
93
+
94
+
95
+ # ----------------------------------------------------------------------
96
+ # Engine clients (one per layer)
97
+ # ----------------------------------------------------------------------
98
+
99
# Shared HTTP client; stays None until _client() creates it on first use.
_http: httpx.AsyncClient | None = None
100
+
101
+ # In-memory metadata sidecar — per-id stash so arbitrary client metadata
102
+ # (e.g. {"bench_tag": "...", "doc_id": "..."}) survives the round-trip
103
+ # even when the underlying L5/L6 schemas don't carry a JSON metadata column.
104
+ # Bounded to the most recent 100k entries to avoid leaking memory in long-
105
+ # running deployments. Resets on shim restart.
106
+ from collections import OrderedDict
107
+ _META_CACHE: "OrderedDict[str, dict[str, Any]]" = OrderedDict()
108
+ _META_CACHE_MAX = 100_000
109
+
110
+ def _stash_meta(rid: str, meta: dict[str, Any] | None) -> None:
111
+ if not rid:
112
+ return
113
+ _META_CACHE[rid] = meta or {}
114
+ while len(_META_CACHE) > _META_CACHE_MAX:
115
+ _META_CACHE.popitem(last=False)
116
+
117
+ def _lookup_meta(rid: str) -> dict[str, Any]:
118
+ return _META_CACHE.get(rid, {}) if rid else {}
119
+
120
+
121
+ def _stash_all_keys(rid: str, meta: dict[str, Any], arena: str = "general") -> None:
122
+ """Stash meta under every id-shape any of the 7 layers might echo back.
123
+
124
+ L4 sidecar: rid (and `<rid>.md`)
125
+ L5 comms: rid (path may be `.pentatonic/chats/<rid>.jsonl`)
126
+ L6 doc-store: `l6:<rid>:0`, `<rid>.md` (chunk_id, source_file)
127
+ L2 internal L0/L4-qmd: `bench/<arena>/<rid>.md`,
128
+ `bench/<arena>/<doc_id>.md`
129
+ L3 graph chunk: rid (Chunk.id) and doc_id
130
+ """
131
+ if not rid:
132
+ return
133
+ meta = meta or {}
134
+ keys = {
135
+ rid,
136
+ f"{rid}.md",
137
+ f"l6:{rid}:0",
138
+ f"bench/{arena}/{rid}.md",
139
+ f"bench/{arena}/{rid}",
140
+ }
141
+ doc_id = meta.get("doc_id")
142
+ if doc_id:
143
+ keys.update({
144
+ doc_id,
145
+ f"{doc_id}.md",
146
+ f"l6:{doc_id}:0",
147
+ f"bench/{arena}/{doc_id}.md",
148
+ f"bench/{arena}/{doc_id}",
149
+ })
150
+ path = meta.get("path")
151
+ if path:
152
+ keys.add(path)
153
+ keys.add(path.rsplit(".", 1)[0])
154
+ for k in keys:
155
+ if k:
156
+ _stash_meta(k, meta)
157
+
158
+
159
def _client() -> httpx.AsyncClient:
    """Return the module-wide AsyncClient, creating it on the first call."""
    global _http
    if _http is not None:
        return _http
    # 60 s default timeout; individual calls may pass their own.
    _http = httpx.AsyncClient(timeout=60.0)
    return _http
164
+
165
+
166
async def _embed_batch(texts: list[str]) -> list[list[float]]:
    """Embed many texts with one NV-Embed call and return vectors in input order.

    Returns an empty list without any network call when *texts* is empty.
    Raises whatever the HTTP client raises on transport errors or a non-2xx
    response (via ``raise_for_status``).
    """
    if not texts:
        return []
    resp = await _client().post(
        NV_EMBED_URL,
        json={"input": texts, "model": "nv-embed-v2"},
        timeout=120.0,
    )
    resp.raise_for_status()
    rows = resp.json()["data"]

    def _position(pair: tuple[int, dict[str, Any]]) -> int:
        # Prefer the per-row "index" field when the server supplies one;
        # otherwise fall back to the row's position in the response.
        pos, row = pair
        idx = row.get("index")
        return idx if isinstance(idx, int) else pos

    # sorted() is stable, so a response without "index" fields keeps the
    # server's ordering exactly as before.
    return [row["embedding"] for _, row in sorted(enumerate(rows), key=_position)]
177
+
178
+
179
async def _index_l4(records: list[dict[str, Any]]) -> int:
    """Send records to the L4 layer's /index-batch endpoint.

    Each record needs a "content" key. A record without an "id" gets the
    first 32 hex characters of the SHA-1 of its content. Returns the
    "inserted" count reported by L4, or 0 if the request fails for any reason.
    """
    rows = []
    for rec in records:
        rec_id = rec.get("id") or hashlib.sha1(rec["content"].encode()).hexdigest()[:32]
        rows.append({"id": rec_id, "text": rec["content"]})
    body = {"records": rows}
    try:
        reply = await _client().post(f"{L4_VEC_URL}/index-batch", json=body, timeout=120.0)
        reply.raise_for_status()
        return reply.json().get("inserted", 0)
    except Exception as exc:
        # Best-effort: a failing layer is logged and reported as zero inserts.
        print(f"[shim] L4 index-batch failed: {exc}")
        return 0
192
+
193
+
194
async def _index_l5(records: list[dict[str, Any]]) -> int:
    """Send records to the L5 layer's /index-batch endpoint ("chats" collection).

    "source" and "contact" are taken from each record's metadata ("source"
    and "user" keys), defaulting to "shim" and "" respectively. Returns the
    "inserted" count reported by L5, or 0 if the request fails.
    """
    rows = []
    for rec in records:
        rec_id = rec.get("id") or hashlib.sha1(rec["content"].encode()).hexdigest()[:32]
        rows.append({
            "id": rec_id,
            "text": rec["content"],
            "source": (rec.get("metadata") or {}).get("source", "shim"),
            "channel": "pentatonic-memory",
            "contact": (rec.get("metadata") or {}).get("user", ""),
        })
    body = {"collection": "chats", "records": rows}
    try:
        reply = await _client().post(f"{L5_MILVUS_URL}/index-batch", json=body, timeout=60.0)
        reply.raise_for_status()
        return reply.json().get("inserted", 0)
    except Exception as exc:
        # Best-effort: the same record is also sent to the other layers, so
        # an L5 failure is logged and reported as zero inserts.
        print(f"[shim] L5 index-batch failed: {exc}")
        return 0
219
+
220
+
221
async def _index_l6(records: list[dict[str, Any]], arena: str = "general") -> int:
    """Index records into the L6 document store.

    Each record needs a "content" key. A record without an "id" gets the
    first 32 hex characters of the SHA-1 of its content, and that same id
    names the fallback ``source_file`` (``<id>.md``) when the metadata has
    no "path". Returns the "inserted" count reported by L6, or 0 if the
    request fails for any reason.
    """
    rows = []
    for rec in records:
        meta = rec.get("metadata") or {}
        # Resolve the id once so "id" and the derived source_file always
        # agree, including for records that arrive without an id.
        rec_id = rec.get("id") or hashlib.sha1(rec["content"].encode()).hexdigest()[:32]
        rows.append({
            "id": rec_id,
            "text": rec["content"],
            "source_file": meta.get("path") or f"{rec_id}.md",
            "doc_type": meta.get("doc_type", "general"),
            "heading": meta.get("heading", ""),
        })
    payload = {"arena": arena, "records": rows}
    try:
        resp = await _client().post(f"{L6_DOC_URL}/index-batch", json=payload, timeout=120.0)
        resp.raise_for_status()
        return resp.json().get("inserted", 0)
    except Exception as exc:
        # Best-effort: a failing layer is logged and reported as zero inserts.
        print(f"[shim] L6 index-batch failed: {exc}")
        return 0
243
+
244
+
245
async def _index_l2_internal(records: list[dict[str, Any]], arena: str = "general") -> dict:
    """Populate L2's internal stores: L0 BM25 + L4 QMD vec + L3 Neo4j KG.

    Without this, L2's RRF fusion runs over empty L0/L4-qmd/L3 layers and
    those zero-result rank lists pollute the score. The L2 proxy exposes
    /index-internal-batch which writes to all three in one round-trip.

    Returns the proxy's JSON reply, or a dict of zero counters if the
    request fails for any reason.
    """
    rows = []
    for rec in records:
        rec_id = rec.get("id") or hashlib.sha1(rec["content"].encode()).hexdigest()[:32]
        rows.append({
            "id": rec_id,
            "content": rec["content"],
            "metadata": rec.get("metadata") or {},
        })
    body = {"arena": arena, "records": rows}
    try:
        reply = await _client().post(
            f"{L2_PROXY_URL}/index-internal-batch",
            json=body,
            timeout=180.0,
        )
        reply.raise_for_status()
        return reply.json()
    except Exception as exc:
        print(f"[shim] L2 internal index-batch failed: {exc}")
        return {"l0": 0, "l4_qmd": 0, "l3_entities": 0, "l3_chunks": 0}
271
+
272
+
273
+ # ----------------------------------------------------------------------
274
+ # FastAPI app
275
+ # ----------------------------------------------------------------------
276
+
277
# ASGI application object; the route handlers below register on it.
app = FastAPI(
    description="Drop-in API compat for pentatonic-memory v0.5; routed through the 7-layer engine.",
    version=VERSION,
    title="pentatonic-memory-engine compat shim",
)
282
+
283
+
284
async def _probe_layer(url: str) -> str:
    """Return "ok", "http <code>" or "unreachable" for one health URL."""
    try:
        resp = await _client().get(url, timeout=3.0)
    except Exception:
        return "unreachable"
    return "ok" if resp.status_code == 200 else f"http {resp.status_code}"


async def _l6_memory_count() -> Any:
    """Return L6's ``total_chunks`` from /stats, or None when unavailable."""
    try:
        resp = await _client().get(f"{L6_DOC_URL}/stats", timeout=3.0)
        if resp.status_code == 200:
            return resp.json().get("total_chunks", 0)
    except Exception:
        return None
    return None


@app.get("/health")
async def health():
    """Aggregate health across all 7 layers.

    Returns a dict with ``status`` ("ok", "degraded" when one or two probes
    fail, "down" when three or more fail), a per-layer ``layers`` map and
    ``memories`` (L6's chunk count, or None when /stats is unavailable).
    """
    import asyncio

    # L0 BM25 is in-process inside the L2 proxy (SQLite FTS5 is a library,
    # not a service). Reporting it via L2's /health.
    endpoints = {
        "l2": f"{L2_PROXY_URL}/health",  # also reports L0 status
        "l3": f"{L3_KG_URL}/health",
        "l4": f"{L4_VEC_URL}/health",
        "l5": f"{L5_MILVUS_URL}/health",
        "l6": f"{L6_DOC_URL}/health",
        # NV-Embed exposes both /health and /v1/embeddings; /health is enough.
        "nv_embed": NV_EMBED_URL.replace("/v1/embeddings", "/health"),
    }
    names = list(endpoints)
    # Probe every layer (and the L6 stats endpoint) concurrently so one slow
    # or unreachable service does not add its timeout to all the others.
    *states, memories = await asyncio.gather(
        *(_probe_layer(endpoints[name]) for name in names),
        _l6_memory_count(),
    )
    raw_layers = dict(zip(names, states))
    failures = sum(1 for state in states if state != "ok")

    # L0 BM25 (FTS5) and L1 (always-loaded core files) are both in-process
    # inside the L2 proxy. They have no separate health endpoint; if L2 is
    # responding, both are usable. Report them as "ok" tied to L2.
    l2_ok = raw_layers.get("l2") == "ok"
    out = {
        "status": "ok",
        "client": CLIENT_ID,
        "version": VERSION,
        "engine": "pentatonic-memory-engine",
        "layers": {
            "l0": "ok" if l2_ok else "unknown",
            "l1": "ok" if l2_ok else "unknown",
            "l2": raw_layers.get("l2", "unknown"),
            "l3": raw_layers.get("l3", "unknown"),
            "l4": raw_layers.get("l4", "unknown"),
            "l5": raw_layers.get("l5", "unknown"),
            "l6": raw_layers.get("l6", "unknown"),
            "nv_embed": raw_layers.get("nv_embed", "unknown"),
        },
    }
    if failures:
        out["status"] = "degraded" if failures < 3 else "down"
    # Memory count: L6 doc-store is treated as authoritative. The key is
    # always present so clients can rely on the documented response shape.
    out["memories"] = memories
    return out
340
+
341
+
342
@app.post("/store")
async def store(req: StoreRequest):
    """Ingest a single record and report what each layer indexed.

    The record id is ``metadata["id"]`` when present, otherwise the first 32
    hex characters of the SHA-1 of the content. The arena comes from
    ``metadata["arena"]`` and defaults to "general".
    """
    import asyncio

    meta = req.metadata or {}
    rid = meta.get("id") or hashlib.sha1(req.content.encode()).hexdigest()[:32]
    arena = meta.get("arena", "general")
    record = {"id": rid, "content": req.content, "metadata": meta}

    # Remember the metadata under every id shape a layer could echo back
    # (rid derivatives for L5/L6, bench/<arena>/<id>.md paths for L2-internal,
    # where <id> may be the rid or the caller's doc_id).
    _stash_all_keys(rid, meta, arena)

    # Write to L4, L5, L6 and the L2-internal stores concurrently.
    l4_count, l5_count, l6_count, l2_internal = await asyncio.gather(
        _index_l4([record]),
        _index_l5([record]),
        _index_l6([record], arena=arena),
        _index_l2_internal([record], arena=arena),
    )

    engine = {
        name: l2_internal.get(name, 0)
        for name in ("l0", "l3_chunks", "l3_entities", "l4_qmd")
    }
    engine.update(l4=l4_count, l5=l5_count, l6=l6_count)
    return {
        "id": rid,
        "content": req.content,
        "layerId": f"ml_{CLIENT_ID}_episodic",
        "engine": engine,
    }
378
+
379
+
380
@app.post("/store-batch")
async def store_batch(req: StoreBatchRequest):
    """Batch ingest — 30-50× faster than calling /store N times.

    Records may carry their text under "content" or "text"; records with
    neither are skipped. A record without an "id" gets the first 32 hex
    characters of the SHA-1 of its content. An empty ``records`` list
    returns ``{"inserted": 0, "ids": []}``. If every record is blank, the
    full response shape is returned with zero counts and no layer is
    contacted.
    """
    if not req.records:
        return {"inserted": 0, "ids": []}

    import asyncio

    # Normalise each record to {id, content, metadata}.
    normalised = []
    for r in req.records:
        content = r.get("content") or r.get("text") or ""
        if not content:
            continue
        rid = r.get("id") or hashlib.sha1(content.encode()).hexdigest()[:32]
        normalised.append({"id": rid, "content": content, "metadata": r.get("metadata") or {}})

    if not normalised:
        # Nothing indexable: skip the four network round-trips but keep the
        # same response shape a non-empty batch produces.
        return {
            "status": "ok",
            "inserted": 0,
            "ids": [],
            "engine": {
                "l0": 0,
                "l3_chunks": 0,
                "l3_entities": 0,
                "l4_qmd": 0,
                "l4": 0,
                "l5": 0,
                "l6": 0,
            },
            "duration_ms": 0.0,
        }

    # Stash metadata for every record so /search can re-attach it.
    arena = req.arena or "general"
    for r in normalised:
        _stash_all_keys(r["id"], r.get("metadata") or {}, arena)

    t0 = time.perf_counter()
    l4_count, l5_count, l6_count, l2_internal = await asyncio.gather(
        _index_l4(normalised),
        _index_l5(normalised),
        _index_l6(normalised, arena=arena),
        _index_l2_internal(normalised, arena=arena),
    )
    dur_ms = (time.perf_counter() - t0) * 1000.0

    return {
        "status": "ok",
        # Each layer reports its own count; the largest is used as the total.
        "inserted": max(l4_count, l5_count, l6_count),
        "ids": [r["id"] for r in normalised],
        "engine": {
            "l0": l2_internal.get("l0", 0),
            "l3_chunks": l2_internal.get("l3_chunks", 0),
            "l3_entities": l2_internal.get("l3_entities", 0),
            "l4_qmd": l2_internal.get("l4_qmd", 0),
            "l4": l4_count,
            "l5": l5_count,
            "l6": l6_count,
        },
        "duration_ms": round(dur_ms, 1),
    }
425
+
426
+
427
def _positive_limit(limit: Optional[int]) -> int:
    """Return *limit* when it is a positive number, otherwise the default 10."""
    return limit if limit and limit > 0 else 10


def _canonical_key(item: dict[str, Any]) -> str:
    """Resolve one canonical id for a hit, whatever id shape the layer used.

    Each candidate id form is looked up in the metadata cache; the first
    cached entry carrying a "doc_id" wins. Otherwise the first non-empty
    candidate is used, and as a last resort a hash of the hit's text.
    """
    source_file = item.get("source_file")
    candidates = [
        item.get("id"),
        item.get("chunk_id"),
        source_file,
        source_file.rsplit(".md", 1)[0] if source_file else None,
        item.get("path"),
    ]
    for cid in candidates:
        if not cid:
            continue
        cached = _META_CACHE.get(cid)
        if cached and cached.get("doc_id"):
            return cached["doc_id"]
    for cid in candidates:
        if cid:
            return cid
    return hashlib.sha1((item.get("text") or item.get("content") or "").encode()).hexdigest()[:32]


async def _l6_hits(query: str, limit: int) -> list[dict[str, Any]]:
    """Query L6 hybrid search for 3x *limit* hits; return [] on any failure."""
    try:
        r = await _client().get(
            f"{L6_DOC_URL}/search",
            params={"q": query, "limit": limit * 3, "method": "hybrid"},
            timeout=30.0,
        )
        r.raise_for_status()
        # A "layer" field already present on the item overrides the default.
        return [{"layer": "L6", **item} for item in r.json().get("results", [])]
    except Exception as exc:
        print(f"[shim] L6 search failed for {query!r}: {exc}")
        return []


async def _search_l6_only(query: str, limit: int) -> list[dict[str, Any]]:
    """BYPASS_L2 path: search L6 alone and collapse hits per canonical id.

    Scores use Reciprocal Rank Fusion (score = 1 / (k + rank), k = 60).
    Only L6 contributes a rank list here, so fusion amounts to summing the
    contributions of hits that resolve to the same canonical id.
    """
    hits = await _l6_hits(query, limit)

    K = 60
    rrf_scores: dict[str, float] = {}
    kept_item: dict[str, dict[str, Any]] = {}
    provenance: dict[str, list[str]] = {}
    for rank, item in enumerate(hits, start=1):
        key = _canonical_key(item)
        rrf_scores[key] = rrf_scores.get(key, 0.0) + 1.0 / (K + rank)
        provenance.setdefault(key, []).append(item.get("layer", "?"))
        # Prefer an L6-labelled version of the doc over any other.
        if key not in kept_item or item.get("layer") == "L6":
            kept_item[key] = item

    # Highest fused score first; ties keep first-seen order (stable sort).
    top_keys = sorted(rrf_scores, key=lambda k: -rrf_scores[k])[:limit]

    results = []
    for key in top_keys:
        item = kept_item[key]
        attached_meta = _lookup_meta(key)
        if not attached_meta:
            # The canonical key may itself be a derived form; try the raw
            # id shapes as a safety net.
            for cid in (item.get("id"), item.get("chunk_id"),
                        item.get("source_file"), item.get("path")):
                if cid:
                    found = _lookup_meta(cid)
                    if found:
                        attached_meta = found
                        break
        results.append({
            "id": key,
            "content": item.get("text") or item.get("content") or item.get("snippet") or "",
            "metadata": attached_meta or item.get("metadata") or {},
            "similarity": float(rrf_scores[key]),
            "layer_id": f"ml_{CLIENT_ID}_episodic",
            "client_id": CLIENT_ID,
            "source": item.get("source_file") or item.get("path") or "",
            "engine_layer": "+".join(sorted(set(provenance.get(key, [])))),
        })
    return results


async def _query_engine(query: str, limit: int, min_score: float) -> tuple[Any, Optional[Exception]]:
    """Fetch raw results: L2 GET /search, then L2 POST /v1/search, then L6.

    Returns ``(payload, last_error)`` where *last_error* is the most recent
    failure before the attempt that succeeded (None if the first worked).
    Raises HTTPException(502) when all three attempts fail.
    """
    last_err: Optional[Exception] = None
    try:
        r = await _client().get(
            f"{L2_PROXY_URL}/search",
            params={"q": query, "limit": limit},
            timeout=30.0,
        )
        r.raise_for_status()
        return r.json(), last_err
    except Exception as exc:
        last_err = exc
    try:
        r = await _client().post(
            f"{L2_PROXY_URL}/v1/search",
            json={"query": query, "limit": limit, "min_score": min_score},
            timeout=30.0,
        )
        r.raise_for_status()
        return r.json(), last_err
    except Exception as exc:
        last_err = exc
    try:
        r = await _client().get(
            f"{L6_DOC_URL}/search",
            params={"q": query, "limit": limit},
            timeout=10.0,
        )
        r.raise_for_status()
        return r.json(), last_err
    except Exception as exc:
        raise HTTPException(
            status_code=502,
            detail=f"engine unreachable: {last_err}; L6 fallback: {exc}",
        ) from exc


def _normalise_engine_hit(item: dict[str, Any]) -> dict[str, Any]:
    """Convert one engine hit to the v0.5 result shape, re-attaching metadata.

    The client's metadata is recovered from the in-memory cache by trying
    every id shape the hit exposes.
    """
    path = item.get("path")
    source_file = item.get("source_file")
    candidate_ids = [
        item.get("id"),
        item.get("doc_id"),
        path,
        source_file,
        item.get("chunk_id"),
        item.get("source"),
        # L5 returns paths like ".pentatonic/chats/<rid>.jsonl" — strip suffix
        path.rsplit(".", 1)[0] if path else None,
        source_file.rsplit(".md", 1)[0] if source_file else None,
    ]
    attached_meta: dict[str, Any] = {}
    chosen_id = ""
    for cid in candidate_ids:
        if cid:
            found = _lookup_meta(cid)
            if found:
                attached_meta = found
                chosen_id = found.get("doc_id") or cid
                break
    if not chosen_id:
        chosen_id = item.get("id") or item.get("doc_id") or path or source_file or ""
    return {
        "id": chosen_id,
        "content": item.get("text") or item.get("content") or item.get("snippet") or "",
        "metadata": attached_meta or item.get("metadata") or {},
        "similarity": float(item.get("score") or item.get("similarity") or 0.0),
        "layer_id": f"ml_{CLIENT_ID}_episodic",
        "client_id": CLIENT_ID,
        "source": item.get("source", item.get("source_file", "")),
        "engine_layer": item.get("layer", item.get("source_layer", "")),
    }


@app.post("/search")
async def search(req: SearchRequest):
    """Search the engine and return results in the v0.5 wire shape.

    Normal path: ask the L2 proxy (GET /search, then POST /v1/search), and
    fall back to L6 /search if both fail. With BYPASS_L2 set, L6 is queried
    directly and its hits are collapsed per canonical id.

    An empty query returns no results. A missing, zero or negative ``limit``
    falls back to 10. Raises HTTPException(502) when no backend answers or
    the answer is not a JSON object.

    NOTE(review): ``min_score`` is only forwarded to the /v1/search
    fallback; results are not filtered by it in this function.
    """
    if not req.query:
        return {"results": []}

    limit = _positive_limit(req.limit)
    if BYPASS_L2:
        return {"results": await _search_l6_only(req.query, limit)}

    data, last_err = await _query_engine(req.query, limit, req.min_score or 0.001)
    if not isinstance(data, dict):
        raise HTTPException(status_code=502, detail=f"engine returned no data: {last_err}")

    # Bench adapters filter results by metadata.bench_tag, so the metadata
    # must survive even though the engine response does not carry it.
    return {"results": [_normalise_engine_hit(item) for item in data.get("results", [])]}
625
+
626
+
627
@app.post("/forget")
async def forget(req: ForgetRequest):
    """Delete records selected by ``id`` or by ``metadata_contains``.

    When both are supplied only ``id`` is used. Raises HTTPException(400)
    when neither is given. Layer failures are logged and do not fail the
    request; the returned ``deleted`` total sums whatever the layers report.

    NOTE(review): /forget-internal is always called with an empty body,
    regardless of the selector passed in. Confirm against the L2 proxy
    whether that clears all of L0/L4-QMD/L3 before relying on this for
    single-record deletion.
    """
    if not (req.id or req.metadata_contains):
        raise HTTPException(status_code=400, detail="provide id or metadata_contains")

    removed = 0

    # L6 doc-store: purge by source_file for an id, or forget by filter.
    try:
        if req.id:
            reply = await _client().delete(
                f"{L6_DOC_URL}/purge",
                params={"source_file": req.id},
                timeout=10.0,
            )
            assumed = 1  # count used when L6 omits "deleted" on a purge
        else:
            reply = await _client().post(
                f"{L6_DOC_URL}/forget",
                json={"metadata_contains": req.metadata_contains},
                timeout=10.0,
            )
            assumed = 0
        if reply.status_code == 200:
            removed += int(reply.json().get("deleted", assumed))
    except Exception as exc:
        print(f"[shim] L6 /forget failed: {exc}")

    # L2-internal stores (L0 BM25 + L4 QMD + L3 KG) have no per-id forget.
    try:
        reply = await _client().post(
            f"{L2_PROXY_URL}/forget-internal",
            json={},
            timeout=15.0,
        )
        if reply.status_code == 200:
            per_store = reply.json().get("deleted", {})
            removed += sum(int(count or 0) for count in per_store.values())
    except Exception as exc:
        print(f"[shim] L2 /forget-internal failed: {exc}")

    return {"deleted": removed, "engine": "pentatonic-memory-engine"}
672
+
673
+
674
+ # ----------------------------------------------------------------------
675
+ # Entrypoint
676
+ # ----------------------------------------------------------------------
677
+
678
if __name__ == "__main__":
    # Direct `python server.py` launch; listens on PORT on all interfaces.
    import uvicorn

    uvicorn.run("server:app", log_level="info", port=PORT, host="0.0.0.0")