@pentatonic-ai/ai-agent-sdk 0.8.3 → 0.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/packages/memory-engine/compat/server.py +84 -10
- package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +187 -116
- package/packages/memory-engine/engine/services/l4/server.py +79 -1
- package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +79 -0
- package/packages/memory-engine/engine/services/l6/Dockerfile +19 -2
- package/packages/memory-engine/engine/services/l6/l6-document-store.py +68 -0
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@pentatonic-ai/ai-agent-sdk",
-  "version": "0.8.3",
+  "version": "0.8.5",
   "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
   "type": "module",
   "main": "./dist/index.cjs",
package/packages/memory-engine/compat/server.py
CHANGED

@@ -453,21 +453,95 @@ async def health():
     if failures:
         out["status"] = "degraded" if failures < 3 else "down"

-    #
-    #
-    #
-    #
+    # Per-layer chunk counts. Replaces the previous single `memories` int
+    # which only reflected L6's vector chunk count — misleading because
+    # L0/L4/L5 hold different (overlapping) projections of the corpus.
+    # Each layer is independently probed; transient failure on one layer
+    # leaves its slot null rather than zeroing the whole field.
+    memories: dict[str, int | None] = {
+        "l0_bm25_chunks": None,
+        "l4_vectors": None,
+        "l5_chats_chunks": None,
+        "l6_vector_chunks": None,
+        "l6_fts_chunks": None,
+    }
+    # L0 lives inside L2; L2 exposes /index-internal-stats with the counts.
+    try:
+        r = await _client().get(f"{L2_PROXY_URL}/index-internal-stats", timeout=3.0)
+        if r.status_code == 200:
+            stats = r.json()
+            memories["l0_bm25_chunks"] = int(stats.get("l0_chunks") or 0)
+    except Exception:
+        pass
+    # L4 reports n_vectors on its own /health.
+    try:
+        r = await _client().get(f"{L4_VEC_URL}/health", timeout=3.0)
+        if r.status_code == 200:
+            memories["l4_vectors"] = int(r.json().get("n_vectors") or 0)
+    except Exception:
+        pass
+    # L5 reports per-collection counts on /health. We surface chats —
+    # the only collection currently populated; emails/contacts/memory
+    # collections coming back online with L5 collection bootstrap.
+    try:
+        r = await _client().get(f"{L5_MILVUS_URL}/health", timeout=3.0)
+        if r.status_code == 200:
+            colls = r.json().get("collections") or {}
+            chats = colls.get("chats") if isinstance(colls, dict) else None
+            if isinstance(chats, dict):
+                memories["l5_chats_chunks"] = int(chats.get("count") or 0)
+    except Exception:
+        pass
+    # L6 exposes vector vs fts splits on /stats.
     try:
         r = await _client().get(f"{L6_DOC_URL}/stats", timeout=3.0)
         if r.status_code == 200:
             stats = r.json()
-
-
-            int(stats.get("fts_chunks") or 0),
-            int(stats.get("total_chunks") or 0),
-            )
+            memories["l6_vector_chunks"] = int(stats.get("vector_chunks") or 0)
+            memories["l6_fts_chunks"] = int(stats.get("fts_chunks") or 0)
     except Exception:
-
+        pass
+    out["memories"] = memories
+    return out
+
+
+@app.get("/health/deep")
+async def health_deep():
+    """Aggregate functional probe — fans out to each layer's /health/deep
+    and reports per-layer ok/status.
+
+    Where /health checks "process up + port answering", /health/deep
+    actually exercises embed → write → search round-trips on each layer
+    that supports it. Catches the class of bug that masquerades as
+    "healthy" — request handlers 500'ing while the process stays up.
+
+    Slower than /health (~1–2s); intended for ops/monitoring/cron use,
+    not the deploy gate or compose healthcheck.
+    """
+    import asyncio
+    out: dict[str, Any] = {"status": "ok", "ok": True, "layers": {}}
+
+    async def _probe_deep(name: str, url: str) -> tuple[str, dict]:
+        try:
+            r = await _client().get(url, timeout=15.0)
+            if r.status_code != 200:
+                return name, {"ok": False, "status": f"http {r.status_code}"}
+            return name, r.json()
+        except Exception as exc:
+            return name, {"ok": False, "status": f"unreachable: {type(exc).__name__}"}
+
+    results = await asyncio.gather(
+        _probe_deep("l4", f"{L4_VEC_URL}/health/deep"),
+        _probe_deep("l5", f"{L5_MILVUS_URL}/health/deep"),
+        _probe_deep("l6", f"{L6_DOC_URL}/health/deep"),
+    )
+    for name, body in results:
+        out["layers"][name] = body
+        if not body.get("ok", False):
+            out["ok"] = False
+
+    if not out["ok"]:
+        out["status"] = "degraded"
     return out

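For orientation, a minimal sketch of how a monitoring job might consume the two compat-shim endpoints above: the per-layer nulls in the /health `memories` dict and the per-layer ok/status map from /health/deep. The base URL and port are assumptions for illustration, not values defined by the package.

# monitor_memory_engine.py — hypothetical ops probe against the compat shim.
# COMPAT_URL is an assumption; point it at wherever the shim actually listens.
import asyncio
import httpx

COMPAT_URL = "http://127.0.0.1:8030"  # assumed address, adjust per deploy

async def main() -> int:
    async with httpx.AsyncClient(timeout=20.0) as client:
        shallow = (await client.get(f"{COMPAT_URL}/health")).json()
        deep = (await client.get(f"{COMPAT_URL}/health/deep")).json()

    # A null slot means that layer's count probe failed transiently,
    # not that the layer is empty.
    for layer, count in (shallow.get("memories") or {}).items():
        if count is None:
            print(f"warn: {layer} count unavailable")

    # /health/deep aggregates per-layer round-trip probes; alert per layer.
    failing = [
        f"{name}: {body.get('status')}"
        for name, body in (deep.get("layers") or {}).items()
        if not body.get("ok", False)
    ]
    if failing:
        print("deep probe failing ->", "; ".join(failing))
        return 1
    print("all layers ok")
    return 0

if __name__ == "__main__":
    raise SystemExit(asyncio.run(main()))
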
package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py
CHANGED

@@ -12,6 +12,7 @@ Port: 8031 (replaces neo4j-qmd-proxy.py)
 """

 import argparse
+import asyncio
 import hashlib
 import json
 import logging
@@ -19,14 +20,16 @@ import os
 import sqlite3
 import sys
 import time
+from contextlib import asynccontextmanager
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Set
+from typing import Any, AsyncIterator, Dict, List, Optional, Set

 import re
+import httpx
 import requests
 from fastapi import FastAPI, HTTPException, Request
-from neo4j import
+from neo4j import AsyncGraphDatabase
 from neo4j.time import DateTime as Neo4jDateTime, Date as Neo4jDate
 from pydantic import BaseModel
 import uvicorn
@@ -129,7 +132,67 @@ TRACKER_FILE = WORKSPACE / "memory" / "memory-tracker.jsonl"
 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
 log = logging.getLogger("sequential-hybridrag")

-
+
+# ---------------------------------------------------------------------------
+# Async driver + HTTP client singletons
+# ---------------------------------------------------------------------------
+#
+# v0.8.4: migrated from sync neo4j (`GraphDatabase`) to async
+# (`AsyncGraphDatabase`) and from per-call drivers to one process-wide
+# pool. Previously every handler created a fresh sync driver inline,
+# which (a) blocked the event loop on connection establishment and
+# every query, and (b) held one thread per in-flight request. Under
+# sustained ingest the threadpool exhausted and /health itself timed
+# out (the `l0/l1/l2: unreachable: ReadTimeout` we saw in prod).
+#
+# The async driver multiplexes many queries through bolt without
+# blocking the event loop; one shared pool means connection
+# establishment is amortised. fastapi lifespan handles open/close.
+
+_neo4j_driver: "AsyncGraphDatabase | None" = None
+_http_client: "httpx.AsyncClient | None" = None
+
+
+def get_neo4j_driver():
+    """Lazy module-level singleton — created at first call (which the
+    lifespan handler does at startup). Sharing across requests is the
+    documented neo4j Python-driver pattern; the driver itself is
+    thread- and task-safe."""
+    global _neo4j_driver
+    if _neo4j_driver is None:
+        _neo4j_driver = AsyncGraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)
+    return _neo4j_driver
+
+
+def get_http_client() -> httpx.AsyncClient:
+    """Shared async HTTP client for L4/L5/L6 fan-out and embedding-proxy
+    pass-through. Reuses TCP connections via httpx's built-in pool."""
+    global _http_client
+    if _http_client is None:
+        _http_client = httpx.AsyncClient(timeout=30.0)
+    return _http_client
+
+
+@asynccontextmanager
+async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
+    """Open the neo4j driver + HTTP client at process startup, close on
+    shutdown. Without this, the first request pays driver-open latency
+    and the driver is never properly closed on SIGTERM (leaking conns)."""
+    global _neo4j_driver, _http_client
+    _neo4j_driver = AsyncGraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)
+    _http_client = httpx.AsyncClient(timeout=30.0)
+    try:
+        yield
+    finally:
+        if _neo4j_driver is not None:
+            await _neo4j_driver.close()
+            _neo4j_driver = None
+        if _http_client is not None:
+            await _http_client.aclose()
+            _http_client = None
+
+
+app = FastAPI(title="Sequential HybridRAG Proxy", version="1.0.0", lifespan=lifespan)

 # ---------------------------------------------------------------------------
 # Memory Usage Tracking
@@ -267,7 +330,7 @@ def extract_query_entities(query: str) -> List[str]:
     log.info(f"Extracted entities: {potential_entities}")
     return potential_entities

-def _hebbian_strengthen(session, arenas: List[str], node_names: List[str], increment: float = 0.05) -> None:
+async def _hebbian_strengthen(session, arenas: List[str], node_names: List[str], increment: float = 0.05) -> None:
     """Hebbian: strengthen edges between co-accessed nodes during query.

     Scoped by arena so a search inside tenant A can't reinforce edges
@@ -281,7 +344,7 @@ def _hebbian_strengthen(session, arenas: List[str], node_names: List[str], incre
     for i, n1 in enumerate(node_names):
         for n2 in node_names[i+1:]:
             try:
-                session.run(
+                await session.run(
                     """MATCH (a {name: $n1})-[r]-(b {name: $n2})
                     WHERE a.arena IN $arenas AND b.arena IN $arenas
                     SET r.weight = coalesce(r.weight, 1.0) + $inc,
@@ -292,7 +355,7 @@ def _hebbian_strengthen(session, arenas: List[str], node_names: List[str], incre
                 pass  # non-critical


-def search_neo4j_sequential(query: str, entities: List[str], arenas: List[str], limit: int = 12) -> Dict:
+async def search_neo4j_sequential(query: str, entities: List[str], arenas: List[str], limit: int = 12) -> Dict:
     """Phase 1: Neo4j graph search with spreading activation + Hebbian.

     `arenas` is the tenant-scope set the caller is authorised for —
@@ -306,11 +369,11 @@ def search_neo4j_sequential(query: str, entities: List[str], arenas: List[str],
         log.warning("search_neo4j_sequential called without arenas — returning empty results")
         return {"results": [], "graph_entities": [], "entity_count": 0}
     try:
-        driver =
+        driver = get_neo4j_driver()
         results = []
         graph_entities = set()

-        with driver.session() as session:
+        async with driver.session() as session:
             # Search for specific entities — use weighted spreading activation
             for entity in entities:
                 # Direct match first — arena-scoped on every node we touch.
@@ -326,9 +389,9 @@ def search_neo4j_sequential(query: str, entities: List[str], arenas: List[str],
                 LIMIT $limit
                 """

-
+                result = await session.run(cypher, entity=entity, arenas=arenas, limit=8)

-                for record in
+                async for record in result:
                     node = _serialize_neo4j_value(dict(record["n"]))
                     rel = record["r"]
                     connected = record["connected"]
@@ -366,7 +429,7 @@ def search_neo4j_sequential(query: str, entities: List[str], arenas: List[str],
                 # the filter, an activation could walk into another
                 # tenant's graph via a name-collision on the start node.
                 if entity:
-                    activation_results = session.run("""
+                    activation_results = await session.run("""
                         MATCH (start)-[r1]-(mid)-[r2]-(end)
                         WHERE start.name CONTAINS $entity
                         AND start.arena IN $arenas
@@ -382,7 +445,7 @@ def search_neo4j_sequential(query: str, entities: List[str], arenas: List[str],
                         LIMIT 5
                     """, entity=entity, arenas=arenas)

-                    for rec in activation_results:
+                    async for rec in activation_results:
                         end_node = _serialize_neo4j_value(dict(rec["end"])) if rec["end"] else {}
                         name = end_node.get("name", "")
                         if name and name not in graph_entities:
@@ -413,9 +476,9 @@ def search_neo4j_sequential(query: str, entities: List[str], arenas: List[str],
                 LIMIT $limit
                 """

-
+                result = await session.run(cypher, term=word, arenas=arenas, limit=4)

-                for record in
+                async for record in result:
                     node = _serialize_neo4j_value(dict(record["n"]))
                     context = f"Related: {node}"
                     graph_entities.add(node.get('name', ''))
@@ -430,9 +493,9 @@ def search_neo4j_sequential(query: str, entities: List[str], arenas: List[str],
                     })

             # Hebbian: strengthen edges between all accessed entities
-            _hebbian_strengthen(session, arenas, list(graph_entities))
+            await _hebbian_strengthen(session, arenas, list(graph_entities))

-        driver.
+        # Note: driver is a module-level singleton, do NOT close here.

         return {
             "results": results[:limit],
@@ -861,8 +924,8 @@ def search_l0_bm25(query: str, limit: int = 6, arena: str = None,

 L5_API_URL = os.environ.get("PME_L5_URL", "http://127.0.0.1:8034")

-def search_l5_communications(query: str, limit: int = 6, arena: str = None,
-
+async def search_l5_communications(query: str, limit: int = 6, arena: str = None,
+                                   arenas: List[str] = None) -> List[Dict]:
     """Search L5 Communications Context via L5 API (emails, chats, calendar).

     arena / arenas (optional): forwarded to L5; filters Milvus by the
@@ -876,7 +939,7 @@ def search_l5_communications(query: str, limit: int = 6, arena: str = None,
     params: list = [("q", query), ("limit", str(limit))]
     for a in arena_list:
         params.append(("arenas", a))
-    resp =
+    resp = await get_http_client().get(
         f"{L5_API_URL}/search",
         params=params,
         timeout=10,
@@ -922,8 +985,8 @@ def search_l5_communications(query: str, limit: int = 6, arena: str = None,
 # L6: Document Store Search
 L6_URL = os.environ.get("PME_L6_URL", "http://localhost:8037")

-def search_l6_documents(query: str, limit: int = 6, arena: str = None,
-
+async def search_l6_documents(query: str, limit: int = 6, arena: str = None,
+                              arenas: List[str] = None) -> List[Dict]:
     """Search L6 Document Store (research, legal, financial, project docs).

     arena / arenas (optional): forwarded to L6 — L6 supports multi-arena
@@ -939,7 +1002,7 @@ def search_l6_documents(query: str, limit: int = 6, arena: str = None,
     ]
     for a in arena_list:
         params.append(("arenas", a))
-    resp =
+    resp = await get_http_client().get(
         f"{L6_URL}/search",
         params=params,
         timeout=10,
@@ -986,50 +1049,52 @@ def search_l6_documents(query: str, limit: int = 6, arena: str = None,
     return []


-def sequential_hybridrag_search(query: str, limit: int = 16,
-
-
+async def sequential_hybridrag_search(query: str, limit: int = 16,
+                                      arena: str = None,
+                                      arenas: List[str] = None) -> List[Dict]:
     """Main HybridRAG processing: L0 BM25 → L1 System Files → L2 HybridRAG (L3 Graph + L4 Vector + L5 Comms + L6 Docs).

     arena / arenas (optional): tenant + user scope. Multi-arena lets a
     user's search span tenant-wide rows + their own user-scoped rows in
     a single hybrid pass. Forwarded to L0, L5, L6 native filters; L4
     and L3 still rely on the compat shim post-filter.
+
+    Async since v0.8.4: independent layer fan-out runs concurrently via
+    asyncio.gather; sync workers (sqlite, file I/O, PyTorch reranker)
+    are dispatched to threads via asyncio.to_thread to keep the event
+    loop responsive under sustained ingest.
     """
     arena_list = list(arenas) if arenas else ([arena] if arena else [])
     start_time = time.time()
     log.info(f"Starting sequential HybridRAG search for: '{query}' arenas={arena_list!r}")

-    # L0
-
+    # L0 (sqlite) + L1 (file I/O) + entity extraction run in parallel.
+    # Each is sync — offloaded to the threadpool so the event loop stays
+    # free for the in-flight L3/L5/L6 calls.
+    l0_results, system_results, entities = await asyncio.gather(
+        asyncio.to_thread(search_l0_bm25, query, 6, None, arena_list),
+        asyncio.to_thread(search_core_memory_files, query, 4),
+        asyncio.to_thread(extract_query_entities, query),
+    )
     log.info(f"L0 BM25 workspace: {len(l0_results)} results")
-
-    # L1: System Files (HIGHEST PRIORITY)
-    system_results = search_core_memory_files(query, limit=4)
     log.info(f"L1 System files: {len(system_results)} results")

-    #
-
-    # tenant's search can never traverse another tenant's entity graph
-    # via name collisions on shared :Entity nodes. The post-filter shim
-    # protects chunks; this protects the entity-walking layer too.
-    entities = extract_query_entities(query)
-    graph_context = search_neo4j_sequential(query, entities, arena_list, limit=8)
+    # L3: Graph search (now native async via AsyncGraphDatabase).
+    graph_context = await search_neo4j_sequential(query, entities, arena_list, limit=8)
     log.info(f"L3 Graph search: {len(graph_context['results'])} results, {graph_context['entity_count']} entities")

-    # HyDE
-    hyde_query = hyde_expand
+    # HyDE expansion is sync (LLM call), offload to thread.
+    hyde_query = await asyncio.to_thread(hyde_expand, query)

-    # L4
-
+    # L4/L5/L6 fan out concurrently — L4 (sqlite) via to_thread, L5/L6
+    # native async via httpx.AsyncClient.
+    vector_results, l5_results, l6_results = await asyncio.gather(
+        asyncio.to_thread(search_qmd_informed, hyde_query, graph_context, 8),
+        search_l5_communications(hyde_query, limit=6, arenas=arena_list),
+        search_l6_documents(hyde_query, limit=6, arenas=arena_list),
+    )
     log.info(f"L4 Vector search: {len(vector_results)} results (HyDE={'on' if hyde_query != query else 'off'})")
-
-    # L5: Communications Context (emails, chats, calendar) — also use HyDE
-    l5_results = search_l5_communications(hyde_query, limit=6, arenas=arena_list)
     log.info(f"L5 Communications: {len(l5_results)} results")
-
-    # L6: Document Store (research, legal, financial, project docs)
-    l6_results = search_l6_documents(hyde_query, limit=6, arenas=arena_list)
     log.info(f"L6 Documents: {len(l6_results)} results")

     # L2: HybridRAG fusion (combines all layers with L1 priority)
@@ -1046,8 +1111,9 @@ def sequential_hybridrag_search(query: str, limit: int = 16,
     # Sort by layer priority: L1 System (1.0) > L3 Graph (0.9) > L4 Vector (0.7+)
     deduplicated.sort(key=lambda x: x["score"], reverse=True)

-    # Cross-encoder reranking: re-embed top results and blend scores
-
+    # Cross-encoder reranking: re-embed top results and blend scores.
+    # PyTorch CrossEncoder.predict is sync — offload to thread.
+    deduplicated = await asyncio.to_thread(cross_encoder_rerank, query, deduplicated, limit)

     # Track layer usage for evolution
     search_time_ms = (time.time() - start_time) * 1000
@@ -1094,7 +1160,7 @@ async def search_endpoint(request: Request) -> dict:
     if not query:
         raise HTTPException(status_code=400, detail="query is required")

-    results = sequential_hybridrag_search(query, limit=limit, arena=arena, arenas=arenas)
+    results = await sequential_hybridrag_search(query, limit=limit, arena=arena, arenas=arenas)

     # Also return raw graph entities for context enrichment.
     # Same arena scope as the cascade search above — without it
@@ -1172,7 +1238,7 @@ async def chat_completions(request: ChatCompletionRequest) -> dict:
     # empty when no arenas are supplied; callers that need L3 must
     # pass `arena` or `arenas` on the request body.
     start_time = time.time()
-    results = sequential_hybridrag_search(
+    results = await sequential_hybridrag_search(
         query, limit=16, arena=request.arena, arenas=request.arenas,
     )
     search_time = time.time() - start_time
@@ -1244,33 +1310,35 @@ async def check_contradictions(node_name: str, arena: Optional[str] = None) -> d
         detail="arena query parameter is required to scope contradiction lookup",
     )
     try:
-        driver =
+        driver = get_neo4j_driver()
         contradictions = []
-        with driver.session() as session:
+        async with driver.session() as session:
             # Find the node — must be in the caller's arena.
-
+            result = await session.run(
                 """MATCH (n) WHERE toLower(n.name) = toLower($name) AND n.arena = $arena
                 RETURN elementId(n) AS id""",
                 name=node_name, arena=arena,
-            )
+            )
+            node = await result.single()
             if not node:
                 return {"node": node_name, "contradictions": [], "error": "Node not found"}
             nid = node["id"]

             # Explicit CONTRADICTS — both endpoints must be in the same arena.
-
+            result = await session.run(
                 """MATCH (a)-[r:CONTRADICTS]-(b)
                 WHERE elementId(a) = $nid AND b.arena = $arena
                 RETURN a.name AS a, b.name AS b, r.reason AS reason""",
                 nid=nid, arena=arena,
-            )
+            )
+            async for rec in result:
                 contradictions.append({"type": "explicit", "a": rec["a"], "b": rec["b"], "reason": rec["reason"]})

             # Property conflicts via shared neighbour — every node along
             # the (a)--(shared)--(b) path filtered by arena so a shared
             # neighbour from another tenant can't trigger a false-positive
             # conflict in this tenant's view.
-
+            result = await session.run(
                 """MATCH (a)--(shared)--(b)
                 WHERE elementId(a) = $nid AND a <> b
                 AND shared.arena = $arena AND b.arena = $arena
@@ -1281,28 +1349,28 @@ async def check_contradictions(node_name: str, arena: Optional[str] = None) -> d
                 WHERE size(ck) > 0
                 RETURN a.name AS a, b.name AS b, shared.name AS via, ck
                 LIMIT 10""", nid=nid, arena=arena,
-            )
+            )
+            async for rec in result:
                 contradictions.append({
                     "type": "property_conflict", "a": rec["a"], "b": rec["b"],
                     "via": rec["via"], "conflicting_keys": rec["ck"]
                 })
-        driver.close()
         return {"node": node_name, "contradictions": contradictions, "count": len(contradictions)}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

-def _check_l5_health() -> bool:
+async def _check_l5_health() -> bool:
     """Quick check if L5 Communications API is responding."""
     try:
-        resp =
+        resp = await get_http_client().get(f"{L5_API_URL}/health", timeout=3)
         return resp.status_code == 200
     except Exception:
         return False

-def _check_l6_health() -> bool:
+async def _check_l6_health() -> bool:
     """Quick check if L6 Document Store is responding."""
     try:
-        resp =
+        resp = await get_http_client().get(f"{L6_URL}/health", timeout=3)
         return resp.status_code == 200 and resp.json().get("status") in ("ok", "degraded")
     except Exception:
         return False
@@ -1323,16 +1391,15 @@ async def health() -> dict:

     neo4j_healthy = False
     try:
-        driver =
-        with driver.session() as session:
-            session.run("RETURN 1")
+        driver = get_neo4j_driver()
+        async with driver.session() as session:
+            await session.run("RETURN 1")
         neo4j_healthy = True
-        driver.close()
     except Exception as e:
         logging.debug(f"Suppressed: {e}")

-    l5_reachable = _check_l5_health()
-    l6_reachable = _check_l6_health()
+    l5_reachable = await _check_l5_health()
+    l6_reachable = await _check_l6_health()

     # Top-level status: degrade only on layers L2 is the sole gatekeeper for.
     # L5/L6 are independent services probed by the compat shim.
@@ -1411,15 +1478,19 @@ def _extract_entities_for_kg(text: str, max_entities: int = 32) -> List[str]:
     return found[:max_entities]


-def _embed_batch_local(texts: List[str]) -> List[List[float]]:
-    """Batch embed via the shared EmbedClient. Returns vectors in input order."""
+async def _embed_batch_local(texts: List[str]) -> List[List[float]]:
+    """Batch embed via the shared EmbedClient (async). Returns vectors in input order."""
     if not texts:
         return []
     try:
-        return _embed_client().
+        return await _embed_client().embed_batch_async(texts)
     except Exception as e:
         log.warning(f"NV-Embed batch failed: {e}; trying singletons")
-
+        # Singleton fallback stays sync (each one-shot embed is small);
+        # offload to thread so we don't block the loop.
+        return await asyncio.gather(
+            *(asyncio.to_thread(get_embedding, t) for t in texts)
+        )


 class IndexInternalBatchRequest(BaseModel):
@@ -1504,7 +1575,7 @@ async def index_internal_batch(req: IndexInternalBatchRequest) -> dict:
     # ---- L4 QMD vec (qmd.sqlite) ----------------------------------------
     l4_inserted = 0
     try:
-        embeddings = _embed_batch_local([n["content"] for n in norm])
+        embeddings = await _embed_batch_local([n["content"] for n in norm])
         if len(embeddings) != len(norm):
             log.warning(f"L4 embed count mismatch: {len(embeddings)} != {len(norm)}")
         qmd_db = Path(QMD_DB_PATH)
@@ -1560,30 +1631,30 @@ async def index_internal_batch(req: IndexInternalBatchRequest) -> dict:
     l3_entities = 0
     l3_chunks = 0
     try:
-        driver =
-        with driver.session() as session:
+        driver = get_neo4j_driver()
+        async with driver.session() as session:
             # Indexes — idempotent. The compound (arena, name) is the
             # right shape now that entities are arena-scoped; the legacy
             # entity_name index stays for the wipe-migration to work
             # against pre-arena rows, then can be dropped in a follow-up.
             try:
-                session.run("CREATE INDEX entity_arena_name IF NOT EXISTS FOR (n:Entity) ON (n.arena, n.name)")
-                session.run("CREATE INDEX person_arena_email IF NOT EXISTS FOR (n:Person) ON (n.arena, n.email)")
-                session.run("CREATE INDEX chunk_arena IF NOT EXISTS FOR (c:Chunk) ON (c.arena)")
-                session.run("CREATE INDEX chunk_id IF NOT EXISTS FOR (c:Chunk) ON (c.id)")
+                await session.run("CREATE INDEX entity_arena_name IF NOT EXISTS FOR (n:Entity) ON (n.arena, n.name)")
+                await session.run("CREATE INDEX person_arena_email IF NOT EXISTS FOR (n:Person) ON (n.arena, n.email)")
+                await session.run("CREATE INDEX chunk_arena IF NOT EXISTS FOR (c:Chunk) ON (c.arena)")
+                await session.run("CREATE INDEX chunk_id IF NOT EXISTS FOR (c:Chunk) ON (c.id)")
                 # ChannelStat is the denormalised aggregate read by
                 # /aggregate on the fast path. Compound index covers
                 # the (arena, person_email) lookup that the reader
                 # uses; the per-channel rows are returned in one
                 # range scan.
-                session.run("CREATE INDEX channelstat_arena_email IF NOT EXISTS FOR (s:ChannelStat) ON (s.arena, s.person_email)")
+                await session.run("CREATE INDEX channelstat_arena_email IF NOT EXISTS FOR (s:ChannelStat) ON (s.arena, s.person_email)")
                 # UNIQUE constraint on the writer's MERGE key. Without
                 # this, two concurrent index-internal-batch transactions
                 # can both decide a ChannelStat doesn't exist and create
                 # rival nodes — the index doesn't lock, the constraint
                 # does. The constraint also implies an index on the
                 # full key so the MERGE locks efficiently.
-                session.run("CREATE CONSTRAINT channelstat_unique IF NOT EXISTS FOR (s:ChannelStat) REQUIRE (s.arena, s.person_email, s.channel) IS UNIQUE")
+                await session.run("CREATE CONSTRAINT channelstat_unique IF NOT EXISTS FOR (s:ChannelStat) REQUIRE (s.arena, s.person_email, s.channel) IS UNIQUE")
             except Exception:
                 pass
             for n in norm:
@@ -1602,7 +1673,7 @@ async def index_internal_batch(req: IndexInternalBatchRequest) -> dict:
                 # tenant-isolation anchor. Every read traverses through
                 # this node, so getting the arena right here is the
                 # single most important invariant of this whole block.
-                session.run(
+                await session.run(
                     """
                     MERGE (c:Chunk {id: $cid})
                     SET c.text = $text,
@@ -1618,7 +1689,7 @@ async def index_internal_batch(req: IndexInternalBatchRequest) -> dict:

                 # Concept entities — heuristic, arena-scoped.
                 for ent in heuristic_entities:
-                    session.run(
+                    await session.run(
                         """
                         MERGE (e:Entity:Concept {arena: $arena, name: $name})
                         ON CREATE SET e.type = 'Concept',
@@ -1639,7 +1710,7 @@ async def index_internal_batch(req: IndexInternalBatchRequest) -> dict:
                 if len(heuristic_entities) >= 2:
                     for i in range(len(heuristic_entities)):
                         for j in range(i + 1, len(heuristic_entities)):
-                            session.run(
+                            await session.run(
                                 """
                                 MATCH (a:Entity:Concept {arena: $arena, name: $a})
                                 MATCH (b:Entity:Concept {arena: $arena, name: $b})
@@ -1666,7 +1737,7 @@ async def index_internal_batch(req: IndexInternalBatchRequest) -> dict:
                 # flipped to true after the stat update, so replays
                 # of the same eventId never double-count even when
                 # the chunk already exists.
-                session.run(
+                await session.run(
                     """
                     MERGE (p:Entity:Person {arena: $arena, email: $email})
                     ON CREATE SET p.created_at = $now,
@@ -1725,7 +1796,7 @@ async def index_internal_batch(req: IndexInternalBatchRequest) -> dict:
                     l3_entities += 1
                 if isinstance(contact_name, str) and contact_name.strip():
                     cname = contact_name.strip()
-                    session.run(
+                    await session.run(
                         """
                         MERGE (p:Entity:Person {arena: $arena, name: $name})
                         ON CREATE SET p.created_at = $now,
@@ -1751,7 +1822,7 @@ async def index_internal_batch(req: IndexInternalBatchRequest) -> dict:
                     # Link name→email node so the relationships query
                     # can resolve either alias to the same person.
                     if person_email_node:
-                        session.run(
+                        await session.run(
                             """
                             MATCH (n:Person {arena: $arena, name: $name})
                             MATCH (e:Person {arena: $arena, email: $email})
@@ -1759,7 +1830,6 @@ async def index_internal_batch(req: IndexInternalBatchRequest) -> dict:
                             """,
                             arena=arena, name=cname, email=person_email_node,
                         )
-        driver.close()
     except Exception as e:
         log.error(f"L3 KG write failed: {e}")

@@ -1841,25 +1911,28 @@ async def forget_internal(request: Request) -> dict:
     # Neo4j chunks AND entities both carry arena now, so tenant-scoped
     # delete works correctly here even if L0/L4 still need a migration.
     try:
-        driver =
-        with driver.session() as session:
+        driver = get_neo4j_driver()
+        async with driver.session() as session:
             if arena:
-                r1 = session.run(
+                r1 = await session.run(
                     "MATCH (c:Chunk {arena: $arena}) DETACH DELETE c RETURN count(c) AS n",
                     arena=arena,
                 )
-
-
+                rec = await r1.single()
+                deleted["l3_chunks"] = rec["n"]
+                r2 = await session.run(
                     "MATCH (e:Entity {arena: $arena}) DETACH DELETE e RETURN count(e) AS n",
                     arena=arena,
                 )
-
+                rec = await r2.single()
+                deleted["l3_entities"] = rec["n"]
             else:  # confirm == "GLOBAL_WIPE", validated above
-                r1 = session.run("MATCH (c:Chunk) DETACH DELETE c RETURN count(c) AS n")
-
-
-
-
+                r1 = await session.run("MATCH (c:Chunk) DETACH DELETE c RETURN count(c) AS n")
+                rec = await r1.single()
+                deleted["l3_chunks"] = rec["n"]
+                r2 = await session.run("MATCH (e:Entity) DETACH DELETE e RETURN count(e) AS n")
+                rec = await r2.single()
+                deleted["l3_entities"] = rec["n"]
     except Exception as e:
         log.error(f"L3 forget failed: {e}")
     return {"status": "ok", "deleted": deleted, "arena": arena, "global_wipe": confirm == "GLOBAL_WIPE"}
@@ -1948,13 +2021,10 @@ async def aggregate_internal(req: AggregateInternalRequest) -> AggregateInternal
             seen.add(k)
             safe_group_by.append(k)

-
-        driver = GraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"neo4j connect: {e}")
+    driver = get_neo4j_driver()

     try:
-        with driver.session() as session:
+        async with driver.session() as session:
             # Fast path: read from the ChannelStat denormalisation
             # whenever the caller has an email and is grouping by
             # channel. ChannelStats are written by /index-internal-batch
@@ -1973,7 +2043,7 @@ async def aggregate_internal(req: AggregateInternalRequest) -> AggregateInternal
                 not safe_group_by or safe_group_by == ["channel"]
             )
             if fast_path_eligible:
-
+                _res = await session.run(
                     "MATCH (s:ChannelStat {arena: $arena, person_email: $email})\n"
                     "RETURN s.channel AS channel,\n"
                     " s.count AS count,\n"
@@ -1983,7 +2053,8 @@ async def aggregate_internal(req: AggregateInternalRequest) -> AggregateInternal
                     " s.first_seen AS first_seen\n"
                     "ORDER BY s.count DESC\n",
                     arena=arena, email=contact_email,
-                )
+                )
+                stats_rows = [rec async for rec in _res]
                 if stats_rows:
                     # Build buckets directly. When group_by=[] we
                     # collapse to a single overall bucket; otherwise
@@ -2098,7 +2169,8 @@ async def aggregate_internal(req: AggregateInternalRequest) -> AggregateInternal
             buckets: List[AggregateBucket] = []
             total = 0
             latest: Optional[str] = None
-
+            _res = await session.run(cypher, **params)
+            async for rec in _res:
                 count = int(rec["count"] or 0)
                 total += count
                 last_seen = rec["last_seen"]
@@ -2126,8 +2198,6 @@ async def aggregate_internal(req: AggregateInternalRequest) -> AggregateInternal
     except Exception as e:
         log.error(f"aggregate-internal failed: {e}")
         raise HTTPException(status_code=500, detail=f"aggregate failed: {e}")
-    finally:
-        driver.close()


 @app.get("/index-internal-stats")
@@ -2153,13 +2223,14 @@ async def index_internal_stats() -> dict:
     except Exception as e:
         out["l4_qmd_error"] = str(e)
     try:
-        driver =
-        with driver.session() as session:
-
+        driver = get_neo4j_driver()
+        async with driver.session() as session:
+            res = await session.run("MATCH (c:Chunk) RETURN count(c) AS n")
+            r = await res.single()
             out["l3_chunks"] = r["n"] if r else 0
-
+            res = await session.run("MATCH (e:Entity) RETURN count(e) AS n")
+            r = await res.single()
             out["l3_entities"] = r["n"] if r else 0
-        driver.close()
     except Exception as e:
         out["l3_error"] = str(e)
     return out

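The async-migration pattern the L2 proxy adopts above — one process-wide AsyncGraphDatabase driver plus one httpx.AsyncClient, opened and closed by a FastAPI lifespan — generalises to any service with the same shape. A minimal self-contained sketch of that pattern follows; the bolt URI, credentials, and demo query are placeholders, not values from the package.

# Minimal sketch of the lifespan-managed singleton pattern used above.
# URI, credentials, and the /count query are illustrative placeholders.
from contextlib import asynccontextmanager
from typing import AsyncIterator

import httpx
from fastapi import FastAPI
from neo4j import AsyncGraphDatabase

_driver = None
_http: httpx.AsyncClient | None = None

@asynccontextmanager
async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
    # Open once per process; every request shares the same connection pools.
    global _driver, _http
    _driver = AsyncGraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
    _http = httpx.AsyncClient(timeout=30.0)
    try:
        yield
    finally:
        await _driver.close()
        await _http.aclose()

app = FastAPI(lifespan=lifespan)

@app.get("/count")
async def count_nodes() -> dict:
    # Async driver: no worker thread is held while the query is in flight.
    async with _driver.session() as session:
        result = await session.run("MATCH (n) RETURN count(n) AS n")
        record = await result.single()
    return {"nodes": record["n"]}
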
package/packages/memory-engine/engine/services/l4/server.py
CHANGED

@@ -136,7 +136,11 @@ def health():
         n = conn.execute("SELECT COUNT(*) FROM chunks").fetchone()[0]
         conn.close()
         return {"status": "ok", "loaded": True, "n_vectors": n,
-                "dim": EMBED_DIM, "db_path": DB_PATH,
+                "dim": EMBED_DIM, "db_path": DB_PATH,
+                # BLOB+Python-cosine is the intentional implementation path,
+                # not a degraded fallback (see _get_db docstring). The previous
+                # "sqlite-vec-fallback" label gave operators the wrong signal.
+                "backend": "sqlite-vec"}
     except Exception as exc:
         return {"status": "degraded", "error": str(exc)}

@@ -212,6 +216,80 @@ def refresh():
     return {"status": "ok", "noop": True}


+# ----------------------------------------------------------------------
+# /health/deep — synthetic round-trip
+# ----------------------------------------------------------------------
+
+# Fixed sentinel id used by /health/deep. Upserted on every probe call,
+# so the row is idempotent. Kept under id="__healthcheck__sentinel" so
+# the L4 corpus has at most one healthcheck row regardless of probe rate.
+_HEALTH_SENTINEL_ID = "__healthcheck__sentinel"
+_HEALTH_SENTINEL_TEXT = (
+    "healthcheck sentinel — embed-write-search round-trip verifier"
+)
+
+
+@app.get("/health/deep")
+async def health_deep():
+    """Real functional probe: embed → write → search the sentinel.
+
+    Catches the class of failure that plain /health misses — broken
+    embed paths, write 500s, query path bugs — i.e. exactly the bug
+    shape that silently degraded L6 from v0.8.0 → v0.8.2.
+
+    Returns:
+        {status, embed_ms, write_ms, search_ms, hit, ok}
+
+    `hit` confirms the sentinel was returned from search; `ok` is the
+    aggregate `embed_ok AND write_ok AND hit`. HTTP status stays 200
+    regardless so callers can read the body for diagnostics; status:
+    field carries the verdict.
+    """
+    t_total = time.perf_counter()
+    out: dict[str, Any] = {"status": "ok", "ok": True}
+    try:
+        t0 = time.perf_counter()
+        embs = await _embed_batch([_HEALTH_SENTINEL_TEXT])
+        out["embed_ms"] = round((time.perf_counter() - t0) * 1000.0, 1)
+        if not embs or not embs[0]:
+            out["status"] = "embed_failed"
+            out["ok"] = False
+            return out
+        vec = embs[0]
+    except Exception as exc:
+        out["status"] = f"embed_error: {type(exc).__name__}"
+        out["ok"] = False
+        return out
+
+    try:
+        conn = _get_db()
+        t1 = time.perf_counter()
+        conn.execute(
+            "INSERT OR REPLACE INTO chunks(id, text, embedding, indexed_at) "
+            "VALUES (?, ?, ?, ?)",
+            (_HEALTH_SENTINEL_ID, _HEALTH_SENTINEL_TEXT, _vec_to_blob(vec), time.time()),
+        )
+        conn.commit()
+        out["write_ms"] = round((time.perf_counter() - t1) * 1000.0, 1)
+
+        t2 = time.perf_counter()
+        rows = conn.execute("SELECT id, embedding FROM chunks WHERE id = ?",
+                            (_HEALTH_SENTINEL_ID,)).fetchone()
+        out["search_ms"] = round((time.perf_counter() - t2) * 1000.0, 1)
+        conn.close()
+    except Exception as exc:
+        out["status"] = f"db_error: {type(exc).__name__}"
+        out["ok"] = False
+        return out
+
+    out["hit"] = rows is not None
+    if not out["hit"]:
+        out["status"] = "sentinel_missing"
+        out["ok"] = False
+    out["total_ms"] = round((time.perf_counter() - t_total) * 1000.0, 1)
+    return out
+
+
 # ----------------------------------------------------------------------
 # Entrypoint
 # ----------------------------------------------------------------------
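The L4 health comment above refers to a BLOB+Python-cosine storage path, and the deep probe calls a _vec_to_blob helper that is not itself part of this diff. For readers unfamiliar with that technique, here is a generic, self-contained illustration of round-tripping embeddings through sqlite BLOBs and ranking by cosine in Python; the function names and the float32 little-endian packing are assumptions for the sketch, not the package's actual helpers.

# Illustration only: sqlite BLOB storage + Python cosine ranking.
import sqlite3
import struct

def vec_to_blob(vec: list[float]) -> bytes:
    # Pack as little-endian float32 — one assumed, compact on-disk layout.
    return struct.pack(f"<{len(vec)}f", *vec)

def blob_to_vec(blob: bytes) -> list[float]:
    return list(struct.unpack(f"<{len(blob) // 4}f", blob))

def cosine(a: list[float], b: list[float]) -> float:
    dot = sum(x * y for x, y in zip(a, b))
    na = sum(x * x for x in a) ** 0.5
    nb = sum(y * y for y in b) ** 0.5
    return dot / (na * nb) if na and nb else 0.0

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE chunks (id TEXT PRIMARY KEY, embedding BLOB)")
conn.execute("INSERT INTO chunks VALUES (?, ?)", ("demo", vec_to_blob([0.1, 0.2, 0.3])))
query = [0.1, 0.2, 0.25]
rows = conn.execute("SELECT id, embedding FROM chunks").fetchall()
ranked = sorted(rows, key=lambda r: cosine(query, blob_to_vec(r[1])), reverse=True)
print(ranked[0][0])  # -> "demo"
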
package/packages/memory-engine/engine/services/l5/l5-comms-layer.py
CHANGED

@@ -558,6 +558,20 @@ def serve(port=8034):
     from fastapi import FastAPI, Query
     import uvicorn

+    # Bootstrap all 4 collections on startup. Previously only `chats`
+    # was being created (the indexer entrypoints below each call their
+    # own ensure_collection lazily, so collections without an indexer
+    # — i.e. those fed solely via /index-internal or the compat shim's
+    # _index_l5 fan-out — never came into existence and writes to them
+    # 500'd). Idempotent: ensure_collection short-circuits if exists.
+    try:
+        bootstrap_client = get_client()
+        for _name in ("chats", "emails", "contacts", "memory"):
+            ensure_collection(bootstrap_client, _name)
+        logging.info("L5 collections bootstrapped: chats, emails, contacts, memory")
+    except Exception as exc:
+        logging.warning(f"L5 collection bootstrap failed (continuing): {exc}")
+
     api = FastAPI(title="L5 Communications Layer")

     @api.get("/health")
@@ -658,6 +672,71 @@ def serve(port=8034):
             "insert_ms": round(insert_ms, 1),
         }

+    @api.get("/health/deep")
+    def api_health_deep():
+        """Real functional probe: assert all 4 collections exist + run
+        embed+insert+search of a sentinel chunk in the `chats` collection.
+        Catches missing-collection regressions (Issue 3) and embed/insert/
+        search path bugs the shallow /health misses."""
+        import time as _time, hashlib as _hashlib
+        out = {"status": "ok", "ok": True}
+        client = get_client()
+
+        # 1. Collection presence
+        expected = ("chats", "emails", "contacts", "memory")
+        present = {n: client.has_collection(n) for n in expected}
+        out["collections"] = present
+        missing = [n for n, ok in present.items() if not ok]
+        if missing:
+            out["status"] = f"missing_collections:{','.join(missing)}"
+            out["ok"] = False
+            return out
+
+        # 2. Synthetic embed → insert → search in `chats`
+        sentinel_id = "__healthcheck__sentinel"
+        sentinel_text = "healthcheck sentinel — L5 embed-write-search round-trip verifier"
+        try:
+            t0 = _time.time()
+            embs = _embed_post([sentinel_text])
+            out["embed_ms"] = round((_time.time() - t0) * 1000.0, 1)
+            if not embs or embs[0] is None:
+                out["status"] = "embed_failed"
+                out["ok"] = False
+                return out
+
+            t1 = _time.time()
+            from datetime import datetime as _dt, timezone as _tz
+            client.upsert(collection_name="chats", data=[{
+                "id": sentinel_id,
+                "vector": embs[0],
+                "text": sentinel_text,
+                "source": "healthcheck",
+                "channel": "__healthcheck__",
+                "contact": "",
+                "timestamp": _dt.now(_tz.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
+                "arena": "__healthcheck__",
+            }])
+            out["write_ms"] = round((_time.time() - t1) * 1000.0, 1)
+
+            t2 = _time.time()
+            hits = client.search(
+                collection_name="chats",
+                data=[embs[0]],
+                limit=1,
+                filter='arena == "__healthcheck__"',
+                output_fields=["id"],
+            )
+            out["search_ms"] = round((_time.time() - t2) * 1000.0, 1)
+            found = bool(hits and hits[0] and hits[0][0].get("entity", {}).get("id") == sentinel_id)
+            out["hit"] = found
+            if not found:
+                out["status"] = "sentinel_missing"
+                out["ok"] = False
+        except Exception as exc:
+            out["status"] = f"probe_error: {type(exc).__name__}: {exc}"
+            out["ok"] = False
+        return out
+
     print(f"\n L5 Communications Layer — http://127.0.0.1:{port}")
     uvicorn.run(api, host=os.environ.get("HOST","127.0.0.1"), port=port, log_level="warning")

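The bootstrap loop above leans on an idempotent ensure_collection, which is not included in this diff. As a hedged sketch only, an idempotent bootstrap of that kind could look like the following with pymilvus's MilvusClient; the embedding dimension, local milvus-lite URI, and field options are assumptions, not the package's real settings.

# Hypothetical ensure_collection sketch — short-circuits when the
# collection already exists, so repeated startup bootstraps stay harmless.
from pymilvus import MilvusClient

EMBED_DIM = 768  # assumption — must match the embedding provider in use

def ensure_collection(client: MilvusClient, name: str) -> None:
    if client.has_collection(name):
        return
    client.create_collection(
        collection_name=name,
        dimension=EMBED_DIM,
        primary_field_name="id",
        id_type="string",          # string ids, as the sentinel upsert implies
        auto_id=False,
        max_length=512,
    )

if __name__ == "__main__":
    client = MilvusClient(uri="./l5_demo.db")  # milvus-lite local file (assumed)
    for collection in ("chats", "emails", "contacts", "memory"):
        ensure_collection(client, collection)
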
package/packages/memory-engine/engine/services/l6/Dockerfile
CHANGED

@@ -1,8 +1,25 @@
 FROM python:3.12-slim
 WORKDIR /app
-
-
+# gcc/g++ needed by some sentence-transformers transitive deps; curl kept
+# for in-container debugging.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    gcc g++ curl \
+    && rm -rf /var/lib/apt/lists/*
+# Core deps + sentence-transformers/torch for the cross-encoder reranker.
+# Torch CPU wheel is enough — the reranker is small (MiniLM L-6) and
+# CPU-bound throughput is fine at L6's request volume. Without these,
+# get_reranker() falls back to RRF-only, capping recall ranking quality.
+RUN pip install --no-cache-dir \
+    fastapi "uvicorn[standard]" httpx "pymilvus[milvus_lite]" "setuptools<70" pydantic spacy \
+    "sentence-transformers" \
+    "torch" --extra-index-url https://download.pytorch.org/whl/cpu
 RUN python -m spacy download en_core_web_sm
+ENV HF_HOME=/data/.cache/huggingface
+# Pre-download the cross-encoder so cold-start doesn't pay first-pull
+# latency. The model is small (~80MB) and gets cached at /data — survives
+# container recreates since /data is a volume mount.
+RUN mkdir -p /data/.cache/huggingface && \
+    python -c "from sentence_transformers import CrossEncoder; CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2', cache_folder='/data/.cache/huggingface')"
 # Shared embed_provider module (build context is engine/services).
 COPY _shared /app/_shared
 COPY l6/l6-document-store.py /app/server.py

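The Dockerfile comments above describe a reranker-or-RRF-fallback behaviour: if sentence-transformers/torch are absent, get_reranker() yields nothing and ranking stays RRF-only. The package's actual get_reranker() is not in this diff; the following is a hedged illustration of that fallback pattern only, with assumed function names.

# Sketch of a lazy reranker loader that degrades to RRF-only ranking.
from functools import lru_cache

@lru_cache(maxsize=1)
def get_reranker():
    """Return a CrossEncoder when sentence-transformers/torch are installed,
    else None — callers then keep the upstream RRF/score ordering."""
    try:
        from sentence_transformers import CrossEncoder
        return CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
    except Exception:
        return None

def rerank(query: str, hits: list[dict], limit: int) -> list[dict]:
    model = get_reranker()
    if model is None:
        return hits[:limit]  # RRF/score order already applied upstream
    scores = model.predict([(query, h.get("text", "")) for h in hits])
    order = sorted(range(len(hits)), key=lambda i: scores[i], reverse=True)
    return [hits[i] for i in order[:limit]]
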
package/packages/memory-engine/engine/services/l6/l6-document-store.py
CHANGED

@@ -838,6 +838,74 @@ def serve(port: int = DEFAULT_PORT):
     def api_health():
         return health()

+    @api.get("/health/deep")
+    def api_health_deep():
+        """Real functional probe: embed → insert via /index-batch path →
+        search the sentinel via hybrid search → assert reranker loaded.
+
+        Built to catch the v0.8.0–0.8.2 L6 _embed_client shadowing bug
+        and its kind (request-handler-level breakage with the layer
+        process appearing healthy)."""
+        import time as _time
+        sentinel_id = "__healthcheck__sentinel"
+        sentinel_text = "healthcheck sentinel — L6 embed-write-search round-trip verifier"
+        out = {"status": "ok", "ok": True}
+        try:
+            t0 = _time.time()
+            try:
+                emb = embed_text(sentinel_text)
+            except Exception as exc:
+                out["status"] = f"embed_failed: {type(exc).__name__}: {exc}"
+                out["ok"] = False
+                return out
+            out["embed_ms"] = round((_time.time() - t0) * 1000.0, 1)
+
+            # Insert via the same path real ingest uses, so the probe
+            # actually exercises /index-batch's code.
+            t1 = _time.time()
+            import httpx as _httpx
+            r = _httpx.post(
+                f"http://localhost:{DEFAULT_PORT}/index-batch",
+                json={
+                    "arena": "__healthcheck__",
+                    "records": [{"id": sentinel_id, "text": sentinel_text}],
+                },
+                timeout=15.0,
+            )
+            out["write_ms"] = round((_time.time() - t1) * 1000.0, 1)
+            if r.status_code != 200:
+                out["status"] = f"write_failed: http {r.status_code}"
+                out["ok"] = False
+                return out
+
+            t2 = _time.time()
+            results = search(
+                sentinel_text, method="hybrid", limit=3,
+                arena="__healthcheck__", enable_rerank=False,
+            )
+            out["search_ms"] = round((_time.time() - t2) * 1000.0, 1)
+            # L6 search hits don't expose a stable `id` field — match by
+            # text content (verbatim — the sentinel was just written and
+            # the chunking layer doesn't split it) against the arena that
+            # the search was already filtered to.
+            hit = any(
+                sentinel_text in (r.get("text") or "")
+                for r in (results or [])
+            )
+            out["hit"] = hit
+            if not hit:
+                out["status"] = "sentinel_missing"
+                out["ok"] = False
+
+            # Reranker check — informational. Failure here doesn't flip
+            # ok=False because L6 falls back to RRF and still serves
+            # results; it just caps the recall ranking quality.
+            out["reranker"] = "ok" if get_reranker() is not None else "rrf_fallback"
+        except Exception as exc:
+            out["status"] = f"probe_error: {type(exc).__name__}: {exc}"
+            out["ok"] = False
+        return out
+
     @api.get("/stats")
     def api_stats():
         return get_stats()