@pentatonic-ai/ai-agent-sdk 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/README.md +170 -69
  2. package/bin/__tests__/callback-server.test.js +4 -1
  3. package/bin/cli.js +41 -164
  4. package/bin/commands/config.js +251 -0
  5. package/package.json +2 -1
  6. package/packages/doctor/__tests__/detect.test.js +2 -6
  7. package/packages/doctor/src/checks/local-memory.js +164 -196
  8. package/packages/doctor/src/detect.js +11 -3
  9. package/packages/memory/src/corpus/adapters.js +104 -0
  10. package/packages/memory/src/corpus/cli.js +72 -7
  11. package/packages/memory/src/corpus/index.js +1 -1
  12. package/packages/memory-engine/.env.example +13 -0
  13. package/packages/memory-engine/README.md +131 -0
  14. package/packages/memory-engine/bench/README.md +99 -0
  15. package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +1115 -0
  16. package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +819 -0
  17. package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +1278 -0
  18. package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +1018 -0
  19. package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +1038 -0
  20. package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +961 -0
  21. package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +1115 -0
  22. package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +819 -0
  23. package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +1278 -0
  24. package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +1018 -0
  25. package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +1038 -0
  26. package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +937 -0
  27. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +1115 -0
  28. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +819 -0
  29. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +1278 -0
  30. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +1018 -0
  31. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +1038 -0
  32. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +961 -0
  33. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +1115 -0
  34. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +819 -0
  35. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +1278 -0
  36. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +1018 -0
  37. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +1038 -0
  38. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +883 -0
  39. package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +1115 -0
  40. package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +819 -0
  41. package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +1278 -0
  42. package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +1018 -0
  43. package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +1038 -0
  44. package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +937 -0
  45. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +1115 -0
  46. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +1115 -0
  47. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +819 -0
  48. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +542 -0
  49. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +1278 -0
  50. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +894 -0
  51. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +1018 -0
  52. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +680 -0
  53. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +1038 -0
  54. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +693 -0
  55. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +961 -0
  56. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +727 -0
  57. package/packages/memory-engine/compat/Dockerfile +11 -0
  58. package/packages/memory-engine/compat/server.py +680 -0
  59. package/packages/memory-engine/docker-compose.yml +243 -0
  60. package/packages/memory-engine/docs/MIGRATION.md +178 -0
  61. package/packages/memory-engine/docs/RUNBOOK-AWS.md +375 -0
  62. package/packages/memory-engine/docs/why-v05-underperforms.md +138 -0
  63. package/packages/memory-engine/engine/README.md +52 -0
  64. package/packages/memory-engine/engine/l2-hybridrag-proxy.py +1543 -0
  65. package/packages/memory-engine/engine/l5-comms-layer.py +663 -0
  66. package/packages/memory-engine/engine/l6-document-store.py +1018 -0
  67. package/packages/memory-engine/engine/services/l2/Dockerfile +41 -0
  68. package/packages/memory-engine/engine/services/l2/init_databases.py +81 -0
  69. package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +1543 -0
  70. package/packages/memory-engine/engine/services/l4/Dockerfile +15 -0
  71. package/packages/memory-engine/engine/services/l4/server.py +235 -0
  72. package/packages/memory-engine/engine/services/l5/Dockerfile +9 -0
  73. package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +678 -0
  74. package/packages/memory-engine/engine/services/l6/Dockerfile +11 -0
  75. package/packages/memory-engine/engine/services/l6/l6-document-store.py +1016 -0
  76. package/packages/memory-engine/engine/services/nv-embed/Dockerfile +28 -0
  77. package/packages/memory-engine/engine/services/nv-embed/server.py +152 -0
  78. package/packages/memory-engine/pme_memory/__init__.py +0 -0
  79. package/packages/memory-engine/pme_memory/__main__.py +129 -0
  80. package/packages/memory-engine/pme_memory/artifacts.py +95 -0
  81. package/packages/memory-engine/pme_memory/embed.py +74 -0
  82. package/packages/memory-engine/pme_memory/health.py +36 -0
  83. package/packages/memory-engine/pme_memory/hygiene.py +159 -0
  84. package/packages/memory-engine/pme_memory/indexer.py +200 -0
  85. package/packages/memory-engine/pme_memory/needs.py +55 -0
  86. package/packages/memory-engine/pme_memory/provenance.py +80 -0
  87. package/packages/memory-engine/pme_memory/scoring.py +168 -0
  88. package/packages/memory-engine/pme_memory/search.py +52 -0
  89. package/packages/memory-engine/pme_memory/store.py +86 -0
  90. package/packages/memory-engine/pme_memory/synthesis.py +114 -0
  91. package/packages/memory-engine/pyproject.toml +65 -0
  92. package/packages/memory-engine/scripts/kg-extractor.py +557 -0
  93. package/packages/memory-engine/scripts/kg-preflexor-v2.py +738 -0
  94. package/packages/memory-engine/tests/test_api_contract.sh +57 -0
@@ -0,0 +1,1543 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Sequential HybridRAG Proxy — Graph-First Architecture
4
+
5
+ Based on BlackRock-NVIDIA paper methodology:
6
+ 1. HybridRAG trigger (early detection)
7
+ 2. Neo4j graph search (entities/relationships first)
8
+ 3. QMD vector search (informed by graph context)
9
+ 4. Result fusion with graph-priority scoring
10
+
11
+ Port: 8031 (replaces neo4j-qmd-proxy.py)
12
+ """
13
+
14
+ import argparse
15
+ import hashlib
16
+ import json
17
+ import logging
18
+ import os
19
+ import sqlite3
20
+ import time
21
+ from datetime import datetime
22
+ from pathlib import Path
23
+ from typing import Any, Dict, List, Optional, Set
24
+
25
+ import re
26
+ import requests
27
+ from fastapi import FastAPI, HTTPException, Request
28
+ from neo4j import GraphDatabase
29
+ from neo4j.time import DateTime as Neo4jDateTime, Date as Neo4jDate
30
+ from pydantic import BaseModel
31
+ import uvicorn
32
+
33
+
34
def _serialize_neo4j_value(v: Any) -> Any:
    """Recursively replace neo4j temporal values with JSON-friendly data.

    ``neo4j.time.DateTime`` / ``neo4j.time.Date`` instances become their
    ``iso_format()`` string, dict values are converted in place of the
    originals, and lists *and* tuples both come back as lists. Every other
    value is returned untouched.

    Needed because FastAPI's pydantic serialiser otherwise raises
    ``PydanticSerializationError: Unable to serialize unknown type:
    <class 'neo4j.time.DateTime'>`` when graph results include nodes with
    datetime properties (e.g. created_at on entity nodes).
    """
    if isinstance(v, (Neo4jDateTime, Neo4jDate)):
        return v.iso_format()
    if isinstance(v, dict):
        return {key: _serialize_neo4j_value(item) for key, item in v.items()}
    if isinstance(v, (list, tuple)):
        return list(map(_serialize_neo4j_value, v))
    return v
49
+
50
+ # ---------------------------------------------------------------------------
51
+ # Config
52
+ # ---------------------------------------------------------------------------
53
+
54
# Root directory for on-disk state; PME_WORKSPACE overrides ~/pentatonic.
WORKSPACE = Path(os.getenv("PME_WORKSPACE", Path.home() / "pentatonic"))

# Bolt endpoint of the Neo4j instance; PME_NEO4J_URI overrides the default.
NEO4J_URI = os.getenv("PME_NEO4J_URI", "bolt://localhost:7687")
57
+
58
def _load_neo4j_password() -> str:
    """Resolve the Neo4j password from the environment or a secrets file.

    Lookup order:
      1. ``PME_NEO4J_PASSWORD``, then ``NEO4J_PASSWORD`` — the first
         non-empty value wins.
      2. The first secrets file (workspace ``.secrets.json``, then the two
         ``~/.pentatonic`` locations) that parses as a JSON object and holds
         a non-null ``neo4j_password``, ``NEO4J_PASSWORD`` or
         ``neo4jPassword`` entry.
      3. The literal ``"password"``.

    Returns:
        The password as a string. Missing, unreadable, undecodable or
        non-object secrets files are skipped rather than raising, because
        this runs at import time.
    """
    # `or` (not a nested default) so an empty PME_NEO4J_PASSWORD does not
    # hide a populated NEO4J_PASSWORD.
    pw = os.environ.get("PME_NEO4J_PASSWORD") or os.environ.get("NEO4J_PASSWORD")
    if pw:
        return pw

    candidates = (
        WORKSPACE / ".secrets.json",
        Path.home() / ".pentatonic" / "workspace" / ".secrets.json",
        Path.home() / ".pentatonic" / "secrets.json",
    )
    for path in candidates:
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # OSError: missing/unreadable file or a directory.
            # ValueError: invalid JSON or bytes that are not valid UTF-8.
            continue
        if not isinstance(data, dict):
            continue
        for key in ("neo4j_password", "NEO4J_PASSWORD", "neo4jPassword"):
            value = data.get(key)
            if value is not None:
                return str(value)

    return "password"  # fallback default
77
+
78
+ NEO4J_AUTH = ("neo4j", _load_neo4j_password())
79
def _resolve_qmd_db() -> str:
    """Return the filesystem path of the QMD SQLite database.

    A non-empty ``PME_QMD_DB`` environment variable is returned verbatim.
    Otherwise the first existing file among the known home-directory
    locations is used; when none exists the ``~/.pentatonic`` location is
    returned anyway.
    """
    override = os.environ.get("PME_QMD_DB")
    if override:
        return override

    home = Path.home()
    primary = home / ".pentatonic" / "memory" / "main.sqlite"
    alternate = home / ".openclaw" / "memory" / "main.sqlite"
    # Fall back to the primary location when neither file is present.
    chosen = next((p for p in (primary, alternate) if p.exists()), primary)
    return str(chosen)
91
+
92
# Vector store location and the Ollama embedding fallback.
QMD_DB_PATH = _resolve_qmd_db()
OLLAMA_URL = os.getenv("PME_OLLAMA_URL", "http://localhost:11434/api/embeddings")
EMBEDDING_MODEL = os.getenv("PME_EMBED_MODEL", "nomic-embed-text")

# NV-Embed-v2 service (primary, 4096-dim); only the exact string "true"
# (case-insensitive) enables it.
NV_EMBED_URL = os.getenv("PME_NV_EMBED_URL", "http://localhost:8041/v1/embeddings")
NV_EMBED_ENABLED = os.getenv("PME_NV_EMBED_ENABLED", "true").lower() == "true"

# Sequential processing weights - OPTIMIZED FOR QUALITY
GRAPH_PRIORITY_BOOST = 0.5  # Added to a vector hit that mentions a graph entity
VECTOR_BASE_WEIGHT = 0.5    # Multiplier applied to raw cosine similarity

# Memory tracking: JSON-lines log of per-query layer usage.
TRACKER_FILE = WORKSPACE / "memory" / "memory-tracker.jsonl"

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
log = logging.getLogger("sequential-hybridrag")

app = FastAPI(title="Sequential HybridRAG Proxy", version="1.0.0")
111
+
112
+ # ---------------------------------------------------------------------------
113
+ # Memory Usage Tracking
114
+ # ---------------------------------------------------------------------------
115
+
116
def log_memory_usage(query: str, layers_hit: List[str], l1_hits: int = 0,
                     l3_hits: int = 0, l4_hits: int = 0, search_time_ms: float = 0.0,
                     entities_extracted: Optional[List[str]] = None) -> None:
    """Append one usage event for this query to the tracker file.

    Best-effort: any failure (bad arguments, unwritable file, ...) is logged
    as a warning and never propagates to the caller.

    Args:
        query: The user query; only its first 100 characters are stored.
        layers_hit: Names of the layers that produced results.
        l1_hits: Number of L1 (system file) hits.
        l3_hits: Number of L3 (graph) hits.
        l4_hits: Number of L4 (vector) hits.
        search_time_ms: Search duration, stored rounded to one decimal.
        entities_extracted: Entities found in the query, if any.
    """
    # Function-scope import: the module only imports `datetime` itself.
    from datetime import timezone

    try:
        TRACKER_FILE.parent.mkdir(parents=True, exist_ok=True)

        entities = list(entities_extracted or [])

        # Same "...Z" wire format as before, built from an aware timestamp
        # because datetime.utcnow() is deprecated.
        timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

        event = {
            "timestamp": timestamp,
            "query": query[:100],  # Truncate for privacy
            "source": "sequential-hybridrag",
            "layers_hit": layers_hit,
            "l1_system_hits": l1_hits,
            # L2 HybridRAG is active whenever L3 or L4 are used
            "l2_hybridrag_active": 1 if (l3_hits > 0 or l4_hits > 0) else 0,
            "l3_graph_hits": l3_hits,
            "l4_vector_hits": l4_hits,
            "total_hits": l1_hits + l3_hits + l4_hits,
            "search_time_ms": round(search_time_ms, 1),
            "entities_extracted": entities,
            "entity_count": len(entities),
        }

        with open(TRACKER_FILE, "a", encoding="utf-8") as f:
            f.write(json.dumps(event) + "\n")

    except Exception as e:
        log.warning(f"Memory tracking failed: {e}")
146
+
147
def get_layer_stats() -> Dict:
    """Aggregate the tracker file into per-layer usage statistics.

    Only events whose ``source`` is ``"sequential-hybridrag"`` are counted.
    A line that cannot be processed is logged at debug level and skipped;
    counters already bumped for that line before the failure are kept.

    Returns:
        ``{"error": ...}`` when the tracker file is missing or unreadable,
        otherwise a dict with ``total_queries``, ``recent_24h``,
        ``layer_usage`` (counts for L1-L4) and ``layer_percentages``
        (empty when no query was counted).
    """
    if not TRACKER_FILE.exists():
        return {"error": "No tracking data found"}

    try:
        usage = {"L1": 0, "L2": 0, "L3": 0, "L4": 0}
        seen = 0
        seen_last_day = 0

        with open(TRACKER_FILE, "r", encoding="utf-8") as handle:
            for raw_line in handle:
                try:
                    record = json.loads(raw_line.strip())
                    if record.get("source") != "sequential-hybridrag":
                        continue
                    seen += 1

                    # A layer counts when it is named explicitly or its hit
                    # counter is positive.
                    named = record.get("layers_hit", [])
                    if "system" in named or record.get("l1_system_hits", 0) > 0:
                        usage["L1"] += 1
                    if record.get("l2_hybridrag_active", 0) > 0:
                        usage["L2"] += 1
                    if "graph" in named or record.get("l3_graph_hits", 0) > 0:
                        usage["L3"] += 1
                    if "vector" in named or record.get("l4_vector_hits", 0) > 0:
                        usage["L4"] += 1

                    # "Recent" means less than one whole day old.
                    stamp = datetime.fromisoformat(record["timestamp"].replace("Z", "+00:00"))
                    age = datetime.now(stamp.tzinfo) - stamp
                    if age.days < 1:
                        seen_last_day += 1
                except Exception as e:
                    logging.debug(f"Suppressed: {e}")

        if seen > 0:
            percentages = {
                layer: round(count / max(seen, 1) * 100, 1)
                for layer, count in usage.items()
            }
        else:
            percentages = {}

        return {
            "total_queries": seen,
            "recent_24h": seen_last_day,
            "layer_usage": usage,
            "layer_percentages": percentages,
        }

    except Exception as e:
        return {"error": str(e)}
197
+
198
+ # ---------------------------------------------------------------------------
199
+ # Models
200
+ # ---------------------------------------------------------------------------
201
+
202
class ChatMessage(BaseModel):
    # One entry of a chat transcript.
    role: str      # sender label of the message
    content: str   # message text
205
+
206
class ChatCompletionRequest(BaseModel):
    # Request body for a chat completion call.
    # NOTE(review): field names look like the OpenAI chat-completions
    # payload — confirm against the endpoint that consumes this model.
    messages: List[ChatMessage]    # conversation so far
    model: str = "gpt-3.5-turbo"   # model name used when the client sends none
    max_tokens: int = 1000         # default completion length cap
    temperature: float = 0.1       # default sampling temperature
211
+
212
class EmbeddingRequest(BaseModel):
    # Request body for an embedding call.
    input: Any                     # payload to embed; type is not constrained here
    model: str = EMBEDDING_MODEL   # defaults to the configured embedding model
215
+
216
+ # ---------------------------------------------------------------------------
217
+ # HybridRAG Processing Functions
218
+ # ---------------------------------------------------------------------------
219
+
220
def extract_query_entities(query: str) -> List[str]:
    """Pick out likely entity names from a query (early detection).

    The query is split on whitespace and every character that is not a word
    character, whitespace or ``-`` is removed from each token. Title-cased
    tokens longer than two characters that are not stop words are returned
    first, followed by every adjacent pair of title-cased tokens whose first
    token is not a stop word. Duplicates are not removed.
    """
    stop_words = {'what', 'who', 'where', 'when', 'how', 'does', 'did', 'the', 'and', 'for', 'with', 'from', 'about', 'this', 'that'}

    # Strip punctuation from words and drop tokens that end up empty.
    tokens = []
    for raw in query.split():
        cleaned = re.sub(r'[^\w\s-]', '', raw).strip()
        if cleaned:
            tokens.append(cleaned)

    # Capitalized single words (proper nouns).
    singles = [
        tok for tok in tokens
        if tok.istitle() and len(tok) > 2 and tok.lower() not in stop_words
    ]

    # Two-word title-case phrases.
    pairs = [
        f"{first} {second}"
        for first, second in zip(tokens, tokens[1:])
        if first.istitle() and second.istitle() and first.lower() not in stop_words
    ]

    potential_entities = singles + pairs
    log.info(f"Extracted entities: {potential_entities}")
    return potential_entities
241
+
242
def _hebbian_strengthen(session, node_names: List[str], increment: float = 0.05) -> None:
    """Hebbian: strengthen edges between co-accessed nodes during query.

    For every unordered pair of names, each relationship directly linking
    two nodes with those ``name`` values gets ``weight`` raised by
    ``increment`` (a missing weight starts at 1.0) and ``last_accessed`` set
    to the current UTC time.

    Args:
        session: Object exposing ``run(cypher, **params)`` (a Neo4j session).
        node_names: Names of the nodes touched by the current query; empty
            names are ignored.
        increment: Amount added to each matching edge weight.

    Failures are non-critical: they are logged at debug level and the
    remaining pairs are still processed.
    """
    # Function-scope imports: the module only imports `datetime` itself and
    # does not import itertools.
    from datetime import timezone
    from itertools import combinations

    names = [n for n in node_names if n]
    if len(names) < 2:
        return

    # Same "...Z" format as before; datetime.utcnow() is deprecated.
    now = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    for n1, n2 in combinations(names, 2):
        try:
            session.run(
                """MATCH (a {name: $n1})-[r]-(b {name: $n2})
                SET r.weight = coalesce(r.weight, 1.0) + $inc,
                r.last_accessed = $now""",
                n1=n1, n2=n2, inc=increment, now=now
            )
        except Exception as e:
            log.debug(f"Hebbian strengthen skipped for {n1!r}/{n2!r}: {e}")
258
+
259
+
260
def search_neo4j_sequential(query: str, entities: List[str], limit: int = 12) -> Dict:
    """Phase 1: Neo4j graph search with spreading activation + Hebbian.

    For every extracted entity this runs a direct ``CONTAINS`` match on
    ``n.name`` (neighbours ordered by edge weight), followed by a 2-hop
    expansion over edges weighted >= 0.5. If nothing was found, the first
    two query words longer than three characters are matched against any
    string property. Finally the edges between all touched nodes are
    strengthened via ``_hebbian_strengthen``.

    Args:
        query: Raw user query; only used for the keyword fallback.
        entities: Candidate entity names to look up.
        limit: Maximum number of results returned. The per-query Cypher
            limits are fixed (8 direct, 5 activation, 4 keyword).

    Returns:
        ``{"results": [...], "graph_entities": [...], "entity_count": int}``.
        ``graph_entities`` holds only non-empty node names. On any failure
        the error is logged and the same shape is returned with empty values.
    """
    entity_cypher = """
    MATCH (n)
    WHERE n.name CONTAINS $entity
    OPTIONAL MATCH (n)-[r]-(connected)
    WHERE coalesce(r.weight, 1.0) >= 0.2
    RETURN n, r, connected, $entity as search_entity,
    coalesce(r.weight, 1.0) AS edge_weight
    ORDER BY edge_weight DESC
    LIMIT $limit
    """
    activation_cypher = """
    MATCH (start)-[r1]-(mid)-[r2]-(end)
    WHERE start.name CONTAINS $entity
    AND coalesce(r1.weight, 1.0) >= 0.5
    AND coalesce(r2.weight, 1.0) >= 0.5
    AND start <> end
    RETURN end,
    coalesce(r1.weight, 1.0) * coalesce(r2.weight, 1.0) AS activation,
    mid.name AS via
    ORDER BY activation DESC
    LIMIT 5
    """
    keyword_cypher = """
    MATCH (n)
    WHERE ANY(prop IN keys(n) WHERE n[prop] IS :: STRING AND n[prop] CONTAINS $term)
    OPTIONAL MATCH (n)-[r]-(connected)
    RETURN n, r, connected
    LIMIT $limit
    """

    results: List[Dict] = []
    graph_entities: Set[str] = set()

    try:
        # Both context managers guarantee the session and the driver are
        # closed even when a query raises.
        with GraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH) as driver, \
                driver.session() as session:
            # Search for specific entities — use weighted spreading activation
            for entity in entities:
                # Direct match first
                for record in session.run(entity_cypher, entity=entity, limit=8):
                    node = _serialize_neo4j_value(dict(record["n"]))
                    rel = record["r"]
                    connected = record["connected"]
                    search_entity = record["search_entity"]
                    edge_weight = record["edge_weight"]

                    context = f"Entity: {node.get('name', 'Unknown')} (type: {node.get('type', 'Unknown')})"
                    # Compare against None: graph entities behave like
                    # mappings, so one without properties is falsy and a
                    # plain truthiness test would drop the relationship.
                    if rel is not None and connected is not None:
                        connected_dict = _serialize_neo4j_value(dict(connected))
                        context += f" → {rel.type} → {connected_dict.get('name', 'Unknown')}"
                        if edge_weight != 1.0:
                            context += f" [weight: {edge_weight:.2f}]"

                    if 'source_file' in node:
                        context += f"\nSource: {node['source_file']}"

                    name = node.get('name', '')
                    # Skip empty names: downstream substring checks treat ""
                    # as matching everything.
                    if name:
                        graph_entities.add(name)

                    # Score boosted by edge weight (spreading activation)
                    score = min(0.95, 0.7 + (edge_weight * 0.1))

                    results.append({
                        "path": f"neo4j://entity/{search_entity}",
                        "text": context,
                        "score": score,
                        "source": "graph",
                        "entity": search_entity,
                        "node_data": node
                    })

                # 2-hop spreading activation for high-weight paths
                if entity:
                    for rec in session.run(activation_cypher, entity=entity):
                        end = rec["end"]
                        end_node = _serialize_neo4j_value(dict(end)) if end is not None else {}
                        name = end_node.get("name", "")
                        if name and name not in graph_entities:
                            graph_entities.add(name)
                            results.append({
                                "path": f"neo4j://activation/{entity}/{name}",
                                "text": f"Activated: {name} (via {rec['via']}, activation: {rec['activation']:.3f})",
                                "score": min(0.85, 0.5 + rec["activation"] * 0.05),
                                "source": "graph",
                                "entity": entity,
                                "node_data": end_node
                            })

            # General query search if no specific entities found
            if not results:
                general_words = [
                    w for w in query.split()
                    if len(w) > 3 and w.lower() not in ['what', 'who', 'where', 'when', 'how']
                ]

                for word in general_words[:2]:
                    for record in session.run(keyword_cypher, term=word, limit=4):
                        node = _serialize_neo4j_value(dict(record["n"]))
                        name = node.get('name', '')
                        if name:
                            graph_entities.add(name)

                        results.append({
                            "path": f"neo4j://search/{word}",
                            "text": f"Related: {node}",
                            "score": 0.7,
                            "source": "graph",
                            "entity": word,
                            "node_data": node
                        })

            # Hebbian: strengthen edges between all accessed entities
            _hebbian_strengthen(session, list(graph_entities))

    except Exception as e:
        log.error(f"Neo4j search failed: {e}")
        return {"results": [], "graph_entities": [], "entity_count": 0}

    return {
        "results": results[:limit],
        "graph_entities": list(graph_entities),
        "entity_count": len(graph_entities)
    }
389
+
390
def get_embedding(text: str) -> List[float]:
    """Embed ``text``, preferring NV-Embed-v2 and falling back to Ollama.

    When ``NV_EMBED_ENABLED`` is set, the NV-Embed service is tried first and
    any failure is logged as a warning before the Ollama endpoint is tried.
    If Ollama fails too the error is logged and an empty list is returned,
    so callers must treat ``[]`` as "no embedding available".
    """
    if NV_EMBED_ENABLED:
        try:
            nv_response = requests.post(NV_EMBED_URL, json={"input": text}, timeout=30)
            nv_response.raise_for_status()
            vector = nv_response.json()["data"][0]["embedding"]
        except Exception as e:
            log.warning(f"NV-Embed-v2 failed, falling back to Ollama: {e}")
        else:
            return vector

    # Fallback to Ollama
    try:
        ollama_response = requests.post(
            OLLAMA_URL,
            json={"model": EMBEDDING_MODEL, "prompt": text},
            timeout=30,
        )
        ollama_response.raise_for_status()
        vector = ollama_response.json()["embedding"]
    except Exception as e:
        log.error(f"Embedding failed (both NV-Embed-v2 and Ollama): {e}")
        return []
    else:
        return vector
409
+
410
+
411
+ # ---------------------------------------------------------------------------
412
+ # HyDE — Hypothetical Document Embeddings
413
+ # ---------------------------------------------------------------------------
414
+
415
# Model asked to draft the hypothetical answer; PME_HYDE_MODEL overrides it.
HYDE_MODEL = os.getenv("PME_HYDE_MODEL", "qwen2.5:7b")
# HyDE is on unless PME_HYDE_ENABLED is set to anything other than "true".
HYDE_ENABLED = os.getenv("PME_HYDE_ENABLED", "true").lower() == "true"
417
+
418
def _ollama_generate_url(embeddings_url: str) -> str:
    """Derive the Ollama ``/api/generate`` URL from the embeddings URL."""
    base = embeddings_url.rsplit("/api/", 1)[0].rstrip("/")
    return base + "/api/generate"


def hyde_expand(query: str) -> str:
    """Append a hypothetical one-sentence answer to ``query`` (HyDE).

    The answer is generated by ``HYDE_MODEL`` through Ollama's
    ``/api/generate`` endpoint on the same host as ``OLLAMA_URL``, so a
    non-local Ollama configured via ``PME_OLLAMA_URL`` is honoured. With the
    default configuration this is still ``http://localhost:11434/api/generate``.

    Returns:
        ``"<query> <answer>"`` when a non-empty answer was produced;
        the unchanged query when HyDE is disabled, the model returns nothing,
        or the request fails (the failure is logged as a warning).
    """
    if not HYDE_ENABLED:
        return query
    try:
        r = requests.post(
            _ollama_generate_url(OLLAMA_URL),
            json={
                "model": HYDE_MODEL,
                "prompt": f"Answer this question in one concise sentence:\n{query}",
                "stream": False,
                "options": {"temperature": 0.3, "num_predict": 60},
            },
            timeout=15,
        )
        r.raise_for_status()
        hypo = r.json().get("response", "").strip()
        if hypo:
            log.info(f"HyDE expansion: '{query}' → +'{hypo[:80]}…'")
            return f"{query} {hypo}"
    except Exception as e:
        log.warning(f"HyDE expansion failed (falling back to raw query): {e}")
    return query
443
+
444
+
445
+ # ---------------------------------------------------------------------------
446
+ # Cross-Encoder Reranking
447
+ # ---------------------------------------------------------------------------
448
+
449
# Reranking is on unless PME_RERANK_ENABLED is set to anything but "true".
RERANK_ENABLED = os.getenv("PME_RERANK_ENABLED", "true").lower() == "true"
# Number of leading results that get re-embedded and re-scored.
RERANK_WINDOW = int(os.getenv("PME_RERANK_WINDOW", "5"))
451
+
452
def _cosine_sim(a: List[float], b: List[float]) -> float:
    """Return the cosine similarity of ``a`` and ``b``.

    Vectors of different length are both cut down to the shorter length
    before comparison. If either (truncated) vector has zero norm the
    result is ``0.0``.
    """
    overlap = min(len(a), len(b))
    left, right = a[:overlap], b[:overlap]

    dot = sum(x * y for x, y in zip(left, right))
    norm_left = sum(x * x for x in left) ** 0.5
    norm_right = sum(x * x for x in right) ** 0.5

    if not (norm_left and norm_right):
        return 0.0
    return dot / (norm_left * norm_right)
462
+
463
def cross_encoder_rerank(query: str, results: List[Dict], top_k: int = 16) -> List[Dict]:
    """Re-score the leading results by embedding similarity to the query.

    Nothing happens when reranking is disabled or there are at most
    ``top_k`` results. Otherwise the first ``RERANK_WINDOW`` results are
    re-embedded (first 512 characters of ``text``, ``content`` or ``path``)
    and their score becomes ``0.6 * original + 0.4 * cosine``; that window
    is then sorted by score. Results outside the window follow unchanged.

    If the query itself cannot be embedded, ``results[:top_k]`` is returned.

    NOTE(review): on the normal path the return value is the reranked window
    (cut to ``top_k``) plus every remaining result, so it is not capped at
    ``top_k`` — confirm that callers expect this.
    """
    if not RERANK_ENABLED or len(results) <= top_k:
        return results

    query_emb = get_embedding(query)
    if not query_emb:
        return results[:top_k]

    def _rescore(item: Dict) -> Dict:
        # Get text from 'text' or 'content' field, fallback to path
        body = item.get("text") or item.get("content") or item.get("path", "")
        if not body:
            return item
        item_emb = get_embedding(body[:512])
        if not item_emb:
            return item
        sim = _cosine_sim(query_emb, item_emb)
        # Blend original score (layer priority) with cosine similarity
        blended = 0.6 * item["score"] + 0.4 * sim
        return {**item, "score": round(blended, 4), "_rerank_sim": round(sim, 4)}

    rescored = [_rescore(item) for item in results[:RERANK_WINDOW]]
    reranked = sorted(rescored, key=lambda entry: entry["score"], reverse=True)

    # Results outside the rerank window are appended unchanged.
    untouched = results[RERANK_WINDOW:]
    return reranked[:top_k] + untouched
495
+
496
def search_qmd_informed(query: str, graph_context: Dict, limit: int = 12) -> List[Dict]:
    """Phase 2: QMD vector search informed by graph results.

    The query (extended with up to three graph entity names when there are
    any) is embedded and compared by cosine similarity against the first
    2000 embedded rows of the ``chunks`` table. Rows scoring above 0.2 are
    kept; a row whose path or text mentions a graph entity additionally
    receives ``GRAPH_PRIORITY_BOOST``.

    Args:
        query: Raw user query.
        graph_context: Result of the graph phase; ``graph_context
            ["graph_entities"]`` is read. Empty names are ignored.
        limit: Maximum number of results returned.

    Returns:
        Result dicts (``path``, ``text`` cut to 600 characters, ``score``,
        ``source``, ``base_similarity``, ``entity_boost``) sorted by score,
        best first. An empty list when the database file is missing, no
        embedding is available or the database query fails.
    """
    if not os.path.exists(QMD_DB_PATH):
        return []

    query_embedding = get_embedding(query)
    if not query_embedding:
        return []

    # Drop empty names: "" is a substring of every path/text and would hand
    # the entity boost to every row.
    entities = [e for e in graph_context["graph_entities"] if e]

    # Enhance query with graph entities for better vector search. Without
    # entities the enhanced query equals the raw query, so the embedding is
    # reused instead of being requested a second time.
    enhanced_embedding = query_embedding
    if entities:
        enhanced_query = query + " " + " ".join(entities[:3])
        enhanced_embedding = get_embedding(enhanced_query) or query_embedding

    # The query norm is the same for every row, so compute it once.
    norm_q = sum(x * x for x in enhanced_embedding) ** 0.5
    if not norm_q > 0:
        return []

    entity_terms = [e.lower() for e in entities]

    conn = None
    try:
        conn = sqlite3.connect(QMD_DB_PATH, timeout=5)
        conn.row_factory = sqlite3.Row
        rows = conn.execute("""
            SELECT id, path, text, embedding
            FROM chunks
            WHERE embedding IS NOT NULL
            ORDER BY id
            LIMIT 2000
        """).fetchall()
    except Exception as e:
        log.error(f"QMD search failed: {e}")
        return []
    finally:
        # Release the connection on every path, including failures.
        if conn is not None:
            conn.close()

    results = []
    for row in rows:
        try:
            # Deserialize embedding
            # NOTE(review): a non-string value is expanded element-wise; if
            # blobs hold packed floats this yields raw bytes — confirm the
            # storage format.
            embedding_data = row["embedding"]
            if isinstance(embedding_data, str):
                embedding = json.loads(embedding_data)
            else:
                embedding = list(embedding_data)

            norm_e = sum(x * x for x in embedding) ** 0.5
            if not norm_e > 0:
                continue

            # Cosine similarity with enhanced query
            dot = sum(a * b for a, b in zip(enhanced_embedding, embedding))
            similarity = dot / (norm_q * norm_e)
            if not similarity > 0.2:  # Threshold for inclusion
                continue

            # Boost score if path or text contains a graph entity
            path_lower = row["path"].lower()
            text_lower = row["text"].lower()
            mentioned = any(
                term in path_lower or term in text_lower for term in entity_terms
            )
            entity_boost = GRAPH_PRIORITY_BOOST if mentioned else 0

            results.append({
                "path": row["path"],
                "text": row["text"][:600],
                "score": (similarity * VECTOR_BASE_WEIGHT) + entity_boost,
                "source": "vector",
                "base_similarity": similarity,
                "entity_boost": entity_boost
            })
        except Exception as e:
            log.debug(f"Suppressed: {e}")

    results.sort(key=lambda x: x["score"], reverse=True)
    return results[:limit]
574
+
575
def search_core_memory_files(query: str, limit: int = 8) -> List[Dict]:
    """L1 System Files - section-aware keyword search with synonym expansion (v2).

    Reads a fixed list of core markdown files under ``WORKSPACE`` (plus today's
    and yesterday's daily notes), splits each file on ``##`` headers and scores
    every section against the query keywords.

    Args:
        query: Free-text query.
        limit: Maximum number of sections to return.

    Returns:
        Up to ``limit`` result dicts ordered by ``keyword_hits`` (descending).
        Every result carries ``score`` 1.0; ``keyword_hits`` is the ranking
        signal within this layer.
    """
    core_files = [
        "MEMORY.md",
        "plans.md",
        "SESSION-STATE.md",
        "SOUL.md",
        "USER.md",
        f"memory/daily/{time.strftime('%Y-%m-%d')}.md",  # Today's notes
        f"memory/daily/{time.strftime('%Y-%m-%d', time.localtime(time.time() - 86400))}.md",  # Yesterday
    ]

    # Synonym expansion for common query terms
    SYNONYMS = {
        'birthday': ['birthday', 'born', 'birth', 'date'],
        'car': ['car', 'vehicle', 'inster', 'hyundai'],
        'wife': ['wife', 'spouse', 'partner', 'family'],
        'budget': ['budget', 'cap', 'cost', 'monthly', 'spending'],
        'port': ['port', 'listen', 'service', 'endpoint'],
        'neo4j': ['neo4j', 'graph', 'knowledge'],
        'tes': ['tes', 'thing', 'event', 'system'],  # example: add domain-specific terms
        'phone': ['phone', 'mobile', 'number', 'contact'],
        'password': ['password', 'secret', 'credential'],
        'home': ['home', 'address', 'live', 'residence'],
        'hobbies': ['hobbies', 'hobby', 'interests', 'leisure'],
        'hobby': ['hobbies', 'hobby', 'interests', 'leisure'],
        'interests': ['hobbies', 'hobby', 'interests', 'leisure'],
        'patent': ['patent', 'deadline', 'filing'],  # example: legal terms
        'deadline': ['deadline', 'patent', 'due', 'expiry'],
        'dimensions': ['dimensions', 'embed', 'vector', 'dim', '4096'],
        'embed': ['embed', 'embedding', 'dimensions', 'vector', 'nv'],
    }

    stop_words = {'what', 'where', 'when', 'how', 'who', 'which', 'does', 'the',
                  'is', 'are', 'was', 'were', 'for', 'and', 'with', 'has', 'have'}

    query_lower = query.lower()
    raw_keywords = [w for w in re.findall(r'\w+', query_lower)
                    if len(w) > 2 and w not in stop_words]

    # Expand with synonyms
    expanded = set(raw_keywords)
    for k in raw_keywords:
        expanded.update(SYNONYMS.get(k, ()))
    keywords = list(expanded)

    # Nothing to match on: do not read any files.
    if not keywords:
        return []

    results = []

    for file_path in core_files:
        try:
            full_path = WORKSPACE / file_path
            if not full_path.exists():
                continue
            content = full_path.read_text(encoding='utf-8', errors='ignore')

            # Split into sections by ## headers for granular matching
            sections = re.split(r'\n(?=##\s)', content)

            for section in sections:
                lines = section.strip().split('\n')
                header = lines[0] if lines and lines[0].startswith('#') else ""
                section_lower = section.lower()
                header_lower = header.lower()

                # Score 1: Header keyword match (3x weight — header match is very precise)
                header_score = sum(k in header_lower for k in keywords) * 3

                # Score 2: Content keyword hits
                content_score = sum(section_lower.count(k) for k in keywords)

                # A section must match the query itself. Without this gate the
                # pattern bonus below admitted any section that merely contains
                # a number, for every query.
                keyword_score = header_score + content_score
                if keyword_score == 0:
                    continue

                # Score 3: Exact answer patterns (IPs, ports, dates, reg plates),
                # applied only as a bonus on top of a keyword match.
                exact_patterns = re.findall(
                    r'\b(?:\d{1,3}\.){3}\d{1,3}\b'  # IP addresses
                    r'|\b\d{4,5}\b'  # port numbers
                    r'|[A-Z]{2}\d{2}[A-Z]{3}'  # UK reg plates
                    r'|\b\d{1,2}\s+(?:January|February|March|April|May|June|July|'
                    r'August|September|October|November|December)\b',  # dates
                    section
                )
                exact_score = len(exact_patterns)

                total_score = keyword_score + exact_score

                results.append({
                    "path": file_path,
                    "text": section[:600],
                    "score": 1.0,  # HIGHEST PRIORITY
                    "source": "core_memory",
                    "keyword_hits": total_score,
                    "file_type": "core",
                    "layer": "L1_system",
                })

        except Exception as e:
            log.warning(f"Failed to read core file {file_path}: {e}")
            continue

    # Sort by total score, deduplicate by path + section text prefix so that
    # identically-headed sections from different files are both kept.
    results.sort(key=lambda x: x["keyword_hits"], reverse=True)
    seen = set()
    deduped = []
    for r in results:
        key = (r["path"], r["text"][:50])
        if key not in seen:
            seen.add(key)
            deduped.append(r)

    return deduped[:limit]
684
+
685
def extract_relevant_snippet(content: str, keywords: List[str], max_chars: int = 400) -> str:
    """Extract the most relevant snippet from content around keywords.

    Slides a ``max_chars``-wide window over the content in steps of 100
    characters, picks the first window with the highest keyword count and
    returns it with 50 characters of padding on each side.

    Args:
        content: Text to extract from.
        keywords: Terms to look for; matched case-insensitively.
        max_chars: Width of the scoring window.

    Returns:
        The snippet. A leading or trailing sentence fragment is dropped only on
        a side where the snippet was actually cut out of longer content.
    """
    content_lower = content.lower()
    # The content is lower-cased, so the keywords must be too; empty keywords
    # would match at every position and are ignored.
    needles = [k.lower() for k in keywords if k]
    best_pos = 0
    best_score = 0

    # Find position with highest keyword density
    for i in range(0, len(content), 100):
        window = content_lower[i:i + max_chars]
        score = sum(window.count(k) for k in needles)
        if score > best_score:
            best_score = score
            best_pos = i

    # Extract snippet around best position
    start = max(0, best_pos - 50)
    end = min(len(content), best_pos + max_chars + 50)
    snippet = content[start:end].strip()

    # Clean up to sentence boundaries if possible
    sentences = snippet.split('. ')
    if len(sentences) > 2:
        cut_head = start > 0
        cut_tail = end < len(content)
        # Only an edge that was sliced mid-text can hold a partial sentence;
        # dropping both edges unconditionally could discard the very sentence
        # that contains the keywords.
        if cut_head:
            sentences = sentences[1:]
        if cut_tail:
            sentences = sentences[:-1]
        snippet = '. '.join(sentences)
        if cut_tail:
            snippet += '.'

    return snippet
712
+
713
+ # ---------------------------------------------------------------------------
714
+ # L0: Native BM25 Workspace Memory Search
715
+ # ---------------------------------------------------------------------------
716
+
717
# SQLite file backing the L0 BM25 index. The PME_MEMORY_DB environment variable
# overrides the default location under the user's home directory.
L0_MEMORY_DB = Path(
    os.getenv(
        "PME_MEMORY_DB",
        str(Path.home().joinpath(".pentatonic", "memory", "main.sqlite")),
    )
)
721
+
722
def search_l0_bm25(query: str, limit: int = 6) -> List[Dict]:
    """Search the native BM25 (SQLite FTS5) index over workspace memory files.

    The query is lower-cased, split on whitespace, stripped of short and common
    words, and the remaining tokens are OR-ed together as FTS5 string literals.
    Paths under snapshots/, archive/ or containing ``-backup-`` are excluded.

    Args:
        query: Free-text query.
        limit: Maximum number of results (at most one per path).

    Returns:
        Result dicts with ``path``, ``snippet``, ``score``, ``layer`` and
        ``source``. Returns an empty list when the database file is missing,
        when no meaningful token remains, or on any error.
    """
    if not L0_MEMORY_DB.exists():
        return []

    ignored = {
        "the", "and", "for", "with", "that", "this", "from", "what",
        "how", "does", "have", "has", "are", "was", "were", "been",
    }
    conn = None
    try:
        # Tokenize query for FTS5 match
        meaningful = [t for t in query.lower().split()
                      if len(t) > 2 and t not in ignored]
        if not meaningful:
            return []
        # FTS5 string literals escape an embedded double quote by doubling it;
        # unescaped, a quote in the user's query is a MATCH syntax error.
        fts_query = " OR ".join('"{}"'.format(t.replace('"', '""')) for t in meaningful)

        conn = sqlite3.connect(str(L0_MEMORY_DB), timeout=2)
        conn.execute("PRAGMA journal_mode=WAL")
        # Over-fetch (limit * 2) because several rows can share one path.
        rows = conn.execute("""
            SELECT path, text, bm25(chunks_fts) as rank
            FROM chunks_fts
            WHERE chunks_fts MATCH ?
            AND path NOT LIKE '%/snapshots/%'
            AND path NOT LIKE '%/archive/%'
            AND path NOT LIKE '%-backup-%'
            ORDER BY rank ASC
            LIMIT ?
        """, (fts_query, limit * 2)).fetchall()

        results = []
        seen_paths = set()
        for path, text, rank in rows:
            if path in seen_paths:
                continue
            seen_paths.add(path)
            # bm25() ranks better matches with more negative values.
            relevance = -rank if rank < 0 else 0.001
            score = min(relevance / (1 + relevance) * 0.85, 0.75)
            results.append({
                "path": f"L0/{path}",
                "snippet": text[:500],
                "score": round(score, 4),
                "layer": "L0_workspace_bm25",
                "source": path,
            })
            if len(results) >= limit:
                break
        return results
    except Exception as e:
        log.debug(f"L0 BM25 search error: {e}")
        return []
    finally:
        # Release the handle on every path, including query failures.
        if conn is not None:
            conn.close()
777
+
778
+
779
+ # ---------------------------------------------------------------------------
780
+ # L5: Communications Context Search
781
+ # ---------------------------------------------------------------------------
782
+
783
# Base URL of the L5 communications API; override with PME_L5_URL.
L5_API_URL = os.getenv("PME_L5_URL", "http://127.0.0.1:8034")
784
+
785
def search_l5_communications(query: str, limit: int = 6) -> List[Dict]:
    """Query the L5 communications API and rescale its hits.

    Issues ``GET {L5_API_URL}/search`` and maps each hit's raw score with
    ``(score - 0.3) * 2.0 + 0.5`` capped at 0.82; hits that land below 0.4
    after scaling are dropped. A non-200 reply or any exception yields ``[]``.
    """
    try:
        reply = requests.get(
            f"{L5_API_URL}/search",
            params={"q": query, "limit": limit},
            timeout=10,
        )
        if reply.status_code != 200:
            return []

        collected = []
        for item in reply.json().get("results", []):
            origin = item.get("source", "")
            raw = item.get("score", 0)
            # Lift the backend's similarity into the range used by the other layers.
            scaled = round(min((raw - 0.3) * 2.0 + 0.5, 0.82), 4)
            if scaled < 0.4:
                continue  # skip low relevance

            who = item.get("contact", "")
            via = item.get("channel", "")
            label = f"L5/{via}/{who}" if who else f"L5/{origin}"

            collected.append({
                "path": label,
                "snippet": item.get("text", "")[:500],
                "score": scaled,
                "layer": "L5_communications",
                "source": origin,
                "collection": item.get("collection", ""),
                "timestamp": item.get("timestamp", ""),
            })
        return collected
    except Exception as e:
        log.debug(f"L5 search error: {e}")
        return []
823
+
824
+
825
+ # L6: Document Store Search
826
# Base URL of the L6 document store; override with PME_L6_URL.
L6_URL = os.getenv("PME_L6_URL", "http://localhost:8037")
827
+
828
def search_l6_documents(query: str, limit: int = 6) -> List[Dict]:
    """Search the L6 Document Store (research, legal, financial, project docs).

    Issues ``GET {L6_URL}/search`` with hybrid retrieval and reranking enabled
    and converts each hit into this service's result shape.

    Args:
        query: Free-text query.
        limit: Maximum number of hits requested from the store.

    Returns:
        Result dicts keyed by ``l6:<file stem>:<chunk_index>``. A non-200 reply
        or any exception yields an empty list.
    """
    try:
        resp = requests.get(
            f"{L6_URL}/search",
            params={"q": query, "method": "hybrid", "limit": limit, "rerank": "true"},
            timeout=10,
        )
        if resp.status_code != 200:
            return []
        data = resp.json()
        results = []
        for hit in data.get("results", []):
            source = hit.get("source_file", "")
            arena = hit.get("arena", "")
            rrf = hit.get("rrf_score", 0)
            reranker = hit.get("reranker_score", None)

            # Scale score: RRF max is ~0.033, normalize to 0.0-0.82 range
            # (below L1's 1.0, comparable to L3/L4/L5)
            score = min(rrf * 25, 0.82)
            if reranker is not None and reranker > 0:
                score = min(score + 0.05, 0.85)

            # NOTE: a human-readable "L6/<arena>/<file>: <heading>" label used
            # to be built here but was never attached to the result; the dead
            # computation has been removed. The path below stays unique per chunk.
            snippet = hit.get("text", "")[:500]
            entities = hit.get("entities", [])
            if entities:
                snippet += f"\n[Entities: {', '.join(entities[:5])}]"

            results.append({
                "path": f"l6:{Path(source).stem}:{hit.get('chunk_index', 0)}",
                "snippet": snippet,
                "score": round(score, 4),
                "layer": "L6_documents",
                "source": source,
                "arena": arena,
                "doc_type": hit.get("doc_type", ""),
                "engines": hit.get("engines", []),
            })
        return results
    except Exception as e:
        log.debug(f"L6 search error: {e}")
        return []
876
+
877
+
878
def sequential_hybridrag_search(query: str, limit: int = 16) -> List[Dict]:
    """Run every retrieval layer in sequence and fuse the hits.

    Order: L0 BM25, L1 system files, L3 graph, HyDE expansion, L4 vector,
    L5 communications, L6 documents. Hits are concatenated, de-duplicated by
    ``path`` (first occurrence wins), sorted by ``score`` descending, passed
    through ``cross_encoder_rerank`` and truncated to ``limit``. Layer usage is
    reported through ``log_memory_usage``.
    """
    began = time.time()
    log.info(f"Starting sequential HybridRAG search for: '{query}'")

    # L0: keyword search over workspace memory
    bm25_hits = search_l0_bm25(query, limit=6)
    log.info(f"L0 BM25 workspace: {len(bm25_hits)} results")

    # L1: core system files
    core_hits = search_core_memory_files(query, limit=4)
    log.info(f"L1 System files: {len(core_hits)} results")

    # L3: entity extraction followed by graph lookup
    entities = extract_query_entities(query)
    graph_context = search_neo4j_sequential(query, entities, limit=8)
    graph_hits = graph_context["results"]
    log.info(f"L3 Graph search: {len(graph_hits)} results, {graph_context['entity_count']} entities")

    # HyDE expansion feeds the embedding-based layers below
    hyde_query = hyde_expand(query)

    # L4: vector search, given the graph context
    vector_hits = search_qmd_informed(hyde_query, graph_context, limit=8)
    log.info(f"L4 Vector search: {len(vector_hits)} results (HyDE={'on' if hyde_query != query else 'off'})")

    # L5: communications
    comms_hits = search_l5_communications(hyde_query, limit=6)
    log.info(f"L5 Communications: {len(comms_hits)} results")

    # L6: document store
    doc_hits = search_l6_documents(hyde_query, limit=6)
    log.info(f"L6 Documents: {len(doc_hits)} results")

    # Fusion: concatenate in layer order, keeping the first hit for each path
    unique_by_path = {}
    for hit in bm25_hits + core_hits + graph_hits + vector_hits + comms_hits + doc_hits:
        if hit["path"] not in unique_by_path:
            unique_by_path[hit["path"]] = hit
    fused = list(unique_by_path.values())

    # Highest score first, then let the cross-encoder re-rank
    fused.sort(key=lambda hit: hit["score"], reverse=True)
    fused = cross_encoder_rerank(query, fused, top_k=limit)

    # Record which layers contributed
    elapsed_ms = (time.time() - began) * 1000
    layer_hits = (
        ("workspace_bm25", bm25_hits),
        ("system", core_hits),
        ("graph", graph_hits),
        ("vector", vector_hits),
        ("communications", comms_hits),
        ("documents", doc_hits),
    )
    layers_used = [label for label, hits in layer_hits if len(hits) > 0]

    log_memory_usage(
        query=query,
        layers_hit=layers_used,
        l1_hits=len(core_hits),
        l3_hits=len(graph_hits),
        l4_hits=len(vector_hits),
        search_time_ms=elapsed_ms,
        entities_extracted=entities,
    )

    final = fused[:limit]
    log.info(f"L2 HybridRAG final: {len(final)} total results")
    return final
957
+
958
+ # ---------------------------------------------------------------------------
959
+ # API Endpoints
960
+ # ---------------------------------------------------------------------------
961
+
962
@app.post("/v1/search")
async def search_endpoint(request: Request) -> dict:
    """Run the sequential HybridRAG search and return structured results.

    Request body (JSON object):
        query: Non-empty string, required.
        limit: Non-negative integer, optional (default 16).

    Returns:
        The fused results plus the extracted query entities and graph context.

    Raises:
        HTTPException: 400 for a malformed body, a missing or non-string
            ``query`` or an invalid ``limit``; 500 for any other failure.
    """
    try:
        try:
            body = await request.json()
        except ValueError:
            # Malformed JSON is a client error, not a server fault.
            raise HTTPException(status_code=400, detail="request body must be valid JSON")
        if not isinstance(body, dict):
            raise HTTPException(status_code=400, detail="request body must be a JSON object")

        query = body.get("query", "")
        if not query or not isinstance(query, str):
            raise HTTPException(status_code=400, detail="query is required")

        limit = body.get("limit", 16)
        # bool is a subclass of int; reject it explicitly. A non-integer limit
        # would otherwise fail later when the results are sliced.
        if isinstance(limit, bool) or not isinstance(limit, int) or limit < 0:
            raise HTTPException(status_code=400, detail="limit must be a non-negative integer")

        results = sequential_hybridrag_search(query, limit=limit)

        # Also return raw graph entities for context enrichment
        entities = extract_query_entities(query)
        graph_context = search_neo4j_sequential(query, entities, limit=8)

        return {
            "results": results,
            "entities": entities,
            "graph_nodes": graph_context.get("entity_count", 0),
            "graph_entities": graph_context.get("graph_entities", []),
            "layers_active": {
                "L1_system": True,
                "L3_graph": True,
                "L4_vector": True,
                "L5_communications": True
            }
        }
    except HTTPException:
        raise
    except Exception as e:
        log.error(f"Search endpoint error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
995
+
996
+
997
@app.get("/v1/models")
async def list_models() -> dict:
    """Return a fixed, OpenAI-style model list containing a single entry."""
    only_model = {
        "id": "gpt-3.5-turbo",
        "object": "model",
        "owned_by": "sequential-hybridrag",
    }
    return {"object": "list", "data": [only_model]}
1006
+
1007
@app.post("/v1/embeddings")
async def create_embeddings(request: EmbeddingRequest) -> dict:
    """Forward an embedding request to the service at ``NV_EMBED_URL``.

    A single string input is wrapped in a list; the whole list is sent in one
    HTTP call and the upstream JSON is returned unchanged. Any failure is
    reported as HTTP 500.
    """
    try:
        import httpx

        if isinstance(request.input, str):
            batch = [request.input]
        else:
            batch = request.input
        payload = {"input": batch, "model": request.model or "nv-embed-v2"}

        async with httpx.AsyncClient(timeout=60) as client:
            upstream = await client.post(NV_EMBED_URL, json=payload)
            upstream.raise_for_status()
            return upstream.json()
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
1023
+
1024
@app.post("/v1/chat/completions")
async def chat_completions(request: ChatCompletionRequest) -> dict:
    """Answer a chat request with formatted HybridRAG search results.

    The last ``user`` message is used as the query. The search hits are
    rendered as markdown and returned as the assistant message of an
    OpenAI-style ``chat.completion`` object.

    Raises:
        HTTPException: 400 when the request has no user message; 500 for any
            other failure.
    """
    try:
        # Extract query from last user message
        user_messages = [m for m in request.messages if m.role == "user"]
        if not user_messages:
            raise HTTPException(status_code=400, detail="No user message found")

        query = user_messages[-1].content

        # Perform sequential HybridRAG search
        start_time = time.time()
        results = sequential_hybridrag_search(query, limit=16)
        search_time = time.time() - start_time

        # Format results with correct layer structure
        context_parts = []
        system_count = sum(1 for r in results if r.get("source") == "core_memory")
        graph_count = sum(1 for r in results if r.get("source") == "graph")
        vector_count = sum(1 for r in results if r.get("source") == "vector")

        context_parts.append("# HybridRAG Results (L1 System → L2 HybridRAG → L3 Graph → L4 Vector)")
        context_parts.append(f"Query: {query}")
        context_parts.append(f"Results: {system_count} system + {graph_count} graph + {vector_count} vector = {len(results)} total")
        context_parts.append(f"Search time: {search_time:.3f}s")
        context_parts.append("")

        current_tier = None
        for result in results:
            # Group by layer for clarity
            source = result.get("source")
            if source != current_tier:
                if source == "core_memory":
                    context_parts.append("## L1 SYSTEM FILES (Highest Priority)")
                elif source == "graph":
                    context_parts.append("## L3 GRAPH SEARCH (via L2 HybridRAG)")
                elif source == "vector":
                    context_parts.append("## L4 VECTOR SEARCH (via L2 HybridRAG)")
                context_parts.append("")
                current_tier = source

            # L0, L5 and L6 hits carry their body under "snippet" rather than
            # "text"; indexing "text" directly raised KeyError for them.
            body_text = result.get("text") or result.get("snippet") or ""

            context_parts.append(f"**{result['path']}** (score: {result['score']:.3f})")
            context_parts.append("")
            context_parts.append(body_text[:800])
            context_parts.append("")

        response_content = "\n".join(context_parts) if context_parts else "No relevant context found."

        return {
            "id": f"seq-hybridrag-{int(time.time())}",
            "object": "chat.completion",
            "choices": [{
                "message": {
                    "role": "assistant",
                    "content": response_content
                },
                "index": 0,
                "finish_reason": "stop"
            }],
            "usage": {"total_tokens": len(response_content)},
            "model": request.model
        }

    except HTTPException:
        # Preserve deliberate client errors (the 400 above) instead of
        # letting the generic handler turn them into a 500.
        raise
    except Exception as e:
        log.error(f"HybridRAG search failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
1091
+
1092
@app.get("/contradictions/{node_name}")
async def check_contradictions(node_name: str) -> dict:
    """Detect contradictions around a named graph node.

    Looks the node up by case-insensitive name, then collects
    (1) explicit ``CONTRADICTS`` relationships touching it and
    (2) up to 10 nodes that share a neighbour with it and disagree on at
    least one common property (bookkeeping properties excluded).

    Returns:
        ``{"node", "contradictions", "count"}``, or a payload with
        ``"error": "Node not found"`` when no node matches.

    Raises:
        HTTPException: 500 on any failure.
    """
    driver = None
    try:
        driver = GraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)
        contradictions = []
        with driver.session() as session:
            # Find the node
            node = session.run(
                "MATCH (n) WHERE toLower(n.name) = toLower($name) RETURN elementId(n) AS id", name=node_name
            ).single()
            if not node:
                return {"node": node_name, "contradictions": [], "error": "Node not found"}
            nid = node["id"]

            # Explicit CONTRADICTS
            for rec in session.run(
                """MATCH (a)-[r:CONTRADICTS]-(b) WHERE elementId(a) = $nid
                RETURN a.name AS a, b.name AS b, r.reason AS reason""", nid=nid
            ):
                contradictions.append({"type": "explicit", "a": rec["a"], "b": rec["b"], "reason": rec["reason"]})

            # Property conflicts via shared neighbour
            for rec in session.run(
                """MATCH (a)--(shared)--(b)
                WHERE elementId(a) = $nid AND a <> b
                WITH a, b, shared, properties(a) AS pa, properties(b) AS pb
                WITH a, b, shared,
                [k IN keys(pa) WHERE k IN keys(pb) AND pa[k] <> pb[k]
                AND NOT k IN ['last_accessed','embedding','created_at','updated_at','id','weight']] AS ck
                WHERE size(ck) > 0
                RETURN a.name AS a, b.name AS b, shared.name AS via, ck
                LIMIT 10""", nid=nid
            ):
                contradictions.append({
                    "type": "property_conflict", "a": rec["a"], "b": rec["b"],
                    "via": rec["via"], "conflicting_keys": rec["ck"]
                })
        return {"node": node_name, "contradictions": contradictions, "count": len(contradictions)}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Close on every exit path; the early "Node not found" return and the
        # error path previously skipped the close.
        if driver is not None:
            driver.close()
1134
+
1135
def _check_l5_health() -> bool:
    """Report whether ``GET {L5_API_URL}/health`` answers 200 within 3 seconds."""
    try:
        return requests.get(f"{L5_API_URL}/health", timeout=3).status_code == 200
    except Exception:
        return False
1142
+
1143
def _check_l6_health() -> bool:
    """Report whether the L6 store answers 200 with status "ok" or "degraded"."""
    try:
        reply = requests.get(f"{L6_URL}/health", timeout=3)
        if reply.status_code != 200:
            return False
        return reply.json().get("status") in ("ok", "degraded")
    except Exception:
        return False
1150
+
1151
@app.get("/health")
async def health() -> dict:
    """System health check.

    Probes each backing store and reports a per-layer status. Probe failures
    are logged at debug level and reported as "unavailable"; they never fail
    the endpoint itself.
    """
    qmd_healthy = os.path.exists(QMD_DB_PATH)

    neo4j_healthy = False
    driver = None
    try:
        driver = GraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)
        with driver.session() as session:
            session.run("RETURN 1")
            neo4j_healthy = True
    except Exception as e:
        logging.debug(f"Suppressed: {e}")
    finally:
        # Close the driver even when the probe query fails, so repeated
        # health checks against a down database do not leak drivers.
        if driver is not None:
            try:
                driver.close()
            except Exception as e:
                logging.debug(f"Suppressed: {e}")

    ollama_healthy = False
    try:
        r = requests.get("http://localhost:11434/api/tags", timeout=5)
        ollama_healthy = r.status_code == 200
    except Exception as e:
        logging.debug(f"Suppressed: {e}")

    return {
        "proxy": "healthy",
        "architecture": "sequential-hybridrag-proper-layers",
        "layers": {
            "L0_workspace_bm25": {"status": "healthy" if L0_MEMORY_DB.exists() else "unavailable", "backend": "sqlite-fts5"},
            "L1_system_files": {"status": "healthy", "description": "MEMORY.md, plans.md, daily notes"},
            "L2_hybridrag": {"status": "healthy", "description": "Orchestrates L3+L4 fusion"},
            "L3_graph_search": {"status": "healthy" if neo4j_healthy else "unavailable", "backend": "neo4j"},
            "L4_vector_search": {"status": "healthy" if qmd_healthy else "unavailable", "backend": "qmd+ollama"},
            "L5_communications": {"status": "healthy" if _check_l5_health() else "unavailable", "backend": "sqlite+ollama"},
            "L6_document_store": {"status": "healthy" if _check_l6_health() else "unavailable", "backend": "milvus+fts5+reranker", "port": 8037},
            "ollama_embeddings": {"status": "healthy" if ollama_healthy else "unavailable"}
        }
    }
1187
+
1188
@app.get("/stats")
async def layer_statistics() -> Any:
    """Expose whatever ``get_layer_stats()`` reports about memory-layer usage."""
    stats = get_layer_stats()
    return stats
1192
+
1193
+ # ---------------------------------------------------------------------------
1194
+ # Internal write endpoints (L0 BM25 + L4 QMD + L3 KG)
1195
+ #
1196
+ # These let the compat shim populate the three layers that L2 reads from
1197
+ # directly (rather than via HTTP sidecars). Without these the L2-via-shim
1198
+ # path runs with empty L0/L4-qmd/L3 and RRF fusion is polluted by zero-
1199
+ # result rank lists.
1200
+ # ---------------------------------------------------------------------------
1201
+
1202
+ _ENTITY_STOP = {
1203
+ 'what', 'who', 'where', 'when', 'how', 'does', 'did', 'the', 'and',
1204
+ 'for', 'with', 'from', 'about', 'this', 'that', 'have', 'has', 'are',
1205
+ 'was', 'were', 'been', 'will', 'would', 'could', 'should', 'into',
1206
+ }
1207
+
1208
+ def _extract_entities_for_kg(text: str, max_entities: int = 32) -> List[str]:
1209
+ """Mirror of extract_query_entities, but applied to ingest content.
1210
+
1211
+ Picks single-word title-case tokens + bigrams of consecutive title-case
1212
+ tokens. Same heuristic as query-side so node names and search terms line
1213
+ up. Caps at max_entities to keep ingest fast.
1214
+ """
1215
+ if not text:
1216
+ return []
1217
+ words = [re.sub(r'[^\w\s-]', '', w).strip() for w in text.split()]
1218
+ words = [w for w in words if w]
1219
+ found: List[str] = []
1220
+ seen: Set[str] = set()
1221
+ # Single-word title-case
1222
+ for w in words:
1223
+ if w.istitle() and len(w) > 2 and w.lower() not in _ENTITY_STOP:
1224
+ key = w.lower()
1225
+ if key not in seen:
1226
+ found.append(w)
1227
+ seen.add(key)
1228
+ # Bigrams of consecutive title-case
1229
+ for i in range(len(words) - 1):
1230
+ a, b = words[i], words[i + 1]
1231
+ if (a.istitle() and b.istitle()
1232
+ and a.lower() not in _ENTITY_STOP
1233
+ and b.lower() not in _ENTITY_STOP
1234
+ and len(a) > 1 and len(b) > 1):
1235
+ phrase = f"{a} {b}"
1236
+ key = phrase.lower()
1237
+ if key not in seen:
1238
+ found.append(phrase)
1239
+ seen.add(key)
1240
+ if len(found) >= max_entities:
1241
+ break
1242
+ return found[:max_entities]
1243
+
1244
+
1245
+ def _embed_batch_local(texts: List[str]) -> List[List[float]]:
1246
+ """Batch embed via NV-Embed. Returns vectors in input order."""
1247
+ if not texts:
1248
+ return []
1249
+ try:
1250
+ r = requests.post(NV_EMBED_URL,
1251
+ json={"input": texts, "model": "nv-embed-v2"},
1252
+ timeout=120)
1253
+ r.raise_for_status()
1254
+ data = r.json().get("data", [])
1255
+ # NV-Embed returns [{embedding: [...]}, ...]
1256
+ return [d["embedding"] for d in data]
1257
+ except Exception as e:
1258
+ log.warning(f"NV-Embed batch failed: {e}; trying singletons")
1259
+ return [get_embedding(t) for t in texts]
1260
+
1261
+
1262
class IndexInternalBatchRequest(BaseModel):
    # Request body for POST /index-internal-batch.

    # Records to ingest, each shaped like
    # {"id": str, "content": str, "metadata": dict}.
    records: List[Dict[str, Any]]

    # Arena label applied to every record in the batch.
    arena: Optional[str] = "general"
1265
+
1266
+
1267
def _index_l0_bm25(norm: List[Dict[str, Any]], arena: str, now_iso: str) -> int:
    """Insert the records into the L0 ``chunks`` table; return rows committed."""
    conn = None
    try:
        l0_db = Path(os.environ.get("PME_MEMORY_DB", str(L0_MEMORY_DB)))
        l0_db.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(str(l0_db), timeout=10)
        conn.execute("PRAGMA journal_mode=WAL")
        # Schema is created by init_databases.py at container start, but be
        # defensive in case L2 is run standalone.
        conn.execute("""
            CREATE TABLE IF NOT EXISTS chunks (
                id INTEGER PRIMARY KEY,
                path TEXT,
                text TEXT,
                file_type TEXT,
                chunk_index INTEGER,
                created_at TEXT
            )
        """)
        conn.execute("""
            CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
                path, text, file_type,
                content='chunks',
                content_rowid='id'
            )
        """)
        conn.execute("""
            CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
                INSERT INTO chunks_fts(rowid, path, text, file_type)
                VALUES (new.id, new.path, new.text, new.file_type);
            END
        """)
        inserted = 0
        for n in norm:
            cur = conn.execute(
                "INSERT INTO chunks (path, text, file_type, chunk_index, created_at) VALUES (?, ?, ?, ?, ?)",
                (f"bench/{arena}/{n['path']}.md", n["content"], "md", 0, now_iso),
            )
            if cur.rowcount > 0:
                inserted += 1
        conn.commit()
        return inserted
    except Exception as e:
        log.error(f"L0 BM25 write failed: {e}")
        # Nothing was committed, so nothing is reported as inserted.
        return 0
    finally:
        if conn is not None:
            conn.close()


def _index_l4_qmd(norm: List[Dict[str, Any]], arena: str, now_iso: str) -> int:
    """Embed the records and insert them into the QMD ``chunks`` table; return rows committed."""
    conn = None
    try:
        embeddings = _embed_batch_local([n["content"] for n in norm])
        if len(embeddings) != len(norm):
            # Vectors can no longer be matched to their texts reliably; pairing
            # them positionally could store an embedding against the wrong text.
            log.warning(f"L4 embed count mismatch: {len(embeddings)} != {len(norm)}")
            return 0
        qmd_db = Path(QMD_DB_PATH)
        qmd_db.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(str(qmd_db), timeout=10)
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("""
            CREATE TABLE IF NOT EXISTS chunks (
                id INTEGER PRIMARY KEY,
                path TEXT,
                text TEXT,
                embedding TEXT,
                embedding_model TEXT,
                embedding_dim INTEGER,
                chunk_index INTEGER,
                created_at TEXT
            )
        """)
        inserted = 0
        for n, vec in zip(norm, embeddings):
            if not vec:
                continue
            conn.execute(
                "INSERT INTO chunks (path, text, embedding, embedding_model, embedding_dim, chunk_index, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)",
                (f"bench/{arena}/{n['path']}.md", n["content"],
                 json.dumps(vec), "nv-embed-v2", len(vec), 0, now_iso),
            )
            inserted += 1
        conn.commit()
        return inserted
    except Exception as e:
        log.error(f"L4 QMD write failed: {e}")
        return 0
    finally:
        if conn is not None:
            conn.close()


def _index_l3_graph(norm: List[Dict[str, Any]], arena: str, now_iso: str) -> tuple:
    """Write Chunk/Entity nodes and their edges to Neo4j.

    Returns ``(entity_writes, chunk_writes)``. Each statement is issued with
    its own ``session.run`` call, so on failure the counts reached so far are
    still returned.
    """
    l3_entities = 0
    l3_chunks = 0
    driver = None
    try:
        driver = GraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)
        with driver.session() as session:
            # Index for fast lookup (idempotent)
            try:
                session.run("CREATE INDEX entity_name IF NOT EXISTS FOR (n:Entity) ON (n.name)")
                session.run("CREATE INDEX chunk_id IF NOT EXISTS FOR (c:Chunk) ON (c.id)")
            except Exception as e:
                # Indexes only speed lookups up; ingest can proceed without them.
                log.debug(f"L3 index creation skipped: {e}")
            for n in norm:
                entities = _extract_entities_for_kg(n["content"])
                if not entities:
                    continue
                # Create the chunk node
                session.run(
                    """
                    MERGE (c:Chunk {id: $cid})
                    SET c.text = $text,
                        c.path = $path,
                        c.source_file = $path,
                        c.arena = $arena,
                        c.created_at = $now
                    """,
                    cid=n["id"], text=n["content"][:2000], path=n["path"],
                    arena=arena, now=now_iso,
                )
                l3_chunks += 1
                # Create/MERGE entities and MENTIONS edge
                for ent in entities:
                    session.run(
                        """
                        MERGE (e:Entity {name: $name})
                        ON CREATE SET e.type = 'Concept',
                                      e.created_at = $now,
                                      e.weight = 1.0
                        WITH e
                        MATCH (c:Chunk {id: $cid})
                        MERGE (e)-[r:MENTIONS]->(c)
                        ON CREATE SET r.weight = 1.0, r.created_at = $now
                        ON MATCH SET r.weight = coalesce(r.weight, 1.0) + 0.1
                        """,
                        name=ent, cid=n["id"], now=now_iso,
                    )
                    l3_entities += 1
                # Create entity-entity co-occurrence edges (within this chunk)
                # so spreading activation has structure to walk.
                for i, first in enumerate(entities):
                    for second in entities[i + 1:]:
                        session.run(
                            """
                            MATCH (a:Entity {name: $a})
                            MATCH (b:Entity {name: $b})
                            MERGE (a)-[r:CO_OCCURS]->(b)
                            ON CREATE SET r.weight = 0.5, r.created_at = $now
                            ON MATCH SET r.weight = coalesce(r.weight, 0.5) + 0.05
                            """,
                            a=first, b=second, now=now_iso,
                        )
    except Exception as e:
        log.error(f"L3 KG write failed: {e}")
    finally:
        if driver is not None:
            try:
                driver.close()
            except Exception as e:
                log.debug(f"L3 driver close failed: {e}")
    return l3_entities, l3_chunks


@app.post("/index-internal-batch")
async def index_internal_batch(req: IndexInternalBatchRequest) -> dict:
    """Populate L0 BM25 + L4 QMD vec + L3 Neo4j KG from one ingest call.

    Called by the compat shim on /store-batch so the L2 fusion has real data in
    every layer (not just L5/L6). The three writes run sequentially and
    independently: a failure in one layer is logged and does not stop the
    others.

    Returns:
        Per-layer counts plus ``inserted`` (records accepted after
        normalisation) and ``duration_ms``.
    """
    t0 = time.time()
    records = [r for r in (req.records or [])
               if (r.get("content") or r.get("text"))]
    if not records:
        return {"status": "ok", "inserted": 0, "l0": 0, "l4_qmd": 0, "l3_entities": 0,
                "l3_chunks": 0, "duration_ms": 0.0}

    # Normalise: accept "content" or "text"; derive a stable id from the
    # content when none is supplied; prefer metadata path/doc_id as the path.
    norm = []
    for r in records:
        content = r.get("content") or r.get("text") or ""
        rid = (r.get("id")
               or hashlib.sha1(content.encode()).hexdigest()[:32])
        meta = r.get("metadata") or {}
        path = meta.get("path") or meta.get("doc_id") or rid
        norm.append({"id": str(rid), "content": content, "path": str(path),
                     "metadata": meta})

    now_iso = datetime.utcnow().isoformat() + "Z"
    arena = req.arena or "general"

    l0_inserted = _index_l0_bm25(norm, arena, now_iso)
    l4_inserted = _index_l4_qmd(norm, arena, now_iso)
    l3_entities, l3_chunks = _index_l3_graph(norm, arena, now_iso)

    dur_ms = (time.time() - t0) * 1000.0
    return {
        "status": "ok",
        "inserted": len(norm),
        "l0": l0_inserted,
        "l4_qmd": l4_inserted,
        "l3_entities": l3_entities,
        "l3_chunks": l3_chunks,
        "duration_ms": round(dur_ms, 1),
    }
1453
+
1454
+
1455
@app.post("/forget-internal")
async def forget_internal(request: Request) -> dict:
    """Wipe L0 + L4-qmd + L3. Used by bench harness to reset between runs.

    Each store is cleared independently; a failure in one is logged and does
    not stop the others.

    NOTE(review): ``arena`` is read from the body and echoed back, but the
    deletes below are not scoped by it — every chunk and entity is removed.

    Returns:
        ``{"status": "ok", "deleted": {...per-store counts...}, "arena": ...}``.
    """
    try:
        body = await request.json()
    except Exception:
        body = {}
    # A valid JSON body that is not an object (e.g. a list) has no .get().
    if not isinstance(body, dict):
        body = {}
    arena = body.get("arena")  # optional scoping
    deleted = {"l0": 0, "l4_qmd": 0, "l3_entities": 0, "l3_chunks": 0}

    try:
        l0_db = Path(os.environ.get("PME_MEMORY_DB", str(L0_MEMORY_DB)))
        if l0_db.exists():
            conn = sqlite3.connect(str(l0_db), timeout=5)
            try:
                cur = conn.execute("DELETE FROM chunks")
                deleted["l0"] = cur.rowcount
                try:
                    # Re-sync the FTS index with the now-empty chunks table.
                    conn.execute("INSERT INTO chunks_fts(chunks_fts) VALUES('rebuild')")
                except Exception as e:
                    log.debug(f"L0 FTS rebuild skipped: {e}")
                conn.commit()
            finally:
                conn.close()
    except Exception as e:
        log.error(f"L0 forget failed: {e}")

    try:
        if Path(QMD_DB_PATH).exists():
            conn = sqlite3.connect(QMD_DB_PATH, timeout=5)
            try:
                cur = conn.execute("DELETE FROM chunks")
                deleted["l4_qmd"] = cur.rowcount
                conn.commit()
            finally:
                conn.close()
    except Exception as e:
        log.error(f"L4 QMD forget failed: {e}")

    driver = None
    try:
        driver = GraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)
        with driver.session() as session:
            r1 = session.run("MATCH (c:Chunk) DETACH DELETE c RETURN count(c) AS n")
            deleted["l3_chunks"] = r1.single()["n"]
            r2 = session.run("MATCH (e:Entity) DETACH DELETE e RETURN count(e) AS n")
            deleted["l3_entities"] = r2.single()["n"]
    except Exception as e:
        log.error(f"L3 forget failed: {e}")
    finally:
        if driver is not None:
            try:
                driver.close()
            except Exception as e:
                log.debug(f"L3 driver close failed: {e}")

    return {"status": "ok", "deleted": deleted, "arena": arena}
1496
+
1497
+
1498
@app.get("/index-internal-stats")
async def index_internal_stats() -> dict:
    """Report row/node counts for the L0, L4-qmd and L3 stores as a quick sanity check.

    Each store is probed independently. When a probe fails, its count stays
    at 0 and the exception text is added under ``l0_error``,
    ``l4_qmd_error`` or ``l3_error`` respectively. A SQLite store whose file
    does not exist is reported as 0 without an error key.

    Returns:
        A dict with ``l0_chunks``, ``l4_qmd_chunks`` (only rows whose
        ``embedding`` is not NULL), ``l3_chunks`` and ``l3_entities``, plus
        any ``*_error`` keys described above.
    """
    out = {"l0_chunks": 0, "l4_qmd_chunks": 0,
           "l3_chunks": 0, "l3_entities": 0}

    # --- L0: SQLite chunk store (path overridable via PME_MEMORY_DB) ---
    try:
        l0_db = Path(os.environ.get("PME_MEMORY_DB", str(L0_MEMORY_DB)))
        if l0_db.exists():
            conn = sqlite3.connect(str(l0_db), timeout=5)
            try:
                r = conn.execute("SELECT COUNT(*) FROM chunks").fetchone()
                out["l0_chunks"] = r[0] if r else 0
            finally:
                # Close even when the query raises so the handle is not leaked.
                conn.close()
    except Exception as e:
        out["l0_error"] = str(e)

    # --- L4: QMD SQLite store ---
    try:
        if Path(QMD_DB_PATH).exists():
            conn = sqlite3.connect(QMD_DB_PATH, timeout=5)
            try:
                r = conn.execute("SELECT COUNT(*) FROM chunks WHERE embedding IS NOT NULL").fetchone()
                out["l4_qmd_chunks"] = r[0] if r else 0
            finally:
                conn.close()
    except Exception as e:
        out["l4_qmd_error"] = str(e)

    # --- L3: Neo4j graph ---
    try:
        driver = GraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)
        try:
            with driver.session() as session:
                r = session.run("MATCH (c:Chunk) RETURN count(c) AS n").single()
                out["l3_chunks"] = r["n"] if r else 0
                r = session.run("MATCH (e:Entity) RETURN count(e) AS n").single()
                out["l3_entities"] = r["n"] if r else 0
        finally:
            # Release the driver's connections even if a query fails.
            driver.close()
    except Exception as e:
        out["l3_error"] = str(e)

    return out
1531
+
1532
+ # ---------------------------------------------------------------------------
1533
+ # Main
1534
+ # ---------------------------------------------------------------------------
1535
+
1536
if __name__ == "__main__":
    # Script entry point: read the bind address from the command line and
    # serve the application with uvicorn.
    arg_parser = argparse.ArgumentParser()
    # (flag, type, default) -- registered in this order so --help is unchanged.
    option_specs = (
        ("--port", int, 8031),
        ("--host", str, "127.0.0.1"),
    )
    for flag, kind, fallback in option_specs:
        arg_parser.add_argument(flag, type=kind, default=fallback)
    args = arg_parser.parse_args()

    log.info(f"Starting Sequential HybridRAG Proxy (L1 System → L2 HybridRAG → L3 Graph → L4 Vector) on {args.host}:{args.port}")
    uvicorn.run(app, host=args.host, port=args.port)