@pentatonic-ai/ai-agent-sdk 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/README.md +170 -69
  2. package/bin/__tests__/callback-server.test.js +4 -1
  3. package/bin/cli.js +41 -164
  4. package/bin/commands/config.js +251 -0
  5. package/package.json +2 -1
  6. package/packages/doctor/__tests__/detect.test.js +2 -6
  7. package/packages/doctor/src/checks/local-memory.js +164 -196
  8. package/packages/doctor/src/detect.js +11 -3
  9. package/packages/memory/src/corpus/adapters.js +104 -0
  10. package/packages/memory/src/corpus/cli.js +72 -7
  11. package/packages/memory/src/corpus/index.js +1 -1
  12. package/packages/memory-engine/.env.example +13 -0
  13. package/packages/memory-engine/README.md +131 -0
  14. package/packages/memory-engine/bench/README.md +99 -0
  15. package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +1115 -0
  16. package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +819 -0
  17. package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +1278 -0
  18. package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +1018 -0
  19. package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +1038 -0
  20. package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +961 -0
  21. package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +1115 -0
  22. package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +819 -0
  23. package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +1278 -0
  24. package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +1018 -0
  25. package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +1038 -0
  26. package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +937 -0
  27. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +1115 -0
  28. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +819 -0
  29. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +1278 -0
  30. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +1018 -0
  31. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +1038 -0
  32. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +961 -0
  33. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +1115 -0
  34. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +819 -0
  35. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +1278 -0
  36. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +1018 -0
  37. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +1038 -0
  38. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +883 -0
  39. package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +1115 -0
  40. package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +819 -0
  41. package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +1278 -0
  42. package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +1018 -0
  43. package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +1038 -0
  44. package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +937 -0
  45. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +1115 -0
  46. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +1115 -0
  47. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +819 -0
  48. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +542 -0
  49. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +1278 -0
  50. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +894 -0
  51. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +1018 -0
  52. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +680 -0
  53. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +1038 -0
  54. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +693 -0
  55. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +961 -0
  56. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +727 -0
  57. package/packages/memory-engine/compat/Dockerfile +11 -0
  58. package/packages/memory-engine/compat/server.py +680 -0
  59. package/packages/memory-engine/docker-compose.yml +243 -0
  60. package/packages/memory-engine/docs/MIGRATION.md +178 -0
  61. package/packages/memory-engine/docs/RUNBOOK-AWS.md +375 -0
  62. package/packages/memory-engine/docs/why-v05-underperforms.md +138 -0
  63. package/packages/memory-engine/engine/README.md +52 -0
  64. package/packages/memory-engine/engine/l2-hybridrag-proxy.py +1543 -0
  65. package/packages/memory-engine/engine/l5-comms-layer.py +663 -0
  66. package/packages/memory-engine/engine/l6-document-store.py +1018 -0
  67. package/packages/memory-engine/engine/services/l2/Dockerfile +41 -0
  68. package/packages/memory-engine/engine/services/l2/init_databases.py +81 -0
  69. package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +1543 -0
  70. package/packages/memory-engine/engine/services/l4/Dockerfile +15 -0
  71. package/packages/memory-engine/engine/services/l4/server.py +235 -0
  72. package/packages/memory-engine/engine/services/l5/Dockerfile +9 -0
  73. package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +678 -0
  74. package/packages/memory-engine/engine/services/l6/Dockerfile +11 -0
  75. package/packages/memory-engine/engine/services/l6/l6-document-store.py +1016 -0
  76. package/packages/memory-engine/engine/services/nv-embed/Dockerfile +28 -0
  77. package/packages/memory-engine/engine/services/nv-embed/server.py +152 -0
  78. package/packages/memory-engine/pme_memory/__init__.py +0 -0
  79. package/packages/memory-engine/pme_memory/__main__.py +129 -0
  80. package/packages/memory-engine/pme_memory/artifacts.py +95 -0
  81. package/packages/memory-engine/pme_memory/embed.py +74 -0
  82. package/packages/memory-engine/pme_memory/health.py +36 -0
  83. package/packages/memory-engine/pme_memory/hygiene.py +159 -0
  84. package/packages/memory-engine/pme_memory/indexer.py +200 -0
  85. package/packages/memory-engine/pme_memory/needs.py +55 -0
  86. package/packages/memory-engine/pme_memory/provenance.py +80 -0
  87. package/packages/memory-engine/pme_memory/scoring.py +168 -0
  88. package/packages/memory-engine/pme_memory/search.py +52 -0
  89. package/packages/memory-engine/pme_memory/store.py +86 -0
  90. package/packages/memory-engine/pme_memory/synthesis.py +114 -0
  91. package/packages/memory-engine/pyproject.toml +65 -0
  92. package/packages/memory-engine/scripts/kg-extractor.py +557 -0
  93. package/packages/memory-engine/scripts/kg-preflexor-v2.py +738 -0
  94. package/packages/memory-engine/tests/test_api_contract.sh +57 -0
@@ -0,0 +1,1016 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ L6 Document Store — HybridRAG for Document Retrieval
4
+
5
+ Features:
6
+ - Milvus Lite (vector) + SQLite FTS5 (BM25) + RRF fusion
7
+ - Cross-encoder reranker (ms-marco-MiniLM-L-6-v2)
8
+ - Ingest-time entity extraction via Ollama graph-preflexor
9
+ - Adaptive chunk sizing by doc_type
10
+ - Freshness-aware dedup (purge-and-replace on re-index)
11
+ - Confidence scoring (RRF + engine_count + reranker_score)
12
+
13
+ Port: 8037
14
+ """
15
+
16
+ import argparse
17
+ import hashlib
18
+ import json
19
+ import logging
20
+ import os
21
+ import re
22
+ import sqlite3
23
+ import time
24
+ from datetime import datetime, timezone
25
+ from pathlib import Path
26
+ from typing import Any, Dict, List, Optional, Tuple
27
+
28
+ import httpx
29
+ from pymilvus import MilvusClient, DataType, CollectionSchema, FieldSchema
30
+ from pymilvus.milvus_client.index import IndexParams
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Config
34
+ # ---------------------------------------------------------------------------
35
+
36
# Storage: the Milvus Lite file and the SQLite FTS file both live under DATA_DIR.
DATA_DIR = Path(os.getenv("L6_DATA_DIR", str(Path.home() / "l6-document-store" / "data")))
MILVUS_DB = str(DATA_DIR.joinpath("documents.db"))
FTS_DB = str(DATA_DIR.joinpath("documents_fts.db"))

# Model endpoints. Every value can be overridden through an L6_* environment variable.
OLLAMA_URL = os.getenv("L6_OLLAMA_URL", "http://localhost:11434")
EMBED_MODEL = os.getenv("L6_EMBED_MODEL", "nomic-embed-text")
NV_EMBED_URL = os.getenv("L6_NV_EMBED_URL", "http://localhost:8041/v1/embeddings")
# Only the exact string "true" (case-insensitive) enables the NV-Embed path.
NV_EMBED_ENABLED = os.getenv("L6_NV_EMBED_ENABLED", "true").lower() == "true"
# Dimension of the vector field created in the Milvus collection.
EMBED_DIM = int(os.getenv("L6_EMBED_DIM", "4096"))
# Optional Authorization: Bearer <key> for the embedding endpoint.
EMBED_API_KEY = os.getenv("L6_EMBED_API_KEY", "")
46
+
47
def _embed_headers() -> dict:
    """Return the Authorization header for the embedding endpoint, if a key is set.

    Returns:
        ``{"Authorization": "Bearer <EMBED_API_KEY>"}`` when EMBED_API_KEY is
        non-empty, otherwise an empty dict.
    """
    if not EMBED_API_KEY:
        return {}
    return {"Authorization": f"Bearer {EMBED_API_KEY}"}
49
# Name of the Milvus collection that holds the document chunks.
COLLECTION_NAME = "documents"
# Constant added to the rank in the reciprocal-rank-fusion denominator.
RRF_K = 60
DEFAULT_PORT = 8037

# Chunk sizes by doc_type: maximum characters per chunk and the number of
# characters shared between consecutive chunks.
CHUNK_CONFIG = {
    **{
        long_form: {"max_chars": 2500, "overlap": 400}
        for long_form in ("legal", "financial", "governance")
    },
    "technical": dict(max_chars=2000, overlap=300),
    "general": dict(max_chars=1500, overlap=200),
}

logging.basicConfig(format="%(asctime)s [%(levelname)s] %(message)s", level=logging.INFO)
log = logging.getLogger("l6-document-store")
64
+
65
+ # ---------------------------------------------------------------------------
66
+ # Embedding
67
+ # ---------------------------------------------------------------------------
68
+
69
# Shared HTTP client used for the embedding and Ollama requests in this module.
# The 60-second timeout is the default; individual calls may pass their own.
_embed_client = httpx.Client(timeout=60)
70
+
71
def embed_text(text: str) -> List[float]:
    """Get an embedding for one text: NV-Embed-v2 primary, Ollama fallback.

    Args:
        text: Text to embed. Only the first 4000 characters are sent to
            NV-Embed and the first 8000 characters to Ollama.

    Returns:
        The embedding vector returned by whichever backend answered.

    Raises:
        httpx.HTTPError: If the Ollama fallback request fails. NV-Embed
            failures are logged and trigger the fallback instead.
    """
    if NV_EMBED_ENABLED:
        try:
            resp = _embed_client.post(
                NV_EMBED_URL,
                json={"input": text[:4000]},
                # Send the optional bearer token; this is an empty dict when
                # no L6_EMBED_API_KEY is configured.
                headers=_embed_headers(),
            )
            resp.raise_for_status()
            return resp.json()["data"][0]["embedding"]
        except Exception as e:
            log.warning(f"NV-Embed-v2 failed, falling back to Ollama: {e}")

    # Ollama fallback.
    # NOTE(review): the fallback model may return vectors whose dimension
    # differs from EMBED_DIM; confirm the deployment keeps them aligned.
    resp = _embed_client.post(
        f"{OLLAMA_URL}/api/embeddings",
        json={"model": EMBED_MODEL, "prompt": text[:8000]},
    )
    resp.raise_for_status()
    return resp.json()["embedding"]
88
+
89
+
90
def embed_batch(texts: List[str]) -> List[List[float]]:
    """Embed a batch of texts in one NV-Embed-v2 request when possible.

    Args:
        texts: Texts to embed. Each is truncated to 4000 characters for the
            batched NV-Embed request.

    Returns:
        One embedding per input text. An empty input yields an empty list
        without any network call.

    Raises:
        httpx.HTTPError: Propagated from ``embed_text`` when the sequential
            fallback also fails.
    """
    # Nothing to embed: skip the HTTP round-trip entirely.
    if not texts:
        return []

    if NV_EMBED_ENABLED:
        try:
            resp = _embed_client.post(
                NV_EMBED_URL,
                json={"input": [t[:4000] for t in texts]},
                # Send the optional bearer token; this is an empty dict when
                # no L6_EMBED_API_KEY is configured.
                headers=_embed_headers(),
            )
            resp.raise_for_status()
            return [d["embedding"] for d in resp.json()["data"]]
        except Exception as e:
            log.warning(f"NV-Embed-v2 batch failed, falling back to sequential: {e}")

    # Sequential fallback: embed_text applies its own primary/fallback logic.
    return [embed_text(t) for t in texts]
101
+
102
+ # ---------------------------------------------------------------------------
103
+ # Cross-Encoder Reranker
104
+ # ---------------------------------------------------------------------------
105
+
106
# Cached cross-encoder instance and a flag recording that a load was attempted,
# so a failed load is not retried on every call.
_reranker = None
_reranker_loaded = False


def get_reranker():
    """Return the cross-encoder reranker, loading it on first use.

    Returns:
        The loaded ``CrossEncoder`` instance, or ``None`` when it could not be
        imported or constructed. The outcome of the first attempt is cached.
    """
    global _reranker, _reranker_loaded

    if _reranker_loaded:
        return _reranker

    try:
        from sentence_transformers import CrossEncoder
        _reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
        log.info("Cross-encoder reranker loaded (ms-marco-MiniLM-L-6-v2)")
    except Exception as e:
        log.warning(f"Cross-encoder not available: {e}")
        _reranker = None

    _reranker_loaded = True
    return _reranker
122
+
123
+
124
def rerank(query: str, results: List[Dict], top_k: int = 10) -> List[Dict]:
    """Reorder the leading results by cross-encoder score.

    Only the first 20 results are scored (using the first 512 characters of
    each text); they are sorted in place by descending score and each gets a
    ``reranker_score`` key. Results beyond the first 20 keep their position.

    Args:
        query: The search query.
        results: Candidate results; each must have a ``"text"`` key.
        top_k: Number of results to return.

    Returns:
        The first ``top_k`` results after reranking, or the unmodified first
        ``top_k`` when no reranker is available or ``results`` is empty.
    """
    model = get_reranker()
    if not (model and results):
        return results[:top_k]

    head = results[:20]
    scores = model.predict([(query, item["text"][:512]) for item in head])

    for position, item in enumerate(head):
        item["reranker_score"] = float(scores[position])

    head.sort(key=lambda item: item.get("reranker_score", -999), reverse=True)
    results[:20] = head
    return results[:top_k]
138
+
139
+ # ---------------------------------------------------------------------------
140
+ # Entity Extraction (ingest-time)
141
+ # ---------------------------------------------------------------------------
142
+
143
def extract_entities(text: str) -> List[str]:
    """Ask the Ollama ``graph-preflexor`` model for named entities in ``text``.

    Only the first 2000 characters of the text are sent. The first
    bracketed span in the model's reply is parsed as a JSON array.

    Args:
        text: Chunk text to analyse.

    Returns:
        Up to 20 stripped entity strings longer than one character, or an
        empty list on any failure (failures are logged at debug level).
    """
    try:
        resp = _embed_client.post(
            f"{OLLAMA_URL}/api/generate",
            json={
                "model": "graph-preflexor",
                "prompt": f"Extract all named entities (people, companies, products, places, dates) from this text. Return ONLY a JSON array of strings, nothing else.\n\nText: {text[:2000]}",
                "stream": False,
            },
            timeout=15,
        )
        if resp.status_code != 200:
            return []

        # Try to parse a JSON array out of the free-form model response.
        found = re.search(r'\[.*?\]', resp.json().get("response", ""), re.DOTALL)
        if not found:
            return []

        cleaned = []
        for item in json.loads(found.group()):
            if item and len(str(item).strip()) > 1:
                cleaned.append(str(item).strip())
        return cleaned[:20]
    except Exception as e:
        log.debug(f"Entity extraction failed: {e}")
    return []
165
+
166
+ # ---------------------------------------------------------------------------
167
+ # Document Processing
168
+ # ---------------------------------------------------------------------------
169
+
170
def detect_doc_type(path: str) -> str:
    """Classify a document by keywords found anywhere in its lower-cased path.

    Categories are checked in order (legal, financial, governance,
    technical); the first one with a matching substring wins.

    Args:
        path: File path of the document.

    Returns:
        One of ``"legal"``, ``"financial"``, ``"governance"``,
        ``"technical"`` or ``"general"`` when nothing matches.
    """
    lowered = path.lower()
    keyword_table = (
        ("legal", ("legal", "contract", "nda", "agreement", "terms")),
        ("financial", ("finance", "financial", "investor", "revenue", "budget", "portfolio")),
        ("governance", ("governance", "policy", "compliance", "audit")),
        ("technical", ("technical", "architecture", "api", "schema", "code")),
    )
    for label, keywords in keyword_table:
        for keyword in keywords:
            if keyword in lowered:
                return label
    return "general"
182
+
183
+
184
def detect_arena(path: str) -> str:
    """Infer the arena/domain from substrings of the lower-cased path.

    Arenas are checked in order (company, project, sarai, research,
    finance); the first one with a matching marker wins.

    Args:
        path: File path of the document.

    Returns:
        The matched arena name, or ``"general"`` when nothing matches.
    """
    lowered = path.lower()
    arena_markers = (
        ("company", ("company", "internal")),
        ("project", ("project", "proj-")),
        ("sarai", ("sarai", "defence")),
        ("research", ("research",)),
        ("finance", ("finance", "portfolio", "stock")),
    )
    for arena_name, markers in arena_markers:
        if any(marker in lowered for marker in markers):
            return arena_name
    return "general"
198
+
199
+
200
def content_hash(text: str) -> str:
    """Return the first 16 hex characters of the text's SHA-256 digest (for dedup)."""
    digest = hashlib.sha256()
    digest.update(text.encode())
    return digest.hexdigest()[:16]
203
+
204
+
205
def chunk_markdown(text: str, doc_type: str = "general") -> List[Dict]:
    """Split markdown into heading-aware chunks sized for the document type.

    The text is cut at level-2 and level-3 headings; each section body is
    then split by ``_split_section`` using the limits from ``CHUNK_CONFIG``
    (unknown doc types use the ``"general"`` limits).

    Args:
        text: Markdown source.
        doc_type: Key into ``CHUNK_CONFIG``.

    Returns:
        A list of ``{"text", "heading"}`` dicts; ``heading`` is the heading
        the chunk appeared under, or ``""`` before the first heading.
    """
    sizing = CHUNK_CONFIG.get(doc_type, CHUNK_CONFIG["general"])
    limit, overlap = sizing["max_chars"], sizing["overlap"]

    pieces = []
    heading = ""
    buffered = ""

    # Split on ## or ### headings; the capture group keeps the heading lines
    # in the output. A trailing None marks end-of-input so the last section
    # is flushed by the same code path as the others.
    for part in [*re.split(r'(^#{2,3}\s+.+$)', text, flags=re.MULTILINE), None]:
        is_heading = part is not None and re.match(r'^#{2,3}\s+', part)
        if part is not None and not is_heading:
            buffered += part
            continue

        # Heading or end of input: emit the section collected so far.
        body = buffered.strip()
        if body:
            pieces.extend(_split_section(body, heading, limit, overlap))

        if part is not None:
            heading = part.strip().lstrip('#').strip()
            buffered = ""

    # Nothing was emitted but the text is not blank: chunk the whole thing.
    if not pieces and text.strip():
        pieces = _split_section(text.strip(), "", limit, overlap)

    return pieces
237
+
238
+
239
+ def _split_section(text: str, heading: str, max_chars: int, overlap: int) -> List[Dict]:
240
+ """Split a section into overlapping chunks."""
241
+ if len(text) <= max_chars:
242
+ return [{"text": text, "heading": heading}]
243
+
244
+ chunks = []
245
+ start = 0
246
+ while start < len(text):
247
+ end = start + max_chars
248
+
249
+ # Try to break at paragraph boundary
250
+ if end < len(text):
251
+ para_break = text.rfind('\n\n', start, end)
252
+ if para_break > start + max_chars // 2:
253
+ end = para_break
254
+
255
+ chunk_text = text[start:end].strip()
256
+ if chunk_text:
257
+ chunks.append({"text": chunk_text, "heading": heading})
258
+
259
+ start = end - overlap
260
+ if start >= len(text):
261
+ break
262
+
263
+ return chunks
264
+
265
+ # ---------------------------------------------------------------------------
266
+ # Milvus Operations
267
+ # ---------------------------------------------------------------------------
268
+
269
def get_milvus() -> MilvusClient:
    """Open the Milvus database, creating the documents collection if missing.

    The data directory is created when needed. On first use the collection
    is created with its schema, a COSINE AUTOINDEX on the vector field is
    built, and the collection is loaded.

    Returns:
        A ``MilvusClient`` connected to ``MILVUS_DB``.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    client = MilvusClient(uri=MILVUS_DB)

    if COLLECTION_NAME in client.list_collections():
        return client

    fields = [
        FieldSchema("id", DataType.VARCHAR, is_primary=True, max_length=64),
        FieldSchema("vector", DataType.FLOAT_VECTOR, dim=EMBED_DIM),
        FieldSchema("text", DataType.VARCHAR, max_length=16000),
        FieldSchema("source_file", DataType.VARCHAR, max_length=500),
        FieldSchema("arena", DataType.VARCHAR, max_length=60),
        FieldSchema("doc_type", DataType.VARCHAR, max_length=30),
        FieldSchema("heading", DataType.VARCHAR, max_length=300),
        FieldSchema("chunk_index", DataType.INT64),
        FieldSchema("content_hash", DataType.VARCHAR, max_length=20),
        FieldSchema("entities_json", DataType.VARCHAR, max_length=2000),
        FieldSchema("indexed_at", DataType.VARCHAR, max_length=30),
    ]
    client.create_collection(
        collection_name=COLLECTION_NAME,
        schema=CollectionSchema(fields=fields),
    )

    # Create the vector index, then load the collection.
    index_params = IndexParams()
    index_params.add_index(field_name="vector", index_type="AUTOINDEX", metric_type="COSINE")
    client.create_index(collection_name=COLLECTION_NAME, index_params=index_params)
    client.load_collection(COLLECTION_NAME)
    log.info(f"Created Milvus collection '{COLLECTION_NAME}'")

    return client
300
+
301
+
302
def search_vector(client: MilvusClient, query_vec: List[float], limit: int = 20,
                  arena: Optional[str] = None) -> List[Dict]:
    """Run a vector similarity search against the documents collection.

    Args:
        client: Milvus client to query.
        query_vec: Query embedding.
        limit: Maximum number of hits to request.
        arena: When given, restrict hits to chunks whose ``arena`` equals it.

    Returns:
        One dict per hit with the stored chunk fields, the parsed
        ``entities`` list, ``score`` (the hit's ``distance`` value) and
        ``engine`` set to ``"vector"``.
    """
    filter_expr = None
    if arena:
        # Escape backslashes and double quotes so the value cannot end the
        # string literal early or alter the filter expression.
        escaped = arena.replace("\\", "\\\\").replace('"', '\\"')
        filter_expr = f'arena == "{escaped}"'

    results = client.search(
        collection_name=COLLECTION_NAME,
        data=[query_vec],
        limit=limit,
        output_fields=["text", "source_file", "arena", "doc_type", "heading",
                       "chunk_index", "content_hash", "entities_json", "indexed_at"],
        filter=filter_expr,
    )

    out = []
    # One hit list is returned per query vector; flatten them.
    for hits in results:
        for hit in hits:
            entity = hit.get("entity", {})
            out.append({
                "text": entity.get("text", ""),
                "source_file": entity.get("source_file", ""),
                "arena": entity.get("arena", ""),
                "doc_type": entity.get("doc_type", ""),
                "heading": entity.get("heading", ""),
                "chunk_index": entity.get("chunk_index", 0),
                "content_hash": entity.get("content_hash", ""),
                "entities": _parse_entities_json(entity.get("entities_json", "[]")),
                "score": hit.get("distance", 0),
                "engine": "vector",
            })
    return out
331
+
332
+ # ---------------------------------------------------------------------------
333
+ # FTS5 Operations
334
+ # ---------------------------------------------------------------------------
335
+
336
def get_fts_db() -> sqlite3.Connection:
    """Open the SQLite FTS database, creating tables and triggers if missing.

    The ``chunks`` table holds the chunk rows; ``chunks_fts`` is an FTS5
    external-content index over it, kept in sync by insert/delete triggers.

    Returns:
        An open connection to ``FTS_DB``. The caller is responsible for
        closing it.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(FTS_DB)
    conn.execute("PRAGMA journal_mode=WAL")
    # Rows are written with INSERT OR REPLACE. SQLite fires DELETE triggers
    # for rows removed by REPLACE only when recursive triggers are enabled;
    # without this the FTS index keeps entries for the replaced rows.
    conn.execute("PRAGMA recursive_triggers=ON")

    # Create content table
    conn.execute("""
        CREATE TABLE IF NOT EXISTS chunks (
            id TEXT PRIMARY KEY,
            text TEXT,
            source_file TEXT,
            arena TEXT,
            doc_type TEXT,
            heading TEXT,
            chunk_index INTEGER,
            content_hash TEXT,
            entities_json TEXT,
            indexed_at TEXT
        )
    """)

    # Create FTS5 virtual table
    conn.execute("""
        CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
            text, source_file, arena, heading, entities_json,
            content='chunks',
            content_rowid='rowid'
        )
    """)

    # Triggers for sync
    conn.execute("""
        CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
            INSERT INTO chunks_fts(rowid, text, source_file, arena, heading, entities_json)
            VALUES (new.rowid, new.text, new.source_file, new.arena, new.heading, new.entities_json);
        END
    """)
    conn.execute("""
        CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
            INSERT INTO chunks_fts(chunks_fts, rowid, text, source_file, arena, heading, entities_json)
            VALUES ('delete', old.rowid, old.text, old.source_file, old.arena, old.heading, old.entities_json);
        END
    """)

    conn.commit()
    return conn
383
+
384
+
385
def search_fts(conn: sqlite3.Connection, query: str, limit: int = 20,
               arena: Optional[str] = None) -> List[Dict]:
    """Keyword search over the chunk index, ranked by FTS5 ``bm25``.

    Every character that is neither a word character nor whitespace is
    replaced by a space before the query is handed to FTS5.

    Args:
        conn: Connection returned by ``get_fts_db``.
        query: Free-text query.
        limit: Maximum number of rows to return.
        arena: When given, restrict rows to this arena.

    Returns:
        One dict per row with the stored chunk fields, the parsed
        ``entities`` list, ``score`` (the negated bm25 rank) and ``engine``
        set to ``"bm25"``. Empty when the cleaned query is blank or the FTS
        query fails.
    """
    # Escape FTS5 special chars
    cleaned = re.sub(r'[^\w\s]', ' ', query).strip()
    if not cleaned:
        return []

    bindings = [cleaned]
    arena_clause = ""
    if arena:
        arena_clause = "AND c.arena = ?"
        bindings.append(arena)
    bindings.append(limit)

    statement = f"""
        SELECT c.*, bm25(chunks_fts) as rank
        FROM chunks_fts f
        JOIN chunks c ON c.rowid = f.rowid
        WHERE chunks_fts MATCH ?
        {arena_clause}
        ORDER BY rank
        LIMIT ?
    """

    try:
        fetched = conn.execute(statement, bindings).fetchall()
    except sqlite3.OperationalError as e:
        log.warning(f"FTS query failed: {e}")
        return []

    # Column order of `chunks` followed by the computed rank.
    column_names = ("id", "text", "source_file", "arena", "doc_type", "heading",
                    "chunk_index", "content_hash", "entities_json", "indexed_at", "rank")
    hits = []
    for row in fetched:
        record = dict(zip(column_names, row))
        hits.append({
            "text": record.get("text", ""),
            "source_file": record.get("source_file", ""),
            "arena": record.get("arena", ""),
            "doc_type": record.get("doc_type", ""),
            "heading": record.get("heading", ""),
            "chunk_index": record.get("chunk_index", 0),
            "content_hash": record.get("content_hash", ""),
            "entities": _parse_entities_json(record.get("entities_json", "[]")),
            "score": -record.get("rank", 0),  # BM25 returns negative scores
            "engine": "bm25",
        })
    return hits
430
+
431
+ # ---------------------------------------------------------------------------
432
+ # RRF Fusion
433
+ # ---------------------------------------------------------------------------
434
+
435
def rrf_fuse(vector_results: List[Dict], bm25_results: List[Dict]) -> List[Dict]:
    """Combine vector and BM25 result lists with Reciprocal Rank Fusion.

    A chunk is identified by ``(source_file, chunk_index)``. Each list in
    which it appears contributes ``1 / (RRF_K + rank + 1)`` to its score,
    where ``rank`` is its zero-based position in that list.

    Args:
        vector_results: Ranked hits from the vector engine.
        bm25_results: Ranked hits from the BM25 engine.

    Returns:
        New result dicts sorted by descending fused score. Each carries
        ``rrf_score`` (rounded to 6 decimals), ``engine_count`` and
        ``engines`` (sorted engine names). When a chunk is in both lists the
        vector hit's fields are kept. The input dicts are not modified.
    """
    scored = {}

    for engine, ranked in (("vector", vector_results), ("bm25", bm25_results)):
        for rank, r in enumerate(ranked):
            key = (r["source_file"], r["chunk_index"])
            entry = scored.setdefault(key, {"result": r, "rrf_score": 0, "engines": set()})
            entry["rrf_score"] += 1.0 / (RRF_K + rank + 1)
            entry["engines"].add(engine)

    # Sort by RRF score
    fused = sorted(scored.values(), key=lambda x: x["rrf_score"], reverse=True)

    out = []
    for item in fused:
        # Copy so the fusion metadata is not written into the caller's dicts.
        r = dict(item["result"])
        r["rrf_score"] = round(item["rrf_score"], 6)
        r["engine_count"] = len(item["engines"])
        # Sorted: set iteration order is not stable across interpreter runs.
        r["engines"] = sorted(item["engines"])
        out.append(r)

    return out
465
+
466
+ # ---------------------------------------------------------------------------
467
+ # Indexing
468
+ # ---------------------------------------------------------------------------
469
+
470
def index_documents(paths: List[str], arena: Optional[str] = None,
                    doc_type: Optional[str] = None,
                    extract_entities_flag: bool = True,
                    use_enhanced_ingest: bool = True) -> Dict:
    """Index documents into both Milvus and FTS5.

    Each file's existing chunks are purged before its new chunks are
    written. PDFs always go through ``enhanced_ingest``; other files use it
    when ``use_enhanced_ingest`` is true and fall back to basic markdown
    chunking when it is unavailable or fails.

    Args:
        paths: File paths to index.
        arena: Arena to record for every file; detected from the path when
            omitted.
        doc_type: Document type to record for every file; detected from the
            path when omitted.
        extract_entities_flag: Run entity extraction on chunks longer than
            50 characters.
        use_enhanced_ingest: Try ``enhanced_ingest`` for non-PDF files too.

    Returns:
        Counters: ``files``, ``chunks``, ``entities_extracted``, ``errors``,
        ``skipped``, ``tables`` and ``semantic_chunks``.
    """
    # Built once per call rather than once per file.
    supported_exts = frozenset((
        '.md', '.txt', '.markdown', '.pdf',
        # enhanced_ingest formats
        '.csv', '.json', '.yaml', '.yml', '.toml',
        '.py', '.js', '.ts', '.go', '.rs', '.java', '.c', '.cpp', '.h',
        '.docx', '.doc', '.pptx', '.ppt', '.xlsx', '.xls',
        '.rtf', '.odt', '.epub', '.tex',
        '.html', '.htm', '.xml',
        '.ipynb',
    ))

    milvus = get_milvus()
    fts_conn = get_fts_db()

    stats = {"files": 0, "chunks": 0, "entities_extracted": 0, "errors": 0, "skipped": 0,
             "tables": 0, "semantic_chunks": 0}
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    try:
        for file_path in paths:
            p = Path(file_path)
            if not p.exists():
                log.warning(f"File not found: {file_path}")
                stats["errors"] += 1
                continue

            suffix = p.suffix.lower()
            if suffix not in supported_exts:
                log.debug(f"Skipping unsupported: {file_path}")
                stats["skipped"] += 1
                continue

            # Use enhanced ingest for PDFs (and optionally for all docs)
            if suffix == '.pdf' or use_enhanced_ingest:
                try:
                    from enhanced_ingest import ingest_document, Chunk
                    result = ingest_document(str(p), arena or detect_arena(str(p)))

                    file_arena = result["arena"]
                    file_doc_type = doc_type or detect_doc_type(str(p))
                    source_file = str(p)

                    # Purge old chunks
                    _purge_file(milvus, fts_conn, source_file)

                    milvus_batch = []
                    for chunk_data in result["chunks"]:
                        chunk_text = chunk_data["text"]
                        c_hash = content_hash(chunk_text)
                        idx = chunk_data["chunk_index"]
                        chunk_id = f"{c_hash}_{idx}"

                        # Track semantic vs table chunks
                        chunk_kind = chunk_data.get("metadata", {}).get("type")
                        if chunk_kind == "semantic":
                            stats["semantic_chunks"] += 1
                        if chunk_kind == "table":
                            stats["tables"] += 1

                        # Extract entities
                        entities = []
                        if extract_entities_flag and len(chunk_text) > 50:
                            entities = extract_entities(chunk_text)
                            if entities:
                                stats["entities_extracted"] += len(entities)

                        entities_json = json.dumps(entities)
                        vector = embed_text(chunk_text)

                        milvus_batch.append({
                            "id": chunk_id,
                            "vector": vector,
                            "text": chunk_text[:15000],
                            "source_file": source_file[:500],
                            "arena": file_arena[:60],
                            "doc_type": file_doc_type[:30],
                            "heading": chunk_data.get("heading", "")[:300],
                            "chunk_index": idx,
                            "content_hash": c_hash,
                            "entities_json": entities_json[:2000],
                            "indexed_at": now,
                        })

                        fts_conn.execute(
                            "INSERT OR REPLACE INTO chunks VALUES (?,?,?,?,?,?,?,?,?,?)",
                            (chunk_id, chunk_text[:15000], source_file[:500], file_arena[:60],
                             file_doc_type[:30], chunk_data.get("heading", "")[:300], idx,
                             c_hash, entities_json[:2000], now),
                        )

                    if milvus_batch:
                        # pymilvus 2.6+ requires keyword args; old positional form
                        # silently no-ops which causes vector hits to be empty.
                        # upsert (as in the basic path below) keeps Milvus in step
                        # with the INSERT OR REPLACE used for FTS when a chunk id
                        # already exists.
                        milvus.upsert(collection_name=COLLECTION_NAME, data=milvus_batch)
                    fts_conn.commit()

                    stats["files"] += 1
                    stats["chunks"] += len(result["chunks"])
                    log.info(f"Indexed (enhanced): {p.name} — {len(result['chunks'])} chunks, {len(result.get('tables', []))} tables")
                    continue

                except ImportError:
                    if suffix == '.pdf':
                        # A PDF cannot be chunked as plain text; treat it like
                        # any other enhanced-ingest failure for a PDF.
                        log.error(f"enhanced_ingest not available, cannot index PDF: {file_path}")
                        stats["errors"] += 1
                        continue
                    log.warning("enhanced_ingest not available, falling back to basic chunking")
                except Exception as e:
                    log.error(f"Enhanced ingest failed for {file_path}: {e}")
                    if suffix == '.pdf':
                        stats["errors"] += 1
                        continue
                    # Fall through to basic chunking for non-PDFs.
                    # NOTE(review): FTS rows written before the failure are
                    # still pending here; the purge below removes them again.

            try:
                text = p.read_text(errors="replace")
                if len(text.strip()) < 20:
                    stats["skipped"] += 1
                    continue

                file_arena = arena or detect_arena(str(p))
                file_doc_type = doc_type or detect_doc_type(str(p))
                source_file = str(p)

                # Purge old chunks for this file (freshness-aware dedup)
                _purge_file(milvus, fts_conn, source_file)

                # Chunk the document
                chunks = chunk_markdown(text, file_doc_type)

                # Process each chunk
                milvus_batch = []
                for idx, chunk in enumerate(chunks):
                    chunk_text = chunk["text"]
                    c_hash = content_hash(chunk_text)
                    chunk_id = f"{c_hash}_{idx}"

                    # Extract entities (ingest-time)
                    entities = []
                    if extract_entities_flag and len(chunk_text) > 50:
                        entities = extract_entities(chunk_text)
                        if entities:
                            stats["entities_extracted"] += len(entities)

                    entities_json = json.dumps(entities)

                    # Embed
                    vector = embed_text(chunk_text)

                    # Prepare Milvus record
                    milvus_batch.append({
                        "id": chunk_id,
                        "vector": vector,
                        "text": chunk_text[:15000],
                        "source_file": source_file[:500],
                        "arena": file_arena[:60],
                        "doc_type": file_doc_type[:30],
                        "heading": chunk.get("heading", "")[:300],
                        "chunk_index": idx,
                        "content_hash": c_hash,
                        "entities_json": entities_json[:2000],
                        "indexed_at": now,
                    })

                    # Insert into FTS5
                    fts_conn.execute(
                        "INSERT OR REPLACE INTO chunks VALUES (?,?,?,?,?,?,?,?,?,?)",
                        (chunk_id, chunk_text[:15000], source_file[:500], file_arena[:60],
                         file_doc_type[:30], chunk.get("heading", "")[:300], idx,
                         c_hash, entities_json[:2000], now),
                    )

                    stats["chunks"] += 1

                # Batch insert into Milvus
                if milvus_batch:
                    milvus.upsert(collection_name=COLLECTION_NAME, data=milvus_batch)

                stats["files"] += 1
                log.info(f"Indexed {p.name}: {len(chunks)} chunks, arena={file_arena}, type={file_doc_type}")

            except Exception as e:
                log.error(f"Error indexing {file_path}: {e}")
                stats["errors"] += 1

        fts_conn.commit()
    finally:
        # Release the SQLite connection even if something unexpected escaped
        # the per-file handlers above.
        fts_conn.close()
    return stats
658
+
659
+
660
def _purge_file(milvus: MilvusClient, fts_conn: sqlite3.Connection, source_file: str):
    """Remove all chunks for a source file (freshness-aware re-index).

    Deletes matching rows from the Milvus collection and from the FTS
    ``chunks`` table. Both deletions are best-effort: failures are logged at
    debug level and never raised. The FTS delete is not committed here; the
    caller owns the transaction.

    Args:
        milvus: Milvus client used for the vector-side delete.
        fts_conn: Open SQLite connection for the FTS-side delete.
        source_file: Source path whose chunks should be removed.
    """
    # The Milvus filter is a string expression, so the path has to be embedded
    # as a quoted literal. Escape backslashes first, then double quotes;
    # otherwise a path containing either character (Windows paths, or a value
    # supplied through the /purge endpoint) yields a malformed or altered
    # filter instead of an exact match.
    escaped = source_file.replace("\\", "\\\\").replace('"', '\\"')

    try:
        # Purge from Milvus
        milvus.delete(
            collection_name=COLLECTION_NAME,
            filter=f'source_file == "{escaped}"',
        )
    except Exception as e:
        log.debug(f"Milvus purge (may be empty): {e}")

    try:
        # Purge from FTS (parameterized, so no escaping is needed here)
        fts_conn.execute("DELETE FROM chunks WHERE source_file = ?", (source_file,))
    except Exception as e:
        log.debug(f"FTS purge: {e}")
676
+
677
+
678
+ def _parse_entities_json(s: str) -> List[str]:
679
+ """Safely parse entities JSON."""
680
+ try:
681
+ return json.loads(s) if s else []
682
+ except (json.JSONDecodeError, TypeError):
683
+ return []
684
+
685
+ # ---------------------------------------------------------------------------
686
+ # Search
687
+ # ---------------------------------------------------------------------------
688
+
689
def search(query: str, method: str = "hybrid", limit: int = 10,
           arena: Optional[str] = None, enable_rerank: bool = True) -> List[Dict]:
    """Search documents with the specified method.

    Args:
        query: Free-text query.
        method: ``"vector"`` or ``"bm25"``. Any other value, including the
            default ``"hybrid"``, queries both engines and fuses the two
            rankings with ``rrf_fuse``.
        limit: Maximum number of results to return.
        arena: Optional arena filter forwarded to both engines.
        enable_rerank: When true and more than one hit was found, the hits
            are passed through ``rerank`` before truncation.

    Returns:
        At most ``limit`` result dicts.
    """

    def _bm25(n: int) -> List[Dict]:
        # Every other get_fts_db() call site in this file closes the
        # connection it obtained; do the same here so searches do not leak
        # one SQLite connection per request.
        conn = get_fts_db()
        try:
            return search_fts(conn, query, limit=n, arena=arena)
        finally:
            conn.close()

    if method == "vector":
        vec = embed_text(query)
        results = search_vector(get_milvus(), vec, limit=limit, arena=arena)
    elif method == "bm25":
        results = _bm25(limit)
    else:
        # Hybrid: RRF fusion. Keep the historical 20-candidate pool per engine
        # but grow it with `limit`, otherwise requests for more than 20
        # results can never see candidates ranked below 20 in either engine.
        pool = max(20, limit)
        vec = embed_text(query)
        vector_results = search_vector(get_milvus(), vec, limit=pool, arena=arena)
        bm25_results = _bm25(pool)
        results = rrf_fuse(vector_results, bm25_results)

    # Rerank if enabled
    if enable_rerank and len(results) > 1:
        results = rerank(query, results, top_k=limit)

    return results[:limit]
710
+
711
+ # ---------------------------------------------------------------------------
712
+ # Stats & Health
713
+ # ---------------------------------------------------------------------------
714
+
715
def get_stats() -> Dict:
    """Get index statistics.

    Collection is best-effort: if either backend fails, its counters keep
    their zero/empty defaults and the failure is logged instead of raised.

    Returns:
        A dict with ``vector_chunks`` (Milvus ``row_count``), ``fts_chunks``
        (row count of the FTS ``chunks`` table), and per-``arena`` /
        per-``doc_type`` chunk counts taken from the FTS table.
    """
    stats = {"vector_chunks": 0, "fts_chunks": 0, "arenas": {}, "doc_types": {}}

    try:
        milvus = get_milvus()
        info = milvus.get_collection_stats(COLLECTION_NAME)
        stats["vector_chunks"] = info.get("row_count", 0)
    except Exception as e:
        log.warning("get_stats: Milvus stats unavailable: %s", e)

    conn = None
    try:
        conn = get_fts_db()
        row = conn.execute("SELECT COUNT(*) FROM chunks").fetchone()
        stats["fts_chunks"] = row[0] if row else 0

        for row in conn.execute("SELECT arena, COUNT(*) FROM chunks GROUP BY arena").fetchall():
            stats["arenas"][row[0]] = row[1]

        for row in conn.execute("SELECT doc_type, COUNT(*) FROM chunks GROUP BY doc_type").fetchall():
            stats["doc_types"][row[0]] = row[1]
    except Exception as e:
        log.warning("get_stats: FTS stats unavailable: %s", e)
    finally:
        # Close even when a query above failed, so the connection is not leaked.
        if conn is not None:
            conn.close()

    return stats
742
+
743
+
744
def health() -> Dict:
    """Health check.

    Probes each dependency independently and never raises for a failing
    Milvus, FTS or Ollama probe; the failure is recorded in the returned
    dict and the overall ``status`` becomes ``"degraded"``.

    Returns:
        A dict with an overall ``status`` (``"ok"`` or ``"degraded"``) and one
        human-readable entry each for ``milvus``, ``fts``, ``ollama`` and
        ``reranker``. A missing reranker is reported but does not degrade
        the overall status.
    """
    status = {"status": "ok", "milvus": "unknown", "fts": "unknown", "ollama": "unknown", "reranker": "unknown"}

    # Milvus
    try:
        client = get_milvus()
        colls = client.list_collections()
        status["milvus"] = f"ok ({len(colls)} collections)"
    except Exception as e:
        status["milvus"] = f"error: {e}"
        status["status"] = "degraded"

    # FTS
    conn = None
    try:
        conn = get_fts_db()
        cnt = conn.execute("SELECT COUNT(*) FROM chunks").fetchone()[0]
        status["fts"] = f"ok ({cnt} chunks)"
    except Exception as e:
        status["fts"] = f"error: {e}"
        status["status"] = "degraded"
    finally:
        # Close even when the probe query failed, so the connection is not leaked.
        if conn is not None:
            conn.close()

    # Ollama
    try:
        resp = _embed_client.get(f"{OLLAMA_URL}/api/tags", timeout=5)
        if resp.status_code == 200:
            status["ollama"] = "ok"
        else:
            # An HTTP error is a failed probe just like a connection error,
            # so it must not leave the overall status at "ok".
            status["ollama"] = f"http {resp.status_code}"
            status["status"] = "degraded"
    except Exception as e:
        status["ollama"] = f"error: {e}"
        status["status"] = "degraded"

    # Reranker
    reranker = get_reranker()
    status["reranker"] = "loaded" if reranker else "unavailable (CPU fallback to RRF)"

    return status
780
+
781
+ # ---------------------------------------------------------------------------
782
+ # FastAPI Server
783
+ # ---------------------------------------------------------------------------
784
+
785
def serve(port: int = DEFAULT_PORT):
    """Run as HTTP API server.

    Builds a FastAPI application exposing health, stats, search, indexing,
    purge and rebuild endpoints, then blocks inside ``uvicorn.run``. The bind
    address is read from the ``HOST`` environment variable and defaults to
    ``127.0.0.1``.

    Args:
        port: TCP port to listen on.
    """
    from fastapi import FastAPI, Query as Q, HTTPException
    from pydantic import BaseModel
    import uvicorn

    api = FastAPI(title="L6 Document Store", version="1.0.0")

    class IndexRequest(BaseModel):
        paths: List[str]
        arena: Optional[str] = None
        doc_type: Optional[str] = None
        extract_entities: bool = True

    @api.get("/health")
    def api_health():
        return health()

    @api.get("/stats")
    def api_stats():
        return get_stats()

    @api.get("/search")
    def api_search(
        q: str = Q(..., description="Search query"),
        method: str = Q("hybrid", description="hybrid|vector|bm25"),
        limit: int = Q(10, ge=1, le=50),
        arena: Optional[str] = Q(None),
        rerank: bool = Q(True),
    ):
        results = search(q, method=method, limit=limit, arena=arena, enable_rerank=rerank)
        return {"query": q, "method": method, "results": results, "count": len(results)}

    @api.post("/search")
    def api_search_post(
        q: str,
        method: str = "hybrid",
        limit: int = 10,
        arena: Optional[str] = None,
        rerank: bool = True,
    ):
        """POST version of search for compatibility."""
        results = search(q, method=method, limit=limit, arena=arena, enable_rerank=rerank)
        return {"query": q, "method": method, "results": results, "count": len(results)}

    @api.post("/index")
    def api_index(req: IndexRequest):
        stats = index_documents(
            req.paths, arena=req.arena, doc_type=req.doc_type,
            extract_entities_flag=req.extract_entities,
        )
        return {"status": "ok", "stats": stats}

    @api.post("/index-batch")
    def api_index_batch(req: dict):
        """Index a batch of in-memory documents in a single batched
        NV-Embed call + a single milvus insert + one FTS write.

        Roughly 30-50x faster than calling /index for the equivalent
        files because the legacy path does one embed roundtrip per
        chunk. This endpoint exists for tests, smoke runs and bench
        harnesses where small corpora need to land quickly.

        Request body::

            {
              "arena": "benchmark",
              "records": [
                {
                  "id": "doc1",              # required, becomes chunk id prefix
                  "text": "…",               # required, indexed as one chunk
                  "source_file": "doc1.md",  # optional
                  "doc_type": "general",     # optional, default "general"
                  "heading": "…"             # optional
                }, …
              ]
            }

        Returns::

            {"status": "ok", "inserted": N, "embed_ms": float, "insert_ms": float}
        """
        import time as _time, hashlib as _hashlib, httpx as _httpx
        from datetime import datetime as _dt, timezone as _tz

        records = req.get("records") or []
        arena = req.get("arena") or "general"
        if not records:
            # Same response shape as the non-empty case, as documented above.
            return {"status": "ok", "inserted": 0, "embed_ms": 0.0, "insert_ms": 0.0}

        texts = [(r.get("text") or "")[:16000] for r in records]

        def _record_id(rec, txt):
            # Fall back to a content-derived id when the record carries none.
            return rec.get("id") or _hashlib.sha1(txt.encode("utf-8")).hexdigest()[:32]

        # Single batched NV-Embed call.
        t0 = _time.time()
        try:
            resp = _httpx.post(
                NV_EMBED_URL, headers=_embed_headers(),
                json={"input": texts, "model": EMBED_MODEL},
                timeout=120,
            )
            resp.raise_for_status()
            embs = [d["embedding"] for d in resp.json()["data"]]
        except Exception as exc:
            raise HTTPException(status_code=500, detail=f"embed failed: {exc}") from exc
        embed_ms = (_time.time() - t0) * 1000.0

        # zip() below stops at the shortest input, so a short embedding
        # response would silently drop trailing records while still
        # reporting success. Fail loudly instead.
        if len(embs) != len(texts):
            raise HTTPException(
                status_code=500,
                detail=f"embed failed: expected {len(texts)} embeddings, got {len(embs)}",
            )

        # Single milvus insert.
        milvus = get_milvus()
        now = _dt.now(_tz.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        rows = []
        for r, emb, txt in zip(records, embs, texts):
            if emb is None:
                continue
            rid = _record_id(r, txt)
            chunk_id = f"l6:{rid}:0"[:63]
            rows.append({
                "id": chunk_id,
                "vector": emb,
                "text": txt,
                "source_file": (r.get("source_file") or f"{rid}.md")[:500],
                "arena": arena[:60],
                "doc_type": (r.get("doc_type") or "general")[:30],
                "heading": (r.get("heading") or "")[:300],
                "chunk_index": 0,
                "content_hash": _hashlib.sha1(txt.encode("utf-8")).hexdigest()[:20],
                "entities_json": "[]",
                "indexed_at": now,
            })
        t1 = _time.time()
        if rows:
            milvus.insert(collection_name=COLLECTION_NAME, data=rows)
        insert_ms = (_time.time() - t1) * 1000.0

        # Single FTS write (best-effort — search still works without it).
        fts_conn = None
        try:
            fts_conn = get_fts_db()
            for r, txt in zip(records, texts):
                rid = _record_id(r, txt)
                fts_conn.execute(
                    "INSERT INTO chunks_fts(text, source_file, arena, heading, entities_json) "
                    "VALUES (?, ?, ?, ?, ?)",
                    (txt, (r.get("source_file") or f"{rid}.md"), arena,
                     (r.get("heading") or ""), "[]"),
                )
            fts_conn.commit()
        except Exception as exc:
            log.warning("FTS write failed in /index-batch: %s", exc)
        finally:
            # Close on the failure path too, so the connection is not leaked.
            if fts_conn is not None:
                fts_conn.close()

        return {
            "status": "ok",
            "inserted": len(rows),
            "embed_ms": round(embed_ms, 1),
            "insert_ms": round(insert_ms, 1),
        }

    @api.delete("/purge")
    def api_purge(source_file: str = Q(...)):
        """Remove all chunks for a source file."""
        milvus = get_milvus()
        fts_conn = get_fts_db()
        try:
            _purge_file(milvus, fts_conn, source_file)
            fts_conn.commit()
        finally:
            # Release the connection even if the commit fails.
            fts_conn.close()
        return {"status": "purged", "source_file": source_file}

    @api.post("/rebuild-index")
    def api_rebuild():
        """Force Milvus index rebuild."""
        milvus = get_milvus()
        milvus.release_collection(COLLECTION_NAME)
        milvus.load_collection(COLLECTION_NAME)
        return {"status": "rebuilt"}

    # Resolve the bind address once so the startup log reports the address
    # uvicorn actually binds to, not a hard-coded loopback.
    host = os.environ.get("HOST", "127.0.0.1")
    log.info(f"L6 Document Store — http://{host}:{port}")
    uvicorn.run(api, host=host, port=port, log_level="info")
961
+
962
+ # ---------------------------------------------------------------------------
963
+ # CLI
964
+ # ---------------------------------------------------------------------------
965
+
966
def main():
    """Command-line entry point.

    Parses ``sys.argv`` and dispatches to one of the subcommands ``serve``,
    ``index``, ``search``, ``health`` or ``stats``. ``index`` and ``search``
    print a usage hint and return when no positional arguments are given.
    """
    cli = argparse.ArgumentParser(description="L6 Document Store")
    cli.add_argument("command", choices=["serve", "index", "search", "health", "stats"])
    cli.add_argument("args", nargs="*")
    cli.add_argument("--port", "-p", type=int, default=DEFAULT_PORT)
    cli.add_argument("--arena", "-a", type=str, default=None)
    cli.add_argument("--doc-type", "-t", type=str, default=None)
    cli.add_argument("--method", "-m", type=str, default="hybrid")
    cli.add_argument("--limit", "-l", type=int, default=10)
    cli.add_argument("--no-entities", action="store_true")
    cli.add_argument("--no-rerank", action="store_true")

    opts = cli.parse_args()
    command = opts.command
    positional = opts.args

    if command == "serve":
        serve(port=opts.port)
        return

    if command == "index":
        if not positional:
            print("Usage: l6-document-store.py index <file1.md> [file2.md ...]")
            print(" l6-document-store.py index ~/memory/research/*.md")
            return
        summary = index_documents(
            positional,
            arena=opts.arena,
            doc_type=opts.doc_type,
            extract_entities_flag=not opts.no_entities,
        )
        print(json.dumps(summary, indent=2))
        return

    if command == "search":
        # Joining an empty list yields "", which triggers the usage hint.
        query = " ".join(positional)
        if not query:
            print("Usage: l6-document-store.py search 'your query'")
            return
        hits = search(
            query,
            method=opts.method,
            limit=opts.limit,
            arena=opts.arena,
            enable_rerank=not opts.no_rerank,
        )
        for i, r in enumerate(hits, 1):
            print(f"\n--- [{i}] {r.get('source_file','?')} (rrf={r.get('rrf_score',0):.4f}, engines={r.get('engines','?')}) ---")
            heading = r.get("heading")
            if heading:
                print(f"Heading: {r['heading']}")
            entities = r.get("entities")
            if entities:
                print(f"Entities: {', '.join(r['entities'][:10])}")
            print(r["text"][:300])
        return

    if command == "health":
        print(json.dumps(health(), indent=2))
        return

    if command == "stats":
        print(json.dumps(get_stats(), indent=2))
1013
+
1014
+
1015
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()