@pentatonic-ai/ai-agent-sdk 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/README.md +178 -69
  2. package/bin/__tests__/callback-server.test.js +4 -1
  3. package/bin/cli.js +41 -164
  4. package/bin/commands/config.js +251 -0
  5. package/bin/commands/login.js +10 -3
  6. package/package.json +2 -1
  7. package/packages/doctor/__tests__/detect.test.js +2 -6
  8. package/packages/doctor/src/checks/local-memory.js +164 -196
  9. package/packages/doctor/src/detect.js +11 -3
  10. package/packages/memory/src/corpus/adapters.js +104 -0
  11. package/packages/memory/src/corpus/cli.js +72 -7
  12. package/packages/memory/src/corpus/index.js +1 -1
  13. package/packages/memory-engine/.env.example +13 -0
  14. package/packages/memory-engine/README.md +131 -0
  15. package/packages/memory-engine/bench/README.md +99 -0
  16. package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +1115 -0
  17. package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +819 -0
  18. package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +1278 -0
  19. package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +1018 -0
  20. package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +1038 -0
  21. package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +961 -0
  22. package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +1115 -0
  23. package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +819 -0
  24. package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +1278 -0
  25. package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +1018 -0
  26. package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +1038 -0
  27. package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +937 -0
  28. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +1115 -0
  29. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +819 -0
  30. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +1278 -0
  31. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +1018 -0
  32. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +1038 -0
  33. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +961 -0
  34. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +1115 -0
  35. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +819 -0
  36. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +1278 -0
  37. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +1018 -0
  38. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +1038 -0
  39. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +883 -0
  40. package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +1115 -0
  41. package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +819 -0
  42. package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +1278 -0
  43. package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +1018 -0
  44. package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +1038 -0
  45. package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +937 -0
  46. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +1115 -0
  47. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +1115 -0
  48. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +819 -0
  49. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +542 -0
  50. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +1278 -0
  51. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +894 -0
  52. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +1018 -0
  53. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +680 -0
  54. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +1038 -0
  55. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +693 -0
  56. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +961 -0
  57. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +727 -0
  58. package/packages/memory-engine/compat/Dockerfile +11 -0
  59. package/packages/memory-engine/compat/server.py +680 -0
  60. package/packages/memory-engine/docker-compose.yml +243 -0
  61. package/packages/memory-engine/engine/README.md +52 -0
  62. package/packages/memory-engine/engine/l2-hybridrag-proxy.py +1543 -0
  63. package/packages/memory-engine/engine/l5-comms-layer.py +663 -0
  64. package/packages/memory-engine/engine/l6-document-store.py +1018 -0
  65. package/packages/memory-engine/engine/services/l2/Dockerfile +41 -0
  66. package/packages/memory-engine/engine/services/l2/init_databases.py +81 -0
  67. package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +1543 -0
  68. package/packages/memory-engine/engine/services/l4/Dockerfile +15 -0
  69. package/packages/memory-engine/engine/services/l4/server.py +265 -0
  70. package/packages/memory-engine/engine/services/l5/Dockerfile +9 -0
  71. package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +696 -0
  72. package/packages/memory-engine/engine/services/l6/Dockerfile +11 -0
  73. package/packages/memory-engine/engine/services/l6/l6-document-store.py +1035 -0
  74. package/packages/memory-engine/engine/services/nv-embed/Dockerfile +28 -0
  75. package/packages/memory-engine/engine/services/nv-embed/server.py +152 -0
  76. package/packages/memory-engine/pme_memory/__init__.py +0 -0
  77. package/packages/memory-engine/pme_memory/__main__.py +129 -0
  78. package/packages/memory-engine/pme_memory/artifacts.py +95 -0
  79. package/packages/memory-engine/pme_memory/embed.py +74 -0
  80. package/packages/memory-engine/pme_memory/health.py +36 -0
  81. package/packages/memory-engine/pme_memory/hygiene.py +159 -0
  82. package/packages/memory-engine/pme_memory/indexer.py +200 -0
  83. package/packages/memory-engine/pme_memory/needs.py +55 -0
  84. package/packages/memory-engine/pme_memory/provenance.py +80 -0
  85. package/packages/memory-engine/pme_memory/scoring.py +168 -0
  86. package/packages/memory-engine/pme_memory/search.py +52 -0
  87. package/packages/memory-engine/pme_memory/store.py +86 -0
  88. package/packages/memory-engine/pme_memory/synthesis.py +114 -0
  89. package/packages/memory-engine/pyproject.toml +65 -0
  90. package/packages/memory-engine/scripts/kg-extractor.py +557 -0
  91. package/packages/memory-engine/scripts/kg-preflexor-v2.py +738 -0
  92. package/packages/memory-engine/tests/test_api_contract.sh +57 -0
@@ -0,0 +1,1035 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ L6 Document Store — HybridRAG for Document Retrieval
4
+
5
+ Features:
6
+ - Milvus Lite (vector) + SQLite FTS5 (BM25) + RRF fusion
7
+ - Cross-encoder reranker (ms-marco-MiniLM-L-6-v2)
8
+ - Ingest-time entity extraction via Ollama graph-preflexor
9
+ - Adaptive chunk sizing by doc_type
10
+ - Freshness-aware dedup (purge-and-replace on re-index)
11
+ - Confidence scoring (RRF + engine_count + reranker_score)
12
+
13
+ Port: 8037
14
+ """
15
+
16
+ import argparse
17
+ import hashlib
18
+ import json
19
+ import logging
20
+ import os
21
+ import re
22
+ import sqlite3
23
+ import time
24
+ from datetime import datetime, timezone
25
+ from pathlib import Path
26
+ from typing import Any, Dict, List, Optional, Tuple
27
+
28
+ import httpx
29
+ from pymilvus import MilvusClient, DataType, CollectionSchema, FieldSchema
30
+ from pymilvus.milvus_client.index import IndexParams
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Config
34
+ # ---------------------------------------------------------------------------
35
+
36
# Storage: the Milvus Lite file and the SQLite FTS file both live under DATA_DIR.
DATA_DIR = Path(
    os.getenv("L6_DATA_DIR", str(Path.home() / "l6-document-store" / "data"))
)
MILVUS_DB = str(DATA_DIR / "documents.db")
FTS_DB = str(DATA_DIR / "documents_fts.db")

# Model endpoints (each overridable through an L6_* environment variable).
OLLAMA_URL = os.getenv("L6_OLLAMA_URL", "http://localhost:11434")
EMBED_MODEL = os.getenv("L6_EMBED_MODEL", "nomic-embed-text")
NV_EMBED_URL = os.getenv("L6_NV_EMBED_URL", "http://localhost:8041/v1/embeddings")
# Only the literal string "true" (case-insensitive) enables the NV-Embed path.
NV_EMBED_ENABLED = os.getenv("L6_NV_EMBED_ENABLED", "true").lower() == "true"
EMBED_DIM = int(os.getenv("L6_EMBED_DIM", "4096"))
# Optional Authorization: Bearer <key> for the embedding endpoint.
EMBED_API_KEY = os.getenv("L6_EMBED_API_KEY", "")
46
+
47
def _embed_post(texts):
    """POST ``texts`` to the embedding endpoint and return the embeddings.

    The OpenAI-compatible shape is tried first: ``NV_EMBED_URL`` with an
    ``Authorization: Bearer`` header, reading ``data[*].embedding`` from the
    response. If that attempt fails for any reason, the request is retried
    against the ``/embed`` variant of the URL with an ``X-API-Key`` header,
    reading ``embeddings`` from the response.

    Args:
        texts: Value sent as the ``input`` field of the JSON payload.

    Returns:
        The embeddings extracted from whichever endpoint answered.

    Raises:
        Exception: Whatever the fallback request raises (transport error, HTTP
            error status, or unexpected response shape). A failure of the
            first attempt is logged, not raised.
    """
    payload = {"input": texts, "model": EMBED_MODEL}
    try:
        r = httpx.post(
            NV_EMBED_URL,
            headers={"Authorization": f"Bearer {EMBED_API_KEY}"} if EMBED_API_KEY else {},
            json=payload,
            timeout=120,
        )
        r.raise_for_status()
        return [d["embedding"] for d in r.json()["data"]]
    except Exception as e:
        # Best-effort: the fallback below is the real error boundary, but keep
        # a trace of why the primary shape was rejected instead of hiding it.
        log.debug(f"OpenAI-compatible embedding request failed, trying native shape: {e}")

    fallback_url = NV_EMBED_URL.replace("/v1/embeddings", "/v1/embed").replace("/embeddings", "/embed")
    r = httpx.post(
        fallback_url,
        headers={"X-API-Key": EMBED_API_KEY} if EMBED_API_KEY else {},
        json=payload,
        timeout=120,
    )
    r.raise_for_status()
    return r.json()["embeddings"]
73
+
74
COLLECTION_NAME = "documents"
RRF_K = 60
DEFAULT_PORT = 8037

# Chunk sizes by doc_type: maximum characters per chunk and the number of
# characters shared between consecutive chunks.
CHUNK_CONFIG = {
    "legal": dict(max_chars=2500, overlap=400),
    "financial": dict(max_chars=2500, overlap=400),
    "governance": dict(max_chars=2500, overlap=400),
    "technical": dict(max_chars=2000, overlap=300),
    "general": dict(max_chars=1500, overlap=200),
}

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
log = logging.getLogger("l6-document-store")
89
+
90
+ # ---------------------------------------------------------------------------
91
+ # Embedding
92
+ # ---------------------------------------------------------------------------
93
+
94
# Shared HTTP client used by the embedding and entity-extraction calls.
_embed_client = httpx.Client(timeout=60)


def embed_text(text: str) -> List[float]:
    """Return the embedding vector for a single text.

    When NV_EMBED_ENABLED is set, the NV-Embed endpoint is asked first with
    the text cut to 4000 characters; any failure there is logged as a warning
    and the Ollama embeddings API is used instead (text cut to 8000
    characters). Errors from the Ollama request propagate to the caller.
    """
    if NV_EMBED_ENABLED:
        try:
            nv_response = _embed_client.post(NV_EMBED_URL, json={"input": text[:4000]})
            nv_response.raise_for_status()
            return nv_response.json()["data"][0]["embedding"]
        except Exception as e:
            log.warning(f"NV-Embed-v2 failed, falling back to Ollama: {e}")

    # Ollama fallback
    ollama_payload = {"model": EMBED_MODEL, "prompt": text[:8000]}
    ollama_response = _embed_client.post(f"{OLLAMA_URL}/api/embeddings", json=ollama_payload)
    ollama_response.raise_for_status()
    return ollama_response.json()["embedding"]
113
+
114
+
115
def embed_batch(texts: List[str]) -> List[List[float]]:
    """Embed several texts, preferring one batched NV-Embed request.

    Args:
        texts: Texts to embed; each is cut to 4000 characters for the batched
            request.

    Returns:
        One embedding per input text, in input order. An empty input yields an
        empty list without any network call.

    Raises:
        Exception: Whatever ``embed_text`` raises when the sequential fallback
            also fails.
    """
    if not texts:
        return []

    if NV_EMBED_ENABLED:
        try:
            resp = _embed_client.post(NV_EMBED_URL, json={"input": [t[:4000] for t in texts]})
            resp.raise_for_status()
            vectors = [d["embedding"] for d in resp.json()["data"]]
            # A short or long reply would silently misalign vectors with texts
            # downstream, so treat it like any other batch failure.
            if len(vectors) != len(texts):
                raise ValueError(f"expected {len(texts)} embeddings, got {len(vectors)}")
            return vectors
        except Exception as e:
            log.warning(f"NV-Embed-v2 batch failed, falling back to sequential: {e}")

    return [embed_text(t) for t in texts]
126
+
127
+ # ---------------------------------------------------------------------------
128
+ # Cross-Encoder Reranker
129
+ # ---------------------------------------------------------------------------
130
+
131
# Cached cross-encoder instance plus a flag recording that loading was already
# attempted, so a failed load is not retried on every call.
_reranker = None
_reranker_loaded = False


def get_reranker():
    """Return the cross-encoder reranker, loading it on first use.

    The load is attempted once; if it fails, a warning is logged and None is
    returned on this and every later call.
    """
    global _reranker, _reranker_loaded
    if _reranker_loaded:
        return _reranker

    try:
        from sentence_transformers import CrossEncoder
        _reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
        log.info("Cross-encoder reranker loaded (ms-marco-MiniLM-L-6-v2)")
    except Exception as e:
        log.warning(f"Cross-encoder not available: {e}")
        _reranker = None
    _reranker_loaded = True
    return _reranker
147
+
148
+
149
def rerank(query: str, results: List[Dict], top_k: int = 10) -> List[Dict]:
    """Reorder the leading results with the cross-encoder and return top_k.

    Only the first 20 results are scored (using the first 512 characters of
    each text); they receive a ``reranker_score`` key and are re-sorted in
    place within ``results``. Without a reranker, or with no results, the
    input order is kept.
    """
    model = get_reranker()
    if not (model and results):
        return results[:top_k]

    head = results[:20]
    scores = model.predict([(query, item["text"][:512]) for item in head])

    for position, item in enumerate(head):
        item["reranker_score"] = float(scores[position])

    head.sort(key=lambda item: item.get("reranker_score", -999), reverse=True)
    results[:20] = head
    return results[:top_k]
163
+
164
+ # ---------------------------------------------------------------------------
165
+ # Entity Extraction (ingest-time)
166
+ # ---------------------------------------------------------------------------
167
+
168
def extract_entities(text: str) -> List[str]:
    """Extract named entities from ``text`` via the Ollama graph-preflexor model.

    The first 2000 characters of ``text`` are sent in the prompt. The model
    reply is scanned for the first parseable JSON array; its truthy items are
    stringified, stripped, filtered to those longer than one character, and
    capped at 20.

    Args:
        text: Chunk text to analyse.

    Returns:
        Up to 20 entity strings, or an empty list when the request fails, the
        status is not 200, or no JSON array can be parsed (best-effort: errors
        are logged at debug level, never raised).
    """
    try:
        resp = _embed_client.post(
            f"{OLLAMA_URL}/api/generate",
            json={
                "model": "graph-preflexor",
                "prompt": f"Extract all named entities (people, companies, products, places, dates) from this text. Return ONLY a JSON array of strings, nothing else.\n\nText: {text[:2000]}",
                "stream": False,
            },
            timeout=15,
        )
        if resp.status_code != 200:
            return []

        raw = resp.json().get("response", "")
        # Decode with a real JSON parser from each '[' in turn. A regex such as
        # \[.*?\] stops at the first ']' and so breaks on entities that contain
        # brackets (e.g. "Acme [UK]").
        decoder = json.JSONDecoder()
        pos = raw.find("[")
        while pos != -1:
            try:
                entities, _ = decoder.raw_decode(raw, pos)
            except json.JSONDecodeError:
                pos = raw.find("[", pos + 1)
                continue
            return [str(e).strip() for e in entities if e and len(str(e).strip()) > 1][:20]
    except Exception as e:
        log.debug(f"Entity extraction failed: {e}")
    return []
190
+
191
+ # ---------------------------------------------------------------------------
192
+ # Document Processing
193
+ # ---------------------------------------------------------------------------
194
+
195
def detect_doc_type(path: str) -> str:
    """Classify a document by keywords found anywhere in its lower-cased path.

    Categories are checked in order (legal, financial, governance, technical);
    the first one with a keyword occurring as a substring wins, otherwise
    "general" is returned.
    """
    lowered = path.lower()
    keyword_table = (
        ("legal", ("legal", "contract", "nda", "agreement", "terms")),
        ("financial", ("finance", "financial", "investor", "revenue", "budget", "portfolio")),
        ("governance", ("governance", "policy", "compliance", "audit")),
        ("technical", ("technical", "architecture", "api", "schema", "code")),
    )
    for label, keywords in keyword_table:
        for keyword in keywords:
            if keyword in lowered:
                return label
    return "general"
207
+
208
+
209
def detect_arena(path: str) -> str:
    """Map a path to an arena label by substring match on the lower-cased path.

    Rules are evaluated in order (company, project, sarai, research, finance);
    the first rule with a matching substring decides, and "general" is the
    default.
    """
    lowered = path.lower()
    rules = (
        ("company", ("company", "internal")),
        ("project", ("project", "proj-")),
        ("sarai", ("sarai", "defence")),
        ("research", ("research",)),
        ("finance", ("finance", "portfolio", "stock")),
    )
    for arena, needles in rules:
        if any(needle in lowered for needle in needles):
            return arena
    return "general"
223
+
224
+
225
def content_hash(text: str) -> str:
    """Return the first 16 hex characters of the SHA-256 of ``text`` (for dedup)."""
    digest = hashlib.sha256()
    digest.update(text.encode())
    return digest.hexdigest()[:16]
228
+
229
+
230
def chunk_markdown(text: str, doc_type: str = "general") -> List[Dict]:
    """Chunk markdown by ``##``/``###`` headings, sized according to doc_type.

    The text before each heading is flushed as a section under the previous
    heading (empty for the preamble) and handed to ``_split_section`` with the
    ``max_chars``/``overlap`` configured for ``doc_type`` (unknown types use
    the "general" settings). If nothing was produced but the text is not
    blank, the whole text is chunked with an empty heading.
    """
    settings = CHUNK_CONFIG.get(doc_type, CHUNK_CONFIG["general"])
    limit, overlap = settings["max_chars"], settings["overlap"]

    pieces = []
    heading = ""
    buffer = ""

    # The capture group keeps the heading lines in the split output, so the
    # segments alternate between body text and heading lines.
    for segment in re.split(r'(^#{2,3}\s+.+$)', text, flags=re.MULTILINE):
        if re.match(r'^#{2,3}\s+', segment) is None:
            buffer += segment
            continue

        # A heading closes the section collected so far.
        body = buffer.strip()
        if body:
            pieces.extend(_split_section(body, heading, limit, overlap))
        heading = segment.strip().lstrip('#').strip()
        buffer = ""

    # Flush whatever followed the final heading.
    tail = buffer.strip()
    if tail:
        pieces.extend(_split_section(tail, heading, limit, overlap))

    # Nothing produced (e.g. only headings): chunk the raw text as one section.
    if not pieces and text.strip():
        pieces = _split_section(text.strip(), "", limit, overlap)

    return pieces
262
+
263
+
264
+ def _split_section(text: str, heading: str, max_chars: int, overlap: int) -> List[Dict]:
265
+ """Split a section into overlapping chunks."""
266
+ if len(text) <= max_chars:
267
+ return [{"text": text, "heading": heading}]
268
+
269
+ chunks = []
270
+ start = 0
271
+ while start < len(text):
272
+ end = start + max_chars
273
+
274
+ # Try to break at paragraph boundary
275
+ if end < len(text):
276
+ para_break = text.rfind('\n\n', start, end)
277
+ if para_break > start + max_chars // 2:
278
+ end = para_break
279
+
280
+ chunk_text = text[start:end].strip()
281
+ if chunk_text:
282
+ chunks.append({"text": chunk_text, "heading": heading})
283
+
284
+ start = end - overlap
285
+ if start >= len(text):
286
+ break
287
+
288
+ return chunks
289
+
290
+ # ---------------------------------------------------------------------------
291
+ # Milvus Operations
292
+ # ---------------------------------------------------------------------------
293
+
294
def get_milvus() -> MilvusClient:
    """Open the Milvus database, creating the collection on first use.

    Ensures DATA_DIR exists and opens a client on MILVUS_DB. If the collection
    is missing, it is created with the chunk schema, given an AUTOINDEX/COSINE
    index on the vector field, and loaded.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    client = MilvusClient(uri=MILVUS_DB)

    if COLLECTION_NAME in client.list_collections():
        return client

    fields = [
        FieldSchema("id", DataType.VARCHAR, is_primary=True, max_length=64),
        FieldSchema("vector", DataType.FLOAT_VECTOR, dim=EMBED_DIM),
        FieldSchema("text", DataType.VARCHAR, max_length=16000),
        FieldSchema("source_file", DataType.VARCHAR, max_length=500),
        FieldSchema("arena", DataType.VARCHAR, max_length=60),
        FieldSchema("doc_type", DataType.VARCHAR, max_length=30),
        FieldSchema("heading", DataType.VARCHAR, max_length=300),
        FieldSchema("chunk_index", DataType.INT64),
        FieldSchema("content_hash", DataType.VARCHAR, max_length=20),
        FieldSchema("entities_json", DataType.VARCHAR, max_length=2000),
        FieldSchema("indexed_at", DataType.VARCHAR, max_length=30),
    ]
    client.create_collection(
        collection_name=COLLECTION_NAME,
        schema=CollectionSchema(fields=fields),
    )

    # Index the vector field, then load the collection.
    index_params = IndexParams()
    index_params.add_index(field_name="vector", index_type="AUTOINDEX", metric_type="COSINE")
    client.create_index(collection_name=COLLECTION_NAME, index_params=index_params)
    client.load_collection(COLLECTION_NAME)
    log.info(f"Created Milvus collection '{COLLECTION_NAME}'")

    return client
325
+
326
+
327
def search_vector(client: MilvusClient, query_vec: List[float], limit: int = 20,
                  arena: Optional[str] = None) -> List[Dict]:
    """Run a vector similarity search over the documents collection.

    Args:
        client: Milvus client returned by ``get_milvus``.
        query_vec: Query embedding.
        limit: Maximum number of hits requested.
        arena: When given, only chunks whose ``arena`` equals this value match.

    Returns:
        One dict per hit with the stored chunk fields, parsed ``entities``,
        the hit ``distance`` as ``score`` and ``engine`` set to "vector".
    """
    filter_expr = None
    if arena:
        # Escape backslashes and double quotes so the value cannot terminate
        # the string literal and alter the filter expression.
        safe_arena = arena.replace("\\", "\\\\").replace('"', '\\"')
        filter_expr = f'arena == "{safe_arena}"'

    results = client.search(
        collection_name=COLLECTION_NAME,
        data=[query_vec],
        limit=limit,
        output_fields=["text", "source_file", "arena", "doc_type", "heading",
                       "chunk_index", "content_hash", "entities_json", "indexed_at"],
        filter=filter_expr,
    )

    out = []
    for hits in results:
        for hit in hits:
            entity = hit.get("entity", {})
            out.append({
                "text": entity.get("text", ""),
                "source_file": entity.get("source_file", ""),
                "arena": entity.get("arena", ""),
                "doc_type": entity.get("doc_type", ""),
                "heading": entity.get("heading", ""),
                "chunk_index": entity.get("chunk_index", 0),
                "content_hash": entity.get("content_hash", ""),
                "entities": _parse_entities_json(entity.get("entities_json", "[]")),
                "score": hit.get("distance", 0),
                "engine": "vector",
            })
    return out
356
+
357
+ # ---------------------------------------------------------------------------
358
+ # FTS5 Operations
359
+ # ---------------------------------------------------------------------------
360
+
361
def get_fts_db() -> sqlite3.Connection:
    """Open the SQLite keyword index, creating tables and triggers if missing.

    Creates the ``chunks`` content table, the external-content FTS5 table
    ``chunks_fts`` over it, and triggers that mirror inserts, deletes and
    updates of ``chunks`` into the FTS index.

    Returns:
        An open connection to FTS_DB in WAL journal mode.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(FTS_DB)
    conn.execute("PRAGMA journal_mode=WAL")
    # Rows displaced by INSERT OR REPLACE only fire the delete trigger when
    # recursive triggers are enabled; without this the FTS index keeps stale
    # entries for replaced chunks.
    conn.execute("PRAGMA recursive_triggers=ON")

    # Create content table
    conn.execute("""
        CREATE TABLE IF NOT EXISTS chunks (
            id TEXT PRIMARY KEY,
            text TEXT,
            source_file TEXT,
            arena TEXT,
            doc_type TEXT,
            heading TEXT,
            chunk_index INTEGER,
            content_hash TEXT,
            entities_json TEXT,
            indexed_at TEXT
        )
    """)

    # Create FTS5 virtual table
    conn.execute("""
        CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
            text, source_file, arena, heading, entities_json,
            content='chunks',
            content_rowid='rowid'
        )
    """)

    # Triggers for sync
    conn.execute("""
        CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
            INSERT INTO chunks_fts(rowid, text, source_file, arena, heading, entities_json)
            VALUES (new.rowid, new.text, new.source_file, new.arena, new.heading, new.entities_json);
        END
    """)
    conn.execute("""
        CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
            INSERT INTO chunks_fts(chunks_fts, rowid, text, source_file, arena, heading, entities_json)
            VALUES ('delete', old.rowid, old.text, old.source_file, old.arena, old.heading, old.entities_json);
        END
    """)
    # In-place updates must drop the old index entry and add the new one.
    conn.execute("""
        CREATE TRIGGER IF NOT EXISTS chunks_au AFTER UPDATE ON chunks BEGIN
            INSERT INTO chunks_fts(chunks_fts, rowid, text, source_file, arena, heading, entities_json)
            VALUES ('delete', old.rowid, old.text, old.source_file, old.arena, old.heading, old.entities_json);
            INSERT INTO chunks_fts(rowid, text, source_file, arena, heading, entities_json)
            VALUES (new.rowid, new.text, new.source_file, new.arena, new.heading, new.entities_json);
        END
    """)

    conn.commit()
    return conn
408
+
409
+
410
def search_fts(conn: sqlite3.Connection, query: str, limit: int = 20,
               arena: Optional[str] = None) -> List[Dict]:
    """Run a BM25 keyword search over the FTS5 index.

    Punctuation is stripped from ``query`` and every remaining token is
    double-quoted, so all tokens must match and words such as AND, OR, NOT or
    NEAR are searched as text rather than parsed as FTS5 operators.

    Args:
        conn: Connection returned by ``get_fts_db``.
        query: Free-text user query.
        limit: Maximum number of rows returned.
        arena: When given, restricts results to chunks with this arena.

    Returns:
        One dict per hit with the stored chunk fields, parsed ``entities``,
        the negated bm25 rank as ``score`` and ``engine`` set to "bm25". An
        empty list is returned for a blank query or when the query fails.
    """
    # Escape FTS5 special chars, then quote each token.
    tokens = re.sub(r'[^\w\s]', ' ', query).split()
    if not tokens:
        return []
    match_expr = " ".join(f'"{token}"' for token in tokens)

    # Explicit column list: the result layout no longer depends on the
    # physical column order of the chunks table.
    cols = ["id", "text", "source_file", "arena", "doc_type", "heading",
            "chunk_index", "content_hash", "entities_json", "indexed_at", "rank"]
    select_cols = ", ".join(f"c.{name}" for name in cols[:-1])

    params: List[Any] = [match_expr]
    arena_filter = ""
    if arena:
        arena_filter = "AND c.arena = ?"
        params.append(arena)
    params.append(limit)

    sql = f"""
        SELECT {select_cols}, bm25(chunks_fts) as rank
        FROM chunks_fts f
        JOIN chunks c ON c.rowid = f.rowid
        WHERE chunks_fts MATCH ?
        {arena_filter}
        ORDER BY rank
        LIMIT ?
    """

    try:
        rows = conn.execute(sql, params).fetchall()
    except sqlite3.OperationalError as e:
        log.warning(f"FTS query failed: {e}")
        return []

    out = []
    for row in rows:
        d = dict(zip(cols, row))
        out.append({
            "text": d.get("text", ""),
            "source_file": d.get("source_file", ""),
            "arena": d.get("arena", ""),
            "doc_type": d.get("doc_type", ""),
            "heading": d.get("heading", ""),
            "chunk_index": d.get("chunk_index", 0),
            "content_hash": d.get("content_hash", ""),
            "entities": _parse_entities_json(d.get("entities_json", "[]")),
            "score": -d.get("rank", 0),  # BM25 returns negative scores
            "engine": "bm25",
        })
    return out
455
+
456
+ # ---------------------------------------------------------------------------
457
+ # RRF Fusion
458
+ # ---------------------------------------------------------------------------
459
+
460
def rrf_fuse(vector_results: List[Dict], bm25_results: List[Dict]) -> List[Dict]:
    """Combine vector and BM25 rankings with Reciprocal Rank Fusion.

    Results are identified by ``(source_file, chunk_index)``. Each list adds
    ``1 / (RRF_K + rank + 1)`` to a result's score, with rank counted from 0.
    When both lists contain the same chunk, the dict from the vector list is
    the one kept.

    Args:
        vector_results: Hits from the vector engine, best first.
        bm25_results: Hits from the keyword engine, best first.

    Returns:
        The kept result dicts, sorted by fused score (highest first) and
        annotated in place with ``rrf_score`` (rounded to 6 places),
        ``engine_count`` and ``engines`` (sorted engine names).
    """
    scored: Dict[Tuple[Any, Any], Dict[str, Any]] = {}

    for engine, ranked in (("vector", vector_results), ("bm25", bm25_results)):
        for rank, r in enumerate(ranked):
            key = (r["source_file"], r["chunk_index"])
            entry = scored.setdefault(key, {"result": r, "rrf_score": 0, "engines": set()})
            entry["rrf_score"] += 1.0 / (RRF_K + rank + 1)
            entry["engines"].add(engine)

    # Sort by RRF score
    fused = sorted(scored.values(), key=lambda x: x["rrf_score"], reverse=True)

    out = []
    for item in fused:
        r = item["result"]
        r["rrf_score"] = round(item["rrf_score"], 6)
        r["engine_count"] = len(item["engines"])
        # Sorted so the list is stable across runs; raw set iteration order
        # varies with string hash randomisation.
        r["engines"] = sorted(item["engines"])
        out.append(r)

    return out
490
+
491
+ # ---------------------------------------------------------------------------
492
+ # Indexing
493
+ # ---------------------------------------------------------------------------
494
+
495
+ def index_documents(paths: List[str], arena: Optional[str] = None,
496
+ doc_type: Optional[str] = None,
497
+ extract_entities_flag: bool = True,
498
+ use_enhanced_ingest: bool = True) -> Dict:
499
+ """Index documents into both Milvus and FTS5.
500
+
501
+ Supports: .md, .txt, .markdown, .pdf (via enhanced_ingest)
502
+ """
503
+ milvus = get_milvus()
504
+ fts_conn = get_fts_db()
505
+
506
+ stats = {"files": 0, "chunks": 0, "entities_extracted": 0, "errors": 0, "skipped": 0,
507
+ "tables": 0, "semantic_chunks": 0}
508
+ now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
509
+
510
+ for file_path in paths:
511
+ p = Path(file_path)
512
+ if not p.exists():
513
+ log.warning(f"File not found: {file_path}")
514
+ stats["errors"] += 1
515
+ continue
516
+
517
+ supported_exts = (
518
+ '.md', '.txt', '.markdown', '.pdf',
519
+ # enhanced_ingest formats
520
+ '.csv', '.json', '.yaml', '.yml', '.toml',
521
+ '.py', '.js', '.ts', '.go', '.rs', '.java', '.c', '.cpp', '.h',
522
+ '.docx', '.doc', '.pptx', '.ppt', '.xlsx', '.xls',
523
+ '.rtf', '.odt', '.epub', '.tex',
524
+ '.html', '.htm', '.xml',
525
+ '.ipynb',
526
+ )
527
+ if not p.suffix.lower() in supported_exts:
528
+ log.debug(f"Skipping unsupported: {file_path}")
529
+ stats["skipped"] += 1
530
+ continue
531
+
532
+ # Use enhanced ingest for PDFs (and optionally for all docs)
533
+ if p.suffix.lower() == '.pdf' or use_enhanced_ingest:
534
+ try:
535
+ from enhanced_ingest import ingest_document, Chunk
536
+ result = ingest_document(str(p), arena or detect_arena(str(p)))
537
+
538
+ file_arena = result["arena"]
539
+ file_doc_type = doc_type or detect_doc_type(str(p))
540
+ source_file = str(p)
541
+
542
+ # Purge old chunks
543
+ _purge_file(milvus, fts_conn, source_file)
544
+
545
+ milvus_batch = []
546
+ for chunk_data in result["chunks"]:
547
+ chunk_text = chunk_data["text"]
548
+ c_hash = content_hash(chunk_text)
549
+ idx = chunk_data["chunk_index"]
550
+ chunk_id = f"{c_hash}_{idx}"
551
+
552
+ # Track semantic vs fixed chunks
553
+ if chunk_data.get("metadata", {}).get("type") == "semantic":
554
+ stats["semantic_chunks"] += 1
555
+ if chunk_data.get("metadata", {}).get("type") == "table":
556
+ stats["tables"] += 1
557
+
558
+ # Extract entities
559
+ entities = []
560
+ if extract_entities_flag and len(chunk_text) > 50:
561
+ entities = extract_entities(chunk_text)
562
+ if entities:
563
+ stats["entities_extracted"] += len(entities)
564
+
565
+ entities_json = json.dumps(entities)
566
+ vector = embed_text(chunk_text)
567
+
568
+ milvus_batch.append({
569
+ "id": chunk_id,
570
+ "vector": vector,
571
+ "text": chunk_text[:15000],
572
+ "source_file": source_file[:500],
573
+ "arena": file_arena[:60],
574
+ "doc_type": file_doc_type[:30],
575
+ "heading": chunk_data.get("heading", "")[:300],
576
+ "chunk_index": idx,
577
+ "content_hash": c_hash,
578
+ "entities_json": entities_json[:2000],
579
+ "indexed_at": now,
580
+ })
581
+
582
+ fts_conn.execute(
583
+ "INSERT OR REPLACE INTO chunks VALUES (?,?,?,?,?,?,?,?,?,?)",
584
+ (chunk_id, chunk_text[:15000], source_file[:500], file_arena[:60],
585
+ file_doc_type[:30], chunk_data.get("heading", "")[:300], idx,
586
+ c_hash, entities_json[:2000], now),
587
+ )
588
+
589
+ if milvus_batch:
590
+ # pymilvus 2.6+ requires keyword args; old positional form
591
+ # silently no-ops which causes vector hits to be empty.
592
+ milvus.insert(collection_name=COLLECTION_NAME, data=milvus_batch)
593
+ fts_conn.commit()
594
+
595
+ stats["files"] += 1
596
+ stats["chunks"] += len(result["chunks"])
597
+ log.info(f"Indexed (enhanced): {p.name} — {len(result['chunks'])} chunks, {len(result.get('tables', []))} tables")
598
+ continue
599
+
600
+ except ImportError:
601
+ log.warning("enhanced_ingest not available, falling back to basic chunking")
602
+ except Exception as e:
603
+ log.error(f"Enhanced ingest failed for {file_path}: {e}")
604
+ if p.suffix.lower() == '.pdf':
605
+ stats["errors"] += 1
606
+ continue
607
+ # Fall through to basic chunking for non-PDFs
608
+
609
+ try:
610
+ text = p.read_text(errors="replace")
611
+ if len(text.strip()) < 20:
612
+ stats["skipped"] += 1
613
+ continue
614
+
615
+ file_arena = arena or detect_arena(str(p))
616
+ file_doc_type = doc_type or detect_doc_type(str(p))
617
+ source_file = str(p)
618
+
619
+ # Purge old chunks for this file (freshness-aware dedup)
620
+ _purge_file(milvus, fts_conn, source_file)
621
+
622
+ # Chunk the document
623
+ chunks = chunk_markdown(text, file_doc_type)
624
+
625
+ # Process each chunk
626
+ milvus_batch = []
627
+ for idx, chunk in enumerate(chunks):
628
+ chunk_text = chunk["text"]
629
+ c_hash = content_hash(chunk_text)
630
+ chunk_id = f"{c_hash}_{idx}"
631
+
632
+ # Extract entities (ingest-time)
633
+ entities = []
634
+ if extract_entities_flag and len(chunk_text) > 50:
635
+ entities = extract_entities(chunk_text)
636
+ if entities:
637
+ stats["entities_extracted"] += len(entities)
638
+
639
+ entities_json = json.dumps(entities)
640
+
641
+ # Embed
642
+ vector = embed_text(chunk_text)
643
+
644
+ # Prepare Milvus record
645
+ milvus_batch.append({
646
+ "id": chunk_id,
647
+ "vector": vector,
648
+ "text": chunk_text[:15000],
649
+ "source_file": source_file[:500],
650
+ "arena": file_arena[:60],
651
+ "doc_type": file_doc_type[:30],
652
+ "heading": chunk.get("heading", "")[:300],
653
+ "chunk_index": idx,
654
+ "content_hash": c_hash,
655
+ "entities_json": entities_json[:2000],
656
+ "indexed_at": now,
657
+ })
658
+
659
+ # Insert into FTS5
660
+ fts_conn.execute(
661
+ "INSERT OR REPLACE INTO chunks VALUES (?,?,?,?,?,?,?,?,?,?)",
662
+ (chunk_id, chunk_text[:15000], source_file[:500], file_arena[:60],
663
+ file_doc_type[:30], chunk.get("heading", "")[:300], idx,
664
+ c_hash, entities_json[:2000], now),
665
+ )
666
+
667
+ stats["chunks"] += 1
668
+
669
+ # Batch insert into Milvus
670
+ if milvus_batch:
671
+ milvus.upsert(collection_name=COLLECTION_NAME, data=milvus_batch)
672
+
673
+ stats["files"] += 1
674
+ log.info(f"Indexed {p.name}: {len(chunks)} chunks, arena={file_arena}, type={file_doc_type}")
675
+
676
+ except Exception as e:
677
+ log.error(f"Error indexing {file_path}: {e}")
678
+ stats["errors"] += 1
679
+
680
+ fts_conn.commit()
681
+ fts_conn.close()
682
+ return stats
683
+
684
+
685
def _purge_file(milvus: MilvusClient, fts_conn: sqlite3.Connection, source_file: str):
    """Remove every chunk indexed for ``source_file`` from both stores.

    Deletes matching rows from the Milvus collection ``COLLECTION_NAME`` and
    from the SQLite ``chunks`` table. Each store is purged independently and
    failures are only logged at debug level, so a problem in one store never
    prevents the other from being cleaned. The SQLite delete is not committed
    here; the caller owns the transaction.

    Args:
        milvus: Client used to delete from the vector collection.
        fts_conn: Open SQLite connection holding the ``chunks`` table.
        source_file: Exact ``source_file`` value whose chunks are removed.
    """
    # The value is embedded in a double-quoted string literal inside the
    # Milvus filter expression, so backslashes and double quotes must be
    # escaped; otherwise a Windows-style path or a name containing a quote
    # produces a malformed (or unintended) filter.
    escaped = source_file.replace("\\", "\\\\").replace('"', '\\"')
    try:
        milvus.delete(
            collection_name=COLLECTION_NAME,
            filter=f'source_file == "{escaped}"',
        )
    except Exception as e:
        log.debug(f"Milvus purge (may be empty): {e}")

    try:
        # Parameter binding already makes the SQLite side safe.
        fts_conn.execute("DELETE FROM chunks WHERE source_file = ?", (source_file,))
    except Exception as e:
        log.debug(f"FTS purge: {e}")
701
+
702
+
703
+ def _parse_entities_json(s: str) -> List[str]:
704
+ """Safely parse entities JSON."""
705
+ try:
706
+ return json.loads(s) if s else []
707
+ except (json.JSONDecodeError, TypeError):
708
+ return []
709
+
710
+ # ---------------------------------------------------------------------------
711
+ # Search
712
+ # ---------------------------------------------------------------------------
713
+
714
def search(query: str, method: str = "hybrid", limit: int = 10,
           arena: Optional[str] = None, enable_rerank: bool = True) -> List[Dict]:
    """Search indexed documents and return at most ``limit`` hits.

    Args:
        query: Free-text query.
        method: ``"vector"`` runs embedding search only, ``"bm25"`` runs
            full-text search only; any other value (including the default
            ``"hybrid"``) runs both and merges them with ``rrf_fuse``.
        limit: Maximum number of results to return.
        arena: Optional arena filter passed through to the search engines.
        enable_rerank: When true and more than one hit was found, the hits
            are passed through ``rerank`` before the final truncation.

    Returns:
        A list of result dicts, truncated to ``limit`` entries.
    """
    if method == "vector":
        vec = embed_text(query)
        results = search_vector(get_milvus(), vec, limit=limit, arena=arena)
    elif method == "bm25":
        results = search_fts(get_fts_db(), query, limit=limit, arena=arena)
    else:
        # Hybrid: RRF fusion. Each engine contributes a candidate pool that
        # is never smaller than 20 but grows with ``limit``, so a request for
        # more than 20 results is not starved by a fixed pool size.
        pool = max(20, limit)
        vec = embed_text(query)
        vector_results = search_vector(get_milvus(), vec, limit=pool, arena=arena)
        bm25_results = search_fts(get_fts_db(), query, limit=pool, arena=arena)
        results = rrf_fuse(vector_results, bm25_results)

    # Reranking a single hit (or none) cannot change the order, so skip it.
    if enable_rerank and len(results) > 1:
        results = rerank(query, results, top_k=limit)

    return results[:limit]
735
+
736
+ # ---------------------------------------------------------------------------
737
+ # Stats & Health
738
+ # ---------------------------------------------------------------------------
739
+
740
def get_stats() -> Dict:
    """Collect index statistics from Milvus and the SQLite ``chunks`` table.

    Returns:
        A dict with ``vector_chunks`` (Milvus ``row_count``), ``fts_chunks``
        (row count of ``chunks``), and per-``arena`` / per-``doc_type`` row
        counts. The function never raises: if a store cannot be queried the
        failure is logged and the affected entries keep their defaults (or
        whatever was collected before the failure).
    """
    stats = {"vector_chunks": 0, "fts_chunks": 0, "arenas": {}, "doc_types": {}}

    try:
        info = get_milvus().get_collection_stats(COLLECTION_NAME)
        stats["vector_chunks"] = info.get("row_count", 0)
    except Exception as exc:
        log.warning("get_stats: Milvus stats unavailable: %s", exc)

    try:
        conn = get_fts_db()
        try:
            row = conn.execute("SELECT COUNT(*) FROM chunks").fetchone()
            stats["fts_chunks"] = row[0] if row else 0

            for name, count in conn.execute(
                "SELECT arena, COUNT(*) FROM chunks GROUP BY arena"
            ).fetchall():
                stats["arenas"][name] = count

            for name, count in conn.execute(
                "SELECT doc_type, COUNT(*) FROM chunks GROUP BY doc_type"
            ).fetchall():
                stats["doc_types"][name] = count
        finally:
            # Close even when a query fails so the connection is not leaked.
            conn.close()
    except Exception as exc:
        log.warning("get_stats: FTS stats unavailable: %s", exc)

    return stats
767
+
768
+
769
def health() -> Dict:
    """Report the status of each backing service.

    Probes Milvus, the SQLite FTS database and the Ollama HTTP endpoint, and
    checks whether a reranker is loaded. ``status["status"]`` starts as
    ``"ok"`` and becomes ``"degraded"`` as soon as any probe raises. A missing
    reranker or a non-200 Ollama response is reported in its own field but
    does not change the overall status.

    Returns:
        A dict with the keys ``status``, ``milvus``, ``fts``, ``ollama`` and
        ``reranker``, each holding a human-readable status string.
    """
    status = {"status": "ok", "milvus": "unknown", "fts": "unknown", "ollama": "unknown", "reranker": "unknown"}

    # Milvus
    try:
        client = get_milvus()
        colls = client.list_collections()
        status["milvus"] = f"ok ({len(colls)} collections)"
    except Exception as e:
        status["milvus"] = f"error: {e}"
        status["status"] = "degraded"

    # FTS
    try:
        conn = get_fts_db()
        try:
            cnt = conn.execute("SELECT COUNT(*) FROM chunks").fetchone()[0]
            status["fts"] = f"ok ({cnt} chunks)"
        finally:
            # Close even when the count query fails so the connection is
            # not leaked on every failing health probe.
            conn.close()
    except Exception as e:
        status["fts"] = f"error: {e}"
        status["status"] = "degraded"

    # Ollama
    try:
        resp = _embed_client.get(f"{OLLAMA_URL}/api/tags", timeout=5)
        status["ollama"] = "ok" if resp.status_code == 200 else f"http {resp.status_code}"
    except Exception as e:
        status["ollama"] = f"error: {e}"
        status["status"] = "degraded"

    # Reranker
    reranker = get_reranker()
    status["reranker"] = "loaded" if reranker else "unavailable (CPU fallback to RRF)"

    return status
805
+
806
+ # ---------------------------------------------------------------------------
807
+ # FastAPI Server
808
+ # ---------------------------------------------------------------------------
809
+
810
def serve(port: int = DEFAULT_PORT):
    """Run the document store as an HTTP API server.

    Builds a FastAPI app exposing health, stats, search, index, batch-index,
    purge and rebuild endpoints, then hands it to ``uvicorn.run``. The bind
    host is read from the ``HOST`` environment variable and defaults to
    ``127.0.0.1``.

    Args:
        port: TCP port to listen on.
    """
    from fastapi import FastAPI, Query as Q, HTTPException
    from pydantic import BaseModel
    import uvicorn

    api = FastAPI(title="L6 Document Store", version="1.0.0")

    class IndexRequest(BaseModel):
        # Request body for POST /index.
        paths: List[str]
        arena: Optional[str] = None
        doc_type: Optional[str] = None
        extract_entities: bool = True

    @api.get("/health")
    def api_health():
        return health()

    @api.get("/stats")
    def api_stats():
        return get_stats()

    @api.get("/search")
    def api_search(
        q: str = Q(..., description="Search query"),
        method: str = Q("hybrid", description="hybrid|vector|bm25"),
        limit: int = Q(10, ge=1, le=50),
        arena: Optional[str] = Q(None),
        rerank: bool = Q(True),
    ):
        results = search(q, method=method, limit=limit, arena=arena, enable_rerank=rerank)
        return {"query": q, "method": method, "results": results, "count": len(results)}

    @api.post("/search")
    def api_search_post(
        q: str,
        method: str = "hybrid",
        limit: int = 10,
        arena: Optional[str] = None,
        rerank: bool = True,
    ):
        """POST version of search for compatibility."""
        results = search(q, method=method, limit=limit, arena=arena, enable_rerank=rerank)
        return {"query": q, "method": method, "results": results, "count": len(results)}

    @api.post("/index")
    def api_index(req: IndexRequest):
        stats = index_documents(
            req.paths, arena=req.arena, doc_type=req.doc_type,
            extract_entities_flag=req.extract_entities,
        )
        return {"status": "ok", "stats": stats}

    @api.post("/index-batch")
    def api_index_batch(req: dict):
        """Index a batch of in-memory documents in a single batched
        NV-Embed call + a single milvus insert + one FTS write.

        Roughly 30-50x faster than calling /index for the equivalent
        files because the legacy path does one embed roundtrip per
        chunk. This endpoint exists for tests, smoke runs and bench
        harnesses where small corpora need to land quickly.

        Request body::

            {
              "arena": "benchmark",
              "records": [
                {
                  "id": "doc1",                # required, becomes chunk id prefix
                  "text": "…",                 # required, indexed as one chunk
                  "source_file": "doc1.md",    # optional
                  "doc_type": "general",       # optional, default "general"
                  "heading": "…"               # optional
                }, …
              ]
            }

        Returns::

            {"status": "ok", "inserted": N, "embed_ms": float, "insert_ms": float}

        Responds with HTTP 400 when ``records`` is not a list of objects and
        with HTTP 500 when the embedding call fails.
        """
        import time as _time, hashlib as _hashlib
        from datetime import datetime as _dt, timezone as _tz

        records = req.get("records") or []
        arena = req.get("arena") or "general"
        if not records:
            return {"status": "ok", "inserted": 0}
        # Reject malformed payloads explicitly instead of failing later with
        # an AttributeError on ``r.get``.
        if not isinstance(records, list) or not all(isinstance(r, dict) for r in records):
            raise HTTPException(status_code=400, detail="'records' must be a list of objects")

        texts = [(r.get("text") or "")[:16000] for r in records]

        # Resolve each record's id and source file once; both the Milvus rows
        # and the FTS rows below use the same values.
        idents = []
        for r, txt in zip(records, texts):
            rid = r.get("id") or _hashlib.sha1(txt.encode("utf-8")).hexdigest()[:32]
            idents.append((rid, r.get("source_file") or f"{rid}.md"))

        # Single batched embed call (OpenAI-compat first, lambda-gateway fallback).
        t0 = _time.time()
        try:
            embs = _embed_post(texts)
        except Exception as exc:
            raise HTTPException(status_code=500, detail=f"embed failed: {exc}") from exc
        embed_ms = (_time.time() - t0) * 1000.0

        # Single milvus insert.
        milvus = get_milvus()
        now = _dt.now(_tz.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        rows = []
        for (rid, src), r, emb, txt in zip(idents, records, embs, texts):
            if emb is None:
                # No embedding came back for this record; skip the vector row.
                continue
            rows.append({
                "id": f"l6:{rid}:0"[:63],
                "vector": emb,
                "text": txt,
                "source_file": src[:500],
                "arena": arena[:60],
                "doc_type": (r.get("doc_type") or "general")[:30],
                "heading": (r.get("heading") or "")[:300],
                "chunk_index": 0,
                "content_hash": _hashlib.sha1(txt.encode("utf-8")).hexdigest()[:20],
                "entities_json": "[]",
                "indexed_at": now,
            })
        t1 = _time.time()
        if rows:
            milvus.insert(collection_name=COLLECTION_NAME, data=rows)
        insert_ms = (_time.time() - t1) * 1000.0

        # Single FTS write (best-effort — search still works without it).
        # NOTE(review): this writes to ``chunks_fts`` while _purge_file and the
        # stats queries use ``chunks`` — confirm the schema keeps them in sync.
        try:
            fts_conn = get_fts_db()
            try:
                for (rid, src), r, txt in zip(idents, records, texts):
                    fts_conn.execute(
                        "INSERT INTO chunks_fts(text, source_file, arena, heading, entities_json) "
                        "VALUES (?, ?, ?, ?, ?)",
                        (txt, src, arena, (r.get("heading") or ""), "[]"),
                    )
                fts_conn.commit()
            finally:
                # Close even when an insert fails so the connection is not leaked.
                fts_conn.close()
        except Exception as exc:
            log.warning("FTS write failed in /index-batch: %s", exc)

        return {
            "status": "ok",
            "inserted": len(rows),
            "embed_ms": round(embed_ms, 1),
            "insert_ms": round(insert_ms, 1),
        }

    @api.delete("/purge")
    def api_purge(source_file: str = Q(...)):
        """Remove all chunks for a source file."""
        milvus = get_milvus()
        fts_conn = get_fts_db()
        try:
            _purge_file(milvus, fts_conn, source_file)
            fts_conn.commit()
        finally:
            fts_conn.close()
        return {"status": "purged", "source_file": source_file}

    @api.post("/rebuild-index")
    def api_rebuild():
        """Force Milvus index rebuild."""
        milvus = get_milvus()
        milvus.release_collection(COLLECTION_NAME)
        milvus.load_collection(COLLECTION_NAME)
        return {"status": "rebuilt"}

    # Resolve the bind host once so the log line reports the address that
    # uvicorn actually listens on, including a HOST override.
    host = os.environ.get("HOST", "127.0.0.1")
    log.info(f"L6 Document Store — http://{host}:{port}")
    uvicorn.run(api, host=host, port=port, log_level="info")
980
+
981
+ # ---------------------------------------------------------------------------
982
+ # CLI
983
+ # ---------------------------------------------------------------------------
984
+
985
def main():
    """Command-line entry point: parse arguments and run the chosen command.

    Supported commands are ``serve``, ``index``, ``search``, ``health`` and
    ``stats``. ``index`` and ``search`` print a usage hint and return when no
    positional arguments are given.
    """
    cli = argparse.ArgumentParser(description="L6 Document Store")
    cli.add_argument("command", choices=["serve", "index", "search", "health", "stats"])
    cli.add_argument("args", nargs="*")
    cli.add_argument("--port", "-p", type=int, default=DEFAULT_PORT)
    cli.add_argument("--arena", "-a", type=str, default=None)
    cli.add_argument("--doc-type", "-t", type=str, default=None)
    cli.add_argument("--method", "-m", type=str, default="hybrid")
    cli.add_argument("--limit", "-l", type=int, default=10)
    cli.add_argument("--no-entities", action="store_true")
    cli.add_argument("--no-rerank", action="store_true")

    opts = cli.parse_args()
    command = opts.command

    if command == "serve":
        serve(port=opts.port)
        return

    if command == "index":
        targets = opts.args
        if not targets:
            print("Usage: l6-document-store.py index <file1.md> [file2.md ...]")
            print("       l6-document-store.py index ~/memory/research/*.md")
            return
        summary = index_documents(
            targets,
            arena=opts.arena,
            doc_type=opts.doc_type,
            extract_entities_flag=not opts.no_entities,
        )
        print(json.dumps(summary, indent=2))
        return

    if command == "search":
        # Positional words are joined back into a single query string.
        query = " ".join(opts.args)
        if not query:
            print("Usage: l6-document-store.py search 'your query'")
            return
        hits = search(
            query,
            method=opts.method,
            limit=opts.limit,
            arena=opts.arena,
            enable_rerank=not opts.no_rerank,
        )
        for rank, hit in enumerate(hits, 1):
            source = hit.get('source_file', '?')
            score = hit.get('rrf_score', 0)
            engines = hit.get('engines', '?')
            print(f"\n--- [{rank}] {source} (rrf={score:.4f}, engines={engines}) ---")
            if hit.get("heading"):
                print(f"Heading: {hit['heading']}")
            if hit.get("entities"):
                shown = hit['entities'][:10]
                print(f"Entities: {', '.join(shown)}")
            print(hit["text"][:300])
        return

    if command == "health":
        print(json.dumps(health(), indent=2))
        return

    if command == "stats":
        print(json.dumps(get_stats(), indent=2))


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()