@pentatonic-ai/ai-agent-sdk 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/README.md +3 -3
  2. package/bin/cli.js +1 -1
  3. package/bin/commands/config.js +1 -1
  4. package/dist/index.cjs +1 -1
  5. package/dist/index.js +1 -1
  6. package/package.json +2 -2
  7. package/packages/doctor/src/checks/local-memory.js +2 -2
  8. package/packages/memory/README.md +2 -2
  9. package/packages/memory/openclaw-plugin/README.md +2 -2
  10. package/packages/memory/openclaw-plugin/openclaw.plugin.json +1 -1
  11. package/packages/memory/src/server.js +2 -2
  12. package/packages/memory-engine-v2/.env.example +30 -0
  13. package/packages/memory-engine-v2/README.md +125 -0
  14. package/packages/memory-engine-v2/compat/Dockerfile +11 -0
  15. package/packages/memory-engine-v2/compat/requirements.txt +6 -0
  16. package/packages/memory-engine-v2/compat/server.py +1047 -0
  17. package/packages/memory-engine-v2/docker-compose.aws.yml +78 -0
  18. package/packages/memory-engine-v2/docker-compose.yml +206 -0
  19. package/packages/memory-engine-v2/extractor-async/Dockerfile +14 -0
  20. package/packages/memory-engine-v2/extractor-async/confidence.py +62 -0
  21. package/packages/memory-engine-v2/extractor-async/noise_filter.py +144 -0
  22. package/packages/memory-engine-v2/extractor-async/requirements.txt +2 -0
  23. package/packages/memory-engine-v2/extractor-async/test_confidence.py +76 -0
  24. package/packages/memory-engine-v2/extractor-async/test_noise_filter.py +177 -0
  25. package/packages/memory-engine-v2/extractor-async/worker.py +797 -0
  26. package/packages/memory-engine-v2/extractor-sync/Dockerfile +11 -0
  27. package/packages/memory-engine-v2/extractor-sync/requirements.txt +4 -0
  28. package/packages/memory-engine-v2/extractor-sync/server.py +424 -0
  29. package/packages/memory-engine-v2/org-model/migrations/001_init.sql +390 -0
  30. package/packages/memory-engine-v2/tests/e2e_smoke.py +356 -0
  31. package/packages/memory-engine-v2/tests/fixtures/generate_synthetic_corpus.py +758 -0
  32. package/packages/memory-engine/.env.example +0 -13
  33. package/packages/memory-engine/MIGRATION.md +0 -219
  34. package/packages/memory-engine/README.md +0 -145
  35. package/packages/memory-engine/bench/README.md +0 -99
  36. package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +0 -1115
  37. package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +0 -819
  38. package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +0 -1278
  39. package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +0 -1018
  40. package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +0 -1038
  41. package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +0 -961
  42. package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +0 -1115
  43. package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +0 -819
  44. package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +0 -1278
  45. package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +0 -1018
  46. package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +0 -1038
  47. package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +0 -937
  48. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +0 -1115
  49. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +0 -819
  50. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +0 -1278
  51. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +0 -1018
  52. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +0 -1038
  53. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +0 -961
  54. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +0 -1115
  55. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +0 -819
  56. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +0 -1278
  57. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +0 -1018
  58. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +0 -1038
  59. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +0 -883
  60. package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +0 -1115
  61. package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +0 -819
  62. package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +0 -1278
  63. package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +0 -1018
  64. package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +0 -1038
  65. package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +0 -937
  66. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +0 -1115
  67. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +0 -1115
  68. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +0 -819
  69. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +0 -542
  70. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +0 -1278
  71. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +0 -894
  72. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +0 -1018
  73. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +0 -680
  74. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +0 -1038
  75. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +0 -693
  76. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +0 -961
  77. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +0 -727
  78. package/packages/memory-engine/compat/Dockerfile +0 -22
  79. package/packages/memory-engine/compat/server.py +0 -1255
  80. package/packages/memory-engine/docker-compose.test.yml +0 -59
  81. package/packages/memory-engine/docker-compose.yml +0 -255
  82. package/packages/memory-engine/engine/README.md +0 -52
  83. package/packages/memory-engine/engine/l2-hybridrag-proxy.py +0 -1543
  84. package/packages/memory-engine/engine/l5-comms-layer.py +0 -663
  85. package/packages/memory-engine/engine/l6-document-store.py +0 -1018
  86. package/packages/memory-engine/engine/services/_shared/__init__.py +0 -1
  87. package/packages/memory-engine/engine/services/_shared/embed_provider.py +0 -562
  88. package/packages/memory-engine/engine/services/l2/Dockerfile +0 -50
  89. package/packages/memory-engine/engine/services/l2/init_databases.py +0 -81
  90. package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +0 -2721
  91. package/packages/memory-engine/engine/services/l5/Dockerfile +0 -11
  92. package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +0 -808
  93. package/packages/memory-engine/engine/services/l6/Dockerfile +0 -30
  94. package/packages/memory-engine/engine/services/l6/l6-document-store.py +0 -1221
  95. package/packages/memory-engine/engine/services/nv-embed/Dockerfile +0 -28
  96. package/packages/memory-engine/engine/services/nv-embed/server.py +0 -152
  97. package/packages/memory-engine/pme_memory/__init__.py +0 -0
  98. package/packages/memory-engine/pme_memory/__main__.py +0 -129
  99. package/packages/memory-engine/pme_memory/artifacts.py +0 -95
  100. package/packages/memory-engine/pme_memory/embed.py +0 -74
  101. package/packages/memory-engine/pme_memory/health.py +0 -36
  102. package/packages/memory-engine/pme_memory/hygiene.py +0 -159
  103. package/packages/memory-engine/pme_memory/indexer.py +0 -200
  104. package/packages/memory-engine/pme_memory/needs.py +0 -55
  105. package/packages/memory-engine/pme_memory/provenance.py +0 -80
  106. package/packages/memory-engine/pme_memory/scoring.py +0 -168
  107. package/packages/memory-engine/pme_memory/search.py +0 -52
  108. package/packages/memory-engine/pme_memory/store.py +0 -86
  109. package/packages/memory-engine/pme_memory/synthesis.py +0 -114
  110. package/packages/memory-engine/pyproject.toml +0 -65
  111. package/packages/memory-engine/scripts/kg-extractor.py +0 -557
  112. package/packages/memory-engine/scripts/kg-preflexor-v2.py +0 -738
  113. package/packages/memory-engine/scripts/wipe-legacy-l3-entities.py +0 -128
  114. package/packages/memory-engine/tests/e2e_arena.sh +0 -259
  115. package/packages/memory-engine/tests/embed_stub/Dockerfile +0 -13
  116. package/packages/memory-engine/tests/embed_stub/server.py +0 -80
  117. package/packages/memory-engine/tests/test_aggregate.py +0 -333
  118. package/packages/memory-engine/tests/test_api_contract.sh +0 -57
  119. package/packages/memory-engine/tests/test_arena_safety.py +0 -232
  120. package/packages/memory-engine/tests/test_channel_stat_reader.py +0 -437
  121. package/packages/memory-engine/tests/test_channel_stat_rollups.py +0 -308
  122. package/packages/memory-engine/tests/test_compat_nv_embed_probe.py +0 -48
  123. package/packages/memory-engine/tests/test_embed_provider.py +0 -693
  124. package/packages/memory-engine/tests/test_l2_qmd_vec_search.py +0 -280
  125. package/packages/memory-engine/tests/test_l3_arena_isolation.py +0 -412
  126. package/packages/memory-engine/tests/test_l6_module_load.py +0 -84
  127. package/packages/memory-engine/tests/test_people_list_reader.py +0 -432
@@ -1,1221 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- L6 Document Store — HybridRAG for Document Retrieval
4
-
5
- Features:
6
- - Milvus Lite (vector) + SQLite FTS5 (BM25) + RRF fusion
7
- - Cross-encoder reranker (ms-marco-MiniLM-L-6-v2)
8
- - Ingest-time entity extraction via Ollama graph-preflexor
9
- - Adaptive chunk sizing by doc_type
10
- - Freshness-aware dedup (purge-and-replace on re-index)
11
- - Confidence scoring (RRF + engine_count + reranker_score)
12
-
13
- Port: 8037
14
- """
15
-
16
- import argparse
17
- import hashlib
18
- import json
19
- import logging
20
- import os
21
- import re
22
- import sqlite3
23
- import sys
24
- import time
25
- from datetime import datetime, timezone
26
- from pathlib import Path
27
- from typing import Any, Dict, List, Optional, Tuple
28
-
29
- import httpx
30
- from pymilvus import MilvusClient, DataType, CollectionSchema, FieldSchema
31
- from pymilvus.milvus_client.index import IndexParams
32
-
33
- # Shared embed client lives at engine/services/_shared/.
34
- sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
35
- from _shared.embed_provider import EmbedClient # noqa: E402
36
-
37
- # ---------------------------------------------------------------------------
38
- # Config
39
- # ---------------------------------------------------------------------------
40
-
41
# Directory that holds both on-disk stores. Defaults to
# ~/l6-document-store/data and can be overridden with L6_DATA_DIR.
DATA_DIR = Path(os.environ.get("L6_DATA_DIR", str(Path.home() / "l6-document-store" / "data")))
# Milvus Lite database file; passed as the MilvusClient `uri` in get_milvus().
MILVUS_DB = str(DATA_DIR / "documents.db")
# SQLite file backing the FTS5 keyword index; opened in get_fts_db().
FTS_DB = str(DATA_DIR / "documents_fts.db")
# Base URL of the Ollama server queried by extract_entities().
OLLAMA_URL = os.environ.get("L6_OLLAMA_URL", "http://localhost:11434")
# True only when L6_NV_EMBED_ENABLED is the string "true" (case-insensitive).
# NOTE(review): not referenced in the visible part of this module — confirm
# it is still consumed somewhere.
NV_EMBED_ENABLED = os.environ.get("L6_NV_EMBED_ENABLED", "true").lower() == "true"
# Dimension of the Milvus `vector` field created in get_milvus().
# Presumably must equal the embedding model's output size — TODO confirm.
EMBED_DIM = int(os.environ.get("L6_EMBED_DIM", "4096"))

# Cached EmbedClient instance; populated on the first _embed_client() call.
_embed: EmbedClient | None = None
49
-
50
-
51
def _embed_client() -> EmbedClient:
    """Return the module-wide EmbedClient, constructing it on first use.

    The client is built from environment variables carrying the ``L6_``
    prefix and cached in the module-level ``_embed`` variable, so later
    calls return the same instance.
    """
    global _embed
    if _embed is not None:
        return _embed

    _embed = EmbedClient.from_env(
        prefix="L6_",
        default_url="http://localhost:8041/v1/embeddings",
        default_model="nomic-embed-text",
    )
    return _embed
61
-
62
-
63
def _embed_post(texts):
    """Embed a batch of texts through the shared EmbedClient.

    The provider profile is selected by the L6_EMBED_PROVIDER environment
    variable (openai | pentatonic-gateway | cohere | custom); see
    engine/services/_shared/embed_provider.py for details.
    """
    client = _embed_client()
    vectors = client.embed_batch(texts)
    return vectors
68
-
69
# Name of the Milvus collection that stores document chunks.
COLLECTION_NAME = "documents"
# Smoothing constant k in the per-engine RRF term 1 / (k + rank + 1)
# accumulated by rrf_fuse().
RRF_K = 60
# Same port the module docstring advertises; its consumer is outside the
# visible part of this file.
DEFAULT_PORT = 8037

# Chunk sizes by doc_type. `max_chars` is the upper bound on a chunk's length
# and `overlap` is how many characters consecutive chunks share (see
# _split_section). Unknown doc types fall back to "general" in chunk_markdown.
CHUNK_CONFIG = {
    "legal": {"max_chars": 2500, "overlap": 400},
    "financial": {"max_chars": 2500, "overlap": 400},
    "governance": {"max_chars": 2500, "overlap": 400},
    "technical": {"max_chars": 2000, "overlap": 300},
    "general": {"max_chars": 1500, "overlap": 200},
}

# Configure root logging at import time and create this module's logger.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
log = logging.getLogger("l6-document-store")
84
-
85
- # ---------------------------------------------------------------------------
86
- # Embedding
87
- # ---------------------------------------------------------------------------
88
-
89
# HTTP client for Ollama entity extraction (extract_entities below). Named
# `_ollama_http`, not `_embed_client`, because the embedding HTTP client now
# lives behind the EmbedClient factory above — sharing the `_embed_client`
# identifier caused a TypeError in v0.8.0–0.8.2 where the legacy module-level
# binding shadowed the factory function.
# The 60 s timeout here is the client default; extract_entities passes its
# own per-request timeout.
_ollama_http = httpx.Client(timeout=60)
95
-
96
def embed_text(text: str) -> List[float]:
    """Embed a single text and return its vector.

    Only the first 8000 characters of ``text`` are sent; the request goes
    through ``_embed_post`` as a one-element batch.
    """
    truncated = text[:8000]
    vectors = _embed_post([truncated])
    return vectors[0]
99
-
100
-
101
def embed_batch(texts: List[str]) -> List[List[float]]:
    """Embed several texts in one ``_embed_post`` call.

    Each text is cut to its first 8000 characters before being sent.
    """
    truncated = []
    for item in texts:
        truncated.append(item[:8000])
    return _embed_post(truncated)
104
-
105
- # ---------------------------------------------------------------------------
106
- # Cross-Encoder Reranker
107
- # ---------------------------------------------------------------------------
108
-
109
# Cached cross-encoder (None when unavailable) and a flag recording that a
# load has already been attempted, so a failed import is not retried.
_reranker = None
_reranker_loaded = False


def get_reranker():
    """Return the cross-encoder reranker, loading it on the first call.

    Returns None when ``sentence_transformers`` or the model cannot be
    loaded; the failure is logged once and remembered for later calls.
    """
    global _reranker, _reranker_loaded
    if _reranker_loaded:
        return _reranker

    try:
        # Imported lazily so the module still works without the package.
        from sentence_transformers import CrossEncoder
        _reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
        log.info("Cross-encoder reranker loaded (ms-marco-MiniLM-L-6-v2)")
    except Exception as e:
        log.warning(f"Cross-encoder not available: {e}")
        _reranker = None
    _reranker_loaded = True
    return _reranker
125
-
126
-
127
def rerank(query: str, results: List[Dict], top_k: int = 10) -> List[Dict]:
    """Reorder the leading results with the cross-encoder and return the top_k.

    Only the first 20 results are scored, each on the first 512 characters
    of its "text". Scored items gain a "reranker_score" key and that leading
    slice of ``results`` is re-sorted in place. When no reranker is available
    or ``results`` is empty, the input order is kept.
    """
    model = get_reranker()
    if model and results:
        head = results[:20]
        scores = model.predict([(query, item["text"][:512]) for item in head])

        for position, item in enumerate(head):
            item["reranker_score"] = float(scores[position])

        head.sort(key=lambda item: item.get("reranker_score", -999), reverse=True)
        # Write the re-ordered head back into the caller's list.
        results[:20] = head
    return results[:top_k]
141
-
142
- # ---------------------------------------------------------------------------
143
- # Entity Extraction (ingest-time)
144
- # ---------------------------------------------------------------------------
145
-
146
def extract_entities(text: str) -> List[str]:
    """Ask the Ollama ``graph-preflexor`` model for named entities in ``text``.

    Sends the first 2000 characters of ``text`` and parses the first
    ``[...]`` span of the reply as a JSON array. Returns at most 20 stripped
    strings longer than one character. Any failure (HTTP error, bad JSON,
    non-200 status, no array in the reply) yields an empty list; exceptions
    are logged at debug level.
    """
    try:
        payload = {
            "model": "graph-preflexor",
            "prompt": f"Extract all named entities (people, companies, products, places, dates) from this text. Return ONLY a JSON array of strings, nothing else.\n\nText: {text[:2000]}",
            "stream": False,
        }
        resp = _ollama_http.post(f"{OLLAMA_URL}/api/generate", json=payload, timeout=15)
        if resp.status_code != 200:
            return []

        raw = resp.json().get("response", "")
        # Non-greedy: take the first bracketed span in the reply.
        found = re.search(r'\[.*?\]', raw, re.DOTALL)
        if not found:
            return []

        cleaned = []
        for candidate in json.loads(found.group()):
            if candidate and len(str(candidate).strip()) > 1:
                cleaned.append(str(candidate).strip())
        return cleaned[:20]
    except Exception as e:
        log.debug(f"Entity extraction failed: {e}")
    return []
168
-
169
- # ---------------------------------------------------------------------------
170
- # Document Processing
171
- # ---------------------------------------------------------------------------
172
-
173
def detect_doc_type(path: str) -> str:
    """Classify a document by keywords found in its path.

    The lower-cased path is tested against each rule in order and the first
    rule with any keyword occurring as a substring wins. Paths matching no
    rule are "general".
    """
    lowered = path.lower()
    rules = (
        ("legal", ("legal", "contract", "nda", "agreement", "terms")),
        ("financial", ("finance", "financial", "investor", "revenue", "budget", "portfolio")),
        ("governance", ("governance", "policy", "compliance", "audit")),
        ("technical", ("technical", "architecture", "api", "schema", "code")),
    )
    for label, keywords in rules:
        for keyword in keywords:
            if keyword in lowered:
                return label
    return "general"
185
-
186
-
187
def detect_arena(path: str) -> str:
    """Derive the arena/domain label from keywords in a path.

    Rules are tried in order against the lower-cased path; the first rule
    with a keyword occurring as a substring decides the arena. Paths
    matching nothing are "general".
    """
    lowered = path.lower()
    rules = (
        ("company", ("company", "internal")),
        ("project", ("project", "proj-")),
        ("sarai", ("sarai", "defence")),
        ("research", ("research",)),
        ("finance", ("finance", "portfolio", "stock")),
    )
    for label, keywords in rules:
        for keyword in keywords:
            if keyword in lowered:
                return label
    return "general"
201
-
202
-
203
def content_hash(text: str) -> str:
    """Return the first 16 hex characters of the SHA-256 of ``text``.

    The text is encoded with the default codec (UTF-8) before hashing. The
    shortened digest is used as a dedup key.
    """
    digest = hashlib.sha256()
    digest.update(text.encode())
    return digest.hexdigest()[:16]
206
-
207
-
208
def chunk_markdown(text: str, doc_type: str = "general") -> List[Dict]:
    """Split markdown into heading-aware chunks sized for ``doc_type``.

    The text is cut at level-2 and level-3 headings (``##`` / ``###``). Each
    section body is passed to ``_split_section`` together with the heading
    that precedes it (empty for text before the first heading). Size and
    overlap come from ``CHUNK_CONFIG``; unknown types use "general".

    Returns a list of ``{"text", "heading"}`` dicts.
    """
    settings = CHUNK_CONFIG.get(doc_type, CHUNK_CONFIG["general"])
    limit = settings["max_chars"]
    carry = settings["overlap"]

    pieces: List[Dict] = []
    heading = ""
    body = ""

    # The capturing group makes re.split keep the heading lines in the output.
    for part in re.split(r'(^#{2,3}\s+.+$)', text, flags=re.MULTILINE):
        if re.match(r'^#{2,3}\s+', part) is None:
            body += part
            continue

        # A new heading closes the section collected so far.
        pending = body.strip()
        if pending:
            pieces.extend(_split_section(pending, heading, limit, carry))
        heading = part.strip().lstrip('#').strip()
        body = ""

    # Flush whatever follows the last heading.
    tail = body.strip()
    if tail:
        pieces.extend(_split_section(tail, heading, limit, carry))

    # Nothing produced (e.g. headings without bodies): chunk the raw text.
    if not pieces:
        whole = text.strip()
        if whole:
            pieces = _split_section(whole, "", limit, carry)

    return pieces
240
-
241
-
242
- def _split_section(text: str, heading: str, max_chars: int, overlap: int) -> List[Dict]:
243
- """Split a section into overlapping chunks."""
244
- if len(text) <= max_chars:
245
- return [{"text": text, "heading": heading}]
246
-
247
- chunks = []
248
- start = 0
249
- while start < len(text):
250
- end = start + max_chars
251
-
252
- # Try to break at paragraph boundary
253
- if end < len(text):
254
- para_break = text.rfind('\n\n', start, end)
255
- if para_break > start + max_chars // 2:
256
- end = para_break
257
-
258
- chunk_text = text[start:end].strip()
259
- if chunk_text:
260
- chunks.append({"text": chunk_text, "heading": heading})
261
-
262
- start = end - overlap
263
- if start >= len(text):
264
- break
265
-
266
- return chunks
267
-
268
- # ---------------------------------------------------------------------------
269
- # Milvus Operations
270
- # ---------------------------------------------------------------------------
271
-
272
def get_milvus() -> MilvusClient:
    """Open the Milvus database file, creating the collection if it is missing.

    Ensures ``DATA_DIR`` exists, connects to ``MILVUS_DB`` and, when the
    ``documents`` collection is absent, creates it with its schema, builds a
    cosine AUTOINDEX on the vector field and loads it. A new client object
    is created on every call.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    client = MilvusClient(uri=MILVUS_DB)

    if COLLECTION_NAME in client.list_collections():
        return client

    # The max_length values below are mirrored by the slicing done at
    # insert time in index_documents.
    fields = [
        FieldSchema("id", DataType.VARCHAR, is_primary=True, max_length=64),
        FieldSchema("vector", DataType.FLOAT_VECTOR, dim=EMBED_DIM),
        FieldSchema("text", DataType.VARCHAR, max_length=16000),
        FieldSchema("source_file", DataType.VARCHAR, max_length=500),
        FieldSchema("arena", DataType.VARCHAR, max_length=60),
        FieldSchema("doc_type", DataType.VARCHAR, max_length=30),
        FieldSchema("heading", DataType.VARCHAR, max_length=300),
        FieldSchema("chunk_index", DataType.INT64),
        FieldSchema("content_hash", DataType.VARCHAR, max_length=20),
        FieldSchema("entities_json", DataType.VARCHAR, max_length=2000),
        FieldSchema("indexed_at", DataType.VARCHAR, max_length=30),
    ]
    client.create_collection(
        collection_name=COLLECTION_NAME,
        schema=CollectionSchema(fields=fields),
    )

    # Create index
    index_params = IndexParams()
    index_params.add_index(field_name="vector", index_type="AUTOINDEX", metric_type="COSINE")
    client.create_index(collection_name=COLLECTION_NAME, index_params=index_params)
    client.load_collection(COLLECTION_NAME)
    log.info(f"Created Milvus collection '{COLLECTION_NAME}'")

    return client
303
-
304
-
305
def search_vector(client: MilvusClient, query_vec: List[float], limit: int = 20,
                  arena: Optional[str] = None,
                  arenas: Optional[List[str]] = None) -> List[Dict]:
    """Vector similarity search.

    Multi-arena: pass `arenas=[...]` to span more than one tenant scope
    (e.g. tenant-wide + a single user-scope). Builds an `arena in [...]`
    Milvus filter. `arena` is treated as a single-element list when set;
    `arenas` takes precedence when both are given. With neither, no filter
    is applied.

    Args:
        client: Milvus client holding the documents collection.
        query_vec: Query embedding.
        limit: Maximum number of hits requested from Milvus.
        arena: Single arena to restrict the search to.
        arenas: Several arenas to restrict the search to.

    Returns:
        One dict per hit with the stored chunk fields, the parsed
        `entities` list, the `distance` reported by Milvus as `score`, and
        `engine` set to "vector".
    """
    def _quote(value) -> str:
        # Escape backslashes BEFORE quotes. Otherwise a value ending in a
        # backslash (or containing \") would close the string literal early
        # and could alter the arena filter that scopes tenants.
        escaped = str(value).replace('\\', '\\\\').replace('"', '\\"')
        return f'"{escaped}"'

    arena_list = list(arenas) if arenas else ([arena] if arena else [])
    if len(arena_list) == 1:
        filter_expr = f'arena == {_quote(arena_list[0])}'
    elif len(arena_list) > 1:
        quoted = ", ".join(_quote(a) for a in arena_list)
        filter_expr = f'arena in [{quoted}]'
    else:
        filter_expr = ""

    results = client.search(
        collection_name=COLLECTION_NAME,
        data=[query_vec],
        limit=limit,
        output_fields=["text", "source_file", "arena", "doc_type", "heading",
                       "chunk_index", "content_hash", "entities_json", "indexed_at"],
        filter=filter_expr if filter_expr else None,
    )

    out = []
    # `results` holds one hit list per query vector; only one is sent here.
    for hits in results:
        for hit in hits:
            entity = hit.get("entity", {})
            out.append({
                "text": entity.get("text", ""),
                "source_file": entity.get("source_file", ""),
                "arena": entity.get("arena", ""),
                "doc_type": entity.get("doc_type", ""),
                "heading": entity.get("heading", ""),
                "chunk_index": entity.get("chunk_index", 0),
                "content_hash": entity.get("content_hash", ""),
                "entities": _parse_entities_json(entity.get("entities_json", "[]")),
                "score": hit.get("distance", 0),
                "engine": "vector",
            })
    return out
350
-
351
- # ---------------------------------------------------------------------------
352
- # FTS5 Operations
353
- # ---------------------------------------------------------------------------
354
-
355
def get_fts_db() -> sqlite3.Connection:
    """Get or create the FTS5 database.

    Opens ``FTS_DB`` (creating ``DATA_DIR`` first), switches it to WAL
    journaling and ensures the following objects exist:

    - ``chunks``: the content table, one row per chunk.
    - ``chunks_fts``: an external-content FTS5 index over ``chunks``.
    - ``chunks_ai`` / ``chunks_ad``: triggers mirroring inserts and deletes
      on ``chunks`` into the index.

    Only INSERT and DELETE are mirrored; there is no trigger for UPDATE.

    Returns:
        An open connection. The caller is responsible for closing it.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(FTS_DB)
    conn.execute("PRAGMA journal_mode=WAL")
    # Rows are written with INSERT OR REPLACE. SQLite fires DELETE triggers
    # for rows removed by REPLACE only when recursive triggers are enabled.
    # Without this, a replaced row leaves stale terms in chunks_fts.
    conn.execute("PRAGMA recursive_triggers=ON")

    # Create content table
    conn.execute("""
        CREATE TABLE IF NOT EXISTS chunks (
            id TEXT PRIMARY KEY,
            text TEXT,
            source_file TEXT,
            arena TEXT,
            doc_type TEXT,
            heading TEXT,
            chunk_index INTEGER,
            content_hash TEXT,
            entities_json TEXT,
            indexed_at TEXT
        )
    """)

    # Create FTS5 virtual table
    conn.execute("""
        CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
            text, source_file, arena, heading, entities_json,
            content='chunks',
            content_rowid='rowid'
        )
    """)

    # Triggers for sync
    conn.execute("""
        CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
            INSERT INTO chunks_fts(rowid, text, source_file, arena, heading, entities_json)
            VALUES (new.rowid, new.text, new.source_file, new.arena, new.heading, new.entities_json);
        END
    """)
    # The special 'delete' command removes the old row's terms from the index.
    conn.execute("""
        CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
            INSERT INTO chunks_fts(chunks_fts, rowid, text, source_file, arena, heading, entities_json)
            VALUES ('delete', old.rowid, old.text, old.source_file, old.arena, old.heading, old.entities_json);
        END
    """)

    conn.commit()
    return conn
402
-
403
-
404
def search_fts(conn: sqlite3.Connection, query: str, limit: int = 20,
               arena: Optional[str] = None,
               arenas: Optional[List[str]] = None) -> List[Dict]:
    """BM25 keyword search via FTS5.

    Every character of ``query`` that is neither a word character nor
    whitespace is replaced by a space before matching. Pass ``arenas=[...]``
    to restrict hits to several arenas at once, or ``arena`` for a single
    one; ``arenas`` wins when both are given.

    Returns one dict per hit, best match first, with ``engine`` set to
    "bm25". An empty sanitised query or a failing FTS query yields ``[]``.
    """
    # Escape FTS5 special chars
    safe_query = re.sub(r'[^\w\s]', ' ', query).strip()
    if not safe_query:
        return []

    scopes = list(arenas) if arenas else ([arena] if arena else [])
    params = [safe_query]
    arena_filter = ""
    if scopes:
        marks = ", ".join("?" for _ in scopes)
        arena_filter = f"AND c.arena IN ({marks})"
        params.extend(scopes)
    params.append(limit)

    sql = f"""
        SELECT c.*, bm25(chunks_fts) as rank
        FROM chunks_fts f
        JOIN chunks c ON c.rowid = f.rowid
        WHERE chunks_fts MATCH ?
        {arena_filter}
        ORDER BY rank
        LIMIT ?
    """

    try:
        rows = conn.execute(sql, params).fetchall()
    except sqlite3.OperationalError as e:
        log.warning(f"FTS query failed: {e}")
        return []

    # Column order of `chunks`, followed by the computed rank.
    columns = ("id", "text", "source_file", "arena", "doc_type", "heading",
               "chunk_index", "content_hash", "entities_json", "indexed_at", "rank")
    hits = []
    for row in rows:
        record = dict(zip(columns, row))
        hit = {
            name: record.get(name, "")
            for name in ("text", "source_file", "arena", "doc_type", "heading")
        }
        hit["chunk_index"] = record.get("chunk_index", 0)
        hit["content_hash"] = record.get("content_hash", "")
        hit["entities"] = _parse_entities_json(record.get("entities_json", "[]"))
        hit["score"] = -record.get("rank", 0)  # BM25 returns negative scores
        hit["engine"] = "bm25"
        hits.append(hit)
    return hits
460
-
461
- # ---------------------------------------------------------------------------
462
- # RRF Fusion
463
- # ---------------------------------------------------------------------------
464
-
465
def rrf_fuse(vector_results: List[Dict], bm25_results: List[Dict]) -> List[Dict]:
    """Reciprocal Rank Fusion combining vector and BM25 results.

    Results are identified by ``(source_file, chunk_index)``. Each engine
    contributes ``1 / (RRF_K + rank + 1)`` for every list position at which
    a result appears (rank is 0-based).

    Args:
        vector_results: Ranked hits from the vector engine.
        bm25_results: Ranked hits from the BM25 engine.

    Returns:
        The distinct result dicts sorted by fused score, highest first.
        For a result present in both lists the vector engine's dict is the
        one kept. Each returned dict is annotated in place with
        ``rrf_score`` (rounded to 6 decimals), ``engine_count`` and
        ``engines`` (sorted engine names, so the order is deterministic).
    """
    scored = {}

    for engine, ranked in (("vector", vector_results), ("bm25", bm25_results)):
        for rank, r in enumerate(ranked):
            key = (r["source_file"], r["chunk_index"])
            entry = scored.get(key)
            if entry is None:
                entry = {"result": r, "rrf_score": 0, "engines": set()}
                scored[key] = entry
            entry["rrf_score"] += 1.0 / (RRF_K + rank + 1)
            entry["engines"].add(engine)

    # Sort by RRF score; the sort is stable, so ties keep first-seen order.
    fused = sorted(scored.values(), key=lambda x: x["rrf_score"], reverse=True)

    out = []
    for item in fused:
        r = item["result"]
        r["rrf_score"] = round(item["rrf_score"], 6)
        r["engine_count"] = len(item["engines"])
        # Sorted rather than list(set): set order varies between processes.
        r["engines"] = sorted(item["engines"])
        out.append(r)

    return out
495
-
496
- # ---------------------------------------------------------------------------
497
- # Indexing
498
- # ---------------------------------------------------------------------------
499
-
500
def _index_chunk(fts_conn, stats, chunk_text, idx, heading, source_file,
                 file_arena, file_doc_type, extract_entities_flag, now):
    """Write one chunk to the FTS `chunks` table and return its Milvus row.

    Extracts entities (only when enabled and the chunk is longer than 50
    characters), embeds the text, inserts the FTS row and returns the
    equivalent record for the caller to add to its Milvus batch.
    `stats["entities_extracted"]` is updated in place.
    """
    c_hash = content_hash(chunk_text)
    chunk_id = f"{c_hash}_{idx}"

    entities = []
    if extract_entities_flag and len(chunk_text) > 50:
        entities = extract_entities(chunk_text)
        if entities:
            stats["entities_extracted"] += len(entities)

    # Keep the stored JSON within the 2000-char cap by dropping trailing
    # entities. Slicing the serialized string instead would cut it
    # mid-token and store unparseable JSON.
    # NOTE(review): presumes extract_entities returns a list — confirm.
    entities_json = json.dumps(entities)
    while len(entities_json) > 2000 and entities:
        entities = entities[:-1]
        entities_json = json.dumps(entities)

    vector = embed_text(chunk_text)

    text_value = chunk_text[:15000]
    source_value = source_file[:500]
    arena_value = file_arena[:60]
    doc_type_value = file_doc_type[:30]
    heading_value = (heading or "")[:300]

    fts_conn.execute(
        "INSERT OR REPLACE INTO chunks VALUES (?,?,?,?,?,?,?,?,?,?)",
        (chunk_id, text_value, source_value, arena_value,
         doc_type_value, heading_value, idx,
         c_hash, entities_json, now),
    )

    return {
        "id": chunk_id,
        "vector": vector,
        "text": text_value,
        "source_file": source_value,
        "arena": arena_value,
        "doc_type": doc_type_value,
        "heading": heading_value,
        "chunk_index": idx,
        "content_hash": c_hash,
        "entities_json": entities_json,
        "indexed_at": now,
    }


def index_documents(paths: List[str], arena: Optional[str] = None,
                    doc_type: Optional[str] = None,
                    extract_entities_flag: bool = True,
                    use_enhanced_ingest: bool = True) -> Dict:
    """Index documents into both Milvus and FTS5.

    Each file is first offered to `enhanced_ingest.ingest_document`
    (always for PDFs, otherwise when `use_enhanced_ingest` is true). If
    that fails for a non-PDF file, the file is read as text and chunked
    with `chunk_markdown`. Existing chunks for a file are purged before
    its new chunks are written.

    Args:
        paths: Files to index. Missing files count as errors; files with
            an unsupported extension are skipped.
        arena: Arena to tag chunks with; detected per file when omitted.
        doc_type: Document type to tag chunks with; detected per file
            when omitted.
        extract_entities_flag: Run entity extraction on chunks longer
            than 50 characters.
        use_enhanced_ingest: Try `enhanced_ingest` for non-PDF files too.

    Returns:
        Counters: files, chunks, entities_extracted, errors, skipped,
        tables, semantic_chunks.
    """
    milvus = get_milvus()
    fts_conn = get_fts_db()

    stats = {"files": 0, "chunks": 0, "entities_extracted": 0, "errors": 0, "skipped": 0,
             "tables": 0, "semantic_chunks": 0}
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Built once rather than per file.
    supported_exts = frozenset((
        '.md', '.txt', '.markdown', '.pdf',
        # enhanced_ingest formats
        '.csv', '.json', '.yaml', '.yml', '.toml',
        '.py', '.js', '.ts', '.go', '.rs', '.java', '.c', '.cpp', '.h',
        '.docx', '.doc', '.pptx', '.ppt', '.xlsx', '.xls',
        '.rtf', '.odt', '.epub', '.tex',
        '.html', '.htm', '.xml',
        '.ipynb',
    ))

    try:
        for file_path in paths:
            p = Path(file_path)
            if not p.exists():
                log.warning(f"File not found: {file_path}")
                stats["errors"] += 1
                continue

            suffix = p.suffix.lower()
            if suffix not in supported_exts:
                log.debug(f"Skipping unsupported: {file_path}")
                stats["skipped"] += 1
                continue

            is_pdf = suffix == '.pdf'

            # Use enhanced ingest for PDFs (and optionally for all docs)
            if is_pdf or use_enhanced_ingest:
                try:
                    from enhanced_ingest import ingest_document
                    result = ingest_document(str(p), arena or detect_arena(str(p)))

                    file_arena = result["arena"]
                    file_doc_type = doc_type or detect_doc_type(str(p))
                    source_file = str(p)

                    # Purge old chunks
                    _purge_file(milvus, fts_conn, source_file)

                    milvus_batch = []
                    for chunk_data in result["chunks"]:
                        chunk_text = chunk_data["text"]
                        idx = chunk_data["chunk_index"]

                        # Track semantic vs fixed chunks
                        chunk_type = chunk_data.get("metadata", {}).get("type")
                        if chunk_type == "semantic":
                            stats["semantic_chunks"] += 1
                        if chunk_type == "table":
                            stats["tables"] += 1

                        milvus_batch.append(_index_chunk(
                            fts_conn, stats, chunk_text, idx,
                            chunk_data.get("heading", ""), source_file,
                            file_arena, file_doc_type,
                            extract_entities_flag, now,
                        ))

                    if milvus_batch:
                        # Keyword args are required by pymilvus 2.6+.
                        # upsert (not insert) matches the basic path and
                        # the FTS INSERT OR REPLACE, so a chunk id that
                        # already exists is replaced, not duplicated.
                        milvus.upsert(collection_name=COLLECTION_NAME, data=milvus_batch)
                    fts_conn.commit()

                    stats["files"] += 1
                    stats["chunks"] += len(result["chunks"])
                    log.info(f"Indexed (enhanced): {p.name} — {len(result['chunks'])} chunks, {len(result.get('tables', []))} tables")
                    continue

                except ImportError:
                    if is_pdf:
                        # A PDF cannot be chunked as plain text; falling
                        # through would index decoded binary garbage.
                        log.error(f"Enhanced ingest unavailable, cannot index PDF: {file_path}")
                        stats["errors"] += 1
                        continue
                    log.warning("enhanced_ingest not available, falling back to basic chunking")
                except Exception as e:
                    log.error(f"Enhanced ingest failed for {file_path}: {e}")
                    if is_pdf:
                        stats["errors"] += 1
                        continue
                    # Fall through to basic chunking for non-PDFs
                    # NOTE(review): other binary formats in supported_exts
                    # (.docx, .xlsx, ...) also reach the text fallback —
                    # confirm whether that is intended.

            try:
                text = p.read_text(errors="replace")
                if len(text.strip()) < 20:
                    stats["skipped"] += 1
                    continue

                file_arena = arena or detect_arena(str(p))
                file_doc_type = doc_type or detect_doc_type(str(p))
                source_file = str(p)

                # Purge old chunks for this file (freshness-aware dedup)
                _purge_file(milvus, fts_conn, source_file)

                # Chunk the document
                chunks = chunk_markdown(text, file_doc_type)

                milvus_batch = []
                for idx, chunk in enumerate(chunks):
                    milvus_batch.append(_index_chunk(
                        fts_conn, stats, chunk["text"], idx,
                        chunk.get("heading", ""), source_file,
                        file_arena, file_doc_type,
                        extract_entities_flag, now,
                    ))
                    stats["chunks"] += 1

                # Batch write into Milvus
                if milvus_batch:
                    milvus.upsert(collection_name=COLLECTION_NAME, data=milvus_batch)

                stats["files"] += 1
                log.info(f"Indexed {p.name}: {len(chunks)} chunks, arena={file_arena}, type={file_doc_type}")

            except Exception as e:
                log.error(f"Error indexing {file_path}: {e}")
                stats["errors"] += 1

        fts_conn.commit()
    finally:
        # Release the FTS handle even if an unexpected error propagates.
        fts_conn.close()
    return stats
688
-
689
-
690
def _purge_file(milvus: MilvusClient, fts_conn: sqlite3.Connection, source_file: str):
    """Remove all chunks for a source file (freshness-aware re-index).

    Deletes matching rows from the Milvus collection and from the FTS
    `chunks` table. Both deletes are best-effort: failures are logged at
    debug level and never raised. The FTS delete is not committed here;
    the caller owns the transaction.

    Args:
        milvus: Client used for the vector-store delete.
        fts_conn: SQLite connection holding the `chunks` table.
        source_file: Value matched against the `source_file` field.
    """
    # Escape backslashes and double quotes before embedding the value in
    # the filter string literal. Without this, a path containing either
    # (Windows paths, or a crafted /purge argument) breaks the expression
    # or changes what it matches.
    escaped = source_file.replace("\\", "\\\\").replace('"', '\\"')

    try:
        # Purge from Milvus
        milvus.delete(
            collection_name=COLLECTION_NAME,
            filter=f'source_file == "{escaped}"',
        )
    except Exception as e:
        log.debug(f"Milvus purge (may be empty): {e}")

    try:
        # Purge from FTS (parameterized, so no escaping is needed)
        fts_conn.execute("DELETE FROM chunks WHERE source_file = ?", (source_file,))
    except Exception as e:
        log.debug(f"FTS purge: {e}")
706
-
707
-
708
- def _parse_entities_json(s: str) -> List[str]:
709
- """Safely parse entities JSON."""
710
- try:
711
- return json.loads(s) if s else []
712
- except (json.JSONDecodeError, TypeError):
713
- return []
714
-
715
- # ---------------------------------------------------------------------------
716
- # Search
717
- # ---------------------------------------------------------------------------
718
-
719
def search(query: str, method: str = "hybrid", limit: int = 10,
           arena: Optional[str] = None,
           arenas: Optional[List[str]] = None,
           enable_rerank: bool = True) -> List[Dict]:
    """Search documents with the specified method.

    arena / arenas: pass either; multi-arena lets a single query span
    multiple tenant scopes (tenant-wide + user-scope). Forwarded
    natively to both the vector path (Milvus `arena IN [...]`) and the
    BM25 path (SQLite `c.arena IN (...)`).

    Args:
        query: Free-text query.
        method: "vector", "bm25", or anything else for hybrid RRF fusion.
        limit: Maximum number of results returned.
        arena: Single arena filter; ignored when `arenas` is non-empty.
        arenas: Multi-arena filter.
        enable_rerank: Pass the results through `rerank` when more than
            one result is available.

    Returns:
        At most `limit` result dicts.
    """
    arena_list = list(arenas) if arenas else ([arena] if arena else [])

    if method == "vector":
        vec = embed_text(query)
        results = search_vector(get_milvus(), vec, limit=limit, arenas=arena_list)
    elif method == "bm25":
        results = search_fts(get_fts_db(), query, limit=limit, arenas=arena_list)
    else:
        # Hybrid: RRF fusion. Each engine contributes at least 20
        # candidates, and more when the caller asks for more than 20
        # results, so fusion is never starved below `limit`.
        pool = max(20, limit)
        vec = embed_text(query)
        vector_results = search_vector(get_milvus(), vec, limit=pool, arenas=arena_list)
        bm25_results = search_fts(get_fts_db(), query, limit=pool, arenas=arena_list)
        results = rrf_fuse(vector_results, bm25_results)

    # Rerank if enabled
    if enable_rerank and len(results) > 1:
        results = rerank(query, results, top_k=limit)

    return results[:limit]
749
-
750
- # ---------------------------------------------------------------------------
751
- # Stats & Health
752
- # ---------------------------------------------------------------------------
753
-
754
def get_stats() -> Dict:
    """Get index statistics.

    Collects the Milvus row count and, from the FTS `chunks` table, the
    total chunk count plus per-arena and per-doc-type counts. Each
    backend is best-effort: on failure its counters keep their zero or
    empty defaults and a warning is logged.

    Returns:
        Dict with keys vector_chunks, fts_chunks, arenas, doc_types.
    """
    stats = {"vector_chunks": 0, "fts_chunks": 0, "arenas": {}, "doc_types": {}}

    try:
        milvus = get_milvus()
        info = milvus.get_collection_stats(COLLECTION_NAME)
        stats["vector_chunks"] = info.get("row_count", 0)
    except Exception as exc:
        log.warning("get_stats: Milvus stats unavailable: %s", exc)

    conn = None
    try:
        conn = get_fts_db()
        row = conn.execute("SELECT COUNT(*) FROM chunks").fetchone()
        stats["fts_chunks"] = row[0] if row else 0

        for name, count in conn.execute("SELECT arena, COUNT(*) FROM chunks GROUP BY arena").fetchall():
            stats["arenas"][name] = count

        for name, count in conn.execute("SELECT doc_type, COUNT(*) FROM chunks GROUP BY doc_type").fetchall():
            stats["doc_types"][name] = count
    except Exception as exc:
        log.warning("get_stats: FTS stats unavailable: %s", exc)
    finally:
        # Close the handle even when one of the queries fails.
        if conn is not None:
            conn.close()

    return stats
781
-
782
-
783
def health() -> Dict:
    """Health check.

    Reports "ok" iff Milvus and the FTS sidecar both answer. Embeddings
    are NOT probed here — the compat shim's nv_embed entry covers that.
    Ollama was a legacy fallback that is not used in any deployment, so
    its previous probe was a false negative on prod.

    Returns:
        Dict with keys status ("ok" or "degraded"), milvus, fts and
        reranker, each a short human-readable state string.
    """
    out = {"status": "ok", "milvus": "unknown", "fts": "unknown", "reranker": "unknown"}

    # Milvus — vector store
    try:
        client = get_milvus()
        colls = client.list_collections()
        out["milvus"] = f"ok ({len(colls)} collections)"
    except Exception as e:
        out["milvus"] = f"error: {e}"
        out["status"] = "degraded"

    # FTS — keyword fallback over the same chunk set
    conn = None
    try:
        conn = get_fts_db()
        cnt = conn.execute("SELECT COUNT(*) FROM chunks").fetchone()[0]
        out["fts"] = f"ok ({cnt} chunks)"
    except Exception as e:
        out["fts"] = f"error: {e}"
        out["status"] = "degraded"
    finally:
        # Close the handle even when the probe query fails.
        if conn is not None:
            conn.close()

    # Reranker — informational; CPU fallback to RRF is acceptable, so
    # don't degrade overall status when it's unavailable.
    reranker = get_reranker()
    out["reranker"] = "loaded" if reranker else "unavailable (CPU fallback to RRF)"

    return out
818
-
819
- # ---------------------------------------------------------------------------
820
- # FastAPI Server
821
- # ---------------------------------------------------------------------------
822
-
823
def serve(port: int = DEFAULT_PORT):
    """Run the L6 Document Store as an HTTP API server.

    Builds a FastAPI app exposing health, stats, search, indexing, purge
    and repair endpoints, then blocks in `uvicorn.run`. The bind host is
    read from the `HOST` environment variable (default 127.0.0.1).

    Args:
        port: TCP port to listen on. The /health/deep self-probe posts
            to this same port.
    """
    from fastapi import FastAPI, Query as Q, HTTPException
    from pydantic import BaseModel
    import uvicorn

    api = FastAPI(title="L6 Document Store", version="1.0.0")

    class IndexRequest(BaseModel):
        paths: List[str]
        arena: Optional[str] = None
        doc_type: Optional[str] = None
        extract_entities: bool = True

    @api.get("/health")
    def api_health():
        return health()

    @api.get("/health/deep")
    def api_health_deep():
        """Real functional probe: embed → insert via /index-batch path →
        search the sentinel via hybrid search → assert reranker loaded.

        Built to catch the v0.8.0–0.8.2 L6 _embed_client shadowing bug
        and its kind (request-handler-level breakage with the layer
        process appearing healthy)."""
        import time as _time
        sentinel_id = "__healthcheck__sentinel"
        sentinel_text = "healthcheck sentinel — L6 embed-write-search round-trip verifier"
        out = {"status": "ok", "ok": True}
        try:
            t0 = _time.time()
            try:
                embed_text(sentinel_text)
            except Exception as exc:
                out["status"] = f"embed_failed: {type(exc).__name__}: {exc}"
                out["ok"] = False
                return out
            out["embed_ms"] = round((_time.time() - t0) * 1000.0, 1)

            # Insert via the same path real ingest uses, so the probe
            # actually exercises /index-batch's code. Target the port
            # this server was started on, not DEFAULT_PORT, so the probe
            # also works when serve() runs on a non-default port.
            t1 = _time.time()
            import httpx as _httpx
            resp = _httpx.post(
                f"http://localhost:{port}/index-batch",
                json={
                    "arena": "__healthcheck__",
                    "records": [{"id": sentinel_id, "text": sentinel_text}],
                },
                timeout=15.0,
            )
            out["write_ms"] = round((_time.time() - t1) * 1000.0, 1)
            if resp.status_code != 200:
                out["status"] = f"write_failed: http {resp.status_code}"
                out["ok"] = False
                return out

            t2 = _time.time()
            results = search(
                sentinel_text, method="hybrid", limit=3,
                arena="__healthcheck__", enable_rerank=False,
            )
            out["search_ms"] = round((_time.time() - t2) * 1000.0, 1)
            # L6 search hits don't expose a stable `id` field — match by
            # text content (verbatim — the sentinel was just written and
            # the chunking layer doesn't split it) against the arena that
            # the search was already filtered to.
            hit = any(
                sentinel_text in (item.get("text") or "")
                for item in (results or [])
            )
            out["hit"] = hit
            if not hit:
                out["status"] = "sentinel_missing"
                out["ok"] = False

            # Reranker check — informational. Failure here doesn't flip
            # ok=False because L6 falls back to RRF and still serves
            # results; it just caps the recall ranking quality.
            out["reranker"] = "ok" if get_reranker() is not None else "rrf_fallback"
        except Exception as exc:
            out["status"] = f"probe_error: {type(exc).__name__}: {exc}"
            out["ok"] = False
        return out

    @api.get("/stats")
    def api_stats():
        return get_stats()

    @api.get("/search")
    def api_search(
        q: str = Q(..., description="Search query"),
        method: str = Q("hybrid", description="hybrid|vector|bm25"),
        limit: int = Q(10, ge=1, le=50),
        arena: Optional[str] = Q(None),
        arenas: List[str] = Q(default=[]),
        rerank: bool = Q(True),
    ):
        results = search(
            q, method=method, limit=limit,
            arena=arena, arenas=arenas or None,
            enable_rerank=rerank,
        )
        return {"query": q, "method": method, "results": results, "count": len(results)}

    @api.post("/search")
    def api_search_post(
        q: str,
        method: str = "hybrid",
        limit: int = 10,
        arena: Optional[str] = None,
        arenas: Optional[List[str]] = None,
        rerank: bool = True,
    ):
        """POST version of search for compatibility."""
        results = search(
            q, method=method, limit=limit,
            arena=arena, arenas=arenas,
            enable_rerank=rerank,
        )
        return {"query": q, "method": method, "results": results, "count": len(results)}

    @api.post("/index")
    def api_index(req: IndexRequest):
        stats = index_documents(
            req.paths, arena=req.arena, doc_type=req.doc_type,
            extract_entities_flag=req.extract_entities,
        )
        return {"status": "ok", "stats": stats}

    @api.post("/index-batch")
    def api_index_batch(req: dict):
        """Index a batch of in-memory documents in a single batched
        NV-Embed call + a single milvus write + one FTS write.

        Roughly 30-50x faster than calling /index for the equivalent
        files because the legacy path does one embed roundtrip per
        chunk. This endpoint exists for tests, smoke runs and bench
        harnesses where small corpora need to land quickly.

        Request body::

            {
              "arena": "benchmark",
              "records": [
                {
                  "id": "doc1",                # required, becomes chunk id prefix
                  "text": "…",                 # required, indexed as one chunk
                  "source_file": "doc1.md",    # optional
                  "doc_type": "general",       # optional, default "general"
                  "heading": "…"               # optional
                }, …
              ]
            }

        Returns::

            {"status": "ok", "inserted": N, "embed_ms": float, "insert_ms": float}
        """
        import time as _time, hashlib as _hashlib
        from datetime import datetime as _dt, timezone as _tz

        records = req.get("records") or []
        arena = req.get("arena") or "general"
        if not records:
            return {"status": "ok", "inserted": 0}

        texts = [(r.get("text") or "")[:16000] for r in records]

        # Shared-embed shortcut: caller (compat shim) computed vectors
        # once and forwards them so we skip the embed RPC. Length must
        # match records — fall back to per-layer embed if it doesn't.
        shared_embs = req.get("embeddings")
        t0 = _time.time()
        if isinstance(shared_embs, list) and len(shared_embs) == len(records):
            embs = shared_embs
        else:
            try:
                embs = _embed_post(texts)
            except Exception as exc:
                raise HTTPException(status_code=500, detail=f"embed failed: {exc}") from exc
        embed_ms = (_time.time() - t0) * 1000.0

        now = _dt.now(_tz.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

        # Derive the id, hash and optional fields once per record so the
        # Milvus row and the FTS row cannot drift apart.
        prepared = []
        for rec, txt in zip(records, texts):
            digest = _hashlib.sha1(txt.encode("utf-8")).hexdigest()
            rid = rec.get("id") or digest[:32]
            prepared.append({
                "id": f"l6:{rid}:0"[:63],
                "text": txt,
                "source_file": rec.get("source_file") or f"{rid}.md",
                "doc_type": rec.get("doc_type") or "general",
                "heading": rec.get("heading") or "",
                "content_hash": digest[:20],
            })

        # Single milvus write. Records without an embedding are skipped.
        milvus = get_milvus()
        rows = []
        for item, emb in zip(prepared, embs):
            if emb is None:
                continue
            rows.append({
                "id": item["id"],
                "vector": emb,
                "text": item["text"],
                "source_file": item["source_file"][:500],
                "arena": arena[:60],
                "doc_type": item["doc_type"][:30],
                "heading": item["heading"][:300],
                "chunk_index": 0,
                "content_hash": item["content_hash"],
                "entities_json": "[]",
                "indexed_at": now,
            })
        t1 = _time.time()
        if rows:
            # Chunk ids are deterministic, so re-sending a record (as
            # /health/deep does with its sentinel) must replace the old
            # row. insert() would add another row under the same id,
            # diverging from the FTS INSERT OR REPLACE below.
            milvus.upsert(collection_name=COLLECTION_NAME, data=rows)
        insert_ms = (_time.time() - t1) * 1000.0

        # Single FTS write — into the `chunks` content table; the
        # AFTER INSERT trigger replicates rows into the chunks_fts
        # virtual table so BM25 search (which JOINs chunks ON rowid)
        # actually finds them. Earlier versions wrote directly to
        # chunks_fts, leaving `chunks` empty — which made BM25 return
        # zero hits AND broke the /stats fts_chunks counter.
        fts_conn = None
        try:
            fts_conn = get_fts_db()
            for item in prepared:
                fts_conn.execute(
                    "INSERT OR REPLACE INTO chunks "
                    "(id, text, source_file, arena, doc_type, heading, "
                    " chunk_index, content_hash, entities_json, indexed_at) "
                    "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                    (
                        item["id"],
                        item["text"],
                        item["source_file"],
                        arena,
                        item["doc_type"],
                        item["heading"],
                        0,
                        item["content_hash"],
                        "[]",
                        now,
                    ),
                )
            fts_conn.commit()
        except Exception as exc:
            log.warning("FTS write failed in /index-batch: %s", exc)
        finally:
            # Release the handle even when the write fails part-way.
            if fts_conn is not None:
                fts_conn.close()

        return {
            "status": "ok",
            "inserted": len(rows),
            "embed_ms": round(embed_ms, 1),
            "insert_ms": round(insert_ms, 1),
        }

    @api.delete("/purge")
    def api_purge(source_file: str = Q(...)):
        """Remove all chunks for a source file."""
        milvus = get_milvus()
        fts_conn = get_fts_db()
        try:
            _purge_file(milvus, fts_conn, source_file)
            fts_conn.commit()
        finally:
            fts_conn.close()
        return {"status": "purged", "source_file": source_file}

    @api.post("/rebuild-index")
    def api_rebuild():
        """Force Milvus index rebuild."""
        milvus = get_milvus()
        milvus.release_collection(COLLECTION_NAME)
        milvus.load_collection(COLLECTION_NAME)
        return {"status": "rebuilt"}

    @api.post("/repair-fts")
    def api_repair_fts():
        """Backfill the SQLite `chunks` content table from Milvus.

        Pre-v0.7.6 the /index-batch path wrote straight to the FTS5
        virtual table and never populated `chunks`. The result: BM25
        search (which JOINs chunks ON rowid) returned zero hits even
        though Milvus had the data, and /stats fts_chunks reported 0.

        v0.7.6 fixed new writes; this endpoint cleans up old rows by
        walking Milvus and INSERT-OR-REPLACE'ing every row into chunks.
        The trigger on chunks then mirrors them into chunks_fts. Idempotent.
        Runs in-process so the L6 service's already-open Milvus handle
        is reused — no file-lock conflict (Milvus Lite locks the .db).
        """
        milvus = get_milvus()
        # Milvus Lite caps query() at 16384 per call; page through.
        page_size = 16384
        offset = 0
        rows: list = []
        while True:
            page = milvus.query(
                COLLECTION_NAME,
                filter="id != ''",
                output_fields=[
                    "id", "text", "source_file", "arena", "doc_type",
                    "heading", "chunk_index", "content_hash",
                    "entities_json", "indexed_at",
                ],
                limit=page_size,
                offset=offset,
            )
            if not page:
                break
            rows.extend(page)
            if len(page) < page_size:
                break
            offset += page_size

        fts_conn = get_fts_db()
        repaired = 0
        try:
            for r in rows:
                try:
                    fts_conn.execute(
                        "INSERT OR REPLACE INTO chunks "
                        "(id, text, source_file, arena, doc_type, heading, "
                        " chunk_index, content_hash, entities_json, indexed_at) "
                        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                        (
                            r.get("id"),
                            r.get("text", "") or "",
                            r.get("source_file", "") or "",
                            r.get("arena", "") or "general",
                            r.get("doc_type", "") or "general",
                            r.get("heading", "") or "",
                            int(r.get("chunk_index", 0) or 0),
                            r.get("content_hash", "") or "",
                            r.get("entities_json", "") or "[]",
                            r.get("indexed_at", "") or "",
                        ),
                    )
                    repaired += 1
                except Exception as exc:
                    # One bad row must not abort the whole backfill.
                    log.warning("repair-fts: skipping row %s: %s", r.get("id"), exc)
            fts_conn.commit()
        finally:
            fts_conn.close()
        return {
            "status": "ok",
            "milvus_rows": len(rows),
            "repaired": repaired,
        }

    # Log the address actually bound, which HOST may override.
    host = os.environ.get("HOST", "127.0.0.1")
    log.info(f"L6 Document Store — http://{host}:{port}")
    uvicorn.run(api, host=host, port=port, log_level="info")
1166
-
1167
- # ---------------------------------------------------------------------------
1168
- # CLI
1169
- # ---------------------------------------------------------------------------
1170
-
1171
def main():
    """Command-line entry point for the L6 Document Store.

    Parses one positional command (serve, index, search, health, stats)
    plus its options and dispatches to the matching module function.
    `index` and `search` print a usage hint and return when called
    without positional arguments; results are printed to stdout.
    """
    cli = argparse.ArgumentParser(description="L6 Document Store")
    cli.add_argument("command", choices=["serve", "index", "search", "health", "stats"])
    cli.add_argument("args", nargs="*")
    cli.add_argument("--port", "-p", type=int, default=DEFAULT_PORT)
    cli.add_argument("--arena", "-a", type=str, default=None)
    cli.add_argument("--doc-type", "-t", type=str, default=None)
    cli.add_argument("--method", "-m", type=str, default="hybrid")
    cli.add_argument("--limit", "-l", type=int, default=10)
    cli.add_argument("--no-entities", action="store_true")
    cli.add_argument("--no-rerank", action="store_true")

    opts = cli.parse_args()
    command = opts.command

    if command == "serve":
        serve(port=opts.port)
        return

    if command == "index":
        targets = opts.args
        if not targets:
            print("Usage: l6-document-store.py index <file1.md> [file2.md ...]")
            print("       l6-document-store.py index ~/memory/research/*.md")
            return
        summary = index_documents(
            targets,
            arena=opts.arena,
            doc_type=opts.doc_type,
            extract_entities_flag=not opts.no_entities,
        )
        print(json.dumps(summary, indent=2))
        return

    if command == "search":
        # Positional words are joined back into one query string.
        query = " ".join(opts.args) if opts.args else ""
        if not query:
            print("Usage: l6-document-store.py search 'your query'")
            return
        hits = search(
            query,
            method=opts.method,
            limit=opts.limit,
            arena=opts.arena,
            enable_rerank=not opts.no_rerank,
        )
        for rank, hit in enumerate(hits, 1):
            source = hit.get('source_file', '?')
            score = hit.get('rrf_score', 0)
            engines = hit.get('engines', '?')
            print(f"\n--- [{rank}] {source} (rrf={score:.4f}, engines={engines}) ---")
            if hit.get("heading"):
                print(f"Heading: {hit['heading']}")
            if hit.get("entities"):
                shown = hit['entities'][:10]
                print(f"Entities: {', '.join(shown)}")
            print(hit["text"][:300])
        return

    if command == "health":
        print(json.dumps(health(), indent=2))
        return

    if command == "stats":
        print(json.dumps(get_stats(), indent=2))
1218
-
1219
-
1220
# Script entry point: run the CLI only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()