@remnic/plugin-openclaw 1.0.35 → 1.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/README.md +38 -4
  2. package/dist/{calibration-Z5WWNV7U.js → calibration-RKL2LRW4.js} +4 -4
  3. package/dist/{capsule-cli-GBM3WPAM.js → capsule-cli-EHZPMXBC.js} +2 -2
  4. package/dist/{capsule-crypto-K3IRTKRH.js → capsule-crypto-JS67OSWM.js} +3 -3
  5. package/dist/capsule-export-DX53CPIT.js +17 -0
  6. package/dist/capsule-import-4OXCPHOT.js +16 -0
  7. package/dist/{capsule-merge-IWOQ34KL.js → capsule-merge-25AUN33Q.js} +7 -7
  8. package/dist/{causal-chain-WYN5QOPS.js → causal-chain-BVTOWZKC.js} +4 -4
  9. package/dist/{causal-consolidation-C64NNE4T.js → causal-consolidation-DRPM2KOE.js} +13 -10
  10. package/dist/{causal-retrieval-NZHQOZOE.js → causal-retrieval-XAP6QKHZ.js} +4 -5
  11. package/dist/{causal-trajectory-graph-VBPE2WPM.js → causal-trajectory-graph-ZWQWZ7N5.js} +2 -2
  12. package/dist/{chunk-5LE4HTVL.js → chunk-25J4PXDH.js} +0 -18
  13. package/dist/{chunk-6UFI73TJ.js → chunk-3IKMUNW5.js} +53 -46
  14. package/dist/{chunk-EXDYWXMB.js → chunk-4XDQ3KEC.js} +1 -2
  15. package/dist/{chunk-JGIUTWZS.js → chunk-6O3H3DPL.js} +2 -2
  16. package/dist/{chunk-UTDLHBBV.js → chunk-BLC3RQNV.js} +5 -555
  17. package/dist/{chunk-4G2XCSD2.js → chunk-BZ4EYURA.js} +0 -5
  18. package/dist/{chunk-L6I4MQKO.js → chunk-CEL5ZLKP.js} +6 -6
  19. package/dist/{chunk-TDRJVMUP.js → chunk-EH4AXGRO.js} +0 -12
  20. package/dist/{chunk-EYCLXMIV.js → chunk-G3CZA4SD.js} +9 -427
  21. package/dist/chunk-I2KLQ2HA.js +22 -0
  22. package/dist/chunk-IO5WWY6A.js +156 -0
  23. package/dist/{contradiction-scan-A5NOTZPN.js → chunk-JC3FCKYL.js} +189 -86
  24. package/dist/{chunk-SVSQAG6M.js → chunk-KC7KSQR4.js} +47 -28
  25. package/dist/chunk-LZCGPRHS.js +228 -0
  26. package/dist/{chunk-CXM7EBAO.js → chunk-MXFJXUHC.js} +1 -1
  27. package/dist/{chunk-VRGUUHBV.js → chunk-NUWDSTP7.js} +1 -1
  28. package/dist/{chunk-4LYQ4ONL.js → chunk-QCCP4RU5.js} +8 -3
  29. package/dist/{chunk-6OJAU466.js → chunk-QMUQV5NP.js} +0 -1
  30. package/dist/{chunk-LLUROTZJ.js → chunk-QQXJODFL.js} +9 -9
  31. package/dist/{chunk-6F6EKSVP.js → chunk-QXXEF7VI.js} +1 -1
  32. package/dist/{chunk-CMKR6NDQ.js → chunk-SEGEX7W4.js} +73 -241
  33. package/dist/{chunk-VFULKFKI.js → chunk-SWOYEQN2.js} +42 -17
  34. package/dist/chunk-TH5FF5SC.js +16 -0
  35. package/dist/{chunk-FGTYFLL5.js → chunk-TXOEHSVP.js} +29 -32
  36. package/dist/chunk-UZJ7EERS.js +272 -0
  37. package/dist/chunk-YJYZMLD5.js +360 -0
  38. package/dist/{chunk-NKVIN6RD.js → chunk-YKV4EFUI.js} +84 -2
  39. package/dist/{chunk-SSFTU6LP.js → chunk-ZS6VABML.js} +4 -4
  40. package/dist/{cipher-VHAFCG7Z.js → cipher-E23BHBSO.js} +1 -1
  41. package/dist/{consolidation-undo-5ZSX4MWO.js → consolidation-undo-FKJZCJHS.js} +2 -2
  42. package/dist/contradiction-review-WJRWNQ5N.js +29 -0
  43. package/dist/contradiction-scan-5X423QGT.js +12 -0
  44. package/dist/{dreams-ledger-3I52ISYR.js → dreams-ledger-KDX44I7R.js} +1 -1
  45. package/dist/{engine-47AKKYJ4.js → engine-5P774HTZ.js} +6 -6
  46. package/dist/{extraction-judge-telemetry-GHOTVYMP.js → extraction-judge-telemetry-O4ZVGLTU.js} +1 -1
  47. package/dist/{fallback-llm-45A755XP.js → fallback-llm-43UMEXNJ.js} +3 -3
  48. package/dist/{first-start-migration-I24M2JEE.js → first-start-migration-H2SAXAGR.js} +4 -4
  49. package/dist/{forget-NI4RBDPB.js → forget-ZECIDNL5.js} +1 -1
  50. package/dist/{fs-utils-PZRI2HDZ.js → fs-utils-OYXSZSVV.js} +12 -2
  51. package/dist/{graph-edge-decay-5CVKWBYH.js → graph-edge-decay-24ZKD5QL.js} +5 -5
  52. package/dist/index.js +7098 -84293
  53. package/dist/{kdf-H5B23ZM2.js → kdf-RXKIWHRU.js} +1 -1
  54. package/dist/legacy-hook-compat-QHHKF4GK.js +2 -0
  55. package/dist/{logger-TNOKCH7X.js → logger-XG7JKLPS.js} +1 -1
  56. package/dist/{memory-governance-QS7Z425Y.js → memory-governance-6K4M4YXD.js} +5 -5
  57. package/dist/{metadata-JAGIWHEA.js → metadata-WK2TRPYZ.js} +1 -1
  58. package/dist/{migrate-from-identity-anchor-7MMSPEUM.js → migrate-from-identity-anchor-SNDNKHZD.js} +1 -1
  59. package/dist/path-ZKO74XXC.js +7 -0
  60. package/dist/{peers-KRFXWRQ6.js → peers-W53WSDXG.js} +1 -1
  61. package/dist/{purge-XN2VSPZ2.js → purge-IKJISXEQ.js} +1 -1
  62. package/dist/resolution-BN35OXDS.js +11 -0
  63. package/dist/{secure-store-A4NGCNXV.js → secure-store-F75I54O5.js} +3 -3
  64. package/dist/{state-PVISYXRH.js → state-4ITLYMAU.js} +1 -1
  65. package/dist/{state-store-N6TFBFSP.js → state-store-ET3ADVY5.js} +3 -3
  66. package/dist/{storage-DDYQGLXA.js → storage-5EY6T7ON.js} +3 -3
  67. package/dist/{tier-stats-IZNW66NC.js → tier-stats-ZRQBV6G2.js} +4 -4
  68. package/dist/{trace-NJESSGH7.js → trace-IL2Y34EH.js} +1 -1
  69. package/dist/{tui-MGK2LYJY.js → tui-7KRDCMYK.js} +1 -1
  70. package/dist/{types-R4DO7AKM.js → types-MBUINTB2.js} +3 -3
  71. package/openclaw.plugin.json +164 -8
  72. package/package.json +9 -6
  73. package/scripts/faiss_index.py +816 -0
  74. package/scripts/faiss_requirements.txt +3 -0
  75. package/dist/capsule-export-IXVERCQG.js +0 -17
  76. package/dist/capsule-import-IA6VIOPQ.js +0 -16
  77. package/dist/chunk-3GUF7RQI.js +0 -559
  78. package/dist/chunk-7OQEPGQF.js +0 -533
  79. package/dist/chunk-DIZW6H5J.js +0 -136
  80. package/dist/chunk-FQRSVYY4.js +0 -110
  81. package/dist/chunk-GUSMRW4H.js +0 -12
  82. package/dist/chunk-MLKGABMK.js +0 -9
  83. package/dist/chunk-WPINX4MF.js +0 -380
  84. package/dist/contradiction-review-SVGBS3V5.js +0 -21
  85. package/dist/legacy-hook-compat-XQ7FP6FV.js +0 -35
  86. package/dist/path-JIEGNWFL.js +0 -7
  87. package/dist/resolution-YITUVUTH.js +0 -100
@@ -0,0 +1,816 @@
1
+ #!/usr/bin/env python3
2
+ """FAISS conversation index sidecar.
3
+
4
+ JSON-in/JSON-out CLI used by src/conversation-index/faiss-adapter.ts.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import argparse
10
+ import hashlib
11
+ import json
12
+ import os
13
+ import subprocess
14
+ import sys
15
+ import time
16
+ import uuid
17
+ from pathlib import Path
18
+ from typing import Any
19
+
20
# Loaded embedding models, keyed by resolved model id (filled by get_embedder).
MODEL_CACHE: dict[str, Any] = {}
# Owner tokens of the lock files this process holds, keyed by resolved lock path.
LOCK_OWNERS: dict[str, str] = {}
# Number of components in vectors produced by the hash fallback embedder.
HASH_EMBED_DIM = 128
# Maximum time acquire_lock waits for a contended lock before raising.
LOCK_TIMEOUT_SECONDS = 10.0
# Minimum lock-file age before a lock whose owner is not alive may be removed.
LOCK_STALE_SECONDS = 120.0
# Manifest schema version written by build_manifest and required on read.
MANIFEST_VERSION = 1
# Requested model ids that are swapped for a sentence-transformers model id.
MODEL_ID_ALIASES: dict[str, str] = {
    "text-embedding-3-small": "sentence-transformers/all-MiniLM-L6-v2",
    "text-embedding-3-large": "sentence-transformers/all-mpnet-base-v2",
    "text-embedding-ada-002": "sentence-transformers/all-MiniLM-L6-v2",
}
31
+
32
+
33
class SidecarError(Exception):
    """Expected sidecar failure; main() reports it as an ``ok: false`` JSON reply."""


class DependencyError(SidecarError):
    """Raised when a Python package the sidecar needs cannot be imported."""
39
+
40
+
41
def emit(payload: dict[str, Any]) -> None:
    """Write *payload* to stdout as compact JSON (no trailing newline) and flush.

    The payload is serialized completely before anything is written, so a
    serialization error leaves stdout untouched.
    """
    encoded = json.dumps(payload, ensure_ascii=False, separators=(",", ":"))
    print(encoded, end="", file=sys.stdout, flush=True)
44
+
45
+
46
def read_payload() -> dict[str, Any]:
    """Read all of stdin and decode it as a JSON object.

    Raises:
        SidecarError: if stdin is empty or whitespace-only, is not valid
            JSON, or decodes to something other than an object.
    """
    text = sys.stdin.read()
    if text.strip() == "":
        raise SidecarError("empty stdin payload")

    try:
        decoded = json.loads(text)
    except json.JSONDecodeError as err:
        raise SidecarError(f"invalid JSON payload: {err}") from err

    if isinstance(decoded, dict):
        return decoded
    raise SidecarError("payload must be a JSON object")
57
+
58
+
59
def ensure_index_dir(index_path: str) -> Path:
    """Return *index_path* as a ``Path``, creating the directory if needed.

    Raises:
        SidecarError: if *index_path* is not a string or is blank.
    """
    usable = isinstance(index_path, str) and index_path.strip() != ""
    if not usable:
        raise SidecarError("indexPath is required")

    directory = Path(index_path)
    # Missing parents are created; an already existing directory is fine.
    directory.mkdir(parents=True, exist_ok=True)
    return directory
65
+
66
+
67
def metadata_file(index_dir: Path) -> Path:
    """Return the path of ``metadata.jsonl`` inside *index_dir*."""
    return index_dir.joinpath("metadata.jsonl")
69
+
70
+
71
def index_file(index_dir: Path) -> Path:
    """Return the path of ``index.faiss`` inside *index_dir*."""
    return index_dir.joinpath("index.faiss")
73
+
74
+
75
def manifest_file(index_dir: Path) -> Path:
    """Return the path of ``manifest.json`` inside *index_dir*."""
    return index_dir.joinpath("manifest.json")
77
+
78
+
79
def read_metadata(path: Path) -> list[dict[str, Any]]:
    """Load chunk metadata rows from a JSONL file.

    Returns an empty list when *path* does not exist. Blank lines, lines
    that are not valid JSON, non-object rows, rows without a non-empty
    string ``id`` and rows whose ``text`` is not a string are skipped.
    Every returned row has exactly the keys ``id``, ``sessionKey``,
    ``text``, ``startTs`` and ``endTs``; the three optional fields fall
    back to ``""`` when they are missing or not strings.
    """
    if not path.exists():
        return []

    def text_field(row: dict[str, Any], key: str) -> str:
        value = row.get(key)
        return value if isinstance(value, str) else ""

    rows: list[dict[str, Any]] = []
    # Iterate the file handle instead of calling str.splitlines(): the latter
    # also breaks on U+0085, U+2028 and U+2029, which json.dumps(...,
    # ensure_ascii=False) leaves unescaped. A chunk text containing one of
    # those characters would be cut into two unparsable lines and dropped.
    with path.open("r", encoding="utf-8") as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                continue
            try:
                row = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(row, dict):
                continue
            row_id = row.get("id")
            text = row.get("text")
            if not isinstance(row_id, str) or not row_id:
                continue
            if not isinstance(text, str):
                continue
            rows.append(
                {
                    "id": row_id,
                    "sessionKey": text_field(row, "sessionKey"),
                    "text": text,
                    "startTs": text_field(row, "startTs"),
                    "endTs": text_field(row, "endTs"),
                }
            )
    return rows
110
+
111
+
112
def write_metadata(path: Path, rows: list[dict[str, Any]]) -> None:
    """Write *rows* to *path* as compact JSON, one object per line.

    The rows are written to a sibling ``.jsonl.tmp`` file first, which is
    then moved over *path* with ``os.replace``.
    """
    staging = path.with_suffix(".jsonl.tmp")
    with staging.open("w", encoding="utf-8") as out:
        out.writelines(
            json.dumps(entry, separators=(",", ":"), ensure_ascii=False) + "\n"
            for entry in rows
        )
    os.replace(staging, path)
119
+
120
+
121
def read_manifest(path: Path) -> dict[str, Any] | None:
    """Return the JSON object stored at *path*, or ``None``.

    ``None`` is returned when the file does not exist, cannot be read or
    parsed, or holds a JSON value that is not an object.
    """
    if not path.exists():
        return None
    try:
        with path.open("r", encoding="utf-8") as handle:
            parsed = json.load(handle)
    except Exception:
        # Best effort: any read or parse failure counts as "no manifest".
        return None
    if isinstance(parsed, dict):
        return parsed
    return None
129
+
130
+
131
def write_manifest(path: Path, manifest: dict[str, Any]) -> None:
    """Write *manifest* to *path* as compact JSON.

    The JSON is written to a sibling ``.json.tmp`` file first, which is then
    moved over *path* with ``os.replace``.
    """
    staging = path.with_suffix(".json.tmp")
    encoded = json.dumps(manifest, separators=(",", ":"), ensure_ascii=False)
    with staging.open("w", encoding="utf-8") as out:
        out.write(encoded)
    os.replace(staging, path)
138
+
139
+
140
def load_vector_dependencies() -> tuple[Any, Any]:
    """Import numpy and faiss on demand and return them as ``(numpy, faiss)``.

    Raises:
        DependencyError: if either import fails for any reason.
    """
    try:
        import numpy  # type: ignore
        import faiss  # type: ignore
    except Exception as err:
        raise DependencyError(f"missing faiss dependencies: {err}") from err
    else:
        return numpy, faiss
147
+
148
+
149
def sentence_transformers_enabled() -> bool:
    """Report whether sentence-transformers embedding is switched on.

    ``REMNIC_FAISS_ENABLE_ST`` wins whenever it is set, even to an empty
    string; otherwise ``ENGRAM_FAISS_ENABLE_ST`` is consulted. The value is
    trimmed and lower-cased, and only ``1``, ``true``, ``yes`` or ``on``
    count as enabled.
    """
    raw = os.environ.get("REMNIC_FAISS_ENABLE_ST")
    if raw is None:
        raw = os.environ.get("ENGRAM_FAISS_ENABLE_ST", "")
    return raw.strip().lower() in {"1", "true", "yes", "on"}
156
+
157
+
158
def normalize_model_id(model_id: str) -> str:
    """Resolve a requested model id to the id that is actually used.

    A blank id falls back to ``sentence-transformers/all-MiniLM-L6-v2`` and
    ids listed in ``MODEL_ID_ALIASES`` are swapped for their replacement.
    The result is ``"__hash__"`` when the hash embedder is requested
    explicitly (``__hash__`` or ``hash``) or when sentence-transformers is
    not enabled.
    """
    requested = (model_id or "").strip() or "sentence-transformers/all-MiniLM-L6-v2"
    target = MODEL_ID_ALIASES.get(requested, requested)
    hash_requested = target in ("__hash__", "hash")
    # `or` short-circuits, so the environment is only read when needed.
    if hash_requested or not sentence_transformers_enabled():
        return "__hash__"
    return target
168
+
169
+
170
def get_embedder(model_id: str) -> Any:
    """Return the embedding model for *model_id*, or ``None`` for hash mode.

    Models are created once per resolved id and kept in ``MODEL_CACHE``.

    Raises:
        DependencyError: if ``sentence_transformers`` cannot be imported.
    """
    key = normalize_model_id(model_id)
    if key == "__hash__":
        return None

    try:
        return MODEL_CACHE[key]
    except KeyError:
        pass

    try:
        from sentence_transformers import SentenceTransformer  # type: ignore
    except Exception as err:
        raise DependencyError(f"missing sentence-transformers dependency: {err}") from err

    model = SentenceTransformer(key)
    MODEL_CACHE[key] = model
    return model
182
+
183
+
184
def embed_with_hash(texts: list[str], np: Any) -> Any:
    """Build deterministic fallback vectors from SHA-256 digests.

    Returns a float32 array of shape ``(len(texts), HASH_EMBED_DIM)``. Each
    component is one digest byte scaled into ``[-0.5, 0.5]``; the digest
    bytes are reused cyclically to fill the row.
    """
    matrix = np.zeros((len(texts), HASH_EMBED_DIM), dtype="float32")
    for row, text in enumerate(texts):
        digest = hashlib.sha256(text.encode("utf-8")).digest()
        size = len(digest)
        matrix[row, :] = [
            (digest[col % size] / 255.0) - 0.5 for col in range(HASH_EMBED_DIM)
        ]
    return matrix
191
+
192
+
193
def embed_texts(texts: list[str], model_id: str) -> tuple[Any, Any, Any]:
    """Embed *texts* and L2-normalize the resulting rows.

    Uses the embedder returned by ``get_embedder`` when there is one and the
    hash fallback otherwise. Returns ``(vectors, numpy, faiss)`` where
    ``vectors`` is a two-dimensional float32 array.
    """
    np, faiss = load_vector_dependencies()
    model = get_embedder(model_id)

    if model is not None:
        encoded = model.encode(
            texts,
            normalize_embeddings=False,
            convert_to_numpy=True,
            show_progress_bar=False,
        )
        matrix = np.asarray(encoded, dtype="float32")
    else:
        matrix = embed_with_hash(texts, np)

    # A one-dimensional result is treated as a single row.
    if matrix.ndim == 1:
        matrix = matrix.reshape(1, -1)
    if matrix.shape[0] > 0:
        faiss.normalize_L2(matrix)
    return matrix, np, faiss
211
+
212
+
213
def write_index(path: Path, vectors: Any, faiss: Any) -> None:
    """Build an ``IndexFlatIP`` over *vectors* and store it at *path*.

    The index is written to a sibling ``.faiss.tmp`` file first, which is
    then moved over *path* with ``os.replace``. An array with zero rows
    yields an empty index of the same dimension.
    """
    flat_index = faiss.IndexFlatIP(int(vectors.shape[1]))
    has_rows = int(vectors.shape[0]) > 0
    if has_rows:
        flat_index.add(vectors)

    staging = path.with_suffix(".faiss.tmp")
    faiss.write_index(flat_index, str(staging))
    os.replace(staging, path)
221
+
222
+
223
def resolve_vector_dimension(model_id: str) -> int:
    """Return the vector width for *model_id* by embedding an empty string."""
    sample = embed_texts([""], model_id)[0]
    return int(sample.shape[1])
226
+
227
+
228
def build_empty_vectors(model_id: str) -> tuple[Any, Any]:
    """Return ``(vectors, faiss)`` where ``vectors`` is a zero-row float32 array.

    The array has as many columns as the embedder for *model_id* produces.
    """
    np, faiss = load_vector_dependencies()
    width = resolve_vector_dimension(model_id)
    return np.zeros((0, width), dtype="float32"), faiss
233
+
234
+
235
def build_manifest(
    model_id: str,
    vector_dim: int,
    chunk_count: int,
    *,
    generated_at: str | None = None,
    last_successful_rebuild_at: str | None = None,
) -> dict[str, Any]:
    """Assemble the manifest dictionary describing the index artifacts.

    Args:
        model_id: Model id as requested by the caller; stored verbatim.
        vector_dim: Width of the stored vectors.
        chunk_count: Number of chunks in the index.
        generated_at: Timestamp for ``updatedAt``; the current UTC time in
            ``YYYY-MM-DDTHH:MM:SSZ`` form is used when omitted or empty.
        last_successful_rebuild_at: Timestamp to store as
            ``lastSuccessfulRebuildAt``; falls back to the ``updatedAt``
            value when omitted or empty.
    """
    normalized = normalize_model_id(model_id)
    if generated_at:
        stamp = generated_at
    else:
        stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    rebuild_stamp = last_successful_rebuild_at if last_successful_rebuild_at else stamp

    return {
        "version": MANIFEST_VERSION,
        "modelId": model_id,
        "normalizedModelId": normalized,
        "dimension": int(vector_dim),
        "chunkCount": int(chunk_count),
        "updatedAt": stamp,
        "lastSuccessfulRebuildAt": rebuild_stamp,
    }
254
+
255
+
256
def validate_index_manifest(
    manifest: dict[str, Any] | None,
    *,
    requested_model_id: str,
    actual_dimension: int | None = None,
    expected_dimension: int | None = None,
) -> dict[str, Any]:
    """Check a manifest against the request and return a cleaned copy.

    Args:
        manifest: Parsed manifest, or ``None`` when none could be read.
        requested_model_id: Model id of the current request; compared in
            normalized form with the manifest's ``normalizedModelId``.
        actual_dimension: Dimension of the loaded index, if known.
        expected_dimension: Dimension of the query vectors, if known.

    Returns:
        A dictionary with the validated fields; string fields that are
        missing or not strings are returned as ``""``.

    Raises:
        SidecarError: on a missing manifest, an unsupported version, a model
            mismatch, an invalid dimension or chunk count, or a dimension
            that disagrees with *actual_dimension* / *expected_dimension*.
    """

    def is_plain_int(value: Any) -> bool:
        # bool is a subclass of int, so it has to be excluded explicitly.
        return isinstance(value, int) and not isinstance(value, bool)

    def string_field(key: str) -> str:
        value = manifest.get(key)
        return value if isinstance(value, str) else ""

    if manifest is None:
        raise SidecarError("missing index manifest; rebuild the FAISS conversation index")

    version = manifest.get("version")
    if not (is_plain_int(version) and version == MANIFEST_VERSION):
        raise SidecarError("unsupported index manifest version; rebuild the FAISS conversation index")

    stored_model = manifest.get("normalizedModelId")
    if not (isinstance(stored_model, str) and stored_model):
        raise SidecarError("index manifest missing normalized model id; rebuild the FAISS conversation index")

    wanted_model = normalize_model_id(requested_model_id)
    if stored_model != wanted_model:
        raise SidecarError(
            "index model mismatch "
            f"(index={stored_model}, query={wanted_model}); "
            "rebuild the FAISS conversation index"
        )

    stored_dim = manifest.get("dimension")
    if not (is_plain_int(stored_dim) and stored_dim > 0):
        raise SidecarError("index manifest missing vector dimension; rebuild the FAISS conversation index")

    stored_count = manifest.get("chunkCount")
    if not (is_plain_int(stored_count) and stored_count >= 0):
        raise SidecarError("index manifest missing chunk count; rebuild the FAISS conversation index")

    if actual_dimension is not None and stored_dim != int(actual_dimension):
        raise SidecarError(
            f"index dimension mismatch (manifest={stored_dim}, index={int(actual_dimension)}); "
            "rebuild the FAISS conversation index"
        )

    if expected_dimension is not None and stored_dim != int(expected_dimension):
        raise SidecarError(
            f"index dimension mismatch (manifest={stored_dim}, query={int(expected_dimension)}); "
            "rebuild the FAISS conversation index"
        )

    return {
        "version": version,
        "modelId": string_field("modelId"),
        "normalizedModelId": stored_model,
        "dimension": stored_dim,
        "chunkCount": stored_count,
        "updatedAt": string_field("updatedAt"),
        "lastSuccessfulRebuildAt": string_field("lastSuccessfulRebuildAt"),
    }
315
+
316
+
317
def validate_artifact_counts(index: Any, rows: list[dict[str, Any]], manifest: dict[str, Any]) -> None:
    """Ensure index, metadata and manifest all report the same chunk count.

    The index size is read from ``index.ntotal`` (``-1`` when the attribute
    is absent).

    Raises:
        SidecarError: if the three counts are not all equal.
    """
    in_index = int(getattr(index, "ntotal", -1))
    in_metadata = len(rows)
    in_manifest = int(manifest["chunkCount"])

    if in_index == in_metadata == in_manifest:
        return
    raise SidecarError(
        "index artifact count mismatch "
        f"(index={in_index}, metadata={in_metadata}, manifest={in_manifest}); "
        "rebuild the FAISS conversation index"
    )
328
+
329
+
330
def parse_chunks(payload: dict[str, Any]) -> list[dict[str, Any]]:
    """Extract well-formed chunk rows from ``payload["chunks"]``.

    Entries that are not objects, lack a non-empty string ``id`` or have a
    non-string ``text`` are skipped. ``sessionKey``, ``startTs`` and
    ``endTs`` become ``""`` when they are missing or not strings.

    Raises:
        SidecarError: if ``chunks`` is not a list.
    """
    candidates = payload.get("chunks")
    if not isinstance(candidates, list):
        raise SidecarError("chunks must be an array")

    def optional_text(entry: dict[str, Any], key: str) -> str:
        value = entry.get(key)
        return value if isinstance(value, str) else ""

    parsed: list[dict[str, Any]] = []
    for entry in candidates:
        if not isinstance(entry, dict):
            continue
        identifier = entry.get("id")
        body = entry.get("text")
        has_id = isinstance(identifier, str) and bool(identifier)
        if not has_id or not isinstance(body, str):
            continue
        parsed.append(
            {
                "id": identifier,
                "sessionKey": optional_text(entry, "sessionKey"),
                "text": body,
                "startTs": optional_text(entry, "startTs"),
                "endTs": optional_text(entry, "endTs"),
            }
        )
    return parsed
354
+
355
+
356
def metadata_row_key(row: dict[str, Any]) -> tuple[str, str]:
    """Return the ``(sessionKey, id)`` identity of a metadata row.

    A field that is missing or not a string contributes ``""``.
    """
    session_key = row.get("sessionKey")
    row_id = row.get("id")
    return (
        session_key if isinstance(session_key, str) else "",
        row_id if isinstance(row_id, str) else "",
    )


def merge_rows(existing: list[dict[str, Any]], updates: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Merge *updates* into *existing*, keyed by ``metadata_row_key``.

    A row whose key is already present replaces the earlier row in place;
    rows with new keys are appended in the order they arrive. Each key
    appears exactly once in the result, even if *existing* itself contains
    duplicate keys (the last duplicate wins and keeps the position of the
    first occurrence).
    """
    # Dicts preserve first-insertion order, so re-assigning a key replaces
    # the row without moving it. Tracking the order in a separate list would
    # repeat any key that occurs more than once in `existing`.
    merged: dict[tuple[str, str], dict[str, Any]] = {}
    for row in existing:
        merged[metadata_row_key(row)] = row
    for row in updates:
        merged[metadata_row_key(row)] = row
    return list(merged.values())
375
+
376
+
377
def lock_owner_key(lock_path: Path) -> str:
    """Return the resolved form of *lock_path* as a string, used to key ``LOCK_OWNERS``."""
    resolved = lock_path.resolve()
    return os.fspath(resolved)
379
+
380
+
381
def make_lock_owner_token() -> str:
    """Return a fresh ``<pid>:<random hex>`` token identifying this lock holder."""
    pid = os.getpid()
    nonce = uuid.uuid4().hex
    return f"{pid}:{nonce}"
383
+
384
+
385
def read_lock_contents(lock_path: Path) -> str | None:
    """Return the stripped text of the lock file, or ``None``.

    ``None`` is returned when the file cannot be read for any reason or when
    it is empty after stripping whitespace.
    """
    try:
        text = lock_path.read_text(encoding="utf-8").strip()
    except Exception:
        return None
    return text or None
393
+
394
+
395
def read_lock_owner_pid(lock_path: Path) -> int | None:
    """Return the owner pid recorded in the lock file, or ``None``.

    The pid is the part of the lock contents before the first ``:``.
    ``None`` is returned when the file is unreadable or empty, or when that
    part is not a positive integer.
    """
    token = read_lock_contents(lock_path)
    if token is None:
        return None

    head, _, _ = token.partition(":")
    try:
        pid = int(head.strip())
    except ValueError:
        return None
    if pid > 0:
        return pid
    return None
405
+
406
+
407
def lock_stat_matches(current_stat: os.stat_result, observed_stat: os.stat_result) -> bool:
    """Tell whether two stat results look like the same, unmodified file.

    Size and modification time (in nanoseconds) must be equal. Inode numbers
    are compared as well, but only when both results carry a non-zero one.
    """
    same_size = current_stat.st_size == observed_stat.st_size
    same_mtime = current_stat.st_mtime_ns == observed_stat.st_mtime_ns
    if not (same_size and same_mtime):
        return False

    current_inode = getattr(current_stat, "st_ino", 0)
    observed_inode = getattr(observed_stat, "st_ino", 0)
    if current_inode and observed_inode:
        return current_inode == observed_inode
    return True
415
+
416
+
417
def unlink_lock_if_unchanged(
    lock_path: Path,
    observed_owner_token: str | None,
    observed_stat: os.stat_result,
) -> bool:
    """Delete the lock file only if it still matches an earlier observation.

    The file is removed when its current contents equal
    *observed_owner_token* and its current stat matches *observed_stat*.

    Returns:
        ``True`` if this call removed the file; ``False`` if the file is
        gone, has changed since it was observed, or vanished before the
        unlink.
    """
    try:
        latest_stat = lock_path.stat()
    except FileNotFoundError:
        return False

    token_unchanged = read_lock_contents(lock_path) == observed_owner_token
    if not (token_unchanged and lock_stat_matches(latest_stat, observed_stat)):
        return False

    try:
        lock_path.unlink()
    except FileNotFoundError:
        return False
    return True
436
+
437
+
438
def is_process_alive(pid: int) -> bool:
    """Best-effort check whether a process with *pid* exists.

    On Windows (``os.name == "nt"``) the answer comes from ``tasklist``;
    elsewhere signal 0 is sent with ``os.kill``. A ``PermissionError`` from
    ``os.kill`` counts as alive; any other failure counts as not alive.
    """
    if pid <= 0:
        return False

    if os.name != "nt":
        try:
            os.kill(pid, 0)
        except ProcessLookupError:
            return False
        except PermissionError:
            # The process exists but belongs to someone else.
            return True
        except OSError:
            return False
        return True

    try:
        probe = subprocess.run(
            ["tasklist", "/FI", f"PID eq {pid}", "/FO", "CSV", "/NH"],
            capture_output=True,
            text=True,
            timeout=2,
        )
    except Exception:
        return False

    listing = probe.stdout.strip()
    if not listing or listing.startswith("INFO:"):
        return False
    # CSV output quotes every field, so look for the quoted pid.
    return f'"{pid}"' in listing
468
+
469
+
470
def acquire_lock(index_dir: Path, lock_name: str) -> Path:
    """Create the lock file *lock_name* in *index_dir*, waiting if it is held.

    The lock is taken by exclusively creating the file and writing this
    process's owner token into it; the token is also recorded in
    ``LOCK_OWNERS``. While the file exists the call polls every 50 ms. A
    lock older than ``LOCK_STALE_SECONDS`` whose recorded owner is not alive
    is removed, provided it has not changed since it was inspected.

    Returns:
        The path of the lock file now held by this process.

    Raises:
        SidecarError: if the lock is still held after
            ``LOCK_TIMEOUT_SECONDS``.
    """
    lock_path = index_dir / lock_name
    token = make_lock_owner_token()

    # Human-readable name for the timeout message, e.g. ".index.lock" -> "index".
    label = lock_name.strip(".")
    if label.endswith(".lock"):
        label = label[: -len(".lock")]
    label = label.replace(".", " ")

    give_up_at = time.monotonic() + LOCK_TIMEOUT_SECONDS
    open_flags = os.O_CREAT | os.O_EXCL | os.O_WRONLY

    while True:
        try:
            fd = os.open(str(lock_path), open_flags)
        except FileExistsError:
            pass
        else:
            with os.fdopen(fd, "w", encoding="utf-8") as handle:
                handle.write(token)
            LOCK_OWNERS[lock_owner_key(lock_path)] = token
            return lock_path

        try:
            seen_stat = lock_path.stat()
        except FileNotFoundError:
            # The holder released the lock in the meantime; retry at once.
            continue

        age = time.time() - seen_stat.st_mtime
        seen_token = read_lock_contents(lock_path)
        holder_pid = read_lock_owner_pid(lock_path)
        holder_alive = holder_pid is not None and is_process_alive(holder_pid)

        if age > LOCK_STALE_SECONDS and not holder_alive:
            unlink_lock_if_unchanged(lock_path, seen_token, seen_stat)
            continue

        if time.monotonic() >= give_up_at:
            raise SidecarError(f"timed out waiting for FAISS {label} lock")
        time.sleep(0.05)
504
+
505
+
506
def acquire_index_lock(index_dir: Path) -> Path:
    """Acquire the ``.index.lock`` file in *index_dir* and return its path."""
    lock_name = ".index.lock"
    return acquire_lock(index_dir, lock_name)
508
+
509
+
510
def acquire_writer_lock(index_dir: Path) -> Path:
    """Acquire the ``.writer.lock`` file in *index_dir* and return its path."""
    lock_name = ".writer.lock"
    return acquire_lock(index_dir, lock_name)
512
+
513
+
514
def release_lock(lock_path: Path) -> None:
    """Release a lock previously taken by this process.

    Does nothing when this process has no owner token recorded for
    *lock_path* or when the file is already gone. Otherwise the file is
    removed only if it still carries this process's token.
    """
    token = LOCK_OWNERS.pop(lock_owner_key(lock_path), None)
    if token is not None:
        try:
            seen_stat = lock_path.stat()
        except FileNotFoundError:
            return
        unlink_lock_if_unchanged(lock_path, token, seen_stat)
523
+
524
+
525
def release_index_lock(lock_path: Path) -> None:
    """Release the index lock at *lock_path*; delegates to ``release_lock``."""
    return release_lock(lock_path)
527
+
528
+
529
def run_upsert(payload: dict[str, Any]) -> dict[str, Any]:
    """Handle the ``upsert`` command: merge chunks into the stored index.

    The payload's chunks are merged with the existing metadata rows, all
    merged texts are embedded again, and index, metadata and manifest are
    rewritten. The writer lock is held for the whole operation; the index
    lock only while the three files are replaced.

    Args:
        payload: Request object with ``modelId``, ``indexPath`` and ``chunks``.

    Returns:
        ``{"ok": True, "upserted": n}`` where ``n`` is the number of valid
        chunks in the payload (``0`` leaves the files untouched).

    Raises:
        SidecarError: if ``modelId`` or ``indexPath`` is missing or not a
            string, ``chunks`` is not a list, or a lock cannot be acquired.
    """
    model_id = payload.get("modelId")
    if not isinstance(model_id, str) or not model_id:
        raise SidecarError("modelId is required")

    # Validate the raw value: wrapping it in str() would turn a JSON null or
    # number into "None" / "123" and create a directory with that name.
    index_path = payload.get("indexPath")
    if not isinstance(index_path, str):
        raise SidecarError("indexPath is required")
    index_dir = ensure_index_dir(index_path)
    chunks = parse_chunks(payload)

    if not chunks:
        return {"ok": True, "upserted": 0}

    meta_path = metadata_file(index_dir)
    idx_path = index_file(index_dir)
    manifest_path = manifest_file(index_dir)

    writer_lock_path = acquire_writer_lock(index_dir)
    try:
        existing = read_metadata(meta_path)
        existing_manifest = read_manifest(manifest_path)
        merged = merge_rows(existing, chunks)

        texts = [row["text"] for row in merged]
        vectors, _np, faiss = embed_texts(texts, model_id)

        lock_path = acquire_index_lock(index_dir)
        try:
            # Commit FAISS index first; metadata follows so we never point at missing vectors.
            write_index(idx_path, vectors, faiss)
            write_metadata(meta_path, merged)
            # An upsert is not a rebuild, so keep the recorded rebuild time
            # when the previous manifest has a usable one.
            preserved_rebuild_at = (
                existing_manifest.get("lastSuccessfulRebuildAt")
                if isinstance(existing_manifest, dict)
                and isinstance(existing_manifest.get("lastSuccessfulRebuildAt"), str)
                and existing_manifest.get("lastSuccessfulRebuildAt")
                else None
            )
            write_manifest(
                manifest_path,
                build_manifest(
                    model_id,
                    int(vectors.shape[1]),
                    len(merged),
                    last_successful_rebuild_at=preserved_rebuild_at,
                ),
            )
        finally:
            release_index_lock(lock_path)
    finally:
        release_lock(writer_lock_path)

    return {"ok": True, "upserted": len(chunks)}
580
+
581
+
582
def run_rebuild(payload: dict[str, Any]) -> dict[str, Any]:
    """Handle the ``rebuild`` command: replace the index with the given chunks.

    Existing metadata is ignored. With no valid chunks an empty index of the
    model's dimension is written. The writer lock is held for the whole
    operation; the index lock only while the three files are replaced.

    Args:
        payload: Request object with ``modelId``, ``indexPath`` and ``chunks``.

    Returns:
        ``{"ok": True, "rebuilt": n}`` where ``n`` is the number of valid
        chunks written.

    Raises:
        SidecarError: if ``modelId`` or ``indexPath`` is missing or not a
            string, ``chunks`` is not a list, or a lock cannot be acquired.
    """
    model_id = payload.get("modelId")
    if not isinstance(model_id, str) or not model_id:
        raise SidecarError("modelId is required")

    # Validate the raw value: wrapping it in str() would turn a JSON null or
    # number into "None" / "123" and create a directory with that name.
    index_path = payload.get("indexPath")
    if not isinstance(index_path, str):
        raise SidecarError("indexPath is required")
    index_dir = ensure_index_dir(index_path)
    chunks = parse_chunks(payload)

    writer_lock_path = acquire_writer_lock(index_dir)
    try:
        if chunks:
            texts = [row["text"] for row in chunks]
            vectors, _np, faiss = embed_texts(texts, model_id)
            chunk_count = len(chunks)
        else:
            vectors, faiss = build_empty_vectors(model_id)
            chunk_count = 0

        lock_path = acquire_index_lock(index_dir)
        try:
            meta_path = metadata_file(index_dir)
            idx_path = index_file(index_dir)
            manifest_path = manifest_file(index_dir)

            write_index(idx_path, vectors, faiss)
            write_metadata(meta_path, chunks)
            write_manifest(manifest_path, build_manifest(model_id, int(vectors.shape[1]), chunk_count))
        finally:
            release_index_lock(lock_path)
    finally:
        release_lock(writer_lock_path)

    return {"ok": True, "rebuilt": len(chunks)}
615
+
616
+
617
def run_search(payload: dict[str, Any]) -> dict[str, Any]:
    """Handle the ``search`` command: return the best-matching chunks.

    Index, metadata and manifest are loaded and cross-checked under the
    index lock. The query is embedded and searched after the lock has been
    released, against the in-memory copy of the index.

    Args:
        payload: Request object with ``modelId``, ``query``, ``topK`` and
            ``indexPath``.

    Returns:
        ``{"ok": True, "results": [...]}``. Each result has ``path`` (the
        chunk id), ``snippet`` (the first 280 characters of the chunk text)
        and ``score``. The list is empty when no artifacts exist yet or the
        index holds no rows.

    Raises:
        SidecarError: on invalid request fields, incomplete or inconsistent
            index artifacts, a model or dimension mismatch, or a lock
            timeout.
    """
    model_id = payload.get("modelId")
    query = payload.get("query")
    top_k = payload.get("topK")
    if not isinstance(model_id, str) or not model_id:
        raise SidecarError("modelId is required")
    if not isinstance(query, str) or not query.strip():
        raise SidecarError("query is required")
    if not isinstance(top_k, int) or isinstance(top_k, bool) or top_k <= 0:
        raise SidecarError("topK must be a positive integer")

    # Validate the raw value: wrapping it in str() would turn a JSON null or
    # number into "None" / "123" and create a directory with that name.
    index_path = payload.get("indexPath")
    if not isinstance(index_path, str):
        raise SidecarError("indexPath is required")
    index_dir = ensure_index_dir(index_path)
    meta_path = metadata_file(index_dir)
    idx_path = index_file(index_dir)
    manifest_path = manifest_file(index_dir)

    lock_path = acquire_index_lock(index_dir)
    try:
        has_index = idx_path.exists()
        has_metadata = meta_path.exists()
        has_manifest = manifest_path.exists()
        # Nothing built yet is a valid state; a partial set of files is not.
        if not has_index and not has_metadata and not has_manifest:
            return {"ok": True, "results": []}
        if not has_index or not has_metadata or not has_manifest:
            raise SidecarError("conversation index artifacts incomplete; rebuild the FAISS conversation index")

        rows = read_metadata(meta_path)
        _np, faiss = load_vector_dependencies()
        index = faiss.read_index(str(idx_path))
        manifest = validate_index_manifest(
            read_manifest(manifest_path),
            requested_model_id=model_id,
            actual_dimension=int(index.d),
        )
        validate_artifact_counts(index, rows, manifest)
    finally:
        release_index_lock(lock_path)

    if not rows:
        return {"ok": True, "results": []}

    query_vector, _np2, _faiss2 = embed_texts([query], model_id)
    query_dimension = int(query_vector.shape[1])
    if int(manifest["dimension"]) != query_dimension:
        raise SidecarError(
            f"index dimension mismatch (manifest={int(manifest['dimension'])}, query={query_dimension}); "
            "rebuild the FAISS conversation index"
        )

    distances, indices = index.search(query_vector, top_k)
    results: list[dict[str, Any]] = []
    for score, idx in zip(distances[0], indices[0]):
        idx_i = int(idx)
        # Skip positions that do not refer to a metadata row.
        if idx_i < 0 or idx_i >= len(rows):
            continue
        row = rows[idx_i]
        results.append(
            {
                "path": row["id"],
                "snippet": row["text"][:280],
                "score": float(score),
            }
        )

    return {"ok": True, "results": results}
682
+
683
+
684
def build_health_response(payload: dict[str, Any], *, include_metadata: bool = False) -> dict[str, Any]:
    """Build the reply for the ``health`` and ``inspect`` commands.

    Checks that the vector dependencies import (and ``sentence_transformers``
    too, unless the hash embedder is selected), then inspects the index
    artifacts under the index lock. Problems do not raise; they set
    ``status`` to ``"degraded"`` and the first one found is reported in
    ``error``.

    Args:
        payload: Request object with ``indexPath`` and optionally ``modelId``.
        include_metadata: When true, add a ``metadata`` section with the
            metadata row count and which artifact files exist.

    Returns:
        ``{"ok": True, "status": ...}`` plus ``error``, ``manifest`` and
        ``metadata`` when available.

    Raises:
        SidecarError: if ``indexPath`` is missing, blank or not a string.
    """
    # Validate the raw value: wrapping it in str() would turn a JSON null or
    # number into "None" / "123" and create a directory with that name.
    index_path = payload.get("indexPath")
    if not isinstance(index_path, str):
        raise SidecarError("indexPath is required")
    index_dir = ensure_index_dir(index_path)
    meta_path = metadata_file(index_dir)
    idx_path = index_file(index_dir)
    manifest_path = manifest_file(index_dir)

    status = "ok"
    error = ""
    # modelId is optional here; a non-string value is treated like a missing
    # one instead of being stringified into a bogus model id such as "None".
    raw_model_id = payload.get("modelId")
    model_id = normalize_model_id(raw_model_id if isinstance(raw_model_id, str) else "")
    manifest_details: dict[str, Any] | None = None
    metadata_details: dict[str, Any] | None = None

    try:
        load_vector_dependencies()
        if model_id != "__hash__":
            try:
                import sentence_transformers  # type: ignore # noqa: F401
            except Exception as exc:
                raise DependencyError(f"missing sentence-transformers dependency: {exc}") from exc
    except Exception as exc:
        status = "degraded"
        error = str(exc)

    lock_path: Path | None = None
    try:
        lock_path = acquire_index_lock(index_dir)
        has_index = idx_path.exists()
        has_metadata = meta_path.exists()
        has_manifest = manifest_path.exists()
        rows: list[dict[str, Any]] | None = None

        if has_index or has_metadata or has_manifest:
            if not has_index or not has_metadata or not has_manifest:
                if status == "ok":
                    status = "degraded"
                if not error:
                    error = "conversation index artifacts incomplete; rebuild the FAISS conversation index"
            else:
                try:
                    rows = read_metadata(meta_path)
                    _np, faiss = load_vector_dependencies()
                    index = faiss.read_index(str(idx_path))
                    manifest_details = validate_index_manifest(
                        read_manifest(manifest_path),
                        requested_model_id=model_id,
                        actual_dimension=int(index.d),
                    )
                    validate_artifact_counts(index, rows, manifest_details)
                except Exception as exc:
                    if status == "ok":
                        status = "degraded"
                    if not error:
                        error = str(exc)
        elif status == "ok":
            status = "degraded"
            error = "conversation index artifacts missing; build the FAISS conversation index"

        if include_metadata:
            if rows is None:
                rows = read_metadata(meta_path)
            metadata_details = {
                "chunkCount": len(rows),
                "hasIndex": has_index,
                "hasMetadata": has_metadata,
                "hasManifest": has_manifest,
            }
    except Exception as exc:
        # Covers a lock timeout and anything unexpected above; the metadata
        # section is then gathered without holding the lock.
        if status == "ok":
            status = "degraded"
        if not error:
            error = str(exc)
        if include_metadata:
            try:
                chunk_count = len(read_metadata(meta_path))
            except Exception:
                chunk_count = 0
            metadata_details = {
                "chunkCount": chunk_count,
                "hasIndex": idx_path.exists(),
                "hasMetadata": meta_path.exists(),
                "hasManifest": manifest_path.exists(),
            }
    finally:
        if lock_path is not None:
            release_index_lock(lock_path)

    response: dict[str, Any] = {"ok": True, "status": status}
    if error:
        response["error"] = error
    if manifest_details is not None:
        response["manifest"] = manifest_details
    if metadata_details is not None:
        response["metadata"] = metadata_details
    return response
778
+
779
+
780
def run_health(payload: dict[str, Any]) -> dict[str, Any]:
    """Handle the ``health`` command: health reply without the metadata section."""
    response = build_health_response(payload)
    return response
782
+
783
+
784
def run_inspect(payload: dict[str, Any]) -> dict[str, Any]:
    """Handle the ``inspect`` command: health reply including the metadata section."""
    response = build_health_response(payload, include_metadata=True)
    return response
786
+
787
+
788
def main() -> int:
    """Run one sidecar command: JSON request on stdin, JSON reply on stdout.

    Returns:
        ``0`` after a successful command and also after an expected failure
        (reported as ``{"ok": false, "error": ...}``); ``1`` after an
        unexpected exception, whose details go to stderr only.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("command", choices=["upsert", "rebuild", "search", "health", "inspect"])
    command = cli.parse_args().command

    try:
        payload = read_payload()
        handlers = {
            "upsert": run_upsert,
            "rebuild": run_rebuild,
            "search": run_search,
            "inspect": run_inspect,
        }
        # Anything not listed above is the "health" command.
        handler = handlers.get(command, run_health)
        emit(handler(payload))
        return 0
    except (SidecarError, DependencyError) as known:
        emit({"ok": False, "error": str(known)})
        return 0
    except Exception as unexpected:
        print(f"faiss sidecar internal error: {unexpected}", file=sys.stderr)
        emit({"ok": False, "error": "internal sidecar error"})
        return 1
813
+
814
+
815
if __name__ == "__main__":
    # Exit with the status code returned by main().
    sys.exit(main())