@remnic/plugin-openclaw 1.0.34 → 1.0.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/README.md +59 -10
  2. package/dist/{calibration-JD4AU7FB.js → calibration-RKL2LRW4.js} +4 -4
  3. package/dist/{capsule-cli-GBM3WPAM.js → capsule-cli-EHZPMXBC.js} +2 -2
  4. package/dist/{capsule-crypto-K3IRTKRH.js → capsule-crypto-JS67OSWM.js} +3 -3
  5. package/dist/capsule-export-YPDWRB3C.js +17 -0
  6. package/dist/capsule-import-SWPOFG6F.js +16 -0
  7. package/dist/{capsule-merge-IWOQ34KL.js → capsule-merge-YXAF7ZJW.js} +7 -7
  8. package/dist/{causal-chain-WYN5QOPS.js → causal-chain-BVTOWZKC.js} +4 -4
  9. package/dist/{causal-consolidation-DSLFN64P.js → causal-consolidation-DRPM2KOE.js} +13 -10
  10. package/dist/{causal-retrieval-NZHQOZOE.js → causal-retrieval-XAP6QKHZ.js} +4 -5
  11. package/dist/{causal-trajectory-graph-VBPE2WPM.js → causal-trajectory-graph-ZWQWZ7N5.js} +2 -2
  12. package/dist/{chunk-5LE4HTVL.js → chunk-25J4PXDH.js} +0 -18
  13. package/dist/{chunk-FGTYFLL5.js → chunk-3IHGISUN.js} +29 -32
  14. package/dist/{chunk-6UFI73TJ.js → chunk-3IKMUNW5.js} +53 -46
  15. package/dist/{chunk-EXDYWXMB.js → chunk-4XDQ3KEC.js} +1 -2
  16. package/dist/{chunk-4UA6KMRO.js → chunk-6O3H3DPL.js} +2 -2
  17. package/dist/{chunk-7NMHI4IC.js → chunk-BLC3RQNV.js} +5 -555
  18. package/dist/{chunk-4G2XCSD2.js → chunk-BZ4EYURA.js} +0 -5
  19. package/dist/{chunk-4LYQ4ONL.js → chunk-E4RM7637.js} +1 -1
  20. package/dist/{chunk-TDRJVMUP.js → chunk-EH4AXGRO.js} +0 -12
  21. package/dist/{chunk-ZXLYEVOP.js → chunk-G3CZA4SD.js} +60 -362
  22. package/dist/chunk-I2KLQ2HA.js +22 -0
  23. package/dist/chunk-IO5WWY6A.js +156 -0
  24. package/dist/{contradiction-scan-U3QKHWQN.js → chunk-JC3FCKYL.js} +191 -87
  25. package/dist/{chunk-SVSQAG6M.js → chunk-KC7KSQR4.js} +47 -28
  26. package/dist/chunk-LZCGPRHS.js +228 -0
  27. package/dist/{chunk-CXM7EBAO.js → chunk-MXFJXUHC.js} +1 -1
  28. package/dist/{chunk-L6I4MQKO.js → chunk-NNAN63QK.js} +6 -6
  29. package/dist/{chunk-VRGUUHBV.js → chunk-NUWDSTP7.js} +1 -1
  30. package/dist/{chunk-6OJAU466.js → chunk-QMUQV5NP.js} +0 -1
  31. package/dist/{chunk-LLUROTZJ.js → chunk-QQXJODFL.js} +9 -9
  32. package/dist/{chunk-6F6EKSVP.js → chunk-QXXEF7VI.js} +1 -1
  33. package/dist/{chunk-NDZNURDM.js → chunk-SEGEX7W4.js} +73 -241
  34. package/dist/{chunk-7NUFIRM3.js → chunk-SWOYEQN2.js} +97 -21
  35. package/dist/chunk-TH5FF5SC.js +16 -0
  36. package/dist/chunk-UZJ7EERS.js +272 -0
  37. package/dist/chunk-YJYZMLD5.js +360 -0
  38. package/dist/{chunk-NKVIN6RD.js → chunk-YKV4EFUI.js} +84 -2
  39. package/dist/{chunk-SSFTU6LP.js → chunk-ZS6VABML.js} +4 -4
  40. package/dist/{cipher-VHAFCG7Z.js → cipher-E23BHBSO.js} +1 -1
  41. package/dist/{consolidation-undo-5ZSX4MWO.js → consolidation-undo-FKJZCJHS.js} +2 -2
  42. package/dist/contradiction-review-WJRWNQ5N.js +29 -0
  43. package/dist/contradiction-scan-5X423QGT.js +12 -0
  44. package/dist/{dreams-ledger-3I52ISYR.js → dreams-ledger-KDX44I7R.js} +1 -1
  45. package/dist/{engine-57HLTQBN.js → engine-5P774HTZ.js} +6 -6
  46. package/dist/{extraction-judge-telemetry-GHOTVYMP.js → extraction-judge-telemetry-O4ZVGLTU.js} +1 -1
  47. package/dist/{fallback-llm-33SPYXQY.js → fallback-llm-43UMEXNJ.js} +3 -3
  48. package/dist/{first-start-migration-I24M2JEE.js → first-start-migration-H2SAXAGR.js} +4 -4
  49. package/dist/{forget-NI4RBDPB.js → forget-ZECIDNL5.js} +1 -1
  50. package/dist/{fs-utils-PZRI2HDZ.js → fs-utils-OYXSZSVV.js} +12 -2
  51. package/dist/{graph-edge-decay-5CVKWBYH.js → graph-edge-decay-24ZKD5QL.js} +5 -5
  52. package/dist/index.js +7187 -71983
  53. package/dist/{kdf-H5B23ZM2.js → kdf-RXKIWHRU.js} +1 -1
  54. package/dist/legacy-hook-compat-QHHKF4GK.js +2 -0
  55. package/dist/{logger-TNOKCH7X.js → logger-XG7JKLPS.js} +1 -1
  56. package/dist/{memory-governance-FEQCA35V.js → memory-governance-6K4M4YXD.js} +5 -5
  57. package/dist/{metadata-JAGIWHEA.js → metadata-WK2TRPYZ.js} +1 -1
  58. package/dist/{migrate-from-identity-anchor-7MMSPEUM.js → migrate-from-identity-anchor-SNDNKHZD.js} +1 -1
  59. package/dist/path-ZKO74XXC.js +7 -0
  60. package/dist/{peers-KRFXWRQ6.js → peers-W53WSDXG.js} +1 -1
  61. package/dist/{purge-XN2VSPZ2.js → purge-IKJISXEQ.js} +1 -1
  62. package/dist/resolution-BN35OXDS.js +11 -0
  63. package/dist/{secure-store-A4NGCNXV.js → secure-store-F75I54O5.js} +3 -3
  64. package/dist/{state-PVISYXRH.js → state-4ITLYMAU.js} +1 -1
  65. package/dist/{state-store-N6TFBFSP.js → state-store-ET3ADVY5.js} +3 -3
  66. package/dist/{storage-R3V6ZFQT.js → storage-5EY6T7ON.js} +3 -3
  67. package/dist/{tier-stats-IZNW66NC.js → tier-stats-ZRQBV6G2.js} +4 -4
  68. package/dist/{trace-NJESSGH7.js → trace-IL2Y34EH.js} +1 -1
  69. package/dist/{tui-MGK2LYJY.js → tui-7KRDCMYK.js} +1 -1
  70. package/dist/{types-R4DO7AKM.js → types-7L34HYDW.js} +3 -3
  71. package/openclaw.plugin.json +153 -20
  72. package/package.json +18 -9
  73. package/scripts/faiss_index.py +756 -0
  74. package/scripts/faiss_requirements.txt +3 -0
  75. package/dist/capsule-export-IXVERCQG.js +0 -17
  76. package/dist/capsule-import-IA6VIOPQ.js +0 -16
  77. package/dist/chunk-3GUF7RQI.js +0 -559
  78. package/dist/chunk-7OQEPGQF.js +0 -533
  79. package/dist/chunk-DIZW6H5J.js +0 -136
  80. package/dist/chunk-FQRSVYY4.js +0 -110
  81. package/dist/chunk-GUSMRW4H.js +0 -12
  82. package/dist/chunk-MLKGABMK.js +0 -9
  83. package/dist/chunk-WPINX4MF.js +0 -380
  84. package/dist/contradiction-review-SVGBS3V5.js +0 -21
  85. package/dist/legacy-hook-compat-XQ7FP6FV.js +0 -35
  86. package/dist/path-JIEGNWFL.js +0 -7
  87. package/dist/resolution-YITUVUTH.js +0 -100
@@ -0,0 +1,756 @@
1
+ #!/usr/bin/env python3
2
+ """FAISS conversation index sidecar.
3
+
4
+ JSON-in/JSON-out CLI used by src/conversation-index/faiss-adapter.ts.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import argparse
10
+ import hashlib
11
+ import json
12
+ import os
13
+ import subprocess
14
+ import sys
15
+ import time
16
+ from pathlib import Path
17
+ from typing import Any
18
+
19
# Loaded embedding models, keyed by resolved model id (filled by get_embedder).
MODEL_CACHE: dict[str, Any] = {}

# Width of the vectors produced by the hash-based fallback embedder.
HASH_EMBED_DIM = 128

# Seconds acquire_lock keeps retrying before it gives up.
LOCK_TIMEOUT_SECONDS = 10.0

# A lock file must be older than this many seconds (and its owner not alive)
# before acquire_lock removes it.
LOCK_STALE_SECONDS = 120.0

# Schema version written into, and required of, manifest.json.
MANIFEST_VERSION = 1

# Requested model ids that are substituted with a sentence-transformers model
# id by normalize_model_id.
MODEL_ID_ALIASES = {
    "text-embedding-3-small": "sentence-transformers/all-MiniLM-L6-v2",
    "text-embedding-3-large": "sentence-transformers/all-mpnet-base-v2",
    "text-embedding-ada-002": "sentence-transformers/all-MiniLM-L6-v2",
}
29
+
30
+
31
class SidecarError(Exception):
    """Failure that the sidecar reports to its caller as a JSON error payload."""
33
+
34
+
35
class DependencyError(SidecarError):
    """Raised when a required Python package cannot be imported."""
37
+
38
+
39
def emit(payload: dict[str, Any]) -> None:
    """Write *payload* to stdout as compact JSON and flush.

    No trailing newline is written, and non-ASCII characters are emitted
    as-is rather than escaped.
    """
    encoded = json.dumps(payload, ensure_ascii=False, separators=(",", ":"))
    stream = sys.stdout
    stream.write(encoded)
    stream.flush()
42
+
43
+
44
def read_payload() -> dict[str, Any]:
    """Read all of stdin and decode it as a JSON object.

    Returns:
        The decoded JSON object.

    Raises:
        SidecarError: if stdin is blank, is not valid JSON, or decodes to
            something other than a JSON object.
    """
    text = sys.stdin.read()
    if text.strip() == "":
        raise SidecarError("empty stdin payload")

    try:
        decoded = json.loads(text)
    except json.JSONDecodeError as exc:
        raise SidecarError(f"invalid JSON payload: {exc}") from exc

    if isinstance(decoded, dict):
        return decoded
    raise SidecarError("payload must be a JSON object")
55
+
56
+
57
def ensure_index_dir(index_path: str) -> Path:
    """Validate *index_path* and make sure the directory exists.

    Missing parent directories are created; an already existing directory is
    accepted.

    Raises:
        SidecarError: if *index_path* is not a string or is blank.
    """
    usable = isinstance(index_path, str) and index_path.strip() != ""
    if not usable:
        raise SidecarError("indexPath is required")

    directory = Path(index_path)
    directory.mkdir(parents=True, exist_ok=True)
    return directory
63
+
64
+
65
def metadata_file(index_dir: Path) -> Path:
    """Return the path of the JSONL metadata file inside *index_dir*."""
    return index_dir.joinpath("metadata.jsonl")
67
+
68
+
69
def index_file(index_dir: Path) -> Path:
    """Return the path of the serialized FAISS index inside *index_dir*."""
    return index_dir.joinpath("index.faiss")
71
+
72
+
73
def manifest_file(index_dir: Path) -> Path:
    """Return the path of the index manifest inside *index_dir*."""
    return index_dir.joinpath("manifest.json")
75
+
76
+
77
def read_metadata(path: Path) -> list[dict[str, Any]]:
    """Load chunk metadata rows from the JSONL file at *path*.

    Blank lines, lines that are not valid JSON, non-object values, and rows
    without a non-empty string ``id`` or a string ``text`` are skipped.
    ``sessionKey``, ``startTs`` and ``endTs`` fall back to ``""`` when they are
    not strings.

    Returns:
        The accepted rows in file order; an empty list when *path* does not
        exist.
    """
    if not path.exists():
        return []

    def _text_or_blank(value: Any) -> str:
        return value if isinstance(value, str) else ""

    accepted: list[dict[str, Any]] = []
    for raw_line in path.read_text(encoding="utf-8").splitlines():
        stripped = raw_line.strip()
        if not stripped:
            continue
        try:
            record = json.loads(stripped)
        except json.JSONDecodeError:
            continue
        if not isinstance(record, dict):
            continue

        record_id = record.get("id")
        body = record.get("text")
        if not (isinstance(record_id, str) and record_id and isinstance(body, str)):
            continue

        accepted.append(
            {
                "id": record_id,
                "sessionKey": _text_or_blank(record.get("sessionKey")),
                "text": body,
                "startTs": _text_or_blank(record.get("startTs")),
                "endTs": _text_or_blank(record.get("endTs")),
            }
        )
    return accepted
108
+
109
+
110
def write_metadata(path: Path, rows: list[dict[str, Any]]) -> None:
    """Write *rows* to *path* as compact JSON, one object per line.

    The rows are written to a temporary file (``path`` with its suffix
    replaced by ``.jsonl.tmp``) which is then moved over *path* with
    ``os.replace``.
    """
    staging = path.with_suffix(".jsonl.tmp")
    with staging.open("w", encoding="utf-8") as out:
        for row in rows:
            encoded = json.dumps(row, ensure_ascii=False, separators=(",", ":"))
            out.write(encoded + "\n")
    os.replace(staging, path)
117
+
118
+
119
def read_manifest(path: Path) -> dict[str, Any] | None:
    """Load the manifest at *path*.

    Returns:
        The decoded JSON object, or ``None`` when the file is missing, cannot
        be read or parsed, or does not contain a JSON object.
    """
    if not path.exists():
        return None
    try:
        decoded = json.loads(path.read_text(encoding="utf-8"))
    except Exception:
        # Best effort: an unreadable manifest is treated like a missing one.
        return None
    if isinstance(decoded, dict):
        return decoded
    return None
127
+
128
+
129
def write_manifest(path: Path, manifest: dict[str, Any]) -> None:
    """Write *manifest* to *path* as compact JSON.

    The content is written to a temporary file (``path`` with its suffix
    replaced by ``.json.tmp``) which is then moved over *path* with
    ``os.replace``.
    """
    serialized = json.dumps(manifest, ensure_ascii=False, separators=(",", ":"))
    staging = path.with_suffix(".json.tmp")
    staging.write_text(serialized, encoding="utf-8")
    os.replace(staging, path)
136
+
137
+
138
def load_vector_dependencies() -> tuple[Any, Any]:
    """Import numpy and faiss on demand.

    Returns:
        The ``(numpy, faiss)`` module pair.

    Raises:
        DependencyError: if either import fails for any reason.
    """
    try:
        import numpy as numpy_module  # type: ignore
        import faiss as faiss_module  # type: ignore
    except Exception as exc:
        raise DependencyError(f"missing faiss dependencies: {exc}") from exc
    return numpy_module, faiss_module
145
+
146
+
147
def sentence_transformers_enabled() -> bool:
    """Report whether sentence-transformers embeddings are switched on.

    ``REMNIC_FAISS_ENABLE_ST`` is consulted when it is set; otherwise
    ``ENGRAM_FAISS_ENABLE_ST`` is used. The value is trimmed and lowercased and
    counts as enabled when it is one of ``1``, ``true``, ``yes`` or ``on``.
    """
    environment = os.environ
    fallback = environment.get("ENGRAM_FAISS_ENABLE_ST", "")
    flag = environment.get("REMNIC_FAISS_ENABLE_ST", fallback)
    return flag.strip().lower() in ("1", "true", "yes", "on")
154
+
155
+
156
def normalize_model_id(model_id: str) -> str:
    """Resolve a requested model id to the id actually used for embedding.

    A blank id defaults to ``sentence-transformers/all-MiniLM-L6-v2`` and ids
    listed in ``MODEL_ID_ALIASES`` are replaced by their mapped value. The
    result is ``"__hash__"`` when the resolved id is ``"hash"``/``"__hash__"``
    or when sentence-transformers support is not enabled.
    """
    requested = (model_id or "").strip() or "sentence-transformers/all-MiniLM-L6-v2"
    canonical = MODEL_ID_ALIASES.get(requested, requested)
    wants_hash = canonical in ("__hash__", "hash")
    if wants_hash or not sentence_transformers_enabled():
        return "__hash__"
    return canonical
166
+
167
+
168
def get_embedder(model_id: str) -> Any:
    """Return the embedding model for *model_id*, or ``None`` for hash mode.

    Models are created once per resolved id and kept in ``MODEL_CACHE``.

    Raises:
        DependencyError: if ``sentence_transformers`` cannot be imported.
    """
    resolved = normalize_model_id(model_id)
    if resolved == "__hash__":
        return None

    if resolved not in MODEL_CACHE:
        try:
            from sentence_transformers import SentenceTransformer  # type: ignore
        except Exception as exc:
            raise DependencyError(f"missing sentence-transformers dependency: {exc}") from exc
        MODEL_CACHE[resolved] = SentenceTransformer(resolved)
    return MODEL_CACHE[resolved]
180
+
181
+
182
def embed_with_hash(texts: list[str], np: Any) -> Any:
    """Build deterministic fallback vectors from the SHA-256 digest of each text.

    Args:
        texts: Strings to embed.
        np: The numpy module.

    Returns:
        A float32 array of shape ``(len(texts), HASH_EMBED_DIM)``. Column ``c``
        holds digest byte ``c % len(digest)`` scaled to ``[-0.5, 0.5]``, so the
        digest bytes repeat once the columns outnumber them.
    """
    matrix = np.zeros((len(texts), HASH_EMBED_DIM), dtype="float32")
    for row, text in enumerate(texts):
        digest = hashlib.sha256(text.encode("utf-8")).digest()
        digest_size = len(digest)
        for col in range(HASH_EMBED_DIM):
            matrix[row, col] = (digest[col % digest_size] / 255.0) - 0.5
    return matrix
189
+
190
+
191
def embed_texts(texts: list[str], model_id: str) -> tuple[Any, Any, Any]:
    """Embed *texts* and L2-normalize the resulting rows.

    The sentence-transformers model for *model_id* is used when one is
    available from ``get_embedder``; otherwise the hash fallback is used.

    Returns:
        ``(vectors, numpy, faiss)`` where ``vectors`` is a 2-D float32 array.
    """
    np, faiss = load_vector_dependencies()
    model = get_embedder(model_id)

    if model is not None:
        encoded = model.encode(
            texts,
            normalize_embeddings=False,
            convert_to_numpy=True,
            show_progress_bar=False,
        )
        matrix = np.asarray(encoded, dtype="float32")
    else:
        matrix = embed_with_hash(texts, np)

    # A single flat vector is promoted to a one-row matrix.
    if matrix.ndim == 1:
        matrix = matrix.reshape(1, -1)
    # Normalization is skipped for an empty batch.
    if matrix.shape[0] > 0:
        faiss.normalize_L2(matrix)
    return matrix, np, faiss
209
+
210
+
211
def write_index(path: Path, vectors: Any, faiss: Any) -> None:
    """Build an inner-product flat index from *vectors* and store it at *path*.

    The index is first written to a temporary file (``path`` with its suffix
    replaced by ``.faiss.tmp``) and then moved over *path* with ``os.replace``.

    Args:
        path: Destination of the serialized index.
        vectors: 2-D array whose second axis gives the index dimension.
        faiss: The faiss module.
    """
    dimension = int(vectors.shape[1])
    flat_index = faiss.IndexFlatIP(dimension)
    has_rows = int(vectors.shape[0]) > 0
    if has_rows:
        flat_index.add(vectors)

    staging = path.with_suffix(".faiss.tmp")
    faiss.write_index(flat_index, str(staging))
    os.replace(staging, path)
219
+
220
+
221
def resolve_vector_dimension(model_id: str) -> int:
    """Return the embedding width for *model_id* by embedding an empty string."""
    sample = embed_texts([""], model_id)[0]
    return int(sample.shape[1])
224
+
225
+
226
def build_empty_vectors(model_id: str) -> tuple[Any, Any]:
    """Return a zero-row float32 matrix sized for *model_id*, plus faiss.

    Returns:
        ``(vectors, faiss)`` where ``vectors`` has shape ``(0, dimension)``.
    """
    np, faiss = load_vector_dependencies()
    width = resolve_vector_dimension(model_id)
    return np.zeros((0, width), dtype="float32"), faiss
231
+
232
+
233
def build_manifest(
    model_id: str,
    vector_dim: int,
    chunk_count: int,
    *,
    generated_at: str | None = None,
    last_successful_rebuild_at: str | None = None,
) -> dict[str, Any]:
    """Assemble the manifest dictionary describing an index.

    Args:
        model_id: Model id as requested by the caller (stored verbatim).
        vector_dim: Width of the indexed vectors.
        chunk_count: Number of indexed chunks.
        generated_at: Timestamp for ``updatedAt``; defaults to the current UTC
            time formatted as ``YYYY-MM-DDTHH:MM:SSZ``.
        last_successful_rebuild_at: Value for ``lastSuccessfulRebuildAt``;
            defaults to the ``updatedAt`` value.
    """
    resolved_model = normalize_model_id(model_id)
    if generated_at:
        stamp = generated_at
    else:
        stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    rebuild_stamp = last_successful_rebuild_at if last_successful_rebuild_at else stamp

    manifest: dict[str, Any] = {}
    manifest["version"] = MANIFEST_VERSION
    manifest["modelId"] = model_id
    manifest["normalizedModelId"] = resolved_model
    manifest["dimension"] = int(vector_dim)
    manifest["chunkCount"] = int(chunk_count)
    manifest["updatedAt"] = stamp
    manifest["lastSuccessfulRebuildAt"] = rebuild_stamp
    return manifest
252
+
253
+
254
def validate_index_manifest(
    manifest: dict[str, Any] | None,
    *,
    requested_model_id: str,
    actual_dimension: int | None = None,
    expected_dimension: int | None = None,
) -> dict[str, Any]:
    """Check a loaded manifest and return a cleaned copy of its fields.

    Args:
        manifest: Decoded manifest, or ``None`` when it could not be loaded.
        requested_model_id: Model id of the current request; its normalized
            form must equal the manifest's ``normalizedModelId``.
        actual_dimension: When given, must equal the manifest dimension
            (reported as the ``index`` dimension on mismatch).
        expected_dimension: When given, must equal the manifest dimension
            (reported as the ``query`` dimension on mismatch).

    Returns:
        A dictionary with the validated ``version``, ``normalizedModelId``,
        ``dimension`` and ``chunkCount`` plus ``modelId``, ``updatedAt`` and
        ``lastSuccessfulRebuildAt`` (each ``""`` when not a string).

    Raises:
        SidecarError: if the manifest is missing or any check fails.
    """
    if manifest is None:
        raise SidecarError("missing index manifest; rebuild the FAISS conversation index")

    def _is_plain_int(value: Any) -> bool:
        # bool is a subclass of int, so true/false must be excluded explicitly.
        return isinstance(value, int) and not isinstance(value, bool)

    def _string_field(key: str) -> str:
        value = manifest.get(key)
        return value if isinstance(value, str) else ""

    version = manifest.get("version")
    if not (_is_plain_int(version) and version == MANIFEST_VERSION):
        raise SidecarError("unsupported index manifest version; rebuild the FAISS conversation index")

    stored_model = manifest.get("normalizedModelId")
    if not (isinstance(stored_model, str) and stored_model):
        raise SidecarError("index manifest missing normalized model id; rebuild the FAISS conversation index")

    wanted_model = normalize_model_id(requested_model_id)
    if stored_model != wanted_model:
        raise SidecarError(
            "index model mismatch "
            f"(index={stored_model}, query={wanted_model}); "
            "rebuild the FAISS conversation index"
        )

    stored_dimension = manifest.get("dimension")
    if not _is_plain_int(stored_dimension) or stored_dimension <= 0:
        raise SidecarError("index manifest missing vector dimension; rebuild the FAISS conversation index")

    stored_count = manifest.get("chunkCount")
    if not _is_plain_int(stored_count) or stored_count < 0:
        raise SidecarError("index manifest missing chunk count; rebuild the FAISS conversation index")

    if actual_dimension is not None and stored_dimension != int(actual_dimension):
        raise SidecarError(
            f"index dimension mismatch (manifest={stored_dimension}, index={int(actual_dimension)}); "
            "rebuild the FAISS conversation index"
        )

    if expected_dimension is not None and stored_dimension != int(expected_dimension):
        raise SidecarError(
            f"index dimension mismatch (manifest={stored_dimension}, query={int(expected_dimension)}); "
            "rebuild the FAISS conversation index"
        )

    return {
        "version": version,
        "modelId": _string_field("modelId"),
        "normalizedModelId": stored_model,
        "dimension": stored_dimension,
        "chunkCount": stored_count,
        "updatedAt": _string_field("updatedAt"),
        "lastSuccessfulRebuildAt": _string_field("lastSuccessfulRebuildAt"),
    }
313
+
314
+
315
def validate_artifact_counts(index: Any, rows: list[dict[str, Any]], manifest: dict[str, Any]) -> None:
    """Require the index, metadata rows and manifest to agree on chunk count.

    Args:
        index: Loaded index; its ``ntotal`` attribute is the vector count
            (treated as ``-1`` when the attribute is absent).
        rows: Metadata rows.
        manifest: Manifest containing ``chunkCount``.

    Raises:
        SidecarError: if the three counts are not all equal.
    """
    in_index = int(getattr(index, "ntotal", -1))
    in_metadata = len(rows)
    in_manifest = manifest["chunkCount"]

    if in_index == in_metadata and in_index == int(in_manifest):
        return

    raise SidecarError(
        "index artifact count mismatch "
        f"(index={in_index}, metadata={in_metadata}, manifest={int(in_manifest)}); "
        "rebuild the FAISS conversation index"
    )
326
+
327
+
328
def parse_chunks(payload: dict[str, Any]) -> list[dict[str, Any]]:
    """Extract well-formed chunk rows from ``payload["chunks"]``.

    Entries that are not objects, or that lack a non-empty string ``id`` or a
    string ``text``, are dropped. ``sessionKey``, ``startTs`` and ``endTs``
    fall back to ``""`` when they are not strings.

    Raises:
        SidecarError: if ``chunks`` is not a list.
    """
    candidates = payload.get("chunks")
    if not isinstance(candidates, list):
        raise SidecarError("chunks must be an array")

    def _text_or_blank(value: Any) -> str:
        return value if isinstance(value, str) else ""

    def _is_usable(entry: Any) -> bool:
        if not isinstance(entry, dict):
            return False
        entry_id = entry.get("id")
        return isinstance(entry_id, str) and bool(entry_id) and isinstance(entry.get("text"), str)

    return [
        {
            "id": entry["id"],
            "sessionKey": _text_or_blank(entry.get("sessionKey")),
            "text": entry["text"],
            "startTs": _text_or_blank(entry.get("startTs")),
            "endTs": _text_or_blank(entry.get("endTs")),
        }
        for entry in candidates
        if _is_usable(entry)
    ]
352
+
353
+
354
def metadata_row_key(row: dict[str, Any]) -> tuple[str, str]:
    """Return the ``(sessionKey, id)`` identity of a metadata row.

    Either component is ``""`` when the corresponding field is not a string.
    """
    session_key = row.get("sessionKey")
    row_id = row.get("id")
    return (
        session_key if isinstance(session_key, str) else "",
        row_id if isinstance(row_id, str) else "",
    )


def merge_rows(existing: list[dict[str, Any]], updates: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Merge *updates* into *existing*, keyed by ``metadata_row_key``.

    A row whose key is already present replaces the earlier row in place (the
    original position is kept); rows with new keys are appended in the order
    they are first seen. Each key appears exactly once in the result, even if
    *existing* itself contains repeated keys.

    Returns:
        A new list; neither input list is modified.
    """
    # dict preserves first-insertion order and re-assigning a key keeps its
    # slot, which gives "replace in place, append new" without a separate
    # order list that could carry duplicate keys through to the output.
    merged: dict[tuple[str, str], dict[str, Any]] = {}
    for row in existing:
        merged[metadata_row_key(row)] = row
    for row in updates:
        merged[metadata_row_key(row)] = row
    return list(merged.values())
373
+
374
+
375
def read_lock_owner_pid(lock_path: Path) -> int | None:
    """Return the positive PID stored in *lock_path*, if any.

    Returns:
        The PID, or ``None`` when the file cannot be read, is blank, does not
        contain an integer, or contains a non-positive number.
    """
    try:
        content = lock_path.read_text(encoding="utf-8").strip()
    except Exception:
        return None
    if content == "":
        return None

    try:
        owner = int(content)
    except ValueError:
        return None
    if owner <= 0:
        return None
    return owner
387
+
388
+
389
def is_process_alive(pid: int) -> bool:
    """Report whether a process with *pid* appears to exist.

    On Windows (``os.name == "nt"``) the ``tasklist`` command is queried; any
    failure to run it counts as "not alive". Elsewhere signal 0 is sent with
    ``os.kill``: a permission error counts as alive, every other ``OSError``
    as not alive. Non-positive PIDs are never alive.
    """
    if pid <= 0:
        return False

    if os.name != "nt":
        try:
            os.kill(pid, 0)
        except ProcessLookupError:
            return False
        except PermissionError:
            # The process exists but belongs to someone else.
            return True
        except OSError:
            return False
        return True

    try:
        result = subprocess.run(
            ["tasklist", "/FI", f"PID eq {pid}", "/FO", "CSV", "/NH"],
            capture_output=True,
            text=True,
            timeout=2,
        )
    except Exception:
        return False

    listing = result.stdout.strip()
    if not listing or listing.startswith("INFO:"):
        return False
    return f'"{pid}"' in listing
419
+
420
+
421
def acquire_lock(index_dir: Path, lock_name: str) -> Path:
    """Create the lock file *lock_name* in *index_dir*, waiting if it is held.

    The lock is an exclusively created file containing this process's PID. A
    lock file is removed and retried when it is older than
    ``LOCK_STALE_SECONDS`` and its recorded owner is not alive.

    Returns:
        The path of the lock file that was created.

    Raises:
        SidecarError: if the lock is still held once ``LOCK_TIMEOUT_SECONDS``
            have elapsed.
    """
    lock_path = index_dir / lock_name

    # Human-readable name for the timeout message, e.g. ".index.lock" -> "index".
    label = lock_name.strip(".")
    if label.endswith(".lock"):
        label = label[: -len(".lock")]
    label = label.replace(".", " ")

    give_up_at = time.monotonic() + LOCK_TIMEOUT_SECONDS
    exclusive_create = os.O_CREAT | os.O_EXCL | os.O_WRONLY

    while True:
        try:
            descriptor = os.open(str(lock_path), exclusive_create)
            with os.fdopen(descriptor, "w", encoding="utf-8") as handle:
                handle.write(str(os.getpid()))
            return lock_path
        except FileExistsError:
            try:
                lock_age = time.time() - lock_path.stat().st_mtime
            except FileNotFoundError:
                # The holder released the lock in the meantime; retry at once.
                continue

            holder_pid = read_lock_owner_pid(lock_path)
            holder_alive = holder_pid is not None and is_process_alive(holder_pid)

            if lock_age > LOCK_STALE_SECONDS and not holder_alive:
                lock_path.unlink(missing_ok=True)
                continue

            if time.monotonic() >= give_up_at:
                raise SidecarError(f"timed out waiting for FAISS {label} lock")
            time.sleep(0.05)
451
+
452
+
453
def acquire_index_lock(index_dir: Path) -> Path:
    """Acquire the ``.index.lock`` file in *index_dir* and return its path."""
    index_lock_name = ".index.lock"
    return acquire_lock(index_dir, index_lock_name)
455
+
456
+
457
def acquire_writer_lock(index_dir: Path) -> Path:
    """Acquire the ``.writer.lock`` file in *index_dir* and return its path."""
    writer_lock_name = ".writer.lock"
    return acquire_lock(index_dir, writer_lock_name)
459
+
460
+
461
def release_lock(lock_path: Path) -> None:
    """Delete the lock file at *lock_path*; a missing file is not an error."""
    Path.unlink(lock_path, missing_ok=True)
463
+
464
+
465
def release_index_lock(lock_path: Path) -> None:
    """Release an index lock; delegates to ``release_lock``."""
    return release_lock(lock_path)
467
+
468
+
469
def run_upsert(payload: dict[str, Any]) -> dict[str, Any]:
    """Merge the payload's chunks into the stored index and rewrite it.

    The payload must carry a non-empty string ``modelId``, a string
    ``indexPath`` and a ``chunks`` array. All merged rows are re-embedded and
    the index, metadata and manifest files are replaced. The manifest keeps
    the previous ``lastSuccessfulRebuildAt`` when one is recorded.

    Returns:
        ``{"ok": True, "upserted": <number of accepted chunks>}``.

    Raises:
        SidecarError: on an invalid payload or when a lock cannot be acquired.
    """
    model_id = payload.get("modelId")
    if not isinstance(model_id, str) or not model_id:
        raise SidecarError("modelId is required")

    # Hand the raw value to ensure_index_dir so its type check can reject
    # null/number/array values; wrapping it in str() would instead turn them
    # into directory names such as "None".
    index_dir = ensure_index_dir(payload.get("indexPath", ""))
    chunks = parse_chunks(payload)

    if not chunks:
        return {"ok": True, "upserted": 0}

    meta_path = metadata_file(index_dir)
    idx_path = index_file(index_dir)
    manifest_path = manifest_file(index_dir)

    # The writer lock serializes writers for the whole read-merge-embed-write
    # cycle; the index lock is only held while the files are replaced.
    writer_lock_path = acquire_writer_lock(index_dir)
    try:
        existing = read_metadata(meta_path)
        existing_manifest = read_manifest(manifest_path)
        merged = merge_rows(existing, chunks)

        texts = [row["text"] for row in merged]
        vectors, _np, faiss = embed_texts(texts, model_id)

        lock_path = acquire_index_lock(index_dir)
        try:
            # Commit FAISS index first; metadata follows so we never point at missing vectors.
            write_index(idx_path, vectors, faiss)
            write_metadata(meta_path, merged)
            preserved_rebuild_at = (
                existing_manifest.get("lastSuccessfulRebuildAt")
                if isinstance(existing_manifest, dict)
                and isinstance(existing_manifest.get("lastSuccessfulRebuildAt"), str)
                and existing_manifest.get("lastSuccessfulRebuildAt")
                else None
            )
            write_manifest(
                manifest_path,
                build_manifest(
                    model_id,
                    int(vectors.shape[1]),
                    len(merged),
                    last_successful_rebuild_at=preserved_rebuild_at,
                ),
            )
        finally:
            release_index_lock(lock_path)
    finally:
        release_lock(writer_lock_path)

    return {"ok": True, "upserted": len(chunks)}
520
+
521
+
522
def run_rebuild(payload: dict[str, Any]) -> dict[str, Any]:
    """Replace the stored index with one built only from the payload's chunks.

    The payload must carry a non-empty string ``modelId``, a string
    ``indexPath`` and a ``chunks`` array. An empty chunk list produces an
    empty index whose dimension matches the model.

    Returns:
        ``{"ok": True, "rebuilt": <number of accepted chunks>}``.

    Raises:
        SidecarError: on an invalid payload or when a lock cannot be acquired.
    """
    model_id = payload.get("modelId")
    if not isinstance(model_id, str) or not model_id:
        raise SidecarError("modelId is required")

    # Hand the raw value to ensure_index_dir so its type check can reject
    # null/number/array values; wrapping it in str() would instead turn them
    # into directory names such as "None".
    index_dir = ensure_index_dir(payload.get("indexPath", ""))
    chunks = parse_chunks(payload)

    writer_lock_path = acquire_writer_lock(index_dir)
    try:
        if chunks:
            texts = [row["text"] for row in chunks]
            vectors, _np, faiss = embed_texts(texts, model_id)
            chunk_count = len(chunks)
        else:
            vectors, faiss = build_empty_vectors(model_id)
            chunk_count = 0

        lock_path = acquire_index_lock(index_dir)
        try:
            meta_path = metadata_file(index_dir)
            idx_path = index_file(index_dir)
            manifest_path = manifest_file(index_dir)

            write_index(idx_path, vectors, faiss)
            write_metadata(meta_path, chunks)
            write_manifest(manifest_path, build_manifest(model_id, int(vectors.shape[1]), chunk_count))
        finally:
            release_index_lock(lock_path)
    finally:
        release_lock(writer_lock_path)

    return {"ok": True, "rebuilt": len(chunks)}
555
+
556
+
557
def run_search(payload: dict[str, Any]) -> dict[str, Any]:
    """Search the stored index for the chunks closest to ``payload["query"]``.

    The payload must carry a non-empty string ``modelId``, a non-blank string
    ``query``, a positive integer ``topK`` and a string ``indexPath``.

    Returns:
        ``{"ok": True, "results": [...]}`` where each result has ``path`` (the
        chunk id), ``snippet`` (first 280 characters of the chunk text) and
        ``score``. The list is empty when no index artifacts exist or the
        index holds no rows.

    Raises:
        SidecarError: on an invalid payload, incomplete or inconsistent
            artifacts, a model or dimension mismatch, or a lock timeout.
    """
    model_id = payload.get("modelId")
    query = payload.get("query")
    top_k = payload.get("topK")
    if not isinstance(model_id, str) or not model_id:
        raise SidecarError("modelId is required")
    if not isinstance(query, str) or not query.strip():
        raise SidecarError("query is required")
    if not isinstance(top_k, int) or isinstance(top_k, bool) or top_k <= 0:
        raise SidecarError("topK must be a positive integer")

    # Hand the raw value to ensure_index_dir so its type check can reject
    # null/number/array values; wrapping it in str() would instead turn them
    # into directory names such as "None".
    index_dir = ensure_index_dir(payload.get("indexPath", ""))
    meta_path = metadata_file(index_dir)
    idx_path = index_file(index_dir)
    manifest_path = manifest_file(index_dir)

    # The lock is only needed while the artifacts are read from disk; the
    # search itself runs on the in-memory copy.
    lock_path = acquire_index_lock(index_dir)
    try:
        has_index = idx_path.exists()
        has_metadata = meta_path.exists()
        has_manifest = manifest_path.exists()
        if not has_index and not has_metadata and not has_manifest:
            return {"ok": True, "results": []}
        if not has_index or not has_metadata or not has_manifest:
            raise SidecarError("conversation index artifacts incomplete; rebuild the FAISS conversation index")

        rows = read_metadata(meta_path)
        _np, faiss = load_vector_dependencies()
        index = faiss.read_index(str(idx_path))
        manifest = validate_index_manifest(
            read_manifest(manifest_path),
            requested_model_id=model_id,
            actual_dimension=int(index.d),
        )
        validate_artifact_counts(index, rows, manifest)
    finally:
        release_index_lock(lock_path)

    if not rows:
        return {"ok": True, "results": []}

    query_vector, _np2, _faiss2 = embed_texts([query], model_id)
    query_dimension = int(query_vector.shape[1])
    if int(manifest["dimension"]) != query_dimension:
        raise SidecarError(
            f"index dimension mismatch (manifest={int(manifest['dimension'])}, query={query_dimension}); "
            "rebuild the FAISS conversation index"
        )

    # Never ask for more hits than there are rows: the surplus slots would
    # only be out-of-range entries that the loop below discards, and an
    # oversized topK would needlessly inflate the result buffers.
    effective_k = min(top_k, len(rows))
    distances, indices = index.search(query_vector, effective_k)
    results: list[dict[str, Any]] = []
    for score, idx in zip(distances[0], indices[0]):
        idx_i = int(idx)
        if idx_i < 0 or idx_i >= len(rows):
            continue
        row = rows[idx_i]
        results.append(
            {
                "path": row["id"],
                "snippet": row["text"][:280],
                "score": float(score),
            }
        )

    return {"ok": True, "results": results}
622
+
623
+
624
def build_health_response(payload: dict[str, Any], *, include_metadata: bool = False) -> dict[str, Any]:
    """Check dependencies and on-disk artifacts and summarize their state.

    Args:
        payload: Request payload; ``indexPath`` must be a string and
            ``modelId`` is optional (a non-string value is treated as absent).
        include_metadata: When true, add a ``metadata`` section with the row
            count and the presence of each artifact file.

    Returns:
        ``{"ok": True, "status": "ok" | "degraded"}`` plus ``error`` (first
        problem found), ``manifest`` (validated manifest fields) and
        ``metadata`` when available.

    Raises:
        SidecarError: if ``indexPath`` is missing or not a string.
    """
    # Hand the raw value to ensure_index_dir so its type check can reject
    # null/number/array values; wrapping it in str() would instead turn them
    # into directory names such as "None".
    index_dir = ensure_index_dir(payload.get("indexPath", ""))
    meta_path = metadata_file(index_dir)
    idx_path = index_file(index_dir)
    manifest_path = manifest_file(index_dir)

    status = "ok"
    error = ""
    # A JSON null or other non-string modelId falls back to the default model
    # rather than being stringified into a bogus model name.
    raw_model_id = payload.get("modelId")
    model_id = normalize_model_id(raw_model_id if isinstance(raw_model_id, str) else "")
    manifest_details: dict[str, Any] | None = None
    metadata_details: dict[str, Any] | None = None

    # Step 1: are the Python dependencies importable?
    try:
        load_vector_dependencies()
        if model_id != "__hash__":
            try:
                import sentence_transformers  # type: ignore # noqa: F401
            except Exception as exc:
                raise DependencyError(f"missing sentence-transformers dependency: {exc}") from exc
    except Exception as exc:
        status = "degraded"
        error = str(exc)

    # Step 2: inspect the artifacts under the index lock. Only the first
    # problem encountered is reported in ``error``.
    lock_path: Path | None = None
    try:
        lock_path = acquire_index_lock(index_dir)
        has_index = idx_path.exists()
        has_metadata = meta_path.exists()
        has_manifest = manifest_path.exists()
        rows: list[dict[str, Any]] | None = None

        if has_index or has_metadata or has_manifest:
            if not has_index or not has_metadata or not has_manifest:
                if status == "ok":
                    status = "degraded"
                if not error:
                    error = "conversation index artifacts incomplete; rebuild the FAISS conversation index"
            else:
                try:
                    rows = read_metadata(meta_path)
                    _np, faiss = load_vector_dependencies()
                    index = faiss.read_index(str(idx_path))
                    manifest_details = validate_index_manifest(
                        read_manifest(manifest_path),
                        requested_model_id=model_id,
                        actual_dimension=int(index.d),
                    )
                    validate_artifact_counts(index, rows, manifest_details)
                except Exception as exc:
                    if status == "ok":
                        status = "degraded"
                    if not error:
                        error = str(exc)
        elif status == "ok":
            status = "degraded"
            error = "conversation index artifacts missing; build the FAISS conversation index"

        if include_metadata:
            if rows is None:
                rows = read_metadata(meta_path)
            metadata_details = {
                "chunkCount": len(rows),
                "hasIndex": has_index,
                "hasMetadata": has_metadata,
                "hasManifest": has_manifest,
            }
    except Exception as exc:
        if status == "ok":
            status = "degraded"
        if not error:
            error = str(exc)
        if include_metadata:
            # The has_* flags may not have been assigned if the failure
            # happened early (e.g. lock timeout), so recompute from disk.
            try:
                chunk_count = len(read_metadata(meta_path))
            except Exception:
                chunk_count = 0
            metadata_details = {
                "chunkCount": chunk_count,
                "hasIndex": idx_path.exists(),
                "hasMetadata": meta_path.exists(),
                "hasManifest": manifest_path.exists(),
            }
    finally:
        if lock_path is not None:
            release_index_lock(lock_path)

    response: dict[str, Any] = {"ok": True, "status": status}
    if error:
        response["error"] = error
    if manifest_details is not None:
        response["manifest"] = manifest_details
    if metadata_details is not None:
        response["metadata"] = metadata_details
    return response
718
+
719
+
720
def run_health(payload: dict[str, Any]) -> dict[str, Any]:
    """Handle the ``health`` command: a health report without the metadata section."""
    return build_health_response(payload, include_metadata=False)
722
+
723
+
724
def run_inspect(payload: dict[str, Any]) -> dict[str, Any]:
    """Handle the ``inspect`` command: a health report including the metadata section."""
    report = build_health_response(payload, include_metadata=True)
    return report
726
+
727
+
728
def main() -> int:
    """Run one sidecar command: JSON payload on stdin, JSON response on stdout.

    Returns:
        ``0`` after a successful command or a reported ``SidecarError``
        (emitted as ``{"ok": false, "error": ...}``); ``1`` after an unexpected
        exception, whose details go to stderr only.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("command", choices=["upsert", "rebuild", "search", "health", "inspect"])
    args = parser.parse_args()

    try:
        payload = read_payload()
        handlers = {
            "upsert": run_upsert,
            "rebuild": run_rebuild,
            "search": run_search,
            "inspect": run_inspect,
        }
        # Any command not listed above is handled as "health".
        handler = handlers.get(args.command, run_health)
        emit(handler(payload))
        return 0
    except (SidecarError, DependencyError) as exc:
        emit({"ok": False, "error": str(exc)})
        return 0
    except Exception as exc:
        print(f"faiss sidecar internal error: {exc}", file=sys.stderr)
        emit({"ok": False, "error": "internal sidecar error"})
        return 1
753
+
754
+
755
# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
    sys.exit(main())