@getrift/rift 0.1.0-beta.21 → 0.1.0-beta.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (320)
  1. package/README.md +7 -3
  2. package/dist/src/capture/auto-capture.d.ts +105 -4
  3. package/dist/src/capture/auto-capture.d.ts.map +1 -1
  4. package/dist/src/capture/auto-capture.js +313 -34
  5. package/dist/src/capture/auto-capture.js.map +1 -1
  6. package/dist/src/capture/claude-cli-triage-provider.d.ts +28 -0
  7. package/dist/src/capture/claude-cli-triage-provider.d.ts.map +1 -0
  8. package/dist/src/capture/claude-cli-triage-provider.js +88 -0
  9. package/dist/src/capture/claude-cli-triage-provider.js.map +1 -0
  10. package/dist/src/capture/codex-cli-triage-provider.d.ts.map +1 -1
  11. package/dist/src/capture/codex-cli-triage-provider.js +1 -33
  12. package/dist/src/capture/codex-cli-triage-provider.js.map +1 -1
  13. package/dist/src/capture/cursor-capture.d.ts +89 -0
  14. package/dist/src/capture/cursor-capture.d.ts.map +1 -0
  15. package/dist/src/capture/cursor-capture.js +121 -0
  16. package/dist/src/capture/cursor-capture.js.map +1 -0
  17. package/dist/src/capture/observability.d.ts +30 -0
  18. package/dist/src/capture/observability.d.ts.map +1 -1
  19. package/dist/src/capture/observability.js +29 -0
  20. package/dist/src/capture/observability.js.map +1 -1
  21. package/dist/src/capture/recover-quarantine.d.ts +4 -4
  22. package/dist/src/capture/sources.d.ts +41 -3
  23. package/dist/src/capture/sources.d.ts.map +1 -1
  24. package/dist/src/capture/sources.js +43 -1
  25. package/dist/src/capture/sources.js.map +1 -1
  26. package/dist/src/capture/triage-classification.d.ts +69 -0
  27. package/dist/src/capture/triage-classification.d.ts.map +1 -0
  28. package/dist/src/capture/triage-classification.js +62 -0
  29. package/dist/src/capture/triage-classification.js.map +1 -0
  30. package/dist/src/capture/triage-provider-factory.d.ts +36 -0
  31. package/dist/src/capture/triage-provider-factory.d.ts.map +1 -0
  32. package/dist/src/capture/triage-provider-factory.js +55 -0
  33. package/dist/src/capture/triage-provider-factory.js.map +1 -0
  34. package/dist/src/capture/triage.d.ts +1 -1
  35. package/dist/src/capture/triage.d.ts.map +1 -1
  36. package/dist/src/capture/triage.js +8 -6
  37. package/dist/src/capture/triage.js.map +1 -1
  38. package/dist/src/cli/commands/capture.d.ts.map +1 -1
  39. package/dist/src/cli/commands/capture.js +79 -17
  40. package/dist/src/cli/commands/capture.js.map +1 -1
  41. package/dist/src/cli/commands/chunk-backfill.d.ts +13 -0
  42. package/dist/src/cli/commands/chunk-backfill.d.ts.map +1 -0
  43. package/dist/src/cli/commands/chunk-backfill.js +157 -0
  44. package/dist/src/cli/commands/chunk-backfill.js.map +1 -0
  45. package/dist/src/cli/commands/cursor-probe.d.ts +20 -0
  46. package/dist/src/cli/commands/cursor-probe.d.ts.map +1 -0
  47. package/dist/src/cli/commands/cursor-probe.js +162 -0
  48. package/dist/src/cli/commands/cursor-probe.js.map +1 -0
  49. package/dist/src/cli/commands/menubar.d.ts +3 -1
  50. package/dist/src/cli/commands/menubar.d.ts.map +1 -1
  51. package/dist/src/cli/commands/menubar.js +36 -12
  52. package/dist/src/cli/commands/menubar.js.map +1 -1
  53. package/dist/src/cli/commands/onboard.d.ts +22 -2
  54. package/dist/src/cli/commands/onboard.d.ts.map +1 -1
  55. package/dist/src/cli/commands/onboard.js +160 -32
  56. package/dist/src/cli/commands/onboard.js.map +1 -1
  57. package/dist/src/cli/commands/status.d.ts.map +1 -1
  58. package/dist/src/cli/commands/status.js +12 -0
  59. package/dist/src/cli/commands/status.js.map +1 -1
  60. package/dist/src/cli/commands/update.d.ts +34 -1
  61. package/dist/src/cli/commands/update.d.ts.map +1 -1
  62. package/dist/src/cli/commands/update.js +166 -1
  63. package/dist/src/cli/commands/update.js.map +1 -1
  64. package/dist/src/cli/index.d.ts.map +1 -1
  65. package/dist/src/cli/index.js +4 -0
  66. package/dist/src/cli/index.js.map +1 -1
  67. package/dist/src/cli/postinstall-menubar.d.ts +20 -13
  68. package/dist/src/cli/postinstall-menubar.d.ts.map +1 -1
  69. package/dist/src/cli/postinstall-menubar.js +56 -1
  70. package/dist/src/cli/postinstall-menubar.js.map +1 -1
  71. package/dist/src/cli/status/friend-header.d.ts +16 -3
  72. package/dist/src/cli/status/friend-header.d.ts.map +1 -1
  73. package/dist/src/cli/status/friend-header.js +186 -10
  74. package/dist/src/cli/status/friend-header.js.map +1 -1
  75. package/dist/src/cli/status/local-signals.d.ts +42 -4
  76. package/dist/src/cli/status/local-signals.d.ts.map +1 -1
  77. package/dist/src/cli/status/local-signals.js +52 -1
  78. package/dist/src/cli/status/local-signals.js.map +1 -1
  79. package/dist/src/config/schema.d.ts +220 -14
  80. package/dist/src/config/schema.d.ts.map +1 -1
  81. package/dist/src/config/schema.js +82 -7
  82. package/dist/src/config/schema.js.map +1 -1
  83. package/dist/src/diagnostics/claude-preflight.d.ts +35 -0
  84. package/dist/src/diagnostics/claude-preflight.d.ts.map +1 -0
  85. package/dist/src/diagnostics/claude-preflight.js +90 -0
  86. package/dist/src/diagnostics/claude-preflight.js.map +1 -0
  87. package/dist/src/diagnostics/codex-preflight.d.ts +1 -1
  88. package/dist/src/diagnostics/codex-preflight.d.ts.map +1 -1
  89. package/dist/src/diagnostics/codex-preflight.js +24 -0
  90. package/dist/src/diagnostics/codex-preflight.js.map +1 -1
  91. package/dist/src/diagnostics/doctor.d.ts +7 -4
  92. package/dist/src/diagnostics/doctor.d.ts.map +1 -1
  93. package/dist/src/diagnostics/doctor.js +70 -11
  94. package/dist/src/diagnostics/doctor.js.map +1 -1
  95. package/dist/src/diagnostics/memory-coverage.d.ts +54 -0
  96. package/dist/src/diagnostics/memory-coverage.d.ts.map +1 -0
  97. package/dist/src/diagnostics/memory-coverage.js +272 -0
  98. package/dist/src/diagnostics/memory-coverage.js.map +1 -0
  99. package/dist/src/diagnostics/notify.d.ts +20 -3
  100. package/dist/src/diagnostics/notify.d.ts.map +1 -1
  101. package/dist/src/diagnostics/notify.js +54 -14
  102. package/dist/src/diagnostics/notify.js.map +1 -1
  103. package/dist/src/ingestion/chunk-meta.d.ts +85 -0
  104. package/dist/src/ingestion/chunk-meta.d.ts.map +1 -0
  105. package/dist/src/ingestion/chunk-meta.js +167 -0
  106. package/dist/src/ingestion/chunk-meta.js.map +1 -0
  107. package/dist/src/ingestion/chunk-text.d.ts +39 -0
  108. package/dist/src/ingestion/chunk-text.d.ts.map +1 -0
  109. package/dist/src/ingestion/chunk-text.js +114 -0
  110. package/dist/src/ingestion/chunk-text.js.map +1 -0
  111. package/dist/src/ingestion/cursor/cursor-store.d.ts +177 -0
  112. package/dist/src/ingestion/cursor/cursor-store.d.ts.map +1 -0
  113. package/dist/src/ingestion/cursor/cursor-store.js +243 -0
  114. package/dist/src/ingestion/cursor/cursor-store.js.map +1 -0
  115. package/dist/src/ingestion/cursor/enrich-roots.d.ts +16 -0
  116. package/dist/src/ingestion/cursor/enrich-roots.d.ts.map +1 -0
  117. package/dist/src/ingestion/cursor/enrich-roots.js +22 -0
  118. package/dist/src/ingestion/cursor/enrich-roots.js.map +1 -0
  119. package/dist/src/ingestion/cursor/vscdb-reader.d.ts +32 -0
  120. package/dist/src/ingestion/cursor/vscdb-reader.d.ts.map +1 -0
  121. package/dist/src/ingestion/cursor/vscdb-reader.js +113 -0
  122. package/dist/src/ingestion/cursor/vscdb-reader.js.map +1 -0
  123. package/dist/src/ingestion/cursor/workspace-root.d.ts +96 -0
  124. package/dist/src/ingestion/cursor/workspace-root.d.ts.map +1 -0
  125. package/dist/src/ingestion/cursor/workspace-root.js +187 -0
  126. package/dist/src/ingestion/cursor/workspace-root.js.map +1 -0
  127. package/dist/src/ingestion/indexer.d.ts.map +1 -1
  128. package/dist/src/ingestion/indexer.js +41 -32
  129. package/dist/src/ingestion/indexer.js.map +1 -1
  130. package/dist/src/jobs/handlers/compact.d.ts.map +1 -1
  131. package/dist/src/jobs/handlers/compact.js +9 -4
  132. package/dist/src/jobs/handlers/compact.js.map +1 -1
  133. package/dist/src/jobs/handlers/ingest.d.ts.map +1 -1
  134. package/dist/src/jobs/handlers/ingest.js +60 -30
  135. package/dist/src/jobs/handlers/ingest.js.map +1 -1
  136. package/dist/src/jobs/handlers/reconcile.d.ts.map +1 -1
  137. package/dist/src/jobs/handlers/reconcile.js +128 -45
  138. package/dist/src/jobs/handlers/reconcile.js.map +1 -1
  139. package/dist/src/jobs/handlers/save.d.ts.map +1 -1
  140. package/dist/src/jobs/handlers/save.js +122 -72
  141. package/dist/src/jobs/handlers/save.js.map +1 -1
  142. package/dist/src/jobs/types.d.ts +1 -1
  143. package/dist/src/main.js +27 -16
  144. package/dist/src/main.js.map +1 -1
  145. package/dist/src/mcp/capture-diagnostics.d.ts +51 -0
  146. package/dist/src/mcp/capture-diagnostics.d.ts.map +1 -0
  147. package/dist/src/mcp/capture-diagnostics.js +127 -0
  148. package/dist/src/mcp/capture-diagnostics.js.map +1 -0
  149. package/dist/src/mcp/memory-diagnostics.d.ts +6 -0
  150. package/dist/src/mcp/memory-diagnostics.d.ts.map +1 -0
  151. package/dist/src/mcp/memory-diagnostics.js +51 -0
  152. package/dist/src/mcp/memory-diagnostics.js.map +1 -0
  153. package/dist/src/mcp/server.d.ts.map +1 -1
  154. package/dist/src/mcp/server.js +10 -3
  155. package/dist/src/mcp/server.js.map +1 -1
  156. package/dist/src/mcp/tools/context-pack.d.ts.map +1 -1
  157. package/dist/src/mcp/tools/context-pack.js +7 -1
  158. package/dist/src/mcp/tools/context-pack.js.map +1 -1
  159. package/dist/src/mcp/tools/conversations-search.d.ts +1 -1
  160. package/dist/src/mcp/tools/conversations-search.d.ts.map +1 -1
  161. package/dist/src/mcp/tools/conversations-search.js +7 -1
  162. package/dist/src/mcp/tools/conversations-search.js.map +1 -1
  163. package/dist/src/mcp/tools/evidence-feedback.d.ts +60 -0
  164. package/dist/src/mcp/tools/evidence-feedback.d.ts.map +1 -0
  165. package/dist/src/mcp/tools/evidence-feedback.js +62 -0
  166. package/dist/src/mcp/tools/evidence-feedback.js.map +1 -0
  167. package/dist/src/mcp/tools/log-outcome.d.ts +72 -0
  168. package/dist/src/mcp/tools/log-outcome.d.ts.map +1 -0
  169. package/dist/src/mcp/tools/log-outcome.js +59 -0
  170. package/dist/src/mcp/tools/log-outcome.js.map +1 -0
  171. package/dist/src/mcp/tools/open-evidence.d.ts +37 -0
  172. package/dist/src/mcp/tools/open-evidence.d.ts.map +1 -0
  173. package/dist/src/mcp/tools/open-evidence.js +72 -0
  174. package/dist/src/mcp/tools/open-evidence.js.map +1 -0
  175. package/dist/src/mcp/tools/save.d.ts +7 -2
  176. package/dist/src/mcp/tools/save.d.ts.map +1 -1
  177. package/dist/src/mcp/tools/save.js +7 -2
  178. package/dist/src/mcp/tools/save.js.map +1 -1
  179. package/dist/src/mcp/tools/search.d.ts.map +1 -1
  180. package/dist/src/mcp/tools/search.js +7 -1
  181. package/dist/src/mcp/tools/search.js.map +1 -1
  182. package/dist/src/mcp/tools/status.d.ts +15 -1
  183. package/dist/src/mcp/tools/status.d.ts.map +1 -1
  184. package/dist/src/mcp/tools/status.js +53 -2
  185. package/dist/src/mcp/tools/status.js.map +1 -1
  186. package/dist/src/observability/retrieval-feedback.d.ts +82 -0
  187. package/dist/src/observability/retrieval-feedback.d.ts.map +1 -0
  188. package/dist/src/observability/retrieval-feedback.js +231 -0
  189. package/dist/src/observability/retrieval-feedback.js.map +1 -0
  190. package/dist/src/observability/rift-context.d.ts.map +1 -1
  191. package/dist/src/observability/rift-context.js +3 -0
  192. package/dist/src/observability/rift-context.js.map +1 -1
  193. package/dist/src/observability/tool-usage-stats.d.ts +13 -0
  194. package/dist/src/observability/tool-usage-stats.d.ts.map +1 -1
  195. package/dist/src/observability/tool-usage-stats.js +15 -0
  196. package/dist/src/observability/tool-usage-stats.js.map +1 -1
  197. package/dist/src/observability/tool-usage.d.ts +56 -0
  198. package/dist/src/observability/tool-usage.d.ts.map +1 -1
  199. package/dist/src/observability/tool-usage.js +86 -0
  200. package/dist/src/observability/tool-usage.js.map +1 -1
  201. package/dist/src/providers/claude-cli-metadata-extraction.d.ts +47 -0
  202. package/dist/src/providers/claude-cli-metadata-extraction.d.ts.map +1 -0
  203. package/dist/src/providers/claude-cli-metadata-extraction.js +120 -0
  204. package/dist/src/providers/claude-cli-metadata-extraction.js.map +1 -0
  205. package/dist/src/providers/claude-cli-runner.d.ts +92 -0
  206. package/dist/src/providers/claude-cli-runner.d.ts.map +1 -0
  207. package/dist/src/providers/claude-cli-runner.js +598 -0
  208. package/dist/src/providers/claude-cli-runner.js.map +1 -0
  209. package/dist/src/providers/codex-cli-metadata-extraction.d.ts.map +1 -1
  210. package/dist/src/providers/codex-cli-metadata-extraction.js +1 -40
  211. package/dist/src/providers/codex-cli-metadata-extraction.js.map +1 -1
  212. package/dist/src/providers/codex-cli-runner.d.ts +7 -0
  213. package/dist/src/providers/codex-cli-runner.d.ts.map +1 -1
  214. package/dist/src/providers/codex-cli-runner.js +131 -5
  215. package/dist/src/providers/codex-cli-runner.js.map +1 -1
  216. package/dist/src/providers/conversation-generation.d.ts +10 -0
  217. package/dist/src/providers/conversation-generation.d.ts.map +1 -1
  218. package/dist/src/providers/conversation-generation.js +54 -13
  219. package/dist/src/providers/conversation-generation.js.map +1 -1
  220. package/dist/src/providers/openai-metadata-extraction.d.ts +48 -1
  221. package/dist/src/providers/openai-metadata-extraction.d.ts.map +1 -1
  222. package/dist/src/providers/openai-metadata-extraction.js +51 -2
  223. package/dist/src/providers/openai-metadata-extraction.js.map +1 -1
  224. package/dist/src/providers/types.d.ts +1 -1
  225. package/dist/src/providers/types.d.ts.map +1 -1
  226. package/dist/src/providers/types.js +4 -0
  227. package/dist/src/providers/types.js.map +1 -1
  228. package/dist/src/retrieval/canonical-files.d.ts +48 -0
  229. package/dist/src/retrieval/canonical-files.d.ts.map +1 -0
  230. package/dist/src/retrieval/canonical-files.js +210 -0
  231. package/dist/src/retrieval/canonical-files.js.map +1 -0
  232. package/dist/src/retrieval/compact.d.ts +95 -0
  233. package/dist/src/retrieval/compact.d.ts.map +1 -1
  234. package/dist/src/retrieval/compact.js +254 -8
  235. package/dist/src/retrieval/compact.js.map +1 -1
  236. package/dist/src/retrieval/context-pack.d.ts.map +1 -1
  237. package/dist/src/retrieval/context-pack.js +65 -15
  238. package/dist/src/retrieval/context-pack.js.map +1 -1
  239. package/dist/src/retrieval/conversation-dedup.d.ts +40 -0
  240. package/dist/src/retrieval/conversation-dedup.d.ts.map +1 -0
  241. package/dist/src/retrieval/conversation-dedup.js +141 -0
  242. package/dist/src/retrieval/conversation-dedup.js.map +1 -0
  243. package/dist/src/retrieval/evidence-key.d.ts +48 -0
  244. package/dist/src/retrieval/evidence-key.d.ts.map +1 -0
  245. package/dist/src/retrieval/evidence-key.js +131 -0
  246. package/dist/src/retrieval/evidence-key.js.map +1 -0
  247. package/dist/src/retrieval/feedback-ranking.d.ts +49 -0
  248. package/dist/src/retrieval/feedback-ranking.d.ts.map +1 -0
  249. package/dist/src/retrieval/feedback-ranking.js +138 -0
  250. package/dist/src/retrieval/feedback-ranking.js.map +1 -0
  251. package/dist/src/retrieval/git-state.d.ts +9 -0
  252. package/dist/src/retrieval/git-state.d.ts.map +1 -1
  253. package/dist/src/retrieval/git-state.js +18 -0
  254. package/dist/src/retrieval/git-state.js.map +1 -1
  255. package/dist/src/retrieval/group-by-parent.d.ts +38 -0
  256. package/dist/src/retrieval/group-by-parent.d.ts.map +1 -0
  257. package/dist/src/retrieval/group-by-parent.js +40 -0
  258. package/dist/src/retrieval/group-by-parent.js.map +1 -0
  259. package/dist/src/retrieval/lexical.d.ts.map +1 -1
  260. package/dist/src/retrieval/lexical.js +1 -3
  261. package/dist/src/retrieval/lexical.js.map +1 -1
  262. package/dist/src/retrieval/receipt.d.ts +57 -0
  263. package/dist/src/retrieval/receipt.d.ts.map +1 -0
  264. package/dist/src/retrieval/receipt.js +119 -0
  265. package/dist/src/retrieval/receipt.js.map +1 -0
  266. package/dist/src/retrieval/reranker.d.ts +49 -2
  267. package/dist/src/retrieval/reranker.d.ts.map +1 -1
  268. package/dist/src/retrieval/reranker.js +64 -4
  269. package/dist/src/retrieval/reranker.js.map +1 -1
  270. package/dist/src/retrieval/stitch-chunks.d.ts +73 -0
  271. package/dist/src/retrieval/stitch-chunks.d.ts.map +1 -0
  272. package/dist/src/retrieval/stitch-chunks.js +106 -0
  273. package/dist/src/retrieval/stitch-chunks.js.map +1 -0
  274. package/dist/src/server/app.d.ts +1 -1
  275. package/dist/src/server/app.d.ts.map +1 -1
  276. package/dist/src/server/app.js +20 -3
  277. package/dist/src/server/app.js.map +1 -1
  278. package/dist/src/server/routes/conversations-search.d.ts.map +1 -1
  279. package/dist/src/server/routes/conversations-search.js +22 -3
  280. package/dist/src/server/routes/conversations-search.js.map +1 -1
  281. package/dist/src/server/routes/friend-status.d.ts +64 -6
  282. package/dist/src/server/routes/friend-status.d.ts.map +1 -1
  283. package/dist/src/server/routes/friend-status.js +114 -18
  284. package/dist/src/server/routes/friend-status.js.map +1 -1
  285. package/dist/src/server/routes/mcp-usage.d.ts +9 -6
  286. package/dist/src/server/routes/mcp-usage.d.ts.map +1 -1
  287. package/dist/src/server/routes/mcp-usage.js.map +1 -1
  288. package/dist/src/server/routes/retrieval-feedback.d.ts +3 -0
  289. package/dist/src/server/routes/retrieval-feedback.d.ts.map +1 -0
  290. package/dist/src/server/routes/retrieval-feedback.js +290 -0
  291. package/dist/src/server/routes/retrieval-feedback.js.map +1 -0
  292. package/dist/src/server/routes/save.d.ts +3 -3
  293. package/dist/src/server/routes/save.d.ts.map +1 -1
  294. package/dist/src/server/routes/save.js +6 -2
  295. package/dist/src/server/routes/save.js.map +1 -1
  296. package/dist/src/server/routes/search.d.ts +1 -1
  297. package/dist/src/server/routes/search.d.ts.map +1 -1
  298. package/dist/src/server/routes/search.js +55 -8
  299. package/dist/src/server/routes/search.js.map +1 -1
  300. package/dist/src/server/serving-marker.d.ts +85 -0
  301. package/dist/src/server/serving-marker.d.ts.map +1 -0
  302. package/dist/src/server/serving-marker.js +226 -0
  303. package/dist/src/server/serving-marker.js.map +1 -0
  304. package/dist/src/storage/chunk-backfill.d.ts +39 -0
  305. package/dist/src/storage/chunk-backfill.d.ts.map +1 -0
  306. package/dist/src/storage/chunk-backfill.js +295 -0
  307. package/dist/src/storage/chunk-backfill.js.map +1 -0
  308. package/dist/src/storage/filter.d.ts +42 -0
  309. package/dist/src/storage/filter.d.ts.map +1 -0
  310. package/dist/src/storage/filter.js +70 -0
  311. package/dist/src/storage/filter.js.map +1 -0
  312. package/dist/src/storage/rebuild.d.ts.map +1 -1
  313. package/dist/src/storage/rebuild.js +44 -27
  314. package/dist/src/storage/rebuild.js.map +1 -1
  315. package/dist/src/storage/tables.d.ts +41 -0
  316. package/dist/src/storage/tables.d.ts.map +1 -1
  317. package/dist/src/storage/tables.js +64 -1
  318. package/dist/src/storage/tables.js.map +1 -1
  319. package/operator/swiftbar/render-menu.py +60 -18
  320. package/package.json +6 -4
@@ -0,0 +1,85 @@
1
+ /**
2
+ * Explicit capability flags. Capability booleans — not a commit/version string —
3
+ * are the contract: `package.json` still reads `beta.21` on chunk-aware `main`,
4
+ * so a version string cannot distinguish builds. Each build declares what IT can
5
+ * do; a future build that drops a capability flips the flag rather than relying
6
+ * on commit-ancestry math.
7
+ */
8
+ export interface ServingCapabilities {
9
+ /** The conversation tables carry `parent_id`/`chunk_index`/`chunk_count`. */
10
+ conversation_chunk_columns: boolean;
11
+ /** Retrieval reassembles chunk sets on the expand path (`detail="full"`). */
12
+ expand_stitching: boolean;
13
+ /** Safe for `chunk-backfill` to write a chunk-column conversation set here. */
14
+ chunk_backfill_write_compatible: boolean;
15
+ }
16
+ export interface ServingMarker {
17
+ version: string;
18
+ commit: string;
19
+ booted_at: string;
20
+ capabilities: ServingCapabilities;
21
+ }
22
+ /**
23
+ * Capabilities of THIS build. All true: this code defines the chunk columns in
24
+ * the conversation seed/migration, reassembles chunk sets on expand, and writes
25
+ * chunk-column sets coherently. A future build that regresses any of these must
26
+ * set the corresponding flag false.
27
+ */
28
+ export declare const CURRENT_SERVING_CAPABILITIES: ServingCapabilities;
29
+ /** Path of the marker within a data dir (under `observability/`, daemon-owned). */
30
+ export declare function servingMarkerPath(dataDir: string): string;
31
+ /** Assemble the marker for this build at boot time. */
32
+ export declare function buildServingMarker(bootedAt: string): ServingMarker;
33
+ /**
34
+ * Stamp the serving-build marker into the data dir. Atomic (write-temp +
35
+ * rename) so a crash mid-write never leaves a half-written marker that would
36
+ * read as malformed → refused. Best-effort: any failure is logged and
37
+ * swallowed so it can never break daemon boot.
38
+ *
39
+ * MUST be called only from the serving daemon boot path, never from a CLI tool.
40
+ */
41
+ export declare function writeServingMarker(dataDir: string, bootedAt?: string): void;
42
+ /** Read the marker. Returns null if absent, unreadable, or malformed. */
43
+ export declare function readServingMarker(dataDir: string): ServingMarker | null;
44
+ export interface CompatibilityVerdict {
45
+ ok: boolean;
46
+ /** Operator-facing reason when `ok` is false. */
47
+ reason: string;
48
+ }
49
+ /**
50
+ * Maximum age of a serving-build marker the write guard will trust (24h). The
51
+ * marker certifies a RECENT boot, not an ancient one — see the downgrade note in
52
+ * the header comment of `serving-marker.js`. The backfill tool requires the daemon stopped, so the owner
53
+ * naturally does a boot→stop cycle right before backfilling; that boot re-stamps
54
+ * the marker fresh. A window of a day gives slack ("booted this morning, backfill
55
+ * this evening") while still rejecting a months-old, possibly downgrade-era
56
+ * marker. Small future clock skew is tolerated up to {@link MAX_FUTURE_SKEW_MS}.
57
+ */
58
+ export declare const MAX_MARKER_AGE_MS: number;
59
+ /** Clock-skew slack for a marker dated slightly in the future (5 min). */
60
+ export declare const MAX_FUTURE_SKEW_MS: number;
61
+ /**
62
+ * Decide whether a destructive `chunk-backfill` WRITE is safe against the
63
+ * install that serves `dataDir`. Refuses unless the serving daemon has stamped
64
+ * a well-formed, RECENT marker in which EVERY capability in
65
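+ * the module-private `REQUIRED_WRITE_CAPABILITIES` list (`conversation_chunk_columns`, `expand_stitching`, `chunk_backfill_write_compatible`) is true.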
66
+ *
67
+ * Refusal cases:
68
+ * - Absent/malformed marker: a pre-chunk daemon never writes one, so we cannot
69
+ * prove the install that restarts after the write can serve chunk rows.
70
+ * - Unparseable `booted_at`: a marker without a real boot time cannot prove
71
+ * freshness, so it is treated as malformed.
72
+ * - Missing capability: the serving build self-declares it cannot serve a
73
+ * chunk-column set.
74
+ * - Stale `booted_at` (older than `maxAgeMs`): proves only that SOME chunk-aware
75
+ * daemon booted this dir long ago, not that the CURRENT install is chunk-aware
76
+ * (closes the silent-downgrade gap — see the header comment of `serving-marker.js`).
77
+ *
78
+ * `now`/`maxAgeMs` are injectable for deterministic tests; production callers use
79
+ * the wall clock and {@link MAX_MARKER_AGE_MS}.
80
+ */
81
+ export declare function assertChunkBackfillWriteCompatible(dataDir: string, opts?: {
82
+ now?: Date;
83
+ maxAgeMs?: number;
84
+ }): CompatibilityVerdict;
85
+ //# sourceMappingURL=serving-marker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"serving-marker.d.ts","sourceRoot":"","sources":["../../../src/server/serving-marker.ts"],"names":[],"mappings":"AA8CA;;;;;;GAMG;AACH,MAAM,WAAW,mBAAmB;IAClC,6EAA6E;IAC7E,0BAA0B,EAAE,OAAO,CAAC;IACpC,6EAA6E;IAC7E,gBAAgB,EAAE,OAAO,CAAC;IAC1B,+EAA+E;IAC/E,+BAA+B,EAAE,OAAO,CAAC;CAC1C;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,mBAAmB,CAAC;CACnC;AAED;;;;;GAKG;AACH,eAAO,MAAM,4BAA4B,EAAE,mBAI1C,CAAC;AAEF,mFAAmF;AACnF,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAEzD;AAED,uDAAuD;AACvD,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa,CAQlE;AAED;;;;;;;GAOG;AACH,wBAAgB,kBAAkB,CAChC,OAAO,EAAE,MAAM,EACf,QAAQ,GAAE,MAAiC,GAC1C,IAAI,CAcN;AAsBD,yEAAyE;AACzE,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI,CAQvE;AAED,MAAM,WAAW,oBAAoB;IACnC,EAAE,EAAE,OAAO,CAAC;IACZ,iDAAiD;IACjD,MAAM,EAAE,MAAM,CAAC;CAChB;AAgBD;;;;;;;;GAQG;AACH,eAAO,MAAM,iBAAiB,QAAsB,CAAC;AAErD,0EAA0E;AAC1E,eAAO,MAAM,kBAAkB,QAAgB,CAAC;AAEhD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAgB,kCAAkC,CAChD,OAAO,EAAE,MAAM,EACf,IAAI,GAAE;IAAE,GAAG,CAAC,EAAE,IAAI,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAO,GAC3C,oBAAoB,CAoEtB"}
@@ -0,0 +1,226 @@
1
+ /**
2
+ * Serving-build capability marker.
3
+ *
4
+ * The serving daemon stamps a small JSON marker into the data dir on every
5
+ * boot, recording WHICH code last served this data dir and what that code is
6
+ * capable of. This is the contract that lets an OFFLINE tool — one that runs
7
+ * with the daemon stopped, so it cannot probe `GET /version` — decide whether a
8
+ * destructive, schema-touching operation is safe against the install that will
9
+ * restart and serve this dir.
10
+ *
11
+ * Why a capability marker and not the obvious alternatives:
12
+ * - On-disk table schema is the very thing that gets accidentally mutated
13
+ * (the chunk-backfill `--dry-run` incident added the chunk columns to a
14
+ * pre-chunk dir), so the schema cannot certify the serving code.
15
+ * - Observability events are an indirect history signal — they tell you what
16
+ * happened, not what the currently-installed code can do.
17
+ * - `RIFT_CHUNKING` gates whether NEW conversations are split, not whether the
18
+ * permanent chunk columns / expand read path exist in the serving build.
19
+ * Only a marker written BY the serving build answers the question we actually
20
+ * care about: "what code last served this dir, and can it handle a
21
+ * chunk-backfill write?".
22
+ *
23
+ * Discipline:
24
+ * - Written ONLY by the serving daemon (never by CLI tools). A CLI tool that
25
+ * stamped the marker would be certifying itself, defeating the purpose.
26
+ * - Best-effort and non-fatal — a failed marker write must never break boot.
27
+ * - Absent / unknown / malformed is treated by the write guard as "cannot
28
+ * confirm compatible → refuse". A pre-chunk daemon (e.g. beta.21) never
29
+ * writes the marker, so it always reads as absent → refused.
30
+ * - STALE markers are refused too (freshness window, see
31
+ * {@link MAX_MARKER_AGE_MS}). A marker proves "a chunk-aware daemon booted
32
+ * this dir RECENTLY", not "a chunk-aware daemon booted this dir once, ever".
33
+ * This closes the downgrade gap: if a chunk-aware daemon stamps a marker and
34
+ * a pre-chunk daemon (beta.21) later serves the same dir, beta.21 neither
35
+ * overwrites nor removes the stale chunk-aware marker — so without a
36
+ * freshness check the offline write guard would still pass against an install
37
+ * that can no longer serve chunk rows. Requiring a recent boot means the
38
+ * owner must (re)boot the chunk-aware daemon immediately before backfilling,
39
+ * which re-stamps the marker with the CURRENT install's real capabilities.
40
+ * Daemon DOWNGRADE (chunk-aware → pre-chunk) after a marker write is
41
+ * otherwise unsupported in this owner-only phase.
42
+ */
43
+ import fs from "node:fs";
44
+ import path from "node:path";
45
+ import { getBuildInfo } from "./build-info.js";
46
/**
 * Capabilities of THIS build, copied into every serving marker it stamps.
 *
 * All three flags are true for this build. A future build that regresses any
 * of them must set the corresponding flag to false.
 *
 * The object is frozen: it is a shared, exported constant, and an importer that
 * flipped a flag at runtime would silently change what every later marker
 * certifies. `buildServingMarker` spreads the flags into a fresh object, so the
 * markers themselves remain ordinary mutable objects.
 */
export const CURRENT_SERVING_CAPABILITIES = Object.freeze({
    conversation_chunk_columns: true,
    expand_stitching: true,
    chunk_backfill_write_compatible: true,
});
57
/**
 * Location of the serving-build marker inside a data dir.
 *
 * @param {string} dataDir - Root of the data dir.
 * @returns {string} `<dataDir>/observability/serving-build.json`, joined with
 *   the platform path separator.
 */
export function servingMarkerPath(dataDir) {
    const relativeSegments = ["observability", "serving-build.json"];
    return path.join(dataDir, ...relativeSegments);
}
61
/**
 * Assemble the marker object for this build.
 *
 * @param {string} bootedAt - Boot timestamp, stored verbatim as `booted_at`.
 * @returns {{version: string, commit: string, booted_at: string, capabilities: object}}
 *   `version`/`commit` as reported by `getBuildInfo()`, plus a fresh copy of
 *   `CURRENT_SERVING_CAPABILITIES` so the marker never aliases the shared constant.
 */
export function buildServingMarker(bootedAt) {
    const { version, commit } = getBuildInfo();
    const capabilities = Object.assign({}, CURRENT_SERVING_CAPABILITIES);
    return { version, commit, booted_at: bootedAt, capabilities };
}
71
/**
 * Stamp the serving-build marker into the data dir.
 *
 * The marker is written to a sibling temp file and then renamed over the
 * target, so a crash mid-write never leaves a half-written marker that would
 * read as malformed. Best-effort: any failure is reported on stderr and
 * swallowed so it can never break daemon boot. On failure the temp file is
 * removed (also best-effort) so aborted writes do not accumulate
 * `serving-build.json.tmp-<pid>` leftovers in the data dir.
 *
 * MUST be called only from the serving daemon boot path, never from a CLI tool.
 *
 * @param {string} dataDir - Root of the data dir to stamp.
 * @param {string} [bootedAt] - Boot timestamp; defaults to the current time as
 *   an ISO-8601 string.
 * @returns {void}
 */
export function writeServingMarker(dataDir, bootedAt = new Date().toISOString()) {
    // Set once the temp path is known; cleared again after a successful rename
    // so the failure path only ever deletes a file this call may have created.
    let tmp;
    try {
        const target = servingMarkerPath(dataDir);
        fs.mkdirSync(path.dirname(target), { recursive: true });
        tmp = `${target}.tmp-${process.pid}`;
        fs.writeFileSync(tmp, `${JSON.stringify(buildServingMarker(bootedAt), null, 2)}\n`);
        fs.renameSync(tmp, target);
        tmp = undefined;
    }
    catch (err) {
        process.stderr.write(`serving-marker: failed to write (non-fatal): ${err instanceof Error ? err.message : String(err)}\n`);
        if (tmp !== undefined) {
            try {
                fs.unlinkSync(tmp);
            }
            catch {
                // Temp file was never created or cannot be removed; the original
                // failure is already reported and cleanup must stay non-fatal.
            }
        }
    }
}
91
/**
 * Structural check for a parsed marker.
 *
 * Accepts a value only when it is a non-null object whose `version`, `commit`
 * and `booted_at` are strings and whose `capabilities` is a non-null object
 * carrying the three capability flags as booleans. The contents of the strings
 * are not inspected here.
 *
 * @param {unknown} v - Candidate value, typically the result of `JSON.parse`.
 * @returns {boolean} True when `v` has the marker shape.
 */
function isServingMarker(v) {
    const isObjectLike = (candidate) => Boolean(candidate) && typeof candidate === "object";
    if (!isObjectLike(v)) {
        return false;
    }
    const stringFields = ["version", "commit", "booted_at"];
    if (!stringFields.every((field) => typeof v[field] === "string")) {
        return false;
    }
    const flags = v.capabilities;
    if (!isObjectLike(flags)) {
        return false;
    }
    const booleanFields = [
        "conversation_chunk_columns",
        "expand_stitching",
        "chunk_backfill_write_compatible",
    ];
    return booleanFields.every((field) => typeof flags[field] === "boolean");
}
108
/**
 * Load the serving-build marker from a data dir.
 *
 * @param {string} dataDir - Root of the data dir.
 * @returns {object | null} The parsed marker, or null when the file cannot be
 *   read, is not valid JSON, or does not pass `isServingMarker`.
 */
export function readServingMarker(dataDir) {
    try {
        const text = fs.readFileSync(servingMarkerPath(dataDir), "utf8");
        const candidate = JSON.parse(text);
        if (isServingMarker(candidate)) {
            return candidate;
        }
        return null;
    }
    catch {
        // Missing file, unreadable file and invalid JSON all read as "no marker".
        return null;
    }
}
119
/**
 * Capability flags a chunk-backfill WRITE depends on. The serving build must
 * have the chunk columns (`conversation_chunk_columns`), reassemble chunk sets
 * on the read path (`expand_stitching`), and self-certify the write as safe
 * (`chunk_backfill_write_compatible`). A build that regresses any one of them
 * would serve the backfilled corpus incorrectly, so all are required.
 */
const REQUIRED_WRITE_CAPABILITIES = ["conversation_chunk_columns", "expand_stitching", "chunk_backfill_write_compatible"];
/**
 * Oldest serving-build marker the write guard will trust: 24 hours, in
 * milliseconds. A marker certifies a RECENT boot; the file header explains the
 * downgrade scenario this protects against. One day leaves room for "booted
 * this morning, backfill this evening" while still rejecting a months-old
 * marker. Markers dated slightly in the future are tolerated up to
 * {@link MAX_FUTURE_SKEW_MS}.
 */
export const MAX_MARKER_AGE_MS = 1000 * 60 * 60 * 24;
/** Allowed clock skew for a marker dated in the future: 5 minutes, in milliseconds. */
export const MAX_FUTURE_SKEW_MS = 1000 * 60 * 5;
144
/**
 * Decide whether a destructive `chunk-backfill` WRITE is safe against the
 * install that serves `dataDir`. Refuses unless the serving daemon has stamped
 * a well-formed, RECENT marker in which EVERY capability in
 * {@link REQUIRED_WRITE_CAPABILITIES} is true.
 *
 * Refusal cases, checked in this order:
 * - Absent/malformed marker: a pre-chunk daemon never writes one, so we cannot
 *   prove the install that restarts after the write can serve chunk rows.
 * - Missing capability: the serving build self-declares it cannot serve a
 *   chunk-column set.
 * - Unparseable `booted_at`: a marker without a real boot time cannot prove
 *   freshness, so it is treated as malformed.
 * - Unusable freshness inputs: an invalid `now` or a NaN/non-numeric `maxAgeMs`
 *   makes every age comparison false, which would otherwise let the guard
 *   fall through to "ok". The guard fails CLOSED instead.
 * - Stale `booted_at` (older than `maxAgeMs`): proves only that SOME chunk-aware
 *   daemon booted this dir long ago, not that the CURRENT install is chunk-aware
 *   (closes the silent-downgrade gap — see file header).
 * - `booted_at` further in the future than {@link MAX_FUTURE_SKEW_MS}.
 *
 * `now`/`maxAgeMs` are injectable for deterministic tests; production callers use
 * the wall clock and {@link MAX_MARKER_AGE_MS}.
 *
 * @param {string} dataDir - Data directory whose serving marker is inspected.
 * @param {{ now?: Date, maxAgeMs?: number }} [opts] - Test overrides.
 * @returns {{ ok: boolean, reason: string }} `ok: true` with an empty reason
 *   when the write is safe; otherwise `ok: false` with a human-readable reason.
 */
export function assertChunkBackfillWriteCompatible(dataDir, opts = {}) {
    const now = opts.now ?? new Date();
    const maxAgeMs = opts.maxAgeMs ?? MAX_MARKER_AGE_MS;
    const marker = readServingMarker(dataDir);
    if (!marker) {
        return {
            ok: false,
            reason: `No serving-build marker at ${servingMarkerPath(dataDir)}. Cannot ` +
                `confirm the install that serves this data dir is chunk-aware — a ` +
                `pre-chunk daemon (e.g. beta.21) never writes this marker. Boot the ` +
                `chunk-aware serving daemon on this dir first (it stamps the marker on ` +
                `startup), or — if this is a DISPOSABLE COPY, never your live data dir — ` +
                `re-run with --allow-uncertified-copy to override.`,
        };
    }
    const missing = REQUIRED_WRITE_CAPABILITIES.filter((cap) => !marker.capabilities[cap]);
    if (missing.length > 0) {
        return {
            ok: false,
            reason: `Serving-build marker (version ${marker.version}, commit ` +
                `${marker.commit.slice(0, 12)}) is missing required capabilities: ` +
                `${missing.map((c) => `${c}=false`).join(", ")} — the serving install ` +
                `cannot safely serve a chunk-column conversation set. Upgrade the ` +
                `serving daemon before backfilling.`,
        };
    }
    const bootedMs = Date.parse(marker.booted_at);
    if (Number.isNaN(bootedMs)) {
        return {
            ok: false,
            reason: `Serving-build marker has an invalid booted_at ("${marker.booted_at}") ` +
                `— cannot prove the chunk-aware daemon booted recently. Reboot the ` +
                `chunk-aware serving daemon on this dir to re-stamp the marker.`,
        };
    }
    const ageMs = now.getTime() - bootedMs;
    // Fail closed. With an invalid `now` (getTime() → NaN) or a NaN/non-numeric
    // `maxAgeMs`, both comparisons below evaluate to false and the guard would
    // silently approve a destructive write it never actually verified.
    // (`Infinity` remains a legal "no staleness limit" value for `maxAgeMs`.)
    if (!Number.isFinite(ageMs) || typeof maxAgeMs !== "number" || Number.isNaN(maxAgeMs)) {
        return {
            ok: false,
            reason: `Cannot evaluate serving-build marker freshness (booted_at ` +
                `${marker.booted_at}): the reference time or the freshness window is ` +
                `not a valid number — refusing rather than treating an unverifiable ` +
                `marker as fresh.`,
        };
    }
    if (ageMs > maxAgeMs) {
        const ageHours = Math.round(ageMs / 3_600_000);
        const maxHours = Math.round(maxAgeMs / 3_600_000);
        return {
            ok: false,
            reason: `Serving-build marker is stale (booted_at ${marker.booted_at}, ~${ageHours}h ` +
                `old > ${maxHours}h freshness window). A stale marker only proves a ` +
                `chunk-aware daemon booted this dir long ago — not that the CURRENT ` +
                `install is chunk-aware (a since-downgraded daemon leaves the old marker ` +
                `in place). (Re)boot the chunk-aware serving daemon on this dir ` +
                `immediately before backfilling to re-stamp the marker, or — if this is ` +
                `a DISPOSABLE COPY, never your live data dir — re-run with ` +
                `--allow-uncertified-copy to override.`,
        };
    }
    if (ageMs < -MAX_FUTURE_SKEW_MS) {
        return {
            ok: false,
            reason: `Serving-build marker is dated in the future (booted_at ` +
                `${marker.booted_at}) beyond tolerated clock skew — refusing rather than ` +
                `trusting a marker that may have been hand-edited. Reboot the chunk-aware ` +
                `serving daemon on this dir to re-stamp the marker.`,
        };
    }
    return { ok: true, reason: "" };
}
226
+ //# sourceMappingURL=serving-marker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"serving-marker.js","sourceRoot":"","sources":["../../../src/server/serving-marker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAyCG;AACH,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAyB/C;;;;;GAKG;AACH,MAAM,CAAC,MAAM,4BAA4B,GAAwB;IAC/D,0BAA0B,EAAE,IAAI;IAChC,gBAAgB,EAAE,IAAI;IACtB,+BAA+B,EAAE,IAAI;CACtC,CAAC;AAEF,mFAAmF;AACnF,MAAM,UAAU,iBAAiB,CAAC,OAAe;IAC/C,OAAO,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,eAAe,EAAE,oBAAoB,CAAC,CAAC;AACnE,CAAC;AAED,uDAAuD;AACvD,MAAM,UAAU,kBAAkB,CAAC,QAAgB;IACjD,MAAM,IAAI,GAAG,YAAY,EAAE,CAAC;IAC5B,OAAO;QACL,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,SAAS,EAAE,QAAQ;QACnB,YAAY,EAAE,EAAE,GAAG,4BAA4B,EAAE;KAClD,CAAC;AACJ,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,kBAAkB,CAChC,OAAe,EACf,WAAmB,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;IAE3C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;QAC1C,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACxD,MAAM,GAAG,GAAG,GAAG,MAAM,QAAQ,OAAO,CAAC,GAAG,EAAE,CAAC;QAC3C,EAAE,CAAC,aAAa,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,kBAAkB,CAAC,QAAQ,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;QACpF,EAAE,CAAC,UAAU,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;IAC7B,CAAC;IAAC,OAAO,GAAY,EAAE,CAAC;QACtB,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,gDACE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CACjD,IAAI,CACL,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,eAAe,CAAC,CAAU;IACjC,IAAI,CAAC,CAAC,IAAI,OAAO,CAAC,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC9C,MAAM,CAAC,GAAG,CAA4B,CAAC;IACvC,IACE,OAAO,CAAC,CAAC,OAAO,KAAK,QAAQ;QAC7B,OAAO,CAAC,CAAC,MAAM,KAAK,QAAQ;QAC5B,OAAO,CAAC,CAAC,SAAS,KAAK,QAAQ,EAC/B,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IACD,MAAM,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC;IACzB,IAAI,CAAC,CAAC,IAAI,OAAO,CAAC,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC9C,MAAM,IAAI,GAAG,CAA4B,CAAC;IAC1C,OAAO,CACL,OAAO,IAAI,CAAC,0BAA0B,KAAK,SAAS;QACpD,OAAO,IAAI,CAAC,gBAAgB,KAAK,SAAS;QAC1C,OAAO,IAAI,CAAC,+BAA+B,KAAK,SAAS,CAC1D,C
AAC;AACJ,CAAC;AAED,yEAAyE;AACzE,MAAM,UAAU,iBAAiB,CAAC,OAAe;IAC/C,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,EAAE,CAAC,YAAY,CAAC,iBAAiB,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC,CAAC;QAChE,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAY,CAAC;QAC1C,OAAO,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC;IACjD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAQD;;;;;;;GAOG;AACH,MAAM,2BAA2B,GAA6C;IAC5E,4BAA4B;IAC5B,kBAAkB;IAClB,iCAAiC;CAClC,CAAC;AAEF;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;AAErD,0EAA0E;AAC1E,MAAM,CAAC,MAAM,kBAAkB,GAAG,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC;AAEhD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,UAAU,kCAAkC,CAChD,OAAe,EACf,OAA0C,EAAE;IAE5C,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,IAAI,IAAI,IAAI,EAAE,CAAC;IACnC,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,iBAAiB,CAAC;IACpD,MAAM,MAAM,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAC1C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO;YACL,EAAE,EAAE,KAAK;YACT,MAAM,EACJ,8BAA8B,iBAAiB,CAAC,OAAO,CAAC,WAAW;gBACnE,mEAAmE;gBACnE,qEAAqE;gBACrE,wEAAwE;gBACxE,0EAA0E;gBAC1E,mDAAmD;SACtD,CAAC;IACJ,CAAC;IACD,MAAM,OAAO,GAAG,2BAA2B,CAAC,MAAM,CAChD,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,CACnC,CAAC;IACF,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO;YACL,EAAE,EAAE,KAAK;YACT,MAAM,EACJ,iCAAiC,MAAM,CAAC,OAAO,WAAW;gBAC1D,GAAG,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,sCAAsC;gBACnE,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,yBAAyB;gBACvE,mEAAmE;gBACnE,oCAAoC;SACvC,CAAC;IACJ,CAAC;IACD,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IAC9C,IAAI,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC3B,OAAO;YACL,EAAE,EAAE,KAAK;YACT,MAAM,EACJ,mDAAmD,MAAM,CAAC,SAAS,KAAK;gBACxE,oEAAoE;gBACpE,gEAAgE;SACnE,CAAC;IACJ,CAAC;IACD,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,EAAE,GAAG,QAAQ,CAAC;IACvC,IAAI,KAAK,GAAG,QAAQ,EAAE,CAAC;QACrB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,SAAS,CAAC,CAAC;QAC/C,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,SAAS,CAAC,CAAC;QAClD,OAAO;YACL,EAAE,EAAE,KAA
K;YACT,MAAM,EACJ,4CAA4C,MAAM,CAAC,SAAS,MAAM,QAAQ,IAAI;gBAC9E,SAAS,QAAQ,oDAAoD;gBACrE,qEAAqE;gBACrE,0EAA0E;gBAC1E,iEAAiE;gBACjE,yEAAyE;gBACzE,4DAA4D;gBAC5D,uCAAuC;SAC1C,CAAC;IACJ,CAAC;IACD,IAAI,KAAK,GAAG,CAAC,kBAAkB,EAAE,CAAC;QAChC,OAAO;YACL,EAAE,EAAE,KAAK;YACT,MAAM,EACJ,yDAAyD;gBACzD,GAAG,MAAM,CAAC,SAAS,uDAAuD;gBAC1E,2EAA2E;gBAC3E,oDAAoD;SACvD,CAAC;IACJ,CAAC;IACD,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC;AAClC,CAAC"}
@@ -0,0 +1,39 @@
1
+ import type { EmbeddingProvider } from "../providers/types.js";
2
+ export interface ChunkBackfillOptions {
3
+ embedding: EmbeddingProvider;
4
+ dataDir: string;
5
+ /** Report what would change without embedding or writing anything; would-be re-chunks still count in `rechunked`. */
6
+ dryRun?: boolean;
7
+ /** Stop after this many conversations have been (re)chunked; skipped ones do not count. 0/undefined = no cap. */
8
+ limit?: number;
9
+ /** Sleep this many ms after each (would-be) re-chunked conversation to spare the embedding API; skips are not throttled. */
10
+ throttleMs?: number;
11
+ /** Cooperative cancel — checked between conversations (Ctrl-C safe). */
12
+ signal?: AbortSignal;
13
+ /** Progress sink (one line per meaningful event). */
14
+ log?: (msg: string) => void;
15
+ }
16
+ export interface ChunkBackfillSummary {
17
+ /** Distinct conversations examined (most-recent raw per id), including those later skipped, orphaned, or errored. */
18
+ scanned: number;
19
+ /** Conversations split into a multi-chunk set this run (or that would be, in dryRun). */
20
+ rechunked: number;
21
+ /** Conversations already in the desired multi-chunk shape — skipped. */
22
+ alreadyChunked: number;
23
+ /** Conversations short enough to stay a single unmarked row — skipped. */
24
+ singleChunk: number;
25
+ /** Raw artifacts with no matching indexed row — skipped (not resurrected). */
26
+ orphaned: number;
27
+ /** Raw artifacts that could not be read/parsed, had no usable content, or produced no chunks — skipped. */
28
+ errored: number;
29
+ /** True if an abort signal stopped the run before all conversations were seen. */
30
+ aborted: boolean;
31
+ }
32
+ /**
33
+ * Re-chunk historical conversations from raw into chunk sets. Returns a summary
34
+ * of what changed (or, with `dryRun`, would change). Idempotent: a conversation
35
+ * already in its desired shape (a complete set of the right count, or a single
36
+ * unmarked row for a short conversation) is skipped, so re-runs touch only the rest.
37
+ */
38
+ export declare function backfillConversationChunks(opts: ChunkBackfillOptions): Promise<ChunkBackfillSummary>;
39
+ //# sourceMappingURL=chunk-backfill.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunk-backfill.d.ts","sourceRoot":"","sources":["../../../src/storage/chunk-backfill.ts"],"names":[],"mappings":"AA8BA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AA0B/D,MAAM,WAAW,oBAAoB;IACnC,SAAS,EAAE,iBAAiB,CAAC;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,sEAAsE;IACtE,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,sFAAsF;IACtF,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,2EAA2E;IAC3E,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,wEAAwE;IACxE,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,qDAAqD;IACrD,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAC;CAC7B;AAED,MAAM,WAAW,oBAAoB;IACnC,gEAAgE;IAChE,OAAO,EAAE,MAAM,CAAC;IAChB,yFAAyF;IACzF,SAAS,EAAE,MAAM,CAAC;IAClB,wEAAwE;IACxE,cAAc,EAAE,MAAM,CAAC;IACvB,0EAA0E;IAC1E,WAAW,EAAE,MAAM,CAAC;IACpB,8EAA8E;IAC9E,QAAQ,EAAE,MAAM,CAAC;IACjB,6DAA6D;IAC7D,OAAO,EAAE,MAAM,CAAC;IAChB,kFAAkF;IAClF,OAAO,EAAE,OAAO,CAAC;CAClB;AAyGD;;;;;GAKG;AACH,wBAAsB,0BAA0B,CAC9C,IAAI,EAAE,oBAAoB,GACzB,OAAO,CAAC,oBAAoB,CAAC,CAuH/B"}
@@ -0,0 +1,295 @@
1
+ /**
2
+ * Owner-only historical conversation chunk backfill.
3
+ *
4
+ * Index-time chunking (behind `RIFT_CHUNKING`) only splits NEW conversations as
5
+ * they are saved/ingested; conversations indexed before the flag was on are still
6
+ * stored as one whole row each. This module re-chunks those historical
7
+ * conversations from their raw artifacts so the owner can dogfood retrieval over a
8
+ * fully-chunked corpus and gather OFF-vs-ON eval evidence before any product
9
+ * default flip.
10
+ *
11
+ * Deliberately CRUDE and owner-grade — NOT the productized beta-user migration:
12
+ * - explicit, one-shot (run from the offline CLI; no background daemon job),
13
+ * - `dryRun` reports what would change without embedding or writing,
14
+ * - lightly throttleable, and safe to Ctrl-C (an `AbortSignal` is checked
15
+ * between conversations, so the in-flight conversation's atomic write always
16
+ * finishes before we stop — never a half-written set on interrupt),
17
+ * - no resumable status UI, pause/cancel, or rollback tooling.
18
+ *
19
+ * Data-safety mirrors the save/ingest chunk-write path: every chunk of a
20
+ * conversation is embedded BEFORE any destructive delete, and the raw artifact
21
+ * that drove the re-chunk is left on disk, so an interrupt between delete and
22
+ * insert is fully repairable by re-running (or by reconcile/rebuild).
23
+ *
24
+ * Reads are compatible with the `expand` stitching slice: a backfilled set uses
25
+ * the same `parent_id` / `chunk_index` / `chunk_count` markers and `convChunkId`
26
+ * scheme, so `detail="full"` reassembles a backfilled conversation through the
27
+ * exact same path as a natively-chunked one.
28
+ */
29
+ import fs from "node:fs";
30
+ import path from "node:path";
31
+ import { CONVERSATION_SOURCES } from "../providers/types.js";
32
+ import { RIFT_NONE_EMBEDDING_PROVIDER, isPlaceholderEmbeddingProvider, } from "../providers/placeholder-embed.js";
33
+ import { recordEmbed } from "../observability/embedding-events.js";
34
+ import { recordIndexWrite } from "../observability/index-events.js";
35
+ import { eqFilter } from "./filter.js";
36
+ import { getTable } from "./tables.js";
37
+ import { chunkConversation } from "../ingestion/chunk-text.js";
38
+ import { convChunkId, conversationChunkColumns, chunkSetComplete, expectedChunkCount, } from "../ingestion/chunk-meta.js";
39
+ import { conversationContentFingerprint } from "../ingestion/inbox-core/conversation-fingerprint.js";
40
+ import { extractIdFromFilename, compareRawRecencyDesc } from "./rebuild.js";
41
/** Every raw-artifact source directory to scan: all conversation sources plus "inbox". */
const ALL_CONV_SOURCES = [...CONVERSATION_SOURCES, "inbox"];
/** Both storage tiers a conversation row can live in, listed hot first. */
const CONV_TABLES = ["conversations_hot", "conversations_cold"];
46
/**
 * Promise-based delay.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
49
/**
 * Pick one raw artifact per conversation id, scanning
 * `<dataDir>/raw/conversations/<source>/*.json` for every source in
 * `ALL_CONV_SOURCES`. When an id shows up more than once, the winner is chosen
 * with `compareRawRecencyDesc` — the same recency policy reconcile/rebuild use,
 * so all three tools canonicalise a duplicated id to the same artifact.
 *
 * Source directories that do not exist, non-`.json` files, and files whose name
 * yields no id are ignored.
 *
 * @param {string} dataDir - Root data directory.
 * @returns {{ id: string, rawPath: string }[]} One entry per distinct id, in
 *   first-seen order.
 */
function collectRawConversations(dataDir) {
    const newestRawById = new Map();
    for (const source of ALL_CONV_SOURCES) {
        const sourceDir = path.join(dataDir, "raw", "conversations", source);
        if (!fs.existsSync(sourceDir)) {
            continue;
        }
        const jsonFiles = fs.readdirSync(sourceDir).filter((name) => name.endsWith(".json"));
        for (const name of jsonFiles) {
            const id = extractIdFromFilename(name);
            if (!id) {
                continue;
            }
            const candidate = path.join(sourceDir, name);
            const current = newestRawById.get(id);
            // A negative comparison means `candidate` sorts ahead of (is more
            // recent than) the artifact currently held for this id.
            const candidateWins = !current || compareRawRecencyDesc(candidate, current) < 0;
            if (candidateWins) {
                newestRawById.set(id, candidate);
            }
        }
    }
    return Array.from(newestRawById, ([id, rawPath]) => ({ id, rawPath }));
}
75
/**
 * Gather every currently-indexed row of a conversation — its whole row and/or
 * its chunk set — from BOTH tiers.
 *
 * The same id can exist in hot AND cold after a compaction crash ("copied to
 * cold but not yet removed from hot"). Rows are deduped by id; because tiers are
 * visited hot → cold, the cold copy overwrites its hot duplicate. Cold is also
 * chosen as the write target whenever cold returned any row, so a later rechunk
 * collapses the duplicate instead of leaving a stale copy in the other tier.
 *
 * @param {string} id - Conversation id.
 * @returns {Promise<{ targetTable: string, rows: object[], hasCrossTierRows: boolean } | null>}
 *   `null` when neither tier holds a row; otherwise the deduped rows, the tier
 *   to write back to, and whether any row id was encountered more than once.
 */
async function fetchConversationRows(id) {
    const byId = eqFilter("id", id, { validateAsRowId: true });
    const byParent = eqFilter("parent_id", id, { validateAsRowId: true });
    const rowsById = new Map();
    let foundInCold = false;
    let duplicateSeen = false;
    for (const tableName of CONV_TABLES) {
        const table = getTable(tableName);
        // Schema-aware predicate. A legacy pre-chunk table (e.g. beta.21) has no
        // `parent_id` column and filtering on it would throw, so such a table is
        // matched by id only — its rows are necessarily unchunked. This is what
        // lets a read-only dry run inspect a legacy table without first
        // migrating the chunk columns on.
        const schema = await table.schema();
        const chunkAware = schema.fields.some((field) => field.name === "parent_id");
        const predicate = chunkAware ? `(${byId}) OR (${byParent})` : byId;
        const matches = await table.query().where(predicate).toArray();
        if (matches.length === 0) {
            continue;
        }
        if (tableName === "conversations_cold") {
            foundInCold = true;
        }
        for (const row of matches) {
            // An id that is already present has been returned before → duplicate.
            if (rowsById.has(row.id)) {
                duplicateSeen = true;
            }
            rowsById.set(row.id, row);
        }
    }
    if (rowsById.size === 0) {
        return null;
    }
    return {
        targetTable: foundInCold ? "conversations_cold" : "conversations_hot",
        rows: Array.from(rowsById.values()),
        hasCrossTierRows: duplicateSeen,
    };
}
125
/**
 * Re-chunk historical conversations from raw into chunk sets. Returns a summary
 * of what changed. Idempotent: a conversation already in its desired shape (a
 * complete set of the right count, or a single unmarked row for a short
 * conversation) is skipped, so re-running only touches the remainder.
 *
 * Per conversation the loop: honours the abort signal and `limit`, reads the raw
 * artifact, skips unusable / orphaned / already-shaped conversations, and
 * otherwise re-chunks (or, in dry-run, only reports). Throttling applies only
 * after a (would-be) re-chunk. An error thrown while re-chunking propagates to
 * the caller and stops the run.
 *
 * @param {object} opts
 * @param {object} opts.embedding - Embedding provider used for re-chunked sets.
 * @param {string} opts.dataDir - Root data directory.
 * @param {boolean} [opts.dryRun=false] - Report only; no embedding or writes.
 * @param {number} [opts.limit=0] - Max conversations to (re)chunk; 0 = no cap.
 * @param {number} [opts.throttleMs=0] - Pause after each (would-be) re-chunk.
 * @param {AbortSignal} [opts.signal] - Checked between conversations.
 * @param {(msg: string) => void} [opts.log] - Progress sink.
 * @returns {Promise<object>} Counters: scanned, rechunked, alreadyChunked,
 *   singleChunk, orphaned, errored, plus the `aborted` flag.
 */
export async function backfillConversationChunks(opts) {
    const { embedding, dataDir } = opts;
    const dryRun = opts.dryRun ?? false;
    const throttleMs = opts.throttleMs ?? 0;
    const limit = opts.limit ?? 0;
    const log = opts.log ?? (() => { });
    const summary = {
        scanned: 0,
        rechunked: 0,
        alreadyChunked: 0,
        singleChunk: 0,
        orphaned: 0,
        errored: 0,
        aborted: false,
    };
    // Treat whitespace-only as empty: a raw with `"content": " "` is truthy
    // but `chunkConversation` trims it to zero chunks, which would otherwise
    // drive an empty replacement set straight to the delete path below.
    const pickContent = (v) => typeof v === "string" && v.trim().length > 0 ? v : "";
    const raws = collectRawConversations(dataDir);
    log(`Found ${raws.length} historical conversation(s) to examine.`);
    for (const { id, rawPath } of raws) {
        if (opts.signal?.aborted) {
            summary.aborted = true;
            log("Aborted — stopping before the next conversation.");
            break;
        }
        if (limit > 0 && summary.rechunked >= limit) {
            log(`Reached --limit ${limit}; stopping.`);
            break;
        }
        summary.scanned++;
        let raw;
        try {
            raw = JSON.parse(fs.readFileSync(rawPath, "utf-8"));
        }
        catch {
            raw = null;
        }
        // `JSON.parse` succeeds on `null` and on bare primitives. Dereferencing
        // a parsed `null` below would throw a TypeError outside any try/catch and
        // abort the entire run, so a non-object payload is handled exactly like
        // an unreadable artifact.
        if (raw === null || typeof raw !== "object") {
            summary.errored++;
            log(`! ${id}: unreadable raw artifact — skipped.`);
            continue;
        }
        const content = pickContent(raw.content) || pickContent(raw.summary);
        if (content.length === 0) {
            summary.errored++;
            log(`! ${id}: raw has no content — skipped.`);
            continue;
        }
        const existing = await fetchConversationRows(id);
        if (!existing) {
            // Raw with no indexed row: an orphan (failed save / pruned row). Do NOT
            // resurrect it here — that's reconcile/rebuild's job, with their full
            // dedup machinery. Backfill only re-shapes already-indexed conversations.
            summary.orphaned++;
            continue;
        }
        const chunks = chunkConversation(content);
        const desiredCount = chunks.length;
        // Hard guard against ever reaching the delete path with an empty
        // replacement set: a zero-chunk set would delete the existing rows from
        // both tiers and insert nothing, silently dropping an indexed
        // conversation. Skip and count it as an error instead.
        if (desiredCount === 0) {
            summary.errored++;
            log(`! ${id}: produced no chunks — skipped (existing rows untouched).`);
            continue;
        }
        // Already in the desired shape? (complete set of the right count, or a
        // single unmarked row for a short conversation.) Skip — keeps re-runs cheap
        // and avoids needless re-embedding. But NEVER skip a cross-tier duplicate:
        // the deduped `existing.rows` can look complete while a stale copy survives
        // in the other tier, so it still needs the delete-both-tiers repair below.
        if (!existing.hasCrossTierRows &&
            chunkSetComplete(existing.rows) &&
            expectedChunkCount(existing.rows) === desiredCount) {
            if (desiredCount > 1) {
                summary.alreadyChunked++;
            }
            else {
                summary.singleChunk++;
            }
            continue;
        }
        if (dryRun) {
            summary.rechunked++;
            log(`~ ${id}: would re-chunk into ${desiredCount} section(s) [${existing.targetTable}] (dry-run).`);
            if (throttleMs > 0) {
                await sleep(throttleMs);
            }
            continue;
        }
        await rechunkConversation(embedding, dataDir, id, content, chunks, existing);
        summary.rechunked++;
        log(`+ ${id}: re-chunked into ${desiredCount} section(s) [${existing.targetTable}].`);
        if (throttleMs > 0) {
            await sleep(throttleMs);
        }
    }
    return summary;
}
228
/**
 * Embed the full chunk set, then replace the conversation's rows with it.
 * Metadata (domain/intent/quality/topics/decisions/key_outputs/summary/
 * idempotency_key/fingerprint/metadata_provider) is carried over from the
 * existing indexed row so an enriched conversation is NOT downgraded to its raw
 * artifact's basic metadata — only `content`/`embedding`/`indexed_at`/
 * `embedding_provider` and the chunk markers are recomputed.
 *
 * Ordering = embed-all → validate → delete-old → insert-new. The embeddings
 * exist, and are verified to cover every chunk, before any delete. The raw
 * artifact survives, so an interrupt in the brief delete→insert window is
 * repaired by re-running.
 *
 * @param {object} embedding - Embedding provider (`embedBatch(chunks)`).
 * @param {string} dataDir - Root data directory (for observability records).
 * @param {string} id - Conversation id.
 * @param {string} content - Full conversation text (fingerprint fallback).
 * @param {string[]} chunks - Non-empty chunk texts to index.
 * @param {{ targetTable: string, rows: object[] }} existing - Current rows and
 *   the tier the replacement set is written to.
 * @returns {Promise<void>}
 * @throws {Error} If the provider does not return one embedding per chunk; the
 *   existing rows are left untouched in that case.
 */
async function rechunkConversation(embedding, dataDir, id, content, chunks, existing) {
    const count = chunks.length;
    const embeddings = await recordEmbed(dataDir, embedding, {
        pipeline: "backfill",
        operation: "rechunk_conversation",
        input_count: count,
    }, () => embedding.embedBatch(chunks));
    // Enforce "every chunk is embedded BEFORE any destructive delete". Without
    // this, a short or holey batch would delete the old rows and then either
    // insert rows with `embedding: undefined` or fail on insert with the
    // conversation already removed from the index.
    const returned = embeddings?.length ?? 0;
    if (returned !== count) {
        throw new Error(`rechunk ${id}: embedding provider returned ${returned} embedding(s) for ` +
            `${count} chunk(s); existing rows left untouched.`);
    }
    for (let i = 0; i < count; i++) {
        if (embeddings[i] == null) {
            throw new Error(`rechunk ${id}: embedding provider returned no embedding for chunk ${i}; ` +
                `existing rows left untouched.`);
        }
    }
    // Representative = lowest chunk_index (or the lone row) — its metadata is the
    // conversation-level metadata shared by every chunk.
    const rep = [...existing.rows].sort((a, b) => Number(a.chunk_index ?? 0) - Number(b.chunk_index ?? 0))[0];
    const fingerprint = typeof rep.conversation_fingerprint === "string" &&
        rep.conversation_fingerprint.length > 0
        ? rep.conversation_fingerprint
        : conversationContentFingerprint(content);
    const embeddingProviderMarker = isPlaceholderEmbeddingProvider(embedding)
        ? RIFT_NONE_EMBEDDING_PROVIDER
        : "";
    const indexedAt = new Date().toISOString();
    const rows = chunks.map((chunk, i) => ({
        // A single-chunk conversation keeps its plain id; only real sets get
        // per-chunk ids.
        id: count > 1 ? convChunkId(id, i) : id,
        content: chunk,
        summary: rep.summary,
        embedding: embeddings[i],
        source: rep.source,
        domain: rep.domain,
        intent: rep.intent,
        quality: rep.quality,
        topics: rep.topics,
        decisions: rep.decisions,
        key_outputs: rep.key_outputs,
        indexed_at: indexedAt,
        idempotency_key: rep.idempotency_key,
        conversation_fingerprint: fingerprint,
        metadata_provider: typeof rep.metadata_provider === "string" ? rep.metadata_provider : "",
        embedding_provider: embeddingProviderMarker,
        ...conversationChunkColumns(id, i, count),
    }));
    // Delete every prior row for this conversation (the whole row keyed by id and
    // any chunk set keyed by parent_id) from BOTH tiers — a compaction crash can
    // leave the same id in hot AND cold, and deleting only one tier would leave a
    // stale duplicate behind. Then insert the new set into the canonical target
    // tier (cold if the conversation lives in cold, else hot), preserving the
    // hot/cold placement rather than promoting old conversations into hot.
    const idFilter = eqFilter("id", id, { validateAsRowId: true });
    const parentFilter = eqFilter("parent_id", id, { validateAsRowId: true });
    for (const table of CONV_TABLES) {
        await getTable(table).delete(idFilter);
        await getTable(table).delete(parentFilter);
    }
    await recordIndexWrite(dataDir, {
        table: existing.targetTable,
        pipeline: "backfill",
        operation: "rechunk_conversation",
        row_count: rows.length,
    }, () => getTable(existing.targetTable).add(rows));
}
295
+ //# sourceMappingURL=chunk-backfill.js.map