@getrift/rift 0.0.0 → 0.1.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (628)
  1. package/README.dev.md +110 -0
  2. package/README.md +130 -0
  3. package/dist/src/auth/keychain.d.ts +25 -0
  4. package/dist/src/auth/keychain.d.ts.map +1 -0
  5. package/dist/src/auth/keychain.js +113 -0
  6. package/dist/src/auth/keychain.js.map +1 -0
  7. package/dist/src/auth/middleware.d.ts +20 -0
  8. package/dist/src/auth/middleware.d.ts.map +1 -0
  9. package/dist/src/auth/middleware.js +49 -0
  10. package/dist/src/auth/middleware.js.map +1 -0
  11. package/dist/src/auth/rate-limit.d.ts +16 -0
  12. package/dist/src/auth/rate-limit.d.ts.map +1 -0
  13. package/dist/src/auth/rate-limit.js +38 -0
  14. package/dist/src/auth/rate-limit.js.map +1 -0
  15. package/dist/src/auth/rotation.d.ts +67 -0
  16. package/dist/src/auth/rotation.d.ts.map +1 -0
  17. package/dist/src/auth/rotation.js +190 -0
  18. package/dist/src/auth/rotation.js.map +1 -0
  19. package/dist/src/backfill/project-context-batch-constructor.d.ts +127 -0
  20. package/dist/src/backfill/project-context-batch-constructor.d.ts.map +1 -0
  21. package/dist/src/backfill/project-context-batch-constructor.js +210 -0
  22. package/dist/src/backfill/project-context-batch-constructor.js.map +1 -0
  23. package/dist/src/capture/auto-capture.d.ts +162 -0
  24. package/dist/src/capture/auto-capture.d.ts.map +1 -0
  25. package/dist/src/capture/auto-capture.js +601 -0
  26. package/dist/src/capture/auto-capture.js.map +1 -0
  27. package/dist/src/capture/batch-budget.d.ts +90 -0
  28. package/dist/src/capture/batch-budget.d.ts.map +1 -0
  29. package/dist/src/capture/batch-budget.js +148 -0
  30. package/dist/src/capture/batch-budget.js.map +1 -0
  31. package/dist/src/capture/codex-cli-triage-provider.d.ts +17 -0
  32. package/dist/src/capture/codex-cli-triage-provider.d.ts.map +1 -0
  33. package/dist/src/capture/codex-cli-triage-provider.js +109 -0
  34. package/dist/src/capture/codex-cli-triage-provider.js.map +1 -0
  35. package/dist/src/capture/observability.d.ts +42 -0
  36. package/dist/src/capture/observability.d.ts.map +1 -0
  37. package/dist/src/capture/observability.js +87 -0
  38. package/dist/src/capture/observability.js.map +1 -0
  39. package/dist/src/capture/openai-triage-provider.d.ts +92 -0
  40. package/dist/src/capture/openai-triage-provider.d.ts.map +1 -0
  41. package/dist/src/capture/openai-triage-provider.js +267 -0
  42. package/dist/src/capture/openai-triage-provider.js.map +1 -0
  43. package/dist/src/capture/review-queue-index.d.ts +51 -0
  44. package/dist/src/capture/review-queue-index.d.ts.map +1 -0
  45. package/dist/src/capture/review-queue-index.js +204 -0
  46. package/dist/src/capture/review-queue-index.js.map +1 -0
  47. package/dist/src/capture/review-queue.d.ts +43 -0
  48. package/dist/src/capture/review-queue.d.ts.map +1 -0
  49. package/dist/src/capture/review-queue.js +116 -0
  50. package/dist/src/capture/review-queue.js.map +1 -0
  51. package/dist/src/capture/sources.d.ts +7 -0
  52. package/dist/src/capture/sources.d.ts.map +1 -0
  53. package/dist/src/capture/sources.js +3 -0
  54. package/dist/src/capture/sources.js.map +1 -0
  55. package/dist/src/capture/triage-lane.d.ts +39 -0
  56. package/dist/src/capture/triage-lane.d.ts.map +1 -0
  57. package/dist/src/capture/triage-lane.js +217 -0
  58. package/dist/src/capture/triage-lane.js.map +1 -0
  59. package/dist/src/capture/triage-provider.d.ts +75 -0
  60. package/dist/src/capture/triage-provider.d.ts.map +1 -0
  61. package/dist/src/capture/triage-provider.js +120 -0
  62. package/dist/src/capture/triage-provider.js.map +1 -0
  63. package/dist/src/capture/triage.d.ts +30 -0
  64. package/dist/src/capture/triage.d.ts.map +1 -0
  65. package/dist/src/capture/triage.js +48 -0
  66. package/dist/src/capture/triage.js.map +1 -0
  67. package/dist/src/cli/commands/backfill.d.ts +3 -0
  68. package/dist/src/cli/commands/backfill.d.ts.map +1 -0
  69. package/dist/src/cli/commands/backfill.js +1376 -0
  70. package/dist/src/cli/commands/backfill.js.map +1 -0
  71. package/dist/src/cli/commands/bulk-ingest.d.ts +3 -0
  72. package/dist/src/cli/commands/bulk-ingest.d.ts.map +1 -0
  73. package/dist/src/cli/commands/bulk-ingest.js +126 -0
  74. package/dist/src/cli/commands/bulk-ingest.js.map +1 -0
  75. package/dist/src/cli/commands/capture.d.ts +12 -0
  76. package/dist/src/cli/commands/capture.d.ts.map +1 -0
  77. package/dist/src/cli/commands/capture.js +123 -0
  78. package/dist/src/cli/commands/capture.js.map +1 -0
  79. package/dist/src/cli/commands/compact.d.ts +3 -0
  80. package/dist/src/cli/commands/compact.d.ts.map +1 -0
  81. package/dist/src/cli/commands/compact.js +70 -0
  82. package/dist/src/cli/commands/compact.js.map +1 -0
  83. package/dist/src/cli/commands/feedback.d.ts +22 -0
  84. package/dist/src/cli/commands/feedback.d.ts.map +1 -0
  85. package/dist/src/cli/commands/feedback.js +125 -0
  86. package/dist/src/cli/commands/feedback.js.map +1 -0
  87. package/dist/src/cli/commands/import.d.ts +19 -0
  88. package/dist/src/cli/commands/import.d.ts.map +1 -0
  89. package/dist/src/cli/commands/import.js +258 -0
  90. package/dist/src/cli/commands/import.js.map +1 -0
  91. package/dist/src/cli/commands/ingest.d.ts +3 -0
  92. package/dist/src/cli/commands/ingest.d.ts.map +1 -0
  93. package/dist/src/cli/commands/ingest.js +80 -0
  94. package/dist/src/cli/commands/ingest.js.map +1 -0
  95. package/dist/src/cli/commands/mcp-install.d.ts +25 -0
  96. package/dist/src/cli/commands/mcp-install.d.ts.map +1 -0
  97. package/dist/src/cli/commands/mcp-install.js +134 -0
  98. package/dist/src/cli/commands/mcp-install.js.map +1 -0
  99. package/dist/src/cli/commands/onboard.d.ts +98 -0
  100. package/dist/src/cli/commands/onboard.d.ts.map +1 -0
  101. package/dist/src/cli/commands/onboard.js +742 -0
  102. package/dist/src/cli/commands/onboard.js.map +1 -0
  103. package/dist/src/cli/commands/rebuild.d.ts +12 -0
  104. package/dist/src/cli/commands/rebuild.d.ts.map +1 -0
  105. package/dist/src/cli/commands/rebuild.js +164 -0
  106. package/dist/src/cli/commands/rebuild.js.map +1 -0
  107. package/dist/src/cli/commands/reconcile.d.ts +3 -0
  108. package/dist/src/cli/commands/reconcile.d.ts.map +1 -0
  109. package/dist/src/cli/commands/reconcile.js +56 -0
  110. package/dist/src/cli/commands/reconcile.js.map +1 -0
  111. package/dist/src/cli/commands/reindex.d.ts +3 -0
  112. package/dist/src/cli/commands/reindex.d.ts.map +1 -0
  113. package/dist/src/cli/commands/reindex.js +66 -0
  114. package/dist/src/cli/commands/reindex.js.map +1 -0
  115. package/dist/src/cli/commands/review.d.ts +13 -0
  116. package/dist/src/cli/commands/review.d.ts.map +1 -0
  117. package/dist/src/cli/commands/review.js +383 -0
  118. package/dist/src/cli/commands/review.js.map +1 -0
  119. package/dist/src/cli/commands/save.d.ts +3 -0
  120. package/dist/src/cli/commands/save.d.ts.map +1 -0
  121. package/dist/src/cli/commands/save.js +111 -0
  122. package/dist/src/cli/commands/save.js.map +1 -0
  123. package/dist/src/cli/commands/search.d.ts +35 -0
  124. package/dist/src/cli/commands/search.d.ts.map +1 -0
  125. package/dist/src/cli/commands/search.js +88 -0
  126. package/dist/src/cli/commands/search.js.map +1 -0
  127. package/dist/src/cli/commands/stats.d.ts +3 -0
  128. package/dist/src/cli/commands/stats.d.ts.map +1 -0
  129. package/dist/src/cli/commands/stats.js +42 -0
  130. package/dist/src/cli/commands/stats.js.map +1 -0
  131. package/dist/src/cli/commands/status.d.ts +15 -0
  132. package/dist/src/cli/commands/status.d.ts.map +1 -0
  133. package/dist/src/cli/commands/status.js +89 -0
  134. package/dist/src/cli/commands/status.js.map +1 -0
  135. package/dist/src/cli/commands/token-issue.d.ts +3 -0
  136. package/dist/src/cli/commands/token-issue.d.ts.map +1 -0
  137. package/dist/src/cli/commands/token-issue.js +25 -0
  138. package/dist/src/cli/commands/token-issue.js.map +1 -0
  139. package/dist/src/cli/commands/triage.d.ts +3 -0
  140. package/dist/src/cli/commands/triage.d.ts.map +1 -0
  141. package/dist/src/cli/commands/triage.js +125 -0
  142. package/dist/src/cli/commands/triage.js.map +1 -0
  143. package/dist/src/cli/commands/uninstall.d.ts +3 -0
  144. package/dist/src/cli/commands/uninstall.d.ts.map +1 -0
  145. package/dist/src/cli/commands/uninstall.js +238 -0
  146. package/dist/src/cli/commands/uninstall.js.map +1 -0
  147. package/dist/src/cli/feedback/feedback-config.d.ts +21 -0
  148. package/dist/src/cli/feedback/feedback-config.d.ts.map +1 -0
  149. package/dist/src/cli/feedback/feedback-config.js +43 -0
  150. package/dist/src/cli/feedback/feedback-config.js.map +1 -0
  151. package/dist/src/cli/feedback/feedback-history.d.ts +4 -0
  152. package/dist/src/cli/feedback/feedback-history.d.ts.map +1 -0
  153. package/dist/src/cli/feedback/feedback-history.js +115 -0
  154. package/dist/src/cli/feedback/feedback-history.js.map +1 -0
  155. package/dist/src/cli/feedback/feedback-payload.d.ts +53 -0
  156. package/dist/src/cli/feedback/feedback-payload.d.ts.map +1 -0
  157. package/dist/src/cli/feedback/feedback-payload.js +10 -0
  158. package/dist/src/cli/feedback/feedback-payload.js.map +1 -0
  159. package/dist/src/cli/feedback/feedback-relay.d.ts +15 -0
  160. package/dist/src/cli/feedback/feedback-relay.d.ts.map +1 -0
  161. package/dist/src/cli/feedback/feedback-relay.js +47 -0
  162. package/dist/src/cli/feedback/feedback-relay.js.map +1 -0
  163. package/dist/src/cli/feedback/feedback-status.d.ts +11 -0
  164. package/dist/src/cli/feedback/feedback-status.d.ts.map +1 -0
  165. package/dist/src/cli/feedback/feedback-status.js +122 -0
  166. package/dist/src/cli/feedback/feedback-status.js.map +1 -0
  167. package/dist/src/cli/http-client.d.ts +36 -0
  168. package/dist/src/cli/http-client.d.ts.map +1 -0
  169. package/dist/src/cli/http-client.js +153 -0
  170. package/dist/src/cli/http-client.js.map +1 -0
  171. package/dist/src/cli/index.d.ts +4 -0
  172. package/dist/src/cli/index.d.ts.map +1 -0
  173. package/dist/src/cli/index.js +66 -0
  174. package/dist/src/cli/index.js.map +1 -0
  175. package/dist/src/cli/job-poller.d.ts +13 -0
  176. package/dist/src/cli/job-poller.d.ts.map +1 -0
  177. package/dist/src/cli/job-poller.js +29 -0
  178. package/dist/src/cli/job-poller.js.map +1 -0
  179. package/dist/src/cli/mcp-config-writers/codex-toml.d.ts +10 -0
  180. package/dist/src/cli/mcp-config-writers/codex-toml.d.ts.map +1 -0
  181. package/dist/src/cli/mcp-config-writers/codex-toml.js +410 -0
  182. package/dist/src/cli/mcp-config-writers/codex-toml.js.map +1 -0
  183. package/dist/src/cli/mcp-config-writers/errors.d.ts +17 -0
  184. package/dist/src/cli/mcp-config-writers/errors.d.ts.map +1 -0
  185. package/dist/src/cli/mcp-config-writers/errors.js +13 -0
  186. package/dist/src/cli/mcp-config-writers/errors.js.map +1 -0
  187. package/dist/src/cli/mcp-config-writers/index.d.ts +18 -0
  188. package/dist/src/cli/mcp-config-writers/index.d.ts.map +1 -0
  189. package/dist/src/cli/mcp-config-writers/index.js +49 -0
  190. package/dist/src/cli/mcp-config-writers/index.js.map +1 -0
  191. package/dist/src/cli/mcp-config-writers/json-config.d.ts +12 -0
  192. package/dist/src/cli/mcp-config-writers/json-config.d.ts.map +1 -0
  193. package/dist/src/cli/mcp-config-writers/json-config.js +177 -0
  194. package/dist/src/cli/mcp-config-writers/json-config.js.map +1 -0
  195. package/dist/src/cli/mcp-config-writers/redact.d.ts +28 -0
  196. package/dist/src/cli/mcp-config-writers/redact.d.ts.map +1 -0
  197. package/dist/src/cli/mcp-config-writers/redact.js +48 -0
  198. package/dist/src/cli/mcp-config-writers/redact.js.map +1 -0
  199. package/dist/src/cli/mcp-config-writers/types.d.ts +32 -0
  200. package/dist/src/cli/mcp-config-writers/types.d.ts.map +1 -0
  201. package/dist/src/cli/mcp-config-writers/types.js +5 -0
  202. package/dist/src/cli/mcp-config-writers/types.js.map +1 -0
  203. package/dist/src/cli/output.d.ts +8 -0
  204. package/dist/src/cli/output.d.ts.map +1 -0
  205. package/dist/src/cli/output.js +34 -0
  206. package/dist/src/cli/output.js.map +1 -0
  207. package/dist/src/cli/status/friend-header.d.ts +33 -0
  208. package/dist/src/cli/status/friend-header.d.ts.map +1 -0
  209. package/dist/src/cli/status/friend-header.js +108 -0
  210. package/dist/src/cli/status/friend-header.js.map +1 -0
  211. package/dist/src/cli/status/local-signals.d.ts +14 -0
  212. package/dist/src/cli/status/local-signals.d.ts.map +1 -0
  213. package/dist/src/cli/status/local-signals.js +73 -0
  214. package/dist/src/cli/status/local-signals.js.map +1 -0
  215. package/dist/src/cli/token.d.ts +37 -0
  216. package/dist/src/cli/token.d.ts.map +1 -0
  217. package/dist/src/cli/token.js +105 -0
  218. package/dist/src/cli/token.js.map +1 -0
  219. package/dist/src/cli/uninstall/mcp-uninstall.d.ts +33 -0
  220. package/dist/src/cli/uninstall/mcp-uninstall.d.ts.map +1 -0
  221. package/dist/src/cli/uninstall/mcp-uninstall.js +181 -0
  222. package/dist/src/cli/uninstall/mcp-uninstall.js.map +1 -0
  223. package/dist/src/config/loader.d.ts +9 -0
  224. package/dist/src/config/loader.d.ts.map +1 -0
  225. package/dist/src/config/loader.js +73 -0
  226. package/dist/src/config/loader.js.map +1 -0
  227. package/dist/src/config/schema.d.ts +635 -0
  228. package/dist/src/config/schema.d.ts.map +1 -0
  229. package/dist/src/config/schema.js +208 -0
  230. package/dist/src/config/schema.js.map +1 -0
  231. package/dist/src/ingestion/bulk-ingest.d.ts +11 -0
  232. package/dist/src/ingestion/bulk-ingest.d.ts.map +1 -0
  233. package/dist/src/ingestion/bulk-ingest.js +11 -0
  234. package/dist/src/ingestion/bulk-ingest.js.map +1 -0
  235. package/dist/src/ingestion/extractor.d.ts +16 -0
  236. package/dist/src/ingestion/extractor.d.ts.map +1 -0
  237. package/dist/src/ingestion/extractor.js +85 -0
  238. package/dist/src/ingestion/extractor.js.map +1 -0
  239. package/dist/src/ingestion/extractors/docx.d.ts +3 -0
  240. package/dist/src/ingestion/extractors/docx.d.ts.map +1 -0
  241. package/dist/src/ingestion/extractors/docx.js +20 -0
  242. package/dist/src/ingestion/extractors/docx.js.map +1 -0
  243. package/dist/src/ingestion/extractors/pdf.d.ts +3 -0
  244. package/dist/src/ingestion/extractors/pdf.d.ts.map +1 -0
  245. package/dist/src/ingestion/extractors/pdf.js +32 -0
  246. package/dist/src/ingestion/extractors/pdf.js.map +1 -0
  247. package/dist/src/ingestion/historical-campaign.d.ts +340 -0
  248. package/dist/src/ingestion/historical-campaign.d.ts.map +1 -0
  249. package/dist/src/ingestion/historical-campaign.js +1010 -0
  250. package/dist/src/ingestion/historical-campaign.js.map +1 -0
  251. package/dist/src/ingestion/ignored-paths.d.ts +20 -0
  252. package/dist/src/ingestion/ignored-paths.d.ts.map +1 -0
  253. package/dist/src/ingestion/ignored-paths.js +45 -0
  254. package/dist/src/ingestion/ignored-paths.js.map +1 -0
  255. package/dist/src/ingestion/inbox-watcher.d.ts +12 -0
  256. package/dist/src/ingestion/inbox-watcher.d.ts.map +1 -0
  257. package/dist/src/ingestion/inbox-watcher.js +99 -0
  258. package/dist/src/ingestion/inbox-watcher.js.map +1 -0
  259. package/dist/src/ingestion/indexer.d.ts +32 -0
  260. package/dist/src/ingestion/indexer.d.ts.map +1 -0
  261. package/dist/src/ingestion/indexer.js +68 -0
  262. package/dist/src/ingestion/indexer.js.map +1 -0
  263. package/dist/src/ingestion/metadata-extraction.d.ts +53 -0
  264. package/dist/src/ingestion/metadata-extraction.d.ts.map +1 -0
  265. package/dist/src/ingestion/metadata-extraction.js +132 -0
  266. package/dist/src/ingestion/metadata-extraction.js.map +1 -0
  267. package/dist/src/ingestion/parsers/chatgpt-web.d.ts +29 -0
  268. package/dist/src/ingestion/parsers/chatgpt-web.d.ts.map +1 -0
  269. package/dist/src/ingestion/parsers/chatgpt-web.js +100 -0
  270. package/dist/src/ingestion/parsers/chatgpt-web.js.map +1 -0
  271. package/dist/src/ingestion/parsers/claude-code-jsonl.d.ts +16 -0
  272. package/dist/src/ingestion/parsers/claude-code-jsonl.d.ts.map +1 -0
  273. package/dist/src/ingestion/parsers/claude-code-jsonl.js +123 -0
  274. package/dist/src/ingestion/parsers/claude-code-jsonl.js.map +1 -0
  275. package/dist/src/ingestion/parsers/claude-web.d.ts +24 -0
  276. package/dist/src/ingestion/parsers/claude-web.d.ts.map +1 -0
  277. package/dist/src/ingestion/parsers/claude-web.js +78 -0
  278. package/dist/src/ingestion/parsers/claude-web.js.map +1 -0
  279. package/dist/src/ingestion/parsers/codex-jsonl.d.ts +18 -0
  280. package/dist/src/ingestion/parsers/codex-jsonl.d.ts.map +1 -0
  281. package/dist/src/ingestion/parsers/codex-jsonl.js +125 -0
  282. package/dist/src/ingestion/parsers/codex-jsonl.js.map +1 -0
  283. package/dist/src/ingestion/parsers/gemini-web.d.ts +16 -0
  284. package/dist/src/ingestion/parsers/gemini-web.d.ts.map +1 -0
  285. package/dist/src/ingestion/parsers/gemini-web.js +170 -0
  286. package/dist/src/ingestion/parsers/gemini-web.js.map +1 -0
  287. package/dist/src/ingestion/parsers/grok-web.d.ts +40 -0
  288. package/dist/src/ingestion/parsers/grok-web.d.ts.map +1 -0
  289. package/dist/src/ingestion/parsers/grok-web.js +67 -0
  290. package/dist/src/ingestion/parsers/grok-web.js.map +1 -0
  291. package/dist/src/ingestion/parsers/types.d.ts +34 -0
  292. package/dist/src/ingestion/parsers/types.d.ts.map +1 -0
  293. package/dist/src/ingestion/parsers/types.js +26 -0
  294. package/dist/src/ingestion/parsers/types.js.map +1 -0
  295. package/dist/src/ingestion/scanner.d.ts +48 -0
  296. package/dist/src/ingestion/scanner.d.ts.map +1 -0
  297. package/dist/src/ingestion/scanner.js +131 -0
  298. package/dist/src/ingestion/scanner.js.map +1 -0
  299. package/dist/src/ingestion/staging.d.ts +109 -0
  300. package/dist/src/ingestion/staging.d.ts.map +1 -0
  301. package/dist/src/ingestion/staging.js +411 -0
  302. package/dist/src/ingestion/staging.js.map +1 -0
  303. package/dist/src/ingestion/watcher.d.ts +65 -0
  304. package/dist/src/ingestion/watcher.d.ts.map +1 -0
  305. package/dist/src/ingestion/watcher.js +182 -0
  306. package/dist/src/ingestion/watcher.js.map +1 -0
  307. package/dist/src/jobs/codex-override-handler.d.ts +3 -0
  308. package/dist/src/jobs/codex-override-handler.d.ts.map +1 -0
  309. package/dist/src/jobs/codex-override-handler.js +16 -0
  310. package/dist/src/jobs/codex-override-handler.js.map +1 -0
  311. package/dist/src/jobs/handlers/compact.d.ts +30 -0
  312. package/dist/src/jobs/handlers/compact.d.ts.map +1 -0
  313. package/dist/src/jobs/handlers/compact.js +329 -0
  314. package/dist/src/jobs/handlers/compact.js.map +1 -0
  315. package/dist/src/jobs/handlers/ingest.d.ts +13 -0
  316. package/dist/src/jobs/handlers/ingest.d.ts.map +1 -0
  317. package/dist/src/jobs/handlers/ingest.js +255 -0
  318. package/dist/src/jobs/handlers/ingest.js.map +1 -0
  319. package/dist/src/jobs/handlers/reconcile.d.ts +29 -0
  320. package/dist/src/jobs/handlers/reconcile.d.ts.map +1 -0
  321. package/dist/src/jobs/handlers/reconcile.js +476 -0
  322. package/dist/src/jobs/handlers/reconcile.js.map +1 -0
  323. package/dist/src/jobs/handlers/reindex.d.ts +38 -0
  324. package/dist/src/jobs/handlers/reindex.d.ts.map +1 -0
  325. package/dist/src/jobs/handlers/reindex.js +52 -0
  326. package/dist/src/jobs/handlers/reindex.js.map +1 -0
  327. package/dist/src/jobs/handlers/save.d.ts +10 -0
  328. package/dist/src/jobs/handlers/save.d.ts.map +1 -0
  329. package/dist/src/jobs/handlers/save.js +206 -0
  330. package/dist/src/jobs/handlers/save.js.map +1 -0
  331. package/dist/src/jobs/handlers/triage.d.ts +47 -0
  332. package/dist/src/jobs/handlers/triage.d.ts.map +1 -0
  333. package/dist/src/jobs/handlers/triage.js +95 -0
  334. package/dist/src/jobs/handlers/triage.js.map +1 -0
  335. package/dist/src/jobs/queue.d.ts +107 -0
  336. package/dist/src/jobs/queue.d.ts.map +1 -0
  337. package/dist/src/jobs/queue.js +319 -0
  338. package/dist/src/jobs/queue.js.map +1 -0
  339. package/dist/src/jobs/types.d.ts +39 -0
  340. package/dist/src/jobs/types.d.ts.map +1 -0
  341. package/dist/src/jobs/types.js +29 -0
  342. package/dist/src/jobs/types.js.map +1 -0
  343. package/dist/src/jobs/worker-entry.d.ts +10 -0
  344. package/dist/src/jobs/worker-entry.d.ts.map +1 -0
  345. package/dist/src/jobs/worker-entry.js +210 -0
  346. package/dist/src/jobs/worker-entry.js.map +1 -0
  347. package/dist/src/jobs/worker-process.d.ts +50 -0
  348. package/dist/src/jobs/worker-process.d.ts.map +1 -0
  349. package/dist/src/jobs/worker-process.js +186 -0
  350. package/dist/src/jobs/worker-process.js.map +1 -0
  351. package/dist/src/jobs/worker.d.ts +11 -0
  352. package/dist/src/jobs/worker.d.ts.map +1 -0
  353. package/dist/src/jobs/worker.js +14 -0
  354. package/dist/src/jobs/worker.js.map +1 -0
  355. package/dist/src/main.d.ts +2 -0
  356. package/dist/src/main.d.ts.map +1 -0
  357. package/dist/src/main.js +425 -0
  358. package/dist/src/main.js.map +1 -0
  359. package/dist/src/mcp/errors.d.ts +8 -0
  360. package/dist/src/mcp/errors.d.ts.map +1 -0
  361. package/dist/src/mcp/errors.js +50 -0
  362. package/dist/src/mcp/errors.js.map +1 -0
  363. package/dist/src/mcp/server.d.ts +10 -0
  364. package/dist/src/mcp/server.d.ts.map +1 -0
  365. package/dist/src/mcp/server.js +94 -0
  366. package/dist/src/mcp/server.js.map +1 -0
  367. package/dist/src/mcp/tools/context-pack.d.ts +35 -0
  368. package/dist/src/mcp/tools/context-pack.d.ts.map +1 -0
  369. package/dist/src/mcp/tools/context-pack.js +97 -0
  370. package/dist/src/mcp/tools/context-pack.js.map +1 -0
  371. package/dist/src/mcp/tools/conversations-search.d.ts +38 -0
  372. package/dist/src/mcp/tools/conversations-search.d.ts.map +1 -0
  373. package/dist/src/mcp/tools/conversations-search.js +73 -0
  374. package/dist/src/mcp/tools/conversations-search.js.map +1 -0
  375. package/dist/src/mcp/tools/save.d.ts +32 -0
  376. package/dist/src/mcp/tools/save.d.ts.map +1 -0
  377. package/dist/src/mcp/tools/save.js +60 -0
  378. package/dist/src/mcp/tools/save.js.map +1 -0
  379. package/dist/src/mcp/tools/search.d.ts +33 -0
  380. package/dist/src/mcp/tools/search.d.ts.map +1 -0
  381. package/dist/src/mcp/tools/search.js +58 -0
  382. package/dist/src/mcp/tools/search.js.map +1 -0
  383. package/dist/src/mcp/tools/status.d.ts +17 -0
  384. package/dist/src/mcp/tools/status.d.ts.map +1 -0
  385. package/dist/src/mcp/tools/status.js +12 -0
  386. package/dist/src/mcp/tools/status.js.map +1 -0
  387. package/dist/src/observability/coverage.d.ts +100 -0
  388. package/dist/src/observability/coverage.d.ts.map +1 -0
  389. package/dist/src/observability/coverage.js +180 -0
  390. package/dist/src/observability/coverage.js.map +1 -0
  391. package/dist/src/observability/rift-context.d.ts +47 -0
  392. package/dist/src/observability/rift-context.d.ts.map +1 -0
  393. package/dist/src/observability/rift-context.js +118 -0
  394. package/dist/src/observability/rift-context.js.map +1 -0
  395. package/dist/src/observability/staleness.d.ts +43 -0
  396. package/dist/src/observability/staleness.d.ts.map +1 -0
  397. package/dist/src/observability/staleness.js +74 -0
  398. package/dist/src/observability/staleness.js.map +1 -0
  399. package/dist/src/observability/tool-usage-stats.d.ts +23 -0
  400. package/dist/src/observability/tool-usage-stats.d.ts.map +1 -0
  401. package/dist/src/observability/tool-usage-stats.js +83 -0
  402. package/dist/src/observability/tool-usage-stats.js.map +1 -0
  403. package/dist/src/observability/tool-usage.d.ts +68 -0
  404. package/dist/src/observability/tool-usage.d.ts.map +1 -0
  405. package/dist/src/observability/tool-usage.js +207 -0
  406. package/dist/src/observability/tool-usage.js.map +1 -0
  407. package/dist/src/onboarding/daemon-control.d.ts +33 -0
  408. package/dist/src/onboarding/daemon-control.d.ts.map +1 -0
  409. package/dist/src/onboarding/daemon-control.js +92 -0
  410. package/dist/src/onboarding/daemon-control.js.map +1 -0
  411. package/dist/src/onboarding/env-file.d.ts +18 -0
  412. package/dist/src/onboarding/env-file.d.ts.map +1 -0
  413. package/dist/src/onboarding/env-file.js +89 -0
  414. package/dist/src/onboarding/env-file.js.map +1 -0
  415. package/dist/src/onboarding/voyage-validate.d.ts +16 -0
  416. package/dist/src/onboarding/voyage-validate.d.ts.map +1 -0
  417. package/dist/src/onboarding/voyage-validate.js +85 -0
  418. package/dist/src/onboarding/voyage-validate.js.map +1 -0
  419. package/dist/src/providers/anthropic-digest.d.ts +23 -0
  420. package/dist/src/providers/anthropic-digest.d.ts.map +1 -0
  421. package/dist/src/providers/anthropic-digest.js +91 -0
  422. package/dist/src/providers/anthropic-digest.js.map +1 -0
  423. package/dist/src/providers/codex-cli-digest.d.ts +12 -0
  424. package/dist/src/providers/codex-cli-digest.d.ts.map +1 -0
  425. package/dist/src/providers/codex-cli-digest.js +70 -0
  426. package/dist/src/providers/codex-cli-digest.js.map +1 -0
  427. package/dist/src/providers/codex-cli-metadata-extraction.d.ts +14 -0
  428. package/dist/src/providers/codex-cli-metadata-extraction.d.ts.map +1 -0
  429. package/dist/src/providers/codex-cli-metadata-extraction.js +101 -0
  430. package/dist/src/providers/codex-cli-metadata-extraction.js.map +1 -0
  431. package/dist/src/providers/codex-cli-runner.d.ts +14 -0
  432. package/dist/src/providers/codex-cli-runner.d.ts.map +1 -0
  433. package/dist/src/providers/codex-cli-runner.js +272 -0
  434. package/dist/src/providers/codex-cli-runner.js.map +1 -0
  435. package/dist/src/providers/conversation-generation.d.ts +10 -0
  436. package/dist/src/providers/conversation-generation.d.ts.map +1 -0
  437. package/dist/src/providers/conversation-generation.js +54 -0
  438. package/dist/src/providers/conversation-generation.js.map +1 -0
  439. package/dist/src/providers/ollama-embed.d.ts +22 -0
  440. package/dist/src/providers/ollama-embed.d.ts.map +1 -0
  441. package/dist/src/providers/ollama-embed.js +133 -0
  442. package/dist/src/providers/ollama-embed.js.map +1 -0
  443. package/dist/src/providers/ollama.d.ts +42 -0
  444. package/dist/src/providers/ollama.d.ts.map +1 -0
  445. package/dist/src/providers/ollama.js +169 -0
  446. package/dist/src/providers/ollama.js.map +1 -0
  447. package/dist/src/providers/openai-metadata-extraction.d.ts +73 -0
  448. package/dist/src/providers/openai-metadata-extraction.d.ts.map +1 -0
  449. package/dist/src/providers/openai-metadata-extraction.js +161 -0
  450. package/dist/src/providers/openai-metadata-extraction.js.map +1 -0
  451. package/dist/src/providers/operator-overrides.d.ts +24 -0
  452. package/dist/src/providers/operator-overrides.d.ts.map +1 -0
  453. package/dist/src/providers/operator-overrides.js +84 -0
  454. package/dist/src/providers/operator-overrides.js.map +1 -0
  455. package/dist/src/providers/stub.d.ts +17 -0
  456. package/dist/src/providers/stub.d.ts.map +1 -0
  457. package/dist/src/providers/stub.js +72 -0
  458. package/dist/src/providers/stub.js.map +1 -0
  459. package/dist/src/providers/types.d.ts +82 -0
  460. package/dist/src/providers/types.d.ts.map +1 -0
  461. package/dist/src/providers/types.js +52 -0
  462. package/dist/src/providers/types.js.map +1 -0
  463. package/dist/src/providers/voyage.d.ts +23 -0
  464. package/dist/src/providers/voyage.d.ts.map +1 -0
  465. package/dist/src/providers/voyage.js +135 -0
  466. package/dist/src/providers/voyage.js.map +1 -0
  467. package/dist/src/retrieval/compact.d.ts +89 -0
  468. package/dist/src/retrieval/compact.d.ts.map +1 -0
  469. package/dist/src/retrieval/compact.js +348 -0
  470. package/dist/src/retrieval/compact.js.map +1 -0
  471. package/dist/src/retrieval/context-pack.d.ts +123 -0
  472. package/dist/src/retrieval/context-pack.d.ts.map +1 -0
  473. package/dist/src/retrieval/context-pack.js +553 -0
  474. package/dist/src/retrieval/context-pack.js.map +1 -0
  475. package/dist/src/retrieval/cwd.d.ts +25 -0
  476. package/dist/src/retrieval/cwd.d.ts.map +1 -0
  477. package/dist/src/retrieval/cwd.js +48 -0
  478. package/dist/src/retrieval/cwd.js.map +1 -0
  479. package/dist/src/retrieval/degraded.d.ts +20 -0
  480. package/dist/src/retrieval/degraded.d.ts.map +1 -0
  481. package/dist/src/retrieval/degraded.js +43 -0
  482. package/dist/src/retrieval/degraded.js.map +1 -0
  483. package/dist/src/retrieval/hybrid.d.ts +38 -0
  484. package/dist/src/retrieval/hybrid.d.ts.map +1 -0
  485. package/dist/src/retrieval/hybrid.js +82 -0
  486. package/dist/src/retrieval/hybrid.js.map +1 -0
  487. package/dist/src/retrieval/lexical.d.ts +28 -0
  488. package/dist/src/retrieval/lexical.d.ts.map +1 -0
  489. package/dist/src/retrieval/lexical.js +301 -0
  490. package/dist/src/retrieval/lexical.js.map +1 -0
  491. package/dist/src/retrieval/post-filter.d.ts +32 -0
  492. package/dist/src/retrieval/post-filter.d.ts.map +1 -0
  493. package/dist/src/retrieval/post-filter.js +57 -0
  494. package/dist/src/retrieval/post-filter.js.map +1 -0
  495. package/dist/src/retrieval/reranker.d.ts +72 -0
  496. package/dist/src/retrieval/reranker.d.ts.map +1 -0
  497. package/dist/src/retrieval/reranker.js +129 -0
  498. package/dist/src/retrieval/reranker.js.map +1 -0
  499. package/dist/src/retrieval/vector.d.ts +47 -0
  500. package/dist/src/retrieval/vector.d.ts.map +1 -0
  501. package/dist/src/retrieval/vector.js +112 -0
  502. package/dist/src/retrieval/vector.js.map +1 -0
  503. package/dist/src/runtime/legacy-migration.d.ts +27 -0
  504. package/dist/src/runtime/legacy-migration.d.ts.map +1 -0
  505. package/dist/src/runtime/legacy-migration.js +140 -0
  506. package/dist/src/runtime/legacy-migration.js.map +1 -0
  507. package/dist/src/runtime/legacy-name-guard.d.ts +35 -0
  508. package/dist/src/runtime/legacy-name-guard.d.ts.map +1 -0
  509. package/dist/src/runtime/legacy-name-guard.js +58 -0
  510. package/dist/src/runtime/legacy-name-guard.js.map +1 -0
  511. package/dist/src/runtime/rift-env.d.ts +14 -0
  512. package/dist/src/runtime/rift-env.d.ts.map +1 -0
  513. package/dist/src/runtime/rift-env.js +79 -0
  514. package/dist/src/runtime/rift-env.js.map +1 -0
  515. package/dist/src/runtime/watcher-startup.d.ts +2 -0
  516. package/dist/src/runtime/watcher-startup.d.ts.map +1 -0
  517. package/dist/src/runtime/watcher-startup.js +4 -0
  518. package/dist/src/runtime/watcher-startup.js.map +1 -0
  519. package/dist/src/security/archive.d.ts +23 -0
  520. package/dist/src/security/archive.d.ts.map +1 -0
  521. package/dist/src/security/archive.js +163 -0
  522. package/dist/src/security/archive.js.map +1 -0
  523. package/dist/src/security/paths.d.ts +21 -0
  524. package/dist/src/security/paths.d.ts.map +1 -0
  525. package/dist/src/security/paths.js +67 -0
  526. package/dist/src/security/paths.js.map +1 -0
  527. package/dist/src/server/app.d.ts +29 -0
  528. package/dist/src/server/app.d.ts.map +1 -0
  529. package/dist/src/server/app.js +226 -0
  530. package/dist/src/server/app.js.map +1 -0
  531. package/dist/src/server/build-info.d.ts +8 -0
  532. package/dist/src/server/build-info.d.ts.map +1 -0
  533. package/dist/src/server/build-info.js +61 -0
  534. package/dist/src/server/build-info.js.map +1 -0
  535. package/dist/src/server/lifecycle.d.ts +30 -0
  536. package/dist/src/server/lifecycle.d.ts.map +1 -0
  537. package/dist/src/server/lifecycle.js +59 -0
  538. package/dist/src/server/lifecycle.js.map +1 -0
  539. package/dist/src/server/middleware/multipart.d.ts +51 -0
  540. package/dist/src/server/middleware/multipart.d.ts.map +1 -0
  541. package/dist/src/server/middleware/multipart.js +86 -0
  542. package/dist/src/server/middleware/multipart.js.map +1 -0
  543. package/dist/src/server/routes/compact.d.ts +37 -0
  544. package/dist/src/server/routes/compact.d.ts.map +1 -0
  545. package/dist/src/server/routes/compact.js +77 -0
  546. package/dist/src/server/routes/compact.js.map +1 -0
  547. package/dist/src/server/routes/context.d.ts +5 -0
  548. package/dist/src/server/routes/context.d.ts.map +1 -0
  549. package/dist/src/server/routes/context.js +50 -0
  550. package/dist/src/server/routes/context.js.map +1 -0
  551. package/dist/src/server/routes/conversations-search.d.ts +4 -0
  552. package/dist/src/server/routes/conversations-search.d.ts.map +1 -0
  553. package/dist/src/server/routes/conversations-search.js +243 -0
  554. package/dist/src/server/routes/conversations-search.js.map +1 -0
  555. package/dist/src/server/routes/friend-status.d.ts +72 -0
  556. package/dist/src/server/routes/friend-status.d.ts.map +1 -0
  557. package/dist/src/server/routes/friend-status.js +71 -0
  558. package/dist/src/server/routes/friend-status.js.map +1 -0
  559. package/dist/src/server/routes/ingest.d.ts +15 -0
  560. package/dist/src/server/routes/ingest.d.ts.map +1 -0
  561. package/dist/src/server/routes/ingest.js +139 -0
  562. package/dist/src/server/routes/ingest.js.map +1 -0
  563. package/dist/src/server/routes/jobs.d.ts +10 -0
  564. package/dist/src/server/routes/jobs.d.ts.map +1 -0
  565. package/dist/src/server/routes/jobs.js +29 -0
  566. package/dist/src/server/routes/jobs.js.map +1 -0
  567. package/dist/src/server/routes/mcp-usage.d.ts +13 -0
  568. package/dist/src/server/routes/mcp-usage.d.ts.map +1 -0
  569. package/dist/src/server/routes/mcp-usage.js +17 -0
  570. package/dist/src/server/routes/mcp-usage.js.map +1 -0
  571. package/dist/src/server/routes/reconcile.d.ts +4 -0
  572. package/dist/src/server/routes/reconcile.d.ts.map +1 -0
  573. package/dist/src/server/routes/reconcile.js +43 -0
  574. package/dist/src/server/routes/reconcile.js.map +1 -0
  575. package/dist/src/server/routes/reindex.d.ts +4 -0
  576. package/dist/src/server/routes/reindex.d.ts.map +1 -0
  577. package/dist/src/server/routes/reindex.js +74 -0
  578. package/dist/src/server/routes/reindex.js.map +1 -0
  579. package/dist/src/server/routes/save.d.ts +40 -0
  580. package/dist/src/server/routes/save.d.ts.map +1 -0
  581. package/dist/src/server/routes/save.js +112 -0
  582. package/dist/src/server/routes/save.js.map +1 -0
  583. package/dist/src/server/routes/search.d.ts +5 -0
  584. package/dist/src/server/routes/search.d.ts.map +1 -0
  585. package/dist/src/server/routes/search.js +400 -0
  586. package/dist/src/server/routes/search.js.map +1 -0
  587. package/dist/src/server/routes/stats.d.ts +10 -0
  588. package/dist/src/server/routes/stats.d.ts.map +1 -0
  589. package/dist/src/server/routes/stats.js +15 -0
  590. package/dist/src/server/routes/stats.js.map +1 -0
  591. package/dist/src/server/routes/status.d.ts +20 -0
  592. package/dist/src/server/routes/status.d.ts.map +1 -0
  593. package/dist/src/server/routes/status.js +31 -0
  594. package/dist/src/server/routes/status.js.map +1 -0
  595. package/dist/src/server/routes/triage.d.ts +4 -0
  596. package/dist/src/server/routes/triage.d.ts.map +1 -0
  597. package/dist/src/server/routes/triage.js +94 -0
  598. package/dist/src/server/routes/triage.js.map +1 -0
  599. package/dist/src/server/save-quality.d.ts +21 -0
  600. package/dist/src/server/save-quality.d.ts.map +1 -0
  601. package/dist/src/server/save-quality.js +51 -0
  602. package/dist/src/server/save-quality.js.map +1 -0
  603. package/dist/src/storage/atomic.d.ts +8 -0
  604. package/dist/src/storage/atomic.d.ts.map +1 -0
  605. package/dist/src/storage/atomic.js +22 -0
  606. package/dist/src/storage/atomic.js.map +1 -0
  607. package/dist/src/storage/db.d.ts +15 -0
  608. package/dist/src/storage/db.d.ts.map +1 -0
  609. package/dist/src/storage/db.js +43 -0
  610. package/dist/src/storage/db.js.map +1 -0
  611. package/dist/src/storage/integrity.d.ts +11 -0
  612. package/dist/src/storage/integrity.d.ts.map +1 -0
  613. package/dist/src/storage/integrity.js +66 -0
  614. package/dist/src/storage/integrity.js.map +1 -0
  615. package/dist/src/storage/rebuild.d.ts +37 -0
  616. package/dist/src/storage/rebuild.d.ts.map +1 -0
  617. package/dist/src/storage/rebuild.js +353 -0
  618. package/dist/src/storage/rebuild.js.map +1 -0
  619. package/dist/src/storage/shadow-swap.d.ts +20 -0
  620. package/dist/src/storage/shadow-swap.d.ts.map +1 -0
  621. package/dist/src/storage/shadow-swap.js +163 -0
  622. package/dist/src/storage/shadow-swap.js.map +1 -0
  623. package/dist/src/storage/tables.d.ts +77 -0
  624. package/dist/src/storage/tables.d.ts.map +1 -0
  625. package/dist/src/storage/tables.js +196 -0
  626. package/dist/src/storage/tables.js.map +1 -0
  627. package/package.json +45 -14
  628. package/index.js +0 -3
@@ -0,0 +1,1376 @@
1
+ /**
2
+ * backfill CLI command — staged historical import.
3
+ *
4
+ * Imports web conversation exports from a dedicated staging area.
5
+ * The founder moves/copies exports into batch folders and runs:
6
+ *
7
+ * rift backfill --batch data/imports/exports-batch-1 --source chatgpt_web
8
+ *
9
+ * Slice 4 introduces a preflight-first workflow. Every run:
10
+ * - refreshes `manifest.json` (catalog of batch files)
11
+ * - refreshes `preflight.json` (junk + duplicate classification)
12
+ *
13
+ * The default operator flow is:
14
+ * 1. rift backfill --batch ... --source ... --dry-run (preflight only)
15
+ * 2. inspect manifest.json / preflight.json
16
+ * 3. rift backfill --batch ... --source ... (ingest)
17
+ *
18
+ * Reuses the existing POST /ingest pipeline (archive security,
19
+ * parsers, extraction, quarantine). Emits a per-batch report.
20
+ *
21
+ * Batch rerun is idempotent — the ingest pipeline's idempotency_key
22
+ * prevents duplicate conversations.
23
+ */
24
+ import fs from "node:fs";
25
+ import path from "node:path";
26
+ import crypto from "node:crypto";
27
+ import { Command } from "commander";
28
+ import { loadConfig } from "../../config/loader.js";
29
+ import { createHttpClient, readToken, resolveBaseUrl, CliError, } from "../http-client.js";
30
+ import { pollJob } from "../job-poller.js";
31
+ import { isJobFailure } from "../output.js";
32
+ import { isSourceSupported, INGEST_SOURCES, SUPPORTED_INGEST_SOURCES, } from "../../ingestion/parsers/types.js";
33
+ import { stageBatch, SOURCE_EXTENSIONS, } from "../../ingestion/staging.js";
34
+ import { extractArchive } from "../../security/archive.js";
35
+ import { parseChatGPTWeb } from "../../ingestion/parsers/chatgpt-web.js";
36
+ import { parseClaudeWeb } from "../../ingestion/parsers/claude-web.js";
37
+ import { parseGeminiWeb } from "../../ingestion/parsers/gemini-web.js";
38
+ import { parseGrokWeb } from "../../ingestion/parsers/grok-web.js";
39
+ import { addToReview, reviewQueueSize } from "../../capture/review-queue.js";
40
+ import { loadHistoricalCampaignState, writeHistoricalCampaignState, ensureHistoricalCampaignBatch, startHistoricalCampaignBatch, listTerminalHistoricalItemIds, listTerminalHistoricalFiles, isFileQuarantineCandidate, collectHistoricalBatchQuarantineReasons, markHistoricalFile, markHistoricalItem, finalizeHistoricalFiles, addHistoricalRunBudgetUsage, evaluateHistoricalBatch, getHistoricalCampaignBudgetWindow, recomputeHistoricalCampaignSummary, historicalCampaignReportPath, summarizeHistoricalBatchState, snapshotHistoricalCampaignReport, snapshotHistoricalNoOpReport, writeHistoricalCampaignReport, formatHistoricalCampaignSummary, reportHistoricalCampaignBlockers, } from "../../ingestion/historical-campaign.js";
41
+ import { HISTORICAL_BACKFILL_CAPABILITY_HEADER, INTERNAL_HISTORICAL_INGEST_CODEX_ROUTE, INTERNAL_HISTORICAL_TRIAGE_CODEX_ROUTE, effectiveTriageProviderName, isOperatorOverrideProvider, readHistoricalBackfillCapability, triageMeteringModeForProvider, triageMeteringModeForProviderName, } from "../../providers/operator-overrides.js";
42
+ const HISTORICAL_PARSERS = { // Lookup table: source identifier -> the parser function imported above for that web export format. Keys match the source values listed as supported in the --source option help text.
43
+ chatgpt_web: parseChatGPTWeb,
44
+ claude_web: parseClaudeWeb,
45
+ gemini_web: parseGeminiWeb,
46
+ grok_web: parseGrokWeb,
47
+ };
48
+ export function makeBackfillCommand() {
49
+ return new Command("backfill")
50
+ .description("Stage and import web conversation exports from a batch folder")
51
+ .requiredOption("--batch <path>", "Path to batch folder (e.g., data/imports/exports-batch-1)")
52
+ .requiredOption("--source <source>", "Source type (currently supported: chatgpt_web, claude_web, gemini_web, grok_web)")
53
+ .option("--dry-run", "Preflight only — refresh manifest/preflight and exit without ingesting")
54
+ .option("--force-unstaged", "Ingest files even if they are flagged as junk in preflight")
55
+ .option("--provider <provider>", "Backfill-only override for both triage and extraction (supported: codex-cli)")
56
+ .option("--triage-provider <provider>", "Backfill-only triage override (supported: codex-cli)")
57
+ .option("--extraction-provider <provider>", "Backfill-only extraction override (supported: codex-cli)")
58
+ .option("--limit <count>", "Cap this run to the first N unresolved files for smoke testing", parsePositiveInteger)
59
+ .action(async (opts, cmd) => {
60
+ const globalOpts = cmd.optsWithGlobals();
61
+ try {
62
+ // Validate source
63
+ const allSources = INGEST_SOURCES;
64
+ if (!allSources.includes(opts.source)) {
65
+ process.stderr.write(`Error: Unknown source "${opts.source}". ` +
66
+ `Known sources: ${INGEST_SOURCES.join(", ")}\n`);
67
+ process.exitCode = 1;
68
+ return;
69
+ }
70
+ if (!isSourceSupported(opts.source)) {
71
+ process.stderr.write(`Error: "${opts.source}" is not yet supported — no real export fixture exists to validate the parser against.\n` +
72
+ `To enable: add a real export sample to tests/fixtures/exports/${opts.source}/ and implement the parser.\n` +
73
+ `Currently supported: ${SUPPORTED_INGEST_SOURCES.join(", ")}\n`);
74
+ process.exitCode = 1;
75
+ return;
76
+ }
77
+ // Validate batch directory
78
+ const batchDir = path.resolve(opts.batch);
79
+ if (!fs.existsSync(batchDir) ||
80
+ !fs.statSync(batchDir).isDirectory()) {
81
+ process.stderr.write(`Error: Not a directory: ${batchDir}\n`);
82
+ process.exitCode = 1;
83
+ return;
84
+ }
85
+ const source = opts.source;
86
+ const providerOverride = resolveBackfillProviderOverride(opts.provider, "provider");
87
+ const triageProviderOverride = resolveBackfillProviderOverride(opts.triageProvider ?? providerOverride, "triage-provider");
88
+ const extractionProviderOverride = resolveBackfillProviderOverride(opts.extractionProvider ?? providerOverride, "extraction-provider");
89
+ const requestedTriageMeteringMode = triageMeteringModeForProvider(triageProviderOverride);
90
+ const explicitLimit = typeof opts.limit === "number" && Number.isFinite(opts.limit)
91
+ ? opts.limit
92
+ : undefined;
93
+ // --- Stage the batch: manifest + preflight, persisted to disk. ---
94
+ const { manifest, preflight } = stageBatch({ batchDir, source });
95
+ const batchName = manifest.batch_name;
96
+ writePreflightSummary({
97
+ batchDir,
98
+ manifest,
99
+ preflight,
100
+ stderr: process.stderr,
101
+ });
102
+ if (opts.dryRun) {
103
+ if (globalOpts.json) {
104
+ process.stdout.write(JSON.stringify({
105
+ mode: "preflight",
106
+ batch: batchName,
107
+ source,
108
+ manifest_path: path.join(batchDir, "manifest.json"),
109
+ preflight_path: path.join(batchDir, "preflight.json"),
110
+ preflight,
111
+ }, null, 2) + "\n");
112
+ }
113
+ process.stderr.write("\n(dry-run — no changes made beyond manifest.json / preflight.json)\n");
114
+ return;
115
+ }
116
+ const config = loadConfig(globalOpts.config);
117
+ const dataDir = config.data_paths.data_dir;
118
+ const now = new Date();
119
+ const nowIso = now.toISOString();
120
+ const reportPath = historicalCampaignReportPath(batchDir);
121
+ const campaignState = loadHistoricalCampaignState(dataDir);
122
+ const campaignBatch = ensureHistoricalCampaignBatch(campaignState, batchName, source, nowIso);
123
+ const reviewBacklogBefore = reviewQueueSize(dataDir);
124
+ const terminalItemIds = listTerminalHistoricalItemIds(campaignBatch);
125
+ const terminalFilesBefore = listTerminalHistoricalFiles(campaignBatch);
126
+ const rerunnableFiles = preflight.files
127
+ .filter((file) => !terminalFilesBefore.has(file.name))
128
+ .sort((left, right) => left.name.localeCompare(right.name));
129
+ for (const file of rerunnableFiles) {
130
+ ensureHistoricalPendingFile(campaignBatch, file.name, nowIso);
131
+ }
132
+ if (rerunnableFiles.length === 0) {
133
+ const noOpNextAction = "Batch is already terminal. Prepare the next staged batch.";
134
+ const terminalReport = snapshotHistoricalNoOpReport({
135
+ state: campaignState,
136
+ batch: campaignBatch,
137
+ preflight,
138
+ now,
139
+ reviewBacklogNow: reviewBacklogBefore,
140
+ nextAction: noOpNextAction,
141
+ });
142
+ campaignBatch.next_action = noOpNextAction;
143
+ campaignBatch.report_path = reportPath;
144
+ campaignState.last_run_at = nowIso;
145
+ await writeHistoricalCampaignReport(reportPath, terminalReport);
146
+ await writeHistoricalCampaignState(dataDir, campaignState);
147
+ if (globalOpts.json) {
148
+ process.stdout.write(JSON.stringify(terminalReport, null, 2) + "\n");
149
+ }
150
+ else {
151
+ process.stdout.write(formatHistoricalCampaignSummary(terminalReport));
152
+ }
153
+ return;
154
+ }
155
+ const budgetWindow = getHistoricalCampaignBudgetWindow(campaignState, now);
156
+ const runFileCount = Math.min(rerunnableFiles.length, budgetWindow.remaining_daily_files, budgetWindow.remaining_weekly_files, explicitLimit ?? Number.POSITIVE_INFINITY);
157
+ const filesForRun = rerunnableFiles.slice(0, runFileCount);
158
+ const deferredFiles = rerunnableFiles.slice(runFileCount);
159
+ const limitedByOperator = explicitLimit !== undefined &&
160
+ runFileCount === explicitLimit &&
161
+ rerunnableFiles.length > explicitLimit;
162
+ const preflightQuarantined = filesForRun.filter(isFileQuarantineCandidate);
163
+ const preflightQuarantineReasons = collectHistoricalBatchQuarantineReasons({
164
+ preflight,
165
+ failedFilesRate: 0,
166
+ });
167
+ const ingestable = filesForRun.filter((file) => {
168
+ if (preflightQuarantined.some((entry) => entry.name === file.name))
169
+ return false;
170
+ if (!file.supported)
171
+ return false;
172
+ if (file.junk_markers.length === 0)
173
+ return true;
174
+ return Boolean(opts.forceUnstaged);
175
+ });
176
+ const skipped = filesForRun.filter((file) => !ingestable.includes(file) &&
177
+ !preflightQuarantined.some((entry) => entry.name === file.name));
178
+ const triageCandidates = ingestable
179
+ .flatMap((file) => buildHistoricalTriageItem(file, source))
180
+ .filter((item) => !terminalItemIds.has(item.id));
181
+ const triageCandidateById = new Map(triageCandidates.map((item) => [item.id, item]));
182
+ const cachedTriageById = new Map();
183
+ const requestedTriageProviderName = effectiveTriageProviderName(triageProviderOverride);
184
+ for (const candidate of triageCandidates) {
185
+ const existingItem = campaignBatch.items[candidate.id];
186
+ if (existingItem?.terminal || !existingItem?.triage)
187
+ continue;
188
+ if (existingItem.triage.provider !== requestedTriageProviderName) {
189
+ continue;
190
+ }
191
+ cachedTriageById.set(candidate.id, {
192
+ item_id: candidate.id,
193
+ file: candidate.file.name,
194
+ kind: candidate.kind,
195
+ ...(candidate.conversationId
196
+ ? { conversation_id: candidate.conversationId }
197
+ : {}),
198
+ ...(candidate.title ? { title: candidate.title } : {}),
199
+ ...(candidate.createdAt ? { created_at: candidate.createdAt } : {}),
200
+ conversation_count: candidate.conversationCount,
201
+ triage: existingItem.triage,
202
+ });
203
+ }
204
+ const triageCandidatesNeedingWork = triageCandidates.filter((candidate) => !cachedTriageById.has(candidate.id));
205
+ const blockers = reportHistoricalCampaignBlockers({
206
+ state: campaignState,
207
+ now,
208
+ reviewBacklogBefore,
209
+ meteringMode: requestedTriageMeteringMode,
210
+ triageBatchCostCapUsd: config.openai.max_cost_per_batch_usd,
211
+ triageCandidates: triageCandidatesNeedingWork.length,
212
+ });
213
+ if (runFileCount === 0 || blockers.length > 0) {
214
+ const nextAction = blockers.length > 0
215
+ ? blockers.join("; ")
216
+ : `Daily or weekly file budget is exhausted. Resume the remaining ${rerunnableFiles.length} file(s) in the next budget window.`;
217
+ const blockedStatus = blockers.some((reason) => reason.includes("triage spend") || reason.includes("review backlog"))
218
+ ? "failed"
219
+ : "running";
220
+ const previousBatchStatus = campaignBatch.status;
221
+ const preserveTerminalOutcome = previousBatchStatus === "failed" ||
222
+ previousBatchStatus === "quarantined";
223
+ const finalBlockedStatus = preserveTerminalOutcome ? previousBatchStatus : blockedStatus;
224
+ startHistoricalCampaignBatch(campaignBatch, {
225
+ totalFiles: preflight.total_files,
226
+ runFiles: 0,
227
+ triageCandidates: triageCandidates.length,
228
+ reviewBacklogBefore,
229
+ meteringMode: requestedTriageMeteringMode,
230
+ nowIso,
231
+ });
232
+ campaignBatch.last_run_at = nowIso;
233
+ campaignBatch.status = finalBlockedStatus;
234
+ campaignBatch.review_backlog_after = reviewBacklogBefore;
235
+ campaignBatch.next_action = nextAction;
236
+ campaignBatch.report_path = reportPath;
237
+ const blockedReport = snapshotHistoricalCampaignReport({
238
+ state: campaignState,
239
+ batch: campaignBatch,
240
+ preflight,
241
+ now,
242
+ runFileCount: 0,
243
+ reviewBacklogAfter: reviewBacklogBefore,
244
+ triageSpendUsd: 0,
245
+ processedFiles: [],
246
+ status: finalBlockedStatus,
247
+ nextAction,
248
+ results: [],
249
+ });
250
+ recomputeHistoricalCampaignSummary(campaignState);
251
+ campaignState.last_run_at = nowIso;
252
+ await writeHistoricalCampaignReport(reportPath, blockedReport);
253
+ await writeHistoricalCampaignState(dataDir, campaignState);
254
+ if (globalOpts.json) {
255
+ process.stdout.write(JSON.stringify(blockedReport, null, 2) + "\n");
256
+ }
257
+ else {
258
+ process.stdout.write(formatHistoricalCampaignSummary(blockedReport));
259
+ }
260
+ if (blockedStatus === "failed") {
261
+ process.exitCode = 1;
262
+ }
263
+ return;
264
+ }
265
+ startHistoricalCampaignBatch(campaignBatch, {
266
+ totalFiles: preflight.total_files,
267
+ runFiles: runFileCount,
268
+ triageCandidates: triageCandidates.length,
269
+ reviewBacklogBefore,
270
+ meteringMode: requestedTriageMeteringMode,
271
+ nowIso,
272
+ });
273
+ for (const file of filesForRun) {
274
+ markHistoricalFile(campaignBatch, file.name, {
275
+ terminal: false,
276
+ lastResult: "running",
277
+ nowIso,
278
+ });
279
+ }
280
+ const results = [];
281
+ let triageSpendUsd = 0;
282
+ let reviewBacklogAfter = reviewBacklogBefore;
283
+ const touchedFilesThisRun = new Set();
284
+ let stopReason = deferredFiles.length > 0
285
+ ? limitedByOperator
286
+ ? `Processed ${runFileCount} file(s) in this smoke run. Resume the remaining ${deferredFiles.length} file(s) when ready.`
287
+ : `Processed ${runFileCount} file(s) in this run. Resume the remaining ${deferredFiles.length} file(s) in the next daily or weekly budget window.`
288
+ : null;
289
+ let reviewBacklogStopped = false;
290
+ let budgetUsageCommitted = false;
291
+ await persistHistoricalCampaignCheckpoint({
292
+ dataDir,
293
+ state: campaignState,
294
+ batch: campaignBatch,
295
+ preflight,
296
+ now,
297
+ reportPath,
298
+ runFileCount,
299
+ reviewBacklogAfter,
300
+ triageSpendUsd,
301
+ processedFiles: filesForRun.map((file) => file.name),
302
+ results,
303
+ status: "running",
304
+ nextAction: stopReason ??
305
+ `Running historical batch ${batchName}. Resume only unresolved files if interrupted.`,
306
+ });
307
+ try {
308
+ for (const file of preflightQuarantined) {
309
+ touchedFilesThisRun.add(file.name);
310
+ markHistoricalFile(campaignBatch, file.name, {
311
+ terminal: true,
312
+ terminalStatus: "quarantined",
313
+ lastResult: "quarantined",
314
+ nowIso,
315
+ });
316
+ results.push({
317
+ file: file.name,
318
+ status: "quarantined",
319
+ error: file.junk_markers.join(",") ||
320
+ file.notes.join(",") ||
321
+ "quarantined_by_policy",
322
+ });
323
+ }
324
+ await persistHistoricalCampaignCheckpoint({
325
+ dataDir,
326
+ state: campaignState,
327
+ batch: campaignBatch,
328
+ preflight,
329
+ now,
330
+ reportPath,
331
+ runFileCount,
332
+ reviewBacklogAfter,
333
+ triageSpendUsd,
334
+ processedFiles: filesForRun.map((file) => file.name),
335
+ results,
336
+ status: "running",
337
+ nextAction: stopReason ??
338
+ `Running historical batch ${batchName}. Resume only unresolved files if interrupted.`,
339
+ });
340
+ if (preflightQuarantineReasons.length > 0) {
341
+ for (const file of filesForRun.filter((candidate) => !preflightQuarantined.some((entry) => entry.name === candidate.name))) {
342
+ touchedFilesThisRun.add(file.name);
343
+ markHistoricalFile(campaignBatch, file.name, {
344
+ terminal: true,
345
+ terminalStatus: "quarantined",
346
+ lastResult: "quarantined",
347
+ nowIso,
348
+ });
349
+ results.push({
350
+ file: file.name,
351
+ status: "quarantined",
352
+ error: `batch_quarantined:${preflightQuarantineReasons.join(";")}`,
353
+ });
354
+ }
355
+ finalizeHistoricalFiles(campaignBatch, nowIso);
356
+ addHistoricalRunBudgetUsage(campaignState, {
357
+ weekKey: budgetWindow.week_key,
358
+ dayKey: budgetWindow.day_key,
359
+ filesProcessed: touchedFilesThisRun.size,
360
+ triageSpendUsd,
361
+ });
362
+ budgetUsageCommitted = true;
363
+ const evaluation = evaluateHistoricalBatch({
364
+ state: campaignState,
365
+ batch: campaignBatch,
366
+ preflight,
367
+ now,
368
+ runFileCount,
369
+ reviewBacklogAfter,
370
+ triageSpendUsd,
371
+ reportPath,
372
+ processedFiles: filesForRun.map((file) => file.name),
373
+ results,
374
+ });
375
+ campaignBatch.report_path = reportPath;
376
+ campaignState.last_run_at = nowIso;
377
+ await writeHistoricalCampaignReport(reportPath, evaluation.report);
378
+ await writeHistoricalCampaignState(dataDir, campaignState);
379
+ if (globalOpts.json) {
380
+ process.stdout.write(JSON.stringify(evaluation.report, null, 2) + "\n");
381
+ }
382
+ else {
383
+ process.stdout.write(formatHistoricalCampaignSummary(evaluation.report));
384
+ }
385
+ process.exitCode = 1;
386
+ return;
387
+ }
388
+ for (const file of skipped) {
389
+ touchedFilesThisRun.add(file.name);
390
+ markHistoricalFile(campaignBatch, file.name, {
391
+ terminal: false,
392
+ lastResult: "skipped",
393
+ nowIso,
394
+ });
395
+ results.push({
396
+ file: file.name,
397
+ status: "skipped",
398
+ error: file.junk_markers.join(",") || "skipped_by_preflight",
399
+ });
400
+ }
401
+ await persistHistoricalCampaignCheckpoint({
402
+ dataDir,
403
+ state: campaignState,
404
+ batch: campaignBatch,
405
+ preflight,
406
+ now,
407
+ reportPath,
408
+ runFileCount,
409
+ reviewBacklogAfter,
410
+ triageSpendUsd,
411
+ processedFiles: filesForRun.map((file) => file.name),
412
+ results,
413
+ status: "running",
414
+ nextAction: stopReason ??
415
+ `Running historical batch ${batchName}. Resume only unresolved files if interrupted.`,
416
+ });
417
+ const triageById = new Map(cachedTriageById);
418
+ let client = null;
419
+ if (triageCandidates.length > 0) {
420
+ const baseUrl = resolveBaseUrl(globalOpts.config);
421
+ const token = await readToken();
422
+ if (!token) {
423
+ throw new Error("No auth token. Run: rift token issue");
424
+ }
425
+ client = createHttpClient({ baseUrl, token });
426
+ }
427
+ if (triageCandidatesNeedingWork.length > 0) {
428
+ const triageOutput = await runHistoricalTriage({
429
+ batchDir,
430
+ client: client,
431
+ dataDir,
432
+ items: triageCandidatesNeedingWork,
433
+ ...(triageProviderOverride
434
+ ? { providerOverride: triageProviderOverride }
435
+ : {}),
436
+ });
437
+ triageSpendUsd = triageOutput.spent_usd ?? 0;
438
+ if (triageOutput.metering_mode) {
439
+ campaignBatch.metering_mode = triageOutput.metering_mode;
440
+ }
441
+ for (const item of triageOutput.items) {
442
+ triageById.set(item.item_id, item);
443
+ }
444
+ for (const candidate of triageCandidatesNeedingWork) {
445
+ const triageItem = triageById.get(candidate.id);
446
+ if (!triageItem?.triage)
447
+ continue;
448
+ touchedFilesThisRun.add(candidate.file.name);
449
+ markHistoricalItem(campaignBatch, {
450
+ itemId: candidate.id,
451
+ file: candidate.file.name,
452
+ ...(candidate.conversationId
453
+ ? { conversationId: candidate.conversationId }
454
+ : {}),
455
+ ...(candidate.title ? { title: candidate.title } : {}),
456
+ lastResult: "triaged",
457
+ terminal: false,
458
+ triage: triageItem.triage,
459
+ nowIso,
460
+ });
461
+ }
462
+ await persistHistoricalCampaignCheckpoint({
463
+ dataDir,
464
+ state: campaignState,
465
+ batch: campaignBatch,
466
+ preflight,
467
+ now,
468
+ reportPath,
469
+ runFileCount,
470
+ reviewBacklogAfter,
471
+ triageSpendUsd,
472
+ processedFiles: filesForRun.map((file) => file.name),
473
+ results,
474
+ status: "running",
475
+ nextAction: stopReason ??
476
+ `Running historical batch ${batchName}. Resume only unresolved files if interrupted.`,
477
+ });
478
+ }
479
+ const actualTriageProviderName = resolveHistoricalRunTriageProviderName(triageCandidates, triageById);
480
+ if (actualTriageProviderName) {
481
+ campaignBatch.metering_mode = triageMeteringModeForProviderName(actualTriageProviderName);
482
+ }
483
+ for (const candidate of triageCandidates) {
484
+ const label = formatHistoricalLabel(candidate);
485
+ const triageItem = triageById.get(candidate.id);
486
+ if (!triageItem) {
487
+ touchedFilesThisRun.add(candidate.file.name);
488
+ markHistoricalItem(campaignBatch, {
489
+ itemId: candidate.id,
490
+ file: candidate.file.name,
491
+ ...(candidate.conversationId
492
+ ? { conversationId: candidate.conversationId }
493
+ : {}),
494
+ ...(candidate.title ? { title: candidate.title } : {}),
495
+ lastResult: "error",
496
+ terminal: false,
497
+ nowIso,
498
+ });
499
+ results.push({
500
+ file: candidate.file.name,
501
+ ...(candidate.conversationId
502
+ ? { conversation_id: candidate.conversationId }
503
+ : {}),
504
+ ...(candidate.title ? { title: candidate.title } : {}),
505
+ status: "error",
506
+ error: "missing_triage_result",
507
+ });
508
+ process.stderr.write(` ${label}: missing triage result\n`);
509
+ continue;
510
+ }
511
+ if (triageItem.error) {
512
+ touchedFilesThisRun.add(candidate.file.name);
513
+ markHistoricalItem(campaignBatch, {
514
+ itemId: candidate.id,
515
+ file: candidate.file.name,
516
+ ...(candidate.conversationId
517
+ ? { conversationId: candidate.conversationId }
518
+ : {}),
519
+ ...(candidate.title ? { title: candidate.title } : {}),
520
+ lastResult: "error",
521
+ terminal: false,
522
+ nowIso,
523
+ });
524
+ results.push({
525
+ file: candidate.file.name,
526
+ ...(candidate.conversationId
527
+ ? { conversation_id: candidate.conversationId }
528
+ : {}),
529
+ ...(candidate.title ? { title: candidate.title } : {}),
530
+ status: "error",
531
+ error: triageItem.error,
532
+ });
533
+ process.stderr.write(` ${label}: triage error: ${triageItem.error}\n`);
534
+ continue;
535
+ }
536
+ if (!triageItem.triage) {
537
+ touchedFilesThisRun.add(candidate.file.name);
538
+ markHistoricalItem(campaignBatch, {
539
+ itemId: candidate.id,
540
+ file: candidate.file.name,
541
+ ...(candidate.conversationId
542
+ ? { conversationId: candidate.conversationId }
543
+ : {}),
544
+ ...(candidate.title ? { title: candidate.title } : {}),
545
+ lastResult: "error",
546
+ terminal: false,
547
+ nowIso,
548
+ });
549
+ results.push({
550
+ file: candidate.file.name,
551
+ ...(candidate.conversationId
552
+ ? { conversation_id: candidate.conversationId }
553
+ : {}),
554
+ ...(candidate.title ? { title: candidate.title } : {}),
555
+ status: "error",
556
+ error: "triage_result_missing",
557
+ });
558
+ process.stderr.write(` ${label}: triage result missing\n`);
559
+ continue;
560
+ }
561
+ const triage = triageItem.triage;
562
+ process.stderr.write(` ${label}: ${triage.lane} (score=${triage.score.toFixed(3)}, decision=${triage.decision})\n`);
563
+ if (triage.lane === "archive_only") {
564
+ touchedFilesThisRun.add(candidate.file.name);
565
+ markHistoricalItem(campaignBatch, {
566
+ itemId: candidate.id,
567
+ file: candidate.file.name,
568
+ ...(candidate.conversationId
569
+ ? { conversationId: candidate.conversationId }
570
+ : {}),
571
+ ...(candidate.title ? { title: candidate.title } : {}),
572
+ lastResult: "archive_only",
573
+ terminal: true,
574
+ terminalStatus: "archive_only",
575
+ triage,
576
+ nowIso,
577
+ });
578
+ results.push({
579
+ file: candidate.file.name,
580
+ ...(candidate.conversationId
581
+ ? { conversation_id: candidate.conversationId }
582
+ : {}),
583
+ ...(candidate.title ? { title: candidate.title } : {}),
584
+ status: "archive_only",
585
+ lane: triage.lane,
586
+ });
587
+ continue;
588
+ }
589
+ if (triage.lane === "review") {
590
+ touchedFilesThisRun.add(candidate.file.name);
591
+ await addToReview(dataDir, {
592
+ source,
593
+ conversationId: candidate.kind === "conversation" && candidate.conversationId
594
+ ? historicalConversationKey(source, candidate.conversationId)
595
+ : `backfill:${batchName}:${candidate.file.name}`,
596
+ summary: triage.summary ||
597
+ candidate.title ||
598
+ candidate.file.name,
599
+ content: triageCandidateById.get(candidate.id)?.reviewContent ??
600
+ candidate.file.name,
601
+ triage,
602
+ });
603
+ markHistoricalItem(campaignBatch, {
604
+ itemId: candidate.id,
605
+ file: candidate.file.name,
606
+ ...(candidate.conversationId
607
+ ? { conversationId: candidate.conversationId }
608
+ : {}),
609
+ ...(candidate.title ? { title: candidate.title } : {}),
610
+ lastResult: "review",
611
+ terminal: true,
612
+ terminalStatus: "review",
613
+ triage,
614
+ nowIso,
615
+ });
616
+ results.push({
617
+ file: candidate.file.name,
618
+ ...(candidate.conversationId
619
+ ? { conversation_id: candidate.conversationId }
620
+ : {}),
621
+ ...(candidate.title ? { title: candidate.title } : {}),
622
+ status: "review",
623
+ lane: triage.lane,
624
+ });
625
+ reviewBacklogAfter = reviewQueueSize(dataDir);
626
+ if (reviewBacklogAfter >
627
+ 400) {
628
+ stopReason =
629
+ "Stop historical import. Review backlog exceeds 400 open items.";
630
+ reviewBacklogStopped = true;
631
+ process.stderr.write(` ${label}: stopping batch because review backlog is now ${reviewBacklogAfter}\n`);
632
+ break;
633
+ }
634
+ continue;
635
+ }
636
+ const upload = candidate.upload;
637
+ if (!upload) {
638
+ touchedFilesThisRun.add(candidate.file.name);
639
+ const error = candidate.kind === "file_fallback"
640
+ ? "parser_zero_conversations"
641
+ : "missing_promoted_payload";
642
+ markHistoricalItem(campaignBatch, {
643
+ itemId: candidate.id,
644
+ file: candidate.file.name,
645
+ ...(candidate.conversationId
646
+ ? { conversationId: candidate.conversationId }
647
+ : {}),
648
+ ...(candidate.title ? { title: candidate.title } : {}),
649
+ lastResult: "error",
650
+ terminal: false,
651
+ triage,
652
+ nowIso,
653
+ });
654
+ results.push({
655
+ file: candidate.file.name,
656
+ ...(candidate.conversationId
657
+ ? { conversation_id: candidate.conversationId }
658
+ : {}),
659
+ ...(candidate.title ? { title: candidate.title } : {}),
660
+ status: "error",
661
+ lane: triage.lane,
662
+ error,
663
+ });
664
+ process.stderr.write(` Error: ${error} for ${label}\n`);
665
+ continue;
666
+ }
667
+ process.stderr.write(` Promoting ${label} into /ingest\n`);
668
+ try {
669
+ if (!client) {
670
+ throw new Error("Missing HTTP client for historical promotion");
671
+ }
672
+ const form = new FormData();
673
+ form.append("source", source);
674
+ // Fastify's multipart parser validates fields available at the
675
+ // moment it encounters the file part. Keep the historical
676
+ // idempotency key ahead of the file so larger uploads don't
677
+ // lose it and fail the internal override route validation.
678
+ form.append("idempotency_key", candidate.kind === "conversation" && candidate.conversationId
679
+ ? historicalConversationKey(source, candidate.conversationId)
680
+ : `backfill:${batchName}:${candidate.file.name}`);
681
+ form.append("file", new Blob([new Uint8Array(upload.fileData)]), upload.filename);
682
+ const { data } = await client.postMultipart(historicalIngestEndpointPath(extractionProviderOverride), form, historicalInternalRouteOptions(dataDir, extractionProviderOverride));
683
+ const resp = data;
684
+ if (resp.duplicate) {
685
+ touchedFilesThisRun.add(candidate.file.name);
686
+ markHistoricalItem(campaignBatch, {
687
+ itemId: candidate.id,
688
+ file: candidate.file.name,
689
+ ...(candidate.conversationId
690
+ ? { conversationId: candidate.conversationId }
691
+ : {}),
692
+ ...(candidate.title ? { title: candidate.title } : {}),
693
+ lastResult: "duplicate",
694
+ terminal: true,
695
+ terminalStatus: "promote_to_extract",
696
+ triage,
697
+ nowIso,
698
+ });
699
+ process.stderr.write(" Duplicate (already ingested)\n");
700
+ results.push({
701
+ file: candidate.file.name,
702
+ ...(candidate.conversationId
703
+ ? { conversation_id: candidate.conversationId }
704
+ : {}),
705
+ ...(candidate.title ? { title: candidate.title } : {}),
706
+ status: "duplicate",
707
+ lane: triage.lane,
708
+ });
709
+ continue;
710
+ }
711
+ const result = await pollJob({
712
+ get: client.get.bind(client),
713
+ jobId: resp.job_id,
714
+ });
715
+ if (isJobFailure(result.job, result.timedOut)) {
716
+ touchedFilesThisRun.add(candidate.file.name);
717
+ const error = result.job.error ??
718
+ (result.timedOut ? "timed out" : "unknown");
719
+ markHistoricalItem(campaignBatch, {
720
+ itemId: candidate.id,
721
+ file: candidate.file.name,
722
+ ...(candidate.conversationId
723
+ ? { conversationId: candidate.conversationId }
724
+ : {}),
725
+ ...(candidate.title ? { title: candidate.title } : {}),
726
+ lastResult: "failed",
727
+ terminal: false,
728
+ triage,
729
+ nowIso,
730
+ });
731
+ results.push({
732
+ file: candidate.file.name,
733
+ ...(candidate.conversationId
734
+ ? { conversation_id: candidate.conversationId }
735
+ : {}),
736
+ ...(candidate.title ? { title: candidate.title } : {}),
737
+ status: "failed",
738
+ lane: triage.lane,
739
+ error,
740
+ });
741
+ process.stderr.write(` Failed: ${error}\n`);
742
+ }
743
+ else {
744
+ touchedFilesThisRun.add(candidate.file.name);
745
+ markHistoricalItem(campaignBatch, {
746
+ itemId: candidate.id,
747
+ file: candidate.file.name,
748
+ ...(candidate.conversationId
749
+ ? { conversationId: candidate.conversationId }
750
+ : {}),
751
+ ...(candidate.title ? { title: candidate.title } : {}),
752
+ lastResult: "promote_to_extract",
753
+ terminal: true,
754
+ terminalStatus: "promote_to_extract",
755
+ triage,
756
+ nowIso,
757
+ });
758
+ results.push({
759
+ file: candidate.file.name,
760
+ ...(candidate.conversationId
761
+ ? { conversation_id: candidate.conversationId }
762
+ : {}),
763
+ ...(candidate.title ? { title: candidate.title } : {}),
764
+ status: "completed",
765
+ lane: triage.lane,
766
+ });
767
+ process.stderr.write(" Completed\n");
768
+ }
769
+ }
770
+ catch (err) {
771
+ touchedFilesThisRun.add(candidate.file.name);
772
+ const msg = err instanceof CliError
773
+ ? formatCliErrorForHistoricalBackfill(err)
774
+ : err instanceof Error
775
+ ? err.message
776
+ : String(err);
777
+ markHistoricalItem(campaignBatch, {
778
+ itemId: candidate.id,
779
+ file: candidate.file.name,
780
+ ...(candidate.conversationId
781
+ ? { conversationId: candidate.conversationId }
782
+ : {}),
783
+ ...(candidate.title ? { title: candidate.title } : {}),
784
+ lastResult: "error",
785
+ terminal: false,
786
+ triage,
787
+ nowIso,
788
+ });
789
+ results.push({
790
+ file: candidate.file.name,
791
+ ...(candidate.conversationId
792
+ ? { conversation_id: candidate.conversationId }
793
+ : {}),
794
+ ...(candidate.title ? { title: candidate.title } : {}),
795
+ status: "error",
796
+ lane: triage.lane,
797
+ error: msg,
798
+ });
799
+ process.stderr.write(` Error: ${msg}\n`);
800
+ }
801
+ }
802
+ finalizeHistoricalFiles(campaignBatch, nowIso);
803
+ reviewBacklogAfter = reviewQueueSize(dataDir);
804
+ addHistoricalRunBudgetUsage(campaignState, {
805
+ weekKey: budgetWindow.week_key,
806
+ dayKey: budgetWindow.day_key,
807
+ filesProcessed: touchedFilesThisRun.size,
808
+ triageSpendUsd,
809
+ });
810
+ budgetUsageCommitted = true;
811
+ const hasProcessingErrors = results.some((result) => result.status === "failed" || result.status === "error");
812
+ const operatorLimitedCompletion = limitedByOperator && !hasProcessingErrors;
813
+ const evaluation = evaluateHistoricalBatch({
814
+ state: campaignState,
815
+ batch: campaignBatch,
816
+ preflight,
817
+ now,
818
+ runFileCount,
819
+ reviewBacklogAfter,
820
+ triageSpendUsd,
821
+ reportPath,
822
+ processedFiles: filesForRun.map((file) => file.name),
823
+ results,
824
+ ...(!operatorLimitedCompletion &&
825
+ !reviewBacklogStopped &&
826
+ stopReason &&
827
+ !hasProcessingErrors
828
+ ? { incompleteStatus: "running" }
829
+ : {}),
830
+ });
831
+ const isResumableIncompleteStatus = evaluation.report.status === "running";
832
+ if (operatorLimitedCompletion &&
833
+ isResumableIncompleteStatus &&
834
+ evaluation.report.status === "running") {
835
+ evaluation.report.status = "completed_with_warnings";
836
+ evaluation.report.next_action = stopReason;
837
+ campaignBatch.status = "completed_with_warnings";
838
+ campaignBatch.next_action = stopReason;
839
+ recomputeHistoricalCampaignSummary(campaignState);
840
+ }
841
+ else if (reviewBacklogStopped &&
842
+ !hasProcessingErrors &&
843
+ isResumableIncompleteStatus &&
844
+ evaluation.report.status === "running") {
845
+ evaluation.report.status = "completed_with_warnings";
846
+ evaluation.report.next_action = stopReason;
847
+ campaignBatch.status = "completed_with_warnings";
848
+ campaignBatch.next_action = stopReason;
849
+ recomputeHistoricalCampaignSummary(campaignState);
850
+ }
851
+ else if (stopReason && !hasProcessingErrors && isResumableIncompleteStatus) {
852
+ evaluation.report.status = "running";
853
+ evaluation.report.next_action = stopReason;
854
+ campaignBatch.status = "running";
855
+ campaignBatch.next_action = stopReason;
856
+ recomputeHistoricalCampaignSummary(campaignState);
857
+ }
858
+ campaignBatch.report_path = reportPath;
859
+ campaignState.last_run_at = nowIso;
860
+ await writeHistoricalCampaignReport(reportPath, evaluation.report);
861
+ await writeHistoricalCampaignState(dataDir, campaignState);
862
+ if (globalOpts.json) {
863
+ process.stdout.write(JSON.stringify(evaluation.report, null, 2) + "\n");
864
+ }
865
+ else {
866
+ process.stdout.write(formatHistoricalCampaignSummary(evaluation.report));
867
+ }
868
+ if (campaignBatch.status === "failed" || campaignBatch.status === "quarantined") {
869
+ process.exitCode = 1;
870
+ }
871
+ }
872
+ catch (runErr) {
873
+ finalizeHistoricalFiles(campaignBatch, new Date().toISOString());
874
+ reviewBacklogAfter = reviewQueueSize(dataDir);
875
+ if (!budgetUsageCommitted && (triageSpendUsd > 0 || results.length > 0)) {
876
+ addHistoricalRunBudgetUsage(campaignState, {
877
+ weekKey: budgetWindow.week_key,
878
+ dayKey: budgetWindow.day_key,
879
+ filesProcessed: touchedFilesThisRun.size,
880
+ triageSpendUsd,
881
+ });
882
+ budgetUsageCommitted = true;
883
+ }
884
+ await persistHistoricalCampaignCheckpoint({
885
+ dataDir,
886
+ state: campaignState,
887
+ batch: campaignBatch,
888
+ preflight,
889
+ now,
890
+ reportPath,
891
+ runFileCount,
892
+ reviewBacklogAfter,
893
+ triageSpendUsd,
894
+ processedFiles: filesForRun.map((file) => file.name),
895
+ results,
896
+ status: "failed",
897
+ nextAction: "Batch failed mid-run. Resume only unresolved files after fixing the error.",
898
+ incompleteStatus: "failed",
899
+ });
900
+ throw runErr;
901
+ }
902
+ }
903
+ catch (err) {
904
+ process.stderr.write(`Error: ${err instanceof CliError ? err.message : err instanceof Error ? err.message : String(err)}\n`);
905
+ process.exitCode = 1;
906
+ }
907
+ });
908
+ }
909
/**
 * Makes sure `batch.files` has an entry for `fileName`.
 *
 * When the slot is missing (or holds `null`/`undefined`) a fresh "pending"
 * record is stored; an existing record is left completely untouched.
 *
 * @param {{ files: Record<string, object> }} batch - Batch whose `files` map is updated in place.
 * @param {string} fileName - Key (and `file` value) of the record.
 * @param {string} nowIso - Timestamp stored as `updated_at` on a newly created record.
 */
function ensureHistoricalPendingFile(batch, fileName, nowIso) {
    const existing = batch.files[fileName];
    if (existing !== undefined && existing !== null) {
        return;
    }
    batch.files[fileName] = {
        file: fileName,
        terminal: false,
        item_ids: [],
        last_result: "pending",
        attempts: 0,
        updated_at: nowIso,
    };
}
919
/**
 * Builds a campaign report for the current batch state, updates the batch and
 * campaign bookkeeping, and writes both the report and the campaign state.
 *
 * When `params.status` is "running" the report is a snapshot produced by
 * `snapshotHistoricalCampaignReport`; for any other status the batch is run
 * through `evaluateHistoricalBatch` and the caller-supplied status, next
 * action and results are then written over the evaluated report.
 *
 * @param {object} params - Checkpoint inputs (`dataDir`, `state`, `batch`,
 *   `preflight`, `now`, `reportPath`, `runFileCount`, `reviewBacklogAfter`,
 *   `triageSpendUsd`, `processedFiles`, `results`, `status`, `nextAction`
 *   and optionally `incompleteStatus`).
 * @returns {Promise<object>} The report that was written.
 */
async function persistHistoricalCampaignCheckpoint(params) {
    const { batch, state } = params;
    batch.report_path = params.reportPath;
    batch.next_action = params.nextAction;
    // Inputs shared by both report builders, kept in the same leading key order.
    const sharedInputs = {
        state,
        batch,
        preflight: params.preflight,
        now: params.now,
        runFileCount: params.runFileCount,
        reviewBacklogAfter: params.reviewBacklogAfter,
        triageSpendUsd: params.triageSpendUsd,
    };
    let report;
    if (params.status !== "running") {
        const evaluation = evaluateHistoricalBatch({
            ...sharedInputs,
            reportPath: params.reportPath,
            processedFiles: params.processedFiles,
            results: params.results,
            ...(params.incompleteStatus
                ? { incompleteStatus: params.incompleteStatus }
                : {}),
        });
        report = evaluation.report;
        // The caller's verdict is written over whatever the evaluation produced.
        report.status = params.status;
        report.next_action = params.nextAction;
        report.results = params.results;
        batch.status = params.status;
        batch.next_action = params.nextAction;
    }
    else {
        batch.status = "running";
        report = snapshotHistoricalCampaignReport({
            ...sharedInputs,
            processedFiles: params.processedFiles,
            status: "running",
            nextAction: params.nextAction,
            nextBatchSize: currentHistoricalBatchLevel(batch.total_files),
            results: params.results,
        });
        // Counts and backlog are refreshed only after the snapshot has been taken.
        batch.counts = summarizeHistoricalBatchState(batch, params.preflight).counts;
        batch.review_backlog_after = params.reviewBacklogAfter;
    }
    recomputeHistoricalCampaignSummary(state);
    state.last_run_at = params.now.toISOString();
    await writeHistoricalCampaignReport(params.reportPath, report);
    await writeHistoricalCampaignState(params.dataDir, state);
    return report;
}
971
/**
 * Maps a batch's total file count onto a batch-size level.
 *
 * @param {number} totalFiles - Number of files in the batch.
 * @returns {number} 100 when `totalFiles` is at most 100, 250 when it is at
 *   most 250, otherwise 500.
 */
function currentHistoricalBatchLevel(totalFiles) {
    for (const level of [100, 250]) {
        if (totalFiles <= level) {
            return level;
        }
    }
    return 500;
}
978
/**
 * Reads one batch file from disk and turns it into triage items.
 *
 * The file is unpacked with `extractArchive` and handed to the parser
 * registered for `source` in `HISTORICAL_PARSERS`. If the parser yields no
 * conversations, a single "file_fallback" item is returned whose content is
 * the first 12,000 characters of the file decoded as UTF-8. Otherwise one
 * "conversation" item is returned per parsed conversation, carrying a
 * synthetic single-conversation upload when the raw conversation could be
 * located by id.
 *
 * @param {{ name: string, path: string }} file - Batch file descriptor.
 * @param {string} source - Source identifier used to select the parser.
 * @returns {object[]} Triage items for the file.
 * @throws {Error} When no parser is registered for `source`.
 */
function buildHistoricalTriageItem(file, source) {
    const fileBuffer = fs.readFileSync(file.path);
    const extracted = extractArchive(fileBuffer, file.name);
    const parser = HISTORICAL_PARSERS[source];
    if (!parser) {
        throw new Error(`No historical triage parser for source: ${source}`);
    }
    const conversations = parser(extracted.files);
    if (conversations.length === 0) {
        const preview = fileBuffer.toString("utf-8").slice(0, 12_000);
        // An empty preview falls back to a one-line description of the file.
        const content = preview || `File: ${file.name}`;
        return [{
                id: file.name,
                kind: "file_fallback",
                source,
                file,
                triageContent: content,
                reviewContent: content,
                conversationCount: 0,
            }];
    }
    const rawByConversationId = collectRawConversations(source, extracted.files);
    const items = [];
    for (const conversation of conversations) {
        const reviewContent = buildConversationReviewContent({
            source,
            fileName: file.name,
            conversationId: conversation.id,
            title: conversation.title,
            createdAt: conversation.createdAt,
            content: conversation.content,
        });
        const item = {
            id: historicalTriageItemId(file.name, conversation.id),
            kind: "conversation",
            source,
            file,
            conversationId: conversation.id,
            title: conversation.title,
            createdAt: conversation.createdAt,
            triageContent: reviewContent.slice(0, 12_000),
            reviewContent,
            conversationCount: 1,
        };
        // Only conversations whose raw export row was found get an upload payload.
        if (rawByConversationId.has(conversation.id)) {
            item.upload = buildSyntheticConversationUpload(source, conversation.id, rawByConversationId.get(conversation.id));
        }
        items.push(item);
    }
    return items;
}
1027
/**
 * Submits the given items for triage, waits for the job to finish, and writes
 * a `triage.json` artifact into the batch directory.
 *
 * The per-item results are read from
 * `<dataDir>/triage-results/<job_id>.json` once the job has completed.
 *
 * @param {object} params
 * @param {object} params.client - HTTP client exposing `post` and `get`.
 * @param {string} params.dataDir - Data directory holding `triage-results`.
 * @param {string} params.batchDir - Batch directory that receives `triage.json`.
 * @param {object[]} params.items - Triage items to submit.
 * @param {string} [params.providerOverride] - Optional provider override.
 * @returns {Promise<object>} The artifact that was written.
 * @throws {Error} When the triage job fails or times out, or when the results
 *   file is missing or does not contain an `items` array.
 */
async function runHistoricalTriage(params) {
    const endpointPath = historicalTriageEndpointPath(params.providerOverride);
    const body = {
        items: params.items.map((item) => ({
            id: item.id,
            content: item.triageContent,
            source: item.source,
        })),
        idempotency_key: historicalTriageIdempotencyKey(params.batchDir, params.items, params.providerOverride),
    };
    const { data } = await params.client.post(endpointPath, body, historicalInternalRouteOptions(params.dataDir, params.providerOverride));
    const resp = data;
    const triageJob = await pollJob({
        get: params.client.get.bind(params.client),
        jobId: resp.job_id,
    });
    if (isJobFailure(triageJob.job, triageJob.timedOut)) {
        throw new Error(triageJob.job.error ??
            (triageJob.timedOut ? "triage timed out" : "triage failed"));
    }
    const resultsPath = path.join(params.dataDir, "triage-results", `${resp.job_id}.json`);
    if (!fs.existsSync(resultsPath)) {
        throw new Error(`Missing triage results file: ${resultsPath}`);
    }
    const output = JSON.parse(fs.readFileSync(resultsPath, "utf-8"));
    // Fail with a readable message instead of a TypeError on a malformed file.
    if (!output || !Array.isArray(output.items)) {
        throw new Error(`Malformed triage results file: ${resultsPath}`);
    }
    // Index results once so the per-item lookup below is not a linear scan.
    // The first entry for an id wins, matching Array.prototype.find semantics.
    const resultsById = new Map();
    for (const entry of output.items) {
        if (!resultsById.has(entry.id)) {
            resultsById.set(entry.id, entry);
        }
    }
    const artifact = {
        batch: path.basename(params.batchDir),
        source: params.items[0]?.source,
        generated_at: new Date().toISOString(),
        ...(output.metering_mode ? { metering_mode: output.metering_mode } : {}),
        ...(output.spend_note ? { spend_note: output.spend_note } : {}),
        ...(typeof output.spent_usd === "number"
            ? { spent_usd: output.spent_usd }
            : {}),
        items: params.items.map((item) => {
            const result = resultsById.get(item.id);
            return {
                item_id: item.id,
                file: item.file.name,
                kind: item.kind,
                ...(item.conversationId
                    ? { conversation_id: item.conversationId }
                    : {}),
                ...(item.title ? { title: item.title } : {}),
                ...(item.createdAt ? { created_at: item.createdAt } : {}),
                conversation_count: item.conversationCount,
                ...(result?.triage ? { triage: result.triage } : {}),
                ...(result?.error ? { error: result.error } : {}),
            };
        }),
    };
    fs.writeFileSync(path.join(params.batchDir, "triage.json"), JSON.stringify(artifact, null, 2) + "\n", "utf-8");
    return artifact;
}
1081
/**
 * Writes a human-readable preflight summary for a batch to the given stream,
 * followed by a list of files that carry at least one junk marker.
 *
 * @param {object} params
 * @param {string} params.batchDir - Batch directory path.
 * @param {object} params.manifest - Batch manifest (`batch_name`, `source`).
 * @param {object} params.preflight - Preflight result (counts, size distribution, files).
 * @param {{ write(text: string): unknown }} params.stderr - Destination stream.
 */
function writePreflightSummary(params) {
    const { batchDir, manifest, preflight, stderr } = params;
    const allowed = SOURCE_EXTENSIONS[manifest.source].join(", ");
    const sizes = preflight.size_distribution;
    const summaryLines = [
        `Preflight: ${manifest.batch_name}`,
        ` Path: ${batchDir}`,
        ` Source: ${manifest.source} (allowed extensions: ${allowed})`,
        ` Total files: ${preflight.total_files}`,
        ` Supported: ${preflight.supported_files}`,
        ` Unsupported: ${preflight.unsupported_files}`,
        ` Junk candidates: ${preflight.junk_candidates}`,
        ` Duplicate hashes: ${preflight.duplicate_candidates}`,
        ` Size (min/med/max/total bytes): ${sizes.min}/${sizes.median}/${sizes.max}/${sizes.total}`,
        ` Manifest: ${path.join(batchDir, "manifest.json")}`,
        ` Preflight: ${path.join(batchDir, "preflight.json")}`,
    ];
    // Emitted as a single write, every line newline-terminated.
    stderr.write(summaryLines.map((line) => `${line}\n`).join(""));
    const flagged = preflight.files.filter((f) => f.junk_markers.length > 0);
    if (flagged.length === 0) {
        return;
    }
    stderr.write(`\nFlagged files:\n`);
    for (const entry of flagged) {
        stderr.write(` - ${entry.name}: ${entry.junk_markers.join(", ")}\n`);
    }
}
1103
/**
 * Builds the triage item id for one conversation inside a batch file.
 *
 * @param {string} fileName - Name of the batch file.
 * @param {string} conversationId - Conversation id within that file.
 * @returns {string} `<fileName>::<conversationId>`.
 */
function historicalTriageItemId(fileName, conversationId) {
    const separator = "::";
    return `${fileName}${separator}${conversationId}`;
}
1106
/**
 * Builds the key string for a conversation from a given source.
 *
 * @param {string} source - Source identifier.
 * @param {string} conversationId - Conversation id.
 * @returns {string} `historical:<source>:<conversationId>`.
 */
function historicalConversationKey(source, conversationId) {
    const prefix = "historical:";
    return `${prefix}${source}:${conversationId}`;
}
1109
/**
 * Produces the label used for an item in progress and error output.
 *
 * @param {{ kind: string, conversationId?: string, file: { name: string } }} item
 * @returns {string} `<file>#<conversationId>` for conversation items that have
 *   a conversation id, otherwise just the file name.
 */
function formatHistoricalLabel(item) {
    const hasConversation = item.kind === "conversation" && Boolean(item.conversationId);
    return hasConversation
        ? `${item.file.name}#${item.conversationId}`
        : item.file.name;
}
1115
/**
 * Renders a conversation as review text: five header lines, a blank line,
 * then the conversation content.
 *
 * @param {object} params
 * @param {string} params.source
 * @param {string} params.fileName
 * @param {string} params.conversationId
 * @param {string} params.title
 * @param {string} params.createdAt
 * @param {string} params.content - Conversation body; a missing value renders as empty text.
 * @returns {string} The newline-joined review text.
 */
function buildConversationReviewContent(params) {
    const header = `Source: ${params.source}\n` +
        `File: ${params.fileName}\n` +
        `Conversation ID: ${params.conversationId}\n` +
        `Title: ${params.title}\n` +
        `Created: ${params.createdAt}`;
    // Nullish content renders as an empty string, as it would when joined in an array.
    return `${header}\n\n${params.content ?? ""}`;
}
1126
/**
 * Dispatches to the source-specific raw-conversation collector.
 *
 * @param {string} source - Source identifier.
 * @param {Iterable<object>} files - Extracted archive entries.
 * @returns {Map<string, object>} Raw conversations keyed by id; an empty map
 *   for a source without a collector.
 */
function collectRawConversations(source, files) {
    // Thunks keep each collector unresolved until its source is actually requested.
    const collectors = new Map([
        ["chatgpt_web", () => collectRawChatGptConversations(files)],
        ["claude_web", () => collectRawClaudeConversations(files)],
        ["gemini_web", () => collectRawGeminiConversations(files)],
        ["grok_web", () => collectRawGrokConversations(files)],
    ]);
    const collect = collectors.get(source);
    return collect ? collect() : new Map();
}
1140
/**
 * Collects raw conversation objects from `.json` entries whose content is a
 * JSON array.
 *
 * Each conversation is keyed by its string `conversation_id`, or by its
 * string `id` when `conversation_id` is not a string. Entries with an empty
 * or missing id are ignored, unparseable files are skipped, and the first
 * occurrence of an id wins.
 *
 * @param {Iterable<{ path: string, data: Buffer }>} files - Extracted archive entries.
 * @returns {Map<string, object>} Raw conversations keyed by id.
 */
function collectRawChatGptConversations(files) {
    const byId = new Map();
    // Returns the parsed array, or null for invalid JSON / non-array content.
    const readArray = (data) => {
        try {
            const decoded = JSON.parse(data.toString("utf-8"));
            return Array.isArray(decoded) ? decoded : null;
        }
        catch {
            return null;
        }
    };
    const resolveId = (record) => {
        if (typeof record.conversation_id === "string") {
            return record.conversation_id;
        }
        return typeof record.id === "string" ? record.id : null;
    };
    for (const archiveEntry of files) {
        if (!archiveEntry.path.toLowerCase().endsWith(".json")) {
            continue;
        }
        const rows = readArray(archiveEntry.data);
        if (rows === null) {
            continue;
        }
        for (const row of rows) {
            if (!row || typeof row !== "object") {
                continue;
            }
            const id = resolveId(row);
            if (id && !byId.has(id)) {
                byId.set(id, row);
            }
        }
    }
    return byId;
}
1170
/**
 * Collects raw conversation objects from `.json` entries whose base name
 * starts with "conversations" (compared case-insensitively) and whose content
 * is a JSON array.
 *
 * Conversations are keyed by their string `uuid`; unparseable files are
 * skipped and the first occurrence of a uuid wins.
 *
 * @param {Iterable<{ path: string, data: Buffer }>} files - Extracted archive entries.
 * @returns {Map<string, object>} Raw conversations keyed by uuid.
 */
function collectRawClaudeConversations(files) {
    const byUuid = new Map();
    for (const archiveEntry of files) {
        const lowerPath = archiveEntry.path.toLowerCase();
        if (!lowerPath.endsWith(".json")) {
            continue;
        }
        // Base name is everything after the last "/" (the whole path if there is none).
        const baseName = lowerPath.slice(lowerPath.lastIndexOf("/") + 1);
        if (!baseName.startsWith("conversations")) {
            continue;
        }
        let rows;
        try {
            rows = JSON.parse(archiveEntry.data.toString("utf-8"));
        }
        catch {
            continue;
        }
        if (!Array.isArray(rows)) {
            continue;
        }
        for (const row of rows) {
            const usable = Boolean(row) &&
                typeof row === "object" &&
                typeof row.uuid === "string" &&
                !byUuid.has(row.uuid);
            if (usable) {
                byUuid.set(row.uuid, row);
            }
        }
    }
    return byUuid;
}
1202
/**
 * Collects raw conversation objects from `.json` entries whose content is a
 * JSON array.
 *
 * Only rows with a string `conversation_id`, `source === "gemini_web"` and
 * `operative_unit === "gemini_activity_card"` are kept; unparseable files are
 * skipped and the first occurrence of an id wins.
 *
 * @param {Iterable<{ path: string, data: Buffer }>} files - Extracted archive entries.
 * @returns {Map<string, object>} Raw conversations keyed by `conversation_id`.
 */
function collectRawGeminiConversations(files) {
    const byId = new Map();
    const isNewActivityCard = (row) => typeof row.conversation_id === "string" &&
        row.source === "gemini_web" &&
        row.operative_unit === "gemini_activity_card" &&
        !byId.has(row.conversation_id);
    for (const archiveEntry of files) {
        if (!archiveEntry.path.toLowerCase().endsWith(".json")) {
            continue;
        }
        let rows;
        try {
            rows = JSON.parse(archiveEntry.data.toString("utf-8"));
        }
        catch {
            continue;
        }
        if (!Array.isArray(rows)) {
            continue;
        }
        for (const row of rows) {
            if (row && typeof row === "object" && isNewActivityCard(row)) {
                byId.set(row.conversation_id, row);
            }
        }
    }
    return byId;
}
1231
/**
 * Collects raw conversation wrappers from `.json` entries whose parsed content
 * has a `conversations` array.
 *
 * Each wrapper is keyed by its string `conversation.id`; unparseable files are
 * skipped and the first occurrence of an id wins. The stored value is the
 * whole wrapper object, not just its `conversation` member.
 *
 * @param {Iterable<{ path: string, data: Buffer }>} files - Extracted archive entries.
 * @returns {Map<string, object>} Conversation wrappers keyed by id.
 */
function collectRawGrokConversations(files) {
    const byId = new Map();
    for (const archiveEntry of files) {
        const isJson = archiveEntry.path.toLowerCase().endsWith(".json");
        if (!isJson) {
            continue;
        }
        let decoded;
        try {
            decoded = JSON.parse(archiveEntry.data.toString("utf-8"));
        }
        catch {
            continue;
        }
        const wrappers = decoded?.conversations;
        if (!Array.isArray(wrappers)) {
            continue;
        }
        for (const wrapper of wrappers) {
            if (!wrapper || typeof wrapper !== "object") {
                continue;
            }
            const conversationId = wrapper.conversation?.id;
            if (typeof conversationId === "string" && !byId.has(conversationId)) {
                byId.set(conversationId, wrapper);
            }
        }
    }
    return byId;
}
1257
/**
 * Wraps a single raw conversation into an upload payload (file name plus
 * pretty-printed JSON bytes).
 *
 * @param {string} source - Source identifier.
 * @param {string} conversationId - Id used to derive the synthetic file name.
 * @param {object} rawConversation - Raw conversation as found in the export.
 * @returns {{ filename: string, fileData: Buffer }} Upload descriptor.
 */
function buildSyntheticConversationUpload(source, conversationId, rawConversation) {
    let payload;
    if (source === "grok_web") {
        // Grok's parser expects the top-level export shape
        // `{ conversations: [...], projects, tasks, media_posts }`
        // rather than a bare array of conversations, so the synthetic
        // single-row upload has to preserve that wrapper.
        payload = { conversations: [rawConversation], projects: [], tasks: [], media_posts: [] };
    }
    else {
        payload = [rawConversation];
    }
    const serialized = `${JSON.stringify(payload, null, 2)}\n`;
    return {
        filename: syntheticConversationFilename(source, conversationId),
        fileData: Buffer.from(serialized, "utf-8"),
    };
}
1270
/**
 * Derives the file name for a synthetic single-conversation upload.
 *
 * Every character of the id outside `[a-zA-Z0-9_-]` is replaced with `_`.
 * The `claude_web` source uses the `conversations-` prefix; every other source
 * uses `conversation-`.
 *
 * @param {string} source - Source identifier.
 * @param {string} conversationId - Conversation id to embed in the name.
 * @returns {string} The `.json` file name.
 */
function syntheticConversationFilename(source, conversationId) {
    const sanitizedId = conversationId.replace(/[^a-zA-Z0-9_-]/g, "_");
    const prefix = source === "claude_web" ? "conversations" : "conversation";
    return `${prefix}-${sanitizedId}.json`;
}
1277
/**
 * Derives the idempotency key for a triage request.
 *
 * The key is `backfill-triage:` followed by the first 24 hex characters of the
 * SHA-256 digest of a JSON fingerprint. The fingerprint contains the resolved
 * batch directory, the first item's source, the effective triage provider
 * name, and a per-item descriptor list sorted by item id.
 *
 * @param {string} batchDir - Batch directory (resolved to an absolute path).
 * @param {object[]} items - Triage items included in the request.
 * @param {string} [providerOverride] - Optional provider override.
 * @returns {string} The idempotency key.
 */
function historicalTriageIdempotencyKey(batchDir, items, providerOverride) {
    const fingerprint = {
        batch_dir: path.resolve(batchDir),
        source: items[0]?.source,
        triage_provider: effectiveTriageProviderName(providerOverride),
        items: [],
    };
    for (const item of items) {
        fingerprint.items.push({
            id: item.id,
            file: item.file.name,
            file_path: item.file.path,
            content_hash: item.file.content_hash,
            conversation_id: item.conversationId,
        });
    }
    // Sorting by id keeps the digest independent of the order items arrive in.
    fingerprint.items.sort((left, right) => left.id.localeCompare(right.id));
    const hash = crypto.createHash("sha256");
    hash.update(JSON.stringify(fingerprint));
    const digest = hash.digest("hex").slice(0, 24);
    return `backfill-triage:${digest}`;
}
1298
/**
 * Validates an optional provider-override flag value.
 *
 * @param {unknown} value - Raw flag value; `undefined` means "not supplied".
 * @param {string} flagName - Flag name used in the error message.
 * @returns {string | undefined} `value` unchanged when it is `undefined` or
 *   accepted by `isOperatorOverrideProvider`.
 * @throws {Error} When a value is supplied but not accepted.
 */
function resolveBackfillProviderOverride(value, flagName) {
    const isSupplied = value !== undefined;
    if (isSupplied && !isOperatorOverrideProvider(value)) {
        throw new Error(`Unsupported ${flagName} "${String(value)}". Supported overrides: codex-cli`);
    }
    return value;
}
1306
/**
 * Picks the triage endpoint for the given provider override.
 *
 * @param {string} [providerOverride] - Optional provider override.
 * @returns {string} The internal Codex triage route for "codex-cli",
 *   otherwise "/triage".
 */
function historicalTriageEndpointPath(providerOverride) {
    if (providerOverride === "codex-cli") {
        return INTERNAL_HISTORICAL_TRIAGE_CODEX_ROUTE;
    }
    return "/triage";
}
1311
/**
 * Picks the ingest endpoint for the given provider override.
 *
 * @param {string} [providerOverride] - Optional provider override.
 * @returns {string} The internal Codex ingest route for "codex-cli",
 *   otherwise "/ingest".
 */
function historicalIngestEndpointPath(providerOverride) {
    if (providerOverride === "codex-cli") {
        return INTERNAL_HISTORICAL_INGEST_CODEX_ROUTE;
    }
    return "/ingest";
}
1316
/**
 * Builds the extra request options for the internal Codex routes.
 *
 * @param {string} dataDir - Data directory the capability is read from.
 * @param {string} [providerOverride] - Optional provider override.
 * @returns {{ headers: Record<string, string> } | undefined} A headers object
 *   carrying the backfill capability for "codex-cli"; `undefined` otherwise.
 * @throws {Error} When "codex-cli" is requested but no capability is available.
 */
function historicalInternalRouteOptions(dataDir, providerOverride) {
    if (providerOverride === "codex-cli") {
        return {
            headers: {
                [HISTORICAL_BACKFILL_CAPABILITY_HEADER]: readRequiredHistoricalCapability(dataDir),
            },
        };
    }
    return undefined;
}
1326
/**
 * Reads the historical backfill capability and insists that it exists.
 *
 * @param {string} dataDir - Data directory passed to `readHistoricalBackfillCapability`.
 * @returns {string} The capability value.
 * @throws {Error} When no capability value is returned.
 */
function readRequiredHistoricalCapability(dataDir) {
    const capability = readHistoricalBackfillCapability(dataDir);
    if (capability) {
        return capability;
    }
    throw new Error("Historical Codex override is not initialized on this machine. Start the daemon first so it can create the local backfill capability.");
}
1333
/**
 * Determines the single triage provider used across a run's candidates.
 *
 * @param {{ id: string }[]} candidates - Candidates processed in the run.
 * @param {Map<string, { triage?: { provider?: unknown } }>} triageById - Triage entries keyed by candidate id.
 * @returns {string | undefined} The provider name when exactly one distinct
 *   string provider is present; `undefined` when there are none or several.
 */
function resolveHistoricalRunTriageProviderName(candidates, triageById) {
    const distinctProviders = new Set();
    for (const candidate of candidates) {
        const provider = triageById.get(candidate.id)?.triage?.provider;
        if (typeof provider === "string") {
            distinctProviders.add(provider);
        }
    }
    if (distinctProviders.size !== 1) {
        return undefined;
    }
    const [onlyProvider] = distinctProviders;
    return onlyProvider;
}
1339
/**
 * Formats a CLI error for backfill output, appending the message extracted
 * from the error's response body when there is one.
 *
 * @param {{ message: string, body?: unknown }} err - The CLI error.
 * @returns {string} `<message>: <body message>` or just `<message>`.
 */
function formatCliErrorForHistoricalBackfill(err) {
    const detail = extractCliErrorBodyMessage(err.body);
    if (!detail) {
        return err.message;
    }
    return `${err.message}: ${detail}`;
}
1343
/**
 * Pulls a human-readable message out of an error response body.
 *
 * Lookup order: a non-blank string `message`; otherwise the non-blank string
 * `message` values of the object entries in a `details` array, joined with
 * "; "; otherwise a non-blank string `error`. All values are trimmed.
 *
 * @param {unknown} body - Parsed response body.
 * @returns {string | undefined} The extracted message, or `undefined` when the
 *   body is not an object or carries none of the above.
 */
function extractCliErrorBodyMessage(body) {
    if (typeof body !== "object" || body === null) {
        return undefined;
    }
    // Trimmed text for non-blank strings, null for anything else.
    const trimmedOrNull = (candidate) => {
        if (typeof candidate !== "string") {
            return null;
        }
        const trimmed = candidate.trim();
        return trimmed ? trimmed : null;
    };
    const topLevel = trimmedOrNull(body.message);
    if (topLevel !== null) {
        return topLevel;
    }
    if (Array.isArray(body.details)) {
        const collected = [];
        for (const detail of body.details) {
            if (!detail || typeof detail !== "object") {
                continue;
            }
            const text = trimmedOrNull(detail.message);
            if (text !== null) {
                collected.push(text);
            }
        }
        if (collected.length > 0) {
            return collected.join("; ");
        }
    }
    return trimmedOrNull(body.error) ?? undefined;
}
1369
/**
 * Parses an option value as a strictly positive base-10 integer.
 *
 * The whole value must be an integer literal: optional surrounding whitespace,
 * an optional leading "+", then digits only. Inputs that `Number.parseInt`
 * would silently truncate ("12abc", "1.5", "1e3") are rejected, as are values
 * outside the safe-integer range.
 *
 * @param {string} value - Raw option value.
 * @returns {number} The parsed integer, always greater than zero.
 * @throws {Error} When `value` is not a positive integer.
 */
function parsePositiveInteger(value) {
    const text = String(value).trim();
    // Validate the full token first; parseInt alone stops at the first non-digit.
    const parsed = /^\+?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
    if (!Number.isSafeInteger(parsed) || parsed <= 0) {
        throw new Error(`Expected a positive integer, got "${value}"`);
    }
    return parsed;
}
1376
+ //# sourceMappingURL=backfill.js.map