@getrift/rift 0.1.0-beta.2 → 0.1.0-beta.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (390)
  1. package/README.md +35 -9
  2. package/dist/src/auth/keychain.d.ts +9 -0
  3. package/dist/src/auth/keychain.d.ts.map +1 -1
  4. package/dist/src/auth/keychain.js +37 -0
  5. package/dist/src/auth/keychain.js.map +1 -1
  6. package/dist/src/capture/auto-capture.d.ts +7 -0
  7. package/dist/src/capture/auto-capture.d.ts.map +1 -1
  8. package/dist/src/capture/auto-capture.js +82 -15
  9. package/dist/src/capture/auto-capture.js.map +1 -1
  10. package/dist/src/capture/auto-repair.d.ts +110 -0
  11. package/dist/src/capture/auto-repair.d.ts.map +1 -0
  12. package/dist/src/capture/auto-repair.js +269 -0
  13. package/dist/src/capture/auto-repair.js.map +1 -0
  14. package/dist/src/capture/codex-cli-triage-provider.d.ts.map +1 -1
  15. package/dist/src/capture/codex-cli-triage-provider.js +4 -3
  16. package/dist/src/capture/codex-cli-triage-provider.js.map +1 -1
  17. package/dist/src/capture/observability.d.ts +42 -0
  18. package/dist/src/capture/observability.d.ts.map +1 -1
  19. package/dist/src/capture/observability.js +45 -4
  20. package/dist/src/capture/observability.js.map +1 -1
  21. package/dist/src/capture/recover-quarantine.d.ts +260 -0
  22. package/dist/src/capture/recover-quarantine.d.ts.map +1 -0
  23. package/dist/src/capture/recover-quarantine.js +522 -0
  24. package/dist/src/capture/recover-quarantine.js.map +1 -0
  25. package/dist/src/cli/commands/backfill.d.ts.map +1 -1
  26. package/dist/src/cli/commands/backfill.js +5 -2
  27. package/dist/src/cli/commands/backfill.js.map +1 -1
  28. package/dist/src/cli/commands/capture-recover.d.ts +40 -0
  29. package/dist/src/cli/commands/capture-recover.d.ts.map +1 -0
  30. package/dist/src/cli/commands/capture-recover.js +184 -0
  31. package/dist/src/cli/commands/capture-recover.js.map +1 -0
  32. package/dist/src/cli/commands/capture.d.ts.map +1 -1
  33. package/dist/src/cli/commands/capture.js +96 -5
  34. package/dist/src/cli/commands/capture.js.map +1 -1
  35. package/dist/src/cli/commands/doctor.d.ts +6 -0
  36. package/dist/src/cli/commands/doctor.d.ts.map +1 -0
  37. package/dist/src/cli/commands/doctor.js +242 -0
  38. package/dist/src/cli/commands/doctor.js.map +1 -0
  39. package/dist/src/cli/commands/feedback.d.ts +12 -0
  40. package/dist/src/cli/commands/feedback.d.ts.map +1 -1
  41. package/dist/src/cli/commands/feedback.js +93 -4
  42. package/dist/src/cli/commands/feedback.js.map +1 -1
  43. package/dist/src/cli/commands/mcp-install.js +5 -2
  44. package/dist/src/cli/commands/mcp-install.js.map +1 -1
  45. package/dist/src/cli/commands/menubar.d.ts +80 -0
  46. package/dist/src/cli/commands/menubar.d.ts.map +1 -0
  47. package/dist/src/cli/commands/menubar.js +388 -0
  48. package/dist/src/cli/commands/menubar.js.map +1 -0
  49. package/dist/src/cli/commands/onboard.d.ts +143 -5
  50. package/dist/src/cli/commands/onboard.d.ts.map +1 -1
  51. package/dist/src/cli/commands/onboard.js +844 -188
  52. package/dist/src/cli/commands/onboard.js.map +1 -1
  53. package/dist/src/cli/commands/rebuild.d.ts.map +1 -1
  54. package/dist/src/cli/commands/rebuild.js +6 -3
  55. package/dist/src/cli/commands/rebuild.js.map +1 -1
  56. package/dist/src/cli/commands/reconcile.d.ts.map +1 -1
  57. package/dist/src/cli/commands/reconcile.js +12 -0
  58. package/dist/src/cli/commands/reconcile.js.map +1 -1
  59. package/dist/src/cli/commands/review.d.ts.map +1 -1
  60. package/dist/src/cli/commands/review.js +22 -7
  61. package/dist/src/cli/commands/review.js.map +1 -1
  62. package/dist/src/cli/commands/search.d.ts +2 -0
  63. package/dist/src/cli/commands/search.d.ts.map +1 -1
  64. package/dist/src/cli/commands/search.js +34 -4
  65. package/dist/src/cli/commands/search.js.map +1 -1
  66. package/dist/src/cli/commands/status.d.ts +9 -7
  67. package/dist/src/cli/commands/status.d.ts.map +1 -1
  68. package/dist/src/cli/commands/status.js +117 -12
  69. package/dist/src/cli/commands/status.js.map +1 -1
  70. package/dist/src/cli/commands/token-issue.d.ts.map +1 -1
  71. package/dist/src/cli/commands/token-issue.js +9 -1
  72. package/dist/src/cli/commands/token-issue.js.map +1 -1
  73. package/dist/src/cli/commands/triage.d.ts.map +1 -1
  74. package/dist/src/cli/commands/triage.js +7 -5
  75. package/dist/src/cli/commands/triage.js.map +1 -1
  76. package/dist/src/cli/commands/update.d.ts +80 -0
  77. package/dist/src/cli/commands/update.d.ts.map +1 -0
  78. package/dist/src/cli/commands/update.js +390 -0
  79. package/dist/src/cli/commands/update.js.map +1 -0
  80. package/dist/src/cli/default-config-path.d.ts +15 -0
  81. package/dist/src/cli/default-config-path.d.ts.map +1 -0
  82. package/dist/src/cli/default-config-path.js +27 -0
  83. package/dist/src/cli/default-config-path.js.map +1 -0
  84. package/dist/src/cli/feedback/feedback-config.d.ts +46 -0
  85. package/dist/src/cli/feedback/feedback-config.d.ts.map +1 -1
  86. package/dist/src/cli/feedback/feedback-config.js +130 -4
  87. package/dist/src/cli/feedback/feedback-config.js.map +1 -1
  88. package/dist/src/cli/feedback/feedback-history.d.ts +7 -0
  89. package/dist/src/cli/feedback/feedback-history.d.ts.map +1 -1
  90. package/dist/src/cli/feedback/feedback-history.js +39 -9
  91. package/dist/src/cli/feedback/feedback-history.js.map +1 -1
  92. package/dist/src/cli/feedback/feedback-payload.d.ts +22 -1
  93. package/dist/src/cli/feedback/feedback-payload.d.ts.map +1 -1
  94. package/dist/src/cli/feedback/feedback-payload.js.map +1 -1
  95. package/dist/src/cli/feedback/feedback-relay.d.ts +2 -2
  96. package/dist/src/cli/feedback/feedback-relay.d.ts.map +1 -1
  97. package/dist/src/cli/feedback/feedback-relay.js.map +1 -1
  98. package/dist/src/cli/feedback/invite.d.ts +17 -0
  99. package/dist/src/cli/feedback/invite.d.ts.map +1 -0
  100. package/dist/src/cli/feedback/invite.js +67 -0
  101. package/dist/src/cli/feedback/invite.js.map +1 -0
  102. package/dist/src/cli/feedback/relay-secret-store.d.ts +32 -0
  103. package/dist/src/cli/feedback/relay-secret-store.d.ts.map +1 -0
  104. package/dist/src/cli/feedback/relay-secret-store.js +137 -0
  105. package/dist/src/cli/feedback/relay-secret-store.js.map +1 -0
  106. package/dist/src/cli/http-client.d.ts +93 -1
  107. package/dist/src/cli/http-client.d.ts.map +1 -1
  108. package/dist/src/cli/http-client.js +254 -6
  109. package/dist/src/cli/http-client.js.map +1 -1
  110. package/dist/src/cli/index.d.ts.map +1 -1
  111. package/dist/src/cli/index.js +29 -6
  112. package/dist/src/cli/index.js.map +1 -1
  113. package/dist/src/cli/postinstall-menubar.d.ts +22 -0
  114. package/dist/src/cli/postinstall-menubar.d.ts.map +1 -0
  115. package/dist/src/cli/postinstall-menubar.js +53 -0
  116. package/dist/src/cli/postinstall-menubar.js.map +1 -0
  117. package/dist/src/cli/status/friend-header.d.ts +16 -1
  118. package/dist/src/cli/status/friend-header.d.ts.map +1 -1
  119. package/dist/src/cli/status/friend-header.js +354 -26
  120. package/dist/src/cli/status/friend-header.js.map +1 -1
  121. package/dist/src/cli/status/local-signals.d.ts +18 -0
  122. package/dist/src/cli/status/local-signals.d.ts.map +1 -1
  123. package/dist/src/cli/status/local-signals.js +29 -0
  124. package/dist/src/cli/status/local-signals.js.map +1 -1
  125. package/dist/src/cli/ui.d.ts +47 -0
  126. package/dist/src/cli/ui.d.ts.map +1 -0
  127. package/dist/src/cli/ui.js +166 -0
  128. package/dist/src/cli/ui.js.map +1 -0
  129. package/dist/src/config/schema.d.ts +79 -0
  130. package/dist/src/config/schema.d.ts.map +1 -1
  131. package/dist/src/config/schema.js +44 -0
  132. package/dist/src/config/schema.js.map +1 -1
  133. package/dist/src/diagnostics/codex-preflight.d.ts +33 -0
  134. package/dist/src/diagnostics/codex-preflight.d.ts.map +1 -0
  135. package/dist/src/diagnostics/codex-preflight.js +75 -0
  136. package/dist/src/diagnostics/codex-preflight.js.map +1 -0
  137. package/dist/src/diagnostics/doctor.d.ts +114 -0
  138. package/dist/src/diagnostics/doctor.d.ts.map +1 -0
  139. package/dist/src/diagnostics/doctor.js +352 -0
  140. package/dist/src/diagnostics/doctor.js.map +1 -0
  141. package/dist/src/diagnostics/notify.d.ts +90 -0
  142. package/dist/src/diagnostics/notify.d.ts.map +1 -0
  143. package/dist/src/diagnostics/notify.js +177 -0
  144. package/dist/src/diagnostics/notify.js.map +1 -0
  145. package/dist/src/diagnostics/repair-prompt.d.ts +49 -0
  146. package/dist/src/diagnostics/repair-prompt.d.ts.map +1 -0
  147. package/dist/src/diagnostics/repair-prompt.js +223 -0
  148. package/dist/src/diagnostics/repair-prompt.js.map +1 -0
  149. package/dist/src/ingestion/inbox-core/conversation-fingerprint.d.ts +2 -0
  150. package/dist/src/ingestion/inbox-core/conversation-fingerprint.d.ts.map +1 -0
  151. package/dist/src/ingestion/inbox-core/conversation-fingerprint.js +27 -0
  152. package/dist/src/ingestion/inbox-core/conversation-fingerprint.js.map +1 -0
  153. package/dist/src/ingestion/inbox-core/conversation-key.d.ts +2 -0
  154. package/dist/src/ingestion/inbox-core/conversation-key.d.ts.map +1 -0
  155. package/dist/src/ingestion/inbox-core/conversation-key.js +31 -0
  156. package/dist/src/ingestion/inbox-core/conversation-key.js.map +1 -0
  157. package/dist/src/ingestion/inbox-core/extensions.d.ts +3 -0
  158. package/dist/src/ingestion/inbox-core/extensions.d.ts.map +1 -0
  159. package/dist/src/ingestion/inbox-core/extensions.js +16 -0
  160. package/dist/src/ingestion/inbox-core/extensions.js.map +1 -0
  161. package/dist/src/ingestion/inbox-core/idempotency.d.ts +2 -0
  162. package/dist/src/ingestion/inbox-core/idempotency.d.ts.map +1 -0
  163. package/dist/src/ingestion/inbox-core/idempotency.js +22 -0
  164. package/dist/src/ingestion/inbox-core/idempotency.js.map +1 -0
  165. package/dist/src/ingestion/inbox-core/index.d.ts +20 -0
  166. package/dist/src/ingestion/inbox-core/index.d.ts.map +1 -0
  167. package/dist/src/ingestion/inbox-core/index.js +20 -0
  168. package/dist/src/ingestion/inbox-core/index.js.map +1 -0
  169. package/dist/src/ingestion/inbox-core/source-detection.d.ts +2 -0
  170. package/dist/src/ingestion/inbox-core/source-detection.d.ts.map +1 -0
  171. package/dist/src/ingestion/inbox-core/source-detection.js +23 -0
  172. package/dist/src/ingestion/inbox-core/source-detection.js.map +1 -0
  173. package/dist/src/ingestion/inbox-core/source-sniffer.d.ts +11 -0
  174. package/dist/src/ingestion/inbox-core/source-sniffer.d.ts.map +1 -0
  175. package/dist/src/ingestion/inbox-core/source-sniffer.js +69 -0
  176. package/dist/src/ingestion/inbox-core/source-sniffer.js.map +1 -0
  177. package/dist/src/ingestion/inbox-core/zip-sniffer.d.ts +70 -0
  178. package/dist/src/ingestion/inbox-core/zip-sniffer.d.ts.map +1 -0
  179. package/dist/src/ingestion/inbox-core/zip-sniffer.js +161 -0
  180. package/dist/src/ingestion/inbox-core/zip-sniffer.js.map +1 -0
  181. package/dist/src/ingestion/inbox-watcher.d.ts.map +1 -1
  182. package/dist/src/ingestion/inbox-watcher.js +34 -50
  183. package/dist/src/ingestion/inbox-watcher.js.map +1 -1
  184. package/dist/src/ingestion/indexer.d.ts +7 -0
  185. package/dist/src/ingestion/indexer.d.ts.map +1 -1
  186. package/dist/src/ingestion/indexer.js +36 -2
  187. package/dist/src/ingestion/indexer.js.map +1 -1
  188. package/dist/src/ingestion/metadata-extraction.d.ts +8 -5
  189. package/dist/src/ingestion/metadata-extraction.d.ts.map +1 -1
  190. package/dist/src/ingestion/metadata-extraction.js +24 -5
  191. package/dist/src/ingestion/metadata-extraction.js.map +1 -1
  192. package/dist/src/ingestion/skip-quarantine.d.ts +10 -0
  193. package/dist/src/ingestion/skip-quarantine.d.ts.map +1 -0
  194. package/dist/src/ingestion/skip-quarantine.js +35 -0
  195. package/dist/src/ingestion/skip-quarantine.js.map +1 -0
  196. package/dist/src/jobs/handlers/compact.d.ts.map +1 -1
  197. package/dist/src/jobs/handlers/compact.js +30 -4
  198. package/dist/src/jobs/handlers/compact.js.map +1 -1
  199. package/dist/src/jobs/handlers/dedupe-conversations.d.ts +134 -0
  200. package/dist/src/jobs/handlers/dedupe-conversations.d.ts.map +1 -0
  201. package/dist/src/jobs/handlers/dedupe-conversations.js +371 -0
  202. package/dist/src/jobs/handlers/dedupe-conversations.js.map +1 -0
  203. package/dist/src/jobs/handlers/ingest.d.ts.map +1 -1
  204. package/dist/src/jobs/handlers/ingest.js +295 -41
  205. package/dist/src/jobs/handlers/ingest.js.map +1 -1
  206. package/dist/src/jobs/handlers/reconcile.d.ts +28 -0
  207. package/dist/src/jobs/handlers/reconcile.d.ts.map +1 -1
  208. package/dist/src/jobs/handlers/reconcile.js +145 -19
  209. package/dist/src/jobs/handlers/reconcile.js.map +1 -1
  210. package/dist/src/jobs/handlers/reindex.d.ts.map +1 -1
  211. package/dist/src/jobs/handlers/reindex.js +13 -2
  212. package/dist/src/jobs/handlers/reindex.js.map +1 -1
  213. package/dist/src/jobs/handlers/save.d.ts.map +1 -1
  214. package/dist/src/jobs/handlers/save.js +57 -3
  215. package/dist/src/jobs/handlers/save.js.map +1 -1
  216. package/dist/src/jobs/queue.d.ts +51 -1
  217. package/dist/src/jobs/queue.d.ts.map +1 -1
  218. package/dist/src/jobs/queue.js +466 -26
  219. package/dist/src/jobs/queue.js.map +1 -1
  220. package/dist/src/jobs/worker-entry.d.ts.map +1 -1
  221. package/dist/src/jobs/worker-entry.js +35 -7
  222. package/dist/src/jobs/worker-entry.js.map +1 -1
  223. package/dist/src/jobs/worker-process.d.ts +11 -0
  224. package/dist/src/jobs/worker-process.d.ts.map +1 -1
  225. package/dist/src/jobs/worker-process.js +37 -4
  226. package/dist/src/jobs/worker-process.js.map +1 -1
  227. package/dist/src/main.js +199 -46
  228. package/dist/src/main.js.map +1 -1
  229. package/dist/src/mcp/errors.d.ts.map +1 -1
  230. package/dist/src/mcp/errors.js +20 -1
  231. package/dist/src/mcp/errors.js.map +1 -1
  232. package/dist/src/mcp/server.d.ts.map +1 -1
  233. package/dist/src/mcp/server.js +43 -3
  234. package/dist/src/mcp/server.js.map +1 -1
  235. package/dist/src/mcp/tools/context-pack.d.ts.map +1 -1
  236. package/dist/src/mcp/tools/context-pack.js +164 -23
  237. package/dist/src/mcp/tools/context-pack.js.map +1 -1
  238. package/dist/src/mcp/tools/search.d.ts +6 -2
  239. package/dist/src/mcp/tools/search.d.ts.map +1 -1
  240. package/dist/src/mcp/tools/search.js +35 -4
  241. package/dist/src/mcp/tools/search.js.map +1 -1
  242. package/dist/src/observability/embedding-events.d.ts +52 -0
  243. package/dist/src/observability/embedding-events.d.ts.map +1 -0
  244. package/dist/src/observability/embedding-events.js +149 -0
  245. package/dist/src/observability/embedding-events.js.map +1 -0
  246. package/dist/src/observability/index-events.d.ts +70 -0
  247. package/dist/src/observability/index-events.d.ts.map +1 -0
  248. package/dist/src/observability/index-events.js +148 -0
  249. package/dist/src/observability/index-events.js.map +1 -0
  250. package/dist/src/observability/onboarding-metric.d.ts +131 -0
  251. package/dist/src/observability/onboarding-metric.d.ts.map +1 -0
  252. package/dist/src/observability/onboarding-metric.js +351 -0
  253. package/dist/src/observability/onboarding-metric.js.map +1 -0
  254. package/dist/src/observability/tool-usage-stats.d.ts +77 -4
  255. package/dist/src/observability/tool-usage-stats.d.ts.map +1 -1
  256. package/dist/src/observability/tool-usage-stats.js +112 -32
  257. package/dist/src/observability/tool-usage-stats.js.map +1 -1
  258. package/dist/src/observability/tool-usage.d.ts +100 -7
  259. package/dist/src/observability/tool-usage.d.ts.map +1 -1
  260. package/dist/src/observability/tool-usage.js +196 -33
  261. package/dist/src/observability/tool-usage.js.map +1 -1
  262. package/dist/src/observability/version-check.d.ts +71 -0
  263. package/dist/src/observability/version-check.d.ts.map +1 -0
  264. package/dist/src/observability/version-check.js +198 -0
  265. package/dist/src/observability/version-check.js.map +1 -0
  266. package/dist/src/providers/basic-metadata-extraction.d.ts +60 -0
  267. package/dist/src/providers/basic-metadata-extraction.d.ts.map +1 -0
  268. package/dist/src/providers/basic-metadata-extraction.js +114 -0
  269. package/dist/src/providers/basic-metadata-extraction.js.map +1 -0
  270. package/dist/src/providers/codex-cli-metadata-extraction.d.ts +1 -0
  271. package/dist/src/providers/codex-cli-metadata-extraction.d.ts.map +1 -1
  272. package/dist/src/providers/codex-cli-metadata-extraction.js +6 -2
  273. package/dist/src/providers/codex-cli-metadata-extraction.js.map +1 -1
  274. package/dist/src/providers/codex-cli-model.d.ts +61 -0
  275. package/dist/src/providers/codex-cli-model.d.ts.map +1 -0
  276. package/dist/src/providers/codex-cli-model.js +194 -0
  277. package/dist/src/providers/codex-cli-model.js.map +1 -0
  278. package/dist/src/providers/codex-cli-runner.d.ts +39 -0
  279. package/dist/src/providers/codex-cli-runner.d.ts.map +1 -1
  280. package/dist/src/providers/codex-cli-runner.js +234 -48
  281. package/dist/src/providers/codex-cli-runner.js.map +1 -1
  282. package/dist/src/providers/conversation-generation.d.ts.map +1 -1
  283. package/dist/src/providers/conversation-generation.js +43 -6
  284. package/dist/src/providers/conversation-generation.js.map +1 -1
  285. package/dist/src/providers/ollama-embed.d.ts +2 -1
  286. package/dist/src/providers/ollama-embed.d.ts.map +1 -1
  287. package/dist/src/providers/ollama-embed.js +1 -0
  288. package/dist/src/providers/ollama-embed.js.map +1 -1
  289. package/dist/src/providers/openai-metadata-extraction.d.ts +3 -3
  290. package/dist/src/providers/openai-metadata-extraction.d.ts.map +1 -1
  291. package/dist/src/providers/openai-metadata-extraction.js +18 -3
  292. package/dist/src/providers/openai-metadata-extraction.js.map +1 -1
  293. package/dist/src/providers/placeholder-embed.d.ts +56 -0
  294. package/dist/src/providers/placeholder-embed.d.ts.map +1 -0
  295. package/dist/src/providers/placeholder-embed.js +64 -0
  296. package/dist/src/providers/placeholder-embed.js.map +1 -0
  297. package/dist/src/providers/stub.d.ts +2 -0
  298. package/dist/src/providers/stub.d.ts.map +1 -1
  299. package/dist/src/providers/stub.js +2 -0
  300. package/dist/src/providers/stub.js.map +1 -1
  301. package/dist/src/providers/types.d.ts +11 -0
  302. package/dist/src/providers/types.d.ts.map +1 -1
  303. package/dist/src/providers/voyage.d.ts +2 -1
  304. package/dist/src/providers/voyage.d.ts.map +1 -1
  305. package/dist/src/providers/voyage.js +1 -0
  306. package/dist/src/providers/voyage.js.map +1 -1
  307. package/dist/src/retrieval/compact.d.ts +116 -2
  308. package/dist/src/retrieval/compact.d.ts.map +1 -1
  309. package/dist/src/retrieval/compact.js +158 -5
  310. package/dist/src/retrieval/compact.js.map +1 -1
  311. package/dist/src/retrieval/context-pack.d.ts +114 -0
  312. package/dist/src/retrieval/context-pack.d.ts.map +1 -1
  313. package/dist/src/retrieval/context-pack.js +292 -8
  314. package/dist/src/retrieval/context-pack.js.map +1 -1
  315. package/dist/src/retrieval/current-truth.d.ts +360 -0
  316. package/dist/src/retrieval/current-truth.d.ts.map +1 -0
  317. package/dist/src/retrieval/current-truth.js +766 -0
  318. package/dist/src/retrieval/current-truth.js.map +1 -0
  319. package/dist/src/retrieval/git-state.d.ts +53 -0
  320. package/dist/src/retrieval/git-state.d.ts.map +1 -0
  321. package/dist/src/retrieval/git-state.js +174 -0
  322. package/dist/src/retrieval/git-state.js.map +1 -0
  323. package/dist/src/retrieval/lexical.d.ts.map +1 -1
  324. package/dist/src/retrieval/lexical.js +19 -3
  325. package/dist/src/retrieval/lexical.js.map +1 -1
  326. package/dist/src/retrieval/locator-boost.d.ts +37 -0
  327. package/dist/src/retrieval/locator-boost.d.ts.map +1 -0
  328. package/dist/src/retrieval/locator-boost.js +129 -0
  329. package/dist/src/retrieval/locator-boost.js.map +1 -0
  330. package/dist/src/retrieval/report-demotion.d.ts +46 -0
  331. package/dist/src/retrieval/report-demotion.d.ts.map +1 -0
  332. package/dist/src/retrieval/report-demotion.js +169 -0
  333. package/dist/src/retrieval/report-demotion.js.map +1 -0
  334. package/dist/src/retrieval/vector.d.ts.map +1 -1
  335. package/dist/src/retrieval/vector.js +11 -2
  336. package/dist/src/retrieval/vector.js.map +1 -1
  337. package/dist/src/server/app.d.ts.map +1 -1
  338. package/dist/src/server/app.js +92 -11
  339. package/dist/src/server/app.js.map +1 -1
  340. package/dist/src/server/routes/compact.d.ts.map +1 -1
  341. package/dist/src/server/routes/compact.js +4 -1
  342. package/dist/src/server/routes/compact.js.map +1 -1
  343. package/dist/src/server/routes/context.d.ts +1 -1
  344. package/dist/src/server/routes/context.d.ts.map +1 -1
  345. package/dist/src/server/routes/context.js +2 -1
  346. package/dist/src/server/routes/context.js.map +1 -1
  347. package/dist/src/server/routes/conversations-search.d.ts.map +1 -1
  348. package/dist/src/server/routes/conversations-search.js +28 -3
  349. package/dist/src/server/routes/conversations-search.js.map +1 -1
  350. package/dist/src/server/routes/enqueue.d.ts +11 -0
  351. package/dist/src/server/routes/enqueue.d.ts.map +1 -0
  352. package/dist/src/server/routes/enqueue.js +17 -0
  353. package/dist/src/server/routes/enqueue.js.map +1 -0
  354. package/dist/src/server/routes/friend-status.d.ts +339 -3
  355. package/dist/src/server/routes/friend-status.d.ts.map +1 -1
  356. package/dist/src/server/routes/friend-status.js +447 -13
  357. package/dist/src/server/routes/friend-status.js.map +1 -1
  358. package/dist/src/server/routes/ingest.d.ts.map +1 -1
  359. package/dist/src/server/routes/ingest.js +5 -2
  360. package/dist/src/server/routes/ingest.js.map +1 -1
  361. package/dist/src/server/routes/mcp-usage.d.ts +5 -4
  362. package/dist/src/server/routes/mcp-usage.d.ts.map +1 -1
  363. package/dist/src/server/routes/mcp-usage.js.map +1 -1
  364. package/dist/src/server/routes/reconcile.d.ts.map +1 -1
  365. package/dist/src/server/routes/reconcile.js +20 -1
  366. package/dist/src/server/routes/reconcile.js.map +1 -1
  367. package/dist/src/server/routes/reindex.d.ts.map +1 -1
  368. package/dist/src/server/routes/reindex.js +4 -1
  369. package/dist/src/server/routes/reindex.js.map +1 -1
  370. package/dist/src/server/routes/save.d.ts.map +1 -1
  371. package/dist/src/server/routes/save.js +4 -1
  372. package/dist/src/server/routes/save.js.map +1 -1
  373. package/dist/src/server/routes/search.d.ts +1 -1
  374. package/dist/src/server/routes/search.d.ts.map +1 -1
  375. package/dist/src/server/routes/search.js +253 -29
  376. package/dist/src/server/routes/search.js.map +1 -1
  377. package/dist/src/server/routes/triage.d.ts.map +1 -1
  378. package/dist/src/server/routes/triage.js +4 -1
  379. package/dist/src/server/routes/triage.js.map +1 -1
  380. package/dist/src/storage/rebuild.d.ts +35 -1
  381. package/dist/src/storage/rebuild.d.ts.map +1 -1
  382. package/dist/src/storage/rebuild.js +288 -64
  383. package/dist/src/storage/rebuild.js.map +1 -1
  384. package/dist/src/storage/tables.d.ts +29 -0
  385. package/dist/src/storage/tables.d.ts.map +1 -1
  386. package/dist/src/storage/tables.js +32 -1
  387. package/dist/src/storage/tables.js.map +1 -1
  388. package/operator/swiftbar/render-menu.py +524 -0
  389. package/operator/swiftbar/rift.10s.sh +176 -0
  390. package/package.json +9 -3
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/ingestion/inbox-core/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AACH,OAAO,EAAE,mBAAmB,EAAE,MAAM,kBAAkB,CAAC;AACvD,OAAO,EAAE,uBAAuB,EAAE,MAAM,uBAAuB,CAAC;AAChE,OAAO,EAAE,8BAA8B,EAAE,MAAM,+BAA+B,CAAC;AAC/E,OAAO,EAAE,sBAAsB,EAAE,MAAM,uBAAuB,CAAC;AAC/D,OAAO,EACL,0BAA0B,EAC1B,yBAAyB,GAC1B,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,gBAAgB,EAAE,KAAK,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC7E,OAAO,EACL,gCAAgC,EAChC,2BAA2B,EAC3B,cAAc,GACf,MAAM,kBAAkB,CAAC"}
@@ -0,0 +1,20 @@
1
+ /**
2
+ * inbox-core — pure file-detection primitives for an inbox watcher.
3
+ *
4
+ * Carved as a seam so the same logic can power both Rift's
5
+ * inbox-watcher.ts (which hands off to the job queue + ingest route)
6
+ * and a future standalone "AI Chat Backup Inbox" shell (which hands
7
+ * off to a local archive folder, with optional Rift indexing).
8
+ *
9
+ * Invariant: this module has zero Rift internals — no Fastify, no
10
+ * LanceDB, no Voyage, no JobQueue, no config loader, no parser
11
+ * registry. Only `node:` stdlib. The adapter wires it up.
12
+ */
13
+ export { inboxIdempotencyKey } from "./idempotency.js";
14
+ export { stableConversationRowId } from "./conversation-key.js";
15
+ export { conversationContentFingerprint } from "./conversation-fingerprint.js";
16
+ export { detectSourceFromSubdir } from "./source-detection.js";
17
+ export { SUPPORTED_INBOX_EXTENSIONS, isSupportedInboxExtension, } from "./extensions.js";
18
+ export { sniffInboxSource } from "./source-sniffer.js";
19
+ export { readZipCentralDirectoryFilenames, sniffProviderFromZipEntries, sniffZipBuffer, } from "./zip-sniffer.js";
20
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/ingestion/inbox-core/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AACH,OAAO,EAAE,mBAAmB,EAAE,MAAM,kBAAkB,CAAC;AACvD,OAAO,EAAE,uBAAuB,EAAE,MAAM,uBAAuB,CAAC;AAChE,OAAO,EAAE,8BAA8B,EAAE,MAAM,+BAA+B,CAAC;AAC/E,OAAO,EAAE,sBAAsB,EAAE,MAAM,uBAAuB,CAAC;AAC/D,OAAO,EACL,0BAA0B,EAC1B,yBAAyB,GAC1B,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,gBAAgB,EAAwB,MAAM,qBAAqB,CAAC;AAC7E,OAAO,EACL,gCAAgC,EAChC,2BAA2B,EAC3B,cAAc,GACf,MAAM,kBAAkB,CAAC"}
@@ -0,0 +1,2 @@
1
+ export declare function detectSourceFromSubdir(filePath: string, inboxDir: string, knownSources: readonly string[]): string | null;
2
+ //# sourceMappingURL=source-detection.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"source-detection.d.ts","sourceRoot":"","sources":["../../../../src/ingestion/inbox-core/source-detection.ts"],"names":[],"mappings":"AAaA,wBAAgB,sBAAsB,CACpC,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,MAAM,EAChB,YAAY,EAAE,SAAS,MAAM,EAAE,GAC9B,MAAM,GAAG,IAAI,CAWf"}
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Subdirectory-based source detection for inbox drops.
3
+ *
4
+ * If `data/inbox/<source>/file.json` exists and `<source>` is in
5
+ * `knownSources`, the source is `<source>`. Otherwise returns null
6
+ * and the caller decides on a default.
7
+ *
8
+ * Pure: no filesystem access, no Rift coupling. `knownSources` is
9
+ * passed in so the standalone shell can supply its own provider list
10
+ * without dragging in `parsers/types.ts`.
11
+ */
12
+ import path from "node:path";
13
/**
 * Detect an inbox drop's source from the first directory below the inbox root.
 *
 * For `<inboxDir>/<source>/.../file`, returns `<source>` when it is listed in
 * `knownSources`. Files that sit directly in the inbox root, and files whose
 * first directory is not a known source, yield `null` so the caller can pick
 * its own default.
 *
 * Pure path arithmetic: nothing is read from disk.
 *
 * @param {string} filePath - Path of the dropped file.
 * @param {string} inboxDir - Path of the inbox root.
 * @param {readonly string[]} knownSources - Accepted source directory names.
 * @returns {string | null} The matching source name, or `null`.
 */
export function detectSourceFromSubdir(filePath, inboxDir, knownSources) {
    const segments = path.relative(inboxDir, filePath).split(path.sep);
    // A single segment means the file sits directly in the inbox root.
    // Checking depth (rather than "first segment differs from the basename")
    // keeps a file that is named like its source directory, e.g.
    // `grok_web/grok_web`, detectable.
    if (segments.length < 2) {
        return null;
    }
    const [firstSegment] = segments;
    return firstSegment && knownSources.includes(firstSegment)
        ? firstSegment
        : null;
}
23
+ //# sourceMappingURL=source-detection.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"source-detection.js","sourceRoot":"","sources":["../../../../src/ingestion/inbox-core/source-detection.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AACH,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,MAAM,UAAU,sBAAsB,CACpC,QAAgB,EAChB,QAAgB,EAChB,YAA+B;IAE/B,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;IACnD,MAAM,YAAY,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACjD,IACE,YAAY;QACZ,YAAY,KAAK,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;QACxC,YAAY,CAAC,QAAQ,CAAC,YAAY,CAAC,EACnC,CAAC;QACD,OAAO,YAAY,CAAC;IACtB,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC"}
@@ -0,0 +1,11 @@
1
+ export type SniffedProvider = "claude_web" | "grok_web" | "gemini_web";
2
+ /**
3
+ * Attempt to identify a non-ChatGPT provider for a file dropped in
4
+ * the inbox root. Returns null if no confident match — caller should
5
+ * fall back to its default (chatgpt_web in Rift's adapter).
6
+ *
7
+ * `data` may be undefined when the caller has only the filename
8
+ * (e.g., before reading the file). Filename-only heuristics still run.
9
+ */
10
+ export declare function sniffInboxSource(filename: string, data?: Buffer, peekBytes?: number): SniffedProvider | null;
11
+ //# sourceMappingURL=source-sniffer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"source-sniffer.d.ts","sourceRoot":"","sources":["../../../../src/ingestion/inbox-core/source-sniffer.ts"],"names":[],"mappings":"AAuBA,MAAM,MAAM,eAAe,GAAG,YAAY,GAAG,UAAU,GAAG,YAAY,CAAC;AAIvE;;;;;;;GAOG;AACH,wBAAgB,gBAAgB,CAC9B,QAAQ,EAAE,MAAM,EAChB,IAAI,CAAC,EAAE,MAAM,EACb,SAAS,GAAE,MAA2B,GACrC,eAAe,GAAG,IAAI,CAuCxB"}
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Conservative content/filename sniffer for inbox drops.
3
+ *
4
+ * Purpose: when a file lands in the inbox root (no subdirectory hint),
5
+ * identify non-ChatGPT providers so they reach the right parser
6
+ * instead of silently failing under the ChatGPT default.
7
+ *
8
+ * Design: identify ONLY non-default providers (claude_web, grok_web,
9
+ * gemini_web). ChatGPT shapes are not matched here — the adapter's
10
+ * default fallback already handles them. Returning null means "no
11
+ * confident match; let the caller decide."
12
+ *
13
+ * Heuristics are cheap and ordered most-specific first:
14
+ * 1. Filename pattern (no I/O)
15
+ * 2. Content match: ZIP central-directory entry names (.zip), or a bounded JSON head substring peek (.json)
16
+ *
17
+ * Pure: no filesystem access, no Rift internals. The buffer is passed
18
+ * in by the caller — Rift's adapter reads with `fs`, a standalone
19
+ * shell would read however it likes.
20
+ */
21
+ import path from "node:path";
22
+ import { sniffZipBuffer } from "./zip-sniffer.js";
23
/** Default number of leading bytes of a JSON file inspected for provider markers. */
const DEFAULT_PEEK_BYTES = 64 * 1024;

/**
 * Attempt to identify a non-ChatGPT provider for a file dropped in
 * the inbox root. Returns null if no confident match — caller should
 * fall back to its default (chatgpt_web in Rift's adapter).
 *
 * Checks run in this order:
 *   1. Filename hints (work even when `data` is undefined).
 *   2. `.zip`: delegated to `sniffZipBuffer(data)`.
 *   3. `.json`: substring markers within the first `peekBytes` bytes.
 * Any other extension yields null once the filename hints have missed.
 *
 * @param {string} filename - File name or path; only the basename is inspected.
 * @param {Buffer} [data] - File contents, if the caller has already read them.
 * @param {number} [peekBytes] - Maximum leading bytes of a JSON file to scan.
 *   Values that are not a non-negative number (NaN, negatives, non-numbers)
 *   fall back to the default instead of silently producing an empty or
 *   tail-truncated peek.
 * @returns {"claude_web" | "grok_web" | "gemini_web" | null}
 */
export function sniffInboxSource(filename, data, peekBytes = DEFAULT_PEEK_BYTES) {
    const fnameLower = path.basename(filename).toLowerCase();
    const ext = path.extname(fnameLower);

    // --- Filename hints (cheap, no I/O) ---
    if (fnameLower.startsWith("prod-grok-backend")) {
        return "grok_web";
    }
    if (fnameLower.includes("claude")) {
        return "claude_web";
    }
    if (fnameLower.includes("takeout")) {
        return "gemini_web";
    }

    // --- Content hints ---
    if (!data || data.length === 0) {
        return null;
    }

    // ZIPs: walk the central directory and match on marker filenames.
    // We never decompress here — the parser layer handles extraction.
    if (ext === ".zip") {
        return sniffZipBuffer(data);
    }
    if (ext !== ".json") {
        return null;
    }

    // Buffer.subarray treats a negative end as an offset from the end of the
    // buffer and NaN as 0, so an unusable peekBytes must not reach it.
    const budget = typeof peekBytes === "number" && peekBytes >= 0
        ? peekBytes
        : DEFAULT_PEEK_BYTES;
    const head = data.subarray(0, Math.min(budget, data.length)).toString("utf-8");

    // Claude exports always carry chat_messages arrays on every conversation.
    if (head.includes('"chat_messages"')) {
        return "claude_web";
    }

    // Gemini activity cards carry timestamp_text or assistant_turns.
    if (head.includes('"timestamp_text"') || head.includes('"assistant_turns"')) {
        return "gemini_web";
    }

    // Grok exports nest responses[] inside each conversation. Requiring both
    // keys in the peeked head is what distinguishes them from the (rare)
    // ChatGPT JSON that happens to mention "conversations" or "responses"
    // in isolation.
    if (head.includes('"conversations"') && head.includes('"responses"')) {
        return "grok_web";
    }

    return null;
}
69
+ //# sourceMappingURL=source-sniffer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"source-sniffer.js","sourceRoot":"","sources":["../../../../src/ingestion/inbox-core/source-sniffer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AACH,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAIlD,MAAM,kBAAkB,GAAG,EAAE,GAAG,IAAI,CAAC;AAErC;;;;;;;GAOG;AACH,MAAM,UAAU,gBAAgB,CAC9B,QAAgB,EAChB,IAAa,EACb,YAAoB,kBAAkB;IAEtC,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IACzD,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;IAErC,yCAAyC;IACzC,IAAI,UAAU,CAAC,UAAU,CAAC,mBAAmB,CAAC;QAAE,OAAO,UAAU,CAAC;IAClE,IAAI,UAAU,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAAE,OAAO,YAAY,CAAC;IACvD,IAAI,UAAU,CAAC,QAAQ,CAAC,SAAS,CAAC;QAAE,OAAO,YAAY,CAAC;IAExD,wBAAwB;IACxB,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAE5C,kEAAkE;IAClE,kEAAkE;IAClE,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;QACnB,OAAO,cAAc,CAAC,IAAI,CAAC,CAAC;IAC9B,CAAC;IAED,IAAI,GAAG,KAAK,OAAO;QAAE,OAAO,IAAI,CAAC;IAEjC,MAAM,IAAI,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;IAElF,0EAA0E;IAC1E,IAAI,IAAI,CAAC,QAAQ,CAAC,iBAAiB,CAAC;QAAE,OAAO,YAAY,CAAC;IAE1D,iEAAiE;IACjE,IAAI,IAAI,CAAC,QAAQ,CAAC,kBAAkB,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,mBAAmB,CAAC,EAAE,CAAC;QAC5E,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,8DAA8D;IAC9D,iEAAiE;IACjE,4DAA4D;IAC5D,+CAA+C;IAC/C,IAAI,IAAI,CAAC,QAAQ,CAAC,iBAAiB,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QACrE,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC"}
@@ -0,0 +1,70 @@
1
+ /**
2
+ * Minimal ZIP central-directory reader for inbox source-sniffing.
3
+ *
4
+ * We do NOT extract or decompress. We only walk the central directory
5
+ * to learn the names of the entries inside the archive, so we can
6
+ * decide which provider an inbox-dropped ZIP belongs to.
7
+ *
8
+ * Why hand-rolled (no adm-zip): inbox-core must stay free of Rift
9
+ * internals AND of npm dependencies — only `node:` stdlib. The
10
+ * adapter (inbox-watcher) and a future standalone shell both compose
11
+ * this against their own filesystem.
12
+ *
13
+ * Scope: the regular (non-ZIP64) End-Of-Central-Directory record is
14
+ * enough for personal-use chat exports (Claude / ChatGPT / Gemini /
15
+ * Grok exports are all well under 4 GB and have far fewer than 65535
16
+ * entries). Archives that look like ZIP64, are truncated, or have a
17
+ * corrupt EOCD return null; the caller falls back to filename hints
18
+ * or the chatgpt_web default.
19
+ *
20
+ * ZIP layout we rely on:
21
+ * - End of Central Directory (EOCD): 22 bytes + optional comment,
22
+ * near the end of the file, signature 0x06054b50.
23
+ * - Central Directory file headers: signature 0x02014b50, 46 bytes
24
+ * fixed + variable filename / extra / comment fields.
25
+ */
26
+ import type { SniffedProvider } from "./source-sniffer.js";
27
+ /**
28
+ * Read entry filenames from a ZIP's central directory.
29
+ *
30
+ * Returns null when:
31
+ * - buf is too small to contain a ZIP,
32
+ * - the EOCD signature is not found in the trailing 64 KiB,
33
+ * - the central directory pointers are out of range (truncated /
34
+ * corrupted archive),
35
+ * - the archive declares ZIP64 marker values (we don't follow the
36
+ * ZIP64 locator — out of scope for inbox sniffing),
37
+ * - any individual CD header signature mismatches.
38
+ *
39
+ * The list is in central-directory order, which is the order zip
40
+ * tools usually preserve. Filenames are decoded as UTF-8; ZIP entries
41
+ * default to CP437 if bit 11 of the general purpose flag is not set,
42
+ * but modern tools (Claude, Google Takeout, ChatGPT) all write UTF-8
43
+ * filenames for the marker files we look at, so we don't branch.
44
+ */
45
+ export declare function readZipCentralDirectoryFilenames(buf: Buffer, options?: {
46
+ maxEntries?: number;
47
+ }): string[] | null;
48
+ /**
49
+ * Identify a non-ChatGPT provider from the entry list of a ZIP's
50
+ * central directory. ChatGPT-shaped zips intentionally return null so
51
+ * the adapter's default (`chatgpt_web`) handles them.
52
+ *
53
+ * Markers:
54
+ * - Any entry path starts with `Takeout/` → gemini_web.
55
+ * Google Takeout ZIPs use this exact top-level prefix.
56
+ * - A root-level `projects.json` together with `conversations.json`
57
+ * and NO `chat.html` / `shared_conversations.json` → claude_web.
58
+ * ChatGPT exports contain conversations.json too but ship chat.html
59
+ * and shared_conversations.json at root and have never shipped a
60
+ * projects.json; Claude exports always ship projects.json at root.
61
+ * - Grok ZIPs are caught upstream by filename hint
62
+ * (`prod-grok-backend*.zip`), so we don't look for them here.
63
+ */
64
+ export declare function sniffProviderFromZipEntries(entries: ReadonlyArray<string>): SniffedProvider | null;
65
+ /**
66
+ * Convenience wrapper: read the CD and classify in one call. Returns
67
+ * null on a non-ZIP buffer, a corrupted CD, or an unclassified shape.
68
+ */
69
+ export declare function sniffZipBuffer(buf: Buffer): SniffedProvider | null;
70
+ //# sourceMappingURL=zip-sniffer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"zip-sniffer.d.ts","sourceRoot":"","sources":["../../../../src/ingestion/inbox-core/zip-sniffer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAiB3D;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAgB,gCAAgC,CAC9C,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE;IAAE,UAAU,CAAC,EAAE,MAAM,CAAA;CAAE,GAChC,MAAM,EAAE,GAAG,IAAI,CAmEjB;AAED;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,2BAA2B,CACzC,OAAO,EAAE,aAAa,CAAC,MAAM,CAAC,GAC7B,eAAe,GAAG,IAAI,CAuCxB;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,eAAe,GAAG,IAAI,CAIlE"}
@@ -0,0 +1,161 @@
1
const EOCD_SIGNATURE = 0x06054b50;
const CD_HEADER_SIGNATURE = 0x02014b50;
const EOCD_MIN_SIZE = 22;
const EOCD_MAX_COMMENT = 0xffff;
const EOCD_MAX_SCAN = EOCD_MIN_SIZE + EOCD_MAX_COMMENT;
const CD_HEADER_FIXED_SIZE = 46;
/**
 * Cap how many central-directory entries we read. Real chat exports
 * are well under this; the cap guards against pathological archives
 * (untrusted inbox drops with huge directories) without ever needing
 * to extract data.
 */
const DEFAULT_MAX_ENTRIES = 4096;
/**
 * Read entry filenames from a ZIP's central directory.
 *
 * Returns null when:
 * - buf is not a Buffer or is too small to contain a ZIP,
 * - no plausible EOCD record is found in the trailing 64 KiB (a
 *   signature match whose declared comment would run past the end of
 *   the buffer is treated as a false positive and skipped),
 * - the archive declares ZIP64 marker values (we don't follow the
 *   ZIP64 locator — out of scope for inbox sniffing),
 * - the central directory would extend past the EOCD record
 *   (truncated / corrupted archive),
 * - any individual CD header signature mismatches, or any header's
 *   variable-length fields overrun the central directory.
 *
 * The list is in central-directory order. Filenames are decoded as
 * UTF-8; the general-purpose flag (bit 11) is not consulted.
 *
 * @param {Buffer} buf - Entire archive contents.
 * @param {{ maxEntries?: number }} [options] - `maxEntries` caps how many
 *   entries are read; a missing, NaN or negative value falls back to
 *   the default cap.
 * @returns {string[] | null} Entry names, or null if the buffer is not
 *   a readable non-ZIP64 archive.
 */
export function readZipCentralDirectoryFilenames(buf, options) {
    if (!Buffer.isBuffer(buf) || buf.length < EOCD_MIN_SIZE)
        return null;
    // A NaN/negative cap would make the walk below silently return [],
    // which is indistinguishable from a genuinely empty archive.
    const requestedCap = options?.maxEntries;
    const maxEntries = typeof requestedCap === "number" && requestedCap >= 0
        ? requestedCap
        : DEFAULT_MAX_ENTRIES;
    // --- Locate EOCD by scanning backwards from the end for the
    // signature. The EOCD lives within the last 22 + 65535 bytes.
    const scanStart = Math.max(0, buf.length - EOCD_MAX_SCAN);
    let eocdOffset = -1;
    for (let i = buf.length - EOCD_MIN_SIZE; i >= scanStart; i--) {
        if (buf.readUInt32LE(i) !== EOCD_SIGNATURE)
            continue;
        // The archive comment may itself contain the signature bytes. A
        // real EOCD's declared comment always fits in the buffer; a
        // candidate whose comment would overflow is a false positive.
        const declaredCommentLen = buf.readUInt16LE(i + 20);
        if (i + EOCD_MIN_SIZE + declaredCommentLen > buf.length)
            continue;
        eocdOffset = i;
        break;
    }
    if (eocdOffset < 0)
        return null;
    // EOCD fields (little-endian):
    //   off  4: disk number (uint16)
    //   off  6: disk where CD starts (uint16)
    //   off  8: CD entries on this disk (uint16)
    //   off 10: total CD entries (uint16)
    //   off 12: CD size in bytes (uint32)
    //   off 16: CD offset from archive start (uint32)
    //   off 20: comment length (uint16)
    const totalEntries = buf.readUInt16LE(eocdOffset + 10);
    const cdSize = buf.readUInt32LE(eocdOffset + 12);
    const cdOffset = buf.readUInt32LE(eocdOffset + 16);
    // ZIP64 sentinel values — bail out rather than misread.
    if (totalEntries === 0xffff || cdSize === 0xffffffff || cdOffset === 0xffffffff) {
        return null;
    }
    // The central directory precedes the EOCD record. Bounding it by the
    // EOCD offset (not just the buffer length) keeps the walk from
    // interpreting EOCD or comment bytes as entry headers.
    const cdEnd = cdOffset + cdSize;
    if (cdEnd > eocdOffset)
        return null;
    if (totalEntries === 0)
        return [];
    // --- Walk CD entries.
    const names = [];
    let cursor = cdOffset;
    const cap = Math.min(totalEntries, maxEntries);
    for (let i = 0; i < cap; i++) {
        if (cursor + CD_HEADER_FIXED_SIZE > cdEnd)
            return null;
        if (buf.readUInt32LE(cursor) !== CD_HEADER_SIGNATURE)
            return null;
        // CD header field offsets relative to cursor:
        //   off 28: filename length (uint16)
        //   off 30: extra field length (uint16)
        //   off 32: file comment length (uint16)
        const filenameLen = buf.readUInt16LE(cursor + 28);
        const extraLen = buf.readUInt16LE(cursor + 30);
        const commentLen = buf.readUInt16LE(cursor + 32);
        const filenameStart = cursor + CD_HEADER_FIXED_SIZE;
        const filenameEnd = filenameStart + filenameLen;
        // Validate the whole record, not just the filename, so a final
        // entry whose extra/comment fields overrun the directory is
        // reported as corrupt instead of being accepted.
        const entryEnd = filenameEnd + extraLen + commentLen;
        if (entryEnd > cdEnd)
            return null;
        names.push(buf.toString("utf-8", filenameStart, filenameEnd));
        cursor = entryEnd;
    }
    return names;
}
96
/**
 * Identify a non-ChatGPT provider from the entry list of a ZIP's
 * central directory. ChatGPT-shaped zips intentionally return null so
 * the adapter's default (`chatgpt_web`) handles them.
 *
 * Entry names are normalised before matching: backslash separators
 * (written by some Windows tools) are converted to `/`, and any leading
 * `./` prefixes are removed.
 *
 * Markers:
 * - Any entry path starts with `Takeout/` → gemini_web.
 *   Google Takeout ZIPs use this exact top-level prefix.
 * - A root-level `projects.json` together with `conversations.json`
 *   and NO `chat.html` / `shared_conversations.json` → claude_web.
 *   ChatGPT exports contain conversations.json too but ship chat.html
 *   and shared_conversations.json at root and have never shipped a
 *   projects.json; Claude exports always ship projects.json at root.
 * - Grok ZIPs are caught upstream by filename hint
 *   (`prod-grok-backend*.zip`), so we don't look for them here.
 *
 * @param {ReadonlyArray<string>} entries - Entry names in central-directory order.
 * @returns {"gemini_web" | "claude_web" | null} The matched provider, or
 *   null when no marker set matches (including an empty list).
 */
export function sniffProviderFromZipEntries(entries) {
    let hasRootConversations = false;
    let hasRootProjects = false;
    let hasChatHtml = false;
    let hasSharedConversations = false;
    for (const raw of entries) {
        // Normalise separators first so `Takeout\...` and `.\file` entries
        // are matched the same way as their forward-slash equivalents.
        const entryPath = raw.replace(/\\/g, "/").replace(/^(?:\.\/)+/, "");
        // The Takeout prefix alone decides gemini_web and no other marker
        // can override it, so stop at the first match.
        if (entryPath.startsWith("Takeout/"))
            return "gemini_web";
        // Only root-level files count as Claude / ChatGPT markers.
        if (entryPath.includes("/"))
            continue;
        switch (entryPath.toLowerCase()) {
            case "conversations.json":
                hasRootConversations = true;
                break;
            case "projects.json":
                hasRootProjects = true;
                break;
            case "chat.html":
                hasChatHtml = true;
                break;
            case "shared_conversations.json":
                hasSharedConversations = true;
                break;
            default:
                break;
        }
    }
    if (hasRootProjects &&
        hasRootConversations &&
        !hasChatHtml &&
        !hasSharedConversations) {
        return "claude_web";
    }
    return null;
}
151
/**
 * One-call helper: list the archive's central-directory entry names and
 * classify them. Yields null when the names cannot be read (the reader
 * returned null) or when the entry list matches no known provider.
 */
export function sniffZipBuffer(buf) {
    const entryNames = readZipCentralDirectoryFilenames(buf);
    return entryNames ? sniffProviderFromZipEntries(entryNames) : null;
}
161
+ //# sourceMappingURL=zip-sniffer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"zip-sniffer.js","sourceRoot":"","sources":["../../../../src/ingestion/inbox-core/zip-sniffer.ts"],"names":[],"mappings":"AA2BA,MAAM,cAAc,GAAG,UAAU,CAAC;AAClC,MAAM,mBAAmB,GAAG,UAAU,CAAC;AACvC,MAAM,aAAa,GAAG,EAAE,CAAC;AACzB,MAAM,gBAAgB,GAAG,MAAM,CAAC;AAChC,MAAM,aAAa,GAAG,aAAa,GAAG,gBAAgB,CAAC;AACvD,MAAM,oBAAoB,GAAG,EAAE,CAAC;AAEhC;;;;;GAKG;AACH,MAAM,mBAAmB,GAAG,IAAI,CAAC;AAEjC;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,UAAU,gCAAgC,CAC9C,GAAW,EACX,OAAiC;IAEjC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,MAAM,GAAG,aAAa;QAAE,OAAO,IAAI,CAAC;IAErE,MAAM,UAAU,GAAG,OAAO,EAAE,UAAU,IAAI,mBAAmB,CAAC;IAE9D,6DAA6D;IAC7D,kEAAkE;IAClE,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,MAAM,GAAG,aAAa,CAAC,CAAC;IAC1D,IAAI,UAAU,GAAG,CAAC,CAAC,CAAC;IACpB,KAAK,IAAI,CAAC,GAAG,GAAG,CAAC,MAAM,GAAG,aAAa,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7D,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,KAAK,cAAc,EAAE,CAAC;YAC3C,UAAU,GAAG,CAAC,CAAC;YACf,MAAM;QACR,CAAC;IACH,CAAC;IACD,IAAI,UAAU,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAEhC,+BAA+B;IAC/B,iCAAiC;IACjC,0CAA0C;IAC1C,6CAA6C;IAC7C,sCAAsC;IACtC,sCAAsC;IACtC,kDAAkD;IAClD,oCAAoC;IACpC,MAAM,YAAY,GAAG,GAAG,CAAC,YAAY,CAAC,UAAU,GAAG,EAAE,CAAC,CAAC;IACvD,MAAM,MAAM,GAAG,GAAG,CAAC,YAAY,CAAC,UAAU,GAAG,EAAE,CAAC,CAAC;IACjD,MAAM,QAAQ,GAAG,GAAG,CAAC,YAAY,CAAC,UAAU,GAAG,EAAE,CAAC,CAAC;IAEnD,wDAAwD;IACxD,IAAI,YAAY,KAAK,MAAM,IAAI,MAAM,KAAK,UAAU,IAAI,QAAQ,KAAK,UAAU,EAAE,CAAC;QAChF,OAAO,IAAI,CAAC;IACd,CAAC;IAED,IAAI,QAAQ,GAAG,MAAM,GAAG,GAAG,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAChD,IAAI,QAAQ,IAAI,GAAG,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IACxC,IAAI,YAAY,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAElC,uBAAuB;IACvB,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,MAAM,GAAG,QAAQ,CAAC;IACtB,MAAM,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;IAChC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,YAAY,EAAE,UAAU,CAAC,CAAC;IAE/C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7B,IAAI,MAAM,GAAG,oBAAoB,GAAG,KAAK;YAAE,OAAO,IAAI,CAAC;QACvD,IAAI,GAAG,CAAC,YAAY,CAAC,MAAM,CAAC,KAAK,mBAAmB;YAAE,OAAO,IAAI,CAAC;
QAElE,8CAA8C;QAC9C,qCAAqC;QACrC,wCAAwC;QACxC,yCAAyC;QACzC,MAAM,WAAW,GAAG,GAAG,CAAC,YAAY,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;QAClD,MAAM,QAAQ,GAAG,GAAG,CAAC,YAAY,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;QAC/C,MAAM,UAAU,GAAG,GAAG,CAAC,YAAY,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;QAEjD,MAAM,aAAa,GAAG,MAAM,GAAG,oBAAoB,CAAC;QACpD,MAAM,WAAW,GAAG,aAAa,GAAG,WAAW,CAAC;QAChD,IAAI,WAAW,GAAG,KAAK;YAAE,OAAO,IAAI,CAAC;QAErC,MAAM,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,OAAO,EAAE,aAAa,EAAE,WAAW,CAAC,CAAC;QACnE,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAErB,MAAM,GAAG,WAAW,GAAG,QAAQ,GAAG,UAAU,CAAC;IAC/C,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;;;;;;;;;;;;GAeG;AACH,MAAM,UAAU,2BAA2B,CACzC,OAA8B;IAE9B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEtC,IAAI,gBAAgB,GAAG,KAAK,CAAC;IAC7B,IAAI,oBAAoB,GAAG,KAAK,CAAC;IACjC,IAAI,eAAe,GAAG,KAAK,CAAC;IAC5B,IAAI,WAAW,GAAG,KAAK,CAAC;IACxB,IAAI,sBAAsB,GAAG,KAAK,CAAC;IAEnC,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;QAC1B,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QACtC,IAAI,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAChC,gBAAgB,GAAG,IAAI,CAAC;YACxB,8DAA8D;YAC9D,2DAA2D;YAC3D,kDAAkD;YAClD,MAAM;QACR,CAAC;QACD,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YACxB,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;YACjC,IAAI,KAAK,KAAK,oBAAoB;gBAAE,oBAAoB,GAAG,IAAI,CAAC;iBAC3D,IAAI,KAAK,KAAK,eAAe;gBAAE,eAAe,GAAG,IAAI,CAAC;iBACtD,IAAI,KAAK,KAAK,WAAW;gBAAE,WAAW,GAAG,IAAI,CAAC;iBAC9C,IAAI,KAAK,KAAK,2BAA2B;gBAAE,sBAAsB,GAAG,IAAI,CAAC;QAChF,CAAC;IACH,CAAC;IAED,IAAI,gBAAgB;QAAE,OAAO,YAAY,CAAC;IAE1C,IACE,eAAe;QACf,oBAAoB;QACpB,CAAC,WAAW;QACZ,CAAC,sBAAsB,EACvB,CAAC;QACD,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,GAAW;IACxC,MAAM,OAAO,GAAG,gCAAgC,CAAC,GAAG,CAAC,CAAC;IACtD,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1B,OAAO,2BAA2B,CAAC,OAAO,CAAC,CAAC;AAC9C,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"inbox-watcher.d.ts","sourceRoot":"","sources":["../../../src/ingestion/inbox-watcher.ts"],"names":[],"mappings":"AAgBA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAWvC,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,QAAQ,CAAC;CACjB;AAyCD;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,mBAAmB,GAAG,OAAO,CAsCrE"}
1
+ {"version":3,"file":"inbox-watcher.d.ts","sourceRoot":"","sources":["../../../src/ingestion/inbox-watcher.ts"],"names":[],"mappings":"AAmBA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAcvC,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,QAAQ,CAAC;CACjB;AAED;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,mBAAmB,GAAG,OAAO,CA2DrE"}
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Inbox watcher — Slice 19.
2
+ * Inbox watcher — Slice 19, refactored to compose `inbox-core`.
3
3
  *
4
4
  * Watches data/inbox/ for dropped files and creates ingest jobs.
5
5
  * Reuses the existing secure ingest pipeline (archive security,
@@ -10,53 +10,18 @@
10
10
  * or defaults to "chatgpt_web" for files in inbox root.
11
11
  * - Raw files land in data/raw/conversations/inbox/ via rawSource override.
12
12
  * - Idempotency key prevents duplicate processing on restart.
13
+ *
14
+ * This file is the Rift adapter — it owns `fs`, the JobQueue handoff,
15
+ * and Rift's parser registry. The pure detection primitives live in
16
+ * `inbox-core/` so a future standalone shell can reuse them.
13
17
  */
14
- import crypto from "node:crypto";
15
18
  import fs from "node:fs";
16
19
  import path from "node:path";
17
20
  import { Watcher } from "./watcher.js";
18
21
  import { SUPPORTED_INGEST_SOURCES } from "./parsers/types.js";
19
- // Web conversation exports are JSON or archives of JSON. No parser for
20
- // raw markdown exists — markdown dropped here would fail at the parser
21
- // stage, so reject it up front.
22
- const SUPPORTED_EXTENSIONS = new Set([".json", ".zip"]);
22
+ import { detectSourceFromSubdir, inboxIdempotencyKey, isSupportedInboxExtension, sniffInboxSource, } from "./inbox-core/index.js";
23
+ import { writeSkipQuarantine } from "./skip-quarantine.js";
23
24
  const DEFAULT_SOURCE = "chatgpt_web";
24
- /**
25
- * Detect the parser source from a file path relative to the inbox directory.
26
- * Files in subdirectories use the subdirectory name as source
27
- * (e.g., data/inbox/chatgpt_web/export.zip → chatgpt_web).
28
- * Files directly in inbox/ default to chatgpt_web.
29
- */
30
- function detectSource(filePath, inboxDir) {
31
- const relative = path.relative(inboxDir, filePath);
32
- const firstSegment = relative.split(path.sep)[0];
33
- if (firstSegment &&
34
- firstSegment !== path.basename(filePath) &&
35
- SUPPORTED_INGEST_SOURCES.includes(firstSegment)) {
36
- return firstSegment;
37
- }
38
- return DEFAULT_SOURCE;
39
- }
40
- /**
41
- * Generate a stable idempotency key for an inbox file.
42
- * Includes path + mtime so re-dropping the same filename with new
43
- * content gets a fresh key, while restart doesn't re-process.
44
- */
45
- function inboxIdempotencyKey(filePath) {
46
- let mtimeMs = 0;
47
- try {
48
- mtimeMs = fs.statSync(filePath).mtimeMs;
49
- }
50
- catch {
51
- // File may have been moved; use 0
52
- }
53
- const hash = crypto
54
- .createHash("sha256")
55
- .update(`${filePath}:${mtimeMs}`)
56
- .digest("hex")
57
- .slice(0, 16);
58
- return `inbox:${hash}`;
59
- }
60
25
  /**
61
26
  * Create an inbox watcher. Returns a Watcher instance that can be
62
27
  * started, paused, and stopped like any other watcher.
@@ -65,24 +30,43 @@ export function createInboxWatcher(opts) {
65
30
  const inboxDir = path.join(opts.dataDir, "inbox");
66
31
  fs.mkdirSync(inboxDir, { recursive: true });
67
32
  const handler = async (event) => {
68
- // Only process new files (not changes or deletions).
69
- if (event.type !== "add")
33
+ // Ignore deletions; otherwise process both `add` and `change` so a
34
+ // user who overwrites `export.zip` in place picks up the new
35
+ // content. The idempotency key includes mtime, so an unchanged
36
+ // `change` event collapses at the queue layer.
37
+ if (event.type === "unlink")
70
38
  return;
71
- // Check file extension.
72
- const ext = path.extname(event.path).toLowerCase();
73
- if (!SUPPORTED_EXTENSIONS.has(ext))
39
+ if (!isSupportedInboxExtension(event.path)) {
40
+ // Markdown / images / other types are not parseable as web
41
+ // exports. Record a friendly skip so the user can see why their
42
+ // drop didn't ingest, instead of it disappearing silently.
43
+ await writeSkipQuarantine(opts.dataDir, {
44
+ reason: "inbox_unsupported_extension",
45
+ source_path: event.path,
46
+ metadata: { extension: path.extname(event.path).toLowerCase() },
47
+ }).catch(() => {
48
+ // Quarantine write failure must not break the watcher loop.
49
+ });
74
50
  return;
75
- // Read file contents.
51
+ }
76
52
  let fileData;
53
+ let mtimeMs;
77
54
  try {
78
55
  fileData = fs.readFileSync(event.path);
56
+ mtimeMs = fs.statSync(event.path).mtimeMs;
79
57
  }
80
58
  catch {
81
59
  // File may have been removed between detection and read.
82
60
  return;
83
61
  }
84
- const source = detectSource(event.path, inboxDir);
85
- const idempotencyKey = inboxIdempotencyKey(event.path);
62
+ // Source resolution order:
63
+ // 1. Subdirectory hint (data/inbox/<provider>/file.json) — strongest.
64
+ // 2. Content/filename sniff — identifies non-ChatGPT providers.
65
+ // 3. DEFAULT_SOURCE — historical default for unidentified drops.
66
+ const source = detectSourceFromSubdir(event.path, inboxDir, SUPPORTED_INGEST_SOURCES) ??
67
+ sniffInboxSource(path.basename(event.path), fileData) ??
68
+ DEFAULT_SOURCE;
69
+ const idempotencyKey = inboxIdempotencyKey(event.path, mtimeMs);
86
70
  const payload = {
87
71
  source: source,
88
72
  filename: path.basename(event.path),
@@ -1 +1 @@
1
- {"version":3,"file":"inbox-watcher.js","sourceRoot":"","sources":["../../../src/ingestion/inbox-watcher.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AACH,OAAO,MAAM,MAAM,aAAa,CAAC;AACjC,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAGvC,OAAO,EAAE,wBAAwB,EAAE,MAAM,oBAAoB,CAAC;AAE9D,uEAAuE;AACvE,uEAAuE;AACvE,gCAAgC;AAChC,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC;AACxD,MAAM,cAAc,GAAG,aAAa,CAAC;AAOrC;;;;;GAKG;AACH,SAAS,YAAY,CAAC,QAAgB,EAAE,QAAgB;IACtD,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;IACnD,MAAM,YAAY,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACjD,IACE,YAAY;QACZ,YAAY,KAAK,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;QACvC,wBAA8C,CAAC,QAAQ,CAAC,YAAY,CAAC,EACtE,CAAC;QACD,OAAO,YAAY,CAAC;IACtB,CAAC;IACD,OAAO,cAAc,CAAC;AACxB,CAAC;AAED;;;;GAIG;AACH,SAAS,mBAAmB,CAAC,QAAgB;IAC3C,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,CAAC;QACH,OAAO,GAAG,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC;IAC1C,CAAC;IAAC,MAAM,CAAC;QACP,kCAAkC;IACpC,CAAC;IACD,MAAM,IAAI,GAAG,MAAM;SAChB,UAAU,CAAC,QAAQ,CAAC;SACpB,MAAM,CAAC,GAAG,QAAQ,IAAI,OAAO,EAAE,CAAC;SAChC,MAAM,CAAC,KAAK,CAAC;SACb,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAChB,OAAO,SAAS,IAAI,EAAE,CAAC;AACzB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAyB;IAC1D,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;IAClD,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAE5C,MAAM,OAAO,GAAG,KAAK,EAAE,KAAgB,EAAiB,EAAE;QACxD,qDAAqD;QACrD,IAAI,KAAK,CAAC,IAAI,KAAK,KAAK;YAAE,OAAO;QAEjC,wBAAwB;QACxB,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,CAAC;QACnD,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,OAAO;QAE3C,sBAAsB;QACtB,IAAI,QAAgB,CAAC;QACrB,IAAI,CAAC;YACH,QAAQ,GAAG,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACzC,CAAC;QAAC,MAAM,CAAC;YACP,yDAAyD;YACzD,OAAO;QACT,CAAC;QAED,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QAClD,MAAM,cAAc,GAAG,mBAAmB,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEvD,M
AAM,OAAO,GAAkB;YAC7B,MAAM,EAAE,MAAiC;YACzC,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC;YACnC,QAAQ,EAAE,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC;YACrC,SAAS,EAAE,OAAO;SACnB,CAAC;QAEF,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE;YAChC,eAAe,EAAE,cAAc;YAC/B,OAAO;SACR,CAAC,CAAC;IACL,CAAC,CAAC;IAEF,OAAO,IAAI,OAAO,CAAC,CAAC,QAAQ,CAAC,EAAE,OAAO,CAAC,CAAC;AAC1C,CAAC"}
1
+ {"version":3,"file":"inbox-watcher.js","sourceRoot":"","sources":["../../../src/ingestion/inbox-watcher.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AACH,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAGvC,OAAO,EAAE,wBAAwB,EAAE,MAAM,oBAAoB,CAAC;AAC9D,OAAO,EACL,sBAAsB,EACtB,mBAAmB,EACnB,yBAAyB,EACzB,gBAAgB,GACjB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAE3D,MAAM,cAAc,GAAG,aAAa,CAAC;AAOrC;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAyB;IAC1D,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;IAClD,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAE5C,MAAM,OAAO,GAAG,KAAK,EAAE,KAAgB,EAAiB,EAAE;QACxD,mEAAmE;QACnE,6DAA6D;QAC7D,+DAA+D;QAC/D,+CAA+C;QAC/C,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ;YAAE,OAAO;QAEpC,IAAI,CAAC,yBAAyB,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3C,2DAA2D;YAC3D,gEAAgE;YAChE,2DAA2D;YAC3D,MAAM,mBAAmB,CAAC,IAAI,CAAC,OAAO,EAAE;gBACtC,MAAM,EAAE,6BAA6B;gBACrC,WAAW,EAAE,KAAK,CAAC,IAAI;gBACvB,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,EAAE;aAChE,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE;gBACZ,4DAA4D;YAC9D,CAAC,CAAC,CAAC;YACH,OAAO;QACT,CAAC;QAED,IAAI,QAAgB,CAAC;QACrB,IAAI,OAAe,CAAC;QACpB,IAAI,CAAC;YACH,QAAQ,GAAG,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACvC,OAAO,GAAG,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC;QAC5C,CAAC;QAAC,MAAM,CAAC;YACP,yDAAyD;YACzD,OAAO;QACT,CAAC;QAED,2BAA2B;QAC3B,wEAAwE;QACxE,kEAAkE;QAClE,mEAAmE;QACnE,MAAM,MAAM,GACV,sBAAsB,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,EAAE,wBAAwB,CAAC;YACtE,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,QAAQ,CAAC;YACrD,cAAc,CAAC;QACjB,MAAM,cAAc,GAAG,mBAAmB,CAAC,KAAK,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAEhE,MAAM,OAAO,GAAkB;YAC7B,MAAM,EAAE,MAAiC;YACzC,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC;YACnC,QAAQ,EAAE,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC;YACrC,SAAS,EAAE,OAAO;SACnB,CAAC;QAEF,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE;YAChC,eAAe,EAAE,cAAc;YAC/B,OAAO;SACR,CAAC,CAAC;IACL,CAAC
,CAAC;IAEF,OAAO,IAAI,OAAO,CAAC,CAAC,QAAQ,CAAC,EAAE,OAAO,CAAC,CAAC;AAC1C,CAAC"}
@@ -10,6 +10,13 @@ export interface IndexerConfig {
10
10
  allowedRoots: readonly string[];
11
11
  /** Target table for document storage. Defaults to "structured_docs". */
12
12
  tableName?: "structured_docs" | "structured_docs_local";
13
+ /**
14
+ * Data directory root. Used to write skip-quarantine records when a file
15
+ * cannot be embedded (e.g. empty extracted content). Required so every
16
+ * production code path — watch, scheduled scan, reconcile — produces a
17
+ * visible explanation when a file is dropped, instead of silent stderr.
18
+ */
19
+ dataDir: string;
13
20
  }
14
21
  /**
15
22
  * Deterministic row ID from the canonical source path.
@@ -1 +1 @@
1
- {"version":3,"file":"indexer.d.ts","sourceRoot":"","sources":["../../../src/ingestion/indexer.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAMxF,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,KAAK,GAAG,QAAQ,GAAG,QAAQ,CAAC;IAClC,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,aAAa;IAC5B,UAAU,EAAE,UAAU,CAAC;IACvB,WAAW,EAAE,WAAW,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;IAChC,wEAAwE;IACxE,SAAS,CAAC,EAAE,iBAAiB,GAAG,uBAAuB,CAAC;CACzD;AAED;;;GAGG;AACH,wBAAgB,MAAM,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,CAEjD;AAED,qBAAa,OAAO;IAClB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAoB;IAC9C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAgB;gBAE3B,SAAS,EAAE,iBAAiB,EAAE,MAAM,EAAE,aAAa;IAK/D;;;;OAIG;IACG,WAAW,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC;YAcpC,YAAY;YAwBZ,YAAY;CAK3B"}
1
+ {"version":3,"file":"indexer.d.ts","sourceRoot":"","sources":["../../../src/ingestion/indexer.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AASxF,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,KAAK,GAAG,QAAQ,GAAG,QAAQ,CAAC;IAClC,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,aAAa;IAC5B,UAAU,EAAE,UAAU,CAAC;IACvB,WAAW,EAAE,WAAW,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;IAChC,wEAAwE;IACxE,SAAS,CAAC,EAAE,iBAAiB,GAAG,uBAAuB,CAAC;IACxD;;;;;OAKG;IACH,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;;GAGG;AACH,wBAAgB,MAAM,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,CAEjD;AAED,qBAAa,OAAO;IAClB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAoB;IAC9C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAgB;gBAE3B,SAAS,EAAE,iBAAiB,EAAE,MAAM,EAAE,aAAa;IAK/D;;;;OAIG;IACG,WAAW,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC;YAcpC,YAAY;YAiEZ,YAAY;CAK3B"}
@@ -9,6 +9,9 @@ import crypto from "node:crypto";
9
9
  import { getTable } from "../storage/tables.js";
10
10
  import { extract, isSupported } from "./extractor.js";
11
11
  import { validatePath, validateUnlinkPath } from "../security/paths.js";
12
+ import { writeSkipQuarantine } from "./skip-quarantine.js";
13
+ import { recordEmbed } from "../observability/embedding-events.js";
14
+ import { recordIndexWrite } from "../observability/index-events.js";
12
15
  /**
13
16
  * Deterministic row ID from the canonical source path.
14
17
  * Same file always gets the same ID, enabling upsert via delete+add.
@@ -41,7 +44,28 @@ export class Indexer {
41
44
  }
42
45
  async handleUpsert(filePath) {
43
46
  const doc = await extract(filePath);
44
- const embeddingVec = await this.embedding.embed(doc.content);
47
+ // Empty/blank extracted content cannot be embedded — Voyage rejects empty
48
+ // strings with HTTP 400, and indexing an empty row produces nothing
49
+ // searchable anyway. Quarantine the skip so it's visible (not stderr-only)
50
+ // and remove any stale row from a prior good extraction of the same path.
51
+ if (isBlank(doc.content)) {
52
+ const id = fileId(filePath);
53
+ const table = getTable(this.config.tableName ?? "structured_docs");
54
+ await table.delete(`id = '${id}'`);
55
+ await writeSkipQuarantine(this.config.dataDir, {
56
+ reason: "empty_extracted_content",
57
+ source_path: filePath,
58
+ metadata: doc.metadata,
59
+ });
60
+ return;
61
+ }
62
+ const embeddingVec = await recordEmbed(this.config.dataDir, this.embedding, {
63
+ pipeline: this.config.sourceType === "filesystem_watched"
64
+ ? "watcher"
65
+ : "scheduled_scan",
66
+ operation: "document_embedding",
67
+ input_count: 1,
68
+ }, () => this.embedding.embed(doc.content));
45
69
  const id = fileId(filePath);
46
70
  const table = getTable(this.config.tableName ?? "structured_docs");
47
71
  // Upsert: delete existing row (if any), then add new one.
@@ -57,7 +81,14 @@ export class Indexer {
57
81
  indexed_at: new Date().toISOString(),
58
82
  metadata: JSON.stringify(doc.metadata),
59
83
  };
60
- await table.add([row]);
84
+ await recordIndexWrite(this.config.dataDir, {
85
+ table: this.config.tableName ?? "structured_docs",
86
+ pipeline: this.config.sourceType === "filesystem_watched"
87
+ ? "watcher"
88
+ : "scheduled_scan",
89
+ operation: "structured_doc_upsert",
90
+ row_count: 1,
91
+ }, () => table.add([row]));
61
92
  }
62
93
  async handleDelete(filePath) {
63
94
  const id = fileId(filePath);
@@ -65,4 +96,7 @@ export class Indexer {
65
96
  await table.delete(`id = '${id}'`);
66
97
  }
67
98
  }
99
/**
 * Report whether extracted content has nothing embeddable in it.
 *
 * A missing value (`null` / `undefined`) counts as blank, so the caller's
 * blank-content branch handles it instead of this helper throwing a
 * TypeError on `.trim()`.
 *
 * @param {string | null | undefined} s - Extracted document content.
 * @returns {boolean} True when `s` is missing, empty, or whitespace-only.
 */
function isBlank(s) {
    return s == null || s.trim().length === 0;
}
68
102
  //# sourceMappingURL=indexer.js.map