@getrift/rift 0.1.0-beta.20 → 0.1.0-beta.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (280) hide show
  1. package/README.md +7 -3
  2. package/dist/src/capture/auto-capture.d.ts +105 -4
  3. package/dist/src/capture/auto-capture.d.ts.map +1 -1
  4. package/dist/src/capture/auto-capture.js +313 -34
  5. package/dist/src/capture/auto-capture.js.map +1 -1
  6. package/dist/src/capture/claude-cli-triage-provider.d.ts +28 -0
  7. package/dist/src/capture/claude-cli-triage-provider.d.ts.map +1 -0
  8. package/dist/src/capture/claude-cli-triage-provider.js +88 -0
  9. package/dist/src/capture/claude-cli-triage-provider.js.map +1 -0
  10. package/dist/src/capture/codex-cli-triage-provider.d.ts.map +1 -1
  11. package/dist/src/capture/codex-cli-triage-provider.js +1 -33
  12. package/dist/src/capture/codex-cli-triage-provider.js.map +1 -1
  13. package/dist/src/capture/cursor-capture.d.ts +89 -0
  14. package/dist/src/capture/cursor-capture.d.ts.map +1 -0
  15. package/dist/src/capture/cursor-capture.js +121 -0
  16. package/dist/src/capture/cursor-capture.js.map +1 -0
  17. package/dist/src/capture/observability.d.ts +30 -0
  18. package/dist/src/capture/observability.d.ts.map +1 -1
  19. package/dist/src/capture/observability.js +29 -0
  20. package/dist/src/capture/observability.js.map +1 -1
  21. package/dist/src/capture/sources.d.ts +41 -3
  22. package/dist/src/capture/sources.d.ts.map +1 -1
  23. package/dist/src/capture/sources.js +43 -1
  24. package/dist/src/capture/sources.js.map +1 -1
  25. package/dist/src/capture/triage-classification.d.ts +69 -0
  26. package/dist/src/capture/triage-classification.d.ts.map +1 -0
  27. package/dist/src/capture/triage-classification.js +62 -0
  28. package/dist/src/capture/triage-classification.js.map +1 -0
  29. package/dist/src/capture/triage-provider-factory.d.ts +36 -0
  30. package/dist/src/capture/triage-provider-factory.d.ts.map +1 -0
  31. package/dist/src/capture/triage-provider-factory.js +55 -0
  32. package/dist/src/capture/triage-provider-factory.js.map +1 -0
  33. package/dist/src/capture/triage.d.ts +1 -1
  34. package/dist/src/capture/triage.d.ts.map +1 -1
  35. package/dist/src/capture/triage.js +8 -6
  36. package/dist/src/capture/triage.js.map +1 -1
  37. package/dist/src/cli/commands/capture.d.ts.map +1 -1
  38. package/dist/src/cli/commands/capture.js +72 -17
  39. package/dist/src/cli/commands/capture.js.map +1 -1
  40. package/dist/src/cli/commands/chunk-backfill.d.ts +13 -0
  41. package/dist/src/cli/commands/chunk-backfill.d.ts.map +1 -0
  42. package/dist/src/cli/commands/chunk-backfill.js +157 -0
  43. package/dist/src/cli/commands/chunk-backfill.js.map +1 -0
  44. package/dist/src/cli/commands/cursor-probe.d.ts +20 -0
  45. package/dist/src/cli/commands/cursor-probe.d.ts.map +1 -0
  46. package/dist/src/cli/commands/cursor-probe.js +162 -0
  47. package/dist/src/cli/commands/cursor-probe.js.map +1 -0
  48. package/dist/src/cli/commands/menubar.d.ts +50 -0
  49. package/dist/src/cli/commands/menubar.d.ts.map +1 -1
  50. package/dist/src/cli/commands/menubar.js +224 -16
  51. package/dist/src/cli/commands/menubar.js.map +1 -1
  52. package/dist/src/cli/commands/onboard.d.ts +36 -7
  53. package/dist/src/cli/commands/onboard.d.ts.map +1 -1
  54. package/dist/src/cli/commands/onboard.js +256 -53
  55. package/dist/src/cli/commands/onboard.js.map +1 -1
  56. package/dist/src/cli/commands/status.d.ts.map +1 -1
  57. package/dist/src/cli/commands/status.js +16 -0
  58. package/dist/src/cli/commands/status.js.map +1 -1
  59. package/dist/src/cli/commands/update.d.ts +34 -1
  60. package/dist/src/cli/commands/update.d.ts.map +1 -1
  61. package/dist/src/cli/commands/update.js +179 -2
  62. package/dist/src/cli/commands/update.js.map +1 -1
  63. package/dist/src/cli/index.d.ts.map +1 -1
  64. package/dist/src/cli/index.js +4 -0
  65. package/dist/src/cli/index.js.map +1 -1
  66. package/dist/src/cli/postinstall-menubar.d.ts.map +1 -1
  67. package/dist/src/cli/postinstall-menubar.js +14 -0
  68. package/dist/src/cli/postinstall-menubar.js.map +1 -1
  69. package/dist/src/cli/status/friend-header.d.ts +18 -0
  70. package/dist/src/cli/status/friend-header.d.ts.map +1 -1
  71. package/dist/src/cli/status/friend-header.js +137 -0
  72. package/dist/src/cli/status/friend-header.js.map +1 -1
  73. package/dist/src/cli/status/local-signals.d.ts +41 -0
  74. package/dist/src/cli/status/local-signals.d.ts.map +1 -1
  75. package/dist/src/cli/status/local-signals.js +48 -0
  76. package/dist/src/cli/status/local-signals.js.map +1 -1
  77. package/dist/src/config/schema.d.ts +220 -14
  78. package/dist/src/config/schema.d.ts.map +1 -1
  79. package/dist/src/config/schema.js +82 -7
  80. package/dist/src/config/schema.js.map +1 -1
  81. package/dist/src/diagnostics/claude-preflight.d.ts +34 -0
  82. package/dist/src/diagnostics/claude-preflight.d.ts.map +1 -0
  83. package/dist/src/diagnostics/claude-preflight.js +89 -0
  84. package/dist/src/diagnostics/claude-preflight.js.map +1 -0
  85. package/dist/src/diagnostics/codex-preflight.d.ts +1 -1
  86. package/dist/src/diagnostics/codex-preflight.d.ts.map +1 -1
  87. package/dist/src/diagnostics/codex-preflight.js +14 -0
  88. package/dist/src/diagnostics/codex-preflight.js.map +1 -1
  89. package/dist/src/diagnostics/doctor.d.ts +9 -1
  90. package/dist/src/diagnostics/doctor.d.ts.map +1 -1
  91. package/dist/src/diagnostics/doctor.js +57 -2
  92. package/dist/src/diagnostics/doctor.js.map +1 -1
  93. package/dist/src/ingestion/chunk-meta.d.ts +85 -0
  94. package/dist/src/ingestion/chunk-meta.d.ts.map +1 -0
  95. package/dist/src/ingestion/chunk-meta.js +167 -0
  96. package/dist/src/ingestion/chunk-meta.js.map +1 -0
  97. package/dist/src/ingestion/chunk-text.d.ts +39 -0
  98. package/dist/src/ingestion/chunk-text.d.ts.map +1 -0
  99. package/dist/src/ingestion/chunk-text.js +114 -0
  100. package/dist/src/ingestion/chunk-text.js.map +1 -0
  101. package/dist/src/ingestion/cursor/cursor-store.d.ts +177 -0
  102. package/dist/src/ingestion/cursor/cursor-store.d.ts.map +1 -0
  103. package/dist/src/ingestion/cursor/cursor-store.js +243 -0
  104. package/dist/src/ingestion/cursor/cursor-store.js.map +1 -0
  105. package/dist/src/ingestion/cursor/enrich-roots.d.ts +16 -0
  106. package/dist/src/ingestion/cursor/enrich-roots.d.ts.map +1 -0
  107. package/dist/src/ingestion/cursor/enrich-roots.js +22 -0
  108. package/dist/src/ingestion/cursor/enrich-roots.js.map +1 -0
  109. package/dist/src/ingestion/cursor/vscdb-reader.d.ts +32 -0
  110. package/dist/src/ingestion/cursor/vscdb-reader.d.ts.map +1 -0
  111. package/dist/src/ingestion/cursor/vscdb-reader.js +113 -0
  112. package/dist/src/ingestion/cursor/vscdb-reader.js.map +1 -0
  113. package/dist/src/ingestion/cursor/workspace-root.d.ts +96 -0
  114. package/dist/src/ingestion/cursor/workspace-root.d.ts.map +1 -0
  115. package/dist/src/ingestion/cursor/workspace-root.js +187 -0
  116. package/dist/src/ingestion/cursor/workspace-root.js.map +1 -0
  117. package/dist/src/ingestion/indexer.d.ts.map +1 -1
  118. package/dist/src/ingestion/indexer.js +41 -32
  119. package/dist/src/ingestion/indexer.js.map +1 -1
  120. package/dist/src/jobs/handlers/compact.d.ts.map +1 -1
  121. package/dist/src/jobs/handlers/compact.js +9 -4
  122. package/dist/src/jobs/handlers/compact.js.map +1 -1
  123. package/dist/src/jobs/handlers/ingest.d.ts.map +1 -1
  124. package/dist/src/jobs/handlers/ingest.js +60 -30
  125. package/dist/src/jobs/handlers/ingest.js.map +1 -1
  126. package/dist/src/jobs/handlers/reconcile.d.ts.map +1 -1
  127. package/dist/src/jobs/handlers/reconcile.js +128 -45
  128. package/dist/src/jobs/handlers/reconcile.js.map +1 -1
  129. package/dist/src/jobs/handlers/save.d.ts.map +1 -1
  130. package/dist/src/jobs/handlers/save.js +122 -72
  131. package/dist/src/jobs/handlers/save.js.map +1 -1
  132. package/dist/src/jobs/types.d.ts +1 -1
  133. package/dist/src/main.js +26 -15
  134. package/dist/src/main.js.map +1 -1
  135. package/dist/src/mcp/server.d.ts.map +1 -1
  136. package/dist/src/mcp/server.js +10 -3
  137. package/dist/src/mcp/server.js.map +1 -1
  138. package/dist/src/mcp/tools/context-pack.d.ts.map +1 -1
  139. package/dist/src/mcp/tools/context-pack.js +7 -1
  140. package/dist/src/mcp/tools/context-pack.js.map +1 -1
  141. package/dist/src/mcp/tools/conversations-search.d.ts +1 -1
  142. package/dist/src/mcp/tools/conversations-search.d.ts.map +1 -1
  143. package/dist/src/mcp/tools/conversations-search.js +7 -1
  144. package/dist/src/mcp/tools/conversations-search.js.map +1 -1
  145. package/dist/src/mcp/tools/evidence-feedback.d.ts +60 -0
  146. package/dist/src/mcp/tools/evidence-feedback.d.ts.map +1 -0
  147. package/dist/src/mcp/tools/evidence-feedback.js +62 -0
  148. package/dist/src/mcp/tools/evidence-feedback.js.map +1 -0
  149. package/dist/src/mcp/tools/log-outcome.d.ts +72 -0
  150. package/dist/src/mcp/tools/log-outcome.d.ts.map +1 -0
  151. package/dist/src/mcp/tools/log-outcome.js +59 -0
  152. package/dist/src/mcp/tools/log-outcome.js.map +1 -0
  153. package/dist/src/mcp/tools/open-evidence.d.ts +37 -0
  154. package/dist/src/mcp/tools/open-evidence.d.ts.map +1 -0
  155. package/dist/src/mcp/tools/open-evidence.js +72 -0
  156. package/dist/src/mcp/tools/open-evidence.js.map +1 -0
  157. package/dist/src/mcp/tools/save.d.ts +7 -2
  158. package/dist/src/mcp/tools/save.d.ts.map +1 -1
  159. package/dist/src/mcp/tools/save.js +7 -2
  160. package/dist/src/mcp/tools/save.js.map +1 -1
  161. package/dist/src/mcp/tools/search.d.ts.map +1 -1
  162. package/dist/src/mcp/tools/search.js +7 -1
  163. package/dist/src/mcp/tools/search.js.map +1 -1
  164. package/dist/src/observability/retrieval-feedback.d.ts +82 -0
  165. package/dist/src/observability/retrieval-feedback.d.ts.map +1 -0
  166. package/dist/src/observability/retrieval-feedback.js +231 -0
  167. package/dist/src/observability/retrieval-feedback.js.map +1 -0
  168. package/dist/src/observability/rift-context.d.ts.map +1 -1
  169. package/dist/src/observability/rift-context.js +3 -0
  170. package/dist/src/observability/rift-context.js.map +1 -1
  171. package/dist/src/observability/tool-usage-stats.d.ts +13 -0
  172. package/dist/src/observability/tool-usage-stats.d.ts.map +1 -1
  173. package/dist/src/observability/tool-usage-stats.js +15 -0
  174. package/dist/src/observability/tool-usage-stats.js.map +1 -1
  175. package/dist/src/observability/tool-usage.d.ts +56 -0
  176. package/dist/src/observability/tool-usage.d.ts.map +1 -1
  177. package/dist/src/observability/tool-usage.js +86 -0
  178. package/dist/src/observability/tool-usage.js.map +1 -1
  179. package/dist/src/providers/claude-cli-metadata-extraction.d.ts +47 -0
  180. package/dist/src/providers/claude-cli-metadata-extraction.d.ts.map +1 -0
  181. package/dist/src/providers/claude-cli-metadata-extraction.js +120 -0
  182. package/dist/src/providers/claude-cli-metadata-extraction.js.map +1 -0
  183. package/dist/src/providers/claude-cli-runner.d.ts +92 -0
  184. package/dist/src/providers/claude-cli-runner.d.ts.map +1 -0
  185. package/dist/src/providers/claude-cli-runner.js +598 -0
  186. package/dist/src/providers/claude-cli-runner.js.map +1 -0
  187. package/dist/src/providers/codex-cli-metadata-extraction.d.ts.map +1 -1
  188. package/dist/src/providers/codex-cli-metadata-extraction.js +1 -40
  189. package/dist/src/providers/codex-cli-metadata-extraction.js.map +1 -1
  190. package/dist/src/providers/codex-cli-runner.d.ts +7 -0
  191. package/dist/src/providers/codex-cli-runner.d.ts.map +1 -1
  192. package/dist/src/providers/codex-cli-runner.js +131 -5
  193. package/dist/src/providers/codex-cli-runner.js.map +1 -1
  194. package/dist/src/providers/conversation-generation.d.ts +10 -0
  195. package/dist/src/providers/conversation-generation.d.ts.map +1 -1
  196. package/dist/src/providers/conversation-generation.js +54 -13
  197. package/dist/src/providers/conversation-generation.js.map +1 -1
  198. package/dist/src/providers/openai-metadata-extraction.d.ts +48 -1
  199. package/dist/src/providers/openai-metadata-extraction.d.ts.map +1 -1
  200. package/dist/src/providers/openai-metadata-extraction.js +51 -2
  201. package/dist/src/providers/openai-metadata-extraction.js.map +1 -1
  202. package/dist/src/providers/types.d.ts +1 -1
  203. package/dist/src/providers/types.d.ts.map +1 -1
  204. package/dist/src/providers/types.js +4 -0
  205. package/dist/src/providers/types.js.map +1 -1
  206. package/dist/src/retrieval/compact.d.ts +81 -0
  207. package/dist/src/retrieval/compact.d.ts.map +1 -1
  208. package/dist/src/retrieval/compact.js +248 -8
  209. package/dist/src/retrieval/compact.js.map +1 -1
  210. package/dist/src/retrieval/context-pack.d.ts.map +1 -1
  211. package/dist/src/retrieval/context-pack.js +28 -14
  212. package/dist/src/retrieval/context-pack.js.map +1 -1
  213. package/dist/src/retrieval/evidence-key.d.ts +48 -0
  214. package/dist/src/retrieval/evidence-key.d.ts.map +1 -0
  215. package/dist/src/retrieval/evidence-key.js +131 -0
  216. package/dist/src/retrieval/evidence-key.js.map +1 -0
  217. package/dist/src/retrieval/group-by-parent.d.ts +38 -0
  218. package/dist/src/retrieval/group-by-parent.d.ts.map +1 -0
  219. package/dist/src/retrieval/group-by-parent.js +40 -0
  220. package/dist/src/retrieval/group-by-parent.js.map +1 -0
  221. package/dist/src/retrieval/lexical.d.ts.map +1 -1
  222. package/dist/src/retrieval/lexical.js +1 -3
  223. package/dist/src/retrieval/lexical.js.map +1 -1
  224. package/dist/src/retrieval/receipt.d.ts +57 -0
  225. package/dist/src/retrieval/receipt.d.ts.map +1 -0
  226. package/dist/src/retrieval/receipt.js +119 -0
  227. package/dist/src/retrieval/receipt.js.map +1 -0
  228. package/dist/src/retrieval/reranker.d.ts +12 -2
  229. package/dist/src/retrieval/reranker.d.ts.map +1 -1
  230. package/dist/src/retrieval/reranker.js +11 -4
  231. package/dist/src/retrieval/reranker.js.map +1 -1
  232. package/dist/src/retrieval/stitch-chunks.d.ts +73 -0
  233. package/dist/src/retrieval/stitch-chunks.d.ts.map +1 -0
  234. package/dist/src/retrieval/stitch-chunks.js +106 -0
  235. package/dist/src/retrieval/stitch-chunks.js.map +1 -0
  236. package/dist/src/server/app.d.ts.map +1 -1
  237. package/dist/src/server/app.js +17 -1
  238. package/dist/src/server/app.js.map +1 -1
  239. package/dist/src/server/routes/conversations-search.d.ts.map +1 -1
  240. package/dist/src/server/routes/conversations-search.js +12 -3
  241. package/dist/src/server/routes/conversations-search.js.map +1 -1
  242. package/dist/src/server/routes/friend-status.d.ts +44 -5
  243. package/dist/src/server/routes/friend-status.d.ts.map +1 -1
  244. package/dist/src/server/routes/friend-status.js +74 -6
  245. package/dist/src/server/routes/friend-status.js.map +1 -1
  246. package/dist/src/server/routes/mcp-usage.d.ts +9 -6
  247. package/dist/src/server/routes/mcp-usage.d.ts.map +1 -1
  248. package/dist/src/server/routes/mcp-usage.js.map +1 -1
  249. package/dist/src/server/routes/retrieval-feedback.d.ts +3 -0
  250. package/dist/src/server/routes/retrieval-feedback.d.ts.map +1 -0
  251. package/dist/src/server/routes/retrieval-feedback.js +290 -0
  252. package/dist/src/server/routes/retrieval-feedback.js.map +1 -0
  253. package/dist/src/server/routes/save.d.ts +3 -3
  254. package/dist/src/server/routes/save.d.ts.map +1 -1
  255. package/dist/src/server/routes/save.js +6 -2
  256. package/dist/src/server/routes/save.js.map +1 -1
  257. package/dist/src/server/routes/search.d.ts.map +1 -1
  258. package/dist/src/server/routes/search.js +19 -7
  259. package/dist/src/server/routes/search.js.map +1 -1
  260. package/dist/src/server/serving-marker.d.ts +85 -0
  261. package/dist/src/server/serving-marker.d.ts.map +1 -0
  262. package/dist/src/server/serving-marker.js +226 -0
  263. package/dist/src/server/serving-marker.js.map +1 -0
  264. package/dist/src/storage/chunk-backfill.d.ts +39 -0
  265. package/dist/src/storage/chunk-backfill.d.ts.map +1 -0
  266. package/dist/src/storage/chunk-backfill.js +295 -0
  267. package/dist/src/storage/chunk-backfill.js.map +1 -0
  268. package/dist/src/storage/filter.d.ts +42 -0
  269. package/dist/src/storage/filter.d.ts.map +1 -0
  270. package/dist/src/storage/filter.js +70 -0
  271. package/dist/src/storage/filter.js.map +1 -0
  272. package/dist/src/storage/rebuild.d.ts.map +1 -1
  273. package/dist/src/storage/rebuild.js +44 -27
  274. package/dist/src/storage/rebuild.js.map +1 -1
  275. package/dist/src/storage/tables.d.ts +41 -0
  276. package/dist/src/storage/tables.d.ts.map +1 -1
  277. package/dist/src/storage/tables.js +64 -1
  278. package/dist/src/storage/tables.js.map +1 -1
  279. package/operator/swiftbar/render-menu.py +57 -15
  280. package/package.json +5 -3
@@ -0,0 +1,113 @@
1
+ /**
2
+ * Read-only reader for Cursor's `state.vscdb` SQLite stores.
3
+ *
4
+ * DISCOVERY SLICE ONLY — this is the single I/O boundary for Cursor capture.
5
+ * It is deliberately read-only by THREE independent guarantees:
6
+ * 1. SQLite is opened with the `immutable=1` URI flag, which forbids any
7
+ * write and never creates/touches the -wal/-shm sidecar files.
8
+ * 2. Only SELECT statements are ever issued.
9
+ * 3. We shell out to the system `sqlite3` binary rather than linking a
10
+ * writable driver. (Cursor itself holds the db open under WAL; immutable
11
+ * lets us read a consistent snapshot without contending for the lock.)
12
+ *
13
+ * Why shell out instead of a native dep: Rift's Node engine floor still
14
+ * includes Node 20 (no `node:sqlite`), and we don't want to pull a native
15
+ * addon (better-sqlite3) into the tree for a read-only probe. macOS always
16
+ * ships `/usr/bin/sqlite3`, and Cursor is macOS-first. The future capture slice
17
+ * can revisit this (node:sqlite once the floor rises). See the discovery doc.
18
+ *
19
+ * PRIVACY: by default we fetch only the session index + per-session metadata
20
+ * (`composer.composerHeaders`, `composerData:*`). We never SELECT `cursorAuth/*`
21
+ * (access/refresh tokens) or `secret://*` (MCP OAuth secrets), and we do not
22
+ * fetch bubble bodies (`bubbleId:*`, the actual message text) unless the caller
23
+ * explicitly opts in.
24
+ */
25
+ import { execFileSync } from "node:child_process";
26
+ import fs from "node:fs";
27
+ import os from "node:os";
28
+ import path from "node:path";
29
/**
 * Default macOS location of Cursor's per-user application-support directory
 * (`<home>/Library/Application Support/Cursor`).
 *
 * @returns {string} The directory path, rooted at the current user's home dir.
 */
export function defaultCursorDir() {
    const segments = ["Library", "Application Support", "Cursor"];
    return path.join(os.homedir(), ...segments);
}
33
/**
 * Path of the global key/value store (`state.vscdb`) under a Cursor
 * application-support directory.
 *
 * @param {string} cursorDir Cursor application-support directory.
 * @returns {string} `<cursorDir>/User/globalStorage/state.vscdb`.
 */
export function globalStatePath(cursorDir) {
    const globalStorageDir = path.join(cursorDir, "User", "globalStorage");
    return path.join(globalStorageDir, "state.vscdb");
}
37
/**
 * Error raised by the Cursor store reader. Carries a stable, machine-readable
 * `code` alongside the human-readable message so callers can branch on the
 * failure kind instead of parsing text.
 */
export class CursorReaderError extends Error {
    /** Stable failure identifier supplied by the thrower. */
    code;
    /**
     * @param {string} message Human-readable description of the failure.
     * @param {string} code Stable failure identifier.
     * @param {{ cause?: unknown }} [options] Standard `Error` options; pass
     *   `{ cause }` to keep the underlying error attached. Optional, so
     *   existing two-argument call sites behave exactly as before.
     */
    constructor(message, code, options) {
        super(message, options);
        this.code = code;
        this.name = "CursorReaderError";
    }
}
45
/**
 * Probe whether a `sqlite3` executable can be launched, by running
 * `sqlite3 --version` with stdin/stdout/stderr all discarded.
 *
 * @returns {boolean} `true` if the probe ran without throwing, else `false`.
 */
function sqlite3Available() {
    const discardAllStdio = { stdio: ["ignore", "ignore", "ignore"] };
    let launched = true;
    try {
        execFileSync("sqlite3", ["--version"], discardAllStdio);
    }
    catch {
        // Any spawn failure or non-zero exit means we cannot rely on the binary.
        launched = false;
    }
    return launched;
}
54
/**
 * Run one SQL statement against an immutable (read-only) view of a SQLite
 * database through the `sqlite3` CLI and return the result rows.
 *
 * The CLI is switched to `.mode json`, so stdout is a JSON array holding one
 * object per row, keyed by column name.
 *
 * @param {string} dbPath Filesystem path of the database file.
 * @param {string} sql Statement to run (callers in this file pass SELECTs only).
 * @returns {Array<Record<string, unknown>>} Parsed rows; `[]` when sqlite3
 *   prints nothing.
 * @throws {CursorReaderError} With code `"query_failed"` when sqlite3 cannot
 *   be run or exits with an error, or when its output is not a JSON array.
 */
function selectJson(dbPath, sql) {
    // In a SQLite URI filename `?` starts the query string, `#` starts the
    // fragment and `%HH` is an escape. Percent-encode those three characters
    // so a path containing them is still taken literally.
    const escapedPath = dbPath.replace(/[%?#]/g, (ch) => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`);
    const uri = `file:${escapedPath}?immutable=1`;
    let stdout;
    try {
        stdout = execFileSync("sqlite3", ["-cmd", ".mode json", uri, sql], {
            encoding: "utf-8",
            maxBuffer: 256 * 1024 * 1024, // composer blobs can be large
        });
    }
    catch (err) {
        const failure = new CursorReaderError(`sqlite3 query failed: ${err instanceof Error ? err.message : String(err)}`, "query_failed");
        // Keep the underlying error reachable for debugging.
        failure.cause = err;
        throw failure;
    }
    const trimmed = stdout.trim();
    if (!trimmed) {
        return [];
    }
    let parsed;
    try {
        parsed = JSON.parse(trimmed);
    }
    catch (err) {
        const failure = new CursorReaderError(`failed to parse sqlite3 JSON output: ${err instanceof Error ? err.message : String(err)}`, "query_failed");
        failure.cause = err;
        throw failure;
    }
    // Callers iterate the result; reject anything that is not a row array
    // with a coded error instead of letting them hit a bare TypeError.
    if (!Array.isArray(parsed)) {
        throw new CursorReaderError("failed to parse sqlite3 JSON output: expected a JSON array of rows", "query_failed");
    }
    return parsed;
}
80
/**
 * Narrow raw query records to well-formed key/value rows.
 *
 * A record is kept only when both its `key` and `value` are strings; any
 * other columns on it are dropped. Everything else is skipped silently.
 *
 * @param {Iterable<unknown>} records Raw records, e.g. parsed sqlite3 output.
 * @returns {Array<{ key: string, value: string }>} Rows in input order.
 */
function toKvRows(records) {
    const rows = [];
    for (const rec of records) {
        // Optional chaining lets a null/undefined entry be skipped like any
        // other malformed record instead of throwing on property access.
        if (typeof rec?.key === "string" && typeof rec?.value === "string") {
            rows.push({ key: rec.key, value: rec.value });
        }
    }
    return rows;
}
89
/**
 * Read Cursor's global `state.vscdb` read-only and return its raw key/value
 * rows for the discovery layer.
 *
 * Always fetched: the `composer.composerHeaders` row of `ItemTable` (session
 * index) and the `composerData:*` rows of `cursorDiskKV` (per-session
 * metadata). The `bubbleId:*` rows (message bodies) are fetched only when
 * `options.includeBubbles` is truthy.
 *
 * @param {object} [options]
 * @param {string} [options.cursorDir] Cursor application-support directory;
 *   defaults to {@link defaultCursorDir}.
 * @param {boolean} [options.includeBubbles] Also fetch `bubbleId:*` rows.
 * @returns {{ dbPath: string, rows: { itemTable: Array<{key: string, value: string}>, cursorDiskKV: Array<{key: string, value: string}> } }}
 * @throws {CursorReaderError} `"db_missing"` when the store file does not
 *   exist, `"sqlite_missing"` when `sqlite3` cannot be launched, or
 *   `"query_failed"` propagated from the underlying queries.
 */
export function readCursorGlobalState(options = {}) {
    const cursorDir = options.cursorDir ?? defaultCursorDir();
    const dbPath = globalStatePath(cursorDir);
    if (!fs.existsSync(dbPath)) {
        throw new CursorReaderError(`Cursor state.vscdb not found at ${dbPath}`, "db_missing");
    }
    if (!sqlite3Available()) {
        throw new CursorReaderError("The `sqlite3` binary is required to read Cursor's store but was not found on PATH.", "sqlite_missing");
    }
    const fetchRows = (sql) => toKvRows(selectJson(dbPath, sql));
    // Session index (ItemTable). Single, small row.
    const itemTable = fetchRows("SELECT key, value FROM ItemTable WHERE key = 'composer.composerHeaders'");
    // Per-session metadata (cursorDiskKV). Small; never tokens/secrets.
    const composerData = fetchRows("SELECT key, value FROM cursorDiskKV WHERE key LIKE 'composerData:%'");
    let cursorDiskKV = composerData;
    if (options.includeBubbles) {
        const bubbles = fetchRows("SELECT key, value FROM cursorDiskKV WHERE key LIKE 'bubbleId:%'");
        // concat, not push(...bubbles): spreading a very large array into
        // call arguments can exceed the engine's argument limit (RangeError).
        cursorDiskKV = composerData.concat(bubbles);
    }
    return { dbPath, rows: { itemTable, cursorDiskKV } };
}
113
+ //# sourceMappingURL=vscdb-reader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vscdb-reader.js","sourceRoot":"","sources":["../../../../src/ingestion/cursor/vscdb-reader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAI7B,2EAA2E;AAC3E,MAAM,UAAU,gBAAgB;IAC9B,OAAO,IAAI,CAAC,IAAI,CACd,EAAE,CAAC,OAAO,EAAE,EACZ,SAAS,EACT,qBAAqB,EACrB,QAAQ,CACT,CAAC;AACJ,CAAC;AAED,mEAAmE;AACnE,MAAM,UAAU,eAAe,CAAC,SAAiB;IAC/C,OAAO,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,eAAe,EAAE,aAAa,CAAC,CAAC;AACtE,CAAC;AASD,MAAM,OAAO,iBAAkB,SAAQ,KAAK;IAG/B;IAFX,YACE,OAAe,EACN,IAGS;QAElB,KAAK,CAAC,OAAO,CAAC,CAAC;QALN,SAAI,GAAJ,IAAI,CAGK;QAGlB,IAAI,CAAC,IAAI,GAAG,mBAAmB,CAAC;IAClC,CAAC;CACF;AAED,SAAS,gBAAgB;IACvB,IAAI,CAAC;QACH,YAAY,CAAC,SAAS,EAAE,CAAC,WAAW,CAAC,EAAE,EAAE,KAAK,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;QAClF,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAS,UAAU,CAAC,MAAc,EAAE,GAAW;IAC7C,MAAM,GAAG,GAAG,QAAQ,MAAM,cAAc,CAAC;IACzC,IAAI,MAAc,CAAC;IACnB,IAAI,CAAC;QACH,MAAM,GAAG,YAAY,CAAC,SAAS,EAAE,CAAC,MAAM,EAAE,YAAY,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE;YACjE,QAAQ,EAAE,OAAO;YACjB,SAAS,EAAE,GAAG,GAAG,IAAI,GAAG,IAAI,EAAE,8BAA8B;SAC7D,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,IAAI,iBAAiB,CACzB,yBAAyB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAC3E,cAAc,CACf,CAAC;IACJ,CAAC;IACD,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC;IAC9B,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IACxB,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAkC,CAAC;IAC9D,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,IAAI,iBAAiB,CACzB,wCAAwC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAC1F,cAAc,CACf,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,QAAQ,CAAC,OAAsC;IACtD,MAAM,IAAI,GAAkB,EAAE,CAAC;IAC/B,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;QAC1B,IAAI,OAAO,GAAG,CAAC,GAAG,KAAK,QAAQ,IAAI,OAAO,GAAG,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;Y
ACjE,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC;QAChD,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAaD;;;;GAIG;AACH,MAAM,UAAU,qBAAqB,CAAC,UAAuB,EAAE;IAC7D,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,gBAAgB,EAAE,CAAC;IAC1D,MAAM,MAAM,GAAG,eAAe,CAAC,SAAS,CAAC,CAAC;IAE1C,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,iBAAiB,CACzB,mCAAmC,MAAM,EAAE,EAC3C,YAAY,CACb,CAAC;IACJ,CAAC;IACD,IAAI,CAAC,gBAAgB,EAAE,EAAE,CAAC;QACxB,MAAM,IAAI,iBAAiB,CACzB,oFAAoF,EACpF,gBAAgB,CACjB,CAAC;IACJ,CAAC;IAED,gDAAgD;IAChD,MAAM,SAAS,GAAG,QAAQ,CACxB,UAAU,CACR,MAAM,EACN,yEAAyE,CAC1E,CACF,CAAC;IAEF,oEAAoE;IACpE,MAAM,YAAY,GAAG,QAAQ,CAC3B,UAAU,CACR,MAAM,EACN,qEAAqE,CACtE,CACF,CAAC;IAEF,MAAM,YAAY,GAAG,CAAC,GAAG,YAAY,CAAC,CAAC;IAEvC,IAAI,OAAO,CAAC,cAAc,EAAE,CAAC;QAC3B,YAAY,CAAC,IAAI,CACf,GAAG,QAAQ,CACT,UAAU,CACR,MAAM,EACN,iEAAiE,CAClE,CACF,CACF,CAAC;IACJ,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE,YAAY,EAAE,EAAE,CAAC;AACvD,CAAC"}
@@ -0,0 +1,96 @@
1
+ /** Sentinel `workspaceIdentifier.id` Cursor uses when no folder is open. */
2
+ export declare const EMPTY_WINDOW_ID = "empty-window";
3
+ /** The `uri` object Cursor embeds in a folder-bound composer header. */
4
+ export interface CursorWorkspaceUri {
5
+ scheme?: string;
6
+ fsPath?: string;
7
+ path?: string;
8
+ external?: string;
9
+ }
10
+ /** A composer header's `workspaceIdentifier` (id + optional inline uri). */
11
+ export interface CursorWorkspaceIdentifier {
12
+ id?: string;
13
+ uri?: CursorWorkspaceUri | null;
14
+ }
15
+ /**
16
+ * Outcome of resolving a workspace identifier to a project root. Every
17
+ * non-`resolved` status is a deliberate, named outcome — the capture slice must
18
+ * branch on these rather than ever fabricate a root.
19
+ */
20
+ export type ProjectRootResolution = {
21
+ status: "resolved";
22
+ /** Absolute filesystem path of the project root. */
23
+ root: string;
24
+ /** Which link produced the root. */
25
+ source: "header_uri" | "workspace_json";
26
+ workspaceId: string | null;
27
+ }
28
+ /** No folder open (`empty-window`). Expected; not attributable. */
29
+ | {
30
+ status: "empty_window";
31
+ workspaceId: "empty-window";
32
+ }
33
+ /** Multi-root `.code-workspace`; root ambiguous. */
34
+ | {
35
+ status: "multi_root";
36
+ workspaceId: string;
37
+ configPath: string | null;
38
+ }
39
+ /** Non-`file` scheme (remote/ssh/wsl/devcontainer); not a local path. */
40
+ | {
41
+ status: "remote";
42
+ workspaceId: string | null;
43
+ scheme: string;
44
+ }
45
+ /** Id present but no inline uri and no readable `workspace.json`. */
46
+ | {
47
+ status: "unresolved";
48
+ workspaceId: string | null;
49
+ };
50
+ /**
51
+ * Parse a `workspace.json` file's text. Returns the folder root (single-folder
52
+ * window), a multi-root marker (`.code-workspace`), or unknown. Pure.
53
+ */
54
+ export declare function parseWorkspaceJson(text: string): {
55
+ kind: "folder";
56
+ scheme: string | null;
57
+ root: string | null;
58
+ } | {
59
+ kind: "multi_root";
60
+ configPath: string | null;
61
+ } | {
62
+ kind: "unknown";
63
+ };
64
+ /** Injected dependencies for the pure resolver. */
65
+ export interface ResolveOptions {
66
+ /**
67
+ * Returns the raw text of `workspaceStorage/<id>/workspace.json`, or null if
68
+ * absent. Injected so the resolver stays pure and fully fixture-testable.
69
+ * Used only as a fallback when the header carries no inline uri.
70
+ */
71
+ readWorkspaceJson?: (workspaceId: string) => string | null;
72
+ }
73
+ /**
74
+ * Resolve a composer header's `workspaceIdentifier` to a project root. Pure:
75
+ * the only I/O is the injected {@link ResolveOptions.readWorkspaceJson}. Tries
76
+ * the inline header uri first (current builds), then the on-disk
77
+ * `workspace.json` join (legacy builds / uri stripped). Never guesses — every
78
+ * non-resolvable case maps to an explicit status.
79
+ */
80
+ export declare function resolveProjectRoot(identifier: CursorWorkspaceIdentifier | null | undefined, options?: ResolveOptions): ProjectRootResolution;
81
+ /** Path to a workspace's `workspace.json` under a Cursor app-support dir. */
82
+ export declare function workspaceJsonPath(cursorDir: string, workspaceId: string): string;
83
+ /** Read `workspace.json` text read-only; null if it does not exist / unreadable. */
84
+ export declare function readWorkspaceJsonFromDisk(cursorDir: string, workspaceId: string): string | null;
85
+ /**
86
+ * Filesystem-backed resolution: resolve a header's identifier against a real
87
+ * Cursor app-support dir, reading `workspace.json` read-only when needed.
88
+ */
89
+ export declare function resolveCursorProjectRoot(identifier: CursorWorkspaceIdentifier | null | undefined, cursorDir: string): ProjectRootResolution;
90
+ /**
91
+ * Resolve a project root from a workspace id alone (no inline uri available).
92
+ * Used by the probe to upgrade `unresolved` discovery candidates on legacy
93
+ * builds via the on-disk `workspace.json` join.
94
+ */
95
+ export declare function resolveProjectRootByWorkspaceId(cursorDir: string, workspaceId: string): ProjectRootResolution;
96
+ //# sourceMappingURL=workspace-root.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"workspace-root.d.ts","sourceRoot":"","sources":["../../../../src/ingestion/cursor/workspace-root.ts"],"names":[],"mappings":"AAsDA,4EAA4E;AAC5E,eAAO,MAAM,eAAe,iBAAiB,CAAC;AAE9C,wEAAwE;AACxE,MAAM,WAAW,kBAAkB;IACjC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,4EAA4E;AAC5E,MAAM,WAAW,yBAAyB;IACxC,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,GAAG,CAAC,EAAE,kBAAkB,GAAG,IAAI,CAAC;CACjC;AAED;;;;GAIG;AACH,MAAM,MAAM,qBAAqB,GAC7B;IACE,MAAM,EAAE,UAAU,CAAC;IACnB,oDAAoD;IACpD,IAAI,EAAE,MAAM,CAAC;IACb,oCAAoC;IACpC,MAAM,EAAE,YAAY,GAAG,gBAAgB,CAAC;IACxC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC5B;AACH,mEAAmE;GACjE;IAAE,MAAM,EAAE,cAAc,CAAC;IAAC,WAAW,EAAE,cAAc,CAAA;CAAE;AACzD,oDAAoD;GAClD;IAAE,MAAM,EAAE,YAAY,CAAC;IAAC,WAAW,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE;AAC1E,yEAAyE;GACvE;IAAE,MAAM,EAAE,QAAQ,CAAC;IAAC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE;AAClE,qEAAqE;GACnE;IAAE,MAAM,EAAE,YAAY,CAAC;IAAC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,CAAC;AAoCzD;;;GAGG;AACH,wBAAgB,kBAAkB,CAChC,IAAI,EAAE,MAAM,GAEV;IAAE,IAAI,EAAE,QAAQ,CAAC;IAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,GAC9D;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,GACjD;IAAE,IAAI,EAAE,SAAS,CAAA;CAAE,CAkBtB;AAED,mDAAmD;AACnD,MAAM,WAAW,cAAc;IAC7B;;;;OAIG;IACH,iBAAiB,CAAC,EAAE,CAAC,WAAW,EAAE,MAAM,KAAK,MAAM,GAAG,IAAI,CAAC;CAC5D;AAED;;;;;;GAMG;AACH,wBAAgB,kBAAkB,CAChC,UAAU,EAAE,yBAAyB,GAAG,IAAI,GAAG,SAAS,EACxD,OAAO,GAAE,cAAmB,GAC3B,qBAAqB,CAqCvB;AAED,6EAA6E;AAC7E,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,GAAG,MAAM,CAEhF;AAED,oFAAoF;AACpF,wBAAgB,yBAAyB,CACvC,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,MAAM,GAClB,MAAM,GAAG,IAAI,CAMf;AAED;;;GAGG;AACH,wBAAgB,wBAAwB,CACtC,UAAU,EAAE,yBAAyB,GAAG,IAAI,GAAG,SAAS,EACxD,SAAS,EAAE,MAAM,GAChB,qBAAqB,CAIvB;AAED;;;;GAIG;AACH,wBAAgB,+BAA+B,CAC7C,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,MAAM,GAClB,qBAAqB,CAEvB"}
@@ -0,0 +1,187 @@
1
+ /**
2
+ * Resolve a native Cursor session's `workspaceIdentifier` to a filesystem
3
+ * project root — the missing half of Cursor project-root attribution.
4
+ *
5
+ * DISCOVERY SLICE ONLY. Read-only, no LanceDB writes, no daemon, no capture.
6
+ * The pure resolver ({@link resolveProjectRoot}) takes an injected
7
+ * `readWorkspaceJson` so it is exhaustively unit-testable from synthetic
8
+ * metadata with no filesystem and no real Cursor install. The fs-backed wrapper
9
+ * ({@link resolveCursorProjectRoot}) only reads `workspace.json` (a tiny
10
+ * `{ "folder": "file://…" }` file) — never the workspace's `state.vscdb`,
11
+ * never tokens, never message content.
12
+ *
13
+ * ## The join contract (empirically verified, macOS Cursor "glass", 2026-06)
14
+ *
15
+ * On opening a folder window, Cursor writes the composer header's
16
+ * `workspaceIdentifier` with TWO independent links to the project root:
17
+ *
18
+ * "workspaceIdentifier": {
19
+ * "id": "<workspaceStorageHash>", // e.g. "11ed9332b5bd79fd69ad93a7636abd28"
20
+ * "uri": { "scheme": "file",
21
+ * "fsPath": "/Users/me/projects/foo",
22
+ * "path": "/Users/me/projects/foo",
23
+ * "external": "file:///Users/me/projects/foo" }
24
+ * }
25
+ *
26
+ * 1. INLINE (primary, current builds): `uri.fsPath` IS the project root.
27
+ * No disk lookup needed.
28
+ * 2. ON-DISK (authoritative fallback): `id` equals the directory name under
29
+ * `~/Library/Application Support/Cursor/User/workspaceStorage/<id>/`,
30
+ * whose `workspace.json` holds `{ "folder": "file://<root>" }`. Verified:
31
+ * the same hash appears as both `workspaceIdentifier.id` and the storage
32
+ * dir name, and its `workspace.json.folder` matches `uri.fsPath`.
33
+ *
34
+ * The storage hash is NOT a recomputable md5 of the folder URI (checked: no
35
+ * simple encoding reproduces it) — so resolution MUST read `workspace.json` (or
36
+ * trust the inline uri), never re-derive the hash.
37
+ *
38
+ * ## Failure modes (all first-class — never guessed)
39
+ * - `id === "empty-window"` and no `uri` → no folder open. NOT attributable.
40
+ * This is the common case on a machine with only empty windows.
41
+ * - `workspace.json` has `"workspace": "…code-workspace"` (multi-root) instead
42
+ * of `"folder"` → project root is ambiguous. Do not pick one.
43
+ * - `uri.scheme` / `folder` scheme is not `file` (ssh-remote, wsl, devcontainer)
44
+ * → not a local path.
45
+ * - `id` present but `workspace.json` missing/purged and no inline `uri`
46
+ * → unresolved.
47
+ *
48
+ * The capture slice should scope to a project root ONLY on `resolved`; for every
49
+ * other status, capture WITHOUT project scoping (or behind a user-confirmed
50
+ * fallback). See docs/cursor-capture-discovery.md.
51
+ */
52
+ import fs from "node:fs";
53
+ import path from "node:path";
54
+ /** Sentinel `workspaceIdentifier.id` Cursor uses when no folder is open. */
55
+ export const EMPTY_WINDOW_ID = "empty-window";
56
/**
 * Parse a URI string into `{ scheme, path }`. Never throws.
 *
 * @param {string} uri - URI text, e.g. `file:///Users/me/my%20project`.
 * @returns {{ scheme: string | null, path: string | null }}
 *   - `{ scheme: null, path: null }` when `uri` is not a parseable URL.
 *   - `{ scheme, path: null }` for any non-`file` scheme.
 *   - `{ scheme: "file", path }` with the percent-decoded pathname, or
 *     `path: null` when the pathname holds a malformed percent-escape.
 */
function parseUriString(uri) {
    let parsed;
    try {
        parsed = new URL(uri);
    }
    catch {
        return { scheme: null, path: null };
    }
    const scheme = parsed.protocol.replace(/:$/, "");
    if (scheme !== "file")
        return { scheme, path: null };
    // file:///abs/path -> decode percent-encoding (spaces, accents in client dirs)
    try {
        return { scheme, path: decodeURIComponent(parsed.pathname) };
    }
    catch {
        // `new URL` tolerates malformed escapes such as "%zz" or a lone "%",
        // but decodeURIComponent rejects them with URIError. Report the path as
        // undeterminable rather than letting the error escape to the resolver.
        return { scheme, path: null };
    }
}
71
/**
 * Derive `{ scheme, root }` from the inline `uri` object on a composer header.
 *
 * Lookup order: a non-`file` scheme short-circuits with no root; then the
 * first non-empty string among `fsPath` and `path` is taken as the root; then
 * `external` is parsed as a URI string; otherwise there is no root.
 *
 * @param {object | null | undefined} uri - the header's `uri` value.
 * @returns {{ scheme: string | null, root: string | null }}
 */
function rootFromInlineUri(uri) {
    if (uri === null || typeof uri !== "object") {
        return { scheme: null, root: null };
    }
    const declaredScheme = typeof uri.scheme === "string" ? uri.scheme : null;
    const isNonFileScheme = Boolean(declaredScheme) && declaredScheme !== "file";
    if (isNonFileScheme) {
        return { scheme: declaredScheme, root: null };
    }
    // fsPath / path are already decoded absolute paths in Cursor's serialization.
    for (const key of ["fsPath", "path"]) {
        const candidate = uri[key];
        if (typeof candidate === "string" && candidate !== "") {
            return { scheme: declaredScheme ?? "file", root: candidate };
        }
    }
    if (typeof uri.external !== "string") {
        return { scheme: declaredScheme, root: null };
    }
    const { scheme: externalScheme, path: externalPath } = parseUriString(uri.external);
    return { scheme: externalScheme ?? declaredScheme, root: externalPath };
}
90
/**
 * Interpret the text of a `workspace.json` file. Performs no I/O.
 *
 * @param {string} text - raw file contents.
 * @returns {{ kind: "folder", scheme: string | null, root: string | null }
 *   | { kind: "multi_root", configPath: string | null }
 *   | { kind: "unknown" }}
 *   `folder` when a string `folder` entry is present (single-folder window),
 *   `multi_root` when a string `workspace` entry is present instead
 *   (`.code-workspace`), and `unknown` for invalid JSON, non-object JSON, or
 *   an object with neither entry.
 */
export function parseWorkspaceJson(text) {
    let decoded;
    try {
        decoded = JSON.parse(text);
    }
    catch {
        return { kind: "unknown" };
    }
    if (decoded === null || typeof decoded !== "object") {
        return { kind: "unknown" };
    }
    const { folder, workspace } = decoded;
    // `folder` wins over `workspace` when both are present.
    if (typeof folder === "string") {
        const located = parseUriString(folder);
        return { kind: "folder", scheme: located.scheme, root: located.path };
    }
    if (typeof workspace === "string") {
        return { kind: "multi_root", configPath: parseUriString(workspace).path };
    }
    return { kind: "unknown" };
}
115
/**
 * Map a composer header's `workspaceIdentifier` to a project-root status.
 *
 * The only I/O is the injected `options.readWorkspaceJson`. Resolution order:
 *   1. the empty-window sentinel id → `empty_window`;
 *   2. the inline header `uri` → `remote` (non-file scheme) or `resolved`;
 *   3. the on-disk `workspace.json` text for the id → `remote`, `resolved`
 *      or `multi_root`;
 *   4. anything else → `unresolved`. Nothing is ever guessed.
 *
 * @param {{ id?: string, uri?: object | null } | null | undefined} identifier
 * @param {{ readWorkspaceJson?: (workspaceId: string) => string | null }} [options]
 * @returns {object} a status record whose `status` is one of `empty_window`,
 *   `remote`, `resolved`, `multi_root` or `unresolved`.
 */
export function resolveProjectRoot(identifier, options = {}) {
    const rawId = identifier?.id;
    const workspaceId = typeof rawId === "string" ? rawId : null;
    // 1. Sentinel: no folder open. Common, expected, not attributable.
    if (workspaceId === EMPTY_WINDOW_ID) {
        return { status: "empty_window", workspaceId: EMPTY_WINDOW_ID };
    }
    // 2. Inline uri (Cursor embeds the folder directly on the header).
    const { scheme: inlineScheme, root: inlineRoot } = rootFromInlineUri(identifier?.uri);
    if (inlineScheme && inlineScheme !== "file") {
        return { status: "remote", workspaceId, scheme: inlineScheme };
    }
    if (inlineRoot) {
        return { status: "resolved", root: inlineRoot, source: "header_uri", workspaceId };
    }
    // 4 (declared early). Shared fall-through result: nothing reliable, do not guess.
    const unresolved = { status: "unresolved", workspaceId };
    // 3. On-disk fallback: join id -> workspaceStorage/<id>/workspace.json.
    if (!workspaceId || !options.readWorkspaceJson) {
        return unresolved;
    }
    const text = options.readWorkspaceJson(workspaceId);
    if (!text) {
        return unresolved;
    }
    const parsed = parseWorkspaceJson(text);
    switch (parsed.kind) {
        case "multi_root":
            return { status: "multi_root", workspaceId, configPath: parsed.configPath };
        case "folder":
            if (parsed.scheme && parsed.scheme !== "file") {
                return { status: "remote", workspaceId, scheme: parsed.scheme };
            }
            if (parsed.root) {
                return { status: "resolved", root: parsed.root, source: "workspace_json", workspaceId };
            }
            return unresolved;
        default:
            return unresolved;
    }
}
157
/**
 * Build the location of a workspace's `workspace.json` beneath a Cursor
 * app-support directory: `<cursorDir>/User/workspaceStorage/<workspaceId>/workspace.json`.
 *
 * @param {string} cursorDir - Cursor app-support directory.
 * @param {string} workspaceId - workspace storage directory name.
 * @returns {string} the joined path.
 */
export function workspaceJsonPath(cursorDir, workspaceId) {
    const segments = ["User", "workspaceStorage", workspaceId, "workspace.json"];
    return path.join(cursorDir, ...segments);
}
161
/**
 * Read a workspace's `workspace.json` text, read-only.
 *
 * `workspaceId` comes from session metadata, so it is accepted only when it
 * is a single path segment. An id such as `..` or one containing a path
 * separator would make the joined path leave `workspaceStorage/<id>/`; such
 * ids are treated the same as a missing file.
 *
 * @param {string} cursorDir - Cursor app-support directory.
 * @param {string} workspaceId - workspace storage directory name.
 * @returns {string | null} the file's UTF-8 text, or null when the id is not a
 *   plain directory name or the file does not exist / cannot be read.
 */
export function readWorkspaceJsonFromDisk(cursorDir, workspaceId) {
    const isSingleSegment = typeof workspaceId === "string" &&
        workspaceId !== "" &&
        workspaceId !== "." &&
        workspaceId !== ".." &&
        !/[\\/]/.test(workspaceId);
    if (!isSingleSegment)
        return null;
    try {
        return fs.readFileSync(workspaceJsonPath(cursorDir, workspaceId), "utf-8");
    }
    catch {
        // Deliberate best-effort: a missing or unreadable file is reported as null.
        return null;
    }
}
170
/**
 * Filesystem-backed variant of the resolver: resolves a header's identifier
 * against a real Cursor app-support directory, reading `workspace.json` from
 * disk (read-only) only when the resolver asks for it.
 *
 * @param {{ id?: string, uri?: object | null } | null | undefined} identifier
 * @param {string} cursorDir - Cursor app-support directory.
 * @returns {object} the status record produced by `resolveProjectRoot`.
 */
export function resolveCursorProjectRoot(identifier, cursorDir) {
    const readWorkspaceJson = (workspaceId) => readWorkspaceJsonFromDisk(cursorDir, workspaceId);
    return resolveProjectRoot(identifier, { readWorkspaceJson });
}
179
/**
 * Resolve a project root when only the workspace id is known (no inline uri).
 * Used by the probe to upgrade `unresolved` discovery candidates on legacy
 * builds via the on-disk `workspace.json` join.
 *
 * @param {string} cursorDir - Cursor app-support directory.
 * @param {string} workspaceId - workspace storage directory name.
 * @returns {object} the status record produced by `resolveCursorProjectRoot`.
 */
export function resolveProjectRootByWorkspaceId(cursorDir, workspaceId) {
    const identifier = { id: workspaceId };
    return resolveCursorProjectRoot(identifier, cursorDir);
}
187
+ //# sourceMappingURL=workspace-root.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"workspace-root.js","sourceRoot":"","sources":["../../../../src/ingestion/cursor/workspace-root.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkDG;AACH,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,4EAA4E;AAC5E,MAAM,CAAC,MAAM,eAAe,GAAG,cAAc,CAAC;AAuC9C,+EAA+E;AAC/E,SAAS,cAAc,CAAC,GAAW;IACjC,IAAI,MAAW,CAAC;IAChB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IACxB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACtC,CAAC;IACD,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IACjD,IAAI,MAAM,KAAK,MAAM;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACrD,+EAA+E;IAC/E,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,kBAAkB,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;AAC/D,CAAC;AAED,oEAAoE;AACpE,SAAS,iBAAiB,CACxB,GAA0C;IAE1C,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACzE,MAAM,MAAM,GAAG,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC;IAClE,IAAI,MAAM,IAAI,MAAM,KAAK,MAAM;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAC/D,8EAA8E;IAC9E,MAAM,MAAM,GACV,CAAC,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,CAAC;QAC9C,CAAC,OAAO,GAAG,CAAC,IAAI,KAAK,QAAQ,IAAI,GAAG,CAAC,IAAI,CAAC;QAC1C,IAAI,CAAC;IACP,IAAI,MAAM;QAAE,OAAO,EAAE,MAAM,EAAE,MAAM,IAAI,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;IAC9D,IAAI,OAAO,GAAG,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,cAAc,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACzC,OAAO,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,IAAI,MAAM,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;IAC1D,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;AAChC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAChC,IAAY;IAKZ,IAAI,GAAY,CAAC;IACjB,IAAI,CAAC;QACH,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACzB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;IAC7B,CAAC;IACD,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ;QAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;IAChE,MAAM,GAAG,GAAG,GAA8B,CAAC;IAC3C,IAAI,
OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;QACnC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,cAAc,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACvD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC;IAC7C,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,SAAS,KAAK,QAAQ,EAAE,CAAC;QACtC,MAAM,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,cAAc,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAClD,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC;IAC/C,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;AAC7B,CAAC;AAYD;;;;;;GAMG;AACH,MAAM,UAAU,kBAAkB,CAChC,UAAwD,EACxD,UAA0B,EAAE;IAE5B,MAAM,EAAE,GAAG,OAAO,UAAU,EAAE,EAAE,KAAK,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IAErE,mEAAmE;IACnE,IAAI,EAAE,KAAK,eAAe,EAAE,CAAC;QAC3B,OAAO,EAAE,MAAM,EAAE,cAAc,EAAE,WAAW,EAAE,eAAe,EAAE,CAAC;IAClE,CAAC;IAED,mEAAmE;IACnE,MAAM,MAAM,GAAG,iBAAiB,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;IAClD,IAAI,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;QAC9C,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC;IACtE,CAAC;IACD,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;QAChB,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,YAAY,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC;IAC1F,CAAC;IAED,wEAAwE;IACxE,IAAI,EAAE,IAAI,OAAO,CAAC,iBAAiB,EAAE,CAAC;QACpC,MAAM,IAAI,GAAG,OAAO,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;QAC3C,IAAI,IAAI,EAAE,CAAC;YACT,MAAM,MAAM,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;YACxC,IAAI,MAAM,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC7B,IAAI,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;oBAC9C,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC;gBACtE,CAAC;gBACD,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;oBAChB,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,gBAAgB,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC;gBAC9F,CAAC;YACH,CAAC;iBAAM,IAAI,MAAM,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;gBACxC,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,WAAW,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,CAAC;YAClF,CAAC;QACH,CAAC;IACH,CAAC;IAED,sCAAsC;IACtC,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC;AACnD,CAAC;AAED,6EAA6E;AAC7E,MAAM
,UAAU,iBAAiB,CAAC,SAAiB,EAAE,WAAmB;IACtE,OAAO,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,kBAAkB,EAAE,WAAW,EAAE,gBAAgB,CAAC,CAAC;AACzF,CAAC;AAED,oFAAoF;AACpF,MAAM,UAAU,yBAAyB,CACvC,SAAiB,EACjB,WAAmB;IAEnB,IAAI,CAAC;QACH,OAAO,EAAE,CAAC,YAAY,CAAC,iBAAiB,CAAC,SAAS,EAAE,WAAW,CAAC,EAAE,OAAO,CAAC,CAAC;IAC7E,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,wBAAwB,CACtC,UAAwD,EACxD,SAAiB;IAEjB,OAAO,kBAAkB,CAAC,UAAU,EAAE;QACpC,iBAAiB,EAAE,CAAC,WAAW,EAAE,EAAE,CAAC,yBAAyB,CAAC,SAAS,EAAE,WAAW,CAAC;KACtF,CAAC,CAAC;AACL,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,+BAA+B,CAC7C,SAAiB,EACjB,WAAmB;IAEnB,OAAO,wBAAwB,CAAC,EAAE,EAAE,EAAE,WAAW,EAAE,EAAE,SAAS,CAAC,CAAC;AAClE,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"indexer.d.ts","sourceRoot":"","sources":["../../../src/ingestion/indexer.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AASxF,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,KAAK,GAAG,QAAQ,GAAG,QAAQ,CAAC;IAClC,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,aAAa;IAC5B,UAAU,EAAE,UAAU,CAAC;IACvB,WAAW,EAAE,WAAW,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;IAChC,wEAAwE;IACxE,SAAS,CAAC,EAAE,iBAAiB,GAAG,uBAAuB,CAAC;IACxD;;;;;OAKG;IACH,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;;GAGG;AACH,wBAAgB,MAAM,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,CAEjD;AAED,qBAAa,OAAO;IAClB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAoB;IAC9C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAgB;gBAE3B,SAAS,EAAE,iBAAiB,EAAE,MAAM,EAAE,aAAa;IAK/D;;;;OAIG;IACG,WAAW,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC;YAcpC,YAAY;YAiEZ,YAAY;CAK3B"}
1
+ {"version":3,"file":"indexer.d.ts","sourceRoot":"","sources":["../../../src/ingestion/indexer.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAYxF,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,KAAK,GAAG,QAAQ,GAAG,QAAQ,CAAC;IAClC,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,aAAa;IAC5B,UAAU,EAAE,UAAU,CAAC;IACvB,WAAW,EAAE,WAAW,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;IAChC,wEAAwE;IACxE,SAAS,CAAC,EAAE,iBAAiB,GAAG,uBAAuB,CAAC;IACxD;;;;;OAKG;IACH,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;;GAGG;AACH,wBAAgB,MAAM,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,CAEjD;AAED,qBAAa,OAAO;IAClB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAoB;IAC9C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAgB;gBAE3B,SAAS,EAAE,iBAAiB,EAAE,MAAM,EAAE,aAAa;IAK/D;;;;OAIG;IACG,WAAW,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC;YAcpC,YAAY;YAuEZ,YAAY;CAK3B"}
@@ -12,6 +12,9 @@ import { validatePath, validateUnlinkPath } from "../security/paths.js";
12
12
  import { writeSkipQuarantine } from "./skip-quarantine.js";
13
13
  import { recordEmbed } from "../observability/embedding-events.js";
14
14
  import { recordIndexWrite } from "../observability/index-events.js";
15
+ import { chunkText, chunkingEnabled } from "./chunk-text.js";
16
+ import { docChunkId, docChunkMetadata } from "./chunk-meta.js";
17
+ import { eqFilter } from "../storage/filter.js";
15
18
  /**
16
19
  * Deterministic row ID from the canonical source path.
17
20
  * Same file always gets the same ID, enabling upsert via delete+add.
@@ -48,10 +51,11 @@ export class Indexer {
48
51
  // strings with HTTP 400, and indexing an empty row produces nothing
49
52
  // searchable anyway. Quarantine the skip so it's visible (not stderr-only)
50
53
  // and remove any stale row from a prior good extraction of the same path.
54
+ const table = getTable(this.config.tableName ?? "structured_docs");
55
+ const sourcePathFilter = eqFilter("source_path", filePath);
51
56
  if (isBlank(doc.content)) {
52
- const id = fileId(filePath);
53
- const table = getTable(this.config.tableName ?? "structured_docs");
54
- await table.delete(`id = '${id}'`);
57
+ // Remove the whole prior set (single row or chunk set) for this path.
58
+ await table.delete(sourcePathFilter);
55
59
  await writeSkipQuarantine(this.config.dataDir, {
56
60
  reason: "empty_extracted_content",
57
61
  source_path: filePath,
@@ -59,41 +63,46 @@ export class Indexer {
59
63
  });
60
64
  return;
61
65
  }
62
- const embeddingVec = await recordEmbed(this.config.dataDir, this.embedding, {
63
- pipeline: this.config.sourceType === "filesystem_watched"
64
- ? "watcher"
65
- : "scheduled_scan",
66
- operation: "document_embedding",
67
- input_count: 1,
68
- }, () => this.embedding.embed(doc.content));
69
- const id = fileId(filePath);
70
- const table = getTable(this.config.tableName ?? "structured_docs");
71
- // Upsert: delete existing row (if any), then add new one.
72
- await table.delete(`id = '${id}'`);
73
- const row = {
74
- id,
75
- source_path: filePath,
76
- content: doc.content,
77
- embedding: embeddingVec,
78
- source_type: this.config.sourceType,
79
- source_scope: this.config.sourceScope,
80
- client_name: this.config.clientName,
81
- indexed_at: new Date().toISOString(),
82
- metadata: JSON.stringify(doc.metadata),
83
- };
66
+ const pipeline = this.config.sourceType === "filesystem_watched" ? "watcher" : "scheduled_scan";
67
+ const baseId = fileId(filePath);
68
+ // Chunk (flag-gated). chunkText short-circuits small docs to a single
69
+ // chunk, so a below-threshold doc — and every doc with the flag off —
70
+ // produces exactly one unmarked row, byte-identical to the prior behavior.
71
+ const chunks = chunkingEnabled() ? chunkText(doc.content) : [doc.content];
72
+ const count = chunks.length;
73
+ // Data-safety: embed the FULL new set BEFORE the destructive delete, so a
74
+ // failure leaves the old set intact (filesystem is the source of truth).
75
+ const indexedAt = new Date().toISOString();
76
+ const rows = [];
77
+ for (let i = 0; i < count; i++) {
78
+ const chunk = chunks[i];
79
+ const embeddingVec = await recordEmbed(this.config.dataDir, this.embedding, { pipeline, operation: "document_embedding", input_count: 1 }, () => this.embedding.embed(chunk));
80
+ rows.push({
81
+ id: count > 1 ? docChunkId(baseId, i) : baseId,
82
+ source_path: filePath,
83
+ content: chunk,
84
+ embedding: embeddingVec,
85
+ source_type: this.config.sourceType,
86
+ source_scope: this.config.sourceScope,
87
+ client_name: this.config.clientName,
88
+ indexed_at: indexedAt,
89
+ metadata: JSON.stringify(docChunkMetadata(doc.metadata, i, count)),
90
+ });
91
+ }
92
+ // Upsert by source_path: removes any prior chunk set AND any legacy
93
+ // single row for this file before writing the new set (no dup/stale chunks).
94
+ await table.delete(sourcePathFilter);
84
95
  await recordIndexWrite(this.config.dataDir, {
85
96
  table: this.config.tableName ?? "structured_docs",
86
- pipeline: this.config.sourceType === "filesystem_watched"
87
- ? "watcher"
88
- : "scheduled_scan",
97
+ pipeline,
89
98
  operation: "structured_doc_upsert",
90
- row_count: 1,
91
- }, () => table.add([row]));
99
+ row_count: rows.length,
100
+ }, () => table.add(rows));
92
101
  }
93
102
  async handleDelete(filePath) {
94
- const id = fileId(filePath);
95
103
  const table = getTable(this.config.tableName ?? "structured_docs");
96
- await table.delete(`id = '${id}'`);
104
+ // Delete by source_path so all chunks of the file are removed together.
105
+ await table.delete(eqFilter("source_path", filePath));
97
106
  }
98
107
  }
99
108
  function isBlank(s) {
@@ -1 +1 @@
1
- {"version":3,"file":"indexer.js","sourceRoot":"","sources":["../../../src/ingestion/indexer.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,OAAO,MAAM,MAAM,aAAa,CAAC;AAGjC,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AACxE,OAAO,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAC3D,OAAO,EAAE,WAAW,EAAE,MAAM,sCAAsC,CAAC;AACnE,OAAO,EAAE,gBAAgB,EAAE,MAAM,kCAAkC,CAAC;AAuBpE;;;GAGG;AACH,MAAM,UAAU,MAAM,CAAC,UAAkB;IACvC,OAAO,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AACnF,CAAC;AAED,MAAM,OAAO,OAAO;IACD,SAAS,CAAoB;IAC7B,MAAM,CAAgB;IAEvC,YAAY,SAA4B,EAAE,MAAqB;QAC7D,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,WAAW,CAAC,KAAgB;QAChC,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC5B,MAAM,SAAS,GAAG,kBAAkB,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;YAC3E,MAAM,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;YACnC,OAAO;QACT,CAAC;QAED,MAAM,SAAS,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;QAErE,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC;YAAE,OAAO;QAEpC,MAAM,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;IACrC,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,QAAgB;QACzC,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,QAAQ,CAAC,CAAC;QAEpC,0EAA0E;QAC1E,oEAAoE;QACpE,2EAA2E;QAC3E,0EAA0E;QAC1E,IAAI,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YACzB,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC;YAC5B,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,IAAI,iBAAiB,CAAC,CAAC;YACnE,MAAM,KAAK,CAAC,MAAM,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;YACnC,MAAM,mBAAmB,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE;gBAC7C,MAAM,EAAE,yBAAyB;gBACjC,WAAW,EAAE,QAAQ;gBACrB,QAAQ,EAAE,GAAG,CAAC,QAAQ;aACvB,CAAC,CAAC;YACH,OAAO;QACT,CAAC;QAED,MAAM,YAAY,GAAG,MAAM,WAAW,CACpC,IAAI,CAAC,MAAM,CAAC,OAAO,EACnB,IAAI,CAAC,SAAS,EACd;YACE,QAAQ,EACN,IAAI,CAAC,MAAM,CAAC,UAAU,KAAK,oBAAoB;gBAC7C,CAAC,CAAC,SAAS;gBACX,CAAC,CAAC,gBAAgB;YACtB,SAAS,EAAE,oBAAoB;YAC/B,WAAW,EAAE,CAAC;SACf,
EACD,GAAG,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,CACxC,CAAC;QACF,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC5B,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,IAAI,iBAAiB,CAAC,CAAC;QAEnE,0DAA0D;QAC1D,MAAM,KAAK,CAAC,MAAM,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;QAEnC,MAAM,GAAG,GAAqB;YAC5B,EAAE;YACF,WAAW,EAAE,QAAQ;YACrB,OAAO,EAAE,GAAG,CAAC,OAAO;YACpB,SAAS,EAAE,YAAY;YACvB,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,UAAU;YACnC,YAAY,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;YACrC,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,UAAU;YACnC,UAAU,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACpC,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC;SACvC,CAAC;QAEF,MAAM,gBAAgB,CACpB,IAAI,CAAC,MAAM,CAAC,OAAO,EACnB;YACE,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS,IAAI,iBAAiB;YACjD,QAAQ,EACN,IAAI,CAAC,MAAM,CAAC,UAAU,KAAK,oBAAoB;gBAC7C,CAAC,CAAC,SAAS;gBACX,CAAC,CAAC,gBAAgB;YACtB,SAAS,EAAE,uBAAuB;YAClC,SAAS,EAAE,CAAC;SACb,EACD,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CACvB,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,QAAgB;QACzC,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC5B,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,IAAI,iBAAiB,CAAC,CAAC;QACnE,MAAM,KAAK,CAAC,MAAM,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;IACrC,CAAC;CACF;AAED,SAAS,OAAO,CAAC,CAAS;IACxB,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,CAAC;AAC/B,CAAC"}
1
+ {"version":3,"file":"indexer.js","sourceRoot":"","sources":["../../../src/ingestion/indexer.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,OAAO,MAAM,MAAM,aAAa,CAAC;AAGjC,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AACxE,OAAO,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAC3D,OAAO,EAAE,WAAW,EAAE,MAAM,sCAAsC,CAAC;AACnE,OAAO,EAAE,gBAAgB,EAAE,MAAM,kCAAkC,CAAC;AACpE,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAC7D,OAAO,EAAE,UAAU,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAC/D,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAuBhD;;;GAGG;AACH,MAAM,UAAU,MAAM,CAAC,UAAkB;IACvC,OAAO,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AACnF,CAAC;AAED,MAAM,OAAO,OAAO;IACD,SAAS,CAAoB;IAC7B,MAAM,CAAgB;IAEvC,YAAY,SAA4B,EAAE,MAAqB;QAC7D,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,WAAW,CAAC,KAAgB;QAChC,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC5B,MAAM,SAAS,GAAG,kBAAkB,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;YAC3E,MAAM,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;YACnC,OAAO;QACT,CAAC;QAED,MAAM,SAAS,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;QAErE,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC;YAAE,OAAO;QAEpC,MAAM,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;IACrC,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,QAAgB;QACzC,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,QAAQ,CAAC,CAAC;QAEpC,0EAA0E;QAC1E,oEAAoE;QACpE,2EAA2E;QAC3E,0EAA0E;QAC1E,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,IAAI,iBAAiB,CAAC,CAAC;QACnE,MAAM,gBAAgB,GAAG,QAAQ,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;QAE3D,IAAI,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YACzB,sEAAsE;YACtE,MAAM,KAAK,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC;YACrC,MAAM,mBAAmB,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE;gBAC7C,MAAM,EAAE,yBAAyB;gBACjC,WAAW,EAAE,QAAQ;gBACrB,QAAQ,EAAE,GAAG,CAAC,QAAQ;aACvB,CAAC,CAAC;YACH,OAAO;QACT,CAAC;QAED,MAAM,QAAQ,GACZ,IAAI,CAAC,MAAM,CAAC,UAAU,KAAK,oBAAoB,CAAC,CAA
C,CAAC,SAAS,CAAC,CAAC,CAAC,gBAAgB,CAAC;QACjF,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC;QAEhC,sEAAsE;QACtE,sEAAsE;QACtE,2EAA2E;QAC3E,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAC1E,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC;QAE5B,0EAA0E;QAC1E,yEAAyE;QACzE,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC3C,MAAM,IAAI,GAAuB,EAAE,CAAC;QACpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAE,CAAC;YACzB,MAAM,YAAY,GAAG,MAAM,WAAW,CACpC,IAAI,CAAC,MAAM,CAAC,OAAO,EACnB,IAAI,CAAC,SAAS,EACd,EAAE,QAAQ,EAAE,SAAS,EAAE,oBAAoB,EAAE,WAAW,EAAE,CAAC,EAAE,EAC7D,GAAG,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,CAClC,CAAC;YACF,IAAI,CAAC,IAAI,CAAC;gBACR,EAAE,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM;gBAC9C,WAAW,EAAE,QAAQ;gBACrB,OAAO,EAAE,KAAK;gBACd,SAAS,EAAE,YAAY;gBACvB,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,UAAU;gBACnC,YAAY,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;gBACrC,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,UAAU;gBACnC,UAAU,EAAE,SAAS;gBACrB,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,gBAAgB,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,EAAE,KAAK,CAAC,CAAC;aACnE,CAAC,CAAC;QACL,CAAC;QAED,oEAAoE;QACpE,6EAA6E;QAC7E,MAAM,KAAK,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC;QACrC,MAAM,gBAAgB,CACpB,IAAI,CAAC,MAAM,CAAC,OAAO,EACnB;YACE,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS,IAAI,iBAAiB;YACjD,QAAQ;YACR,SAAS,EAAE,uBAAuB;YAClC,SAAS,EAAE,IAAI,CAAC,MAAM;SACvB,EACD,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CACtB,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,QAAgB;QACzC,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,IAAI,iBAAiB,CAAC,CAAC;QACnE,wEAAwE;QACxE,MAAM,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC,CAAC;IACxD,CAAC;CACF;AAED,SAAS,OAAO,CAAC,CAAS;IACxB,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,CAAC;AAC/B,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"compact.d.ts","sourceRoot":"","sources":["../../../../src/jobs/handlers/compact.ts"],"names":[],"mappings":"AAgCA,OAAO,KAAK,EAAO,UAAU,EAAE,MAAM,aAAa,CAAC;AACnD,OAAO,KAAK,EAAE,iBAAiB,EAAE,gBAAgB,EAAqB,MAAM,0BAA0B,CAAC;AACvG,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AAC1D,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AAQ1D,MAAM,WAAW,WAAW;IAC1B,iBAAiB,EAAE,iBAAiB,CAAC;IACrC,gBAAgB,EAAE,gBAAgB,CAAC;IACnC,OAAO,EAAE,MAAM,CAAC;IAChB,gBAAgB,EAAE,MAAM,CAAC;IACzB,QAAQ,EAAE,OAAO,EAAE,CAAC;IACpB,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;CACzB;AAED,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,gBAAgB;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,QAAQ,EAAE,OAAO,CAAC;CACnB;AASD,4DAA4D;AAC5D,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAE5D;AAMD,wBAAgB,oBAAoB,CAAC,IAAI,EAAE,WAAW,GAAG,UAAU,CAwBlE"}
1
+ {"version":3,"file":"compact.d.ts","sourceRoot":"","sources":["../../../../src/jobs/handlers/compact.ts"],"names":[],"mappings":"AAgCA,OAAO,KAAK,EAAO,UAAU,EAAE,MAAM,aAAa,CAAC;AACnD,OAAO,KAAK,EAAE,iBAAiB,EAAE,gBAAgB,EAAqB,MAAM,0BAA0B,CAAC;AACvG,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AAC1D,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AAS1D,MAAM,WAAW,WAAW;IAC1B,iBAAiB,EAAE,iBAAiB,CAAC;IACrC,gBAAgB,EAAE,gBAAgB,CAAC;IACnC,OAAO,EAAE,MAAM,CAAC;IAChB,gBAAgB,EAAE,MAAM,CAAC;IACzB,QAAQ,EAAE,OAAO,EAAE,CAAC;IACpB,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;CACzB;AAED,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,gBAAgB;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,QAAQ,EAAE,OAAO,CAAC;CACnB;AASD,4DAA4D;AAC5D,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAE5D;AAMD,wBAAgB,oBAAoB,CAAC,IAAI,EAAE,WAAW,GAAG,UAAU,CAwBlE"}
@@ -31,6 +31,7 @@ import crypto from "node:crypto";
31
31
  import fs from "node:fs";
32
32
  import path from "node:path";
33
33
  import { getTable } from "../../storage/tables.js";
34
+ import { eqFilter } from "../../storage/filter.js";
34
35
  import { atomicWrite } from "../../storage/atomic.js";
35
36
  import { recordEmbed } from "../../observability/embedding-events.js";
36
37
  import { recordIndexWrite } from "../../observability/index-events.js";
@@ -139,7 +140,7 @@ async function handleCompaction(deps, dryRun) {
139
140
  }, () => coldTable.add(toMove.map(cleanConversationRow)));
140
141
  // 2. Remove from hot
141
142
  for (const row of toMove) {
142
- await hotTable.delete(`id = '${row.id}'`);
143
+ await hotTable.delete(eqFilter("id", row.id, { validateAsRowId: true }));
143
144
  }
144
145
  // 3. Write raw digest artifact BEFORE table insert (crash safety:
145
146
  // if we crash after table insert but before raw write, reconcile
@@ -212,7 +213,7 @@ async function handleRollback(deps, dryRun) {
212
213
  for (const convId of manifest.conversation_ids) {
213
214
  const rows = (await coldTable
214
215
  .query()
215
- .where(`id = '${convId}'`)
216
+ .where(eqFilter("id", convId, { validateAsRowId: true }))
216
217
  .toArray());
217
218
  if (rows.length > 0) {
218
219
  await recordIndexWrite(deps.dataDir, {
@@ -221,11 +222,11 @@ async function handleRollback(deps, dryRun) {
221
222
  operation: "conversation_rollback_to_hot",
222
223
  row_count: rows.length,
223
224
  }, () => hotTable.add(rows.map(cleanConversationRow)));
224
- await coldTable.delete(`id = '${convId}'`);
225
+ await coldTable.delete(eqFilter("id", convId, { validateAsRowId: true }));
225
226
  }
226
227
  }
227
228
  // Remove the digest row from table
228
- await digestTable.delete(`id = '${manifest.digest_id}'`);
229
+ await digestTable.delete(eqFilter("id", manifest.digest_id, { validateAsRowId: true }));
229
230
  // Remove the raw digest file so reconcile cannot resurrect it
230
231
  if (manifest.digest_filename) {
231
232
  const digestPath = path.join(deps.dataDir, "raw", "digests", manifest.digest_filename);
@@ -341,6 +342,10 @@ function cleanConversationRow(row) {
341
342
  // basic/placeholder row stays a backfill target after compaction.
342
343
  metadata_provider: row.metadata_provider ?? "",
343
344
  embedding_provider: row.embedding_provider ?? "",
345
+ // Preserve chunk identity so a chunk set survives hot → cold relocation.
346
+ parent_id: row.parent_id ?? "",
347
+ chunk_index: row.chunk_index ?? "",
348
+ chunk_count: row.chunk_count ?? "",
344
349
  };
345
350
  }
346
351
  function safeParseArray(json) {