@getrift/rift 0.1.0-beta.21 → 0.1.0-beta.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (273) hide show
  1. package/README.md +7 -3
  2. package/dist/src/capture/auto-capture.d.ts +105 -4
  3. package/dist/src/capture/auto-capture.d.ts.map +1 -1
  4. package/dist/src/capture/auto-capture.js +313 -34
  5. package/dist/src/capture/auto-capture.js.map +1 -1
  6. package/dist/src/capture/claude-cli-triage-provider.d.ts +28 -0
  7. package/dist/src/capture/claude-cli-triage-provider.d.ts.map +1 -0
  8. package/dist/src/capture/claude-cli-triage-provider.js +88 -0
  9. package/dist/src/capture/claude-cli-triage-provider.js.map +1 -0
  10. package/dist/src/capture/codex-cli-triage-provider.d.ts.map +1 -1
  11. package/dist/src/capture/codex-cli-triage-provider.js +1 -33
  12. package/dist/src/capture/codex-cli-triage-provider.js.map +1 -1
  13. package/dist/src/capture/cursor-capture.d.ts +89 -0
  14. package/dist/src/capture/cursor-capture.d.ts.map +1 -0
  15. package/dist/src/capture/cursor-capture.js +121 -0
  16. package/dist/src/capture/cursor-capture.js.map +1 -0
  17. package/dist/src/capture/observability.d.ts +30 -0
  18. package/dist/src/capture/observability.d.ts.map +1 -1
  19. package/dist/src/capture/observability.js +29 -0
  20. package/dist/src/capture/observability.js.map +1 -1
  21. package/dist/src/capture/sources.d.ts +41 -3
  22. package/dist/src/capture/sources.d.ts.map +1 -1
  23. package/dist/src/capture/sources.js +43 -1
  24. package/dist/src/capture/sources.js.map +1 -1
  25. package/dist/src/capture/triage-classification.d.ts +69 -0
  26. package/dist/src/capture/triage-classification.d.ts.map +1 -0
  27. package/dist/src/capture/triage-classification.js +62 -0
  28. package/dist/src/capture/triage-classification.js.map +1 -0
  29. package/dist/src/capture/triage-provider-factory.d.ts +36 -0
  30. package/dist/src/capture/triage-provider-factory.d.ts.map +1 -0
  31. package/dist/src/capture/triage-provider-factory.js +55 -0
  32. package/dist/src/capture/triage-provider-factory.js.map +1 -0
  33. package/dist/src/capture/triage.d.ts +1 -1
  34. package/dist/src/capture/triage.d.ts.map +1 -1
  35. package/dist/src/capture/triage.js +8 -6
  36. package/dist/src/capture/triage.js.map +1 -1
  37. package/dist/src/cli/commands/capture.d.ts.map +1 -1
  38. package/dist/src/cli/commands/capture.js +72 -17
  39. package/dist/src/cli/commands/capture.js.map +1 -1
  40. package/dist/src/cli/commands/chunk-backfill.d.ts +13 -0
  41. package/dist/src/cli/commands/chunk-backfill.d.ts.map +1 -0
  42. package/dist/src/cli/commands/chunk-backfill.js +157 -0
  43. package/dist/src/cli/commands/chunk-backfill.js.map +1 -0
  44. package/dist/src/cli/commands/cursor-probe.d.ts +20 -0
  45. package/dist/src/cli/commands/cursor-probe.d.ts.map +1 -0
  46. package/dist/src/cli/commands/cursor-probe.js +162 -0
  47. package/dist/src/cli/commands/cursor-probe.js.map +1 -0
  48. package/dist/src/cli/commands/onboard.d.ts +22 -2
  49. package/dist/src/cli/commands/onboard.d.ts.map +1 -1
  50. package/dist/src/cli/commands/onboard.js +160 -32
  51. package/dist/src/cli/commands/onboard.js.map +1 -1
  52. package/dist/src/cli/commands/status.d.ts.map +1 -1
  53. package/dist/src/cli/commands/status.js +12 -0
  54. package/dist/src/cli/commands/status.js.map +1 -1
  55. package/dist/src/cli/commands/update.d.ts +34 -1
  56. package/dist/src/cli/commands/update.d.ts.map +1 -1
  57. package/dist/src/cli/commands/update.js +166 -1
  58. package/dist/src/cli/commands/update.js.map +1 -1
  59. package/dist/src/cli/index.d.ts.map +1 -1
  60. package/dist/src/cli/index.js +4 -0
  61. package/dist/src/cli/index.js.map +1 -1
  62. package/dist/src/cli/status/friend-header.d.ts +10 -0
  63. package/dist/src/cli/status/friend-header.d.ts.map +1 -1
  64. package/dist/src/cli/status/friend-header.js +117 -0
  65. package/dist/src/cli/status/friend-header.js.map +1 -1
  66. package/dist/src/cli/status/local-signals.d.ts +23 -0
  67. package/dist/src/cli/status/local-signals.d.ts.map +1 -1
  68. package/dist/src/cli/status/local-signals.js +19 -0
  69. package/dist/src/cli/status/local-signals.js.map +1 -1
  70. package/dist/src/config/schema.d.ts +220 -14
  71. package/dist/src/config/schema.d.ts.map +1 -1
  72. package/dist/src/config/schema.js +82 -7
  73. package/dist/src/config/schema.js.map +1 -1
  74. package/dist/src/diagnostics/claude-preflight.d.ts +34 -0
  75. package/dist/src/diagnostics/claude-preflight.d.ts.map +1 -0
  76. package/dist/src/diagnostics/claude-preflight.js +89 -0
  77. package/dist/src/diagnostics/claude-preflight.js.map +1 -0
  78. package/dist/src/diagnostics/codex-preflight.d.ts +1 -1
  79. package/dist/src/diagnostics/codex-preflight.d.ts.map +1 -1
  80. package/dist/src/diagnostics/codex-preflight.js +14 -0
  81. package/dist/src/diagnostics/codex-preflight.js.map +1 -1
  82. package/dist/src/diagnostics/doctor.d.ts +1 -1
  83. package/dist/src/diagnostics/doctor.d.ts.map +1 -1
  84. package/dist/src/diagnostics/doctor.js +39 -2
  85. package/dist/src/diagnostics/doctor.js.map +1 -1
  86. package/dist/src/ingestion/chunk-meta.d.ts +85 -0
  87. package/dist/src/ingestion/chunk-meta.d.ts.map +1 -0
  88. package/dist/src/ingestion/chunk-meta.js +167 -0
  89. package/dist/src/ingestion/chunk-meta.js.map +1 -0
  90. package/dist/src/ingestion/chunk-text.d.ts +39 -0
  91. package/dist/src/ingestion/chunk-text.d.ts.map +1 -0
  92. package/dist/src/ingestion/chunk-text.js +114 -0
  93. package/dist/src/ingestion/chunk-text.js.map +1 -0
  94. package/dist/src/ingestion/cursor/cursor-store.d.ts +177 -0
  95. package/dist/src/ingestion/cursor/cursor-store.d.ts.map +1 -0
  96. package/dist/src/ingestion/cursor/cursor-store.js +243 -0
  97. package/dist/src/ingestion/cursor/cursor-store.js.map +1 -0
  98. package/dist/src/ingestion/cursor/enrich-roots.d.ts +16 -0
  99. package/dist/src/ingestion/cursor/enrich-roots.d.ts.map +1 -0
  100. package/dist/src/ingestion/cursor/enrich-roots.js +22 -0
  101. package/dist/src/ingestion/cursor/enrich-roots.js.map +1 -0
  102. package/dist/src/ingestion/cursor/vscdb-reader.d.ts +32 -0
  103. package/dist/src/ingestion/cursor/vscdb-reader.d.ts.map +1 -0
  104. package/dist/src/ingestion/cursor/vscdb-reader.js +113 -0
  105. package/dist/src/ingestion/cursor/vscdb-reader.js.map +1 -0
  106. package/dist/src/ingestion/cursor/workspace-root.d.ts +96 -0
  107. package/dist/src/ingestion/cursor/workspace-root.d.ts.map +1 -0
  108. package/dist/src/ingestion/cursor/workspace-root.js +187 -0
  109. package/dist/src/ingestion/cursor/workspace-root.js.map +1 -0
  110. package/dist/src/ingestion/indexer.d.ts.map +1 -1
  111. package/dist/src/ingestion/indexer.js +41 -32
  112. package/dist/src/ingestion/indexer.js.map +1 -1
  113. package/dist/src/jobs/handlers/compact.d.ts.map +1 -1
  114. package/dist/src/jobs/handlers/compact.js +9 -4
  115. package/dist/src/jobs/handlers/compact.js.map +1 -1
  116. package/dist/src/jobs/handlers/ingest.d.ts.map +1 -1
  117. package/dist/src/jobs/handlers/ingest.js +60 -30
  118. package/dist/src/jobs/handlers/ingest.js.map +1 -1
  119. package/dist/src/jobs/handlers/reconcile.d.ts.map +1 -1
  120. package/dist/src/jobs/handlers/reconcile.js +128 -45
  121. package/dist/src/jobs/handlers/reconcile.js.map +1 -1
  122. package/dist/src/jobs/handlers/save.d.ts.map +1 -1
  123. package/dist/src/jobs/handlers/save.js +122 -72
  124. package/dist/src/jobs/handlers/save.js.map +1 -1
  125. package/dist/src/jobs/types.d.ts +1 -1
  126. package/dist/src/main.js +26 -15
  127. package/dist/src/main.js.map +1 -1
  128. package/dist/src/mcp/server.d.ts.map +1 -1
  129. package/dist/src/mcp/server.js +10 -3
  130. package/dist/src/mcp/server.js.map +1 -1
  131. package/dist/src/mcp/tools/context-pack.d.ts.map +1 -1
  132. package/dist/src/mcp/tools/context-pack.js +7 -1
  133. package/dist/src/mcp/tools/context-pack.js.map +1 -1
  134. package/dist/src/mcp/tools/conversations-search.d.ts +1 -1
  135. package/dist/src/mcp/tools/conversations-search.d.ts.map +1 -1
  136. package/dist/src/mcp/tools/conversations-search.js +7 -1
  137. package/dist/src/mcp/tools/conversations-search.js.map +1 -1
  138. package/dist/src/mcp/tools/evidence-feedback.d.ts +60 -0
  139. package/dist/src/mcp/tools/evidence-feedback.d.ts.map +1 -0
  140. package/dist/src/mcp/tools/evidence-feedback.js +62 -0
  141. package/dist/src/mcp/tools/evidence-feedback.js.map +1 -0
  142. package/dist/src/mcp/tools/log-outcome.d.ts +72 -0
  143. package/dist/src/mcp/tools/log-outcome.d.ts.map +1 -0
  144. package/dist/src/mcp/tools/log-outcome.js +59 -0
  145. package/dist/src/mcp/tools/log-outcome.js.map +1 -0
  146. package/dist/src/mcp/tools/open-evidence.d.ts +37 -0
  147. package/dist/src/mcp/tools/open-evidence.d.ts.map +1 -0
  148. package/dist/src/mcp/tools/open-evidence.js +72 -0
  149. package/dist/src/mcp/tools/open-evidence.js.map +1 -0
  150. package/dist/src/mcp/tools/save.d.ts +7 -2
  151. package/dist/src/mcp/tools/save.d.ts.map +1 -1
  152. package/dist/src/mcp/tools/save.js +7 -2
  153. package/dist/src/mcp/tools/save.js.map +1 -1
  154. package/dist/src/mcp/tools/search.d.ts.map +1 -1
  155. package/dist/src/mcp/tools/search.js +7 -1
  156. package/dist/src/mcp/tools/search.js.map +1 -1
  157. package/dist/src/observability/retrieval-feedback.d.ts +82 -0
  158. package/dist/src/observability/retrieval-feedback.d.ts.map +1 -0
  159. package/dist/src/observability/retrieval-feedback.js +231 -0
  160. package/dist/src/observability/retrieval-feedback.js.map +1 -0
  161. package/dist/src/observability/rift-context.d.ts.map +1 -1
  162. package/dist/src/observability/rift-context.js +3 -0
  163. package/dist/src/observability/rift-context.js.map +1 -1
  164. package/dist/src/observability/tool-usage-stats.d.ts +13 -0
  165. package/dist/src/observability/tool-usage-stats.d.ts.map +1 -1
  166. package/dist/src/observability/tool-usage-stats.js +15 -0
  167. package/dist/src/observability/tool-usage-stats.js.map +1 -1
  168. package/dist/src/observability/tool-usage.d.ts +56 -0
  169. package/dist/src/observability/tool-usage.d.ts.map +1 -1
  170. package/dist/src/observability/tool-usage.js +86 -0
  171. package/dist/src/observability/tool-usage.js.map +1 -1
  172. package/dist/src/providers/claude-cli-metadata-extraction.d.ts +47 -0
  173. package/dist/src/providers/claude-cli-metadata-extraction.d.ts.map +1 -0
  174. package/dist/src/providers/claude-cli-metadata-extraction.js +120 -0
  175. package/dist/src/providers/claude-cli-metadata-extraction.js.map +1 -0
  176. package/dist/src/providers/claude-cli-runner.d.ts +92 -0
  177. package/dist/src/providers/claude-cli-runner.d.ts.map +1 -0
  178. package/dist/src/providers/claude-cli-runner.js +598 -0
  179. package/dist/src/providers/claude-cli-runner.js.map +1 -0
  180. package/dist/src/providers/codex-cli-metadata-extraction.d.ts.map +1 -1
  181. package/dist/src/providers/codex-cli-metadata-extraction.js +1 -40
  182. package/dist/src/providers/codex-cli-metadata-extraction.js.map +1 -1
  183. package/dist/src/providers/codex-cli-runner.d.ts +7 -0
  184. package/dist/src/providers/codex-cli-runner.d.ts.map +1 -1
  185. package/dist/src/providers/codex-cli-runner.js +131 -5
  186. package/dist/src/providers/codex-cli-runner.js.map +1 -1
  187. package/dist/src/providers/conversation-generation.d.ts +10 -0
  188. package/dist/src/providers/conversation-generation.d.ts.map +1 -1
  189. package/dist/src/providers/conversation-generation.js +54 -13
  190. package/dist/src/providers/conversation-generation.js.map +1 -1
  191. package/dist/src/providers/openai-metadata-extraction.d.ts +48 -1
  192. package/dist/src/providers/openai-metadata-extraction.d.ts.map +1 -1
  193. package/dist/src/providers/openai-metadata-extraction.js +51 -2
  194. package/dist/src/providers/openai-metadata-extraction.js.map +1 -1
  195. package/dist/src/providers/types.d.ts +1 -1
  196. package/dist/src/providers/types.d.ts.map +1 -1
  197. package/dist/src/providers/types.js +4 -0
  198. package/dist/src/providers/types.js.map +1 -1
  199. package/dist/src/retrieval/compact.d.ts +81 -0
  200. package/dist/src/retrieval/compact.d.ts.map +1 -1
  201. package/dist/src/retrieval/compact.js +248 -8
  202. package/dist/src/retrieval/compact.js.map +1 -1
  203. package/dist/src/retrieval/context-pack.d.ts.map +1 -1
  204. package/dist/src/retrieval/context-pack.js +28 -14
  205. package/dist/src/retrieval/context-pack.js.map +1 -1
  206. package/dist/src/retrieval/evidence-key.d.ts +48 -0
  207. package/dist/src/retrieval/evidence-key.d.ts.map +1 -0
  208. package/dist/src/retrieval/evidence-key.js +131 -0
  209. package/dist/src/retrieval/evidence-key.js.map +1 -0
  210. package/dist/src/retrieval/group-by-parent.d.ts +38 -0
  211. package/dist/src/retrieval/group-by-parent.d.ts.map +1 -0
  212. package/dist/src/retrieval/group-by-parent.js +40 -0
  213. package/dist/src/retrieval/group-by-parent.js.map +1 -0
  214. package/dist/src/retrieval/lexical.d.ts.map +1 -1
  215. package/dist/src/retrieval/lexical.js +1 -3
  216. package/dist/src/retrieval/lexical.js.map +1 -1
  217. package/dist/src/retrieval/receipt.d.ts +57 -0
  218. package/dist/src/retrieval/receipt.d.ts.map +1 -0
  219. package/dist/src/retrieval/receipt.js +119 -0
  220. package/dist/src/retrieval/receipt.js.map +1 -0
  221. package/dist/src/retrieval/reranker.d.ts +12 -2
  222. package/dist/src/retrieval/reranker.d.ts.map +1 -1
  223. package/dist/src/retrieval/reranker.js +11 -4
  224. package/dist/src/retrieval/reranker.js.map +1 -1
  225. package/dist/src/retrieval/stitch-chunks.d.ts +73 -0
  226. package/dist/src/retrieval/stitch-chunks.d.ts.map +1 -0
  227. package/dist/src/retrieval/stitch-chunks.js +106 -0
  228. package/dist/src/retrieval/stitch-chunks.js.map +1 -0
  229. package/dist/src/server/app.d.ts.map +1 -1
  230. package/dist/src/server/app.js +17 -1
  231. package/dist/src/server/app.js.map +1 -1
  232. package/dist/src/server/routes/conversations-search.d.ts.map +1 -1
  233. package/dist/src/server/routes/conversations-search.js +12 -3
  234. package/dist/src/server/routes/conversations-search.js.map +1 -1
  235. package/dist/src/server/routes/friend-status.d.ts +44 -5
  236. package/dist/src/server/routes/friend-status.d.ts.map +1 -1
  237. package/dist/src/server/routes/friend-status.js +74 -6
  238. package/dist/src/server/routes/friend-status.js.map +1 -1
  239. package/dist/src/server/routes/mcp-usage.d.ts +9 -6
  240. package/dist/src/server/routes/mcp-usage.d.ts.map +1 -1
  241. package/dist/src/server/routes/mcp-usage.js.map +1 -1
  242. package/dist/src/server/routes/retrieval-feedback.d.ts +3 -0
  243. package/dist/src/server/routes/retrieval-feedback.d.ts.map +1 -0
  244. package/dist/src/server/routes/retrieval-feedback.js +290 -0
  245. package/dist/src/server/routes/retrieval-feedback.js.map +1 -0
  246. package/dist/src/server/routes/save.d.ts +3 -3
  247. package/dist/src/server/routes/save.d.ts.map +1 -1
  248. package/dist/src/server/routes/save.js +6 -2
  249. package/dist/src/server/routes/save.js.map +1 -1
  250. package/dist/src/server/routes/search.d.ts.map +1 -1
  251. package/dist/src/server/routes/search.js +19 -7
  252. package/dist/src/server/routes/search.js.map +1 -1
  253. package/dist/src/server/serving-marker.d.ts +85 -0
  254. package/dist/src/server/serving-marker.d.ts.map +1 -0
  255. package/dist/src/server/serving-marker.js +226 -0
  256. package/dist/src/server/serving-marker.js.map +1 -0
  257. package/dist/src/storage/chunk-backfill.d.ts +39 -0
  258. package/dist/src/storage/chunk-backfill.d.ts.map +1 -0
  259. package/dist/src/storage/chunk-backfill.js +295 -0
  260. package/dist/src/storage/chunk-backfill.js.map +1 -0
  261. package/dist/src/storage/filter.d.ts +42 -0
  262. package/dist/src/storage/filter.d.ts.map +1 -0
  263. package/dist/src/storage/filter.js +70 -0
  264. package/dist/src/storage/filter.js.map +1 -0
  265. package/dist/src/storage/rebuild.d.ts.map +1 -1
  266. package/dist/src/storage/rebuild.js +44 -27
  267. package/dist/src/storage/rebuild.js.map +1 -1
  268. package/dist/src/storage/tables.d.ts +41 -0
  269. package/dist/src/storage/tables.d.ts.map +1 -1
  270. package/dist/src/storage/tables.js +64 -1
  271. package/dist/src/storage/tables.js.map +1 -1
  272. package/operator/swiftbar/render-menu.py +57 -15
  273. package/package.json +5 -3
@@ -0,0 +1,22 @@
1
+ import { resolveProjectRootByWorkspaceId } from "./workspace-root.js";
2
/**
 * Upgrade discovery candidates whose project root is still `unresolved` by
 * looking their workspace id up under `cursorDir`.
 *
 * Candidates that are not `unresolved`, or that carry no workspace id, are
 * returned as the same object. Candidates are never mutated: an upgraded
 * candidate is a shallow copy with the new fields applied.
 *
 * @param candidates Discovery candidates to enrich.
 * @param cursorDir Cursor application-support dir handed to the resolver.
 * @returns A new array, one entry per input candidate, in the same order.
 */
export function enrichCursorProjectRoots(candidates, cursorDir) {
    const enrichOne = (candidate) => {
        const needsLookup = candidate.projectRootStatus === "unresolved" && Boolean(candidate.workspaceId);
        if (!needsLookup) {
            return candidate;
        }
        const resolution = resolveProjectRootByWorkspaceId(cursorDir, candidate.workspaceId);
        switch (resolution.status) {
            case "resolved":
                return {
                    ...candidate,
                    projectRoot: resolution.root,
                    projectRootSource: resolution.source,
                    projectRootStatus: resolution.status,
                };
            case "unresolved":
                // The lookup added nothing; hand back the original object.
                return candidate;
            default:
                // Any other named outcome (empty_window / multi_root / remote) is
                // still more informative than bare `unresolved`, so record it.
                return { ...candidate, projectRootStatus: resolution.status };
        }
    };
    return candidates.map((candidate) => enrichOne(candidate));
}
22
+ //# sourceMappingURL=enrich-roots.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"enrich-roots.js","sourceRoot":"","sources":["../../../../src/ingestion/cursor/enrich-roots.ts"],"names":[],"mappings":"AAcA,OAAO,EAAE,+BAA+B,EAAE,MAAM,qBAAqB,CAAC;AAEtE,MAAM,UAAU,wBAAwB,CACtC,UAAoC,EACpC,SAAiB;IAEjB,OAAO,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QAC1B,IAAI,CAAC,CAAC,iBAAiB,KAAK,YAAY,IAAI,CAAC,CAAC,CAAC,WAAW;YAAE,OAAO,CAAC,CAAC;QACrE,MAAM,CAAC,GAAG,+BAA+B,CAAC,SAAS,EAAE,CAAC,CAAC,WAAW,CAAC,CAAC;QACpE,IAAI,CAAC,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;YAC5B,OAAO;gBACL,GAAG,CAAC;gBACJ,WAAW,EAAE,CAAC,CAAC,IAAI;gBACnB,iBAAiB,EAAE,CAAC,CAAC,MAAM;gBAC3B,iBAAiB,EAAE,CAAC,CAAC,MAAM;aAC5B,CAAC;QACJ,CAAC;QACD,qEAAqE;QACrE,yEAAyE;QACzE,IAAI,CAAC,CAAC,MAAM,KAAK,YAAY;YAAE,OAAO,EAAE,GAAG,CAAC,EAAE,iBAAiB,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC;QAC5E,OAAO,CAAC,CAAC;IACX,CAAC,CAAC,CAAC;AACL,CAAC"}
@@ -0,0 +1,32 @@
1
+ import type { CursorRawRows } from "./cursor-store.js";
2
+ /** Default macOS location of Cursor's per-user application support dir. */
3
+ export declare function defaultCursorDir(): string;
4
+ /** Global KV store path under a Cursor application-support dir. */
5
+ export declare function globalStatePath(cursorDir: string): string;
6
+ export interface CursorReadResult {
7
+ /** Absolute path of the db that was read. */
8
+ dbPath: string;
9
+ /** Raw rows for the pure discovery layer. */
10
+ rows: CursorRawRows;
11
+ }
12
+ export declare class CursorReaderError extends Error {
13
+ readonly code: "sqlite_missing" | "db_missing" | "query_failed";
14
+ constructor(message: string, code: "sqlite_missing" | "db_missing" | "query_failed");
15
+ }
16
+ export interface ReadOptions {
17
+ /** Cursor application-support dir; defaults to the standard macOS path. */
18
+ cursorDir?: string;
19
+ /**
20
+ * When true, ALSO fetch bubble bodies (`bubbleId:*`) — the actual message
21
+ * text. Off by default: discovery never needs private content. Only enable
22
+ * when the caller has acknowledged it will read prose (e.g. parse round-trip).
23
+ */
24
+ includeBubbles?: boolean;
25
+ }
26
+ /**
27
+ * Read the global Cursor state.vscdb read-only and return raw rows for the pure
28
+ * discovery layer. Throws {@link CursorReaderError} with a stable `code` for the
29
+ * three expected failure modes so callers can render friendly guidance.
30
+ */
31
+ export declare function readCursorGlobalState(options?: ReadOptions): CursorReadResult;
32
+ //# sourceMappingURL=vscdb-reader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vscdb-reader.d.ts","sourceRoot":"","sources":["../../../../src/ingestion/cursor/vscdb-reader.ts"],"names":[],"mappings":"AA6BA,OAAO,KAAK,EAAe,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAEpE,2EAA2E;AAC3E,wBAAgB,gBAAgB,IAAI,MAAM,CAOzC;AAED,mEAAmE;AACnE,wBAAgB,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAEzD;AAED,MAAM,WAAW,gBAAgB;IAC/B,6CAA6C;IAC7C,MAAM,EAAE,MAAM,CAAC;IACf,6CAA6C;IAC7C,IAAI,EAAE,aAAa,CAAC;CACrB;AAED,qBAAa,iBAAkB,SAAQ,KAAK;IAGxC,QAAQ,CAAC,IAAI,EACT,gBAAgB,GAChB,YAAY,GACZ,cAAc;gBAJlB,OAAO,EAAE,MAAM,EACN,IAAI,EACT,gBAAgB,GAChB,YAAY,GACZ,cAAc;CAKrB;AAmDD,MAAM,WAAW,WAAW;IAC1B,2EAA2E;IAC3E,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;OAIG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED;;;;GAIG;AACH,wBAAgB,qBAAqB,CAAC,OAAO,GAAE,WAAgB,GAAG,gBAAgB,CA+CjF"}
@@ -0,0 +1,113 @@
1
+ /**
2
+ * Read-only reader for Cursor's `state.vscdb` SQLite stores.
3
+ *
4
+ * DISCOVERY SLICE ONLY — this is the single I/O boundary for Cursor capture.
5
+ * It is deliberately read-only by THREE independent guarantees:
6
+ * 1. SQLite is opened with the `immutable=1` URI flag, which forbids any
7
+ * write and never creates/touches the -wal/-shm sidecar files.
8
+ * 2. Only SELECT statements are ever issued.
9
+ * 3. We shell out to the system `sqlite3` binary rather than linking a
10
+ * writable driver. (Cursor itself holds the db open under WAL; immutable
11
+ * lets us read a consistent snapshot without contending for the lock.)
12
+ *
13
+ * Why shell out instead of a native dep: Rift's Node engine floor still
14
+ * includes Node 20 (no `node:sqlite`), and we don't want to pull a native
15
+ * addon (better-sqlite3) into the tree for a read-only probe. macOS always
16
+ * ships `/usr/bin/sqlite3`, and Cursor is macOS-first. The future capture slice
17
+ * can revisit this (node:sqlite once the floor rises). See the discovery doc.
18
+ *
19
+ * PRIVACY: by default we fetch only the session index + per-session metadata
20
+ * (`composer.composerHeaders`, `composerData:*`). We never SELECT `cursorAuth/*`
21
+ * (access/refresh tokens) or `secret://*` (MCP OAuth secrets), and we do not
22
+ * fetch bubble bodies (`bubbleId:*`, the actual message text) unless the caller
23
+ * explicitly opts in.
24
+ */
25
+ import { execFileSync } from "node:child_process";
26
+ import fs from "node:fs";
27
+ import os from "node:os";
28
+ import path from "node:path";
29
/**
 * Default macOS location of Cursor's per-user application-support dir:
 * `<home>/Library/Application Support/Cursor`, where `<home>` is the current
 * user's home directory.
 */
export function defaultCursorDir() {
    const homeDirectory = os.homedir();
    const appSupportSegments = ["Library", "Application Support", "Cursor"];
    return path.join(homeDirectory, ...appSupportSegments);
}
33
/**
 * Path of the global key/value store (`User/globalStorage/state.vscdb`)
 * beneath the given Cursor application-support dir.
 */
export function globalStatePath(cursorDir) {
    const storeSegments = ["User", "globalStorage", "state.vscdb"];
    return path.join(cursorDir, ...storeSegments);
}
37
/**
 * Error raised by the Cursor store reader. `code` is a stable identifier
 * callers can branch on instead of matching on the message text.
 */
export class CursorReaderError extends Error {
    /** Stable failure identifier supplied by the thrower. */
    code;
    /**
     * @param {string} message Human-readable description of the failure.
     * @param {string} code Stable failure identifier for this error.
     * @param {{ cause?: unknown }} [options] Optional standard `Error` options.
     *   Passing `{ cause }` keeps the underlying error reachable via
     *   `error.cause`; omitting it behaves exactly like the two-argument form.
     */
    constructor(message, code, options) {
        super(message, options);
        this.code = code;
        this.name = "CursorReaderError";
    }
}
45
/**
 * Report whether a `sqlite3` binary can be run, by invoking
 * `sqlite3 --version` with all stdio discarded. Any failure (binary not
 * found, non-zero exit, spawn error) is reported as `false`.
 */
function sqlite3Available() {
    const discardAllStdio = ["ignore", "ignore", "ignore"];
    let available = true;
    try {
        execFileSync("sqlite3", ["--version"], { stdio: discardAllStdio });
    }
    catch {
        available = false;
    }
    return available;
}
54
/**
 * Build the `file:` URI that opens `dbPath` with `immutable=1`.
 *
 * The path is escaped so it survives SQLite's URI parsing: `%` is escaped
 * first (so a literal `%41` in a directory name is not decoded), then `?` and
 * `#` (which would otherwise start the query / fragment), and runs of `/` are
 * collapsed so a leading `//` is not read as an authority.
 */
function immutableSqliteUri(dbPath) {
    const escapedPath = dbPath
        .replaceAll("%", "%25")
        .replaceAll("?", "%3f")
        .replaceAll("#", "%23")
        .replace(/\/{2,}/g, "/");
    return `file:${escapedPath}?immutable=1`;
}
/**
 * Run a single SELECT against an immutable (read-only) view of the db and
 * return the rows as parsed objects. Uses `.mode json` so `sqlite3` emits
 * the result set as one JSON array.
 *
 * @param dbPath Filesystem path of the SQLite database.
 * @param sql The SELECT statement to run.
 * @returns The parsed rows; an empty array when `sqlite3` prints nothing.
 * @throws {CursorReaderError} with code `query_failed` when `sqlite3` fails
 *   or its output is not valid JSON.
 */
function selectJson(dbPath, sql) {
    const uri = immutableSqliteUri(dbPath);
    let stdout;
    try {
        stdout = execFileSync("sqlite3", ["-cmd", ".mode json", uri, sql], {
            encoding: "utf-8",
            maxBuffer: 256 * 1024 * 1024, // composer blobs can be large
        });
    }
    catch (err) {
        throw new CursorReaderError(`sqlite3 query failed: ${err instanceof Error ? err.message : String(err)}`, "query_failed");
    }
    const trimmed = stdout.trim();
    // No output at all means the query matched no rows.
    if (!trimmed)
        return [];
    try {
        return JSON.parse(trimmed);
    }
    catch (err) {
        throw new CursorReaderError(`failed to parse sqlite3 JSON output: ${err instanceof Error ? err.message : String(err)}`, "query_failed");
    }
}
80
/**
 * Narrow parsed query records down to `{ key, value }` rows.
 *
 * A record is kept only when both its `key` and its `value` are strings;
 * every other record is dropped. Kept rows carry only those two fields.
 */
function toKvRows(records) {
    return [...records].flatMap((record) => {
        const { key, value } = record;
        const isStringPair = typeof key === "string" && typeof value === "string";
        return isStringPair ? [{ key, value }] : [];
    });
}
89
/**
 * Read the global Cursor `state.vscdb` read-only and return raw rows for the
 * discovery layer.
 *
 * Always fetches the `composer.composerHeaders` row from `ItemTable` and the
 * `composerData:*` rows from `cursorDiskKV`. The `bubbleId:*` rows are
 * fetched only when `options.includeBubbles` is truthy.
 *
 * @param options Optional settings: `cursorDir` overrides the default Cursor
 *   application-support dir; `includeBubbles` opts in to the bubble rows.
 * @returns `{ dbPath, rows: { itemTable, cursorDiskKV } }`.
 * @throws {CursorReaderError} `db_missing` when the db file does not exist,
 *   `sqlite_missing` when no `sqlite3` binary can be run, and `query_failed`
 *   when a query or its output parsing fails.
 */
export function readCursorGlobalState(options = {}) {
    const cursorDir = options.cursorDir ?? defaultCursorDir();
    const dbPath = globalStatePath(cursorDir);
    if (!fs.existsSync(dbPath)) {
        throw new CursorReaderError(`Cursor state.vscdb not found at ${dbPath}`, "db_missing");
    }
    if (!sqlite3Available()) {
        throw new CursorReaderError("The `sqlite3` binary is required to read Cursor's store but was not found on PATH.", "sqlite_missing");
    }
    // Session index (ItemTable). Single, small row.
    const itemTable = toKvRows(selectJson(dbPath, "SELECT key, value FROM ItemTable WHERE key = 'composer.composerHeaders'"));
    // Per-session metadata (cursorDiskKV). Small; never tokens/secrets.
    const composerData = toKvRows(selectJson(dbPath, "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'composerData:%'"));
    // Message bodies are private content: only read them on explicit opt-in.
    const bubbles = options.includeBubbles
        ? toKvRows(selectJson(dbPath, "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'bubbleId:%'"))
        : [];
    // `concat` rather than `push(...bubbles)`: spreading a very large row set
    // into call arguments overflows the engine's argument limit (RangeError).
    const cursorDiskKV = composerData.concat(bubbles);
    return { dbPath, rows: { itemTable, cursorDiskKV } };
}
113
+ //# sourceMappingURL=vscdb-reader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vscdb-reader.js","sourceRoot":"","sources":["../../../../src/ingestion/cursor/vscdb-reader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAI7B,2EAA2E;AAC3E,MAAM,UAAU,gBAAgB;IAC9B,OAAO,IAAI,CAAC,IAAI,CACd,EAAE,CAAC,OAAO,EAAE,EACZ,SAAS,EACT,qBAAqB,EACrB,QAAQ,CACT,CAAC;AACJ,CAAC;AAED,mEAAmE;AACnE,MAAM,UAAU,eAAe,CAAC,SAAiB;IAC/C,OAAO,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,eAAe,EAAE,aAAa,CAAC,CAAC;AACtE,CAAC;AASD,MAAM,OAAO,iBAAkB,SAAQ,KAAK;IAG/B;IAFX,YACE,OAAe,EACN,IAGS;QAElB,KAAK,CAAC,OAAO,CAAC,CAAC;QALN,SAAI,GAAJ,IAAI,CAGK;QAGlB,IAAI,CAAC,IAAI,GAAG,mBAAmB,CAAC;IAClC,CAAC;CACF;AAED,SAAS,gBAAgB;IACvB,IAAI,CAAC;QACH,YAAY,CAAC,SAAS,EAAE,CAAC,WAAW,CAAC,EAAE,EAAE,KAAK,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;QAClF,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAS,UAAU,CAAC,MAAc,EAAE,GAAW;IAC7C,MAAM,GAAG,GAAG,QAAQ,MAAM,cAAc,CAAC;IACzC,IAAI,MAAc,CAAC;IACnB,IAAI,CAAC;QACH,MAAM,GAAG,YAAY,CAAC,SAAS,EAAE,CAAC,MAAM,EAAE,YAAY,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE;YACjE,QAAQ,EAAE,OAAO;YACjB,SAAS,EAAE,GAAG,GAAG,IAAI,GAAG,IAAI,EAAE,8BAA8B;SAC7D,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,IAAI,iBAAiB,CACzB,yBAAyB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAC3E,cAAc,CACf,CAAC;IACJ,CAAC;IACD,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC;IAC9B,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IACxB,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAkC,CAAC;IAC9D,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,IAAI,iBAAiB,CACzB,wCAAwC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAC1F,cAAc,CACf,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,QAAQ,CAAC,OAAsC;IACtD,MAAM,IAAI,GAAkB,EAAE,CAAC;IAC/B,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;QAC1B,IAAI,OAAO,GAAG,CAAC,GAAG,KAAK,QAAQ,IAAI,OAAO,GAAG,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;Y
ACjE,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC;QAChD,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAaD;;;;GAIG;AACH,MAAM,UAAU,qBAAqB,CAAC,UAAuB,EAAE;IAC7D,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,gBAAgB,EAAE,CAAC;IAC1D,MAAM,MAAM,GAAG,eAAe,CAAC,SAAS,CAAC,CAAC;IAE1C,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,iBAAiB,CACzB,mCAAmC,MAAM,EAAE,EAC3C,YAAY,CACb,CAAC;IACJ,CAAC;IACD,IAAI,CAAC,gBAAgB,EAAE,EAAE,CAAC;QACxB,MAAM,IAAI,iBAAiB,CACzB,oFAAoF,EACpF,gBAAgB,CACjB,CAAC;IACJ,CAAC;IAED,gDAAgD;IAChD,MAAM,SAAS,GAAG,QAAQ,CACxB,UAAU,CACR,MAAM,EACN,yEAAyE,CAC1E,CACF,CAAC;IAEF,oEAAoE;IACpE,MAAM,YAAY,GAAG,QAAQ,CAC3B,UAAU,CACR,MAAM,EACN,qEAAqE,CACtE,CACF,CAAC;IAEF,MAAM,YAAY,GAAG,CAAC,GAAG,YAAY,CAAC,CAAC;IAEvC,IAAI,OAAO,CAAC,cAAc,EAAE,CAAC;QAC3B,YAAY,CAAC,IAAI,CACf,GAAG,QAAQ,CACT,UAAU,CACR,MAAM,EACN,iEAAiE,CAClE,CACF,CACF,CAAC;IACJ,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE,YAAY,EAAE,EAAE,CAAC;AACvD,CAAC"}
@@ -0,0 +1,96 @@
1
+ /** Sentinel `workspaceIdentifier.id` Cursor uses when no folder is open. */
2
+ export declare const EMPTY_WINDOW_ID = "empty-window";
3
+ /** The `uri` object Cursor embeds in a folder-bound composer header. */
4
+ export interface CursorWorkspaceUri {
5
+ scheme?: string;
6
+ fsPath?: string;
7
+ path?: string;
8
+ external?: string;
9
+ }
10
+ /** A composer header's `workspaceIdentifier` (id + optional inline uri). */
11
+ export interface CursorWorkspaceIdentifier {
12
+ id?: string;
13
+ uri?: CursorWorkspaceUri | null;
14
+ }
15
+ /**
16
+ * Outcome of resolving a workspace identifier to a project root. Every
17
+ * non-`resolved` status is a deliberate, named outcome — the capture slice must
18
+ * branch on these rather than ever fabricate a root.
19
+ */
20
+ export type ProjectRootResolution = {
21
+ status: "resolved";
22
+ /** Absolute filesystem path of the project root. */
23
+ root: string;
24
+ /** Which link produced the root. */
25
+ source: "header_uri" | "workspace_json";
26
+ workspaceId: string | null;
27
+ }
28
+ /** No folder open (`empty-window`). Expected; not attributable. */
29
+ | {
30
+ status: "empty_window";
31
+ workspaceId: "empty-window";
32
+ }
33
+ /** Multi-root `.code-workspace`; root ambiguous. */
34
+ | {
35
+ status: "multi_root";
36
+ workspaceId: string;
37
+ configPath: string | null;
38
+ }
39
+ /** Non-`file` scheme (remote/ssh/wsl/devcontainer); not a local path. */
40
+ | {
41
+ status: "remote";
42
+ workspaceId: string | null;
43
+ scheme: string;
44
+ }
45
+ /** Id present but no inline uri and no readable `workspace.json`. */
46
+ | {
47
+ status: "unresolved";
48
+ workspaceId: string | null;
49
+ };
50
+ /**
51
+ * Parse a `workspace.json` file's text. Returns the folder root (single-folder
52
+ * window), a multi-root marker (`.code-workspace`), or unknown. Pure.
53
+ */
54
+ export declare function parseWorkspaceJson(text: string): {
55
+ kind: "folder";
56
+ scheme: string | null;
57
+ root: string | null;
58
+ } | {
59
+ kind: "multi_root";
60
+ configPath: string | null;
61
+ } | {
62
+ kind: "unknown";
63
+ };
64
+ /** Injected dependencies for the pure resolver. */
65
+ export interface ResolveOptions {
66
+ /**
67
+ * Returns the raw text of `workspaceStorage/<id>/workspace.json`, or null if
68
+ * absent. Injected so the resolver stays pure and fully fixture-testable.
69
+ * Used only as a fallback when the header carries no inline uri.
70
+ */
71
+ readWorkspaceJson?: (workspaceId: string) => string | null;
72
+ }
73
+ /**
74
+ * Resolve a composer header's `workspaceIdentifier` to a project root. Pure:
75
+ * the only I/O is the injected {@link ResolveOptions.readWorkspaceJson}. Tries
76
+ * the inline header uri first (current builds), then the on-disk
77
+ * `workspace.json` join (legacy builds / uri stripped). Never guesses — every
78
+ * non-resolvable case maps to an explicit status.
79
+ */
80
+ export declare function resolveProjectRoot(identifier: CursorWorkspaceIdentifier | null | undefined, options?: ResolveOptions): ProjectRootResolution;
81
+ /** Path to a workspace's `workspace.json` under a Cursor app-support dir. */
82
+ export declare function workspaceJsonPath(cursorDir: string, workspaceId: string): string;
83
+ /** Read `workspace.json` text read-only; null if it does not exist / unreadable. */
84
+ export declare function readWorkspaceJsonFromDisk(cursorDir: string, workspaceId: string): string | null;
85
+ /**
86
+ * Filesystem-backed resolution: resolve a header's identifier against a real
87
+ * Cursor app-support dir, reading `workspace.json` read-only when needed.
88
+ */
89
+ export declare function resolveCursorProjectRoot(identifier: CursorWorkspaceIdentifier | null | undefined, cursorDir: string): ProjectRootResolution;
90
+ /**
91
+ * Resolve a project root from a workspace id alone (no inline uri available).
92
+ * Used by the probe to upgrade `unresolved` discovery candidates on legacy
93
+ * builds via the on-disk `workspace.json` join.
94
+ */
95
+ export declare function resolveProjectRootByWorkspaceId(cursorDir: string, workspaceId: string): ProjectRootResolution;
96
+ //# sourceMappingURL=workspace-root.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"workspace-root.d.ts","sourceRoot":"","sources":["../../../../src/ingestion/cursor/workspace-root.ts"],"names":[],"mappings":"AAsDA,4EAA4E;AAC5E,eAAO,MAAM,eAAe,iBAAiB,CAAC;AAE9C,wEAAwE;AACxE,MAAM,WAAW,kBAAkB;IACjC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,4EAA4E;AAC5E,MAAM,WAAW,yBAAyB;IACxC,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,GAAG,CAAC,EAAE,kBAAkB,GAAG,IAAI,CAAC;CACjC;AAED;;;;GAIG;AACH,MAAM,MAAM,qBAAqB,GAC7B;IACE,MAAM,EAAE,UAAU,CAAC;IACnB,oDAAoD;IACpD,IAAI,EAAE,MAAM,CAAC;IACb,oCAAoC;IACpC,MAAM,EAAE,YAAY,GAAG,gBAAgB,CAAC;IACxC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC5B;AACH,mEAAmE;GACjE;IAAE,MAAM,EAAE,cAAc,CAAC;IAAC,WAAW,EAAE,cAAc,CAAA;CAAE;AACzD,oDAAoD;GAClD;IAAE,MAAM,EAAE,YAAY,CAAC;IAAC,WAAW,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE;AAC1E,yEAAyE;GACvE;IAAE,MAAM,EAAE,QAAQ,CAAC;IAAC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE;AAClE,qEAAqE;GACnE;IAAE,MAAM,EAAE,YAAY,CAAC;IAAC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,CAAC;AAoCzD;;;GAGG;AACH,wBAAgB,kBAAkB,CAChC,IAAI,EAAE,MAAM,GAEV;IAAE,IAAI,EAAE,QAAQ,CAAC;IAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,GAC9D;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,GACjD;IAAE,IAAI,EAAE,SAAS,CAAA;CAAE,CAkBtB;AAED,mDAAmD;AACnD,MAAM,WAAW,cAAc;IAC7B;;;;OAIG;IACH,iBAAiB,CAAC,EAAE,CAAC,WAAW,EAAE,MAAM,KAAK,MAAM,GAAG,IAAI,CAAC;CAC5D;AAED;;;;;;GAMG;AACH,wBAAgB,kBAAkB,CAChC,UAAU,EAAE,yBAAyB,GAAG,IAAI,GAAG,SAAS,EACxD,OAAO,GAAE,cAAmB,GAC3B,qBAAqB,CAqCvB;AAED,6EAA6E;AAC7E,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,GAAG,MAAM,CAEhF;AAED,oFAAoF;AACpF,wBAAgB,yBAAyB,CACvC,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,MAAM,GAClB,MAAM,GAAG,IAAI,CAMf;AAED;;;GAGG;AACH,wBAAgB,wBAAwB,CACtC,UAAU,EAAE,yBAAyB,GAAG,IAAI,GAAG,SAAS,EACxD,SAAS,EAAE,MAAM,GAChB,qBAAqB,CAIvB;AAED;;;;GAIG;AACH,wBAAgB,+BAA+B,CAC7C,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,MAAM,GAClB,qBAAqB,CAEvB"}
@@ -0,0 +1,187 @@
1
+ /**
2
+ * Resolve a native Cursor session's `workspaceIdentifier` to a filesystem
3
+ * project root — the missing half of Cursor project-root attribution.
4
+ *
5
+ * DISCOVERY SLICE ONLY. Read-only, no LanceDB writes, no daemon, no capture.
6
+ * The pure resolver ({@link resolveProjectRoot}) takes an injected
7
+ * `readWorkspaceJson` so it is exhaustively unit-testable from synthetic
8
+ * metadata with no filesystem and no real Cursor install. The fs-backed wrapper
9
+ * ({@link resolveCursorProjectRoot}) only reads `workspace.json` (a tiny
10
+ * `{ "folder": "file://…" }` file) — never the workspace's `state.vscdb`,
11
+ * never tokens, never message content.
12
+ *
13
+ * ## The join contract (empirically verified, macOS Cursor "glass", 2026-06)
14
+ *
15
+ * On opening a folder window, Cursor writes the composer header's
16
+ * `workspaceIdentifier` with TWO independent links to the project root:
17
+ *
18
+ * "workspaceIdentifier": {
19
+ * "id": "<workspaceStorageHash>", // e.g. "11ed9332b5bd79fd69ad93a7636abd28"
20
+ * "uri": { "scheme": "file",
21
+ * "fsPath": "/Users/me/projects/foo",
22
+ * "path": "/Users/me/projects/foo",
23
+ * "external": "file:///Users/me/projects/foo" }
24
+ * }
25
+ *
26
+ * 1. INLINE (primary, current builds): `uri.fsPath` IS the project root.
27
+ * No disk lookup needed.
28
+ * 2. ON-DISK (authoritative fallback): `id` equals the directory name under
29
+ * `~/Library/Application Support/Cursor/User/workspaceStorage/<id>/`,
30
+ * whose `workspace.json` holds `{ "folder": "file://<root>" }`. Verified:
31
+ * the same hash appears as both `workspaceIdentifier.id` and the storage
32
+ * dir name, and its `workspace.json.folder` matches `uri.fsPath`.
33
+ *
34
+ * The storage hash is NOT a recomputable md5 of the folder URI (checked: no
35
+ * simple encoding reproduces it) — so resolution MUST read `workspace.json` (or
36
+ * trust the inline uri), never re-derive the hash.
37
+ *
38
+ * ## Failure modes (all first-class — never guessed)
39
+ * - `id === "empty-window"` and no `uri` → no folder open. NOT attributable.
40
+ * This is the common case on a machine with only empty windows.
41
+ * - `workspace.json` has `"workspace": "…code-workspace"` (multi-root) instead
42
+ * of `"folder"` → project root is ambiguous. Do not pick one.
43
+ * - `uri.scheme` / `folder` scheme is not `file` (ssh-remote, wsl, devcontainer)
44
+ * → not a local path.
45
+ * - `id` present but `workspace.json` missing/purged and no inline `uri`
46
+ * → unresolved.
47
+ *
48
+ * The capture slice should scope to a project root ONLY on `resolved`; for every
49
+ * other status, capture WITHOUT project scoping (or behind a user-confirmed
50
+ * fallback). See docs/cursor-capture-discovery.md.
51
+ */
52
+ import fs from "node:fs";
53
+ import path from "node:path";
54
/** The `workspaceIdentifier.id` value Cursor writes for a window with no folder open. */
export const EMPTY_WINDOW_ID = "empty-window";
56
/**
 * Parse a URI string into `{ scheme, path }`. Never throws.
 *
 * @param {string} uri - URI text, e.g. `file:///Users/me/projects/foo`.
 * @returns {{ scheme: string | null, path: string | null }}
 *   `scheme` is the protocol without its trailing colon, or null when the text
 *   is not a parseable URL. `path` is the percent-decoded pathname for `file`
 *   URIs and null otherwise (non-file scheme, unparseable text, or a pathname
 *   whose percent-encoding is malformed).
 */
function parseUriString(uri) {
    let parsed;
    try {
        parsed = new URL(uri);
    }
    catch {
        return { scheme: null, path: null };
    }
    const scheme = parsed.protocol.replace(/:$/, "");
    if (scheme !== "file")
        return { scheme, path: null };
    // file:///abs/path -> decode percent-encoding (spaces, accents in client dirs).
    // `new URL` keeps malformed escapes such as `%zz` verbatim and
    // decodeURIComponent throws URIError on them; report "no usable path"
    // instead of letting the exception escape to the resolver's callers.
    try {
        return { scheme, path: decodeURIComponent(parsed.pathname) };
    }
    catch {
        return { scheme, path: null };
    }
}
71
/**
 * Extract `{ scheme, root }` from the `uri` object embedded in a composer
 * header. A non-`file` scheme short-circuits with a null root; otherwise the
 * first non-empty string among `fsPath` and `path` is the root, and the
 * `external` URI string is parsed only when neither is usable.
 */
function rootFromInlineUri(uri) {
    if (!uri || typeof uri !== "object") {
        return { scheme: null, root: null };
    }
    const declaredScheme = typeof uri.scheme === "string" ? uri.scheme : null;
    if (declaredScheme && declaredScheme !== "file") {
        return { scheme: declaredScheme, root: null };
    }
    // Cursor serializes fsPath / path as already-decoded absolute paths, so
    // they are used verbatim; fsPath takes precedence.
    let localPath = null;
    if (typeof uri.fsPath === "string" && uri.fsPath !== "") {
        localPath = uri.fsPath;
    }
    else if (typeof uri.path === "string" && uri.path !== "") {
        localPath = uri.path;
    }
    if (localPath !== null) {
        return { scheme: declaredScheme ?? "file", root: localPath };
    }
    if (typeof uri.external !== "string") {
        return { scheme: declaredScheme, root: null };
    }
    const external = parseUriString(uri.external);
    return { scheme: external.scheme ?? declaredScheme, root: external.path };
}
90
/**
 * Interpret the text of a `workspace.json` file. Pure.
 *
 * Yields `{ kind: "folder", scheme, root }` for a single-folder window,
 * `{ kind: "multi_root", configPath }` when the file points at a
 * `.code-workspace` instead, and `{ kind: "unknown" }` for invalid JSON,
 * non-object JSON, or an object with neither string field.
 */
export function parseWorkspaceJson(text) {
    let decoded;
    try {
        decoded = JSON.parse(text);
    }
    catch {
        return { kind: "unknown" };
    }
    if (decoded === null || typeof decoded !== "object") {
        return { kind: "unknown" };
    }
    // `folder` wins over `workspace` when both are present.
    if (typeof decoded.folder === "string") {
        const folderUri = parseUriString(decoded.folder);
        return { kind: "folder", scheme: folderUri.scheme, root: folderUri.path };
    }
    if (typeof decoded.workspace === "string") {
        const workspaceUri = parseUriString(decoded.workspace);
        return { kind: "multi_root", configPath: workspaceUri.path };
    }
    return { kind: "unknown" };
}
115
/**
 * Turn the text of a `workspace.json` into a resolution for `workspaceId`, or
 * null when the text yields nothing usable (unknown shape, or a folder entry
 * with neither a non-file scheme nor a path).
 */
function resolutionFromWorkspaceJson(workspaceId, text) {
    const entry = parseWorkspaceJson(text);
    switch (entry.kind) {
        case "folder":
            if (entry.scheme && entry.scheme !== "file") {
                return { status: "remote", workspaceId, scheme: entry.scheme };
            }
            if (entry.root) {
                return { status: "resolved", root: entry.root, source: "workspace_json", workspaceId };
            }
            return null;
        case "multi_root":
            return { status: "multi_root", workspaceId, configPath: entry.configPath };
        default:
            return null;
    }
}
/**
 * Map a composer header's `workspaceIdentifier` to a project root.
 *
 * Pure apart from the injected {@link ResolveOptions.readWorkspaceJson}.
 * Order of attempts: the empty-window sentinel, then the inline header uri
 * (current builds), then the on-disk `workspace.json` join (legacy builds /
 * uri stripped). No guessing: anything that cannot be resolved is reported
 * with an explicit status.
 */
export function resolveProjectRoot(identifier, options = {}) {
    const workspaceId = typeof identifier?.id === "string" ? identifier.id : null;
    // Sentinel id: no folder is open, so there is nothing to attribute.
    if (workspaceId === EMPTY_WINDOW_ID) {
        return { status: "empty_window", workspaceId: EMPTY_WINDOW_ID };
    }
    // Inline uri: the header itself names the folder.
    const { scheme: inlineScheme, root: inlineRoot } = rootFromInlineUri(identifier?.uri);
    if (inlineScheme && inlineScheme !== "file") {
        return { status: "remote", workspaceId, scheme: inlineScheme };
    }
    if (inlineRoot) {
        return { status: "resolved", root: inlineRoot, source: "header_uri", workspaceId };
    }
    // On-disk fallback: id -> workspaceStorage/<id>/workspace.json, only when
    // both an id and a reader are available.
    const text = workspaceId && options.readWorkspaceJson
        ? options.readWorkspaceJson(workspaceId)
        : null;
    const fromDisk = text ? resolutionFromWorkspaceJson(workspaceId, text) : null;
    // Nothing reliable was found: report that rather than guess.
    return fromDisk ?? { status: "unresolved", workspaceId };
}
157
/** Location of `workspace.json` for `workspaceId` inside the Cursor app-support dir `cursorDir`. */
export function workspaceJsonPath(cursorDir, workspaceId) {
    const segments = ["User", "workspaceStorage", workspaceId, "workspace.json"];
    return path.join(cursorDir, ...segments);
}
161
/**
 * Load the text of a workspace's `workspace.json` without modifying anything.
 * Returns null when the file is missing or cannot be read.
 */
export function readWorkspaceJsonFromDisk(cursorDir, workspaceId) {
    let contents = null;
    try {
        // The path is built inside the try so that any failure, not only a
        // failed read, results in null.
        contents = fs.readFileSync(workspaceJsonPath(cursorDir, workspaceId), { encoding: "utf-8" });
    }
    catch {
        // Absent or unreadable file: fall through with null.
    }
    return contents;
}
170
/**
 * Resolve a header's identifier against a real Cursor app-support dir by
 * handing the pure resolver a reader that loads `workspace.json` from
 * `cursorDir` (read-only, and only when the resolver asks for it).
 */
export function resolveCursorProjectRoot(identifier, cursorDir) {
    const readWorkspaceJson = (workspaceId) => readWorkspaceJsonFromDisk(cursorDir, workspaceId);
    return resolveProjectRoot(identifier, { readWorkspaceJson });
}
179
/**
 * Resolve a project root when only the workspace id is known (no inline uri).
 * The probe uses this to upgrade `unresolved` discovery candidates on legacy
 * builds through the on-disk `workspace.json` join.
 */
export function resolveProjectRootByWorkspaceId(cursorDir, workspaceId) {
    const idOnlyIdentifier = { id: workspaceId };
    return resolveCursorProjectRoot(idOnlyIdentifier, cursorDir);
}
187
+ //# sourceMappingURL=workspace-root.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"workspace-root.js","sourceRoot":"","sources":["../../../../src/ingestion/cursor/workspace-root.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkDG;AACH,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,4EAA4E;AAC5E,MAAM,CAAC,MAAM,eAAe,GAAG,cAAc,CAAC;AAuC9C,+EAA+E;AAC/E,SAAS,cAAc,CAAC,GAAW;IACjC,IAAI,MAAW,CAAC;IAChB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IACxB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACtC,CAAC;IACD,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IACjD,IAAI,MAAM,KAAK,MAAM;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACrD,+EAA+E;IAC/E,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,kBAAkB,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;AAC/D,CAAC;AAED,oEAAoE;AACpE,SAAS,iBAAiB,CACxB,GAA0C;IAE1C,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACzE,MAAM,MAAM,GAAG,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC;IAClE,IAAI,MAAM,IAAI,MAAM,KAAK,MAAM;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAC/D,8EAA8E;IAC9E,MAAM,MAAM,GACV,CAAC,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,CAAC;QAC9C,CAAC,OAAO,GAAG,CAAC,IAAI,KAAK,QAAQ,IAAI,GAAG,CAAC,IAAI,CAAC;QAC1C,IAAI,CAAC;IACP,IAAI,MAAM;QAAE,OAAO,EAAE,MAAM,EAAE,MAAM,IAAI,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;IAC9D,IAAI,OAAO,GAAG,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,cAAc,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACzC,OAAO,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,IAAI,MAAM,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;IAC1D,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;AAChC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAChC,IAAY;IAKZ,IAAI,GAAY,CAAC;IACjB,IAAI,CAAC;QACH,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACzB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;IAC7B,CAAC;IACD,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ;QAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;IAChE,MAAM,GAAG,GAAG,GAA8B,CAAC;IAC3C,IAAI,
OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;QACnC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,cAAc,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACvD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC;IAC7C,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,SAAS,KAAK,QAAQ,EAAE,CAAC;QACtC,MAAM,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,cAAc,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAClD,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC;IAC/C,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;AAC7B,CAAC;AAYD;;;;;;GAMG;AACH,MAAM,UAAU,kBAAkB,CAChC,UAAwD,EACxD,UAA0B,EAAE;IAE5B,MAAM,EAAE,GAAG,OAAO,UAAU,EAAE,EAAE,KAAK,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IAErE,mEAAmE;IACnE,IAAI,EAAE,KAAK,eAAe,EAAE,CAAC;QAC3B,OAAO,EAAE,MAAM,EAAE,cAAc,EAAE,WAAW,EAAE,eAAe,EAAE,CAAC;IAClE,CAAC;IAED,mEAAmE;IACnE,MAAM,MAAM,GAAG,iBAAiB,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;IAClD,IAAI,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;QAC9C,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC;IACtE,CAAC;IACD,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;QAChB,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,YAAY,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC;IAC1F,CAAC;IAED,wEAAwE;IACxE,IAAI,EAAE,IAAI,OAAO,CAAC,iBAAiB,EAAE,CAAC;QACpC,MAAM,IAAI,GAAG,OAAO,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;QAC3C,IAAI,IAAI,EAAE,CAAC;YACT,MAAM,MAAM,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;YACxC,IAAI,MAAM,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC7B,IAAI,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;oBAC9C,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC;gBACtE,CAAC;gBACD,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;oBAChB,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,gBAAgB,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC;gBAC9F,CAAC;YACH,CAAC;iBAAM,IAAI,MAAM,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;gBACxC,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,WAAW,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,CAAC;YAClF,CAAC;QACH,CAAC;IACH,CAAC;IAED,sCAAsC;IACtC,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC;AACnD,CAAC;AAED,6EAA6E;AAC7E,MAAM
,UAAU,iBAAiB,CAAC,SAAiB,EAAE,WAAmB;IACtE,OAAO,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,kBAAkB,EAAE,WAAW,EAAE,gBAAgB,CAAC,CAAC;AACzF,CAAC;AAED,oFAAoF;AACpF,MAAM,UAAU,yBAAyB,CACvC,SAAiB,EACjB,WAAmB;IAEnB,IAAI,CAAC;QACH,OAAO,EAAE,CAAC,YAAY,CAAC,iBAAiB,CAAC,SAAS,EAAE,WAAW,CAAC,EAAE,OAAO,CAAC,CAAC;IAC7E,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,wBAAwB,CACtC,UAAwD,EACxD,SAAiB;IAEjB,OAAO,kBAAkB,CAAC,UAAU,EAAE;QACpC,iBAAiB,EAAE,CAAC,WAAW,EAAE,EAAE,CAAC,yBAAyB,CAAC,SAAS,EAAE,WAAW,CAAC;KACtF,CAAC,CAAC;AACL,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,+BAA+B,CAC7C,SAAiB,EACjB,WAAmB;IAEnB,OAAO,wBAAwB,CAAC,EAAE,EAAE,EAAE,WAAW,EAAE,EAAE,SAAS,CAAC,CAAC;AAClE,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"indexer.d.ts","sourceRoot":"","sources":["../../../src/ingestion/indexer.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AASxF,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,KAAK,GAAG,QAAQ,GAAG,QAAQ,CAAC;IAClC,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,aAAa;IAC5B,UAAU,EAAE,UAAU,CAAC;IACvB,WAAW,EAAE,WAAW,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;IAChC,wEAAwE;IACxE,SAAS,CAAC,EAAE,iBAAiB,GAAG,uBAAuB,CAAC;IACxD;;;;;OAKG;IACH,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;;GAGG;AACH,wBAAgB,MAAM,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,CAEjD;AAED,qBAAa,OAAO;IAClB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAoB;IAC9C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAgB;gBAE3B,SAAS,EAAE,iBAAiB,EAAE,MAAM,EAAE,aAAa;IAK/D;;;;OAIG;IACG,WAAW,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC;YAcpC,YAAY;YAiEZ,YAAY;CAK3B"}
1
+ {"version":3,"file":"indexer.d.ts","sourceRoot":"","sources":["../../../src/ingestion/indexer.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAYxF,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,KAAK,GAAG,QAAQ,GAAG,QAAQ,CAAC;IAClC,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,aAAa;IAC5B,UAAU,EAAE,UAAU,CAAC;IACvB,WAAW,EAAE,WAAW,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;IAChC,wEAAwE;IACxE,SAAS,CAAC,EAAE,iBAAiB,GAAG,uBAAuB,CAAC;IACxD;;;;;OAKG;IACH,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;;GAGG;AACH,wBAAgB,MAAM,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,CAEjD;AAED,qBAAa,OAAO;IAClB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAoB;IAC9C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAgB;gBAE3B,SAAS,EAAE,iBAAiB,EAAE,MAAM,EAAE,aAAa;IAK/D;;;;OAIG;IACG,WAAW,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC;YAcpC,YAAY;YAuEZ,YAAY;CAK3B"}
@@ -12,6 +12,9 @@ import { validatePath, validateUnlinkPath } from "../security/paths.js";
12
12
  import { writeSkipQuarantine } from "./skip-quarantine.js";
13
13
  import { recordEmbed } from "../observability/embedding-events.js";
14
14
  import { recordIndexWrite } from "../observability/index-events.js";
15
+ import { chunkText, chunkingEnabled } from "./chunk-text.js";
16
+ import { docChunkId, docChunkMetadata } from "./chunk-meta.js";
17
+ import { eqFilter } from "../storage/filter.js";
15
18
  /**
16
19
  * Deterministic row ID from the canonical source path.
17
20
  * Same file always gets the same ID, enabling upsert via delete+add.
@@ -48,10 +51,11 @@ export class Indexer {
48
51
  // strings with HTTP 400, and indexing an empty row produces nothing
49
52
  // searchable anyway. Quarantine the skip so it's visible (not stderr-only)
50
53
  // and remove any stale row from a prior good extraction of the same path.
54
+ const table = getTable(this.config.tableName ?? "structured_docs");
55
+ const sourcePathFilter = eqFilter("source_path", filePath);
51
56
  if (isBlank(doc.content)) {
52
- const id = fileId(filePath);
53
- const table = getTable(this.config.tableName ?? "structured_docs");
54
- await table.delete(`id = '${id}'`);
57
+ // Remove the whole prior set (single row or chunk set) for this path.
58
+ await table.delete(sourcePathFilter);
55
59
  await writeSkipQuarantine(this.config.dataDir, {
56
60
  reason: "empty_extracted_content",
57
61
  source_path: filePath,
@@ -59,41 +63,46 @@ export class Indexer {
59
63
  });
60
64
  return;
61
65
  }
62
- const embeddingVec = await recordEmbed(this.config.dataDir, this.embedding, {
63
- pipeline: this.config.sourceType === "filesystem_watched"
64
- ? "watcher"
65
- : "scheduled_scan",
66
- operation: "document_embedding",
67
- input_count: 1,
68
- }, () => this.embedding.embed(doc.content));
69
- const id = fileId(filePath);
70
- const table = getTable(this.config.tableName ?? "structured_docs");
71
- // Upsert: delete existing row (if any), then add new one.
72
- await table.delete(`id = '${id}'`);
73
- const row = {
74
- id,
75
- source_path: filePath,
76
- content: doc.content,
77
- embedding: embeddingVec,
78
- source_type: this.config.sourceType,
79
- source_scope: this.config.sourceScope,
80
- client_name: this.config.clientName,
81
- indexed_at: new Date().toISOString(),
82
- metadata: JSON.stringify(doc.metadata),
83
- };
66
+ const pipeline = this.config.sourceType === "filesystem_watched" ? "watcher" : "scheduled_scan";
67
+ const baseId = fileId(filePath);
68
+ // Chunk (flag-gated). chunkText short-circuits small docs to a single
69
+ // chunk, so a below-threshold doc — and every doc with the flag off —
70
+ // produces exactly one unmarked row, byte-identical to the prior behavior.
71
+ const chunks = chunkingEnabled() ? chunkText(doc.content) : [doc.content];
72
+ const count = chunks.length;
73
+ // Data-safety: embed the FULL new set BEFORE the destructive delete, so a
74
+ // failure leaves the old set intact (filesystem is the source of truth).
75
+ const indexedAt = new Date().toISOString();
76
+ const rows = [];
77
+ for (let i = 0; i < count; i++) {
78
+ const chunk = chunks[i];
79
+ const embeddingVec = await recordEmbed(this.config.dataDir, this.embedding, { pipeline, operation: "document_embedding", input_count: 1 }, () => this.embedding.embed(chunk));
80
+ rows.push({
81
+ id: count > 1 ? docChunkId(baseId, i) : baseId,
82
+ source_path: filePath,
83
+ content: chunk,
84
+ embedding: embeddingVec,
85
+ source_type: this.config.sourceType,
86
+ source_scope: this.config.sourceScope,
87
+ client_name: this.config.clientName,
88
+ indexed_at: indexedAt,
89
+ metadata: JSON.stringify(docChunkMetadata(doc.metadata, i, count)),
90
+ });
91
+ }
92
+ // Upsert by source_path: removes any prior chunk set AND any legacy
93
+ // single row for this file before writing the new set (no dup/stale chunks).
94
+ await table.delete(sourcePathFilter);
84
95
  await recordIndexWrite(this.config.dataDir, {
85
96
  table: this.config.tableName ?? "structured_docs",
86
- pipeline: this.config.sourceType === "filesystem_watched"
87
- ? "watcher"
88
- : "scheduled_scan",
97
+ pipeline,
89
98
  operation: "structured_doc_upsert",
90
- row_count: 1,
91
- }, () => table.add([row]));
99
+ row_count: rows.length,
100
+ }, () => table.add(rows));
92
101
  }
93
102
  async handleDelete(filePath) {
94
- const id = fileId(filePath);
95
103
  const table = getTable(this.config.tableName ?? "structured_docs");
96
- await table.delete(`id = '${id}'`);
104
+ // Delete by source_path so all chunks of the file are removed together.
105
+ await table.delete(eqFilter("source_path", filePath));
97
106
  }
98
107
  }
99
108
  function isBlank(s) {
@@ -1 +1 @@
1
- {"version":3,"file":"indexer.js","sourceRoot":"","sources":["../../../src/ingestion/indexer.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,OAAO,MAAM,MAAM,aAAa,CAAC;AAGjC,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AACxE,OAAO,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAC3D,OAAO,EAAE,WAAW,EAAE,MAAM,sCAAsC,CAAC;AACnE,OAAO,EAAE,gBAAgB,EAAE,MAAM,kCAAkC,CAAC;AAuBpE;;;GAGG;AACH,MAAM,UAAU,MAAM,CAAC,UAAkB;IACvC,OAAO,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AACnF,CAAC;AAED,MAAM,OAAO,OAAO;IACD,SAAS,CAAoB;IAC7B,MAAM,CAAgB;IAEvC,YAAY,SAA4B,EAAE,MAAqB;QAC7D,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,WAAW,CAAC,KAAgB;QAChC,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC5B,MAAM,SAAS,GAAG,kBAAkB,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;YAC3E,MAAM,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;YACnC,OAAO;QACT,CAAC;QAED,MAAM,SAAS,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;QAErE,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC;YAAE,OAAO;QAEpC,MAAM,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;IACrC,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,QAAgB;QACzC,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,QAAQ,CAAC,CAAC;QAEpC,0EAA0E;QAC1E,oEAAoE;QACpE,2EAA2E;QAC3E,0EAA0E;QAC1E,IAAI,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YACzB,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC;YAC5B,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,IAAI,iBAAiB,CAAC,CAAC;YACnE,MAAM,KAAK,CAAC,MAAM,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;YACnC,MAAM,mBAAmB,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE;gBAC7C,MAAM,EAAE,yBAAyB;gBACjC,WAAW,EAAE,QAAQ;gBACrB,QAAQ,EAAE,GAAG,CAAC,QAAQ;aACvB,CAAC,CAAC;YACH,OAAO;QACT,CAAC;QAED,MAAM,YAAY,GAAG,MAAM,WAAW,CACpC,IAAI,CAAC,MAAM,CAAC,OAAO,EACnB,IAAI,CAAC,SAAS,EACd;YACE,QAAQ,EACN,IAAI,CAAC,MAAM,CAAC,UAAU,KAAK,oBAAoB;gBAC7C,CAAC,CAAC,SAAS;gBACX,CAAC,CAAC,gBAAgB;YACtB,SAAS,EAAE,oBAAoB;YAC/B,WAAW,EAAE,CAAC;SACf,
EACD,GAAG,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,CACxC,CAAC;QACF,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC5B,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,IAAI,iBAAiB,CAAC,CAAC;QAEnE,0DAA0D;QAC1D,MAAM,KAAK,CAAC,MAAM,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;QAEnC,MAAM,GAAG,GAAqB;YAC5B,EAAE;YACF,WAAW,EAAE,QAAQ;YACrB,OAAO,EAAE,GAAG,CAAC,OAAO;YACpB,SAAS,EAAE,YAAY;YACvB,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,UAAU;YACnC,YAAY,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;YACrC,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,UAAU;YACnC,UAAU,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACpC,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC;SACvC,CAAC;QAEF,MAAM,gBAAgB,CACpB,IAAI,CAAC,MAAM,CAAC,OAAO,EACnB;YACE,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS,IAAI,iBAAiB;YACjD,QAAQ,EACN,IAAI,CAAC,MAAM,CAAC,UAAU,KAAK,oBAAoB;gBAC7C,CAAC,CAAC,SAAS;gBACX,CAAC,CAAC,gBAAgB;YACtB,SAAS,EAAE,uBAAuB;YAClC,SAAS,EAAE,CAAC;SACb,EACD,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CACvB,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,QAAgB;QACzC,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC5B,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,IAAI,iBAAiB,CAAC,CAAC;QACnE,MAAM,KAAK,CAAC,MAAM,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;IACrC,CAAC;CACF;AAED,SAAS,OAAO,CAAC,CAAS;IACxB,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,CAAC;AAC/B,CAAC"}
1
+ {"version":3,"file":"indexer.js","sourceRoot":"","sources":["../../../src/ingestion/indexer.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,OAAO,MAAM,MAAM,aAAa,CAAC;AAGjC,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AACxE,OAAO,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAC3D,OAAO,EAAE,WAAW,EAAE,MAAM,sCAAsC,CAAC;AACnE,OAAO,EAAE,gBAAgB,EAAE,MAAM,kCAAkC,CAAC;AACpE,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAC7D,OAAO,EAAE,UAAU,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAC/D,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAuBhD;;;GAGG;AACH,MAAM,UAAU,MAAM,CAAC,UAAkB;IACvC,OAAO,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AACnF,CAAC;AAED,MAAM,OAAO,OAAO;IACD,SAAS,CAAoB;IAC7B,MAAM,CAAgB;IAEvC,YAAY,SAA4B,EAAE,MAAqB;QAC7D,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,WAAW,CAAC,KAAgB;QAChC,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC5B,MAAM,SAAS,GAAG,kBAAkB,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;YAC3E,MAAM,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;YACnC,OAAO;QACT,CAAC;QAED,MAAM,SAAS,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;QAErE,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC;YAAE,OAAO;QAEpC,MAAM,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;IACrC,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,QAAgB;QACzC,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,QAAQ,CAAC,CAAC;QAEpC,0EAA0E;QAC1E,oEAAoE;QACpE,2EAA2E;QAC3E,0EAA0E;QAC1E,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,IAAI,iBAAiB,CAAC,CAAC;QACnE,MAAM,gBAAgB,GAAG,QAAQ,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;QAE3D,IAAI,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YACzB,sEAAsE;YACtE,MAAM,KAAK,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC;YACrC,MAAM,mBAAmB,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE;gBAC7C,MAAM,EAAE,yBAAyB;gBACjC,WAAW,EAAE,QAAQ;gBACrB,QAAQ,EAAE,GAAG,CAAC,QAAQ;aACvB,CAAC,CAAC;YACH,OAAO;QACT,CAAC;QAED,MAAM,QAAQ,GACZ,IAAI,CAAC,MAAM,CAAC,UAAU,KAAK,oBAAoB,CAAC,CAA
C,CAAC,SAAS,CAAC,CAAC,CAAC,gBAAgB,CAAC;QACjF,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC;QAEhC,sEAAsE;QACtE,sEAAsE;QACtE,2EAA2E;QAC3E,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAC1E,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC;QAE5B,0EAA0E;QAC1E,yEAAyE;QACzE,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC3C,MAAM,IAAI,GAAuB,EAAE,CAAC;QACpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAE,CAAC;YACzB,MAAM,YAAY,GAAG,MAAM,WAAW,CACpC,IAAI,CAAC,MAAM,CAAC,OAAO,EACnB,IAAI,CAAC,SAAS,EACd,EAAE,QAAQ,EAAE,SAAS,EAAE,oBAAoB,EAAE,WAAW,EAAE,CAAC,EAAE,EAC7D,GAAG,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,CAClC,CAAC;YACF,IAAI,CAAC,IAAI,CAAC;gBACR,EAAE,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM;gBAC9C,WAAW,EAAE,QAAQ;gBACrB,OAAO,EAAE,KAAK;gBACd,SAAS,EAAE,YAAY;gBACvB,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,UAAU;gBACnC,YAAY,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;gBACrC,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,UAAU;gBACnC,UAAU,EAAE,SAAS;gBACrB,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,gBAAgB,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,EAAE,KAAK,CAAC,CAAC;aACnE,CAAC,CAAC;QACL,CAAC;QAED,oEAAoE;QACpE,6EAA6E;QAC7E,MAAM,KAAK,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC;QACrC,MAAM,gBAAgB,CACpB,IAAI,CAAC,MAAM,CAAC,OAAO,EACnB;YACE,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS,IAAI,iBAAiB;YACjD,QAAQ;YACR,SAAS,EAAE,uBAAuB;YAClC,SAAS,EAAE,IAAI,CAAC,MAAM;SACvB,EACD,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CACtB,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,QAAgB;QACzC,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,IAAI,iBAAiB,CAAC,CAAC;QACnE,wEAAwE;QACxE,MAAM,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC,CAAC;IACxD,CAAC;CACF;AAED,SAAS,OAAO,CAAC,CAAS;IACxB,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,CAAC;AAC/B,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"compact.d.ts","sourceRoot":"","sources":["../../../../src/jobs/handlers/compact.ts"],"names":[],"mappings":"AAgCA,OAAO,KAAK,EAAO,UAAU,EAAE,MAAM,aAAa,CAAC;AACnD,OAAO,KAAK,EAAE,iBAAiB,EAAE,gBAAgB,EAAqB,MAAM,0BAA0B,CAAC;AACvG,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AAC1D,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AAQ1D,MAAM,WAAW,WAAW;IAC1B,iBAAiB,EAAE,iBAAiB,CAAC;IACrC,gBAAgB,EAAE,gBAAgB,CAAC;IACnC,OAAO,EAAE,MAAM,CAAC;IAChB,gBAAgB,EAAE,MAAM,CAAC;IACzB,QAAQ,EAAE,OAAO,EAAE,CAAC;IACpB,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;CACzB;AAED,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,gBAAgB;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,QAAQ,EAAE,OAAO,CAAC;CACnB;AASD,4DAA4D;AAC5D,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAE5D;AAMD,wBAAgB,oBAAoB,CAAC,IAAI,EAAE,WAAW,GAAG,UAAU,CAwBlE"}
1
+ {"version":3,"file":"compact.d.ts","sourceRoot":"","sources":["../../../../src/jobs/handlers/compact.ts"],"names":[],"mappings":"AAgCA,OAAO,KAAK,EAAO,UAAU,EAAE,MAAM,aAAa,CAAC;AACnD,OAAO,KAAK,EAAE,iBAAiB,EAAE,gBAAgB,EAAqB,MAAM,0BAA0B,CAAC;AACvG,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AAC1D,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AAS1D,MAAM,WAAW,WAAW;IAC1B,iBAAiB,EAAE,iBAAiB,CAAC;IACrC,gBAAgB,EAAE,gBAAgB,CAAC;IACnC,OAAO,EAAE,MAAM,CAAC;IAChB,gBAAgB,EAAE,MAAM,CAAC;IACzB,QAAQ,EAAE,OAAO,EAAE,CAAC;IACpB,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;CACzB;AAED,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,gBAAgB;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,QAAQ,EAAE,OAAO,CAAC;CACnB;AASD,4DAA4D;AAC5D,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAE5D;AAMD,wBAAgB,oBAAoB,CAAC,IAAI,EAAE,WAAW,GAAG,UAAU,CAwBlE"}