@getrift/rift 0.1.0-beta.20 → 0.1.0-beta.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -3
- package/dist/src/capture/auto-capture.d.ts +105 -4
- package/dist/src/capture/auto-capture.d.ts.map +1 -1
- package/dist/src/capture/auto-capture.js +313 -34
- package/dist/src/capture/auto-capture.js.map +1 -1
- package/dist/src/capture/claude-cli-triage-provider.d.ts +28 -0
- package/dist/src/capture/claude-cli-triage-provider.d.ts.map +1 -0
- package/dist/src/capture/claude-cli-triage-provider.js +88 -0
- package/dist/src/capture/claude-cli-triage-provider.js.map +1 -0
- package/dist/src/capture/codex-cli-triage-provider.d.ts.map +1 -1
- package/dist/src/capture/codex-cli-triage-provider.js +1 -33
- package/dist/src/capture/codex-cli-triage-provider.js.map +1 -1
- package/dist/src/capture/cursor-capture.d.ts +89 -0
- package/dist/src/capture/cursor-capture.d.ts.map +1 -0
- package/dist/src/capture/cursor-capture.js +121 -0
- package/dist/src/capture/cursor-capture.js.map +1 -0
- package/dist/src/capture/observability.d.ts +30 -0
- package/dist/src/capture/observability.d.ts.map +1 -1
- package/dist/src/capture/observability.js +29 -0
- package/dist/src/capture/observability.js.map +1 -1
- package/dist/src/capture/sources.d.ts +41 -3
- package/dist/src/capture/sources.d.ts.map +1 -1
- package/dist/src/capture/sources.js +43 -1
- package/dist/src/capture/sources.js.map +1 -1
- package/dist/src/capture/triage-classification.d.ts +69 -0
- package/dist/src/capture/triage-classification.d.ts.map +1 -0
- package/dist/src/capture/triage-classification.js +62 -0
- package/dist/src/capture/triage-classification.js.map +1 -0
- package/dist/src/capture/triage-provider-factory.d.ts +36 -0
- package/dist/src/capture/triage-provider-factory.d.ts.map +1 -0
- package/dist/src/capture/triage-provider-factory.js +55 -0
- package/dist/src/capture/triage-provider-factory.js.map +1 -0
- package/dist/src/capture/triage.d.ts +1 -1
- package/dist/src/capture/triage.d.ts.map +1 -1
- package/dist/src/capture/triage.js +8 -6
- package/dist/src/capture/triage.js.map +1 -1
- package/dist/src/cli/commands/capture.d.ts.map +1 -1
- package/dist/src/cli/commands/capture.js +72 -17
- package/dist/src/cli/commands/capture.js.map +1 -1
- package/dist/src/cli/commands/chunk-backfill.d.ts +13 -0
- package/dist/src/cli/commands/chunk-backfill.d.ts.map +1 -0
- package/dist/src/cli/commands/chunk-backfill.js +157 -0
- package/dist/src/cli/commands/chunk-backfill.js.map +1 -0
- package/dist/src/cli/commands/cursor-probe.d.ts +20 -0
- package/dist/src/cli/commands/cursor-probe.d.ts.map +1 -0
- package/dist/src/cli/commands/cursor-probe.js +162 -0
- package/dist/src/cli/commands/cursor-probe.js.map +1 -0
- package/dist/src/cli/commands/menubar.d.ts +50 -0
- package/dist/src/cli/commands/menubar.d.ts.map +1 -1
- package/dist/src/cli/commands/menubar.js +224 -16
- package/dist/src/cli/commands/menubar.js.map +1 -1
- package/dist/src/cli/commands/onboard.d.ts +36 -7
- package/dist/src/cli/commands/onboard.d.ts.map +1 -1
- package/dist/src/cli/commands/onboard.js +256 -53
- package/dist/src/cli/commands/onboard.js.map +1 -1
- package/dist/src/cli/commands/status.d.ts.map +1 -1
- package/dist/src/cli/commands/status.js +16 -0
- package/dist/src/cli/commands/status.js.map +1 -1
- package/dist/src/cli/commands/update.d.ts +34 -1
- package/dist/src/cli/commands/update.d.ts.map +1 -1
- package/dist/src/cli/commands/update.js +179 -2
- package/dist/src/cli/commands/update.js.map +1 -1
- package/dist/src/cli/index.d.ts.map +1 -1
- package/dist/src/cli/index.js +4 -0
- package/dist/src/cli/index.js.map +1 -1
- package/dist/src/cli/postinstall-menubar.d.ts.map +1 -1
- package/dist/src/cli/postinstall-menubar.js +14 -0
- package/dist/src/cli/postinstall-menubar.js.map +1 -1
- package/dist/src/cli/status/friend-header.d.ts +18 -0
- package/dist/src/cli/status/friend-header.d.ts.map +1 -1
- package/dist/src/cli/status/friend-header.js +137 -0
- package/dist/src/cli/status/friend-header.js.map +1 -1
- package/dist/src/cli/status/local-signals.d.ts +41 -0
- package/dist/src/cli/status/local-signals.d.ts.map +1 -1
- package/dist/src/cli/status/local-signals.js +48 -0
- package/dist/src/cli/status/local-signals.js.map +1 -1
- package/dist/src/config/schema.d.ts +220 -14
- package/dist/src/config/schema.d.ts.map +1 -1
- package/dist/src/config/schema.js +82 -7
- package/dist/src/config/schema.js.map +1 -1
- package/dist/src/diagnostics/claude-preflight.d.ts +34 -0
- package/dist/src/diagnostics/claude-preflight.d.ts.map +1 -0
- package/dist/src/diagnostics/claude-preflight.js +89 -0
- package/dist/src/diagnostics/claude-preflight.js.map +1 -0
- package/dist/src/diagnostics/codex-preflight.d.ts +1 -1
- package/dist/src/diagnostics/codex-preflight.d.ts.map +1 -1
- package/dist/src/diagnostics/codex-preflight.js +14 -0
- package/dist/src/diagnostics/codex-preflight.js.map +1 -1
- package/dist/src/diagnostics/doctor.d.ts +9 -1
- package/dist/src/diagnostics/doctor.d.ts.map +1 -1
- package/dist/src/diagnostics/doctor.js +57 -2
- package/dist/src/diagnostics/doctor.js.map +1 -1
- package/dist/src/ingestion/chunk-meta.d.ts +85 -0
- package/dist/src/ingestion/chunk-meta.d.ts.map +1 -0
- package/dist/src/ingestion/chunk-meta.js +167 -0
- package/dist/src/ingestion/chunk-meta.js.map +1 -0
- package/dist/src/ingestion/chunk-text.d.ts +39 -0
- package/dist/src/ingestion/chunk-text.d.ts.map +1 -0
- package/dist/src/ingestion/chunk-text.js +114 -0
- package/dist/src/ingestion/chunk-text.js.map +1 -0
- package/dist/src/ingestion/cursor/cursor-store.d.ts +177 -0
- package/dist/src/ingestion/cursor/cursor-store.d.ts.map +1 -0
- package/dist/src/ingestion/cursor/cursor-store.js +243 -0
- package/dist/src/ingestion/cursor/cursor-store.js.map +1 -0
- package/dist/src/ingestion/cursor/enrich-roots.d.ts +16 -0
- package/dist/src/ingestion/cursor/enrich-roots.d.ts.map +1 -0
- package/dist/src/ingestion/cursor/enrich-roots.js +22 -0
- package/dist/src/ingestion/cursor/enrich-roots.js.map +1 -0
- package/dist/src/ingestion/cursor/vscdb-reader.d.ts +32 -0
- package/dist/src/ingestion/cursor/vscdb-reader.d.ts.map +1 -0
- package/dist/src/ingestion/cursor/vscdb-reader.js +113 -0
- package/dist/src/ingestion/cursor/vscdb-reader.js.map +1 -0
- package/dist/src/ingestion/cursor/workspace-root.d.ts +96 -0
- package/dist/src/ingestion/cursor/workspace-root.d.ts.map +1 -0
- package/dist/src/ingestion/cursor/workspace-root.js +187 -0
- package/dist/src/ingestion/cursor/workspace-root.js.map +1 -0
- package/dist/src/ingestion/indexer.d.ts.map +1 -1
- package/dist/src/ingestion/indexer.js +41 -32
- package/dist/src/ingestion/indexer.js.map +1 -1
- package/dist/src/jobs/handlers/compact.d.ts.map +1 -1
- package/dist/src/jobs/handlers/compact.js +9 -4
- package/dist/src/jobs/handlers/compact.js.map +1 -1
- package/dist/src/jobs/handlers/ingest.d.ts.map +1 -1
- package/dist/src/jobs/handlers/ingest.js +60 -30
- package/dist/src/jobs/handlers/ingest.js.map +1 -1
- package/dist/src/jobs/handlers/reconcile.d.ts.map +1 -1
- package/dist/src/jobs/handlers/reconcile.js +128 -45
- package/dist/src/jobs/handlers/reconcile.js.map +1 -1
- package/dist/src/jobs/handlers/save.d.ts.map +1 -1
- package/dist/src/jobs/handlers/save.js +122 -72
- package/dist/src/jobs/handlers/save.js.map +1 -1
- package/dist/src/jobs/types.d.ts +1 -1
- package/dist/src/main.js +26 -15
- package/dist/src/main.js.map +1 -1
- package/dist/src/mcp/server.d.ts.map +1 -1
- package/dist/src/mcp/server.js +10 -3
- package/dist/src/mcp/server.js.map +1 -1
- package/dist/src/mcp/tools/context-pack.d.ts.map +1 -1
- package/dist/src/mcp/tools/context-pack.js +7 -1
- package/dist/src/mcp/tools/context-pack.js.map +1 -1
- package/dist/src/mcp/tools/conversations-search.d.ts +1 -1
- package/dist/src/mcp/tools/conversations-search.d.ts.map +1 -1
- package/dist/src/mcp/tools/conversations-search.js +7 -1
- package/dist/src/mcp/tools/conversations-search.js.map +1 -1
- package/dist/src/mcp/tools/evidence-feedback.d.ts +60 -0
- package/dist/src/mcp/tools/evidence-feedback.d.ts.map +1 -0
- package/dist/src/mcp/tools/evidence-feedback.js +62 -0
- package/dist/src/mcp/tools/evidence-feedback.js.map +1 -0
- package/dist/src/mcp/tools/log-outcome.d.ts +72 -0
- package/dist/src/mcp/tools/log-outcome.d.ts.map +1 -0
- package/dist/src/mcp/tools/log-outcome.js +59 -0
- package/dist/src/mcp/tools/log-outcome.js.map +1 -0
- package/dist/src/mcp/tools/open-evidence.d.ts +37 -0
- package/dist/src/mcp/tools/open-evidence.d.ts.map +1 -0
- package/dist/src/mcp/tools/open-evidence.js +72 -0
- package/dist/src/mcp/tools/open-evidence.js.map +1 -0
- package/dist/src/mcp/tools/save.d.ts +7 -2
- package/dist/src/mcp/tools/save.d.ts.map +1 -1
- package/dist/src/mcp/tools/save.js +7 -2
- package/dist/src/mcp/tools/save.js.map +1 -1
- package/dist/src/mcp/tools/search.d.ts.map +1 -1
- package/dist/src/mcp/tools/search.js +7 -1
- package/dist/src/mcp/tools/search.js.map +1 -1
- package/dist/src/observability/retrieval-feedback.d.ts +82 -0
- package/dist/src/observability/retrieval-feedback.d.ts.map +1 -0
- package/dist/src/observability/retrieval-feedback.js +231 -0
- package/dist/src/observability/retrieval-feedback.js.map +1 -0
- package/dist/src/observability/rift-context.d.ts.map +1 -1
- package/dist/src/observability/rift-context.js +3 -0
- package/dist/src/observability/rift-context.js.map +1 -1
- package/dist/src/observability/tool-usage-stats.d.ts +13 -0
- package/dist/src/observability/tool-usage-stats.d.ts.map +1 -1
- package/dist/src/observability/tool-usage-stats.js +15 -0
- package/dist/src/observability/tool-usage-stats.js.map +1 -1
- package/dist/src/observability/tool-usage.d.ts +56 -0
- package/dist/src/observability/tool-usage.d.ts.map +1 -1
- package/dist/src/observability/tool-usage.js +86 -0
- package/dist/src/observability/tool-usage.js.map +1 -1
- package/dist/src/providers/claude-cli-metadata-extraction.d.ts +47 -0
- package/dist/src/providers/claude-cli-metadata-extraction.d.ts.map +1 -0
- package/dist/src/providers/claude-cli-metadata-extraction.js +120 -0
- package/dist/src/providers/claude-cli-metadata-extraction.js.map +1 -0
- package/dist/src/providers/claude-cli-runner.d.ts +92 -0
- package/dist/src/providers/claude-cli-runner.d.ts.map +1 -0
- package/dist/src/providers/claude-cli-runner.js +598 -0
- package/dist/src/providers/claude-cli-runner.js.map +1 -0
- package/dist/src/providers/codex-cli-metadata-extraction.d.ts.map +1 -1
- package/dist/src/providers/codex-cli-metadata-extraction.js +1 -40
- package/dist/src/providers/codex-cli-metadata-extraction.js.map +1 -1
- package/dist/src/providers/codex-cli-runner.d.ts +7 -0
- package/dist/src/providers/codex-cli-runner.d.ts.map +1 -1
- package/dist/src/providers/codex-cli-runner.js +131 -5
- package/dist/src/providers/codex-cli-runner.js.map +1 -1
- package/dist/src/providers/conversation-generation.d.ts +10 -0
- package/dist/src/providers/conversation-generation.d.ts.map +1 -1
- package/dist/src/providers/conversation-generation.js +54 -13
- package/dist/src/providers/conversation-generation.js.map +1 -1
- package/dist/src/providers/openai-metadata-extraction.d.ts +48 -1
- package/dist/src/providers/openai-metadata-extraction.d.ts.map +1 -1
- package/dist/src/providers/openai-metadata-extraction.js +51 -2
- package/dist/src/providers/openai-metadata-extraction.js.map +1 -1
- package/dist/src/providers/types.d.ts +1 -1
- package/dist/src/providers/types.d.ts.map +1 -1
- package/dist/src/providers/types.js +4 -0
- package/dist/src/providers/types.js.map +1 -1
- package/dist/src/retrieval/compact.d.ts +81 -0
- package/dist/src/retrieval/compact.d.ts.map +1 -1
- package/dist/src/retrieval/compact.js +248 -8
- package/dist/src/retrieval/compact.js.map +1 -1
- package/dist/src/retrieval/context-pack.d.ts.map +1 -1
- package/dist/src/retrieval/context-pack.js +28 -14
- package/dist/src/retrieval/context-pack.js.map +1 -1
- package/dist/src/retrieval/evidence-key.d.ts +48 -0
- package/dist/src/retrieval/evidence-key.d.ts.map +1 -0
- package/dist/src/retrieval/evidence-key.js +131 -0
- package/dist/src/retrieval/evidence-key.js.map +1 -0
- package/dist/src/retrieval/group-by-parent.d.ts +38 -0
- package/dist/src/retrieval/group-by-parent.d.ts.map +1 -0
- package/dist/src/retrieval/group-by-parent.js +40 -0
- package/dist/src/retrieval/group-by-parent.js.map +1 -0
- package/dist/src/retrieval/lexical.d.ts.map +1 -1
- package/dist/src/retrieval/lexical.js +1 -3
- package/dist/src/retrieval/lexical.js.map +1 -1
- package/dist/src/retrieval/receipt.d.ts +57 -0
- package/dist/src/retrieval/receipt.d.ts.map +1 -0
- package/dist/src/retrieval/receipt.js +119 -0
- package/dist/src/retrieval/receipt.js.map +1 -0
- package/dist/src/retrieval/reranker.d.ts +12 -2
- package/dist/src/retrieval/reranker.d.ts.map +1 -1
- package/dist/src/retrieval/reranker.js +11 -4
- package/dist/src/retrieval/reranker.js.map +1 -1
- package/dist/src/retrieval/stitch-chunks.d.ts +73 -0
- package/dist/src/retrieval/stitch-chunks.d.ts.map +1 -0
- package/dist/src/retrieval/stitch-chunks.js +106 -0
- package/dist/src/retrieval/stitch-chunks.js.map +1 -0
- package/dist/src/server/app.d.ts.map +1 -1
- package/dist/src/server/app.js +17 -1
- package/dist/src/server/app.js.map +1 -1
- package/dist/src/server/routes/conversations-search.d.ts.map +1 -1
- package/dist/src/server/routes/conversations-search.js +12 -3
- package/dist/src/server/routes/conversations-search.js.map +1 -1
- package/dist/src/server/routes/friend-status.d.ts +44 -5
- package/dist/src/server/routes/friend-status.d.ts.map +1 -1
- package/dist/src/server/routes/friend-status.js +74 -6
- package/dist/src/server/routes/friend-status.js.map +1 -1
- package/dist/src/server/routes/mcp-usage.d.ts +9 -6
- package/dist/src/server/routes/mcp-usage.d.ts.map +1 -1
- package/dist/src/server/routes/mcp-usage.js.map +1 -1
- package/dist/src/server/routes/retrieval-feedback.d.ts +3 -0
- package/dist/src/server/routes/retrieval-feedback.d.ts.map +1 -0
- package/dist/src/server/routes/retrieval-feedback.js +290 -0
- package/dist/src/server/routes/retrieval-feedback.js.map +1 -0
- package/dist/src/server/routes/save.d.ts +3 -3
- package/dist/src/server/routes/save.d.ts.map +1 -1
- package/dist/src/server/routes/save.js +6 -2
- package/dist/src/server/routes/save.js.map +1 -1
- package/dist/src/server/routes/search.d.ts.map +1 -1
- package/dist/src/server/routes/search.js +19 -7
- package/dist/src/server/routes/search.js.map +1 -1
- package/dist/src/server/serving-marker.d.ts +85 -0
- package/dist/src/server/serving-marker.d.ts.map +1 -0
- package/dist/src/server/serving-marker.js +226 -0
- package/dist/src/server/serving-marker.js.map +1 -0
- package/dist/src/storage/chunk-backfill.d.ts +39 -0
- package/dist/src/storage/chunk-backfill.d.ts.map +1 -0
- package/dist/src/storage/chunk-backfill.js +295 -0
- package/dist/src/storage/chunk-backfill.js.map +1 -0
- package/dist/src/storage/filter.d.ts +42 -0
- package/dist/src/storage/filter.d.ts.map +1 -0
- package/dist/src/storage/filter.js +70 -0
- package/dist/src/storage/filter.js.map +1 -0
- package/dist/src/storage/rebuild.d.ts.map +1 -1
- package/dist/src/storage/rebuild.js +44 -27
- package/dist/src/storage/rebuild.js.map +1 -1
- package/dist/src/storage/tables.d.ts +41 -0
- package/dist/src/storage/tables.d.ts.map +1 -1
- package/dist/src/storage/tables.js +64 -1
- package/dist/src/storage/tables.js.map +1 -1
- package/operator/swiftbar/render-menu.py +57 -15
- package/package.json +5 -3
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Read-only reader for Cursor's `state.vscdb` SQLite stores.
|
|
3
|
+
*
|
|
4
|
+
* DISCOVERY SLICE ONLY — this is the single I/O boundary for Cursor capture.
|
|
5
|
+
* It is deliberately read-only by THREE independent guarantees:
|
|
6
|
+
* 1. SQLite is opened with the `immutable=1` URI flag, which forbids any
|
|
7
|
+
* write and never creates/touches the -wal/-shm sidecar files.
|
|
8
|
+
* 2. Only SELECT statements are ever issued.
|
|
9
|
+
* 3. We shell out to the system `sqlite3` binary rather than linking a
|
|
10
|
+
* writable driver. (Cursor itself holds the db open under WAL; immutable
|
|
11
|
+
* lets us read a consistent snapshot without contending for the lock.)
|
|
12
|
+
*
|
|
13
|
+
* Why shell out instead of a native dep: Rift's Node engine floor still
|
|
14
|
+
* includes Node 20 (no `node:sqlite`), and we don't want to pull a native
|
|
15
|
+
* addon (better-sqlite3) into the tree for a read-only probe. macOS always
|
|
16
|
+
* ships `/usr/bin/sqlite3`, and Cursor is macOS-first. The future capture slice
|
|
17
|
+
* can revisit this (node:sqlite once the floor rises). See the discovery doc.
|
|
18
|
+
*
|
|
19
|
+
* PRIVACY: by default we fetch only the session index + per-session metadata
|
|
20
|
+
* (`composer.composerHeaders`, `composerData:*`). We never SELECT `cursorAuth/*`
|
|
21
|
+
* (access/refresh tokens) or `secret://*` (MCP OAuth secrets), and we do not
|
|
22
|
+
* fetch bubble bodies (`bubbleId:*`, the actual message text) unless the caller
|
|
23
|
+
* explicitly opts in.
|
|
24
|
+
*/
|
|
25
|
+
import { execFileSync } from "node:child_process";
|
|
26
|
+
import fs from "node:fs";
|
|
27
|
+
import os from "node:os";
|
|
28
|
+
import path from "node:path";
|
|
29
|
+
/**
 * Location of Cursor's per-user application-support directory, using the
 * macOS layout (`~/Library/Application Support/Cursor`).
 *
 * @returns {string} Path built from the current user's home directory.
 */
export function defaultCursorDir() {
    const segments = ["Library", "Application Support", "Cursor"];
    return path.join(os.homedir(), ...segments);
}
|
|
33
|
+
/**
 * Path of the global key/value store (`state.vscdb`) inside a Cursor
 * application-support directory.
 *
 * @param {string} cursorDir Cursor application-support directory.
 * @returns {string} `<cursorDir>/User/globalStorage/state.vscdb`.
 */
export function globalStatePath(cursorDir) {
    const globalStorageDir = path.join(cursorDir, "User", "globalStorage");
    return path.join(globalStorageDir, "state.vscdb");
}
|
|
37
|
+
/**
 * Error raised by the Cursor store reader.
 *
 * `code` is a stable, machine-readable discriminator so callers can branch
 * without parsing the message. Codes used in this module: `"db_missing"`,
 * `"sqlite_missing"` and `"query_failed"`.
 */
export class CursorReaderError extends Error {
    /** Stable failure code. */
    code;
    /**
     * @param {string} message Human-readable description of the failure.
     * @param {string} code Stable failure code.
     * @param {{ cause?: unknown }} [options] Standard `Error` options; pass
     *   `cause` to chain the underlying error. Optional, so existing
     *   two-argument call sites behave exactly as before.
     */
    constructor(message, code, options) {
        super(message, options);
        this.code = code;
        this.name = "CursorReaderError";
    }
}
|
|
45
|
+
/**
 * Probe for a usable `sqlite3` command by running `sqlite3 --version` with all
 * stdio discarded.
 *
 * @returns {boolean} `true` when the command ran without throwing, `false` on
 *   any failure (binary missing, not executable, non-zero exit, ...).
 */
function sqlite3Available() {
    let available = true;
    try {
        execFileSync("sqlite3", ["--version"], { stdio: ["ignore", "ignore", "ignore"] });
    }
    catch {
        available = false;
    }
    return available;
}
|
|
54
|
+
/**
 * Build the SQLite `file:` URI that opens `dbPath` with `immutable=1`.
 *
 * `%`, `?` and `#` are percent-encoded so that a path containing them is not
 * misread as an escape sequence, the start of the query string, or a fragment.
 *
 * @param {string} dbPath Filesystem path of the database.
 * @returns {string} URI of the form `file:<escaped path>?immutable=1`.
 */
function immutableDbUri(dbPath) {
    const escapedPath = dbPath.replace(/[%?#]/g, (ch) => encodeURIComponent(ch));
    return `file:${escapedPath}?immutable=1`;
}
/**
 * Run a single SELECT against an immutable (read-only) view of the db and
 * return the rows as objects. The `sqlite3` CLI is invoked with `.mode json`
 * and its stdout is parsed with `JSON.parse`.
 *
 * @param {string} dbPath Filesystem path of the database to query.
 * @param {string} sql SQL text handed to `sqlite3` as a single argument.
 * @returns {Array<object>} Parsed rows; an empty array when sqlite3 prints nothing.
 * @throws {CursorReaderError} With code `"query_failed"` when the `sqlite3`
 *   invocation fails or its output is not valid JSON.
 */
function selectJson(dbPath, sql) {
    const uri = immutableDbUri(dbPath);
    let stdout;
    try {
        stdout = execFileSync("sqlite3", ["-cmd", ".mode json", uri, sql], {
            encoding: "utf-8",
            maxBuffer: 256 * 1024 * 1024, // composer blobs can be large
        });
    }
    catch (err) {
        throw new CursorReaderError(`sqlite3 query failed: ${err instanceof Error ? err.message : String(err)}`, "query_failed");
    }
    const trimmed = stdout.trim();
    // No output at all is treated as "no rows" rather than a parse error.
    if (!trimmed) {
        return [];
    }
    try {
        return JSON.parse(trimmed);
    }
    catch (err) {
        throw new CursorReaderError(`failed to parse sqlite3 JSON output: ${err instanceof Error ? err.message : String(err)}`, "query_failed");
    }
}
|
|
80
|
+
/**
 * Narrow raw query records to `{ key, value }` rows.
 *
 * Only records whose `key` and `value` are both strings are kept, and each
 * kept row carries just those two properties. Entries that are `null`,
 * `undefined` or otherwise lack string fields are skipped instead of throwing.
 *
 * @param {Iterable<unknown> | null | undefined} records Raw records; a nullish
 *   value is treated as "no records".
 * @returns {Array<{ key: string, value: string }>} Newly allocated array of rows.
 */
function toKvRows(records) {
    const rows = [];
    for (const rec of records ?? []) {
        // Optional chaining keeps a null/undefined entry from raising a TypeError.
        if (typeof rec?.key === "string" && typeof rec?.value === "string") {
            rows.push({ key: rec.key, value: rec.value });
        }
    }
    return rows;
}
|
|
89
|
+
/**
 * Read the global Cursor `state.vscdb` read-only and return raw rows for the
 * discovery layer.
 *
 * @param {{ cursorDir?: string, includeBubbles?: boolean }} [options]
 *   `cursorDir` overrides the default application-support directory;
 *   `includeBubbles` additionally fetches the `bubbleId:*` rows.
 * @returns {{ dbPath: string, rows: { itemTable: Array<{ key: string, value: string }>, cursorDiskKV: Array<{ key: string, value: string }> } }}
 *   The resolved database path plus the rows read from `ItemTable` and `cursorDiskKV`.
 * @throws {CursorReaderError} `"db_missing"` when the database file does not
 *   exist, `"sqlite_missing"` when `sqlite3` cannot be run, and `"query_failed"`
 *   (raised by the query helper) when a SELECT fails.
 */
export function readCursorGlobalState(options = {}) {
    const cursorDir = options.cursorDir ?? defaultCursorDir();
    const dbPath = globalStatePath(cursorDir);
    if (!fs.existsSync(dbPath)) {
        throw new CursorReaderError(`Cursor state.vscdb not found at ${dbPath}`, "db_missing");
    }
    if (!sqlite3Available()) {
        throw new CursorReaderError("The `sqlite3` binary is required to read Cursor's store but was not found on PATH.", "sqlite_missing");
    }
    // Session index (ItemTable). Single, small row.
    const itemTable = toKvRows(selectJson(dbPath, "SELECT key, value FROM ItemTable WHERE key = 'composer.composerHeaders'"));
    // Per-session metadata (cursorDiskKV). Small; never tokens/secrets.
    const composerData = toKvRows(selectJson(dbPath, "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'composerData:%'"));
    // Bubble rows are fetched only when the caller explicitly opts in.
    const bubbles = options.includeBubbles
        ? toKvRows(selectJson(dbPath, "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'bubbleId:%'"))
        : [];
    // concat rather than push(...bubbles): spreading a very large array into
    // call arguments can exceed the engine's argument limit and overflow the stack.
    const cursorDiskKV = composerData.concat(bubbles);
    return { dbPath, rows: { itemTable, cursorDiskKV } };
}
|
|
113
|
+
//# sourceMappingURL=vscdb-reader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vscdb-reader.js","sourceRoot":"","sources":["../../../../src/ingestion/cursor/vscdb-reader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAI7B,2EAA2E;AAC3E,MAAM,UAAU,gBAAgB;IAC9B,OAAO,IAAI,CAAC,IAAI,CACd,EAAE,CAAC,OAAO,EAAE,EACZ,SAAS,EACT,qBAAqB,EACrB,QAAQ,CACT,CAAC;AACJ,CAAC;AAED,mEAAmE;AACnE,MAAM,UAAU,eAAe,CAAC,SAAiB;IAC/C,OAAO,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,eAAe,EAAE,aAAa,CAAC,CAAC;AACtE,CAAC;AASD,MAAM,OAAO,iBAAkB,SAAQ,KAAK;IAG/B;IAFX,YACE,OAAe,EACN,IAGS;QAElB,KAAK,CAAC,OAAO,CAAC,CAAC;QALN,SAAI,GAAJ,IAAI,CAGK;QAGlB,IAAI,CAAC,IAAI,GAAG,mBAAmB,CAAC;IAClC,CAAC;CACF;AAED,SAAS,gBAAgB;IACvB,IAAI,CAAC;QACH,YAAY,CAAC,SAAS,EAAE,CAAC,WAAW,CAAC,EAAE,EAAE,KAAK,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;QAClF,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAS,UAAU,CAAC,MAAc,EAAE,GAAW;IAC7C,MAAM,GAAG,GAAG,QAAQ,MAAM,cAAc,CAAC;IACzC,IAAI,MAAc,CAAC;IACnB,IAAI,CAAC;QACH,MAAM,GAAG,YAAY,CAAC,SAAS,EAAE,CAAC,MAAM,EAAE,YAAY,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE;YACjE,QAAQ,EAAE,OAAO;YACjB,SAAS,EAAE,GAAG,GAAG,IAAI,GAAG,IAAI,EAAE,8BAA8B;SAC7D,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,IAAI,iBAAiB,CACzB,yBAAyB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAC3E,cAAc,CACf,CAAC;IACJ,CAAC;IACD,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC;IAC9B,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IACxB,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAkC,CAAC;IAC9D,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,IAAI,iBAAiB,CACzB,wCAAwC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAC1F,cAAc,CACf,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,QAAQ,CAAC,OAAsC;IACtD,MAAM,IAAI,GAAkB,EAAE,CAAC;IAC/B,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;QAC1B,IAAI,OAAO,GAAG,CAAC,GAAG,KAAK,QAAQ,IAAI,OAAO,GAAG,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC
jE,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC;QAChD,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAaD;;;;GAIG;AACH,MAAM,UAAU,qBAAqB,CAAC,UAAuB,EAAE;IAC7D,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,gBAAgB,EAAE,CAAC;IAC1D,MAAM,MAAM,GAAG,eAAe,CAAC,SAAS,CAAC,CAAC;IAE1C,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,iBAAiB,CACzB,mCAAmC,MAAM,EAAE,EAC3C,YAAY,CACb,CAAC;IACJ,CAAC;IACD,IAAI,CAAC,gBAAgB,EAAE,EAAE,CAAC;QACxB,MAAM,IAAI,iBAAiB,CACzB,oFAAoF,EACpF,gBAAgB,CACjB,CAAC;IACJ,CAAC;IAED,gDAAgD;IAChD,MAAM,SAAS,GAAG,QAAQ,CACxB,UAAU,CACR,MAAM,EACN,yEAAyE,CAC1E,CACF,CAAC;IAEF,oEAAoE;IACpE,MAAM,YAAY,GAAG,QAAQ,CAC3B,UAAU,CACR,MAAM,EACN,qEAAqE,CACtE,CACF,CAAC;IAEF,MAAM,YAAY,GAAG,CAAC,GAAG,YAAY,CAAC,CAAC;IAEvC,IAAI,OAAO,CAAC,cAAc,EAAE,CAAC;QAC3B,YAAY,CAAC,IAAI,CACf,GAAG,QAAQ,CACT,UAAU,CACR,MAAM,EACN,iEAAiE,CAClE,CACF,CACF,CAAC;IACJ,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE,YAAY,EAAE,EAAE,CAAC;AACvD,CAAC"}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
 * Sentinel value Cursor stores in `workspaceIdentifier.id` when no folder is
 * open.
 */
export declare const EMPTY_WINDOW_ID: "empty-window";
|
|
3
|
+
/**
 * Shape of the `uri` object Cursor embeds in a folder-bound composer header.
 * Every field is optional.
 */
export interface CursorWorkspaceUri {
    scheme?: string;
    fsPath?: string;
    path?: string;
    external?: string;
}
|
|
10
|
+
/**
 * The `workspaceIdentifier` carried by a composer header: an id plus an
 * optional inline uri.
 */
export interface CursorWorkspaceIdentifier {
    id?: string;
    uri?: CursorWorkspaceUri | null;
}
|
|
15
|
+
/**
 * Result of resolving a workspace identifier to a project root. Each status
 * other than `resolved` is a deliberate, named outcome: the capture slice must
 * branch on it and never fabricate a root.
 */
export type ProjectRootResolution =
    | {
        status: "resolved";
        /** Absolute filesystem path of the project root. */
        root: string;
        /** Which link produced the root. */
        source: "header_uri" | "workspace_json";
        workspaceId: string | null;
    }
    /** No folder open (`empty-window`). Expected; not attributable. */
    | { status: "empty_window"; workspaceId: "empty-window" }
    /** Multi-root `.code-workspace`; root ambiguous. */
    | { status: "multi_root"; workspaceId: string; configPath: string | null }
    /** Non-`file` scheme (remote/ssh/wsl/devcontainer); not a local path. */
    | { status: "remote"; workspaceId: string | null; scheme: string }
    /** Id present but no inline uri and no readable `workspace.json`. */
    | { status: "unresolved"; workspaceId: string | null };
|
|
50
|
+
/**
 * Parse the text of a `workspace.json` file. Pure.
 *
 * @param text Raw file contents.
 * @returns The folder root for a single-folder window (`kind: "folder"`), a
 *   multi-root marker for a `.code-workspace` (`kind: "multi_root"`), or
 *   `kind: "unknown"`.
 */
export declare function parseWorkspaceJson(
    text: string,
):
    | { kind: "folder"; scheme: string | null; root: string | null }
    | { kind: "multi_root"; configPath: string | null }
    | { kind: "unknown" };
|
|
64
|
+
/** Dependencies injected into the pure resolver. */
export interface ResolveOptions {
    /**
     * Supplies the raw text of `workspaceStorage/<id>/workspace.json`, or
     * `null` when the file is absent. It is injected so the resolver stays
     * pure and fully fixture-testable, and is consulted only as a fallback
     * when the header carries no inline uri.
     */
    readWorkspaceJson?: (workspaceId: string) => string | null;
}
|
|
73
|
+
/**
 * Resolve a composer header's `workspaceIdentifier` to a project root.
 *
 * Pure: the only I/O is the injected {@link ResolveOptions.readWorkspaceJson}.
 * The inline header uri is tried first (current builds), then the on-disk
 * `workspace.json` join (legacy builds / uri stripped). It never guesses;
 * every non-resolvable case maps to an explicit status.
 *
 * @param identifier The header's `workspaceIdentifier`, if any.
 * @param options Injected dependencies.
 */
export declare function resolveProjectRoot(
    identifier: CursorWorkspaceIdentifier | null | undefined,
    options?: ResolveOptions,
): ProjectRootResolution;
|
|
81
|
+
/**
 * Path of a workspace's `workspace.json` beneath a Cursor app-support dir.
 *
 * @param cursorDir Cursor application-support directory.
 * @param workspaceId Workspace id.
 */
export declare function workspaceJsonPath(
    cursorDir: string,
    workspaceId: string,
): string;
|
|
83
|
+
/**
 * Read the text of `workspace.json` read-only.
 *
 * @param cursorDir Cursor application-support directory.
 * @param workspaceId Workspace id.
 * @returns The file text, or `null` if it does not exist or is unreadable.
 */
export declare function readWorkspaceJsonFromDisk(
    cursorDir: string,
    workspaceId: string,
): string | null;
|
|
85
|
+
/**
 * Filesystem-backed resolution: resolves a header's identifier against a real
 * Cursor app-support dir, reading `workspace.json` read-only when needed.
 *
 * @param identifier The header's `workspaceIdentifier`, if any.
 * @param cursorDir Cursor application-support directory.
 */
export declare function resolveCursorProjectRoot(
    identifier: CursorWorkspaceIdentifier | null | undefined,
    cursorDir: string,
): ProjectRootResolution;
|
|
90
|
+
/**
 * Resolve a project root from a workspace id alone, when no inline uri is
 * available. The probe uses this to upgrade `unresolved` discovery candidates
 * on legacy builds via the on-disk `workspace.json` join.
 *
 * @param cursorDir Cursor application-support directory.
 * @param workspaceId Workspace id.
 */
export declare function resolveProjectRootByWorkspaceId(
    cursorDir: string,
    workspaceId: string,
): ProjectRootResolution;
|
|
96
|
+
//# sourceMappingURL=workspace-root.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"workspace-root.d.ts","sourceRoot":"","sources":["../../../../src/ingestion/cursor/workspace-root.ts"],"names":[],"mappings":"AAsDA,4EAA4E;AAC5E,eAAO,MAAM,eAAe,iBAAiB,CAAC;AAE9C,wEAAwE;AACxE,MAAM,WAAW,kBAAkB;IACjC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,4EAA4E;AAC5E,MAAM,WAAW,yBAAyB;IACxC,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,GAAG,CAAC,EAAE,kBAAkB,GAAG,IAAI,CAAC;CACjC;AAED;;;;GAIG;AACH,MAAM,MAAM,qBAAqB,GAC7B;IACE,MAAM,EAAE,UAAU,CAAC;IACnB,oDAAoD;IACpD,IAAI,EAAE,MAAM,CAAC;IACb,oCAAoC;IACpC,MAAM,EAAE,YAAY,GAAG,gBAAgB,CAAC;IACxC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC5B;AACH,mEAAmE;GACjE;IAAE,MAAM,EAAE,cAAc,CAAC;IAAC,WAAW,EAAE,cAAc,CAAA;CAAE;AACzD,oDAAoD;GAClD;IAAE,MAAM,EAAE,YAAY,CAAC;IAAC,WAAW,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE;AAC1E,yEAAyE;GACvE;IAAE,MAAM,EAAE,QAAQ,CAAC;IAAC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE;AAClE,qEAAqE;GACnE;IAAE,MAAM,EAAE,YAAY,CAAC;IAAC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,CAAC;AAoCzD;;;GAGG;AACH,wBAAgB,kBAAkB,CAChC,IAAI,EAAE,MAAM,GAEV;IAAE,IAAI,EAAE,QAAQ,CAAC;IAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,GAC9D;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,GACjD;IAAE,IAAI,EAAE,SAAS,CAAA;CAAE,CAkBtB;AAED,mDAAmD;AACnD,MAAM,WAAW,cAAc;IAC7B;;;;OAIG;IACH,iBAAiB,CAAC,EAAE,CAAC,WAAW,EAAE,MAAM,KAAK,MAAM,GAAG,IAAI,CAAC;CAC5D;AAED;;;;;;GAMG;AACH,wBAAgB,kBAAkB,CAChC,UAAU,EAAE,yBAAyB,GAAG,IAAI,GAAG,SAAS,EACxD,OAAO,GAAE,cAAmB,GAC3B,qBAAqB,CAqCvB;AAED,6EAA6E;AAC7E,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,GAAG,MAAM,CAEhF;AAED,oFAAoF;AACpF,wBAAgB,yBAAyB,CACvC,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,MAAM,GAClB,MAAM,GAAG,IAAI,CAMf;AAED;;;GAGG;AACH,wBAAgB,wBAAwB,CACtC,UAAU,EAAE,yBAAyB,GAAG,IAAI,GAAG,SAAS,EACxD,SAAS,EAAE,MAAM,GAChB,qBAAqB,CAIvB;AAED;;;;GAIG;AACH,wBAAgB,+BAA+B,CAC7C,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,MAAM,GAClB,qBAAqB,CAEvB"}
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Resolve a native Cursor session's `workspaceIdentifier` to a filesystem
|
|
3
|
+
* project root — the missing half of Cursor project-root attribution.
|
|
4
|
+
*
|
|
5
|
+
* DISCOVERY SLICE ONLY. Read-only, no LanceDB writes, no daemon, no capture.
|
|
6
|
+
* The pure resolver ({@link resolveProjectRoot}) takes an injected
|
|
7
|
+
* `readWorkspaceJson` so it is exhaustively unit-testable from synthetic
|
|
8
|
+
* metadata with no filesystem and no real Cursor install. The fs-backed wrapper
|
|
9
|
+
* ({@link resolveCursorProjectRoot}) only reads `workspace.json` (a tiny
|
|
10
|
+
* `{ "folder": "file://…" }` file) — never the workspace's `state.vscdb`,
|
|
11
|
+
* never tokens, never message content.
|
|
12
|
+
*
|
|
13
|
+
* ## The join contract (empirically verified, macOS Cursor "glass", 2026-06)
|
|
14
|
+
*
|
|
15
|
+
* On opening a folder window, Cursor writes the composer header's
|
|
16
|
+
* `workspaceIdentifier` with TWO independent links to the project root:
|
|
17
|
+
*
|
|
18
|
+
* "workspaceIdentifier": {
|
|
19
|
+
* "id": "<workspaceStorageHash>", // e.g. "11ed9332b5bd79fd69ad93a7636abd28"
|
|
20
|
+
* "uri": { "scheme": "file",
|
|
21
|
+
* "fsPath": "/Users/me/projects/foo",
|
|
22
|
+
* "path": "/Users/me/projects/foo",
|
|
23
|
+
* "external": "file:///Users/me/projects/foo" }
|
|
24
|
+
* }
|
|
25
|
+
*
|
|
26
|
+
* 1. INLINE (primary, current builds): `uri.fsPath` IS the project root.
|
|
27
|
+
* No disk lookup needed.
|
|
28
|
+
* 2. ON-DISK (authoritative fallback): `id` equals the directory name under
|
|
29
|
+
* `~/Library/Application Support/Cursor/User/workspaceStorage/<id>/`,
|
|
30
|
+
* whose `workspace.json` holds `{ "folder": "file://<root>" }`. Verified:
|
|
31
|
+
* the same hash appears as both `workspaceIdentifier.id` and the storage
|
|
32
|
+
* dir name, and its `workspace.json.folder` matches `uri.fsPath`.
|
|
33
|
+
*
|
|
34
|
+
* The storage hash is NOT a recomputable md5 of the folder URI (checked: no
|
|
35
|
+
* simple encoding reproduces it) — so resolution MUST read `workspace.json` (or
|
|
36
|
+
* trust the inline uri), never re-derive the hash.
|
|
37
|
+
*
|
|
38
|
+
* ## Failure modes (all first-class — never guessed)
|
|
39
|
+
* - `id === "empty-window"` and no `uri` → no folder open. NOT attributable.
|
|
40
|
+
* This is the common case on a machine with only empty windows.
|
|
41
|
+
* - `workspace.json` has `"workspace": "…code-workspace"` (multi-root) instead
|
|
42
|
+
* of `"folder"` → project root is ambiguous. Do not pick one.
|
|
43
|
+
* - `uri.scheme` / `folder` scheme is not `file` (ssh-remote, wsl, devcontainer)
|
|
44
|
+
* → not a local path.
|
|
45
|
+
* - `id` present but `workspace.json` missing/purged and no inline `uri`
|
|
46
|
+
* → unresolved.
|
|
47
|
+
*
|
|
48
|
+
* The capture slice should scope to a project root ONLY on `resolved`; for every
|
|
49
|
+
* other status, capture WITHOUT project scoping (or behind a user-confirmed
|
|
50
|
+
* fallback). See docs/cursor-capture-discovery.md.
|
|
51
|
+
*/
|
|
52
|
+
import fs from "node:fs";
|
|
53
|
+
import path from "node:path";
|
|
54
|
+
/** Sentinel `workspaceIdentifier.id` Cursor uses when no folder is open. */
|
|
55
|
+
export const EMPTY_WINDOW_ID = "empty-window";
|
|
56
|
+
/**
 * Parse a URI string into `{ scheme, path }`.
 *
 * @param {string} uri - URI text, e.g. `file:///Users/me/my%20project`.
 * @returns {{ scheme: string | null, path: string | null }}
 *   - `scheme`: the URI scheme without its trailing colon, or null when `uri`
 *     cannot be parsed by `URL`.
 *   - `path`: the percent-decoded pathname for `file` URIs; null for any other
 *     scheme, for unparseable input, and for a `file` URI whose pathname
 *     carries a malformed percent-escape.
 *
 * Never throws: every failure is reported through null fields.
 */
function parseUriString(uri) {
    let parsed;
    try {
        parsed = new URL(uri);
    }
    catch {
        return { scheme: null, path: null };
    }
    const scheme = parsed.protocol.replace(/:$/, "");
    if (scheme !== "file")
        return { scheme, path: null };
    // file:///abs/path -> decode percent-encoding (spaces, accents in client dirs).
    // `URL` keeps a malformed escape (e.g. a lone "%") verbatim in `pathname`,
    // and decodeURIComponent throws URIError on it. Report "no usable path"
    // instead of letting the exception escape to callers.
    // NOTE(review): for a Windows drive-letter URI the pathname presumably keeps
    // its leading slash ("/C:/..."); confirm whether callers need a native path.
    try {
        return { scheme, path: decodeURIComponent(parsed.pathname) };
    }
    catch {
        return { scheme, path: null };
    }
}
|
|
71
|
+
/**
 * Derive `{ scheme, root }` from the inline `uri` object of a header.
 *
 * @param {object | null | undefined} uri - Candidate uri record; may carry
 *   `scheme`, `fsPath`, `path` and `external` string fields.
 * @returns {{ scheme: string | null, root: string | null }} `root` is null
 *   whenever the scheme is something other than `file` or no path could be
 *   obtained.
 */
function rootFromInlineUri(uri) {
    if (uri === null || typeof uri !== "object") {
        return { scheme: null, root: null };
    }
    const declaredScheme = typeof uri.scheme === "string" ? uri.scheme : null;
    // A declared non-file scheme wins immediately: no local root is reported.
    if (declaredScheme && declaredScheme !== "file") {
        return { scheme: declaredScheme, root: null };
    }
    // fsPath / path are taken as-is (no decoding); fsPath has priority.
    let localPath = null;
    if (typeof uri.fsPath === "string" && uri.fsPath !== "") {
        localPath = uri.fsPath;
    }
    else if (typeof uri.path === "string" && uri.path !== "") {
        localPath = uri.path;
    }
    if (localPath !== null) {
        return { scheme: declaredScheme ?? "file", root: localPath };
    }
    // Last resort: parse the serialized `external` URI string.
    if (typeof uri.external !== "string") {
        return { scheme: declaredScheme, root: null };
    }
    const external = parseUriString(uri.external);
    return { scheme: external.scheme ?? declaredScheme, root: external.path };
}
|
|
90
|
+
/**
 * Classify the text of a `workspace.json` file.
 *
 * @param {string} text - Raw file contents.
 * @returns {{ kind: "folder", scheme: string | null, root: string | null }
 *   | { kind: "multi_root", configPath: string | null }
 *   | { kind: "unknown" }}
 *   `folder` when the JSON has a string `folder` entry (single-folder window),
 *   `multi_root` when it has a string `workspace` entry instead, and `unknown`
 *   for invalid JSON, non-object JSON, or an object with neither entry.
 */
export function parseWorkspaceJson(text) {
    let record;
    try {
        record = JSON.parse(text);
    }
    catch {
        return { kind: "unknown" };
    }
    const isObject = record !== null && typeof record === "object";
    // `folder` is checked first, so it wins when both keys are present.
    if (isObject && typeof record.folder === "string") {
        const folderUri = parseUriString(record.folder);
        return { kind: "folder", scheme: folderUri.scheme, root: folderUri.path };
    }
    if (isObject && typeof record.workspace === "string") {
        const workspaceUri = parseUriString(record.workspace);
        return { kind: "multi_root", configPath: workspaceUri.path };
    }
    return { kind: "unknown" };
}
|
|
115
|
+
/**
 * Resolve a composer header's `workspaceIdentifier` to a project root.
 *
 * The only I/O performed is the injected `options.readWorkspaceJson`. The
 * inline header uri is tried first; the on-disk `workspace.json` join is the
 * fallback. Every non-resolvable case maps to an explicit status.
 *
 * @param {{ id?: string, uri?: object | null } | null | undefined} identifier
 * @param {{ readWorkspaceJson?: (workspaceId: string) => string | null }} [options]
 * @returns {object} One of:
 *   - `{ status: "empty_window", workspaceId }`
 *   - `{ status: "remote", workspaceId, scheme }`
 *   - `{ status: "resolved", root, source: "header_uri" | "workspace_json", workspaceId }`
 *   - `{ status: "multi_root", workspaceId, configPath }`
 *   - `{ status: "unresolved", workspaceId }`
 */
export function resolveProjectRoot(identifier, options = {}) {
    const workspaceId = typeof identifier?.id === "string" ? identifier.id : null;
    // 1. Sentinel id: no folder is open, so there is nothing to attribute.
    if (workspaceId === EMPTY_WINDOW_ID) {
        return { status: "empty_window", workspaceId: EMPTY_WINDOW_ID };
    }
    // 2. Inline uri carried directly on the header.
    const { scheme: inlineScheme, root: inlineRoot } = rootFromInlineUri(identifier?.uri);
    if (inlineScheme && inlineScheme !== "file") {
        return { status: "remote", workspaceId, scheme: inlineScheme };
    }
    if (inlineRoot) {
        return { status: "resolved", root: inlineRoot, source: "header_uri", workspaceId };
    }
    // 3. On-disk fallback: id -> workspaceStorage/<id>/workspace.json.
    if (!workspaceId || !options.readWorkspaceJson) {
        return { status: "unresolved", workspaceId };
    }
    const text = options.readWorkspaceJson(workspaceId);
    if (!text) {
        return { status: "unresolved", workspaceId };
    }
    const onDisk = parseWorkspaceJson(text);
    switch (onDisk.kind) {
        case "folder":
            if (onDisk.scheme && onDisk.scheme !== "file") {
                return { status: "remote", workspaceId, scheme: onDisk.scheme };
            }
            if (onDisk.root) {
                return { status: "resolved", root: onDisk.root, source: "workspace_json", workspaceId };
            }
            break;
        case "multi_root":
            return { status: "multi_root", workspaceId, configPath: onDisk.configPath };
        default:
            break;
    }
    // 4. Nothing reliable was found — do not guess.
    return { status: "unresolved", workspaceId };
}
|
|
157
|
+
/**
 * Build the path of a workspace's `workspace.json` under a Cursor
 * app-support directory.
 *
 * @param {string} cursorDir - Cursor app-support directory.
 * @param {string} workspaceId - Name of the workspace storage directory.
 * @returns {string} `<cursorDir>/User/workspaceStorage/<workspaceId>/workspace.json`,
 *   joined with the platform separator.
 */
export function workspaceJsonPath(cursorDir, workspaceId) {
    const storageDir = path.join(cursorDir, "User", "workspaceStorage", workspaceId);
    return path.join(storageDir, "workspace.json");
}
|
|
161
|
+
/**
 * Read a workspace's `workspace.json` as UTF-8 text.
 *
 * @param {string} cursorDir - Cursor app-support directory.
 * @param {string} workspaceId - Name of the workspace storage directory.
 * @returns {string | null} The file text, or null when building the path or
 *   reading the file throws for any reason (missing, unreadable, ...).
 */
export function readWorkspaceJsonFromDisk(cursorDir, workspaceId) {
    let contents = null;
    try {
        const jsonPath = workspaceJsonPath(cursorDir, workspaceId);
        contents = fs.readFileSync(jsonPath, "utf-8");
    }
    catch {
        // Deliberate best-effort: any failure is reported as "no text".
    }
    return contents;
}
|
|
170
|
+
/**
 * Filesystem-backed resolution of a header's `workspaceIdentifier`.
 *
 * Delegates to `resolveProjectRoot`, supplying a `readWorkspaceJson` that
 * reads `workspace.json` from `cursorDir` via `readWorkspaceJsonFromDisk`.
 *
 * @param {{ id?: string, uri?: object | null } | null | undefined} identifier
 * @param {string} cursorDir - Cursor app-support directory.
 * @returns {object} The status object produced by `resolveProjectRoot`.
 */
export function resolveCursorProjectRoot(identifier, cursorDir) {
    const readWorkspaceJson = (workspaceId) => readWorkspaceJsonFromDisk(cursorDir, workspaceId);
    return resolveProjectRoot(identifier, { readWorkspaceJson });
}
|
|
179
|
+
/**
 * Resolve a project root when only a workspace id is known (no inline uri).
 *
 * Wraps the id in a minimal `{ id }` identifier and hands it to
 * `resolveCursorProjectRoot`. Used by the probe to upgrade `unresolved`
 * discovery candidates on legacy builds via the on-disk `workspace.json` join.
 *
 * @param {string} cursorDir - Cursor app-support directory.
 * @param {string} workspaceId - Workspace storage id to look up.
 * @returns {object} The status object produced by `resolveCursorProjectRoot`.
 */
export function resolveProjectRootByWorkspaceId(cursorDir, workspaceId) {
    const identifier = { id: workspaceId };
    return resolveCursorProjectRoot(identifier, cursorDir);
}
|
|
187
|
+
//# sourceMappingURL=workspace-root.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"workspace-root.js","sourceRoot":"","sources":["../../../../src/ingestion/cursor/workspace-root.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkDG;AACH,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,4EAA4E;AAC5E,MAAM,CAAC,MAAM,eAAe,GAAG,cAAc,CAAC;AAuC9C,+EAA+E;AAC/E,SAAS,cAAc,CAAC,GAAW;IACjC,IAAI,MAAW,CAAC;IAChB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IACxB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACtC,CAAC;IACD,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IACjD,IAAI,MAAM,KAAK,MAAM;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACrD,+EAA+E;IAC/E,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,kBAAkB,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;AAC/D,CAAC;AAED,oEAAoE;AACpE,SAAS,iBAAiB,CACxB,GAA0C;IAE1C,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACzE,MAAM,MAAM,GAAG,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC;IAClE,IAAI,MAAM,IAAI,MAAM,KAAK,MAAM;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAC/D,8EAA8E;IAC9E,MAAM,MAAM,GACV,CAAC,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,CAAC;QAC9C,CAAC,OAAO,GAAG,CAAC,IAAI,KAAK,QAAQ,IAAI,GAAG,CAAC,IAAI,CAAC;QAC1C,IAAI,CAAC;IACP,IAAI,MAAM;QAAE,OAAO,EAAE,MAAM,EAAE,MAAM,IAAI,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;IAC9D,IAAI,OAAO,GAAG,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,cAAc,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACzC,OAAO,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,IAAI,MAAM,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;IAC1D,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;AAChC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAChC,IAAY;IAKZ,IAAI,GAAY,CAAC;IACjB,IAAI,CAAC;QACH,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACzB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;IAC7B,CAAC;IACD,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ;QAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;IAChE,MAAM,GAAG,GAAG,GAA8B,CAAC;IAC3C,IAAI,OA
AO,GAAG,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;QACnC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,cAAc,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACvD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC;IAC7C,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,SAAS,KAAK,QAAQ,EAAE,CAAC;QACtC,MAAM,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,cAAc,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAClD,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC;IAC/C,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;AAC7B,CAAC;AAYD;;;;;;GAMG;AACH,MAAM,UAAU,kBAAkB,CAChC,UAAwD,EACxD,UAA0B,EAAE;IAE5B,MAAM,EAAE,GAAG,OAAO,UAAU,EAAE,EAAE,KAAK,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IAErE,mEAAmE;IACnE,IAAI,EAAE,KAAK,eAAe,EAAE,CAAC;QAC3B,OAAO,EAAE,MAAM,EAAE,cAAc,EAAE,WAAW,EAAE,eAAe,EAAE,CAAC;IAClE,CAAC;IAED,mEAAmE;IACnE,MAAM,MAAM,GAAG,iBAAiB,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;IAClD,IAAI,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;QAC9C,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC;IACtE,CAAC;IACD,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;QAChB,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,YAAY,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC;IAC1F,CAAC;IAED,wEAAwE;IACxE,IAAI,EAAE,IAAI,OAAO,CAAC,iBAAiB,EAAE,CAAC;QACpC,MAAM,IAAI,GAAG,OAAO,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;QAC3C,IAAI,IAAI,EAAE,CAAC;YACT,MAAM,MAAM,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;YACxC,IAAI,MAAM,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC7B,IAAI,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;oBAC9C,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC;gBACtE,CAAC;gBACD,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;oBAChB,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,gBAAgB,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC;gBAC9F,CAAC;YACH,CAAC;iBAAM,IAAI,MAAM,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;gBACxC,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,WAAW,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,CAAC;YAClF,CAAC;QACH,CAAC;IACH,CAAC;IAED,sCAAsC;IACtC,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC;AACnD,CAAC;AAED,6EAA6E;AAC7E,MAAM,U
AAU,iBAAiB,CAAC,SAAiB,EAAE,WAAmB;IACtE,OAAO,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,kBAAkB,EAAE,WAAW,EAAE,gBAAgB,CAAC,CAAC;AACzF,CAAC;AAED,oFAAoF;AACpF,MAAM,UAAU,yBAAyB,CACvC,SAAiB,EACjB,WAAmB;IAEnB,IAAI,CAAC;QACH,OAAO,EAAE,CAAC,YAAY,CAAC,iBAAiB,CAAC,SAAS,EAAE,WAAW,CAAC,EAAE,OAAO,CAAC,CAAC;IAC7E,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,wBAAwB,CACtC,UAAwD,EACxD,SAAiB;IAEjB,OAAO,kBAAkB,CAAC,UAAU,EAAE;QACpC,iBAAiB,EAAE,CAAC,WAAW,EAAE,EAAE,CAAC,yBAAyB,CAAC,SAAS,EAAE,WAAW,CAAC;KACtF,CAAC,CAAC;AACL,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,+BAA+B,CAC7C,SAAiB,EACjB,WAAmB;IAEnB,OAAO,wBAAwB,CAAC,EAAE,EAAE,EAAE,WAAW,EAAE,EAAE,SAAS,CAAC,CAAC;AAClE,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"indexer.d.ts","sourceRoot":"","sources":["../../../src/ingestion/indexer.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"indexer.d.ts","sourceRoot":"","sources":["../../../src/ingestion/indexer.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAYxF,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,KAAK,GAAG,QAAQ,GAAG,QAAQ,CAAC;IAClC,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,aAAa;IAC5B,UAAU,EAAE,UAAU,CAAC;IACvB,WAAW,EAAE,WAAW,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;IAChC,wEAAwE;IACxE,SAAS,CAAC,EAAE,iBAAiB,GAAG,uBAAuB,CAAC;IACxD;;;;;OAKG;IACH,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;;GAGG;AACH,wBAAgB,MAAM,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,CAEjD;AAED,qBAAa,OAAO;IAClB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAoB;IAC9C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAgB;gBAE3B,SAAS,EAAE,iBAAiB,EAAE,MAAM,EAAE,aAAa;IAK/D;;;;OAIG;IACG,WAAW,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC;YAcpC,YAAY;YAuEZ,YAAY;CAK3B"}
|
|
@@ -12,6 +12,9 @@ import { validatePath, validateUnlinkPath } from "../security/paths.js";
|
|
|
12
12
|
import { writeSkipQuarantine } from "./skip-quarantine.js";
|
|
13
13
|
import { recordEmbed } from "../observability/embedding-events.js";
|
|
14
14
|
import { recordIndexWrite } from "../observability/index-events.js";
|
|
15
|
+
import { chunkText, chunkingEnabled } from "./chunk-text.js";
|
|
16
|
+
import { docChunkId, docChunkMetadata } from "./chunk-meta.js";
|
|
17
|
+
import { eqFilter } from "../storage/filter.js";
|
|
15
18
|
/**
|
|
16
19
|
* Deterministic row ID from the canonical source path.
|
|
17
20
|
* Same file always gets the same ID, enabling upsert via delete+add.
|
|
@@ -48,10 +51,11 @@ export class Indexer {
|
|
|
48
51
|
// strings with HTTP 400, and indexing an empty row produces nothing
|
|
49
52
|
// searchable anyway. Quarantine the skip so it's visible (not stderr-only)
|
|
50
53
|
// and remove any stale row from a prior good extraction of the same path.
|
|
54
|
+
const table = getTable(this.config.tableName ?? "structured_docs");
|
|
55
|
+
const sourcePathFilter = eqFilter("source_path", filePath);
|
|
51
56
|
if (isBlank(doc.content)) {
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
await table.delete(`id = '${id}'`);
|
|
57
|
+
// Remove the whole prior set (single row or chunk set) for this path.
|
|
58
|
+
await table.delete(sourcePathFilter);
|
|
55
59
|
await writeSkipQuarantine(this.config.dataDir, {
|
|
56
60
|
reason: "empty_extracted_content",
|
|
57
61
|
source_path: filePath,
|
|
@@ -59,41 +63,46 @@ export class Indexer {
|
|
|
59
63
|
});
|
|
60
64
|
return;
|
|
61
65
|
}
|
|
62
|
-
const
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
66
|
+
const pipeline = this.config.sourceType === "filesystem_watched" ? "watcher" : "scheduled_scan";
|
|
67
|
+
const baseId = fileId(filePath);
|
|
68
|
+
// Chunk (flag-gated). chunkText short-circuits small docs to a single
|
|
69
|
+
// chunk, so a below-threshold doc — and every doc with the flag off —
|
|
70
|
+
// produces exactly one unmarked row, byte-identical to the prior behavior.
|
|
71
|
+
const chunks = chunkingEnabled() ? chunkText(doc.content) : [doc.content];
|
|
72
|
+
const count = chunks.length;
|
|
73
|
+
// Data-safety: embed the FULL new set BEFORE the destructive delete, so a
|
|
74
|
+
// failure leaves the old set intact (filesystem is the source of truth).
|
|
75
|
+
const indexedAt = new Date().toISOString();
|
|
76
|
+
const rows = [];
|
|
77
|
+
for (let i = 0; i < count; i++) {
|
|
78
|
+
const chunk = chunks[i];
|
|
79
|
+
const embeddingVec = await recordEmbed(this.config.dataDir, this.embedding, { pipeline, operation: "document_embedding", input_count: 1 }, () => this.embedding.embed(chunk));
|
|
80
|
+
rows.push({
|
|
81
|
+
id: count > 1 ? docChunkId(baseId, i) : baseId,
|
|
82
|
+
source_path: filePath,
|
|
83
|
+
content: chunk,
|
|
84
|
+
embedding: embeddingVec,
|
|
85
|
+
source_type: this.config.sourceType,
|
|
86
|
+
source_scope: this.config.sourceScope,
|
|
87
|
+
client_name: this.config.clientName,
|
|
88
|
+
indexed_at: indexedAt,
|
|
89
|
+
metadata: JSON.stringify(docChunkMetadata(doc.metadata, i, count)),
|
|
90
|
+
});
|
|
91
|
+
}
|
|
92
|
+
// Upsert by source_path: removes any prior chunk set AND any legacy
|
|
93
|
+
// single row for this file before writing the new set (no dup/stale chunks).
|
|
94
|
+
await table.delete(sourcePathFilter);
|
|
84
95
|
await recordIndexWrite(this.config.dataDir, {
|
|
85
96
|
table: this.config.tableName ?? "structured_docs",
|
|
86
|
-
pipeline
|
|
87
|
-
? "watcher"
|
|
88
|
-
: "scheduled_scan",
|
|
97
|
+
pipeline,
|
|
89
98
|
operation: "structured_doc_upsert",
|
|
90
|
-
row_count:
|
|
91
|
-
}, () => table.add(
|
|
99
|
+
row_count: rows.length,
|
|
100
|
+
}, () => table.add(rows));
|
|
92
101
|
}
|
|
93
102
|
async handleDelete(filePath) {
|
|
94
|
-
const id = fileId(filePath);
|
|
95
103
|
const table = getTable(this.config.tableName ?? "structured_docs");
|
|
96
|
-
|
|
104
|
+
// Delete by source_path so all chunks of the file are removed together.
|
|
105
|
+
await table.delete(eqFilter("source_path", filePath));
|
|
97
106
|
}
|
|
98
107
|
}
|
|
99
108
|
function isBlank(s) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"indexer.js","sourceRoot":"","sources":["../../../src/ingestion/indexer.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,OAAO,MAAM,MAAM,aAAa,CAAC;AAGjC,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AACxE,OAAO,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAC3D,OAAO,EAAE,WAAW,EAAE,MAAM,sCAAsC,CAAC;AACnE,OAAO,EAAE,gBAAgB,EAAE,MAAM,kCAAkC,CAAC;
|
|
1
|
+
{"version":3,"file":"indexer.js","sourceRoot":"","sources":["../../../src/ingestion/indexer.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,OAAO,MAAM,MAAM,aAAa,CAAC;AAGjC,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AACxE,OAAO,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAC3D,OAAO,EAAE,WAAW,EAAE,MAAM,sCAAsC,CAAC;AACnE,OAAO,EAAE,gBAAgB,EAAE,MAAM,kCAAkC,CAAC;AACpE,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAC7D,OAAO,EAAE,UAAU,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAC/D,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAuBhD;;;GAGG;AACH,MAAM,UAAU,MAAM,CAAC,UAAkB;IACvC,OAAO,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AACnF,CAAC;AAED,MAAM,OAAO,OAAO;IACD,SAAS,CAAoB;IAC7B,MAAM,CAAgB;IAEvC,YAAY,SAA4B,EAAE,MAAqB;QAC7D,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,WAAW,CAAC,KAAgB;QAChC,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC5B,MAAM,SAAS,GAAG,kBAAkB,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;YAC3E,MAAM,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;YACnC,OAAO;QACT,CAAC;QAED,MAAM,SAAS,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;QAErE,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC;YAAE,OAAO;QAEpC,MAAM,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;IACrC,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,QAAgB;QACzC,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,QAAQ,CAAC,CAAC;QAEpC,0EAA0E;QAC1E,oEAAoE;QACpE,2EAA2E;QAC3E,0EAA0E;QAC1E,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,IAAI,iBAAiB,CAAC,CAAC;QACnE,MAAM,gBAAgB,GAAG,QAAQ,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;QAE3D,IAAI,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YACzB,sEAAsE;YACtE,MAAM,KAAK,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC;YACrC,MAAM,mBAAmB,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE;gBAC7C,MAAM,EAAE,yBAAyB;gBACjC,WAAW,EAAE,QAAQ;gBACrB,QAAQ,EAAE,GAAG,CAAC,QAAQ;aACvB,CAAC,CAAC;YACH,OAAO;QACT,CAAC;QAED,MAAM,QAAQ,GACZ,IAAI,CAAC,MAAM,CAAC,UAAU,KAAK,oBAAoB,CAAC,CAAC,
CAAC,SAAS,CAAC,CAAC,CAAC,gBAAgB,CAAC;QACjF,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC;QAEhC,sEAAsE;QACtE,sEAAsE;QACtE,2EAA2E;QAC3E,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAC1E,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC;QAE5B,0EAA0E;QAC1E,yEAAyE;QACzE,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC3C,MAAM,IAAI,GAAuB,EAAE,CAAC;QACpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAE,CAAC;YACzB,MAAM,YAAY,GAAG,MAAM,WAAW,CACpC,IAAI,CAAC,MAAM,CAAC,OAAO,EACnB,IAAI,CAAC,SAAS,EACd,EAAE,QAAQ,EAAE,SAAS,EAAE,oBAAoB,EAAE,WAAW,EAAE,CAAC,EAAE,EAC7D,GAAG,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,CAClC,CAAC;YACF,IAAI,CAAC,IAAI,CAAC;gBACR,EAAE,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM;gBAC9C,WAAW,EAAE,QAAQ;gBACrB,OAAO,EAAE,KAAK;gBACd,SAAS,EAAE,YAAY;gBACvB,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,UAAU;gBACnC,YAAY,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;gBACrC,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,UAAU;gBACnC,UAAU,EAAE,SAAS;gBACrB,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,gBAAgB,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,EAAE,KAAK,CAAC,CAAC;aACnE,CAAC,CAAC;QACL,CAAC;QAED,oEAAoE;QACpE,6EAA6E;QAC7E,MAAM,KAAK,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC;QACrC,MAAM,gBAAgB,CACpB,IAAI,CAAC,MAAM,CAAC,OAAO,EACnB;YACE,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS,IAAI,iBAAiB;YACjD,QAAQ;YACR,SAAS,EAAE,uBAAuB;YAClC,SAAS,EAAE,IAAI,CAAC,MAAM;SACvB,EACD,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CACtB,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,QAAgB;QACzC,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,IAAI,iBAAiB,CAAC,CAAC;QACnE,wEAAwE;QACxE,MAAM,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC,CAAC;IACxD,CAAC;CACF;AAED,SAAS,OAAO,CAAC,CAAS;IACxB,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,CAAC;AAC/B,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"compact.d.ts","sourceRoot":"","sources":["../../../../src/jobs/handlers/compact.ts"],"names":[],"mappings":"AAgCA,OAAO,KAAK,EAAO,UAAU,EAAE,MAAM,aAAa,CAAC;AACnD,OAAO,KAAK,EAAE,iBAAiB,EAAE,gBAAgB,EAAqB,MAAM,0BAA0B,CAAC;AACvG,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AAC1D,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;
|
|
1
|
+
{"version":3,"file":"compact.d.ts","sourceRoot":"","sources":["../../../../src/jobs/handlers/compact.ts"],"names":[],"mappings":"AAgCA,OAAO,KAAK,EAAO,UAAU,EAAE,MAAM,aAAa,CAAC;AACnD,OAAO,KAAK,EAAE,iBAAiB,EAAE,gBAAgB,EAAqB,MAAM,0BAA0B,CAAC;AACvG,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AAC1D,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AAS1D,MAAM,WAAW,WAAW;IAC1B,iBAAiB,EAAE,iBAAiB,CAAC;IACrC,gBAAgB,EAAE,gBAAgB,CAAC;IACnC,OAAO,EAAE,MAAM,CAAC;IAChB,gBAAgB,EAAE,MAAM,CAAC;IACzB,QAAQ,EAAE,OAAO,EAAE,CAAC;IACpB,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;CACzB;AAED,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,gBAAgB;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,QAAQ,EAAE,OAAO,CAAC;CACnB;AASD,4DAA4D;AAC5D,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAE5D;AAMD,wBAAgB,oBAAoB,CAAC,IAAI,EAAE,WAAW,GAAG,UAAU,CAwBlE"}
|
|
@@ -31,6 +31,7 @@ import crypto from "node:crypto";
|
|
|
31
31
|
import fs from "node:fs";
|
|
32
32
|
import path from "node:path";
|
|
33
33
|
import { getTable } from "../../storage/tables.js";
|
|
34
|
+
import { eqFilter } from "../../storage/filter.js";
|
|
34
35
|
import { atomicWrite } from "../../storage/atomic.js";
|
|
35
36
|
import { recordEmbed } from "../../observability/embedding-events.js";
|
|
36
37
|
import { recordIndexWrite } from "../../observability/index-events.js";
|
|
@@ -139,7 +140,7 @@ async function handleCompaction(deps, dryRun) {
|
|
|
139
140
|
}, () => coldTable.add(toMove.map(cleanConversationRow)));
|
|
140
141
|
// 2. Remove from hot
|
|
141
142
|
for (const row of toMove) {
|
|
142
|
-
await hotTable.delete(
|
|
143
|
+
await hotTable.delete(eqFilter("id", row.id, { validateAsRowId: true }));
|
|
143
144
|
}
|
|
144
145
|
// 3. Write raw digest artifact BEFORE table insert (crash safety:
|
|
145
146
|
// if we crash after table insert but before raw write, reconcile
|
|
@@ -212,7 +213,7 @@ async function handleRollback(deps, dryRun) {
|
|
|
212
213
|
for (const convId of manifest.conversation_ids) {
|
|
213
214
|
const rows = (await coldTable
|
|
214
215
|
.query()
|
|
215
|
-
.where(
|
|
216
|
+
.where(eqFilter("id", convId, { validateAsRowId: true }))
|
|
216
217
|
.toArray());
|
|
217
218
|
if (rows.length > 0) {
|
|
218
219
|
await recordIndexWrite(deps.dataDir, {
|
|
@@ -221,11 +222,11 @@ async function handleRollback(deps, dryRun) {
|
|
|
221
222
|
operation: "conversation_rollback_to_hot",
|
|
222
223
|
row_count: rows.length,
|
|
223
224
|
}, () => hotTable.add(rows.map(cleanConversationRow)));
|
|
224
|
-
await coldTable.delete(
|
|
225
|
+
await coldTable.delete(eqFilter("id", convId, { validateAsRowId: true }));
|
|
225
226
|
}
|
|
226
227
|
}
|
|
227
228
|
// Remove the digest row from table
|
|
228
|
-
await digestTable.delete(
|
|
229
|
+
await digestTable.delete(eqFilter("id", manifest.digest_id, { validateAsRowId: true }));
|
|
229
230
|
// Remove the raw digest file so reconcile cannot resurrect it
|
|
230
231
|
if (manifest.digest_filename) {
|
|
231
232
|
const digestPath = path.join(deps.dataDir, "raw", "digests", manifest.digest_filename);
|
|
@@ -341,6 +342,10 @@ function cleanConversationRow(row) {
|
|
|
341
342
|
// basic/placeholder row stays a backfill target after compaction.
|
|
342
343
|
metadata_provider: row.metadata_provider ?? "",
|
|
343
344
|
embedding_provider: row.embedding_provider ?? "",
|
|
345
|
+
// Preserve chunk identity so a chunk set survives hot → cold relocation.
|
|
346
|
+
parent_id: row.parent_id ?? "",
|
|
347
|
+
chunk_index: row.chunk_index ?? "",
|
|
348
|
+
chunk_count: row.chunk_count ?? "",
|
|
344
349
|
};
|
|
345
350
|
}
|
|
346
351
|
function safeParseArray(json) {
|