@getrift/rift 0.1.0-beta.21 → 0.1.0-beta.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -3
- package/dist/src/capture/auto-capture.d.ts +105 -4
- package/dist/src/capture/auto-capture.d.ts.map +1 -1
- package/dist/src/capture/auto-capture.js +313 -34
- package/dist/src/capture/auto-capture.js.map +1 -1
- package/dist/src/capture/claude-cli-triage-provider.d.ts +28 -0
- package/dist/src/capture/claude-cli-triage-provider.d.ts.map +1 -0
- package/dist/src/capture/claude-cli-triage-provider.js +88 -0
- package/dist/src/capture/claude-cli-triage-provider.js.map +1 -0
- package/dist/src/capture/codex-cli-triage-provider.d.ts.map +1 -1
- package/dist/src/capture/codex-cli-triage-provider.js +1 -33
- package/dist/src/capture/codex-cli-triage-provider.js.map +1 -1
- package/dist/src/capture/cursor-capture.d.ts +89 -0
- package/dist/src/capture/cursor-capture.d.ts.map +1 -0
- package/dist/src/capture/cursor-capture.js +121 -0
- package/dist/src/capture/cursor-capture.js.map +1 -0
- package/dist/src/capture/observability.d.ts +30 -0
- package/dist/src/capture/observability.d.ts.map +1 -1
- package/dist/src/capture/observability.js +29 -0
- package/dist/src/capture/observability.js.map +1 -1
- package/dist/src/capture/recover-quarantine.d.ts +4 -4
- package/dist/src/capture/sources.d.ts +41 -3
- package/dist/src/capture/sources.d.ts.map +1 -1
- package/dist/src/capture/sources.js +43 -1
- package/dist/src/capture/sources.js.map +1 -1
- package/dist/src/capture/triage-classification.d.ts +69 -0
- package/dist/src/capture/triage-classification.d.ts.map +1 -0
- package/dist/src/capture/triage-classification.js +62 -0
- package/dist/src/capture/triage-classification.js.map +1 -0
- package/dist/src/capture/triage-provider-factory.d.ts +36 -0
- package/dist/src/capture/triage-provider-factory.d.ts.map +1 -0
- package/dist/src/capture/triage-provider-factory.js +55 -0
- package/dist/src/capture/triage-provider-factory.js.map +1 -0
- package/dist/src/capture/triage.d.ts +1 -1
- package/dist/src/capture/triage.d.ts.map +1 -1
- package/dist/src/capture/triage.js +8 -6
- package/dist/src/capture/triage.js.map +1 -1
- package/dist/src/cli/commands/capture.d.ts.map +1 -1
- package/dist/src/cli/commands/capture.js +79 -17
- package/dist/src/cli/commands/capture.js.map +1 -1
- package/dist/src/cli/commands/chunk-backfill.d.ts +13 -0
- package/dist/src/cli/commands/chunk-backfill.d.ts.map +1 -0
- package/dist/src/cli/commands/chunk-backfill.js +157 -0
- package/dist/src/cli/commands/chunk-backfill.js.map +1 -0
- package/dist/src/cli/commands/cursor-probe.d.ts +20 -0
- package/dist/src/cli/commands/cursor-probe.d.ts.map +1 -0
- package/dist/src/cli/commands/cursor-probe.js +162 -0
- package/dist/src/cli/commands/cursor-probe.js.map +1 -0
- package/dist/src/cli/commands/menubar.d.ts +3 -1
- package/dist/src/cli/commands/menubar.d.ts.map +1 -1
- package/dist/src/cli/commands/menubar.js +36 -12
- package/dist/src/cli/commands/menubar.js.map +1 -1
- package/dist/src/cli/commands/onboard.d.ts +22 -2
- package/dist/src/cli/commands/onboard.d.ts.map +1 -1
- package/dist/src/cli/commands/onboard.js +160 -32
- package/dist/src/cli/commands/onboard.js.map +1 -1
- package/dist/src/cli/commands/status.d.ts.map +1 -1
- package/dist/src/cli/commands/status.js +12 -0
- package/dist/src/cli/commands/status.js.map +1 -1
- package/dist/src/cli/commands/update.d.ts +34 -1
- package/dist/src/cli/commands/update.d.ts.map +1 -1
- package/dist/src/cli/commands/update.js +166 -1
- package/dist/src/cli/commands/update.js.map +1 -1
- package/dist/src/cli/index.d.ts.map +1 -1
- package/dist/src/cli/index.js +4 -0
- package/dist/src/cli/index.js.map +1 -1
- package/dist/src/cli/postinstall-menubar.d.ts +20 -13
- package/dist/src/cli/postinstall-menubar.d.ts.map +1 -1
- package/dist/src/cli/postinstall-menubar.js +56 -1
- package/dist/src/cli/postinstall-menubar.js.map +1 -1
- package/dist/src/cli/status/friend-header.d.ts +16 -3
- package/dist/src/cli/status/friend-header.d.ts.map +1 -1
- package/dist/src/cli/status/friend-header.js +186 -10
- package/dist/src/cli/status/friend-header.js.map +1 -1
- package/dist/src/cli/status/local-signals.d.ts +42 -4
- package/dist/src/cli/status/local-signals.d.ts.map +1 -1
- package/dist/src/cli/status/local-signals.js +52 -1
- package/dist/src/cli/status/local-signals.js.map +1 -1
- package/dist/src/config/schema.d.ts +220 -14
- package/dist/src/config/schema.d.ts.map +1 -1
- package/dist/src/config/schema.js +82 -7
- package/dist/src/config/schema.js.map +1 -1
- package/dist/src/diagnostics/claude-preflight.d.ts +35 -0
- package/dist/src/diagnostics/claude-preflight.d.ts.map +1 -0
- package/dist/src/diagnostics/claude-preflight.js +90 -0
- package/dist/src/diagnostics/claude-preflight.js.map +1 -0
- package/dist/src/diagnostics/codex-preflight.d.ts +1 -1
- package/dist/src/diagnostics/codex-preflight.d.ts.map +1 -1
- package/dist/src/diagnostics/codex-preflight.js +24 -0
- package/dist/src/diagnostics/codex-preflight.js.map +1 -1
- package/dist/src/diagnostics/doctor.d.ts +7 -4
- package/dist/src/diagnostics/doctor.d.ts.map +1 -1
- package/dist/src/diagnostics/doctor.js +70 -11
- package/dist/src/diagnostics/doctor.js.map +1 -1
- package/dist/src/diagnostics/memory-coverage.d.ts +54 -0
- package/dist/src/diagnostics/memory-coverage.d.ts.map +1 -0
- package/dist/src/diagnostics/memory-coverage.js +272 -0
- package/dist/src/diagnostics/memory-coverage.js.map +1 -0
- package/dist/src/diagnostics/notify.d.ts +20 -3
- package/dist/src/diagnostics/notify.d.ts.map +1 -1
- package/dist/src/diagnostics/notify.js +54 -14
- package/dist/src/diagnostics/notify.js.map +1 -1
- package/dist/src/ingestion/chunk-meta.d.ts +85 -0
- package/dist/src/ingestion/chunk-meta.d.ts.map +1 -0
- package/dist/src/ingestion/chunk-meta.js +167 -0
- package/dist/src/ingestion/chunk-meta.js.map +1 -0
- package/dist/src/ingestion/chunk-text.d.ts +39 -0
- package/dist/src/ingestion/chunk-text.d.ts.map +1 -0
- package/dist/src/ingestion/chunk-text.js +114 -0
- package/dist/src/ingestion/chunk-text.js.map +1 -0
- package/dist/src/ingestion/cursor/cursor-store.d.ts +177 -0
- package/dist/src/ingestion/cursor/cursor-store.d.ts.map +1 -0
- package/dist/src/ingestion/cursor/cursor-store.js +243 -0
- package/dist/src/ingestion/cursor/cursor-store.js.map +1 -0
- package/dist/src/ingestion/cursor/enrich-roots.d.ts +16 -0
- package/dist/src/ingestion/cursor/enrich-roots.d.ts.map +1 -0
- package/dist/src/ingestion/cursor/enrich-roots.js +22 -0
- package/dist/src/ingestion/cursor/enrich-roots.js.map +1 -0
- package/dist/src/ingestion/cursor/vscdb-reader.d.ts +32 -0
- package/dist/src/ingestion/cursor/vscdb-reader.d.ts.map +1 -0
- package/dist/src/ingestion/cursor/vscdb-reader.js +113 -0
- package/dist/src/ingestion/cursor/vscdb-reader.js.map +1 -0
- package/dist/src/ingestion/cursor/workspace-root.d.ts +96 -0
- package/dist/src/ingestion/cursor/workspace-root.d.ts.map +1 -0
- package/dist/src/ingestion/cursor/workspace-root.js +187 -0
- package/dist/src/ingestion/cursor/workspace-root.js.map +1 -0
- package/dist/src/ingestion/indexer.d.ts.map +1 -1
- package/dist/src/ingestion/indexer.js +41 -32
- package/dist/src/ingestion/indexer.js.map +1 -1
- package/dist/src/jobs/handlers/compact.d.ts.map +1 -1
- package/dist/src/jobs/handlers/compact.js +9 -4
- package/dist/src/jobs/handlers/compact.js.map +1 -1
- package/dist/src/jobs/handlers/ingest.d.ts.map +1 -1
- package/dist/src/jobs/handlers/ingest.js +60 -30
- package/dist/src/jobs/handlers/ingest.js.map +1 -1
- package/dist/src/jobs/handlers/reconcile.d.ts.map +1 -1
- package/dist/src/jobs/handlers/reconcile.js +128 -45
- package/dist/src/jobs/handlers/reconcile.js.map +1 -1
- package/dist/src/jobs/handlers/save.d.ts.map +1 -1
- package/dist/src/jobs/handlers/save.js +122 -72
- package/dist/src/jobs/handlers/save.js.map +1 -1
- package/dist/src/jobs/types.d.ts +1 -1
- package/dist/src/main.js +27 -16
- package/dist/src/main.js.map +1 -1
- package/dist/src/mcp/capture-diagnostics.d.ts +51 -0
- package/dist/src/mcp/capture-diagnostics.d.ts.map +1 -0
- package/dist/src/mcp/capture-diagnostics.js +127 -0
- package/dist/src/mcp/capture-diagnostics.js.map +1 -0
- package/dist/src/mcp/memory-diagnostics.d.ts +6 -0
- package/dist/src/mcp/memory-diagnostics.d.ts.map +1 -0
- package/dist/src/mcp/memory-diagnostics.js +51 -0
- package/dist/src/mcp/memory-diagnostics.js.map +1 -0
- package/dist/src/mcp/server.d.ts.map +1 -1
- package/dist/src/mcp/server.js +10 -3
- package/dist/src/mcp/server.js.map +1 -1
- package/dist/src/mcp/tools/context-pack.d.ts.map +1 -1
- package/dist/src/mcp/tools/context-pack.js +7 -1
- package/dist/src/mcp/tools/context-pack.js.map +1 -1
- package/dist/src/mcp/tools/conversations-search.d.ts +1 -1
- package/dist/src/mcp/tools/conversations-search.d.ts.map +1 -1
- package/dist/src/mcp/tools/conversations-search.js +7 -1
- package/dist/src/mcp/tools/conversations-search.js.map +1 -1
- package/dist/src/mcp/tools/evidence-feedback.d.ts +60 -0
- package/dist/src/mcp/tools/evidence-feedback.d.ts.map +1 -0
- package/dist/src/mcp/tools/evidence-feedback.js +62 -0
- package/dist/src/mcp/tools/evidence-feedback.js.map +1 -0
- package/dist/src/mcp/tools/log-outcome.d.ts +72 -0
- package/dist/src/mcp/tools/log-outcome.d.ts.map +1 -0
- package/dist/src/mcp/tools/log-outcome.js +59 -0
- package/dist/src/mcp/tools/log-outcome.js.map +1 -0
- package/dist/src/mcp/tools/open-evidence.d.ts +37 -0
- package/dist/src/mcp/tools/open-evidence.d.ts.map +1 -0
- package/dist/src/mcp/tools/open-evidence.js +72 -0
- package/dist/src/mcp/tools/open-evidence.js.map +1 -0
- package/dist/src/mcp/tools/save.d.ts +7 -2
- package/dist/src/mcp/tools/save.d.ts.map +1 -1
- package/dist/src/mcp/tools/save.js +7 -2
- package/dist/src/mcp/tools/save.js.map +1 -1
- package/dist/src/mcp/tools/search.d.ts.map +1 -1
- package/dist/src/mcp/tools/search.js +7 -1
- package/dist/src/mcp/tools/search.js.map +1 -1
- package/dist/src/mcp/tools/status.d.ts +15 -1
- package/dist/src/mcp/tools/status.d.ts.map +1 -1
- package/dist/src/mcp/tools/status.js +53 -2
- package/dist/src/mcp/tools/status.js.map +1 -1
- package/dist/src/observability/retrieval-feedback.d.ts +82 -0
- package/dist/src/observability/retrieval-feedback.d.ts.map +1 -0
- package/dist/src/observability/retrieval-feedback.js +231 -0
- package/dist/src/observability/retrieval-feedback.js.map +1 -0
- package/dist/src/observability/rift-context.d.ts.map +1 -1
- package/dist/src/observability/rift-context.js +3 -0
- package/dist/src/observability/rift-context.js.map +1 -1
- package/dist/src/observability/tool-usage-stats.d.ts +13 -0
- package/dist/src/observability/tool-usage-stats.d.ts.map +1 -1
- package/dist/src/observability/tool-usage-stats.js +15 -0
- package/dist/src/observability/tool-usage-stats.js.map +1 -1
- package/dist/src/observability/tool-usage.d.ts +56 -0
- package/dist/src/observability/tool-usage.d.ts.map +1 -1
- package/dist/src/observability/tool-usage.js +86 -0
- package/dist/src/observability/tool-usage.js.map +1 -1
- package/dist/src/providers/claude-cli-metadata-extraction.d.ts +47 -0
- package/dist/src/providers/claude-cli-metadata-extraction.d.ts.map +1 -0
- package/dist/src/providers/claude-cli-metadata-extraction.js +120 -0
- package/dist/src/providers/claude-cli-metadata-extraction.js.map +1 -0
- package/dist/src/providers/claude-cli-runner.d.ts +92 -0
- package/dist/src/providers/claude-cli-runner.d.ts.map +1 -0
- package/dist/src/providers/claude-cli-runner.js +598 -0
- package/dist/src/providers/claude-cli-runner.js.map +1 -0
- package/dist/src/providers/codex-cli-metadata-extraction.d.ts.map +1 -1
- package/dist/src/providers/codex-cli-metadata-extraction.js +1 -40
- package/dist/src/providers/codex-cli-metadata-extraction.js.map +1 -1
- package/dist/src/providers/codex-cli-runner.d.ts +7 -0
- package/dist/src/providers/codex-cli-runner.d.ts.map +1 -1
- package/dist/src/providers/codex-cli-runner.js +131 -5
- package/dist/src/providers/codex-cli-runner.js.map +1 -1
- package/dist/src/providers/conversation-generation.d.ts +10 -0
- package/dist/src/providers/conversation-generation.d.ts.map +1 -1
- package/dist/src/providers/conversation-generation.js +54 -13
- package/dist/src/providers/conversation-generation.js.map +1 -1
- package/dist/src/providers/openai-metadata-extraction.d.ts +48 -1
- package/dist/src/providers/openai-metadata-extraction.d.ts.map +1 -1
- package/dist/src/providers/openai-metadata-extraction.js +51 -2
- package/dist/src/providers/openai-metadata-extraction.js.map +1 -1
- package/dist/src/providers/types.d.ts +1 -1
- package/dist/src/providers/types.d.ts.map +1 -1
- package/dist/src/providers/types.js +4 -0
- package/dist/src/providers/types.js.map +1 -1
- package/dist/src/retrieval/canonical-files.d.ts +48 -0
- package/dist/src/retrieval/canonical-files.d.ts.map +1 -0
- package/dist/src/retrieval/canonical-files.js +210 -0
- package/dist/src/retrieval/canonical-files.js.map +1 -0
- package/dist/src/retrieval/compact.d.ts +95 -0
- package/dist/src/retrieval/compact.d.ts.map +1 -1
- package/dist/src/retrieval/compact.js +254 -8
- package/dist/src/retrieval/compact.js.map +1 -1
- package/dist/src/retrieval/context-pack.d.ts.map +1 -1
- package/dist/src/retrieval/context-pack.js +65 -15
- package/dist/src/retrieval/context-pack.js.map +1 -1
- package/dist/src/retrieval/conversation-dedup.d.ts +40 -0
- package/dist/src/retrieval/conversation-dedup.d.ts.map +1 -0
- package/dist/src/retrieval/conversation-dedup.js +141 -0
- package/dist/src/retrieval/conversation-dedup.js.map +1 -0
- package/dist/src/retrieval/evidence-key.d.ts +48 -0
- package/dist/src/retrieval/evidence-key.d.ts.map +1 -0
- package/dist/src/retrieval/evidence-key.js +131 -0
- package/dist/src/retrieval/evidence-key.js.map +1 -0
- package/dist/src/retrieval/feedback-ranking.d.ts +49 -0
- package/dist/src/retrieval/feedback-ranking.d.ts.map +1 -0
- package/dist/src/retrieval/feedback-ranking.js +138 -0
- package/dist/src/retrieval/feedback-ranking.js.map +1 -0
- package/dist/src/retrieval/git-state.d.ts +9 -0
- package/dist/src/retrieval/git-state.d.ts.map +1 -1
- package/dist/src/retrieval/git-state.js +18 -0
- package/dist/src/retrieval/git-state.js.map +1 -1
- package/dist/src/retrieval/group-by-parent.d.ts +38 -0
- package/dist/src/retrieval/group-by-parent.d.ts.map +1 -0
- package/dist/src/retrieval/group-by-parent.js +40 -0
- package/dist/src/retrieval/group-by-parent.js.map +1 -0
- package/dist/src/retrieval/lexical.d.ts.map +1 -1
- package/dist/src/retrieval/lexical.js +1 -3
- package/dist/src/retrieval/lexical.js.map +1 -1
- package/dist/src/retrieval/receipt.d.ts +57 -0
- package/dist/src/retrieval/receipt.d.ts.map +1 -0
- package/dist/src/retrieval/receipt.js +119 -0
- package/dist/src/retrieval/receipt.js.map +1 -0
- package/dist/src/retrieval/reranker.d.ts +49 -2
- package/dist/src/retrieval/reranker.d.ts.map +1 -1
- package/dist/src/retrieval/reranker.js +64 -4
- package/dist/src/retrieval/reranker.js.map +1 -1
- package/dist/src/retrieval/stitch-chunks.d.ts +73 -0
- package/dist/src/retrieval/stitch-chunks.d.ts.map +1 -0
- package/dist/src/retrieval/stitch-chunks.js +106 -0
- package/dist/src/retrieval/stitch-chunks.js.map +1 -0
- package/dist/src/server/app.d.ts +1 -1
- package/dist/src/server/app.d.ts.map +1 -1
- package/dist/src/server/app.js +20 -3
- package/dist/src/server/app.js.map +1 -1
- package/dist/src/server/routes/conversations-search.d.ts.map +1 -1
- package/dist/src/server/routes/conversations-search.js +22 -3
- package/dist/src/server/routes/conversations-search.js.map +1 -1
- package/dist/src/server/routes/friend-status.d.ts +64 -6
- package/dist/src/server/routes/friend-status.d.ts.map +1 -1
- package/dist/src/server/routes/friend-status.js +114 -18
- package/dist/src/server/routes/friend-status.js.map +1 -1
- package/dist/src/server/routes/mcp-usage.d.ts +9 -6
- package/dist/src/server/routes/mcp-usage.d.ts.map +1 -1
- package/dist/src/server/routes/mcp-usage.js.map +1 -1
- package/dist/src/server/routes/retrieval-feedback.d.ts +3 -0
- package/dist/src/server/routes/retrieval-feedback.d.ts.map +1 -0
- package/dist/src/server/routes/retrieval-feedback.js +290 -0
- package/dist/src/server/routes/retrieval-feedback.js.map +1 -0
- package/dist/src/server/routes/save.d.ts +3 -3
- package/dist/src/server/routes/save.d.ts.map +1 -1
- package/dist/src/server/routes/save.js +6 -2
- package/dist/src/server/routes/save.js.map +1 -1
- package/dist/src/server/routes/search.d.ts +1 -1
- package/dist/src/server/routes/search.d.ts.map +1 -1
- package/dist/src/server/routes/search.js +55 -8
- package/dist/src/server/routes/search.js.map +1 -1
- package/dist/src/server/serving-marker.d.ts +85 -0
- package/dist/src/server/serving-marker.d.ts.map +1 -0
- package/dist/src/server/serving-marker.js +226 -0
- package/dist/src/server/serving-marker.js.map +1 -0
- package/dist/src/storage/chunk-backfill.d.ts +39 -0
- package/dist/src/storage/chunk-backfill.d.ts.map +1 -0
- package/dist/src/storage/chunk-backfill.js +295 -0
- package/dist/src/storage/chunk-backfill.js.map +1 -0
- package/dist/src/storage/filter.d.ts +42 -0
- package/dist/src/storage/filter.d.ts.map +1 -0
- package/dist/src/storage/filter.js +70 -0
- package/dist/src/storage/filter.js.map +1 -0
- package/dist/src/storage/rebuild.d.ts.map +1 -1
- package/dist/src/storage/rebuild.js +44 -27
- package/dist/src/storage/rebuild.js.map +1 -1
- package/dist/src/storage/tables.d.ts +41 -0
- package/dist/src/storage/tables.d.ts.map +1 -1
- package/dist/src/storage/tables.js +64 -1
- package/dist/src/storage/tables.js.map +1 -1
- package/operator/swiftbar/render-menu.py +60 -18
- package/package.json +6 -4
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
/**
 * Separators placed between a parent id and a chunk ordinal when building
 * chunk row ids. The two families use different separators only to make ids
 * easier to read: ids are unique either way, and ordering must always come
 * from `chunk_index`, never from the id string.
 */
/** Separator for structured-document chunk ids. */
export const DOC_CHUNK_SEP = ":";
/** Separator for conversation chunk ids. */
export const CONV_CHUNK_SEP = "::";
|
|
5
|
+
/**
 * Coerce a loosely typed value (a storage column or a JSON metadata field)
 * to an integer, returning `fallback` when no integer can be derived.
 *
 * - Finite numbers are truncated toward zero, so numeric input yields the
 *   same result as its string form (`2.7` and `"2.7"` both give `2`).
 * - Non-blank strings are parsed base-10 with `Number.parseInt`, which
 *   accepts leading whitespace and ignores trailing non-digit characters.
 * - Everything else (NaN, ±Infinity, blank strings, null, objects, ...)
 *   yields `fallback`.
 *
 * @param {unknown} value - Raw value to coerce.
 * @param {number} fallback - Returned when `value` cannot be coerced.
 * @returns {number} The integer value, or `fallback`.
 */
function parseIntOr(value, fallback) {
    if (typeof value === "number") {
        // Truncate so a fractional number cannot leak through as a chunk
        // ordinal; non-finite numbers are treated as unparseable.
        return Number.isFinite(value) ? Math.trunc(value) : fallback;
    }
    if (typeof value === "string" && value.trim() !== "") {
        const parsed = Number.parseInt(value, 10);
        // parseInt yields NaN for non-numeric text and can overflow to
        // Infinity on absurdly long digit runs; both fall through.
        if (Number.isFinite(parsed)) {
            return parsed;
        }
    }
    return fallback;
}
|
|
15
|
+
/**
 * Parse a serialized metadata column into a plain record.
 *
 * Best-effort by design: anything that is not a non-blank string holding a
 * JSON object yields an empty record instead of throwing, so a malformed or
 * missing metadata column reads the same as "no metadata".
 *
 * @param {unknown} value - Raw column value, expected to be a JSON string.
 * @returns {Record<string, unknown>} The parsed object, or `{}`.
 */
function parseMetadata(value) {
    if (typeof value !== "string" || value.trim() === "") {
        return {};
    }
    let parsed;
    try {
        parsed = JSON.parse(value);
    }
    catch {
        // Invalid JSON is deliberately treated as absent metadata.
        return {};
    }
    // `typeof null` and `typeof []` are both "object"; neither is a record.
    const isRecord = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    return isRecord ? parsed : {};
}
|
|
28
|
+
/** Source tables that belong to the conversation family. */
const CONVERSATION_TABLES = new Set(["conversations_hot", "conversations_cold"]);
/** Source tables that belong to the structured-document family. */
const DOC_TABLES = new Set(["structured_docs", "structured_docs_local"]);
|
|
36
|
+
/**
 * Extract the chunk marker stored in a conversation row's own columns
 * (`parent_id`, `chunk_index`, `chunk_count`).
 *
 * @param {Record<string, unknown>} row - Conversation row to inspect.
 * @returns {{ parentId: string, index: number, count: number } | null}
 *   The marker, or `null` when the row is unchunked/legacy: a declared
 *   count of 1 or less, or a missing/empty `parent_id`.
 */
export function readConversationChunkMarker(row) {
    const total = parseIntOr(row["chunk_count"], 1);
    if (total <= 1) {
        return null;
    }
    const parent = row["parent_id"];
    if (typeof parent !== "string" || parent === "") {
        return null;
    }
    // A missing or unparseable index defaults to the first chunk.
    const position = parseIntOr(row["chunk_index"], 0);
    return { parentId: parent, index: position, count: total };
}
|
|
46
|
+
/**
 * Extract the chunk marker of a structured-doc row. The parent is the row's
 * `source_path` column; the ordinals (`chunk_index`, `chunk_count`) live
 * inside the serialized `metadata` column.
 *
 * @param {Record<string, unknown>} row - Structured-doc row to inspect.
 * @returns {{ parentId: string, index: number, count: number } | null}
 *   The marker, or `null` when the row is unchunked/legacy: a missing/empty
 *   `source_path`, or a declared count of 1 or less.
 */
export function readDocChunkMarker(row) {
    const path = row["source_path"];
    if (typeof path !== "string" || path === "") {
        return null;
    }
    const metadata = parseMetadata(row["metadata"]);
    const total = parseIntOr(metadata["chunk_count"], 1);
    if (total <= 1) {
        return null;
    }
    // A missing or unparseable index defaults to the first chunk.
    const position = parseIntOr(metadata["chunk_index"], 0);
    return { parentId: path, index: position, count: total };
}
|
|
57
|
+
/**
 * Build the namespaced parent key used to group a row's chunks at retrieval
 * time. Returns `null` when the row has no explicit chunk marker (the
 * flag-off / legacy case), so such rows are never grouped.
 *
 * The namespace keeps rows from different tables or vintages apart:
 *   - docs  → `doc|<source_table>|<source_path>`, so the same path in
 *     `structured_docs` and `structured_docs_local` stays separate
 *   - convs → `conv|<source>|<parent_id>`
 *
 * @param {string} sourceTable - Name of the table the row came from.
 * @param {Record<string, unknown>} row - The row to key.
 * @returns {string | null} The grouping key, or `null` if the row is not
 *   part of a chunk set.
 */
export function parentKeyForRow(sourceTable, row) {
    const isConversation = CONVERSATION_TABLES.has(sourceTable);
    if (!isConversation && !DOC_TABLES.has(sourceTable)) {
        return null; // digests and anything else never chunk
    }
    if (isConversation) {
        const convMarker = readConversationChunkMarker(row);
        if (convMarker === null) {
            return null;
        }
        // A non-string `source` contributes an empty namespace segment.
        const origin = typeof row["source"] === "string" ? row["source"] : "";
        return ["conv", origin, convMarker.parentId].join("|");
    }
    const docMarker = readDocChunkMarker(row);
    if (docMarker === null) {
        return null;
    }
    return ["doc", sourceTable, docMarker.parentId].join("|");
}
|
|
82
|
+
/**
 * Report whether a row belongs to a genuine chunk set (`chunk_count > 1`),
 * i.e. whether a parent key can be derived for it.
 *
 * @param {string} sourceTable - Name of the table the row came from.
 * @param {Record<string, unknown>} row - The row to test.
 * @returns {boolean} `true` when the row carries a chunk marker.
 */
export function isChunked(sourceTable, row) {
    const key = parentKeyForRow(sourceTable, row);
    return key !== null;
}
|
|
86
|
+
/**
 * Family-agnostic chunk-marker reader. Dispatches on the source table so
 * read-side callers (expand stitching) can find a row's position in its set
 * without knowing whether the markers live in columns (conversations) or in
 * metadata (documents).
 *
 * `parentId` in the result is the raw column value (`parent_id` /
 * `source_path`), not the namespaced retrieval key.
 *
 * @param {string} sourceTable - Name of the table the row came from.
 * @param {Record<string, unknown>} row - The row to inspect.
 * @returns {{ parentId: string, index: number, count: number } | null}
 *   The marker, or `null` for unchunked/legacy rows (no marker or
 *   `chunk_count <= 1`) and for tables that never chunk (e.g. digests).
 */
export function readChunkMarker(sourceTable, row) {
    if (CONVERSATION_TABLES.has(sourceTable)) {
        return readConversationChunkMarker(row);
    }
    return DOC_TABLES.has(sourceTable) ? readDocChunkMarker(row) : null;
}
|
|
103
|
+
// --- Builders (producers) ---
|
|
104
|
+
/**
 * Build the chunk-marker columns for a conversation row.
 *
 * @param {string} parentId - Id of the parent conversation.
 * @param {number} index - Zero-based position of this chunk.
 * @param {number} count - Total number of chunks in the set.
 * @returns {{ parent_id: string, chunk_index: string, chunk_count: string }}
 *   Stringified markers; all three are empty strings when `count <= 1`,
 *   matching seed/legacy rows.
 */
export function conversationChunkColumns(parentId, index, count) {
    const unchunked = count <= 1;
    return {
        parent_id: unchunked ? "" : parentId,
        chunk_index: unchunked ? "" : String(index),
        chunk_count: unchunked ? "" : String(count),
    };
}
|
|
114
|
+
/**
 * Attach chunk ordinals to a document metadata object.
 *
 * @param {Record<string, unknown>} base - Existing metadata; never mutated.
 * @param {number} index - Zero-based position of this chunk.
 * @param {number} count - Total number of chunks in the set.
 * @returns {Record<string, unknown>} `base` itself when `count <= 1` (no
 *   `chunk_*` keys are added, so a single/legacy doc stays byte-identical);
 *   otherwise a shallow copy carrying `chunk_index` and `chunk_count`.
 */
export function docChunkMetadata(base, index, count) {
    const ordinals = { chunk_index: index, chunk_count: count };
    return count <= 1 ? base : { ...base, ...ordinals };
}
|
|
121
|
+
/**
 * Compose the row id of a conversation chunk: the parent id, then
 * `CONV_CHUNK_SEP`, then the chunk index.
 *
 * @param {string} parentId - Id of the parent conversation.
 * @param {number} index - Position of the chunk within its set.
 * @returns {string} The chunk row id.
 */
export function convChunkId(parentId, index) {
    const prefix = `${parentId}${CONV_CHUNK_SEP}`;
    return `${prefix}${index}`;
}
|
|
124
|
+
/**
 * Compose the row id of a document chunk: the file id, then
 * `DOC_CHUNK_SEP`, then the chunk index.
 *
 * @param {string} fileId - Id of the parent document/file.
 * @param {number} index - Position of the chunk within its set.
 * @returns {string} The chunk row id.
 */
export function docChunkId(fileId, index) {
    const prefix = `${fileId}${DOC_CHUNK_SEP}`;
    return `${prefix}${index}`;
}
|
|
127
|
+
// --- Completeness (replay / dedup) ---
|
|
128
|
+
/**
 * Find the largest `chunk_count` declared by any row in the set.
 *
 * @param {Iterable<Record<string, unknown>>} rows - Rows to scan; the count
 *   is read from each row's top-level `chunk_count` field.
 * @returns {number} The maximum declared count, never less than 1 (rows
 *   with a missing or unparseable count are treated as declaring 1).
 */
export function expectedChunkCount(rows) {
    let largest = 1;
    for (const row of rows) {
        const declared = parseIntOr(row["chunk_count"], 1);
        largest = Math.max(largest, declared);
    }
    return largest;
}
|
|
135
|
+
/**
 * Decide whether the rows already indexed for one parent make up a COMPLETE
 * chunk set. Save/ingest/reconcile dedup relies on this so that a set left
 * half-written by a crash mid-append gets repaired instead of being taken
 * for "already indexed".
 *
 * A plain row count is not enough. A chunked set is complete only when:
 *   - every row declares the same `chunk_count`, and
 *   - each index `0 .. count-1` appears EXACTLY once.
 * This rejects, for example, two copies of chunk 0 with chunk 1 missing
 * (length 2, count 2, yet broken), which `rows.length >= count` would accept.
 *
 * Unchunked sets (`count <= 1`, i.e. legacy / `/ingest`-job rows) keep the
 * legacy rule that any existing row means complete, so flag-off behavior is
 * unchanged.
 *
 * @param {Array<Record<string, unknown>>} rows - Rows indexed for a parent.
 * @returns {boolean} `true` when the set is complete.
 */
export function chunkSetComplete(rows) {
    if (rows.length === 0) {
        return false;
    }
    const total = expectedChunkCount(rows);
    if (total <= 1) {
        return true; // unchunked/legacy: one existing row is enough
    }
    const indices = new Set();
    for (const row of rows) {
        // A row that disagrees on the total means a mixed or partial set,
        // which cannot be trusted as complete.
        const declared = parseIntOr(row["chunk_count"], 1);
        if (declared !== total) {
            return false;
        }
        const position = parseIntOr(row["chunk_index"], -1);
        const inRange = position >= 0 && position < total;
        // Out-of-range or repeated indices both indicate a broken set.
        if (!inRange || indices.has(position)) {
            return false;
        }
        indices.add(position);
    }
    return indices.size === total;
}
|
|
167
|
+
//# sourceMappingURL=chunk-meta.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunk-meta.js","sourceRoot":"","sources":["../../../src/ingestion/chunk-meta.ts"],"names":[],"mappings":"AA8BA;iFACiF;AACjF,MAAM,CAAC,MAAM,aAAa,GAAG,GAAG,CAAC;AACjC,MAAM,CAAC,MAAM,cAAc,GAAG,IAAI,CAAC;AAEnC,SAAS,UAAU,CAAC,KAAc,EAAE,QAAgB;IAClD,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACtE,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;QACrD,MAAM,CAAC,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QACrC,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;YAAE,OAAO,CAAC,CAAC;IACnC,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,aAAa,CAAC,KAAc;IACnC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,IAAI,EAAE,KAAK,EAAE;QAAE,OAAO,EAAE,CAAC;IAChE,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAY,CAAC;QAC5C,OAAO,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ;YACzC,CAAC,CAAE,MAAkC;YACrC,CAAC,CAAC,EAAE,CAAC;IACT,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED,MAAM,mBAAmB,GAAG,IAAI,GAAG,CAAY;IAC7C,mBAAmB;IACnB,oBAAoB;CACrB,CAAC,CAAC;AACH,MAAM,UAAU,GAAG,IAAI,GAAG,CAAY;IACpC,iBAAiB;IACjB,uBAAuB;CACxB,CAAC,CAAC;AAEH,kFAAkF;AAClF,MAAM,UAAU,2BAA2B,CACzC,GAA4B;IAE5B,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,CAAC,aAAa,CAAC,EAAE,CAAC,CAAC,CAAC;IAChD,IAAI,KAAK,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAC5B,MAAM,QAAQ,GAAG,OAAO,GAAG,CAAC,WAAW,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9E,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAC3B,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,UAAU,CAAC,GAAG,CAAC,aAAa,CAAC,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC;AACvE,CAAC;AAED,oFAAoF;AACpF,MAAM,UAAU,kBAAkB,CAChC,GAA4B;IAE5B,MAAM,UAAU,GACd,OAAO,GAAG,CAAC,aAAa,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACnE,IAAI,CAAC,UAAU;QAAE,OAAO,IAAI,CAAC;IAC7B,MAAM,IAAI,GAAG,aAAa,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC;IAC5C,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,aAAa,CAAC,EAAE,CAAC,CAAC,CAAC;IACjD,IAAI,KAAK,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAC5B,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,KAAK,EAAE,UAAU,CAAC,IAAI,CAAC,aAAa,CAAC,EAAE,CAAC,CAAC,EAAE,KAAK,EAA
E,CAAC;AACpF,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,eAAe,CAC7B,WAAsB,EACtB,GAA4B;IAE5B,IAAI,mBAAmB,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC;QACzC,MAAM,MAAM,GAAG,2BAA2B,CAAC,GAAG,CAAC,CAAC;QAChD,IAAI,CAAC,MAAM;YAAE,OAAO,IAAI,CAAC;QACzB,MAAM,MAAM,GAAG,OAAO,GAAG,CAAC,QAAQ,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACtE,OAAO,QAAQ,MAAM,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;IAC7C,CAAC;IACD,IAAI,UAAU,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC;QAChC,MAAM,MAAM,GAAG,kBAAkB,CAAC,GAAG,CAAC,CAAC;QACvC,IAAI,CAAC,MAAM;YAAE,OAAO,IAAI,CAAC;QACzB,OAAO,OAAO,WAAW,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;IACjD,CAAC;IACD,OAAO,IAAI,CAAC,CAAC,wCAAwC;AACvD,CAAC;AAED,2EAA2E;AAC3E,MAAM,UAAU,SAAS,CACvB,WAAsB,EACtB,GAA4B;IAE5B,OAAO,eAAe,CAAC,WAAW,EAAE,GAAG,CAAC,KAAK,IAAI,CAAC;AACpD,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,eAAe,CAC7B,WAAsB,EACtB,GAA4B;IAE5B,IAAI,mBAAmB,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC;QACzC,OAAO,2BAA2B,CAAC,GAAG,CAAC,CAAC;IAC1C,CAAC;IACD,IAAI,UAAU,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC;QAChC,OAAO,kBAAkB,CAAC,GAAG,CAAC,CAAC;IACjC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,+BAA+B;AAE/B,oFAAoF;AACpF,MAAM,UAAU,wBAAwB,CACtC,QAAgB,EAChB,KAAa,EACb,KAAa;IAEb,IAAI,KAAK,IAAI,CAAC;QAAE,OAAO,EAAE,SAAS,EAAE,EAAE,EAAE,WAAW,EAAE,EAAE,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC;IAC3E,OAAO;QACL,SAAS,EAAE,QAAQ;QACnB,WAAW,EAAE,MAAM,CAAC,KAAK,CAAC;QAC1B,WAAW,EAAE,MAAM,CAAC,KAAK,CAAC;KAC3B,CAAC;AACJ,CAAC;AAED;wEACwE;AACxE,MAAM,UAAU,gBAAgB,CAC9B,IAA6B,EAC7B,KAAa,EACb,KAAa;IAEb,IAAI,KAAK,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAC5B,OAAO,EAAE,GAAG,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC;AAC7D,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,QAAgB,EAAE,KAAa;IACzD,OAAO,GAAG,QAAQ,GAAG,cAAc,GAAG,KAAK,EAAE,CAAC;AAChD,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,MAAc,EAAE,KAAa;IACtD,OAAO,GAAG,MAAM,GAAG,aAAa,GAAG,KAAK,EAAE,CAAC;AAC7C,CAAC;AAED,wCAAwC;AAExC,qEAAqE;AACrE,MAAM,UAAU,kBAAkB,CAAC,IAA+B;IAChE,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,MAAM,CAAC,IAAI,IAAI;QAAE,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC,CAAC,aAAa,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAC3E,O
AAO,GAAG,CAAC;AACb,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAA+B;IAC9D,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACpC,MAAM,KAAK,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;IACvC,IAAI,KAAK,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,+CAA+C;IAC5E,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;QACrB,4EAA4E;QAC5E,yEAAyE;QACzE,IAAI,UAAU,CAAC,CAAC,CAAC,aAAa,CAAC,EAAE,CAAC,CAAC,KAAK,KAAK;YAAE,OAAO,KAAK,CAAC;QAC5D,MAAM,GAAG,GAAG,UAAU,CAAC,CAAC,CAAC,aAAa,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC7C,IAAI,GAAG,GAAG,CAAC,IAAI,GAAG,IAAI,KAAK,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QAC3D,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAChB,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,KAAK,KAAK,CAAC;AAC7B,CAAC"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Index-time chunkers.
|
|
3
|
+
*
|
|
4
|
+
* `chunkText` splits a long document into overlapping windows at natural
|
|
5
|
+
* (paragraph / heading) boundaries; `chunkConversation` prefers turn boundaries
|
|
6
|
+
* (`User:` / `Assistant:`) and falls back to `chunkText`. Both short-circuit to a
|
|
7
|
+
* single chunk when the content is below `CHUNK_THRESHOLD_CHARS`, so small
|
|
8
|
+
* documents/conversations are unaffected.
|
|
9
|
+
*
|
|
10
|
+
* Deterministic and dependency-free. Sibling of `chunkByTurns` in
|
|
11
|
+
* `src/capture/recover-quarantine.ts` (that one is byte-budgeted, for recovery
|
|
12
|
+
* summarization; these are char-windowed, for index-time embedding).
|
|
13
|
+
*/
|
|
14
|
+
/** Tuning parameters accepted by `chunkText` and `chunkConversation`; every size is measured in characters. */
export interface ChunkOptions {
|
|
15
|
+
/** Soft target size per chunk (chars). */
|
|
16
|
+
targetChars: number;
|
|
17
|
+
/** Approximate overlap carried over from the previous chunk (chars). */
|
|
18
|
+
overlapChars: number;
|
|
19
|
+
/** Trailing chunks smaller than this (chars) are merged back into the previous one. */
|
|
20
|
+
minChars: number;
|
|
21
|
+
/** A single unit larger than this (chars) is hard-split at a word boundary. */
|
|
22
|
+
maxChars: number;
|
|
23
|
+
/** Content at or below this size (chars) is emitted as one chunk (no chunking). */
|
|
24
|
+
thresholdChars: number;
|
|
25
|
+
}
|
|
26
|
+
export declare const CHUNK_THRESHOLD_CHARS = 2400;
|
|
27
|
+
/**
|
|
28
|
+
* Whether index-time chunking is enabled. Single env flag `RIFT_CHUNKING=1`
|
|
29
|
+
* (default OFF), read at call time so tests can toggle it. With it OFF, every
|
|
30
|
+
* write path emits a single row with no chunk markers — byte-for-byte the
|
|
31
|
+
* pre-chunking behavior.
|
|
32
|
+
*/
|
|
33
|
+
export declare function chunkingEnabled(env?: NodeJS.ProcessEnv): boolean;
|
|
34
|
+
export declare const DEFAULT_CHUNK_OPTIONS: ChunkOptions;
|
|
35
|
+
/** Split a document into overlapping chunks at paragraph boundaries. */
|
|
36
|
+
export declare function chunkText(content: string, options?: Partial<ChunkOptions>): string[];
|
|
37
|
+
/** Split a conversation into overlapping chunks, preferring turn boundaries. */
|
|
38
|
+
export declare function chunkConversation(content: string, options?: Partial<ChunkOptions>): string[];
|
|
39
|
+
//# sourceMappingURL=chunk-text.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunk-text.d.ts","sourceRoot":"","sources":["../../../src/ingestion/chunk-text.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AACH,MAAM,WAAW,YAAY;IAC3B,0CAA0C;IAC1C,WAAW,EAAE,MAAM,CAAC;IACpB,8DAA8D;IAC9D,YAAY,EAAE,MAAM,CAAC;IACrB,+EAA+E;IAC/E,QAAQ,EAAE,MAAM,CAAC;IACjB,uEAAuE;IACvE,QAAQ,EAAE,MAAM,CAAC;IACjB,2EAA2E;IAC3E,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,eAAO,MAAM,qBAAqB,OAAO,CAAC;AAE1C;;;;;GAKG;AACH,wBAAgB,eAAe,CAAC,GAAG,GAAE,MAAM,CAAC,UAAwB,GAAG,OAAO,CAE7E;AAED,eAAO,MAAM,qBAAqB,EAAE,YAMnC,CAAC;AAIF,wEAAwE;AACxE,wBAAgB,SAAS,CACvB,OAAO,EAAE,MAAM,EACf,OAAO,GAAE,OAAO,CAAC,YAAY,CAAM,GAClC,MAAM,EAAE,CAMV;AAED,gFAAgF;AAChF,wBAAgB,iBAAiB,CAC/B,OAAO,EAAE,MAAM,EACf,OAAO,GAAE,OAAO,CAAC,YAAY,CAAM,GAClC,MAAM,EAAE,CAYV"}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
/**
 * Default `thresholdChars`: content whose length is at or below this many
 * characters is returned by the chunkers as a single chunk.
 */
export const CHUNK_THRESHOLD_CHARS = 2400;
|
|
2
|
+
/**
 * Report whether index-time chunking is switched on.
 *
 * The switch is the single environment variable `RIFT_CHUNKING` (default OFF):
 * only the exact string "1" enables it, so an unset variable or any other value
 * leaves chunking off. The variable is looked up on every call, not cached at
 * import time, so tests can toggle it. With the flag OFF, write paths are
 * expected to keep the pre-chunking behavior (one row, no chunk markers).
 *
 * @param {object} [env=process.env] - Environment map to inspect.
 * @returns {boolean} `true` only when `env.RIFT_CHUNKING` is exactly "1".
 */
export function chunkingEnabled(env = process.env) {
    const { RIFT_CHUNKING: flag } = env;
    return flag === "1";
}
|
|
11
|
+
/**
 * Baseline chunking parameters. `chunkText` and `chunkConversation` spread the
 * caller's overrides on top of this object. All sizes are in characters
 * (string `length`).
 */
export const DEFAULT_CHUNK_OPTIONS = {
    // Soft budget: a unit is appended to the current chunk only while the
    // joined length stays at or below this value.
    targetChars: 1800,
    // Size of the tail of a finished chunk that is used to seed the next one.
    overlapChars: 216,
    // A final chunk shorter than this is merged into its predecessor.
    minChars: 400,
    // A paragraph or turn longer than this is hard-split before packing.
    maxChars: 3600,
    // Content at or below this length is returned as a single chunk.
    thresholdChars: CHUNK_THRESHOLD_CHARS,
};
// Separator inserted between units when they are joined into one chunk.
const PARAGRAPH_SEP = "\n\n";
|
|
19
|
+
/**
 * Split a document into overlapping chunks at paragraph boundaries.
 *
 * Whitespace-only content yields no chunks, and content at or below
 * `thresholdChars` is returned unchanged as a single chunk. Anything longer is
 * split into paragraphs (oversized ones are hard-windowed to `maxChars`) and
 * packed into chunks.
 *
 * @param {string} content - Document text to chunk.
 * @param {object} [options] - Partial overrides for `DEFAULT_CHUNK_OPTIONS`.
 *   Keys whose value is `undefined` are ignored, so the default still applies.
 * @returns {string[]} The chunks in document order; empty for blank content.
 */
export function chunkText(content, options = {}) {
    // A plain object spread would copy `{ maxChars: undefined }` over the
    // default and make every later size comparison false, so drop
    // undefined-valued overrides before merging.
    const overrides = Object.fromEntries(
        Object.entries(options ?? {}).filter(([, value]) => value !== undefined),
    );
    const opts = { ...DEFAULT_CHUNK_OPTIONS, ...overrides };
    if (content.trim().length === 0) {
        return [];
    }
    if (content.length <= opts.thresholdChars) {
        return [content];
    }
    const units = splitParagraphs(content, opts.maxChars);
    return packUnits(units, opts);
}
|
|
29
|
+
/**
 * Split a conversation into overlapping chunks, preferring turn boundaries.
 *
 * A turn boundary is a blank line followed by `User: ` or `Assistant: `.
 * Whitespace-only content yields no chunks, and content at or below
 * `thresholdChars` is returned unchanged as a single chunk. When no more than
 * one turn is found, the content is chunked as a plain document via `chunkText`.
 *
 * @param {string} content - Conversation transcript to chunk.
 * @param {object} [options] - Partial overrides for `DEFAULT_CHUNK_OPTIONS`.
 *   Keys whose value is `undefined` are ignored, so the default still applies.
 * @returns {string[]} The chunks in conversation order; empty for blank content.
 */
export function chunkConversation(content, options = {}) {
    // A plain object spread would copy `{ maxChars: undefined }` over the
    // default, which would send every turn through `hardWindow`; drop
    // undefined-valued overrides before merging.
    const overrides = Object.fromEntries(
        Object.entries(options ?? {}).filter(([, value]) => value !== undefined),
    );
    const opts = { ...DEFAULT_CHUNK_OPTIONS, ...overrides };
    if (content.trim().length === 0) {
        return [];
    }
    if (content.length <= opts.thresholdChars) {
        return [content];
    }
    const turns = content
        .split(/\n\n(?=(?:User|Assistant): )/)
        .filter((s) => s.trim().length > 0);
    if (turns.length <= 1) {
        return chunkText(content, options);
    }
    // Turns that exceed maxChars are hard-split so no single unit is oversized.
    const units = turns.flatMap((t) =>
        t.length <= opts.maxChars ? [t] : hardWindow(t, opts.maxChars),
    );
    return packUnits(units, opts);
}
|
|
44
|
+
/**
 * Greedily pack pre-split units into chunks near `opts.targetChars`.
 *
 * Units are joined with `PARAGRAPH_SEP` while the joined length stays at or
 * below `targetChars`. When a unit does not fit, the current chunk is emitted
 * and the next chunk is seeded with an overlap tail of it (`opts.overlapChars`).
 * A final chunk shorter than `opts.minChars` is merged into its predecessor so
 * no sliver is emitted.
 *
 * @param {string[]} units - Pre-split pieces (paragraphs or turns), in order.
 * @param {object} opts - Resolved options; reads `targetChars`, `overlapChars`
 *   and `minChars`.
 * @returns {string[]} The packed chunks, in order.
 */
function packUnits(units, opts) {
    const chunks = [];
    let cur = ""; // chunk being built, including any overlap seed
    let fresh = ""; // the same chunk without its overlap seed
    for (const unit of units) {
        if (cur === "") {
            cur = unit;
            fresh = unit;
            continue;
        }
        if (cur.length + PARAGRAPH_SEP.length + unit.length <= opts.targetChars) {
            cur = `${cur}${PARAGRAPH_SEP}${unit}`;
            fresh = `${fresh}${PARAGRAPH_SEP}${unit}`;
        } else {
            chunks.push(cur);
            const tail = overlapTail(cur, opts.overlapChars);
            cur = tail ? `${tail}${PARAGRAPH_SEP}${unit}` : unit;
            fresh = unit;
        }
    }
    if (cur) {
        chunks.push(cur);
    }
    // Merge a tiny trailing chunk into its predecessor. Only the new content is
    // appended: the overlap seed is already the end of the predecessor, so
    // appending the whole chunk would repeat that text.
    if (chunks.length >= 2 && chunks[chunks.length - 1].length < opts.minChars) {
        chunks.pop();
        const prev = chunks.length - 1;
        chunks[prev] = `${chunks[prev]}${PARAGRAPH_SEP}${fresh}`;
    }
    return chunks;
}
|
|
76
|
+
/**
 * Break `text` into paragraph units.
 *
 * Paragraphs are separated by runs of two or more newlines. Whitespace-only
 * paragraphs are dropped, and a paragraph longer than `maxChars` is replaced by
 * the pieces `hardWindow` produces for it.
 *
 * @param {string} text - Text to split.
 * @param {number} maxChars - Largest paragraph length kept in one piece.
 * @returns {string[]} Paragraph units in their original order.
 */
function splitParagraphs(text, maxChars) {
    return text
        .split(/\n{2,}/)
        .filter((paragraph) => paragraph.trim().length !== 0)
        .flatMap((paragraph) =>
            paragraph.length > maxChars ? hardWindow(paragraph, maxChars) : [paragraph],
        );
}
|
|
89
|
+
/**
 * Hard-split an oversized string into pieces of at most `maxChars` characters.
 *
 * Each cut is made at the last space at or before the limit; when there is no
 * usable space the text is cut at the limit itself. Whitespace at the start of
 * the remainder is dropped after every cut.
 *
 * @param {string} text - Text to split.
 * @param {number} maxChars - Maximum piece length in UTF-16 code units. A value
 *   that floors to less than 1 (or NaN) cannot make progress, so the text is
 *   returned unsplit instead of looping forever.
 * @returns {string[]} The pieces in order; empty when `text` is empty.
 */
function hardWindow(text, maxChars) {
    const limit = Math.floor(maxChars);
    if (!(limit >= 1)) {
        return text.length > 0 ? [text] : [];
    }
    const out = [];
    let rest = text;
    while (rest.length > limit) {
        let cut = rest.lastIndexOf(" ", limit);
        if (cut <= 0) {
            cut = limit; // no space — hard cut
            // Never cut between the two halves of a surrogate pair; step back
            // one unit so the astral character moves to the next piece whole.
            const before = rest.charCodeAt(cut - 1);
            const after = rest.charCodeAt(cut);
            const splitsPair =
                before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
            if (splitsPair && cut > 1) {
                cut -= 1;
            }
        }
        out.push(rest.slice(0, cut));
        rest = rest.slice(cut).replace(/^\s+/, "");
    }
    if (rest.length > 0) {
        out.push(rest);
    }
    return out;
}
|
|
104
|
+
/**
 * Return roughly the last `overlap` characters of `text`, to seed the next chunk.
 *
 * The raw tail is snapped forward to just after its first space, provided that
 * space is neither its first nor its last character, so the tail does not start
 * mid-word. If the tail would still begin with the second half of a surrogate
 * pair that the cut split, that orphaned code unit is dropped.
 *
 * @param {string} text - Text of the chunk that was just emitted.
 * @param {number} overlap - Desired tail length in UTF-16 code units.
 * @returns {string} The whole `text` when it is no longer than `overlap`, an
 *   empty string when `overlap` is zero or negative, otherwise the snapped tail.
 */
function overlapTail(text, overlap) {
    if (text.length <= overlap) {
        return text;
    }
    if (overlap <= 0) {
        return "";
    }
    let slice = text.slice(text.length - overlap);
    const sp = slice.indexOf(" ");
    if (sp > 0 && sp < slice.length - 1) {
        slice = slice.slice(sp + 1);
    }
    // `slice` is a suffix of `text`, so the unit just before it sits at
    // `text.length - slice.length - 1`. If the cut landed inside an astral
    // character, drop the orphaned low surrogate.
    const first = slice.charCodeAt(0);
    if (first >= 0xdc00 && first <= 0xdfff) {
        const before = text.charCodeAt(text.length - slice.length - 1);
        if (before >= 0xd800 && before <= 0xdbff) {
            slice = slice.slice(1);
        }
    }
    return slice;
}
|
|
114
|
+
//# sourceMappingURL=chunk-text.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunk-text.js","sourceRoot":"","sources":["../../../src/ingestion/chunk-text.ts"],"names":[],"mappings":"AA0BA,MAAM,CAAC,MAAM,qBAAqB,GAAG,IAAI,CAAC;AAE1C;;;;;GAKG;AACH,MAAM,UAAU,eAAe,CAAC,MAAyB,OAAO,CAAC,GAAG;IAClE,OAAO,GAAG,CAAC,eAAe,CAAC,KAAK,GAAG,CAAC;AACtC,CAAC;AAED,MAAM,CAAC,MAAM,qBAAqB,GAAiB;IACjD,WAAW,EAAE,IAAI;IACjB,YAAY,EAAE,GAAG;IACjB,QAAQ,EAAE,GAAG;IACb,QAAQ,EAAE,IAAI;IACd,cAAc,EAAE,qBAAqB;CACtC,CAAC;AAEF,MAAM,aAAa,GAAG,MAAM,CAAC;AAE7B,wEAAwE;AACxE,MAAM,UAAU,SAAS,CACvB,OAAe,EACf,UAAiC,EAAE;IAEnC,MAAM,IAAI,GAAG,EAAE,GAAG,qBAAqB,EAAE,GAAG,OAAO,EAAE,CAAC;IACtD,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAC3C,IAAI,OAAO,CAAC,MAAM,IAAI,IAAI,CAAC,cAAc;QAAE,OAAO,CAAC,OAAO,CAAC,CAAC;IAC5D,MAAM,KAAK,GAAG,eAAe,CAAC,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;IACtD,OAAO,SAAS,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;AAChC,CAAC;AAED,gFAAgF;AAChF,MAAM,UAAU,iBAAiB,CAC/B,OAAe,EACf,UAAiC,EAAE;IAEnC,MAAM,IAAI,GAAG,EAAE,GAAG,qBAAqB,EAAE,GAAG,OAAO,EAAE,CAAC;IACtD,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAC3C,IAAI,OAAO,CAAC,MAAM,IAAI,IAAI,CAAC,cAAc;QAAE,OAAO,CAAC,OAAO,CAAC,CAAC;IAC5D,MAAM,KAAK,GAAG,OAAO;SAClB,KAAK,CAAC,8BAA8B,CAAC;SACrC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACtC,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,SAAS,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;IAC1D,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAChC,CAAC,CAAC,MAAM,IAAI,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,EAAE,IAAI,CAAC,QAAQ,CAAC,CAC/D,CAAC;IACF,OAAO,SAAS,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;AAChC,CAAC;AAED;;;;GAIG;AACH,SAAS,SAAS,CAAC,KAAe,EAAE,IAAkB;IACpD,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,GAAG,KAAK,EAAE,EAAE,CAAC;YACf,GAAG,GAAG,IAAI,CAAC;YACX,SAAS;QACX,CAAC;QACD,IAAI,GAAG,CAAC,MAAM,GAAG,aAAa,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACxE,GAAG,GAAG,GAAG,GAAG,GAAG,aAAa,GAAG,IAAI,EAAE,CAAC;QACxC
,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACjB,MAAM,IAAI,GAAG,WAAW,CAAC,GAAG,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC;YACjD,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,GAAG,aAAa,GAAG,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QACvD,CAAC;IACH,CAAC;IACD,IAAI,GAAG;QAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC1B,oDAAoD;IACpD,IACE,MAAM,CAAC,MAAM,IAAI,CAAC;QAClB,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC,MAAM,GAAG,IAAI,CAAC,QAAQ,EACjD,CAAC;QACD,MAAM,IAAI,GAAG,MAAM,CAAC,GAAG,EAAG,CAAC;QAC3B,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,GAAG,aAAa,GAAG,IAAI,EAAE,CAAC;IACrF,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,+EAA+E;AAC/E,SAAS,eAAe,CAAC,IAAY,EAAE,QAAgB;IACrD,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAE,CAAC;QACxC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QACvC,IAAI,IAAI,CAAC,MAAM,IAAI,QAAQ;YAAE,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;;YACvC,GAAG,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC;IAC/C,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,iFAAiF;AACjF,SAAS,UAAU,CAAC,IAAY,EAAE,QAAgB;IAChD,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,IAAI,IAAI,GAAG,IAAI,CAAC;IAChB,OAAO,IAAI,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;QAC9B,IAAI,GAAG,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;QAC1C,IAAI,GAAG,IAAI,CAAC;YAAE,GAAG,GAAG,QAAQ,CAAC,CAAC,sBAAsB;QACpD,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;QAC7B,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IAC7C,CAAC;IACD,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;QAAE,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACpC,OAAO,GAAG,CAAC;AACb,CAAC;AAED,yEAAyE;AACzE,SAAS,WAAW,CAAC,IAAY,EAAE,OAAe;IAChD,IAAI,OAAO,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,IAAI,OAAO;QAAE,OAAO,IAAI,CAAC,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;IACtF,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,CAAC;IAC9C,MAAM,EAAE,GAAG,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC9B,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC;QAAE,KAAK,GAAG
,KAAK,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;IACjE,OAAO,KAAK,CAAC;AACf,CAAC"}
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cursor native composer/chat store — read-only discovery layer.
|
|
3
|
+
*
|
|
4
|
+
* DISCOVERY SLICE ONLY. This module interprets the raw key/value rows that
|
|
5
|
+
* Cursor (an Electron/VS Code fork by Anysphere) persists in its SQLite
|
|
6
|
+
* `state.vscdb` files. It does NOT read those files itself — see
|
|
7
|
+
* `vscdb-reader.ts` for the I/O adapter — and it does NOT write anything to
|
|
8
|
+
* LanceDB, the daemon, or capture state. Keeping the interpretation pure makes
|
|
9
|
+
* it exhaustively unit-testable from synthetic fixtures with no DB or binary.
|
|
10
|
+
*
|
|
11
|
+
* What lives in Cursor's vscdb (empirically verified on macOS, Cursor "glass"
|
|
12
|
+
* build, 2026-06):
|
|
13
|
+
* - ItemTable['composer.composerHeaders'] -> { allComposers: ComposerHeader[] }
|
|
14
|
+
* The session index: one entry per composer/chat session.
|
|
15
|
+
* - cursorDiskKV['composerData:{composerId}'] -> CursorComposerData
|
|
16
|
+
* Per-session metadata incl. modelConfig + ordered bubble header list.
|
|
17
|
+
* - cursorDiskKV['bubbleId:{composerId}:{bubbleId}'] -> CursorBubble
|
|
18
|
+
* One conversation turn. type 1 = user, type 2 = assistant.
|
|
19
|
+
*
|
|
20
|
+
* CRITICAL SCOPE NOTE — the three requested "modes":
|
|
21
|
+
* 1. Claude Code used *through* Cursor -> NOT here. The `claude` CLI writes
|
|
22
|
+
* to ~/.claude/projects/**.jsonl regardless of which terminal launched it.
|
|
23
|
+
* Already covered by claude-code-jsonl.ts.
|
|
24
|
+
* 2. Codex used *through* Cursor -> NOT here. Same story: ~/.codex.
|
|
25
|
+
* Already covered by codex-jsonl.ts.
|
|
26
|
+
* 3. Cursor *native* composer/agent/chat -> THIS module. The only AI sessions
|
|
27
|
+
* that actually live in Cursor's vscdb.
|
|
28
|
+
* So this store owns exactly mode 3. Modes 1/2 are a provenance-tagging concern
|
|
29
|
+
* on the existing CLI parsers, not a new store. See docs/cursor-capture-discovery.md.
|
|
30
|
+
*/
|
|
31
|
+
import type { ParsedConversation } from "../parsers/types.js";
|
|
32
|
+
import { type ProjectRootResolution } from "./workspace-root.js";
|
|
33
|
+
/** A single key/value row, value still a raw JSON string (BLOB/TEXT). */
|
|
34
|
+
export interface CursorKvRow {
|
|
35
|
+
key: string;
|
|
36
|
+
value: string;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* The minimal raw input the discovery layer needs. The reader pulls only these
|
|
40
|
+
* two namespaces and deliberately never selects `cursorAuth/*` (tokens) or
|
|
41
|
+
* `secret://*` (MCP OAuth secrets).
|
|
42
|
+
*/
|
|
43
|
+
export interface CursorRawRows {
|
|
44
|
+
/** Rows from the `ItemTable` table. */
|
|
45
|
+
itemTable: CursorKvRow[];
|
|
46
|
+
/** Rows from the `cursorDiskKV` table. */
|
|
47
|
+
cursorDiskKV: CursorKvRow[];
|
|
48
|
+
}
|
|
49
|
+
/** Coarse classification of a native Cursor session by selected model. */
|
|
50
|
+
export type CursorBackend = "cursor_native" | "anthropic" | "openai" | "google" | "default" | "unknown";
|
|
51
|
+
/**
|
|
52
|
+
* A discovered native Cursor session. Metadata only — by design this carries NO
|
|
53
|
+
* message text so discovery can run without ever touching private content.
|
|
54
|
+
*/
|
|
55
|
+
export interface CursorSessionCandidate {
|
|
56
|
+
/** Stable session id = Cursor's composerId (a UUID). */
|
|
57
|
+
composerId: string;
|
|
58
|
+
/** User-given session name, if any (semantically a title; may be private). */
|
|
59
|
+
name: string | null;
|
|
60
|
+
/** "agent" | "chat" | other; null if absent. */
|
|
61
|
+
mode: string | null;
|
|
62
|
+
/** Cursor's own model label, e.g. "composer-2.5" or "default". */
|
|
63
|
+
modelName: string | null;
|
|
64
|
+
/** Coarse backend inferred from {@link modelName}. */
|
|
65
|
+
backend: CursorBackend;
|
|
66
|
+
/** Workspace identifier from the header (folder window id or "empty-window"). */
|
|
67
|
+
workspaceId: string | null;
|
|
68
|
+
/**
|
|
69
|
+
* Resolved filesystem project root, or null when not attributable (empty
|
|
70
|
+
* window, multi-root, remote, or unresolved). Resolved here from the inline
|
|
71
|
+
* header uri only (no fs); the probe upgrades legacy `unresolved` rows via the
|
|
72
|
+
* on-disk workspace.json join. See {@link projectRootStatus}.
|
|
73
|
+
*/
|
|
74
|
+
projectRoot: string | null;
|
|
75
|
+
/** Which link produced {@link projectRoot}, or null when unresolved. */
|
|
76
|
+
projectRootSource: "header_uri" | "workspace_json" | null;
|
|
77
|
+
/** First-class resolution outcome — capture scopes to a root ONLY on "resolved". */
|
|
78
|
+
projectRootStatus: ProjectRootResolution["status"];
|
|
79
|
+
/** ISO 8601 creation time (from epoch ms), or null if unknown. */
|
|
80
|
+
createdAt: string | null;
|
|
81
|
+
/** ISO 8601 last-update time (from epoch ms), or null if unknown. */
|
|
82
|
+
updatedAt: string | null;
|
|
83
|
+
/** Number of conversation turns (bubbles) referenced by the session. */
|
|
84
|
+
turnCount: number;
|
|
85
|
+
/** True if Cursor flagged this as an unsent draft (skip on capture). */
|
|
86
|
+
isDraft: boolean;
|
|
87
|
+
/** True when a composerData body row exists for this id (parseable later). */
|
|
88
|
+
hasBody: boolean;
|
|
89
|
+
}
|
|
90
|
+
/** Proposed provenance row for the future capture slice (NOT persisted here). */
|
|
91
|
+
export interface ProposedCursorProvenance {
|
|
92
|
+
/** New ConversationSource variant proposed for native Cursor sessions. */
|
|
93
|
+
source: "cursor_composer";
|
|
94
|
+
/** Originating application, distinguishes from web/CLI captures. */
|
|
95
|
+
source_app: "cursor";
|
|
96
|
+
/** Stable upstream id. */
|
|
97
|
+
source_session_id: string;
|
|
98
|
+
/** Stable idempotency key — see {@link cursorSessionKey}. */
|
|
99
|
+
idempotency_key: string;
|
|
100
|
+
/** Cursor workspace id (workspaceStorage hash or "empty-window"). */
|
|
101
|
+
workspace_id: string | null;
|
|
102
|
+
/** Resolved filesystem project root, or null when not attributable. */
|
|
103
|
+
project_root: string | null;
|
|
104
|
+
/** Provenance of {@link project_root}: which link resolved it (or null). */
|
|
105
|
+
project_root_source: "header_uri" | "workspace_json" | null;
|
|
106
|
+
/** Model label as recorded by Cursor. */
|
|
107
|
+
model_name: string | null;
|
|
108
|
+
backend: CursorBackend;
|
|
109
|
+
mode: string | null;
|
|
110
|
+
created_at: string | null;
|
|
111
|
+
updated_at: string | null;
|
|
112
|
+
}
|
|
113
|
+
/**
|
|
114
|
+
* Map a Cursor model label to a coarse backend. Cursor records its OWN labels
|
|
115
|
+
* (e.g. "composer-2.5"), not raw provider ids, and very often "default" (the
|
|
116
|
+
* account-level pick), which is NOT resolvable from local artifacts. This is a
|
|
117
|
+
* best-effort heuristic — see "Unknowns" in the discovery doc.
|
|
118
|
+
*/
|
|
119
|
+
export declare function classifyBackend(modelName: string | null | undefined): CursorBackend;
|
|
120
|
+
/**
|
|
121
|
+
* Stable, deterministic session key for idempotent capture. composerId is a
|
|
122
|
+
* UUID minted once per session by Cursor and never reused, so it is the natural
|
|
123
|
+
* idempotency anchor. Prefixed by source so it can't collide with CLI keys.
|
|
124
|
+
*
|
|
125
|
+
* Change detection across re-captures is a SEPARATE concern: pair this key with
|
|
126
|
+
* a content fingerprint (see {@link cursorConversationFingerprintInput}) and the
|
|
127
|
+
* session's lastUpdatedAt, exactly as the CLI lanes do. Designing this now
|
|
128
|
+
* satisfies the "stable key before any scheduled capture" requirement.
|
|
129
|
+
*/
|
|
130
|
+
export declare function cursorSessionKey(composerId: string): string;
|
|
131
|
+
/**
|
|
132
|
+
* Discover native Cursor composer/chat sessions from raw vscdb rows.
|
|
133
|
+
*
|
|
134
|
+
* Pure and side-effect free. Uses ONLY the session index + per-session metadata
|
|
135
|
+
* rows — never bubble bodies — so discovery reveals no private message text.
|
|
136
|
+
* Sessions are returned newest-updated first.
|
|
137
|
+
*/
|
|
138
|
+
export declare function discoverCursorComposerSessions(raw: CursorRawRows): CursorSessionCandidate[];
|
|
139
|
+
/** Build the proposed provenance row for a candidate (design artifact only). */
|
|
140
|
+
export declare function proposeCursorProvenance(candidate: CursorSessionCandidate): ProposedCursorProvenance;
|
|
141
|
+
/** One ordered conversation turn as stored, before any role/prose filtering. */
|
|
142
|
+
export interface CursorOrderedBubble {
|
|
143
|
+
bubbleId: string;
|
|
144
|
+
/** Raw bubble type (1 = user, 2 = assistant, other = tool/system). */
|
|
145
|
+
type: number;
|
|
146
|
+
/** Raw bubble text (may be empty for tool-only/assistant bubbles). */
|
|
147
|
+
text: string;
|
|
148
|
+
}
|
|
149
|
+
/**
|
|
150
|
+
* Collect a session's bubbles in conversation order. Order comes from the
|
|
151
|
+
* session body's `fullConversationHeadersOnly` when present; otherwise we fall
|
|
152
|
+
* back to whatever bubble rows exist for this composer, in key order. Returns
|
|
153
|
+
* EVERY referenced bubble (including empty/tool-only ones) so callers — notably
|
|
154
|
+
* the fingerprint — can see structural changes that carry no prose.
|
|
155
|
+
*/
|
|
156
|
+
export declare function collectOrderedBubbles(raw: CursorRawRows, composerId: string): CursorOrderedBubble[];
|
|
157
|
+
/**
|
|
158
|
+
* Deterministic fingerprint input for a session, over the ordered
|
|
159
|
+
* `(bubbleId, type, text)` of every turn. This is the change-detection contract
|
|
160
|
+
* the capture slice will hash (NOT hashed here — the hashing util stays in one
|
|
161
|
+
* place): re-running on an unchanged session yields an identical string, while
|
|
162
|
+
* an edited/added/removed turn — including a tool-only turn that carries no
|
|
163
|
+
* prose — changes it. Requires bubble bodies to be present in `raw`.
|
|
164
|
+
*/
|
|
165
|
+
export declare function cursorConversationFingerprintInput(raw: CursorRawRows, composerId: string): string;
|
|
166
|
+
/**
|
|
167
|
+
* Full parse of a single native Cursor session into the shared
|
|
168
|
+
* {@link ParsedConversation} contract. Requires the session's bubble bodies to
|
|
169
|
+
* be present in `raw.cursorDiskKV` (the reader only fetches these on explicit,
|
|
170
|
+
* privacy-acknowledged opt-in). Returns null when no user/assistant turns with
|
|
171
|
+
* text exist.
|
|
172
|
+
*
|
|
173
|
+
* Provided for the FUTURE capture slice and for round-trip tests; the discovery
|
|
174
|
+
* probe does not call it by default.
|
|
175
|
+
*/
|
|
176
|
+
export declare function parseCursorComposerSession(raw: CursorRawRows, composerId: string): ParsedConversation | null;
|
|
177
|
+
//# sourceMappingURL=cursor-store.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cursor-store.d.ts","sourceRoot":"","sources":["../../../../src/ingestion/cursor/cursor-store.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AACH,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAC;AAC9D,OAAO,EAGL,KAAK,qBAAqB,EAC3B,MAAM,qBAAqB,CAAC;AAM7B,yEAAyE;AACzE,MAAM,WAAW,WAAW;IAC1B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;CACf;AAED;;;;GAIG;AACH,MAAM,WAAW,aAAa;IAC5B,uCAAuC;IACvC,SAAS,EAAE,WAAW,EAAE,CAAC;IACzB,0CAA0C;IAC1C,YAAY,EAAE,WAAW,EAAE,CAAC;CAC7B;AAyDD,0EAA0E;AAC1E,MAAM,MAAM,aAAa,GACrB,eAAe,GACf,WAAW,GACX,QAAQ,GACR,QAAQ,GACR,SAAS,GACT,SAAS,CAAC;AAEd;;;GAGG;AACH,MAAM,WAAW,sBAAsB;IACrC,wDAAwD;IACxD,UAAU,EAAE,MAAM,CAAC;IACnB,8EAA8E;IAC9E,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,gDAAgD;IAChD,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,kEAAkE;IAClE,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,sDAAsD;IACtD,OAAO,EAAE,aAAa,CAAC;IACvB,iFAAiF;IACjF,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B;;;;;OAKG;IACH,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,wEAAwE;IACxE,iBAAiB,EAAE,YAAY,GAAG,gBAAgB,GAAG,IAAI,CAAC;IAC1D,oFAAoF;IACpF,iBAAiB,EAAE,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IACnD,kEAAkE;IAClE,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,qEAAqE;IACrE,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,wEAAwE;IACxE,SAAS,EAAE,MAAM,CAAC;IAClB,wEAAwE;IACxE,OAAO,EAAE,OAAO,CAAC;IACjB,8EAA8E;IAC9E,OAAO,EAAE,OAAO,CAAC;CAClB;AAED,iFAAiF;AACjF,MAAM,WAAW,wBAAwB;IACvC,0EAA0E;IAC1E,MAAM,EAAE,iBAAiB,CAAC;IAC1B,oEAAoE;IACpE,UAAU,EAAE,QAAQ,CAAC;IACrB,0BAA0B;IAC1B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,6DAA6D;IAC7D,eAAe,EAAE,MAAM,CAAC;IACxB,qEAAqE;IACrE,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,uEAAuE;IACvE,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,4EAA4E;IAC5E,mBAAmB,EAAE,YAAY,GAAG,gBAAgB,GAAG,IAAI,CAAC;IAC5D,yCAAyC;IACzC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,OAAO,EAAE,aAAa,CAAC;IACvB,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;CAC3B;AA4BD;;;;;GAKG;AACH,wBAAgB,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,GAAG,aAAa,CAanF;AAED;;;;;;;;;GASG;AACH,wBAAgB,gBAAgB,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,CA
E3D;AAED;;;;;;GAMG;AACH,wBAAgB,8BAA8B,CAC5C,GAAG,EAAE,aAAa,GACjB,sBAAsB,EAAE,CAwE1B;AAED,gFAAgF;AAChF,wBAAgB,uBAAuB,CACrC,SAAS,EAAE,sBAAsB,GAChC,wBAAwB,CAe1B;AAED,gFAAgF;AAChF,MAAM,WAAW,mBAAmB;IAClC,QAAQ,EAAE,MAAM,CAAC;IACjB,sEAAsE;IACtE,IAAI,EAAE,MAAM,CAAC;IACb,sEAAsE;IACtE,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;;;;;GAMG;AACH,wBAAgB,qBAAqB,CACnC,GAAG,EAAE,aAAa,EAClB,UAAU,EAAE,MAAM,GACjB,mBAAmB,EAAE,CA+BvB;AAED;;;;;;;GAOG;AACH,wBAAgB,kCAAkC,CAChD,GAAG,EAAE,aAAa,EAClB,UAAU,EAAE,MAAM,GACjB,MAAM,CAIR;AAED;;;;;;;;;GASG;AACH,wBAAgB,0BAA0B,CACxC,GAAG,EAAE,aAAa,EAClB,UAAU,EAAE,MAAM,GACjB,kBAAkB,GAAG,IAAI,CA6B3B"}
|