@getrift/rift 0.1.0-beta.21 → 0.1.0-beta.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -3
- package/dist/src/capture/auto-capture.d.ts +105 -4
- package/dist/src/capture/auto-capture.d.ts.map +1 -1
- package/dist/src/capture/auto-capture.js +313 -34
- package/dist/src/capture/auto-capture.js.map +1 -1
- package/dist/src/capture/claude-cli-triage-provider.d.ts +28 -0
- package/dist/src/capture/claude-cli-triage-provider.d.ts.map +1 -0
- package/dist/src/capture/claude-cli-triage-provider.js +88 -0
- package/dist/src/capture/claude-cli-triage-provider.js.map +1 -0
- package/dist/src/capture/codex-cli-triage-provider.d.ts.map +1 -1
- package/dist/src/capture/codex-cli-triage-provider.js +1 -33
- package/dist/src/capture/codex-cli-triage-provider.js.map +1 -1
- package/dist/src/capture/cursor-capture.d.ts +89 -0
- package/dist/src/capture/cursor-capture.d.ts.map +1 -0
- package/dist/src/capture/cursor-capture.js +121 -0
- package/dist/src/capture/cursor-capture.js.map +1 -0
- package/dist/src/capture/observability.d.ts +30 -0
- package/dist/src/capture/observability.d.ts.map +1 -1
- package/dist/src/capture/observability.js +29 -0
- package/dist/src/capture/observability.js.map +1 -1
- package/dist/src/capture/recover-quarantine.d.ts +4 -4
- package/dist/src/capture/sources.d.ts +41 -3
- package/dist/src/capture/sources.d.ts.map +1 -1
- package/dist/src/capture/sources.js +43 -1
- package/dist/src/capture/sources.js.map +1 -1
- package/dist/src/capture/triage-classification.d.ts +69 -0
- package/dist/src/capture/triage-classification.d.ts.map +1 -0
- package/dist/src/capture/triage-classification.js +62 -0
- package/dist/src/capture/triage-classification.js.map +1 -0
- package/dist/src/capture/triage-provider-factory.d.ts +36 -0
- package/dist/src/capture/triage-provider-factory.d.ts.map +1 -0
- package/dist/src/capture/triage-provider-factory.js +55 -0
- package/dist/src/capture/triage-provider-factory.js.map +1 -0
- package/dist/src/capture/triage.d.ts +1 -1
- package/dist/src/capture/triage.d.ts.map +1 -1
- package/dist/src/capture/triage.js +8 -6
- package/dist/src/capture/triage.js.map +1 -1
- package/dist/src/cli/commands/capture.d.ts.map +1 -1
- package/dist/src/cli/commands/capture.js +79 -17
- package/dist/src/cli/commands/capture.js.map +1 -1
- package/dist/src/cli/commands/chunk-backfill.d.ts +13 -0
- package/dist/src/cli/commands/chunk-backfill.d.ts.map +1 -0
- package/dist/src/cli/commands/chunk-backfill.js +157 -0
- package/dist/src/cli/commands/chunk-backfill.js.map +1 -0
- package/dist/src/cli/commands/cursor-probe.d.ts +20 -0
- package/dist/src/cli/commands/cursor-probe.d.ts.map +1 -0
- package/dist/src/cli/commands/cursor-probe.js +162 -0
- package/dist/src/cli/commands/cursor-probe.js.map +1 -0
- package/dist/src/cli/commands/menubar.d.ts +3 -1
- package/dist/src/cli/commands/menubar.d.ts.map +1 -1
- package/dist/src/cli/commands/menubar.js +36 -12
- package/dist/src/cli/commands/menubar.js.map +1 -1
- package/dist/src/cli/commands/onboard.d.ts +22 -2
- package/dist/src/cli/commands/onboard.d.ts.map +1 -1
- package/dist/src/cli/commands/onboard.js +160 -32
- package/dist/src/cli/commands/onboard.js.map +1 -1
- package/dist/src/cli/commands/status.d.ts.map +1 -1
- package/dist/src/cli/commands/status.js +12 -0
- package/dist/src/cli/commands/status.js.map +1 -1
- package/dist/src/cli/commands/update.d.ts +34 -1
- package/dist/src/cli/commands/update.d.ts.map +1 -1
- package/dist/src/cli/commands/update.js +166 -1
- package/dist/src/cli/commands/update.js.map +1 -1
- package/dist/src/cli/index.d.ts.map +1 -1
- package/dist/src/cli/index.js +4 -0
- package/dist/src/cli/index.js.map +1 -1
- package/dist/src/cli/postinstall-menubar.d.ts +20 -13
- package/dist/src/cli/postinstall-menubar.d.ts.map +1 -1
- package/dist/src/cli/postinstall-menubar.js +56 -1
- package/dist/src/cli/postinstall-menubar.js.map +1 -1
- package/dist/src/cli/status/friend-header.d.ts +16 -3
- package/dist/src/cli/status/friend-header.d.ts.map +1 -1
- package/dist/src/cli/status/friend-header.js +186 -10
- package/dist/src/cli/status/friend-header.js.map +1 -1
- package/dist/src/cli/status/local-signals.d.ts +42 -4
- package/dist/src/cli/status/local-signals.d.ts.map +1 -1
- package/dist/src/cli/status/local-signals.js +52 -1
- package/dist/src/cli/status/local-signals.js.map +1 -1
- package/dist/src/config/schema.d.ts +220 -14
- package/dist/src/config/schema.d.ts.map +1 -1
- package/dist/src/config/schema.js +82 -7
- package/dist/src/config/schema.js.map +1 -1
- package/dist/src/diagnostics/claude-preflight.d.ts +35 -0
- package/dist/src/diagnostics/claude-preflight.d.ts.map +1 -0
- package/dist/src/diagnostics/claude-preflight.js +90 -0
- package/dist/src/diagnostics/claude-preflight.js.map +1 -0
- package/dist/src/diagnostics/codex-preflight.d.ts +1 -1
- package/dist/src/diagnostics/codex-preflight.d.ts.map +1 -1
- package/dist/src/diagnostics/codex-preflight.js +24 -0
- package/dist/src/diagnostics/codex-preflight.js.map +1 -1
- package/dist/src/diagnostics/doctor.d.ts +7 -4
- package/dist/src/diagnostics/doctor.d.ts.map +1 -1
- package/dist/src/diagnostics/doctor.js +70 -11
- package/dist/src/diagnostics/doctor.js.map +1 -1
- package/dist/src/diagnostics/memory-coverage.d.ts +54 -0
- package/dist/src/diagnostics/memory-coverage.d.ts.map +1 -0
- package/dist/src/diagnostics/memory-coverage.js +272 -0
- package/dist/src/diagnostics/memory-coverage.js.map +1 -0
- package/dist/src/diagnostics/notify.d.ts +20 -3
- package/dist/src/diagnostics/notify.d.ts.map +1 -1
- package/dist/src/diagnostics/notify.js +54 -14
- package/dist/src/diagnostics/notify.js.map +1 -1
- package/dist/src/ingestion/chunk-meta.d.ts +85 -0
- package/dist/src/ingestion/chunk-meta.d.ts.map +1 -0
- package/dist/src/ingestion/chunk-meta.js +167 -0
- package/dist/src/ingestion/chunk-meta.js.map +1 -0
- package/dist/src/ingestion/chunk-text.d.ts +39 -0
- package/dist/src/ingestion/chunk-text.d.ts.map +1 -0
- package/dist/src/ingestion/chunk-text.js +114 -0
- package/dist/src/ingestion/chunk-text.js.map +1 -0
- package/dist/src/ingestion/cursor/cursor-store.d.ts +177 -0
- package/dist/src/ingestion/cursor/cursor-store.d.ts.map +1 -0
- package/dist/src/ingestion/cursor/cursor-store.js +243 -0
- package/dist/src/ingestion/cursor/cursor-store.js.map +1 -0
- package/dist/src/ingestion/cursor/enrich-roots.d.ts +16 -0
- package/dist/src/ingestion/cursor/enrich-roots.d.ts.map +1 -0
- package/dist/src/ingestion/cursor/enrich-roots.js +22 -0
- package/dist/src/ingestion/cursor/enrich-roots.js.map +1 -0
- package/dist/src/ingestion/cursor/vscdb-reader.d.ts +32 -0
- package/dist/src/ingestion/cursor/vscdb-reader.d.ts.map +1 -0
- package/dist/src/ingestion/cursor/vscdb-reader.js +113 -0
- package/dist/src/ingestion/cursor/vscdb-reader.js.map +1 -0
- package/dist/src/ingestion/cursor/workspace-root.d.ts +96 -0
- package/dist/src/ingestion/cursor/workspace-root.d.ts.map +1 -0
- package/dist/src/ingestion/cursor/workspace-root.js +187 -0
- package/dist/src/ingestion/cursor/workspace-root.js.map +1 -0
- package/dist/src/ingestion/indexer.d.ts.map +1 -1
- package/dist/src/ingestion/indexer.js +41 -32
- package/dist/src/ingestion/indexer.js.map +1 -1
- package/dist/src/jobs/handlers/compact.d.ts.map +1 -1
- package/dist/src/jobs/handlers/compact.js +9 -4
- package/dist/src/jobs/handlers/compact.js.map +1 -1
- package/dist/src/jobs/handlers/ingest.d.ts.map +1 -1
- package/dist/src/jobs/handlers/ingest.js +60 -30
- package/dist/src/jobs/handlers/ingest.js.map +1 -1
- package/dist/src/jobs/handlers/reconcile.d.ts.map +1 -1
- package/dist/src/jobs/handlers/reconcile.js +128 -45
- package/dist/src/jobs/handlers/reconcile.js.map +1 -1
- package/dist/src/jobs/handlers/save.d.ts.map +1 -1
- package/dist/src/jobs/handlers/save.js +122 -72
- package/dist/src/jobs/handlers/save.js.map +1 -1
- package/dist/src/jobs/types.d.ts +1 -1
- package/dist/src/main.js +27 -16
- package/dist/src/main.js.map +1 -1
- package/dist/src/mcp/capture-diagnostics.d.ts +51 -0
- package/dist/src/mcp/capture-diagnostics.d.ts.map +1 -0
- package/dist/src/mcp/capture-diagnostics.js +127 -0
- package/dist/src/mcp/capture-diagnostics.js.map +1 -0
- package/dist/src/mcp/memory-diagnostics.d.ts +6 -0
- package/dist/src/mcp/memory-diagnostics.d.ts.map +1 -0
- package/dist/src/mcp/memory-diagnostics.js +51 -0
- package/dist/src/mcp/memory-diagnostics.js.map +1 -0
- package/dist/src/mcp/server.d.ts.map +1 -1
- package/dist/src/mcp/server.js +10 -3
- package/dist/src/mcp/server.js.map +1 -1
- package/dist/src/mcp/tools/context-pack.d.ts.map +1 -1
- package/dist/src/mcp/tools/context-pack.js +7 -1
- package/dist/src/mcp/tools/context-pack.js.map +1 -1
- package/dist/src/mcp/tools/conversations-search.d.ts +1 -1
- package/dist/src/mcp/tools/conversations-search.d.ts.map +1 -1
- package/dist/src/mcp/tools/conversations-search.js +7 -1
- package/dist/src/mcp/tools/conversations-search.js.map +1 -1
- package/dist/src/mcp/tools/evidence-feedback.d.ts +60 -0
- package/dist/src/mcp/tools/evidence-feedback.d.ts.map +1 -0
- package/dist/src/mcp/tools/evidence-feedback.js +62 -0
- package/dist/src/mcp/tools/evidence-feedback.js.map +1 -0
- package/dist/src/mcp/tools/log-outcome.d.ts +72 -0
- package/dist/src/mcp/tools/log-outcome.d.ts.map +1 -0
- package/dist/src/mcp/tools/log-outcome.js +59 -0
- package/dist/src/mcp/tools/log-outcome.js.map +1 -0
- package/dist/src/mcp/tools/open-evidence.d.ts +37 -0
- package/dist/src/mcp/tools/open-evidence.d.ts.map +1 -0
- package/dist/src/mcp/tools/open-evidence.js +72 -0
- package/dist/src/mcp/tools/open-evidence.js.map +1 -0
- package/dist/src/mcp/tools/save.d.ts +7 -2
- package/dist/src/mcp/tools/save.d.ts.map +1 -1
- package/dist/src/mcp/tools/save.js +7 -2
- package/dist/src/mcp/tools/save.js.map +1 -1
- package/dist/src/mcp/tools/search.d.ts.map +1 -1
- package/dist/src/mcp/tools/search.js +7 -1
- package/dist/src/mcp/tools/search.js.map +1 -1
- package/dist/src/mcp/tools/status.d.ts +15 -1
- package/dist/src/mcp/tools/status.d.ts.map +1 -1
- package/dist/src/mcp/tools/status.js +53 -2
- package/dist/src/mcp/tools/status.js.map +1 -1
- package/dist/src/observability/retrieval-feedback.d.ts +82 -0
- package/dist/src/observability/retrieval-feedback.d.ts.map +1 -0
- package/dist/src/observability/retrieval-feedback.js +231 -0
- package/dist/src/observability/retrieval-feedback.js.map +1 -0
- package/dist/src/observability/rift-context.d.ts.map +1 -1
- package/dist/src/observability/rift-context.js +3 -0
- package/dist/src/observability/rift-context.js.map +1 -1
- package/dist/src/observability/tool-usage-stats.d.ts +13 -0
- package/dist/src/observability/tool-usage-stats.d.ts.map +1 -1
- package/dist/src/observability/tool-usage-stats.js +15 -0
- package/dist/src/observability/tool-usage-stats.js.map +1 -1
- package/dist/src/observability/tool-usage.d.ts +56 -0
- package/dist/src/observability/tool-usage.d.ts.map +1 -1
- package/dist/src/observability/tool-usage.js +86 -0
- package/dist/src/observability/tool-usage.js.map +1 -1
- package/dist/src/providers/claude-cli-metadata-extraction.d.ts +47 -0
- package/dist/src/providers/claude-cli-metadata-extraction.d.ts.map +1 -0
- package/dist/src/providers/claude-cli-metadata-extraction.js +120 -0
- package/dist/src/providers/claude-cli-metadata-extraction.js.map +1 -0
- package/dist/src/providers/claude-cli-runner.d.ts +92 -0
- package/dist/src/providers/claude-cli-runner.d.ts.map +1 -0
- package/dist/src/providers/claude-cli-runner.js +598 -0
- package/dist/src/providers/claude-cli-runner.js.map +1 -0
- package/dist/src/providers/codex-cli-metadata-extraction.d.ts.map +1 -1
- package/dist/src/providers/codex-cli-metadata-extraction.js +1 -40
- package/dist/src/providers/codex-cli-metadata-extraction.js.map +1 -1
- package/dist/src/providers/codex-cli-runner.d.ts +7 -0
- package/dist/src/providers/codex-cli-runner.d.ts.map +1 -1
- package/dist/src/providers/codex-cli-runner.js +131 -5
- package/dist/src/providers/codex-cli-runner.js.map +1 -1
- package/dist/src/providers/conversation-generation.d.ts +10 -0
- package/dist/src/providers/conversation-generation.d.ts.map +1 -1
- package/dist/src/providers/conversation-generation.js +54 -13
- package/dist/src/providers/conversation-generation.js.map +1 -1
- package/dist/src/providers/openai-metadata-extraction.d.ts +48 -1
- package/dist/src/providers/openai-metadata-extraction.d.ts.map +1 -1
- package/dist/src/providers/openai-metadata-extraction.js +51 -2
- package/dist/src/providers/openai-metadata-extraction.js.map +1 -1
- package/dist/src/providers/types.d.ts +1 -1
- package/dist/src/providers/types.d.ts.map +1 -1
- package/dist/src/providers/types.js +4 -0
- package/dist/src/providers/types.js.map +1 -1
- package/dist/src/retrieval/canonical-files.d.ts +48 -0
- package/dist/src/retrieval/canonical-files.d.ts.map +1 -0
- package/dist/src/retrieval/canonical-files.js +210 -0
- package/dist/src/retrieval/canonical-files.js.map +1 -0
- package/dist/src/retrieval/compact.d.ts +95 -0
- package/dist/src/retrieval/compact.d.ts.map +1 -1
- package/dist/src/retrieval/compact.js +254 -8
- package/dist/src/retrieval/compact.js.map +1 -1
- package/dist/src/retrieval/context-pack.d.ts.map +1 -1
- package/dist/src/retrieval/context-pack.js +65 -15
- package/dist/src/retrieval/context-pack.js.map +1 -1
- package/dist/src/retrieval/conversation-dedup.d.ts +40 -0
- package/dist/src/retrieval/conversation-dedup.d.ts.map +1 -0
- package/dist/src/retrieval/conversation-dedup.js +141 -0
- package/dist/src/retrieval/conversation-dedup.js.map +1 -0
- package/dist/src/retrieval/evidence-key.d.ts +48 -0
- package/dist/src/retrieval/evidence-key.d.ts.map +1 -0
- package/dist/src/retrieval/evidence-key.js +131 -0
- package/dist/src/retrieval/evidence-key.js.map +1 -0
- package/dist/src/retrieval/feedback-ranking.d.ts +49 -0
- package/dist/src/retrieval/feedback-ranking.d.ts.map +1 -0
- package/dist/src/retrieval/feedback-ranking.js +138 -0
- package/dist/src/retrieval/feedback-ranking.js.map +1 -0
- package/dist/src/retrieval/git-state.d.ts +9 -0
- package/dist/src/retrieval/git-state.d.ts.map +1 -1
- package/dist/src/retrieval/git-state.js +18 -0
- package/dist/src/retrieval/git-state.js.map +1 -1
- package/dist/src/retrieval/group-by-parent.d.ts +38 -0
- package/dist/src/retrieval/group-by-parent.d.ts.map +1 -0
- package/dist/src/retrieval/group-by-parent.js +40 -0
- package/dist/src/retrieval/group-by-parent.js.map +1 -0
- package/dist/src/retrieval/lexical.d.ts.map +1 -1
- package/dist/src/retrieval/lexical.js +1 -3
- package/dist/src/retrieval/lexical.js.map +1 -1
- package/dist/src/retrieval/receipt.d.ts +57 -0
- package/dist/src/retrieval/receipt.d.ts.map +1 -0
- package/dist/src/retrieval/receipt.js +119 -0
- package/dist/src/retrieval/receipt.js.map +1 -0
- package/dist/src/retrieval/reranker.d.ts +49 -2
- package/dist/src/retrieval/reranker.d.ts.map +1 -1
- package/dist/src/retrieval/reranker.js +64 -4
- package/dist/src/retrieval/reranker.js.map +1 -1
- package/dist/src/retrieval/stitch-chunks.d.ts +73 -0
- package/dist/src/retrieval/stitch-chunks.d.ts.map +1 -0
- package/dist/src/retrieval/stitch-chunks.js +106 -0
- package/dist/src/retrieval/stitch-chunks.js.map +1 -0
- package/dist/src/server/app.d.ts +1 -1
- package/dist/src/server/app.d.ts.map +1 -1
- package/dist/src/server/app.js +20 -3
- package/dist/src/server/app.js.map +1 -1
- package/dist/src/server/routes/conversations-search.d.ts.map +1 -1
- package/dist/src/server/routes/conversations-search.js +22 -3
- package/dist/src/server/routes/conversations-search.js.map +1 -1
- package/dist/src/server/routes/friend-status.d.ts +64 -6
- package/dist/src/server/routes/friend-status.d.ts.map +1 -1
- package/dist/src/server/routes/friend-status.js +114 -18
- package/dist/src/server/routes/friend-status.js.map +1 -1
- package/dist/src/server/routes/mcp-usage.d.ts +9 -6
- package/dist/src/server/routes/mcp-usage.d.ts.map +1 -1
- package/dist/src/server/routes/mcp-usage.js.map +1 -1
- package/dist/src/server/routes/retrieval-feedback.d.ts +3 -0
- package/dist/src/server/routes/retrieval-feedback.d.ts.map +1 -0
- package/dist/src/server/routes/retrieval-feedback.js +290 -0
- package/dist/src/server/routes/retrieval-feedback.js.map +1 -0
- package/dist/src/server/routes/save.d.ts +3 -3
- package/dist/src/server/routes/save.d.ts.map +1 -1
- package/dist/src/server/routes/save.js +6 -2
- package/dist/src/server/routes/save.js.map +1 -1
- package/dist/src/server/routes/search.d.ts +1 -1
- package/dist/src/server/routes/search.d.ts.map +1 -1
- package/dist/src/server/routes/search.js +55 -8
- package/dist/src/server/routes/search.js.map +1 -1
- package/dist/src/server/serving-marker.d.ts +85 -0
- package/dist/src/server/serving-marker.d.ts.map +1 -0
- package/dist/src/server/serving-marker.js +226 -0
- package/dist/src/server/serving-marker.js.map +1 -0
- package/dist/src/storage/chunk-backfill.d.ts +39 -0
- package/dist/src/storage/chunk-backfill.d.ts.map +1 -0
- package/dist/src/storage/chunk-backfill.js +295 -0
- package/dist/src/storage/chunk-backfill.js.map +1 -0
- package/dist/src/storage/filter.d.ts +42 -0
- package/dist/src/storage/filter.d.ts.map +1 -0
- package/dist/src/storage/filter.js +70 -0
- package/dist/src/storage/filter.js.map +1 -0
- package/dist/src/storage/rebuild.d.ts.map +1 -1
- package/dist/src/storage/rebuild.js +44 -27
- package/dist/src/storage/rebuild.js.map +1 -1
- package/dist/src/storage/tables.d.ts +41 -0
- package/dist/src/storage/tables.d.ts.map +1 -1
- package/dist/src/storage/tables.js +64 -1
- package/dist/src/storage/tables.js.map +1 -1
- package/operator/swiftbar/render-menu.py +60 -18
- package/package.json +6 -4
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
 * Capability flags a serving build declares about itself.
 *
 * Booleans, rather than a commit or version string, are the contract:
 * `package.json` still reads `beta.21` on chunk-aware `main`, so a version
 * string cannot tell builds apart. Each build states what IT can do, and a
 * future build that drops a capability flips the matching flag instead of
 * relying on commit-ancestry math.
 */
export interface ServingCapabilities {
    /** Conversation tables carry `parent_id` / `chunk_index` / `chunk_count`. */
    conversation_chunk_columns: boolean;
    /** Retrieval reassembles chunk sets on the expand path (`detail="full"`). */
    expand_stitching: boolean;
    /** `chunk-backfill` may safely write a chunk-column conversation set here. */
    chunk_backfill_write_compatible: boolean;
}
|
|
16
|
+
/** Shape of the serving-build marker stamped into a data dir. */
export interface ServingMarker {
    /** Version string of the build that wrote the marker. */
    version: string;
    /** Commit identifier of the build that wrote the marker. */
    commit: string;
    /** Boot time recorded when the marker was written, as a string. */
    booted_at: string;
    /** What the stamping build declared it can do. */
    capabilities: ServingCapabilities;
}
|
|
22
|
+
/**
 * What THIS build can do. Every flag is true: the build defines the chunk
 * columns in the conversation seed/migration, reassembles chunk sets on
 * expand, and writes chunk-column sets coherently. A future build that
 * regresses one of these must set the matching flag to false.
 */
export declare const CURRENT_SERVING_CAPABILITIES: ServingCapabilities;
|
|
29
|
+
/**
 * Location of the marker file inside a data dir (it lives under the
 * daemon-owned `observability/` folder).
 */
export declare function servingMarkerPath(dataDir: string): string;
|
|
31
|
+
/**
 * Build the marker object describing this build, using `bootedAt` as the
 * recorded boot time.
 */
export declare function buildServingMarker(bootedAt: string): ServingMarker;
|
|
33
|
+
/**
 * Write the serving-build marker into `dataDir`.
 *
 * The write is atomic (temp file + rename), so a crash part-way through cannot
 * leave a half-written marker that would later read as malformed and be
 * refused. It is also best-effort: failures are logged and swallowed so they
 * can never break daemon boot.
 *
 * Call this ONLY from the serving daemon's boot path, never from a CLI tool.
 */
export declare function writeServingMarker(dataDir: string, bootedAt?: string): void;
|
|
42
|
+
/**
 * Load the marker from `dataDir`. Yields `null` when the marker is absent,
 * unreadable, or malformed.
 */
export declare function readServingMarker(dataDir: string): ServingMarker | null;
|
|
44
|
+
/** Outcome returned by the chunk-backfill write guard. */
export interface CompatibilityVerdict {
    /** Whether the guard accepted the write. */
    ok: boolean;
    /** Operator-facing explanation for a refusal (`ok === false`). */
    reason: string;
}
|
|
49
|
+
/**
 * Oldest serving-build marker the write guard will still trust (24h).
 *
 * The marker certifies a RECENT boot, not an ancient one — see the downgrade
 * note in the header of the implementation file. Because the backfill tool
 * requires the daemon to be stopped, the owner naturally performs a boot→stop
 * cycle just before backfilling, and that boot re-stamps the marker. A one-day
 * window leaves slack ("booted this morning, backfill this evening") while
 * still rejecting a months-old, possibly downgrade-era marker. A marker dated
 * slightly in the future is tolerated up to {@link MAX_FUTURE_SKEW_MS}.
 */
export declare const MAX_MARKER_AGE_MS: number;
|
|
59
|
+
/** How far in the future a marker's boot time may be and still count as clock skew (5 min). */
export declare const MAX_FUTURE_SKEW_MS: number;
|
|
61
|
+
/**
 * Decide whether a destructive `chunk-backfill` WRITE is safe against the
 * install that serves `dataDir`. The write is refused unless the serving
 * daemon has stamped a well-formed, RECENT marker in which EVERY capability
 * listed in `REQUIRED_WRITE_CAPABILITIES` (see the implementation) is true.
 *
 * Refusal cases:
 *  - Absent or malformed marker: a pre-chunk daemon never writes one, so there
 *    is no proof that the install restarting after the write can serve chunk
 *    rows.
 *  - Unparseable `booted_at`: without a real boot time the marker cannot prove
 *    freshness, so it is treated as malformed.
 *  - Missing capability: the serving build itself declares that it cannot
 *    serve a chunk-column set.
 *  - Stale `booted_at` (older than `maxAgeMs`): shows only that SOME
 *    chunk-aware daemon booted this dir long ago, not that the CURRENT install
 *    is chunk-aware (this closes the silent-downgrade gap).
 *
 * `now` and `maxAgeMs` can be injected for deterministic tests; production
 * callers rely on the wall clock and {@link MAX_MARKER_AGE_MS}.
 */
export declare function assertChunkBackfillWriteCompatible(dataDir: string, opts?: {
    now?: Date;
    maxAgeMs?: number;
}): CompatibilityVerdict;
|
|
85
|
+
//# sourceMappingURL=serving-marker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"serving-marker.d.ts","sourceRoot":"","sources":["../../../src/server/serving-marker.ts"],"names":[],"mappings":"AA8CA;;;;;;GAMG;AACH,MAAM,WAAW,mBAAmB;IAClC,6EAA6E;IAC7E,0BAA0B,EAAE,OAAO,CAAC;IACpC,6EAA6E;IAC7E,gBAAgB,EAAE,OAAO,CAAC;IAC1B,+EAA+E;IAC/E,+BAA+B,EAAE,OAAO,CAAC;CAC1C;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,mBAAmB,CAAC;CACnC;AAED;;;;;GAKG;AACH,eAAO,MAAM,4BAA4B,EAAE,mBAI1C,CAAC;AAEF,mFAAmF;AACnF,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAEzD;AAED,uDAAuD;AACvD,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa,CAQlE;AAED;;;;;;;GAOG;AACH,wBAAgB,kBAAkB,CAChC,OAAO,EAAE,MAAM,EACf,QAAQ,GAAE,MAAiC,GAC1C,IAAI,CAcN;AAsBD,yEAAyE;AACzE,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI,CAQvE;AAED,MAAM,WAAW,oBAAoB;IACnC,EAAE,EAAE,OAAO,CAAC;IACZ,iDAAiD;IACjD,MAAM,EAAE,MAAM,CAAC;CAChB;AAgBD;;;;;;;;GAQG;AACH,eAAO,MAAM,iBAAiB,QAAsB,CAAC;AAErD,0EAA0E;AAC1E,eAAO,MAAM,kBAAkB,QAAgB,CAAC;AAEhD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAgB,kCAAkC,CAChD,OAAO,EAAE,MAAM,EACf,IAAI,GAAE;IAAE,GAAG,CAAC,EAAE,IAAI,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAO,GAC3C,oBAAoB,CAoEtB"}
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Serving-build capability marker.
|
|
3
|
+
*
|
|
4
|
+
* The serving daemon stamps a small JSON marker into the data dir on every
|
|
5
|
+
* boot, recording WHICH code last served this data dir and what that code is
|
|
6
|
+
* capable of. This is the contract that lets an OFFLINE tool — one that runs
|
|
7
|
+
* with the daemon stopped, so it cannot probe `GET /version` — decide whether a
|
|
8
|
+
* destructive, schema-touching operation is safe against the install that will
|
|
9
|
+
* restart and serve this dir.
|
|
10
|
+
*
|
|
11
|
+
* Why a capability marker and not the obvious alternatives:
|
|
12
|
+
* - On-disk table schema is the very thing that gets accidentally mutated
|
|
13
|
+
* (the chunk-backfill `--dry-run` incident added the chunk columns to a
|
|
14
|
+
* pre-chunk dir), so the schema cannot certify the serving code.
|
|
15
|
+
* - Observability events are an indirect history signal — they tell you what
|
|
16
|
+
* happened, not what the currently-installed code can do.
|
|
17
|
+
* - `RIFT_CHUNKING` gates whether NEW conversations are split, not whether the
|
|
18
|
+
* permanent chunk columns / expand read path exist in the serving build.
|
|
19
|
+
* Only a marker written BY the serving build answers the question we actually
|
|
20
|
+
* care about: "what code last served this dir, and can it handle a
|
|
21
|
+
* chunk-backfill write?".
|
|
22
|
+
*
|
|
23
|
+
* Discipline:
|
|
24
|
+
* - Written ONLY by the serving daemon (never by CLI tools). A CLI tool that
|
|
25
|
+
* stamped the marker would be certifying itself, defeating the purpose.
|
|
26
|
+
* - Best-effort and non-fatal — a failed marker write must never break boot.
|
|
27
|
+
* - Absent / unknown / malformed is treated by the write guard as "cannot
|
|
28
|
+
* confirm compatible → refuse". A pre-chunk daemon (e.g. beta.21) never
|
|
29
|
+
* writes the marker, so it always reads as absent → refused.
|
|
30
|
+
* - STALE markers are refused too (freshness window, see
|
|
31
|
+
* {@link MAX_MARKER_AGE_MS}). A marker proves "a chunk-aware daemon booted
|
|
32
|
+
* this dir RECENTLY", not "a chunk-aware daemon booted this dir once, ever".
|
|
33
|
+
* This closes the downgrade gap: if a chunk-aware daemon stamps a marker and
|
|
34
|
+
* a pre-chunk daemon (beta.21) later serves the same dir, beta.21 neither
|
|
35
|
+
* overwrites nor removes the stale chunk-aware marker — so without a
|
|
36
|
+
* freshness check the offline write guard would still pass against an install
|
|
37
|
+
* that can no longer serve chunk rows. Requiring a recent boot means the
|
|
38
|
+
* owner must (re)boot the chunk-aware daemon immediately before backfilling,
|
|
39
|
+
* which re-stamps the marker with the CURRENT install's real capabilities.
|
|
40
|
+
* Daemon DOWNGRADE (chunk-aware → pre-chunk) after a marker write is
|
|
41
|
+
* otherwise unsupported in this owner-only phase.
|
|
42
|
+
*/
|
|
43
|
+
import fs from "node:fs";
|
|
44
|
+
import path from "node:path";
|
|
45
|
+
import { getBuildInfo } from "./build-info.js";
|
|
46
|
+
/**
 * Capabilities of THIS build. All true: this code defines the chunk columns in
 * the conversation seed/migration, reassembles chunk sets on expand, and writes
 * chunk-column sets coherently. A future build that regresses any of these must
 * set the corresponding flag false in this literal.
 *
 * The object is frozen: it is the build's self-declared contract, so no
 * importer may flip a flag at runtime and change what later markers certify.
 * Callers that need a mutable copy should spread it (`{ ...CURRENT_SERVING_CAPABILITIES }`).
 */
export const CURRENT_SERVING_CAPABILITIES = Object.freeze({
    conversation_chunk_columns: true,
    expand_stitching: true,
    chunk_backfill_write_compatible: true,
});
|
|
57
|
+
/**
 * Resolve where the serving-build marker lives for a data dir: the file
 * `serving-build.json` inside the daemon-owned `observability/` folder.
 *
 * @param {string} dataDir - Root of the data directory.
 * @returns {string} Path to the marker file.
 */
export function servingMarkerPath(dataDir) {
    const segments = ["observability", "serving-build.json"];
    return path.join(dataDir, ...segments);
}
|
|
61
|
+
/**
 * Produce the marker object for this build.
 *
 * Version and commit come from `getBuildInfo()`; the capability flags are a
 * fresh copy of {@link CURRENT_SERVING_CAPABILITIES}, so the returned marker
 * never aliases the module-level object.
 *
 * @param {string} bootedAt - Boot time to record in the marker.
 * @returns {{version: string, commit: string, booted_at: string, capabilities: object}}
 */
export function buildServingMarker(bootedAt) {
    const { version, commit } = getBuildInfo();
    const capabilities = Object.assign({}, CURRENT_SERVING_CAPABILITIES);
    return { version, commit, booted_at: bootedAt, capabilities };
}
|
|
71
|
+
/**
 * Stamp the serving-build marker into the data dir. Atomic (write-temp +
 * rename) so a crash mid-write never leaves a half-written marker that would
 * read as malformed → refused. Best-effort: any failure is logged and
 * swallowed so it can never break daemon boot.
 *
 * If the temp write or the rename fails, the temp file is removed again (also
 * best-effort) so failed attempts do not pile up `serving-build.json.tmp-<pid>`
 * leftovers next to the marker.
 *
 * MUST be called only from the serving daemon boot path, never from a CLI tool.
 *
 * @param {string} dataDir - Data directory the daemon serves.
 * @param {string} [bootedAt] - Boot time to record; defaults to the current
 *   time as an ISO-8601 string.
 * @returns {void}
 */
export function writeServingMarker(dataDir, bootedAt = new Date().toISOString()) {
    // Set once the temp path is known; cleared again after a successful rename.
    let pendingTmp = null;
    try {
        const target = servingMarkerPath(dataDir);
        fs.mkdirSync(path.dirname(target), { recursive: true });
        pendingTmp = `${target}.tmp-${process.pid}`;
        fs.writeFileSync(pendingTmp, `${JSON.stringify(buildServingMarker(bootedAt), null, 2)}\n`);
        fs.renameSync(pendingTmp, target);
        pendingTmp = null;
    }
    catch (err) {
        process.stderr.write(`serving-marker: failed to write (non-fatal): ${err instanceof Error ? err.message : String(err)}\n`);
        if (pendingTmp !== null) {
            try {
                // `force: true` makes this a no-op when the temp file was never created.
                fs.rmSync(pendingTmp, { force: true });
            }
            catch {
                // Cleanup is best-effort too: the original failure is already
                // logged, and nothing here may break daemon boot.
            }
        }
    }
}
|
|
91
|
+
/**
 * Structural check for a parsed marker.
 *
 * Accepts only a non-null object whose `version`, `commit` and `booted_at`
 * are strings and whose `capabilities` is a non-null object carrying the three
 * capability flags as booleans. The content of `booted_at` is not inspected
 * here.
 *
 * @param {unknown} v - Candidate value, typically the result of `JSON.parse`.
 * @returns {boolean} True when `v` has the marker shape.
 */
function isServingMarker(v) {
    if (typeof v !== "object" || v === null) {
        return false;
    }
    const stringFields = ["version", "commit", "booted_at"];
    if (!stringFields.every((field) => typeof v[field] === "string")) {
        return false;
    }
    const flags = v.capabilities;
    if (typeof flags !== "object" || flags === null) {
        return false;
    }
    const flagFields = [
        "conversation_chunk_columns",
        "expand_stitching",
        "chunk_backfill_write_compatible",
    ];
    return flagFields.every((field) => typeof flags[field] === "boolean");
}
|
|
108
|
+
/**
 * Load the serving-build marker for a data dir.
 *
 * Any failure along the way — missing file, read error, invalid JSON, or a
 * value that does not have the marker shape — yields `null` rather than an
 * exception.
 *
 * @param {string} dataDir - Data directory to inspect.
 * @returns {object | null} The parsed marker, or `null` if absent, unreadable, or malformed.
 */
export function readServingMarker(dataDir) {
    try {
        const text = fs.readFileSync(servingMarkerPath(dataDir), "utf8");
        const candidate = JSON.parse(text);
        if (!isServingMarker(candidate)) {
            return null;
        }
        return candidate;
    }
    catch {
        return null;
    }
}
|
|
119
|
+
/**
 * Capabilities a chunk-backfill WRITE depends on — all of them must be true.
 *
 * Tolerating a chunk-column write is not enough: the serving build must have
 * the columns (`conversation_chunk_columns`), reassemble the written sets on
 * the read path (`expand_stitching`), AND self-certify the write as safe
 * (`chunk_backfill_write_compatible`). A build that regresses ANY one of them
 * would serve the backfilled corpus incorrectly.
 */
const REQUIRED_WRITE_CAPABILITIES = ["conversation_chunk_columns", "expand_stitching", "chunk_backfill_write_compatible"];
|
|
132
|
+
/**
 * Oldest serving-build marker the write guard will still trust (24h).
 *
 * The marker certifies a RECENT boot, not an ancient one — see the downgrade
 * note in the file header. Because the backfill tool requires the daemon to be
 * stopped, the owner naturally performs a boot→stop cycle just before
 * backfilling, and that boot re-stamps the marker. A one-day window leaves
 * slack ("booted this morning, backfill this evening") while still rejecting a
 * months-old, possibly downgrade-era marker. A marker dated slightly in the
 * future is tolerated up to {@link MAX_FUTURE_SKEW_MS}.
 */
export const MAX_MARKER_AGE_MS = 1000 * 60 * 60 * 24;
|
|
142
|
+
/**
 * How far into the future (in milliseconds) a marker's `booted_at` may lie and
 * still be accepted as ordinary clock skew: 5 minutes.
 */
export const MAX_FUTURE_SKEW_MS = 300_000;
|
|
144
|
+
/**
 * Decide whether a destructive `chunk-backfill` WRITE is safe against the
 * install that serves `dataDir`. Refuses unless the serving daemon has stamped
 * a well-formed, RECENT marker in which EVERY capability in
 * {@link REQUIRED_WRITE_CAPABILITIES} is true.
 *
 * Refusal cases, in the order they are checked:
 *  - Absent/malformed marker: a pre-chunk daemon never writes one, so we cannot
 *    prove the install that restarts after the write can serve chunk rows.
 *  - Missing capability: the serving build self-declares it cannot serve a
 *    chunk-column set.
 *  - Unparseable `booted_at`: a marker without a real boot time cannot prove
 *    freshness, so it is treated as malformed.
 *  - Uncomputable age (`now` is an Invalid Date, or `maxAgeMs` is NaN): every
 *    comparison against NaN is false, so without this check the freshness test
 *    would silently pass. The guard fails closed instead.
 *  - Stale `booted_at` (older than `maxAgeMs`): proves only that SOME chunk-aware
 *    daemon booted this dir long ago, not that the CURRENT install is chunk-aware
 *    (closes the silent-downgrade gap).
 *  - `booted_at` further in the future than {@link MAX_FUTURE_SKEW_MS}.
 *
 * @param {string} dataDir - Data directory the backfill would write to.
 * @param {{ now?: Date, maxAgeMs?: number }} [opts] - Injectable clock and
 *   freshness window for deterministic tests; production callers use the wall
 *   clock and {@link MAX_MARKER_AGE_MS}.
 * @returns {{ ok: boolean, reason: string }} `ok: true` with an empty reason
 *   when the write is allowed, otherwise `ok: false` with an explanation.
 */
export function assertChunkBackfillWriteCompatible(dataDir, opts = {}) {
    const now = opts.now ?? new Date();
    const maxAgeMs = opts.maxAgeMs ?? MAX_MARKER_AGE_MS;
    const marker = readServingMarker(dataDir);
    if (!marker) {
        return {
            ok: false,
            reason: `No serving-build marker at ${servingMarkerPath(dataDir)}. Cannot ` +
                `confirm the install that serves this data dir is chunk-aware — a ` +
                `pre-chunk daemon (e.g. beta.21) never writes this marker. Boot the ` +
                `chunk-aware serving daemon on this dir first (it stamps the marker on ` +
                `startup), or — if this is a DISPOSABLE COPY, never your live data dir — ` +
                `re-run with --allow-uncertified-copy to override.`,
        };
    }
    const missing = REQUIRED_WRITE_CAPABILITIES.filter((cap) => !marker.capabilities[cap]);
    if (missing.length > 0) {
        return {
            ok: false,
            reason: `Serving-build marker (version ${marker.version}, commit ` +
                `${marker.commit.slice(0, 12)}) is missing required capabilities: ` +
                `${missing.map((c) => `${c}=false`).join(", ")} — the serving install ` +
                `cannot safely serve a chunk-column conversation set. Upgrade the ` +
                `serving daemon before backfilling.`,
        };
    }
    const bootedMs = Date.parse(marker.booted_at);
    if (Number.isNaN(bootedMs)) {
        return {
            ok: false,
            reason: `Serving-build marker has an invalid booted_at ("${marker.booted_at}") ` +
                `— cannot prove the chunk-aware daemon booted recently. Reboot the ` +
                `chunk-aware serving daemon on this dir to re-stamp the marker.`,
        };
    }
    const ageMs = now.getTime() - bootedMs;
    // Fail closed: NaN compares false against everything, so an Invalid Date
    // `now` or a NaN `maxAgeMs` would otherwise skip both checks below.
    if (Number.isNaN(ageMs) || Number.isNaN(maxAgeMs)) {
        return {
            ok: false,
            reason: `Cannot compute the serving-build marker's age (now=${String(now)}, ` +
                `maxAgeMs=${maxAgeMs}) — refusing rather than treating an unverifiable ` +
                `freshness check as a pass.`,
        };
    }
    if (ageMs > maxAgeMs) {
        const ageHours = Math.round(ageMs / 3_600_000);
        const maxHours = Math.round(maxAgeMs / 3_600_000);
        return {
            ok: false,
            reason: `Serving-build marker is stale (booted_at ${marker.booted_at}, ~${ageHours}h ` +
                `old > ${maxHours}h freshness window). A stale marker only proves a ` +
                `chunk-aware daemon booted this dir long ago — not that the CURRENT ` +
                `install is chunk-aware (a since-downgraded daemon leaves the old marker ` +
                `in place). (Re)boot the chunk-aware serving daemon on this dir ` +
                `immediately before backfilling to re-stamp the marker, or — if this is ` +
                `a DISPOSABLE COPY, never your live data dir — re-run with ` +
                `--allow-uncertified-copy to override.`,
        };
    }
    if (ageMs < -MAX_FUTURE_SKEW_MS) {
        return {
            ok: false,
            reason: `Serving-build marker is dated in the future (booted_at ` +
                `${marker.booted_at}) beyond tolerated clock skew — refusing rather than ` +
                `trusting a marker that may have been hand-edited. Reboot the chunk-aware ` +
                `serving daemon on this dir to re-stamp the marker.`,
        };
    }
    return { ok: true, reason: "" };
}
|
|
226
|
+
//# sourceMappingURL=serving-marker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"serving-marker.js","sourceRoot":"","sources":["../../../src/server/serving-marker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAyCG;AACH,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAyB/C;;;;;GAKG;AACH,MAAM,CAAC,MAAM,4BAA4B,GAAwB;IAC/D,0BAA0B,EAAE,IAAI;IAChC,gBAAgB,EAAE,IAAI;IACtB,+BAA+B,EAAE,IAAI;CACtC,CAAC;AAEF,mFAAmF;AACnF,MAAM,UAAU,iBAAiB,CAAC,OAAe;IAC/C,OAAO,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,eAAe,EAAE,oBAAoB,CAAC,CAAC;AACnE,CAAC;AAED,uDAAuD;AACvD,MAAM,UAAU,kBAAkB,CAAC,QAAgB;IACjD,MAAM,IAAI,GAAG,YAAY,EAAE,CAAC;IAC5B,OAAO;QACL,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,SAAS,EAAE,QAAQ;QACnB,YAAY,EAAE,EAAE,GAAG,4BAA4B,EAAE;KAClD,CAAC;AACJ,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,kBAAkB,CAChC,OAAe,EACf,WAAmB,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;IAE3C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;QAC1C,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACxD,MAAM,GAAG,GAAG,GAAG,MAAM,QAAQ,OAAO,CAAC,GAAG,EAAE,CAAC;QAC3C,EAAE,CAAC,aAAa,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,kBAAkB,CAAC,QAAQ,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;QACpF,EAAE,CAAC,UAAU,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;IAC7B,CAAC;IAAC,OAAO,GAAY,EAAE,CAAC;QACtB,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,gDACE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CACjD,IAAI,CACL,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,eAAe,CAAC,CAAU;IACjC,IAAI,CAAC,CAAC,IAAI,OAAO,CAAC,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC9C,MAAM,CAAC,GAAG,CAA4B,CAAC;IACvC,IACE,OAAO,CAAC,CAAC,OAAO,KAAK,QAAQ;QAC7B,OAAO,CAAC,CAAC,MAAM,KAAK,QAAQ;QAC5B,OAAO,CAAC,CAAC,SAAS,KAAK,QAAQ,EAC/B,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IACD,MAAM,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC;IACzB,IAAI,CAAC,CAAC,IAAI,OAAO,CAAC,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC9C,MAAM,IAAI,GAAG,CAA4B,CAAC;IAC1C,OAAO,CACL,OAAO,IAAI,CAAC,0BAA0B,KAAK,SAAS;QACpD,OAAO,IAAI,CAAC,gBAAgB,KAAK,SAAS;QAC1C,OAAO,IAAI,CAAC,+BAA+B,KAAK,SAAS,CAC1D,CAA
C;AACJ,CAAC;AAED,yEAAyE;AACzE,MAAM,UAAU,iBAAiB,CAAC,OAAe;IAC/C,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,EAAE,CAAC,YAAY,CAAC,iBAAiB,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC,CAAC;QAChE,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAY,CAAC;QAC1C,OAAO,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC;IACjD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAQD;;;;;;;GAOG;AACH,MAAM,2BAA2B,GAA6C;IAC5E,4BAA4B;IAC5B,kBAAkB;IAClB,iCAAiC;CAClC,CAAC;AAEF;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;AAErD,0EAA0E;AAC1E,MAAM,CAAC,MAAM,kBAAkB,GAAG,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC;AAEhD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,UAAU,kCAAkC,CAChD,OAAe,EACf,OAA0C,EAAE;IAE5C,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,IAAI,IAAI,IAAI,EAAE,CAAC;IACnC,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,iBAAiB,CAAC;IACpD,MAAM,MAAM,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAC1C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO;YACL,EAAE,EAAE,KAAK;YACT,MAAM,EACJ,8BAA8B,iBAAiB,CAAC,OAAO,CAAC,WAAW;gBACnE,mEAAmE;gBACnE,qEAAqE;gBACrE,wEAAwE;gBACxE,0EAA0E;gBAC1E,mDAAmD;SACtD,CAAC;IACJ,CAAC;IACD,MAAM,OAAO,GAAG,2BAA2B,CAAC,MAAM,CAChD,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,CACnC,CAAC;IACF,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO;YACL,EAAE,EAAE,KAAK;YACT,MAAM,EACJ,iCAAiC,MAAM,CAAC,OAAO,WAAW;gBAC1D,GAAG,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,sCAAsC;gBACnE,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,yBAAyB;gBACvE,mEAAmE;gBACnE,oCAAoC;SACvC,CAAC;IACJ,CAAC;IACD,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IAC9C,IAAI,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC3B,OAAO;YACL,EAAE,EAAE,KAAK;YACT,MAAM,EACJ,mDAAmD,MAAM,CAAC,SAAS,KAAK;gBACxE,oEAAoE;gBACpE,gEAAgE;SACnE,CAAC;IACJ,CAAC;IACD,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,EAAE,GAAG,QAAQ,CAAC;IACvC,IAAI,KAAK,GAAG,QAAQ,EAAE,CAAC;QACrB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,SAAS,CAAC,CAAC;QAC/C,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,SAAS,CAAC,CAAC;QAClD,OAAO;YACL,EAAE,EAAE,KAAK;
YACT,MAAM,EACJ,4CAA4C,MAAM,CAAC,SAAS,MAAM,QAAQ,IAAI;gBAC9E,SAAS,QAAQ,oDAAoD;gBACrE,qEAAqE;gBACrE,0EAA0E;gBAC1E,iEAAiE;gBACjE,yEAAyE;gBACzE,4DAA4D;gBAC5D,uCAAuC;SAC1C,CAAC;IACJ,CAAC;IACD,IAAI,KAAK,GAAG,CAAC,kBAAkB,EAAE,CAAC;QAChC,OAAO;YACL,EAAE,EAAE,KAAK;YACT,MAAM,EACJ,yDAAyD;gBACzD,GAAG,MAAM,CAAC,SAAS,uDAAuD;gBAC1E,2EAA2E;gBAC3E,oDAAoD;SACvD,CAAC;IACJ,CAAC;IACD,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC;AAClC,CAAC"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import type { EmbeddingProvider } from "../providers/types.js";
|
|
2
|
+
export interface ChunkBackfillOptions {
    /** Provider whose `embedBatch` embeds every chunk of a re-chunked conversation. */
    embedding: EmbeddingProvider;
    /** Data directory whose `raw/conversations/<source>` artifacts are scanned. */
    dataDir: string;
    /** When true, only count and log what would be re-chunked; nothing is embedded or written. */
    dryRun?: boolean;
    /** Maximum number of conversations to (re)chunk before stopping; 0 or undefined means no cap. */
    limit?: number;
    /** Milliseconds to sleep after each (re)chunked conversation, to spare the embedding API. */
    throttleMs?: number;
    /** Cooperative cancellation; inspected before each conversation, so Ctrl-C is safe. */
    signal?: AbortSignal;
    /** Receives one progress line per meaningful event. */
    log?: (msg: string) => void;
}
|
|
16
|
+
export interface ChunkBackfillSummary {
    /** Number of distinct conversations examined (the most-recent raw artifact per id). */
    scanned: number;
    /** Conversations re-chunked in this run, or that would have been in a dry run. */
    rechunked: number;
    /** Skipped: already stored as the desired multi-chunk set. */
    alreadyChunked: number;
    /** Skipped: short enough to remain a single unmarked row. */
    singleChunk: number;
    /** Skipped: raw artifact with no matching indexed row (never resurrected here). */
    orphaned: number;
    /** Skipped: raw artifact that could not be read or parsed, or that yielded no content. */
    errored: number;
    /** True when an abort signal ended the run before every conversation was visited. */
    aborted: boolean;
}
|
|
32
|
+
/**
 * Rebuild historical conversations from their raw artifacts as chunk sets and
 * report what changed.
 *
 * Idempotent: a conversation that is already in its desired shape — a complete
 * chunk set of the right count, or a single unmarked row when it is short — is
 * skipped, so a re-run only touches what is left.
 */
export declare function backfillConversationChunks(opts: ChunkBackfillOptions): Promise<ChunkBackfillSummary>;
|
|
39
|
+
//# sourceMappingURL=chunk-backfill.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunk-backfill.d.ts","sourceRoot":"","sources":["../../../src/storage/chunk-backfill.ts"],"names":[],"mappings":"AA8BA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AA0B/D,MAAM,WAAW,oBAAoB;IACnC,SAAS,EAAE,iBAAiB,CAAC;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,sEAAsE;IACtE,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,sFAAsF;IACtF,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,2EAA2E;IAC3E,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,wEAAwE;IACxE,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,qDAAqD;IACrD,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAC;CAC7B;AAED,MAAM,WAAW,oBAAoB;IACnC,gEAAgE;IAChE,OAAO,EAAE,MAAM,CAAC;IAChB,yFAAyF;IACzF,SAAS,EAAE,MAAM,CAAC;IAClB,wEAAwE;IACxE,cAAc,EAAE,MAAM,CAAC;IACvB,0EAA0E;IAC1E,WAAW,EAAE,MAAM,CAAC;IACpB,8EAA8E;IAC9E,QAAQ,EAAE,MAAM,CAAC;IACjB,6DAA6D;IAC7D,OAAO,EAAE,MAAM,CAAC;IAChB,kFAAkF;IAClF,OAAO,EAAE,OAAO,CAAC;CAClB;AAyGD;;;;;GAKG;AACH,wBAAsB,0BAA0B,CAC9C,IAAI,EAAE,oBAAoB,GACzB,OAAO,CAAC,oBAAoB,CAAC,CAuH/B"}
|
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Owner-only historical conversation chunk backfill.
|
|
3
|
+
*
|
|
4
|
+
* Index-time chunking (behind `RIFT_CHUNKING`) only splits NEW conversations as
|
|
5
|
+
* they are saved/ingested; conversations indexed before the flag was on are still
|
|
6
|
+
* stored as one whole row each. This module re-chunks those historical
|
|
7
|
+
* conversations from their raw artifacts so the owner can dogfood retrieval over a
|
|
8
|
+
* fully-chunked corpus and gather OFF-vs-ON eval evidence before any product
|
|
9
|
+
* default flip.
|
|
10
|
+
*
|
|
11
|
+
* Deliberately CRUDE and owner-grade — NOT the productized beta-user migration:
|
|
12
|
+
* - explicit, one-shot (run from the offline CLI; no background daemon job),
|
|
13
|
+
* - `dryRun` reports what would change without embedding or writing,
|
|
14
|
+
* - lightly throttleable, and safe to Ctrl-C (an `AbortSignal` is checked
|
|
15
|
+
* between conversations, so the in-flight conversation's atomic write always
|
|
16
|
+
* finishes before we stop — never a half-written set on interrupt),
|
|
17
|
+
* - no resumable status UI, pause/cancel, or rollback tooling.
|
|
18
|
+
*
|
|
19
|
+
* Data-safety mirrors the save/ingest chunk-write path: every chunk of a
|
|
20
|
+
* conversation is embedded BEFORE any destructive delete, and the raw artifact
|
|
21
|
+
* that drove the re-chunk is left on disk, so an interrupt between delete and
|
|
22
|
+
* insert is fully repairable by re-running (or by reconcile/rebuild).
|
|
23
|
+
*
|
|
24
|
+
* Reads are compatible with the `expand` stitching slice: a backfilled set uses
|
|
25
|
+
* the same `parent_id` / `chunk_index` / `chunk_count` markers and `convChunkId`
|
|
26
|
+
* scheme, so `detail="full"` reassembles a backfilled conversation through the
|
|
27
|
+
* exact same path as a natively-chunked one.
|
|
28
|
+
*/
|
|
29
|
+
import fs from "node:fs";
|
|
30
|
+
import path from "node:path";
|
|
31
|
+
import { CONVERSATION_SOURCES } from "../providers/types.js";
|
|
32
|
+
import { RIFT_NONE_EMBEDDING_PROVIDER, isPlaceholderEmbeddingProvider, } from "../providers/placeholder-embed.js";
|
|
33
|
+
import { recordEmbed } from "../observability/embedding-events.js";
|
|
34
|
+
import { recordIndexWrite } from "../observability/index-events.js";
|
|
35
|
+
import { eqFilter } from "./filter.js";
|
|
36
|
+
import { getTable } from "./tables.js";
|
|
37
|
+
import { chunkConversation } from "../ingestion/chunk-text.js";
|
|
38
|
+
import { convChunkId, conversationChunkColumns, chunkSetComplete, expectedChunkCount, } from "../ingestion/chunk-meta.js";
|
|
39
|
+
import { conversationContentFingerprint } from "../ingestion/inbox-core/conversation-fingerprint.js";
|
|
40
|
+
import { extractIdFromFilename, compareRawRecencyDesc } from "./rebuild.js";
|
|
41
|
+
// Every raw-artifact source directory that is scanned: the shared conversation
// sources plus "inbox".
const ALL_CONV_SOURCES = [...CONVERSATION_SOURCES, "inbox"];
// Conversation tables in scan order: hot first, then cold.
const CONV_TABLES = ["conversations_hot", "conversations_cold"];
|
|
46
|
+
/**
 * Wait for `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
|
|
49
|
+
/**
 * Pick one raw artifact per conversation id across every source directory.
 *
 * For each `raw/conversations/<source>` directory that exists, every `.json`
 * file whose name yields an id is considered; a candidate replaces the current
 * pick for that id when `compareRawRecencyDesc(candidate, current) < 0` — the
 * same recency policy reconcile/rebuild use, so all three tools settle a
 * duplicated id on the same winner.
 *
 * @param {string} dataDir - Root data directory.
 * @returns {{ id: string, rawPath: string }[]} One entry per conversation id.
 */
function collectRawConversations(dataDir) {
    const winners = new Map();
    for (const source of ALL_CONV_SOURCES) {
        const sourceDir = path.join(dataDir, "raw", "conversations", source);
        if (fs.existsSync(sourceDir)) {
            const jsonFiles = fs.readdirSync(sourceDir).filter((name) => name.endsWith(".json"));
            for (const name of jsonFiles) {
                const id = extractIdFromFilename(name);
                if (id) {
                    const candidate = path.join(sourceDir, name);
                    const incumbent = winners.get(id);
                    if (!incumbent || compareRawRecencyDesc(candidate, incumbent) < 0) {
                        winners.set(id, candidate);
                    }
                }
            }
        }
    }
    return Array.from(winners, ([id, rawPath]) => ({ id, rawPath }));
}
|
|
75
|
+
/**
 * Gather every currently-indexed row of one conversation — its whole row or its
 * chunk set — from BOTH tiers.
 *
 * The same id can sit in hot AND cold after a compaction crash ("copied to cold
 * but not yet removed from hot"). Rows are de-duplicated by id with the
 * later-scanned (cold) copy winning, and cold is chosen as the write target
 * whenever it holds any row, so a re-chunk collapses the duplicate instead of
 * leaving a stale copy in the other tier.
 *
 * @param {string} id - Conversation id.
 * @returns {Promise<{ targetTable: string, rows: object[], hasCrossTierRows: boolean } | null>}
 *   `null` when neither tier has a matching row.
 */
async function fetchConversationRows(id) {
    const byId = eqFilter("id", id, { validateAsRowId: true });
    const byParent = eqFilter("parent_id", id, { validateAsRowId: true });
    const rowsById = new Map();
    let foundInCold = false;
    let sawDuplicate = false;
    // Scan hot → cold so that the canonical cold row replaces its hot duplicate.
    for (const tableName of CONV_TABLES) {
        const tbl = getTable(tableName);
        // Schema-aware filter: a legacy pre-chunk table (e.g. beta.21) has no
        // `parent_id` column and filtering on it would throw. Without the column
        // no chunk set can exist, so matching by id alone is sufficient — which
        // lets a dry run inspect such a table without migrating it first.
        const schema = await tbl.schema();
        const chunkAware = schema.fields.some((field) => field.name === "parent_id");
        const predicate = chunkAware ? `(${byId}) OR (${byParent})` : byId;
        const matches = await tbl.query().where(predicate).toArray();
        if (matches.length === 0) {
            continue;
        }
        foundInCold ||= tableName === "conversations_cold";
        for (const row of matches) {
            // An id that was already collected earlier is a duplicate.
            sawDuplicate ||= rowsById.has(row.id);
            rowsById.set(row.id, row);
        }
    }
    if (rowsById.size === 0) {
        return null;
    }
    return {
        targetTable: foundInCold ? "conversations_cold" : "conversations_hot",
        rows: [...rowsById.values()],
        hasCrossTierRows: sawDuplicate,
    };
}
|
|
125
|
+
/**
 * Re-chunk historical conversations from raw into chunk sets. Returns a summary
 * of what changed. Idempotent: a conversation already in its desired shape (a
 * complete set of the right count, or a single unmarked row for a short
 * conversation) is skipped, so re-running only touches the remainder.
 *
 * @param {object} opts
 * @param {object} opts.embedding - Embedding provider passed on to the re-chunk step.
 * @param {string} opts.dataDir - Data directory whose raw artifacts are scanned.
 * @param {boolean} [opts.dryRun=false] - Count and log only; embed and write nothing.
 * @param {number} [opts.limit=0] - Stop once this many conversations were re-chunked; 0 = no cap.
 * @param {number} [opts.throttleMs=0] - Sleep after each (re)chunked conversation.
 * @param {AbortSignal} [opts.signal] - Checked before each conversation.
 * @param {(msg: string) => void} [opts.log] - Progress sink.
 * @returns {Promise<object>} Counters: scanned, rechunked, alreadyChunked,
 *   singleChunk, orphaned, errored, plus the `aborted` flag.
 */
export async function backfillConversationChunks(opts) {
    const { embedding, dataDir } = opts;
    const dryRun = opts.dryRun ?? false;
    const throttleMs = opts.throttleMs ?? 0;
    const limit = opts.limit ?? 0;
    const log = opts.log ?? (() => { });
    // Treat whitespace-only as empty: a raw with `"content": "   "` is truthy
    // but `chunkConversation` trims it to zero chunks, which would otherwise
    // drive an empty replacement set straight to the delete path below.
    const pickContent = (v) => (typeof v === "string" && v.trim().length > 0 ? v : "");
    const summary = {
        scanned: 0,
        rechunked: 0,
        alreadyChunked: 0,
        singleChunk: 0,
        orphaned: 0,
        errored: 0,
        aborted: false,
    };
    const raws = collectRawConversations(dataDir);
    log(`Found ${raws.length} historical conversation(s) to examine.`);
    for (const { id, rawPath } of raws) {
        if (opts.signal?.aborted) {
            summary.aborted = true;
            log("Aborted — stopping before the next conversation.");
            break;
        }
        if (limit > 0 && summary.rechunked >= limit) {
            log(`Reached --limit ${limit}; stopping.`);
            break;
        }
        summary.scanned++;
        let raw;
        try {
            raw = JSON.parse(fs.readFileSync(rawPath, "utf-8"));
        }
        catch {
            summary.errored++;
            log(`! ${id}: unreadable raw artifact — skipped.`);
            continue;
        }
        // `?.` because a raw file whose JSON root is `null` parses successfully;
        // reading `.content` off it would throw and abort the whole run instead
        // of counting this one artifact as errored.
        const content = pickContent(raw?.content) || pickContent(raw?.summary);
        if (content === "") {
            summary.errored++;
            log(`! ${id}: raw has no content — skipped.`);
            continue;
        }
        const existing = await fetchConversationRows(id);
        if (!existing) {
            // Raw with no indexed row: an orphan (failed save / pruned row). Do NOT
            // resurrect it here — that's reconcile/rebuild's job, with their full
            // dedup machinery. Backfill only re-shapes already-indexed conversations.
            summary.orphaned++;
            continue;
        }
        const chunks = chunkConversation(content);
        const desiredCount = chunks.length;
        // Hard guard against ever reaching the delete path with an empty
        // replacement set. The content check above already rejects blank raws, so
        // this is purely defensive — but a zero-chunk set would delete the
        // existing rows from both tiers and insert nothing, silently dropping an
        // indexed conversation. Skip and count it as an error instead.
        if (desiredCount === 0) {
            summary.errored++;
            log(`! ${id}: produced no chunks — skipped (existing rows untouched).`);
            continue;
        }
        // Already in the desired shape? (complete set of the right count, or a
        // single unmarked row for a short conversation.) Skip — keeps re-runs cheap
        // and avoids needless re-embedding. But NEVER skip a cross-tier duplicate:
        // the deduped `existing.rows` can look complete while a stale copy survives
        // in the other tier, so it still needs the delete-both-tiers repair below.
        if (!existing.hasCrossTierRows &&
            chunkSetComplete(existing.rows) &&
            expectedChunkCount(existing.rows) === desiredCount) {
            if (desiredCount > 1)
                summary.alreadyChunked++;
            else
                summary.singleChunk++;
            continue;
        }
        if (dryRun) {
            summary.rechunked++;
            log(`~ ${id}: would re-chunk into ${desiredCount} section(s) [${existing.targetTable}] (dry-run).`);
            if (throttleMs > 0)
                await sleep(throttleMs);
            continue;
        }
        await rechunkConversation(embedding, dataDir, id, content, chunks, existing);
        summary.rechunked++;
        log(`+ ${id}: re-chunked into ${desiredCount} section(s) [${existing.targetTable}].`);
        if (throttleMs > 0)
            await sleep(throttleMs);
    }
    return summary;
}
|
|
228
|
+
/**
 * Embed the full chunk set, then replace the conversation's rows.
 *
 * Metadata (summary/source/domain/intent/quality/topics/decisions/key_outputs/
 * idempotency_key/fingerprint/metadata_provider) is carried over from the
 * existing indexed row so an enriched conversation is NOT downgraded to its raw
 * artifact's basic metadata — only `content`, `embedding`, `indexed_at`,
 * `embedding_provider` and the chunk markers are recomputed.
 *
 * Ordering = embed-all → validate → delete-old → insert-new. The embeddings
 * exist and are checked before any delete, and the raw artifact survives, so an
 * interrupt in the brief delete→insert window is repaired by re-running.
 *
 * @param {object} embedding - Provider exposing `embedBatch(chunks)`.
 * @param {string} dataDir - Data directory, passed to the observability recorders.
 * @param {string} id - Conversation id.
 * @param {string} content - Full conversation text; only used to derive a
 *   fingerprint when the existing row has none.
 * @param {string[]} chunks - Replacement chunk texts, in order. Must be non-empty.
 * @param {{ targetTable: string, rows: object[] }} existing - Currently indexed
 *   rows and the tier the new set is written to.
 * @returns {Promise<void>}
 * @throws {Error} When `chunks` is empty or the provider does not return exactly
 *   one embedding per chunk. Thrown BEFORE any row is deleted.
 */
async function rechunkConversation(embedding, dataDir, id, content, chunks, existing) {
    const count = chunks.length;
    // Never reach the delete below without a replacement set to insert.
    if (count === 0) {
        throw new Error(`rechunk_conversation: no chunks for ${id} — existing rows left untouched.`);
    }
    const embeddings = await recordEmbed(dataDir, embedding, {
        pipeline: "backfill",
        operation: "rechunk_conversation",
        input_count: count,
    }, () => embedding.embedBatch(chunks));
    // A short batch would otherwise delete the old rows and then insert rows
    // whose `embedding` is undefined (or fail on insert, leaving nothing).
    if (embeddings == null || embeddings.length !== count) {
        throw new Error(`rechunk_conversation: embedding provider returned ` +
            `${embeddings?.length ?? 0} embedding(s) for ${count} chunk(s) of ${id} — ` +
            `existing rows left untouched.`);
    }
    // Representative = lowest chunk_index (or the lone row) — its metadata is the
    // conversation-level metadata shared by every chunk.
    const rep = [...existing.rows].sort((a, b) => Number(a.chunk_index ?? 0) - Number(b.chunk_index ?? 0))[0];
    const fingerprint = typeof rep.conversation_fingerprint === "string" &&
        rep.conversation_fingerprint.length > 0
        ? rep.conversation_fingerprint
        : conversationContentFingerprint(content);
    const embeddingProviderMarker = isPlaceholderEmbeddingProvider(embedding)
        ? RIFT_NONE_EMBEDDING_PROVIDER
        : "";
    const indexedAt = new Date().toISOString();
    const rows = chunks.map((chunk, i) => ({
        // A single-chunk conversation keeps the bare conversation id.
        id: count > 1 ? convChunkId(id, i) : id,
        content: chunk,
        summary: rep.summary,
        embedding: embeddings[i],
        source: rep.source,
        domain: rep.domain,
        intent: rep.intent,
        quality: rep.quality,
        topics: rep.topics,
        decisions: rep.decisions,
        key_outputs: rep.key_outputs,
        indexed_at: indexedAt,
        idempotency_key: rep.idempotency_key,
        conversation_fingerprint: fingerprint,
        metadata_provider: typeof rep.metadata_provider === "string" ? rep.metadata_provider : "",
        embedding_provider: embeddingProviderMarker,
        ...conversationChunkColumns(id, i, count),
    }));
    // Delete every prior row for this conversation (the whole row keyed by id and
    // any chunk set keyed by parent_id) from BOTH tiers — a compaction crash can
    // leave the same id in hot AND cold, and deleting only one tier would leave a
    // stale duplicate behind. Then insert the new set into the canonical target
    // tier (cold if the conversation lives in cold, else hot), preserving the
    // hot/cold placement rather than promoting old conversations into hot.
    const idFilter = eqFilter("id", id, { validateAsRowId: true });
    const parentFilter = eqFilter("parent_id", id, { validateAsRowId: true });
    for (const table of CONV_TABLES) {
        await getTable(table).delete(idFilter);
        await getTable(table).delete(parentFilter);
    }
    await recordIndexWrite(dataDir, {
        table: existing.targetTable,
        pipeline: "backfill",
        operation: "rechunk_conversation",
        row_count: rows.length,
    }, () => getTable(existing.targetTable).add(rows));
}
|
|
295
|
+
//# sourceMappingURL=chunk-backfill.js.map
|