@getrift/rift 0.0.0 → 0.1.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.dev.md +110 -0
- package/README.md +130 -0
- package/dist/src/auth/keychain.d.ts +25 -0
- package/dist/src/auth/keychain.d.ts.map +1 -0
- package/dist/src/auth/keychain.js +113 -0
- package/dist/src/auth/keychain.js.map +1 -0
- package/dist/src/auth/middleware.d.ts +20 -0
- package/dist/src/auth/middleware.d.ts.map +1 -0
- package/dist/src/auth/middleware.js +49 -0
- package/dist/src/auth/middleware.js.map +1 -0
- package/dist/src/auth/rate-limit.d.ts +16 -0
- package/dist/src/auth/rate-limit.d.ts.map +1 -0
- package/dist/src/auth/rate-limit.js +38 -0
- package/dist/src/auth/rate-limit.js.map +1 -0
- package/dist/src/auth/rotation.d.ts +67 -0
- package/dist/src/auth/rotation.d.ts.map +1 -0
- package/dist/src/auth/rotation.js +190 -0
- package/dist/src/auth/rotation.js.map +1 -0
- package/dist/src/backfill/project-context-batch-constructor.d.ts +127 -0
- package/dist/src/backfill/project-context-batch-constructor.d.ts.map +1 -0
- package/dist/src/backfill/project-context-batch-constructor.js +210 -0
- package/dist/src/backfill/project-context-batch-constructor.js.map +1 -0
- package/dist/src/capture/auto-capture.d.ts +162 -0
- package/dist/src/capture/auto-capture.d.ts.map +1 -0
- package/dist/src/capture/auto-capture.js +601 -0
- package/dist/src/capture/auto-capture.js.map +1 -0
- package/dist/src/capture/batch-budget.d.ts +90 -0
- package/dist/src/capture/batch-budget.d.ts.map +1 -0
- package/dist/src/capture/batch-budget.js +148 -0
- package/dist/src/capture/batch-budget.js.map +1 -0
- package/dist/src/capture/codex-cli-triage-provider.d.ts +17 -0
- package/dist/src/capture/codex-cli-triage-provider.d.ts.map +1 -0
- package/dist/src/capture/codex-cli-triage-provider.js +109 -0
- package/dist/src/capture/codex-cli-triage-provider.js.map +1 -0
- package/dist/src/capture/observability.d.ts +42 -0
- package/dist/src/capture/observability.d.ts.map +1 -0
- package/dist/src/capture/observability.js +87 -0
- package/dist/src/capture/observability.js.map +1 -0
- package/dist/src/capture/openai-triage-provider.d.ts +92 -0
- package/dist/src/capture/openai-triage-provider.d.ts.map +1 -0
- package/dist/src/capture/openai-triage-provider.js +267 -0
- package/dist/src/capture/openai-triage-provider.js.map +1 -0
- package/dist/src/capture/review-queue-index.d.ts +51 -0
- package/dist/src/capture/review-queue-index.d.ts.map +1 -0
- package/dist/src/capture/review-queue-index.js +204 -0
- package/dist/src/capture/review-queue-index.js.map +1 -0
- package/dist/src/capture/review-queue.d.ts +43 -0
- package/dist/src/capture/review-queue.d.ts.map +1 -0
- package/dist/src/capture/review-queue.js +116 -0
- package/dist/src/capture/review-queue.js.map +1 -0
- package/dist/src/capture/sources.d.ts +7 -0
- package/dist/src/capture/sources.d.ts.map +1 -0
- package/dist/src/capture/sources.js +3 -0
- package/dist/src/capture/sources.js.map +1 -0
- package/dist/src/capture/triage-lane.d.ts +39 -0
- package/dist/src/capture/triage-lane.d.ts.map +1 -0
- package/dist/src/capture/triage-lane.js +217 -0
- package/dist/src/capture/triage-lane.js.map +1 -0
- package/dist/src/capture/triage-provider.d.ts +75 -0
- package/dist/src/capture/triage-provider.d.ts.map +1 -0
- package/dist/src/capture/triage-provider.js +120 -0
- package/dist/src/capture/triage-provider.js.map +1 -0
- package/dist/src/capture/triage.d.ts +30 -0
- package/dist/src/capture/triage.d.ts.map +1 -0
- package/dist/src/capture/triage.js +48 -0
- package/dist/src/capture/triage.js.map +1 -0
- package/dist/src/cli/commands/backfill.d.ts +3 -0
- package/dist/src/cli/commands/backfill.d.ts.map +1 -0
- package/dist/src/cli/commands/backfill.js +1376 -0
- package/dist/src/cli/commands/backfill.js.map +1 -0
- package/dist/src/cli/commands/bulk-ingest.d.ts +3 -0
- package/dist/src/cli/commands/bulk-ingest.d.ts.map +1 -0
- package/dist/src/cli/commands/bulk-ingest.js +126 -0
- package/dist/src/cli/commands/bulk-ingest.js.map +1 -0
- package/dist/src/cli/commands/capture.d.ts +12 -0
- package/dist/src/cli/commands/capture.d.ts.map +1 -0
- package/dist/src/cli/commands/capture.js +123 -0
- package/dist/src/cli/commands/capture.js.map +1 -0
- package/dist/src/cli/commands/compact.d.ts +3 -0
- package/dist/src/cli/commands/compact.d.ts.map +1 -0
- package/dist/src/cli/commands/compact.js +70 -0
- package/dist/src/cli/commands/compact.js.map +1 -0
- package/dist/src/cli/commands/feedback.d.ts +22 -0
- package/dist/src/cli/commands/feedback.d.ts.map +1 -0
- package/dist/src/cli/commands/feedback.js +125 -0
- package/dist/src/cli/commands/feedback.js.map +1 -0
- package/dist/src/cli/commands/import.d.ts +19 -0
- package/dist/src/cli/commands/import.d.ts.map +1 -0
- package/dist/src/cli/commands/import.js +258 -0
- package/dist/src/cli/commands/import.js.map +1 -0
- package/dist/src/cli/commands/ingest.d.ts +3 -0
- package/dist/src/cli/commands/ingest.d.ts.map +1 -0
- package/dist/src/cli/commands/ingest.js +80 -0
- package/dist/src/cli/commands/ingest.js.map +1 -0
- package/dist/src/cli/commands/mcp-install.d.ts +25 -0
- package/dist/src/cli/commands/mcp-install.d.ts.map +1 -0
- package/dist/src/cli/commands/mcp-install.js +134 -0
- package/dist/src/cli/commands/mcp-install.js.map +1 -0
- package/dist/src/cli/commands/onboard.d.ts +98 -0
- package/dist/src/cli/commands/onboard.d.ts.map +1 -0
- package/dist/src/cli/commands/onboard.js +742 -0
- package/dist/src/cli/commands/onboard.js.map +1 -0
- package/dist/src/cli/commands/rebuild.d.ts +12 -0
- package/dist/src/cli/commands/rebuild.d.ts.map +1 -0
- package/dist/src/cli/commands/rebuild.js +164 -0
- package/dist/src/cli/commands/rebuild.js.map +1 -0
- package/dist/src/cli/commands/reconcile.d.ts +3 -0
- package/dist/src/cli/commands/reconcile.d.ts.map +1 -0
- package/dist/src/cli/commands/reconcile.js +56 -0
- package/dist/src/cli/commands/reconcile.js.map +1 -0
- package/dist/src/cli/commands/reindex.d.ts +3 -0
- package/dist/src/cli/commands/reindex.d.ts.map +1 -0
- package/dist/src/cli/commands/reindex.js +66 -0
- package/dist/src/cli/commands/reindex.js.map +1 -0
- package/dist/src/cli/commands/review.d.ts +13 -0
- package/dist/src/cli/commands/review.d.ts.map +1 -0
- package/dist/src/cli/commands/review.js +383 -0
- package/dist/src/cli/commands/review.js.map +1 -0
- package/dist/src/cli/commands/save.d.ts +3 -0
- package/dist/src/cli/commands/save.d.ts.map +1 -0
- package/dist/src/cli/commands/save.js +111 -0
- package/dist/src/cli/commands/save.js.map +1 -0
- package/dist/src/cli/commands/search.d.ts +35 -0
- package/dist/src/cli/commands/search.d.ts.map +1 -0
- package/dist/src/cli/commands/search.js +88 -0
- package/dist/src/cli/commands/search.js.map +1 -0
- package/dist/src/cli/commands/stats.d.ts +3 -0
- package/dist/src/cli/commands/stats.d.ts.map +1 -0
- package/dist/src/cli/commands/stats.js +42 -0
- package/dist/src/cli/commands/stats.js.map +1 -0
- package/dist/src/cli/commands/status.d.ts +15 -0
- package/dist/src/cli/commands/status.d.ts.map +1 -0
- package/dist/src/cli/commands/status.js +89 -0
- package/dist/src/cli/commands/status.js.map +1 -0
- package/dist/src/cli/commands/token-issue.d.ts +3 -0
- package/dist/src/cli/commands/token-issue.d.ts.map +1 -0
- package/dist/src/cli/commands/token-issue.js +25 -0
- package/dist/src/cli/commands/token-issue.js.map +1 -0
- package/dist/src/cli/commands/triage.d.ts +3 -0
- package/dist/src/cli/commands/triage.d.ts.map +1 -0
- package/dist/src/cli/commands/triage.js +125 -0
- package/dist/src/cli/commands/triage.js.map +1 -0
- package/dist/src/cli/commands/uninstall.d.ts +3 -0
- package/dist/src/cli/commands/uninstall.d.ts.map +1 -0
- package/dist/src/cli/commands/uninstall.js +238 -0
- package/dist/src/cli/commands/uninstall.js.map +1 -0
- package/dist/src/cli/feedback/feedback-config.d.ts +21 -0
- package/dist/src/cli/feedback/feedback-config.d.ts.map +1 -0
- package/dist/src/cli/feedback/feedback-config.js +43 -0
- package/dist/src/cli/feedback/feedback-config.js.map +1 -0
- package/dist/src/cli/feedback/feedback-history.d.ts +4 -0
- package/dist/src/cli/feedback/feedback-history.d.ts.map +1 -0
- package/dist/src/cli/feedback/feedback-history.js +115 -0
- package/dist/src/cli/feedback/feedback-history.js.map +1 -0
- package/dist/src/cli/feedback/feedback-payload.d.ts +53 -0
- package/dist/src/cli/feedback/feedback-payload.d.ts.map +1 -0
- package/dist/src/cli/feedback/feedback-payload.js +10 -0
- package/dist/src/cli/feedback/feedback-payload.js.map +1 -0
- package/dist/src/cli/feedback/feedback-relay.d.ts +15 -0
- package/dist/src/cli/feedback/feedback-relay.d.ts.map +1 -0
- package/dist/src/cli/feedback/feedback-relay.js +47 -0
- package/dist/src/cli/feedback/feedback-relay.js.map +1 -0
- package/dist/src/cli/feedback/feedback-status.d.ts +11 -0
- package/dist/src/cli/feedback/feedback-status.d.ts.map +1 -0
- package/dist/src/cli/feedback/feedback-status.js +122 -0
- package/dist/src/cli/feedback/feedback-status.js.map +1 -0
- package/dist/src/cli/http-client.d.ts +36 -0
- package/dist/src/cli/http-client.d.ts.map +1 -0
- package/dist/src/cli/http-client.js +153 -0
- package/dist/src/cli/http-client.js.map +1 -0
- package/dist/src/cli/index.d.ts +4 -0
- package/dist/src/cli/index.d.ts.map +1 -0
- package/dist/src/cli/index.js +66 -0
- package/dist/src/cli/index.js.map +1 -0
- package/dist/src/cli/job-poller.d.ts +13 -0
- package/dist/src/cli/job-poller.d.ts.map +1 -0
- package/dist/src/cli/job-poller.js +29 -0
- package/dist/src/cli/job-poller.js.map +1 -0
- package/dist/src/cli/mcp-config-writers/codex-toml.d.ts +10 -0
- package/dist/src/cli/mcp-config-writers/codex-toml.d.ts.map +1 -0
- package/dist/src/cli/mcp-config-writers/codex-toml.js +410 -0
- package/dist/src/cli/mcp-config-writers/codex-toml.js.map +1 -0
- package/dist/src/cli/mcp-config-writers/errors.d.ts +17 -0
- package/dist/src/cli/mcp-config-writers/errors.d.ts.map +1 -0
- package/dist/src/cli/mcp-config-writers/errors.js +13 -0
- package/dist/src/cli/mcp-config-writers/errors.js.map +1 -0
- package/dist/src/cli/mcp-config-writers/index.d.ts +18 -0
- package/dist/src/cli/mcp-config-writers/index.d.ts.map +1 -0
- package/dist/src/cli/mcp-config-writers/index.js +49 -0
- package/dist/src/cli/mcp-config-writers/index.js.map +1 -0
- package/dist/src/cli/mcp-config-writers/json-config.d.ts +12 -0
- package/dist/src/cli/mcp-config-writers/json-config.d.ts.map +1 -0
- package/dist/src/cli/mcp-config-writers/json-config.js +177 -0
- package/dist/src/cli/mcp-config-writers/json-config.js.map +1 -0
- package/dist/src/cli/mcp-config-writers/redact.d.ts +28 -0
- package/dist/src/cli/mcp-config-writers/redact.d.ts.map +1 -0
- package/dist/src/cli/mcp-config-writers/redact.js +48 -0
- package/dist/src/cli/mcp-config-writers/redact.js.map +1 -0
- package/dist/src/cli/mcp-config-writers/types.d.ts +32 -0
- package/dist/src/cli/mcp-config-writers/types.d.ts.map +1 -0
- package/dist/src/cli/mcp-config-writers/types.js +5 -0
- package/dist/src/cli/mcp-config-writers/types.js.map +1 -0
- package/dist/src/cli/output.d.ts +8 -0
- package/dist/src/cli/output.d.ts.map +1 -0
- package/dist/src/cli/output.js +34 -0
- package/dist/src/cli/output.js.map +1 -0
- package/dist/src/cli/status/friend-header.d.ts +33 -0
- package/dist/src/cli/status/friend-header.d.ts.map +1 -0
- package/dist/src/cli/status/friend-header.js +108 -0
- package/dist/src/cli/status/friend-header.js.map +1 -0
- package/dist/src/cli/status/local-signals.d.ts +14 -0
- package/dist/src/cli/status/local-signals.d.ts.map +1 -0
- package/dist/src/cli/status/local-signals.js +73 -0
- package/dist/src/cli/status/local-signals.js.map +1 -0
- package/dist/src/cli/token.d.ts +37 -0
- package/dist/src/cli/token.d.ts.map +1 -0
- package/dist/src/cli/token.js +105 -0
- package/dist/src/cli/token.js.map +1 -0
- package/dist/src/cli/uninstall/mcp-uninstall.d.ts +33 -0
- package/dist/src/cli/uninstall/mcp-uninstall.d.ts.map +1 -0
- package/dist/src/cli/uninstall/mcp-uninstall.js +181 -0
- package/dist/src/cli/uninstall/mcp-uninstall.js.map +1 -0
- package/dist/src/config/loader.d.ts +9 -0
- package/dist/src/config/loader.d.ts.map +1 -0
- package/dist/src/config/loader.js +73 -0
- package/dist/src/config/loader.js.map +1 -0
- package/dist/src/config/schema.d.ts +635 -0
- package/dist/src/config/schema.d.ts.map +1 -0
- package/dist/src/config/schema.js +208 -0
- package/dist/src/config/schema.js.map +1 -0
- package/dist/src/ingestion/bulk-ingest.d.ts +11 -0
- package/dist/src/ingestion/bulk-ingest.d.ts.map +1 -0
- package/dist/src/ingestion/bulk-ingest.js +11 -0
- package/dist/src/ingestion/bulk-ingest.js.map +1 -0
- package/dist/src/ingestion/extractor.d.ts +16 -0
- package/dist/src/ingestion/extractor.d.ts.map +1 -0
- package/dist/src/ingestion/extractor.js +85 -0
- package/dist/src/ingestion/extractor.js.map +1 -0
- package/dist/src/ingestion/extractors/docx.d.ts +3 -0
- package/dist/src/ingestion/extractors/docx.d.ts.map +1 -0
- package/dist/src/ingestion/extractors/docx.js +20 -0
- package/dist/src/ingestion/extractors/docx.js.map +1 -0
- package/dist/src/ingestion/extractors/pdf.d.ts +3 -0
- package/dist/src/ingestion/extractors/pdf.d.ts.map +1 -0
- package/dist/src/ingestion/extractors/pdf.js +32 -0
- package/dist/src/ingestion/extractors/pdf.js.map +1 -0
- package/dist/src/ingestion/historical-campaign.d.ts +340 -0
- package/dist/src/ingestion/historical-campaign.d.ts.map +1 -0
- package/dist/src/ingestion/historical-campaign.js +1010 -0
- package/dist/src/ingestion/historical-campaign.js.map +1 -0
- package/dist/src/ingestion/ignored-paths.d.ts +20 -0
- package/dist/src/ingestion/ignored-paths.d.ts.map +1 -0
- package/dist/src/ingestion/ignored-paths.js +45 -0
- package/dist/src/ingestion/ignored-paths.js.map +1 -0
- package/dist/src/ingestion/inbox-watcher.d.ts +12 -0
- package/dist/src/ingestion/inbox-watcher.d.ts.map +1 -0
- package/dist/src/ingestion/inbox-watcher.js +99 -0
- package/dist/src/ingestion/inbox-watcher.js.map +1 -0
- package/dist/src/ingestion/indexer.d.ts +32 -0
- package/dist/src/ingestion/indexer.d.ts.map +1 -0
- package/dist/src/ingestion/indexer.js +68 -0
- package/dist/src/ingestion/indexer.js.map +1 -0
- package/dist/src/ingestion/metadata-extraction.d.ts +53 -0
- package/dist/src/ingestion/metadata-extraction.d.ts.map +1 -0
- package/dist/src/ingestion/metadata-extraction.js +132 -0
- package/dist/src/ingestion/metadata-extraction.js.map +1 -0
- package/dist/src/ingestion/parsers/chatgpt-web.d.ts +29 -0
- package/dist/src/ingestion/parsers/chatgpt-web.d.ts.map +1 -0
- package/dist/src/ingestion/parsers/chatgpt-web.js +100 -0
- package/dist/src/ingestion/parsers/chatgpt-web.js.map +1 -0
- package/dist/src/ingestion/parsers/claude-code-jsonl.d.ts +16 -0
- package/dist/src/ingestion/parsers/claude-code-jsonl.d.ts.map +1 -0
- package/dist/src/ingestion/parsers/claude-code-jsonl.js +123 -0
- package/dist/src/ingestion/parsers/claude-code-jsonl.js.map +1 -0
- package/dist/src/ingestion/parsers/claude-web.d.ts +24 -0
- package/dist/src/ingestion/parsers/claude-web.d.ts.map +1 -0
- package/dist/src/ingestion/parsers/claude-web.js +78 -0
- package/dist/src/ingestion/parsers/claude-web.js.map +1 -0
- package/dist/src/ingestion/parsers/codex-jsonl.d.ts +18 -0
- package/dist/src/ingestion/parsers/codex-jsonl.d.ts.map +1 -0
- package/dist/src/ingestion/parsers/codex-jsonl.js +125 -0
- package/dist/src/ingestion/parsers/codex-jsonl.js.map +1 -0
- package/dist/src/ingestion/parsers/gemini-web.d.ts +16 -0
- package/dist/src/ingestion/parsers/gemini-web.d.ts.map +1 -0
- package/dist/src/ingestion/parsers/gemini-web.js +170 -0
- package/dist/src/ingestion/parsers/gemini-web.js.map +1 -0
- package/dist/src/ingestion/parsers/grok-web.d.ts +40 -0
- package/dist/src/ingestion/parsers/grok-web.d.ts.map +1 -0
- package/dist/src/ingestion/parsers/grok-web.js +67 -0
- package/dist/src/ingestion/parsers/grok-web.js.map +1 -0
- package/dist/src/ingestion/parsers/types.d.ts +34 -0
- package/dist/src/ingestion/parsers/types.d.ts.map +1 -0
- package/dist/src/ingestion/parsers/types.js +26 -0
- package/dist/src/ingestion/parsers/types.js.map +1 -0
- package/dist/src/ingestion/scanner.d.ts +48 -0
- package/dist/src/ingestion/scanner.d.ts.map +1 -0
- package/dist/src/ingestion/scanner.js +131 -0
- package/dist/src/ingestion/scanner.js.map +1 -0
- package/dist/src/ingestion/staging.d.ts +109 -0
- package/dist/src/ingestion/staging.d.ts.map +1 -0
- package/dist/src/ingestion/staging.js +411 -0
- package/dist/src/ingestion/staging.js.map +1 -0
- package/dist/src/ingestion/watcher.d.ts +65 -0
- package/dist/src/ingestion/watcher.d.ts.map +1 -0
- package/dist/src/ingestion/watcher.js +182 -0
- package/dist/src/ingestion/watcher.js.map +1 -0
- package/dist/src/jobs/codex-override-handler.d.ts +3 -0
- package/dist/src/jobs/codex-override-handler.d.ts.map +1 -0
- package/dist/src/jobs/codex-override-handler.js +16 -0
- package/dist/src/jobs/codex-override-handler.js.map +1 -0
- package/dist/src/jobs/handlers/compact.d.ts +30 -0
- package/dist/src/jobs/handlers/compact.d.ts.map +1 -0
- package/dist/src/jobs/handlers/compact.js +329 -0
- package/dist/src/jobs/handlers/compact.js.map +1 -0
- package/dist/src/jobs/handlers/ingest.d.ts +13 -0
- package/dist/src/jobs/handlers/ingest.d.ts.map +1 -0
- package/dist/src/jobs/handlers/ingest.js +255 -0
- package/dist/src/jobs/handlers/ingest.js.map +1 -0
- package/dist/src/jobs/handlers/reconcile.d.ts +29 -0
- package/dist/src/jobs/handlers/reconcile.d.ts.map +1 -0
- package/dist/src/jobs/handlers/reconcile.js +476 -0
- package/dist/src/jobs/handlers/reconcile.js.map +1 -0
- package/dist/src/jobs/handlers/reindex.d.ts +38 -0
- package/dist/src/jobs/handlers/reindex.d.ts.map +1 -0
- package/dist/src/jobs/handlers/reindex.js +52 -0
- package/dist/src/jobs/handlers/reindex.js.map +1 -0
- package/dist/src/jobs/handlers/save.d.ts +10 -0
- package/dist/src/jobs/handlers/save.d.ts.map +1 -0
- package/dist/src/jobs/handlers/save.js +206 -0
- package/dist/src/jobs/handlers/save.js.map +1 -0
- package/dist/src/jobs/handlers/triage.d.ts +47 -0
- package/dist/src/jobs/handlers/triage.d.ts.map +1 -0
- package/dist/src/jobs/handlers/triage.js +95 -0
- package/dist/src/jobs/handlers/triage.js.map +1 -0
- package/dist/src/jobs/queue.d.ts +107 -0
- package/dist/src/jobs/queue.d.ts.map +1 -0
- package/dist/src/jobs/queue.js +319 -0
- package/dist/src/jobs/queue.js.map +1 -0
- package/dist/src/jobs/types.d.ts +39 -0
- package/dist/src/jobs/types.d.ts.map +1 -0
- package/dist/src/jobs/types.js +29 -0
- package/dist/src/jobs/types.js.map +1 -0
- package/dist/src/jobs/worker-entry.d.ts +10 -0
- package/dist/src/jobs/worker-entry.d.ts.map +1 -0
- package/dist/src/jobs/worker-entry.js +210 -0
- package/dist/src/jobs/worker-entry.js.map +1 -0
- package/dist/src/jobs/worker-process.d.ts +50 -0
- package/dist/src/jobs/worker-process.d.ts.map +1 -0
- package/dist/src/jobs/worker-process.js +186 -0
- package/dist/src/jobs/worker-process.js.map +1 -0
- package/dist/src/jobs/worker.d.ts +11 -0
- package/dist/src/jobs/worker.d.ts.map +1 -0
- package/dist/src/jobs/worker.js +14 -0
- package/dist/src/jobs/worker.js.map +1 -0
- package/dist/src/main.d.ts +2 -0
- package/dist/src/main.d.ts.map +1 -0
- package/dist/src/main.js +425 -0
- package/dist/src/main.js.map +1 -0
- package/dist/src/mcp/errors.d.ts +8 -0
- package/dist/src/mcp/errors.d.ts.map +1 -0
- package/dist/src/mcp/errors.js +50 -0
- package/dist/src/mcp/errors.js.map +1 -0
- package/dist/src/mcp/server.d.ts +10 -0
- package/dist/src/mcp/server.d.ts.map +1 -0
- package/dist/src/mcp/server.js +94 -0
- package/dist/src/mcp/server.js.map +1 -0
- package/dist/src/mcp/tools/context-pack.d.ts +35 -0
- package/dist/src/mcp/tools/context-pack.d.ts.map +1 -0
- package/dist/src/mcp/tools/context-pack.js +97 -0
- package/dist/src/mcp/tools/context-pack.js.map +1 -0
- package/dist/src/mcp/tools/conversations-search.d.ts +38 -0
- package/dist/src/mcp/tools/conversations-search.d.ts.map +1 -0
- package/dist/src/mcp/tools/conversations-search.js +73 -0
- package/dist/src/mcp/tools/conversations-search.js.map +1 -0
- package/dist/src/mcp/tools/save.d.ts +32 -0
- package/dist/src/mcp/tools/save.d.ts.map +1 -0
- package/dist/src/mcp/tools/save.js +60 -0
- package/dist/src/mcp/tools/save.js.map +1 -0
- package/dist/src/mcp/tools/search.d.ts +33 -0
- package/dist/src/mcp/tools/search.d.ts.map +1 -0
- package/dist/src/mcp/tools/search.js +58 -0
- package/dist/src/mcp/tools/search.js.map +1 -0
- package/dist/src/mcp/tools/status.d.ts +17 -0
- package/dist/src/mcp/tools/status.d.ts.map +1 -0
- package/dist/src/mcp/tools/status.js +12 -0
- package/dist/src/mcp/tools/status.js.map +1 -0
- package/dist/src/observability/coverage.d.ts +100 -0
- package/dist/src/observability/coverage.d.ts.map +1 -0
- package/dist/src/observability/coverage.js +180 -0
- package/dist/src/observability/coverage.js.map +1 -0
- package/dist/src/observability/rift-context.d.ts +47 -0
- package/dist/src/observability/rift-context.d.ts.map +1 -0
- package/dist/src/observability/rift-context.js +118 -0
- package/dist/src/observability/rift-context.js.map +1 -0
- package/dist/src/observability/staleness.d.ts +43 -0
- package/dist/src/observability/staleness.d.ts.map +1 -0
- package/dist/src/observability/staleness.js +74 -0
- package/dist/src/observability/staleness.js.map +1 -0
- package/dist/src/observability/tool-usage-stats.d.ts +23 -0
- package/dist/src/observability/tool-usage-stats.d.ts.map +1 -0
- package/dist/src/observability/tool-usage-stats.js +83 -0
- package/dist/src/observability/tool-usage-stats.js.map +1 -0
- package/dist/src/observability/tool-usage.d.ts +68 -0
- package/dist/src/observability/tool-usage.d.ts.map +1 -0
- package/dist/src/observability/tool-usage.js +207 -0
- package/dist/src/observability/tool-usage.js.map +1 -0
- package/dist/src/onboarding/daemon-control.d.ts +33 -0
- package/dist/src/onboarding/daemon-control.d.ts.map +1 -0
- package/dist/src/onboarding/daemon-control.js +92 -0
- package/dist/src/onboarding/daemon-control.js.map +1 -0
- package/dist/src/onboarding/env-file.d.ts +18 -0
- package/dist/src/onboarding/env-file.d.ts.map +1 -0
- package/dist/src/onboarding/env-file.js +89 -0
- package/dist/src/onboarding/env-file.js.map +1 -0
- package/dist/src/onboarding/voyage-validate.d.ts +16 -0
- package/dist/src/onboarding/voyage-validate.d.ts.map +1 -0
- package/dist/src/onboarding/voyage-validate.js +85 -0
- package/dist/src/onboarding/voyage-validate.js.map +1 -0
- package/dist/src/providers/anthropic-digest.d.ts +23 -0
- package/dist/src/providers/anthropic-digest.d.ts.map +1 -0
- package/dist/src/providers/anthropic-digest.js +91 -0
- package/dist/src/providers/anthropic-digest.js.map +1 -0
- package/dist/src/providers/codex-cli-digest.d.ts +12 -0
- package/dist/src/providers/codex-cli-digest.d.ts.map +1 -0
- package/dist/src/providers/codex-cli-digest.js +70 -0
- package/dist/src/providers/codex-cli-digest.js.map +1 -0
- package/dist/src/providers/codex-cli-metadata-extraction.d.ts +14 -0
- package/dist/src/providers/codex-cli-metadata-extraction.d.ts.map +1 -0
- package/dist/src/providers/codex-cli-metadata-extraction.js +101 -0
- package/dist/src/providers/codex-cli-metadata-extraction.js.map +1 -0
- package/dist/src/providers/codex-cli-runner.d.ts +14 -0
- package/dist/src/providers/codex-cli-runner.d.ts.map +1 -0
- package/dist/src/providers/codex-cli-runner.js +272 -0
- package/dist/src/providers/codex-cli-runner.js.map +1 -0
- package/dist/src/providers/conversation-generation.d.ts +10 -0
- package/dist/src/providers/conversation-generation.d.ts.map +1 -0
- package/dist/src/providers/conversation-generation.js +54 -0
- package/dist/src/providers/conversation-generation.js.map +1 -0
- package/dist/src/providers/ollama-embed.d.ts +22 -0
- package/dist/src/providers/ollama-embed.d.ts.map +1 -0
- package/dist/src/providers/ollama-embed.js +133 -0
- package/dist/src/providers/ollama-embed.js.map +1 -0
- package/dist/src/providers/ollama.d.ts +42 -0
- package/dist/src/providers/ollama.d.ts.map +1 -0
- package/dist/src/providers/ollama.js +169 -0
- package/dist/src/providers/ollama.js.map +1 -0
- package/dist/src/providers/openai-metadata-extraction.d.ts +73 -0
- package/dist/src/providers/openai-metadata-extraction.d.ts.map +1 -0
- package/dist/src/providers/openai-metadata-extraction.js +161 -0
- package/dist/src/providers/openai-metadata-extraction.js.map +1 -0
- package/dist/src/providers/operator-overrides.d.ts +24 -0
- package/dist/src/providers/operator-overrides.d.ts.map +1 -0
- package/dist/src/providers/operator-overrides.js +84 -0
- package/dist/src/providers/operator-overrides.js.map +1 -0
- package/dist/src/providers/stub.d.ts +17 -0
- package/dist/src/providers/stub.d.ts.map +1 -0
- package/dist/src/providers/stub.js +72 -0
- package/dist/src/providers/stub.js.map +1 -0
- package/dist/src/providers/types.d.ts +82 -0
- package/dist/src/providers/types.d.ts.map +1 -0
- package/dist/src/providers/types.js +52 -0
- package/dist/src/providers/types.js.map +1 -0
- package/dist/src/providers/voyage.d.ts +23 -0
- package/dist/src/providers/voyage.d.ts.map +1 -0
- package/dist/src/providers/voyage.js +135 -0
- package/dist/src/providers/voyage.js.map +1 -0
- package/dist/src/retrieval/compact.d.ts +89 -0
- package/dist/src/retrieval/compact.d.ts.map +1 -0
- package/dist/src/retrieval/compact.js +348 -0
- package/dist/src/retrieval/compact.js.map +1 -0
- package/dist/src/retrieval/context-pack.d.ts +123 -0
- package/dist/src/retrieval/context-pack.d.ts.map +1 -0
- package/dist/src/retrieval/context-pack.js +553 -0
- package/dist/src/retrieval/context-pack.js.map +1 -0
- package/dist/src/retrieval/cwd.d.ts +25 -0
- package/dist/src/retrieval/cwd.d.ts.map +1 -0
- package/dist/src/retrieval/cwd.js +48 -0
- package/dist/src/retrieval/cwd.js.map +1 -0
- package/dist/src/retrieval/degraded.d.ts +20 -0
- package/dist/src/retrieval/degraded.d.ts.map +1 -0
- package/dist/src/retrieval/degraded.js +43 -0
- package/dist/src/retrieval/degraded.js.map +1 -0
- package/dist/src/retrieval/hybrid.d.ts +38 -0
- package/dist/src/retrieval/hybrid.d.ts.map +1 -0
- package/dist/src/retrieval/hybrid.js +82 -0
- package/dist/src/retrieval/hybrid.js.map +1 -0
- package/dist/src/retrieval/lexical.d.ts +28 -0
- package/dist/src/retrieval/lexical.d.ts.map +1 -0
- package/dist/src/retrieval/lexical.js +301 -0
- package/dist/src/retrieval/lexical.js.map +1 -0
- package/dist/src/retrieval/post-filter.d.ts +32 -0
- package/dist/src/retrieval/post-filter.d.ts.map +1 -0
- package/dist/src/retrieval/post-filter.js +57 -0
- package/dist/src/retrieval/post-filter.js.map +1 -0
- package/dist/src/retrieval/reranker.d.ts +72 -0
- package/dist/src/retrieval/reranker.d.ts.map +1 -0
- package/dist/src/retrieval/reranker.js +129 -0
- package/dist/src/retrieval/reranker.js.map +1 -0
- package/dist/src/retrieval/vector.d.ts +47 -0
- package/dist/src/retrieval/vector.d.ts.map +1 -0
- package/dist/src/retrieval/vector.js +112 -0
- package/dist/src/retrieval/vector.js.map +1 -0
- package/dist/src/runtime/legacy-migration.d.ts +27 -0
- package/dist/src/runtime/legacy-migration.d.ts.map +1 -0
- package/dist/src/runtime/legacy-migration.js +140 -0
- package/dist/src/runtime/legacy-migration.js.map +1 -0
- package/dist/src/runtime/legacy-name-guard.d.ts +35 -0
- package/dist/src/runtime/legacy-name-guard.d.ts.map +1 -0
- package/dist/src/runtime/legacy-name-guard.js +58 -0
- package/dist/src/runtime/legacy-name-guard.js.map +1 -0
- package/dist/src/runtime/rift-env.d.ts +14 -0
- package/dist/src/runtime/rift-env.d.ts.map +1 -0
- package/dist/src/runtime/rift-env.js +79 -0
- package/dist/src/runtime/rift-env.js.map +1 -0
- package/dist/src/runtime/watcher-startup.d.ts +2 -0
- package/dist/src/runtime/watcher-startup.d.ts.map +1 -0
- package/dist/src/runtime/watcher-startup.js +4 -0
- package/dist/src/runtime/watcher-startup.js.map +1 -0
- package/dist/src/security/archive.d.ts +23 -0
- package/dist/src/security/archive.d.ts.map +1 -0
- package/dist/src/security/archive.js +163 -0
- package/dist/src/security/archive.js.map +1 -0
- package/dist/src/security/paths.d.ts +21 -0
- package/dist/src/security/paths.d.ts.map +1 -0
- package/dist/src/security/paths.js +67 -0
- package/dist/src/security/paths.js.map +1 -0
- package/dist/src/server/app.d.ts +29 -0
- package/dist/src/server/app.d.ts.map +1 -0
- package/dist/src/server/app.js +226 -0
- package/dist/src/server/app.js.map +1 -0
- package/dist/src/server/build-info.d.ts +8 -0
- package/dist/src/server/build-info.d.ts.map +1 -0
- package/dist/src/server/build-info.js +61 -0
- package/dist/src/server/build-info.js.map +1 -0
- package/dist/src/server/lifecycle.d.ts +30 -0
- package/dist/src/server/lifecycle.d.ts.map +1 -0
- package/dist/src/server/lifecycle.js +59 -0
- package/dist/src/server/lifecycle.js.map +1 -0
- package/dist/src/server/middleware/multipart.d.ts +51 -0
- package/dist/src/server/middleware/multipart.d.ts.map +1 -0
- package/dist/src/server/middleware/multipart.js +86 -0
- package/dist/src/server/middleware/multipart.js.map +1 -0
- package/dist/src/server/routes/compact.d.ts +37 -0
- package/dist/src/server/routes/compact.d.ts.map +1 -0
- package/dist/src/server/routes/compact.js +77 -0
- package/dist/src/server/routes/compact.js.map +1 -0
- package/dist/src/server/routes/context.d.ts +5 -0
- package/dist/src/server/routes/context.d.ts.map +1 -0
- package/dist/src/server/routes/context.js +50 -0
- package/dist/src/server/routes/context.js.map +1 -0
- package/dist/src/server/routes/conversations-search.d.ts +4 -0
- package/dist/src/server/routes/conversations-search.d.ts.map +1 -0
- package/dist/src/server/routes/conversations-search.js +243 -0
- package/dist/src/server/routes/conversations-search.js.map +1 -0
- package/dist/src/server/routes/friend-status.d.ts +72 -0
- package/dist/src/server/routes/friend-status.d.ts.map +1 -0
- package/dist/src/server/routes/friend-status.js +71 -0
- package/dist/src/server/routes/friend-status.js.map +1 -0
- package/dist/src/server/routes/ingest.d.ts +15 -0
- package/dist/src/server/routes/ingest.d.ts.map +1 -0
- package/dist/src/server/routes/ingest.js +139 -0
- package/dist/src/server/routes/ingest.js.map +1 -0
- package/dist/src/server/routes/jobs.d.ts +10 -0
- package/dist/src/server/routes/jobs.d.ts.map +1 -0
- package/dist/src/server/routes/jobs.js +29 -0
- package/dist/src/server/routes/jobs.js.map +1 -0
- package/dist/src/server/routes/mcp-usage.d.ts +13 -0
- package/dist/src/server/routes/mcp-usage.d.ts.map +1 -0
- package/dist/src/server/routes/mcp-usage.js +17 -0
- package/dist/src/server/routes/mcp-usage.js.map +1 -0
- package/dist/src/server/routes/reconcile.d.ts +4 -0
- package/dist/src/server/routes/reconcile.d.ts.map +1 -0
- package/dist/src/server/routes/reconcile.js +43 -0
- package/dist/src/server/routes/reconcile.js.map +1 -0
- package/dist/src/server/routes/reindex.d.ts +4 -0
- package/dist/src/server/routes/reindex.d.ts.map +1 -0
- package/dist/src/server/routes/reindex.js +74 -0
- package/dist/src/server/routes/reindex.js.map +1 -0
- package/dist/src/server/routes/save.d.ts +40 -0
- package/dist/src/server/routes/save.d.ts.map +1 -0
- package/dist/src/server/routes/save.js +112 -0
- package/dist/src/server/routes/save.js.map +1 -0
- package/dist/src/server/routes/search.d.ts +5 -0
- package/dist/src/server/routes/search.d.ts.map +1 -0
- package/dist/src/server/routes/search.js +400 -0
- package/dist/src/server/routes/search.js.map +1 -0
- package/dist/src/server/routes/stats.d.ts +10 -0
- package/dist/src/server/routes/stats.d.ts.map +1 -0
- package/dist/src/server/routes/stats.js +15 -0
- package/dist/src/server/routes/stats.js.map +1 -0
- package/dist/src/server/routes/status.d.ts +20 -0
- package/dist/src/server/routes/status.d.ts.map +1 -0
- package/dist/src/server/routes/status.js +31 -0
- package/dist/src/server/routes/status.js.map +1 -0
- package/dist/src/server/routes/triage.d.ts +4 -0
- package/dist/src/server/routes/triage.d.ts.map +1 -0
- package/dist/src/server/routes/triage.js +94 -0
- package/dist/src/server/routes/triage.js.map +1 -0
- package/dist/src/server/save-quality.d.ts +21 -0
- package/dist/src/server/save-quality.d.ts.map +1 -0
- package/dist/src/server/save-quality.js +51 -0
- package/dist/src/server/save-quality.js.map +1 -0
- package/dist/src/storage/atomic.d.ts +8 -0
- package/dist/src/storage/atomic.d.ts.map +1 -0
- package/dist/src/storage/atomic.js +22 -0
- package/dist/src/storage/atomic.js.map +1 -0
- package/dist/src/storage/db.d.ts +15 -0
- package/dist/src/storage/db.d.ts.map +1 -0
- package/dist/src/storage/db.js +43 -0
- package/dist/src/storage/db.js.map +1 -0
- package/dist/src/storage/integrity.d.ts +11 -0
- package/dist/src/storage/integrity.d.ts.map +1 -0
- package/dist/src/storage/integrity.js +66 -0
- package/dist/src/storage/integrity.js.map +1 -0
- package/dist/src/storage/rebuild.d.ts +37 -0
- package/dist/src/storage/rebuild.d.ts.map +1 -0
- package/dist/src/storage/rebuild.js +353 -0
- package/dist/src/storage/rebuild.js.map +1 -0
- package/dist/src/storage/shadow-swap.d.ts +20 -0
- package/dist/src/storage/shadow-swap.d.ts.map +1 -0
- package/dist/src/storage/shadow-swap.js +163 -0
- package/dist/src/storage/shadow-swap.js.map +1 -0
- package/dist/src/storage/tables.d.ts +77 -0
- package/dist/src/storage/tables.d.ts.map +1 -0
- package/dist/src/storage/tables.js +196 -0
- package/dist/src/storage/tables.js.map +1 -0
- package/package.json +45 -14
- package/index.js +0 -3
|
@@ -0,0 +1,1376 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* backfill CLI command — staged historical import.
|
|
3
|
+
*
|
|
4
|
+
* Imports web conversation exports from a dedicated staging area.
|
|
5
|
+
* The founder moves/copies exports into batch folders and runs:
|
|
6
|
+
*
|
|
7
|
+
* rift backfill --batch data/imports/exports-batch-1 --source chatgpt_web
|
|
8
|
+
*
|
|
9
|
+
* Slice 4 introduces a preflight-first workflow. Every run:
|
|
10
|
+
* - refreshes `manifest.json` (catalog of batch files)
|
|
11
|
+
* - refreshes `preflight.json` (junk + duplicate classification)
|
|
12
|
+
*
|
|
13
|
+
* The default operator flow is:
|
|
14
|
+
* 1. rift backfill --batch ... --source ... --dry-run (preflight only)
|
|
15
|
+
* 2. inspect manifest.json / preflight.json
|
|
16
|
+
* 3. rift backfill --batch ... --source ... (ingest)
|
|
17
|
+
*
|
|
18
|
+
* Reuses the existing POST /ingest pipeline (archive security,
|
|
19
|
+
* parsers, extraction, quarantine). Emits a per-batch report.
|
|
20
|
+
*
|
|
21
|
+
* Batch rerun is idempotent — the ingest pipeline's idempotency_key
|
|
22
|
+
* prevents duplicate conversations.
|
|
23
|
+
*/
|
|
24
|
+
import fs from "node:fs";
|
|
25
|
+
import path from "node:path";
|
|
26
|
+
import crypto from "node:crypto";
|
|
27
|
+
import { Command } from "commander";
|
|
28
|
+
import { loadConfig } from "../../config/loader.js";
|
|
29
|
+
import { createHttpClient, readToken, resolveBaseUrl, CliError, } from "../http-client.js";
|
|
30
|
+
import { pollJob } from "../job-poller.js";
|
|
31
|
+
import { isJobFailure } from "../output.js";
|
|
32
|
+
import { isSourceSupported, INGEST_SOURCES, SUPPORTED_INGEST_SOURCES, } from "../../ingestion/parsers/types.js";
|
|
33
|
+
import { stageBatch, SOURCE_EXTENSIONS, } from "../../ingestion/staging.js";
|
|
34
|
+
import { extractArchive } from "../../security/archive.js";
|
|
35
|
+
import { parseChatGPTWeb } from "../../ingestion/parsers/chatgpt-web.js";
|
|
36
|
+
import { parseClaudeWeb } from "../../ingestion/parsers/claude-web.js";
|
|
37
|
+
import { parseGeminiWeb } from "../../ingestion/parsers/gemini-web.js";
|
|
38
|
+
import { parseGrokWeb } from "../../ingestion/parsers/grok-web.js";
|
|
39
|
+
import { addToReview, reviewQueueSize } from "../../capture/review-queue.js";
|
|
40
|
+
import { loadHistoricalCampaignState, writeHistoricalCampaignState, ensureHistoricalCampaignBatch, startHistoricalCampaignBatch, listTerminalHistoricalItemIds, listTerminalHistoricalFiles, isFileQuarantineCandidate, collectHistoricalBatchQuarantineReasons, markHistoricalFile, markHistoricalItem, finalizeHistoricalFiles, addHistoricalRunBudgetUsage, evaluateHistoricalBatch, getHistoricalCampaignBudgetWindow, recomputeHistoricalCampaignSummary, historicalCampaignReportPath, summarizeHistoricalBatchState, snapshotHistoricalCampaignReport, snapshotHistoricalNoOpReport, writeHistoricalCampaignReport, formatHistoricalCampaignSummary, reportHistoricalCampaignBlockers, } from "../../ingestion/historical-campaign.js";
|
|
41
|
+
import { HISTORICAL_BACKFILL_CAPABILITY_HEADER, INTERNAL_HISTORICAL_INGEST_CODEX_ROUTE, INTERNAL_HISTORICAL_TRIAGE_CODEX_ROUTE, effectiveTriageProviderName, isOperatorOverrideProvider, readHistoricalBackfillCapability, triageMeteringModeForProvider, triageMeteringModeForProviderName, } from "../../providers/operator-overrides.js";
|
|
42
|
+
// Lookup table from web-export source id (the same ids listed in the
// `--source` option help) to the parser function imported for that export.
// NOTE(review): the consumer of this table is outside the visible chunk —
// presumably indexed by the validated `--source` value; confirm at call site.
const HISTORICAL_PARSERS = {
|
|
43
|
+
chatgpt_web: parseChatGPTWeb,
|
|
44
|
+
claude_web: parseClaudeWeb,
|
|
45
|
+
gemini_web: parseGeminiWeb,
|
|
46
|
+
grok_web: parseGrokWeb,
|
|
47
|
+
};
|
|
48
|
+
export function makeBackfillCommand() {
|
|
49
|
+
return new Command("backfill")
|
|
50
|
+
.description("Stage and import web conversation exports from a batch folder")
|
|
51
|
+
.requiredOption("--batch <path>", "Path to batch folder (e.g., data/imports/exports-batch-1)")
|
|
52
|
+
.requiredOption("--source <source>", "Source type (currently supported: chatgpt_web, claude_web, gemini_web, grok_web)")
|
|
53
|
+
.option("--dry-run", "Preflight only — refresh manifest/preflight and exit without ingesting")
|
|
54
|
+
.option("--force-unstaged", "Ingest files even if they are flagged as junk in preflight")
|
|
55
|
+
.option("--provider <provider>", "Backfill-only override for both triage and extraction (supported: codex-cli)")
|
|
56
|
+
.option("--triage-provider <provider>", "Backfill-only triage override (supported: codex-cli)")
|
|
57
|
+
.option("--extraction-provider <provider>", "Backfill-only extraction override (supported: codex-cli)")
|
|
58
|
+
.option("--limit <count>", "Cap this run to the first N unresolved files for smoke testing", parsePositiveInteger)
|
|
59
|
+
.action(async (opts, cmd) => {
|
|
60
|
+
const globalOpts = cmd.optsWithGlobals();
|
|
61
|
+
try {
|
|
62
|
+
// Validate source
|
|
63
|
+
const allSources = INGEST_SOURCES;
|
|
64
|
+
if (!allSources.includes(opts.source)) {
|
|
65
|
+
process.stderr.write(`Error: Unknown source "${opts.source}". ` +
|
|
66
|
+
`Known sources: ${INGEST_SOURCES.join(", ")}\n`);
|
|
67
|
+
process.exitCode = 1;
|
|
68
|
+
return;
|
|
69
|
+
}
|
|
70
|
+
if (!isSourceSupported(opts.source)) {
|
|
71
|
+
process.stderr.write(`Error: "${opts.source}" is not yet supported — no real export fixture exists to validate the parser against.\n` +
|
|
72
|
+
`To enable: add a real export sample to tests/fixtures/exports/${opts.source}/ and implement the parser.\n` +
|
|
73
|
+
`Currently supported: ${SUPPORTED_INGEST_SOURCES.join(", ")}\n`);
|
|
74
|
+
process.exitCode = 1;
|
|
75
|
+
return;
|
|
76
|
+
}
|
|
77
|
+
// Validate batch directory
|
|
78
|
+
const batchDir = path.resolve(opts.batch);
|
|
79
|
+
if (!fs.existsSync(batchDir) ||
|
|
80
|
+
!fs.statSync(batchDir).isDirectory()) {
|
|
81
|
+
process.stderr.write(`Error: Not a directory: ${batchDir}\n`);
|
|
82
|
+
process.exitCode = 1;
|
|
83
|
+
return;
|
|
84
|
+
}
|
|
85
|
+
const source = opts.source;
|
|
86
|
+
const providerOverride = resolveBackfillProviderOverride(opts.provider, "provider");
|
|
87
|
+
const triageProviderOverride = resolveBackfillProviderOverride(opts.triageProvider ?? providerOverride, "triage-provider");
|
|
88
|
+
const extractionProviderOverride = resolveBackfillProviderOverride(opts.extractionProvider ?? providerOverride, "extraction-provider");
|
|
89
|
+
const requestedTriageMeteringMode = triageMeteringModeForProvider(triageProviderOverride);
|
|
90
|
+
const explicitLimit = typeof opts.limit === "number" && Number.isFinite(opts.limit)
|
|
91
|
+
? opts.limit
|
|
92
|
+
: undefined;
|
|
93
|
+
// --- Stage the batch: manifest + preflight, persisted to disk. ---
|
|
94
|
+
const { manifest, preflight } = stageBatch({ batchDir, source });
|
|
95
|
+
const batchName = manifest.batch_name;
|
|
96
|
+
writePreflightSummary({
|
|
97
|
+
batchDir,
|
|
98
|
+
manifest,
|
|
99
|
+
preflight,
|
|
100
|
+
stderr: process.stderr,
|
|
101
|
+
});
|
|
102
|
+
if (opts.dryRun) {
|
|
103
|
+
if (globalOpts.json) {
|
|
104
|
+
process.stdout.write(JSON.stringify({
|
|
105
|
+
mode: "preflight",
|
|
106
|
+
batch: batchName,
|
|
107
|
+
source,
|
|
108
|
+
manifest_path: path.join(batchDir, "manifest.json"),
|
|
109
|
+
preflight_path: path.join(batchDir, "preflight.json"),
|
|
110
|
+
preflight,
|
|
111
|
+
}, null, 2) + "\n");
|
|
112
|
+
}
|
|
113
|
+
process.stderr.write("\n(dry-run — no changes made beyond manifest.json / preflight.json)\n");
|
|
114
|
+
return;
|
|
115
|
+
}
|
|
116
|
+
const config = loadConfig(globalOpts.config);
|
|
117
|
+
const dataDir = config.data_paths.data_dir;
|
|
118
|
+
const now = new Date();
|
|
119
|
+
const nowIso = now.toISOString();
|
|
120
|
+
const reportPath = historicalCampaignReportPath(batchDir);
|
|
121
|
+
const campaignState = loadHistoricalCampaignState(dataDir);
|
|
122
|
+
const campaignBatch = ensureHistoricalCampaignBatch(campaignState, batchName, source, nowIso);
|
|
123
|
+
const reviewBacklogBefore = reviewQueueSize(dataDir);
|
|
124
|
+
const terminalItemIds = listTerminalHistoricalItemIds(campaignBatch);
|
|
125
|
+
const terminalFilesBefore = listTerminalHistoricalFiles(campaignBatch);
|
|
126
|
+
const rerunnableFiles = preflight.files
|
|
127
|
+
.filter((file) => !terminalFilesBefore.has(file.name))
|
|
128
|
+
.sort((left, right) => left.name.localeCompare(right.name));
|
|
129
|
+
for (const file of rerunnableFiles) {
|
|
130
|
+
ensureHistoricalPendingFile(campaignBatch, file.name, nowIso);
|
|
131
|
+
}
|
|
132
|
+
if (rerunnableFiles.length === 0) {
|
|
133
|
+
const noOpNextAction = "Batch is already terminal. Prepare the next staged batch.";
|
|
134
|
+
const terminalReport = snapshotHistoricalNoOpReport({
|
|
135
|
+
state: campaignState,
|
|
136
|
+
batch: campaignBatch,
|
|
137
|
+
preflight,
|
|
138
|
+
now,
|
|
139
|
+
reviewBacklogNow: reviewBacklogBefore,
|
|
140
|
+
nextAction: noOpNextAction,
|
|
141
|
+
});
|
|
142
|
+
campaignBatch.next_action = noOpNextAction;
|
|
143
|
+
campaignBatch.report_path = reportPath;
|
|
144
|
+
campaignState.last_run_at = nowIso;
|
|
145
|
+
await writeHistoricalCampaignReport(reportPath, terminalReport);
|
|
146
|
+
await writeHistoricalCampaignState(dataDir, campaignState);
|
|
147
|
+
if (globalOpts.json) {
|
|
148
|
+
process.stdout.write(JSON.stringify(terminalReport, null, 2) + "\n");
|
|
149
|
+
}
|
|
150
|
+
else {
|
|
151
|
+
process.stdout.write(formatHistoricalCampaignSummary(terminalReport));
|
|
152
|
+
}
|
|
153
|
+
return;
|
|
154
|
+
}
|
|
155
|
+
const budgetWindow = getHistoricalCampaignBudgetWindow(campaignState, now);
|
|
156
|
+
const runFileCount = Math.min(rerunnableFiles.length, budgetWindow.remaining_daily_files, budgetWindow.remaining_weekly_files, explicitLimit ?? Number.POSITIVE_INFINITY);
|
|
157
|
+
const filesForRun = rerunnableFiles.slice(0, runFileCount);
|
|
158
|
+
const deferredFiles = rerunnableFiles.slice(runFileCount);
|
|
159
|
+
const limitedByOperator = explicitLimit !== undefined &&
|
|
160
|
+
runFileCount === explicitLimit &&
|
|
161
|
+
rerunnableFiles.length > explicitLimit;
|
|
162
|
+
const preflightQuarantined = filesForRun.filter(isFileQuarantineCandidate);
|
|
163
|
+
const preflightQuarantineReasons = collectHistoricalBatchQuarantineReasons({
|
|
164
|
+
preflight,
|
|
165
|
+
failedFilesRate: 0,
|
|
166
|
+
});
|
|
167
|
+
const ingestable = filesForRun.filter((file) => {
|
|
168
|
+
if (preflightQuarantined.some((entry) => entry.name === file.name))
|
|
169
|
+
return false;
|
|
170
|
+
if (!file.supported)
|
|
171
|
+
return false;
|
|
172
|
+
if (file.junk_markers.length === 0)
|
|
173
|
+
return true;
|
|
174
|
+
return Boolean(opts.forceUnstaged);
|
|
175
|
+
});
|
|
176
|
+
const skipped = filesForRun.filter((file) => !ingestable.includes(file) &&
|
|
177
|
+
!preflightQuarantined.some((entry) => entry.name === file.name));
|
|
178
|
+
const triageCandidates = ingestable
|
|
179
|
+
.flatMap((file) => buildHistoricalTriageItem(file, source))
|
|
180
|
+
.filter((item) => !terminalItemIds.has(item.id));
|
|
181
|
+
const triageCandidateById = new Map(triageCandidates.map((item) => [item.id, item]));
|
|
182
|
+
const cachedTriageById = new Map();
|
|
183
|
+
const requestedTriageProviderName = effectiveTriageProviderName(triageProviderOverride);
|
|
184
|
+
for (const candidate of triageCandidates) {
|
|
185
|
+
const existingItem = campaignBatch.items[candidate.id];
|
|
186
|
+
if (existingItem?.terminal || !existingItem?.triage)
|
|
187
|
+
continue;
|
|
188
|
+
if (existingItem.triage.provider !== requestedTriageProviderName) {
|
|
189
|
+
continue;
|
|
190
|
+
}
|
|
191
|
+
cachedTriageById.set(candidate.id, {
|
|
192
|
+
item_id: candidate.id,
|
|
193
|
+
file: candidate.file.name,
|
|
194
|
+
kind: candidate.kind,
|
|
195
|
+
...(candidate.conversationId
|
|
196
|
+
? { conversation_id: candidate.conversationId }
|
|
197
|
+
: {}),
|
|
198
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
199
|
+
...(candidate.createdAt ? { created_at: candidate.createdAt } : {}),
|
|
200
|
+
conversation_count: candidate.conversationCount,
|
|
201
|
+
triage: existingItem.triage,
|
|
202
|
+
});
|
|
203
|
+
}
|
|
204
|
+
const triageCandidatesNeedingWork = triageCandidates.filter((candidate) => !cachedTriageById.has(candidate.id));
|
|
205
|
+
const blockers = reportHistoricalCampaignBlockers({
|
|
206
|
+
state: campaignState,
|
|
207
|
+
now,
|
|
208
|
+
reviewBacklogBefore,
|
|
209
|
+
meteringMode: requestedTriageMeteringMode,
|
|
210
|
+
triageBatchCostCapUsd: config.openai.max_cost_per_batch_usd,
|
|
211
|
+
triageCandidates: triageCandidatesNeedingWork.length,
|
|
212
|
+
});
|
|
213
|
+
if (runFileCount === 0 || blockers.length > 0) {
|
|
214
|
+
const nextAction = blockers.length > 0
|
|
215
|
+
? blockers.join("; ")
|
|
216
|
+
: `Daily or weekly file budget is exhausted. Resume the remaining ${rerunnableFiles.length} file(s) in the next budget window.`;
|
|
217
|
+
const blockedStatus = blockers.some((reason) => reason.includes("triage spend") || reason.includes("review backlog"))
|
|
218
|
+
? "failed"
|
|
219
|
+
: "running";
|
|
220
|
+
const previousBatchStatus = campaignBatch.status;
|
|
221
|
+
const preserveTerminalOutcome = previousBatchStatus === "failed" ||
|
|
222
|
+
previousBatchStatus === "quarantined";
|
|
223
|
+
const finalBlockedStatus = preserveTerminalOutcome ? previousBatchStatus : blockedStatus;
|
|
224
|
+
startHistoricalCampaignBatch(campaignBatch, {
|
|
225
|
+
totalFiles: preflight.total_files,
|
|
226
|
+
runFiles: 0,
|
|
227
|
+
triageCandidates: triageCandidates.length,
|
|
228
|
+
reviewBacklogBefore,
|
|
229
|
+
meteringMode: requestedTriageMeteringMode,
|
|
230
|
+
nowIso,
|
|
231
|
+
});
|
|
232
|
+
campaignBatch.last_run_at = nowIso;
|
|
233
|
+
campaignBatch.status = finalBlockedStatus;
|
|
234
|
+
campaignBatch.review_backlog_after = reviewBacklogBefore;
|
|
235
|
+
campaignBatch.next_action = nextAction;
|
|
236
|
+
campaignBatch.report_path = reportPath;
|
|
237
|
+
const blockedReport = snapshotHistoricalCampaignReport({
|
|
238
|
+
state: campaignState,
|
|
239
|
+
batch: campaignBatch,
|
|
240
|
+
preflight,
|
|
241
|
+
now,
|
|
242
|
+
runFileCount: 0,
|
|
243
|
+
reviewBacklogAfter: reviewBacklogBefore,
|
|
244
|
+
triageSpendUsd: 0,
|
|
245
|
+
processedFiles: [],
|
|
246
|
+
status: finalBlockedStatus,
|
|
247
|
+
nextAction,
|
|
248
|
+
results: [],
|
|
249
|
+
});
|
|
250
|
+
recomputeHistoricalCampaignSummary(campaignState);
|
|
251
|
+
campaignState.last_run_at = nowIso;
|
|
252
|
+
await writeHistoricalCampaignReport(reportPath, blockedReport);
|
|
253
|
+
await writeHistoricalCampaignState(dataDir, campaignState);
|
|
254
|
+
if (globalOpts.json) {
|
|
255
|
+
process.stdout.write(JSON.stringify(blockedReport, null, 2) + "\n");
|
|
256
|
+
}
|
|
257
|
+
else {
|
|
258
|
+
process.stdout.write(formatHistoricalCampaignSummary(blockedReport));
|
|
259
|
+
}
|
|
260
|
+
if (blockedStatus === "failed") {
|
|
261
|
+
process.exitCode = 1;
|
|
262
|
+
}
|
|
263
|
+
return;
|
|
264
|
+
}
|
|
265
|
+
startHistoricalCampaignBatch(campaignBatch, {
|
|
266
|
+
totalFiles: preflight.total_files,
|
|
267
|
+
runFiles: runFileCount,
|
|
268
|
+
triageCandidates: triageCandidates.length,
|
|
269
|
+
reviewBacklogBefore,
|
|
270
|
+
meteringMode: requestedTriageMeteringMode,
|
|
271
|
+
nowIso,
|
|
272
|
+
});
|
|
273
|
+
for (const file of filesForRun) {
|
|
274
|
+
markHistoricalFile(campaignBatch, file.name, {
|
|
275
|
+
terminal: false,
|
|
276
|
+
lastResult: "running",
|
|
277
|
+
nowIso,
|
|
278
|
+
});
|
|
279
|
+
}
|
|
280
|
+
const results = [];
|
|
281
|
+
let triageSpendUsd = 0;
|
|
282
|
+
let reviewBacklogAfter = reviewBacklogBefore;
|
|
283
|
+
const touchedFilesThisRun = new Set();
|
|
284
|
+
let stopReason = deferredFiles.length > 0
|
|
285
|
+
? limitedByOperator
|
|
286
|
+
? `Processed ${runFileCount} file(s) in this smoke run. Resume the remaining ${deferredFiles.length} file(s) when ready.`
|
|
287
|
+
: `Processed ${runFileCount} file(s) in this run. Resume the remaining ${deferredFiles.length} file(s) in the next daily or weekly budget window.`
|
|
288
|
+
: null;
|
|
289
|
+
let reviewBacklogStopped = false;
|
|
290
|
+
let budgetUsageCommitted = false;
|
|
291
|
+
await persistHistoricalCampaignCheckpoint({
|
|
292
|
+
dataDir,
|
|
293
|
+
state: campaignState,
|
|
294
|
+
batch: campaignBatch,
|
|
295
|
+
preflight,
|
|
296
|
+
now,
|
|
297
|
+
reportPath,
|
|
298
|
+
runFileCount,
|
|
299
|
+
reviewBacklogAfter,
|
|
300
|
+
triageSpendUsd,
|
|
301
|
+
processedFiles: filesForRun.map((file) => file.name),
|
|
302
|
+
results,
|
|
303
|
+
status: "running",
|
|
304
|
+
nextAction: stopReason ??
|
|
305
|
+
`Running historical batch ${batchName}. Resume only unresolved files if interrupted.`,
|
|
306
|
+
});
|
|
307
|
+
try {
|
|
308
|
+
for (const file of preflightQuarantined) {
|
|
309
|
+
touchedFilesThisRun.add(file.name);
|
|
310
|
+
markHistoricalFile(campaignBatch, file.name, {
|
|
311
|
+
terminal: true,
|
|
312
|
+
terminalStatus: "quarantined",
|
|
313
|
+
lastResult: "quarantined",
|
|
314
|
+
nowIso,
|
|
315
|
+
});
|
|
316
|
+
results.push({
|
|
317
|
+
file: file.name,
|
|
318
|
+
status: "quarantined",
|
|
319
|
+
error: file.junk_markers.join(",") ||
|
|
320
|
+
file.notes.join(",") ||
|
|
321
|
+
"quarantined_by_policy",
|
|
322
|
+
});
|
|
323
|
+
}
|
|
324
|
+
await persistHistoricalCampaignCheckpoint({
|
|
325
|
+
dataDir,
|
|
326
|
+
state: campaignState,
|
|
327
|
+
batch: campaignBatch,
|
|
328
|
+
preflight,
|
|
329
|
+
now,
|
|
330
|
+
reportPath,
|
|
331
|
+
runFileCount,
|
|
332
|
+
reviewBacklogAfter,
|
|
333
|
+
triageSpendUsd,
|
|
334
|
+
processedFiles: filesForRun.map((file) => file.name),
|
|
335
|
+
results,
|
|
336
|
+
status: "running",
|
|
337
|
+
nextAction: stopReason ??
|
|
338
|
+
`Running historical batch ${batchName}. Resume only unresolved files if interrupted.`,
|
|
339
|
+
});
|
|
340
|
+
if (preflightQuarantineReasons.length > 0) {
|
|
341
|
+
for (const file of filesForRun.filter((candidate) => !preflightQuarantined.some((entry) => entry.name === candidate.name))) {
|
|
342
|
+
touchedFilesThisRun.add(file.name);
|
|
343
|
+
markHistoricalFile(campaignBatch, file.name, {
|
|
344
|
+
terminal: true,
|
|
345
|
+
terminalStatus: "quarantined",
|
|
346
|
+
lastResult: "quarantined",
|
|
347
|
+
nowIso,
|
|
348
|
+
});
|
|
349
|
+
results.push({
|
|
350
|
+
file: file.name,
|
|
351
|
+
status: "quarantined",
|
|
352
|
+
error: `batch_quarantined:${preflightQuarantineReasons.join(";")}`,
|
|
353
|
+
});
|
|
354
|
+
}
|
|
355
|
+
finalizeHistoricalFiles(campaignBatch, nowIso);
|
|
356
|
+
addHistoricalRunBudgetUsage(campaignState, {
|
|
357
|
+
weekKey: budgetWindow.week_key,
|
|
358
|
+
dayKey: budgetWindow.day_key,
|
|
359
|
+
filesProcessed: touchedFilesThisRun.size,
|
|
360
|
+
triageSpendUsd,
|
|
361
|
+
});
|
|
362
|
+
budgetUsageCommitted = true;
|
|
363
|
+
const evaluation = evaluateHistoricalBatch({
|
|
364
|
+
state: campaignState,
|
|
365
|
+
batch: campaignBatch,
|
|
366
|
+
preflight,
|
|
367
|
+
now,
|
|
368
|
+
runFileCount,
|
|
369
|
+
reviewBacklogAfter,
|
|
370
|
+
triageSpendUsd,
|
|
371
|
+
reportPath,
|
|
372
|
+
processedFiles: filesForRun.map((file) => file.name),
|
|
373
|
+
results,
|
|
374
|
+
});
|
|
375
|
+
campaignBatch.report_path = reportPath;
|
|
376
|
+
campaignState.last_run_at = nowIso;
|
|
377
|
+
await writeHistoricalCampaignReport(reportPath, evaluation.report);
|
|
378
|
+
await writeHistoricalCampaignState(dataDir, campaignState);
|
|
379
|
+
if (globalOpts.json) {
|
|
380
|
+
process.stdout.write(JSON.stringify(evaluation.report, null, 2) + "\n");
|
|
381
|
+
}
|
|
382
|
+
else {
|
|
383
|
+
process.stdout.write(formatHistoricalCampaignSummary(evaluation.report));
|
|
384
|
+
}
|
|
385
|
+
process.exitCode = 1;
|
|
386
|
+
return;
|
|
387
|
+
}
|
|
388
|
+
for (const file of skipped) {
|
|
389
|
+
touchedFilesThisRun.add(file.name);
|
|
390
|
+
markHistoricalFile(campaignBatch, file.name, {
|
|
391
|
+
terminal: false,
|
|
392
|
+
lastResult: "skipped",
|
|
393
|
+
nowIso,
|
|
394
|
+
});
|
|
395
|
+
results.push({
|
|
396
|
+
file: file.name,
|
|
397
|
+
status: "skipped",
|
|
398
|
+
error: file.junk_markers.join(",") || "skipped_by_preflight",
|
|
399
|
+
});
|
|
400
|
+
}
|
|
401
|
+
await persistHistoricalCampaignCheckpoint({
|
|
402
|
+
dataDir,
|
|
403
|
+
state: campaignState,
|
|
404
|
+
batch: campaignBatch,
|
|
405
|
+
preflight,
|
|
406
|
+
now,
|
|
407
|
+
reportPath,
|
|
408
|
+
runFileCount,
|
|
409
|
+
reviewBacklogAfter,
|
|
410
|
+
triageSpendUsd,
|
|
411
|
+
processedFiles: filesForRun.map((file) => file.name),
|
|
412
|
+
results,
|
|
413
|
+
status: "running",
|
|
414
|
+
nextAction: stopReason ??
|
|
415
|
+
`Running historical batch ${batchName}. Resume only unresolved files if interrupted.`,
|
|
416
|
+
});
|
|
417
|
+
const triageById = new Map(cachedTriageById);
|
|
418
|
+
let client = null;
|
|
419
|
+
if (triageCandidates.length > 0) {
|
|
420
|
+
const baseUrl = resolveBaseUrl(globalOpts.config);
|
|
421
|
+
const token = await readToken();
|
|
422
|
+
if (!token) {
|
|
423
|
+
throw new Error("No auth token. Run: rift token issue");
|
|
424
|
+
}
|
|
425
|
+
client = createHttpClient({ baseUrl, token });
|
|
426
|
+
}
|
|
427
|
+
if (triageCandidatesNeedingWork.length > 0) {
|
|
428
|
+
const triageOutput = await runHistoricalTriage({
|
|
429
|
+
batchDir,
|
|
430
|
+
client: client,
|
|
431
|
+
dataDir,
|
|
432
|
+
items: triageCandidatesNeedingWork,
|
|
433
|
+
...(triageProviderOverride
|
|
434
|
+
? { providerOverride: triageProviderOverride }
|
|
435
|
+
: {}),
|
|
436
|
+
});
|
|
437
|
+
triageSpendUsd = triageOutput.spent_usd ?? 0;
|
|
438
|
+
if (triageOutput.metering_mode) {
|
|
439
|
+
campaignBatch.metering_mode = triageOutput.metering_mode;
|
|
440
|
+
}
|
|
441
|
+
for (const item of triageOutput.items) {
|
|
442
|
+
triageById.set(item.item_id, item);
|
|
443
|
+
}
|
|
444
|
+
for (const candidate of triageCandidatesNeedingWork) {
|
|
445
|
+
const triageItem = triageById.get(candidate.id);
|
|
446
|
+
if (!triageItem?.triage)
|
|
447
|
+
continue;
|
|
448
|
+
touchedFilesThisRun.add(candidate.file.name);
|
|
449
|
+
markHistoricalItem(campaignBatch, {
|
|
450
|
+
itemId: candidate.id,
|
|
451
|
+
file: candidate.file.name,
|
|
452
|
+
...(candidate.conversationId
|
|
453
|
+
? { conversationId: candidate.conversationId }
|
|
454
|
+
: {}),
|
|
455
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
456
|
+
lastResult: "triaged",
|
|
457
|
+
terminal: false,
|
|
458
|
+
triage: triageItem.triage,
|
|
459
|
+
nowIso,
|
|
460
|
+
});
|
|
461
|
+
}
|
|
462
|
+
await persistHistoricalCampaignCheckpoint({
|
|
463
|
+
dataDir,
|
|
464
|
+
state: campaignState,
|
|
465
|
+
batch: campaignBatch,
|
|
466
|
+
preflight,
|
|
467
|
+
now,
|
|
468
|
+
reportPath,
|
|
469
|
+
runFileCount,
|
|
470
|
+
reviewBacklogAfter,
|
|
471
|
+
triageSpendUsd,
|
|
472
|
+
processedFiles: filesForRun.map((file) => file.name),
|
|
473
|
+
results,
|
|
474
|
+
status: "running",
|
|
475
|
+
nextAction: stopReason ??
|
|
476
|
+
`Running historical batch ${batchName}. Resume only unresolved files if interrupted.`,
|
|
477
|
+
});
|
|
478
|
+
}
|
|
479
|
+
const actualTriageProviderName = resolveHistoricalRunTriageProviderName(triageCandidates, triageById);
|
|
480
|
+
if (actualTriageProviderName) {
|
|
481
|
+
campaignBatch.metering_mode = triageMeteringModeForProviderName(actualTriageProviderName);
|
|
482
|
+
}
|
|
483
|
+
for (const candidate of triageCandidates) {
|
|
484
|
+
const label = formatHistoricalLabel(candidate);
|
|
485
|
+
const triageItem = triageById.get(candidate.id);
|
|
486
|
+
if (!triageItem) {
|
|
487
|
+
touchedFilesThisRun.add(candidate.file.name);
|
|
488
|
+
markHistoricalItem(campaignBatch, {
|
|
489
|
+
itemId: candidate.id,
|
|
490
|
+
file: candidate.file.name,
|
|
491
|
+
...(candidate.conversationId
|
|
492
|
+
? { conversationId: candidate.conversationId }
|
|
493
|
+
: {}),
|
|
494
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
495
|
+
lastResult: "error",
|
|
496
|
+
terminal: false,
|
|
497
|
+
nowIso,
|
|
498
|
+
});
|
|
499
|
+
results.push({
|
|
500
|
+
file: candidate.file.name,
|
|
501
|
+
...(candidate.conversationId
|
|
502
|
+
? { conversation_id: candidate.conversationId }
|
|
503
|
+
: {}),
|
|
504
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
505
|
+
status: "error",
|
|
506
|
+
error: "missing_triage_result",
|
|
507
|
+
});
|
|
508
|
+
process.stderr.write(` ${label}: missing triage result\n`);
|
|
509
|
+
continue;
|
|
510
|
+
}
|
|
511
|
+
if (triageItem.error) {
|
|
512
|
+
touchedFilesThisRun.add(candidate.file.name);
|
|
513
|
+
markHistoricalItem(campaignBatch, {
|
|
514
|
+
itemId: candidate.id,
|
|
515
|
+
file: candidate.file.name,
|
|
516
|
+
...(candidate.conversationId
|
|
517
|
+
? { conversationId: candidate.conversationId }
|
|
518
|
+
: {}),
|
|
519
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
520
|
+
lastResult: "error",
|
|
521
|
+
terminal: false,
|
|
522
|
+
nowIso,
|
|
523
|
+
});
|
|
524
|
+
results.push({
|
|
525
|
+
file: candidate.file.name,
|
|
526
|
+
...(candidate.conversationId
|
|
527
|
+
? { conversation_id: candidate.conversationId }
|
|
528
|
+
: {}),
|
|
529
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
530
|
+
status: "error",
|
|
531
|
+
error: triageItem.error,
|
|
532
|
+
});
|
|
533
|
+
process.stderr.write(` ${label}: triage error: ${triageItem.error}\n`);
|
|
534
|
+
continue;
|
|
535
|
+
}
|
|
536
|
+
if (!triageItem.triage) {
|
|
537
|
+
touchedFilesThisRun.add(candidate.file.name);
|
|
538
|
+
markHistoricalItem(campaignBatch, {
|
|
539
|
+
itemId: candidate.id,
|
|
540
|
+
file: candidate.file.name,
|
|
541
|
+
...(candidate.conversationId
|
|
542
|
+
? { conversationId: candidate.conversationId }
|
|
543
|
+
: {}),
|
|
544
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
545
|
+
lastResult: "error",
|
|
546
|
+
terminal: false,
|
|
547
|
+
nowIso,
|
|
548
|
+
});
|
|
549
|
+
results.push({
|
|
550
|
+
file: candidate.file.name,
|
|
551
|
+
...(candidate.conversationId
|
|
552
|
+
? { conversation_id: candidate.conversationId }
|
|
553
|
+
: {}),
|
|
554
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
555
|
+
status: "error",
|
|
556
|
+
error: "triage_result_missing",
|
|
557
|
+
});
|
|
558
|
+
process.stderr.write(` ${label}: triage result missing\n`);
|
|
559
|
+
continue;
|
|
560
|
+
}
|
|
561
|
+
const triage = triageItem.triage;
|
|
562
|
+
process.stderr.write(` ${label}: ${triage.lane} (score=${triage.score.toFixed(3)}, decision=${triage.decision})\n`);
|
|
563
|
+
if (triage.lane === "archive_only") {
|
|
564
|
+
touchedFilesThisRun.add(candidate.file.name);
|
|
565
|
+
markHistoricalItem(campaignBatch, {
|
|
566
|
+
itemId: candidate.id,
|
|
567
|
+
file: candidate.file.name,
|
|
568
|
+
...(candidate.conversationId
|
|
569
|
+
? { conversationId: candidate.conversationId }
|
|
570
|
+
: {}),
|
|
571
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
572
|
+
lastResult: "archive_only",
|
|
573
|
+
terminal: true,
|
|
574
|
+
terminalStatus: "archive_only",
|
|
575
|
+
triage,
|
|
576
|
+
nowIso,
|
|
577
|
+
});
|
|
578
|
+
results.push({
|
|
579
|
+
file: candidate.file.name,
|
|
580
|
+
...(candidate.conversationId
|
|
581
|
+
? { conversation_id: candidate.conversationId }
|
|
582
|
+
: {}),
|
|
583
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
584
|
+
status: "archive_only",
|
|
585
|
+
lane: triage.lane,
|
|
586
|
+
});
|
|
587
|
+
continue;
|
|
588
|
+
}
|
|
589
|
+
if (triage.lane === "review") {
|
|
590
|
+
touchedFilesThisRun.add(candidate.file.name);
|
|
591
|
+
await addToReview(dataDir, {
|
|
592
|
+
source,
|
|
593
|
+
conversationId: candidate.kind === "conversation" && candidate.conversationId
|
|
594
|
+
? historicalConversationKey(source, candidate.conversationId)
|
|
595
|
+
: `backfill:${batchName}:${candidate.file.name}`,
|
|
596
|
+
summary: triage.summary ||
|
|
597
|
+
candidate.title ||
|
|
598
|
+
candidate.file.name,
|
|
599
|
+
content: triageCandidateById.get(candidate.id)?.reviewContent ??
|
|
600
|
+
candidate.file.name,
|
|
601
|
+
triage,
|
|
602
|
+
});
|
|
603
|
+
markHistoricalItem(campaignBatch, {
|
|
604
|
+
itemId: candidate.id,
|
|
605
|
+
file: candidate.file.name,
|
|
606
|
+
...(candidate.conversationId
|
|
607
|
+
? { conversationId: candidate.conversationId }
|
|
608
|
+
: {}),
|
|
609
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
610
|
+
lastResult: "review",
|
|
611
|
+
terminal: true,
|
|
612
|
+
terminalStatus: "review",
|
|
613
|
+
triage,
|
|
614
|
+
nowIso,
|
|
615
|
+
});
|
|
616
|
+
results.push({
|
|
617
|
+
file: candidate.file.name,
|
|
618
|
+
...(candidate.conversationId
|
|
619
|
+
? { conversation_id: candidate.conversationId }
|
|
620
|
+
: {}),
|
|
621
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
622
|
+
status: "review",
|
|
623
|
+
lane: triage.lane,
|
|
624
|
+
});
|
|
625
|
+
reviewBacklogAfter = reviewQueueSize(dataDir);
|
|
626
|
+
if (reviewBacklogAfter >
|
|
627
|
+
400) {
|
|
628
|
+
stopReason =
|
|
629
|
+
"Stop historical import. Review backlog exceeds 400 open items.";
|
|
630
|
+
reviewBacklogStopped = true;
|
|
631
|
+
process.stderr.write(` ${label}: stopping batch because review backlog is now ${reviewBacklogAfter}\n`);
|
|
632
|
+
break;
|
|
633
|
+
}
|
|
634
|
+
continue;
|
|
635
|
+
}
|
|
636
|
+
const upload = candidate.upload;
|
|
637
|
+
if (!upload) {
|
|
638
|
+
touchedFilesThisRun.add(candidate.file.name);
|
|
639
|
+
const error = candidate.kind === "file_fallback"
|
|
640
|
+
? "parser_zero_conversations"
|
|
641
|
+
: "missing_promoted_payload";
|
|
642
|
+
markHistoricalItem(campaignBatch, {
|
|
643
|
+
itemId: candidate.id,
|
|
644
|
+
file: candidate.file.name,
|
|
645
|
+
...(candidate.conversationId
|
|
646
|
+
? { conversationId: candidate.conversationId }
|
|
647
|
+
: {}),
|
|
648
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
649
|
+
lastResult: "error",
|
|
650
|
+
terminal: false,
|
|
651
|
+
triage,
|
|
652
|
+
nowIso,
|
|
653
|
+
});
|
|
654
|
+
results.push({
|
|
655
|
+
file: candidate.file.name,
|
|
656
|
+
...(candidate.conversationId
|
|
657
|
+
? { conversation_id: candidate.conversationId }
|
|
658
|
+
: {}),
|
|
659
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
660
|
+
status: "error",
|
|
661
|
+
lane: triage.lane,
|
|
662
|
+
error,
|
|
663
|
+
});
|
|
664
|
+
process.stderr.write(` Error: ${error} for ${label}\n`);
|
|
665
|
+
continue;
|
|
666
|
+
}
|
|
667
|
+
process.stderr.write(` Promoting ${label} into /ingest\n`);
|
|
668
|
+
try {
|
|
669
|
+
if (!client) {
|
|
670
|
+
throw new Error("Missing HTTP client for historical promotion");
|
|
671
|
+
}
|
|
672
|
+
const form = new FormData();
|
|
673
|
+
form.append("source", source);
|
|
674
|
+
// Fastify's multipart parser validates fields available at the
|
|
675
|
+
// moment it encounters the file part. Keep the historical
|
|
676
|
+
// idempotency key ahead of the file so larger uploads don't
|
|
677
|
+
// lose it and fail the internal override route validation.
|
|
678
|
+
form.append("idempotency_key", candidate.kind === "conversation" && candidate.conversationId
|
|
679
|
+
? historicalConversationKey(source, candidate.conversationId)
|
|
680
|
+
: `backfill:${batchName}:${candidate.file.name}`);
|
|
681
|
+
form.append("file", new Blob([new Uint8Array(upload.fileData)]), upload.filename);
|
|
682
|
+
const { data } = await client.postMultipart(historicalIngestEndpointPath(extractionProviderOverride), form, historicalInternalRouteOptions(dataDir, extractionProviderOverride));
|
|
683
|
+
const resp = data;
|
|
684
|
+
if (resp.duplicate) {
|
|
685
|
+
touchedFilesThisRun.add(candidate.file.name);
|
|
686
|
+
markHistoricalItem(campaignBatch, {
|
|
687
|
+
itemId: candidate.id,
|
|
688
|
+
file: candidate.file.name,
|
|
689
|
+
...(candidate.conversationId
|
|
690
|
+
? { conversationId: candidate.conversationId }
|
|
691
|
+
: {}),
|
|
692
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
693
|
+
lastResult: "duplicate",
|
|
694
|
+
terminal: true,
|
|
695
|
+
terminalStatus: "promote_to_extract",
|
|
696
|
+
triage,
|
|
697
|
+
nowIso,
|
|
698
|
+
});
|
|
699
|
+
process.stderr.write(" Duplicate (already ingested)\n");
|
|
700
|
+
results.push({
|
|
701
|
+
file: candidate.file.name,
|
|
702
|
+
...(candidate.conversationId
|
|
703
|
+
? { conversation_id: candidate.conversationId }
|
|
704
|
+
: {}),
|
|
705
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
706
|
+
status: "duplicate",
|
|
707
|
+
lane: triage.lane,
|
|
708
|
+
});
|
|
709
|
+
continue;
|
|
710
|
+
}
|
|
711
|
+
const result = await pollJob({
|
|
712
|
+
get: client.get.bind(client),
|
|
713
|
+
jobId: resp.job_id,
|
|
714
|
+
});
|
|
715
|
+
if (isJobFailure(result.job, result.timedOut)) {
|
|
716
|
+
touchedFilesThisRun.add(candidate.file.name);
|
|
717
|
+
const error = result.job.error ??
|
|
718
|
+
(result.timedOut ? "timed out" : "unknown");
|
|
719
|
+
markHistoricalItem(campaignBatch, {
|
|
720
|
+
itemId: candidate.id,
|
|
721
|
+
file: candidate.file.name,
|
|
722
|
+
...(candidate.conversationId
|
|
723
|
+
? { conversationId: candidate.conversationId }
|
|
724
|
+
: {}),
|
|
725
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
726
|
+
lastResult: "failed",
|
|
727
|
+
terminal: false,
|
|
728
|
+
triage,
|
|
729
|
+
nowIso,
|
|
730
|
+
});
|
|
731
|
+
results.push({
|
|
732
|
+
file: candidate.file.name,
|
|
733
|
+
...(candidate.conversationId
|
|
734
|
+
? { conversation_id: candidate.conversationId }
|
|
735
|
+
: {}),
|
|
736
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
737
|
+
status: "failed",
|
|
738
|
+
lane: triage.lane,
|
|
739
|
+
error,
|
|
740
|
+
});
|
|
741
|
+
process.stderr.write(` Failed: ${error}\n`);
|
|
742
|
+
}
|
|
743
|
+
else {
|
|
744
|
+
touchedFilesThisRun.add(candidate.file.name);
|
|
745
|
+
markHistoricalItem(campaignBatch, {
|
|
746
|
+
itemId: candidate.id,
|
|
747
|
+
file: candidate.file.name,
|
|
748
|
+
...(candidate.conversationId
|
|
749
|
+
? { conversationId: candidate.conversationId }
|
|
750
|
+
: {}),
|
|
751
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
752
|
+
lastResult: "promote_to_extract",
|
|
753
|
+
terminal: true,
|
|
754
|
+
terminalStatus: "promote_to_extract",
|
|
755
|
+
triage,
|
|
756
|
+
nowIso,
|
|
757
|
+
});
|
|
758
|
+
results.push({
|
|
759
|
+
file: candidate.file.name,
|
|
760
|
+
...(candidate.conversationId
|
|
761
|
+
? { conversation_id: candidate.conversationId }
|
|
762
|
+
: {}),
|
|
763
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
764
|
+
status: "completed",
|
|
765
|
+
lane: triage.lane,
|
|
766
|
+
});
|
|
767
|
+
process.stderr.write(" Completed\n");
|
|
768
|
+
}
|
|
769
|
+
}
|
|
770
|
+
catch (err) {
|
|
771
|
+
touchedFilesThisRun.add(candidate.file.name);
|
|
772
|
+
const msg = err instanceof CliError
|
|
773
|
+
? formatCliErrorForHistoricalBackfill(err)
|
|
774
|
+
: err instanceof Error
|
|
775
|
+
? err.message
|
|
776
|
+
: String(err);
|
|
777
|
+
markHistoricalItem(campaignBatch, {
|
|
778
|
+
itemId: candidate.id,
|
|
779
|
+
file: candidate.file.name,
|
|
780
|
+
...(candidate.conversationId
|
|
781
|
+
? { conversationId: candidate.conversationId }
|
|
782
|
+
: {}),
|
|
783
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
784
|
+
lastResult: "error",
|
|
785
|
+
terminal: false,
|
|
786
|
+
triage,
|
|
787
|
+
nowIso,
|
|
788
|
+
});
|
|
789
|
+
results.push({
|
|
790
|
+
file: candidate.file.name,
|
|
791
|
+
...(candidate.conversationId
|
|
792
|
+
? { conversation_id: candidate.conversationId }
|
|
793
|
+
: {}),
|
|
794
|
+
...(candidate.title ? { title: candidate.title } : {}),
|
|
795
|
+
status: "error",
|
|
796
|
+
lane: triage.lane,
|
|
797
|
+
error: msg,
|
|
798
|
+
});
|
|
799
|
+
process.stderr.write(` Error: ${msg}\n`);
|
|
800
|
+
}
|
|
801
|
+
}
|
|
802
|
+
finalizeHistoricalFiles(campaignBatch, nowIso);
|
|
803
|
+
reviewBacklogAfter = reviewQueueSize(dataDir);
|
|
804
|
+
addHistoricalRunBudgetUsage(campaignState, {
|
|
805
|
+
weekKey: budgetWindow.week_key,
|
|
806
|
+
dayKey: budgetWindow.day_key,
|
|
807
|
+
filesProcessed: touchedFilesThisRun.size,
|
|
808
|
+
triageSpendUsd,
|
|
809
|
+
});
|
|
810
|
+
budgetUsageCommitted = true;
|
|
811
|
+
const hasProcessingErrors = results.some((result) => result.status === "failed" || result.status === "error");
|
|
812
|
+
const operatorLimitedCompletion = limitedByOperator && !hasProcessingErrors;
|
|
813
|
+
const evaluation = evaluateHistoricalBatch({
|
|
814
|
+
state: campaignState,
|
|
815
|
+
batch: campaignBatch,
|
|
816
|
+
preflight,
|
|
817
|
+
now,
|
|
818
|
+
runFileCount,
|
|
819
|
+
reviewBacklogAfter,
|
|
820
|
+
triageSpendUsd,
|
|
821
|
+
reportPath,
|
|
822
|
+
processedFiles: filesForRun.map((file) => file.name),
|
|
823
|
+
results,
|
|
824
|
+
...(!operatorLimitedCompletion &&
|
|
825
|
+
!reviewBacklogStopped &&
|
|
826
|
+
stopReason &&
|
|
827
|
+
!hasProcessingErrors
|
|
828
|
+
? { incompleteStatus: "running" }
|
|
829
|
+
: {}),
|
|
830
|
+
});
|
|
831
|
+
const isResumableIncompleteStatus = evaluation.report.status === "running";
|
|
832
|
+
if (operatorLimitedCompletion &&
|
|
833
|
+
isResumableIncompleteStatus &&
|
|
834
|
+
evaluation.report.status === "running") {
|
|
835
|
+
evaluation.report.status = "completed_with_warnings";
|
|
836
|
+
evaluation.report.next_action = stopReason;
|
|
837
|
+
campaignBatch.status = "completed_with_warnings";
|
|
838
|
+
campaignBatch.next_action = stopReason;
|
|
839
|
+
recomputeHistoricalCampaignSummary(campaignState);
|
|
840
|
+
}
|
|
841
|
+
else if (reviewBacklogStopped &&
|
|
842
|
+
!hasProcessingErrors &&
|
|
843
|
+
isResumableIncompleteStatus &&
|
|
844
|
+
evaluation.report.status === "running") {
|
|
845
|
+
evaluation.report.status = "completed_with_warnings";
|
|
846
|
+
evaluation.report.next_action = stopReason;
|
|
847
|
+
campaignBatch.status = "completed_with_warnings";
|
|
848
|
+
campaignBatch.next_action = stopReason;
|
|
849
|
+
recomputeHistoricalCampaignSummary(campaignState);
|
|
850
|
+
}
|
|
851
|
+
else if (stopReason && !hasProcessingErrors && isResumableIncompleteStatus) {
|
|
852
|
+
evaluation.report.status = "running";
|
|
853
|
+
evaluation.report.next_action = stopReason;
|
|
854
|
+
campaignBatch.status = "running";
|
|
855
|
+
campaignBatch.next_action = stopReason;
|
|
856
|
+
recomputeHistoricalCampaignSummary(campaignState);
|
|
857
|
+
}
|
|
858
|
+
campaignBatch.report_path = reportPath;
|
|
859
|
+
campaignState.last_run_at = nowIso;
|
|
860
|
+
await writeHistoricalCampaignReport(reportPath, evaluation.report);
|
|
861
|
+
await writeHistoricalCampaignState(dataDir, campaignState);
|
|
862
|
+
if (globalOpts.json) {
|
|
863
|
+
process.stdout.write(JSON.stringify(evaluation.report, null, 2) + "\n");
|
|
864
|
+
}
|
|
865
|
+
else {
|
|
866
|
+
process.stdout.write(formatHistoricalCampaignSummary(evaluation.report));
|
|
867
|
+
}
|
|
868
|
+
if (campaignBatch.status === "failed" || campaignBatch.status === "quarantined") {
|
|
869
|
+
process.exitCode = 1;
|
|
870
|
+
}
|
|
871
|
+
}
|
|
872
|
+
catch (runErr) {
|
|
873
|
+
finalizeHistoricalFiles(campaignBatch, new Date().toISOString());
|
|
874
|
+
reviewBacklogAfter = reviewQueueSize(dataDir);
|
|
875
|
+
if (!budgetUsageCommitted && (triageSpendUsd > 0 || results.length > 0)) {
|
|
876
|
+
addHistoricalRunBudgetUsage(campaignState, {
|
|
877
|
+
weekKey: budgetWindow.week_key,
|
|
878
|
+
dayKey: budgetWindow.day_key,
|
|
879
|
+
filesProcessed: touchedFilesThisRun.size,
|
|
880
|
+
triageSpendUsd,
|
|
881
|
+
});
|
|
882
|
+
budgetUsageCommitted = true;
|
|
883
|
+
}
|
|
884
|
+
await persistHistoricalCampaignCheckpoint({
|
|
885
|
+
dataDir,
|
|
886
|
+
state: campaignState,
|
|
887
|
+
batch: campaignBatch,
|
|
888
|
+
preflight,
|
|
889
|
+
now,
|
|
890
|
+
reportPath,
|
|
891
|
+
runFileCount,
|
|
892
|
+
reviewBacklogAfter,
|
|
893
|
+
triageSpendUsd,
|
|
894
|
+
processedFiles: filesForRun.map((file) => file.name),
|
|
895
|
+
results,
|
|
896
|
+
status: "failed",
|
|
897
|
+
nextAction: "Batch failed mid-run. Resume only unresolved files after fixing the error.",
|
|
898
|
+
incompleteStatus: "failed",
|
|
899
|
+
});
|
|
900
|
+
throw runErr;
|
|
901
|
+
}
|
|
902
|
+
}
|
|
903
|
+
catch (err) {
|
|
904
|
+
process.stderr.write(`Error: ${err instanceof CliError ? err.message : err instanceof Error ? err.message : String(err)}\n`);
|
|
905
|
+
process.exitCode = 1;
|
|
906
|
+
}
|
|
907
|
+
});
|
|
908
|
+
}
|
|
909
|
+
/**
 * Makes sure `batch.files` holds a record for `fileName`.
 *
 * When the slot is `null` or `undefined`, a fresh non-terminal record with
 * `last_result: "pending"` and zero attempts is stored; an existing record
 * is left untouched.
 *
 * @param {{ files: Object }} batch - Batch whose `files` map is updated in place.
 * @param {string} fileName - Key of the record; also stored as its `file` field.
 * @param {string} nowIso - Value stored as `updated_at` on a newly created record.
 * @returns {void}
 */
function ensureHistoricalPendingFile(batch, fileName, nowIso) {
    const current = batch.files[fileName];
    if (current !== null && current !== undefined) {
        return;
    }
    batch.files[fileName] = {
        file: fileName,
        terminal: false,
        item_ids: [],
        last_result: "pending",
        attempts: 0,
        updated_at: nowIso,
    };
}
|
|
919
|
+
/**
 * Builds a campaign report for the current batch state, stamps the batch and
 * campaign state, and writes both the report and the state to disk.
 *
 * - `params.status === "running"`: the batch is marked running and the report
 *   comes from `snapshotHistoricalCampaignReport`; batch counts and
 *   `review_backlog_after` are refreshed afterwards.
 * - any other status: the report comes from `evaluateHistoricalBatch`, and the
 *   report/batch `status` and `next_action` are then overwritten with the
 *   values passed in `params`.
 *
 * @param {object} params - Checkpoint inputs (dataDir, state, batch, preflight,
 *   now, reportPath, runFileCount, reviewBacklogAfter, triageSpendUsd,
 *   processedFiles, results, status, nextAction, optional incompleteStatus).
 * @returns {Promise<object>} The report that was written.
 */
async function persistHistoricalCampaignCheckpoint(params) {
    const { state, batch, preflight, now, reportPath, nextAction, status, results } = params;
    // Inputs shared by both report builders, in the order the originals list them.
    const sharedInputs = {
        state,
        batch,
        preflight,
        now,
        runFileCount: params.runFileCount,
        reviewBacklogAfter: params.reviewBacklogAfter,
        triageSpendUsd: params.triageSpendUsd,
    };
    batch.report_path = reportPath;
    batch.next_action = nextAction;
    let report;
    if (status !== "running") {
        const evaluationInput = {
            ...sharedInputs,
            reportPath,
            processedFiles: params.processedFiles,
            results,
        };
        if (params.incompleteStatus) {
            evaluationInput.incompleteStatus = params.incompleteStatus;
        }
        report = evaluateHistoricalBatch(evaluationInput).report;
        // The caller-supplied outcome wins over whatever the evaluation produced.
        report.status = status;
        report.next_action = nextAction;
        report.results = results;
        batch.status = status;
        batch.next_action = nextAction;
    }
    else {
        batch.status = "running";
        report = snapshotHistoricalCampaignReport({
            ...sharedInputs,
            processedFiles: params.processedFiles,
            status: "running",
            nextAction,
            nextBatchSize: currentHistoricalBatchLevel(batch.total_files),
            results,
        });
        // Counts are refreshed only after the snapshot has been taken.
        batch.counts = summarizeHistoricalBatchState(batch, preflight).counts;
        batch.review_backlog_after = params.reviewBacklogAfter;
    }
    recomputeHistoricalCampaignSummary(state);
    state.last_run_at = now.toISOString();
    await writeHistoricalCampaignReport(reportPath, report);
    await writeHistoricalCampaignState(params.dataDir, state);
    return report;
}
|
|
971
|
+
/**
 * Maps a batch's total file count onto a batch-size level.
 *
 * @param {number} totalFiles - Number of files in the batch.
 * @returns {number} 100 when `totalFiles <= 100`, 250 when `totalFiles <= 250`,
 *   otherwise 500.
 */
function currentHistoricalBatchLevel(totalFiles) {
    const cappedLevels = [100, 250];
    const level = cappedLevels.find((ceiling) => totalFiles <= ceiling);
    return level ?? 500;
}
|
|
978
|
+
/**
 * Reads one batch file and turns it into a list of triage items.
 *
 * The file is read from `file.path`, passed through `extractArchive`, and the
 * extracted files are parsed with the parser registered for `source` in
 * `HISTORICAL_PARSERS`. Each parsed conversation becomes one item of kind
 * "conversation". When the parser yields nothing, a single "file_fallback"
 * item is returned whose content is the first 12,000 characters of the file
 * decoded as UTF-8 (or `File: <name>` when that is empty).
 *
 * @param {{ name: string, path: string }} file - Batch file descriptor.
 * @param {string} source - Source key used to select the parser.
 * @returns {object[]} Triage items for the file (despite the singular name).
 * @throws {Error} When no parser is registered for `source`.
 */
function buildHistoricalTriageItem(file, source) {
    const fileBuffer = fs.readFileSync(file.path);
    const extracted = extractArchive(fileBuffer, file.name);
    const parseConversations = HISTORICAL_PARSERS[source];
    if (!parseConversations) {
        throw new Error(`No historical triage parser for source: ${source}`);
    }
    const conversations = parseConversations(extracted.files);
    if (conversations.length === 0) {
        const preview = fileBuffer.toString("utf-8").slice(0, 12_000);
        const fallbackContent = preview || `File: ${file.name}`;
        const fallbackItem = {
            id: file.name,
            kind: "file_fallback",
            source,
            file,
            triageContent: fallbackContent,
            reviewContent: fallbackContent,
            conversationCount: 0,
        };
        return [fallbackItem];
    }
    const rawByConversationId = collectRawConversations(source, extracted.files);
    const toTriageItem = (conversation) => {
        const { id: conversationId, title, createdAt } = conversation;
        const reviewContent = buildConversationReviewContent({
            source,
            fileName: file.name,
            conversationId,
            title,
            createdAt,
            content: conversation.content,
        });
        const item = {
            id: historicalTriageItemId(file.name, conversationId),
            kind: "conversation",
            source,
            file,
            conversationId,
            title,
            createdAt,
            triageContent: reviewContent.slice(0, 12_000),
            reviewContent,
            conversationCount: 1,
        };
        // Attach a single-conversation upload only when the raw export row was found.
        if (rawByConversationId.has(conversationId)) {
            item.upload = buildSyntheticConversationUpload(source, conversationId, rawByConversationId.get(conversationId));
        }
        return item;
    };
    return conversations.map(toTriageItem);
}
|
|
1027
|
+
/**
 * Submits a batch of items for triage, waits for the job, and writes a
 * `triage.json` artifact into the batch directory.
 *
 * Steps: POST the items to the triage endpoint selected by
 * `params.providerOverride`, poll the returned job, read
 * `<dataDir>/triage-results/<job_id>.json`, merge each item with its result,
 * and write the artifact.
 *
 * @param {object} params
 * @param {object} params.client - HTTP client exposing `post` and `get`.
 * @param {object[]} params.items - Triage items (id, triageContent, source, file, ...).
 * @param {string} params.batchDir - Batch directory; receives `triage.json`.
 * @param {string} params.dataDir - Data directory containing `triage-results/`.
 * @param {string} [params.providerOverride] - Optional provider override.
 * @returns {Promise<object>} The artifact that was written.
 * @throws {Error} When the job fails or times out, the results file is
 *   missing, or the results file has no `items` array.
 */
async function runHistoricalTriage(params) {
    const endpointPath = historicalTriageEndpointPath(params.providerOverride);
    const body = {
        items: params.items.map((item) => ({
            id: item.id,
            content: item.triageContent,
            source: item.source,
        })),
        idempotency_key: historicalTriageIdempotencyKey(params.batchDir, params.items, params.providerOverride),
    };
    const { data } = await params.client.post(endpointPath, body, historicalInternalRouteOptions(params.dataDir, params.providerOverride));
    const resp = data;
    const triageJob = await pollJob({
        get: params.client.get.bind(params.client),
        jobId: resp.job_id,
    });
    if (isJobFailure(triageJob.job, triageJob.timedOut)) {
        throw new Error(triageJob.job.error ??
            (triageJob.timedOut ? "triage timed out" : "triage failed"));
    }
    const resultsPath = path.join(params.dataDir, "triage-results", `${resp.job_id}.json`);
    if (!fs.existsSync(resultsPath)) {
        throw new Error(`Missing triage results file: ${resultsPath}`);
    }
    const output = JSON.parse(fs.readFileSync(resultsPath, "utf-8"));
    // Fail with a clear message instead of an opaque TypeError further down.
    if (!Array.isArray(output?.items)) {
        throw new Error(`Malformed triage results file (missing "items" array): ${resultsPath}`);
    }
    // Index results once; the first entry for an id wins, matching the
    // previous `Array.prototype.find` lookup without the per-item scan.
    const resultById = new Map();
    for (const entry of output.items) {
        if (entry && !resultById.has(entry.id)) {
            resultById.set(entry.id, entry);
        }
    }
    const artifact = {
        batch: path.basename(params.batchDir),
        source: params.items[0]?.source,
        generated_at: new Date().toISOString(),
        ...(output.metering_mode ? { metering_mode: output.metering_mode } : {}),
        ...(output.spend_note ? { spend_note: output.spend_note } : {}),
        ...(typeof output.spent_usd === "number"
            ? { spent_usd: output.spent_usd }
            : {}),
        items: params.items.map((item) => {
            const result = resultById.get(item.id);
            return {
                item_id: item.id,
                file: item.file.name,
                kind: item.kind,
                ...(item.conversationId
                    ? { conversation_id: item.conversationId }
                    : {}),
                ...(item.title ? { title: item.title } : {}),
                ...(item.createdAt ? { created_at: item.createdAt } : {}),
                conversation_count: item.conversationCount,
                ...(result?.triage ? { triage: result.triage } : {}),
                ...(result?.error ? { error: result.error } : {}),
            };
        }),
    };
    fs.writeFileSync(path.join(params.batchDir, "triage.json"), JSON.stringify(artifact, null, 2) + "\n", "utf-8");
    return artifact;
}
|
|
1081
|
+
/**
 * Writes a human-readable preflight summary for a batch to the given stream.
 *
 * The summary block is emitted with a single `write` call. If any file has
 * junk markers, a "Flagged files" section follows with one line per file.
 *
 * @param {object} params
 * @param {string} params.batchDir - Batch directory shown in the summary.
 * @param {object} params.manifest - Provides `batch_name` and `source`.
 * @param {object} params.preflight - Preflight counts, size distribution and files.
 * @param {{ write: Function }} params.stderr - Destination stream.
 * @returns {void}
 */
function writePreflightSummary(params) {
    const { batchDir, manifest, preflight, stderr } = params;
    const allowed = SOURCE_EXTENSIONS[manifest.source].join(", ");
    const summaryParts = [
        `Preflight: ${manifest.batch_name}\n`,
        ` Path: ${batchDir}\n`,
        ` Source: ${manifest.source} (allowed extensions: ${allowed})\n`,
        ` Total files: ${preflight.total_files}\n`,
        ` Supported: ${preflight.supported_files}\n`,
        ` Unsupported: ${preflight.unsupported_files}\n`,
        ` Junk candidates: ${preflight.junk_candidates}\n`,
        ` Duplicate hashes: ${preflight.duplicate_candidates}\n`,
        ` Size (min/med/max/total bytes): ${preflight.size_distribution.min}/${preflight.size_distribution.median}/${preflight.size_distribution.max}/${preflight.size_distribution.total}\n`,
        ` Manifest: ${path.join(batchDir, "manifest.json")}\n`,
        ` Preflight: ${path.join(batchDir, "preflight.json")}\n`,
    ];
    stderr.write(summaryParts.join(""));
    const flaggedFiles = preflight.files.filter((entry) => entry.junk_markers.length > 0);
    if (flaggedFiles.length === 0) {
        return;
    }
    stderr.write(`\nFlagged files:\n`);
    for (const entry of flaggedFiles) {
        stderr.write(` - ${entry.name}: ${entry.junk_markers.join(", ")}\n`);
    }
}
|
|
1103
|
+
/**
 * Builds the triage item id for a conversation inside a batch file.
 *
 * @param {string} fileName - Name of the batch file.
 * @param {string} conversationId - Conversation id within that file.
 * @returns {string} `<fileName>::<conversationId>`.
 */
function historicalTriageItemId(fileName, conversationId) {
    const separator = "::";
    return `${fileName}${separator}${conversationId}`;
}
|
|
1106
|
+
/**
 * Builds the key string for a historical conversation.
 *
 * @param {string} source - Source identifier.
 * @param {string} conversationId - Conversation id.
 * @returns {string} `historical:<source>:<conversationId>`.
 */
function historicalConversationKey(source, conversationId) {
    const prefix = "historical";
    return `${prefix}:${source}:${conversationId}`;
}
|
|
1109
|
+
/**
 * Produces a short display label for a triage item.
 *
 * @param {{ kind: string, conversationId?: string, file: { name: string } }} item
 * @returns {string} `<file>#<conversationId>` for conversation items that have
 *   a conversation id, otherwise just the file name.
 */
function formatHistoricalLabel(item) {
    const { kind, conversationId, file } = item;
    const isLabelledConversation = kind === "conversation" && Boolean(conversationId);
    return isLabelledConversation ? `${file.name}#${conversationId}` : file.name;
}
|
|
1115
|
+
/**
 * Renders a conversation as review text: five metadata lines (source, file,
 * conversation id, title, created), a blank line, then the content.
 *
 * @param {object} params
 * @param {string} params.source
 * @param {string} params.fileName
 * @param {string} params.conversationId
 * @param {string} params.title
 * @param {string} params.createdAt
 * @param {string} params.content - Rendered as an empty string when null or undefined.
 * @returns {string} The newline-joined review text.
 */
function buildConversationReviewContent(params) {
    const { source, fileName, conversationId, title, createdAt, content } = params;
    const header = `Source: ${source}\n` +
        `File: ${fileName}\n` +
        `Conversation ID: ${conversationId}\n` +
        `Title: ${title}\n` +
        `Created: ${createdAt}\n`;
    // A missing body renders as an empty string rather than "undefined"/"null".
    const body = content === null || content === undefined ? "" : `${content}`;
    return `${header}\n${body}`;
}
|
|
1126
|
+
/**
 * Dispatches to the per-source collector that indexes raw conversation
 * records by conversation id.
 *
 * @param {string} source - One of "chatgpt_web", "claude_web", "gemini_web",
 *   "grok_web"; any other value yields an empty map.
 * @param {Iterable<{ path: string, data: Buffer }>} files - Extracted archive files.
 * @returns {Map<string, object>} Raw records keyed by conversation id.
 */
function collectRawConversations(source, files) {
    if (source === "chatgpt_web") {
        return collectRawChatGptConversations(files);
    }
    if (source === "claude_web") {
        return collectRawClaudeConversations(files);
    }
    if (source === "gemini_web") {
        return collectRawGeminiConversations(files);
    }
    if (source === "grok_web") {
        return collectRawGrokConversations(files);
    }
    return new Map();
}
|
|
1140
|
+
/**
 * Indexes raw ChatGPT conversation records by id.
 *
 * Only files whose path ends in ".json" (case-insensitive) and whose content
 * parses to a JSON array are considered. A record's key is its string
 * `conversation_id`, or else its string `id`. Records without a usable key
 * are skipped, and the first record seen for a key wins.
 *
 * @param {Iterable<{ path: string, data: Buffer }>} files - Extracted archive files.
 * @returns {Map<string, object>} Raw records keyed by conversation id.
 */
function collectRawChatGptConversations(files) {
    const byId = new Map();
    for (const file of files) {
        const isJson = file.path.toLowerCase().endsWith(".json");
        if (!isJson) {
            continue;
        }
        let decoded;
        try {
            decoded = JSON.parse(file.data.toString("utf-8"));
        }
        catch {
            // Unparseable files are ignored on purpose.
            continue;
        }
        if (!Array.isArray(decoded)) {
            continue;
        }
        for (const record of decoded) {
            if (!record || typeof record !== "object") {
                continue;
            }
            let key = null;
            if (typeof record.conversation_id === "string") {
                key = record.conversation_id;
            }
            else if (typeof record.id === "string") {
                key = record.id;
            }
            if (key && !byId.has(key)) {
                byId.set(key, record);
            }
        }
    }
    return byId;
}
|
|
1170
|
+
/**
 * Indexes raw Claude conversation records by `uuid`.
 *
 * Only ".json" files whose lower-cased base name starts with "conversations"
 * and whose content parses to a JSON array are considered. Records without a
 * string `uuid` are skipped, and the first record seen for a uuid wins.
 *
 * @param {Iterable<{ path: string, data: Buffer }>} files - Extracted archive files.
 * @returns {Map<string, object>} Raw records keyed by conversation uuid.
 */
function collectRawClaudeConversations(files) {
    const byUuid = new Map();
    for (const file of files) {
        const lowerPath = file.path.toLowerCase();
        if (!lowerPath.endsWith(".json")) {
            continue;
        }
        // Base name is everything after the last "/"; "conversations.json"
        // itself is covered by the prefix check.
        const baseName = lowerPath.slice(lowerPath.lastIndexOf("/") + 1);
        if (!baseName.startsWith("conversations")) {
            continue;
        }
        let decoded;
        try {
            decoded = JSON.parse(file.data.toString("utf-8"));
        }
        catch {
            continue;
        }
        if (!Array.isArray(decoded)) {
            continue;
        }
        for (const record of decoded) {
            if (!record || typeof record !== "object") {
                continue;
            }
            const { uuid } = record;
            if (typeof uuid === "string" && !byUuid.has(uuid)) {
                byUuid.set(uuid, record);
            }
        }
    }
    return byUuid;
}
|
|
1202
|
+
/**
 * Indexes raw Gemini activity-card records by `conversation_id`.
 *
 * Only ".json" files that parse to a JSON array are considered. A record is
 * kept when it has a string `conversation_id`, `source === "gemini_web"` and
 * `operative_unit === "gemini_activity_card"`; the first record seen for an
 * id wins.
 *
 * @param {Iterable<{ path: string, data: Buffer }>} files - Extracted archive files.
 * @returns {Map<string, object>} Raw records keyed by conversation id.
 */
function collectRawGeminiConversations(files) {
    const byId = new Map();
    const isActivityCard = (record) => typeof record.conversation_id === "string" &&
        record.source === "gemini_web" &&
        record.operative_unit === "gemini_activity_card";
    for (const file of files) {
        if (!file.path.toLowerCase().endsWith(".json")) {
            continue;
        }
        let decoded;
        try {
            decoded = JSON.parse(file.data.toString("utf-8"));
        }
        catch {
            continue;
        }
        if (!Array.isArray(decoded)) {
            continue;
        }
        for (const record of decoded) {
            if (!record || typeof record !== "object") {
                continue;
            }
            if (isActivityCard(record) && !byId.has(record.conversation_id)) {
                byId.set(record.conversation_id, record);
            }
        }
    }
    return byId;
}
|
|
1231
|
+
/**
 * Indexes raw Grok conversation wrappers by `wrapper.conversation.id`.
 *
 * Only ".json" files whose parsed content has a `conversations` array are
 * considered. Wrappers without a string `conversation.id` are skipped, and
 * the first wrapper seen for an id wins. The stored value is the whole
 * wrapper, not the inner `conversation` object.
 *
 * @param {Iterable<{ path: string, data: Buffer }>} files - Extracted archive files.
 * @returns {Map<string, object>} Raw wrappers keyed by conversation id.
 */
function collectRawGrokConversations(files) {
    const byId = new Map();
    for (const file of files) {
        if (!file.path.toLowerCase().endsWith(".json")) {
            continue;
        }
        let exportRoot;
        try {
            exportRoot = JSON.parse(file.data.toString("utf-8"));
        }
        catch {
            continue;
        }
        const wrappers = exportRoot?.conversations;
        if (!Array.isArray(wrappers)) {
            continue;
        }
        wrappers.forEach((wrapper) => {
            if (!wrapper || typeof wrapper !== "object") {
                return;
            }
            const conversationId = wrapper.conversation?.id;
            if (typeof conversationId === "string" && !byId.has(conversationId)) {
                byId.set(conversationId, wrapper);
            }
        });
    }
    return byId;
}
|
|
1257
|
+
/**
 * Wraps one raw conversation record as a standalone upload: a synthetic
 * file name plus the pretty-printed JSON payload (with trailing newline) as
 * a UTF-8 buffer.
 *
 * @param {string} source - Source identifier; "grok_web" gets the export wrapper.
 * @param {string} conversationId - Used to derive the synthetic file name.
 * @param {object} rawConversation - Raw record taken from the export.
 * @returns {{ filename: string, fileData: Buffer }}
 */
function buildSyntheticConversationUpload(source, conversationId, rawConversation) {
    let payload;
    if (source === "grok_web") {
        // Grok's parser expects the top-level export shape
        // `{ conversations: [...], projects, tasks, media_posts }`
        // rather than a bare array of conversations, so the synthetic
        // single-row upload has to preserve that wrapper.
        payload = { conversations: [rawConversation], projects: [], tasks: [], media_posts: [] };
    }
    else {
        payload = [rawConversation];
    }
    const filename = syntheticConversationFilename(source, conversationId);
    const serialized = `${JSON.stringify(payload, null, 2)}\n`;
    return {
        filename,
        fileData: Buffer.from(serialized, "utf-8"),
    };
}
|
|
1270
|
+
/**
 * Derives the file name for a synthetic single-conversation upload.
 *
 * Every character of the id outside `[a-zA-Z0-9_-]` is replaced by "_".
 * Claude uploads use the prefix "conversations-"; every other source uses
 * "conversation-".
 *
 * @param {string} source - Source identifier.
 * @param {string} conversationId - Conversation id to embed in the name.
 * @returns {string} `<prefix><sanitized id>.json`.
 */
function syntheticConversationFilename(source, conversationId) {
    const prefix = source === "claude_web" ? "conversations-" : "conversation-";
    const sanitizedId = conversationId.replace(/[^a-zA-Z0-9_-]/g, "_");
    return `${prefix}${sanitizedId}.json`;
}
|
|
1277
|
+
/**
 * Derives the idempotency key for a triage request.
 *
 * A JSON fingerprint is built from the resolved batch directory, the first
 * item's source, the effective triage provider name and the items (sorted by
 * id with `localeCompare`). The key is "backfill-triage:" followed by the
 * first 24 hex characters of the fingerprint's SHA-256 digest.
 *
 * @param {string} batchDir - Batch directory (resolved to an absolute path).
 * @param {object[]} items - Triage items included in the request.
 * @param {string} [providerOverride] - Optional provider override.
 * @returns {string} The idempotency key.
 */
function historicalTriageIdempotencyKey(batchDir, items, providerOverride) {
    const fingerprintItems = items.map((item) => ({
        id: item.id,
        file: item.file.name,
        file_path: item.file.path,
        content_hash: item.file.content_hash,
        conversation_id: item.conversationId,
    }));
    // Sorting makes the key independent of the order items were supplied in.
    fingerprintItems.sort((left, right) => left.id.localeCompare(right.id));
    const fingerprint = JSON.stringify({
        batch_dir: path.resolve(batchDir),
        source: items[0]?.source,
        triage_provider: effectiveTriageProviderName(providerOverride),
        items: fingerprintItems,
    });
    const hexDigest = crypto.createHash("sha256").update(fingerprint).digest("hex");
    return `backfill-triage:${hexDigest.slice(0, 24)}`;
}
|
|
1298
|
+
/**
 * Validates an optional provider-override flag value.
 *
 * @param {*} value - Raw flag value; `undefined` means the flag was not given.
 * @param {string} flagName - Flag name used in the error message.
 * @returns {*} `undefined` when no value was given, otherwise the accepted value.
 * @throws {Error} When `isOperatorOverrideProvider` rejects the value.
 */
function resolveBackfillProviderOverride(value, flagName) {
    const wasProvided = value !== undefined;
    if (wasProvided && !isOperatorOverrideProvider(value)) {
        throw new Error(`Unsupported ${flagName} "${String(value)}". Supported overrides: codex-cli`);
    }
    return value;
}
|
|
1306
|
+
/**
 * Chooses the triage endpoint path for a run.
 *
 * @param {string} [providerOverride] - Optional provider override.
 * @returns {string} `INTERNAL_HISTORICAL_TRIAGE_CODEX_ROUTE` for "codex-cli",
 *   otherwise "/triage".
 */
function historicalTriageEndpointPath(providerOverride) {
    if (providerOverride === "codex-cli") {
        return INTERNAL_HISTORICAL_TRIAGE_CODEX_ROUTE;
    }
    return "/triage";
}
|
|
1311
|
+
/**
 * Chooses the ingest endpoint path for a run.
 *
 * @param {string} [providerOverride] - Optional provider override.
 * @returns {string} `INTERNAL_HISTORICAL_INGEST_CODEX_ROUTE` for "codex-cli",
 *   otherwise "/ingest".
 */
function historicalIngestEndpointPath(providerOverride) {
    if (providerOverride === "codex-cli") {
        return INTERNAL_HISTORICAL_INGEST_CODEX_ROUTE;
    }
    return "/ingest";
}
|
|
1316
|
+
/**
 * Builds the extra request options needed for the internal "codex-cli" routes.
 *
 * @param {string} dataDir - Data directory the capability is read from.
 * @param {string} [providerOverride] - Optional provider override.
 * @returns {{ headers: Object }|undefined} A headers object carrying the
 *   capability under `HISTORICAL_BACKFILL_CAPABILITY_HEADER` for "codex-cli";
 *   `undefined` for any other override.
 * @throws {Error} When the override is "codex-cli" and the capability is missing.
 */
function historicalInternalRouteOptions(dataDir, providerOverride) {
    if (providerOverride === "codex-cli") {
        const capability = readRequiredHistoricalCapability(dataDir);
        return {
            headers: {
                [HISTORICAL_BACKFILL_CAPABILITY_HEADER]: capability,
            },
        };
    }
    return undefined;
}
|
|
1326
|
+
/**
 * Reads the local historical-backfill capability and insists that it exists.
 *
 * @param {string} dataDir - Data directory passed to `readHistoricalBackfillCapability`.
 * @returns {*} The capability value as returned by `readHistoricalBackfillCapability`.
 * @throws {Error} When that helper returns a falsy value.
 */
function readRequiredHistoricalCapability(dataDir) {
    const capability = readHistoricalBackfillCapability(dataDir);
    if (capability) {
        return capability;
    }
    throw new Error("Historical Codex override is not initialized on this machine. Start the daemon first so it can create the local backfill capability.");
}
|
|
1333
|
+
/**
 * Determines the single triage provider used across a run's candidates.
 *
 * @param {Array<{ id: string }>} candidates - Candidates to inspect.
 * @param {Map<string, object>} triageById - Triage records keyed by candidate
 *   id; the provider is read from `record.triage.provider`.
 * @returns {string|undefined} The provider name when exactly one distinct
 *   string provider is found; `undefined` when there are none or several.
 */
function resolveHistoricalRunTriageProviderName(candidates, triageById) {
    const distinctProviders = new Set();
    for (const candidate of candidates) {
        const provider = triageById.get(candidate.id)?.triage?.provider;
        if (typeof provider === "string") {
            distinctProviders.add(provider);
        }
    }
    if (distinctProviders.size !== 1) {
        return undefined;
    }
    const [onlyProvider] = distinctProviders;
    return onlyProvider;
}
|
|
1339
|
+
/**
 * Formats a CLI error for backfill output, appending a message extracted
 * from the error body when one is available.
 *
 * @param {{ message: string, body?: * }} err - Error carrying an optional body.
 * @returns {string} `<message>: <body message>` or just `<message>`.
 */
function formatCliErrorForHistoricalBackfill(err) {
    const detail = extractCliErrorBodyMessage(err.body);
    if (detail) {
        return `${err.message}: ${detail}`;
    }
    return err.message;
}
|
|
1343
|
+
/**
 * Pulls a human-readable message out of an error response body.
 *
 * Precedence: a non-blank string `message`; otherwise the non-blank string
 * `message` fields of the `details` array joined with "; "; otherwise a
 * non-blank string `error`. All returned text is trimmed.
 *
 * @param {*} body - Response body; anything that is not an object yields `undefined`.
 * @returns {string|undefined} The extracted message, if any.
 */
function extractCliErrorBodyMessage(body) {
    if (!body || typeof body !== "object") {
        return undefined;
    }
    // Trimmed text for non-blank strings, null for everything else.
    const trimmedOrNull = (candidate) => typeof candidate === "string" && candidate.trim() ? candidate.trim() : null;
    const directMessage = trimmedOrNull(body.message);
    if (directMessage !== null) {
        return directMessage;
    }
    if (Array.isArray(body.details)) {
        const detailMessages = [];
        for (const detail of body.details) {
            if (!detail || typeof detail !== "object") {
                continue;
            }
            const text = trimmedOrNull(detail.message);
            if (text !== null) {
                detailMessages.push(text);
            }
        }
        if (detailMessages.length > 0) {
            return detailMessages.join("; ");
        }
    }
    return trimmedOrNull(body.error) ?? undefined;
}
|
|
1369
|
+
/**
 * Parses a CLI argument as a strictly positive base-10 integer.
 *
 * The whole (trimmed) input must consist of decimal digits, optionally
 * preceded by "+". Inputs with trailing junk are rejected: `Number.parseInt`
 * alone would silently turn "12abc" into 12 and "1.5" or "1e3" into 1.
 *
 * @param {string} value - Raw argument text.
 * @returns {number} The parsed integer, always a safe integer greater than 0.
 * @throws {Error} When the input is not a positive integer.
 */
function parsePositiveInteger(value) {
    const text = String(value).trim();
    const parsed = /^\+?\d+$/.test(text) ? Number(text) : Number.NaN;
    // Safe-integer check also rejects digit strings too large to represent exactly.
    if (!Number.isSafeInteger(parsed) || parsed <= 0) {
        throw new Error(`Expected a positive integer, got "${value}"`);
    }
    return parsed;
}
|
|
1376
|
+
//# sourceMappingURL=backfill.js.map
|