@getrift/rift 0.1.0-beta.2 → 0.1.0-beta.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -9
- package/dist/src/auth/keychain.d.ts +9 -0
- package/dist/src/auth/keychain.d.ts.map +1 -1
- package/dist/src/auth/keychain.js +37 -0
- package/dist/src/auth/keychain.js.map +1 -1
- package/dist/src/capture/auto-capture.d.ts +7 -0
- package/dist/src/capture/auto-capture.d.ts.map +1 -1
- package/dist/src/capture/auto-capture.js +82 -15
- package/dist/src/capture/auto-capture.js.map +1 -1
- package/dist/src/capture/auto-repair.d.ts +110 -0
- package/dist/src/capture/auto-repair.d.ts.map +1 -0
- package/dist/src/capture/auto-repair.js +269 -0
- package/dist/src/capture/auto-repair.js.map +1 -0
- package/dist/src/capture/codex-cli-triage-provider.d.ts.map +1 -1
- package/dist/src/capture/codex-cli-triage-provider.js +4 -3
- package/dist/src/capture/codex-cli-triage-provider.js.map +1 -1
- package/dist/src/capture/observability.d.ts +42 -0
- package/dist/src/capture/observability.d.ts.map +1 -1
- package/dist/src/capture/observability.js +45 -4
- package/dist/src/capture/observability.js.map +1 -1
- package/dist/src/capture/recover-quarantine.d.ts +260 -0
- package/dist/src/capture/recover-quarantine.d.ts.map +1 -0
- package/dist/src/capture/recover-quarantine.js +522 -0
- package/dist/src/capture/recover-quarantine.js.map +1 -0
- package/dist/src/cli/commands/backfill.d.ts.map +1 -1
- package/dist/src/cli/commands/backfill.js +5 -2
- package/dist/src/cli/commands/backfill.js.map +1 -1
- package/dist/src/cli/commands/capture-recover.d.ts +40 -0
- package/dist/src/cli/commands/capture-recover.d.ts.map +1 -0
- package/dist/src/cli/commands/capture-recover.js +184 -0
- package/dist/src/cli/commands/capture-recover.js.map +1 -0
- package/dist/src/cli/commands/capture.d.ts.map +1 -1
- package/dist/src/cli/commands/capture.js +96 -5
- package/dist/src/cli/commands/capture.js.map +1 -1
- package/dist/src/cli/commands/doctor.d.ts +6 -0
- package/dist/src/cli/commands/doctor.d.ts.map +1 -0
- package/dist/src/cli/commands/doctor.js +242 -0
- package/dist/src/cli/commands/doctor.js.map +1 -0
- package/dist/src/cli/commands/feedback.d.ts +12 -0
- package/dist/src/cli/commands/feedback.d.ts.map +1 -1
- package/dist/src/cli/commands/feedback.js +93 -4
- package/dist/src/cli/commands/feedback.js.map +1 -1
- package/dist/src/cli/commands/mcp-install.js +5 -2
- package/dist/src/cli/commands/mcp-install.js.map +1 -1
- package/dist/src/cli/commands/menubar.d.ts +80 -0
- package/dist/src/cli/commands/menubar.d.ts.map +1 -0
- package/dist/src/cli/commands/menubar.js +388 -0
- package/dist/src/cli/commands/menubar.js.map +1 -0
- package/dist/src/cli/commands/onboard.d.ts +143 -5
- package/dist/src/cli/commands/onboard.d.ts.map +1 -1
- package/dist/src/cli/commands/onboard.js +844 -188
- package/dist/src/cli/commands/onboard.js.map +1 -1
- package/dist/src/cli/commands/rebuild.d.ts.map +1 -1
- package/dist/src/cli/commands/rebuild.js +6 -3
- package/dist/src/cli/commands/rebuild.js.map +1 -1
- package/dist/src/cli/commands/reconcile.d.ts.map +1 -1
- package/dist/src/cli/commands/reconcile.js +12 -0
- package/dist/src/cli/commands/reconcile.js.map +1 -1
- package/dist/src/cli/commands/review.d.ts.map +1 -1
- package/dist/src/cli/commands/review.js +22 -7
- package/dist/src/cli/commands/review.js.map +1 -1
- package/dist/src/cli/commands/search.d.ts +2 -0
- package/dist/src/cli/commands/search.d.ts.map +1 -1
- package/dist/src/cli/commands/search.js +34 -4
- package/dist/src/cli/commands/search.js.map +1 -1
- package/dist/src/cli/commands/status.d.ts +9 -7
- package/dist/src/cli/commands/status.d.ts.map +1 -1
- package/dist/src/cli/commands/status.js +117 -12
- package/dist/src/cli/commands/status.js.map +1 -1
- package/dist/src/cli/commands/token-issue.d.ts.map +1 -1
- package/dist/src/cli/commands/token-issue.js +9 -1
- package/dist/src/cli/commands/token-issue.js.map +1 -1
- package/dist/src/cli/commands/triage.d.ts.map +1 -1
- package/dist/src/cli/commands/triage.js +7 -5
- package/dist/src/cli/commands/triage.js.map +1 -1
- package/dist/src/cli/commands/update.d.ts +80 -0
- package/dist/src/cli/commands/update.d.ts.map +1 -0
- package/dist/src/cli/commands/update.js +390 -0
- package/dist/src/cli/commands/update.js.map +1 -0
- package/dist/src/cli/default-config-path.d.ts +15 -0
- package/dist/src/cli/default-config-path.d.ts.map +1 -0
- package/dist/src/cli/default-config-path.js +27 -0
- package/dist/src/cli/default-config-path.js.map +1 -0
- package/dist/src/cli/feedback/feedback-config.d.ts +46 -0
- package/dist/src/cli/feedback/feedback-config.d.ts.map +1 -1
- package/dist/src/cli/feedback/feedback-config.js +130 -4
- package/dist/src/cli/feedback/feedback-config.js.map +1 -1
- package/dist/src/cli/feedback/feedback-history.d.ts +7 -0
- package/dist/src/cli/feedback/feedback-history.d.ts.map +1 -1
- package/dist/src/cli/feedback/feedback-history.js +39 -9
- package/dist/src/cli/feedback/feedback-history.js.map +1 -1
- package/dist/src/cli/feedback/feedback-payload.d.ts +22 -1
- package/dist/src/cli/feedback/feedback-payload.d.ts.map +1 -1
- package/dist/src/cli/feedback/feedback-payload.js.map +1 -1
- package/dist/src/cli/feedback/feedback-relay.d.ts +2 -2
- package/dist/src/cli/feedback/feedback-relay.d.ts.map +1 -1
- package/dist/src/cli/feedback/feedback-relay.js.map +1 -1
- package/dist/src/cli/feedback/invite.d.ts +17 -0
- package/dist/src/cli/feedback/invite.d.ts.map +1 -0
- package/dist/src/cli/feedback/invite.js +67 -0
- package/dist/src/cli/feedback/invite.js.map +1 -0
- package/dist/src/cli/feedback/relay-secret-store.d.ts +32 -0
- package/dist/src/cli/feedback/relay-secret-store.d.ts.map +1 -0
- package/dist/src/cli/feedback/relay-secret-store.js +137 -0
- package/dist/src/cli/feedback/relay-secret-store.js.map +1 -0
- package/dist/src/cli/http-client.d.ts +93 -1
- package/dist/src/cli/http-client.d.ts.map +1 -1
- package/dist/src/cli/http-client.js +254 -6
- package/dist/src/cli/http-client.js.map +1 -1
- package/dist/src/cli/index.d.ts.map +1 -1
- package/dist/src/cli/index.js +29 -6
- package/dist/src/cli/index.js.map +1 -1
- package/dist/src/cli/postinstall-menubar.d.ts +22 -0
- package/dist/src/cli/postinstall-menubar.d.ts.map +1 -0
- package/dist/src/cli/postinstall-menubar.js +53 -0
- package/dist/src/cli/postinstall-menubar.js.map +1 -0
- package/dist/src/cli/status/friend-header.d.ts +16 -1
- package/dist/src/cli/status/friend-header.d.ts.map +1 -1
- package/dist/src/cli/status/friend-header.js +354 -26
- package/dist/src/cli/status/friend-header.js.map +1 -1
- package/dist/src/cli/status/local-signals.d.ts +18 -0
- package/dist/src/cli/status/local-signals.d.ts.map +1 -1
- package/dist/src/cli/status/local-signals.js +29 -0
- package/dist/src/cli/status/local-signals.js.map +1 -1
- package/dist/src/cli/ui.d.ts +47 -0
- package/dist/src/cli/ui.d.ts.map +1 -0
- package/dist/src/cli/ui.js +166 -0
- package/dist/src/cli/ui.js.map +1 -0
- package/dist/src/config/schema.d.ts +79 -0
- package/dist/src/config/schema.d.ts.map +1 -1
- package/dist/src/config/schema.js +44 -0
- package/dist/src/config/schema.js.map +1 -1
- package/dist/src/diagnostics/codex-preflight.d.ts +33 -0
- package/dist/src/diagnostics/codex-preflight.d.ts.map +1 -0
- package/dist/src/diagnostics/codex-preflight.js +75 -0
- package/dist/src/diagnostics/codex-preflight.js.map +1 -0
- package/dist/src/diagnostics/doctor.d.ts +114 -0
- package/dist/src/diagnostics/doctor.d.ts.map +1 -0
- package/dist/src/diagnostics/doctor.js +352 -0
- package/dist/src/diagnostics/doctor.js.map +1 -0
- package/dist/src/diagnostics/notify.d.ts +90 -0
- package/dist/src/diagnostics/notify.d.ts.map +1 -0
- package/dist/src/diagnostics/notify.js +177 -0
- package/dist/src/diagnostics/notify.js.map +1 -0
- package/dist/src/diagnostics/repair-prompt.d.ts +49 -0
- package/dist/src/diagnostics/repair-prompt.d.ts.map +1 -0
- package/dist/src/diagnostics/repair-prompt.js +223 -0
- package/dist/src/diagnostics/repair-prompt.js.map +1 -0
- package/dist/src/ingestion/inbox-core/conversation-fingerprint.d.ts +2 -0
- package/dist/src/ingestion/inbox-core/conversation-fingerprint.d.ts.map +1 -0
- package/dist/src/ingestion/inbox-core/conversation-fingerprint.js +27 -0
- package/dist/src/ingestion/inbox-core/conversation-fingerprint.js.map +1 -0
- package/dist/src/ingestion/inbox-core/conversation-key.d.ts +2 -0
- package/dist/src/ingestion/inbox-core/conversation-key.d.ts.map +1 -0
- package/dist/src/ingestion/inbox-core/conversation-key.js +31 -0
- package/dist/src/ingestion/inbox-core/conversation-key.js.map +1 -0
- package/dist/src/ingestion/inbox-core/extensions.d.ts +3 -0
- package/dist/src/ingestion/inbox-core/extensions.d.ts.map +1 -0
- package/dist/src/ingestion/inbox-core/extensions.js +16 -0
- package/dist/src/ingestion/inbox-core/extensions.js.map +1 -0
- package/dist/src/ingestion/inbox-core/idempotency.d.ts +2 -0
- package/dist/src/ingestion/inbox-core/idempotency.d.ts.map +1 -0
- package/dist/src/ingestion/inbox-core/idempotency.js +22 -0
- package/dist/src/ingestion/inbox-core/idempotency.js.map +1 -0
- package/dist/src/ingestion/inbox-core/index.d.ts +20 -0
- package/dist/src/ingestion/inbox-core/index.d.ts.map +1 -0
- package/dist/src/ingestion/inbox-core/index.js +20 -0
- package/dist/src/ingestion/inbox-core/index.js.map +1 -0
- package/dist/src/ingestion/inbox-core/source-detection.d.ts +2 -0
- package/dist/src/ingestion/inbox-core/source-detection.d.ts.map +1 -0
- package/dist/src/ingestion/inbox-core/source-detection.js +23 -0
- package/dist/src/ingestion/inbox-core/source-detection.js.map +1 -0
- package/dist/src/ingestion/inbox-core/source-sniffer.d.ts +11 -0
- package/dist/src/ingestion/inbox-core/source-sniffer.d.ts.map +1 -0
- package/dist/src/ingestion/inbox-core/source-sniffer.js +69 -0
- package/dist/src/ingestion/inbox-core/source-sniffer.js.map +1 -0
- package/dist/src/ingestion/inbox-core/zip-sniffer.d.ts +70 -0
- package/dist/src/ingestion/inbox-core/zip-sniffer.d.ts.map +1 -0
- package/dist/src/ingestion/inbox-core/zip-sniffer.js +161 -0
- package/dist/src/ingestion/inbox-core/zip-sniffer.js.map +1 -0
- package/dist/src/ingestion/inbox-watcher.d.ts.map +1 -1
- package/dist/src/ingestion/inbox-watcher.js +34 -50
- package/dist/src/ingestion/inbox-watcher.js.map +1 -1
- package/dist/src/ingestion/indexer.d.ts +7 -0
- package/dist/src/ingestion/indexer.d.ts.map +1 -1
- package/dist/src/ingestion/indexer.js +36 -2
- package/dist/src/ingestion/indexer.js.map +1 -1
- package/dist/src/ingestion/metadata-extraction.d.ts +8 -5
- package/dist/src/ingestion/metadata-extraction.d.ts.map +1 -1
- package/dist/src/ingestion/metadata-extraction.js +24 -5
- package/dist/src/ingestion/metadata-extraction.js.map +1 -1
- package/dist/src/ingestion/skip-quarantine.d.ts +10 -0
- package/dist/src/ingestion/skip-quarantine.d.ts.map +1 -0
- package/dist/src/ingestion/skip-quarantine.js +35 -0
- package/dist/src/ingestion/skip-quarantine.js.map +1 -0
- package/dist/src/jobs/handlers/compact.d.ts.map +1 -1
- package/dist/src/jobs/handlers/compact.js +30 -4
- package/dist/src/jobs/handlers/compact.js.map +1 -1
- package/dist/src/jobs/handlers/dedupe-conversations.d.ts +134 -0
- package/dist/src/jobs/handlers/dedupe-conversations.d.ts.map +1 -0
- package/dist/src/jobs/handlers/dedupe-conversations.js +371 -0
- package/dist/src/jobs/handlers/dedupe-conversations.js.map +1 -0
- package/dist/src/jobs/handlers/ingest.d.ts.map +1 -1
- package/dist/src/jobs/handlers/ingest.js +295 -41
- package/dist/src/jobs/handlers/ingest.js.map +1 -1
- package/dist/src/jobs/handlers/reconcile.d.ts +28 -0
- package/dist/src/jobs/handlers/reconcile.d.ts.map +1 -1
- package/dist/src/jobs/handlers/reconcile.js +145 -19
- package/dist/src/jobs/handlers/reconcile.js.map +1 -1
- package/dist/src/jobs/handlers/reindex.d.ts.map +1 -1
- package/dist/src/jobs/handlers/reindex.js +13 -2
- package/dist/src/jobs/handlers/reindex.js.map +1 -1
- package/dist/src/jobs/handlers/save.d.ts.map +1 -1
- package/dist/src/jobs/handlers/save.js +57 -3
- package/dist/src/jobs/handlers/save.js.map +1 -1
- package/dist/src/jobs/queue.d.ts +51 -1
- package/dist/src/jobs/queue.d.ts.map +1 -1
- package/dist/src/jobs/queue.js +466 -26
- package/dist/src/jobs/queue.js.map +1 -1
- package/dist/src/jobs/worker-entry.d.ts.map +1 -1
- package/dist/src/jobs/worker-entry.js +35 -7
- package/dist/src/jobs/worker-entry.js.map +1 -1
- package/dist/src/jobs/worker-process.d.ts +11 -0
- package/dist/src/jobs/worker-process.d.ts.map +1 -1
- package/dist/src/jobs/worker-process.js +37 -4
- package/dist/src/jobs/worker-process.js.map +1 -1
- package/dist/src/main.js +199 -46
- package/dist/src/main.js.map +1 -1
- package/dist/src/mcp/errors.d.ts.map +1 -1
- package/dist/src/mcp/errors.js +20 -1
- package/dist/src/mcp/errors.js.map +1 -1
- package/dist/src/mcp/server.d.ts.map +1 -1
- package/dist/src/mcp/server.js +43 -3
- package/dist/src/mcp/server.js.map +1 -1
- package/dist/src/mcp/tools/context-pack.d.ts.map +1 -1
- package/dist/src/mcp/tools/context-pack.js +164 -23
- package/dist/src/mcp/tools/context-pack.js.map +1 -1
- package/dist/src/mcp/tools/search.d.ts +6 -2
- package/dist/src/mcp/tools/search.d.ts.map +1 -1
- package/dist/src/mcp/tools/search.js +35 -4
- package/dist/src/mcp/tools/search.js.map +1 -1
- package/dist/src/observability/embedding-events.d.ts +52 -0
- package/dist/src/observability/embedding-events.d.ts.map +1 -0
- package/dist/src/observability/embedding-events.js +149 -0
- package/dist/src/observability/embedding-events.js.map +1 -0
- package/dist/src/observability/index-events.d.ts +70 -0
- package/dist/src/observability/index-events.d.ts.map +1 -0
- package/dist/src/observability/index-events.js +148 -0
- package/dist/src/observability/index-events.js.map +1 -0
- package/dist/src/observability/onboarding-metric.d.ts +131 -0
- package/dist/src/observability/onboarding-metric.d.ts.map +1 -0
- package/dist/src/observability/onboarding-metric.js +351 -0
- package/dist/src/observability/onboarding-metric.js.map +1 -0
- package/dist/src/observability/tool-usage-stats.d.ts +77 -4
- package/dist/src/observability/tool-usage-stats.d.ts.map +1 -1
- package/dist/src/observability/tool-usage-stats.js +112 -32
- package/dist/src/observability/tool-usage-stats.js.map +1 -1
- package/dist/src/observability/tool-usage.d.ts +100 -7
- package/dist/src/observability/tool-usage.d.ts.map +1 -1
- package/dist/src/observability/tool-usage.js +196 -33
- package/dist/src/observability/tool-usage.js.map +1 -1
- package/dist/src/observability/version-check.d.ts +71 -0
- package/dist/src/observability/version-check.d.ts.map +1 -0
- package/dist/src/observability/version-check.js +198 -0
- package/dist/src/observability/version-check.js.map +1 -0
- package/dist/src/providers/basic-metadata-extraction.d.ts +60 -0
- package/dist/src/providers/basic-metadata-extraction.d.ts.map +1 -0
- package/dist/src/providers/basic-metadata-extraction.js +114 -0
- package/dist/src/providers/basic-metadata-extraction.js.map +1 -0
- package/dist/src/providers/codex-cli-metadata-extraction.d.ts +1 -0
- package/dist/src/providers/codex-cli-metadata-extraction.d.ts.map +1 -1
- package/dist/src/providers/codex-cli-metadata-extraction.js +6 -2
- package/dist/src/providers/codex-cli-metadata-extraction.js.map +1 -1
- package/dist/src/providers/codex-cli-model.d.ts +61 -0
- package/dist/src/providers/codex-cli-model.d.ts.map +1 -0
- package/dist/src/providers/codex-cli-model.js +194 -0
- package/dist/src/providers/codex-cli-model.js.map +1 -0
- package/dist/src/providers/codex-cli-runner.d.ts +39 -0
- package/dist/src/providers/codex-cli-runner.d.ts.map +1 -1
- package/dist/src/providers/codex-cli-runner.js +234 -48
- package/dist/src/providers/codex-cli-runner.js.map +1 -1
- package/dist/src/providers/conversation-generation.d.ts.map +1 -1
- package/dist/src/providers/conversation-generation.js +43 -6
- package/dist/src/providers/conversation-generation.js.map +1 -1
- package/dist/src/providers/ollama-embed.d.ts +2 -1
- package/dist/src/providers/ollama-embed.d.ts.map +1 -1
- package/dist/src/providers/ollama-embed.js +1 -0
- package/dist/src/providers/ollama-embed.js.map +1 -1
- package/dist/src/providers/openai-metadata-extraction.d.ts +3 -3
- package/dist/src/providers/openai-metadata-extraction.d.ts.map +1 -1
- package/dist/src/providers/openai-metadata-extraction.js +18 -3
- package/dist/src/providers/openai-metadata-extraction.js.map +1 -1
- package/dist/src/providers/placeholder-embed.d.ts +56 -0
- package/dist/src/providers/placeholder-embed.d.ts.map +1 -0
- package/dist/src/providers/placeholder-embed.js +64 -0
- package/dist/src/providers/placeholder-embed.js.map +1 -0
- package/dist/src/providers/stub.d.ts +2 -0
- package/dist/src/providers/stub.d.ts.map +1 -1
- package/dist/src/providers/stub.js +2 -0
- package/dist/src/providers/stub.js.map +1 -1
- package/dist/src/providers/types.d.ts +11 -0
- package/dist/src/providers/types.d.ts.map +1 -1
- package/dist/src/providers/voyage.d.ts +2 -1
- package/dist/src/providers/voyage.d.ts.map +1 -1
- package/dist/src/providers/voyage.js +1 -0
- package/dist/src/providers/voyage.js.map +1 -1
- package/dist/src/retrieval/compact.d.ts +116 -2
- package/dist/src/retrieval/compact.d.ts.map +1 -1
- package/dist/src/retrieval/compact.js +158 -5
- package/dist/src/retrieval/compact.js.map +1 -1
- package/dist/src/retrieval/context-pack.d.ts +114 -0
- package/dist/src/retrieval/context-pack.d.ts.map +1 -1
- package/dist/src/retrieval/context-pack.js +292 -8
- package/dist/src/retrieval/context-pack.js.map +1 -1
- package/dist/src/retrieval/current-truth.d.ts +360 -0
- package/dist/src/retrieval/current-truth.d.ts.map +1 -0
- package/dist/src/retrieval/current-truth.js +766 -0
- package/dist/src/retrieval/current-truth.js.map +1 -0
- package/dist/src/retrieval/git-state.d.ts +53 -0
- package/dist/src/retrieval/git-state.d.ts.map +1 -0
- package/dist/src/retrieval/git-state.js +174 -0
- package/dist/src/retrieval/git-state.js.map +1 -0
- package/dist/src/retrieval/lexical.d.ts.map +1 -1
- package/dist/src/retrieval/lexical.js +19 -3
- package/dist/src/retrieval/lexical.js.map +1 -1
- package/dist/src/retrieval/locator-boost.d.ts +37 -0
- package/dist/src/retrieval/locator-boost.d.ts.map +1 -0
- package/dist/src/retrieval/locator-boost.js +129 -0
- package/dist/src/retrieval/locator-boost.js.map +1 -0
- package/dist/src/retrieval/report-demotion.d.ts +46 -0
- package/dist/src/retrieval/report-demotion.d.ts.map +1 -0
- package/dist/src/retrieval/report-demotion.js +169 -0
- package/dist/src/retrieval/report-demotion.js.map +1 -0
- package/dist/src/retrieval/vector.d.ts.map +1 -1
- package/dist/src/retrieval/vector.js +11 -2
- package/dist/src/retrieval/vector.js.map +1 -1
- package/dist/src/server/app.d.ts.map +1 -1
- package/dist/src/server/app.js +92 -11
- package/dist/src/server/app.js.map +1 -1
- package/dist/src/server/routes/compact.d.ts.map +1 -1
- package/dist/src/server/routes/compact.js +4 -1
- package/dist/src/server/routes/compact.js.map +1 -1
- package/dist/src/server/routes/context.d.ts +1 -1
- package/dist/src/server/routes/context.d.ts.map +1 -1
- package/dist/src/server/routes/context.js +2 -1
- package/dist/src/server/routes/context.js.map +1 -1
- package/dist/src/server/routes/conversations-search.d.ts.map +1 -1
- package/dist/src/server/routes/conversations-search.js +28 -3
- package/dist/src/server/routes/conversations-search.js.map +1 -1
- package/dist/src/server/routes/enqueue.d.ts +11 -0
- package/dist/src/server/routes/enqueue.d.ts.map +1 -0
- package/dist/src/server/routes/enqueue.js +17 -0
- package/dist/src/server/routes/enqueue.js.map +1 -0
- package/dist/src/server/routes/friend-status.d.ts +339 -3
- package/dist/src/server/routes/friend-status.d.ts.map +1 -1
- package/dist/src/server/routes/friend-status.js +447 -13
- package/dist/src/server/routes/friend-status.js.map +1 -1
- package/dist/src/server/routes/ingest.d.ts.map +1 -1
- package/dist/src/server/routes/ingest.js +5 -2
- package/dist/src/server/routes/ingest.js.map +1 -1
- package/dist/src/server/routes/mcp-usage.d.ts +5 -4
- package/dist/src/server/routes/mcp-usage.d.ts.map +1 -1
- package/dist/src/server/routes/mcp-usage.js.map +1 -1
- package/dist/src/server/routes/reconcile.d.ts.map +1 -1
- package/dist/src/server/routes/reconcile.js +20 -1
- package/dist/src/server/routes/reconcile.js.map +1 -1
- package/dist/src/server/routes/reindex.d.ts.map +1 -1
- package/dist/src/server/routes/reindex.js +4 -1
- package/dist/src/server/routes/reindex.js.map +1 -1
- package/dist/src/server/routes/save.d.ts.map +1 -1
- package/dist/src/server/routes/save.js +4 -1
- package/dist/src/server/routes/save.js.map +1 -1
- package/dist/src/server/routes/search.d.ts +1 -1
- package/dist/src/server/routes/search.d.ts.map +1 -1
- package/dist/src/server/routes/search.js +253 -29
- package/dist/src/server/routes/search.js.map +1 -1
- package/dist/src/server/routes/triage.d.ts.map +1 -1
- package/dist/src/server/routes/triage.js +4 -1
- package/dist/src/server/routes/triage.js.map +1 -1
- package/dist/src/storage/rebuild.d.ts +35 -1
- package/dist/src/storage/rebuild.d.ts.map +1 -1
- package/dist/src/storage/rebuild.js +288 -64
- package/dist/src/storage/rebuild.js.map +1 -1
- package/dist/src/storage/tables.d.ts +29 -0
- package/dist/src/storage/tables.d.ts.map +1 -1
- package/dist/src/storage/tables.js +32 -1
- package/dist/src/storage/tables.js.map +1 -1
- package/operator/swiftbar/render-menu.py +524 -0
- package/operator/swiftbar/rift.10s.sh +176 -0
- package/package.json +9 -3
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/ingestion/inbox-core/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AACH,OAAO,EAAE,mBAAmB,EAAE,MAAM,kBAAkB,CAAC;AACvD,OAAO,EAAE,uBAAuB,EAAE,MAAM,uBAAuB,CAAC;AAChE,OAAO,EAAE,8BAA8B,EAAE,MAAM,+BAA+B,CAAC;AAC/E,OAAO,EAAE,sBAAsB,EAAE,MAAM,uBAAuB,CAAC;AAC/D,OAAO,EACL,0BAA0B,EAC1B,yBAAyB,GAC1B,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,gBAAgB,EAAE,KAAK,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC7E,OAAO,EACL,gCAAgC,EAChC,2BAA2B,EAC3B,cAAc,GACf,MAAM,kBAAkB,CAAC"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* inbox-core — pure file-detection primitives for an inbox watcher.
|
|
3
|
+
*
|
|
4
|
+
* Carved as a seam so the same logic can power both Rift's
|
|
5
|
+
* inbox-watcher.ts (which hands off to the job queue + ingest route)
|
|
6
|
+
* and a future standalone "AI Chat Backup Inbox" shell (which hands
|
|
7
|
+
* off to a local archive folder, with optional Rift indexing).
|
|
8
|
+
*
|
|
9
|
+
* Invariant: this module has zero Rift internals — no Fastify, no
|
|
10
|
+
* LanceDB, no Voyage, no JobQueue, no config loader, no parser
|
|
11
|
+
* registry. Only `node:` stdlib. The adapter wires it up.
|
|
12
|
+
*/
|
|
13
|
+
export { inboxIdempotencyKey } from "./idempotency.js";
|
|
14
|
+
export { stableConversationRowId } from "./conversation-key.js";
|
|
15
|
+
export { conversationContentFingerprint } from "./conversation-fingerprint.js";
|
|
16
|
+
export { detectSourceFromSubdir } from "./source-detection.js";
|
|
17
|
+
export { SUPPORTED_INBOX_EXTENSIONS, isSupportedInboxExtension, } from "./extensions.js";
|
|
18
|
+
export { sniffInboxSource } from "./source-sniffer.js";
|
|
19
|
+
export { readZipCentralDirectoryFilenames, sniffProviderFromZipEntries, sniffZipBuffer, } from "./zip-sniffer.js";
|
|
20
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/ingestion/inbox-core/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AACH,OAAO,EAAE,mBAAmB,EAAE,MAAM,kBAAkB,CAAC;AACvD,OAAO,EAAE,uBAAuB,EAAE,MAAM,uBAAuB,CAAC;AAChE,OAAO,EAAE,8BAA8B,EAAE,MAAM,+BAA+B,CAAC;AAC/E,OAAO,EAAE,sBAAsB,EAAE,MAAM,uBAAuB,CAAC;AAC/D,OAAO,EACL,0BAA0B,EAC1B,yBAAyB,GAC1B,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,gBAAgB,EAAwB,MAAM,qBAAqB,CAAC;AAC7E,OAAO,EACL,gCAAgC,EAChC,2BAA2B,EAC3B,cAAc,GACf,MAAM,kBAAkB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"source-detection.d.ts","sourceRoot":"","sources":["../../../../src/ingestion/inbox-core/source-detection.ts"],"names":[],"mappings":"AAaA,wBAAgB,sBAAsB,CACpC,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,MAAM,EAChB,YAAY,EAAE,SAAS,MAAM,EAAE,GAC9B,MAAM,GAAG,IAAI,CAWf"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Subdirectory-based source detection for inbox drops.
|
|
3
|
+
*
|
|
4
|
+
* If `data/inbox/<source>/file.json` exists and `<source>` is in
|
|
5
|
+
* `knownSources`, the source is `<source>`. Otherwise returns null
|
|
6
|
+
* and the caller decides on a default.
|
|
7
|
+
*
|
|
8
|
+
* Pure: no filesystem access, no Rift coupling. `knownSources` is
|
|
9
|
+
* passed in so the standalone shell can supply its own provider list
|
|
10
|
+
* without dragging in `parsers/types.ts`.
|
|
11
|
+
*/
|
|
12
|
+
import path from "node:path";
|
|
13
|
+
/**
 * Derive an inbox drop's source from the first directory below the inbox.
 *
 * For `<inboxDir>/<source>/.../file`, returns `<source>` when it is listed in
 * `knownSources`. Returns null when the file sits directly in the inbox root
 * or when the first directory is not a known source; the caller then picks
 * its own default.
 *
 * Pure path arithmetic: nothing is read from disk.
 *
 * @param {string} filePath - Path of the dropped file.
 * @param {string} inboxDir - Root directory of the inbox.
 * @param {readonly string[]} knownSources - Source names accepted as subdirectory hints.
 * @returns {string | null} The matching source name, or null when there is no hint.
 */
export function detectSourceFromSubdir(filePath, inboxDir, knownSources) {
    const segments = path.relative(inboxDir, filePath).split(path.sep);
    // A single segment means the file lives directly in the inbox root, so
    // there is no subdirectory to take a hint from. Counting segments (instead
    // of comparing the first segment with the basename) keeps detection
    // working when a file shares its name with its source directory,
    // e.g. `<inbox>/claude_web/claude_web`.
    if (segments.length < 2) {
        return null;
    }
    const [firstSegment] = segments;
    if (firstSegment && knownSources.includes(firstSegment)) {
        return firstSegment;
    }
    return null;
}
|
|
23
|
+
//# sourceMappingURL=source-detection.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"source-detection.js","sourceRoot":"","sources":["../../../../src/ingestion/inbox-core/source-detection.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AACH,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,MAAM,UAAU,sBAAsB,CACpC,QAAgB,EAChB,QAAgB,EAChB,YAA+B;IAE/B,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;IACnD,MAAM,YAAY,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACjD,IACE,YAAY;QACZ,YAAY,KAAK,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;QACxC,YAAY,CAAC,QAAQ,CAAC,YAAY,CAAC,EACnC,CAAC;QACD,OAAO,YAAY,CAAC;IACtB,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
export type SniffedProvider = "claude_web" | "grok_web" | "gemini_web";
|
|
2
|
+
/**
|
|
3
|
+
* Attempt to identify a non-ChatGPT provider for a file dropped in
|
|
4
|
+
* the inbox root. Returns null if no confident match — caller should
|
|
5
|
+
* fall back to its default (chatgpt_web in Rift's adapter).
|
|
6
|
+
*
|
|
7
|
+
* `data` may be undefined when the caller has only the filename
|
|
8
|
+
* (e.g., before reading the file). Filename-only heuristics still run.
|
|
9
|
+
*/
|
|
10
|
+
export declare function sniffInboxSource(filename: string, data?: Buffer, peekBytes?: number): SniffedProvider | null;
|
|
11
|
+
//# sourceMappingURL=source-sniffer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"source-sniffer.d.ts","sourceRoot":"","sources":["../../../../src/ingestion/inbox-core/source-sniffer.ts"],"names":[],"mappings":"AAuBA,MAAM,MAAM,eAAe,GAAG,YAAY,GAAG,UAAU,GAAG,YAAY,CAAC;AAIvE;;;;;;;GAOG;AACH,wBAAgB,gBAAgB,CAC9B,QAAQ,EAAE,MAAM,EAChB,IAAI,CAAC,EAAE,MAAM,EACb,SAAS,GAAE,MAA2B,GACrC,eAAe,GAAG,IAAI,CAuCxB"}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Conservative content/filename sniffer for inbox drops.
|
|
3
|
+
*
|
|
4
|
+
* Purpose: when a file lands in the inbox root (no subdirectory hint),
|
|
5
|
+
* identify non-ChatGPT providers so they reach the right parser
|
|
6
|
+
* instead of silently failing under the ChatGPT default.
|
|
7
|
+
*
|
|
8
|
+
* Design: identify ONLY non-default providers (claude_web, grok_web,
|
|
9
|
+
* gemini_web). ChatGPT shapes are not matched here — the adapter's
|
|
10
|
+
* default fallback already handles them. Returning null means "no
|
|
11
|
+
* confident match; let the caller decide."
|
|
12
|
+
*
|
|
13
|
+
* Heuristics are cheap and ordered most-specific first:
|
|
14
|
+
* 1. Filename pattern (no I/O)
|
|
15
|
+
* 2. JSON content head substring match (bounded peek)
|
|
16
|
+
*
|
|
17
|
+
* Pure: no filesystem access, no Rift internals. The buffer is passed
|
|
18
|
+
* in by the caller — Rift's adapter reads with `fs`, a standalone
|
|
19
|
+
* shell would read however it likes.
|
|
20
|
+
*/
|
|
21
|
+
import path from "node:path";
|
|
22
|
+
import { sniffZipBuffer } from "./zip-sniffer.js";
|
|
23
|
+
const DEFAULT_PEEK_BYTES = 64 * 1024;
|
|
24
|
+
/**
|
|
25
|
+
* Attempt to identify a non-ChatGPT provider for a file dropped in
|
|
26
|
+
* the inbox root. Returns null if no confident match — caller should
|
|
27
|
+
* fall back to its default (chatgpt_web in Rift's adapter).
|
|
28
|
+
*
|
|
29
|
+
* `data` may be undefined when the caller has only the filename
|
|
30
|
+
* (e.g., before reading the file). Filename-only heuristics still run.
|
|
31
|
+
*/
|
|
32
|
+
/**
 * Try to identify a non-ChatGPT provider for a file dropped in the inbox.
 *
 * Checks run cheapest first: filename patterns, then (for `.zip`) the
 * archive sniffer, then (for `.json`) a substring scan over the first
 * `peekBytes` bytes of the content. Returns null when nothing matches so
 * the caller can apply its own default.
 *
 * @param {string} filename - File name or path; only the basename is inspected.
 * @param {Buffer} [data] - File content. May be omitted, in which case only
 *   the filename heuristics run.
 * @param {number} [peekBytes] - Upper bound on the number of leading bytes
 *   decoded for the JSON scan. Values below zero are treated as zero.
 * @returns {"claude_web" | "grok_web" | "gemini_web" | null}
 */
export function sniffInboxSource(filename, data, peekBytes = DEFAULT_PEEK_BYTES) {
    const fnameLower = path.basename(filename).toLowerCase();
    const ext = path.extname(fnameLower);
    // --- Filename hints (cheap, no I/O) ---
    if (fnameLower.startsWith("prod-grok-backend")) {
        return "grok_web";
    }
    if (fnameLower.includes("claude")) {
        return "claude_web";
    }
    if (fnameLower.includes("takeout")) {
        return "gemini_web";
    }
    // --- Content hints ---
    if (!data || data.length === 0) {
        return null;
    }
    // ZIPs are delegated to the archive sniffer; nothing is decompressed here.
    if (ext === ".zip") {
        return sniffZipBuffer(data);
    }
    if (ext !== ".json") {
        return null;
    }
    // Clamp the peek window to [0, data.length]. Without the lower bound a
    // negative peekBytes becomes a negative `end` for subarray(), which counts
    // from the END of the buffer and would decode almost the entire file.
    const peekLength = Math.max(0, Math.min(peekBytes, data.length)) || 0;
    const head = data.subarray(0, peekLength).toString("utf-8");
    // Claude exports carry a chat_messages array on every conversation.
    if (head.includes('"chat_messages"')) {
        return "claude_web";
    }
    // Gemini activity cards carry timestamp_text or assistant_turns.
    if (head.includes('"timestamp_text"') || head.includes('"assistant_turns"')) {
        return "gemini_web";
    }
    // Grok exports have responses[] nested inside conversations. Requiring
    // both keys avoids matching a ChatGPT JSON that merely mentions one of
    // "conversations" or "responses" on its own.
    if (head.includes('"conversations"') && head.includes('"responses"')) {
        return "grok_web";
    }
    return null;
}
|
|
69
|
+
//# sourceMappingURL=source-sniffer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"source-sniffer.js","sourceRoot":"","sources":["../../../../src/ingestion/inbox-core/source-sniffer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AACH,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAIlD,MAAM,kBAAkB,GAAG,EAAE,GAAG,IAAI,CAAC;AAErC;;;;;;;GAOG;AACH,MAAM,UAAU,gBAAgB,CAC9B,QAAgB,EAChB,IAAa,EACb,YAAoB,kBAAkB;IAEtC,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IACzD,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;IAErC,yCAAyC;IACzC,IAAI,UAAU,CAAC,UAAU,CAAC,mBAAmB,CAAC;QAAE,OAAO,UAAU,CAAC;IAClE,IAAI,UAAU,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAAE,OAAO,YAAY,CAAC;IACvD,IAAI,UAAU,CAAC,QAAQ,CAAC,SAAS,CAAC;QAAE,OAAO,YAAY,CAAC;IAExD,wBAAwB;IACxB,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAE5C,kEAAkE;IAClE,kEAAkE;IAClE,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;QACnB,OAAO,cAAc,CAAC,IAAI,CAAC,CAAC;IAC9B,CAAC;IAED,IAAI,GAAG,KAAK,OAAO;QAAE,OAAO,IAAI,CAAC;IAEjC,MAAM,IAAI,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;IAElF,0EAA0E;IAC1E,IAAI,IAAI,CAAC,QAAQ,CAAC,iBAAiB,CAAC;QAAE,OAAO,YAAY,CAAC;IAE1D,iEAAiE;IACjE,IAAI,IAAI,CAAC,QAAQ,CAAC,kBAAkB,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,mBAAmB,CAAC,EAAE,CAAC;QAC5E,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,8DAA8D;IAC9D,iEAAiE;IACjE,4DAA4D;IAC5D,+CAA+C;IAC/C,IAAI,IAAI,CAAC,QAAQ,CAAC,iBAAiB,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QACrE,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC"}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Minimal ZIP central-directory reader for inbox source-sniffing.
|
|
3
|
+
*
|
|
4
|
+
* We do NOT extract or decompress. We only walk the central directory
|
|
5
|
+
* to learn the names of the entries inside the archive, so we can
|
|
6
|
+
* decide which provider an inbox-dropped ZIP belongs to.
|
|
7
|
+
*
|
|
8
|
+
* Why hand-rolled (no adm-zip): inbox-core must stay free of Rift
|
|
9
|
+
* internals AND of npm dependencies — only `node:` stdlib. The
|
|
10
|
+
* adapter (inbox-watcher) and a future standalone shell both compose
|
|
11
|
+
* this against their own filesystem.
|
|
12
|
+
*
|
|
13
|
+
* Scope: the regular (non-ZIP64) End-Of-Central-Directory record is
|
|
14
|
+
* enough for personal-use chat exports (Claude / ChatGPT / Gemini /
|
|
15
|
+
* Grok exports are all well under 4 GB and have far fewer than 65535
|
|
16
|
+
* entries). Archives that look like ZIP64, are truncated, or have a
|
|
17
|
+
* corrupt EOCD return null; the caller falls back to filename hints
|
|
18
|
+
* or the chatgpt_web default.
|
|
19
|
+
*
|
|
20
|
+
* ZIP layout we rely on:
|
|
21
|
+
* - End of Central Directory (EOCD): 22 bytes + optional comment,
|
|
22
|
+
* near the end of the file, signature 0x06054b50.
|
|
23
|
+
* - Central Directory file headers: signature 0x02014b50, 46 bytes
|
|
24
|
+
* fixed + variable filename / extra / comment fields.
|
|
25
|
+
*/
|
|
26
|
+
import type { SniffedProvider } from "./source-sniffer.js";
|
|
27
|
+
/**
|
|
28
|
+
* Read entry filenames from a ZIP's central directory.
|
|
29
|
+
*
|
|
30
|
+
* Returns null when:
|
|
31
|
+
* - buf is too small to contain a ZIP,
|
|
32
|
+
* - the EOCD signature is not found in the trailing 64 KiB,
|
|
33
|
+
* - the central directory pointers are out of range (truncated /
|
|
34
|
+
* corrupted archive),
|
|
35
|
+
* - the archive declares ZIP64 marker values (we don't follow the
|
|
36
|
+
* ZIP64 locator — out of scope for inbox sniffing),
|
|
37
|
+
* - any individual CD header signature mismatches.
|
|
38
|
+
*
|
|
39
|
+
* The list is in central-directory order, which is the order zip
|
|
40
|
+
* tools usually preserve. Filenames are decoded as UTF-8; ZIP entries
|
|
41
|
+
* default to CP437 if bit 11 of the general purpose flag is not set,
|
|
42
|
+
* but modern tools (Claude, Google Takeout, ChatGPT) all write UTF-8
|
|
43
|
+
* filenames for the marker files we look at, so we don't branch.
|
|
44
|
+
*/
|
|
45
|
+
export declare function readZipCentralDirectoryFilenames(buf: Buffer, options?: {
|
|
46
|
+
maxEntries?: number;
|
|
47
|
+
}): string[] | null;
|
|
48
|
+
/**
|
|
49
|
+
* Identify a non-ChatGPT provider from the entry list of a ZIP's
|
|
50
|
+
* central directory. ChatGPT-shaped zips intentionally return null so
|
|
51
|
+
* the adapter's default (`chatgpt_web`) handles them.
|
|
52
|
+
*
|
|
53
|
+
* Markers:
|
|
54
|
+
* - Any entry path starts with `Takeout/` → gemini_web.
|
|
55
|
+
* Google Takeout ZIPs use this exact top-level prefix.
|
|
56
|
+
* - A root-level `projects.json` together with `conversations.json`
|
|
57
|
+
* and NO `chat.html` / `shared_conversations.json` → claude_web.
|
|
58
|
+
* ChatGPT exports contain conversations.json too but ship chat.html
|
|
59
|
+
* and shared_conversations.json at root and have never shipped a
|
|
60
|
+
* projects.json; Claude exports always ship projects.json at root.
|
|
61
|
+
* - Grok ZIPs are caught upstream by filename hint
|
|
62
|
+
* (`prod-grok-backend*.zip`), so we don't look for them here.
|
|
63
|
+
*/
|
|
64
|
+
export declare function sniffProviderFromZipEntries(entries: ReadonlyArray<string>): SniffedProvider | null;
|
|
65
|
+
/**
|
|
66
|
+
* Convenience wrapper: read the CD and classify in one call. Returns
|
|
67
|
+
* null on a non-ZIP buffer, a corrupted CD, or an unclassified shape.
|
|
68
|
+
*/
|
|
69
|
+
export declare function sniffZipBuffer(buf: Buffer): SniffedProvider | null;
|
|
70
|
+
//# sourceMappingURL=zip-sniffer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"zip-sniffer.d.ts","sourceRoot":"","sources":["../../../../src/ingestion/inbox-core/zip-sniffer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAiB3D;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAgB,gCAAgC,CAC9C,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE;IAAE,UAAU,CAAC,EAAE,MAAM,CAAA;CAAE,GAChC,MAAM,EAAE,GAAG,IAAI,CAmEjB;AAED;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,2BAA2B,CACzC,OAAO,EAAE,aAAa,CAAC,MAAM,CAAC,GAC7B,eAAe,GAAG,IAAI,CAuCxB;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,eAAe,GAAG,IAAI,CAIlE"}
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
// ZIP record signatures and fixed sizes (all multi-byte fields are little-endian).
const EOCD_SIGNATURE = 0x06054b50;
const CD_HEADER_SIGNATURE = 0x02014b50;
const EOCD_MIN_SIZE = 22;
const EOCD_MAX_COMMENT = 0xffff;
// The EOCD record can start no earlier than this many bytes before the end.
const EOCD_MAX_SCAN = EOCD_MIN_SIZE + EOCD_MAX_COMMENT;
const CD_HEADER_FIXED_SIZE = 46;
/**
 * Default upper bound on the number of central-directory entries walked.
 * It keeps the work bounded for archives that declare a very large entry
 * count; nothing is ever extracted or decompressed.
 */
const DEFAULT_MAX_ENTRIES = 4096;
/**
 * Read entry filenames from a ZIP's central directory.
 *
 * Returns null when:
 *   - buf is not a Buffer or is shorter than a bare EOCD record,
 *   - no plausible EOCD record is found in the trailing 22 + 65535 bytes,
 *   - the EOCD carries ZIP64 sentinel values (the ZIP64 locator is not
 *     followed),
 *   - the central-directory offset/size point outside the buffer,
 *   - a central-directory header is truncated or has the wrong signature.
 *
 * Returns [] when the EOCD declares zero entries. When the archive
 * declares more entries than `options.maxEntries` (default 4096), only
 * the first `maxEntries` names are returned.
 *
 * Names are returned in central-directory order and decoded as UTF-8;
 * the general-purpose flag (bit 11) is not consulted, so CP437-encoded
 * names with non-ASCII bytes will not round-trip.
 *
 * @param {Buffer} buf - Whole archive (or at least its tail plus CD).
 * @param {{ maxEntries?: number }} [options]
 * @returns {string[] | null}
 */
export function readZipCentralDirectoryFilenames(buf, options) {
    if (!Buffer.isBuffer(buf) || buf.length < EOCD_MIN_SIZE)
        return null;
    const maxEntries = options?.maxEntries ?? DEFAULT_MAX_ENTRIES;
    // --- Locate the EOCD by scanning backwards from the end.
    const scanStart = Math.max(0, buf.length - EOCD_MAX_SCAN);
    let eocdOffset = -1;
    for (let i = buf.length - EOCD_MIN_SIZE; i >= scanStart; i--) {
        if (buf.readUInt32LE(i) !== EOCD_SIGNATURE)
            continue;
        // The archive comment follows the EOCD and may itself contain the
        // signature bytes. A candidate whose declared comment would run
        // past the end of the buffer cannot be the real record, so keep
        // scanning instead of committing to it.
        const declaredCommentLen = buf.readUInt16LE(i + 20);
        if (i + EOCD_MIN_SIZE + declaredCommentLen > buf.length)
            continue;
        eocdOffset = i;
        break;
    }
    if (eocdOffset < 0)
        return null;
    // EOCD fields (little-endian):
    //   off  4: disk number                          (uint16)
    //   off  6: disk where CD starts                 (uint16)
    //   off  8: CD entries on this disk              (uint16)
    //   off 10: total CD entries                     (uint16)
    //   off 12: CD size in bytes                     (uint32)
    //   off 16: CD offset from archive start         (uint32)
    //   off 20: comment length                       (uint16)
    const totalEntries = buf.readUInt16LE(eocdOffset + 10);
    const cdSize = buf.readUInt32LE(eocdOffset + 12);
    const cdOffset = buf.readUInt32LE(eocdOffset + 16);
    // ZIP64 sentinel values — bail out rather than misread.
    if (totalEntries === 0xffff || cdSize === 0xffffffff || cdOffset === 0xffffffff) {
        return null;
    }
    if (cdOffset + cdSize > buf.length)
        return null;
    if (cdOffset >= buf.length)
        return null;
    if (totalEntries === 0)
        return [];
    // --- Walk CD entries.
    const names = [];
    let cursor = cdOffset;
    const cdEnd = cdOffset + cdSize;
    const cap = Math.min(totalEntries, maxEntries);
    for (let i = 0; i < cap; i++) {
        if (cursor + CD_HEADER_FIXED_SIZE > cdEnd)
            return null;
        if (buf.readUInt32LE(cursor) !== CD_HEADER_SIGNATURE)
            return null;
        // CD header field offsets relative to cursor:
        //   off 28: filename length     (uint16)
        //   off 30: extra field length  (uint16)
        //   off 32: file comment length (uint16)
        const filenameLen = buf.readUInt16LE(cursor + 28);
        const extraLen = buf.readUInt16LE(cursor + 30);
        const commentLen = buf.readUInt16LE(cursor + 32);
        const filenameStart = cursor + CD_HEADER_FIXED_SIZE;
        const filenameEnd = filenameStart + filenameLen;
        if (filenameEnd > cdEnd)
            return null;
        names.push(buf.toString("utf-8", filenameStart, filenameEnd));
        // Skip the variable-length extra field and per-file comment.
        cursor = filenameEnd + extraLen + commentLen;
    }
    return names;
}
|
|
96
|
+
/**
 * Classify a ZIP by the entry names found in its central directory.
 *
 * Rules, in priority order (a leading `./` on an entry is ignored):
 *   1. Any entry whose path starts with `Takeout/` → "gemini_web".
 *      This wins immediately; later entries are not inspected.
 *   2. Root-level (no `/` in the path, compared case-insensitively)
 *      `projects.json` AND `conversations.json` present, with neither
 *      `chat.html` nor `shared_conversations.json` at root → "claude_web".
 *   3. Anything else, including an empty list → null, so the caller can
 *      apply its own default.
 *
 * @param {ReadonlyArray<string>} entries - Entry paths as stored in the archive.
 * @returns {"gemini_web" | "claude_web" | null}
 */
export function sniffProviderFromZipEntries(entries) {
    if (entries.length === 0)
        return null;
    // Lower-cased names of entries that sit directly at the archive root.
    const rootNames = new Set();
    for (const entry of entries) {
        const normalized = entry.replace(/^\.\//, "");
        // The Takeout prefix alone decides the result; nothing seen
        // before or after it can override, so stop right here.
        if (normalized.startsWith("Takeout/"))
            return "gemini_web";
        if (normalized.includes("/"))
            continue;
        rootNames.add(normalized.toLowerCase());
    }
    const hasClaudeMarkers = rootNames.has("projects.json") && rootNames.has("conversations.json");
    const hasChatGptMarkers = rootNames.has("chat.html") || rootNames.has("shared_conversations.json");
    return hasClaudeMarkers && !hasChatGptMarkers ? "claude_web" : null;
}
|
|
151
|
+
/**
 * One-call helper: list the archive's central-directory entry names and
 * classify them.
 *
 * Yields null when the entry names cannot be read (the reader returned
 * null) or when the names match no known provider shape.
 *
 * @param {Buffer} buf - Archive bytes.
 * @returns {string | null} Provider id, or null.
 */
export function sniffZipBuffer(buf) {
    const entryNames = readZipCentralDirectoryFilenames(buf);
    return entryNames ? sniffProviderFromZipEntries(entryNames) : null;
}
|
|
161
|
+
//# sourceMappingURL=zip-sniffer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"zip-sniffer.js","sourceRoot":"","sources":["../../../../src/ingestion/inbox-core/zip-sniffer.ts"],"names":[],"mappings":"AA2BA,MAAM,cAAc,GAAG,UAAU,CAAC;AAClC,MAAM,mBAAmB,GAAG,UAAU,CAAC;AACvC,MAAM,aAAa,GAAG,EAAE,CAAC;AACzB,MAAM,gBAAgB,GAAG,MAAM,CAAC;AAChC,MAAM,aAAa,GAAG,aAAa,GAAG,gBAAgB,CAAC;AACvD,MAAM,oBAAoB,GAAG,EAAE,CAAC;AAEhC;;;;;GAKG;AACH,MAAM,mBAAmB,GAAG,IAAI,CAAC;AAEjC;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,UAAU,gCAAgC,CAC9C,GAAW,EACX,OAAiC;IAEjC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,MAAM,GAAG,aAAa;QAAE,OAAO,IAAI,CAAC;IAErE,MAAM,UAAU,GAAG,OAAO,EAAE,UAAU,IAAI,mBAAmB,CAAC;IAE9D,6DAA6D;IAC7D,kEAAkE;IAClE,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,MAAM,GAAG,aAAa,CAAC,CAAC;IAC1D,IAAI,UAAU,GAAG,CAAC,CAAC,CAAC;IACpB,KAAK,IAAI,CAAC,GAAG,GAAG,CAAC,MAAM,GAAG,aAAa,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7D,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,KAAK,cAAc,EAAE,CAAC;YAC3C,UAAU,GAAG,CAAC,CAAC;YACf,MAAM;QACR,CAAC;IACH,CAAC;IACD,IAAI,UAAU,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAEhC,+BAA+B;IAC/B,iCAAiC;IACjC,0CAA0C;IAC1C,6CAA6C;IAC7C,sCAAsC;IACtC,sCAAsC;IACtC,kDAAkD;IAClD,oCAAoC;IACpC,MAAM,YAAY,GAAG,GAAG,CAAC,YAAY,CAAC,UAAU,GAAG,EAAE,CAAC,CAAC;IACvD,MAAM,MAAM,GAAG,GAAG,CAAC,YAAY,CAAC,UAAU,GAAG,EAAE,CAAC,CAAC;IACjD,MAAM,QAAQ,GAAG,GAAG,CAAC,YAAY,CAAC,UAAU,GAAG,EAAE,CAAC,CAAC;IAEnD,wDAAwD;IACxD,IAAI,YAAY,KAAK,MAAM,IAAI,MAAM,KAAK,UAAU,IAAI,QAAQ,KAAK,UAAU,EAAE,CAAC;QAChF,OAAO,IAAI,CAAC;IACd,CAAC;IAED,IAAI,QAAQ,GAAG,MAAM,GAAG,GAAG,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAChD,IAAI,QAAQ,IAAI,GAAG,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IACxC,IAAI,YAAY,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAElC,uBAAuB;IACvB,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,MAAM,GAAG,QAAQ,CAAC;IACtB,MAAM,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;IAChC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,YAAY,EAAE,UAAU,CAAC,CAAC;IAE/C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7B,IAAI,MAAM,GAAG,oBAAoB,GAAG,KAAK;YAAE,OAAO,IAAI,CAAC;QACvD,IAAI,GAAG,CAAC,YAAY,CAAC,MAAM,CAAC,KAAK,mBAAmB;YAAE,OAAO,IAAI,CAAC;QA
ElE,8CAA8C;QAC9C,qCAAqC;QACrC,wCAAwC;QACxC,yCAAyC;QACzC,MAAM,WAAW,GAAG,GAAG,CAAC,YAAY,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;QAClD,MAAM,QAAQ,GAAG,GAAG,CAAC,YAAY,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;QAC/C,MAAM,UAAU,GAAG,GAAG,CAAC,YAAY,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;QAEjD,MAAM,aAAa,GAAG,MAAM,GAAG,oBAAoB,CAAC;QACpD,MAAM,WAAW,GAAG,aAAa,GAAG,WAAW,CAAC;QAChD,IAAI,WAAW,GAAG,KAAK;YAAE,OAAO,IAAI,CAAC;QAErC,MAAM,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,OAAO,EAAE,aAAa,EAAE,WAAW,CAAC,CAAC;QACnE,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAErB,MAAM,GAAG,WAAW,GAAG,QAAQ,GAAG,UAAU,CAAC;IAC/C,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;;;;;;;;;;;;GAeG;AACH,MAAM,UAAU,2BAA2B,CACzC,OAA8B;IAE9B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEtC,IAAI,gBAAgB,GAAG,KAAK,CAAC;IAC7B,IAAI,oBAAoB,GAAG,KAAK,CAAC;IACjC,IAAI,eAAe,GAAG,KAAK,CAAC;IAC5B,IAAI,WAAW,GAAG,KAAK,CAAC;IACxB,IAAI,sBAAsB,GAAG,KAAK,CAAC;IAEnC,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;QAC1B,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QACtC,IAAI,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAChC,gBAAgB,GAAG,IAAI,CAAC;YACxB,8DAA8D;YAC9D,2DAA2D;YAC3D,kDAAkD;YAClD,MAAM;QACR,CAAC;QACD,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YACxB,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;YACjC,IAAI,KAAK,KAAK,oBAAoB;gBAAE,oBAAoB,GAAG,IAAI,CAAC;iBAC3D,IAAI,KAAK,KAAK,eAAe;gBAAE,eAAe,GAAG,IAAI,CAAC;iBACtD,IAAI,KAAK,KAAK,WAAW;gBAAE,WAAW,GAAG,IAAI,CAAC;iBAC9C,IAAI,KAAK,KAAK,2BAA2B;gBAAE,sBAAsB,GAAG,IAAI,CAAC;QAChF,CAAC;IACH,CAAC;IAED,IAAI,gBAAgB;QAAE,OAAO,YAAY,CAAC;IAE1C,IACE,eAAe;QACf,oBAAoB;QACpB,CAAC,WAAW;QACZ,CAAC,sBAAsB,EACvB,CAAC;QACD,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,GAAW;IACxC,MAAM,OAAO,GAAG,gCAAgC,CAAC,GAAG,CAAC,CAAC;IACtD,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1B,OAAO,2BAA2B,CAAC,OAAO,CAAC,CAAC;AAC9C,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"inbox-watcher.d.ts","sourceRoot":"","sources":["../../../src/ingestion/inbox-watcher.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"inbox-watcher.d.ts","sourceRoot":"","sources":["../../../src/ingestion/inbox-watcher.ts"],"names":[],"mappings":"AAmBA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAcvC,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,QAAQ,CAAC;CACjB;AAED;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,mBAAmB,GAAG,OAAO,CA2DrE"}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Inbox watcher — Slice 19
|
|
2
|
+
* Inbox watcher — Slice 19, refactored to compose `inbox-core`.
|
|
3
3
|
*
|
|
4
4
|
* Watches data/inbox/ for dropped files and creates ingest jobs.
|
|
5
5
|
* Reuses the existing secure ingest pipeline (archive security,
|
|
@@ -10,53 +10,18 @@
|
|
|
10
10
|
* or defaults to "chatgpt_web" for files in inbox root.
|
|
11
11
|
* - Raw files land in data/raw/conversations/inbox/ via rawSource override.
|
|
12
12
|
* - Idempotency key prevents duplicate processing on restart.
|
|
13
|
+
*
|
|
14
|
+
* This file is the Rift adapter — it owns `fs`, the JobQueue handoff,
|
|
15
|
+
* and Rift's parser registry. The pure detection primitives live in
|
|
16
|
+
* `inbox-core/` so a future standalone shell can reuse them.
|
|
13
17
|
*/
|
|
14
|
-
import crypto from "node:crypto";
|
|
15
18
|
import fs from "node:fs";
|
|
16
19
|
import path from "node:path";
|
|
17
20
|
import { Watcher } from "./watcher.js";
|
|
18
21
|
import { SUPPORTED_INGEST_SOURCES } from "./parsers/types.js";
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
// stage, so reject it up front.
|
|
22
|
-
const SUPPORTED_EXTENSIONS = new Set([".json", ".zip"]);
|
|
22
|
+
import { detectSourceFromSubdir, inboxIdempotencyKey, isSupportedInboxExtension, sniffInboxSource, } from "./inbox-core/index.js";
|
|
23
|
+
import { writeSkipQuarantine } from "./skip-quarantine.js";
|
|
23
24
|
const DEFAULT_SOURCE = "chatgpt_web";
|
|
24
|
-
/**
|
|
25
|
-
* Detect the parser source from a file path relative to the inbox directory.
|
|
26
|
-
* Files in subdirectories use the subdirectory name as source
|
|
27
|
-
* (e.g., data/inbox/chatgpt_web/export.zip → chatgpt_web).
|
|
28
|
-
* Files directly in inbox/ default to chatgpt_web.
|
|
29
|
-
*/
|
|
30
|
-
function detectSource(filePath, inboxDir) {
|
|
31
|
-
const relative = path.relative(inboxDir, filePath);
|
|
32
|
-
const firstSegment = relative.split(path.sep)[0];
|
|
33
|
-
if (firstSegment &&
|
|
34
|
-
firstSegment !== path.basename(filePath) &&
|
|
35
|
-
SUPPORTED_INGEST_SOURCES.includes(firstSegment)) {
|
|
36
|
-
return firstSegment;
|
|
37
|
-
}
|
|
38
|
-
return DEFAULT_SOURCE;
|
|
39
|
-
}
|
|
40
|
-
/**
|
|
41
|
-
* Generate a stable idempotency key for an inbox file.
|
|
42
|
-
* Includes path + mtime so re-dropping the same filename with new
|
|
43
|
-
* content gets a fresh key, while restart doesn't re-process.
|
|
44
|
-
*/
|
|
45
|
-
function inboxIdempotencyKey(filePath) {
|
|
46
|
-
let mtimeMs = 0;
|
|
47
|
-
try {
|
|
48
|
-
mtimeMs = fs.statSync(filePath).mtimeMs;
|
|
49
|
-
}
|
|
50
|
-
catch {
|
|
51
|
-
// File may have been moved; use 0
|
|
52
|
-
}
|
|
53
|
-
const hash = crypto
|
|
54
|
-
.createHash("sha256")
|
|
55
|
-
.update(`${filePath}:${mtimeMs}`)
|
|
56
|
-
.digest("hex")
|
|
57
|
-
.slice(0, 16);
|
|
58
|
-
return `inbox:${hash}`;
|
|
59
|
-
}
|
|
60
25
|
/**
|
|
61
26
|
* Create an inbox watcher. Returns a Watcher instance that can be
|
|
62
27
|
* started, paused, and stopped like any other watcher.
|
|
@@ -65,24 +30,43 @@ export function createInboxWatcher(opts) {
|
|
|
65
30
|
const inboxDir = path.join(opts.dataDir, "inbox");
|
|
66
31
|
fs.mkdirSync(inboxDir, { recursive: true });
|
|
67
32
|
const handler = async (event) => {
|
|
68
|
-
//
|
|
69
|
-
|
|
33
|
+
// Ignore deletions; otherwise process both `add` and `change` so a
|
|
34
|
+
// user who overwrites `export.zip` in place picks up the new
|
|
35
|
+
// content. The idempotency key includes mtime, so an unchanged
|
|
36
|
+
// `change` event collapses at the queue layer.
|
|
37
|
+
if (event.type === "unlink")
|
|
70
38
|
return;
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
39
|
+
if (!isSupportedInboxExtension(event.path)) {
|
|
40
|
+
// Markdown / images / other types are not parseable as web
|
|
41
|
+
// exports. Record a friendly skip so the user can see why their
|
|
42
|
+
// drop didn't ingest, instead of it disappearing silently.
|
|
43
|
+
await writeSkipQuarantine(opts.dataDir, {
|
|
44
|
+
reason: "inbox_unsupported_extension",
|
|
45
|
+
source_path: event.path,
|
|
46
|
+
metadata: { extension: path.extname(event.path).toLowerCase() },
|
|
47
|
+
}).catch(() => {
|
|
48
|
+
// Quarantine write failure must not break the watcher loop.
|
|
49
|
+
});
|
|
74
50
|
return;
|
|
75
|
-
|
|
51
|
+
}
|
|
76
52
|
let fileData;
|
|
53
|
+
let mtimeMs;
|
|
77
54
|
try {
|
|
78
55
|
fileData = fs.readFileSync(event.path);
|
|
56
|
+
mtimeMs = fs.statSync(event.path).mtimeMs;
|
|
79
57
|
}
|
|
80
58
|
catch {
|
|
81
59
|
// File may have been removed between detection and read.
|
|
82
60
|
return;
|
|
83
61
|
}
|
|
84
|
-
|
|
85
|
-
|
|
62
|
+
// Source resolution order:
|
|
63
|
+
// 1. Subdirectory hint (data/inbox/<provider>/file.json) — strongest.
|
|
64
|
+
// 2. Content/filename sniff — identifies non-ChatGPT providers.
|
|
65
|
+
// 3. DEFAULT_SOURCE — historical default for unidentified drops.
|
|
66
|
+
const source = detectSourceFromSubdir(event.path, inboxDir, SUPPORTED_INGEST_SOURCES) ??
|
|
67
|
+
sniffInboxSource(path.basename(event.path), fileData) ??
|
|
68
|
+
DEFAULT_SOURCE;
|
|
69
|
+
const idempotencyKey = inboxIdempotencyKey(event.path, mtimeMs);
|
|
86
70
|
const payload = {
|
|
87
71
|
source: source,
|
|
88
72
|
filename: path.basename(event.path),
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"inbox-watcher.js","sourceRoot":"","sources":["../../../src/ingestion/inbox-watcher.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"inbox-watcher.js","sourceRoot":"","sources":["../../../src/ingestion/inbox-watcher.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AACH,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAGvC,OAAO,EAAE,wBAAwB,EAAE,MAAM,oBAAoB,CAAC;AAC9D,OAAO,EACL,sBAAsB,EACtB,mBAAmB,EACnB,yBAAyB,EACzB,gBAAgB,GACjB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAE3D,MAAM,cAAc,GAAG,aAAa,CAAC;AAOrC;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAyB;IAC1D,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;IAClD,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAE5C,MAAM,OAAO,GAAG,KAAK,EAAE,KAAgB,EAAiB,EAAE;QACxD,mEAAmE;QACnE,6DAA6D;QAC7D,+DAA+D;QAC/D,+CAA+C;QAC/C,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ;YAAE,OAAO;QAEpC,IAAI,CAAC,yBAAyB,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3C,2DAA2D;YAC3D,gEAAgE;YAChE,2DAA2D;YAC3D,MAAM,mBAAmB,CAAC,IAAI,CAAC,OAAO,EAAE;gBACtC,MAAM,EAAE,6BAA6B;gBACrC,WAAW,EAAE,KAAK,CAAC,IAAI;gBACvB,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,EAAE;aAChE,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE;gBACZ,4DAA4D;YAC9D,CAAC,CAAC,CAAC;YACH,OAAO;QACT,CAAC;QAED,IAAI,QAAgB,CAAC;QACrB,IAAI,OAAe,CAAC;QACpB,IAAI,CAAC;YACH,QAAQ,GAAG,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACvC,OAAO,GAAG,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC;QAC5C,CAAC;QAAC,MAAM,CAAC;YACP,yDAAyD;YACzD,OAAO;QACT,CAAC;QAED,2BAA2B;QAC3B,wEAAwE;QACxE,kEAAkE;QAClE,mEAAmE;QACnE,MAAM,MAAM,GACV,sBAAsB,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,EAAE,wBAAwB,CAAC;YACtE,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,QAAQ,CAAC;YACrD,cAAc,CAAC;QACjB,MAAM,cAAc,GAAG,mBAAmB,CAAC,KAAK,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAEhE,MAAM,OAAO,GAAkB;YAC7B,MAAM,EAAE,MAAiC;YACzC,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC;YACnC,QAAQ,EAAE,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC;YACrC,SAAS,EAAE,OAAO;SACnB,CAAC;QAEF,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE;YAChC,eAAe,EAAE,cAAc;YAC/B,OAAO;SACR,CAAC,CAAC;IACL,CAAC,C
AAC;IAEF,OAAO,IAAI,OAAO,CAAC,CAAC,QAAQ,CAAC,EAAE,OAAO,CAAC,CAAC;AAC1C,CAAC"}
|
|
@@ -10,6 +10,13 @@ export interface IndexerConfig {
|
|
|
10
10
|
allowedRoots: readonly string[];
|
|
11
11
|
/** Target table for document storage. Defaults to "structured_docs". */
|
|
12
12
|
tableName?: "structured_docs" | "structured_docs_local";
|
|
13
|
+
/**
|
|
14
|
+
* Data directory root. Used to write skip-quarantine records when a file
|
|
15
|
+
* cannot be embedded (e.g. empty extracted content). Required so every
|
|
16
|
+
* production code path — watch, scheduled scan, reconcile — produces a
|
|
17
|
+
* visible explanation when a file is dropped, instead of silent stderr.
|
|
18
|
+
*/
|
|
19
|
+
dataDir: string;
|
|
13
20
|
}
|
|
14
21
|
/**
|
|
15
22
|
* Deterministic row ID from the canonical source path.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"indexer.d.ts","sourceRoot":"","sources":["../../../src/ingestion/indexer.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"indexer.d.ts","sourceRoot":"","sources":["../../../src/ingestion/indexer.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AASxF,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,KAAK,GAAG,QAAQ,GAAG,QAAQ,CAAC;IAClC,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,aAAa;IAC5B,UAAU,EAAE,UAAU,CAAC;IACvB,WAAW,EAAE,WAAW,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;IAChC,wEAAwE;IACxE,SAAS,CAAC,EAAE,iBAAiB,GAAG,uBAAuB,CAAC;IACxD;;;;;OAKG;IACH,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;;GAGG;AACH,wBAAgB,MAAM,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,CAEjD;AAED,qBAAa,OAAO;IAClB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAoB;IAC9C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAgB;gBAE3B,SAAS,EAAE,iBAAiB,EAAE,MAAM,EAAE,aAAa;IAK/D;;;;OAIG;IACG,WAAW,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC;YAcpC,YAAY;YAiEZ,YAAY;CAK3B"}
|
|
@@ -9,6 +9,9 @@ import crypto from "node:crypto";
|
|
|
9
9
|
import { getTable } from "../storage/tables.js";
|
|
10
10
|
import { extract, isSupported } from "./extractor.js";
|
|
11
11
|
import { validatePath, validateUnlinkPath } from "../security/paths.js";
|
|
12
|
+
import { writeSkipQuarantine } from "./skip-quarantine.js";
|
|
13
|
+
import { recordEmbed } from "../observability/embedding-events.js";
|
|
14
|
+
import { recordIndexWrite } from "../observability/index-events.js";
|
|
12
15
|
/**
|
|
13
16
|
* Deterministic row ID from the canonical source path.
|
|
14
17
|
* Same file always gets the same ID, enabling upsert via delete+add.
|
|
@@ -41,7 +44,28 @@ export class Indexer {
|
|
|
41
44
|
}
|
|
42
45
|
async handleUpsert(filePath) {
|
|
43
46
|
const doc = await extract(filePath);
|
|
44
|
-
|
|
47
|
+
// Empty/blank extracted content cannot be embedded — Voyage rejects empty
|
|
48
|
+
// strings with HTTP 400, and indexing an empty row produces nothing
|
|
49
|
+
// searchable anyway. Quarantine the skip so it's visible (not stderr-only)
|
|
50
|
+
// and remove any stale row from a prior good extraction of the same path.
|
|
51
|
+
if (isBlank(doc.content)) {
|
|
52
|
+
const id = fileId(filePath);
|
|
53
|
+
const table = getTable(this.config.tableName ?? "structured_docs");
|
|
54
|
+
await table.delete(`id = '${id}'`);
|
|
55
|
+
await writeSkipQuarantine(this.config.dataDir, {
|
|
56
|
+
reason: "empty_extracted_content",
|
|
57
|
+
source_path: filePath,
|
|
58
|
+
metadata: doc.metadata,
|
|
59
|
+
});
|
|
60
|
+
return;
|
|
61
|
+
}
|
|
62
|
+
const embeddingVec = await recordEmbed(this.config.dataDir, this.embedding, {
|
|
63
|
+
pipeline: this.config.sourceType === "filesystem_watched"
|
|
64
|
+
? "watcher"
|
|
65
|
+
: "scheduled_scan",
|
|
66
|
+
operation: "document_embedding",
|
|
67
|
+
input_count: 1,
|
|
68
|
+
}, () => this.embedding.embed(doc.content));
|
|
45
69
|
const id = fileId(filePath);
|
|
46
70
|
const table = getTable(this.config.tableName ?? "structured_docs");
|
|
47
71
|
// Upsert: delete existing row (if any), then add new one.
|
|
@@ -57,7 +81,14 @@ export class Indexer {
|
|
|
57
81
|
indexed_at: new Date().toISOString(),
|
|
58
82
|
metadata: JSON.stringify(doc.metadata),
|
|
59
83
|
};
|
|
60
|
-
await
|
|
84
|
+
await recordIndexWrite(this.config.dataDir, {
|
|
85
|
+
table: this.config.tableName ?? "structured_docs",
|
|
86
|
+
pipeline: this.config.sourceType === "filesystem_watched"
|
|
87
|
+
? "watcher"
|
|
88
|
+
: "scheduled_scan",
|
|
89
|
+
operation: "structured_doc_upsert",
|
|
90
|
+
row_count: 1,
|
|
91
|
+
}, () => table.add([row]));
|
|
61
92
|
}
|
|
62
93
|
async handleDelete(filePath) {
|
|
63
94
|
const id = fileId(filePath);
|
|
@@ -65,4 +96,7 @@ export class Indexer {
|
|
|
65
96
|
await table.delete(`id = '${id}'`);
|
|
66
97
|
}
|
|
67
98
|
}
|
|
99
|
+
/**
 * True when there is no embeddable text: the value is null/undefined,
 * the empty string, or consists only of whitespace.
 *
 * Nullish input is treated as blank rather than throwing, so a document
 * with missing content takes the same skip path as one with empty content.
 *
 * @param {string | null | undefined} s
 * @returns {boolean}
 */
function isBlank(s) {
    return s == null || s.trim().length === 0;
}
|
|
68
102
|
//# sourceMappingURL=indexer.js.map
|