@vellumai/assistant 0.8.4 → 0.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +33 -1
- package/ARCHITECTURE.md +3 -3
- package/bunfig.toml +6 -1
- package/docs/browser-use-architecture-phase2.md +1 -1
- package/docs/credential-execution-service.md +6 -6
- package/docs/plugins.md +4 -3
- package/knip.json +2 -1
- package/node_modules/@vellumai/skill-host-contracts/src/client.ts +12 -13
- package/node_modules/@vellumai/skill-host-contracts/src/skill-host.ts +4 -1
- package/node_modules/@vellumai/skill-host-contracts/src/tool-types.ts +16 -14
- package/openapi.yaml +2748 -216
- package/package.json +1 -1
- package/src/__tests__/actor-token-service.test.ts +3 -2
- package/src/__tests__/agent-loop-exit-reason.test.ts +102 -9
- package/src/__tests__/agent-loop-override-profile.test.ts +2 -1
- package/src/__tests__/agent-wake-disk-pressure-callsite.test.ts +1 -0
- package/src/__tests__/agent-wake-override-profile.test.ts +1 -0
- package/src/__tests__/always-loaded-tools-guard.test.ts +2 -2
- package/src/__tests__/annotate-risk-options.test.ts +1 -0
- package/src/__tests__/anthropic-provider.test.ts +34 -37
- package/src/__tests__/approval-cascade.test.ts +1 -0
- package/src/__tests__/approval-routes-http.test.ts +9 -13
- package/src/__tests__/assert-not-live-db.ts +79 -0
- package/src/__tests__/assistant-event-hub-self-exclusion.test.ts +293 -0
- package/src/__tests__/assistant-feature-flags-integration.test.ts +12 -28
- package/src/__tests__/audit-log-rotation.test.ts +72 -18
- package/src/__tests__/auto-analysis-end-to-end.test.ts +6 -6
- package/src/__tests__/background-workers-disk-pressure.test.ts +8 -11
- package/src/__tests__/browser-skill-endstate.test.ts +3 -3
- package/src/__tests__/btw-routes.test.ts +5 -5
- package/src/__tests__/call-controller.test.ts +3 -3
- package/src/__tests__/cancel-resolves-conversation-key.test.ts +1 -1
- package/src/__tests__/channel-approval-routes.test.ts +3 -2
- package/src/__tests__/channel-guardian.test.ts +6 -5
- package/src/__tests__/channel-readiness-slack-remote.test.ts +175 -0
- package/src/__tests__/channel-reply-delivery.test.ts +35 -0
- package/src/__tests__/channel-retry-sweep.test.ts +320 -3
- package/src/__tests__/checker.test.ts +18 -27
- package/src/__tests__/compaction-events.test.ts +2 -0
- package/src/__tests__/compaction-trail-store.test.ts +264 -0
- package/src/__tests__/compactor-call-site-logging.test.ts +215 -0
- package/src/__tests__/compactor-preserved-tail-count.test.ts +1 -0
- package/src/__tests__/computer-use-skill-manifest-regression.test.ts +12 -16
- package/src/__tests__/computer-use-tools.test.ts +14 -18
- package/src/__tests__/config-loader-backfill.test.ts +13 -28
- package/src/__tests__/config-loader-corrupt.test.ts +5 -5
- package/src/__tests__/config-loader-platform-defaults.test.ts +93 -26
- package/src/__tests__/config-loader-quarantine-bulletin.test.ts +3 -3
- package/src/__tests__/config-managed-gemini-defaults.test.ts +3 -4
- package/src/__tests__/config-schema.test.ts +10 -10
- package/src/__tests__/confirmation-request-guardian-bridge.test.ts +0 -1
- package/src/__tests__/connection-model-compat.test.ts +83 -0
- package/src/__tests__/contacts-tools.test.ts +3 -2
- package/src/__tests__/context-token-estimator.test.ts +22 -0
- package/src/__tests__/conversation-abort-tool-results.test.ts +5 -0
- package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +2 -1
- package/src/__tests__/conversation-agent-loop-handlers-max-tokens.test.ts +55 -0
- package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +2 -1
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +231 -2
- package/src/__tests__/conversation-agent-loop.test.ts +581 -54
- package/src/__tests__/conversation-analysis-routes.test.ts +1 -0
- package/src/__tests__/conversation-app-control-instantiation.test.ts +31 -24
- package/src/__tests__/conversation-app-control-lifecycle.test.ts +1 -0
- package/src/__tests__/conversation-attention-store.test.ts +101 -0
- package/src/__tests__/conversation-attention-telegram.test.ts +3 -2
- package/src/__tests__/conversation-clear-safety.test.ts +25 -25
- package/src/__tests__/conversation-confirmation-signals.test.ts +1 -0
- package/src/__tests__/conversation-delete-schedule-cleanup.test.ts +1 -1
- package/src/__tests__/conversation-disk-view-integration.test.ts +2 -2
- package/src/__tests__/conversation-error.test.ts +61 -0
- package/src/__tests__/conversation-fork-crud.test.ts +239 -15
- package/src/__tests__/conversation-fork-route.test.ts +3 -2
- package/src/__tests__/conversation-history-web-search.test.ts +1 -0
- package/src/__tests__/conversation-inference-profile-list.test.ts +3 -2
- package/src/__tests__/conversation-inference-profile-route.test.ts +3 -2
- package/src/__tests__/conversation-lifecycle.test.ts +53 -11
- package/src/__tests__/conversation-list-source.test.ts +3 -2
- package/src/__tests__/conversation-load-history-repair.test.ts +2 -1
- package/src/__tests__/{conversation-load-cleaned-at.test.ts → conversation-load-history-stripped.test.ts} +14 -13
- package/src/__tests__/conversation-pairing.test.ts +53 -0
- package/src/__tests__/conversation-process-app-control-preactivation.test.ts +26 -7
- package/src/__tests__/conversation-process-callsite.test.ts +1 -0
- package/src/__tests__/conversation-provider-retry-repair.test.ts +6 -0
- package/src/__tests__/conversation-queue.test.ts +333 -291
- package/src/__tests__/conversation-routes-disk-view.test.ts +112 -18
- package/src/__tests__/conversation-routes-guardian-reply.test.ts +33 -8
- package/src/__tests__/conversation-routes-slash-commands.test.ts +68 -2
- package/src/__tests__/conversation-runtime-assembly.test.ts +78 -0
- package/src/__tests__/conversation-skill-tools.test.ts +40 -147
- package/src/__tests__/conversation-slash-queue.test.ts +84 -32
- package/src/__tests__/conversation-slash-unknown.test.ts +5 -0
- package/src/__tests__/conversation-speed-override.test.ts +1 -0
- package/src/__tests__/conversation-store.test.ts +1 -1
- package/src/__tests__/conversation-surfaces-action-delivery.test.ts +46 -0
- package/src/__tests__/conversation-surfaces-data-persist.test.ts +1 -0
- package/src/__tests__/conversation-surfaces-standalone-payloads.test.ts +6 -3
- package/src/__tests__/conversation-surfaces-standalone.test.ts +6 -3
- package/src/__tests__/conversation-surfaces-state-update.test.ts +3 -3
- package/src/__tests__/conversation-surfaces-table-action.test.ts +7 -17
- package/src/__tests__/conversation-sync-tags.test.ts +218 -35
- package/src/__tests__/conversation-title-service.test.ts +1 -0
- package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +30 -0
- package/src/__tests__/conversation-usage.test.ts +1 -0
- package/src/__tests__/conversation-workspace-cache-state.test.ts +2 -0
- package/src/__tests__/conversation-workspace-injection.test.ts +6 -1
- package/src/__tests__/conversation-workspace-tool-tracking.test.ts +6 -1
- package/src/__tests__/credential-broker-browser-fill.test.ts +3 -3
- package/src/__tests__/credential-broker-server-use.test.ts +5 -5
- package/src/__tests__/credential-execution-client.test.ts +72 -1
- package/src/__tests__/credential-execution-feature-gates.test.ts +19 -19
- package/src/__tests__/credential-execution-tools.test.ts +6 -6
- package/src/__tests__/credential-health-service.test.ts +252 -3
- package/src/__tests__/credential-security-invariants.test.ts +6 -5
- package/src/__tests__/credential-vault-unit.test.ts +21 -21
- package/src/__tests__/credential-vault.test.ts +5 -5
- package/src/__tests__/cross-provider-web-search.test.ts +56 -2
- package/src/__tests__/db-connection-isolation.test.ts +7 -6
- package/src/__tests__/db-conversation-fork-lineage-migration.test.ts +8 -10
- package/src/__tests__/db-conversation-inference-profile-migration.test.ts +7 -10
- package/src/__tests__/db-llm-request-log-provider-migration.test.ts +9 -15
- package/src/__tests__/db-test-helpers.ts +58 -0
- package/src/__tests__/disk-pressure-guard.test.ts +58 -41
- package/src/__tests__/disk-pressure-lifecycle.test.ts +13 -10
- package/src/__tests__/disk-pressure-routes.test.ts +0 -33
- package/src/__tests__/disk-pressure-tools.test.ts +0 -4
- package/src/__tests__/dm-persistence.test.ts +26 -40
- package/src/__tests__/document-create-dedupe.test.ts +189 -0
- package/src/__tests__/document-find-replace.test.ts +3 -2
- package/src/__tests__/document-tool-security.test.ts +81 -2
- package/src/__tests__/dynamic-page-surface.test.ts +2 -2
- package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +5 -4
- package/src/__tests__/email-html-renderer.test.ts +12 -0
- package/src/__tests__/encrypted-store-test-helpers.ts +56 -0
- package/src/__tests__/encrypted-store.test.ts +11 -9
- package/src/__tests__/feature-flag-test-helpers.ts +53 -0
- package/src/__tests__/filing-service.test.ts +1 -0
- package/src/__tests__/first-greeting.test.ts +62 -12
- package/src/__tests__/gateway-flag-listener.test.ts +236 -0
- package/src/__tests__/gemini-provider.test.ts +104 -0
- package/src/__tests__/guardian-action-sweep.test.ts +3 -2
- package/src/__tests__/guardian-dispatch.test.ts +0 -1
- package/src/__tests__/guardian-outbound-http.test.ts +10 -7
- package/src/__tests__/handlers-skills-memory-v2-reseed.test.ts +48 -3
- package/src/__tests__/handlers-user-message-approval-consumption.test.ts +2 -1
- package/src/__tests__/heartbeat-disk-pressure.test.ts +5 -0
- package/src/__tests__/heartbeat-service.test.ts +5 -0
- package/src/__tests__/helpers/mock-logger.ts +26 -0
- package/src/__tests__/host-bash-routes.test.ts +1 -0
- package/src/__tests__/host-cu-routes-targeted.test.ts +1 -0
- package/src/__tests__/host-file-routes-targeted.test.ts +1 -0
- package/src/__tests__/host-shell-tool.test.ts +6 -5
- package/src/__tests__/host-transfer-routes-targeted.test.ts +1 -0
- package/src/__tests__/http-conversation-lineage.test.ts +3 -2
- package/src/__tests__/http-user-message-parity.test.ts +29 -7
- package/src/__tests__/identity-intro-cache.test.ts +133 -22
- package/src/__tests__/inbound-slack-persistence.test.ts +44 -72
- package/src/__tests__/inference-profile-reaper.test.ts +3 -2
- package/src/__tests__/inference-profile-session-ipc.test.ts +3 -2
- package/src/__tests__/init-feature-flag-overrides.test.ts +5 -6
- package/src/__tests__/injector-disk-pressure.test.ts +3 -17
- package/src/__tests__/inline-skill-load-permissions.test.ts +4 -4
- package/src/__tests__/list-messages-hidden-metadata.test.ts +80 -0
- package/src/__tests__/list-messages-tool-merge.test.ts +70 -11
- package/src/__tests__/llm-context-normalization.test.ts +42 -0
- package/src/__tests__/llm-request-log-call-site.test.ts +136 -0
- package/src/__tests__/llm-request-log-source-clickhouse.test.ts +26 -0
- package/src/__tests__/llm-resolver.test.ts +408 -9
- package/src/__tests__/llm-schema.test.ts +1 -1
- package/src/__tests__/llm-usage-store.test.ts +66 -0
- package/src/__tests__/logger.test.ts +89 -0
- package/src/__tests__/manual-token-reconciliation.test.ts +76 -1
- package/src/__tests__/mcp-abort-signal.test.ts +16 -2
- package/src/__tests__/mcp-client-auth.test.ts +14 -0
- package/src/__tests__/media-generate-image.test.ts +31 -0
- package/src/__tests__/memory-v2-static-injector.test.ts +7 -7
- package/src/__tests__/messaging-send-tool.test.ts +1 -0
- package/src/__tests__/migration-import-from-url.test.ts +3 -3
- package/src/__tests__/mock-gateway-ipc.ts +18 -2
- package/src/__tests__/model-intents.test.ts +4 -6
- package/src/__tests__/native-web-search.test.ts +30 -2
- package/src/__tests__/notification-deep-link.test.ts +62 -0
- package/src/__tests__/notification-guardian-path.test.ts +0 -1
- package/src/__tests__/oauth-commands-routes.test.ts +37 -0
- package/src/__tests__/oauth-provider-visibility.test.ts +8 -8
- package/src/__tests__/oauth-store.test.ts +3 -2
- package/src/__tests__/onboarding-template-contract.test.ts +4 -3
- package/src/__tests__/openai-provider.test.ts +54 -9
- package/src/__tests__/openai-responses-provider.test.ts +176 -14
- package/src/__tests__/openrouter-provider-only.test.ts +27 -5
- package/src/__tests__/outbound-slack-persistence.test.ts +46 -1
- package/src/__tests__/pending-interactions-resolved-event.test.ts +0 -1
- package/src/__tests__/persistence-pipeline.test.ts +139 -1
- package/src/__tests__/persistence-secret-redaction.test.ts +83 -12
- package/src/__tests__/platform-bash-auto-approve.test.ts +2 -2
- package/src/__tests__/platform.test.ts +2 -2
- package/src/__tests__/plugin-api-tool-definition.test.ts +92 -0
- package/src/__tests__/plugin-bootstrap.test.ts +11 -13
- package/src/__tests__/plugin-tool-contribution.test.ts +50 -40
- package/src/__tests__/plugin-types.test.ts +3 -2
- package/src/__tests__/prechat-onboarding-contract.test.ts +131 -98
- package/src/__tests__/pricing.test.ts +12 -0
- package/src/__tests__/process-message-background-slack.test.ts +21 -16
- package/src/__tests__/process-message-display-content.test.ts +19 -22
- package/src/__tests__/provider-catalog-visibility.test.ts +9 -9
- package/src/__tests__/provider-platform-proxy-integration.test.ts +216 -4
- package/src/__tests__/provider-registry-ollama.test.ts +45 -22
- package/src/__tests__/prune-jobs-changes-parser.test.ts +61 -0
- package/src/__tests__/recording-handler.test.ts +1 -0
- package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +1 -0
- package/src/__tests__/registry.test.ts +84 -84
- package/src/__tests__/relay-server.test.ts +10 -10
- package/src/__tests__/require-fresh-approval.test.ts +2 -2
- package/src/__tests__/runtime-attachment-metadata.test.ts +3 -2
- package/src/__tests__/runtime-events-sse-bilingual.test.ts +154 -0
- package/src/__tests__/schedule-store.test.ts +16 -1
- package/src/__tests__/scheduler-reuse-conversation.test.ts +48 -3
- package/src/__tests__/secret-ingress-http.test.ts +5 -1
- package/src/__tests__/secure-keys.test.ts +3 -3
- package/src/__tests__/send-endpoint-busy.test.ts +81 -42
- package/src/__tests__/server-history-render.test.ts +4 -1
- package/src/__tests__/shell-tool-proxy-mode.test.ts +1 -1
- package/src/__tests__/skill-feature-flags-integration.test.ts +8 -10
- package/src/__tests__/skill-feature-flags.test.ts +16 -18
- package/src/__tests__/skill-load-feature-flag.test.ts +5 -5
- package/src/__tests__/skill-projection-feature-flag.test.ts +48 -37
- package/src/__tests__/skill-projection.benchmark.test.ts +7 -13
- package/src/__tests__/skill-tool-factory.test.ts +97 -96
- package/src/__tests__/slack-channel-config.test.ts +3 -3
- package/src/__tests__/subagent-call-site-routing.test.ts +11 -3
- package/src/__tests__/subagent-disposal.test.ts +27 -8
- package/src/__tests__/subagent-fork-notifications.test.ts +24 -9
- package/src/__tests__/subagent-fork-spawn.test.ts +13 -4
- package/src/__tests__/subagent-manager-notify.test.ts +20 -8
- package/src/__tests__/subagent-notify-parent.test.ts +6 -5
- package/src/__tests__/subagent-spawn-tool-fork.test.ts +58 -0
- package/src/__tests__/subagent-tools.test.ts +2 -1
- package/src/__tests__/suggestion-routes.test.ts +2 -0
- package/src/__tests__/sync-message-contract.test.ts +59 -0
- package/src/__tests__/system-prompt.test.ts +183 -131
- package/src/__tests__/terminal-tools.test.ts +1 -1
- package/src/__tests__/test-preload-verifier.ts +68 -0
- package/src/__tests__/test-preload.ts +32 -39
- package/src/__tests__/tool-approval-handler.test.ts +1 -5
- package/src/__tests__/tool-execute-pipeline.test.ts +2 -2
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +2 -5
- package/src/__tests__/tool-executor-lifecycle-events.test.ts +35 -12
- package/src/__tests__/tool-executor.test.ts +64 -72
- package/src/__tests__/tool-grant-request-escalation.test.ts +1 -6
- package/src/__tests__/tool-preview-lifecycle.test.ts +1 -0
- package/src/__tests__/tool-result-metadata-plumbing.test.ts +1 -0
- package/src/__tests__/trusted-contact-approval-notifier.test.ts +0 -1
- package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +1 -6
- package/src/__tests__/trusted-contact-multichannel.test.ts +0 -1
- package/src/__tests__/twilio-routes.test.ts +3 -2
- package/src/__tests__/ui-file-upload-surface.test.ts +2 -2
- package/src/__tests__/usage-routes.test.ts +3 -0
- package/src/__tests__/validate-input.test.ts +381 -0
- package/src/__tests__/verification-control-plane-policy.test.ts +3 -2
- package/src/__tests__/voice-scoped-grant-consumer.test.ts +2 -1
- package/src/__tests__/voice-session-bridge.test.ts +37 -28
- package/src/__tests__/workspace-git-service.test.ts +6 -5
- package/src/__tests__/workspace-migration-089-move-memory-tree-out-of-v3.test.ts +86 -0
- package/src/__tests__/workspace-migration-090-memory-router-cost-optimized-profile.test.ts +326 -0
- package/src/__tests__/workspace-migration-091-retighten-migration-onboarding-thread.test.ts +166 -0
- package/src/acp/__tests__/prepare-agent-env.test.ts +146 -0
- package/src/acp/prepare-agent-env.ts +78 -0
- package/src/acp/session-manager.ts +6 -7
- package/src/agent/loop.ts +88 -0
- package/src/api/README.md +127 -0
- package/src/api/constants/call-sites.ts +27 -0
- package/src/api/events/assistant-outbound-attachment.ts +51 -0
- package/src/api/events/assistant-text-delta.ts +32 -0
- package/src/api/events/assistant-turn-start.ts +33 -0
- package/src/api/events/document-comment-created.ts +48 -0
- package/src/api/events/document-comment-deleted.ts +24 -0
- package/src/api/events/document-comment-reopened.ts +25 -0
- package/src/api/events/document-comment-resolved.ts +27 -0
- package/src/api/events/generation-cancelled.ts +24 -0
- package/src/api/events/generation-handoff.ts +41 -0
- package/src/api/events/message-complete.ts +42 -0
- package/src/api/events/open-url.ts +30 -0
- package/src/api/events/relationship-state-updated.ts +25 -0
- package/src/api/events/tool-use-start.ts +32 -0
- package/src/api/index.ts +129 -0
- package/src/api/package.json +10 -0
- package/src/api/responses/llm-context-response.ts +39 -0
- package/src/api/responses/llm-request-log-entry.ts +93 -0
- package/src/api/responses/memory-recall-log.ts +65 -0
- package/src/api/responses/memory-v2-activation-log.ts +78 -0
- package/src/background-wake/background-wake-routes.test.ts +868 -0
- package/src/background-wake/platform-client.test.ts +308 -0
- package/src/background-wake/platform-client.ts +167 -0
- package/src/background-wake/publisher.ts +91 -0
- package/src/background-wake/runtime-registry.ts +24 -0
- package/src/background-wake/wake-intent-hooks.test.ts +282 -0
- package/src/calls/guardian-dispatch.ts +1 -0
- package/src/calls/voice-session-bridge.ts +4 -4
- package/src/cli/commands/__tests__/browser.test.ts +23 -5
- package/src/cli/commands/__tests__/conversations-slack.test.ts +16 -0
- package/src/cli/commands/__tests__/domain-register.test.ts +110 -0
- package/src/cli/commands/__tests__/domain-status.test.ts +33 -33
- package/src/cli/commands/__tests__/inference-send.test.ts +108 -5
- package/src/cli/commands/__tests__/memory-v2-compare-render.test.ts +98 -0
- package/src/cli/commands/__tests__/memory-v2.test.ts +1 -0
- package/src/cli/commands/__tests__/memory-v3-render.test.ts +340 -0
- package/src/cli/commands/__tests__/notifications.test.ts +184 -40
- package/src/cli/commands/browser.ts +247 -0
- package/src/cli/commands/channels/__tests__/channels.test.ts +143 -0
- package/src/cli/commands/channels/index.ts +229 -0
- package/src/cli/commands/domain.ts +91 -41
- package/src/cli/commands/inference.ts +93 -40
- package/src/cli/commands/memory-v2-compare-render.ts +115 -0
- package/src/cli/commands/memory-v2.ts +176 -1
- package/src/cli/commands/memory-v3-render.ts +491 -0
- package/src/cli/commands/memory-v3.ts +567 -0
- package/src/cli/commands/notifications.ts +365 -55
- package/src/cli/lib/open-browser.ts +7 -2
- package/src/cli/program.ts +4 -0
- package/src/config/assistant-feature-flags.ts +39 -46
- package/src/config/bundled-skills/document-editor/SKILL.md +16 -3
- package/src/config/bundled-skills/document-editor/TOOLS.json +18 -0
- package/src/config/bundled-skills/document-editor/tools/document-open.ts +12 -0
- package/src/config/bundled-skills/image-studio/SKILL.md +4 -0
- package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +2 -2
- package/src/config/bundled-skills/media-processing/tools/ingest-media.ts +13 -8
- package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +10 -3
- package/src/config/bundled-skills/phone-calls/references/TRANSCRIPTS.md +16 -14
- package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +7 -2
- package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +7 -2
- package/src/config/bundled-skills/schedule/SKILL.md +1 -1
- package/src/config/bundled-skills/schedule/TOOLS.json +2 -2
- package/src/config/bundled-skills/settings/tools/open-system-settings.ts +1 -0
- package/src/config/bundled-tool-registry.ts +2 -0
- package/src/config/call-site-defaults.ts +8 -7
- package/src/config/feature-flag-cache.ts +86 -0
- package/src/config/feature-flag-registry.json +33 -17
- package/src/config/llm-context-resolution.ts +10 -1
- package/src/config/llm-resolver.ts +121 -15
- package/src/config/loader.ts +4 -5
- package/src/config/schemas/__tests__/memory-v2.test.ts +228 -1
- package/src/config/schemas/call-site-catalog.ts +21 -7
- package/src/config/schemas/heartbeat.ts +1 -1
- package/src/config/schemas/llm.ts +102 -2
- package/src/config/schemas/memory-v2.ts +272 -0
- package/src/config/schemas/memory.ts +2 -1
- package/src/config/schemas/services.ts +6 -2
- package/src/config/seed-inference-profiles.ts +36 -16
- package/src/context/compactor.ts +52 -0
- package/src/context/token-estimator.ts +10 -5
- package/src/conversations/__tests__/message-consolidation.test.ts +350 -0
- package/src/conversations/message-consolidation.ts +404 -0
- package/src/credential-execution/executable-discovery.ts +40 -0
- package/src/credential-execution/process-manager.ts +6 -2
- package/src/credential-health/credential-health-service.ts +125 -40
- package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +3 -6
- package/src/daemon/__tests__/conversation-surfaces-launch.test.ts +13 -15
- package/src/daemon/__tests__/conversation-tool-setup-exclude.test.ts +2 -3
- package/src/daemon/__tests__/daemon-skill-host.test.ts +2 -0
- package/src/daemon/__tests__/meet-manifest-loader.test.ts +25 -12
- package/src/daemon/__tests__/native-web-search-metadata.test.ts +1 -0
- package/src/daemon/__tests__/switch-inference-profile-tool.test.ts +107 -0
- package/src/daemon/__tests__/web-search-status-text.test.ts +1 -0
- package/src/daemon/conversation-agent-loop-handlers.ts +390 -80
- package/src/daemon/conversation-agent-loop.ts +244 -90
- package/src/daemon/conversation-error.ts +64 -6
- package/src/daemon/conversation-lifecycle.ts +27 -22
- package/src/daemon/conversation-messaging.ts +84 -43
- package/src/daemon/conversation-process.ts +74 -37
- package/src/daemon/conversation-runtime-assembly.ts +38 -17
- package/src/daemon/conversation-skill-tools.ts +14 -30
- package/src/daemon/conversation-surfaces.ts +69 -34
- package/src/daemon/conversation-tool-setup.ts +77 -32
- package/src/daemon/conversation-usage.ts +2 -0
- package/src/daemon/conversation.ts +40 -75
- package/src/daemon/daemon-control.ts +1 -1
- package/src/daemon/daemon-skill-host.ts +9 -2
- package/src/daemon/disk-pressure-guard.ts +39 -29
- package/src/daemon/first-greeting.ts +31 -13
- package/src/daemon/handlers/config-model.test.ts +1 -0
- package/src/daemon/handlers/conversations.ts +11 -3
- package/src/daemon/handlers/shared.ts +6 -1
- package/src/daemon/host-browser-proxy.ts +5 -5
- package/src/daemon/host-cu-proxy.ts +4 -4
- package/src/daemon/host-file-proxy.ts +4 -4
- package/src/daemon/host-proxy-base.ts +4 -4
- package/src/daemon/host-transfer-proxy.ts +10 -10
- package/src/daemon/lifecycle.ts +29 -26
- package/src/daemon/mcp-reload-service.ts +1 -1
- package/src/daemon/meet-manifest-loader.ts +11 -24
- package/src/daemon/message-types/conversations.ts +22 -27
- package/src/daemon/message-types/document-comments.ts +8 -44
- package/src/daemon/message-types/home.ts +2 -14
- package/src/daemon/message-types/integrations.ts +2 -7
- package/src/daemon/message-types/messages.ts +25 -48
- package/src/daemon/message-types/subagents.ts +6 -0
- package/src/daemon/message-types/sync.ts +14 -0
- package/src/daemon/process-message.ts +9 -9
- package/src/daemon/providers-setup.ts +1 -1
- package/src/daemon/server.ts +16 -0
- package/src/daemon/shutdown-handlers.ts +24 -5
- package/src/daemon/switch-inference-profile-tool.ts +62 -0
- package/src/daemon/tool-setup-types.ts +7 -0
- package/src/daemon/wake-target-adapter.ts +10 -0
- package/src/documents/document-store.ts +38 -0
- package/src/export/__tests__/transcript-formatter.test.ts +1 -0
- package/src/heartbeat/__tests__/heartbeat-service.test.ts +30 -1
- package/src/heartbeat/heartbeat-service.ts +63 -0
- package/src/home/__tests__/feed-writer.test.ts +161 -0
- package/src/home/__tests__/post-connect-feed.test.ts +1 -0
- package/src/home/__tests__/suggested-prompts.test.ts +55 -59
- package/src/home/feed-writer.ts +146 -7
- package/src/home/home-greeting.ts +0 -9
- package/src/home/suggested-prompts.ts +27 -154
- package/src/ipc/__tests__/cli-ipc.test.ts +1 -0
- package/src/ipc/gateway-client.test.ts +4 -1
- package/src/ipc/gateway-flag-listener.ts +123 -0
- package/src/ipc/skill-routes/__tests__/memory.test.ts +1 -0
- package/src/ipc/skill-routes/__tests__/registries.test.ts +36 -7
- package/src/ipc/skill-routes/memory.ts +4 -3
- package/src/ipc/skill-routes/registries.ts +35 -40
- package/src/memory/__tests__/db-async-query.test.ts +165 -0
- package/src/memory/__tests__/db-maintenance.test.ts +115 -0
- package/src/memory/__tests__/jobs-store-enqueue-gate.test.ts +242 -0
- package/src/memory/__tests__/jobs-store-job-classes.test.ts +28 -1
- package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +26 -5
- package/src/memory/__tests__/memory-retrospective-enqueue.test.ts +1 -0
- package/src/memory/__tests__/memory-retrospective-job.test.ts +8 -0
- package/src/memory/__tests__/memory-retrospective-startup-cleanup.test.ts +1 -0
- package/src/memory/__tests__/memory-v2-activation-log-store.test.ts +31 -0
- package/src/memory/auto-analysis-enqueue.ts +5 -1
- package/src/memory/conversation-attention-store.ts +17 -3
- package/src/memory/conversation-crud.ts +423 -182
- package/src/memory/conversation-starters-cadence.ts +3 -1
- package/src/memory/conversation-title-service.ts +19 -3
- package/src/memory/db-async-query.ts +214 -0
- package/src/memory/db-connection.ts +29 -19
- package/src/memory/db-init.ts +14 -0
- package/src/memory/db-maintenance.ts +30 -21
- package/src/memory/db-singleton.ts +77 -0
- package/src/memory/delivery-channels.ts +82 -0
- package/src/memory/graph/__tests__/conversation-graph-memory-v2-routing.test.ts +2 -4
- package/src/memory/graph/bootstrap.ts +8 -1
- package/src/memory/graph/capability-seed.ts +7 -3
- package/src/memory/graph/conversation-graph-memory.ts +100 -17
- package/src/memory/graph/extraction.ts +1 -5
- package/src/memory/graph/graph-search.ts +7 -1
- package/src/memory/graph/retriever.test.ts +3 -3
- package/src/memory/indexer.ts +28 -18
- package/src/memory/job-handlers/cleanup.ts +76 -18
- package/src/memory/job-handlers/conversation-starters.ts +1 -4
- package/src/memory/job-handlers/embedding.test.ts +3 -2
- package/src/memory/jobs/__tests__/embed-concept-page.test.ts +5 -2
- package/src/memory/jobs/embed-pkb-file.ts +6 -1
- package/src/memory/jobs-store.ts +14 -0
- package/src/memory/jobs-worker.ts +66 -22
- package/src/memory/llm-request-log-source-clickhouse.ts +122 -2
- package/src/memory/llm-request-log-source-local.ts +31 -0
- package/src/memory/llm-request-log-source.ts +40 -2
- package/src/memory/llm-request-log-store.ts +228 -1
- package/src/memory/llm-usage-store.ts +24 -0
- package/src/memory/memory-retrospective-enqueue.ts +8 -1
- package/src/memory/memory-retrospective-job.ts +5 -0
- package/src/memory/memory-v2-activation-log-store.ts +110 -7
- package/src/memory/migrations/260-rename-cleaned-at.ts +44 -0
- package/src/memory/migrations/261-llm-usage-add-raw-usage.ts +36 -0
- package/src/memory/migrations/262-memory-v3-coactivation.ts +57 -0
- package/src/memory/migrations/263-memory-v3-auto-edges.ts +50 -0
- package/src/memory/migrations/264-llm-request-log-call-site.ts +29 -0
- package/src/memory/migrations/265-drop-provider-connection-status.ts +26 -0
- package/src/memory/migrations/266-messages-client-message-id.ts +43 -0
- package/src/memory/migrations/index.ts +19 -0
- package/src/memory/migrations/registry.ts +33 -0
- package/src/memory/schema/conversations.ts +10 -2
- package/src/memory/schema/inference.ts +0 -1
- package/src/memory/schema/infrastructure.ts +21 -0
- package/src/memory/tool-usage-store.ts +36 -8
- package/src/memory/v2/__tests__/backfill-jobs.test.ts +5 -2
- package/src/memory/v2/__tests__/consolidation-job.test.ts +1 -0
- package/src/memory/v2/__tests__/harness-compare.test.ts +186 -0
- package/src/memory/v2/__tests__/harness-metrics.test.ts +83 -0
- package/src/memory/v2/__tests__/harness-oracle.test.ts +257 -0
- package/src/memory/v2/__tests__/harness-replay-input.test.ts +230 -0
- package/src/memory/v2/__tests__/harness-runner.test.ts +135 -0
- package/src/memory/v2/__tests__/injection.test.ts +127 -98
- package/src/memory/v2/__tests__/qdrant.test.ts +36 -0
- package/src/memory/v2/__tests__/router.test.ts +171 -3
- package/src/memory/v2/__tests__/sweep-job.test.ts +6 -3
- package/src/memory/v2/harness/compare.ts +57 -0
- package/src/memory/v2/harness/metrics.ts +128 -0
- package/src/memory/v2/harness/oracle.ts +145 -0
- package/src/memory/v2/harness/replay-input.ts +240 -0
- package/src/memory/v2/harness/retriever.ts +74 -0
- package/src/memory/v2/harness/router-retriever.ts +43 -0
- package/src/memory/v2/harness/runner.ts +112 -0
- package/src/memory/v2/harness/trace.ts +64 -0
- package/src/memory/v2/injection.ts +21 -15
- package/src/memory/v2/prompts/router.ts +26 -1
- package/src/memory/v2/qdrant.ts +14 -2
- package/src/memory/v2/router.ts +171 -18
- package/src/memory/v3/__tests__/coactivation-store.test.ts +422 -0
- package/src/memory/v3/__tests__/consolidation-job.test.ts +466 -0
- package/src/memory/v3/__tests__/coretrieval-seed.test.ts +270 -0
- package/src/memory/v3/__tests__/edge-learning-job.test.ts +324 -0
- package/src/memory/v3/__tests__/edges.test.ts +706 -0
- package/src/memory/v3/__tests__/filter.test.ts +560 -0
- package/src/memory/v3/__tests__/gate.test.ts +637 -0
- package/src/memory/v3/__tests__/index-composition.test.ts +291 -0
- package/src/memory/v3/__tests__/loop.test.ts +775 -0
- package/src/memory/v3/__tests__/retriever.test.ts +226 -0
- package/src/memory/v3/__tests__/scouts.test.ts +489 -0
- package/src/memory/v3/__tests__/shadow-diff.test.ts +225 -0
- package/src/memory/v3/__tests__/shadow-middleware.test.ts +398 -0
- package/src/memory/v3/__tests__/system-prompts.test.ts +154 -0
- package/src/memory/v3/__tests__/traversal.test.ts +508 -0
- package/src/memory/v3/__tests__/tree-index.test.ts +280 -0
- package/src/memory/v3/__tests__/tree-store.test.ts +529 -0
- package/src/memory/v3/__tests__/tree-walk.test.ts +784 -0
- package/src/memory/v3/__tests__/validate.test.ts +277 -0
- package/src/memory/v3/auto-edges.ts +223 -0
- package/src/memory/v3/coactivation-store.ts +124 -0
- package/src/memory/v3/consolidation-job.ts +323 -0
- package/src/memory/v3/coretrieval-seed.ts +240 -0
- package/src/memory/v3/edge-learning-job.ts +160 -0
- package/src/memory/v3/edges.ts +286 -0
- package/src/memory/v3/filter.ts +286 -0
- package/src/memory/v3/gate.ts +349 -0
- package/src/memory/v3/index-composition.ts +126 -0
- package/src/memory/v3/llm-capture.ts +46 -0
- package/src/memory/v3/loop.ts +430 -0
- package/src/memory/v3/maintenance.ts +144 -0
- package/src/memory/v3/prompt-context.ts +33 -0
- package/src/memory/v3/prompts/consolidation.ts +458 -0
- package/src/memory/v3/prompts/system-prompts.ts +196 -0
- package/src/memory/v3/retriever.ts +33 -0
- package/src/memory/v3/scouts.ts +431 -0
- package/src/memory/v3/shadow-diff.ts +287 -0
- package/src/memory/v3/shadow-middleware.ts +347 -0
- package/src/memory/v3/traversal.ts +211 -0
- package/src/memory/v3/tree-index.ts +237 -0
- package/src/memory/v3/tree-store.ts +394 -0
- package/src/memory/v3/tree-walk.ts +356 -0
- package/src/memory/v3/types.ts +65 -0
- package/src/memory/v3/validate.ts +323 -0
- package/src/notifications/__tests__/emit-signal-home-feed.test.ts +1 -0
- package/src/notifications/__tests__/home-feed-side-effect.test.ts +1 -0
- package/src/notifications/adapters/macos.ts +18 -1
- package/src/notifications/adapters/platform.ts +1 -1
- package/src/notifications/adapters/slack.ts +45 -11
- package/src/notifications/broadcaster.ts +114 -63
- package/src/notifications/conversation-pairing.ts +23 -3
- package/src/notifications/decision-engine.ts +1 -4
- package/src/notifications/decisions-store.ts +32 -1
- package/src/notifications/deliveries-store.ts +45 -0
- package/src/notifications/edit-notification.ts +201 -0
- package/src/notifications/emit-signal.ts +40 -50
- package/src/notifications/signal.ts +10 -0
- package/src/notifications/types.ts +37 -0
- package/src/oauth/byo-connection.test.ts +67 -3
- package/src/oauth/byo-connection.ts +32 -5
- package/src/oauth/connect-orchestrator.ts +9 -0
- package/src/oauth/connection-resolver.test.ts +76 -0
- package/src/oauth/connection-resolver.ts +49 -10
- package/src/oauth/manual-token-connection.ts +51 -3
- package/src/oauth/seed-providers.ts +3 -0
- package/src/permissions/approval-policy.test.ts +19 -5
- package/src/permissions/approval-policy.ts +14 -3
- package/src/permissions/checker.ts +21 -8
- package/src/permissions/prompter.ts +3 -3
- package/src/permissions/question-prompter.ts +5 -2
- package/src/permissions/secret-prompter.ts +2 -2
- package/src/platform/client.test.ts +24 -1
- package/src/platform/client.ts +8 -0
- package/src/platform/feature-gate.ts +15 -0
- package/src/plugin-api/index.ts +4 -0
- package/src/plugin-api/types.ts +7 -33
- package/src/plugins/defaults/index.ts +6 -0
- package/src/plugins/defaults/injectors.ts +20 -19
- package/src/plugins/defaults/persistence.ts +25 -6
- package/src/plugins/external-plugin-loader.ts +5 -68
- package/src/plugins/types.ts +68 -29
- package/src/proactive-artifact/aux-message-injector.ts +17 -4
- package/src/proactive-artifact/job.test.ts +1 -0
- package/src/prompts/__tests__/system-prompt.test.ts +4 -4
- package/src/prompts/__tests__/task-progress-hint-section.test.ts +3 -9
- package/src/prompts/persona-resolver.ts +36 -21
- package/src/prompts/sections.ts +39 -7
- package/src/prompts/system-prompt.ts +84 -221
- package/src/prompts/template-detection.ts +10 -4
- package/src/prompts/templates/BOOTSTRAP.md +9 -13
- package/src/prompts/templates/IDENTITY.md +0 -2
- package/src/prompts/templates/system-sections.ts +230 -8
- package/src/providers/__tests__/connection-model-compat.test.ts +233 -0
- package/src/providers/__tests__/registry-native-web-search.test.ts +122 -0
- package/src/providers/__tests__/retry-callsite.test.ts +85 -5
- package/src/providers/anthropic/client.ts +32 -66
- package/src/providers/call-site-routing.ts +42 -6
- package/src/providers/connection-model-compat.ts +61 -0
- package/src/providers/connection-resolution.ts +47 -14
- package/src/providers/fireworks/client.ts +1 -0
- package/src/providers/gemini/client.ts +70 -6
- package/src/providers/inference/__tests__/adapter-factory-openai-compatible.test.ts +0 -2
- package/src/providers/inference/__tests__/base-url-security.test.ts +2 -3
- package/src/providers/inference/__tests__/{connections-status-label.test.ts → connections-label.test.ts} +12 -111
- package/src/providers/inference/adapter-factory.ts +3 -0
- package/src/providers/inference/auth.ts +0 -8
- package/src/providers/inference/connections.ts +3 -66
- package/src/providers/inference/resolve-auth.ts +2 -3
- package/src/providers/minimax/client.ts +106 -0
- package/src/providers/model-catalog.ts +78 -1
- package/src/providers/model-intents.ts +4 -4
- package/src/providers/openai/__tests__/api-error-detail.test.ts +120 -0
- package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +157 -5
- package/src/providers/openai/chat-completions-provider.ts +116 -15
- package/src/providers/openai/codex-models.ts +20 -0
- package/src/providers/openai/responses-provider.ts +87 -30
- package/src/providers/openrouter/client.ts +13 -8
- package/src/providers/provider-send-message.ts +20 -5
- package/src/providers/registry.ts +48 -8
- package/src/providers/retry.ts +50 -7
- package/src/providers/search-provider-catalog.ts +17 -9
- package/src/providers/thinking-config.ts +26 -1
- package/src/providers/types.ts +9 -0
- package/src/providers/usage-tracking.ts +2 -0
- package/src/runtime/AGENTS.md +2 -2
- package/src/runtime/__tests__/agent-wake.test.ts +1 -0
- package/src/runtime/__tests__/background-job-runner.test.ts +1 -0
- package/src/runtime/access-request-helper.ts +1 -0
- package/src/runtime/agent-wake.ts +1 -0
- package/src/runtime/assistant-event-hub.ts +76 -6
- package/src/runtime/auth/route-policy.ts +46 -0
- package/src/runtime/btw-sidechain.ts +0 -6
- package/src/runtime/channel-readiness-service.ts +68 -0
- package/src/runtime/channel-reply-delivery.ts +23 -0
- package/src/runtime/channel-retry-sweep.ts +47 -14
- package/src/runtime/confirmation-request-guardian-bridge.ts +1 -1
- package/src/runtime/http-types.ts +0 -2
- package/src/runtime/migrations/vbundle-builder.ts +12 -4
- package/src/runtime/pending-interactions.ts +0 -1
- package/src/runtime/routes/__tests__/bookmark-routes.test.ts +1 -0
- package/src/runtime/routes/__tests__/conversation-compaction-routes.test.ts +406 -0
- package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +204 -0
- package/src/runtime/routes/__tests__/heartbeat-routes.test.ts +1 -1
- package/src/runtime/routes/__tests__/home-feed-routes.test.ts +209 -1
- package/src/runtime/routes/__tests__/inference-provider-connection-routes.test.ts +13 -50
- package/src/runtime/routes/__tests__/memory-v2-simulate-route.test.ts +76 -9
- package/src/runtime/routes/__tests__/memory-v3-simulate-params.test.ts +35 -0
- package/src/runtime/routes/__tests__/plugins-routes.test.ts +512 -0
- package/src/runtime/routes/__tests__/slack-channel-routes.test.ts +3 -2
- package/src/runtime/routes/__tests__/surface-content-routes.test.ts +294 -0
- package/src/runtime/routes/__tests__/task-routes.test.ts +48 -3
- package/src/runtime/routes/acp-routes-list.test.ts +3 -0
- package/src/runtime/routes/acp-routes.test.ts +255 -6
- package/src/runtime/routes/acp-routes.ts +8 -1
- package/src/runtime/routes/app-management-routes.ts +111 -4
- package/src/runtime/routes/avatar-routes.ts +10 -10
- package/src/runtime/routes/background-wake-routes.ts +356 -0
- package/src/runtime/routes/browser-tabs-routes.ts +200 -0
- package/src/runtime/routes/btw-routes.ts +4 -10
- package/src/runtime/routes/conversation-analysis-routes.ts +6 -0
- package/src/runtime/routes/conversation-cli-routes.ts +1 -1
- package/src/runtime/routes/conversation-compaction-routes.ts +263 -0
- package/src/runtime/routes/conversation-list-routes.ts +159 -4
- package/src/runtime/routes/conversation-management-routes.ts +108 -26
- package/src/runtime/routes/conversation-query-routes.ts +200 -44
- package/src/runtime/routes/conversation-routes.ts +409 -521
- package/src/runtime/routes/conversation-starter-routes.ts +6 -3
- package/src/runtime/routes/conversations-import-routes.ts +19 -6
- package/src/runtime/routes/disk-pressure-routes.ts +1 -1
- package/src/runtime/routes/documents-routes.ts +10 -1
- package/src/runtime/routes/domain-routes.ts +60 -10
- package/src/runtime/routes/email-routes.ts +5 -2
- package/src/runtime/routes/events-routes.ts +54 -10
- package/src/runtime/routes/group-routes.ts +35 -8
- package/src/runtime/routes/home-feed-routes.ts +129 -0
- package/src/runtime/routes/host-browser-routes.ts +10 -2
- package/src/runtime/routes/host-cu-routes.ts +2 -2
- package/src/runtime/routes/identity-intro-cache.ts +61 -16
- package/src/runtime/routes/identity-routes.ts +30 -9
- package/src/runtime/routes/inbound-stages/acl-enforcement.ts +96 -3
- package/src/runtime/routes/inbound-stages/background-dispatch.test.ts +530 -6
- package/src/runtime/routes/inbound-stages/background-dispatch.ts +57 -8
- package/src/runtime/routes/index.ts +10 -0
- package/src/runtime/routes/inference-profile-session-handler.ts +22 -12
- package/src/runtime/routes/inference-profile-session-routes.ts +7 -1
- package/src/runtime/routes/inference-provider-connection-routes.ts +5 -26
- package/src/runtime/routes/integrations/vercel.ts +15 -0
- package/src/runtime/routes/llm-call-sites-routes.ts +32 -5
- package/src/runtime/routes/llm-context-normalization.ts +7 -2
- package/src/runtime/routes/memory-item-routes.ts +8 -3
- package/src/runtime/routes/memory-v2-routes.ts +215 -5
- package/src/runtime/routes/memory-v3-routes.ts +474 -0
- package/src/runtime/routes/migration-routes.ts +32 -28
- package/src/runtime/routes/notification-routes.ts +63 -1
- package/src/runtime/routes/oauth-commands-routes.ts +6 -1
- package/src/runtime/routes/plugins-routes.ts +337 -0
- package/src/runtime/routes/rename-conversation-routes.ts +6 -2
- package/src/runtime/routes/secret-routes.ts +25 -5
- package/src/runtime/routes/settings-routes.ts +12 -11
- package/src/runtime/routes/slack-channel-routes.ts +5 -4
- package/src/runtime/routes/surface-action-routes.ts +1 -38
- package/src/runtime/routes/surface-content-routes.ts +12 -5
- package/src/runtime/routes/surface-conversation-resolver.ts +65 -0
- package/src/runtime/routes/wipe-conversation-routes.ts +3 -0
- package/src/runtime/routes/workspace-routes.ts +25 -10
- package/src/runtime/services/__tests__/analyze-conversation.test.ts +2 -0
- package/src/runtime/slack-dm-text-delivery.ts +177 -0
- package/src/runtime/sync/resource-sync-events.ts +106 -38
- package/src/runtime/sync/sync-publisher.test.ts +49 -0
- package/src/runtime/sync/sync-publisher.ts +2 -1
- package/src/runtime/tool-grant-request-helper.ts +1 -0
- package/src/runtime/verification-outbound-actions.ts +73 -1
- package/src/schedule/schedule-store.ts +8 -1
- package/src/schedule/scheduler.ts +111 -15
- package/src/security/__tests__/provider-key-env-fallback.test.ts +3 -3
- package/src/security/encrypted-store.ts +7 -16
- package/src/security/store-path-override.ts +61 -0
- package/src/signals/user-message.ts +5 -8
- package/src/skills/validate-input.ts +177 -0
- package/src/subagent/manager.ts +13 -13
- package/src/subagent/types.ts +6 -0
- package/src/tasks/tool-sanitizer.ts +2 -2
- package/src/telemetry/types.ts +12 -0
- package/src/telemetry/usage-telemetry-reporter.test.ts +48 -0
- package/src/telemetry/usage-telemetry-reporter.ts +1 -0
- package/src/tools/acp/spawn.test.ts +119 -0
- package/src/tools/acp/spawn.ts +15 -2
- package/src/tools/apps/definitions.ts +36 -28
- package/src/tools/ask-question/ask-question-tool.test.ts +3 -3
- package/src/tools/ask-question/ask-question-tool.ts +38 -45
- package/src/tools/browser/__tests__/browser-execution-acquire.test.ts +2 -8
- package/src/tools/browser/__tests__/pinned-tabs.test.ts +70 -0
- package/src/tools/browser/browser-execution.ts +16 -3
- package/src/tools/browser/cdp-client/__tests__/browser-tabs-factory.test.ts +402 -0
- package/src/tools/browser/cdp-client/__tests__/types.test.ts +3 -0
- package/src/tools/browser/cdp-client/cdp-inspect-client.ts +12 -0
- package/src/tools/browser/cdp-client/extension-cdp-client.ts +27 -1
- package/src/tools/browser/cdp-client/factory.ts +100 -17
- package/src/tools/browser/cdp-client/local-cdp-client.ts +12 -0
- package/src/tools/browser/cdp-client/types.ts +65 -0
- package/src/tools/browser/pinned-tabs.ts +96 -40
- package/src/tools/computer-use/definitions.ts +282 -336
- package/src/tools/credential-execution/make-authenticated-request.ts +3 -9
- package/src/tools/credential-execution/manage-secure-command-tool.ts +3 -9
- package/src/tools/credential-execution/run-authenticated-command.ts +3 -9
- package/src/tools/credentials/vault.ts +3 -9
- package/src/tools/document/document-tool.ts +189 -7
- package/src/tools/execution-target.ts +18 -23
- package/src/tools/executor.ts +24 -56
- package/src/tools/filesystem/edit.ts +3 -9
- package/src/tools/filesystem/list.ts +3 -9
- package/src/tools/filesystem/read.ts +3 -9
- package/src/tools/filesystem/write.ts +3 -9
- package/src/tools/host-filesystem/edit.test.ts +1 -0
- package/src/tools/host-filesystem/edit.ts +3 -9
- package/src/tools/host-filesystem/read.test.ts +1 -0
- package/src/tools/host-filesystem/read.ts +3 -9
- package/src/tools/host-filesystem/transfer.test.ts +31 -6
- package/src/tools/host-filesystem/transfer.ts +3 -9
- package/src/tools/host-filesystem/write.test.ts +1 -0
- package/src/tools/host-filesystem/write.ts +3 -9
- package/src/tools/host-terminal/host-shell.ts +3 -9
- package/src/tools/mcp/mcp-tool-factory.ts +1 -10
- package/src/tools/memory/register.test.ts +1 -1
- package/src/tools/memory/register.ts +4 -9
- package/src/tools/network/__tests__/managed-search-proxy.test.ts +282 -0
- package/src/tools/network/__tests__/web-search.test.ts +211 -3
- package/src/tools/network/managed-search-proxy.ts +183 -0
- package/src/tools/network/web-fetch.ts +3 -9
- package/src/tools/network/web-search.ts +224 -76
- package/src/tools/policy-context.ts +3 -1
- package/src/tools/registry.ts +150 -123
- package/src/tools/schedule/create.ts +1 -1
- package/src/tools/schema-transforms.ts +1 -1
- package/src/tools/skills/execute.ts +3 -9
- package/src/tools/skills/load.ts +3 -9
- package/src/tools/skills/skill-tool-factory.ts +18 -44
- package/src/tools/subagent/notify-parent.ts +3 -9
- package/src/tools/subagent/spawn.ts +3 -0
- package/src/tools/system/request-permission.ts +3 -9
- package/src/tools/terminal/shell.ts +3 -9
- package/src/tools/tool-approval-handler.ts +10 -4
- package/src/tools/tool-defaults.ts +94 -0
- package/src/tools/tool-name-aliases.ts +72 -14
- package/src/tools/types.ts +32 -101
- package/src/tools/ui-surface/definitions.ts +104 -108
- package/src/types/onboarding-context.ts +6 -0
- package/src/usage/attribution.ts +32 -1
- package/src/usage/pricing.ts +23 -0
- package/src/usage/types.ts +12 -0
- package/src/util/browser.ts +7 -2
- package/src/util/logger.ts +16 -7
- package/src/util/platform.ts +7 -2
- package/src/util/sqlite3-runtime.ts +65 -0
- package/src/workspace/migrations/086-revert-stale-gemini-mis-rewrites.ts +1 -0
- package/src/workspace/migrations/089-move-memory-tree-out-of-v3.ts +86 -0
- package/src/workspace/migrations/090-memory-router-cost-optimized-profile.ts +109 -0
- package/src/workspace/migrations/091-retighten-migration-onboarding-thread.ts +41 -0
- package/src/workspace/migrations/registry.ts +6 -0
- package/src/__tests__/compaction-strip-metadata-clear.test.ts +0 -206
- package/src/__tests__/message-complete-display-id.test.ts +0 -175
- package/src/daemon/query-complexity-router.ts +0 -75
- package/src/prompts/cache-boundary.ts +0 -8
|
@@ -116,7 +116,7 @@ mock.module("../../../providers/provider-send-message.js", () => ({
|
|
|
116
116
|
// them. No mock needed for `daemon/identity-helpers.js`; it tolerates a
|
|
117
117
|
// missing IDENTITY.md by returning null.
|
|
118
118
|
|
|
119
|
-
const { runRouter } = await import("../router.js");
|
|
119
|
+
const { runRouter, applyHistoricalCharBudget } = await import("../router.js");
|
|
120
120
|
const { getPageIndex, invalidatePageIndex } = await import("../page-index.js");
|
|
121
121
|
const { writePage } = await import("../page-store.js");
|
|
122
122
|
|
|
@@ -220,6 +220,7 @@ function makeConfig(overrides?: {
|
|
|
220
220
|
batchSize?: number | null;
|
|
221
221
|
tier1Size?: number | null;
|
|
222
222
|
tier2Size?: number | null;
|
|
223
|
+
historicalPairsMaxChars?: number | null;
|
|
223
224
|
}) {
|
|
224
225
|
return {
|
|
225
226
|
memory: {
|
|
@@ -231,6 +232,8 @@ function makeConfig(overrides?: {
|
|
|
231
232
|
batch_size: overrides?.batchSize ?? null,
|
|
232
233
|
tier1_size: overrides?.tier1Size ?? null,
|
|
233
234
|
tier2_size: overrides?.tier2Size ?? null,
|
|
235
|
+
historical_pairs_max_chars:
|
|
236
|
+
overrides?.historicalPairsMaxChars ?? null,
|
|
234
237
|
},
|
|
235
238
|
},
|
|
236
239
|
},
|
|
@@ -238,8 +241,12 @@ function makeConfig(overrides?: {
|
|
|
238
241
|
}
|
|
239
242
|
|
|
240
243
|
const COMMON_PARAMS = {
|
|
241
|
-
|
|
242
|
-
|
|
244
|
+
recentTurnPairs: [
|
|
245
|
+
{
|
|
246
|
+
assistantMessage: "Let me check your plan.",
|
|
247
|
+
userMessage: "What's on my plate today?",
|
|
248
|
+
},
|
|
249
|
+
],
|
|
243
250
|
nowText: "2026-05-10 14:00 PT",
|
|
244
251
|
priorEverInjected: [] as { slug: string; turn: number }[],
|
|
245
252
|
};
|
|
@@ -418,6 +425,78 @@ describe("runRouter — successful tool_use", () => {
|
|
|
418
425
|
expect(blockB.cache_control).toBeUndefined();
|
|
419
426
|
});
|
|
420
427
|
|
|
428
|
+
test("runRouterBatch front-truncates the oldest <last_turn> message when the char budget is exceeded", async () => {
|
|
429
|
+
await writePage(workspaceDir, makePage("alpha", { summary: "A" }));
|
|
430
|
+
providerStub = makeProvider(toolUseResponse([1]));
|
|
431
|
+
|
|
432
|
+
const longAssistant = "A".repeat(2_000);
|
|
433
|
+
const longUser = "B".repeat(2_000);
|
|
434
|
+
const recentAssistant = "Short prior.";
|
|
435
|
+
const justArrived = "What's relevant?";
|
|
436
|
+
|
|
437
|
+
await runRouter({
|
|
438
|
+
workspaceDir,
|
|
439
|
+
recentTurnPairs: [
|
|
440
|
+
{ assistantMessage: longAssistant, userMessage: longUser },
|
|
441
|
+
{ assistantMessage: recentAssistant, userMessage: justArrived },
|
|
442
|
+
],
|
|
443
|
+
nowText: "now",
|
|
444
|
+
priorEverInjected: [],
|
|
445
|
+
// Budget: just enough room for the most-recent pair plus the old user
|
|
446
|
+
// line in full, leaving a small slice for the very oldest assistant
|
|
447
|
+
// (which should be front-truncated with the `…` marker).
|
|
448
|
+
config: makeConfig({
|
|
449
|
+
historicalPairsMaxChars:
|
|
450
|
+
recentAssistant.length + justArrived.length + longUser.length + 50,
|
|
451
|
+
}),
|
|
452
|
+
});
|
|
453
|
+
|
|
454
|
+
const [call] = providerCalls;
|
|
455
|
+
const userMsg = call.messages[0];
|
|
456
|
+
const blockB = userMsg.content[1] as { text: string };
|
|
457
|
+
|
|
458
|
+
// The just-arrived user message and the prior assistant reply survive
|
|
459
|
+
// verbatim because they're newest in the walk.
|
|
460
|
+
expect(blockB.text).toContain(`[user]: ${justArrived}`);
|
|
461
|
+
expect(blockB.text).toContain(`[assistant]: ${recentAssistant}`);
|
|
462
|
+
|
|
463
|
+
// The older user message survives verbatim (next newest after the
|
|
464
|
+
// most-recent pair).
|
|
465
|
+
expect(blockB.text).toContain(`[user]: ${longUser}`);
|
|
466
|
+
|
|
467
|
+
// The oldest message in the walk (the older assistant) is
|
|
468
|
+
// front-truncated, so its rendered line starts with the `…` marker
|
|
469
|
+
// and ends with the suffix of the original text.
|
|
470
|
+
expect(blockB.text).toContain("[assistant]: …");
|
|
471
|
+
expect(blockB.text.endsWith(`A\n</last_turn>`)).toBe(false); // sanity
|
|
472
|
+
// The full untruncated long-assistant string must NOT appear.
|
|
473
|
+
expect(blockB.text.includes(longAssistant)).toBe(false);
|
|
474
|
+
// The TAIL of the long-assistant string SHOULD appear (kept from front-truncation).
|
|
475
|
+
expect(blockB.text).toContain(longAssistant.slice(-10));
|
|
476
|
+
});
|
|
477
|
+
|
|
478
|
+
test("null historical_pairs_max_chars renders pairs verbatim regardless of size", async () => {
|
|
479
|
+
await writePage(workspaceDir, makePage("alpha", { summary: "A" }));
|
|
480
|
+
providerStub = makeProvider(toolUseResponse([1]));
|
|
481
|
+
|
|
482
|
+
const huge = "X".repeat(5_000);
|
|
483
|
+
await runRouter({
|
|
484
|
+
workspaceDir,
|
|
485
|
+
recentTurnPairs: [
|
|
486
|
+
{ assistantMessage: huge, userMessage: "just arrived" },
|
|
487
|
+
],
|
|
488
|
+
nowText: "now",
|
|
489
|
+
priorEverInjected: [],
|
|
490
|
+
config: makeConfig(), // historical_pairs_max_chars: null
|
|
491
|
+
});
|
|
492
|
+
|
|
493
|
+
const [call] = providerCalls;
|
|
494
|
+
const blockB = call.messages[0].content[1] as { text: string };
|
|
495
|
+
expect(blockB.text).toContain(`[assistant]: ${huge}`);
|
|
496
|
+
expect(blockB.text).toContain("[user]: just arrived");
|
|
497
|
+
expect(blockB.text).not.toContain("…");
|
|
498
|
+
});
|
|
499
|
+
|
|
421
500
|
test("de-duplicates repeated IDs from the model while preserving order", async () => {
|
|
422
501
|
providerStub = makeProvider(toolUseResponse([2, 1, 2]));
|
|
423
502
|
|
|
@@ -1017,3 +1096,92 @@ describe("runRouter — tier 2 (highest EMA)", () => {
|
|
|
1017
1096
|
expect(warned).toBe(true);
|
|
1018
1097
|
});
|
|
1019
1098
|
});
|
|
1099
|
+
|
|
1100
|
+
// ---------------------------------------------------------------------------
|
|
1101
|
+
// applyHistoricalCharBudget — pure helper covering the cap semantics.
|
|
1102
|
+
// ---------------------------------------------------------------------------
|
|
1103
|
+
|
|
1104
|
+
describe("applyHistoricalCharBudget", () => {
|
|
1105
|
+
test("null budget is a no-op (returns a shallow copy)", () => {
|
|
1106
|
+
const pairs = [
|
|
1107
|
+
{ assistantMessage: "older asst", userMessage: "older user" },
|
|
1108
|
+
{ assistantMessage: "newer asst", userMessage: "newer user" },
|
|
1109
|
+
];
|
|
1110
|
+
const out = applyHistoricalCharBudget(pairs, null);
|
|
1111
|
+
expect(out).toEqual(pairs);
|
|
1112
|
+
// shallow copy — not the same array reference, so callers can mutate freely
|
|
1113
|
+
expect(out).not.toBe(pairs);
|
|
1114
|
+
});
|
|
1115
|
+
|
|
1116
|
+
test("budget that fits every message returns content unchanged", () => {
|
|
1117
|
+
const pairs = [
|
|
1118
|
+
{ assistantMessage: "AA", userMessage: "UU" },
|
|
1119
|
+
{ assistantMessage: "BB", userMessage: "VV" },
|
|
1120
|
+
];
|
|
1121
|
+
const total = "AA".length + "UU".length + "BB".length + "VV".length; // 8
|
|
1122
|
+
const out = applyHistoricalCharBudget(pairs, total);
|
|
1123
|
+
expect(out).toEqual(pairs);
|
|
1124
|
+
});
|
|
1125
|
+
|
|
1126
|
+
test("front-truncates the oldest still-includable message when the cap is exceeded", () => {
|
|
1127
|
+
// Newest user is 10 chars, newest assistant is 10, older user is 10,
|
|
1128
|
+
// older assistant is 20. Budget 35 leaves remaining = 35 - 10 - 10 - 10 = 5
|
|
1129
|
+
// for the older assistant; 5 - 1 marker char = 4 kept chars from the END.
|
|
1130
|
+
const pairs = [
|
|
1131
|
+
{ assistantMessage: "ABCDEFGHIJKLMNOPQRST", userMessage: "old-user--" },
|
|
1132
|
+
{ assistantMessage: "abcdefghij", userMessage: "uvwxyzUVWX" },
|
|
1133
|
+
];
|
|
1134
|
+
const out = applyHistoricalCharBudget(pairs, 35);
|
|
1135
|
+
expect(out).toEqual([
|
|
1136
|
+
{ assistantMessage: "…QRST", userMessage: "old-user--" },
|
|
1137
|
+
{ assistantMessage: "abcdefghij", userMessage: "uvwxyzUVWX" },
|
|
1138
|
+
]);
|
|
1139
|
+
// Sanity: total content chars equals the budget.
|
|
1140
|
+
const totalChars = out.reduce(
|
|
1141
|
+
(acc, p) => acc + p.assistantMessage.length + p.userMessage.length,
|
|
1142
|
+
0,
|
|
1143
|
+
);
|
|
1144
|
+
expect(totalChars).toBe(35);
|
|
1145
|
+
});
|
|
1146
|
+
|
|
1147
|
+
test("drops older pairs entirely when even their first message has no room", () => {
|
|
1148
|
+
// Budget 20 fits the most-recent pair exactly (10 + 10 = 20) and leaves
|
|
1149
|
+
// zero room for the older pair, which is dropped entirely.
|
|
1150
|
+
const pairs = [
|
|
1151
|
+
{ assistantMessage: "OLD-ASST00", userMessage: "OLD-USER00" },
|
|
1152
|
+
{ assistantMessage: "NEW-ASST00", userMessage: "NEW-USER00" },
|
|
1153
|
+
];
|
|
1154
|
+
const out = applyHistoricalCharBudget(pairs, 20);
|
|
1155
|
+
expect(out).toEqual([
|
|
1156
|
+
{ assistantMessage: "NEW-ASST00", userMessage: "NEW-USER00" },
|
|
1157
|
+
]);
|
|
1158
|
+
});
|
|
1159
|
+
|
|
1160
|
+
test("drops the older message of the current pair when the user line consumes the whole budget", () => {
|
|
1161
|
+
// Budget 10 just barely covers the newest user (10 chars). The pair's
|
|
1162
|
+
// own assistant message has no room and is dropped (left empty).
|
|
1163
|
+
const pairs = [
|
|
1164
|
+
{ assistantMessage: "ASSISTANTX", userMessage: "USER-NEW10" },
|
|
1165
|
+
];
|
|
1166
|
+
const out = applyHistoricalCharBudget(pairs, 10);
|
|
1167
|
+
expect(out).toEqual([{ assistantMessage: "", userMessage: "USER-NEW10" }]);
|
|
1168
|
+
});
|
|
1169
|
+
|
|
1170
|
+
test("non-positive budgets return an empty array (no message survives)", () => {
|
|
1171
|
+
const pairs = [{ assistantMessage: "x", userMessage: "y" }];
|
|
1172
|
+
expect(applyHistoricalCharBudget(pairs, 0)).toEqual(pairs);
|
|
1173
|
+
// Negative budgets are degenerate but should not throw.
|
|
1174
|
+
expect(applyHistoricalCharBudget(pairs, -5)).toEqual(pairs);
|
|
1175
|
+
});
|
|
1176
|
+
|
|
1177
|
+
test("budget smaller than the truncation marker drops the would-truncate message", () => {
|
|
1178
|
+
// Budget 11: covers full newest user (10 chars). Remaining 1 char is not
|
|
1179
|
+
// enough room for the marker, so the next message (newest assistant)
|
|
1180
|
+
// is dropped entirely rather than emitting a marker-only message.
|
|
1181
|
+
const pairs = [
|
|
1182
|
+
{ assistantMessage: "ASSISTANTX", userMessage: "USER-NEW10" },
|
|
1183
|
+
];
|
|
1184
|
+
const out = applyHistoricalCharBudget(pairs, 11);
|
|
1185
|
+
expect(out).toEqual([{ assistantMessage: "", userMessage: "USER-NEW10" }]);
|
|
1186
|
+
});
|
|
1187
|
+
});
|
|
@@ -99,7 +99,10 @@ afterAll(() => {
|
|
|
99
99
|
rmSync(tmpWorkspace, { recursive: true, force: true });
|
|
100
100
|
});
|
|
101
101
|
|
|
102
|
-
const {
|
|
102
|
+
const { getDb } = await import("../../db-connection.js");
|
|
103
|
+
const { resetDbForTesting } = await import(
|
|
104
|
+
"../../../__tests__/db-test-helpers.js"
|
|
105
|
+
);
|
|
103
106
|
const { initializeDb } = await import("../../db-init.js");
|
|
104
107
|
const { messages, conversations } = await import("../../schema.js");
|
|
105
108
|
const { memoryV2SweepJob } = await import("../sweep-job.js");
|
|
@@ -212,7 +215,7 @@ function seedMessages(
|
|
|
212
215
|
}
|
|
213
216
|
|
|
214
217
|
beforeEach(() => {
|
|
215
|
-
|
|
218
|
+
resetDbForTesting();
|
|
216
219
|
initializeDb();
|
|
217
220
|
// Fresh memory dir per test — keeps assertions on file contents independent.
|
|
218
221
|
rmSync(join(tmpWorkspace, "memory"), { recursive: true, force: true });
|
|
@@ -275,7 +278,7 @@ describe("memoryV2SweepJob — no recent messages", () => {
|
|
|
275
278
|
});
|
|
276
279
|
|
|
277
280
|
// Per-test conversation id ensures each test seeds a row that doesn't
|
|
278
|
-
// collide with the previous test's row in the (shared) test DB. `
|
|
281
|
+
// collide with the previous test's row in the (shared) test DB. `resetDbForTesting`
|
|
279
282
|
// is called in the outer beforeEach, but bun's mock module flow keeps the
|
|
280
283
|
// DB intact long enough for the SQL inserts here to clash.
|
|
281
284
|
let convCounter = 0;
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Run the comparison harness over a sample of historical turns.
|
|
3
|
+
*
|
|
4
|
+
* Ties the harness pieces together: pull oracle turns from telemetry, run each
|
|
5
|
+
* retriever over each turn's reconstructed inputs, score against the logged
|
|
6
|
+
* ground truth. Kept separate from the route handler so it can be unit-tested
|
|
7
|
+
* with a stub retriever and a fixture DB — no live router / LLM.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { AssistantConfig } from "../../../config/types.js";
|
|
11
|
+
import type { DrizzleDb } from "../../db-connection.js";
|
|
12
|
+
import { extractOracleTurns } from "./oracle.js";
|
|
13
|
+
import { reconstructInput } from "./replay-input.js";
|
|
14
|
+
import type { Retriever } from "./retriever.js";
|
|
15
|
+
import { type ComparisonReport, runComparison } from "./runner.js";
|
|
16
|
+
|
|
17
|
+
export interface RunComparisonOverHistoryParams {
|
|
18
|
+
db: DrizzleDb;
|
|
19
|
+
workspaceDir: string;
|
|
20
|
+
config: AssistantConfig;
|
|
21
|
+
retrievers: readonly Retriever[];
|
|
22
|
+
ks: number[];
|
|
23
|
+
limit?: number;
|
|
24
|
+
strategy?: "recent" | "random";
|
|
25
|
+
conversationIds?: string[];
|
|
26
|
+
includeNotInjected?: boolean;
|
|
27
|
+
pageExists?: (slug: string) => boolean;
|
|
28
|
+
signal?: AbortSignal;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
export async function runComparisonOverHistory(
|
|
32
|
+
params: RunComparisonOverHistoryParams,
|
|
33
|
+
): Promise<ComparisonReport> {
|
|
34
|
+
const { db, workspaceDir, config } = params;
|
|
35
|
+
|
|
36
|
+
const oracleTurns = extractOracleTurns(db, {
|
|
37
|
+
...(params.limit !== undefined ? { limit: params.limit } : {}),
|
|
38
|
+
...(params.strategy !== undefined ? { strategy: params.strategy } : {}),
|
|
39
|
+
...(params.conversationIds !== undefined
|
|
40
|
+
? { conversationIds: params.conversationIds }
|
|
41
|
+
: {}),
|
|
42
|
+
...(params.includeNotInjected !== undefined
|
|
43
|
+
? { includeNotInjected: params.includeNotInjected }
|
|
44
|
+
: {}),
|
|
45
|
+
...(params.pageExists !== undefined
|
|
46
|
+
? { pageExists: params.pageExists }
|
|
47
|
+
: {}),
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
return runComparison({
|
|
51
|
+
retrievers: params.retrievers,
|
|
52
|
+
oracleTurns,
|
|
53
|
+
reconstruct: (turn) => reconstructInput(db, turn, config, workspaceDir),
|
|
54
|
+
ks: params.ks,
|
|
55
|
+
...(params.signal !== undefined ? { signal: params.signal } : {}),
|
|
56
|
+
});
|
|
57
|
+
}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recall@k and per-lane diff for the comparison harness.
|
|
3
|
+
*
|
|
4
|
+
* Ground truth is the current router's logged selections (see `oracle.ts`). A
|
|
5
|
+
* retriever's "extras" (selected, not in ground truth) are reported as a
|
|
6
|
+
* *diff*, not an error — a better retriever may legitimately surface pages the
|
|
7
|
+
* router missed. recall@k is the primary signal.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { RetrievalOutput } from "./retriever.js";
|
|
11
|
+
|
|
12
|
+
export interface TurnEval {
|
|
13
|
+
groundTruth: string[];
|
|
14
|
+
selected: string[];
|
|
15
|
+
/** Ground-truth slugs the retriever selected (anywhere in its output). */
|
|
16
|
+
hits: string[];
|
|
17
|
+
/** Ground-truth slugs the retriever missed entirely. */
|
|
18
|
+
misses: string[];
|
|
19
|
+
/** Selected slugs not in ground truth — diff, not error. */
|
|
20
|
+
extras: string[];
|
|
21
|
+
/** recall@k for each requested k. */
|
|
22
|
+
recallAtK: Record<number, number>;
|
|
23
|
+
/** Counts of hits grouped by the retriever's source/lane labels. */
|
|
24
|
+
hitsByLane: Record<string, number>;
|
|
25
|
+
costUsd?: number;
|
|
26
|
+
failureReason: string | null;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
export interface AggregateEval {
|
|
30
|
+
turns: number;
|
|
31
|
+
meanRecallAtK: Record<number, number>;
|
|
32
|
+
failureRate: number;
|
|
33
|
+
meanCostUsd?: number;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
/**
 * recall@k = |topK(selected) ∩ G| / |G|. An empty ground-truth set is defined
 * as recall 1 (nothing to recall — vacuously complete).
 *
 * The top-k window is deduped before intersecting with the ground-truth set so
 * a retriever that emits the same slug twice (e.g. `['a','a']`) cannot count it
 * twice and push recall above 1.0. Recall is therefore bounded in [0, 1].
 *
 * @param selected - Slugs in the order the retriever ranked them.
 * @param groundTruth - The set of slugs that should have been retrieved.
 * @param k - Size of the leading window of `selected` to score. Values that
 *   are not positive (including `NaN`) yield an empty window; `Infinity`
 *   scores the whole list.
 * @returns The fraction of `groundTruth` found in the first `k` selections.
 */
export function recallAtK(
  selected: readonly string[],
  groundTruth: ReadonlySet<string>,
  k: number,
): number {
  if (groundTruth.size === 0) return 1;

  // `slice(0, k)` reads a negative k as "everything except the last |k|
  // items", which would score almost the whole list. Clamp so a non-positive
  // k means an empty window instead.
  const windowSize = k > 0 ? k : 0;

  let hit = 0;
  for (const slug of new Set(selected.slice(0, windowSize))) {
    if (groundTruth.has(slug)) hit++;
  }
  return hit / groundTruth.size;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Scores one retriever output against the ground-truth slugs for a turn.
 *
 * Ground truth is deduplicated first. `hits` and `misses` partition it by
 * whether the slug appears anywhere in the retriever's selection; `extras`
 * lists each selected slug that is not in ground truth once, in first-seen
 * order, even if the retriever emitted it several times.
 *
 * @param output - The retriever's result for this turn.
 * @param groundTruth - Slugs that should have been retrieved (may repeat).
 * @param ks - Window sizes to compute recall@k for.
 * @returns The per-turn evaluation record.
 */
export function evalTurn(
  output: RetrievalOutput,
  groundTruth: readonly string[],
  ks: readonly number[],
): TurnEval {
  const gtList = Array.from(new Set(groundTruth));
  const gtSet = new Set(gtList);
  const selectedSet = new Set(output.selectedSlugs);

  const hits: string[] = [];
  const misses: string[] = [];
  for (const slug of gtList) {
    (selectedSet.has(slug) ? hits : misses).push(slug);
  }

  // Derive extras from the deduplicated selection so a repeated slug is
  // reported once, consistent with how hits and misses are deduplicated.
  const extras = Array.from(selectedSet).filter((s) => !gtSet.has(s));

  const recall: Record<number, number> = {};
  for (const k of ks) {
    recall[k] = recallAtK(output.selectedSlugs, gtSet, k);
  }

  // Count in a Map rather than on a plain object: a lane label such as
  // "constructor" or "toString" would otherwise read the inherited
  // Object.prototype member as the starting count.
  const laneCounts = new Map<string, number>();
  for (const slug of hits) {
    const lane = output.sourceBySlug.get(slug) ?? "unknown";
    laneCounts.set(lane, (laneCounts.get(lane) ?? 0) + 1);
  }
  const hitsByLane: Record<string, number> = Object.fromEntries(laneCounts);

  return {
    groundTruth: gtList,
    selected: output.selectedSlugs,
    hits,
    misses,
    extras,
    recallAtK: recall,
    hitsByLane,
    // Only carry a cost when the retriever actually reported one.
    ...(output.cost?.usd !== undefined ? { costUsd: output.cost.usd } : {}),
    failureReason: output.failureReason ?? null,
  };
}
|
|
96
|
+
|
|
97
|
+
/**
 * Rolls per-turn evaluations up into summary statistics.
 *
 * A turn with no entry for a requested k contributes 0 to that k's mean. With
 * no turns at all, every mean recall and the failure rate are 0. The mean cost
 * is taken over only the turns that carry a cost and is left out of the result
 * when none do.
 *
 * @param perTurn - Per-turn evaluation records.
 * @param ks - The k values to report mean recall@k for.
 * @returns The aggregate record.
 */
export function aggregate(
  perTurn: readonly TurnEval[],
  ks: readonly number[],
): AggregateEval {
  const turns = perTurn.length;
  const hasTurns = turns !== 0;

  const meanRecallAtK: Record<number, number> = {};
  for (const k of ks) {
    meanRecallAtK[k] = hasTurns
      ? perTurn.reduce((acc, t) => acc + (t.recallAtK[k] ?? 0), 0) / turns
      : 0;
  }

  // One pass gathers both the failure count and the cost totals.
  let failures = 0;
  let costTotal = 0;
  let costedTurns = 0;
  for (const t of perTurn) {
    if (t.failureReason != null) failures += 1;
    if (t.costUsd !== undefined) {
      costTotal += t.costUsd;
      costedTurns += 1;
    }
  }

  const summary: AggregateEval = {
    turns,
    meanRecallAtK,
    failureRate: hasTurns ? failures / turns : 0,
  };
  if (costedTurns > 0) {
    summary.meanCostUsd = costTotal / costedTurns;
  }
  return summary;
}
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Oracle extraction — the current router's logged selections as silver-standard
|
|
3
|
+
* ground truth.
|
|
4
|
+
*
|
|
5
|
+
* Source: `memory_v2_activation_logs` rows with `mode = 'router'`. Each row's
|
|
6
|
+
* `messageId` is backfilled to the turn's assistant message (see
|
|
7
|
+
* `backfillMemoryV2ActivationMessageId`), so we join `messageId → messages.id`
|
|
8
|
+
* to anchor the turn — robust, no fragile turn-counting. Rows whose messageId
|
|
9
|
+
* is null (the in-flight turn) or no longer resolves are skipped.
|
|
10
|
+
*
|
|
11
|
+
* Ground truth G(turn) = selected slugs with status ∈ {injected, in_context}
|
|
12
|
+
* (what actually reached the model), optionally + not_injected, and — when a
|
|
13
|
+
* `pageExists` predicate is supplied — only slugs whose page still exists
|
|
14
|
+
* (neither retriever can find a nonexistent page). page_missing / corrupt are
|
|
15
|
+
* always excluded.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { and, desc, eq, inArray, isNotNull, sql } from "drizzle-orm";
|
|
19
|
+
|
|
20
|
+
import type { DrizzleDb } from "../../db-connection.js";
|
|
21
|
+
import type {
|
|
22
|
+
MemoryV2ConceptRowRecord,
|
|
23
|
+
MemoryV2ConfigSnapshot,
|
|
24
|
+
} from "../../memory-v2-activation-log-store.js";
|
|
25
|
+
import { memoryV2ActivationLogs, messages } from "../../schema.js";
|
|
26
|
+
|
|
27
|
+
/**
 * One turn reconstructed from a router-mode activation-log row, with the
 * router's selections as its ground truth.
 */
export interface OracleTurn {
  /** Conversation id taken from the activation-log row. */
  conversationId: string;
  /** Turn number taken from the activation-log row. */
  turn: number;
  /** Backfilled assistant-message id for this turn — the reconstruction anchor. */
  anchorMessageId: string;
  /** `created_at` of the anchor message; reconstruction cuts strictly before it. */
  anchorCreatedAt: number;
  /** Slugs the router's judgment put in front of the model (the recall target). */
  groundTruthSlugs: string[];
  /** Config snapshot parsed from the activation-log row's `configJson`. */
  loggedConfig: MemoryV2ConfigSnapshot;
  /** `createdAt` of the activation-log row itself. */
  createdAt: number;
}
|
|
39
|
+
|
|
40
|
+
/** Options controlling which activation-log rows become oracle turns. */
export interface ExtractOracleOptions {
  /** Max log rows to scan (default 50). Some are skipped, so result ≤ limit. */
  limit?: number;
  /**
   * Row ordering before the limit is applied: `"recent"` (the default) takes
   * the newest log rows first, `"random"` orders them randomly.
   */
  strategy?: "recent" | "random";
  /** When non-empty, only log rows from these conversations are considered. */
  conversationIds?: string[];
  /** Include status "not_injected" (selected but cut by the cap) in G. Default false. */
  includeNotInjected?: boolean;
  /**
   * Page-existence predicate, typically backed by `getPageIndex().bySlug`.
   * When provided, ground-truth slugs whose page no longer exists are dropped.
   * Omit in unit tests.
   */
  pageExists?: (slug: string) => boolean;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Builds oracle turns from router-mode activation-log rows.
 *
 * Up to `limit` rows with `mode = 'router'` and a non-null `messageId` are
 * read (newest first, or in random order), optionally restricted to the given
 * conversations. A row is skipped — never fatal — when:
 *   - its `messageId` no longer resolves to a row in `messages`;
 *   - `conceptsJson` or `configJson` is not parseable JSON;
 *   - `conceptsJson` parses to something other than an array;
 *   - no concept survives the status / page-existence filters.
 *
 * @param db - Database handle used for both the log and the message lookups.
 * @param options - See {@link ExtractOracleOptions}.
 * @returns One entry per usable log row, in the order the rows were read.
 */
export function extractOracleTurns(
  db: DrizzleDb,
  options: ExtractOracleOptions = {},
): OracleTurn[] {
  const {
    limit = 50,
    strategy = "recent",
    conversationIds,
    includeNotInjected = false,
    pageExists,
  } = options;

  // Statuses that count as "the router put this in front of the model".
  const allowedStatuses = new Set<string>(["injected", "in_context"]);
  if (includeNotInjected) allowedStatuses.add("not_injected");

  const filters = [
    eq(memoryV2ActivationLogs.mode, "router"),
    isNotNull(memoryV2ActivationLogs.messageId),
  ];
  if (conversationIds && conversationIds.length > 0) {
    filters.push(
      inArray(memoryV2ActivationLogs.conversationId, conversationIds),
    );
  }

  const rows = db
    .select({
      conversationId: memoryV2ActivationLogs.conversationId,
      messageId: memoryV2ActivationLogs.messageId,
      turn: memoryV2ActivationLogs.turn,
      conceptsJson: memoryV2ActivationLogs.conceptsJson,
      configJson: memoryV2ActivationLogs.configJson,
      createdAt: memoryV2ActivationLogs.createdAt,
    })
    .from(memoryV2ActivationLogs)
    .where(and(...filters))
    .orderBy(
      strategy === "random"
        ? sql`RANDOM()`
        : desc(memoryV2ActivationLogs.createdAt),
    )
    .limit(limit)
    .all();

  const turns: OracleTurn[] = [];
  for (const row of rows) {
    const messageId = row.messageId;
    if (messageId == null) continue;

    // Resolve the anchor message; a dangling messageId means the turn can no
    // longer be reconstructed.
    const anchor = db
      .select({ createdAt: messages.createdAt })
      .from(messages)
      .where(eq(messages.id, messageId))
      .limit(1)
      .all();
    const anchorRow = anchor[0];
    if (!anchorRow) continue;

    let concepts: unknown;
    let loggedConfig: MemoryV2ConfigSnapshot;
    try {
      concepts = JSON.parse(row.conceptsJson);
      loggedConfig = JSON.parse(row.configJson) as MemoryV2ConfigSnapshot;
    } catch {
      continue;
    }
    // JSON.parse succeeding says nothing about shape: `null`, `{}` or a
    // number would throw when iterated and abort the whole extraction, so
    // treat a non-array payload like any other corrupt row.
    if (!Array.isArray(concepts)) continue;

    // A Set both dedupes and preserves first-seen order.
    const slugs = new Set<string>();
    for (const entry of concepts as unknown[]) {
      if (typeof entry !== "object" || entry === null) continue;
      const { slug, status } = entry as Partial<MemoryV2ConceptRowRecord>;
      if (typeof slug !== "string" || typeof status !== "string") continue;
      if (!allowedStatuses.has(status)) continue;
      if (pageExists && !pageExists(slug)) continue;
      slugs.add(slug);
    }
    if (slugs.size === 0) continue;

    turns.push({
      conversationId: row.conversationId,
      turn: row.turn,
      anchorMessageId: messageId,
      anchorCreatedAt: anchorRow.createdAt,
      groundTruthSlugs: [...slugs],
      loggedConfig,
      createdAt: row.createdAt,
    });
  }

  return turns;
}
|