@vellumai/assistant 0.8.4 → 0.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +2 -2
- package/docs/browser-use-architecture-phase2.md +1 -1
- package/knip.json +2 -1
- package/openapi.yaml +809 -11
- package/package.json +1 -1
- package/src/__tests__/anthropic-provider.test.ts +34 -37
- package/src/__tests__/assistant-event-hub-self-exclusion.test.ts +293 -0
- package/src/__tests__/assistant-feature-flags-integration.test.ts +3 -3
- package/src/__tests__/audit-log-rotation.test.ts +70 -16
- package/src/__tests__/background-workers-disk-pressure.test.ts +3 -3
- package/src/__tests__/btw-routes.test.ts +2 -3
- package/src/__tests__/call-controller.test.ts +0 -1
- package/src/__tests__/cancel-resolves-conversation-key.test.ts +1 -1
- package/src/__tests__/channel-guardian.test.ts +3 -3
- package/src/__tests__/checker.test.ts +6 -15
- package/src/__tests__/compaction-events.test.ts +1 -0
- package/src/__tests__/compactor-call-site-logging.test.ts +214 -0
- package/src/__tests__/computer-use-skill-manifest-regression.test.ts +5 -11
- package/src/__tests__/computer-use-tools.test.ts +2 -4
- package/src/__tests__/confirmation-request-guardian-bridge.test.ts +0 -1
- package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +1 -1
- package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +1 -1
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +197 -2
- package/src/__tests__/conversation-agent-loop.test.ts +163 -122
- package/src/__tests__/conversation-app-control-instantiation.test.ts +2 -5
- package/src/__tests__/conversation-clear-safety.test.ts +25 -25
- package/src/__tests__/conversation-delete-schedule-cleanup.test.ts +1 -1
- package/src/__tests__/conversation-disk-view-integration.test.ts +2 -2
- package/src/__tests__/conversation-error.test.ts +31 -0
- package/src/__tests__/conversation-fork-crud.test.ts +178 -15
- package/src/__tests__/conversation-lifecycle.test.ts +52 -11
- package/src/__tests__/{conversation-load-cleaned-at.test.ts → conversation-load-history-stripped.test.ts} +13 -13
- package/src/__tests__/conversation-provider-retry-repair.test.ts +1 -0
- package/src/__tests__/conversation-routes-disk-view.test.ts +109 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +35 -0
- package/src/__tests__/conversation-skill-tools.test.ts +2 -5
- package/src/__tests__/conversation-store.test.ts +1 -1
- package/src/__tests__/conversation-sync-tags.test.ts +99 -32
- package/src/__tests__/conversation-workspace-cache-state.test.ts +1 -0
- package/src/__tests__/conversation-workspace-injection.test.ts +1 -1
- package/src/__tests__/conversation-workspace-tool-tracking.test.ts +1 -1
- package/src/__tests__/credential-execution-feature-gates.test.ts +9 -7
- package/src/__tests__/credential-execution-tools.test.ts +6 -6
- package/src/__tests__/credential-security-invariants.test.ts +1 -0
- package/src/__tests__/credential-vault-unit.test.ts +2 -2
- package/src/__tests__/dynamic-page-surface.test.ts +2 -2
- package/src/__tests__/email-html-renderer.test.ts +12 -0
- package/src/__tests__/gateway-flag-listener.test.ts +237 -0
- package/src/__tests__/gemini-provider.test.ts +78 -0
- package/src/__tests__/guardian-dispatch.test.ts +0 -1
- package/src/__tests__/guardian-outbound-http.test.ts +7 -5
- package/src/__tests__/handlers-user-message-approval-consumption.test.ts +1 -1
- package/src/__tests__/heartbeat-disk-pressure.test.ts +4 -0
- package/src/__tests__/heartbeat-service.test.ts +4 -0
- package/src/__tests__/host-shell-tool.test.ts +1 -1
- package/src/__tests__/init-feature-flag-overrides.test.ts +5 -6
- package/src/__tests__/list-messages-tool-merge.test.ts +70 -11
- package/src/__tests__/llm-request-log-call-site.test.ts +136 -0
- package/src/__tests__/llm-request-log-source-clickhouse.test.ts +26 -0
- package/src/__tests__/llm-resolver.test.ts +77 -9
- package/src/__tests__/llm-usage-store.test.ts +66 -0
- package/src/__tests__/logger.test.ts +89 -0
- package/src/__tests__/mcp-abort-signal.test.ts +2 -2
- package/src/__tests__/media-generate-image.test.ts +31 -0
- package/src/__tests__/memory-v2-static-injector.test.ts +7 -7
- package/src/__tests__/model-intents.test.ts +2 -4
- package/src/__tests__/notification-guardian-path.test.ts +0 -1
- package/src/__tests__/onboarding-template-contract.test.ts +1 -1
- package/src/__tests__/openai-provider.test.ts +46 -0
- package/src/__tests__/openai-responses-provider.test.ts +114 -12
- package/src/__tests__/pending-interactions-resolved-event.test.ts +0 -1
- package/src/__tests__/platform-bash-auto-approve.test.ts +2 -2
- package/src/__tests__/platform.test.ts +2 -2
- package/src/__tests__/plugin-api-tool-definition.test.ts +92 -0
- package/src/__tests__/plugin-bootstrap.test.ts +2 -2
- package/src/__tests__/plugin-tool-contribution.test.ts +13 -6
- package/src/__tests__/plugin-types.test.ts +3 -2
- package/src/__tests__/prechat-onboarding-contract.test.ts +131 -98
- package/src/__tests__/pricing.test.ts +12 -0
- package/src/__tests__/prune-jobs-changes-parser.test.ts +61 -0
- package/src/__tests__/registry.test.ts +2 -8
- package/src/__tests__/require-fresh-approval.test.ts +2 -2
- package/src/__tests__/runtime-events-sse-bilingual.test.ts +154 -0
- package/src/__tests__/shell-tool-proxy-mode.test.ts +1 -1
- package/src/__tests__/skill-feature-flags.test.ts +2 -2
- package/src/__tests__/skill-projection-feature-flag.test.ts +4 -7
- package/src/__tests__/skill-projection.benchmark.test.ts +2 -6
- package/src/__tests__/skill-tool-factory.test.ts +1 -1
- package/src/__tests__/subagent-notify-parent.test.ts +1 -1
- package/src/__tests__/suggestion-routes.test.ts +1 -0
- package/src/__tests__/sync-message-contract.test.ts +59 -0
- package/src/__tests__/system-prompt.test.ts +145 -131
- package/src/__tests__/terminal-tools.test.ts +1 -1
- package/src/__tests__/tool-approval-handler.test.ts +1 -5
- package/src/__tests__/tool-execute-pipeline.test.ts +2 -2
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +2 -5
- package/src/__tests__/tool-executor-lifecycle-events.test.ts +15 -5
- package/src/__tests__/tool-executor.test.ts +9 -62
- package/src/__tests__/tool-grant-request-escalation.test.ts +1 -6
- package/src/__tests__/trusted-contact-approval-notifier.test.ts +0 -1
- package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +1 -6
- package/src/__tests__/trusted-contact-multichannel.test.ts +0 -1
- package/src/__tests__/ui-file-upload-surface.test.ts +2 -2
- package/src/__tests__/usage-routes.test.ts +3 -0
- package/src/__tests__/verification-control-plane-policy.test.ts +2 -2
- package/src/__tests__/workspace-git-service.test.ts +6 -5
- package/src/__tests__/workspace-migration-089-move-memory-tree-out-of-v3.test.ts +86 -0
- package/src/acp/__tests__/prepare-agent-env.test.ts +146 -0
- package/src/acp/prepare-agent-env.ts +78 -0
- package/src/acp/session-manager.ts +1 -1
- package/src/agent/loop.ts +8 -0
- package/src/api/README.md +5 -0
- package/src/api/index.ts +4 -0
- package/src/api/package.json +10 -0
- package/src/background-wake/background-wake-routes.test.ts +233 -0
- package/src/background-wake/runtime-registry.ts +24 -0
- package/src/cli/commands/__tests__/browser.test.ts +23 -5
- package/src/cli/commands/__tests__/domain-register.test.ts +110 -0
- package/src/cli/commands/__tests__/domain-status.test.ts +33 -33
- package/src/cli/commands/__tests__/inference-send.test.ts +108 -5
- package/src/cli/commands/__tests__/memory-v2-compare-render.test.ts +98 -0
- package/src/cli/commands/__tests__/memory-v2.test.ts +1 -0
- package/src/cli/commands/__tests__/memory-v3-render.test.ts +340 -0
- package/src/cli/commands/browser.ts +247 -0
- package/src/cli/commands/domain.ts +91 -41
- package/src/cli/commands/inference.ts +93 -40
- package/src/cli/commands/memory-v2-compare-render.ts +115 -0
- package/src/cli/commands/memory-v2.ts +176 -1
- package/src/cli/commands/memory-v3-render.ts +344 -0
- package/src/cli/commands/memory-v3.ts +316 -0
- package/src/cli/program.ts +2 -0
- package/src/config/assistant-feature-flags.ts +21 -9
- package/src/config/bundled-skills/document-editor/SKILL.md +11 -2
- package/src/config/bundled-skills/document-editor/TOOLS.json +18 -0
- package/src/config/bundled-skills/document-editor/tools/document-open.ts +12 -0
- package/src/config/bundled-skills/image-studio/SKILL.md +4 -0
- package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +2 -2
- package/src/config/bundled-skills/media-processing/tools/ingest-media.ts +13 -8
- package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +10 -3
- package/src/config/bundled-skills/phone-calls/references/TRANSCRIPTS.md +16 -14
- package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +7 -2
- package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +7 -2
- package/src/config/bundled-tool-registry.ts +2 -0
- package/src/config/call-site-defaults.ts +7 -6
- package/src/config/feature-flag-registry.json +16 -0
- package/src/config/schemas/__tests__/memory-v2.test.ts +213 -1
- package/src/config/schemas/call-site-catalog.ts +21 -7
- package/src/config/schemas/llm.ts +12 -1
- package/src/config/schemas/memory-v2.ts +246 -0
- package/src/config/schemas/memory.ts +2 -1
- package/src/context/compactor.ts +52 -0
- package/src/conversations/__tests__/message-consolidation.test.ts +350 -0
- package/src/conversations/message-consolidation.ts +404 -0
- package/src/daemon/__tests__/conversation-tool-setup-exclude.test.ts +1 -1
- package/src/daemon/__tests__/meet-manifest-loader.test.ts +1 -1
- package/src/daemon/conversation-agent-loop-handlers.ts +2 -13
- package/src/daemon/conversation-agent-loop.ts +126 -76
- package/src/daemon/conversation-error.ts +31 -1
- package/src/daemon/conversation-lifecycle.ts +27 -22
- package/src/daemon/conversation-runtime-assembly.ts +10 -9
- package/src/daemon/conversation-tool-setup.ts +63 -3
- package/src/daemon/conversation-usage.ts +2 -0
- package/src/daemon/conversation.ts +14 -29
- package/src/daemon/disk-pressure-guard.ts +14 -2
- package/src/daemon/handlers/config-model.test.ts +1 -0
- package/src/daemon/handlers/conversations.ts +11 -3
- package/src/daemon/host-browser-proxy.ts +5 -5
- package/src/daemon/host-cu-proxy.ts +4 -4
- package/src/daemon/host-file-proxy.ts +4 -4
- package/src/daemon/host-proxy-base.ts +4 -4
- package/src/daemon/host-transfer-proxy.ts +10 -10
- package/src/daemon/lifecycle.ts +23 -20
- package/src/daemon/meet-manifest-loader.ts +1 -7
- package/src/daemon/message-types/conversations.ts +6 -9
- package/src/daemon/message-types/home.ts +1 -13
- package/src/daemon/message-types/messages.ts +6 -14
- package/src/daemon/message-types/sync.ts +14 -0
- package/src/daemon/shutdown-handlers.ts +24 -5
- package/src/daemon/switch-inference-profile-tool.ts +52 -0
- package/src/daemon/tool-setup-types.ts +13 -0
- package/src/events/relationship-state-updated.ts +25 -0
- package/src/heartbeat/__tests__/heartbeat-service.test.ts +1 -1
- package/src/home/home-greeting.ts +0 -9
- package/src/home/suggested-prompts.ts +0 -9
- package/src/ipc/gateway-flag-listener.ts +123 -0
- package/src/ipc/skill-routes/registries.ts +8 -12
- package/src/memory/__tests__/db-async-query.test.ts +165 -0
- package/src/memory/__tests__/db-maintenance.test.ts +115 -0
- package/src/memory/__tests__/jobs-store-enqueue-gate.test.ts +241 -0
- package/src/memory/__tests__/jobs-store-job-classes.test.ts +28 -1
- package/src/memory/__tests__/memory-retrospective-job.test.ts +7 -0
- package/src/memory/auto-analysis-enqueue.ts +5 -1
- package/src/memory/conversation-crud.ts +71 -70
- package/src/memory/conversation-starters-cadence.ts +3 -1
- package/src/memory/conversation-title-service.ts +19 -3
- package/src/memory/db-async-query.ts +214 -0
- package/src/memory/db-init.ts +10 -0
- package/src/memory/db-maintenance.ts +30 -21
- package/src/memory/graph/bootstrap.ts +8 -1
- package/src/memory/graph/capability-seed.ts +7 -3
- package/src/memory/graph/conversation-graph-memory.ts +100 -17
- package/src/memory/graph/extraction.ts +1 -5
- package/src/memory/graph/graph-search.ts +7 -1
- package/src/memory/indexer.ts +28 -18
- package/src/memory/job-handlers/cleanup.ts +76 -18
- package/src/memory/job-handlers/conversation-starters.ts +1 -4
- package/src/memory/jobs/embed-pkb-file.ts +6 -1
- package/src/memory/jobs-store.ts +14 -0
- package/src/memory/jobs-worker.ts +55 -22
- package/src/memory/llm-request-log-source-clickhouse.ts +42 -2
- package/src/memory/llm-request-log-source-local.ts +7 -0
- package/src/memory/llm-request-log-source.ts +9 -2
- package/src/memory/llm-request-log-store.ts +43 -1
- package/src/memory/llm-usage-store.ts +24 -0
- package/src/memory/memory-retrospective-enqueue.ts +8 -1
- package/src/memory/memory-retrospective-job.ts +5 -0
- package/src/memory/memory-v2-activation-log-store.ts +15 -6
- package/src/memory/migrations/260-rename-cleaned-at.ts +44 -0
- package/src/memory/migrations/261-llm-usage-add-raw-usage.ts +36 -0
- package/src/memory/migrations/262-memory-v3-coactivation.ts +57 -0
- package/src/memory/migrations/263-memory-v3-auto-edges.ts +50 -0
- package/src/memory/migrations/264-llm-request-log-call-site.ts +29 -0
- package/src/memory/migrations/index.ts +17 -0
- package/src/memory/migrations/registry.ts +33 -0
- package/src/memory/schema/conversations.ts +1 -1
- package/src/memory/schema/infrastructure.ts +21 -0
- package/src/memory/tool-usage-store.ts +36 -8
- package/src/memory/v2/__tests__/consolidation-job.test.ts +1 -0
- package/src/memory/v2/__tests__/harness-compare.test.ts +186 -0
- package/src/memory/v2/__tests__/harness-metrics.test.ts +74 -0
- package/src/memory/v2/__tests__/harness-oracle.test.ts +257 -0
- package/src/memory/v2/__tests__/harness-replay-input.test.ts +225 -0
- package/src/memory/v2/__tests__/harness-runner.test.ts +109 -0
- package/src/memory/v2/__tests__/injection.test.ts +127 -98
- package/src/memory/v2/__tests__/qdrant.test.ts +36 -0
- package/src/memory/v2/__tests__/router.test.ts +171 -3
- package/src/memory/v2/harness/compare.ts +57 -0
- package/src/memory/v2/harness/metrics.ts +124 -0
- package/src/memory/v2/harness/oracle.ts +145 -0
- package/src/memory/v2/harness/replay-input.ts +224 -0
- package/src/memory/v2/harness/retriever.ts +74 -0
- package/src/memory/v2/harness/router-retriever.ts +43 -0
- package/src/memory/v2/harness/runner.ts +106 -0
- package/src/memory/v2/harness/trace.ts +58 -0
- package/src/memory/v2/injection.ts +21 -15
- package/src/memory/v2/prompts/router.ts +26 -1
- package/src/memory/v2/qdrant.ts +14 -2
- package/src/memory/v2/router.ts +171 -18
- package/src/memory/v3/__tests__/coactivation-store.test.ts +422 -0
- package/src/memory/v3/__tests__/consolidation-job.test.ts +468 -0
- package/src/memory/v3/__tests__/edge-learning-job.test.ts +324 -0
- package/src/memory/v3/__tests__/edges.test.ts +563 -0
- package/src/memory/v3/__tests__/filter.test.ts +512 -0
- package/src/memory/v3/__tests__/gate.test.ts +574 -0
- package/src/memory/v3/__tests__/index-composition.test.ts +233 -0
- package/src/memory/v3/__tests__/loop.test.ts +530 -0
- package/src/memory/v3/__tests__/retriever.test.ts +226 -0
- package/src/memory/v3/__tests__/scouts.test.ts +440 -0
- package/src/memory/v3/__tests__/shadow-middleware.test.ts +312 -0
- package/src/memory/v3/__tests__/system-prompts.test.ts +154 -0
- package/src/memory/v3/__tests__/traversal.test.ts +469 -0
- package/src/memory/v3/__tests__/tree-index.test.ts +280 -0
- package/src/memory/v3/__tests__/tree-store.test.ts +529 -0
- package/src/memory/v3/__tests__/tree-walk.test.ts +707 -0
- package/src/memory/v3/__tests__/validate.test.ts +245 -0
- package/src/memory/v3/auto-edges.ts +223 -0
- package/src/memory/v3/coactivation-store.ts +124 -0
- package/src/memory/v3/consolidation-job.ts +323 -0
- package/src/memory/v3/edge-learning-job.ts +160 -0
- package/src/memory/v3/edges.ts +249 -0
- package/src/memory/v3/filter.ts +281 -0
- package/src/memory/v3/gate.ts +334 -0
- package/src/memory/v3/index-composition.ts +113 -0
- package/src/memory/v3/llm-capture.ts +46 -0
- package/src/memory/v3/loop.ts +382 -0
- package/src/memory/v3/maintenance.ts +144 -0
- package/src/memory/v3/prompt-context.ts +33 -0
- package/src/memory/v3/prompts/consolidation.ts +458 -0
- package/src/memory/v3/prompts/system-prompts.ts +196 -0
- package/src/memory/v3/retriever.ts +33 -0
- package/src/memory/v3/scouts.ts +420 -0
- package/src/memory/v3/shadow-middleware.ts +305 -0
- package/src/memory/v3/traversal.ts +206 -0
- package/src/memory/v3/tree-index.ts +237 -0
- package/src/memory/v3/tree-store.ts +394 -0
- package/src/memory/v3/tree-walk.ts +351 -0
- package/src/memory/v3/types.ts +65 -0
- package/src/memory/v3/validate.ts +300 -0
- package/src/notifications/adapters/macos.ts +18 -1
- package/src/notifications/adapters/platform.ts +1 -1
- package/src/notifications/decision-engine.ts +1 -4
- package/src/notifications/emit-signal.ts +29 -49
- package/src/permissions/prompter.ts +3 -3
- package/src/permissions/question-prompter.ts +5 -2
- package/src/permissions/secret-prompter.ts +2 -2
- package/src/plugin-api/index.ts +4 -0
- package/src/plugin-api/types.ts +7 -33
- package/src/plugins/defaults/index.ts +6 -0
- package/src/plugins/defaults/injectors.ts +18 -11
- package/src/plugins/external-plugin-loader.ts +5 -68
- package/src/plugins/types.ts +11 -16
- package/src/proactive-artifact/aux-message-injector.ts +17 -4
- package/src/prompts/__tests__/task-progress-hint-section.test.ts +3 -9
- package/src/prompts/persona-resolver.ts +36 -21
- package/src/prompts/sections.ts +39 -7
- package/src/prompts/system-prompt.ts +50 -185
- package/src/prompts/templates/BOOTSTRAP.md +2 -2
- package/src/prompts/templates/system-sections.ts +230 -8
- package/src/providers/__tests__/connection-model-compat.test.ts +234 -0
- package/src/providers/__tests__/retry-callsite.test.ts +85 -5
- package/src/providers/anthropic/client.ts +32 -66
- package/src/providers/call-site-routing.ts +14 -2
- package/src/providers/connection-model-compat.ts +38 -0
- package/src/providers/connection-resolution.ts +16 -2
- package/src/providers/gemini/client.ts +49 -6
- package/src/providers/inference/adapter-factory.ts +3 -0
- package/src/providers/minimax/client.ts +106 -0
- package/src/providers/model-catalog.ts +43 -0
- package/src/providers/model-intents.ts +1 -1
- package/src/providers/openai/chat-completions-provider.ts +6 -3
- package/src/providers/openai/codex-models.ts +18 -0
- package/src/providers/openai/responses-provider.ts +78 -21
- package/src/providers/provider-send-message.ts +7 -1
- package/src/providers/retry.ts +34 -3
- package/src/providers/thinking-config.ts +26 -1
- package/src/providers/usage-tracking.ts +2 -0
- package/src/runtime/AGENTS.md +2 -2
- package/src/runtime/agent-wake.ts +1 -0
- package/src/runtime/assistant-event-hub.ts +76 -6
- package/src/runtime/auth/route-policy.ts +36 -0
- package/src/runtime/btw-sidechain.ts +0 -6
- package/src/runtime/http-types.ts +0 -2
- package/src/runtime/migrations/vbundle-builder.ts +10 -3
- package/src/runtime/pending-interactions.ts +0 -1
- package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +106 -0
- package/src/runtime/routes/__tests__/memory-v2-simulate-route.test.ts +25 -6
- package/src/runtime/routes/__tests__/plugins-routes.test.ts +512 -0
- package/src/runtime/routes/acp-routes.test.ts +255 -6
- package/src/runtime/routes/acp-routes.ts +8 -1
- package/src/runtime/routes/avatar-routes.ts +10 -10
- package/src/runtime/routes/background-wake-routes.ts +188 -0
- package/src/runtime/routes/browser-tabs-routes.ts +200 -0
- package/src/runtime/routes/btw-routes.ts +0 -6
- package/src/runtime/routes/conversation-cli-routes.ts +1 -1
- package/src/runtime/routes/conversation-list-routes.ts +12 -4
- package/src/runtime/routes/conversation-management-routes.ts +77 -20
- package/src/runtime/routes/conversation-query-routes.ts +142 -36
- package/src/runtime/routes/conversation-routes.ts +252 -410
- package/src/runtime/routes/conversation-starter-routes.ts +6 -3
- package/src/runtime/routes/disk-pressure-routes.ts +1 -1
- package/src/runtime/routes/domain-routes.ts +60 -10
- package/src/runtime/routes/email-routes.ts +5 -2
- package/src/runtime/routes/events-routes.ts +54 -10
- package/src/runtime/routes/group-routes.ts +24 -8
- package/src/runtime/routes/host-browser-routes.ts +10 -2
- package/src/runtime/routes/host-cu-routes.ts +2 -2
- package/src/runtime/routes/inbound-stages/acl-enforcement.ts +96 -3
- package/src/runtime/routes/index.ts +8 -0
- package/src/runtime/routes/inference-profile-session-handler.ts +22 -12
- package/src/runtime/routes/inference-profile-session-routes.ts +7 -1
- package/src/runtime/routes/llm-call-sites-routes.ts +32 -5
- package/src/runtime/routes/memory-item-routes.ts +8 -3
- package/src/runtime/routes/memory-v2-routes.ts +215 -5
- package/src/runtime/routes/memory-v3-routes.ts +316 -0
- package/src/runtime/routes/migration-routes.ts +21 -24
- package/src/runtime/routes/plugins-routes.ts +337 -0
- package/src/runtime/routes/rename-conversation-routes.ts +6 -2
- package/src/runtime/routes/secret-routes.ts +25 -5
- package/src/runtime/routes/settings-routes.ts +12 -11
- package/src/runtime/routes/slack-channel-routes.ts +5 -4
- package/src/runtime/routes/workspace-routes.ts +25 -10
- package/src/runtime/sync/resource-sync-events.ts +106 -38
- package/src/runtime/sync/sync-publisher.test.ts +49 -0
- package/src/runtime/sync/sync-publisher.ts +2 -1
- package/src/runtime/verification-outbound-actions.ts +73 -1
- package/src/telemetry/types.ts +12 -0
- package/src/telemetry/usage-telemetry-reporter.test.ts +48 -0
- package/src/telemetry/usage-telemetry-reporter.ts +1 -0
- package/src/tools/acp/spawn.test.ts +119 -0
- package/src/tools/acp/spawn.ts +15 -2
- package/src/tools/apps/definitions.ts +2 -8
- package/src/tools/ask-question/ask-question-tool.test.ts +3 -3
- package/src/tools/ask-question/ask-question-tool.ts +38 -45
- package/src/tools/browser/__tests__/pinned-tabs.test.ts +70 -0
- package/src/tools/browser/browser-execution.ts +16 -3
- package/src/tools/browser/cdp-client/__tests__/browser-tabs-factory.test.ts +402 -0
- package/src/tools/browser/cdp-client/__tests__/types.test.ts +3 -0
- package/src/tools/browser/cdp-client/cdp-inspect-client.ts +12 -0
- package/src/tools/browser/cdp-client/extension-cdp-client.ts +27 -1
- package/src/tools/browser/cdp-client/factory.ts +100 -17
- package/src/tools/browser/cdp-client/local-cdp-client.ts +12 -0
- package/src/tools/browser/cdp-client/types.ts +65 -0
- package/src/tools/browser/pinned-tabs.ts +96 -40
- package/src/tools/computer-use/definitions.ts +22 -78
- package/src/tools/credential-execution/make-authenticated-request.ts +3 -9
- package/src/tools/credential-execution/manage-secure-command-tool.ts +3 -9
- package/src/tools/credential-execution/run-authenticated-command.ts +3 -9
- package/src/tools/credentials/vault.ts +3 -9
- package/src/tools/document/document-tool.ts +59 -0
- package/src/tools/execution-target.ts +21 -23
- package/src/tools/executor.ts +6 -1
- package/src/tools/filesystem/edit.ts +3 -9
- package/src/tools/filesystem/list.ts +3 -9
- package/src/tools/filesystem/read.ts +3 -9
- package/src/tools/filesystem/write.ts +3 -9
- package/src/tools/host-filesystem/edit.ts +3 -9
- package/src/tools/host-filesystem/read.ts +3 -9
- package/src/tools/host-filesystem/transfer.ts +3 -9
- package/src/tools/host-filesystem/write.ts +3 -9
- package/src/tools/host-terminal/host-shell.ts +3 -9
- package/src/tools/mcp/mcp-tool-factory.ts +1 -8
- package/src/tools/memory/register.test.ts +1 -1
- package/src/tools/memory/register.ts +4 -9
- package/src/tools/network/web-fetch.ts +3 -9
- package/src/tools/network/web-search.ts +25 -32
- package/src/tools/registry.ts +7 -23
- package/src/tools/schema-transforms.ts +1 -1
- package/src/tools/skills/execute.ts +3 -9
- package/src/tools/skills/load.ts +3 -9
- package/src/tools/skills/skill-tool-factory.ts +1 -8
- package/src/tools/subagent/notify-parent.ts +3 -9
- package/src/tools/system/request-permission.ts +3 -9
- package/src/tools/terminal/shell.ts +3 -9
- package/src/tools/tool-defaults.ts +94 -0
- package/src/tools/types.ts +27 -98
- package/src/tools/ui-surface/definitions.ts +6 -22
- package/src/usage/pricing.ts +23 -0
- package/src/usage/types.ts +12 -0
- package/src/util/logger.ts +16 -7
- package/src/util/platform.ts +7 -2
- package/src/util/sqlite3-runtime.ts +65 -0
- package/src/workspace/migrations/086-revert-stale-gemini-mis-rewrites.ts +1 -0
- package/src/workspace/migrations/089-move-memory-tree-out-of-v3.ts +86 -0
- package/src/workspace/migrations/registry.ts +2 -0
- package/src/__tests__/compaction-strip-metadata-clear.test.ts +0 -206
- package/src/__tests__/message-complete-display-id.test.ts +0 -175
- package/src/daemon/query-complexity-router.ts +0 -75
- package/src/prompts/cache-boundary.ts +0 -8
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Input reconstruction — rebuild a retriever's per-turn inputs from telemetry.
|
|
3
|
+
*
|
|
4
|
+
* The activation log stores only outputs, so replaying a historical turn means
|
|
5
|
+
* reconstructing the inputs:
|
|
6
|
+
* - `recentTurnPairs`: the (assistant, user) pairs ending at the turn's user
|
|
7
|
+
* message, windowed by `historical_pairs` and extracted exactly as
|
|
8
|
+
* production does (mirrors `extractRecentTurnPairs` in
|
|
9
|
+
* `conversation-graph-memory.ts`).
|
|
10
|
+
* - `nowText`: read from current workspace files (`loadNowText`). NOT stored
|
|
11
|
+
* in the log, so it may differ from what the live turn saw —
|
|
12
|
+
* always-approximate; see `ReconstructionMeta.nowReconstructedFromCurrent`.
|
|
13
|
+
* - `priorEverInjected`: the union of injected / in_context slugs from earlier
|
|
14
|
+
* `mode='router'` logs in the same conversation (turn < target).
|
|
15
|
+
*
|
|
16
|
+
* The anchor is the turn's assistant reply; the messages the router saw are
|
|
17
|
+
* those strictly before it, so we fetch a bounded recent window up to the
|
|
18
|
+
* anchor's timestamp and cut at the anchor row.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { and, asc, desc, eq, lt, lte } from "drizzle-orm";
|
|
22
|
+
|
|
23
|
+
import type { AssistantConfig } from "../../../config/types.js";
|
|
24
|
+
import type { ContentBlock } from "../../../providers/types.js";
|
|
25
|
+
import type { DrizzleDb } from "../../db-connection.js";
|
|
26
|
+
import type { MemoryV2ConceptRowRecord } from "../../memory-v2-activation-log-store.js";
|
|
27
|
+
import { memoryV2ActivationLogs, messages } from "../../schema.js";
|
|
28
|
+
import { loadNowText } from "../now-text.js";
|
|
29
|
+
import type { RouterTurnPair } from "../router.js";
|
|
30
|
+
import type { EverInjectedEntry } from "../types.js";
|
|
31
|
+
import type { OracleTurn } from "./oracle.js";
|
|
32
|
+
import type { RetrievalInput } from "./retriever.js";
|
|
33
|
+
|
|
34
|
+
/**
 * Bookkeeping that accompanies a reconstructed input, recording how much of
 * the requested context could be rebuilt and which parts are approximate.
 */
export interface ReconstructionMeta {
  /** Size of the turn-pair window that was asked for (`historical_pairs`). */
  windowPairs: number;

  /**
   * Number of pairs that were actually rebuilt. Can be smaller than
   * `windowPairs` when the turn sits near the start of the conversation.
   */
  pairsReconstructed: number;

  /** How many `priorEverInjected` entries were recovered from earlier router logs. */
  priorEverInjectedCount: number;

  /**
   * Marker that NOW text came from the workspace files as they are today.
   * The log does not store NOW text, so the live turn may have seen something
   * different. This is always `true`; some portion of any recall gap is due to
   * this drift, which is not measured.
   */
  nowReconstructedFromCurrent: true;
}
|
|
48
|
+
|
|
49
|
+
/** A rebuilt retriever input together with notes on how it was rebuilt. */
export interface ReconstructedInput {
  /** The per-turn retrieval input recovered from stored data. */
  input: RetrievalInput;

  /** Details about the reconstruction (window size, counts, approximations). */
  meta: ReconstructionMeta;
}
|
|
53
|
+
|
|
54
|
+
/**
 * The smallest message shape pair extraction needs: who spoke and the
 * content blocks of what they said.
 */
interface PlainMessage {
  /** Speaker role; pair extraction looks for `"user"` and `"assistant"`. */
  role: string;

  /** Content blocks of the message. */
  content: ContentBlock[];
}
|
|
59
|
+
|
|
60
|
+
/**
 * Harness copy of production `extractRecentTurnPairs`.
 *
 * Scans `msgs` from the newest entry backwards. The first user message seen
 * is held until an assistant message is reached, and the two are emitted as
 * one pair. While a user message is held, any older user messages are passed
 * over; assistant messages seen while nothing is held are passed over too.
 * Scanning stops once `k` pairs exist.
 *
 * If a user message is still held when the scan ends and there is room, it
 * is emitted with an empty `assistantMessage`. If no pair was produced at
 * all, a single pair of two empty strings is returned.
 *
 * @param msgs Messages in chronological order (oldest first).
 * @param k Maximum number of pairs to keep.
 * @returns The kept pairs, oldest first.
 */
function extractRecentTurnPairs(
  msgs: readonly PlainMessage[],
  k: number,
): RouterTurnPair[] {
  // Concatenate the text blocks of a message with single spaces; blocks of
  // any other type contribute nothing.
  const textOf = (message: PlainMessage): string => {
    const parts: string[] = [];
    for (const block of message.content) {
      if (block.type === "text") {
        parts.push(block.text);
      }
    }
    return parts.join(" ");
  };

  // Pairs are gathered newest-first and flipped once at the end.
  const newestFirst: RouterTurnPair[] = [];
  let heldUserText: string | null = null;
  let cursor = msgs.length;

  while (cursor > 0 && newestFirst.length < k) {
    cursor -= 1;
    const current = msgs[cursor]!;

    if (heldUserText === null) {
      // Nothing held yet: only a user message is of interest.
      if (current.role === "user") {
        heldUserText = textOf(current);
      }
      continue;
    }

    // A user message is held: it is completed by the next assistant reply.
    if (current.role === "assistant") {
      newestFirst.push({
        assistantMessage: textOf(current),
        userMessage: heldUserText,
      });
      heldUserText = null;
    }
  }

  // A user message with no earlier assistant reply still forms a pair.
  if (heldUserText !== null && newestFirst.length < k) {
    newestFirst.push({ assistantMessage: "", userMessage: heldUserText });
  }

  if (newestFirst.length === 0) {
    return [{ assistantMessage: "", userMessage: "" }];
  }
  return newestFirst.reverse();
}
|
|
100
|
+
|
|
101
|
+
/**
 * Parse a stored message `content` string into content blocks.
 *
 * The string is expected to hold a JSON array of blocks. Invalid JSON and
 * JSON values that are not arrays both yield an empty list rather than an
 * error, so one unreadable row does not abort a reconstruction.
 *
 * Array entries that are not objects carrying a string `type` (for example
 * `null`, numbers or bare strings) are dropped. Without this, such an entry
 * would be cast to `ContentBlock` unchecked and a later `block.type` access
 * would throw on `null`.
 *
 * @param raw Serialized message content.
 * @returns The block-shaped entries, in their original order; possibly empty.
 */
function parseContent(raw: string): ContentBlock[] {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Unparseable content is treated as a message with no blocks.
    return [];
  }

  if (!Array.isArray(parsed)) {
    return [];
  }

  // Minimal structural check only: a non-null object with a string tag.
  return parsed.filter(
    (block): block is ContentBlock =>
      typeof block === "object" &&
      block !== null &&
      typeof (block as { type?: unknown }).type === "string",
  );
}
|
|
109
|
+
|
|
110
|
+
/**
 * Rebuild the retrieval input for one historical turn.
 *
 * Reads the most recent messages of the turn's conversation whose
 * `createdAt` is at or before the anchor's, newest first and capped at
 * `max(20, historical_pairs * 12)` rows. Everything from the anchor row
 * onwards is discarded, and the remaining older rows are turned into
 * turn pairs. Prior-injected entries come from
 * `reconstructPriorEverInjected`, and NOW text is loaded via `loadNowText`.
 *
 * @param db Database handle used for the message and activation-log reads.
 * @param turn The turn to replay; supplies the conversation id, anchor
 *   message id, anchor timestamp and turn number.
 * @param config Assistant config; `memory.v2.router.historical_pairs` sets
 *   the pair window.
 * @param workspaceDir Workspace directory passed to `loadNowText` and echoed
 *   into the returned input.
 * @returns The rebuilt input plus metadata, or `null` when the anchor message
 *   is not inside the fetched window or no message precedes it.
 */
export async function reconstructInput(
  db: DrizzleDb,
  turn: OracleTurn,
  config: AssistantConfig,
  workspaceDir: string,
): Promise<ReconstructedInput | null> {
  const { historical_pairs: windowPairs } = config.memory.v2.router;

  // Only the last `windowPairs` pairs are needed, so a generous fixed
  // multiple of that bounds the read even for very long conversations.
  const rowLimit = Math.max(20, windowPairs * 12);
  const inConversationUpToAnchor = and(
    eq(messages.conversationId, turn.conversationId),
    lte(messages.createdAt, turn.anchorCreatedAt),
  );
  const newestFirst = db
    .select({
      id: messages.id,
      role: messages.role,
      content: messages.content,
    })
    .from(messages)
    .where(inConversationUpToAnchor)
    .orderBy(desc(messages.createdAt), desc(messages.id))
    .limit(rowLimit)
    .all();

  const anchorIndex = newestFirst.findIndex(
    (row) => row.id === turn.anchorMessageId,
  );
  if (anchorIndex < 0) {
    return null;
  }

  // Rows after the anchor in this newest-first list are the older messages.
  // Walk them from the far end so the result is in chronological order.
  const chronological: PlainMessage[] = [];
  for (let i = newestFirst.length - 1; i > anchorIndex; i--) {
    const row = newestFirst[i]!;
    chronological.push({ role: row.role, content: parseContent(row.content) });
  }
  if (chronological.length === 0) {
    return null;
  }

  const recentTurnPairs = extractRecentTurnPairs(chronological, windowPairs);
  const priorEverInjected = reconstructPriorEverInjected(
    db,
    turn.conversationId,
    turn.turn,
  );
  const nowText = await loadNowText(workspaceDir);

  const meta: ReconstructionMeta = {
    windowPairs,
    pairsReconstructed: recentTurnPairs.length,
    priorEverInjectedCount: priorEverInjected.length,
    nowReconstructedFromCurrent: true,
  };

  return {
    input: {
      workspaceDir,
      recentTurnPairs,
      nowText,
      priorEverInjected,
      config,
    },
    meta,
  };
}
|
|
174
|
+
|
|
175
|
+
/** Concept statuses that count as "was injected" when replaying earlier turns. */
const PRIOR_STATUSES = new Set<string>(["injected", "in_context"]);

/**
 * Union of slugs injected on earlier `mode='router'` turns in this conversation
 * (turn < `currentTurn`), each tagged with the earliest turn it appeared on —
 * the harness analogue of the running `everInjected` list production maintains.
 *
 * Rows whose `conceptsJson` is not valid JSON, or is valid JSON but not an
 * array, are skipped. Array elements that are not objects carrying a string
 * `slug` and `status` are skipped as well, so one malformed telemetry row
 * cannot abort the whole reconstruction.
 *
 * @param db database handle used to read the activation logs.
 * @param conversationId conversation whose earlier turns are scanned.
 * @param currentTurn exclusive upper bound on the turns considered.
 * @returns one entry per slug, in first-seen order (rows are read in ascending
 * turn order).
 */
function reconstructPriorEverInjected(
  db: DrizzleDb,
  conversationId: string,
  currentTurn: number,
): EverInjectedEntry[] {
  const rows = db
    .select({
      turn: memoryV2ActivationLogs.turn,
      conceptsJson: memoryV2ActivationLogs.conceptsJson,
    })
    .from(memoryV2ActivationLogs)
    .where(
      and(
        eq(memoryV2ActivationLogs.conversationId, conversationId),
        eq(memoryV2ActivationLogs.mode, "router"),
        lt(memoryV2ActivationLogs.turn, currentTurn),
      ),
    )
    .orderBy(asc(memoryV2ActivationLogs.turn))
    .all();

  const firstTurnBySlug = new Map<string, number>();
  for (const row of rows) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(row.conceptsJson);
    } catch {
      continue;
    }
    // `JSON.parse` also succeeds for `null`, objects and scalars; iterating
    // those would throw outside the try/catch above.
    if (!Array.isArray(parsed)) continue;

    for (const candidate of parsed as unknown[]) {
      if (typeof candidate !== "object" || candidate === null) continue;
      const { slug, status } = candidate as Partial<MemoryV2ConceptRowRecord>;
      if (typeof slug !== "string" || typeof status !== "string") continue;
      if (!PRIOR_STATUSES.has(status)) continue;
      // Rows arrive in ascending turn order, so the first sighting is the
      // earliest turn for that slug.
      if (!firstTurnBySlug.has(slug)) {
        firstTurnBySlug.set(slug, row.turn);
      }
    }
  }

  return Array.from(firstTurnBySlug, ([slug, turn]) => ({ slug, turn }));
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* The retriever seam for the memory comparison harness.
|
|
3
|
+
*
|
|
4
|
+
* A `Retriever` maps one turn's reconstructed context to a set of selected
|
|
5
|
+
* concept-page slugs. Multiple strategies (the production router, an
|
|
6
|
+
* alternative retrieval loop) implement this single interface, so the harness
|
|
7
|
+
* can run them over the same turns and diff their selections against the oracle
|
|
8
|
+
* (see `oracle.ts`). Offline only — nothing here runs in the live injection
|
|
9
|
+
* path.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import type { AssistantConfig } from "../../../config/types.js";
|
|
13
|
+
import type { RouterTurnPair } from "../router.js";
|
|
14
|
+
import type { EverInjectedEntry } from "../types.js";
|
|
15
|
+
import type { DescentTrace } from "./trace.js";
|
|
16
|
+
|
|
17
|
+
/**
 * Everything a retriever is handed for a single turn. The shape mirrors the
 * live router's inputs (`RunRouterParams`); `reconstructInput` (see
 * `replay-input.ts`) rebuilds it from historical telemetry.
 */
export interface RetrievalInput {
  workspaceDir: string;
  /**
   * Recent (assistant, user) pairs ordered oldest to newest. The final
   * entry's `userMessage` is the just-arrived turn being routed.
   */
  recentTurnPairs: readonly RouterTurnPair[];
  /** Verbatim NOW context (essentials/threads/recent). */
  nowText: string;
  /** Slugs that were already injected on earlier turns. */
  priorEverInjected: readonly EverInjectedEntry[];
  config: AssistantConfig;
  signal?: AbortSignal;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Cost bookkeeping a retriever may attach to a single retrieval. Every field
 * is optional, so a retriever reports only what it measured.
 */
export interface RetrievalCost {
  inputTokens?: number;
  outputTokens?: number;
  usd?: number;
  ms?: number;
}
|
|
44
|
+
|
|
45
|
+
/** Result a retriever produces for one turn. */
export interface RetrievalOutput {
  /** Page slugs the retriever picked, in its own ranked order. */
  selectedSlugs: string[];
  /**
   * Retriever-defined provenance / lane label for each slug: router tiers
   * (`tier1`, `tier3:0`, …) for the current router, or loop lanes (`sparse`,
   * `dense`, `tree`, `edge`) for the future loop. Per-lane attribution in
   * `metrics.ts` is driven by this map.
   */
  sourceBySlug: ReadonlyMap<string, string>;
  /**
   * Descent trace, produced only by the loop. Tier-based retrievers (the
   * current router) do not walk a tree and leave this `undefined`; renderers
   * show "(no descent trace)".
   */
  trace?: DescentTrace;
  cost?: RetrievalCost;
  /** Non-null when the retriever could not produce a usable selection. */
  failureReason?: string | null;
}
|
|
66
|
+
|
|
67
|
+
/**
 * A retrieval strategy identified by `name`. The harness runs implementations
 * offline over historical turns, so they must not mutate production state.
 */
export interface Retriever {
  readonly name: string;
  retrieve(input: RetrievalInput): Promise<RetrievalOutput>;
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Router retriever — the current production router (`runRouter`) adapted to the
|
|
3
|
+
* harness `Retriever` interface.
|
|
4
|
+
*
|
|
5
|
+
* The union cap is left ON (no `disableUnionCap`) so the selection matches what
|
|
6
|
+
* production would actually inject — the self-test grades the router against
|
|
7
|
+
* its own injected ground truth.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { DrizzleDb } from "../../db-connection.js";
|
|
11
|
+
import { runRouter } from "../router.js";
|
|
12
|
+
import type {
|
|
13
|
+
RetrievalInput,
|
|
14
|
+
RetrievalOutput,
|
|
15
|
+
Retriever,
|
|
16
|
+
} from "./retriever.js";
|
|
17
|
+
|
|
18
|
+
/**
 * Wrap `runRouter` as a harness `Retriever` named `"router"`.
 *
 * @param database optional handle for tier-2 EMA scoring, forwarded to
 * `runRouter`. Omit to exercise only the tier-1 / tier-3 paths (as the router's
 * own tests do).
 * @returns a retriever that reports the router's selected slugs, their
 * per-slug source labels, and its failure reason.
 */
export function createRouterRetriever(database?: DrizzleDb): Retriever {
  const retrieve = async (input: RetrievalInput): Promise<RetrievalOutput> => {
    const {
      workspaceDir,
      recentTurnPairs,
      nowText,
      priorEverInjected,
      config,
      signal,
    } = input;

    // `signal` and `database` are only attached when present, so the router
    // never receives an explicit `undefined` for either key.
    const { selectedSlugs, sourceBySlug, failureReason } = await runRouter({
      workspaceDir,
      recentTurnPairs,
      nowText,
      priorEverInjected,
      config,
      ...(signal ? { signal } : {}),
      ...(database ? { database } : {}),
    });

    return { selectedSlugs, sourceBySlug, failureReason };
  };

  return { name: "router", retrieve };
}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Comparison runner — execute N retrievers over a set of oracle turns and score
|
|
3
|
+
* each against ground truth.
|
|
4
|
+
*
|
|
5
|
+
* The runner is DB/workspace-agnostic: input reconstruction is injected as a
|
|
6
|
+
* function, so it can be unit-tested with stubs and the route/CLI can wire in
|
|
7
|
+
* the real `reconstructInput` (which needs a DB + workspace).
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import {
|
|
11
|
+
aggregate,
|
|
12
|
+
type AggregateEval,
|
|
13
|
+
evalTurn,
|
|
14
|
+
type TurnEval,
|
|
15
|
+
} from "./metrics.js";
|
|
16
|
+
import type { OracleTurn } from "./oracle.js";
|
|
17
|
+
import type { ReconstructedInput } from "./replay-input.js";
|
|
18
|
+
import type { Retriever } from "./retriever.js";
|
|
19
|
+
|
|
20
|
+
/** Evaluations of every retriever for a single oracle turn. */
export interface ComparisonTurnResult {
  conversationId: string;
  turn: number;
  /** This turn's evaluation for each retriever, keyed by retriever name. */
  byRetriever: Record<string, TurnEval>;
}
|
|
26
|
+
|
|
27
|
+
/** Aggregate evaluation for one retriever, identified by its name. */
export interface RetrieverReport {
  name: string;
  aggregate: AggregateEval;
}
|
|
31
|
+
|
|
32
|
+
/** Full result of a comparison run: per-turn detail plus per-retriever aggregates. */
export interface ComparisonReport {
  ks: number[];
  /** Number of oracle turns handed to the runner. */
  turnsConsidered: number;
  /** Number of turns actually scored (reconstruction succeeded). */
  turnsScored: number;
  /** Number of turns skipped because input reconstruction returned null. */
  turnsSkipped: number;
  perTurn: ComparisonTurnResult[];
  retrievers: RetrieverReport[];
}
|
|
43
|
+
|
|
44
|
+
/** Inputs to `runComparison`. */
export interface RunComparisonParams {
  retrievers: readonly Retriever[];
  oracleTurns: readonly OracleTurn[];
  /** Builds the retriever input for a turn; returning null skips that turn. */
  reconstruct: (turn: OracleTurn) => Promise<ReconstructedInput | null>;
  ks: readonly number[];
  signal?: AbortSignal;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Run every retriever over every oracle turn and score each against the
 * turn's ground-truth slugs.
 *
 * Turns are processed sequentially, and retrievers sequentially within a turn.
 * A turn whose reconstruction returns null is counted in `turnsSkipped`.
 *
 * Abort handling: `signal` is checked before each turn and before each
 * retriever. A turn is only committed (counted in `turnsScored`, added to
 * `perTurn`, and folded into the aggregates) once every retriever has been
 * evaluated on it. A turn interrupted part-way is discarded, so all
 * retrievers are aggregated over the same set of turns and no empty or
 * partial `byRetriever` entry is reported.
 *
 * @param params retrievers, oracle turns, the reconstruction function, the
 * `ks` cutoffs passed through to the metrics, and an optional abort signal.
 * @returns per-turn evaluations plus one aggregate per retriever.
 */
export async function runComparison(
  params: RunComparisonParams,
): Promise<ComparisonReport> {
  const { retrievers, oracleTurns, reconstruct, ks, signal } = params;

  const perTurn: ComparisonTurnResult[] = [];
  const perRetrieverEvals = new Map<string, TurnEval[]>();
  for (const retriever of retrievers) {
    perRetrieverEvals.set(retriever.name, []);
  }

  let turnsScored = 0;
  let turnsSkipped = 0;

  for (const turn of oracleTurns) {
    if (signal?.aborted) break;

    const reconstructed = await reconstruct(turn);
    if (!reconstructed) {
      turnsSkipped++;
      continue;
    }

    // Stage this turn's evaluations locally; they are committed only if no
    // abort interrupts the retriever loop.
    const byRetriever: Record<string, TurnEval> = {};
    const staged: Array<[string, TurnEval]> = [];
    let interrupted = false;
    for (const retriever of retrievers) {
      if (signal?.aborted) {
        interrupted = true;
        break;
      }
      const output = await retriever.retrieve(reconstructed.input);
      const turnEval = evalTurn(output, turn.groundTruthSlugs, ks);
      byRetriever[retriever.name] = turnEval;
      staged.push([retriever.name, turnEval]);
    }
    if (interrupted) break;

    turnsScored++;
    for (const [name, turnEval] of staged) {
      perRetrieverEvals.get(name)?.push(turnEval);
    }
    perTurn.push({
      conversationId: turn.conversationId,
      turn: turn.turn,
      byRetriever,
    });
  }

  const retrieverReports: RetrieverReport[] = retrievers.map((retriever) => ({
    name: retriever.name,
    aggregate: aggregate(perRetrieverEvals.get(retriever.name) ?? [], ks),
  }));

  return {
    ks: [...ks],
    turnsConsidered: oracleTurns.length,
    turnsScored,
    turnsSkipped,
    perTurn,
    retrievers: retrieverReports,
  };
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Descent-trace schema for a tree-walking retriever.
|
|
3
|
+
*
|
|
4
|
+
* Defined ahead of its producer: the comparison harness renders this and a
|
|
5
|
+
* tree-walking retriever emits it; a tier-based retriever (no tree walk) leaves
|
|
6
|
+
* `RetrievalOutput.trace` undefined. Per level it records which branches were
|
|
7
|
+
* considered, descended, and skipped plus the model's reasoning, so a wrong
|
|
8
|
+
* high-level skip is observable rather than silent.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import type { RetrievalCost } from "./retriever.js";
|
|
12
|
+
|
|
13
|
+
/** What a single scout lane contributed on one pass. */
export interface ScoutResult {
  lane: "hot" | "sparse" | "dense";
  slugs: string[];
  /** Per-slug score (BM25 / cosine / EMA), optional and for inspection only. */
  scoreBySlug?: Record<string, number>;
}
|
|
20
|
+
|
|
21
|
+
/** A single level of the tree walk: the branches considered, descended, and skipped. */
export interface TreeLevel {
  /** The node whose index page was read: "" for the root, otherwise a branch path. */
  node: string;
  considered: string[];
  descended: string[];
  skipped: string[];
  /** Reason the model gave for the descend/skip split at this node. */
  reasoning: string;
  cost?: RetrievalCost;
}
|
|
32
|
+
|
|
33
|
+
/** Pages pulled by a 1–2 hop walk along the curated `edges:` graph, starting at `from`. */
export interface EdgeExpansion {
  from: string;
  pulled: string[];
}
|
|
38
|
+
|
|
39
|
+
/** Decision the gate reached at the end of a pass. */
export interface GateDecision {
  decision: "ready" | "more";
  /** For a "more" decision: the generated follow-up queries that seed the next pass. */
  questions?: string[];
}
|
|
45
|
+
|
|
46
|
+
/** Record of one pass of the loop; every section other than the pass number is optional. */
export interface DescentPass {
  passNumber: number;
  scouts?: ScoutResult[];
  treeLevels?: TreeLevel[];
  edgeExpansions?: EdgeExpansion[];
  gate?: GateDecision;
}
|
|
54
|
+
|
|
55
|
+
/** Trace of a complete loop execution, one entry per pass. */
export interface DescentTrace {
  passes: DescentPass[];
}
|
|
@@ -45,7 +45,7 @@ import {
|
|
|
45
45
|
import { getEdgeIndex } from "./edge-index.js";
|
|
46
46
|
import { recordInjectionEvents } from "./injection-events.js";
|
|
47
47
|
import { readPage, renderPageContent } from "./page-store.js";
|
|
48
|
-
import { runRouter } from "./router.js";
|
|
48
|
+
import { type RouterTurnPair, runRouter } from "./router.js";
|
|
49
49
|
import { getSkillCapability, isSkillSlug } from "./skill-store.js";
|
|
50
50
|
import type { ActivationState, EverInjectedEntry } from "./types.js";
|
|
51
51
|
|
|
@@ -81,10 +81,13 @@ export interface InjectMemoryV2BlockParams {
|
|
|
81
81
|
conversationId: string;
|
|
82
82
|
/** Caller-tracked turn number, persisted with each new everInjected entry. */
|
|
83
83
|
currentTurn: number;
|
|
84
|
-
/**
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
84
|
+
/**
|
|
85
|
+
* Recent assistant/user turn pairs, oldest first. Must contain at least
|
|
86
|
+
* one entry whose `userMessage` is the just-arrived turn that triggered
|
|
87
|
+
* this call. The number of pairs the production caller passes is
|
|
88
|
+
* controlled by `memory.v2.router.historical_pairs`.
|
|
89
|
+
*/
|
|
90
|
+
recentTurnPairs: readonly RouterTurnPair[];
|
|
88
91
|
/** NOW context (autoloaded essentials/threads/recent or NOW.md). */
|
|
89
92
|
nowText: string;
|
|
90
93
|
/** Resolved messageId to persist on the activation_state row. */
|
|
@@ -140,14 +143,21 @@ export async function injectMemoryV2Block(
|
|
|
140
143
|
database,
|
|
141
144
|
conversationId,
|
|
142
145
|
currentTurn,
|
|
143
|
-
|
|
144
|
-
assistantMessage,
|
|
146
|
+
recentTurnPairs,
|
|
145
147
|
nowText,
|
|
146
148
|
messageId,
|
|
147
149
|
config,
|
|
148
150
|
signal,
|
|
149
151
|
} = params;
|
|
150
152
|
|
|
153
|
+
// The spreading-activation fallback (only used when the router is off)
|
|
154
|
+
// still needs the most recent (assistant, user) pair for semantic
|
|
155
|
+
// scoring. Pulling these from the last pair preserves bit-identical
|
|
156
|
+
// K=1 behavior — the router-off path never benefits from extra pairs.
|
|
157
|
+
const lastPair = recentTurnPairs[recentTurnPairs.length - 1];
|
|
158
|
+
const userMessage = lastPair.userMessage;
|
|
159
|
+
const assistantMessage = lastPair.assistantMessage;
|
|
160
|
+
|
|
151
161
|
const workspaceDir = getWorkspaceDir();
|
|
152
162
|
const mode: InjectMemoryV2Mode = params.mode ?? "per-turn";
|
|
153
163
|
|
|
@@ -174,8 +184,7 @@ export async function injectMemoryV2Block(
|
|
|
174
184
|
database,
|
|
175
185
|
conversationId,
|
|
176
186
|
currentTurn,
|
|
177
|
-
|
|
178
|
-
assistantMessage,
|
|
187
|
+
recentTurnPairs,
|
|
179
188
|
nowText,
|
|
180
189
|
messageId,
|
|
181
190
|
config,
|
|
@@ -521,8 +530,7 @@ async function injectViaRouter(args: {
|
|
|
521
530
|
database: DrizzleDb;
|
|
522
531
|
conversationId: string;
|
|
523
532
|
currentTurn: number;
|
|
524
|
-
|
|
525
|
-
assistantMessage: string;
|
|
533
|
+
recentTurnPairs: readonly RouterTurnPair[];
|
|
526
534
|
nowText: string;
|
|
527
535
|
messageId: string;
|
|
528
536
|
config: AssistantConfig;
|
|
@@ -534,8 +542,7 @@ async function injectViaRouter(args: {
|
|
|
534
542
|
database,
|
|
535
543
|
conversationId,
|
|
536
544
|
currentTurn,
|
|
537
|
-
|
|
538
|
-
assistantMessage,
|
|
545
|
+
recentTurnPairs,
|
|
539
546
|
nowText,
|
|
540
547
|
messageId,
|
|
541
548
|
config,
|
|
@@ -548,8 +555,7 @@ async function injectViaRouter(args: {
|
|
|
548
555
|
|
|
549
556
|
const routerResult = await runRouter({
|
|
550
557
|
workspaceDir,
|
|
551
|
-
|
|
552
|
-
assistantMessage,
|
|
558
|
+
recentTurnPairs,
|
|
553
559
|
nowText,
|
|
554
560
|
priorEverInjected,
|
|
555
561
|
config,
|
|
@@ -56,8 +56,11 @@ const PAGE_INDEX_PLACEHOLDER = "{{PAGE_INDEX}}";
|
|
|
56
56
|
* Recent message context and `<now>` / `<already_injected_ids>` blocks are
|
|
57
57
|
* appended at the call site so we don't inadvertently expand `{{` inside
|
|
58
58
|
* dynamic content.
|
|
59
|
+
*
|
|
60
|
+
* Exported so the simulator route can return the bundled template verbatim
|
|
61
|
+
* for the playground's "Load default" affordance.
|
|
59
62
|
*/
|
|
60
|
-
const ROUTER_PROMPT = `You are a background helper for ${ASSISTANT_NAME_PLACEHOLDER}. Your job is to route memory pages for the next assistant turn between ${ASSISTANT_NAME_PLACEHOLDER} and ${USER_NAME_PLACEHOLDER}.
|
|
63
|
+
export const ROUTER_PROMPT = `You are a background helper for ${ASSISTANT_NAME_PLACEHOLDER}. Your job is to route memory pages for the next assistant turn between ${ASSISTANT_NAME_PLACEHOLDER} and ${USER_NAME_PLACEHOLDER}.
|
|
61
64
|
|
|
62
65
|
You will be shown the recent conversation, a \`<now>\` marker for the current time, an \`<already_injected_ids>\` block listing pages picked on the previous turn, and a \`# Concept Page Index\` listing every routable page on this workspace.
|
|
63
66
|
|
|
@@ -112,7 +115,29 @@ export function resolveRouterPrompt(
|
|
|
112
115
|
overridePath: string | null,
|
|
113
116
|
workspaceDir: string,
|
|
114
117
|
opts: RenderRouterPromptOpts,
|
|
118
|
+
inlineOverride?: string | null,
|
|
115
119
|
): string {
|
|
120
|
+
// Inline override (e.g. simulator playground) takes precedence over the
|
|
121
|
+
// configured file path and the bundled prompt. Same placeholder
|
|
122
|
+
// substitution + size guard as the file-path branch; empty/whitespace
|
|
123
|
+
// bodies fall through to file/bundled resolution so a "cleared" textarea
|
|
124
|
+
// is treated as no override.
|
|
125
|
+
if (inlineOverride !== undefined && inlineOverride !== null) {
|
|
126
|
+
if (inlineOverride.length > MAX_PROMPT_BYTES) {
|
|
127
|
+
log.warn(
|
|
128
|
+
{
|
|
129
|
+
size: inlineOverride.length,
|
|
130
|
+
limit: MAX_PROMPT_BYTES,
|
|
131
|
+
reason: "oversized_inline_override",
|
|
132
|
+
fallback: "path_or_bundled",
|
|
133
|
+
},
|
|
134
|
+
"inline router prompt override exceeds size limit; falling back",
|
|
135
|
+
);
|
|
136
|
+
} else if (inlineOverride.trim().length > 0) {
|
|
137
|
+
return substitutePlaceholders(inlineOverride, opts);
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
|
|
116
141
|
if (overridePath === null) return renderRouterPrompt(opts);
|
|
117
142
|
|
|
118
143
|
const resolvedPath = resolveOverridePath(overridePath, workspaceDir);
|
package/src/memory/v2/qdrant.ts
CHANGED
|
@@ -703,6 +703,10 @@ export async function dropLegacySkillsCollection(): Promise<void> {
|
|
|
703
703
|
* candidate set is already known so we don't waste hits on unrelated pages.
|
|
704
704
|
* An empty list short-circuits to no results — the caller is asking for
|
|
705
705
|
* "nothing", not "everything".
|
|
706
|
+
*
|
|
707
|
+
* An empty `dense` vector runs a sparse-only query: the dense channels are
|
|
708
|
+
* skipped rather than sent to Qdrant (a 0-dimension vector would 400). This is
|
|
709
|
+
* the dense counterpart to the `skipSparse` option.
|
|
706
710
|
*/
|
|
707
711
|
export async function hybridQueryConceptPages(
|
|
708
712
|
dense: number[],
|
|
@@ -730,6 +734,14 @@ export async function hybridQueryConceptPages(
|
|
|
730
734
|
// Qdrant 1.13.x sparse-index crash that we've reproduced in the wild.
|
|
731
735
|
const skipSparse = options?.skipSparse ?? false;
|
|
732
736
|
|
|
737
|
+
// An empty dense query vector means "sparse-only": skip the dense channels
|
|
738
|
+
// instead of sending a 0-dimension vector to Qdrant (which rejects it with a
|
|
739
|
+
// "Vector dimension error: expected dim N, got 0" 400). Symmetric to
|
|
740
|
+
// skipSparse — the fuser treats a missing dense score as a 0 contribution, so
|
|
741
|
+
// omitting the dense query is equivalent to weighting it to zero. Callers like
|
|
742
|
+
// the v3 sparse scout lane rely on this to run a BM25-only query.
|
|
743
|
+
const skipDense = dense.length === 0;
|
|
744
|
+
|
|
733
745
|
const queryDense = (using: string) =>
|
|
734
746
|
client.query(MEMORY_V2_COLLECTION, {
|
|
735
747
|
query: dense,
|
|
@@ -756,9 +768,9 @@ export async function hybridQueryConceptPages(
|
|
|
756
768
|
};
|
|
757
769
|
const runQueries = async () =>
|
|
758
770
|
Promise.all([
|
|
759
|
-
queryDense("dense"),
|
|
771
|
+
skipDense ? emptyResult : queryDense("dense"),
|
|
760
772
|
skipSparse ? emptyResult : querySparse("sparse"),
|
|
761
|
-
queryDense("summary_dense"),
|
|
773
|
+
skipDense ? emptyResult : queryDense("summary_dense"),
|
|
762
774
|
skipSparse ? emptyResult : querySparse("summary_sparse"),
|
|
763
775
|
]);
|
|
764
776
|
|