@vellumai/assistant 0.8.3 → 0.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +2 -2
- package/docker-entrypoint.sh +0 -1
- package/docs/browser-use-architecture-phase2.md +1 -1
- package/knip.json +2 -1
- package/node_modules/@vellumai/gateway-client/src/types.ts +2 -0
- package/openapi.yaml +1492 -100
- package/package.json +1 -1
- package/src/__tests__/agent-loop-exit-reason.test.ts +4 -5
- package/src/__tests__/agent-loop-override-profile.test.ts +1 -1
- package/src/__tests__/agent-loop.test.ts +88 -3
- package/src/__tests__/anthropic-provider.test.ts +302 -33
- package/src/__tests__/approval-cascade.test.ts +1 -1
- package/src/__tests__/assistant-event-hub-self-exclusion.test.ts +293 -0
- package/src/__tests__/assistant-feature-flags-integration.test.ts +3 -3
- package/src/__tests__/audit-log-rotation.test.ts +70 -16
- package/src/__tests__/background-workers-disk-pressure.test.ts +4 -3
- package/src/__tests__/btw-routes.test.ts +2 -3
- package/src/__tests__/call-controller.test.ts +0 -1
- package/src/__tests__/cancel-resolves-conversation-key.test.ts +1 -1
- package/src/__tests__/channel-delivery-store.test.ts +193 -0
- package/src/__tests__/channel-guardian.test.ts +3 -3
- package/src/__tests__/channel-reply-delivery.test.ts +284 -5
- package/src/__tests__/channel-retry-sweep.test.ts +274 -1
- package/src/__tests__/checker.test.ts +6 -15
- package/src/__tests__/compaction-events.test.ts +2 -1
- package/src/__tests__/compactor-call-site-logging.test.ts +214 -0
- package/src/__tests__/compactor-preserved-tail-count.test.ts +110 -0
- package/src/__tests__/computer-use-skill-manifest-regression.test.ts +5 -11
- package/src/__tests__/computer-use-tools.test.ts +2 -4
- package/src/__tests__/config-watcher.test.ts +1 -1
- package/src/__tests__/confirmation-request-guardian-bridge.test.ts +0 -1
- package/src/__tests__/context-token-estimator.test.ts +91 -1
- package/src/__tests__/conversation-abort-tool-results.test.ts +1 -1
- package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +1 -1
- package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +55 -4
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +228 -8
- package/src/__tests__/conversation-agent-loop.test.ts +188 -129
- package/src/__tests__/conversation-app-control-instantiation.test.ts +2 -5
- package/src/__tests__/conversation-app-control-lifecycle.test.ts +1 -1
- package/src/__tests__/conversation-clean-command.test.ts +137 -0
- package/src/__tests__/conversation-clear-safety.test.ts +25 -25
- package/src/__tests__/conversation-confirmation-signals.test.ts +1 -1
- package/src/__tests__/conversation-delete-schedule-cleanup.test.ts +1 -1
- package/src/__tests__/conversation-disk-view-integration.test.ts +2 -2
- package/src/__tests__/conversation-error.test.ts +31 -0
- package/src/__tests__/conversation-fork-crud.test.ts +324 -0
- package/src/__tests__/conversation-lifecycle.test.ts +53 -12
- package/src/__tests__/conversation-load-history-repair.test.ts +1 -1
- package/src/__tests__/conversation-load-history-stripped.test.ts +279 -0
- package/src/__tests__/conversation-pairing.test.ts +2 -2
- package/src/__tests__/conversation-process-callsite.test.ts +1 -1
- package/src/__tests__/conversation-provider-retry-repair.test.ts +2 -1
- package/src/__tests__/conversation-queue.test.ts +1 -1
- package/src/__tests__/conversation-routes-disk-view.test.ts +109 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +35 -0
- package/src/__tests__/conversation-runtime-assembly.test.ts +264 -81
- package/src/__tests__/conversation-seed-composer.test.ts +66 -4
- package/src/__tests__/conversation-skill-tools.test.ts +2 -5
- package/src/__tests__/conversation-slash-commands.test.ts +36 -8
- package/src/__tests__/conversation-slash-queue.test.ts +1 -1
- package/src/__tests__/conversation-slash-unknown.test.ts +1 -1
- package/src/__tests__/conversation-speed-override.test.ts +1 -1
- package/src/__tests__/conversation-store.test.ts +1 -1
- package/src/__tests__/conversation-surfaces-task-progress.test.ts +220 -0
- package/src/__tests__/conversation-sync-tags.test.ts +99 -32
- package/src/__tests__/conversation-workspace-cache-state.test.ts +2 -1
- package/src/__tests__/conversation-workspace-injection.test.ts +5 -1
- package/src/__tests__/conversation-workspace-tool-tracking.test.ts +5 -1
- package/src/__tests__/credential-execution-feature-gates.test.ts +9 -7
- package/src/__tests__/credential-execution-tools.test.ts +6 -6
- package/src/__tests__/credential-security-invariants.test.ts +7 -0
- package/src/__tests__/credential-vault-unit.test.ts +2 -2
- package/src/__tests__/cu-unified-flow.test.ts +10 -1
- package/src/__tests__/dm-backfill.test.ts +64 -0
- package/src/__tests__/dm-persistence.test.ts +33 -0
- package/src/__tests__/document-find-replace.test.ts +501 -0
- package/src/__tests__/dynamic-page-surface.test.ts +2 -2
- package/src/__tests__/email-html-renderer.test.ts +12 -0
- package/src/__tests__/first-greeting.test.ts +23 -2
- package/src/__tests__/gateway-flag-listener.test.ts +237 -0
- package/src/__tests__/gemini-provider.test.ts +78 -0
- package/src/__tests__/guardian-dispatch.test.ts +0 -1
- package/src/__tests__/guardian-outbound-http.test.ts +7 -5
- package/src/__tests__/handlers-user-message-approval-consumption.test.ts +1 -1
- package/src/__tests__/headless-browser-navigate.test.ts +172 -0
- package/src/__tests__/heartbeat-disk-pressure.test.ts +4 -0
- package/src/__tests__/heartbeat-service.test.ts +4 -0
- package/src/__tests__/host-bash-proxy.test.ts +6 -0
- package/src/__tests__/host-browser-proxy.test.ts +10 -0
- package/src/__tests__/host-cu-proxy.test.ts +8 -1
- package/src/__tests__/host-file-proxy.test.ts +8 -1
- package/src/__tests__/host-shell-tool.test.ts +1 -1
- package/src/__tests__/host-transfer-proxy.test.ts +8 -1
- package/src/__tests__/identity-routes.test.ts +57 -0
- package/src/__tests__/inbound-slack-persistence.test.ts +3 -0
- package/src/__tests__/init-feature-flag-overrides.test.ts +5 -6
- package/src/__tests__/injector-chain.test.ts +2 -0
- package/src/__tests__/injector-document-comments.test.ts +378 -0
- package/src/__tests__/injector-pkb-v2-silenced.test.ts +4 -25
- package/src/__tests__/list-messages-attachments.test.ts +21 -17
- package/src/__tests__/list-messages-hidden-metadata.test.ts +217 -0
- package/src/__tests__/list-messages-page-latest.test.ts +130 -14
- package/src/__tests__/list-messages-tool-merge.test.ts +77 -17
- package/src/__tests__/llm-context-normalization.test.ts +0 -2
- package/src/__tests__/llm-request-log-call-site.test.ts +136 -0
- package/src/__tests__/llm-request-log-source-clickhouse.test.ts +26 -0
- package/src/__tests__/llm-resolver.test.ts +161 -9
- package/src/__tests__/llm-usage-store.test.ts +66 -0
- package/src/__tests__/log-export-routes.test.ts +99 -2
- package/src/__tests__/logger.test.ts +89 -0
- package/src/__tests__/mcp-abort-signal.test.ts +2 -2
- package/src/__tests__/media-generate-image.test.ts +31 -0
- package/src/__tests__/memory-v2-static-injector.test.ts +7 -7
- package/src/__tests__/message-queue-steer.test.ts +114 -0
- package/src/__tests__/model-intents.test.ts +2 -4
- package/src/__tests__/notification-guardian-path.test.ts +0 -1
- package/src/__tests__/onboarding-template-contract.test.ts +1 -1
- package/src/__tests__/openai-provider.test.ts +151 -0
- package/src/__tests__/openai-responses-provider.test.ts +118 -16
- package/src/__tests__/outbound-slack-persistence.test.ts +187 -20
- package/src/__tests__/pending-interactions-resolved-event.test.ts +189 -0
- package/src/__tests__/platform-bash-auto-approve.test.ts +2 -2
- package/src/__tests__/platform.test.ts +2 -5
- package/src/__tests__/plugin-api-tool-definition.test.ts +92 -0
- package/src/__tests__/plugin-bootstrap.test.ts +2 -2
- package/src/__tests__/plugin-source-watcher.test.ts +302 -0
- package/src/__tests__/plugin-tool-contribution.test.ts +13 -6
- package/src/__tests__/plugin-types.test.ts +3 -2
- package/src/__tests__/prechat-onboarding-contract.test.ts +131 -98
- package/src/__tests__/pricing.test.ts +12 -0
- package/src/__tests__/process-message-background-slack.test.ts +1 -51
- package/src/__tests__/process-message-display-content.test.ts +21 -16
- package/src/__tests__/prune-jobs-changes-parser.test.ts +61 -0
- package/src/__tests__/registry.test.ts +2 -8
- package/src/__tests__/require-fresh-approval.test.ts +2 -2
- package/src/__tests__/runtime-events-sse-bilingual.test.ts +154 -0
- package/src/__tests__/server-history-render.test.ts +83 -4
- package/src/__tests__/shell-tool-proxy-mode.test.ts +1 -1
- package/src/__tests__/skill-feature-flags.test.ts +2 -2
- package/src/__tests__/skill-projection-feature-flag.test.ts +4 -7
- package/src/__tests__/skill-projection.benchmark.test.ts +2 -6
- package/src/__tests__/skill-tool-factory.test.ts +1 -1
- package/src/__tests__/steer-tool-repair.test.ts +249 -0
- package/src/__tests__/subagent-notify-parent.test.ts +1 -1
- package/src/__tests__/suggestion-routes.test.ts +1 -0
- package/src/__tests__/sync-message-contract.test.ts +59 -0
- package/src/__tests__/system-prompt.test.ts +161 -124
- package/src/__tests__/terminal-tools.test.ts +12 -2
- package/src/__tests__/thinking-block-replay.test.ts +113 -0
- package/src/__tests__/thread-backfill.test.ts +370 -22
- package/src/__tests__/tool-approval-handler.test.ts +1 -5
- package/src/__tests__/tool-execute-pipeline.test.ts +2 -2
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +2 -5
- package/src/__tests__/tool-executor-lifecycle-events.test.ts +15 -5
- package/src/__tests__/tool-executor.test.ts +89 -53
- package/src/__tests__/tool-grant-request-escalation.test.ts +1 -6
- package/src/__tests__/tool-result-metadata-plumbing.test.ts +167 -0
- package/src/__tests__/trusted-contact-approval-notifier.test.ts +0 -1
- package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +1 -6
- package/src/__tests__/trusted-contact-multichannel.test.ts +0 -1
- package/src/__tests__/twilio-routes.test.ts +1 -1
- package/src/__tests__/ui-file-upload-surface.test.ts +2 -2
- package/src/__tests__/usage-routes.test.ts +3 -0
- package/src/__tests__/verification-control-plane-policy.test.ts +2 -2
- package/src/__tests__/web-fetch.test.ts +2 -2
- package/src/__tests__/workspace-git-service.test.ts +94 -10
- package/src/__tests__/workspace-migration-088-deprecate-background-conversation-override.test.ts +158 -0
- package/src/__tests__/workspace-migration-089-move-memory-tree-out-of-v3.test.ts +86 -0
- package/src/acp/__tests__/prepare-agent-env.test.ts +146 -0
- package/src/acp/prepare-agent-env.ts +78 -0
- package/src/acp/session-manager.ts +1 -1
- package/src/agent/attachments.ts +1 -0
- package/src/agent/loop.ts +65 -20
- package/src/api/README.md +5 -0
- package/src/api/index.ts +4 -0
- package/src/api/package.json +10 -0
- package/src/background-wake/background-wake-routes.test.ts +233 -0
- package/src/background-wake/next-wake.test.ts +289 -0
- package/src/background-wake/next-wake.ts +172 -0
- package/src/background-wake/runtime-registry.ts +24 -0
- package/src/browser/operations.ts +15 -0
- package/src/cli/commands/__tests__/browser.test.ts +23 -5
- package/src/cli/commands/__tests__/conversations-slack.test.ts +572 -0
- package/src/cli/commands/__tests__/domain-register.test.ts +110 -0
- package/src/cli/commands/__tests__/domain-status.test.ts +33 -33
- package/src/cli/commands/__tests__/inference-send.test.ts +108 -5
- package/src/cli/commands/__tests__/memory-v2-compare-render.test.ts +98 -0
- package/src/cli/commands/__tests__/memory-v2.test.ts +10 -12
- package/src/cli/commands/__tests__/memory-v3-render.test.ts +340 -0
- package/src/cli/commands/browser.ts +247 -0
- package/src/cli/commands/conversations.ts +128 -1
- package/src/cli/commands/domain.ts +91 -41
- package/src/cli/commands/inference-providers.ts +147 -1
- package/src/cli/commands/inference.ts +93 -40
- package/src/cli/commands/memory-v2-compare-render.ts +115 -0
- package/src/cli/commands/memory-v2.ts +483 -0
- package/src/cli/commands/memory-v3-render.ts +344 -0
- package/src/cli/commands/memory-v3.ts +316 -0
- package/src/cli/commands/notifications.ts +24 -2
- package/src/cli/program.ts +2 -0
- package/src/cli/utils/conversation-id.ts +17 -5
- package/src/config/assistant-feature-flags.ts +21 -9
- package/src/config/bundled-skills/app-builder/SKILL.md +2 -2
- package/src/config/bundled-skills/document-editor/SKILL.md +124 -0
- package/src/config/bundled-skills/document-editor/TOOLS.json +258 -0
- package/src/config/bundled-skills/document-editor/tools/comment-list.ts +12 -0
- package/src/config/bundled-skills/document-editor/tools/comment-reply.ts +12 -0
- package/src/config/bundled-skills/document-editor/tools/comment-resolve.ts +12 -0
- package/src/config/bundled-skills/document-editor/tools/document-find.ts +12 -0
- package/src/config/bundled-skills/document-editor/tools/document-open.ts +12 -0
- package/src/config/bundled-skills/document-editor/tools/document-replace-text.ts +12 -0
- package/src/config/bundled-skills/image-studio/SKILL.md +4 -0
- package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +2 -2
- package/src/config/bundled-skills/media-processing/SKILL.md +8 -0
- package/src/config/bundled-skills/media-processing/tools/ingest-media.ts +13 -8
- package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +10 -3
- package/src/config/bundled-skills/phone-calls/references/TRANSCRIPTS.md +16 -14
- package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +7 -2
- package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +7 -2
- package/src/config/bundled-skills/schedule/SKILL.md +8 -0
- package/src/config/bundled-tool-registry.ts +24 -12
- package/src/config/call-site-defaults.ts +20 -0
- package/src/config/feature-flag-registry.json +115 -3
- package/src/config/llm-resolver.ts +16 -2
- package/src/config/schemas/__tests__/memory-v2.test.ts +217 -1
- package/src/config/schemas/call-site-catalog.ts +35 -0
- package/src/config/schemas/llm.ts +14 -0
- package/src/config/schemas/memory-v2.ts +294 -1
- package/src/config/schemas/memory.ts +2 -1
- package/src/context/compactor.ts +60 -1
- package/src/context/token-estimator.ts +47 -4
- package/src/context/window-manager.ts +25 -0
- package/src/conversations/__tests__/message-consolidation.test.ts +350 -0
- package/src/conversations/message-consolidation.ts +404 -0
- package/src/credential-health/credential-health-service.ts +34 -19
- package/src/daemon/__tests__/conversation-tool-setup-exclude.test.ts +1 -1
- package/src/daemon/__tests__/conversation-tool-setup.test.ts +66 -6
- package/src/daemon/__tests__/meet-manifest-loader.test.ts +1 -1
- package/src/daemon/__tests__/native-web-search-metadata.test.ts +357 -0
- package/src/daemon/__tests__/web-search-status-text.test.ts +287 -0
- package/src/daemon/conversation-agent-loop-handlers.ts +155 -36
- package/src/daemon/conversation-agent-loop.ts +307 -88
- package/src/daemon/conversation-error.ts +31 -1
- package/src/daemon/conversation-lifecycle.ts +149 -118
- package/src/daemon/conversation-messaging.ts +3 -0
- package/src/daemon/conversation-process.ts +273 -0
- package/src/daemon/conversation-queue-manager.ts +14 -0
- package/src/daemon/conversation-runtime-assembly.ts +145 -84
- package/src/daemon/conversation-slash.ts +37 -5
- package/src/daemon/conversation-surfaces.ts +45 -2
- package/src/daemon/conversation-tool-setup.ts +70 -3
- package/src/daemon/conversation-usage.ts +2 -0
- package/src/daemon/conversation.ts +54 -32
- package/src/daemon/disk-pressure-guard.ts +14 -2
- package/src/daemon/first-greeting.ts +10 -0
- package/src/daemon/handlers/__tests__/config-a2a-accept.test.ts +498 -0
- package/src/daemon/handlers/config-a2a.ts +160 -0
- package/src/daemon/handlers/config-model.test.ts +2 -0
- package/src/daemon/handlers/conversations.ts +90 -3
- package/src/daemon/handlers/shared.ts +92 -29
- package/src/daemon/host-bash-proxy.ts +1 -1
- package/src/daemon/host-browser-proxy.ts +5 -5
- package/src/daemon/host-cu-proxy.ts +5 -5
- package/src/daemon/host-file-proxy.ts +5 -5
- package/src/daemon/host-proxy-base.ts +4 -4
- package/src/daemon/host-transfer-proxy.ts +11 -11
- package/src/daemon/lifecycle.ts +40 -23
- package/src/daemon/meet-manifest-loader.ts +1 -7
- package/src/daemon/message-protocol.ts +4 -0
- package/src/daemon/message-types/conversations.ts +14 -9
- package/src/daemon/message-types/document-comments.ts +50 -0
- package/src/daemon/message-types/home.ts +1 -13
- package/src/daemon/message-types/messages.ts +66 -7
- package/src/daemon/message-types/surfaces.ts +3 -1
- package/src/daemon/message-types/sync.ts +14 -0
- package/src/daemon/message-types/web-activity.ts +57 -0
- package/src/daemon/plugin-source-watcher.ts +135 -3
- package/src/daemon/process-message.ts +69 -12
- package/src/daemon/shutdown-handlers.ts +24 -5
- package/src/daemon/switch-inference-profile-tool.ts +52 -0
- package/src/daemon/tool-setup-types.ts +13 -0
- package/src/daemon/trust-context.ts +6 -0
- package/src/documents/document-comments-store.test.ts +338 -0
- package/src/documents/document-comments-store.ts +237 -0
- package/src/documents/document-store.ts +202 -0
- package/src/events/relationship-state-updated.ts +25 -0
- package/src/heartbeat/__tests__/heartbeat-service.test.ts +1 -2
- package/src/heartbeat/heartbeat-service.ts +1 -0
- package/src/home/__tests__/suggested-prompts.test.ts +33 -2
- package/src/home/feed-types.ts +6 -1
- package/src/home/home-content-refresh.ts +52 -0
- package/src/home/home-greeting-cache.ts +69 -0
- package/src/home/home-greeting.ts +85 -0
- package/src/home/suggested-prompts.ts +168 -9
- package/src/ipc/gateway-flag-listener.ts +123 -0
- package/src/ipc/skill-routes/registries.ts +8 -12
- package/src/memory/__tests__/db-async-query.test.ts +165 -0
- package/src/memory/__tests__/db-maintenance.test.ts +115 -0
- package/src/memory/__tests__/jobs-store-enqueue-gate.test.ts +241 -0
- package/src/memory/__tests__/jobs-store-job-classes.test.ts +28 -1
- package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +135 -2
- package/src/memory/__tests__/memory-retrospective-job.test.ts +327 -6
- package/src/memory/auto-analysis-enqueue.ts +5 -1
- package/src/memory/conversation-crud.ts +191 -100
- package/src/memory/conversation-starters-cadence.ts +3 -1
- package/src/memory/conversation-title-service.ts +19 -3
- package/src/memory/db-async-query.ts +214 -0
- package/src/memory/db-init.ts +26 -0
- package/src/memory/db-maintenance.ts +30 -21
- package/src/memory/delivery-crud.ts +41 -0
- package/src/memory/delivery-status.ts +141 -15
- package/src/memory/external-conversation-store.ts +32 -1
- package/src/memory/graph/bootstrap.ts +8 -1
- package/src/memory/graph/capability-seed.ts +7 -3
- package/src/memory/graph/conversation-graph-memory.ts +100 -17
- package/src/memory/graph/extraction.ts +1 -5
- package/src/memory/graph/graph-search.ts +7 -1
- package/src/memory/indexer.ts +28 -18
- package/src/memory/job-handlers/cleanup.ts +76 -18
- package/src/memory/job-handlers/conversation-starters.ts +1 -4
- package/src/memory/jobs/embed-pkb-file.ts +6 -1
- package/src/memory/jobs-store.ts +14 -0
- package/src/memory/jobs-worker.ts +68 -15
- package/src/memory/llm-request-log-source-clickhouse.ts +42 -2
- package/src/memory/llm-request-log-source-local.ts +7 -0
- package/src/memory/llm-request-log-source.ts +9 -2
- package/src/memory/llm-request-log-store.ts +43 -1
- package/src/memory/llm-usage-store.ts +24 -0
- package/src/memory/memory-retrospective-constants.ts +28 -0
- package/src/memory/memory-retrospective-enqueue.ts +11 -3
- package/src/memory/memory-retrospective-job.ts +413 -18
- package/src/memory/memory-retrospective-startup-cleanup.ts +3 -3
- package/src/memory/memory-v2-activation-log-store.ts +41 -14
- package/src/memory/migrations/100-core-tables.ts +1 -0
- package/src/memory/migrations/109-external-conversation-bindings.ts +1 -0
- package/src/memory/migrations/253-conversation-last-notified-profile.ts +15 -0
- package/src/memory/migrations/253-document-comments.ts +47 -0
- package/src/memory/migrations/254-external-conversation-binding-chat-name.ts +43 -0
- package/src/memory/migrations/255-channel-inbound-delivery-attempts.ts +24 -0
- package/src/memory/migrations/256-memory-v2-injection-events.ts +113 -0
- package/src/memory/migrations/257-strip-base-url-non-openai-compatible.ts +22 -0
- package/src/memory/migrations/258-onboarding-events-prior-assistants.ts +13 -0
- package/src/memory/migrations/259-conversation-cleaned-at.ts +33 -0
- package/src/memory/migrations/260-rename-cleaned-at.ts +44 -0
- package/src/memory/migrations/261-llm-usage-add-raw-usage.ts +36 -0
- package/src/memory/migrations/262-memory-v3-coactivation.ts +57 -0
- package/src/memory/migrations/263-memory-v3-auto-edges.ts +50 -0
- package/src/memory/migrations/264-llm-request-log-call-site.ts +29 -0
- package/src/memory/migrations/index.ts +34 -0
- package/src/memory/migrations/registry.ts +58 -0
- package/src/memory/onboarding-events-store.ts +7 -0
- package/src/memory/schema/calls.ts +1 -0
- package/src/memory/schema/conversations.ts +3 -0
- package/src/memory/schema/infrastructure.ts +22 -0
- package/src/memory/tool-usage-store.ts +36 -8
- package/src/memory/v2/__tests__/consolidation-job.test.ts +1 -0
- package/src/memory/v2/__tests__/harness-compare.test.ts +186 -0
- package/src/memory/v2/__tests__/harness-metrics.test.ts +74 -0
- package/src/memory/v2/__tests__/harness-oracle.test.ts +257 -0
- package/src/memory/v2/__tests__/harness-replay-input.test.ts +225 -0
- package/src/memory/v2/__tests__/harness-runner.test.ts +109 -0
- package/src/memory/v2/__tests__/injection-events.test.ts +318 -0
- package/src/memory/v2/__tests__/injection.test.ts +158 -112
- package/src/memory/v2/__tests__/page-index.test.ts +365 -1
- package/src/memory/v2/__tests__/qdrant.test.ts +36 -0
- package/src/memory/v2/__tests__/router.test.ts +660 -4
- package/src/memory/v2/consolidation-job.ts +14 -0
- package/src/memory/v2/harness/compare.ts +57 -0
- package/src/memory/v2/harness/metrics.ts +124 -0
- package/src/memory/v2/harness/oracle.ts +145 -0
- package/src/memory/v2/harness/replay-input.ts +224 -0
- package/src/memory/v2/harness/retriever.ts +74 -0
- package/src/memory/v2/harness/router-retriever.ts +43 -0
- package/src/memory/v2/harness/runner.ts +106 -0
- package/src/memory/v2/harness/trace.ts +58 -0
- package/src/memory/v2/injection-events.ts +101 -0
- package/src/memory/v2/injection.ts +42 -25
- package/src/memory/v2/page-index.ts +209 -7
- package/src/memory/v2/page-store.ts +18 -0
- package/src/memory/v2/prompts/router.ts +26 -1
- package/src/memory/v2/qdrant.ts +14 -2
- package/src/memory/v2/router.ts +369 -62
- package/src/memory/v3/__tests__/coactivation-store.test.ts +422 -0
- package/src/memory/v3/__tests__/consolidation-job.test.ts +468 -0
- package/src/memory/v3/__tests__/edge-learning-job.test.ts +324 -0
- package/src/memory/v3/__tests__/edges.test.ts +563 -0
- package/src/memory/v3/__tests__/filter.test.ts +512 -0
- package/src/memory/v3/__tests__/gate.test.ts +574 -0
- package/src/memory/v3/__tests__/index-composition.test.ts +233 -0
- package/src/memory/v3/__tests__/loop.test.ts +530 -0
- package/src/memory/v3/__tests__/retriever.test.ts +226 -0
- package/src/memory/v3/__tests__/scouts.test.ts +440 -0
- package/src/memory/v3/__tests__/shadow-middleware.test.ts +312 -0
- package/src/memory/v3/__tests__/system-prompts.test.ts +154 -0
- package/src/memory/v3/__tests__/traversal.test.ts +469 -0
- package/src/memory/v3/__tests__/tree-index.test.ts +280 -0
- package/src/memory/v3/__tests__/tree-store.test.ts +529 -0
- package/src/memory/v3/__tests__/tree-walk.test.ts +707 -0
- package/src/memory/v3/__tests__/validate.test.ts +245 -0
- package/src/memory/v3/auto-edges.ts +223 -0
- package/src/memory/v3/coactivation-store.ts +124 -0
- package/src/memory/v3/consolidation-job.ts +323 -0
- package/src/memory/v3/edge-learning-job.ts +160 -0
- package/src/memory/v3/edges.ts +249 -0
- package/src/memory/v3/filter.ts +281 -0
- package/src/memory/v3/gate.ts +334 -0
- package/src/memory/v3/index-composition.ts +113 -0
- package/src/memory/v3/llm-capture.ts +46 -0
- package/src/memory/v3/loop.ts +382 -0
- package/src/memory/v3/maintenance.ts +144 -0
- package/src/memory/v3/prompt-context.ts +33 -0
- package/src/memory/v3/prompts/consolidation.ts +458 -0
- package/src/memory/v3/prompts/system-prompts.ts +196 -0
- package/src/memory/v3/retriever.ts +33 -0
- package/src/memory/v3/scouts.ts +420 -0
- package/src/memory/v3/shadow-middleware.ts +305 -0
- package/src/memory/v3/traversal.ts +206 -0
- package/src/memory/v3/tree-index.ts +237 -0
- package/src/memory/v3/tree-store.ts +394 -0
- package/src/memory/v3/tree-walk.ts +351 -0
- package/src/memory/v3/types.ts +65 -0
- package/src/memory/v3/validate.ts +300 -0
- package/src/messaging/providers/index.ts +7 -1
- package/src/messaging/providers/slack/__tests__/adapter-mention-rendering.test.ts +329 -3
- package/src/messaging/providers/slack/__tests__/adapter-token-routing.test.ts +34 -1
- package/src/messaging/providers/slack/adapter.ts +178 -25
- package/src/messaging/providers/slack/api.test.ts +54 -0
- package/src/messaging/providers/slack/api.ts +119 -3
- package/src/messaging/providers/slack/client.ts +12 -0
- package/src/messaging/providers/slack/deep-link.ts +20 -1
- package/src/messaging/providers/slack/message-metadata.test.ts +48 -0
- package/src/messaging/providers/slack/message-metadata.ts +156 -0
- package/src/messaging/providers/slack/render-transcript.test.ts +107 -75
- package/src/messaging/providers/slack/render-transcript.ts +176 -49
- package/src/messaging/providers/slack/send.test.ts +77 -0
- package/src/messaging/providers/slack/send.ts +8 -2
- package/src/messaging/providers/slack/types.ts +14 -0
- package/src/notifications/__tests__/emit-signal-home-feed.test.ts +4 -1
- package/src/notifications/__tests__/home-feed-side-effect.test.ts +116 -54
- package/src/notifications/adapters/macos.ts +18 -1
- package/src/notifications/adapters/platform.ts +1 -1
- package/src/notifications/conversation-seed-composer.ts +14 -2
- package/src/notifications/decision-engine.ts +1 -4
- package/src/notifications/deferred-emit.ts +135 -0
- package/src/notifications/emit-signal.ts +38 -50
- package/src/notifications/home-feed-side-effect.ts +60 -30
- package/src/oauth/connect-orchestrator.ts +3 -0
- package/src/oauth/credential-token-resolver.ts +2 -0
- package/src/oauth/manual-token-connection.ts +19 -0
- package/src/oauth/oauth-store.ts +12 -0
- package/src/oauth/seed-providers.ts +22 -0
- package/src/permissions/prompter.ts +8 -5
- package/src/permissions/question-prompter.ts +5 -2
- package/src/permissions/secret-prompter.ts +6 -3
- package/src/plugin-api/index.ts +4 -0
- package/src/plugin-api/types.ts +7 -33
- package/src/plugins/defaults/index.ts +6 -0
- package/src/plugins/defaults/injectors.ts +100 -20
- package/src/plugins/external-plugin-loader.ts +5 -68
- package/src/plugins/types.ts +11 -16
- package/src/proactive-artifact/aux-message-injector.ts +17 -4
- package/src/prompts/__tests__/system-prompt.test.ts +46 -2
- package/src/prompts/__tests__/task-progress-hint-section.test.ts +3 -9
- package/src/prompts/normalize-onboarding.ts +40 -0
- package/src/prompts/persona-resolver.ts +36 -21
- package/src/prompts/sections.ts +69 -19
- package/src/prompts/system-prompt.ts +118 -216
- package/src/prompts/template-detection.ts +37 -0
- package/src/prompts/templates/BOOTSTRAP-CONTENT-AUTOMATION.md +141 -0
- package/src/prompts/templates/BOOTSTRAP.md +10 -2
- package/src/prompts/templates/VOICE.md +3 -0
- package/src/prompts/templates/system-sections.ts +281 -9
- package/src/providers/__tests__/connection-model-compat.test.ts +234 -0
- package/src/providers/__tests__/retry-callsite.test.ts +85 -5
- package/src/providers/anthropic/client.ts +159 -66
- package/src/providers/call-site-routing.ts +14 -2
- package/src/providers/connection-model-compat.ts +38 -0
- package/src/providers/connection-resolution.ts +16 -2
- package/src/providers/fireworks/client.ts +20 -2
- package/src/providers/gemini/client.ts +49 -6
- package/src/providers/inference/__tests__/base-url-route-validation.test.ts +342 -0
- package/src/providers/inference/__tests__/base-url-security.test.ts +189 -0
- package/src/providers/inference/__tests__/codex-token-refresh.test.ts +254 -0
- package/src/providers/inference/adapter-factory.ts +18 -1
- package/src/providers/inference/auth.ts +3 -3
- package/src/providers/inference/codex-token-refresh.ts +128 -0
- package/src/providers/inference/resolve-auth.ts +49 -6
- package/src/providers/minimax/client.ts +106 -0
- package/src/providers/model-catalog.ts +91 -1
- package/src/providers/model-intents.ts +1 -1
- package/src/providers/openai/chat-completions-provider.ts +63 -23
- package/src/providers/openai/codex-models.ts +18 -0
- package/src/providers/openai/responses-provider.ts +86 -23
- package/src/providers/openrouter/client.ts +5 -1
- package/src/providers/provider-send-message.ts +7 -1
- package/src/providers/retry.ts +34 -3
- package/src/providers/thinking-config.ts +26 -1
- package/src/providers/types.ts +25 -0
- package/src/providers/usage-tracking.ts +2 -0
- package/src/runtime/AGENTS.md +2 -2
- package/src/runtime/__tests__/agent-wake.test.ts +214 -0
- package/src/runtime/__tests__/background-job-runner.test.ts +128 -0
- package/src/runtime/agent-wake.ts +152 -56
- package/src/runtime/assistant-event-hub.ts +76 -6
- package/src/runtime/auth/route-policy.ts +43 -3
- package/src/runtime/background-job-runner.ts +26 -0
- package/src/runtime/btw-sidechain.ts +0 -6
- package/src/runtime/channel-reply-delivery.ts +182 -47
- package/src/runtime/channel-retry-sweep.ts +141 -16
- package/src/runtime/http-types.ts +7 -6
- package/src/runtime/migrations/vbundle-builder.ts +10 -3
- package/src/runtime/pending-interactions.ts +50 -8
- package/src/runtime/routes/__tests__/content-source-routes.test.ts +162 -0
- package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +161 -1
- package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +14 -0
- package/src/runtime/routes/__tests__/memory-v2-simulate-route.test.ts +290 -0
- package/src/runtime/routes/__tests__/plugins-routes.test.ts +512 -0
- package/src/runtime/routes/__tests__/sanity-routes.test.ts +280 -0
- package/src/runtime/routes/__tests__/slack-channel-routes.test.ts +266 -0
- package/src/runtime/routes/acp-routes.test.ts +255 -6
- package/src/runtime/routes/acp-routes.ts +8 -1
- package/src/runtime/routes/approval-routes.ts +4 -1
- package/src/runtime/routes/avatar-routes.ts +10 -10
- package/src/runtime/routes/background-wake-routes.ts +188 -0
- package/src/runtime/routes/browser-tabs-routes.ts +200 -0
- package/src/runtime/routes/btw-routes.ts +0 -6
- package/src/runtime/routes/chatgpt-subscription-auth-routes.ts +246 -0
- package/src/runtime/routes/content-source-routes.ts +78 -0
- package/src/runtime/routes/conversation-cli-routes.ts +147 -2
- package/src/runtime/routes/conversation-list-routes.ts +12 -4
- package/src/runtime/routes/conversation-management-routes.ts +77 -20
- package/src/runtime/routes/conversation-query-routes.ts +196 -31
- package/src/runtime/routes/conversation-routes.ts +472 -425
- package/src/runtime/routes/conversation-starter-routes.ts +6 -3
- package/src/runtime/routes/disk-pressure-routes.ts +1 -1
- package/src/runtime/routes/document-comments-routes.ts +287 -0
- package/src/runtime/routes/documents-routes.ts +33 -0
- package/src/runtime/routes/domain-routes.ts +60 -10
- package/src/runtime/routes/email-routes.ts +5 -2
- package/src/runtime/routes/events-routes.ts +54 -10
- package/src/runtime/routes/group-routes.ts +24 -8
- package/src/runtime/routes/home-feed-routes.ts +6 -3
- package/src/runtime/routes/host-app-control-routes.ts +1 -1
- package/src/runtime/routes/host-browser-routes.ts +17 -2
- package/src/runtime/routes/host-cu-routes.ts +2 -2
- package/src/runtime/routes/identity-routes.ts +21 -0
- package/src/runtime/routes/inbound-message-handler.ts +288 -58
- package/src/runtime/routes/inbound-stages/acl-enforcement.ts +96 -3
- package/src/runtime/routes/inbound-stages/background-dispatch.test.ts +365 -6
- package/src/runtime/routes/inbound-stages/background-dispatch.ts +283 -82
- package/src/runtime/routes/index.ts +20 -4
- package/src/runtime/routes/inference-profile-session-handler.ts +22 -12
- package/src/runtime/routes/inference-profile-session-routes.ts +7 -1
- package/src/runtime/routes/inference-provider-connection-routes.ts +63 -7
- package/src/runtime/routes/integrations/a2a.ts +60 -1
- package/src/runtime/routes/llm-call-sites-routes.ts +32 -5
- package/src/runtime/routes/log-export-routes.ts +39 -0
- package/src/runtime/routes/memory-item-routes.ts +8 -3
- package/src/runtime/routes/memory-v2-routes.ts +427 -0
- package/src/runtime/routes/memory-v3-routes.ts +316 -0
- package/src/runtime/routes/migration-routes.ts +21 -24
- package/src/runtime/routes/notification-routes.ts +19 -2
- package/src/runtime/routes/plugins-routes.ts +337 -0
- package/src/runtime/routes/question-routes.ts +4 -1
- package/src/runtime/routes/rename-conversation-routes.ts +6 -2
- package/src/runtime/routes/sanity-routes.ts +159 -0
- package/src/runtime/routes/secret-routes.ts +25 -5
- package/src/runtime/routes/settings-routes.ts +12 -11
- package/src/runtime/routes/slack-channel-routes.ts +188 -0
- package/src/runtime/routes/workspace-routes.ts +25 -10
- package/src/runtime/services/conversation-serializer.ts +30 -4
- package/src/runtime/sync/resource-sync-events.ts +106 -38
- package/src/runtime/sync/sync-publisher.test.ts +49 -0
- package/src/runtime/sync/sync-publisher.ts +2 -1
- package/src/runtime/verification-outbound-actions.ts +73 -1
- package/src/schedule/integration-status.ts +3 -1
- package/src/security/__tests__/oauth2-device-code.test.ts +479 -0
- package/src/security/oauth2-device-code.ts +307 -0
- package/src/security/oauth2.ts +26 -9
- package/src/security/secure-keys.ts +5 -0
- package/src/skills/catalog-install.ts +6 -2
- package/src/telemetry/types.ts +12 -0
- package/src/telemetry/usage-telemetry-reporter.test.ts +48 -0
- package/src/telemetry/usage-telemetry-reporter.ts +1 -0
- package/src/tools/acp/spawn.test.ts +119 -0
- package/src/tools/acp/spawn.ts +15 -2
- package/src/tools/apps/definitions.ts +2 -8
- package/src/tools/ask-question/ask-question-tool.test.ts +3 -3
- package/src/tools/ask-question/ask-question-tool.ts +38 -45
- package/src/tools/browser/__tests__/pinned-tabs.test.ts +150 -0
- package/src/tools/browser/browser-execution.ts +106 -0
- package/src/tools/browser/cdp-client/__tests__/browser-tabs-factory.test.ts +402 -0
- package/src/tools/browser/cdp-client/__tests__/factory.test.ts +28 -0
- package/src/tools/browser/cdp-client/__tests__/types.test.ts +4 -0
- package/src/tools/browser/cdp-client/cdp-inspect-client.ts +22 -0
- package/src/tools/browser/cdp-client/extension-cdp-client.ts +42 -2
- package/src/tools/browser/cdp-client/factory.ts +171 -4
- package/src/tools/browser/cdp-client/local-cdp-client.ts +21 -0
- package/src/tools/browser/cdp-client/types.ts +101 -0
- package/src/tools/browser/pinned-tabs.ts +146 -0
- package/src/tools/computer-use/definitions.ts +22 -78
- package/src/tools/credential-execution/make-authenticated-request.ts +3 -9
- package/src/tools/credential-execution/manage-secure-command-tool.ts +3 -9
- package/src/tools/credential-execution/run-authenticated-command.ts +3 -9
- package/src/tools/credentials/vault.ts +3 -9
- package/src/tools/document/document-comment-tool.test.ts +379 -0
- package/src/tools/document/document-comment-tool.ts +156 -0
- package/src/tools/document/document-tool.ts +187 -2
- package/src/tools/execution-target.ts +21 -23
- package/src/tools/executor.ts +6 -1
- package/src/tools/filesystem/edit.ts +3 -9
- package/src/tools/filesystem/list.ts +3 -9
- package/src/tools/filesystem/read.ts +3 -9
- package/src/tools/filesystem/write.ts +3 -9
- package/src/tools/host-filesystem/edit.ts +3 -9
- package/src/tools/host-filesystem/read.ts +3 -9
- package/src/tools/host-filesystem/transfer.ts +3 -9
- package/src/tools/host-filesystem/write.ts +3 -9
- package/src/tools/host-terminal/host-shell.ts +3 -9
- package/src/tools/mcp/mcp-tool-factory.ts +1 -8
- package/src/tools/memory/register.test.ts +1 -1
- package/src/tools/memory/register.ts +4 -9
- package/src/tools/network/__tests__/web-fetch-metadata.test.ts +229 -0
- package/src/tools/network/__tests__/web-search-metadata.test.ts +346 -0
- package/src/tools/network/domain-normalize.ts +17 -0
- package/src/tools/network/web-fetch.ts +216 -73
- package/src/tools/network/web-search.ts +216 -98
- package/src/tools/registry.ts +7 -23
- package/src/tools/schema-transforms.ts +1 -1
- package/src/tools/skills/execute.ts +3 -9
- package/src/tools/skills/load.ts +3 -9
- package/src/tools/skills/skill-tool-factory.ts +1 -8
- package/src/tools/subagent/notify-parent.ts +3 -9
- package/src/tools/system/request-permission.ts +3 -9
- package/src/tools/terminal/safe-env.ts +3 -2
- package/src/tools/terminal/shell.ts +3 -9
- package/src/tools/tool-approval-handler.ts +19 -12
- package/src/tools/tool-defaults.ts +94 -0
- package/src/tools/types.ts +31 -98
- package/src/tools/ui-surface/definitions.ts +9 -23
- package/src/types/onboarding-context.ts +4 -0
- package/src/usage/pricing.ts +23 -0
- package/src/usage/types.ts +12 -0
- package/src/util/__tests__/favicon.test.ts +84 -0
- package/src/util/favicon.ts +40 -0
- package/src/util/logger.ts +16 -7
- package/src/util/platform.ts +7 -7
- package/src/util/sqlite3-runtime.ts +65 -0
- package/src/workspace/git-service.ts +75 -4
- package/src/workspace/migrations/086-revert-stale-gemini-mis-rewrites.ts +1 -0
- package/src/workspace/migrations/088-deprecate-background-conversation-override.ts +103 -0
- package/src/workspace/migrations/089-move-memory-tree-out-of-v3.ts +86 -0
- package/src/workspace/migrations/registry.ts +4 -0
- package/src/__tests__/compaction-strip-metadata-clear.test.ts +0 -206
- package/src/__tests__/message-complete-display-id.test.ts +0 -175
- package/src/config/bundled-skills/document/SKILL.md +0 -54
- package/src/config/bundled-skills/document/TOOLS.json +0 -106
- package/src/daemon/seed-files.ts +0 -18
- package/src/prompts/cache-boundary.ts +0 -8
- package/src/runtime/routes/interface-routes.ts +0 -43
- /package/src/config/bundled-skills/{document → document-editor}/tools/document-create.ts +0 -0
- /package/src/config/bundled-skills/{document → document-editor}/tools/document-delete.ts +0 -0
- /package/src/config/bundled-skills/{document → document-editor}/tools/document-list.ts +0 -0
- /package/src/config/bundled-skills/{document → document-editor}/tools/document-read.ts +0 -0
- /package/src/config/bundled-skills/{document → document-editor}/tools/document-update.ts +0 -0
|
@@ -244,6 +244,20 @@ function readBufferContent(bufferPath: string): string {
|
|
|
244
244
|
}
|
|
245
245
|
}
|
|
246
246
|
|
|
247
|
+
/**
 * Number of non-blank lines in the buffer file at `bufferPath`.
 *
 * The text is obtained from `readBufferContent`; an empty result yields 0
 * (the existing contract says a missing file counts as 0, which relies on
 * `readBufferContent` returning an empty string in that case). Otherwise the
 * text is split on "\n" and only lines that are non-empty after trimming are
 * counted, so blank lines and a trailing newline add nothing. For a buffer
 * that stores one entry per line this equals the entry count, which the
 * scheduler uses for its size-based consolidation trigger.
 *
 * @param bufferPath Path handed unchanged to `readBufferContent`.
 * @returns The count of lines containing at least one non-whitespace character.
 */
export function countBufferLines(bufferPath: string): number {
  const text = readBufferContent(bufferPath);
  if (text === "") return 0;

  let nonBlank = 0;
  for (const line of text.split("\n")) {
    if (line.trim() !== "") nonBlank += 1;
  }
  return nonBlank;
}
|
|
260
|
+
|
|
247
261
|
/**
|
|
248
262
|
* Atomically create the lock file with `wx` (O_CREAT | O_EXCL) flags. Returns
|
|
249
263
|
* `null` on success, or the current holder string (file contents, typically
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Run the comparison harness over a sample of historical turns.
|
|
3
|
+
*
|
|
4
|
+
* Ties the harness pieces together: pull oracle turns from telemetry, run each
|
|
5
|
+
* retriever over each turn's reconstructed inputs, score against the logged
|
|
6
|
+
* ground truth. Kept separate from the route handler so it can be unit-tested
|
|
7
|
+
* with a stub retriever and a fixture DB — no live router / LLM.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { AssistantConfig } from "../../../config/types.js";
|
|
11
|
+
import type { DrizzleDb } from "../../db-connection.js";
|
|
12
|
+
import { extractOracleTurns } from "./oracle.js";
|
|
13
|
+
import { reconstructInput } from "./replay-input.js";
|
|
14
|
+
import type { Retriever } from "./retriever.js";
|
|
15
|
+
import { type ComparisonReport, runComparison } from "./runner.js";
|
|
16
|
+
|
|
17
|
+
/** Inputs accepted by `runComparisonOverHistory`. */
export interface RunComparisonOverHistoryParams {
  /** Database handle used both to extract oracle turns and to reconstruct each turn's input. */
  db: DrizzleDb;
  /** Workspace directory passed through to input reconstruction. */
  workspaceDir: string;
  /** Assistant configuration passed through to input reconstruction. */
  config: AssistantConfig;
  /** Retrievers handed to the comparison runner. */
  retrievers: readonly Retriever[];
  /** The `k` values handed to the comparison runner. */
  ks: number[];
  /** Oracle-extraction option; forwarded only when defined. */
  limit?: number;
  /** Oracle-extraction sampling strategy; forwarded only when defined. */
  strategy?: "recent" | "random";
  /** Oracle-extraction conversation filter; forwarded only when defined. */
  conversationIds?: string[];
  /** Oracle-extraction flag; forwarded only when defined. */
  includeNotInjected?: boolean;
  /** Oracle-extraction page-existence predicate; forwarded only when defined. */
  pageExists?: (slug: string) => boolean;
  /** Abort signal handed to the comparison runner; forwarded only when defined. */
  signal?: AbortSignal;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Extract oracle turns from `params.db`, then run every retriever over them
 * via `runComparison`, reconstructing each turn's input with
 * `reconstructInput`.
 *
 * Optional parameters are only placed on the option objects when they are
 * defined, so an absent value is never passed along as an explicit
 * `undefined` property.
 *
 * @param params See `RunComparisonOverHistoryParams`.
 * @returns The report produced by `runComparison`.
 */
export async function runComparisonOverHistory(
  params: RunComparisonOverHistoryParams,
): Promise<ComparisonReport> {
  const { db, workspaceDir, config, retrievers, ks } = params;
  const {
    limit,
    strategy,
    conversationIds,
    includeNotInjected,
    pageExists,
    signal,
  } = params;

  // Same key insertion order as the original conditional spreads.
  const oracleOptions: NonNullable<Parameters<typeof extractOracleTurns>[1]> =
    {};
  if (limit !== undefined) oracleOptions.limit = limit;
  if (strategy !== undefined) oracleOptions.strategy = strategy;
  if (conversationIds !== undefined) {
    oracleOptions.conversationIds = conversationIds;
  }
  if (includeNotInjected !== undefined) {
    oracleOptions.includeNotInjected = includeNotInjected;
  }
  if (pageExists !== undefined) oracleOptions.pageExists = pageExists;

  const oracleTurns = extractOracleTurns(db, oracleOptions);

  return runComparison({
    retrievers,
    oracleTurns,
    reconstruct: (turn) => reconstructInput(db, turn, config, workspaceDir),
    ks,
    ...(signal !== undefined ? { signal } : {}),
  });
}
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recall@k and per-lane diff for the comparison harness.
|
|
3
|
+
*
|
|
4
|
+
* Ground truth is the current router's logged selections (see `oracle.ts`). A
|
|
5
|
+
* retriever's "extras" (selected, not in ground truth) are reported as a
|
|
6
|
+
* *diff*, not an error — a better retriever may legitimately surface pages the
|
|
7
|
+
* router missed. recall@k is the primary signal.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { RetrievalOutput } from "./retriever.js";
|
|
11
|
+
|
|
12
|
+
/** Evaluation of one retriever output against one turn's ground truth. */
export interface TurnEval {
  /** Ground-truth slugs for the turn. */
  groundTruth: string[];
  /** Slugs the retriever selected. */
  selected: string[];
  /** Ground-truth slugs that appear anywhere in the retriever's selection. */
  hits: string[];
  /** Ground-truth slugs absent from the retriever's selection. */
  misses: string[];
  /** Selected slugs that are not ground truth — reported as a diff, not an error. */
  extras: string[];
  /** recall@k keyed by each requested `k`. */
  recallAtK: Record<number, number>;
  /** Hit counts keyed by the retriever's source/lane label. */
  hitsByLane: Record<string, number>;
  /** Cost in USD for this retrieval, when the retriever reported one. */
  costUsd?: number;
  /** The retriever's failure reason, or `null` when none was reported. */
  failureReason: string | null;
}
|
|
28
|
+
|
|
29
|
+
/** Summary statistics over a list of per-turn evaluations. */
export interface AggregateEval {
  /** Number of turns that were aggregated. */
  turns: number;
  /** Mean recall@k across turns, keyed by `k`. */
  meanRecallAtK: Record<number, number>;
  /** Fraction of turns that carried a failure reason. */
  failureRate: number;
  /** Mean USD cost over the turns that reported a cost; absent when none did. */
  meanCostUsd?: number;
}
|
|
35
|
+
|
|
36
|
+
/**
 * recall@k = |topK(selected) ∩ G| / |G|.
 *
 * The intersection is a set: a ground-truth slug that appears more than once
 * in the top-k prefix is counted once, so the result always lies in [0, 1].
 * Duplicates still occupy rank positions in the prefix. An empty ground-truth
 * set is defined as recall 1 (nothing to recall — vacuously complete).
 *
 * @param selected Retriever output in ranked order.
 * @param groundTruth Slugs that should have been retrieved.
 * @param k Prefix length to score. Values below 0 are treated as 0 (an empty
 *   prefix) rather than being interpreted as a negative `slice` end index.
 * @returns The fraction of ground-truth slugs present in the first `k`
 *   selections.
 */
export function recallAtK(
  selected: readonly string[],
  groundTruth: ReadonlySet<string>,
  k: number,
): number {
  if (groundTruth.size === 0) return 1;

  // A negative end index would make slice() drop items from the tail instead
  // of returning an empty prefix.
  const prefixLength = k < 0 ? 0 : k;

  // Collect distinct matches so repeated slugs cannot push recall above 1.
  const matched = new Set<string>();
  for (const slug of selected.slice(0, prefixLength)) {
    if (groundTruth.has(slug)) matched.add(slug);
  }
  return matched.size / groundTruth.size;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Score one retriever output against a turn's ground truth.
 *
 * Ground truth is deduplicated first (first occurrence wins, order kept).
 * Hits and misses are listed in ground-truth order, extras in the
 * retriever's selection order. Hits are additionally tallied per lane using
 * `output.sourceBySlug`, with "unknown" for slugs that have no label.
 *
 * @param output The retriever's result for the turn.
 * @param groundTruth Slugs considered correct for the turn.
 * @param ks The `k` values to compute recall@k for.
 * @returns The per-turn evaluation record.
 */
export function evalTurn(
  output: RetrievalOutput,
  groundTruth: readonly string[],
  ks: readonly number[],
): TurnEval {
  const uniqueTruth = [...new Set(groundTruth)];
  const truthLookup = new Set(uniqueTruth);
  const pickedLookup = new Set(output.selectedSlugs);

  const hits = uniqueTruth.filter((slug) => pickedLookup.has(slug));
  const misses = uniqueTruth.filter((slug) => !pickedLookup.has(slug));
  const extras = output.selectedSlugs.filter((slug) => !truthLookup.has(slug));

  const recallByK: Record<number, number> = {};
  ks.forEach((k) => {
    recallByK[k] = recallAtK(output.selectedSlugs, truthLookup, k);
  });

  const hitsByLane: Record<string, number> = {};
  hits.forEach((slug) => {
    const lane = output.sourceBySlug.get(slug) ?? "unknown";
    const soFar = hitsByLane[lane] ?? 0;
    hitsByLane[lane] = soFar + 1;
  });

  const usd = output.cost?.usd;

  return {
    groundTruth: uniqueTruth,
    selected: output.selectedSlugs,
    hits,
    misses,
    extras,
    recallAtK: recallByK,
    hitsByLane,
    // Only present when the retriever actually reported a USD cost.
    ...(usd !== undefined ? { costUsd: usd } : {}),
    failureReason: output.failureReason ?? null,
  };
}
|
|
92
|
+
|
|
93
|
+
/**
 * Roll per-turn evaluations up into summary statistics.
 *
 * For each `k`, the mean recall is the sum of every turn's recall at that
 * `k` (a turn lacking the key contributes 0) divided by the turn count. With
 * no turns, every mean and the failure rate are 0. The mean cost is taken
 * over only those turns that reported a cost and is omitted when none did.
 *
 * @param perTurn Per-turn evaluation records.
 * @param ks The `k` values to report means for.
 * @returns The aggregate summary.
 */
export function aggregate(
  perTurn: readonly TurnEval[],
  ks: readonly number[],
): AggregateEval {
  const turnCount = perTurn.length;

  const meanRecallAtK: Record<number, number> = {};
  for (const k of ks) {
    meanRecallAtK[k] =
      turnCount === 0
        ? 0
        : perTurn.reduce((acc, t) => acc + (t.recallAtK[k] ?? 0), 0) /
          turnCount;
  }

  // One pass for both the failure tally and the cost tally.
  let failureCount = 0;
  let costedCount = 0;
  let costTotal = 0;
  for (const t of perTurn) {
    if (t.failureReason != null) failureCount += 1;
    if (t.costUsd !== undefined) {
      costedCount += 1;
      costTotal += t.costUsd;
    }
  }

  const summary: AggregateEval = {
    turns: turnCount,
    meanRecallAtK,
    failureRate: turnCount === 0 ? 0 : failureCount / turnCount,
  };
  if (costedCount > 0) summary.meanCostUsd = costTotal / costedCount;
  return summary;
}
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Oracle extraction — the current router's logged selections as silver-standard
|
|
3
|
+
* ground truth.
|
|
4
|
+
*
|
|
5
|
+
* Source: `memory_v2_activation_logs` rows with `mode = 'router'`. Each row's
|
|
6
|
+
* `messageId` is backfilled to the turn's assistant message (see
|
|
7
|
+
* `backfillMemoryV2ActivationMessageId`), so we join `messageId → messages.id`
|
|
8
|
+
* to anchor the turn — robust, no fragile turn-counting. Rows whose messageId
|
|
9
|
+
* is null (the in-flight turn) or no longer resolves are skipped.
|
|
10
|
+
*
|
|
11
|
+
* Ground truth G(turn) = selected slugs with status ∈ {injected, in_context}
|
|
12
|
+
* (what actually reached the model), optionally + not_injected, and — when a
|
|
13
|
+
* `pageExists` predicate is supplied — only slugs whose page still exists
|
|
14
|
+
* (neither retriever can find a nonexistent page). page_missing / corrupt are
|
|
15
|
+
* always excluded.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { and, desc, eq, inArray, isNotNull, sql } from "drizzle-orm";
|
|
19
|
+
|
|
20
|
+
import type { DrizzleDb } from "../../db-connection.js";
|
|
21
|
+
import type {
|
|
22
|
+
MemoryV2ConceptRowRecord,
|
|
23
|
+
MemoryV2ConfigSnapshot,
|
|
24
|
+
} from "../../memory-v2-activation-log-store.js";
|
|
25
|
+
import { memoryV2ActivationLogs, messages } from "../../schema.js";
|
|
26
|
+
|
|
27
|
+
/** One historical turn together with the ground truth extracted from its activation log row. */
export interface OracleTurn {
  /** Conversation the log row belongs to. */
  conversationId: string;
  /** Turn number recorded on the log row. */
  turn: number;
  /** The log row's message id — the anchor used when reconstructing inputs. */
  anchorMessageId: string;
  /** `createdAt` of the anchor message row. */
  anchorCreatedAt: number;
  /** Deduplicated slugs from the log row that passed the status / page-existence filters. */
  groundTruthSlugs: string[];
  /** Config snapshot parsed from the log row. */
  loggedConfig: MemoryV2ConfigSnapshot;
  /** `createdAt` of the log row itself. */
  createdAt: number;
}
|
|
39
|
+
|
|
40
|
+
/** Options for `extractOracleTurns`. */
export interface ExtractOracleOptions {
  /**
   * Upper bound on log rows scanned (default 50). Rows can be skipped after
   * the scan, so fewer turns than `limit` may be returned.
   */
  limit?: number;
  /** Row ordering: newest first ("recent", the default) or random. */
  strategy?: "recent" | "random";
  /** When non-empty, only rows from these conversations are scanned. */
  conversationIds?: string[];
  /**
   * Also accept concepts with status "not_injected" as ground truth.
   * Default false.
   */
  includeNotInjected?: boolean;
  /**
   * Page-existence predicate, typically backed by `getPageIndex().bySlug`.
   * When supplied, slugs for which it returns false are left out of the
   * ground truth. Omit in unit tests.
   */
  pageExists?: (slug: string) => boolean;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Build oracle turns from `mode = 'router'` activation-log rows that have a
 * message id.
 *
 * Rows are scanned newest-first (or in random order) up to `limit`. A row is
 * skipped when its message id no longer resolves to a message, when either
 * JSON column fails to parse, when the parsed concepts value is not an array,
 * or when no concept survives filtering. Within a row, a concept is kept only
 * if it is an object with string `slug` and `status`, its status is allowed,
 * `pageExists` (when given) accepts its slug, and the slug has not been seen
 * earlier in the same row.
 *
 * @param db Database handle to query.
 * @param options See `ExtractOracleOptions`.
 * @returns Oracle turns in the order the rows were scanned.
 */
export function extractOracleTurns(
  db: DrizzleDb,
  options: ExtractOracleOptions = {},
): OracleTurn[] {
  const {
    limit = 50,
    strategy = "recent",
    conversationIds,
    includeNotInjected = false,
    pageExists,
  } = options;

  const allowedStatuses = new Set<string>(["injected", "in_context"]);
  if (includeNotInjected) allowedStatuses.add("not_injected");

  const filters = [
    eq(memoryV2ActivationLogs.mode, "router"),
    isNotNull(memoryV2ActivationLogs.messageId),
  ];
  if (conversationIds && conversationIds.length > 0) {
    filters.push(
      inArray(memoryV2ActivationLogs.conversationId, conversationIds),
    );
  }

  const rows = db
    .select({
      conversationId: memoryV2ActivationLogs.conversationId,
      messageId: memoryV2ActivationLogs.messageId,
      turn: memoryV2ActivationLogs.turn,
      conceptsJson: memoryV2ActivationLogs.conceptsJson,
      configJson: memoryV2ActivationLogs.configJson,
      createdAt: memoryV2ActivationLogs.createdAt,
    })
    .from(memoryV2ActivationLogs)
    .where(and(...filters))
    .orderBy(
      strategy === "random"
        ? sql`RANDOM()`
        : desc(memoryV2ActivationLogs.createdAt),
    )
    .limit(limit)
    .all();

  const turns: OracleTurn[] = [];
  for (const row of rows) {
    const messageId = row.messageId;
    if (messageId == null) continue;

    // Resolve the anchor message; rows whose message is gone are skipped.
    const anchor = db
      .select({ createdAt: messages.createdAt })
      .from(messages)
      .where(eq(messages.id, messageId))
      .limit(1)
      .all();
    const anchorRow = anchor[0];
    if (!anchorRow) continue;

    let parsedConcepts: unknown;
    let loggedConfig: MemoryV2ConfigSnapshot;
    try {
      parsedConcepts = JSON.parse(row.conceptsJson);
      loggedConfig = JSON.parse(row.configJson) as MemoryV2ConfigSnapshot;
    } catch {
      continue;
    }
    // Valid JSON that is not an array (e.g. `null` or `{}`) cannot be
    // iterated; treat it like any other corrupt row instead of throwing and
    // aborting the whole extraction.
    if (!Array.isArray(parsedConcepts)) continue;

    const seen = new Set<string>();
    const groundTruthSlugs: string[] = [];
    for (const entry of parsedConcepts as unknown[]) {
      // Ignore entries that are not concept-shaped rather than crashing on
      // a property read of null / a primitive.
      if (typeof entry !== "object" || entry === null) continue;
      const { slug, status } = entry as Partial<MemoryV2ConceptRowRecord>;
      if (typeof slug !== "string" || typeof status !== "string") continue;

      if (!allowedStatuses.has(status)) continue;
      if (pageExists && !pageExists(slug)) continue;
      if (seen.has(slug)) continue;
      seen.add(slug);
      groundTruthSlugs.push(slug);
    }
    if (groundTruthSlugs.length === 0) continue;

    turns.push({
      conversationId: row.conversationId,
      turn: row.turn,
      anchorMessageId: messageId,
      anchorCreatedAt: anchorRow.createdAt,
      groundTruthSlugs,
      loggedConfig,
      createdAt: row.createdAt,
    });
  }

  return turns;
}
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Input reconstruction — rebuild a retriever's per-turn inputs from telemetry.
|
|
3
|
+
*
|
|
4
|
+
* The activation log stores only outputs, so replaying a historical turn means
|
|
5
|
+
* reconstructing the inputs:
|
|
6
|
+
* - `recentTurnPairs`: the (assistant, user) pairs ending at the turn's user
|
|
7
|
+
* message, windowed by `historical_pairs` and extracted exactly as
|
|
8
|
+
* production does (mirrors `extractRecentTurnPairs` in
|
|
9
|
+
* `conversation-graph-memory.ts`).
|
|
10
|
+
* - `nowText`: read from current workspace files (`loadNowText`). NOT stored
|
|
11
|
+
* in the log, so it may differ from what the live turn saw —
|
|
12
|
+
* always-approximate; see `ReconstructionMeta.nowReconstructedFromCurrent`.
|
|
13
|
+
* - `priorEverInjected`: the union of injected / in_context slugs from earlier
|
|
14
|
+
* `mode='router'` logs in the same conversation (turn < target).
|
|
15
|
+
*
|
|
16
|
+
* The anchor is the turn's assistant reply; the messages the router saw are
|
|
17
|
+
* those strictly before it, so we fetch a bounded recent window up to the
|
|
18
|
+
* anchor's timestamp and cut at the anchor row.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { and, asc, desc, eq, lt, lte } from "drizzle-orm";
|
|
22
|
+
|
|
23
|
+
import type { AssistantConfig } from "../../../config/types.js";
|
|
24
|
+
import type { ContentBlock } from "../../../providers/types.js";
|
|
25
|
+
import type { DrizzleDb } from "../../db-connection.js";
|
|
26
|
+
import type { MemoryV2ConceptRowRecord } from "../../memory-v2-activation-log-store.js";
|
|
27
|
+
import { memoryV2ActivationLogs, messages } from "../../schema.js";
|
|
28
|
+
import { loadNowText } from "../now-text.js";
|
|
29
|
+
import type { RouterTurnPair } from "../router.js";
|
|
30
|
+
import type { EverInjectedEntry } from "../types.js";
|
|
31
|
+
import type { OracleTurn } from "./oracle.js";
|
|
32
|
+
import type { RetrievalInput } from "./retriever.js";
|
|
33
|
+
|
|
34
|
+
/** Bookkeeping about how faithfully a turn's inputs were reconstructed. */
export interface ReconstructionMeta {
  /** The `historical_pairs` window size that was requested. */
  windowPairs: number;
  /** Number of pairs actually produced (can be smaller than the window near the start of a conversation). */
  pairsReconstructed: number;
  /** Number of `priorEverInjected` entries rebuilt from earlier router logs. */
  priorEverInjectedCount: number;
  /**
   * Always true. NOW text comes from the current workspace files because the
   * log does not store it, so it may differ from what the live turn saw; part
   * of any recall gap is attributable to this unmeasured drift.
   */
  nowReconstructedFromCurrent: true;
}
|
|
48
|
+
|
|
49
|
+
/** Result of `reconstructInput`: the retriever input plus reconstruction metadata. */
export interface ReconstructedInput {
  /** Input to hand to a retriever. */
  input: RetrievalInput;
  /** Details about how the input was rebuilt. */
  meta: ReconstructionMeta;
}
|
|
53
|
+
|
|
54
|
+
/** The only message fields pair extraction needs: who spoke and the parsed content blocks. */
interface PlainMessage {
  /** Message role; pair extraction looks for "user" and "assistant". */
  role: string;
  /** Parsed content blocks of the message. */
  content: ContentBlock[];
}
|
|
59
|
+
|
|
60
|
+
/**
 * Local mirror of production `extractRecentTurnPairs`.
 *
 * Scans `msgs` from the newest message backwards. The first user message met
 * becomes "pending"; the next assistant message met closes a pair with it.
 * While a user message is pending, further user messages are ignored, and
 * assistant messages met while nothing is pending are ignored too. Scanning
 * stops once `k` pairs exist. A user message still pending at the end (no
 * earlier assistant reply) becomes a pair with an empty `assistantMessage`
 * if there is room. If nothing was produced at all, a single pair of empty
 * strings is returned.
 *
 * Message text is the text of all "text" blocks joined with a single space.
 *
 * @param msgs Messages in chronological order (oldest first).
 * @param k Maximum number of pairs to return.
 * @returns Pairs ordered oldest first; never empty.
 */
function extractRecentTurnPairs(
  msgs: readonly PlainMessage[],
  k: number,
): RouterTurnPair[] {
  const isTextBlock = (
    block: ContentBlock,
  ): block is Extract<ContentBlock, { type: "text" }> => block.type === "text";

  const textOf = (message: PlainMessage): string => {
    const pieces: string[] = [];
    for (const block of message.content) {
      if (isTextBlock(block)) pieces.push(block.text);
    }
    return pieces.join(" ");
  };

  // Pairs are gathered newest-first and flipped once at the end.
  const newestFirst: RouterTurnPair[] = [];
  let waitingUser: string | null = null;
  let idx = msgs.length - 1;
  while (idx >= 0 && newestFirst.length < k) {
    const current = msgs[idx]!;
    idx -= 1;

    if (waitingUser === null) {
      if (current.role === "user") waitingUser = textOf(current);
      continue;
    }
    if (current.role === "assistant") {
      newestFirst.push({
        assistantMessage: textOf(current),
        userMessage: waitingUser,
      });
      waitingUser = null;
    }
  }

  // Leading user message with no earlier assistant reply.
  if (waitingUser !== null && newestFirst.length < k) {
    newestFirst.push({ assistantMessage: "", userMessage: waitingUser });
  }
  if (newestFirst.length === 0) {
    newestFirst.push({ assistantMessage: "", userMessage: "" });
  }
  return newestFirst.reverse();
}
|
|
100
|
+
|
|
101
|
+
/**
 * Parse a stored message `content` string into content blocks.
 *
 * Returns an empty list when `raw` is not valid JSON or does not decode to
 * an array. Array entries that are not objects carrying a string `type` are
 * dropped, so callers can read `block.type` on every returned element
 * without risking a property access on `null` or a primitive.
 *
 * @param raw JSON text of a message's content column.
 * @returns The block-shaped entries of the decoded array, in order.
 */
function parseContent(raw: string): ContentBlock[] {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return [];
  }
  if (!Array.isArray(parsed)) return [];

  return parsed.filter(
    (entry): entry is ContentBlock =>
      typeof entry === "object" &&
      entry !== null &&
      typeof (entry as { type?: unknown }).type === "string",
  );
}
|
|
109
|
+
|
|
110
|
+
/**
 * Rebuild the retriever input for one historical turn.
 *
 * Fetches up to `max(20, historical_pairs * 12)` messages of the turn's
 * conversation whose `createdAt` is at or before the anchor's, newest first,
 * locates the anchor message in that window and keeps only the rows that
 * follow it in that ordering. Those rows are put back into chronological
 * order and turned into turn pairs; prior injections are rebuilt from earlier
 * router logs and NOW text is loaded from the workspace.
 *
 * @param db Database handle to query.
 * @param turn The oracle turn to reconstruct.
 * @param config Assistant config; supplies the `historical_pairs` window.
 * @param workspaceDir Workspace directory NOW text is loaded from.
 * @returns The reconstructed input and metadata, or `null` when the anchor is
 *   not inside the fetched window or nothing precedes it.
 */
export async function reconstructInput(
  db: DrizzleDb,
  turn: OracleTurn,
  config: AssistantConfig,
  workspaceDir: string,
): Promise<ReconstructedInput | null> {
  const windowPairs = config.memory.v2.router.historical_pairs;
  const { conversationId, anchorMessageId, anchorCreatedAt } = turn;

  // Only the last `windowPairs` pairs are needed, so a bounded window is
  // fetched rather than the whole conversation.
  const fetchWindow = Math.max(20, windowPairs * 12);
  const newestFirst = db
    .select({
      id: messages.id,
      role: messages.role,
      content: messages.content,
    })
    .from(messages)
    .where(
      and(
        eq(messages.conversationId, conversationId),
        lte(messages.createdAt, anchorCreatedAt),
      ),
    )
    .orderBy(desc(messages.createdAt), desc(messages.id))
    .limit(fetchWindow)
    .all();

  const anchorPos = newestFirst.findIndex((m) => m.id === anchorMessageId);
  if (anchorPos === -1) return null;

  // Everything after the anchor in newest-first order is older than it.
  const olderThanAnchor = newestFirst.slice(anchorPos + 1);
  if (olderThanAnchor.length === 0) return null;

  // Walk backwards to restore chronological (oldest-first) order.
  const plain: PlainMessage[] = [];
  for (let i = olderThanAnchor.length - 1; i >= 0; i--) {
    const row = olderThanAnchor[i]!;
    plain.push({ role: row.role, content: parseContent(row.content) });
  }

  const recentTurnPairs = extractRecentTurnPairs(plain, windowPairs);
  const priorEverInjected = reconstructPriorEverInjected(
    db,
    conversationId,
    turn.turn,
  );
  const nowText = await loadNowText(workspaceDir);

  const input: RetrievalInput = {
    workspaceDir,
    recentTurnPairs,
    nowText,
    priorEverInjected,
    config,
  };
  const meta: ReconstructionMeta = {
    windowPairs,
    pairsReconstructed: recentTurnPairs.length,
    priorEverInjectedCount: priorEverInjected.length,
    nowReconstructedFromCurrent: true,
  };
  return { input, meta };
}
|
|
174
|
+
|
|
175
|
+
/** Concept statuses that count as "was injected" when rebuilding prior injections. */
const PRIOR_STATUSES = new Set<string>(["injected", "in_context"]);

/**
 * Union of slugs injected on earlier `mode='router'` turns in this
 * conversation (turn < `currentTurn`), each tagged with the earliest turn it
 * appeared on — the harness analogue of the running `everInjected` list
 * production maintains.
 *
 * Log rows are read in ascending turn order. A row is ignored when its
 * concepts JSON fails to parse or does not decode to an array, and entries
 * that are not objects with string `slug` and `status` are ignored, so one
 * corrupt row cannot abort the reconstruction.
 *
 * @param db Database handle to query.
 * @param conversationId Conversation whose earlier logs are scanned.
 * @param currentTurn Exclusive upper bound on the turn number.
 * @returns One entry per slug, in order of first appearance.
 */
function reconstructPriorEverInjected(
  db: DrizzleDb,
  conversationId: string,
  currentTurn: number,
): EverInjectedEntry[] {
  const rows = db
    .select({
      turn: memoryV2ActivationLogs.turn,
      conceptsJson: memoryV2ActivationLogs.conceptsJson,
    })
    .from(memoryV2ActivationLogs)
    .where(
      and(
        eq(memoryV2ActivationLogs.conversationId, conversationId),
        eq(memoryV2ActivationLogs.mode, "router"),
        lt(memoryV2ActivationLogs.turn, currentTurn),
      ),
    )
    .orderBy(asc(memoryV2ActivationLogs.turn))
    .all();

  const firstTurnBySlug = new Map<string, number>();
  for (const row of rows) {
    let parsedConcepts: unknown;
    try {
      parsedConcepts = JSON.parse(row.conceptsJson);
    } catch {
      continue;
    }
    // Valid JSON that is not an array (e.g. `null`) is not iterable; skip it
    // like any other corrupt row.
    if (!Array.isArray(parsedConcepts)) continue;

    for (const entry of parsedConcepts as unknown[]) {
      if (typeof entry !== "object" || entry === null) continue;
      const { slug, status } = entry as Partial<MemoryV2ConceptRowRecord>;
      if (typeof slug !== "string" || typeof status !== "string") continue;

      if (!PRIOR_STATUSES.has(status)) continue;
      // Rows arrive in ascending turn order, so the first sighting is the
      // earliest turn.
      if (!firstTurnBySlug.has(slug)) {
        firstTurnBySlug.set(slug, row.turn);
      }
    }
  }

  return Array.from(firstTurnBySlug, ([slug, turn]) => ({ slug, turn }));
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* The retriever seam for the memory comparison harness.
|
|
3
|
+
*
|
|
4
|
+
* A `Retriever` maps one turn's reconstructed context to a set of selected
|
|
5
|
+
* concept-page slugs. Multiple strategies (the production router, an
|
|
6
|
+
* alternative retrieval loop) implement this single interface, so the harness
|
|
7
|
+
* can run them over the same turns and diff their selections against the oracle
|
|
8
|
+
* (see `oracle.ts`). Offline only — nothing here runs in the live injection
|
|
9
|
+
* path.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import type { AssistantConfig } from "../../../config/types.js";
|
|
13
|
+
import type { RouterTurnPair } from "../router.js";
|
|
14
|
+
import type { EverInjectedEntry } from "../types.js";
|
|
15
|
+
import type { DescentTrace } from "./trace.js";
|
|
16
|
+
|
|
17
|
+
/**
 * Everything a retriever is given for one turn. Mirrors the live router's
 * inputs (`RunRouterParams`) and is rebuilt from historical telemetry by
 * `reconstructInput` (see `replay-input.ts`).
 */
export interface RetrievalInput {
  /** Workspace directory the turn is evaluated against. */
  workspaceDir: string;
  /**
   * Recent (assistant, user) pairs in chronological order. The final
   * entry's `userMessage` is the turn currently being routed.
   */
  recentTurnPairs: readonly RouterTurnPair[];
  /** NOW context (essentials/threads/recent), passed verbatim. */
  nowText: string;
  /** Slugs that were already injected on earlier turns. */
  priorEverInjected: readonly EverInjectedEntry[];
  /** Assistant configuration for the retrieval. */
  config: AssistantConfig;
  /** Optional abort signal for the retrieval. */
  signal?: AbortSignal;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Cost accounting a retriever may attach to a single retrieval. Every field
 * is optional, so a retriever reports only the figures it has.
 */
export interface RetrievalCost {
  inputTokens?: number;
  outputTokens?: number;
  usd?: number;
  ms?: number;
}
|
|
44
|
+
|
|
45
|
+
/** The result a retriever produces for a single turn. */
export interface RetrievalOutput {
  /** Page slugs the retriever selected, kept in its own ranked order. */
  selectedSlugs: Array<string>;
  /**
   * Retriever-defined provenance / lane label for each slug: router tiers
   * (`tier1`, `tier3:0`, …) for the current router, or loop lanes (`sparse`,
   * `dense`, `tree`, `edge`) for the future loop. `metrics.ts` uses it for
   * per-lane attribution.
   */
  sourceBySlug: ReadonlyMap<string, string>;
  /**
   * Descent trace, produced by the loop only. Tier-based retrievers (the
   * current router) do no tree walk and leave this `undefined`; renderers
   * then show "(no descent trace)".
   */
  trace?: DescentTrace;
  cost?: RetrievalCost;
  /** Non-null when the retriever was unable to produce a usable selection. */
  failureReason?: string | null;
}
|
|
66
|
+
|
|
67
|
+
/**
 * A retrieval strategy identified by `name`. The harness runs implementations
 * offline over historical turns, so they must not mutate production state.
 */
export interface Retriever {
  readonly name: string;
  retrieve(input: RetrievalInput): Promise<RetrievalOutput>;
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Router retriever — the current production router (`runRouter`) adapted to the
|
|
3
|
+
* harness `Retriever` interface.
|
|
4
|
+
*
|
|
5
|
+
* The union cap is left ON (no `disableUnionCap`) so the selection matches what
|
|
6
|
+
* production would actually inject — the self-test grades the router against
|
|
7
|
+
* its own injected ground truth.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { DrizzleDb } from "../../db-connection.js";
|
|
11
|
+
import { runRouter } from "../router.js";
|
|
12
|
+
import type {
|
|
13
|
+
RetrievalInput,
|
|
14
|
+
RetrievalOutput,
|
|
15
|
+
Retriever,
|
|
16
|
+
} from "./retriever.js";
|
|
17
|
+
|
|
18
|
+
/**
 * Builds the harness `Retriever` named "router", which delegates each turn to
 * `runRouter` and returns its selection, per-slug sources and failure reason.
 *
 * @param database optional handle for tier-2 EMA scoring; when given it is
 * forwarded to `runRouter`. Leave it out to exercise only the tier-1 / tier-3
 * paths (as the router's own tests do).
 * @returns a retriever whose `retrieve` resolves with the router's result.
 */
export function createRouterRetriever(database?: DrizzleDb): Retriever {
  const retrieve = async (input: RetrievalInput): Promise<RetrievalOutput> => {
    const {
      workspaceDir,
      recentTurnPairs,
      nowText,
      priorEverInjected,
      config,
      signal,
    } = input;

    // `signal` and `database` are spread in only when present, so an absent
    // value is omitted from the params rather than passed as `undefined`.
    const { selectedSlugs, sourceBySlug, failureReason } = await runRouter({
      workspaceDir,
      recentTurnPairs,
      nowText,
      priorEverInjected,
      config,
      ...(signal ? { signal } : {}),
      ...(database ? { database } : {}),
    });

    return { selectedSlugs, sourceBySlug, failureReason };
  };

  return { name: "router", retrieve };
}
|