@vellumai/assistant 0.6.5 → 0.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +9 -1
- package/ARCHITECTURE.md +15 -17
- package/Dockerfile +6 -4
- package/__tests__/permissions/gateway-threshold-reader.test.ts +283 -0
- package/docs/architecture/integrations.md +32 -39
- package/docs/architecture/memory.md +25 -30
- package/docs/architecture/security.md +7 -6
- package/docs/browser-use-architecture-phase2.md +63 -20
- package/docs/plugins.md +761 -0
- package/examples/plugins/echo/README.md +132 -0
- package/examples/plugins/echo/package.json +17 -0
- package/examples/plugins/echo/register.ts +187 -0
- package/node_modules/@vellumai/egress-proxy/src/types.ts +19 -0
- package/openapi.yaml +212 -68
- package/package.json +1 -1
- package/src/__tests__/app-compiler.test.ts +57 -0
- package/src/__tests__/approval-cascade.test.ts +7 -2
- package/src/__tests__/auto-analysis-end-to-end.test.ts +1 -0
- package/src/__tests__/avatar-generator.test.ts +4 -2
- package/src/__tests__/bundled-asset.test.ts +6 -6
- package/src/__tests__/catalog-cache.test.ts +69 -0
- package/src/__tests__/checker.test.ts +459 -171
- package/src/__tests__/circuit-breaker-pipeline.test.ts +406 -0
- package/src/__tests__/compaction-events.test.ts +501 -0
- package/src/__tests__/compaction-pipeline.test.ts +210 -0
- package/src/__tests__/compaction-strip-metadata-clear.test.ts +181 -0
- package/src/__tests__/compaction-timeout-recovery.test.ts +262 -0
- package/src/__tests__/config-model-image-provider.test.ts +110 -0
- package/src/__tests__/config-schema.test.ts +22 -9
- package/src/__tests__/config-watcher-cleanup-throttle.test.ts +0 -4
- package/src/__tests__/contacts-tools.test.ts +26 -0
- package/src/__tests__/context-overflow-policy.test.ts +7 -7
- package/src/__tests__/context-window-manager.test.ts +355 -4
- package/src/__tests__/conversation-abort-tool-results.test.ts +4 -1
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +26 -30
- package/src/__tests__/conversation-agent-loop.test.ts +30 -141
- package/src/__tests__/conversation-confirmation-signals.test.ts +6 -1
- package/src/__tests__/conversation-history-web-search.test.ts +1 -0
- package/src/__tests__/conversation-init.benchmark.test.ts +2 -16
- package/src/__tests__/conversation-pairing.test.ts +174 -10
- package/src/__tests__/conversation-pre-run-repair.test.ts +4 -1
- package/src/__tests__/conversation-process-callsite.test.ts +3 -0
- package/src/__tests__/conversation-provider-retry-repair.test.ts +16 -7
- package/src/__tests__/conversation-queue.test.ts +29 -14
- package/src/__tests__/conversation-routes-disk-view.test.ts +7 -6
- package/src/__tests__/conversation-runtime-assembly.test.ts +155 -110
- package/src/__tests__/conversation-runtime-workspace.test.ts +23 -38
- package/src/__tests__/conversation-seed-composer.test.ts +2 -2
- package/src/__tests__/conversation-slash-queue.test.ts +7 -2
- package/src/__tests__/conversation-slash-unknown.test.ts +25 -2
- package/src/__tests__/conversation-speed-override.test.ts +6 -1
- package/src/__tests__/conversation-title-service.test.ts +116 -0
- package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +41 -2
- package/src/__tests__/conversation-usage.test.ts +1 -1
- package/src/__tests__/conversation-workspace-cache-state.test.ts +4 -1
- package/src/__tests__/conversation-workspace-injection.test.ts +3 -0
- package/src/__tests__/conversation-workspace-tool-tracking.test.ts +4 -1
- package/src/__tests__/credential-health-service.test.ts +78 -9
- package/src/__tests__/credential-security-invariants.test.ts +2 -2
- package/src/__tests__/db-schedule-syntax-migration.test.ts +1 -0
- package/src/__tests__/empty-response-pipeline.test.ts +305 -0
- package/src/__tests__/extension-id-sync-guard.test.ts +3 -3
- package/src/__tests__/first-greeting.test.ts +247 -5
- package/src/__tests__/headless-browser-mode.test.ts +57 -0
- package/src/__tests__/history-repair-pipeline.test.ts +399 -0
- package/src/__tests__/host-browser-e2e-cloud.test.ts +307 -0
- package/src/__tests__/host-browser-e2e-self-hosted.test.ts +3 -3
- package/src/__tests__/host-proxy-interface.test.ts +36 -2
- package/src/__tests__/image-credentials.test.ts +137 -0
- package/src/__tests__/image-service-dispatcher.test.ts +186 -0
- package/src/__tests__/injector-chain.test.ts +526 -0
- package/src/__tests__/intent-routing.test.ts +0 -26
- package/src/__tests__/llm-call-pipeline.test.ts +285 -0
- package/src/__tests__/llm-schema.test.ts +1 -1
- package/src/__tests__/media-generate-image.test.ts +119 -13
- package/src/__tests__/memory-retrieval-pipeline.test.ts +401 -0
- package/src/__tests__/memory-upsert-concurrency.test.ts +1 -0
- package/src/__tests__/migration-import-from-url.test.ts +5 -68
- package/src/__tests__/model-intents.test.ts +4 -2
- package/src/__tests__/notification-broadcaster.test.ts +3 -3
- package/src/__tests__/notification-decision-strategy.test.ts +0 -11
- package/src/__tests__/notification-schedule-notify-dedup.test.ts +108 -0
- package/src/__tests__/oauth-apps-routes.test.ts +1 -1
- package/src/__tests__/oauth-cli.test.ts +14 -12
- package/src/__tests__/oauth-connect-orchestrator.test.ts +4 -13
- package/src/__tests__/oauth-provider-serializer.test.ts +6 -4
- package/src/__tests__/oauth-provider-visibility.test.ts +3 -5
- package/src/__tests__/oauth-providers-routes.test.ts +3 -2
- package/src/__tests__/oauth-store.test.ts +41 -76
- package/src/__tests__/onboarding-template-contract.test.ts +16 -64
- package/src/__tests__/openai-image-service.test.ts +368 -0
- package/src/__tests__/overflow-reduce-pipeline.test.ts +676 -0
- package/src/__tests__/permission-checker-host-gate.test.ts +0 -24
- package/src/__tests__/persist-onboarding-artifacts.test.ts +266 -0
- package/src/__tests__/persistence-pipeline.test.ts +377 -0
- package/src/__tests__/pipeline-runner.test.ts +565 -0
- package/src/__tests__/platform.test.ts +5 -2
- package/src/__tests__/plugin-bootstrap.test.ts +483 -0
- package/src/__tests__/plugin-registry.test.ts +273 -0
- package/src/__tests__/plugin-route-contribution.test.ts +288 -0
- package/src/__tests__/plugin-skill-contribution.test.ts +367 -0
- package/src/__tests__/plugin-tool-contribution.test.ts +286 -0
- package/src/__tests__/plugin-types.test.ts +320 -0
- package/src/__tests__/pricing.test.ts +44 -12
- package/src/__tests__/proxy-approval-callback.test.ts +69 -8
- package/src/__tests__/reaction-persistence.test.ts +1 -0
- package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +1 -0
- package/src/__tests__/registry.test.ts +0 -2
- package/src/__tests__/schedule-routes.test.ts +131 -1
- package/src/__tests__/scheduler-recurrence.test.ts +14 -70
- package/src/__tests__/scheduler-reuse-conversation.test.ts +10 -50
- package/src/__tests__/secret-detection-handler.test.ts +0 -10
- package/src/__tests__/shell-identity.test.ts +0 -134
- package/src/__tests__/suggestion-routes.test.ts +103 -4
- package/src/__tests__/task-memory-cleanup.test.ts +1 -0
- package/src/__tests__/task-scheduler.test.ts +3 -15
- package/src/__tests__/test-preload.ts +11 -0
- package/src/__tests__/title-generate-pipeline.test.ts +224 -0
- package/src/__tests__/token-estimate-pipeline.test.ts +431 -0
- package/src/__tests__/tool-error-pipeline.test.ts +244 -0
- package/src/__tests__/tool-execute-pipeline.test.ts +431 -0
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -6
- package/src/__tests__/tool-executor-shell-integration.test.ts +7 -10
- package/src/__tests__/tool-executor.test.ts +141 -0
- package/src/__tests__/tool-result-truncate-pipeline.test.ts +356 -0
- package/src/__tests__/tool-result-truncation.test.ts +0 -110
- package/src/__tests__/user-plugin-loader.test.ts +191 -0
- package/src/__tests__/workspace-migration-046-seed-conversation-starters-callsite.test.ts +185 -0
- package/src/__tests__/workspace-migration-049-release-notes-default-sonnet.test.ts +100 -0
- package/src/__tests__/workspace-migration-050-seed-main-agent-opus-callsite.test.ts +171 -0
- package/src/__tests__/workspace-migration-051-seed-conversation-summarization-callsite.test.ts +252 -0
- package/src/__tests__/workspace-migration-remove-hooks.test.ts +99 -0
- package/src/__tests__/workspace-policy.test.ts +21 -3
- package/src/agent/loop.ts +340 -102
- package/src/approvals/__tests__/guardian-feed-event.test.ts +304 -0
- package/src/approvals/guardian-request-resolvers.ts +80 -0
- package/src/backup/__tests__/backup-worker.test.ts +2 -13
- package/src/backup/backup-worker.ts +3 -15
- package/src/bundler/app-compiler.ts +84 -1
- package/src/calls/call-state.ts +2 -2
- package/src/channels/__tests__/types.test.ts +3 -3
- package/src/channels/types.ts +6 -4
- package/src/cli/__tests__/notifications.test.ts +87 -211
- package/src/cli/commands/__tests__/backup.test.ts +1 -1
- package/src/cli/commands/__tests__/image-generation.test.ts +255 -35
- package/src/cli/commands/__tests__/inference-send.test.ts +12 -0
- package/src/cli/commands/__tests__/tts-synthesize.test.ts +12 -0
- package/src/cli/commands/backup.ts +2 -2
- package/src/cli/commands/clients.ts +138 -0
- package/src/cli/commands/completions.ts +2 -9
- package/src/cli/commands/conversations.ts +55 -7
- package/src/cli/commands/image-generation.ts +33 -34
- package/src/cli/commands/notifications.ts +68 -103
- package/src/cli/commands/oauth/__tests__/providers-register.test.ts +1 -1
- package/src/cli/commands/oauth/__tests__/providers-update.test.ts +1 -1
- package/src/cli/commands/oauth/connect.ts +2 -2
- package/src/cli/commands/oauth/providers.ts +176 -8
- package/src/cli/commands/oauth/status.ts +46 -36
- package/src/cli/commands/skills.ts +3 -4
- package/src/cli/program.ts +25 -29
- package/src/config/__tests__/backup-schema.test.ts +7 -2
- package/src/config/bundled-skills/app-builder/SKILL.md +2 -2
- package/src/config/bundled-skills/app-builder/references/WIDGETS.md +10 -10
- package/src/config/bundled-skills/contacts/tools/contact-merge.ts +66 -87
- package/src/config/bundled-skills/contacts/tools/contact-search.ts +28 -51
- package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +22 -40
- package/src/config/bundled-skills/image-studio/SKILL.md +2 -1
- package/src/config/bundled-skills/image-studio/TOOLS.json +2 -1
- package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +23 -39
- package/src/config/bundled-skills/messaging/SKILL.md +3 -3
- package/src/config/bundled-skills/messaging/tools/__tests__/messaging-feed-events.test.ts +207 -0
- package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +12 -0
- package/src/config/bundled-skills/messaging/tools/messaging-send.ts +58 -0
- package/src/config/bundled-skills/schedule/SKILL.md +8 -3
- package/src/config/bundled-skills/schedule/TOOLS.json +15 -7
- package/src/config/bundled-skills/schedule/references/SCRIPT_MODE_PATTERNS.md +59 -0
- package/src/config/bundled-tool-registry.ts +0 -15
- package/src/config/feature-flag-registry.json +17 -1
- package/src/config/schema.ts +19 -0
- package/src/config/schemas/backup.ts +1 -1
- package/src/config/schemas/conversations.ts +16 -0
- package/src/config/schemas/llm.ts +2 -3
- package/src/config/schemas/security.ts +6 -6
- package/src/config/schemas/tts.ts +11 -0
- package/src/config/skill-state.ts +6 -2
- package/src/config/skills.ts +94 -5
- package/src/context/__tests__/compact-prompt.test.ts +27 -9
- package/src/context/prompts/compact.md +26 -12
- package/src/context/tool-result-truncation.ts +3 -63
- package/src/context/window-manager.ts +190 -16
- package/src/credential-health/credential-health-service.ts +19 -6
- package/src/daemon/__tests__/conversation-feed-event.test.ts +317 -0
- package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +4 -12
- package/src/daemon/__tests__/conversation-tool-setup.test.ts +14 -15
- package/src/daemon/config-watcher.ts +0 -2
- package/src/daemon/context-overflow-policy.ts +4 -13
- package/src/daemon/conversation-agent-loop-handlers.ts +83 -22
- package/src/daemon/conversation-agent-loop.ts +984 -683
- package/src/daemon/conversation-history.ts +10 -19
- package/src/daemon/conversation-lifecycle.ts +37 -19
- package/src/daemon/conversation-notifiers.ts +2 -110
- package/src/daemon/conversation-process.ts +14 -7
- package/src/daemon/conversation-runtime-assembly.ts +532 -411
- package/src/daemon/conversation-tool-setup.ts +41 -4
- package/src/daemon/conversation.ts +80 -35
- package/src/daemon/external-plugins-bootstrap.ts +478 -0
- package/src/daemon/first-greeting.ts +191 -14
- package/src/daemon/handlers/config-model.ts +11 -0
- package/src/daemon/handlers/skills.ts +5 -1
- package/src/daemon/lifecycle.ts +33 -68
- package/src/daemon/message-types/computer-use.ts +2 -34
- package/src/daemon/message-types/conversations.ts +49 -0
- package/src/daemon/message-types/messages.ts +12 -0
- package/src/daemon/server.ts +5 -3
- package/src/daemon/shutdown-handlers.ts +2 -12
- package/src/daemon/tool-side-effects.ts +14 -56
- package/src/heartbeat/__tests__/heartbeat-feed-event.test.ts +160 -0
- package/src/heartbeat/heartbeat-service.ts +24 -1
- package/src/home/__tests__/feed-population-integration.test.ts +312 -0
- package/src/home/emit-feed-event.ts +7 -0
- package/src/home/feed-types.ts +41 -2
- package/src/home/rewrite-command-preview.ts +66 -0
- package/src/ipc/__tests__/socket-path.test.ts +11 -50
- package/src/ipc/cli-client.ts +1 -1
- package/src/ipc/cli-server.ts +3 -3
- package/src/ipc/gateway-client.ts +4 -1
- package/src/ipc/routes/browser-context.ts +2 -0
- package/src/ipc/routes/browser.ts +1 -0
- package/src/ipc/routes/get-contact.ts +16 -0
- package/src/ipc/routes/index.ts +14 -0
- package/src/ipc/routes/list-clients.ts +31 -0
- package/src/ipc/routes/merge-contacts.ts +17 -0
- package/src/ipc/routes/notification.ts +133 -0
- package/src/ipc/routes/rename-conversation.ts +59 -0
- package/src/ipc/routes/search-contacts.ts +19 -0
- package/src/ipc/routes/upsert-contact.ts +25 -0
- package/src/ipc/socket-path.ts +14 -38
- package/src/media/app-icon-generator.ts +23 -46
- package/src/media/avatar-router.ts +26 -41
- package/src/media/gemini-image-service.ts +8 -41
- package/src/media/image-credentials.ts +73 -0
- package/src/media/image-service.ts +85 -0
- package/src/media/openai-image-service.ts +131 -0
- package/src/media/types.ts +46 -0
- package/src/memory/conversation-crud.ts +48 -18
- package/src/memory/conversation-queries.ts +57 -4
- package/src/memory/conversation-title-service.ts +25 -0
- package/src/memory/db-init.ts +8 -0
- package/src/memory/embedding-gemini.test.ts +41 -2
- package/src/memory/embedding-gemini.ts +6 -1
- package/src/memory/graph/bootstrap.test.ts +282 -0
- package/src/memory/graph/bootstrap.ts +8 -5
- package/src/memory/graph/extraction.ts +10 -2
- package/src/memory/graph/graph-search.test.ts +1 -0
- package/src/memory/graph/inspect.ts +2 -2
- package/src/memory/graph/retriever.ts +10 -3
- package/src/memory/migrations/041-approval-prompt-ts-tracker.ts +26 -0
- package/src/memory/migrations/149-oauth-tables.ts +1 -0
- package/src/memory/migrations/223-schedule-script-column.ts +11 -0
- package/src/memory/migrations/224-oauth-providers-managed-service-is-paid.ts +24 -0
- package/src/memory/migrations/225-oauth-providers-available-scopes.ts +13 -0
- package/src/memory/migrations/index.ts +4 -0
- package/src/memory/pkb/pkb-index.test.ts +1 -0
- package/src/memory/pkb/pkb-reconcile.test.ts +1 -0
- package/src/memory/pkb/pkb-search.test.ts +65 -4
- package/src/memory/pkb/pkb-search.ts +40 -18
- package/src/memory/qdrant-client.test.ts +60 -0
- package/src/memory/qdrant-client.ts +25 -0
- package/src/memory/schema/infrastructure.ts +1 -0
- package/src/memory/schema/oauth.ts +4 -1
- package/src/messaging/providers/slack/render-transcript.test.ts +77 -29
- package/src/messaging/providers/slack/render-transcript.ts +58 -0
- package/src/notifications/conversation-pairing.ts +78 -19
- package/src/notifications/copy-composer.ts +0 -5
- package/src/notifications/emit-signal.ts +1 -1
- package/src/notifications/signal.ts +1 -2
- package/src/oauth/AGENTS.md +1 -1
- package/src/oauth/__tests__/identity-verifier.test.ts +2 -1
- package/src/oauth/connect-orchestrator.ts +8 -34
- package/src/oauth/connect-types.ts +6 -10
- package/src/oauth/manual-token-connection.ts +23 -0
- package/src/oauth/oauth-store.ts +30 -14
- package/src/oauth/provider-serializer.ts +6 -1
- package/src/oauth/seed-providers.ts +56 -108
- package/src/outbound-proxy/http-forwarder.ts +9 -0
- package/src/permissions/approval-policy.test.ts +293 -18
- package/src/permissions/approval-policy.ts +110 -58
- package/src/permissions/arg-parser.test.ts +161 -0
- package/src/permissions/arg-parser.ts +141 -0
- package/src/permissions/bash-risk-classifier.test.ts +414 -2
- package/src/permissions/bash-risk-classifier.ts +303 -60
- package/src/permissions/checker.ts +157 -29
- package/src/permissions/command-registry.test.ts +239 -0
- package/src/permissions/command-registry.ts +234 -54
- package/src/permissions/defaults.ts +5 -4
- package/src/permissions/gateway-threshold-reader.ts +196 -0
- package/src/permissions/prompter.ts +4 -0
- package/src/permissions/risk-types.ts +61 -4
- package/src/permissions/schedule-risk-classifier.test.ts +129 -0
- package/src/permissions/schedule-risk-classifier.ts +85 -0
- package/src/permissions/shell-identity.ts +2 -42
- package/src/permissions/types.ts +2 -0
- package/src/permissions/workspace-policy.ts +8 -3
- package/src/plugins/defaults/circuit-breaker.ts +146 -0
- package/src/plugins/defaults/compaction.ts +145 -0
- package/src/plugins/defaults/empty-response.ts +126 -0
- package/src/plugins/defaults/history-repair.ts +85 -0
- package/src/plugins/defaults/index.ts +116 -0
- package/src/plugins/defaults/injectors.ts +491 -0
- package/src/plugins/defaults/llm-call.ts +82 -0
- package/src/plugins/defaults/memory-retrieval.ts +226 -0
- package/src/plugins/defaults/overflow-reduce.ts +181 -0
- package/src/plugins/defaults/persistence.ts +129 -0
- package/src/plugins/defaults/title-generate.ts +95 -0
- package/src/plugins/defaults/token-estimate.ts +104 -0
- package/src/plugins/defaults/tool-error.ts +126 -0
- package/src/plugins/defaults/tool-execute.ts +89 -0
- package/src/plugins/defaults/tool-result-truncate.ts +88 -0
- package/src/plugins/pipeline.ts +316 -0
- package/src/plugins/plugin-skill-contributions.ts +292 -0
- package/src/plugins/registry.ts +241 -0
- package/src/plugins/types.ts +1134 -0
- package/src/plugins/user-loader.ts +177 -0
- package/src/prompts/templates/BOOTSTRAP.md +27 -77
- package/src/providers/model-catalog.ts +52 -29
- package/src/providers/model-intents.ts +1 -1
- package/src/providers/openrouter/client.ts +5 -1
- package/src/providers/speech-to-text/deepgram-realtime.test.ts +61 -0
- package/src/providers/speech-to-text/deepgram-realtime.ts +57 -0
- package/src/providers/speech-to-text/xai-realtime.test.ts +72 -4
- package/src/providers/speech-to-text/xai-realtime.ts +39 -14
- package/src/runtime/AGENTS.md +25 -16
- package/src/runtime/__tests__/browser-extension-pair-routes.test.ts +3 -3
- package/src/runtime/__tests__/client-registry.test.ts +293 -0
- package/src/runtime/client-registry.ts +261 -0
- package/src/runtime/http-server.ts +77 -8
- package/src/runtime/http-types.ts +0 -2
- package/src/runtime/migrations/vbundle-builder.ts +1 -22
- package/src/runtime/routes/approval-prompt-ts-tracker.ts +51 -31
- package/src/runtime/routes/approval-routes.ts +17 -0
- package/src/runtime/routes/browser-extension-pair-routes.ts +27 -8
- package/src/runtime/routes/conversation-routes.ts +223 -116
- package/src/runtime/routes/inbound-message-handler.ts +88 -13
- package/src/runtime/routes/memory-item-routes.test.ts +1 -0
- package/src/runtime/routes/migration-routes.ts +0 -3
- package/src/runtime/routes/playground/__tests__/force-compact.test.ts +284 -0
- package/src/runtime/routes/playground/__tests__/guard.test.ts +80 -0
- package/src/runtime/routes/playground/__tests__/inject-failures.test.ts +294 -0
- package/src/runtime/routes/playground/__tests__/reset-circuit.test.ts +271 -0
- package/src/runtime/routes/playground/__tests__/seed-conversation.test.ts +202 -0
- package/src/runtime/routes/playground/__tests__/seeded-conversations.test.ts +309 -0
- package/src/runtime/routes/playground/__tests__/state.test.ts +224 -0
- package/src/runtime/routes/playground/conversation-not-found.ts +29 -0
- package/src/runtime/routes/playground/deps.ts +56 -0
- package/src/runtime/routes/playground/force-compact.ts +73 -0
- package/src/runtime/routes/playground/guard.ts +37 -0
- package/src/runtime/routes/playground/index.ts +28 -0
- package/src/runtime/routes/playground/inject-failures.ts +159 -0
- package/src/runtime/routes/playground/reset-circuit.ts +115 -0
- package/src/runtime/routes/playground/seed-conversation.ts +139 -0
- package/src/runtime/routes/playground/seeded-conversations.ts +78 -0
- package/src/runtime/routes/playground/state.ts +78 -0
- package/src/runtime/routes/schedule-routes.ts +89 -8
- package/src/runtime/skill-route-registry.ts +75 -15
- package/src/schedule/run-script.ts +68 -0
- package/src/schedule/schedule-store.ts +7 -1
- package/src/schedule/scheduler.ts +48 -8
- package/src/skills/catalog-cache.ts +12 -5
- package/src/tools/browser/__tests__/browser-status.test.ts +189 -0
- package/src/tools/browser/browser-execution.ts +88 -19
- package/src/tools/browser/cdp-client/__tests__/extension-cdp-client.test.ts +230 -0
- package/src/tools/browser/cdp-client/__tests__/factory.test.ts +146 -3
- package/src/tools/browser/cdp-client/extension-cdp-client.ts +54 -3
- package/src/tools/browser/cdp-client/factory.ts +15 -4
- package/src/tools/executor.ts +126 -74
- package/src/tools/network/script-proxy/session-manager.ts +37 -1
- package/src/tools/permission-checker.ts +98 -49
- package/src/tools/policy-context.ts +4 -0
- package/src/tools/registry.ts +140 -3
- package/src/tools/schedule/create.ts +23 -8
- package/src/tools/schedule/update.ts +3 -1
- package/src/tools/secret-detection-handler.ts +0 -51
- package/src/tools/system/avatar-generator.ts +6 -2
- package/src/tools/types.ts +28 -2
- package/src/util/platform.ts +7 -2
- package/src/util/pricing.ts +26 -3
- package/src/workspace/migrations/006-services-config.ts +2 -4
- package/src/workspace/migrations/022-move-hooks-to-workspace.ts +2 -3
- package/src/workspace/migrations/041-backfill-google-gmail-settings-scope.ts +3 -4
- package/src/workspace/migrations/046-seed-conversation-starters-callsite.ts +108 -0
- package/src/workspace/migrations/047-remove-watch-callsites.ts +54 -0
- package/src/workspace/migrations/048-remove-workspace-hooks.ts +81 -0
- package/src/workspace/migrations/049-release-notes-default-sonnet.ts +80 -0
- package/src/workspace/migrations/050-seed-main-agent-opus-callsite.ts +86 -0
- package/src/workspace/migrations/051-seed-conversation-summarization-callsite.ts +128 -0
- package/src/workspace/migrations/registry.ts +12 -0
- package/tsconfig.json +1 -1
- package/hook-templates/debug-prompt-logger/hook.json +0 -7
- package/hook-templates/debug-prompt-logger/run.sh +0 -66
- package/src/__tests__/compaction-circuit-breaker.test.ts +0 -336
- package/src/__tests__/context-overflow-approval.test.ts +0 -156
- package/src/__tests__/hooks-blocking.test.ts +0 -178
- package/src/__tests__/hooks-cli.test.ts +0 -182
- package/src/__tests__/hooks-config.test.ts +0 -108
- package/src/__tests__/hooks-discovery.test.ts +0 -211
- package/src/__tests__/hooks-integration.test.ts +0 -196
- package/src/__tests__/hooks-manager.test.ts +0 -226
- package/src/__tests__/hooks-runner.test.ts +0 -175
- package/src/__tests__/hooks-settings.test.ts +0 -160
- package/src/__tests__/hooks-templates.test.ts +0 -169
- package/src/__tests__/hooks-ts-runner.test.ts +0 -170
- package/src/__tests__/hooks-watch.test.ts +0 -112
- package/src/__tests__/notification-schedule-dedup.test.ts +0 -213
- package/src/__tests__/oauth-scope-policy.test.ts +0 -180
- package/src/__tests__/send-notification-tool.test.ts +0 -83
- package/src/cli/commands/shotgun.ts +0 -266
- package/src/config/bundled-skills/conversations/SKILL.md +0 -20
- package/src/config/bundled-skills/conversations/TOOLS.json +0 -23
- package/src/config/bundled-skills/conversations/tools/rename-conversation.ts +0 -88
- package/src/config/bundled-skills/heartbeat/SKILL.md +0 -43
- package/src/config/bundled-skills/notifications/SKILL.md +0 -40
- package/src/config/bundled-skills/notifications/TOOLS.json +0 -80
- package/src/config/bundled-skills/notifications/tools/send-notification.ts +0 -152
- package/src/config/bundled-skills/notifications/tools/shared.ts +0 -13
- package/src/config/bundled-skills/screen-watch/SKILL.md +0 -27
- package/src/config/bundled-skills/screen-watch/TOOLS.json +0 -35
- package/src/config/bundled-skills/screen-watch/tools/start-screen-watch.ts +0 -12
- package/src/config/bundled-skills/skills-catalog/SKILL.md +0 -84
- package/src/daemon/context-overflow-approval.ts +0 -52
- package/src/daemon/watch-handler.ts +0 -399
- package/src/hooks/cli.ts +0 -253
- package/src/hooks/config.ts +0 -100
- package/src/hooks/discovery.ts +0 -135
- package/src/hooks/manager.ts +0 -179
- package/src/hooks/runner.ts +0 -117
- package/src/hooks/templates.ts +0 -77
- package/src/hooks/types.ts +0 -75
- package/src/oauth/scope-policy.ts +0 -89
- package/src/runtime/gateway-internal-client.ts +0 -94
- package/src/runtime/routes/watch-routes.ts +0 -156
- package/src/signals/shotgun.ts +0 -203
- package/src/tools/watch/screen-watch.ts +0 -144
- package/src/tools/watch/watch-state.ts +0 -142
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for the `tokenEstimate` plugin pipeline (PR 22 of the
|
|
3
|
+
* agent-plugin-system plan).
|
|
4
|
+
*
|
|
5
|
+
* Covers:
|
|
6
|
+
* - The default plugin's terminal middleware matches
|
|
7
|
+
* {@link estimatePromptTokensRaw} output exactly across a set of golden
|
|
8
|
+
* inputs (empty history, text-only, tools, provider-specific image sizing).
|
|
9
|
+
* - Running the pipeline end-to-end with the default plugin registered produces
|
|
10
|
+
* the same numeric result as calling `estimatePromptTokensRaw` directly.
|
|
11
|
+
* - A custom plugin that short-circuits the chain can override the default,
|
|
12
|
+
* proving the extension point works.
|
|
13
|
+
*
|
|
14
|
+
* These tests exercise the registry + runner directly. They do not touch
|
|
15
|
+
* `bootstrapPlugins` — the default registration path is covered by the
|
|
16
|
+
* bootstrap suite.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
20
|
+
|
|
21
|
+
import {
|
|
22
|
+
estimatePromptTokensRaw,
|
|
23
|
+
estimateToolsTokens,
|
|
24
|
+
} from "../context/token-estimator.js";
|
|
25
|
+
import type { TrustContext } from "../daemon/conversation-runtime-assembly.js";
|
|
26
|
+
import {
|
|
27
|
+
defaultTokenEstimatePlugin,
|
|
28
|
+
defaultTokenEstimateTerminal,
|
|
29
|
+
} from "../plugins/defaults/token-estimate.js";
|
|
30
|
+
import { DEFAULT_TIMEOUTS, runPipeline } from "../plugins/pipeline.js";
|
|
31
|
+
import {
|
|
32
|
+
getMiddlewaresFor,
|
|
33
|
+
registerPlugin,
|
|
34
|
+
resetPluginRegistryForTests,
|
|
35
|
+
} from "../plugins/registry.js";
|
|
36
|
+
import type {
|
|
37
|
+
EstimateArgs,
|
|
38
|
+
EstimateResult,
|
|
39
|
+
Middleware,
|
|
40
|
+
Plugin,
|
|
41
|
+
TurnContext,
|
|
42
|
+
} from "../plugins/types.js";
|
|
43
|
+
import type { Message, ToolDefinition } from "../providers/types.js";
|
|
44
|
+
|
|
45
|
+
// ── Fixtures ─────────────────────────────────────────────────────────────
|
|
46
|
+
|
|
47
|
+
const trust: TrustContext = {
|
|
48
|
+
sourceChannel: "vellum",
|
|
49
|
+
trustClass: "guardian",
|
|
50
|
+
};
|
|
51
|
+
|
|
52
|
+
/**
 * Builds the {@link TurnContext} used by these tests.
 *
 * The defaults are fixed test identifiers, turn index 0 and the shared
 * `trust` fixture. Any field present in `overrides` replaces the matching
 * default because it is spread last.
 */
function makeCtx(overrides: Partial<TurnContext> = {}): TurnContext {
  const defaults = {
    requestId: "req-token-estimate-test",
    conversationId: "conv-token-estimate-test",
    turnIndex: 0,
    trust,
  };
  return { ...defaults, ...overrides };
}
|
|
61
|
+
|
|
62
|
+
const EMPTY_HISTORY: Message[] = [];
|
|
63
|
+
|
|
64
|
+
const TEXT_HISTORY: Message[] = [
|
|
65
|
+
{ role: "user", content: [{ type: "text", text: "hello there" }] },
|
|
66
|
+
{
|
|
67
|
+
role: "assistant",
|
|
68
|
+
content: [
|
|
69
|
+
{ type: "text", text: "hi! how can I help you today?" },
|
|
70
|
+
{ type: "text", text: "a second text block for good measure" },
|
|
71
|
+
],
|
|
72
|
+
},
|
|
73
|
+
];
|
|
74
|
+
|
|
75
|
+
const TOOL_USE_HISTORY: Message[] = [
|
|
76
|
+
{ role: "user", content: [{ type: "text", text: "what's in the log?" }] },
|
|
77
|
+
{
|
|
78
|
+
role: "assistant",
|
|
79
|
+
content: [
|
|
80
|
+
{
|
|
81
|
+
type: "tool_use",
|
|
82
|
+
id: "tu-1",
|
|
83
|
+
name: "bash",
|
|
84
|
+
input: { command: "tail -n 5 server.log" },
|
|
85
|
+
},
|
|
86
|
+
],
|
|
87
|
+
},
|
|
88
|
+
{
|
|
89
|
+
role: "user",
|
|
90
|
+
content: [
|
|
91
|
+
{
|
|
92
|
+
type: "tool_result",
|
|
93
|
+
tool_use_id: "tu-1",
|
|
94
|
+
content: "line1\nline2\nline3",
|
|
95
|
+
},
|
|
96
|
+
],
|
|
97
|
+
},
|
|
98
|
+
];
|
|
99
|
+
|
|
100
|
+
const SYSTEM_PROMPT = "You are a helpful assistant with a long preamble.";
|
|
101
|
+
|
|
102
|
+
const SAMPLE_TOOLS: ToolDefinition[] = [
|
|
103
|
+
{
|
|
104
|
+
name: "bash",
|
|
105
|
+
description: "Execute a shell command and return its output.",
|
|
106
|
+
input_schema: {
|
|
107
|
+
type: "object",
|
|
108
|
+
properties: { command: { type: "string" } },
|
|
109
|
+
required: ["command"],
|
|
110
|
+
},
|
|
111
|
+
},
|
|
112
|
+
{
|
|
113
|
+
name: "file_read",
|
|
114
|
+
description: "Read a file from the workspace.",
|
|
115
|
+
input_schema: {
|
|
116
|
+
type: "object",
|
|
117
|
+
properties: { path: { type: "string" } },
|
|
118
|
+
required: ["path"],
|
|
119
|
+
},
|
|
120
|
+
},
|
|
121
|
+
];
|
|
122
|
+
|
|
123
|
+
// ── Helpers ──────────────────────────────────────────────────────────────
|
|
124
|
+
|
|
125
|
+
/**
 * Registers the default `tokenEstimate` plugin with the plugin registry.
 * Each test calls this itself, since the registry is reset around every test.
 */
function registerDefault(): void {
  registerPlugin(defaultTokenEstimatePlugin);
}
|
|
128
|
+
|
|
129
|
+
/**
 * Computes the reference estimate by calling `estimatePromptTokensRaw`
 * directly, bypassing the plugin pipeline.
 *
 * The tool budget is `estimateToolsTokens(tools)` when at least one tool is
 * supplied and `0` otherwise.
 *
 * @param args History, system prompt, provider name and tool definitions.
 * @returns The raw prompt token estimate.
 */
function rawEstimate(
  args: Pick<EstimateArgs, "history" | "systemPrompt" | "providerName"> & {
    tools: ToolDefinition[];
  },
): number {
  const { history, systemPrompt, providerName, tools } = args;

  // Only pay for tool definitions when there are any.
  let toolTokenBudget = 0;
  if (tools.length > 0) {
    toolTokenBudget = estimateToolsTokens(tools);
  }

  return estimatePromptTokensRaw(history, systemPrompt, {
    providerName,
    toolTokenBudget,
  });
}
|
|
141
|
+
|
|
142
|
+
/**
 * Runs the `tokenEstimate` pipeline with whatever middlewares are currently
 * registered and resolves with the pipeline's result.
 *
 * @param args Estimate inputs forwarded unchanged to the pipeline.
 */
async function runViaPipeline(args: EstimateArgs): Promise<EstimateResult> {
  const middlewares = getMiddlewaresFor("tokenEstimate");
  const ctx = makeCtx();
  const timeout = DEFAULT_TIMEOUTS.tokenEstimate;

  // Same composition as the production call site in
  // `daemon/conversation-agent-loop.ts`: the default plugin's middleware is a
  // passthrough, so the caller supplies the terminal. Reusing that terminal
  // keeps the tests on the exact shape that ships.
  return runPipeline<EstimateArgs, EstimateResult>(
    "tokenEstimate",
    middlewares,
    defaultTokenEstimateTerminal,
    args,
    ctx,
    timeout,
  );
}
|
|
156
|
+
|
|
157
|
+
// ── Tests ────────────────────────────────────────────────────────────────
|
|
158
|
+
|
|
159
|
+
beforeEach(() => {
|
|
160
|
+
resetPluginRegistryForTests();
|
|
161
|
+
});
|
|
162
|
+
|
|
163
|
+
afterEach(() => {
|
|
164
|
+
resetPluginRegistryForTests();
|
|
165
|
+
});
|
|
166
|
+
|
|
167
|
+
// Parity suite: with only the default plugin registered, the pipeline result
// must equal the direct `rawEstimate(...)` value for the same arguments.
describe("tokenEstimate pipeline — default plugin parity", () => {
|
|
168
|
+
test("default matches estimatePromptTokensRaw on empty history", async () => {
|
|
169
|
+
registerDefault();
|
|
170
|
+
const args: EstimateArgs = {
|
|
171
|
+
history: EMPTY_HISTORY,
|
|
172
|
+
systemPrompt: undefined,
|
|
173
|
+
tools: [],
|
|
174
|
+
providerName: undefined,
|
|
175
|
+
};
|
|
176
|
+
const pipelineResult = await runViaPipeline(args);
|
|
177
|
+
expect(pipelineResult).toBe(rawEstimate(args));
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
test("default matches estimatePromptTokensRaw on text-only history", async () => {
|
|
181
|
+
registerDefault();
|
|
182
|
+
const args: EstimateArgs = {
|
|
183
|
+
history: TEXT_HISTORY,
|
|
184
|
+
systemPrompt: SYSTEM_PROMPT,
|
|
185
|
+
tools: [],
|
|
186
|
+
providerName: "anthropic",
|
|
187
|
+
};
|
|
188
|
+
const pipelineResult = await runViaPipeline(args);
|
|
189
|
+
expect(pipelineResult).toBe(rawEstimate(args));
|
|
190
|
+
// Sanity: the system prompt adds real token cost, so the number is
|
|
191
|
+
// strictly larger than the bare-history estimate.
|
|
192
|
+
expect(pipelineResult).toBeGreaterThan(
|
|
193
|
+
rawEstimate({
|
|
194
|
+
history: TEXT_HISTORY,
|
|
195
|
+
systemPrompt: undefined,
|
|
196
|
+
tools: [],
|
|
197
|
+
providerName: "anthropic",
|
|
198
|
+
}),
|
|
199
|
+
);
|
|
200
|
+
});
|
|
201
|
+
|
|
202
|
+
test("default matches estimatePromptTokensRaw with tool_use/tool_result blocks", async () => {
|
|
203
|
+
registerDefault();
|
|
204
|
+
const args: EstimateArgs = {
|
|
205
|
+
history: TOOL_USE_HISTORY,
|
|
206
|
+
systemPrompt: SYSTEM_PROMPT,
|
|
207
|
+
tools: SAMPLE_TOOLS,
|
|
208
|
+
providerName: "anthropic",
|
|
209
|
+
};
|
|
210
|
+
const pipelineResult = await runViaPipeline(args);
|
|
211
|
+
expect(pipelineResult).toBe(rawEstimate(args));
|
|
212
|
+
});
|
|
213
|
+
|
|
214
|
+
test("default folds tool definition tokens into the result", async () => {
|
|
215
|
+
registerDefault();
|
|
216
|
+
const baseArgs: EstimateArgs = {
|
|
217
|
+
history: TEXT_HISTORY,
|
|
218
|
+
systemPrompt: SYSTEM_PROMPT,
|
|
219
|
+
tools: [],
|
|
220
|
+
providerName: "anthropic",
|
|
221
|
+
};
|
|
222
|
+
const withoutTools = await runViaPipeline(baseArgs);
|
|
223
|
+
const withTools = await runViaPipeline({
|
|
224
|
+
...baseArgs,
|
|
225
|
+
tools: SAMPLE_TOOLS,
|
|
226
|
+
});
|
|
227
|
+
// Tools contribute non-zero overhead; the pipeline result must grow.
|
|
228
|
+
const toolBudget = estimateToolsTokens(SAMPLE_TOOLS);
|
|
229
|
+
expect(toolBudget).toBeGreaterThan(0);
|
|
230
|
+
// The growth must equal the tool estimate exactly, not merely be positive.
expect(withTools - withoutTools).toBe(toolBudget);
|
|
231
|
+
});
|
|
232
|
+
|
|
233
|
+
test("provider-specific image sizing flows through the default", async () => {
|
|
234
|
+
registerDefault();
|
|
235
|
+
// Two providers see different image token costs for the same content —
|
|
236
|
+
// the raw estimator is the source of truth, so the pipeline must agree
|
|
237
|
+
// under both provider names.
|
|
238
|
+
const imageHistory: Message[] = [
|
|
239
|
+
{
|
|
240
|
+
role: "user",
|
|
241
|
+
content: [
|
|
242
|
+
{
|
|
243
|
+
type: "image",
|
|
244
|
+
source: {
|
|
245
|
+
type: "base64",
|
|
246
|
+
media_type: "image/png",
|
|
247
|
+
// Small fake PNG-ish payload; the estimator's fallback path
|
|
248
|
+
// kicks in when parseImageDimensions fails, which is fine —
|
|
249
|
+
// the two providers still diverge on overhead.
|
|
250
|
+
data: "a".repeat(128),
|
|
251
|
+
},
|
|
252
|
+
},
|
|
253
|
+
],
|
|
254
|
+
},
|
|
255
|
+
];
|
|
256
|
+
const anthropicArgs: EstimateArgs = {
|
|
257
|
+
history: imageHistory,
|
|
258
|
+
systemPrompt: undefined,
|
|
259
|
+
tools: [],
|
|
260
|
+
providerName: "anthropic",
|
|
261
|
+
};
|
|
262
|
+
const openaiArgs: EstimateArgs = {
|
|
263
|
+
...anthropicArgs,
|
|
264
|
+
providerName: "openai",
|
|
265
|
+
};
|
|
266
|
+
const anthropicResult = await runViaPipeline(anthropicArgs);
|
|
267
|
+
const openaiResult = await runViaPipeline(openaiArgs);
|
|
268
|
+
// NOTE(review): only per-provider parity with `rawEstimate` is asserted;
// the two results are never compared with each other, so the divergence
// described in the comments above is not itself checked by this test.
expect(anthropicResult).toBe(rawEstimate(anthropicArgs));
|
|
269
|
+
expect(openaiResult).toBe(rawEstimate(openaiArgs));
|
|
270
|
+
});
|
|
271
|
+
});
|
|
272
|
+
|
|
273
|
+
// Override suite: a custom `tokenEstimate` middleware registered ahead of the
// default either replaces the estimate outright or post-processes the value
// returned by `next(args)`.
describe("tokenEstimate pipeline — custom override", () => {
|
|
274
|
+
test("custom plugin short-circuit returns a different value than the default", async () => {
|
|
275
|
+
// A plugin that completely replaces the default with a fixed value,
|
|
276
|
+
// proving plugins can substitute provider-native tokenizers (e.g.
|
|
277
|
+
// `countTokens`) without touching orchestrator code.
|
|
278
|
+
const FIXED = 424242;
|
|
279
|
+
// Never calls `_next`, so nothing downstream of this middleware runs.
const override: Middleware<EstimateArgs, EstimateResult> = async (
|
|
280
|
+
_args,
|
|
281
|
+
_next,
|
|
282
|
+
_ctx,
|
|
283
|
+
) => FIXED;
|
|
284
|
+
const customPlugin: Plugin = {
|
|
285
|
+
manifest: {
|
|
286
|
+
name: "custom-token-estimate",
|
|
287
|
+
version: "1.0.0",
|
|
288
|
+
requires: { pluginRuntime: "v1", tokenEstimateApi: "v1" },
|
|
289
|
+
},
|
|
290
|
+
middleware: { tokenEstimate: override },
|
|
291
|
+
};
|
|
292
|
+
|
|
293
|
+
// Register the custom plugin FIRST so it sits outermost and
|
|
294
|
+
// short-circuits before the default's terminal runs.
|
|
295
|
+
registerPlugin(customPlugin);
|
|
296
|
+
registerDefault();
|
|
297
|
+
|
|
298
|
+
const args: EstimateArgs = {
|
|
299
|
+
history: TEXT_HISTORY,
|
|
300
|
+
systemPrompt: SYSTEM_PROMPT,
|
|
301
|
+
tools: SAMPLE_TOOLS,
|
|
302
|
+
providerName: "anthropic",
|
|
303
|
+
};
|
|
304
|
+
const pipelineResult = await runViaPipeline(args);
|
|
305
|
+
expect(pipelineResult).toBe(FIXED);
|
|
306
|
+
// And for contrast: the default alone would have given the raw value.
|
|
307
|
+
expect(pipelineResult).not.toBe(rawEstimate(args));
|
|
308
|
+
});
|
|
309
|
+
|
|
310
|
+
test("wrapper middleware that scales the downstream result composes with the default", async () => {
|
|
311
|
+
// A plugin that wraps the downstream estimate, doubling it. This
|
|
312
|
+
// exercises the onion composition: outer middleware sees the raw
|
|
313
|
+
// default result and returns its own modification.
|
|
314
|
+
const doubler: Middleware<EstimateArgs, EstimateResult> = async (
|
|
315
|
+
args,
|
|
316
|
+
next,
|
|
317
|
+
_ctx,
|
|
318
|
+
) => {
|
|
319
|
+
const inner = await next(args);
|
|
320
|
+
return inner * 2;
|
|
321
|
+
};
|
|
322
|
+
const wrapperPlugin: Plugin = {
|
|
323
|
+
manifest: {
|
|
324
|
+
name: "doubling-token-estimate",
|
|
325
|
+
version: "1.0.0",
|
|
326
|
+
requires: { pluginRuntime: "v1", tokenEstimateApi: "v1" },
|
|
327
|
+
},
|
|
328
|
+
middleware: { tokenEstimate: doubler },
|
|
329
|
+
};
|
|
330
|
+
|
|
331
|
+
registerPlugin(wrapperPlugin);
|
|
332
|
+
registerDefault();
|
|
333
|
+
|
|
334
|
+
const args: EstimateArgs = {
|
|
335
|
+
history: TEXT_HISTORY,
|
|
336
|
+
systemPrompt: SYSTEM_PROMPT,
|
|
337
|
+
tools: SAMPLE_TOOLS,
|
|
338
|
+
providerName: "anthropic",
|
|
339
|
+
};
|
|
340
|
+
const pipelineResult = await runViaPipeline(args);
|
|
341
|
+
expect(pipelineResult).toBe(rawEstimate(args) * 2);
|
|
342
|
+
});
|
|
343
|
+
});
|
|
344
|
+
|
|
345
|
+
describe("tokenEstimate pipeline — default does not shadow late plugins", () => {
|
|
346
|
+
test("user middleware registered AFTER the default still runs", async () => {
|
|
347
|
+
// Regression test for the default-first shadowing hazard: defaults are
|
|
348
|
+
// registered before user plugins in `bootstrapPlugins()`, putting the
|
|
349
|
+
// default at the OUTERMOST onion position. If the default middleware
|
|
350
|
+
// runs the estimate directly instead of calling `next(args)`, any user
|
|
351
|
+
// plugin loaded afterward is invisible. The default is a passthrough —
|
|
352
|
+
// this test fails loudly if that invariant ever regresses.
|
|
353
|
+
registerDefault();
|
|
354
|
+
const observed: EstimateArgs[] = [];
|
|
355
|
+
const observer: Middleware<EstimateArgs, EstimateResult> = async (
|
|
356
|
+
args,
|
|
357
|
+
next,
|
|
358
|
+
_ctx,
|
|
359
|
+
) => {
|
|
360
|
+
observed.push(args);
|
|
361
|
+
// Still call through to the rest of the chain, but discard its value and
|
|
362
|
+
// return a sentinel so the observer's result is distinguishable from the default's output.
|
|
363
|
+
await next(args);
|
|
364
|
+
return 999_999;
|
|
365
|
+
};
|
|
366
|
+
const userPlugin: Plugin = {
|
|
367
|
+
manifest: {
|
|
368
|
+
name: "late-registered-observer",
|
|
369
|
+
version: "1.0.0",
|
|
370
|
+
requires: { pluginRuntime: "v1", tokenEstimateApi: "v1" },
|
|
371
|
+
},
|
|
372
|
+
middleware: { tokenEstimate: observer },
|
|
373
|
+
};
|
|
374
|
+
registerPlugin(userPlugin);
|
|
375
|
+
|
|
376
|
+
const args: EstimateArgs = {
|
|
377
|
+
history: TEXT_HISTORY,
|
|
378
|
+
systemPrompt: SYSTEM_PROMPT,
|
|
379
|
+
tools: [],
|
|
380
|
+
providerName: "anthropic",
|
|
381
|
+
};
|
|
382
|
+
const result = await runViaPipeline(args);
|
|
383
|
+
// The observer ran exactly once and its sentinel is what the pipeline returned.
expect(observed.length).toBe(1);
|
|
384
|
+
expect(result).toBe(999_999);
|
|
385
|
+
});
|
|
386
|
+
});
|
|
387
|
+
|
|
388
|
+
describe("tokenEstimate pipeline — args are immutable to middleware", () => {
|
|
389
|
+
test("frozen history/tools reject in-place mutation attempts", () => {
|
|
390
|
+
// The call site freezes shallow clones of `history` and `tools` before
|
|
391
|
+
// handing them to the pipeline. This mirrors the runtime protection
|
|
392
|
+
// that stops a misbehaving middleware from trimming `args.history` in
|
|
393
|
+
// place — which would silently drop prompt context from the
|
|
394
|
+
// orchestrator's live `runMessages` array before the provider call.
|
|
395
|
+
// NOTE(review): this test freezes its own local clones and never calls
// `runPipeline`, so it pins `Object.freeze` semantics rather than the
// call site's freezing — confirm the call-site behavior is covered elsewhere.
const frozenHistory = Object.freeze([...TEXT_HISTORY]);
|
|
396
|
+
const frozenTools = Object.freeze([...SAMPLE_TOOLS]);
|
|
397
|
+
expect(() => {
|
|
398
|
+
(frozenHistory as Message[]).pop();
|
|
399
|
+
}).toThrow(TypeError);
|
|
400
|
+
expect(() => {
|
|
401
|
+
(frozenTools as ToolDefinition[]).push({
|
|
402
|
+
name: "extra",
|
|
403
|
+
description: "",
|
|
404
|
+
input_schema: { type: "object", properties: {} },
|
|
405
|
+
});
|
|
406
|
+
}).toThrow(TypeError);
|
|
407
|
+
});
|
|
408
|
+
});
|
|
409
|
+
|
|
410
|
+
describe("tokenEstimate pipeline — empty registry fallback", () => {
|
|
411
|
+
test("without any plugin registered, the terminal receives the call", async () => {
|
|
412
|
+
// `runViaPipeline` wires in `defaultTokenEstimateTerminal`, so here we run the
|
|
413
|
+
// pipeline with an explicit terminal that returns a sentinel to prove
|
|
414
|
+
// that an empty middleware list falls through.
|
|
415
|
+
const SENTINEL = 12345;
|
|
416
|
+
const result = await runPipeline<EstimateArgs, EstimateResult>(
|
|
417
|
+
"tokenEstimate",
|
|
418
|
+
getMiddlewaresFor("tokenEstimate"),
|
|
419
|
+
async () => SENTINEL,
|
|
420
|
+
{
|
|
421
|
+
history: TEXT_HISTORY,
|
|
422
|
+
systemPrompt: SYSTEM_PROMPT,
|
|
423
|
+
tools: [],
|
|
424
|
+
providerName: "anthropic",
|
|
425
|
+
},
|
|
426
|
+
makeCtx(),
|
|
427
|
+
DEFAULT_TIMEOUTS.tokenEstimate,
|
|
428
|
+
);
|
|
429
|
+
expect(result).toBe(SENTINEL);
|
|
430
|
+
});
|
|
431
|
+
});
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for the `toolError` pipeline (PR 19).
|
|
3
|
+
*
|
|
4
|
+
* Covers:
|
|
5
|
+
* - Default plugin nudges on the first error turn and keeps nudging up to the
|
|
6
|
+
* `maxConsecutiveErrorNudges` cap.
|
|
7
|
+
* - Default plugin suppresses the nudge once the cap is exceeded (the error is
|
|
8
|
+
* likely unrecoverable — burning tokens on more nudges is wasteful).
|
|
9
|
+
* - Default plugin uses the canonical {@link DEFAULT_TOOL_ERROR_NUDGE_TEXT}.
|
|
10
|
+
* - Default plugin skips when `hasToolError` is false, regardless of the
|
|
11
|
+
* consecutive counter (no error this turn → nothing to nudge).
|
|
12
|
+
* - Swapping in a user plugin that provides its own `toolError` middleware
|
|
13
|
+
* changes the nudge text end-to-end through `runPipeline`.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { beforeEach, describe, expect, test } from "bun:test";
|
|
17
|
+
|
|
18
|
+
import type { TrustContext } from "../daemon/conversation-runtime-assembly.js";
|
|
19
|
+
import {
|
|
20
|
+
DEFAULT_TOOL_ERROR_NUDGE_TEXT,
|
|
21
|
+
defaultToolErrorPlugin,
|
|
22
|
+
defaultToolErrorTerminal,
|
|
23
|
+
} from "../plugins/defaults/tool-error.js";
|
|
24
|
+
import { runPipeline } from "../plugins/pipeline.js";
|
|
25
|
+
import {
|
|
26
|
+
getMiddlewaresFor,
|
|
27
|
+
registerPlugin,
|
|
28
|
+
resetPluginRegistryForTests,
|
|
29
|
+
} from "../plugins/registry.js";
|
|
30
|
+
import {
|
|
31
|
+
type Middleware,
|
|
32
|
+
type Plugin,
|
|
33
|
+
type ToolErrorArgs,
|
|
34
|
+
type ToolErrorDecision,
|
|
35
|
+
type TurnContext,
|
|
36
|
+
} from "../plugins/types.js";
|
|
37
|
+
|
|
38
|
+
// Trust context placed on every `TurnContext` that `makeCtx()` builds.
const trust: TrustContext = {
|
|
39
|
+
sourceChannel: "vellum",
|
|
40
|
+
trustClass: "guardian",
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
/**
 * Builds the `TurnContext` passed to `runPipeline` in these tests.
 *
 * @returns A new object literal with fixed request/conversation ids,
 *   `turnIndex` 0, and the module-level `trust` context.
 */
function makeCtx(): TurnContext {
|
|
44
|
+
return {
|
|
45
|
+
requestId: "req-tool-error-test",
|
|
46
|
+
conversationId: "conv-tool-error-test",
|
|
47
|
+
turnIndex: 0,
|
|
48
|
+
trust,
|
|
49
|
+
};
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
/**
 * Runs `args` through the `toolError` pipeline.
 *
 * Uses whatever middlewares `getMiddlewaresFor("toolError")` returns at call
 * time, a terminal that delegates to `defaultToolErrorTerminal`, and the
 * context from `makeCtx()`.
 *
 * @param args - The tool-error arguments handed to the pipeline.
 * @returns The promise returned by `runPipeline`.
 */
async function runToolErrorPipeline(
|
|
53
|
+
args: ToolErrorArgs,
|
|
54
|
+
): Promise<ToolErrorDecision> {
|
|
55
|
+
// Mirror the production call site in `agent/loop.ts`: the pipeline terminal
|
|
56
|
+
// is `defaultToolErrorTerminal`, not a no-op. The default plugin's
|
|
57
|
+
// middleware is a passthrough that calls `next(args)`, so the decision
|
|
58
|
+
// logic lives in the terminal.
|
|
59
|
+
return runPipeline<ToolErrorArgs, ToolErrorDecision>(
|
|
60
|
+
"toolError",
|
|
61
|
+
getMiddlewaresFor("toolError"),
|
|
62
|
+
async (pipelineArgs) => defaultToolErrorTerminal(pipelineArgs),
|
|
63
|
+
args,
|
|
64
|
+
makeCtx(),
|
|
65
|
+
// NOTE(review): presumably the pipeline timeout in milliseconds — confirm
// against `runPipeline`'s signature.
500,
|
|
66
|
+
);
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
describe("toolError pipeline", () => {
|
|
70
|
+
// Every test in this suite runs with only `defaultToolErrorPlugin` registered.
describe("default plugin", () => {
|
|
71
|
+
beforeEach(() => {
|
|
72
|
+
resetPluginRegistryForTests();
|
|
73
|
+
registerPlugin(defaultToolErrorPlugin);
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
test("nudges on first error turn with canonical text", async () => {
|
|
77
|
+
const decision = await runToolErrorPipeline({
|
|
78
|
+
hasToolError: true,
|
|
79
|
+
consecutiveErrorTurns: 1,
|
|
80
|
+
maxConsecutiveErrorNudges: 3,
|
|
81
|
+
});
|
|
82
|
+
expect(decision.action).toBe("nudge");
|
|
83
|
+
// The `if` re-checks the action so `nudgeText` is only read on the "nudge" variant.
if (decision.action === "nudge") {
|
|
84
|
+
expect(decision.nudgeText).toBe(DEFAULT_TOOL_ERROR_NUDGE_TEXT);
|
|
85
|
+
}
|
|
86
|
+
});
|
|
87
|
+
|
|
88
|
+
test("keeps nudging up to and including the cap", async () => {
|
|
89
|
+
// Cap of 3: turns 1, 2, and 3 all nudge. Turn 4 is past the cap.
|
|
90
|
+
for (let turn = 1; turn <= 3; turn++) {
|
|
91
|
+
const decision = await runToolErrorPipeline({
|
|
92
|
+
hasToolError: true,
|
|
93
|
+
consecutiveErrorTurns: turn,
|
|
94
|
+
maxConsecutiveErrorNudges: 3,
|
|
95
|
+
});
|
|
96
|
+
expect(decision.action).toBe("nudge");
|
|
97
|
+
}
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
test("suppresses the nudge once the consecutive counter exceeds the cap", async () => {
|
|
101
|
+
const decision = await runToolErrorPipeline({
|
|
102
|
+
hasToolError: true,
|
|
103
|
+
consecutiveErrorTurns: 4,
|
|
104
|
+
maxConsecutiveErrorNudges: 3,
|
|
105
|
+
});
|
|
106
|
+
expect(decision.action).toBe("skip");
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
test("skips when there is no tool error this turn, regardless of counter", async () => {
|
|
110
|
+
// Counter is non-zero (the previous turn errored) but this turn succeeded,
|
|
111
|
+
// so nothing to nudge about.
|
|
112
|
+
const decision = await runToolErrorPipeline({
|
|
113
|
+
hasToolError: false,
|
|
114
|
+
consecutiveErrorTurns: 2,
|
|
115
|
+
maxConsecutiveErrorNudges: 3,
|
|
116
|
+
});
|
|
117
|
+
expect(decision.action).toBe("skip");
|
|
118
|
+
});
|
|
119
|
+
|
|
120
|
+
test("honors a caller-supplied cap of zero (never nudges)", async () => {
|
|
121
|
+
// Some call-sites may want to disable nudging entirely by passing cap = 0.
|
|
122
|
+
// The decision logic uses `<=`, so counter 0 with cap 0 does nudge; counter
|
|
123
|
+
// 1 with cap 0 suppresses. The cap is inclusive.
|
|
124
|
+
// NOTE(review): only counter 1 is exercised below; the counter-0 case
// described above is not asserted by this test.
const turn1 = await runToolErrorPipeline({
|
|
125
|
+
hasToolError: true,
|
|
126
|
+
consecutiveErrorTurns: 1,
|
|
127
|
+
maxConsecutiveErrorNudges: 0,
|
|
128
|
+
});
|
|
129
|
+
expect(turn1.action).toBe("skip");
|
|
130
|
+
});
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
// The registry is reset (and left empty) before each test here; every test
// registers exactly the plugins it needs.
describe("user-supplied plugin", () => {
|
|
134
|
+
beforeEach(() => {
|
|
135
|
+
resetPluginRegistryForTests();
|
|
136
|
+
});
|
|
137
|
+
|
|
138
|
+
test("swapping in a plugin changes the nudge text", async () => {
|
|
139
|
+
const customText = "<system_notice>Custom error hint.</system_notice>";
|
|
140
|
+
// This middleware takes no `next` parameter, so it decides on its own and
// never reaches the terminal.
const customMiddleware: Middleware<
|
|
141
|
+
ToolErrorArgs,
|
|
142
|
+
ToolErrorDecision
|
|
143
|
+
> = async (args) => {
|
|
144
|
+
if (args.hasToolError) {
|
|
145
|
+
return { action: "nudge", nudgeText: customText };
|
|
146
|
+
}
|
|
147
|
+
return { action: "skip" };
|
|
148
|
+
};
|
|
149
|
+
const customPlugin: Plugin = {
|
|
150
|
+
manifest: {
|
|
151
|
+
name: "custom-tool-error",
|
|
152
|
+
version: "0.0.1",
|
|
153
|
+
requires: { pluginRuntime: "v1", toolErrorApi: "v1" },
|
|
154
|
+
},
|
|
155
|
+
middleware: { toolError: customMiddleware },
|
|
156
|
+
};
|
|
157
|
+
registerPlugin(customPlugin);
|
|
158
|
+
|
|
159
|
+
const decision = await runToolErrorPipeline({
|
|
160
|
+
hasToolError: true,
|
|
161
|
+
consecutiveErrorTurns: 1,
|
|
162
|
+
maxConsecutiveErrorNudges: 3,
|
|
163
|
+
});
|
|
164
|
+
expect(decision.action).toBe("nudge");
|
|
165
|
+
if (decision.action === "nudge") {
|
|
166
|
+
expect(decision.nudgeText).toBe(customText);
|
|
167
|
+
}
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
test("swapping in a plugin can suppress nudges even when the default would nudge", async () => {
|
|
171
|
+
// Always returns "skip" without consulting `next`.
const suppressingMiddleware: Middleware<
|
|
172
|
+
ToolErrorArgs,
|
|
173
|
+
ToolErrorDecision
|
|
174
|
+
> = async () => ({ action: "skip" });
|
|
175
|
+
const plugin: Plugin = {
|
|
176
|
+
manifest: {
|
|
177
|
+
name: "no-nudge",
|
|
178
|
+
version: "0.0.1",
|
|
179
|
+
requires: { pluginRuntime: "v1", toolErrorApi: "v1" },
|
|
180
|
+
},
|
|
181
|
+
middleware: { toolError: suppressingMiddleware },
|
|
182
|
+
};
|
|
183
|
+
registerPlugin(plugin);
|
|
184
|
+
|
|
185
|
+
const decision = await runToolErrorPipeline({
|
|
186
|
+
hasToolError: true,
|
|
187
|
+
consecutiveErrorTurns: 1,
|
|
188
|
+
maxConsecutiveErrorNudges: 3,
|
|
189
|
+
});
|
|
190
|
+
expect(decision.action).toBe("skip");
|
|
191
|
+
});
|
|
192
|
+
|
|
193
|
+
test("terminal still produces the legacy nudge when no plugin is registered", async () => {
|
|
194
|
+
// No registerPlugin call — the registry is empty for this slot. Since
|
|
195
|
+
// `agent/loop.ts` now passes `defaultToolErrorTerminal` as the pipeline
|
|
196
|
+
// terminal (rather than an inline `() => skip`), direct AgentLoop
|
|
197
|
+
// callers that skip `bootstrapPlugins()` still get the legacy nudge.
|
|
198
|
+
const decision = await runToolErrorPipeline({
|
|
199
|
+
hasToolError: true,
|
|
200
|
+
consecutiveErrorTurns: 1,
|
|
201
|
+
maxConsecutiveErrorNudges: 3,
|
|
202
|
+
});
|
|
203
|
+
expect(decision.action).toBe("nudge");
|
|
204
|
+
if (decision.action === "nudge") {
|
|
205
|
+
expect(decision.nudgeText).toBe(DEFAULT_TOOL_ERROR_NUDGE_TEXT);
|
|
206
|
+
}
|
|
207
|
+
});
|
|
208
|
+
|
|
209
|
+
test("user plugin registered AFTER the default still runs (no shadowing)", async () => {
|
|
210
|
+
// Production registration order: defaults load first via the side-effect
|
|
211
|
+
// imports in `defaults/index.ts`, then user plugins register on top via
|
|
212
|
+
// `bootstrapPlugins()`. The user's middleware ends up at a deeper onion
|
|
213
|
+
// layer than the default. If the default's middleware were to bypass
|
|
214
|
+
// `next` and call the decision logic directly, the user middleware
|
|
215
|
+
// would never run — this test guards against that regression.
|
|
216
|
+
registerPlugin(defaultToolErrorPlugin);
|
|
217
|
+
|
|
218
|
+
let userMiddlewareRan = false;
|
|
219
|
+
// Records that it was invoked, then defers the decision to `next`.
const userMiddleware: Middleware<
|
|
220
|
+
ToolErrorArgs,
|
|
221
|
+
ToolErrorDecision
|
|
222
|
+
> = async (args, next) => {
|
|
223
|
+
userMiddlewareRan = true;
|
|
224
|
+
return next(args);
|
|
225
|
+
};
|
|
226
|
+
registerPlugin({
|
|
227
|
+
manifest: {
|
|
228
|
+
name: "late-user-plugin",
|
|
229
|
+
version: "0.0.1",
|
|
230
|
+
requires: { pluginRuntime: "v1", toolErrorApi: "v1" },
|
|
231
|
+
},
|
|
232
|
+
middleware: { toolError: userMiddleware },
|
|
233
|
+
});
|
|
234
|
+
|
|
235
|
+
// The returned decision is deliberately ignored; only the flag matters here.
await runToolErrorPipeline({
|
|
236
|
+
hasToolError: true,
|
|
237
|
+
consecutiveErrorTurns: 1,
|
|
238
|
+
maxConsecutiveErrorNudges: 3,
|
|
239
|
+
});
|
|
240
|
+
|
|
241
|
+
expect(userMiddlewareRan).toBe(true);
|
|
242
|
+
});
|
|
243
|
+
});
|
|
244
|
+
});
|