@vellumai/assistant 0.8.7 → 0.8.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +20 -4
- package/docker-entrypoint.sh +4 -2
- package/docker-init-apt-root.sh +3 -1
- package/docker-kata-apt-env.sh +3 -1
- package/docker-kata-runtime-family.sh +12 -0
- package/docs/architecture/memory.md +1 -1
- package/docs/plugins.md +75 -79
- package/examples/plugins/echo/README.md +6 -12
- package/examples/plugins/echo/register.ts +0 -41
- package/node_modules/@vellumai/skill-host-contracts/src/server-message.ts +3 -3
- package/openapi.yaml +3381 -348
- package/package.json +1 -1
- package/scripts/generate-openapi.ts +68 -41
- package/src/__tests__/agent-loop-exit-reason.test.ts +34 -39
- package/src/__tests__/agent-loop-provider-error-recording.test.ts +1 -1
- package/src/__tests__/agent-loop.test.ts +37 -87
- package/src/__tests__/agent-wake-disk-pressure-callsite.test.ts +2 -0
- package/src/__tests__/annotate-activity-metadata.test.ts +262 -0
- package/src/__tests__/annotate-risk-options.test.ts +2 -3
- package/src/__tests__/anthropic-provider.test.ts +95 -2
- package/src/__tests__/assistant-event-hub.test.ts +25 -0
- package/src/__tests__/assistant-events-sse-shed.test.ts +8 -0
- package/src/__tests__/{conversation-stream-state.test.ts → assistant-stream-state.test.ts} +252 -91
- package/src/__tests__/auth-fallback-events-store.test.ts +116 -0
- package/src/__tests__/background-workers-disk-pressure.test.ts +6 -0
- package/src/__tests__/btw-routes.test.ts +62 -3
- package/src/__tests__/build-persisted-content.test.ts +184 -0
- package/src/__tests__/catalog-files.test.ts +1 -1
- package/src/__tests__/clawhub-files.test.ts +1 -1
- package/src/__tests__/compaction-pipeline.test.ts +1 -1
- package/src/__tests__/compaction.benchmark.test.ts +0 -30
- package/src/__tests__/config-watcher.test.ts +1 -1
- package/src/__tests__/conversation-abort-tool-results.test.ts +57 -19
- package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +6 -2
- package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +10 -4
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +313 -1136
- package/src/__tests__/conversation-agent-loop.test.ts +596 -1616
- package/src/__tests__/conversation-analysis-routes.test.ts +6 -0
- package/src/__tests__/conversation-history-web-search.test.ts +11 -1
- package/src/__tests__/conversation-pairing.test.ts +4 -31
- package/src/__tests__/conversation-process-app-control-preactivation.test.ts +6 -0
- package/src/__tests__/conversation-provider-retry-repair.test.ts +26 -5
- package/src/__tests__/conversation-queue.test.ts +2 -0
- package/src/__tests__/conversation-routes-disk-view.test.ts +3 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +6 -5
- package/src/__tests__/conversation-runtime-assembly.test.ts +170 -229
- package/src/__tests__/conversation-runtime-workspace.test.ts +3 -24
- package/src/__tests__/conversation-slash-commands.test.ts +8 -42
- package/src/__tests__/conversation-slash-queue.test.ts +6 -1
- package/src/__tests__/conversation-surfaces-action-delivery.test.ts +84 -0
- package/src/__tests__/conversation-sync-tags.test.ts +27 -15
- package/src/__tests__/conversation-title-service.test.ts +135 -2
- package/src/__tests__/conversation-workspace-injection.test.ts +6 -1
- package/src/__tests__/cross-provider-web-search.test.ts +214 -1
- package/src/__tests__/db-schedule-syntax-migration.test.ts +5 -0
- package/src/__tests__/dm-persistence.test.ts +5 -1
- package/src/__tests__/empty-response-hook.test.ts +304 -0
- package/src/__tests__/feature-flag-test-helpers.ts +2 -2
- package/src/__tests__/gemini-image-service.test.ts +13 -0
- package/src/__tests__/helpers/mock-provider.ts +110 -0
- package/src/__tests__/helpers/native-web-search-harness.ts +129 -0
- package/src/__tests__/history-repair-hook.test.ts +1 -0
- package/src/__tests__/identity-intro-cache.test.ts +12 -100
- package/src/__tests__/identity-routes.test.ts +248 -7
- package/src/__tests__/inbound-slack-persistence.test.ts +5 -1
- package/src/__tests__/injector-background-turn.test.ts +2 -8
- package/src/__tests__/injector-chain.test.ts +106 -270
- package/src/__tests__/injector-disk-pressure.test.ts +3 -12
- package/src/__tests__/injector-document-comments.test.ts +2 -2
- package/src/__tests__/injector-pkb-v2-silenced.test.ts +30 -22
- package/src/__tests__/injector-v3-suppression.test.ts +31 -37
- package/src/__tests__/internal-telemetry-routes.test.ts +109 -0
- package/src/__tests__/list-messages-page-latest.test.ts +60 -0
- package/src/__tests__/list-messages-tool-merge.test.ts +20 -0
- package/src/__tests__/llm-usage-store.test.ts +223 -1
- package/src/__tests__/memory-retrieval-hook.test.ts +297 -0
- package/src/__tests__/memory-v2-static-injector.test.ts +103 -35
- package/src/__tests__/native-web-search.test.ts +191 -0
- package/src/__tests__/onboarding-template-contract.test.ts +2 -0
- package/src/__tests__/openai-image-service.test.ts +17 -0
- package/src/__tests__/openai-provider.test.ts +31 -1
- package/src/__tests__/persist-unsendable-image.test.ts +215 -0
- package/src/__tests__/persistence-secret-redaction.test.ts +1 -0
- package/src/__tests__/pipeline-runner.test.ts +29 -39
- package/src/__tests__/pkb-autoinject.test.ts +2 -5
- package/src/__tests__/plugin-bootstrap.test.ts +13 -28
- package/src/__tests__/plugin-registry.test.ts +0 -27
- package/src/__tests__/plugin-types.test.ts +2 -125
- package/src/__tests__/process-message-display-content.test.ts +6 -2
- package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +5 -1
- package/src/__tests__/resolve-trust-class.test.ts +4 -4
- package/src/__tests__/runtime-events-sse-reconnect.test.ts +60 -23
- package/src/__tests__/schedule-routes.test.ts +603 -2
- package/src/__tests__/schedule-store.test.ts +41 -0
- package/src/__tests__/schedule-tools.test.ts +35 -0
- package/src/__tests__/server-history-render.test.ts +314 -1
- package/src/__tests__/skillssh-files.test.ts +1 -1
- package/src/__tests__/system-prompt.test.ts +20 -0
- package/src/__tests__/task-scheduler.test.ts +162 -1
- package/src/__tests__/terminal-tools.test.ts +6 -1
- package/src/__tests__/title-generate-hook.test.ts +319 -0
- package/src/__tests__/tool-error-hook.test.ts +278 -0
- package/src/__tests__/tool-preview-lifecycle.test.ts +468 -5
- package/src/__tests__/tool-result-metadata-plumbing.test.ts +1 -0
- package/src/__tests__/tool-result-truncate-hook.test.ts +127 -0
- package/src/__tests__/tool-result-truncation.test.ts +0 -2
- package/src/__tests__/ui-choice-copy-surfaces.test.ts +254 -0
- package/src/__tests__/ui-work-result-surface.test.ts +159 -0
- package/src/__tests__/usage-routes.test.ts +285 -1
- package/src/__tests__/user-plugin-loader.test.ts +2 -2
- package/src/__tests__/voice-session-bridge.test.ts +6 -3
- package/src/__tests__/web-search-backend-failure.test.ts +166 -0
- package/src/agent/loop.ts +346 -442
- package/src/api/events/assistant-thinking-delta.ts +33 -0
- package/src/api/events/tool-output-chunk.ts +45 -0
- package/src/api/events/tool-use-preview-start.ts +32 -0
- package/src/api/events/trace-event.ts +69 -0
- package/src/api/index.ts +48 -13
- package/src/api/responses/conversation-message.ts +368 -0
- package/src/avatar/__tests__/avatar-store.test.ts +34 -29
- package/src/cli/commands/__tests__/notifications.test.ts +58 -14
- package/src/cli/commands/notifications.ts +112 -60
- package/src/config/assistant-feature-flags.ts +22 -11
- package/src/config/bundled-skills/app-builder/SKILL.md +3 -20
- package/src/config/bundled-skills/app-builder/references/examples/README.md +17 -0
- package/src/config/bundled-skills/app-builder/references/examples/expense-tracker.md +515 -0
- package/src/config/bundled-skills/app-builder/references/examples/focus-timer.md +342 -0
- package/src/config/bundled-skills/app-builder/references/examples/habit-tracker.md +490 -0
- package/src/config/bundled-skills/document-editor/SKILL.md +1 -1
- package/src/config/bundled-skills/messaging/SKILL.md +0 -7
- package/src/config/feature-flag-cache.ts +3 -3
- package/src/config/feature-flag-registry.json +35 -3
- package/src/config/schemas/__tests__/memory-v2.test.ts +1 -0
- package/src/config/schemas/__tests__/memory-v3.test.ts +25 -0
- package/src/config/schemas/llm.ts +1 -0
- package/src/config/schemas/memory-v2.ts +8 -0
- package/src/config/schemas/memory-v3.ts +8 -0
- package/src/config/schemas/platform.ts +8 -0
- package/src/config/seed-inference-profiles.ts +2 -2
- package/src/config/skills.ts +13 -0
- package/src/context/compactor.ts +1 -1
- package/src/context/strip-injections.ts +122 -0
- package/src/context/token-estimator.ts +23 -0
- package/src/context/tool-result-truncation.ts +0 -23
- package/src/context/window-manager.ts +3 -6
- package/src/credential-execution/executable-discovery.ts +16 -0
- package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +6 -0
- package/src/daemon/__tests__/inference-profile-notification.test.ts +153 -0
- package/src/daemon/__tests__/native-web-search-metadata.test.ts +10 -8
- package/src/daemon/assistant-attachments.ts +1 -1
- package/src/daemon/config-watcher.ts +2 -2
- package/src/daemon/context-overflow-reducer.ts +0 -1
- package/src/daemon/conversation-agent-loop-handlers.ts +605 -153
- package/src/daemon/conversation-agent-loop.ts +281 -760
- package/src/daemon/conversation-history.ts +5 -4
- package/src/daemon/conversation-lifecycle.ts +3 -4
- package/src/daemon/conversation-messaging.ts +7 -6
- package/src/daemon/conversation-process.ts +11 -16
- package/src/daemon/conversation-runtime-assembly.ts +130 -347
- package/src/daemon/conversation-slash.ts +6 -25
- package/src/daemon/conversation-surfaces.ts +222 -4
- package/src/daemon/conversation-tool-setup.ts +2 -29
- package/src/daemon/conversation.ts +32 -14
- package/src/daemon/external-plugins-bootstrap.ts +9 -10
- package/src/daemon/handlers/config-a2a.ts +51 -36
- package/src/daemon/handlers/config-slack-channel.ts +20 -14
- package/src/daemon/handlers/config-telegram.ts +16 -2
- package/src/daemon/handlers/shared.ts +156 -84
- package/src/daemon/handlers/skills.ts +39 -10
- package/src/daemon/lifecycle.ts +4 -0
- package/src/daemon/message-types/apps.ts +1 -29
- package/src/daemon/message-types/messages.ts +9 -57
- package/src/daemon/message-types/skills.ts +2 -0
- package/src/daemon/message-types/surfaces.ts +136 -3
- package/src/daemon/now-scratchpad.ts +21 -0
- package/src/daemon/orphan-reaper.test.ts +210 -0
- package/src/daemon/orphan-reaper.ts +240 -0
- package/src/daemon/persist-unsendable-image.ts +117 -0
- package/src/daemon/process-message.ts +1 -3
- package/src/daemon/trace-emitter.ts +6 -4
- package/src/daemon/trust-context.ts +19 -0
- package/src/daemon/wake-target-adapter.ts +3 -1
- package/src/home/home-greeting-cache.ts +24 -1
- package/src/ipc/gateway-client.test.ts +2 -2
- package/src/ipc/gateway-client.ts +3 -3
- package/src/media/gemini-image-service.ts +15 -0
- package/src/media/openai-image-service.ts +14 -0
- package/src/media/types.ts +34 -0
- package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +56 -0
- package/src/memory/auth-fallback-events-store.ts +94 -0
- package/src/memory/conversation-title-service.ts +65 -41
- package/src/memory/db-init.ts +4 -0
- package/src/memory/graph/__tests__/conversation-graph-memory-registry.test.ts +119 -0
- package/src/memory/graph/conversation-graph-memory.ts +65 -0
- package/src/memory/jobs-store.ts +33 -0
- package/src/memory/jobs-worker.ts +31 -4
- package/src/memory/llm-usage-store.ts +224 -50
- package/src/memory/migrations/222-strip-placeholder-sentinels-from-messages.ts +6 -5
- package/src/memory/migrations/270-schedule-source-conversation.ts +13 -0
- package/src/memory/migrations/271-create-auth-fallback-events.ts +21 -0
- package/src/memory/migrations/index.ts +2 -0
- package/src/memory/pkb/autoinject.ts +61 -0
- package/src/memory/pkb/context.ts +50 -0
- package/src/memory/pkb/types.ts +14 -0
- package/src/memory/schedule-attribution-sql.ts +104 -0
- package/src/memory/schema/infrastructure.ts +16 -0
- package/src/memory/usage-grouped-buckets.ts +6 -1
- package/src/memory/v2/__tests__/consolidation-job.test.ts +1 -1
- package/src/memory/v2/consolidation-job.ts +1 -1
- package/src/memory/v3/__tests__/health.test.ts +16 -0
- package/src/memory/v3/__tests__/orchestrate.test.ts +45 -9
- package/src/memory/v3/__tests__/provider-blocks.test.ts +13 -0
- package/src/memory/v3/__tests__/router.test.ts +101 -29
- package/src/memory/v3/__tests__/selector.test.ts +93 -27
- package/src/memory/v3/__tests__/shadow-plugin.test.ts +23 -5
- package/src/memory/v3/health.ts +0 -0
- package/src/memory/v3/llm-retry.ts +32 -0
- package/src/memory/v3/orchestrate.ts +26 -14
- package/src/memory/v3/provider-blocks.ts +15 -5
- package/src/memory/v3/router.ts +48 -42
- package/src/memory/v3/selector.ts +57 -42
- package/src/memory/v3/shadow-plugin.ts +47 -15
- package/src/memory/v3/types.ts +8 -0
- package/src/notifications/conversation-pairing.ts +8 -15
- package/src/notifications/decision-engine.ts +6 -3
- package/src/notifications/home-feed-side-effect.ts +12 -1
- package/src/permissions/prompter.ts +4 -0
- package/src/plugin-api/constants.ts +4 -0
- package/src/plugin-api/index.ts +8 -1
- package/src/plugin-api/types.ts +151 -1
- package/src/plugins/defaults/empty-response/hooks/stop.ts +126 -0
- package/src/plugins/defaults/empty-response/register.ts +8 -13
- package/src/plugins/defaults/index.ts +1 -15
- package/src/plugins/defaults/injectors/register.ts +243 -74
- package/src/plugins/defaults/memory-retrieval/hooks/post-compact.ts +91 -0
- package/src/plugins/defaults/memory-retrieval/hooks/user-prompt-submit-temp.ts +216 -0
- package/src/plugins/defaults/memory-retrieval/injector-chain.ts +35 -0
- package/src/plugins/defaults/title-generate/hooks/stop.ts +75 -0
- package/src/plugins/defaults/title-generate/hooks/user-prompt-submit.ts +35 -0
- package/src/plugins/defaults/title-generate/package.json +1 -1
- package/src/plugins/defaults/title-generate/register.ts +18 -18
- package/src/plugins/defaults/tool-error/hooks/post-tool-use.ts +118 -0
- package/src/plugins/defaults/tool-error/package.json +1 -1
- package/src/plugins/defaults/tool-error/register.ts +9 -21
- package/src/plugins/defaults/tool-result-truncate/hooks/post-tool-use.ts +32 -0
- package/src/plugins/defaults/tool-result-truncate/register.ts +10 -21
- package/src/plugins/defaults/tool-result-truncate/terminal.ts +37 -18
- package/src/plugins/pipeline.ts +6 -18
- package/src/plugins/registry.ts +8 -25
- package/src/plugins/types.ts +43 -474
- package/src/proactive-artifact/aux-message-injector.ts +3 -3
- package/src/proactive-artifact/job.test.ts +7 -12
- package/src/prompts/__tests__/system-prompt.test.ts +36 -0
- package/src/prompts/templates/BOOTSTRAP-ACTIVATION-RAIL.md +62 -0
- package/src/prompts/templates/BOOTSTRAP.md +2 -2
- package/src/prompts/templates/system-sections.ts +15 -0
- package/src/providers/anthropic/client.ts +37 -29
- package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +112 -0
- package/src/providers/openai/chat-completions-provider.ts +44 -0
- package/src/providers/openrouter/client.ts +1 -0
- package/src/providers/placeholder-sentinels.ts +35 -0
- package/src/runtime/__tests__/agent-wake.test.ts +5 -1
- package/src/runtime/agent-wake.ts +2 -2
- package/src/runtime/assistant-event-hub.ts +36 -6
- package/src/runtime/{conversation-stream-state.ts → assistant-stream-state.ts} +132 -58
- package/src/runtime/http-router.ts +16 -21
- package/src/runtime/http-types.ts +16 -70
- package/src/runtime/pending-interactions.ts +1 -0
- package/src/runtime/routes/__tests__/consolidation-routes.test.ts +265 -2
- package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +31 -1
- package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +6 -2
- package/src/runtime/routes/__tests__/tts-routes.test.ts +6 -2
- package/src/runtime/routes/app-management-routes.ts +6 -117
- package/src/runtime/routes/app-routes.ts +13 -15
- package/src/runtime/routes/attachment-routes.ts +26 -15
- package/src/runtime/routes/avatar-routes.ts +26 -0
- package/src/runtime/routes/btw-routes.ts +29 -23
- package/src/runtime/routes/consolidation-routes.ts +120 -20
- package/src/runtime/routes/conversation-query-routes.ts +2 -0
- package/src/runtime/routes/conversation-routes.ts +358 -184
- package/src/runtime/routes/documents-routes.ts +4 -0
- package/src/runtime/routes/domain-routes.ts +51 -37
- package/src/runtime/routes/epoch-millis-range.ts +34 -0
- package/src/runtime/routes/events-routes.ts +28 -34
- package/src/runtime/routes/gateway-log-routes.ts +26 -4
- package/src/runtime/routes/heartbeat-routes.ts +32 -12
- package/src/runtime/routes/identity-intro-cache.ts +11 -34
- package/src/runtime/routes/identity-routes.ts +208 -17
- package/src/runtime/routes/image-generation-routes.ts +40 -2
- package/src/runtime/routes/index.ts +2 -0
- package/src/runtime/routes/integrations/a2a.ts +12 -10
- package/src/runtime/routes/integrations/slack/__tests__/channel.test.ts +16 -0
- package/src/runtime/routes/integrations/slack/channel.ts +4 -0
- package/src/runtime/routes/integrations/slack/share.ts +27 -6
- package/src/runtime/routes/integrations/telegram.ts +6 -0
- package/src/runtime/routes/integrations/twilio.ts +42 -0
- package/src/runtime/routes/internal-telemetry-routes.ts +88 -0
- package/src/runtime/routes/log-export-routes.ts +8 -0
- package/src/runtime/routes/memory-v2-routes.ts +15 -8
- package/src/runtime/routes/memory-v3-routes.ts +50 -28
- package/src/runtime/routes/oauth-apps.ts +66 -12
- package/src/runtime/routes/oauth-providers.ts +44 -5
- package/src/runtime/routes/platform-routes.ts +81 -5
- package/src/runtime/routes/playground/__tests__/force-compact.test.ts +6 -4
- package/src/runtime/routes/playground/force-compact.ts +1 -1
- package/src/runtime/routes/rename-conversation-routes.ts +5 -0
- package/src/runtime/routes/schedule-routes.ts +152 -42
- package/src/runtime/routes/secret-routes.ts +14 -2
- package/src/runtime/routes/skills-routes.ts +43 -14
- package/src/runtime/routes/tool-call-confirmation-enrichment.test.ts +161 -0
- package/src/runtime/routes/tool-call-confirmation-enrichment.ts +107 -0
- package/src/runtime/routes/trust-rules-routes.ts +26 -2
- package/src/runtime/routes/tts-routes.ts +35 -0
- package/src/runtime/routes/types.ts +66 -8
- package/src/runtime/routes/usage-routes.ts +47 -39
- package/src/runtime/routes/webhook-routes.ts +41 -2
- package/src/runtime/routes/workspace-routes.ts +4 -0
- package/src/runtime/services/__tests__/analyze-conversation.test.ts +6 -0
- package/src/runtime/services/analyze-conversation.ts +2 -2
- package/src/schedule/schedule-store.ts +20 -1
- package/src/schedule/schedule-usage-store.ts +83 -0
- package/src/schedule/scheduler.ts +12 -5
- package/src/skills/catalog-files.ts +2 -2
- package/src/skills/catalog-install.ts +3 -0
- package/src/skills/categories-cache.ts +118 -0
- package/src/skills/clawhub-files.ts +1 -2
- package/src/skills/skillssh-files.ts +1 -2
- package/src/telemetry/types.ts +29 -1
- package/src/telemetry/usage-telemetry-reporter.test.ts +112 -3
- package/src/telemetry/usage-telemetry-reporter.ts +57 -2
- package/src/tools/executor.ts +1 -53
- package/src/tools/network/__tests__/web-search-metadata.test.ts +7 -1
- package/src/tools/network/__tests__/web-search.test.ts +11 -3
- package/src/tools/network/web-search-error.test.ts +248 -0
- package/src/tools/network/web-search-error.ts +267 -0
- package/src/tools/network/web-search.ts +207 -48
- package/src/tools/schedule/create.ts +2 -0
- package/src/tools/terminal/safe-env.ts +10 -1
- package/src/tools/ui-surface/definitions.ts +9 -1
- package/src/tts/__tests__/provider-catalog-consistency.test.ts +85 -1
- package/src/tts/provider-catalog.ts +76 -1
- package/src/util/mutex.ts +47 -0
- package/src/workspace/git-service.ts +1 -42
- package/src/workspace/migrations/095-bump-heartbeat-interval-30m-to-60m.ts +51 -0
- package/src/workspace/migrations/096-reduce-quality-profile-effort.ts +72 -0
- package/src/workspace/migrations/097-enable-adaptive-thinking-managed-profiles.ts +93 -0
- package/src/workspace/migrations/registry.ts +6 -0
- package/src/__tests__/bootstrap-turn-cleanup.test.ts +0 -44
- package/src/__tests__/empty-response-pipeline.test.ts +0 -423
- package/src/__tests__/llm-call-pipeline.test.ts +0 -287
- package/src/__tests__/memory-retrieval-pipeline.test.ts +0 -418
- package/src/__tests__/persistence-pipeline.test.ts +0 -503
- package/src/__tests__/title-generate-pipeline.test.ts +0 -211
- package/src/__tests__/token-estimate-pipeline.test.ts +0 -479
- package/src/__tests__/tool-error-pipeline.test.ts +0 -241
- package/src/__tests__/tool-execute-pipeline.test.ts +0 -417
- package/src/__tests__/tool-result-truncate-pipeline.test.ts +0 -341
- package/src/daemon/bootstrap-turn-cleanup.ts +0 -45
- package/src/gallery/default-gallery.ts +0 -1359
- package/src/gallery/gallery-manifest.ts +0 -28
- package/src/home/feature-gate.ts +0 -22
- package/src/plugins/defaults/empty-response/middlewares/emptyResponse.ts +0 -22
- package/src/plugins/defaults/empty-response/terminal.ts +0 -106
- package/src/plugins/defaults/injectors/package.json +0 -15
- package/src/plugins/defaults/llm-call/middlewares/llmCall.ts +0 -17
- package/src/plugins/defaults/llm-call/package.json +0 -15
- package/src/plugins/defaults/llm-call/register.ts +0 -45
- package/src/plugins/defaults/memory-retrieval/middlewares/memoryRetrieval.ts +0 -17
- package/src/plugins/defaults/memory-retrieval/package.json +0 -15
- package/src/plugins/defaults/memory-retrieval/register.ts +0 -181
- package/src/plugins/defaults/persistence/middlewares/persistence.ts +0 -19
- package/src/plugins/defaults/persistence/package.json +0 -15
- package/src/plugins/defaults/persistence/register.ts +0 -38
- package/src/plugins/defaults/persistence/terminal.ts +0 -83
- package/src/plugins/defaults/title-generate/terminal.ts +0 -31
- package/src/plugins/defaults/token-estimate/middlewares/tokenEstimate.ts +0 -23
- package/src/plugins/defaults/token-estimate/package.json +0 -15
- package/src/plugins/defaults/token-estimate/register.ts +0 -34
- package/src/plugins/defaults/token-estimate/terminal.ts +0 -40
- package/src/plugins/defaults/tool-error/middlewares/toolError.ts +0 -21
- package/src/plugins/defaults/tool-error/terminal.ts +0 -47
- package/src/plugins/defaults/tool-execute/middlewares/toolExecute.ts +0 -23
- package/src/plugins/defaults/tool-execute/package.json +0 -15
- package/src/plugins/defaults/tool-execute/register.ts +0 -49
- package/src/plugins/defaults/tool-result-truncate/middlewares/toolResultTruncate.ts +0 -23
- package/src/plugins/defaults/tool-result-truncate/types.ts +0 -22
- package/src/skills/category-inference.ts +0 -111
|
@@ -1,211 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Tests for the `titleGenerate` pipeline (PR 28).
|
|
3
|
-
*
|
|
4
|
-
* The title-generation side effect used to be a direct call to
|
|
5
|
-
* `queueGenerateConversationTitle` inside `conversation-agent-loop.ts`.
|
|
6
|
-
* After PR 28 the assistant routes that call through the plugin pipeline
|
|
7
|
-
* runner, giving plugins an opportunity to observe/replace the default
|
|
8
|
-
* implementation.
|
|
9
|
-
*
|
|
10
|
-
* Covers:
|
|
11
|
-
* - The default plugin's terminal delegates to
|
|
12
|
-
* `queueGenerateConversationTitle` with the same arguments the agent
|
|
13
|
-
* loop constructs.
|
|
14
|
-
* - A custom plugin can install a short-circuit middleware that replaces
|
|
15
|
-
* the terminal with a deterministic generator. The default terminal is
|
|
16
|
-
* NOT invoked in that case.
|
|
17
|
-
*
|
|
18
|
-
* Mocks `memory/conversation-title-service.js` so the tests don't touch
|
|
19
|
-
* the real provider stack, and resets the plugin registry between cases.
|
|
20
|
-
*/
|
|
21
|
-
|
|
22
|
-
import { beforeEach, describe, expect, mock, test } from "bun:test";
|
|
23
|
-
|
|
24
|
-
// Stub the title-generation service before importing anything that binds
|
|
25
|
-
// to it, so both the default plugin and the agent loop capture the
|
|
26
|
-
// stubbed binding.
|
|
27
|
-
const queueGenerateConversationTitleMock = mock(
|
|
28
|
-
(_params: {
|
|
29
|
-
conversationId: string;
|
|
30
|
-
provider?: unknown;
|
|
31
|
-
userMessage?: string;
|
|
32
|
-
onTitleUpdated?: (title: string) => void;
|
|
33
|
-
}): void => undefined,
|
|
34
|
-
);
|
|
35
|
-
mock.module("../memory/conversation-title-service.js", () => ({
|
|
36
|
-
queueGenerateConversationTitle: queueGenerateConversationTitleMock,
|
|
37
|
-
}));
|
|
38
|
-
|
|
39
|
-
import { defaultTitleGenerateTerminal } from "../plugins/defaults/title-generate/terminal.js";
|
|
40
|
-
import { DEFAULT_TIMEOUTS, runPipeline } from "../plugins/pipeline.js";
|
|
41
|
-
import {
|
|
42
|
-
getMiddlewaresFor,
|
|
43
|
-
registerPlugin,
|
|
44
|
-
resetPluginRegistryForTests,
|
|
45
|
-
} from "../plugins/registry.js";
|
|
46
|
-
import type {
|
|
47
|
-
Middleware,
|
|
48
|
-
Plugin,
|
|
49
|
-
TitleArgs,
|
|
50
|
-
TitleResult,
|
|
51
|
-
TurnContext,
|
|
52
|
-
} from "../plugins/types.js";
|
|
53
|
-
|
|
54
|
-
function makeCtx(overrides: Partial<TurnContext> = {}): TurnContext {
|
|
55
|
-
return {
|
|
56
|
-
requestId: "req-1",
|
|
57
|
-
conversationId: "conv-1",
|
|
58
|
-
turnIndex: 0,
|
|
59
|
-
trust: { sourceChannel: "vellum", trustClass: "unknown" },
|
|
60
|
-
...overrides,
|
|
61
|
-
};
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
function makeArgs(overrides: Partial<TitleArgs> = {}): TitleArgs {
|
|
65
|
-
return {
|
|
66
|
-
conversationId: "conv-1",
|
|
67
|
-
userMessage: "hello world",
|
|
68
|
-
...overrides,
|
|
69
|
-
};
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
describe("titleGenerate pipeline", () => {
|
|
73
|
-
beforeEach(() => {
|
|
74
|
-
resetPluginRegistryForTests();
|
|
75
|
-
queueGenerateConversationTitleMock.mockReset();
|
|
76
|
-
queueGenerateConversationTitleMock.mockImplementation(() => undefined);
|
|
77
|
-
// Re-register the default plugin after the registry reset so tests see
|
|
78
|
-
// the same shape the daemon sees at runtime.
|
|
79
|
-
registerPlugin({
|
|
80
|
-
manifest: {
|
|
81
|
-
name: "default-title-generate",
|
|
82
|
-
version: "1.0.0",
|
|
83
|
-
},
|
|
84
|
-
});
|
|
85
|
-
});
|
|
86
|
-
|
|
87
|
-
test("default: pipeline terminal queues a title-generation job", async () => {
|
|
88
|
-
const ctx = makeCtx();
|
|
89
|
-
const onTitleUpdated = mock((_title: string) => undefined);
|
|
90
|
-
const args = makeArgs({
|
|
91
|
-
conversationId: "conv-1",
|
|
92
|
-
userMessage: "first message",
|
|
93
|
-
onTitleUpdated,
|
|
94
|
-
});
|
|
95
|
-
|
|
96
|
-
await runPipeline(
|
|
97
|
-
"titleGenerate",
|
|
98
|
-
getMiddlewaresFor("titleGenerate"),
|
|
99
|
-
defaultTitleGenerateTerminal,
|
|
100
|
-
args,
|
|
101
|
-
ctx,
|
|
102
|
-
DEFAULT_TIMEOUTS.titleGenerate,
|
|
103
|
-
);
|
|
104
|
-
|
|
105
|
-
// The default terminal must have delegated to queueGenerateConversationTitle
|
|
106
|
-
// with every argument the caller supplied, including the callback.
|
|
107
|
-
expect(queueGenerateConversationTitleMock).toHaveBeenCalledTimes(1);
|
|
108
|
-
const call = queueGenerateConversationTitleMock.mock.calls[0]?.[0];
|
|
109
|
-
expect(call?.conversationId).toBe("conv-1");
|
|
110
|
-
expect(call?.userMessage).toBe("first message");
|
|
111
|
-
expect(call?.onTitleUpdated).toBe(onTitleUpdated);
|
|
112
|
-
});
|
|
113
|
-
|
|
114
|
-
test("default: pipeline result is the empty object from the terminal", async () => {
|
|
115
|
-
const ctx = makeCtx();
|
|
116
|
-
const result = await runPipeline(
|
|
117
|
-
"titleGenerate",
|
|
118
|
-
getMiddlewaresFor("titleGenerate"),
|
|
119
|
-
defaultTitleGenerateTerminal,
|
|
120
|
-
makeArgs(),
|
|
121
|
-
ctx,
|
|
122
|
-
DEFAULT_TIMEOUTS.titleGenerate,
|
|
123
|
-
);
|
|
124
|
-
expect(result).toEqual({});
|
|
125
|
-
});
|
|
126
|
-
|
|
127
|
-
test("custom plugin: short-circuit middleware replaces the default with a deterministic generator", async () => {
|
|
128
|
-
// A custom plugin installs middleware that fabricates a title
|
|
129
|
-
// deterministically and never calls `next`, so the default terminal
|
|
130
|
-
// (which would delegate to queueGenerateConversationTitle) is
|
|
131
|
-
// skipped entirely.
|
|
132
|
-
const observedTitles: string[] = [];
|
|
133
|
-
|
|
134
|
-
const deterministicMw: Middleware<TitleArgs, TitleResult> = async (
|
|
135
|
-
args,
|
|
136
|
-
) => {
|
|
137
|
-
const fabricated = `[deterministic] ${args.userMessage}`;
|
|
138
|
-
args.onTitleUpdated?.(fabricated);
|
|
139
|
-
observedTitles.push(fabricated);
|
|
140
|
-
return {};
|
|
141
|
-
};
|
|
142
|
-
|
|
143
|
-
const customPlugin: Plugin = {
|
|
144
|
-
manifest: {
|
|
145
|
-
name: "custom-deterministic-title",
|
|
146
|
-
version: "0.0.1",
|
|
147
|
-
},
|
|
148
|
-
middleware: { titleGenerate: deterministicMw },
|
|
149
|
-
};
|
|
150
|
-
registerPlugin(customPlugin);
|
|
151
|
-
|
|
152
|
-
const receivedTitle: string[] = [];
|
|
153
|
-
const args = makeArgs({
|
|
154
|
-
userMessage: "what is the weather",
|
|
155
|
-
onTitleUpdated: (title) => {
|
|
156
|
-
receivedTitle.push(title);
|
|
157
|
-
},
|
|
158
|
-
});
|
|
159
|
-
|
|
160
|
-
await runPipeline(
|
|
161
|
-
"titleGenerate",
|
|
162
|
-
getMiddlewaresFor("titleGenerate"),
|
|
163
|
-
defaultTitleGenerateTerminal,
|
|
164
|
-
args,
|
|
165
|
-
makeCtx(),
|
|
166
|
-
DEFAULT_TIMEOUTS.titleGenerate,
|
|
167
|
-
);
|
|
168
|
-
|
|
169
|
-
// Deterministic middleware produced the expected title and invoked
|
|
170
|
-
// the caller's callback.
|
|
171
|
-
expect(observedTitles).toEqual(["[deterministic] what is the weather"]);
|
|
172
|
-
expect(receivedTitle).toEqual(["[deterministic] what is the weather"]);
|
|
173
|
-
// The default terminal must NOT have been reached — it would have
|
|
174
|
-
// called the real title-service stub.
|
|
175
|
-
expect(queueGenerateConversationTitleMock).not.toHaveBeenCalled();
|
|
176
|
-
});
|
|
177
|
-
|
|
178
|
-
test("custom plugin: passthrough middleware leaves the default in charge", async () => {
|
|
179
|
-
// A plugin that always calls `next` just observes — the default
|
|
180
|
-
// terminal still runs and queues the title-generation job.
|
|
181
|
-
let middlewareSawArgs = false;
|
|
182
|
-
|
|
183
|
-
const passthroughMw: Middleware<TitleArgs, TitleResult> = async (
|
|
184
|
-
args,
|
|
185
|
-
next,
|
|
186
|
-
) => {
|
|
187
|
-
middlewareSawArgs = true;
|
|
188
|
-
return next(args);
|
|
189
|
-
};
|
|
190
|
-
|
|
191
|
-
registerPlugin({
|
|
192
|
-
manifest: {
|
|
193
|
-
name: "observer",
|
|
194
|
-
version: "0.0.1",
|
|
195
|
-
},
|
|
196
|
-
middleware: { titleGenerate: passthroughMw },
|
|
197
|
-
});
|
|
198
|
-
|
|
199
|
-
await runPipeline(
|
|
200
|
-
"titleGenerate",
|
|
201
|
-
getMiddlewaresFor("titleGenerate"),
|
|
202
|
-
defaultTitleGenerateTerminal,
|
|
203
|
-
makeArgs(),
|
|
204
|
-
makeCtx(),
|
|
205
|
-
DEFAULT_TIMEOUTS.titleGenerate,
|
|
206
|
-
);
|
|
207
|
-
|
|
208
|
-
expect(middlewareSawArgs).toBe(true);
|
|
209
|
-
expect(queueGenerateConversationTitleMock).toHaveBeenCalledTimes(1);
|
|
210
|
-
});
|
|
211
|
-
});
|
|
@@ -1,479 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Tests for the `tokenEstimate` plugin pipeline (PR 22 of the
|
|
3
|
-
* agent-plugin-system plan).
|
|
4
|
-
*
|
|
5
|
-
* Covers:
|
|
6
|
-
* - The default plugin's terminal middleware matches
|
|
7
|
-
* {@link estimatePromptTokens} output exactly across a set of golden
|
|
8
|
-
* inputs (empty history, text-only, tools, provider-specific image sizing).
|
|
9
|
-
* - Running the pipeline end-to-end with the default registered produces
|
|
10
|
-
* the same numeric result as calling `estimatePromptTokens` directly.
|
|
11
|
-
* - A custom plugin that short-circuits the chain can override the default,
|
|
12
|
-
* proving the extension point works.
|
|
13
|
-
* - When a non-1.0 EWMA calibration sample has been recorded, the terminal's
|
|
14
|
-
* output reflects that correction rather than the raw estimate.
|
|
15
|
-
*
|
|
16
|
-
* These tests exercise the registry + runner directly. They do not touch
|
|
17
|
-
* `bootstrapPlugins` — the default registration path is covered by the
|
|
18
|
-
* bootstrap suite.
|
|
19
|
-
*/
|
|
20
|
-
|
|
21
|
-
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
22
|
-
|
|
23
|
-
import {
|
|
24
|
-
recordEstimate,
|
|
25
|
-
resetCalibrations,
|
|
26
|
-
} from "../context/estimator-calibration.js";
|
|
27
|
-
import {
|
|
28
|
-
estimatePromptTokens,
|
|
29
|
-
estimatePromptTokensRaw,
|
|
30
|
-
estimateToolsTokens,
|
|
31
|
-
} from "../context/token-estimator.js";
|
|
32
|
-
import type { TrustContext } from "../daemon/trust-context.js";
|
|
33
|
-
import { defaultTokenEstimatePlugin } from "../plugins/defaults/token-estimate/register.js";
|
|
34
|
-
import { defaultTokenEstimateTerminal } from "../plugins/defaults/token-estimate/terminal.js";
|
|
35
|
-
import { DEFAULT_TIMEOUTS, runPipeline } from "../plugins/pipeline.js";
|
|
36
|
-
import {
|
|
37
|
-
getMiddlewaresFor,
|
|
38
|
-
registerPlugin,
|
|
39
|
-
resetPluginRegistryForTests,
|
|
40
|
-
} from "../plugins/registry.js";
|
|
41
|
-
import type {
|
|
42
|
-
EstimateArgs,
|
|
43
|
-
EstimateResult,
|
|
44
|
-
Middleware,
|
|
45
|
-
Plugin,
|
|
46
|
-
TurnContext,
|
|
47
|
-
} from "../plugins/types.js";
|
|
48
|
-
import type { Message, ToolDefinition } from "../providers/types.js";
|
|
49
|
-
|
|
50
|
-
// ── Fixtures ─────────────────────────────────────────────────────────────
|
|
51
|
-
|
|
52
|
-
const trust: TrustContext = {
|
|
53
|
-
sourceChannel: "vellum",
|
|
54
|
-
trustClass: "guardian",
|
|
55
|
-
};
|
|
56
|
-
|
|
57
|
-
function makeCtx(overrides: Partial<TurnContext> = {}): TurnContext {
|
|
58
|
-
return {
|
|
59
|
-
requestId: "req-token-estimate-test",
|
|
60
|
-
conversationId: "conv-token-estimate-test",
|
|
61
|
-
turnIndex: 0,
|
|
62
|
-
trust,
|
|
63
|
-
...overrides,
|
|
64
|
-
};
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
const EMPTY_HISTORY: Message[] = [];
|
|
68
|
-
|
|
69
|
-
const TEXT_HISTORY: Message[] = [
|
|
70
|
-
{ role: "user", content: [{ type: "text", text: "hello there" }] },
|
|
71
|
-
{
|
|
72
|
-
role: "assistant",
|
|
73
|
-
content: [
|
|
74
|
-
{ type: "text", text: "hi! how can I help you today?" },
|
|
75
|
-
{ type: "text", text: "a second text block for good measure" },
|
|
76
|
-
],
|
|
77
|
-
},
|
|
78
|
-
];
|
|
79
|
-
|
|
80
|
-
const TOOL_USE_HISTORY: Message[] = [
|
|
81
|
-
{ role: "user", content: [{ type: "text", text: "what's in the log?" }] },
|
|
82
|
-
{
|
|
83
|
-
role: "assistant",
|
|
84
|
-
content: [
|
|
85
|
-
{
|
|
86
|
-
type: "tool_use",
|
|
87
|
-
id: "tu-1",
|
|
88
|
-
name: "bash",
|
|
89
|
-
input: { command: "tail -n 5 server.log" },
|
|
90
|
-
},
|
|
91
|
-
],
|
|
92
|
-
},
|
|
93
|
-
{
|
|
94
|
-
role: "user",
|
|
95
|
-
content: [
|
|
96
|
-
{
|
|
97
|
-
type: "tool_result",
|
|
98
|
-
tool_use_id: "tu-1",
|
|
99
|
-
content: "line1\nline2\nline3",
|
|
100
|
-
},
|
|
101
|
-
],
|
|
102
|
-
},
|
|
103
|
-
];
|
|
104
|
-
|
|
105
|
-
const SYSTEM_PROMPT = "You are a helpful assistant with a long preamble.";
|
|
106
|
-
|
|
107
|
-
const SAMPLE_TOOLS: ToolDefinition[] = [
|
|
108
|
-
{
|
|
109
|
-
name: "bash",
|
|
110
|
-
description: "Execute a shell command and return its output.",
|
|
111
|
-
input_schema: {
|
|
112
|
-
type: "object",
|
|
113
|
-
properties: { command: { type: "string" } },
|
|
114
|
-
required: ["command"],
|
|
115
|
-
},
|
|
116
|
-
},
|
|
117
|
-
{
|
|
118
|
-
name: "file_read",
|
|
119
|
-
description: "Read a file from the workspace.",
|
|
120
|
-
input_schema: {
|
|
121
|
-
type: "object",
|
|
122
|
-
properties: { path: { type: "string" } },
|
|
123
|
-
required: ["path"],
|
|
124
|
-
},
|
|
125
|
-
},
|
|
126
|
-
];
|
|
127
|
-
|
|
128
|
-
// ── Helpers ──────────────────────────────────────────────────────────────
|
|
129
|
-
|
|
130
|
-
function registerDefault(): void {
|
|
131
|
-
registerPlugin(defaultTokenEstimatePlugin);
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
function calibratedEstimate(
|
|
135
|
-
args: Pick<EstimateArgs, "history" | "systemPrompt" | "providerName"> & {
|
|
136
|
-
tools: ToolDefinition[];
|
|
137
|
-
},
|
|
138
|
-
): number {
|
|
139
|
-
const toolTokenBudget =
|
|
140
|
-
args.tools.length > 0 ? estimateToolsTokens(args.tools) : 0;
|
|
141
|
-
return estimatePromptTokens(args.history, args.systemPrompt, {
|
|
142
|
-
providerName: args.providerName,
|
|
143
|
-
toolTokenBudget,
|
|
144
|
-
});
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
function rawEstimate(
|
|
148
|
-
args: Pick<EstimateArgs, "history" | "systemPrompt" | "providerName"> & {
|
|
149
|
-
tools: ToolDefinition[];
|
|
150
|
-
},
|
|
151
|
-
): number {
|
|
152
|
-
const toolTokenBudget =
|
|
153
|
-
args.tools.length > 0 ? estimateToolsTokens(args.tools) : 0;
|
|
154
|
-
return estimatePromptTokensRaw(args.history, args.systemPrompt, {
|
|
155
|
-
providerName: args.providerName,
|
|
156
|
-
toolTokenBudget,
|
|
157
|
-
});
|
|
158
|
-
}
|
|
159
|
-
|
|
160
|
-
async function runViaPipeline(args: EstimateArgs): Promise<EstimateResult> {
|
|
161
|
-
return runPipeline<EstimateArgs, EstimateResult>(
|
|
162
|
-
"tokenEstimate",
|
|
163
|
-
getMiddlewaresFor("tokenEstimate"),
|
|
164
|
-
// Mirror the production wiring in `daemon/conversation-agent-loop.ts`:
|
|
165
|
-
// the default plugin's middleware is a passthrough, so the terminal is
|
|
166
|
-
// wired in by the call site. Using the same terminal here means the
|
|
167
|
-
// tests exercise the exact composition shape that ships.
|
|
168
|
-
defaultTokenEstimateTerminal,
|
|
169
|
-
args,
|
|
170
|
-
makeCtx(),
|
|
171
|
-
DEFAULT_TIMEOUTS.tokenEstimate,
|
|
172
|
-
);
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
// ── Tests ────────────────────────────────────────────────────────────────
|
|
176
|
-
|
|
177
|
-
beforeEach(() => {
|
|
178
|
-
resetPluginRegistryForTests();
|
|
179
|
-
resetCalibrations();
|
|
180
|
-
});
|
|
181
|
-
|
|
182
|
-
afterEach(() => {
|
|
183
|
-
resetPluginRegistryForTests();
|
|
184
|
-
resetCalibrations();
|
|
185
|
-
});
|
|
186
|
-
|
|
187
|
-
describe("tokenEstimate pipeline — default plugin parity", () => {
|
|
188
|
-
test("default matches estimatePromptTokens on empty history", async () => {
|
|
189
|
-
registerDefault();
|
|
190
|
-
const args: EstimateArgs = {
|
|
191
|
-
history: EMPTY_HISTORY,
|
|
192
|
-
systemPrompt: undefined,
|
|
193
|
-
tools: [],
|
|
194
|
-
providerName: undefined,
|
|
195
|
-
};
|
|
196
|
-
const pipelineResult = await runViaPipeline(args);
|
|
197
|
-
expect(pipelineResult).toBe(calibratedEstimate(args));
|
|
198
|
-
});
|
|
199
|
-
|
|
200
|
-
test("default matches estimatePromptTokens on text-only history", async () => {
|
|
201
|
-
registerDefault();
|
|
202
|
-
const args: EstimateArgs = {
|
|
203
|
-
history: TEXT_HISTORY,
|
|
204
|
-
systemPrompt: SYSTEM_PROMPT,
|
|
205
|
-
tools: [],
|
|
206
|
-
providerName: "anthropic",
|
|
207
|
-
};
|
|
208
|
-
const pipelineResult = await runViaPipeline(args);
|
|
209
|
-
expect(pipelineResult).toBe(calibratedEstimate(args));
|
|
210
|
-
// Sanity: the system prompt adds real token cost, so the number is
|
|
211
|
-
// strictly larger than the bare-history estimate.
|
|
212
|
-
expect(pipelineResult).toBeGreaterThan(
|
|
213
|
-
calibratedEstimate({
|
|
214
|
-
history: TEXT_HISTORY,
|
|
215
|
-
systemPrompt: undefined,
|
|
216
|
-
tools: [],
|
|
217
|
-
providerName: "anthropic",
|
|
218
|
-
}),
|
|
219
|
-
);
|
|
220
|
-
});
|
|
221
|
-
|
|
222
|
-
test("default matches estimatePromptTokens with tool_use/tool_result blocks", async () => {
|
|
223
|
-
registerDefault();
|
|
224
|
-
const args: EstimateArgs = {
|
|
225
|
-
history: TOOL_USE_HISTORY,
|
|
226
|
-
systemPrompt: SYSTEM_PROMPT,
|
|
227
|
-
tools: SAMPLE_TOOLS,
|
|
228
|
-
providerName: "anthropic",
|
|
229
|
-
};
|
|
230
|
-
const pipelineResult = await runViaPipeline(args);
|
|
231
|
-
expect(pipelineResult).toBe(calibratedEstimate(args));
|
|
232
|
-
});
|
|
233
|
-
|
|
234
|
-
test("default folds tool definition tokens into the result", async () => {
|
|
235
|
-
registerDefault();
|
|
236
|
-
const baseArgs: EstimateArgs = {
|
|
237
|
-
history: TEXT_HISTORY,
|
|
238
|
-
systemPrompt: SYSTEM_PROMPT,
|
|
239
|
-
tools: [],
|
|
240
|
-
providerName: "anthropic",
|
|
241
|
-
};
|
|
242
|
-
const withoutTools = await runViaPipeline(baseArgs);
|
|
243
|
-
const withTools = await runViaPipeline({
|
|
244
|
-
...baseArgs,
|
|
245
|
-
tools: SAMPLE_TOOLS,
|
|
246
|
-
});
|
|
247
|
-
// Tools contribute non-zero overhead; the pipeline result must grow.
|
|
248
|
-
const toolBudget = estimateToolsTokens(SAMPLE_TOOLS);
|
|
249
|
-
expect(toolBudget).toBeGreaterThan(0);
|
|
250
|
-
expect(withTools - withoutTools).toBe(toolBudget);
|
|
251
|
-
});
|
|
252
|
-
|
|
253
|
-
test("provider-specific image sizing flows through the default", async () => {
|
|
254
|
-
registerDefault();
|
|
255
|
-
// Two providers see different image token costs for the same content —
|
|
256
|
-
// the raw estimator is the source of truth, so the pipeline must agree
|
|
257
|
-
// under both provider names.
|
|
258
|
-
const imageHistory: Message[] = [
|
|
259
|
-
{
|
|
260
|
-
role: "user",
|
|
261
|
-
content: [
|
|
262
|
-
{
|
|
263
|
-
type: "image",
|
|
264
|
-
source: {
|
|
265
|
-
type: "base64",
|
|
266
|
-
media_type: "image/png",
|
|
267
|
-
// Small fake PNG-ish payload; the estimator's fallback path
|
|
268
|
-
// kicks in when parseImageDimensions fails, which is fine —
|
|
269
|
-
// the two providers still diverge on overhead.
|
|
270
|
-
data: "a".repeat(128),
|
|
271
|
-
},
|
|
272
|
-
},
|
|
273
|
-
],
|
|
274
|
-
},
|
|
275
|
-
];
|
|
276
|
-
const anthropicArgs: EstimateArgs = {
|
|
277
|
-
history: imageHistory,
|
|
278
|
-
systemPrompt: undefined,
|
|
279
|
-
tools: [],
|
|
280
|
-
providerName: "anthropic",
|
|
281
|
-
};
|
|
282
|
-
const openaiArgs: EstimateArgs = {
|
|
283
|
-
...anthropicArgs,
|
|
284
|
-
providerName: "openai",
|
|
285
|
-
};
|
|
286
|
-
const anthropicResult = await runViaPipeline(anthropicArgs);
|
|
287
|
-
const openaiResult = await runViaPipeline(openaiArgs);
|
|
288
|
-
expect(anthropicResult).toBe(calibratedEstimate(anthropicArgs));
|
|
289
|
-
expect(openaiResult).toBe(calibratedEstimate(openaiArgs));
|
|
290
|
-
});
|
|
291
|
-
});
|
|
292
|
-
|
|
293
|
-
describe("tokenEstimate pipeline — calibration correction", () => {
|
|
294
|
-
// Large-ish synthetic history so the raw estimate clears the
|
|
295
|
-
// MIN_SAMPLE_MAGNITUDE (500) guard in the calibrator — otherwise
|
|
296
|
-
// `recordEstimate` drops the sample as noise and the correction stays 1.0.
|
|
297
|
-
const LARGE_TEXT = "lorem ipsum dolor sit amet ".repeat(500);
|
|
298
|
-
const LARGE_HISTORY: Message[] = [
|
|
299
|
-
{ role: "user", content: [{ type: "text", text: LARGE_TEXT }] },
|
|
300
|
-
];
|
|
301
|
-
|
|
302
|
-
test("seeded EWMA sample shifts the terminal's output off the raw estimate", async () => {
|
|
303
|
-
registerDefault();
|
|
304
|
-
const args: EstimateArgs = {
|
|
305
|
-
history: LARGE_HISTORY,
|
|
306
|
-
systemPrompt: SYSTEM_PROMPT,
|
|
307
|
-
tools: [],
|
|
308
|
-
providerName: "anthropic",
|
|
309
|
-
};
|
|
310
|
-
const raw = rawEstimate(args);
|
|
311
|
-
// Provider reports ~30% more tokens than we estimated — a plausible
|
|
312
|
-
// under-count bias. Seed the aggregate (provider, "") key that the
|
|
313
|
-
// terminal consults.
|
|
314
|
-
const actual = Math.round(raw * 1.3);
|
|
315
|
-
recordEstimate("anthropic", "", raw, actual);
|
|
316
|
-
|
|
317
|
-
const pipelineResult = await runViaPipeline(args);
|
|
318
|
-
expect(pipelineResult).toBe(calibratedEstimate(args));
|
|
319
|
-
expect(pipelineResult).not.toBe(raw);
|
|
320
|
-
expect(pipelineResult).toBeGreaterThan(raw);
|
|
321
|
-
});
|
|
322
|
-
});
|
|
323
|
-
|
|
324
|
-
describe("tokenEstimate pipeline — custom override", () => {
|
|
325
|
-
test("custom plugin short-circuit returns a different value than the default", async () => {
|
|
326
|
-
// A plugin that completely replaces the default with a fixed value,
|
|
327
|
-
// proving plugins can substitute provider-native tokenizers (e.g.
|
|
328
|
-
// `countTokens`) without touching orchestrator code.
|
|
329
|
-
const FIXED = 424242;
|
|
330
|
-
const override: Middleware<EstimateArgs, EstimateResult> = async (
|
|
331
|
-
_args,
|
|
332
|
-
_next,
|
|
333
|
-
_ctx,
|
|
334
|
-
) => FIXED;
|
|
335
|
-
const customPlugin: Plugin = {
|
|
336
|
-
manifest: {
|
|
337
|
-
name: "custom-token-estimate",
|
|
338
|
-
version: "1.0.0",
|
|
339
|
-
},
|
|
340
|
-
middleware: { tokenEstimate: override },
|
|
341
|
-
};
|
|
342
|
-
|
|
343
|
-
// Register the custom plugin FIRST so it sits outermost and short-
|
|
344
|
-
// circuits before the default's terminal runs.
|
|
345
|
-
registerPlugin(customPlugin);
|
|
346
|
-
registerDefault();
|
|
347
|
-
|
|
348
|
-
const args: EstimateArgs = {
|
|
349
|
-
history: TEXT_HISTORY,
|
|
350
|
-
systemPrompt: SYSTEM_PROMPT,
|
|
351
|
-
tools: SAMPLE_TOOLS,
|
|
352
|
-
providerName: "anthropic",
|
|
353
|
-
};
|
|
354
|
-
const pipelineResult = await runViaPipeline(args);
|
|
355
|
-
expect(pipelineResult).toBe(FIXED);
|
|
356
|
-
// And for contrast: the default alone would have given the calibrated value.
|
|
357
|
-
expect(pipelineResult).not.toBe(calibratedEstimate(args));
|
|
358
|
-
});
|
|
359
|
-
|
|
360
|
-
test("wrapper middleware that scales the downstream result composes with the default", async () => {
|
|
361
|
-
// A plugin that wraps the downstream estimate, doubling it. This
|
|
362
|
-
// exercises the onion composition: outer middleware sees the unmodified
|
|
363
|
-
// default result and returns its own modification.
|
|
364
|
-
const doubler: Middleware<EstimateArgs, EstimateResult> = async (
|
|
365
|
-
args,
|
|
366
|
-
next,
|
|
367
|
-
_ctx,
|
|
368
|
-
) => {
|
|
369
|
-
const inner = await next(args);
|
|
370
|
-
return inner * 2;
|
|
371
|
-
};
|
|
372
|
-
const wrapperPlugin: Plugin = {
|
|
373
|
-
manifest: {
|
|
374
|
-
name: "doubling-token-estimate",
|
|
375
|
-
version: "1.0.0",
|
|
376
|
-
},
|
|
377
|
-
middleware: { tokenEstimate: doubler },
|
|
378
|
-
};
|
|
379
|
-
|
|
380
|
-
registerPlugin(wrapperPlugin);
|
|
381
|
-
registerDefault();
|
|
382
|
-
|
|
383
|
-
const args: EstimateArgs = {
|
|
384
|
-
history: TEXT_HISTORY,
|
|
385
|
-
systemPrompt: SYSTEM_PROMPT,
|
|
386
|
-
tools: SAMPLE_TOOLS,
|
|
387
|
-
providerName: "anthropic",
|
|
388
|
-
};
|
|
389
|
-
const pipelineResult = await runViaPipeline(args);
|
|
390
|
-
expect(pipelineResult).toBe(calibratedEstimate(args) * 2);
|
|
391
|
-
});
|
|
392
|
-
});
|
|
393
|
-
|
|
394
|
-
describe("tokenEstimate pipeline — default does not shadow late plugins", () => {
|
|
395
|
-
test("user middleware registered AFTER the default still runs", async () => {
|
|
396
|
-
// Regression test for the default-first shadowing hazard: defaults are
|
|
397
|
-
// registered before user plugins in `bootstrapPlugins()`, putting the
|
|
398
|
-
// default at the OUTERMOST onion position. If the default middleware
|
|
399
|
-
// runs the estimate directly instead of calling `next(args)`, any user
|
|
400
|
-
// plugin loaded afterward is invisible. The default is a passthrough —
|
|
401
|
-
// this test fails loudly if that invariant ever regresses.
|
|
402
|
-
registerDefault();
|
|
403
|
-
const observed: EstimateArgs[] = [];
|
|
404
|
-
const observer: Middleware<EstimateArgs, EstimateResult> = async (
|
|
405
|
-
args,
|
|
406
|
-
next,
|
|
407
|
-
_ctx,
|
|
408
|
-
) => {
|
|
409
|
-
observed.push(args);
|
|
410
|
-
// Return a sentinel so we can distinguish the observer's result from
|
|
411
|
-
// the default's output.
|
|
412
|
-
await next(args);
|
|
413
|
-
return 999_999;
|
|
414
|
-
};
|
|
415
|
-
const userPlugin: Plugin = {
|
|
416
|
-
manifest: {
|
|
417
|
-
name: "late-registered-observer",
|
|
418
|
-
version: "1.0.0",
|
|
419
|
-
},
|
|
420
|
-
middleware: { tokenEstimate: observer },
|
|
421
|
-
};
|
|
422
|
-
registerPlugin(userPlugin);
|
|
423
|
-
|
|
424
|
-
const args: EstimateArgs = {
|
|
425
|
-
history: TEXT_HISTORY,
|
|
426
|
-
systemPrompt: SYSTEM_PROMPT,
|
|
427
|
-
tools: [],
|
|
428
|
-
providerName: "anthropic",
|
|
429
|
-
};
|
|
430
|
-
const result = await runViaPipeline(args);
|
|
431
|
-
expect(observed.length).toBe(1);
|
|
432
|
-
expect(result).toBe(999_999);
|
|
433
|
-
});
|
|
434
|
-
});
|
|
435
|
-
|
|
436
|
-
describe("tokenEstimate pipeline — args are immutable to middleware", () => {
|
|
437
|
-
test("frozen history/tools reject in-place mutation attempts", () => {
|
|
438
|
-
// The call site freezes shallow clones of `history` and `tools` before
|
|
439
|
-
// handing them to the pipeline. This mirrors the runtime protection
|
|
440
|
-
// that stops a misbehaving middleware from trimming `args.history` in
|
|
441
|
-
// place — which would silently drop prompt context from the
|
|
442
|
-
// orchestrator's live `runMessages` array before the provider call.
|
|
443
|
-
const frozenHistory = Object.freeze([...TEXT_HISTORY]);
|
|
444
|
-
const frozenTools = Object.freeze([...SAMPLE_TOOLS]);
|
|
445
|
-
expect(() => {
|
|
446
|
-
(frozenHistory as Message[]).pop();
|
|
447
|
-
}).toThrow(TypeError);
|
|
448
|
-
expect(() => {
|
|
449
|
-
(frozenTools as ToolDefinition[]).push({
|
|
450
|
-
name: "extra",
|
|
451
|
-
description: "",
|
|
452
|
-
input_schema: { type: "object", properties: {} },
|
|
453
|
-
});
|
|
454
|
-
}).toThrow(TypeError);
|
|
455
|
-
});
|
|
456
|
-
});
|
|
457
|
-
|
|
458
|
-
describe("tokenEstimate pipeline — empty registry fallback", () => {
|
|
459
|
-
test("without any plugin registered, the terminal receives the call", async () => {
|
|
460
|
-
// `runViaPipeline` wires in the default terminal, so here we run the
|
|
461
|
-
// pipeline with an explicit terminal that returns a sentinel to prove
|
|
462
|
-
// that an empty middleware list falls through.
|
|
463
|
-
const SENTINEL = 12345;
|
|
464
|
-
const result = await runPipeline<EstimateArgs, EstimateResult>(
|
|
465
|
-
"tokenEstimate",
|
|
466
|
-
getMiddlewaresFor("tokenEstimate"),
|
|
467
|
-
async () => SENTINEL,
|
|
468
|
-
{
|
|
469
|
-
history: TEXT_HISTORY,
|
|
470
|
-
systemPrompt: SYSTEM_PROMPT,
|
|
471
|
-
tools: [],
|
|
472
|
-
providerName: "anthropic",
|
|
473
|
-
},
|
|
474
|
-
makeCtx(),
|
|
475
|
-
DEFAULT_TIMEOUTS.tokenEstimate,
|
|
476
|
-
);
|
|
477
|
-
expect(result).toBe(SENTINEL);
|
|
478
|
-
});
|
|
479
|
-
});
|