@vellumai/assistant 0.8.7 → 0.8.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +20 -4
- package/docker-entrypoint.sh +4 -2
- package/docker-init-apt-root.sh +3 -1
- package/docker-kata-apt-env.sh +3 -1
- package/docker-kata-runtime-family.sh +12 -0
- package/docs/architecture/memory.md +1 -1
- package/docs/plugins.md +75 -79
- package/examples/plugins/echo/README.md +6 -12
- package/examples/plugins/echo/register.ts +0 -41
- package/node_modules/@vellumai/skill-host-contracts/src/server-message.ts +3 -3
- package/openapi.yaml +3381 -348
- package/package.json +1 -1
- package/scripts/generate-openapi.ts +68 -41
- package/src/__tests__/agent-loop-exit-reason.test.ts +34 -39
- package/src/__tests__/agent-loop-provider-error-recording.test.ts +1 -1
- package/src/__tests__/agent-loop.test.ts +37 -87
- package/src/__tests__/agent-wake-disk-pressure-callsite.test.ts +2 -0
- package/src/__tests__/annotate-activity-metadata.test.ts +262 -0
- package/src/__tests__/annotate-risk-options.test.ts +2 -3
- package/src/__tests__/anthropic-provider.test.ts +95 -2
- package/src/__tests__/assistant-event-hub.test.ts +25 -0
- package/src/__tests__/assistant-events-sse-shed.test.ts +8 -0
- package/src/__tests__/{conversation-stream-state.test.ts → assistant-stream-state.test.ts} +252 -91
- package/src/__tests__/auth-fallback-events-store.test.ts +116 -0
- package/src/__tests__/background-workers-disk-pressure.test.ts +6 -0
- package/src/__tests__/btw-routes.test.ts +62 -3
- package/src/__tests__/build-persisted-content.test.ts +184 -0
- package/src/__tests__/catalog-files.test.ts +1 -1
- package/src/__tests__/clawhub-files.test.ts +1 -1
- package/src/__tests__/compaction-pipeline.test.ts +1 -1
- package/src/__tests__/compaction.benchmark.test.ts +0 -30
- package/src/__tests__/config-watcher.test.ts +1 -1
- package/src/__tests__/conversation-abort-tool-results.test.ts +57 -19
- package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +6 -2
- package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +10 -4
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +313 -1136
- package/src/__tests__/conversation-agent-loop.test.ts +596 -1616
- package/src/__tests__/conversation-analysis-routes.test.ts +6 -0
- package/src/__tests__/conversation-history-web-search.test.ts +11 -1
- package/src/__tests__/conversation-pairing.test.ts +4 -31
- package/src/__tests__/conversation-process-app-control-preactivation.test.ts +6 -0
- package/src/__tests__/conversation-provider-retry-repair.test.ts +26 -5
- package/src/__tests__/conversation-queue.test.ts +2 -0
- package/src/__tests__/conversation-routes-disk-view.test.ts +3 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +6 -5
- package/src/__tests__/conversation-runtime-assembly.test.ts +170 -229
- package/src/__tests__/conversation-runtime-workspace.test.ts +3 -24
- package/src/__tests__/conversation-slash-commands.test.ts +8 -42
- package/src/__tests__/conversation-slash-queue.test.ts +6 -1
- package/src/__tests__/conversation-surfaces-action-delivery.test.ts +84 -0
- package/src/__tests__/conversation-sync-tags.test.ts +27 -15
- package/src/__tests__/conversation-title-service.test.ts +135 -2
- package/src/__tests__/conversation-workspace-injection.test.ts +6 -1
- package/src/__tests__/cross-provider-web-search.test.ts +214 -1
- package/src/__tests__/db-schedule-syntax-migration.test.ts +5 -0
- package/src/__tests__/dm-persistence.test.ts +5 -1
- package/src/__tests__/empty-response-hook.test.ts +304 -0
- package/src/__tests__/feature-flag-test-helpers.ts +2 -2
- package/src/__tests__/gemini-image-service.test.ts +13 -0
- package/src/__tests__/helpers/mock-provider.ts +110 -0
- package/src/__tests__/helpers/native-web-search-harness.ts +129 -0
- package/src/__tests__/history-repair-hook.test.ts +1 -0
- package/src/__tests__/identity-intro-cache.test.ts +12 -100
- package/src/__tests__/identity-routes.test.ts +248 -7
- package/src/__tests__/inbound-slack-persistence.test.ts +5 -1
- package/src/__tests__/injector-background-turn.test.ts +2 -8
- package/src/__tests__/injector-chain.test.ts +106 -270
- package/src/__tests__/injector-disk-pressure.test.ts +3 -12
- package/src/__tests__/injector-document-comments.test.ts +2 -2
- package/src/__tests__/injector-pkb-v2-silenced.test.ts +30 -22
- package/src/__tests__/injector-v3-suppression.test.ts +31 -37
- package/src/__tests__/internal-telemetry-routes.test.ts +109 -0
- package/src/__tests__/list-messages-page-latest.test.ts +60 -0
- package/src/__tests__/list-messages-tool-merge.test.ts +20 -0
- package/src/__tests__/llm-usage-store.test.ts +223 -1
- package/src/__tests__/memory-retrieval-hook.test.ts +297 -0
- package/src/__tests__/memory-v2-static-injector.test.ts +103 -35
- package/src/__tests__/native-web-search.test.ts +191 -0
- package/src/__tests__/onboarding-template-contract.test.ts +2 -0
- package/src/__tests__/openai-image-service.test.ts +17 -0
- package/src/__tests__/openai-provider.test.ts +31 -1
- package/src/__tests__/persist-unsendable-image.test.ts +215 -0
- package/src/__tests__/persistence-secret-redaction.test.ts +1 -0
- package/src/__tests__/pipeline-runner.test.ts +29 -39
- package/src/__tests__/pkb-autoinject.test.ts +2 -5
- package/src/__tests__/plugin-bootstrap.test.ts +13 -28
- package/src/__tests__/plugin-registry.test.ts +0 -27
- package/src/__tests__/plugin-types.test.ts +2 -125
- package/src/__tests__/process-message-display-content.test.ts +6 -2
- package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +5 -1
- package/src/__tests__/resolve-trust-class.test.ts +4 -4
- package/src/__tests__/runtime-events-sse-reconnect.test.ts +60 -23
- package/src/__tests__/schedule-routes.test.ts +603 -2
- package/src/__tests__/schedule-store.test.ts +41 -0
- package/src/__tests__/schedule-tools.test.ts +35 -0
- package/src/__tests__/server-history-render.test.ts +314 -1
- package/src/__tests__/skillssh-files.test.ts +1 -1
- package/src/__tests__/system-prompt.test.ts +20 -0
- package/src/__tests__/task-scheduler.test.ts +162 -1
- package/src/__tests__/terminal-tools.test.ts +6 -1
- package/src/__tests__/title-generate-hook.test.ts +319 -0
- package/src/__tests__/tool-error-hook.test.ts +278 -0
- package/src/__tests__/tool-preview-lifecycle.test.ts +468 -5
- package/src/__tests__/tool-result-metadata-plumbing.test.ts +1 -0
- package/src/__tests__/tool-result-truncate-hook.test.ts +127 -0
- package/src/__tests__/tool-result-truncation.test.ts +0 -2
- package/src/__tests__/ui-choice-copy-surfaces.test.ts +254 -0
- package/src/__tests__/ui-work-result-surface.test.ts +159 -0
- package/src/__tests__/usage-routes.test.ts +285 -1
- package/src/__tests__/user-plugin-loader.test.ts +2 -2
- package/src/__tests__/voice-session-bridge.test.ts +6 -3
- package/src/__tests__/web-search-backend-failure.test.ts +166 -0
- package/src/agent/loop.ts +346 -442
- package/src/api/events/assistant-thinking-delta.ts +33 -0
- package/src/api/events/tool-output-chunk.ts +45 -0
- package/src/api/events/tool-use-preview-start.ts +32 -0
- package/src/api/events/trace-event.ts +69 -0
- package/src/api/index.ts +48 -13
- package/src/api/responses/conversation-message.ts +368 -0
- package/src/avatar/__tests__/avatar-store.test.ts +34 -29
- package/src/cli/commands/__tests__/notifications.test.ts +58 -14
- package/src/cli/commands/notifications.ts +112 -60
- package/src/config/assistant-feature-flags.ts +22 -11
- package/src/config/bundled-skills/app-builder/SKILL.md +3 -20
- package/src/config/bundled-skills/app-builder/references/examples/README.md +17 -0
- package/src/config/bundled-skills/app-builder/references/examples/expense-tracker.md +515 -0
- package/src/config/bundled-skills/app-builder/references/examples/focus-timer.md +342 -0
- package/src/config/bundled-skills/app-builder/references/examples/habit-tracker.md +490 -0
- package/src/config/bundled-skills/document-editor/SKILL.md +1 -1
- package/src/config/bundled-skills/messaging/SKILL.md +0 -7
- package/src/config/feature-flag-cache.ts +3 -3
- package/src/config/feature-flag-registry.json +35 -3
- package/src/config/schemas/__tests__/memory-v2.test.ts +1 -0
- package/src/config/schemas/__tests__/memory-v3.test.ts +25 -0
- package/src/config/schemas/llm.ts +1 -0
- package/src/config/schemas/memory-v2.ts +8 -0
- package/src/config/schemas/memory-v3.ts +8 -0
- package/src/config/schemas/platform.ts +8 -0
- package/src/config/seed-inference-profiles.ts +2 -2
- package/src/config/skills.ts +13 -0
- package/src/context/compactor.ts +1 -1
- package/src/context/strip-injections.ts +122 -0
- package/src/context/token-estimator.ts +23 -0
- package/src/context/tool-result-truncation.ts +0 -23
- package/src/context/window-manager.ts +3 -6
- package/src/credential-execution/executable-discovery.ts +16 -0
- package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +6 -0
- package/src/daemon/__tests__/inference-profile-notification.test.ts +153 -0
- package/src/daemon/__tests__/native-web-search-metadata.test.ts +10 -8
- package/src/daemon/assistant-attachments.ts +1 -1
- package/src/daemon/config-watcher.ts +2 -2
- package/src/daemon/context-overflow-reducer.ts +0 -1
- package/src/daemon/conversation-agent-loop-handlers.ts +605 -153
- package/src/daemon/conversation-agent-loop.ts +281 -760
- package/src/daemon/conversation-history.ts +5 -4
- package/src/daemon/conversation-lifecycle.ts +3 -4
- package/src/daemon/conversation-messaging.ts +7 -6
- package/src/daemon/conversation-process.ts +11 -16
- package/src/daemon/conversation-runtime-assembly.ts +130 -347
- package/src/daemon/conversation-slash.ts +6 -25
- package/src/daemon/conversation-surfaces.ts +222 -4
- package/src/daemon/conversation-tool-setup.ts +2 -29
- package/src/daemon/conversation.ts +32 -14
- package/src/daemon/external-plugins-bootstrap.ts +9 -10
- package/src/daemon/handlers/config-a2a.ts +51 -36
- package/src/daemon/handlers/config-slack-channel.ts +20 -14
- package/src/daemon/handlers/config-telegram.ts +16 -2
- package/src/daemon/handlers/shared.ts +156 -84
- package/src/daemon/handlers/skills.ts +39 -10
- package/src/daemon/lifecycle.ts +4 -0
- package/src/daemon/message-types/apps.ts +1 -29
- package/src/daemon/message-types/messages.ts +9 -57
- package/src/daemon/message-types/skills.ts +2 -0
- package/src/daemon/message-types/surfaces.ts +136 -3
- package/src/daemon/now-scratchpad.ts +21 -0
- package/src/daemon/orphan-reaper.test.ts +210 -0
- package/src/daemon/orphan-reaper.ts +240 -0
- package/src/daemon/persist-unsendable-image.ts +117 -0
- package/src/daemon/process-message.ts +1 -3
- package/src/daemon/trace-emitter.ts +6 -4
- package/src/daemon/trust-context.ts +19 -0
- package/src/daemon/wake-target-adapter.ts +3 -1
- package/src/home/home-greeting-cache.ts +24 -1
- package/src/ipc/gateway-client.test.ts +2 -2
- package/src/ipc/gateway-client.ts +3 -3
- package/src/media/gemini-image-service.ts +15 -0
- package/src/media/openai-image-service.ts +14 -0
- package/src/media/types.ts +34 -0
- package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +56 -0
- package/src/memory/auth-fallback-events-store.ts +94 -0
- package/src/memory/conversation-title-service.ts +65 -41
- package/src/memory/db-init.ts +4 -0
- package/src/memory/graph/__tests__/conversation-graph-memory-registry.test.ts +119 -0
- package/src/memory/graph/conversation-graph-memory.ts +65 -0
- package/src/memory/jobs-store.ts +33 -0
- package/src/memory/jobs-worker.ts +31 -4
- package/src/memory/llm-usage-store.ts +224 -50
- package/src/memory/migrations/222-strip-placeholder-sentinels-from-messages.ts +6 -5
- package/src/memory/migrations/270-schedule-source-conversation.ts +13 -0
- package/src/memory/migrations/271-create-auth-fallback-events.ts +21 -0
- package/src/memory/migrations/index.ts +2 -0
- package/src/memory/pkb/autoinject.ts +61 -0
- package/src/memory/pkb/context.ts +50 -0
- package/src/memory/pkb/types.ts +14 -0
- package/src/memory/schedule-attribution-sql.ts +104 -0
- package/src/memory/schema/infrastructure.ts +16 -0
- package/src/memory/usage-grouped-buckets.ts +6 -1
- package/src/memory/v2/__tests__/consolidation-job.test.ts +1 -1
- package/src/memory/v2/consolidation-job.ts +1 -1
- package/src/memory/v3/__tests__/health.test.ts +16 -0
- package/src/memory/v3/__tests__/orchestrate.test.ts +45 -9
- package/src/memory/v3/__tests__/provider-blocks.test.ts +13 -0
- package/src/memory/v3/__tests__/router.test.ts +101 -29
- package/src/memory/v3/__tests__/selector.test.ts +93 -27
- package/src/memory/v3/__tests__/shadow-plugin.test.ts +23 -5
- package/src/memory/v3/health.ts +0 -0
- package/src/memory/v3/llm-retry.ts +32 -0
- package/src/memory/v3/orchestrate.ts +26 -14
- package/src/memory/v3/provider-blocks.ts +15 -5
- package/src/memory/v3/router.ts +48 -42
- package/src/memory/v3/selector.ts +57 -42
- package/src/memory/v3/shadow-plugin.ts +47 -15
- package/src/memory/v3/types.ts +8 -0
- package/src/notifications/conversation-pairing.ts +8 -15
- package/src/notifications/decision-engine.ts +6 -3
- package/src/notifications/home-feed-side-effect.ts +12 -1
- package/src/permissions/prompter.ts +4 -0
- package/src/plugin-api/constants.ts +4 -0
- package/src/plugin-api/index.ts +8 -1
- package/src/plugin-api/types.ts +151 -1
- package/src/plugins/defaults/empty-response/hooks/stop.ts +126 -0
- package/src/plugins/defaults/empty-response/register.ts +8 -13
- package/src/plugins/defaults/index.ts +1 -15
- package/src/plugins/defaults/injectors/register.ts +243 -74
- package/src/plugins/defaults/memory-retrieval/hooks/post-compact.ts +91 -0
- package/src/plugins/defaults/memory-retrieval/hooks/user-prompt-submit-temp.ts +216 -0
- package/src/plugins/defaults/memory-retrieval/injector-chain.ts +35 -0
- package/src/plugins/defaults/title-generate/hooks/stop.ts +75 -0
- package/src/plugins/defaults/title-generate/hooks/user-prompt-submit.ts +35 -0
- package/src/plugins/defaults/title-generate/package.json +1 -1
- package/src/plugins/defaults/title-generate/register.ts +18 -18
- package/src/plugins/defaults/tool-error/hooks/post-tool-use.ts +118 -0
- package/src/plugins/defaults/tool-error/package.json +1 -1
- package/src/plugins/defaults/tool-error/register.ts +9 -21
- package/src/plugins/defaults/tool-result-truncate/hooks/post-tool-use.ts +32 -0
- package/src/plugins/defaults/tool-result-truncate/register.ts +10 -21
- package/src/plugins/defaults/tool-result-truncate/terminal.ts +37 -18
- package/src/plugins/pipeline.ts +6 -18
- package/src/plugins/registry.ts +8 -25
- package/src/plugins/types.ts +43 -474
- package/src/proactive-artifact/aux-message-injector.ts +3 -3
- package/src/proactive-artifact/job.test.ts +7 -12
- package/src/prompts/__tests__/system-prompt.test.ts +36 -0
- package/src/prompts/templates/BOOTSTRAP-ACTIVATION-RAIL.md +62 -0
- package/src/prompts/templates/BOOTSTRAP.md +2 -2
- package/src/prompts/templates/system-sections.ts +15 -0
- package/src/providers/anthropic/client.ts +37 -29
- package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +112 -0
- package/src/providers/openai/chat-completions-provider.ts +44 -0
- package/src/providers/openrouter/client.ts +1 -0
- package/src/providers/placeholder-sentinels.ts +35 -0
- package/src/runtime/__tests__/agent-wake.test.ts +5 -1
- package/src/runtime/agent-wake.ts +2 -2
- package/src/runtime/assistant-event-hub.ts +36 -6
- package/src/runtime/{conversation-stream-state.ts → assistant-stream-state.ts} +132 -58
- package/src/runtime/http-router.ts +16 -21
- package/src/runtime/http-types.ts +16 -70
- package/src/runtime/pending-interactions.ts +1 -0
- package/src/runtime/routes/__tests__/consolidation-routes.test.ts +265 -2
- package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +31 -1
- package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +6 -2
- package/src/runtime/routes/__tests__/tts-routes.test.ts +6 -2
- package/src/runtime/routes/app-management-routes.ts +6 -117
- package/src/runtime/routes/app-routes.ts +13 -15
- package/src/runtime/routes/attachment-routes.ts +26 -15
- package/src/runtime/routes/avatar-routes.ts +26 -0
- package/src/runtime/routes/btw-routes.ts +29 -23
- package/src/runtime/routes/consolidation-routes.ts +120 -20
- package/src/runtime/routes/conversation-query-routes.ts +2 -0
- package/src/runtime/routes/conversation-routes.ts +358 -184
- package/src/runtime/routes/documents-routes.ts +4 -0
- package/src/runtime/routes/domain-routes.ts +51 -37
- package/src/runtime/routes/epoch-millis-range.ts +34 -0
- package/src/runtime/routes/events-routes.ts +28 -34
- package/src/runtime/routes/gateway-log-routes.ts +26 -4
- package/src/runtime/routes/heartbeat-routes.ts +32 -12
- package/src/runtime/routes/identity-intro-cache.ts +11 -34
- package/src/runtime/routes/identity-routes.ts +208 -17
- package/src/runtime/routes/image-generation-routes.ts +40 -2
- package/src/runtime/routes/index.ts +2 -0
- package/src/runtime/routes/integrations/a2a.ts +12 -10
- package/src/runtime/routes/integrations/slack/__tests__/channel.test.ts +16 -0
- package/src/runtime/routes/integrations/slack/channel.ts +4 -0
- package/src/runtime/routes/integrations/slack/share.ts +27 -6
- package/src/runtime/routes/integrations/telegram.ts +6 -0
- package/src/runtime/routes/integrations/twilio.ts +42 -0
- package/src/runtime/routes/internal-telemetry-routes.ts +88 -0
- package/src/runtime/routes/log-export-routes.ts +8 -0
- package/src/runtime/routes/memory-v2-routes.ts +15 -8
- package/src/runtime/routes/memory-v3-routes.ts +50 -28
- package/src/runtime/routes/oauth-apps.ts +66 -12
- package/src/runtime/routes/oauth-providers.ts +44 -5
- package/src/runtime/routes/platform-routes.ts +81 -5
- package/src/runtime/routes/playground/__tests__/force-compact.test.ts +6 -4
- package/src/runtime/routes/playground/force-compact.ts +1 -1
- package/src/runtime/routes/rename-conversation-routes.ts +5 -0
- package/src/runtime/routes/schedule-routes.ts +152 -42
- package/src/runtime/routes/secret-routes.ts +14 -2
- package/src/runtime/routes/skills-routes.ts +43 -14
- package/src/runtime/routes/tool-call-confirmation-enrichment.test.ts +161 -0
- package/src/runtime/routes/tool-call-confirmation-enrichment.ts +107 -0
- package/src/runtime/routes/trust-rules-routes.ts +26 -2
- package/src/runtime/routes/tts-routes.ts +35 -0
- package/src/runtime/routes/types.ts +66 -8
- package/src/runtime/routes/usage-routes.ts +47 -39
- package/src/runtime/routes/webhook-routes.ts +41 -2
- package/src/runtime/routes/workspace-routes.ts +4 -0
- package/src/runtime/services/__tests__/analyze-conversation.test.ts +6 -0
- package/src/runtime/services/analyze-conversation.ts +2 -2
- package/src/schedule/schedule-store.ts +20 -1
- package/src/schedule/schedule-usage-store.ts +83 -0
- package/src/schedule/scheduler.ts +12 -5
- package/src/skills/catalog-files.ts +2 -2
- package/src/skills/catalog-install.ts +3 -0
- package/src/skills/categories-cache.ts +118 -0
- package/src/skills/clawhub-files.ts +1 -2
- package/src/skills/skillssh-files.ts +1 -2
- package/src/telemetry/types.ts +29 -1
- package/src/telemetry/usage-telemetry-reporter.test.ts +112 -3
- package/src/telemetry/usage-telemetry-reporter.ts +57 -2
- package/src/tools/executor.ts +1 -53
- package/src/tools/network/__tests__/web-search-metadata.test.ts +7 -1
- package/src/tools/network/__tests__/web-search.test.ts +11 -3
- package/src/tools/network/web-search-error.test.ts +248 -0
- package/src/tools/network/web-search-error.ts +267 -0
- package/src/tools/network/web-search.ts +207 -48
- package/src/tools/schedule/create.ts +2 -0
- package/src/tools/terminal/safe-env.ts +10 -1
- package/src/tools/ui-surface/definitions.ts +9 -1
- package/src/tts/__tests__/provider-catalog-consistency.test.ts +85 -1
- package/src/tts/provider-catalog.ts +76 -1
- package/src/util/mutex.ts +47 -0
- package/src/workspace/git-service.ts +1 -42
- package/src/workspace/migrations/095-bump-heartbeat-interval-30m-to-60m.ts +51 -0
- package/src/workspace/migrations/096-reduce-quality-profile-effort.ts +72 -0
- package/src/workspace/migrations/097-enable-adaptive-thinking-managed-profiles.ts +93 -0
- package/src/workspace/migrations/registry.ts +6 -0
- package/src/__tests__/bootstrap-turn-cleanup.test.ts +0 -44
- package/src/__tests__/empty-response-pipeline.test.ts +0 -423
- package/src/__tests__/llm-call-pipeline.test.ts +0 -287
- package/src/__tests__/memory-retrieval-pipeline.test.ts +0 -418
- package/src/__tests__/persistence-pipeline.test.ts +0 -503
- package/src/__tests__/title-generate-pipeline.test.ts +0 -211
- package/src/__tests__/token-estimate-pipeline.test.ts +0 -479
- package/src/__tests__/tool-error-pipeline.test.ts +0 -241
- package/src/__tests__/tool-execute-pipeline.test.ts +0 -417
- package/src/__tests__/tool-result-truncate-pipeline.test.ts +0 -341
- package/src/daemon/bootstrap-turn-cleanup.ts +0 -45
- package/src/gallery/default-gallery.ts +0 -1359
- package/src/gallery/gallery-manifest.ts +0 -28
- package/src/home/feature-gate.ts +0 -22
- package/src/plugins/defaults/empty-response/middlewares/emptyResponse.ts +0 -22
- package/src/plugins/defaults/empty-response/terminal.ts +0 -106
- package/src/plugins/defaults/injectors/package.json +0 -15
- package/src/plugins/defaults/llm-call/middlewares/llmCall.ts +0 -17
- package/src/plugins/defaults/llm-call/package.json +0 -15
- package/src/plugins/defaults/llm-call/register.ts +0 -45
- package/src/plugins/defaults/memory-retrieval/middlewares/memoryRetrieval.ts +0 -17
- package/src/plugins/defaults/memory-retrieval/package.json +0 -15
- package/src/plugins/defaults/memory-retrieval/register.ts +0 -181
- package/src/plugins/defaults/persistence/middlewares/persistence.ts +0 -19
- package/src/plugins/defaults/persistence/package.json +0 -15
- package/src/plugins/defaults/persistence/register.ts +0 -38
- package/src/plugins/defaults/persistence/terminal.ts +0 -83
- package/src/plugins/defaults/title-generate/terminal.ts +0 -31
- package/src/plugins/defaults/token-estimate/middlewares/tokenEstimate.ts +0 -23
- package/src/plugins/defaults/token-estimate/package.json +0 -15
- package/src/plugins/defaults/token-estimate/register.ts +0 -34
- package/src/plugins/defaults/token-estimate/terminal.ts +0 -40
- package/src/plugins/defaults/tool-error/middlewares/toolError.ts +0 -21
- package/src/plugins/defaults/tool-error/terminal.ts +0 -47
- package/src/plugins/defaults/tool-execute/middlewares/toolExecute.ts +0 -23
- package/src/plugins/defaults/tool-execute/package.json +0 -15
- package/src/plugins/defaults/tool-execute/register.ts +0 -49
- package/src/plugins/defaults/tool-result-truncate/middlewares/toolResultTruncate.ts +0 -23
- package/src/plugins/defaults/tool-result-truncate/types.ts +0 -22
- package/src/skills/category-inference.ts +0 -111
|
@@ -1,423 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Tests for the `emptyResponse` plugin pipeline (PR 18).
|
|
3
|
-
*
|
|
4
|
-
* Covers:
|
|
5
|
-
* - Default plugin decision matches the original inline loop logic for the
|
|
6
|
-
* canonical cases (empty-after-tools → nudge, visible-text → accept,
|
|
7
|
-
* tool-use-blocks-present → accept, retries-exhausted → accept,
|
|
8
|
-
* prior-visible-text-in-run → accept).
|
|
9
|
-
* - Swapping in a custom middleware that returns `action: "accept"` prevents
|
|
10
|
-
* the nudge and lets the loop fall through to history append.
|
|
11
|
-
* - Swapping in a custom middleware that returns `action: "error"` is
|
|
12
|
-
* propagated by the pipeline so the loop can surface a clear error.
|
|
13
|
-
*
|
|
14
|
-
* The loop's actual side-effects (history append, retry counter bump, log
|
|
15
|
-
* emission) live in `agent/loop.ts` and are covered by integration tests in
|
|
16
|
-
* `conversation-agent-loop.test.ts`. This file isolates the pipeline.
|
|
17
|
-
*/
|
|
18
|
-
|
|
19
|
-
import { beforeEach, describe, expect, test } from "bun:test";
|
|
20
|
-
|
|
21
|
-
import type { TrustContext } from "../daemon/trust-context.js";
|
|
22
|
-
import { defaultEmptyResponsePlugin } from "../plugins/defaults/empty-response/register.js";
|
|
23
|
-
import { defaultEmptyResponseTerminal } from "../plugins/defaults/empty-response/terminal.js";
|
|
24
|
-
import { DEFAULT_TIMEOUTS, runPipeline } from "../plugins/pipeline.js";
|
|
25
|
-
import {
|
|
26
|
-
getMiddlewaresFor,
|
|
27
|
-
registerPlugin,
|
|
28
|
-
resetPluginRegistryForTests,
|
|
29
|
-
} from "../plugins/registry.js";
|
|
30
|
-
import type {
|
|
31
|
-
EmptyResponseArgs,
|
|
32
|
-
EmptyResponseDecision,
|
|
33
|
-
Middleware,
|
|
34
|
-
Plugin,
|
|
35
|
-
TurnContext,
|
|
36
|
-
} from "../plugins/types.js";
|
|
37
|
-
import type { ContentBlock } from "../providers/types.js";
|
|
38
|
-
|
|
39
|
-
// ─── Fixtures ────────────────────────────────────────────────────────────────
|
|
40
|
-
|
|
41
|
-
const trust: TrustContext = {
|
|
42
|
-
sourceChannel: "vellum",
|
|
43
|
-
trustClass: "guardian",
|
|
44
|
-
};
|
|
45
|
-
|
|
46
|
-
function makeCtx(): TurnContext {
|
|
47
|
-
return {
|
|
48
|
-
requestId: "req-empty-response",
|
|
49
|
-
conversationId: "conv-empty-response",
|
|
50
|
-
turnIndex: 2,
|
|
51
|
-
trust,
|
|
52
|
-
};
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
/**
|
|
56
|
-
* The nudge text has to match the loop's original inline string verbatim —
|
|
57
|
-
* clients (and the model) may match on this exact text.
|
|
58
|
-
*/
|
|
59
|
-
const CANONICAL_NUDGE_TEXT =
|
|
60
|
-
"<system_notice>Your previous response was empty. You must respond to the user with a summary of what you found or did. Do not use any tools — just respond with text.</system_notice>";
|
|
61
|
-
|
|
62
|
-
const emptyTextBlock: ContentBlock = { type: "text", text: " " };
|
|
63
|
-
|
|
64
|
-
function makeArgs(
|
|
65
|
-
overrides: Partial<EmptyResponseArgs> = {},
|
|
66
|
-
): EmptyResponseArgs {
|
|
67
|
-
return {
|
|
68
|
-
responseContent: [],
|
|
69
|
-
toolUseBlocksLength: 0,
|
|
70
|
-
toolUseTurns: 1,
|
|
71
|
-
emptyResponseRetries: 0,
|
|
72
|
-
maxEmptyResponseRetries: 1,
|
|
73
|
-
priorAssistantHadVisibleText: false,
|
|
74
|
-
// Default to `null` (no stop reason reported) so existing fixtures
|
|
75
|
-
// exercise the "organic empty turn" path. The refusal branch
|
|
76
|
-
// dedicated tests below set this to `"refusal"` explicitly.
|
|
77
|
-
stopReason: null,
|
|
78
|
-
...overrides,
|
|
79
|
-
};
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
/**
|
|
83
|
-
* Refusal-specific nudge text — keep in sync with `register.ts`. Clients
|
|
84
|
-
* (and the model) may match on this exact text.
|
|
85
|
-
*/
|
|
86
|
-
const CANONICAL_REFUSAL_NUDGE_TEXT =
|
|
87
|
-
'<system_notice>Your previous response was empty because the upstream provider returned stop_reason="refusal". Please answer the user\'s last message directly with a plain-text response. Do not use any tools — just respond with text.</system_notice>';
|
|
88
|
-
|
|
89
|
-
async function runEmpty(
|
|
90
|
-
args: EmptyResponseArgs,
|
|
91
|
-
): Promise<EmptyResponseDecision> {
|
|
92
|
-
return runPipeline(
|
|
93
|
-
"emptyResponse",
|
|
94
|
-
getMiddlewaresFor("emptyResponse"),
|
|
95
|
-
async (a) => defaultEmptyResponseTerminal(a),
|
|
96
|
-
args,
|
|
97
|
-
makeCtx(),
|
|
98
|
-
DEFAULT_TIMEOUTS.emptyResponse,
|
|
99
|
-
);
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
// ─── Tests ───────────────────────────────────────────────────────────────────
|
|
103
|
-
|
|
104
|
-
describe("emptyResponse pipeline — default decisions", () => {
|
|
105
|
-
beforeEach(() => {
|
|
106
|
-
resetPluginRegistryForTests();
|
|
107
|
-
registerPlugin(defaultEmptyResponsePlugin);
|
|
108
|
-
});
|
|
109
|
-
|
|
110
|
-
test("empty turn after tool results → nudge with canonical text", async () => {
|
|
111
|
-
// Whitespace-only text counts as empty (matches inline `trim().length > 0`).
|
|
112
|
-
const decision = await runEmpty(
|
|
113
|
-
makeArgs({
|
|
114
|
-
responseContent: [emptyTextBlock],
|
|
115
|
-
toolUseBlocksLength: 0,
|
|
116
|
-
toolUseTurns: 2,
|
|
117
|
-
emptyResponseRetries: 0,
|
|
118
|
-
priorAssistantHadVisibleText: false,
|
|
119
|
-
}),
|
|
120
|
-
);
|
|
121
|
-
expect(decision.action).toBe("nudge");
|
|
122
|
-
expect(decision.nudgeText).toBe(CANONICAL_NUDGE_TEXT);
|
|
123
|
-
});
|
|
124
|
-
|
|
125
|
-
test("turn contains visible text → accept", async () => {
|
|
126
|
-
const decision = await runEmpty(
|
|
127
|
-
makeArgs({
|
|
128
|
-
responseContent: [{ type: "text", text: "here is a summary" }],
|
|
129
|
-
}),
|
|
130
|
-
);
|
|
131
|
-
expect(decision.action).toBe("accept");
|
|
132
|
-
});
|
|
133
|
-
|
|
134
|
-
test("turn contains tool_use blocks → accept (not empty)", async () => {
|
|
135
|
-
const decision = await runEmpty(
|
|
136
|
-
makeArgs({
|
|
137
|
-
responseContent: [
|
|
138
|
-
{
|
|
139
|
-
type: "tool_use",
|
|
140
|
-
id: "tu-1",
|
|
141
|
-
name: "read",
|
|
142
|
-
input: { path: "/tmp/x" },
|
|
143
|
-
} as ContentBlock,
|
|
144
|
-
],
|
|
145
|
-
toolUseBlocksLength: 1,
|
|
146
|
-
}),
|
|
147
|
-
);
|
|
148
|
-
expect(decision.action).toBe("accept");
|
|
149
|
-
});
|
|
150
|
-
|
|
151
|
-
test("retries already exhausted → accept", async () => {
|
|
152
|
-
const decision = await runEmpty(
|
|
153
|
-
makeArgs({
|
|
154
|
-
responseContent: [],
|
|
155
|
-
toolUseTurns: 3,
|
|
156
|
-
emptyResponseRetries: 1,
|
|
157
|
-
maxEmptyResponseRetries: 1,
|
|
158
|
-
}),
|
|
159
|
-
);
|
|
160
|
-
expect(decision.action).toBe("accept");
|
|
161
|
-
});
|
|
162
|
-
|
|
163
|
-
test("prior assistant turn already delivered visible text → accept", async () => {
|
|
164
|
-
// Model said its piece earlier, ended with a side-effect tool, returned
|
|
165
|
-
// empty. Nudging would force a verbatim re-send of text the user already
|
|
166
|
-
// saw. Default must accept.
|
|
167
|
-
const decision = await runEmpty(
|
|
168
|
-
makeArgs({
|
|
169
|
-
responseContent: [],
|
|
170
|
-
toolUseTurns: 2,
|
|
171
|
-
priorAssistantHadVisibleText: true,
|
|
172
|
-
}),
|
|
173
|
-
);
|
|
174
|
-
expect(decision.action).toBe("accept");
|
|
175
|
-
});
|
|
176
|
-
|
|
177
|
-
test("no prior tool-use turn (toolUseTurns === 0) → accept", async () => {
|
|
178
|
-
// Empty first assistant response with no tools is not the pattern the
|
|
179
|
-
// organic-empty-turn nudge guards against. Default accepts (unless the
|
|
180
|
-
// stop reason is `"refusal"` — see the refusal-specific tests below).
|
|
181
|
-
const decision = await runEmpty(
|
|
182
|
-
makeArgs({
|
|
183
|
-
responseContent: [],
|
|
184
|
-
toolUseTurns: 0,
|
|
185
|
-
}),
|
|
186
|
-
);
|
|
187
|
-
expect(decision.action).toBe("accept");
|
|
188
|
-
});
|
|
189
|
-
|
|
190
|
-
// ─── Refusal stop ────────────────────────────────────────────────────────
|
|
191
|
-
|
|
192
|
-
test("stopReason='refusal' on turn 0 with no content → nudge with refusal text", async () => {
|
|
193
|
-
// The canonical failure mode this branch exists to catch: Anthropic's
|
|
194
|
-
// safety classifier zeros the response on the very first model call,
|
|
195
|
-
// returning a single thinking block and `stopReason: "refusal"`. Without
|
|
196
|
-
// this branch, the terminal would `accept` and the loop would persist
|
|
197
|
-
// an empty assistant bubble to the user.
|
|
198
|
-
const decision = await runEmpty(
|
|
199
|
-
makeArgs({
|
|
200
|
-
stopReason: "refusal",
|
|
201
|
-
responseContent: [],
|
|
202
|
-
toolUseBlocksLength: 0,
|
|
203
|
-
toolUseTurns: 0,
|
|
204
|
-
emptyResponseRetries: 0,
|
|
205
|
-
priorAssistantHadVisibleText: false,
|
|
206
|
-
}),
|
|
207
|
-
);
|
|
208
|
-
expect(decision.action).toBe("nudge");
|
|
209
|
-
expect(decision.nudgeText).toBe(CANONICAL_REFUSAL_NUDGE_TEXT);
|
|
210
|
-
});
|
|
211
|
-
|
|
212
|
-
test("stopReason='refusal' with a thinking-only block still nudges", async () => {
|
|
213
|
-
// Thinking blocks aren't visible text — the user sees nothing. A
|
|
214
|
-
// refusal with only thinking content matches the same shape the
|
|
215
|
-
// production log captured (`contentBlocks: 1, toolUseCount: 0`).
|
|
216
|
-
const decision = await runEmpty(
|
|
217
|
-
makeArgs({
|
|
218
|
-
stopReason: "refusal",
|
|
219
|
-
responseContent: [
|
|
220
|
-
{
|
|
221
|
-
type: "thinking",
|
|
222
|
-
thinking: "...",
|
|
223
|
-
signature: "sig",
|
|
224
|
-
} as ContentBlock,
|
|
225
|
-
],
|
|
226
|
-
toolUseBlocksLength: 0,
|
|
227
|
-
toolUseTurns: 0,
|
|
228
|
-
}),
|
|
229
|
-
);
|
|
230
|
-
expect(decision.action).toBe("nudge");
|
|
231
|
-
expect(decision.nudgeText).toBe(CANONICAL_REFUSAL_NUDGE_TEXT);
|
|
232
|
-
});
|
|
233
|
-
|
|
234
|
-
test("stopReason='refusal' but visible text present → accept (model recovered)", async () => {
|
|
235
|
-
// The classifier can flag a partial response; if the model already
|
|
236
|
-
// delivered some visible text before refusing, the user has something
|
|
237
|
-
// to see. Accept.
|
|
238
|
-
const decision = await runEmpty(
|
|
239
|
-
makeArgs({
|
|
240
|
-
stopReason: "refusal",
|
|
241
|
-
responseContent: [{ type: "text", text: "partial answer" }],
|
|
242
|
-
}),
|
|
243
|
-
);
|
|
244
|
-
expect(decision.action).toBe("accept");
|
|
245
|
-
});
|
|
246
|
-
|
|
247
|
-
test("stopReason='refusal' but tool_use blocks present → accept", async () => {
|
|
248
|
-
// A refusal with tool_use blocks is unusual (the model wouldn't normally
|
|
249
|
-
// issue tools after a classifier hit) but we still shouldn't nudge —
|
|
250
|
-
// the loop will execute the tools and the model will get another shot.
|
|
251
|
-
const decision = await runEmpty(
|
|
252
|
-
makeArgs({
|
|
253
|
-
stopReason: "refusal",
|
|
254
|
-
responseContent: [
|
|
255
|
-
{
|
|
256
|
-
type: "tool_use",
|
|
257
|
-
id: "tu-1",
|
|
258
|
-
name: "read",
|
|
259
|
-
input: { path: "/tmp/x" },
|
|
260
|
-
} as ContentBlock,
|
|
261
|
-
],
|
|
262
|
-
toolUseBlocksLength: 1,
|
|
263
|
-
}),
|
|
264
|
-
);
|
|
265
|
-
expect(decision.action).toBe("accept");
|
|
266
|
-
});
|
|
267
|
-
|
|
268
|
-
test("stopReason='refusal' but retries exhausted → accept (no infinite loop)", async () => {
|
|
269
|
-
// Persistent classifier hit shouldn't burn turns indefinitely. Once
|
|
270
|
-
// we've used our retry budget, accept (the user will see an empty
|
|
271
|
-
// bubble, but the loop terminates).
|
|
272
|
-
const decision = await runEmpty(
|
|
273
|
-
makeArgs({
|
|
274
|
-
stopReason: "refusal",
|
|
275
|
-
responseContent: [],
|
|
276
|
-
toolUseTurns: 0,
|
|
277
|
-
emptyResponseRetries: 1,
|
|
278
|
-
maxEmptyResponseRetries: 1,
|
|
279
|
-
}),
|
|
280
|
-
);
|
|
281
|
-
expect(decision.action).toBe("accept");
|
|
282
|
-
});
|
|
283
|
-
|
|
284
|
-
test("stopReason='refusal' beats post-tool-empty nudge text (refusal-specific wording)", async () => {
|
|
285
|
-
// When both branches would fire, refusal wins because the refusal
|
|
286
|
-
// text is more accurate ("safety classifier zeroed the response"
|
|
287
|
-
// vs. "summary of what you found or did"). This guards against a
|
|
288
|
-
// future refactor that orders the branches differently.
|
|
289
|
-
const decision = await runEmpty(
|
|
290
|
-
makeArgs({
|
|
291
|
-
stopReason: "refusal",
|
|
292
|
-
responseContent: [],
|
|
293
|
-
toolUseBlocksLength: 0,
|
|
294
|
-
toolUseTurns: 2, // would trip the post-tool branch too
|
|
295
|
-
priorAssistantHadVisibleText: false,
|
|
296
|
-
}),
|
|
297
|
-
);
|
|
298
|
-
expect(decision.action).toBe("nudge");
|
|
299
|
-
expect(decision.nudgeText).toBe(CANONICAL_REFUSAL_NUDGE_TEXT);
|
|
300
|
-
});
|
|
301
|
-
});
|
|
302
|
-
|
|
303
|
-
describe("emptyResponse pipeline — custom middleware overrides", () => {
|
|
304
|
-
beforeEach(() => {
|
|
305
|
-
resetPluginRegistryForTests();
|
|
306
|
-
});
|
|
307
|
-
|
|
308
|
-
test("plugin returning action:accept suppresses the nudge", async () => {
|
|
309
|
-
// Build a plugin whose middleware short-circuits with accept. Register it
|
|
310
|
-
// as the ONLY plugin so its decision is authoritative. The loop-side
|
|
311
|
-
// effect (no nudge appended) is covered by integration tests; here we
|
|
312
|
-
// assert the pipeline returns what the plugin returned.
|
|
313
|
-
const acceptPlugin: Plugin = {
|
|
314
|
-
manifest: {
|
|
315
|
-
name: "force-accept",
|
|
316
|
-
version: "1.0.0",
|
|
317
|
-
},
|
|
318
|
-
middleware: {
|
|
319
|
-
emptyResponse: async () => ({ action: "accept" }),
|
|
320
|
-
},
|
|
321
|
-
};
|
|
322
|
-
registerPlugin(acceptPlugin);
|
|
323
|
-
|
|
324
|
-
const decision = await runEmpty(
|
|
325
|
-
makeArgs({
|
|
326
|
-
// Conditions the default would nudge on — but the custom plugin wins.
|
|
327
|
-
responseContent: [],
|
|
328
|
-
toolUseTurns: 2,
|
|
329
|
-
emptyResponseRetries: 0,
|
|
330
|
-
priorAssistantHadVisibleText: false,
|
|
331
|
-
}),
|
|
332
|
-
);
|
|
333
|
-
expect(decision.action).toBe("accept");
|
|
334
|
-
// `nudgeText` must not leak from the acceptance branch.
|
|
335
|
-
expect(decision.nudgeText).toBeUndefined();
|
|
336
|
-
});
|
|
337
|
-
|
|
338
|
-
test("plugin returning action:error is propagated to the caller", async () => {
|
|
339
|
-
const errorPlugin: Plugin = {
|
|
340
|
-
manifest: {
|
|
341
|
-
name: "force-error",
|
|
342
|
-
version: "1.0.0",
|
|
343
|
-
},
|
|
344
|
-
middleware: {
|
|
345
|
-
emptyResponse: async () => ({ action: "error" }),
|
|
346
|
-
},
|
|
347
|
-
};
|
|
348
|
-
registerPlugin(errorPlugin);
|
|
349
|
-
|
|
350
|
-
const decision = await runEmpty(makeArgs());
|
|
351
|
-
expect(decision.action).toBe("error");
|
|
352
|
-
});
|
|
353
|
-
|
|
354
|
-
test("plugin overriding default nudge text changes the returned text", async () => {
|
|
355
|
-
// Exercises the wrapping semantics: the custom plugin observes the
|
|
356
|
-
// default's decision via `next(args)` and rewrites only the text. This
|
|
357
|
-
// is the canonical "plugin wraps default" pattern.
|
|
358
|
-
const rewriterPlugin: Plugin = {
|
|
359
|
-
manifest: {
|
|
360
|
-
name: "rewrite-nudge",
|
|
361
|
-
version: "1.0.0",
|
|
362
|
-
},
|
|
363
|
-
middleware: {
|
|
364
|
-
emptyResponse: async (args, next, ctx) => {
|
|
365
|
-
const downstream = await next(args);
|
|
366
|
-
if (downstream.action !== "nudge") return downstream;
|
|
367
|
-
void ctx; // silence lint
|
|
368
|
-
return { action: "nudge", nudgeText: "ALTERED_NUDGE" };
|
|
369
|
-
},
|
|
370
|
-
},
|
|
371
|
-
};
|
|
372
|
-
// Register the custom plugin FIRST so it is the outermost middleware; the
|
|
373
|
-
// default registers second and acts as the inner decision maker.
|
|
374
|
-
registerPlugin(rewriterPlugin);
|
|
375
|
-
registerPlugin(defaultEmptyResponsePlugin);
|
|
376
|
-
|
|
377
|
-
const decision = await runEmpty(
|
|
378
|
-
makeArgs({
|
|
379
|
-
responseContent: [],
|
|
380
|
-
toolUseTurns: 2,
|
|
381
|
-
priorAssistantHadVisibleText: false,
|
|
382
|
-
}),
|
|
383
|
-
);
|
|
384
|
-
expect(decision.action).toBe("nudge");
|
|
385
|
-
expect(decision.nudgeText).toBe("ALTERED_NUDGE");
|
|
386
|
-
});
|
|
387
|
-
|
|
388
|
-
test("user plugin registered AFTER the default still runs (no shadowing)", async () => {
|
|
389
|
-
// Production registration order: defaults load first via the side-effect
|
|
390
|
-
// imports in `defaults/index.ts`, then user plugins register on top via
|
|
391
|
-
// `bootstrapPlugins()`. The user's middleware ends up at a deeper onion
|
|
392
|
-
// layer than the default. If the default's middleware were to bypass
|
|
393
|
-
// `next` and decide directly, the user middleware would never run — this
|
|
394
|
-
// test guards against that regression.
|
|
395
|
-
registerPlugin(defaultEmptyResponsePlugin);
|
|
396
|
-
|
|
397
|
-
let userMiddlewareRan = false;
|
|
398
|
-
const userMiddleware: Middleware<
|
|
399
|
-
EmptyResponseArgs,
|
|
400
|
-
EmptyResponseDecision
|
|
401
|
-
> = async (args, next) => {
|
|
402
|
-
userMiddlewareRan = true;
|
|
403
|
-
return next(args);
|
|
404
|
-
};
|
|
405
|
-
registerPlugin({
|
|
406
|
-
manifest: {
|
|
407
|
-
name: "late-user-empty-response",
|
|
408
|
-
version: "0.0.1",
|
|
409
|
-
},
|
|
410
|
-
middleware: { emptyResponse: userMiddleware },
|
|
411
|
-
});
|
|
412
|
-
|
|
413
|
-
await runEmpty(
|
|
414
|
-
makeArgs({
|
|
415
|
-
responseContent: [],
|
|
416
|
-
toolUseTurns: 2,
|
|
417
|
-
priorAssistantHadVisibleText: false,
|
|
418
|
-
}),
|
|
419
|
-
);
|
|
420
|
-
|
|
421
|
-
expect(userMiddlewareRan).toBe(true);
|
|
422
|
-
});
|
|
423
|
-
});
|
|
@@ -1,287 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Unit tests for the `llmCall` pipeline wrapping (PR 15).
|
|
3
|
-
*
|
|
4
|
-
* Exercises the three behaviors the plan calls out:
|
|
5
|
-
*
|
|
6
|
-
* 1. The default `llmCall` pipeline delegates to `provider.sendMessage(...)`
|
|
7
|
-
* and returns its response unchanged.
|
|
8
|
-
* 2. A spy middleware registered for `llmCall` observes the full argument
|
|
9
|
-
* payload before the provider is called.
|
|
10
|
-
* 3. A short-circuit middleware synthesizes a `ProviderResponse` and prevents
|
|
11
|
-
* the real `provider.sendMessage` from running.
|
|
12
|
-
*/
|
|
13
|
-
|
|
14
|
-
import { afterAll, beforeEach, describe, expect, test } from "bun:test";
|
|
15
|
-
|
|
16
|
-
import type { TrustContext } from "../daemon/trust-context.js";
|
|
17
|
-
import { defaultLlmCallPlugin } from "../plugins/defaults/llm-call/register.js";
|
|
18
|
-
import { DEFAULT_TIMEOUTS, runPipeline } from "../plugins/pipeline.js";
|
|
19
|
-
import {
|
|
20
|
-
getMiddlewaresFor,
|
|
21
|
-
registerPlugin,
|
|
22
|
-
resetPluginRegistryForTests,
|
|
23
|
-
} from "../plugins/registry.js";
|
|
24
|
-
import type {
|
|
25
|
-
LLMCallArgs,
|
|
26
|
-
LLMCallResult,
|
|
27
|
-
Middleware,
|
|
28
|
-
Plugin,
|
|
29
|
-
TurnContext,
|
|
30
|
-
} from "../plugins/types.js";
|
|
31
|
-
import type {
|
|
32
|
-
Message,
|
|
33
|
-
Provider,
|
|
34
|
-
ProviderResponse,
|
|
35
|
-
SendMessageOptions,
|
|
36
|
-
ToolDefinition,
|
|
37
|
-
} from "../providers/types.js";
|
|
38
|
-
|
|
39
|
-
// ─── Fixtures ───────────────────────────────────────────────────────────────
|
|
40
|
-
|
|
41
|
-
const trust: TrustContext = {
|
|
42
|
-
sourceChannel: "vellum",
|
|
43
|
-
trustClass: "guardian",
|
|
44
|
-
};
|
|
45
|
-
|
|
46
|
-
function makeCtx(overrides: Partial<TurnContext> = {}): TurnContext {
|
|
47
|
-
return {
|
|
48
|
-
requestId: "req-test",
|
|
49
|
-
conversationId: "conv-test",
|
|
50
|
-
turnIndex: 0,
|
|
51
|
-
trust,
|
|
52
|
-
...overrides,
|
|
53
|
-
};
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
function makeResponse(
|
|
57
|
-
overrides: Partial<ProviderResponse> = {},
|
|
58
|
-
): ProviderResponse {
|
|
59
|
-
return {
|
|
60
|
-
content: [{ type: "text", text: "hello from provider" }],
|
|
61
|
-
model: "fake-model",
|
|
62
|
-
usage: {
|
|
63
|
-
inputTokens: 10,
|
|
64
|
-
outputTokens: 5,
|
|
65
|
-
},
|
|
66
|
-
stopReason: "end_turn",
|
|
67
|
-
...overrides,
|
|
68
|
-
};
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
type FakeProviderCall = {
|
|
72
|
-
messages: Message[];
|
|
73
|
-
tools?: ToolDefinition[];
|
|
74
|
-
systemPrompt?: string;
|
|
75
|
-
};
|
|
76
|
-
|
|
77
|
-
function makeFakeProvider(
|
|
78
|
-
response: ProviderResponse = makeResponse(),
|
|
79
|
-
): Provider & { calls: FakeProviderCall[] } {
|
|
80
|
-
const calls: FakeProviderCall[] = [];
|
|
81
|
-
return {
|
|
82
|
-
name: "fake-provider",
|
|
83
|
-
calls,
|
|
84
|
-
async sendMessage(messages: Message[], options?: SendMessageOptions) {
|
|
85
|
-
calls.push({
|
|
86
|
-
messages,
|
|
87
|
-
tools: options?.tools,
|
|
88
|
-
systemPrompt: options?.systemPrompt,
|
|
89
|
-
});
|
|
90
|
-
return response;
|
|
91
|
-
},
|
|
92
|
-
};
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
function makeArgs(
|
|
96
|
-
provider: Provider,
|
|
97
|
-
overrides: Partial<LLMCallArgs> = {},
|
|
98
|
-
): LLMCallArgs {
|
|
99
|
-
return {
|
|
100
|
-
provider,
|
|
101
|
-
messages: [{ role: "user", content: [{ type: "text", text: "hi" }] }],
|
|
102
|
-
options: { systemPrompt: "you are a helpful assistant", config: {} },
|
|
103
|
-
...overrides,
|
|
104
|
-
};
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
// The terminal passed into `runPipeline` matches the one in `agent/loop.ts`:
|
|
108
|
-
// it delegates straight to `args.provider.sendMessage(...)` with no
|
|
109
|
-
// transformation. Keeping it identical here means the test exercises the
|
|
110
|
-
// exact call shape the real loop uses.
|
|
111
|
-
const terminal = (args: LLMCallArgs): Promise<LLMCallResult> =>
|
|
112
|
-
args.provider.sendMessage(args.messages, args.options);
|
|
113
|
-
|
|
114
|
-
// ─── Tests ──────────────────────────────────────────────────────────────────
|
|
115
|
-
|
|
116
|
-
describe("llmCall pipeline", () => {
|
|
117
|
-
beforeEach(() => {
|
|
118
|
-
resetPluginRegistryForTests();
|
|
119
|
-
});
|
|
120
|
-
|
|
121
|
-
// Clear the registry on the way out too so later test files in the same
|
|
122
|
-
// `bun test` run don't inherit `llmCall` middleware from our final test.
|
|
123
|
-
// Bun runs files sequentially within a process; `beforeEach` only clears
|
|
124
|
-
// at the start of each case, leaving whatever the final test registered
|
|
125
|
-
// in place for the next file.
|
|
126
|
-
afterAll(() => {
|
|
127
|
-
resetPluginRegistryForTests();
|
|
128
|
-
});
|
|
129
|
-
|
|
130
|
-
test("default pipeline invokes provider.sendMessage and returns its response", async () => {
|
|
131
|
-
registerPlugin(defaultLlmCallPlugin);
|
|
132
|
-
|
|
133
|
-
const expected = makeResponse({ model: "expected-model" });
|
|
134
|
-
const provider = makeFakeProvider(expected);
|
|
135
|
-
const args = makeArgs(provider);
|
|
136
|
-
|
|
137
|
-
const result = await runPipeline<LLMCallArgs, LLMCallResult>(
|
|
138
|
-
"llmCall",
|
|
139
|
-
getMiddlewaresFor("llmCall"),
|
|
140
|
-
terminal,
|
|
141
|
-
args,
|
|
142
|
-
makeCtx(),
|
|
143
|
-
DEFAULT_TIMEOUTS.llmCall,
|
|
144
|
-
);
|
|
145
|
-
|
|
146
|
-
expect(result).toBe(expected);
|
|
147
|
-
expect(provider.calls).toHaveLength(1);
|
|
148
|
-
expect(provider.calls[0]!.messages).toBe(args.messages);
|
|
149
|
-
expect(provider.calls[0]!.systemPrompt).toBe("you are a helpful assistant");
|
|
150
|
-
});
|
|
151
|
-
|
|
152
|
-
test("spy middleware records the full invocation arguments", async () => {
|
|
153
|
-
const observed: LLMCallArgs[] = [];
|
|
154
|
-
const spyPlugin: Plugin = {
|
|
155
|
-
manifest: {
|
|
156
|
-
name: "spy-llm",
|
|
157
|
-
version: "0.0.1",
|
|
158
|
-
},
|
|
159
|
-
middleware: {
|
|
160
|
-
llmCall: async (args, next, _ctx) => {
|
|
161
|
-
observed.push(args);
|
|
162
|
-
return next(args);
|
|
163
|
-
},
|
|
164
|
-
},
|
|
165
|
-
};
|
|
166
|
-
|
|
167
|
-
registerPlugin(spyPlugin);
|
|
168
|
-
registerPlugin(defaultLlmCallPlugin);
|
|
169
|
-
|
|
170
|
-
const provider = makeFakeProvider();
|
|
171
|
-
const tools: ToolDefinition[] = [
|
|
172
|
-
{
|
|
173
|
-
name: "echo",
|
|
174
|
-
description: "echoes its input",
|
|
175
|
-
input_schema: { type: "object" },
|
|
176
|
-
},
|
|
177
|
-
];
|
|
178
|
-
const args = makeArgs(provider, {
|
|
179
|
-
options: {
|
|
180
|
-
tools,
|
|
181
|
-
systemPrompt: "you are a helpful assistant",
|
|
182
|
-
config: {},
|
|
183
|
-
},
|
|
184
|
-
});
|
|
185
|
-
|
|
186
|
-
await runPipeline<LLMCallArgs, LLMCallResult>(
|
|
187
|
-
"llmCall",
|
|
188
|
-
getMiddlewaresFor("llmCall"),
|
|
189
|
-
terminal,
|
|
190
|
-
args,
|
|
191
|
-
makeCtx(),
|
|
192
|
-
DEFAULT_TIMEOUTS.llmCall,
|
|
193
|
-
);
|
|
194
|
-
|
|
195
|
-
expect(observed).toHaveLength(1);
|
|
196
|
-
expect(observed[0]!.provider).toBe(provider);
|
|
197
|
-
expect(observed[0]!.messages).toBe(args.messages);
|
|
198
|
-
expect(observed[0]!.options?.tools).toBe(tools);
|
|
199
|
-
expect(observed[0]!.options?.systemPrompt).toBe(
|
|
200
|
-
"you are a helpful assistant",
|
|
201
|
-
);
|
|
202
|
-
expect(provider.calls).toHaveLength(1);
|
|
203
|
-
});
|
|
204
|
-
|
|
205
|
-
test("default registered first does not shadow later-registered user middleware", async () => {
|
|
206
|
-
// The default plugin registers at module load (before `bootstrapPlugins()`
|
|
207
|
-
// loads user plugins), so it sits at the outermost layer in the onion.
|
|
208
|
-
// This test registers the default FIRST (matching production ordering)
|
|
209
|
-
// and asserts that a user-registered spy still runs — confirming that
|
|
210
|
-
// the outermost middleware forwards via `next(args)` rather than
|
|
211
|
-
// short-circuiting the chain.
|
|
212
|
-
const observed: LLMCallArgs[] = [];
|
|
213
|
-
const spyPlugin: Plugin = {
|
|
214
|
-
manifest: {
|
|
215
|
-
name: "spy-llm-after-default",
|
|
216
|
-
version: "0.0.1",
|
|
217
|
-
},
|
|
218
|
-
middleware: {
|
|
219
|
-
llmCall: async (args, next, _ctx) => {
|
|
220
|
-
observed.push(args);
|
|
221
|
-
return next(args);
|
|
222
|
-
},
|
|
223
|
-
},
|
|
224
|
-
};
|
|
225
|
-
|
|
226
|
-
registerPlugin(defaultLlmCallPlugin);
|
|
227
|
-
registerPlugin(spyPlugin);
|
|
228
|
-
|
|
229
|
-
const provider = makeFakeProvider();
|
|
230
|
-
const args = makeArgs(provider);
|
|
231
|
-
|
|
232
|
-
await runPipeline<LLMCallArgs, LLMCallResult>(
|
|
233
|
-
"llmCall",
|
|
234
|
-
getMiddlewaresFor("llmCall"),
|
|
235
|
-
terminal,
|
|
236
|
-
args,
|
|
237
|
-
makeCtx(),
|
|
238
|
-
DEFAULT_TIMEOUTS.llmCall,
|
|
239
|
-
);
|
|
240
|
-
|
|
241
|
-
expect(observed).toHaveLength(1);
|
|
242
|
-
expect(observed[0]!.provider).toBe(provider);
|
|
243
|
-
expect(provider.calls).toHaveLength(1);
|
|
244
|
-
});
|
|
245
|
-
|
|
246
|
-
test("short-circuit middleware prevents the real provider call", async () => {
|
|
247
|
-
const synthetic = makeResponse({
|
|
248
|
-
model: "synthetic-model",
|
|
249
|
-
content: [{ type: "text", text: "synthesized" }],
|
|
250
|
-
});
|
|
251
|
-
|
|
252
|
-
const shortCircuit: Middleware<LLMCallArgs, LLMCallResult> = async (
|
|
253
|
-
_args,
|
|
254
|
-
_next,
|
|
255
|
-
_ctx,
|
|
256
|
-
) => synthetic;
|
|
257
|
-
|
|
258
|
-
const shortCircuitPlugin: Plugin = {
|
|
259
|
-
manifest: {
|
|
260
|
-
name: "short-circuit-llm",
|
|
261
|
-
version: "0.0.1",
|
|
262
|
-
},
|
|
263
|
-
middleware: { llmCall: shortCircuit },
|
|
264
|
-
};
|
|
265
|
-
|
|
266
|
-
registerPlugin(shortCircuitPlugin);
|
|
267
|
-
registerPlugin(defaultLlmCallPlugin);
|
|
268
|
-
|
|
269
|
-
const provider = makeFakeProvider();
|
|
270
|
-
const args = makeArgs(provider);
|
|
271
|
-
|
|
272
|
-
const result = await runPipeline<LLMCallArgs, LLMCallResult>(
|
|
273
|
-
"llmCall",
|
|
274
|
-
getMiddlewaresFor("llmCall"),
|
|
275
|
-
terminal,
|
|
276
|
-
args,
|
|
277
|
-
makeCtx(),
|
|
278
|
-
DEFAULT_TIMEOUTS.llmCall,
|
|
279
|
-
);
|
|
280
|
-
|
|
281
|
-
expect(result).toBe(synthetic);
|
|
282
|
-
// The short-circuit middleware never calls `next`, so the terminal and
|
|
283
|
-
// every downstream middleware (including the default) are skipped and
|
|
284
|
-
// the provider is never contacted.
|
|
285
|
-
expect(provider.calls).toHaveLength(0);
|
|
286
|
-
});
|
|
287
|
-
});
|