@vellumai/assistant 0.8.7 → 0.8.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +20 -4
- package/docker-entrypoint.sh +4 -2
- package/docker-init-apt-root.sh +3 -1
- package/docker-kata-apt-env.sh +3 -1
- package/docker-kata-runtime-family.sh +12 -0
- package/docs/architecture/memory.md +1 -1
- package/docs/plugins.md +75 -79
- package/examples/plugins/echo/README.md +6 -12
- package/examples/plugins/echo/register.ts +0 -41
- package/node_modules/@vellumai/skill-host-contracts/src/server-message.ts +3 -3
- package/openapi.yaml +3381 -348
- package/package.json +1 -1
- package/scripts/generate-openapi.ts +68 -41
- package/src/__tests__/agent-loop-exit-reason.test.ts +34 -39
- package/src/__tests__/agent-loop-provider-error-recording.test.ts +1 -1
- package/src/__tests__/agent-loop.test.ts +37 -87
- package/src/__tests__/agent-wake-disk-pressure-callsite.test.ts +2 -0
- package/src/__tests__/annotate-activity-metadata.test.ts +262 -0
- package/src/__tests__/annotate-risk-options.test.ts +2 -3
- package/src/__tests__/anthropic-provider.test.ts +95 -2
- package/src/__tests__/assistant-event-hub.test.ts +25 -0
- package/src/__tests__/assistant-events-sse-shed.test.ts +8 -0
- package/src/__tests__/{conversation-stream-state.test.ts → assistant-stream-state.test.ts} +252 -91
- package/src/__tests__/auth-fallback-events-store.test.ts +116 -0
- package/src/__tests__/background-workers-disk-pressure.test.ts +6 -0
- package/src/__tests__/btw-routes.test.ts +62 -3
- package/src/__tests__/build-persisted-content.test.ts +184 -0
- package/src/__tests__/catalog-files.test.ts +1 -1
- package/src/__tests__/clawhub-files.test.ts +1 -1
- package/src/__tests__/compaction-pipeline.test.ts +1 -1
- package/src/__tests__/compaction.benchmark.test.ts +0 -30
- package/src/__tests__/config-watcher.test.ts +1 -1
- package/src/__tests__/conversation-abort-tool-results.test.ts +57 -19
- package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +6 -2
- package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +10 -4
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +313 -1136
- package/src/__tests__/conversation-agent-loop.test.ts +596 -1616
- package/src/__tests__/conversation-analysis-routes.test.ts +6 -0
- package/src/__tests__/conversation-history-web-search.test.ts +11 -1
- package/src/__tests__/conversation-pairing.test.ts +4 -31
- package/src/__tests__/conversation-process-app-control-preactivation.test.ts +6 -0
- package/src/__tests__/conversation-provider-retry-repair.test.ts +26 -5
- package/src/__tests__/conversation-queue.test.ts +2 -0
- package/src/__tests__/conversation-routes-disk-view.test.ts +3 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +6 -5
- package/src/__tests__/conversation-runtime-assembly.test.ts +170 -229
- package/src/__tests__/conversation-runtime-workspace.test.ts +3 -24
- package/src/__tests__/conversation-slash-commands.test.ts +8 -42
- package/src/__tests__/conversation-slash-queue.test.ts +6 -1
- package/src/__tests__/conversation-surfaces-action-delivery.test.ts +84 -0
- package/src/__tests__/conversation-sync-tags.test.ts +27 -15
- package/src/__tests__/conversation-title-service.test.ts +135 -2
- package/src/__tests__/conversation-workspace-injection.test.ts +6 -1
- package/src/__tests__/cross-provider-web-search.test.ts +214 -1
- package/src/__tests__/db-schedule-syntax-migration.test.ts +5 -0
- package/src/__tests__/dm-persistence.test.ts +5 -1
- package/src/__tests__/empty-response-hook.test.ts +304 -0
- package/src/__tests__/feature-flag-test-helpers.ts +2 -2
- package/src/__tests__/gemini-image-service.test.ts +13 -0
- package/src/__tests__/helpers/mock-provider.ts +110 -0
- package/src/__tests__/helpers/native-web-search-harness.ts +129 -0
- package/src/__tests__/history-repair-hook.test.ts +1 -0
- package/src/__tests__/identity-intro-cache.test.ts +12 -100
- package/src/__tests__/identity-routes.test.ts +248 -7
- package/src/__tests__/inbound-slack-persistence.test.ts +5 -1
- package/src/__tests__/injector-background-turn.test.ts +2 -8
- package/src/__tests__/injector-chain.test.ts +106 -270
- package/src/__tests__/injector-disk-pressure.test.ts +3 -12
- package/src/__tests__/injector-document-comments.test.ts +2 -2
- package/src/__tests__/injector-pkb-v2-silenced.test.ts +30 -22
- package/src/__tests__/injector-v3-suppression.test.ts +31 -37
- package/src/__tests__/internal-telemetry-routes.test.ts +109 -0
- package/src/__tests__/list-messages-page-latest.test.ts +60 -0
- package/src/__tests__/list-messages-tool-merge.test.ts +20 -0
- package/src/__tests__/llm-usage-store.test.ts +223 -1
- package/src/__tests__/memory-retrieval-hook.test.ts +297 -0
- package/src/__tests__/memory-v2-static-injector.test.ts +103 -35
- package/src/__tests__/native-web-search.test.ts +191 -0
- package/src/__tests__/onboarding-template-contract.test.ts +2 -0
- package/src/__tests__/openai-image-service.test.ts +17 -0
- package/src/__tests__/openai-provider.test.ts +31 -1
- package/src/__tests__/persist-unsendable-image.test.ts +215 -0
- package/src/__tests__/persistence-secret-redaction.test.ts +1 -0
- package/src/__tests__/pipeline-runner.test.ts +29 -39
- package/src/__tests__/pkb-autoinject.test.ts +2 -5
- package/src/__tests__/plugin-bootstrap.test.ts +13 -28
- package/src/__tests__/plugin-registry.test.ts +0 -27
- package/src/__tests__/plugin-types.test.ts +2 -125
- package/src/__tests__/process-message-display-content.test.ts +6 -2
- package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +5 -1
- package/src/__tests__/resolve-trust-class.test.ts +4 -4
- package/src/__tests__/runtime-events-sse-reconnect.test.ts +60 -23
- package/src/__tests__/schedule-routes.test.ts +603 -2
- package/src/__tests__/schedule-store.test.ts +41 -0
- package/src/__tests__/schedule-tools.test.ts +35 -0
- package/src/__tests__/server-history-render.test.ts +314 -1
- package/src/__tests__/skillssh-files.test.ts +1 -1
- package/src/__tests__/system-prompt.test.ts +20 -0
- package/src/__tests__/task-scheduler.test.ts +162 -1
- package/src/__tests__/terminal-tools.test.ts +6 -1
- package/src/__tests__/title-generate-hook.test.ts +319 -0
- package/src/__tests__/tool-error-hook.test.ts +278 -0
- package/src/__tests__/tool-preview-lifecycle.test.ts +468 -5
- package/src/__tests__/tool-result-metadata-plumbing.test.ts +1 -0
- package/src/__tests__/tool-result-truncate-hook.test.ts +127 -0
- package/src/__tests__/tool-result-truncation.test.ts +0 -2
- package/src/__tests__/ui-choice-copy-surfaces.test.ts +254 -0
- package/src/__tests__/ui-work-result-surface.test.ts +159 -0
- package/src/__tests__/usage-routes.test.ts +285 -1
- package/src/__tests__/user-plugin-loader.test.ts +2 -2
- package/src/__tests__/voice-session-bridge.test.ts +6 -3
- package/src/__tests__/web-search-backend-failure.test.ts +166 -0
- package/src/agent/loop.ts +346 -442
- package/src/api/events/assistant-thinking-delta.ts +33 -0
- package/src/api/events/tool-output-chunk.ts +45 -0
- package/src/api/events/tool-use-preview-start.ts +32 -0
- package/src/api/events/trace-event.ts +69 -0
- package/src/api/index.ts +48 -13
- package/src/api/responses/conversation-message.ts +368 -0
- package/src/avatar/__tests__/avatar-store.test.ts +34 -29
- package/src/cli/commands/__tests__/notifications.test.ts +58 -14
- package/src/cli/commands/notifications.ts +112 -60
- package/src/config/assistant-feature-flags.ts +22 -11
- package/src/config/bundled-skills/app-builder/SKILL.md +3 -20
- package/src/config/bundled-skills/app-builder/references/examples/README.md +17 -0
- package/src/config/bundled-skills/app-builder/references/examples/expense-tracker.md +515 -0
- package/src/config/bundled-skills/app-builder/references/examples/focus-timer.md +342 -0
- package/src/config/bundled-skills/app-builder/references/examples/habit-tracker.md +490 -0
- package/src/config/bundled-skills/document-editor/SKILL.md +1 -1
- package/src/config/bundled-skills/messaging/SKILL.md +0 -7
- package/src/config/feature-flag-cache.ts +3 -3
- package/src/config/feature-flag-registry.json +35 -3
- package/src/config/schemas/__tests__/memory-v2.test.ts +1 -0
- package/src/config/schemas/__tests__/memory-v3.test.ts +25 -0
- package/src/config/schemas/llm.ts +1 -0
- package/src/config/schemas/memory-v2.ts +8 -0
- package/src/config/schemas/memory-v3.ts +8 -0
- package/src/config/schemas/platform.ts +8 -0
- package/src/config/seed-inference-profiles.ts +2 -2
- package/src/config/skills.ts +13 -0
- package/src/context/compactor.ts +1 -1
- package/src/context/strip-injections.ts +122 -0
- package/src/context/token-estimator.ts +23 -0
- package/src/context/tool-result-truncation.ts +0 -23
- package/src/context/window-manager.ts +3 -6
- package/src/credential-execution/executable-discovery.ts +16 -0
- package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +6 -0
- package/src/daemon/__tests__/inference-profile-notification.test.ts +153 -0
- package/src/daemon/__tests__/native-web-search-metadata.test.ts +10 -8
- package/src/daemon/assistant-attachments.ts +1 -1
- package/src/daemon/config-watcher.ts +2 -2
- package/src/daemon/context-overflow-reducer.ts +0 -1
- package/src/daemon/conversation-agent-loop-handlers.ts +605 -153
- package/src/daemon/conversation-agent-loop.ts +281 -760
- package/src/daemon/conversation-history.ts +5 -4
- package/src/daemon/conversation-lifecycle.ts +3 -4
- package/src/daemon/conversation-messaging.ts +7 -6
- package/src/daemon/conversation-process.ts +11 -16
- package/src/daemon/conversation-runtime-assembly.ts +130 -347
- package/src/daemon/conversation-slash.ts +6 -25
- package/src/daemon/conversation-surfaces.ts +222 -4
- package/src/daemon/conversation-tool-setup.ts +2 -29
- package/src/daemon/conversation.ts +32 -14
- package/src/daemon/external-plugins-bootstrap.ts +9 -10
- package/src/daemon/handlers/config-a2a.ts +51 -36
- package/src/daemon/handlers/config-slack-channel.ts +20 -14
- package/src/daemon/handlers/config-telegram.ts +16 -2
- package/src/daemon/handlers/shared.ts +156 -84
- package/src/daemon/handlers/skills.ts +39 -10
- package/src/daemon/lifecycle.ts +4 -0
- package/src/daemon/message-types/apps.ts +1 -29
- package/src/daemon/message-types/messages.ts +9 -57
- package/src/daemon/message-types/skills.ts +2 -0
- package/src/daemon/message-types/surfaces.ts +136 -3
- package/src/daemon/now-scratchpad.ts +21 -0
- package/src/daemon/orphan-reaper.test.ts +210 -0
- package/src/daemon/orphan-reaper.ts +240 -0
- package/src/daemon/persist-unsendable-image.ts +117 -0
- package/src/daemon/process-message.ts +1 -3
- package/src/daemon/trace-emitter.ts +6 -4
- package/src/daemon/trust-context.ts +19 -0
- package/src/daemon/wake-target-adapter.ts +3 -1
- package/src/home/home-greeting-cache.ts +24 -1
- package/src/ipc/gateway-client.test.ts +2 -2
- package/src/ipc/gateway-client.ts +3 -3
- package/src/media/gemini-image-service.ts +15 -0
- package/src/media/openai-image-service.ts +14 -0
- package/src/media/types.ts +34 -0
- package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +56 -0
- package/src/memory/auth-fallback-events-store.ts +94 -0
- package/src/memory/conversation-title-service.ts +65 -41
- package/src/memory/db-init.ts +4 -0
- package/src/memory/graph/__tests__/conversation-graph-memory-registry.test.ts +119 -0
- package/src/memory/graph/conversation-graph-memory.ts +65 -0
- package/src/memory/jobs-store.ts +33 -0
- package/src/memory/jobs-worker.ts +31 -4
- package/src/memory/llm-usage-store.ts +224 -50
- package/src/memory/migrations/222-strip-placeholder-sentinels-from-messages.ts +6 -5
- package/src/memory/migrations/270-schedule-source-conversation.ts +13 -0
- package/src/memory/migrations/271-create-auth-fallback-events.ts +21 -0
- package/src/memory/migrations/index.ts +2 -0
- package/src/memory/pkb/autoinject.ts +61 -0
- package/src/memory/pkb/context.ts +50 -0
- package/src/memory/pkb/types.ts +14 -0
- package/src/memory/schedule-attribution-sql.ts +104 -0
- package/src/memory/schema/infrastructure.ts +16 -0
- package/src/memory/usage-grouped-buckets.ts +6 -1
- package/src/memory/v2/__tests__/consolidation-job.test.ts +1 -1
- package/src/memory/v2/consolidation-job.ts +1 -1
- package/src/memory/v3/__tests__/health.test.ts +16 -0
- package/src/memory/v3/__tests__/orchestrate.test.ts +45 -9
- package/src/memory/v3/__tests__/provider-blocks.test.ts +13 -0
- package/src/memory/v3/__tests__/router.test.ts +101 -29
- package/src/memory/v3/__tests__/selector.test.ts +93 -27
- package/src/memory/v3/__tests__/shadow-plugin.test.ts +23 -5
- package/src/memory/v3/health.ts +0 -0
- package/src/memory/v3/llm-retry.ts +32 -0
- package/src/memory/v3/orchestrate.ts +26 -14
- package/src/memory/v3/provider-blocks.ts +15 -5
- package/src/memory/v3/router.ts +48 -42
- package/src/memory/v3/selector.ts +57 -42
- package/src/memory/v3/shadow-plugin.ts +47 -15
- package/src/memory/v3/types.ts +8 -0
- package/src/notifications/conversation-pairing.ts +8 -15
- package/src/notifications/decision-engine.ts +6 -3
- package/src/notifications/home-feed-side-effect.ts +12 -1
- package/src/permissions/prompter.ts +4 -0
- package/src/plugin-api/constants.ts +4 -0
- package/src/plugin-api/index.ts +8 -1
- package/src/plugin-api/types.ts +151 -1
- package/src/plugins/defaults/empty-response/hooks/stop.ts +126 -0
- package/src/plugins/defaults/empty-response/register.ts +8 -13
- package/src/plugins/defaults/index.ts +1 -15
- package/src/plugins/defaults/injectors/register.ts +243 -74
- package/src/plugins/defaults/memory-retrieval/hooks/post-compact.ts +91 -0
- package/src/plugins/defaults/memory-retrieval/hooks/user-prompt-submit-temp.ts +216 -0
- package/src/plugins/defaults/memory-retrieval/injector-chain.ts +35 -0
- package/src/plugins/defaults/title-generate/hooks/stop.ts +75 -0
- package/src/plugins/defaults/title-generate/hooks/user-prompt-submit.ts +35 -0
- package/src/plugins/defaults/title-generate/package.json +1 -1
- package/src/plugins/defaults/title-generate/register.ts +18 -18
- package/src/plugins/defaults/tool-error/hooks/post-tool-use.ts +118 -0
- package/src/plugins/defaults/tool-error/package.json +1 -1
- package/src/plugins/defaults/tool-error/register.ts +9 -21
- package/src/plugins/defaults/tool-result-truncate/hooks/post-tool-use.ts +32 -0
- package/src/plugins/defaults/tool-result-truncate/register.ts +10 -21
- package/src/plugins/defaults/tool-result-truncate/terminal.ts +37 -18
- package/src/plugins/pipeline.ts +6 -18
- package/src/plugins/registry.ts +8 -25
- package/src/plugins/types.ts +43 -474
- package/src/proactive-artifact/aux-message-injector.ts +3 -3
- package/src/proactive-artifact/job.test.ts +7 -12
- package/src/prompts/__tests__/system-prompt.test.ts +36 -0
- package/src/prompts/templates/BOOTSTRAP-ACTIVATION-RAIL.md +62 -0
- package/src/prompts/templates/BOOTSTRAP.md +2 -2
- package/src/prompts/templates/system-sections.ts +15 -0
- package/src/providers/anthropic/client.ts +37 -29
- package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +112 -0
- package/src/providers/openai/chat-completions-provider.ts +44 -0
- package/src/providers/openrouter/client.ts +1 -0
- package/src/providers/placeholder-sentinels.ts +35 -0
- package/src/runtime/__tests__/agent-wake.test.ts +5 -1
- package/src/runtime/agent-wake.ts +2 -2
- package/src/runtime/assistant-event-hub.ts +36 -6
- package/src/runtime/{conversation-stream-state.ts → assistant-stream-state.ts} +132 -58
- package/src/runtime/http-router.ts +16 -21
- package/src/runtime/http-types.ts +16 -70
- package/src/runtime/pending-interactions.ts +1 -0
- package/src/runtime/routes/__tests__/consolidation-routes.test.ts +265 -2
- package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +31 -1
- package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +6 -2
- package/src/runtime/routes/__tests__/tts-routes.test.ts +6 -2
- package/src/runtime/routes/app-management-routes.ts +6 -117
- package/src/runtime/routes/app-routes.ts +13 -15
- package/src/runtime/routes/attachment-routes.ts +26 -15
- package/src/runtime/routes/avatar-routes.ts +26 -0
- package/src/runtime/routes/btw-routes.ts +29 -23
- package/src/runtime/routes/consolidation-routes.ts +120 -20
- package/src/runtime/routes/conversation-query-routes.ts +2 -0
- package/src/runtime/routes/conversation-routes.ts +358 -184
- package/src/runtime/routes/documents-routes.ts +4 -0
- package/src/runtime/routes/domain-routes.ts +51 -37
- package/src/runtime/routes/epoch-millis-range.ts +34 -0
- package/src/runtime/routes/events-routes.ts +28 -34
- package/src/runtime/routes/gateway-log-routes.ts +26 -4
- package/src/runtime/routes/heartbeat-routes.ts +32 -12
- package/src/runtime/routes/identity-intro-cache.ts +11 -34
- package/src/runtime/routes/identity-routes.ts +208 -17
- package/src/runtime/routes/image-generation-routes.ts +40 -2
- package/src/runtime/routes/index.ts +2 -0
- package/src/runtime/routes/integrations/a2a.ts +12 -10
- package/src/runtime/routes/integrations/slack/__tests__/channel.test.ts +16 -0
- package/src/runtime/routes/integrations/slack/channel.ts +4 -0
- package/src/runtime/routes/integrations/slack/share.ts +27 -6
- package/src/runtime/routes/integrations/telegram.ts +6 -0
- package/src/runtime/routes/integrations/twilio.ts +42 -0
- package/src/runtime/routes/internal-telemetry-routes.ts +88 -0
- package/src/runtime/routes/log-export-routes.ts +8 -0
- package/src/runtime/routes/memory-v2-routes.ts +15 -8
- package/src/runtime/routes/memory-v3-routes.ts +50 -28
- package/src/runtime/routes/oauth-apps.ts +66 -12
- package/src/runtime/routes/oauth-providers.ts +44 -5
- package/src/runtime/routes/platform-routes.ts +81 -5
- package/src/runtime/routes/playground/__tests__/force-compact.test.ts +6 -4
- package/src/runtime/routes/playground/force-compact.ts +1 -1
- package/src/runtime/routes/rename-conversation-routes.ts +5 -0
- package/src/runtime/routes/schedule-routes.ts +152 -42
- package/src/runtime/routes/secret-routes.ts +14 -2
- package/src/runtime/routes/skills-routes.ts +43 -14
- package/src/runtime/routes/tool-call-confirmation-enrichment.test.ts +161 -0
- package/src/runtime/routes/tool-call-confirmation-enrichment.ts +107 -0
- package/src/runtime/routes/trust-rules-routes.ts +26 -2
- package/src/runtime/routes/tts-routes.ts +35 -0
- package/src/runtime/routes/types.ts +66 -8
- package/src/runtime/routes/usage-routes.ts +47 -39
- package/src/runtime/routes/webhook-routes.ts +41 -2
- package/src/runtime/routes/workspace-routes.ts +4 -0
- package/src/runtime/services/__tests__/analyze-conversation.test.ts +6 -0
- package/src/runtime/services/analyze-conversation.ts +2 -2
- package/src/schedule/schedule-store.ts +20 -1
- package/src/schedule/schedule-usage-store.ts +83 -0
- package/src/schedule/scheduler.ts +12 -5
- package/src/skills/catalog-files.ts +2 -2
- package/src/skills/catalog-install.ts +3 -0
- package/src/skills/categories-cache.ts +118 -0
- package/src/skills/clawhub-files.ts +1 -2
- package/src/skills/skillssh-files.ts +1 -2
- package/src/telemetry/types.ts +29 -1
- package/src/telemetry/usage-telemetry-reporter.test.ts +112 -3
- package/src/telemetry/usage-telemetry-reporter.ts +57 -2
- package/src/tools/executor.ts +1 -53
- package/src/tools/network/__tests__/web-search-metadata.test.ts +7 -1
- package/src/tools/network/__tests__/web-search.test.ts +11 -3
- package/src/tools/network/web-search-error.test.ts +248 -0
- package/src/tools/network/web-search-error.ts +267 -0
- package/src/tools/network/web-search.ts +207 -48
- package/src/tools/schedule/create.ts +2 -0
- package/src/tools/terminal/safe-env.ts +10 -1
- package/src/tools/ui-surface/definitions.ts +9 -1
- package/src/tts/__tests__/provider-catalog-consistency.test.ts +85 -1
- package/src/tts/provider-catalog.ts +76 -1
- package/src/util/mutex.ts +47 -0
- package/src/workspace/git-service.ts +1 -42
- package/src/workspace/migrations/095-bump-heartbeat-interval-30m-to-60m.ts +51 -0
- package/src/workspace/migrations/096-reduce-quality-profile-effort.ts +72 -0
- package/src/workspace/migrations/097-enable-adaptive-thinking-managed-profiles.ts +93 -0
- package/src/workspace/migrations/registry.ts +6 -0
- package/src/__tests__/bootstrap-turn-cleanup.test.ts +0 -44
- package/src/__tests__/empty-response-pipeline.test.ts +0 -423
- package/src/__tests__/llm-call-pipeline.test.ts +0 -287
- package/src/__tests__/memory-retrieval-pipeline.test.ts +0 -418
- package/src/__tests__/persistence-pipeline.test.ts +0 -503
- package/src/__tests__/title-generate-pipeline.test.ts +0 -211
- package/src/__tests__/token-estimate-pipeline.test.ts +0 -479
- package/src/__tests__/tool-error-pipeline.test.ts +0 -241
- package/src/__tests__/tool-execute-pipeline.test.ts +0 -417
- package/src/__tests__/tool-result-truncate-pipeline.test.ts +0 -341
- package/src/daemon/bootstrap-turn-cleanup.ts +0 -45
- package/src/gallery/default-gallery.ts +0 -1359
- package/src/gallery/gallery-manifest.ts +0 -28
- package/src/home/feature-gate.ts +0 -22
- package/src/plugins/defaults/empty-response/middlewares/emptyResponse.ts +0 -22
- package/src/plugins/defaults/empty-response/terminal.ts +0 -106
- package/src/plugins/defaults/injectors/package.json +0 -15
- package/src/plugins/defaults/llm-call/middlewares/llmCall.ts +0 -17
- package/src/plugins/defaults/llm-call/package.json +0 -15
- package/src/plugins/defaults/llm-call/register.ts +0 -45
- package/src/plugins/defaults/memory-retrieval/middlewares/memoryRetrieval.ts +0 -17
- package/src/plugins/defaults/memory-retrieval/package.json +0 -15
- package/src/plugins/defaults/memory-retrieval/register.ts +0 -181
- package/src/plugins/defaults/persistence/middlewares/persistence.ts +0 -19
- package/src/plugins/defaults/persistence/package.json +0 -15
- package/src/plugins/defaults/persistence/register.ts +0 -38
- package/src/plugins/defaults/persistence/terminal.ts +0 -83
- package/src/plugins/defaults/title-generate/terminal.ts +0 -31
- package/src/plugins/defaults/token-estimate/middlewares/tokenEstimate.ts +0 -23
- package/src/plugins/defaults/token-estimate/package.json +0 -15
- package/src/plugins/defaults/token-estimate/register.ts +0 -34
- package/src/plugins/defaults/token-estimate/terminal.ts +0 -40
- package/src/plugins/defaults/tool-error/middlewares/toolError.ts +0 -21
- package/src/plugins/defaults/tool-error/terminal.ts +0 -47
- package/src/plugins/defaults/tool-execute/middlewares/toolExecute.ts +0 -23
- package/src/plugins/defaults/tool-execute/package.json +0 -15
- package/src/plugins/defaults/tool-execute/register.ts +0 -49
- package/src/plugins/defaults/tool-result-truncate/middlewares/toolResultTruncate.ts +0 -23
- package/src/plugins/defaults/tool-result-truncate/types.ts +0 -22
- package/src/skills/category-inference.ts +0 -111
|
@@ -1,26 +1,18 @@
|
|
|
1
1
|
import { createRequire } from "node:module";
|
|
2
|
-
import {
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
} from "
|
|
11
|
-
|
|
2
|
+
import {
|
|
3
|
+
afterAll,
|
|
4
|
+
beforeEach,
|
|
5
|
+
describe,
|
|
6
|
+
expect,
|
|
7
|
+
mock,
|
|
8
|
+
spyOn,
|
|
9
|
+
test,
|
|
10
|
+
} from "bun:test";
|
|
11
|
+
|
|
12
|
+
import type { LoopToolExecutor } from "../agent/loop.js";
|
|
12
13
|
import type { ServerMessage } from "../daemon/message-protocol.js";
|
|
13
|
-
import { defaultCompactionTerminal } from "../plugins/defaults/compaction/terminal.js";
|
|
14
14
|
import { resetPluginRegistryAndRegisterDefaults } from "../plugins/defaults/index.js";
|
|
15
|
-
import {
|
|
16
|
-
import { getMiddlewaresFor } from "../plugins/registry.js";
|
|
17
|
-
import type {
|
|
18
|
-
CompactionArgs,
|
|
19
|
-
CompactionResult,
|
|
20
|
-
TurnContext,
|
|
21
|
-
} from "../plugins/types.js";
|
|
22
|
-
import { PluginTimeoutError } from "../plugins/types.js";
|
|
23
|
-
import type { ContentBlock, Message } from "../providers/types.js";
|
|
15
|
+
import type { Message, Provider, ToolDefinition } from "../providers/types.js";
|
|
24
16
|
|
|
25
17
|
const conversationCrudRealSnapshot = {
|
|
26
18
|
...(createRequire(import.meta.url)(
|
|
@@ -76,6 +68,7 @@ mock.module("../config/loader.js", () => ({
|
|
|
76
68
|
memory: { retrieval: { scratchpadInjection: { enabled: true } } },
|
|
77
69
|
ui: mockUiConfig,
|
|
78
70
|
compaction: { enabled: true, autoThreshold: 0.7 },
|
|
71
|
+
conversations: { skipAutoRetitling: true },
|
|
79
72
|
}),
|
|
80
73
|
loadRawConfig: () => ({}),
|
|
81
74
|
saveRawConfig: () => {},
|
|
@@ -86,17 +79,20 @@ mock.module("../config/loader.js", () => ({
|
|
|
86
79
|
|
|
87
80
|
// Token estimator returns a small value by default (well within budget)
|
|
88
81
|
// so preflight does not trigger unless the test overrides it. Both the
|
|
89
|
-
// calibrated entry point (`estimatePromptTokens`,
|
|
90
|
-
// path) and the raw entry point
|
|
91
|
-
//
|
|
82
|
+
// calibrated entry point (`estimatePromptTokens`, which backs the preflight
|
|
83
|
+
// overflow gate and the convergence path) and the raw entry point
|
|
84
|
+
// (`estimatePromptTokensRaw`, used by the pre-send calibration capture) are
|
|
92
85
|
// stubbed so either call site can drive the test.
|
|
93
86
|
let mockEstimateTokens = 1000;
|
|
94
87
|
mock.module("../context/token-estimator.js", () => ({
|
|
95
88
|
estimatePromptTokens: () => mockEstimateTokens,
|
|
96
89
|
estimatePromptTokensRaw: () => mockEstimateTokens,
|
|
97
|
-
//
|
|
98
|
-
//
|
|
99
|
-
//
|
|
90
|
+
// The preflight overflow gate calls this calibrated wrapper directly, so it
|
|
91
|
+
// must honor `mockEstimateTokens` too rather than fall through to the real
|
|
92
|
+
// implementation.
|
|
93
|
+
estimatePromptTokensWithTools: () => mockEstimateTokens,
|
|
94
|
+
// Pass-through: `estimatePromptTokensWithTools` computes `toolTokenBudget`
|
|
95
|
+
// via this helper. Return 0 so the mocked estimate is not perturbed.
|
|
100
96
|
estimateToolsTokens: () => 0,
|
|
101
97
|
}));
|
|
102
98
|
|
|
@@ -364,15 +360,6 @@ mock.module("../daemon/conversation-runtime-assembly.js", () => ({
|
|
|
364
360
|
applyRuntimeInjections: applyRuntimeInjectionsMock,
|
|
365
361
|
buildUnifiedTurnContextBlock: buildUnifiedTurnContextBlockMock,
|
|
366
362
|
stripInjectionsForCompaction: (msgs: Message[]) => msgs,
|
|
367
|
-
findLastInjectedNowContent: () => null,
|
|
368
|
-
readNowScratchpad: () => null,
|
|
369
|
-
readPkbContext: () => null,
|
|
370
|
-
getPkbAutoInjectList: () => [
|
|
371
|
-
"INDEX.md",
|
|
372
|
-
"essentials.md",
|
|
373
|
-
"threads.md",
|
|
374
|
-
"buffer.md",
|
|
375
|
-
],
|
|
376
363
|
isSlackChannelConversation: () => false,
|
|
377
364
|
getSlackCompactionWatermarkForPrefix:
|
|
378
365
|
getSlackCompactionWatermarkForPrefixMock,
|
|
@@ -549,195 +536,78 @@ mock.module("../proactive-artifact/index.js", () => ({
|
|
|
549
536
|
|
|
550
537
|
// ── Imports (after mocks) ────────────────────────────────────────────
|
|
551
538
|
|
|
539
|
+
import { AgentLoop } from "../agent/loop.js";
|
|
552
540
|
import {
|
|
553
541
|
type AgentLoopConversationContext,
|
|
554
542
|
applyCompactionResult,
|
|
555
543
|
runAgentLoopImpl,
|
|
556
544
|
} from "../daemon/conversation-agent-loop.js";
|
|
545
|
+
import {
|
|
546
|
+
createMockProvider,
|
|
547
|
+
type ScriptedResponse,
|
|
548
|
+
textResponse,
|
|
549
|
+
toolUseResponse,
|
|
550
|
+
} from "./helpers/mock-provider.js";
|
|
557
551
|
|
|
558
552
|
// ── Test helpers ─────────────────────────────────────────────────────
|
|
559
553
|
|
|
560
|
-
type AgentLoopRun = (
|
|
561
|
-
messages: Message[],
|
|
562
|
-
onEvent: (event: AgentEvent) => void | Promise<void>,
|
|
563
|
-
options?: AgentLoopRunOptions,
|
|
564
|
-
) => Promise<Message[]>;
|
|
565
|
-
|
|
566
|
-
/**
|
|
567
|
-
* Faithful re-implementation of `AgentLoop.compact()` for the mock loop: run
|
|
568
|
-
* the compaction pipeline against the supplied turn context (which carries the
|
|
569
|
-
* test's `contextWindowManager`), invoke the orchestrator-supplied hooks, and
|
|
570
|
-
* return the continuation history — or `null` on timeout/exhaustion so the
|
|
571
|
-
* caller yields "budget".
|
|
572
|
-
*/
|
|
573
|
-
async function simulateInlineCompaction(
|
|
574
|
-
compaction: MidLoopCompaction,
|
|
575
|
-
history: Message[],
|
|
576
|
-
turnContext: TurnContext | undefined,
|
|
577
|
-
signal: AbortSignal | undefined,
|
|
578
|
-
onEvent: (event: AgentEvent) => void | Promise<void>,
|
|
579
|
-
compactionCircuit: CompactionCircuit,
|
|
580
|
-
): Promise<Message[] | null> {
|
|
581
|
-
await onEvent({ type: "context_compacting" });
|
|
582
|
-
const { rawHistory, options } = compaction.prepare(history);
|
|
583
|
-
let result: CompactionResult;
|
|
584
|
-
try {
|
|
585
|
-
result = await runPipeline<CompactionArgs, CompactionResult>(
|
|
586
|
-
"compaction",
|
|
587
|
-
getMiddlewaresFor("compaction"),
|
|
588
|
-
(args) => defaultCompactionTerminal(args, turnContext as TurnContext),
|
|
589
|
-
{ messages: rawHistory, signal, options },
|
|
590
|
-
turnContext as TurnContext,
|
|
591
|
-
DEFAULT_TIMEOUTS.compaction,
|
|
592
|
-
);
|
|
593
|
-
} catch (error) {
|
|
594
|
-
if (error instanceof PluginTimeoutError) {
|
|
595
|
-
await compactionCircuit.recordOutcome(
|
|
596
|
-
{
|
|
597
|
-
currentRequestId: turnContext?.requestId,
|
|
598
|
-
currentTurnTrustContext: turnContext?.trust,
|
|
599
|
-
turnCount: turnContext?.turnIndex ?? 0,
|
|
600
|
-
},
|
|
601
|
-
true,
|
|
602
|
-
onEvent,
|
|
603
|
-
);
|
|
604
|
-
return null;
|
|
605
|
-
}
|
|
606
|
-
throw error;
|
|
607
|
-
}
|
|
608
|
-
const compactResult = result as ContextWindowResult;
|
|
609
|
-
if (compactResult.summaryFailed !== undefined) {
|
|
610
|
-
await compactionCircuit.recordOutcome(
|
|
611
|
-
{
|
|
612
|
-
currentRequestId: turnContext?.requestId,
|
|
613
|
-
currentTurnTrustContext: turnContext?.trust,
|
|
614
|
-
turnCount: turnContext?.turnIndex ?? 0,
|
|
615
|
-
},
|
|
616
|
-
compactResult.summaryFailed,
|
|
617
|
-
onEvent,
|
|
618
|
-
);
|
|
619
|
-
}
|
|
620
|
-
if (compactResult.compacted) {
|
|
621
|
-
await compaction.applyResult(compactResult, rawHistory);
|
|
622
|
-
}
|
|
623
|
-
if (compactResult.exhausted ?? false) {
|
|
624
|
-
return null;
|
|
625
|
-
}
|
|
626
|
-
return compaction.reinject();
|
|
627
|
-
}
|
|
628
|
-
|
|
629
|
-
/**
|
|
630
|
-
* Adapt a `Message[]`-returning mock loop body into `run()`'s real result
|
|
631
|
-
* shape. Mirrors the production loop: the pause-reason carried back is
|
|
632
|
-
* whatever the most recent `onCheckpoint` call yielded with (null when it
|
|
633
|
-
* never yielded), so the orchestrator derives its yield bookkeeping the same
|
|
634
|
-
* way it does against the real loop.
|
|
635
|
-
*/
|
|
636
|
-
const asAgentLoopRun = (
|
|
637
|
-
fn: AgentLoopRun,
|
|
638
|
-
compactionCircuit: CompactionCircuit,
|
|
639
|
-
): ((
|
|
640
|
-
messages: Message[],
|
|
641
|
-
onEvent: (event: AgentEvent) => void | Promise<void>,
|
|
642
|
-
options?: AgentLoopRunOptions,
|
|
643
|
-
) => Promise<AgentLoopRunResult>) => {
|
|
644
|
-
return async (messages, onEvent, options) => {
|
|
645
|
-
let exitReason: AgentLoopRunResult["exitReason"] = null;
|
|
646
|
-
let wrapped = options;
|
|
647
|
-
if (options?.onCheckpoint) {
|
|
648
|
-
const inner = options.onCheckpoint;
|
|
649
|
-
wrapped = {
|
|
650
|
-
...options,
|
|
651
|
-
onCheckpoint: async (info) => {
|
|
652
|
-
// Handoff is offered first, mirroring the loop's ordering.
|
|
653
|
-
const decision = await inner(info);
|
|
654
|
-
if (decision !== "continue") {
|
|
655
|
-
exitReason = decision;
|
|
656
|
-
return decision;
|
|
657
|
-
}
|
|
658
|
-
// The mid-loop budget gate and inline compaction both live inside
|
|
659
|
-
// `AgentLoop.run`. Replicate them here — same formula, stubbed
|
|
660
|
-
// estimator, and the loop's own `compact()` ceremony — so these
|
|
661
|
-
// orchestrator tests drive the real escalation path now that the
|
|
662
|
-
// orchestrator's `onCheckpoint` is handoff-only and compaction runs
|
|
663
|
-
// inline rather than via an orchestrator re-entry loop.
|
|
664
|
-
const contextWindow = options.resolveContextWindow?.();
|
|
665
|
-
if (contextWindow?.overflowRecovery.enabled) {
|
|
666
|
-
const { maxInputTokens, overflowRecovery } = contextWindow;
|
|
667
|
-
const safetyMargin =
|
|
668
|
-
info.history.length > 50
|
|
669
|
-
? Math.max(overflowRecovery.safetyMarginRatio, 0.15)
|
|
670
|
-
: overflowRecovery.safetyMarginRatio;
|
|
671
|
-
const preflightBudget = Math.floor(
|
|
672
|
-
maxInputTokens * (1 - safetyMargin),
|
|
673
|
-
);
|
|
674
|
-
if (mockEstimateTokens > preflightBudget * 0.85) {
|
|
675
|
-
// Mirror `AgentLoop.compact()`: when a compaction path is
|
|
676
|
-
// supplied, run it in place and continue; on timeout or
|
|
677
|
-
// exhaustion it returns null, so the loop yields "budget".
|
|
678
|
-
const compacted = options.compaction
|
|
679
|
-
? await simulateInlineCompaction(
|
|
680
|
-
options.compaction,
|
|
681
|
-
info.history,
|
|
682
|
-
options.turnContext,
|
|
683
|
-
options.signal,
|
|
684
|
-
onEvent,
|
|
685
|
-
compactionCircuit,
|
|
686
|
-
)
|
|
687
|
-
: null;
|
|
688
|
-
if (compacted) {
|
|
689
|
-
exitReason = null;
|
|
690
|
-
return "continue";
|
|
691
|
-
}
|
|
692
|
-
exitReason = "budget";
|
|
693
|
-
return "budget";
|
|
694
|
-
}
|
|
695
|
-
}
|
|
696
|
-
exitReason = null;
|
|
697
|
-
return "continue";
|
|
698
|
-
},
|
|
699
|
-
};
|
|
700
|
-
}
|
|
701
|
-
const history = await fn(messages, onEvent, wrapped);
|
|
702
|
-
return { history, exitReason };
|
|
703
|
-
};
|
|
704
|
-
};
|
|
705
|
-
|
|
706
554
|
function makeCtx(
|
|
707
555
|
overrides?: Partial<AgentLoopConversationContext> & {
|
|
708
|
-
|
|
556
|
+
providerResponses?: ScriptedResponse[];
|
|
557
|
+
loopProvider?: Provider;
|
|
558
|
+
loopTools?: ToolDefinition[];
|
|
559
|
+
toolExecutor?: LoopToolExecutor;
|
|
709
560
|
},
|
|
710
561
|
): AgentLoopConversationContext {
|
|
711
|
-
const
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
562
|
+
const {
|
|
563
|
+
providerResponses,
|
|
564
|
+
loopProvider,
|
|
565
|
+
loopTools,
|
|
566
|
+
toolExecutor,
|
|
567
|
+
...ctxOverrides
|
|
568
|
+
} = overrides ?? {};
|
|
569
|
+
const conversationId = ctxOverrides.conversationId ?? "test-conv";
|
|
570
|
+
let processing = true;
|
|
571
|
+
|
|
572
|
+
// Drive the real `AgentLoop` against a scripted provider, mocking only the
|
|
573
|
+
// provider HTTP boundary. The loop owns its mid-loop budget gate, inline
|
|
574
|
+
// compaction, and event emission, so these orchestrator tests exercise the
|
|
575
|
+
// real escalation/persistence path.
|
|
576
|
+
//
|
|
577
|
+
// Name the loop's provider after `ctx.provider` so the two stay in sync,
|
|
578
|
+
// mirroring production where the orchestrator hands the same provider to
|
|
579
|
+
// the loop. The loop stamps this name onto `usage.actualProvider` whenever
|
|
580
|
+
// a response omits its own, which is what the request-log fallback reads.
|
|
581
|
+
// Tests that need to introspect provider calls (or sequence a rejection)
|
|
582
|
+
// build their own `loopProvider` via `createMockProvider`.
|
|
583
|
+
const loopProviderName =
|
|
584
|
+
(ctxOverrides.provider as { name?: string } | undefined)?.name ??
|
|
585
|
+
"mock-provider";
|
|
586
|
+
const provider =
|
|
587
|
+
loopProvider ??
|
|
588
|
+
createMockProvider(
|
|
589
|
+
providerResponses ?? [textResponse("response")],
|
|
590
|
+
loopProviderName,
|
|
591
|
+
).provider;
|
|
592
|
+
const agentLoop = new AgentLoop(provider, "system prompt", {
|
|
593
|
+
conversationId,
|
|
594
|
+
tools: loopTools ?? [],
|
|
595
|
+
toolExecutor,
|
|
596
|
+
});
|
|
722
597
|
|
|
723
598
|
return {
|
|
724
599
|
conversationId: "test-conv",
|
|
725
600
|
messages: [
|
|
726
601
|
{ role: "user", content: [{ type: "text", text: "Hello" }] },
|
|
727
602
|
] as Message[],
|
|
728
|
-
|
|
603
|
+
isProcessing: () => processing,
|
|
604
|
+
setProcessing: (value: boolean) => {
|
|
605
|
+
processing = value;
|
|
606
|
+
},
|
|
729
607
|
abortController: new AbortController(),
|
|
730
608
|
currentRequestId: "test-req",
|
|
731
609
|
|
|
732
|
-
agentLoop
|
|
733
|
-
run: asAgentLoopRun(agentLoopRun, compactionCircuit),
|
|
734
|
-
getToolTokenBudget: () => 0,
|
|
735
|
-
getResolvedTools: () => [],
|
|
736
|
-
// Tests here don't exercise calibration; returning undefined makes
|
|
737
|
-
// the estimator use the per-provider aggregate key.
|
|
738
|
-
getActiveModel: () => undefined,
|
|
739
|
-
compactionCircuit,
|
|
740
|
-
} as unknown as AgentLoopConversationContext["agentLoop"],
|
|
610
|
+
agentLoop,
|
|
741
611
|
provider: {
|
|
742
612
|
name: "mock-provider",
|
|
743
613
|
sendMessage: async () => ({
|
|
@@ -830,9 +700,10 @@ function makeCtx(
|
|
|
830
700
|
injectedTokens: 0,
|
|
831
701
|
}),
|
|
832
702
|
retrackCachedNodes: () => {},
|
|
703
|
+
recordPkbQueryVectors: () => {},
|
|
833
704
|
} as unknown as AgentLoopConversationContext["graphMemory"],
|
|
834
705
|
|
|
835
|
-
...
|
|
706
|
+
...ctxOverrides,
|
|
836
707
|
} as AgentLoopConversationContext;
|
|
837
708
|
}
|
|
838
709
|
|
|
@@ -970,57 +841,28 @@ describe("session-agent-loop", () => {
|
|
|
970
841
|
mockHasProactiveArtifactCompleted = false;
|
|
971
842
|
mockTryClaimProactiveArtifactTrigger = true;
|
|
972
843
|
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
// emits this from `AgentLoop.run` just before `provider.sendMessage`.
|
|
976
|
-
await onEvent({ type: "llm_call_started" });
|
|
977
|
-
await onEvent({
|
|
978
|
-
type: "message_complete",
|
|
979
|
-
message: {
|
|
980
|
-
role: "assistant",
|
|
981
|
-
content: [{ type: "text", text: "I'll build that app." }],
|
|
982
|
-
},
|
|
983
|
-
});
|
|
984
|
-
await onEvent({
|
|
985
|
-
type: "tool_use",
|
|
986
|
-
id: "tool-1",
|
|
987
|
-
name: "app_create",
|
|
988
|
-
input: { name: "Flow" },
|
|
989
|
-
});
|
|
990
|
-
await onEvent({
|
|
991
|
-
type: "tool_result",
|
|
992
|
-
toolUseId: "tool-1",
|
|
993
|
-
content: "{}",
|
|
994
|
-
isError: false,
|
|
995
|
-
});
|
|
996
|
-
await options?.onCheckpoint?.({
|
|
997
|
-
turnIndex: 0,
|
|
998
|
-
toolCount: 1,
|
|
999
|
-
hasToolUse: true,
|
|
1000
|
-
history: messages,
|
|
1001
|
-
});
|
|
1002
|
-
// Prime the anchor again for LLM call 2 — multi-call agent turns
|
|
1003
|
-
// reserve a fresh assistant row per LLM call.
|
|
1004
|
-
await onEvent({ type: "llm_call_started" });
|
|
1005
|
-
await onEvent({
|
|
1006
|
-
type: "message_complete",
|
|
1007
|
-
message: {
|
|
1008
|
-
role: "assistant",
|
|
1009
|
-
content: [{ type: "text", text: "Done." }],
|
|
1010
|
-
},
|
|
1011
|
-
});
|
|
1012
|
-
return [
|
|
1013
|
-
...messages,
|
|
1014
|
-
{
|
|
1015
|
-
role: "assistant" as const,
|
|
1016
|
-
content: [{ type: "text" as const, text: "Done." }],
|
|
1017
|
-
},
|
|
1018
|
-
];
|
|
1019
|
-
};
|
|
1020
|
-
|
|
844
|
+
// A two-call agent turn: the model invokes `app_create`, then wraps up
|
|
845
|
+
// with a final text reply.
|
|
1021
846
|
const ctx = makeCtx({
|
|
1022
847
|
conversationId: "test-conv",
|
|
1023
|
-
|
|
848
|
+
providerResponses: [
|
|
849
|
+
{
|
|
850
|
+
content: [
|
|
851
|
+
{ type: "text", text: "I'll build that app." },
|
|
852
|
+
{
|
|
853
|
+
type: "tool_use",
|
|
854
|
+
id: "tool-1",
|
|
855
|
+
name: "app_create",
|
|
856
|
+
input: { name: "Flow" },
|
|
857
|
+
},
|
|
858
|
+
],
|
|
859
|
+
model: "mock-model",
|
|
860
|
+
usage: { inputTokens: 10, outputTokens: 5 },
|
|
861
|
+
stopReason: "tool_use",
|
|
862
|
+
},
|
|
863
|
+
textResponse("Done."),
|
|
864
|
+
],
|
|
865
|
+
toolExecutor: async () => ({ content: "{}", isError: false }),
|
|
1024
866
|
});
|
|
1025
867
|
await runAgentLoopImpl(
|
|
1026
868
|
ctx,
|
|
@@ -1156,9 +998,6 @@ describe("session-agent-loop", () => {
|
|
|
1156
998
|
reason: "trusted-contact",
|
|
1157
999
|
};
|
|
1158
1000
|
const events: ServerMessage[] = [];
|
|
1159
|
-
const agentLoopRun = mock(async (_messages: Message[]) => {
|
|
1160
|
-
throw new Error("agent loop should not run");
|
|
1161
|
-
});
|
|
1162
1001
|
const activityStates: unknown[][] = [];
|
|
1163
1002
|
const traceEvents: unknown[][] = [];
|
|
1164
1003
|
const ctx = makeCtx({
|
|
@@ -1171,14 +1010,11 @@ describe("session-agent-loop", () => {
|
|
|
1171
1010
|
},
|
|
1172
1011
|
} as unknown as AgentLoopConversationContext["traceEmitter"],
|
|
1173
1012
|
});
|
|
1174
|
-
ctx.agentLoop
|
|
1175
|
-
agentLoopRun,
|
|
1176
|
-
ctx.agentLoop.compactionCircuit,
|
|
1177
|
-
);
|
|
1013
|
+
const runSpy = spyOn(ctx.agentLoop, "run");
|
|
1178
1014
|
|
|
1179
1015
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
1180
1016
|
|
|
1181
|
-
expect(
|
|
1017
|
+
expect(runSpy).not.toHaveBeenCalled();
|
|
1182
1018
|
expect(applyRuntimeInjectionsMock).not.toHaveBeenCalled();
|
|
1183
1019
|
expect(activityStates).toContainEqual([
|
|
1184
1020
|
"idle",
|
|
@@ -1238,7 +1074,7 @@ describe("session-agent-loop", () => {
|
|
|
1238
1074
|
});
|
|
1239
1075
|
|
|
1240
1076
|
expect(applyRuntimeInjectionsMock).not.toHaveBeenCalled();
|
|
1241
|
-
expect(ctx.
|
|
1077
|
+
expect(ctx.isProcessing()).toBe(false);
|
|
1242
1078
|
expect(ctx.abortController).toBeNull();
|
|
1243
1079
|
expect(ctx.currentRequestId).toBeUndefined();
|
|
1244
1080
|
expect(drainQueue).toHaveBeenCalledWith("loop_complete");
|
|
@@ -1254,47 +1090,14 @@ describe("session-agent-loop", () => {
|
|
|
1254
1090
|
test("error events from agent loop are classified and emitted", async () => {
|
|
1255
1091
|
const events: ServerMessage[] = [];
|
|
1256
1092
|
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
name: "bash",
|
|
1266
|
-
input: { cmd: "ls" },
|
|
1267
|
-
});
|
|
1268
|
-
onEvent({
|
|
1269
|
-
type: "error",
|
|
1270
|
-
error: new Error("Tool execution failed: permission denied"),
|
|
1271
|
-
});
|
|
1272
|
-
onEvent({
|
|
1273
|
-
type: "message_complete",
|
|
1274
|
-
message: {
|
|
1275
|
-
role: "assistant",
|
|
1276
|
-
content: [{ type: "text", text: "I encountered an error" }],
|
|
1277
|
-
},
|
|
1278
|
-
});
|
|
1279
|
-
onEvent({
|
|
1280
|
-
type: "usage",
|
|
1281
|
-
inputTokens: 100,
|
|
1282
|
-
outputTokens: 50,
|
|
1283
|
-
model: "test-model",
|
|
1284
|
-
providerDurationMs: 200,
|
|
1285
|
-
});
|
|
1286
|
-
return [
|
|
1287
|
-
...messages,
|
|
1288
|
-
{
|
|
1289
|
-
role: "assistant" as const,
|
|
1290
|
-
content: [
|
|
1291
|
-
{ type: "text", text: "I encountered an error" },
|
|
1292
|
-
] as ContentBlock[],
|
|
1293
|
-
},
|
|
1294
|
-
];
|
|
1295
|
-
};
|
|
1296
|
-
|
|
1297
|
-
const ctx = makeCtx({ agentLoopRun });
|
|
1093
|
+
// The model calls a tool whose executor throws, surfacing an `error`
|
|
1094
|
+
// event from the loop's catch handler.
|
|
1095
|
+
const ctx = makeCtx({
|
|
1096
|
+
providerResponses: [toolUseResponse("tu-1", "bash", { cmd: "ls" })],
|
|
1097
|
+
toolExecutor: async () => {
|
|
1098
|
+
throw new Error("Tool execution failed: permission denied");
|
|
1099
|
+
},
|
|
1100
|
+
});
|
|
1298
1101
|
await runAgentLoopImpl(ctx, "run ls", "msg-1", (msg) => events.push(msg));
|
|
1299
1102
|
|
|
1300
1103
|
const conversationError = events.find(
|
|
@@ -1306,34 +1109,9 @@ describe("session-agent-loop", () => {
|
|
|
1306
1109
|
test("non-error agent loop completion does not emit conversation_error", async () => {
|
|
1307
1110
|
const events: ServerMessage[] = [];
|
|
1308
1111
|
|
|
1309
|
-
const
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
await onEvent({ type: "llm_call_started" });
|
|
1313
|
-
onEvent({
|
|
1314
|
-
type: "message_complete",
|
|
1315
|
-
message: {
|
|
1316
|
-
role: "assistant",
|
|
1317
|
-
content: [{ type: "text", text: "All good" }],
|
|
1318
|
-
},
|
|
1319
|
-
});
|
|
1320
|
-
onEvent({
|
|
1321
|
-
type: "usage",
|
|
1322
|
-
inputTokens: 50,
|
|
1323
|
-
outputTokens: 25,
|
|
1324
|
-
model: "test-model",
|
|
1325
|
-
providerDurationMs: 100,
|
|
1326
|
-
});
|
|
1327
|
-
return [
|
|
1328
|
-
...messages,
|
|
1329
|
-
{
|
|
1330
|
-
role: "assistant" as const,
|
|
1331
|
-
content: [{ type: "text", text: "All good" }] as ContentBlock[],
|
|
1332
|
-
},
|
|
1333
|
-
];
|
|
1334
|
-
};
|
|
1335
|
-
|
|
1336
|
-
const ctx = makeCtx({ agentLoopRun });
|
|
1112
|
+
const ctx = makeCtx({
|
|
1113
|
+
providerResponses: [textResponse("All good")],
|
|
1114
|
+
});
|
|
1337
1115
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
1338
1116
|
|
|
1339
1117
|
const conversationError = events.find(
|
|
@@ -1369,38 +1147,20 @@ describe("session-agent-loop", () => {
|
|
|
1369
1147
|
},
|
|
1370
1148
|
};
|
|
1371
1149
|
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
onEvent({
|
|
1377
|
-
type: "message_complete",
|
|
1378
|
-
message: {
|
|
1379
|
-
role: "assistant",
|
|
1380
|
-
content: [{ type: "text", text: "Hi there." }],
|
|
1381
|
-
},
|
|
1382
|
-
});
|
|
1383
|
-
onEvent({
|
|
1384
|
-
type: "usage",
|
|
1385
|
-
inputTokens: 12,
|
|
1386
|
-
outputTokens: 3,
|
|
1387
|
-
model: "gpt-4.1-2026-03-01",
|
|
1388
|
-
actualProvider: "fireworks",
|
|
1389
|
-
providerDurationMs: 45,
|
|
1390
|
-
rawRequest,
|
|
1391
|
-
rawResponse,
|
|
1392
|
-
});
|
|
1393
|
-
return [
|
|
1394
|
-
...messages,
|
|
1150
|
+
// The provider response carries its own `actualProvider`, so the logged
|
|
1151
|
+
// row should record that name rather than the runtime provider.
|
|
1152
|
+
const ctx = makeCtx({
|
|
1153
|
+
providerResponses: [
|
|
1395
1154
|
{
|
|
1396
|
-
|
|
1397
|
-
|
|
1155
|
+
content: [{ type: "text", text: "Hi there." }],
|
|
1156
|
+
model: "gpt-4.1-2026-03-01",
|
|
1157
|
+
usage: { inputTokens: 12, outputTokens: 3 },
|
|
1158
|
+
stopReason: "end_turn",
|
|
1159
|
+
actualProvider: "fireworks",
|
|
1160
|
+
rawRequest,
|
|
1161
|
+
rawResponse,
|
|
1398
1162
|
},
|
|
1399
|
-
]
|
|
1400
|
-
};
|
|
1401
|
-
|
|
1402
|
-
const ctx = makeCtx({
|
|
1403
|
-
agentLoopRun,
|
|
1163
|
+
],
|
|
1404
1164
|
provider: {
|
|
1405
1165
|
name: "openrouter",
|
|
1406
1166
|
sendMessage: async () => ({
|
|
@@ -1437,37 +1197,19 @@ describe("session-agent-loop", () => {
|
|
|
1437
1197
|
],
|
|
1438
1198
|
};
|
|
1439
1199
|
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
onEvent({
|
|
1445
|
-
type: "message_complete",
|
|
1446
|
-
message: {
|
|
1447
|
-
role: "assistant",
|
|
1448
|
-
content: [{ type: "text", text: "Hi there." }],
|
|
1449
|
-
},
|
|
1450
|
-
});
|
|
1451
|
-
onEvent({
|
|
1452
|
-
type: "usage",
|
|
1453
|
-
inputTokens: 12,
|
|
1454
|
-
outputTokens: 3,
|
|
1455
|
-
model: "gpt-4.1-2026-03-01",
|
|
1456
|
-
providerDurationMs: 45,
|
|
1457
|
-
rawRequest,
|
|
1458
|
-
rawResponse,
|
|
1459
|
-
});
|
|
1460
|
-
return [
|
|
1461
|
-
...messages,
|
|
1200
|
+
// The provider response omits `actualProvider`, so the loop stamps the
|
|
1201
|
+
// runtime provider name onto the usage event and the row records it.
|
|
1202
|
+
const ctx = makeCtx({
|
|
1203
|
+
providerResponses: [
|
|
1462
1204
|
{
|
|
1463
|
-
|
|
1464
|
-
|
|
1205
|
+
content: [{ type: "text", text: "Hi there." }],
|
|
1206
|
+
model: "gpt-4.1-2026-03-01",
|
|
1207
|
+
usage: { inputTokens: 12, outputTokens: 3 },
|
|
1208
|
+
stopReason: "end_turn",
|
|
1209
|
+
rawRequest,
|
|
1210
|
+
rawResponse,
|
|
1465
1211
|
},
|
|
1466
|
-
]
|
|
1467
|
-
};
|
|
1468
|
-
|
|
1469
|
-
const ctx = makeCtx({
|
|
1470
|
-
agentLoopRun,
|
|
1212
|
+
],
|
|
1471
1213
|
provider: {
|
|
1472
1214
|
name: "openrouter",
|
|
1473
1215
|
sendMessage: async () => ({
|
|
@@ -1522,38 +1264,18 @@ describe("session-agent-loop", () => {
|
|
|
1522
1264
|
status: "completed",
|
|
1523
1265
|
};
|
|
1524
1266
|
|
|
1525
|
-
const
|
|
1526
|
-
|
|
1527
|
-
// `AgentLoop.run` just before `provider.sendMessage`.
|
|
1528
|
-
await onEvent({ type: "llm_call_started" });
|
|
1529
|
-
onEvent({
|
|
1530
|
-
type: "message_complete",
|
|
1531
|
-
message: {
|
|
1532
|
-
role: "assistant",
|
|
1533
|
-
content: [{ type: "text", text: "Hi there." }],
|
|
1534
|
-
},
|
|
1535
|
-
});
|
|
1536
|
-
onEvent({
|
|
1537
|
-
type: "usage",
|
|
1538
|
-
inputTokens: 12,
|
|
1539
|
-
outputTokens: 3,
|
|
1540
|
-
model: "gpt-5.4",
|
|
1541
|
-
actualProvider: "openai",
|
|
1542
|
-
providerDurationMs: 45,
|
|
1543
|
-
rawRequest,
|
|
1544
|
-
rawResponse,
|
|
1545
|
-
});
|
|
1546
|
-
return [
|
|
1547
|
-
...messages,
|
|
1267
|
+
const ctx = makeCtx({
|
|
1268
|
+
providerResponses: [
|
|
1548
1269
|
{
|
|
1549
|
-
|
|
1550
|
-
|
|
1270
|
+
content: [{ type: "text", text: "Hi there." }],
|
|
1271
|
+
model: "gpt-5.4",
|
|
1272
|
+
usage: { inputTokens: 12, outputTokens: 3 },
|
|
1273
|
+
stopReason: "end_turn",
|
|
1274
|
+
actualProvider: "openai",
|
|
1275
|
+
rawRequest,
|
|
1276
|
+
rawResponse,
|
|
1551
1277
|
},
|
|
1552
|
-
]
|
|
1553
|
-
};
|
|
1554
|
-
|
|
1555
|
-
const ctx = makeCtx({
|
|
1556
|
-
agentLoopRun,
|
|
1278
|
+
],
|
|
1557
1279
|
provider: {
|
|
1558
1280
|
name: "openai",
|
|
1559
1281
|
sendMessage: async () => ({
|
|
@@ -1593,37 +1315,17 @@ describe("session-agent-loop", () => {
|
|
|
1593
1315
|
attrs: Record<string, unknown>;
|
|
1594
1316
|
}> = [];
|
|
1595
1317
|
|
|
1596
|
-
const
|
|
1597
|
-
//
|
|
1598
|
-
|
|
1599
|
-
await onEvent({ type: "llm_call_started" });
|
|
1600
|
-
onEvent({ type: "text_delta", text: "Hi." });
|
|
1601
|
-
onEvent({
|
|
1602
|
-
type: "message_complete",
|
|
1603
|
-
message: {
|
|
1604
|
-
role: "assistant",
|
|
1605
|
-
content: [{ type: "text", text: "Hi." }],
|
|
1606
|
-
},
|
|
1607
|
-
});
|
|
1608
|
-
onEvent({
|
|
1609
|
-
type: "usage",
|
|
1610
|
-
inputTokens: 10,
|
|
1611
|
-
outputTokens: 2,
|
|
1612
|
-
model: "gpt-5.5-2026-04-23",
|
|
1613
|
-
actualProvider: "openai",
|
|
1614
|
-
providerDurationMs: 100,
|
|
1615
|
-
});
|
|
1616
|
-
return [
|
|
1617
|
-
...messages,
|
|
1318
|
+
const ctx = makeCtx({
|
|
1319
|
+
// The loop replays the text block as a `text_delta` before `usage`.
|
|
1320
|
+
providerResponses: [
|
|
1618
1321
|
{
|
|
1619
|
-
|
|
1620
|
-
|
|
1322
|
+
content: [{ type: "text", text: "Hi." }],
|
|
1323
|
+
model: "gpt-5.5-2026-04-23",
|
|
1324
|
+
usage: { inputTokens: 10, outputTokens: 2 },
|
|
1325
|
+
stopReason: "end_turn",
|
|
1326
|
+
actualProvider: "openai",
|
|
1621
1327
|
},
|
|
1622
|
-
]
|
|
1623
|
-
};
|
|
1624
|
-
|
|
1625
|
-
const ctx = makeCtx({
|
|
1626
|
-
agentLoopRun,
|
|
1328
|
+
],
|
|
1627
1329
|
// Provider name matches actualProvider so both paths agree.
|
|
1628
1330
|
provider: {
|
|
1629
1331
|
name: "openai",
|
|
@@ -1671,31 +1373,18 @@ describe("session-agent-loop", () => {
|
|
|
1671
1373
|
attrs: Record<string, unknown>;
|
|
1672
1374
|
}> = [];
|
|
1673
1375
|
|
|
1674
|
-
const
|
|
1675
|
-
//
|
|
1676
|
-
//
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
onEvent({
|
|
1680
|
-
type: "message_complete",
|
|
1681
|
-
message: {
|
|
1682
|
-
role: "assistant",
|
|
1376
|
+
const ctx = makeCtx({
|
|
1377
|
+
// An empty-content response: no text block fires `text_delta`, so the
|
|
1378
|
+
// started event falls back to the resolved usage provider name.
|
|
1379
|
+
providerResponses: [
|
|
1380
|
+
{
|
|
1683
1381
|
content: [],
|
|
1382
|
+
model: "gpt-5.5-2026-04-23",
|
|
1383
|
+
usage: { inputTokens: 10, outputTokens: 2 },
|
|
1384
|
+
stopReason: "end_turn",
|
|
1385
|
+
actualProvider: "openai",
|
|
1684
1386
|
},
|
|
1685
|
-
|
|
1686
|
-
onEvent({
|
|
1687
|
-
type: "usage",
|
|
1688
|
-
inputTokens: 10,
|
|
1689
|
-
outputTokens: 2,
|
|
1690
|
-
model: "gpt-5.5-2026-04-23",
|
|
1691
|
-
actualProvider: "openai",
|
|
1692
|
-
providerDurationMs: 100,
|
|
1693
|
-
});
|
|
1694
|
-
return messages;
|
|
1695
|
-
};
|
|
1696
|
-
|
|
1697
|
-
const ctx = makeCtx({
|
|
1698
|
-
agentLoopRun,
|
|
1387
|
+
],
|
|
1699
1388
|
provider: {
|
|
1700
1389
|
name: "anthropic",
|
|
1701
1390
|
sendMessage: async () => ({
|
|
@@ -1737,52 +1426,32 @@ describe("session-agent-loop", () => {
|
|
|
1737
1426
|
test("records the actual provider for usage accounting", async () => {
|
|
1738
1427
|
const events: ServerMessage[] = [];
|
|
1739
1428
|
|
|
1740
|
-
const
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
await onEvent({ type: "llm_call_started" });
|
|
1744
|
-
onEvent({
|
|
1745
|
-
type: "message_complete",
|
|
1746
|
-
message: {
|
|
1747
|
-
role: "assistant",
|
|
1429
|
+
const ctx = makeCtx({
|
|
1430
|
+
providerResponses: [
|
|
1431
|
+
{
|
|
1748
1432
|
content: [{ type: "text", text: "Hi there." }],
|
|
1749
|
-
},
|
|
1750
|
-
});
|
|
1751
|
-
onEvent({
|
|
1752
|
-
type: "usage",
|
|
1753
|
-
inputTokens: 12,
|
|
1754
|
-
outputTokens: 3,
|
|
1755
|
-
model: "gpt-4.1-2026-03-01",
|
|
1756
|
-
actualProvider: "fireworks",
|
|
1757
|
-
providerDurationMs: 45,
|
|
1758
|
-
rawRequest: {
|
|
1759
|
-
model: "gpt-4.1",
|
|
1760
|
-
messages: [{ role: "user", content: "Hello" }],
|
|
1761
|
-
},
|
|
1762
|
-
rawResponse: {
|
|
1763
1433
|
model: "gpt-4.1-2026-03-01",
|
|
1764
|
-
|
|
1765
|
-
|
|
1766
|
-
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1434
|
+
usage: { inputTokens: 12, outputTokens: 3 },
|
|
1435
|
+
stopReason: "end_turn",
|
|
1436
|
+
actualProvider: "fireworks",
|
|
1437
|
+
rawRequest: {
|
|
1438
|
+
model: "gpt-4.1",
|
|
1439
|
+
messages: [{ role: "user", content: "Hello" }],
|
|
1440
|
+
},
|
|
1441
|
+
rawResponse: {
|
|
1442
|
+
model: "gpt-4.1-2026-03-01",
|
|
1443
|
+
choices: [
|
|
1444
|
+
{
|
|
1445
|
+
finish_reason: "stop",
|
|
1446
|
+
message: {
|
|
1447
|
+
role: "assistant",
|
|
1448
|
+
content: "Hi there.",
|
|
1449
|
+
},
|
|
1770
1450
|
},
|
|
1771
|
-
|
|
1772
|
-
|
|
1773
|
-
},
|
|
1774
|
-
});
|
|
1775
|
-
return [
|
|
1776
|
-
...messages,
|
|
1777
|
-
{
|
|
1778
|
-
role: "assistant" as const,
|
|
1779
|
-
content: [{ type: "text", text: "Hi there." }] as ContentBlock[],
|
|
1451
|
+
],
|
|
1452
|
+
},
|
|
1780
1453
|
},
|
|
1781
|
-
]
|
|
1782
|
-
};
|
|
1783
|
-
|
|
1784
|
-
const ctx = makeCtx({
|
|
1785
|
-
agentLoopRun,
|
|
1454
|
+
],
|
|
1786
1455
|
provider: {
|
|
1787
1456
|
name: "openrouter",
|
|
1788
1457
|
sendMessage: async () => ({
|
|
@@ -1852,27 +1521,9 @@ describe("session-agent-loop", () => {
|
|
|
1852
1521
|
},
|
|
1853
1522
|
});
|
|
1854
1523
|
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
await onEvent({ type: "llm_call_started" });
|
|
1859
|
-
onEvent({
|
|
1860
|
-
type: "message_complete",
|
|
1861
|
-
message: {
|
|
1862
|
-
role: "assistant",
|
|
1863
|
-
content: [{ type: "text", text: "recovered" }],
|
|
1864
|
-
},
|
|
1865
|
-
});
|
|
1866
|
-
return [
|
|
1867
|
-
...messages,
|
|
1868
|
-
{
|
|
1869
|
-
role: "assistant" as const,
|
|
1870
|
-
content: [{ type: "text", text: "recovered" }] as ContentBlock[],
|
|
1871
|
-
},
|
|
1872
|
-
];
|
|
1873
|
-
};
|
|
1874
|
-
|
|
1875
|
-
const ctx = makeCtx({ agentLoopRun });
|
|
1524
|
+
// After the orchestrator's preflight compaction runs, the loop completes
|
|
1525
|
+
// the turn normally.
|
|
1526
|
+
const ctx = makeCtx({ providerResponses: [textResponse("recovered")] });
|
|
1876
1527
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
1877
1528
|
|
|
1878
1529
|
const compactorCall = recordUsageMock.mock.calls.find(
|
|
@@ -1911,7 +1562,6 @@ describe("session-agent-loop", () => {
|
|
|
1911
1562
|
|
|
1912
1563
|
test("convergence loop applies reducer and retries when context-too-large is detected", async () => {
|
|
1913
1564
|
const events: ServerMessage[] = [];
|
|
1914
|
-
let callCount = 0;
|
|
1915
1565
|
let reducerCalled = false;
|
|
1916
1566
|
|
|
1917
1567
|
// Configure reducer to succeed on first call — return reduced messages
|
|
@@ -1945,53 +1595,15 @@ describe("session-agent-loop", () => {
|
|
|
1945
1595
|
};
|
|
1946
1596
|
};
|
|
1947
1597
|
|
|
1948
|
-
|
|
1949
|
-
|
|
1950
|
-
|
|
1951
|
-
|
|
1952
|
-
|
|
1953
|
-
|
|
1954
|
-
callCount++;
|
|
1955
|
-
if (callCount === 1) {
|
|
1956
|
-
onEvent({
|
|
1957
|
-
type: "error",
|
|
1958
|
-
error: new Error("context_length_exceeded"),
|
|
1959
|
-
});
|
|
1960
|
-
onEvent({
|
|
1961
|
-
type: "usage",
|
|
1962
|
-
inputTokens: 100,
|
|
1963
|
-
outputTokens: 0,
|
|
1964
|
-
model: "test-model",
|
|
1965
|
-
providerDurationMs: 50,
|
|
1966
|
-
});
|
|
1967
|
-
return messages;
|
|
1968
|
-
}
|
|
1969
|
-
// Second call (after reducer): succeed
|
|
1970
|
-
onEvent({
|
|
1971
|
-
type: "message_complete",
|
|
1972
|
-
message: {
|
|
1973
|
-
role: "assistant",
|
|
1974
|
-
content: [{ type: "text", text: "recovered" }],
|
|
1975
|
-
},
|
|
1976
|
-
});
|
|
1977
|
-
onEvent({
|
|
1978
|
-
type: "usage",
|
|
1979
|
-
inputTokens: 50,
|
|
1980
|
-
outputTokens: 25,
|
|
1981
|
-
model: "test-model",
|
|
1982
|
-
providerDurationMs: 100,
|
|
1983
|
-
});
|
|
1984
|
-
return [
|
|
1985
|
-
...messages,
|
|
1986
|
-
{
|
|
1987
|
-
role: "assistant" as const,
|
|
1988
|
-
content: [{ type: "text", text: "recovered" }] as ContentBlock[],
|
|
1989
|
-
},
|
|
1990
|
-
];
|
|
1991
|
-
};
|
|
1598
|
+
// The provider rejects the first call with a context-too-large error,
|
|
1599
|
+
// then succeeds once the orchestrator has reduced the context.
|
|
1600
|
+
const { provider, calls } = createMockProvider([
|
|
1601
|
+
new Error("context_length_exceeded"),
|
|
1602
|
+
textResponse("recovered"),
|
|
1603
|
+
]);
|
|
1992
1604
|
|
|
1993
1605
|
const ctx = makeCtx({
|
|
1994
|
-
|
|
1606
|
+
loopProvider: provider,
|
|
1995
1607
|
contextWindowManager: {
|
|
1996
1608
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
1997
1609
|
maybeCompact: async () => ({ compacted: false }),
|
|
@@ -2001,7 +1613,7 @@ describe("session-agent-loop", () => {
|
|
|
2001
1613
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
2002
1614
|
|
|
2003
1615
|
expect(reducerCalled).toBe(true);
|
|
2004
|
-
expect(
|
|
1616
|
+
expect(calls.length).toBe(2);
|
|
2005
1617
|
const compactEvent = events.find((e) => e.type === "context_compacted");
|
|
2006
1618
|
expect(compactEvent).toBeDefined();
|
|
2007
1619
|
});
|
|
@@ -2009,23 +1621,10 @@ describe("session-agent-loop", () => {
|
|
|
2009
1621
|
test("emits conversation_error when context stays too large after all recovery attempts", async () => {
|
|
2010
1622
|
const events: ServerMessage[] = [];
|
|
2011
1623
|
|
|
2012
|
-
|
|
2013
|
-
|
|
2014
|
-
type: "error",
|
|
2015
|
-
error: new Error("context_length_exceeded"),
|
|
2016
|
-
});
|
|
2017
|
-
onEvent({
|
|
2018
|
-
type: "usage",
|
|
2019
|
-
inputTokens: 100,
|
|
2020
|
-
outputTokens: 0,
|
|
2021
|
-
model: "test-model",
|
|
2022
|
-
providerDurationMs: 50,
|
|
2023
|
-
});
|
|
2024
|
-
return messages;
|
|
2025
|
-
};
|
|
2026
|
-
|
|
1624
|
+
// The provider rejects every call with a context-too-large error, so the
|
|
1625
|
+
// orchestrator exhausts its recovery attempts.
|
|
2027
1626
|
const ctx = makeCtx({
|
|
2028
|
-
|
|
1627
|
+
providerResponses: [new Error("context_length_exceeded")],
|
|
2029
1628
|
contextWindowManager: {
|
|
2030
1629
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
2031
1630
|
// Compaction succeeds but context is still too large
|
|
@@ -2059,7 +1658,6 @@ describe("session-agent-loop", () => {
|
|
|
2059
1658
|
|
|
2060
1659
|
test("bounded convergence loop applies reducer tiers and recovers", async () => {
|
|
2061
1660
|
const events: ServerMessage[] = [];
|
|
2062
|
-
let callCount = 0;
|
|
2063
1661
|
let reducerCalls = 0;
|
|
2064
1662
|
|
|
2065
1663
|
// Reducer: succeed on first call, returning reduced messages
|
|
@@ -2077,55 +1675,15 @@ describe("session-agent-loop", () => {
|
|
|
2077
1675
|
};
|
|
2078
1676
|
};
|
|
2079
1677
|
|
|
2080
|
-
|
|
2081
|
-
|
|
2082
|
-
|
|
2083
|
-
|
|
2084
|
-
|
|
2085
|
-
|
|
2086
|
-
callCount++;
|
|
2087
|
-
if (callCount === 1) {
|
|
2088
|
-
onEvent({
|
|
2089
|
-
type: "error",
|
|
2090
|
-
error: new Error("context_length_exceeded"),
|
|
2091
|
-
});
|
|
2092
|
-
onEvent({
|
|
2093
|
-
type: "usage",
|
|
2094
|
-
inputTokens: 100,
|
|
2095
|
-
outputTokens: 0,
|
|
2096
|
-
model: "test-model",
|
|
2097
|
-
providerDurationMs: 50,
|
|
2098
|
-
});
|
|
2099
|
-
return messages;
|
|
2100
|
-
}
|
|
2101
|
-
// After reducer runs, succeed
|
|
2102
|
-
onEvent({
|
|
2103
|
-
type: "message_complete",
|
|
2104
|
-
message: {
|
|
2105
|
-
role: "assistant",
|
|
2106
|
-
content: [{ type: "text", text: "recovered via convergence" }],
|
|
2107
|
-
},
|
|
2108
|
-
});
|
|
2109
|
-
onEvent({
|
|
2110
|
-
type: "usage",
|
|
2111
|
-
inputTokens: 50,
|
|
2112
|
-
outputTokens: 25,
|
|
2113
|
-
model: "test-model",
|
|
2114
|
-
providerDurationMs: 100,
|
|
2115
|
-
});
|
|
2116
|
-
return [
|
|
2117
|
-
...messages,
|
|
2118
|
-
{
|
|
2119
|
-
role: "assistant" as const,
|
|
2120
|
-
content: [
|
|
2121
|
-
{ type: "text", text: "recovered via convergence" },
|
|
2122
|
-
] as ContentBlock[],
|
|
2123
|
-
},
|
|
2124
|
-
];
|
|
2125
|
-
};
|
|
1678
|
+
// The provider rejects the first call with a context-too-large error,
|
|
1679
|
+
// then succeeds once the orchestrator has reduced the context.
|
|
1680
|
+
const { provider, calls } = createMockProvider([
|
|
1681
|
+
new Error("context_length_exceeded"),
|
|
1682
|
+
textResponse("recovered via convergence"),
|
|
1683
|
+
]);
|
|
2126
1684
|
|
|
2127
1685
|
const ctx = makeCtx({
|
|
2128
|
-
|
|
1686
|
+
loopProvider: provider,
|
|
2129
1687
|
contextWindowManager: {
|
|
2130
1688
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
2131
1689
|
maybeCompact: async () => ({ compacted: false }),
|
|
@@ -2135,7 +1693,7 @@ describe("session-agent-loop", () => {
|
|
|
2135
1693
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
2136
1694
|
|
|
2137
1695
|
expect(reducerCalls).toBeGreaterThanOrEqual(1);
|
|
2138
|
-
expect(
|
|
1696
|
+
expect(calls.length).toBe(2);
|
|
2139
1697
|
const conversationError = events.find(
|
|
2140
1698
|
(e) => e.type === "conversation_error",
|
|
2141
1699
|
);
|
|
@@ -2146,7 +1704,6 @@ describe("session-agent-loop", () => {
|
|
|
2146
1704
|
|
|
2147
1705
|
test("non-interactive auto-compress continues without approval prompt", async () => {
|
|
2148
1706
|
const events: ServerMessage[] = [];
|
|
2149
|
-
let callCount = 0;
|
|
2150
1707
|
|
|
2151
1708
|
// Reducer exhausts all tiers
|
|
2152
1709
|
mockReducerStepFn = (msgs: Message[]) => ({
|
|
@@ -2167,54 +1724,14 @@ describe("session-agent-loop", () => {
|
|
|
2167
1724
|
|
|
2168
1725
|
mockOverflowAction = "auto_compress_latest_turn";
|
|
2169
1726
|
|
|
2170
|
-
|
|
2171
|
-
|
|
2172
|
-
// `AgentLoop.run` just before `provider.sendMessage`. Retry branches
|
|
2173
|
-
// need this on every invocation: each agent-loop iteration reserves
|
|
2174
|
-
// its own row.
|
|
2175
|
-
await onEvent({ type: "llm_call_started" });
|
|
2176
|
-
callCount++;
|
|
2177
|
-
if (callCount <= 2) {
|
|
2178
|
-
onEvent({
|
|
2179
|
-
type: "error",
|
|
2180
|
-
error: new Error("context_length_exceeded"),
|
|
2181
|
-
});
|
|
2182
|
-
onEvent({
|
|
2183
|
-
type: "usage",
|
|
2184
|
-
inputTokens: 100,
|
|
2185
|
-
outputTokens: 0,
|
|
2186
|
-
model: "test-model",
|
|
2187
|
-
providerDurationMs: 50,
|
|
2188
|
-
});
|
|
2189
|
-
return messages;
|
|
2190
|
-
}
|
|
2191
|
-
onEvent({
|
|
2192
|
-
type: "message_complete",
|
|
2193
|
-
message: {
|
|
2194
|
-
role: "assistant",
|
|
2195
|
-
content: [{ type: "text", text: "auto-recovered" }],
|
|
2196
|
-
},
|
|
2197
|
-
});
|
|
2198
|
-
onEvent({
|
|
2199
|
-
type: "usage",
|
|
2200
|
-
inputTokens: 50,
|
|
2201
|
-
outputTokens: 25,
|
|
2202
|
-
model: "test-model",
|
|
2203
|
-
providerDurationMs: 100,
|
|
2204
|
-
});
|
|
2205
|
-
return [
|
|
2206
|
-
...messages,
|
|
2207
|
-
{
|
|
2208
|
-
role: "assistant" as const,
|
|
2209
|
-
content: [
|
|
2210
|
-
{ type: "text", text: "auto-recovered" },
|
|
2211
|
-
] as ContentBlock[],
|
|
2212
|
-
},
|
|
2213
|
-
];
|
|
2214
|
-
};
|
|
2215
|
-
|
|
1727
|
+
// The provider rejects the first two calls with context-too-large errors,
|
|
1728
|
+
// then succeeds after the emergency auto-compress runs.
|
|
2216
1729
|
const ctx = makeCtx({
|
|
2217
|
-
|
|
1730
|
+
providerResponses: [
|
|
1731
|
+
new Error("context_length_exceeded"),
|
|
1732
|
+
new Error("context_length_exceeded"),
|
|
1733
|
+
textResponse("auto-recovered"),
|
|
1734
|
+
],
|
|
2218
1735
|
hasNoClient: true,
|
|
2219
1736
|
contextWindowManager: {
|
|
2220
1737
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
@@ -2261,7 +1778,6 @@ describe("session-agent-loop", () => {
|
|
|
2261
1778
|
// `budget_yield_unrecovered` so the inspector and dashboards can
|
|
2262
1779
|
// attribute the silent stall.
|
|
2263
1780
|
const events: ServerMessage[] = [];
|
|
2264
|
-
let callCount = 0;
|
|
2265
1781
|
|
|
2266
1782
|
// Reducer exhausts all 4 tiers on first call so the convergence
|
|
2267
1783
|
// loop runs exactly one iteration before falling through to
|
|
@@ -2292,43 +1808,30 @@ describe("session-agent-loop", () => {
|
|
|
2292
1808
|
// call). 90k satisfies both so the path reaches call 3.
|
|
2293
1809
|
mockEstimateTokens = 90_000;
|
|
2294
1810
|
|
|
2295
|
-
|
|
2296
|
-
|
|
2297
|
-
|
|
2298
|
-
|
|
2299
|
-
|
|
2300
|
-
|
|
2301
|
-
|
|
2302
|
-
type: "error",
|
|
2303
|
-
error: new Error("context_length_exceeded"),
|
|
2304
|
-
});
|
|
2305
|
-
onEvent({
|
|
2306
|
-
type: "usage",
|
|
2307
|
-
inputTokens: 100,
|
|
2308
|
-
outputTokens: 0,
|
|
2309
|
-
model: "test-model",
|
|
2310
|
-
providerDurationMs: 50,
|
|
2311
|
-
});
|
|
2312
|
-
return messages;
|
|
2313
|
-
}
|
|
2314
|
-
// Call 3: the auto_compress_latest_turn rerun. Invoke
|
|
2315
|
-
// onCheckpoint so the orchestrator's mid-loop budget check
|
|
2316
|
-
// flips `yieldedForBudget` to true, then return without
|
|
2317
|
-
// finishing — mirroring what AgentLoop.run does when its
|
|
2318
|
-
// checkpoint returns "yield".
|
|
2319
|
-
if (options?.onCheckpoint) {
|
|
2320
|
-
await options.onCheckpoint({
|
|
2321
|
-
turnIndex: 0,
|
|
2322
|
-
toolCount: 1,
|
|
2323
|
-
hasToolUse: true,
|
|
2324
|
-
history: messages,
|
|
2325
|
-
});
|
|
2326
|
-
}
|
|
2327
|
-
return messages;
|
|
2328
|
-
};
|
|
2329
|
-
|
|
1811
|
+
// Calls 1 (initial) and 2 (convergence rerun) reject with
|
|
1812
|
+
// context-too-large so `contextTooLargeDetected` stays true through the
|
|
1813
|
+
// convergence exit and the orchestrator enters the auto_compress branch.
|
|
1814
|
+
// Call 3 (the auto_compress rerun) is a tool turn: the loop runs it
|
|
1815
|
+
// without a compaction hook, so when its mid-loop budget gate trips on
|
|
1816
|
+
// the still-oversized estimate it yields `exitReason = "budget"` rather
|
|
1817
|
+
// than recovering — the silent-stall path under test.
|
|
2330
1818
|
const ctx = makeCtx({
|
|
2331
|
-
|
|
1819
|
+
providerResponses: [
|
|
1820
|
+
new Error("context_length_exceeded"),
|
|
1821
|
+
new Error("context_length_exceeded"),
|
|
1822
|
+
toolUseResponse("t1", "read_file", { path: "/a.txt" }),
|
|
1823
|
+
],
|
|
1824
|
+
loopTools: [
|
|
1825
|
+
{
|
|
1826
|
+
name: "read_file",
|
|
1827
|
+
description: "Read a file",
|
|
1828
|
+
input_schema: {
|
|
1829
|
+
type: "object",
|
|
1830
|
+
properties: { path: { type: "string" } },
|
|
1831
|
+
},
|
|
1832
|
+
},
|
|
1833
|
+
],
|
|
1834
|
+
toolExecutor: async () => ({ content: "data", isError: false }),
|
|
2332
1835
|
hasNoClient: true,
|
|
2333
1836
|
contextWindowManager: {
|
|
2334
1837
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
@@ -2411,23 +1914,10 @@ describe("session-agent-loop", () => {
|
|
|
2411
1914
|
};
|
|
2412
1915
|
};
|
|
2413
1916
|
|
|
2414
|
-
|
|
2415
|
-
|
|
2416
|
-
type: "error",
|
|
2417
|
-
error: new Error("context_length_exceeded"),
|
|
2418
|
-
});
|
|
2419
|
-
onEvent({
|
|
2420
|
-
type: "usage",
|
|
2421
|
-
inputTokens: 100,
|
|
2422
|
-
outputTokens: 0,
|
|
2423
|
-
model: "test-model",
|
|
2424
|
-
providerDurationMs: 50,
|
|
2425
|
-
});
|
|
2426
|
-
return messages;
|
|
2427
|
-
};
|
|
2428
|
-
|
|
1917
|
+
// The provider rejects every call with a context-too-large error, so the
|
|
1918
|
+
// orchestrator keeps retrying until it hits the attempt ceiling.
|
|
2429
1919
|
const ctx = makeCtx({
|
|
2430
|
-
|
|
1920
|
+
providerResponses: [new Error("context_length_exceeded")],
|
|
2431
1921
|
contextWindowManager: {
|
|
2432
1922
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
2433
1923
|
maybeCompact: async () => ({ compacted: false }),
|
|
@@ -2443,7 +1933,6 @@ describe("session-agent-loop", () => {
|
|
|
2443
1933
|
test("preflight budget evaluation invokes reducer before provider call", async () => {
|
|
2444
1934
|
const events: ServerMessage[] = [];
|
|
2445
1935
|
let reducerCalls = 0;
|
|
2446
|
-
let agentLoopCalls = 0;
|
|
2447
1936
|
|
|
2448
1937
|
// Set token estimate above budget (100000 * 0.95 = 95000)
|
|
2449
1938
|
mockEstimateTokens = 96000;
|
|
@@ -2462,36 +1951,11 @@ describe("session-agent-loop", () => {
|
|
|
2462
1951
|
};
|
|
2463
1952
|
};
|
|
2464
1953
|
|
|
2465
|
-
|
|
2466
|
-
|
|
2467
|
-
|
|
2468
|
-
// `AgentLoop.run` just before `provider.sendMessage`.
|
|
2469
|
-
await onEvent({ type: "llm_call_started" });
|
|
2470
|
-
onEvent({
|
|
2471
|
-
type: "message_complete",
|
|
2472
|
-
message: {
|
|
2473
|
-
role: "assistant",
|
|
2474
|
-
content: [{ type: "text", text: "ok" }],
|
|
2475
|
-
},
|
|
2476
|
-
});
|
|
2477
|
-
onEvent({
|
|
2478
|
-
type: "usage",
|
|
2479
|
-
inputTokens: 50,
|
|
2480
|
-
outputTokens: 25,
|
|
2481
|
-
model: "test-model",
|
|
2482
|
-
providerDurationMs: 100,
|
|
2483
|
-
});
|
|
2484
|
-
return [
|
|
2485
|
-
...messages,
|
|
2486
|
-
{
|
|
2487
|
-
role: "assistant" as const,
|
|
2488
|
-
content: [{ type: "text", text: "ok" }] as ContentBlock[],
|
|
2489
|
-
},
|
|
2490
|
-
];
|
|
2491
|
-
};
|
|
2492
|
-
|
|
1954
|
+
// After the preflight reducer brings the estimate under budget, the loop
|
|
1955
|
+
// completes the turn in a single provider call.
|
|
1956
|
+
const { provider, calls } = createMockProvider([textResponse("ok")]);
|
|
2493
1957
|
const ctx = makeCtx({
|
|
2494
|
-
|
|
1958
|
+
loopProvider: provider,
|
|
2495
1959
|
contextWindowManager: {
|
|
2496
1960
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
2497
1961
|
maybeCompact: async () => ({ compacted: false }),
|
|
@@ -2502,8 +1966,8 @@ describe("session-agent-loop", () => {
|
|
|
2502
1966
|
|
|
2503
1967
|
// Reducer should have been called during preflight
|
|
2504
1968
|
expect(reducerCalls).toBeGreaterThanOrEqual(1);
|
|
2505
|
-
// Agent loop should still succeed
|
|
2506
|
-
expect(
|
|
1969
|
+
// Agent loop should still succeed in a single provider call
|
|
1970
|
+
expect(calls.length).toBe(1);
|
|
2507
1971
|
const complete = events.find((e) => e.type === "message_complete");
|
|
2508
1972
|
expect(complete).toBeDefined();
|
|
2509
1973
|
});
|
|
@@ -2512,78 +1976,28 @@ describe("session-agent-loop", () => {
|
|
|
2512
1976
|
describe("provider ordering error retry", () => {
|
|
2513
1977
|
test("retries with deep repair when ordering error is detected", async () => {
|
|
2514
1978
|
const events: ServerMessage[] = [];
|
|
2515
|
-
let callCount = 0;
|
|
2516
|
-
|
|
2517
|
-
const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
|
|
2518
|
-
// Prime the assistant row anchor — production code emits this from
|
|
2519
|
-
// `AgentLoop.run` just before `provider.sendMessage`. Retry branches
|
|
2520
|
-
// need this on every invocation: each agent-loop iteration reserves
|
|
2521
|
-
// its own row.
|
|
2522
|
-
await onEvent({ type: "llm_call_started" });
|
|
2523
|
-
callCount++;
|
|
2524
|
-
if (callCount === 1) {
|
|
2525
|
-
onEvent({
|
|
2526
|
-
type: "error",
|
|
2527
|
-
error: new Error("messages ordering error"),
|
|
2528
|
-
});
|
|
2529
|
-
onEvent({
|
|
2530
|
-
type: "usage",
|
|
2531
|
-
inputTokens: 100,
|
|
2532
|
-
outputTokens: 0,
|
|
2533
|
-
model: "test-model",
|
|
2534
|
-
providerDurationMs: 50,
|
|
2535
|
-
});
|
|
2536
|
-
return messages;
|
|
2537
|
-
}
|
|
2538
|
-
// Retry succeeds
|
|
2539
|
-
onEvent({
|
|
2540
|
-
type: "message_complete",
|
|
2541
|
-
message: {
|
|
2542
|
-
role: "assistant",
|
|
2543
|
-
content: [{ type: "text", text: "fixed" }],
|
|
2544
|
-
},
|
|
2545
|
-
});
|
|
2546
|
-
onEvent({
|
|
2547
|
-
type: "usage",
|
|
2548
|
-
inputTokens: 50,
|
|
2549
|
-
outputTokens: 25,
|
|
2550
|
-
model: "test-model",
|
|
2551
|
-
providerDurationMs: 100,
|
|
2552
|
-
});
|
|
2553
|
-
return [
|
|
2554
|
-
...messages,
|
|
2555
|
-
{
|
|
2556
|
-
role: "assistant" as const,
|
|
2557
|
-
content: [{ type: "text", text: "fixed" }] as ContentBlock[],
|
|
2558
|
-
},
|
|
2559
|
-
];
|
|
2560
|
-
};
|
|
2561
1979
|
|
|
2562
|
-
|
|
1980
|
+
// The provider rejects the first call with an ordering error, then
|
|
1981
|
+
// succeeds once the orchestrator's deep repair re-sends the turn.
|
|
1982
|
+
const { provider, calls } = createMockProvider([
|
|
1983
|
+
new Error("messages ordering error"),
|
|
1984
|
+
textResponse("fixed"),
|
|
1985
|
+
]);
|
|
1986
|
+
|
|
1987
|
+
const ctx = makeCtx({ loopProvider: provider });
|
|
2563
1988
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
2564
1989
|
|
|
2565
|
-
expect(
|
|
1990
|
+
expect(calls.length).toBe(2);
|
|
2566
1991
|
});
|
|
2567
1992
|
|
|
2568
1993
|
test("emits deferred ordering error when retry also fails", async () => {
|
|
2569
1994
|
const events: ServerMessage[] = [];
|
|
2570
1995
|
|
|
2571
|
-
|
|
2572
|
-
|
|
2573
|
-
|
|
2574
|
-
|
|
2575
|
-
|
|
2576
|
-
onEvent({
|
|
2577
|
-
type: "usage",
|
|
2578
|
-
inputTokens: 100,
|
|
2579
|
-
outputTokens: 0,
|
|
2580
|
-
model: "test-model",
|
|
2581
|
-
providerDurationMs: 50,
|
|
2582
|
-
});
|
|
2583
|
-
return messages;
|
|
2584
|
-
};
|
|
2585
|
-
|
|
2586
|
-
const ctx = makeCtx({ agentLoopRun });
|
|
1996
|
+
// The provider rejects every call with an ordering error, so even the
|
|
1997
|
+
// deep-repair retry fails and the orchestrator surfaces the error.
|
|
1998
|
+
const ctx = makeCtx({
|
|
1999
|
+
providerResponses: [new Error("messages ordering error")],
|
|
2000
|
+
});
|
|
2587
2001
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
2588
2002
|
|
|
2589
2003
|
const conversationError = events.find(
|
|
@@ -2597,62 +2011,18 @@ describe("session-agent-loop", () => {
|
|
|
2597
2011
|
test("yields at checkpoint when canHandoffAtCheckpoint returns true", async () => {
|
|
2598
2012
|
const events: ServerMessage[] = [];
|
|
2599
2013
|
|
|
2600
|
-
|
|
2601
|
-
|
|
2602
|
-
|
|
2603
|
-
|
|
2604
|
-
|
|
2605
|
-
await onEvent({ type: "llm_call_started" });
|
|
2606
|
-
// Simulate tool use followed by checkpoint
|
|
2607
|
-
onEvent({ type: "tool_use", id: "tu-1", name: "file_read", input: {} });
|
|
2608
|
-
onEvent({
|
|
2609
|
-
type: "tool_result",
|
|
2610
|
-
toolUseId: "tu-1",
|
|
2611
|
-
content: "file content",
|
|
2612
|
-
isError: false,
|
|
2613
|
-
});
|
|
2614
|
-
onEvent({
|
|
2615
|
-
type: "message_complete",
|
|
2616
|
-
message: {
|
|
2617
|
-
role: "assistant",
|
|
2618
|
-
content: [{ type: "text", text: "partial" }],
|
|
2619
|
-
},
|
|
2620
|
-
});
|
|
2621
|
-
onEvent({
|
|
2622
|
-
type: "usage",
|
|
2623
|
-
inputTokens: 100,
|
|
2624
|
-
outputTokens: 50,
|
|
2625
|
-
model: "test-model",
|
|
2626
|
-
providerDurationMs: 100,
|
|
2627
|
-
});
|
|
2628
|
-
if (options?.onCheckpoint) {
|
|
2629
|
-
const decision = await options.onCheckpoint({
|
|
2630
|
-
turnIndex: 0,
|
|
2631
|
-
toolCount: 1,
|
|
2632
|
-
hasToolUse: true,
|
|
2633
|
-
history: messages,
|
|
2634
|
-
});
|
|
2635
|
-
if (decision !== "continue") {
|
|
2636
|
-
return [
|
|
2637
|
-
...messages,
|
|
2638
|
-
{
|
|
2639
|
-
role: "assistant" as const,
|
|
2640
|
-
content: [{ type: "text", text: "partial" }] as ContentBlock[],
|
|
2641
|
-
},
|
|
2642
|
-
];
|
|
2643
|
-
}
|
|
2644
|
-
}
|
|
2645
|
-
return [
|
|
2646
|
-
...messages,
|
|
2014
|
+
// A tool turn drives the loop to its first mid-loop checkpoint, where the
|
|
2015
|
+
// orchestrator yields for a queued handoff.
|
|
2016
|
+
const ctx = makeCtx({
|
|
2017
|
+
providerResponses: [toolUseResponse("tu-1", "file_read", {})],
|
|
2018
|
+
loopTools: [
|
|
2647
2019
|
{
|
|
2648
|
-
|
|
2649
|
-
|
|
2020
|
+
name: "file_read",
|
|
2021
|
+
description: "Read a file",
|
|
2022
|
+
input_schema: { type: "object", properties: {} },
|
|
2650
2023
|
},
|
|
2651
|
-
]
|
|
2652
|
-
|
|
2653
|
-
|
|
2654
|
-
const ctx = makeCtx({
|
|
2655
|
-
agentLoopRun,
|
|
2024
|
+
],
|
|
2025
|
+
toolExecutor: async () => ({ content: "file content", isError: false }),
|
|
2656
2026
|
canHandoffAtCheckpoint: () => true,
|
|
2657
2027
|
} as unknown as Partial<AgentLoopConversationContext>);
|
|
2658
2028
|
|
|
@@ -2669,52 +2039,21 @@ describe("session-agent-loop", () => {
|
|
|
2669
2039
|
test("continues when canHandoffAtCheckpoint returns false", async () => {
|
|
2670
2040
|
const events: ServerMessage[] = [];
|
|
2671
2041
|
|
|
2672
|
-
|
|
2673
|
-
|
|
2674
|
-
|
|
2675
|
-
|
|
2676
|
-
|
|
2677
|
-
|
|
2678
|
-
|
|
2679
|
-
|
|
2680
|
-
type: "tool_result",
|
|
2681
|
-
toolUseId: "tu-1",
|
|
2682
|
-
content: "content",
|
|
2683
|
-
isError: false,
|
|
2684
|
-
});
|
|
2685
|
-
onEvent({
|
|
2686
|
-
type: "message_complete",
|
|
2687
|
-
message: {
|
|
2688
|
-
role: "assistant",
|
|
2689
|
-
content: [{ type: "text", text: "done" }],
|
|
2690
|
-
},
|
|
2691
|
-
});
|
|
2692
|
-
onEvent({
|
|
2693
|
-
type: "usage",
|
|
2694
|
-
inputTokens: 100,
|
|
2695
|
-
outputTokens: 50,
|
|
2696
|
-
model: "test-model",
|
|
2697
|
-
providerDurationMs: 100,
|
|
2698
|
-
});
|
|
2699
|
-
if (options?.onCheckpoint) {
|
|
2700
|
-
await options.onCheckpoint({
|
|
2701
|
-
turnIndex: 0,
|
|
2702
|
-
toolCount: 1,
|
|
2703
|
-
hasToolUse: true,
|
|
2704
|
-
history: messages,
|
|
2705
|
-
});
|
|
2706
|
-
}
|
|
2707
|
-
return [
|
|
2708
|
-
...messages,
|
|
2042
|
+
// The tool turn reaches a checkpoint, but with handoff disabled the loop
|
|
2043
|
+
// continues to the next turn and completes normally.
|
|
2044
|
+
const ctx = makeCtx({
|
|
2045
|
+
providerResponses: [
|
|
2046
|
+
toolUseResponse("tu-1", "file_read", {}),
|
|
2047
|
+
textResponse("done"),
|
|
2048
|
+
],
|
|
2049
|
+
loopTools: [
|
|
2709
2050
|
{
|
|
2710
|
-
|
|
2711
|
-
|
|
2051
|
+
name: "file_read",
|
|
2052
|
+
description: "Read a file",
|
|
2053
|
+
input_schema: { type: "object", properties: {} },
|
|
2712
2054
|
},
|
|
2713
|
-
]
|
|
2714
|
-
|
|
2715
|
-
|
|
2716
|
-
const ctx = makeCtx({
|
|
2717
|
-
agentLoopRun,
|
|
2055
|
+
],
|
|
2056
|
+
toolExecutor: async () => ({ content: "content", isError: false }),
|
|
2718
2057
|
canHandoffAtCheckpoint: () => false,
|
|
2719
2058
|
} as unknown as Partial<AgentLoopConversationContext>);
|
|
2720
2059
|
|
|
@@ -2736,36 +2075,18 @@ describe("session-agent-loop", () => {
|
|
|
2736
2075
|
const events: ServerMessage[] = [];
|
|
2737
2076
|
const abortController = new AbortController();
|
|
2738
2077
|
|
|
2739
|
-
|
|
2740
|
-
|
|
2741
|
-
|
|
2742
|
-
|
|
2743
|
-
|
|
2744
|
-
type: "
|
|
2745
|
-
|
|
2746
|
-
|
|
2747
|
-
|
|
2748
|
-
},
|
|
2749
|
-
});
|
|
2750
|
-
onEvent({
|
|
2751
|
-
type: "usage",
|
|
2752
|
-
inputTokens: 100,
|
|
2753
|
-
outputTokens: 50,
|
|
2754
|
-
model: "test-model",
|
|
2755
|
-
providerDurationMs: 100,
|
|
2756
|
-
});
|
|
2757
|
-
// Simulate abort after processing
|
|
2758
|
-
abortController.abort();
|
|
2759
|
-
return [
|
|
2760
|
-
...messages,
|
|
2761
|
-
{
|
|
2762
|
-
role: "assistant" as const,
|
|
2763
|
-
content: [{ type: "text", text: "partial" }] as ContentBlock[],
|
|
2764
|
-
},
|
|
2765
|
-
];
|
|
2078
|
+
// The provider completes its response but the user cancels mid-turn, so
|
|
2079
|
+
// the orchestrator observes the aborted signal once the loop returns.
|
|
2080
|
+
const provider: Provider = {
|
|
2081
|
+
name: "mock",
|
|
2082
|
+
async sendMessage(_messages, options) {
|
|
2083
|
+
options?.onEvent?.({ type: "text_delta", text: "partial" });
|
|
2084
|
+
abortController.abort();
|
|
2085
|
+
return textResponse("partial");
|
|
2086
|
+
},
|
|
2766
2087
|
};
|
|
2767
2088
|
|
|
2768
|
-
const ctx = makeCtx({
|
|
2089
|
+
const ctx = makeCtx({ loopProvider: provider, abortController });
|
|
2769
2090
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
2770
2091
|
|
|
2771
2092
|
const cancelled = events.find((e) => e.type === "generation_cancelled");
|
|
@@ -2776,13 +2097,16 @@ describe("session-agent-loop", () => {
|
|
|
2776
2097
|
const events: ServerMessage[] = [];
|
|
2777
2098
|
const abortController = new AbortController();
|
|
2778
2099
|
|
|
2779
|
-
|
|
2780
|
-
|
|
2781
|
-
|
|
2782
|
-
|
|
2100
|
+
// The provider rejects with an AbortError after the user cancels.
|
|
2101
|
+
const provider: Provider = {
|
|
2102
|
+
name: "mock",
|
|
2103
|
+
async sendMessage() {
|
|
2104
|
+
abortController.abort();
|
|
2105
|
+
throw new DOMException("The operation was aborted", "AbortError");
|
|
2106
|
+
},
|
|
2783
2107
|
};
|
|
2784
2108
|
|
|
2785
|
-
const ctx = makeCtx({
|
|
2109
|
+
const ctx = makeCtx({ loopProvider: provider, abortController });
|
|
2786
2110
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
2787
2111
|
|
|
2788
2112
|
const cancelled = events.find((e) => e.type === "generation_cancelled");
|
|
@@ -2799,36 +2123,17 @@ describe("session-agent-loop", () => {
|
|
|
2799
2123
|
const abortController = new AbortController();
|
|
2800
2124
|
resolveAssistantAttachmentsMock.mockClear();
|
|
2801
2125
|
|
|
2802
|
-
|
|
2803
|
-
|
|
2804
|
-
|
|
2805
|
-
|
|
2806
|
-
|
|
2807
|
-
|
|
2808
|
-
|
|
2809
|
-
|
|
2810
|
-
content: [{ type: "text", text: "partial" }],
|
|
2811
|
-
},
|
|
2812
|
-
});
|
|
2813
|
-
onEvent({
|
|
2814
|
-
type: "usage",
|
|
2815
|
-
inputTokens: 100,
|
|
2816
|
-
outputTokens: 50,
|
|
2817
|
-
model: "test-model",
|
|
2818
|
-
providerDurationMs: 100,
|
|
2819
|
-
});
|
|
2820
|
-
// Simulate abort after processing
|
|
2821
|
-
abortController.abort();
|
|
2822
|
-
return [
|
|
2823
|
-
...messages,
|
|
2824
|
-
{
|
|
2825
|
-
role: "assistant" as const,
|
|
2826
|
-
content: [{ type: "text", text: "partial" }] as ContentBlock[],
|
|
2827
|
-
},
|
|
2828
|
-
];
|
|
2126
|
+
// The provider completes its response but the user cancels mid-turn.
|
|
2127
|
+
const provider: Provider = {
|
|
2128
|
+
name: "mock",
|
|
2129
|
+
async sendMessage(_messages, options) {
|
|
2130
|
+
options?.onEvent?.({ type: "text_delta", text: "partial" });
|
|
2131
|
+
abortController.abort();
|
|
2132
|
+
return textResponse("partial");
|
|
2133
|
+
},
|
|
2829
2134
|
};
|
|
2830
2135
|
|
|
2831
|
-
const ctx = makeCtx({
|
|
2136
|
+
const ctx = makeCtx({ loopProvider: provider, abortController });
|
|
2832
2137
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
2833
2138
|
|
|
2834
2139
|
const cancelled = events.find((e) => e.type === "generation_cancelled");
|
|
@@ -2840,96 +2145,50 @@ describe("session-agent-loop", () => {
|
|
|
2840
2145
|
|
|
2841
2146
|
describe("finally block cleanup", () => {
|
|
2842
2147
|
test("increments turnCount after successful run", async () => {
|
|
2843
|
-
|
|
2844
|
-
|
|
2845
|
-
// Prime the assistant row anchor — production code emits this from
|
|
2846
|
-
// `AgentLoop.run` just before `provider.sendMessage`.
|
|
2847
|
-
await onEvent({ type: "llm_call_started" });
|
|
2848
|
-
onEvent({
|
|
2849
|
-
type: "message_complete",
|
|
2850
|
-
message: {
|
|
2851
|
-
role: "assistant",
|
|
2852
|
-
content: [{ type: "text", text: "hi" }],
|
|
2853
|
-
},
|
|
2854
|
-
});
|
|
2855
|
-
onEvent({
|
|
2856
|
-
type: "usage",
|
|
2857
|
-
inputTokens: 10,
|
|
2858
|
-
outputTokens: 5,
|
|
2859
|
-
model: "test",
|
|
2860
|
-
providerDurationMs: 50,
|
|
2861
|
-
});
|
|
2862
|
-
return [
|
|
2863
|
-
...messages,
|
|
2864
|
-
{
|
|
2865
|
-
role: "assistant" as const,
|
|
2866
|
-
content: [{ type: "text", text: "hi" }] as ContentBlock[],
|
|
2867
|
-
},
|
|
2868
|
-
];
|
|
2869
|
-
},
|
|
2870
|
-
});
|
|
2148
|
+
// GIVEN a real loop that answers in a single text turn
|
|
2149
|
+
const ctx = makeCtx({ providerResponses: [textResponse("hi")] });
|
|
2871
2150
|
expect(ctx.turnCount).toBe(0);
|
|
2872
2151
|
|
|
2152
|
+
// WHEN the orchestrator runs the turn to completion
|
|
2873
2153
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
2874
2154
|
|
|
2155
|
+
// THEN the finally block increments the turn count
|
|
2875
2156
|
expect(ctx.turnCount).toBe(1);
|
|
2876
2157
|
});
|
|
2877
2158
|
|
|
2878
2159
|
test("clears processing state and abort controller", async () => {
|
|
2879
|
-
|
|
2880
|
-
|
|
2881
|
-
// Prime the assistant row anchor — production code emits this from
|
|
2882
|
-
// `AgentLoop.run` just before `provider.sendMessage`.
|
|
2883
|
-
await onEvent({ type: "llm_call_started" });
|
|
2884
|
-
onEvent({
|
|
2885
|
-
type: "message_complete",
|
|
2886
|
-
message: {
|
|
2887
|
-
role: "assistant",
|
|
2888
|
-
content: [{ type: "text", text: "hi" }],
|
|
2889
|
-
},
|
|
2890
|
-
});
|
|
2891
|
-
onEvent({
|
|
2892
|
-
type: "usage",
|
|
2893
|
-
inputTokens: 10,
|
|
2894
|
-
outputTokens: 5,
|
|
2895
|
-
model: "test",
|
|
2896
|
-
providerDurationMs: 50,
|
|
2897
|
-
});
|
|
2898
|
-
return [
|
|
2899
|
-
...messages,
|
|
2900
|
-
{
|
|
2901
|
-
role: "assistant" as const,
|
|
2902
|
-
content: [{ type: "text", text: "hi" }] as ContentBlock[],
|
|
2903
|
-
},
|
|
2904
|
-
];
|
|
2905
|
-
},
|
|
2906
|
-
});
|
|
2160
|
+
// GIVEN a real loop that answers in a single text turn
|
|
2161
|
+
const ctx = makeCtx({ providerResponses: [textResponse("hi")] });
|
|
2907
2162
|
|
|
2163
|
+
// WHEN the orchestrator runs the turn to completion
|
|
2908
2164
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
2909
2165
|
|
|
2910
|
-
|
|
2166
|
+
// THEN the finally block clears all per-turn processing state
|
|
2167
|
+
expect(ctx.isProcessing()).toBe(false);
|
|
2911
2168
|
expect(ctx.abortController).toBeNull();
|
|
2912
2169
|
expect(ctx.currentRequestId).toBeUndefined();
|
|
2913
2170
|
expect(ctx.commandIntent).toBeUndefined();
|
|
2914
2171
|
});
|
|
2915
2172
|
|
|
2916
|
-
test("clears state
|
|
2173
|
+
test("clears state and surfaces a processing error when the provider call fails", async () => {
|
|
2174
|
+
// GIVEN a real loop whose provider rejects with an unexpected error
|
|
2917
2175
|
const events: ServerMessage[] = [];
|
|
2918
2176
|
const ctx = makeCtx({
|
|
2919
|
-
|
|
2920
|
-
|
|
2921
|
-
|
|
2177
|
+
loopProvider: {
|
|
2178
|
+
name: "mock-provider",
|
|
2179
|
+
async sendMessage() {
|
|
2180
|
+
throw new Error("unexpected crash");
|
|
2181
|
+
},
|
|
2182
|
+
} as unknown as Provider,
|
|
2922
2183
|
});
|
|
2923
2184
|
|
|
2185
|
+
// WHEN the orchestrator runs the turn
|
|
2924
2186
|
await runAgentLoopImpl(ctx, "hi", "msg-1", (msg) => events.push(msg));
|
|
2925
2187
|
|
|
2926
|
-
|
|
2188
|
+
// THEN the finally block clears per-turn state and the failure is
|
|
2189
|
+
// surfaced as a processing-failed conversation error
|
|
2190
|
+
expect(ctx.isProcessing()).toBe(false);
|
|
2927
2191
|
expect(ctx.abortController).toBeNull();
|
|
2928
|
-
expect(events.find((event) => event.type === "error")).toMatchObject({
|
|
2929
|
-
type: "error",
|
|
2930
|
-
code: "CONVERSATION_PROCESSING_FAILED",
|
|
2931
|
-
errorCategory: "processing_failed",
|
|
2932
|
-
});
|
|
2933
2192
|
expect(
|
|
2934
2193
|
events.find((event) => event.type === "conversation_error"),
|
|
2935
2194
|
).toMatchObject({
|
|
@@ -2940,46 +2199,19 @@ describe("session-agent-loop", () => {
|
|
|
2940
2199
|
});
|
|
2941
2200
|
|
|
2942
2201
|
test("drains queue after completion", async () => {
|
|
2202
|
+
// GIVEN a real loop that answers in a single text turn
|
|
2943
2203
|
let drainReason: string | undefined;
|
|
2944
2204
|
const ctx = makeCtx({
|
|
2945
|
-
|
|
2946
|
-
messages: Message[],
|
|
2947
|
-
onEvent: (event: AgentEvent) => void | Promise<void>,
|
|
2948
|
-
) => {
|
|
2949
|
-
// Prime the assistant row anchor — production code emits this from
|
|
2950
|
-
// `AgentLoop.run` just before `provider.sendMessage`. Must be
|
|
2951
|
-
// awaited so the assistant row is reserved before message_complete
|
|
2952
|
-
// tries to write into it.
|
|
2953
|
-
await onEvent({ type: "llm_call_started" });
|
|
2954
|
-
onEvent({
|
|
2955
|
-
type: "message_complete",
|
|
2956
|
-
message: {
|
|
2957
|
-
role: "assistant",
|
|
2958
|
-
content: [{ type: "text", text: "ok" }],
|
|
2959
|
-
},
|
|
2960
|
-
});
|
|
2961
|
-
onEvent({
|
|
2962
|
-
type: "usage",
|
|
2963
|
-
inputTokens: 10,
|
|
2964
|
-
outputTokens: 5,
|
|
2965
|
-
model: "test",
|
|
2966
|
-
providerDurationMs: 50,
|
|
2967
|
-
});
|
|
2968
|
-
return [
|
|
2969
|
-
...messages,
|
|
2970
|
-
{
|
|
2971
|
-
role: "assistant" as const,
|
|
2972
|
-
content: [{ type: "text", text: "ok" }] as ContentBlock[],
|
|
2973
|
-
},
|
|
2974
|
-
];
|
|
2975
|
-
},
|
|
2205
|
+
providerResponses: [textResponse("ok")],
|
|
2976
2206
|
drainQueue: (reason: string) => {
|
|
2977
2207
|
drainReason = reason;
|
|
2978
2208
|
},
|
|
2979
2209
|
} as unknown as Partial<AgentLoopConversationContext>);
|
|
2980
2210
|
|
|
2211
|
+
// WHEN the orchestrator runs the turn to completion
|
|
2981
2212
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
2982
2213
|
|
|
2214
|
+
// THEN the queue is drained with the loop-complete reason
|
|
2983
2215
|
expect(drainReason).toBe("loop_complete");
|
|
2984
2216
|
});
|
|
2985
2217
|
});
|
|
@@ -3098,7 +2330,7 @@ describe("session-agent-loop", () => {
|
|
|
3098
2330
|
isUserMessage: true,
|
|
3099
2331
|
});
|
|
3100
2332
|
|
|
3101
|
-
expect(ctx.
|
|
2333
|
+
expect(ctx.isProcessing()).toBe(false);
|
|
3102
2334
|
expect(ctx.abortController).toBeNull();
|
|
3103
2335
|
expect(ctx.currentRequestId).toBeUndefined();
|
|
3104
2336
|
});
|
|
@@ -3208,24 +2440,17 @@ describe("session-agent-loop", () => {
|
|
|
3208
2440
|
test("synthesizes error assistant message when provider returns no response", async () => {
|
|
3209
2441
|
const events: ServerMessage[] = [];
|
|
3210
2442
|
|
|
3211
|
-
|
|
3212
|
-
|
|
3213
|
-
|
|
3214
|
-
|
|
3215
|
-
|
|
3216
|
-
|
|
3217
|
-
|
|
3218
|
-
|
|
3219
|
-
|
|
3220
|
-
|
|
3221
|
-
|
|
3222
|
-
providerDurationMs: 50,
|
|
3223
|
-
});
|
|
3224
|
-
// Return same messages (no assistant message appended)
|
|
3225
|
-
return messages;
|
|
3226
|
-
};
|
|
3227
|
-
|
|
3228
|
-
const ctx = makeCtx({ agentLoopRun });
|
|
2443
|
+
// GIVEN a real loop whose provider rejects with a generic error
|
|
2444
|
+
// (non-ordering, non-context-too-large) so the loop emits `error` and
|
|
2445
|
+
// the orchestrator sets `providerErrorUserMessage`.
|
|
2446
|
+
const ctx = makeCtx({
|
|
2447
|
+
loopProvider: {
|
|
2448
|
+
name: "mock-provider",
|
|
2449
|
+
async sendMessage() {
|
|
2450
|
+
throw new Error("Internal processing failure");
|
|
2451
|
+
},
|
|
2452
|
+
} as unknown as Provider,
|
|
2453
|
+
});
|
|
3229
2454
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
3230
2455
|
|
|
3231
2456
|
// The error should be sent as a conversation_error (not as an
|
|
@@ -3249,26 +2474,19 @@ describe("session-agent-loop", () => {
|
|
|
3249
2474
|
// sweep would wrong-attach this row to the wrong assistant message.
|
|
3250
2475
|
const events: ServerMessage[] = [];
|
|
3251
2476
|
|
|
3252
|
-
|
|
3253
|
-
|
|
3254
|
-
|
|
3255
|
-
|
|
3256
|
-
|
|
3257
|
-
|
|
3258
|
-
|
|
3259
|
-
|
|
3260
|
-
|
|
3261
|
-
|
|
3262
|
-
|
|
3263
|
-
|
|
3264
|
-
|
|
3265
|
-
error: new Error("upstream 500"),
|
|
3266
|
-
});
|
|
3267
|
-
// Provider returned no assistant content — same messages back.
|
|
3268
|
-
return messages;
|
|
3269
|
-
};
|
|
3270
|
-
|
|
3271
|
-
const ctx = makeCtx({ agentLoopRun });
|
|
2477
|
+
// GIVEN a real loop whose provider rejects: the loop emits
|
|
2478
|
+
// `provider_error` (writing an `llm_request_logs` row with
|
|
2479
|
+
// messageId=null — the orphan we link) then `error` (which sets
|
|
2480
|
+
// `state.providerErrorUserMessage`, activating the synthetic-message
|
|
2481
|
+
// branch below the loop).
|
|
2482
|
+
const ctx = makeCtx({
|
|
2483
|
+
loopProvider: {
|
|
2484
|
+
name: "mock-provider",
|
|
2485
|
+
async sendMessage() {
|
|
2486
|
+
throw new Error("upstream 500");
|
|
2487
|
+
},
|
|
2488
|
+
} as unknown as Provider,
|
|
2489
|
+
});
|
|
3272
2490
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
3273
2491
|
|
|
3274
2492
|
// The orphan was written with messageId=undefined.
|
|
@@ -3315,39 +2533,10 @@ describe("session-agent-loop", () => {
|
|
|
3315
2533
|
// observe the sync-invalidation publish path on the same turn.
|
|
3316
2534
|
projectAssistantMessageMock.mockImplementationOnce(() => true);
|
|
3317
2535
|
|
|
3318
|
-
|
|
3319
|
-
|
|
3320
|
-
|
|
3321
|
-
|
|
3322
|
-
// or before the loop returns. Without the await the projector's
|
|
3323
|
-
// synchronous call still races against the test's assertion phase
|
|
3324
|
-
// because the indexer's `await` yields microtasks.
|
|
3325
|
-
await onEvent({
|
|
3326
|
-
type: "message_complete",
|
|
3327
|
-
message: {
|
|
3328
|
-
role: "assistant",
|
|
3329
|
-
content: [{ type: "text", text: "indexed reply" }],
|
|
3330
|
-
},
|
|
3331
|
-
});
|
|
3332
|
-
onEvent({
|
|
3333
|
-
type: "usage",
|
|
3334
|
-
inputTokens: 10,
|
|
3335
|
-
outputTokens: 5,
|
|
3336
|
-
model: "test",
|
|
3337
|
-
providerDurationMs: 50,
|
|
3338
|
-
});
|
|
3339
|
-
return [
|
|
3340
|
-
...messages,
|
|
3341
|
-
{
|
|
3342
|
-
role: "assistant" as const,
|
|
3343
|
-
content: [
|
|
3344
|
-
{ type: "text", text: "indexed reply" },
|
|
3345
|
-
] as ContentBlock[],
|
|
3346
|
-
},
|
|
3347
|
-
];
|
|
3348
|
-
};
|
|
3349
|
-
|
|
3350
|
-
const ctx = makeCtx({ agentLoopRun });
|
|
2536
|
+
// GIVEN a real loop that answers with a single finalized assistant turn
|
|
2537
|
+
const ctx = makeCtx({
|
|
2538
|
+
providerResponses: [textResponse("indexed reply")],
|
|
2539
|
+
});
|
|
3351
2540
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
3352
2541
|
|
|
3353
2542
|
// Indexer fired with the reserved row's id + the finalized content.
|
|
@@ -3410,34 +2599,8 @@ describe("session-agent-loop", () => {
|
|
|
3410
2599
|
metadata: null,
|
|
3411
2600
|
};
|
|
3412
2601
|
|
|
3413
|
-
|
|
3414
|
-
|
|
3415
|
-
// See sibling test — `message_complete` must be awaited so the
|
|
3416
|
-
// projector call lands before the assertion phase.
|
|
3417
|
-
await onEvent({
|
|
3418
|
-
type: "message_complete",
|
|
3419
|
-
message: {
|
|
3420
|
-
role: "assistant",
|
|
3421
|
-
content: [{ type: "text", text: "quiet" }],
|
|
3422
|
-
},
|
|
3423
|
-
});
|
|
3424
|
-
onEvent({
|
|
3425
|
-
type: "usage",
|
|
3426
|
-
inputTokens: 1,
|
|
3427
|
-
outputTokens: 1,
|
|
3428
|
-
model: "test",
|
|
3429
|
-
providerDurationMs: 1,
|
|
3430
|
-
});
|
|
3431
|
-
return [
|
|
3432
|
-
...messages,
|
|
3433
|
-
{
|
|
3434
|
-
role: "assistant" as const,
|
|
3435
|
-
content: [{ type: "text", text: "quiet" }] as ContentBlock[],
|
|
3436
|
-
},
|
|
3437
|
-
];
|
|
3438
|
-
};
|
|
3439
|
-
|
|
3440
|
-
const ctx = makeCtx({ agentLoopRun });
|
|
2602
|
+
// GIVEN a real loop that answers with a single finalized assistant turn
|
|
2603
|
+
const ctx = makeCtx({ providerResponses: [textResponse("quiet")] });
|
|
3441
2604
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
3442
2605
|
|
|
3443
2606
|
expect(projectAssistantMessageMock).toHaveBeenCalledTimes(1);
|
|
@@ -3462,40 +2625,33 @@ describe("session-agent-loop", () => {
|
|
|
3462
2625
|
// Indexer/projector mocks default to no-op; no finalized row in this
|
|
3463
2626
|
// test, so `mockMessageById` stays null.
|
|
3464
2627
|
|
|
3465
|
-
|
|
3466
|
-
|
|
3467
|
-
|
|
3468
|
-
|
|
3469
|
-
|
|
3470
|
-
|
|
3471
|
-
|
|
3472
|
-
|
|
3473
|
-
|
|
3474
|
-
|
|
3475
|
-
|
|
3476
|
-
|
|
3477
|
-
content: [{ type: "text", text: "retry succeeded" }],
|
|
3478
|
-
},
|
|
3479
|
-
});
|
|
3480
|
-
onEvent({
|
|
3481
|
-
type: "usage",
|
|
3482
|
-
inputTokens: 5,
|
|
3483
|
-
outputTokens: 3,
|
|
3484
|
-
model: "test",
|
|
3485
|
-
providerDurationMs: 25,
|
|
3486
|
-
});
|
|
3487
|
-
return [
|
|
3488
|
-
...messages,
|
|
3489
|
-
{
|
|
3490
|
-
role: "assistant" as const,
|
|
3491
|
-
content: [
|
|
3492
|
-
{ type: "text", text: "retry succeeded" },
|
|
3493
|
-
] as ContentBlock[],
|
|
3494
|
-
},
|
|
3495
|
-
];
|
|
3496
|
-
};
|
|
2628
|
+
// A single reducer tier converges the oversized context so the
|
|
2629
|
+
// orchestrator re-enters the loop after the first call fails.
|
|
2630
|
+
mockReducerStepFn = (msgs: Message[]) => ({
|
|
2631
|
+
messages: msgs,
|
|
2632
|
+
tier: "forced_compaction",
|
|
2633
|
+
state: {
|
|
2634
|
+
appliedTiers: ["forced_compaction"],
|
|
2635
|
+
injectionMode: "full",
|
|
2636
|
+
exhausted: false,
|
|
2637
|
+
},
|
|
2638
|
+
estimatedTokens: 5000,
|
|
2639
|
+
});
|
|
3497
2640
|
|
|
3498
|
-
|
|
2641
|
+
// GIVEN a real loop whose first call rejects with context-too-large
|
|
2642
|
+
// (reserving msg-strand-A but never finalizing it), then recovers via
|
|
2643
|
+
// convergence on re-entry. The re-entry's `llm_call_started` must
|
|
2644
|
+
// delete the stranded msg-strand-A before reserving msg-strand-B.
|
|
2645
|
+
const ctx = makeCtx({
|
|
2646
|
+
providerResponses: [
|
|
2647
|
+
new Error("context_length_exceeded"),
|
|
2648
|
+
textResponse("retry succeeded"),
|
|
2649
|
+
],
|
|
2650
|
+
contextWindowManager: {
|
|
2651
|
+
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
2652
|
+
maybeCompact: async () => ({ compacted: false }),
|
|
2653
|
+
} as unknown as AgentLoopConversationContext["contextWindowManager"],
|
|
2654
|
+
});
|
|
3499
2655
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
3500
2656
|
|
|
3501
2657
|
// Exactly one delete fires — for msg-strand-A, before the second
|
|
@@ -3523,27 +2679,20 @@ describe("session-agent-loop", () => {
|
|
|
3523
2679
|
id: "msg-orphaned-reservation",
|
|
3524
2680
|
}));
|
|
3525
2681
|
|
|
3526
|
-
|
|
3527
|
-
|
|
3528
|
-
|
|
3529
|
-
|
|
3530
|
-
|
|
3531
|
-
|
|
3532
|
-
|
|
3533
|
-
|
|
3534
|
-
|
|
3535
|
-
|
|
3536
|
-
|
|
3537
|
-
|
|
3538
|
-
|
|
3539
|
-
|
|
3540
|
-
});
|
|
3541
|
-
// No assistant message in the result — the synthetic-error branch
|
|
3542
|
-
// below the agent loop fires.
|
|
3543
|
-
return messages;
|
|
3544
|
-
};
|
|
3545
|
-
|
|
3546
|
-
const ctx = makeCtx({ agentLoopRun });
|
|
2682
|
+
// GIVEN a real loop that reserves an assistant row at
|
|
2683
|
+
// `llm_call_started`, then whose provider rejects: the loop emits
|
|
2684
|
+
// `provider_error` (writing the llm_request_log row) and `error`
|
|
2685
|
+
// (arming `state.providerErrorUserMessage`), exiting with no
|
|
2686
|
+
// `message_complete` so the synthetic-error branch below the loop
|
|
2687
|
+
// fires.
|
|
2688
|
+
const ctx = makeCtx({
|
|
2689
|
+
loopProvider: {
|
|
2690
|
+
name: "mock-provider",
|
|
2691
|
+
async sendMessage() {
|
|
2692
|
+
throw new Error("upstream 500");
|
|
2693
|
+
},
|
|
2694
|
+
} as unknown as Provider,
|
|
2695
|
+
});
|
|
3547
2696
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
3548
2697
|
|
|
3549
2698
|
// The orphan was deleted exactly once, before the synthetic error
|
|
@@ -3599,40 +2748,23 @@ describe("session-agent-loop", () => {
|
|
|
3599
2748
|
metadata: null,
|
|
3600
2749
|
};
|
|
3601
2750
|
|
|
3602
|
-
|
|
3603
|
-
|
|
3604
|
-
|
|
3605
|
-
|
|
3606
|
-
|
|
3607
|
-
|
|
3608
|
-
|
|
3609
|
-
|
|
3610
|
-
|
|
3611
|
-
|
|
3612
|
-
|
|
3613
|
-
|
|
3614
|
-
content: [{ type: "text", text: "Hello, world." }],
|
|
3615
|
-
},
|
|
3616
|
-
});
|
|
3617
|
-
onEvent({
|
|
3618
|
-
type: "usage",
|
|
3619
|
-
inputTokens: 10,
|
|
3620
|
-
outputTokens: 5,
|
|
3621
|
-
model: "test",
|
|
3622
|
-
providerDurationMs: 50,
|
|
3623
|
-
});
|
|
3624
|
-
return [
|
|
3625
|
-
...messages,
|
|
3626
|
-
{
|
|
3627
|
-
role: "assistant" as const,
|
|
3628
|
-
content: [
|
|
3629
|
-
{ type: "text", text: "Hello, world." },
|
|
3630
|
-
] as ContentBlock[],
|
|
2751
|
+
// GIVEN a real loop whose provider streams two small deltas (each under
|
|
2752
|
+
// the 1024-char size gate) then holds the turn open past the 250ms
|
|
2753
|
+
// debounce window before completing, so a single debounced partial
|
|
2754
|
+
// flush lands before `message_complete`.
|
|
2755
|
+
const ctx = makeCtx({
|
|
2756
|
+
loopProvider: {
|
|
2757
|
+
name: "mock-provider",
|
|
2758
|
+
async sendMessage(_messages, options) {
|
|
2759
|
+
options?.onEvent?.({ type: "text_delta", text: "Hello, " });
|
|
2760
|
+
options?.onEvent?.({ type: "text_delta", text: "world." });
|
|
2761
|
+
await new Promise((resolve) => setTimeout(resolve, 1100));
|
|
2762
|
+
return textResponse("Hello, world.");
|
|
3631
2763
|
},
|
|
3632
|
-
|
|
3633
|
-
};
|
|
2764
|
+
},
|
|
2765
|
+
});
|
|
3634
2766
|
|
|
3635
|
-
|
|
2767
|
+
// WHEN the orchestrator runs the turn to completion
|
|
3636
2768
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
3637
2769
|
|
|
3638
2770
|
// Exactly two `updateContent` calls land:
|
|
@@ -3668,70 +2800,38 @@ describe("session-agent-loop", () => {
|
|
|
3668
2800
|
metadata: null,
|
|
3669
2801
|
};
|
|
3670
2802
|
|
|
3671
|
-
|
|
3672
|
-
|
|
3673
|
-
|
|
3674
|
-
|
|
3675
|
-
|
|
3676
|
-
|
|
3677
|
-
|
|
3678
|
-
|
|
3679
|
-
|
|
3680
|
-
input: { path: "/foo" },
|
|
3681
|
-
});
|
|
3682
|
-
// Yield a microtask so any (incorrectly) fire-and-forget
|
|
3683
|
-
// pipeline call has a chance to land before message_complete.
|
|
3684
|
-
await new Promise((resolve) => setImmediate(resolve));
|
|
3685
|
-
onEvent({
|
|
3686
|
-
type: "tool_result",
|
|
3687
|
-
toolUseId: "tu-no-flush",
|
|
3688
|
-
content: "ok",
|
|
3689
|
-
isError: false,
|
|
3690
|
-
});
|
|
3691
|
-
await onEvent({
|
|
3692
|
-
type: "message_complete",
|
|
3693
|
-
message: {
|
|
3694
|
-
role: "assistant",
|
|
3695
|
-
content: [
|
|
3696
|
-
{
|
|
3697
|
-
type: "tool_use",
|
|
3698
|
-
id: "tu-no-flush",
|
|
3699
|
-
name: "file_read",
|
|
3700
|
-
input: { path: "/foo" },
|
|
3701
|
-
},
|
|
3702
|
-
],
|
|
3703
|
-
},
|
|
3704
|
-
});
|
|
3705
|
-
onEvent({
|
|
3706
|
-
type: "usage",
|
|
3707
|
-
inputTokens: 10,
|
|
3708
|
-
outputTokens: 5,
|
|
3709
|
-
model: "test",
|
|
3710
|
-
providerDurationMs: 50,
|
|
3711
|
-
});
|
|
3712
|
-
return [
|
|
3713
|
-
...messages,
|
|
2803
|
+
// GIVEN a real loop that runs one tool turn — the loop emits `tool_use`
|
|
2804
|
+
// strictly AFTER `message_complete` — and then answers with a final
|
|
2805
|
+
// text turn. The tool executor returns immediately.
|
|
2806
|
+
const ctx = makeCtx({
|
|
2807
|
+
providerResponses: [
|
|
2808
|
+
toolUseResponse("tu-no-flush", "file_read", { path: "/foo" }),
|
|
2809
|
+
textResponse("done"),
|
|
2810
|
+
],
|
|
2811
|
+
loopTools: [
|
|
3714
2812
|
{
|
|
3715
|
-
|
|
3716
|
-
|
|
3717
|
-
|
|
3718
|
-
|
|
3719
|
-
|
|
3720
|
-
|
|
3721
|
-
input: { path: "/foo" },
|
|
3722
|
-
},
|
|
3723
|
-
] as ContentBlock[],
|
|
2813
|
+
name: "file_read",
|
|
2814
|
+
description: "Read a file",
|
|
2815
|
+
input_schema: {
|
|
2816
|
+
type: "object",
|
|
2817
|
+
properties: { path: { type: "string" } },
|
|
2818
|
+
},
|
|
3724
2819
|
},
|
|
3725
|
-
]
|
|
3726
|
-
|
|
2820
|
+
],
|
|
2821
|
+
toolExecutor: async () => ({ content: "ok", isError: false }),
|
|
2822
|
+
});
|
|
3727
2823
|
|
|
3728
|
-
|
|
2824
|
+
// WHEN the orchestrator runs the turn to completion
|
|
3729
2825
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
3730
2826
|
|
|
3731
|
-
//
|
|
3732
|
-
//
|
|
3733
|
-
//
|
|
3734
|
-
|
|
2827
|
+
// Four authoritative writes land and no stray partial flush:
|
|
2828
|
+
// - one final flush per `message_complete` (the tool turn and the final
|
|
2829
|
+
// text turn), plus
|
|
2830
|
+
// - two grouped tool-result user-row writes (persist-on-arrival and the
|
|
2831
|
+
// turn-boundary finalize).
|
|
2832
|
+
// `handleToolUse` contributes no partial flush of its own; one would make
|
|
2833
|
+
// this 5. That stray flush is the regression this test guards against.
|
|
2834
|
+
expect(updateMessageContentMock).toHaveBeenCalledTimes(4);
|
|
3735
2835
|
});
|
|
3736
2836
|
|
|
3737
2837
|
test("handleMessageComplete clears any pending debounce timer before the final flush", async () => {
|
|
@@ -3744,45 +2844,53 @@ describe("session-agent-loop", () => {
|
|
|
3744
2844
|
metadata: null,
|
|
3745
2845
|
};
|
|
3746
2846
|
|
|
3747
|
-
|
|
3748
|
-
|
|
3749
|
-
|
|
3750
|
-
|
|
3751
|
-
|
|
3752
|
-
|
|
3753
|
-
|
|
3754
|
-
|
|
3755
|
-
|
|
3756
|
-
|
|
3757
|
-
|
|
2847
|
+
// GIVEN a real loop whose first turn streams a short delta (scheduling a
|
|
2848
|
+
// debounce timer) and completes as a tool turn — so `message_complete`
|
|
2849
|
+
// arrives before the 250ms timer and clears it. The tool executor then
|
|
2850
|
+
// holds the loop open well past the original debounce window, proving a
|
|
2851
|
+
// late timer does NOT fire a stray partial flush, before a final text
|
|
2852
|
+
// turn ends the run.
|
|
2853
|
+
const ctx = makeCtx({
|
|
2854
|
+
providerResponses: [
|
|
2855
|
+
{
|
|
2856
|
+
content: [
|
|
2857
|
+
{ type: "text", text: "Quick reply." },
|
|
2858
|
+
{
|
|
2859
|
+
type: "tool_use",
|
|
2860
|
+
id: "tu-keep-alive",
|
|
2861
|
+
name: "file_read",
|
|
2862
|
+
input: {},
|
|
2863
|
+
},
|
|
2864
|
+
],
|
|
2865
|
+
model: "mock-model",
|
|
2866
|
+
usage: { inputTokens: 10, outputTokens: 5 },
|
|
2867
|
+
stopReason: "tool_use",
|
|
3758
2868
|
},
|
|
3759
|
-
|
|
3760
|
-
|
|
3761
|
-
|
|
3762
|
-
inputTokens: 10,
|
|
3763
|
-
outputTokens: 5,
|
|
3764
|
-
model: "test",
|
|
3765
|
-
providerDurationMs: 50,
|
|
3766
|
-
});
|
|
3767
|
-
// Wait past the original debounce window to prove a late timer
|
|
3768
|
-
// does NOT fire a stray partial flush.
|
|
3769
|
-
await new Promise((resolve) => setTimeout(resolve, 1100));
|
|
3770
|
-
return [
|
|
3771
|
-
...messages,
|
|
2869
|
+
textResponse("done"),
|
|
2870
|
+
],
|
|
2871
|
+
loopTools: [
|
|
3772
2872
|
{
|
|
3773
|
-
|
|
3774
|
-
|
|
2873
|
+
name: "file_read",
|
|
2874
|
+
description: "Read a file",
|
|
2875
|
+
input_schema: { type: "object", properties: {} },
|
|
3775
2876
|
},
|
|
3776
|
-
]
|
|
3777
|
-
|
|
2877
|
+
],
|
|
2878
|
+
toolExecutor: async () => {
|
|
2879
|
+
await new Promise((resolve) => setTimeout(resolve, 1100));
|
|
2880
|
+
return { content: "ok", isError: false };
|
|
2881
|
+
},
|
|
2882
|
+
});
|
|
3778
2883
|
|
|
3779
|
-
|
|
2884
|
+
// WHEN the orchestrator runs the turn to completion
|
|
3780
2885
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
3781
2886
|
|
|
3782
|
-
//
|
|
3783
|
-
//
|
|
3784
|
-
//
|
|
3785
|
-
|
|
2887
|
+
// Four authoritative writes land: one final flush per `message_complete`
|
|
2888
|
+
// (the tool turn and the final text turn) plus two grouped tool-result
|
|
2889
|
+
// user-row writes (persist-on-arrival and the turn-boundary finalize).
|
|
2890
|
+
// The debounced partial would have fired around T+250ms — during the tool
|
|
2891
|
+
// executor's hold — but the timer-clear at the top of
|
|
2892
|
+
// `handleMessageComplete` cancels it, so no stray fifth flush appears.
|
|
2893
|
+
expect(updateMessageContentMock).toHaveBeenCalledTimes(4);
|
|
3786
2894
|
});
|
|
3787
2895
|
|
|
3788
2896
|
test("partial flushes never trigger the indexer or attention projector", async () => {
|
|
@@ -3795,54 +2903,29 @@ describe("session-agent-loop", () => {
|
|
|
3795
2903
|
metadata: null,
|
|
3796
2904
|
};
|
|
3797
2905
|
|
|
3798
|
-
|
|
3799
|
-
|
|
3800
|
-
|
|
3801
|
-
|
|
3802
|
-
|
|
3803
|
-
|
|
3804
|
-
|
|
3805
|
-
|
|
3806
|
-
|
|
3807
|
-
|
|
3808
|
-
|
|
3809
|
-
|
|
3810
|
-
|
|
3811
|
-
|
|
3812
|
-
|
|
3813
|
-
|
|
3814
|
-
indexerCallsBeforeComplete,
|
|
3815
|
-
projectorCallsBeforeComplete,
|
|
3816
|
-
];
|
|
3817
|
-
await onEvent({
|
|
3818
|
-
type: "message_complete",
|
|
3819
|
-
message: {
|
|
3820
|
-
role: "assistant",
|
|
3821
|
-
content: [{ type: "text", text: "hello world" }],
|
|
3822
|
-
},
|
|
3823
|
-
});
|
|
3824
|
-
onEvent({
|
|
3825
|
-
type: "usage",
|
|
3826
|
-
inputTokens: 10,
|
|
3827
|
-
outputTokens: 5,
|
|
3828
|
-
model: "test",
|
|
3829
|
-
providerDurationMs: 50,
|
|
3830
|
-
});
|
|
3831
|
-
return [
|
|
3832
|
-
...messages,
|
|
3833
|
-
{
|
|
3834
|
-
role: "assistant" as const,
|
|
3835
|
-
content: [{ type: "text", text: "hello world" }] as ContentBlock[],
|
|
2906
|
+
// GIVEN a real loop whose provider streams a delta then holds the turn
|
|
2907
|
+
// open past the 250ms debounce window so the partial flush lands BEFORE
|
|
2908
|
+
// `message_complete`. The indexer/projector counts are snapshotted at
|
|
2909
|
+
// that mid-turn point (after the partial flush, before completion).
|
|
2910
|
+
let snapshot: [number, number] | undefined;
|
|
2911
|
+
const ctx = makeCtx({
|
|
2912
|
+
loopProvider: {
|
|
2913
|
+
name: "mock-provider",
|
|
2914
|
+
async sendMessage(_messages, options) {
|
|
2915
|
+
options?.onEvent?.({ type: "text_delta", text: "hello world" });
|
|
2916
|
+
await new Promise((resolve) => setTimeout(resolve, 1100));
|
|
2917
|
+
snapshot = [
|
|
2918
|
+
indexMessageNowMock.mock.calls.length,
|
|
2919
|
+
projectAssistantMessageMock.mock.calls.length,
|
|
2920
|
+
];
|
|
2921
|
+
return textResponse("hello world");
|
|
3836
2922
|
},
|
|
3837
|
-
|
|
3838
|
-
};
|
|
2923
|
+
},
|
|
2924
|
+
});
|
|
3839
2925
|
|
|
3840
|
-
|
|
2926
|
+
// WHEN the orchestrator runs the turn to completion
|
|
3841
2927
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
3842
2928
|
|
|
3843
|
-
const snapshot = (
|
|
3844
|
-
ctx as unknown as { __partialSnapshot?: [number, number] }
|
|
3845
|
-
).__partialSnapshot;
|
|
3846
2929
|
expect(snapshot).toBeDefined();
|
|
3847
2930
|
// Indexer + projector were both ZERO during the mid-turn partial
|
|
3848
2931
|
// flush — they only fire from `handleMessageComplete` after the
|
|
@@ -3870,35 +2953,21 @@ describe("session-agent-loop", () => {
|
|
|
3870
2953
|
const ghToken = "ghp_" + "a".repeat(36);
|
|
3871
2954
|
const payload = "Here's the key: " + ghToken + " enjoy.";
|
|
3872
2955
|
|
|
3873
|
-
|
|
3874
|
-
|
|
3875
|
-
|
|
3876
|
-
|
|
3877
|
-
|
|
3878
|
-
|
|
3879
|
-
|
|
3880
|
-
|
|
3881
|
-
|
|
3882
|
-
|
|
3883
|
-
},
|
|
3884
|
-
});
|
|
3885
|
-
onEvent({
|
|
3886
|
-
type: "usage",
|
|
3887
|
-
inputTokens: 10,
|
|
3888
|
-
outputTokens: 5,
|
|
3889
|
-
model: "test",
|
|
3890
|
-
providerDurationMs: 50,
|
|
3891
|
-
});
|
|
3892
|
-
return [
|
|
3893
|
-
...messages,
|
|
3894
|
-
{
|
|
3895
|
-
role: "assistant" as const,
|
|
3896
|
-
content: [{ type: "text", text: payload }] as ContentBlock[],
|
|
2956
|
+
// GIVEN a real loop whose provider streams the PAT-bearing payload as a
|
|
2957
|
+
// delta then holds the turn open past the 250ms debounce window so the
|
|
2958
|
+
// partial flush lands before `message_complete`.
|
|
2959
|
+
const ctx = makeCtx({
|
|
2960
|
+
loopProvider: {
|
|
2961
|
+
name: "mock-provider",
|
|
2962
|
+
async sendMessage(_messages, options) {
|
|
2963
|
+
options?.onEvent?.({ type: "text_delta", text: payload });
|
|
2964
|
+
await new Promise((resolve) => setTimeout(resolve, 1100));
|
|
2965
|
+
return textResponse(payload);
|
|
3897
2966
|
},
|
|
3898
|
-
|
|
3899
|
-
};
|
|
2967
|
+
},
|
|
2968
|
+
});
|
|
3900
2969
|
|
|
3901
|
-
|
|
2970
|
+
// WHEN the orchestrator runs the turn to completion
|
|
3902
2971
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
3903
2972
|
|
|
3904
2973
|
expect(updateMessageContentMock).toHaveBeenCalledTimes(2);
|
|
@@ -3922,26 +2991,21 @@ describe("session-agent-loop", () => {
|
|
|
3922
2991
|
id: "msg-orphan-with-partial",
|
|
3923
2992
|
}));
|
|
3924
2993
|
|
|
3925
|
-
|
|
3926
|
-
|
|
3927
|
-
|
|
3928
|
-
|
|
3929
|
-
|
|
3930
|
-
|
|
3931
|
-
|
|
3932
|
-
|
|
3933
|
-
|
|
3934
|
-
|
|
3935
|
-
|
|
3936
|
-
}
|
|
3937
|
-
|
|
3938
|
-
type: "error",
|
|
3939
|
-
error: new Error("upstream 500"),
|
|
3940
|
-
});
|
|
3941
|
-
return messages;
|
|
3942
|
-
};
|
|
2994
|
+
// GIVEN a real loop whose provider streams a delta — landing a debounced
|
|
2995
|
+
// partial flush on the reserved row — then rejects, so the loop emits
|
|
2996
|
+
// `provider_error` and `error` and exits with no `message_complete`.
|
|
2997
|
+
const ctx = makeCtx({
|
|
2998
|
+
loopProvider: {
|
|
2999
|
+
name: "mock-provider",
|
|
3000
|
+
async sendMessage(_messages, options) {
|
|
3001
|
+
options?.onEvent?.({ type: "text_delta", text: "hello world" });
|
|
3002
|
+
await new Promise((resolve) => setTimeout(resolve, 1100));
|
|
3003
|
+
throw new Error("upstream 500");
|
|
3004
|
+
},
|
|
3005
|
+
},
|
|
3006
|
+
});
|
|
3943
3007
|
|
|
3944
|
-
|
|
3008
|
+
// WHEN the orchestrator runs the turn
|
|
3945
3009
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
3946
3010
|
|
|
3947
3011
|
// Partial flush fired exactly once (before the provider error).
|
|
@@ -4442,51 +3506,32 @@ describe("session-agent-loop", () => {
|
|
|
4442
3506
|
compactableStartIndex: 0,
|
|
4443
3507
|
};
|
|
4444
3508
|
|
|
4445
|
-
const rawMidLoopBasis: Message[] = [
|
|
4446
|
-
{
|
|
4447
|
-
role: "user",
|
|
4448
|
-
content: [{ type: "text", text: "fresh DB basis user row" }],
|
|
4449
|
-
},
|
|
4450
|
-
{
|
|
4451
|
-
role: "assistant",
|
|
4452
|
-
content: [{ type: "text", text: "partial assistant response" }],
|
|
4453
|
-
},
|
|
4454
|
-
];
|
|
4455
3509
|
const maybeCompactInputs: Message[][] = [];
|
|
4456
|
-
let runCount = 0;
|
|
4457
|
-
const agentLoopRun: AgentLoopRun = async (
|
|
4458
|
-
messages,
|
|
4459
|
-
_onEvent,
|
|
4460
|
-
options,
|
|
4461
|
-
) => {
|
|
4462
|
-
runCount++;
|
|
4463
|
-
if (runCount === 1) {
|
|
4464
|
-
// The loop reaches its mid-loop budget checkpoint with the raw
|
|
4465
|
-
// persistent basis as its in-loop history; the wrapped onCheckpoint
|
|
4466
|
-
// trips the gate and runs inline compaction over that basis.
|
|
4467
|
-
mockEstimateTokens = 90_000;
|
|
4468
|
-
const decision = await options?.onCheckpoint?.({
|
|
4469
|
-
turnIndex: 0,
|
|
4470
|
-
toolCount: 1,
|
|
4471
|
-
hasToolUse: true,
|
|
4472
|
-
history: rawMidLoopBasis,
|
|
4473
|
-
});
|
|
4474
|
-
mockEstimateTokens = 1000;
|
|
4475
|
-
if (decision !== "continue") {
|
|
4476
|
-
return rawMidLoopBasis;
|
|
4477
|
-
}
|
|
4478
|
-
}
|
|
4479
|
-
return [
|
|
4480
|
-
...messages,
|
|
4481
|
-
{
|
|
4482
|
-
role: "assistant" as const,
|
|
4483
|
-
content: [{ type: "text" as const, text: "final response" }],
|
|
4484
|
-
},
|
|
4485
|
-
];
|
|
4486
|
-
};
|
|
4487
3510
|
|
|
3511
|
+
// AND a real loop that runs one tool turn and then a final text turn.
|
|
3512
|
+
// The tool executor raises the token estimate above the mid-loop budget
|
|
3513
|
+
// threshold so the loop compacts in place at the post-tool checkpoint —
|
|
3514
|
+
// over its own in-loop history, which does not match the loaded Slack
|
|
3515
|
+
// rows.
|
|
4488
3516
|
const ctx = makeCtx({
|
|
4489
|
-
|
|
3517
|
+
providerResponses: [
|
|
3518
|
+
toolUseResponse("tu-mid-loop", "file_read", { path: "/foo" }),
|
|
3519
|
+
textResponse("final response"),
|
|
3520
|
+
],
|
|
3521
|
+
loopTools: [
|
|
3522
|
+
{
|
|
3523
|
+
name: "file_read",
|
|
3524
|
+
description: "Read a file",
|
|
3525
|
+
input_schema: {
|
|
3526
|
+
type: "object",
|
|
3527
|
+
properties: { path: { type: "string" } },
|
|
3528
|
+
},
|
|
3529
|
+
},
|
|
3530
|
+
],
|
|
3531
|
+
toolExecutor: async () => {
|
|
3532
|
+
mockEstimateTokens = 90_000;
|
|
3533
|
+
return { content: "ok", isError: false };
|
|
3534
|
+
},
|
|
4490
3535
|
channelCapabilities: {
|
|
4491
3536
|
channel: "slack",
|
|
4492
3537
|
dashboardCapable: false,
|
|
@@ -4523,6 +3568,9 @@ describe("session-agent-loop", () => {
|
|
|
4523
3568
|
summaryText: "",
|
|
4524
3569
|
};
|
|
4525
3570
|
}
|
|
3571
|
+
// The mid-loop gate compacted its in-loop basis; drop the estimate
|
|
3572
|
+
// back under budget so the post-compaction provider call proceeds.
|
|
3573
|
+
mockEstimateTokens = 1000;
|
|
4526
3574
|
return {
|
|
4527
3575
|
compacted: true,
|
|
4528
3576
|
messages: [
|
|
@@ -4551,7 +3599,9 @@ describe("session-agent-loop", () => {
|
|
|
4551
3599
|
await runAgentLoopImpl(ctx, "next reply", "user-msg-mid-loop", () => {});
|
|
4552
3600
|
|
|
4553
3601
|
expect(maybeCompactInputs[0]).toBe(renderedSlackMessages);
|
|
4554
|
-
|
|
3602
|
+
// The mid-loop gate compacts the loop's own in-loop history, never the
|
|
3603
|
+
// loaded Slack rows — the mismatch this test guards against.
|
|
3604
|
+
expect(maybeCompactInputs[1]).not.toBe(renderedSlackMessages);
|
|
4555
3605
|
expect(getSlackCompactionWatermarkForPrefixMock).toHaveBeenCalledWith(
|
|
4556
3606
|
null,
|
|
4557
3607
|
2,
|
|
@@ -4824,67 +3874,32 @@ describe("session-agent-loop", () => {
|
|
|
4824
3874
|
estimatedTokens: 5000,
|
|
4825
3875
|
});
|
|
4826
3876
|
|
|
4827
|
-
|
|
4828
|
-
|
|
4829
|
-
|
|
4830
|
-
|
|
4831
|
-
|
|
4832
|
-
|
|
4833
|
-
|
|
4834
|
-
|
|
4835
|
-
|
|
4836
|
-
|
|
4837
|
-
|
|
4838
|
-
|
|
4839
|
-
type: "error",
|
|
4840
|
-
error: new Error("context_length_exceeded"),
|
|
4841
|
-
});
|
|
4842
|
-
onEvent({
|
|
4843
|
-
type: "usage",
|
|
4844
|
-
inputTokens: 100,
|
|
4845
|
-
outputTokens: 0,
|
|
4846
|
-
model: "test-model",
|
|
4847
|
-
providerDurationMs: 50,
|
|
4848
|
-
});
|
|
4849
|
-
return [
|
|
4850
|
-
...messages,
|
|
4851
|
-
{
|
|
4852
|
-
role: "assistant" as const,
|
|
4853
|
-
content: [{ type: "text", text: "partial" }] as ContentBlock[],
|
|
4854
|
-
},
|
|
4855
|
-
];
|
|
4856
|
-
}
|
|
4857
|
-
onEvent({
|
|
4858
|
-
type: "message_complete",
|
|
4859
|
-
message: {
|
|
4860
|
-
role: "assistant",
|
|
4861
|
-
content: [{ type: "text", text: "recovered" }],
|
|
4862
|
-
},
|
|
4863
|
-
});
|
|
4864
|
-
onEvent({
|
|
4865
|
-
type: "usage",
|
|
4866
|
-
inputTokens: 50,
|
|
4867
|
-
outputTokens: 25,
|
|
4868
|
-
model: "test-model",
|
|
4869
|
-
providerDurationMs: 100,
|
|
4870
|
-
});
|
|
4871
|
-
return [
|
|
4872
|
-
...messages,
|
|
3877
|
+
// GIVEN a real loop that appends a tool turn (so the run reports
|
|
3878
|
+
// `appendedNewMessages`) and then rejects with a context-too-large
|
|
3879
|
+
// error on the following call — the orchestrator strips that appended
|
|
3880
|
+
// history during its bounded convergence path before a final call
|
|
3881
|
+
// recovers.
|
|
3882
|
+
const ctx = makeCtx({
|
|
3883
|
+
providerResponses: [
|
|
3884
|
+
toolUseResponse("t1", "file_read", {}),
|
|
3885
|
+
new Error("context_length_exceeded"),
|
|
3886
|
+
textResponse("recovered"),
|
|
3887
|
+
],
|
|
3888
|
+
loopTools: [
|
|
4873
3889
|
{
|
|
4874
|
-
|
|
4875
|
-
|
|
3890
|
+
name: "file_read",
|
|
3891
|
+
description: "Read a file",
|
|
3892
|
+
input_schema: { type: "object", properties: {} },
|
|
4876
3893
|
},
|
|
4877
|
-
]
|
|
4878
|
-
|
|
4879
|
-
|
|
4880
|
-
const ctx = makeCtx({
|
|
4881
|
-
agentLoopRun,
|
|
3894
|
+
],
|
|
3895
|
+
toolExecutor: async () => ({ content: "ok", isError: false }),
|
|
4882
3896
|
contextWindowManager: {
|
|
4883
3897
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
4884
3898
|
maybeCompact: async () => ({ compacted: false }),
|
|
4885
3899
|
} as unknown as AgentLoopConversationContext["contextWindowManager"],
|
|
4886
3900
|
});
|
|
4887
3901
|
|
|
3902
|
+
// WHEN the orchestrator runs the turn to completion
|
|
4888
3903
|
await runAgentLoopImpl(ctx, "hello", "msg-1", () => {});
|
|
4889
3904
|
|
|
4890
3905
|
const stripCalls = setConversationHistoryStrippedAtMock.mock.calls.filter(
|
|
@@ -4909,59 +3924,24 @@ describe("session-agent-loop", () => {
|
|
|
4909
3924
|
estimatedTokens: 5000,
|
|
4910
3925
|
});
|
|
4911
3926
|
|
|
4912
|
-
|
|
4913
|
-
|
|
4914
|
-
|
|
4915
|
-
|
|
4916
|
-
|
|
4917
|
-
|
|
4918
|
-
|
|
4919
|
-
|
|
4920
|
-
|
|
4921
|
-
|
|
4922
|
-
|
|
4923
|
-
error: new Error("context_length_exceeded"),
|
|
4924
|
-
});
|
|
4925
|
-
onEvent({
|
|
4926
|
-
type: "usage",
|
|
4927
|
-
inputTokens: 100,
|
|
4928
|
-
outputTokens: 0,
|
|
4929
|
-
model: "test-model",
|
|
4930
|
-
providerDurationMs: 50,
|
|
4931
|
-
});
|
|
4932
|
-
return [
|
|
4933
|
-
...messages,
|
|
4934
|
-
{
|
|
4935
|
-
role: "assistant" as const,
|
|
4936
|
-
content: [{ type: "text", text: "partial" }] as ContentBlock[],
|
|
4937
|
-
},
|
|
4938
|
-
];
|
|
4939
|
-
}
|
|
4940
|
-
onEvent({
|
|
4941
|
-
type: "message_complete",
|
|
4942
|
-
message: {
|
|
4943
|
-
role: "assistant",
|
|
4944
|
-
content: [{ type: "text", text: "recovered" }],
|
|
4945
|
-
},
|
|
4946
|
-
});
|
|
4947
|
-
onEvent({
|
|
4948
|
-
type: "usage",
|
|
4949
|
-
inputTokens: 50,
|
|
4950
|
-
outputTokens: 25,
|
|
4951
|
-
model: "test-model",
|
|
4952
|
-
providerDurationMs: 100,
|
|
4953
|
-
});
|
|
4954
|
-
return [
|
|
4955
|
-
...messages,
|
|
3927
|
+
// GIVEN a real loop that appends a tool turn and then rejects with a
|
|
3928
|
+
// context-too-large error on the following call, driving the
|
|
3929
|
+
// convergence strip whose marker-write helper is stubbed to throw,
|
|
3930
|
+
// before a final call recovers.
|
|
3931
|
+
const ctx = makeCtx({
|
|
3932
|
+
providerResponses: [
|
|
3933
|
+
toolUseResponse("t1", "file_read", {}),
|
|
3934
|
+
new Error("context_length_exceeded"),
|
|
3935
|
+
textResponse("recovered"),
|
|
3936
|
+
],
|
|
3937
|
+
loopTools: [
|
|
4956
3938
|
{
|
|
4957
|
-
|
|
4958
|
-
|
|
3939
|
+
name: "file_read",
|
|
3940
|
+
description: "Read a file",
|
|
3941
|
+
input_schema: { type: "object", properties: {} },
|
|
4959
3942
|
},
|
|
4960
|
-
]
|
|
4961
|
-
|
|
4962
|
-
|
|
4963
|
-
const ctx = makeCtx({
|
|
4964
|
-
agentLoopRun,
|
|
3943
|
+
],
|
|
3944
|
+
toolExecutor: async () => ({ content: "ok", isError: false }),
|
|
4965
3945
|
contextWindowManager: {
|
|
4966
3946
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
4967
3947
|
maybeCompact: async () => ({ compacted: false }),
|