@vellumai/assistant 0.8.7 → 0.8.8-dev.202606052332.17fc8ea
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +20 -4
- package/bun.lock +2 -2
- package/docker-entrypoint.sh +4 -2
- package/docker-init-apt-root.sh +3 -1
- package/docker-kata-apt-env.sh +3 -1
- package/docker-kata-runtime-family.sh +12 -0
- package/docs/architecture/memory.md +1 -1
- package/examples/plugins/echo/README.md +61 -66
- package/examples/plugins/echo/hooks/post-tool-use.ts +18 -0
- package/examples/plugins/echo/hooks/stop.ts +16 -0
- package/examples/plugins/echo/hooks/user-prompt-submit.ts +18 -0
- package/examples/plugins/echo/package.json +1 -2
- package/examples/plugins/echo/src/emit.ts +19 -0
- package/node_modules/@vellumai/skill-host-contracts/src/server-message.ts +3 -3
- package/node_modules/@vellumai/skill-host-contracts/src/skill-host.ts +7 -6
- package/openapi.yaml +3378 -335
- package/package.json +2 -2
- package/scripts/generate-openapi.ts +68 -41
- package/src/__tests__/agent-loop-exit-reason.test.ts +35 -93
- package/src/__tests__/agent-loop-provider-error-recording.test.ts +1 -1
- package/src/__tests__/agent-loop.test.ts +37 -87
- package/src/__tests__/agent-wake-disk-pressure-callsite.test.ts +2 -0
- package/src/__tests__/annotate-activity-metadata.test.ts +262 -0
- package/src/__tests__/annotate-risk-options.test.ts +2 -3
- package/src/__tests__/anthropic-provider.test.ts +95 -2
- package/src/__tests__/app-control-flow.test.ts +1 -1
- package/src/__tests__/app-dir-path-guard.test.ts +1 -0
- package/src/__tests__/approval-routes-http.test.ts +4 -1
- package/src/__tests__/assistant-event-hub.test.ts +25 -0
- package/src/__tests__/assistant-events-sse-shed.test.ts +8 -0
- package/src/__tests__/{conversation-stream-state.test.ts → assistant-stream-state.test.ts} +252 -91
- package/src/__tests__/auth-fallback-events-store.test.ts +116 -0
- package/src/__tests__/background-workers-disk-pressure.test.ts +6 -0
- package/src/__tests__/btw-routes.test.ts +62 -3
- package/src/__tests__/build-persisted-content.test.ts +184 -0
- package/src/__tests__/catalog-files.test.ts +1 -1
- package/src/__tests__/channel-approval-routes.test.ts +1 -1
- package/src/__tests__/channel-approvals.test.ts +1 -1
- package/src/__tests__/clawhub-files.test.ts +1 -1
- package/src/__tests__/compaction-circuit.test.ts +258 -0
- package/src/__tests__/compaction-direct.test.ts +132 -0
- package/src/__tests__/compaction.benchmark.test.ts +0 -30
- package/src/__tests__/config-watcher.test.ts +1 -1
- package/src/__tests__/conversation-abort-tool-results.test.ts +57 -19
- package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +6 -5
- package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +10 -7
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +316 -1143
- package/src/__tests__/conversation-agent-loop.test.ts +638 -1655
- package/src/__tests__/conversation-analysis-routes.test.ts +6 -0
- package/src/__tests__/conversation-clean-command.test.ts +5 -2
- package/src/__tests__/conversation-history-web-search.test.ts +11 -1
- package/src/__tests__/conversation-pairing.test.ts +4 -31
- package/src/__tests__/conversation-process-app-control-preactivation.test.ts +6 -0
- package/src/__tests__/conversation-provider-retry-repair.test.ts +30 -10
- package/src/__tests__/conversation-queue.test.ts +2 -0
- package/src/__tests__/conversation-routes-disk-view.test.ts +3 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +6 -5
- package/src/__tests__/conversation-runtime-assembly.test.ts +310 -300
- package/src/__tests__/conversation-runtime-workspace.test.ts +105 -45
- package/src/__tests__/conversation-slash-commands.test.ts +8 -42
- package/src/__tests__/conversation-slash-queue.test.ts +6 -1
- package/src/__tests__/conversation-starter-routes.test.ts +14 -6
- package/src/__tests__/conversation-surfaces-action-delivery.test.ts +84 -0
- package/src/__tests__/conversation-sync-tags.test.ts +27 -15
- package/src/__tests__/conversation-title-service.test.ts +135 -2
- package/src/__tests__/conversation-workspace-cache-state.test.ts +17 -16
- package/src/__tests__/conversation-workspace-injection.test.ts +67 -2
- package/src/__tests__/conversation-workspace-tool-tracking.test.ts +7 -6
- package/src/__tests__/conversations-import-system-filter.test.ts +101 -0
- package/src/__tests__/cross-provider-web-search.test.ts +214 -1
- package/src/__tests__/db-acp-history.test.ts +101 -0
- package/src/__tests__/db-schedule-syntax-migration.test.ts +5 -0
- package/src/__tests__/dm-persistence.test.ts +5 -1
- package/src/__tests__/dynamic-page-surface.test.ts +31 -0
- package/src/__tests__/empty-response-hook.test.ts +304 -0
- package/src/__tests__/feature-flag-test-helpers.ts +2 -2
- package/src/__tests__/file-write-tool.test.ts +63 -0
- package/src/__tests__/gateway-only-guard.test.ts +12 -2
- package/src/__tests__/gemini-image-service.test.ts +13 -0
- package/src/__tests__/guardian-grant-minting.test.ts +1 -1
- package/src/__tests__/guardian-routing-invariants.test.ts +2 -4
- package/src/__tests__/handlers-user-message-approval-consumption.test.ts +1 -1
- package/src/__tests__/heartbeat-disk-pressure.test.ts +1 -0
- package/src/__tests__/heartbeat-service.test.ts +1 -0
- package/src/__tests__/helpers/mock-provider.ts +110 -0
- package/src/__tests__/helpers/native-web-search-harness.ts +129 -0
- package/src/__tests__/history-repair-hook.test.ts +1 -0
- package/src/__tests__/host-app-control-routes.test.ts +1 -1
- package/src/__tests__/host-cu-routes-targeted.test.ts +3 -3
- package/src/__tests__/identity-intro-cache.test.ts +12 -100
- package/src/__tests__/identity-routes.test.ts +248 -7
- package/src/__tests__/inbound-slack-persistence.test.ts +5 -1
- package/src/__tests__/injector-background-turn.test.ts +3 -9
- package/src/__tests__/injector-chain.test.ts +139 -275
- package/src/__tests__/injector-disk-pressure.test.ts +75 -41
- package/src/__tests__/injector-document-comments.test.ts +3 -3
- package/src/__tests__/injector-pkb-v2-silenced.test.ts +30 -22
- package/src/__tests__/injector-v3-suppression.test.ts +31 -37
- package/src/__tests__/internal-telemetry-routes.test.ts +109 -0
- package/src/__tests__/list-messages-hidden-metadata.test.ts +38 -0
- package/src/__tests__/list-messages-page-latest.test.ts +60 -0
- package/src/__tests__/list-messages-tool-merge.test.ts +20 -0
- package/src/__tests__/llm-usage-store.test.ts +223 -1
- package/src/__tests__/memory-retrieval-hook.test.ts +297 -0
- package/src/__tests__/memory-v2-static-injector.test.ts +103 -35
- package/src/__tests__/native-web-search.test.ts +191 -0
- package/src/__tests__/onboarding-template-contract.test.ts +2 -0
- package/src/__tests__/openai-image-service.test.ts +17 -0
- package/src/__tests__/openai-provider.test.ts +31 -1
- package/src/__tests__/{overflow-reduce-pipeline.test.ts → overflow-reduction-loop.test.ts} +64 -284
- package/src/__tests__/persist-unsendable-image.test.ts +215 -0
- package/src/__tests__/persistence-secret-redaction.test.ts +1 -0
- package/src/__tests__/pkb-autoinject.test.ts +2 -5
- package/src/__tests__/plugin-api-shim.test.ts +3 -6
- package/src/__tests__/plugin-bootstrap.test.ts +14 -40
- package/src/__tests__/plugin-registry.test.ts +3 -76
- package/src/__tests__/plugin-types.test.ts +0 -193
- package/src/__tests__/process-message-display-content.test.ts +6 -2
- package/src/__tests__/reaction-persistence.test.ts +1 -1
- package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +5 -1
- package/src/__tests__/resolve-trust-class.test.ts +4 -4
- package/src/__tests__/runtime-events-sse-reconnect.test.ts +60 -23
- package/src/__tests__/schedule-routes.test.ts +603 -2
- package/src/__tests__/schedule-store.test.ts +41 -0
- package/src/__tests__/schedule-tools.test.ts +35 -0
- package/src/__tests__/send-endpoint-busy.test.ts +4 -1
- package/src/__tests__/server-history-render.test.ts +314 -1
- package/src/__tests__/skill-feature-flags-integration.test.ts +33 -0
- package/src/__tests__/skillssh-files.test.ts +1 -1
- package/src/__tests__/subagent-call-site-routing.test.ts +1 -1
- package/src/__tests__/subagent-fork-notifications.test.ts +1 -3
- package/src/__tests__/subagent-fork-spawn.test.ts +1 -1
- package/src/__tests__/subagent-manager-notify.test.ts +1 -3
- package/src/__tests__/subagent-notify-parent.test.ts +1 -3
- package/src/__tests__/subagent-spawn-tool-fork.test.ts +1 -1
- package/src/__tests__/system-prompt.test.ts +20 -0
- package/src/__tests__/task-scheduler.test.ts +162 -1
- package/src/__tests__/terminal-tools.test.ts +6 -1
- package/src/__tests__/title-generate-hook.test.ts +319 -0
- package/src/__tests__/tool-error-hook.test.ts +278 -0
- package/src/__tests__/tool-preview-lifecycle.test.ts +468 -5
- package/src/__tests__/tool-result-metadata-plumbing.test.ts +1 -0
- package/src/__tests__/tool-result-truncate-hook.test.ts +127 -0
- package/src/__tests__/tool-result-truncation.test.ts +0 -2
- package/src/__tests__/ui-choice-copy-surfaces.test.ts +254 -0
- package/src/__tests__/ui-work-result-surface.test.ts +159 -0
- package/src/__tests__/usage-routes.test.ts +285 -1
- package/src/__tests__/user-plugin-loader.test.ts +54 -286
- package/src/__tests__/voice-session-bridge.test.ts +6 -3
- package/src/__tests__/web-search-backend-failure.test.ts +166 -0
- package/src/acp/__tests__/agent-process.test.ts +161 -0
- package/src/acp/__tests__/client-handler.test.ts +40 -0
- package/src/acp/__tests__/helpers/acp-history-db.ts +82 -0
- package/src/acp/__tests__/helpers/exec-file-stub.ts +101 -0
- package/src/acp/__tests__/prepare-agent-env.test.ts +137 -0
- package/src/acp/__tests__/session-manager-persistence.test.ts +95 -28
- package/src/acp/__tests__/session-manager-resume.test.ts +736 -0
- package/src/acp/agent-process.ts +61 -1
- package/src/acp/auto-install.test.ts +196 -0
- package/src/acp/auto-install.ts +177 -0
- package/src/acp/client-handler.ts +31 -0
- package/src/acp/feature-gate.test.ts +48 -0
- package/src/acp/feature-gate.ts +34 -0
- package/src/acp/prepare-agent-env.ts +83 -29
- package/src/acp/resolve-agent.test.ts +320 -7
- package/src/acp/resolve-agent.ts +182 -18
- package/src/acp/resume-hint.ts +25 -0
- package/src/acp/session-manager.ts +495 -73
- package/src/acp/types.ts +8 -0
- package/src/agent/compaction-circuit.ts +60 -102
- package/src/agent/loop.ts +362 -485
- package/src/api/events/assistant-thinking-delta.ts +33 -0
- package/src/api/events/tool-output-chunk.ts +45 -0
- package/src/api/events/tool-use-preview-start.ts +32 -0
- package/src/api/events/trace-event.ts +69 -0
- package/src/api/index.ts +48 -13
- package/src/api/responses/conversation-message.ts +374 -0
- package/src/approvals/guardian-request-resolvers.ts +1 -1
- package/src/avatar/__tests__/avatar-store.test.ts +34 -29
- package/src/background-wake/next-wake.ts +1 -0
- package/src/cli/commands/__tests__/notifications.test.ts +58 -14
- package/src/cli/commands/notifications.ts +112 -60
- package/src/config/__tests__/feature-flag-registry-guard.test.ts +2 -2
- package/src/config/acp-defaults.test.ts +10 -0
- package/src/config/acp-defaults.ts +6 -0
- package/src/config/assistant-feature-flags.ts +22 -11
- package/src/config/bundled-skills/acp/SKILL.md +83 -31
- package/src/config/bundled-skills/acp/TOOLS.json +4 -4
- package/src/config/bundled-skills/app-builder/SKILL.md +224 -398
- package/src/config/bundled-skills/app-builder/TOOLS.json +29 -0
- package/src/config/bundled-skills/app-builder/references/DESIGN_SYSTEM.md +48 -0
- package/src/config/bundled-skills/app-builder/references/RESPONSIVE.md +57 -0
- package/src/config/bundled-skills/app-builder/references/SLIDES.md +38 -0
- package/src/config/bundled-skills/app-builder/references/examples/README.md +17 -0
- package/src/config/bundled-skills/app-builder/references/examples/expense-tracker.md +515 -0
- package/src/config/bundled-skills/app-builder/references/examples/focus-timer.md +342 -0
- package/src/config/bundled-skills/app-builder/references/examples/habit-tracker.md +490 -0
- package/src/config/bundled-skills/app-builder/tools/app-list.ts +62 -0
- package/src/config/bundled-skills/document-editor/SKILL.md +28 -23
- package/src/config/bundled-skills/document-editor/TOOLS.json +1 -1
- package/src/config/bundled-skills/messaging/SKILL.md +0 -7
- package/src/config/bundled-tool-registry.ts +2 -0
- package/src/config/feature-flag-cache.ts +3 -3
- package/src/config/feature-flag-registry.json +48 -7
- package/src/config/schemas/__tests__/memory-v2.test.ts +1 -0
- package/src/config/schemas/__tests__/memory-v3.test.ts +25 -0
- package/src/config/schemas/heartbeat.ts +9 -0
- package/src/config/schemas/llm.ts +1 -0
- package/src/config/schemas/memory-v2.ts +8 -0
- package/src/config/schemas/memory-v3.ts +8 -0
- package/src/config/schemas/platform.ts +8 -0
- package/src/config/seed-inference-profiles.ts +2 -2
- package/src/config/skills.ts +13 -0
- package/src/context/compactor.ts +1 -1
- package/src/context/strip-injections.ts +128 -0
- package/src/context/token-estimator.ts +23 -0
- package/src/context/tool-result-truncation.ts +0 -23
- package/src/context/window-manager.ts +5 -7
- package/src/credential-execution/executable-discovery.ts +16 -0
- package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +6 -0
- package/src/daemon/__tests__/inference-profile-notification.test.ts +153 -0
- package/src/daemon/__tests__/native-web-search-metadata.test.ts +10 -8
- package/src/daemon/assistant-attachments.ts +1 -1
- package/src/daemon/config-watcher.ts +2 -2
- package/src/daemon/context-overflow-reducer.ts +0 -1
- package/src/daemon/conversation-agent-loop-handlers.ts +594 -153
- package/src/daemon/conversation-agent-loop.ts +301 -997
- package/src/daemon/conversation-history.ts +5 -4
- package/src/daemon/conversation-lifecycle.ts +3 -4
- package/src/daemon/conversation-messaging.ts +7 -6
- package/src/daemon/conversation-process.ts +11 -16
- package/src/daemon/conversation-registry.ts +159 -0
- package/src/daemon/conversation-runtime-assembly.ts +218 -398
- package/src/daemon/conversation-slash.ts +6 -25
- package/src/daemon/conversation-store.ts +9 -90
- package/src/daemon/conversation-surfaces.ts +222 -4
- package/src/daemon/conversation-tool-setup.ts +2 -29
- package/src/daemon/conversation-workspace.ts +17 -0
- package/src/daemon/conversation.ts +32 -20
- package/src/daemon/external-plugins-bootstrap.ts +17 -18
- package/src/daemon/handlers/config-a2a.ts +51 -36
- package/src/daemon/handlers/config-slack-channel.ts +20 -14
- package/src/daemon/handlers/config-telegram.ts +16 -2
- package/src/daemon/handlers/conversations.ts +3 -1
- package/src/daemon/handlers/shared.ts +156 -84
- package/src/daemon/handlers/skills.ts +42 -10
- package/src/daemon/lifecycle.ts +25 -0
- package/src/daemon/message-types/apps.ts +1 -29
- package/src/daemon/message-types/messages.ts +9 -57
- package/src/daemon/message-types/skills.ts +2 -0
- package/src/daemon/message-types/surfaces.ts +136 -3
- package/src/daemon/now-scratchpad.ts +21 -0
- package/src/daemon/orphan-reaper.test.ts +210 -0
- package/src/daemon/orphan-reaper.ts +240 -0
- package/src/daemon/overflow-reduction-loop.ts +230 -0
- package/src/daemon/persist-unsendable-image.ts +117 -0
- package/src/daemon/process-message.ts +1 -3
- package/src/daemon/server.ts +2 -0
- package/src/daemon/trace-emitter.ts +6 -4
- package/src/daemon/trust-context.ts +19 -0
- package/src/daemon/wake-target-adapter.ts +3 -1
- package/src/heartbeat/__tests__/heartbeat-service.test.ts +3 -0
- package/src/heartbeat/heartbeat-run-store.ts +23 -1
- package/src/heartbeat/heartbeat-service.ts +26 -0
- package/src/home/home-greeting-cache.ts +24 -1
- package/src/ipc/__tests__/browser-ipc.test.ts +1 -1
- package/src/ipc/__tests__/ui-request-route.test.ts +3 -3
- package/src/ipc/gateway-client.test.ts +2 -2
- package/src/ipc/gateway-client.ts +3 -3
- package/src/ipc/skill-routes/__tests__/memory.test.ts +15 -0
- package/src/ipc/skill-routes/memory.ts +4 -2
- package/src/media/gemini-image-service.ts +15 -0
- package/src/media/openai-image-service.ts +14 -0
- package/src/media/types.ts +34 -0
- package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +56 -0
- package/src/memory/auth-fallback-events-store.ts +94 -0
- package/src/memory/conversation-starter-checkpoints.ts +1 -0
- package/src/memory/conversation-title-service.ts +65 -41
- package/src/memory/db-init.ts +6 -0
- package/src/memory/graph/__tests__/conversation-graph-memory-registry.test.ts +119 -0
- package/src/memory/graph/conversation-graph-memory.ts +65 -0
- package/src/memory/job-handlers/conversation-starters.ts +13 -2
- package/src/memory/jobs-store.ts +33 -0
- package/src/memory/jobs-worker.ts +32 -5
- package/src/memory/llm-usage-store.ts +224 -50
- package/src/memory/migrations/222-strip-placeholder-sentinels-from-messages.ts +6 -5
- package/src/memory/migrations/270-schedule-source-conversation.ts +13 -0
- package/src/memory/migrations/271-create-auth-fallback-events.ts +21 -0
- package/src/memory/migrations/272-acp-session-history-cwd.ts +36 -0
- package/src/memory/migrations/index.ts +3 -0
- package/src/memory/pkb/autoinject.ts +61 -0
- package/src/memory/pkb/context.ts +50 -0
- package/src/memory/pkb/types.ts +14 -0
- package/src/memory/schedule-attribution-sql.ts +104 -0
- package/src/memory/schema/acp.ts +4 -0
- package/src/memory/schema/infrastructure.ts +16 -0
- package/src/memory/usage-grouped-buckets.ts +6 -1
- package/src/memory/v2/__tests__/consolidation-job.test.ts +4 -4
- package/src/memory/v2/consolidation-job.ts +14 -5
- package/src/notifications/conversation-pairing.ts +8 -15
- package/src/notifications/decision-engine.ts +6 -3
- package/src/notifications/home-feed-side-effect.ts +12 -1
- package/src/permissions/prompter.ts +4 -0
- package/src/plugin-api/constants.ts +4 -0
- package/src/plugin-api/index.ts +7 -5
- package/src/plugin-api/types.ts +151 -1
- package/src/plugins/defaults/compaction/compact.ts +59 -0
- package/src/plugins/defaults/compaction/package.json +1 -1
- package/src/plugins/defaults/compaction/register.ts +8 -19
- package/src/plugins/defaults/empty-response/hooks/stop.ts +126 -0
- package/src/plugins/defaults/empty-response/register.ts +8 -13
- package/src/plugins/defaults/index.ts +2 -18
- package/src/plugins/defaults/memory-retrieval/hooks/post-compact.ts +95 -0
- package/src/plugins/defaults/memory-retrieval/hooks/user-prompt-submit-temp.ts +216 -0
- package/src/plugins/defaults/memory-retrieval/injector-chain.ts +35 -0
- package/src/plugins/defaults/{injectors/register.ts → memory-retrieval/injectors.ts} +288 -81
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/assign.test.ts +4 -4
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/health.test.ts +16 -0
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/live-integration.test.ts +4 -4
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/maintain-job.test.ts +5 -5
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/orchestrate.test.ts +48 -12
- package/src/plugins/defaults/memory-v3-shadow/__tests__/provider-blocks.test.ts +13 -0
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/reconcile.test.ts +2 -2
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/render-injection.test.ts +1 -1
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/router.test.ts +104 -32
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/selection-log-store.test.ts +8 -8
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/selector.test.ts +96 -30
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/shadow-plugin.test.ts +34 -16
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/assign.ts +5 -5
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/capabilities.ts +2 -2
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/health.ts +0 -0
- package/src/plugins/defaults/memory-v3-shadow/hooks/post-compact.ts +14 -0
- package/src/plugins/defaults/memory-v3-shadow/hooks/user-prompt-submit.ts +19 -0
- package/src/plugins/defaults/memory-v3-shadow/injector.ts +75 -0
- package/src/plugins/defaults/memory-v3-shadow/llm-retry.ts +32 -0
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/maintain-job.ts +8 -8
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/orchestrate.ts +26 -14
- package/src/plugins/defaults/{llm-call → memory-v3-shadow}/package.json +2 -2
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/page-content.ts +2 -2
- package/src/plugins/defaults/memory-v3-shadow/provider-blocks.ts +26 -0
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/reconcile.ts +3 -3
- package/src/plugins/defaults/memory-v3-shadow/register.ts +26 -0
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/render-injection.ts +1 -1
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/router.ts +51 -45
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/selection-log-store.ts +4 -4
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/selector.ts +61 -46
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/shadow-plugin.ts +69 -99
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/tree.ts +1 -1
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/types.ts +8 -0
- package/src/plugins/defaults/title-generate/hooks/stop.ts +75 -0
- package/src/plugins/defaults/title-generate/hooks/user-prompt-submit.ts +35 -0
- package/src/plugins/defaults/title-generate/package.json +1 -1
- package/src/plugins/defaults/title-generate/register.ts +18 -18
- package/src/plugins/defaults/tool-error/hooks/post-tool-use.ts +118 -0
- package/src/plugins/defaults/tool-error/package.json +1 -1
- package/src/plugins/defaults/tool-error/register.ts +9 -21
- package/src/plugins/defaults/tool-result-truncate/hooks/post-tool-use.ts +32 -0
- package/src/plugins/defaults/tool-result-truncate/register.ts +10 -21
- package/src/plugins/defaults/tool-result-truncate/terminal.ts +37 -18
- package/src/plugins/external-api.ts +2 -2
- package/src/plugins/pipeline.ts +6 -305
- package/src/plugins/registry.ts +10 -55
- package/src/plugins/types.ts +62 -797
- package/src/plugins/user-loader.ts +30 -127
- package/src/proactive-artifact/aux-message-injector.ts +4 -4
- package/src/proactive-artifact/job.test.ts +8 -13
- package/src/prompts/__tests__/system-prompt.test.ts +42 -0
- package/src/prompts/templates/BOOTSTRAP-ACTIVATION-RAIL.md +64 -0
- package/src/prompts/templates/BOOTSTRAP.md +2 -2
- package/src/prompts/templates/system-sections.ts +15 -0
- package/src/providers/anthropic/client.ts +37 -29
- package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +112 -0
- package/src/providers/openai/chat-completions-provider.ts +44 -0
- package/src/providers/openrouter/client.ts +1 -0
- package/src/providers/placeholder-sentinels.ts +35 -0
- package/src/runtime/__tests__/agent-wake.test.ts +10 -6
- package/src/runtime/__tests__/interactive-ui.test.ts +1 -1
- package/src/runtime/agent-wake.ts +2 -5
- package/src/runtime/assistant-event-hub.ts +37 -7
- package/src/runtime/{conversation-stream-state.ts → assistant-stream-state.ts} +132 -58
- package/src/runtime/channel-approvals.ts +1 -1
- package/src/runtime/http-router.ts +16 -21
- package/src/runtime/http-types.ts +16 -70
- package/src/runtime/interactive-ui.ts +1 -1
- package/src/runtime/pending-interactions.ts +1 -0
- package/src/runtime/routes/__tests__/acp-routes.test.ts +283 -55
- package/src/runtime/routes/__tests__/consolidation-routes.test.ts +265 -2
- package/src/runtime/routes/__tests__/conversation-list-routes.test.ts +1 -1
- package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +31 -1
- package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +6 -2
- package/src/runtime/routes/__tests__/surface-action-routes.test.ts +5 -4
- package/src/runtime/routes/__tests__/surface-content-routes.test.ts +4 -1
- package/src/runtime/routes/__tests__/tts-routes.test.ts +6 -2
- package/src/runtime/routes/acp-routes.test.ts +89 -25
- package/src/runtime/routes/acp-routes.ts +81 -29
- package/src/runtime/routes/app-management-routes.ts +6 -117
- package/src/runtime/routes/app-routes.ts +13 -15
- package/src/runtime/routes/approval-routes.ts +1 -1
- package/src/runtime/routes/attachment-routes.ts +26 -15
- package/src/runtime/routes/avatar-routes.ts +26 -0
- package/src/runtime/routes/browser-routes.ts +1 -1
- package/src/runtime/routes/browser-tabs-routes.ts +6 -10
- package/src/runtime/routes/btw-routes.ts +29 -23
- package/src/runtime/routes/consolidation-routes.ts +120 -20
- package/src/runtime/routes/conversation-cli-routes.ts +1 -1
- package/src/runtime/routes/conversation-list-routes.ts +1 -1
- package/src/runtime/routes/conversation-query-routes.ts +3 -1
- package/src/runtime/routes/conversation-routes.ts +372 -185
- package/src/runtime/routes/conversation-starter-routes.ts +13 -7
- package/src/runtime/routes/conversations-import-routes.ts +24 -7
- package/src/runtime/routes/documents-routes.ts +4 -0
- package/src/runtime/routes/domain-routes.ts +51 -37
- package/src/runtime/routes/epoch-millis-range.ts +34 -0
- package/src/runtime/routes/events-routes.ts +28 -34
- package/src/runtime/routes/gateway-log-routes.ts +26 -4
- package/src/runtime/routes/heartbeat-routes.ts +32 -12
- package/src/runtime/routes/host-app-control-routes.ts +1 -1
- package/src/runtime/routes/host-cu-routes.ts +1 -1
- package/src/runtime/routes/identity-intro-cache.ts +11 -34
- package/src/runtime/routes/identity-routes.ts +224 -18
- package/src/runtime/routes/image-generation-routes.ts +40 -2
- package/src/runtime/routes/inbound-message-handler.ts +1 -1
- package/src/runtime/routes/index.ts +2 -0
- package/src/runtime/routes/integrations/a2a.ts +12 -10
- package/src/runtime/routes/integrations/slack/__tests__/channel.test.ts +16 -0
- package/src/runtime/routes/integrations/slack/channel.ts +4 -0
- package/src/runtime/routes/integrations/slack/share.ts +27 -6
- package/src/runtime/routes/integrations/telegram.ts +6 -0
- package/src/runtime/routes/integrations/twilio.ts +42 -0
- package/src/runtime/routes/internal-telemetry-routes.ts +88 -0
- package/src/runtime/routes/log-export-routes.ts +8 -0
- package/src/runtime/routes/memory-v2-routes.ts +15 -8
- package/src/runtime/routes/memory-v3-routes.ts +66 -34
- package/src/runtime/routes/oauth-apps.ts +66 -12
- package/src/runtime/routes/oauth-providers.ts +44 -5
- package/src/runtime/routes/platform-routes.ts +81 -5
- package/src/runtime/routes/playground/__tests__/force-compact.test.ts +6 -4
- package/src/runtime/routes/playground/force-compact.ts +1 -1
- package/src/runtime/routes/playground/helpers.ts +1 -1
- package/src/runtime/routes/rename-conversation-routes.ts +5 -0
- package/src/runtime/routes/schedule-routes.ts +152 -42
- package/src/runtime/routes/secret-routes.ts +14 -2
- package/src/runtime/routes/skills-routes.ts +43 -14
- package/src/runtime/routes/surface-conversation-resolver.ts +4 -3
- package/src/runtime/routes/tool-call-confirmation-enrichment.test.ts +161 -0
- package/src/runtime/routes/tool-call-confirmation-enrichment.ts +107 -0
- package/src/runtime/routes/trust-rules-routes.ts +26 -2
- package/src/runtime/routes/tts-routes.ts +35 -0
- package/src/runtime/routes/types.ts +66 -8
- package/src/runtime/routes/usage-routes.ts +47 -39
- package/src/runtime/routes/webhook-routes.ts +41 -2
- package/src/runtime/routes/work-items-routes.ts +2 -4
- package/src/runtime/routes/workspace-routes.ts +4 -0
- package/src/runtime/services/__tests__/analyze-conversation.test.ts +6 -0
- package/src/runtime/services/analyze-conversation.ts +2 -2
- package/src/runtime/services/conversation-serializer.ts +1 -1
- package/src/schedule/schedule-store.ts +20 -1
- package/src/schedule/schedule-usage-store.ts +83 -0
- package/src/schedule/scheduler.ts +12 -5
- package/src/signals/cancel.ts +2 -4
- package/src/skills/catalog-files.ts +2 -2
- package/src/skills/catalog-install.ts +3 -0
- package/src/skills/categories-cache.ts +118 -0
- package/src/skills/clawhub-files.ts +1 -2
- package/src/skills/skillssh-files.ts +1 -2
- package/src/subagent/manager.ts +17 -5
- package/src/telemetry/types.ts +29 -1
- package/src/telemetry/usage-telemetry-reporter.test.ts +112 -3
- package/src/telemetry/usage-telemetry-reporter.ts +57 -2
- package/src/tools/acp/context.ts +20 -0
- package/src/tools/acp/list-agents.test.ts +7 -1
- package/src/tools/acp/spawn.test.ts +158 -55
- package/src/tools/acp/spawn.ts +47 -72
- package/src/tools/acp/steer.test.ts +105 -8
- package/src/tools/acp/steer.ts +48 -17
- package/src/tools/apps/executors.ts +13 -8
- package/src/tools/executor.ts +1 -53
- package/src/tools/filesystem/write.ts +34 -0
- package/src/tools/network/__tests__/web-search-metadata.test.ts +7 -1
- package/src/tools/network/__tests__/web-search.test.ts +11 -3
- package/src/tools/network/web-search-error.test.ts +248 -0
- package/src/tools/network/web-search-error.ts +267 -0
- package/src/tools/network/web-search.ts +207 -48
- package/src/tools/schedule/create.ts +2 -0
- package/src/tools/subagent/spawn.ts +2 -4
- package/src/tools/terminal/safe-env.ts +10 -1
- package/src/tools/ui-surface/definitions.ts +34 -5
- package/src/tts/__tests__/provider-catalog-consistency.test.ts +85 -1
- package/src/tts/provider-catalog.ts +76 -1
- package/src/util/mutex.ts +47 -0
- package/src/workspace/git-service.ts +1 -42
- package/src/workspace/migrations/051-seed-conversation-summarization-callsite.ts +4 -5
- package/src/workspace/migrations/095-bump-heartbeat-interval-30m-to-60m.ts +51 -0
- package/src/workspace/migrations/096-reduce-quality-profile-effort.ts +72 -0
- package/src/workspace/migrations/097-enable-adaptive-thinking-managed-profiles.ts +117 -0
- package/src/workspace/migrations/registry.ts +6 -0
- package/docs/plugins.md +0 -836
- package/examples/plugins/echo/register.ts +0 -184
- package/src/__tests__/bootstrap-turn-cleanup.test.ts +0 -44
- package/src/__tests__/circuit-breaker-pipeline.test.ts +0 -405
- package/src/__tests__/compaction-pipeline.test.ts +0 -210
- package/src/__tests__/compaction-timeout-recovery.test.ts +0 -251
- package/src/__tests__/empty-response-pipeline.test.ts +0 -423
- package/src/__tests__/llm-call-pipeline.test.ts +0 -287
- package/src/__tests__/memory-retrieval-pipeline.test.ts +0 -418
- package/src/__tests__/persistence-pipeline.test.ts +0 -503
- package/src/__tests__/pipeline-runner.test.ts +0 -564
- package/src/__tests__/title-generate-pipeline.test.ts +0 -211
- package/src/__tests__/token-estimate-pipeline.test.ts +0 -479
- package/src/__tests__/tool-error-pipeline.test.ts +0 -241
- package/src/__tests__/tool-execute-pipeline.test.ts +0 -417
- package/src/__tests__/tool-result-truncate-pipeline.test.ts +0 -341
- package/src/daemon/bootstrap-turn-cleanup.ts +0 -45
- package/src/gallery/default-gallery.ts +0 -1359
- package/src/gallery/gallery-manifest.ts +0 -28
- package/src/home/feature-gate.ts +0 -22
- package/src/memory/v3/provider-blocks.ts +0 -16
- package/src/plugins/defaults/circuit-breaker/middlewares/circuitBreaker.ts +0 -93
- package/src/plugins/defaults/circuit-breaker/package.json +0 -15
- package/src/plugins/defaults/circuit-breaker/register.ts +0 -39
- package/src/plugins/defaults/compaction/middlewares/compaction.ts +0 -25
- package/src/plugins/defaults/compaction/terminal.ts +0 -73
- package/src/plugins/defaults/empty-response/middlewares/emptyResponse.ts +0 -22
- package/src/plugins/defaults/empty-response/terminal.ts +0 -106
- package/src/plugins/defaults/injectors/package.json +0 -15
- package/src/plugins/defaults/llm-call/middlewares/llmCall.ts +0 -17
- package/src/plugins/defaults/llm-call/register.ts +0 -45
- package/src/plugins/defaults/memory-retrieval/middlewares/memoryRetrieval.ts +0 -17
- package/src/plugins/defaults/memory-retrieval/package.json +0 -15
- package/src/plugins/defaults/memory-retrieval/register.ts +0 -181
- package/src/plugins/defaults/overflow-reduce/middlewares/overflowReduce.ts +0 -126
- package/src/plugins/defaults/overflow-reduce/package.json +0 -15
- package/src/plugins/defaults/overflow-reduce/register.ts +0 -42
- package/src/plugins/defaults/persistence/middlewares/persistence.ts +0 -19
- package/src/plugins/defaults/persistence/package.json +0 -15
- package/src/plugins/defaults/persistence/register.ts +0 -38
- package/src/plugins/defaults/persistence/terminal.ts +0 -83
- package/src/plugins/defaults/title-generate/terminal.ts +0 -31
- package/src/plugins/defaults/token-estimate/middlewares/tokenEstimate.ts +0 -23
- package/src/plugins/defaults/token-estimate/package.json +0 -15
- package/src/plugins/defaults/token-estimate/register.ts +0 -34
- package/src/plugins/defaults/token-estimate/terminal.ts +0 -40
- package/src/plugins/defaults/tool-error/middlewares/toolError.ts +0 -21
- package/src/plugins/defaults/tool-error/terminal.ts +0 -47
- package/src/plugins/defaults/tool-execute/middlewares/toolExecute.ts +0 -23
- package/src/plugins/defaults/tool-execute/package.json +0 -15
- package/src/plugins/defaults/tool-execute/register.ts +0 -49
- package/src/plugins/defaults/tool-result-truncate/middlewares/toolResultTruncate.ts +0 -23
- package/src/plugins/defaults/tool-result-truncate/types.ts +0 -22
- package/src/skills/category-inference.ts +0 -111
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/capabilities.test.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/core.test.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/fixtures/eval-turns.json +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/fixtures/live-turns.json +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/needle.test.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/snapshot.test.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/tree.test.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/types.test.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/working-set-eviction.test.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/working-set-skeleton.test.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/core.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/README.md +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/assignments.json +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/core.json +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/leaves/domain-a/topic-x.md +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/leaves/domain-a/topic-y.md +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/leaves/domain-b/topic-z.md +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/needle.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/snapshot.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/working-set.ts +0 -0
|
@@ -1,26 +1,18 @@
|
|
|
1
1
|
import { createRequire } from "node:module";
|
|
2
|
-
import {
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
} from "
|
|
11
|
-
|
|
2
|
+
import {
|
|
3
|
+
afterAll,
|
|
4
|
+
beforeEach,
|
|
5
|
+
describe,
|
|
6
|
+
expect,
|
|
7
|
+
mock,
|
|
8
|
+
spyOn,
|
|
9
|
+
test,
|
|
10
|
+
} from "bun:test";
|
|
11
|
+
|
|
12
|
+
import type { LoopToolExecutor } from "../agent/loop.js";
|
|
12
13
|
import type { ServerMessage } from "../daemon/message-protocol.js";
|
|
13
|
-
import { defaultCompactionTerminal } from "../plugins/defaults/compaction/terminal.js";
|
|
14
14
|
import { resetPluginRegistryAndRegisterDefaults } from "../plugins/defaults/index.js";
|
|
15
|
-
import {
|
|
16
|
-
import { getMiddlewaresFor } from "../plugins/registry.js";
|
|
17
|
-
import type {
|
|
18
|
-
CompactionArgs,
|
|
19
|
-
CompactionResult,
|
|
20
|
-
TurnContext,
|
|
21
|
-
} from "../plugins/types.js";
|
|
22
|
-
import { PluginTimeoutError } from "../plugins/types.js";
|
|
23
|
-
import type { ContentBlock, Message } from "../providers/types.js";
|
|
15
|
+
import type { Message, Provider, ToolDefinition } from "../providers/types.js";
|
|
24
16
|
|
|
25
17
|
const conversationCrudRealSnapshot = {
|
|
26
18
|
...(createRequire(import.meta.url)(
|
|
@@ -76,6 +68,7 @@ mock.module("../config/loader.js", () => ({
|
|
|
76
68
|
memory: { retrieval: { scratchpadInjection: { enabled: true } } },
|
|
77
69
|
ui: mockUiConfig,
|
|
78
70
|
compaction: { enabled: true, autoThreshold: 0.7 },
|
|
71
|
+
conversations: { skipAutoRetitling: true },
|
|
79
72
|
}),
|
|
80
73
|
loadRawConfig: () => ({}),
|
|
81
74
|
saveRawConfig: () => {},
|
|
@@ -86,17 +79,20 @@ mock.module("../config/loader.js", () => ({
|
|
|
86
79
|
|
|
87
80
|
// Token estimator returns a small value by default (well within budget)
|
|
88
81
|
// so preflight does not trigger unless the test overrides it. Both the
|
|
89
|
-
// calibrated entry point (`estimatePromptTokens`,
|
|
90
|
-
// path) and the raw entry point
|
|
91
|
-
//
|
|
82
|
+
// calibrated entry point (`estimatePromptTokens`, which backs the preflight
|
|
83
|
+
// overflow gate and the convergence path) and the raw entry point
|
|
84
|
+
// (`estimatePromptTokensRaw`, used by the pre-send calibration capture) are
|
|
92
85
|
// stubbed so either call site can drive the test.
|
|
93
86
|
let mockEstimateTokens = 1000;
|
|
94
87
|
mock.module("../context/token-estimator.js", () => ({
|
|
95
88
|
estimatePromptTokens: () => mockEstimateTokens,
|
|
96
89
|
estimatePromptTokensRaw: () => mockEstimateTokens,
|
|
97
|
-
//
|
|
98
|
-
//
|
|
99
|
-
//
|
|
90
|
+
// The preflight overflow gate calls this calibrated wrapper directly, so it
|
|
91
|
+
// must honor `mockEstimateTokens` too rather than fall through to the real
|
|
92
|
+
// implementation.
|
|
93
|
+
estimatePromptTokensWithTools: () => mockEstimateTokens,
|
|
94
|
+
// Pass-through: `estimatePromptTokensWithTools` computes `toolTokenBudget`
|
|
95
|
+
// via this helper. Return 0 so the mocked estimate is not perturbed.
|
|
100
96
|
estimateToolsTokens: () => 0,
|
|
101
97
|
}));
|
|
102
98
|
|
|
@@ -320,12 +316,14 @@ const buildUnifiedTurnContextBlockMock = mock(
|
|
|
320
316
|
(options: Record<string, unknown>) =>
|
|
321
317
|
`<turn_context>\ncurrent_time: ${String(options.timestamp)}\n</turn_context>`,
|
|
322
318
|
);
|
|
323
|
-
const
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
319
|
+
const defaultApplyRuntimeInjectionsImpl = async (
|
|
320
|
+
msgs: Message[],
|
|
321
|
+
_options?: unknown,
|
|
322
|
+
) => ({
|
|
323
|
+
messages: msgs,
|
|
324
|
+
blocks: { ...mockInjectionBlocks },
|
|
325
|
+
});
|
|
326
|
+
const applyRuntimeInjectionsMock = mock(defaultApplyRuntimeInjectionsImpl);
|
|
329
327
|
let mockSlackChronologicalContext: {
|
|
330
328
|
renderedMessages: Array<{
|
|
331
329
|
message: Message;
|
|
@@ -364,15 +362,6 @@ mock.module("../daemon/conversation-runtime-assembly.js", () => ({
|
|
|
364
362
|
applyRuntimeInjections: applyRuntimeInjectionsMock,
|
|
365
363
|
buildUnifiedTurnContextBlock: buildUnifiedTurnContextBlockMock,
|
|
366
364
|
stripInjectionsForCompaction: (msgs: Message[]) => msgs,
|
|
367
|
-
findLastInjectedNowContent: () => null,
|
|
368
|
-
readNowScratchpad: () => null,
|
|
369
|
-
readPkbContext: () => null,
|
|
370
|
-
getPkbAutoInjectList: () => [
|
|
371
|
-
"INDEX.md",
|
|
372
|
-
"essentials.md",
|
|
373
|
-
"threads.md",
|
|
374
|
-
"buffer.md",
|
|
375
|
-
],
|
|
376
365
|
isSlackChannelConversation: () => false,
|
|
377
366
|
getSlackCompactionWatermarkForPrefix:
|
|
378
367
|
getSlackCompactionWatermarkForPrefixMock,
|
|
@@ -549,195 +538,78 @@ mock.module("../proactive-artifact/index.js", () => ({
|
|
|
549
538
|
|
|
550
539
|
// ── Imports (after mocks) ────────────────────────────────────────────
|
|
551
540
|
|
|
541
|
+
import { AgentLoop } from "../agent/loop.js";
|
|
552
542
|
import {
|
|
553
543
|
type AgentLoopConversationContext,
|
|
554
544
|
applyCompactionResult,
|
|
555
545
|
runAgentLoopImpl,
|
|
556
546
|
} from "../daemon/conversation-agent-loop.js";
|
|
547
|
+
import {
|
|
548
|
+
createMockProvider,
|
|
549
|
+
type ScriptedResponse,
|
|
550
|
+
textResponse,
|
|
551
|
+
toolUseResponse,
|
|
552
|
+
} from "./helpers/mock-provider.js";
|
|
557
553
|
|
|
558
554
|
// ── Test helpers ─────────────────────────────────────────────────────
|
|
559
555
|
|
|
560
|
-
type AgentLoopRun = (
|
|
561
|
-
messages: Message[],
|
|
562
|
-
onEvent: (event: AgentEvent) => void | Promise<void>,
|
|
563
|
-
options?: AgentLoopRunOptions,
|
|
564
|
-
) => Promise<Message[]>;
|
|
565
|
-
|
|
566
|
-
/**
|
|
567
|
-
* Faithful re-implementation of `AgentLoop.compact()` for the mock loop: run
|
|
568
|
-
* the compaction pipeline against the supplied turn context (which carries the
|
|
569
|
-
* test's `contextWindowManager`), invoke the orchestrator-supplied hooks, and
|
|
570
|
-
* return the continuation history — or `null` on timeout/exhaustion so the
|
|
571
|
-
* caller yields "budget".
|
|
572
|
-
*/
|
|
573
|
-
async function simulateInlineCompaction(
|
|
574
|
-
compaction: MidLoopCompaction,
|
|
575
|
-
history: Message[],
|
|
576
|
-
turnContext: TurnContext | undefined,
|
|
577
|
-
signal: AbortSignal | undefined,
|
|
578
|
-
onEvent: (event: AgentEvent) => void | Promise<void>,
|
|
579
|
-
compactionCircuit: CompactionCircuit,
|
|
580
|
-
): Promise<Message[] | null> {
|
|
581
|
-
await onEvent({ type: "context_compacting" });
|
|
582
|
-
const { rawHistory, options } = compaction.prepare(history);
|
|
583
|
-
let result: CompactionResult;
|
|
584
|
-
try {
|
|
585
|
-
result = await runPipeline<CompactionArgs, CompactionResult>(
|
|
586
|
-
"compaction",
|
|
587
|
-
getMiddlewaresFor("compaction"),
|
|
588
|
-
(args) => defaultCompactionTerminal(args, turnContext as TurnContext),
|
|
589
|
-
{ messages: rawHistory, signal, options },
|
|
590
|
-
turnContext as TurnContext,
|
|
591
|
-
DEFAULT_TIMEOUTS.compaction,
|
|
592
|
-
);
|
|
593
|
-
} catch (error) {
|
|
594
|
-
if (error instanceof PluginTimeoutError) {
|
|
595
|
-
await compactionCircuit.recordOutcome(
|
|
596
|
-
{
|
|
597
|
-
currentRequestId: turnContext?.requestId,
|
|
598
|
-
currentTurnTrustContext: turnContext?.trust,
|
|
599
|
-
turnCount: turnContext?.turnIndex ?? 0,
|
|
600
|
-
},
|
|
601
|
-
true,
|
|
602
|
-
onEvent,
|
|
603
|
-
);
|
|
604
|
-
return null;
|
|
605
|
-
}
|
|
606
|
-
throw error;
|
|
607
|
-
}
|
|
608
|
-
const compactResult = result as ContextWindowResult;
|
|
609
|
-
if (compactResult.summaryFailed !== undefined) {
|
|
610
|
-
await compactionCircuit.recordOutcome(
|
|
611
|
-
{
|
|
612
|
-
currentRequestId: turnContext?.requestId,
|
|
613
|
-
currentTurnTrustContext: turnContext?.trust,
|
|
614
|
-
turnCount: turnContext?.turnIndex ?? 0,
|
|
615
|
-
},
|
|
616
|
-
compactResult.summaryFailed,
|
|
617
|
-
onEvent,
|
|
618
|
-
);
|
|
619
|
-
}
|
|
620
|
-
if (compactResult.compacted) {
|
|
621
|
-
await compaction.applyResult(compactResult, rawHistory);
|
|
622
|
-
}
|
|
623
|
-
if (compactResult.exhausted ?? false) {
|
|
624
|
-
return null;
|
|
625
|
-
}
|
|
626
|
-
return compaction.reinject();
|
|
627
|
-
}
|
|
628
|
-
|
|
629
|
-
/**
|
|
630
|
-
* Adapt a `Message[]`-returning mock loop body into `run()`'s real result
|
|
631
|
-
* shape. Mirrors the production loop: the pause-reason carried back is
|
|
632
|
-
* whatever the most recent `onCheckpoint` call yielded with (null when it
|
|
633
|
-
* never yielded), so the orchestrator derives its yield bookkeeping the same
|
|
634
|
-
* way it does against the real loop.
|
|
635
|
-
*/
|
|
636
|
-
const asAgentLoopRun = (
|
|
637
|
-
fn: AgentLoopRun,
|
|
638
|
-
compactionCircuit: CompactionCircuit,
|
|
639
|
-
): ((
|
|
640
|
-
messages: Message[],
|
|
641
|
-
onEvent: (event: AgentEvent) => void | Promise<void>,
|
|
642
|
-
options?: AgentLoopRunOptions,
|
|
643
|
-
) => Promise<AgentLoopRunResult>) => {
|
|
644
|
-
return async (messages, onEvent, options) => {
|
|
645
|
-
let exitReason: AgentLoopRunResult["exitReason"] = null;
|
|
646
|
-
let wrapped = options;
|
|
647
|
-
if (options?.onCheckpoint) {
|
|
648
|
-
const inner = options.onCheckpoint;
|
|
649
|
-
wrapped = {
|
|
650
|
-
...options,
|
|
651
|
-
onCheckpoint: async (info) => {
|
|
652
|
-
// Handoff is offered first, mirroring the loop's ordering.
|
|
653
|
-
const decision = await inner(info);
|
|
654
|
-
if (decision !== "continue") {
|
|
655
|
-
exitReason = decision;
|
|
656
|
-
return decision;
|
|
657
|
-
}
|
|
658
|
-
// The mid-loop budget gate and inline compaction both live inside
|
|
659
|
-
// `AgentLoop.run`. Replicate them here — same formula, stubbed
|
|
660
|
-
// estimator, and the loop's own `compact()` ceremony — so these
|
|
661
|
-
// orchestrator tests drive the real escalation path now that the
|
|
662
|
-
// orchestrator's `onCheckpoint` is handoff-only and compaction runs
|
|
663
|
-
// inline rather than via an orchestrator re-entry loop.
|
|
664
|
-
const contextWindow = options.resolveContextWindow?.();
|
|
665
|
-
if (contextWindow?.overflowRecovery.enabled) {
|
|
666
|
-
const { maxInputTokens, overflowRecovery } = contextWindow;
|
|
667
|
-
const safetyMargin =
|
|
668
|
-
info.history.length > 50
|
|
669
|
-
? Math.max(overflowRecovery.safetyMarginRatio, 0.15)
|
|
670
|
-
: overflowRecovery.safetyMarginRatio;
|
|
671
|
-
const preflightBudget = Math.floor(
|
|
672
|
-
maxInputTokens * (1 - safetyMargin),
|
|
673
|
-
);
|
|
674
|
-
if (mockEstimateTokens > preflightBudget * 0.85) {
|
|
675
|
-
// Mirror `AgentLoop.compact()`: when a compaction path is
|
|
676
|
-
// supplied, run it in place and continue; on timeout or
|
|
677
|
-
// exhaustion it returns null, so the loop yields "budget".
|
|
678
|
-
const compacted = options.compaction
|
|
679
|
-
? await simulateInlineCompaction(
|
|
680
|
-
options.compaction,
|
|
681
|
-
info.history,
|
|
682
|
-
options.turnContext,
|
|
683
|
-
options.signal,
|
|
684
|
-
onEvent,
|
|
685
|
-
compactionCircuit,
|
|
686
|
-
)
|
|
687
|
-
: null;
|
|
688
|
-
if (compacted) {
|
|
689
|
-
exitReason = null;
|
|
690
|
-
return "continue";
|
|
691
|
-
}
|
|
692
|
-
exitReason = "budget";
|
|
693
|
-
return "budget";
|
|
694
|
-
}
|
|
695
|
-
}
|
|
696
|
-
exitReason = null;
|
|
697
|
-
return "continue";
|
|
698
|
-
},
|
|
699
|
-
};
|
|
700
|
-
}
|
|
701
|
-
const history = await fn(messages, onEvent, wrapped);
|
|
702
|
-
return { history, exitReason };
|
|
703
|
-
};
|
|
704
|
-
};
|
|
705
|
-
|
|
706
556
|
function makeCtx(
|
|
707
557
|
overrides?: Partial<AgentLoopConversationContext> & {
|
|
708
|
-
|
|
558
|
+
providerResponses?: ScriptedResponse[];
|
|
559
|
+
loopProvider?: Provider;
|
|
560
|
+
loopTools?: ToolDefinition[];
|
|
561
|
+
toolExecutor?: LoopToolExecutor;
|
|
709
562
|
},
|
|
710
563
|
): AgentLoopConversationContext {
|
|
711
|
-
const
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
564
|
+
const {
|
|
565
|
+
providerResponses,
|
|
566
|
+
loopProvider,
|
|
567
|
+
loopTools,
|
|
568
|
+
toolExecutor,
|
|
569
|
+
...ctxOverrides
|
|
570
|
+
} = overrides ?? {};
|
|
571
|
+
const conversationId = ctxOverrides.conversationId ?? "test-conv";
|
|
572
|
+
let processing = true;
|
|
573
|
+
|
|
574
|
+
// Drive the real `AgentLoop` against a scripted provider, mocking only the
|
|
575
|
+
// provider HTTP boundary. The loop owns its mid-loop budget gate, inline
|
|
576
|
+
// compaction, and event emission, so these orchestrator tests exercise the
|
|
577
|
+
// real escalation/persistence path.
|
|
578
|
+
//
|
|
579
|
+
// Name the loop's provider after `ctx.provider` so the two stay in sync,
|
|
580
|
+
// mirroring production where the orchestrator hands the same provider to
|
|
581
|
+
// the loop. The loop stamps this name onto `usage.actualProvider` whenever
|
|
582
|
+
// a response omits its own, which is what the request-log fallback reads.
|
|
583
|
+
// Tests that need to introspect provider calls (or sequence a rejection)
|
|
584
|
+
// build their own `loopProvider` via `createMockProvider`.
|
|
585
|
+
const loopProviderName =
|
|
586
|
+
(ctxOverrides.provider as { name?: string } | undefined)?.name ??
|
|
587
|
+
"mock-provider";
|
|
588
|
+
const provider =
|
|
589
|
+
loopProvider ??
|
|
590
|
+
createMockProvider(
|
|
591
|
+
providerResponses ?? [textResponse("response")],
|
|
592
|
+
loopProviderName,
|
|
593
|
+
).provider;
|
|
594
|
+
const agentLoop = new AgentLoop(provider, "system prompt", {
|
|
595
|
+
conversationId,
|
|
596
|
+
tools: loopTools ?? [],
|
|
597
|
+
toolExecutor,
|
|
598
|
+
});
|
|
722
599
|
|
|
723
600
|
return {
|
|
724
601
|
conversationId: "test-conv",
|
|
725
602
|
messages: [
|
|
726
603
|
{ role: "user", content: [{ type: "text", text: "Hello" }] },
|
|
727
604
|
] as Message[],
|
|
728
|
-
|
|
605
|
+
isProcessing: () => processing,
|
|
606
|
+
setProcessing: (value: boolean) => {
|
|
607
|
+
processing = value;
|
|
608
|
+
},
|
|
729
609
|
abortController: new AbortController(),
|
|
730
610
|
currentRequestId: "test-req",
|
|
731
611
|
|
|
732
|
-
agentLoop
|
|
733
|
-
run: asAgentLoopRun(agentLoopRun, compactionCircuit),
|
|
734
|
-
getToolTokenBudget: () => 0,
|
|
735
|
-
getResolvedTools: () => [],
|
|
736
|
-
// Tests here don't exercise calibration; returning undefined makes
|
|
737
|
-
// the estimator use the per-provider aggregate key.
|
|
738
|
-
getActiveModel: () => undefined,
|
|
739
|
-
compactionCircuit,
|
|
740
|
-
} as unknown as AgentLoopConversationContext["agentLoop"],
|
|
612
|
+
agentLoop,
|
|
741
613
|
provider: {
|
|
742
614
|
name: "mock-provider",
|
|
743
615
|
sendMessage: async () => ({
|
|
@@ -766,8 +638,6 @@ function makeCtx(
|
|
|
766
638
|
currentTurnSurfaces: [],
|
|
767
639
|
|
|
768
640
|
workingDir: "/tmp",
|
|
769
|
-
workspaceTopLevelContext: null,
|
|
770
|
-
workspaceTopLevelDirty: false,
|
|
771
641
|
channelCapabilities: undefined,
|
|
772
642
|
commandIntent: undefined,
|
|
773
643
|
trustContext: undefined,
|
|
@@ -804,7 +674,6 @@ function makeCtx(
|
|
|
804
674
|
getWorkspaceGitService: () => ({ ensureInitialized: async () => {} }),
|
|
805
675
|
commitTurnChanges: async () => {},
|
|
806
676
|
|
|
807
|
-
refreshWorkspaceTopLevelContextIfNeeded: () => {},
|
|
808
677
|
markWorkspaceTopLevelDirty: () => {},
|
|
809
678
|
emitActivityState: () => {},
|
|
810
679
|
getQueueDepth: () => 0,
|
|
@@ -830,9 +699,10 @@ function makeCtx(
|
|
|
830
699
|
injectedTokens: 0,
|
|
831
700
|
}),
|
|
832
701
|
retrackCachedNodes: () => {},
|
|
702
|
+
recordPkbQueryVectors: () => {},
|
|
833
703
|
} as unknown as AgentLoopConversationContext["graphMemory"],
|
|
834
704
|
|
|
835
|
-
...
|
|
705
|
+
...ctxOverrides,
|
|
836
706
|
} as AgentLoopConversationContext;
|
|
837
707
|
}
|
|
838
708
|
|
|
@@ -873,6 +743,9 @@ beforeEach(() => {
|
|
|
873
743
|
setConversationHistoryStrippedAtMock.mockClear();
|
|
874
744
|
setConversationHistoryStrippedAtMock.mockImplementation(() => {});
|
|
875
745
|
applyRuntimeInjectionsMock.mockClear();
|
|
746
|
+
applyRuntimeInjectionsMock.mockImplementation(
|
|
747
|
+
defaultApplyRuntimeInjectionsImpl,
|
|
748
|
+
);
|
|
876
749
|
buildUnifiedTurnContextBlockMock.mockClear();
|
|
877
750
|
resolveTurnTimezoneContextMock.mockClear();
|
|
878
751
|
formatTurnTimestampMock.mockClear();
|
|
@@ -886,11 +759,10 @@ beforeEach(() => {
|
|
|
886
759
|
projectAssistantMessageMock.mockClear();
|
|
887
760
|
publishSyncInvalidationMock.mockClear();
|
|
888
761
|
mockMessageById = null;
|
|
889
|
-
//
|
|
890
|
-
//
|
|
891
|
-
//
|
|
892
|
-
//
|
|
893
|
-
// hitting the bare terminals.
|
|
762
|
+
// The compaction pipeline runs through the plugin registry; reset and
|
|
763
|
+
// re-register every default so it dispatches to middleware backed by the
|
|
764
|
+
// mocked collaborators these tests install (`syncMessageToDisk`, etc.)
|
|
765
|
+
// instead of hitting the bare terminal.
|
|
894
766
|
resetPluginRegistryAndRegisterDefaults();
|
|
895
767
|
});
|
|
896
768
|
|
|
@@ -970,57 +842,28 @@ describe("session-agent-loop", () => {
|
|
|
970
842
|
mockHasProactiveArtifactCompleted = false;
|
|
971
843
|
mockTryClaimProactiveArtifactTrigger = true;
|
|
972
844
|
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
// emits this from `AgentLoop.run` just before `provider.sendMessage`.
|
|
976
|
-
await onEvent({ type: "llm_call_started" });
|
|
977
|
-
await onEvent({
|
|
978
|
-
type: "message_complete",
|
|
979
|
-
message: {
|
|
980
|
-
role: "assistant",
|
|
981
|
-
content: [{ type: "text", text: "I'll build that app." }],
|
|
982
|
-
},
|
|
983
|
-
});
|
|
984
|
-
await onEvent({
|
|
985
|
-
type: "tool_use",
|
|
986
|
-
id: "tool-1",
|
|
987
|
-
name: "app_create",
|
|
988
|
-
input: { name: "Flow" },
|
|
989
|
-
});
|
|
990
|
-
await onEvent({
|
|
991
|
-
type: "tool_result",
|
|
992
|
-
toolUseId: "tool-1",
|
|
993
|
-
content: "{}",
|
|
994
|
-
isError: false,
|
|
995
|
-
});
|
|
996
|
-
await options?.onCheckpoint?.({
|
|
997
|
-
turnIndex: 0,
|
|
998
|
-
toolCount: 1,
|
|
999
|
-
hasToolUse: true,
|
|
1000
|
-
history: messages,
|
|
1001
|
-
});
|
|
1002
|
-
// Prime the anchor again for LLM call 2 — multi-call agent turns
|
|
1003
|
-
// reserve a fresh assistant row per LLM call.
|
|
1004
|
-
await onEvent({ type: "llm_call_started" });
|
|
1005
|
-
await onEvent({
|
|
1006
|
-
type: "message_complete",
|
|
1007
|
-
message: {
|
|
1008
|
-
role: "assistant",
|
|
1009
|
-
content: [{ type: "text", text: "Done." }],
|
|
1010
|
-
},
|
|
1011
|
-
});
|
|
1012
|
-
return [
|
|
1013
|
-
...messages,
|
|
1014
|
-
{
|
|
1015
|
-
role: "assistant" as const,
|
|
1016
|
-
content: [{ type: "text" as const, text: "Done." }],
|
|
1017
|
-
},
|
|
1018
|
-
];
|
|
1019
|
-
};
|
|
1020
|
-
|
|
845
|
+
// A two-call agent turn: the model invokes `app_create`, then wraps up
|
|
846
|
+
// with a final text reply.
|
|
1021
847
|
const ctx = makeCtx({
|
|
1022
848
|
conversationId: "test-conv",
|
|
1023
|
-
|
|
849
|
+
providerResponses: [
|
|
850
|
+
{
|
|
851
|
+
content: [
|
|
852
|
+
{ type: "text", text: "I'll build that app." },
|
|
853
|
+
{
|
|
854
|
+
type: "tool_use",
|
|
855
|
+
id: "tool-1",
|
|
856
|
+
name: "app_create",
|
|
857
|
+
input: { name: "Flow" },
|
|
858
|
+
},
|
|
859
|
+
],
|
|
860
|
+
model: "mock-model",
|
|
861
|
+
usage: { inputTokens: 10, outputTokens: 5 },
|
|
862
|
+
stopReason: "tool_use",
|
|
863
|
+
},
|
|
864
|
+
textResponse("Done."),
|
|
865
|
+
],
|
|
866
|
+
toolExecutor: async () => ({ content: "{}", isError: false }),
|
|
1024
867
|
});
|
|
1025
868
|
await runAgentLoopImpl(
|
|
1026
869
|
ctx,
|
|
@@ -1038,7 +881,23 @@ describe("session-agent-loop", () => {
|
|
|
1038
881
|
});
|
|
1039
882
|
|
|
1040
883
|
describe("disk pressure injection context", () => {
|
|
1041
|
-
|
|
884
|
+
// The loop sets `ctx.diskPressureCleanupModeActive` for the duration of the
|
|
885
|
+
// turn (the disk-pressure-warning injector reads it via the per-conversation
|
|
886
|
+
// registry) and resets it in the turn-end cleanup path. Snapshot the flag at
|
|
887
|
+
// each `applyRuntimeInjections` call so assertions observe its value while
|
|
888
|
+
// injection runs, not the post-turn reset.
|
|
889
|
+
function captureCleanupFlagDuringInjection(ctx: {
|
|
890
|
+
diskPressureCleanupModeActive?: boolean;
|
|
891
|
+
}): () => Array<boolean | undefined> {
|
|
892
|
+
const observed: Array<boolean | undefined> = [];
|
|
893
|
+
applyRuntimeInjectionsMock.mockImplementation(async (msgs: Message[]) => {
|
|
894
|
+
observed.push(ctx.diskPressureCleanupModeActive);
|
|
895
|
+
return { messages: msgs, blocks: { ...mockInjectionBlocks } };
|
|
896
|
+
});
|
|
897
|
+
return () => observed;
|
|
898
|
+
}
|
|
899
|
+
|
|
900
|
+
test("sets the cleanup-mode flag on the conversation for cleanup-mode turns", async () => {
|
|
1042
901
|
mockDiskPressureDecision = {
|
|
1043
902
|
action: "allow-cleanup-mode",
|
|
1044
903
|
reason: "guardian",
|
|
@@ -1061,6 +920,7 @@ describe("session-agent-loop", () => {
|
|
|
1061
920
|
trustClass: "guardian",
|
|
1062
921
|
} as AgentLoopConversationContext["trustContext"],
|
|
1063
922
|
});
|
|
923
|
+
const cleanupFlagDuringInjection = captureCleanupFlagDuringInjection(ctx);
|
|
1064
924
|
|
|
1065
925
|
await runAgentLoopImpl(ctx, "free up space", "msg-1", () => {});
|
|
1066
926
|
|
|
@@ -1079,21 +939,16 @@ describe("session-agent-loop", () => {
|
|
|
1079
939
|
},
|
|
1080
940
|
}),
|
|
1081
941
|
);
|
|
1082
|
-
|
|
1083
|
-
.calls[0]![1] as {
|
|
1084
|
-
diskPressureContext?: { cleanupModeActive: boolean } | null;
|
|
1085
|
-
};
|
|
1086
|
-
expect(firstInjectionOptions.diskPressureContext).toEqual({
|
|
1087
|
-
cleanupModeActive: true,
|
|
1088
|
-
});
|
|
942
|
+
expect(cleanupFlagDuringInjection()).toEqual([true]);
|
|
1089
943
|
});
|
|
1090
944
|
|
|
1091
|
-
test("
|
|
945
|
+
test("sets the cleanup-mode flag on the conversation for local-owner turns", async () => {
|
|
1092
946
|
mockDiskPressureDecision = {
|
|
1093
947
|
action: "allow-cleanup-mode",
|
|
1094
948
|
reason: "local-owner",
|
|
1095
949
|
};
|
|
1096
950
|
const ctx = makeCtx();
|
|
951
|
+
const cleanupFlagDuringInjection = captureCleanupFlagDuringInjection(ctx);
|
|
1097
952
|
|
|
1098
953
|
await runAgentLoopImpl(ctx, "free up space", "msg-1", () => {});
|
|
1099
954
|
|
|
@@ -1105,16 +960,10 @@ describe("session-agent-loop", () => {
|
|
|
1105
960
|
trustContext: null,
|
|
1106
961
|
}),
|
|
1107
962
|
);
|
|
1108
|
-
|
|
1109
|
-
.calls[0]![1] as {
|
|
1110
|
-
diskPressureContext?: { cleanupModeActive: boolean } | null;
|
|
1111
|
-
};
|
|
1112
|
-
expect(firstInjectionOptions.diskPressureContext).toEqual({
|
|
1113
|
-
cleanupModeActive: true,
|
|
1114
|
-
});
|
|
963
|
+
expect(cleanupFlagDuringInjection()).toEqual([true]);
|
|
1115
964
|
});
|
|
1116
965
|
|
|
1117
|
-
test("keeps cleanup
|
|
966
|
+
test("keeps the cleanup-mode flag set across overflow recovery reinjection", async () => {
|
|
1118
967
|
mockDiskPressureDecision = {
|
|
1119
968
|
action: "allow-cleanup-mode",
|
|
1120
969
|
reason: "guardian",
|
|
@@ -1136,18 +985,14 @@ describe("session-agent-loop", () => {
|
|
|
1136
985
|
trustClass: "guardian",
|
|
1137
986
|
} as AgentLoopConversationContext["trustContext"],
|
|
1138
987
|
});
|
|
988
|
+
const cleanupFlagDuringInjection = captureCleanupFlagDuringInjection(ctx);
|
|
1139
989
|
|
|
1140
990
|
await runAgentLoopImpl(ctx, "free up space", "msg-1", () => {});
|
|
1141
991
|
|
|
1142
992
|
expect(applyRuntimeInjectionsMock.mock.calls.length).toBeGreaterThan(1);
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
};
|
|
1147
|
-
expect(options.diskPressureContext).toEqual({
|
|
1148
|
-
cleanupModeActive: true,
|
|
1149
|
-
});
|
|
1150
|
-
}
|
|
993
|
+
const flags = cleanupFlagDuringInjection();
|
|
994
|
+
expect(flags.length).toBeGreaterThan(1);
|
|
995
|
+
expect(flags.every((flag) => flag === true)).toBe(true);
|
|
1151
996
|
});
|
|
1152
997
|
|
|
1153
998
|
test("blocks policy-denied turns before runtime injection or model execution", async () => {
|
|
@@ -1156,9 +1001,6 @@ describe("session-agent-loop", () => {
|
|
|
1156
1001
|
reason: "trusted-contact",
|
|
1157
1002
|
};
|
|
1158
1003
|
const events: ServerMessage[] = [];
|
|
1159
|
-
const agentLoopRun = mock(async (_messages: Message[]) => {
|
|
1160
|
-
throw new Error("agent loop should not run");
|
|
1161
|
-
});
|
|
1162
1004
|
const activityStates: unknown[][] = [];
|
|
1163
1005
|
const traceEvents: unknown[][] = [];
|
|
1164
1006
|
const ctx = makeCtx({
|
|
@@ -1171,14 +1013,11 @@ describe("session-agent-loop", () => {
|
|
|
1171
1013
|
},
|
|
1172
1014
|
} as unknown as AgentLoopConversationContext["traceEmitter"],
|
|
1173
1015
|
});
|
|
1174
|
-
ctx.agentLoop
|
|
1175
|
-
agentLoopRun,
|
|
1176
|
-
ctx.agentLoop.compactionCircuit,
|
|
1177
|
-
);
|
|
1016
|
+
const runSpy = spyOn(ctx.agentLoop, "run");
|
|
1178
1017
|
|
|
1179
1018
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
1180
1019
|
|
|
1181
|
-
expect(
|
|
1020
|
+
expect(runSpy).not.toHaveBeenCalled();
|
|
1182
1021
|
expect(applyRuntimeInjectionsMock).not.toHaveBeenCalled();
|
|
1183
1022
|
expect(activityStates).toContainEqual([
|
|
1184
1023
|
"idle",
|
|
@@ -1238,7 +1077,7 @@ describe("session-agent-loop", () => {
|
|
|
1238
1077
|
});
|
|
1239
1078
|
|
|
1240
1079
|
expect(applyRuntimeInjectionsMock).not.toHaveBeenCalled();
|
|
1241
|
-
expect(ctx.
|
|
1080
|
+
expect(ctx.isProcessing()).toBe(false);
|
|
1242
1081
|
expect(ctx.abortController).toBeNull();
|
|
1243
1082
|
expect(ctx.currentRequestId).toBeUndefined();
|
|
1244
1083
|
expect(drainQueue).toHaveBeenCalledWith("loop_complete");
|
|
@@ -1254,47 +1093,14 @@ describe("session-agent-loop", () => {
|
|
|
1254
1093
|
test("error events from agent loop are classified and emitted", async () => {
|
|
1255
1094
|
const events: ServerMessage[] = [];
|
|
1256
1095
|
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
name: "bash",
|
|
1266
|
-
input: { cmd: "ls" },
|
|
1267
|
-
});
|
|
1268
|
-
onEvent({
|
|
1269
|
-
type: "error",
|
|
1270
|
-
error: new Error("Tool execution failed: permission denied"),
|
|
1271
|
-
});
|
|
1272
|
-
onEvent({
|
|
1273
|
-
type: "message_complete",
|
|
1274
|
-
message: {
|
|
1275
|
-
role: "assistant",
|
|
1276
|
-
content: [{ type: "text", text: "I encountered an error" }],
|
|
1277
|
-
},
|
|
1278
|
-
});
|
|
1279
|
-
onEvent({
|
|
1280
|
-
type: "usage",
|
|
1281
|
-
inputTokens: 100,
|
|
1282
|
-
outputTokens: 50,
|
|
1283
|
-
model: "test-model",
|
|
1284
|
-
providerDurationMs: 200,
|
|
1285
|
-
});
|
|
1286
|
-
return [
|
|
1287
|
-
...messages,
|
|
1288
|
-
{
|
|
1289
|
-
role: "assistant" as const,
|
|
1290
|
-
content: [
|
|
1291
|
-
{ type: "text", text: "I encountered an error" },
|
|
1292
|
-
] as ContentBlock[],
|
|
1293
|
-
},
|
|
1294
|
-
];
|
|
1295
|
-
};
|
|
1296
|
-
|
|
1297
|
-
const ctx = makeCtx({ agentLoopRun });
|
|
1096
|
+
// The model calls a tool whose executor throws, surfacing an `error`
|
|
1097
|
+
// event from the loop's catch handler.
|
|
1098
|
+
const ctx = makeCtx({
|
|
1099
|
+
providerResponses: [toolUseResponse("tu-1", "bash", { cmd: "ls" })],
|
|
1100
|
+
toolExecutor: async () => {
|
|
1101
|
+
throw new Error("Tool execution failed: permission denied");
|
|
1102
|
+
},
|
|
1103
|
+
});
|
|
1298
1104
|
await runAgentLoopImpl(ctx, "run ls", "msg-1", (msg) => events.push(msg));
|
|
1299
1105
|
|
|
1300
1106
|
const conversationError = events.find(
|
|
@@ -1306,34 +1112,9 @@ describe("session-agent-loop", () => {
|
|
|
1306
1112
|
test("non-error agent loop completion does not emit conversation_error", async () => {
|
|
1307
1113
|
const events: ServerMessage[] = [];
|
|
1308
1114
|
|
|
1309
|
-
const
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
await onEvent({ type: "llm_call_started" });
|
|
1313
|
-
onEvent({
|
|
1314
|
-
type: "message_complete",
|
|
1315
|
-
message: {
|
|
1316
|
-
role: "assistant",
|
|
1317
|
-
content: [{ type: "text", text: "All good" }],
|
|
1318
|
-
},
|
|
1319
|
-
});
|
|
1320
|
-
onEvent({
|
|
1321
|
-
type: "usage",
|
|
1322
|
-
inputTokens: 50,
|
|
1323
|
-
outputTokens: 25,
|
|
1324
|
-
model: "test-model",
|
|
1325
|
-
providerDurationMs: 100,
|
|
1326
|
-
});
|
|
1327
|
-
return [
|
|
1328
|
-
...messages,
|
|
1329
|
-
{
|
|
1330
|
-
role: "assistant" as const,
|
|
1331
|
-
content: [{ type: "text", text: "All good" }] as ContentBlock[],
|
|
1332
|
-
},
|
|
1333
|
-
];
|
|
1334
|
-
};
|
|
1335
|
-
|
|
1336
|
-
const ctx = makeCtx({ agentLoopRun });
|
|
1115
|
+
const ctx = makeCtx({
|
|
1116
|
+
providerResponses: [textResponse("All good")],
|
|
1117
|
+
});
|
|
1337
1118
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
1338
1119
|
|
|
1339
1120
|
const conversationError = events.find(
|
|
@@ -1369,38 +1150,20 @@ describe("session-agent-loop", () => {
|
|
|
1369
1150
|
},
|
|
1370
1151
|
};
|
|
1371
1152
|
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
onEvent({
|
|
1377
|
-
type: "message_complete",
|
|
1378
|
-
message: {
|
|
1379
|
-
role: "assistant",
|
|
1380
|
-
content: [{ type: "text", text: "Hi there." }],
|
|
1381
|
-
},
|
|
1382
|
-
});
|
|
1383
|
-
onEvent({
|
|
1384
|
-
type: "usage",
|
|
1385
|
-
inputTokens: 12,
|
|
1386
|
-
outputTokens: 3,
|
|
1387
|
-
model: "gpt-4.1-2026-03-01",
|
|
1388
|
-
actualProvider: "fireworks",
|
|
1389
|
-
providerDurationMs: 45,
|
|
1390
|
-
rawRequest,
|
|
1391
|
-
rawResponse,
|
|
1392
|
-
});
|
|
1393
|
-
return [
|
|
1394
|
-
...messages,
|
|
1153
|
+
// The provider response carries its own `actualProvider`, so the logged
|
|
1154
|
+
// row should record that name rather than the runtime provider.
|
|
1155
|
+
const ctx = makeCtx({
|
|
1156
|
+
providerResponses: [
|
|
1395
1157
|
{
|
|
1396
|
-
|
|
1397
|
-
|
|
1158
|
+
content: [{ type: "text", text: "Hi there." }],
|
|
1159
|
+
model: "gpt-4.1-2026-03-01",
|
|
1160
|
+
usage: { inputTokens: 12, outputTokens: 3 },
|
|
1161
|
+
stopReason: "end_turn",
|
|
1162
|
+
actualProvider: "fireworks",
|
|
1163
|
+
rawRequest,
|
|
1164
|
+
rawResponse,
|
|
1398
1165
|
},
|
|
1399
|
-
]
|
|
1400
|
-
};
|
|
1401
|
-
|
|
1402
|
-
const ctx = makeCtx({
|
|
1403
|
-
agentLoopRun,
|
|
1166
|
+
],
|
|
1404
1167
|
provider: {
|
|
1405
1168
|
name: "openrouter",
|
|
1406
1169
|
sendMessage: async () => ({
|
|
@@ -1437,37 +1200,19 @@ describe("session-agent-loop", () => {
|
|
|
1437
1200
|
],
|
|
1438
1201
|
};
|
|
1439
1202
|
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
onEvent({
|
|
1445
|
-
type: "message_complete",
|
|
1446
|
-
message: {
|
|
1447
|
-
role: "assistant",
|
|
1448
|
-
content: [{ type: "text", text: "Hi there." }],
|
|
1449
|
-
},
|
|
1450
|
-
});
|
|
1451
|
-
onEvent({
|
|
1452
|
-
type: "usage",
|
|
1453
|
-
inputTokens: 12,
|
|
1454
|
-
outputTokens: 3,
|
|
1455
|
-
model: "gpt-4.1-2026-03-01",
|
|
1456
|
-
providerDurationMs: 45,
|
|
1457
|
-
rawRequest,
|
|
1458
|
-
rawResponse,
|
|
1459
|
-
});
|
|
1460
|
-
return [
|
|
1461
|
-
...messages,
|
|
1203
|
+
// The provider response omits `actualProvider`, so the loop stamps the
|
|
1204
|
+
// runtime provider name onto the usage event and the row records it.
|
|
1205
|
+
const ctx = makeCtx({
|
|
1206
|
+
providerResponses: [
|
|
1462
1207
|
{
|
|
1463
|
-
|
|
1464
|
-
|
|
1208
|
+
content: [{ type: "text", text: "Hi there." }],
|
|
1209
|
+
model: "gpt-4.1-2026-03-01",
|
|
1210
|
+
usage: { inputTokens: 12, outputTokens: 3 },
|
|
1211
|
+
stopReason: "end_turn",
|
|
1212
|
+
rawRequest,
|
|
1213
|
+
rawResponse,
|
|
1465
1214
|
},
|
|
1466
|
-
]
|
|
1467
|
-
};
|
|
1468
|
-
|
|
1469
|
-
const ctx = makeCtx({
|
|
1470
|
-
agentLoopRun,
|
|
1215
|
+
],
|
|
1471
1216
|
provider: {
|
|
1472
1217
|
name: "openrouter",
|
|
1473
1218
|
sendMessage: async () => ({
|
|
@@ -1522,38 +1267,18 @@ describe("session-agent-loop", () => {
|
|
|
1522
1267
|
status: "completed",
|
|
1523
1268
|
};
|
|
1524
1269
|
|
|
1525
|
-
const
|
|
1526
|
-
|
|
1527
|
-
// `AgentLoop.run` just before `provider.sendMessage`.
|
|
1528
|
-
await onEvent({ type: "llm_call_started" });
|
|
1529
|
-
onEvent({
|
|
1530
|
-
type: "message_complete",
|
|
1531
|
-
message: {
|
|
1532
|
-
role: "assistant",
|
|
1533
|
-
content: [{ type: "text", text: "Hi there." }],
|
|
1534
|
-
},
|
|
1535
|
-
});
|
|
1536
|
-
onEvent({
|
|
1537
|
-
type: "usage",
|
|
1538
|
-
inputTokens: 12,
|
|
1539
|
-
outputTokens: 3,
|
|
1540
|
-
model: "gpt-5.4",
|
|
1541
|
-
actualProvider: "openai",
|
|
1542
|
-
providerDurationMs: 45,
|
|
1543
|
-
rawRequest,
|
|
1544
|
-
rawResponse,
|
|
1545
|
-
});
|
|
1546
|
-
return [
|
|
1547
|
-
...messages,
|
|
1270
|
+
const ctx = makeCtx({
|
|
1271
|
+
providerResponses: [
|
|
1548
1272
|
{
|
|
1549
|
-
|
|
1550
|
-
|
|
1273
|
+
content: [{ type: "text", text: "Hi there." }],
|
|
1274
|
+
model: "gpt-5.4",
|
|
1275
|
+
usage: { inputTokens: 12, outputTokens: 3 },
|
|
1276
|
+
stopReason: "end_turn",
|
|
1277
|
+
actualProvider: "openai",
|
|
1278
|
+
rawRequest,
|
|
1279
|
+
rawResponse,
|
|
1551
1280
|
},
|
|
1552
|
-
]
|
|
1553
|
-
};
|
|
1554
|
-
|
|
1555
|
-
const ctx = makeCtx({
|
|
1556
|
-
agentLoopRun,
|
|
1281
|
+
],
|
|
1557
1282
|
provider: {
|
|
1558
1283
|
name: "openai",
|
|
1559
1284
|
sendMessage: async () => ({
|
|
@@ -1593,37 +1318,17 @@ describe("session-agent-loop", () => {
|
|
|
1593
1318
|
attrs: Record<string, unknown>;
|
|
1594
1319
|
}> = [];
|
|
1595
1320
|
|
|
1596
|
-
const
|
|
1597
|
-
//
|
|
1598
|
-
|
|
1599
|
-
await onEvent({ type: "llm_call_started" });
|
|
1600
|
-
onEvent({ type: "text_delta", text: "Hi." });
|
|
1601
|
-
onEvent({
|
|
1602
|
-
type: "message_complete",
|
|
1603
|
-
message: {
|
|
1604
|
-
role: "assistant",
|
|
1605
|
-
content: [{ type: "text", text: "Hi." }],
|
|
1606
|
-
},
|
|
1607
|
-
});
|
|
1608
|
-
onEvent({
|
|
1609
|
-
type: "usage",
|
|
1610
|
-
inputTokens: 10,
|
|
1611
|
-
outputTokens: 2,
|
|
1612
|
-
model: "gpt-5.5-2026-04-23",
|
|
1613
|
-
actualProvider: "openai",
|
|
1614
|
-
providerDurationMs: 100,
|
|
1615
|
-
});
|
|
1616
|
-
return [
|
|
1617
|
-
...messages,
|
|
1321
|
+
const ctx = makeCtx({
|
|
1322
|
+
// The loop replays the text block as a `text_delta` before `usage`.
|
|
1323
|
+
providerResponses: [
|
|
1618
1324
|
{
|
|
1619
|
-
|
|
1620
|
-
|
|
1325
|
+
content: [{ type: "text", text: "Hi." }],
|
|
1326
|
+
model: "gpt-5.5-2026-04-23",
|
|
1327
|
+
usage: { inputTokens: 10, outputTokens: 2 },
|
|
1328
|
+
stopReason: "end_turn",
|
|
1329
|
+
actualProvider: "openai",
|
|
1621
1330
|
},
|
|
1622
|
-
]
|
|
1623
|
-
};
|
|
1624
|
-
|
|
1625
|
-
const ctx = makeCtx({
|
|
1626
|
-
agentLoopRun,
|
|
1331
|
+
],
|
|
1627
1332
|
// Provider name matches actualProvider so both paths agree.
|
|
1628
1333
|
provider: {
|
|
1629
1334
|
name: "openai",
|
|
@@ -1671,31 +1376,18 @@ describe("session-agent-loop", () => {
|
|
|
1671
1376
|
attrs: Record<string, unknown>;
|
|
1672
1377
|
}> = [];
|
|
1673
1378
|
|
|
1674
|
-
const
|
|
1675
|
-
//
|
|
1676
|
-
//
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
onEvent({
|
|
1680
|
-
type: "message_complete",
|
|
1681
|
-
message: {
|
|
1682
|
-
role: "assistant",
|
|
1379
|
+
const ctx = makeCtx({
|
|
1380
|
+
// An empty-content response: no text block fires `text_delta`, so the
|
|
1381
|
+
// started event falls back to the resolved usage provider name.
|
|
1382
|
+
providerResponses: [
|
|
1383
|
+
{
|
|
1683
1384
|
content: [],
|
|
1385
|
+
model: "gpt-5.5-2026-04-23",
|
|
1386
|
+
usage: { inputTokens: 10, outputTokens: 2 },
|
|
1387
|
+
stopReason: "end_turn",
|
|
1388
|
+
actualProvider: "openai",
|
|
1684
1389
|
},
|
|
1685
|
-
|
|
1686
|
-
onEvent({
|
|
1687
|
-
type: "usage",
|
|
1688
|
-
inputTokens: 10,
|
|
1689
|
-
outputTokens: 2,
|
|
1690
|
-
model: "gpt-5.5-2026-04-23",
|
|
1691
|
-
actualProvider: "openai",
|
|
1692
|
-
providerDurationMs: 100,
|
|
1693
|
-
});
|
|
1694
|
-
return messages;
|
|
1695
|
-
};
|
|
1696
|
-
|
|
1697
|
-
const ctx = makeCtx({
|
|
1698
|
-
agentLoopRun,
|
|
1390
|
+
],
|
|
1699
1391
|
provider: {
|
|
1700
1392
|
name: "anthropic",
|
|
1701
1393
|
sendMessage: async () => ({
|
|
@@ -1737,52 +1429,32 @@ describe("session-agent-loop", () => {
|
|
|
1737
1429
|
test("records the actual provider for usage accounting", async () => {
|
|
1738
1430
|
const events: ServerMessage[] = [];
|
|
1739
1431
|
|
|
1740
|
-
const
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
await onEvent({ type: "llm_call_started" });
|
|
1744
|
-
onEvent({
|
|
1745
|
-
type: "message_complete",
|
|
1746
|
-
message: {
|
|
1747
|
-
role: "assistant",
|
|
1432
|
+
const ctx = makeCtx({
|
|
1433
|
+
providerResponses: [
|
|
1434
|
+
{
|
|
1748
1435
|
content: [{ type: "text", text: "Hi there." }],
|
|
1749
|
-
},
|
|
1750
|
-
});
|
|
1751
|
-
onEvent({
|
|
1752
|
-
type: "usage",
|
|
1753
|
-
inputTokens: 12,
|
|
1754
|
-
outputTokens: 3,
|
|
1755
|
-
model: "gpt-4.1-2026-03-01",
|
|
1756
|
-
actualProvider: "fireworks",
|
|
1757
|
-
providerDurationMs: 45,
|
|
1758
|
-
rawRequest: {
|
|
1759
|
-
model: "gpt-4.1",
|
|
1760
|
-
messages: [{ role: "user", content: "Hello" }],
|
|
1761
|
-
},
|
|
1762
|
-
rawResponse: {
|
|
1763
1436
|
model: "gpt-4.1-2026-03-01",
|
|
1764
|
-
|
|
1765
|
-
|
|
1766
|
-
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1437
|
+
usage: { inputTokens: 12, outputTokens: 3 },
|
|
1438
|
+
stopReason: "end_turn",
|
|
1439
|
+
actualProvider: "fireworks",
|
|
1440
|
+
rawRequest: {
|
|
1441
|
+
model: "gpt-4.1",
|
|
1442
|
+
messages: [{ role: "user", content: "Hello" }],
|
|
1443
|
+
},
|
|
1444
|
+
rawResponse: {
|
|
1445
|
+
model: "gpt-4.1-2026-03-01",
|
|
1446
|
+
choices: [
|
|
1447
|
+
{
|
|
1448
|
+
finish_reason: "stop",
|
|
1449
|
+
message: {
|
|
1450
|
+
role: "assistant",
|
|
1451
|
+
content: "Hi there.",
|
|
1452
|
+
},
|
|
1770
1453
|
},
|
|
1771
|
-
|
|
1772
|
-
|
|
1773
|
-
},
|
|
1774
|
-
});
|
|
1775
|
-
return [
|
|
1776
|
-
...messages,
|
|
1777
|
-
{
|
|
1778
|
-
role: "assistant" as const,
|
|
1779
|
-
content: [{ type: "text", text: "Hi there." }] as ContentBlock[],
|
|
1454
|
+
],
|
|
1455
|
+
},
|
|
1780
1456
|
},
|
|
1781
|
-
]
|
|
1782
|
-
};
|
|
1783
|
-
|
|
1784
|
-
const ctx = makeCtx({
|
|
1785
|
-
agentLoopRun,
|
|
1457
|
+
],
|
|
1786
1458
|
provider: {
|
|
1787
1459
|
name: "openrouter",
|
|
1788
1460
|
sendMessage: async () => ({
|
|
@@ -1852,27 +1524,9 @@ describe("session-agent-loop", () => {
|
|
|
1852
1524
|
},
|
|
1853
1525
|
});
|
|
1854
1526
|
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
await onEvent({ type: "llm_call_started" });
|
|
1859
|
-
onEvent({
|
|
1860
|
-
type: "message_complete",
|
|
1861
|
-
message: {
|
|
1862
|
-
role: "assistant",
|
|
1863
|
-
content: [{ type: "text", text: "recovered" }],
|
|
1864
|
-
},
|
|
1865
|
-
});
|
|
1866
|
-
return [
|
|
1867
|
-
...messages,
|
|
1868
|
-
{
|
|
1869
|
-
role: "assistant" as const,
|
|
1870
|
-
content: [{ type: "text", text: "recovered" }] as ContentBlock[],
|
|
1871
|
-
},
|
|
1872
|
-
];
|
|
1873
|
-
};
|
|
1874
|
-
|
|
1875
|
-
const ctx = makeCtx({ agentLoopRun });
|
|
1527
|
+
// After the orchestrator's preflight compaction runs, the loop completes
|
|
1528
|
+
// the turn normally.
|
|
1529
|
+
const ctx = makeCtx({ providerResponses: [textResponse("recovered")] });
|
|
1876
1530
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
1877
1531
|
|
|
1878
1532
|
const compactorCall = recordUsageMock.mock.calls.find(
|
|
@@ -1911,7 +1565,6 @@ describe("session-agent-loop", () => {
|
|
|
1911
1565
|
|
|
1912
1566
|
test("convergence loop applies reducer and retries when context-too-large is detected", async () => {
|
|
1913
1567
|
const events: ServerMessage[] = [];
|
|
1914
|
-
let callCount = 0;
|
|
1915
1568
|
let reducerCalled = false;
|
|
1916
1569
|
|
|
1917
1570
|
// Configure reducer to succeed on first call — return reduced messages
|
|
@@ -1945,53 +1598,15 @@ describe("session-agent-loop", () => {
|
|
|
1945
1598
|
};
|
|
1946
1599
|
};
|
|
1947
1600
|
|
|
1948
|
-
|
|
1949
|
-
|
|
1950
|
-
|
|
1951
|
-
|
|
1952
|
-
|
|
1953
|
-
|
|
1954
|
-
callCount++;
|
|
1955
|
-
if (callCount === 1) {
|
|
1956
|
-
onEvent({
|
|
1957
|
-
type: "error",
|
|
1958
|
-
error: new Error("context_length_exceeded"),
|
|
1959
|
-
});
|
|
1960
|
-
onEvent({
|
|
1961
|
-
type: "usage",
|
|
1962
|
-
inputTokens: 100,
|
|
1963
|
-
outputTokens: 0,
|
|
1964
|
-
model: "test-model",
|
|
1965
|
-
providerDurationMs: 50,
|
|
1966
|
-
});
|
|
1967
|
-
return messages;
|
|
1968
|
-
}
|
|
1969
|
-
// Second call (after reducer): succeed
|
|
1970
|
-
onEvent({
|
|
1971
|
-
type: "message_complete",
|
|
1972
|
-
message: {
|
|
1973
|
-
role: "assistant",
|
|
1974
|
-
content: [{ type: "text", text: "recovered" }],
|
|
1975
|
-
},
|
|
1976
|
-
});
|
|
1977
|
-
onEvent({
|
|
1978
|
-
type: "usage",
|
|
1979
|
-
inputTokens: 50,
|
|
1980
|
-
outputTokens: 25,
|
|
1981
|
-
model: "test-model",
|
|
1982
|
-
providerDurationMs: 100,
|
|
1983
|
-
});
|
|
1984
|
-
return [
|
|
1985
|
-
...messages,
|
|
1986
|
-
{
|
|
1987
|
-
role: "assistant" as const,
|
|
1988
|
-
content: [{ type: "text", text: "recovered" }] as ContentBlock[],
|
|
1989
|
-
},
|
|
1990
|
-
];
|
|
1991
|
-
};
|
|
1601
|
+
// The provider rejects the first call with a context-too-large error,
|
|
1602
|
+
// then succeeds once the orchestrator has reduced the context.
|
|
1603
|
+
const { provider, calls } = createMockProvider([
|
|
1604
|
+
new Error("context_length_exceeded"),
|
|
1605
|
+
textResponse("recovered"),
|
|
1606
|
+
]);
|
|
1992
1607
|
|
|
1993
1608
|
const ctx = makeCtx({
|
|
1994
|
-
|
|
1609
|
+
loopProvider: provider,
|
|
1995
1610
|
contextWindowManager: {
|
|
1996
1611
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
1997
1612
|
maybeCompact: async () => ({ compacted: false }),
|
|
@@ -2001,7 +1616,7 @@ describe("session-agent-loop", () => {
|
|
|
2001
1616
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
2002
1617
|
|
|
2003
1618
|
expect(reducerCalled).toBe(true);
|
|
2004
|
-
expect(
|
|
1619
|
+
expect(calls.length).toBe(2);
|
|
2005
1620
|
const compactEvent = events.find((e) => e.type === "context_compacted");
|
|
2006
1621
|
expect(compactEvent).toBeDefined();
|
|
2007
1622
|
});
|
|
@@ -2009,23 +1624,10 @@ describe("session-agent-loop", () => {
|
|
|
2009
1624
|
test("emits conversation_error when context stays too large after all recovery attempts", async () => {
|
|
2010
1625
|
const events: ServerMessage[] = [];
|
|
2011
1626
|
|
|
2012
|
-
|
|
2013
|
-
|
|
2014
|
-
type: "error",
|
|
2015
|
-
error: new Error("context_length_exceeded"),
|
|
2016
|
-
});
|
|
2017
|
-
onEvent({
|
|
2018
|
-
type: "usage",
|
|
2019
|
-
inputTokens: 100,
|
|
2020
|
-
outputTokens: 0,
|
|
2021
|
-
model: "test-model",
|
|
2022
|
-
providerDurationMs: 50,
|
|
2023
|
-
});
|
|
2024
|
-
return messages;
|
|
2025
|
-
};
|
|
2026
|
-
|
|
1627
|
+
// The provider rejects every call with a context-too-large error, so the
|
|
1628
|
+
// orchestrator exhausts its recovery attempts.
|
|
2027
1629
|
const ctx = makeCtx({
|
|
2028
|
-
|
|
1630
|
+
providerResponses: [new Error("context_length_exceeded")],
|
|
2029
1631
|
contextWindowManager: {
|
|
2030
1632
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
2031
1633
|
// Compaction succeeds but context is still too large
|
|
@@ -2059,7 +1661,6 @@ describe("session-agent-loop", () => {
|
|
|
2059
1661
|
|
|
2060
1662
|
test("bounded convergence loop applies reducer tiers and recovers", async () => {
|
|
2061
1663
|
const events: ServerMessage[] = [];
|
|
2062
|
-
let callCount = 0;
|
|
2063
1664
|
let reducerCalls = 0;
|
|
2064
1665
|
|
|
2065
1666
|
// Reducer: succeed on first call, returning reduced messages
|
|
@@ -2077,55 +1678,15 @@ describe("session-agent-loop", () => {
|
|
|
2077
1678
|
};
|
|
2078
1679
|
};
|
|
2079
1680
|
|
|
2080
|
-
|
|
2081
|
-
|
|
2082
|
-
|
|
2083
|
-
|
|
2084
|
-
|
|
2085
|
-
|
|
2086
|
-
callCount++;
|
|
2087
|
-
if (callCount === 1) {
|
|
2088
|
-
onEvent({
|
|
2089
|
-
type: "error",
|
|
2090
|
-
error: new Error("context_length_exceeded"),
|
|
2091
|
-
});
|
|
2092
|
-
onEvent({
|
|
2093
|
-
type: "usage",
|
|
2094
|
-
inputTokens: 100,
|
|
2095
|
-
outputTokens: 0,
|
|
2096
|
-
model: "test-model",
|
|
2097
|
-
providerDurationMs: 50,
|
|
2098
|
-
});
|
|
2099
|
-
return messages;
|
|
2100
|
-
}
|
|
2101
|
-
// After reducer runs, succeed
|
|
2102
|
-
onEvent({
|
|
2103
|
-
type: "message_complete",
|
|
2104
|
-
message: {
|
|
2105
|
-
role: "assistant",
|
|
2106
|
-
content: [{ type: "text", text: "recovered via convergence" }],
|
|
2107
|
-
},
|
|
2108
|
-
});
|
|
2109
|
-
onEvent({
|
|
2110
|
-
type: "usage",
|
|
2111
|
-
inputTokens: 50,
|
|
2112
|
-
outputTokens: 25,
|
|
2113
|
-
model: "test-model",
|
|
2114
|
-
providerDurationMs: 100,
|
|
2115
|
-
});
|
|
2116
|
-
return [
|
|
2117
|
-
...messages,
|
|
2118
|
-
{
|
|
2119
|
-
role: "assistant" as const,
|
|
2120
|
-
content: [
|
|
2121
|
-
{ type: "text", text: "recovered via convergence" },
|
|
2122
|
-
] as ContentBlock[],
|
|
2123
|
-
},
|
|
2124
|
-
];
|
|
2125
|
-
};
|
|
1681
|
+
// The provider rejects the first call with a context-too-large error,
|
|
1682
|
+
// then succeeds once the orchestrator has reduced the context.
|
|
1683
|
+
const { provider, calls } = createMockProvider([
|
|
1684
|
+
new Error("context_length_exceeded"),
|
|
1685
|
+
textResponse("recovered via convergence"),
|
|
1686
|
+
]);
|
|
2126
1687
|
|
|
2127
1688
|
const ctx = makeCtx({
|
|
2128
|
-
|
|
1689
|
+
loopProvider: provider,
|
|
2129
1690
|
contextWindowManager: {
|
|
2130
1691
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
2131
1692
|
maybeCompact: async () => ({ compacted: false }),
|
|
@@ -2135,7 +1696,7 @@ describe("session-agent-loop", () => {
|
|
|
2135
1696
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
2136
1697
|
|
|
2137
1698
|
expect(reducerCalls).toBeGreaterThanOrEqual(1);
|
|
2138
|
-
expect(
|
|
1699
|
+
expect(calls.length).toBe(2);
|
|
2139
1700
|
const conversationError = events.find(
|
|
2140
1701
|
(e) => e.type === "conversation_error",
|
|
2141
1702
|
);
|
|
@@ -2146,7 +1707,6 @@ describe("session-agent-loop", () => {
|
|
|
2146
1707
|
|
|
2147
1708
|
test("non-interactive auto-compress continues without approval prompt", async () => {
|
|
2148
1709
|
const events: ServerMessage[] = [];
|
|
2149
|
-
let callCount = 0;
|
|
2150
1710
|
|
|
2151
1711
|
// Reducer exhausts all tiers
|
|
2152
1712
|
mockReducerStepFn = (msgs: Message[]) => ({
|
|
@@ -2167,54 +1727,14 @@ describe("session-agent-loop", () => {
|
|
|
2167
1727
|
|
|
2168
1728
|
mockOverflowAction = "auto_compress_latest_turn";
|
|
2169
1729
|
|
|
2170
|
-
|
|
2171
|
-
|
|
2172
|
-
// `AgentLoop.run` just before `provider.sendMessage`. Retry branches
|
|
2173
|
-
// need this on every invocation: each agent-loop iteration reserves
|
|
2174
|
-
// its own row.
|
|
2175
|
-
await onEvent({ type: "llm_call_started" });
|
|
2176
|
-
callCount++;
|
|
2177
|
-
if (callCount <= 2) {
|
|
2178
|
-
onEvent({
|
|
2179
|
-
type: "error",
|
|
2180
|
-
error: new Error("context_length_exceeded"),
|
|
2181
|
-
});
|
|
2182
|
-
onEvent({
|
|
2183
|
-
type: "usage",
|
|
2184
|
-
inputTokens: 100,
|
|
2185
|
-
outputTokens: 0,
|
|
2186
|
-
model: "test-model",
|
|
2187
|
-
providerDurationMs: 50,
|
|
2188
|
-
});
|
|
2189
|
-
return messages;
|
|
2190
|
-
}
|
|
2191
|
-
onEvent({
|
|
2192
|
-
type: "message_complete",
|
|
2193
|
-
message: {
|
|
2194
|
-
role: "assistant",
|
|
2195
|
-
content: [{ type: "text", text: "auto-recovered" }],
|
|
2196
|
-
},
|
|
2197
|
-
});
|
|
2198
|
-
onEvent({
|
|
2199
|
-
type: "usage",
|
|
2200
|
-
inputTokens: 50,
|
|
2201
|
-
outputTokens: 25,
|
|
2202
|
-
model: "test-model",
|
|
2203
|
-
providerDurationMs: 100,
|
|
2204
|
-
});
|
|
2205
|
-
return [
|
|
2206
|
-
...messages,
|
|
2207
|
-
{
|
|
2208
|
-
role: "assistant" as const,
|
|
2209
|
-
content: [
|
|
2210
|
-
{ type: "text", text: "auto-recovered" },
|
|
2211
|
-
] as ContentBlock[],
|
|
2212
|
-
},
|
|
2213
|
-
];
|
|
2214
|
-
};
|
|
2215
|
-
|
|
1730
|
+
// The provider rejects the first two calls with context-too-large errors,
|
|
1731
|
+
// then succeeds after the emergency auto-compress runs.
|
|
2216
1732
|
const ctx = makeCtx({
|
|
2217
|
-
|
|
1733
|
+
providerResponses: [
|
|
1734
|
+
new Error("context_length_exceeded"),
|
|
1735
|
+
new Error("context_length_exceeded"),
|
|
1736
|
+
textResponse("auto-recovered"),
|
|
1737
|
+
],
|
|
2218
1738
|
hasNoClient: true,
|
|
2219
1739
|
contextWindowManager: {
|
|
2220
1740
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
@@ -2261,7 +1781,6 @@ describe("session-agent-loop", () => {
|
|
|
2261
1781
|
// `budget_yield_unrecovered` so the inspector and dashboards can
|
|
2262
1782
|
// attribute the silent stall.
|
|
2263
1783
|
const events: ServerMessage[] = [];
|
|
2264
|
-
let callCount = 0;
|
|
2265
1784
|
|
|
2266
1785
|
// Reducer exhausts all 4 tiers on first call so the convergence
|
|
2267
1786
|
// loop runs exactly one iteration before falling through to
|
|
@@ -2292,43 +1811,30 @@ describe("session-agent-loop", () => {
|
|
|
2292
1811
|
// call). 90k satisfies both so the path reaches call 3.
|
|
2293
1812
|
mockEstimateTokens = 90_000;
|
|
2294
1813
|
|
|
2295
|
-
|
|
2296
|
-
|
|
2297
|
-
|
|
2298
|
-
|
|
2299
|
-
|
|
2300
|
-
|
|
2301
|
-
|
|
2302
|
-
type: "error",
|
|
2303
|
-
error: new Error("context_length_exceeded"),
|
|
2304
|
-
});
|
|
2305
|
-
onEvent({
|
|
2306
|
-
type: "usage",
|
|
2307
|
-
inputTokens: 100,
|
|
2308
|
-
outputTokens: 0,
|
|
2309
|
-
model: "test-model",
|
|
2310
|
-
providerDurationMs: 50,
|
|
2311
|
-
});
|
|
2312
|
-
return messages;
|
|
2313
|
-
}
|
|
2314
|
-
// Call 3: the auto_compress_latest_turn rerun. Invoke
|
|
2315
|
-
// onCheckpoint so the orchestrator's mid-loop budget check
|
|
2316
|
-
// flips `yieldedForBudget` to true, then return without
|
|
2317
|
-
// finishing — mirroring what AgentLoop.run does when its
|
|
2318
|
-
// checkpoint returns "yield".
|
|
2319
|
-
if (options?.onCheckpoint) {
|
|
2320
|
-
await options.onCheckpoint({
|
|
2321
|
-
turnIndex: 0,
|
|
2322
|
-
toolCount: 1,
|
|
2323
|
-
hasToolUse: true,
|
|
2324
|
-
history: messages,
|
|
2325
|
-
});
|
|
2326
|
-
}
|
|
2327
|
-
return messages;
|
|
2328
|
-
};
|
|
2329
|
-
|
|
1814
|
+
// Calls 1 (initial) and 2 (convergence rerun) reject with
|
|
1815
|
+
// context-too-large so `contextTooLargeDetected` stays true through the
|
|
1816
|
+
// convergence exit and the orchestrator enters the auto_compress branch.
|
|
1817
|
+
// Call 3 (the auto_compress rerun) is a tool turn: the loop runs it
|
|
1818
|
+
// without a compaction hook, so when its mid-loop budget gate trips on
|
|
1819
|
+
// the still-oversized estimate it yields `exitReason = "budget"` rather
|
|
1820
|
+
// than recovering — the silent-stall path under test.
|
|
2330
1821
|
const ctx = makeCtx({
|
|
2331
|
-
|
|
1822
|
+
providerResponses: [
|
|
1823
|
+
new Error("context_length_exceeded"),
|
|
1824
|
+
new Error("context_length_exceeded"),
|
|
1825
|
+
toolUseResponse("t1", "read_file", { path: "/a.txt" }),
|
|
1826
|
+
],
|
|
1827
|
+
loopTools: [
|
|
1828
|
+
{
|
|
1829
|
+
name: "read_file",
|
|
1830
|
+
description: "Read a file",
|
|
1831
|
+
input_schema: {
|
|
1832
|
+
type: "object",
|
|
1833
|
+
properties: { path: { type: "string" } },
|
|
1834
|
+
},
|
|
1835
|
+
},
|
|
1836
|
+
],
|
|
1837
|
+
toolExecutor: async () => ({ content: "data", isError: false }),
|
|
2332
1838
|
hasNoClient: true,
|
|
2333
1839
|
contextWindowManager: {
|
|
2334
1840
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
@@ -2411,23 +1917,10 @@ describe("session-agent-loop", () => {
|
|
|
2411
1917
|
};
|
|
2412
1918
|
};
|
|
2413
1919
|
|
|
2414
|
-
|
|
2415
|
-
|
|
2416
|
-
type: "error",
|
|
2417
|
-
error: new Error("context_length_exceeded"),
|
|
2418
|
-
});
|
|
2419
|
-
onEvent({
|
|
2420
|
-
type: "usage",
|
|
2421
|
-
inputTokens: 100,
|
|
2422
|
-
outputTokens: 0,
|
|
2423
|
-
model: "test-model",
|
|
2424
|
-
providerDurationMs: 50,
|
|
2425
|
-
});
|
|
2426
|
-
return messages;
|
|
2427
|
-
};
|
|
2428
|
-
|
|
1920
|
+
// The provider rejects every call with a context-too-large error, so the
|
|
1921
|
+
// orchestrator keeps retrying until it hits the attempt ceiling.
|
|
2429
1922
|
const ctx = makeCtx({
|
|
2430
|
-
|
|
1923
|
+
providerResponses: [new Error("context_length_exceeded")],
|
|
2431
1924
|
contextWindowManager: {
|
|
2432
1925
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
2433
1926
|
maybeCompact: async () => ({ compacted: false }),
|
|
@@ -2443,7 +1936,6 @@ describe("session-agent-loop", () => {
|
|
|
2443
1936
|
test("preflight budget evaluation invokes reducer before provider call", async () => {
|
|
2444
1937
|
const events: ServerMessage[] = [];
|
|
2445
1938
|
let reducerCalls = 0;
|
|
2446
|
-
let agentLoopCalls = 0;
|
|
2447
1939
|
|
|
2448
1940
|
// Set token estimate above budget (100000 * 0.95 = 95000)
|
|
2449
1941
|
mockEstimateTokens = 96000;
|
|
@@ -2462,36 +1954,11 @@ describe("session-agent-loop", () => {
|
|
|
2462
1954
|
};
|
|
2463
1955
|
};
|
|
2464
1956
|
|
|
2465
|
-
|
|
2466
|
-
|
|
2467
|
-
|
|
2468
|
-
// `AgentLoop.run` just before `provider.sendMessage`.
|
|
2469
|
-
await onEvent({ type: "llm_call_started" });
|
|
2470
|
-
onEvent({
|
|
2471
|
-
type: "message_complete",
|
|
2472
|
-
message: {
|
|
2473
|
-
role: "assistant",
|
|
2474
|
-
content: [{ type: "text", text: "ok" }],
|
|
2475
|
-
},
|
|
2476
|
-
});
|
|
2477
|
-
onEvent({
|
|
2478
|
-
type: "usage",
|
|
2479
|
-
inputTokens: 50,
|
|
2480
|
-
outputTokens: 25,
|
|
2481
|
-
model: "test-model",
|
|
2482
|
-
providerDurationMs: 100,
|
|
2483
|
-
});
|
|
2484
|
-
return [
|
|
2485
|
-
...messages,
|
|
2486
|
-
{
|
|
2487
|
-
role: "assistant" as const,
|
|
2488
|
-
content: [{ type: "text", text: "ok" }] as ContentBlock[],
|
|
2489
|
-
},
|
|
2490
|
-
];
|
|
2491
|
-
};
|
|
2492
|
-
|
|
1957
|
+
// After the preflight reducer brings the estimate under budget, the loop
|
|
1958
|
+
// completes the turn in a single provider call.
|
|
1959
|
+
const { provider, calls } = createMockProvider([textResponse("ok")]);
|
|
2493
1960
|
const ctx = makeCtx({
|
|
2494
|
-
|
|
1961
|
+
loopProvider: provider,
|
|
2495
1962
|
contextWindowManager: {
|
|
2496
1963
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
2497
1964
|
maybeCompact: async () => ({ compacted: false }),
|
|
@@ -2502,8 +1969,8 @@ describe("session-agent-loop", () => {
|
|
|
2502
1969
|
|
|
2503
1970
|
// Reducer should have been called during preflight
|
|
2504
1971
|
expect(reducerCalls).toBeGreaterThanOrEqual(1);
|
|
2505
|
-
// Agent loop should still succeed
|
|
2506
|
-
expect(
|
|
1972
|
+
// Agent loop should still succeed in a single provider call
|
|
1973
|
+
expect(calls.length).toBe(1);
|
|
2507
1974
|
const complete = events.find((e) => e.type === "message_complete");
|
|
2508
1975
|
expect(complete).toBeDefined();
|
|
2509
1976
|
});
|
|
@@ -2512,78 +1979,28 @@ describe("session-agent-loop", () => {
|
|
|
2512
1979
|
describe("provider ordering error retry", () => {
|
|
2513
1980
|
test("retries with deep repair when ordering error is detected", async () => {
|
|
2514
1981
|
const events: ServerMessage[] = [];
|
|
2515
|
-
let callCount = 0;
|
|
2516
|
-
|
|
2517
|
-
const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
|
|
2518
|
-
// Prime the assistant row anchor — production code emits this from
|
|
2519
|
-
// `AgentLoop.run` just before `provider.sendMessage`. Retry branches
|
|
2520
|
-
// need this on every invocation: each agent-loop iteration reserves
|
|
2521
|
-
// its own row.
|
|
2522
|
-
await onEvent({ type: "llm_call_started" });
|
|
2523
|
-
callCount++;
|
|
2524
|
-
if (callCount === 1) {
|
|
2525
|
-
onEvent({
|
|
2526
|
-
type: "error",
|
|
2527
|
-
error: new Error("messages ordering error"),
|
|
2528
|
-
});
|
|
2529
|
-
onEvent({
|
|
2530
|
-
type: "usage",
|
|
2531
|
-
inputTokens: 100,
|
|
2532
|
-
outputTokens: 0,
|
|
2533
|
-
model: "test-model",
|
|
2534
|
-
providerDurationMs: 50,
|
|
2535
|
-
});
|
|
2536
|
-
return messages;
|
|
2537
|
-
}
|
|
2538
|
-
// Retry succeeds
|
|
2539
|
-
onEvent({
|
|
2540
|
-
type: "message_complete",
|
|
2541
|
-
message: {
|
|
2542
|
-
role: "assistant",
|
|
2543
|
-
content: [{ type: "text", text: "fixed" }],
|
|
2544
|
-
},
|
|
2545
|
-
});
|
|
2546
|
-
onEvent({
|
|
2547
|
-
type: "usage",
|
|
2548
|
-
inputTokens: 50,
|
|
2549
|
-
outputTokens: 25,
|
|
2550
|
-
model: "test-model",
|
|
2551
|
-
providerDurationMs: 100,
|
|
2552
|
-
});
|
|
2553
|
-
return [
|
|
2554
|
-
...messages,
|
|
2555
|
-
{
|
|
2556
|
-
role: "assistant" as const,
|
|
2557
|
-
content: [{ type: "text", text: "fixed" }] as ContentBlock[],
|
|
2558
|
-
},
|
|
2559
|
-
];
|
|
2560
|
-
};
|
|
2561
1982
|
|
|
2562
|
-
|
|
1983
|
+
// The provider rejects the first call with an ordering error, then
|
|
1984
|
+
// succeeds once the orchestrator's deep repair re-sends the turn.
|
|
1985
|
+
const { provider, calls } = createMockProvider([
|
|
1986
|
+
new Error("messages ordering error"),
|
|
1987
|
+
textResponse("fixed"),
|
|
1988
|
+
]);
|
|
1989
|
+
|
|
1990
|
+
const ctx = makeCtx({ loopProvider: provider });
|
|
2563
1991
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
2564
1992
|
|
|
2565
|
-
expect(
|
|
1993
|
+
expect(calls.length).toBe(2);
|
|
2566
1994
|
});
|
|
2567
1995
|
|
|
2568
1996
|
test("emits deferred ordering error when retry also fails", async () => {
|
|
2569
1997
|
const events: ServerMessage[] = [];
|
|
2570
1998
|
|
|
2571
|
-
|
|
2572
|
-
|
|
2573
|
-
|
|
2574
|
-
|
|
2575
|
-
|
|
2576
|
-
onEvent({
|
|
2577
|
-
type: "usage",
|
|
2578
|
-
inputTokens: 100,
|
|
2579
|
-
outputTokens: 0,
|
|
2580
|
-
model: "test-model",
|
|
2581
|
-
providerDurationMs: 50,
|
|
2582
|
-
});
|
|
2583
|
-
return messages;
|
|
2584
|
-
};
|
|
2585
|
-
|
|
2586
|
-
const ctx = makeCtx({ agentLoopRun });
|
|
1999
|
+
// The provider rejects every call with an ordering error, so even the
|
|
2000
|
+
// deep-repair retry fails and the orchestrator surfaces the error.
|
|
2001
|
+
const ctx = makeCtx({
|
|
2002
|
+
providerResponses: [new Error("messages ordering error")],
|
|
2003
|
+
});
|
|
2587
2004
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
2588
2005
|
|
|
2589
2006
|
const conversationError = events.find(
|
|
@@ -2597,62 +2014,18 @@ describe("session-agent-loop", () => {
|
|
|
2597
2014
|
test("yields at checkpoint when canHandoffAtCheckpoint returns true", async () => {
|
|
2598
2015
|
const events: ServerMessage[] = [];
|
|
2599
2016
|
|
|
2600
|
-
|
|
2601
|
-
|
|
2602
|
-
|
|
2603
|
-
|
|
2604
|
-
|
|
2605
|
-
await onEvent({ type: "llm_call_started" });
|
|
2606
|
-
// Simulate tool use followed by checkpoint
|
|
2607
|
-
onEvent({ type: "tool_use", id: "tu-1", name: "file_read", input: {} });
|
|
2608
|
-
onEvent({
|
|
2609
|
-
type: "tool_result",
|
|
2610
|
-
toolUseId: "tu-1",
|
|
2611
|
-
content: "file content",
|
|
2612
|
-
isError: false,
|
|
2613
|
-
});
|
|
2614
|
-
onEvent({
|
|
2615
|
-
type: "message_complete",
|
|
2616
|
-
message: {
|
|
2617
|
-
role: "assistant",
|
|
2618
|
-
content: [{ type: "text", text: "partial" }],
|
|
2619
|
-
},
|
|
2620
|
-
});
|
|
2621
|
-
onEvent({
|
|
2622
|
-
type: "usage",
|
|
2623
|
-
inputTokens: 100,
|
|
2624
|
-
outputTokens: 50,
|
|
2625
|
-
model: "test-model",
|
|
2626
|
-
providerDurationMs: 100,
|
|
2627
|
-
});
|
|
2628
|
-
if (options?.onCheckpoint) {
|
|
2629
|
-
const decision = await options.onCheckpoint({
|
|
2630
|
-
turnIndex: 0,
|
|
2631
|
-
toolCount: 1,
|
|
2632
|
-
hasToolUse: true,
|
|
2633
|
-
history: messages,
|
|
2634
|
-
});
|
|
2635
|
-
if (decision !== "continue") {
|
|
2636
|
-
return [
|
|
2637
|
-
...messages,
|
|
2638
|
-
{
|
|
2639
|
-
role: "assistant" as const,
|
|
2640
|
-
content: [{ type: "text", text: "partial" }] as ContentBlock[],
|
|
2641
|
-
},
|
|
2642
|
-
];
|
|
2643
|
-
}
|
|
2644
|
-
}
|
|
2645
|
-
return [
|
|
2646
|
-
...messages,
|
|
2017
|
+
// A tool turn drives the loop to its first mid-loop checkpoint, where the
|
|
2018
|
+
// orchestrator yields for a queued handoff.
|
|
2019
|
+
const ctx = makeCtx({
|
|
2020
|
+
providerResponses: [toolUseResponse("tu-1", "file_read", {})],
|
|
2021
|
+
loopTools: [
|
|
2647
2022
|
{
|
|
2648
|
-
|
|
2649
|
-
|
|
2023
|
+
name: "file_read",
|
|
2024
|
+
description: "Read a file",
|
|
2025
|
+
input_schema: { type: "object", properties: {} },
|
|
2650
2026
|
},
|
|
2651
|
-
]
|
|
2652
|
-
|
|
2653
|
-
|
|
2654
|
-
const ctx = makeCtx({
|
|
2655
|
-
agentLoopRun,
|
|
2027
|
+
],
|
|
2028
|
+
toolExecutor: async () => ({ content: "file content", isError: false }),
|
|
2656
2029
|
canHandoffAtCheckpoint: () => true,
|
|
2657
2030
|
} as unknown as Partial<AgentLoopConversationContext>);
|
|
2658
2031
|
|
|
@@ -2669,52 +2042,21 @@ describe("session-agent-loop", () => {
|
|
|
2669
2042
|
test("continues when canHandoffAtCheckpoint returns false", async () => {
|
|
2670
2043
|
const events: ServerMessage[] = [];
|
|
2671
2044
|
|
|
2672
|
-
|
|
2673
|
-
|
|
2674
|
-
|
|
2675
|
-
|
|
2676
|
-
|
|
2677
|
-
|
|
2678
|
-
|
|
2679
|
-
|
|
2680
|
-
type: "tool_result",
|
|
2681
|
-
toolUseId: "tu-1",
|
|
2682
|
-
content: "content",
|
|
2683
|
-
isError: false,
|
|
2684
|
-
});
|
|
2685
|
-
onEvent({
|
|
2686
|
-
type: "message_complete",
|
|
2687
|
-
message: {
|
|
2688
|
-
role: "assistant",
|
|
2689
|
-
content: [{ type: "text", text: "done" }],
|
|
2690
|
-
},
|
|
2691
|
-
});
|
|
2692
|
-
onEvent({
|
|
2693
|
-
type: "usage",
|
|
2694
|
-
inputTokens: 100,
|
|
2695
|
-
outputTokens: 50,
|
|
2696
|
-
model: "test-model",
|
|
2697
|
-
providerDurationMs: 100,
|
|
2698
|
-
});
|
|
2699
|
-
if (options?.onCheckpoint) {
|
|
2700
|
-
await options.onCheckpoint({
|
|
2701
|
-
turnIndex: 0,
|
|
2702
|
-
toolCount: 1,
|
|
2703
|
-
hasToolUse: true,
|
|
2704
|
-
history: messages,
|
|
2705
|
-
});
|
|
2706
|
-
}
|
|
2707
|
-
return [
|
|
2708
|
-
...messages,
|
|
2045
|
+
// The tool turn reaches a checkpoint, but with handoff disabled the loop
|
|
2046
|
+
// continues to the next turn and completes normally.
|
|
2047
|
+
const ctx = makeCtx({
|
|
2048
|
+
providerResponses: [
|
|
2049
|
+
toolUseResponse("tu-1", "file_read", {}),
|
|
2050
|
+
textResponse("done"),
|
|
2051
|
+
],
|
|
2052
|
+
loopTools: [
|
|
2709
2053
|
{
|
|
2710
|
-
|
|
2711
|
-
|
|
2054
|
+
name: "file_read",
|
|
2055
|
+
description: "Read a file",
|
|
2056
|
+
input_schema: { type: "object", properties: {} },
|
|
2712
2057
|
},
|
|
2713
|
-
]
|
|
2714
|
-
|
|
2715
|
-
|
|
2716
|
-
const ctx = makeCtx({
|
|
2717
|
-
agentLoopRun,
|
|
2058
|
+
],
|
|
2059
|
+
toolExecutor: async () => ({ content: "content", isError: false }),
|
|
2718
2060
|
canHandoffAtCheckpoint: () => false,
|
|
2719
2061
|
} as unknown as Partial<AgentLoopConversationContext>);
|
|
2720
2062
|
|
|
@@ -2736,36 +2078,18 @@ describe("session-agent-loop", () => {
|
|
|
2736
2078
|
const events: ServerMessage[] = [];
|
|
2737
2079
|
const abortController = new AbortController();
|
|
2738
2080
|
|
|
2739
|
-
|
|
2740
|
-
|
|
2741
|
-
|
|
2742
|
-
|
|
2743
|
-
|
|
2744
|
-
type: "
|
|
2745
|
-
|
|
2746
|
-
|
|
2747
|
-
|
|
2748
|
-
},
|
|
2749
|
-
});
|
|
2750
|
-
onEvent({
|
|
2751
|
-
type: "usage",
|
|
2752
|
-
inputTokens: 100,
|
|
2753
|
-
outputTokens: 50,
|
|
2754
|
-
model: "test-model",
|
|
2755
|
-
providerDurationMs: 100,
|
|
2756
|
-
});
|
|
2757
|
-
// Simulate abort after processing
|
|
2758
|
-
abortController.abort();
|
|
2759
|
-
return [
|
|
2760
|
-
...messages,
|
|
2761
|
-
{
|
|
2762
|
-
role: "assistant" as const,
|
|
2763
|
-
content: [{ type: "text", text: "partial" }] as ContentBlock[],
|
|
2764
|
-
},
|
|
2765
|
-
];
|
|
2081
|
+
// The provider completes its response but the user cancels mid-turn, so
|
|
2082
|
+
// the orchestrator observes the aborted signal once the loop returns.
|
|
2083
|
+
const provider: Provider = {
|
|
2084
|
+
name: "mock",
|
|
2085
|
+
async sendMessage(_messages, options) {
|
|
2086
|
+
options?.onEvent?.({ type: "text_delta", text: "partial" });
|
|
2087
|
+
abortController.abort();
|
|
2088
|
+
return textResponse("partial");
|
|
2089
|
+
},
|
|
2766
2090
|
};
|
|
2767
2091
|
|
|
2768
|
-
const ctx = makeCtx({
|
|
2092
|
+
const ctx = makeCtx({ loopProvider: provider, abortController });
|
|
2769
2093
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
2770
2094
|
|
|
2771
2095
|
const cancelled = events.find((e) => e.type === "generation_cancelled");
|
|
@@ -2776,13 +2100,16 @@ describe("session-agent-loop", () => {
|
|
|
2776
2100
|
const events: ServerMessage[] = [];
|
|
2777
2101
|
const abortController = new AbortController();
|
|
2778
2102
|
|
|
2779
|
-
|
|
2780
|
-
|
|
2781
|
-
|
|
2782
|
-
|
|
2103
|
+
// The provider rejects with an AbortError after the user cancels.
|
|
2104
|
+
const provider: Provider = {
|
|
2105
|
+
name: "mock",
|
|
2106
|
+
async sendMessage() {
|
|
2107
|
+
abortController.abort();
|
|
2108
|
+
throw new DOMException("The operation was aborted", "AbortError");
|
|
2109
|
+
},
|
|
2783
2110
|
};
|
|
2784
2111
|
|
|
2785
|
-
const ctx = makeCtx({
|
|
2112
|
+
const ctx = makeCtx({ loopProvider: provider, abortController });
|
|
2786
2113
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
2787
2114
|
|
|
2788
2115
|
const cancelled = events.find((e) => e.type === "generation_cancelled");
|
|
@@ -2799,36 +2126,17 @@ describe("session-agent-loop", () => {
|
|
|
2799
2126
|
const abortController = new AbortController();
|
|
2800
2127
|
resolveAssistantAttachmentsMock.mockClear();
|
|
2801
2128
|
|
|
2802
|
-
|
|
2803
|
-
|
|
2804
|
-
|
|
2805
|
-
|
|
2806
|
-
|
|
2807
|
-
|
|
2808
|
-
|
|
2809
|
-
|
|
2810
|
-
content: [{ type: "text", text: "partial" }],
|
|
2811
|
-
},
|
|
2812
|
-
});
|
|
2813
|
-
onEvent({
|
|
2814
|
-
type: "usage",
|
|
2815
|
-
inputTokens: 100,
|
|
2816
|
-
outputTokens: 50,
|
|
2817
|
-
model: "test-model",
|
|
2818
|
-
providerDurationMs: 100,
|
|
2819
|
-
});
|
|
2820
|
-
// Simulate abort after processing
|
|
2821
|
-
abortController.abort();
|
|
2822
|
-
return [
|
|
2823
|
-
...messages,
|
|
2824
|
-
{
|
|
2825
|
-
role: "assistant" as const,
|
|
2826
|
-
content: [{ type: "text", text: "partial" }] as ContentBlock[],
|
|
2827
|
-
},
|
|
2828
|
-
];
|
|
2129
|
+
// The provider completes its response but the user cancels mid-turn.
|
|
2130
|
+
const provider: Provider = {
|
|
2131
|
+
name: "mock",
|
|
2132
|
+
async sendMessage(_messages, options) {
|
|
2133
|
+
options?.onEvent?.({ type: "text_delta", text: "partial" });
|
|
2134
|
+
abortController.abort();
|
|
2135
|
+
return textResponse("partial");
|
|
2136
|
+
},
|
|
2829
2137
|
};
|
|
2830
2138
|
|
|
2831
|
-
const ctx = makeCtx({
|
|
2139
|
+
const ctx = makeCtx({ loopProvider: provider, abortController });
|
|
2832
2140
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
2833
2141
|
|
|
2834
2142
|
const cancelled = events.find((e) => e.type === "generation_cancelled");
|
|
@@ -2840,96 +2148,50 @@ describe("session-agent-loop", () => {
|
|
|
2840
2148
|
|
|
2841
2149
|
describe("finally block cleanup", () => {
|
|
2842
2150
|
test("increments turnCount after successful run", async () => {
|
|
2843
|
-
|
|
2844
|
-
|
|
2845
|
-
// Prime the assistant row anchor — production code emits this from
|
|
2846
|
-
// `AgentLoop.run` just before `provider.sendMessage`.
|
|
2847
|
-
await onEvent({ type: "llm_call_started" });
|
|
2848
|
-
onEvent({
|
|
2849
|
-
type: "message_complete",
|
|
2850
|
-
message: {
|
|
2851
|
-
role: "assistant",
|
|
2852
|
-
content: [{ type: "text", text: "hi" }],
|
|
2853
|
-
},
|
|
2854
|
-
});
|
|
2855
|
-
onEvent({
|
|
2856
|
-
type: "usage",
|
|
2857
|
-
inputTokens: 10,
|
|
2858
|
-
outputTokens: 5,
|
|
2859
|
-
model: "test",
|
|
2860
|
-
providerDurationMs: 50,
|
|
2861
|
-
});
|
|
2862
|
-
return [
|
|
2863
|
-
...messages,
|
|
2864
|
-
{
|
|
2865
|
-
role: "assistant" as const,
|
|
2866
|
-
content: [{ type: "text", text: "hi" }] as ContentBlock[],
|
|
2867
|
-
},
|
|
2868
|
-
];
|
|
2869
|
-
},
|
|
2870
|
-
});
|
|
2151
|
+
// GIVEN a real loop that answers in a single text turn
|
|
2152
|
+
const ctx = makeCtx({ providerResponses: [textResponse("hi")] });
|
|
2871
2153
|
expect(ctx.turnCount).toBe(0);
|
|
2872
2154
|
|
|
2155
|
+
// WHEN the orchestrator runs the turn to completion
|
|
2873
2156
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
2874
2157
|
|
|
2158
|
+
// THEN the finally block increments the turn count
|
|
2875
2159
|
expect(ctx.turnCount).toBe(1);
|
|
2876
2160
|
});
|
|
2877
2161
|
|
|
2878
2162
|
test("clears processing state and abort controller", async () => {
|
|
2879
|
-
|
|
2880
|
-
|
|
2881
|
-
// Prime the assistant row anchor — production code emits this from
|
|
2882
|
-
// `AgentLoop.run` just before `provider.sendMessage`.
|
|
2883
|
-
await onEvent({ type: "llm_call_started" });
|
|
2884
|
-
onEvent({
|
|
2885
|
-
type: "message_complete",
|
|
2886
|
-
message: {
|
|
2887
|
-
role: "assistant",
|
|
2888
|
-
content: [{ type: "text", text: "hi" }],
|
|
2889
|
-
},
|
|
2890
|
-
});
|
|
2891
|
-
onEvent({
|
|
2892
|
-
type: "usage",
|
|
2893
|
-
inputTokens: 10,
|
|
2894
|
-
outputTokens: 5,
|
|
2895
|
-
model: "test",
|
|
2896
|
-
providerDurationMs: 50,
|
|
2897
|
-
});
|
|
2898
|
-
return [
|
|
2899
|
-
...messages,
|
|
2900
|
-
{
|
|
2901
|
-
role: "assistant" as const,
|
|
2902
|
-
content: [{ type: "text", text: "hi" }] as ContentBlock[],
|
|
2903
|
-
},
|
|
2904
|
-
];
|
|
2905
|
-
},
|
|
2906
|
-
});
|
|
2163
|
+
// GIVEN a real loop that answers in a single text turn
|
|
2164
|
+
const ctx = makeCtx({ providerResponses: [textResponse("hi")] });
|
|
2907
2165
|
|
|
2166
|
+
// WHEN the orchestrator runs the turn to completion
|
|
2908
2167
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
2909
2168
|
|
|
2910
|
-
|
|
2169
|
+
// THEN the finally block clears all per-turn processing state
|
|
2170
|
+
expect(ctx.isProcessing()).toBe(false);
|
|
2911
2171
|
expect(ctx.abortController).toBeNull();
|
|
2912
2172
|
expect(ctx.currentRequestId).toBeUndefined();
|
|
2913
2173
|
expect(ctx.commandIntent).toBeUndefined();
|
|
2914
2174
|
});
|
|
2915
2175
|
|
|
2916
|
-
test("clears state
|
|
2176
|
+
test("clears state and surfaces a processing error when the provider call fails", async () => {
|
|
2177
|
+
// GIVEN a real loop whose provider rejects with an unexpected error
|
|
2917
2178
|
const events: ServerMessage[] = [];
|
|
2918
2179
|
const ctx = makeCtx({
|
|
2919
|
-
|
|
2920
|
-
|
|
2921
|
-
|
|
2180
|
+
loopProvider: {
|
|
2181
|
+
name: "mock-provider",
|
|
2182
|
+
async sendMessage() {
|
|
2183
|
+
throw new Error("unexpected crash");
|
|
2184
|
+
},
|
|
2185
|
+
} as unknown as Provider,
|
|
2922
2186
|
});
|
|
2923
2187
|
|
|
2188
|
+
// WHEN the orchestrator runs the turn
|
|
2924
2189
|
await runAgentLoopImpl(ctx, "hi", "msg-1", (msg) => events.push(msg));
|
|
2925
2190
|
|
|
2926
|
-
|
|
2191
|
+
// THEN the finally block clears per-turn state and the failure is
|
|
2192
|
+
// surfaced as a processing-failed conversation error
|
|
2193
|
+
expect(ctx.isProcessing()).toBe(false);
|
|
2927
2194
|
expect(ctx.abortController).toBeNull();
|
|
2928
|
-
expect(events.find((event) => event.type === "error")).toMatchObject({
|
|
2929
|
-
type: "error",
|
|
2930
|
-
code: "CONVERSATION_PROCESSING_FAILED",
|
|
2931
|
-
errorCategory: "processing_failed",
|
|
2932
|
-
});
|
|
2933
2195
|
expect(
|
|
2934
2196
|
events.find((event) => event.type === "conversation_error"),
|
|
2935
2197
|
).toMatchObject({
|
|
@@ -2940,46 +2202,19 @@ describe("session-agent-loop", () => {
|
|
|
2940
2202
|
});
|
|
2941
2203
|
|
|
2942
2204
|
test("drains queue after completion", async () => {
|
|
2205
|
+
// GIVEN a real loop that answers in a single text turn
|
|
2943
2206
|
let drainReason: string | undefined;
|
|
2944
2207
|
const ctx = makeCtx({
|
|
2945
|
-
|
|
2946
|
-
messages: Message[],
|
|
2947
|
-
onEvent: (event: AgentEvent) => void | Promise<void>,
|
|
2948
|
-
) => {
|
|
2949
|
-
// Prime the assistant row anchor — production code emits this from
|
|
2950
|
-
// `AgentLoop.run` just before `provider.sendMessage`. Must be
|
|
2951
|
-
// awaited so the assistant row is reserved before message_complete
|
|
2952
|
-
// tries to write into it.
|
|
2953
|
-
await onEvent({ type: "llm_call_started" });
|
|
2954
|
-
onEvent({
|
|
2955
|
-
type: "message_complete",
|
|
2956
|
-
message: {
|
|
2957
|
-
role: "assistant",
|
|
2958
|
-
content: [{ type: "text", text: "ok" }],
|
|
2959
|
-
},
|
|
2960
|
-
});
|
|
2961
|
-
onEvent({
|
|
2962
|
-
type: "usage",
|
|
2963
|
-
inputTokens: 10,
|
|
2964
|
-
outputTokens: 5,
|
|
2965
|
-
model: "test",
|
|
2966
|
-
providerDurationMs: 50,
|
|
2967
|
-
});
|
|
2968
|
-
return [
|
|
2969
|
-
...messages,
|
|
2970
|
-
{
|
|
2971
|
-
role: "assistant" as const,
|
|
2972
|
-
content: [{ type: "text", text: "ok" }] as ContentBlock[],
|
|
2973
|
-
},
|
|
2974
|
-
];
|
|
2975
|
-
},
|
|
2208
|
+
providerResponses: [textResponse("ok")],
|
|
2976
2209
|
drainQueue: (reason: string) => {
|
|
2977
2210
|
drainReason = reason;
|
|
2978
2211
|
},
|
|
2979
2212
|
} as unknown as Partial<AgentLoopConversationContext>);
|
|
2980
2213
|
|
|
2214
|
+
// WHEN the orchestrator runs the turn to completion
|
|
2981
2215
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
2982
2216
|
|
|
2217
|
+
// THEN the queue is drained with the loop-complete reason
|
|
2983
2218
|
expect(drainReason).toBe("loop_complete");
|
|
2984
2219
|
});
|
|
2985
2220
|
});
|
|
@@ -3098,7 +2333,7 @@ describe("session-agent-loop", () => {
|
|
|
3098
2333
|
isUserMessage: true,
|
|
3099
2334
|
});
|
|
3100
2335
|
|
|
3101
|
-
expect(ctx.
|
|
2336
|
+
expect(ctx.isProcessing()).toBe(false);
|
|
3102
2337
|
expect(ctx.abortController).toBeNull();
|
|
3103
2338
|
expect(ctx.currentRequestId).toBeUndefined();
|
|
3104
2339
|
});
|
|
@@ -3208,24 +2443,17 @@ describe("session-agent-loop", () => {
|
|
|
3208
2443
|
test("synthesizes error assistant message when provider returns no response", async () => {
|
|
3209
2444
|
const events: ServerMessage[] = [];
|
|
3210
2445
|
|
|
3211
|
-
|
|
3212
|
-
|
|
3213
|
-
|
|
3214
|
-
|
|
3215
|
-
|
|
3216
|
-
|
|
3217
|
-
|
|
3218
|
-
|
|
3219
|
-
|
|
3220
|
-
|
|
3221
|
-
|
|
3222
|
-
providerDurationMs: 50,
|
|
3223
|
-
});
|
|
3224
|
-
// Return same messages (no assistant message appended)
|
|
3225
|
-
return messages;
|
|
3226
|
-
};
|
|
3227
|
-
|
|
3228
|
-
const ctx = makeCtx({ agentLoopRun });
|
|
2446
|
+
// GIVEN a real loop whose provider rejects with a generic error
|
|
2447
|
+
// (non-ordering, non-context-too-large) so the loop emits `error` and
|
|
2448
|
+
// the orchestrator sets `providerErrorUserMessage`.
|
|
2449
|
+
const ctx = makeCtx({
|
|
2450
|
+
loopProvider: {
|
|
2451
|
+
name: "mock-provider",
|
|
2452
|
+
async sendMessage() {
|
|
2453
|
+
throw new Error("Internal processing failure");
|
|
2454
|
+
},
|
|
2455
|
+
} as unknown as Provider,
|
|
2456
|
+
});
|
|
3229
2457
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
3230
2458
|
|
|
3231
2459
|
// The error should be sent as a conversation_error (not as an
|
|
@@ -3249,26 +2477,19 @@ describe("session-agent-loop", () => {
|
|
|
3249
2477
|
// sweep would wrong-attach this row to the wrong assistant message.
|
|
3250
2478
|
const events: ServerMessage[] = [];
|
|
3251
2479
|
|
|
3252
|
-
|
|
3253
|
-
|
|
3254
|
-
|
|
3255
|
-
|
|
3256
|
-
|
|
3257
|
-
|
|
3258
|
-
|
|
3259
|
-
|
|
3260
|
-
|
|
3261
|
-
|
|
3262
|
-
|
|
3263
|
-
|
|
3264
|
-
|
|
3265
|
-
error: new Error("upstream 500"),
|
|
3266
|
-
});
|
|
3267
|
-
// Provider returned no assistant content — same messages back.
|
|
3268
|
-
return messages;
|
|
3269
|
-
};
|
|
3270
|
-
|
|
3271
|
-
const ctx = makeCtx({ agentLoopRun });
|
|
2480
|
+
// GIVEN a real loop whose provider rejects: the loop emits
|
|
2481
|
+
// `provider_error` (writing an `llm_request_logs` row with
|
|
2482
|
+
// messageId=null — the orphan we link) then `error` (which sets
|
|
2483
|
+
// `state.providerErrorUserMessage`, activating the synthetic-message
|
|
2484
|
+
// branch below the loop).
|
|
2485
|
+
const ctx = makeCtx({
|
|
2486
|
+
loopProvider: {
|
|
2487
|
+
name: "mock-provider",
|
|
2488
|
+
async sendMessage() {
|
|
2489
|
+
throw new Error("upstream 500");
|
|
2490
|
+
},
|
|
2491
|
+
} as unknown as Provider,
|
|
2492
|
+
});
|
|
3272
2493
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
3273
2494
|
|
|
3274
2495
|
// The orphan was written with messageId=undefined.
|
|
@@ -3315,39 +2536,10 @@ describe("session-agent-loop", () => {
|
|
|
3315
2536
|
// observe the sync-invalidation publish path on the same turn.
|
|
3316
2537
|
projectAssistantMessageMock.mockImplementationOnce(() => true);
|
|
3317
2538
|
|
|
3318
|
-
|
|
3319
|
-
|
|
3320
|
-
|
|
3321
|
-
|
|
3322
|
-
// or before the loop returns. Without the await the projector's
|
|
3323
|
-
// synchronous call still races against the test's assertion phase
|
|
3324
|
-
// because the indexer's `await` yields microtasks.
|
|
3325
|
-
await onEvent({
|
|
3326
|
-
type: "message_complete",
|
|
3327
|
-
message: {
|
|
3328
|
-
role: "assistant",
|
|
3329
|
-
content: [{ type: "text", text: "indexed reply" }],
|
|
3330
|
-
},
|
|
3331
|
-
});
|
|
3332
|
-
onEvent({
|
|
3333
|
-
type: "usage",
|
|
3334
|
-
inputTokens: 10,
|
|
3335
|
-
outputTokens: 5,
|
|
3336
|
-
model: "test",
|
|
3337
|
-
providerDurationMs: 50,
|
|
3338
|
-
});
|
|
3339
|
-
return [
|
|
3340
|
-
...messages,
|
|
3341
|
-
{
|
|
3342
|
-
role: "assistant" as const,
|
|
3343
|
-
content: [
|
|
3344
|
-
{ type: "text", text: "indexed reply" },
|
|
3345
|
-
] as ContentBlock[],
|
|
3346
|
-
},
|
|
3347
|
-
];
|
|
3348
|
-
};
|
|
3349
|
-
|
|
3350
|
-
const ctx = makeCtx({ agentLoopRun });
|
|
2539
|
+
// GIVEN a real loop that answers with a single finalized assistant turn
|
|
2540
|
+
const ctx = makeCtx({
|
|
2541
|
+
providerResponses: [textResponse("indexed reply")],
|
|
2542
|
+
});
|
|
3351
2543
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
3352
2544
|
|
|
3353
2545
|
// Indexer fired with the reserved row's id + the finalized content.
|
|
@@ -3410,34 +2602,8 @@ describe("session-agent-loop", () => {
|
|
|
3410
2602
|
metadata: null,
|
|
3411
2603
|
};
|
|
3412
2604
|
|
|
3413
|
-
|
|
3414
|
-
|
|
3415
|
-
// See sibling test — `message_complete` must be awaited so the
|
|
3416
|
-
// projector call lands before the assertion phase.
|
|
3417
|
-
await onEvent({
|
|
3418
|
-
type: "message_complete",
|
|
3419
|
-
message: {
|
|
3420
|
-
role: "assistant",
|
|
3421
|
-
content: [{ type: "text", text: "quiet" }],
|
|
3422
|
-
},
|
|
3423
|
-
});
|
|
3424
|
-
onEvent({
|
|
3425
|
-
type: "usage",
|
|
3426
|
-
inputTokens: 1,
|
|
3427
|
-
outputTokens: 1,
|
|
3428
|
-
model: "test",
|
|
3429
|
-
providerDurationMs: 1,
|
|
3430
|
-
});
|
|
3431
|
-
return [
|
|
3432
|
-
...messages,
|
|
3433
|
-
{
|
|
3434
|
-
role: "assistant" as const,
|
|
3435
|
-
content: [{ type: "text", text: "quiet" }] as ContentBlock[],
|
|
3436
|
-
},
|
|
3437
|
-
];
|
|
3438
|
-
};
|
|
3439
|
-
|
|
3440
|
-
const ctx = makeCtx({ agentLoopRun });
|
|
2605
|
+
// GIVEN a real loop that answers with a single finalized assistant turn
|
|
2606
|
+
const ctx = makeCtx({ providerResponses: [textResponse("quiet")] });
|
|
3441
2607
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
3442
2608
|
|
|
3443
2609
|
expect(projectAssistantMessageMock).toHaveBeenCalledTimes(1);
|
|
@@ -3462,40 +2628,33 @@ describe("session-agent-loop", () => {
|
|
|
3462
2628
|
// Indexer/projector mocks default to no-op; no finalized row in this
|
|
3463
2629
|
// test, so `mockMessageById` stays null.
|
|
3464
2630
|
|
|
3465
|
-
|
|
3466
|
-
|
|
3467
|
-
|
|
3468
|
-
|
|
3469
|
-
|
|
3470
|
-
|
|
3471
|
-
|
|
3472
|
-
|
|
3473
|
-
|
|
3474
|
-
|
|
3475
|
-
|
|
3476
|
-
|
|
3477
|
-
content: [{ type: "text", text: "retry succeeded" }],
|
|
3478
|
-
},
|
|
3479
|
-
});
|
|
3480
|
-
onEvent({
|
|
3481
|
-
type: "usage",
|
|
3482
|
-
inputTokens: 5,
|
|
3483
|
-
outputTokens: 3,
|
|
3484
|
-
model: "test",
|
|
3485
|
-
providerDurationMs: 25,
|
|
3486
|
-
});
|
|
3487
|
-
return [
|
|
3488
|
-
...messages,
|
|
3489
|
-
{
|
|
3490
|
-
role: "assistant" as const,
|
|
3491
|
-
content: [
|
|
3492
|
-
{ type: "text", text: "retry succeeded" },
|
|
3493
|
-
] as ContentBlock[],
|
|
3494
|
-
},
|
|
3495
|
-
];
|
|
3496
|
-
};
|
|
2631
|
+
// A single reducer tier converges the oversized context so the
|
|
2632
|
+
// orchestrator re-enters the loop after the first call fails.
|
|
2633
|
+
mockReducerStepFn = (msgs: Message[]) => ({
|
|
2634
|
+
messages: msgs,
|
|
2635
|
+
tier: "forced_compaction",
|
|
2636
|
+
state: {
|
|
2637
|
+
appliedTiers: ["forced_compaction"],
|
|
2638
|
+
injectionMode: "full",
|
|
2639
|
+
exhausted: false,
|
|
2640
|
+
},
|
|
2641
|
+
estimatedTokens: 5000,
|
|
2642
|
+
});
|
|
3497
2643
|
|
|
3498
|
-
|
|
2644
|
+
// GIVEN a real loop whose first call rejects with context-too-large
|
|
2645
|
+
// (reserving msg-strand-A but never finalizing it), then recovers via
|
|
2646
|
+
// convergence on re-entry. The re-entry's `llm_call_started` must
|
|
2647
|
+
// delete the stranded msg-strand-A before reserving msg-strand-B.
|
|
2648
|
+
const ctx = makeCtx({
|
|
2649
|
+
providerResponses: [
|
|
2650
|
+
new Error("context_length_exceeded"),
|
|
2651
|
+
textResponse("retry succeeded"),
|
|
2652
|
+
],
|
|
2653
|
+
contextWindowManager: {
|
|
2654
|
+
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
2655
|
+
maybeCompact: async () => ({ compacted: false }),
|
|
2656
|
+
} as unknown as AgentLoopConversationContext["contextWindowManager"],
|
|
2657
|
+
});
|
|
3499
2658
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
3500
2659
|
|
|
3501
2660
|
// Exactly one delete fires — for msg-strand-A, before the second
|
|
@@ -3523,27 +2682,20 @@ describe("session-agent-loop", () => {
|
|
|
3523
2682
|
id: "msg-orphaned-reservation",
|
|
3524
2683
|
}));
|
|
3525
2684
|
|
|
3526
|
-
|
|
3527
|
-
|
|
3528
|
-
|
|
3529
|
-
|
|
3530
|
-
|
|
3531
|
-
|
|
3532
|
-
|
|
3533
|
-
|
|
3534
|
-
|
|
3535
|
-
|
|
3536
|
-
|
|
3537
|
-
|
|
3538
|
-
|
|
3539
|
-
|
|
3540
|
-
});
|
|
3541
|
-
// No assistant message in the result — the synthetic-error branch
|
|
3542
|
-
// below the agent loop fires.
|
|
3543
|
-
return messages;
|
|
3544
|
-
};
|
|
3545
|
-
|
|
3546
|
-
const ctx = makeCtx({ agentLoopRun });
|
|
2685
|
+
// GIVEN a real loop that reserves an assistant row at
|
|
2686
|
+
// `llm_call_started`, then whose provider rejects: the loop emits
|
|
2687
|
+
// `provider_error` (writing the llm_request_log row) and `error`
|
|
2688
|
+
// (arming `state.providerErrorUserMessage`), exiting with no
|
|
2689
|
+
// `message_complete` so the synthetic-error branch below the loop
|
|
2690
|
+
// fires.
|
|
2691
|
+
const ctx = makeCtx({
|
|
2692
|
+
loopProvider: {
|
|
2693
|
+
name: "mock-provider",
|
|
2694
|
+
async sendMessage() {
|
|
2695
|
+
throw new Error("upstream 500");
|
|
2696
|
+
},
|
|
2697
|
+
} as unknown as Provider,
|
|
2698
|
+
});
|
|
3547
2699
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
3548
2700
|
|
|
3549
2701
|
// The orphan was deleted exactly once, before the synthetic error
|
|
@@ -3599,40 +2751,23 @@ describe("session-agent-loop", () => {
|
|
|
3599
2751
|
metadata: null,
|
|
3600
2752
|
};
|
|
3601
2753
|
|
|
3602
|
-
|
|
3603
|
-
|
|
3604
|
-
|
|
3605
|
-
|
|
3606
|
-
|
|
3607
|
-
|
|
3608
|
-
|
|
3609
|
-
|
|
3610
|
-
|
|
3611
|
-
|
|
3612
|
-
|
|
3613
|
-
|
|
3614
|
-
content: [{ type: "text", text: "Hello, world." }],
|
|
3615
|
-
},
|
|
3616
|
-
});
|
|
3617
|
-
onEvent({
|
|
3618
|
-
type: "usage",
|
|
3619
|
-
inputTokens: 10,
|
|
3620
|
-
outputTokens: 5,
|
|
3621
|
-
model: "test",
|
|
3622
|
-
providerDurationMs: 50,
|
|
3623
|
-
});
|
|
3624
|
-
return [
|
|
3625
|
-
...messages,
|
|
3626
|
-
{
|
|
3627
|
-
role: "assistant" as const,
|
|
3628
|
-
content: [
|
|
3629
|
-
{ type: "text", text: "Hello, world." },
|
|
3630
|
-
] as ContentBlock[],
|
|
2754
|
+
// GIVEN a real loop whose provider streams two small deltas (each under
|
|
2755
|
+
// the 1024-char size gate) then holds the turn open past the 250ms
|
|
2756
|
+
// debounce window before completing, so a single debounced partial
|
|
2757
|
+
// flush lands before `message_complete`.
|
|
2758
|
+
const ctx = makeCtx({
|
|
2759
|
+
loopProvider: {
|
|
2760
|
+
name: "mock-provider",
|
|
2761
|
+
async sendMessage(_messages, options) {
|
|
2762
|
+
options?.onEvent?.({ type: "text_delta", text: "Hello, " });
|
|
2763
|
+
options?.onEvent?.({ type: "text_delta", text: "world." });
|
|
2764
|
+
await new Promise((resolve) => setTimeout(resolve, 1100));
|
|
2765
|
+
return textResponse("Hello, world.");
|
|
3631
2766
|
},
|
|
3632
|
-
|
|
3633
|
-
};
|
|
2767
|
+
},
|
|
2768
|
+
});
|
|
3634
2769
|
|
|
3635
|
-
|
|
2770
|
+
// WHEN the orchestrator runs the turn to completion
|
|
3636
2771
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
3637
2772
|
|
|
3638
2773
|
// Exactly two `updateContent` calls land:
|
|
@@ -3668,70 +2803,38 @@ describe("session-agent-loop", () => {
|
|
|
3668
2803
|
metadata: null,
|
|
3669
2804
|
};
|
|
3670
2805
|
|
|
3671
|
-
|
|
3672
|
-
|
|
3673
|
-
|
|
3674
|
-
|
|
3675
|
-
|
|
3676
|
-
|
|
3677
|
-
|
|
3678
|
-
|
|
3679
|
-
|
|
3680
|
-
input: { path: "/foo" },
|
|
3681
|
-
});
|
|
3682
|
-
// Yield a microtask so any (incorrectly) fire-and-forget
|
|
3683
|
-
// pipeline call has a chance to land before message_complete.
|
|
3684
|
-
await new Promise((resolve) => setImmediate(resolve));
|
|
3685
|
-
onEvent({
|
|
3686
|
-
type: "tool_result",
|
|
3687
|
-
toolUseId: "tu-no-flush",
|
|
3688
|
-
content: "ok",
|
|
3689
|
-
isError: false,
|
|
3690
|
-
});
|
|
3691
|
-
await onEvent({
|
|
3692
|
-
type: "message_complete",
|
|
3693
|
-
message: {
|
|
3694
|
-
role: "assistant",
|
|
3695
|
-
content: [
|
|
3696
|
-
{
|
|
3697
|
-
type: "tool_use",
|
|
3698
|
-
id: "tu-no-flush",
|
|
3699
|
-
name: "file_read",
|
|
3700
|
-
input: { path: "/foo" },
|
|
3701
|
-
},
|
|
3702
|
-
],
|
|
3703
|
-
},
|
|
3704
|
-
});
|
|
3705
|
-
onEvent({
|
|
3706
|
-
type: "usage",
|
|
3707
|
-
inputTokens: 10,
|
|
3708
|
-
outputTokens: 5,
|
|
3709
|
-
model: "test",
|
|
3710
|
-
providerDurationMs: 50,
|
|
3711
|
-
});
|
|
3712
|
-
return [
|
|
3713
|
-
...messages,
|
|
2806
|
+
// GIVEN a real loop that runs one tool turn — the loop emits `tool_use`
|
|
2807
|
+
// strictly AFTER `message_complete` — and then answers with a final
|
|
2808
|
+
// text turn. The tool executor returns immediately.
|
|
2809
|
+
const ctx = makeCtx({
|
|
2810
|
+
providerResponses: [
|
|
2811
|
+
toolUseResponse("tu-no-flush", "file_read", { path: "/foo" }),
|
|
2812
|
+
textResponse("done"),
|
|
2813
|
+
],
|
|
2814
|
+
loopTools: [
|
|
3714
2815
|
{
|
|
3715
|
-
|
|
3716
|
-
|
|
3717
|
-
|
|
3718
|
-
|
|
3719
|
-
|
|
3720
|
-
|
|
3721
|
-
input: { path: "/foo" },
|
|
3722
|
-
},
|
|
3723
|
-
] as ContentBlock[],
|
|
2816
|
+
name: "file_read",
|
|
2817
|
+
description: "Read a file",
|
|
2818
|
+
input_schema: {
|
|
2819
|
+
type: "object",
|
|
2820
|
+
properties: { path: { type: "string" } },
|
|
2821
|
+
},
|
|
3724
2822
|
},
|
|
3725
|
-
]
|
|
3726
|
-
|
|
2823
|
+
],
|
|
2824
|
+
toolExecutor: async () => ({ content: "ok", isError: false }),
|
|
2825
|
+
});
|
|
3727
2826
|
|
|
3728
|
-
|
|
2827
|
+
// WHEN the orchestrator runs the turn to completion
|
|
3729
2828
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
3730
2829
|
|
|
3731
|
-
//
|
|
3732
|
-
//
|
|
3733
|
-
//
|
|
3734
|
-
|
|
2830
|
+
// Four authoritative writes land and no stray partial flush:
|
|
2831
|
+
// - one final flush per `message_complete` (the tool turn and the final
|
|
2832
|
+
// text turn), plus
|
|
2833
|
+
// - two grouped tool-result user-row writes (persist-on-arrival and the
|
|
2834
|
+
// turn-boundary finalize).
|
|
2835
|
+
// `handleToolUse` contributes no partial flush of its own; one would make
|
|
2836
|
+
// this 5. That stray flush is the regression this test guards against.
|
|
2837
|
+
expect(updateMessageContentMock).toHaveBeenCalledTimes(4);
|
|
3735
2838
|
});
|
|
3736
2839
|
|
|
3737
2840
|
test("handleMessageComplete clears any pending debounce timer before the final flush", async () => {
|
|
@@ -3744,45 +2847,53 @@ describe("session-agent-loop", () => {
|
|
|
3744
2847
|
metadata: null,
|
|
3745
2848
|
};
|
|
3746
2849
|
|
|
3747
|
-
|
|
3748
|
-
|
|
3749
|
-
|
|
3750
|
-
|
|
3751
|
-
|
|
3752
|
-
|
|
3753
|
-
|
|
3754
|
-
|
|
3755
|
-
|
|
3756
|
-
|
|
3757
|
-
|
|
2850
|
+
// GIVEN a real loop whose first turn streams a short delta (scheduling a
|
|
2851
|
+
// debounce timer) and completes as a tool turn — so `message_complete`
|
|
2852
|
+
// arrives before the 250ms timer and clears it. The tool executor then
|
|
2853
|
+
// holds the loop open well past the original debounce window, proving a
|
|
2854
|
+
// late timer does NOT fire a stray partial flush, before a final text
|
|
2855
|
+
// turn ends the run.
|
|
2856
|
+
const ctx = makeCtx({
|
|
2857
|
+
providerResponses: [
|
|
2858
|
+
{
|
|
2859
|
+
content: [
|
|
2860
|
+
{ type: "text", text: "Quick reply." },
|
|
2861
|
+
{
|
|
2862
|
+
type: "tool_use",
|
|
2863
|
+
id: "tu-keep-alive",
|
|
2864
|
+
name: "file_read",
|
|
2865
|
+
input: {},
|
|
2866
|
+
},
|
|
2867
|
+
],
|
|
2868
|
+
model: "mock-model",
|
|
2869
|
+
usage: { inputTokens: 10, outputTokens: 5 },
|
|
2870
|
+
stopReason: "tool_use",
|
|
3758
2871
|
},
|
|
3759
|
-
|
|
3760
|
-
|
|
3761
|
-
|
|
3762
|
-
inputTokens: 10,
|
|
3763
|
-
outputTokens: 5,
|
|
3764
|
-
model: "test",
|
|
3765
|
-
providerDurationMs: 50,
|
|
3766
|
-
});
|
|
3767
|
-
// Wait past the original debounce window to prove a late timer
|
|
3768
|
-
// does NOT fire a stray partial flush.
|
|
3769
|
-
await new Promise((resolve) => setTimeout(resolve, 1100));
|
|
3770
|
-
return [
|
|
3771
|
-
...messages,
|
|
2872
|
+
textResponse("done"),
|
|
2873
|
+
],
|
|
2874
|
+
loopTools: [
|
|
3772
2875
|
{
|
|
3773
|
-
|
|
3774
|
-
|
|
2876
|
+
name: "file_read",
|
|
2877
|
+
description: "Read a file",
|
|
2878
|
+
input_schema: { type: "object", properties: {} },
|
|
3775
2879
|
},
|
|
3776
|
-
]
|
|
3777
|
-
|
|
2880
|
+
],
|
|
2881
|
+
toolExecutor: async () => {
|
|
2882
|
+
await new Promise((resolve) => setTimeout(resolve, 1100));
|
|
2883
|
+
return { content: "ok", isError: false };
|
|
2884
|
+
},
|
|
2885
|
+
});
|
|
3778
2886
|
|
|
3779
|
-
|
|
2887
|
+
// WHEN the orchestrator runs the turn to completion
|
|
3780
2888
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
3781
2889
|
|
|
3782
|
-
//
|
|
3783
|
-
//
|
|
3784
|
-
//
|
|
3785
|
-
|
|
2890
|
+
// Four authoritative writes land: one final flush per `message_complete`
|
|
2891
|
+
// (the tool turn and the final text turn) plus two grouped tool-result
|
|
2892
|
+
// user-row writes (persist-on-arrival and the turn-boundary finalize).
|
|
2893
|
+
// The debounced partial would have fired around T+250ms — during the tool
|
|
2894
|
+
// executor's hold — but the timer-clear at the top of
|
|
2895
|
+
// `handleMessageComplete` cancels it, so no stray fifth flush appears.
|
|
2896
|
+
expect(updateMessageContentMock).toHaveBeenCalledTimes(4);
|
|
3786
2897
|
});
|
|
3787
2898
|
|
|
3788
2899
|
test("partial flushes never trigger the indexer or attention projector", async () => {
|
|
@@ -3795,54 +2906,29 @@ describe("session-agent-loop", () => {
|
|
|
3795
2906
|
metadata: null,
|
|
3796
2907
|
};
|
|
3797
2908
|
|
|
3798
|
-
|
|
3799
|
-
|
|
3800
|
-
|
|
3801
|
-
|
|
3802
|
-
|
|
3803
|
-
|
|
3804
|
-
|
|
3805
|
-
|
|
3806
|
-
|
|
3807
|
-
|
|
3808
|
-
|
|
3809
|
-
|
|
3810
|
-
|
|
3811
|
-
|
|
3812
|
-
|
|
3813
|
-
|
|
3814
|
-
indexerCallsBeforeComplete,
|
|
3815
|
-
projectorCallsBeforeComplete,
|
|
3816
|
-
];
|
|
3817
|
-
await onEvent({
|
|
3818
|
-
type: "message_complete",
|
|
3819
|
-
message: {
|
|
3820
|
-
role: "assistant",
|
|
3821
|
-
content: [{ type: "text", text: "hello world" }],
|
|
3822
|
-
},
|
|
3823
|
-
});
|
|
3824
|
-
onEvent({
|
|
3825
|
-
type: "usage",
|
|
3826
|
-
inputTokens: 10,
|
|
3827
|
-
outputTokens: 5,
|
|
3828
|
-
model: "test",
|
|
3829
|
-
providerDurationMs: 50,
|
|
3830
|
-
});
|
|
3831
|
-
return [
|
|
3832
|
-
...messages,
|
|
3833
|
-
{
|
|
3834
|
-
role: "assistant" as const,
|
|
3835
|
-
content: [{ type: "text", text: "hello world" }] as ContentBlock[],
|
|
2909
|
+
// GIVEN a real loop whose provider streams a delta then holds the turn
|
|
2910
|
+
// open past the 250ms debounce window so the partial flush lands BEFORE
|
|
2911
|
+
// `message_complete`. The indexer/projector counts are snapshotted at
|
|
2912
|
+
// that mid-turn point (after the partial flush, before completion).
|
|
2913
|
+
let snapshot: [number, number] | undefined;
|
|
2914
|
+
const ctx = makeCtx({
|
|
2915
|
+
loopProvider: {
|
|
2916
|
+
name: "mock-provider",
|
|
2917
|
+
async sendMessage(_messages, options) {
|
|
2918
|
+
options?.onEvent?.({ type: "text_delta", text: "hello world" });
|
|
2919
|
+
await new Promise((resolve) => setTimeout(resolve, 1100));
|
|
2920
|
+
snapshot = [
|
|
2921
|
+
indexMessageNowMock.mock.calls.length,
|
|
2922
|
+
projectAssistantMessageMock.mock.calls.length,
|
|
2923
|
+
];
|
|
2924
|
+
return textResponse("hello world");
|
|
3836
2925
|
},
|
|
3837
|
-
|
|
3838
|
-
};
|
|
2926
|
+
},
|
|
2927
|
+
});
|
|
3839
2928
|
|
|
3840
|
-
|
|
2929
|
+
// WHEN the orchestrator runs the turn to completion
|
|
3841
2930
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
3842
2931
|
|
|
3843
|
-
const snapshot = (
|
|
3844
|
-
ctx as unknown as { __partialSnapshot?: [number, number] }
|
|
3845
|
-
).__partialSnapshot;
|
|
3846
2932
|
expect(snapshot).toBeDefined();
|
|
3847
2933
|
// Indexer + projector were both ZERO during the mid-turn partial
|
|
3848
2934
|
// flush — they only fire from `handleMessageComplete` after the
|
|
@@ -3870,35 +2956,21 @@ describe("session-agent-loop", () => {
|
|
|
3870
2956
|
const ghToken = "ghp_" + "a".repeat(36);
|
|
3871
2957
|
const payload = "Here's the key: " + ghToken + " enjoy.";
|
|
3872
2958
|
|
|
3873
|
-
|
|
3874
|
-
|
|
3875
|
-
|
|
3876
|
-
|
|
3877
|
-
|
|
3878
|
-
|
|
3879
|
-
|
|
3880
|
-
|
|
3881
|
-
|
|
3882
|
-
|
|
3883
|
-
},
|
|
3884
|
-
});
|
|
3885
|
-
onEvent({
|
|
3886
|
-
type: "usage",
|
|
3887
|
-
inputTokens: 10,
|
|
3888
|
-
outputTokens: 5,
|
|
3889
|
-
model: "test",
|
|
3890
|
-
providerDurationMs: 50,
|
|
3891
|
-
});
|
|
3892
|
-
return [
|
|
3893
|
-
...messages,
|
|
3894
|
-
{
|
|
3895
|
-
role: "assistant" as const,
|
|
3896
|
-
content: [{ type: "text", text: payload }] as ContentBlock[],
|
|
2959
|
+
// GIVEN a real loop whose provider streams the PAT-bearing payload as a
|
|
2960
|
+
// delta then holds the turn open past the 250ms debounce window so the
|
|
2961
|
+
// partial flush lands before `message_complete`.
|
|
2962
|
+
const ctx = makeCtx({
|
|
2963
|
+
loopProvider: {
|
|
2964
|
+
name: "mock-provider",
|
|
2965
|
+
async sendMessage(_messages, options) {
|
|
2966
|
+
options?.onEvent?.({ type: "text_delta", text: payload });
|
|
2967
|
+
await new Promise((resolve) => setTimeout(resolve, 1100));
|
|
2968
|
+
return textResponse(payload);
|
|
3897
2969
|
},
|
|
3898
|
-
|
|
3899
|
-
};
|
|
2970
|
+
},
|
|
2971
|
+
});
|
|
3900
2972
|
|
|
3901
|
-
|
|
2973
|
+
// WHEN the orchestrator runs the turn to completion
|
|
3902
2974
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
3903
2975
|
|
|
3904
2976
|
expect(updateMessageContentMock).toHaveBeenCalledTimes(2);
|
|
@@ -3922,26 +2994,21 @@ describe("session-agent-loop", () => {
|
|
|
3922
2994
|
id: "msg-orphan-with-partial",
|
|
3923
2995
|
}));
|
|
3924
2996
|
|
|
3925
|
-
|
|
3926
|
-
|
|
3927
|
-
|
|
3928
|
-
|
|
3929
|
-
|
|
3930
|
-
|
|
3931
|
-
|
|
3932
|
-
|
|
3933
|
-
|
|
3934
|
-
|
|
3935
|
-
|
|
3936
|
-
}
|
|
3937
|
-
|
|
3938
|
-
type: "error",
|
|
3939
|
-
error: new Error("upstream 500"),
|
|
3940
|
-
});
|
|
3941
|
-
return messages;
|
|
3942
|
-
};
|
|
2997
|
+
// GIVEN a real loop whose provider streams a delta — landing a debounced
|
|
2998
|
+
// partial flush on the reserved row — then rejects, so the loop emits
|
|
2999
|
+
// `provider_error` and `error` and exits with no `message_complete`.
|
|
3000
|
+
const ctx = makeCtx({
|
|
3001
|
+
loopProvider: {
|
|
3002
|
+
name: "mock-provider",
|
|
3003
|
+
async sendMessage(_messages, options) {
|
|
3004
|
+
options?.onEvent?.({ type: "text_delta", text: "hello world" });
|
|
3005
|
+
await new Promise((resolve) => setTimeout(resolve, 1100));
|
|
3006
|
+
throw new Error("upstream 500");
|
|
3007
|
+
},
|
|
3008
|
+
},
|
|
3009
|
+
});
|
|
3943
3010
|
|
|
3944
|
-
|
|
3011
|
+
// WHEN the orchestrator runs the turn
|
|
3945
3012
|
await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
|
|
3946
3013
|
|
|
3947
3014
|
// Partial flush fired exactly once (before the provider error).
|
|
@@ -4442,51 +3509,32 @@ describe("session-agent-loop", () => {
|
|
|
4442
3509
|
compactableStartIndex: 0,
|
|
4443
3510
|
};
|
|
4444
3511
|
|
|
4445
|
-
const rawMidLoopBasis: Message[] = [
|
|
4446
|
-
{
|
|
4447
|
-
role: "user",
|
|
4448
|
-
content: [{ type: "text", text: "fresh DB basis user row" }],
|
|
4449
|
-
},
|
|
4450
|
-
{
|
|
4451
|
-
role: "assistant",
|
|
4452
|
-
content: [{ type: "text", text: "partial assistant response" }],
|
|
4453
|
-
},
|
|
4454
|
-
];
|
|
4455
3512
|
const maybeCompactInputs: Message[][] = [];
|
|
4456
|
-
let runCount = 0;
|
|
4457
|
-
const agentLoopRun: AgentLoopRun = async (
|
|
4458
|
-
messages,
|
|
4459
|
-
_onEvent,
|
|
4460
|
-
options,
|
|
4461
|
-
) => {
|
|
4462
|
-
runCount++;
|
|
4463
|
-
if (runCount === 1) {
|
|
4464
|
-
// The loop reaches its mid-loop budget checkpoint with the raw
|
|
4465
|
-
// persistent basis as its in-loop history; the wrapped onCheckpoint
|
|
4466
|
-
// trips the gate and runs inline compaction over that basis.
|
|
4467
|
-
mockEstimateTokens = 90_000;
|
|
4468
|
-
const decision = await options?.onCheckpoint?.({
|
|
4469
|
-
turnIndex: 0,
|
|
4470
|
-
toolCount: 1,
|
|
4471
|
-
hasToolUse: true,
|
|
4472
|
-
history: rawMidLoopBasis,
|
|
4473
|
-
});
|
|
4474
|
-
mockEstimateTokens = 1000;
|
|
4475
|
-
if (decision !== "continue") {
|
|
4476
|
-
return rawMidLoopBasis;
|
|
4477
|
-
}
|
|
4478
|
-
}
|
|
4479
|
-
return [
|
|
4480
|
-
...messages,
|
|
4481
|
-
{
|
|
4482
|
-
role: "assistant" as const,
|
|
4483
|
-
content: [{ type: "text" as const, text: "final response" }],
|
|
4484
|
-
},
|
|
4485
|
-
];
|
|
4486
|
-
};
|
|
4487
3513
|
|
|
3514
|
+
// AND a real loop that runs one tool turn and then a final text turn.
|
|
3515
|
+
// The tool executor raises the token estimate above the mid-loop budget
|
|
3516
|
+
// threshold so the loop compacts in place at the post-tool checkpoint —
|
|
3517
|
+
// over its own in-loop history, which does not match the loaded Slack
|
|
3518
|
+
// rows.
|
|
4488
3519
|
const ctx = makeCtx({
|
|
4489
|
-
|
|
3520
|
+
providerResponses: [
|
|
3521
|
+
toolUseResponse("tu-mid-loop", "file_read", { path: "/foo" }),
|
|
3522
|
+
textResponse("final response"),
|
|
3523
|
+
],
|
|
3524
|
+
loopTools: [
|
|
3525
|
+
{
|
|
3526
|
+
name: "file_read",
|
|
3527
|
+
description: "Read a file",
|
|
3528
|
+
input_schema: {
|
|
3529
|
+
type: "object",
|
|
3530
|
+
properties: { path: { type: "string" } },
|
|
3531
|
+
},
|
|
3532
|
+
},
|
|
3533
|
+
],
|
|
3534
|
+
toolExecutor: async () => {
|
|
3535
|
+
mockEstimateTokens = 90_000;
|
|
3536
|
+
return { content: "ok", isError: false };
|
|
3537
|
+
},
|
|
4490
3538
|
channelCapabilities: {
|
|
4491
3539
|
channel: "slack",
|
|
4492
3540
|
dashboardCapable: false,
|
|
@@ -4523,6 +3571,9 @@ describe("session-agent-loop", () => {
|
|
|
4523
3571
|
summaryText: "",
|
|
4524
3572
|
};
|
|
4525
3573
|
}
|
|
3574
|
+
// The mid-loop gate compacted its in-loop basis; drop the estimate
|
|
3575
|
+
// back under budget so the post-compaction provider call proceeds.
|
|
3576
|
+
mockEstimateTokens = 1000;
|
|
4526
3577
|
return {
|
|
4527
3578
|
compacted: true,
|
|
4528
3579
|
messages: [
|
|
@@ -4551,7 +3602,9 @@ describe("session-agent-loop", () => {
|
|
|
4551
3602
|
await runAgentLoopImpl(ctx, "next reply", "user-msg-mid-loop", () => {});
|
|
4552
3603
|
|
|
4553
3604
|
expect(maybeCompactInputs[0]).toBe(renderedSlackMessages);
|
|
4554
|
-
|
|
3605
|
+
// The mid-loop gate compacts the loop's own in-loop history, never the
|
|
3606
|
+
// loaded Slack rows — the mismatch this test guards against.
|
|
3607
|
+
expect(maybeCompactInputs[1]).not.toBe(renderedSlackMessages);
|
|
4555
3608
|
expect(getSlackCompactionWatermarkForPrefixMock).toHaveBeenCalledWith(
|
|
4556
3609
|
null,
|
|
4557
3610
|
2,
|
|
@@ -4824,67 +3877,32 @@ describe("session-agent-loop", () => {
|
|
|
4824
3877
|
estimatedTokens: 5000,
|
|
4825
3878
|
});
|
|
4826
3879
|
|
|
4827
|
-
|
|
4828
|
-
|
|
4829
|
-
|
|
4830
|
-
|
|
4831
|
-
|
|
4832
|
-
|
|
4833
|
-
|
|
4834
|
-
|
|
4835
|
-
|
|
4836
|
-
|
|
4837
|
-
|
|
4838
|
-
|
|
4839
|
-
type: "error",
|
|
4840
|
-
error: new Error("context_length_exceeded"),
|
|
4841
|
-
});
|
|
4842
|
-
onEvent({
|
|
4843
|
-
type: "usage",
|
|
4844
|
-
inputTokens: 100,
|
|
4845
|
-
outputTokens: 0,
|
|
4846
|
-
model: "test-model",
|
|
4847
|
-
providerDurationMs: 50,
|
|
4848
|
-
});
|
|
4849
|
-
return [
|
|
4850
|
-
...messages,
|
|
4851
|
-
{
|
|
4852
|
-
role: "assistant" as const,
|
|
4853
|
-
content: [{ type: "text", text: "partial" }] as ContentBlock[],
|
|
4854
|
-
},
|
|
4855
|
-
];
|
|
4856
|
-
}
|
|
4857
|
-
onEvent({
|
|
4858
|
-
type: "message_complete",
|
|
4859
|
-
message: {
|
|
4860
|
-
role: "assistant",
|
|
4861
|
-
content: [{ type: "text", text: "recovered" }],
|
|
4862
|
-
},
|
|
4863
|
-
});
|
|
4864
|
-
onEvent({
|
|
4865
|
-
type: "usage",
|
|
4866
|
-
inputTokens: 50,
|
|
4867
|
-
outputTokens: 25,
|
|
4868
|
-
model: "test-model",
|
|
4869
|
-
providerDurationMs: 100,
|
|
4870
|
-
});
|
|
4871
|
-
return [
|
|
4872
|
-
...messages,
|
|
3880
|
+
// GIVEN a real loop that appends a tool turn (so the run reports
|
|
3881
|
+
// `appendedNewMessages`) and then rejects with a context-too-large
|
|
3882
|
+
// error on the following call — the orchestrator strips that appended
|
|
3883
|
+
// history during its bounded convergence path before a final call
|
|
3884
|
+
// recovers.
|
|
3885
|
+
const ctx = makeCtx({
|
|
3886
|
+
providerResponses: [
|
|
3887
|
+
toolUseResponse("t1", "file_read", {}),
|
|
3888
|
+
new Error("context_length_exceeded"),
|
|
3889
|
+
textResponse("recovered"),
|
|
3890
|
+
],
|
|
3891
|
+
loopTools: [
|
|
4873
3892
|
{
|
|
4874
|
-
|
|
4875
|
-
|
|
3893
|
+
name: "file_read",
|
|
3894
|
+
description: "Read a file",
|
|
3895
|
+
input_schema: { type: "object", properties: {} },
|
|
4876
3896
|
},
|
|
4877
|
-
]
|
|
4878
|
-
|
|
4879
|
-
|
|
4880
|
-
const ctx = makeCtx({
|
|
4881
|
-
agentLoopRun,
|
|
3897
|
+
],
|
|
3898
|
+
toolExecutor: async () => ({ content: "ok", isError: false }),
|
|
4882
3899
|
contextWindowManager: {
|
|
4883
3900
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
4884
3901
|
maybeCompact: async () => ({ compacted: false }),
|
|
4885
3902
|
} as unknown as AgentLoopConversationContext["contextWindowManager"],
|
|
4886
3903
|
});
|
|
4887
3904
|
|
|
3905
|
+
// WHEN the orchestrator runs the turn to completion
|
|
4888
3906
|
await runAgentLoopImpl(ctx, "hello", "msg-1", () => {});
|
|
4889
3907
|
|
|
4890
3908
|
const stripCalls = setConversationHistoryStrippedAtMock.mock.calls.filter(
|
|
@@ -4909,59 +3927,24 @@ describe("session-agent-loop", () => {
|
|
|
4909
3927
|
estimatedTokens: 5000,
|
|
4910
3928
|
});
|
|
4911
3929
|
|
|
4912
|
-
|
|
4913
|
-
|
|
4914
|
-
|
|
4915
|
-
|
|
4916
|
-
|
|
4917
|
-
|
|
4918
|
-
|
|
4919
|
-
|
|
4920
|
-
|
|
4921
|
-
|
|
4922
|
-
|
|
4923
|
-
error: new Error("context_length_exceeded"),
|
|
4924
|
-
});
|
|
4925
|
-
onEvent({
|
|
4926
|
-
type: "usage",
|
|
4927
|
-
inputTokens: 100,
|
|
4928
|
-
outputTokens: 0,
|
|
4929
|
-
model: "test-model",
|
|
4930
|
-
providerDurationMs: 50,
|
|
4931
|
-
});
|
|
4932
|
-
return [
|
|
4933
|
-
...messages,
|
|
4934
|
-
{
|
|
4935
|
-
role: "assistant" as const,
|
|
4936
|
-
content: [{ type: "text", text: "partial" }] as ContentBlock[],
|
|
4937
|
-
},
|
|
4938
|
-
];
|
|
4939
|
-
}
|
|
4940
|
-
onEvent({
|
|
4941
|
-
type: "message_complete",
|
|
4942
|
-
message: {
|
|
4943
|
-
role: "assistant",
|
|
4944
|
-
content: [{ type: "text", text: "recovered" }],
|
|
4945
|
-
},
|
|
4946
|
-
});
|
|
4947
|
-
onEvent({
|
|
4948
|
-
type: "usage",
|
|
4949
|
-
inputTokens: 50,
|
|
4950
|
-
outputTokens: 25,
|
|
4951
|
-
model: "test-model",
|
|
4952
|
-
providerDurationMs: 100,
|
|
4953
|
-
});
|
|
4954
|
-
return [
|
|
4955
|
-
...messages,
|
|
3930
|
+
// GIVEN a real loop that appends a tool turn and then rejects with a
|
|
3931
|
+
// context-too-large error on the following call, driving the
|
|
3932
|
+
// convergence strip whose marker-write helper is stubbed to throw,
|
|
3933
|
+
// before a final call recovers.
|
|
3934
|
+
const ctx = makeCtx({
|
|
3935
|
+
providerResponses: [
|
|
3936
|
+
toolUseResponse("t1", "file_read", {}),
|
|
3937
|
+
new Error("context_length_exceeded"),
|
|
3938
|
+
textResponse("recovered"),
|
|
3939
|
+
],
|
|
3940
|
+
loopTools: [
|
|
4956
3941
|
{
|
|
4957
|
-
|
|
4958
|
-
|
|
3942
|
+
name: "file_read",
|
|
3943
|
+
description: "Read a file",
|
|
3944
|
+
input_schema: { type: "object", properties: {} },
|
|
4959
3945
|
},
|
|
4960
|
-
]
|
|
4961
|
-
|
|
4962
|
-
|
|
4963
|
-
const ctx = makeCtx({
|
|
4964
|
-
agentLoopRun,
|
|
3946
|
+
],
|
|
3947
|
+
toolExecutor: async () => ({ content: "ok", isError: false }),
|
|
4965
3948
|
contextWindowManager: {
|
|
4966
3949
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
4967
3950
|
maybeCompact: async () => ({ compacted: false }),
|