@vellumai/assistant 0.8.7 → 0.8.8-dev.202606052332.17fc8ea
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +20 -4
- package/bun.lock +2 -2
- package/docker-entrypoint.sh +4 -2
- package/docker-init-apt-root.sh +3 -1
- package/docker-kata-apt-env.sh +3 -1
- package/docker-kata-runtime-family.sh +12 -0
- package/docs/architecture/memory.md +1 -1
- package/examples/plugins/echo/README.md +61 -66
- package/examples/plugins/echo/hooks/post-tool-use.ts +18 -0
- package/examples/plugins/echo/hooks/stop.ts +16 -0
- package/examples/plugins/echo/hooks/user-prompt-submit.ts +18 -0
- package/examples/plugins/echo/package.json +1 -2
- package/examples/plugins/echo/src/emit.ts +19 -0
- package/node_modules/@vellumai/skill-host-contracts/src/server-message.ts +3 -3
- package/node_modules/@vellumai/skill-host-contracts/src/skill-host.ts +7 -6
- package/openapi.yaml +3378 -335
- package/package.json +2 -2
- package/scripts/generate-openapi.ts +68 -41
- package/src/__tests__/agent-loop-exit-reason.test.ts +35 -93
- package/src/__tests__/agent-loop-provider-error-recording.test.ts +1 -1
- package/src/__tests__/agent-loop.test.ts +37 -87
- package/src/__tests__/agent-wake-disk-pressure-callsite.test.ts +2 -0
- package/src/__tests__/annotate-activity-metadata.test.ts +262 -0
- package/src/__tests__/annotate-risk-options.test.ts +2 -3
- package/src/__tests__/anthropic-provider.test.ts +95 -2
- package/src/__tests__/app-control-flow.test.ts +1 -1
- package/src/__tests__/app-dir-path-guard.test.ts +1 -0
- package/src/__tests__/approval-routes-http.test.ts +4 -1
- package/src/__tests__/assistant-event-hub.test.ts +25 -0
- package/src/__tests__/assistant-events-sse-shed.test.ts +8 -0
- package/src/__tests__/{conversation-stream-state.test.ts → assistant-stream-state.test.ts} +252 -91
- package/src/__tests__/auth-fallback-events-store.test.ts +116 -0
- package/src/__tests__/background-workers-disk-pressure.test.ts +6 -0
- package/src/__tests__/btw-routes.test.ts +62 -3
- package/src/__tests__/build-persisted-content.test.ts +184 -0
- package/src/__tests__/catalog-files.test.ts +1 -1
- package/src/__tests__/channel-approval-routes.test.ts +1 -1
- package/src/__tests__/channel-approvals.test.ts +1 -1
- package/src/__tests__/clawhub-files.test.ts +1 -1
- package/src/__tests__/compaction-circuit.test.ts +258 -0
- package/src/__tests__/compaction-direct.test.ts +132 -0
- package/src/__tests__/compaction.benchmark.test.ts +0 -30
- package/src/__tests__/config-watcher.test.ts +1 -1
- package/src/__tests__/conversation-abort-tool-results.test.ts +57 -19
- package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +6 -5
- package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +10 -7
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +316 -1143
- package/src/__tests__/conversation-agent-loop.test.ts +638 -1655
- package/src/__tests__/conversation-analysis-routes.test.ts +6 -0
- package/src/__tests__/conversation-clean-command.test.ts +5 -2
- package/src/__tests__/conversation-history-web-search.test.ts +11 -1
- package/src/__tests__/conversation-pairing.test.ts +4 -31
- package/src/__tests__/conversation-process-app-control-preactivation.test.ts +6 -0
- package/src/__tests__/conversation-provider-retry-repair.test.ts +30 -10
- package/src/__tests__/conversation-queue.test.ts +2 -0
- package/src/__tests__/conversation-routes-disk-view.test.ts +3 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +6 -5
- package/src/__tests__/conversation-runtime-assembly.test.ts +310 -300
- package/src/__tests__/conversation-runtime-workspace.test.ts +105 -45
- package/src/__tests__/conversation-slash-commands.test.ts +8 -42
- package/src/__tests__/conversation-slash-queue.test.ts +6 -1
- package/src/__tests__/conversation-starter-routes.test.ts +14 -6
- package/src/__tests__/conversation-surfaces-action-delivery.test.ts +84 -0
- package/src/__tests__/conversation-sync-tags.test.ts +27 -15
- package/src/__tests__/conversation-title-service.test.ts +135 -2
- package/src/__tests__/conversation-workspace-cache-state.test.ts +17 -16
- package/src/__tests__/conversation-workspace-injection.test.ts +67 -2
- package/src/__tests__/conversation-workspace-tool-tracking.test.ts +7 -6
- package/src/__tests__/conversations-import-system-filter.test.ts +101 -0
- package/src/__tests__/cross-provider-web-search.test.ts +214 -1
- package/src/__tests__/db-acp-history.test.ts +101 -0
- package/src/__tests__/db-schedule-syntax-migration.test.ts +5 -0
- package/src/__tests__/dm-persistence.test.ts +5 -1
- package/src/__tests__/dynamic-page-surface.test.ts +31 -0
- package/src/__tests__/empty-response-hook.test.ts +304 -0
- package/src/__tests__/feature-flag-test-helpers.ts +2 -2
- package/src/__tests__/file-write-tool.test.ts +63 -0
- package/src/__tests__/gateway-only-guard.test.ts +12 -2
- package/src/__tests__/gemini-image-service.test.ts +13 -0
- package/src/__tests__/guardian-grant-minting.test.ts +1 -1
- package/src/__tests__/guardian-routing-invariants.test.ts +2 -4
- package/src/__tests__/handlers-user-message-approval-consumption.test.ts +1 -1
- package/src/__tests__/heartbeat-disk-pressure.test.ts +1 -0
- package/src/__tests__/heartbeat-service.test.ts +1 -0
- package/src/__tests__/helpers/mock-provider.ts +110 -0
- package/src/__tests__/helpers/native-web-search-harness.ts +129 -0
- package/src/__tests__/history-repair-hook.test.ts +1 -0
- package/src/__tests__/host-app-control-routes.test.ts +1 -1
- package/src/__tests__/host-cu-routes-targeted.test.ts +3 -3
- package/src/__tests__/identity-intro-cache.test.ts +12 -100
- package/src/__tests__/identity-routes.test.ts +248 -7
- package/src/__tests__/inbound-slack-persistence.test.ts +5 -1
- package/src/__tests__/injector-background-turn.test.ts +3 -9
- package/src/__tests__/injector-chain.test.ts +139 -275
- package/src/__tests__/injector-disk-pressure.test.ts +75 -41
- package/src/__tests__/injector-document-comments.test.ts +3 -3
- package/src/__tests__/injector-pkb-v2-silenced.test.ts +30 -22
- package/src/__tests__/injector-v3-suppression.test.ts +31 -37
- package/src/__tests__/internal-telemetry-routes.test.ts +109 -0
- package/src/__tests__/list-messages-hidden-metadata.test.ts +38 -0
- package/src/__tests__/list-messages-page-latest.test.ts +60 -0
- package/src/__tests__/list-messages-tool-merge.test.ts +20 -0
- package/src/__tests__/llm-usage-store.test.ts +223 -1
- package/src/__tests__/memory-retrieval-hook.test.ts +297 -0
- package/src/__tests__/memory-v2-static-injector.test.ts +103 -35
- package/src/__tests__/native-web-search.test.ts +191 -0
- package/src/__tests__/onboarding-template-contract.test.ts +2 -0
- package/src/__tests__/openai-image-service.test.ts +17 -0
- package/src/__tests__/openai-provider.test.ts +31 -1
- package/src/__tests__/{overflow-reduce-pipeline.test.ts → overflow-reduction-loop.test.ts} +64 -284
- package/src/__tests__/persist-unsendable-image.test.ts +215 -0
- package/src/__tests__/persistence-secret-redaction.test.ts +1 -0
- package/src/__tests__/pkb-autoinject.test.ts +2 -5
- package/src/__tests__/plugin-api-shim.test.ts +3 -6
- package/src/__tests__/plugin-bootstrap.test.ts +14 -40
- package/src/__tests__/plugin-registry.test.ts +3 -76
- package/src/__tests__/plugin-types.test.ts +0 -193
- package/src/__tests__/process-message-display-content.test.ts +6 -2
- package/src/__tests__/reaction-persistence.test.ts +1 -1
- package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +5 -1
- package/src/__tests__/resolve-trust-class.test.ts +4 -4
- package/src/__tests__/runtime-events-sse-reconnect.test.ts +60 -23
- package/src/__tests__/schedule-routes.test.ts +603 -2
- package/src/__tests__/schedule-store.test.ts +41 -0
- package/src/__tests__/schedule-tools.test.ts +35 -0
- package/src/__tests__/send-endpoint-busy.test.ts +4 -1
- package/src/__tests__/server-history-render.test.ts +314 -1
- package/src/__tests__/skill-feature-flags-integration.test.ts +33 -0
- package/src/__tests__/skillssh-files.test.ts +1 -1
- package/src/__tests__/subagent-call-site-routing.test.ts +1 -1
- package/src/__tests__/subagent-fork-notifications.test.ts +1 -3
- package/src/__tests__/subagent-fork-spawn.test.ts +1 -1
- package/src/__tests__/subagent-manager-notify.test.ts +1 -3
- package/src/__tests__/subagent-notify-parent.test.ts +1 -3
- package/src/__tests__/subagent-spawn-tool-fork.test.ts +1 -1
- package/src/__tests__/system-prompt.test.ts +20 -0
- package/src/__tests__/task-scheduler.test.ts +162 -1
- package/src/__tests__/terminal-tools.test.ts +6 -1
- package/src/__tests__/title-generate-hook.test.ts +319 -0
- package/src/__tests__/tool-error-hook.test.ts +278 -0
- package/src/__tests__/tool-preview-lifecycle.test.ts +468 -5
- package/src/__tests__/tool-result-metadata-plumbing.test.ts +1 -0
- package/src/__tests__/tool-result-truncate-hook.test.ts +127 -0
- package/src/__tests__/tool-result-truncation.test.ts +0 -2
- package/src/__tests__/ui-choice-copy-surfaces.test.ts +254 -0
- package/src/__tests__/ui-work-result-surface.test.ts +159 -0
- package/src/__tests__/usage-routes.test.ts +285 -1
- package/src/__tests__/user-plugin-loader.test.ts +54 -286
- package/src/__tests__/voice-session-bridge.test.ts +6 -3
- package/src/__tests__/web-search-backend-failure.test.ts +166 -0
- package/src/acp/__tests__/agent-process.test.ts +161 -0
- package/src/acp/__tests__/client-handler.test.ts +40 -0
- package/src/acp/__tests__/helpers/acp-history-db.ts +82 -0
- package/src/acp/__tests__/helpers/exec-file-stub.ts +101 -0
- package/src/acp/__tests__/prepare-agent-env.test.ts +137 -0
- package/src/acp/__tests__/session-manager-persistence.test.ts +95 -28
- package/src/acp/__tests__/session-manager-resume.test.ts +736 -0
- package/src/acp/agent-process.ts +61 -1
- package/src/acp/auto-install.test.ts +196 -0
- package/src/acp/auto-install.ts +177 -0
- package/src/acp/client-handler.ts +31 -0
- package/src/acp/feature-gate.test.ts +48 -0
- package/src/acp/feature-gate.ts +34 -0
- package/src/acp/prepare-agent-env.ts +83 -29
- package/src/acp/resolve-agent.test.ts +320 -7
- package/src/acp/resolve-agent.ts +182 -18
- package/src/acp/resume-hint.ts +25 -0
- package/src/acp/session-manager.ts +495 -73
- package/src/acp/types.ts +8 -0
- package/src/agent/compaction-circuit.ts +60 -102
- package/src/agent/loop.ts +362 -485
- package/src/api/events/assistant-thinking-delta.ts +33 -0
- package/src/api/events/tool-output-chunk.ts +45 -0
- package/src/api/events/tool-use-preview-start.ts +32 -0
- package/src/api/events/trace-event.ts +69 -0
- package/src/api/index.ts +48 -13
- package/src/api/responses/conversation-message.ts +374 -0
- package/src/approvals/guardian-request-resolvers.ts +1 -1
- package/src/avatar/__tests__/avatar-store.test.ts +34 -29
- package/src/background-wake/next-wake.ts +1 -0
- package/src/cli/commands/__tests__/notifications.test.ts +58 -14
- package/src/cli/commands/notifications.ts +112 -60
- package/src/config/__tests__/feature-flag-registry-guard.test.ts +2 -2
- package/src/config/acp-defaults.test.ts +10 -0
- package/src/config/acp-defaults.ts +6 -0
- package/src/config/assistant-feature-flags.ts +22 -11
- package/src/config/bundled-skills/acp/SKILL.md +83 -31
- package/src/config/bundled-skills/acp/TOOLS.json +4 -4
- package/src/config/bundled-skills/app-builder/SKILL.md +224 -398
- package/src/config/bundled-skills/app-builder/TOOLS.json +29 -0
- package/src/config/bundled-skills/app-builder/references/DESIGN_SYSTEM.md +48 -0
- package/src/config/bundled-skills/app-builder/references/RESPONSIVE.md +57 -0
- package/src/config/bundled-skills/app-builder/references/SLIDES.md +38 -0
- package/src/config/bundled-skills/app-builder/references/examples/README.md +17 -0
- package/src/config/bundled-skills/app-builder/references/examples/expense-tracker.md +515 -0
- package/src/config/bundled-skills/app-builder/references/examples/focus-timer.md +342 -0
- package/src/config/bundled-skills/app-builder/references/examples/habit-tracker.md +490 -0
- package/src/config/bundled-skills/app-builder/tools/app-list.ts +62 -0
- package/src/config/bundled-skills/document-editor/SKILL.md +28 -23
- package/src/config/bundled-skills/document-editor/TOOLS.json +1 -1
- package/src/config/bundled-skills/messaging/SKILL.md +0 -7
- package/src/config/bundled-tool-registry.ts +2 -0
- package/src/config/feature-flag-cache.ts +3 -3
- package/src/config/feature-flag-registry.json +48 -7
- package/src/config/schemas/__tests__/memory-v2.test.ts +1 -0
- package/src/config/schemas/__tests__/memory-v3.test.ts +25 -0
- package/src/config/schemas/heartbeat.ts +9 -0
- package/src/config/schemas/llm.ts +1 -0
- package/src/config/schemas/memory-v2.ts +8 -0
- package/src/config/schemas/memory-v3.ts +8 -0
- package/src/config/schemas/platform.ts +8 -0
- package/src/config/seed-inference-profiles.ts +2 -2
- package/src/config/skills.ts +13 -0
- package/src/context/compactor.ts +1 -1
- package/src/context/strip-injections.ts +128 -0
- package/src/context/token-estimator.ts +23 -0
- package/src/context/tool-result-truncation.ts +0 -23
- package/src/context/window-manager.ts +5 -7
- package/src/credential-execution/executable-discovery.ts +16 -0
- package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +6 -0
- package/src/daemon/__tests__/inference-profile-notification.test.ts +153 -0
- package/src/daemon/__tests__/native-web-search-metadata.test.ts +10 -8
- package/src/daemon/assistant-attachments.ts +1 -1
- package/src/daemon/config-watcher.ts +2 -2
- package/src/daemon/context-overflow-reducer.ts +0 -1
- package/src/daemon/conversation-agent-loop-handlers.ts +594 -153
- package/src/daemon/conversation-agent-loop.ts +301 -997
- package/src/daemon/conversation-history.ts +5 -4
- package/src/daemon/conversation-lifecycle.ts +3 -4
- package/src/daemon/conversation-messaging.ts +7 -6
- package/src/daemon/conversation-process.ts +11 -16
- package/src/daemon/conversation-registry.ts +159 -0
- package/src/daemon/conversation-runtime-assembly.ts +218 -398
- package/src/daemon/conversation-slash.ts +6 -25
- package/src/daemon/conversation-store.ts +9 -90
- package/src/daemon/conversation-surfaces.ts +222 -4
- package/src/daemon/conversation-tool-setup.ts +2 -29
- package/src/daemon/conversation-workspace.ts +17 -0
- package/src/daemon/conversation.ts +32 -20
- package/src/daemon/external-plugins-bootstrap.ts +17 -18
- package/src/daemon/handlers/config-a2a.ts +51 -36
- package/src/daemon/handlers/config-slack-channel.ts +20 -14
- package/src/daemon/handlers/config-telegram.ts +16 -2
- package/src/daemon/handlers/conversations.ts +3 -1
- package/src/daemon/handlers/shared.ts +156 -84
- package/src/daemon/handlers/skills.ts +42 -10
- package/src/daemon/lifecycle.ts +25 -0
- package/src/daemon/message-types/apps.ts +1 -29
- package/src/daemon/message-types/messages.ts +9 -57
- package/src/daemon/message-types/skills.ts +2 -0
- package/src/daemon/message-types/surfaces.ts +136 -3
- package/src/daemon/now-scratchpad.ts +21 -0
- package/src/daemon/orphan-reaper.test.ts +210 -0
- package/src/daemon/orphan-reaper.ts +240 -0
- package/src/daemon/overflow-reduction-loop.ts +230 -0
- package/src/daemon/persist-unsendable-image.ts +117 -0
- package/src/daemon/process-message.ts +1 -3
- package/src/daemon/server.ts +2 -0
- package/src/daemon/trace-emitter.ts +6 -4
- package/src/daemon/trust-context.ts +19 -0
- package/src/daemon/wake-target-adapter.ts +3 -1
- package/src/heartbeat/__tests__/heartbeat-service.test.ts +3 -0
- package/src/heartbeat/heartbeat-run-store.ts +23 -1
- package/src/heartbeat/heartbeat-service.ts +26 -0
- package/src/home/home-greeting-cache.ts +24 -1
- package/src/ipc/__tests__/browser-ipc.test.ts +1 -1
- package/src/ipc/__tests__/ui-request-route.test.ts +3 -3
- package/src/ipc/gateway-client.test.ts +2 -2
- package/src/ipc/gateway-client.ts +3 -3
- package/src/ipc/skill-routes/__tests__/memory.test.ts +15 -0
- package/src/ipc/skill-routes/memory.ts +4 -2
- package/src/media/gemini-image-service.ts +15 -0
- package/src/media/openai-image-service.ts +14 -0
- package/src/media/types.ts +34 -0
- package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +56 -0
- package/src/memory/auth-fallback-events-store.ts +94 -0
- package/src/memory/conversation-starter-checkpoints.ts +1 -0
- package/src/memory/conversation-title-service.ts +65 -41
- package/src/memory/db-init.ts +6 -0
- package/src/memory/graph/__tests__/conversation-graph-memory-registry.test.ts +119 -0
- package/src/memory/graph/conversation-graph-memory.ts +65 -0
- package/src/memory/job-handlers/conversation-starters.ts +13 -2
- package/src/memory/jobs-store.ts +33 -0
- package/src/memory/jobs-worker.ts +32 -5
- package/src/memory/llm-usage-store.ts +224 -50
- package/src/memory/migrations/222-strip-placeholder-sentinels-from-messages.ts +6 -5
- package/src/memory/migrations/270-schedule-source-conversation.ts +13 -0
- package/src/memory/migrations/271-create-auth-fallback-events.ts +21 -0
- package/src/memory/migrations/272-acp-session-history-cwd.ts +36 -0
- package/src/memory/migrations/index.ts +3 -0
- package/src/memory/pkb/autoinject.ts +61 -0
- package/src/memory/pkb/context.ts +50 -0
- package/src/memory/pkb/types.ts +14 -0
- package/src/memory/schedule-attribution-sql.ts +104 -0
- package/src/memory/schema/acp.ts +4 -0
- package/src/memory/schema/infrastructure.ts +16 -0
- package/src/memory/usage-grouped-buckets.ts +6 -1
- package/src/memory/v2/__tests__/consolidation-job.test.ts +4 -4
- package/src/memory/v2/consolidation-job.ts +14 -5
- package/src/notifications/conversation-pairing.ts +8 -15
- package/src/notifications/decision-engine.ts +6 -3
- package/src/notifications/home-feed-side-effect.ts +12 -1
- package/src/permissions/prompter.ts +4 -0
- package/src/plugin-api/constants.ts +4 -0
- package/src/plugin-api/index.ts +7 -5
- package/src/plugin-api/types.ts +151 -1
- package/src/plugins/defaults/compaction/compact.ts +59 -0
- package/src/plugins/defaults/compaction/package.json +1 -1
- package/src/plugins/defaults/compaction/register.ts +8 -19
- package/src/plugins/defaults/empty-response/hooks/stop.ts +126 -0
- package/src/plugins/defaults/empty-response/register.ts +8 -13
- package/src/plugins/defaults/index.ts +2 -18
- package/src/plugins/defaults/memory-retrieval/hooks/post-compact.ts +95 -0
- package/src/plugins/defaults/memory-retrieval/hooks/user-prompt-submit-temp.ts +216 -0
- package/src/plugins/defaults/memory-retrieval/injector-chain.ts +35 -0
- package/src/plugins/defaults/{injectors/register.ts → memory-retrieval/injectors.ts} +288 -81
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/assign.test.ts +4 -4
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/health.test.ts +16 -0
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/live-integration.test.ts +4 -4
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/maintain-job.test.ts +5 -5
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/orchestrate.test.ts +48 -12
- package/src/plugins/defaults/memory-v3-shadow/__tests__/provider-blocks.test.ts +13 -0
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/reconcile.test.ts +2 -2
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/render-injection.test.ts +1 -1
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/router.test.ts +104 -32
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/selection-log-store.test.ts +8 -8
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/selector.test.ts +96 -30
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/shadow-plugin.test.ts +34 -16
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/assign.ts +5 -5
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/capabilities.ts +2 -2
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/health.ts +0 -0
- package/src/plugins/defaults/memory-v3-shadow/hooks/post-compact.ts +14 -0
- package/src/plugins/defaults/memory-v3-shadow/hooks/user-prompt-submit.ts +19 -0
- package/src/plugins/defaults/memory-v3-shadow/injector.ts +75 -0
- package/src/plugins/defaults/memory-v3-shadow/llm-retry.ts +32 -0
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/maintain-job.ts +8 -8
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/orchestrate.ts +26 -14
- package/src/plugins/defaults/{llm-call → memory-v3-shadow}/package.json +2 -2
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/page-content.ts +2 -2
- package/src/plugins/defaults/memory-v3-shadow/provider-blocks.ts +26 -0
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/reconcile.ts +3 -3
- package/src/plugins/defaults/memory-v3-shadow/register.ts +26 -0
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/render-injection.ts +1 -1
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/router.ts +51 -45
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/selection-log-store.ts +4 -4
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/selector.ts +61 -46
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/shadow-plugin.ts +69 -99
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/tree.ts +1 -1
- package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/types.ts +8 -0
- package/src/plugins/defaults/title-generate/hooks/stop.ts +75 -0
- package/src/plugins/defaults/title-generate/hooks/user-prompt-submit.ts +35 -0
- package/src/plugins/defaults/title-generate/package.json +1 -1
- package/src/plugins/defaults/title-generate/register.ts +18 -18
- package/src/plugins/defaults/tool-error/hooks/post-tool-use.ts +118 -0
- package/src/plugins/defaults/tool-error/package.json +1 -1
- package/src/plugins/defaults/tool-error/register.ts +9 -21
- package/src/plugins/defaults/tool-result-truncate/hooks/post-tool-use.ts +32 -0
- package/src/plugins/defaults/tool-result-truncate/register.ts +10 -21
- package/src/plugins/defaults/tool-result-truncate/terminal.ts +37 -18
- package/src/plugins/external-api.ts +2 -2
- package/src/plugins/pipeline.ts +6 -305
- package/src/plugins/registry.ts +10 -55
- package/src/plugins/types.ts +62 -797
- package/src/plugins/user-loader.ts +30 -127
- package/src/proactive-artifact/aux-message-injector.ts +4 -4
- package/src/proactive-artifact/job.test.ts +8 -13
- package/src/prompts/__tests__/system-prompt.test.ts +42 -0
- package/src/prompts/templates/BOOTSTRAP-ACTIVATION-RAIL.md +64 -0
- package/src/prompts/templates/BOOTSTRAP.md +2 -2
- package/src/prompts/templates/system-sections.ts +15 -0
- package/src/providers/anthropic/client.ts +37 -29
- package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +112 -0
- package/src/providers/openai/chat-completions-provider.ts +44 -0
- package/src/providers/openrouter/client.ts +1 -0
- package/src/providers/placeholder-sentinels.ts +35 -0
- package/src/runtime/__tests__/agent-wake.test.ts +10 -6
- package/src/runtime/__tests__/interactive-ui.test.ts +1 -1
- package/src/runtime/agent-wake.ts +2 -5
- package/src/runtime/assistant-event-hub.ts +37 -7
- package/src/runtime/{conversation-stream-state.ts → assistant-stream-state.ts} +132 -58
- package/src/runtime/channel-approvals.ts +1 -1
- package/src/runtime/http-router.ts +16 -21
- package/src/runtime/http-types.ts +16 -70
- package/src/runtime/interactive-ui.ts +1 -1
- package/src/runtime/pending-interactions.ts +1 -0
- package/src/runtime/routes/__tests__/acp-routes.test.ts +283 -55
- package/src/runtime/routes/__tests__/consolidation-routes.test.ts +265 -2
- package/src/runtime/routes/__tests__/conversation-list-routes.test.ts +1 -1
- package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +31 -1
- package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +6 -2
- package/src/runtime/routes/__tests__/surface-action-routes.test.ts +5 -4
- package/src/runtime/routes/__tests__/surface-content-routes.test.ts +4 -1
- package/src/runtime/routes/__tests__/tts-routes.test.ts +6 -2
- package/src/runtime/routes/acp-routes.test.ts +89 -25
- package/src/runtime/routes/acp-routes.ts +81 -29
- package/src/runtime/routes/app-management-routes.ts +6 -117
- package/src/runtime/routes/app-routes.ts +13 -15
- package/src/runtime/routes/approval-routes.ts +1 -1
- package/src/runtime/routes/attachment-routes.ts +26 -15
- package/src/runtime/routes/avatar-routes.ts +26 -0
- package/src/runtime/routes/browser-routes.ts +1 -1
- package/src/runtime/routes/browser-tabs-routes.ts +6 -10
- package/src/runtime/routes/btw-routes.ts +29 -23
- package/src/runtime/routes/consolidation-routes.ts +120 -20
- package/src/runtime/routes/conversation-cli-routes.ts +1 -1
- package/src/runtime/routes/conversation-list-routes.ts +1 -1
- package/src/runtime/routes/conversation-query-routes.ts +3 -1
- package/src/runtime/routes/conversation-routes.ts +372 -185
- package/src/runtime/routes/conversation-starter-routes.ts +13 -7
- package/src/runtime/routes/conversations-import-routes.ts +24 -7
- package/src/runtime/routes/documents-routes.ts +4 -0
- package/src/runtime/routes/domain-routes.ts +51 -37
- package/src/runtime/routes/epoch-millis-range.ts +34 -0
- package/src/runtime/routes/events-routes.ts +28 -34
- package/src/runtime/routes/gateway-log-routes.ts +26 -4
- package/src/runtime/routes/heartbeat-routes.ts +32 -12
- package/src/runtime/routes/host-app-control-routes.ts +1 -1
- package/src/runtime/routes/host-cu-routes.ts +1 -1
- package/src/runtime/routes/identity-intro-cache.ts +11 -34
- package/src/runtime/routes/identity-routes.ts +224 -18
- package/src/runtime/routes/image-generation-routes.ts +40 -2
- package/src/runtime/routes/inbound-message-handler.ts +1 -1
- package/src/runtime/routes/index.ts +2 -0
- package/src/runtime/routes/integrations/a2a.ts +12 -10
- package/src/runtime/routes/integrations/slack/__tests__/channel.test.ts +16 -0
- package/src/runtime/routes/integrations/slack/channel.ts +4 -0
- package/src/runtime/routes/integrations/slack/share.ts +27 -6
- package/src/runtime/routes/integrations/telegram.ts +6 -0
- package/src/runtime/routes/integrations/twilio.ts +42 -0
- package/src/runtime/routes/internal-telemetry-routes.ts +88 -0
- package/src/runtime/routes/log-export-routes.ts +8 -0
- package/src/runtime/routes/memory-v2-routes.ts +15 -8
- package/src/runtime/routes/memory-v3-routes.ts +66 -34
- package/src/runtime/routes/oauth-apps.ts +66 -12
- package/src/runtime/routes/oauth-providers.ts +44 -5
- package/src/runtime/routes/platform-routes.ts +81 -5
- package/src/runtime/routes/playground/__tests__/force-compact.test.ts +6 -4
- package/src/runtime/routes/playground/force-compact.ts +1 -1
- package/src/runtime/routes/playground/helpers.ts +1 -1
- package/src/runtime/routes/rename-conversation-routes.ts +5 -0
- package/src/runtime/routes/schedule-routes.ts +152 -42
- package/src/runtime/routes/secret-routes.ts +14 -2
- package/src/runtime/routes/skills-routes.ts +43 -14
- package/src/runtime/routes/surface-conversation-resolver.ts +4 -3
- package/src/runtime/routes/tool-call-confirmation-enrichment.test.ts +161 -0
- package/src/runtime/routes/tool-call-confirmation-enrichment.ts +107 -0
- package/src/runtime/routes/trust-rules-routes.ts +26 -2
- package/src/runtime/routes/tts-routes.ts +35 -0
- package/src/runtime/routes/types.ts +66 -8
- package/src/runtime/routes/usage-routes.ts +47 -39
- package/src/runtime/routes/webhook-routes.ts +41 -2
- package/src/runtime/routes/work-items-routes.ts +2 -4
- package/src/runtime/routes/workspace-routes.ts +4 -0
- package/src/runtime/services/__tests__/analyze-conversation.test.ts +6 -0
- package/src/runtime/services/analyze-conversation.ts +2 -2
- package/src/runtime/services/conversation-serializer.ts +1 -1
- package/src/schedule/schedule-store.ts +20 -1
- package/src/schedule/schedule-usage-store.ts +83 -0
- package/src/schedule/scheduler.ts +12 -5
- package/src/signals/cancel.ts +2 -4
- package/src/skills/catalog-files.ts +2 -2
- package/src/skills/catalog-install.ts +3 -0
- package/src/skills/categories-cache.ts +118 -0
- package/src/skills/clawhub-files.ts +1 -2
- package/src/skills/skillssh-files.ts +1 -2
- package/src/subagent/manager.ts +17 -5
- package/src/telemetry/types.ts +29 -1
- package/src/telemetry/usage-telemetry-reporter.test.ts +112 -3
- package/src/telemetry/usage-telemetry-reporter.ts +57 -2
- package/src/tools/acp/context.ts +20 -0
- package/src/tools/acp/list-agents.test.ts +7 -1
- package/src/tools/acp/spawn.test.ts +158 -55
- package/src/tools/acp/spawn.ts +47 -72
- package/src/tools/acp/steer.test.ts +105 -8
- package/src/tools/acp/steer.ts +48 -17
- package/src/tools/apps/executors.ts +13 -8
- package/src/tools/executor.ts +1 -53
- package/src/tools/filesystem/write.ts +34 -0
- package/src/tools/network/__tests__/web-search-metadata.test.ts +7 -1
- package/src/tools/network/__tests__/web-search.test.ts +11 -3
- package/src/tools/network/web-search-error.test.ts +248 -0
- package/src/tools/network/web-search-error.ts +267 -0
- package/src/tools/network/web-search.ts +207 -48
- package/src/tools/schedule/create.ts +2 -0
- package/src/tools/subagent/spawn.ts +2 -4
- package/src/tools/terminal/safe-env.ts +10 -1
- package/src/tools/ui-surface/definitions.ts +34 -5
- package/src/tts/__tests__/provider-catalog-consistency.test.ts +85 -1
- package/src/tts/provider-catalog.ts +76 -1
- package/src/util/mutex.ts +47 -0
- package/src/workspace/git-service.ts +1 -42
- package/src/workspace/migrations/051-seed-conversation-summarization-callsite.ts +4 -5
- package/src/workspace/migrations/095-bump-heartbeat-interval-30m-to-60m.ts +51 -0
- package/src/workspace/migrations/096-reduce-quality-profile-effort.ts +72 -0
- package/src/workspace/migrations/097-enable-adaptive-thinking-managed-profiles.ts +117 -0
- package/src/workspace/migrations/registry.ts +6 -0
- package/docs/plugins.md +0 -836
- package/examples/plugins/echo/register.ts +0 -184
- package/src/__tests__/bootstrap-turn-cleanup.test.ts +0 -44
- package/src/__tests__/circuit-breaker-pipeline.test.ts +0 -405
- package/src/__tests__/compaction-pipeline.test.ts +0 -210
- package/src/__tests__/compaction-timeout-recovery.test.ts +0 -251
- package/src/__tests__/empty-response-pipeline.test.ts +0 -423
- package/src/__tests__/llm-call-pipeline.test.ts +0 -287
- package/src/__tests__/memory-retrieval-pipeline.test.ts +0 -418
- package/src/__tests__/persistence-pipeline.test.ts +0 -503
- package/src/__tests__/pipeline-runner.test.ts +0 -564
- package/src/__tests__/title-generate-pipeline.test.ts +0 -211
- package/src/__tests__/token-estimate-pipeline.test.ts +0 -479
- package/src/__tests__/tool-error-pipeline.test.ts +0 -241
- package/src/__tests__/tool-execute-pipeline.test.ts +0 -417
- package/src/__tests__/tool-result-truncate-pipeline.test.ts +0 -341
- package/src/daemon/bootstrap-turn-cleanup.ts +0 -45
- package/src/gallery/default-gallery.ts +0 -1359
- package/src/gallery/gallery-manifest.ts +0 -28
- package/src/home/feature-gate.ts +0 -22
- package/src/memory/v3/provider-blocks.ts +0 -16
- package/src/plugins/defaults/circuit-breaker/middlewares/circuitBreaker.ts +0 -93
- package/src/plugins/defaults/circuit-breaker/package.json +0 -15
- package/src/plugins/defaults/circuit-breaker/register.ts +0 -39
- package/src/plugins/defaults/compaction/middlewares/compaction.ts +0 -25
- package/src/plugins/defaults/compaction/terminal.ts +0 -73
- package/src/plugins/defaults/empty-response/middlewares/emptyResponse.ts +0 -22
- package/src/plugins/defaults/empty-response/terminal.ts +0 -106
- package/src/plugins/defaults/injectors/package.json +0 -15
- package/src/plugins/defaults/llm-call/middlewares/llmCall.ts +0 -17
- package/src/plugins/defaults/llm-call/register.ts +0 -45
- package/src/plugins/defaults/memory-retrieval/middlewares/memoryRetrieval.ts +0 -17
- package/src/plugins/defaults/memory-retrieval/package.json +0 -15
- package/src/plugins/defaults/memory-retrieval/register.ts +0 -181
- package/src/plugins/defaults/overflow-reduce/middlewares/overflowReduce.ts +0 -126
- package/src/plugins/defaults/overflow-reduce/package.json +0 -15
- package/src/plugins/defaults/overflow-reduce/register.ts +0 -42
- package/src/plugins/defaults/persistence/middlewares/persistence.ts +0 -19
- package/src/plugins/defaults/persistence/package.json +0 -15
- package/src/plugins/defaults/persistence/register.ts +0 -38
- package/src/plugins/defaults/persistence/terminal.ts +0 -83
- package/src/plugins/defaults/title-generate/terminal.ts +0 -31
- package/src/plugins/defaults/token-estimate/middlewares/tokenEstimate.ts +0 -23
- package/src/plugins/defaults/token-estimate/package.json +0 -15
- package/src/plugins/defaults/token-estimate/register.ts +0 -34
- package/src/plugins/defaults/token-estimate/terminal.ts +0 -40
- package/src/plugins/defaults/tool-error/middlewares/toolError.ts +0 -21
- package/src/plugins/defaults/tool-error/terminal.ts +0 -47
- package/src/plugins/defaults/tool-execute/middlewares/toolExecute.ts +0 -23
- package/src/plugins/defaults/tool-execute/package.json +0 -15
- package/src/plugins/defaults/tool-execute/register.ts +0 -49
- package/src/plugins/defaults/tool-result-truncate/middlewares/toolResultTruncate.ts +0 -23
- package/src/plugins/defaults/tool-result-truncate/types.ts +0 -22
- package/src/skills/category-inference.ts +0 -111
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/capabilities.test.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/core.test.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/fixtures/eval-turns.json +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/fixtures/live-turns.json +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/needle.test.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/snapshot.test.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/tree.test.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/types.test.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/working-set-eviction.test.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/working-set-skeleton.test.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/core.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/README.md +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/assignments.json +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/core.json +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/leaves/domain-a/topic-x.md +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/leaves/domain-a/topic-y.md +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/leaves/domain-b/topic-z.md +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/needle.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/snapshot.ts +0 -0
- /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/working-set.ts +0 -0
|
@@ -14,27 +14,11 @@
|
|
|
14
14
|
import { createRequire } from "node:module";
|
|
15
15
|
import { afterAll, beforeEach, describe, expect, mock, test } from "bun:test";
|
|
16
16
|
|
|
17
|
-
import {
|
|
18
|
-
import type {
|
|
19
|
-
AgentEvent,
|
|
20
|
-
AgentLoopRunOptions,
|
|
21
|
-
AgentLoopRunResult,
|
|
22
|
-
MidLoopCompaction,
|
|
23
|
-
} from "../agent/loop.js";
|
|
17
|
+
import type { LoopToolExecutor } from "../agent/loop.js";
|
|
24
18
|
import type { LLMConfig } from "../config/schemas/llm.js";
|
|
25
|
-
import type { ContextWindowResult } from "../context/window-manager.js";
|
|
26
19
|
import type { ServerMessage } from "../daemon/message-protocol.js";
|
|
27
|
-
import { defaultCompactionTerminal } from "../plugins/defaults/compaction/terminal.js";
|
|
28
20
|
import { resetPluginRegistryAndRegisterDefaults } from "../plugins/defaults/index.js";
|
|
29
|
-
import {
|
|
30
|
-
import { getMiddlewaresFor } from "../plugins/registry.js";
|
|
31
|
-
import type {
|
|
32
|
-
CompactionArgs,
|
|
33
|
-
CompactionResult,
|
|
34
|
-
TurnContext,
|
|
35
|
-
} from "../plugins/types.js";
|
|
36
|
-
import { PluginTimeoutError } from "../plugins/types.js";
|
|
37
|
-
import type { ContentBlock, Message } from "../providers/types.js";
|
|
21
|
+
import type { Message, Provider, ToolDefinition } from "../providers/types.js";
|
|
38
22
|
|
|
39
23
|
const conversationCrudRealSnapshot = {
|
|
40
24
|
...(createRequire(import.meta.url)(
|
|
@@ -103,6 +87,7 @@ mock.module("../config/loader.js", () => ({
|
|
|
103
87
|
memory: { retrieval: { scratchpadInjection: { enabled: true } } },
|
|
104
88
|
ui: {},
|
|
105
89
|
compaction: { enabled: true, autoThreshold: 0.7 },
|
|
90
|
+
conversations: { skipAutoRetitling: true },
|
|
106
91
|
}),
|
|
107
92
|
loadRawConfig: () => ({}),
|
|
108
93
|
saveRawConfig: () => {},
|
|
@@ -114,10 +99,10 @@ mock.module("../config/loader.js", () => ({
|
|
|
114
99
|
// Token estimator — controllable per-test via mockEstimateTokens.
|
|
115
100
|
// Can be a number (constant), a no-arg function, or a function that
|
|
116
101
|
// receives the messages array for dynamic behavior based on content.
|
|
117
|
-
// Both the calibrated entry point (`estimatePromptTokens`,
|
|
118
|
-
// convergence path) and the raw entry point
|
|
119
|
-
// used by the
|
|
120
|
-
//
|
|
102
|
+
// Both the calibrated entry point (`estimatePromptTokens`, which backs the
|
|
103
|
+
// preflight overflow gate and the convergence path) and the raw entry point
|
|
104
|
+
// (`estimatePromptTokensRaw`, used by the pre-send calibration capture) are
|
|
105
|
+
// stubbed so either call site can drive the test.
|
|
121
106
|
let mockEstimateTokens: number | ((msgs?: Message[]) => number) = 1000;
|
|
122
107
|
mock.module("../context/token-estimator.js", () => ({
|
|
123
108
|
estimatePromptTokens: (msgs: Message[]) =>
|
|
@@ -128,8 +113,16 @@ mock.module("../context/token-estimator.js", () => ({
|
|
|
128
113
|
typeof mockEstimateTokens === "function"
|
|
129
114
|
? mockEstimateTokens(msgs)
|
|
130
115
|
: mockEstimateTokens,
|
|
131
|
-
//
|
|
132
|
-
//
|
|
116
|
+
// The preflight overflow gate calls this calibrated wrapper directly, so it
|
|
117
|
+
// must honor `mockEstimateTokens` too — otherwise the real implementation
|
|
118
|
+
// (which sums tool tokens onto the real calibrated estimate) ignores the
|
|
119
|
+
// per-test value and the overflow scenarios below never trigger.
|
|
120
|
+
estimatePromptTokensWithTools: (history: Message[]) =>
|
|
121
|
+
typeof mockEstimateTokens === "function"
|
|
122
|
+
? mockEstimateTokens(history)
|
|
123
|
+
: mockEstimateTokens,
|
|
124
|
+
// `estimatePromptTokensWithTools` folds tool tokens in via this helper; 0
|
|
125
|
+
// keeps the stubbed value unchanged.
|
|
133
126
|
estimateToolsTokens: () => 0,
|
|
134
127
|
// Conversation agent loop now calls this helper to canonicalize the
|
|
135
128
|
// provider key shared with the calibration system. The tests here
|
|
@@ -281,15 +274,6 @@ mock.module("../daemon/conversation-runtime-assembly.js", () => ({
|
|
|
281
274
|
blocks: {},
|
|
282
275
|
}),
|
|
283
276
|
stripInjectionsForCompaction: (msgs: Message[]) => msgs,
|
|
284
|
-
findLastInjectedNowContent: () => null,
|
|
285
|
-
readNowScratchpad: () => null,
|
|
286
|
-
readPkbContext: () => null,
|
|
287
|
-
getPkbAutoInjectList: () => [
|
|
288
|
-
"INDEX.md",
|
|
289
|
-
"essentials.md",
|
|
290
|
-
"threads.md",
|
|
291
|
-
"buffer.md",
|
|
292
|
-
],
|
|
293
277
|
isSlackChannelConversation: () => false,
|
|
294
278
|
getSlackCompactionWatermarkForPrefix: () => null,
|
|
295
279
|
loadSlackChronologicalContext: () => null,
|
|
@@ -437,179 +421,55 @@ mock.module("../memory/archive-store.js", () => ({
|
|
|
437
421
|
|
|
438
422
|
// ── Imports (after mocks) ────────────────────────────────────────────
|
|
439
423
|
|
|
424
|
+
import { AgentLoop } from "../agent/loop.js";
|
|
440
425
|
import {
|
|
441
426
|
type AgentLoopConversationContext,
|
|
442
427
|
runAgentLoopImpl,
|
|
443
428
|
} from "../daemon/conversation-agent-loop.js";
|
|
429
|
+
import {
|
|
430
|
+
createMockProvider,
|
|
431
|
+
type ScriptedResponse,
|
|
432
|
+
textResponse,
|
|
433
|
+
toolUseResponse,
|
|
434
|
+
} from "./helpers/mock-provider.js";
|
|
444
435
|
|
|
445
436
|
// ── Test helpers ─────────────────────────────────────────────────────
|
|
446
437
|
|
|
447
|
-
type AgentLoopRun = (
|
|
448
|
-
messages: Message[],
|
|
449
|
-
onEvent: (event: AgentEvent) => void,
|
|
450
|
-
options?: AgentLoopRunOptions,
|
|
451
|
-
) => Promise<Message[]>;
|
|
452
|
-
|
|
453
|
-
/**
|
|
454
|
-
* Faithful re-implementation of `AgentLoop.compact()` for the mock loop: run
|
|
455
|
-
* the compaction pipeline against the supplied turn context (which carries the
|
|
456
|
-
* test's `contextWindowManager`), invoke the orchestrator-supplied hooks, and
|
|
457
|
-
* return the continuation history — or `null` on timeout/exhaustion so the
|
|
458
|
-
* caller yields "budget".
|
|
459
|
-
*/
|
|
460
|
-
async function simulateInlineCompaction(
|
|
461
|
-
compaction: MidLoopCompaction,
|
|
462
|
-
history: Message[],
|
|
463
|
-
turnContext: TurnContext | undefined,
|
|
464
|
-
signal: AbortSignal | undefined,
|
|
465
|
-
onEvent: (event: AgentEvent) => void | Promise<void>,
|
|
466
|
-
compactionCircuit: CompactionCircuit,
|
|
467
|
-
): Promise<Message[] | null> {
|
|
468
|
-
await onEvent({ type: "context_compacting" });
|
|
469
|
-
const { rawHistory, options } = compaction.prepare(history);
|
|
470
|
-
let result: CompactionResult;
|
|
471
|
-
try {
|
|
472
|
-
result = await runPipeline<CompactionArgs, CompactionResult>(
|
|
473
|
-
"compaction",
|
|
474
|
-
getMiddlewaresFor("compaction"),
|
|
475
|
-
(args) => defaultCompactionTerminal(args, turnContext as TurnContext),
|
|
476
|
-
{ messages: rawHistory, signal, options },
|
|
477
|
-
turnContext as TurnContext,
|
|
478
|
-
DEFAULT_TIMEOUTS.compaction,
|
|
479
|
-
);
|
|
480
|
-
} catch (error) {
|
|
481
|
-
if (error instanceof PluginTimeoutError) {
|
|
482
|
-
await compactionCircuit.recordOutcome(
|
|
483
|
-
{
|
|
484
|
-
currentRequestId: turnContext?.requestId,
|
|
485
|
-
currentTurnTrustContext: turnContext?.trust,
|
|
486
|
-
turnCount: turnContext?.turnIndex ?? 0,
|
|
487
|
-
},
|
|
488
|
-
true,
|
|
489
|
-
onEvent,
|
|
490
|
-
);
|
|
491
|
-
return null;
|
|
492
|
-
}
|
|
493
|
-
throw error;
|
|
494
|
-
}
|
|
495
|
-
const compactResult = result as ContextWindowResult;
|
|
496
|
-
if (compactResult.summaryFailed !== undefined) {
|
|
497
|
-
await compactionCircuit.recordOutcome(
|
|
498
|
-
{
|
|
499
|
-
currentRequestId: turnContext?.requestId,
|
|
500
|
-
currentTurnTrustContext: turnContext?.trust,
|
|
501
|
-
turnCount: turnContext?.turnIndex ?? 0,
|
|
502
|
-
},
|
|
503
|
-
compactResult.summaryFailed,
|
|
504
|
-
onEvent,
|
|
505
|
-
);
|
|
506
|
-
}
|
|
507
|
-
if (compactResult.compacted) {
|
|
508
|
-
await compaction.applyResult(compactResult, rawHistory);
|
|
509
|
-
}
|
|
510
|
-
if (compactResult.exhausted ?? false) {
|
|
511
|
-
return null;
|
|
512
|
-
}
|
|
513
|
-
return compaction.reinject();
|
|
514
|
-
}
|
|
515
|
-
|
|
516
|
-
/**
|
|
517
|
-
* Adapt a `Message[]`-returning mock loop body into `run()`'s real result
|
|
518
|
-
* shape. Mirrors the production loop: the pause-reason carried back is
|
|
519
|
-
* whatever the most recent `onCheckpoint` call yielded with (null when it
|
|
520
|
-
* never yielded), so the orchestrator derives its yield bookkeeping the same
|
|
521
|
-
* way it does against the real loop.
|
|
522
|
-
*/
|
|
523
|
-
const asAgentLoopRun = (
|
|
524
|
-
fn: AgentLoopRun,
|
|
525
|
-
compactionCircuit: CompactionCircuit,
|
|
526
|
-
): ((
|
|
527
|
-
messages: Message[],
|
|
528
|
-
onEvent: (event: AgentEvent) => void | Promise<void>,
|
|
529
|
-
options?: AgentLoopRunOptions,
|
|
530
|
-
) => Promise<AgentLoopRunResult>) => {
|
|
531
|
-
return async (messages, onEvent, options) => {
|
|
532
|
-
let exitReason: AgentLoopRunResult["exitReason"] = null;
|
|
533
|
-
let wrapped = options;
|
|
534
|
-
if (options?.onCheckpoint) {
|
|
535
|
-
const inner = options.onCheckpoint;
|
|
536
|
-
wrapped = {
|
|
537
|
-
...options,
|
|
538
|
-
onCheckpoint: async (info) => {
|
|
539
|
-
// Handoff is offered first, mirroring the loop's ordering.
|
|
540
|
-
const decision = await inner(info);
|
|
541
|
-
if (decision !== "continue") {
|
|
542
|
-
exitReason = decision;
|
|
543
|
-
return decision;
|
|
544
|
-
}
|
|
545
|
-
// The mid-loop budget gate and inline compaction both live inside
|
|
546
|
-
// `AgentLoop.run`. Replicate them here — same formula, stubbed
|
|
547
|
-
// estimator, and the loop's own `compact()` ceremony — so these
|
|
548
|
-
// orchestrator tests drive the real escalation path now that the
|
|
549
|
-
// orchestrator's `onCheckpoint` is handoff-only and compaction
|
|
550
|
-
// runs inline rather than via an orchestrator re-entry loop.
|
|
551
|
-
const contextWindow = options.resolveContextWindow?.();
|
|
552
|
-
if (contextWindow?.overflowRecovery.enabled) {
|
|
553
|
-
const { maxInputTokens, overflowRecovery } = contextWindow;
|
|
554
|
-
const safetyMargin =
|
|
555
|
-
info.history.length > 50
|
|
556
|
-
? Math.max(overflowRecovery.safetyMarginRatio, 0.15)
|
|
557
|
-
: overflowRecovery.safetyMarginRatio;
|
|
558
|
-
const preflightBudget = Math.floor(
|
|
559
|
-
maxInputTokens * (1 - safetyMargin),
|
|
560
|
-
);
|
|
561
|
-
const estimated =
|
|
562
|
-
typeof mockEstimateTokens === "function"
|
|
563
|
-
? mockEstimateTokens(info.history)
|
|
564
|
-
: mockEstimateTokens;
|
|
565
|
-
if (estimated > preflightBudget * 0.85) {
|
|
566
|
-
// Mirror `AgentLoop.compact()`: when a compaction path is
|
|
567
|
-
// supplied, run it in place and continue; on timeout or
|
|
568
|
-
// exhaustion it returns null, so the loop yields "budget".
|
|
569
|
-
const compacted = options.compaction
|
|
570
|
-
? await simulateInlineCompaction(
|
|
571
|
-
options.compaction,
|
|
572
|
-
info.history,
|
|
573
|
-
options.turnContext,
|
|
574
|
-
options.signal,
|
|
575
|
-
onEvent,
|
|
576
|
-
compactionCircuit,
|
|
577
|
-
)
|
|
578
|
-
: null;
|
|
579
|
-
if (compacted) {
|
|
580
|
-
exitReason = null;
|
|
581
|
-
return "continue";
|
|
582
|
-
}
|
|
583
|
-
exitReason = "budget";
|
|
584
|
-
return "budget";
|
|
585
|
-
}
|
|
586
|
-
}
|
|
587
|
-
exitReason = null;
|
|
588
|
-
return "continue";
|
|
589
|
-
},
|
|
590
|
-
};
|
|
591
|
-
}
|
|
592
|
-
const history = await fn(messages, onEvent, wrapped);
|
|
593
|
-
return { history, exitReason };
|
|
594
|
-
};
|
|
595
|
-
};
|
|
596
|
-
|
|
597
438
|
function makeCtx(
|
|
598
439
|
overrides?: Partial<AgentLoopConversationContext> & {
|
|
599
|
-
|
|
440
|
+
providerResponses?: ScriptedResponse[];
|
|
441
|
+
loopProvider?: Provider;
|
|
442
|
+
loopTools?: ToolDefinition[];
|
|
443
|
+
toolExecutor?: LoopToolExecutor;
|
|
600
444
|
},
|
|
601
445
|
): AgentLoopConversationContext {
|
|
602
|
-
const
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
446
|
+
const {
|
|
447
|
+
providerResponses,
|
|
448
|
+
loopProvider,
|
|
449
|
+
loopTools,
|
|
450
|
+
toolExecutor,
|
|
451
|
+
...ctxOverrides
|
|
452
|
+
} = overrides ?? {};
|
|
453
|
+
const conversationId = ctxOverrides.conversationId ?? "test-conv";
|
|
454
|
+
|
|
455
|
+
// Drive the real `AgentLoop` against a scripted provider, mocking only the
|
|
456
|
+
// provider HTTP boundary. The loop owns its mid-loop budget gate, inline
|
|
457
|
+
// compaction, and event emission, so these overflow tests exercise the real
|
|
458
|
+
// escalation/persistence path.
|
|
459
|
+
const loopProviderName =
|
|
460
|
+
(ctxOverrides.provider as { name?: string } | undefined)?.name ??
|
|
461
|
+
"mock-provider";
|
|
462
|
+
const provider =
|
|
463
|
+
loopProvider ??
|
|
464
|
+
createMockProvider(
|
|
465
|
+
providerResponses ?? [textResponse("response")],
|
|
466
|
+
loopProviderName,
|
|
467
|
+
).provider;
|
|
468
|
+
const agentLoop = new AgentLoop(provider, "system prompt", {
|
|
469
|
+
conversationId,
|
|
470
|
+
tools: loopTools ?? [],
|
|
471
|
+
toolExecutor,
|
|
472
|
+
});
|
|
613
473
|
|
|
614
474
|
return {
|
|
615
475
|
conversationId: "test-conv",
|
|
@@ -617,19 +477,16 @@ function makeCtx(
|
|
|
617
477
|
{ role: "user", content: [{ type: "text", text: "Hello" }] },
|
|
618
478
|
] as Message[],
|
|
619
479
|
processing: true,
|
|
480
|
+
isProcessing(this: { processing: boolean }) {
|
|
481
|
+
return this.processing;
|
|
482
|
+
},
|
|
483
|
+
setProcessing(this: { processing: boolean }, value: boolean) {
|
|
484
|
+
this.processing = value;
|
|
485
|
+
},
|
|
620
486
|
abortController: new AbortController(),
|
|
621
487
|
currentRequestId: "test-req",
|
|
622
488
|
|
|
623
|
-
agentLoop
|
|
624
|
-
run: asAgentLoopRun(agentLoopRun, compactionCircuit),
|
|
625
|
-
getToolTokenBudget: () => 0,
|
|
626
|
-
getResolvedTools: () => [],
|
|
627
|
-
// Tests in this file don't exercise calibration, so returning
|
|
628
|
-
// undefined is fine — the estimator falls back to the per-provider
|
|
629
|
-
// aggregate key.
|
|
630
|
-
getActiveModel: () => undefined,
|
|
631
|
-
compactionCircuit,
|
|
632
|
-
} as unknown as AgentLoopConversationContext["agentLoop"],
|
|
489
|
+
agentLoop,
|
|
633
490
|
provider: {
|
|
634
491
|
name: "mock-provider",
|
|
635
492
|
sendMessage: async () => ({
|
|
@@ -658,8 +515,6 @@ function makeCtx(
|
|
|
658
515
|
currentTurnSurfaces: [],
|
|
659
516
|
|
|
660
517
|
workingDir: "/tmp",
|
|
661
|
-
workspaceTopLevelContext: null,
|
|
662
|
-
workspaceTopLevelDirty: false,
|
|
663
518
|
channelCapabilities: undefined,
|
|
664
519
|
commandIntent: undefined,
|
|
665
520
|
trustContext: undefined,
|
|
@@ -696,7 +551,6 @@ function makeCtx(
|
|
|
696
551
|
getWorkspaceGitService: () => ({ ensureInitialized: async () => {} }),
|
|
697
552
|
commitTurnChanges: async () => {},
|
|
698
553
|
|
|
699
|
-
refreshWorkspaceTopLevelContextIfNeeded: () => {},
|
|
700
554
|
markWorkspaceTopLevelDirty: () => {},
|
|
701
555
|
emitActivityState: () => {},
|
|
702
556
|
getQueueDepth: () => 0,
|
|
@@ -722,9 +576,10 @@ function makeCtx(
|
|
|
722
576
|
injectedTokens: 0,
|
|
723
577
|
}),
|
|
724
578
|
retrackCachedNodes: () => {},
|
|
579
|
+
recordPkbQueryVectors: () => {},
|
|
725
580
|
} as unknown as AgentLoopConversationContext["graphMemory"],
|
|
726
581
|
|
|
727
|
-
...
|
|
582
|
+
...ctxOverrides,
|
|
728
583
|
} as AgentLoopConversationContext;
|
|
729
584
|
}
|
|
730
585
|
|
|
@@ -793,15 +648,15 @@ beforeEach(() => {
|
|
|
793
648
|
recordUsageMock.mockClear();
|
|
794
649
|
setAgentLoopExitReasonOnLatestLogMock.mockClear();
|
|
795
650
|
addMessageMock.mockClear();
|
|
796
|
-
// Reset the plugin registry and re-register every default so the
|
|
797
|
-
//
|
|
798
|
-
//
|
|
799
|
-
// (`reduceContextOverflow`, `syncMessageToDisk`, …) these tests install.
|
|
651
|
+
// Reset the plugin registry and re-register every default so the compaction
|
|
652
|
+
// pipeline dispatches to the default middleware, which in turn hits the
|
|
653
|
+
// mocked collaborators (`syncMessageToDisk`, …) these tests install.
|
|
800
654
|
resetPluginRegistryAndRegisterDefaults();
|
|
801
655
|
});
|
|
802
656
|
|
|
803
657
|
describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
804
658
|
test("usage update context max follows active main-agent profile budget", async () => {
|
|
659
|
+
// GIVEN an active main-agent profile that narrows the context budget
|
|
805
660
|
mockLlmConfig = {
|
|
806
661
|
...structuredClone(defaultLlmConfig),
|
|
807
662
|
activeProfile: "short-context",
|
|
@@ -813,27 +668,22 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
813
668
|
},
|
|
814
669
|
};
|
|
815
670
|
|
|
671
|
+
// AND a provider turn that reports 12k input tokens of usage
|
|
816
672
|
const ctx = makeCtx({
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
type: "
|
|
820
|
-
inputTokens: 12_000,
|
|
821
|
-
outputTokens: 300,
|
|
673
|
+
providerResponses: [
|
|
674
|
+
{
|
|
675
|
+
content: [{ type: "text", text: "response" }],
|
|
822
676
|
model: "mock-model",
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
{
|
|
828
|
-
role: "assistant" as const,
|
|
829
|
-
content: [{ type: "text" as const, text: "response" }],
|
|
830
|
-
},
|
|
831
|
-
];
|
|
832
|
-
},
|
|
677
|
+
usage: { inputTokens: 12_000, outputTokens: 300 },
|
|
678
|
+
stopReason: "end_turn",
|
|
679
|
+
},
|
|
680
|
+
],
|
|
833
681
|
});
|
|
834
682
|
|
|
683
|
+
// WHEN the turn runs to completion
|
|
835
684
|
await runAgentLoopImpl(ctx, "hello", "msg-1", () => {});
|
|
836
685
|
|
|
686
|
+
// THEN the recorded main-agent usage carries the profile's max budget
|
|
837
687
|
const mainAgentUsageCall = recordUsageMock.mock.calls.find(
|
|
838
688
|
(call) => call[5] === "main_agent",
|
|
839
689
|
);
|
|
@@ -846,10 +696,9 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
846
696
|
|
|
847
697
|
// ── Test 1 ────────────────────────────────────────────────────────
|
|
848
698
|
// BUG: When the agent loop makes progress (adds messages to history)
|
|
849
|
-
// before hitting context_too_large, the convergence loop
|
|
850
|
-
//
|
|
851
|
-
//
|
|
852
|
-
// invoked — the error is surfaced immediately at line 1163-1175
|
|
699
|
+
// before hitting context_too_large, the convergence loop's progress
|
|
700
|
+
// check must recognize that the loop appended messages. If it fails to,
|
|
701
|
+
// the reducer is never invoked — the error is surfaced immediately
|
|
853
702
|
// without any compaction attempt.
|
|
854
703
|
//
|
|
855
704
|
// Expected behavior (PR 2 fix): After progress + context_too_large,
|
|
@@ -889,125 +738,31 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
889
738
|
};
|
|
890
739
|
};
|
|
891
740
|
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
// then hits context_too_large on next LLM call
|
|
901
|
-
const progressMessages: Message[] = [
|
|
902
|
-
...messages,
|
|
903
|
-
{
|
|
904
|
-
role: "assistant" as const,
|
|
905
|
-
content: [
|
|
906
|
-
{ type: "text", text: "Let me check that." },
|
|
907
|
-
{
|
|
908
|
-
type: "tool_use",
|
|
909
|
-
id: "tu-progress",
|
|
910
|
-
name: "bash",
|
|
911
|
-
input: { command: "ls" },
|
|
912
|
-
},
|
|
913
|
-
] as ContentBlock[],
|
|
914
|
-
},
|
|
915
|
-
{
|
|
916
|
-
role: "user" as const,
|
|
917
|
-
content: [
|
|
918
|
-
{
|
|
919
|
-
type: "tool_result",
|
|
920
|
-
tool_use_id: "tu-progress",
|
|
921
|
-
content: "file1.ts\nfile2.ts",
|
|
922
|
-
is_error: false,
|
|
923
|
-
},
|
|
924
|
-
] as ContentBlock[],
|
|
925
|
-
},
|
|
926
|
-
];
|
|
741
|
+
// Run 1 makes progress (a tool turn) then the following provider call
|
|
742
|
+
// rejects with a context_too_large error; after the convergence reducer
|
|
743
|
+
// compacts, the rerun recovers with plain text.
|
|
744
|
+
const { provider } = createMockProvider([
|
|
745
|
+
toolUseResponse("tu-progress", "bash", { command: "ls" }),
|
|
746
|
+
new Error("prompt is too long: 242201 tokens > 200000 maximum"),
|
|
747
|
+
textResponse("recovered after compaction"),
|
|
748
|
+
]);
|
|
927
749
|
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
750
|
+
const ctx = makeCtx({
|
|
751
|
+
loopProvider: provider,
|
|
752
|
+
loopTools: [
|
|
753
|
+
{
|
|
932
754
|
name: "bash",
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
toolUseId: "tu-progress",
|
|
938
|
-
content: "file1.ts\nfile2.ts",
|
|
939
|
-
isError: false,
|
|
940
|
-
});
|
|
941
|
-
onEvent({
|
|
942
|
-
type: "message_complete",
|
|
943
|
-
message: {
|
|
944
|
-
role: "assistant",
|
|
945
|
-
content: [
|
|
946
|
-
{ type: "text", text: "Let me check that." },
|
|
947
|
-
{
|
|
948
|
-
type: "tool_use",
|
|
949
|
-
id: "tu-progress",
|
|
950
|
-
name: "bash",
|
|
951
|
-
input: { command: "ls" },
|
|
952
|
-
},
|
|
953
|
-
],
|
|
755
|
+
description: "Run a shell command",
|
|
756
|
+
input_schema: {
|
|
757
|
+
type: "object",
|
|
758
|
+
properties: { command: { type: "string" } },
|
|
954
759
|
},
|
|
955
|
-
});
|
|
956
|
-
onEvent({
|
|
957
|
-
type: "usage",
|
|
958
|
-
inputTokens: 100,
|
|
959
|
-
outputTokens: 50,
|
|
960
|
-
model: "test-model",
|
|
961
|
-
providerDurationMs: 100,
|
|
962
|
-
});
|
|
963
|
-
|
|
964
|
-
// Then context_too_large error occurs on the *next* LLM call
|
|
965
|
-
onEvent({
|
|
966
|
-
type: "error",
|
|
967
|
-
error: new Error(
|
|
968
|
-
"prompt is too long: 242201 tokens > 200000 maximum",
|
|
969
|
-
),
|
|
970
|
-
});
|
|
971
|
-
onEvent({
|
|
972
|
-
type: "usage",
|
|
973
|
-
inputTokens: 0,
|
|
974
|
-
outputTokens: 0,
|
|
975
|
-
model: "test-model",
|
|
976
|
-
providerDurationMs: 10,
|
|
977
|
-
});
|
|
978
|
-
|
|
979
|
-
// Return the history WITH progress (more messages than input)
|
|
980
|
-
return progressMessages;
|
|
981
|
-
}
|
|
982
|
-
|
|
983
|
-
// Second call (after compaction): succeed
|
|
984
|
-
onEvent({
|
|
985
|
-
type: "message_complete",
|
|
986
|
-
message: {
|
|
987
|
-
role: "assistant",
|
|
988
|
-
content: [{ type: "text", text: "recovered after compaction" }],
|
|
989
|
-
},
|
|
990
|
-
});
|
|
991
|
-
onEvent({
|
|
992
|
-
type: "usage",
|
|
993
|
-
inputTokens: 50,
|
|
994
|
-
outputTokens: 25,
|
|
995
|
-
model: "test-model",
|
|
996
|
-
providerDurationMs: 100,
|
|
997
|
-
});
|
|
998
|
-
return [
|
|
999
|
-
...messages,
|
|
1000
|
-
{
|
|
1001
|
-
role: "assistant" as const,
|
|
1002
|
-
content: [
|
|
1003
|
-
{ type: "text", text: "recovered after compaction" },
|
|
1004
|
-
] as ContentBlock[],
|
|
1005
760
|
},
|
|
1006
|
-
]
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
761
|
+
],
|
|
762
|
+
toolExecutor: async () => ({
|
|
763
|
+
content: "file1.ts\nfile2.ts",
|
|
764
|
+
isError: false,
|
|
765
|
+
}),
|
|
1011
766
|
contextWindowManager: {
|
|
1012
767
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
1013
768
|
maybeCompact: async () => ({ compacted: false }),
|
|
@@ -1036,13 +791,14 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1036
791
|
// This test should PASS against current code (when no progress is made).
|
|
1037
792
|
test("overflow recovery compacts below limit even when estimation underestimates", async () => {
|
|
1038
793
|
const events: ServerMessage[] = [];
|
|
1039
|
-
let callCount = 0;
|
|
1040
794
|
let reducerCalled = false;
|
|
1041
795
|
|
|
1042
|
-
//
|
|
796
|
+
// GIVEN the estimator reports 185k — under the 190k preflight budget
|
|
797
|
+
// (200k * 0.95), so the turn proceeds to the provider rather than
|
|
798
|
+
// compacting up front.
|
|
1043
799
|
mockEstimateTokens = 185_000;
|
|
1044
800
|
|
|
1045
|
-
//
|
|
801
|
+
// AND the post-run convergence reducer successfully compacts
|
|
1046
802
|
mockReducerStepFn = (msgs: Message[]) => {
|
|
1047
803
|
reducerCalled = true;
|
|
1048
804
|
return {
|
|
@@ -1072,96 +828,46 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1072
828
|
};
|
|
1073
829
|
};
|
|
1074
830
|
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
// Provider rejects with "prompt is too long: 242201 tokens > 200000"
|
|
1082
|
-
// even though estimator said 185k
|
|
1083
|
-
onEvent({
|
|
1084
|
-
type: "error",
|
|
1085
|
-
error: new Error(
|
|
1086
|
-
"prompt is too long: 242201 tokens > 200000 maximum",
|
|
1087
|
-
),
|
|
1088
|
-
});
|
|
1089
|
-
onEvent({
|
|
1090
|
-
type: "usage",
|
|
1091
|
-
inputTokens: 0,
|
|
1092
|
-
outputTokens: 0,
|
|
1093
|
-
model: "test-model",
|
|
1094
|
-
providerDurationMs: 10,
|
|
1095
|
-
});
|
|
1096
|
-
// No progress — return same messages
|
|
1097
|
-
return messages;
|
|
1098
|
-
}
|
|
1099
|
-
// Second call succeeds
|
|
1100
|
-
onEvent({
|
|
1101
|
-
type: "message_complete",
|
|
1102
|
-
message: {
|
|
1103
|
-
role: "assistant",
|
|
1104
|
-
content: [{ type: "text", text: "recovered" }],
|
|
1105
|
-
},
|
|
1106
|
-
});
|
|
1107
|
-
onEvent({
|
|
1108
|
-
type: "usage",
|
|
1109
|
-
inputTokens: 80_000,
|
|
1110
|
-
outputTokens: 200,
|
|
1111
|
-
model: "test-model",
|
|
1112
|
-
providerDurationMs: 500,
|
|
1113
|
-
});
|
|
1114
|
-
return [
|
|
1115
|
-
...messages,
|
|
1116
|
-
{
|
|
1117
|
-
role: "assistant" as const,
|
|
1118
|
-
content: [{ type: "text", text: "recovered" }] as ContentBlock[],
|
|
1119
|
-
},
|
|
1120
|
-
];
|
|
1121
|
-
};
|
|
831
|
+
// AND a provider that rejects the first call as too long (revealing the
|
|
832
|
+
// real 242k count the estimator missed), then succeeds on the rerun.
|
|
833
|
+
const { provider, calls } = createMockProvider([
|
|
834
|
+
new Error("prompt is too long: 242201 tokens > 200000 maximum"),
|
|
835
|
+
textResponse("recovered"),
|
|
836
|
+
]);
|
|
1122
837
|
|
|
1123
838
|
const ctx = makeCtx({
|
|
1124
|
-
|
|
839
|
+
loopProvider: provider,
|
|
1125
840
|
contextWindowManager: {
|
|
1126
841
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
1127
842
|
maybeCompact: async () => ({ compacted: false }),
|
|
1128
843
|
} as unknown as AgentLoopConversationContext["contextWindowManager"],
|
|
1129
844
|
});
|
|
1130
845
|
|
|
846
|
+
// WHEN the turn runs
|
|
1131
847
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
1132
848
|
|
|
1133
|
-
//
|
|
849
|
+
// THEN the convergence reducer ran and the rerun recovered without a
|
|
850
|
+
// user-facing conversation_error.
|
|
1134
851
|
expect(reducerCalled).toBe(true);
|
|
1135
|
-
// Should recover without conversation_error
|
|
1136
852
|
const conversationError = events.find(
|
|
1137
853
|
(e) => e.type === "conversation_error",
|
|
1138
854
|
);
|
|
1139
855
|
expect(conversationError).toBeUndefined();
|
|
1140
|
-
expect(
|
|
856
|
+
expect(calls.length).toBe(2);
|
|
1141
857
|
});
|
|
1142
858
|
|
|
1143
859
|
// ── Test 3 ────────────────────────────────────────────────────────
|
|
1144
|
-
//
|
|
1145
|
-
// "242201 tokens > 200000"), the reducer
|
|
1146
|
-
// the actual limit
|
|
1147
|
-
//
|
|
1148
|
-
//
|
|
1149
|
-
//
|
|
1150
|
-
//
|
|
1151
|
-
//
|
|
1152
|
-
// Expected behavior (PR 4 fix): `targetInputTokensOverride` should
|
|
1153
|
-
// be adjusted based on the ratio between estimated and actual tokens.
|
|
1154
|
-
// BUG: The targetTokens passed to the reducer is preflightBudget = 190k.
|
|
1155
|
-
// But when the actual token count is 242k (1.31x the estimate of 185k),
|
|
1156
|
-
// the target should be adjusted downward to account for the estimation
|
|
1157
|
-
// inaccuracy. For example: 190k / 1.31 ≈ 145k.
|
|
1158
|
-
// Planned fix: targetInputTokensOverride should be adjusted based on
|
|
1159
|
-
// the ratio between estimated and actual tokens.
|
|
860
|
+
// When the provider rejection reveals the actual token count (e.g.,
|
|
861
|
+
// "242201 tokens > 200000"), the overflow reducer's `targetTokens`
|
|
862
|
+
// should be a budget below the actual limit, not below the estimator's
|
|
863
|
+
// inaccurate budget. With a preflightBudget of 190k but an actual count
|
|
864
|
+
// of 242k (1.31x the estimate of 185k), the target is adjusted downward
|
|
865
|
+
// based on the observed mismatch (190k / 1.31 ≈ 145k) so the reducer
|
|
866
|
+
// converges toward the real ceiling rather than the optimistic estimate.
|
|
1160
867
|
test.todo(
|
|
1161
868
|
"forced compaction targets a lower budget when estimation has been inaccurate",
|
|
1162
869
|
async () => {
|
|
1163
870
|
const events: ServerMessage[] = [];
|
|
1164
|
-
let callCount = 0;
|
|
1165
871
|
let capturedTargetTokens: number | undefined;
|
|
1166
872
|
|
|
1167
873
|
// Estimator says 185k (below 190k budget = 200k * 0.95)
|
|
@@ -1197,55 +903,16 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1197
903
|
};
|
|
1198
904
|
};
|
|
1199
905
|
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
onEvent({
|
|
1208
|
-
type: "error",
|
|
1209
|
-
error: new Error(
|
|
1210
|
-
"prompt is too long: 242201 tokens > 200000 maximum",
|
|
1211
|
-
),
|
|
1212
|
-
});
|
|
1213
|
-
onEvent({
|
|
1214
|
-
type: "usage",
|
|
1215
|
-
inputTokens: 0,
|
|
1216
|
-
outputTokens: 0,
|
|
1217
|
-
model: "test-model",
|
|
1218
|
-
providerDurationMs: 10,
|
|
1219
|
-
});
|
|
1220
|
-
// No progress — return same messages
|
|
1221
|
-
return messages;
|
|
1222
|
-
}
|
|
1223
|
-
// Second call succeeds after compaction
|
|
1224
|
-
onEvent({
|
|
1225
|
-
type: "message_complete",
|
|
1226
|
-
message: {
|
|
1227
|
-
role: "assistant",
|
|
1228
|
-
content: [{ type: "text", text: "recovered" }],
|
|
1229
|
-
},
|
|
1230
|
-
});
|
|
1231
|
-
onEvent({
|
|
1232
|
-
type: "usage",
|
|
1233
|
-
inputTokens: 80_000,
|
|
1234
|
-
outputTokens: 200,
|
|
1235
|
-
model: "test-model",
|
|
1236
|
-
providerDurationMs: 500,
|
|
1237
|
-
});
|
|
1238
|
-
return [
|
|
1239
|
-
...messages,
|
|
1240
|
-
{
|
|
1241
|
-
role: "assistant" as const,
|
|
1242
|
-
content: [{ type: "text", text: "recovered" }] as ContentBlock[],
|
|
1243
|
-
},
|
|
1244
|
-
];
|
|
1245
|
-
};
|
|
906
|
+
// The provider rejects the first call with a context_too_large error
|
|
907
|
+
// (actual tokens 242201, far above the 185k estimate); after forced
|
|
908
|
+
// compaction re-targets a lower budget, the rerun recovers with text.
|
|
909
|
+
const { provider, calls } = createMockProvider([
|
|
910
|
+
new Error("prompt is too long: 242201 tokens > 200000 maximum"),
|
|
911
|
+
textResponse("recovered"),
|
|
912
|
+
]);
|
|
1246
913
|
|
|
1247
914
|
const ctx = makeCtx({
|
|
1248
|
-
|
|
915
|
+
loopProvider: provider,
|
|
1249
916
|
contextWindowManager: {
|
|
1250
917
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
1251
918
|
maybeCompact: async () => ({ compacted: false }),
|
|
@@ -1275,7 +942,7 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1275
942
|
(e) => e.type === "conversation_error",
|
|
1276
943
|
);
|
|
1277
944
|
expect(conversationError).toBeUndefined();
|
|
1278
|
-
expect(
|
|
945
|
+
expect(calls.length).toBe(2);
|
|
1279
946
|
},
|
|
1280
947
|
);
|
|
1281
948
|
|
|
@@ -1289,7 +956,6 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1289
956
|
async () => {
|
|
1290
957
|
const events: ServerMessage[] = [];
|
|
1291
958
|
const longHistory = buildLongConversation(75);
|
|
1292
|
-
let callCount = 0;
|
|
1293
959
|
let reducerCalled = false;
|
|
1294
960
|
|
|
1295
961
|
// Estimator says ~195k — just above budget so preflight reducer runs
|
|
@@ -1325,38 +991,14 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1325
991
|
};
|
|
1326
992
|
};
|
|
1327
993
|
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
onEvent({
|
|
1334
|
-
type: "message_complete",
|
|
1335
|
-
message: {
|
|
1336
|
-
role: "assistant",
|
|
1337
|
-
content: [{ type: "text", text: "Here's the analysis..." }],
|
|
1338
|
-
},
|
|
1339
|
-
});
|
|
1340
|
-
onEvent({
|
|
1341
|
-
type: "usage",
|
|
1342
|
-
inputTokens: 50_000,
|
|
1343
|
-
outputTokens: 300,
|
|
1344
|
-
model: "test-model",
|
|
1345
|
-
providerDurationMs: 800,
|
|
1346
|
-
});
|
|
1347
|
-
return [
|
|
1348
|
-
...messages,
|
|
1349
|
-
{
|
|
1350
|
-
role: "assistant" as const,
|
|
1351
|
-
content: [
|
|
1352
|
-
{ type: "text", text: "Here's the analysis..." },
|
|
1353
|
-
] as ContentBlock[],
|
|
1354
|
-
},
|
|
1355
|
-
];
|
|
1356
|
-
};
|
|
994
|
+
// After the preflight reducer compacts the long history under budget,
|
|
995
|
+
// a single provider call completes the turn with plain text.
|
|
996
|
+
const { provider, calls } = createMockProvider([
|
|
997
|
+
textResponse("Here's the analysis..."),
|
|
998
|
+
]);
|
|
1357
999
|
|
|
1358
1000
|
const ctx = makeCtx({
|
|
1359
|
-
|
|
1001
|
+
loopProvider: provider,
|
|
1360
1002
|
messages: longHistory,
|
|
1361
1003
|
contextWindowManager: {
|
|
1362
1004
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
@@ -1371,7 +1013,7 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1371
1013
|
// Preflight should trigger the reducer since 195k > 190k budget
|
|
1372
1014
|
expect(reducerCalled).toBe(true);
|
|
1373
1015
|
// Should succeed
|
|
1374
|
-
expect(
|
|
1016
|
+
expect(calls.length).toBe(1);
|
|
1375
1017
|
const conversationError = events.find(
|
|
1376
1018
|
(e) => e.type === "conversation_error",
|
|
1377
1019
|
);
|
|
@@ -1415,118 +1057,31 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1415
1057
|
};
|
|
1416
1058
|
};
|
|
1417
1059
|
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1423
|
-
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
const progressMessages: Message[] = [
|
|
1427
|
-
...messages,
|
|
1428
|
-
{
|
|
1429
|
-
role: "assistant" as const,
|
|
1430
|
-
content: [
|
|
1431
|
-
{ type: "text", text: "Running analysis..." },
|
|
1432
|
-
{
|
|
1433
|
-
type: "tool_use",
|
|
1434
|
-
id: "tu-1",
|
|
1435
|
-
name: "bash",
|
|
1436
|
-
input: { command: "find . -name '*.ts'" },
|
|
1437
|
-
},
|
|
1438
|
-
] as ContentBlock[],
|
|
1439
|
-
},
|
|
1440
|
-
{
|
|
1441
|
-
role: "user" as const,
|
|
1442
|
-
content: [
|
|
1443
|
-
{
|
|
1444
|
-
type: "tool_result",
|
|
1445
|
-
tool_use_id: "tu-1",
|
|
1446
|
-
content: "file1.ts\nfile2.ts\nfile3.ts",
|
|
1447
|
-
is_error: false,
|
|
1448
|
-
},
|
|
1449
|
-
] as ContentBlock[],
|
|
1450
|
-
},
|
|
1451
|
-
];
|
|
1060
|
+
// Run 1 makes progress (a tool turn) then the following provider call
|
|
1061
|
+
// rejects with context_too_large; after emergency compaction the rerun
|
|
1062
|
+
// recovers with plain text.
|
|
1063
|
+
const { provider } = createMockProvider([
|
|
1064
|
+
toolUseResponse("tu-1", "bash", { command: "find . -name '*.ts'" }),
|
|
1065
|
+
new Error("context_length_exceeded"),
|
|
1066
|
+
textResponse("recovered"),
|
|
1067
|
+
]);
|
|
1452
1068
|
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1069
|
+
const ctx = makeCtx({
|
|
1070
|
+
loopProvider: provider,
|
|
1071
|
+
loopTools: [
|
|
1072
|
+
{
|
|
1456
1073
|
name: "bash",
|
|
1457
|
-
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
toolUseId: "tu-1",
|
|
1462
|
-
content: "file1.ts\nfile2.ts\nfile3.ts",
|
|
1463
|
-
isError: false,
|
|
1464
|
-
});
|
|
1465
|
-
onEvent({
|
|
1466
|
-
type: "message_complete",
|
|
1467
|
-
message: {
|
|
1468
|
-
role: "assistant",
|
|
1469
|
-
content: [
|
|
1470
|
-
{ type: "text", text: "Running analysis..." },
|
|
1471
|
-
{
|
|
1472
|
-
type: "tool_use",
|
|
1473
|
-
id: "tu-1",
|
|
1474
|
-
name: "bash",
|
|
1475
|
-
input: { command: "find . -name '*.ts'" },
|
|
1476
|
-
},
|
|
1477
|
-
],
|
|
1074
|
+
description: "Run a shell command",
|
|
1075
|
+
input_schema: {
|
|
1076
|
+
type: "object",
|
|
1077
|
+
properties: { command: { type: "string" } },
|
|
1478
1078
|
},
|
|
1479
|
-
});
|
|
1480
|
-
onEvent({
|
|
1481
|
-
type: "usage",
|
|
1482
|
-
inputTokens: 190_000,
|
|
1483
|
-
outputTokens: 100,
|
|
1484
|
-
model: "test-model",
|
|
1485
|
-
providerDurationMs: 200,
|
|
1486
|
-
});
|
|
1487
|
-
|
|
1488
|
-
// Then context_too_large on the next LLM call within the loop
|
|
1489
|
-
onEvent({
|
|
1490
|
-
type: "error",
|
|
1491
|
-
error: new Error("context_length_exceeded"),
|
|
1492
|
-
});
|
|
1493
|
-
onEvent({
|
|
1494
|
-
type: "usage",
|
|
1495
|
-
inputTokens: 0,
|
|
1496
|
-
outputTokens: 0,
|
|
1497
|
-
model: "test-model",
|
|
1498
|
-
providerDurationMs: 10,
|
|
1499
|
-
});
|
|
1500
|
-
|
|
1501
|
-
return progressMessages;
|
|
1502
|
-
}
|
|
1503
|
-
|
|
1504
|
-
// After emergency compaction, succeed
|
|
1505
|
-
onEvent({
|
|
1506
|
-
type: "message_complete",
|
|
1507
|
-
message: {
|
|
1508
|
-
role: "assistant",
|
|
1509
|
-
content: [{ type: "text", text: "recovered" }],
|
|
1510
1079
|
},
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
model: "test-model",
|
|
1517
|
-
providerDurationMs: 200,
|
|
1518
|
-
});
|
|
1519
|
-
return [
|
|
1520
|
-
...messages,
|
|
1521
|
-
{
|
|
1522
|
-
role: "assistant" as const,
|
|
1523
|
-
content: [{ type: "text", text: "recovered" }] as ContentBlock[],
|
|
1524
|
-
},
|
|
1525
|
-
];
|
|
1526
|
-
};
|
|
1527
|
-
|
|
1528
|
-
const ctx = makeCtx({
|
|
1529
|
-
agentLoopRun,
|
|
1080
|
+
],
|
|
1081
|
+
toolExecutor: async () => ({
|
|
1082
|
+
content: "file1.ts\nfile2.ts\nfile3.ts",
|
|
1083
|
+
isError: false,
|
|
1084
|
+
}),
|
|
1530
1085
|
contextWindowManager: {
|
|
1531
1086
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
1532
1087
|
maybeCompact: async (
|
|
@@ -1603,111 +1158,30 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1603
1158
|
return 170_000;
|
|
1604
1159
|
};
|
|
1605
1160
|
|
|
1606
|
-
|
|
1607
|
-
|
|
1608
|
-
|
|
1609
|
-
|
|
1610
|
-
|
|
1611
|
-
|
|
1612
|
-
|
|
1613
|
-
if (agentLoopCallCount === 1) {
|
|
1614
|
-
// Simulate a tool round: assistant calls a tool, results come back
|
|
1615
|
-
const withProgress: Message[] = [
|
|
1616
|
-
...messages,
|
|
1617
|
-
{
|
|
1618
|
-
role: "assistant" as const,
|
|
1619
|
-
content: [
|
|
1620
|
-
{ type: "text", text: "Let me check." },
|
|
1621
|
-
{
|
|
1622
|
-
type: "tool_use",
|
|
1623
|
-
id: "tu-1",
|
|
1624
|
-
name: "bash",
|
|
1625
|
-
input: { command: "ls" },
|
|
1626
|
-
},
|
|
1627
|
-
] as ContentBlock[],
|
|
1628
|
-
},
|
|
1629
|
-
{
|
|
1630
|
-
role: "user" as const,
|
|
1631
|
-
content: [
|
|
1632
|
-
{
|
|
1633
|
-
type: "tool_result",
|
|
1634
|
-
tool_use_id: "tu-1",
|
|
1635
|
-
content: "file1.ts\nfile2.ts",
|
|
1636
|
-
is_error: false,
|
|
1637
|
-
},
|
|
1638
|
-
] as ContentBlock[],
|
|
1639
|
-
},
|
|
1640
|
-
];
|
|
1641
|
-
|
|
1642
|
-
onEvent({
|
|
1643
|
-
type: "message_complete",
|
|
1644
|
-
message: {
|
|
1645
|
-
role: "assistant",
|
|
1646
|
-
content: [
|
|
1647
|
-
{ type: "text", text: "Let me check." },
|
|
1648
|
-
{
|
|
1649
|
-
type: "tool_use",
|
|
1650
|
-
id: "tu-1",
|
|
1651
|
-
name: "bash",
|
|
1652
|
-
input: { command: "ls" },
|
|
1653
|
-
},
|
|
1654
|
-
],
|
|
1655
|
-
},
|
|
1656
|
-
});
|
|
1657
|
-
onEvent({
|
|
1658
|
-
type: "usage",
|
|
1659
|
-
inputTokens: 100,
|
|
1660
|
-
outputTokens: 50,
|
|
1661
|
-
model: "test-model",
|
|
1662
|
-
providerDurationMs: 100,
|
|
1663
|
-
});
|
|
1664
|
-
|
|
1665
|
-
// Call onCheckpoint — this should trigger the mid-loop budget check
|
|
1666
|
-
// which sees 170_000 > 161_500 and returns "yield"
|
|
1667
|
-
if (options?.onCheckpoint) {
|
|
1668
|
-
const decision = await options.onCheckpoint({
|
|
1669
|
-
turnIndex: 0,
|
|
1670
|
-
toolCount: 1,
|
|
1671
|
-
hasToolUse: true,
|
|
1672
|
-
history: withProgress,
|
|
1673
|
-
});
|
|
1674
|
-
if (decision !== "continue") {
|
|
1675
|
-
// Agent loop stops when checkpoint yields
|
|
1676
|
-
return withProgress;
|
|
1677
|
-
}
|
|
1678
|
-
}
|
|
1679
|
-
|
|
1680
|
-
return withProgress;
|
|
1681
|
-
}
|
|
1161
|
+
// A tool round trips the mid-loop budget gate (170k > 161_500); the
|
|
1162
|
+
// gate compacts in place (productive) and the loop continues, so the
|
|
1163
|
+
// post-compaction provider call completes the turn with plain text.
|
|
1164
|
+
const { provider, calls } = createMockProvider([
|
|
1165
|
+
toolUseResponse("tu-1", "bash", { command: "ls" }),
|
|
1166
|
+
textResponse("done after compaction"),
|
|
1167
|
+
]);
|
|
1682
1168
|
|
|
1683
|
-
|
|
1684
|
-
|
|
1685
|
-
|
|
1686
|
-
message: {
|
|
1687
|
-
role: "assistant",
|
|
1688
|
-
content: [{ type: "text", text: "done after compaction" }],
|
|
1689
|
-
},
|
|
1690
|
-
});
|
|
1691
|
-
onEvent({
|
|
1692
|
-
type: "usage",
|
|
1693
|
-
inputTokens: 50,
|
|
1694
|
-
outputTokens: 25,
|
|
1695
|
-
model: "test-model",
|
|
1696
|
-
providerDurationMs: 100,
|
|
1697
|
-
});
|
|
1698
|
-
return [
|
|
1699
|
-
...messages,
|
|
1169
|
+
const ctx = makeCtx({
|
|
1170
|
+
loopProvider: provider,
|
|
1171
|
+
loopTools: [
|
|
1700
1172
|
{
|
|
1701
|
-
|
|
1702
|
-
|
|
1703
|
-
|
|
1704
|
-
|
|
1173
|
+
name: "bash",
|
|
1174
|
+
description: "Run a shell command",
|
|
1175
|
+
input_schema: {
|
|
1176
|
+
type: "object",
|
|
1177
|
+
properties: { command: { type: "string" } },
|
|
1178
|
+
},
|
|
1705
1179
|
},
|
|
1706
|
-
]
|
|
1707
|
-
|
|
1708
|
-
|
|
1709
|
-
|
|
1710
|
-
|
|
1180
|
+
],
|
|
1181
|
+
toolExecutor: async () => ({
|
|
1182
|
+
content: "file1.ts\nfile2.ts",
|
|
1183
|
+
isError: false,
|
|
1184
|
+
}),
|
|
1711
1185
|
contextWindowManager: {
|
|
1712
1186
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
1713
1187
|
maybeCompact: async () => {
|
|
@@ -1741,8 +1215,9 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1741
1215
|
// The mid-loop budget check should have triggered compaction
|
|
1742
1216
|
expect(compactionCalled).toBe(true);
|
|
1743
1217
|
|
|
1744
|
-
//
|
|
1745
|
-
|
|
1218
|
+
// Provider called twice: the tool turn that tripped the gate, then the
|
|
1219
|
+
// post-compaction turn that completed the run.
|
|
1220
|
+
expect(calls.length).toBe(2);
|
|
1746
1221
|
|
|
1747
1222
|
// No conversation_error should be emitted
|
|
1748
1223
|
const conversationError = events.find(
|
|
@@ -1783,104 +1258,36 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1783
1258
|
return 175_000;
|
|
1784
1259
|
};
|
|
1785
1260
|
|
|
1786
|
-
let agentLoopCallCount = 0;
|
|
1787
1261
|
let contextTooLargeEmitted = false;
|
|
1788
1262
|
|
|
1789
|
-
|
|
1790
|
-
|
|
1791
|
-
|
|
1792
|
-
|
|
1793
|
-
|
|
1794
|
-
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
|
|
1799
|
-
|
|
1800
|
-
const toolId = `tu-${i}`;
|
|
1801
|
-
const assistantMsg: Message = {
|
|
1802
|
-
role: "assistant" as const,
|
|
1803
|
-
content: [
|
|
1804
|
-
{ type: "text", text: `Step ${i}` },
|
|
1805
|
-
{
|
|
1806
|
-
type: "tool_use",
|
|
1807
|
-
id: toolId,
|
|
1808
|
-
name: "bash",
|
|
1809
|
-
input: { command: `cmd-${i}` },
|
|
1810
|
-
},
|
|
1811
|
-
] as ContentBlock[],
|
|
1812
|
-
};
|
|
1813
|
-
const resultMsg: Message = {
|
|
1814
|
-
role: "user" as const,
|
|
1815
|
-
content: [
|
|
1816
|
-
{
|
|
1817
|
-
type: "tool_result",
|
|
1818
|
-
tool_use_id: toolId,
|
|
1819
|
-
content: "x".repeat(10_000),
|
|
1820
|
-
is_error: false,
|
|
1821
|
-
},
|
|
1822
|
-
] as ContentBlock[],
|
|
1823
|
-
};
|
|
1824
|
-
currentHistory.push(assistantMsg, resultMsg);
|
|
1825
|
-
|
|
1826
|
-
onEvent({
|
|
1827
|
-
type: "message_complete",
|
|
1828
|
-
message: assistantMsg,
|
|
1829
|
-
});
|
|
1830
|
-
onEvent({
|
|
1831
|
-
type: "usage",
|
|
1832
|
-
inputTokens: 50_000 + i * 20_000,
|
|
1833
|
-
outputTokens: 50,
|
|
1834
|
-
model: "test-model",
|
|
1835
|
-
providerDurationMs: 100,
|
|
1836
|
-
});
|
|
1837
|
-
|
|
1838
|
-
if (options?.onCheckpoint) {
|
|
1839
|
-
const decision = await options.onCheckpoint({
|
|
1840
|
-
turnIndex: i,
|
|
1841
|
-
toolCount: 1,
|
|
1842
|
-
hasToolUse: true,
|
|
1843
|
-
history: currentHistory,
|
|
1844
|
-
});
|
|
1845
|
-
if (decision !== "continue") {
|
|
1846
|
-
return currentHistory;
|
|
1847
|
-
}
|
|
1848
|
-
}
|
|
1849
|
-
}
|
|
1263
|
+
// Each tool round produces a large result; the estimate grows with each
|
|
1264
|
+
// checkpoint until tool round 3 trips the mid-loop gate (175k > 161_500).
|
|
1265
|
+
// Compaction runs in place (productive) and the loop continues, so the
|
|
1266
|
+
// following plain-text provider call completes the turn. The provider
|
|
1267
|
+
// never rejects with context_too_large.
|
|
1268
|
+
const { provider, calls } = createMockProvider([
|
|
1269
|
+
toolUseResponse("tu-0", "bash", { command: "cmd-0" }),
|
|
1270
|
+
toolUseResponse("tu-1", "bash", { command: "cmd-1" }),
|
|
1271
|
+
toolUseResponse("tu-2", "bash", { command: "cmd-2" }),
|
|
1272
|
+
textResponse("completed after mid-loop compaction"),
|
|
1273
|
+
]);
|
|
1850
1274
|
|
|
1851
|
-
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
// Second call (after compaction): complete
|
|
1855
|
-
onEvent({
|
|
1856
|
-
type: "message_complete",
|
|
1857
|
-
message: {
|
|
1858
|
-
role: "assistant",
|
|
1859
|
-
content: [
|
|
1860
|
-
{ type: "text", text: "completed after mid-loop compaction" },
|
|
1861
|
-
],
|
|
1862
|
-
},
|
|
1863
|
-
});
|
|
1864
|
-
onEvent({
|
|
1865
|
-
type: "usage",
|
|
1866
|
-
inputTokens: 60_000,
|
|
1867
|
-
outputTokens: 100,
|
|
1868
|
-
model: "test-model",
|
|
1869
|
-
providerDurationMs: 200,
|
|
1870
|
-
});
|
|
1871
|
-
return [
|
|
1872
|
-
...messages,
|
|
1275
|
+
const ctx = makeCtx({
|
|
1276
|
+
loopProvider: provider,
|
|
1277
|
+
loopTools: [
|
|
1873
1278
|
{
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
|
|
1279
|
+
name: "bash",
|
|
1280
|
+
description: "Run a shell command",
|
|
1281
|
+
input_schema: {
|
|
1282
|
+
type: "object",
|
|
1283
|
+
properties: { command: { type: "string" } },
|
|
1284
|
+
},
|
|
1878
1285
|
},
|
|
1879
|
-
]
|
|
1880
|
-
|
|
1881
|
-
|
|
1882
|
-
|
|
1883
|
-
|
|
1286
|
+
],
|
|
1287
|
+
toolExecutor: async () => ({
|
|
1288
|
+
content: "x".repeat(10_000),
|
|
1289
|
+
isError: false,
|
|
1290
|
+
}),
|
|
1884
1291
|
contextWindowManager: {
|
|
1885
1292
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
1886
1293
|
maybeCompact: async () => {
|
|
@@ -1927,8 +1334,9 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1927
1334
|
// The provider should NEVER have rejected with context_too_large
|
|
1928
1335
|
expect(contextTooLargeEmitted).toBe(false);
|
|
1929
1336
|
|
|
1930
|
-
//
|
|
1931
|
-
|
|
1337
|
+
// Provider called four times: three tool rounds (the third trips the
|
|
1338
|
+
// mid-loop gate) plus the post-compaction text turn that completes.
|
|
1339
|
+
expect(calls.length).toBe(4);
|
|
1932
1340
|
|
|
1933
1341
|
// No conversation_error
|
|
1934
1342
|
const conversationError = events.find(
|
|
@@ -1957,82 +1365,7 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1957
1365
|
return 170_000;
|
|
1958
1366
|
};
|
|
1959
1367
|
|
|
1960
|
-
|
|
1961
|
-
const agentLoopRun: AgentLoopRun = async (messages, onEvent, options) => {
|
|
1962
|
-
// Prime the assistant row anchor — production code emits this from
|
|
1963
|
-
// `AgentLoop.run` just before `provider.sendMessage`.
|
|
1964
|
-
await onEvent({ type: "llm_call_started" });
|
|
1965
|
-
agentLoopCallCount++;
|
|
1966
|
-
|
|
1967
|
-
// Every call: simulate tool progress then yield at checkpoint
|
|
1968
|
-
const withProgress: Message[] = [
|
|
1969
|
-
...messages,
|
|
1970
|
-
{
|
|
1971
|
-
role: "assistant" as const,
|
|
1972
|
-
content: [
|
|
1973
|
-
{ type: "text", text: `Tool call ${agentLoopCallCount}` },
|
|
1974
|
-
{
|
|
1975
|
-
type: "tool_use",
|
|
1976
|
-
id: `tu-${agentLoopCallCount}`,
|
|
1977
|
-
name: "bash",
|
|
1978
|
-
input: { command: "ls" },
|
|
1979
|
-
},
|
|
1980
|
-
] as ContentBlock[],
|
|
1981
|
-
},
|
|
1982
|
-
{
|
|
1983
|
-
role: "user" as const,
|
|
1984
|
-
content: [
|
|
1985
|
-
{
|
|
1986
|
-
type: "tool_result",
|
|
1987
|
-
tool_use_id: `tu-${agentLoopCallCount}`,
|
|
1988
|
-
content: "output",
|
|
1989
|
-
is_error: false,
|
|
1990
|
-
},
|
|
1991
|
-
] as ContentBlock[],
|
|
1992
|
-
},
|
|
1993
|
-
];
|
|
1994
|
-
|
|
1995
|
-
onEvent({
|
|
1996
|
-
type: "message_complete",
|
|
1997
|
-
message: {
|
|
1998
|
-
role: "assistant",
|
|
1999
|
-
content: [
|
|
2000
|
-
{ type: "text", text: `Tool call ${agentLoopCallCount}` },
|
|
2001
|
-
{
|
|
2002
|
-
type: "tool_use",
|
|
2003
|
-
id: `tu-${agentLoopCallCount}`,
|
|
2004
|
-
name: "bash",
|
|
2005
|
-
input: { command: "ls" },
|
|
2006
|
-
},
|
|
2007
|
-
],
|
|
2008
|
-
},
|
|
2009
|
-
});
|
|
2010
|
-
onEvent({
|
|
2011
|
-
type: "usage",
|
|
2012
|
-
inputTokens: 100,
|
|
2013
|
-
outputTokens: 50,
|
|
2014
|
-
model: "test-model",
|
|
2015
|
-
providerDurationMs: 100,
|
|
2016
|
-
});
|
|
2017
|
-
|
|
2018
|
-
// Always yield at checkpoint — simulates compaction not helping
|
|
2019
|
-
if (options?.onCheckpoint) {
|
|
2020
|
-
const decision = await options.onCheckpoint({
|
|
2021
|
-
turnIndex: 0,
|
|
2022
|
-
toolCount: 1,
|
|
2023
|
-
hasToolUse: true,
|
|
2024
|
-
history: withProgress,
|
|
2025
|
-
});
|
|
2026
|
-
if (decision !== "continue") {
|
|
2027
|
-
return withProgress;
|
|
2028
|
-
}
|
|
2029
|
-
}
|
|
2030
|
-
|
|
2031
|
-
return withProgress;
|
|
2032
|
-
};
|
|
2033
|
-
|
|
2034
|
-
let compactionCallCount = 0;
|
|
2035
|
-
// Convergence reducer: reduce tokens enough to succeed
|
|
1368
|
+
// The convergence reducer reduces tokens enough for the rerun to recover.
|
|
2036
1369
|
let convergenceReducerCalled = false;
|
|
2037
1370
|
mockReducerStepFn = (msgs: Message[]) => {
|
|
2038
1371
|
convergenceReducerCalled = true;
|
|
@@ -2048,8 +1381,30 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2048
1381
|
};
|
|
2049
1382
|
};
|
|
2050
1383
|
|
|
1384
|
+
// Every provider call returns a tool_use, so each loop run does a tool
|
|
1385
|
+
// turn that trips the mid-loop budget gate. On the initial run the gate
|
|
1386
|
+
// calls compaction (which surfaces `exhausted: true`); the convergence
|
|
1387
|
+
// rerun runs without a compaction hook and yields "budget" directly.
|
|
1388
|
+
// With the reducer exhausted, the convergence loop terminates with the
|
|
1389
|
+
// turn still over budget and the orchestrator stamps `context_too_large`.
|
|
1390
|
+
const { provider, calls } = createMockProvider([
|
|
1391
|
+
toolUseResponse("tu-1", "bash", { command: "ls" }),
|
|
1392
|
+
]);
|
|
1393
|
+
|
|
1394
|
+
let compactionCallCount = 0;
|
|
2051
1395
|
const ctx = makeCtx({
|
|
2052
|
-
|
|
1396
|
+
loopProvider: provider,
|
|
1397
|
+
loopTools: [
|
|
1398
|
+
{
|
|
1399
|
+
name: "bash",
|
|
1400
|
+
description: "Run a shell command",
|
|
1401
|
+
input_schema: {
|
|
1402
|
+
type: "object",
|
|
1403
|
+
properties: { command: { type: "string" } },
|
|
1404
|
+
},
|
|
1405
|
+
},
|
|
1406
|
+
],
|
|
1407
|
+
toolExecutor: async () => ({ content: "output", isError: false }),
|
|
2053
1408
|
contextWindowManager: {
|
|
2054
1409
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
2055
1410
|
maybeCompact: async () => {
|
|
@@ -2057,9 +1412,9 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2057
1412
|
// Compaction's internal retry budget is exhausted — the
|
|
2058
1413
|
// compactor itself ran maxAttempts passes and still couldn't
|
|
2059
1414
|
// drop below the auto-threshold. `maybeCompact` surfaces this
|
|
2060
|
-
// via `exhausted: true` so the
|
|
2061
|
-
// straight to the convergence loop
|
|
2062
|
-
// stuck compactor.
|
|
1415
|
+
// via `exhausted: true` so the loop yields "budget" and the
|
|
1416
|
+
// orchestrator escalates straight to the convergence loop
|
|
1417
|
+
// instead of looping on a stuck compactor.
|
|
2063
1418
|
return {
|
|
2064
1419
|
compacted: true,
|
|
2065
1420
|
exhausted: true,
|
|
@@ -2094,10 +1449,10 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2094
1449
|
// `ContextWindowManager.maybeCompact`.
|
|
2095
1450
|
expect(compactionCallCount).toBe(2);
|
|
2096
1451
|
|
|
2097
|
-
//
|
|
2098
|
-
// mid-loop re-entries because the orchestrator
|
|
2099
|
-
// `exhausted` before re-invoking the
|
|
2100
|
-
expect(
|
|
1452
|
+
// Provider calls: 1 initial tool turn (yields budget) + 1 convergence
|
|
1453
|
+
// rerun that recovers. No mid-loop re-entries because the orchestrator
|
|
1454
|
+
// broke out on `exhausted` before re-invoking the loop.
|
|
1455
|
+
expect(calls.length).toBe(2);
|
|
2101
1456
|
|
|
2102
1457
|
// After the compactor exhausted itself, the convergence loop
|
|
2103
1458
|
// should have been triggered (contextTooLargeDetected set to true)
|
|
@@ -2132,83 +1487,32 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2132
1487
|
return 170_000;
|
|
2133
1488
|
};
|
|
2134
1489
|
|
|
2135
|
-
// A single tool round reaches one checkpoint; the in-loop budget
|
|
2136
|
-
//
|
|
2137
|
-
// the
|
|
2138
|
-
// orchestrator
|
|
2139
|
-
|
|
2140
|
-
|
|
2141
|
-
|
|
2142
|
-
|
|
2143
|
-
|
|
2144
|
-
const withProgress: Message[] = [
|
|
2145
|
-
...messages,
|
|
2146
|
-
{
|
|
2147
|
-
role: "assistant" as const,
|
|
2148
|
-
content: [
|
|
2149
|
-
{ type: "text", text: `Tool call ${agentLoopCallCount}` },
|
|
2150
|
-
{
|
|
2151
|
-
type: "tool_use",
|
|
2152
|
-
id: `tu-${agentLoopCallCount}`,
|
|
2153
|
-
name: "bash",
|
|
2154
|
-
input: { command: "ls" },
|
|
2155
|
-
},
|
|
2156
|
-
] as ContentBlock[],
|
|
2157
|
-
},
|
|
2158
|
-
{
|
|
2159
|
-
role: "user" as const,
|
|
2160
|
-
content: [
|
|
2161
|
-
{
|
|
2162
|
-
type: "tool_result",
|
|
2163
|
-
tool_use_id: `tu-${agentLoopCallCount}`,
|
|
2164
|
-
content: "output",
|
|
2165
|
-
is_error: false,
|
|
2166
|
-
},
|
|
2167
|
-
] as ContentBlock[],
|
|
2168
|
-
},
|
|
2169
|
-
];
|
|
2170
|
-
|
|
2171
|
-
onEvent({
|
|
2172
|
-
type: "message_complete",
|
|
2173
|
-
message: {
|
|
2174
|
-
role: "assistant",
|
|
2175
|
-
content: [
|
|
2176
|
-
{ type: "text", text: `Tool call ${agentLoopCallCount}` },
|
|
2177
|
-
{
|
|
2178
|
-
type: "tool_use",
|
|
2179
|
-
id: `tu-${agentLoopCallCount}`,
|
|
2180
|
-
name: "bash",
|
|
2181
|
-
input: { command: "ls" },
|
|
2182
|
-
},
|
|
2183
|
-
],
|
|
2184
|
-
},
|
|
2185
|
-
});
|
|
2186
|
-
onEvent({
|
|
2187
|
-
type: "usage",
|
|
2188
|
-
inputTokens: 100,
|
|
2189
|
-
outputTokens: 50,
|
|
2190
|
-
model: "test-model",
|
|
2191
|
-
providerDurationMs: 100,
|
|
2192
|
-
});
|
|
2193
|
-
|
|
2194
|
-
if (options?.onCheckpoint) {
|
|
2195
|
-
await options.onCheckpoint({
|
|
2196
|
-
turnIndex: 0,
|
|
2197
|
-
toolCount: 1,
|
|
2198
|
-
hasToolUse: true,
|
|
2199
|
-
history: withProgress,
|
|
2200
|
-
});
|
|
2201
|
-
}
|
|
2202
|
-
|
|
2203
|
-
return withProgress;
|
|
2204
|
-
};
|
|
1490
|
+
// A single tool round reaches one checkpoint; the in-loop budget gate
|
|
1491
|
+
// trips there and compaction runs in place. The loop continues the run
|
|
1492
|
+
// itself — the following provider call returns plain text and the turn
|
|
1493
|
+
// completes — so the orchestrator never re-enters the convergence loop.
|
|
1494
|
+
const { provider, calls } = createMockProvider([
|
|
1495
|
+
toolUseResponse("tu-1", "bash", { command: "ls" }),
|
|
1496
|
+
textResponse("final answer"),
|
|
1497
|
+
]);
|
|
2205
1498
|
|
|
2206
1499
|
// Compaction reports `estimatedInputTokens` well below the 161_500
|
|
2207
1500
|
// threshold — the "compaction is productive" signal (no `exhausted`
|
|
2208
1501
|
// flag) that lets the loop continue in place.
|
|
2209
1502
|
let compactionCallCount = 0;
|
|
2210
1503
|
const ctx = makeCtx({
|
|
2211
|
-
|
|
1504
|
+
loopProvider: provider,
|
|
1505
|
+
loopTools: [
|
|
1506
|
+
{
|
|
1507
|
+
name: "bash",
|
|
1508
|
+
description: "Run a shell command",
|
|
1509
|
+
input_schema: {
|
|
1510
|
+
type: "object",
|
|
1511
|
+
properties: { command: { type: "string" } },
|
|
1512
|
+
},
|
|
1513
|
+
},
|
|
1514
|
+
],
|
|
1515
|
+
toolExecutor: async () => ({ content: "output", isError: false }),
|
|
2212
1516
|
contextWindowManager: {
|
|
2213
1517
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
2214
1518
|
maybeCompact: async () => {
|
|
@@ -2239,18 +1543,20 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2239
1543
|
|
|
2240
1544
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
2241
1545
|
|
|
2242
|
-
// 1 initial auto-compact + 1 productive mid-loop compaction.
|
|
2243
|
-
// loop continues in place after compacting, so the orchestrator
|
|
2244
|
-
// never re-enters `run()` — it is invoked exactly once.
|
|
1546
|
+
// 1 initial auto-compact + 1 productive mid-loop compaction.
|
|
2245
1547
|
expect(compactionCallCount).toBe(2);
|
|
2246
|
-
|
|
1548
|
+
// The loop continued in place after compacting: a tool turn followed by
|
|
1549
|
+
// the post-compaction text turn, both within a single run.
|
|
1550
|
+
expect(calls.length).toBe(2);
|
|
2247
1551
|
|
|
2248
1552
|
// No escalation to the convergence loop because the mid-loop
|
|
2249
|
-
// `maybeCompact` returned productive (no `exhausted` flag)
|
|
1553
|
+
// `maybeCompact` returned productive (no `exhausted` flag), and the turn
|
|
1554
|
+
// completed normally.
|
|
2250
1555
|
expect(setAgentLoopExitReasonOnLatestLogMock).not.toHaveBeenCalledWith(
|
|
2251
1556
|
"test-conv",
|
|
2252
1557
|
"context_too_large",
|
|
2253
1558
|
);
|
|
1559
|
+
expect(events.find((e) => e.type === "conversation_error")).toBeUndefined();
|
|
2254
1560
|
});
|
|
2255
1561
|
|
|
2256
1562
|
// ── Test 9 ────────────────────────────────────────────────────────
|
|
@@ -2272,78 +1578,13 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2272
1578
|
return 170_000;
|
|
2273
1579
|
};
|
|
2274
1580
|
|
|
2275
|
-
|
|
2276
|
-
|
|
2277
|
-
|
|
2278
|
-
|
|
2279
|
-
|
|
2280
|
-
|
|
2281
|
-
|
|
2282
|
-
const withProgress: Message[] = [
|
|
2283
|
-
...messages,
|
|
2284
|
-
{
|
|
2285
|
-
role: "assistant" as const,
|
|
2286
|
-
content: [
|
|
2287
|
-
{ type: "text", text: `Tool call ${agentLoopCallCount}` },
|
|
2288
|
-
{
|
|
2289
|
-
type: "tool_use",
|
|
2290
|
-
id: `tu-${agentLoopCallCount}`,
|
|
2291
|
-
name: "bash",
|
|
2292
|
-
input: { command: "ls" },
|
|
2293
|
-
},
|
|
2294
|
-
] as ContentBlock[],
|
|
2295
|
-
},
|
|
2296
|
-
{
|
|
2297
|
-
role: "user" as const,
|
|
2298
|
-
content: [
|
|
2299
|
-
{
|
|
2300
|
-
type: "tool_result",
|
|
2301
|
-
tool_use_id: `tu-${agentLoopCallCount}`,
|
|
2302
|
-
content: "output",
|
|
2303
|
-
is_error: false,
|
|
2304
|
-
},
|
|
2305
|
-
] as ContentBlock[],
|
|
2306
|
-
},
|
|
2307
|
-
];
|
|
2308
|
-
|
|
2309
|
-
onEvent({
|
|
2310
|
-
type: "message_complete",
|
|
2311
|
-
message: {
|
|
2312
|
-
role: "assistant",
|
|
2313
|
-
content: [
|
|
2314
|
-
{ type: "text", text: `Tool call ${agentLoopCallCount}` },
|
|
2315
|
-
{
|
|
2316
|
-
type: "tool_use",
|
|
2317
|
-
id: `tu-${agentLoopCallCount}`,
|
|
2318
|
-
name: "bash",
|
|
2319
|
-
input: { command: "ls" },
|
|
2320
|
-
},
|
|
2321
|
-
],
|
|
2322
|
-
},
|
|
2323
|
-
});
|
|
2324
|
-
onEvent({
|
|
2325
|
-
type: "usage",
|
|
2326
|
-
inputTokens: 100,
|
|
2327
|
-
outputTokens: 50,
|
|
2328
|
-
model: "test-model",
|
|
2329
|
-
providerDurationMs: 100,
|
|
2330
|
-
});
|
|
2331
|
-
|
|
2332
|
-
// Always yield at checkpoint — simulates reduction not helping enough
|
|
2333
|
-
if (options?.onCheckpoint) {
|
|
2334
|
-
const decision = await options.onCheckpoint({
|
|
2335
|
-
turnIndex: 0,
|
|
2336
|
-
toolCount: 1,
|
|
2337
|
-
hasToolUse: true,
|
|
2338
|
-
history: withProgress,
|
|
2339
|
-
});
|
|
2340
|
-
if (decision !== "continue") {
|
|
2341
|
-
return withProgress;
|
|
2342
|
-
}
|
|
2343
|
-
}
|
|
2344
|
-
|
|
2345
|
-
return withProgress;
|
|
2346
|
-
};
|
|
1581
|
+
// Every provider call returns a tool_use, so each loop run does a tool
|
|
1582
|
+
// turn that trips the mid-loop budget gate and yields "budget". The
|
|
1583
|
+
// initial run's gate calls compaction (exhausted); the convergence
|
|
1584
|
+
// reruns run without a compaction hook and yield directly.
|
|
1585
|
+
const { provider, calls } = createMockProvider([
|
|
1586
|
+
toolUseResponse("tu-1", "bash", { command: "ls" }),
|
|
1587
|
+
]);
|
|
2347
1588
|
|
|
2348
1589
|
// Convergence reducer: first call returns non-exhausted, second returns exhausted
|
|
2349
1590
|
let reducerCallCount = 0;
|
|
@@ -2375,7 +1616,18 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2375
1616
|
};
|
|
2376
1617
|
|
|
2377
1618
|
const ctx = makeCtx({
|
|
2378
|
-
|
|
1619
|
+
loopProvider: provider,
|
|
1620
|
+
loopTools: [
|
|
1621
|
+
{
|
|
1622
|
+
name: "bash",
|
|
1623
|
+
description: "Run a shell command",
|
|
1624
|
+
input_schema: {
|
|
1625
|
+
type: "object",
|
|
1626
|
+
properties: { command: { type: "string" } },
|
|
1627
|
+
},
|
|
1628
|
+
},
|
|
1629
|
+
],
|
|
1630
|
+
toolExecutor: async () => ({ content: "output", isError: false }),
|
|
2379
1631
|
contextWindowManager: {
|
|
2380
1632
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
2381
1633
|
// Under the new architecture (Compaction Re-homing Arc, Bullet 1)
|
|
@@ -2413,10 +1665,11 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2413
1665
|
// once more after yieldedForBudget triggered re-entry
|
|
2414
1666
|
expect(reducerCallCount).toBe(2);
|
|
2415
1667
|
|
|
2416
|
-
//
|
|
2417
|
-
//
|
|
2418
|
-
//
|
|
2419
|
-
|
|
1668
|
+
// Provider calls: 1 initial run + 2 convergence reruns = 3 calls, each a
|
|
1669
|
+
// tool turn that yields "budget". The mid-loop no longer drives
|
|
1670
|
+
// daemon-level retries — the manager owns its retry budget and signals
|
|
1671
|
+
// exhaustion via the `exhausted` flag.
|
|
1672
|
+
expect(calls.length).toBe(3);
|
|
2420
1673
|
expect(setAgentLoopExitReasonOnLatestLogMock).toHaveBeenCalledWith(
|
|
2421
1674
|
"test-conv",
|
|
2422
1675
|
"context_too_large",
|
|
@@ -2516,35 +1769,10 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2516
1769
|
};
|
|
2517
1770
|
};
|
|
2518
1771
|
|
|
2519
|
-
|
|
2520
|
-
|
|
2521
|
-
// `AgentLoop.run` just before `provider.sendMessage`.
|
|
2522
|
-
await onEvent({ type: "llm_call_started" });
|
|
2523
|
-
onEvent({
|
|
2524
|
-
type: "message_complete",
|
|
2525
|
-
message: {
|
|
2526
|
-
role: "assistant",
|
|
2527
|
-
content: [{ type: "text", text: "done" }],
|
|
2528
|
-
},
|
|
2529
|
-
});
|
|
2530
|
-
onEvent({
|
|
2531
|
-
type: "usage",
|
|
2532
|
-
inputTokens: 170_000,
|
|
2533
|
-
outputTokens: 200,
|
|
2534
|
-
model: "test-model",
|
|
2535
|
-
providerDurationMs: 500,
|
|
2536
|
-
});
|
|
2537
|
-
return [
|
|
2538
|
-
...messages,
|
|
2539
|
-
{
|
|
2540
|
-
role: "assistant" as const,
|
|
2541
|
-
content: [{ type: "text", text: "done" }] as ContentBlock[],
|
|
2542
|
-
},
|
|
2543
|
-
];
|
|
2544
|
-
};
|
|
2545
|
-
|
|
1772
|
+
// The preflight overflow reducer runs in the orchestrator before the loop,
|
|
1773
|
+
// so a single successful provider turn is enough to drive the path.
|
|
2546
1774
|
const ctx = makeCtx({
|
|
2547
|
-
|
|
1775
|
+
providerResponses: [textResponse("done")],
|
|
2548
1776
|
contextWindowManager: {
|
|
2549
1777
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
2550
1778
|
maybeCompact: async () => ({ compacted: false }),
|
|
@@ -2615,78 +1843,12 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2615
1843
|
// emergency compaction + final agentLoop.run path executes.
|
|
2616
1844
|
mockOverflowAction = "auto_compress_latest_turn";
|
|
2617
1845
|
|
|
2618
|
-
|
|
2619
|
-
|
|
2620
|
-
|
|
2621
|
-
|
|
2622
|
-
|
|
2623
|
-
|
|
2624
|
-
|
|
2625
|
-
const withProgress: Message[] = [
|
|
2626
|
-
...messages,
|
|
2627
|
-
{
|
|
2628
|
-
role: "assistant" as const,
|
|
2629
|
-
content: [
|
|
2630
|
-
{ type: "text", text: `tool call ${agentLoopCallCount}` },
|
|
2631
|
-
{
|
|
2632
|
-
type: "tool_use",
|
|
2633
|
-
id: `tu-${agentLoopCallCount}`,
|
|
2634
|
-
name: "bash",
|
|
2635
|
-
input: { command: "ls" },
|
|
2636
|
-
},
|
|
2637
|
-
] as ContentBlock[],
|
|
2638
|
-
},
|
|
2639
|
-
{
|
|
2640
|
-
role: "user" as const,
|
|
2641
|
-
content: [
|
|
2642
|
-
{
|
|
2643
|
-
type: "tool_result",
|
|
2644
|
-
tool_use_id: `tu-${agentLoopCallCount}`,
|
|
2645
|
-
content: "output",
|
|
2646
|
-
is_error: false,
|
|
2647
|
-
},
|
|
2648
|
-
] as ContentBlock[],
|
|
2649
|
-
},
|
|
2650
|
-
];
|
|
2651
|
-
|
|
2652
|
-
onEvent({
|
|
2653
|
-
type: "message_complete",
|
|
2654
|
-
message: {
|
|
2655
|
-
role: "assistant",
|
|
2656
|
-
content: [
|
|
2657
|
-
{ type: "text", text: `tool call ${agentLoopCallCount}` },
|
|
2658
|
-
{
|
|
2659
|
-
type: "tool_use",
|
|
2660
|
-
id: `tu-${agentLoopCallCount}`,
|
|
2661
|
-
name: "bash",
|
|
2662
|
-
input: { command: "ls" },
|
|
2663
|
-
},
|
|
2664
|
-
],
|
|
2665
|
-
},
|
|
2666
|
-
});
|
|
2667
|
-
onEvent({
|
|
2668
|
-
type: "usage",
|
|
2669
|
-
inputTokens: 100,
|
|
2670
|
-
outputTokens: 50,
|
|
2671
|
-
model: "test-model",
|
|
2672
|
-
providerDurationMs: 100,
|
|
2673
|
-
});
|
|
2674
|
-
|
|
2675
|
-
// Every checkpoint yields — including the final auto_compress rerun.
|
|
2676
|
-
if (options?.onCheckpoint) {
|
|
2677
|
-
const decision = await options.onCheckpoint({
|
|
2678
|
-
turnIndex: 0,
|
|
2679
|
-
toolCount: 1,
|
|
2680
|
-
hasToolUse: true,
|
|
2681
|
-
history: withProgress,
|
|
2682
|
-
});
|
|
2683
|
-
if (decision !== "continue") {
|
|
2684
|
-
return withProgress;
|
|
2685
|
-
}
|
|
2686
|
-
}
|
|
2687
|
-
|
|
2688
|
-
return withProgress;
|
|
2689
|
-
};
|
|
1846
|
+
// Every provider call returns a tool_use, so each loop run does a tool
|
|
1847
|
+
// turn that trips the mid-loop budget gate and yields "budget" —
|
|
1848
|
+
// including the final auto_compress rerun.
|
|
1849
|
+
const { provider } = createMockProvider([
|
|
1850
|
+
toolUseResponse("tu-1", "bash", { command: "ls" }),
|
|
1851
|
+
]);
|
|
2690
1852
|
|
|
2691
1853
|
// `maybeCompact` is invoked through three distinct call sites:
|
|
2692
1854
|
// 1. Start-of-turn compaction (no `force` option) — return a no-op
|
|
@@ -2702,7 +1864,18 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2702
1864
|
// as BUDGET_YIELD_UNRECOVERED.
|
|
2703
1865
|
let forcedMaybeCompactCallCount = 0;
|
|
2704
1866
|
const ctx = makeCtx({
|
|
2705
|
-
|
|
1867
|
+
loopProvider: provider,
|
|
1868
|
+
loopTools: [
|
|
1869
|
+
{
|
|
1870
|
+
name: "bash",
|
|
1871
|
+
description: "Run a shell command",
|
|
1872
|
+
input_schema: {
|
|
1873
|
+
type: "object",
|
|
1874
|
+
properties: { command: { type: "string" } },
|
|
1875
|
+
},
|
|
1876
|
+
},
|
|
1877
|
+
],
|
|
1878
|
+
toolExecutor: async () => ({ content: "output", isError: false }),
|
|
2706
1879
|
contextWindowManager: {
|
|
2707
1880
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
2708
1881
|
maybeCompact: async (
|