@vellumai/assistant 0.8.7 → 0.8.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +20 -4
- package/docker-entrypoint.sh +4 -2
- package/docker-init-apt-root.sh +3 -1
- package/docker-kata-apt-env.sh +3 -1
- package/docker-kata-runtime-family.sh +12 -0
- package/docs/architecture/memory.md +1 -1
- package/docs/plugins.md +75 -79
- package/examples/plugins/echo/README.md +6 -12
- package/examples/plugins/echo/register.ts +0 -41
- package/node_modules/@vellumai/skill-host-contracts/src/server-message.ts +3 -3
- package/openapi.yaml +3381 -348
- package/package.json +1 -1
- package/scripts/generate-openapi.ts +68 -41
- package/src/__tests__/agent-loop-exit-reason.test.ts +34 -39
- package/src/__tests__/agent-loop-provider-error-recording.test.ts +1 -1
- package/src/__tests__/agent-loop.test.ts +37 -87
- package/src/__tests__/agent-wake-disk-pressure-callsite.test.ts +2 -0
- package/src/__tests__/annotate-activity-metadata.test.ts +262 -0
- package/src/__tests__/annotate-risk-options.test.ts +2 -3
- package/src/__tests__/anthropic-provider.test.ts +95 -2
- package/src/__tests__/assistant-event-hub.test.ts +25 -0
- package/src/__tests__/assistant-events-sse-shed.test.ts +8 -0
- package/src/__tests__/{conversation-stream-state.test.ts → assistant-stream-state.test.ts} +252 -91
- package/src/__tests__/auth-fallback-events-store.test.ts +116 -0
- package/src/__tests__/background-workers-disk-pressure.test.ts +6 -0
- package/src/__tests__/btw-routes.test.ts +62 -3
- package/src/__tests__/build-persisted-content.test.ts +184 -0
- package/src/__tests__/catalog-files.test.ts +1 -1
- package/src/__tests__/clawhub-files.test.ts +1 -1
- package/src/__tests__/compaction-pipeline.test.ts +1 -1
- package/src/__tests__/compaction.benchmark.test.ts +0 -30
- package/src/__tests__/config-watcher.test.ts +1 -1
- package/src/__tests__/conversation-abort-tool-results.test.ts +57 -19
- package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +6 -2
- package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +10 -4
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +313 -1136
- package/src/__tests__/conversation-agent-loop.test.ts +596 -1616
- package/src/__tests__/conversation-analysis-routes.test.ts +6 -0
- package/src/__tests__/conversation-history-web-search.test.ts +11 -1
- package/src/__tests__/conversation-pairing.test.ts +4 -31
- package/src/__tests__/conversation-process-app-control-preactivation.test.ts +6 -0
- package/src/__tests__/conversation-provider-retry-repair.test.ts +26 -5
- package/src/__tests__/conversation-queue.test.ts +2 -0
- package/src/__tests__/conversation-routes-disk-view.test.ts +3 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +6 -5
- package/src/__tests__/conversation-runtime-assembly.test.ts +170 -229
- package/src/__tests__/conversation-runtime-workspace.test.ts +3 -24
- package/src/__tests__/conversation-slash-commands.test.ts +8 -42
- package/src/__tests__/conversation-slash-queue.test.ts +6 -1
- package/src/__tests__/conversation-surfaces-action-delivery.test.ts +84 -0
- package/src/__tests__/conversation-sync-tags.test.ts +27 -15
- package/src/__tests__/conversation-title-service.test.ts +135 -2
- package/src/__tests__/conversation-workspace-injection.test.ts +6 -1
- package/src/__tests__/cross-provider-web-search.test.ts +214 -1
- package/src/__tests__/db-schedule-syntax-migration.test.ts +5 -0
- package/src/__tests__/dm-persistence.test.ts +5 -1
- package/src/__tests__/empty-response-hook.test.ts +304 -0
- package/src/__tests__/feature-flag-test-helpers.ts +2 -2
- package/src/__tests__/gemini-image-service.test.ts +13 -0
- package/src/__tests__/helpers/mock-provider.ts +110 -0
- package/src/__tests__/helpers/native-web-search-harness.ts +129 -0
- package/src/__tests__/history-repair-hook.test.ts +1 -0
- package/src/__tests__/identity-intro-cache.test.ts +12 -100
- package/src/__tests__/identity-routes.test.ts +248 -7
- package/src/__tests__/inbound-slack-persistence.test.ts +5 -1
- package/src/__tests__/injector-background-turn.test.ts +2 -8
- package/src/__tests__/injector-chain.test.ts +106 -270
- package/src/__tests__/injector-disk-pressure.test.ts +3 -12
- package/src/__tests__/injector-document-comments.test.ts +2 -2
- package/src/__tests__/injector-pkb-v2-silenced.test.ts +30 -22
- package/src/__tests__/injector-v3-suppression.test.ts +31 -37
- package/src/__tests__/internal-telemetry-routes.test.ts +109 -0
- package/src/__tests__/list-messages-page-latest.test.ts +60 -0
- package/src/__tests__/list-messages-tool-merge.test.ts +20 -0
- package/src/__tests__/llm-usage-store.test.ts +223 -1
- package/src/__tests__/memory-retrieval-hook.test.ts +297 -0
- package/src/__tests__/memory-v2-static-injector.test.ts +103 -35
- package/src/__tests__/native-web-search.test.ts +191 -0
- package/src/__tests__/onboarding-template-contract.test.ts +2 -0
- package/src/__tests__/openai-image-service.test.ts +17 -0
- package/src/__tests__/openai-provider.test.ts +31 -1
- package/src/__tests__/persist-unsendable-image.test.ts +215 -0
- package/src/__tests__/persistence-secret-redaction.test.ts +1 -0
- package/src/__tests__/pipeline-runner.test.ts +29 -39
- package/src/__tests__/pkb-autoinject.test.ts +2 -5
- package/src/__tests__/plugin-bootstrap.test.ts +13 -28
- package/src/__tests__/plugin-registry.test.ts +0 -27
- package/src/__tests__/plugin-types.test.ts +2 -125
- package/src/__tests__/process-message-display-content.test.ts +6 -2
- package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +5 -1
- package/src/__tests__/resolve-trust-class.test.ts +4 -4
- package/src/__tests__/runtime-events-sse-reconnect.test.ts +60 -23
- package/src/__tests__/schedule-routes.test.ts +603 -2
- package/src/__tests__/schedule-store.test.ts +41 -0
- package/src/__tests__/schedule-tools.test.ts +35 -0
- package/src/__tests__/server-history-render.test.ts +314 -1
- package/src/__tests__/skillssh-files.test.ts +1 -1
- package/src/__tests__/system-prompt.test.ts +20 -0
- package/src/__tests__/task-scheduler.test.ts +162 -1
- package/src/__tests__/terminal-tools.test.ts +6 -1
- package/src/__tests__/title-generate-hook.test.ts +319 -0
- package/src/__tests__/tool-error-hook.test.ts +278 -0
- package/src/__tests__/tool-preview-lifecycle.test.ts +468 -5
- package/src/__tests__/tool-result-metadata-plumbing.test.ts +1 -0
- package/src/__tests__/tool-result-truncate-hook.test.ts +127 -0
- package/src/__tests__/tool-result-truncation.test.ts +0 -2
- package/src/__tests__/ui-choice-copy-surfaces.test.ts +254 -0
- package/src/__tests__/ui-work-result-surface.test.ts +159 -0
- package/src/__tests__/usage-routes.test.ts +285 -1
- package/src/__tests__/user-plugin-loader.test.ts +2 -2
- package/src/__tests__/voice-session-bridge.test.ts +6 -3
- package/src/__tests__/web-search-backend-failure.test.ts +166 -0
- package/src/agent/loop.ts +346 -442
- package/src/api/events/assistant-thinking-delta.ts +33 -0
- package/src/api/events/tool-output-chunk.ts +45 -0
- package/src/api/events/tool-use-preview-start.ts +32 -0
- package/src/api/events/trace-event.ts +69 -0
- package/src/api/index.ts +48 -13
- package/src/api/responses/conversation-message.ts +368 -0
- package/src/avatar/__tests__/avatar-store.test.ts +34 -29
- package/src/cli/commands/__tests__/notifications.test.ts +58 -14
- package/src/cli/commands/notifications.ts +112 -60
- package/src/config/assistant-feature-flags.ts +22 -11
- package/src/config/bundled-skills/app-builder/SKILL.md +3 -20
- package/src/config/bundled-skills/app-builder/references/examples/README.md +17 -0
- package/src/config/bundled-skills/app-builder/references/examples/expense-tracker.md +515 -0
- package/src/config/bundled-skills/app-builder/references/examples/focus-timer.md +342 -0
- package/src/config/bundled-skills/app-builder/references/examples/habit-tracker.md +490 -0
- package/src/config/bundled-skills/document-editor/SKILL.md +1 -1
- package/src/config/bundled-skills/messaging/SKILL.md +0 -7
- package/src/config/feature-flag-cache.ts +3 -3
- package/src/config/feature-flag-registry.json +35 -3
- package/src/config/schemas/__tests__/memory-v2.test.ts +1 -0
- package/src/config/schemas/__tests__/memory-v3.test.ts +25 -0
- package/src/config/schemas/llm.ts +1 -0
- package/src/config/schemas/memory-v2.ts +8 -0
- package/src/config/schemas/memory-v3.ts +8 -0
- package/src/config/schemas/platform.ts +8 -0
- package/src/config/seed-inference-profiles.ts +2 -2
- package/src/config/skills.ts +13 -0
- package/src/context/compactor.ts +1 -1
- package/src/context/strip-injections.ts +122 -0
- package/src/context/token-estimator.ts +23 -0
- package/src/context/tool-result-truncation.ts +0 -23
- package/src/context/window-manager.ts +3 -6
- package/src/credential-execution/executable-discovery.ts +16 -0
- package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +6 -0
- package/src/daemon/__tests__/inference-profile-notification.test.ts +153 -0
- package/src/daemon/__tests__/native-web-search-metadata.test.ts +10 -8
- package/src/daemon/assistant-attachments.ts +1 -1
- package/src/daemon/config-watcher.ts +2 -2
- package/src/daemon/context-overflow-reducer.ts +0 -1
- package/src/daemon/conversation-agent-loop-handlers.ts +605 -153
- package/src/daemon/conversation-agent-loop.ts +281 -760
- package/src/daemon/conversation-history.ts +5 -4
- package/src/daemon/conversation-lifecycle.ts +3 -4
- package/src/daemon/conversation-messaging.ts +7 -6
- package/src/daemon/conversation-process.ts +11 -16
- package/src/daemon/conversation-runtime-assembly.ts +130 -347
- package/src/daemon/conversation-slash.ts +6 -25
- package/src/daemon/conversation-surfaces.ts +222 -4
- package/src/daemon/conversation-tool-setup.ts +2 -29
- package/src/daemon/conversation.ts +32 -14
- package/src/daemon/external-plugins-bootstrap.ts +9 -10
- package/src/daemon/handlers/config-a2a.ts +51 -36
- package/src/daemon/handlers/config-slack-channel.ts +20 -14
- package/src/daemon/handlers/config-telegram.ts +16 -2
- package/src/daemon/handlers/shared.ts +156 -84
- package/src/daemon/handlers/skills.ts +39 -10
- package/src/daemon/lifecycle.ts +4 -0
- package/src/daemon/message-types/apps.ts +1 -29
- package/src/daemon/message-types/messages.ts +9 -57
- package/src/daemon/message-types/skills.ts +2 -0
- package/src/daemon/message-types/surfaces.ts +136 -3
- package/src/daemon/now-scratchpad.ts +21 -0
- package/src/daemon/orphan-reaper.test.ts +210 -0
- package/src/daemon/orphan-reaper.ts +240 -0
- package/src/daemon/persist-unsendable-image.ts +117 -0
- package/src/daemon/process-message.ts +1 -3
- package/src/daemon/trace-emitter.ts +6 -4
- package/src/daemon/trust-context.ts +19 -0
- package/src/daemon/wake-target-adapter.ts +3 -1
- package/src/home/home-greeting-cache.ts +24 -1
- package/src/ipc/gateway-client.test.ts +2 -2
- package/src/ipc/gateway-client.ts +3 -3
- package/src/media/gemini-image-service.ts +15 -0
- package/src/media/openai-image-service.ts +14 -0
- package/src/media/types.ts +34 -0
- package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +56 -0
- package/src/memory/auth-fallback-events-store.ts +94 -0
- package/src/memory/conversation-title-service.ts +65 -41
- package/src/memory/db-init.ts +4 -0
- package/src/memory/graph/__tests__/conversation-graph-memory-registry.test.ts +119 -0
- package/src/memory/graph/conversation-graph-memory.ts +65 -0
- package/src/memory/jobs-store.ts +33 -0
- package/src/memory/jobs-worker.ts +31 -4
- package/src/memory/llm-usage-store.ts +224 -50
- package/src/memory/migrations/222-strip-placeholder-sentinels-from-messages.ts +6 -5
- package/src/memory/migrations/270-schedule-source-conversation.ts +13 -0
- package/src/memory/migrations/271-create-auth-fallback-events.ts +21 -0
- package/src/memory/migrations/index.ts +2 -0
- package/src/memory/pkb/autoinject.ts +61 -0
- package/src/memory/pkb/context.ts +50 -0
- package/src/memory/pkb/types.ts +14 -0
- package/src/memory/schedule-attribution-sql.ts +104 -0
- package/src/memory/schema/infrastructure.ts +16 -0
- package/src/memory/usage-grouped-buckets.ts +6 -1
- package/src/memory/v2/__tests__/consolidation-job.test.ts +1 -1
- package/src/memory/v2/consolidation-job.ts +1 -1
- package/src/memory/v3/__tests__/health.test.ts +16 -0
- package/src/memory/v3/__tests__/orchestrate.test.ts +45 -9
- package/src/memory/v3/__tests__/provider-blocks.test.ts +13 -0
- package/src/memory/v3/__tests__/router.test.ts +101 -29
- package/src/memory/v3/__tests__/selector.test.ts +93 -27
- package/src/memory/v3/__tests__/shadow-plugin.test.ts +23 -5
- package/src/memory/v3/health.ts +0 -0
- package/src/memory/v3/llm-retry.ts +32 -0
- package/src/memory/v3/orchestrate.ts +26 -14
- package/src/memory/v3/provider-blocks.ts +15 -5
- package/src/memory/v3/router.ts +48 -42
- package/src/memory/v3/selector.ts +57 -42
- package/src/memory/v3/shadow-plugin.ts +47 -15
- package/src/memory/v3/types.ts +8 -0
- package/src/notifications/conversation-pairing.ts +8 -15
- package/src/notifications/decision-engine.ts +6 -3
- package/src/notifications/home-feed-side-effect.ts +12 -1
- package/src/permissions/prompter.ts +4 -0
- package/src/plugin-api/constants.ts +4 -0
- package/src/plugin-api/index.ts +8 -1
- package/src/plugin-api/types.ts +151 -1
- package/src/plugins/defaults/empty-response/hooks/stop.ts +126 -0
- package/src/plugins/defaults/empty-response/register.ts +8 -13
- package/src/plugins/defaults/index.ts +1 -15
- package/src/plugins/defaults/injectors/register.ts +243 -74
- package/src/plugins/defaults/memory-retrieval/hooks/post-compact.ts +91 -0
- package/src/plugins/defaults/memory-retrieval/hooks/user-prompt-submit-temp.ts +216 -0
- package/src/plugins/defaults/memory-retrieval/injector-chain.ts +35 -0
- package/src/plugins/defaults/title-generate/hooks/stop.ts +75 -0
- package/src/plugins/defaults/title-generate/hooks/user-prompt-submit.ts +35 -0
- package/src/plugins/defaults/title-generate/package.json +1 -1
- package/src/plugins/defaults/title-generate/register.ts +18 -18
- package/src/plugins/defaults/tool-error/hooks/post-tool-use.ts +118 -0
- package/src/plugins/defaults/tool-error/package.json +1 -1
- package/src/plugins/defaults/tool-error/register.ts +9 -21
- package/src/plugins/defaults/tool-result-truncate/hooks/post-tool-use.ts +32 -0
- package/src/plugins/defaults/tool-result-truncate/register.ts +10 -21
- package/src/plugins/defaults/tool-result-truncate/terminal.ts +37 -18
- package/src/plugins/pipeline.ts +6 -18
- package/src/plugins/registry.ts +8 -25
- package/src/plugins/types.ts +43 -474
- package/src/proactive-artifact/aux-message-injector.ts +3 -3
- package/src/proactive-artifact/job.test.ts +7 -12
- package/src/prompts/__tests__/system-prompt.test.ts +36 -0
- package/src/prompts/templates/BOOTSTRAP-ACTIVATION-RAIL.md +62 -0
- package/src/prompts/templates/BOOTSTRAP.md +2 -2
- package/src/prompts/templates/system-sections.ts +15 -0
- package/src/providers/anthropic/client.ts +37 -29
- package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +112 -0
- package/src/providers/openai/chat-completions-provider.ts +44 -0
- package/src/providers/openrouter/client.ts +1 -0
- package/src/providers/placeholder-sentinels.ts +35 -0
- package/src/runtime/__tests__/agent-wake.test.ts +5 -1
- package/src/runtime/agent-wake.ts +2 -2
- package/src/runtime/assistant-event-hub.ts +36 -6
- package/src/runtime/{conversation-stream-state.ts → assistant-stream-state.ts} +132 -58
- package/src/runtime/http-router.ts +16 -21
- package/src/runtime/http-types.ts +16 -70
- package/src/runtime/pending-interactions.ts +1 -0
- package/src/runtime/routes/__tests__/consolidation-routes.test.ts +265 -2
- package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +31 -1
- package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +6 -2
- package/src/runtime/routes/__tests__/tts-routes.test.ts +6 -2
- package/src/runtime/routes/app-management-routes.ts +6 -117
- package/src/runtime/routes/app-routes.ts +13 -15
- package/src/runtime/routes/attachment-routes.ts +26 -15
- package/src/runtime/routes/avatar-routes.ts +26 -0
- package/src/runtime/routes/btw-routes.ts +29 -23
- package/src/runtime/routes/consolidation-routes.ts +120 -20
- package/src/runtime/routes/conversation-query-routes.ts +2 -0
- package/src/runtime/routes/conversation-routes.ts +358 -184
- package/src/runtime/routes/documents-routes.ts +4 -0
- package/src/runtime/routes/domain-routes.ts +51 -37
- package/src/runtime/routes/epoch-millis-range.ts +34 -0
- package/src/runtime/routes/events-routes.ts +28 -34
- package/src/runtime/routes/gateway-log-routes.ts +26 -4
- package/src/runtime/routes/heartbeat-routes.ts +32 -12
- package/src/runtime/routes/identity-intro-cache.ts +11 -34
- package/src/runtime/routes/identity-routes.ts +208 -17
- package/src/runtime/routes/image-generation-routes.ts +40 -2
- package/src/runtime/routes/index.ts +2 -0
- package/src/runtime/routes/integrations/a2a.ts +12 -10
- package/src/runtime/routes/integrations/slack/__tests__/channel.test.ts +16 -0
- package/src/runtime/routes/integrations/slack/channel.ts +4 -0
- package/src/runtime/routes/integrations/slack/share.ts +27 -6
- package/src/runtime/routes/integrations/telegram.ts +6 -0
- package/src/runtime/routes/integrations/twilio.ts +42 -0
- package/src/runtime/routes/internal-telemetry-routes.ts +88 -0
- package/src/runtime/routes/log-export-routes.ts +8 -0
- package/src/runtime/routes/memory-v2-routes.ts +15 -8
- package/src/runtime/routes/memory-v3-routes.ts +50 -28
- package/src/runtime/routes/oauth-apps.ts +66 -12
- package/src/runtime/routes/oauth-providers.ts +44 -5
- package/src/runtime/routes/platform-routes.ts +81 -5
- package/src/runtime/routes/playground/__tests__/force-compact.test.ts +6 -4
- package/src/runtime/routes/playground/force-compact.ts +1 -1
- package/src/runtime/routes/rename-conversation-routes.ts +5 -0
- package/src/runtime/routes/schedule-routes.ts +152 -42
- package/src/runtime/routes/secret-routes.ts +14 -2
- package/src/runtime/routes/skills-routes.ts +43 -14
- package/src/runtime/routes/tool-call-confirmation-enrichment.test.ts +161 -0
- package/src/runtime/routes/tool-call-confirmation-enrichment.ts +107 -0
- package/src/runtime/routes/trust-rules-routes.ts +26 -2
- package/src/runtime/routes/tts-routes.ts +35 -0
- package/src/runtime/routes/types.ts +66 -8
- package/src/runtime/routes/usage-routes.ts +47 -39
- package/src/runtime/routes/webhook-routes.ts +41 -2
- package/src/runtime/routes/workspace-routes.ts +4 -0
- package/src/runtime/services/__tests__/analyze-conversation.test.ts +6 -0
- package/src/runtime/services/analyze-conversation.ts +2 -2
- package/src/schedule/schedule-store.ts +20 -1
- package/src/schedule/schedule-usage-store.ts +83 -0
- package/src/schedule/scheduler.ts +12 -5
- package/src/skills/catalog-files.ts +2 -2
- package/src/skills/catalog-install.ts +3 -0
- package/src/skills/categories-cache.ts +118 -0
- package/src/skills/clawhub-files.ts +1 -2
- package/src/skills/skillssh-files.ts +1 -2
- package/src/telemetry/types.ts +29 -1
- package/src/telemetry/usage-telemetry-reporter.test.ts +112 -3
- package/src/telemetry/usage-telemetry-reporter.ts +57 -2
- package/src/tools/executor.ts +1 -53
- package/src/tools/network/__tests__/web-search-metadata.test.ts +7 -1
- package/src/tools/network/__tests__/web-search.test.ts +11 -3
- package/src/tools/network/web-search-error.test.ts +248 -0
- package/src/tools/network/web-search-error.ts +267 -0
- package/src/tools/network/web-search.ts +207 -48
- package/src/tools/schedule/create.ts +2 -0
- package/src/tools/terminal/safe-env.ts +10 -1
- package/src/tools/ui-surface/definitions.ts +9 -1
- package/src/tts/__tests__/provider-catalog-consistency.test.ts +85 -1
- package/src/tts/provider-catalog.ts +76 -1
- package/src/util/mutex.ts +47 -0
- package/src/workspace/git-service.ts +1 -42
- package/src/workspace/migrations/095-bump-heartbeat-interval-30m-to-60m.ts +51 -0
- package/src/workspace/migrations/096-reduce-quality-profile-effort.ts +72 -0
- package/src/workspace/migrations/097-enable-adaptive-thinking-managed-profiles.ts +93 -0
- package/src/workspace/migrations/registry.ts +6 -0
- package/src/__tests__/bootstrap-turn-cleanup.test.ts +0 -44
- package/src/__tests__/empty-response-pipeline.test.ts +0 -423
- package/src/__tests__/llm-call-pipeline.test.ts +0 -287
- package/src/__tests__/memory-retrieval-pipeline.test.ts +0 -418
- package/src/__tests__/persistence-pipeline.test.ts +0 -503
- package/src/__tests__/title-generate-pipeline.test.ts +0 -211
- package/src/__tests__/token-estimate-pipeline.test.ts +0 -479
- package/src/__tests__/tool-error-pipeline.test.ts +0 -241
- package/src/__tests__/tool-execute-pipeline.test.ts +0 -417
- package/src/__tests__/tool-result-truncate-pipeline.test.ts +0 -341
- package/src/daemon/bootstrap-turn-cleanup.ts +0 -45
- package/src/gallery/default-gallery.ts +0 -1359
- package/src/gallery/gallery-manifest.ts +0 -28
- package/src/home/feature-gate.ts +0 -22
- package/src/plugins/defaults/empty-response/middlewares/emptyResponse.ts +0 -22
- package/src/plugins/defaults/empty-response/terminal.ts +0 -106
- package/src/plugins/defaults/injectors/package.json +0 -15
- package/src/plugins/defaults/llm-call/middlewares/llmCall.ts +0 -17
- package/src/plugins/defaults/llm-call/package.json +0 -15
- package/src/plugins/defaults/llm-call/register.ts +0 -45
- package/src/plugins/defaults/memory-retrieval/middlewares/memoryRetrieval.ts +0 -17
- package/src/plugins/defaults/memory-retrieval/package.json +0 -15
- package/src/plugins/defaults/memory-retrieval/register.ts +0 -181
- package/src/plugins/defaults/persistence/middlewares/persistence.ts +0 -19
- package/src/plugins/defaults/persistence/package.json +0 -15
- package/src/plugins/defaults/persistence/register.ts +0 -38
- package/src/plugins/defaults/persistence/terminal.ts +0 -83
- package/src/plugins/defaults/title-generate/terminal.ts +0 -31
- package/src/plugins/defaults/token-estimate/middlewares/tokenEstimate.ts +0 -23
- package/src/plugins/defaults/token-estimate/package.json +0 -15
- package/src/plugins/defaults/token-estimate/register.ts +0 -34
- package/src/plugins/defaults/token-estimate/terminal.ts +0 -40
- package/src/plugins/defaults/tool-error/middlewares/toolError.ts +0 -21
- package/src/plugins/defaults/tool-error/terminal.ts +0 -47
- package/src/plugins/defaults/tool-execute/middlewares/toolExecute.ts +0 -23
- package/src/plugins/defaults/tool-execute/package.json +0 -15
- package/src/plugins/defaults/tool-execute/register.ts +0 -49
- package/src/plugins/defaults/tool-result-truncate/middlewares/toolResultTruncate.ts +0 -23
- package/src/plugins/defaults/tool-result-truncate/types.ts +0 -22
- package/src/skills/category-inference.ts +0 -111
|
@@ -14,27 +14,11 @@
|
|
|
14
14
|
import { createRequire } from "node:module";
|
|
15
15
|
import { afterAll, beforeEach, describe, expect, mock, test } from "bun:test";
|
|
16
16
|
|
|
17
|
-
import {
|
|
18
|
-
import type {
|
|
19
|
-
AgentEvent,
|
|
20
|
-
AgentLoopRunOptions,
|
|
21
|
-
AgentLoopRunResult,
|
|
22
|
-
MidLoopCompaction,
|
|
23
|
-
} from "../agent/loop.js";
|
|
17
|
+
import type { LoopToolExecutor } from "../agent/loop.js";
|
|
24
18
|
import type { LLMConfig } from "../config/schemas/llm.js";
|
|
25
|
-
import type { ContextWindowResult } from "../context/window-manager.js";
|
|
26
19
|
import type { ServerMessage } from "../daemon/message-protocol.js";
|
|
27
|
-
import { defaultCompactionTerminal } from "../plugins/defaults/compaction/terminal.js";
|
|
28
20
|
import { resetPluginRegistryAndRegisterDefaults } from "../plugins/defaults/index.js";
|
|
29
|
-
import {
|
|
30
|
-
import { getMiddlewaresFor } from "../plugins/registry.js";
|
|
31
|
-
import type {
|
|
32
|
-
CompactionArgs,
|
|
33
|
-
CompactionResult,
|
|
34
|
-
TurnContext,
|
|
35
|
-
} from "../plugins/types.js";
|
|
36
|
-
import { PluginTimeoutError } from "../plugins/types.js";
|
|
37
|
-
import type { ContentBlock, Message } from "../providers/types.js";
|
|
21
|
+
import type { Message, Provider, ToolDefinition } from "../providers/types.js";
|
|
38
22
|
|
|
39
23
|
const conversationCrudRealSnapshot = {
|
|
40
24
|
...(createRequire(import.meta.url)(
|
|
@@ -103,6 +87,7 @@ mock.module("../config/loader.js", () => ({
|
|
|
103
87
|
memory: { retrieval: { scratchpadInjection: { enabled: true } } },
|
|
104
88
|
ui: {},
|
|
105
89
|
compaction: { enabled: true, autoThreshold: 0.7 },
|
|
90
|
+
conversations: { skipAutoRetitling: true },
|
|
106
91
|
}),
|
|
107
92
|
loadRawConfig: () => ({}),
|
|
108
93
|
saveRawConfig: () => {},
|
|
@@ -114,10 +99,10 @@ mock.module("../config/loader.js", () => ({
|
|
|
114
99
|
// Token estimator — controllable per-test via mockEstimateTokens.
|
|
115
100
|
// Can be a number (constant), a no-arg function, or a function that
|
|
116
101
|
// receives the messages array for dynamic behavior based on content.
|
|
117
|
-
// Both the calibrated entry point (`estimatePromptTokens`,
|
|
118
|
-
// convergence path) and the raw entry point
|
|
119
|
-
// used by the
|
|
120
|
-
//
|
|
102
|
+
// Both the calibrated entry point (`estimatePromptTokens`, which backs the
|
|
103
|
+
// preflight overflow gate and the convergence path) and the raw entry point
|
|
104
|
+
// (`estimatePromptTokensRaw`, used by the pre-send calibration capture) are
|
|
105
|
+
// stubbed so either call site can drive the test.
|
|
121
106
|
let mockEstimateTokens: number | ((msgs?: Message[]) => number) = 1000;
|
|
122
107
|
mock.module("../context/token-estimator.js", () => ({
|
|
123
108
|
estimatePromptTokens: (msgs: Message[]) =>
|
|
@@ -128,8 +113,16 @@ mock.module("../context/token-estimator.js", () => ({
|
|
|
128
113
|
typeof mockEstimateTokens === "function"
|
|
129
114
|
? mockEstimateTokens(msgs)
|
|
130
115
|
: mockEstimateTokens,
|
|
131
|
-
//
|
|
132
|
-
//
|
|
116
|
+
// The preflight overflow gate calls this calibrated wrapper directly, so it
|
|
117
|
+
// must honor `mockEstimateTokens` too — otherwise the real implementation
|
|
118
|
+
// (which sums tool tokens onto the real calibrated estimate) ignores the
|
|
119
|
+
// per-test value and the overflow scenarios below never trigger.
|
|
120
|
+
estimatePromptTokensWithTools: (history: Message[]) =>
|
|
121
|
+
typeof mockEstimateTokens === "function"
|
|
122
|
+
? mockEstimateTokens(history)
|
|
123
|
+
: mockEstimateTokens,
|
|
124
|
+
// `estimatePromptTokensWithTools` folds tool tokens in via this helper; 0
|
|
125
|
+
// keeps the stubbed value unchanged.
|
|
133
126
|
estimateToolsTokens: () => 0,
|
|
134
127
|
// Conversation agent loop now calls this helper to canonicalize the
|
|
135
128
|
// provider key shared with the calibration system. The tests here
|
|
@@ -281,15 +274,6 @@ mock.module("../daemon/conversation-runtime-assembly.js", () => ({
|
|
|
281
274
|
blocks: {},
|
|
282
275
|
}),
|
|
283
276
|
stripInjectionsForCompaction: (msgs: Message[]) => msgs,
|
|
284
|
-
findLastInjectedNowContent: () => null,
|
|
285
|
-
readNowScratchpad: () => null,
|
|
286
|
-
readPkbContext: () => null,
|
|
287
|
-
getPkbAutoInjectList: () => [
|
|
288
|
-
"INDEX.md",
|
|
289
|
-
"essentials.md",
|
|
290
|
-
"threads.md",
|
|
291
|
-
"buffer.md",
|
|
292
|
-
],
|
|
293
277
|
isSlackChannelConversation: () => false,
|
|
294
278
|
getSlackCompactionWatermarkForPrefix: () => null,
|
|
295
279
|
loadSlackChronologicalContext: () => null,
|
|
@@ -437,179 +421,55 @@ mock.module("../memory/archive-store.js", () => ({
|
|
|
437
421
|
|
|
438
422
|
// ── Imports (after mocks) ────────────────────────────────────────────
|
|
439
423
|
|
|
424
|
+
import { AgentLoop } from "../agent/loop.js";
|
|
440
425
|
import {
|
|
441
426
|
type AgentLoopConversationContext,
|
|
442
427
|
runAgentLoopImpl,
|
|
443
428
|
} from "../daemon/conversation-agent-loop.js";
|
|
429
|
+
import {
|
|
430
|
+
createMockProvider,
|
|
431
|
+
type ScriptedResponse,
|
|
432
|
+
textResponse,
|
|
433
|
+
toolUseResponse,
|
|
434
|
+
} from "./helpers/mock-provider.js";
|
|
444
435
|
|
|
445
436
|
// ── Test helpers ─────────────────────────────────────────────────────
|
|
446
437
|
|
|
447
|
-
type AgentLoopRun = (
|
|
448
|
-
messages: Message[],
|
|
449
|
-
onEvent: (event: AgentEvent) => void,
|
|
450
|
-
options?: AgentLoopRunOptions,
|
|
451
|
-
) => Promise<Message[]>;
|
|
452
|
-
|
|
453
|
-
/**
|
|
454
|
-
* Faithful re-implementation of `AgentLoop.compact()` for the mock loop: run
|
|
455
|
-
* the compaction pipeline against the supplied turn context (which carries the
|
|
456
|
-
* test's `contextWindowManager`), invoke the orchestrator-supplied hooks, and
|
|
457
|
-
* return the continuation history — or `null` on timeout/exhaustion so the
|
|
458
|
-
* caller yields "budget".
|
|
459
|
-
*/
|
|
460
|
-
async function simulateInlineCompaction(
|
|
461
|
-
compaction: MidLoopCompaction,
|
|
462
|
-
history: Message[],
|
|
463
|
-
turnContext: TurnContext | undefined,
|
|
464
|
-
signal: AbortSignal | undefined,
|
|
465
|
-
onEvent: (event: AgentEvent) => void | Promise<void>,
|
|
466
|
-
compactionCircuit: CompactionCircuit,
|
|
467
|
-
): Promise<Message[] | null> {
|
|
468
|
-
await onEvent({ type: "context_compacting" });
|
|
469
|
-
const { rawHistory, options } = compaction.prepare(history);
|
|
470
|
-
let result: CompactionResult;
|
|
471
|
-
try {
|
|
472
|
-
result = await runPipeline<CompactionArgs, CompactionResult>(
|
|
473
|
-
"compaction",
|
|
474
|
-
getMiddlewaresFor("compaction"),
|
|
475
|
-
(args) => defaultCompactionTerminal(args, turnContext as TurnContext),
|
|
476
|
-
{ messages: rawHistory, signal, options },
|
|
477
|
-
turnContext as TurnContext,
|
|
478
|
-
DEFAULT_TIMEOUTS.compaction,
|
|
479
|
-
);
|
|
480
|
-
} catch (error) {
|
|
481
|
-
if (error instanceof PluginTimeoutError) {
|
|
482
|
-
await compactionCircuit.recordOutcome(
|
|
483
|
-
{
|
|
484
|
-
currentRequestId: turnContext?.requestId,
|
|
485
|
-
currentTurnTrustContext: turnContext?.trust,
|
|
486
|
-
turnCount: turnContext?.turnIndex ?? 0,
|
|
487
|
-
},
|
|
488
|
-
true,
|
|
489
|
-
onEvent,
|
|
490
|
-
);
|
|
491
|
-
return null;
|
|
492
|
-
}
|
|
493
|
-
throw error;
|
|
494
|
-
}
|
|
495
|
-
const compactResult = result as ContextWindowResult;
|
|
496
|
-
if (compactResult.summaryFailed !== undefined) {
|
|
497
|
-
await compactionCircuit.recordOutcome(
|
|
498
|
-
{
|
|
499
|
-
currentRequestId: turnContext?.requestId,
|
|
500
|
-
currentTurnTrustContext: turnContext?.trust,
|
|
501
|
-
turnCount: turnContext?.turnIndex ?? 0,
|
|
502
|
-
},
|
|
503
|
-
compactResult.summaryFailed,
|
|
504
|
-
onEvent,
|
|
505
|
-
);
|
|
506
|
-
}
|
|
507
|
-
if (compactResult.compacted) {
|
|
508
|
-
await compaction.applyResult(compactResult, rawHistory);
|
|
509
|
-
}
|
|
510
|
-
if (compactResult.exhausted ?? false) {
|
|
511
|
-
return null;
|
|
512
|
-
}
|
|
513
|
-
return compaction.reinject();
|
|
514
|
-
}
|
|
515
|
-
|
|
516
|
-
/**
|
|
517
|
-
* Adapt a `Message[]`-returning mock loop body into `run()`'s real result
|
|
518
|
-
* shape. Mirrors the production loop: the pause-reason carried back is
|
|
519
|
-
* whatever the most recent `onCheckpoint` call yielded with (null when it
|
|
520
|
-
* never yielded), so the orchestrator derives its yield bookkeeping the same
|
|
521
|
-
* way it does against the real loop.
|
|
522
|
-
*/
|
|
523
|
-
const asAgentLoopRun = (
|
|
524
|
-
fn: AgentLoopRun,
|
|
525
|
-
compactionCircuit: CompactionCircuit,
|
|
526
|
-
): ((
|
|
527
|
-
messages: Message[],
|
|
528
|
-
onEvent: (event: AgentEvent) => void | Promise<void>,
|
|
529
|
-
options?: AgentLoopRunOptions,
|
|
530
|
-
) => Promise<AgentLoopRunResult>) => {
|
|
531
|
-
return async (messages, onEvent, options) => {
|
|
532
|
-
let exitReason: AgentLoopRunResult["exitReason"] = null;
|
|
533
|
-
let wrapped = options;
|
|
534
|
-
if (options?.onCheckpoint) {
|
|
535
|
-
const inner = options.onCheckpoint;
|
|
536
|
-
wrapped = {
|
|
537
|
-
...options,
|
|
538
|
-
onCheckpoint: async (info) => {
|
|
539
|
-
// Handoff is offered first, mirroring the loop's ordering.
|
|
540
|
-
const decision = await inner(info);
|
|
541
|
-
if (decision !== "continue") {
|
|
542
|
-
exitReason = decision;
|
|
543
|
-
return decision;
|
|
544
|
-
}
|
|
545
|
-
// The mid-loop budget gate and inline compaction both live inside
|
|
546
|
-
// `AgentLoop.run`. Replicate them here — same formula, stubbed
|
|
547
|
-
// estimator, and the loop's own `compact()` ceremony — so these
|
|
548
|
-
// orchestrator tests drive the real escalation path now that the
|
|
549
|
-
// orchestrator's `onCheckpoint` is handoff-only and compaction
|
|
550
|
-
// runs inline rather than via an orchestrator re-entry loop.
|
|
551
|
-
const contextWindow = options.resolveContextWindow?.();
|
|
552
|
-
if (contextWindow?.overflowRecovery.enabled) {
|
|
553
|
-
const { maxInputTokens, overflowRecovery } = contextWindow;
|
|
554
|
-
const safetyMargin =
|
|
555
|
-
info.history.length > 50
|
|
556
|
-
? Math.max(overflowRecovery.safetyMarginRatio, 0.15)
|
|
557
|
-
: overflowRecovery.safetyMarginRatio;
|
|
558
|
-
const preflightBudget = Math.floor(
|
|
559
|
-
maxInputTokens * (1 - safetyMargin),
|
|
560
|
-
);
|
|
561
|
-
const estimated =
|
|
562
|
-
typeof mockEstimateTokens === "function"
|
|
563
|
-
? mockEstimateTokens(info.history)
|
|
564
|
-
: mockEstimateTokens;
|
|
565
|
-
if (estimated > preflightBudget * 0.85) {
|
|
566
|
-
// Mirror `AgentLoop.compact()`: when a compaction path is
|
|
567
|
-
// supplied, run it in place and continue; on timeout or
|
|
568
|
-
// exhaustion it returns null, so the loop yields "budget".
|
|
569
|
-
const compacted = options.compaction
|
|
570
|
-
? await simulateInlineCompaction(
|
|
571
|
-
options.compaction,
|
|
572
|
-
info.history,
|
|
573
|
-
options.turnContext,
|
|
574
|
-
options.signal,
|
|
575
|
-
onEvent,
|
|
576
|
-
compactionCircuit,
|
|
577
|
-
)
|
|
578
|
-
: null;
|
|
579
|
-
if (compacted) {
|
|
580
|
-
exitReason = null;
|
|
581
|
-
return "continue";
|
|
582
|
-
}
|
|
583
|
-
exitReason = "budget";
|
|
584
|
-
return "budget";
|
|
585
|
-
}
|
|
586
|
-
}
|
|
587
|
-
exitReason = null;
|
|
588
|
-
return "continue";
|
|
589
|
-
},
|
|
590
|
-
};
|
|
591
|
-
}
|
|
592
|
-
const history = await fn(messages, onEvent, wrapped);
|
|
593
|
-
return { history, exitReason };
|
|
594
|
-
};
|
|
595
|
-
};
|
|
596
|
-
|
|
597
438
|
function makeCtx(
|
|
598
439
|
overrides?: Partial<AgentLoopConversationContext> & {
|
|
599
|
-
|
|
440
|
+
providerResponses?: ScriptedResponse[];
|
|
441
|
+
loopProvider?: Provider;
|
|
442
|
+
loopTools?: ToolDefinition[];
|
|
443
|
+
toolExecutor?: LoopToolExecutor;
|
|
600
444
|
},
|
|
601
445
|
): AgentLoopConversationContext {
|
|
602
|
-
const
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
446
|
+
const {
|
|
447
|
+
providerResponses,
|
|
448
|
+
loopProvider,
|
|
449
|
+
loopTools,
|
|
450
|
+
toolExecutor,
|
|
451
|
+
...ctxOverrides
|
|
452
|
+
} = overrides ?? {};
|
|
453
|
+
const conversationId = ctxOverrides.conversationId ?? "test-conv";
|
|
454
|
+
|
|
455
|
+
// Drive the real `AgentLoop` against a scripted provider, mocking only the
|
|
456
|
+
// provider HTTP boundary. The loop owns its mid-loop budget gate, inline
|
|
457
|
+
// compaction, and event emission, so these overflow tests exercise the real
|
|
458
|
+
// escalation/persistence path.
|
|
459
|
+
const loopProviderName =
|
|
460
|
+
(ctxOverrides.provider as { name?: string } | undefined)?.name ??
|
|
461
|
+
"mock-provider";
|
|
462
|
+
const provider =
|
|
463
|
+
loopProvider ??
|
|
464
|
+
createMockProvider(
|
|
465
|
+
providerResponses ?? [textResponse("response")],
|
|
466
|
+
loopProviderName,
|
|
467
|
+
).provider;
|
|
468
|
+
const agentLoop = new AgentLoop(provider, "system prompt", {
|
|
469
|
+
conversationId,
|
|
470
|
+
tools: loopTools ?? [],
|
|
471
|
+
toolExecutor,
|
|
472
|
+
});
|
|
613
473
|
|
|
614
474
|
return {
|
|
615
475
|
conversationId: "test-conv",
|
|
@@ -617,19 +477,16 @@ function makeCtx(
|
|
|
617
477
|
{ role: "user", content: [{ type: "text", text: "Hello" }] },
|
|
618
478
|
] as Message[],
|
|
619
479
|
processing: true,
|
|
480
|
+
isProcessing(this: { processing: boolean }) {
|
|
481
|
+
return this.processing;
|
|
482
|
+
},
|
|
483
|
+
setProcessing(this: { processing: boolean }, value: boolean) {
|
|
484
|
+
this.processing = value;
|
|
485
|
+
},
|
|
620
486
|
abortController: new AbortController(),
|
|
621
487
|
currentRequestId: "test-req",
|
|
622
488
|
|
|
623
|
-
agentLoop
|
|
624
|
-
run: asAgentLoopRun(agentLoopRun, compactionCircuit),
|
|
625
|
-
getToolTokenBudget: () => 0,
|
|
626
|
-
getResolvedTools: () => [],
|
|
627
|
-
// Tests in this file don't exercise calibration, so returning
|
|
628
|
-
// undefined is fine — the estimator falls back to the per-provider
|
|
629
|
-
// aggregate key.
|
|
630
|
-
getActiveModel: () => undefined,
|
|
631
|
-
compactionCircuit,
|
|
632
|
-
} as unknown as AgentLoopConversationContext["agentLoop"],
|
|
489
|
+
agentLoop,
|
|
633
490
|
provider: {
|
|
634
491
|
name: "mock-provider",
|
|
635
492
|
sendMessage: async () => ({
|
|
@@ -722,9 +579,10 @@ function makeCtx(
|
|
|
722
579
|
injectedTokens: 0,
|
|
723
580
|
}),
|
|
724
581
|
retrackCachedNodes: () => {},
|
|
582
|
+
recordPkbQueryVectors: () => {},
|
|
725
583
|
} as unknown as AgentLoopConversationContext["graphMemory"],
|
|
726
584
|
|
|
727
|
-
...
|
|
585
|
+
...ctxOverrides,
|
|
728
586
|
} as AgentLoopConversationContext;
|
|
729
587
|
}
|
|
730
588
|
|
|
@@ -802,6 +660,7 @@ beforeEach(() => {
|
|
|
802
660
|
|
|
803
661
|
describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
804
662
|
test("usage update context max follows active main-agent profile budget", async () => {
|
|
663
|
+
// GIVEN an active main-agent profile that narrows the context budget
|
|
805
664
|
mockLlmConfig = {
|
|
806
665
|
...structuredClone(defaultLlmConfig),
|
|
807
666
|
activeProfile: "short-context",
|
|
@@ -813,27 +672,22 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
813
672
|
},
|
|
814
673
|
};
|
|
815
674
|
|
|
675
|
+
// AND a provider turn that reports 12k input tokens of usage
|
|
816
676
|
const ctx = makeCtx({
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
type: "
|
|
820
|
-
inputTokens: 12_000,
|
|
821
|
-
outputTokens: 300,
|
|
677
|
+
providerResponses: [
|
|
678
|
+
{
|
|
679
|
+
content: [{ type: "text", text: "response" }],
|
|
822
680
|
model: "mock-model",
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
{
|
|
828
|
-
role: "assistant" as const,
|
|
829
|
-
content: [{ type: "text" as const, text: "response" }],
|
|
830
|
-
},
|
|
831
|
-
];
|
|
832
|
-
},
|
|
681
|
+
usage: { inputTokens: 12_000, outputTokens: 300 },
|
|
682
|
+
stopReason: "end_turn",
|
|
683
|
+
},
|
|
684
|
+
],
|
|
833
685
|
});
|
|
834
686
|
|
|
687
|
+
// WHEN the turn runs to completion
|
|
835
688
|
await runAgentLoopImpl(ctx, "hello", "msg-1", () => {});
|
|
836
689
|
|
|
690
|
+
// THEN the recorded main-agent usage carries the profile's max budget
|
|
837
691
|
const mainAgentUsageCall = recordUsageMock.mock.calls.find(
|
|
838
692
|
(call) => call[5] === "main_agent",
|
|
839
693
|
);
|
|
@@ -846,10 +700,9 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
846
700
|
|
|
847
701
|
// ── Test 1 ────────────────────────────────────────────────────────
|
|
848
702
|
// BUG: When the agent loop makes progress (adds messages to history)
|
|
849
|
-
// before hitting context_too_large, the convergence loop
|
|
850
|
-
//
|
|
851
|
-
//
|
|
852
|
-
// invoked — the error is surfaced immediately at line 1163-1175
|
|
703
|
+
// before hitting context_too_large, the convergence loop's progress
|
|
704
|
+
// check must recognize that the loop appended messages. If it fails to,
|
|
705
|
+
// the reducer is never invoked — the error is surfaced immediately
|
|
853
706
|
// without any compaction attempt.
|
|
854
707
|
//
|
|
855
708
|
// Expected behavior (PR 2 fix): After progress + context_too_large,
|
|
@@ -889,125 +742,31 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
889
742
|
};
|
|
890
743
|
};
|
|
891
744
|
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
// then hits context_too_large on next LLM call
|
|
901
|
-
const progressMessages: Message[] = [
|
|
902
|
-
...messages,
|
|
903
|
-
{
|
|
904
|
-
role: "assistant" as const,
|
|
905
|
-
content: [
|
|
906
|
-
{ type: "text", text: "Let me check that." },
|
|
907
|
-
{
|
|
908
|
-
type: "tool_use",
|
|
909
|
-
id: "tu-progress",
|
|
910
|
-
name: "bash",
|
|
911
|
-
input: { command: "ls" },
|
|
912
|
-
},
|
|
913
|
-
] as ContentBlock[],
|
|
914
|
-
},
|
|
915
|
-
{
|
|
916
|
-
role: "user" as const,
|
|
917
|
-
content: [
|
|
918
|
-
{
|
|
919
|
-
type: "tool_result",
|
|
920
|
-
tool_use_id: "tu-progress",
|
|
921
|
-
content: "file1.ts\nfile2.ts",
|
|
922
|
-
is_error: false,
|
|
923
|
-
},
|
|
924
|
-
] as ContentBlock[],
|
|
925
|
-
},
|
|
926
|
-
];
|
|
745
|
+
// Run 1 makes progress (a tool turn) then the following provider call
|
|
746
|
+
// rejects with a context_too_large error; after the convergence reducer
|
|
747
|
+
// compacts, the rerun recovers with plain text.
|
|
748
|
+
const { provider } = createMockProvider([
|
|
749
|
+
toolUseResponse("tu-progress", "bash", { command: "ls" }),
|
|
750
|
+
new Error("prompt is too long: 242201 tokens > 200000 maximum"),
|
|
751
|
+
textResponse("recovered after compaction"),
|
|
752
|
+
]);
|
|
927
753
|
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
754
|
+
const ctx = makeCtx({
|
|
755
|
+
loopProvider: provider,
|
|
756
|
+
loopTools: [
|
|
757
|
+
{
|
|
932
758
|
name: "bash",
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
toolUseId: "tu-progress",
|
|
938
|
-
content: "file1.ts\nfile2.ts",
|
|
939
|
-
isError: false,
|
|
940
|
-
});
|
|
941
|
-
onEvent({
|
|
942
|
-
type: "message_complete",
|
|
943
|
-
message: {
|
|
944
|
-
role: "assistant",
|
|
945
|
-
content: [
|
|
946
|
-
{ type: "text", text: "Let me check that." },
|
|
947
|
-
{
|
|
948
|
-
type: "tool_use",
|
|
949
|
-
id: "tu-progress",
|
|
950
|
-
name: "bash",
|
|
951
|
-
input: { command: "ls" },
|
|
952
|
-
},
|
|
953
|
-
],
|
|
759
|
+
description: "Run a shell command",
|
|
760
|
+
input_schema: {
|
|
761
|
+
type: "object",
|
|
762
|
+
properties: { command: { type: "string" } },
|
|
954
763
|
},
|
|
955
|
-
});
|
|
956
|
-
onEvent({
|
|
957
|
-
type: "usage",
|
|
958
|
-
inputTokens: 100,
|
|
959
|
-
outputTokens: 50,
|
|
960
|
-
model: "test-model",
|
|
961
|
-
providerDurationMs: 100,
|
|
962
|
-
});
|
|
963
|
-
|
|
964
|
-
// Then context_too_large error occurs on the *next* LLM call
|
|
965
|
-
onEvent({
|
|
966
|
-
type: "error",
|
|
967
|
-
error: new Error(
|
|
968
|
-
"prompt is too long: 242201 tokens > 200000 maximum",
|
|
969
|
-
),
|
|
970
|
-
});
|
|
971
|
-
onEvent({
|
|
972
|
-
type: "usage",
|
|
973
|
-
inputTokens: 0,
|
|
974
|
-
outputTokens: 0,
|
|
975
|
-
model: "test-model",
|
|
976
|
-
providerDurationMs: 10,
|
|
977
|
-
});
|
|
978
|
-
|
|
979
|
-
// Return the history WITH progress (more messages than input)
|
|
980
|
-
return progressMessages;
|
|
981
|
-
}
|
|
982
|
-
|
|
983
|
-
// Second call (after compaction): succeed
|
|
984
|
-
onEvent({
|
|
985
|
-
type: "message_complete",
|
|
986
|
-
message: {
|
|
987
|
-
role: "assistant",
|
|
988
|
-
content: [{ type: "text", text: "recovered after compaction" }],
|
|
989
|
-
},
|
|
990
|
-
});
|
|
991
|
-
onEvent({
|
|
992
|
-
type: "usage",
|
|
993
|
-
inputTokens: 50,
|
|
994
|
-
outputTokens: 25,
|
|
995
|
-
model: "test-model",
|
|
996
|
-
providerDurationMs: 100,
|
|
997
|
-
});
|
|
998
|
-
return [
|
|
999
|
-
...messages,
|
|
1000
|
-
{
|
|
1001
|
-
role: "assistant" as const,
|
|
1002
|
-
content: [
|
|
1003
|
-
{ type: "text", text: "recovered after compaction" },
|
|
1004
|
-
] as ContentBlock[],
|
|
1005
764
|
},
|
|
1006
|
-
]
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
765
|
+
],
|
|
766
|
+
toolExecutor: async () => ({
|
|
767
|
+
content: "file1.ts\nfile2.ts",
|
|
768
|
+
isError: false,
|
|
769
|
+
}),
|
|
1011
770
|
contextWindowManager: {
|
|
1012
771
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
1013
772
|
maybeCompact: async () => ({ compacted: false }),
|
|
@@ -1036,13 +795,14 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1036
795
|
// This test should PASS against current code (when no progress is made).
|
|
1037
796
|
test("overflow recovery compacts below limit even when estimation underestimates", async () => {
|
|
1038
797
|
const events: ServerMessage[] = [];
|
|
1039
|
-
let callCount = 0;
|
|
1040
798
|
let reducerCalled = false;
|
|
1041
799
|
|
|
1042
|
-
//
|
|
800
|
+
// GIVEN the estimator reports 185k — under the 190k preflight budget
|
|
801
|
+
// (200k * 0.95), so the turn proceeds to the provider rather than
|
|
802
|
+
// compacting up front.
|
|
1043
803
|
mockEstimateTokens = 185_000;
|
|
1044
804
|
|
|
1045
|
-
//
|
|
805
|
+
// AND the post-run convergence reducer successfully compacts
|
|
1046
806
|
mockReducerStepFn = (msgs: Message[]) => {
|
|
1047
807
|
reducerCalled = true;
|
|
1048
808
|
return {
|
|
@@ -1072,96 +832,46 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1072
832
|
};
|
|
1073
833
|
};
|
|
1074
834
|
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
// Provider rejects with "prompt is too long: 242201 tokens > 200000"
|
|
1082
|
-
// even though estimator said 185k
|
|
1083
|
-
onEvent({
|
|
1084
|
-
type: "error",
|
|
1085
|
-
error: new Error(
|
|
1086
|
-
"prompt is too long: 242201 tokens > 200000 maximum",
|
|
1087
|
-
),
|
|
1088
|
-
});
|
|
1089
|
-
onEvent({
|
|
1090
|
-
type: "usage",
|
|
1091
|
-
inputTokens: 0,
|
|
1092
|
-
outputTokens: 0,
|
|
1093
|
-
model: "test-model",
|
|
1094
|
-
providerDurationMs: 10,
|
|
1095
|
-
});
|
|
1096
|
-
// No progress — return same messages
|
|
1097
|
-
return messages;
|
|
1098
|
-
}
|
|
1099
|
-
// Second call succeeds
|
|
1100
|
-
onEvent({
|
|
1101
|
-
type: "message_complete",
|
|
1102
|
-
message: {
|
|
1103
|
-
role: "assistant",
|
|
1104
|
-
content: [{ type: "text", text: "recovered" }],
|
|
1105
|
-
},
|
|
1106
|
-
});
|
|
1107
|
-
onEvent({
|
|
1108
|
-
type: "usage",
|
|
1109
|
-
inputTokens: 80_000,
|
|
1110
|
-
outputTokens: 200,
|
|
1111
|
-
model: "test-model",
|
|
1112
|
-
providerDurationMs: 500,
|
|
1113
|
-
});
|
|
1114
|
-
return [
|
|
1115
|
-
...messages,
|
|
1116
|
-
{
|
|
1117
|
-
role: "assistant" as const,
|
|
1118
|
-
content: [{ type: "text", text: "recovered" }] as ContentBlock[],
|
|
1119
|
-
},
|
|
1120
|
-
];
|
|
1121
|
-
};
|
|
835
|
+
// AND a provider that rejects the first call as too long (revealing the
|
|
836
|
+
// real 242k count the estimator missed), then succeeds on the rerun.
|
|
837
|
+
const { provider, calls } = createMockProvider([
|
|
838
|
+
new Error("prompt is too long: 242201 tokens > 200000 maximum"),
|
|
839
|
+
textResponse("recovered"),
|
|
840
|
+
]);
|
|
1122
841
|
|
|
1123
842
|
const ctx = makeCtx({
|
|
1124
|
-
|
|
843
|
+
loopProvider: provider,
|
|
1125
844
|
contextWindowManager: {
|
|
1126
845
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
1127
846
|
maybeCompact: async () => ({ compacted: false }),
|
|
1128
847
|
} as unknown as AgentLoopConversationContext["contextWindowManager"],
|
|
1129
848
|
});
|
|
1130
849
|
|
|
850
|
+
// WHEN the turn runs
|
|
1131
851
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
1132
852
|
|
|
1133
|
-
//
|
|
853
|
+
// THEN the convergence reducer ran and the rerun recovered without a
|
|
854
|
+
// user-facing conversation_error.
|
|
1134
855
|
expect(reducerCalled).toBe(true);
|
|
1135
|
-
// Should recover without conversation_error
|
|
1136
856
|
const conversationError = events.find(
|
|
1137
857
|
(e) => e.type === "conversation_error",
|
|
1138
858
|
);
|
|
1139
859
|
expect(conversationError).toBeUndefined();
|
|
1140
|
-
expect(
|
|
860
|
+
expect(calls.length).toBe(2);
|
|
1141
861
|
});
|
|
1142
862
|
|
|
1143
863
|
// ── Test 3 ────────────────────────────────────────────────────────
|
|
1144
|
-
//
|
|
1145
|
-
// "242201 tokens > 200000"), the reducer
|
|
1146
|
-
// the actual limit
|
|
1147
|
-
//
|
|
1148
|
-
//
|
|
1149
|
-
//
|
|
1150
|
-
//
|
|
1151
|
-
//
|
|
1152
|
-
// Expected behavior (PR 4 fix): `targetInputTokensOverride` should
|
|
1153
|
-
// be adjusted based on the ratio between estimated and actual tokens.
|
|
1154
|
-
// BUG: The targetTokens passed to the reducer is preflightBudget = 190k.
|
|
1155
|
-
// But when the actual token count is 242k (1.31x the estimate of 185k),
|
|
1156
|
-
// the target should be adjusted downward to account for the estimation
|
|
1157
|
-
// inaccuracy. For example: 190k / 1.31 ≈ 145k.
|
|
1158
|
-
// Planned fix: targetInputTokensOverride should be adjusted based on
|
|
1159
|
-
// the ratio between estimated and actual tokens.
|
|
864
|
+
// When the provider rejection reveals the actual token count (e.g.,
|
|
865
|
+
// "242201 tokens > 200000"), the overflow reducer's `targetTokens`
|
|
866
|
+
// should be a budget below the actual limit, not below the estimator's
|
|
867
|
+
// inaccurate budget. With a preflightBudget of 190k but an actual count
|
|
868
|
+
// of 242k (1.31x the estimate of 185k), the target is adjusted downward
|
|
869
|
+
// based on the observed mismatch (190k / 1.31 ≈ 145k) so the reducer
|
|
870
|
+
// converges toward the real ceiling rather than the optimistic estimate.
|
|
1160
871
|
test.todo(
|
|
1161
872
|
"forced compaction targets a lower budget when estimation has been inaccurate",
|
|
1162
873
|
async () => {
|
|
1163
874
|
const events: ServerMessage[] = [];
|
|
1164
|
-
let callCount = 0;
|
|
1165
875
|
let capturedTargetTokens: number | undefined;
|
|
1166
876
|
|
|
1167
877
|
// Estimator says 185k (below 190k budget = 200k * 0.95)
|
|
@@ -1197,55 +907,16 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1197
907
|
};
|
|
1198
908
|
};
|
|
1199
909
|
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
onEvent({
|
|
1208
|
-
type: "error",
|
|
1209
|
-
error: new Error(
|
|
1210
|
-
"prompt is too long: 242201 tokens > 200000 maximum",
|
|
1211
|
-
),
|
|
1212
|
-
});
|
|
1213
|
-
onEvent({
|
|
1214
|
-
type: "usage",
|
|
1215
|
-
inputTokens: 0,
|
|
1216
|
-
outputTokens: 0,
|
|
1217
|
-
model: "test-model",
|
|
1218
|
-
providerDurationMs: 10,
|
|
1219
|
-
});
|
|
1220
|
-
// No progress — return same messages
|
|
1221
|
-
return messages;
|
|
1222
|
-
}
|
|
1223
|
-
// Second call succeeds after compaction
|
|
1224
|
-
onEvent({
|
|
1225
|
-
type: "message_complete",
|
|
1226
|
-
message: {
|
|
1227
|
-
role: "assistant",
|
|
1228
|
-
content: [{ type: "text", text: "recovered" }],
|
|
1229
|
-
},
|
|
1230
|
-
});
|
|
1231
|
-
onEvent({
|
|
1232
|
-
type: "usage",
|
|
1233
|
-
inputTokens: 80_000,
|
|
1234
|
-
outputTokens: 200,
|
|
1235
|
-
model: "test-model",
|
|
1236
|
-
providerDurationMs: 500,
|
|
1237
|
-
});
|
|
1238
|
-
return [
|
|
1239
|
-
...messages,
|
|
1240
|
-
{
|
|
1241
|
-
role: "assistant" as const,
|
|
1242
|
-
content: [{ type: "text", text: "recovered" }] as ContentBlock[],
|
|
1243
|
-
},
|
|
1244
|
-
];
|
|
1245
|
-
};
|
|
910
|
+
// The provider rejects the first call with a context_too_large error
|
|
911
|
+
// (actual tokens 242201, far above the 185k estimate); after forced
|
|
912
|
+
// compaction re-targets a lower budget, the rerun recovers with text.
|
|
913
|
+
const { provider, calls } = createMockProvider([
|
|
914
|
+
new Error("prompt is too long: 242201 tokens > 200000 maximum"),
|
|
915
|
+
textResponse("recovered"),
|
|
916
|
+
]);
|
|
1246
917
|
|
|
1247
918
|
const ctx = makeCtx({
|
|
1248
|
-
|
|
919
|
+
loopProvider: provider,
|
|
1249
920
|
contextWindowManager: {
|
|
1250
921
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
1251
922
|
maybeCompact: async () => ({ compacted: false }),
|
|
@@ -1275,7 +946,7 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1275
946
|
(e) => e.type === "conversation_error",
|
|
1276
947
|
);
|
|
1277
948
|
expect(conversationError).toBeUndefined();
|
|
1278
|
-
expect(
|
|
949
|
+
expect(calls.length).toBe(2);
|
|
1279
950
|
},
|
|
1280
951
|
);
|
|
1281
952
|
|
|
@@ -1289,7 +960,6 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1289
960
|
async () => {
|
|
1290
961
|
const events: ServerMessage[] = [];
|
|
1291
962
|
const longHistory = buildLongConversation(75);
|
|
1292
|
-
let callCount = 0;
|
|
1293
963
|
let reducerCalled = false;
|
|
1294
964
|
|
|
1295
965
|
// Estimator says ~195k — just above budget so preflight reducer runs
|
|
@@ -1325,38 +995,14 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1325
995
|
};
|
|
1326
996
|
};
|
|
1327
997
|
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
onEvent({
|
|
1334
|
-
type: "message_complete",
|
|
1335
|
-
message: {
|
|
1336
|
-
role: "assistant",
|
|
1337
|
-
content: [{ type: "text", text: "Here's the analysis..." }],
|
|
1338
|
-
},
|
|
1339
|
-
});
|
|
1340
|
-
onEvent({
|
|
1341
|
-
type: "usage",
|
|
1342
|
-
inputTokens: 50_000,
|
|
1343
|
-
outputTokens: 300,
|
|
1344
|
-
model: "test-model",
|
|
1345
|
-
providerDurationMs: 800,
|
|
1346
|
-
});
|
|
1347
|
-
return [
|
|
1348
|
-
...messages,
|
|
1349
|
-
{
|
|
1350
|
-
role: "assistant" as const,
|
|
1351
|
-
content: [
|
|
1352
|
-
{ type: "text", text: "Here's the analysis..." },
|
|
1353
|
-
] as ContentBlock[],
|
|
1354
|
-
},
|
|
1355
|
-
];
|
|
1356
|
-
};
|
|
998
|
+
// After the preflight reducer compacts the long history under budget,
|
|
999
|
+
// a single provider call completes the turn with plain text.
|
|
1000
|
+
const { provider, calls } = createMockProvider([
|
|
1001
|
+
textResponse("Here's the analysis..."),
|
|
1002
|
+
]);
|
|
1357
1003
|
|
|
1358
1004
|
const ctx = makeCtx({
|
|
1359
|
-
|
|
1005
|
+
loopProvider: provider,
|
|
1360
1006
|
messages: longHistory,
|
|
1361
1007
|
contextWindowManager: {
|
|
1362
1008
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
@@ -1371,7 +1017,7 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1371
1017
|
// Preflight should trigger the reducer since 195k > 190k budget
|
|
1372
1018
|
expect(reducerCalled).toBe(true);
|
|
1373
1019
|
// Should succeed
|
|
1374
|
-
expect(
|
|
1020
|
+
expect(calls.length).toBe(1);
|
|
1375
1021
|
const conversationError = events.find(
|
|
1376
1022
|
(e) => e.type === "conversation_error",
|
|
1377
1023
|
);
|
|
@@ -1415,118 +1061,31 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1415
1061
|
};
|
|
1416
1062
|
};
|
|
1417
1063
|
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1423
|
-
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
const progressMessages: Message[] = [
|
|
1427
|
-
...messages,
|
|
1428
|
-
{
|
|
1429
|
-
role: "assistant" as const,
|
|
1430
|
-
content: [
|
|
1431
|
-
{ type: "text", text: "Running analysis..." },
|
|
1432
|
-
{
|
|
1433
|
-
type: "tool_use",
|
|
1434
|
-
id: "tu-1",
|
|
1435
|
-
name: "bash",
|
|
1436
|
-
input: { command: "find . -name '*.ts'" },
|
|
1437
|
-
},
|
|
1438
|
-
] as ContentBlock[],
|
|
1439
|
-
},
|
|
1440
|
-
{
|
|
1441
|
-
role: "user" as const,
|
|
1442
|
-
content: [
|
|
1443
|
-
{
|
|
1444
|
-
type: "tool_result",
|
|
1445
|
-
tool_use_id: "tu-1",
|
|
1446
|
-
content: "file1.ts\nfile2.ts\nfile3.ts",
|
|
1447
|
-
is_error: false,
|
|
1448
|
-
},
|
|
1449
|
-
] as ContentBlock[],
|
|
1450
|
-
},
|
|
1451
|
-
];
|
|
1064
|
+
// Run 1 makes progress (a tool turn) then the following provider call
|
|
1065
|
+
// rejects with context_too_large; after emergency compaction the rerun
|
|
1066
|
+
// recovers with plain text.
|
|
1067
|
+
const { provider } = createMockProvider([
|
|
1068
|
+
toolUseResponse("tu-1", "bash", { command: "find . -name '*.ts'" }),
|
|
1069
|
+
new Error("context_length_exceeded"),
|
|
1070
|
+
textResponse("recovered"),
|
|
1071
|
+
]);
|
|
1452
1072
|
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1073
|
+
const ctx = makeCtx({
|
|
1074
|
+
loopProvider: provider,
|
|
1075
|
+
loopTools: [
|
|
1076
|
+
{
|
|
1456
1077
|
name: "bash",
|
|
1457
|
-
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
toolUseId: "tu-1",
|
|
1462
|
-
content: "file1.ts\nfile2.ts\nfile3.ts",
|
|
1463
|
-
isError: false,
|
|
1464
|
-
});
|
|
1465
|
-
onEvent({
|
|
1466
|
-
type: "message_complete",
|
|
1467
|
-
message: {
|
|
1468
|
-
role: "assistant",
|
|
1469
|
-
content: [
|
|
1470
|
-
{ type: "text", text: "Running analysis..." },
|
|
1471
|
-
{
|
|
1472
|
-
type: "tool_use",
|
|
1473
|
-
id: "tu-1",
|
|
1474
|
-
name: "bash",
|
|
1475
|
-
input: { command: "find . -name '*.ts'" },
|
|
1476
|
-
},
|
|
1477
|
-
],
|
|
1078
|
+
description: "Run a shell command",
|
|
1079
|
+
input_schema: {
|
|
1080
|
+
type: "object",
|
|
1081
|
+
properties: { command: { type: "string" } },
|
|
1478
1082
|
},
|
|
1479
|
-
});
|
|
1480
|
-
onEvent({
|
|
1481
|
-
type: "usage",
|
|
1482
|
-
inputTokens: 190_000,
|
|
1483
|
-
outputTokens: 100,
|
|
1484
|
-
model: "test-model",
|
|
1485
|
-
providerDurationMs: 200,
|
|
1486
|
-
});
|
|
1487
|
-
|
|
1488
|
-
// Then context_too_large on the next LLM call within the loop
|
|
1489
|
-
onEvent({
|
|
1490
|
-
type: "error",
|
|
1491
|
-
error: new Error("context_length_exceeded"),
|
|
1492
|
-
});
|
|
1493
|
-
onEvent({
|
|
1494
|
-
type: "usage",
|
|
1495
|
-
inputTokens: 0,
|
|
1496
|
-
outputTokens: 0,
|
|
1497
|
-
model: "test-model",
|
|
1498
|
-
providerDurationMs: 10,
|
|
1499
|
-
});
|
|
1500
|
-
|
|
1501
|
-
return progressMessages;
|
|
1502
|
-
}
|
|
1503
|
-
|
|
1504
|
-
// After emergency compaction, succeed
|
|
1505
|
-
onEvent({
|
|
1506
|
-
type: "message_complete",
|
|
1507
|
-
message: {
|
|
1508
|
-
role: "assistant",
|
|
1509
|
-
content: [{ type: "text", text: "recovered" }],
|
|
1510
1083
|
},
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
model: "test-model",
|
|
1517
|
-
providerDurationMs: 200,
|
|
1518
|
-
});
|
|
1519
|
-
return [
|
|
1520
|
-
...messages,
|
|
1521
|
-
{
|
|
1522
|
-
role: "assistant" as const,
|
|
1523
|
-
content: [{ type: "text", text: "recovered" }] as ContentBlock[],
|
|
1524
|
-
},
|
|
1525
|
-
];
|
|
1526
|
-
};
|
|
1527
|
-
|
|
1528
|
-
const ctx = makeCtx({
|
|
1529
|
-
agentLoopRun,
|
|
1084
|
+
],
|
|
1085
|
+
toolExecutor: async () => ({
|
|
1086
|
+
content: "file1.ts\nfile2.ts\nfile3.ts",
|
|
1087
|
+
isError: false,
|
|
1088
|
+
}),
|
|
1530
1089
|
contextWindowManager: {
|
|
1531
1090
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
1532
1091
|
maybeCompact: async (
|
|
@@ -1603,111 +1162,30 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1603
1162
|
return 170_000;
|
|
1604
1163
|
};
|
|
1605
1164
|
|
|
1606
|
-
|
|
1607
|
-
|
|
1608
|
-
|
|
1609
|
-
|
|
1610
|
-
|
|
1611
|
-
|
|
1612
|
-
|
|
1613
|
-
if (agentLoopCallCount === 1) {
|
|
1614
|
-
// Simulate a tool round: assistant calls a tool, results come back
|
|
1615
|
-
const withProgress: Message[] = [
|
|
1616
|
-
...messages,
|
|
1617
|
-
{
|
|
1618
|
-
role: "assistant" as const,
|
|
1619
|
-
content: [
|
|
1620
|
-
{ type: "text", text: "Let me check." },
|
|
1621
|
-
{
|
|
1622
|
-
type: "tool_use",
|
|
1623
|
-
id: "tu-1",
|
|
1624
|
-
name: "bash",
|
|
1625
|
-
input: { command: "ls" },
|
|
1626
|
-
},
|
|
1627
|
-
] as ContentBlock[],
|
|
1628
|
-
},
|
|
1629
|
-
{
|
|
1630
|
-
role: "user" as const,
|
|
1631
|
-
content: [
|
|
1632
|
-
{
|
|
1633
|
-
type: "tool_result",
|
|
1634
|
-
tool_use_id: "tu-1",
|
|
1635
|
-
content: "file1.ts\nfile2.ts",
|
|
1636
|
-
is_error: false,
|
|
1637
|
-
},
|
|
1638
|
-
] as ContentBlock[],
|
|
1639
|
-
},
|
|
1640
|
-
];
|
|
1641
|
-
|
|
1642
|
-
onEvent({
|
|
1643
|
-
type: "message_complete",
|
|
1644
|
-
message: {
|
|
1645
|
-
role: "assistant",
|
|
1646
|
-
content: [
|
|
1647
|
-
{ type: "text", text: "Let me check." },
|
|
1648
|
-
{
|
|
1649
|
-
type: "tool_use",
|
|
1650
|
-
id: "tu-1",
|
|
1651
|
-
name: "bash",
|
|
1652
|
-
input: { command: "ls" },
|
|
1653
|
-
},
|
|
1654
|
-
],
|
|
1655
|
-
},
|
|
1656
|
-
});
|
|
1657
|
-
onEvent({
|
|
1658
|
-
type: "usage",
|
|
1659
|
-
inputTokens: 100,
|
|
1660
|
-
outputTokens: 50,
|
|
1661
|
-
model: "test-model",
|
|
1662
|
-
providerDurationMs: 100,
|
|
1663
|
-
});
|
|
1664
|
-
|
|
1665
|
-
// Call onCheckpoint — this should trigger the mid-loop budget check
|
|
1666
|
-
// which sees 170_000 > 161_500 and returns "yield"
|
|
1667
|
-
if (options?.onCheckpoint) {
|
|
1668
|
-
const decision = await options.onCheckpoint({
|
|
1669
|
-
turnIndex: 0,
|
|
1670
|
-
toolCount: 1,
|
|
1671
|
-
hasToolUse: true,
|
|
1672
|
-
history: withProgress,
|
|
1673
|
-
});
|
|
1674
|
-
if (decision !== "continue") {
|
|
1675
|
-
// Agent loop stops when checkpoint yields
|
|
1676
|
-
return withProgress;
|
|
1677
|
-
}
|
|
1678
|
-
}
|
|
1679
|
-
|
|
1680
|
-
return withProgress;
|
|
1681
|
-
}
|
|
1165
|
+
// A tool round trips the mid-loop budget gate (170k > 161_500); the
|
|
1166
|
+
// gate compacts in place (productive) and the loop continues, so the
|
|
1167
|
+
// post-compaction provider call completes the turn with plain text.
|
|
1168
|
+
const { provider, calls } = createMockProvider([
|
|
1169
|
+
toolUseResponse("tu-1", "bash", { command: "ls" }),
|
|
1170
|
+
textResponse("done after compaction"),
|
|
1171
|
+
]);
|
|
1682
1172
|
|
|
1683
|
-
|
|
1684
|
-
|
|
1685
|
-
|
|
1686
|
-
message: {
|
|
1687
|
-
role: "assistant",
|
|
1688
|
-
content: [{ type: "text", text: "done after compaction" }],
|
|
1689
|
-
},
|
|
1690
|
-
});
|
|
1691
|
-
onEvent({
|
|
1692
|
-
type: "usage",
|
|
1693
|
-
inputTokens: 50,
|
|
1694
|
-
outputTokens: 25,
|
|
1695
|
-
model: "test-model",
|
|
1696
|
-
providerDurationMs: 100,
|
|
1697
|
-
});
|
|
1698
|
-
return [
|
|
1699
|
-
...messages,
|
|
1173
|
+
const ctx = makeCtx({
|
|
1174
|
+
loopProvider: provider,
|
|
1175
|
+
loopTools: [
|
|
1700
1176
|
{
|
|
1701
|
-
|
|
1702
|
-
|
|
1703
|
-
|
|
1704
|
-
|
|
1177
|
+
name: "bash",
|
|
1178
|
+
description: "Run a shell command",
|
|
1179
|
+
input_schema: {
|
|
1180
|
+
type: "object",
|
|
1181
|
+
properties: { command: { type: "string" } },
|
|
1182
|
+
},
|
|
1705
1183
|
},
|
|
1706
|
-
]
|
|
1707
|
-
|
|
1708
|
-
|
|
1709
|
-
|
|
1710
|
-
|
|
1184
|
+
],
|
|
1185
|
+
toolExecutor: async () => ({
|
|
1186
|
+
content: "file1.ts\nfile2.ts",
|
|
1187
|
+
isError: false,
|
|
1188
|
+
}),
|
|
1711
1189
|
contextWindowManager: {
|
|
1712
1190
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
1713
1191
|
maybeCompact: async () => {
|
|
@@ -1741,8 +1219,9 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1741
1219
|
// The mid-loop budget check should have triggered compaction
|
|
1742
1220
|
expect(compactionCalled).toBe(true);
|
|
1743
1221
|
|
|
1744
|
-
//
|
|
1745
|
-
|
|
1222
|
+
// Provider called twice: the tool turn that tripped the gate, then the
|
|
1223
|
+
// post-compaction turn that completed the run.
|
|
1224
|
+
expect(calls.length).toBe(2);
|
|
1746
1225
|
|
|
1747
1226
|
// No conversation_error should be emitted
|
|
1748
1227
|
const conversationError = events.find(
|
|
@@ -1783,104 +1262,36 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1783
1262
|
return 175_000;
|
|
1784
1263
|
};
|
|
1785
1264
|
|
|
1786
|
-
let agentLoopCallCount = 0;
|
|
1787
1265
|
let contextTooLargeEmitted = false;
|
|
1788
1266
|
|
|
1789
|
-
|
|
1790
|
-
|
|
1791
|
-
|
|
1792
|
-
|
|
1793
|
-
|
|
1794
|
-
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
|
|
1799
|
-
|
|
1800
|
-
const toolId = `tu-${i}`;
|
|
1801
|
-
const assistantMsg: Message = {
|
|
1802
|
-
role: "assistant" as const,
|
|
1803
|
-
content: [
|
|
1804
|
-
{ type: "text", text: `Step ${i}` },
|
|
1805
|
-
{
|
|
1806
|
-
type: "tool_use",
|
|
1807
|
-
id: toolId,
|
|
1808
|
-
name: "bash",
|
|
1809
|
-
input: { command: `cmd-${i}` },
|
|
1810
|
-
},
|
|
1811
|
-
] as ContentBlock[],
|
|
1812
|
-
};
|
|
1813
|
-
const resultMsg: Message = {
|
|
1814
|
-
role: "user" as const,
|
|
1815
|
-
content: [
|
|
1816
|
-
{
|
|
1817
|
-
type: "tool_result",
|
|
1818
|
-
tool_use_id: toolId,
|
|
1819
|
-
content: "x".repeat(10_000),
|
|
1820
|
-
is_error: false,
|
|
1821
|
-
},
|
|
1822
|
-
] as ContentBlock[],
|
|
1823
|
-
};
|
|
1824
|
-
currentHistory.push(assistantMsg, resultMsg);
|
|
1825
|
-
|
|
1826
|
-
onEvent({
|
|
1827
|
-
type: "message_complete",
|
|
1828
|
-
message: assistantMsg,
|
|
1829
|
-
});
|
|
1830
|
-
onEvent({
|
|
1831
|
-
type: "usage",
|
|
1832
|
-
inputTokens: 50_000 + i * 20_000,
|
|
1833
|
-
outputTokens: 50,
|
|
1834
|
-
model: "test-model",
|
|
1835
|
-
providerDurationMs: 100,
|
|
1836
|
-
});
|
|
1837
|
-
|
|
1838
|
-
if (options?.onCheckpoint) {
|
|
1839
|
-
const decision = await options.onCheckpoint({
|
|
1840
|
-
turnIndex: i,
|
|
1841
|
-
toolCount: 1,
|
|
1842
|
-
hasToolUse: true,
|
|
1843
|
-
history: currentHistory,
|
|
1844
|
-
});
|
|
1845
|
-
if (decision !== "continue") {
|
|
1846
|
-
return currentHistory;
|
|
1847
|
-
}
|
|
1848
|
-
}
|
|
1849
|
-
}
|
|
1267
|
+
// Each tool round produces a large result; the estimate grows with each
|
|
1268
|
+
// checkpoint until tool round 3 trips the mid-loop gate (175k > 161_500).
|
|
1269
|
+
// Compaction runs in place (productive) and the loop continues, so the
|
|
1270
|
+
// following plain-text provider call completes the turn. The provider
|
|
1271
|
+
// never rejects with context_too_large.
|
|
1272
|
+
const { provider, calls } = createMockProvider([
|
|
1273
|
+
toolUseResponse("tu-0", "bash", { command: "cmd-0" }),
|
|
1274
|
+
toolUseResponse("tu-1", "bash", { command: "cmd-1" }),
|
|
1275
|
+
toolUseResponse("tu-2", "bash", { command: "cmd-2" }),
|
|
1276
|
+
textResponse("completed after mid-loop compaction"),
|
|
1277
|
+
]);
|
|
1850
1278
|
|
|
1851
|
-
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
// Second call (after compaction): complete
|
|
1855
|
-
onEvent({
|
|
1856
|
-
type: "message_complete",
|
|
1857
|
-
message: {
|
|
1858
|
-
role: "assistant",
|
|
1859
|
-
content: [
|
|
1860
|
-
{ type: "text", text: "completed after mid-loop compaction" },
|
|
1861
|
-
],
|
|
1862
|
-
},
|
|
1863
|
-
});
|
|
1864
|
-
onEvent({
|
|
1865
|
-
type: "usage",
|
|
1866
|
-
inputTokens: 60_000,
|
|
1867
|
-
outputTokens: 100,
|
|
1868
|
-
model: "test-model",
|
|
1869
|
-
providerDurationMs: 200,
|
|
1870
|
-
});
|
|
1871
|
-
return [
|
|
1872
|
-
...messages,
|
|
1279
|
+
const ctx = makeCtx({
|
|
1280
|
+
loopProvider: provider,
|
|
1281
|
+
loopTools: [
|
|
1873
1282
|
{
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
|
|
1283
|
+
name: "bash",
|
|
1284
|
+
description: "Run a shell command",
|
|
1285
|
+
input_schema: {
|
|
1286
|
+
type: "object",
|
|
1287
|
+
properties: { command: { type: "string" } },
|
|
1288
|
+
},
|
|
1878
1289
|
},
|
|
1879
|
-
]
|
|
1880
|
-
|
|
1881
|
-
|
|
1882
|
-
|
|
1883
|
-
|
|
1290
|
+
],
|
|
1291
|
+
toolExecutor: async () => ({
|
|
1292
|
+
content: "x".repeat(10_000),
|
|
1293
|
+
isError: false,
|
|
1294
|
+
}),
|
|
1884
1295
|
contextWindowManager: {
|
|
1885
1296
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
1886
1297
|
maybeCompact: async () => {
|
|
@@ -1927,8 +1338,9 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1927
1338
|
// The provider should NEVER have rejected with context_too_large
|
|
1928
1339
|
expect(contextTooLargeEmitted).toBe(false);
|
|
1929
1340
|
|
|
1930
|
-
//
|
|
1931
|
-
|
|
1341
|
+
// Provider called four times: three tool rounds (the third trips the
|
|
1342
|
+
// mid-loop gate) plus the post-compaction text turn that completes.
|
|
1343
|
+
expect(calls.length).toBe(4);
|
|
1932
1344
|
|
|
1933
1345
|
// No conversation_error
|
|
1934
1346
|
const conversationError = events.find(
|
|
@@ -1957,82 +1369,7 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
1957
1369
|
return 170_000;
|
|
1958
1370
|
};
|
|
1959
1371
|
|
|
1960
|
-
|
|
1961
|
-
const agentLoopRun: AgentLoopRun = async (messages, onEvent, options) => {
|
|
1962
|
-
// Prime the assistant row anchor — production code emits this from
|
|
1963
|
-
// `AgentLoop.run` just before `provider.sendMessage`.
|
|
1964
|
-
await onEvent({ type: "llm_call_started" });
|
|
1965
|
-
agentLoopCallCount++;
|
|
1966
|
-
|
|
1967
|
-
// Every call: simulate tool progress then yield at checkpoint
|
|
1968
|
-
const withProgress: Message[] = [
|
|
1969
|
-
...messages,
|
|
1970
|
-
{
|
|
1971
|
-
role: "assistant" as const,
|
|
1972
|
-
content: [
|
|
1973
|
-
{ type: "text", text: `Tool call ${agentLoopCallCount}` },
|
|
1974
|
-
{
|
|
1975
|
-
type: "tool_use",
|
|
1976
|
-
id: `tu-${agentLoopCallCount}`,
|
|
1977
|
-
name: "bash",
|
|
1978
|
-
input: { command: "ls" },
|
|
1979
|
-
},
|
|
1980
|
-
] as ContentBlock[],
|
|
1981
|
-
},
|
|
1982
|
-
{
|
|
1983
|
-
role: "user" as const,
|
|
1984
|
-
content: [
|
|
1985
|
-
{
|
|
1986
|
-
type: "tool_result",
|
|
1987
|
-
tool_use_id: `tu-${agentLoopCallCount}`,
|
|
1988
|
-
content: "output",
|
|
1989
|
-
is_error: false,
|
|
1990
|
-
},
|
|
1991
|
-
] as ContentBlock[],
|
|
1992
|
-
},
|
|
1993
|
-
];
|
|
1994
|
-
|
|
1995
|
-
onEvent({
|
|
1996
|
-
type: "message_complete",
|
|
1997
|
-
message: {
|
|
1998
|
-
role: "assistant",
|
|
1999
|
-
content: [
|
|
2000
|
-
{ type: "text", text: `Tool call ${agentLoopCallCount}` },
|
|
2001
|
-
{
|
|
2002
|
-
type: "tool_use",
|
|
2003
|
-
id: `tu-${agentLoopCallCount}`,
|
|
2004
|
-
name: "bash",
|
|
2005
|
-
input: { command: "ls" },
|
|
2006
|
-
},
|
|
2007
|
-
],
|
|
2008
|
-
},
|
|
2009
|
-
});
|
|
2010
|
-
onEvent({
|
|
2011
|
-
type: "usage",
|
|
2012
|
-
inputTokens: 100,
|
|
2013
|
-
outputTokens: 50,
|
|
2014
|
-
model: "test-model",
|
|
2015
|
-
providerDurationMs: 100,
|
|
2016
|
-
});
|
|
2017
|
-
|
|
2018
|
-
// Always yield at checkpoint — simulates compaction not helping
|
|
2019
|
-
if (options?.onCheckpoint) {
|
|
2020
|
-
const decision = await options.onCheckpoint({
|
|
2021
|
-
turnIndex: 0,
|
|
2022
|
-
toolCount: 1,
|
|
2023
|
-
hasToolUse: true,
|
|
2024
|
-
history: withProgress,
|
|
2025
|
-
});
|
|
2026
|
-
if (decision !== "continue") {
|
|
2027
|
-
return withProgress;
|
|
2028
|
-
}
|
|
2029
|
-
}
|
|
2030
|
-
|
|
2031
|
-
return withProgress;
|
|
2032
|
-
};
|
|
2033
|
-
|
|
2034
|
-
let compactionCallCount = 0;
|
|
2035
|
-
// Convergence reducer: reduce tokens enough to succeed
|
|
1372
|
+
// The convergence reducer reduces tokens enough for the rerun to recover.
|
|
2036
1373
|
let convergenceReducerCalled = false;
|
|
2037
1374
|
mockReducerStepFn = (msgs: Message[]) => {
|
|
2038
1375
|
convergenceReducerCalled = true;
|
|
@@ -2048,8 +1385,30 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2048
1385
|
};
|
|
2049
1386
|
};
|
|
2050
1387
|
|
|
1388
|
+
// Every provider call returns a tool_use, so each loop run does a tool
|
|
1389
|
+
// turn that trips the mid-loop budget gate. On the initial run the gate
|
|
1390
|
+
// calls compaction (which surfaces `exhausted: true`); the convergence
|
|
1391
|
+
// rerun runs without a compaction hook and yields "budget" directly.
|
|
1392
|
+
// With the reducer exhausted, the convergence loop terminates with the
|
|
1393
|
+
// turn still over budget and the orchestrator stamps `context_too_large`.
|
|
1394
|
+
const { provider, calls } = createMockProvider([
|
|
1395
|
+
toolUseResponse("tu-1", "bash", { command: "ls" }),
|
|
1396
|
+
]);
|
|
1397
|
+
|
|
1398
|
+
let compactionCallCount = 0;
|
|
2051
1399
|
const ctx = makeCtx({
|
|
2052
|
-
|
|
1400
|
+
loopProvider: provider,
|
|
1401
|
+
loopTools: [
|
|
1402
|
+
{
|
|
1403
|
+
name: "bash",
|
|
1404
|
+
description: "Run a shell command",
|
|
1405
|
+
input_schema: {
|
|
1406
|
+
type: "object",
|
|
1407
|
+
properties: { command: { type: "string" } },
|
|
1408
|
+
},
|
|
1409
|
+
},
|
|
1410
|
+
],
|
|
1411
|
+
toolExecutor: async () => ({ content: "output", isError: false }),
|
|
2053
1412
|
contextWindowManager: {
|
|
2054
1413
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
2055
1414
|
maybeCompact: async () => {
|
|
@@ -2057,9 +1416,9 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2057
1416
|
// Compaction's internal retry budget is exhausted — the
|
|
2058
1417
|
// compactor itself ran maxAttempts passes and still couldn't
|
|
2059
1418
|
// drop below the auto-threshold. `maybeCompact` surfaces this
|
|
2060
|
-
// via `exhausted: true` so the
|
|
2061
|
-
// straight to the convergence loop
|
|
2062
|
-
// stuck compactor.
|
|
1419
|
+
// via `exhausted: true` so the loop yields "budget" and the
|
|
1420
|
+
// orchestrator escalates straight to the convergence loop
|
|
1421
|
+
// instead of looping on a stuck compactor.
|
|
2063
1422
|
return {
|
|
2064
1423
|
compacted: true,
|
|
2065
1424
|
exhausted: true,
|
|
@@ -2094,10 +1453,10 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2094
1453
|
// `ContextWindowManager.maybeCompact`.
|
|
2095
1454
|
expect(compactionCallCount).toBe(2);
|
|
2096
1455
|
|
|
2097
|
-
//
|
|
2098
|
-
// mid-loop re-entries because the orchestrator
|
|
2099
|
-
// `exhausted` before re-invoking the
|
|
2100
|
-
expect(
|
|
1456
|
+
// Provider calls: 1 initial tool turn (yields budget) + 1 convergence
|
|
1457
|
+
// rerun that recovers. No mid-loop re-entries because the orchestrator
|
|
1458
|
+
// broke out on `exhausted` before re-invoking the loop.
|
|
1459
|
+
expect(calls.length).toBe(2);
|
|
2101
1460
|
|
|
2102
1461
|
// After the compactor exhausted itself, the convergence loop
|
|
2103
1462
|
// should have been triggered (contextTooLargeDetected set to true)
|
|
@@ -2132,83 +1491,32 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2132
1491
|
return 170_000;
|
|
2133
1492
|
};
|
|
2134
1493
|
|
|
2135
|
-
// A single tool round reaches one checkpoint; the in-loop budget
|
|
2136
|
-
//
|
|
2137
|
-
// the
|
|
2138
|
-
// orchestrator
|
|
2139
|
-
|
|
2140
|
-
|
|
2141
|
-
|
|
2142
|
-
|
|
2143
|
-
|
|
2144
|
-
const withProgress: Message[] = [
|
|
2145
|
-
...messages,
|
|
2146
|
-
{
|
|
2147
|
-
role: "assistant" as const,
|
|
2148
|
-
content: [
|
|
2149
|
-
{ type: "text", text: `Tool call ${agentLoopCallCount}` },
|
|
2150
|
-
{
|
|
2151
|
-
type: "tool_use",
|
|
2152
|
-
id: `tu-${agentLoopCallCount}`,
|
|
2153
|
-
name: "bash",
|
|
2154
|
-
input: { command: "ls" },
|
|
2155
|
-
},
|
|
2156
|
-
] as ContentBlock[],
|
|
2157
|
-
},
|
|
2158
|
-
{
|
|
2159
|
-
role: "user" as const,
|
|
2160
|
-
content: [
|
|
2161
|
-
{
|
|
2162
|
-
type: "tool_result",
|
|
2163
|
-
tool_use_id: `tu-${agentLoopCallCount}`,
|
|
2164
|
-
content: "output",
|
|
2165
|
-
is_error: false,
|
|
2166
|
-
},
|
|
2167
|
-
] as ContentBlock[],
|
|
2168
|
-
},
|
|
2169
|
-
];
|
|
2170
|
-
|
|
2171
|
-
onEvent({
|
|
2172
|
-
type: "message_complete",
|
|
2173
|
-
message: {
|
|
2174
|
-
role: "assistant",
|
|
2175
|
-
content: [
|
|
2176
|
-
{ type: "text", text: `Tool call ${agentLoopCallCount}` },
|
|
2177
|
-
{
|
|
2178
|
-
type: "tool_use",
|
|
2179
|
-
id: `tu-${agentLoopCallCount}`,
|
|
2180
|
-
name: "bash",
|
|
2181
|
-
input: { command: "ls" },
|
|
2182
|
-
},
|
|
2183
|
-
],
|
|
2184
|
-
},
|
|
2185
|
-
});
|
|
2186
|
-
onEvent({
|
|
2187
|
-
type: "usage",
|
|
2188
|
-
inputTokens: 100,
|
|
2189
|
-
outputTokens: 50,
|
|
2190
|
-
model: "test-model",
|
|
2191
|
-
providerDurationMs: 100,
|
|
2192
|
-
});
|
|
2193
|
-
|
|
2194
|
-
if (options?.onCheckpoint) {
|
|
2195
|
-
await options.onCheckpoint({
|
|
2196
|
-
turnIndex: 0,
|
|
2197
|
-
toolCount: 1,
|
|
2198
|
-
hasToolUse: true,
|
|
2199
|
-
history: withProgress,
|
|
2200
|
-
});
|
|
2201
|
-
}
|
|
2202
|
-
|
|
2203
|
-
return withProgress;
|
|
2204
|
-
};
|
|
1494
|
+
// A single tool round reaches one checkpoint; the in-loop budget gate
|
|
1495
|
+
// trips there and compaction runs in place. The loop continues the run
|
|
1496
|
+
// itself — the following provider call returns plain text and the turn
|
|
1497
|
+
// completes — so the orchestrator never re-enters the convergence loop.
|
|
1498
|
+
const { provider, calls } = createMockProvider([
|
|
1499
|
+
toolUseResponse("tu-1", "bash", { command: "ls" }),
|
|
1500
|
+
textResponse("final answer"),
|
|
1501
|
+
]);
|
|
2205
1502
|
|
|
2206
1503
|
// Compaction reports `estimatedInputTokens` well below the 161_500
|
|
2207
1504
|
// threshold — the "compaction is productive" signal (no `exhausted`
|
|
2208
1505
|
// flag) that lets the loop continue in place.
|
|
2209
1506
|
let compactionCallCount = 0;
|
|
2210
1507
|
const ctx = makeCtx({
|
|
2211
|
-
|
|
1508
|
+
loopProvider: provider,
|
|
1509
|
+
loopTools: [
|
|
1510
|
+
{
|
|
1511
|
+
name: "bash",
|
|
1512
|
+
description: "Run a shell command",
|
|
1513
|
+
input_schema: {
|
|
1514
|
+
type: "object",
|
|
1515
|
+
properties: { command: { type: "string" } },
|
|
1516
|
+
},
|
|
1517
|
+
},
|
|
1518
|
+
],
|
|
1519
|
+
toolExecutor: async () => ({ content: "output", isError: false }),
|
|
2212
1520
|
contextWindowManager: {
|
|
2213
1521
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
2214
1522
|
maybeCompact: async () => {
|
|
@@ -2239,18 +1547,20 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2239
1547
|
|
|
2240
1548
|
await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
|
|
2241
1549
|
|
|
2242
|
-
// 1 initial auto-compact + 1 productive mid-loop compaction.
|
|
2243
|
-
// loop continues in place after compacting, so the orchestrator
|
|
2244
|
-
// never re-enters `run()` — it is invoked exactly once.
|
|
1550
|
+
// 1 initial auto-compact + 1 productive mid-loop compaction.
|
|
2245
1551
|
expect(compactionCallCount).toBe(2);
|
|
2246
|
-
|
|
1552
|
+
// The loop continued in place after compacting: a tool turn followed by
|
|
1553
|
+
// the post-compaction text turn, both within a single run.
|
|
1554
|
+
expect(calls.length).toBe(2);
|
|
2247
1555
|
|
|
2248
1556
|
// No escalation to the convergence loop because the mid-loop
|
|
2249
|
-
// `maybeCompact` returned productive (no `exhausted` flag)
|
|
1557
|
+
// `maybeCompact` returned productive (no `exhausted` flag), and the turn
|
|
1558
|
+
// completed normally.
|
|
2250
1559
|
expect(setAgentLoopExitReasonOnLatestLogMock).not.toHaveBeenCalledWith(
|
|
2251
1560
|
"test-conv",
|
|
2252
1561
|
"context_too_large",
|
|
2253
1562
|
);
|
|
1563
|
+
expect(events.find((e) => e.type === "conversation_error")).toBeUndefined();
|
|
2254
1564
|
});
|
|
2255
1565
|
|
|
2256
1566
|
// ── Test 9 ────────────────────────────────────────────────────────
|
|
@@ -2272,78 +1582,13 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2272
1582
|
return 170_000;
|
|
2273
1583
|
};
|
|
2274
1584
|
|
|
2275
|
-
|
|
2276
|
-
|
|
2277
|
-
|
|
2278
|
-
|
|
2279
|
-
|
|
2280
|
-
|
|
2281
|
-
|
|
2282
|
-
const withProgress: Message[] = [
|
|
2283
|
-
...messages,
|
|
2284
|
-
{
|
|
2285
|
-
role: "assistant" as const,
|
|
2286
|
-
content: [
|
|
2287
|
-
{ type: "text", text: `Tool call ${agentLoopCallCount}` },
|
|
2288
|
-
{
|
|
2289
|
-
type: "tool_use",
|
|
2290
|
-
id: `tu-${agentLoopCallCount}`,
|
|
2291
|
-
name: "bash",
|
|
2292
|
-
input: { command: "ls" },
|
|
2293
|
-
},
|
|
2294
|
-
] as ContentBlock[],
|
|
2295
|
-
},
|
|
2296
|
-
{
|
|
2297
|
-
role: "user" as const,
|
|
2298
|
-
content: [
|
|
2299
|
-
{
|
|
2300
|
-
type: "tool_result",
|
|
2301
|
-
tool_use_id: `tu-${agentLoopCallCount}`,
|
|
2302
|
-
content: "output",
|
|
2303
|
-
is_error: false,
|
|
2304
|
-
},
|
|
2305
|
-
] as ContentBlock[],
|
|
2306
|
-
},
|
|
2307
|
-
];
|
|
2308
|
-
|
|
2309
|
-
onEvent({
|
|
2310
|
-
type: "message_complete",
|
|
2311
|
-
message: {
|
|
2312
|
-
role: "assistant",
|
|
2313
|
-
content: [
|
|
2314
|
-
{ type: "text", text: `Tool call ${agentLoopCallCount}` },
|
|
2315
|
-
{
|
|
2316
|
-
type: "tool_use",
|
|
2317
|
-
id: `tu-${agentLoopCallCount}`,
|
|
2318
|
-
name: "bash",
|
|
2319
|
-
input: { command: "ls" },
|
|
2320
|
-
},
|
|
2321
|
-
],
|
|
2322
|
-
},
|
|
2323
|
-
});
|
|
2324
|
-
onEvent({
|
|
2325
|
-
type: "usage",
|
|
2326
|
-
inputTokens: 100,
|
|
2327
|
-
outputTokens: 50,
|
|
2328
|
-
model: "test-model",
|
|
2329
|
-
providerDurationMs: 100,
|
|
2330
|
-
});
|
|
2331
|
-
|
|
2332
|
-
// Always yield at checkpoint — simulates reduction not helping enough
|
|
2333
|
-
if (options?.onCheckpoint) {
|
|
2334
|
-
const decision = await options.onCheckpoint({
|
|
2335
|
-
turnIndex: 0,
|
|
2336
|
-
toolCount: 1,
|
|
2337
|
-
hasToolUse: true,
|
|
2338
|
-
history: withProgress,
|
|
2339
|
-
});
|
|
2340
|
-
if (decision !== "continue") {
|
|
2341
|
-
return withProgress;
|
|
2342
|
-
}
|
|
2343
|
-
}
|
|
2344
|
-
|
|
2345
|
-
return withProgress;
|
|
2346
|
-
};
|
|
1585
|
+
// Every provider call returns a tool_use, so each loop run does a tool
|
|
1586
|
+
// turn that trips the mid-loop budget gate and yields "budget". The
|
|
1587
|
+
// initial run's gate calls compaction (exhausted); the convergence
|
|
1588
|
+
// reruns run without a compaction hook and yield directly.
|
|
1589
|
+
const { provider, calls } = createMockProvider([
|
|
1590
|
+
toolUseResponse("tu-1", "bash", { command: "ls" }),
|
|
1591
|
+
]);
|
|
2347
1592
|
|
|
2348
1593
|
// Convergence reducer: first call returns non-exhausted, second returns exhausted
|
|
2349
1594
|
let reducerCallCount = 0;
|
|
@@ -2375,7 +1620,18 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2375
1620
|
};
|
|
2376
1621
|
|
|
2377
1622
|
const ctx = makeCtx({
|
|
2378
|
-
|
|
1623
|
+
loopProvider: provider,
|
|
1624
|
+
loopTools: [
|
|
1625
|
+
{
|
|
1626
|
+
name: "bash",
|
|
1627
|
+
description: "Run a shell command",
|
|
1628
|
+
input_schema: {
|
|
1629
|
+
type: "object",
|
|
1630
|
+
properties: { command: { type: "string" } },
|
|
1631
|
+
},
|
|
1632
|
+
},
|
|
1633
|
+
],
|
|
1634
|
+
toolExecutor: async () => ({ content: "output", isError: false }),
|
|
2379
1635
|
contextWindowManager: {
|
|
2380
1636
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
2381
1637
|
// Under the new architecture (Compaction Re-homing Arc, Bullet 1)
|
|
@@ -2413,10 +1669,11 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2413
1669
|
// once more after yieldedForBudget triggered re-entry
|
|
2414
1670
|
expect(reducerCallCount).toBe(2);
|
|
2415
1671
|
|
|
2416
|
-
//
|
|
2417
|
-
//
|
|
2418
|
-
//
|
|
2419
|
-
|
|
1672
|
+
// Provider calls: 1 initial run + 2 convergence reruns = 3 calls, each a
|
|
1673
|
+
// tool turn that yields "budget". The mid-loop no longer drives
|
|
1674
|
+
// daemon-level retries — the manager owns its retry budget and signals
|
|
1675
|
+
// exhaustion via the `exhausted` flag.
|
|
1676
|
+
expect(calls.length).toBe(3);
|
|
2420
1677
|
expect(setAgentLoopExitReasonOnLatestLogMock).toHaveBeenCalledWith(
|
|
2421
1678
|
"test-conv",
|
|
2422
1679
|
"context_too_large",
|
|
@@ -2516,35 +1773,10 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2516
1773
|
};
|
|
2517
1774
|
};
|
|
2518
1775
|
|
|
2519
|
-
|
|
2520
|
-
|
|
2521
|
-
// `AgentLoop.run` just before `provider.sendMessage`.
|
|
2522
|
-
await onEvent({ type: "llm_call_started" });
|
|
2523
|
-
onEvent({
|
|
2524
|
-
type: "message_complete",
|
|
2525
|
-
message: {
|
|
2526
|
-
role: "assistant",
|
|
2527
|
-
content: [{ type: "text", text: "done" }],
|
|
2528
|
-
},
|
|
2529
|
-
});
|
|
2530
|
-
onEvent({
|
|
2531
|
-
type: "usage",
|
|
2532
|
-
inputTokens: 170_000,
|
|
2533
|
-
outputTokens: 200,
|
|
2534
|
-
model: "test-model",
|
|
2535
|
-
providerDurationMs: 500,
|
|
2536
|
-
});
|
|
2537
|
-
return [
|
|
2538
|
-
...messages,
|
|
2539
|
-
{
|
|
2540
|
-
role: "assistant" as const,
|
|
2541
|
-
content: [{ type: "text", text: "done" }] as ContentBlock[],
|
|
2542
|
-
},
|
|
2543
|
-
];
|
|
2544
|
-
};
|
|
2545
|
-
|
|
1776
|
+
// The preflight overflow reducer runs in the orchestrator before the loop,
|
|
1777
|
+
// so a single successful provider turn is enough to drive the path.
|
|
2546
1778
|
const ctx = makeCtx({
|
|
2547
|
-
|
|
1779
|
+
providerResponses: [textResponse("done")],
|
|
2548
1780
|
contextWindowManager: {
|
|
2549
1781
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
2550
1782
|
maybeCompact: async () => ({ compacted: false }),
|
|
@@ -2615,78 +1847,12 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2615
1847
|
// emergency compaction + final agentLoop.run path executes.
|
|
2616
1848
|
mockOverflowAction = "auto_compress_latest_turn";
|
|
2617
1849
|
|
|
2618
|
-
|
|
2619
|
-
|
|
2620
|
-
|
|
2621
|
-
|
|
2622
|
-
|
|
2623
|
-
|
|
2624
|
-
|
|
2625
|
-
const withProgress: Message[] = [
|
|
2626
|
-
...messages,
|
|
2627
|
-
{
|
|
2628
|
-
role: "assistant" as const,
|
|
2629
|
-
content: [
|
|
2630
|
-
{ type: "text", text: `tool call ${agentLoopCallCount}` },
|
|
2631
|
-
{
|
|
2632
|
-
type: "tool_use",
|
|
2633
|
-
id: `tu-${agentLoopCallCount}`,
|
|
2634
|
-
name: "bash",
|
|
2635
|
-
input: { command: "ls" },
|
|
2636
|
-
},
|
|
2637
|
-
] as ContentBlock[],
|
|
2638
|
-
},
|
|
2639
|
-
{
|
|
2640
|
-
role: "user" as const,
|
|
2641
|
-
content: [
|
|
2642
|
-
{
|
|
2643
|
-
type: "tool_result",
|
|
2644
|
-
tool_use_id: `tu-${agentLoopCallCount}`,
|
|
2645
|
-
content: "output",
|
|
2646
|
-
is_error: false,
|
|
2647
|
-
},
|
|
2648
|
-
] as ContentBlock[],
|
|
2649
|
-
},
|
|
2650
|
-
];
|
|
2651
|
-
|
|
2652
|
-
onEvent({
|
|
2653
|
-
type: "message_complete",
|
|
2654
|
-
message: {
|
|
2655
|
-
role: "assistant",
|
|
2656
|
-
content: [
|
|
2657
|
-
{ type: "text", text: `tool call ${agentLoopCallCount}` },
|
|
2658
|
-
{
|
|
2659
|
-
type: "tool_use",
|
|
2660
|
-
id: `tu-${agentLoopCallCount}`,
|
|
2661
|
-
name: "bash",
|
|
2662
|
-
input: { command: "ls" },
|
|
2663
|
-
},
|
|
2664
|
-
],
|
|
2665
|
-
},
|
|
2666
|
-
});
|
|
2667
|
-
onEvent({
|
|
2668
|
-
type: "usage",
|
|
2669
|
-
inputTokens: 100,
|
|
2670
|
-
outputTokens: 50,
|
|
2671
|
-
model: "test-model",
|
|
2672
|
-
providerDurationMs: 100,
|
|
2673
|
-
});
|
|
2674
|
-
|
|
2675
|
-
// Every checkpoint yields — including the final auto_compress rerun.
|
|
2676
|
-
if (options?.onCheckpoint) {
|
|
2677
|
-
const decision = await options.onCheckpoint({
|
|
2678
|
-
turnIndex: 0,
|
|
2679
|
-
toolCount: 1,
|
|
2680
|
-
hasToolUse: true,
|
|
2681
|
-
history: withProgress,
|
|
2682
|
-
});
|
|
2683
|
-
if (decision !== "continue") {
|
|
2684
|
-
return withProgress;
|
|
2685
|
-
}
|
|
2686
|
-
}
|
|
2687
|
-
|
|
2688
|
-
return withProgress;
|
|
2689
|
-
};
|
|
1850
|
+
// Every provider call returns a tool_use, so each loop run does a tool
|
|
1851
|
+
// turn that trips the mid-loop budget gate and yields "budget" —
|
|
1852
|
+
// including the final auto_compress rerun.
|
|
1853
|
+
const { provider } = createMockProvider([
|
|
1854
|
+
toolUseResponse("tu-1", "bash", { command: "ls" }),
|
|
1855
|
+
]);
|
|
2690
1856
|
|
|
2691
1857
|
// `maybeCompact` is invoked through three distinct call sites:
|
|
2692
1858
|
// 1. Start-of-turn compaction (no `force` option) — return a no-op
|
|
@@ -2702,7 +1868,18 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
|
|
|
2702
1868
|
// as BUDGET_YIELD_UNRECOVERED.
|
|
2703
1869
|
let forcedMaybeCompactCallCount = 0;
|
|
2704
1870
|
const ctx = makeCtx({
|
|
2705
|
-
|
|
1871
|
+
loopProvider: provider,
|
|
1872
|
+
loopTools: [
|
|
1873
|
+
{
|
|
1874
|
+
name: "bash",
|
|
1875
|
+
description: "Run a shell command",
|
|
1876
|
+
input_schema: {
|
|
1877
|
+
type: "object",
|
|
1878
|
+
properties: { command: { type: "string" } },
|
|
1879
|
+
},
|
|
1880
|
+
},
|
|
1881
|
+
],
|
|
1882
|
+
toolExecutor: async () => ({ content: "output", isError: false }),
|
|
2706
1883
|
contextWindowManager: {
|
|
2707
1884
|
shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
|
|
2708
1885
|
maybeCompact: async (
|