@vellumai/assistant 0.4.17 → 0.4.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/runbook-trusted-contacts.md +5 -3
- package/eslint.config.mjs +2 -2
- package/package.json +1 -1
- package/src/__tests__/access-request-decision.test.ts +128 -120
- package/src/__tests__/account-registry.test.ts +121 -110
- package/src/__tests__/active-skill-tools.test.ts +200 -172
- package/src/__tests__/actor-token-service.test.ts +341 -274
- package/src/__tests__/agent-loop-thinking.test.ts +28 -19
- package/src/__tests__/agent-loop.test.ts +798 -378
- package/src/__tests__/anthropic-provider.test.ts +405 -247
- package/src/__tests__/app-builder-tool-scripts.test.ts +97 -97
- package/src/__tests__/app-bundler.test.ts +112 -79
- package/src/__tests__/app-executors.test.ts +205 -178
- package/src/__tests__/app-git-history.test.ts +90 -73
- package/src/__tests__/app-git-service.test.ts +67 -53
- package/src/__tests__/app-open-proxy.test.ts +29 -25
- package/src/__tests__/approval-conversation-turn.test.ts +100 -81
- package/src/__tests__/approval-hardcoded-copy-guard.test.ts +45 -17
- package/src/__tests__/approval-message-composer.test.ts +119 -119
- package/src/__tests__/approval-primitive.test.ts +264 -233
- package/src/__tests__/approval-routes-http.test.ts +4 -3
- package/src/__tests__/asset-materialize-tool.test.ts +250 -178
- package/src/__tests__/asset-search-tool.test.ts +251 -191
- package/src/__tests__/assistant-attachment-directive.test.ts +187 -142
- package/src/__tests__/assistant-attachments.test.ts +254 -186
- package/src/__tests__/assistant-event-hub.test.ts +105 -63
- package/src/__tests__/assistant-event.test.ts +66 -58
- package/src/__tests__/assistant-events-sse-hardening.test.ts +113 -73
- package/src/__tests__/assistant-feature-flag-guard.test.ts +78 -52
- package/src/__tests__/assistant-feature-flag-guardrails.test.ts +48 -45
- package/src/__tests__/assistant-feature-flags-integration.test.ts +118 -77
- package/src/__tests__/assistant-id-boundary-guard.test.ts +158 -104
- package/src/__tests__/attachments-store.test.ts +240 -183
- package/src/__tests__/attachments.test.ts +70 -62
- package/src/__tests__/audit-log-rotation.test.ts +50 -35
- package/src/__tests__/browser-fill-credential.test.ts +169 -101
- package/src/__tests__/browser-manager.test.ts +97 -75
- package/src/__tests__/browser-runtime-check.test.ts +16 -15
- package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +12 -10
- package/src/__tests__/browser-skill-endstate.test.ts +97 -72
- package/src/__tests__/bundle-scanner.test.ts +47 -22
- package/src/__tests__/bundled-asset.test.ts +74 -47
- package/src/__tests__/call-constants.test.ts +19 -19
- package/src/__tests__/call-controller.test.ts +0 -1
- package/src/__tests__/call-conversation-messages.test.ts +90 -65
- package/src/__tests__/call-domain.test.ts +149 -121
- package/src/__tests__/call-pointer-message-composer.test.ts +113 -83
- package/src/__tests__/call-pointer-messages.test.ts +213 -154
- package/src/__tests__/call-pointer-no-hardcoded-copy.guard.test.ts +9 -10
- package/src/__tests__/call-recovery.test.ts +232 -212
- package/src/__tests__/call-routes-http.test.ts +0 -1
- package/src/__tests__/call-start-guardian-guard.test.ts +32 -30
- package/src/__tests__/call-state-machine.test.ts +62 -51
- package/src/__tests__/call-state.test.ts +89 -75
- package/src/__tests__/call-store.test.ts +387 -316
- package/src/__tests__/callback-handoff-copy.test.ts +84 -82
- package/src/__tests__/canonical-guardian-store.test.ts +331 -280
- package/src/__tests__/channel-approval-routes.test.ts +1643 -1115
- package/src/__tests__/channel-approval.test.ts +139 -137
- package/src/__tests__/channel-approvals.test.ts +7 -2
- package/src/__tests__/channel-delivery-store.test.ts +232 -194
- package/src/__tests__/channel-guardian.test.ts +5 -3
- package/src/__tests__/channel-invite-transport.test.ts +107 -92
- package/src/__tests__/channel-policy.test.ts +42 -38
- package/src/__tests__/channel-readiness-service.test.ts +119 -102
- package/src/__tests__/channel-reply-delivery.test.ts +147 -118
- package/src/__tests__/channel-retry-sweep.test.ts +153 -110
- package/src/__tests__/checker.test.ts +3309 -1850
- package/src/__tests__/clarification-resolver.test.ts +91 -79
- package/src/__tests__/classifier.test.ts +64 -54
- package/src/__tests__/claude-code-skill-regression.test.ts +42 -37
- package/src/__tests__/claude-code-tool-profiles.test.ts +31 -29
- package/src/__tests__/clawhub.test.ts +92 -82
- package/src/__tests__/cli.test.ts +30 -30
- package/src/__tests__/clipboard.test.ts +53 -46
- package/src/__tests__/commit-guarantee.test.ts +59 -52
- package/src/__tests__/commit-message-enrichment-service.test.ts +203 -75
- package/src/__tests__/compaction.benchmark.test.ts +33 -31
- package/src/__tests__/computer-use-session-compaction.test.ts +60 -50
- package/src/__tests__/computer-use-session-lifecycle.test.ts +145 -117
- package/src/__tests__/computer-use-session-working-dir.test.ts +62 -48
- package/src/__tests__/computer-use-skill-baseline.test.ts +22 -19
- package/src/__tests__/computer-use-skill-endstate.test.ts +45 -31
- package/src/__tests__/computer-use-skill-lifecycle-cleanup.test.ts +121 -88
- package/src/__tests__/computer-use-skill-manifest-regression.test.ts +65 -42
- package/src/__tests__/computer-use-skill-proxy-bridge.test.ts +33 -18
- package/src/__tests__/computer-use-tools.test.ts +121 -98
- package/src/__tests__/config-schema.test.ts +443 -347
- package/src/__tests__/config-watcher.test.ts +96 -81
- package/src/__tests__/confirmation-request-guardian-bridge.test.ts +148 -133
- package/src/__tests__/conflict-intent-tokenization.test.ts +96 -78
- package/src/__tests__/conflict-policy.test.ts +151 -80
- package/src/__tests__/conflict-store.test.ts +203 -157
- package/src/__tests__/connection-policy.test.ts +89 -59
- package/src/__tests__/contacts-tools.test.ts +247 -178
- package/src/__tests__/context-memory-e2e.test.ts +306 -214
- package/src/__tests__/context-token-estimator.test.ts +114 -74
- package/src/__tests__/context-window-manager.test.ts +269 -167
- package/src/__tests__/contradiction-checker.test.ts +161 -135
- package/src/__tests__/conversation-attention-store.test.ts +350 -290
- package/src/__tests__/conversation-attention-telegram.test.ts +0 -1
- package/src/__tests__/conversation-pairing.test.ts +220 -113
- package/src/__tests__/conversation-routes-guardian-reply.test.ts +8 -0
- package/src/__tests__/conversation-store.test.ts +390 -235
- package/src/__tests__/credential-broker-browser-fill.test.ts +325 -250
- package/src/__tests__/credential-broker-server-use.test.ts +283 -243
- package/src/__tests__/credential-broker.test.ts +128 -74
- package/src/__tests__/credential-host-pattern-match.test.ts +64 -44
- package/src/__tests__/credential-metadata-store.test.ts +360 -311
- package/src/__tests__/credential-policy-validate.test.ts +81 -65
- package/src/__tests__/credential-resolve.test.ts +212 -145
- package/src/__tests__/credential-security-e2e.test.ts +144 -103
- package/src/__tests__/credential-security-invariants.test.ts +253 -208
- package/src/__tests__/credential-selection.test.ts +254 -146
- package/src/__tests__/credential-vault-unit.test.ts +531 -341
- package/src/__tests__/credential-vault.test.ts +761 -484
- package/src/__tests__/daemon-assistant-events.test.ts +91 -66
- package/src/__tests__/daemon-lifecycle.test.ts +258 -190
- package/src/__tests__/daemon-server-session-init.test.ts +2 -1
- package/src/__tests__/date-context.test.ts +314 -249
- package/src/__tests__/db-migration-rollback.test.ts +259 -130
- package/src/__tests__/db-schedule-syntax-migration.test.ts +78 -41
- package/src/__tests__/delete-managed-skill-tool.test.ts +77 -53
- package/src/__tests__/deterministic-verification-control-plane.test.ts +0 -1
- package/src/__tests__/dictation-mode-detection.test.ts +77 -55
- package/src/__tests__/dictation-profile-store.test.ts +70 -56
- package/src/__tests__/dictation-text-processing.test.ts +53 -35
- package/src/__tests__/diff.test.ts +102 -98
- package/src/__tests__/domain-normalize.test.ts +54 -54
- package/src/__tests__/domain-policy.test.ts +71 -55
- package/src/__tests__/dynamic-page-surface.test.ts +31 -33
- package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +69 -69
- package/src/__tests__/edit-engine.test.ts +56 -56
- package/src/__tests__/elevenlabs-client.test.ts +117 -91
- package/src/__tests__/elevenlabs-config.test.ts +32 -31
- package/src/__tests__/email-classifier.test.ts +15 -12
- package/src/__tests__/email-cli.test.ts +121 -108
- package/src/__tests__/emit-signal-routing-intent.test.ts +76 -69
- package/src/__tests__/encrypted-store.test.ts +180 -154
- package/src/__tests__/entity-extractor.test.ts +108 -87
- package/src/__tests__/entity-search.test.ts +664 -258
- package/src/__tests__/ephemeral-permissions.test.ts +224 -188
- package/src/__tests__/event-bus.test.ts +81 -77
- package/src/__tests__/extract-email.test.ts +29 -20
- package/src/__tests__/file-edit-tool.test.ts +62 -44
- package/src/__tests__/file-ops-service.test.ts +131 -114
- package/src/__tests__/file-read-tool.test.ts +48 -31
- package/src/__tests__/file-write-tool.test.ts +43 -37
- package/src/__tests__/filesystem-tools.test.ts +238 -209
- package/src/__tests__/followup-tools.test.ts +237 -162
- package/src/__tests__/forbidden-legacy-symbols.test.ts +19 -20
- package/src/__tests__/frontmatter.test.ts +96 -81
- package/src/__tests__/fuzzy-match-property.test.ts +75 -81
- package/src/__tests__/fuzzy-match.test.ts +71 -65
- package/src/__tests__/gateway-client-managed-outbound.test.ts +76 -57
- package/src/__tests__/gateway-only-enforcement.test.ts +0 -1
- package/src/__tests__/gateway-only-guard.test.ts +0 -1
- package/src/__tests__/gemini-image-service.test.ts +113 -100
- package/src/__tests__/gemini-provider.test.ts +297 -220
- package/src/__tests__/get-weather.test.ts +188 -114
- package/src/__tests__/gmail-integration.test.ts +13 -5
- package/src/__tests__/guardian-action-conversation-turn.test.ts +226 -171
- package/src/__tests__/guardian-action-copy-generator.test.ts +111 -93
- package/src/__tests__/guardian-action-followup-executor.test.ts +0 -1
- package/src/__tests__/guardian-action-followup-store.test.ts +199 -167
- package/src/__tests__/guardian-action-grant-mint-consume.test.ts +297 -250
- package/src/__tests__/guardian-action-late-reply.test.ts +462 -316
- package/src/__tests__/guardian-action-no-hardcoded-copy.test.ts +23 -18
- package/src/__tests__/guardian-action-store.test.ts +158 -109
- package/src/__tests__/guardian-action-sweep.test.ts +114 -100
- package/src/__tests__/guardian-actions-endpoint.test.ts +440 -256
- package/src/__tests__/guardian-control-plane-policy.test.ts +497 -331
- package/src/__tests__/guardian-decision-primitive-canonical.test.ts +217 -215
- package/src/__tests__/guardian-dispatch.test.ts +316 -256
- package/src/__tests__/guardian-grant-minting.test.ts +247 -178
- package/src/__tests__/guardian-outbound-http.test.ts +5 -3
- package/src/__tests__/guardian-principal-id-roundtrip.test.ts +99 -96
- package/src/__tests__/guardian-question-copy.test.ts +17 -17
- package/src/__tests__/guardian-question-mode.test.ts +134 -100
- package/src/__tests__/guardian-routing-invariants.test.ts +0 -1
- package/src/__tests__/guardian-routing-state.test.ts +0 -1
- package/src/__tests__/guardian-verification-intent-routing.test.ts +94 -88
- package/src/__tests__/guardian-verification-voice-binding.test.ts +0 -1
- package/src/__tests__/guardian-verify-setup-skill-regression.test.ts +0 -1
- package/src/__tests__/handle-user-message-secret-resume.test.ts +7 -2
- package/src/__tests__/handlers-add-trust-rule-metadata.test.ts +92 -76
- package/src/__tests__/handlers-cu-observation-blob.test.ts +103 -70
- package/src/__tests__/handlers-ipc-blob-probe.test.ts +77 -51
- package/src/__tests__/handlers-slack-config.test.ts +63 -54
- package/src/__tests__/handlers-task-submit-slash.test.ts +18 -18
- package/src/__tests__/handlers-telegram-config.test.ts +662 -329
- package/src/__tests__/handlers-twitter-config.test.ts +525 -298
- package/src/__tests__/handlers-user-message-approval-consumption.test.ts +5 -2
- package/src/__tests__/headless-browser-interactions.test.ts +444 -280
- package/src/__tests__/headless-browser-navigate.test.ts +116 -79
- package/src/__tests__/headless-browser-read-tools.test.ts +123 -86
- package/src/__tests__/headless-browser-snapshot.test.ts +71 -56
- package/src/__tests__/heartbeat-service.test.ts +76 -58
- package/src/__tests__/history-repair-observability.test.ts +14 -14
- package/src/__tests__/history-repair.test.ts +171 -167
- package/src/__tests__/home-base-bootstrap.test.ts +30 -27
- package/src/__tests__/hooks-blocking.test.ts +86 -37
- package/src/__tests__/hooks-cli.test.ts +104 -68
- package/src/__tests__/hooks-config.test.ts +81 -43
- package/src/__tests__/hooks-discovery.test.ts +106 -96
- package/src/__tests__/hooks-integration.test.ts +78 -72
- package/src/__tests__/hooks-manager.test.ts +99 -61
- package/src/__tests__/hooks-runner.test.ts +94 -71
- package/src/__tests__/hooks-settings.test.ts +69 -64
- package/src/__tests__/hooks-templates.test.ts +85 -54
- package/src/__tests__/hooks-ts-runner.test.ts +82 -45
- package/src/__tests__/hooks-watch.test.ts +32 -22
- package/src/__tests__/host-file-edit-tool.test.ts +190 -148
- package/src/__tests__/host-file-read-tool.test.ts +86 -63
- package/src/__tests__/host-file-write-tool.test.ts +98 -64
- package/src/__tests__/host-shell-tool.test.ts +342 -233
- package/src/__tests__/inbound-invite-redemption.test.ts +0 -1
- package/src/__tests__/ingress-member-store.test.ts +163 -159
- package/src/__tests__/ingress-reconcile.test.ts +13 -6
- package/src/__tests__/ingress-routes-http.test.ts +441 -356
- package/src/__tests__/ingress-url-consistency.test.ts +125 -64
- package/src/__tests__/integration-status.test.ts +93 -73
- package/src/__tests__/intent-routing.test.ts +148 -118
- package/src/__tests__/invite-redemption-service.test.ts +163 -121
- package/src/__tests__/ipc-blob-store.test.ts +104 -91
- package/src/__tests__/ipc-contract-inventory.test.ts +27 -15
- package/src/__tests__/ipc-contract.test.ts +24 -23
- package/src/__tests__/ipc-protocol.test.ts +52 -46
- package/src/__tests__/ipc-roundtrip.benchmark.test.ts +61 -50
- package/src/__tests__/ipc-snapshot.test.ts +1135 -1056
- package/src/__tests__/ipc-validate.test.ts +240 -179
- package/src/__tests__/key-migration.test.ts +123 -90
- package/src/__tests__/keychain.test.ts +150 -123
- package/src/__tests__/lifecycle-docs-guard.test.ts +65 -64
- package/src/__tests__/llm-usage-store.test.ts +112 -87
- package/src/__tests__/managed-skill-lifecycle.test.ts +147 -108
- package/src/__tests__/managed-store.test.ts +411 -360
- package/src/__tests__/mcp-cli.test.ts +190 -124
- package/src/__tests__/mcp-health-check.test.ts +26 -21
- package/src/__tests__/media-generate-image.test.ts +122 -99
- package/src/__tests__/media-reuse-story.e2e.test.ts +282 -214
- package/src/__tests__/media-visibility-policy.test.ts +86 -38
- package/src/__tests__/memory-context-benchmark.benchmark.test.ts +146 -100
- package/src/__tests__/memory-lifecycle-e2e.test.ts +385 -297
- package/src/__tests__/memory-query-builder.test.ts +32 -33
- package/src/__tests__/memory-recall-quality.test.ts +761 -407
- package/src/__tests__/memory-regressions.experimental.test.ts +443 -380
- package/src/__tests__/memory-regressions.test.ts +3725 -2642
- package/src/__tests__/memory-retrieval-budget.test.ts +7 -8
- package/src/__tests__/memory-retrieval.benchmark.test.ts +144 -109
- package/src/__tests__/memory-upsert-concurrency.test.ts +292 -201
- package/src/__tests__/messaging-send-tool.test.ts +36 -29
- package/src/__tests__/migration-cli-flows.test.ts +69 -53
- package/src/__tests__/migration-ordering.test.ts +103 -86
- package/src/__tests__/mime-builder.test.ts +55 -32
- package/src/__tests__/mock-signup-server.test.ts +384 -246
- package/src/__tests__/model-intents.test.ts +61 -37
- package/src/__tests__/no-direct-anthropic-sdk-imports.test.ts +9 -12
- package/src/__tests__/no-is-trusted-guard.test.ts +24 -21
- package/src/__tests__/non-member-access-request.test.ts +3 -2
- package/src/__tests__/notification-broadcaster.test.ts +99 -81
- package/src/__tests__/notification-decision-fallback.test.ts +223 -178
- package/src/__tests__/notification-decision-strategy.test.ts +375 -337
- package/src/__tests__/notification-deep-link.test.ts +67 -61
- package/src/__tests__/notification-guardian-path.test.ts +248 -206
- package/src/__tests__/notification-routing-intent.test.ts +166 -93
- package/src/__tests__/notification-thread-candidate-validation.test.ts +78 -75
- package/src/__tests__/notification-thread-candidates.test.ts +64 -61
- package/src/__tests__/oauth-callback-registry.test.ts +40 -30
- package/src/__tests__/oauth-connect-handler.test.ts +109 -89
- package/src/__tests__/oauth-scope-policy.test.ts +63 -55
- package/src/__tests__/oauth2-gateway-transport.test.ts +252 -174
- package/src/__tests__/onboarding-starter-tasks.test.ts +93 -89
- package/src/__tests__/onboarding-template-contract.test.ts +93 -94
- package/src/__tests__/openai-provider.test.ts +366 -274
- package/src/__tests__/pairing-concurrent.test.ts +18 -12
- package/src/__tests__/pairing-routes.test.ts +45 -41
- package/src/__tests__/parallel-tool.benchmark.test.ts +108 -58
- package/src/__tests__/parser.test.ts +316 -226
- package/src/__tests__/path-classifier.test.ts +24 -25
- package/src/__tests__/path-policy.test.ts +187 -147
- package/src/__tests__/phone.test.ts +36 -36
- package/src/__tests__/platform-move-helper.test.ts +48 -40
- package/src/__tests__/platform-socket-path.test.ts +23 -24
- package/src/__tests__/platform-workspace-migration.test.ts +464 -414
- package/src/__tests__/platform.test.ts +61 -53
- package/src/__tests__/playbook-execution.test.ts +397 -265
- package/src/__tests__/playbook-tools.test.ts +267 -196
- package/src/__tests__/prebuilt-home-base-seed.test.ts +30 -27
- package/src/__tests__/pricing.test.ts +316 -136
- package/src/__tests__/profile-compiler.test.ts +206 -188
- package/src/__tests__/provider-commit-message-generator.test.ts +114 -106
- package/src/__tests__/provider-error-scenarios.test.ts +212 -158
- package/src/__tests__/provider-fail-open-selection.test.ts +51 -44
- package/src/__tests__/provider-registry-ollama.test.ts +13 -9
- package/src/__tests__/provider-streaming.benchmark.test.ts +232 -183
- package/src/__tests__/proxy-approval-callback.test.ts +180 -119
- package/src/__tests__/public-ingress-urls.test.ts +112 -94
- package/src/__tests__/qdrant-manager.test.ts +147 -98
- package/src/__tests__/ratelimit.test.ts +152 -82
- package/src/__tests__/recording-handler.test.ts +273 -151
- package/src/__tests__/recording-intent-fallback.test.ts +94 -75
- package/src/__tests__/recording-intent-handler.test.ts +9 -2
- package/src/__tests__/recording-intent.test.ts +578 -379
- package/src/__tests__/recording-state-machine.test.ts +530 -316
- package/src/__tests__/recurrence-engine-rruleset.test.ts +150 -92
- package/src/__tests__/recurrence-engine.test.ts +81 -41
- package/src/__tests__/recurrence-types.test.ts +63 -44
- package/src/__tests__/relay-server.test.ts +2131 -1602
- package/src/__tests__/reminder-store.test.ts +158 -80
- package/src/__tests__/reminder.test.ts +113 -109
- package/src/__tests__/remote-skill-policy.test.ts +96 -72
- package/src/__tests__/request-file-tool.test.ts +74 -67
- package/src/__tests__/response-tier.test.ts +131 -74
- package/src/__tests__/runtime-attachment-metadata.test.ts +0 -1
- package/src/__tests__/runtime-events-sse-parity.test.ts +167 -145
- package/src/__tests__/runtime-events-sse.test.ts +0 -1
- package/src/__tests__/sandbox-diagnostics.test.ts +66 -56
- package/src/__tests__/sandbox-host-parity.test.ts +377 -301
- package/src/__tests__/scaffold-managed-skill-tool.test.ts +213 -161
- package/src/__tests__/schedule-store.test.ts +268 -205
- package/src/__tests__/schedule-tools.test.ts +702 -524
- package/src/__tests__/scheduler-recurrence.test.ts +240 -130
- package/src/__tests__/scoped-approval-grants.test.ts +258 -168
- package/src/__tests__/scoped-grant-security-matrix.test.ts +160 -146
- package/src/__tests__/script-proxy-certs.test.ts +38 -35
- package/src/__tests__/script-proxy-connect-tunnel.test.ts +71 -46
- package/src/__tests__/script-proxy-decision-trace.test.ts +161 -84
- package/src/__tests__/script-proxy-http-forwarder.test.ts +146 -129
- package/src/__tests__/script-proxy-injection-runtime.test.ts +139 -113
- package/src/__tests__/script-proxy-mitm-handler.test.ts +226 -142
- package/src/__tests__/script-proxy-policy-runtime.test.ts +126 -86
- package/src/__tests__/script-proxy-policy.test.ts +308 -153
- package/src/__tests__/script-proxy-rewrite-specificity.test.ts +74 -62
- package/src/__tests__/script-proxy-router.test.ts +111 -77
- package/src/__tests__/script-proxy-session-manager.test.ts +156 -113
- package/src/__tests__/script-proxy-session-runtime.test.ts +28 -24
- package/src/__tests__/secret-allowlist.test.ts +105 -90
- package/src/__tests__/secret-ingress-handler.test.ts +41 -30
- package/src/__tests__/secret-onetime-send.test.ts +67 -50
- package/src/__tests__/secret-prompt-log-hygiene.test.ts +35 -31
- package/src/__tests__/secret-response-routing.test.ts +50 -41
- package/src/__tests__/secret-scanner-executor.test.ts +152 -111
- package/src/__tests__/secret-scanner.test.ts +495 -413
- package/src/__tests__/secure-keys.test.ts +132 -121
- package/src/__tests__/send-endpoint-busy.test.ts +8 -3
- package/src/__tests__/send-notification-tool.test.ts +43 -42
- package/src/__tests__/sensitive-output-placeholders.test.ts +72 -64
- package/src/__tests__/sequence-store.test.ts +335 -167
- package/src/__tests__/server-history-render.test.ts +341 -202
- package/src/__tests__/session-abort-tool-results.test.ts +133 -70
- package/src/__tests__/session-confirmation-signals.test.ts +252 -160
- package/src/__tests__/session-conflict-gate.test.ts +775 -585
- package/src/__tests__/session-error.test.ts +222 -191
- package/src/__tests__/session-evictor.test.ts +79 -62
- package/src/__tests__/session-init.benchmark.test.ts +170 -108
- package/src/__tests__/session-load-history-repair.test.ts +273 -139
- package/src/__tests__/session-messaging-secret-redirect.test.ts +130 -90
- package/src/__tests__/session-pre-run-repair.test.ts +106 -59
- package/src/__tests__/session-profile-injection.test.ts +198 -130
- package/src/__tests__/session-provider-retry-repair.test.ts +223 -141
- package/src/__tests__/session-queue.test.ts +624 -321
- package/src/__tests__/session-runtime-assembly.test.ts +425 -329
- package/src/__tests__/session-runtime-workspace.test.ts +69 -61
- package/src/__tests__/session-skill-tools.test.ts +973 -678
- package/src/__tests__/session-slash-known.test.ts +185 -133
- package/src/__tests__/session-slash-queue.test.ts +147 -81
- package/src/__tests__/session-slash-unknown.test.ts +135 -90
- package/src/__tests__/session-surfaces-task-progress.test.ts +122 -87
- package/src/__tests__/session-tool-setup-app-refresh.test.ts +338 -177
- package/src/__tests__/session-tool-setup-memory-scope.test.ts +63 -40
- package/src/__tests__/session-tool-setup-side-effect-flag.test.ts +60 -37
- package/src/__tests__/session-tool-setup-tools-disabled.test.ts +28 -26
- package/src/__tests__/session-undo.test.ts +43 -30
- package/src/__tests__/session-workspace-cache-state.test.ts +108 -67
- package/src/__tests__/session-workspace-injection.test.ts +245 -117
- package/src/__tests__/session-workspace-tool-tracking.test.ts +260 -93
- package/src/__tests__/shared-filesystem-errors.test.ts +47 -47
- package/src/__tests__/shell-credential-ref.test.ts +126 -90
- package/src/__tests__/shell-identity.test.ts +134 -111
- package/src/__tests__/shell-parser-fuzz.test.ts +263 -179
- package/src/__tests__/shell-parser-property.test.ts +435 -288
- package/src/__tests__/shell-tool-proxy-mode.test.ts +142 -70
- package/src/__tests__/size-guard.test.ts +42 -44
- package/src/__tests__/skill-feature-flags-integration.test.ts +79 -52
- package/src/__tests__/skill-feature-flags.test.ts +75 -47
- package/src/__tests__/skill-include-graph.test.ts +143 -148
- package/src/__tests__/skill-load-feature-flag.test.ts +94 -59
- package/src/__tests__/skill-load-tool.test.ts +371 -199
- package/src/__tests__/skill-projection-feature-flag.test.ts +131 -88
- package/src/__tests__/skill-projection.benchmark.test.ts +93 -65
- package/src/__tests__/skill-script-runner-host.test.ts +460 -250
- package/src/__tests__/skill-script-runner-sandbox.test.ts +168 -108
- package/src/__tests__/skill-script-runner.test.ts +115 -74
- package/src/__tests__/skill-tool-factory.test.ts +140 -96
- package/src/__tests__/skill-tool-manifest.test.ts +306 -210
- package/src/__tests__/skill-version-hash.test.ts +70 -56
- package/src/__tests__/skills.test.ts +0 -1
- package/src/__tests__/slack-channel-config.test.ts +127 -84
- package/src/__tests__/slack-skill.test.ts +60 -47
- package/src/__tests__/slash-commands-catalog.test.ts +37 -31
- package/src/__tests__/slash-commands-parser.test.ts +71 -64
- package/src/__tests__/slash-commands-resolver.test.ts +143 -107
- package/src/__tests__/slash-commands-rewrite.test.ts +22 -22
- package/src/__tests__/sms-messaging-provider.test.ts +4 -0
- package/src/__tests__/speaker-identification.test.ts +28 -25
- package/src/__tests__/starter-bundle.test.ts +27 -23
- package/src/__tests__/starter-task-flow.test.ts +67 -52
- package/src/__tests__/subagent-manager-notify.test.ts +154 -108
- package/src/__tests__/subagent-tools.test.ts +311 -270
- package/src/__tests__/subagent-types.test.ts +40 -40
- package/src/__tests__/surface-mutex-cleanup.test.ts +42 -30
- package/src/__tests__/swarm-dag-pathological.test.ts +122 -111
- package/src/__tests__/swarm-orchestrator.test.ts +135 -101
- package/src/__tests__/swarm-plan-validator.test.ts +125 -73
- package/src/__tests__/swarm-recursion.test.ts +58 -46
- package/src/__tests__/swarm-router-planner.test.ts +99 -74
- package/src/__tests__/swarm-session-integration.test.ts +148 -91
- package/src/__tests__/swarm-tool.test.ts +65 -45
- package/src/__tests__/swarm-worker-backend.test.ts +59 -45
- package/src/__tests__/swarm-worker-runner.test.ts +133 -118
- package/src/__tests__/system-prompt.test.ts +311 -256
- package/src/__tests__/task-compiler.test.ts +176 -120
- package/src/__tests__/task-management-tools.test.ts +561 -456
- package/src/__tests__/task-memory-cleanup.test.ts +627 -362
- package/src/__tests__/task-runner.test.ts +117 -94
- package/src/__tests__/task-scheduler.test.ts +113 -84
- package/src/__tests__/task-tools.test.ts +349 -264
- package/src/__tests__/terminal-sandbox.test.ts +138 -108
- package/src/__tests__/terminal-tools.test.ts +350 -305
- package/src/__tests__/thread-seed-composer.test.ts +307 -180
- package/src/__tests__/tool-approval-handler.test.ts +238 -137
- package/src/__tests__/tool-audit-listener.test.ts +69 -69
- package/src/__tests__/tool-domain-event-publisher.test.ts +142 -132
- package/src/__tests__/tool-execution-abort-cleanup.test.ts +155 -146
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +136 -105
- package/src/__tests__/tool-executor-lifecycle-events.test.ts +355 -239
- package/src/__tests__/tool-executor-redaction.test.ts +112 -109
- package/src/__tests__/tool-executor-shell-integration.test.ts +130 -79
- package/src/__tests__/tool-executor.test.ts +1274 -674
- package/src/__tests__/tool-grant-request-escalation.test.ts +401 -283
- package/src/__tests__/tool-metrics-listener.test.ts +97 -85
- package/src/__tests__/tool-notification-listener.test.ts +42 -25
- package/src/__tests__/tool-permission-simulate-handler.test.ts +137 -113
- package/src/__tests__/tool-policy.test.ts +44 -25
- package/src/__tests__/tool-profiling-listener.test.ts +99 -93
- package/src/__tests__/tool-result-truncation.test.ts +5 -4
- package/src/__tests__/tool-trace-listener.test.ts +131 -111
- package/src/__tests__/top-level-renderer.test.ts +62 -58
- package/src/__tests__/top-level-scanner.test.ts +68 -64
- package/src/__tests__/trace-emitter.test.ts +56 -56
- package/src/__tests__/trust-context-guards.test.ts +65 -65
- package/src/__tests__/trust-store.test.ts +1239 -806
- package/src/__tests__/trusted-contact-approval-notifier.test.ts +0 -1
- package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +0 -1
- package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +3 -2
- package/src/__tests__/trusted-contact-multichannel.test.ts +3 -2
- package/src/__tests__/trusted-contact-verification.test.ts +251 -231
- package/src/__tests__/turn-commit.test.ts +259 -200
- package/src/__tests__/twilio-provider.test.ts +140 -126
- package/src/__tests__/twilio-rest.test.ts +22 -18
- package/src/__tests__/twilio-routes-elevenlabs.test.ts +0 -1
- package/src/__tests__/twilio-routes-twiml.test.ts +55 -55
- package/src/__tests__/twilio-routes.test.ts +0 -1
- package/src/__tests__/twitter-auth-handler.test.ts +184 -139
- package/src/__tests__/twitter-cli-error-shaping.test.ts +88 -73
- package/src/__tests__/twitter-cli-routing.test.ts +146 -99
- package/src/__tests__/twitter-oauth-client.test.ts +82 -65
- package/src/__tests__/update-bulletin-format.test.ts +69 -66
- package/src/__tests__/update-bulletin-state.test.ts +66 -60
- package/src/__tests__/update-bulletin.test.ts +150 -114
- package/src/__tests__/update-template-contract.test.ts +15 -10
- package/src/__tests__/url-safety.test.ts +288 -265
- package/src/__tests__/user-reference.test.ts +32 -32
- package/src/__tests__/view-image-tool.test.ts +118 -96
- package/src/__tests__/voice-invite-redemption.test.ts +111 -106
- package/src/__tests__/voice-quality.test.ts +117 -102
- package/src/__tests__/voice-scoped-grant-consumer.test.ts +204 -146
- package/src/__tests__/voice-session-bridge.test.ts +351 -216
- package/src/__tests__/weather-skill-regression.test.ts +170 -120
- package/src/__tests__/web-fetch.test.ts +664 -526
- package/src/__tests__/web-search.test.ts +379 -213
- package/src/__tests__/work-item-output.test.ts +90 -53
- package/src/__tests__/workspace-git-service.test.ts +437 -356
- package/src/__tests__/workspace-heartbeat-service.test.ts +125 -91
- package/src/__tests__/workspace-lifecycle.test.ts +98 -64
- package/src/__tests__/workspace-policy.test.ts +139 -71
- package/src/cli/mcp.ts +81 -28
- package/src/commands/__tests__/cc-command-registry.test.ts +142 -134
- package/src/config/__tests__/feature-flag-registry-guard.test.ts +48 -39
- package/src/config/bundled-skills/chatgpt-import/tools/chatgpt-import.ts +25 -10
- package/src/config/bundled-skills/doordash/__tests__/doordash-session.test.ts +0 -1
- package/src/config/bundled-skills/guardian-verify-setup/SKILL.md +6 -11
- package/src/config/bundled-skills/messaging/SKILL.md +4 -3
- package/src/config/bundled-skills/messaging/tools/gmail-outreach-scan.ts +15 -5
- package/src/config/bundled-skills/messaging/tools/gmail-sender-digest.ts +16 -5
- package/src/config/bundled-skills/phone-calls/SKILL.md +1 -2
- package/src/config/bundled-skills/slack/tools/slack-scan-digest.ts +34 -32
- package/src/config/bundled-skills/sms-setup/SKILL.md +8 -16
- package/src/config/bundled-skills/telegram-setup/SKILL.md +3 -3
- package/src/config/bundled-skills/trusted-contacts/SKILL.md +13 -25
- package/src/config/bundled-skills/twilio-setup/SKILL.md +13 -23
- package/src/config/bundled-tool-registry.ts +2 -0
- package/src/config/env.ts +3 -4
- package/src/config/system-prompt.ts +32 -0
- package/src/mcp/client.ts +2 -7
- package/src/memory/db-connection.ts +16 -10
- package/src/messaging/providers/gmail/adapter.ts +10 -3
- package/src/messaging/providers/gmail/client.ts +280 -72
- package/src/runtime/auth/__tests__/context.test.ts +75 -65
- package/src/runtime/auth/__tests__/credential-service.test.ts +137 -114
- package/src/runtime/auth/__tests__/guard-tests.test.ts +84 -90
- package/src/runtime/auth/__tests__/ipc-auth-context.test.ts +40 -40
- package/src/runtime/auth/__tests__/middleware.test.ts +80 -74
- package/src/runtime/auth/__tests__/policy.test.ts +9 -9
- package/src/runtime/auth/__tests__/route-policy.test.ts +76 -65
- package/src/runtime/auth/__tests__/scopes.test.ts +68 -60
- package/src/runtime/auth/__tests__/subject.test.ts +54 -54
- package/src/runtime/auth/__tests__/token-service.test.ts +115 -108
- package/src/runtime/auth/scopes.ts +3 -0
- package/src/runtime/auth/token-service.ts +4 -1
- package/src/runtime/auth/types.ts +2 -1
- package/src/runtime/http-server.ts +2 -1
- package/src/security/secure-keys.ts +120 -54
- package/src/tools/browser/__tests__/auth-cache.test.ts +69 -63
- package/src/tools/browser/__tests__/auth-detector.test.ts +218 -157
- package/src/tools/browser/__tests__/jit-auth.test.ts +83 -99
- package/src/tools/terminal/safe-env.ts +7 -0
|
@@ -1,7 +1,11 @@
|
|
|
1
|
-
import { describe, expect,test } from
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
2
|
|
|
3
|
-
import type {
|
|
4
|
-
|
|
3
|
+
import type {
|
|
4
|
+
AgentEvent,
|
|
5
|
+
CheckpointDecision,
|
|
6
|
+
CheckpointInfo,
|
|
7
|
+
} from "../agent/loop.js";
|
|
8
|
+
import { AgentLoop } from "../agent/loop.js";
|
|
5
9
|
import type {
|
|
6
10
|
ContentBlock,
|
|
7
11
|
Message,
|
|
@@ -9,21 +13,30 @@ import type {
|
|
|
9
13
|
ProviderResponse,
|
|
10
14
|
SendMessageOptions,
|
|
11
15
|
ToolDefinition,
|
|
12
|
-
} from
|
|
16
|
+
} from "../providers/types.js";
|
|
13
17
|
|
|
14
18
|
// ---------------------------------------------------------------------------
|
|
15
19
|
// Helpers
|
|
16
20
|
// ---------------------------------------------------------------------------
|
|
17
21
|
|
|
18
22
|
/** A mock provider that returns pre-configured responses in sequence. */
|
|
19
|
-
function createMockProvider(
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
+
function createMockProvider(responses: ProviderResponse[]): {
|
|
24
|
+
provider: Provider;
|
|
25
|
+
calls: {
|
|
26
|
+
messages: Message[];
|
|
27
|
+
tools?: ToolDefinition[];
|
|
28
|
+
systemPrompt?: string;
|
|
29
|
+
}[];
|
|
30
|
+
} {
|
|
31
|
+
const calls: {
|
|
32
|
+
messages: Message[];
|
|
33
|
+
tools?: ToolDefinition[];
|
|
34
|
+
systemPrompt?: string;
|
|
35
|
+
}[] = [];
|
|
23
36
|
let callIndex = 0;
|
|
24
37
|
|
|
25
38
|
const provider: Provider = {
|
|
26
|
-
name:
|
|
39
|
+
name: "mock",
|
|
27
40
|
async sendMessage(
|
|
28
41
|
messages: Message[],
|
|
29
42
|
tools?: ToolDefinition[],
|
|
@@ -37,8 +50,8 @@ function createMockProvider(
|
|
|
37
50
|
// Emit streaming events if the response has text blocks
|
|
38
51
|
if (options?.onEvent) {
|
|
39
52
|
for (const block of response.content) {
|
|
40
|
-
if (block.type ===
|
|
41
|
-
options.onEvent({ type:
|
|
53
|
+
if (block.type === "text") {
|
|
54
|
+
options.onEvent({ type: "text_delta", text: block.text });
|
|
42
55
|
}
|
|
43
56
|
}
|
|
44
57
|
}
|
|
@@ -52,29 +65,37 @@ function createMockProvider(
|
|
|
52
65
|
|
|
53
66
|
function textResponse(text: string): ProviderResponse {
|
|
54
67
|
return {
|
|
55
|
-
content: [{ type:
|
|
56
|
-
model:
|
|
68
|
+
content: [{ type: "text", text }],
|
|
69
|
+
model: "mock-model",
|
|
57
70
|
usage: { inputTokens: 10, outputTokens: 5 },
|
|
58
|
-
stopReason:
|
|
71
|
+
stopReason: "end_turn",
|
|
59
72
|
};
|
|
60
73
|
}
|
|
61
74
|
|
|
62
|
-
function toolUseResponse(
|
|
75
|
+
function toolUseResponse(
|
|
76
|
+
id: string,
|
|
77
|
+
name: string,
|
|
78
|
+
input: Record<string, unknown>,
|
|
79
|
+
): ProviderResponse {
|
|
63
80
|
return {
|
|
64
|
-
content: [{ type:
|
|
65
|
-
model:
|
|
81
|
+
content: [{ type: "tool_use", id, name, input }],
|
|
82
|
+
model: "mock-model",
|
|
66
83
|
usage: { inputTokens: 10, outputTokens: 5 },
|
|
67
|
-
stopReason:
|
|
84
|
+
stopReason: "tool_use",
|
|
68
85
|
};
|
|
69
86
|
}
|
|
70
87
|
|
|
71
88
|
const dummyTools: ToolDefinition[] = [
|
|
72
|
-
{
|
|
89
|
+
{
|
|
90
|
+
name: "read_file",
|
|
91
|
+
description: "Read a file",
|
|
92
|
+
input_schema: { type: "object", properties: { path: { type: "string" } } },
|
|
93
|
+
},
|
|
73
94
|
];
|
|
74
95
|
|
|
75
96
|
const userMessage: Message = {
|
|
76
|
-
role:
|
|
77
|
-
content: [{ type:
|
|
97
|
+
role: "user",
|
|
98
|
+
content: [{ type: "text", text: "Hello" }],
|
|
78
99
|
};
|
|
79
100
|
|
|
80
101
|
function collectEvents(events: AgentEvent[]): (event: AgentEvent) => void {
|
|
@@ -85,11 +106,11 @@ function collectEvents(events: AgentEvent[]): (event: AgentEvent) => void {
|
|
|
85
106
|
// Tests
|
|
86
107
|
// ---------------------------------------------------------------------------
|
|
87
108
|
|
|
88
|
-
describe(
|
|
109
|
+
describe("AgentLoop", () => {
|
|
89
110
|
// 1. Basic text response
|
|
90
|
-
test(
|
|
91
|
-
const { provider } = createMockProvider([textResponse(
|
|
92
|
-
const loop = new AgentLoop(provider,
|
|
111
|
+
test("returns history with assistant message for simple text response", async () => {
|
|
112
|
+
const { provider } = createMockProvider([textResponse("Hi there!")]);
|
|
113
|
+
const loop = new AgentLoop(provider, "system prompt");
|
|
93
114
|
|
|
94
115
|
const events: AgentEvent[] = [];
|
|
95
116
|
const history = await loop.run([userMessage], collectEvents(events));
|
|
@@ -97,32 +118,41 @@ describe('AgentLoop', () => {
|
|
|
97
118
|
// History should contain original user message + assistant response
|
|
98
119
|
expect(history).toHaveLength(2);
|
|
99
120
|
expect(history[0]).toEqual(userMessage);
|
|
100
|
-
expect(history[1].role).toBe(
|
|
101
|
-
expect(history[1].content).toEqual([{ type:
|
|
121
|
+
expect(history[1].role).toBe("assistant");
|
|
122
|
+
expect(history[1].content).toEqual([{ type: "text", text: "Hi there!" }]);
|
|
102
123
|
});
|
|
103
124
|
|
|
104
125
|
// 2. Tool execution — provider returns tool_use, verify tool executor is called
|
|
105
|
-
test(
|
|
106
|
-
const toolCallId =
|
|
126
|
+
test("executes tool and passes result back to provider", async () => {
|
|
127
|
+
const toolCallId = "tool-1";
|
|
107
128
|
const { provider, calls } = createMockProvider([
|
|
108
|
-
toolUseResponse(toolCallId,
|
|
109
|
-
textResponse(
|
|
129
|
+
toolUseResponse(toolCallId, "read_file", { path: "/tmp/test.txt" }),
|
|
130
|
+
textResponse("File contents received."),
|
|
110
131
|
]);
|
|
111
132
|
|
|
112
133
|
const toolCalls: { name: string; input: Record<string, unknown> }[] = [];
|
|
113
|
-
const toolExecutor = async (
|
|
134
|
+
const toolExecutor = async (
|
|
135
|
+
name: string,
|
|
136
|
+
input: Record<string, unknown>,
|
|
137
|
+
) => {
|
|
114
138
|
toolCalls.push({ name, input });
|
|
115
|
-
return { content:
|
|
139
|
+
return { content: "file data here", isError: false };
|
|
116
140
|
};
|
|
117
141
|
|
|
118
|
-
const loop = new AgentLoop(
|
|
142
|
+
const loop = new AgentLoop(
|
|
143
|
+
provider,
|
|
144
|
+
"system",
|
|
145
|
+
{},
|
|
146
|
+
dummyTools,
|
|
147
|
+
toolExecutor,
|
|
148
|
+
);
|
|
119
149
|
const events: AgentEvent[] = [];
|
|
120
150
|
const history = await loop.run([userMessage], collectEvents(events));
|
|
121
151
|
|
|
122
152
|
// Tool executor was called with correct args
|
|
123
153
|
expect(toolCalls).toHaveLength(1);
|
|
124
|
-
expect(toolCalls[0].name).toBe(
|
|
125
|
-
expect(toolCalls[0].input).toEqual({ path:
|
|
154
|
+
expect(toolCalls[0].name).toBe("read_file");
|
|
155
|
+
expect(toolCalls[0].input).toEqual({ path: "/tmp/test.txt" });
|
|
126
156
|
|
|
127
157
|
// Provider was called twice (initial + after tool result)
|
|
128
158
|
expect(calls).toHaveLength(2);
|
|
@@ -130,35 +160,50 @@ describe('AgentLoop', () => {
|
|
|
130
160
|
// Second call should include the tool result as a user message
|
|
131
161
|
const secondCallMessages = calls[1].messages;
|
|
132
162
|
const lastMsg = secondCallMessages[secondCallMessages.length - 1];
|
|
133
|
-
expect(lastMsg.role).toBe(
|
|
163
|
+
expect(lastMsg.role).toBe("user");
|
|
134
164
|
|
|
135
165
|
const toolResultBlock = lastMsg.content.find(
|
|
136
|
-
(b): b is Extract<ContentBlock, { type:
|
|
166
|
+
(b): b is Extract<ContentBlock, { type: "tool_result" }> =>
|
|
167
|
+
b.type === "tool_result",
|
|
137
168
|
);
|
|
138
169
|
expect(toolResultBlock).toBeDefined();
|
|
139
170
|
expect(toolResultBlock!.tool_use_id).toBe(toolCallId);
|
|
140
|
-
expect(toolResultBlock!.content).toBe(
|
|
171
|
+
expect(toolResultBlock!.content).toBe("file data here");
|
|
141
172
|
expect(toolResultBlock!.is_error).toBe(false);
|
|
142
173
|
|
|
143
174
|
// Final history: user, assistant(tool_use), user(tool_result), assistant(text)
|
|
144
175
|
expect(history).toHaveLength(4);
|
|
145
|
-
expect(history[3].role).toBe(
|
|
146
|
-
expect(history[3].content).toEqual([
|
|
176
|
+
expect(history[3].role).toBe("assistant");
|
|
177
|
+
expect(history[3].content).toEqual([
|
|
178
|
+
{ type: "text", text: "File contents received." },
|
|
179
|
+
]);
|
|
147
180
|
});
|
|
148
181
|
|
|
149
182
|
// 3. Multi-turn tool loop
|
|
150
|
-
test(
|
|
183
|
+
test("supports multi-turn tool execution", async () => {
|
|
151
184
|
const { provider, calls } = createMockProvider([
|
|
152
|
-
toolUseResponse(
|
|
153
|
-
toolUseResponse(
|
|
154
|
-
textResponse(
|
|
185
|
+
toolUseResponse("t1", "read_file", { path: "/a.txt" }),
|
|
186
|
+
toolUseResponse("t2", "read_file", { path: "/b.txt" }),
|
|
187
|
+
textResponse("Done reading both files."),
|
|
155
188
|
]);
|
|
156
189
|
|
|
157
|
-
const toolExecutor = async (
|
|
158
|
-
|
|
190
|
+
const toolExecutor = async (
|
|
191
|
+
name: string,
|
|
192
|
+
input: Record<string, unknown>,
|
|
193
|
+
) => {
|
|
194
|
+
return {
|
|
195
|
+
content: `contents of ${(input as { path: string }).path}`,
|
|
196
|
+
isError: false,
|
|
197
|
+
};
|
|
159
198
|
};
|
|
160
199
|
|
|
161
|
-
const loop = new AgentLoop(
|
|
200
|
+
const loop = new AgentLoop(
|
|
201
|
+
provider,
|
|
202
|
+
"system",
|
|
203
|
+
{},
|
|
204
|
+
dummyTools,
|
|
205
|
+
toolExecutor,
|
|
206
|
+
);
|
|
162
207
|
const history = await loop.run([userMessage], () => {});
|
|
163
208
|
|
|
164
209
|
// Provider called 3 times (two tool rounds + final text)
|
|
@@ -166,35 +211,37 @@ describe('AgentLoop', () => {
|
|
|
166
211
|
|
|
167
212
|
// History: user, assistant(t1), user(result1), assistant(t2), user(result2), assistant(text)
|
|
168
213
|
expect(history).toHaveLength(6);
|
|
169
|
-
expect(history[5].content).toEqual([
|
|
214
|
+
expect(history[5].content).toEqual([
|
|
215
|
+
{ type: "text", text: "Done reading both files." },
|
|
216
|
+
]);
|
|
170
217
|
});
|
|
171
218
|
|
|
172
219
|
// 4. Loop stops when provider returns tool_use but no executor is configured
|
|
173
|
-
test(
|
|
220
|
+
test("stops when tool_use returned but no tool executor configured", async () => {
|
|
174
221
|
const { provider } = createMockProvider([
|
|
175
|
-
toolUseResponse(
|
|
222
|
+
toolUseResponse("t1", "read_file", { path: "/a.txt" }),
|
|
176
223
|
]);
|
|
177
224
|
|
|
178
225
|
// No tool executor provided
|
|
179
|
-
const loop = new AgentLoop(provider,
|
|
226
|
+
const loop = new AgentLoop(provider, "system", {}, dummyTools);
|
|
180
227
|
const history = await loop.run([userMessage], () => {});
|
|
181
228
|
|
|
182
229
|
// Should stop after first response (no executor to handle tool use)
|
|
183
230
|
expect(history).toHaveLength(2);
|
|
184
|
-
expect(history[1].role).toBe(
|
|
231
|
+
expect(history[1].role).toBe("assistant");
|
|
185
232
|
});
|
|
186
233
|
|
|
187
234
|
// 5. Error handling — provider throws, verify error event and loop stops
|
|
188
|
-
test(
|
|
189
|
-
const error = new Error(
|
|
235
|
+
test("emits error event and stops when provider throws", async () => {
|
|
236
|
+
const error = new Error("API rate limit exceeded");
|
|
190
237
|
const provider: Provider = {
|
|
191
|
-
name:
|
|
238
|
+
name: "mock",
|
|
192
239
|
async sendMessage(): Promise<ProviderResponse> {
|
|
193
240
|
throw error;
|
|
194
241
|
},
|
|
195
242
|
};
|
|
196
243
|
|
|
197
|
-
const loop = new AgentLoop(provider,
|
|
244
|
+
const loop = new AgentLoop(provider, "system");
|
|
198
245
|
const events: AgentEvent[] = [];
|
|
199
246
|
const history = await loop.run([userMessage], collectEvents(events));
|
|
200
247
|
|
|
@@ -202,32 +249,34 @@ describe('AgentLoop', () => {
|
|
|
202
249
|
expect(history).toHaveLength(1);
|
|
203
250
|
|
|
204
251
|
// Error event was emitted
|
|
205
|
-
const errorEvents = events.filter((e) => e.type ===
|
|
252
|
+
const errorEvents = events.filter((e) => e.type === "error");
|
|
206
253
|
expect(errorEvents).toHaveLength(1);
|
|
207
|
-
expect(
|
|
254
|
+
expect(
|
|
255
|
+
(errorEvents[0] as { type: "error"; error: Error }).error.message,
|
|
256
|
+
).toBe("API rate limit exceeded");
|
|
208
257
|
});
|
|
209
258
|
|
|
210
259
|
// 6. Abort signal — verify the loop respects AbortSignal
|
|
211
|
-
test(
|
|
260
|
+
test("stops when abort signal is triggered before provider call", async () => {
|
|
212
261
|
const controller = new AbortController();
|
|
213
262
|
controller.abort(); // abort immediately
|
|
214
263
|
|
|
215
|
-
const { provider } = createMockProvider([textResponse(
|
|
216
|
-
const loop = new AgentLoop(provider,
|
|
264
|
+
const { provider } = createMockProvider([textResponse("Should not reach")]);
|
|
265
|
+
const loop = new AgentLoop(provider, "system");
|
|
217
266
|
const history = await loop.run([userMessage], () => {}, controller.signal);
|
|
218
267
|
|
|
219
268
|
// Loop should exit immediately, returning only original messages
|
|
220
269
|
expect(history).toHaveLength(1);
|
|
221
270
|
});
|
|
222
271
|
|
|
223
|
-
test(
|
|
272
|
+
test("stops when abort signal is triggered between turns", async () => {
|
|
224
273
|
const controller = new AbortController();
|
|
225
274
|
let turnCount = 0;
|
|
226
275
|
|
|
227
276
|
const { provider } = createMockProvider([
|
|
228
|
-
toolUseResponse(
|
|
229
|
-
toolUseResponse(
|
|
230
|
-
textResponse(
|
|
277
|
+
toolUseResponse("t1", "read_file", { path: "/a.txt" }),
|
|
278
|
+
toolUseResponse("t2", "read_file", { path: "/b.txt" }),
|
|
279
|
+
textResponse("Should not reach"),
|
|
231
280
|
]);
|
|
232
281
|
|
|
233
282
|
const toolExecutor = async () => {
|
|
@@ -236,10 +285,16 @@ describe('AgentLoop', () => {
|
|
|
236
285
|
// Abort after the first tool turn completes
|
|
237
286
|
controller.abort();
|
|
238
287
|
}
|
|
239
|
-
return { content:
|
|
288
|
+
return { content: "data", isError: false };
|
|
240
289
|
};
|
|
241
290
|
|
|
242
|
-
const loop = new AgentLoop(
|
|
291
|
+
const loop = new AgentLoop(
|
|
292
|
+
provider,
|
|
293
|
+
"system",
|
|
294
|
+
{},
|
|
295
|
+
dummyTools,
|
|
296
|
+
toolExecutor,
|
|
297
|
+
);
|
|
243
298
|
const history = await loop.run([userMessage], () => {}, controller.signal);
|
|
244
299
|
|
|
245
300
|
// After the first tool turn, abort fires. The while loop checks signal at the
|
|
@@ -250,20 +305,24 @@ describe('AgentLoop', () => {
|
|
|
250
305
|
expect(history.length).toBeLessThanOrEqual(4);
|
|
251
306
|
|
|
252
307
|
// Verify the loop didn't reach the final text response
|
|
253
|
-
const lastAssistant = [...history]
|
|
308
|
+
const lastAssistant = [...history]
|
|
309
|
+
.reverse()
|
|
310
|
+
.find((m) => m.role === "assistant");
|
|
254
311
|
expect(lastAssistant).toBeDefined();
|
|
255
|
-
const hasToolUse = lastAssistant!.content.some(
|
|
312
|
+
const hasToolUse = lastAssistant!.content.some(
|
|
313
|
+
(b) => b.type === "tool_use",
|
|
314
|
+
);
|
|
256
315
|
// The last assistant message should be a tool_use, not the final text
|
|
257
316
|
expect(hasToolUse).toBe(true);
|
|
258
317
|
});
|
|
259
318
|
|
|
260
319
|
// 6b. Abort signal during long-running tool execution — loop exits immediately
|
|
261
|
-
test(
|
|
320
|
+
test("stops immediately when abort fires during a stuck tool execution", async () => {
|
|
262
321
|
const controller = new AbortController();
|
|
263
322
|
|
|
264
323
|
const { provider } = createMockProvider([
|
|
265
|
-
toolUseResponse(
|
|
266
|
-
textResponse(
|
|
324
|
+
toolUseResponse("t1", "read_file", { path: "/stuck.txt" }),
|
|
325
|
+
textResponse("Should not reach"),
|
|
267
326
|
]);
|
|
268
327
|
|
|
269
328
|
// Simulate a stuck tool that never resolves — abort fires while it's running
|
|
@@ -271,11 +330,17 @@ describe('AgentLoop', () => {
|
|
|
271
330
|
// Abort from a timer while this tool is "stuck"
|
|
272
331
|
setTimeout(() => controller.abort(), 50);
|
|
273
332
|
// Simulate being stuck for a long time
|
|
274
|
-
await new Promise(resolve => setTimeout(resolve, 10_000));
|
|
275
|
-
return { content:
|
|
333
|
+
await new Promise((resolve) => setTimeout(resolve, 10_000));
|
|
334
|
+
return { content: "should never return", isError: false };
|
|
276
335
|
};
|
|
277
336
|
|
|
278
|
-
const loop = new AgentLoop(
|
|
337
|
+
const loop = new AgentLoop(
|
|
338
|
+
provider,
|
|
339
|
+
"system",
|
|
340
|
+
{},
|
|
341
|
+
dummyTools,
|
|
342
|
+
toolExecutor,
|
|
343
|
+
);
|
|
279
344
|
const start = Date.now();
|
|
280
345
|
const history = await loop.run([userMessage], () => {}, controller.signal);
|
|
281
346
|
const elapsed = Date.now() - start;
|
|
@@ -286,96 +351,142 @@ describe('AgentLoop', () => {
|
|
|
286
351
|
// User message + assistant tool_use + synthesized cancellation tool_result
|
|
287
352
|
expect(history).toHaveLength(3);
|
|
288
353
|
const lastMsg = history[2];
|
|
289
|
-
expect(lastMsg.role).toBe(
|
|
354
|
+
expect(lastMsg.role).toBe("user");
|
|
290
355
|
expect(lastMsg.content).toHaveLength(1);
|
|
291
|
-
expect(lastMsg.content[0].type).toBe(
|
|
292
|
-
expect(
|
|
293
|
-
|
|
356
|
+
expect(lastMsg.content[0].type).toBe("tool_result");
|
|
357
|
+
expect(
|
|
358
|
+
(
|
|
359
|
+
lastMsg.content[0] as {
|
|
360
|
+
type: "tool_result";
|
|
361
|
+
tool_use_id: string;
|
|
362
|
+
content: string;
|
|
363
|
+
is_error: boolean;
|
|
364
|
+
}
|
|
365
|
+
).content,
|
|
366
|
+
).toBe("Cancelled by user");
|
|
367
|
+
expect(
|
|
368
|
+
(
|
|
369
|
+
lastMsg.content[0] as {
|
|
370
|
+
type: "tool_result";
|
|
371
|
+
tool_use_id: string;
|
|
372
|
+
content: string;
|
|
373
|
+
is_error: boolean;
|
|
374
|
+
}
|
|
375
|
+
).is_error,
|
|
376
|
+
).toBe(true);
|
|
294
377
|
});
|
|
295
378
|
|
|
296
379
|
// 7. Events — verify text_delta and other events are emitted
|
|
297
|
-
test(
|
|
298
|
-
const { provider } = createMockProvider([textResponse(
|
|
299
|
-
const loop = new AgentLoop(provider,
|
|
380
|
+
test("emits text_delta events during streaming", async () => {
|
|
381
|
+
const { provider } = createMockProvider([textResponse("Hello world")]);
|
|
382
|
+
const loop = new AgentLoop(provider, "system");
|
|
300
383
|
|
|
301
384
|
const events: AgentEvent[] = [];
|
|
302
385
|
await loop.run([userMessage], collectEvents(events));
|
|
303
386
|
|
|
304
|
-
const textDeltas = events.filter((e) => e.type ===
|
|
387
|
+
const textDeltas = events.filter((e) => e.type === "text_delta");
|
|
305
388
|
expect(textDeltas).toHaveLength(1);
|
|
306
|
-
expect((textDeltas[0] as { type:
|
|
389
|
+
expect((textDeltas[0] as { type: "text_delta"; text: string }).text).toBe(
|
|
390
|
+
"Hello world",
|
|
391
|
+
);
|
|
307
392
|
});
|
|
308
393
|
|
|
309
|
-
test(
|
|
310
|
-
const { provider } = createMockProvider([textResponse(
|
|
311
|
-
const loop = new AgentLoop(provider,
|
|
394
|
+
test("emits usage events", async () => {
|
|
395
|
+
const { provider } = createMockProvider([textResponse("Hi")]);
|
|
396
|
+
const loop = new AgentLoop(provider, "system");
|
|
312
397
|
|
|
313
398
|
const events: AgentEvent[] = [];
|
|
314
399
|
await loop.run([userMessage], collectEvents(events));
|
|
315
400
|
|
|
316
|
-
const usageEvents = events.filter((e) => e.type ===
|
|
401
|
+
const usageEvents = events.filter((e) => e.type === "usage");
|
|
317
402
|
expect(usageEvents).toHaveLength(1);
|
|
318
|
-
const usage = usageEvents[0] as Extract<AgentEvent, { type:
|
|
319
|
-
expect(usage.type).toBe(
|
|
403
|
+
const usage = usageEvents[0] as Extract<AgentEvent, { type: "usage" }>;
|
|
404
|
+
expect(usage.type).toBe("usage");
|
|
320
405
|
expect(usage.inputTokens).toBe(10);
|
|
321
406
|
expect(usage.outputTokens).toBe(5);
|
|
322
|
-
expect(usage.model).toBe(
|
|
323
|
-
expect(typeof usage.providerDurationMs).toBe(
|
|
407
|
+
expect(usage.model).toBe("mock-model");
|
|
408
|
+
expect(typeof usage.providerDurationMs).toBe("number");
|
|
324
409
|
expect(usage.providerDurationMs).toBeGreaterThanOrEqual(0);
|
|
325
410
|
});
|
|
326
411
|
|
|
327
|
-
test(
|
|
328
|
-
const { provider } = createMockProvider([textResponse(
|
|
329
|
-
const loop = new AgentLoop(provider,
|
|
412
|
+
test("emits message_complete events", async () => {
|
|
413
|
+
const { provider } = createMockProvider([textResponse("Done")]);
|
|
414
|
+
const loop = new AgentLoop(provider, "system");
|
|
330
415
|
|
|
331
416
|
const events: AgentEvent[] = [];
|
|
332
417
|
await loop.run([userMessage], collectEvents(events));
|
|
333
418
|
|
|
334
|
-
const completeEvents = events.filter((e) => e.type ===
|
|
419
|
+
const completeEvents = events.filter((e) => e.type === "message_complete");
|
|
335
420
|
expect(completeEvents).toHaveLength(1);
|
|
336
|
-
expect(
|
|
421
|
+
expect(
|
|
422
|
+
(completeEvents[0] as { type: "message_complete"; message: Message })
|
|
423
|
+
.message.role,
|
|
424
|
+
).toBe("assistant");
|
|
337
425
|
});
|
|
338
426
|
|
|
339
|
-
test(
|
|
427
|
+
test("emits tool_use and tool_result events during tool execution", async () => {
|
|
340
428
|
const { provider } = createMockProvider([
|
|
341
|
-
toolUseResponse(
|
|
342
|
-
textResponse(
|
|
429
|
+
toolUseResponse("t1", "read_file", { path: "/test.txt" }),
|
|
430
|
+
textResponse("Done"),
|
|
343
431
|
]);
|
|
344
432
|
|
|
345
|
-
const toolExecutor = async () => ({ content:
|
|
346
|
-
const loop = new AgentLoop(
|
|
433
|
+
const toolExecutor = async () => ({ content: "file data", isError: false });
|
|
434
|
+
const loop = new AgentLoop(
|
|
435
|
+
provider,
|
|
436
|
+
"system",
|
|
437
|
+
{},
|
|
438
|
+
dummyTools,
|
|
439
|
+
toolExecutor,
|
|
440
|
+
);
|
|
347
441
|
|
|
348
442
|
const events: AgentEvent[] = [];
|
|
349
443
|
await loop.run([userMessage], collectEvents(events));
|
|
350
444
|
|
|
351
|
-
const toolUseEvents = events.filter((e) => e.type ===
|
|
445
|
+
const toolUseEvents = events.filter((e) => e.type === "tool_use");
|
|
352
446
|
expect(toolUseEvents).toHaveLength(1);
|
|
353
447
|
expect(toolUseEvents[0]).toEqual({
|
|
354
|
-
type:
|
|
355
|
-
id:
|
|
356
|
-
name:
|
|
357
|
-
input: { path:
|
|
448
|
+
type: "tool_use",
|
|
449
|
+
id: "t1",
|
|
450
|
+
name: "read_file",
|
|
451
|
+
input: { path: "/test.txt" },
|
|
358
452
|
});
|
|
359
453
|
|
|
360
|
-
const toolResultEvents = events.filter((e) => e.type ===
|
|
454
|
+
const toolResultEvents = events.filter((e) => e.type === "tool_result");
|
|
361
455
|
expect(toolResultEvents).toHaveLength(1);
|
|
362
|
-
expect(
|
|
363
|
-
|
|
364
|
-
|
|
456
|
+
expect(
|
|
457
|
+
(toolResultEvents[0] as Extract<AgentEvent, { type: "tool_result" }>)
|
|
458
|
+
.toolUseId,
|
|
459
|
+
).toBe("t1");
|
|
460
|
+
expect(
|
|
461
|
+
(toolResultEvents[0] as Extract<AgentEvent, { type: "tool_result" }>)
|
|
462
|
+
.content,
|
|
463
|
+
).toBe("file data");
|
|
464
|
+
expect(
|
|
465
|
+
(toolResultEvents[0] as Extract<AgentEvent, { type: "tool_result" }>)
|
|
466
|
+
.isError,
|
|
467
|
+
).toBe(false);
|
|
365
468
|
});
|
|
366
469
|
|
|
367
470
|
// 8. Progress reminder injection every 5 tool-use turns
|
|
368
|
-
test(
|
|
471
|
+
test("injects progress reminder after every 5 tool-use turns", async () => {
|
|
369
472
|
// Create 6 tool responses followed by a text response
|
|
370
473
|
const responses: ProviderResponse[] = [];
|
|
371
474
|
for (let i = 0; i < 6; i++) {
|
|
372
|
-
responses.push(
|
|
475
|
+
responses.push(
|
|
476
|
+
toolUseResponse(`t${i}`, "read_file", { path: `/file${i}.txt` }),
|
|
477
|
+
);
|
|
373
478
|
}
|
|
374
|
-
responses.push(textResponse(
|
|
479
|
+
responses.push(textResponse("Finally done"));
|
|
375
480
|
|
|
376
481
|
const { provider, calls } = createMockProvider(responses);
|
|
377
|
-
const toolExecutor = async () => ({ content:
|
|
378
|
-
const loop = new AgentLoop(
|
|
482
|
+
const toolExecutor = async () => ({ content: "data", isError: false });
|
|
483
|
+
const loop = new AgentLoop(
|
|
484
|
+
provider,
|
|
485
|
+
"system",
|
|
486
|
+
{},
|
|
487
|
+
dummyTools,
|
|
488
|
+
toolExecutor,
|
|
489
|
+
);
|
|
379
490
|
|
|
380
491
|
await loop.run([userMessage], () => {});
|
|
381
492
|
|
|
@@ -383,24 +494,24 @@ describe('AgentLoop', () => {
|
|
|
383
494
|
// calls[5] is the 6th provider call; its messages[-1] should have the reminder
|
|
384
495
|
const fifthTurnResultMsg = calls[5].messages[calls[5].messages.length - 1];
|
|
385
496
|
const reminderBlock = fifthTurnResultMsg.content.find(
|
|
386
|
-
(b): b is Extract<ContentBlock, { type:
|
|
387
|
-
b.type ===
|
|
497
|
+
(b): b is Extract<ContentBlock, { type: "text" }> =>
|
|
498
|
+
b.type === "text" && b.text.includes("making meaningful progress"),
|
|
388
499
|
);
|
|
389
500
|
expect(reminderBlock).toBeDefined();
|
|
390
501
|
});
|
|
391
502
|
|
|
392
|
-
test(
|
|
503
|
+
test("stops after configured maxToolUseTurns to prevent runaway loops", async () => {
|
|
393
504
|
const responses: ProviderResponse[] = [
|
|
394
|
-
toolUseResponse(
|
|
395
|
-
toolUseResponse(
|
|
396
|
-
toolUseResponse(
|
|
397
|
-
textResponse(
|
|
505
|
+
toolUseResponse("t1", "read_file", { path: "/one.txt" }),
|
|
506
|
+
toolUseResponse("t2", "read_file", { path: "/two.txt" }),
|
|
507
|
+
toolUseResponse("t3", "read_file", { path: "/three.txt" }),
|
|
508
|
+
textResponse("Should never be requested"),
|
|
398
509
|
];
|
|
399
510
|
const { provider, calls } = createMockProvider(responses);
|
|
400
|
-
const toolExecutor = async () => ({ content:
|
|
511
|
+
const toolExecutor = async () => ({ content: "data", isError: false });
|
|
401
512
|
const loop = new AgentLoop(
|
|
402
513
|
provider,
|
|
403
|
-
|
|
514
|
+
"system",
|
|
404
515
|
{ maxToolUseTurns: 3 },
|
|
405
516
|
dummyTools,
|
|
406
517
|
toolExecutor,
|
|
@@ -413,32 +524,36 @@ describe('AgentLoop', () => {
|
|
|
413
524
|
expect(calls).toHaveLength(3);
|
|
414
525
|
|
|
415
526
|
const errorEvents = events.filter(
|
|
416
|
-
(e): e is Extract<AgentEvent, { type:
|
|
527
|
+
(e): e is Extract<AgentEvent, { type: "error" }> => e.type === "error",
|
|
417
528
|
);
|
|
418
529
|
expect(errorEvents).toHaveLength(1);
|
|
419
|
-
expect(errorEvents[0].error.message).toContain(
|
|
530
|
+
expect(errorEvents[0].error.message).toContain(
|
|
531
|
+
"Tool-use turn limit reached (3)",
|
|
532
|
+
);
|
|
420
533
|
|
|
421
534
|
const lastMessage = history[history.length - 1];
|
|
422
|
-
expect(lastMessage.role).toBe(
|
|
535
|
+
expect(lastMessage.role).toBe("user");
|
|
423
536
|
const limitText = lastMessage.content.find(
|
|
424
|
-
(b): b is Extract<ContentBlock, { type:
|
|
425
|
-
b.type ===
|
|
537
|
+
(b): b is Extract<ContentBlock, { type: "text" }> =>
|
|
538
|
+
b.type === "text" && b.text.includes("Tool-use turn limit reached (3)"),
|
|
426
539
|
);
|
|
427
540
|
expect(limitText).toBeDefined();
|
|
428
541
|
});
|
|
429
542
|
|
|
430
|
-
test(
|
|
543
|
+
test("injects approaching-limit warning before the hard stop", async () => {
|
|
431
544
|
// maxToolUseTurns: 8, soft warning at turn 3 (8 - 5 = 3)
|
|
432
545
|
const responses: ProviderResponse[] = [];
|
|
433
546
|
for (let i = 0; i < 8; i++) {
|
|
434
|
-
responses.push(
|
|
547
|
+
responses.push(
|
|
548
|
+
toolUseResponse(`t${i}`, "read_file", { path: `/${i}.txt` }),
|
|
549
|
+
);
|
|
435
550
|
}
|
|
436
|
-
responses.push(textResponse(
|
|
551
|
+
responses.push(textResponse("done"));
|
|
437
552
|
const { provider, calls } = createMockProvider(responses);
|
|
438
|
-
const toolExecutor = async () => ({ content:
|
|
553
|
+
const toolExecutor = async () => ({ content: "data", isError: false });
|
|
439
554
|
const loop = new AgentLoop(
|
|
440
555
|
provider,
|
|
441
|
-
|
|
556
|
+
"system",
|
|
442
557
|
{ maxToolUseTurns: 8 },
|
|
443
558
|
dummyTools,
|
|
444
559
|
toolExecutor,
|
|
@@ -455,25 +570,28 @@ describe('AgentLoop', () => {
|
|
|
455
570
|
const turn4Messages = calls[3].messages;
|
|
456
571
|
const lastMsg = turn4Messages[turn4Messages.length - 1];
|
|
457
572
|
const warningBlock = lastMsg.content.find(
|
|
458
|
-
(b): b is Extract<ContentBlock, { type:
|
|
459
|
-
b.type ===
|
|
573
|
+
(b): b is Extract<ContentBlock, { type: "text" }> =>
|
|
574
|
+
b.type === "text" &&
|
|
575
|
+
b.text.includes("approaching the tool-use turn limit"),
|
|
460
576
|
);
|
|
461
577
|
expect(warningBlock).toBeDefined();
|
|
462
578
|
});
|
|
463
579
|
|
|
464
|
-
test(
|
|
580
|
+
test("runs without limit when maxToolUseTurns is 0", async () => {
|
|
465
581
|
// Use 20 turns (beyond old default of 8 used in other tests) to verify no cap
|
|
466
582
|
const turnCount = 20;
|
|
467
583
|
const responses: ProviderResponse[] = [];
|
|
468
584
|
for (let i = 0; i < turnCount; i++) {
|
|
469
|
-
responses.push(
|
|
585
|
+
responses.push(
|
|
586
|
+
toolUseResponse(`t${i}`, "read_file", { path: `/${i}.txt` }),
|
|
587
|
+
);
|
|
470
588
|
}
|
|
471
|
-
responses.push(textResponse(
|
|
589
|
+
responses.push(textResponse("done"));
|
|
472
590
|
const { provider, calls } = createMockProvider(responses);
|
|
473
|
-
const toolExecutor = async () => ({ content:
|
|
591
|
+
const toolExecutor = async () => ({ content: "data", isError: false });
|
|
474
592
|
const loop = new AgentLoop(
|
|
475
593
|
provider,
|
|
476
|
-
|
|
594
|
+
"system",
|
|
477
595
|
{ maxToolUseTurns: 0, minTurnIntervalMs: 0 },
|
|
478
596
|
dummyTools,
|
|
479
597
|
toolExecutor,
|
|
@@ -487,7 +605,7 @@ describe('AgentLoop', () => {
|
|
|
487
605
|
|
|
488
606
|
// No hard-limit error events should have been emitted
|
|
489
607
|
const errorEvents = events.filter(
|
|
490
|
-
(e): e is Extract<AgentEvent, { type:
|
|
608
|
+
(e): e is Extract<AgentEvent, { type: "error" }> => e.type === "error",
|
|
491
609
|
);
|
|
492
610
|
expect(errorEvents).toHaveLength(0);
|
|
493
611
|
|
|
@@ -495,8 +613,8 @@ describe('AgentLoop', () => {
|
|
|
495
613
|
const progressChecks = calls.filter((call) => {
|
|
496
614
|
const lastMsg = call.messages[call.messages.length - 1];
|
|
497
615
|
return lastMsg.content.some(
|
|
498
|
-
(b): b is Extract<ContentBlock, { type:
|
|
499
|
-
b.type ===
|
|
616
|
+
(b): b is Extract<ContentBlock, { type: "text" }> =>
|
|
617
|
+
b.type === "text" && b.text.includes("making meaningful progress"),
|
|
500
618
|
);
|
|
501
619
|
});
|
|
502
620
|
expect(progressChecks.length).toBeGreaterThanOrEqual(3);
|
|
@@ -505,40 +623,51 @@ describe('AgentLoop', () => {
|
|
|
505
623
|
const limitWarnings = calls.filter((call) => {
|
|
506
624
|
const lastMsg = call.messages[call.messages.length - 1];
|
|
507
625
|
return lastMsg.content.some(
|
|
508
|
-
(b): b is Extract<ContentBlock, { type:
|
|
509
|
-
b.type ===
|
|
626
|
+
(b): b is Extract<ContentBlock, { type: "text" }> =>
|
|
627
|
+
b.type === "text" &&
|
|
628
|
+
b.text.includes("approaching the tool-use turn limit"),
|
|
510
629
|
);
|
|
511
630
|
});
|
|
512
631
|
expect(limitWarnings).toHaveLength(0);
|
|
513
632
|
});
|
|
514
633
|
|
|
515
634
|
// 9. Tool executor error results are forwarded correctly
|
|
516
|
-
test(
|
|
635
|
+
test("forwards tool error results to provider", async () => {
|
|
517
636
|
const { provider, calls } = createMockProvider([
|
|
518
|
-
toolUseResponse(
|
|
519
|
-
textResponse(
|
|
637
|
+
toolUseResponse("t1", "read_file", { path: "/nonexistent.txt" }),
|
|
638
|
+
textResponse("File not found, sorry."),
|
|
520
639
|
]);
|
|
521
640
|
|
|
522
|
-
const toolExecutor = async () => ({
|
|
523
|
-
|
|
641
|
+
const toolExecutor = async () => ({
|
|
642
|
+
content: "ENOENT: file not found",
|
|
643
|
+
isError: true,
|
|
644
|
+
});
|
|
645
|
+
const loop = new AgentLoop(
|
|
646
|
+
provider,
|
|
647
|
+
"system",
|
|
648
|
+
{},
|
|
649
|
+
dummyTools,
|
|
650
|
+
toolExecutor,
|
|
651
|
+
);
|
|
524
652
|
|
|
525
653
|
await loop.run([userMessage], () => {});
|
|
526
654
|
|
|
527
655
|
const secondCallMessages = calls[1].messages;
|
|
528
656
|
const lastMsg = secondCallMessages[secondCallMessages.length - 1];
|
|
529
657
|
const toolResultBlock = lastMsg.content.find(
|
|
530
|
-
(b): b is Extract<ContentBlock, { type:
|
|
658
|
+
(b): b is Extract<ContentBlock, { type: "tool_result" }> =>
|
|
659
|
+
b.type === "tool_result",
|
|
531
660
|
);
|
|
532
661
|
expect(toolResultBlock).toBeDefined();
|
|
533
662
|
expect(toolResultBlock!.is_error).toBe(true);
|
|
534
|
-
expect(toolResultBlock!.content).toBe(
|
|
663
|
+
expect(toolResultBlock!.content).toBe("ENOENT: file not found");
|
|
535
664
|
});
|
|
536
665
|
|
|
537
666
|
// 10. Tool output chunks are forwarded via onEvent
|
|
538
|
-
test(
|
|
667
|
+
test("emits tool_output_chunk events during tool execution", async () => {
|
|
539
668
|
const { provider } = createMockProvider([
|
|
540
|
-
toolUseResponse(
|
|
541
|
-
textResponse(
|
|
669
|
+
toolUseResponse("t1", "read_file", { path: "/test.txt" }),
|
|
670
|
+
textResponse("Done"),
|
|
542
671
|
]);
|
|
543
672
|
|
|
544
673
|
const toolExecutor = async (
|
|
@@ -546,36 +675,48 @@ describe('AgentLoop', () => {
|
|
|
546
675
|
_input: Record<string, unknown>,
|
|
547
676
|
onOutput?: (chunk: string) => void,
|
|
548
677
|
) => {
|
|
549
|
-
onOutput?.(
|
|
550
|
-
onOutput?.(
|
|
551
|
-
return { content:
|
|
678
|
+
onOutput?.("chunk1");
|
|
679
|
+
onOutput?.("chunk2");
|
|
680
|
+
return { content: "full output", isError: false };
|
|
552
681
|
};
|
|
553
682
|
|
|
554
|
-
const loop = new AgentLoop(
|
|
683
|
+
const loop = new AgentLoop(
|
|
684
|
+
provider,
|
|
685
|
+
"system",
|
|
686
|
+
{},
|
|
687
|
+
dummyTools,
|
|
688
|
+
toolExecutor,
|
|
689
|
+
);
|
|
555
690
|
const events: AgentEvent[] = [];
|
|
556
691
|
await loop.run([userMessage], collectEvents(events));
|
|
557
692
|
|
|
558
|
-
const chunkEvents = events.filter((e) => e.type ===
|
|
693
|
+
const chunkEvents = events.filter((e) => e.type === "tool_output_chunk");
|
|
559
694
|
expect(chunkEvents).toHaveLength(2);
|
|
560
|
-
expect(
|
|
561
|
-
|
|
695
|
+
expect(
|
|
696
|
+
(chunkEvents[0] as Extract<AgentEvent, { type: "tool_output_chunk" }>)
|
|
697
|
+
.chunk,
|
|
698
|
+
).toBe("chunk1");
|
|
699
|
+
expect(
|
|
700
|
+
(chunkEvents[1] as Extract<AgentEvent, { type: "tool_output_chunk" }>)
|
|
701
|
+
.chunk,
|
|
702
|
+
).toBe("chunk2");
|
|
562
703
|
});
|
|
563
704
|
|
|
564
705
|
// 11. System prompt and tools are passed to provider
|
|
565
|
-
test(
|
|
566
|
-
const { provider, calls } = createMockProvider([textResponse(
|
|
567
|
-
const loop = new AgentLoop(provider,
|
|
706
|
+
test("passes system prompt and tools to provider", async () => {
|
|
707
|
+
const { provider, calls } = createMockProvider([textResponse("Hi")]);
|
|
708
|
+
const loop = new AgentLoop(provider, "My system prompt", {}, dummyTools);
|
|
568
709
|
|
|
569
710
|
await loop.run([userMessage], () => {});
|
|
570
711
|
|
|
571
|
-
expect(calls[0].systemPrompt).toBe(
|
|
712
|
+
expect(calls[0].systemPrompt).toBe("My system prompt");
|
|
572
713
|
expect(calls[0].tools).toEqual(dummyTools);
|
|
573
714
|
});
|
|
574
715
|
|
|
575
716
|
// 12. No tools configured — tools are not passed to provider
|
|
576
|
-
test(
|
|
577
|
-
const { provider, calls } = createMockProvider([textResponse(
|
|
578
|
-
const loop = new AgentLoop(provider,
|
|
717
|
+
test("does not pass tools to provider when none are configured", async () => {
|
|
718
|
+
const { provider, calls } = createMockProvider([textResponse("Hi")]);
|
|
719
|
+
const loop = new AgentLoop(provider, "system");
|
|
579
720
|
|
|
580
721
|
await loop.run([userMessage], () => {});
|
|
581
722
|
|
|
@@ -583,33 +724,60 @@ describe('AgentLoop', () => {
|
|
|
583
724
|
});
|
|
584
725
|
|
|
585
726
|
// 13. Parallel tool execution — multiple tool_use blocks in a single response
|
|
586
|
-
test(
|
|
727
|
+
test("executes multiple tools in parallel", async () => {
|
|
587
728
|
const { provider, calls } = createMockProvider([
|
|
588
729
|
// Provider returns 3 tool_use blocks in a single response
|
|
589
730
|
{
|
|
590
731
|
content: [
|
|
591
|
-
{
|
|
592
|
-
|
|
593
|
-
|
|
732
|
+
{
|
|
733
|
+
type: "tool_use" as const,
|
|
734
|
+
id: "t1",
|
|
735
|
+
name: "read_file",
|
|
736
|
+
input: { path: "/a.txt" },
|
|
737
|
+
},
|
|
738
|
+
{
|
|
739
|
+
type: "tool_use" as const,
|
|
740
|
+
id: "t2",
|
|
741
|
+
name: "read_file",
|
|
742
|
+
input: { path: "/b.txt" },
|
|
743
|
+
},
|
|
744
|
+
{
|
|
745
|
+
type: "tool_use" as const,
|
|
746
|
+
id: "t3",
|
|
747
|
+
name: "read_file",
|
|
748
|
+
input: { path: "/c.txt" },
|
|
749
|
+
},
|
|
594
750
|
],
|
|
595
|
-
model:
|
|
751
|
+
model: "mock-model",
|
|
596
752
|
usage: { inputTokens: 10, outputTokens: 5 },
|
|
597
|
-
stopReason:
|
|
753
|
+
stopReason: "tool_use" as const,
|
|
598
754
|
},
|
|
599
|
-
textResponse(
|
|
755
|
+
textResponse("Got all three files."),
|
|
600
756
|
]);
|
|
601
757
|
|
|
602
758
|
const executionLog: { path: string; start: number; end: number }[] = [];
|
|
603
|
-
const toolExecutor = async (
|
|
759
|
+
const toolExecutor = async (
|
|
760
|
+
_name: string,
|
|
761
|
+
input: Record<string, unknown>,
|
|
762
|
+
) => {
|
|
604
763
|
const start = Date.now();
|
|
605
764
|
// Simulate async work — all tools should overlap in time
|
|
606
|
-
await new Promise(resolve => setTimeout(resolve, 50));
|
|
765
|
+
await new Promise((resolve) => setTimeout(resolve, 50));
|
|
607
766
|
const end = Date.now();
|
|
608
767
|
executionLog.push({ path: (input as { path: string }).path, start, end });
|
|
609
|
-
return {
|
|
768
|
+
return {
|
|
769
|
+
content: `contents of ${(input as { path: string }).path}`,
|
|
770
|
+
isError: false,
|
|
771
|
+
};
|
|
610
772
|
};
|
|
611
773
|
|
|
612
|
-
const loop = new AgentLoop(
|
|
774
|
+
const loop = new AgentLoop(
|
|
775
|
+
provider,
|
|
776
|
+
"system",
|
|
777
|
+
{},
|
|
778
|
+
dummyTools,
|
|
779
|
+
toolExecutor,
|
|
780
|
+
);
|
|
613
781
|
const events: AgentEvent[] = [];
|
|
614
782
|
const history = await loop.run([userMessage], collectEvents(events));
|
|
615
783
|
|
|
@@ -618,8 +786,8 @@ describe('AgentLoop', () => {
|
|
|
618
786
|
|
|
619
787
|
// Verify parallel execution: all tools should start before any finishes
|
|
620
788
|
// (with 50ms delay each, sequential would take 150ms+, parallel ~50ms)
|
|
621
|
-
const allStarts = executionLog.map(e => e.start);
|
|
622
|
-
const allEnds = executionLog.map(e => e.end);
|
|
789
|
+
const allStarts = executionLog.map((e) => e.start);
|
|
790
|
+
const allEnds = executionLog.map((e) => e.end);
|
|
623
791
|
const firstEnd = Math.min(...allEnds);
|
|
624
792
|
const lastStart = Math.max(...allStarts);
|
|
625
793
|
// In parallel execution, the last tool starts before the first tool ends
|
|
@@ -632,19 +800,21 @@ describe('AgentLoop', () => {
|
|
|
632
800
|
const secondCallMessages = calls[1].messages;
|
|
633
801
|
const lastMsg = secondCallMessages[secondCallMessages.length - 1];
|
|
634
802
|
const toolResultBlocks = lastMsg.content.filter(
|
|
635
|
-
(b): b is Extract<ContentBlock, { type:
|
|
803
|
+
(b): b is Extract<ContentBlock, { type: "tool_result" }> =>
|
|
804
|
+
b.type === "tool_result",
|
|
636
805
|
);
|
|
637
806
|
expect(toolResultBlocks).toHaveLength(3);
|
|
638
|
-
expect(toolResultBlocks[0].tool_use_id).toBe(
|
|
639
|
-
expect(toolResultBlocks[1].tool_use_id).toBe(
|
|
640
|
-
expect(toolResultBlocks[2].tool_use_id).toBe(
|
|
807
|
+
expect(toolResultBlocks[0].tool_use_id).toBe("t1");
|
|
808
|
+
expect(toolResultBlocks[1].tool_use_id).toBe("t2");
|
|
809
|
+
expect(toolResultBlocks[2].tool_use_id).toBe("t3");
|
|
641
810
|
|
|
642
811
|
// All tool_use events should be emitted before any tool_result events
|
|
643
812
|
let lastToolUseIdx = -1;
|
|
644
813
|
let firstToolResultIdx = events.length;
|
|
645
814
|
events.forEach((e, i) => {
|
|
646
|
-
if (e.type ===
|
|
647
|
-
if (e.type ===
|
|
815
|
+
if (e.type === "tool_use") lastToolUseIdx = i;
|
|
816
|
+
if (e.type === "tool_result" && i < firstToolResultIdx)
|
|
817
|
+
firstToolResultIdx = i;
|
|
648
818
|
});
|
|
649
819
|
expect(lastToolUseIdx).toBeLessThan(firstToolResultIdx);
|
|
650
820
|
|
|
@@ -653,96 +823,151 @@ describe('AgentLoop', () => {
|
|
|
653
823
|
});
|
|
654
824
|
|
|
655
825
|
// 14. Abort before parallel tool execution synthesizes cancelled results
|
|
656
|
-
test(
|
|
826
|
+
test("synthesizes cancelled results when aborted before tool execution", async () => {
|
|
657
827
|
const controller = new AbortController();
|
|
658
828
|
|
|
659
829
|
const { provider } = createMockProvider([
|
|
660
830
|
{
|
|
661
831
|
content: [
|
|
662
|
-
{
|
|
663
|
-
|
|
832
|
+
{
|
|
833
|
+
type: "tool_use" as const,
|
|
834
|
+
id: "t1",
|
|
835
|
+
name: "read_file",
|
|
836
|
+
input: { path: "/a.txt" },
|
|
837
|
+
},
|
|
838
|
+
{
|
|
839
|
+
type: "tool_use" as const,
|
|
840
|
+
id: "t2",
|
|
841
|
+
name: "read_file",
|
|
842
|
+
input: { path: "/b.txt" },
|
|
843
|
+
},
|
|
664
844
|
],
|
|
665
|
-
model:
|
|
845
|
+
model: "mock-model",
|
|
666
846
|
usage: { inputTokens: 10, outputTokens: 5 },
|
|
667
|
-
stopReason:
|
|
847
|
+
stopReason: "tool_use" as const,
|
|
668
848
|
},
|
|
669
849
|
]);
|
|
670
850
|
|
|
671
851
|
// Abort during the provider call so the signal is already aborted
|
|
672
852
|
// before tool execution begins
|
|
673
853
|
const originalSendMessage = provider.sendMessage.bind(provider);
|
|
674
|
-
provider.sendMessage = async (
|
|
854
|
+
provider.sendMessage = async (
|
|
855
|
+
...args: Parameters<typeof provider.sendMessage>
|
|
856
|
+
) => {
|
|
675
857
|
const result = await originalSendMessage(...args);
|
|
676
858
|
controller.abort();
|
|
677
859
|
return result;
|
|
678
860
|
};
|
|
679
861
|
|
|
680
862
|
const toolCalls: string[] = [];
|
|
681
|
-
const toolExecutor = async (
|
|
863
|
+
const toolExecutor = async (
|
|
864
|
+
_name: string,
|
|
865
|
+
input: Record<string, unknown>,
|
|
866
|
+
) => {
|
|
682
867
|
toolCalls.push((input as { path: string }).path);
|
|
683
|
-
return { content:
|
|
868
|
+
return { content: "data", isError: false };
|
|
684
869
|
};
|
|
685
870
|
|
|
686
|
-
const loop = new AgentLoop(
|
|
871
|
+
const loop = new AgentLoop(
|
|
872
|
+
provider,
|
|
873
|
+
"system",
|
|
874
|
+
{},
|
|
875
|
+
dummyTools,
|
|
876
|
+
toolExecutor,
|
|
877
|
+
);
|
|
687
878
|
const events: AgentEvent[] = [];
|
|
688
|
-
const history = await loop.run(
|
|
879
|
+
const history = await loop.run(
|
|
880
|
+
[userMessage],
|
|
881
|
+
collectEvents(events),
|
|
882
|
+
controller.signal,
|
|
883
|
+
);
|
|
689
884
|
|
|
690
885
|
// No tools should have been executed
|
|
691
886
|
expect(toolCalls).toHaveLength(0);
|
|
692
887
|
|
|
693
888
|
// History should contain cancelled tool_result blocks
|
|
694
889
|
const lastMsg = history[history.length - 1];
|
|
695
|
-
expect(lastMsg.role).toBe(
|
|
890
|
+
expect(lastMsg.role).toBe("user");
|
|
696
891
|
const toolResultBlocks = lastMsg.content.filter(
|
|
697
|
-
(b): b is Extract<ContentBlock, { type:
|
|
892
|
+
(b): b is Extract<ContentBlock, { type: "tool_result" }> =>
|
|
893
|
+
b.type === "tool_result",
|
|
698
894
|
);
|
|
699
895
|
expect(toolResultBlocks).toHaveLength(2);
|
|
700
|
-
expect(toolResultBlocks[0].tool_use_id).toBe(
|
|
701
|
-
expect(toolResultBlocks[0].content).toBe(
|
|
896
|
+
expect(toolResultBlocks[0].tool_use_id).toBe("t1");
|
|
897
|
+
expect(toolResultBlocks[0].content).toBe("Cancelled by user");
|
|
702
898
|
expect(toolResultBlocks[0].is_error).toBe(true);
|
|
703
|
-
expect(toolResultBlocks[1].tool_use_id).toBe(
|
|
704
|
-
expect(toolResultBlocks[1].content).toBe(
|
|
899
|
+
expect(toolResultBlocks[1].tool_use_id).toBe("t2");
|
|
900
|
+
expect(toolResultBlocks[1].content).toBe("Cancelled by user");
|
|
705
901
|
expect(toolResultBlocks[1].is_error).toBe(true);
|
|
706
902
|
});
|
|
707
903
|
|
|
708
904
|
// 15. Parallel tool_result events are emitted in deterministic tool_use order
|
|
709
|
-
test(
|
|
905
|
+
test("emits tool_result events in tool_use order regardless of completion timing", async () => {
|
|
710
906
|
const { provider } = createMockProvider([
|
|
711
907
|
{
|
|
712
908
|
content: [
|
|
713
|
-
{
|
|
714
|
-
|
|
715
|
-
|
|
909
|
+
{
|
|
910
|
+
type: "tool_use" as const,
|
|
911
|
+
id: "t1",
|
|
912
|
+
name: "read_file",
|
|
913
|
+
input: { path: "/slow.txt" },
|
|
914
|
+
},
|
|
915
|
+
{
|
|
916
|
+
type: "tool_use" as const,
|
|
917
|
+
id: "t2",
|
|
918
|
+
name: "read_file",
|
|
919
|
+
input: { path: "/fast.txt" },
|
|
920
|
+
},
|
|
921
|
+
{
|
|
922
|
+
type: "tool_use" as const,
|
|
923
|
+
id: "t3",
|
|
924
|
+
name: "read_file",
|
|
925
|
+
input: { path: "/medium.txt" },
|
|
926
|
+
},
|
|
716
927
|
],
|
|
717
|
-
model:
|
|
928
|
+
model: "mock-model",
|
|
718
929
|
usage: { inputTokens: 10, outputTokens: 5 },
|
|
719
|
-
stopReason:
|
|
930
|
+
stopReason: "tool_use" as const,
|
|
720
931
|
},
|
|
721
|
-
textResponse(
|
|
932
|
+
textResponse("Done"),
|
|
722
933
|
]);
|
|
723
934
|
|
|
724
935
|
// Tools complete in different order than they were called: t2 first, t3 second, t1 last
|
|
725
|
-
const toolExecutor = async (
|
|
936
|
+
const toolExecutor = async (
|
|
937
|
+
_name: string,
|
|
938
|
+
input: Record<string, unknown>,
|
|
939
|
+
) => {
|
|
726
940
|
const path = (input as { path: string }).path;
|
|
727
|
-
const delays: Record<string, number> = {
|
|
728
|
-
|
|
941
|
+
const delays: Record<string, number> = {
|
|
942
|
+
"/slow.txt": 80,
|
|
943
|
+
"/fast.txt": 10,
|
|
944
|
+
"/medium.txt": 40,
|
|
945
|
+
};
|
|
946
|
+
await new Promise((resolve) => setTimeout(resolve, delays[path] ?? 10));
|
|
729
947
|
return { content: `contents of ${path}`, isError: false };
|
|
730
948
|
};
|
|
731
949
|
|
|
732
|
-
const loop = new AgentLoop(
|
|
950
|
+
const loop = new AgentLoop(
|
|
951
|
+
provider,
|
|
952
|
+
"system",
|
|
953
|
+
{},
|
|
954
|
+
dummyTools,
|
|
955
|
+
toolExecutor,
|
|
956
|
+
);
|
|
733
957
|
const events: AgentEvent[] = [];
|
|
734
958
|
await loop.run([userMessage], collectEvents(events));
|
|
735
959
|
|
|
736
960
|
// Collect tool_result events in order
|
|
737
961
|
const toolResultEvents = events.filter(
|
|
738
|
-
(e): e is Extract<AgentEvent, { type:
|
|
962
|
+
(e): e is Extract<AgentEvent, { type: "tool_result" }> =>
|
|
963
|
+
e.type === "tool_result",
|
|
739
964
|
);
|
|
740
965
|
expect(toolResultEvents).toHaveLength(3);
|
|
741
966
|
|
|
742
967
|
// Results must be in tool_use order (t1, t2, t3), NOT completion order (t2, t3, t1)
|
|
743
|
-
expect(toolResultEvents[0].toolUseId).toBe(
|
|
744
|
-
expect(toolResultEvents[1].toolUseId).toBe(
|
|
745
|
-
expect(toolResultEvents[2].toolUseId).toBe(
|
|
968
|
+
expect(toolResultEvents[0].toolUseId).toBe("t1");
|
|
969
|
+
expect(toolResultEvents[1].toolUseId).toBe("t2");
|
|
970
|
+
expect(toolResultEvents[2].toolUseId).toBe("t3");
|
|
746
971
|
});
|
|
747
972
|
|
|
748
973
|
// ---------------------------------------------------------------------------
|
|
@@ -750,19 +975,25 @@ describe('AgentLoop', () => {
|
|
|
750
975
|
// ---------------------------------------------------------------------------
|
|
751
976
|
|
|
752
977
|
// 16. Checkpoint callback is called after tool results with correct info
|
|
753
|
-
test(
|
|
978
|
+
test("checkpoint callback is called after tool results with correct info", async () => {
|
|
754
979
|
const { provider } = createMockProvider([
|
|
755
|
-
toolUseResponse(
|
|
756
|
-
textResponse(
|
|
980
|
+
toolUseResponse("t1", "read_file", { path: "/test.txt" }),
|
|
981
|
+
textResponse("Done"),
|
|
757
982
|
]);
|
|
758
983
|
|
|
759
|
-
const toolExecutor = async () => ({ content:
|
|
760
|
-
const loop = new AgentLoop(
|
|
984
|
+
const toolExecutor = async () => ({ content: "file data", isError: false });
|
|
985
|
+
const loop = new AgentLoop(
|
|
986
|
+
provider,
|
|
987
|
+
"system",
|
|
988
|
+
{},
|
|
989
|
+
dummyTools,
|
|
990
|
+
toolExecutor,
|
|
991
|
+
);
|
|
761
992
|
|
|
762
993
|
const checkpoints: CheckpointInfo[] = [];
|
|
763
994
|
const onCheckpoint = (checkpoint: CheckpointInfo): CheckpointDecision => {
|
|
764
995
|
checkpoints.push(checkpoint);
|
|
765
|
-
return
|
|
996
|
+
return "continue";
|
|
766
997
|
};
|
|
767
998
|
|
|
768
999
|
await loop.run([userMessage], () => {}, undefined, undefined, onCheckpoint);
|
|
@@ -776,84 +1007,120 @@ describe('AgentLoop', () => {
|
|
|
776
1007
|
});
|
|
777
1008
|
|
|
778
1009
|
// 17. Returning 'continue' lets the loop proceed normally
|
|
779
|
-
test(
|
|
1010
|
+
test("checkpoint returning continue lets the loop proceed normally", async () => {
|
|
780
1011
|
const { provider, calls } = createMockProvider([
|
|
781
|
-
toolUseResponse(
|
|
782
|
-
toolUseResponse(
|
|
783
|
-
textResponse(
|
|
1012
|
+
toolUseResponse("t1", "read_file", { path: "/a.txt" }),
|
|
1013
|
+
toolUseResponse("t2", "read_file", { path: "/b.txt" }),
|
|
1014
|
+
textResponse("All done"),
|
|
784
1015
|
]);
|
|
785
1016
|
|
|
786
|
-
const toolExecutor = async () => ({ content:
|
|
787
|
-
const loop = new AgentLoop(
|
|
1017
|
+
const toolExecutor = async () => ({ content: "data", isError: false });
|
|
1018
|
+
const loop = new AgentLoop(
|
|
1019
|
+
provider,
|
|
1020
|
+
"system",
|
|
1021
|
+
{},
|
|
1022
|
+
dummyTools,
|
|
1023
|
+
toolExecutor,
|
|
1024
|
+
);
|
|
788
1025
|
|
|
789
|
-
const onCheckpoint = (): CheckpointDecision =>
|
|
1026
|
+
const onCheckpoint = (): CheckpointDecision => "continue";
|
|
790
1027
|
|
|
791
|
-
const history = await loop.run(
|
|
1028
|
+
const history = await loop.run(
|
|
1029
|
+
[userMessage],
|
|
1030
|
+
() => {},
|
|
1031
|
+
undefined,
|
|
1032
|
+
undefined,
|
|
1033
|
+
onCheckpoint,
|
|
1034
|
+
);
|
|
792
1035
|
|
|
793
1036
|
// All 3 provider calls should happen (2 tool turns + final text)
|
|
794
1037
|
expect(calls).toHaveLength(3);
|
|
795
1038
|
// Full history: user, assistant(t1), user(result1), assistant(t2), user(result2), assistant(text)
|
|
796
1039
|
expect(history).toHaveLength(6);
|
|
797
|
-
expect(history[5].content).toEqual([{ type:
|
|
1040
|
+
expect(history[5].content).toEqual([{ type: "text", text: "All done" }]);
|
|
798
1041
|
});
|
|
799
1042
|
|
|
800
1043
|
// 18. Returning 'yield' causes the loop to stop after that turn
|
|
801
|
-
test(
|
|
1044
|
+
test("checkpoint returning yield causes the loop to stop", async () => {
|
|
802
1045
|
const { provider, calls } = createMockProvider([
|
|
803
|
-
toolUseResponse(
|
|
804
|
-
toolUseResponse(
|
|
805
|
-
textResponse(
|
|
1046
|
+
toolUseResponse("t1", "read_file", { path: "/a.txt" }),
|
|
1047
|
+
toolUseResponse("t2", "read_file", { path: "/b.txt" }),
|
|
1048
|
+
textResponse("Should not reach"),
|
|
806
1049
|
]);
|
|
807
1050
|
|
|
808
|
-
const toolExecutor = async () => ({ content:
|
|
809
|
-
const loop = new AgentLoop(
|
|
1051
|
+
const toolExecutor = async () => ({ content: "data", isError: false });
|
|
1052
|
+
const loop = new AgentLoop(
|
|
1053
|
+
provider,
|
|
1054
|
+
"system",
|
|
1055
|
+
{},
|
|
1056
|
+
dummyTools,
|
|
1057
|
+
toolExecutor,
|
|
1058
|
+
);
|
|
810
1059
|
|
|
811
|
-
const onCheckpoint = (): CheckpointDecision =>
|
|
1060
|
+
const onCheckpoint = (): CheckpointDecision => "yield";
|
|
812
1061
|
|
|
813
|
-
const history = await loop.run(
|
|
1062
|
+
const history = await loop.run(
|
|
1063
|
+
[userMessage],
|
|
1064
|
+
() => {},
|
|
1065
|
+
undefined,
|
|
1066
|
+
undefined,
|
|
1067
|
+
onCheckpoint,
|
|
1068
|
+
);
|
|
814
1069
|
|
|
815
1070
|
// Only 1 provider call should happen — loop yields after first tool turn
|
|
816
1071
|
expect(calls).toHaveLength(1);
|
|
817
1072
|
// History: user, assistant(t1), user(result1)
|
|
818
1073
|
expect(history).toHaveLength(3);
|
|
819
|
-
expect(history[1].role).toBe(
|
|
820
|
-
expect(history[2].role).toBe(
|
|
1074
|
+
expect(history[1].role).toBe("assistant");
|
|
1075
|
+
expect(history[2].role).toBe("user");
|
|
821
1076
|
});
|
|
822
1077
|
|
|
823
1078
|
// 19. Without a checkpoint callback, behavior is unchanged
|
|
824
|
-
test(
|
|
1079
|
+
test("without checkpoint callback behavior is unchanged", async () => {
|
|
825
1080
|
const { provider, calls } = createMockProvider([
|
|
826
|
-
toolUseResponse(
|
|
827
|
-
textResponse(
|
|
1081
|
+
toolUseResponse("t1", "read_file", { path: "/a.txt" }),
|
|
1082
|
+
textResponse("Done"),
|
|
828
1083
|
]);
|
|
829
1084
|
|
|
830
|
-
const toolExecutor = async () => ({ content:
|
|
831
|
-
const loop = new AgentLoop(
|
|
1085
|
+
const toolExecutor = async () => ({ content: "data", isError: false });
|
|
1086
|
+
const loop = new AgentLoop(
|
|
1087
|
+
provider,
|
|
1088
|
+
"system",
|
|
1089
|
+
{},
|
|
1090
|
+
dummyTools,
|
|
1091
|
+
toolExecutor,
|
|
1092
|
+
);
|
|
832
1093
|
|
|
833
1094
|
const history = await loop.run([userMessage], () => {});
|
|
834
1095
|
|
|
835
1096
|
// Normal behavior: 2 provider calls, full history
|
|
836
1097
|
expect(calls).toHaveLength(2);
|
|
837
1098
|
expect(history).toHaveLength(4);
|
|
838
|
-
expect(history[3].content).toEqual([{ type:
|
|
1099
|
+
expect(history[3].content).toEqual([{ type: "text", text: "Done" }]);
|
|
839
1100
|
});
|
|
840
1101
|
|
|
841
1102
|
// 20. turnIndex increments correctly across turns
|
|
842
|
-
test(
|
|
1103
|
+
test("turnIndex increments correctly across multiple turns", async () => {
|
|
843
1104
|
const { provider } = createMockProvider([
|
|
844
|
-
toolUseResponse(
|
|
845
|
-
toolUseResponse(
|
|
846
|
-
toolUseResponse(
|
|
847
|
-
textResponse(
|
|
1105
|
+
toolUseResponse("t1", "read_file", { path: "/a.txt" }),
|
|
1106
|
+
toolUseResponse("t2", "read_file", { path: "/b.txt" }),
|
|
1107
|
+
toolUseResponse("t3", "read_file", { path: "/c.txt" }),
|
|
1108
|
+
textResponse("Done"),
|
|
848
1109
|
]);
|
|
849
1110
|
|
|
850
|
-
const toolExecutor = async () => ({ content:
|
|
851
|
-
const loop = new AgentLoop(
|
|
1111
|
+
const toolExecutor = async () => ({ content: "data", isError: false });
|
|
1112
|
+
const loop = new AgentLoop(
|
|
1113
|
+
provider,
|
|
1114
|
+
"system",
|
|
1115
|
+
{},
|
|
1116
|
+
dummyTools,
|
|
1117
|
+
toolExecutor,
|
|
1118
|
+
);
|
|
852
1119
|
|
|
853
1120
|
const checkpoints: CheckpointInfo[] = [];
|
|
854
1121
|
const onCheckpoint = (checkpoint: CheckpointInfo): CheckpointDecision => {
|
|
855
1122
|
checkpoints.push(checkpoint);
|
|
856
|
-
return
|
|
1123
|
+
return "continue";
|
|
857
1124
|
};
|
|
858
1125
|
|
|
859
1126
|
await loop.run([userMessage], () => {}, undefined, undefined, onCheckpoint);
|
|
@@ -865,48 +1132,79 @@ describe('AgentLoop', () => {
|
|
|
865
1132
|
});
|
|
866
1133
|
|
|
867
1134
|
// 21. Checkpoint is NOT called when there's no tool use
|
|
868
|
-
test(
|
|
869
|
-
const { provider } = createMockProvider([
|
|
870
|
-
|
|
1135
|
+
test("checkpoint is not called when assistant responds with text only", async () => {
|
|
1136
|
+
const { provider } = createMockProvider([
|
|
1137
|
+
textResponse("Just a text response"),
|
|
1138
|
+
]);
|
|
1139
|
+
const loop = new AgentLoop(provider, "system", {}, dummyTools);
|
|
871
1140
|
|
|
872
1141
|
const checkpoints: CheckpointInfo[] = [];
|
|
873
1142
|
const onCheckpoint = (checkpoint: CheckpointInfo): CheckpointDecision => {
|
|
874
1143
|
checkpoints.push(checkpoint);
|
|
875
|
-
return
|
|
1144
|
+
return "continue";
|
|
876
1145
|
};
|
|
877
1146
|
|
|
878
|
-
const history = await loop.run(
|
|
1147
|
+
const history = await loop.run(
|
|
1148
|
+
[userMessage],
|
|
1149
|
+
() => {},
|
|
1150
|
+
undefined,
|
|
1151
|
+
undefined,
|
|
1152
|
+
onCheckpoint,
|
|
1153
|
+
);
|
|
879
1154
|
|
|
880
1155
|
// Checkpoint should never be called for a text-only response
|
|
881
1156
|
expect(checkpoints).toHaveLength(0);
|
|
882
1157
|
// Normal response
|
|
883
1158
|
expect(history).toHaveLength(2);
|
|
884
|
-
expect(history[1].content).toEqual([
|
|
1159
|
+
expect(history[1].content).toEqual([
|
|
1160
|
+
{ type: "text", text: "Just a text response" },
|
|
1161
|
+
]);
|
|
885
1162
|
});
|
|
886
1163
|
|
|
887
1164
|
// 22. Checkpoint reports correct toolCount for parallel tool execution
|
|
888
|
-
test(
|
|
1165
|
+
test("checkpoint reports correct toolCount for parallel tools", async () => {
|
|
889
1166
|
const { provider } = createMockProvider([
|
|
890
1167
|
{
|
|
891
1168
|
content: [
|
|
892
|
-
{
|
|
893
|
-
|
|
894
|
-
|
|
1169
|
+
{
|
|
1170
|
+
type: "tool_use" as const,
|
|
1171
|
+
id: "t1",
|
|
1172
|
+
name: "read_file",
|
|
1173
|
+
input: { path: "/a.txt" },
|
|
1174
|
+
},
|
|
1175
|
+
{
|
|
1176
|
+
type: "tool_use" as const,
|
|
1177
|
+
id: "t2",
|
|
1178
|
+
name: "read_file",
|
|
1179
|
+
input: { path: "/b.txt" },
|
|
1180
|
+
},
|
|
1181
|
+
{
|
|
1182
|
+
type: "tool_use" as const,
|
|
1183
|
+
id: "t3",
|
|
1184
|
+
name: "read_file",
|
|
1185
|
+
input: { path: "/c.txt" },
|
|
1186
|
+
},
|
|
895
1187
|
],
|
|
896
|
-
model:
|
|
1188
|
+
model: "mock-model",
|
|
897
1189
|
usage: { inputTokens: 10, outputTokens: 5 },
|
|
898
|
-
stopReason:
|
|
1190
|
+
stopReason: "tool_use" as const,
|
|
899
1191
|
},
|
|
900
|
-
textResponse(
|
|
1192
|
+
textResponse("Got all three"),
|
|
901
1193
|
]);
|
|
902
1194
|
|
|
903
|
-
const toolExecutor = async () => ({ content:
|
|
904
|
-
const loop = new AgentLoop(
|
|
1195
|
+
const toolExecutor = async () => ({ content: "data", isError: false });
|
|
1196
|
+
const loop = new AgentLoop(
|
|
1197
|
+
provider,
|
|
1198
|
+
"system",
|
|
1199
|
+
{},
|
|
1200
|
+
dummyTools,
|
|
1201
|
+
toolExecutor,
|
|
1202
|
+
);
|
|
905
1203
|
|
|
906
1204
|
const checkpoints: CheckpointInfo[] = [];
|
|
907
1205
|
const onCheckpoint = (checkpoint: CheckpointInfo): CheckpointDecision => {
|
|
908
1206
|
checkpoints.push(checkpoint);
|
|
909
|
-
return
|
|
1207
|
+
return "continue";
|
|
910
1208
|
};
|
|
911
1209
|
|
|
912
1210
|
await loop.run([userMessage], () => {}, undefined, undefined, onCheckpoint);
|
|
@@ -917,27 +1215,41 @@ describe('AgentLoop', () => {
|
|
|
917
1215
|
});
|
|
918
1216
|
|
|
919
1217
|
// 23. Multiple checkpoints across a multi-turn run with selective yield on turn 3
|
|
920
|
-
test(
|
|
1218
|
+
test("multiple checkpoints with selective yield — executes turns 0-2, yields at turn 3, never runs 4+", async () => {
|
|
921
1219
|
// Mock provider to return tool_use for 5 turns, then text
|
|
922
1220
|
const responses: ProviderResponse[] = [];
|
|
923
1221
|
for (let i = 0; i < 5; i++) {
|
|
924
|
-
responses.push(
|
|
1222
|
+
responses.push(
|
|
1223
|
+
toolUseResponse(`t${i}`, "read_file", { path: `/file${i}.txt` }),
|
|
1224
|
+
);
|
|
925
1225
|
}
|
|
926
|
-
responses.push(textResponse(
|
|
1226
|
+
responses.push(textResponse("Should never reach this"));
|
|
927
1227
|
|
|
928
1228
|
const { provider, calls } = createMockProvider(responses);
|
|
929
|
-
const toolExecutor = async () => ({ content:
|
|
930
|
-
const loop = new AgentLoop(
|
|
1229
|
+
const toolExecutor = async () => ({ content: "data", isError: false });
|
|
1230
|
+
const loop = new AgentLoop(
|
|
1231
|
+
provider,
|
|
1232
|
+
"system",
|
|
1233
|
+
{},
|
|
1234
|
+
dummyTools,
|
|
1235
|
+
toolExecutor,
|
|
1236
|
+
);
|
|
931
1237
|
|
|
932
1238
|
const checkpoints: CheckpointInfo[] = [];
|
|
933
1239
|
const onCheckpoint = (checkpoint: CheckpointInfo): CheckpointDecision => {
|
|
934
1240
|
checkpoints.push(checkpoint);
|
|
935
1241
|
// Yield on turn 3 (0-indexed)
|
|
936
|
-
return checkpoint.turnIndex === 3 ?
|
|
1242
|
+
return checkpoint.turnIndex === 3 ? "yield" : "continue";
|
|
937
1243
|
};
|
|
938
1244
|
|
|
939
1245
|
const events: AgentEvent[] = [];
|
|
940
|
-
const history = await loop.run(
|
|
1246
|
+
const history = await loop.run(
|
|
1247
|
+
[userMessage],
|
|
1248
|
+
collectEvents(events),
|
|
1249
|
+
undefined,
|
|
1250
|
+
undefined,
|
|
1251
|
+
onCheckpoint,
|
|
1252
|
+
);
|
|
941
1253
|
|
|
942
1254
|
// Turns 0, 1, 2, 3 execute (4 provider calls). Turn 3 yields, so turns 4+ never execute.
|
|
943
1255
|
expect(calls).toHaveLength(4);
|
|
@@ -956,45 +1268,61 @@ describe('AgentLoop', () => {
|
|
|
956
1268
|
expect(history).toHaveLength(9);
|
|
957
1269
|
|
|
958
1270
|
// Verify the last two messages are from turn 3
|
|
959
|
-
expect(history[7].role).toBe(
|
|
960
|
-
const lastAssistantToolUse = history[7].content.find(
|
|
1271
|
+
expect(history[7].role).toBe("assistant");
|
|
1272
|
+
const lastAssistantToolUse = history[7].content.find(
|
|
1273
|
+
(b) => b.type === "tool_use",
|
|
1274
|
+
);
|
|
961
1275
|
expect(lastAssistantToolUse).toBeDefined();
|
|
962
|
-
if (lastAssistantToolUse && lastAssistantToolUse.type ===
|
|
963
|
-
expect(lastAssistantToolUse.id).toBe(
|
|
1276
|
+
if (lastAssistantToolUse && lastAssistantToolUse.type === "tool_use") {
|
|
1277
|
+
expect(lastAssistantToolUse.id).toBe("t3");
|
|
964
1278
|
}
|
|
965
|
-
expect(history[8].role).toBe(
|
|
1279
|
+
expect(history[8].role).toBe("user");
|
|
966
1280
|
const lastToolResult = history[8].content.find(
|
|
967
|
-
(b): b is Extract<ContentBlock, { type:
|
|
1281
|
+
(b): b is Extract<ContentBlock, { type: "tool_result" }> =>
|
|
1282
|
+
b.type === "tool_result",
|
|
968
1283
|
);
|
|
969
1284
|
expect(lastToolResult).toBeDefined();
|
|
970
|
-
expect(lastToolResult!.tool_use_id).toBe(
|
|
1285
|
+
expect(lastToolResult!.tool_use_id).toBe("t3");
|
|
971
1286
|
|
|
972
1287
|
// Verify turns 4+ never executed — no tool_use event for t4
|
|
973
1288
|
const toolUseEvents = events.filter(
|
|
974
|
-
(e): e is Extract<AgentEvent, { type:
|
|
1289
|
+
(e): e is Extract<AgentEvent, { type: "tool_use" }> =>
|
|
1290
|
+
e.type === "tool_use",
|
|
975
1291
|
);
|
|
976
1292
|
const toolUseNames = toolUseEvents.map((e) => e.id);
|
|
977
|
-
expect(toolUseNames).toEqual([
|
|
978
|
-
expect(toolUseNames).not.toContain(
|
|
1293
|
+
expect(toolUseNames).toEqual(["t0", "t1", "t2", "t3"]);
|
|
1294
|
+
expect(toolUseNames).not.toContain("t4");
|
|
979
1295
|
});
|
|
980
1296
|
|
|
981
1297
|
// 24. Yield on second turn — first turn proceeds, second stops
|
|
982
|
-
test(
|
|
1298
|
+
test("yield on second turn lets first turn proceed and stops on second", async () => {
|
|
983
1299
|
const { provider, calls } = createMockProvider([
|
|
984
|
-
toolUseResponse(
|
|
985
|
-
toolUseResponse(
|
|
986
|
-
textResponse(
|
|
1300
|
+
toolUseResponse("t1", "read_file", { path: "/a.txt" }),
|
|
1301
|
+
toolUseResponse("t2", "read_file", { path: "/b.txt" }),
|
|
1302
|
+
textResponse("Should not reach"),
|
|
987
1303
|
]);
|
|
988
1304
|
|
|
989
|
-
const toolExecutor = async () => ({ content:
|
|
990
|
-
const loop = new AgentLoop(
|
|
1305
|
+
const toolExecutor = async () => ({ content: "data", isError: false });
|
|
1306
|
+
const loop = new AgentLoop(
|
|
1307
|
+
provider,
|
|
1308
|
+
"system",
|
|
1309
|
+
{},
|
|
1310
|
+
dummyTools,
|
|
1311
|
+
toolExecutor,
|
|
1312
|
+
);
|
|
991
1313
|
|
|
992
1314
|
const onCheckpoint = (checkpoint: CheckpointInfo): CheckpointDecision => {
|
|
993
1315
|
// Yield on the second turn (turnIndex 1)
|
|
994
|
-
return checkpoint.turnIndex === 1 ?
|
|
1316
|
+
return checkpoint.turnIndex === 1 ? "yield" : "continue";
|
|
995
1317
|
};
|
|
996
1318
|
|
|
997
|
-
const history = await loop.run(
|
|
1319
|
+
const history = await loop.run(
|
|
1320
|
+
[userMessage],
|
|
1321
|
+
() => {},
|
|
1322
|
+
undefined,
|
|
1323
|
+
undefined,
|
|
1324
|
+
onCheckpoint,
|
|
1325
|
+
);
|
|
998
1326
|
|
|
999
1327
|
// 2 provider calls: first tool turn + second tool turn (yield after second)
|
|
1000
1328
|
expect(calls).toHaveLength(2);
|
|
@@ -1007,9 +1335,9 @@ describe('AgentLoop', () => {
|
|
|
1007
1335
|
// ---------------------------------------------------------------------------
|
|
1008
1336
|
|
|
1009
1337
|
// 25. Without resolveTools, static tools are used (backward compatible)
|
|
1010
|
-
test(
|
|
1011
|
-
const { provider, calls } = createMockProvider([textResponse(
|
|
1012
|
-
const loop = new AgentLoop(provider,
|
|
1338
|
+
test("without resolveTools, static tools are passed to provider", async () => {
|
|
1339
|
+
const { provider, calls } = createMockProvider([textResponse("Hi")]);
|
|
1340
|
+
const loop = new AgentLoop(provider, "system", {}, dummyTools);
|
|
1013
1341
|
|
|
1014
1342
|
await loop.run([userMessage], () => {});
|
|
1015
1343
|
|
|
@@ -1017,25 +1345,39 @@ describe('AgentLoop', () => {
|
|
|
1017
1345
|
});
|
|
1018
1346
|
|
|
1019
1347
|
// 26. resolveTools callback is invoked before each provider call
|
|
1020
|
-
test(
|
|
1348
|
+
test("resolveTools is invoked before each provider call", async () => {
|
|
1021
1349
|
const resolverCalls: Message[][] = [];
|
|
1022
1350
|
const resolvedTools: ToolDefinition[] = [
|
|
1023
|
-
{
|
|
1351
|
+
{
|
|
1352
|
+
name: "search",
|
|
1353
|
+
description: "Search files",
|
|
1354
|
+
input_schema: {
|
|
1355
|
+
type: "object",
|
|
1356
|
+
properties: { query: { type: "string" } },
|
|
1357
|
+
},
|
|
1358
|
+
},
|
|
1024
1359
|
];
|
|
1025
1360
|
|
|
1026
1361
|
const { provider } = createMockProvider([
|
|
1027
|
-
toolUseResponse(
|
|
1028
|
-
textResponse(
|
|
1362
|
+
toolUseResponse("t1", "search", { query: "foo" }),
|
|
1363
|
+
textResponse("Found it"),
|
|
1029
1364
|
]);
|
|
1030
1365
|
|
|
1031
|
-
const toolExecutor = async () => ({ content:
|
|
1366
|
+
const toolExecutor = async () => ({ content: "result", isError: false });
|
|
1032
1367
|
|
|
1033
1368
|
const resolveTools = (history: Message[]): ToolDefinition[] => {
|
|
1034
1369
|
resolverCalls.push([...history]);
|
|
1035
1370
|
return resolvedTools;
|
|
1036
1371
|
};
|
|
1037
1372
|
|
|
1038
|
-
const loop = new AgentLoop(
|
|
1373
|
+
const loop = new AgentLoop(
|
|
1374
|
+
provider,
|
|
1375
|
+
"system",
|
|
1376
|
+
{},
|
|
1377
|
+
[],
|
|
1378
|
+
toolExecutor,
|
|
1379
|
+
resolveTools,
|
|
1380
|
+
);
|
|
1039
1381
|
await loop.run([userMessage], () => {});
|
|
1040
1382
|
|
|
1041
1383
|
// resolveTools should be called once per provider turn (2 turns total)
|
|
@@ -1050,17 +1392,28 @@ describe('AgentLoop', () => {
|
|
|
1050
1392
|
});
|
|
1051
1393
|
|
|
1052
1394
|
// 27. Resolved tool list is passed to the provider
|
|
1053
|
-
test(
|
|
1395
|
+
test("resolved tools are passed to the provider instead of static tools", async () => {
|
|
1054
1396
|
const dynamicTools: ToolDefinition[] = [
|
|
1055
|
-
{
|
|
1397
|
+
{
|
|
1398
|
+
name: "dynamic_tool",
|
|
1399
|
+
description: "Dynamic",
|
|
1400
|
+
input_schema: { type: "object" },
|
|
1401
|
+
},
|
|
1056
1402
|
];
|
|
1057
1403
|
|
|
1058
|
-
const { provider, calls } = createMockProvider([textResponse(
|
|
1404
|
+
const { provider, calls } = createMockProvider([textResponse("Hi")]);
|
|
1059
1405
|
|
|
1060
1406
|
const resolveTools = (): ToolDefinition[] => dynamicTools;
|
|
1061
1407
|
|
|
1062
1408
|
// Pass different static tools to verify they are overridden
|
|
1063
|
-
const loop = new AgentLoop(
|
|
1409
|
+
const loop = new AgentLoop(
|
|
1410
|
+
provider,
|
|
1411
|
+
"system",
|
|
1412
|
+
{},
|
|
1413
|
+
dummyTools,
|
|
1414
|
+
undefined,
|
|
1415
|
+
resolveTools,
|
|
1416
|
+
);
|
|
1064
1417
|
await loop.run([userMessage], () => {});
|
|
1065
1418
|
|
|
1066
1419
|
// Provider should receive the dynamically resolved tools, not the static ones
|
|
@@ -1069,31 +1422,59 @@ describe('AgentLoop', () => {
|
|
|
1069
1422
|
});
|
|
1070
1423
|
|
|
1071
1424
|
// 28. Tool list can change between turns
|
|
1072
|
-
test(
|
|
1425
|
+
test("resolveTools can return different tools on each turn", async () => {
|
|
1073
1426
|
const toolsPerTurn: ToolDefinition[][] = [
|
|
1074
|
-
[{ name: 'tool_a', description: 'Tool A', input_schema: { type: 'object' } }],
|
|
1075
1427
|
[
|
|
1076
|
-
{
|
|
1077
|
-
|
|
1428
|
+
{
|
|
1429
|
+
name: "tool_a",
|
|
1430
|
+
description: "Tool A",
|
|
1431
|
+
input_schema: { type: "object" },
|
|
1432
|
+
},
|
|
1433
|
+
],
|
|
1434
|
+
[
|
|
1435
|
+
{
|
|
1436
|
+
name: "tool_a",
|
|
1437
|
+
description: "Tool A",
|
|
1438
|
+
input_schema: { type: "object" },
|
|
1439
|
+
},
|
|
1440
|
+
{
|
|
1441
|
+
name: "tool_b",
|
|
1442
|
+
description: "Tool B",
|
|
1443
|
+
input_schema: { type: "object" },
|
|
1444
|
+
},
|
|
1445
|
+
],
|
|
1446
|
+
[
|
|
1447
|
+
{
|
|
1448
|
+
name: "tool_c",
|
|
1449
|
+
description: "Tool C",
|
|
1450
|
+
input_schema: { type: "object" },
|
|
1451
|
+
},
|
|
1078
1452
|
],
|
|
1079
|
-
[{ name: 'tool_c', description: 'Tool C', input_schema: { type: 'object' } }],
|
|
1080
1453
|
];
|
|
1081
1454
|
|
|
1082
1455
|
let turnIndex = 0;
|
|
1083
1456
|
const resolveTools = (): ToolDefinition[] => {
|
|
1084
|
-
const tools =
|
|
1457
|
+
const tools =
|
|
1458
|
+
toolsPerTurn[turnIndex] ?? toolsPerTurn[toolsPerTurn.length - 1];
|
|
1085
1459
|
turnIndex++;
|
|
1086
1460
|
return tools;
|
|
1087
1461
|
};
|
|
1088
1462
|
|
|
1089
1463
|
const { provider, calls } = createMockProvider([
|
|
1090
|
-
toolUseResponse(
|
|
1091
|
-
toolUseResponse(
|
|
1092
|
-
textResponse(
|
|
1464
|
+
toolUseResponse("t1", "tool_a", {}),
|
|
1465
|
+
toolUseResponse("t2", "tool_a", {}),
|
|
1466
|
+
textResponse("Done"),
|
|
1093
1467
|
]);
|
|
1094
1468
|
|
|
1095
|
-
const toolExecutor = async () => ({ content:
|
|
1096
|
-
const loop = new AgentLoop(
|
|
1469
|
+
const toolExecutor = async () => ({ content: "ok", isError: false });
|
|
1470
|
+
const loop = new AgentLoop(
|
|
1471
|
+
provider,
|
|
1472
|
+
"system",
|
|
1473
|
+
{},
|
|
1474
|
+
[],
|
|
1475
|
+
toolExecutor,
|
|
1476
|
+
resolveTools,
|
|
1477
|
+
);
|
|
1097
1478
|
await loop.run([userMessage], () => {});
|
|
1098
1479
|
|
|
1099
1480
|
// Provider should have been called 3 times
|
|
@@ -1106,12 +1487,21 @@ describe('AgentLoop', () => {
|
|
|
1106
1487
|
});
|
|
1107
1488
|
|
|
1108
1489
|
// 29. resolveTools returning empty array means no tools passed to provider
|
|
1109
|
-
test(
|
|
1490
|
+
test("resolveTools returning empty array sends no tools to provider", async () => {
|
|
1110
1491
|
const resolveTools = (): ToolDefinition[] => [];
|
|
1111
1492
|
|
|
1112
|
-
const { provider, calls } = createMockProvider([
|
|
1493
|
+
const { provider, calls } = createMockProvider([
|
|
1494
|
+
textResponse("No tools available"),
|
|
1495
|
+
]);
|
|
1113
1496
|
|
|
1114
|
-
const loop = new AgentLoop(
|
|
1497
|
+
const loop = new AgentLoop(
|
|
1498
|
+
provider,
|
|
1499
|
+
"system",
|
|
1500
|
+
{},
|
|
1501
|
+
dummyTools,
|
|
1502
|
+
undefined,
|
|
1503
|
+
resolveTools,
|
|
1504
|
+
);
|
|
1115
1505
|
await loop.run([userMessage], () => {});
|
|
1116
1506
|
|
|
1117
1507
|
// Empty array should result in undefined tools (same as no-tools behavior)
|
|
@@ -1123,13 +1513,13 @@ describe('AgentLoop', () => {
|
|
|
1123
1513
|
// ---------------------------------------------------------------------------
|
|
1124
1514
|
|
|
1125
1515
|
// 30. Oversized tool results are truncated before entering history
|
|
1126
|
-
test(
|
|
1127
|
-
const toolCallId =
|
|
1128
|
-
const largeContent =
|
|
1516
|
+
test("truncates oversized tool results before adding to history", async () => {
|
|
1517
|
+
const toolCallId = "tool-large";
|
|
1518
|
+
const largeContent = "x".repeat(500_000);
|
|
1129
1519
|
|
|
1130
1520
|
const { provider, calls } = createMockProvider([
|
|
1131
|
-
toolUseResponse(toolCallId,
|
|
1132
|
-
textResponse(
|
|
1521
|
+
toolUseResponse(toolCallId, "read_file", { path: "/huge.txt" }),
|
|
1522
|
+
textResponse("Got it."),
|
|
1133
1523
|
]);
|
|
1134
1524
|
|
|
1135
1525
|
const toolExecutor = async () => {
|
|
@@ -1138,7 +1528,7 @@ describe('AgentLoop', () => {
|
|
|
1138
1528
|
|
|
1139
1529
|
const loop = new AgentLoop(
|
|
1140
1530
|
provider,
|
|
1141
|
-
|
|
1531
|
+
"system",
|
|
1142
1532
|
{ maxInputTokens: 180_000 },
|
|
1143
1533
|
dummyTools,
|
|
1144
1534
|
toolExecutor,
|
|
@@ -1148,10 +1538,11 @@ describe('AgentLoop', () => {
|
|
|
1148
1538
|
|
|
1149
1539
|
// The tool result user message is at index 2 in history
|
|
1150
1540
|
const toolResultMsg = history[2];
|
|
1151
|
-
expect(toolResultMsg.role).toBe(
|
|
1541
|
+
expect(toolResultMsg.role).toBe("user");
|
|
1152
1542
|
|
|
1153
1543
|
const toolResultBlock = toolResultMsg.content.find(
|
|
1154
|
-
(b): b is Extract<ContentBlock, { type:
|
|
1544
|
+
(b): b is Extract<ContentBlock, { type: "tool_result" }> =>
|
|
1545
|
+
b.type === "tool_result",
|
|
1155
1546
|
);
|
|
1156
1547
|
expect(toolResultBlock).toBeDefined();
|
|
1157
1548
|
|
|
@@ -1159,28 +1550,27 @@ describe('AgentLoop', () => {
|
|
|
1159
1550
|
expect(toolResultBlock!.content.length).toBeLessThan(500_000);
|
|
1160
1551
|
|
|
1161
1552
|
// Content should end with the truncation suffix
|
|
1162
|
-
expect(toolResultBlock!.content).toContain(
|
|
1163
|
-
'[Content truncated',
|
|
1164
|
-
);
|
|
1553
|
+
expect(toolResultBlock!.content).toContain("[Content truncated");
|
|
1165
1554
|
|
|
1166
1555
|
// The second provider call should also have the truncated content in messages
|
|
1167
1556
|
const secondCallMessages = calls[1].messages;
|
|
1168
1557
|
const lastMsg = secondCallMessages[secondCallMessages.length - 1];
|
|
1169
1558
|
const sentBlock = lastMsg.content.find(
|
|
1170
|
-
(b): b is Extract<ContentBlock, { type:
|
|
1559
|
+
(b): b is Extract<ContentBlock, { type: "tool_result" }> =>
|
|
1560
|
+
b.type === "tool_result",
|
|
1171
1561
|
);
|
|
1172
1562
|
expect(sentBlock).toBeDefined();
|
|
1173
1563
|
expect(sentBlock!.content.length).toBeLessThan(500_000);
|
|
1174
1564
|
});
|
|
1175
1565
|
|
|
1176
1566
|
// 31. Non-oversized tool results pass through unchanged
|
|
1177
|
-
test(
|
|
1178
|
-
const toolCallId =
|
|
1179
|
-
const smallContent =
|
|
1567
|
+
test("non-oversized tool results pass through unchanged", async () => {
|
|
1568
|
+
const toolCallId = "tool-small";
|
|
1569
|
+
const smallContent = "small content";
|
|
1180
1570
|
|
|
1181
1571
|
const { provider, calls } = createMockProvider([
|
|
1182
|
-
toolUseResponse(toolCallId,
|
|
1183
|
-
textResponse(
|
|
1572
|
+
toolUseResponse(toolCallId, "read_file", { path: "/small.txt" }),
|
|
1573
|
+
textResponse("Got it."),
|
|
1184
1574
|
]);
|
|
1185
1575
|
|
|
1186
1576
|
const toolExecutor = async () => {
|
|
@@ -1189,7 +1579,7 @@ describe('AgentLoop', () => {
|
|
|
1189
1579
|
|
|
1190
1580
|
const loop = new AgentLoop(
|
|
1191
1581
|
provider,
|
|
1192
|
-
|
|
1582
|
+
"system",
|
|
1193
1583
|
{ maxInputTokens: 180_000 },
|
|
1194
1584
|
dummyTools,
|
|
1195
1585
|
toolExecutor,
|
|
@@ -1199,10 +1589,11 @@ describe('AgentLoop', () => {
|
|
|
1199
1589
|
|
|
1200
1590
|
// The tool result user message is at index 2 in history
|
|
1201
1591
|
const toolResultMsg = history[2];
|
|
1202
|
-
expect(toolResultMsg.role).toBe(
|
|
1592
|
+
expect(toolResultMsg.role).toBe("user");
|
|
1203
1593
|
|
|
1204
1594
|
const toolResultBlock = toolResultMsg.content.find(
|
|
1205
|
-
(b): b is Extract<ContentBlock, { type:
|
|
1595
|
+
(b): b is Extract<ContentBlock, { type: "tool_result" }> =>
|
|
1596
|
+
b.type === "tool_result",
|
|
1206
1597
|
);
|
|
1207
1598
|
expect(toolResultBlock).toBeDefined();
|
|
1208
1599
|
|
|
@@ -1213,7 +1604,8 @@ describe('AgentLoop', () => {
|
|
|
1213
1604
|
const secondCallMessages = calls[1].messages;
|
|
1214
1605
|
const lastMsg = secondCallMessages[secondCallMessages.length - 1];
|
|
1215
1606
|
const sentBlock = lastMsg.content.find(
|
|
1216
|
-
(b): b is Extract<ContentBlock, { type:
|
|
1607
|
+
(b): b is Extract<ContentBlock, { type: "tool_result" }> =>
|
|
1608
|
+
b.type === "tool_result",
|
|
1217
1609
|
);
|
|
1218
1610
|
expect(sentBlock).toBeDefined();
|
|
1219
1611
|
expect(sentBlock!.content).toBe(smallContent);
|
|
@@ -1225,32 +1617,42 @@ describe('AgentLoop', () => {
|
|
|
1225
1617
|
|
|
1226
1618
|
// 32. Tool results with sensitiveBindings populate substitution map and
|
|
1227
1619
|
// final assistant message text is resolved with real values.
|
|
1228
|
-
test(
|
|
1229
|
-
const placeholder =
|
|
1230
|
-
const realToken =
|
|
1620
|
+
test("resolves sensitive output placeholders in final assistant message", async () => {
|
|
1621
|
+
const placeholder = "VELLUM_ASSISTANT_INVITE_CODE_TEST1234";
|
|
1622
|
+
const realToken = "realInviteToken999";
|
|
1231
1623
|
|
|
1232
1624
|
const { provider, calls } = createMockProvider([
|
|
1233
|
-
toolUseResponse(
|
|
1625
|
+
toolUseResponse("t1", "bash", { command: "create invite" }),
|
|
1234
1626
|
// The LLM responds using the placeholder (it never saw the real token)
|
|
1235
|
-
textResponse(
|
|
1627
|
+
textResponse(
|
|
1628
|
+
`Here is your invite link: https://t.me/bot?start=iv_${placeholder}`,
|
|
1629
|
+
),
|
|
1236
1630
|
]);
|
|
1237
1631
|
|
|
1238
1632
|
const toolExecutor = async () => ({
|
|
1239
1633
|
content: `https://t.me/bot?start=iv_${placeholder}`,
|
|
1240
1634
|
isError: false,
|
|
1241
|
-
sensitiveBindings: [
|
|
1635
|
+
sensitiveBindings: [
|
|
1636
|
+
{ kind: "invite_code" as const, placeholder, value: realToken },
|
|
1637
|
+
],
|
|
1242
1638
|
});
|
|
1243
1639
|
|
|
1244
|
-
const loop = new AgentLoop(
|
|
1640
|
+
const loop = new AgentLoop(
|
|
1641
|
+
provider,
|
|
1642
|
+
"system",
|
|
1643
|
+
{},
|
|
1644
|
+
dummyTools,
|
|
1645
|
+
toolExecutor,
|
|
1646
|
+
);
|
|
1245
1647
|
const events: AgentEvent[] = [];
|
|
1246
1648
|
const history = await loop.run([userMessage], collectEvents(events));
|
|
1247
1649
|
|
|
1248
1650
|
// The final assistant message in HISTORY should retain placeholders
|
|
1249
1651
|
// (so the model never sees real values on subsequent turns)
|
|
1250
1652
|
const lastAssistant = history[history.length - 1];
|
|
1251
|
-
expect(lastAssistant.role).toBe(
|
|
1653
|
+
expect(lastAssistant.role).toBe("assistant");
|
|
1252
1654
|
const historyTextBlock = lastAssistant.content.find(
|
|
1253
|
-
(b): b is Extract<ContentBlock, { type:
|
|
1655
|
+
(b): b is Extract<ContentBlock, { type: "text" }> => b.type === "text",
|
|
1254
1656
|
);
|
|
1255
1657
|
expect(historyTextBlock).toBeDefined();
|
|
1256
1658
|
expect(historyTextBlock!.text).toContain(placeholder);
|
|
@@ -1259,11 +1661,12 @@ describe('AgentLoop', () => {
|
|
|
1259
1661
|
// The message_complete EVENT should also retain placeholders (persisted
|
|
1260
1662
|
// to conversation store; real values leak on session reload otherwise)
|
|
1261
1663
|
const completeEvents = events.filter(
|
|
1262
|
-
(e): e is Extract<AgentEvent, { type:
|
|
1664
|
+
(e): e is Extract<AgentEvent, { type: "message_complete" }> =>
|
|
1665
|
+
e.type === "message_complete",
|
|
1263
1666
|
);
|
|
1264
1667
|
const lastComplete = completeEvents[completeEvents.length - 1];
|
|
1265
1668
|
const completeText = lastComplete.message.content.find(
|
|
1266
|
-
(b): b is Extract<ContentBlock, { type:
|
|
1669
|
+
(b): b is Extract<ContentBlock, { type: "text" }> => b.type === "text",
|
|
1267
1670
|
);
|
|
1268
1671
|
expect(completeText!.text).toContain(placeholder);
|
|
1269
1672
|
expect(completeText!.text).not.toContain(realToken);
|
|
@@ -1272,23 +1675,25 @@ describe('AgentLoop', () => {
|
|
|
1272
1675
|
// NOT the raw token (model never sees the real value)
|
|
1273
1676
|
const secondCallMessages = calls[1].messages;
|
|
1274
1677
|
const toolResultMsg = secondCallMessages.find(
|
|
1275
|
-
(m) =>
|
|
1678
|
+
(m) =>
|
|
1679
|
+
m.role === "user" && m.content.some((b) => b.type === "tool_result"),
|
|
1276
1680
|
);
|
|
1277
1681
|
expect(toolResultMsg).toBeDefined();
|
|
1278
1682
|
const toolResultBlock = toolResultMsg!.content.find(
|
|
1279
|
-
(b): b is Extract<ContentBlock, { type:
|
|
1683
|
+
(b): b is Extract<ContentBlock, { type: "tool_result" }> =>
|
|
1684
|
+
b.type === "tool_result",
|
|
1280
1685
|
);
|
|
1281
1686
|
expect(toolResultBlock!.content).toContain(placeholder);
|
|
1282
1687
|
expect(toolResultBlock!.content).not.toContain(realToken);
|
|
1283
1688
|
});
|
|
1284
1689
|
|
|
1285
1690
|
// 33. Streamed text_delta events have placeholders resolved to real values
|
|
1286
|
-
test(
|
|
1287
|
-
const placeholder =
|
|
1288
|
-
const realToken =
|
|
1691
|
+
test("resolves sensitive output placeholders in streamed text_delta events", async () => {
|
|
1692
|
+
const placeholder = "VELLUM_ASSISTANT_INVITE_CODE_STRM5678";
|
|
1693
|
+
const realToken = "streamedRealToken";
|
|
1289
1694
|
|
|
1290
1695
|
const { provider } = createMockProvider([
|
|
1291
|
-
toolUseResponse(
|
|
1696
|
+
toolUseResponse("t1", "bash", { command: "invite" }),
|
|
1292
1697
|
// Response text includes the placeholder
|
|
1293
1698
|
textResponse(`Link: https://t.me/bot?start=iv_${placeholder}`),
|
|
1294
1699
|
]);
|
|
@@ -1296,18 +1701,27 @@ describe('AgentLoop', () => {
|
|
|
1296
1701
|
const toolExecutor = async () => ({
|
|
1297
1702
|
content: `https://t.me/bot?start=iv_${placeholder}`,
|
|
1298
1703
|
isError: false,
|
|
1299
|
-
sensitiveBindings: [
|
|
1704
|
+
sensitiveBindings: [
|
|
1705
|
+
{ kind: "invite_code" as const, placeholder, value: realToken },
|
|
1706
|
+
],
|
|
1300
1707
|
});
|
|
1301
1708
|
|
|
1302
|
-
const loop = new AgentLoop(
|
|
1709
|
+
const loop = new AgentLoop(
|
|
1710
|
+
provider,
|
|
1711
|
+
"system",
|
|
1712
|
+
{},
|
|
1713
|
+
dummyTools,
|
|
1714
|
+
toolExecutor,
|
|
1715
|
+
);
|
|
1303
1716
|
const events: AgentEvent[] = [];
|
|
1304
1717
|
await loop.run([userMessage], collectEvents(events));
|
|
1305
1718
|
|
|
1306
1719
|
// Collect all text_delta events from the final turn (after tool result)
|
|
1307
1720
|
const textDeltas = events.filter(
|
|
1308
|
-
(e): e is Extract<AgentEvent, { type:
|
|
1721
|
+
(e): e is Extract<AgentEvent, { type: "text_delta" }> =>
|
|
1722
|
+
e.type === "text_delta",
|
|
1309
1723
|
);
|
|
1310
|
-
const allStreamedText = textDeltas.map((e) => e.text).join(
|
|
1724
|
+
const allStreamedText = textDeltas.map((e) => e.text).join("");
|
|
1311
1725
|
|
|
1312
1726
|
// Streamed text should contain the real token, not the placeholder
|
|
1313
1727
|
expect(allStreamedText).toContain(realToken);
|
|
@@ -1315,26 +1729,32 @@ describe('AgentLoop', () => {
|
|
|
1315
1729
|
});
|
|
1316
1730
|
|
|
1317
1731
|
// 34. Without sensitive bindings, text passes through unchanged
|
|
1318
|
-
test(
|
|
1732
|
+
test("text passes through unchanged when no sensitive bindings exist", async () => {
|
|
1319
1733
|
const { provider } = createMockProvider([
|
|
1320
|
-
toolUseResponse(
|
|
1321
|
-
textResponse(
|
|
1734
|
+
toolUseResponse("t1", "read_file", { path: "/test.txt" }),
|
|
1735
|
+
textResponse("Normal response with no placeholders."),
|
|
1322
1736
|
]);
|
|
1323
1737
|
|
|
1324
1738
|
const toolExecutor = async () => ({
|
|
1325
|
-
content:
|
|
1739
|
+
content: "file contents",
|
|
1326
1740
|
isError: false,
|
|
1327
1741
|
// No sensitiveBindings
|
|
1328
1742
|
});
|
|
1329
1743
|
|
|
1330
|
-
const loop = new AgentLoop(
|
|
1744
|
+
const loop = new AgentLoop(
|
|
1745
|
+
provider,
|
|
1746
|
+
"system",
|
|
1747
|
+
{},
|
|
1748
|
+
dummyTools,
|
|
1749
|
+
toolExecutor,
|
|
1750
|
+
);
|
|
1331
1751
|
const events: AgentEvent[] = [];
|
|
1332
1752
|
const history = await loop.run([userMessage], collectEvents(events));
|
|
1333
1753
|
|
|
1334
1754
|
const lastAssistant = history[history.length - 1];
|
|
1335
1755
|
const textBlock = lastAssistant.content.find(
|
|
1336
|
-
(b): b is Extract<ContentBlock, { type:
|
|
1756
|
+
(b): b is Extract<ContentBlock, { type: "text" }> => b.type === "text",
|
|
1337
1757
|
);
|
|
1338
|
-
expect(textBlock!.text).toBe(
|
|
1758
|
+
expect(textBlock!.text).toBe("Normal response with no placeholders.");
|
|
1339
1759
|
});
|
|
1340
1760
|
});
|