@vellumai/assistant 0.6.5 → 0.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +9 -1
- package/ARCHITECTURE.md +15 -17
- package/Dockerfile +6 -4
- package/__tests__/permissions/gateway-threshold-reader.test.ts +283 -0
- package/docs/architecture/integrations.md +32 -39
- package/docs/architecture/memory.md +25 -30
- package/docs/architecture/security.md +7 -6
- package/docs/browser-use-architecture-phase2.md +63 -20
- package/docs/plugins.md +761 -0
- package/examples/plugins/echo/README.md +132 -0
- package/examples/plugins/echo/package.json +17 -0
- package/examples/plugins/echo/register.ts +187 -0
- package/node_modules/@vellumai/egress-proxy/src/types.ts +19 -0
- package/openapi.yaml +212 -68
- package/package.json +1 -1
- package/src/__tests__/app-compiler.test.ts +57 -0
- package/src/__tests__/approval-cascade.test.ts +7 -2
- package/src/__tests__/auto-analysis-end-to-end.test.ts +1 -0
- package/src/__tests__/avatar-generator.test.ts +4 -2
- package/src/__tests__/bundled-asset.test.ts +6 -6
- package/src/__tests__/catalog-cache.test.ts +69 -0
- package/src/__tests__/checker.test.ts +459 -171
- package/src/__tests__/circuit-breaker-pipeline.test.ts +406 -0
- package/src/__tests__/compaction-events.test.ts +501 -0
- package/src/__tests__/compaction-pipeline.test.ts +210 -0
- package/src/__tests__/compaction-strip-metadata-clear.test.ts +181 -0
- package/src/__tests__/compaction-timeout-recovery.test.ts +262 -0
- package/src/__tests__/config-model-image-provider.test.ts +110 -0
- package/src/__tests__/config-schema.test.ts +22 -9
- package/src/__tests__/config-watcher-cleanup-throttle.test.ts +0 -4
- package/src/__tests__/contacts-tools.test.ts +26 -0
- package/src/__tests__/context-overflow-policy.test.ts +7 -7
- package/src/__tests__/context-window-manager.test.ts +355 -4
- package/src/__tests__/conversation-abort-tool-results.test.ts +4 -1
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +26 -30
- package/src/__tests__/conversation-agent-loop.test.ts +30 -141
- package/src/__tests__/conversation-confirmation-signals.test.ts +6 -1
- package/src/__tests__/conversation-history-web-search.test.ts +1 -0
- package/src/__tests__/conversation-init.benchmark.test.ts +2 -16
- package/src/__tests__/conversation-pairing.test.ts +174 -10
- package/src/__tests__/conversation-pre-run-repair.test.ts +4 -1
- package/src/__tests__/conversation-process-callsite.test.ts +3 -0
- package/src/__tests__/conversation-provider-retry-repair.test.ts +16 -7
- package/src/__tests__/conversation-queue.test.ts +29 -14
- package/src/__tests__/conversation-routes-disk-view.test.ts +7 -6
- package/src/__tests__/conversation-runtime-assembly.test.ts +155 -110
- package/src/__tests__/conversation-runtime-workspace.test.ts +23 -38
- package/src/__tests__/conversation-seed-composer.test.ts +2 -2
- package/src/__tests__/conversation-slash-queue.test.ts +7 -2
- package/src/__tests__/conversation-slash-unknown.test.ts +25 -2
- package/src/__tests__/conversation-speed-override.test.ts +6 -1
- package/src/__tests__/conversation-title-service.test.ts +116 -0
- package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +41 -2
- package/src/__tests__/conversation-usage.test.ts +1 -1
- package/src/__tests__/conversation-workspace-cache-state.test.ts +4 -1
- package/src/__tests__/conversation-workspace-injection.test.ts +3 -0
- package/src/__tests__/conversation-workspace-tool-tracking.test.ts +4 -1
- package/src/__tests__/credential-health-service.test.ts +78 -9
- package/src/__tests__/credential-security-invariants.test.ts +2 -2
- package/src/__tests__/db-schedule-syntax-migration.test.ts +1 -0
- package/src/__tests__/empty-response-pipeline.test.ts +305 -0
- package/src/__tests__/extension-id-sync-guard.test.ts +3 -3
- package/src/__tests__/first-greeting.test.ts +247 -5
- package/src/__tests__/headless-browser-mode.test.ts +57 -0
- package/src/__tests__/history-repair-pipeline.test.ts +399 -0
- package/src/__tests__/host-browser-e2e-cloud.test.ts +307 -0
- package/src/__tests__/host-browser-e2e-self-hosted.test.ts +3 -3
- package/src/__tests__/host-proxy-interface.test.ts +36 -2
- package/src/__tests__/image-credentials.test.ts +137 -0
- package/src/__tests__/image-service-dispatcher.test.ts +186 -0
- package/src/__tests__/injector-chain.test.ts +526 -0
- package/src/__tests__/intent-routing.test.ts +0 -26
- package/src/__tests__/llm-call-pipeline.test.ts +285 -0
- package/src/__tests__/llm-schema.test.ts +1 -1
- package/src/__tests__/media-generate-image.test.ts +119 -13
- package/src/__tests__/memory-retrieval-pipeline.test.ts +401 -0
- package/src/__tests__/memory-upsert-concurrency.test.ts +1 -0
- package/src/__tests__/migration-import-from-url.test.ts +5 -68
- package/src/__tests__/model-intents.test.ts +4 -2
- package/src/__tests__/notification-broadcaster.test.ts +3 -3
- package/src/__tests__/notification-decision-strategy.test.ts +0 -11
- package/src/__tests__/notification-schedule-notify-dedup.test.ts +108 -0
- package/src/__tests__/oauth-apps-routes.test.ts +1 -1
- package/src/__tests__/oauth-cli.test.ts +14 -12
- package/src/__tests__/oauth-connect-orchestrator.test.ts +4 -13
- package/src/__tests__/oauth-provider-serializer.test.ts +6 -4
- package/src/__tests__/oauth-provider-visibility.test.ts +3 -5
- package/src/__tests__/oauth-providers-routes.test.ts +3 -2
- package/src/__tests__/oauth-store.test.ts +41 -76
- package/src/__tests__/onboarding-template-contract.test.ts +16 -64
- package/src/__tests__/openai-image-service.test.ts +368 -0
- package/src/__tests__/overflow-reduce-pipeline.test.ts +676 -0
- package/src/__tests__/permission-checker-host-gate.test.ts +0 -24
- package/src/__tests__/persist-onboarding-artifacts.test.ts +266 -0
- package/src/__tests__/persistence-pipeline.test.ts +377 -0
- package/src/__tests__/pipeline-runner.test.ts +565 -0
- package/src/__tests__/platform.test.ts +5 -2
- package/src/__tests__/plugin-bootstrap.test.ts +483 -0
- package/src/__tests__/plugin-registry.test.ts +273 -0
- package/src/__tests__/plugin-route-contribution.test.ts +288 -0
- package/src/__tests__/plugin-skill-contribution.test.ts +367 -0
- package/src/__tests__/plugin-tool-contribution.test.ts +286 -0
- package/src/__tests__/plugin-types.test.ts +320 -0
- package/src/__tests__/pricing.test.ts +44 -12
- package/src/__tests__/proxy-approval-callback.test.ts +69 -8
- package/src/__tests__/reaction-persistence.test.ts +1 -0
- package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +1 -0
- package/src/__tests__/registry.test.ts +0 -2
- package/src/__tests__/schedule-routes.test.ts +131 -1
- package/src/__tests__/scheduler-recurrence.test.ts +14 -70
- package/src/__tests__/scheduler-reuse-conversation.test.ts +10 -50
- package/src/__tests__/secret-detection-handler.test.ts +0 -10
- package/src/__tests__/shell-identity.test.ts +0 -134
- package/src/__tests__/suggestion-routes.test.ts +103 -4
- package/src/__tests__/task-memory-cleanup.test.ts +1 -0
- package/src/__tests__/task-scheduler.test.ts +3 -15
- package/src/__tests__/test-preload.ts +11 -0
- package/src/__tests__/title-generate-pipeline.test.ts +224 -0
- package/src/__tests__/token-estimate-pipeline.test.ts +431 -0
- package/src/__tests__/tool-error-pipeline.test.ts +244 -0
- package/src/__tests__/tool-execute-pipeline.test.ts +431 -0
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -6
- package/src/__tests__/tool-executor-shell-integration.test.ts +7 -10
- package/src/__tests__/tool-executor.test.ts +141 -0
- package/src/__tests__/tool-result-truncate-pipeline.test.ts +356 -0
- package/src/__tests__/tool-result-truncation.test.ts +0 -110
- package/src/__tests__/user-plugin-loader.test.ts +191 -0
- package/src/__tests__/workspace-migration-046-seed-conversation-starters-callsite.test.ts +185 -0
- package/src/__tests__/workspace-migration-049-release-notes-default-sonnet.test.ts +100 -0
- package/src/__tests__/workspace-migration-050-seed-main-agent-opus-callsite.test.ts +171 -0
- package/src/__tests__/workspace-migration-051-seed-conversation-summarization-callsite.test.ts +252 -0
- package/src/__tests__/workspace-migration-remove-hooks.test.ts +99 -0
- package/src/__tests__/workspace-policy.test.ts +21 -3
- package/src/agent/loop.ts +340 -102
- package/src/approvals/__tests__/guardian-feed-event.test.ts +304 -0
- package/src/approvals/guardian-request-resolvers.ts +80 -0
- package/src/backup/__tests__/backup-worker.test.ts +2 -13
- package/src/backup/backup-worker.ts +3 -15
- package/src/bundler/app-compiler.ts +84 -1
- package/src/calls/call-state.ts +2 -2
- package/src/channels/__tests__/types.test.ts +3 -3
- package/src/channels/types.ts +6 -4
- package/src/cli/__tests__/notifications.test.ts +87 -211
- package/src/cli/commands/__tests__/backup.test.ts +1 -1
- package/src/cli/commands/__tests__/image-generation.test.ts +255 -35
- package/src/cli/commands/__tests__/inference-send.test.ts +12 -0
- package/src/cli/commands/__tests__/tts-synthesize.test.ts +12 -0
- package/src/cli/commands/backup.ts +2 -2
- package/src/cli/commands/clients.ts +138 -0
- package/src/cli/commands/completions.ts +2 -9
- package/src/cli/commands/conversations.ts +55 -7
- package/src/cli/commands/image-generation.ts +33 -34
- package/src/cli/commands/notifications.ts +68 -103
- package/src/cli/commands/oauth/__tests__/providers-register.test.ts +1 -1
- package/src/cli/commands/oauth/__tests__/providers-update.test.ts +1 -1
- package/src/cli/commands/oauth/connect.ts +2 -2
- package/src/cli/commands/oauth/providers.ts +176 -8
- package/src/cli/commands/oauth/status.ts +46 -36
- package/src/cli/commands/skills.ts +3 -4
- package/src/cli/program.ts +25 -29
- package/src/config/__tests__/backup-schema.test.ts +7 -2
- package/src/config/bundled-skills/app-builder/SKILL.md +2 -2
- package/src/config/bundled-skills/app-builder/references/WIDGETS.md +10 -10
- package/src/config/bundled-skills/contacts/tools/contact-merge.ts +66 -87
- package/src/config/bundled-skills/contacts/tools/contact-search.ts +28 -51
- package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +22 -40
- package/src/config/bundled-skills/image-studio/SKILL.md +2 -1
- package/src/config/bundled-skills/image-studio/TOOLS.json +2 -1
- package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +23 -39
- package/src/config/bundled-skills/messaging/SKILL.md +3 -3
- package/src/config/bundled-skills/messaging/tools/__tests__/messaging-feed-events.test.ts +207 -0
- package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +12 -0
- package/src/config/bundled-skills/messaging/tools/messaging-send.ts +58 -0
- package/src/config/bundled-skills/schedule/SKILL.md +8 -3
- package/src/config/bundled-skills/schedule/TOOLS.json +15 -7
- package/src/config/bundled-skills/schedule/references/SCRIPT_MODE_PATTERNS.md +59 -0
- package/src/config/bundled-tool-registry.ts +0 -15
- package/src/config/feature-flag-registry.json +17 -1
- package/src/config/schema.ts +19 -0
- package/src/config/schemas/backup.ts +1 -1
- package/src/config/schemas/conversations.ts +16 -0
- package/src/config/schemas/llm.ts +2 -3
- package/src/config/schemas/security.ts +6 -6
- package/src/config/schemas/tts.ts +11 -0
- package/src/config/skill-state.ts +6 -2
- package/src/config/skills.ts +94 -5
- package/src/context/__tests__/compact-prompt.test.ts +27 -9
- package/src/context/prompts/compact.md +26 -12
- package/src/context/tool-result-truncation.ts +3 -63
- package/src/context/window-manager.ts +190 -16
- package/src/credential-health/credential-health-service.ts +19 -6
- package/src/daemon/__tests__/conversation-feed-event.test.ts +317 -0
- package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +4 -12
- package/src/daemon/__tests__/conversation-tool-setup.test.ts +14 -15
- package/src/daemon/config-watcher.ts +0 -2
- package/src/daemon/context-overflow-policy.ts +4 -13
- package/src/daemon/conversation-agent-loop-handlers.ts +83 -22
- package/src/daemon/conversation-agent-loop.ts +984 -683
- package/src/daemon/conversation-history.ts +10 -19
- package/src/daemon/conversation-lifecycle.ts +37 -19
- package/src/daemon/conversation-notifiers.ts +2 -110
- package/src/daemon/conversation-process.ts +14 -7
- package/src/daemon/conversation-runtime-assembly.ts +532 -411
- package/src/daemon/conversation-tool-setup.ts +41 -4
- package/src/daemon/conversation.ts +80 -35
- package/src/daemon/external-plugins-bootstrap.ts +478 -0
- package/src/daemon/first-greeting.ts +191 -14
- package/src/daemon/handlers/config-model.ts +11 -0
- package/src/daemon/handlers/skills.ts +5 -1
- package/src/daemon/lifecycle.ts +33 -68
- package/src/daemon/message-types/computer-use.ts +2 -34
- package/src/daemon/message-types/conversations.ts +49 -0
- package/src/daemon/message-types/messages.ts +12 -0
- package/src/daemon/server.ts +5 -3
- package/src/daemon/shutdown-handlers.ts +2 -12
- package/src/daemon/tool-side-effects.ts +14 -56
- package/src/heartbeat/__tests__/heartbeat-feed-event.test.ts +160 -0
- package/src/heartbeat/heartbeat-service.ts +24 -1
- package/src/home/__tests__/feed-population-integration.test.ts +312 -0
- package/src/home/emit-feed-event.ts +7 -0
- package/src/home/feed-types.ts +41 -2
- package/src/home/rewrite-command-preview.ts +66 -0
- package/src/ipc/__tests__/socket-path.test.ts +11 -50
- package/src/ipc/cli-client.ts +1 -1
- package/src/ipc/cli-server.ts +3 -3
- package/src/ipc/gateway-client.ts +4 -1
- package/src/ipc/routes/browser-context.ts +2 -0
- package/src/ipc/routes/browser.ts +1 -0
- package/src/ipc/routes/get-contact.ts +16 -0
- package/src/ipc/routes/index.ts +14 -0
- package/src/ipc/routes/list-clients.ts +31 -0
- package/src/ipc/routes/merge-contacts.ts +17 -0
- package/src/ipc/routes/notification.ts +133 -0
- package/src/ipc/routes/rename-conversation.ts +59 -0
- package/src/ipc/routes/search-contacts.ts +19 -0
- package/src/ipc/routes/upsert-contact.ts +25 -0
- package/src/ipc/socket-path.ts +14 -38
- package/src/media/app-icon-generator.ts +23 -46
- package/src/media/avatar-router.ts +26 -41
- package/src/media/gemini-image-service.ts +8 -41
- package/src/media/image-credentials.ts +73 -0
- package/src/media/image-service.ts +85 -0
- package/src/media/openai-image-service.ts +131 -0
- package/src/media/types.ts +46 -0
- package/src/memory/conversation-crud.ts +48 -18
- package/src/memory/conversation-queries.ts +57 -4
- package/src/memory/conversation-title-service.ts +25 -0
- package/src/memory/db-init.ts +8 -0
- package/src/memory/embedding-gemini.test.ts +41 -2
- package/src/memory/embedding-gemini.ts +6 -1
- package/src/memory/graph/bootstrap.test.ts +282 -0
- package/src/memory/graph/bootstrap.ts +8 -5
- package/src/memory/graph/extraction.ts +10 -2
- package/src/memory/graph/graph-search.test.ts +1 -0
- package/src/memory/graph/inspect.ts +2 -2
- package/src/memory/graph/retriever.ts +10 -3
- package/src/memory/migrations/041-approval-prompt-ts-tracker.ts +26 -0
- package/src/memory/migrations/149-oauth-tables.ts +1 -0
- package/src/memory/migrations/223-schedule-script-column.ts +11 -0
- package/src/memory/migrations/224-oauth-providers-managed-service-is-paid.ts +24 -0
- package/src/memory/migrations/225-oauth-providers-available-scopes.ts +13 -0
- package/src/memory/migrations/index.ts +4 -0
- package/src/memory/pkb/pkb-index.test.ts +1 -0
- package/src/memory/pkb/pkb-reconcile.test.ts +1 -0
- package/src/memory/pkb/pkb-search.test.ts +65 -4
- package/src/memory/pkb/pkb-search.ts +40 -18
- package/src/memory/qdrant-client.test.ts +60 -0
- package/src/memory/qdrant-client.ts +25 -0
- package/src/memory/schema/infrastructure.ts +1 -0
- package/src/memory/schema/oauth.ts +4 -1
- package/src/messaging/providers/slack/render-transcript.test.ts +77 -29
- package/src/messaging/providers/slack/render-transcript.ts +58 -0
- package/src/notifications/conversation-pairing.ts +78 -19
- package/src/notifications/copy-composer.ts +0 -5
- package/src/notifications/emit-signal.ts +1 -1
- package/src/notifications/signal.ts +1 -2
- package/src/oauth/AGENTS.md +1 -1
- package/src/oauth/__tests__/identity-verifier.test.ts +2 -1
- package/src/oauth/connect-orchestrator.ts +8 -34
- package/src/oauth/connect-types.ts +6 -10
- package/src/oauth/manual-token-connection.ts +23 -0
- package/src/oauth/oauth-store.ts +30 -14
- package/src/oauth/provider-serializer.ts +6 -1
- package/src/oauth/seed-providers.ts +56 -108
- package/src/outbound-proxy/http-forwarder.ts +9 -0
- package/src/permissions/approval-policy.test.ts +293 -18
- package/src/permissions/approval-policy.ts +110 -58
- package/src/permissions/arg-parser.test.ts +161 -0
- package/src/permissions/arg-parser.ts +141 -0
- package/src/permissions/bash-risk-classifier.test.ts +414 -2
- package/src/permissions/bash-risk-classifier.ts +303 -60
- package/src/permissions/checker.ts +157 -29
- package/src/permissions/command-registry.test.ts +239 -0
- package/src/permissions/command-registry.ts +234 -54
- package/src/permissions/defaults.ts +5 -4
- package/src/permissions/gateway-threshold-reader.ts +196 -0
- package/src/permissions/prompter.ts +4 -0
- package/src/permissions/risk-types.ts +61 -4
- package/src/permissions/schedule-risk-classifier.test.ts +129 -0
- package/src/permissions/schedule-risk-classifier.ts +85 -0
- package/src/permissions/shell-identity.ts +2 -42
- package/src/permissions/types.ts +2 -0
- package/src/permissions/workspace-policy.ts +8 -3
- package/src/plugins/defaults/circuit-breaker.ts +146 -0
- package/src/plugins/defaults/compaction.ts +145 -0
- package/src/plugins/defaults/empty-response.ts +126 -0
- package/src/plugins/defaults/history-repair.ts +85 -0
- package/src/plugins/defaults/index.ts +116 -0
- package/src/plugins/defaults/injectors.ts +491 -0
- package/src/plugins/defaults/llm-call.ts +82 -0
- package/src/plugins/defaults/memory-retrieval.ts +226 -0
- package/src/plugins/defaults/overflow-reduce.ts +181 -0
- package/src/plugins/defaults/persistence.ts +129 -0
- package/src/plugins/defaults/title-generate.ts +95 -0
- package/src/plugins/defaults/token-estimate.ts +104 -0
- package/src/plugins/defaults/tool-error.ts +126 -0
- package/src/plugins/defaults/tool-execute.ts +89 -0
- package/src/plugins/defaults/tool-result-truncate.ts +88 -0
- package/src/plugins/pipeline.ts +316 -0
- package/src/plugins/plugin-skill-contributions.ts +292 -0
- package/src/plugins/registry.ts +241 -0
- package/src/plugins/types.ts +1134 -0
- package/src/plugins/user-loader.ts +177 -0
- package/src/prompts/templates/BOOTSTRAP.md +27 -77
- package/src/providers/model-catalog.ts +52 -29
- package/src/providers/model-intents.ts +1 -1
- package/src/providers/openrouter/client.ts +5 -1
- package/src/providers/speech-to-text/deepgram-realtime.test.ts +61 -0
- package/src/providers/speech-to-text/deepgram-realtime.ts +57 -0
- package/src/providers/speech-to-text/xai-realtime.test.ts +72 -4
- package/src/providers/speech-to-text/xai-realtime.ts +39 -14
- package/src/runtime/AGENTS.md +25 -16
- package/src/runtime/__tests__/browser-extension-pair-routes.test.ts +3 -3
- package/src/runtime/__tests__/client-registry.test.ts +293 -0
- package/src/runtime/client-registry.ts +261 -0
- package/src/runtime/http-server.ts +77 -8
- package/src/runtime/http-types.ts +0 -2
- package/src/runtime/migrations/vbundle-builder.ts +1 -22
- package/src/runtime/routes/approval-prompt-ts-tracker.ts +51 -31
- package/src/runtime/routes/approval-routes.ts +17 -0
- package/src/runtime/routes/browser-extension-pair-routes.ts +27 -8
- package/src/runtime/routes/conversation-routes.ts +223 -116
- package/src/runtime/routes/inbound-message-handler.ts +88 -13
- package/src/runtime/routes/memory-item-routes.test.ts +1 -0
- package/src/runtime/routes/migration-routes.ts +0 -3
- package/src/runtime/routes/playground/__tests__/force-compact.test.ts +284 -0
- package/src/runtime/routes/playground/__tests__/guard.test.ts +80 -0
- package/src/runtime/routes/playground/__tests__/inject-failures.test.ts +294 -0
- package/src/runtime/routes/playground/__tests__/reset-circuit.test.ts +271 -0
- package/src/runtime/routes/playground/__tests__/seed-conversation.test.ts +202 -0
- package/src/runtime/routes/playground/__tests__/seeded-conversations.test.ts +309 -0
- package/src/runtime/routes/playground/__tests__/state.test.ts +224 -0
- package/src/runtime/routes/playground/conversation-not-found.ts +29 -0
- package/src/runtime/routes/playground/deps.ts +56 -0
- package/src/runtime/routes/playground/force-compact.ts +73 -0
- package/src/runtime/routes/playground/guard.ts +37 -0
- package/src/runtime/routes/playground/index.ts +28 -0
- package/src/runtime/routes/playground/inject-failures.ts +159 -0
- package/src/runtime/routes/playground/reset-circuit.ts +115 -0
- package/src/runtime/routes/playground/seed-conversation.ts +139 -0
- package/src/runtime/routes/playground/seeded-conversations.ts +78 -0
- package/src/runtime/routes/playground/state.ts +78 -0
- package/src/runtime/routes/schedule-routes.ts +89 -8
- package/src/runtime/skill-route-registry.ts +75 -15
- package/src/schedule/run-script.ts +68 -0
- package/src/schedule/schedule-store.ts +7 -1
- package/src/schedule/scheduler.ts +48 -8
- package/src/skills/catalog-cache.ts +12 -5
- package/src/tools/browser/__tests__/browser-status.test.ts +189 -0
- package/src/tools/browser/browser-execution.ts +88 -19
- package/src/tools/browser/cdp-client/__tests__/extension-cdp-client.test.ts +230 -0
- package/src/tools/browser/cdp-client/__tests__/factory.test.ts +146 -3
- package/src/tools/browser/cdp-client/extension-cdp-client.ts +54 -3
- package/src/tools/browser/cdp-client/factory.ts +15 -4
- package/src/tools/executor.ts +126 -74
- package/src/tools/network/script-proxy/session-manager.ts +37 -1
- package/src/tools/permission-checker.ts +98 -49
- package/src/tools/policy-context.ts +4 -0
- package/src/tools/registry.ts +140 -3
- package/src/tools/schedule/create.ts +23 -8
- package/src/tools/schedule/update.ts +3 -1
- package/src/tools/secret-detection-handler.ts +0 -51
- package/src/tools/system/avatar-generator.ts +6 -2
- package/src/tools/types.ts +28 -2
- package/src/util/platform.ts +7 -2
- package/src/util/pricing.ts +26 -3
- package/src/workspace/migrations/006-services-config.ts +2 -4
- package/src/workspace/migrations/022-move-hooks-to-workspace.ts +2 -3
- package/src/workspace/migrations/041-backfill-google-gmail-settings-scope.ts +3 -4
- package/src/workspace/migrations/046-seed-conversation-starters-callsite.ts +108 -0
- package/src/workspace/migrations/047-remove-watch-callsites.ts +54 -0
- package/src/workspace/migrations/048-remove-workspace-hooks.ts +81 -0
- package/src/workspace/migrations/049-release-notes-default-sonnet.ts +80 -0
- package/src/workspace/migrations/050-seed-main-agent-opus-callsite.ts +86 -0
- package/src/workspace/migrations/051-seed-conversation-summarization-callsite.ts +128 -0
- package/src/workspace/migrations/registry.ts +12 -0
- package/tsconfig.json +1 -1
- package/hook-templates/debug-prompt-logger/hook.json +0 -7
- package/hook-templates/debug-prompt-logger/run.sh +0 -66
- package/src/__tests__/compaction-circuit-breaker.test.ts +0 -336
- package/src/__tests__/context-overflow-approval.test.ts +0 -156
- package/src/__tests__/hooks-blocking.test.ts +0 -178
- package/src/__tests__/hooks-cli.test.ts +0 -182
- package/src/__tests__/hooks-config.test.ts +0 -108
- package/src/__tests__/hooks-discovery.test.ts +0 -211
- package/src/__tests__/hooks-integration.test.ts +0 -196
- package/src/__tests__/hooks-manager.test.ts +0 -226
- package/src/__tests__/hooks-runner.test.ts +0 -175
- package/src/__tests__/hooks-settings.test.ts +0 -160
- package/src/__tests__/hooks-templates.test.ts +0 -169
- package/src/__tests__/hooks-ts-runner.test.ts +0 -170
- package/src/__tests__/hooks-watch.test.ts +0 -112
- package/src/__tests__/notification-schedule-dedup.test.ts +0 -213
- package/src/__tests__/oauth-scope-policy.test.ts +0 -180
- package/src/__tests__/send-notification-tool.test.ts +0 -83
- package/src/cli/commands/shotgun.ts +0 -266
- package/src/config/bundled-skills/conversations/SKILL.md +0 -20
- package/src/config/bundled-skills/conversations/TOOLS.json +0 -23
- package/src/config/bundled-skills/conversations/tools/rename-conversation.ts +0 -88
- package/src/config/bundled-skills/heartbeat/SKILL.md +0 -43
- package/src/config/bundled-skills/notifications/SKILL.md +0 -40
- package/src/config/bundled-skills/notifications/TOOLS.json +0 -80
- package/src/config/bundled-skills/notifications/tools/send-notification.ts +0 -152
- package/src/config/bundled-skills/notifications/tools/shared.ts +0 -13
- package/src/config/bundled-skills/screen-watch/SKILL.md +0 -27
- package/src/config/bundled-skills/screen-watch/TOOLS.json +0 -35
- package/src/config/bundled-skills/screen-watch/tools/start-screen-watch.ts +0 -12
- package/src/config/bundled-skills/skills-catalog/SKILL.md +0 -84
- package/src/daemon/context-overflow-approval.ts +0 -52
- package/src/daemon/watch-handler.ts +0 -399
- package/src/hooks/cli.ts +0 -253
- package/src/hooks/config.ts +0 -100
- package/src/hooks/discovery.ts +0 -135
- package/src/hooks/manager.ts +0 -179
- package/src/hooks/runner.ts +0 -117
- package/src/hooks/templates.ts +0 -77
- package/src/hooks/types.ts +0 -75
- package/src/oauth/scope-policy.ts +0 -89
- package/src/runtime/gateway-internal-client.ts +0 -94
- package/src/runtime/routes/watch-routes.ts +0 -156
- package/src/signals/shotgun.ts +0 -203
- package/src/tools/watch/screen-watch.ts +0 -144
- package/src/tools/watch/watch-state.ts +0 -142
|
@@ -0,0 +1,676 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for the default `overflowReduce` plugin (PR 23).
|
|
3
|
+
*
|
|
4
|
+
* Two goals:
|
|
5
|
+
* 1. The default middleware produces results **identical** to the historical
|
|
6
|
+
* inline tier loop for a golden set of over-budget histories. We exercise
|
|
7
|
+
* this by running the same inputs through two paths — the pipeline and a
|
|
8
|
+
* faithful re-implementation of the pre-PR-23 inline loop — and asserting
|
|
9
|
+
* the final `(messages, runMessages, injectionMode, reducerState,
|
|
10
|
+
* reducerCompacted, attempts)` tuple matches byte-for-byte.
|
|
11
|
+
* 2. A user-registered spy middleware observes **every** reduction attempt
|
|
12
|
+
* when wrapped around the default. This covers the onion-composition
|
|
13
|
+
* contract: the spy sees each call from the outside and can count
|
|
14
|
+
* iterations without changing reducer behavior.
|
|
15
|
+
*
|
|
16
|
+
* The test creates its own plugin registry via
|
|
17
|
+
* `resetPluginRegistryForTests()` and re-registers the default before each
|
|
18
|
+
* case so the registry is deterministic across runs.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { beforeEach, describe, expect, test } from "bun:test";
|
|
22
|
+
|
|
23
|
+
import { estimatePromptTokens } from "../context/token-estimator.js";
|
|
24
|
+
import type {
|
|
25
|
+
ContextWindowCompactOptions,
|
|
26
|
+
ContextWindowResult,
|
|
27
|
+
} from "../context/window-manager.js";
|
|
28
|
+
import { createContextSummaryMessage } from "../context/window-manager.js";
|
|
29
|
+
import {
|
|
30
|
+
createInitialReducerState,
|
|
31
|
+
reduceContextOverflow,
|
|
32
|
+
type ReducerState,
|
|
33
|
+
} from "../daemon/context-overflow-reducer.js";
|
|
34
|
+
import type {
|
|
35
|
+
InjectionMode,
|
|
36
|
+
TrustContext,
|
|
37
|
+
} from "../daemon/conversation-runtime-assembly.js";
|
|
38
|
+
import {
|
|
39
|
+
defaultOverflowReduceMiddleware,
|
|
40
|
+
defaultOverflowReducePlugin,
|
|
41
|
+
} from "../plugins/defaults/overflow-reduce.js";
|
|
42
|
+
import { runPipeline } from "../plugins/pipeline.js";
|
|
43
|
+
import {
|
|
44
|
+
getMiddlewaresFor,
|
|
45
|
+
registerPlugin,
|
|
46
|
+
resetPluginRegistryForTests,
|
|
47
|
+
} from "../plugins/registry.js";
|
|
48
|
+
import type {
|
|
49
|
+
Middleware,
|
|
50
|
+
OverflowReduceArgs,
|
|
51
|
+
OverflowReduceResult,
|
|
52
|
+
Plugin,
|
|
53
|
+
TurnContext,
|
|
54
|
+
} from "../plugins/types.js";
|
|
55
|
+
import type { Message } from "../providers/types.js";
|
|
56
|
+
|
|
57
|
+
// ── Fixtures ────────────────────────────────────────────────────────────────
|
|
58
|
+
|
|
59
|
+
/** Builds a message for `role` whose content is a single text block holding `text`. */
function msg(role: "user" | "assistant", text: string): Message {
  const content: Message["content"] = [{ type: "text", text }];
  return { role, content };
}
|
|
62
|
+
|
|
63
|
+
/**
 * Builds an assistant message containing exactly one `tool_use` block.
 * The block's input is always the fixed `{ path: "/tmp/test" }` payload.
 */
function toolUseMsg(id: string, name: string): Message {
  const content: Message["content"] = [
    { type: "tool_use", id, name, input: { path: "/tmp/test" } },
  ];
  return { role: "assistant", content };
}
|
|
69
|
+
|
|
70
|
+
/**
 * Builds a user message containing exactly one `tool_result` block that
 * points back at `toolUseId` and carries `content` as its payload.
 */
function toolResultMsg(toolUseId: string, content: string): Message {
  const blocks: Message["content"] = [
    { type: "tool_result", tool_use_id: toolUseId, content },
  ];
  return { role: "user", content: blocks };
}
|
|
76
|
+
|
|
77
|
+
/** System prompt passed to `estimatePromptTokens` by the compaction stub. */
const SYSTEM_PROMPT = "You are a helpful assistant.";

/**
 * Context-window configuration fixture. `runInlineBaseline` types its
 * `contextWindow` argument as `typeof CONTEXT_WINDOW`, so the literal
 * compression modes below must stay narrowed with `as const`.
 */
const CONTEXT_WINDOW = {
  enabled: true,
  maxInputTokens: 2000,
  targetBudgetRatio: 0.65,
  compactThreshold: 0.6,
  summaryBudgetRatio: 0.05,
  overflowRecovery: {
    enabled: true,
    safetyMarginRatio: 0.05,
    maxAttempts: 3,
    interactiveLatestTurnCompression: "summarize" as const,
    nonInteractiveLatestTurnCompression: "truncate" as const,
  },
};

/** Trust context attached to every `TurnContext` built by `makeTurnContext`. */
const TRUST: TrustContext = {
  sourceChannel: "vellum",
  trustClass: "guardian",
};
|
|
98
|
+
|
|
99
|
+
/**
 * Builds a `TurnContext` for tests. Starts from fixed request/conversation
 * ids, turn index 0 and the shared `TRUST` fixture; any field present in
 * `overrides` replaces the corresponding default.
 */
function makeTurnContext(overrides: Partial<TurnContext> = {}): TurnContext {
  const defaults: TurnContext = {
    requestId: "req-overflow-test",
    conversationId: "conv-overflow-test",
    turnIndex: 0,
    trust: TRUST,
  };
  // Overrides are spread last so they win over the defaults.
  return { ...defaults, ...overrides };
}
|
|
108
|
+
|
|
109
|
+
/**
 * Compaction stub factory. The returned function ignores its `signal` and
 * `options` arguments and always reports a successful compaction whose
 * output is a single context-summary message built from `summaryText`, so
 * the reducer's forced-compaction tier succeeds. Mirrors `makeCompactFn`
 * from `context-overflow-reducer.test.ts` so the two test suites exercise
 * the reducer under comparable conditions.
 *
 * Token estimates are computed with `estimatePromptTokens` against
 * `SYSTEM_PROMPT` and the "mock" provider; the remaining result fields are
 * fixed fixture values.
 */
function makeCompactFn(
  summaryText = "## Goals\n- compacted summary",
): (
  messages: Message[],
  signal: AbortSignal | undefined,
  options: ContextWindowCompactOptions,
) => Promise<ContextWindowResult> {
  // Shared estimator so the "before" and "after" numbers use identical inputs.
  const estimateTokens = (history: Message[]) =>
    estimatePromptTokens(history, SYSTEM_PROMPT, { providerName: "mock" });

  return async (messages, _signal, _options) => {
    const compactedHistory = [createContextSummaryMessage(summaryText)];
    // Estimate the compacted history first, then the original input.
    const tokensAfter = estimateTokens(compactedHistory);
    const tokensBefore = estimateTokens(messages);

    return {
      messages: compactedHistory,
      compacted: true,
      previousEstimatedInputTokens: tokensBefore,
      estimatedInputTokens: tokensAfter,
      maxInputTokens: 2000,
      thresholdTokens: 1200,
      // Every incoming message is reported as compacted.
      compactedMessages: messages.length,
      compactedPersistedMessages: messages.length,
      summaryCalls: 1,
      summaryInputTokens: 100,
      summaryOutputTokens: 50,
      summaryModel: "mock-model",
      summaryText,
    };
  };
}
|
|
151
|
+
|
|
152
|
+
/**
 * Reference copy of the tier loop as it was written inline before PR 23.
 * It is kept in this test file, not in the production module, so the
 * default middleware always has a frozen baseline to be compared against:
 * if either implementation drifts, the golden-output cases fail.
 *
 * Nothing outside the function is touched — no circuit-breaker tracking,
 * no activity emission, no `applyCompactionResult`. The production
 * orchestrator still runs those through callbacks; this baseline only
 * reproduces the *message mutation* behavior so reducer output can be
 * compared.
 *
 * Each round: check the abort signal, run one `reduceContextOverflow`
 * step, check the abort signal again, rebuild `runMessages` through
 * `reinjectForMode`, and stop once the post-injection estimate is within
 * `preflightBudget`. The loop also ends after `maxAttempts` rounds or when
 * the reducer state reports `exhausted`.
 *
 * @returns the final `messages` / `runMessages`, the last injection mode
 *   and reducer state, whether any round compacted, and the round count.
 */
async function runInlineBaseline(args: {
  readonly messages: Message[];
  readonly runMessages: Message[];
  readonly systemPrompt: string;
  readonly providerName: string;
  readonly preflightBudget: number;
  readonly toolTokenBudget?: number;
  readonly maxAttempts: number;
  readonly abortSignal?: AbortSignal;
  readonly compactFn: (
    messages: Message[],
    signal: AbortSignal | undefined,
    options: ContextWindowCompactOptions,
  ) => Promise<ContextWindowResult>;
  readonly contextWindow: typeof CONTEXT_WINDOW;
  readonly reinjectForMode: (
    reducedMessages: Message[],
    mode: InjectionMode,
    stepCompacted: boolean,
    accumulatedCompacted: boolean,
  ) => Promise<Message[]>;
  readonly estimatePostInjection: (runMsgs: Message[]) => number;
}): Promise<{
  messages: Message[];
  runMessages: Message[];
  injectionMode: InjectionMode;
  reducerState: ReducerState;
  reducerCompacted: boolean;
  attempts: number;
}> {
  const { abortSignal, maxAttempts, preflightBudget } = args;

  let messages = args.messages;
  let runMessages = args.runMessages;
  let injectionMode: InjectionMode = "full";
  let reducerState: ReducerState = createInitialReducerState();
  let reducerCompacted = false;
  let attempts = 0;

  while (attempts < maxAttempts && !reducerState.exhausted) {
    // Gate #1: never start a round after cancellation.
    abortSignal?.throwIfAborted();
    attempts += 1;

    const {
      state: nextState,
      messages: reducedMessages,
      compactionResult,
    } = await reduceContextOverflow(
      messages,
      {
        providerName: args.providerName,
        systemPrompt: args.systemPrompt,
        contextWindow: args.contextWindow,
        targetTokens: preflightBudget,
        toolTokenBudget: args.toolTokenBudget,
      },
      reducerState,
      args.compactFn,
      abortSignal,
    );

    reducerState = nextState;
    messages = reducedMessages;
    injectionMode = nextState.injectionMode;

    // `stepCompacted` describes this round only; `reducerCompacted` is
    // sticky once any round has compacted.
    const stepCompacted = compactionResult?.compacted === true;
    reducerCompacted = reducerCompacted || stepCompacted;

    // Gate #2: the reducer step may have been slow; re-check before
    // rebuilding the run messages.
    abortSignal?.throwIfAborted();

    runMessages = await args.reinjectForMode(
      messages,
      injectionMode,
      stepCompacted,
      reducerCompacted,
    );

    const fitsBudget =
      args.estimatePostInjection(runMessages) <= preflightBudget;
    if (fitsBudget) break;
  }

  return {
    messages,
    runMessages,
    injectionMode,
    reducerState,
    reducerCompacted,
    attempts,
  };
}
|
|
249
|
+
|
|
250
|
+
/**
 * Assembles a fresh `OverflowReduceArgs` fixture around `messages`, along
 * with the recorders the tests inspect afterwards.
 *
 * @param messages history used for both `messages` and `runMessages`.
 * @returns
 *   - `args`: the fixture (provider "mock", budget 1000, tool budget 0);
 *   - `reinjectCalls`: one entry per `reinjectForMode` invocation;
 *   - `compactionResults`: every result passed to `onCompactionResult`;
 *   - `rawCompactFn`: the unwrapped function from `makeCompactFn()`, for
 *     callers that need the `ContextWindowCompactOptions` signature.
 */
function buildArgs(messages: Message[]): {
  args: OverflowReduceArgs;
  reinjectCalls: Array<{
    mode: InjectionMode;
    stepCompacted: boolean;
    accumulatedCompacted: boolean;
  }>;
  compactionResults: ContextWindowResult[];
  rawCompactFn: (
    messages: Message[],
    signal: AbortSignal | undefined,
    options: ContextWindowCompactOptions,
  ) => Promise<ContextWindowResult>;
} {
  const reinjectCalls: Array<{
    mode: InjectionMode;
    stepCompacted: boolean;
    accumulatedCompacted: boolean;
  }> = [];
  const compactionResults: ContextWindowResult[] = [];
  const rawCompactFn = makeCompactFn();

  // Identity reinject. The harness does not run the real
  // `applyRuntimeInjections` pipeline; it only records each request to
  // rebuild `runMessages` and hands the reducer's messages back unchanged.
  async function reinjectForMode(
    reducedMessages: Message[],
    mode: InjectionMode,
    stepCompacted: boolean,
    accumulatedCompacted: boolean,
  ): Promise<Message[]> {
    reinjectCalls.push({ mode, stepCompacted, accumulatedCompacted });
    return reducedMessages;
  }

  // Token estimate for the rebuilt run messages, using the shared system
  // prompt and the mock provider.
  function estimatePostInjection(runMsgs: Message[]): number {
    return estimatePromptTokens(runMsgs, SYSTEM_PROMPT, {
      providerName: "mock",
    });
  }

  const args: OverflowReduceArgs = {
    messages,
    runMessages: messages,
    systemPrompt: SYSTEM_PROMPT,
    providerName: "mock",
    contextWindow: CONTEXT_WINDOW,
    preflightBudget: 1000,
    toolTokenBudget: 0,
    maxAttempts: CONTEXT_WINDOW.overflowRecovery.maxAttempts,
    // `OverflowReduceArgs.compactFn` declares `options` as `unknown` so the
    // `ContextWindowCompactOptions` shape stays out of the plugin surface.
    // The helper expects the concrete type, hence this widening trampoline.
    compactFn: (msgs, signal, opts) =>
      rawCompactFn(msgs, signal, opts as ContextWindowCompactOptions),
    emitActivityState: () => {
      /* no-op — the orchestrator owns activity emission */
    },
    onCompactionResult: (result) => {
      compactionResults.push(result);
    },
    reinjectForMode,
    estimatePostInjection,
  };

  return { args, reinjectCalls, compactionResults, rawCompactFn };
}
|
|
319
|
+
|
|
320
|
+
// ── Test suite ──────────────────────────────────────────────────────────────
|
|
321
|
+
|
|
322
|
+
describe("overflow-reduce pipeline", () => {
|
|
323
|
+
// Each test starts from a registry that holds only the default
// overflow-reduce plugin; tests that need another stack reset and
// re-register inside their own body.
beforeEach(function restoreDefaultRegistry() {
  resetPluginRegistryForTests();
  registerPlugin(defaultOverflowReducePlugin);
});
|
|
327
|
+
|
|
328
|
+
describe("default middleware matches historical inline loop", () => {
|
|
329
|
+
test("large tool-result history — identical reduced output", async () => {
  // Fixture: a short history carrying one 8000-character tool result.
  const goldenHistory: Message[] = [
    msg("user", "Start"),
    toolUseMsg("tu_1", "read_file"),
    toolResultMsg("tu_1", "r".repeat(8000)),
    msg("assistant", "Result"),
    msg("user", "Next"),
  ];

  // Two independent fixtures over the SAME history. `buildArgs` gives each
  // its own `compactFn` and recorders, so nothing leaks between the runs.
  const pipelineBuild = buildArgs(goldenHistory);
  const inlineBuild = buildArgs(goldenHistory);

  // Sentinel terminal: the default middleware does not call `next`, so
  // reaching this handler is itself a failure.
  const unreachableTerminal = async () => {
    throw new Error("terminal unexpectedly reached");
  };

  const pipelineResult = await runPipeline<
    OverflowReduceArgs,
    OverflowReduceResult
  >(
    "overflowReduce",
    getMiddlewaresFor("overflowReduce"),
    unreachableTerminal,
    pipelineBuild.args,
    makeTurnContext(),
    30000,
  );

  const { args: inlineArgs, rawCompactFn } = inlineBuild;
  const inlineResult = await runInlineBaseline({
    messages: goldenHistory,
    runMessages: goldenHistory,
    systemPrompt: SYSTEM_PROMPT,
    providerName: "mock",
    preflightBudget: inlineArgs.preflightBudget,
    toolTokenBudget: inlineArgs.toolTokenBudget,
    maxAttempts: inlineArgs.maxAttempts,
    compactFn: rawCompactFn,
    contextWindow: CONTEXT_WINDOW,
    reinjectForMode: inlineArgs.reinjectForMode,
    estimatePostInjection: inlineArgs.estimatePostInjection,
  });

  // Every field the orchestrator relies on must match the baseline.
  expect(pipelineResult.messages).toEqual(inlineResult.messages);
  expect(pipelineResult.runMessages).toEqual(inlineResult.runMessages);
  expect(pipelineResult.injectionMode).toBe(inlineResult.injectionMode);
  expect(pipelineResult.reducerState).toEqual(inlineResult.reducerState);
  expect(pipelineResult.reducerCompacted).toBe(inlineResult.reducerCompacted);
  expect(pipelineResult.attempts).toBe(inlineResult.attempts);
});
|
|
384
|
+
|
|
385
|
+
test("small conversation that fits after first reduction — single attempt", async () => {
  // Tiny two-message history. The intent is that the loop stops right
  // after its first reduction; the assertions pin parity with the inline
  // baseline and that at least one attempt ran.
  const smallHistory: Message[] = [
    msg("user", "Hello"),
    msg("assistant", "Hi there — how can I help?"),
  ];

  const pipelineBuild = buildArgs(smallHistory);
  const inlineBuild = buildArgs(smallHistory);

  // The default middleware is terminal, so `next` must never run.
  const unreachableTerminal = async () => {
    throw new Error("terminal unexpectedly reached");
  };

  const pipelineResult = await runPipeline<
    OverflowReduceArgs,
    OverflowReduceResult
  >(
    "overflowReduce",
    getMiddlewaresFor("overflowReduce"),
    unreachableTerminal,
    pipelineBuild.args,
    makeTurnContext(),
    30000,
  );

  const { args: inlineArgs, rawCompactFn } = inlineBuild;
  const inlineResult = await runInlineBaseline({
    messages: smallHistory,
    runMessages: smallHistory,
    systemPrompt: SYSTEM_PROMPT,
    providerName: "mock",
    preflightBudget: inlineArgs.preflightBudget,
    toolTokenBudget: inlineArgs.toolTokenBudget,
    maxAttempts: inlineArgs.maxAttempts,
    compactFn: rawCompactFn,
    contextWindow: CONTEXT_WINDOW,
    reinjectForMode: inlineArgs.reinjectForMode,
    estimatePostInjection: inlineArgs.estimatePostInjection,
  });

  expect(pipelineResult.attempts).toBe(inlineResult.attempts);
  expect(pipelineResult.attempts).toBeGreaterThanOrEqual(1);
  expect(pipelineResult.messages).toEqual(inlineResult.messages);
  expect(pipelineResult.reducerCompacted).toBe(inlineResult.reducerCompacted);
});
|
|
430
|
+
});
|
|
431
|
+
|
|
432
|
+
describe("spy middleware observes each reduction attempt", () => {
|
|
433
|
+
test("spy sees one invocation when the default converges in one step", async () => {
  const history: Message[] = [msg("user", "Hello"), msg("assistant", "Hi")];

  type SpyRecord = { hadMessages: number; budget: number; attempts: number };
  const spyCalls: SpyRecord[] = [];

  // Observing layer: records what it was handed, forwards through `next`
  // so the default still runs, then back-fills the attempt count from the
  // downstream result.
  const spy: Middleware<OverflowReduceArgs, OverflowReduceResult> =
    async function spyMiddleware(args, next, _ctx) {
      const record: SpyRecord = {
        hadMessages: args.messages.length,
        budget: args.preflightBudget,
        attempts: 0, // filled in once next() has resolved
      };
      spyCalls.push(record);
      const result = await next(args);
      record.attempts = result.attempts;
      return result;
    };

  // Registration order is outer→inner: the spy goes in first so it wraps
  // the default, which then runs as the spy's downstream.
  resetPluginRegistryForTests();
  registerPlugin({
    manifest: {
      name: "spy-overflow",
      version: "0.0.1",
      requires: { pluginRuntime: "v1", overflowReduceApi: "v1" },
    },
    middleware: { overflowReduce: spy },
  });
  registerPlugin(defaultOverflowReducePlugin);

  const { args } = buildArgs(history);
  const unreachableTerminal = async () => {
    throw new Error("terminal unexpectedly reached");
  };
  const result = await runPipeline<OverflowReduceArgs, OverflowReduceResult>(
    "overflowReduce",
    getMiddlewaresFor("overflowReduce"),
    unreachableTerminal,
    args,
    makeTurnContext(),
    30000,
  );

  // A middleware runs once per pipeline call, not once per reducer
  // iteration; the iteration count is surfaced via `result.attempts`.
  expect(spyCalls).toHaveLength(1);
  const [onlyCall] = spyCalls;
  expect(onlyCall?.hadMessages).toBe(2);
  expect(onlyCall?.budget).toBe(1000);
  expect(onlyCall?.attempts).toBe(result.attempts);
  expect(result.attempts).toBeGreaterThanOrEqual(1);
});
|
|
492
|
+
|
|
493
|
+
test("spy can short-circuit the default by not calling next", async () => {
  const history: Message[] = [msg("user", "Hi")];

  // Outer layer that answers on its own: it echoes the incoming messages
  // inside a synthetic "no-op" result and never touches `next`.
  const shortCircuit: Middleware<OverflowReduceArgs, OverflowReduceResult> =
    async function shortCircuitMiddleware(args, _next, _ctx) {
      const { messages, runMessages } = args;
      return {
        messages,
        runMessages,
        injectionMode: "minimal",
        reducerState: {
          appliedTiers: ["injection_downgrade"],
          injectionMode: "minimal",
          exhausted: true,
        },
        reducerCompacted: false,
        attempts: 0,
      };
    };

  const shortCircuitPlugin: Plugin = {
    manifest: {
      name: "short-circuit-overflow",
      version: "0.0.1",
      requires: { pluginRuntime: "v1", overflowReduceApi: "v1" },
    },
    middleware: { overflowReduce: shortCircuit },
  };

  // Short-circuit first (outermost), default behind it.
  resetPluginRegistryForTests();
  registerPlugin(shortCircuitPlugin);
  registerPlugin(defaultOverflowReducePlugin);

  const { args, compactionResults, reinjectCalls } = buildArgs(history);
  const unreachableTerminal = async () => {
    throw new Error("terminal unexpectedly reached");
  };
  const result = await runPipeline<OverflowReduceArgs, OverflowReduceResult>(
    "overflowReduce",
    getMiddlewaresFor("overflowReduce"),
    unreachableTerminal,
    args,
    makeTurnContext(),
    30000,
  );

  // The synthetic result came back untouched, and the recorders stayed
  // empty — the default never ran, so it triggered no compaction results
  // and no reinject callbacks.
  expect(result.injectionMode).toBe("minimal");
  expect(result.attempts).toBe(0);
  expect(compactionResults).toHaveLength(0);
  expect(reinjectCalls).toHaveLength(0);
});
|
|
545
|
+
});
|
|
546
|
+
|
|
547
|
+
describe("direct middleware invocation", () => {
|
|
548
|
+
test("default middleware without the pipeline runner still executes the tier loop", async () => {
  const { args } = buildArgs([msg("user", "Hi")]);

  // The default is terminal: `next` is a tripwire that fails the test.
  const forbiddenNext = async () => {
    throw new Error("next should not be invoked by the default");
  };

  // Call the middleware function directly, bypassing `runPipeline`.
  const { attempts, reducerState } = await defaultOverflowReduceMiddleware(
    args,
    forbiddenNext,
    makeTurnContext(),
  );

  expect(attempts).toBeGreaterThanOrEqual(1);
  expect(reducerState.appliedTiers.length).toBeGreaterThanOrEqual(1);
});
|
|
563
|
+
});
|
|
564
|
+
|
|
565
|
+
describe("abort signal propagation", () => {
|
|
566
|
+
test("middleware bails between iterations when abortSignal fires", async () => {
  // History that won't converge in one step — multiple iterations.
  const longToolResult = "r".repeat(8000);
  const history: Message[] = [
    msg("user", "Start"),
    toolUseMsg("tu_1", "read_file"),
    toolResultMsg("tu_1", longToolResult),
    msg("user", "Next"),
  ];

  const controller = new AbortController();
  const build = buildArgs(history);
  // Abort on the first `estimatePostInjection` — simulates the
  // pipeline-level timeout firing mid-turn. The next loop iteration
  // must see the signal and throw rather than starting another round.
  let estimateCalls = 0;
  const aborting: OverflowReduceArgs = {
    ...build.args,
    abortSignal: controller.signal,
    estimatePostInjection: () => {
      estimateCalls++;
      if (estimateCalls === 1) controller.abort();
      // Return a value that guarantees another iteration would fire
      // without the abort gate.
      return build.args.preflightBudget + 1_000_000;
    },
  };

  // `.rejects` assertions are asynchronous and must be awaited: that
  // makes the test wait for the middleware to settle and fail if it
  // resolves instead of rejecting. Awaiting also guarantees the promise
  // has settled before the counter is inspected, so no extra event-loop
  // tick is needed.
  await expect(
    defaultOverflowReduceMiddleware(
      aborting,
      async () => {
        throw new Error("next should not be invoked");
      },
      makeTurnContext(),
    ),
  ).rejects.toThrow();

  // Exactly one iteration ran; the abort gate stopped the next round.
  expect(estimateCalls).toBe(1);
});
|
|
608
|
+
|
|
609
|
+
test("middleware refuses to start when abortSignal is already aborted", async () => {
  const history: Message[] = [msg("user", "Hi")];
  // Signal is aborted before the middleware is ever called.
  const controller = new AbortController();
  controller.abort();
  const build = buildArgs(history);
  const args: OverflowReduceArgs = {
    ...build.args,
    abortSignal: controller.signal,
  };

  // `.rejects` assertions are asynchronous and must be awaited: that
  // makes the test wait for the rejection and fail if the promise
  // resolves instead. Once awaited, the promise has settled, so the
  // recorders below can be inspected without an extra event-loop tick.
  await expect(
    defaultOverflowReduceMiddleware(
      args,
      async () => {
        throw new Error("next should not be invoked");
      },
      makeTurnContext(),
    ),
  ).rejects.toThrow();

  // Reducer never ran — zero compaction and reinject callbacks observed.
  expect(build.compactionResults).toHaveLength(0);
  expect(build.reinjectCalls).toHaveLength(0);
});
|
|
633
|
+
});
|
|
634
|
+
|
|
635
|
+
describe("reinjectForMode two-flag semantics", () => {
|
|
636
|
+
test("stepCompacted reflects current iteration; accumulatedCompacted stays sticky", async () => {
  // The estimate is pinned far above budget, so the loop cannot exit on
  // budget and several iterations are expected. Intended sequence: an
  // early iteration compacts (stepCompacted=true); later ones apply other
  // tiers (stepCompacted=false) while accumulatedCompacted has to remain
  // true for slack suppression.
  const build = buildArgs([
    msg("user", "Start"),
    toolUseMsg("tu_1", "read_file"),
    toolResultMsg("tu_1", "r".repeat(8000)),
    msg("user", "Next"),
  ]);
  const alwaysOverBudget = build.args.preflightBudget + 1_000_000;
  const overBudget: OverflowReduceArgs = {
    ...build.args,
    estimatePostInjection: () => alwaysOverBudget,
  };

  const forbiddenNext = async () => {
    throw new Error("next should not be invoked");
  };
  await defaultOverflowReduceMiddleware(
    overBudget,
    forbiddenNext,
    makeTurnContext(),
  );

  const { reinjectCalls } = build;

  // The loop ran at least once.
  expect(reinjectCalls.length).toBeGreaterThanOrEqual(1);

  // From the first compacting iteration onwards, every reinject call must
  // report accumulatedCompacted=true — including iterations whose own
  // step did NOT compact.
  const firstCompactedAt = reinjectCalls.findIndex(
    (call) => call.stepCompacted,
  );
  expect(firstCompactedAt).toBeGreaterThanOrEqual(0);
  for (const call of reinjectCalls.slice(firstCompactedAt)) {
    expect(call.accumulatedCompacted).toBe(true);
  }
});
|
|
675
|
+
});
|
|
676
|
+
});
|