@vellumai/assistant 0.4.46 → 0.4.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +7 -7
- package/README.md +2 -23
- package/docs/architecture/integrations.md +45 -41
- package/docs/architecture/keychain-broker.md +3 -3
- package/docs/architecture/security.md +5 -5
- package/docs/runbook-trusted-contacts.md +3 -8
- package/hook-templates/debug-prompt-logger/hook.json +1 -1
- package/hook-templates/debug-prompt-logger/run.sh +1 -3
- package/package.json +1 -1
- package/src/__tests__/actor-token-service.test.ts +0 -1
- package/src/__tests__/anthropic-provider.test.ts +156 -0
- package/src/__tests__/approval-cascade.test.ts +810 -0
- package/src/__tests__/approval-primitive.test.ts +0 -1
- package/src/__tests__/approval-routes-http.test.ts +2 -0
- package/src/__tests__/assistant-attachments.test.ts +12 -34
- package/src/__tests__/assistant-feature-flag-guardrails.test.ts +76 -0
- package/src/__tests__/assistant-feature-flags-integration.test.ts +0 -1
- package/src/__tests__/browser-fill-credential.test.ts +5 -2
- package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +2 -2
- package/src/__tests__/bundled-skill-retrieval-guard.test.ts +2 -1
- package/src/__tests__/channel-guardian.test.ts +0 -2
- package/src/__tests__/channel-readiness-routes.test.ts +35 -25
- package/src/__tests__/channel-readiness-service.test.ts +10 -9
- package/src/__tests__/checker.test.ts +9 -29
- package/src/__tests__/cli.test.ts +23 -0
- package/src/__tests__/computer-use-skill-manifest-regression.test.ts +1 -1
- package/src/__tests__/computer-use-tools.test.ts +2 -19
- package/src/__tests__/config-watcher.test.ts +0 -1
- package/src/__tests__/confirmation-request-guardian-bridge.test.ts +0 -1
- package/src/__tests__/context-image-dimensions.test.ts +332 -0
- package/src/__tests__/context-token-estimator.test.ts +196 -13
- package/src/__tests__/conversation-attention-store.test.ts +0 -1
- package/src/__tests__/conversation-attention-telegram.test.ts +0 -1
- package/src/__tests__/conversation-routes-guardian-reply.test.ts +144 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
- package/src/__tests__/credential-broker-browser-fill.test.ts +23 -22
- package/src/__tests__/credential-broker-server-use.test.ts +22 -21
- package/src/__tests__/credential-broker.test.ts +2 -1
- package/src/__tests__/credential-metadata-store.test.ts +239 -26
- package/src/__tests__/credential-resolve.test.ts +5 -4
- package/src/__tests__/credential-security-e2e.test.ts +8 -8
- package/src/__tests__/credential-security-invariants.test.ts +111 -7
- package/src/__tests__/credential-vault-unit.test.ts +287 -54
- package/src/__tests__/credential-vault.test.ts +406 -12
- package/src/__tests__/credentials-cli.test.ts +82 -6
- package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +0 -1
- package/src/__tests__/ephemeral-permissions.test.ts +3 -3
- package/src/__tests__/gateway-only-enforcement.test.ts +4 -2
- package/src/__tests__/gateway-only-guard.test.ts +0 -1
- package/src/__tests__/gemini-image-service.test.ts +75 -45
- package/src/__tests__/gemini-provider.test.ts +9 -6
- package/src/__tests__/guardian-action-conversation-turn.test.ts +1 -33
- package/src/__tests__/guardian-action-copy-generator.test.ts +0 -20
- package/src/__tests__/guardian-action-followup-executor.test.ts +1 -28
- package/src/__tests__/guardian-action-followup-store.test.ts +1 -1
- package/src/__tests__/guardian-action-grant-mint-consume.test.ts +0 -1
- package/src/__tests__/guardian-decision-primitive-canonical.test.ts +0 -1
- package/src/__tests__/guardian-grant-minting.test.ts +35 -0
- package/src/__tests__/guardian-routing-invariants.test.ts +0 -1
- package/src/__tests__/guardian-verification-voice-binding.test.ts +0 -1
- package/src/__tests__/handlers-user-message-approval-consumption.test.ts +0 -39
- package/src/__tests__/heartbeat-service.test.ts +0 -1
- package/src/__tests__/host-cu-proxy.test.ts +629 -0
- package/src/__tests__/host-shell-tool.test.ts +27 -15
- package/src/__tests__/http-user-message-parity.test.ts +1 -0
- package/src/__tests__/ingress-url-consistency.test.ts +14 -21
- package/src/__tests__/integration-status.test.ts +38 -25
- package/src/__tests__/intent-routing.test.ts +0 -1
- package/src/__tests__/invite-routes-http.test.ts +10 -9
- package/src/__tests__/keychain-broker-client.test.ts +11 -43
- package/src/__tests__/managed-proxy-context.test.ts +5 -3
- package/src/__tests__/media-generate-image.test.ts +63 -2
- package/src/__tests__/media-reuse-story.e2e.test.ts +7 -3
- package/src/__tests__/messaging-send-tool.test.ts +4 -6
- package/src/__tests__/notification-routing-intent.test.ts +0 -1
- package/src/__tests__/oauth-cli.test.ts +373 -14
- package/src/__tests__/oauth-provider-profiles.test.ts +9 -9
- package/src/__tests__/oauth-scope-policy.test.ts +4 -6
- package/src/__tests__/oauth-store.test.ts +756 -0
- package/src/__tests__/onboarding-starter-tasks.test.ts +0 -1
- package/src/__tests__/provider-error-scenarios.test.ts +0 -1
- package/src/__tests__/provider-fail-open-selection.test.ts +3 -1
- package/src/__tests__/provider-managed-proxy-integration.test.ts +70 -6
- package/src/__tests__/provider-streaming.benchmark.test.ts +0 -1
- package/src/__tests__/public-ingress-urls.test.ts +15 -21
- package/src/__tests__/recording-handler.test.ts +3 -4
- package/src/__tests__/registry.test.ts +2 -2
- package/src/__tests__/runtime-events-sse.test.ts +55 -7
- package/src/__tests__/schedule-store.test.ts +0 -1
- package/src/__tests__/scheduler-recurrence.test.ts +0 -1
- package/src/__tests__/schema-transforms.test.ts +226 -0
- package/src/__tests__/scoped-approval-grants.test.ts +0 -1
- package/src/__tests__/scoped-grant-security-matrix.test.ts +0 -1
- package/src/__tests__/script-proxy-injection-runtime.test.ts +23 -13
- package/src/__tests__/script-proxy-policy-runtime.test.ts +1 -1
- package/src/__tests__/script-proxy-session-manager.test.ts +1 -1
- package/src/__tests__/secret-ingress-handler.test.ts +0 -1
- package/src/__tests__/secret-onetime-send.test.ts +5 -3
- package/src/__tests__/send-endpoint-busy.test.ts +21 -6
- package/src/__tests__/sequence-store.test.ts +0 -1
- package/src/__tests__/session-init.benchmark.test.ts +4 -5
- package/src/__tests__/session-messaging-secret-redirect.test.ts +5 -4
- package/src/__tests__/skill-include-graph.test.ts +66 -0
- package/src/__tests__/skill-load-feature-flag.test.ts +0 -1
- package/src/__tests__/skill-load-tool.test.ts +149 -1
- package/src/__tests__/skill-projection-feature-flag.test.ts +0 -1
- package/src/__tests__/skills-uninstall.test.ts +3 -3
- package/src/__tests__/skills.test.ts +3 -12
- package/src/__tests__/slack-channel-config.test.ts +76 -11
- package/src/__tests__/slack-share-routes.test.ts +17 -14
- package/src/__tests__/system-prompt.test.ts +0 -1
- package/src/__tests__/telegram-bot-username-resolution.test.ts +3 -0
- package/src/__tests__/telegram-invite-adapter.test.ts +18 -22
- package/src/__tests__/terminal-tools.test.ts +4 -3
- package/src/__tests__/test-support/computer-use-skill-harness.ts +3 -2
- package/src/__tests__/tool-approval-handler.test.ts +0 -1
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -1
- package/src/__tests__/tool-executor-lifecycle-events.test.ts +0 -1
- package/src/__tests__/tool-executor-shell-integration.test.ts +0 -1
- package/src/__tests__/tool-executor.test.ts +0 -1
- package/src/__tests__/tool-grant-request-escalation.test.ts +0 -1
- package/src/__tests__/trust-store-pattern-matches.test.ts +29 -0
- package/src/__tests__/trust-store.test.ts +1 -22
- package/src/__tests__/trusted-contact-approval-notifier.test.ts +0 -1
- package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +0 -1
- package/src/__tests__/twilio-config.test.ts +2 -1
- package/src/__tests__/twilio-provider.test.ts +4 -2
- package/src/__tests__/twilio-routes.test.ts +5 -20
- package/src/__tests__/verification-control-plane-policy.test.ts +0 -1
- package/src/__tests__/voice-scoped-grant-consumer.test.ts +0 -1
- package/src/agent/ax-tree-compaction.test.ts +235 -0
- package/src/agent/loop.ts +76 -130
- package/src/calls/call-domain.ts +8 -10
- package/src/calls/relay-server.ts +9 -13
- package/src/calls/twilio-config.ts +4 -8
- package/src/calls/twilio-provider.ts +2 -1
- package/src/calls/twilio-rest.ts +2 -1
- package/src/calls/twilio-routes.ts +1 -2
- package/src/calls/voice-ingress-preflight.ts +1 -1
- package/src/cli/commands/browser-relay.ts +46 -15
- package/src/cli/commands/completions.ts +0 -3
- package/src/cli/commands/credentials.ts +110 -23
- package/src/cli/commands/oauth/apps.ts +255 -0
- package/src/cli/commands/oauth/connections.ts +299 -0
- package/src/cli/commands/oauth/index.ts +52 -0
- package/src/cli/commands/oauth/providers.ts +242 -0
- package/src/cli/commands/skills.ts +4 -338
- package/src/cli/program.ts +1 -5
- package/src/cli/reference.ts +1 -3
- package/src/cli.ts +3 -2
- package/src/config/assistant-feature-flags.ts +0 -3
- package/src/config/bundled-skills/_shared/CLI_RETRIEVAL_PATTERN.md +1 -1
- package/src/config/bundled-skills/claude-code/TOOLS.json +0 -4
- package/src/config/bundled-skills/computer-use/SKILL.md +3 -6
- package/src/config/bundled-skills/computer-use/TOOLS.json +22 -4
- package/src/config/bundled-skills/contacts/tools/google-contacts.ts +29 -32
- package/src/config/bundled-skills/gmail/SKILL.md +4 -4
- package/src/config/bundled-skills/gmail/tools/gmail-archive.ts +54 -61
- package/src/config/bundled-skills/gmail/tools/gmail-attachments.ts +25 -28
- package/src/config/bundled-skills/gmail/tools/gmail-draft.ts +14 -17
- package/src/config/bundled-skills/gmail/tools/gmail-filters.ts +39 -44
- package/src/config/bundled-skills/gmail/tools/gmail-follow-up.ts +61 -58
- package/src/config/bundled-skills/gmail/tools/gmail-forward.ts +50 -49
- package/src/config/bundled-skills/gmail/tools/gmail-label.ts +11 -13
- package/src/config/bundled-skills/gmail/tools/gmail-outreach-scan.ts +148 -146
- package/src/config/bundled-skills/gmail/tools/gmail-send-draft.ts +4 -7
- package/src/config/bundled-skills/gmail/tools/gmail-sender-digest.ts +175 -173
- package/src/config/bundled-skills/gmail/tools/gmail-trash.ts +4 -7
- package/src/config/bundled-skills/gmail/tools/gmail-unsubscribe.ts +71 -76
- package/src/config/bundled-skills/gmail/tools/gmail-vacation.ts +32 -38
- package/src/config/bundled-skills/google-calendar/SKILL.md +2 -2
- package/src/config/bundled-skills/google-calendar/calendar-client.ts +90 -44
- package/src/config/bundled-skills/google-calendar/tools/calendar-check-availability.ts +9 -10
- package/src/config/bundled-skills/google-calendar/tools/calendar-create-event.ts +5 -6
- package/src/config/bundled-skills/google-calendar/tools/calendar-get-event.ts +4 -5
- package/src/config/bundled-skills/google-calendar/tools/calendar-list-events.ts +14 -15
- package/src/config/bundled-skills/google-calendar/tools/calendar-rsvp.ts +37 -37
- package/src/config/bundled-skills/google-calendar/tools/shared.ts +4 -9
- package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +24 -3
- package/src/config/bundled-skills/messaging/SKILL.md +6 -6
- package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +62 -63
- package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +15 -16
- package/src/config/bundled-skills/messaging/tools/messaging-auth-test.ts +4 -5
- package/src/config/bundled-skills/messaging/tools/messaging-list-conversations.ts +6 -7
- package/src/config/bundled-skills/messaging/tools/messaging-mark-read.ts +4 -5
- package/src/config/bundled-skills/messaging/tools/messaging-read.ts +14 -15
- package/src/config/bundled-skills/messaging/tools/messaging-search.ts +4 -5
- package/src/config/bundled-skills/messaging/tools/messaging-send.ts +128 -128
- package/src/config/bundled-skills/messaging/tools/messaging-sender-digest.ts +33 -34
- package/src/config/bundled-skills/messaging/tools/shared.ts +12 -15
- package/src/config/bundled-skills/settings/SKILL.md +1 -1
- package/src/config/bundled-skills/settings/TOOLS.json +2 -8
- package/src/config/bundled-skills/settings/tools/voice-config-update.ts +5 -33
- package/src/config/bundled-skills/slack/tools/shared.ts +4 -10
- package/src/config/bundled-skills/slack/tools/slack-add-reaction.ts +4 -5
- package/src/config/bundled-skills/slack/tools/slack-channel-details.ts +15 -16
- package/src/config/bundled-skills/slack/tools/slack-delete-message.ts +4 -5
- package/src/config/bundled-skills/slack/tools/slack-edit-message.ts +4 -5
- package/src/config/bundled-skills/slack/tools/slack-leave-channel.ts +4 -5
- package/src/config/bundled-skills/slack/tools/slack-scan-digest.ts +95 -92
- package/src/config/env-registry.ts +14 -83
- package/src/config/env.ts +11 -50
- package/src/config/feature-flag-registry.json +16 -16
- package/src/config/schema.ts +3 -1
- package/src/config/skills.ts +21 -2
- package/src/context/image-dimensions.ts +229 -0
- package/src/context/token-estimator.ts +75 -12
- package/src/context/window-manager.ts +49 -10
- package/src/daemon/assistant-attachments.ts +1 -13
- package/src/daemon/guardian-action-generators.ts +4 -5
- package/src/daemon/handlers/config-ingress.ts +8 -33
- package/src/daemon/handlers/config-slack-channel.ts +76 -56
- package/src/daemon/handlers/config-telegram.ts +53 -24
- package/src/daemon/handlers/sessions.ts +10 -24
- package/src/daemon/handlers/shared.ts +0 -130
- package/src/daemon/host-cu-proxy.ts +401 -0
- package/src/daemon/lifecycle.ts +39 -63
- package/src/daemon/message-protocol.ts +3 -0
- package/src/daemon/message-types/computer-use.ts +2 -119
- package/src/daemon/message-types/host-cu.ts +19 -0
- package/src/daemon/message-types/integrations.ts +1 -0
- package/src/daemon/message-types/messages.ts +3 -0
- package/src/daemon/server.ts +14 -21
- package/src/daemon/session-agent-loop-handlers.ts +2 -0
- package/src/daemon/session-attachments.ts +1 -2
- package/src/daemon/session-messaging.ts +3 -1
- package/src/daemon/session-slash.ts +1 -1
- package/src/daemon/session-surfaces.ts +40 -28
- package/src/daemon/session-tool-setup.ts +20 -11
- package/src/daemon/session.ts +139 -16
- package/src/daemon/tool-side-effects.ts +2 -8
- package/src/daemon/watch-handler.ts +2 -2
- package/src/email/providers/index.ts +2 -1
- package/src/events/tool-metrics-listener.ts +2 -2
- package/src/hooks/manager.ts +1 -4
- package/src/inbound/public-ingress-urls.ts +7 -7
- package/src/instrument.ts +15 -1
- package/src/logfire.ts +16 -5
- package/src/media/app-icon-generator.ts +30 -4
- package/src/media/avatar-router.ts +26 -3
- package/src/media/gemini-image-service.ts +28 -2
- package/src/memory/conversation-key-store.ts +21 -0
- package/src/memory/db-init.ts +4 -0
- package/src/memory/guardian-action-store.ts +1 -1
- package/src/memory/migrations/149-oauth-tables.ts +60 -0
- package/src/memory/migrations/index.ts +1 -0
- package/src/memory/schema/guardian.ts +1 -1
- package/src/memory/schema/index.ts +1 -0
- package/src/memory/schema/oauth.ts +65 -0
- package/src/messaging/provider.ts +19 -13
- package/src/messaging/providers/gmail/adapter.ts +40 -23
- package/src/messaging/providers/gmail/client.ts +283 -122
- package/src/messaging/providers/gmail/people-client.ts +32 -24
- package/src/messaging/providers/slack/adapter.ts +29 -19
- package/src/messaging/providers/slack/client.ts +265 -78
- package/src/messaging/providers/telegram-bot/adapter.ts +19 -18
- package/src/messaging/providers/whatsapp/adapter.ts +17 -11
- package/src/messaging/registry.ts +2 -31
- package/src/notifications/copy-composer.ts +0 -5
- package/src/notifications/signal.ts +4 -5
- package/src/oauth/byo-connection.test.ts +537 -0
- package/src/oauth/byo-connection.ts +128 -0
- package/src/oauth/connect-orchestrator.ts +139 -56
- package/src/oauth/connect-types.ts +17 -23
- package/src/oauth/connection-resolver.ts +58 -0
- package/src/oauth/connection.ts +38 -0
- package/src/oauth/manual-token-connection.ts +104 -0
- package/src/oauth/oauth-store.ts +496 -0
- package/src/oauth/platform-connection.test.ts +192 -0
- package/src/oauth/platform-connection.ts +111 -0
- package/src/oauth/provider-behaviors.ts +124 -0
- package/src/oauth/scope-policy.ts +9 -2
- package/src/oauth/seed-providers.ts +161 -0
- package/src/oauth/token-persistence.ts +74 -78
- package/src/permissions/checker.ts +8 -4
- package/src/permissions/defaults.ts +0 -1
- package/src/permissions/prompter.ts +10 -1
- package/src/permissions/trust-store.ts +13 -0
- package/src/prompts/__tests__/build-cli-reference-section.test.ts +3 -1
- package/src/prompts/system-prompt.ts +70 -45
- package/src/providers/anthropic/client.ts +133 -24
- package/src/providers/gemini/client.ts +15 -6
- package/src/providers/managed-proxy/constants.ts +2 -2
- package/src/providers/managed-proxy/context.ts +5 -1
- package/src/providers/ratelimit.ts +17 -0
- package/src/providers/registry.ts +2 -2
- package/src/providers/retry.ts +1 -27
- package/src/runtime/AGENTS.md +17 -0
- package/src/runtime/auth/route-policy.ts +0 -3
- package/src/runtime/channel-invite-transports/telegram.ts +2 -1
- package/src/runtime/channel-readiness-service.ts +168 -195
- package/src/runtime/channel-readiness-types.ts +4 -0
- package/src/runtime/channel-reply-delivery.ts +0 -40
- package/src/runtime/gateway-client.ts +0 -7
- package/src/runtime/guardian-action-conversation-turn.ts +1 -3
- package/src/runtime/guardian-action-followup-executor.ts +1 -1
- package/src/runtime/guardian-action-message-composer.ts +3 -23
- package/src/runtime/http-server.ts +17 -10
- package/src/runtime/http-types.ts +2 -3
- package/src/runtime/middleware/rate-limiter.ts +74 -20
- package/src/runtime/middleware/twilio-validation.ts +1 -11
- package/src/runtime/pending-interactions.ts +14 -12
- package/src/runtime/routes/channel-delivery-routes.ts +0 -1
- package/src/runtime/routes/channel-readiness-routes.ts +2 -0
- package/src/runtime/routes/conversation-routes.ts +73 -19
- package/src/runtime/routes/diagnostics-routes.ts +11 -9
- package/src/runtime/routes/events-routes.ts +21 -11
- package/src/runtime/routes/guardian-approval-interception.ts +20 -5
- package/src/runtime/routes/host-cu-routes.ts +97 -0
- package/src/runtime/routes/inbound-stages/background-dispatch.ts +12 -111
- package/src/runtime/routes/integrations/slack/share.ts +6 -6
- package/src/runtime/routes/integrations/twilio.ts +6 -5
- package/src/runtime/routes/log-export-routes.ts +126 -8
- package/src/runtime/routes/secret-routes.ts +3 -2
- package/src/runtime/routes/settings-routes.ts +113 -48
- package/src/runtime/routes/surface-action-routes.ts +1 -1
- package/src/runtime/routes/watch-routes.ts +128 -0
- package/src/schedule/integration-status.ts +10 -8
- package/src/security/credential-key.ts +14 -0
- package/src/security/keychain-broker-client.ts +5 -6
- package/src/security/oauth2.ts +1 -1
- package/src/security/token-manager.ts +145 -43
- package/src/skills/catalog-install.ts +358 -0
- package/src/skills/include-graph.ts +32 -0
- package/src/telegram/bot-username.ts +2 -3
- package/src/tools/apps/definitions.ts +0 -5
- package/src/tools/assets/materialize.ts +0 -5
- package/src/tools/assets/search.ts +0 -5
- package/src/tools/browser/headless-browser.ts +1 -67
- package/src/tools/browser/network-recorder.ts +1 -1
- package/src/tools/browser/network-recording-types.ts +1 -1
- package/src/tools/claude-code/claude-code.ts +0 -5
- package/src/tools/computer-use/definitions.ts +46 -11
- package/src/tools/computer-use/registry.ts +4 -5
- package/src/tools/credentials/broker.ts +5 -4
- package/src/tools/credentials/metadata-store.ts +22 -74
- package/src/tools/credentials/resolve.ts +2 -1
- package/src/tools/credentials/vault.ts +139 -151
- package/src/tools/filesystem/edit.ts +1 -6
- package/src/tools/filesystem/read.ts +0 -5
- package/src/tools/filesystem/write.ts +1 -6
- package/src/tools/host-filesystem/edit.ts +1 -6
- package/src/tools/host-filesystem/read.ts +1 -6
- package/src/tools/host-filesystem/write.ts +1 -6
- package/src/tools/mcp/mcp-tool-factory.ts +18 -1
- package/src/tools/memory/definitions.ts +0 -5
- package/src/tools/network/web-fetch.ts +0 -5
- package/src/tools/network/web-search.ts +0 -5
- package/src/tools/registry.ts +2 -7
- package/src/tools/schema-transforms.ts +99 -0
- package/src/tools/skills/load.ts +62 -8
- package/src/tools/swarm/delegate.ts +0 -5
- package/src/tools/system/avatar-generator.ts +0 -5
- package/src/tools/ui-surface/definitions.ts +0 -15
- package/src/tools/watch/screen-watch.ts +0 -5
- package/src/tools/watch/watch-state.ts +0 -12
- package/src/util/logger.ts +7 -41
- package/src/util/platform.ts +9 -28
- package/src/version.ts +10 -0
- package/src/watcher/providers/github.ts +51 -52
- package/src/watcher/providers/gmail.ts +88 -80
- package/src/watcher/providers/google-calendar.ts +94 -86
- package/src/watcher/providers/linear.ts +87 -93
- package/src/__tests__/computer-use-session-compaction.test.ts +0 -143
- package/src/__tests__/computer-use-session-lifecycle.test.ts +0 -322
- package/src/__tests__/computer-use-session-working-dir.test.ts +0 -166
- package/src/__tests__/computer-use-skill-baseline.test.ts +0 -78
- package/src/__tests__/computer-use-skill-endstate.test.ts +0 -105
- package/src/__tests__/computer-use-skill-lifecycle-cleanup.test.ts +0 -249
- package/src/__tests__/ride-shotgun-handler.test.ts +0 -452
- package/src/cli/commands/dev.ts +0 -129
- package/src/cli/commands/map.ts +0 -391
- package/src/cli/commands/oauth.ts +0 -77
- package/src/config/bundled-skills/computer-use/tools/computer-use-request-control.ts +0 -16
- package/src/daemon/computer-use-session.ts +0 -1020
- package/src/daemon/ride-shotgun-handler.ts +0 -567
- package/src/oauth/provider-profiles.ts +0 -192
- package/src/prompts/computer-use-prompt.ts +0 -98
- package/src/runtime/routes/computer-use-routes.ts +0 -641
- package/src/runtime/telegram-streaming-delivery.test.ts +0 -597
- package/src/runtime/telegram-streaming-delivery.ts +0 -383
- package/src/tools/computer-use/request-computer-control.ts +0 -61
|
@@ -0,0 +1,629 @@
|
|
|
1
|
+
import { afterEach, describe, expect, test } from "bun:test";
|
|
2
|
+
|
|
3
|
+
import { HostCuProxy } from "../daemon/host-cu-proxy.js";
|
|
4
|
+
|
|
5
|
+
describe("HostCuProxy", () => {
|
|
6
|
+
// Messages handed to the send callback, in the order they were sent.
// Replaced with a fresh array by setup().
let sentMessages: unknown[];
// Send callback passed to the proxy; installed by setup().
let sendToClient: (msg: unknown) => void;
// Proxy under test; constructed by setup() and disposed after each test.
let proxy: InstanceType<typeof HostCuProxy>;
|
|
9
|
+
|
|
10
|
+
/**
 * Builds a fresh proxy for one test case.
 *
 * Resets the captured-message list, installs a send callback that appends
 * every outgoing message to that list, and constructs the proxy with it.
 *
 * @param maxSteps Optional value forwarded as the second HostCuProxy
 *   constructor argument (the step-limit tests pass it explicitly).
 */
function setup(maxSteps?: number) {
  sentMessages = [];
  sendToClient = (outbound: unknown) => {
    sentMessages.push(outbound);
  };
  // `as never` lets the loosely typed stub satisfy the constructor's parameter type.
  proxy = new HostCuProxy(sendToClient as never, maxSteps);
}
|
|
15
|
+
|
|
16
|
+
// Dispose the proxy after every test; skipped when no proxy was ever created.
afterEach(() => {
  if (proxy != null) {
    proxy.dispose();
  }
});
|
|
19
|
+
|
|
20
|
+
// -------------------------------------------------------------------------
|
|
21
|
+
// Request / resolve lifecycle
|
|
22
|
+
// -------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
describe("request/resolve lifecycle", () => {
|
|
25
|
+
// Happy path: a request produces one outgoing host_cu_request message, and
// resolving it by id yields a non-error observation containing the result
// text and the accessibility tree section.
test("sends host_cu_request and resolves with formatted observation", async () => {
  setup();

  const pending = proxy.request(
    "computer_use_click",
    { element_id: 42 },
    "session-1",
    1,
    "Clicking the button",
  );

  // Exactly one message must have been handed to the send callback.
  expect(sentMessages).toHaveLength(1);
  const outbound = sentMessages[0] as Record<string, unknown>;
  expect(outbound.type).toBe("host_cu_request");
  expect(outbound.sessionId).toBe("session-1");
  expect(outbound.toolName).toBe("computer_use_click");
  expect(outbound.input).toEqual({ element_id: 42 });
  expect(outbound.stepNumber).toBe(1);
  expect(outbound.reasoning).toBe("Clicking the button");
  expect(typeof outbound.requestId).toBe("string");

  // The test never supplies a request id; it is read back from the message.
  const id = outbound.requestId as string;
  expect(proxy.hasPendingRequest(id)).toBe(true);

  proxy.resolve(id, {
    axTree: "Button [1]\nLabel [2]",
    executionResult: "Clicked element 42",
  });

  const observation = await pending;
  for (const fragment of [
    "Clicked element 42",
    "<ax-tree>",
    "CURRENT SCREEN STATE:",
  ]) {
    expect(observation.content).toContain(fragment);
  }
  expect(observation.isError).toBe(false);
  // Resolving must remove the request from the pending set.
  expect(proxy.hasPendingRequest(id)).toBe(false);
});
|
|
61
|
+
|
|
62
|
+
// An observation carrying executionError must be flagged as an error and
// still include the accessibility tree section.
test("formats error observation correctly", async () => {
  setup();

  const pending = proxy.request(
    "computer_use_click",
    { element_id: 99 },
    "session-1",
    1,
  );

  // Read the request id back from the outgoing message.
  const { requestId } = sentMessages[0] as Record<string, unknown>;

  proxy.resolve(requestId as string, {
    executionError: "Element not found",
    axTree: "Window [1]",
  });

  const observation = await pending;
  expect(observation.isError).toBe(true);
  expect(observation.content).toContain("Action failed: Element not found");
  expect(observation.content).toContain("<ax-tree>");
});
|
|
85
|
+
|
|
86
|
+
// A screenshot in the observation must surface as a single base64 JPEG image
// content block, and the text content must mention the pixel dimensions.
test("includes screenshot as content block", async () => {
  setup();

  const resultPromise = proxy.request(
    "computer_use_screenshot",
    {},
    "session-1",
    1,
  );

  const sent = sentMessages[0] as Record<string, unknown>;
  const requestId = sent.requestId as string;

  proxy.resolve(requestId, {
    axTree: "Button [1]",
    screenshot: "base64data",
    screenshotWidthPx: 1920,
    screenshotHeightPx: 1080,
  });

  const result = await resultPromise;
  expect(result.contentBlocks).toBeDefined();
  expect(result.contentBlocks).toHaveLength(1);
  // Optional chaining instead of a non-null assertion: if contentBlocks is
  // missing, this fails as a normal assertion mismatch rather than a TypeError.
  expect(result.contentBlocks?.[0]).toEqual({
    type: "image",
    source: {
      type: "base64",
      media_type: "image/jpeg",
      data: "base64data",
    },
  });
  expect(result.content).toContain("1920x1080 px");
});
|
|
119
|
+
|
|
120
|
+
// Resolving an id that was never requested must be a no-op. The expectations
// are explicit so the test cannot pass vacuously.
test("resolves with unknown requestId is silently ignored", () => {
  setup();

  expect(() => {
    proxy.resolve("unknown-id", { axTree: "something" });
  }).not.toThrow();

  // The stray resolve must not leave a pending entry behind.
  expect(proxy.hasPendingRequest("unknown-id")).toBe(false);
});
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
// -------------------------------------------------------------------------
|
|
128
|
+
// Timeout
|
|
129
|
+
// -------------------------------------------------------------------------
|
|
130
|
+
|
|
131
|
+
describe("timeout", () => {
|
|
132
|
+
// NOTE: this test does NOT exercise the timeout timer itself. The 60s
// timeout is impractical to wait for in a unit test, so it only verifies that
// a request stays pending until it is resolved manually, and that resolving
// clears the pending entry. The title says so, so nobody assumes the timeout
// path is covered.
test("request stays pending until resolved (60s timeout not exercised)", async () => {
  setup();

  const resultPromise = proxy.request(
    "computer_use_click",
    { element_id: 1 },
    "session-1",
    1,
  );

  const sent = sentMessages[0] as Record<string, unknown>;
  const requestId = sent.requestId as string;
  expect(proxy.hasPendingRequest(requestId)).toBe(true);

  // Resolve manually so the promise settles and the test does not hang.
  proxy.resolve(requestId, { axTree: "resolved" });
  const result = await resultPromise;

  // A plain axTree observation is a successful result, and resolving must
  // remove the request from the pending set.
  expect(result.isError).toBe(false);
  expect(proxy.hasPendingRequest(requestId)).toBe(false);
});
|
|
152
|
+
});
|
|
153
|
+
|
|
154
|
+
// -------------------------------------------------------------------------
|
|
155
|
+
// Abort signal
|
|
156
|
+
// -------------------------------------------------------------------------
|
|
157
|
+
|
|
158
|
+
describe("abort signal", () => {
|
|
159
|
+
// Aborting the signal while a request is in flight must settle the request
// with an error result mentioning "Aborted" and clear the pending entry.
test("resolves with abort result when signal fires", async () => {
  setup();

  const abortController = new AbortController();
  const pending = proxy.request(
    "computer_use_click",
    { element_id: 1 },
    "session-1",
    1,
    undefined, // no reasoning text
    abortController.signal,
  );

  const outbound = sentMessages[0] as Record<string, unknown>;
  const id = outbound.requestId as string;
  expect(proxy.hasPendingRequest(id)).toBe(true);

  abortController.abort();

  const outcome = await pending;
  expect(outcome.content).toContain("Aborted");
  expect(outcome.isError).toBe(true);
  expect(proxy.hasPendingRequest(id)).toBe(false);
});
|
|
183
|
+
|
|
184
|
+
// A request made with an already-aborted signal must settle with an error
// result without handing anything to the send callback.
test("returns immediately if signal already aborted", async () => {
  setup();

  const abortController = new AbortController();
  abortController.abort();

  const outcome = await proxy.request(
    "computer_use_click",
    { element_id: 1 },
    "session-1",
    1,
    undefined, // no reasoning text
    abortController.signal,
  );

  expect(outcome.content).toContain("Aborted");
  expect(outcome.isError).toBe(true);
  // No message sent
  expect(sentMessages).toHaveLength(0);
});
|
|
203
|
+
});
|
|
204
|
+
|
|
205
|
+
// -------------------------------------------------------------------------
|
|
206
|
+
// Step limit enforcement
|
|
207
|
+
// -------------------------------------------------------------------------
|
|
208
|
+
|
|
209
|
+
describe("step limit enforcement", () => {
|
|
210
|
+
// Once more actions have been recorded than maxSteps allows, a further
// request must be rejected locally with a step-limit error.
test("returns error when step count exceeds max", async () => {
  const maxSteps = 3;
  setup(maxSteps);

  // Record 4 actions (element ids 1..4) to exceed the limit.
  for (let elementId = 1; elementId <= 4; elementId++) {
    proxy.recordAction("computer_use_click", { element_id: elementId });
  }

  expect(proxy.stepCount).toBe(4);

  // The next request should be rejected without sending to the client.
  const rejected = await proxy.request(
    "computer_use_click",
    { element_id: 5 },
    "session-1",
    5,
  );

  expect(rejected.isError).toBe(true);
  expect(rejected.content).toContain("Step limit (3) exceeded");
  expect(rejected.content).toContain("computer_use_done");
  // No message sent to client
  expect(sentMessages).toHaveLength(0);
});
|
|
234
|
+
|
|
235
|
+
// With the recorded step count below maxSteps, a request must be forwarded
// to the client and resolve normally.
test("allows requests within step limit", async () => {
  setup(5); // maxSteps = 5

  proxy.recordAction("computer_use_click", { element_id: 1 });
  expect(proxy.stepCount).toBe(1);

  const pending = proxy.request(
    "computer_use_click",
    { element_id: 2 },
    "session-1",
    2,
  );

  // Message was sent
  expect(sentMessages).toHaveLength(1);

  const { requestId } = sentMessages[0] as Record<string, unknown>;
  proxy.resolve(requestId as string, { axTree: "screen" });

  const outcome = await pending;
  expect(outcome.isError).toBe(false);
});
|
|
256
|
+
});
|
|
257
|
+
|
|
258
|
+
// -------------------------------------------------------------------------
|
|
259
|
+
// Loop detection
|
|
260
|
+
// -------------------------------------------------------------------------
|
|
261
|
+
|
|
262
|
+
describe("loop detection", () => {
  const REPEAT_WARNING_PREFIX = "WARNING: You've repeated";

  // Records one click per element id, in the order given.
  const recordClicks = (elementIds: number[]): void => {
    for (const elementId of elementIds) {
      proxy.recordAction("computer_use_click", { element_id: elementId });
    }
  };

  // Formats a minimal observation so the tests can inspect any injected warning.
  const observe = () => proxy.formatObservation({ axTree: "Button [1]" });

  test("injects warning when same action repeated 3 times", () => {
    setup();

    // Three identical actions in a row.
    recordClicks([42, 42, 42]);

    const { content } = observe();
    expect(content).toContain(
      "WARNING: You've repeated the same action (computer_use_click) 3 times",
    );
  });

  test("does not warn when actions differ", () => {
    setup();

    recordClicks([1, 2, 3]);

    const { content } = observe();
    expect(content).not.toContain(REPEAT_WARNING_PREFIX);
  });

  test("does not warn with fewer than 3 actions", () => {
    setup();

    recordClicks([42, 42]);

    const { content } = observe();
    expect(content).not.toContain(REPEAT_WARNING_PREFIX);
  });
});
|
|
307
|
+
|
|
308
|
+
// -------------------------------------------------------------------------
|
|
309
|
+
// Consecutive unchanged steps warning
|
|
310
|
+
// -------------------------------------------------------------------------
|
|
311
|
+
|
|
312
|
+
describe("consecutive unchanged steps", () => {
  /**
   * Drives one click round trip in the exact order the scenarios rely on:
   * issue the request, record the action, then resolve the message the
   * sender captured for this step with the supplied observation.
   *
   * `step` is 1-based; the captured message is read from `sentMessages[step - 1]`.
   * Returns the still-pending request promise so the caller decides when to await.
   */
  function clickRoundTrip(
    step: number,
    elementId: number,
    observation: Parameters<typeof proxy.resolve>[1],
  ) {
    const pending = proxy.request(
      "computer_use_click",
      { element_id: elementId },
      "session-1",
      step,
    );
    proxy.recordAction("computer_use_click", { element_id: elementId });
    const outbound = sentMessages[step - 1] as Record<string, unknown>;
    proxy.resolve(outbound.requestId as string, observation);
    return pending;
  }

  test("warns after 2 consecutive unchanged observations", async () => {
    setup();

    // Step 1 establishes the previous AX tree.
    await clickRoundTrip(1, 1, { axTree: "Button [1]" });

    // Step 2: identical AX tree and no axDiff, so this is the first unchanged step.
    const firstUnchanged = await clickRoundTrip(2, 1, {
      axTree: "Button [1]",
    });
    // Only the simple warning is expected at this point.
    expect(firstUnchanged.content).toContain("NO VISIBLE EFFECT");
    expect(firstUnchanged.content).not.toContain("2 consecutive");

    // Step 3: still identical, so this is the second unchanged step in a row.
    const secondUnchanged = await clickRoundTrip(3, 1, {
      axTree: "Button [1]",
    });
    expect(secondUnchanged.content).toContain(
      "2 consecutive actions had NO VISIBLE EFFECT",
    );
  });

  test("resets consecutive count when diff is present", async () => {
    setup();

    // Step 1 establishes the previous AX tree.
    await clickRoundTrip(1, 1, { axTree: "Button [1]" });

    // Step 2 has no diff, so it counts as one unchanged step.
    await clickRoundTrip(2, 1, { axTree: "Button [1]" });
    expect(proxy.consecutiveUnchangedSteps).toBe(1);

    // Step 3 carries a diff, so the counter should drop back to zero.
    await clickRoundTrip(3, 2, {
      axTree: "TextField [1]",
      axDiff: "+ TextField [1]\n- Button [1]",
    });
    expect(proxy.consecutiveUnchangedSteps).toBe(0);
  });
});
|
|
416
|
+
|
|
417
|
+
// -------------------------------------------------------------------------
|
|
418
|
+
// Observation formatting
|
|
419
|
+
// -------------------------------------------------------------------------
|
|
420
|
+
|
|
421
|
+
describe("observation formatting", () => {
  test("formats AX tree with markers", () => {
    setup();

    const { content, isError } = proxy.formatObservation({
      axTree: "Button [1]\nLabel [2]",
    });

    // The tree is wrapped in <ax-tree> markers under a screen-state heading.
    expect(content).toContain("<ax-tree>");
    expect(content).toContain("CURRENT SCREEN STATE:");
    expect(content).toContain("Button [1]");
    expect(content).toContain("</ax-tree>");
    expect(isError).toBe(false);
  });

  test("formats user guidance prominently", () => {
    setup();

    const { content } = proxy.formatObservation({
      axTree: "Button [1]",
      userGuidance: "Click the save button",
    });

    expect(content).toContain("USER GUIDANCE: Click the save button");

    // "Prominently" here means the guidance comes before the AX tree.
    const guidanceAt = content.indexOf("USER GUIDANCE");
    const treeAt = content.indexOf("<ax-tree>");
    expect(guidanceAt).toBeLessThan(treeAt);
  });

  test("formats execution result", () => {
    setup();

    const { content } = proxy.formatObservation({
      executionResult: "Element clicked successfully",
      axTree: "Button [1]",
    });

    expect(content).toContain("Element clicked successfully");
  });

  test("formats execution error", () => {
    setup();

    const { content, isError } = proxy.formatObservation({
      executionError: "Element not found",
      axTree: "Window [1]",
    });

    expect(isError).toBe(true);
    expect(content).toContain("Action failed: Element not found");
  });

  test("returns 'Action executed' when observation is empty", () => {
    setup();

    const { content, isError } = proxy.formatObservation({});

    expect(content).toBe("Action executed");
    expect(isError).toBe(false);
  });

  test("includes screenshot metadata", () => {
    setup();

    const { content } = proxy.formatObservation({
      screenshot: "base64data",
      screenshotWidthPx: 2560,
      screenshotHeightPx: 1440,
      screenWidthPt: 1280,
      screenHeightPt: 720,
    });

    // Both the pixel and the point dimensions are reported.
    expect(content).toContain("2560x1440 px");
    expect(content).toContain("1280x720 pt");
  });

  test("escapes </ax-tree> in AX tree content", () => {
    setup();

    const { content } = proxy.formatObservation({
      axTree: "Some content with </ax-tree> inside",
    });

    // NOTE(review): this check also passes if the payload is left unescaped,
    // because the input itself contains the marker. Consider asserting on the
    // escaped form once it is confirmed against the implementation.
    expect(content).toContain("</ax-tree>");
    // The genuine closing marker must still terminate a line.
    expect(content).toMatch(/<\/ax-tree>$/m);
  });

  test("includes diff when present", () => {
    setup();

    const { content } = proxy.formatObservation({
      axTree: "TextField [1]",
      axDiff: "+ TextField [1]\n- Button [1]",
    });

    expect(content).toContain("+ TextField [1]");
    expect(content).toContain("- Button [1]");
  });

  test("no screenshot content blocks when screenshot absent", () => {
    setup();

    const formatted = proxy.formatObservation({
      axTree: "Button [1]",
    });

    expect(formatted.contentBlocks).toBeUndefined();
  });
});
|
|
532
|
+
|
|
533
|
+
// -------------------------------------------------------------------------
|
|
534
|
+
// CU state: reset
|
|
535
|
+
// -------------------------------------------------------------------------
|
|
536
|
+
|
|
537
|
+
describe("reset", () => {
  // reset() should return every piece of computer-use state checked below
  // to its initial value.
  test("clears all CU state", () => {
    setup();

    // Build up some state first so the reset has something to clear.
    for (const elementId of [1, 2]) {
      proxy.recordAction("computer_use_click", { element_id: elementId });
    }
    expect(proxy.stepCount).toBe(2);
    expect(proxy.actionHistory).toHaveLength(2);

    proxy.reset();

    expect(proxy.stepCount).toBe(0);
    expect(proxy.actionHistory).toHaveLength(0);
    expect(proxy.previousAXTree).toBeUndefined();
    expect(proxy.consecutiveUnchangedSteps).toBe(0);
  });
});
|
|
554
|
+
|
|
555
|
+
// -------------------------------------------------------------------------
|
|
556
|
+
// CU state: action history bounding
|
|
557
|
+
// -------------------------------------------------------------------------
|
|
558
|
+
|
|
559
|
+
describe("action history bounding", () => {
  // The history is capped at 10 entries while stepCount keeps counting.
  test("keeps only last 10 entries", () => {
    setup();

    // Record 15 clicks with element ids 0..14.
    Array.from({ length: 15 }, (_, index) => index).forEach((elementId) => {
      proxy.recordAction("computer_use_click", { element_id: elementId });
    });

    expect(proxy.actionHistory).toHaveLength(10);
    // Entries for steps 1-5 were trimmed, so the oldest survivor is step 6.
    expect(proxy.actionHistory[0].step).toBe(6);
    expect(proxy.stepCount).toBe(15);
  });
});
|
|
573
|
+
|
|
574
|
+
// -------------------------------------------------------------------------
|
|
575
|
+
// Dispose
|
|
576
|
+
// -------------------------------------------------------------------------
|
|
577
|
+
|
|
578
|
+
describe("dispose", () => {
  // dispose() must drop every in-flight request and reject its promise.
  // The test is async so the `.rejects` assertion is awaited; left un-awaited,
  // the test could finish before the rejection was actually checked.
  test("rejects all pending requests", async () => {
    setup();

    const resultPromise = proxy.request(
      "computer_use_click",
      { element_id: 1 },
      "session-1",
      1,
    );

    const sent = sentMessages[0] as Record<string, unknown>;
    const requestId = sent.requestId as string;
    expect(proxy.hasPendingRequest(requestId)).toBe(true);

    proxy.dispose();

    expect(proxy.hasPendingRequest(requestId)).toBe(false);
    await expect(resultPromise).rejects.toThrow("Host CU proxy disposed");
  });
});
|
|
599
|
+
|
|
600
|
+
// -------------------------------------------------------------------------
|
|
601
|
+
// updateSender
|
|
602
|
+
// -------------------------------------------------------------------------
|
|
603
|
+
|
|
604
|
+
describe("updateSender", () => {
  // After updateSender(), new requests must go through the replacement sender
  // and leave the original capture array untouched.
  test("uses updated sender for new requests", async () => {
    setup();

    const replacementOutbox: unknown[] = [];
    proxy.updateSender((msg) => replacementOutbox.push(msg), true);

    const pending = proxy.request(
      "computer_use_click",
      { element_id: 1 },
      "session-1",
      1,
    );

    expect(sentMessages).toHaveLength(0); // Old sender not used
    expect(replacementOutbox).toHaveLength(1); // New sender used

    // Resolve the captured request so the pending promise can settle.
    const { requestId } = replacementOutbox[0] as Record<string, unknown>;
    proxy.resolve(requestId as string, {
      axTree: "Button [1]",
    });

    await pending;
  });
});
|
|
629
|
+
});
|