@vellumai/assistant 0.4.46 → 0.4.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +7 -7
- package/README.md +2 -23
- package/docs/architecture/integrations.md +45 -41
- package/docs/architecture/keychain-broker.md +3 -3
- package/docs/architecture/security.md +5 -5
- package/docs/runbook-trusted-contacts.md +3 -8
- package/hook-templates/debug-prompt-logger/hook.json +1 -1
- package/hook-templates/debug-prompt-logger/run.sh +1 -3
- package/package.json +1 -1
- package/src/__tests__/actor-token-service.test.ts +0 -1
- package/src/__tests__/anthropic-provider.test.ts +156 -0
- package/src/__tests__/approval-cascade.test.ts +810 -0
- package/src/__tests__/approval-primitive.test.ts +0 -1
- package/src/__tests__/approval-routes-http.test.ts +2 -0
- package/src/__tests__/assistant-attachments.test.ts +12 -34
- package/src/__tests__/assistant-feature-flag-guardrails.test.ts +76 -0
- package/src/__tests__/assistant-feature-flags-integration.test.ts +0 -1
- package/src/__tests__/browser-fill-credential.test.ts +5 -2
- package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +2 -2
- package/src/__tests__/bundled-skill-retrieval-guard.test.ts +2 -1
- package/src/__tests__/channel-guardian.test.ts +0 -2
- package/src/__tests__/channel-readiness-routes.test.ts +35 -25
- package/src/__tests__/channel-readiness-service.test.ts +10 -9
- package/src/__tests__/checker.test.ts +9 -29
- package/src/__tests__/cli.test.ts +23 -0
- package/src/__tests__/computer-use-skill-manifest-regression.test.ts +1 -1
- package/src/__tests__/computer-use-tools.test.ts +2 -19
- package/src/__tests__/config-watcher.test.ts +0 -1
- package/src/__tests__/confirmation-request-guardian-bridge.test.ts +0 -1
- package/src/__tests__/context-image-dimensions.test.ts +332 -0
- package/src/__tests__/context-token-estimator.test.ts +196 -13
- package/src/__tests__/conversation-attention-store.test.ts +0 -1
- package/src/__tests__/conversation-attention-telegram.test.ts +0 -1
- package/src/__tests__/conversation-routes-guardian-reply.test.ts +144 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
- package/src/__tests__/credential-broker-browser-fill.test.ts +23 -22
- package/src/__tests__/credential-broker-server-use.test.ts +22 -21
- package/src/__tests__/credential-broker.test.ts +2 -1
- package/src/__tests__/credential-metadata-store.test.ts +239 -26
- package/src/__tests__/credential-resolve.test.ts +5 -4
- package/src/__tests__/credential-security-e2e.test.ts +8 -8
- package/src/__tests__/credential-security-invariants.test.ts +111 -7
- package/src/__tests__/credential-vault-unit.test.ts +287 -54
- package/src/__tests__/credential-vault.test.ts +406 -12
- package/src/__tests__/credentials-cli.test.ts +82 -6
- package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +0 -1
- package/src/__tests__/ephemeral-permissions.test.ts +3 -3
- package/src/__tests__/gateway-only-enforcement.test.ts +4 -2
- package/src/__tests__/gateway-only-guard.test.ts +0 -1
- package/src/__tests__/gemini-image-service.test.ts +75 -45
- package/src/__tests__/gemini-provider.test.ts +9 -6
- package/src/__tests__/guardian-action-conversation-turn.test.ts +1 -33
- package/src/__tests__/guardian-action-copy-generator.test.ts +0 -20
- package/src/__tests__/guardian-action-followup-executor.test.ts +1 -28
- package/src/__tests__/guardian-action-followup-store.test.ts +1 -1
- package/src/__tests__/guardian-action-grant-mint-consume.test.ts +0 -1
- package/src/__tests__/guardian-decision-primitive-canonical.test.ts +0 -1
- package/src/__tests__/guardian-grant-minting.test.ts +35 -0
- package/src/__tests__/guardian-routing-invariants.test.ts +0 -1
- package/src/__tests__/guardian-verification-voice-binding.test.ts +0 -1
- package/src/__tests__/handlers-user-message-approval-consumption.test.ts +0 -39
- package/src/__tests__/heartbeat-service.test.ts +0 -1
- package/src/__tests__/host-cu-proxy.test.ts +629 -0
- package/src/__tests__/host-shell-tool.test.ts +27 -15
- package/src/__tests__/http-user-message-parity.test.ts +1 -0
- package/src/__tests__/ingress-url-consistency.test.ts +14 -21
- package/src/__tests__/integration-status.test.ts +38 -25
- package/src/__tests__/intent-routing.test.ts +0 -1
- package/src/__tests__/invite-routes-http.test.ts +10 -9
- package/src/__tests__/keychain-broker-client.test.ts +11 -43
- package/src/__tests__/managed-proxy-context.test.ts +5 -3
- package/src/__tests__/media-generate-image.test.ts +63 -2
- package/src/__tests__/media-reuse-story.e2e.test.ts +7 -3
- package/src/__tests__/messaging-send-tool.test.ts +4 -6
- package/src/__tests__/notification-routing-intent.test.ts +0 -1
- package/src/__tests__/oauth-cli.test.ts +373 -14
- package/src/__tests__/oauth-provider-profiles.test.ts +9 -9
- package/src/__tests__/oauth-scope-policy.test.ts +4 -6
- package/src/__tests__/oauth-store.test.ts +756 -0
- package/src/__tests__/onboarding-starter-tasks.test.ts +0 -1
- package/src/__tests__/provider-error-scenarios.test.ts +0 -1
- package/src/__tests__/provider-fail-open-selection.test.ts +3 -1
- package/src/__tests__/provider-managed-proxy-integration.test.ts +70 -6
- package/src/__tests__/provider-streaming.benchmark.test.ts +0 -1
- package/src/__tests__/public-ingress-urls.test.ts +15 -21
- package/src/__tests__/recording-handler.test.ts +3 -4
- package/src/__tests__/registry.test.ts +2 -2
- package/src/__tests__/runtime-events-sse.test.ts +55 -7
- package/src/__tests__/schedule-store.test.ts +0 -1
- package/src/__tests__/scheduler-recurrence.test.ts +0 -1
- package/src/__tests__/schema-transforms.test.ts +226 -0
- package/src/__tests__/scoped-approval-grants.test.ts +0 -1
- package/src/__tests__/scoped-grant-security-matrix.test.ts +0 -1
- package/src/__tests__/script-proxy-injection-runtime.test.ts +23 -13
- package/src/__tests__/script-proxy-policy-runtime.test.ts +1 -1
- package/src/__tests__/script-proxy-session-manager.test.ts +1 -1
- package/src/__tests__/secret-ingress-handler.test.ts +0 -1
- package/src/__tests__/secret-onetime-send.test.ts +5 -3
- package/src/__tests__/send-endpoint-busy.test.ts +21 -6
- package/src/__tests__/sequence-store.test.ts +0 -1
- package/src/__tests__/session-init.benchmark.test.ts +4 -5
- package/src/__tests__/session-messaging-secret-redirect.test.ts +5 -4
- package/src/__tests__/skill-include-graph.test.ts +66 -0
- package/src/__tests__/skill-load-feature-flag.test.ts +0 -1
- package/src/__tests__/skill-load-tool.test.ts +149 -1
- package/src/__tests__/skill-projection-feature-flag.test.ts +0 -1
- package/src/__tests__/skills-uninstall.test.ts +3 -3
- package/src/__tests__/skills.test.ts +3 -12
- package/src/__tests__/slack-channel-config.test.ts +76 -11
- package/src/__tests__/slack-share-routes.test.ts +17 -14
- package/src/__tests__/system-prompt.test.ts +0 -1
- package/src/__tests__/telegram-bot-username-resolution.test.ts +3 -0
- package/src/__tests__/telegram-invite-adapter.test.ts +18 -22
- package/src/__tests__/terminal-tools.test.ts +4 -3
- package/src/__tests__/test-support/computer-use-skill-harness.ts +3 -2
- package/src/__tests__/tool-approval-handler.test.ts +0 -1
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -1
- package/src/__tests__/tool-executor-lifecycle-events.test.ts +0 -1
- package/src/__tests__/tool-executor-shell-integration.test.ts +0 -1
- package/src/__tests__/tool-executor.test.ts +0 -1
- package/src/__tests__/tool-grant-request-escalation.test.ts +0 -1
- package/src/__tests__/trust-store-pattern-matches.test.ts +29 -0
- package/src/__tests__/trust-store.test.ts +1 -22
- package/src/__tests__/trusted-contact-approval-notifier.test.ts +0 -1
- package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +0 -1
- package/src/__tests__/twilio-config.test.ts +2 -1
- package/src/__tests__/twilio-provider.test.ts +4 -2
- package/src/__tests__/twilio-routes.test.ts +5 -20
- package/src/__tests__/verification-control-plane-policy.test.ts +0 -1
- package/src/__tests__/voice-scoped-grant-consumer.test.ts +0 -1
- package/src/agent/ax-tree-compaction.test.ts +235 -0
- package/src/agent/loop.ts +76 -130
- package/src/calls/call-domain.ts +8 -10
- package/src/calls/relay-server.ts +9 -13
- package/src/calls/twilio-config.ts +4 -8
- package/src/calls/twilio-provider.ts +2 -1
- package/src/calls/twilio-rest.ts +2 -1
- package/src/calls/twilio-routes.ts +1 -2
- package/src/calls/voice-ingress-preflight.ts +1 -1
- package/src/cli/commands/browser-relay.ts +46 -15
- package/src/cli/commands/completions.ts +0 -3
- package/src/cli/commands/credentials.ts +110 -23
- package/src/cli/commands/oauth/apps.ts +255 -0
- package/src/cli/commands/oauth/connections.ts +299 -0
- package/src/cli/commands/oauth/index.ts +52 -0
- package/src/cli/commands/oauth/providers.ts +242 -0
- package/src/cli/commands/skills.ts +4 -338
- package/src/cli/program.ts +1 -5
- package/src/cli/reference.ts +1 -3
- package/src/cli.ts +3 -2
- package/src/config/assistant-feature-flags.ts +0 -3
- package/src/config/bundled-skills/_shared/CLI_RETRIEVAL_PATTERN.md +1 -1
- package/src/config/bundled-skills/claude-code/TOOLS.json +0 -4
- package/src/config/bundled-skills/computer-use/SKILL.md +3 -6
- package/src/config/bundled-skills/computer-use/TOOLS.json +22 -4
- package/src/config/bundled-skills/contacts/tools/google-contacts.ts +29 -32
- package/src/config/bundled-skills/gmail/SKILL.md +4 -4
- package/src/config/bundled-skills/gmail/tools/gmail-archive.ts +54 -61
- package/src/config/bundled-skills/gmail/tools/gmail-attachments.ts +25 -28
- package/src/config/bundled-skills/gmail/tools/gmail-draft.ts +14 -17
- package/src/config/bundled-skills/gmail/tools/gmail-filters.ts +39 -44
- package/src/config/bundled-skills/gmail/tools/gmail-follow-up.ts +61 -58
- package/src/config/bundled-skills/gmail/tools/gmail-forward.ts +50 -49
- package/src/config/bundled-skills/gmail/tools/gmail-label.ts +11 -13
- package/src/config/bundled-skills/gmail/tools/gmail-outreach-scan.ts +148 -146
- package/src/config/bundled-skills/gmail/tools/gmail-send-draft.ts +4 -7
- package/src/config/bundled-skills/gmail/tools/gmail-sender-digest.ts +175 -173
- package/src/config/bundled-skills/gmail/tools/gmail-trash.ts +4 -7
- package/src/config/bundled-skills/gmail/tools/gmail-unsubscribe.ts +71 -76
- package/src/config/bundled-skills/gmail/tools/gmail-vacation.ts +32 -38
- package/src/config/bundled-skills/google-calendar/SKILL.md +2 -2
- package/src/config/bundled-skills/google-calendar/calendar-client.ts +90 -44
- package/src/config/bundled-skills/google-calendar/tools/calendar-check-availability.ts +9 -10
- package/src/config/bundled-skills/google-calendar/tools/calendar-create-event.ts +5 -6
- package/src/config/bundled-skills/google-calendar/tools/calendar-get-event.ts +4 -5
- package/src/config/bundled-skills/google-calendar/tools/calendar-list-events.ts +14 -15
- package/src/config/bundled-skills/google-calendar/tools/calendar-rsvp.ts +37 -37
- package/src/config/bundled-skills/google-calendar/tools/shared.ts +4 -9
- package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +24 -3
- package/src/config/bundled-skills/messaging/SKILL.md +6 -6
- package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +62 -63
- package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +15 -16
- package/src/config/bundled-skills/messaging/tools/messaging-auth-test.ts +4 -5
- package/src/config/bundled-skills/messaging/tools/messaging-list-conversations.ts +6 -7
- package/src/config/bundled-skills/messaging/tools/messaging-mark-read.ts +4 -5
- package/src/config/bundled-skills/messaging/tools/messaging-read.ts +14 -15
- package/src/config/bundled-skills/messaging/tools/messaging-search.ts +4 -5
- package/src/config/bundled-skills/messaging/tools/messaging-send.ts +128 -128
- package/src/config/bundled-skills/messaging/tools/messaging-sender-digest.ts +33 -34
- package/src/config/bundled-skills/messaging/tools/shared.ts +12 -15
- package/src/config/bundled-skills/settings/SKILL.md +1 -1
- package/src/config/bundled-skills/settings/TOOLS.json +2 -8
- package/src/config/bundled-skills/settings/tools/voice-config-update.ts +5 -33
- package/src/config/bundled-skills/slack/tools/shared.ts +4 -10
- package/src/config/bundled-skills/slack/tools/slack-add-reaction.ts +4 -5
- package/src/config/bundled-skills/slack/tools/slack-channel-details.ts +15 -16
- package/src/config/bundled-skills/slack/tools/slack-delete-message.ts +4 -5
- package/src/config/bundled-skills/slack/tools/slack-edit-message.ts +4 -5
- package/src/config/bundled-skills/slack/tools/slack-leave-channel.ts +4 -5
- package/src/config/bundled-skills/slack/tools/slack-scan-digest.ts +95 -92
- package/src/config/env-registry.ts +14 -83
- package/src/config/env.ts +11 -50
- package/src/config/feature-flag-registry.json +16 -16
- package/src/config/schema.ts +3 -1
- package/src/config/skills.ts +21 -2
- package/src/context/image-dimensions.ts +229 -0
- package/src/context/token-estimator.ts +75 -12
- package/src/context/window-manager.ts +49 -10
- package/src/daemon/assistant-attachments.ts +1 -13
- package/src/daemon/guardian-action-generators.ts +4 -5
- package/src/daemon/handlers/config-ingress.ts +8 -33
- package/src/daemon/handlers/config-slack-channel.ts +76 -56
- package/src/daemon/handlers/config-telegram.ts +53 -24
- package/src/daemon/handlers/sessions.ts +10 -24
- package/src/daemon/handlers/shared.ts +0 -130
- package/src/daemon/host-cu-proxy.ts +401 -0
- package/src/daemon/lifecycle.ts +39 -63
- package/src/daemon/message-protocol.ts +3 -0
- package/src/daemon/message-types/computer-use.ts +2 -119
- package/src/daemon/message-types/host-cu.ts +19 -0
- package/src/daemon/message-types/integrations.ts +1 -0
- package/src/daemon/message-types/messages.ts +3 -0
- package/src/daemon/server.ts +14 -21
- package/src/daemon/session-agent-loop-handlers.ts +2 -0
- package/src/daemon/session-attachments.ts +1 -2
- package/src/daemon/session-messaging.ts +3 -1
- package/src/daemon/session-slash.ts +1 -1
- package/src/daemon/session-surfaces.ts +40 -28
- package/src/daemon/session-tool-setup.ts +20 -11
- package/src/daemon/session.ts +139 -16
- package/src/daemon/tool-side-effects.ts +2 -8
- package/src/daemon/watch-handler.ts +2 -2
- package/src/email/providers/index.ts +2 -1
- package/src/events/tool-metrics-listener.ts +2 -2
- package/src/hooks/manager.ts +1 -4
- package/src/inbound/public-ingress-urls.ts +7 -7
- package/src/instrument.ts +15 -1
- package/src/logfire.ts +16 -5
- package/src/media/app-icon-generator.ts +30 -4
- package/src/media/avatar-router.ts +26 -3
- package/src/media/gemini-image-service.ts +28 -2
- package/src/memory/conversation-key-store.ts +21 -0
- package/src/memory/db-init.ts +4 -0
- package/src/memory/guardian-action-store.ts +1 -1
- package/src/memory/migrations/149-oauth-tables.ts +60 -0
- package/src/memory/migrations/index.ts +1 -0
- package/src/memory/schema/guardian.ts +1 -1
- package/src/memory/schema/index.ts +1 -0
- package/src/memory/schema/oauth.ts +65 -0
- package/src/messaging/provider.ts +19 -13
- package/src/messaging/providers/gmail/adapter.ts +40 -23
- package/src/messaging/providers/gmail/client.ts +283 -122
- package/src/messaging/providers/gmail/people-client.ts +32 -24
- package/src/messaging/providers/slack/adapter.ts +29 -19
- package/src/messaging/providers/slack/client.ts +265 -78
- package/src/messaging/providers/telegram-bot/adapter.ts +19 -18
- package/src/messaging/providers/whatsapp/adapter.ts +17 -11
- package/src/messaging/registry.ts +2 -31
- package/src/notifications/copy-composer.ts +0 -5
- package/src/notifications/signal.ts +4 -5
- package/src/oauth/byo-connection.test.ts +537 -0
- package/src/oauth/byo-connection.ts +128 -0
- package/src/oauth/connect-orchestrator.ts +139 -56
- package/src/oauth/connect-types.ts +17 -23
- package/src/oauth/connection-resolver.ts +58 -0
- package/src/oauth/connection.ts +38 -0
- package/src/oauth/manual-token-connection.ts +104 -0
- package/src/oauth/oauth-store.ts +496 -0
- package/src/oauth/platform-connection.test.ts +192 -0
- package/src/oauth/platform-connection.ts +111 -0
- package/src/oauth/provider-behaviors.ts +124 -0
- package/src/oauth/scope-policy.ts +9 -2
- package/src/oauth/seed-providers.ts +161 -0
- package/src/oauth/token-persistence.ts +74 -78
- package/src/permissions/checker.ts +8 -4
- package/src/permissions/defaults.ts +0 -1
- package/src/permissions/prompter.ts +10 -1
- package/src/permissions/trust-store.ts +13 -0
- package/src/prompts/__tests__/build-cli-reference-section.test.ts +3 -1
- package/src/prompts/system-prompt.ts +70 -45
- package/src/providers/anthropic/client.ts +133 -24
- package/src/providers/gemini/client.ts +15 -6
- package/src/providers/managed-proxy/constants.ts +2 -2
- package/src/providers/managed-proxy/context.ts +5 -1
- package/src/providers/ratelimit.ts +17 -0
- package/src/providers/registry.ts +2 -2
- package/src/providers/retry.ts +1 -27
- package/src/runtime/AGENTS.md +17 -0
- package/src/runtime/auth/route-policy.ts +0 -3
- package/src/runtime/channel-invite-transports/telegram.ts +2 -1
- package/src/runtime/channel-readiness-service.ts +168 -195
- package/src/runtime/channel-readiness-types.ts +4 -0
- package/src/runtime/channel-reply-delivery.ts +0 -40
- package/src/runtime/gateway-client.ts +0 -7
- package/src/runtime/guardian-action-conversation-turn.ts +1 -3
- package/src/runtime/guardian-action-followup-executor.ts +1 -1
- package/src/runtime/guardian-action-message-composer.ts +3 -23
- package/src/runtime/http-server.ts +17 -10
- package/src/runtime/http-types.ts +2 -3
- package/src/runtime/middleware/rate-limiter.ts +74 -20
- package/src/runtime/middleware/twilio-validation.ts +1 -11
- package/src/runtime/pending-interactions.ts +14 -12
- package/src/runtime/routes/channel-delivery-routes.ts +0 -1
- package/src/runtime/routes/channel-readiness-routes.ts +2 -0
- package/src/runtime/routes/conversation-routes.ts +73 -19
- package/src/runtime/routes/diagnostics-routes.ts +11 -9
- package/src/runtime/routes/events-routes.ts +21 -11
- package/src/runtime/routes/guardian-approval-interception.ts +20 -5
- package/src/runtime/routes/host-cu-routes.ts +97 -0
- package/src/runtime/routes/inbound-stages/background-dispatch.ts +12 -111
- package/src/runtime/routes/integrations/slack/share.ts +6 -6
- package/src/runtime/routes/integrations/twilio.ts +6 -5
- package/src/runtime/routes/log-export-routes.ts +126 -8
- package/src/runtime/routes/secret-routes.ts +3 -2
- package/src/runtime/routes/settings-routes.ts +113 -48
- package/src/runtime/routes/surface-action-routes.ts +1 -1
- package/src/runtime/routes/watch-routes.ts +128 -0
- package/src/schedule/integration-status.ts +10 -8
- package/src/security/credential-key.ts +14 -0
- package/src/security/keychain-broker-client.ts +5 -6
- package/src/security/oauth2.ts +1 -1
- package/src/security/token-manager.ts +145 -43
- package/src/skills/catalog-install.ts +358 -0
- package/src/skills/include-graph.ts +32 -0
- package/src/telegram/bot-username.ts +2 -3
- package/src/tools/apps/definitions.ts +0 -5
- package/src/tools/assets/materialize.ts +0 -5
- package/src/tools/assets/search.ts +0 -5
- package/src/tools/browser/headless-browser.ts +1 -67
- package/src/tools/browser/network-recorder.ts +1 -1
- package/src/tools/browser/network-recording-types.ts +1 -1
- package/src/tools/claude-code/claude-code.ts +0 -5
- package/src/tools/computer-use/definitions.ts +46 -11
- package/src/tools/computer-use/registry.ts +4 -5
- package/src/tools/credentials/broker.ts +5 -4
- package/src/tools/credentials/metadata-store.ts +22 -74
- package/src/tools/credentials/resolve.ts +2 -1
- package/src/tools/credentials/vault.ts +139 -151
- package/src/tools/filesystem/edit.ts +1 -6
- package/src/tools/filesystem/read.ts +0 -5
- package/src/tools/filesystem/write.ts +1 -6
- package/src/tools/host-filesystem/edit.ts +1 -6
- package/src/tools/host-filesystem/read.ts +1 -6
- package/src/tools/host-filesystem/write.ts +1 -6
- package/src/tools/mcp/mcp-tool-factory.ts +18 -1
- package/src/tools/memory/definitions.ts +0 -5
- package/src/tools/network/web-fetch.ts +0 -5
- package/src/tools/network/web-search.ts +0 -5
- package/src/tools/registry.ts +2 -7
- package/src/tools/schema-transforms.ts +99 -0
- package/src/tools/skills/load.ts +62 -8
- package/src/tools/swarm/delegate.ts +0 -5
- package/src/tools/system/avatar-generator.ts +0 -5
- package/src/tools/ui-surface/definitions.ts +0 -15
- package/src/tools/watch/screen-watch.ts +0 -5
- package/src/tools/watch/watch-state.ts +0 -12
- package/src/util/logger.ts +7 -41
- package/src/util/platform.ts +9 -28
- package/src/version.ts +10 -0
- package/src/watcher/providers/github.ts +51 -52
- package/src/watcher/providers/gmail.ts +88 -80
- package/src/watcher/providers/google-calendar.ts +94 -86
- package/src/watcher/providers/linear.ts +87 -93
- package/src/__tests__/computer-use-session-compaction.test.ts +0 -143
- package/src/__tests__/computer-use-session-lifecycle.test.ts +0 -322
- package/src/__tests__/computer-use-session-working-dir.test.ts +0 -166
- package/src/__tests__/computer-use-skill-baseline.test.ts +0 -78
- package/src/__tests__/computer-use-skill-endstate.test.ts +0 -105
- package/src/__tests__/computer-use-skill-lifecycle-cleanup.test.ts +0 -249
- package/src/__tests__/ride-shotgun-handler.test.ts +0 -452
- package/src/cli/commands/dev.ts +0 -129
- package/src/cli/commands/map.ts +0 -391
- package/src/cli/commands/oauth.ts +0 -77
- package/src/config/bundled-skills/computer-use/tools/computer-use-request-control.ts +0 -16
- package/src/daemon/computer-use-session.ts +0 -1020
- package/src/daemon/ride-shotgun-handler.ts +0 -567
- package/src/oauth/provider-profiles.ts +0 -192
- package/src/prompts/computer-use-prompt.ts +0 -98
- package/src/runtime/routes/computer-use-routes.ts +0 -641
- package/src/runtime/telegram-streaming-delivery.test.ts +0 -597
- package/src/runtime/telegram-streaming-delivery.ts +0 -383
- package/src/tools/computer-use/request-computer-control.ts +0 -61
|
@@ -1,322 +0,0 @@
|
|
|
1
|
-
import { describe, expect, mock, test } from "bun:test";
|
|
2
|
-
|
|
3
|
-
// Mock config before importing modules that depend on it.
|
|
4
|
-
// The permissions mode must be 'workspace' so computer-use tools
|
|
5
|
-
// go through normal workspace trust evaluation instead of prompting.
|
|
6
|
-
mock.module("../config/loader.js", () => ({
|
|
7
|
-
getConfig: () => ({
|
|
8
|
-
ui: {},
|
|
9
|
-
|
|
10
|
-
provider: "mock-provider",
|
|
11
|
-
permissions: { mode: "workspace" },
|
|
12
|
-
apiKeys: {},
|
|
13
|
-
sandbox: { enabled: false },
|
|
14
|
-
timeouts: { toolExecutionTimeoutSec: 30, permissionTimeoutSec: 5 },
|
|
15
|
-
skills: { load: { extraDirs: [] } },
|
|
16
|
-
secretDetection: { enabled: false },
|
|
17
|
-
contextWindow: {
|
|
18
|
-
enabled: true,
|
|
19
|
-
maxInputTokens: 180000,
|
|
20
|
-
targetBudgetRatio: 0.3,
|
|
21
|
-
compactThreshold: 0.8,
|
|
22
|
-
summaryBudgetRatio: 0.05,
|
|
23
|
-
},
|
|
24
|
-
}),
|
|
25
|
-
invalidateConfigCache: () => {},
|
|
26
|
-
}));
|
|
27
|
-
|
|
28
|
-
import { ComputerUseSession } from "../daemon/computer-use-session.js";
|
|
29
|
-
import type {
|
|
30
|
-
CuObservation,
|
|
31
|
-
ServerMessage,
|
|
32
|
-
} from "../daemon/message-protocol.js";
|
|
33
|
-
import type { Provider, ProviderResponse } from "../providers/types.js";
|
|
34
|
-
|
|
35
|
-
function createProvider(responses: ProviderResponse[]): {
|
|
36
|
-
provider: Provider;
|
|
37
|
-
getCalls: () => number;
|
|
38
|
-
} {
|
|
39
|
-
let calls = 0;
|
|
40
|
-
const provider: Provider = {
|
|
41
|
-
name: "mock",
|
|
42
|
-
async sendMessage() {
|
|
43
|
-
const response = responses[calls] ?? responses[responses.length - 1];
|
|
44
|
-
calls++;
|
|
45
|
-
return response;
|
|
46
|
-
},
|
|
47
|
-
};
|
|
48
|
-
return { provider, getCalls: () => calls };
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
describe("ComputerUseSession lifecycle", () => {
|
|
52
|
-
test("stops provider loop immediately after terminal computer_use_done tool", async () => {
|
|
53
|
-
const { provider, getCalls } = createProvider([
|
|
54
|
-
{
|
|
55
|
-
content: [
|
|
56
|
-
{
|
|
57
|
-
type: "tool_use",
|
|
58
|
-
id: "tu-1",
|
|
59
|
-
name: "computer_use_done",
|
|
60
|
-
input: { summary: "Task finished" },
|
|
61
|
-
},
|
|
62
|
-
],
|
|
63
|
-
model: "mock-model",
|
|
64
|
-
usage: { inputTokens: 10, outputTokens: 5 },
|
|
65
|
-
stopReason: "tool_use",
|
|
66
|
-
},
|
|
67
|
-
{
|
|
68
|
-
content: [{ type: "text", text: "This should never be requested" }],
|
|
69
|
-
model: "mock-model",
|
|
70
|
-
usage: { inputTokens: 10, outputTokens: 5 },
|
|
71
|
-
stopReason: "end_turn",
|
|
72
|
-
},
|
|
73
|
-
]);
|
|
74
|
-
|
|
75
|
-
const sentMessages: ServerMessage[] = [];
|
|
76
|
-
let terminalCalls = 0;
|
|
77
|
-
|
|
78
|
-
const session = new ComputerUseSession(
|
|
79
|
-
"cu-test-1",
|
|
80
|
-
"test task",
|
|
81
|
-
1440,
|
|
82
|
-
900,
|
|
83
|
-
provider,
|
|
84
|
-
(msg) => {
|
|
85
|
-
sentMessages.push(msg);
|
|
86
|
-
},
|
|
87
|
-
"computer_use",
|
|
88
|
-
() => {
|
|
89
|
-
terminalCalls++;
|
|
90
|
-
},
|
|
91
|
-
);
|
|
92
|
-
|
|
93
|
-
const observation: CuObservation = {
|
|
94
|
-
type: "cu_observation",
|
|
95
|
-
sessionId: "cu-test-1",
|
|
96
|
-
axTree: 'Window "Test" [1]',
|
|
97
|
-
};
|
|
98
|
-
|
|
99
|
-
await session.handleObservation(observation);
|
|
100
|
-
|
|
101
|
-
// If computer_use_done does not abort the loop, we'd see an extra provider call.
|
|
102
|
-
expect(getCalls()).toBe(1);
|
|
103
|
-
expect(session.getState()).toBe("complete");
|
|
104
|
-
expect(terminalCalls).toBe(1);
|
|
105
|
-
|
|
106
|
-
const completes = sentMessages.filter(
|
|
107
|
-
(msg): msg is Extract<ServerMessage, { type: "cu_complete" }> =>
|
|
108
|
-
msg.type === "cu_complete",
|
|
109
|
-
);
|
|
110
|
-
expect(completes).toHaveLength(1);
|
|
111
|
-
expect(completes[0].summary).toBe("Task finished");
|
|
112
|
-
});
|
|
113
|
-
|
|
114
|
-
test("notifies terminal callback only once on repeated abort calls", () => {
|
|
115
|
-
const { provider } = createProvider([
|
|
116
|
-
{
|
|
117
|
-
content: [{ type: "text", text: "unused" }],
|
|
118
|
-
model: "mock-model",
|
|
119
|
-
usage: { inputTokens: 1, outputTokens: 1 },
|
|
120
|
-
stopReason: "end_turn",
|
|
121
|
-
},
|
|
122
|
-
]);
|
|
123
|
-
|
|
124
|
-
let terminalCalls = 0;
|
|
125
|
-
const session = new ComputerUseSession(
|
|
126
|
-
"cu-test-2",
|
|
127
|
-
"test task",
|
|
128
|
-
1440,
|
|
129
|
-
900,
|
|
130
|
-
provider,
|
|
131
|
-
() => {},
|
|
132
|
-
"computer_use",
|
|
133
|
-
() => {
|
|
134
|
-
terminalCalls++;
|
|
135
|
-
},
|
|
136
|
-
);
|
|
137
|
-
|
|
138
|
-
session.abort();
|
|
139
|
-
session.abort();
|
|
140
|
-
|
|
141
|
-
expect(terminalCalls).toBe(1);
|
|
142
|
-
expect(session.getState()).toBe("error");
|
|
143
|
-
});
|
|
144
|
-
|
|
145
|
-
test("CU session passes exactly 10 computer_use_* tools to the agent loop", async () => {
|
|
146
|
-
let capturedTools: string[] = [];
|
|
147
|
-
const provider: Provider = {
|
|
148
|
-
name: "mock",
|
|
149
|
-
async sendMessage(_msgs, tools) {
|
|
150
|
-
capturedTools = (tools ?? []).map((t) => t.name);
|
|
151
|
-
return {
|
|
152
|
-
content: [
|
|
153
|
-
{
|
|
154
|
-
type: "tool_use",
|
|
155
|
-
id: "tu-capture",
|
|
156
|
-
name: "computer_use_done",
|
|
157
|
-
input: { summary: "Done" },
|
|
158
|
-
},
|
|
159
|
-
],
|
|
160
|
-
model: "mock-model",
|
|
161
|
-
usage: { inputTokens: 10, outputTokens: 5 },
|
|
162
|
-
stopReason: "tool_use",
|
|
163
|
-
};
|
|
164
|
-
},
|
|
165
|
-
};
|
|
166
|
-
|
|
167
|
-
const session = new ComputerUseSession(
|
|
168
|
-
"cu-tool-capture",
|
|
169
|
-
"capture tools",
|
|
170
|
-
1440,
|
|
171
|
-
900,
|
|
172
|
-
provider,
|
|
173
|
-
() => {},
|
|
174
|
-
"computer_use",
|
|
175
|
-
);
|
|
176
|
-
|
|
177
|
-
await session.handleObservation({
|
|
178
|
-
type: "cu_observation",
|
|
179
|
-
sessionId: "cu-tool-capture",
|
|
180
|
-
axTree: 'Window "Test" [1]',
|
|
181
|
-
});
|
|
182
|
-
|
|
183
|
-
const cuTools = capturedTools.filter((n) => n.startsWith("computer_use_"));
|
|
184
|
-
expect(cuTools).toHaveLength(10);
|
|
185
|
-
|
|
186
|
-
// Assert exact set of expected CU tool names
|
|
187
|
-
const expectedCuTools = [
|
|
188
|
-
"computer_use_click",
|
|
189
|
-
"computer_use_type_text",
|
|
190
|
-
"computer_use_key",
|
|
191
|
-
"computer_use_scroll",
|
|
192
|
-
"computer_use_drag",
|
|
193
|
-
"computer_use_wait",
|
|
194
|
-
"computer_use_open_app",
|
|
195
|
-
"computer_use_run_applescript",
|
|
196
|
-
"computer_use_done",
|
|
197
|
-
"computer_use_respond",
|
|
198
|
-
];
|
|
199
|
-
for (const name of expectedCuTools) {
|
|
200
|
-
expect(cuTools).toContain(name);
|
|
201
|
-
}
|
|
202
|
-
});
|
|
203
|
-
|
|
204
|
-
test("computer_use_respond is a terminal tool that completes the session", async () => {
|
|
205
|
-
const { provider } = createProvider([
|
|
206
|
-
{
|
|
207
|
-
content: [
|
|
208
|
-
{
|
|
209
|
-
type: "tool_use",
|
|
210
|
-
id: "tu-respond",
|
|
211
|
-
name: "computer_use_respond",
|
|
212
|
-
input: {
|
|
213
|
-
answer: "The meeting is at 3pm",
|
|
214
|
-
reasoning: "Found in calendar",
|
|
215
|
-
},
|
|
216
|
-
},
|
|
217
|
-
],
|
|
218
|
-
model: "mock-model",
|
|
219
|
-
usage: { inputTokens: 10, outputTokens: 5 },
|
|
220
|
-
stopReason: "tool_use",
|
|
221
|
-
},
|
|
222
|
-
]);
|
|
223
|
-
|
|
224
|
-
const sentMessages: ServerMessage[] = [];
|
|
225
|
-
const session = new ComputerUseSession(
|
|
226
|
-
"cu-respond-test",
|
|
227
|
-
"check my schedule",
|
|
228
|
-
1440,
|
|
229
|
-
900,
|
|
230
|
-
provider,
|
|
231
|
-
(msg) => {
|
|
232
|
-
sentMessages.push(msg);
|
|
233
|
-
},
|
|
234
|
-
"computer_use",
|
|
235
|
-
);
|
|
236
|
-
|
|
237
|
-
await session.handleObservation({
|
|
238
|
-
type: "cu_observation",
|
|
239
|
-
sessionId: "cu-respond-test",
|
|
240
|
-
axTree: 'Window "Calendar" [1]',
|
|
241
|
-
});
|
|
242
|
-
|
|
243
|
-
expect(session.getState()).toBe("complete");
|
|
244
|
-
const completes = sentMessages.filter(
|
|
245
|
-
(msg): msg is Extract<ServerMessage, { type: "cu_complete" }> =>
|
|
246
|
-
msg.type === "cu_complete",
|
|
247
|
-
);
|
|
248
|
-
expect(completes).toHaveLength(1);
|
|
249
|
-
expect(completes[0].summary).toBe("The meeting is at 3pm");
|
|
250
|
-
expect(completes[0].isResponse).toBe(true);
|
|
251
|
-
});
|
|
252
|
-
|
|
253
|
-
test("default construction preactivates computer-use skill and provides 10 CU tools", async () => {
|
|
254
|
-
let capturedTools: string[] = [];
|
|
255
|
-
const provider: Provider = {
|
|
256
|
-
name: "mock",
|
|
257
|
-
async sendMessage(_msgs, tools) {
|
|
258
|
-
capturedTools = (tools ?? []).map((t) => t.name);
|
|
259
|
-
return {
|
|
260
|
-
content: [
|
|
261
|
-
{
|
|
262
|
-
type: "tool_use",
|
|
263
|
-
id: "tu-default",
|
|
264
|
-
name: "computer_use_done",
|
|
265
|
-
input: { summary: "Done" },
|
|
266
|
-
},
|
|
267
|
-
],
|
|
268
|
-
model: "mock-model",
|
|
269
|
-
usage: { inputTokens: 10, outputTokens: 5 },
|
|
270
|
-
stopReason: "tool_use",
|
|
271
|
-
};
|
|
272
|
-
},
|
|
273
|
-
};
|
|
274
|
-
|
|
275
|
-
// No preactivatedSkillIds passed — defaults to ['computer-use'] via skill projection
|
|
276
|
-
const session = new ComputerUseSession(
|
|
277
|
-
"cu-default-projection",
|
|
278
|
-
"test default projection",
|
|
279
|
-
1440,
|
|
280
|
-
900,
|
|
281
|
-
provider,
|
|
282
|
-
() => {},
|
|
283
|
-
"computer_use",
|
|
284
|
-
undefined,
|
|
285
|
-
);
|
|
286
|
-
|
|
287
|
-
await session.handleObservation({
|
|
288
|
-
type: "cu_observation",
|
|
289
|
-
sessionId: "cu-default-projection",
|
|
290
|
-
axTree: 'Window "Test" [1]',
|
|
291
|
-
});
|
|
292
|
-
|
|
293
|
-
const cuTools = capturedTools.filter((n) => n.startsWith("computer_use_"));
|
|
294
|
-
expect(cuTools).toHaveLength(10);
|
|
295
|
-
});
|
|
296
|
-
|
|
297
|
-
test("constructor accepts preactivatedSkillIds parameter", () => {
|
|
298
|
-
const { provider } = createProvider([
|
|
299
|
-
{
|
|
300
|
-
content: [{ type: "text", text: "unused" }],
|
|
301
|
-
model: "mock-model",
|
|
302
|
-
usage: { inputTokens: 1, outputTokens: 1 },
|
|
303
|
-
stopReason: "end_turn",
|
|
304
|
-
},
|
|
305
|
-
]);
|
|
306
|
-
|
|
307
|
-
// Should not throw
|
|
308
|
-
const session = new ComputerUseSession(
|
|
309
|
-
"cu-preactivated",
|
|
310
|
-
"test preactivated",
|
|
311
|
-
1440,
|
|
312
|
-
900,
|
|
313
|
-
provider,
|
|
314
|
-
() => {},
|
|
315
|
-
"computer_use",
|
|
316
|
-
undefined,
|
|
317
|
-
["computer-use"],
|
|
318
|
-
);
|
|
319
|
-
|
|
320
|
-
expect(session).toBeDefined();
|
|
321
|
-
});
|
|
322
|
-
});
|
|
@@ -1,166 +0,0 @@
|
|
|
1
|
-
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
|
2
|
-
|
|
3
|
-
import type { CuObservation } from "../daemon/message-protocol.js";
|
|
4
|
-
import type { Provider } from "../providers/types.js";
|
|
5
|
-
|
|
6
|
-
let capturedWorkingDir: string | undefined;
|
|
7
|
-
|
|
8
|
-
const noopLogger = new Proxy({} as Record<string, unknown>, {
|
|
9
|
-
get: (_target, prop) => (prop === "child" ? () => noopLogger : () => {}),
|
|
10
|
-
});
|
|
11
|
-
|
|
12
|
-
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
13
|
-
const realLogger = require("../util/logger.js");
|
|
14
|
-
mock.module("../util/logger.js", () => ({
|
|
15
|
-
...realLogger,
|
|
16
|
-
getLogger: () => noopLogger,
|
|
17
|
-
getCliLogger: () => noopLogger,
|
|
18
|
-
isDebug: () => false,
|
|
19
|
-
truncateForLog: (value: string, maxLen = 500) =>
|
|
20
|
-
value.length > maxLen ? value.slice(0, maxLen) + "..." : value,
|
|
21
|
-
initLogger: () => {},
|
|
22
|
-
pruneOldLogFiles: () => 0,
|
|
23
|
-
}));
|
|
24
|
-
|
|
25
|
-
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
26
|
-
const realPlatform = require("../util/platform.js");
|
|
27
|
-
mock.module("../util/platform.js", () => ({
|
|
28
|
-
...realPlatform,
|
|
29
|
-
getRootDir: () => "/tmp",
|
|
30
|
-
getDataDir: () => "/tmp/data",
|
|
31
|
-
|
|
32
|
-
getSandboxRootDir: () => "/tmp/sandbox",
|
|
33
|
-
getSandboxWorkingDir: () => "/tmp/workspace",
|
|
34
|
-
getInterfacesDir: () => "/tmp/interfaces",
|
|
35
|
-
getWorkspaceDir: () => "/tmp/workspace",
|
|
36
|
-
getWorkspaceConfigPath: () => "/tmp/workspace/config.json",
|
|
37
|
-
getWorkspaceSkillsDir: () => "/tmp/workspace/skills",
|
|
38
|
-
getWorkspaceHooksDir: () => "/tmp/workspace/hooks",
|
|
39
|
-
getWorkspacePromptPath: (file: string) => `/tmp/workspace/${file}`,
|
|
40
|
-
getPlatformName: () => "linux",
|
|
41
|
-
getClipboardCommand: () => null,
|
|
42
|
-
getPidPath: () => "/tmp/test.pid",
|
|
43
|
-
getDbPath: () => "/tmp/data/db/assistant.db",
|
|
44
|
-
getLogPath: () => "/tmp/test.log",
|
|
45
|
-
getHistoryPath: () => "/tmp/data/history",
|
|
46
|
-
getHooksDir: () => "/tmp/hooks",
|
|
47
|
-
readSessionToken: () => null,
|
|
48
|
-
ensureDataDir: () => {},
|
|
49
|
-
isMacOS: () => false,
|
|
50
|
-
isLinux: () => true,
|
|
51
|
-
isWindows: () => false,
|
|
52
|
-
normalizeAssistantId: (id: string) => id,
|
|
53
|
-
readLockfile: () => null,
|
|
54
|
-
writeLockfile: () => {},
|
|
55
|
-
}));
|
|
56
|
-
|
|
57
|
-
mock.module("../config/loader.js", () => ({
|
|
58
|
-
getConfig: () => ({
|
|
59
|
-
ui: {},
|
|
60
|
-
daemon: { standaloneRecording: false },
|
|
61
|
-
provider: "mock-provider",
|
|
62
|
-
model: "mock-model",
|
|
63
|
-
permissions: { mode: "workspace" },
|
|
64
|
-
apiKeys: {},
|
|
65
|
-
sandbox: { enabled: false, backend: "native" },
|
|
66
|
-
timeouts: { toolExecutionTimeoutSec: 30, permissionTimeoutSec: 5 },
|
|
67
|
-
skills: { load: { extraDirs: [] } },
|
|
68
|
-
secretDetection: {
|
|
69
|
-
enabled: false,
|
|
70
|
-
allowOneTimeSend: false,
|
|
71
|
-
customPatterns: [],
|
|
72
|
-
entropyThreshold: 3.5,
|
|
73
|
-
},
|
|
74
|
-
contextWindow: {
|
|
75
|
-
enabled: true,
|
|
76
|
-
maxInputTokens: 180000,
|
|
77
|
-
targetBudgetRatio: 0.30,
|
|
78
|
-
compactThreshold: 0.8, summaryBudgetRatio: 0.05,
|
|
79
|
-
},
|
|
80
|
-
assistantFeatureFlagValues: {},
|
|
81
|
-
}),
|
|
82
|
-
loadConfig: () => ({}),
|
|
83
|
-
loadRawConfig: () => ({}),
|
|
84
|
-
saveConfig: () => {},
|
|
85
|
-
saveRawConfig: () => {},
|
|
86
|
-
invalidateConfigCache: () => {},
|
|
87
|
-
applyNestedDefaults: (config: unknown) => config,
|
|
88
|
-
getNestedValue: () => undefined,
|
|
89
|
-
setNestedValue: () => {},
|
|
90
|
-
syncConfigToLockfile: () => {},
|
|
91
|
-
API_KEY_PROVIDERS: [],
|
|
92
|
-
}));
|
|
93
|
-
|
|
94
|
-
const { ToolExecutor } = await import("../tools/executor.js");
|
|
95
|
-
const { ComputerUseSession } =
|
|
96
|
-
await import("../daemon/computer-use-session.js");
|
|
97
|
-
|
|
98
|
-
const originalExecute = ToolExecutor.prototype.execute;
|
|
99
|
-
|
|
100
|
-
describe("ComputerUseSession working directory", () => {
|
|
101
|
-
beforeEach(() => {
|
|
102
|
-
capturedWorkingDir = undefined;
|
|
103
|
-
ToolExecutor.prototype.execute = async function (
|
|
104
|
-
_name: string,
|
|
105
|
-
_input: Record<string, unknown>,
|
|
106
|
-
context: { workingDir: string },
|
|
107
|
-
) {
|
|
108
|
-
capturedWorkingDir = context.workingDir;
|
|
109
|
-
return { content: "ok", isError: false };
|
|
110
|
-
} as typeof ToolExecutor.prototype.execute;
|
|
111
|
-
});
|
|
112
|
-
|
|
113
|
-
afterEach(() => {
|
|
114
|
-
ToolExecutor.prototype.execute = originalExecute;
|
|
115
|
-
});
|
|
116
|
-
|
|
117
|
-
test("uses sandbox working directory for tool execution context", async () => {
|
|
118
|
-
let providerCalls = 0;
|
|
119
|
-
const provider: Provider = {
|
|
120
|
-
name: "mock-provider",
|
|
121
|
-
async sendMessage() {
|
|
122
|
-
const calls = providerCalls++;
|
|
123
|
-
if (calls === 0) {
|
|
124
|
-
return {
|
|
125
|
-
content: [
|
|
126
|
-
{
|
|
127
|
-
type: "tool_use",
|
|
128
|
-
id: "toolu_1",
|
|
129
|
-
name: "computer_use_click",
|
|
130
|
-
input: { element_id: 1 },
|
|
131
|
-
},
|
|
132
|
-
],
|
|
133
|
-
model: "mock-model",
|
|
134
|
-
usage: { inputTokens: 1, outputTokens: 1 },
|
|
135
|
-
stopReason: "tool_use",
|
|
136
|
-
};
|
|
137
|
-
}
|
|
138
|
-
return {
|
|
139
|
-
content: [{ type: "text", text: "unused" }],
|
|
140
|
-
model: "mock-model",
|
|
141
|
-
usage: { inputTokens: 1, outputTokens: 1 },
|
|
142
|
-
stopReason: "end_turn",
|
|
143
|
-
};
|
|
144
|
-
},
|
|
145
|
-
};
|
|
146
|
-
|
|
147
|
-
const session = new ComputerUseSession(
|
|
148
|
-
"cu-sandbox-1",
|
|
149
|
-
"test task",
|
|
150
|
-
1440,
|
|
151
|
-
900,
|
|
152
|
-
provider,
|
|
153
|
-
() => {},
|
|
154
|
-
);
|
|
155
|
-
|
|
156
|
-
const observation: CuObservation = {
|
|
157
|
-
type: "cu_observation",
|
|
158
|
-
sessionId: "cu-sandbox-1",
|
|
159
|
-
axTree: 'Window "Test" [1]',
|
|
160
|
-
};
|
|
161
|
-
|
|
162
|
-
await session.handleObservation(observation);
|
|
163
|
-
|
|
164
|
-
expect(capturedWorkingDir).toBe("/tmp/workspace");
|
|
165
|
-
});
|
|
166
|
-
});
|
|
@@ -1,78 +0,0 @@
|
|
|
1
|
-
import { afterAll, describe, expect, test } from "bun:test";
|
|
2
|
-
|
|
3
|
-
import { buildToolDefinitions } from "../daemon/session-tool-setup.js";
|
|
4
|
-
import {
|
|
5
|
-
__resetRegistryForTesting,
|
|
6
|
-
getAllToolDefinitions,
|
|
7
|
-
getAllTools,
|
|
8
|
-
getTool,
|
|
9
|
-
initializeTools,
|
|
10
|
-
} from "../tools/registry.js";
|
|
11
|
-
import {
|
|
12
|
-
assertComputerUseToolsAbsent,
|
|
13
|
-
COMPUTER_USE_TOOL_NAMES,
|
|
14
|
-
} from "./test-support/computer-use-skill-harness.js";
|
|
15
|
-
|
|
16
|
-
afterAll(() => {
|
|
17
|
-
__resetRegistryForTesting();
|
|
18
|
-
});
|
|
19
|
-
|
|
20
|
-
describe("computer-use skill baseline: registry tool surfaces", () => {
|
|
21
|
-
test("no computer_use_* action tools are registered after initializeTools() (migrated to skill)", async () => {
|
|
22
|
-
await initializeTools();
|
|
23
|
-
|
|
24
|
-
for (const name of COMPUTER_USE_TOOL_NAMES) {
|
|
25
|
-
const tool = getTool(name);
|
|
26
|
-
expect(tool).toBeUndefined();
|
|
27
|
-
}
|
|
28
|
-
});
|
|
29
|
-
|
|
30
|
-
test("computer_use_request_control is registered in core after initializeTools()", async () => {
|
|
31
|
-
await initializeTools();
|
|
32
|
-
|
|
33
|
-
const tool = getTool("computer_use_request_control");
|
|
34
|
-
expect(tool).toBeDefined();
|
|
35
|
-
});
|
|
36
|
-
|
|
37
|
-
test("getAllToolDefinitions() excludes all computer_use_* tools (proxy exclusion)", async () => {
|
|
38
|
-
await initializeTools();
|
|
39
|
-
|
|
40
|
-
const defNames = getAllToolDefinitions().map((d) => d.name);
|
|
41
|
-
assertComputerUseToolsAbsent(defNames);
|
|
42
|
-
});
|
|
43
|
-
|
|
44
|
-
test("getAllToolDefinitions() excludes computer_use_request_control (proxy exclusion)", async () => {
|
|
45
|
-
await initializeTools();
|
|
46
|
-
|
|
47
|
-
const defNames = getAllToolDefinitions().map((d) => d.name);
|
|
48
|
-
expect(defNames).not.toContain("computer_use_request_control");
|
|
49
|
-
});
|
|
50
|
-
|
|
51
|
-
test("buildToolDefinitions() includes computer_use_request_control for text sessions", async () => {
|
|
52
|
-
await initializeTools();
|
|
53
|
-
|
|
54
|
-
const defNames = buildToolDefinitions().map((d) => d.name);
|
|
55
|
-
expect(defNames).toContain("computer_use_request_control");
|
|
56
|
-
});
|
|
57
|
-
|
|
58
|
-
test("buildToolDefinitions() excludes all computer_use_* action tools from text sessions", async () => {
|
|
59
|
-
await initializeTools();
|
|
60
|
-
|
|
61
|
-
const defNames = buildToolDefinitions().map((d) => d.name);
|
|
62
|
-
// The only computer_use_* tool in text sessions is the escalation tool
|
|
63
|
-
const cuActionTools = defNames.filter(
|
|
64
|
-
(n) =>
|
|
65
|
-
n.startsWith("computer_use_") && n !== "computer_use_request_control",
|
|
66
|
-
);
|
|
67
|
-
expect(cuActionTools).toHaveLength(0);
|
|
68
|
-
});
|
|
69
|
-
|
|
70
|
-
test("post-cutover count: 1 computer_use_* tool in core registry (escalation only)", async () => {
|
|
71
|
-
await initializeTools();
|
|
72
|
-
|
|
73
|
-
const allTools = getAllTools();
|
|
74
|
-
const cuTools = allTools.filter((t) => t.name.startsWith("computer_use_"));
|
|
75
|
-
expect(cuTools).toHaveLength(1);
|
|
76
|
-
expect(cuTools[0].name).toBe("computer_use_request_control");
|
|
77
|
-
});
|
|
78
|
-
});
|
|
@@ -1,105 +0,0 @@
|
|
|
1
|
-
import { join } from "node:path";
|
|
2
|
-
import { beforeAll, describe, expect, test } from "bun:test";
|
|
3
|
-
|
|
4
|
-
import { getBundledSkillsDir } from "../config/skills.js";
|
|
5
|
-
import { buildToolDefinitions } from "../daemon/session-tool-setup.js";
|
|
6
|
-
import { parseToolManifestFile } from "../skills/tool-manifest.js";
|
|
7
|
-
import {
|
|
8
|
-
__resetRegistryForTesting,
|
|
9
|
-
getAllToolDefinitions,
|
|
10
|
-
getAllTools,
|
|
11
|
-
getTool,
|
|
12
|
-
initializeTools,
|
|
13
|
-
} from "../tools/registry.js";
|
|
14
|
-
import {
|
|
15
|
-
COMPUTER_USE_TOOL_COUNT,
|
|
16
|
-
COMPUTER_USE_TOOL_NAMES,
|
|
17
|
-
} from "./test-support/computer-use-skill-harness.js";
|
|
18
|
-
|
|
19
|
-
beforeAll(async () => {
|
|
20
|
-
__resetRegistryForTesting();
|
|
21
|
-
await initializeTools();
|
|
22
|
-
});
|
|
23
|
-
|
|
24
|
-
describe("computer-use skill end-state", () => {
|
|
25
|
-
// ── Core Registry ──────────────────────────────────────────────────
|
|
26
|
-
|
|
27
|
-
test("core registry contains 1 computer_use_* tool (escalation only)", () => {
|
|
28
|
-
const allTools = getAllTools();
|
|
29
|
-
const cuTools = allTools.filter((t) => t.name.startsWith("computer_use_"));
|
|
30
|
-
expect(cuTools).toHaveLength(1);
|
|
31
|
-
expect(cuTools[0].name).toBe("computer_use_request_control");
|
|
32
|
-
});
|
|
33
|
-
|
|
34
|
-
test("computer_use_request_control is resolvable from core registry", () => {
|
|
35
|
-
expect(getTool("computer_use_request_control")).toBeDefined();
|
|
36
|
-
});
|
|
37
|
-
|
|
38
|
-
test("no action tool from COMPUTER_USE_TOOL_NAMES is resolvable from core registry", () => {
|
|
39
|
-
for (const name of COMPUTER_USE_TOOL_NAMES) {
|
|
40
|
-
expect(getTool(name)).toBeUndefined();
|
|
41
|
-
}
|
|
42
|
-
});
|
|
43
|
-
|
|
44
|
-
// ── getAllToolDefinitions (excludes proxy & skill tools) ──────────
|
|
45
|
-
|
|
46
|
-
test("getAllToolDefinitions() excludes computer_use_* tools", () => {
|
|
47
|
-
const defs = getAllToolDefinitions();
|
|
48
|
-
const cuDefs = defs.filter((d) => d.name.startsWith("computer_use_"));
|
|
49
|
-
expect(cuDefs).toHaveLength(0);
|
|
50
|
-
});
|
|
51
|
-
|
|
52
|
-
test("getAllToolDefinitions() excludes computer_use_request_control (proxy exclusion)", () => {
|
|
53
|
-
const defs = getAllToolDefinitions();
|
|
54
|
-
const found = defs.find((d) => d.name === "computer_use_request_control");
|
|
55
|
-
expect(found).toBeUndefined();
|
|
56
|
-
});
|
|
57
|
-
|
|
58
|
-
// ── buildToolDefinitions (text session tool set) ─────────────────
|
|
59
|
-
|
|
60
|
-
test("buildToolDefinitions() includes computer_use_request_control", () => {
|
|
61
|
-
const defs = buildToolDefinitions();
|
|
62
|
-
const found = defs.find((d) => d.name === "computer_use_request_control");
|
|
63
|
-
expect(found).toBeDefined();
|
|
64
|
-
});
|
|
65
|
-
|
|
66
|
-
test("buildToolDefinitions() excludes computer_use_* action tools", () => {
|
|
67
|
-
const defs = buildToolDefinitions();
|
|
68
|
-
const cuDefs = defs.filter(
|
|
69
|
-
(d) =>
|
|
70
|
-
d.name.startsWith("computer_use_") &&
|
|
71
|
-
d.name !== "computer_use_request_control",
|
|
72
|
-
);
|
|
73
|
-
expect(cuDefs).toHaveLength(0);
|
|
74
|
-
});
|
|
75
|
-
|
|
76
|
-
// ── Bundled Skill Catalog ────────────────────────────────────────
|
|
77
|
-
|
|
78
|
-
test(
|
|
79
|
-
"computer-use skill has exactly " +
|
|
80
|
-
COMPUTER_USE_TOOL_COUNT +
|
|
81
|
-
" tools in TOOLS.json",
|
|
82
|
-
() => {
|
|
83
|
-
const manifestPath = join(
|
|
84
|
-
getBundledSkillsDir(),
|
|
85
|
-
"computer-use",
|
|
86
|
-
"TOOLS.json",
|
|
87
|
-
);
|
|
88
|
-
const manifest = parseToolManifestFile(manifestPath);
|
|
89
|
-
expect(manifest.tools).toHaveLength(COMPUTER_USE_TOOL_COUNT);
|
|
90
|
-
},
|
|
91
|
-
);
|
|
92
|
-
|
|
93
|
-
test("bundled skill tool names match expected computer_use_* names", () => {
|
|
94
|
-
const manifestPath = join(
|
|
95
|
-
getBundledSkillsDir(),
|
|
96
|
-
"computer-use",
|
|
97
|
-
"TOOLS.json",
|
|
98
|
-
);
|
|
99
|
-
const manifest = parseToolManifestFile(manifestPath);
|
|
100
|
-
const toolNames = new Set(manifest.tools.map((t) => t.name));
|
|
101
|
-
for (const name of COMPUTER_USE_TOOL_NAMES) {
|
|
102
|
-
expect(toolNames.has(name)).toBe(true);
|
|
103
|
-
}
|
|
104
|
-
});
|
|
105
|
-
});
|