npm - @vellumai/assistant - Versions diffs - 0.4.52 → 0.4.54 - Mend

@vellumai/assistant 0.4.52 → 0.4.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (380) hide show

package/ARCHITECTURE.md +2 -2
package/bun.lock +62 -349
package/docs/architecture/integrations.md +1 -1
package/docs/architecture/keychain-broker.md +91 -40
package/docs/architecture/memory.md +3 -3
package/docs/architecture/security.md +2 -2
package/knip.json +7 -29
package/package.json +2 -9
package/src/__tests__/agent-loop.test.ts +1 -1
package/src/__tests__/app-git-history.test.ts +0 -2
package/src/__tests__/app-git-service.test.ts +1 -6
package/src/__tests__/approval-cascade.test.ts +3 -2
package/src/__tests__/approval-routes-http.test.ts +0 -1
package/src/__tests__/asset-materialize-tool.test.ts +0 -1
package/src/__tests__/asset-search-tool.test.ts +0 -1
package/src/__tests__/assistant-events-sse-hardening.test.ts +0 -1
package/src/__tests__/attachments-store.test.ts +0 -1
package/src/__tests__/avatar-e2e.test.ts +5 -1
package/src/__tests__/browser-fill-credential.test.ts +4 -6
package/src/__tests__/btw-routes.test.ts +39 -0
package/src/__tests__/call-controller.test.ts +0 -1
package/src/__tests__/call-domain.test.ts +1 -1
package/src/__tests__/call-routes-http.test.ts +1 -3
package/src/__tests__/canonical-guardian-store.test.ts +33 -2
package/src/__tests__/channel-guardian.test.ts +4 -4
package/src/__tests__/channel-readiness-routes.test.ts +0 -1
package/src/__tests__/channel-readiness-service.test.ts +1 -1
package/src/__tests__/checker.test.ts +13 -11
package/src/__tests__/claude-code-skill-regression.test.ts +5 -2
package/src/__tests__/claude-code-tool-profiles.test.ts +7 -3
package/src/__tests__/config-loader-backfill.test.ts +1 -5
package/src/__tests__/config-schema.test.ts +9 -46
package/src/__tests__/config-watcher.test.ts +11 -3
package/src/__tests__/conversation-routes-slash-commands.test.ts +0 -1
package/src/__tests__/credential-broker-browser-fill.test.ts +27 -24
package/src/__tests__/credential-broker-server-use.test.ts +76 -40
package/src/__tests__/credential-security-e2e.test.ts +1 -6
package/src/__tests__/credential-security-invariants.test.ts +27 -8
package/src/__tests__/credential-vault-unit.test.ts +32 -16
package/src/__tests__/credential-vault.test.ts +40 -28
package/src/__tests__/credentials-cli.test.ts +1 -21
package/src/__tests__/email-invite-adapter.test.ts +0 -1
package/src/__tests__/error-handler-friendly-messages.test.ts +4 -5
package/src/__tests__/fixtures/credential-security-fixtures.ts +3 -3
package/src/__tests__/fixtures/media-reuse-fixtures.ts +3 -79
package/src/__tests__/gateway-only-enforcement.test.ts +1 -23
package/src/__tests__/guardian-action-conversation-turn.test.ts +8 -8
package/src/__tests__/guardian-action-late-reply.test.ts +13 -14
package/src/__tests__/guardian-action-store.test.ts +0 -57
package/src/__tests__/guardian-outbound-http.test.ts +1 -1
package/src/__tests__/guardian-verification-voice-binding.test.ts +1 -3
package/src/__tests__/hooks-blocking.test.ts +1 -1
package/src/__tests__/hooks-config.test.ts +5 -29
package/src/__tests__/hooks-discovery.test.ts +1 -1
package/src/__tests__/hooks-integration.test.ts +1 -1
package/src/__tests__/hooks-manager.test.ts +1 -1
package/src/__tests__/hooks-runner.test.ts +1 -23
package/src/__tests__/hooks-settings.test.ts +1 -1
package/src/__tests__/hooks-templates.test.ts +1 -1
package/src/__tests__/host-shell-tool.test.ts +0 -1
package/src/__tests__/http-user-message-parity.test.ts +19 -0
package/src/__tests__/integration-status.test.ts +0 -1
package/src/__tests__/invite-routes-http.test.ts +0 -3
package/src/__tests__/list-messages-attachments.test.ts +0 -1
package/src/__tests__/llm-usage-store.test.ts +50 -0
package/src/__tests__/log-export-workspace.test.ts +233 -0
package/src/__tests__/managed-proxy-context.test.ts +41 -41
package/src/__tests__/managed-skill-lifecycle.test.ts +0 -1
package/src/__tests__/media-generate-image.test.ts +9 -4
package/src/__tests__/media-reuse-story.e2e.test.ts +1 -7
package/src/__tests__/memory-regressions.experimental.test.ts +4 -4
package/src/__tests__/memory-regressions.test.ts +27 -28
package/src/__tests__/memory-retrieval.benchmark.test.ts +1 -1
package/src/__tests__/memory-upsert-concurrency.test.ts +4 -4
package/src/__tests__/migration-cross-version-compatibility.test.ts +0 -1
package/src/__tests__/migration-export-http.test.ts +0 -1
package/src/__tests__/migration-import-commit-http.test.ts +0 -1
package/src/__tests__/migration-import-preflight-http.test.ts +0 -1
package/src/__tests__/migration-validate-http.test.ts +0 -1
package/src/__tests__/notification-decision-fallback.test.ts +1 -1
package/src/__tests__/notification-schedule-dedup.test.ts +237 -0
package/src/__tests__/oauth-cli.test.ts +2 -14
package/src/__tests__/oauth-store.test.ts +3 -7
package/src/__tests__/oauth2-gateway-transport.test.ts +5 -4
package/src/__tests__/onboarding-starter-tasks.test.ts +1 -1
package/src/__tests__/onboarding-template-contract.test.ts +1 -2
package/src/__tests__/openai-provider.test.ts +7 -7
package/src/__tests__/platform.test.ts +14 -4
package/src/__tests__/pricing.test.ts +0 -234
package/src/__tests__/provider-commit-message-generator.test.ts +19 -15
package/src/__tests__/provider-fail-open-selection.test.ts +67 -62
package/src/__tests__/provider-managed-proxy-integration.test.ts +88 -85
package/src/__tests__/provider-registry-ollama.test.ts +10 -4
package/src/__tests__/public-ingress-urls.test.ts +1 -1
package/src/__tests__/recording-handler.test.ts +0 -1
package/src/__tests__/registry.test.ts +3 -103
package/src/__tests__/relay-server.test.ts +0 -1
package/src/__tests__/runtime-attachment-metadata.test.ts +0 -1
package/src/__tests__/runtime-events-sse-parity.test.ts +0 -1
package/src/__tests__/runtime-events-sse.test.ts +0 -1
package/src/__tests__/script-proxy-injection-runtime.test.ts +2 -7
package/src/__tests__/secret-onetime-send.test.ts +1 -6
package/src/__tests__/secret-routes-managed-proxy.test.ts +6 -14
package/src/__tests__/secret-scanner-executor.test.ts +0 -1
package/src/__tests__/secure-keys.test.ts +241 -229
package/src/__tests__/send-endpoint-busy.test.ts +0 -1
package/src/__tests__/session-abort-tool-results.test.ts +3 -2
package/src/__tests__/session-agent-loop-overflow.test.ts +1012 -838
package/src/__tests__/session-agent-loop.test.ts +2 -2
package/src/__tests__/session-confirmation-signals.test.ts +3 -2
package/src/__tests__/session-error.test.ts +5 -4
package/src/__tests__/session-history-web-search.test.ts +34 -9
package/src/__tests__/session-messaging-secret-redirect.test.ts +1 -7
package/src/__tests__/session-pre-run-repair.test.ts +3 -2
package/src/__tests__/session-provider-retry-repair.test.ts +31 -27
package/src/__tests__/session-queue.test.ts +5 -5
package/src/__tests__/session-runtime-assembly.test.ts +118 -0
package/src/__tests__/session-slash-known.test.ts +31 -14
package/src/__tests__/session-slash-queue.test.ts +3 -2
package/src/__tests__/session-slash-unknown.test.ts +3 -2
package/src/__tests__/session-workspace-cache-state.test.ts +3 -1
package/src/__tests__/session-workspace-injection.test.ts +3 -2
package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -2
package/src/__tests__/shell-tool-proxy-mode.test.ts +0 -1
package/src/__tests__/skill-projection-feature-flag.test.ts +0 -1
package/src/__tests__/skill-script-runner-sandbox.test.ts +0 -1
package/src/__tests__/skillssh-registry.test.ts +21 -0
package/src/__tests__/slack-channel-config.test.ts +1 -7
package/src/__tests__/slack-share-routes.test.ts +1 -1
package/src/__tests__/swarm-recursion.test.ts +4 -1
package/src/__tests__/swarm-session-integration.test.ts +24 -14
package/src/__tests__/swarm-tool.test.ts +4 -2
package/src/__tests__/task-compiler.test.ts +1 -1
package/src/__tests__/telegram-bot-username-resolution.test.ts +2 -4
package/src/__tests__/test-support/browser-skill-harness.ts +0 -18
package/src/__tests__/test-support/computer-use-skill-harness.ts +0 -23
package/src/__tests__/token-estimator-accuracy.benchmark.test.ts +1521 -0
package/src/__tests__/tool-execution-abort-cleanup.test.ts +0 -1
package/src/__tests__/tool-executor-lifecycle-events.test.ts +0 -1
package/src/__tests__/tool-executor-shell-integration.test.ts +0 -1
package/src/__tests__/tool-executor.test.ts +1 -2
package/src/__tests__/trust-store.test.ts +8 -83
package/src/__tests__/twilio-config.test.ts +0 -1
package/src/__tests__/twilio-provider.test.ts +0 -5
package/src/__tests__/twilio-routes.test.ts +2 -3
package/src/__tests__/usage-cache-backfill-migration.test.ts +10 -10
package/src/__tests__/verification-control-plane-policy.test.ts +0 -1
package/src/__tests__/voice-quality.test.ts +2 -1
package/src/__tests__/voice-scoped-grant-consumer.test.ts +0 -1
package/src/__tests__/web-search.test.ts +1 -1
package/src/agent/loop.ts +17 -1
package/src/bundler/app-bundler.ts +40 -24
package/src/calls/call-controller.ts +16 -0
package/src/calls/guardian-question-copy.ts +1 -1
package/src/calls/relay-server.ts +29 -13
package/src/calls/voice-control-protocol.ts +1 -0
package/src/calls/voice-quality.ts +1 -1
package/src/calls/voice-session-bridge.ts +9 -3
package/src/channels/types.ts +16 -0
package/src/cli/commands/bash.ts +173 -0
package/src/cli/commands/doctor.ts +15 -57
package/src/cli/commands/memory.ts +3 -5
package/src/cli/commands/oauth/connections.ts +4 -2
package/src/cli/commands/oauth/providers.ts +1 -13
package/src/cli/commands/sessions.ts +1 -1
package/src/cli/commands/usage.ts +359 -0
package/src/cli/http-client.ts +22 -12
package/src/cli/program.ts +4 -0
package/src/cli/reference.ts +2 -0
package/src/cli.ts +251 -181
package/src/config/assistant-feature-flags.ts +0 -7
package/src/config/bundled-skills/chatgpt-import/tools/chatgpt-import.ts +1 -1
package/src/config/bundled-skills/claude-code/SKILL.md +1 -1
package/src/config/bundled-skills/claude-code/TOOLS.json +1 -1
package/src/config/bundled-skills/gmail/SKILL.md +0 -1
package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +4 -3
package/src/config/bundled-skills/media-processing/services/reduce.ts +1 -1
package/src/config/bundled-skills/media-processing/tools/analyze-keyframes.ts +3 -5
package/src/config/bundled-skills/media-processing/tools/extract-keyframes.ts +2 -3
package/src/config/bundled-skills/messaging/SKILL.md +0 -1
package/src/config/bundled-skills/phone-calls/references/CONFIG.md +1 -1
package/src/config/bundled-skills/sequences/SKILL.md +0 -1
package/src/config/bundled-skills/transcribe/tools/transcribe-media.ts +5 -6
package/src/config/env.ts +13 -0
package/src/config/feature-flag-registry.json +15 -39
package/src/config/loader.ts +7 -135
package/src/config/schema.ts +0 -6
package/src/config/schemas/channels.ts +1 -0
package/src/config/schemas/elevenlabs.ts +2 -2
package/src/config/schemas/security.ts +1 -2
package/src/config/skills.ts +1 -1
package/src/contacts/contact-store.ts +21 -75
package/src/contacts/contacts-write.ts +6 -6
package/src/contacts/types.ts +2 -0
package/src/context/token-estimator.ts +35 -2
package/src/context/window-manager.ts +16 -2
package/src/daemon/approved-devices-store.ts +0 -44
package/src/daemon/classifier.ts +1 -1
package/src/daemon/config-watcher.ts +35 -11
package/src/daemon/context-overflow-reducer.ts +13 -2
package/src/daemon/handlers/config-ingress.ts +25 -8
package/src/daemon/handlers/config-model.ts +22 -16
package/src/daemon/handlers/config-telegram.ts +18 -6
package/src/daemon/handlers/dictation.ts +0 -429
package/src/daemon/handlers/sessions.ts +4 -116
package/src/daemon/handlers/skills.ts +2 -201
package/src/daemon/lifecycle.ts +21 -20
package/src/daemon/message-types/contacts.ts +2 -0
package/src/daemon/message-types/integrations.ts +1 -0
package/src/daemon/message-types/sessions.ts +2 -0
package/src/daemon/parse-actual-tokens-from-error.test.ts +75 -0
package/src/daemon/providers-setup.ts +1 -1
package/src/daemon/server.ts +42 -5
package/src/daemon/session-agent-loop-handlers.ts +1 -1
package/src/daemon/session-agent-loop.ts +27 -79
package/src/daemon/session-error.ts +5 -4
package/src/daemon/session-process.ts +17 -10
package/src/daemon/session-runtime-assembly.ts +50 -0
package/src/daemon/session-slash.ts +34 -22
package/src/daemon/session.ts +1 -0
package/src/daemon/shutdown-handlers.ts +15 -0
package/src/daemon/watch-handler.ts +2 -2
package/src/email/guardrails.ts +1 -1
package/src/email/service.ts +0 -5
package/src/events/domain-events.ts +1 -0
package/src/hooks/templates.ts +1 -1
package/src/media/app-icon-generator.ts +4 -3
package/src/media/avatar-router.ts +5 -4
package/src/media/gemini-image-service.ts +5 -5
package/src/memory/admin.ts +2 -2
package/src/memory/app-git-service.ts +0 -7
package/src/memory/canonical-guardian-store.ts +25 -3
package/src/memory/conversation-crud.ts +1 -1
package/src/memory/conversation-title-service.ts +2 -2
package/src/memory/db-init.ts +12 -0
package/src/memory/embedding-backend.ts +46 -33
package/src/memory/external-conversation-store.ts +0 -30
package/src/memory/guardian-action-store.ts +0 -31
package/src/memory/guardian-approvals.ts +1 -56
package/src/memory/indexer.ts +4 -3
package/src/memory/items-extractor.ts +1 -1
package/src/memory/job-handlers/backfill.ts +5 -2
package/src/memory/job-handlers/index-maintenance.ts +2 -2
package/src/memory/job-handlers/media-processing.ts +2 -2
package/src/memory/job-handlers/summarization.ts +1 -1
package/src/memory/job-utils.ts +1 -2
package/src/memory/jobs-worker.ts +2 -2
package/src/memory/llm-usage-store.ts +57 -11
package/src/memory/media-store.ts +4 -535
package/src/memory/migrations/032-guardian-delivery-conversation-index.ts +2 -2
package/src/memory/migrations/110-channel-guardian.ts +0 -1
package/src/memory/migrations/158-channel-interaction-columns.ts +18 -0
package/src/memory/migrations/159-drop-contact-interaction-columns.ts +16 -0
package/src/memory/migrations/160-drop-loopback-port-column.ts +13 -0
package/src/memory/migrations/index.ts +3 -0
package/src/memory/published-pages-store.ts +0 -83
package/src/memory/qdrant-circuit-breaker.ts +0 -8
package/src/memory/retriever.test.ts +19 -12
package/src/memory/retriever.ts +1 -1
package/src/memory/schema/contacts.ts +2 -2
package/src/memory/schema/oauth.ts +0 -1
package/src/memory/search/semantic.ts +1 -8
package/src/memory/shared-app-links-store.ts +0 -15
package/src/messaging/registry.ts +0 -5
package/src/messaging/style-analyzer.ts +1 -1
package/src/notifications/copy-composer.ts +5 -13
package/src/notifications/decision-engine.ts +2 -2
package/src/notifications/deliveries-store.ts +0 -39
package/src/notifications/guardian-question-mode.ts +6 -10
package/src/notifications/preference-extractor.ts +1 -1
package/src/oauth/byo-connection.test.ts +29 -20
package/src/oauth/connect-orchestrator.ts +5 -3
package/src/oauth/connect-types.ts +9 -2
package/src/oauth/manual-token-connection.ts +9 -7
package/src/oauth/oauth-store.ts +2 -8
package/src/oauth/provider-behaviors.ts +11 -1
package/src/oauth/seed-providers.ts +13 -5
package/src/permissions/checker.ts +21 -2
package/src/permissions/shell-identity.ts +0 -5
package/src/permissions/trust-store.ts +0 -37
package/src/prompts/__tests__/build-cli-reference-section.test.ts +1 -1
package/src/prompts/system-prompt.ts +5 -14
package/src/prompts/templates/BOOTSTRAP.md +1 -3
package/src/providers/anthropic/client.ts +16 -8
package/src/providers/managed-proxy/constants.ts +9 -11
package/src/providers/managed-proxy/context.ts +14 -9
package/src/providers/provider-send-message.ts +4 -52
package/src/providers/registry.ts +29 -57
package/src/providers/types.ts +1 -1
package/src/runtime/actor-token-store.ts +0 -23
package/src/runtime/auth/route-policy.ts +4 -0
package/src/runtime/channel-invite-transports/telegram.ts +12 -6
package/src/runtime/channel-retry-sweep.ts +6 -0
package/src/runtime/http-router.ts +5 -1
package/src/runtime/http-server.ts +101 -4
package/src/runtime/http-types.ts +1 -0
package/src/runtime/invite-instruction-generator.ts +25 -51
package/src/runtime/invite-service.ts +0 -20
package/src/runtime/middleware/error-handler.ts +1 -2
package/src/runtime/routes/app-management-routes.ts +1 -0
package/src/runtime/routes/attachment-routes.ts +1 -1
package/src/runtime/routes/brain-graph-routes.ts +1 -1
package/src/runtime/routes/btw-routes.ts +20 -1
package/src/runtime/routes/call-routes.ts +1 -1
package/src/runtime/routes/conversation-routes.ts +64 -24
package/src/runtime/routes/debug-routes.ts +1 -1
package/src/runtime/routes/diagnostics-routes.ts +2 -2
package/src/runtime/routes/documents-routes.ts +3 -3
package/src/runtime/routes/global-search-routes.ts +1 -1
package/src/runtime/routes/guardian-bootstrap-routes.ts +0 -20
package/src/runtime/routes/guardian-refresh-routes.ts +0 -20
package/src/runtime/routes/inbound-message-handler.ts +10 -2
package/src/runtime/routes/inbound-stages/background-dispatch.ts +4 -0
package/src/runtime/routes/inbound-stages/edit-intercept.ts +5 -5
package/src/runtime/routes/integrations/slack/share.ts +5 -5
package/src/runtime/routes/log-export-routes.ts +122 -10
package/src/runtime/routes/secret-routes.ts +4 -4
package/src/runtime/routes/session-query-routes.ts +3 -3
package/src/runtime/routes/settings-routes.ts +53 -0
package/src/runtime/routes/trust-rules-routes.ts +1 -1
package/src/runtime/routes/workspace-routes.ts +3 -0
package/src/runtime/verification-templates.ts +1 -1
package/src/security/credential-backend.ts +148 -0
package/src/security/oauth2.ts +5 -5
package/src/security/secret-allowlist.ts +1 -1
package/src/security/secure-keys.ts +98 -160
package/src/security/token-manager.ts +0 -7
package/src/sequence/guardrails.ts +0 -4
package/src/sequence/store.ts +1 -20
package/src/sequence/types.ts +1 -36
package/src/signals/bash.ts +157 -0
package/src/signals/cancel.ts +69 -0
package/src/signals/conversation-undo.ts +127 -0
package/src/signals/trust-rule.ts +174 -0
package/src/skills/clawhub.ts +5 -5
package/src/skills/managed-store.ts +4 -4
package/src/skills/skillssh-registry.ts +6 -1
package/src/swarm/backend-claude-code.ts +6 -6
package/src/swarm/worker-backend.ts +1 -1
package/src/swarm/worker-runner.ts +1 -1
package/src/telegram/bot-username.ts +11 -0
package/src/telemetry/usage-telemetry-reporter.test.ts +366 -0
package/src/telemetry/usage-telemetry-reporter.ts +181 -0
package/src/tools/claude-code/claude-code.ts +6 -6
package/src/tools/credentials/broker.ts +7 -5
package/src/tools/credentials/vault.ts +11 -6
package/src/tools/memory/handlers.test.ts +24 -26
package/src/tools/memory/handlers.ts +1 -13
package/src/tools/network/__tests__/web-search.test.ts +18 -86
package/src/tools/network/web-search.ts +9 -15
package/src/tools/registry.ts +5 -100
package/src/tools/terminal/parser.ts +34 -4
package/src/tools/tool-manifest.ts +0 -10
package/src/usage/actors.ts +0 -12
package/src/util/canonicalize-identity.ts +0 -9
package/src/util/errors.ts +0 -3
package/src/util/platform.ts +31 -8
package/src/util/pricing.ts +0 -39
package/src/watcher/constants.ts +0 -7
package/src/watcher/providers/linear.ts +1 -1
package/src/work-items/work-item-store.ts +4 -4
package/src/workspace/commit-message-provider.ts +1 -1
package/src/workspace/git-service.ts +44 -1
package/src/workspace/provider-commit-message-generator.ts +11 -7
package/src/__tests__/fixtures/proxy-fixtures.ts +0 -147
package/src/browser-extension-relay/client.ts +0 -155
package/src/contacts/index.ts +0 -18
package/src/daemon/tls-certs.ts +0 -270
package/src/errors.ts +0 -41
package/src/events/index.ts +0 -18
package/src/followups/index.ts +0 -10
package/src/playbooks/index.ts +0 -10
package/src/runtime/auth/index.ts +0 -44
package/src/tasks/candidate-store.ts +0 -95
package/src/tools/browser/api-map.ts +0 -313
package/src/tools/browser/auto-navigate.ts +0 -469
package/src/tools/browser/headless-browser.ts +0 -590
package/src/tools/browser/recording-store.ts +0 -75
package/src/tools/computer-use/registry.ts +0 -21
package/src/tools/tasks/index.ts +0 -27

package/src/__tests__/token-estimator-accuracy.benchmark.test.ts ADDED Viewed

@@ -0,0 +1,1521 @@
+/**
+ * Token Estimator Accuracy Benchmark
+ *
+ * Validates estimatePromptTokens() against Anthropic's countTokens API
+ * to measure the estimation gap. Requires ANTHROPIC_API_KEY to run.
+ *
+ * Run: cd assistant && ANTHROPIC_API_KEY=<key> bun test src/__tests__/token-estimator-accuracy.benchmark.test.ts
+ */
+import { describe, expect, test } from "bun:test";
+import {
+  estimatePromptTokens,
+  estimateToolsTokens,
+} from "../context/token-estimator.js";
+import type { Message, ToolDefinition } from "../providers/types.js";
+// Anthropic credential read from the environment; undefined when the variable is not set.
+const API_KEY = process.env.ANTHROPIC_API_KEY;
+// Model identifier — presumably the model the token counts are requested for;
+// its use is outside this view, TODO confirm.
+const MODEL = "claude-sonnet-4-20250514";
+// Resolves to describe when API_KEY is truthy and to describe.skip otherwise,
+// so suites declared through describeWithApi are skipped without a key.
+const describeWithApi = API_KEY ? describe : describe.skip;
+// ---------------------------------------------------------------------------
+// Helpers to construct realistic payloads matching a desktop session
+// ---------------------------------------------------------------------------
+/**
+ * Builds a synthetic system prompt for the benchmark.
+ *
+ * The per-section size notes below (~1K, ~3K, ...) are nominal: the literal
+ * text of each section is noticeably shorter, and the padding loop at the end
+ * is what brings the "production" prompt up to its final size.
+ *
+ * @param size - "small" (the default) returns only the short base
+ *   instructions. "production" appends the sections below and then pads with
+ *   repeated guideline bullets until the joined text is at least 35,000
+ *   characters long (String length, i.e. UTF-16 code units).
+ * @returns The prompt lines joined with "\n".
+ */
+function makeSystemPrompt(size: "small" | "production" = "small"): string {
+  // Lines shared by both sizes.
+  const base = [
+    "You are a helpful AI assistant integrated into a desktop application.",
+    "You have access to the user's workspace, files, and tools.",
+    "Follow the user's instructions carefully and use tools when needed.",
+    "",
+    "## Guidelines",
+    "- Be concise and helpful",
+    "- Use tools to accomplish tasks rather than asking the user to do them",
+    "- When editing files, read them first to understand context",
+    "- Follow existing code style and conventions",
+    "- Ask clarifying questions when the request is ambiguous",
+  ];
+  // The small variant is just the base lines; nothing below runs for it.
+  if (size === "small") {
+    return base.join("\n");
+  }
+  // Production-sized system prompt (~35K chars) with realistic sections
+  // Start from a copy so base itself is never mutated.
+  const sections: string[] = [...base];
+  // Identity section (~1K chars)
+  sections.push(
+    "",
+    "## Identity",
+    "You are Jarvis, a personal AI assistant. Your emoji is 🤖.",
+    "You live in San Francisco, California.",
+    "You are curious, thorough, and always eager to help.",
+    "You express yourself with warmth and precision.",
+  );
+  // Soul section (~3K chars) - personality, boundaries, communication style
+  sections.push(
+    "",
+    "## Soul & Personality",
+    "You are an AI assistant with a distinct personality. You are warm, helpful, and thorough.",
+    "You have boundaries: you do not pretend to be human, you acknowledge uncertainty honestly,",
+    "and you prioritize the user's safety and privacy above all else.",
+    "",
+    "### Communication Style",
+    "- Be direct and concise, but not curt",
+    "- Use technical language when appropriate, but explain jargon",
+    "- Match the user's energy level and formality",
+    "- Use humor sparingly and only when the context is light",
+    "- Never use excessive exclamation marks or emojis unless the user does",
+    "",
+    "### Decision Making",
+    "- When faced with ambiguity, ask clarifying questions",
+    "- When multiple approaches exist, recommend the best one with reasoning",
+    "- When you make a mistake, acknowledge it immediately and correct course",
+    "- When you don't know something, say so rather than guessing",
+  );
+  // CLI reference section (~5K chars)
+  sections.push(
+    "",
+    "## CLI Reference",
+    "The assistant CLI provides these commands:",
+    "",
+    "### File Operations",
+    "- `file read <path>` — Read file contents",
+    "- `file write <path> <content>` — Write file contents",
+    "- `file edit <path> --old <old> --new <new>` — Edit file",
+    "- `file list [path]` — List directory contents",
+    "- `file search <query> [--type <type>]` — Search files",
+    "- `file move <from> <to>` — Move/rename file",
+    "- `file copy <from> <to>` — Copy file",
+    "- `file delete <path>` — Delete file (requires confirmation)",
+    "",
+    "### Terminal Operations",
+    "- `bash <command>` — Execute shell command",
+    "- `bash --timeout <seconds> <command>` — Execute with timeout",
+    "- `bash --background <command>` — Run in background",
+    "",
+    "### Memory Operations",
+    "- `memory recall <query>` — Search memories",
+    "- `memory store <key> <content>` — Store memory",
+    "- `memory forget <key>` — Delete memory",
+    "- `memory list` — List all memories",
+    "",
+    "### Web Operations",
+    "- `web search <query>` — Search the web",
+    "- `web fetch <url>` — Fetch URL content",
+    "- `web screenshot <url>` — Take screenshot",
+    "",
+    "### Skill Operations",
+    "- `skill list` — List available skills",
+    "- `skill run <name> [args]` — Execute skill",
+    "- `skill create <name>` — Create new skill",
+    "- `skill edit <name>` — Edit existing skill",
+  );
+  // Tool permission section (~3K chars)
+  sections.push(
+    "",
+    "## Tool Permissions & Approval Gates",
+    "Some tools require explicit user approval before execution:",
+    "",
+    "### High-Risk Tools (always require approval)",
+    "- `bash` — Shell commands that modify the system",
+    "- `file_write` — Creating or overwriting files",
+    "- `file_edit` — Modifying existing files",
+    "- `file_delete` — Deleting files",
+    "- `credential_store set` — Storing credentials",
+    "",
+    "### Medium-Risk Tools (require approval on first use per session)",
+    "- `web_fetch` — Fetching external URLs",
+    "- `computer_use_*` — All computer use tools",
+    "- `messaging_send` — Sending messages",
+    "- `gmail_send` — Sending emails",
+    "",
+    "### Low-Risk Tools (auto-approved)",
+    "- `file_read` — Reading files",
+    "- `memory_recall` — Searching memories",
+    "- `web_search` — Web searches",
+    "- `tasks_list` — Listing tasks",
+    "- `contacts_search` — Searching contacts",
+    "",
+    "When a tool requires approval, explain what you're about to do and why,",
+    "then wait for the user's confirmation before proceeding.",
+    "Never bypass approval gates or attempt to run commands that circumvent them.",
+  );
+  // Channel awareness section (~2K chars)
+  sections.push(
+    "",
+    "## Channel Awareness",
+    "You may be accessed through different channels, each with different capabilities:",
+    "",
+    "### Desktop App (full capabilities)",
+    "- File system access, terminal, computer use, all tools available",
+    "- Rich text rendering with markdown support",
+    "- Image and file attachment support",
+    "",
+    "### Voice Channel (limited capabilities)",
+    "- Text-to-speech output, push-to-talk input",
+    "- No file system access, no computer use",
+    "- Keep responses concise for audio consumption",
+    "",
+    "### Dashboard (read-only view)",
+    "- Can view conversation history and memories",
+    "- Cannot execute tools or modify files",
+    "- Used for monitoring and reviewing assistant activity",
+  );
+  // Memory & continuity section (~2K chars)
+  sections.push(
+    "",
+    "## Memory & Continuity",
+    "You have access to persistent memory that survives across conversations.",
+    "Use memory to store important context, user preferences, project details,",
+    "and anything that would be useful to recall in future conversations.",
+    "",
+    "### Memory Best Practices",
+    "- Store user preferences when explicitly stated (e.g., 'I prefer tabs over spaces')",
+    "- Store project-specific context (e.g., 'This project uses PostgreSQL 15')",
+    "- Store decisions and their reasoning (e.g., 'We chose Redis over Memcached because...')",
+    "- Update memories when information changes",
+    "- Don't store trivial or ephemeral information",
+    "- Don't store sensitive information (passwords, API keys, etc.)",
+  );
+  // Integration guidance (~3K chars)
+  sections.push(
+    "",
+    "## Integration Guidance",
+    "The assistant supports MCP (Model Context Protocol) servers for extending capabilities.",
+    "When the user asks about integrations:",
+    "",
+    "### Supported Integrations",
+    "- Google Workspace (Gmail, Calendar, Drive, Contacts)",
+    "- Slack (messaging, channel management)",
+    "- GitHub (repositories, issues, pull requests)",
+    "- Linear (project management, issue tracking)",
+    "- Notion (documents, databases)",
+    "- Sentry (error tracking, issue management)",
+    "",
+    "### OAuth Setup",
+    "Most integrations use OAuth for authentication.",
+    "Guide the user through the OAuth flow when setting up a new integration:",
+    "1. Navigate to Settings > Integrations",
+    "2. Click 'Connect' for the desired service",
+    "3. Authorize in the browser popup",
+    "4. Confirm the connection is active",
+    "",
+    "### MCP Servers",
+    "Custom MCP servers can be added via the config file.",
+    "The config lives at ~/.vellum/config.json.",
+    "Each MCP server entry requires: name, command, args, and optional env.",
+  );
+  // Dynamic skills catalog (~5K chars)
+  sections.push("", "## Available Skills", "<available_skills>");
+  // Catalog entries; each one becomes a single <skill .../> line below.
+  const skillCategories = [
+    {
+      id: "gmail",
+      name: "Gmail",
+      desc: "Send, search, draft, and manage Gmail messages",
+    },
+    {
+      id: "calendar",
+      name: "Google Calendar",
+      desc: "Create, list, update, and delete calendar events",
+    },
+    {
+      id: "slack",
+      name: "Slack",
+      desc: "Send messages, search channels, manage threads",
+    },
+    { id: "contacts", name: "Contacts", desc: "Search and manage contacts" },
+    {
+      id: "tasks",
+      name: "Tasks",
+      desc: "Create, list, update, and complete tasks",
+    },
+    {
+      id: "browser",
+      name: "Browser",
+      desc: "Navigate web pages, take screenshots, interact with web content",
+    },
+    {
+      id: "schedule",
+      name: "Schedule",
+      desc: "Set reminders and schedule recurring tasks",
+    },
+    {
+      id: "messaging",
+      name: "Messaging",
+      desc: "Send iMessage and SMS messages",
+    },
+    {
+      id: "sequences",
+      name: "Sequences",
+      desc: "Create and manage multi-step automation workflows",
+    },
+    {
+      id: "playbooks",
+      name: "Playbooks",
+      desc: "Execute pre-defined operational playbooks",
+    },
+    {
+      id: "notes",
+      name: "Notes",
+      desc: "Create and manage notes in Apple Notes",
+    },
+    { id: "music", name: "Music", desc: "Control Apple Music playback" },
+    {
+      id: "photos",
+      name: "Photos",
+      desc: "Search and manage photos in Apple Photos",
+    },
+    {
+      id: "maps",
+      name: "Maps",
+      desc: "Search locations, get directions, find nearby places",
+    },
+    {
+      id: "weather",
+      name: "Weather",
+      desc: "Get current weather and forecasts",
+    },
+  ];
+  // Only id, name and description vary; credential_setup and enabled are the
+  // same literal values for every entry.
+  for (const skill of skillCategories) {
+    sections.push(
+      `  <skill id="${skill.id}" name="${skill.name}" description="${skill.desc}" ` +
+        `credential_setup="oauth" enabled="true" />`,
+    );
+  }
+  sections.push("</available_skills>");
+  // Attachment handling (~1K chars)
+  sections.push(
+    "",
+    "## Attachment Handling",
+    "When sending files to the user, use the <vellum-attachment> tag:",
+    '`<vellum-attachment path="/path/to/file" type="image/png" />`',
+    "",
+    "Supported attachment types:",
+    "- Images: png, jpg, gif, webp, svg",
+    "- Documents: pdf, docx, xlsx, pptx",
+    "- Code: any text file with syntax highlighting",
+    "- Archives: zip, tar.gz",
+  );
+  // Task/schedule routing (~2K chars)
+  sections.push(
+    "",
+    "## Task & Schedule Routing",
+    "When the user asks to 'remind me' or 'schedule something', disambiguate:",
+    "",
+    "- **One-time reminder** → Use `schedule_reminder` tool",
+    "- **Recurring task** → Use `tasks_create` with recurrence",
+    "- **Calendar event** → Use `google_calendar_create_event`",
+    "- **Notification** → Use `send_notification` for immediate alerts",
+    "",
+    "Ask the user to clarify if the intent is ambiguous.",
+    "Default to `schedule_reminder` for simple time-based reminders.",
+  );
+  // Pad to ~35K chars with additional realistic instruction content
+  // Length is measured on the joined text, so the "\n" separators count too.
+  const currentLength = sections.join("\n").length;
+  if (currentLength < 35000) {
+    sections.push("", "## Additional Guidelines");
+    // Add realistic padding content to reach ~35K
+    const guidelines = [
+      "When working with code, always read the file before editing it.",
+      "When running shell commands, explain what each command does.",
+      "When searching the web, summarize the most relevant results.",
+      "When managing files, confirm destructive operations with the user.",
+      "When scheduling events, confirm the timezone with the user.",
+      "When sending messages, confirm the recipient and content before sending.",
+      "When managing credentials, never display sensitive values in plain text.",
+      "When creating tasks, include a clear due date and priority level.",
+      "When editing documents, preserve formatting and structure.",
+      "When processing images, describe what you see in detail.",
+    ];
+    // Cycle through the bullets repeatedly; the joined length is recomputed
+    // after every push so padding stops on the first bullet that reaches the
+    // 35,000 threshold rather than at the end of a full pass.
+    while (sections.join("\n").length < 35000) {
+      for (const g of guidelines) {
+        sections.push(`- ${g}`);
+        if (sections.join("\n").length >= 35000) break;
+      }
+    }
+  }
+  return sections.join("\n");
+}
+/**
+ * Builds the opening user message of the fixture conversation.
+ *
+ * The message text is an `<active_workspace>` XML block (a component schema,
+ * a 50-entry file tree and a 200-row HTML page) followed by the user's actual
+ * request, mimicking workspace context injected ahead of the user text.
+ *
+ * @returns A `user` message holding a single text content block.
+ */
+function makeRuntimeInjectedMessage(): Message {
+  // Component schema section of the workspace block
+  const appSchema = `<app_schema>
+  <component name="Sidebar" props="items: NavigationItem[], collapsed: boolean">
+    <component name="NavigationItem" props="label: string, icon: string, href: string, active: boolean" />
+  </component>
+  <component name="MainContent" props="children: ReactNode">
+    <component name="Header" props="title: string, breadcrumbs: Breadcrumb[]" />
+    <component name="DataTable" props="columns: Column[], rows: Row[], sortBy: string, filterText: string">
+      <component name="TableRow" props="cells: Cell[], selected: boolean, onSelect: () => void" />
+    </component>
+  </component>
+  <component name="Modal" props="open: boolean, title: string, onClose: () => void">
+    <component name="Form" props="fields: Field[], onSubmit: (data: FormData) => void" />
+  </component>
+</app_schema>`;
+  // Page body: 200 list rows cycling through three statuses and 28 day values
+  const statuses = ["active", "pending", "completed"];
+  const loremFiller =
+    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ".repeat(2);
+  const pageRows = Array.from({ length: 200 }, (_, row) => {
+    const day = String((row % 28) + 1).padStart(2, "0");
+    return [
+      `<div class="row-${row}" data-id="${row}" role="listitem">`,
+      `<span class="cell name">Item ${row}: ${loremFiller}</span>`,
+      `<span class="cell status">${statuses[row % 3]}</span>`,
+      `<span class="cell date">2026-03-${day}</span>`,
+      `</div>`,
+    ].join("");
+  });
+  // File tree: one index.ts per numbered feature module
+  const treeEntries: string[] = [];
+  for (let n = 0; n < 50; n++) {
+    treeEntries.push(`  src/modules/feature-${n}/index.ts`);
+  }
+  // Every entry below ends up on its own line of the final text
+  const workspaceLines = [
+    "<active_workspace>",
+    appSchema,
+    "<file_tree>",
+    ...treeEntries,
+    "</file_tree>",
+    "<current_page>",
+    "<html>",
+    '<body class="app-root">',
+    ...pageRows,
+    "</body>",
+    "</html>",
+    "</current_page>",
+    "</active_workspace>",
+  ];
+  const userRequest =
+    "\n\nPlease help me refactor the data table component to support pagination.";
+  return {
+    role: "user",
+    content: [{ type: "text", text: workspaceLines.join("\n") + userRequest }],
+  };
+}
+/**
+ * Generates tool definitions matching a realistic desktop session.
+ *
+ * Builds one flat array from three groups: 11 core tools written out with
+ * their full schema, 11 computer-use proxy tools, and 15 bundled skill tools.
+ * The last two groups are declared in a compact
+ * `{ name, description, props, required }` form and wrapped by the loops
+ * below into an `input_schema` of type "object".
+ *
+ * @returns A freshly built array of `{ name, description, input_schema }`
+ *   entries (37 in total), in declaration order.
+ */
+function makeToolDefinitions(): Array<{
+  name: string;
+  description: string;
+  input_schema: object;
+}> {
+  const tools: Array<{
+    name: string;
+    description: string;
+    input_schema: object;
+  }> = [];
+  // Core tools (11): shell, file, web, memory, skill, asset and credential tools, each with its schema spelled out
+  tools.push(
+    {
+      name: "bash",
+      description:
+        "Execute a shell command on the local machine. Use this for running scripts, installing packages, git operations, and other terminal tasks.",
+      input_schema: {
+        type: "object",
+        properties: {
+          command: {
+            type: "string",
+            description: "The shell command to execute",
+          },
+          reason: {
+            type: "string",
+            description:
+              "Brief non-technical explanation of why this command is being run, shown to the user for approval",
+          },
+          timeout_seconds: {
+            type: "number",
+            description:
+              "Optional timeout in seconds. Defaults to 120. Maximum 600.",
+          },
+        },
+        required: ["command", "reason"],
+      },
+    },
+    {
+      name: "file_read",
+      description:
+        "Read the contents of a file from the local filesystem. Returns the full file content as text. Use this before editing files to understand their current state.",
+      input_schema: {
+        type: "object",
+        properties: {
+          path: {
+            type: "string",
+            description: "Absolute or relative path to the file to read",
+          },
+          offset: {
+            type: "number",
+            description: "Line number to start reading from (0-indexed)",
+          },
+          limit: {
+            type: "number",
+            description: "Maximum number of lines to read",
+          },
+        },
+        required: ["path"],
+      },
+    },
+    {
+      name: "file_write",
+      description:
+        "Write content to a file, creating it if it doesn't exist or overwriting if it does. Use file_edit for surgical changes to existing files.",
+      input_schema: {
+        type: "object",
+        properties: {
+          path: {
+            type: "string",
+            description: "Path to the file to write",
+          },
+          content: {
+            type: "string",
+            description: "The full content to write to the file",
+          },
+          reason: {
+            type: "string",
+            description: "Brief explanation of why this file is being written",
+          },
+        },
+        required: ["path", "content", "reason"],
+      },
+    },
+    {
+      name: "file_edit",
+      description:
+        "Apply a surgical edit to an existing file by replacing a specific string with a new string. The old_string must appear exactly once in the file.",
+      input_schema: {
+        type: "object",
+        properties: {
+          path: { type: "string", description: "Path to the file to edit" },
+          old_string: {
+            type: "string",
+            description:
+              "The exact string to find and replace (must be unique in the file)",
+          },
+          new_string: {
+            type: "string",
+            description: "The replacement string",
+          },
+          reason: {
+            type: "string",
+            description: "Brief explanation of the edit",
+          },
+        },
+        required: ["path", "old_string", "new_string", "reason"],
+      },
+    },
+    {
+      name: "web_search",
+      description:
+        "Search the web for information. Returns a list of search results with titles, URLs, and snippets.",
+      input_schema: {
+        type: "object",
+        properties: {
+          query: { type: "string", description: "The search query" },
+          num_results: {
+            type: "number",
+            description: "Number of results to return (default 5, max 10)",
+          },
+        },
+        required: ["query"],
+      },
+    },
+    {
+      name: "web_fetch",
+      description:
+        "Fetch the content of a URL. Returns the page content as text (HTML stripped to readable text by default).",
+      input_schema: {
+        type: "object",
+        properties: {
+          url: { type: "string", description: "The URL to fetch" },
+          format: {
+            type: "string",
+            enum: ["text", "html", "markdown"],
+            description: "Output format (default: text)",
+          },
+        },
+        required: ["url"],
+      },
+    },
+    {
+      name: "memory_recall",
+      description:
+        "Search across your memory using hybrid semantic and recency-based retrieval. Use this to find information from past conversations, stored facts, or contextual knowledge.",
+      input_schema: {
+        type: "object",
+        properties: {
+          query: {
+            type: "string",
+            description: "The search query to find relevant memories",
+          },
+          scope: {
+            type: "string",
+            enum: ["default", "conversation"],
+            description:
+              "Search scope: 'default' searches all memories, 'conversation' searches only the current conversation",
+          },
+        },
+        required: ["query"],
+      },
+    },
+    {
+      name: "memory_manage",
+      description:
+        "Create, update, or delete a memory entry. Use this to store important information for future reference.",
+      input_schema: {
+        type: "object",
+        properties: {
+          action: {
+            type: "string",
+            enum: ["create", "update", "delete"],
+            description: "The memory operation to perform",
+          },
+          key: {
+            type: "string",
+            description: "Unique key identifying this memory",
+          },
+          content: {
+            type: "string",
+            description: "The content to store (required for create/update)",
+          },
+          tags: {
+            type: "array",
+            items: { type: "string" },
+            description: "Optional tags for categorizing the memory",
+          },
+        },
+        required: ["action", "key"],
+      },
+    },
+    {
+      name: "skill_execute",
+      description:
+        "Execute a loaded skill by name. Skills are pre-defined automation routines that can perform complex multi-step tasks.",
+      input_schema: {
+        type: "object",
+        properties: {
+          skill_name: {
+            type: "string",
+            description: "The name of the skill to execute",
+          },
+          arguments: {
+            type: "object",
+            description: "Arguments to pass to the skill",
+          },
+        },
+        required: ["skill_name"],
+      },
+    },
+    {
+      name: "asset_search",
+      description:
+        "Search for assets (images, documents, files) in the workspace by name, type, or content.",
+      input_schema: {
+        type: "object",
+        properties: {
+          query: { type: "string", description: "Search query" },
+          type: {
+            type: "string",
+            enum: ["image", "document", "code", "any"],
+            description: "Filter by asset type",
+          },
+        },
+        required: ["query"],
+      },
+    },
+    {
+      name: "credential_store",
+      description:
+        "Securely store or retrieve credentials for external services. Credentials are encrypted at rest.",
+      input_schema: {
+        type: "object",
+        properties: {
+          action: {
+            type: "string",
+            enum: ["get", "set", "delete", "list"],
+          },
+          service: {
+            type: "string",
+            description: "The service name (e.g., 'github', 'slack')",
+          },
+          key: {
+            type: "string",
+            description: "Credential key within the service",
+          },
+          value: {
+            type: "string",
+            description: "Credential value (required for 'set')",
+          },
+        },
+        required: ["action", "service"],
+      },
+    },
+  );
+  // Computer-use proxy tools (11), in compact form: the loop after this array turns `props` into `input_schema.properties`
+  const computerUseTools = [
+    {
+      name: "computer_use_click",
+      description:
+        "Click an element on screen. Prefer element_id from the accessibility tree over x/y coordinates for reliability.",
+      props: {
+        click_type: {
+          type: "string",
+          enum: ["single", "double", "right"],
+          description: "Type of click",
+        },
+        element_id: {
+          type: "integer",
+          description: "Accessibility tree element ID",
+        },
+        x: { type: "integer", description: "Screen x coordinate" },
+        y: { type: "integer", description: "Screen y coordinate" },
+        reasoning: {
+          type: "string",
+          description: "Explanation of what you see and why you're clicking",
+        },
+        reason: {
+          type: "string",
+          description: "Brief non-technical explanation for the user",
+        },
+      },
+      required: ["reasoning"],
+    },
+    {
+      name: "computer_use_type_text",
+      description:
+        "Type text into the currently focused element. The element should already be focused via a click.",
+      props: {
+        text: { type: "string", description: "The text to type" },
+        reasoning: {
+          type: "string",
+          description: "Why this text is being typed",
+        },
+        reason: {
+          type: "string",
+          description: "Brief user-facing explanation",
+        },
+      },
+      required: ["text", "reasoning"],
+    },
+    {
+      name: "computer_use_key",
+      description:
+        "Press a keyboard key or key combination (e.g., 'Return', 'cmd+c', 'shift+tab').",
+      props: {
+        key: {
+          type: "string",
+          description: "Key or key combination to press",
+        },
+        reasoning: { type: "string", description: "Why this key is pressed" },
+        reason: {
+          type: "string",
+          description: "Brief user-facing explanation",
+        },
+      },
+      required: ["key", "reasoning"],
+    },
+    {
+      name: "computer_use_scroll",
+      description:
+        "Scroll in a direction at the current cursor position or a specified element.",
+      props: {
+        direction: {
+          type: "string",
+          enum: ["up", "down", "left", "right"],
+        },
+        amount: { type: "integer", description: "Scroll amount in pixels" },
+        element_id: {
+          type: "integer",
+          description: "Element to scroll within",
+        },
+        reasoning: { type: "string", description: "Why scrolling" },
+      },
+      required: ["direction", "reasoning"],
+    },
+    {
+      name: "computer_use_drag",
+      description: "Drag from one point to another on screen.",
+      props: {
+        start_x: { type: "integer" },
+        start_y: { type: "integer" },
+        end_x: { type: "integer" },
+        end_y: { type: "integer" },
+        reasoning: { type: "string" },
+      },
+      required: ["start_x", "start_y", "end_x", "end_y", "reasoning"],
+    },
+    {
+      name: "computer_use_wait",
+      description:
+        "Wait for a specified duration before taking the next action. Use when UI needs time to load.",
+      props: {
+        seconds: {
+          type: "number",
+          description: "Number of seconds to wait (max 10)",
+        },
+        reasoning: { type: "string", description: "Why waiting" },
+      },
+      required: ["seconds", "reasoning"],
+    },
+    {
+      name: "computer_use_open_app",
+      description: "Open a macOS application by name.",
+      props: {
+        app_name: {
+          type: "string",
+          description: "The application name (e.g., 'Safari', 'Terminal')",
+        },
+        reasoning: { type: "string" },
+        reason: { type: "string" },
+      },
+      required: ["app_name", "reasoning"],
+    },
+    {
+      name: "computer_use_run_applescript",
+      description:
+        "Execute an AppleScript on the user's machine. Use for macOS automation tasks.",
+      props: {
+        script: { type: "string", description: "The AppleScript code to run" },
+        reasoning: { type: "string" },
+        reason: { type: "string" },
+      },
+      required: ["script", "reasoning"],
+    },
+    {
+      name: "computer_use_observe",
+      description:
+        "Capture and analyze the current screen state. Returns a screenshot and accessibility tree of visible UI elements.",
+      props: {
+        reasoning: {
+          type: "string",
+          description: "What you expect to see and why you're observing",
+        },
+      },
+      required: ["reasoning"],
+    },
+    {
+      name: "computer_use_done",
+      description:
+        "Signal that the computer-use task is complete. Call this when the UI task is finished.",
+      props: {
+        result: {
+          type: "string",
+          description: "Summary of what was accomplished",
+        },
+        reasoning: { type: "string" },
+      },
+      required: ["result"],
+    },
+    {
+      name: "computer_use_respond",
+      description:
+        "Send a text response to the user during a computer-use session without performing a UI action.",
+      props: {
+        message: {
+          type: "string",
+          description: "The response message",
+        },
+        reasoning: { type: "string" },
+      },
+      required: ["message"],
+    },
+  ];
+  for (const cu of computerUseTools) {
+    tools.push({
+      name: cu.name,
+      description: cu.description,
+      input_schema: {
+        type: "object",
+        properties: cu.props,
+        required: cu.required,
+      },
+    });
+  }
+  // Bundled skill tools (~15 representative ones from gmail, calendar, slack, etc.), same compact form and wrapping loop as above
+  const skillTools = [
+    {
+      name: "gmail_send",
+      description:
+        "Send an email via Gmail. Supports to, cc, bcc, subject, body (plain text or HTML), and attachments.",
+      props: {
+        account: { type: "string", description: "Gmail account to send from" },
+        to: { type: "string", description: "Recipient email address" },
+        cc: { type: "string", description: "CC recipients (comma-separated)" },
+        bcc: { type: "string", description: "BCC recipients" },
+        subject: { type: "string", description: "Email subject line" },
+        body: { type: "string", description: "Email body content" },
+        html: { type: "boolean", description: "Whether body is HTML" },
+        reply_to_message_id: {
+          type: "string",
+          description: "Message ID to reply to",
+        },
+        thread_id: { type: "string", description: "Thread ID to add to" },
+      },
+      required: ["to", "subject", "body"],
+    },
+    {
+      name: "gmail_search",
+      description:
+        "Search Gmail messages using Gmail search syntax. Returns message summaries.",
+      props: {
+        query: { type: "string", description: "Gmail search query" },
+        max_results: {
+          type: "number",
+          description: "Max results (default 10)",
+        },
+        account: { type: "string" },
+      },
+      required: ["query"],
+    },
+    {
+      name: "gmail_draft",
+      description: "Create a draft email in Gmail without sending it.",
+      props: {
+        account: { type: "string" },
+        to: { type: "string" },
+        subject: { type: "string" },
+        body: { type: "string" },
+        html: { type: "boolean" },
+      },
+      required: ["to", "subject", "body"],
+    },
+    {
+      name: "google_calendar_create_event",
+      description:
+        "Create a new event on Google Calendar with attendees, location, and recurrence.",
+      props: {
+        calendar_id: { type: "string" },
+        title: { type: "string", description: "Event title" },
+        start: { type: "string", description: "Start time (ISO 8601)" },
+        end: { type: "string", description: "End time (ISO 8601)" },
+        description: { type: "string", description: "Event description" },
+        location: { type: "string" },
+        attendees: {
+          type: "array",
+          items: { type: "string" },
+          description: "Email addresses",
+        },
+        recurrence: { type: "string", description: "RRULE string" },
+        timezone: { type: "string" },
+      },
+      required: ["title", "start", "end"],
+    },
+    {
+      name: "google_calendar_list_events",
+      description:
+        "List upcoming events from Google Calendar within a time range.",
+      props: {
+        calendar_id: { type: "string" },
+        time_min: { type: "string" },
+        time_max: { type: "string" },
+        max_results: { type: "number" },
+        query: { type: "string" },
+      },
+      required: [],
+    },
+    {
+      name: "slack_send_message",
+      description:
+        "Send a message to a Slack channel or DM. Supports threads and formatted text.",
+      props: {
+        channel: { type: "string", description: "Channel name or ID" },
+        text: { type: "string", description: "Message text" },
+        thread_ts: {
+          type: "string",
+          description: "Thread timestamp for replies",
+        },
+        blocks: { type: "array", description: "Rich text blocks (Block Kit)" },
+      },
+      required: ["channel", "text"],
+    },
+    {
+      name: "slack_search_messages",
+      description:
+        "Search Slack messages across channels using Slack search syntax.",
+      props: {
+        query: { type: "string" },
+        sort: { type: "string", enum: ["score", "timestamp"] },
+        count: { type: "number" },
+      },
+      required: ["query"],
+    },
+    {
+      name: "slack_list_channels",
+      description:
+        "List Slack channels the user has access to, filtered by type.",
+      props: {
+        types: {
+          type: "string",
+          description: "Channel types: public, private, im, mpim",
+        },
+        limit: { type: "number" },
+      },
+      required: [],
+    },
+    {
+      name: "contacts_search",
+      description:
+        "Search the user's contacts by name, email, phone, or organization.",
+      props: {
+        query: { type: "string" },
+        limit: { type: "number" },
+      },
+      required: ["query"],
+    },
+    {
+      name: "tasks_list",
+      description:
+        "List tasks from the user's task manager, filtered by status, project, or due date.",
+      props: {
+        status: { type: "string", enum: ["pending", "completed", "all"] },
+        project: { type: "string" },
+        due_before: { type: "string", description: "ISO date" },
+      },
+      required: [],
+    },
+    {
+      name: "tasks_create",
+      description: "Create a new task in the user's task manager.",
+      props: {
+        title: { type: "string" },
+        description: { type: "string" },
+        due_date: { type: "string" },
+        project: { type: "string" },
+        priority: { type: "string", enum: ["low", "medium", "high", "urgent"] },
+      },
+      required: ["title"],
+    },
+    {
+      name: "browser_navigate",
+      description: "Navigate the browser to a URL and return the page content.",
+      props: {
+        url: { type: "string" },
+        wait_for: { type: "string", description: "CSS selector to wait for" },
+        timeout: { type: "number" },
+      },
+      required: ["url"],
+    },
+    {
+      name: "browser_screenshot",
+      description:
+        "Take a screenshot of the current browser page or a specific element.",
+      props: {
+        selector: {
+          type: "string",
+          description: "CSS selector to screenshot (default: full page)",
+        },
+        full_page: { type: "boolean" },
+      },
+      required: [],
+    },
+    {
+      name: "schedule_reminder",
+      description:
+        "Set a reminder for the user at a specific time or relative delay.",
+      props: {
+        message: { type: "string" },
+        at: { type: "string", description: "ISO 8601 datetime" },
+        delay_minutes: { type: "number", description: "Minutes from now" },
+      },
+      required: ["message"],
+    },
+    {
+      name: "messaging_send",
+      description: "Send an iMessage or SMS to a contact.",
+      props: {
+        to: { type: "string", description: "Phone number or contact name" },
+        message: { type: "string" },
+        service: { type: "string", enum: ["imessage", "sms"] },
+      },
+      required: ["to", "message"],
+    },
+  ];
+  for (const st of skillTools) {
+    tools.push({
+      name: st.name,
+      description: st.description,
+      input_schema: {
+        type: "object",
+        properties: st.props,
+        required: st.required,
+      },
+    });
+  }
+  return tools;
+}
+/**
+ * Generate additional bundled skill tools to scale up to production counts.
+ * Production sessions have ~135 bundled skill tools across 20+ categories.
+ *
+ * Output is fully deterministic. Tool `i` takes its category from
+ * `i % categories.length`, its action from `i % actions.length`, and a numeric
+ * suffix of `floor(i / categories.length)`; the category/suffix pair is unique
+ * per index, so generated names never collide. Each tool gets 3-7 parameters
+ * taken in order from a fixed name list, with types cycling
+ * string/number/boolean and the first two parameters marked as required.
+ *
+ * @param count - Number of tools to generate; zero or less yields an empty
+ *   array.
+ * @returns `count` tool definitions, each with an object-typed `input_schema`.
+ */
+function generateBundledSkillTools(
+  count: number,
+): Array<{ name: string; description: string; input_schema: object }> {
+  const categories = [
+    "gmail",
+    "calendar",
+    "slack",
+    "contacts",
+    "tasks",
+    "browser",
+    "schedule",
+    "messaging",
+    "sequences",
+    "playbooks",
+    "notes",
+    "music",
+    "photos",
+    "maps",
+    "weather",
+    "reminders",
+    "shortcuts",
+    "finder",
+    "system",
+    "notifications",
+  ];
+  const actions = [
+    "list",
+    "search",
+    "create",
+    "update",
+    "delete",
+    "get",
+    "send",
+    "archive",
+    "export",
+    "import",
+    "sync",
+    "share",
+  ];
+  // Hoisted out of the loops: this list never changes, so it is built once
+  // instead of being re-allocated for every parameter of every tool.
+  const paramNames = [
+    "query",
+    "id",
+    "filter",
+    "limit",
+    "offset",
+    "sort_by",
+    "sort_order",
+    "include_archived",
+    "format",
+    "output_path",
+    "account",
+    "workspace",
+    "project",
+    "label",
+    "priority",
+    "assignee",
+    "due_date",
+    "description",
+    "title",
+    "content",
+  ];
+  const tools: Array<{
+    name: string;
+    description: string;
+    input_schema: object;
+  }> = [];
+  for (let i = 0; i < count; i++) {
+    const cat = categories[i % categories.length];
+    const action = actions[i % actions.length];
+    const name = `${cat}_${action}_${Math.floor(i / categories.length)}`;
+    // Generate realistic parameter schemas of varying complexity
+    const paramCount = 3 + (i % 5); // 3-7 parameters
+    const properties: Record<string, object> = {};
+    const required: string[] = [];
+    for (let p = 0; p < paramCount; p++) {
+      const pName = paramNames[p % paramNames.length];
+      properties[pName] = {
+        type: p % 3 === 0 ? "string" : p % 3 === 1 ? "number" : "boolean",
+        description:
+          `The ${pName} parameter for ${cat} ${action} operation. ` +
+          `Used to ${action} ${cat} items matching the specified criteria.`,
+      };
+      if (p < 2) required.push(pName); // First 2 params are required
+    }
+    tools.push({
+      name,
+      description:
+        `${action.charAt(0).toUpperCase() + action.slice(1)} ${cat} items. ` +
+        `Supports filtering by multiple criteria including date range, status, ` +
+        `and custom labels. Returns paginated results with metadata.`,
+      input_schema: {
+        type: "object",
+        properties,
+        required,
+      },
+    });
+  }
+  return tools;
+}
+/**
+ * Builds the fixture conversation used by the estimator tests: the
+ * workspace-injected user request followed by two assistant tool calls
+ * (`file_read`, then `bash`), each answered by a user-role tool result.
+ *
+ * @returns Five messages in turn order.
+ */
+function makeConversationMessages(): Message[] {
+  // Turn 1: the user's request, carrying the injected workspace block
+  const openingRequest = makeRuntimeInjectedMessage();
+  // Turn 2: assistant asks to read the component
+  const readFileCall: Message = {
+    role: "assistant",
+    content: [
+      {
+        type: "text",
+        text: "I'll help you add pagination to the data table. Let me first read the current component to understand its structure.",
+      },
+      {
+        type: "tool_use",
+        id: "tu_01",
+        name: "file_read",
+        input: { path: "src/components/DataTable.tsx" },
+      },
+    ],
+  };
+  // Turn 3: stand-in file body, 80 comment lines labelled by file region
+  const describeLine = (index: number): string => {
+    if (index < 10) return "import statements and type definitions";
+    if (index < 30) return "interface and props definitions with generics";
+    if (index < 60) return "component implementation with hooks and handlers";
+    return "render JSX with table rows and cells";
+  };
+  const fakeFileLines: string[] = [];
+  for (let index = 0; index < 80; index++) {
+    fakeFileLines.push(`  // Line ${index + 1}: ${describeLine(index)}`);
+  }
+  const readFileResult: Message = {
+    role: "user",
+    content: [
+      {
+        type: "tool_result",
+        tool_use_id: "tu_01",
+        content: fakeFileLines.join("\n"),
+      },
+    ],
+  };
+  // Turn 4: assistant searches for existing pagination helpers
+  const findCall: Message = {
+    role: "assistant",
+    content: [
+      {
+        type: "text",
+        text: "Now let me check the existing pagination utilities.",
+      },
+      {
+        type: "tool_use",
+        id: "tu_02",
+        name: "bash",
+        input: {
+          command: "find src -name '*pagina*' -o -name '*Pagina*'",
+          reason: "Looking for existing pagination utilities",
+        },
+      },
+    ],
+  };
+  // Turn 5: the three paths the search reports
+  const findResult: Message = {
+    role: "user",
+    content: [
+      {
+        type: "tool_result",
+        tool_use_id: "tu_02",
+        content:
+          "src/hooks/usePagination.ts\nsrc/components/Pagination.tsx\nsrc/utils/pagination.ts",
+      },
+    ],
+  };
+  return [openingRequest, readFileCall, readFileResult, findCall, findResult];
+}
+// ---------------------------------------------------------------------------
+// Anthropic countTokens helper
+// ---------------------------------------------------------------------------
+interface CountTokensResult {
+  input_tokens: number; // Input-token total, copied from the countTokens response by countTokensViaApi
+}
+/**
+ * Asks Anthropic's token-counting endpoint how many input tokens a request
+ * built from the given prompt, messages and tools would consume.
+ *
+ * The SDK is imported lazily and the client is created with the module-level
+ * `API_KEY`; the request targets the module-level `MODEL`. Nothing is caught
+ * here, so a failed import or API call rejects the returned promise.
+ *
+ * @param systemPrompt - Text sent as the request's `system` field.
+ * @param messages - Conversation to count; every content block is mapped to
+ *   the plain `text` / `tool_use` / `tool_result` shape before sending.
+ * @param tools - Optional tool definitions; left out of the request when
+ *   undefined or empty.
+ * @returns The `input_tokens` value reported by the API.
+ */
+async function countTokensViaApi(
+  systemPrompt: string,
+  messages: Message[],
+  tools?: Array<{ name: string; description: string; input_schema: object }>,
+): Promise<CountTokensResult> {
+  // Use the SDK directly
+  const Anthropic = (await import("@anthropic-ai/sdk")).default;
+  const client = new Anthropic({ apiKey: API_KEY });
+  // Convert our Message type to Anthropic's expected format
+  const anthropicMessages = messages.map((m) => ({
+    role: m.role as "user" | "assistant",
+    content: m.content.map((block) => {
+      switch (block.type) {
+        case "text":
+          return { type: "text" as const, text: block.text };
+        case "tool_use":
+          return {
+            type: "tool_use" as const,
+            id: block.id,
+            name: block.name,
+            input: block.input,
+          };
+        case "tool_result":
+          return {
+            type: "tool_result" as const,
+            tool_use_id: block.tool_use_id,
+            content: block.content,
+          };
+        default:
+          // Unrecognised block kinds are sent as text. Serialise them as JSON:
+          // String() on an object yields "[object Object]", which would hide
+          // the block's real size from the token count.
+          return {
+            type: "text" as const,
+            text: JSON.stringify(block) ?? String(block),
+          };
+      }
+    }),
+  }));
+  const params: Record<string, unknown> = {
+    model: MODEL,
+    messages: anthropicMessages,
+    system: systemPrompt,
+  };
+  // Only attach `tools` when there is at least one definition to send
+  if (tools && tools.length > 0) {
+    params.tools = tools.map((t) => ({
+      name: t.name,
+      description: t.description,
+      input_schema: t.input_schema,
+    }));
+  }
+  const result = await client.messages.countTokens(
+    params as unknown as Parameters<typeof client.messages.countTokens>[0],
+  );
+  return { input_tokens: result.input_tokens };
+}
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+describeWithApi("Token estimator accuracy (requires ANTHROPIC_API_KEY)", () => {
+  // Compares the local estimate with the API count for a request that has a
+  // system prompt and messages but no tool definitions.
+  test("estimation gap: messages + system prompt (no tools)", async () => {
+    const prompt = makeSystemPrompt();
+    const conversation = makeConversationMessages();
+    const estimatedTokens = estimatePromptTokens(conversation, prompt, {
+      providerName: "anthropic",
+    });
+    const { input_tokens: actualTokens } = await countTokensViaApi(
+      prompt,
+      conversation,
+    );
+    // actual / estimated: values above 1 mean the estimator under-counts.
+    const gap = actualTokens / estimatedTokens;
+    console.log("=== No tools ===");
+    console.log(`  Estimated:   ${estimatedTokens.toLocaleString()} tokens`);
+    console.log(`  Actual:      ${actualTokens.toLocaleString()} tokens`);
+    console.log(`  Ratio:       ${gap.toFixed(2)}x`);
+    // Some gap is expected even without tools, because structured content
+    // (HTML, JSON) tokenizes at ~2-3 chars/token vs the assumed 4.
+    // Lower bound: sanity check that we are not wildly over-estimating.
+    expect(gap).toBeGreaterThan(0.5);
+    // Upper bound: without tools the gap should stay moderate.
+    expect(gap).toBeLessThan(3.0);
+  });
+  // Checks that passing a tool token budget moves the estimate closer to the
+  // API count than the tool-unaware estimate.
+  test("estimation gap with tools: old vs new estimator", async () => {
+    const prompt = makeSystemPrompt();
+    const conversation = makeConversationMessages();
+    const toolDefs = makeToolDefinitions() as ToolDefinition[];
+    const toolTokenBudget = estimateToolsTokens(toolDefs);
+    // "Old" estimate: no tool budget is supplied, so tools are ignored.
+    const estimateWithoutTools = estimatePromptTokens(conversation, prompt, {
+      providerName: "anthropic",
+    });
+    // "New" estimate: same inputs plus the tool token budget.
+    const estimateWithTools = estimatePromptTokens(conversation, prompt, {
+      providerName: "anthropic",
+      toolTokenBudget,
+    });
+    // The API count is taken with the tool definitions attached.
+    const { input_tokens: actualTokens } = await countTokensViaApi(
+      prompt,
+      conversation,
+      toolDefs,
+    );
+    const ratioWithoutTools = actualTokens / estimateWithoutTools;
+    const ratioWithTools = actualTokens / estimateWithTools;
+    console.log("=== With tools (old vs new estimator) ===");
+    console.log(`  Tools:          ${toolDefs.length}`);
+    console.log(`  Tool budget:    ${toolTokenBudget.toLocaleString()} tokens`);
+    console.log(
+      `  Old estimated:  ${estimateWithoutTools.toLocaleString()} tokens (ratio ${ratioWithoutTools.toFixed(2)}x)`,
+    );
+    console.log(
+      `  New estimated:  ${estimateWithTools.toLocaleString()} tokens (ratio ${ratioWithTools.toFixed(2)}x)`,
+    );
+    console.log(`  Actual:         ${actualTokens.toLocaleString()} tokens`);
+    // The tool-aware estimate must be larger and closer to the API count.
+    expect(estimateWithTools).toBeGreaterThan(estimateWithoutTools);
+    expect(ratioWithTools).toBeLessThan(ratioWithoutTools);
+    // The API count may exceed the tool-aware estimate by at most 30%.
+    expect(ratioWithTools).toBeLessThan(1.3);
+  });
+  // Measures how many tokens the tool definitions add by counting the same
+  // request with and without them and taking the difference.
+  test("tool definitions contribute significant tokens", async () => {
+    const systemPrompt = makeSystemPrompt();
+    const messages = makeConversationMessages();
+    const tools = makeToolDefinitions();
+    // The two counts do not depend on each other, so issue both requests
+    // concurrently instead of waiting for the first before starting the second.
+    const [withoutTools, withTools] = await Promise.all([
+      countTokensViaApi(systemPrompt, messages),
+      countTokensViaApi(systemPrompt, messages, tools),
+    ]);
+    const toolTokens = withTools.input_tokens - withoutTools.input_tokens;
+    console.log("=== Tool token contribution ===");
+    console.log(
+      `  Without tools: ${withoutTools.input_tokens.toLocaleString()} tokens`,
+    );
+    console.log(
+      `  With tools:    ${withTools.input_tokens.toLocaleString()} tokens`,
+    );
+    console.log(`  Tool overhead: ${toolTokens.toLocaleString()} tokens`);
+    console.log(
+      `  Per tool avg:  ${Math.round(toolTokens / tools.length)} tokens`,
+    );
+    // Tools should contribute a meaningful number of tokens
+    expect(toolTokens).toBeGreaterThan(1000);
+  });
+  // Measures the observed chars-per-token ratio of an HTML-heavy message and
+  // asserts it is below the 4 chars/token figure printed as the assumption.
+  test("structured content (HTML/XML) tokenizes more densely than 4 chars/token", async () => {
+    // Build 100 rows of repetitive markup, one per line.
+    const cellText = "Content ".repeat(5);
+    const rows: string[] = [];
+    for (let i = 0; i < 100; i += 1) {
+      rows.push(
+        `<div class="item-${i}" data-testid="row-${i}"><span class="name">${cellText}</span></div>`,
+      );
+    }
+    const htmlContent = rows.join("\n");
+    const messages: Message[] = [
+      { role: "user", content: [{ type: "text", text: htmlContent }] },
+    ];
+    const systemPrompt = "You are a helpful assistant.";
+    const estimatedTokens = estimatePromptTokens(messages, systemPrompt, {
+      providerName: "anthropic",
+    });
+    const { input_tokens: actualTokens } = await countTokensViaApi(
+      systemPrompt,
+      messages,
+    );
+    // The count is for the whole request (the short system prompt is sent
+    // too), while the numerator is the HTML length only.
+    const charsPerToken = htmlContent.length / actualTokens;
+    const gap = actualTokens / estimatedTokens;
+    console.log("=== HTML content tokenization ===");
+    console.log(
+      `  Content length: ${htmlContent.length.toLocaleString()} chars`,
+    );
+    console.log(`  Estimated:      ${estimatedTokens.toLocaleString()} tokens`);
+    console.log(`  Actual:         ${actualTokens.toLocaleString()} tokens`);
+    console.log(`  Assumed chars/token: 4`);
+    console.log(`  Actual chars/token:  ${charsPerToken.toFixed(2)}`);
+    console.log(`  Ratio:          ${gap.toFixed(2)}x`);
+    // HTML/XML typically tokenizes at 2-3 chars per token rather than 4,
+    // which means the estimate runs low for HTML-heavy content.
+    expect(charsPerToken).toBeLessThan(4);
+  });
+  // Repeats the old-vs-new comparison with the "production" system prompt and
+  // the base tool set extended by 123 generated bundled-skill tools.
+  test("production-scale scenario: old vs new estimator with 160 tools", async () => {
+    const prompt = makeSystemPrompt("production");
+    const conversation = makeConversationMessages();
+    const coreTools = makeToolDefinitions() as ToolDefinition[];
+    const skillTools = generateBundledSkillTools(123) as ToolDefinition[];
+    const allTools = coreTools.concat(skillTools);
+    const toolTokenBudget = estimateToolsTokens(allTools);
+    // "Old" estimate: no tool budget is supplied.
+    const estimateWithoutTools = estimatePromptTokens(conversation, prompt, {
+      providerName: "anthropic",
+    });
+    // "New" estimate: same inputs plus the tool token budget.
+    const estimateWithTools = estimatePromptTokens(conversation, prompt, {
+      providerName: "anthropic",
+      toolTokenBudget,
+    });
+    const { input_tokens: actualTokens } = await countTokensViaApi(
+      prompt,
+      conversation,
+      allTools,
+    );
+    const ratioWithoutTools = actualTokens / estimateWithoutTools;
+    const ratioWithTools = actualTokens / estimateWithTools;
+    console.log("=== Production-scale scenario (old vs new) ===");
+    console.log(`  Tools:          ${allTools.length}`);
+    console.log(`  System:         ${prompt.length.toLocaleString()} chars`);
+    console.log(`  Tool budget:    ${toolTokenBudget.toLocaleString()} tokens`);
+    console.log(
+      `  Old estimated:  ${estimateWithoutTools.toLocaleString()} tokens (ratio ${ratioWithoutTools.toFixed(2)}x)`,
+    );
+    console.log(
+      `  New estimated:  ${estimateWithTools.toLocaleString()} tokens (ratio ${ratioWithTools.toFixed(2)}x)`,
+    );
+    console.log(`  Actual:         ${actualTokens.toLocaleString()} tokens`);
+    console.log(`  Production observed: 3.01x (73,416 est vs 220,964 actual)`);
+    // The tool-unaware estimate is expected to miss by a wide margin.
+    expect(ratioWithoutTools).toBeGreaterThan(1.5);
+    // The tool-aware estimate must be closer to the API count.
+    expect(ratioWithTools).toBeLessThan(ratioWithoutTools);
+    // The API count may exceed the tool-aware estimate by at most 50%; the
+    // remaining slack covers the density gap on structured content.
+    expect(ratioWithTools).toBeLessThan(1.5);
+  });
+});