@vellumai/assistant 0.4.48 → 0.4.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +26 -35
- package/README.md +5 -26
- package/docs/architecture/integrations.md +45 -41
- package/docs/architecture/keychain-broker.md +3 -3
- package/docs/architecture/memory.md +180 -119
- package/docs/runbook-trusted-contacts.md +3 -8
- package/hook-templates/debug-prompt-logger/hook.json +1 -1
- package/hook-templates/debug-prompt-logger/run.sh +1 -3
- package/package.json +2 -2
- package/src/__tests__/actor-token-service.test.ts +0 -1
- package/src/__tests__/agent-loop.test.ts +3 -1
- package/src/__tests__/anthropic-provider.test.ts +249 -2
- package/src/__tests__/approval-cascade.test.ts +796 -0
- package/src/__tests__/approval-primitive.test.ts +0 -1
- package/src/__tests__/approval-routes-http.test.ts +4 -0
- package/src/__tests__/assistant-attachments.test.ts +12 -34
- package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
- package/src/__tests__/assistant-feature-flag-guardrails.test.ts +76 -0
- package/src/__tests__/assistant-feature-flags-integration.test.ts +0 -1
- package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +2 -2
- package/src/__tests__/canonical-guardian-store.test.ts +95 -0
- package/src/__tests__/channel-guardian.test.ts +0 -2
- package/src/__tests__/channel-readiness-routes.test.ts +15 -6
- package/src/__tests__/channel-readiness-service.test.ts +10 -9
- package/src/__tests__/checker.test.ts +13 -20
- package/src/__tests__/computer-use-skill-manifest-regression.test.ts +1 -1
- package/src/__tests__/computer-use-tools.test.ts +2 -19
- package/src/__tests__/config-schema.test.ts +1 -68
- package/src/__tests__/config-watcher.test.ts +0 -1
- package/src/__tests__/confirmation-request-guardian-bridge.test.ts +0 -1
- package/src/__tests__/context-image-dimensions.test.ts +332 -0
- package/src/__tests__/context-memory-e2e.test.ts +11 -100
- package/src/__tests__/context-token-estimator.test.ts +196 -13
- package/src/__tests__/conversation-attention-store.test.ts +0 -1
- package/src/__tests__/conversation-attention-telegram.test.ts +0 -1
- package/src/__tests__/conversation-routes-guardian-reply.test.ts +152 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +2 -0
- package/src/__tests__/credential-metadata-store.test.ts +64 -73
- package/src/__tests__/credential-security-e2e.test.ts +1 -0
- package/src/__tests__/credential-security-invariants.test.ts +13 -7
- package/src/__tests__/credential-vault-unit.test.ts +284 -49
- package/src/__tests__/credential-vault.test.ts +150 -16
- package/src/__tests__/credentials-cli.test.ts +71 -0
- package/src/__tests__/cu-unified-flow.test.ts +532 -0
- package/src/__tests__/date-context.test.ts +93 -77
- package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
- package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +0 -1
- package/src/__tests__/ephemeral-permissions.test.ts +3 -3
- package/src/__tests__/gateway-only-guard.test.ts +0 -1
- package/src/__tests__/guardian-action-grant-mint-consume.test.ts +0 -1
- package/src/__tests__/guardian-decision-primitive-canonical.test.ts +0 -1
- package/src/__tests__/guardian-routing-invariants.test.ts +93 -1
- package/src/__tests__/guardian-verification-voice-binding.test.ts +0 -1
- package/src/__tests__/handlers-user-message-approval-consumption.test.ts +0 -39
- package/src/__tests__/heartbeat-service.test.ts +0 -1
- package/src/__tests__/history-repair.test.ts +245 -0
- package/src/__tests__/host-cu-proxy.test.ts +791 -0
- package/src/__tests__/host-shell-tool.test.ts +27 -15
- package/src/__tests__/http-user-message-parity.test.ts +2 -0
- package/src/__tests__/ingress-url-consistency.test.ts +14 -21
- package/src/__tests__/integration-status.test.ts +32 -51
- package/src/__tests__/intent-routing.test.ts +0 -1
- package/src/__tests__/invite-redemption-service.test.ts +65 -1
- package/src/__tests__/invite-routes-http.test.ts +10 -9
- package/src/__tests__/keychain-broker-client.test.ts +14 -46
- package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
- package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
- package/src/__tests__/memory-recall-quality.test.ts +244 -407
- package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
- package/src/__tests__/memory-regressions.test.ts +477 -2841
- package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
- package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
- package/src/__tests__/mime-builder.test.ts +28 -0
- package/src/__tests__/native-web-search.test.ts +1 -0
- package/src/__tests__/notification-routing-intent.test.ts +0 -1
- package/src/__tests__/oauth-cli.test.ts +941 -15
- package/src/__tests__/oauth-provider-profiles.test.ts +9 -9
- package/src/__tests__/oauth-scope-policy.test.ts +4 -6
- package/src/__tests__/oauth-store.test.ts +870 -0
- package/src/__tests__/onboarding-starter-tasks.test.ts +0 -1
- package/src/__tests__/provider-error-scenarios.test.ts +0 -1
- package/src/__tests__/provider-streaming.benchmark.test.ts +0 -1
- package/src/__tests__/public-ingress-urls.test.ts +15 -21
- package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
- package/src/__tests__/recording-handler.test.ts +3 -4
- package/src/__tests__/registry.test.ts +2 -3
- package/src/__tests__/relay-server.test.ts +46 -1
- package/src/__tests__/runtime-events-sse.test.ts +55 -7
- package/src/__tests__/schedule-store.test.ts +0 -1
- package/src/__tests__/schedule-tools.test.ts +32 -0
- package/src/__tests__/scheduler-recurrence.test.ts +0 -1
- package/src/__tests__/scoped-approval-grants.test.ts +0 -1
- package/src/__tests__/scoped-grant-security-matrix.test.ts +0 -1
- package/src/__tests__/script-proxy-certs.test.ts +1 -1
- package/src/__tests__/secret-ingress-handler.test.ts +0 -1
- package/src/__tests__/secret-onetime-send.test.ts +1 -0
- package/src/__tests__/secure-keys.test.ts +7 -2
- package/src/__tests__/send-endpoint-busy.test.ts +24 -6
- package/src/__tests__/sequence-store.test.ts +0 -1
- package/src/__tests__/session-abort-tool-results.test.ts +1 -14
- package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
- package/src/__tests__/session-agent-loop.test.ts +19 -15
- package/src/__tests__/session-confirmation-signals.test.ts +1 -15
- package/src/__tests__/session-error.test.ts +124 -2
- package/src/__tests__/session-history-web-search.test.ts +918 -0
- package/src/__tests__/session-init.benchmark.test.ts +4 -5
- package/src/__tests__/session-pre-run-repair.test.ts +1 -14
- package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
- package/src/__tests__/session-queue.test.ts +37 -27
- package/src/__tests__/session-runtime-assembly.test.ts +54 -0
- package/src/__tests__/session-slash-known.test.ts +1 -15
- package/src/__tests__/session-slash-queue.test.ts +1 -15
- package/src/__tests__/session-slash-unknown.test.ts +1 -15
- package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
- package/src/__tests__/session-workspace-injection.test.ts +3 -37
- package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
- package/src/__tests__/skill-include-graph.test.ts +66 -0
- package/src/__tests__/skill-load-feature-flag.test.ts +0 -1
- package/src/__tests__/skill-load-tool.test.ts +149 -1
- package/src/__tests__/skill-projection-feature-flag.test.ts +0 -1
- package/src/__tests__/skills-install-extract.test.ts +93 -0
- package/src/__tests__/skills-uninstall.test.ts +1 -1
- package/src/__tests__/skills.test.ts +3 -3
- package/src/__tests__/skillssh-registry.test.ts +451 -0
- package/src/__tests__/slack-channel-config.test.ts +67 -3
- package/src/__tests__/slack-share-routes.test.ts +17 -19
- package/src/__tests__/system-prompt.test.ts +0 -1
- package/src/__tests__/telegram-invite-adapter.test.ts +18 -22
- package/src/__tests__/terminal-tools.test.ts +4 -3
- package/src/__tests__/test-support/computer-use-skill-harness.ts +3 -2
- package/src/__tests__/tool-approval-handler.test.ts +0 -1
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -1
- package/src/__tests__/tool-executor-lifecycle-events.test.ts +0 -1
- package/src/__tests__/tool-executor-shell-integration.test.ts +0 -1
- package/src/__tests__/tool-executor.test.ts +0 -1
- package/src/__tests__/tool-grant-request-escalation.test.ts +0 -1
- package/src/__tests__/trust-store-pattern-matches.test.ts +29 -0
- package/src/__tests__/trust-store.test.ts +7 -13
- package/src/__tests__/trusted-contact-approval-notifier.test.ts +0 -1
- package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +0 -1
- package/src/__tests__/twilio-routes.test.ts +0 -16
- package/src/__tests__/verification-control-plane-policy.test.ts +0 -1
- package/src/__tests__/voice-invite-redemption.test.ts +32 -1
- package/src/__tests__/voice-scoped-grant-consumer.test.ts +0 -1
- package/src/agent/ax-tree-compaction.test.ts +286 -0
- package/src/agent/loop.ts +104 -131
- package/src/approvals/AGENTS.md +1 -1
- package/src/approvals/guardian-request-resolvers.ts +14 -2
- package/src/bundler/compiler-tools.ts +66 -2
- package/src/calls/call-domain.ts +133 -6
- package/src/calls/call-store.ts +6 -0
- package/src/calls/relay-server.ts +52 -18
- package/src/calls/relay-setup-router.ts +17 -1
- package/src/calls/twilio-config.ts +3 -8
- package/src/calls/twilio-routes.ts +1 -2
- package/src/calls/types.ts +3 -1
- package/src/calls/voice-ingress-preflight.ts +1 -1
- package/src/cli/commands/browser-relay.ts +18 -12
- package/src/cli/commands/completions.ts +0 -3
- package/src/cli/commands/credentials.ts +101 -15
- package/src/cli/commands/doctor.ts +4 -3
- package/src/cli/commands/mcp.ts +46 -59
- package/src/cli/commands/memory.ts +16 -165
- package/src/cli/commands/oauth/apps.ts +284 -0
- package/src/cli/commands/oauth/connections.ts +633 -0
- package/src/cli/commands/oauth/index.ts +52 -0
- package/src/cli/commands/oauth/providers.ts +256 -0
- package/src/cli/commands/sessions.ts +5 -2
- package/src/cli/commands/skills.ts +177 -339
- package/src/cli/http-client.ts +0 -20
- package/src/cli/main-screen.tsx +2 -2
- package/src/cli/program.ts +6 -11
- package/src/cli/reference.ts +1 -3
- package/src/cli.ts +4 -10
- package/src/config/assistant-feature-flags.ts +0 -3
- package/src/config/bundled-skills/_shared/CLI_RETRIEVAL_PATTERN.md +1 -1
- package/src/config/bundled-skills/computer-use/SKILL.md +3 -6
- package/src/config/bundled-skills/computer-use/TOOLS.json +23 -5
- package/src/config/bundled-skills/computer-use/tools/{computer-use-request-control.ts → computer-use-observe.ts} +1 -5
- package/src/config/bundled-skills/google-calendar/calendar-client.ts +21 -16
- package/src/config/bundled-skills/messaging/tools/shared.ts +1 -4
- package/src/config/bundled-skills/settings/SKILL.md +1 -1
- package/src/config/bundled-skills/settings/TOOLS.json +2 -8
- package/src/config/bundled-skills/settings/tools/voice-config-update.ts +5 -33
- package/src/config/bundled-tool-registry.ts +2 -5
- package/src/config/env-registry.ts +14 -83
- package/src/config/env.ts +11 -50
- package/src/config/feature-flag-registry.json +16 -16
- package/src/config/loader.ts +0 -6
- package/src/config/schema.ts +4 -13
- package/src/config/schemas/memory-lifecycle.ts +0 -9
- package/src/config/schemas/memory-processing.ts +0 -180
- package/src/config/schemas/memory-retrieval.ts +32 -104
- package/src/config/schemas/memory.ts +0 -10
- package/src/config/skills.ts +21 -2
- package/src/config/types.ts +0 -4
- package/src/context/image-dimensions.ts +229 -0
- package/src/context/token-estimator.ts +75 -12
- package/src/context/window-manager.ts +53 -11
- package/src/daemon/assistant-attachments.ts +1 -13
- package/src/daemon/config-watcher.ts +61 -3
- package/src/daemon/daemon-control.ts +1 -1
- package/src/daemon/date-context.ts +114 -31
- package/src/daemon/handlers/config-ingress.ts +8 -33
- package/src/daemon/handlers/config-slack-channel.ts +49 -46
- package/src/daemon/handlers/config-telegram.ts +32 -16
- package/src/daemon/handlers/sessions.ts +27 -36
- package/src/daemon/handlers/shared.ts +0 -130
- package/src/daemon/handlers/skills.ts +20 -1
- package/src/daemon/history-repair.ts +72 -8
- package/src/daemon/host-cu-proxy.ts +430 -0
- package/src/daemon/lifecycle.ts +67 -71
- package/src/daemon/mcp-reload-service.ts +2 -2
- package/src/daemon/message-protocol.ts +3 -0
- package/src/daemon/message-types/computer-use.ts +1 -129
- package/src/daemon/message-types/host-cu.ts +19 -0
- package/src/daemon/message-types/memory.ts +4 -16
- package/src/daemon/message-types/messages.ts +4 -0
- package/src/daemon/message-types/sessions.ts +4 -0
- package/src/daemon/server.ts +25 -21
- package/src/daemon/session-agent-loop-handlers.ts +40 -0
- package/src/daemon/session-agent-loop.ts +334 -48
- package/src/daemon/session-attachments.ts +1 -2
- package/src/daemon/session-error.ts +89 -6
- package/src/daemon/session-history.ts +17 -7
- package/src/daemon/session-media-retry.ts +6 -2
- package/src/daemon/session-memory.ts +69 -149
- package/src/daemon/session-process.ts +10 -1
- package/src/daemon/session-runtime-assembly.ts +49 -19
- package/src/daemon/session-slash.ts +1 -1
- package/src/daemon/session-surfaces.ts +43 -28
- package/src/daemon/session-tool-setup.ts +9 -10
- package/src/daemon/session.ts +150 -17
- package/src/daemon/tool-side-effects.ts +2 -8
- package/src/daemon/watch-handler.ts +2 -2
- package/src/events/tool-metrics-listener.ts +2 -2
- package/src/hooks/manager.ts +1 -4
- package/src/inbound/public-ingress-urls.ts +7 -7
- package/src/instrument.ts +61 -1
- package/src/logfire.ts +16 -5
- package/src/memory/admin.ts +2 -191
- package/src/memory/canonical-guardian-store.ts +38 -2
- package/src/memory/conversation-crud.ts +0 -33
- package/src/memory/conversation-key-store.ts +21 -0
- package/src/memory/conversation-queries.ts +22 -3
- package/src/memory/db-init.ts +32 -0
- package/src/memory/embedding-backend.ts +84 -8
- package/src/memory/embedding-types.ts +9 -1
- package/src/memory/indexer.ts +7 -46
- package/src/memory/items-extractor.ts +274 -76
- package/src/memory/job-handlers/backfill.ts +2 -127
- package/src/memory/job-handlers/cleanup.ts +2 -16
- package/src/memory/job-handlers/extraction.ts +2 -138
- package/src/memory/job-handlers/index-maintenance.ts +1 -6
- package/src/memory/job-handlers/summarization.ts +3 -148
- package/src/memory/job-utils.ts +21 -59
- package/src/memory/jobs-store.ts +1 -159
- package/src/memory/jobs-worker.ts +9 -52
- package/src/memory/migrations/104-core-indexes.ts +3 -3
- package/src/memory/migrations/149-oauth-tables.ts +62 -0
- package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
- package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
- package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
- package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
- package/src/memory/migrations/154-drop-fts.ts +20 -0
- package/src/memory/migrations/155-drop-conflicts.ts +7 -0
- package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
- package/src/memory/migrations/index.ts +8 -0
- package/src/memory/qdrant-client.ts +148 -51
- package/src/memory/raw-query.ts +1 -1
- package/src/memory/retriever.test.ts +294 -273
- package/src/memory/retriever.ts +421 -645
- package/src/memory/schema/calls.ts +2 -0
- package/src/memory/schema/index.ts +1 -0
- package/src/memory/schema/memory-core.ts +3 -48
- package/src/memory/schema/oauth.ts +67 -0
- package/src/memory/search/formatting.ts +263 -176
- package/src/memory/search/lexical.ts +1 -254
- package/src/memory/search/ranking.ts +0 -455
- package/src/memory/search/semantic.ts +100 -14
- package/src/memory/search/staleness.ts +47 -0
- package/src/memory/search/tier-classifier.ts +21 -0
- package/src/memory/search/types.ts +15 -77
- package/src/memory/task-memory-cleanup.ts +4 -6
- package/src/messaging/provider.ts +4 -4
- package/src/messaging/providers/gmail/client.ts +82 -2
- package/src/messaging/providers/gmail/mime-builder.ts +17 -7
- package/src/messaging/providers/gmail/people-client.ts +10 -10
- package/src/messaging/providers/telegram-bot/adapter.ts +17 -17
- package/src/messaging/providers/whatsapp/adapter.ts +11 -8
- package/src/messaging/registry.ts +2 -32
- package/src/notifications/copy-composer.ts +0 -5
- package/src/notifications/signal.ts +4 -5
- package/src/oauth/byo-connection.test.ts +133 -25
- package/src/oauth/byo-connection.ts +22 -6
- package/src/oauth/connect-orchestrator.ts +113 -57
- package/src/oauth/connect-types.ts +17 -23
- package/src/oauth/connection-resolver.ts +35 -11
- package/src/oauth/connection.ts +1 -1
- package/src/oauth/manual-token-connection.ts +104 -0
- package/src/oauth/oauth-store.ts +582 -0
- package/src/oauth/platform-connection.test.ts +29 -0
- package/src/oauth/platform-connection.ts +6 -5
- package/src/oauth/provider-behaviors.ts +124 -0
- package/src/oauth/scope-policy.ts +9 -2
- package/src/oauth/seed-providers.ts +167 -0
- package/src/oauth/token-persistence.ts +81 -77
- package/src/permissions/checker.ts +3 -3
- package/src/permissions/defaults.ts +1 -1
- package/src/permissions/prompter.ts +10 -1
- package/src/permissions/trust-store.ts +36 -1
- package/src/playbooks/playbook-compiler.ts +1 -1
- package/src/prompts/__tests__/build-cli-reference-section.test.ts +3 -1
- package/src/prompts/system-prompt.ts +46 -42
- package/src/providers/anthropic/client.ts +59 -20
- package/src/providers/retry.ts +1 -27
- package/src/providers/types.ts +7 -1
- package/src/runtime/AGENTS.md +9 -0
- package/src/runtime/auth/route-policy.ts +6 -6
- package/src/runtime/channel-reply-delivery.ts +0 -40
- package/src/runtime/gateway-client.ts +0 -7
- package/src/runtime/guardian-reply-router.ts +24 -22
- package/src/runtime/http-server.ts +10 -8
- package/src/runtime/http-types.ts +2 -2
- package/src/runtime/invite-redemption-service.ts +19 -1
- package/src/runtime/invite-service.ts +25 -0
- package/src/runtime/middleware/twilio-validation.ts +1 -11
- package/src/runtime/pending-interactions.ts +14 -12
- package/src/runtime/routes/brain-graph-routes.ts +10 -90
- package/src/runtime/routes/channel-delivery-routes.ts +0 -1
- package/src/runtime/routes/conversation-routes.ts +81 -19
- package/src/runtime/routes/events-routes.ts +21 -11
- package/src/runtime/routes/host-cu-routes.ts +97 -0
- package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
- package/src/runtime/routes/inbound-stages/background-dispatch.ts +12 -111
- package/src/runtime/routes/integrations/slack/share.ts +6 -7
- package/src/runtime/routes/log-export-routes.ts +126 -8
- package/src/runtime/routes/memory-item-routes.test.ts +754 -0
- package/src/runtime/routes/memory-item-routes.ts +503 -0
- package/src/runtime/routes/session-management-routes.ts +3 -3
- package/src/runtime/routes/settings-routes.ts +55 -48
- package/src/runtime/routes/surface-action-routes.ts +1 -1
- package/src/runtime/routes/trust-rules-routes.ts +14 -0
- package/src/runtime/routes/watch-routes.ts +128 -0
- package/src/runtime/routes/workspace-routes.ts +2 -1
- package/src/schedule/integration-status.ts +10 -9
- package/src/security/credential-key.ts +0 -156
- package/src/security/keychain-broker-client.ts +22 -10
- package/src/security/oauth2.ts +1 -1
- package/src/security/secure-keys.ts +25 -3
- package/src/security/token-manager.ts +137 -64
- package/src/skills/catalog-install.ts +414 -0
- package/src/skills/include-graph.ts +32 -0
- package/src/skills/skillssh-registry.ts +503 -0
- package/src/telegram/bot-username.ts +2 -3
- package/src/tools/assets/search.ts +5 -1
- package/src/tools/browser/network-recorder.ts +1 -1
- package/src/tools/browser/network-recording-types.ts +1 -1
- package/src/tools/computer-use/definitions.ts +36 -11
- package/src/tools/computer-use/registry.ts +5 -6
- package/src/tools/credentials/broker.ts +1 -2
- package/src/tools/credentials/metadata-store.ts +17 -121
- package/src/tools/credentials/vault.ts +92 -167
- package/src/tools/memory/definitions.ts +4 -13
- package/src/tools/memory/handlers.test.ts +83 -103
- package/src/tools/memory/handlers.ts +50 -85
- package/src/tools/registry.ts +2 -7
- package/src/tools/schedule/create.ts +8 -1
- package/src/tools/schedule/update.ts +8 -1
- package/src/tools/skills/load.ts +85 -3
- package/src/tools/watch/watch-state.ts +0 -12
- package/src/util/logger.ts +7 -41
- package/src/util/platform.ts +9 -28
- package/src/watcher/providers/google-calendar.ts +2 -1
- package/src/__tests__/clarification-resolver.test.ts +0 -193
- package/src/__tests__/computer-use-session-compaction.test.ts +0 -143
- package/src/__tests__/computer-use-session-lifecycle.test.ts +0 -322
- package/src/__tests__/computer-use-session-working-dir.test.ts +0 -166
- package/src/__tests__/computer-use-skill-baseline.test.ts +0 -78
- package/src/__tests__/computer-use-skill-endstate.test.ts +0 -105
- package/src/__tests__/computer-use-skill-lifecycle-cleanup.test.ts +0 -249
- package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
- package/src/__tests__/conflict-policy.test.ts +0 -269
- package/src/__tests__/conflict-store.test.ts +0 -372
- package/src/__tests__/contradiction-checker.test.ts +0 -361
- package/src/__tests__/entity-extractor.test.ts +0 -211
- package/src/__tests__/entity-search.test.ts +0 -1117
- package/src/__tests__/profile-compiler.test.ts +0 -392
- package/src/__tests__/ride-shotgun-handler.test.ts +0 -452
- package/src/__tests__/session-conflict-gate.test.ts +0 -1228
- package/src/__tests__/session-profile-injection.test.ts +0 -557
- package/src/cli/commands/dev.ts +0 -129
- package/src/cli/commands/map.ts +0 -391
- package/src/cli/commands/oauth.ts +0 -77
- package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
- package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
- package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
- package/src/daemon/computer-use-session.ts +0 -1026
- package/src/daemon/ride-shotgun-handler.ts +0 -569
- package/src/daemon/session-conflict-gate.ts +0 -167
- package/src/daemon/session-dynamic-profile.ts +0 -77
- package/src/memory/clarification-resolver.ts +0 -417
- package/src/memory/conflict-intent.ts +0 -205
- package/src/memory/conflict-policy.ts +0 -127
- package/src/memory/conflict-store.ts +0 -410
- package/src/memory/contradiction-checker.ts +0 -508
- package/src/memory/entity-extractor.ts +0 -535
- package/src/memory/format-recall.ts +0 -47
- package/src/memory/fts-reconciler.ts +0 -165
- package/src/memory/job-handlers/conflict.ts +0 -200
- package/src/memory/profile-compiler.ts +0 -195
- package/src/memory/recall-cache.ts +0 -117
- package/src/memory/search/entity.ts +0 -535
- package/src/memory/search/query-expansion.test.ts +0 -70
- package/src/memory/search/query-expansion.ts +0 -118
- package/src/oauth/provider-base-urls.ts +0 -21
- package/src/oauth/provider-profiles.ts +0 -192
- package/src/prompts/computer-use-prompt.ts +0 -98
- package/src/runtime/routes/computer-use-routes.ts +0 -641
- package/src/runtime/routes/mcp-routes.ts +0 -20
- package/src/runtime/telegram-streaming-delivery.test.ts +0 -729
- package/src/runtime/telegram-streaming-delivery.ts +0 -393
- package/src/tools/computer-use/request-computer-control.ts +0 -56
package/src/tools/skills/load.ts
CHANGED
|
@@ -9,6 +9,11 @@ import { loadSkillBySelector, loadSkillCatalog } from "../../config/skills.js";
|
|
|
9
9
|
import { RiskLevel } from "../../permissions/types.js";
|
|
10
10
|
import type { ToolDefinition } from "../../providers/types.js";
|
|
11
11
|
import {
|
|
12
|
+
autoInstallFromCatalog,
|
|
13
|
+
resolveCatalog,
|
|
14
|
+
} from "../../skills/catalog-install.js";
|
|
15
|
+
import {
|
|
16
|
+
collectAllMissing,
|
|
12
17
|
indexCatalogById,
|
|
13
18
|
validateIncludes,
|
|
14
19
|
} from "../../skills/include-graph.js";
|
|
@@ -137,7 +142,32 @@ export class SkillLoadTool implements Tool {
|
|
|
137
142
|
};
|
|
138
143
|
}
|
|
139
144
|
|
|
140
|
-
|
|
145
|
+
let loaded = loadSkillBySelector(selector);
|
|
146
|
+
|
|
147
|
+
// Auto-install from catalog if the skill isn't found locally
|
|
148
|
+
if (
|
|
149
|
+
!loaded.skill &&
|
|
150
|
+
(loaded.errorCode === "not_found" || loaded.errorCode === "empty_catalog")
|
|
151
|
+
) {
|
|
152
|
+
try {
|
|
153
|
+
const installed = await autoInstallFromCatalog(selector);
|
|
154
|
+
if (installed) {
|
|
155
|
+
log.info({ skillId: selector }, "Auto-installed skill from catalog");
|
|
156
|
+
loaded = loadSkillBySelector(selector);
|
|
157
|
+
}
|
|
158
|
+
} catch (err) {
|
|
159
|
+
const installError = err instanceof Error ? err.message : String(err);
|
|
160
|
+
log.warn(
|
|
161
|
+
{ err, skillId: selector },
|
|
162
|
+
"Auto-install from catalog failed",
|
|
163
|
+
);
|
|
164
|
+
return {
|
|
165
|
+
content: `Error: skill "${selector}" was found in the catalog but installation failed: ${installError}`,
|
|
166
|
+
isError: true,
|
|
167
|
+
};
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
|
|
141
171
|
if (!loaded.skill) {
|
|
142
172
|
return {
|
|
143
173
|
content: `Error: ${loaded.error ?? "Failed to load skill"}`,
|
|
@@ -160,10 +190,62 @@ export class SkillLoadTool implements Tool {
|
|
|
160
190
|
// Load catalog for include validation and child metadata output
|
|
161
191
|
let catalogIndex: Map<string, SkillSummary> | undefined;
|
|
162
192
|
if (skill.includes && skill.includes.length > 0) {
|
|
163
|
-
|
|
193
|
+
let catalog = loadSkillCatalog();
|
|
164
194
|
catalogIndex = indexCatalogById(catalog);
|
|
165
195
|
|
|
166
|
-
//
|
|
196
|
+
// Auto-install missing includes before validation (max 5 rounds for transitive deps)
|
|
197
|
+
// Defer catalog resolution until we confirm there are missing includes,
|
|
198
|
+
// then cache the result to avoid redundant network requests per dependency.
|
|
199
|
+
let remoteCatalog: Awaited<ReturnType<typeof resolveCatalog>> | undefined;
|
|
200
|
+
|
|
201
|
+
const MAX_INSTALL_ROUNDS = 5;
|
|
202
|
+
for (let round = 0; round < MAX_INSTALL_ROUNDS; round++) {
|
|
203
|
+
const missing = collectAllMissing(skill.id, catalogIndex);
|
|
204
|
+
if (missing.size === 0) break;
|
|
205
|
+
|
|
206
|
+
// Lazily resolve catalog on first round with missing includes
|
|
207
|
+
if (!remoteCatalog) {
|
|
208
|
+
try {
|
|
209
|
+
remoteCatalog = await resolveCatalog([...missing][0]);
|
|
210
|
+
} catch (err) {
|
|
211
|
+
log.warn(
|
|
212
|
+
{ err, skillId: skill.id },
|
|
213
|
+
"Failed to resolve catalog for include auto-install",
|
|
214
|
+
);
|
|
215
|
+
break;
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
let installedAny = false;
|
|
220
|
+
for (const missingId of missing) {
|
|
221
|
+
try {
|
|
222
|
+
const installed = await autoInstallFromCatalog(
|
|
223
|
+
missingId,
|
|
224
|
+
remoteCatalog,
|
|
225
|
+
);
|
|
226
|
+
if (installed) {
|
|
227
|
+
log.info(
|
|
228
|
+
{ skillId: missingId, parentSkillId: skill.id },
|
|
229
|
+
"Auto-installed missing include",
|
|
230
|
+
);
|
|
231
|
+
installedAny = true;
|
|
232
|
+
}
|
|
233
|
+
} catch (err) {
|
|
234
|
+
log.warn(
|
|
235
|
+
{ err, skillId: missingId },
|
|
236
|
+
"Failed to auto-install missing include",
|
|
237
|
+
);
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
if (!installedAny) break; // Nothing could be installed, stop trying
|
|
242
|
+
|
|
243
|
+
// Reload catalog to pick up newly installed skills
|
|
244
|
+
catalog = loadSkillCatalog();
|
|
245
|
+
catalogIndex = indexCatalogById(catalog);
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
// Validate (fail-closed — catches genuinely missing deps + cycles)
|
|
167
249
|
const validation = validateIncludes(skill.id, catalogIndex);
|
|
168
250
|
if (!validation.ok) {
|
|
169
251
|
if (validation.error === "missing") {
|
|
package/src/tools/watch/watch-state.ts
CHANGED
|
@@ -24,18 +24,6 @@ export interface WatchSession {
|
|
|
24
24
|
timeoutHandle?: ReturnType<typeof setTimeout>;
|
|
25
25
|
/** Guards against concurrent generateSummary calls */
|
|
26
26
|
summaryInFlight?: boolean;
|
|
27
|
-
/** Whether this session was started via ride shotgun (no live commentary) */
|
|
28
|
-
isRideShotgun?: boolean;
|
|
29
|
-
/** Learn mode records network traffic alongside screen observations */
|
|
30
|
-
isLearnMode?: boolean;
|
|
31
|
-
/** Domain filter for network recording in learn mode */
|
|
32
|
-
targetDomain?: string;
|
|
33
|
-
/** Recording ID for learn mode sessions */
|
|
34
|
-
recordingId?: string;
|
|
35
|
-
/** Path where the learn recording was successfully saved (undefined if save failed) */
|
|
36
|
-
savedRecordingPath?: string;
|
|
37
|
-
/** Reason the learn-mode bootstrap failed (CDP launch vs recorder attach) */
|
|
38
|
-
bootstrapFailureReason?: string;
|
|
39
27
|
}
|
|
40
28
|
|
|
41
29
|
/** Module-level map of watch sessions keyed by watchId. */
|
package/src/util/logger.ts
CHANGED
|
@@ -12,11 +12,7 @@ import pino from "pino";
|
|
|
12
12
|
import type { PrettyOptions } from "pino-pretty";
|
|
13
13
|
import pinoPretty from "pino-pretty";
|
|
14
14
|
|
|
15
|
-
import {
|
|
16
|
-
getDebugMode,
|
|
17
|
-
getDebugStdoutLogs,
|
|
18
|
-
getLogStderr,
|
|
19
|
-
} from "../config/env-registry.js";
|
|
15
|
+
import { getDebugStdoutLogs } from "../config/env-registry.js";
|
|
20
16
|
import { logSerializers } from "./log-redact.js";
|
|
21
17
|
import { getLogPath } from "./platform.js";
|
|
22
18
|
|
|
@@ -110,31 +106,18 @@ function buildRotatingLogger(config: LogFileConfig): pino.Logger {
|
|
|
110
106
|
activeLogDate = today;
|
|
111
107
|
activeLogFileConfig = config;
|
|
112
108
|
|
|
113
|
-
const level = getDebugMode() ? "debug" : "info";
|
|
114
|
-
|
|
115
|
-
if (getDebugMode()) {
|
|
116
|
-
const prettyStream = pinoPretty(prettyOpts({ destination: 2 }));
|
|
117
|
-
return pino(
|
|
118
|
-
{ name: "assistant", level, serializers: logSerializers },
|
|
119
|
-
pino.multistream([
|
|
120
|
-
{ stream: fileStream, level: "info" as const },
|
|
121
|
-
{ stream: prettyStream, level: "debug" as const },
|
|
122
|
-
]),
|
|
123
|
-
);
|
|
124
|
-
}
|
|
125
|
-
|
|
126
109
|
// When stdout is not a TTY (e.g. desktop app redirects to a hatch log file),
|
|
127
110
|
// write to the rotating file only — the hatch log already captured early
|
|
128
111
|
// startup output and echoing pino output there is unnecessary duplication.
|
|
129
112
|
if (!process.stdout.isTTY) {
|
|
130
113
|
return pino(
|
|
131
|
-
{ name: "assistant", level, serializers: logSerializers },
|
|
114
|
+
{ name: "assistant", level: "info", serializers: logSerializers },
|
|
132
115
|
fileStream,
|
|
133
116
|
);
|
|
134
117
|
}
|
|
135
118
|
|
|
136
119
|
return pino(
|
|
137
|
-
{ name: "assistant", level, serializers: logSerializers },
|
|
120
|
+
{ name: "assistant", level: "info", serializers: logSerializers },
|
|
138
121
|
pino.multistream([
|
|
139
122
|
{ stream: fileStream, level: "info" as const },
|
|
140
123
|
{
|
|
@@ -173,13 +156,11 @@ function getRootLogger(): pino.Logger {
|
|
|
173
156
|
}
|
|
174
157
|
if (!rootLogger) {
|
|
175
158
|
const forceStderr =
|
|
176
|
-
process.env.BUN_TEST === "1" ||
|
|
177
|
-
process.env.NODE_ENV === "test" ||
|
|
178
|
-
getLogStderr();
|
|
159
|
+
process.env.BUN_TEST === "1" || process.env.NODE_ENV === "test";
|
|
179
160
|
if (forceStderr) {
|
|
180
161
|
rootLogger = pino(
|
|
181
162
|
{
|
|
182
|
-
level:
|
|
163
|
+
level: "info",
|
|
183
164
|
serializers: logSerializers,
|
|
184
165
|
},
|
|
185
166
|
pino.destination(2),
|
|
@@ -208,17 +189,7 @@ function getRootLogger(): pino.Logger {
|
|
|
208
189
|
prettyOpts({ destination: fileDest, colorize: false }),
|
|
209
190
|
);
|
|
210
191
|
|
|
211
|
-
if (
|
|
212
|
-
const prettyStream = pinoPretty(prettyOpts({ destination: 2 }));
|
|
213
|
-
const multi = pino.multistream([
|
|
214
|
-
{ stream: fileStream, level: "info" as const },
|
|
215
|
-
{ stream: prettyStream, level: "debug" as const },
|
|
216
|
-
]);
|
|
217
|
-
rootLogger = pino(
|
|
218
|
-
{ level: "debug", serializers: logSerializers },
|
|
219
|
-
multi,
|
|
220
|
-
);
|
|
221
|
-
} else if (getDebugStdoutLogs()) {
|
|
192
|
+
if (getDebugStdoutLogs()) {
|
|
222
193
|
rootLogger = pino(
|
|
223
194
|
{ level: "info", serializers: logSerializers },
|
|
224
195
|
pino.multistream([
|
|
@@ -238,7 +209,7 @@ function getRootLogger(): pino.Logger {
|
|
|
238
209
|
} catch {
|
|
239
210
|
rootLogger = pino(
|
|
240
211
|
{
|
|
241
|
-
level:
|
|
212
|
+
level: "info",
|
|
242
213
|
serializers: logSerializers,
|
|
243
214
|
},
|
|
244
215
|
pinoPretty(prettyOpts({ destination: 2 })),
|
|
@@ -248,11 +219,6 @@ function getRootLogger(): pino.Logger {
|
|
|
248
219
|
return rootLogger;
|
|
249
220
|
}
|
|
250
221
|
|
|
251
|
-
/** Returns true when VELLUM_DEBUG=1 is set. */
|
|
252
|
-
export function isDebug(): boolean {
|
|
253
|
-
return getDebugMode();
|
|
254
|
-
}
|
|
255
|
-
|
|
256
222
|
/**
|
|
257
223
|
* Truncate a string for debug logging. Returns the original if under maxLen,
|
|
258
224
|
* otherwise returns the first maxLen chars with a suffix indicating how much was cut.
|
package/src/util/platform.ts
CHANGED
|
@@ -8,13 +8,7 @@ import {
|
|
|
8
8
|
import { homedir } from "node:os";
|
|
9
9
|
import { join } from "node:path";
|
|
10
10
|
|
|
11
|
-
import {
|
|
12
|
-
getBaseDataDir,
|
|
13
|
-
getDaemonIosPairing,
|
|
14
|
-
getDaemonTcpEnabled,
|
|
15
|
-
getDaemonTcpHost,
|
|
16
|
-
getDaemonTcpPort,
|
|
17
|
-
} from "../config/env-registry.js";
|
|
11
|
+
import { getBaseDataDir } from "../config/env-registry.js";
|
|
18
12
|
|
|
19
13
|
export function isMacOS(): boolean {
|
|
20
14
|
return process.platform === "darwin";
|
|
@@ -245,39 +239,30 @@ export function getInterfacesDir(): string {
|
|
|
245
239
|
|
|
246
240
|
/**
|
|
247
241
|
* Returns the TCP port the daemon should listen on for iOS clients.
|
|
248
|
-
*
|
|
242
|
+
* Hardcoded default: 8765.
|
|
249
243
|
*/
|
|
250
244
|
export function getTCPPort(): number {
|
|
251
|
-
return
|
|
245
|
+
return 8765;
|
|
252
246
|
}
|
|
253
247
|
|
|
254
248
|
/**
|
|
255
249
|
* Returns whether the daemon TCP listener should be enabled.
|
|
256
|
-
*
|
|
257
|
-
*
|
|
258
|
-
* 2. Presence of the flag file ~/.vellum/tcp-enabled (exists → on)
|
|
259
|
-
* 3. Default: false
|
|
250
|
+
* Checks for the presence of the flag file ~/.vellum/tcp-enabled.
|
|
251
|
+
* Default: false.
|
|
260
252
|
*
|
|
261
253
|
* The flag-file check makes it easy to enable TCP in dev without restarting
|
|
262
254
|
* the shell: `touch ~/.vellum/tcp-enabled && kill -USR1 <daemon-pid>`.
|
|
263
|
-
* The macOS CLI (AssistantCli) also sets the env var for bundled-binary deployments.
|
|
264
255
|
*/
|
|
265
256
|
export function isTCPEnabled(): boolean {
|
|
266
|
-
const envValue = getDaemonTcpEnabled();
|
|
267
|
-
if (envValue !== undefined) return envValue;
|
|
268
257
|
return existsSync(join(getRootDir(), "tcp-enabled"));
|
|
269
258
|
}
|
|
270
259
|
|
|
271
260
|
/**
|
|
272
261
|
* Returns the hostname/address for the TCP listener.
|
|
273
|
-
*
|
|
274
|
-
*
|
|
275
|
-
* 2. If iOS pairing is enabled: '0.0.0.0' (LAN-accessible)
|
|
276
|
-
* 3. Default: '127.0.0.1' (localhost only)
|
|
262
|
+
* If iOS pairing is enabled (flag file): '0.0.0.0' (LAN-accessible).
|
|
263
|
+
* Default: '127.0.0.1' (localhost only).
|
|
277
264
|
*/
|
|
278
265
|
export function getTCPHost(): string {
|
|
279
|
-
const override = getDaemonTcpHost();
|
|
280
|
-
if (override) return override;
|
|
281
266
|
if (isIOSPairingEnabled()) return "0.0.0.0";
|
|
282
267
|
return "127.0.0.1";
|
|
283
268
|
}
|
|
@@ -288,17 +273,13 @@ export function getTCPHost(): string {
|
|
|
288
273
|
* instead of 127.0.0.1 (localhost only), making the daemon reachable
|
|
289
274
|
* from iOS devices on the same local network.
|
|
290
275
|
*
|
|
291
|
-
*
|
|
292
|
-
*
|
|
293
|
-
* 2. Presence of the flag file ~/.vellum/ios-pairing-enabled (exists → on)
|
|
294
|
-
* 3. Default: false
|
|
276
|
+
* Checks for the presence of the flag file ~/.vellum/ios-pairing-enabled.
|
|
277
|
+
* Default: false.
|
|
295
278
|
*
|
|
296
279
|
* This is separate from isTCPEnabled() — TCP can be enabled for localhost-only
|
|
297
280
|
* access without exposing the daemon to the LAN.
|
|
298
281
|
*/
|
|
299
282
|
export function isIOSPairingEnabled(): boolean {
|
|
300
|
-
const envValue = getDaemonIosPairing();
|
|
301
|
-
if (envValue !== undefined) return envValue;
|
|
302
283
|
return existsSync(join(getRootDir(), "ios-pairing-enabled"));
|
|
303
284
|
}
|
|
304
285
|
|
|
@@ -13,7 +13,8 @@ import {
|
|
|
13
13
|
import type { CalendarEvent } from "../../config/bundled-skills/google-calendar/types.js";
|
|
14
14
|
import type { OAuthConnection } from "../../oauth/connection.js";
|
|
15
15
|
import { resolveOAuthConnection } from "../../oauth/connection-resolver.js";
|
|
16
|
-
|
|
16
|
+
|
|
17
|
+
const GOOGLE_CALENDAR_BASE_URL = "https://www.googleapis.com/calendar/v3";
|
|
17
18
|
import { getLogger } from "../../util/logger.js";
|
|
18
19
|
import type {
|
|
19
20
|
FetchResult,
|
|
@@ -1,193 +0,0 @@
|
|
|
1
|
-
import { beforeEach, describe, expect, mock, test } from "bun:test";
|
|
2
|
-
|
|
3
|
-
let llmCallCount = 0;
|
|
4
|
-
let llmDelayMs = 0;
|
|
5
|
-
let llmResolution:
|
|
6
|
-
| "keep_existing"
|
|
7
|
-
| "keep_candidate"
|
|
8
|
-
| "merge"
|
|
9
|
-
| "still_unclear" = "still_unclear";
|
|
10
|
-
let llmResolvedStatement = "";
|
|
11
|
-
let llmExplanation = "Unclear response from user.";
|
|
12
|
-
|
|
13
|
-
mock.module("../providers/provider-send-message.js", () => ({
|
|
14
|
-
getConfiguredProvider: () => ({
|
|
15
|
-
sendMessage: async (
|
|
16
|
-
_messages: unknown,
|
|
17
|
-
_tools: unknown,
|
|
18
|
-
_system: unknown,
|
|
19
|
-
opts?: { signal?: AbortSignal },
|
|
20
|
-
) => {
|
|
21
|
-
llmCallCount += 1;
|
|
22
|
-
if (llmDelayMs > 0) {
|
|
23
|
-
await new Promise((resolve, reject) => {
|
|
24
|
-
const timer = setTimeout(resolve, llmDelayMs);
|
|
25
|
-
opts?.signal?.addEventListener("abort", () => {
|
|
26
|
-
clearTimeout(timer);
|
|
27
|
-
reject(new Error("Request was aborted."));
|
|
28
|
-
});
|
|
29
|
-
});
|
|
30
|
-
}
|
|
31
|
-
return {
|
|
32
|
-
content: [
|
|
33
|
-
{
|
|
34
|
-
type: "tool_use" as const,
|
|
35
|
-
id: "test-tool-use-id",
|
|
36
|
-
name: "resolve_conflict",
|
|
37
|
-
input: {
|
|
38
|
-
resolution: llmResolution,
|
|
39
|
-
resolved_statement: llmResolvedStatement,
|
|
40
|
-
explanation: llmExplanation,
|
|
41
|
-
},
|
|
42
|
-
},
|
|
43
|
-
],
|
|
44
|
-
model: "claude-haiku-4-5-20251001",
|
|
45
|
-
stopReason: "tool_use",
|
|
46
|
-
usage: { inputTokens: 0, outputTokens: 0 },
|
|
47
|
-
};
|
|
48
|
-
},
|
|
49
|
-
}),
|
|
50
|
-
createTimeout: (ms: number) => {
|
|
51
|
-
const controller = new AbortController();
|
|
52
|
-
const timer = setTimeout(() => controller.abort(), ms);
|
|
53
|
-
return {
|
|
54
|
-
signal: controller.signal,
|
|
55
|
-
cleanup: () => clearTimeout(timer),
|
|
56
|
-
};
|
|
57
|
-
},
|
|
58
|
-
extractToolUse: (response: { content: Array<{ type: string }> }) => {
|
|
59
|
-
return response.content.find(
|
|
60
|
-
(b: { type: string }) => b.type === "tool_use",
|
|
61
|
-
);
|
|
62
|
-
},
|
|
63
|
-
userMessage: (text: string) => ({
|
|
64
|
-
role: "user",
|
|
65
|
-
content: [{ type: "text", text }],
|
|
66
|
-
}),
|
|
67
|
-
}));
|
|
68
|
-
|
|
69
|
-
mock.module("../config/loader.js", () => ({
|
|
70
|
-
getConfig: () => ({
|
|
71
|
-
ui: {},
|
|
72
|
-
|
|
73
|
-
apiKeys: {
|
|
74
|
-
anthropic: "test-key",
|
|
75
|
-
},
|
|
76
|
-
}),
|
|
77
|
-
}));
|
|
78
|
-
|
|
79
|
-
import { resolveConflictClarification } from "../memory/clarification-resolver.js";
|
|
80
|
-
|
|
81
|
-
beforeEach(() => {
|
|
82
|
-
llmCallCount = 0;
|
|
83
|
-
llmDelayMs = 0;
|
|
84
|
-
llmResolution = "still_unclear";
|
|
85
|
-
llmResolvedStatement = "";
|
|
86
|
-
llmExplanation = "Unclear response from user.";
|
|
87
|
-
});
|
|
88
|
-
|
|
89
|
-
describe("resolveConflictClarification", () => {
|
|
90
|
-
test("returns keep_existing from deterministic heuristic", async () => {
|
|
91
|
-
const result = await resolveConflictClarification({
|
|
92
|
-
existingStatement: "Use React for frontend work.",
|
|
93
|
-
candidateStatement: "Use Vue for frontend work.",
|
|
94
|
-
userMessage: "Keep the old React preference.",
|
|
95
|
-
});
|
|
96
|
-
|
|
97
|
-
expect(result.resolution).toBe("keep_existing");
|
|
98
|
-
expect(result.strategy).toBe("heuristic");
|
|
99
|
-
expect(llmCallCount).toBe(0);
|
|
100
|
-
});
|
|
101
|
-
|
|
102
|
-
test("returns keep_candidate from deterministic heuristic", async () => {
|
|
103
|
-
const result = await resolveConflictClarification({
|
|
104
|
-
existingStatement: "Use React for frontend work.",
|
|
105
|
-
candidateStatement: "Use Vue for frontend work.",
|
|
106
|
-
userMessage: "Use the new Vue note going forward.",
|
|
107
|
-
});
|
|
108
|
-
|
|
109
|
-
expect(result.resolution).toBe("keep_candidate");
|
|
110
|
-
expect(result.strategy).toBe("heuristic");
|
|
111
|
-
expect(llmCallCount).toBe(0);
|
|
112
|
-
});
|
|
113
|
-
|
|
114
|
-
test("returns merge from deterministic heuristic", async () => {
|
|
115
|
-
const result = await resolveConflictClarification({
|
|
116
|
-
existingStatement: "React is preferred for dashboards.",
|
|
117
|
-
candidateStatement: "Vue is preferred for marketing pages.",
|
|
118
|
-
userMessage:
|
|
119
|
-
"Both are true: React for dashboards and Vue for marketing pages.",
|
|
120
|
-
});
|
|
121
|
-
|
|
122
|
-
expect(result.resolution).toBe("merge");
|
|
123
|
-
expect(result.strategy).toBe("heuristic");
|
|
124
|
-
expect(result.resolvedStatement).toContain("Both are true");
|
|
125
|
-
expect(llmCallCount).toBe(0);
|
|
126
|
-
});
|
|
127
|
-
|
|
128
|
-
test("uses LLM fallback when heuristics are inconclusive", async () => {
|
|
129
|
-
llmResolution = "still_unclear";
|
|
130
|
-
llmExplanation = "The user message does not pick a side.";
|
|
131
|
-
|
|
132
|
-
const result = await resolveConflictClarification({
|
|
133
|
-
existingStatement: "Use React for frontend work.",
|
|
134
|
-
candidateStatement: "Use Vue for frontend work.",
|
|
135
|
-
userMessage: "Not sure yet.",
|
|
136
|
-
});
|
|
137
|
-
|
|
138
|
-
expect(result.resolution).toBe("still_unclear");
|
|
139
|
-
expect(result.strategy).toBe("llm");
|
|
140
|
-
expect(llmCallCount).toBe(1);
|
|
141
|
-
});
|
|
142
|
-
|
|
143
|
-
test("does not match cue substrings inside unrelated words", async () => {
|
|
144
|
-
llmResolution = "keep_candidate";
|
|
145
|
-
llmExplanation = "User wants Vue.";
|
|
146
|
-
|
|
147
|
-
// "told" contains "old" as a substring but not as a whole word
|
|
148
|
-
const result = await resolveConflictClarification({
|
|
149
|
-
existingStatement: "Use React for frontend work.",
|
|
150
|
-
candidateStatement: "Use Vue for frontend work.",
|
|
151
|
-
userMessage: "I told you, use Vue.",
|
|
152
|
-
});
|
|
153
|
-
|
|
154
|
-
expect(result.resolution).toBe("keep_candidate");
|
|
155
|
-
expect(result.strategy).toBe("llm");
|
|
156
|
-
expect(llmCallCount).toBe(1);
|
|
157
|
-
});
|
|
158
|
-
|
|
159
|
-
test("delegates to LLM when multiple cue categories match", async () => {
|
|
160
|
-
llmResolution = "keep_existing";
|
|
161
|
-
llmExplanation = "User wants the old one.";
|
|
162
|
-
|
|
163
|
-
// "either" is a merge cue, "old" is an existing cue — ambiguous
|
|
164
|
-
const result = await resolveConflictClarification({
|
|
165
|
-
existingStatement: "Use React for frontend work.",
|
|
166
|
-
candidateStatement: "Use Vue for frontend work.",
|
|
167
|
-
userMessage: "I don't want either, keep the old one.",
|
|
168
|
-
});
|
|
169
|
-
|
|
170
|
-
expect(result.resolution).toBe("keep_existing");
|
|
171
|
-
expect(result.strategy).toBe("llm");
|
|
172
|
-
expect(llmCallCount).toBe(1);
|
|
173
|
-
});
|
|
174
|
-
|
|
175
|
-
test("enforces timeout bound on LLM fallback", async () => {
|
|
176
|
-
llmResolution = "keep_candidate";
|
|
177
|
-
llmExplanation = "Prefer the newer statement.";
|
|
178
|
-
llmDelayMs = 50;
|
|
179
|
-
|
|
180
|
-
const result = await resolveConflictClarification(
|
|
181
|
-
{
|
|
182
|
-
existingStatement: "Use React for frontend work.",
|
|
183
|
-
candidateStatement: "Use Vue for frontend work.",
|
|
184
|
-
userMessage: "I cannot decide right now.",
|
|
185
|
-
},
|
|
186
|
-
{ timeoutMs: 5 },
|
|
187
|
-
);
|
|
188
|
-
|
|
189
|
-
expect(result.resolution).toBe("still_unclear");
|
|
190
|
-
expect(result.strategy).toBe("llm_timeout");
|
|
191
|
-
expect(llmCallCount).toBe(1);
|
|
192
|
-
});
|
|
193
|
-
});
|
|
@@ -1,143 +0,0 @@
|
|
|
1
|
-
import { describe, expect, test } from "bun:test";
|
|
2
|
-
|
|
3
|
-
import { ComputerUseSession } from "../daemon/computer-use-session.js";
|
|
4
|
-
import type { Message } from "../providers/types.js";
|
|
5
|
-
|
|
6
|
-
/**
|
|
7
|
-
* Helper to create a user message with a tool_result block containing
|
|
8
|
-
* an AX tree wrapped in markers.
|
|
9
|
-
*/
|
|
10
|
-
function toolResultMsg(content: string): Message {
|
|
11
|
-
return {
|
|
12
|
-
role: "user",
|
|
13
|
-
content: [
|
|
14
|
-
{
|
|
15
|
-
type: "tool_result",
|
|
16
|
-
tool_use_id: "test-id",
|
|
17
|
-
content,
|
|
18
|
-
},
|
|
19
|
-
],
|
|
20
|
-
};
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
describe("ComputerUseSession.escapeAxTreeContent", () => {
|
|
24
|
-
test("escapes a literal closing tag in the content", () => {
|
|
25
|
-
const input = "some text </ax-tree> more text";
|
|
26
|
-
const escaped = ComputerUseSession.escapeAxTreeContent(input);
|
|
27
|
-
expect(escaped).toBe("some text &lt;/ax-tree&gt; more text");
|
|
28
|
-
});
|
|
29
|
-
|
|
30
|
-
test("escapes multiple occurrences", () => {
|
|
31
|
-
const input = "</ax-tree> hello </ax-tree>";
|
|
32
|
-
const escaped = ComputerUseSession.escapeAxTreeContent(input);
|
|
33
|
-
expect(escaped).toBe("&lt;/ax-tree&gt; hello &lt;/ax-tree&gt;");
|
|
34
|
-
});
|
|
35
|
-
|
|
36
|
-
test("is case-insensitive", () => {
|
|
37
|
-
const input = "</AX-TREE> and </Ax-Tree>";
|
|
38
|
-
const escaped = ComputerUseSession.escapeAxTreeContent(input);
|
|
39
|
-
expect(escaped).toBe("&lt;/ax-tree&gt; and &lt;/ax-tree&gt;");
|
|
40
|
-
});
|
|
41
|
-
|
|
42
|
-
test("leaves content without closing tags unchanged", () => {
|
|
43
|
-
const input = 'Window "My App" [1]\n Button "OK" [2]';
|
|
44
|
-
expect(ComputerUseSession.escapeAxTreeContent(input)).toBe(input);
|
|
45
|
-
});
|
|
46
|
-
});
|
|
47
|
-
|
|
48
|
-
describe("ComputerUseSession.compactHistory", () => {
|
|
49
|
-
test("[experimental] strips old AX trees and keeps the most recent ones", () => {
|
|
50
|
-
const messages: Message[] = [
|
|
51
|
-
{ role: "assistant", content: [{ type: "text", text: "thinking..." }] },
|
|
52
|
-
toolResultMsg(
|
|
53
|
-
'<ax-tree>CURRENT SCREEN STATE:\nWindow "App" [1]</ax-tree>',
|
|
54
|
-
),
|
|
55
|
-
{ role: "assistant", content: [{ type: "text", text: "action 1" }] },
|
|
56
|
-
toolResultMsg(
|
|
57
|
-
'<ax-tree>CURRENT SCREEN STATE:\nWindow "App" [2]</ax-tree>',
|
|
58
|
-
),
|
|
59
|
-
{ role: "assistant", content: [{ type: "text", text: "action 2" }] },
|
|
60
|
-
toolResultMsg(
|
|
61
|
-
'<ax-tree>CURRENT SCREEN STATE:\nWindow "App" [3]</ax-tree>',
|
|
62
|
-
),
|
|
63
|
-
];
|
|
64
|
-
|
|
65
|
-
const compacted = ComputerUseSession.compactHistory(messages);
|
|
66
|
-
|
|
67
|
-
// First AX tree (index 1) should be stripped
|
|
68
|
-
const firstToolResult = compacted[1].content[0];
|
|
69
|
-
expect(firstToolResult.type).toBe("tool_result");
|
|
70
|
-
if (firstToolResult.type === "tool_result") {
|
|
71
|
-
expect(firstToolResult.content).toContain("<ax_tree_omitted />");
|
|
72
|
-
expect(firstToolResult.content).not.toContain("<ax-tree>");
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
// Last two AX trees should be preserved
|
|
76
|
-
const secondToolResult = compacted[3].content[0];
|
|
77
|
-
if (secondToolResult.type === "tool_result") {
|
|
78
|
-
expect(secondToolResult.content).toContain("<ax-tree>");
|
|
79
|
-
}
|
|
80
|
-
const thirdToolResult = compacted[5].content[0];
|
|
81
|
-
if (thirdToolResult.type === "tool_result") {
|
|
82
|
-
expect(thirdToolResult.content).toContain("<ax-tree>");
|
|
83
|
-
}
|
|
84
|
-
});
|
|
85
|
-
|
|
86
|
-
test("[experimental] handles AX tree content containing literal </ax-tree> (escaped)", () => {
|
|
87
|
-
// Simulate content where the AX tree text includes an escaped closing tag,
|
|
88
|
-
// e.g. user is viewing XML source code with "</ax-tree>" in it.
|
|
89
|
-
const escapedContent =
|
|
90
|
-
'<ax-tree>CURRENT SCREEN STATE:\nTextArea "editor" [1]\n ' +
|
|
91
|
-
"Line: &lt;/ax-tree&gt; some xml\n</ax-tree>";
|
|
92
|
-
|
|
93
|
-
const messages: Message[] = [
|
|
94
|
-
{ role: "assistant", content: [{ type: "text", text: "action 0" }] },
|
|
95
|
-
toolResultMsg(escapedContent),
|
|
96
|
-
{ role: "assistant", content: [{ type: "text", text: "action 1" }] },
|
|
97
|
-
toolResultMsg(escapedContent),
|
|
98
|
-
{ role: "assistant", content: [{ type: "text", text: "action 2" }] },
|
|
99
|
-
toolResultMsg(
|
|
100
|
-
'<ax-tree>CURRENT SCREEN STATE:\nWindow "App" [3]</ax-tree>',
|
|
101
|
-
),
|
|
102
|
-
];
|
|
103
|
-
|
|
104
|
-
const compacted = ComputerUseSession.compactHistory(messages);
|
|
105
|
-
|
|
106
|
-
// The first message with escaped content should be fully stripped
|
|
107
|
-
const firstToolResult = compacted[1].content[0];
|
|
108
|
-
if (firstToolResult.type === "tool_result") {
|
|
109
|
-
expect(firstToolResult.content).not.toContain("<ax-tree>");
|
|
110
|
-
expect(firstToolResult.content).toContain("<ax_tree_omitted />");
|
|
111
|
-
}
|
|
112
|
-
});
|
|
113
|
-
|
|
114
|
-
test("regex fails on unescaped </ax-tree> inside content (demonstrating the bug)", () => {
|
|
115
|
-
// This test demonstrates what happens WITHOUT escaping: the regex
|
|
116
|
-
// only partially removes the AX tree block.
|
|
117
|
-
const unescapedContent =
|
|
118
|
-
'<ax-tree>CURRENT SCREEN STATE:\nTextArea "editor" [1]\n ' +
|
|
119
|
-
"Line: </ax-tree> some xml leftover\n</ax-tree>";
|
|
120
|
-
|
|
121
|
-
const messages: Message[] = [
|
|
122
|
-
{ role: "assistant", content: [{ type: "text", text: "action 0" }] },
|
|
123
|
-
toolResultMsg(unescapedContent),
|
|
124
|
-
{ role: "assistant", content: [{ type: "text", text: "action 1" }] },
|
|
125
|
-
toolResultMsg(unescapedContent),
|
|
126
|
-
{ role: "assistant", content: [{ type: "text", text: "action 2" }] },
|
|
127
|
-
toolResultMsg(
|
|
128
|
-
'<ax-tree>CURRENT SCREEN STATE:\nWindow "App" [3]</ax-tree>',
|
|
129
|
-
),
|
|
130
|
-
];
|
|
131
|
-
|
|
132
|
-
const compacted = ComputerUseSession.compactHistory(messages);
|
|
133
|
-
|
|
134
|
-
// Without escaping, the first tool result has leftover content after
|
|
135
|
-
// the regex only matched up to the FIRST </ax-tree>.
|
|
136
|
-
const firstToolResult = compacted[1].content[0];
|
|
137
|
-
if (firstToolResult.type === "tool_result") {
|
|
138
|
-
// The non-greedy regex stops at the first </ax-tree>, leaving
|
|
139
|
-
// " some xml leftover\n</ax-tree>" behind.
|
|
140
|
-
expect(firstToolResult.content).toContain("some xml leftover");
|
|
141
|
-
}
|
|
142
|
-
});
|
|
143
|
-
});
|