@vellumai/assistant 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +27 -0
- package/.env.example +22 -0
- package/Dockerfile +99 -0
- package/Dockerfile.sandbox +5 -0
- package/README.md +248 -0
- package/bun.lock +1723 -0
- package/bunfig.toml +2 -0
- package/docs/skills.md +158 -0
- package/drizzle/0000_dizzy_maggott.sql +301 -0
- package/drizzle/meta/0000_snapshot.json +1999 -0
- package/drizzle/meta/_journal.json +13 -0
- package/drizzle.config.ts +7 -0
- package/eslint.config.mjs +17 -0
- package/hook-templates/debug-prompt-logger/hook.json +7 -0
- package/hook-templates/debug-prompt-logger/run.sh +68 -0
- package/knip.json +9 -0
- package/package.json +70 -0
- package/scripts/capture-x-graphql.ts +545 -0
- package/scripts/ipc/check-contract-inventory.ts +104 -0
- package/scripts/ipc/check-swift-decoder-drift.ts +166 -0
- package/scripts/ipc/generate-swift.ts +492 -0
- package/scripts/test-filesystem-tools.sh +48 -0
- package/scripts/test.sh +127 -0
- package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +2485 -0
- package/src/__tests__/account-registry.test.ts +245 -0
- package/src/__tests__/active-skill-tools.test.ts +378 -0
- package/src/__tests__/agent-heartbeat-service.test.ts +250 -0
- package/src/__tests__/agent-loop-thinking.test.ts +81 -0
- package/src/__tests__/agent-loop.test.ts +1135 -0
- package/src/__tests__/anthropic-provider.test.ts +778 -0
- package/src/__tests__/app-builder-tool-scripts.test.ts +290 -0
- package/src/__tests__/app-bundler.test.ts +292 -0
- package/src/__tests__/app-executors.test.ts +613 -0
- package/src/__tests__/app-git-history.test.ts +176 -0
- package/src/__tests__/app-git-service.test.ts +169 -0
- package/src/__tests__/app-open-proxy.test.ts +62 -0
- package/src/__tests__/asset-materialize-tool.test.ts +452 -0
- package/src/__tests__/asset-search-tool.test.ts +477 -0
- package/src/__tests__/assistant-attachment-directive.test.ts +401 -0
- package/src/__tests__/assistant-attachments.test.ts +437 -0
- package/src/__tests__/assistant-event-hub.test.ts +226 -0
- package/src/__tests__/assistant-event.test.ts +123 -0
- package/src/__tests__/assistant-events-sse-hardening.test.ts +315 -0
- package/src/__tests__/attachments-store.test.ts +476 -0
- package/src/__tests__/attachments.test.ts +134 -0
- package/src/__tests__/audit-log-rotation.test.ts +154 -0
- package/src/__tests__/browser-fill-credential.test.ts +309 -0
- package/src/__tests__/browser-manager.test.ts +203 -0
- package/src/__tests__/browser-runtime-check.test.ts +55 -0
- package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +68 -0
- package/src/__tests__/browser-skill-endstate.test.ts +195 -0
- package/src/__tests__/bundle-scanner.test.ts +313 -0
- package/src/__tests__/call-bridge.test.ts +517 -0
- package/src/__tests__/call-constants.test.ts +40 -0
- package/src/__tests__/call-domain.test.ts +163 -0
- package/src/__tests__/call-orchestrator.test.ts +625 -0
- package/src/__tests__/call-recovery.test.ts +518 -0
- package/src/__tests__/call-routes-http.test.ts +699 -0
- package/src/__tests__/call-state-machine.test.ts +143 -0
- package/src/__tests__/call-state.test.ts +174 -0
- package/src/__tests__/call-store.test.ts +691 -0
- package/src/__tests__/channel-approval-routes.test.ts +2356 -0
- package/src/__tests__/channel-approval.test.ts +299 -0
- package/src/__tests__/channel-approvals.test.ts +521 -0
- package/src/__tests__/channel-delivery-store.test.ts +447 -0
- package/src/__tests__/channel-guardian.test.ts +1005 -0
- package/src/__tests__/checker.test.ts +3519 -0
- package/src/__tests__/clarification-resolver.test.ts +159 -0
- package/src/__tests__/classifier.test.ts +67 -0
- package/src/__tests__/claude-code-skill-regression.test.ts +127 -0
- package/src/__tests__/claude-code-tool-profiles.test.ts +88 -0
- package/src/__tests__/cli-discover.test.ts +85 -0
- package/src/__tests__/cli.test.ts +26 -0
- package/src/__tests__/clipboard.test.ts +80 -0
- package/src/__tests__/commit-guarantee.test.ts +335 -0
- package/src/__tests__/commit-message-enrichment-service.test.ts +550 -0
- package/src/__tests__/compaction.benchmark.test.ts +176 -0
- package/src/__tests__/computer-use-session-compaction.test.ts +132 -0
- package/src/__tests__/computer-use-session-lifecycle.test.ts +293 -0
- package/src/__tests__/computer-use-session-working-dir.test.ts +117 -0
- package/src/__tests__/computer-use-skill-baseline.test.ts +74 -0
- package/src/__tests__/computer-use-skill-endstate.test.ts +89 -0
- package/src/__tests__/computer-use-skill-lifecycle-cleanup.test.ts +217 -0
- package/src/__tests__/computer-use-skill-manifest-regression.test.ts +107 -0
- package/src/__tests__/computer-use-skill-proxy-bridge.test.ts +54 -0
- package/src/__tests__/computer-use-tools.test.ts +250 -0
- package/src/__tests__/config-schema.test.ts +1462 -0
- package/src/__tests__/conflict-intent-tokenization.test.ts +141 -0
- package/src/__tests__/conflict-policy.test.ts +121 -0
- package/src/__tests__/conflict-store.test.ts +332 -0
- package/src/__tests__/connection-policy.test.ts +102 -0
- package/src/__tests__/contacts-tools.test.ts +331 -0
- package/src/__tests__/context-memory-e2e.test.ts +434 -0
- package/src/__tests__/context-token-estimator.test.ts +135 -0
- package/src/__tests__/context-window-manager.test.ts +376 -0
- package/src/__tests__/contradiction-checker.test.ts +314 -0
- package/src/__tests__/conversation-store.test.ts +612 -0
- package/src/__tests__/credential-broker-browser-fill.test.ts +517 -0
- package/src/__tests__/credential-broker-server-use.test.ts +554 -0
- package/src/__tests__/credential-broker.test.ts +167 -0
- package/src/__tests__/credential-host-pattern-match.test.ts +104 -0
- package/src/__tests__/credential-metadata-store.test.ts +779 -0
- package/src/__tests__/credential-policy-validate.test.ts +121 -0
- package/src/__tests__/credential-resolve.test.ts +328 -0
- package/src/__tests__/credential-security-e2e.test.ts +352 -0
- package/src/__tests__/credential-security-invariants.test.ts +583 -0
- package/src/__tests__/credential-selection.test.ts +354 -0
- package/src/__tests__/credential-vault-unit.test.ts +780 -0
- package/src/__tests__/credential-vault.test.ts +852 -0
- package/src/__tests__/daemon-assistant-events.test.ts +164 -0
- package/src/__tests__/daemon-server-session-init.test.ts +522 -0
- package/src/__tests__/date-context.test.ts +373 -0
- package/src/__tests__/db-schedule-syntax-migration.test.ts +129 -0
- package/src/__tests__/delete-managed-skill-tool.test.ts +97 -0
- package/src/__tests__/diff.test.ts +121 -0
- package/src/__tests__/domain-normalize.test.ts +112 -0
- package/src/__tests__/domain-policy.test.ts +124 -0
- package/src/__tests__/doordash-client.test.ts +186 -0
- package/src/__tests__/doordash-session.test.ts +152 -0
- package/src/__tests__/dynamic-page-surface.test.ts +91 -0
- package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +132 -0
- package/src/__tests__/edit-engine.test.ts +180 -0
- package/src/__tests__/elevenlabs-client.test.ts +271 -0
- package/src/__tests__/email-cli.test.ts +283 -0
- package/src/__tests__/encrypted-store.test.ts +332 -0
- package/src/__tests__/entity-extractor.test.ts +190 -0
- package/src/__tests__/ephemeral-permissions.test.ts +362 -0
- package/src/__tests__/evaluate-typescript-tool.test.ts +286 -0
- package/src/__tests__/event-bus.test.ts +222 -0
- package/src/__tests__/file-edit-tool.test.ts +122 -0
- package/src/__tests__/file-ops-service.test.ts +330 -0
- package/src/__tests__/file-read-tool.test.ts +75 -0
- package/src/__tests__/file-write-tool.test.ts +113 -0
- package/src/__tests__/filesystem-tools.test.ts +579 -0
- package/src/__tests__/fixtures/credential-security-fixtures.ts +181 -0
- package/src/__tests__/fixtures/media-reuse-fixtures.ts +126 -0
- package/src/__tests__/fixtures/mock-signup-server.ts +387 -0
- package/src/__tests__/fixtures/proxy-fixtures.ts +147 -0
- package/src/__tests__/followup-tools.test.ts +303 -0
- package/src/__tests__/forbidden-legacy-symbols.test.ts +71 -0
- package/src/__tests__/fuzzy-match-property.test.ts +216 -0
- package/src/__tests__/fuzzy-match.test.ts +138 -0
- package/src/__tests__/gateway-only-enforcement.test.ts +631 -0
- package/src/__tests__/gemini-image-service.test.ts +261 -0
- package/src/__tests__/gemini-provider.test.ts +651 -0
- package/src/__tests__/get-weather.test.ts +318 -0
- package/src/__tests__/gmail-integration.test.ts +73 -0
- package/src/__tests__/handlers-add-trust-rule-metadata.test.ts +202 -0
- package/src/__tests__/handlers-cu-observation-blob.test.ts +352 -0
- package/src/__tests__/handlers-ipc-blob-probe.test.ts +191 -0
- package/src/__tests__/handlers-slack-config.test.ts +200 -0
- package/src/__tests__/handlers-task-submit-slash.test.ts +38 -0
- package/src/__tests__/handlers-telegram-config.test.ts +968 -0
- package/src/__tests__/handlers-twilio-config.test.ts +659 -0
- package/src/__tests__/handlers-twitter-config.test.ts +858 -0
- package/src/__tests__/headless-browser-interactions.test.ts +536 -0
- package/src/__tests__/headless-browser-navigate.test.ts +211 -0
- package/src/__tests__/headless-browser-read-tools.test.ts +261 -0
- package/src/__tests__/headless-browser-snapshot.test.ts +185 -0
- package/src/__tests__/history-repair-observability.test.ts +56 -0
- package/src/__tests__/history-repair.test.ts +510 -0
- package/src/__tests__/home-base-bootstrap.test.ts +82 -0
- package/src/__tests__/hooks-blocking.test.ts +128 -0
- package/src/__tests__/hooks-cli.test.ts +144 -0
- package/src/__tests__/hooks-config.test.ts +93 -0
- package/src/__tests__/hooks-discovery.test.ts +199 -0
- package/src/__tests__/hooks-integration.test.ts +189 -0
- package/src/__tests__/hooks-manager.test.ts +187 -0
- package/src/__tests__/hooks-runner.test.ts +182 -0
- package/src/__tests__/hooks-settings.test.ts +154 -0
- package/src/__tests__/hooks-templates.test.ts +137 -0
- package/src/__tests__/hooks-ts-runner.test.ts +125 -0
- package/src/__tests__/hooks-watch.test.ts +100 -0
- package/src/__tests__/host-file-edit-tool.test.ts +228 -0
- package/src/__tests__/host-file-read-tool.test.ts +123 -0
- package/src/__tests__/host-file-write-tool.test.ts +136 -0
- package/src/__tests__/host-shell-tool.test.ts +562 -0
- package/src/__tests__/ingress-reconcile.test.ts +581 -0
- package/src/__tests__/ingress-url-consistency.test.ts +214 -0
- package/src/__tests__/intent-routing.test.ts +259 -0
- package/src/__tests__/ipc-blob-store.test.ts +315 -0
- package/src/__tests__/ipc-contract-inventory.test.ts +54 -0
- package/src/__tests__/ipc-contract.test.ts +74 -0
- package/src/__tests__/ipc-protocol.test.ts +113 -0
- package/src/__tests__/ipc-roundtrip.benchmark.test.ts +237 -0
- package/src/__tests__/ipc-snapshot.test.ts +1769 -0
- package/src/__tests__/ipc-validate.test.ts +407 -0
- package/src/__tests__/key-migration.test.ts +206 -0
- package/src/__tests__/keychain.test.ts +258 -0
- package/src/__tests__/llm-usage-store.test.ts +221 -0
- package/src/__tests__/managed-skill-lifecycle.test.ts +257 -0
- package/src/__tests__/managed-store.test.ts +608 -0
- package/src/__tests__/media-generate-image.test.ts +238 -0
- package/src/__tests__/media-reuse-story.e2e.test.ts +676 -0
- package/src/__tests__/media-visibility-policy.test.ts +141 -0
- package/src/__tests__/memory-context-benchmark.benchmark.test.ts +235 -0
- package/src/__tests__/memory-lifecycle-e2e.test.ts +481 -0
- package/src/__tests__/memory-query-builder.test.ts +59 -0
- package/src/__tests__/memory-recall-quality.test.ts +846 -0
- package/src/__tests__/memory-regressions.experimental.test.ts +538 -0
- package/src/__tests__/memory-regressions.test.ts +4435 -0
- package/src/__tests__/memory-retrieval-budget.test.ts +49 -0
- package/src/__tests__/memory-retrieval.benchmark.test.ts +430 -0
- package/src/__tests__/migration-cli-flows.test.ts +169 -0
- package/src/__tests__/migration-ordering.test.ts +249 -0
- package/src/__tests__/mock-signup-server.test.ts +528 -0
- package/src/__tests__/oauth-callback-registry.test.ts +92 -0
- package/src/__tests__/oauth2-gateway-transport.test.ts +285 -0
- package/src/__tests__/onboarding-starter-tasks.test.ts +176 -0
- package/src/__tests__/onboarding-template-contract.test.ts +58 -0
- package/src/__tests__/openai-provider.test.ts +753 -0
- package/src/__tests__/parallel-tool.benchmark.test.ts +294 -0
- package/src/__tests__/parser.test.ts +472 -0
- package/src/__tests__/path-classifier.test.ts +73 -0
- package/src/__tests__/path-policy.test.ts +435 -0
- package/src/__tests__/platform-move-helper.test.ts +99 -0
- package/src/__tests__/platform-socket-path.test.ts +52 -0
- package/src/__tests__/platform-workspace-migration.test.ts +1000 -0
- package/src/__tests__/platform.test.ts +131 -0
- package/src/__tests__/playbook-execution.test.ts +502 -0
- package/src/__tests__/playbook-tools.test.ts +340 -0
- package/src/__tests__/prebuilt-home-base-seed.test.ts +75 -0
- package/src/__tests__/pricing.test.ts +256 -0
- package/src/__tests__/profile-compiler.test.ts +374 -0
- package/src/__tests__/provider-commit-message-generator.test.ts +342 -0
- package/src/__tests__/provider-registry-ollama.test.ts +16 -0
- package/src/__tests__/provider-streaming.benchmark.test.ts +773 -0
- package/src/__tests__/proxy-approval-callback.test.ts +601 -0
- package/src/__tests__/public-ingress-urls.test.ts +256 -0
- package/src/__tests__/qdrant-manager.test.ts +267 -0
- package/src/__tests__/ratelimit.test.ts +297 -0
- package/src/__tests__/recurrence-engine-rruleset.test.ts +175 -0
- package/src/__tests__/recurrence-engine.test.ts +78 -0
- package/src/__tests__/recurrence-types.test.ts +79 -0
- package/src/__tests__/registry.test.ts +494 -0
- package/src/__tests__/relay-server.test.ts +688 -0
- package/src/__tests__/reminder-store.test.ts +223 -0
- package/src/__tests__/reminder.test.ts +229 -0
- package/src/__tests__/request-file-tool.test.ts +158 -0
- package/src/__tests__/run-orchestrator-assistant-events.test.ts +227 -0
- package/src/__tests__/run-orchestrator.test.ts +425 -0
- package/src/__tests__/runtime-attachment-metadata.test.ts +189 -0
- package/src/__tests__/runtime-events-sse-parity.test.ts +343 -0
- package/src/__tests__/runtime-events-sse.test.ts +162 -0
- package/src/__tests__/runtime-runs-http.test.ts +438 -0
- package/src/__tests__/runtime-runs.test.ts +260 -0
- package/src/__tests__/sandbox-diagnostics.test.ts +408 -0
- package/src/__tests__/sandbox-host-parity.test.ts +950 -0
- package/src/__tests__/scaffold-managed-skill-tool.test.ts +253 -0
- package/src/__tests__/schedule-store.test.ts +484 -0
- package/src/__tests__/schedule-tools.test.ts +783 -0
- package/src/__tests__/scheduler-recurrence.test.ts +430 -0
- package/src/__tests__/script-proxy-certs.test.ts +90 -0
- package/src/__tests__/script-proxy-connect-tunnel.test.ts +177 -0
- package/src/__tests__/script-proxy-decision-trace.test.ts +156 -0
- package/src/__tests__/script-proxy-http-forwarder.test.ts +281 -0
- package/src/__tests__/script-proxy-injection-runtime.test.ts +401 -0
- package/src/__tests__/script-proxy-mitm-handler.test.ts +407 -0
- package/src/__tests__/script-proxy-policy-runtime.test.ts +287 -0
- package/src/__tests__/script-proxy-policy.test.ts +310 -0
- package/src/__tests__/script-proxy-rewrite-specificity.test.ts +135 -0
- package/src/__tests__/script-proxy-router.test.ts +180 -0
- package/src/__tests__/script-proxy-session-manager.test.ts +382 -0
- package/src/__tests__/script-proxy-session-runtime.test.ts +113 -0
- package/src/__tests__/secret-allowlist.test.ts +230 -0
- package/src/__tests__/secret-ingress-handler.test.ts +110 -0
- package/src/__tests__/secret-onetime-send.test.ts +130 -0
- package/src/__tests__/secret-prompt-log-hygiene.test.ts +106 -0
- package/src/__tests__/secret-response-routing.test.ts +93 -0
- package/src/__tests__/secret-scanner-executor.test.ts +348 -0
- package/src/__tests__/secret-scanner.test.ts +900 -0
- package/src/__tests__/secure-keys.test.ts +323 -0
- package/src/__tests__/server-history-render.test.ts +431 -0
- package/src/__tests__/session-abort-tool-results.test.ts +240 -0
- package/src/__tests__/session-conflict-gate.test.ts +1136 -0
- package/src/__tests__/session-error.test.ts +369 -0
- package/src/__tests__/session-evictor.test.ts +188 -0
- package/src/__tests__/session-init.benchmark.test.ts +465 -0
- package/src/__tests__/session-load-history-repair.test.ts +222 -0
- package/src/__tests__/session-pre-run-repair.test.ts +213 -0
- package/src/__tests__/session-process-bridge.test.ts +242 -0
- package/src/__tests__/session-profile-injection.test.ts +444 -0
- package/src/__tests__/session-provider-retry-repair.test.ts +306 -0
- package/src/__tests__/session-queue.test.ts +1535 -0
- package/src/__tests__/session-runtime-assembly.test.ts +476 -0
- package/src/__tests__/session-runtime-workspace.test.ts +183 -0
- package/src/__tests__/session-skill-tools.test.ts +2431 -0
- package/src/__tests__/session-slash-known.test.ts +368 -0
- package/src/__tests__/session-slash-queue.test.ts +288 -0
- package/src/__tests__/session-slash-unknown.test.ts +271 -0
- package/src/__tests__/session-surfaces-task-progress.test.ts +104 -0
- package/src/__tests__/session-tool-setup-app-refresh.test.ts +473 -0
- package/src/__tests__/session-tool-setup-memory-scope.test.ts +140 -0
- package/src/__tests__/session-tool-setup-side-effect-flag.test.ts +140 -0
- package/src/__tests__/session-undo.test.ts +75 -0
- package/src/__tests__/session-workspace-cache-state.test.ts +246 -0
- package/src/__tests__/session-workspace-injection.test.ts +327 -0
- package/src/__tests__/session-workspace-tool-tracking.test.ts +240 -0
- package/src/__tests__/shared-filesystem-errors.test.ts +78 -0
- package/src/__tests__/shell-credential-ref.test.ts +187 -0
- package/src/__tests__/shell-identity.test.ts +256 -0
- package/src/__tests__/shell-parser-fuzz.test.ts +544 -0
- package/src/__tests__/shell-parser-property.test.ts +433 -0
- package/src/__tests__/shell-tool-proxy-mode.test.ts +272 -0
- package/src/__tests__/signup-e2e.test.ts +353 -0
- package/src/__tests__/size-guard.test.ts +117 -0
- package/src/__tests__/skill-include-graph.test.ts +303 -0
- package/src/__tests__/skill-load-tool.test.ts +409 -0
- package/src/__tests__/skill-projection.benchmark.test.ts +338 -0
- package/src/__tests__/skill-script-runner-host.test.ts +489 -0
- package/src/__tests__/skill-script-runner-sandbox.test.ts +349 -0
- package/src/__tests__/skill-script-runner.test.ts +159 -0
- package/src/__tests__/skill-tool-factory.test.ts +252 -0
- package/src/__tests__/skill-tool-manifest.test.ts +658 -0
- package/src/__tests__/skill-version-hash.test.ts +182 -0
- package/src/__tests__/skills.test.ts +680 -0
- package/src/__tests__/slash-commands-catalog.test.ts +86 -0
- package/src/__tests__/slash-commands-parser.test.ts +119 -0
- package/src/__tests__/slash-commands-resolver.test.ts +193 -0
- package/src/__tests__/slash-commands-rewrite.test.ts +39 -0
- package/src/__tests__/speaker-identification.test.ts +52 -0
- package/src/__tests__/starter-bundle.test.ts +136 -0
- package/src/__tests__/starter-task-flow.test.ts +143 -0
- package/src/__tests__/subagent-manager-notify.test.ts +404 -0
- package/src/__tests__/subagent-tools.test.ts +801 -0
- package/src/__tests__/subagent-types.test.ts +78 -0
- package/src/__tests__/swarm-orchestrator.test.ts +428 -0
- package/src/__tests__/swarm-plan-validator.test.ts +330 -0
- package/src/__tests__/swarm-recursion.test.ts +165 -0
- package/src/__tests__/swarm-router-planner.test.ts +208 -0
- package/src/__tests__/swarm-session-integration.test.ts +274 -0
- package/src/__tests__/swarm-tool.test.ts +145 -0
- package/src/__tests__/swarm-worker-backend.test.ts +129 -0
- package/src/__tests__/swarm-worker-runner.test.ts +272 -0
- package/src/__tests__/system-prompt.test.ts +439 -0
- package/src/__tests__/task-compiler.test.ts +284 -0
- package/src/__tests__/task-management-tools.test.ts +936 -0
- package/src/__tests__/task-runner.test.ts +216 -0
- package/src/__tests__/task-scheduler.test.ts +217 -0
- package/src/__tests__/task-tools.test.ts +595 -0
- package/src/__tests__/terminal-sandbox-docker.test.ts +1064 -0
- package/src/__tests__/terminal-sandbox.integration.test.ts +178 -0
- package/src/__tests__/terminal-sandbox.test.ts +202 -0
- package/src/__tests__/terminal-tools.test.ts +840 -0
- package/src/__tests__/test-support/browser-skill-harness.ts +90 -0
- package/src/__tests__/test-support/computer-use-skill-harness.ts +45 -0
- package/src/__tests__/tool-audit-listener.test.ts +113 -0
- package/src/__tests__/tool-domain-event-publisher.test.ts +253 -0
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +500 -0
- package/src/__tests__/tool-executor-lifecycle-events.test.ts +516 -0
- package/src/__tests__/tool-executor-redaction.test.ts +289 -0
- package/src/__tests__/tool-executor-shell-integration.test.ts +301 -0
- package/src/__tests__/tool-executor.test.ts +1989 -0
- package/src/__tests__/tool-metrics-listener.test.ts +225 -0
- package/src/__tests__/tool-notification-listener.test.ts +49 -0
- package/src/__tests__/tool-permission-simulate-handler.test.ts +336 -0
- package/src/__tests__/tool-policy.test.ts +54 -0
- package/src/__tests__/tool-profiling-listener.test.ts +268 -0
- package/src/__tests__/tool-result-truncation.test.ts +217 -0
- package/src/__tests__/tool-trace-listener.test.ts +226 -0
- package/src/__tests__/top-level-renderer.test.ts +121 -0
- package/src/__tests__/top-level-scanner.test.ts +141 -0
- package/src/__tests__/trace-emitter.test.ts +173 -0
- package/src/__tests__/trust-store.test.ts +1605 -0
- package/src/__tests__/turn-commit.test.ts +554 -0
- package/src/__tests__/twilio-provider.test.ts +329 -0
- package/src/__tests__/twilio-routes-elevenlabs.test.ts +375 -0
- package/src/__tests__/twilio-routes-twiml.test.ts +127 -0
- package/src/__tests__/twilio-routes.test.ts +577 -0
- package/src/__tests__/twitter-auth-handler.test.ts +667 -0
- package/src/__tests__/twitter-cli-error-shaping.test.ts +208 -0
- package/src/__tests__/twitter-cli-routing.test.ts +252 -0
- package/src/__tests__/twitter-oauth-client.test.ts +209 -0
- package/src/__tests__/url-safety.test.ts +418 -0
- package/src/__tests__/view-image-tool.test.ts +217 -0
- package/src/__tests__/weather-skill-regression.test.ts +225 -0
- package/src/__tests__/web-fetch.test.ts +869 -0
- package/src/__tests__/web-search.test.ts +584 -0
- package/src/__tests__/workspace-git-service.test.ts +1153 -0
- package/src/__tests__/workspace-heartbeat-service.test.ts +486 -0
- package/src/__tests__/workspace-lifecycle.test.ts +292 -0
- package/src/__tests__/workspace-policy.test.ts +213 -0
- package/src/agent/attachments.ts +35 -0
- package/src/agent/loop.ts +500 -0
- package/src/agent/message-types.ts +17 -0
- package/src/agent-heartbeat/agent-heartbeat-service.ts +155 -0
- package/src/autonomy/autonomy-resolver.ts +60 -0
- package/src/autonomy/autonomy-store.ts +122 -0
- package/src/autonomy/disposition-mapper.ts +31 -0
- package/src/autonomy/index.ts +11 -0
- package/src/autonomy/types.ts +39 -0
- package/src/bundler/app-bundler.ts +295 -0
- package/src/bundler/bundle-scanner.ts +535 -0
- package/src/bundler/bundle-signer.ts +124 -0
- package/src/bundler/manifest.ts +21 -0
- package/src/bundler/signature-verifier.ts +184 -0
- package/src/calls/call-bridge.ts +168 -0
- package/src/calls/call-constants.ts +48 -0
- package/src/calls/call-domain.ts +430 -0
- package/src/calls/call-orchestrator.ts +498 -0
- package/src/calls/call-recovery.ts +207 -0
- package/src/calls/call-state-machine.ts +68 -0
- package/src/calls/call-state.ts +87 -0
- package/src/calls/call-store.ts +422 -0
- package/src/calls/elevenlabs-client.ts +97 -0
- package/src/calls/elevenlabs-config.ts +31 -0
- package/src/calls/relay-server.ts +390 -0
- package/src/calls/speaker-identification.ts +213 -0
- package/src/calls/twilio-config.ts +45 -0
- package/src/calls/twilio-provider.ts +263 -0
- package/src/calls/twilio-rest.ts +156 -0
- package/src/calls/twilio-routes.ts +311 -0
- package/src/calls/types.ts +39 -0
- package/src/calls/voice-provider.ts +14 -0
- package/src/calls/voice-quality.ts +114 -0
- package/src/cli/autonomy.ts +188 -0
- package/src/cli/config-commands.ts +334 -0
- package/src/cli/contacts.ts +149 -0
- package/src/cli/core-commands.ts +784 -0
- package/src/cli/doordash.ts +1055 -0
- package/src/cli/email-guardrails.ts +200 -0
- package/src/cli/email.ts +405 -0
- package/src/cli/ipc-client.ts +82 -0
- package/src/cli/main-screen.tsx +53 -0
- package/src/cli/map.ts +270 -0
- package/src/cli/twitter.ts +754 -0
- package/src/cli.ts +918 -0
- package/src/commands/__tests__/cc-command-registry.test.ts +319 -0
- package/src/commands/cc-command-registry.ts +209 -0
- package/src/config/bundled-skills/.gitkeep +0 -0
- package/src/config/bundled-skills/agentmail/SKILL.md +128 -0
- package/src/config/bundled-skills/agentmail/icon.svg +21 -0
- package/src/config/bundled-skills/app-builder/SKILL.md +1404 -0
- package/src/config/bundled-skills/app-builder/TOOLS.json +279 -0
- package/src/config/bundled-skills/app-builder/icon.svg +9 -0
- package/src/config/bundled-skills/app-builder/tools/app-create.ts +15 -0
- package/src/config/bundled-skills/app-builder/tools/app-delete.ts +10 -0
- package/src/config/bundled-skills/app-builder/tools/app-file-edit.ts +11 -0
- package/src/config/bundled-skills/app-builder/tools/app-file-list.ts +10 -0
- package/src/config/bundled-skills/app-builder/tools/app-file-read.ts +18 -0
- package/src/config/bundled-skills/app-builder/tools/app-file-write.ts +11 -0
- package/src/config/bundled-skills/app-builder/tools/app-list.ts +10 -0
- package/src/config/bundled-skills/app-builder/tools/app-query.ts +10 -0
- package/src/config/bundled-skills/app-builder/tools/app-update.ts +20 -0
- package/src/config/bundled-skills/browser/SKILL.md +28 -0
- package/src/config/bundled-skills/browser/TOOLS.json +234 -0
- package/src/config/bundled-skills/browser/tools/browser-click.ts +9 -0
- package/src/config/bundled-skills/browser/tools/browser-close.ts +9 -0
- package/src/config/bundled-skills/browser/tools/browser-extract.ts +9 -0
- package/src/config/bundled-skills/browser/tools/browser-fill-credential.ts +9 -0
- package/src/config/bundled-skills/browser/tools/browser-navigate.ts +9 -0
- package/src/config/bundled-skills/browser/tools/browser-press-key.ts +9 -0
- package/src/config/bundled-skills/browser/tools/browser-screenshot.ts +9 -0
- package/src/config/bundled-skills/browser/tools/browser-snapshot.ts +9 -0
- package/src/config/bundled-skills/browser/tools/browser-type.ts +9 -0
- package/src/config/bundled-skills/browser/tools/browser-wait-for.ts +9 -0
- package/src/config/bundled-skills/claude-code/SKILL.md +50 -0
- package/src/config/bundled-skills/claude-code/TOOLS.json +40 -0
- package/src/config/bundled-skills/claude-code/tools/claude-code.ts +9 -0
- package/src/config/bundled-skills/computer-use/SKILL.md +17 -0
- package/src/config/bundled-skills/computer-use/TOOLS.json +326 -0
- package/src/config/bundled-skills/computer-use/tools/computer-use-click.ts +9 -0
- package/src/config/bundled-skills/computer-use/tools/computer-use-done.ts +9 -0
- package/src/config/bundled-skills/computer-use/tools/computer-use-double-click.ts +9 -0
- package/src/config/bundled-skills/computer-use/tools/computer-use-drag.ts +9 -0
- package/src/config/bundled-skills/computer-use/tools/computer-use-key.ts +9 -0
- package/src/config/bundled-skills/computer-use/tools/computer-use-open-app.ts +9 -0
- package/src/config/bundled-skills/computer-use/tools/computer-use-request-control.ts +9 -0
- package/src/config/bundled-skills/computer-use/tools/computer-use-respond.ts +9 -0
- package/src/config/bundled-skills/computer-use/tools/computer-use-right-click.ts +9 -0
- package/src/config/bundled-skills/computer-use/tools/computer-use-run-applescript.ts +9 -0
- package/src/config/bundled-skills/computer-use/tools/computer-use-scroll.ts +9 -0
- package/src/config/bundled-skills/computer-use/tools/computer-use-type-text.ts +9 -0
- package/src/config/bundled-skills/computer-use/tools/computer-use-wait.ts +9 -0
- package/src/config/bundled-skills/contacts/SKILL.md +39 -0
- package/src/config/bundled-skills/contacts/TOOLS.json +122 -0
- package/src/config/bundled-skills/contacts/tools/contact-merge.ts +57 -0
- package/src/config/bundled-skills/contacts/tools/contact-search.ts +60 -0
- package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +66 -0
- package/src/config/bundled-skills/document/SKILL.md +26 -0
- package/src/config/bundled-skills/document/TOOLS.json +53 -0
- package/src/config/bundled-skills/document/tools/document-create.ts +9 -0
- package/src/config/bundled-skills/document/tools/document-update.ts +9 -0
- package/src/config/bundled-skills/doordash/SKILL.md +163 -0
- package/src/config/bundled-skills/followups/SKILL.md +32 -0
- package/src/config/bundled-skills/followups/TOOLS.json +100 -0
- package/src/config/bundled-skills/followups/icon.svg +24 -0
- package/src/config/bundled-skills/followups/tools/followup-create.ts +9 -0
- package/src/config/bundled-skills/followups/tools/followup-list.ts +9 -0
- package/src/config/bundled-skills/followups/tools/followup-resolve.ts +9 -0
- package/src/config/bundled-skills/google-calendar/SKILL.md +51 -0
- package/src/config/bundled-skills/google-calendar/TOOLS.json +108 -0
- package/src/config/bundled-skills/google-calendar/calendar-client.ts +165 -0
- package/src/config/bundled-skills/google-calendar/tools/calendar-check-availability.ts +21 -0
- package/src/config/bundled-skills/google-calendar/tools/calendar-create-event.ts +42 -0
- package/src/config/bundled-skills/google-calendar/tools/calendar-get-event.ts +13 -0
- package/src/config/bundled-skills/google-calendar/tools/calendar-list-events.ts +30 -0
- package/src/config/bundled-skills/google-calendar/tools/calendar-rsvp.ts +41 -0
- package/src/config/bundled-skills/google-calendar/tools/shared.ts +18 -0
- package/src/config/bundled-skills/google-calendar/types.ts +97 -0
- package/src/config/bundled-skills/image-studio/SKILL.md +32 -0
- package/src/config/bundled-skills/image-studio/TOOLS.json +42 -0
- package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +115 -0
- package/src/config/bundled-skills/macos-automation/SKILL.md +66 -0
- package/src/config/bundled-skills/messaging/SKILL.md +153 -0
- package/src/config/bundled-skills/messaging/TOOLS.json +357 -0
- package/src/config/bundled-skills/messaging/tools/gmail-archive.ts +23 -0
- package/src/config/bundled-skills/messaging/tools/gmail-batch-archive.ts +23 -0
- package/src/config/bundled-skills/messaging/tools/gmail-batch-label.ts +25 -0
- package/src/config/bundled-skills/messaging/tools/gmail-draft.ts +26 -0
- package/src/config/bundled-skills/messaging/tools/gmail-label.ts +25 -0
- package/src/config/bundled-skills/messaging/tools/gmail-trash.ts +23 -0
- package/src/config/bundled-skills/messaging/tools/gmail-unsubscribe.ts +84 -0
- package/src/config/bundled-skills/messaging/tools/messaging-analyze-activity.ts +18 -0
- package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +125 -0
- package/src/config/bundled-skills/messaging/tools/messaging-auth-test.ts +16 -0
- package/src/config/bundled-skills/messaging/tools/messaging-draft.ts +49 -0
- package/src/config/bundled-skills/messaging/tools/messaging-list-conversations.ts +21 -0
- package/src/config/bundled-skills/messaging/tools/messaging-mark-read.ts +25 -0
- package/src/config/bundled-skills/messaging/tools/messaging-read.ts +28 -0
- package/src/config/bundled-skills/messaging/tools/messaging-reply.ts +32 -0
- package/src/config/bundled-skills/messaging/tools/messaging-search.ts +22 -0
- package/src/config/bundled-skills/messaging/tools/messaging-send.ts +31 -0
- package/src/config/bundled-skills/messaging/tools/shared.ts +76 -0
- package/src/config/bundled-skills/messaging/tools/slack-add-reaction.ts +25 -0
- package/src/config/bundled-skills/messaging/tools/slack-leave-channel.ts +23 -0
- package/src/config/bundled-skills/phone-calls/SKILL.md +533 -0
- package/src/config/bundled-skills/playbooks/SKILL.md +31 -0
- package/src/config/bundled-skills/playbooks/TOOLS.json +126 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +98 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +54 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +76 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +113 -0
- package/src/config/bundled-skills/public-ingress/SKILL.md +200 -0
- package/src/config/bundled-skills/reminder/SKILL.md +20 -0
- package/src/config/bundled-skills/reminder/TOOLS.json +67 -0
- package/src/config/bundled-skills/reminder/tools/reminder-cancel.ts +9 -0
- package/src/config/bundled-skills/reminder/tools/reminder-create.ts +9 -0
- package/src/config/bundled-skills/reminder/tools/reminder-list.ts +9 -0
- package/src/config/bundled-skills/schedule/SKILL.md +74 -0
- package/src/config/bundled-skills/schedule/TOOLS.json +135 -0
- package/src/config/bundled-skills/schedule/tools/schedule-create.ts +9 -0
- package/src/config/bundled-skills/schedule/tools/schedule-delete.ts +9 -0
- package/src/config/bundled-skills/schedule/tools/schedule-list.ts +9 -0
- package/src/config/bundled-skills/schedule/tools/schedule-update.ts +9 -0
- package/src/config/bundled-skills/self-upgrade/SKILL.md +68 -0
- package/src/config/bundled-skills/start-the-day/SKILL.md +70 -0
- package/src/config/bundled-skills/start-the-day/icon.svg +13 -0
- package/src/config/bundled-skills/subagent/SKILL.md +25 -0
- package/src/config/bundled-skills/subagent/TOOLS.json +107 -0
- package/src/config/bundled-skills/subagent/tools/subagent-abort.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-message.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-read.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-spawn.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-status.ts +9 -0
- package/src/config/bundled-skills/tasks/SKILL.md +28 -0
- package/src/config/bundled-skills/tasks/TOOLS.json +281 -0
- package/src/config/bundled-skills/tasks/tools/task-delete.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-add.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-remove.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-show.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-update.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-queue-run.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-run.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-save.ts +9 -0
- package/src/config/bundled-skills/transcribe/SKILL.md +25 -0
- package/src/config/bundled-skills/transcribe/TOOLS.json +32 -0
- package/src/config/bundled-skills/transcribe/tools/transcribe-media.ts +370 -0
- package/src/config/bundled-skills/twitter/SKILL.md +220 -0
- package/src/config/bundled-skills/watcher/SKILL.md +27 -0
- package/src/config/bundled-skills/watcher/TOOLS.json +147 -0
- package/src/config/bundled-skills/watcher/tools/watcher-create.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-delete.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-digest.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-list.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-update.ts +9 -0
- package/src/config/bundled-skills/weather/SKILL.md +37 -0
- package/src/config/bundled-skills/weather/TOOLS.json +32 -0
- package/src/config/bundled-skills/weather/icon.svg +24 -0
- package/src/config/bundled-skills/weather/tools/get-weather.ts +9 -0
- package/src/config/computer-use-prompt.ts +97 -0
- package/src/config/defaults.ts +263 -0
- package/src/config/loader.ts +339 -0
- package/src/config/schema.ts +1436 -0
- package/src/config/skill-state.ts +95 -0
- package/src/config/skills.ts +972 -0
- package/src/config/system-prompt.ts +675 -0
- package/src/config/templates/BOOTSTRAP.md +70 -0
- package/src/config/templates/IDENTITY.md +25 -0
- package/src/config/templates/LOOKS.md +25 -0
- package/src/config/templates/SOUL.md +37 -0
- package/src/config/templates/USER.md +19 -0
- package/src/config/types.ts +42 -0
- package/src/config/vellum-skills/chatgpt-import/SKILL.md +24 -0
- package/src/config/vellum-skills/chatgpt-import/TOOLS.json +23 -0
- package/src/config/vellum-skills/chatgpt-import/tools/chatgpt-import.ts +284 -0
- package/src/config/vellum-skills/deploy-fullstack-vercel/SKILL.md +179 -0
- package/src/config/vellum-skills/document-writer/SKILL.md +195 -0
- package/src/config/vellum-skills/google-oauth-setup/SKILL.md +199 -0
- package/src/config/vellum-skills/slack-oauth-setup/SKILL.md +153 -0
- package/src/config/vellum-skills/telegram-setup/SKILL.md +143 -0
- package/src/config/vellum-skills/twilio-setup/SKILL.md +213 -0
- package/src/contacts/contact-store.ts +410 -0
- package/src/contacts/index.ts +11 -0
- package/src/contacts/types.ts +28 -0
- package/src/context/token-estimator.ts +108 -0
- package/src/context/tool-result-truncation.ts +128 -0
- package/src/context/window-manager.ts +531 -0
- package/src/daemon/assistant-attachments.ts +691 -0
- package/src/daemon/classifier.ts +110 -0
- package/src/daemon/computer-use-session.ts +903 -0
- package/src/daemon/connection-policy.ts +41 -0
- package/src/daemon/date-context.ts +136 -0
- package/src/daemon/handlers/apps.ts +530 -0
- package/src/daemon/handlers/browser.ts +54 -0
- package/src/daemon/handlers/computer-use.ts +187 -0
- package/src/daemon/handlers/config.ts +1517 -0
- package/src/daemon/handlers/diagnostics.ts +338 -0
- package/src/daemon/handlers/documents.ts +173 -0
- package/src/daemon/handlers/home-base.ts +78 -0
- package/src/daemon/handlers/identity.ts +127 -0
- package/src/daemon/handlers/index.ts +129 -0
- package/src/daemon/handlers/misc.ts +331 -0
- package/src/daemon/handlers/open-bundle-handler.ts +80 -0
- package/src/daemon/handlers/publish.ts +187 -0
- package/src/daemon/handlers/sessions.ts +555 -0
- package/src/daemon/handlers/shared.ts +570 -0
- package/src/daemon/handlers/signing.ts +37 -0
- package/src/daemon/handlers/skills.ts +486 -0
- package/src/daemon/handlers/subagents.ts +210 -0
- package/src/daemon/handlers/twitter-auth.ts +198 -0
- package/src/daemon/handlers/work-items.ts +632 -0
- package/src/daemon/handlers/workspace-files.ts +75 -0
- package/src/daemon/handlers.ts +17 -0
- package/src/daemon/history-repair.ts +214 -0
- package/src/daemon/ipc-blob-store.ts +231 -0
- package/src/daemon/ipc-contract-inventory.json +495 -0
- package/src/daemon/ipc-contract-inventory.ts +126 -0
- package/src/daemon/ipc-contract.ts +2551 -0
- package/src/daemon/ipc-protocol.ts +75 -0
- package/src/daemon/ipc-validate.ts +188 -0
- package/src/daemon/lifecycle.ts +582 -0
- package/src/daemon/main.ts +21 -0
- package/src/daemon/media-visibility-policy.ts +57 -0
- package/src/daemon/ride-shotgun-handler.ts +309 -0
- package/src/daemon/server.ts +1215 -0
- package/src/daemon/session-agent-loop.ts +922 -0
- package/src/daemon/session-attachments.ts +196 -0
- package/src/daemon/session-conflict-gate.ts +184 -0
- package/src/daemon/session-dynamic-profile.ts +63 -0
- package/src/daemon/session-error.ts +290 -0
- package/src/daemon/session-evictor.ts +196 -0
- package/src/daemon/session-history.ts +437 -0
- package/src/daemon/session-lifecycle.ts +147 -0
- package/src/daemon/session-media-retry.ts +147 -0
- package/src/daemon/session-memory.ts +212 -0
- package/src/daemon/session-messaging.ts +145 -0
- package/src/daemon/session-notifiers.ts +193 -0
- package/src/daemon/session-process.ts +323 -0
- package/src/daemon/session-queue-manager.ts +82 -0
- package/src/daemon/session-runtime-assembly.ts +447 -0
- package/src/daemon/session-skill-tools.ts +356 -0
- package/src/daemon/session-slash.ts +305 -0
- package/src/daemon/session-surfaces.ts +702 -0
- package/src/daemon/session-tool-setup.ts +523 -0
- package/src/daemon/session-usage.ts +72 -0
- package/src/daemon/session-workspace.ts +19 -0
- package/src/daemon/session.ts +400 -0
- package/src/daemon/tls-certs.ts +189 -0
- package/src/daemon/trace-emitter.ts +82 -0
- package/src/daemon/video-thumbnail.ts +62 -0
- package/src/daemon/watch-handler.ts +274 -0
- package/src/doordash/client.ts +999 -0
- package/src/doordash/queries.ts +1311 -0
- package/src/doordash/query-extractor.ts +93 -0
- package/src/doordash/session.ts +82 -0
- package/src/email/provider.ts +117 -0
- package/src/email/providers/agentmail.ts +317 -0
- package/src/email/providers/index.ts +58 -0
- package/src/email/service.ts +303 -0
- package/src/email/types.ts +126 -0
- package/src/events/bus.ts +157 -0
- package/src/events/domain-events.ts +83 -0
- package/src/events/index.ts +18 -0
- package/src/events/tool-audit-listener.ts +80 -0
- package/src/events/tool-domain-event-publisher.ts +111 -0
- package/src/events/tool-metrics-listener.ts +159 -0
- package/src/events/tool-notification-listener.ts +17 -0
- package/src/events/tool-profiling-listener.ts +158 -0
- package/src/events/tool-trace-listener.ts +75 -0
- package/src/export/formatter.ts +98 -0
- package/src/followups/followup-store.ts +168 -0
- package/src/followups/index.ts +10 -0
- package/src/followups/types.ts +29 -0
- package/src/gallery/default-gallery.ts +795 -0
- package/src/gallery/gallery-manifest.ts +24 -0
- package/src/home-base/app-link-store.ts +82 -0
- package/src/home-base/bootstrap.ts +68 -0
- package/src/home-base/prebuilt/index.html +662 -0
- package/src/home-base/prebuilt/seed-metadata.json +21 -0
- package/src/home-base/prebuilt/seed.ts +112 -0
- package/src/home-base/prebuilt-home-base-updater.ts +30 -0
- package/src/hooks/cli.ts +163 -0
- package/src/hooks/config.ts +88 -0
- package/src/hooks/discovery.ts +110 -0
- package/src/hooks/manager.ts +124 -0
- package/src/hooks/runner.ts +123 -0
- package/src/hooks/templates.ts +52 -0
- package/src/hooks/types.ts +72 -0
- package/src/inbound/public-ingress-urls.ts +123 -0
- package/src/index.ts +81 -0
- package/src/instrument.ts +60 -0
- package/src/logfire.ts +99 -0
- package/src/media/gemini-image-service.ts +136 -0
- package/src/memory/account-store.ts +108 -0
- package/src/memory/admin.ts +211 -0
- package/src/memory/app-git-service.ts +295 -0
- package/src/memory/app-store.ts +577 -0
- package/src/memory/attachments-store.ts +397 -0
- package/src/memory/channel-delivery-store.ts +353 -0
- package/src/memory/channel-guardian-store.ts +669 -0
- package/src/memory/checkpoints.ts +52 -0
- package/src/memory/clarification-resolver.ts +298 -0
- package/src/memory/conflict-intent.ts +157 -0
- package/src/memory/conflict-policy.ts +73 -0
- package/src/memory/conflict-store.ts +350 -0
- package/src/memory/contradiction-checker.ts +358 -0
- package/src/memory/conversation-key-store.ts +122 -0
- package/src/memory/conversation-store.ts +470 -0
- package/src/memory/db.ts +1991 -0
- package/src/memory/embedding-backend.ts +229 -0
- package/src/memory/embedding-gemini.ts +52 -0
- package/src/memory/embedding-local.ts +65 -0
- package/src/memory/embedding-ollama.ts +55 -0
- package/src/memory/embedding-openai.ts +25 -0
- package/src/memory/entity-extractor.ts +474 -0
- package/src/memory/external-conversation-store.ts +234 -0
- package/src/memory/fingerprint.ts +20 -0
- package/src/memory/indexer.ts +156 -0
- package/src/memory/items-extractor.ts +461 -0
- package/src/memory/job-handlers/backfill.ts +139 -0
- package/src/memory/job-handlers/cleanup.ts +58 -0
- package/src/memory/job-handlers/conflict.ts +141 -0
- package/src/memory/job-handlers/embedding.ts +61 -0
- package/src/memory/job-handlers/extraction.ts +123 -0
- package/src/memory/job-handlers/index-maintenance.ts +54 -0
- package/src/memory/job-handlers/summarization.ts +286 -0
- package/src/memory/job-utils.ts +170 -0
- package/src/memory/jobs-store.ts +401 -0
- package/src/memory/jobs-worker.ts +313 -0
- package/src/memory/llm-request-log-store.ts +45 -0
- package/src/memory/llm-usage-store.ts +60 -0
- package/src/memory/message-content.ts +54 -0
- package/src/memory/profile-compiler.ts +160 -0
- package/src/memory/published-pages-store.ts +137 -0
- package/src/memory/qdrant-client.ts +366 -0
- package/src/memory/qdrant-manager.ts +242 -0
- package/src/memory/query-builder.ts +45 -0
- package/src/memory/retrieval-budget.ts +30 -0
- package/src/memory/retriever.ts +653 -0
- package/src/memory/runs-store.ts +305 -0
- package/src/memory/schema.ts +677 -0
- package/src/memory/search/entity.ts +298 -0
- package/src/memory/search/formatting.ts +207 -0
- package/src/memory/search/lexical.ts +227 -0
- package/src/memory/search/ranking.ts +401 -0
- package/src/memory/search/semantic.ts +121 -0
- package/src/memory/search/types.ts +137 -0
- package/src/memory/segmenter.ts +68 -0
- package/src/memory/shared-app-links-store.ts +138 -0
- package/src/memory/tool-usage-store.ts +62 -0
- package/src/messaging/activity-analyzer.ts +76 -0
- package/src/messaging/draft-store.ts +88 -0
- package/src/messaging/index.ts +3 -0
- package/src/messaging/provider-types.ts +80 -0
- package/src/messaging/provider.ts +52 -0
- package/src/messaging/providers/gmail/adapter.ts +193 -0
- package/src/messaging/providers/gmail/client.ts +204 -0
- package/src/messaging/providers/gmail/types.ts +90 -0
- package/src/messaging/providers/slack/adapter.ts +202 -0
- package/src/messaging/providers/slack/client.ts +198 -0
- package/src/messaging/providers/slack/types.ts +119 -0
- package/src/messaging/providers/telegram-bot/adapter.ts +162 -0
- package/src/messaging/providers/telegram-bot/client.ts +104 -0
- package/src/messaging/providers/telegram-bot/types.ts +15 -0
- package/src/messaging/registry.ts +35 -0
- package/src/messaging/style-analyzer.ts +159 -0
- package/src/messaging/thread-summarizer.ts +306 -0
- package/src/messaging/triage-engine.ts +323 -0
- package/src/messaging/types.ts +55 -0
- package/src/permissions/checker.ts +640 -0
- package/src/permissions/defaults.ts +254 -0
- package/src/permissions/prompter.ts +98 -0
- package/src/permissions/secret-prompter.ts +114 -0
- package/src/permissions/shell-identity.ts +227 -0
- package/src/permissions/trust-store.ts +607 -0
- package/src/permissions/types.ts +43 -0
- package/src/permissions/workspace-policy.ts +114 -0
- package/src/playbooks/index.ts +2 -0
- package/src/playbooks/playbook-compiler.ts +90 -0
- package/src/playbooks/types.ts +55 -0
- package/src/providers/anthropic/client.ts +751 -0
- package/src/providers/failover.ts +129 -0
- package/src/providers/fireworks/client.ts +20 -0
- package/src/providers/gemini/client.ts +285 -0
- package/src/providers/ollama/client.ts +30 -0
- package/src/providers/openai/client.ts +337 -0
- package/src/providers/openrouter/client.ts +20 -0
- package/src/providers/ratelimit.ts +93 -0
- package/src/providers/registry.ts +146 -0
- package/src/providers/retry.ts +81 -0
- package/src/providers/stream-timeout.ts +38 -0
- package/src/providers/types.ts +109 -0
- package/src/runtime/assistant-event-hub.ts +157 -0
- package/src/runtime/assistant-event.ts +82 -0
- package/src/runtime/channel-approval-parser.ts +60 -0
- package/src/runtime/channel-approval-types.ts +73 -0
- package/src/runtime/channel-approvals.ts +206 -0
- package/src/runtime/channel-guardian-service.ts +212 -0
- package/src/runtime/gateway-client.ts +58 -0
- package/src/runtime/http-server.ts +1076 -0
- package/src/runtime/http-types.ts +66 -0
- package/src/runtime/routes/app-routes.ts +174 -0
- package/src/runtime/routes/attachment-routes.ts +133 -0
- package/src/runtime/routes/call-routes.ts +190 -0
- package/src/runtime/routes/channel-routes.ts +1404 -0
- package/src/runtime/routes/conversation-routes.ts +352 -0
- package/src/runtime/routes/events-routes.ts +148 -0
- package/src/runtime/routes/run-routes.ts +257 -0
- package/src/runtime/routes/secret-routes.ts +76 -0
- package/src/runtime/run-orchestrator.ts +330 -0
- package/src/schedule/recurrence-engine.ts +162 -0
- package/src/schedule/recurrence-types.ts +67 -0
- package/src/schedule/schedule-store.ts +506 -0
- package/src/schedule/scheduler.ts +171 -0
- package/src/security/encrypted-store.ts +238 -0
- package/src/security/keychain.ts +252 -0
- package/src/security/oauth-callback-registry.ts +66 -0
- package/src/security/oauth2.ts +274 -0
- package/src/security/redaction.ts +89 -0
- package/src/security/secret-allowlist.ts +164 -0
- package/src/security/secret-ingress.ts +57 -0
- package/src/security/secret-scanner.ts +550 -0
- package/src/security/secure-keys.ts +180 -0
- package/src/security/token-manager.ts +141 -0
- package/src/services/published-app-updater.ts +69 -0
- package/src/services/vercel-deploy.ts +73 -0
- package/src/skills/active-skill-tools.ts +81 -0
- package/src/skills/clawhub.ts +414 -0
- package/src/skills/include-graph.ts +146 -0
- package/src/skills/managed-store.ts +233 -0
- package/src/skills/path-classifier.ts +128 -0
- package/src/skills/slash-commands.ts +174 -0
- package/src/skills/tool-manifest.ts +165 -0
- package/src/skills/version-hash.ts +110 -0
- package/src/slack/slack-webhook.ts +61 -0
- package/src/subagent/index.ts +19 -0
- package/src/subagent/manager.ts +511 -0
- package/src/subagent/types.ts +69 -0
- package/src/swarm/backend-claude-code.ts +145 -0
- package/src/swarm/index.ts +44 -0
- package/src/swarm/limits.ts +37 -0
- package/src/swarm/orchestrator.ts +279 -0
- package/src/swarm/plan-validator.ts +151 -0
- package/src/swarm/router-planner.ts +100 -0
- package/src/swarm/router-prompts.ts +36 -0
- package/src/swarm/synthesizer.ts +62 -0
- package/src/swarm/types.ts +62 -0
- package/src/swarm/worker-backend.ts +121 -0
- package/src/swarm/worker-prompts.ts +79 -0
- package/src/swarm/worker-runner.ts +164 -0
- package/src/tasks/SPEC.md +139 -0
- package/src/tasks/candidate-store.ts +86 -0
- package/src/tasks/ephemeral-permissions.ts +48 -0
- package/src/tasks/task-compiler.ts +199 -0
- package/src/tasks/task-runner.ts +90 -0
- package/src/tasks/task-scheduler.ts +21 -0
- package/src/tasks/task-store.ts +127 -0
- package/src/tasks/tool-sanitizer.ts +36 -0
- package/src/tools/apps/definitions.ts +59 -0
- package/src/tools/apps/executors.ts +313 -0
- package/src/tools/apps/open-proxy.ts +43 -0
- package/src/tools/apps/registry.ts +16 -0
- package/src/tools/assets/materialize.ts +218 -0
- package/src/tools/assets/search.ts +361 -0
- package/src/tools/browser/__tests__/auth-cache.test.ts +219 -0
- package/src/tools/browser/__tests__/auth-detector.test.ts +362 -0
- package/src/tools/browser/__tests__/jit-auth.test.ts +189 -0
- package/src/tools/browser/api-map.ts +293 -0
- package/src/tools/browser/auth-cache.ts +149 -0
- package/src/tools/browser/auth-detector.ts +347 -0
- package/src/tools/browser/auto-navigate.ts +270 -0
- package/src/tools/browser/browser-execution.ts +980 -0
- package/src/tools/browser/browser-handoff.ts +79 -0
- package/src/tools/browser/browser-manager.ts +715 -0
- package/src/tools/browser/browser-screencast.ts +217 -0
- package/src/tools/browser/headless-browser.ts +450 -0
- package/src/tools/browser/jit-auth.ts +51 -0
- package/src/tools/browser/network-recorder.ts +349 -0
- package/src/tools/browser/network-recording-types.ts +49 -0
- package/src/tools/browser/recording-store.ts +49 -0
- package/src/tools/browser/runtime-check.ts +43 -0
- package/src/tools/browser/x-auto-navigate.ts +207 -0
- package/src/tools/calls/call-end.ts +67 -0
- package/src/tools/calls/call-start.ts +81 -0
- package/src/tools/calls/call-status.ts +81 -0
- package/src/tools/claude-code/claude-code.ts +428 -0
- package/src/tools/computer-use/definitions.ts +443 -0
- package/src/tools/computer-use/registry.ts +22 -0
- package/src/tools/computer-use/request-computer-control.ts +53 -0
- package/src/tools/computer-use/skill-proxy-bridge.ts +28 -0
- package/src/tools/credentials/account-registry.ts +127 -0
- package/src/tools/credentials/broker-types.ts +107 -0
- package/src/tools/credentials/broker.ts +372 -0
- package/src/tools/credentials/domain-policy.ts +51 -0
- package/src/tools/credentials/host-pattern-match.ts +60 -0
- package/src/tools/credentials/metadata-store.ts +335 -0
- package/src/tools/credentials/policy-types.ts +52 -0
- package/src/tools/credentials/policy-validate.ts +80 -0
- package/src/tools/credentials/resolve.ts +122 -0
- package/src/tools/credentials/selection.ts +159 -0
- package/src/tools/credentials/tool-policy.ts +25 -0
- package/src/tools/credentials/vault.ts +657 -0
- package/src/tools/document/document-tool.ts +92 -0
- package/src/tools/document/editor-template.ts +237 -0
- package/src/tools/execution-target.ts +21 -0
- package/src/tools/execution-timeout.ts +49 -0
- package/src/tools/executor.ts +815 -0
- package/src/tools/filesystem/edit.ts +127 -0
- package/src/tools/filesystem/fuzzy-match.ts +202 -0
- package/src/tools/filesystem/read.ts +71 -0
- package/src/tools/filesystem/view-image.ts +199 -0
- package/src/tools/filesystem/write.ts +79 -0
- package/src/tools/followups/followup_create.ts +76 -0
- package/src/tools/followups/followup_list.ts +60 -0
- package/src/tools/followups/followup_resolve.ts +56 -0
- package/src/tools/host-filesystem/edit.ts +125 -0
- package/src/tools/host-filesystem/read.ts +80 -0
- package/src/tools/host-filesystem/write.ts +76 -0
- package/src/tools/host-terminal/cli-discover.ts +180 -0
- package/src/tools/host-terminal/host-shell.ts +191 -0
- package/src/tools/memory/definitions.ts +69 -0
- package/src/tools/memory/handlers.ts +246 -0
- package/src/tools/memory/register.ts +66 -0
- package/src/tools/network/__tests__/web-search.test.ts +427 -0
- package/src/tools/network/domain-normalize.ts +85 -0
- package/src/tools/network/script-proxy/__tests__/logging.test.ts +248 -0
- package/src/tools/network/script-proxy/__tests__/policy.test.ts +234 -0
- package/src/tools/network/script-proxy/__tests__/router.test.ts +76 -0
- package/src/tools/network/script-proxy/certs.ts +237 -0
- package/src/tools/network/script-proxy/connect-tunnel.ts +82 -0
- package/src/tools/network/script-proxy/http-forwarder.ts +151 -0
- package/src/tools/network/script-proxy/index.ts +28 -0
- package/src/tools/network/script-proxy/logging.ts +196 -0
- package/src/tools/network/script-proxy/mitm-handler.ts +269 -0
- package/src/tools/network/script-proxy/policy.ts +152 -0
- package/src/tools/network/script-proxy/router.ts +60 -0
- package/src/tools/network/script-proxy/server.ts +136 -0
- package/src/tools/network/script-proxy/session-manager.ts +534 -0
- package/src/tools/network/script-proxy/types.ts +125 -0
- package/src/tools/network/url-safety.ts +227 -0
- package/src/tools/network/web-fetch.ts +713 -0
- package/src/tools/network/web-search.ts +296 -0
- package/src/tools/policy-context.ts +29 -0
- package/src/tools/registry.ts +295 -0
- package/src/tools/reminder/reminder-store.ts +148 -0
- package/src/tools/reminder/reminder.ts +80 -0
- package/src/tools/schedule/create.ts +81 -0
- package/src/tools/schedule/delete.ts +28 -0
- package/src/tools/schedule/list.ts +69 -0
- package/src/tools/schedule/update.ts +97 -0
- package/src/tools/shared/filesystem/edit-engine.ts +56 -0
- package/src/tools/shared/filesystem/errors.ts +85 -0
- package/src/tools/shared/filesystem/file-ops-service.ts +215 -0
- package/src/tools/shared/filesystem/format-diff.ts +35 -0
- package/src/tools/shared/filesystem/path-policy.ts +125 -0
- package/src/tools/shared/filesystem/size-guard.ts +41 -0
- package/src/tools/shared/filesystem/types.ts +80 -0
- package/src/tools/shared/shell-output.ts +52 -0
- package/src/tools/skills/delete-managed.ts +60 -0
- package/src/tools/skills/load.ts +139 -0
- package/src/tools/skills/sandbox-runner.ts +279 -0
- package/src/tools/skills/scaffold-managed.ts +150 -0
- package/src/tools/skills/script-contract.ts +6 -0
- package/src/tools/skills/skill-script-runner.ts +86 -0
- package/src/tools/skills/skill-tool-factory.ts +64 -0
- package/src/tools/skills/vellum-catalog.ts +217 -0
- package/src/tools/subagent/abort.ts +33 -0
- package/src/tools/subagent/message.ts +39 -0
- package/src/tools/subagent/read.ts +67 -0
- package/src/tools/subagent/spawn.ts +46 -0
- package/src/tools/subagent/status.ts +45 -0
- package/src/tools/swarm/delegate.ts +183 -0
- package/src/tools/system/request-permission.ts +98 -0
- package/src/tools/system/version.ts +43 -0
- package/src/tools/tasks/index.ts +27 -0
- package/src/tools/tasks/task-delete.ts +82 -0
- package/src/tools/tasks/task-list.ts +44 -0
- package/src/tools/tasks/task-run.ts +97 -0
- package/src/tools/tasks/task-save.ts +47 -0
- package/src/tools/tasks/work-item-enqueue.ts +234 -0
- package/src/tools/tasks/work-item-list.ts +55 -0
- package/src/tools/tasks/work-item-remove.ts +60 -0
- package/src/tools/tasks/work-item-run.ts +78 -0
- package/src/tools/tasks/work-item-update.ts +114 -0
- package/src/tools/terminal/backends/docker.ts +372 -0
- package/src/tools/terminal/backends/native.ts +190 -0
- package/src/tools/terminal/backends/types.ts +26 -0
- package/src/tools/terminal/evaluate-typescript.ts +275 -0
- package/src/tools/terminal/parser.ts +413 -0
- package/src/tools/terminal/safe-env.ts +37 -0
- package/src/tools/terminal/sandbox-diagnostics.ts +149 -0
- package/src/tools/terminal/sandbox.ts +44 -0
- package/src/tools/terminal/shell.ts +257 -0
- package/src/tools/tool-manifest.ts +198 -0
- package/src/tools/types.ts +176 -0
- package/src/tools/ui-surface/definitions.ts +244 -0
- package/src/tools/ui-surface/registry.ts +14 -0
- package/src/tools/watch/screen-watch.ts +130 -0
- package/src/tools/watch/watch-state.ts +119 -0
- package/src/tools/watcher/create.ts +64 -0
- package/src/tools/watcher/delete.ts +27 -0
- package/src/tools/watcher/digest.ts +50 -0
- package/src/tools/watcher/list.ts +60 -0
- package/src/tools/watcher/update.ts +56 -0
- package/src/tools/weather/service.ts +551 -0
- package/src/twitter/client.ts +690 -0
- package/src/twitter/oauth-client.ts +102 -0
- package/src/twitter/router.ts +101 -0
- package/src/twitter/session.ts +91 -0
- package/src/usage/actors.ts +24 -0
- package/src/usage/types.ts +37 -0
- package/src/util/clipboard.ts +33 -0
- package/src/util/content-id.ts +16 -0
- package/src/util/debounce.ts +88 -0
- package/src/util/diff.ts +181 -0
- package/src/util/errors.ts +129 -0
- package/src/util/logger.ts +243 -0
- package/src/util/network-info.ts +47 -0
- package/src/util/platform.ts +632 -0
- package/src/util/pricing.ts +150 -0
- package/src/util/promise-guard.ts +37 -0
- package/src/util/retry.ts +98 -0
- package/src/util/spinner.ts +51 -0
- package/src/util/time.ts +16 -0
- package/src/util/truncate.ts +6 -0
- package/src/util/xml.ts +4 -0
- package/src/version.ts +3 -0
- package/src/watcher/constants.ts +11 -0
- package/src/watcher/engine.ts +199 -0
- package/src/watcher/provider-registry.ts +15 -0
- package/src/watcher/provider-types.ts +48 -0
- package/src/watcher/providers/gmail.ts +198 -0
- package/src/watcher/providers/google-calendar.ts +228 -0
- package/src/watcher/providers/slack.ts +129 -0
- package/src/watcher/watcher-store.ts +419 -0
- package/src/work-items/work-item-runner.ts +171 -0
- package/src/work-items/work-item-store.ts +325 -0
- package/src/workspace/commit-message-enrichment-service.ts +284 -0
- package/src/workspace/commit-message-provider.ts +95 -0
- package/src/workspace/git-service.ts +857 -0
- package/src/workspace/heartbeat-service.ts +345 -0
- package/src/workspace/provider-commit-message-generator.ts +285 -0
- package/src/workspace/top-level-renderer.ts +19 -0
- package/src/workspace/top-level-scanner.ts +41 -0
- package/src/workspace/turn-commit.ts +175 -0
- package/tsconfig.json +21 -0
|
@@ -0,0 +1,2431 @@
|
|
|
1
|
+
import { describe, test, expect, beforeEach, afterAll, mock } from 'bun:test';
|
|
2
|
+
import * as realFs from 'node:fs';
|
|
3
|
+
import type { Message, ToolDefinition, ToolUseContent, ToolResultContent } from '../providers/types.js';
|
|
4
|
+
import type { SkillSummary, SkillToolManifest } from '../config/skills.js';
|
|
5
|
+
import type { Tool } from '../tools/types.js';
|
|
6
|
+
import { RiskLevel } from '../permissions/types.js';
|
|
7
|
+
import {
|
|
8
|
+
BROWSER_TOOL_NAMES,
|
|
9
|
+
buildSkillLoadHistory,
|
|
10
|
+
assertBrowserToolsPresent,
|
|
11
|
+
assertBrowserToolsAbsent,
|
|
12
|
+
} from './test-support/browser-skill-harness.js';
|
|
13
|
+
|
|
14
|
+
// ---------------------------------------------------------------------------
// Mock state — controlled by tests
// ---------------------------------------------------------------------------

/** Catalog handed back by the mocked loadSkillCatalog. */
let mockCatalog: Array<SkillSummary> = [];
/**
 * Manifests keyed by skill ID, looked up by the mocked parseToolManifestFile.
 * A null or missing entry makes that mock throw.
 */
let mockManifests: { [skillId: string]: SkillToolManifest | null } = {};
/** Tools handed to the mocked registerSkillTools, grouped by owning skill ID. */
let mockRegisteredTools = new Map<string, Tool[]>();
/** NOTE(review): presumably records skill IDs passed to a mocked unregister call — the writer is not visible in this section; confirm. */
let mockUnregisteredSkillIds: Array<string> = [];
/** Per-skill counter, bumped once per skill on every mocked registerSkillTools call. */
let mockSkillRefCount = new Map<string, number>();
/** Per-skill version hash overrides. When set, computeSkillVersionHash returns this value. */
let mockVersionHashes: { [skillId: string]: string } = {};
/** Skill IDs for which computeSkillVersionHash should throw (simulates unreadable directories). */
let mockVersionHashErrors = new Set<string>();
|
|
27
|
+
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
// Mocks — must be set up before importing the module under test
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
// Stub the skill catalog loader: every call returns whatever `mockCatalog`
// currently holds, so a test can swap the catalog by reassigning that variable.
mock.module('../config/skills.js', () => {
  return {
    loadSkillCatalog() {
      return mockCatalog;
    },
  };
});
|
|
35
|
+
|
|
36
|
+
mock.module('../skills/active-skill-tools.js', () => {
  type LoadedSkill = { id: string; version?: string };

  /**
   * Extracts `<loaded_skill id="…" version="…" />` markers from the history.
   * Backs both deriveActiveSkills and deriveActiveSkillIds.
   *
   * Only tool_result blocks that answer a `skill_load` tool_use are scanned.
   * Each skill ID is reported once, in order of first appearance, and
   * `version` is set only when the marker carries a version attribute.
   */
  const parseMarkers = (messages: Message[]): LoadedSkill[] => {
    // Pass 1: remember the IDs of every `skill_load` tool_use block.
    const loadCallIds = new Set<string>();
    for (const { content } of messages) {
      for (const part of content) {
        if (part.type !== 'tool_use') continue;
        if (part.name !== 'skill_load') continue;
        loadCallIds.add(part.id);
      }
    }

    // Pass 2: read markers out of the matching tool_result blocks.
    const markerPattern = /<loaded_skill\s+id="([^"]+)"(?:\s+version="([^"]+)")?\s*\/>/g;
    // A Map iterates in insertion order, so the first marker seen per ID wins
    // and the output order follows first appearance.
    const firstSeen = new Map<string, LoadedSkill>();
    for (const { content } of messages) {
      for (const part of content) {
        if (part.type !== 'tool_result' || !loadCallIds.has(part.tool_use_id)) continue;
        const body = part.content;
        if (!body) continue;
        for (const [, id, version] of body.matchAll(markerPattern)) {
          if (firstSeen.has(id)) continue;
          firstSeen.set(id, version ? { id, version } : { id });
        }
      }
    }
    return [...firstSeen.values()];
  };

  return {
    deriveActiveSkills: (messages: Message[]) => parseMarkers(messages),
    deriveActiveSkillIds: (messages: Message[]) => {
      const loaded = parseMarkers(messages);
      return loaded.map((skill) => skill.id);
    },
  };
});
|
|
81
|
+
|
|
82
|
+
// Stub manifest parsing: nothing is read from disk. The manifest is looked up
// in `mockManifests` by the skill ID embedded in the path.
mock.module('../skills/tool-manifest.js', () => {
  return {
    /**
     * Returns the mock manifest for the skill that owns `filePath`.
     * Throws when `mockManifests` has no (or a null) entry for that skill.
     */
    parseToolManifestFile(filePath: string) {
      // The skill ID is the second-to-last path segment:
      // /skills/<id>/TOOLS.json → <id>
      const segments = filePath.split('/');
      const skillId = segments.slice(-2, -1)[0];
      const manifest = mockManifests[skillId];
      if (manifest) {
        return manifest;
      }
      throw new Error(`Mock: no manifest for skill "${skillId}"`);
    },
  };
});
|
|
94
|
+
|
|
95
|
+
mock.module('../tools/skills/skill-tool-factory.js', () => {
  /**
   * Mock factory: turns each manifest entry into a minimal skill-origin Tool.
   *
   * Every tool gets a fixed Medium risk level, records the owning skill id,
   * version hash and bundled flag, and has a no-op `execute` that resolves to
   * an empty, non-error result. The skill directory argument is ignored.
   */
  const createSkillToolsFromManifest = (
    entries: SkillToolManifest['tools'],
    skillId: string,
    _skillDir: string,
    versionHash: string,
    bundled?: boolean,
  ): Tool[] => {
    const toMockTool = (entry: SkillToolManifest['tools'][number]) => ({
      name: entry.name,
      description: entry.description,
      category: entry.category,
      defaultRiskLevel: RiskLevel.Medium,
      origin: 'skill' as const,
      ownerSkillId: skillId,
      ownerSkillVersionHash: versionHash,
      ownerSkillBundled: bundled ?? undefined,
      // Definition fields are read from the entry at call time.
      getDefinition: () => ({
        name: entry.name,
        description: entry.description,
        input_schema: entry.input_schema as object,
      }),
      execute: async () => ({ content: '', isError: false }),
    });

    return entries.map((entry) => toMockTool(entry));
  };

  return { createSkillToolsFromManifest };
});
|
|
121
|
+
|
|
122
|
+
mock.module('../tools/registry.js', () => {
  // Flattened view of every tool currently held in the mock registry, in
  // skill-insertion order and then per-skill registration order.
  const allRegisteredTools = (): Tool[] => Array.from(mockRegisteredTools.values()).flat();

  /**
   * Appends each tool to its owning skill's bucket, then bumps the reference
   * count once per distinct skill id seen in this call.
   */
  function registerSkillTools(tools: Tool[]): void {
    const touchedSkills = new Set<string>();
    tools.forEach((tool) => {
      const owner = tool.ownerSkillId!;
      touchedSkills.add(owner);
      const bucket = mockRegisteredTools.get(owner);
      if (bucket) {
        bucket.push(tool);
      } else {
        mockRegisteredTools.set(owner, [tool]);
      }
    });
    touchedSkills.forEach((owner) => {
      mockSkillRefCount.set(owner, (mockSkillRefCount.get(owner) ?? 0) + 1);
    });
  }

  /**
   * Records the call, then either decrements the skill's reference count (when
   * more than one reference remains) or drops the count and the tools.
   */
  function unregisterSkillTools(skillId: string): void {
    mockUnregisteredSkillIds.push(skillId);
    const refs = mockSkillRefCount.get(skillId) ?? 0;
    if (refs > 1) {
      mockSkillRefCount.set(skillId, refs - 1);
    } else {
      mockSkillRefCount.delete(skillId);
      mockRegisteredTools.delete(skillId);
    }
  }

  /**
   * Return the last matching tool to match production behavior where
   * re-registering a tool overwrites the previous entry (last wins).
   */
  function getTool(name: string): Tool | undefined {
    const matches = allRegisteredTools().filter((tool) => tool.name === name);
    return matches[matches.length - 1];
  }

  /** Names of every registered skill tool, duplicates included. */
  function getSkillToolNames(): string[] {
    return allRegisteredTools().map((tool) => tool.name);
  }

  return { registerSkillTools, unregisterSkillTools, getTool, getSkillToolNames };
});
|
|
167
|
+
|
|
168
|
+
// Stub existsSync so TOOLS.json existence checks pass for skills that have manifests.
// Everything else on node:fs is passed through from the real module.
mock.module('node:fs', () => ({
  ...realFs,
  /**
   * For paths ending in `TOOLS.json`, reports whether the containing
   * directory name is a key of `mockManifests`; any other path is delegated
   * to the real `existsSync`.
   */
  existsSync: (p: string) => {
    if (typeof p === 'string' && p.endsWith('TOOLS.json')) {
      // Split on either separator so a backslash-joined path yields the same
      // skill id as a forward-slash one.
      const segments = p.split(/[\\/]/);
      const skillId = segments[segments.length - 2];
      return skillId in mockManifests;
    }
    return realFs.existsSync(p);
  },
}));
|
|
180
|
+
|
|
181
|
+
mock.module('../skills/version-hash.js', () => ({
  /**
   * Mock hash: keyed by the last `/`-separated segment of `skillDir`.
   *
   * Throws an EACCES-style error when that id is in `mockVersionHashErrors`,
   * returns the configured value when the id is a key of `mockVersionHashes`,
   * and otherwise falls back to a deterministic default derived from the id.
   */
  computeSkillVersionHash: (skillDir: string) => {
    // Everything after the final '/', or the whole string when there is none.
    const skillId = skillDir.slice(skillDir.lastIndexOf('/') + 1);

    if (mockVersionHashErrors.has(skillId)) {
      throw new Error(`EACCES: permission denied, scandir '${skillDir}'`);
    }

    return skillId in mockVersionHashes
      ? mockVersionHashes[skillId]
      : `v1:default-hash-${skillId}`;
  },
}));
|
|
194
|
+
|
|
195
|
+
mock.module('../util/logger.js', () => {
  // Silence all log output from the module under test.
  const noop = (): void => {};

  return {
    getLogger: () => ({ info: noop, warn: noop, debug: noop, error: noop }),
  };
});
|
|
203
|
+
|
|
204
|
+
// ---------------------------------------------------------------------------
|
|
205
|
+
// Import module under test (after mocks)
|
|
206
|
+
// ---------------------------------------------------------------------------
|
|
207
|
+
|
|
208
|
+
// Loaded dynamically so the import runs after the mock.module() calls above.
const sessionSkillToolsModule = await import('../daemon/session-skill-tools.js');
const { projectSkillTools, resetSkillToolProjection } = sessionSkillToolsModule;
|
|
211
|
+
|
|
212
|
+
// ---------------------------------------------------------------------------
|
|
213
|
+
// Helpers
|
|
214
|
+
// ---------------------------------------------------------------------------
|
|
215
|
+
|
|
216
|
+
/**
 * Builds a managed, user-invocable catalog entry for tests.
 *
 * @param id  Used as both the skill id and its display name.
 * @param dir Optional directory override; defaults to `/skills/<id>`.
 * @returns A SkillSummary with model invocation enabled.
 */
function makeSkill(id: string, dir?: string): SkillSummary {
  const defaultDir = `/skills/${id}`;
  const skill: SkillSummary = {
    id,
    name: id,
    description: `Skill ${id}`,
    directoryPath: dir ?? defaultDir,
    // Always points at the default location, even when `dir` is overridden.
    skillFilePath: `${defaultDir}/SKILL.md`,
    userInvocable: true,
    disableModelInvocation: false,
    source: 'managed',
  };
  return skill;
}
|
|
228
|
+
|
|
229
|
+
/**
 * Builds a version-1 tool manifest with one entry per tool name.
 *
 * Every entry shares the same fixture values: category `test`, medium risk,
 * an empty object schema, executor `run.ts` and the `host` execution target.
 *
 * @param toolNames Names of the tools to declare, in manifest order.
 */
function makeManifest(toolNames: string[]): SkillToolManifest {
  const tools: SkillToolManifest['tools'] = [];
  for (const name of toolNames) {
    tools.push({
      name,
      description: `Tool ${name}`,
      category: 'test',
      risk: 'medium',
      input_schema: { type: 'object', properties: {} },
      executor: 'run.ts',
      execution_target: 'host',
    });
  }
  return { version: 1, tools };
}
|
|
243
|
+
|
|
244
|
+
// Monotonic counter used to give every generated skill_load call a unique id.
let toolUseCounter = 0;

/**
 * Builds the two-message exchange for one `skill_load` call: an assistant
 * message holding the tool_use block, then a user message holding the
 * matching tool_result.
 *
 * @param content Text of the tool_result (typically a `<loaded_skill>` marker).
 * @returns `[assistantMessage, userMessage]`, linked by a fresh `sl-<n>` id.
 */
function skillLoadMessages(content: string): Message[] {
  toolUseCounter += 1;
  const toolUseId = `sl-${toolUseCounter}`;

  const call: Message = {
    role: 'assistant',
    content: [{ type: 'tool_use', id: toolUseId, name: 'skill_load', input: {} }],
  };
  const reply: Message = {
    role: 'user',
    content: [{ type: 'tool_result', tool_use_id: toolUseId, content }],
  };

  return [call, reply];
}
|
|
263
|
+
|
|
264
|
+
// ---------------------------------------------------------------------------
|
|
265
|
+
// Tests
|
|
266
|
+
// ---------------------------------------------------------------------------
|
|
267
|
+
|
|
268
|
+
// NOTE(review): Bun's docs suggest mock.restore() does not undo mock.module()
// overrides — confirm this hook has the intended effect.
afterAll(() => {
  mock.restore();
});
|
|
269
|
+
|
|
270
|
+
describe('projectSkillTools', () => {
|
|
271
|
+
// Per-session tracking map handed to projectSkillTools as
// `previouslyActiveSkillIds`; the tests below read it as skill id -> version hash.
let sessionState: Map<string, string>;

// Reset every piece of shared mock state so tests cannot leak into each other.
beforeEach(() => {
  mockCatalog = [];
  mockManifests = {};
  mockRegisteredTools = new Map();
  mockUnregisteredSkillIds = [];
  mockSkillRefCount = new Map();
  mockVersionHashes = {};
  mockVersionHashErrors = new Set();
  sessionState = new Map<string, string>();
});
|
|
284
|
+
|
|
285
|
+
test('no active skills returns empty projection', () => {
  // An empty history with no preactivated ids must project nothing.
  const emptyHistory: Message[] = [];
  const projection = projectSkillTools(emptyHistory, {
    previouslyActiveSkillIds: sessionState,
  });

  expect(projection.toolDefinitions).toEqual([]);
  expect(projection.allowedToolNames.size).toBe(0);
});
|
|
291
|
+
|
|
292
|
+
test('active skill with valid manifest returns tool definitions', () => {
  const expectedNames = ['deploy_run', 'deploy_status'];
  mockCatalog = [makeSkill('deploy')];
  mockManifests = { deploy: makeManifest(expectedNames) };

  const messages: Message[] = skillLoadMessages('<loaded_skill id="deploy" />');
  const projection = projectSkillTools(messages, { previouslyActiveSkillIds: sessionState });

  // Definitions come back in manifest order; the allowed set mirrors them.
  expect(projection.toolDefinitions).toHaveLength(2);
  expect(projection.toolDefinitions.map((def) => def.name)).toEqual(expectedNames);
  expect(projection.allowedToolNames).toEqual(new Set(expectedNames));
});
|
|
311
|
+
|
|
312
|
+
test('multiple active skills are projected', () => {
  mockCatalog = ['deploy', 'oncall'].map((id) => makeSkill(id));
  mockManifests = {
    deploy: makeManifest(['deploy_run']),
    oncall: makeManifest(['oncall_page']),
  };

  // One skill_load exchange per skill, deploy first.
  const messages: Message[] = [
    '<loaded_skill id="deploy" />',
    '<loaded_skill id="oncall" />',
  ].flatMap((marker) => skillLoadMessages(marker));

  const projection = projectSkillTools(messages, { previouslyActiveSkillIds: sessionState });

  expect(projection.toolDefinitions).toHaveLength(2);
  expect(projection.allowedToolNames).toEqual(new Set(['deploy_run', 'oncall_page']));
});
|
|
331
|
+
|
|
332
|
+
test('preactivated skill IDs are included', () => {
  mockCatalog = [makeSkill('deploy'), makeSkill('oncall')];
  mockManifests = {
    deploy: makeManifest(['deploy_run']),
    oncall: makeManifest(['oncall_page']),
  };

  // History only loads deploy; oncall is supplied via preactivation.
  const messages: Message[] = skillLoadMessages('<loaded_skill id="deploy" />');
  const options = {
    preactivatedSkillIds: ['oncall'],
    previouslyActiveSkillIds: sessionState,
  };

  const projection = projectSkillTools(messages, options);

  expect(projection.toolDefinitions).toHaveLength(2);
  expect(projection.allowedToolNames).toEqual(new Set(['deploy_run', 'oncall_page']));
});
|
|
354
|
+
|
|
355
|
+
test('skill deactivation: previously active skill is unregistered when removed from history', () => {
  const project = (messages: Message[]) =>
    projectSkillTools(messages, { previouslyActiveSkillIds: sessionState });

  mockCatalog = [makeSkill('deploy'), makeSkill('oncall')];
  mockManifests = {
    deploy: makeManifest(['deploy_run']),
    oncall: makeManifest(['oncall_page']),
  };

  // Turn 1 — deploy and oncall are both loaded.
  project([
    ...skillLoadMessages('<loaded_skill id="deploy" />'),
    ...skillLoadMessages('<loaded_skill id="oncall" />'),
  ]);

  // Turn 2 — the oncall marker is gone, leaving only deploy.
  mockUnregisteredSkillIds = [];
  const secondTurn = project(skillLoadMessages('<loaded_skill id="deploy" />'));

  expect(mockUnregisteredSkillIds).toContain('oncall');
  expect(secondTurn.allowedToolNames).toEqual(new Set(['deploy_run']));
});
|
|
379
|
+
|
|
380
|
+
test('invalid/missing manifest is gracefully handled', () => {
  // "broken" is in the catalog but has no entry in mockManifests, so the
  // mocked manifest loader has nothing to return for it.
  mockCatalog = [makeSkill('broken')];

  const messages: Message[] = skillLoadMessages('<loaded_skill id="broken" />');
  const projection = projectSkillTools(messages, { previouslyActiveSkillIds: sessionState });

  // No exception escapes; the skill simply contributes no tools.
  expect(projection.toolDefinitions).toEqual([]);
  expect(projection.allowedToolNames.size).toBe(0);
});
|
|
394
|
+
|
|
395
|
+
test('skill ID not in catalog is gracefully skipped', () => {
  // Nothing is known about the skill: empty catalog, no manifests.
  mockCatalog = [];
  mockManifests = {};

  const messages: Message[] = skillLoadMessages('<loaded_skill id="nonexistent" />');
  const projection = projectSkillTools(messages, { previouslyActiveSkillIds: sessionState });

  expect(projection.toolDefinitions).toEqual([]);
  expect(projection.allowedToolNames.size).toBe(0);
});
|
|
408
|
+
|
|
409
|
+
test('skill with catalog miss on turn 1 is registered when catalog is populated on turn 2', () => {
  const project = (messages: Message[]) =>
    projectSkillTools(messages, { previouslyActiveSkillIds: sessionState });

  // Turn 1 — the marker is in history but the catalog is empty, so the skill
  // must not be tracked in session state.
  mockCatalog = [];
  mockManifests = {};
  const messages: Message[] = skillLoadMessages('<loaded_skill id="deploy" />');

  const firstTurn = project(messages);
  expect(firstTurn.toolDefinitions).toEqual([]);
  expect(sessionState.has('deploy')).toBe(false);

  // Turn 2 — same history, but the catalog and manifest now exist.
  mockCatalog = [makeSkill('deploy')];
  mockManifests = { deploy: makeManifest(['deploy_run']) };

  const secondTurn = project(messages);
  expect(secondTurn.toolDefinitions).toHaveLength(1);
  expect(secondTurn.toolDefinitions[0].name).toBe('deploy_run');
  expect(secondTurn.allowedToolNames.has('deploy_run')).toBe(true);
  expect(sessionState.has('deploy')).toBe(true);

  // The tool reached the (mock) registry, i.e. registerSkillTools ran.
  expect(mockRegisteredTools.has('deploy')).toBe(true);
});
|
|
435
|
+
|
|
436
|
+
test('skill with manifest failure on turn 1 is registered when manifest is available on turn 2', () => {
  const project = (messages: Message[]) =>
    projectSkillTools(messages, { previouslyActiveSkillIds: sessionState });

  // Turn 1 — the skill is catalogued but has no manifest to load.
  mockCatalog = [makeSkill('deploy')];
  mockManifests = {};
  const messages: Message[] = skillLoadMessages('<loaded_skill id="deploy" />');

  const firstTurn = project(messages);
  expect(firstTurn.toolDefinitions).toEqual([]);
  expect(sessionState.has('deploy')).toBe(false);

  // Turn 2 — the manifest shows up.
  mockManifests = { deploy: makeManifest(['deploy_run']) };

  const secondTurn = project(messages);
  expect(secondTurn.toolDefinitions).toHaveLength(1);
  expect(secondTurn.toolDefinitions[0].name).toBe('deploy_run');
  expect(sessionState.has('deploy')).toBe(true);
  expect(mockRegisteredTools.has('deploy')).toBe(true);
});
|
|
458
|
+
|
|
459
|
+
test('previously-registered skill that transiently fails is unregistered to prevent refcount leak', () => {
  const project = (messages: Message[]) =>
    projectSkillTools(messages, { previouslyActiveSkillIds: sessionState });
  const workingManifests = () => ({ deploy: makeManifest(['deploy_run']) });

  mockCatalog = [makeSkill('deploy')];
  mockManifests = workingManifests();
  const messages: Message[] = skillLoadMessages('<loaded_skill id="deploy" />');

  // Turn 1 — registration succeeds and takes one reference.
  project(messages);
  expect(sessionState.has('deploy')).toBe(true);
  expect(mockSkillRefCount.get('deploy')).toBe(1);

  // Turn 2 — the manifest disappears; the skill must be unregistered and the
  // reference released (the count entry is removed entirely).
  mockManifests = {};
  mockUnregisteredSkillIds = [];
  project(messages);
  expect(sessionState.has('deploy')).toBe(false);
  expect(mockUnregisteredSkillIds).toContain('deploy');
  expect(mockSkillRefCount.has('deploy')).toBe(false);

  // Turn 3 — the manifest is back; re-registration yields exactly one
  // reference, not two.
  mockManifests = workingManifests();
  mockUnregisteredSkillIds = [];
  const thirdTurn = project(messages);
  expect(thirdTurn.toolDefinitions).toHaveLength(1);
  expect(sessionState.has('deploy')).toBe(true);
  expect(mockSkillRefCount.get('deploy')).toBe(1);
});
|
|
490
|
+
|
|
491
|
+
test('skill version hash change triggers unregister and re-register', () => {
  const project = (messages: Message[]) =>
    projectSkillTools(messages, { previouslyActiveSkillIds: sessionState });

  mockCatalog = [makeSkill('deploy')];
  mockManifests = { deploy: makeManifest(['deploy_run']) };
  mockVersionHashes = { deploy: 'v1:hash-aaa' };
  const messages: Message[] = skillLoadMessages('<loaded_skill id="deploy" />');

  // Turn 1 — registered under hash-aaa.
  project(messages);
  expect(sessionState.has('deploy')).toBe(true);
  expect(sessionState.get('deploy')).toBe('v1:hash-aaa');
  expect(mockSkillRefCount.get('deploy')).toBe(1);

  // Turn 2 — the hash moves to hash-bbb, so the stale registration is
  // dropped and a new one is created.
  mockVersionHashes = { deploy: 'v1:hash-bbb' };
  mockUnregisteredSkillIds = [];
  const secondTurn = project(messages);
  expect(secondTurn.toolDefinitions).toHaveLength(1);
  expect(secondTurn.toolDefinitions[0].name).toBe('deploy_run');
  expect(sessionState.get('deploy')).toBe('v1:hash-bbb');
  expect(mockUnregisteredSkillIds).toContain('deploy');
  // One release plus one acquire leaves the count at 1.
  expect(mockSkillRefCount.get('deploy')).toBe(1);
});
|
|
518
|
+
|
|
519
|
+
test('skill version hash unchanged skips re-registration', () => {
  const project = (messages: Message[]) =>
    projectSkillTools(messages, { previouslyActiveSkillIds: sessionState });

  mockCatalog = [makeSkill('deploy')];
  mockManifests = { deploy: makeManifest(['deploy_run']) };
  mockVersionHashes = { deploy: 'v1:stable-hash' };
  const messages: Message[] = skillLoadMessages('<loaded_skill id="deploy" />');

  // Turn 1 — initial registration.
  project(messages);
  expect(mockSkillRefCount.get('deploy')).toBe(1);

  // Turn 2 — identical hash: no unregister call and no extra reference.
  mockUnregisteredSkillIds = [];
  const secondTurn = project(messages);
  expect(secondTurn.toolDefinitions).toHaveLength(1);
  expect(mockUnregisteredSkillIds).not.toContain('deploy');
  expect(mockSkillRefCount.get('deploy')).toBe(1);
});
|
|
540
|
+
|
|
541
|
+
test('preactivated IDs merge with context-derived IDs (dedup)', () => {
  mockCatalog = [makeSkill('deploy')];
  mockManifests = { deploy: makeManifest(['deploy_run']) };

  // deploy is named twice: once by the history marker, once by preactivation.
  const messages: Message[] = skillLoadMessages('<loaded_skill id="deploy" />');
  const options = {
    preactivatedSkillIds: ['deploy'],
    previouslyActiveSkillIds: sessionState,
  };

  const projection = projectSkillTools(messages, options);

  // Still a single tool — the two sources collapse into one skill.
  expect(projection.toolDefinitions).toHaveLength(1);
  expect(projection.allowedToolNames).toEqual(new Set(['deploy_run']));
});
|
|
558
|
+
|
|
559
|
+
test('no markers in history with preactivated IDs still projects tools', () => {
  mockCatalog = [makeSkill('oncall')];
  mockManifests = { oncall: makeManifest(['oncall_page']) };

  // Empty history: preactivation is the only source of active skills.
  const emptyHistory: Message[] = [];
  const options = {
    preactivatedSkillIds: ['oncall'],
    previouslyActiveSkillIds: sessionState,
  };

  const projection = projectSkillTools(emptyHistory, options);

  expect(projection.toolDefinitions).toHaveLength(1);
  expect(projection.allowedToolNames).toEqual(new Set(['oncall_page']));
});
|
|
571
|
+
|
|
572
|
+
test('concurrent sessions do not interfere with each other', () => {
  mockCatalog = [makeSkill('deploy'), makeSkill('oncall')];
  mockManifests = {
    deploy: makeManifest(['deploy_run']),
    oncall: makeManifest(['oncall_page']),
  };

  // Each session owns an independent tracking map.
  const stateA = new Map<string, string>();
  const stateB = new Map<string, string>();

  // Session A loads deploy.
  const messagesA: Message[] = skillLoadMessages('<loaded_skill id="deploy" />');
  const projectionA = projectSkillTools(messagesA, { previouslyActiveSkillIds: stateA });
  expect(projectionA.allowedToolNames.has('deploy_run')).toBe(true);

  // Session B loads oncall; that must not unregister A's deploy.
  mockUnregisteredSkillIds = [];
  const messagesB: Message[] = skillLoadMessages('<loaded_skill id="oncall" />');
  projectSkillTools(messagesB, { previouslyActiveSkillIds: stateB });
  expect(mockUnregisteredSkillIds).not.toContain('deploy');

  // Each session still tracks only its own skill.
  expect(stateA.has('deploy')).toBe(true);
  expect(stateB.has('oncall')).toBe(true);
});
|
|
597
|
+
|
|
598
|
+
test('disposing session A while session B uses the same skill does NOT remove tools', () => {
  mockCatalog = [makeSkill('deploy')];
  mockManifests = { deploy: makeManifest(['deploy_run']) };

  const stateA = new Map<string, string>();
  const stateB = new Map<string, string>();
  const messages: Message[] = skillLoadMessages('<loaded_skill id="deploy" />');

  // Both sessions load deploy, so two references are held.
  for (const state of [stateA, stateB]) {
    projectSkillTools(messages, { previouslyActiveSkillIds: state });
  }
  expect(mockSkillRefCount.get('deploy')).toBe(2);

  // Session A is torn down.
  resetSkillToolProjection(stateA);

  // One reference remains, so the tools stay registered.
  expect(mockRegisteredTools.has('deploy')).toBe(true);
  expect(mockSkillRefCount.get('deploy')).toBe(1);

  // Session B keeps projecting the skill's tools.
  const projectionB = projectSkillTools(messages, { previouslyActiveSkillIds: stateB });
  expect(projectionB.allowedToolNames.has('deploy_run')).toBe(true);
});
|
|
625
|
+
|
|
626
|
+
test('tools ARE removed when the last session using them disposes', () => {
  mockCatalog = [makeSkill('deploy')];
  mockManifests = { deploy: makeManifest(['deploy_run']) };

  const stateA = new Map<string, string>();
  const stateB = new Map<string, string>();
  const messages: Message[] = skillLoadMessages('<loaded_skill id="deploy" />');

  // Both sessions load deploy.
  for (const state of [stateA, stateB]) {
    projectSkillTools(messages, { previouslyActiveSkillIds: state });
  }

  // First teardown: session B still holds a reference.
  resetSkillToolProjection(stateA);
  expect(mockRegisteredTools.has('deploy')).toBe(true);

  // Second teardown: the last reference is released and the tools go away.
  resetSkillToolProjection(stateB);
  expect(mockRegisteredTools.has('deploy')).toBe(false);
  expect(mockSkillRefCount.has('deploy')).toBe(false);
});
|
|
647
|
+
});
|
|
648
|
+
|
|
649
|
+
// ---------------------------------------------------------------------------
|
|
650
|
+
// resolveTools callback integration tests
|
|
651
|
+
// ---------------------------------------------------------------------------
|
|
652
|
+
|
|
653
|
+
describe('resolveTools callback (session wiring)', () => {
  // Simulates the resolveTools callback wired in the Session constructor:
  //   (history) => [...baseToolDefs, ...projectSkillTools(history).toolDefinitions]
  const baseToolDefs: ToolDefinition[] = [
    { name: 'file_read', description: 'Read a file', input_schema: { type: 'object', properties: {} } },
    { name: 'bash', description: 'Run a shell command', input_schema: { type: 'object', properties: {} } },
  ];

  // Tracking map passed to projectSkillTools as `previouslyActiveSkillIds`.
  let sessionState: Map<string, string>;

  /**
   * Builds a resolver that returns `base` followed by the tool definitions
   * projected from the given history. `sessionState` is read at call time, so
   * the resolver always uses the map installed by the latest `beforeEach`.
   */
  function makeResolveTools(base: ToolDefinition[]) {
    return (history: Message[]): ToolDefinition[] => {
      const projection = projectSkillTools(history, { previouslyActiveSkillIds: sessionState });
      return [...base, ...projection.toolDefinitions];
    };
  }

  // Reset every piece of shared mock state so tests cannot leak into each other.
  beforeEach(() => {
    mockCatalog = [];
    mockManifests = {};
    mockRegisteredTools = new Map();
    mockUnregisteredSkillIds = [];
    mockSkillRefCount = new Map();
    mockVersionHashes = {};
    mockVersionHashErrors = new Set();
    sessionState = new Map<string, string>();
  });

  test('returns only base tools when no skills are active', () => {
    const resolveTools = makeResolveTools(baseToolDefs);
    const result = resolveTools([]);

    expect(result).toHaveLength(2);
    expect(result.map((d) => d.name)).toEqual(['file_read', 'bash']);
  });

  test('combines base tools with projected skill tools', () => {
    mockCatalog = [makeSkill('deploy')];
    mockManifests = { deploy: makeManifest(['deploy_run', 'deploy_status']) };

    const resolveTools = makeResolveTools(baseToolDefs);
    const history: Message[] = [
      ...skillLoadMessages('<loaded_skill id="deploy" />'),
    ];

    const result = resolveTools(history);

    expect(result).toHaveLength(4);
    expect(result.map((d) => d.name)).toEqual([
      'file_read',
      'bash',
      'deploy_run',
      'deploy_status',
    ]);
  });

  test('skill tools appear after base tools and do not replace them', () => {
    mockCatalog = [makeSkill('oncall')];
    mockManifests = { oncall: makeManifest(['oncall_page']) };

    const resolveTools = makeResolveTools(baseToolDefs);
    const history: Message[] = [
      ...skillLoadMessages('<loaded_skill id="oncall" />'),
    ];

    const result = resolveTools(history);

    // Base tools come first, skill tools are appended
    expect(result[0].name).toBe('file_read');
    expect(result[1].name).toBe('bash');
    expect(result[2].name).toBe('oncall_page');
  });

  test('multiple skills add all their tools alongside base tools', () => {
    mockCatalog = [makeSkill('deploy'), makeSkill('oncall')];
    mockManifests = {
      deploy: makeManifest(['deploy_run']),
      oncall: makeManifest(['oncall_page', 'oncall_ack']),
    };

    const resolveTools = makeResolveTools(baseToolDefs);
    const history: Message[] = [
      ...skillLoadMessages('<loaded_skill id="deploy" />'),
      ...skillLoadMessages('<loaded_skill id="oncall" />'),
    ];

    const result = resolveTools(history);

    // Two base tools plus one deploy tool plus two oncall tools.
    expect(result).toHaveLength(5);
    const names = result.map((d) => d.name);
    expect(names).toContain('file_read');
    expect(names).toContain('bash');
    expect(names).toContain('deploy_run');
    expect(names).toContain('oncall_page');
    expect(names).toContain('oncall_ack');
  });
});
|
|
751
|
+
|
|
752
|
+
// ---------------------------------------------------------------------------
|
|
753
|
+
// Tests — allowed tool set merging with core tools
|
|
754
|
+
// ---------------------------------------------------------------------------
|
|
755
|
+
|
|
756
|
+
describe('allowed tool set merging', () => {
|
|
757
|
+
// Stand-in for the always-available core tool names.
const CORE_TOOL_NAMES = new Set(['bash', 'file_read', 'file_write', 'file_edit']);
// Tracking map passed to projectSkillTools as `previouslyActiveSkillIds`.
let sessionState: Map<string, string>;

// Reset every piece of shared mock state so tests cannot leak into each other.
beforeEach(() => {
  mockCatalog = [];
  mockManifests = {};
  mockRegisteredTools = new Map();
  mockUnregisteredSkillIds = [];
  mockSkillRefCount = new Map();
  mockVersionHashes = {};
  mockVersionHashErrors = new Set();
  sessionState = new Map<string, string>();
});
|
|
771
|
+
|
|
772
|
+
/**
 * Mirrors the merge performed in session.ts: the allowed set is the union of
 * the core tool names and the names projected from active skills.
 *
 * @param projection Anything exposing the projected `allowedToolNames`.
 * @returns A new Set; neither input set is modified.
 */
function buildAllowedSet(projection: { allowedToolNames: Set<string> }): Set<string> {
  return new Set([...CORE_TOOL_NAMES, ...projection.allowedToolNames]);
}
|
|
783
|
+
|
|
784
|
+
test('core tools are always included even with no active skills', () => {
  const emptyHistory: Message[] = [];
  const allowed = buildAllowedSet(
    projectSkillTools(emptyHistory, { previouslyActiveSkillIds: sessionState }),
  );

  // Every core tool must survive the merge with an empty projection.
  [...CORE_TOOL_NAMES].forEach((coreName) => {
    expect(allowed.has(coreName)).toBe(true);
  });
});
|
|
792
|
+
|
|
793
|
+
test('active skill tools are included alongside core tools', () => {
  mockCatalog = [makeSkill('deploy')];
  mockManifests = { deploy: makeManifest(['deploy_run', 'deploy_status']) };

  const messages: Message[] = skillLoadMessages('<loaded_skill id="deploy" />');
  const allowed = buildAllowedSet(
    projectSkillTools(messages, { previouslyActiveSkillIds: sessionState }),
  );

  // Core tools are all present...
  [...CORE_TOOL_NAMES].forEach((coreName) => {
    expect(allowed.has(coreName)).toBe(true);
  });
  // ...and so are the tools of the active skill.
  expect(allowed.has('deploy_run')).toBe(true);
  expect(allowed.has('deploy_status')).toBe(true);
});
|
|
812
|
+
|
|
813
|
+
// Two skills are in the catalog but only "deploy" has a marker in history;
// the tool of the unmarked skill must not appear in the allowed set.
test('inactive skill tools are NOT in the allowed set', () => {
  mockCatalog = [makeSkill('deploy'), makeSkill('oncall')];
  mockManifests = {
    deploy: makeManifest(['deploy_run']),
    oncall: makeManifest(['oncall_page']),
  };

  // Only deploy is active
  const history: Message[] = [
    ...skillLoadMessages('<loaded_skill id="deploy" />'),
  ];

  const projection = projectSkillTools(history, { previouslyActiveSkillIds: sessionState });
  const allowed = buildAllowedSet(projection);

  expect(allowed.has('deploy_run')).toBe(true);
  // oncall_page is not active — not in projection, not in allowed set
  expect(allowed.has('oncall_page')).toBe(false);
});
|
|
832
|
+
|
|
833
|
+
// Projects twice against the same session state: first with both skill
// markers, then with only the deploy marker, and checks the allowed set
// follows the history each time.
test('allowed set updates when skills activate and deactivate', () => {
  mockCatalog = [makeSkill('deploy'), makeSkill('oncall')];
  mockManifests = {
    deploy: makeManifest(['deploy_run']),
    oncall: makeManifest(['oncall_page']),
  };

  // Turn 1: both active
  const history1: Message[] = [
    ...skillLoadMessages('<loaded_skill id="deploy" />'),
    ...skillLoadMessages('<loaded_skill id="oncall" />'),
  ];
  const projection1 = projectSkillTools(history1, { previouslyActiveSkillIds: sessionState });
  const allowed1 = buildAllowedSet(projection1);

  expect(allowed1.has('deploy_run')).toBe(true);
  expect(allowed1.has('oncall_page')).toBe(true);

  // Turn 2: only deploy remains
  const history2: Message[] = [
    ...skillLoadMessages('<loaded_skill id="deploy" />'),
  ];
  const projection2 = projectSkillTools(history2, { previouslyActiveSkillIds: sessionState });
  const allowed2 = buildAllowedSet(projection2);

  expect(allowed2.has('deploy_run')).toBe(true);
  expect(allowed2.has('oncall_page')).toBe(false);
  // Core tools still present
  for (const core of CORE_TOOL_NAMES) {
    expect(allowed2.has(core)).toBe(true);
  }
});
|
|
865
|
+
});
|
|
866
|
+
|
|
867
|
+
// ---------------------------------------------------------------------------
|
|
868
|
+
// End-to-end mid-run activation tests
|
|
869
|
+
// ---------------------------------------------------------------------------
|
|
870
|
+
|
|
871
|
+
// ── Security invariant (PR 34): skill_load is the permission gate ──
// In strict mode, skill_load requires an explicit trust rule before the
// tool executor emits a <loaded_skill> marker. Without that marker in
// the conversation history, projectSkillTools will never activate the
// skill's tools. The permission enforcement lives in checker.ts; the
// tests here verify that tool activation only occurs when markers are
// present — meaning the permission check already succeeded.

describe('skill activation requires loaded_skill marker (security invariant)', () => {
  let sessionState: Map<string, string>;

  // Start every test from empty mocks and an empty session map.
  beforeEach(() => {
    mockCatalog = [];
    mockManifests = {};
    mockRegisteredTools = new Map();
    mockUnregisteredSkillIds = [];
    mockSkillRefCount = new Map();
    mockVersionHashes = {};
    mockVersionHashErrors = new Set();
    sessionState = new Map<string, string>();
  });

  test('skill_load tool_use without tool_result marker does not activate skill tools', () => {
    mockCatalog = [makeSkill('gated')];
    mockManifests = { gated: makeManifest(['gated_action']) };

    // History has a skill_load call but NO tool_result with a
    // <loaded_skill> marker — simulating a permission denial or pending
    // prompt in strict mode where the tool never executed.
    const history: Message[] = [
      {
        role: 'assistant',
        content: [{ type: 'tool_use', id: 'sl-gate-1', name: 'skill_load', input: { skill_id: 'gated' } }],
      },
      {
        role: 'user',
        content: [{ type: 'tool_result', tool_use_id: 'sl-gate-1', content: 'Permission denied.' }],
      },
    ];

    const result = projectSkillTools(history, { previouslyActiveSkillIds: sessionState });
    expect(result.toolDefinitions).toHaveLength(0);
    expect(result.allowedToolNames.size).toBe(0);
  });

  test('skill_load with valid marker activates skill tools (approved path)', () => {
    mockCatalog = [makeSkill('approved')];
    mockManifests = { approved: makeManifest(['approved_action']) };

    const history: Message[] = [
      ...skillLoadMessages('<loaded_skill id="approved" />'),
    ];

    const result = projectSkillTools(history, { previouslyActiveSkillIds: sessionState });
    expect(result.toolDefinitions).toHaveLength(1);
    expect(result.toolDefinitions[0].name).toBe('approved_action');
    expect(result.allowedToolNames.has('approved_action')).toBe(true);
  });
});
|
|
930
|
+
|
|
931
|
+
/**
 * Checks that skill tools become available in the middle of a run: each
 * resolver call re-projects tools against the conversation history so far,
 * the way the agent loop re-projects tools between turns within one run.
 */
describe('mid-run skill tool activation (end-to-end)', () => {
  const baseToolDefs: ToolDefinition[] = [
    { name: 'file_read', description: 'Read a file', input_schema: { type: 'object', properties: {} } },
    { name: 'bash', description: 'Run a shell command', input_schema: { type: 'object', properties: {} } },
  ];

  const CORE_TOOL_NAMES = new Set(['bash', 'file_read', 'file_write', 'file_edit']);
  let sessionState: Map<string, string>;

  /**
   * Returns a resolver that projects skill tools for a history and merges
   * them with the given base definitions and the core tool names.
   */
  function makeResolveTools(base: ToolDefinition[]) {
    return (history: Message[]) => {
      const projection = projectSkillTools(history, { previouslyActiveSkillIds: sessionState });
      return {
        toolDefinitions: [...base, ...projection.toolDefinitions],
        allowedToolNames: new Set([...CORE_TOOL_NAMES, ...projection.allowedToolNames]),
      };
    };
  }

  /**
   * Builds the two messages of a successful skill_load exchange: the
   * assistant's tool_use and the user-role tool_result carrying the
   * <loaded_skill> marker for `skillId`.
   */
  function skillLoadTurn(toolUseId: string, skillId: string): Message[] {
    return [
      {
        role: 'assistant',
        content: [{ type: 'tool_use', id: toolUseId, name: 'skill_load', input: { skill_id: skillId } }],
      },
      {
        role: 'user',
        content: [
          { type: 'tool_result', tool_use_id: toolUseId, content: `<loaded_skill id="${skillId}" />` },
        ],
      },
    ];
  }

  // Start every test from empty mocks and an empty session map.
  beforeEach(() => {
    mockCatalog = [];
    mockManifests = {};
    mockRegisteredTools = new Map();
    mockUnregisteredSkillIds = [];
    // Previously reset twice in a row; once is sufficient.
    mockSkillRefCount = new Map();
    mockVersionHashes = {};
    mockVersionHashErrors = new Set();
    sessionState = new Map<string, string>();
  });

  test('Turn 1 calls skill_load → Turn 2 sees added tool', () => {
    mockCatalog = [makeSkill('deploy')];
    mockManifests = { deploy: makeManifest(['deploy_run']) };

    const resolveTools = makeResolveTools(baseToolDefs);

    // Turn 1: no skill markers in history yet
    const historyTurn1: Message[] = [
      { role: 'user', content: [{ type: 'text', text: 'Please deploy' }] },
      { role: 'assistant', content: [{ type: 'text', text: 'Let me load the deploy skill.' }] },
    ];

    const turn1Result = resolveTools(historyTurn1);
    expect(turn1Result.toolDefinitions.map((d) => d.name)).toEqual(['file_read', 'bash']);
    expect(turn1Result.allowedToolNames.has('deploy_run')).toBe(false);

    // Simulate skill_load output appended as a tool result in the same run
    const historyTurn2: Message[] = [
      ...historyTurn1,
      ...skillLoadTurn('skill-load-1', 'deploy'),
    ];

    const turn2Result = resolveTools(historyTurn2);
    expect(turn2Result.toolDefinitions.map((d) => d.name)).toEqual([
      'file_read',
      'bash',
      'deploy_run',
    ]);
    expect(turn2Result.allowedToolNames.has('deploy_run')).toBe(true);
  });

  test('activation succeeds without requiring a new user message', () => {
    mockCatalog = [makeSkill('monitor')];
    mockManifests = { monitor: makeManifest(['monitor_check', 'monitor_alert']) };

    const resolveTools = makeResolveTools(baseToolDefs);

    // History contains only the initial user message and the assistant's
    // tool_use that triggered skill_load, followed by the tool result.
    // No second user message is present — the agent loop re-projects
    // tools between turns within the same run.
    const history: Message[] = [
      { role: 'user', content: [{ type: 'text', text: 'Monitor the service' }] },
      ...skillLoadTurn('tu-1', 'monitor'),
    ];

    const result = resolveTools(history);

    // Skill tools appear without needing another user message
    expect(result.toolDefinitions.map((d) => d.name)).toContain('monitor_check');
    expect(result.toolDefinitions.map((d) => d.name)).toContain('monitor_alert');
    expect(result.allowedToolNames.has('monitor_check')).toBe(true);
    expect(result.allowedToolNames.has('monitor_alert')).toBe(true);

    // Core tools remain accessible
    for (const core of CORE_TOOL_NAMES) {
      expect(result.allowedToolNames.has(core)).toBe(true);
    }
  });

  test('multiple skills can activate in sequence across turns', () => {
    mockCatalog = [makeSkill('deploy'), makeSkill('oncall'), makeSkill('metrics')];
    mockManifests = {
      deploy: makeManifest(['deploy_run']),
      oncall: makeManifest(['oncall_page']),
      metrics: makeManifest(['metrics_query', 'metrics_dashboard']),
    };

    const resolveTools = makeResolveTools(baseToolDefs);

    // Step 1: Load skill A (deploy)
    const historyAfterA: Message[] = [
      { role: 'user', content: [{ type: 'text', text: 'I need to deploy and check oncall' }] },
      ...skillLoadTurn('tu-1', 'deploy'),
    ];

    const resultA = resolveTools(historyAfterA);
    const namesA = resultA.toolDefinitions.map((d) => d.name);
    expect(namesA).toContain('deploy_run');
    expect(namesA).not.toContain('oncall_page');
    expect(namesA).not.toContain('metrics_query');

    // Step 2: Load skill B (oncall) — deploy should remain active
    const historyAfterB: Message[] = [
      ...historyAfterA,
      ...skillLoadTurn('tu-2', 'oncall'),
    ];

    const resultB = resolveTools(historyAfterB);
    const namesB = resultB.toolDefinitions.map((d) => d.name);
    expect(namesB).toContain('deploy_run');
    expect(namesB).toContain('oncall_page');
    expect(namesB).not.toContain('metrics_query');

    // Step 3: Load skill C (metrics) — all three should be active
    const historyAfterC: Message[] = [
      ...historyAfterB,
      ...skillLoadTurn('tu-3', 'metrics'),
    ];

    const resultC = resolveTools(historyAfterC);
    const namesC = resultC.toolDefinitions.map((d) => d.name);
    expect(namesC).toContain('deploy_run');
    expect(namesC).toContain('oncall_page');
    expect(namesC).toContain('metrics_query');
    expect(namesC).toContain('metrics_dashboard');

    // Verify allowed tool names include all skill tools plus core tools
    expect(resultC.allowedToolNames.has('deploy_run')).toBe(true);
    expect(resultC.allowedToolNames.has('oncall_page')).toBe(true);
    expect(resultC.allowedToolNames.has('metrics_query')).toBe(true);
    expect(resultC.allowedToolNames.has('metrics_dashboard')).toBe(true);
    for (const core of CORE_TOOL_NAMES) {
      expect(resultC.allowedToolNames.has(core)).toBe(true);
    }
  });
});
|
|
1124
|
+
|
|
1125
|
+
// ---------------------------------------------------------------------------
// Context-derived deactivation regression tests
|
|
1126
|
+
// ---------------------------------------------------------------------------
|
|
1127
|
+
|
|
1128
|
+
/**
 * Regression tests for deactivation driven purely by conversation context:
 * when a <loaded_skill> marker is no longer in the history passed to the
 * projection, that skill's tools must drop out of the definitions and the
 * allowed set, and the skill must be reported as unregistered.
 */
describe('context-derived deactivation regression', () => {
  const baseToolDefs: ToolDefinition[] = [
    { name: 'file_read', description: 'Read a file', input_schema: { type: 'object', properties: {} } },
    { name: 'bash', description: 'Run a shell command', input_schema: { type: 'object', properties: {} } },
  ];

  const CORE_TOOL_NAMES = new Set(['bash', 'file_read', 'file_write', 'file_edit']);
  let sessionState: Map<string, string>;

  /**
   * Returns a resolver that projects skill tools for a history and merges
   * them with the given base definitions and the core tool names.
   */
  function makeResolveTools(base: ToolDefinition[]) {
    return (history: Message[]) => {
      const projection = projectSkillTools(history, { previouslyActiveSkillIds: sessionState });
      return {
        toolDefinitions: [...base, ...projection.toolDefinitions],
        allowedToolNames: new Set([...CORE_TOOL_NAMES, ...projection.allowedToolNames]),
      };
    };
  }

  // Start every test from empty mocks and an empty session map.
  beforeEach(() => {
    mockCatalog = [];
    mockManifests = {};
    mockRegisteredTools = new Map();
    mockUnregisteredSkillIds = [];
    // Previously reset twice in a row; once is sufficient.
    mockSkillRefCount = new Map();
    mockVersionHashes = {};
    mockVersionHashErrors = new Set();
    sessionState = new Map<string, string>();
  });

  test('tool definitions shrink when skill load marker is removed from history', () => {
    mockCatalog = [makeSkill('deploy'), makeSkill('oncall')];
    mockManifests = {
      deploy: makeManifest(['deploy_run']),
      oncall: makeManifest(['oncall_page', 'oncall_ack']),
    };

    const resolveTools = makeResolveTools(baseToolDefs);

    // Turn 1: both skills active
    const history1: Message[] = [
      ...skillLoadMessages('<loaded_skill id="deploy" />'),
      ...skillLoadMessages('<loaded_skill id="oncall" />'),
    ];
    const result1 = resolveTools(history1);
    expect(result1.toolDefinitions).toHaveLength(5); // 2 base + 3 skill tools
    expect(result1.toolDefinitions.map((d) => d.name)).toContain('oncall_page');
    expect(result1.toolDefinitions.map((d) => d.name)).toContain('oncall_ack');

    // Turn 2: oncall marker removed from history (truncated)
    const history2: Message[] = [
      ...skillLoadMessages('<loaded_skill id="deploy" />'),
    ];
    const result2 = resolveTools(history2);

    // Tool definitions should only have base + deploy tools
    expect(result2.toolDefinitions).toHaveLength(3); // 2 base + 1 skill tool
    expect(result2.toolDefinitions.map((d) => d.name)).not.toContain('oncall_page');
    expect(result2.toolDefinitions.map((d) => d.name)).not.toContain('oncall_ack');
    expect(result2.toolDefinitions.map((d) => d.name)).toContain('deploy_run');
  });

  test('executor blocks the tool after deactivation — allowedToolNames excludes it', () => {
    mockCatalog = [makeSkill('deploy'), makeSkill('oncall')];
    mockManifests = {
      deploy: makeManifest(['deploy_run']),
      oncall: makeManifest(['oncall_page']),
    };

    const resolveTools = makeResolveTools(baseToolDefs);

    // Turn 1: both skills active, both tools allowed
    const history1: Message[] = [
      ...skillLoadMessages('<loaded_skill id="deploy" />'),
      ...skillLoadMessages('<loaded_skill id="oncall" />'),
    ];
    const result1 = resolveTools(history1);
    expect(result1.allowedToolNames.has('oncall_page')).toBe(true);
    expect(result1.allowedToolNames.has('deploy_run')).toBe(true);

    // Turn 2: oncall marker gone — its tool should be blocked
    const history2: Message[] = [
      ...skillLoadMessages('<loaded_skill id="deploy" />'),
    ];
    const result2 = resolveTools(history2);

    // oncall_page is no longer in allowedToolNames — executor would block it
    expect(result2.allowedToolNames.has('oncall_page')).toBe(false);
    // deploy_run remains allowed
    expect(result2.allowedToolNames.has('deploy_run')).toBe(true);
    // Core tools remain allowed
    for (const core of CORE_TOOL_NAMES) {
      expect(result2.allowedToolNames.has(core)).toBe(true);
    }
  });

  test('unregisterSkillTools is called for deactivated skill', () => {
    mockCatalog = [makeSkill('deploy'), makeSkill('oncall')];
    mockManifests = {
      deploy: makeManifest(['deploy_run']),
      oncall: makeManifest(['oncall_page']),
    };

    // Turn 1: both active
    const history1: Message[] = [
      ...skillLoadMessages('<loaded_skill id="deploy" />'),
      ...skillLoadMessages('<loaded_skill id="oncall" />'),
    ];
    projectSkillTools(history1, { previouslyActiveSkillIds: sessionState });

    // Clear tracking before turn 2
    mockUnregisteredSkillIds = [];

    // Turn 2: deploy marker gone
    const history2: Message[] = [
      ...skillLoadMessages('<loaded_skill id="oncall" />'),
    ];
    projectSkillTools(history2, { previouslyActiveSkillIds: sessionState });

    expect(mockUnregisteredSkillIds).toContain('deploy');
    expect(mockUnregisteredSkillIds).not.toContain('oncall');
  });

  test('all skills deactivate when all markers leave history', () => {
    mockCatalog = [makeSkill('deploy'), makeSkill('oncall')];
    mockManifests = {
      deploy: makeManifest(['deploy_run']),
      oncall: makeManifest(['oncall_page']),
    };

    const resolveTools = makeResolveTools(baseToolDefs);

    // Turn 1: both skills active
    const history1: Message[] = [
      ...skillLoadMessages('<loaded_skill id="deploy" />'),
      ...skillLoadMessages('<loaded_skill id="oncall" />'),
    ];
    const result1 = resolveTools(history1);
    expect(result1.toolDefinitions).toHaveLength(4); // 2 base + 2 skill

    // Clear tracking before turn 2
    mockUnregisteredSkillIds = [];

    // Turn 2: all markers gone (e.g. context window fully truncated)
    const history2: Message[] = [
      { role: 'user', content: [{ type: 'text', text: 'Continue working' }] },
    ];
    const result2 = resolveTools(history2);

    // Only base tools remain
    expect(result2.toolDefinitions).toHaveLength(2);
    expect(result2.toolDefinitions.map((d) => d.name)).toEqual(['file_read', 'bash']);

    // Both skills were unregistered
    expect(mockUnregisteredSkillIds).toContain('deploy');
    expect(mockUnregisteredSkillIds).toContain('oncall');

    // No skill tools in allowed set
    expect(result2.allowedToolNames.has('deploy_run')).toBe(false);
    expect(result2.allowedToolNames.has('oncall_page')).toBe(false);

    // Core tools still present
    for (const core of CORE_TOOL_NAMES) {
      expect(result2.allowedToolNames.has(core)).toBe(true);
    }
  });

  test('skill can reactivate after deactivation', () => {
    mockCatalog = [makeSkill('deploy')];
    mockManifests = {
      deploy: makeManifest(['deploy_run']),
    };

    const resolveTools = makeResolveTools(baseToolDefs);

    // Turn 1: deploy active
    const history1: Message[] = [
      ...skillLoadMessages('<loaded_skill id="deploy" />'),
    ];
    const result1 = resolveTools(history1);
    expect(result1.allowedToolNames.has('deploy_run')).toBe(true);

    // Turn 2: marker gone — deactivated
    const history2: Message[] = [];
    const result2 = resolveTools(history2);
    expect(result2.allowedToolNames.has('deploy_run')).toBe(false);

    // Turn 3: marker reappears — reactivated
    const history3: Message[] = [
      ...skillLoadMessages('<loaded_skill id="deploy" />'),
    ];
    const result3 = resolveTools(history3);
    expect(result3.allowedToolNames.has('deploy_run')).toBe(true);
    expect(result3.toolDefinitions.map((d) => d.name)).toContain('deploy_run');
  });
});
|
|
1325
|
+
|
|
1326
|
+
// ---------------------------------------------------------------------------
|
|
1327
|
+
// Slash preactivation tests
|
|
1328
|
+
// ---------------------------------------------------------------------------
|
|
1329
|
+
|
|
1330
|
+
/**
 * Tests for the `preactivatedSkillIds` option of projectSkillTools: skills
 * named there should have their tools projected even when the history has
 * no <loaded_skill> marker for them.
 */
describe('slash preactivation through session processing', () => {
  let sessionState: Map<string, string>;

  // Start every test from empty mocks and an empty session map.
  beforeEach(() => {
    mockCatalog = [];
    mockManifests = {};
    mockRegisteredTools = new Map();
    mockUnregisteredSkillIds = [];
    // Previously reset twice in a row; once is sufficient.
    mockSkillRefCount = new Map();
    mockVersionHashes = {};
    mockVersionHashErrors = new Set();
    sessionState = new Map<string, string>();
  });

  test('slash-known skill has its tools available on first projection (turn-0)', () => {
    mockCatalog = [makeSkill('deploy')];
    mockManifests = { deploy: makeManifest(['deploy_run', 'deploy_status']) };

    // Empty history — no loaded_skill markers yet. The skill is preactivated
    // via slash resolution, so its tools should be available immediately.
    const emptyHistory: Message[] = [];

    const result = projectSkillTools(emptyHistory, {
      preactivatedSkillIds: ['deploy'],
      previouslyActiveSkillIds: sessionState,
    });

    expect(result.toolDefinitions).toHaveLength(2);
    expect(result.toolDefinitions.map((d) => d.name)).toEqual([
      'deploy_run',
      'deploy_status',
    ]);
    expect(result.allowedToolNames).toEqual(
      new Set(['deploy_run', 'deploy_status']),
    );
  });

  test('preactivation is request-scoped — does not persist to unrelated runs', () => {
    mockCatalog = [makeSkill('deploy')];
    mockManifests = { deploy: makeManifest(['deploy_run']) };

    // First request: preactivated via slash command
    const result1 = projectSkillTools([], {
      preactivatedSkillIds: ['deploy'],
      previouslyActiveSkillIds: sessionState,
    });
    expect(result1.toolDefinitions).toHaveLength(1);
    expect(result1.allowedToolNames.has('deploy_run')).toBe(true);

    // Second request: no preactivation, no history markers.
    // Without preactivated IDs, the skill should not appear.
    const result2 = projectSkillTools([], { previouslyActiveSkillIds: sessionState });

    expect(result2.toolDefinitions).toHaveLength(0);
    expect(result2.allowedToolNames.has('deploy_run')).toBe(false);
  });

  test('preactivated skill tools merge with history-derived skills on turn-0', () => {
    mockCatalog = [makeSkill('deploy'), makeSkill('oncall')];
    mockManifests = {
      deploy: makeManifest(['deploy_run']),
      oncall: makeManifest(['oncall_page']),
    };

    // History has an oncall marker from a previous exchange
    const history: Message[] = [
      ...skillLoadMessages('<loaded_skill id="oncall" />'),
    ];

    // deploy is preactivated via slash, oncall is from history
    const result = projectSkillTools(history, {
      preactivatedSkillIds: ['deploy'],
      previouslyActiveSkillIds: sessionState,
    });

    expect(result.toolDefinitions).toHaveLength(2);
    expect(result.allowedToolNames).toEqual(
      new Set(['deploy_run', 'oncall_page']),
    );
  });
});
|
|
1412
|
+
|
|
1413
|
+
// ---------------------------------------------------------------------------
|
|
1414
|
+
// Bundled skill pipeline integration tests
|
|
1415
|
+
// ---------------------------------------------------------------------------
|
|
1416
|
+
|
|
1417
|
+
/**
 * Tool names used to build the mock gmail manifest in the bundled-skill
 * tests. The order matters: one test compares the projected definitions
 * against this list with an order-sensitive `toEqual`.
 */
const GMAIL_TOOL_NAMES = [
  'gmail_search',
  'gmail_list_messages',
  'gmail_get_message',
  'gmail_mark_read',
  'gmail_draft',
  'gmail_archive',
  'gmail_batch_archive',
  'gmail_label',
  'gmail_batch_label',
  'gmail_trash',
  'gmail_send',
  'gmail_unsubscribe',
] as const;
|
|
1431
|
+
|
|
1432
|
+
/**
 * Projection tests for a mock "gmail" skill whose manifest declares every
 * name in GMAIL_TOOL_NAMES: all tools appear when the skill's marker is in
 * history, and none appear when it is not.
 */
describe('bundled skill: gmail', () => {
  let sessionState: Map<string, string>;

  // Start every test from empty mocks and an empty session map.
  beforeEach(() => {
    mockCatalog = [];
    mockManifests = {};
    mockRegisteredTools = new Map();
    mockUnregisteredSkillIds = [];
    // Previously reset twice in a row; once is sufficient.
    mockSkillRefCount = new Map();
    mockVersionHashes = {};
    mockVersionHashErrors = new Set();
    sessionState = new Map<string, string>();
  });

  test('gmail skill activation via loaded_skill marker projects all 12 tool definitions', () => {
    mockCatalog = [makeSkill('gmail', '/path/to/bundled-skills/gmail')];
    mockManifests = { gmail: makeManifest([...GMAIL_TOOL_NAMES]) };

    const history: Message[] = [
      ...skillLoadMessages('<loaded_skill id="gmail" />'),
    ];

    const result = projectSkillTools(history, { previouslyActiveSkillIds: sessionState });

    expect(result.toolDefinitions).toHaveLength(12);
    expect(result.toolDefinitions.map((d) => d.name)).toEqual([...GMAIL_TOOL_NAMES]);
    expect(result.allowedToolNames).toEqual(new Set(GMAIL_TOOL_NAMES));
  });

  test('gmail tools are NOT available when gmail skill is not in active context', () => {
    mockCatalog = [makeSkill('gmail', '/path/to/bundled-skills/gmail')];
    mockManifests = { gmail: makeManifest([...GMAIL_TOOL_NAMES]) };

    // No loaded_skill marker for gmail in history
    const history: Message[] = [
      { role: 'user', content: [{ type: 'text', text: 'Hello' }] },
    ];

    const result = projectSkillTools(history, { previouslyActiveSkillIds: sessionState });

    expect(result.toolDefinitions).toHaveLength(0);
    expect(result.allowedToolNames.size).toBe(0);
    for (const name of GMAIL_TOOL_NAMES) {
      expect(result.allowedToolNames.has(name)).toBe(false);
    }
  });
});
|
|
1480
|
+
|
|
1481
|
+
/**
 * Projection tests for a mock "claude-code" skill with a single
 * `claude_code` tool: the tool is projected when the skill's marker is in
 * history and absent otherwise.
 */
describe('bundled skill: claude-code', () => {
  let sessionState: Map<string, string>;

  // Start every test from empty mocks and an empty session map.
  beforeEach(() => {
    mockCatalog = [];
    mockManifests = {};
    mockRegisteredTools = new Map();
    mockUnregisteredSkillIds = [];
    // Previously reset twice in a row; once is sufficient.
    mockSkillRefCount = new Map();
    mockVersionHashes = {};
    mockVersionHashErrors = new Set();
    sessionState = new Map<string, string>();
  });

  test('claude-code skill activation produces claude_code tool definition', () => {
    mockCatalog = [makeSkill('claude-code', '/path/to/bundled-skills/claude-code')];
    mockManifests = { 'claude-code': makeManifest(['claude_code']) };

    const history: Message[] = [
      ...skillLoadMessages('<loaded_skill id="claude-code" />'),
    ];

    const result = projectSkillTools(history, { previouslyActiveSkillIds: sessionState });

    expect(result.toolDefinitions).toHaveLength(1);
    expect(result.toolDefinitions[0].name).toBe('claude_code');
    expect(result.allowedToolNames).toEqual(new Set(['claude_code']));
  });

  test('claude_code tool is absent when claude-code skill is not active', () => {
    mockCatalog = [makeSkill('claude-code', '/path/to/bundled-skills/claude-code')];
    mockManifests = { 'claude-code': makeManifest(['claude_code']) };

    const history: Message[] = [
      { role: 'user', content: [{ type: 'text', text: 'Hello' }] },
    ];

    const result = projectSkillTools(history, { previouslyActiveSkillIds: sessionState });

    expect(result.toolDefinitions).toHaveLength(0);
    expect(result.allowedToolNames.has('claude_code')).toBe(false);
  });
});
|
|
1525
|
+
|
|
1526
|
+
/**
 * Projection tests for the bundled `weather` skill: its single `get_weather`
 * tool is projected only when a `<loaded_skill>` marker for the skill is
 * present in the conversation history.
 */
describe('bundled skill: weather', () => {
  let sessionState: Map<string, string>;

  // Reset every module-level mock so no state leaks between tests.
  // (The original reset `mockSkillRefCount` twice in a row; once is enough.)
  beforeEach(() => {
    mockCatalog = [];
    mockManifests = {};
    mockRegisteredTools = new Map();
    mockUnregisteredSkillIds = [];
    mockSkillRefCount = new Map();
    mockVersionHashes = {};
    mockVersionHashErrors = new Set();
    sessionState = new Map<string, string>();
  });

  test('weather skill activation produces get_weather tool definition', () => {
    mockCatalog = [makeSkill('weather', '/path/to/bundled-skills/weather')];
    mockManifests = { weather: makeManifest(['get_weather']) };

    // History containing the marker that activates the skill.
    const history: Message[] = [
      ...skillLoadMessages('<loaded_skill id="weather" />'),
    ];

    const result = projectSkillTools(history, { previouslyActiveSkillIds: sessionState });

    expect(result.toolDefinitions).toHaveLength(1);
    expect(result.toolDefinitions[0].name).toBe('get_weather');
    expect(result.allowedToolNames).toEqual(new Set(['get_weather']));
  });

  test('get_weather tool is absent when weather skill is not active', () => {
    mockCatalog = [makeSkill('weather', '/path/to/bundled-skills/weather')];
    mockManifests = { weather: makeManifest(['get_weather']) };

    // The skill is in the catalog, but the history has no loaded_skill marker.
    const history: Message[] = [
      { role: 'user', content: [{ type: 'text', text: 'Hello' }] },
    ];

    const result = projectSkillTools(history, { previouslyActiveSkillIds: sessionState });

    expect(result.toolDefinitions).toHaveLength(0);
    expect(result.allowedToolNames.has('get_weather')).toBe(false);
  });
});
|
|
1570
|
+
|
|
1571
|
+
// ---------------------------------------------------------------------------
|
|
1572
|
+
// Bundled skill: app-builder
|
|
1573
|
+
// ---------------------------------------------------------------------------
|
|
1574
|
+
|
|
1575
|
+
/**
 * Tool names fed into the app-builder manifest fixture. The projection tests
 * compare against this list element-by-element, so the order matters.
 */
const APP_BUILDER_TOOL_NAMES = [
  // app_* entries
  'app_create', 'app_list', 'app_query', 'app_update', 'app_delete',
  // app_file_* entries
  'app_file_list', 'app_file_read', 'app_file_edit', 'app_file_write',
] as const;
|
|
1586
|
+
|
|
1587
|
+
/**
 * Projection tests for the bundled `app-builder` skill, driven by the
 * `APP_BUILDER_TOOL_NAMES` fixture list.
 */
describe('bundled skill: app-builder', () => {
  let sessionState: Map<string, string>;

  // One projection pass against this suite's per-test session map.
  const project = (messages: Message[]) =>
    projectSkillTools(messages, { previouslyActiveSkillIds: sessionState });

  // Start every test from pristine mocks and an empty session map.
  beforeEach(() => {
    sessionState = new Map<string, string>();
    mockCatalog = [];
    mockManifests = {};
    mockRegisteredTools = new Map();
    mockUnregisteredSkillIds = [];
    mockSkillRefCount = new Map();
    mockVersionHashes = {};
    mockVersionHashErrors = new Set();
  });

  test('app-builder skill activation projects all 9 canonical non-proxy tool definitions', () => {
    mockCatalog = [makeSkill('app-builder', '/path/to/bundled-skills/app-builder')];
    mockManifests = { 'app-builder': makeManifest([...APP_BUILDER_TOOL_NAMES]) };

    const transcript: Message[] = [...skillLoadMessages('<loaded_skill id="app-builder" />')];
    const projection = project(transcript);

    // Count, order, and allow-list must all mirror the manifest.
    const projectedNames = projection.toolDefinitions.map((d) => d.name);
    expect(projection.toolDefinitions).toHaveLength(9);
    expect(projectedNames).toEqual([...APP_BUILDER_TOOL_NAMES]);
    expect(projection.allowedToolNames).toEqual(new Set(APP_BUILDER_TOOL_NAMES));
  });

  test('app-builder tools are NOT available when skill is not in active context', () => {
    mockCatalog = [makeSkill('app-builder', '/path/to/bundled-skills/app-builder')];
    mockManifests = { 'app-builder': makeManifest([...APP_BUILDER_TOOL_NAMES]) };

    // Plain user turn: nothing in it activates the skill.
    const transcript: Message[] = [
      { role: 'user', content: [{ type: 'text', text: 'Hello' }] },
    ];
    const projection = project(transcript);

    expect(projection.toolDefinitions).toHaveLength(0);
    expect(projection.allowedToolNames.size).toBe(0);
    APP_BUILDER_TOOL_NAMES.forEach((toolName) => {
      expect(projection.allowedToolNames.has(toolName)).toBe(false);
    });
  });

  // NOTE(review): the title mentions host execution / script runners, but the
  // assertions below only cover origin metadata — confirm whether an
  // execution-target check was intended here.
  test('skill-projected app tools use host execution (script runners)', () => {
    mockCatalog = [makeSkill('app-builder', '/path/to/bundled-skills/app-builder')];
    mockManifests = { 'app-builder': makeManifest([...APP_BUILDER_TOOL_NAMES]) };

    const transcript: Message[] = [...skillLoadMessages('<loaded_skill id="app-builder" />')];
    project(transcript);

    const registered = mockRegisteredTools.get('app-builder');
    expect(registered).toBeDefined();
    expect(registered!.length).toBe(9);

    // Each registered tool is tagged as skill-originated and owned by app-builder.
    registered!.forEach((tool) => {
      expect(tool.origin).toBe('skill');
      expect(tool.ownerSkillId).toBe('app-builder');
    });
  });
});
|
|
1654
|
+
|
|
1655
|
+
// ---------------------------------------------------------------------------
|
|
1656
|
+
// Bundled skill: browser
|
|
1657
|
+
// ---------------------------------------------------------------------------
|
|
1658
|
+
|
|
1659
|
+
/**
 * Projection tests for the bundled `browser` skill, driven by the
 * `BROWSER_TOOL_NAMES` list and histories built with `buildSkillLoadHistory`.
 */
describe('bundled skill: browser', () => {
  let sessionState: Map<string, string>;

  // One projection pass against this suite's per-test session map.
  const project = (messages: Message[]) =>
    projectSkillTools(messages, { previouslyActiveSkillIds: sessionState });

  // Start every test from pristine mocks and an empty session map.
  beforeEach(() => {
    sessionState = new Map<string, string>();
    mockCatalog = [];
    mockManifests = {};
    mockRegisteredTools = new Map();
    mockUnregisteredSkillIds = [];
    mockSkillRefCount = new Map();
    mockVersionHashes = {};
    mockVersionHashErrors = new Set();
  });

  test('browser skill activation via loaded_skill marker projects all 10 tool definitions', () => {
    mockCatalog = [makeSkill('browser', '/path/to/bundled-skills/browser')];
    mockManifests = { browser: makeManifest([...BROWSER_TOOL_NAMES]) };

    const transcript: Message[] = [...buildSkillLoadHistory('browser', 'v1:testhash')];
    const projection = project(transcript);

    // Count, order, and allow-list must all mirror the manifest.
    const projectedNames = projection.toolDefinitions.map((d) => d.name);
    expect(projection.toolDefinitions).toHaveLength(10);
    expect(projectedNames).toEqual([...BROWSER_TOOL_NAMES]);
    expect(projection.allowedToolNames).toEqual(new Set(BROWSER_TOOL_NAMES));
  });

  test('browser tools are NOT available when browser skill is not in active context', () => {
    mockCatalog = [makeSkill('browser', '/path/to/bundled-skills/browser')];
    mockManifests = { browser: makeManifest([...BROWSER_TOOL_NAMES]) };

    // Plain user turn: nothing in it activates the skill.
    const transcript: Message[] = [
      { role: 'user', content: [{ type: 'text', text: 'Hello' }] },
    ];
    const projection = project(transcript);

    expect(projection.toolDefinitions).toHaveLength(0);
    expect(projection.allowedToolNames.size).toBe(0);
    BROWSER_TOOL_NAMES.forEach((toolName) => {
      expect(projection.allowedToolNames.has(toolName)).toBe(false);
    });
  });

  test('browser skill tools have skill origin metadata', () => {
    mockCatalog = [makeSkill('browser', '/path/to/bundled-skills/browser')];
    mockManifests = { browser: makeManifest([...BROWSER_TOOL_NAMES]) };

    const transcript: Message[] = [...buildSkillLoadHistory('browser', 'v1:testhash')];
    project(transcript);

    const registered = mockRegisteredTools.get('browser');
    expect(registered).toBeDefined();
    expect(registered!.length).toBe(10);

    // Each registered tool is tagged as skill-originated and owned by browser.
    registered!.forEach((tool) => {
      expect(tool.origin).toBe('skill');
      expect(tool.ownerSkillId).toBe('browser');
    });
  });
});
|
|
1725
|
+
|
|
1726
|
+
// ---------------------------------------------------------------------------
|
|
1727
|
+
// Tamper detection regression tests
|
|
1728
|
+
// ---------------------------------------------------------------------------
|
|
1729
|
+
|
|
1730
|
+
/**
 * Regression tests for version-hash drift: when the hash reported for an
 * active skill differs from the one stored in the session map, the skill is
 * unregistered and registered again, and the map records the new hash.
 */
describe('tamper detection', () => {
  let sessionState: Map<string, string>;

  // One projection "turn" against this suite's per-test session map.
  const runTurn = (messages: Message[]) =>
    projectSkillTools(messages, { previouslyActiveSkillIds: sessionState });

  // Start every test from pristine mocks and an empty session map.
  beforeEach(() => {
    sessionState = new Map<string, string>();
    mockCatalog = [];
    mockManifests = {};
    mockRegisteredTools = new Map();
    mockUnregisteredSkillIds = [];
    mockSkillRefCount = new Map();
    mockVersionHashes = {};
    mockVersionHashErrors = new Set();
  });

  test('file mutation after projection invalidates the stored hash, causing re-registration on next turn', () => {
    mockCatalog = [makeSkill('deploy')];
    mockManifests = { deploy: makeManifest(['deploy_run']) };
    mockVersionHashes = { deploy: 'v1:original-file-hash' };

    const transcript: Message[] = [...skillLoadMessages('<loaded_skill id="deploy" />')];

    // First turn records the original hash.
    const firstTurn = runTurn(transcript);
    expect(firstTurn.toolDefinitions).toHaveLength(1);
    expect(firstTurn.toolDefinitions[0].name).toBe('deploy_run');
    expect(sessionState.get('deploy')).toBe('v1:original-file-hash');

    // The skill's files change on disk, so the computed hash changes.
    mockVersionHashes = { deploy: 'v1:tampered-file-hash' };

    // Second turn sees the drift and re-registers.
    mockUnregisteredSkillIds = [];
    const secondTurn = runTurn(transcript);

    // The tool is still projected after re-registration.
    expect(secondTurn.toolDefinitions).toHaveLength(1);
    expect(secondTurn.toolDefinitions[0].name).toBe('deploy_run');

    // The previous registration was torn down.
    expect(mockUnregisteredSkillIds).toContain('deploy');

    // The session map now holds the new hash.
    expect(sessionState.get('deploy')).toBe('v1:tampered-file-hash');

    // One decrement plus one increment leaves the refcount at 1.
    expect(mockSkillRefCount.get('deploy')).toBe(1);
  });

  test('unmodified skill file does NOT trigger re-registration across multiple turns', () => {
    mockCatalog = [makeSkill('deploy')];
    mockManifests = { deploy: makeManifest(['deploy_run']) };
    mockVersionHashes = { deploy: 'v1:stable-content-hash' };

    const transcript: Message[] = [...skillLoadMessages('<loaded_skill id="deploy" />')];

    // First turn performs the only registration.
    runTurn(transcript);
    expect(mockSkillRefCount.get('deploy')).toBe(1);

    // Three further turns (2 through 4) with an unchanged hash must be no-ops.
    Array.from({ length: 3 }).forEach(() => {
      mockUnregisteredSkillIds = [];
      const laterTurn = runTurn(transcript);
      expect(laterTurn.toolDefinitions).toHaveLength(1);
      expect(mockUnregisteredSkillIds).not.toContain('deploy');
      expect(mockSkillRefCount.get('deploy')).toBe(1);
    });
  });

  test('re-projection after tamper produces tools with the updated hash', () => {
    mockCatalog = [makeSkill('deploy')];
    mockManifests = { deploy: makeManifest(['deploy_run', 'deploy_status']) };
    mockVersionHashes = { deploy: 'v1:hash-before-edit' };

    const transcript: Message[] = [...skillLoadMessages('<loaded_skill id="deploy" />')];

    // First turn records the pre-edit hash.
    runTurn(transcript);
    expect(sessionState.get('deploy')).toBe('v1:hash-before-edit');

    // The file is edited on disk.
    mockVersionHashes = { deploy: 'v1:hash-after-edit' };

    // Second turn adopts the post-edit hash.
    const secondTurn = runTurn(transcript);

    expect(secondTurn.toolDefinitions).toHaveLength(2);
    expect(secondTurn.allowedToolNames).toEqual(new Set(['deploy_run', 'deploy_status']));
    expect(sessionState.get('deploy')).toBe('v1:hash-after-edit');
  });

  test('multiple skills with only one tampered triggers selective re-registration', () => {
    mockCatalog = [makeSkill('deploy'), makeSkill('oncall')];
    mockManifests = {
      deploy: makeManifest(['deploy_run']),
      oncall: makeManifest(['oncall_page']),
    };
    mockVersionHashes = {
      deploy: 'v1:deploy-hash-v1',
      oncall: 'v1:oncall-hash-v1',
    };

    const transcript: Message[] = [
      ...skillLoadMessages('<loaded_skill id="deploy" />'),
      ...skillLoadMessages('<loaded_skill id="oncall" />'),
    ];

    // First turn registers both skills.
    runTurn(transcript);
    expect(sessionState.get('deploy')).toBe('v1:deploy-hash-v1');
    expect(sessionState.get('oncall')).toBe('v1:oncall-hash-v1');

    // Only deploy's hash changes; oncall keeps its original value.
    mockVersionHashes = {
      deploy: 'v1:deploy-hash-v2-tampered',
      oncall: 'v1:oncall-hash-v1',
    };
    mockUnregisteredSkillIds = [];

    // Second turn should touch deploy alone.
    const secondTurn = runTurn(transcript);

    expect(secondTurn.toolDefinitions).toHaveLength(2);
    expect(secondTurn.allowedToolNames).toEqual(new Set(['deploy_run', 'oncall_page']));

    // deploy was torn down for re-registration; oncall was left alone.
    expect(mockUnregisteredSkillIds).toContain('deploy');
    expect(mockUnregisteredSkillIds).not.toContain('oncall');

    // The session map reflects exactly that.
    expect(sessionState.get('deploy')).toBe('v1:deploy-hash-v2-tampered');
    expect(sessionState.get('oncall')).toBe('v1:oncall-hash-v1');
  });

  test('hash failure (e.g., unreadable directory) causes fallback re-registration', () => {
    mockCatalog = [makeSkill('deploy')];
    mockManifests = { deploy: makeManifest(['deploy_run']) };
    mockVersionHashes = { deploy: 'v1:initial-hash' };

    const transcript: Message[] = [...skillLoadMessages('<loaded_skill id="deploy" />')];

    runTurn(transcript);
    expect(sessionState.get('deploy')).toBe('v1:initial-hash');

    // Force computeSkillVersionHash to throw, exercising the catch branch in
    // session-skill-tools.ts that substitutes `unknown-${Date.now()}`.
    mockVersionHashErrors.add('deploy');
    mockUnregisteredSkillIds = [];

    const secondTurn = runTurn(transcript);
    expect(secondTurn.toolDefinitions).toHaveLength(1);

    // The `unknown-<timestamp>` fallback can never equal the stored hash, so
    // the skill is re-registered.
    expect(mockUnregisteredSkillIds).toContain('deploy');
    expect(sessionState.get('deploy')).toMatch(/^unknown-\d+$/);
  });
});
|
|
1896
|
+
|
|
1897
|
+
// ---------------------------------------------------------------------------
|
|
1898
|
+
// resetSkillToolProjection tests
|
|
1899
|
+
// ---------------------------------------------------------------------------
|
|
1900
|
+
|
|
1901
|
+
/**
 * Tests for `resetSkillToolProjection`: it unregisters every skill tracked in
 * the supplied map and empties it, and does nothing for `undefined` or an
 * empty map.
 */
describe('resetSkillToolProjection', () => {
  // Start every test from pristine mocks.
  beforeEach(() => {
    mockVersionHashErrors = new Set();
    mockVersionHashes = {};
    mockSkillRefCount = new Map();
    mockUnregisteredSkillIds = [];
    mockRegisteredTools = new Map();
    mockManifests = {};
    mockCatalog = [];
  });

  test('unregisters all tracked skills and clears the map', () => {
    mockCatalog = [makeSkill('deploy'), makeSkill('oncall')];
    mockManifests = {
      deploy: makeManifest(['deploy_run']),
      oncall: makeManifest(['oncall_page']),
    };

    const tracked = new Map<string, string>();

    // Bring both skills into the active set first.
    const transcript: Message[] = [
      ...skillLoadMessages('<loaded_skill id="deploy" />'),
      ...skillLoadMessages('<loaded_skill id="oncall" />'),
    ];
    projectSkillTools(transcript, { previouslyActiveSkillIds: tracked });
    expect(tracked.size).toBe(2);

    mockUnregisteredSkillIds = [];
    resetSkillToolProjection(tracked);

    // Both skills were unregistered and the tracking map is now empty.
    expect(mockUnregisteredSkillIds).toContain('deploy');
    expect(mockUnregisteredSkillIds).toContain('oncall');
    expect(tracked.size).toBe(0);
  });

  test('no-op when called with undefined', () => {
    mockUnregisteredSkillIds = [];

    resetSkillToolProjection(undefined);

    expect(mockUnregisteredSkillIds).toHaveLength(0);
  });

  test('no-op when called with empty map', () => {
    mockUnregisteredSkillIds = [];

    resetSkillToolProjection(new Map());

    expect(mockUnregisteredSkillIds).toHaveLength(0);
  });
});
|
|
1949
|
+
|
|
1950
|
+
// ---------------------------------------------------------------------------
|
|
1951
|
+
// Versioned marker integration tests
|
|
1952
|
+
// ---------------------------------------------------------------------------
|
|
1953
|
+
|
|
1954
|
+
/**
 * Integration tests for `<loaded_skill>` markers that carry a `version`
 * attribute: they activate and deactivate skill tools the same way as
 * markers without one.
 */
describe('versioned markers through session projection', () => {
  let sessionState: Map<string, string>;

  // One projection pass against this suite's per-test session map.
  const project = (messages: Message[]) =>
    projectSkillTools(messages, { previouslyActiveSkillIds: sessionState });

  // Start every test from pristine mocks and an empty session map.
  beforeEach(() => {
    sessionState = new Map<string, string>();
    mockCatalog = [];
    mockManifests = {};
    mockRegisteredTools = new Map();
    mockUnregisteredSkillIds = [];
    mockSkillRefCount = new Map();
    mockVersionHashes = {};
    mockVersionHashErrors = new Set();
  });

  test('versioned marker activates skill tools the same as legacy marker', () => {
    mockCatalog = [makeSkill('deploy')];
    mockManifests = { deploy: makeManifest(['deploy_run']) };

    const transcript: Message[] = [
      ...skillLoadMessages('<loaded_skill id="deploy" version="v1:abc123" />'),
    ];
    const projection = project(transcript);

    expect(projection.toolDefinitions).toHaveLength(1);
    expect(projection.toolDefinitions[0].name).toBe('deploy_run');
    expect(projection.allowedToolNames).toEqual(new Set(['deploy_run']));
  });

  test('mixed legacy and versioned markers both project tools', () => {
    mockCatalog = [makeSkill('deploy'), makeSkill('oncall')];
    mockManifests = {
      deploy: makeManifest(['deploy_run']),
      oncall: makeManifest(['oncall_page']),
    };

    // One marker without a version attribute, one with.
    const transcript: Message[] = [
      ...skillLoadMessages('<loaded_skill id="deploy" />'),
      ...skillLoadMessages('<loaded_skill id="oncall" version="v1:deadbeef" />'),
    ];
    const projection = project(transcript);

    expect(projection.toolDefinitions).toHaveLength(2);
    expect(projection.allowedToolNames).toEqual(new Set(['deploy_run', 'oncall_page']));
  });

  test('versioned marker skill deactivates when removed from history', () => {
    mockCatalog = [makeSkill('deploy')];
    mockManifests = { deploy: makeManifest(['deploy_run']) };

    // First turn: the versioned marker is present, so the skill is tracked.
    const withMarker: Message[] = [
      ...skillLoadMessages('<loaded_skill id="deploy" version="v1:abc123" />'),
    ];
    project(withMarker);
    expect(sessionState.has('deploy')).toBe(true);

    // Second turn: empty history, so the marker is gone.
    mockUnregisteredSkillIds = [];
    const afterRemoval = project([]);
    expect(afterRemoval.toolDefinitions).toEqual([]);
    expect(mockUnregisteredSkillIds).toContain('deploy');
  });
});
|
|
2019
|
+
|
|
2020
|
+
// ---------------------------------------------------------------------------
|
|
2021
|
+
// Hash change re-prompt regression tests (PR 35)
|
|
2022
|
+
// Verify that version hash changes trigger re-registration and that the
|
|
2023
|
+
// session state accurately tracks the new hash, which downstream components
|
|
2024
|
+
// use to decide whether cached approvals still apply.
|
|
2025
|
+
// ---------------------------------------------------------------------------
|
|
2026
|
+
|
|
2027
|
+
/**
 * Regression tests (PR 35): a changed version hash makes the next projection
 * unregister and re-register the affected skill, and the session map ends up
 * holding the new hash.
 */
describe('hash change re-prompt regressions (PR 35)', () => {
  let sessionState: Map<string, string>;

  // One projection "turn" against this suite's per-test session map.
  const runTurn = (messages: Message[]) =>
    projectSkillTools(messages, { previouslyActiveSkillIds: sessionState });

  // Start every test from pristine mocks and an empty session map.
  beforeEach(() => {
    sessionState = new Map<string, string>();
    mockCatalog = [];
    mockManifests = {};
    mockRegisteredTools = new Map();
    mockUnregisteredSkillIds = [];
    mockSkillRefCount = new Map();
    mockVersionHashes = {};
    mockVersionHashErrors = new Set();
  });

  test('approve v1, edit skill (hash changes), v2 triggers re-registration with new hash', () => {
    mockCatalog = [makeSkill('deploy')];
    mockManifests = { deploy: makeManifest(['deploy_run']) };
    mockVersionHashes = { deploy: 'v1:approved-hash' };

    const transcript: Message[] = [...skillLoadMessages('<loaded_skill id="deploy" />')];

    // First turn registers the skill under the v1 hash.
    const firstTurn = runTurn(transcript);
    expect(firstTurn.toolDefinitions).toHaveLength(1);
    expect(sessionState.get('deploy')).toBe('v1:approved-hash');
    expect(mockSkillRefCount.get('deploy')).toBe(1);

    // The skill is edited, so its hash changes.
    mockVersionHashes = { deploy: 'v2:edited-hash' };
    mockUnregisteredSkillIds = [];

    // Second turn detects the drift and swaps the registration.
    const secondTurn = runTurn(transcript);

    expect(secondTurn.toolDefinitions).toHaveLength(1);
    expect(secondTurn.toolDefinitions[0].name).toBe('deploy_run');

    // The v1 registration was torn down.
    expect(mockUnregisteredSkillIds).toContain('deploy');

    // The session map carries the v2 hash.
    expect(sessionState.get('deploy')).toBe('v2:edited-hash');

    // One decrement plus one increment leaves the refcount at 1.
    expect(mockSkillRefCount.get('deploy')).toBe(1);
  });

  test('two consecutive edits each trigger re-registration with correct hash', () => {
    mockCatalog = [makeSkill('deploy')];
    mockManifests = { deploy: makeManifest(['deploy_run']) };
    mockVersionHashes = { deploy: 'v1:first-version' };

    const transcript: Message[] = [...skillLoadMessages('<loaded_skill id="deploy" />')];

    // First turn: initial registration.
    runTurn(transcript);
    expect(sessionState.get('deploy')).toBe('v1:first-version');

    // First edit moves the hash to v2.
    mockVersionHashes = { deploy: 'v2:second-version' };
    mockUnregisteredSkillIds = [];
    runTurn(transcript);
    expect(sessionState.get('deploy')).toBe('v2:second-version');
    expect(mockUnregisteredSkillIds).toContain('deploy');

    // Second edit moves the hash to v3.
    mockVersionHashes = { deploy: 'v3:third-version' };
    mockUnregisteredSkillIds = [];
    runTurn(transcript);
    expect(sessionState.get('deploy')).toBe('v3:third-version');
    expect(mockUnregisteredSkillIds).toContain('deploy');

    // The refcount is still 1 after both edits.
    expect(mockSkillRefCount.get('deploy')).toBe(1);
  });

  test('hash change in one skill does not affect co-active skill with stable hash', () => {
    mockCatalog = [makeSkill('deploy'), makeSkill('oncall')];
    mockManifests = {
      deploy: makeManifest(['deploy_run']),
      oncall: makeManifest(['oncall_page']),
    };
    mockVersionHashes = {
      deploy: 'v1:deploy-stable',
      oncall: 'v1:oncall-original',
    };

    const transcript: Message[] = [
      ...skillLoadMessages('<loaded_skill id="deploy" />'),
      ...skillLoadMessages('<loaded_skill id="oncall" />'),
    ];

    // First turn registers both skills.
    runTurn(transcript);
    expect(sessionState.get('deploy')).toBe('v1:deploy-stable');
    expect(sessionState.get('oncall')).toBe('v1:oncall-original');

    // Only oncall's hash changes; deploy keeps its original value.
    mockVersionHashes = {
      deploy: 'v1:deploy-stable',
      oncall: 'v2:oncall-edited',
    };
    mockUnregisteredSkillIds = [];

    runTurn(transcript);

    // oncall alone went through unregister/re-register.
    expect(mockUnregisteredSkillIds).toContain('oncall');
    expect(mockUnregisteredSkillIds).not.toContain('deploy');

    // The session map reflects exactly that.
    expect(sessionState.get('deploy')).toBe('v1:deploy-stable');
    expect(sessionState.get('oncall')).toBe('v2:oncall-edited');
  });

  test('registered tools carry updated ownerSkillId after hash change re-registration', () => {
    mockCatalog = [makeSkill('deploy')];
    mockManifests = { deploy: makeManifest(['deploy_run']) };
    mockVersionHashes = { deploy: 'v1:pre-edit' };

    const transcript: Message[] = [...skillLoadMessages('<loaded_skill id="deploy" />')];

    // First turn: one tool registered, owned by deploy.
    runTurn(transcript);
    const beforeEdit = mockRegisteredTools.get('deploy');
    expect(beforeEdit).toBeDefined();
    expect(beforeEdit!.length).toBe(1);
    expect(beforeEdit![0].ownerSkillId).toBe('deploy');

    // The skill is edited, then projected again.
    mockVersionHashes = { deploy: 'v2:post-edit' };
    runTurn(transcript);

    // The re-registered tools are still attributed to the same skill.
    const afterEdit = mockRegisteredTools.get('deploy');
    expect(afterEdit).toBeDefined();
    expect(afterEdit!.length).toBeGreaterThanOrEqual(1);
    expect(afterEdit![0].ownerSkillId).toBe('deploy');
  });
});
|
|
2173
|
+
|
|
2174
|
+
// ---------------------------------------------------------------------------
|
|
2175
|
+
// Version hash plumbing regression tests
|
|
2176
|
+
// Verify that createSkillToolsFromManifest receives the computed hash and
|
|
2177
|
+
// that projected tools carry ownerSkillVersionHash, which downstream
|
|
2178
|
+
// components (executor.ts) use to build policy context.
|
|
2179
|
+
// ---------------------------------------------------------------------------
|
|
2180
|
+
|
|
2181
|
+
describe('version hash plumbing to projected tools', () => {
  let sessionState: Map<string, string>;

  /** Runs one projection turn over `history` using the current per-test session map. */
  const projectTurn = (history: Message[]) =>
    projectSkillTools(history, { previouslyActiveSkillIds: sessionState });

  // Reset every piece of shared mock state so tests cannot leak into each other.
  beforeEach(() => {
    mockCatalog = [];
    mockManifests = {};
    mockRegisteredTools = new Map();
    mockUnregisteredSkillIds = [];
    mockSkillRefCount = new Map();
    mockVersionHashes = {};
    mockVersionHashErrors = new Set();
    sessionState = new Map<string, string>();
  });

  test('projected tools carry ownerSkillVersionHash matching the computed hash', () => {
    mockCatalog = [makeSkill('deploy')];
    mockManifests = { deploy: makeManifest(['deploy_run', 'deploy_status']) };
    mockVersionHashes = { deploy: 'v1:secure-hash-abc' };

    const history: Message[] = [...skillLoadMessages('<loaded_skill id="deploy" />')];

    projectTurn(history);

    const registered = mockRegisteredTools.get('deploy');
    expect(registered).toBeDefined();
    expect(registered!.length).toBe(2);

    // The hash must be stamped on each of the skill's tools, not just the first.
    for (const registeredTool of registered!) {
      expect(registeredTool.ownerSkillVersionHash).toBe('v1:secure-hash-abc');
    }
  });

  test('after hash change re-registration, new tools carry the updated hash', () => {
    mockCatalog = [makeSkill('deploy')];
    mockManifests = { deploy: makeManifest(['deploy_run']) };
    mockVersionHashes = { deploy: 'v1:hash-before' };

    const history: Message[] = [...skillLoadMessages('<loaded_skill id="deploy" />')];

    // First turn registers under the original hash.
    projectTurn(history);
    const beforeEdit = mockRegisteredTools.get('deploy');
    expect(beforeEdit).toBeDefined();
    expect(beforeEdit![0].ownerSkillVersionHash).toBe('v1:hash-before');

    // The skill is "edited": the mock now reports a different hash.
    mockVersionHashes = { deploy: 'v2:hash-after' };

    // Second turn re-registers under the new hash.
    projectTurn(history);
    const afterEdit = mockRegisteredTools.get('deploy');
    expect(afterEdit).toBeDefined();

    // Inspect the final entry, i.e. the most recently recorded tool.
    const newestTool = afterEdit![afterEdit!.length - 1];
    expect(newestTool.ownerSkillVersionHash).toBe('v2:hash-after');
  });

  test('tools for multiple co-active skills each carry their own version hash', () => {
    mockCatalog = [makeSkill('deploy'), makeSkill('oncall')];
    mockManifests = {
      deploy: makeManifest(['deploy_run']),
      oncall: makeManifest(['oncall_page']),
    };
    mockVersionHashes = {
      deploy: 'v1:deploy-hash-123',
      oncall: 'v1:oncall-hash-456',
    };

    // Both skills are loaded in the same history, so both are active at once.
    const history: Message[] = [
      ...skillLoadMessages('<loaded_skill id="deploy" />'),
      ...skillLoadMessages('<loaded_skill id="oncall" />'),
    ];

    projectTurn(history);

    const forDeploy = mockRegisteredTools.get('deploy');
    expect(forDeploy).toBeDefined();
    expect(forDeploy![0].ownerSkillVersionHash).toBe('v1:deploy-hash-123');

    const forOncall = mockRegisteredTools.get('oncall');
    expect(forOncall).toBeDefined();
    expect(forOncall![0].ownerSkillVersionHash).toBe('v1:oncall-hash-456');
  });

  test('default hash is used and plumbed when no explicit hash override is set', () => {
    mockCatalog = [makeSkill('deploy')];
    mockManifests = { deploy: makeManifest(['deploy_run']) };
    // mockVersionHashes is deliberately left empty here; the mock is expected
    // to fall back to 'v1:default-hash-deploy'.

    const history: Message[] = [...skillLoadMessages('<loaded_skill id="deploy" />')];

    projectTurn(history);

    const registered = mockRegisteredTools.get('deploy');
    expect(registered).toBeDefined();
    expect(registered![0].ownerSkillVersionHash).toBe('v1:default-hash-deploy');
  });
});
|
|
2287
|
+
|
|
2288
|
+
// ---------------------------------------------------------------------------
|
|
2289
|
+
// Child skill includes: no auto-activation
|
|
2290
|
+
// ---------------------------------------------------------------------------
|
|
2291
|
+
|
|
2292
|
+
describe('includes metadata does not auto-activate child skill tools', () => {
  let sessionState: Map<string, string>;

  /** Runs one projection over `history` with the per-test session map and returns its result. */
  const projectTurn = (history: Message[]) =>
    projectSkillTools(history, { previouslyActiveSkillIds: sessionState });

  // Start every test from empty mock state and an empty session map.
  beforeEach(() => {
    mockCatalog = [];
    mockManifests = {};
    mockRegisteredTools = new Map();
    mockUnregisteredSkillIds = [];
    mockSkillRefCount = new Map();
    mockVersionHashes = {};
    mockVersionHashErrors = new Set();
    sessionState = new Map<string, string>();
  });

  test('parent with includes — only parent tools projected when only parent marker present', () => {
    // The parent lists the child under `includes`; both are in the catalog.
    const parent = makeSkill('parent-skill');
    parent.includes = ['child-skill'];

    mockCatalog = [parent, makeSkill('child-skill')];
    mockManifests = {
      'parent-skill': makeManifest(['parent_action']),
      'child-skill': makeManifest(['child_action']),
    };

    // History carries a marker for the parent only; the child was never loaded.
    const history: Message[] = [...skillLoadMessages('<loaded_skill id="parent-skill" />')];

    const { toolDefinitions, allowedToolNames } = projectTurn(history);

    // Exactly the parent's tool is projected ...
    expect(toolDefinitions).toHaveLength(1);
    expect(toolDefinitions[0].name).toBe('parent_action');
    expect(allowedToolNames).toEqual(new Set(['parent_action']));

    // ... and the child's tool is not allowed.
    expect(allowedToolNames.has('child_action')).toBe(false);
  });

  test('child tools appear only after explicit child loaded_skill marker', () => {
    const parent = makeSkill('parent-skill');
    parent.includes = ['child-skill'];

    mockCatalog = [parent, makeSkill('child-skill')];
    mockManifests = {
      'parent-skill': makeManifest(['parent_action']),
      'child-skill': makeManifest(['child_action']),
    };

    // This time the history holds a marker for each skill, so both are active.
    const history: Message[] = [
      ...skillLoadMessages('<loaded_skill id="parent-skill" />'),
      ...skillLoadMessages('<loaded_skill id="child-skill" />'),
    ];

    const { toolDefinitions, allowedToolNames } = projectTurn(history);

    expect(toolDefinitions).toHaveLength(2);
    expect(allowedToolNames).toEqual(new Set(['parent_action', 'child_action']));
  });

  test('child tools are absent even with deep include chain — only markers matter', () => {
    // Three-level chain: grandparent -> parent -> child.
    const top = makeSkill('grandparent');
    top.includes = ['parent'];
    const middle = makeSkill('parent');
    middle.includes = ['child'];

    mockCatalog = [top, middle, makeSkill('child')];
    mockManifests = {
      grandparent: makeManifest(['gp_action']),
      parent: makeManifest(['parent_action']),
      child: makeManifest(['child_action']),
    };

    // Only the top of the chain has a marker; transitive includes must not activate anything.
    const history: Message[] = [...skillLoadMessages('<loaded_skill id="grandparent" />')];

    const { toolDefinitions, allowedToolNames } = projectTurn(history);

    expect(toolDefinitions).toHaveLength(1);
    expect(toolDefinitions[0].name).toBe('gp_action');
    expect(allowedToolNames.has('parent_action')).toBe(false);
    expect(allowedToolNames.has('child_action')).toBe(false);
  });
});
|
|
2381
|
+
|
|
2382
|
+
// ---------------------------------------------------------------------------
|
|
2383
|
+
// Browser skill migration harness — validates shared test helpers
|
|
2384
|
+
// ---------------------------------------------------------------------------
|
|
2385
|
+
|
|
2386
|
+
describe('browser skill migration harness', () => {
  /** Reads the `id` field of the first content block of the first message in `history`. */
  const firstBlockId = (history: Message[]): string =>
    (history[0].content[0] as { id: string }).id;

  test('buildSkillLoadHistory creates valid skill_load history', () => {
    const history = buildSkillLoadHistory('browser', 'v1:abc123');
    expect(history).toHaveLength(2);

    const [assistantTurn, userTurn] = history;
    expect(assistantTurn.role).toBe('assistant');
    expect(userTurn.role).toBe('user');

    // The assistant turn opens with a skill_load tool_use block.
    const toolUseBlock = assistantTurn.content[0] as ToolUseContent;
    expect(toolUseBlock.type).toBe('tool_use');
    expect(toolUseBlock.name).toBe('skill_load');

    // The user turn opens with a tool_result that embeds the versioned marker.
    const toolResultBlock = userTurn.content[0] as ToolResultContent;
    expect(toolResultBlock.type).toBe('tool_result');
    expect(toolResultBlock.content).toContain('<loaded_skill id="browser" version="v1:abc123" />');
  });

  test('buildSkillLoadHistory generates unique tool_use IDs per call', () => {
    const firstHistory = buildSkillLoadHistory('browser', 'v1:abc');
    const secondHistory = buildSkillLoadHistory('browser', 'v1:def');
    const firstId = firstBlockId(firstHistory);
    const secondId = firstBlockId(secondHistory);
    expect(firstId).not.toBe(secondId);
  });

  test('BROWSER_TOOL_NAMES contains all 10 browser tools', () => {
    expect(BROWSER_TOOL_NAMES).toHaveLength(10);
    // Spot-check two of the expected names.
    expect(BROWSER_TOOL_NAMES).toContain('browser_navigate');
    expect(BROWSER_TOOL_NAMES).toContain('browser_fill_credential');
  });

  test('assertBrowserToolsPresent passes when all tools present', () => {
    // An unrelated extra tool in the list must not cause a failure.
    const everyBrowserToolPlusExtra = () =>
      assertBrowserToolsPresent([...BROWSER_TOOL_NAMES, 'extra_tool']);
    expect(everyBrowserToolPlusExtra).not.toThrow();
  });

  test('assertBrowserToolsPresent fails when tool missing', () => {
    const onlyOneBrowserTool = () => assertBrowserToolsPresent(['browser_navigate']);
    expect(onlyOneBrowserTool).toThrow();
  });

  test('assertBrowserToolsAbsent passes when no browser tools present', () => {
    const noBrowserTools = () => assertBrowserToolsAbsent(['file_read', 'web_search']);
    expect(noBrowserTools).not.toThrow();
  });

  test('assertBrowserToolsAbsent fails when browser tool present', () => {
    const oneBrowserTool = () => assertBrowserToolsAbsent(['browser_navigate']);
    expect(oneBrowserTool).toThrow();
  });
});
|