vellum 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -2
- package/bun.lock +5 -2
- package/package.json +4 -2
- package/scripts/capture-x-graphql.ts +562 -0
- package/scripts/ipc/check-swift-decoder-drift.ts +2 -1
- package/scripts/test.sh +5 -0
- package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +133 -34
- package/src/__tests__/account-registry.test.ts +2 -1
- package/src/__tests__/agent-heartbeat-service.test.ts +250 -0
- package/src/__tests__/asset-materialize-tool.test.ts +16 -15
- package/src/__tests__/asset-search-tool.test.ts +23 -22
- package/src/__tests__/attachments-store.test.ts +56 -127
- package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +5 -4
- package/src/__tests__/browser-skill-endstate.test.ts +4 -3
- package/src/__tests__/call-bridge.test.ts +385 -0
- package/src/__tests__/call-constants.test.ts +40 -0
- package/src/__tests__/call-orchestrator.test.ts +130 -4
- package/src/__tests__/call-recovery.test.ts +518 -0
- package/src/__tests__/call-routes-http.test.ts +459 -0
- package/src/__tests__/call-state-machine.test.ts +143 -0
- package/src/__tests__/call-store.test.ts +216 -1
- package/src/__tests__/cli-discover.test.ts +1 -1
- package/src/__tests__/commit-message-enrichment-service.test.ts +148 -7
- package/src/__tests__/compaction.benchmark.test.ts +176 -0
- package/src/__tests__/computer-use-tools.test.ts +250 -0
- package/src/__tests__/config-schema.test.ts +299 -3
- package/src/__tests__/conflict-store.test.ts +2 -1
- package/src/__tests__/contacts-tools.test.ts +331 -0
- package/src/__tests__/conversation-store.test.ts +30 -32
- package/src/__tests__/credential-security-invariants.test.ts +4 -0
- package/src/__tests__/date-context.test.ts +373 -0
- package/src/__tests__/db-schedule-syntax-migration.test.ts +129 -0
- package/src/__tests__/fixtures/media-reuse-fixtures.ts +3 -3
- package/src/__tests__/followup-tools.test.ts +303 -0
- package/src/__tests__/handlers-twitter-config.test.ts +718 -0
- package/src/__tests__/intent-routing.test.ts +64 -57
- package/src/__tests__/ipc-roundtrip.benchmark.test.ts +237 -0
- package/src/__tests__/ipc-snapshot.test.ts +62 -28
- package/src/__tests__/llm-usage-store.test.ts +3 -8
- package/src/__tests__/media-generate-image.test.ts +1 -1
- package/src/__tests__/media-reuse-story.e2e.test.ts +7 -7
- package/src/__tests__/memory-retrieval.benchmark.test.ts +430 -0
- package/src/__tests__/parallel-tool.benchmark.test.ts +294 -0
- package/src/__tests__/playbook-tools.test.ts +342 -0
- package/src/__tests__/profile-compiler.test.ts +2 -1
- package/src/__tests__/provider-streaming.benchmark.test.ts +773 -0
- package/src/__tests__/recurrence-engine-rruleset.test.ts +78 -0
- package/src/__tests__/recurrence-engine.test.ts +69 -0
- package/src/__tests__/recurrence-types.test.ts +71 -0
- package/src/__tests__/registry.test.ts +5 -3
- package/src/__tests__/relay-server.test.ts +633 -0
- package/src/__tests__/reminder-store.test.ts +6 -3
- package/src/__tests__/reminder.test.ts +43 -77
- package/src/__tests__/run-orchestrator-assistant-events.test.ts +8 -4
- package/src/__tests__/run-orchestrator.test.ts +4 -4
- package/src/__tests__/runtime-attachment-metadata.test.ts +7 -6
- package/src/__tests__/runtime-runs-http.test.ts +4 -4
- package/src/__tests__/runtime-runs.test.ts +4 -4
- package/src/__tests__/schedule-store.test.ts +482 -0
- package/src/__tests__/schedule-tools.test.ts +700 -0
- package/src/__tests__/scheduler-recurrence.test.ts +329 -0
- package/src/__tests__/server-history-render.test.ts +14 -13
- package/src/__tests__/session-error.test.ts +28 -0
- package/src/__tests__/session-init.benchmark.test.ts +462 -0
- package/src/__tests__/session-queue.test.ts +71 -48
- package/src/__tests__/session-runtime-assembly.test.ts +161 -0
- package/src/__tests__/session-surfaces-task-progress.test.ts +104 -0
- package/src/__tests__/signup-e2e.test.ts +2 -1
- package/src/__tests__/skill-projection.benchmark.test.ts +328 -0
- package/src/__tests__/skill-script-runner.test.ts +159 -0
- package/src/__tests__/speaker-identification.test.ts +52 -0
- package/src/__tests__/subagent-manager-notify.test.ts +42 -10
- package/src/__tests__/subagent-tools.test.ts +141 -41
- package/src/__tests__/task-compiler.test.ts +2 -1
- package/src/__tests__/task-runner.test.ts +2 -1
- package/src/__tests__/task-scheduler.test.ts +2 -1
- package/src/__tests__/task-tools.test.ts +49 -56
- package/src/__tests__/tool-audit-listener.test.ts +1 -0
- package/src/__tests__/tool-domain-event-publisher.test.ts +2 -0
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +500 -0
- package/src/__tests__/tool-executor.test.ts +13 -17
- package/src/__tests__/turn-commit.test.ts +218 -3
- package/src/__tests__/twilio-provider.test.ts +143 -0
- package/src/__tests__/twilio-routes.test.ts +789 -0
- package/src/__tests__/twitter-auth-handler.test.ts +581 -0
- package/src/__tests__/view-image-tool.test.ts +217 -0
- package/src/__tests__/workspace-git-service.test.ts +186 -0
- package/src/__tests__/workspace-heartbeat-service.test.ts +13 -3
- package/src/agent-heartbeat/agent-heartbeat-service.ts +155 -0
- package/src/bundler/app-bundler.ts +12 -8
- package/src/calls/call-bridge.ts +95 -0
- package/src/calls/call-constants.ts +43 -5
- package/src/calls/call-domain.ts +276 -0
- package/src/calls/call-orchestrator.ts +43 -17
- package/src/calls/call-recovery.ts +207 -0
- package/src/calls/call-state-machine.ts +68 -0
- package/src/calls/call-store.ts +192 -5
- package/src/calls/relay-server.ts +41 -4
- package/src/calls/speaker-identification.ts +213 -0
- package/src/calls/twilio-provider.ts +10 -6
- package/src/calls/twilio-routes.ts +90 -76
- package/src/calls/types.ts +1 -1
- package/src/cli/config-commands.ts +334 -0
- package/src/cli/core-commands.ts +776 -0
- package/src/cli/doordash.ts +251 -1
- package/src/cli/ipc-client.ts +82 -0
- package/src/cli/map.ts +246 -0
- package/src/cli/twitter.ts +575 -0
- package/src/cli.ts +7 -5
- package/src/commands/__tests__/cc-command-registry.test.ts +319 -0
- package/src/commands/cc-command-registry.ts +209 -0
- package/src/config/bundled-skills/contacts/SKILL.md +39 -0
- package/src/config/bundled-skills/contacts/TOOLS.json +122 -0
- package/src/config/bundled-skills/contacts/tools/contact-merge.ts +9 -0
- package/src/config/bundled-skills/contacts/tools/contact-search.ts +9 -0
- package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +9 -0
- package/src/config/bundled-skills/document/SKILL.md +18 -0
- package/src/config/bundled-skills/document/TOOLS.json +53 -0
- package/src/config/bundled-skills/document/tools/document-create.ts +9 -0
- package/src/config/bundled-skills/document/tools/document-update.ts +9 -0
- package/src/config/bundled-skills/doordash/SKILL.md +82 -23
- package/src/config/bundled-skills/followups/SKILL.md +32 -0
- package/src/config/bundled-skills/followups/TOOLS.json +100 -0
- package/src/config/bundled-skills/followups/tools/followup-create.ts +9 -0
- package/src/config/bundled-skills/followups/tools/followup-list.ts +9 -0
- package/src/config/bundled-skills/followups/tools/followup-resolve.ts +9 -0
- package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +1 -23
- package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +2 -1
- package/src/config/bundled-skills/playbooks/SKILL.md +31 -0
- package/src/config/bundled-skills/playbooks/TOOLS.json +126 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +9 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +9 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +9 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +9 -0
- package/src/config/bundled-skills/reminder/SKILL.md +20 -0
- package/src/config/bundled-skills/reminder/TOOLS.json +67 -0
- package/src/config/bundled-skills/reminder/tools/reminder-cancel.ts +9 -0
- package/src/config/bundled-skills/reminder/tools/reminder-create.ts +9 -0
- package/src/config/bundled-skills/reminder/tools/reminder-list.ts +9 -0
- package/src/config/bundled-skills/schedule/SKILL.md +74 -0
- package/src/config/bundled-skills/schedule/TOOLS.json +135 -0
- package/src/config/bundled-skills/schedule/tools/schedule-create.ts +9 -0
- package/src/config/bundled-skills/schedule/tools/schedule-delete.ts +9 -0
- package/src/config/bundled-skills/schedule/tools/schedule-list.ts +9 -0
- package/src/config/bundled-skills/schedule/tools/schedule-update.ts +9 -0
- package/src/config/bundled-skills/subagent/SKILL.md +25 -0
- package/src/config/bundled-skills/subagent/TOOLS.json +107 -0
- package/src/config/bundled-skills/subagent/tools/subagent-abort.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-message.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-read.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-spawn.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-status.ts +9 -0
- package/src/config/bundled-skills/tasks/SKILL.md +28 -0
- package/src/config/bundled-skills/tasks/TOOLS.json +256 -0
- package/src/config/bundled-skills/tasks/tools/task-delete.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-add.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-remove.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-show.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-update.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-run.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-save.ts +9 -0
- package/src/config/bundled-skills/twitter/SKILL.md +134 -0
- package/src/config/bundled-skills/watcher/SKILL.md +27 -0
- package/src/config/bundled-skills/watcher/TOOLS.json +147 -0
- package/src/config/bundled-skills/watcher/tools/watcher-create.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-delete.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-digest.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-list.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-update.ts +9 -0
- package/src/config/defaults.ts +33 -0
- package/src/config/loader.ts +4 -1
- package/src/config/schema.ts +161 -1
- package/src/config/system-prompt.ts +61 -16
- package/src/config/templates/IDENTITY.md +7 -0
- package/src/config/types.ts +4 -0
- package/src/contacts/contact-store.ts +4 -4
- package/src/daemon/assistant-attachments.ts +10 -0
- package/src/daemon/classifier.ts +3 -1
- package/src/daemon/computer-use-session.ts +3 -1
- package/src/daemon/date-context.ts +136 -0
- package/src/daemon/handlers/apps.ts +16 -1
- package/src/daemon/handlers/browser.ts +54 -0
- package/src/daemon/handlers/computer-use.ts +7 -1
- package/src/daemon/handlers/config.ts +163 -5
- package/src/daemon/handlers/diagnostics.ts +5 -1
- package/src/daemon/handlers/documents.ts +18 -29
- package/src/daemon/handlers/home-base.ts +5 -1
- package/src/daemon/handlers/index.ts +40 -277
- package/src/daemon/handlers/misc.ts +9 -1
- package/src/daemon/handlers/publish.ts +6 -1
- package/src/daemon/handlers/sessions.ts +65 -12
- package/src/daemon/handlers/shared.ts +36 -1
- package/src/daemon/handlers/signing.ts +37 -0
- package/src/daemon/handlers/skills.ts +20 -6
- package/src/daemon/handlers/subagents.ts +8 -3
- package/src/daemon/handlers/twitter-auth.ts +169 -0
- package/src/daemon/handlers/work-items.ts +384 -68
- package/src/daemon/ipc-contract-inventory.json +28 -4
- package/src/daemon/ipc-contract.ts +133 -37
- package/src/daemon/ipc-protocol.ts +7 -2
- package/src/daemon/lifecycle.ts +21 -0
- package/src/daemon/main.ts +10 -4
- package/src/daemon/ride-shotgun-handler.ts +74 -10
- package/src/daemon/server.ts +143 -26
- package/src/daemon/session-agent-loop.ts +887 -0
- package/src/daemon/session-attachments.ts +28 -5
- package/src/daemon/session-error.ts +24 -3
- package/src/daemon/session-lifecycle.ts +147 -0
- package/src/daemon/session-media-retry.ts +147 -0
- package/src/daemon/session-messaging.ts +145 -0
- package/src/daemon/session-notifiers.ts +164 -0
- package/src/daemon/session-process.ts +2 -2
- package/src/daemon/session-queue-manager.ts +1 -0
- package/src/daemon/session-runtime-assembly.ts +52 -0
- package/src/daemon/session-skill-tools.ts +124 -5
- package/src/daemon/session-slash.ts +3 -0
- package/src/daemon/session-surfaces.ts +77 -2
- package/src/daemon/session-tool-setup.ts +216 -2
- package/src/daemon/session-usage.ts +0 -2
- package/src/daemon/session.ts +114 -1404
- package/src/daemon/video-thumbnail.ts +60 -0
- package/src/doordash/client.ts +121 -27
- package/src/doordash/queries.ts +1 -2
- package/src/export/formatter.ts +3 -1
- package/src/followups/followup-store.ts +4 -2
- package/src/followups/types.ts +6 -0
- package/src/hooks/templates.ts +1 -1
- package/src/index.ts +32 -1153
- package/src/memory/attachments-store.ts +28 -83
- package/src/memory/channel-delivery-store.ts +7 -21
- package/src/memory/clarification-resolver.ts +6 -5
- package/src/memory/contradiction-checker.ts +3 -2
- package/src/memory/conversation-key-store.ts +10 -29
- package/src/memory/conversation-store.ts +2 -1
- package/src/memory/db.ts +96 -2
- package/src/memory/entity-extractor.ts +6 -3
- package/src/memory/items-extractor.ts +5 -4
- package/src/memory/jobs-store.ts +3 -2
- package/src/memory/llm-usage-store.ts +1 -2
- package/src/memory/runs-store.ts +1 -2
- package/src/memory/schema.ts +23 -2
- package/src/messaging/style-analyzer.ts +3 -2
- package/src/messaging/thread-summarizer.ts +8 -12
- package/src/messaging/triage-engine.ts +4 -2
- package/src/providers/openrouter/client.ts +20 -0
- package/src/providers/registry.ts +8 -0
- package/src/runtime/http-server.ts +108 -20
- package/src/runtime/routes/attachment-routes.ts +2 -3
- package/src/runtime/routes/call-routes.ts +140 -0
- package/src/runtime/routes/channel-routes.ts +5 -10
- package/src/runtime/routes/conversation-routes.ts +5 -5
- package/src/runtime/routes/run-routes.ts +2 -2
- package/src/runtime/run-orchestrator.ts +9 -3
- package/src/schedule/recurrence-engine.ts +138 -0
- package/src/schedule/recurrence-types.ts +67 -0
- package/src/schedule/schedule-store.ts +102 -57
- package/src/schedule/scheduler.ts +9 -6
- package/src/security/oauth2.ts +29 -4
- package/src/security/secret-allowlist.ts +46 -0
- package/src/skills/clawhub.ts +1 -1
- package/src/subagent/manager.ts +40 -8
- package/src/swarm/backend-claude-code.ts +64 -9
- package/src/swarm/worker-prompts.ts +2 -1
- package/src/tasks/SPEC.md +34 -28
- package/src/tasks/ephemeral-permissions.ts +16 -7
- package/src/tasks/task-compiler.ts +5 -4
- package/src/tasks/task-runner.ts +10 -5
- package/src/tasks/task-scheduler.ts +1 -1
- package/src/tasks/tool-sanitizer.ts +36 -0
- package/src/tools/assets/search.ts +4 -4
- package/src/tools/browser/api-map.ts +220 -0
- package/src/tools/browser/auto-navigate.ts +270 -0
- package/src/tools/browser/browser-execution.ts +2 -1
- package/src/tools/browser/browser-manager.ts +2 -2
- package/src/tools/browser/network-recorder.ts +5 -4
- package/src/tools/browser/x-auto-navigate.ts +207 -0
- package/src/tools/calls/call-end.ts +17 -67
- package/src/tools/calls/call-start.ts +24 -85
- package/src/tools/calls/call-status.ts +35 -51
- package/src/tools/claude-code/claude-code.ts +77 -11
- package/src/tools/contacts/contact-merge.ts +46 -78
- package/src/tools/contacts/contact-search.ts +35 -79
- package/src/tools/contacts/contact-upsert.ts +35 -108
- package/src/tools/credentials/vault.ts +20 -4
- package/src/tools/document/document-tool.ts +71 -144
- package/src/tools/executor.ts +129 -10
- package/src/tools/followups/followup_create.ts +46 -88
- package/src/tools/followups/followup_list.ts +34 -74
- package/src/tools/followups/followup_resolve.ts +31 -66
- package/src/tools/host-terminal/cli-discover.ts +2 -1
- package/src/tools/host-terminal/host-shell.ts +10 -0
- package/src/tools/memory/handlers.ts +5 -4
- package/src/tools/network/__tests__/web-search.test.ts +427 -0
- package/src/tools/network/script-proxy/__tests__/logging.test.ts +248 -0
- package/src/tools/network/script-proxy/__tests__/policy.test.ts +234 -0
- package/src/tools/network/script-proxy/__tests__/router.test.ts +76 -0
- package/src/tools/network/web-fetch.ts +18 -6
- package/src/tools/playbooks/index.ts +4 -5
- package/src/tools/playbooks/playbook-create.ts +3 -47
- package/src/tools/playbooks/playbook-delete.ts +1 -25
- package/src/tools/playbooks/playbook-list.ts +1 -28
- package/src/tools/playbooks/playbook-update.ts +3 -51
- package/src/tools/reminder/reminder.ts +5 -78
- package/src/tools/schedule/create.ts +69 -74
- package/src/tools/schedule/delete.ts +21 -47
- package/src/tools/schedule/list.ts +55 -74
- package/src/tools/schedule/update.ts +77 -84
- package/src/tools/subagent/abort.ts +29 -58
- package/src/tools/subagent/message.ts +30 -63
- package/src/tools/subagent/read.ts +53 -84
- package/src/tools/subagent/spawn.ts +43 -82
- package/src/tools/subagent/status.ts +42 -71
- package/src/tools/swarm/delegate.ts +2 -1
- package/src/tools/tasks/index.ts +8 -8
- package/src/tools/tasks/task-delete.ts +60 -88
- package/src/tools/tasks/task-list.ts +31 -52
- package/src/tools/tasks/task-run.ts +72 -108
- package/src/tools/tasks/task-save.ts +33 -65
- package/src/tools/tasks/work-item-enqueue.ts +183 -215
- package/src/tools/tasks/work-item-list.ts +33 -63
- package/src/tools/tasks/work-item-remove.ts +45 -97
- package/src/tools/tasks/work-item-update.ts +91 -163
- package/src/tools/terminal/backends/native.ts +3 -1
- package/src/tools/tool-manifest.ts +0 -62
- package/src/tools/types.ts +6 -0
- package/src/tools/ui-surface/definitions.ts +3 -1
- package/src/tools/watch/screen-watch.ts +3 -1
- package/src/tools/watcher/create.ts +52 -98
- package/src/tools/watcher/delete.ts +20 -46
- package/src/tools/watcher/digest.ts +36 -70
- package/src/tools/watcher/list.ts +49 -79
- package/src/tools/watcher/update.ts +45 -91
- package/src/twitter/client.ts +690 -0
- package/src/twitter/session.ts +91 -0
- package/src/usage/types.ts +0 -1
- package/src/util/truncate.ts +6 -0
- package/src/watcher/providers/slack.ts +2 -1
- package/src/watcher/watcher-store.ts +3 -2
- package/src/work-items/work-item-store.ts +27 -2
- package/src/workspace/commit-message-enrichment-service.ts +31 -7
- package/src/workspace/git-service.ts +87 -22
- package/src/workspace/provider-commit-message-generator.ts +242 -0
- package/src/workspace/turn-commit.ts +62 -3
- package/src/tools/contacts/index.ts +0 -4
- package/src/tools/document/index.ts +0 -5
- package/src/tools/followups/index.ts +0 -3
- package/src/tools/subagent/index.ts +0 -5
- /package/src/__tests__/{memory-context-benchmark.test.ts → memory-context-benchmark.benchmark.test.ts} +0 -0
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
import { describe, test, expect } from 'bun:test';
|
|
2
|
+
import { AgentLoop } from '../agent/loop.js';
|
|
3
|
+
import type { AgentEvent } from '../agent/loop.js';
|
|
4
|
+
import type {
|
|
5
|
+
Provider,
|
|
6
|
+
Message,
|
|
7
|
+
ProviderResponse,
|
|
8
|
+
SendMessageOptions,
|
|
9
|
+
ToolDefinition,
|
|
10
|
+
ContentBlock,
|
|
11
|
+
} from '../providers/types.js';
|
|
12
|
+
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Helpers (mirrors agent-loop.test.ts patterns)
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
|
|
17
|
+
/**
 * Creates a scripted in-memory Provider for driving the agent loop in tests.
 *
 * Each `sendMessage` call is recorded (with a shallow copy of the messages) in
 * `calls` and answered with the next entry of `responses`; once the script is
 * exhausted the last entry is returned again. When the caller passes an
 * `onEvent` callback, every text block of the chosen response is replayed to
 * it as a single `text_delta` event before the response is returned.
 */
function createMockProvider(
  responses: ProviderResponse[],
): { provider: Provider; calls: { messages: Message[]; tools?: ToolDefinition[]; systemPrompt?: string }[] } {
  const calls: { messages: Message[]; tools?: ToolDefinition[]; systemPrompt?: string }[] = [];
  let nextResponse = 0;

  const sendMessage = async (
    messages: Message[],
    tools?: ToolDefinition[],
    systemPrompt?: string,
    options?: SendMessageOptions,
  ): Promise<ProviderResponse> => {
    // Snapshot the message list so later mutations by the caller are not observed.
    calls.push({ messages: messages.slice(), tools, systemPrompt });

    // Past the end of the script, keep answering with the final response.
    const response = responses[nextResponse] ?? responses[responses.length - 1];
    nextResponse += 1;

    if (options?.onEvent) {
      for (const block of response.content) {
        if (block.type !== 'text') {
          continue;
        }
        options.onEvent({ type: 'text_delta', text: block.text });
      }
    }

    return response;
  };

  const provider: Provider = { name: 'mock', sendMessage };
  return { provider, calls };
}
|
|
49
|
+
|
|
50
|
+
/**
 * Wraps `text` in a terminal provider response: a single text block, the
 * fixed mock model name and token usage, and an `end_turn` stop reason.
 */
function textResponse(text: string): ProviderResponse {
  const response: ProviderResponse = {
    content: [{ type: 'text', text }],
    model: 'mock-model',
    usage: { inputTokens: 10, outputTokens: 5 },
    stopReason: 'end_turn',
  };
  return response;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Builds a provider response whose content is one `tool_use` block per entry
 * of `tools` (same order), with a `tool_use` stop reason so the agent loop
 * treats all of them as calls requested in a single turn.
 */
function parallelToolUseResponse(
  tools: Array<{ id: string; name: string; input: Record<string, unknown> }>,
): ProviderResponse {
  const content = tools.map(({ id, name, input }) => ({
    type: 'tool_use' as const,
    id,
    name,
    input,
  }));

  return {
    content,
    model: 'mock-model',
    usage: { inputTokens: 10, outputTokens: 5 },
    stopReason: 'tool_use' as const,
  };
}
|
|
75
|
+
|
|
76
|
+
// The single tool definition advertised to the agent loop in every benchmark.
const dummyTools: ToolDefinition[] = [
  {
    name: 'delay_tool',
    description: 'Delays',
    input_schema: { type: 'object', properties: {} },
  },
];

// The user turn that starts each benchmark run.
const userMessage: Message = {
  role: 'user',
  content: [
    { type: 'text', text: 'Run benchmarks' },
  ],
};
|
|
84
|
+
|
|
85
|
+
/** Returns an event callback that appends every event it receives to `events`. */
function collectEvents(events: AgentEvent[]): (event: AgentEvent) => void {
  const record = (event: AgentEvent) => events.push(event);
  return record;
}
|
|
88
|
+
|
|
89
|
+
// ---------------------------------------------------------------------------
|
|
90
|
+
// Benchmark Tests
|
|
91
|
+
// ---------------------------------------------------------------------------
|
|
92
|
+
|
|
93
|
+
describe('Parallel tool execution benchmarks', () => {
|
|
94
|
+
// 1. Five 50ms tools requested in one turn should cost about one delay (~50ms)
//    when run in parallel, not the ~250ms a sequential run would take.
test('5 tools at 50ms each complete in parallel (~50ms, not ~250ms)', async () => {
  const delayMs = 50;
  const toolCount = 5;

  const toolUseBlocks: Array<{ id: string; name: string; input: Record<string, unknown> }> = [];
  for (let i = 0; i < toolCount; i++) {
    toolUseBlocks.push({ id: `t${i}`, name: 'delay_tool', input: { index: i } });
  }

  // First provider turn requests all tools at once; second turn ends the run.
  const scriptedResponses = [parallelToolUseResponse(toolUseBlocks), textResponse('All done.')];
  const { provider } = createMockProvider(scriptedResponses);

  // Every tool call simply waits `delayMs` and succeeds.
  const toolExecutor = async () => {
    await new Promise((resolve) => setTimeout(resolve, delayMs));
    return { content: 'ok', isError: false };
  };

  const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);

  const startedAt = Date.now();
  await loop.run([userMessage], () => {});
  const elapsedMs = Date.now() - startedAt;

  // Parallel: ~50ms + overhead. Sequential would be ~250ms.
  // The 150ms ceiling leaves room for CI/scheduling overhead.
  expect(elapsedMs).toBeLessThan(150);
});
|
|
124
|
+
|
|
125
|
+
// 2. Ten 50ms tools requested in one turn should still finish in roughly one delay.
test('10 tools at 50ms each still complete in parallel (< 200ms)', async () => {
  const delayMs = 50;
  const toolCount = 10;

  const toolUseBlocks: Array<{ id: string; name: string; input: Record<string, unknown> }> = [];
  for (let i = 0; i < toolCount; i++) {
    toolUseBlocks.push({ id: `t${i}`, name: 'delay_tool', input: { index: i } });
  }

  const scriptedResponses = [parallelToolUseResponse(toolUseBlocks), textResponse('All done.')];
  const { provider } = createMockProvider(scriptedResponses);

  // One entry per tool call: which tool ran and when it started/finished.
  const executionLog: { index: number; start: number; end: number }[] = [];
  const toolExecutor = async (_name: string, input: Record<string, unknown>) => {
    const startedAt = Date.now();
    await new Promise((resolve) => setTimeout(resolve, delayMs));
    const finishedAt = Date.now();
    executionLog.push({ index: input.index as number, start: startedAt, end: finishedAt });
    return { content: 'ok', isError: false };
  };

  const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);

  const runStartedAt = Date.now();
  await loop.run([userMessage], () => {});
  const elapsedMs = Date.now() - runStartedAt;

  // Every requested tool must have been executed.
  expect(executionLog).toHaveLength(toolCount);

  // Parallel: ~50ms + overhead. Sequential would be ~500ms.
  // The 200ms ceiling leaves room for CI/scheduling overhead with 10 concurrent timers.
  expect(elapsedMs).toBeLessThan(200);

  // Overlap check: the latest start must not come after the earliest finish,
  // i.e. every tool was already running before any of them completed.
  let lastStart = -Infinity;
  let firstEnd = Infinity;
  for (const entry of executionLog) {
    lastStart = Math.max(lastStart, entry.start);
    firstEnd = Math.min(firstEnd, entry.end);
  }
  expect(lastStart).toBeLessThanOrEqual(firstEnd);
});
|
|
169
|
+
|
|
170
|
+
// 3. Mixed latencies: 1 slow (2s) + 4 fast (100ms) = ~2s parallel, ~2.4s sequential
test('mixed latencies: 1 slow + 4 fast tools complete in slow-tool time', async () => {
  const toolUseBlocks = [
    { id: 'slow', name: 'delay_tool', input: { delayMs: 2000 } },
    { id: 'fast1', name: 'delay_tool', input: { delayMs: 100 } },
    { id: 'fast2', name: 'delay_tool', input: { delayMs: 100 } },
    { id: 'fast3', name: 'delay_tool', input: { delayMs: 100 } },
    { id: 'fast4', name: 'delay_tool', input: { delayMs: 100 } },
  ];

  const { provider } = createMockProvider([
    parallelToolUseResponse(toolUseBlocks),
    textResponse('Mixed done.'),
  ]);

  // Delay of each tool call, appended in the order the calls actually finish.
  // This was previously tracked (via an id lookup) but never asserted, so the
  // "fast tools finish earlier" premise of the ordering check went unverified.
  const completedDelays: number[] = [];
  const toolExecutor = async (_name: string, input: Record<string, unknown>) => {
    const delay = input.delayMs as number;
    await new Promise((r) => setTimeout(r, delay));
    completedDelays.push(delay);
    return { content: 'ok', isError: false };
  };

  const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
  const events: AgentEvent[] = [];
  const start = Date.now();
  await loop.run([userMessage], collectEvents(events));
  const elapsed = Date.now() - start;

  // Parallel: ~2000ms (dominated by slow tool). Sequential: ~2400ms (2000 + 4*100).
  // Upper bound of 2200ms ensures a sequential implementation would fail.
  expect(elapsed).toBeGreaterThanOrEqual(1900);
  expect(elapsed).toBeLessThan(2200);

  // The four fast tools must really have finished before the slow one;
  // otherwise the event-order assertions below would prove nothing.
  expect(completedDelays).toEqual([100, 100, 100, 100, 2000]);

  // tool_result events should be emitted in tool_use order (slow first),
  // even though fast tools finish earlier
  const toolResultEvents = events.filter(
    (e): e is Extract<AgentEvent, { type: 'tool_result' }> => e.type === 'tool_result',
  );
  expect(toolResultEvents).toHaveLength(5);
  expect(toolResultEvents[0].toolUseId).toBe('slow');
  expect(toolResultEvents[1].toolUseId).toBe('fast1');
}, 10000); // explicit 10s timeout: the slow tool alone takes 2s
|
|
216
|
+
|
|
217
|
+
// 4. Aborting while tools are running in parallel must end the run within 200ms.
test('abort during parallel execution cancels within 200ms', async () => {
  // Collect (and suppress) any unhandled rejection raised while this test runs
  // so it can be asserted on at the end.
  const unhandledRejections: Error[] = [];
  function handler(event: PromiseRejectionEvent): void {
    unhandledRejections.push(event.reason);
    event.preventDefault();
  }
  globalThis.addEventListener('unhandledrejection', handler);

  try {
    const toolCount = 5;

    const toolUseBlocks: Array<{ id: string; name: string; input: Record<string, unknown> }> = [];
    for (let i = 0; i < toolCount; i++) {
      toolUseBlocks.push({ id: `t${i}`, name: 'delay_tool', input: { index: i } });
    }

    const scriptedResponses = [
      parallelToolUseResponse(toolUseBlocks),
      textResponse('Should not reach.'),
    ];
    const { provider } = createMockProvider(scriptedResponses);

    const controller = new AbortController();

    // Keep a handle on each tool's pending promise so the test can wait for
    // all of them to settle after the abort, while the listener is still attached.
    const toolPromises: Promise<unknown>[] = [];

    const toolExecutor = async () => {
      // Each tool takes 500ms; the abort fires at 50ms, well before completion,
      // yet the delay is short enough that the test can wait for settlement.
      const pending = new Promise<void>((resolve) => {
        setTimeout(resolve, 500);
      });
      toolPromises.push(pending);

      setTimeout(() => controller.abort(), 50);
      await pending;
      return { content: 'should not return', isError: false };
    };

    const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);

    const startedAt = Date.now();
    const history = await loop.run([userMessage], () => {}, controller.signal);
    const elapsedMs = Date.now() - startedAt;

    // The run should return shortly after the 50ms abort instead of waiting 500ms.
    expect(elapsedMs).toBeLessThan(200);

    // Expected history: user msg, assistant (tool_use), user (cancelled tool_results)
    expect(history).toHaveLength(3);

    const lastMsg = history[history.length - 1];
    expect(lastMsg.role).toBe('user');

    const toolResultBlocks = lastMsg.content.filter(
      (b): b is Extract<ContentBlock, { type: 'tool_result' }> => b.type === 'tool_result',
    );
    expect(toolResultBlocks).toHaveLength(toolCount);

    // Every tool result must be reported as a cancellation error.
    toolResultBlocks.forEach((block) => {
      expect(block.content).toBe('Cancelled by user');
      expect(block.is_error).toBe(true);
    });

    // Let all abandoned tool promises settle so any late rejection is raised
    // while the listener is still registered.
    await Promise.allSettled(toolPromises);

    // No unhandled rejection may have been observed.
    expect(unhandledRejections).toHaveLength(0);
  } finally {
    globalThis.removeEventListener('unhandledrejection', handler);
  }
});
|
|
294
|
+
});
|
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
import { describe, test, expect, beforeEach, afterAll, mock } from 'bun:test';
|
|
2
|
+
import { mkdtempSync, rmSync } from 'node:fs';
|
|
3
|
+
import { tmpdir } from 'node:os';
|
|
4
|
+
import { join } from 'node:path';
|
|
5
|
+
|
|
6
|
+
// Scratch directory for this suite; the platform stub below points every
// data path at it.
const testDir = mkdtempSync(join(tmpdir(), 'playbook-tools-test-'));

// Replacement for ../util/platform.js: OS checks still reflect the real
// process.platform, all paths resolve inside testDir, and the directory
// setup/migration hooks do nothing.
const platformStub = () => ({
  getDataDir: () => testDir,
  isMacOS: () => process.platform === 'darwin',
  isLinux: () => process.platform === 'linux',
  isWindows: () => process.platform === 'win32',
  getSocketPath: () => join(testDir, 'test.sock'),
  getPidPath: () => join(testDir, 'test.pid'),
  getDbPath: () => join(testDir, 'test.db'),
  getLogPath: () => join(testDir, 'test.log'),
  ensureDataDir: () => {},
  migrateToDataLayout: () => {},
  migrateToWorkspaceLayout: () => {},
});

// Replacement for ../util/logger.js: getLogger() returns a Proxy on which
// every property read yields a no-op function, so any logging call is
// silently accepted.
const loggerStub = () => ({
  getLogger: () =>
    new Proxy({} as Record<string, unknown>, {
      get: () => () => {},
    }),
});

// Replacement for ../config/loader.js: config with an empty `memory` section.
const configStub = () => ({
  getConfig: () => ({ memory: {} }),
});

// Replacement for ../memory/jobs-store.js: stub the memory job queue to
// avoid side effects.
const jobsStoreStub = () => ({
  enqueueMemoryJob: () => {},
});

mock.module('../util/platform.js', platformStub);
mock.module('../util/logger.js', loggerStub);
mock.module('../config/loader.js', configStub);
mock.module('../memory/jobs-store.js', jobsStoreStub);
|
|
36
|
+
|
|
37
|
+
import type { Database } from 'bun:sqlite';
|
|
38
|
+
import { initializeDb, getDb, resetDb } from '../memory/db.js';
|
|
39
|
+
import type { ToolContext } from '../tools/types.js';
|
|
40
|
+
import {
|
|
41
|
+
executePlaybookCreate,
|
|
42
|
+
executePlaybookList,
|
|
43
|
+
executePlaybookUpdate,
|
|
44
|
+
executePlaybookDelete,
|
|
45
|
+
} from '../tools/playbooks/index.js';
|
|
46
|
+
|
|
47
|
+
// Initialise the database once for the whole file (the platform stub is
// already registered at this point).
initializeDb();

// Suite teardown: reset the DB module first, then remove the scratch
// directory. Removal is best effort; a failure here must not fail the run.
afterAll(function cleanUp() {
  resetDb();
  try {
    rmSync(testDir, { recursive: true });
  } catch {
    /* best effort */
  }
});
|
|
53
|
+
|
|
54
|
+
/**
 * Returns the raw `bun:sqlite` Database read from the `$client` property of
 * the object that `getDb()` returns.
 */
function getRawDb(): Database {
  const wrapper = getDb() as unknown as { $client: Database };
  return wrapper.$client;
}
|
|
57
|
+
|
|
58
|
+
// Fixed ToolContext handed to every tool invocation in this file.
const ctx: ToolContext = { workingDir: '/tmp', sessionId: 'test-session', conversationId: 'test-conversation' };
|
|
63
|
+
|
|
64
|
+
/** Deletes every `memory_items` row whose kind is 'playbook'. */
function clearPlaybooks(): void {
  const rawDb = getRawDb();
  rawDb.run("DELETE FROM memory_items WHERE kind = 'playbook'");
}
|
|
67
|
+
|
|
68
|
+
/**
 * Pulls the first non-whitespace token that follows "ID: " out of a tool's
 * textual output.
 *
 * @param content - Text to search.
 * @returns The captured ID token.
 * Fails the surrounding test through `expect` when the marker is absent.
 */
function extractPlaybookId(content: string): string {
  const found = /ID: (\S+)/.exec(content);
  expect(found).not.toBeNull();
  return found![1];
}
|
|
73
|
+
|
|
74
|
+
// ── playbook_create ─────────────────────────────────────────────────
|
|
75
|
+
|
|
76
|
+
describe('playbook_create tool', () => {
  // Every test starts with no playbook rows.
  beforeEach(clearPlaybooks);

  test('creates a playbook with required fields', async () => {
    const { isError, content } = await executePlaybookCreate({
      trigger: 'meeting request',
      action: 'check calendar, propose 3 times',
    }, ctx);

    expect(isError).toBe(false);

    const expectedFragments = [
      'Playbook created successfully',
      'meeting request',
      'check calendar, propose 3 times',
      'Autonomy: draft for review', // default
      'Channel: *', // default
      'Category: general', // default
      'Priority: 0', // default
    ];
    for (const fragment of expectedFragments) {
      expect(content).toContain(fragment);
    }
  });

  test('creates a playbook with all optional fields', async () => {
    const { isError, content } = await executePlaybookCreate({
      trigger: 'from:ceo@*',
      action: 'prioritize and draft response',
      channel: 'email',
      category: 'triage',
      autonomy_level: 'auto',
      priority: 10,
    }, ctx);

    expect(isError).toBe(false);

    const expectedFragments = [
      'from:ceo@*',
      'Channel: email',
      'Category: triage',
      'Autonomy: execute automatically',
      'Priority: 10',
    ];
    for (const fragment of expectedFragments) {
      expect(content).toContain(fragment);
    }
  });

  test('creates with notify autonomy level', async () => {
    const { isError, content } = await executePlaybookCreate({
      trigger: 'newsletter',
      action: 'archive',
      autonomy_level: 'notify',
    }, ctx);

    expect(isError).toBe(false);
    expect(content).toContain('Autonomy: notify only');
  });

  test('rejects duplicate playbook', async () => {
    // First creation seeds the row the second call collides with.
    await executePlaybookCreate({
      trigger: 'unique trigger',
      action: 'unique action',
    }, ctx);

    const { isError, content } = await executePlaybookCreate({
      trigger: 'unique trigger',
      action: 'unique action',
    }, ctx);

    // The duplicate is reported through the message, not the error flag.
    expect(isError).toBe(false);
    expect(content).toContain('already exists');
  });

  test('rejects missing trigger', async () => {
    const { isError, content } = await executePlaybookCreate({
      action: 'do something',
    }, ctx);

    expect(isError).toBe(true);
    expect(content).toContain('trigger is required');
  });

  test('rejects missing action', async () => {
    const { isError, content } = await executePlaybookCreate({
      trigger: 'test trigger',
    }, ctx);

    expect(isError).toBe(true);
    expect(content).toContain('action is required');
  });
});
|
|
157
|
+
|
|
158
|
+
// ── playbook_list ───────────────────────────────────────────────────
|
|
159
|
+
|
|
160
|
+
describe('playbook_list tool', () => {
  // Every test starts with no playbook rows.
  beforeEach(clearPlaybooks);

  test('returns empty message when no playbooks exist', async () => {
    const { isError, content } = await executePlaybookList({}, ctx);

    expect(isError).toBe(false);
    expect(content).toContain('No playbooks found');
  });

  test('lists all playbooks', async () => {
    // Seed two playbooks, one after the other.
    const seeds = [
      { trigger: 'meeting request', action: 'check calendar' },
      { trigger: 'newsletter', action: 'archive it' },
    ];
    for (const seed of seeds) {
      await executePlaybookCreate(seed, ctx);
    }

    const { isError, content } = await executePlaybookList({}, ctx);

    expect(isError).toBe(false);
    expect(content).toContain('Found 2 playbook(s)');
    expect(content).toContain('meeting request');
    expect(content).toContain('newsletter');
  });

  test('filters by channel', async () => {
    const seeds = [
      { trigger: 'email trigger', action: 'handle email', channel: 'email' },
      { trigger: 'slack trigger', action: 'handle slack', channel: 'slack' },
    ];
    for (const seed of seeds) {
      await executePlaybookCreate(seed, ctx);
    }

    const { isError, content } = await executePlaybookList({ channel: 'email' }, ctx);

    expect(isError).toBe(false);
    expect(content).toContain('email trigger');
    expect(content).not.toContain('slack trigger');
  });

  test('filters by category', async () => {
    const seeds = [
      { trigger: 'scheduling trigger', action: 'schedule it', category: 'scheduling' },
      { trigger: 'triage trigger', action: 'triage it', category: 'triage' },
    ];
    for (const seed of seeds) {
      await executePlaybookCreate(seed, ctx);
    }

    const { isError, content } = await executePlaybookList({ category: 'scheduling' }, ctx);

    expect(isError).toBe(false);
    expect(content).toContain('scheduling trigger');
    expect(content).not.toContain('triage trigger');
  });

  test('includes wildcard channel playbooks in channel filter', async () => {
    await executePlaybookCreate({
      trigger: 'wildcard trigger',
      action: 'handle anything',
      channel: '*',
    }, ctx);

    // A '*' playbook must still show up when listing a specific channel.
    const { isError, content } = await executePlaybookList({ channel: 'email' }, ctx);

    expect(isError).toBe(false);
    expect(content).toContain('wildcard trigger');
  });
});
|
|
239
|
+
|
|
240
|
+
// ── playbook_update ─────────────────────────────────────────────────
|
|
241
|
+
|
|
242
|
+
describe('playbook_update tool', () => {
  // Every test starts with no playbook rows.
  beforeEach(clearPlaybooks);

  test('updates the trigger', async () => {
    const created = await executePlaybookCreate({
      trigger: 'old trigger',
      action: 'do something',
    }, ctx);
    const playbookId = extractPlaybookId(created.content);

    const { isError, content } = await executePlaybookUpdate({
      playbook_id: playbookId,
      trigger: 'new trigger',
    }, ctx);

    expect(isError).toBe(false);
    expect(content).toContain('Playbook updated successfully');
    expect(content).toContain('new trigger');
  });

  test('updates multiple fields at once', async () => {
    const created = await executePlaybookCreate({
      trigger: 'test',
      action: 'old action',
    }, ctx);
    const playbookId = extractPlaybookId(created.content);

    const { isError, content } = await executePlaybookUpdate({
      playbook_id: playbookId,
      action: 'new action',
      channel: 'slack',
      category: 'notifications',
      autonomy_level: 'auto',
      priority: 5,
    }, ctx);

    expect(isError).toBe(false);

    const expectedFragments = [
      'new action',
      'Channel: slack',
      'Category: notifications',
      'Autonomy: execute automatically',
      'Priority: 5',
    ];
    for (const fragment of expectedFragments) {
      expect(content).toContain(fragment);
    }
  });

  test('rejects missing playbook_id', async () => {
    const { isError, content } = await executePlaybookUpdate({
      trigger: 'new trigger',
    }, ctx);

    expect(isError).toBe(true);
    expect(content).toContain('playbook_id is required');
  });

  test('returns error for nonexistent playbook_id', async () => {
    const { isError, content } = await executePlaybookUpdate({
      playbook_id: 'nonexistent',
      trigger: 'test',
    }, ctx);

    expect(isError).toBe(true);
    expect(content).toContain('not found');
  });
});
|
|
305
|
+
|
|
306
|
+
// ── playbook_delete ─────────────────────────────────────────────────
|
|
307
|
+
|
|
308
|
+
describe('playbook_delete tool', () => {
  // Every test starts with no playbook rows.
  beforeEach(clearPlaybooks);

  test('deletes a playbook', async () => {
    const created = await executePlaybookCreate({
      trigger: 'delete me',
      action: 'to be deleted',
    }, ctx);
    const playbookId = extractPlaybookId(created.content);

    const { isError, content } = await executePlaybookDelete({ playbook_id: playbookId }, ctx);

    expect(isError).toBe(false);
    expect(content).toContain('Playbook deleted');
    expect(content).toContain('delete me');

    // Verify it no longer appears in list
    const remaining = await executePlaybookList({}, ctx);
    expect(remaining.content).toContain('No playbooks found');
  });

  test('rejects missing playbook_id', async () => {
    const { isError, content } = await executePlaybookDelete({}, ctx);

    expect(isError).toBe(true);
    expect(content).toContain('playbook_id is required');
  });

  test('returns error for nonexistent playbook_id', async () => {
    const { isError, content } = await executePlaybookDelete({ playbook_id: 'nonexistent' }, ctx);

    expect(isError).toBe(true);
    expect(content).toContain('not found');
  });
});
|
|
@@ -42,13 +42,14 @@ mock.module('../config/loader.js', () => ({
|
|
|
42
42
|
}));
|
|
43
43
|
|
|
44
44
|
import { estimateTextTokens } from '../context/token-estimator.js';
|
|
45
|
-
import { getDb, initializeDb } from '../memory/db.js';
|
|
45
|
+
import { getDb, initializeDb, resetDb } from '../memory/db.js';
|
|
46
46
|
import { compileDynamicProfile } from '../memory/profile-compiler.js';
|
|
47
47
|
import { memoryItems } from '../memory/schema.js';
|
|
48
48
|
|
|
49
49
|
initializeDb();
|
|
50
50
|
|
|
51
51
|
afterAll(() => {
|
|
52
|
+
resetDb();
|
|
52
53
|
try { rmSync(testDir, { recursive: true }); } catch { /* best effort */ }
|
|
53
54
|
});
|
|
54
55
|
|