vellum 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -2
- package/bun.lock +5 -2
- package/package.json +4 -2
- package/scripts/capture-x-graphql.ts +562 -0
- package/scripts/ipc/check-swift-decoder-drift.ts +2 -1
- package/scripts/test.sh +5 -0
- package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +133 -34
- package/src/__tests__/account-registry.test.ts +2 -1
- package/src/__tests__/agent-heartbeat-service.test.ts +250 -0
- package/src/__tests__/asset-materialize-tool.test.ts +16 -15
- package/src/__tests__/asset-search-tool.test.ts +23 -22
- package/src/__tests__/attachments-store.test.ts +56 -127
- package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +5 -4
- package/src/__tests__/browser-skill-endstate.test.ts +4 -3
- package/src/__tests__/call-bridge.test.ts +385 -0
- package/src/__tests__/call-constants.test.ts +40 -0
- package/src/__tests__/call-orchestrator.test.ts +130 -4
- package/src/__tests__/call-recovery.test.ts +518 -0
- package/src/__tests__/call-routes-http.test.ts +459 -0
- package/src/__tests__/call-state-machine.test.ts +143 -0
- package/src/__tests__/call-store.test.ts +216 -1
- package/src/__tests__/cli-discover.test.ts +1 -1
- package/src/__tests__/commit-message-enrichment-service.test.ts +148 -7
- package/src/__tests__/compaction.benchmark.test.ts +176 -0
- package/src/__tests__/computer-use-tools.test.ts +250 -0
- package/src/__tests__/config-schema.test.ts +299 -3
- package/src/__tests__/conflict-store.test.ts +2 -1
- package/src/__tests__/contacts-tools.test.ts +331 -0
- package/src/__tests__/conversation-store.test.ts +30 -32
- package/src/__tests__/credential-security-invariants.test.ts +4 -0
- package/src/__tests__/date-context.test.ts +373 -0
- package/src/__tests__/db-schedule-syntax-migration.test.ts +129 -0
- package/src/__tests__/fixtures/media-reuse-fixtures.ts +3 -3
- package/src/__tests__/followup-tools.test.ts +303 -0
- package/src/__tests__/handlers-twitter-config.test.ts +718 -0
- package/src/__tests__/intent-routing.test.ts +64 -57
- package/src/__tests__/ipc-roundtrip.benchmark.test.ts +237 -0
- package/src/__tests__/ipc-snapshot.test.ts +62 -28
- package/src/__tests__/llm-usage-store.test.ts +3 -8
- package/src/__tests__/media-generate-image.test.ts +1 -1
- package/src/__tests__/media-reuse-story.e2e.test.ts +7 -7
- package/src/__tests__/memory-retrieval.benchmark.test.ts +430 -0
- package/src/__tests__/parallel-tool.benchmark.test.ts +294 -0
- package/src/__tests__/playbook-tools.test.ts +342 -0
- package/src/__tests__/profile-compiler.test.ts +2 -1
- package/src/__tests__/provider-streaming.benchmark.test.ts +773 -0
- package/src/__tests__/recurrence-engine-rruleset.test.ts +78 -0
- package/src/__tests__/recurrence-engine.test.ts +69 -0
- package/src/__tests__/recurrence-types.test.ts +71 -0
- package/src/__tests__/registry.test.ts +5 -3
- package/src/__tests__/relay-server.test.ts +633 -0
- package/src/__tests__/reminder-store.test.ts +6 -3
- package/src/__tests__/reminder.test.ts +43 -77
- package/src/__tests__/run-orchestrator-assistant-events.test.ts +8 -4
- package/src/__tests__/run-orchestrator.test.ts +4 -4
- package/src/__tests__/runtime-attachment-metadata.test.ts +7 -6
- package/src/__tests__/runtime-runs-http.test.ts +4 -4
- package/src/__tests__/runtime-runs.test.ts +4 -4
- package/src/__tests__/schedule-store.test.ts +482 -0
- package/src/__tests__/schedule-tools.test.ts +700 -0
- package/src/__tests__/scheduler-recurrence.test.ts +329 -0
- package/src/__tests__/server-history-render.test.ts +14 -13
- package/src/__tests__/session-error.test.ts +28 -0
- package/src/__tests__/session-init.benchmark.test.ts +462 -0
- package/src/__tests__/session-queue.test.ts +71 -48
- package/src/__tests__/session-runtime-assembly.test.ts +161 -0
- package/src/__tests__/session-surfaces-task-progress.test.ts +104 -0
- package/src/__tests__/signup-e2e.test.ts +2 -1
- package/src/__tests__/skill-projection.benchmark.test.ts +328 -0
- package/src/__tests__/skill-script-runner.test.ts +159 -0
- package/src/__tests__/speaker-identification.test.ts +52 -0
- package/src/__tests__/subagent-manager-notify.test.ts +42 -10
- package/src/__tests__/subagent-tools.test.ts +141 -41
- package/src/__tests__/task-compiler.test.ts +2 -1
- package/src/__tests__/task-runner.test.ts +2 -1
- package/src/__tests__/task-scheduler.test.ts +2 -1
- package/src/__tests__/task-tools.test.ts +49 -56
- package/src/__tests__/tool-audit-listener.test.ts +1 -0
- package/src/__tests__/tool-domain-event-publisher.test.ts +2 -0
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +500 -0
- package/src/__tests__/tool-executor.test.ts +13 -17
- package/src/__tests__/turn-commit.test.ts +218 -3
- package/src/__tests__/twilio-provider.test.ts +143 -0
- package/src/__tests__/twilio-routes.test.ts +789 -0
- package/src/__tests__/twitter-auth-handler.test.ts +581 -0
- package/src/__tests__/view-image-tool.test.ts +217 -0
- package/src/__tests__/workspace-git-service.test.ts +186 -0
- package/src/__tests__/workspace-heartbeat-service.test.ts +13 -3
- package/src/agent-heartbeat/agent-heartbeat-service.ts +155 -0
- package/src/bundler/app-bundler.ts +12 -8
- package/src/calls/call-bridge.ts +95 -0
- package/src/calls/call-constants.ts +43 -5
- package/src/calls/call-domain.ts +276 -0
- package/src/calls/call-orchestrator.ts +43 -17
- package/src/calls/call-recovery.ts +207 -0
- package/src/calls/call-state-machine.ts +68 -0
- package/src/calls/call-store.ts +192 -5
- package/src/calls/relay-server.ts +41 -4
- package/src/calls/speaker-identification.ts +213 -0
- package/src/calls/twilio-provider.ts +10 -6
- package/src/calls/twilio-routes.ts +90 -76
- package/src/calls/types.ts +1 -1
- package/src/cli/config-commands.ts +334 -0
- package/src/cli/core-commands.ts +776 -0
- package/src/cli/doordash.ts +251 -1
- package/src/cli/ipc-client.ts +82 -0
- package/src/cli/map.ts +246 -0
- package/src/cli/twitter.ts +575 -0
- package/src/cli.ts +7 -5
- package/src/commands/__tests__/cc-command-registry.test.ts +319 -0
- package/src/commands/cc-command-registry.ts +209 -0
- package/src/config/bundled-skills/contacts/SKILL.md +39 -0
- package/src/config/bundled-skills/contacts/TOOLS.json +122 -0
- package/src/config/bundled-skills/contacts/tools/contact-merge.ts +9 -0
- package/src/config/bundled-skills/contacts/tools/contact-search.ts +9 -0
- package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +9 -0
- package/src/config/bundled-skills/document/SKILL.md +18 -0
- package/src/config/bundled-skills/document/TOOLS.json +53 -0
- package/src/config/bundled-skills/document/tools/document-create.ts +9 -0
- package/src/config/bundled-skills/document/tools/document-update.ts +9 -0
- package/src/config/bundled-skills/doordash/SKILL.md +82 -23
- package/src/config/bundled-skills/followups/SKILL.md +32 -0
- package/src/config/bundled-skills/followups/TOOLS.json +100 -0
- package/src/config/bundled-skills/followups/tools/followup-create.ts +9 -0
- package/src/config/bundled-skills/followups/tools/followup-list.ts +9 -0
- package/src/config/bundled-skills/followups/tools/followup-resolve.ts +9 -0
- package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +1 -23
- package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +2 -1
- package/src/config/bundled-skills/playbooks/SKILL.md +31 -0
- package/src/config/bundled-skills/playbooks/TOOLS.json +126 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +9 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +9 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +9 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +9 -0
- package/src/config/bundled-skills/reminder/SKILL.md +20 -0
- package/src/config/bundled-skills/reminder/TOOLS.json +67 -0
- package/src/config/bundled-skills/reminder/tools/reminder-cancel.ts +9 -0
- package/src/config/bundled-skills/reminder/tools/reminder-create.ts +9 -0
- package/src/config/bundled-skills/reminder/tools/reminder-list.ts +9 -0
- package/src/config/bundled-skills/schedule/SKILL.md +74 -0
- package/src/config/bundled-skills/schedule/TOOLS.json +135 -0
- package/src/config/bundled-skills/schedule/tools/schedule-create.ts +9 -0
- package/src/config/bundled-skills/schedule/tools/schedule-delete.ts +9 -0
- package/src/config/bundled-skills/schedule/tools/schedule-list.ts +9 -0
- package/src/config/bundled-skills/schedule/tools/schedule-update.ts +9 -0
- package/src/config/bundled-skills/subagent/SKILL.md +25 -0
- package/src/config/bundled-skills/subagent/TOOLS.json +107 -0
- package/src/config/bundled-skills/subagent/tools/subagent-abort.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-message.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-read.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-spawn.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-status.ts +9 -0
- package/src/config/bundled-skills/tasks/SKILL.md +28 -0
- package/src/config/bundled-skills/tasks/TOOLS.json +256 -0
- package/src/config/bundled-skills/tasks/tools/task-delete.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-add.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-remove.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-show.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-update.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-run.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-save.ts +9 -0
- package/src/config/bundled-skills/twitter/SKILL.md +134 -0
- package/src/config/bundled-skills/watcher/SKILL.md +27 -0
- package/src/config/bundled-skills/watcher/TOOLS.json +147 -0
- package/src/config/bundled-skills/watcher/tools/watcher-create.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-delete.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-digest.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-list.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-update.ts +9 -0
- package/src/config/defaults.ts +33 -0
- package/src/config/loader.ts +4 -1
- package/src/config/schema.ts +161 -1
- package/src/config/system-prompt.ts +61 -16
- package/src/config/templates/IDENTITY.md +7 -0
- package/src/config/types.ts +4 -0
- package/src/contacts/contact-store.ts +4 -4
- package/src/daemon/assistant-attachments.ts +10 -0
- package/src/daemon/classifier.ts +3 -1
- package/src/daemon/computer-use-session.ts +3 -1
- package/src/daemon/date-context.ts +136 -0
- package/src/daemon/handlers/apps.ts +16 -1
- package/src/daemon/handlers/browser.ts +54 -0
- package/src/daemon/handlers/computer-use.ts +7 -1
- package/src/daemon/handlers/config.ts +163 -5
- package/src/daemon/handlers/diagnostics.ts +5 -1
- package/src/daemon/handlers/documents.ts +18 -29
- package/src/daemon/handlers/home-base.ts +5 -1
- package/src/daemon/handlers/index.ts +40 -277
- package/src/daemon/handlers/misc.ts +9 -1
- package/src/daemon/handlers/publish.ts +6 -1
- package/src/daemon/handlers/sessions.ts +65 -12
- package/src/daemon/handlers/shared.ts +36 -1
- package/src/daemon/handlers/signing.ts +37 -0
- package/src/daemon/handlers/skills.ts +20 -6
- package/src/daemon/handlers/subagents.ts +8 -3
- package/src/daemon/handlers/twitter-auth.ts +169 -0
- package/src/daemon/handlers/work-items.ts +384 -68
- package/src/daemon/ipc-contract-inventory.json +28 -4
- package/src/daemon/ipc-contract.ts +133 -37
- package/src/daemon/ipc-protocol.ts +7 -2
- package/src/daemon/lifecycle.ts +21 -0
- package/src/daemon/main.ts +10 -4
- package/src/daemon/ride-shotgun-handler.ts +74 -10
- package/src/daemon/server.ts +143 -26
- package/src/daemon/session-agent-loop.ts +887 -0
- package/src/daemon/session-attachments.ts +28 -5
- package/src/daemon/session-error.ts +24 -3
- package/src/daemon/session-lifecycle.ts +147 -0
- package/src/daemon/session-media-retry.ts +147 -0
- package/src/daemon/session-messaging.ts +145 -0
- package/src/daemon/session-notifiers.ts +164 -0
- package/src/daemon/session-process.ts +2 -2
- package/src/daemon/session-queue-manager.ts +1 -0
- package/src/daemon/session-runtime-assembly.ts +52 -0
- package/src/daemon/session-skill-tools.ts +124 -5
- package/src/daemon/session-slash.ts +3 -0
- package/src/daemon/session-surfaces.ts +77 -2
- package/src/daemon/session-tool-setup.ts +216 -2
- package/src/daemon/session-usage.ts +0 -2
- package/src/daemon/session.ts +114 -1404
- package/src/daemon/video-thumbnail.ts +60 -0
- package/src/doordash/client.ts +121 -27
- package/src/doordash/queries.ts +1 -2
- package/src/export/formatter.ts +3 -1
- package/src/followups/followup-store.ts +4 -2
- package/src/followups/types.ts +6 -0
- package/src/hooks/templates.ts +1 -1
- package/src/index.ts +32 -1153
- package/src/memory/attachments-store.ts +28 -83
- package/src/memory/channel-delivery-store.ts +7 -21
- package/src/memory/clarification-resolver.ts +6 -5
- package/src/memory/contradiction-checker.ts +3 -2
- package/src/memory/conversation-key-store.ts +10 -29
- package/src/memory/conversation-store.ts +2 -1
- package/src/memory/db.ts +96 -2
- package/src/memory/entity-extractor.ts +6 -3
- package/src/memory/items-extractor.ts +5 -4
- package/src/memory/jobs-store.ts +3 -2
- package/src/memory/llm-usage-store.ts +1 -2
- package/src/memory/runs-store.ts +1 -2
- package/src/memory/schema.ts +23 -2
- package/src/messaging/style-analyzer.ts +3 -2
- package/src/messaging/thread-summarizer.ts +8 -12
- package/src/messaging/triage-engine.ts +4 -2
- package/src/providers/openrouter/client.ts +20 -0
- package/src/providers/registry.ts +8 -0
- package/src/runtime/http-server.ts +108 -20
- package/src/runtime/routes/attachment-routes.ts +2 -3
- package/src/runtime/routes/call-routes.ts +140 -0
- package/src/runtime/routes/channel-routes.ts +5 -10
- package/src/runtime/routes/conversation-routes.ts +5 -5
- package/src/runtime/routes/run-routes.ts +2 -2
- package/src/runtime/run-orchestrator.ts +9 -3
- package/src/schedule/recurrence-engine.ts +138 -0
- package/src/schedule/recurrence-types.ts +67 -0
- package/src/schedule/schedule-store.ts +102 -57
- package/src/schedule/scheduler.ts +9 -6
- package/src/security/oauth2.ts +29 -4
- package/src/security/secret-allowlist.ts +46 -0
- package/src/skills/clawhub.ts +1 -1
- package/src/subagent/manager.ts +40 -8
- package/src/swarm/backend-claude-code.ts +64 -9
- package/src/swarm/worker-prompts.ts +2 -1
- package/src/tasks/SPEC.md +34 -28
- package/src/tasks/ephemeral-permissions.ts +16 -7
- package/src/tasks/task-compiler.ts +5 -4
- package/src/tasks/task-runner.ts +10 -5
- package/src/tasks/task-scheduler.ts +1 -1
- package/src/tasks/tool-sanitizer.ts +36 -0
- package/src/tools/assets/search.ts +4 -4
- package/src/tools/browser/api-map.ts +220 -0
- package/src/tools/browser/auto-navigate.ts +270 -0
- package/src/tools/browser/browser-execution.ts +2 -1
- package/src/tools/browser/browser-manager.ts +2 -2
- package/src/tools/browser/network-recorder.ts +5 -4
- package/src/tools/browser/x-auto-navigate.ts +207 -0
- package/src/tools/calls/call-end.ts +17 -67
- package/src/tools/calls/call-start.ts +24 -85
- package/src/tools/calls/call-status.ts +35 -51
- package/src/tools/claude-code/claude-code.ts +77 -11
- package/src/tools/contacts/contact-merge.ts +46 -78
- package/src/tools/contacts/contact-search.ts +35 -79
- package/src/tools/contacts/contact-upsert.ts +35 -108
- package/src/tools/credentials/vault.ts +20 -4
- package/src/tools/document/document-tool.ts +71 -144
- package/src/tools/executor.ts +129 -10
- package/src/tools/followups/followup_create.ts +46 -88
- package/src/tools/followups/followup_list.ts +34 -74
- package/src/tools/followups/followup_resolve.ts +31 -66
- package/src/tools/host-terminal/cli-discover.ts +2 -1
- package/src/tools/host-terminal/host-shell.ts +10 -0
- package/src/tools/memory/handlers.ts +5 -4
- package/src/tools/network/__tests__/web-search.test.ts +427 -0
- package/src/tools/network/script-proxy/__tests__/logging.test.ts +248 -0
- package/src/tools/network/script-proxy/__tests__/policy.test.ts +234 -0
- package/src/tools/network/script-proxy/__tests__/router.test.ts +76 -0
- package/src/tools/network/web-fetch.ts +18 -6
- package/src/tools/playbooks/index.ts +4 -5
- package/src/tools/playbooks/playbook-create.ts +3 -47
- package/src/tools/playbooks/playbook-delete.ts +1 -25
- package/src/tools/playbooks/playbook-list.ts +1 -28
- package/src/tools/playbooks/playbook-update.ts +3 -51
- package/src/tools/reminder/reminder.ts +5 -78
- package/src/tools/schedule/create.ts +69 -74
- package/src/tools/schedule/delete.ts +21 -47
- package/src/tools/schedule/list.ts +55 -74
- package/src/tools/schedule/update.ts +77 -84
- package/src/tools/subagent/abort.ts +29 -58
- package/src/tools/subagent/message.ts +30 -63
- package/src/tools/subagent/read.ts +53 -84
- package/src/tools/subagent/spawn.ts +43 -82
- package/src/tools/subagent/status.ts +42 -71
- package/src/tools/swarm/delegate.ts +2 -1
- package/src/tools/tasks/index.ts +8 -8
- package/src/tools/tasks/task-delete.ts +60 -88
- package/src/tools/tasks/task-list.ts +31 -52
- package/src/tools/tasks/task-run.ts +72 -108
- package/src/tools/tasks/task-save.ts +33 -65
- package/src/tools/tasks/work-item-enqueue.ts +183 -215
- package/src/tools/tasks/work-item-list.ts +33 -63
- package/src/tools/tasks/work-item-remove.ts +45 -97
- package/src/tools/tasks/work-item-update.ts +91 -163
- package/src/tools/terminal/backends/native.ts +3 -1
- package/src/tools/tool-manifest.ts +0 -62
- package/src/tools/types.ts +6 -0
- package/src/tools/ui-surface/definitions.ts +3 -1
- package/src/tools/watch/screen-watch.ts +3 -1
- package/src/tools/watcher/create.ts +52 -98
- package/src/tools/watcher/delete.ts +20 -46
- package/src/tools/watcher/digest.ts +36 -70
- package/src/tools/watcher/list.ts +49 -79
- package/src/tools/watcher/update.ts +45 -91
- package/src/twitter/client.ts +690 -0
- package/src/twitter/session.ts +91 -0
- package/src/usage/types.ts +0 -1
- package/src/util/truncate.ts +6 -0
- package/src/watcher/providers/slack.ts +2 -1
- package/src/watcher/watcher-store.ts +3 -2
- package/src/work-items/work-item-store.ts +27 -2
- package/src/workspace/commit-message-enrichment-service.ts +31 -7
- package/src/workspace/git-service.ts +87 -22
- package/src/workspace/provider-commit-message-generator.ts +242 -0
- package/src/workspace/turn-commit.ts +62 -3
- package/src/tools/contacts/index.ts +0 -4
- package/src/tools/document/index.ts +0 -5
- package/src/tools/followups/index.ts +0 -3
- package/src/tools/subagent/index.ts +0 -5
- /package/src/__tests__/{memory-context-benchmark.test.ts → memory-context-benchmark.benchmark.test.ts} +0 -0
|
@@ -0,0 +1,773 @@
|
|
|
1
|
+
/**
 * Provider Streaming Benchmark
 *
 * Measures overhead introduced by the provider adapter layers (retry, failover,
 * stream timeout) on top of a simulated streaming source.
 *
 * Baseline targets:
 * - Time-to-first-token (TTFT) overhead < 50ms beyond source latency
 * - Event throughput within 20% of source rate through provider wrappers
 * - Abort signal stops streaming within 100ms
 * - Stream timeout fires within 50ms of configured deadline
 */
|
|
13
|
+
import { describe, test, expect, mock } from 'bun:test';
|
|
14
|
+
|
|
15
|
+
// Replace the logger module with a stub for this benchmark: `getLogger()`
// returns a Proxy whose every property read yields a no-op function, and
// `isDebug()` always reports false.
mock.module('../util/logger.js', () => ({
  getLogger: () =>
    new Proxy({} as Record<string, unknown>, { get: () => () => {} }),
  isDebug: () => false,
}));
|
|
20
|
+
|
|
21
|
+
import { createStreamTimeout } from '../providers/stream-timeout.js';
|
|
22
|
+
import { RetryProvider } from '../providers/retry.js';
|
|
23
|
+
import { FailoverProvider } from '../providers/failover.js';
|
|
24
|
+
import type {
|
|
25
|
+
Provider,
|
|
26
|
+
ProviderResponse,
|
|
27
|
+
SendMessageOptions,
|
|
28
|
+
Message,
|
|
29
|
+
ToolDefinition,
|
|
30
|
+
ProviderEvent,
|
|
31
|
+
} from '../providers/types.js';
|
|
32
|
+
import { ProviderError } from '../util/errors.js';
|
|
33
|
+
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
// Helpers
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
/** Minimal conversation (a single user turn saying "Hello") shared by the benchmark calls. */
const SIMPLE_MESSAGES: Message[] = [
  { role: 'user', content: [{ type: 'text', text: 'Hello' }] },
];
|
|
41
|
+
|
|
42
|
+
// Dummy key for mock server tests — not a real credential.
// NOTE(review): presumably assembled with join() so that no key-shaped literal
// appears in the source — confirm before inlining it.
const BENCH_API_KEY = ['test', 'benchmark', 'key'].join('-');
|
|
44
|
+
|
|
45
|
+
/**
 * Build a mock provider that delivers `tokenCount` text deltas at a given rate.
 *
 * @param tokenCount - Number of `text_delta` events to emit per `sendMessage` call.
 * @param tokensPerSecond - Target emission rate; consecutive events are spaced
 *   `1000 / tokensPerSecond` milliseconds apart via `setTimeout`.
 * @param opts - Optional `ttftMs` (delay before the first event, default 0) and
 *   `name` (provider name, default `'mock-streaming'`).
 * @returns A provider whose `sendMessage` ignores its messages, tools and
 *   system prompt, streams the deltas through `options.onEvent`, and resolves
 *   with a fixed `'complete'` text response. `usage.outputTokens` is the number
 *   of deltas actually emitted, which is lower than `tokenCount` when
 *   `options.signal` aborted the stream early.
 */
function makeStreamingProvider(
  tokenCount: number,
  tokensPerSecond: number,
  opts?: { ttftMs?: number; name?: string },
): Provider {
  const delayPerToken = 1000 / tokensPerSecond;
  const ttftMs = opts?.ttftMs ?? 0;
  const sleep = (ms: number): Promise<void> =>
    new Promise((resolve) => setTimeout(resolve, ms));

  return {
    name: opts?.name ?? 'mock-streaming',
    async sendMessage(
      _messages: Message[],
      _tools?: ToolDefinition[],
      _systemPrompt?: string,
      options?: SendMessageOptions,
    ): Promise<ProviderResponse> {
      const { onEvent, signal } = options ?? {};

      // Simulate TTFT delay
      if (ttftMs > 0) {
        await sleep(ttftMs);
      }

      let emitted = 0;
      for (let i = 0; i < tokenCount; i++) {
        // The abort flag is polled once per token, before the token is emitted.
        if (signal?.aborted) break;
        onEvent?.({ type: 'text_delta', text: `word${i} ` });
        emitted++;
        // No trailing delay after the final token.
        if (i < tokenCount - 1) {
          await sleep(delayPerToken);
        }
      }

      return {
        content: [{ type: 'text', text: 'complete' }],
        model: 'mock',
        // Report what was really streamed rather than the configured count.
        usage: { inputTokens: 10, outputTokens: emitted },
        stopReason: 'end_turn',
      };
    },
  };
}
|
|
86
|
+
|
|
87
|
+
/**
 * Build a provider that always fails with a given error.
 *
 * @param name - Provider name; also used to build the error message
 *   (`"<name> failed"`) and passed to `ProviderError` as its second argument.
 * @param statusCode - Optional status code forwarded to `ProviderError` as its
 *   third argument.
 * @returns A provider whose `sendMessage` always rejects with a `ProviderError`.
 */
function makeFailingProvider(name: string, statusCode?: number): Provider {
  return {
    name,
    async sendMessage(): Promise<ProviderResponse> {
      throw new ProviderError(`${name} failed`, name, statusCode);
    },
  };
}
|
|
96
|
+
|
|
97
|
+
// ---------------------------------------------------------------------------
|
|
98
|
+
// Benchmarks
|
|
99
|
+
// ---------------------------------------------------------------------------
|
|
100
|
+
|
|
101
|
+
describe('Provider streaming benchmark', () => {
|
|
102
|
+
test('TTFT overhead through RetryProvider is < 50ms', async () => {
|
|
103
|
+
const sourceTtftMs = 20;
|
|
104
|
+
const inner = makeStreamingProvider(10, 100, { ttftMs: sourceTtftMs });
|
|
105
|
+
const wrapped = new RetryProvider(inner);
|
|
106
|
+
|
|
107
|
+
let firstEventTime: number | undefined;
|
|
108
|
+
const start = performance.now();
|
|
109
|
+
|
|
110
|
+
await wrapped.sendMessage(SIMPLE_MESSAGES, undefined, undefined, {
|
|
111
|
+
onEvent: () => {
|
|
112
|
+
if (firstEventTime === undefined) {
|
|
113
|
+
firstEventTime = performance.now();
|
|
114
|
+
}
|
|
115
|
+
},
|
|
116
|
+
});
|
|
117
|
+
|
|
118
|
+
expect(firstEventTime).toBeDefined();
|
|
119
|
+
const observedTtft = firstEventTime! - start;
|
|
120
|
+
const overhead = observedTtft - sourceTtftMs;
|
|
121
|
+
|
|
122
|
+
// The wrapper should add negligible latency
|
|
123
|
+
expect(overhead).toBeLessThan(50);
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
test('TTFT overhead through FailoverProvider is < 50ms', async () => {
|
|
127
|
+
const sourceTtftMs = 20;
|
|
128
|
+
const inner = makeStreamingProvider(10, 100, {
|
|
129
|
+
ttftMs: sourceTtftMs,
|
|
130
|
+
name: 'primary',
|
|
131
|
+
});
|
|
132
|
+
const fallback = makeStreamingProvider(10, 100, {
|
|
133
|
+
ttftMs: sourceTtftMs,
|
|
134
|
+
name: 'fallback',
|
|
135
|
+
});
|
|
136
|
+
const wrapped = new FailoverProvider([inner, fallback]);
|
|
137
|
+
|
|
138
|
+
let firstEventTime: number | undefined;
|
|
139
|
+
const start = performance.now();
|
|
140
|
+
|
|
141
|
+
await wrapped.sendMessage(SIMPLE_MESSAGES, undefined, undefined, {
|
|
142
|
+
onEvent: () => {
|
|
143
|
+
if (firstEventTime === undefined) {
|
|
144
|
+
firstEventTime = performance.now();
|
|
145
|
+
}
|
|
146
|
+
},
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
expect(firstEventTime).toBeDefined();
|
|
150
|
+
const observedTtft = firstEventTime! - start;
|
|
151
|
+
const overhead = observedTtft - sourceTtftMs;
|
|
152
|
+
|
|
153
|
+
expect(overhead).toBeLessThan(50);
|
|
154
|
+
});
|
|
155
|
+
|
|
156
|
+
test('event throughput through provider wrappers is within 20% of source rate', async () => {
|
|
157
|
+
const tokenCount = 50;
|
|
158
|
+
const sourceRate = 200; // tokens/sec
|
|
159
|
+
|
|
160
|
+
// Measure unwrapped baseline in the same run so we compare against actual
|
|
161
|
+
// timer resolution rather than the theoretical sourceRate (which setTimeout
|
|
162
|
+
// may not achieve on busy or coarse-timer hosts).
|
|
163
|
+
const baseline = makeStreamingProvider(tokenCount, sourceRate);
|
|
164
|
+
const baselineEvents: number[] = [];
|
|
165
|
+
const baselineStart = performance.now();
|
|
166
|
+
|
|
167
|
+
await baseline.sendMessage(SIMPLE_MESSAGES, undefined, undefined, {
|
|
168
|
+
onEvent: () => {
|
|
169
|
+
baselineEvents.push(performance.now());
|
|
170
|
+
},
|
|
171
|
+
});
|
|
172
|
+
|
|
173
|
+
const baselineElapsed = baselineEvents[baselineEvents.length - 1] - baselineStart;
|
|
174
|
+
const baselineRate = (baselineEvents.length / baselineElapsed) * 1000;
|
|
175
|
+
|
|
176
|
+
// Now measure the wrapped provider
|
|
177
|
+
const inner = makeStreamingProvider(tokenCount, sourceRate);
|
|
178
|
+
const wrapped = new RetryProvider(inner);
|
|
179
|
+
|
|
180
|
+
const events: number[] = [];
|
|
181
|
+
const start = performance.now();
|
|
182
|
+
|
|
183
|
+
await wrapped.sendMessage(SIMPLE_MESSAGES, undefined, undefined, {
|
|
184
|
+
onEvent: () => {
|
|
185
|
+
events.push(performance.now());
|
|
186
|
+
},
|
|
187
|
+
});
|
|
188
|
+
|
|
189
|
+
const elapsed = events[events.length - 1] - start;
|
|
190
|
+
const observedRate = (events.length / elapsed) * 1000;
|
|
191
|
+
|
|
192
|
+
expect(events.length).toBe(tokenCount);
|
|
193
|
+
|
|
194
|
+
// Wrapped throughput should be within 20% of the measured unwrapped baseline
|
|
195
|
+
const minAcceptableRate = baselineRate * 0.8;
|
|
196
|
+
expect(observedRate).toBeGreaterThanOrEqual(minAcceptableRate);
|
|
197
|
+
});
|
|
198
|
+
|
|
199
|
+
test('failover adds < 100ms overhead when primary provider fails', async () => {
|
|
200
|
+
const failing = makeFailingProvider('failing-primary', 500);
|
|
201
|
+
const healthy = makeStreamingProvider(5, 100, { name: 'healthy-fallback' });
|
|
202
|
+
|
|
203
|
+
// Measure the fallback provider's baseline execution time directly so we
|
|
204
|
+
// can isolate the failover overhead from the stream's own runtime.
|
|
205
|
+
const baselineEvents: ProviderEvent[] = [];
|
|
206
|
+
const baselineStart = performance.now();
|
|
207
|
+
|
|
208
|
+
await healthy.sendMessage(SIMPLE_MESSAGES, undefined, undefined, {
|
|
209
|
+
onEvent: (e) => baselineEvents.push(e),
|
|
210
|
+
});
|
|
211
|
+
|
|
212
|
+
const baselineElapsed = performance.now() - baselineStart;
|
|
213
|
+
|
|
214
|
+
// Now measure through the FailoverProvider (primary fails, falls back)
|
|
215
|
+
const healthy2 = makeStreamingProvider(5, 100, { name: 'healthy-fallback' });
|
|
216
|
+
const wrapped = new FailoverProvider([failing, healthy2]);
|
|
217
|
+
|
|
218
|
+
const events: ProviderEvent[] = [];
|
|
219
|
+
const start = performance.now();
|
|
220
|
+
|
|
221
|
+
await wrapped.sendMessage(SIMPLE_MESSAGES, undefined, undefined, {
|
|
222
|
+
onEvent: (e) => events.push(e),
|
|
223
|
+
});
|
|
224
|
+
|
|
225
|
+
const elapsed = performance.now() - start;
|
|
226
|
+
expect(events.length).toBe(5);
|
|
227
|
+
|
|
228
|
+
// Isolate the failover overhead by subtracting the fallback stream's baseline
|
|
229
|
+
const failoverOverhead = elapsed - baselineElapsed;
|
|
230
|
+
expect(failoverOverhead).toBeLessThan(100);
|
|
231
|
+
});
|
|
232
|
+
|
|
233
|
+
test('createStreamTimeout fires within 50ms of configured deadline', async () => {
|
|
234
|
+
const timeoutMs = 100;
|
|
235
|
+
const { signal, cleanup } = createStreamTimeout(timeoutMs);
|
|
236
|
+
|
|
237
|
+
const start = performance.now();
|
|
238
|
+
|
|
239
|
+
await new Promise<void>((resolve) => {
|
|
240
|
+
signal.addEventListener('abort', () => resolve(), { once: true });
|
|
241
|
+
});
|
|
242
|
+
|
|
243
|
+
const elapsed = performance.now() - start;
|
|
244
|
+
cleanup();
|
|
245
|
+
|
|
246
|
+
// Should fire close to the configured timeout
|
|
247
|
+
expect(elapsed).toBeGreaterThanOrEqual(timeoutMs - 10); // allow 10ms early
|
|
248
|
+
expect(elapsed).toBeLessThan(timeoutMs + 50);
|
|
249
|
+
});
|
|
250
|
+
|
|
251
|
+
test('external abort signal propagates through createStreamTimeout within 10ms', async () => {
|
|
252
|
+
const externalController = new AbortController();
|
|
253
|
+
const { signal, cleanup } = createStreamTimeout(60_000, externalController.signal);
|
|
254
|
+
|
|
255
|
+
const abortDelay = 50;
|
|
256
|
+
|
|
257
|
+
const start = performance.now();
|
|
258
|
+
setTimeout(() => externalController.abort(new Error('user cancel')), abortDelay);
|
|
259
|
+
|
|
260
|
+
await new Promise<void>((resolve) => {
|
|
261
|
+
signal.addEventListener('abort', () => resolve(), { once: true });
|
|
262
|
+
});
|
|
263
|
+
|
|
264
|
+
const elapsed = performance.now() - start;
|
|
265
|
+
cleanup();
|
|
266
|
+
|
|
267
|
+
// Should propagate almost immediately after external abort
|
|
268
|
+
expect(elapsed).toBeGreaterThanOrEqual(abortDelay - 10);
|
|
269
|
+
expect(elapsed).toBeLessThan(abortDelay + 10);
|
|
270
|
+
});
|
|
271
|
+
|
|
272
|
+
test('abort signal stops streaming provider within 100ms', async () => {
|
|
273
|
+
// Provider that would stream 200 tokens at 50/sec (4 seconds total)
|
|
274
|
+
const inner = makeStreamingProvider(200, 50);
|
|
275
|
+
const wrapped = new RetryProvider(inner);
|
|
276
|
+
|
|
277
|
+
const controller = new AbortController();
|
|
278
|
+
const events: ProviderEvent[] = [];
|
|
279
|
+
|
|
280
|
+
// Abort after 100ms — should stop well before all 200 tokens
|
|
281
|
+
const abortAfterMs = 100;
|
|
282
|
+
setTimeout(() => controller.abort(), abortAfterMs);
|
|
283
|
+
|
|
284
|
+
const start = performance.now();
|
|
285
|
+
|
|
286
|
+
await wrapped.sendMessage(SIMPLE_MESSAGES, undefined, undefined, {
|
|
287
|
+
onEvent: (e) => events.push(e),
|
|
288
|
+
signal: controller.signal,
|
|
289
|
+
});
|
|
290
|
+
|
|
291
|
+
const elapsed = performance.now() - start;
|
|
292
|
+
|
|
293
|
+
// Should have stopped well before all 200 tokens
|
|
294
|
+
expect(events.length).toBeLessThan(200);
|
|
295
|
+
// Should complete within 100ms of abort signal (abort at 100ms + 100ms grace)
|
|
296
|
+
expect(elapsed).toBeLessThan(abortAfterMs + 100);
|
|
297
|
+
});
|
|
298
|
+
|
|
299
|
+
test('SSE event parsing throughput via Bun.serve mock', async () => {
  const tokenCount = 100;
  const encoder = new TextEncoder();

  // Local SSE server: emits `tokenCount` content_block_delta events back to back
  // (no artificial delay), then a single message_stop event, then closes.
  const server = Bun.serve({
    port: 0,
    fetch() {
      const stream = new ReadableStream({
        // Nothing is awaited here, so a synchronous start matches the other mock servers.
        start(controller) {
          for (let i = 0; i < tokenCount; i++) {
            const event = `event: content_block_delta\ndata: ${JSON.stringify({
              type: 'content_block_delta',
              index: 0,
              delta: { type: 'text_delta', text: `word${i} ` },
            })}\n\n`;
            controller.enqueue(encoder.encode(event));
          }
          // Send stop event
          controller.enqueue(
            encoder.encode(
              `event: message_stop\ndata: ${JSON.stringify({ type: 'message_stop' })}\n\n`,
            ),
          );
          controller.close();
        },
      });

      return new Response(stream, {
        headers: { 'Content-Type': 'text/event-stream' },
      });
    },
  });

  try {
    const start = performance.now();

    const response = await fetch(`http://localhost:${server.port}`);
    const reader = response.body!.getReader();
    const decoder = new TextDecoder();

    let buffer = '';
    let eventCount = 0;
    let firstEventTime: number | undefined;

    // Handles one complete SSE event block: counts it when its `data: ` line
    // carries a content_block_delta payload, and timestamps the first such event.
    const consumeEvent = (block: string): void => {
      if (!block.trim()) return;
      const dataLine = block.split('\n').find((l) => l.startsWith('data: '));
      if (!dataLine) return;

      // 'data: ' is 6 characters; everything after it is the JSON payload.
      const payload = JSON.parse(dataLine.slice(6)) as { type?: string };
      if (payload.type === 'content_block_delta') {
        eventCount++;
        if (firstEventTime === undefined) {
          firstEventTime = performance.now();
        }
      }
    };

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      buffer += decoder.decode(value, { stream: true });

      // Events are separated by a blank line; the last piece may be incomplete,
      // so it is carried over to the next chunk.
      const parts = buffer.split('\n\n');
      buffer = parts.pop()!;

      for (const part of parts) {
        consumeEvent(part);
      }
    }

    // Flush bytes still held by the streaming decoder and parse a final event
    // that was not terminated by a blank line, so nothing is dropped silently.
    buffer += decoder.decode();
    consumeEvent(buffer);

    const elapsed = performance.now() - start;

    // All events should be parsed
    expect(eventCount).toBe(tokenCount);

    // TTFT from server should be < 50ms (no artificial delay).
    // Guard first so a missing timestamp fails clearly instead of comparing NaN.
    expect(firstEventTime).toBeDefined();
    expect(firstEventTime! - start).toBeLessThan(50);

    // Throughput: at least 1000 events/sec for local SSE parsing
    // (no network latency, just parsing overhead)
    const eventsPerSecond = (eventCount / elapsed) * 1000;
    expect(eventsPerSecond).toBeGreaterThan(1000);
  } finally {
    server.stop();
  }
});
|
|
387
|
+
|
|
388
|
+
test('stream timeout cleanup prevents late abort', async () => {
  // Arm a stream timeout that is due to fire after 100ms.
  const { signal: timeoutSignal, cleanup: cancelTimeout } = createStreamTimeout(100);

  // Cancel it straight away, well before the deadline.
  cancelTimeout();

  // Sleep beyond the original 100ms deadline.
  await new Promise((resolve) => {
    setTimeout(resolve, 150);
  });

  // After cleanup, the deadline passing must not abort the signal.
  expect(timeoutSignal.aborted).toBe(false);
});
|
|
401
|
+
|
|
402
|
+
test('TTFT through Anthropic SDK adapter with mock SSE server', async () => {
  const { default: Anthropic } = await import('@anthropic-ai/sdk');
  const tokenCount = 20;
  const encoder = new TextEncoder();

  // Serialises one payload as an SSE frame; the event name is the payload's own `type`.
  const toFrame = (payload: { type: string; [key: string]: unknown }): string =>
    `event: ${payload.type}\ndata: ${JSON.stringify(payload)}\n\n`;

  // Builds a complete Anthropic-format SSE response: message_start,
  // content_block_start, `count` text deltas, then the three closing events.
  function buildAnthropicSSE(count: number): string[] {
    const deltaFrames = Array.from({ length: count }, (_, i) =>
      toFrame({
        type: 'content_block_delta',
        index: 0,
        delta: { type: 'text_delta', text: `word${i} ` },
      }),
    );

    return [
      toFrame({
        type: 'message_start',
        message: {
          id: 'msg_bench_01',
          type: 'message',
          role: 'assistant',
          content: [],
          model: 'claude-3-5-sonnet-20241022',
          stop_reason: null,
          stop_sequence: null,
          usage: { input_tokens: 10, output_tokens: 1 },
        },
      }),
      toFrame({
        type: 'content_block_start',
        index: 0,
        content_block: { type: 'text', text: '' },
      }),
      ...deltaFrames,
      toFrame({
        type: 'content_block_stop',
        index: 0,
      }),
      toFrame({
        type: 'message_delta',
        delta: { stop_reason: 'end_turn', stop_sequence: null },
        usage: { output_tokens: count },
      }),
      toFrame({
        type: 'message_stop',
      }),
    ];
  }

  // Mock server: every request gets the whole SSE response enqueued at once.
  const server = Bun.serve({
    port: 0,
    fetch() {
      const frames = buildAnthropicSSE(tokenCount);
      const body = new ReadableStream({
        start(controller) {
          frames.forEach((frame) => {
            controller.enqueue(encoder.encode(frame));
          });
          controller.close();
        },
      });
      return new Response(body, {
        headers: { 'Content-Type': 'text/event-stream' },
      });
    },
  });

  try {
    // Point the SDK client at the local mock instead of the real API.
    const client = new Anthropic({
      apiKey: BENCH_API_KEY,
      baseURL: `http://localhost:${server.port}`,
    });

    let firstTextAt: number | undefined;
    const startedAt = performance.now();

    const sdkStream = client.messages.stream({
      model: 'claude-3-5-sonnet-20241022',
      max_tokens: 1024,
      messages: [{ role: 'user', content: 'Hello' }],
    });

    // Timestamp only the first 'text' callback.
    sdkStream.on('text', () => {
      if (firstTextAt !== undefined) return;
      firstTextAt = performance.now();
    });

    await sdkStream.finalMessage();

    expect(firstTextAt).toBeDefined();
    const ttft = firstTextAt! - startedAt;

    // With a local mock, TTFT through the full SDK adapter should be < 100ms.
    expect(ttft).toBeLessThan(100);
  } finally {
    server.stop();
  }
});
|
|
507
|
+
|
|
508
|
+
test('throughput through Anthropic SDK adapter matches source rate', async () => {
  const Anthropic = (await import('@anthropic-ai/sdk')).default;
  const tokenCount = 200;
  const encoder = new TextEncoder();

  // Mock server: each request receives a full Anthropic-format SSE response
  // (message_start, content_block_start, `tokenCount` text deltas, then the
  // closing events), all enqueued at once with no artificial delay.
  const server = Bun.serve({
    port: 0,
    fetch() {
      const events: string[] = [];

      events.push(`event: message_start\ndata: ${JSON.stringify({
        type: 'message_start',
        message: {
          id: 'msg_bench_02',
          type: 'message',
          role: 'assistant',
          content: [],
          model: 'claude-3-5-sonnet-20241022',
          stop_reason: null,
          stop_sequence: null,
          usage: { input_tokens: 10, output_tokens: 1 },
        },
      })}\n\n`);

      events.push(`event: content_block_start\ndata: ${JSON.stringify({
        type: 'content_block_start',
        index: 0,
        content_block: { type: 'text', text: '' },
      })}\n\n`);

      for (let i = 0; i < tokenCount; i++) {
        events.push(`event: content_block_delta\ndata: ${JSON.stringify({
          type: 'content_block_delta',
          index: 0,
          delta: { type: 'text_delta', text: `w${i} ` },
        })}\n\n`);
      }

      events.push(`event: content_block_stop\ndata: ${JSON.stringify({
        type: 'content_block_stop',
        index: 0,
      })}\n\n`);

      events.push(`event: message_delta\ndata: ${JSON.stringify({
        type: 'message_delta',
        delta: { stop_reason: 'end_turn', stop_sequence: null },
        usage: { output_tokens: tokenCount },
      })}\n\n`);

      events.push(`event: message_stop\ndata: ${JSON.stringify({
        type: 'message_stop',
      })}\n\n`);

      const stream = new ReadableStream({
        start(controller) {
          for (const evt of events) {
            controller.enqueue(encoder.encode(evt));
          }
          controller.close();
        },
      });

      return new Response(stream, {
        headers: { 'Content-Type': 'text/event-stream' },
      });
    },
  });

  try {
    const client = new Anthropic({
      apiKey: BENCH_API_KEY,
      baseURL: `http://localhost:${server.port}`,
    });

    // Arrival timestamp of every 'text' callback, in delivery order.
    const textEvents: number[] = [];
    const start = performance.now();

    const sdkStream = client.messages.stream({
      model: 'claude-3-5-sonnet-20241022',
      max_tokens: 4096,
      messages: [{ role: 'user', content: 'Hello' }],
    });

    sdkStream.on('text', () => {
      textEvents.push(performance.now());
    });

    await sdkStream.finalMessage();

    // All text deltas should be delivered through the SDK.
    // Asserted before the rate is derived: with no events the last timestamp is
    // undefined, and the rate would be NaN, masking the real failure.
    expect(textEvents.length).toBe(tokenCount);

    // Rate is measured from request start to the arrival of the last text delta.
    const elapsed = textEvents[textEvents.length - 1] - start;
    const observedRate = (textEvents.length / elapsed) * 1000;

    // SDK adapter should achieve at least 1000 events/sec from a local mock
    // (same threshold as the raw SSE parsing test)
    expect(observedRate).toBeGreaterThan(1000);
  } finally {
    server.stop();
  }
});
|
|
610
|
+
|
|
611
|
+
test('AnthropicProvider adapter end-to-end with mock SSE server', async () => {
  const tokenCount = 50;
  const encoder = new TextEncoder();

  // Mock server: each request receives a full Anthropic-format SSE response
  // with `tokenCount` text deltas, all enqueued at once.
  const server = Bun.serve({
    port: 0,
    fetch() {
      const events: string[] = [];

      events.push(`event: message_start\ndata: ${JSON.stringify({
        type: 'message_start',
        message: {
          id: 'msg_bench_03',
          type: 'message',
          role: 'assistant',
          content: [],
          model: 'claude-3-5-sonnet-20241022',
          stop_reason: null,
          stop_sequence: null,
          usage: { input_tokens: 10, output_tokens: 1 },
        },
      })}\n\n`);

      events.push(`event: content_block_start\ndata: ${JSON.stringify({
        type: 'content_block_start',
        index: 0,
        content_block: { type: 'text', text: '' },
      })}\n\n`);

      for (let i = 0; i < tokenCount; i++) {
        events.push(`event: content_block_delta\ndata: ${JSON.stringify({
          type: 'content_block_delta',
          index: 0,
          delta: { type: 'text_delta', text: `token${i} ` },
        })}\n\n`);
      }

      events.push(`event: content_block_stop\ndata: ${JSON.stringify({
        type: 'content_block_stop',
        index: 0,
      })}\n\n`);

      events.push(`event: message_delta\ndata: ${JSON.stringify({
        type: 'message_delta',
        delta: { stop_reason: 'end_turn', stop_sequence: null },
        usage: { output_tokens: tokenCount },
      })}\n\n`);

      events.push(`event: message_stop\ndata: ${JSON.stringify({
        type: 'message_stop',
      })}\n\n`);

      const stream = new ReadableStream({
        start(controller) {
          for (const evt of events) {
            controller.enqueue(encoder.encode(evt));
          }
          controller.close();
        },
      });

      return new Response(stream, {
        headers: { 'Content-Type': 'text/event-stream' },
      });
    },
  });

  // Save and override env var before try so it's always restored in finally
  const origBaseUrl = process.env.ANTHROPIC_BASE_URL;
  process.env.ANTHROPIC_BASE_URL = `http://localhost:${server.port}`;

  try {
    // Import dynamically after setting env var so SDK picks it up
    const { AnthropicProvider } = await import('../providers/anthropic/client.js');
    const provider = new AnthropicProvider(BENCH_API_KEY, 'claude-3-5-sonnet-20241022');

    const receivedEvents: ProviderEvent[] = [];
    let firstEventTime: number | undefined;
    const start = performance.now();

    const result = await provider.sendMessage(
      SIMPLE_MESSAGES,
      undefined,
      undefined,
      {
        onEvent: (e) => {
          // Timestamp the first event of any type, not only text deltas.
          if (firstEventTime === undefined) {
            firstEventTime = performance.now();
          }
          receivedEvents.push(e);
        },
      },
    );

    // Stop the clock as soon as the provider call resolves, so the throughput
    // figure below covers only the streaming work and not the assertions.
    const elapsed = performance.now() - start;

    // Verify the full adapter pipeline delivered all events
    const textDeltas = receivedEvents.filter((e) => e.type === 'text_delta');
    expect(textDeltas.length).toBe(tokenCount);

    // TTFT through the complete provider adapter < 100ms
    expect(firstEventTime).toBeDefined();
    expect(firstEventTime! - start).toBeLessThan(100);

    // Provider response should have correct structure
    expect(result.model).toBe('claude-3-5-sonnet-20241022');
    expect(result.stopReason).toBe('end_turn');
    expect(result.usage.outputTokens).toBe(tokenCount);

    // Throughput: events should flow at > 500 events/sec through the full adapter
    const rate = (textDeltas.length / elapsed) * 1000;
    expect(rate).toBeGreaterThan(500);
  } finally {
    // Restore the environment exactly: remove the variable if it was unset before.
    if (origBaseUrl === undefined) {
      delete process.env.ANTHROPIC_BASE_URL;
    } else {
      process.env.ANTHROPIC_BASE_URL = origBaseUrl;
    }
    server.stop();
  }
});
|
|
731
|
+
|
|
732
|
+
test('multiple rapid events are delivered without batching loss', async () => {
  // Stub provider that fires every text delta synchronously, with no pause between tokens.
  const tokenCount = 500;
  const inner: Provider = {
    name: 'rapid-fire',
    async sendMessage(
      _messages: Message[],
      _tools?: ToolDefinition[],
      _systemPrompt?: string,
      options?: SendMessageOptions,
    ): Promise<ProviderResponse> {
      const emit = options?.onEvent;
      let emitted = 0;
      while (emitted < tokenCount) {
        emit?.({ type: 'text_delta', text: `w${emitted} ` });
        emitted += 1;
      }

      const response: ProviderResponse = {
        content: [{ type: 'text', text: 'done' }],
        model: 'mock',
        usage: { inputTokens: 5, outputTokens: tokenCount },
        stopReason: 'end_turn',
      };
      return response;
    },
  };

  const retrying = new RetryProvider(inner);
  const delivered: ProviderEvent[] = [];

  const startedAt = performance.now();

  await retrying.sendMessage(SIMPLE_MESSAGES, undefined, undefined, {
    onEvent: (event) => {
      delivered.push(event);
    },
  });

  const elapsedMs = performance.now() - startedAt;

  // The wrapper must forward every event — none may be dropped.
  expect(delivered.length).toBe(tokenCount);

  // Delivering 500 synchronous events should take < 50ms.
  expect(elapsedMs).toBeLessThan(50);
});
|
|
773
|
+
});
|