vellum 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -2
- package/bun.lock +5 -2
- package/package.json +4 -2
- package/scripts/capture-x-graphql.ts +562 -0
- package/scripts/ipc/check-swift-decoder-drift.ts +2 -1
- package/scripts/test.sh +5 -0
- package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +133 -34
- package/src/__tests__/account-registry.test.ts +2 -1
- package/src/__tests__/agent-heartbeat-service.test.ts +250 -0
- package/src/__tests__/asset-materialize-tool.test.ts +16 -15
- package/src/__tests__/asset-search-tool.test.ts +23 -22
- package/src/__tests__/attachments-store.test.ts +56 -127
- package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +5 -4
- package/src/__tests__/browser-skill-endstate.test.ts +4 -3
- package/src/__tests__/call-bridge.test.ts +385 -0
- package/src/__tests__/call-constants.test.ts +40 -0
- package/src/__tests__/call-orchestrator.test.ts +130 -4
- package/src/__tests__/call-recovery.test.ts +518 -0
- package/src/__tests__/call-routes-http.test.ts +459 -0
- package/src/__tests__/call-state-machine.test.ts +143 -0
- package/src/__tests__/call-store.test.ts +216 -1
- package/src/__tests__/cli-discover.test.ts +1 -1
- package/src/__tests__/commit-message-enrichment-service.test.ts +148 -7
- package/src/__tests__/compaction.benchmark.test.ts +176 -0
- package/src/__tests__/computer-use-tools.test.ts +250 -0
- package/src/__tests__/config-schema.test.ts +299 -3
- package/src/__tests__/conflict-store.test.ts +2 -1
- package/src/__tests__/contacts-tools.test.ts +331 -0
- package/src/__tests__/conversation-store.test.ts +30 -32
- package/src/__tests__/credential-security-invariants.test.ts +4 -0
- package/src/__tests__/date-context.test.ts +373 -0
- package/src/__tests__/db-schedule-syntax-migration.test.ts +129 -0
- package/src/__tests__/fixtures/media-reuse-fixtures.ts +3 -3
- package/src/__tests__/followup-tools.test.ts +303 -0
- package/src/__tests__/handlers-twitter-config.test.ts +718 -0
- package/src/__tests__/intent-routing.test.ts +64 -57
- package/src/__tests__/ipc-roundtrip.benchmark.test.ts +237 -0
- package/src/__tests__/ipc-snapshot.test.ts +62 -28
- package/src/__tests__/llm-usage-store.test.ts +3 -8
- package/src/__tests__/media-generate-image.test.ts +1 -1
- package/src/__tests__/media-reuse-story.e2e.test.ts +7 -7
- package/src/__tests__/memory-retrieval.benchmark.test.ts +430 -0
- package/src/__tests__/parallel-tool.benchmark.test.ts +294 -0
- package/src/__tests__/playbook-tools.test.ts +342 -0
- package/src/__tests__/profile-compiler.test.ts +2 -1
- package/src/__tests__/provider-streaming.benchmark.test.ts +773 -0
- package/src/__tests__/recurrence-engine-rruleset.test.ts +78 -0
- package/src/__tests__/recurrence-engine.test.ts +69 -0
- package/src/__tests__/recurrence-types.test.ts +71 -0
- package/src/__tests__/registry.test.ts +5 -3
- package/src/__tests__/relay-server.test.ts +633 -0
- package/src/__tests__/reminder-store.test.ts +6 -3
- package/src/__tests__/reminder.test.ts +43 -77
- package/src/__tests__/run-orchestrator-assistant-events.test.ts +8 -4
- package/src/__tests__/run-orchestrator.test.ts +4 -4
- package/src/__tests__/runtime-attachment-metadata.test.ts +7 -6
- package/src/__tests__/runtime-runs-http.test.ts +4 -4
- package/src/__tests__/runtime-runs.test.ts +4 -4
- package/src/__tests__/schedule-store.test.ts +482 -0
- package/src/__tests__/schedule-tools.test.ts +700 -0
- package/src/__tests__/scheduler-recurrence.test.ts +329 -0
- package/src/__tests__/server-history-render.test.ts +14 -13
- package/src/__tests__/session-error.test.ts +28 -0
- package/src/__tests__/session-init.benchmark.test.ts +462 -0
- package/src/__tests__/session-queue.test.ts +71 -48
- package/src/__tests__/session-runtime-assembly.test.ts +161 -0
- package/src/__tests__/session-surfaces-task-progress.test.ts +104 -0
- package/src/__tests__/signup-e2e.test.ts +2 -1
- package/src/__tests__/skill-projection.benchmark.test.ts +328 -0
- package/src/__tests__/skill-script-runner.test.ts +159 -0
- package/src/__tests__/speaker-identification.test.ts +52 -0
- package/src/__tests__/subagent-manager-notify.test.ts +42 -10
- package/src/__tests__/subagent-tools.test.ts +141 -41
- package/src/__tests__/task-compiler.test.ts +2 -1
- package/src/__tests__/task-runner.test.ts +2 -1
- package/src/__tests__/task-scheduler.test.ts +2 -1
- package/src/__tests__/task-tools.test.ts +49 -56
- package/src/__tests__/tool-audit-listener.test.ts +1 -0
- package/src/__tests__/tool-domain-event-publisher.test.ts +2 -0
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +500 -0
- package/src/__tests__/tool-executor.test.ts +13 -17
- package/src/__tests__/turn-commit.test.ts +218 -3
- package/src/__tests__/twilio-provider.test.ts +143 -0
- package/src/__tests__/twilio-routes.test.ts +789 -0
- package/src/__tests__/twitter-auth-handler.test.ts +581 -0
- package/src/__tests__/view-image-tool.test.ts +217 -0
- package/src/__tests__/workspace-git-service.test.ts +186 -0
- package/src/__tests__/workspace-heartbeat-service.test.ts +13 -3
- package/src/agent-heartbeat/agent-heartbeat-service.ts +155 -0
- package/src/bundler/app-bundler.ts +12 -8
- package/src/calls/call-bridge.ts +95 -0
- package/src/calls/call-constants.ts +43 -5
- package/src/calls/call-domain.ts +276 -0
- package/src/calls/call-orchestrator.ts +43 -17
- package/src/calls/call-recovery.ts +207 -0
- package/src/calls/call-state-machine.ts +68 -0
- package/src/calls/call-store.ts +192 -5
- package/src/calls/relay-server.ts +41 -4
- package/src/calls/speaker-identification.ts +213 -0
- package/src/calls/twilio-provider.ts +10 -6
- package/src/calls/twilio-routes.ts +90 -76
- package/src/calls/types.ts +1 -1
- package/src/cli/config-commands.ts +334 -0
- package/src/cli/core-commands.ts +776 -0
- package/src/cli/doordash.ts +251 -1
- package/src/cli/ipc-client.ts +82 -0
- package/src/cli/map.ts +246 -0
- package/src/cli/twitter.ts +575 -0
- package/src/cli.ts +7 -5
- package/src/commands/__tests__/cc-command-registry.test.ts +319 -0
- package/src/commands/cc-command-registry.ts +209 -0
- package/src/config/bundled-skills/contacts/SKILL.md +39 -0
- package/src/config/bundled-skills/contacts/TOOLS.json +122 -0
- package/src/config/bundled-skills/contacts/tools/contact-merge.ts +9 -0
- package/src/config/bundled-skills/contacts/tools/contact-search.ts +9 -0
- package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +9 -0
- package/src/config/bundled-skills/document/SKILL.md +18 -0
- package/src/config/bundled-skills/document/TOOLS.json +53 -0
- package/src/config/bundled-skills/document/tools/document-create.ts +9 -0
- package/src/config/bundled-skills/document/tools/document-update.ts +9 -0
- package/src/config/bundled-skills/doordash/SKILL.md +82 -23
- package/src/config/bundled-skills/followups/SKILL.md +32 -0
- package/src/config/bundled-skills/followups/TOOLS.json +100 -0
- package/src/config/bundled-skills/followups/tools/followup-create.ts +9 -0
- package/src/config/bundled-skills/followups/tools/followup-list.ts +9 -0
- package/src/config/bundled-skills/followups/tools/followup-resolve.ts +9 -0
- package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +1 -23
- package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +2 -1
- package/src/config/bundled-skills/playbooks/SKILL.md +31 -0
- package/src/config/bundled-skills/playbooks/TOOLS.json +126 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +9 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +9 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +9 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +9 -0
- package/src/config/bundled-skills/reminder/SKILL.md +20 -0
- package/src/config/bundled-skills/reminder/TOOLS.json +67 -0
- package/src/config/bundled-skills/reminder/tools/reminder-cancel.ts +9 -0
- package/src/config/bundled-skills/reminder/tools/reminder-create.ts +9 -0
- package/src/config/bundled-skills/reminder/tools/reminder-list.ts +9 -0
- package/src/config/bundled-skills/schedule/SKILL.md +74 -0
- package/src/config/bundled-skills/schedule/TOOLS.json +135 -0
- package/src/config/bundled-skills/schedule/tools/schedule-create.ts +9 -0
- package/src/config/bundled-skills/schedule/tools/schedule-delete.ts +9 -0
- package/src/config/bundled-skills/schedule/tools/schedule-list.ts +9 -0
- package/src/config/bundled-skills/schedule/tools/schedule-update.ts +9 -0
- package/src/config/bundled-skills/subagent/SKILL.md +25 -0
- package/src/config/bundled-skills/subagent/TOOLS.json +107 -0
- package/src/config/bundled-skills/subagent/tools/subagent-abort.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-message.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-read.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-spawn.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-status.ts +9 -0
- package/src/config/bundled-skills/tasks/SKILL.md +28 -0
- package/src/config/bundled-skills/tasks/TOOLS.json +256 -0
- package/src/config/bundled-skills/tasks/tools/task-delete.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-add.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-remove.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-show.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-update.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-run.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-save.ts +9 -0
- package/src/config/bundled-skills/twitter/SKILL.md +134 -0
- package/src/config/bundled-skills/watcher/SKILL.md +27 -0
- package/src/config/bundled-skills/watcher/TOOLS.json +147 -0
- package/src/config/bundled-skills/watcher/tools/watcher-create.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-delete.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-digest.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-list.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-update.ts +9 -0
- package/src/config/defaults.ts +33 -0
- package/src/config/loader.ts +4 -1
- package/src/config/schema.ts +161 -1
- package/src/config/system-prompt.ts +61 -16
- package/src/config/templates/IDENTITY.md +7 -0
- package/src/config/types.ts +4 -0
- package/src/contacts/contact-store.ts +4 -4
- package/src/daemon/assistant-attachments.ts +10 -0
- package/src/daemon/classifier.ts +3 -1
- package/src/daemon/computer-use-session.ts +3 -1
- package/src/daemon/date-context.ts +136 -0
- package/src/daemon/handlers/apps.ts +16 -1
- package/src/daemon/handlers/browser.ts +54 -0
- package/src/daemon/handlers/computer-use.ts +7 -1
- package/src/daemon/handlers/config.ts +163 -5
- package/src/daemon/handlers/diagnostics.ts +5 -1
- package/src/daemon/handlers/documents.ts +18 -29
- package/src/daemon/handlers/home-base.ts +5 -1
- package/src/daemon/handlers/index.ts +40 -277
- package/src/daemon/handlers/misc.ts +9 -1
- package/src/daemon/handlers/publish.ts +6 -1
- package/src/daemon/handlers/sessions.ts +65 -12
- package/src/daemon/handlers/shared.ts +36 -1
- package/src/daemon/handlers/signing.ts +37 -0
- package/src/daemon/handlers/skills.ts +20 -6
- package/src/daemon/handlers/subagents.ts +8 -3
- package/src/daemon/handlers/twitter-auth.ts +169 -0
- package/src/daemon/handlers/work-items.ts +384 -68
- package/src/daemon/ipc-contract-inventory.json +28 -4
- package/src/daemon/ipc-contract.ts +133 -37
- package/src/daemon/ipc-protocol.ts +7 -2
- package/src/daemon/lifecycle.ts +21 -0
- package/src/daemon/main.ts +10 -4
- package/src/daemon/ride-shotgun-handler.ts +74 -10
- package/src/daemon/server.ts +143 -26
- package/src/daemon/session-agent-loop.ts +887 -0
- package/src/daemon/session-attachments.ts +28 -5
- package/src/daemon/session-error.ts +24 -3
- package/src/daemon/session-lifecycle.ts +147 -0
- package/src/daemon/session-media-retry.ts +147 -0
- package/src/daemon/session-messaging.ts +145 -0
- package/src/daemon/session-notifiers.ts +164 -0
- package/src/daemon/session-process.ts +2 -2
- package/src/daemon/session-queue-manager.ts +1 -0
- package/src/daemon/session-runtime-assembly.ts +52 -0
- package/src/daemon/session-skill-tools.ts +124 -5
- package/src/daemon/session-slash.ts +3 -0
- package/src/daemon/session-surfaces.ts +77 -2
- package/src/daemon/session-tool-setup.ts +216 -2
- package/src/daemon/session-usage.ts +0 -2
- package/src/daemon/session.ts +114 -1404
- package/src/daemon/video-thumbnail.ts +60 -0
- package/src/doordash/client.ts +121 -27
- package/src/doordash/queries.ts +1 -2
- package/src/export/formatter.ts +3 -1
- package/src/followups/followup-store.ts +4 -2
- package/src/followups/types.ts +6 -0
- package/src/hooks/templates.ts +1 -1
- package/src/index.ts +32 -1153
- package/src/memory/attachments-store.ts +28 -83
- package/src/memory/channel-delivery-store.ts +7 -21
- package/src/memory/clarification-resolver.ts +6 -5
- package/src/memory/contradiction-checker.ts +3 -2
- package/src/memory/conversation-key-store.ts +10 -29
- package/src/memory/conversation-store.ts +2 -1
- package/src/memory/db.ts +96 -2
- package/src/memory/entity-extractor.ts +6 -3
- package/src/memory/items-extractor.ts +5 -4
- package/src/memory/jobs-store.ts +3 -2
- package/src/memory/llm-usage-store.ts +1 -2
- package/src/memory/runs-store.ts +1 -2
- package/src/memory/schema.ts +23 -2
- package/src/messaging/style-analyzer.ts +3 -2
- package/src/messaging/thread-summarizer.ts +8 -12
- package/src/messaging/triage-engine.ts +4 -2
- package/src/providers/openrouter/client.ts +20 -0
- package/src/providers/registry.ts +8 -0
- package/src/runtime/http-server.ts +108 -20
- package/src/runtime/routes/attachment-routes.ts +2 -3
- package/src/runtime/routes/call-routes.ts +140 -0
- package/src/runtime/routes/channel-routes.ts +5 -10
- package/src/runtime/routes/conversation-routes.ts +5 -5
- package/src/runtime/routes/run-routes.ts +2 -2
- package/src/runtime/run-orchestrator.ts +9 -3
- package/src/schedule/recurrence-engine.ts +138 -0
- package/src/schedule/recurrence-types.ts +67 -0
- package/src/schedule/schedule-store.ts +102 -57
- package/src/schedule/scheduler.ts +9 -6
- package/src/security/oauth2.ts +29 -4
- package/src/security/secret-allowlist.ts +46 -0
- package/src/skills/clawhub.ts +1 -1
- package/src/subagent/manager.ts +40 -8
- package/src/swarm/backend-claude-code.ts +64 -9
- package/src/swarm/worker-prompts.ts +2 -1
- package/src/tasks/SPEC.md +34 -28
- package/src/tasks/ephemeral-permissions.ts +16 -7
- package/src/tasks/task-compiler.ts +5 -4
- package/src/tasks/task-runner.ts +10 -5
- package/src/tasks/task-scheduler.ts +1 -1
- package/src/tasks/tool-sanitizer.ts +36 -0
- package/src/tools/assets/search.ts +4 -4
- package/src/tools/browser/api-map.ts +220 -0
- package/src/tools/browser/auto-navigate.ts +270 -0
- package/src/tools/browser/browser-execution.ts +2 -1
- package/src/tools/browser/browser-manager.ts +2 -2
- package/src/tools/browser/network-recorder.ts +5 -4
- package/src/tools/browser/x-auto-navigate.ts +207 -0
- package/src/tools/calls/call-end.ts +17 -67
- package/src/tools/calls/call-start.ts +24 -85
- package/src/tools/calls/call-status.ts +35 -51
- package/src/tools/claude-code/claude-code.ts +77 -11
- package/src/tools/contacts/contact-merge.ts +46 -78
- package/src/tools/contacts/contact-search.ts +35 -79
- package/src/tools/contacts/contact-upsert.ts +35 -108
- package/src/tools/credentials/vault.ts +20 -4
- package/src/tools/document/document-tool.ts +71 -144
- package/src/tools/executor.ts +129 -10
- package/src/tools/followups/followup_create.ts +46 -88
- package/src/tools/followups/followup_list.ts +34 -74
- package/src/tools/followups/followup_resolve.ts +31 -66
- package/src/tools/host-terminal/cli-discover.ts +2 -1
- package/src/tools/host-terminal/host-shell.ts +10 -0
- package/src/tools/memory/handlers.ts +5 -4
- package/src/tools/network/__tests__/web-search.test.ts +427 -0
- package/src/tools/network/script-proxy/__tests__/logging.test.ts +248 -0
- package/src/tools/network/script-proxy/__tests__/policy.test.ts +234 -0
- package/src/tools/network/script-proxy/__tests__/router.test.ts +76 -0
- package/src/tools/network/web-fetch.ts +18 -6
- package/src/tools/playbooks/index.ts +4 -5
- package/src/tools/playbooks/playbook-create.ts +3 -47
- package/src/tools/playbooks/playbook-delete.ts +1 -25
- package/src/tools/playbooks/playbook-list.ts +1 -28
- package/src/tools/playbooks/playbook-update.ts +3 -51
- package/src/tools/reminder/reminder.ts +5 -78
- package/src/tools/schedule/create.ts +69 -74
- package/src/tools/schedule/delete.ts +21 -47
- package/src/tools/schedule/list.ts +55 -74
- package/src/tools/schedule/update.ts +77 -84
- package/src/tools/subagent/abort.ts +29 -58
- package/src/tools/subagent/message.ts +30 -63
- package/src/tools/subagent/read.ts +53 -84
- package/src/tools/subagent/spawn.ts +43 -82
- package/src/tools/subagent/status.ts +42 -71
- package/src/tools/swarm/delegate.ts +2 -1
- package/src/tools/tasks/index.ts +8 -8
- package/src/tools/tasks/task-delete.ts +60 -88
- package/src/tools/tasks/task-list.ts +31 -52
- package/src/tools/tasks/task-run.ts +72 -108
- package/src/tools/tasks/task-save.ts +33 -65
- package/src/tools/tasks/work-item-enqueue.ts +183 -215
- package/src/tools/tasks/work-item-list.ts +33 -63
- package/src/tools/tasks/work-item-remove.ts +45 -97
- package/src/tools/tasks/work-item-update.ts +91 -163
- package/src/tools/terminal/backends/native.ts +3 -1
- package/src/tools/tool-manifest.ts +0 -62
- package/src/tools/types.ts +6 -0
- package/src/tools/ui-surface/definitions.ts +3 -1
- package/src/tools/watch/screen-watch.ts +3 -1
- package/src/tools/watcher/create.ts +52 -98
- package/src/tools/watcher/delete.ts +20 -46
- package/src/tools/watcher/digest.ts +36 -70
- package/src/tools/watcher/list.ts +49 -79
- package/src/tools/watcher/update.ts +45 -91
- package/src/twitter/client.ts +690 -0
- package/src/twitter/session.ts +91 -0
- package/src/usage/types.ts +0 -1
- package/src/util/truncate.ts +6 -0
- package/src/watcher/providers/slack.ts +2 -1
- package/src/watcher/watcher-store.ts +3 -2
- package/src/work-items/work-item-store.ts +27 -2
- package/src/workspace/commit-message-enrichment-service.ts +31 -7
- package/src/workspace/git-service.ts +87 -22
- package/src/workspace/provider-commit-message-generator.ts +242 -0
- package/src/workspace/turn-commit.ts +62 -3
- package/src/tools/contacts/index.ts +0 -4
- package/src/tools/document/index.ts +0 -5
- package/src/tools/followups/index.ts +0 -3
- package/src/tools/subagent/index.ts +0 -5
- /package/src/__tests__/{memory-context-benchmark.test.ts → memory-context-benchmark.benchmark.test.ts} +0 -0
|
@@ -23,7 +23,7 @@ mock.module('../util/logger.js', () => ({
|
|
|
23
23
|
}),
|
|
24
24
|
}));
|
|
25
25
|
|
|
26
|
-
import { initializeDb, getDb } from '../memory/db.js';
|
|
26
|
+
import { initializeDb, getDb, resetDb } from '../memory/db.js';
|
|
27
27
|
import { conversations } from '../memory/schema.js';
|
|
28
28
|
import {
|
|
29
29
|
createCallSession,
|
|
@@ -37,11 +37,15 @@ import {
|
|
|
37
37
|
getPendingQuestion,
|
|
38
38
|
answerPendingQuestion,
|
|
39
39
|
expirePendingQuestions,
|
|
40
|
+
claimCallback,
|
|
41
|
+
releaseCallbackClaim,
|
|
42
|
+
finalizeCallbackClaim,
|
|
40
43
|
} from '../calls/call-store.js';
|
|
41
44
|
|
|
42
45
|
initializeDb();
|
|
43
46
|
|
|
44
47
|
afterAll(() => {
|
|
48
|
+
resetDb();
|
|
45
49
|
try { rmSync(testDir, { recursive: true }); } catch { /* best effort */ }
|
|
46
50
|
});
|
|
47
51
|
|
|
@@ -65,6 +69,7 @@ function resetTables() {
|
|
|
65
69
|
db.run('DELETE FROM call_pending_questions');
|
|
66
70
|
db.run('DELETE FROM call_events');
|
|
67
71
|
db.run('DELETE FROM call_sessions');
|
|
72
|
+
db.run('DELETE FROM processed_callbacks');
|
|
68
73
|
db.run('DELETE FROM conversations');
|
|
69
74
|
ensuredConvIds = new Set();
|
|
70
75
|
}
|
|
@@ -473,4 +478,214 @@ describe('call-store', () => {
|
|
|
473
478
|
const q1Row = raw.query('SELECT status FROM call_pending_questions WHERE id = ?').get(q1.id) as { status: string };
|
|
474
479
|
expect(q1Row.status).toBe('answered');
|
|
475
480
|
});
|
|
481
|
+
|
|
482
|
+
// ── Callback Claim ──────────────────────────────────────────────
|
|
483
|
+
|
|
484
|
+
test('claimCallback returns a claim ID on first call', () => {
|
|
485
|
+
const session = createTestCallSession({
|
|
486
|
+
conversationId: 'conv-22',
|
|
487
|
+
provider: 'twilio',
|
|
488
|
+
fromNumber: '+15551111111',
|
|
489
|
+
toNumber: '+15552222222',
|
|
490
|
+
});
|
|
491
|
+
|
|
492
|
+
const result = claimCallback('test-dedupe-key-1', session.id);
|
|
493
|
+
expect(result).toBeTypeOf('string');
|
|
494
|
+
expect(result!.length).toBeGreaterThan(0);
|
|
495
|
+
});
|
|
496
|
+
|
|
497
|
+
test('claimCallback returns null on duplicate key', () => {
|
|
498
|
+
const session = createTestCallSession({
|
|
499
|
+
conversationId: 'conv-23',
|
|
500
|
+
provider: 'twilio',
|
|
501
|
+
fromNumber: '+15551111111',
|
|
502
|
+
toNumber: '+15552222222',
|
|
503
|
+
});
|
|
504
|
+
|
|
505
|
+
const first = claimCallback('test-dedupe-key-2', session.id);
|
|
506
|
+
const second = claimCallback('test-dedupe-key-2', session.id);
|
|
507
|
+
|
|
508
|
+
expect(first).toBeTypeOf('string');
|
|
509
|
+
expect(second).toBeNull();
|
|
510
|
+
});
|
|
511
|
+
|
|
512
|
+
test('releaseCallbackClaim allows re-claim', () => {
|
|
513
|
+
const session = createTestCallSession({
|
|
514
|
+
conversationId: 'conv-24',
|
|
515
|
+
provider: 'twilio',
|
|
516
|
+
fromNumber: '+15551111111',
|
|
517
|
+
toNumber: '+15552222222',
|
|
518
|
+
});
|
|
519
|
+
|
|
520
|
+
const first = claimCallback('test-dedupe-key-3', session.id);
|
|
521
|
+
expect(first).toBeTypeOf('string');
|
|
522
|
+
|
|
523
|
+
releaseCallbackClaim('test-dedupe-key-3', first!);
|
|
524
|
+
|
|
525
|
+
const second = claimCallback('test-dedupe-key-3', session.id);
|
|
526
|
+
expect(second).toBeTypeOf('string');
|
|
527
|
+
});
|
|
528
|
+
|
|
529
|
+
test('releaseCallbackClaim with wrong claimId does not release', () => {
|
|
530
|
+
const session = createTestCallSession({
|
|
531
|
+
conversationId: 'conv-24b',
|
|
532
|
+
provider: 'twilio',
|
|
533
|
+
fromNumber: '+15551111111',
|
|
534
|
+
toNumber: '+15552222222',
|
|
535
|
+
});
|
|
536
|
+
|
|
537
|
+
const claimId = claimCallback('test-dedupe-key-3b', session.id);
|
|
538
|
+
expect(claimId).toBeTypeOf('string');
|
|
539
|
+
|
|
540
|
+
// Attempt to release with a wrong claim ID — should be a no-op
|
|
541
|
+
releaseCallbackClaim('test-dedupe-key-3b', 'wrong-claim-id');
|
|
542
|
+
|
|
543
|
+
// The claim should still be held, so re-claiming should fail
|
|
544
|
+
const second = claimCallback('test-dedupe-key-3b', session.id);
|
|
545
|
+
expect(second).toBeNull();
|
|
546
|
+
});
|
|
547
|
+
|
|
548
|
+
test('claimCallback INSERT OR IGNORE pattern is safe for same key', () => {
|
|
549
|
+
const session = createTestCallSession({
|
|
550
|
+
conversationId: 'conv-25',
|
|
551
|
+
provider: 'twilio',
|
|
552
|
+
fromNumber: '+15551111111',
|
|
553
|
+
toNumber: '+15552222222',
|
|
554
|
+
});
|
|
555
|
+
|
|
556
|
+
// Claim the key
|
|
557
|
+
const first = claimCallback('test-dedupe-key-4', session.id);
|
|
558
|
+
expect(first).toBeTypeOf('string');
|
|
559
|
+
|
|
560
|
+
// Subsequent claims with the same key should all return null without throwing
|
|
561
|
+
expect(claimCallback('test-dedupe-key-4', session.id)).toBeNull();
|
|
562
|
+
expect(claimCallback('test-dedupe-key-4', session.id)).toBeNull();
|
|
563
|
+
|
|
564
|
+
// Only one row should exist in the table for this key
|
|
565
|
+
const raw = (getDb() as unknown as { $client: import('bun:sqlite').Database }).$client;
|
|
566
|
+
const rows = raw.query('SELECT COUNT(*) as cnt FROM processed_callbacks WHERE dedupe_key = ?').get('test-dedupe-key-4') as { cnt: number };
|
|
567
|
+
expect(rows.cnt).toBe(1);
|
|
568
|
+
});
|
|
569
|
+
|
|
570
|
+
test('claimCallback reclaims expired orphaned claims', () => {
|
|
571
|
+
const session = createTestCallSession({
|
|
572
|
+
conversationId: 'conv-26',
|
|
573
|
+
provider: 'twilio',
|
|
574
|
+
fromNumber: '+15551111111',
|
|
575
|
+
toNumber: '+15552222222',
|
|
576
|
+
});
|
|
577
|
+
|
|
578
|
+
// Claim the key
|
|
579
|
+
const first = claimCallback('test-dedupe-key-expired', session.id);
|
|
580
|
+
expect(first).toBeTypeOf('string');
|
|
581
|
+
|
|
582
|
+
// Simulate an orphaned claim by backdating the created_at to well past expiry
|
|
583
|
+
const raw = (getDb() as unknown as { $client: import('bun:sqlite').Database }).$client;
|
|
584
|
+
const oldTimestamp = Date.now() - 120_000; // 2 minutes ago, well past 60s expiry
|
|
585
|
+
raw.query('UPDATE processed_callbacks SET created_at = ? WHERE dedupe_key = ?').run(oldTimestamp, 'test-dedupe-key-expired');
|
|
586
|
+
|
|
587
|
+
// Reclaim should succeed because the old claim has expired
|
|
588
|
+
const second = claimCallback('test-dedupe-key-expired', session.id);
|
|
589
|
+
expect(second).toBeTypeOf('string');
|
|
590
|
+
|
|
591
|
+
// The new claim should have a different claim ID
|
|
592
|
+
expect(second).not.toBe(first);
|
|
593
|
+
});
|
|
594
|
+
|
|
595
|
+
test('claimCallback does not reclaim finalized claims', () => {
|
|
596
|
+
const session = createTestCallSession({
|
|
597
|
+
conversationId: 'conv-27',
|
|
598
|
+
provider: 'twilio',
|
|
599
|
+
fromNumber: '+15551111111',
|
|
600
|
+
toNumber: '+15552222222',
|
|
601
|
+
});
|
|
602
|
+
|
|
603
|
+
// Claim and finalize
|
|
604
|
+
const first = claimCallback('test-dedupe-key-finalized', session.id);
|
|
605
|
+
expect(first).toBeTypeOf('string');
|
|
606
|
+
finalizeCallbackClaim('test-dedupe-key-finalized', first!);
|
|
607
|
+
|
|
608
|
+
// Attempting to reclaim a finalized key should fail because the far-future
|
|
609
|
+
// timestamp means it will never be considered expired
|
|
610
|
+
const second = claimCallback('test-dedupe-key-finalized', session.id);
|
|
611
|
+
expect(second).toBeNull();
|
|
612
|
+
});
|
|
613
|
+
|
|
614
|
+
test('finalizeCallbackClaim makes claim permanent', () => {
|
|
615
|
+
const session = createTestCallSession({
|
|
616
|
+
conversationId: 'conv-28',
|
|
617
|
+
provider: 'twilio',
|
|
618
|
+
fromNumber: '+15551111111',
|
|
619
|
+
toNumber: '+15552222222',
|
|
620
|
+
});
|
|
621
|
+
|
|
622
|
+
// Claim and finalize
|
|
623
|
+
const claimId = claimCallback('test-dedupe-key-permanent', session.id)!;
|
|
624
|
+
finalizeCallbackClaim('test-dedupe-key-permanent', claimId);
|
|
625
|
+
|
|
626
|
+
// Verify the created_at is set far in the future
|
|
627
|
+
const raw = (getDb() as unknown as { $client: import('bun:sqlite').Database }).$client;
|
|
628
|
+
const row = raw.query('SELECT created_at FROM processed_callbacks WHERE dedupe_key = ?').get('test-dedupe-key-permanent') as { created_at: number };
|
|
629
|
+
// Should be at least 50 years in the future from now
|
|
630
|
+
const fiftyYearsMs = 50 * 365 * 24 * 60 * 60 * 1000;
|
|
631
|
+
expect(row.created_at).toBeGreaterThan(Date.now() + fiftyYearsMs);
|
|
632
|
+
});
|
|
633
|
+
|
|
634
|
+
test('finalizeCallbackClaim with wrong claimId does not finalize', () => {
|
|
635
|
+
const session = createTestCallSession({
|
|
636
|
+
conversationId: 'conv-28b',
|
|
637
|
+
provider: 'twilio',
|
|
638
|
+
fromNumber: '+15551111111',
|
|
639
|
+
toNumber: '+15552222222',
|
|
640
|
+
});
|
|
641
|
+
|
|
642
|
+
// Claim the key
|
|
643
|
+
const claimId = claimCallback('test-dedupe-key-permanent-b', session.id)!;
|
|
644
|
+
expect(claimId).toBeTypeOf('string');
|
|
645
|
+
|
|
646
|
+
// Try to finalize with wrong claimId — should be a no-op
|
|
647
|
+
finalizeCallbackClaim('test-dedupe-key-permanent-b', 'wrong-claim-id');
|
|
648
|
+
|
|
649
|
+
// Verify the created_at was NOT set to far-future (it should still be close to now)
|
|
650
|
+
const raw = (getDb() as unknown as { $client: import('bun:sqlite').Database }).$client;
|
|
651
|
+
const row = raw.query('SELECT created_at FROM processed_callbacks WHERE dedupe_key = ?').get('test-dedupe-key-permanent-b') as { created_at: number };
|
|
652
|
+
const oneMinuteMs = 60 * 1000;
|
|
653
|
+
expect(row.created_at).toBeLessThan(Date.now() + oneMinuteMs);
|
|
654
|
+
});
|
|
655
|
+
|
|
656
|
+
test('handler A cannot release handler B claim after reclaim', () => {
|
|
657
|
+
const session = createTestCallSession({
|
|
658
|
+
conversationId: 'conv-29',
|
|
659
|
+
provider: 'twilio',
|
|
660
|
+
fromNumber: '+15551111111',
|
|
661
|
+
toNumber: '+15552222222',
|
|
662
|
+
});
|
|
663
|
+
|
|
664
|
+
// Handler A claims
|
|
665
|
+
const claimA = claimCallback('test-dedupe-key-ownership', session.id)!;
|
|
666
|
+
expect(claimA).toBeTypeOf('string');
|
|
667
|
+
|
|
668
|
+
// Simulate handler A taking too long: backdate the claim so it expires
|
|
669
|
+
const raw = (getDb() as unknown as { $client: import('bun:sqlite').Database }).$client;
|
|
670
|
+
const oldTimestamp = Date.now() - 120_000;
|
|
671
|
+
raw.query('UPDATE processed_callbacks SET created_at = ? WHERE dedupe_key = ?').run(oldTimestamp, 'test-dedupe-key-ownership');
|
|
672
|
+
|
|
673
|
+
// Handler B reclaims (succeeds because the old claim expired)
|
|
674
|
+
const claimB = claimCallback('test-dedupe-key-ownership', session.id)!;
|
|
675
|
+
expect(claimB).toBeTypeOf('string');
|
|
676
|
+
expect(claimB).not.toBe(claimA);
|
|
677
|
+
|
|
678
|
+
// Handler B finalizes
|
|
679
|
+
finalizeCallbackClaim('test-dedupe-key-ownership', claimB);
|
|
680
|
+
|
|
681
|
+
// Handler A tries to release using its old claimId — should be a no-op
|
|
682
|
+
releaseCallbackClaim('test-dedupe-key-ownership', claimA);
|
|
683
|
+
|
|
684
|
+
// Verify B's finalized claim is still intact
|
|
685
|
+
const row = raw.query('SELECT created_at, claim_id FROM processed_callbacks WHERE dedupe_key = ?').get('test-dedupe-key-ownership') as { created_at: number; claim_id: string };
|
|
686
|
+
expect(row).not.toBeNull();
|
|
687
|
+
expect(row.claim_id).toBe(claimB);
|
|
688
|
+
const fiftyYearsMs = 50 * 365 * 24 * 60 * 60 * 1000;
|
|
689
|
+
expect(row.created_at).toBeGreaterThan(Date.now() + fiftyYearsMs);
|
|
690
|
+
});
|
|
476
691
|
});
|
|
@@ -71,7 +71,7 @@ describe('cliDiscoverTool', () => {
|
|
|
71
71
|
expect(result.isError).toBe(false);
|
|
72
72
|
// Should at least find git which is nearly universally available
|
|
73
73
|
expect(result.content).toContain('**git**');
|
|
74
|
-
},
|
|
74
|
+
}, 60_000);
|
|
75
75
|
|
|
76
76
|
test('includes version info for found CLIs', async () => {
|
|
77
77
|
const result = await cliDiscoverTool.execute(
|
|
@@ -3,12 +3,13 @@ import { mkdirSync, rmSync, writeFileSync, existsSync } from 'node:fs';
|
|
|
3
3
|
import { join } from 'node:path';
|
|
4
4
|
import { tmpdir } from 'node:os';
|
|
5
5
|
import { execFileSync } from 'node:child_process';
|
|
6
|
+
import type { CommitContext } from '../workspace/commit-message-provider.js';
|
|
7
|
+
|
|
6
8
|
import {
|
|
7
9
|
CommitEnrichmentService,
|
|
8
10
|
_resetEnrichmentService,
|
|
9
11
|
} from '../workspace/commit-message-enrichment-service.js';
|
|
10
12
|
import { WorkspaceGitService, _resetGitServiceRegistry } from '../workspace/git-service.js';
|
|
11
|
-
import type { CommitContext } from '../workspace/commit-message-provider.js';
|
|
12
13
|
|
|
13
14
|
describe('CommitEnrichmentService', () => {
|
|
14
15
|
let testDir: string;
|
|
@@ -48,6 +49,16 @@ describe('CommitEnrichmentService', () => {
|
|
|
48
49
|
return await gitService.getHeadHash();
|
|
49
50
|
}
|
|
50
51
|
|
|
52
|
+
async function waitForDrain(service: CommitEnrichmentService, timeoutMs = 5000): Promise<void> {
|
|
53
|
+
const started = Date.now();
|
|
54
|
+
while (service._getQueueSize() > 0 || service._getActiveWorkers() > 0) {
|
|
55
|
+
if (Date.now() - started > timeoutMs) {
|
|
56
|
+
throw new Error(`Timed out waiting for enrichment queue to drain after ${timeoutMs}ms`);
|
|
57
|
+
}
|
|
58
|
+
await new Promise(resolve => setTimeout(resolve, 50));
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
|
|
51
62
|
test('enqueue and execute writes git note on success', async () => {
|
|
52
63
|
const commitHash = await createCommit();
|
|
53
64
|
const service = new CommitEnrichmentService({
|
|
@@ -280,9 +291,7 @@ describe('CommitEnrichmentService', () => {
|
|
|
280
291
|
});
|
|
281
292
|
|
|
282
293
|
// Wait for queue to drain before shutdown (avoids discarding pending jobs)
|
|
283
|
-
|
|
284
|
-
await new Promise(resolve => setTimeout(resolve, 50));
|
|
285
|
-
}
|
|
294
|
+
await waitForDrain(service, 5000);
|
|
286
295
|
await service.shutdown();
|
|
287
296
|
|
|
288
297
|
// Both notes should exist
|
|
@@ -318,9 +327,7 @@ describe('CommitEnrichmentService', () => {
|
|
|
318
327
|
// Wait for all retries to complete (initial + 2 retries, with backoff)
|
|
319
328
|
// Backoff: 1s after attempt 1, 2s after attempt 2 = ~3s total
|
|
320
329
|
// But since the job itself is very fast to time out, total time is dominated by backoff
|
|
321
|
-
|
|
322
|
-
await new Promise(resolve => setTimeout(resolve, 100));
|
|
323
|
-
}
|
|
330
|
+
await waitForDrain(service, 10000);
|
|
324
331
|
await service.shutdown();
|
|
325
332
|
|
|
326
333
|
// After 1 initial attempt + 2 retries (3 total), the job should be counted as failed
|
|
@@ -373,6 +380,140 @@ describe('CommitEnrichmentService', () => {
|
|
|
373
380
|
expect(service._getDroppedCount()).toBe(4);
|
|
374
381
|
});
|
|
375
382
|
|
|
383
|
+
test('timed-out enrichment work is cancelled via AbortSignal', async () => {
|
|
384
|
+
// Track whether the slow enrichment work actually ran to completion
|
|
385
|
+
let enrichmentCompleted = false;
|
|
386
|
+
const commitHash = await createCommit();
|
|
387
|
+
|
|
388
|
+
const service = new CommitEnrichmentService({
|
|
389
|
+
maxQueueSize: 10,
|
|
390
|
+
maxConcurrency: 1,
|
|
391
|
+
jobTimeoutMs: 50, // Very short timeout
|
|
392
|
+
maxRetries: 0,
|
|
393
|
+
});
|
|
394
|
+
|
|
395
|
+
// Monkey-patch writeNote to simulate slow work that respects the abort signal.
|
|
396
|
+
// The real writeNote now passes the signal to execFileAsync which kills the
|
|
397
|
+
// child process on abort. This mock replicates that behavior by rejecting
|
|
398
|
+
// when the signal fires.
|
|
399
|
+
const originalWriteNote = gitService.writeNote.bind(gitService);
|
|
400
|
+
gitService.writeNote = async (_hash: string, _note: string, signal?: AbortSignal) => {
|
|
401
|
+
// Simulate slow work that is cancellable via AbortSignal
|
|
402
|
+
await new Promise<void>((resolve, reject) => {
|
|
403
|
+
const timer = setTimeout(() => {
|
|
404
|
+
enrichmentCompleted = true;
|
|
405
|
+
resolve();
|
|
406
|
+
}, 2000);
|
|
407
|
+
signal?.addEventListener('abort', () => {
|
|
408
|
+
clearTimeout(timer);
|
|
409
|
+
reject(new Error('aborted'));
|
|
410
|
+
}, { once: true });
|
|
411
|
+
});
|
|
412
|
+
};
|
|
413
|
+
|
|
414
|
+
service.enqueue({
|
|
415
|
+
workspaceDir: testDir,
|
|
416
|
+
commitHash,
|
|
417
|
+
context: makeContext(),
|
|
418
|
+
gitService,
|
|
419
|
+
});
|
|
420
|
+
|
|
421
|
+
await waitForDrain(service, 5000);
|
|
422
|
+
await service.shutdown();
|
|
423
|
+
|
|
424
|
+
// Allow any zombie work to settle — if abort didn't work, the 2s timer
|
|
425
|
+
// would still be running and would set enrichmentCompleted=true. Wait
|
|
426
|
+
// longer than the 2000ms mock delay to reliably catch the regression.
|
|
427
|
+
await new Promise(resolve => setTimeout(resolve, 2500));
|
|
428
|
+
|
|
429
|
+
// The job should have timed out and been counted as failed
|
|
430
|
+
expect(service._getFailedCount()).toBe(1);
|
|
431
|
+
expect(service._getSucceededCount()).toBe(0);
|
|
432
|
+
// The slow enrichment work should NOT have completed since the signal was aborted
|
|
433
|
+
expect(enrichmentCompleted).toBe(false);
|
|
434
|
+
|
|
435
|
+
// Restore original
|
|
436
|
+
gitService.writeNote = originalWriteNote;
|
|
437
|
+
});
|
|
438
|
+
|
|
439
|
+
test('shutdown does not hang on timed-out jobs', async () => {
|
|
440
|
+
const commitHash = await createCommit();
|
|
441
|
+
|
|
442
|
+
const service = new CommitEnrichmentService({
|
|
443
|
+
maxQueueSize: 10,
|
|
444
|
+
maxConcurrency: 1,
|
|
445
|
+
jobTimeoutMs: 50, // Short timeout
|
|
446
|
+
maxRetries: 0,
|
|
447
|
+
});
|
|
448
|
+
|
|
449
|
+
// Make writeNote artificially slow so the job will always time out.
|
|
450
|
+
// The mock respects the abort signal so the subprocess is killed on timeout.
|
|
451
|
+
const originalWriteNote = gitService.writeNote.bind(gitService);
|
|
452
|
+
gitService.writeNote = async (_hash: string, _note: string, signal?: AbortSignal) => {
|
|
453
|
+
await new Promise<void>((resolve, reject) => {
|
|
454
|
+
const timer = setTimeout(resolve, 5000);
|
|
455
|
+
signal?.addEventListener('abort', () => {
|
|
456
|
+
clearTimeout(timer);
|
|
457
|
+
reject(new Error('aborted'));
|
|
458
|
+
}, { once: true });
|
|
459
|
+
});
|
|
460
|
+
};
|
|
461
|
+
|
|
462
|
+
service.enqueue({
|
|
463
|
+
workspaceDir: testDir,
|
|
464
|
+
commitHash,
|
|
465
|
+
context: makeContext(),
|
|
466
|
+
gitService,
|
|
467
|
+
});
|
|
468
|
+
|
|
469
|
+
// Shutdown should complete promptly, not hang for 5s waiting on the slow writeNote
|
|
470
|
+
const shutdownStart = Date.now();
|
|
471
|
+
await service.shutdown();
|
|
472
|
+
const shutdownElapsed = Date.now() - shutdownStart;
|
|
473
|
+
|
|
474
|
+
// Shutdown should complete well under the 5s slow-work duration
|
|
475
|
+
expect(shutdownElapsed).toBeLessThan(3000);
|
|
476
|
+
expect(service._getFailedCount()).toBe(1);
|
|
477
|
+
|
|
478
|
+
gitService.writeNote = originalWriteNote;
|
|
479
|
+
}, 10000);
|
|
480
|
+
|
|
481
|
+
test('abort signal is triggered on non-timeout errors before retry', async () => {
|
|
482
|
+
const commitHash = await createCommit();
|
|
483
|
+
|
|
484
|
+
const service = new CommitEnrichmentService({
|
|
485
|
+
maxQueueSize: 10,
|
|
486
|
+
maxConcurrency: 1,
|
|
487
|
+
jobTimeoutMs: 5000,
|
|
488
|
+
maxRetries: 0,
|
|
489
|
+
});
|
|
490
|
+
|
|
491
|
+
// Make writeNote throw an error and observe whether the signal gets aborted
|
|
492
|
+
const originalWriteNote = gitService.writeNote.bind(gitService);
|
|
493
|
+
gitService.writeNote = async (_hash: string, _note: string) => {
|
|
494
|
+
// Set up a listener on the abort controller's signal to track abortion.
|
|
495
|
+
// We access the signal indirectly by throwing, which triggers the catch
|
|
496
|
+
// block in executeJob where controller.abort() is called.
|
|
497
|
+
throw new Error('Simulated writeNote failure');
|
|
498
|
+
};
|
|
499
|
+
|
|
500
|
+
service.enqueue({
|
|
501
|
+
workspaceDir: testDir,
|
|
502
|
+
commitHash,
|
|
503
|
+
context: makeContext(),
|
|
504
|
+
gitService,
|
|
505
|
+
});
|
|
506
|
+
|
|
507
|
+
await waitForDrain(service, 5000);
|
|
508
|
+
await service.shutdown();
|
|
509
|
+
|
|
510
|
+
// The job should have failed (no retries configured)
|
|
511
|
+
expect(service._getFailedCount()).toBe(1);
|
|
512
|
+
expect(service._getSucceededCount()).toBe(0);
|
|
513
|
+
|
|
514
|
+
gitService.writeNote = originalWriteNote;
|
|
515
|
+
});
|
|
516
|
+
|
|
376
517
|
test('enqueue is fire-and-forget and never throws even when called rapidly', async () => {
|
|
377
518
|
const service = new CommitEnrichmentService({
|
|
378
519
|
maxQueueSize: 3,
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context Window Compaction Benchmark
|
|
3
|
+
*
|
|
4
|
+
* Measures compaction cost with a mock provider:
|
|
5
|
+
* - compaction latency under threshold pressure
|
|
6
|
+
* - no-op fast path for below-threshold histories
|
|
7
|
+
* - token reduction ratio after compaction
|
|
8
|
+
* - summary call count within expected range
|
|
9
|
+
* - severe pressure overriding cooldown
|
|
10
|
+
*/
|
|
11
|
+
import { describe, expect, mock, test } from 'bun:test';
|
|
12
|
+
|
|
13
|
+
import { DEFAULT_CONFIG } from '../config/defaults.js';
|
|
14
|
+
import { ContextWindowManager } from '../context/window-manager.js';
|
|
15
|
+
import { estimatePromptTokens } from '../context/token-estimator.js';
|
|
16
|
+
import type { Message, Provider } from '../providers/types.js';
|
|
17
|
+
|
|
18
|
+
mock.module('../util/logger.js', () => ({
|
|
19
|
+
getLogger: () =>
|
|
20
|
+
new Proxy({} as Record<string, unknown>, { get: () => () => {} }),
|
|
21
|
+
}));
|
|
22
|
+
|
|
23
|
+
function makeSummaryProvider(counter: { calls: number }): Provider {
|
|
24
|
+
return {
|
|
25
|
+
name: 'mock',
|
|
26
|
+
async sendMessage() {
|
|
27
|
+
counter.calls += 1;
|
|
28
|
+
return {
|
|
29
|
+
content: [
|
|
30
|
+
{
|
|
31
|
+
type: 'text',
|
|
32
|
+
text: `## Goals\n- Preserve state\n## Constraints\n- Keep PRs small\n## Decisions\n- Call ${counter.calls}`,
|
|
33
|
+
},
|
|
34
|
+
],
|
|
35
|
+
model: 'mock-model',
|
|
36
|
+
usage: { inputTokens: 420, outputTokens: 85 },
|
|
37
|
+
stopReason: 'end_turn',
|
|
38
|
+
};
|
|
39
|
+
},
|
|
40
|
+
};
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
function makeLongMessages(turns: number): Message[] {
|
|
44
|
+
const rows: Message[] = [];
|
|
45
|
+
for (let i = 0; i < turns; i++) {
|
|
46
|
+
rows.push({
|
|
47
|
+
role: 'user',
|
|
48
|
+
content: [
|
|
49
|
+
{
|
|
50
|
+
type: 'text',
|
|
51
|
+
text: `[U${i}] User message with enough content to estimate tokens. Topic ${i % 9}.`,
|
|
52
|
+
},
|
|
53
|
+
],
|
|
54
|
+
});
|
|
55
|
+
rows.push({
|
|
56
|
+
role: 'assistant',
|
|
57
|
+
content: [
|
|
58
|
+
{
|
|
59
|
+
type: 'text',
|
|
60
|
+
text: `[A${i}] Assistant response with relevant content. Result ${i % 7}.`,
|
|
61
|
+
},
|
|
62
|
+
],
|
|
63
|
+
});
|
|
64
|
+
}
|
|
65
|
+
return rows;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
function makeConfig() {
|
|
69
|
+
return {
|
|
70
|
+
...DEFAULT_CONFIG.contextWindow,
|
|
71
|
+
maxInputTokens: 6000,
|
|
72
|
+
targetInputTokens: 3200,
|
|
73
|
+
compactThreshold: 0.6,
|
|
74
|
+
preserveRecentUserTurns: 8,
|
|
75
|
+
chunkTokens: 1200,
|
|
76
|
+
};
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
describe('Compaction benchmark', () => {
|
|
80
|
+
test('compaction with mock provider completes under 500ms', async () => {
|
|
81
|
+
const counter = { calls: 0 };
|
|
82
|
+
const provider = makeSummaryProvider(counter);
|
|
83
|
+
const config = makeConfig();
|
|
84
|
+
const manager = new ContextWindowManager(provider, 'system prompt', config);
|
|
85
|
+
|
|
86
|
+
// 90 turns = 180 messages, well above 60% of 6000 = 3600 threshold
|
|
87
|
+
const messages = makeLongMessages(90);
|
|
88
|
+
const before = estimatePromptTokens(messages, 'system prompt', {
|
|
89
|
+
providerName: 'mock',
|
|
90
|
+
});
|
|
91
|
+
expect(before).toBeGreaterThan(config.maxInputTokens * config.compactThreshold);
|
|
92
|
+
|
|
93
|
+
const start = performance.now();
|
|
94
|
+
const result = await manager.maybeCompact(messages);
|
|
95
|
+
const elapsed = performance.now() - start;
|
|
96
|
+
|
|
97
|
+
expect(result.compacted).toBe(true);
|
|
98
|
+
expect(elapsed).toBeLessThan(500);
|
|
99
|
+
});
|
|
100
|
+
|
|
101
|
+
test('below-threshold check returns in under 50ms (no-op)', async () => {
|
|
102
|
+
const counter = { calls: 0 };
|
|
103
|
+
const provider = makeSummaryProvider(counter);
|
|
104
|
+
const config = makeConfig();
|
|
105
|
+
const manager = new ContextWindowManager(provider, 'system prompt', config);
|
|
106
|
+
|
|
107
|
+
// 3 turns = 6 messages, well below threshold
|
|
108
|
+
const messages = makeLongMessages(3);
|
|
109
|
+
|
|
110
|
+
const start = performance.now();
|
|
111
|
+
const result = await manager.maybeCompact(messages);
|
|
112
|
+
const elapsed = performance.now() - start;
|
|
113
|
+
|
|
114
|
+
expect(result.compacted).toBe(false);
|
|
115
|
+
expect(result.reason).toBe('below compaction threshold');
|
|
116
|
+
expect(elapsed).toBeLessThan(50);
|
|
117
|
+
expect(counter.calls).toBe(0);
|
|
118
|
+
});
|
|
119
|
+
|
|
120
|
+
test('token reduction ratio exceeds 30% after compaction', async () => {
|
|
121
|
+
const counter = { calls: 0 };
|
|
122
|
+
const provider = makeSummaryProvider(counter);
|
|
123
|
+
const config = makeConfig();
|
|
124
|
+
const manager = new ContextWindowManager(provider, 'system prompt', config);
|
|
125
|
+
|
|
126
|
+
const messages = makeLongMessages(90);
|
|
127
|
+
const result = await manager.maybeCompact(messages);
|
|
128
|
+
|
|
129
|
+
expect(result.compacted).toBe(true);
|
|
130
|
+
const reductionRatio =
|
|
131
|
+
(result.previousEstimatedInputTokens - result.estimatedInputTokens) /
|
|
132
|
+
result.previousEstimatedInputTokens;
|
|
133
|
+
expect(reductionRatio).toBeGreaterThan(0.3);
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
test('summary calls fall within 2-6 range', async () => {
|
|
137
|
+
const counter = { calls: 0 };
|
|
138
|
+
const provider = makeSummaryProvider(counter);
|
|
139
|
+
const config = makeConfig();
|
|
140
|
+
const manager = new ContextWindowManager(provider, 'system prompt', config);
|
|
141
|
+
|
|
142
|
+
const messages = makeLongMessages(90);
|
|
143
|
+
const result = await manager.maybeCompact(messages);
|
|
144
|
+
|
|
145
|
+
expect(result.compacted).toBe(true);
|
|
146
|
+
expect(result.summaryCalls).toBeGreaterThanOrEqual(2);
|
|
147
|
+
expect(result.summaryCalls).toBeLessThanOrEqual(6);
|
|
148
|
+
expect(result.summaryCalls).toBe(counter.calls);
|
|
149
|
+
});
|
|
150
|
+
|
|
151
|
+
test('severe pressure triggers compaction even during cooldown', async () => {
|
|
152
|
+
const counter = { calls: 0 };
|
|
153
|
+
const provider = makeSummaryProvider(counter);
|
|
154
|
+
// Use a tighter maxInputTokens so 90 turns exceeds the 95% severe threshold
|
|
155
|
+
const config = {
|
|
156
|
+
...makeConfig(),
|
|
157
|
+
maxInputTokens: 4000,
|
|
158
|
+
targetInputTokens: 2000,
|
|
159
|
+
};
|
|
160
|
+
const manager = new ContextWindowManager(provider, 'system prompt', config);
|
|
161
|
+
|
|
162
|
+
const messages = makeLongMessages(90);
|
|
163
|
+
const estimated = estimatePromptTokens(messages, 'system prompt', {
|
|
164
|
+
providerName: 'mock',
|
|
165
|
+
});
|
|
166
|
+
expect(estimated).toBeGreaterThan(config.maxInputTokens * 0.95);
|
|
167
|
+
|
|
168
|
+
// Simulate being within cooldown by setting lastCompactedAt to now
|
|
169
|
+
const result = await manager.maybeCompact(messages, undefined, {
|
|
170
|
+
lastCompactedAt: Date.now(),
|
|
171
|
+
});
|
|
172
|
+
|
|
173
|
+
expect(result.compacted).toBe(true);
|
|
174
|
+
expect(result.summaryCalls).toBeGreaterThan(0);
|
|
175
|
+
});
|
|
176
|
+
});
|