switchroom 0.7.15 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -59
- package/bin/run-hook.sh +27 -11
- package/bin/timezone-hook.sh +9 -7
- package/dist/agent-scheduler/index.js +410 -133
- package/dist/auth-broker/index.js +13932 -0
- package/dist/cli/switchroom.js +26937 -5601
- package/dist/host-control/main.js +12702 -0
- package/dist/vault/approvals/kernel-server.js +467 -184
- package/dist/vault/broker/server.js +1430 -724
- package/examples/minimal.yaml +63 -0
- package/examples/personal-google-workspace-mcp/.env.example +34 -0
- package/examples/personal-google-workspace-mcp/README.md +194 -0
- package/examples/personal-google-workspace-mcp/compose.yaml +66 -0
- package/examples/switchroom.yaml +220 -0
- package/package.json +7 -4
- package/profiles/_base/settings.json.hbs +20 -5
- package/profiles/_base/start.sh.hbs +16 -3
- package/profiles/_shared/agent-self-service.md.hbs +126 -0
- package/profiles/_shared/telegram-style.md.hbs +20 -90
- package/profiles/_shared/vault-protocol.md.hbs +68 -0
- package/profiles/default/CLAUDE.md +50 -96
- package/profiles/default/CLAUDE.md.hbs +36 -6
- package/profiles/default/workspace/SOUL.md.hbs +12 -5
- package/skills/buildkite-agent-infrastructure/SKILL.md +30 -11
- package/skills/buildkite-agent-runtime/SKILL.md +44 -11
- package/skills/buildkite-api/SKILL.md +31 -8
- package/skills/buildkite-cli/SKILL.md +27 -9
- package/skills/buildkite-migration/SKILL.md +22 -9
- package/skills/buildkite-pipelines/SKILL.md +26 -9
- package/skills/buildkite-secure-delivery/SKILL.md +23 -9
- package/skills/buildkite-test-engine/SKILL.md +25 -8
- package/skills/docx/SKILL.md +1 -1
- package/skills/docx/scripts/office/validators/__pycache__/__init__.cpython-313.pyc +0 -0
- package/skills/docx/scripts/office/validators/__pycache__/base.cpython-313.pyc +0 -0
- package/skills/file-bug/SKILL.md +34 -6
- package/skills/humanizer/SKILL.md +15 -0
- package/skills/humanizer-calibrate/SKILL.md +7 -1
- package/skills/mcp-builder/SKILL.md +1 -1
- package/skills/pdf/SKILL.md +1 -1
- package/skills/pptx/SKILL.md +1 -1
- package/skills/skill-creator/SKILL.md +21 -1
- package/skills/skill-creator/scripts/__pycache__/__init__.cpython-313.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-313.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-313.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-313.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-313.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/utils.cpython-313.pyc +0 -0
- package/skills/switchroom-cli/SKILL.md +63 -64
- package/skills/switchroom-health/SKILL.md +23 -10
- package/skills/switchroom-install/SKILL.md +3 -3
- package/skills/switchroom-manage/SKILL.md +26 -19
- package/skills/switchroom-runtime/SKILL.md +191 -0
- package/skills/switchroom-status/SKILL.md +27 -2
- package/skills/telegram-test-harness/SKILL.md +3 -0
- package/skills/token-helpers/SKILL.md +24 -1
- package/skills/webapp-testing/SKILL.md +31 -1
- package/skills/xlsx/SKILL.md +1 -1
- package/telegram-plugin/admin-commands/index.ts +7 -5
- package/telegram-plugin/analytics-posthog.ts +191 -0
- package/telegram-plugin/bridge/bridge.ts +69 -0
- package/telegram-plugin/bridge/ipc-client.ts +4 -1
- package/telegram-plugin/dist/bridge/bridge.js +194 -119
- package/telegram-plugin/dist/gateway/gateway.js +23611 -19671
- package/telegram-plugin/dist/server.js +245 -189
- package/telegram-plugin/first-paint.ts +3 -24
- package/telegram-plugin/gateway/auth-add-flow.ts +326 -0
- package/telegram-plugin/gateway/auth-broker-client.ts +75 -0
- package/telegram-plugin/gateway/auth-command.ts +794 -0
- package/telegram-plugin/gateway/auth-line.ts +123 -0
- package/telegram-plugin/gateway/boot-card.ts +169 -40
- package/telegram-plugin/gateway/boot-issue-cache.ts +308 -0
- package/telegram-plugin/gateway/boot-probes.ts +166 -123
- package/telegram-plugin/gateway/boot-reason.ts +41 -7
- package/telegram-plugin/gateway/boot-version.ts +66 -0
- package/telegram-plugin/gateway/gateway.ts +3499 -1885
- package/telegram-plugin/gateway/hostd-dispatch.ts +117 -0
- package/telegram-plugin/gateway/ipc-protocol.ts +18 -0
- package/telegram-plugin/gateway/pending-inbound-buffer.ts +106 -0
- package/telegram-plugin/gateway/quarantine.ts +69 -0
- package/telegram-plugin/gateway/quota-cache.ts +9 -4
- package/telegram-plugin/gateway/reaction-trigger.ts +401 -0
- package/telegram-plugin/gateway/recent-denials.test.ts +103 -0
- package/telegram-plugin/gateway/recent-denials.ts +77 -0
- package/telegram-plugin/gateway/startup-network-retry.ts +109 -31
- package/telegram-plugin/gateway/vault-grant-inbound-builders.ts +125 -0
- package/telegram-plugin/history.ts +91 -0
- package/telegram-plugin/hooks/hooks.json +10 -0
- package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +130 -0
- package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +19 -2
- package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +22 -2
- package/telegram-plugin/hooks/tool-label-pretool.mjs +11 -0
- package/telegram-plugin/hooks/wedge-detect-posttool.mjs +303 -0
- package/telegram-plugin/inbound-classifier.ts +50 -0
- package/telegram-plugin/inline-keyboard-callbacks.ts +136 -0
- package/telegram-plugin/node_modules/.vite/vitest/da39a3ee5e6b4b0d3255bfef95601890afd80709/results.json +1 -0
- package/telegram-plugin/package.json +4 -2
- package/telegram-plugin/permission-rule.ts +51 -0
- package/telegram-plugin/permission-title.ts +56 -0
- package/telegram-plugin/quota-check.ts +19 -41
- package/telegram-plugin/registry/reaper.ts +223 -0
- package/telegram-plugin/retry-api-call.ts +80 -0
- package/telegram-plugin/runtime-metrics.ts +177 -0
- package/telegram-plugin/scripts/build.mjs +0 -1
- package/telegram-plugin/secret-detect/index.ts +24 -0
- package/telegram-plugin/secret-detect/vault-error.test.ts +64 -12
- package/telegram-plugin/secret-detect/vault-error.ts +78 -11
- package/telegram-plugin/secret-detect/vault-write.ts +14 -2
- package/telegram-plugin/server.js +41795 -0
- package/telegram-plugin/session-tail.ts +6 -1
- package/telegram-plugin/shared/bot-runtime.ts +5 -4
- package/telegram-plugin/silence-poke.ts +420 -0
- package/telegram-plugin/silent-end.ts +174 -0
- package/telegram-plugin/stream-controller.ts +13 -0
- package/telegram-plugin/stream-reply-handler.ts +7 -0
- package/telegram-plugin/subagent-watcher.ts +213 -4
- package/telegram-plugin/tests/auth-add-flow.test.ts +559 -0
- package/telegram-plugin/tests/auth-code-redact.test.ts +8 -4
- package/telegram-plugin/tests/auth-command-vernacular.test.ts +531 -0
- package/telegram-plugin/tests/boot-card-issue-dedup.test.ts +247 -0
- package/telegram-plugin/tests/boot-card-reason-to-render.test.ts +182 -0
- package/telegram-plugin/tests/boot-card-reason.test.ts +65 -2
- package/telegram-plugin/tests/boot-card-render.test.ts +146 -0
- package/telegram-plugin/tests/boot-card-silent-on-operator.test.ts +103 -0
- package/telegram-plugin/tests/boot-probes.test.ts +216 -10
- package/telegram-plugin/tests/boot-version-string.test.ts +0 -0
- package/telegram-plugin/tests/finalize-callback.test.ts +190 -0
- package/telegram-plugin/tests/gateway-message-validator.test.ts +26 -0
- package/telegram-plugin/tests/gateway-secret-detect.test.ts +12 -3
- package/telegram-plugin/tests/gateway-startup-network-retry.test.ts +104 -0
- package/telegram-plugin/tests/history-reaper.test.ts +378 -0
- package/telegram-plugin/tests/hostd-dispatch.test.ts +129 -0
- package/telegram-plugin/tests/inbound-classifier.test.ts +76 -0
- package/telegram-plugin/tests/inbound-message-types.test.ts +267 -0
- package/telegram-plugin/tests/issues-card.test.ts +49 -0
- package/telegram-plugin/tests/pending-inbound-buffer.test.ts +132 -0
- package/telegram-plugin/tests/permission-rule.test.ts +80 -1
- package/telegram-plugin/tests/permission-title.test.ts +31 -0
- package/telegram-plugin/tests/quota-check.test.ts +5 -35
- package/telegram-plugin/tests/races.test.ts +179 -0
- package/telegram-plugin/tests/reaction-trigger-flow.test.ts +353 -0
- package/telegram-plugin/tests/reaction-trigger.test.ts +397 -0
- package/telegram-plugin/tests/retry-api-call.test.ts +152 -1
- package/telegram-plugin/tests/runtime-metrics.test.ts +145 -0
- package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +155 -0
- package/telegram-plugin/tests/secret-detect-delete-must-surface-failures.test.ts +133 -0
- package/telegram-plugin/tests/secret-detect-false-positives.test.ts +137 -0
- package/telegram-plugin/tests/silence-poke.test.ts +493 -0
- package/telegram-plugin/tests/silent-end.test.ts +206 -0
- package/telegram-plugin/tests/subagent-tracker-hooks.test.ts +107 -0
- package/telegram-plugin/tests/subagent-watcher-env-thresholds.test.ts +224 -0
- package/telegram-plugin/tests/subagent-watcher-stall-terminal.test.ts +316 -0
- package/telegram-plugin/tests/subagent-watcher.test.ts +263 -0
- package/telegram-plugin/tests/turn-signal-tracker.test.ts +81 -0
- package/telegram-plugin/tests/vault-approval-posture.test.ts +256 -0
- package/telegram-plugin/tests/vault-grant-auto-resume.test.ts +73 -0
- package/telegram-plugin/tests/vault-grant-inbound-builders.test.ts +226 -0
- package/telegram-plugin/tests/vault-grant-union.test.ts +130 -0
- package/telegram-plugin/tests/vault-key-regex-allows-slash.test.ts +140 -0
- package/telegram-plugin/tests/vault-posture-quarantine.test.ts +104 -0
- package/telegram-plugin/tests/vault-request-access-tool.test.ts +114 -0
- package/telegram-plugin/tests/vault-request-access-unlock-resume.test.ts +106 -0
- package/telegram-plugin/turn-signal-tracker.ts +100 -24
- package/telegram-plugin/uat/SETUP.md +210 -35
- package/telegram-plugin/uat/assertions.ts +264 -37
- package/telegram-plugin/uat/driver-info.ts +57 -0
- package/telegram-plugin/uat/driver.ts +590 -51
- package/telegram-plugin/uat/harness.ts +140 -94
- package/telegram-plugin/uat/load-env.test.ts +72 -0
- package/telegram-plugin/uat/load-env.ts +48 -0
- package/telegram-plugin/uat/login.ts +96 -53
- package/telegram-plugin/uat/runners/agent-self-sufficiency.ts +457 -0
- package/telegram-plugin/uat/runners/paraphrases.ts +231 -0
- package/telegram-plugin/uat/runners/report.ts +150 -0
- package/telegram-plugin/uat/runners/run-agent-self-sufficiency.sh +50 -0
- package/telegram-plugin/uat/runners/scorer.test.ts +196 -0
- package/telegram-plugin/uat/runners/scorer.ts +106 -0
- package/telegram-plugin/uat/runners/skill-coverage.test.ts +100 -0
- package/telegram-plugin/uat/runners/skill-coverage.ts +620 -0
- package/telegram-plugin/uat/scenarios/ask-user-button-tap-dm.test.ts +141 -0
- package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +191 -0
- package/telegram-plugin/uat/scenarios/fuzz-extended-dm.test.ts +255 -0
- package/telegram-plugin/uat/scenarios/fuzz-human-style-dm.test.ts +275 -0
- package/telegram-plugin/uat/scenarios/fuzz-random-prompts-dm.test.ts +146 -0
- package/telegram-plugin/uat/scenarios/fuzz-status-ask-dm.test.ts +486 -0
- package/telegram-plugin/uat/scenarios/jtbd-interrupt-marker-dm.test.ts +67 -0
- package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +100 -0
- package/telegram-plugin/uat/scenarios/jtbd-soft-commit-dm.test.ts +67 -0
- package/telegram-plugin/uat/scenarios/jtbd-status-query-dm.test.ts +49 -0
- package/telegram-plugin/uat/scenarios/location-inbound-dm.test.ts +65 -0
- package/telegram-plugin/uat/scenarios/midturn-silent-dm.test.ts +175 -0
- package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +142 -0
- package/telegram-plugin/uat/scenarios/reactions-trigger-turn-dm.test.ts +96 -0
- package/telegram-plugin/uat/scenarios/secret-redaction-deletes-original-dm.test.ts +123 -0
- package/telegram-plugin/uat/scenarios/secret-redaction-no-false-positive-dm.test.ts +87 -0
- package/telegram-plugin/uat/scenarios/silence-poke-soft-dm.test.ts +155 -0
- package/telegram-plugin/uat/scenarios/silent-end-recovery-dm.test.ts +95 -0
- package/telegram-plugin/uat/scenarios/smoke-dm-reply.test.ts +57 -0
- package/telegram-plugin/uat/scenarios/subagent-watcher-no-rerun-dm.test.ts +135 -0
- package/telegram-plugin/uat/scenarios/vault-approval-posture-telegram-id-dm.test.ts +191 -0
- package/telegram-plugin/uat/scenarios/vault-audit-allow-dm.test.ts +108 -0
- package/telegram-plugin/uat/scenarios/vault-grant-auto-resume-dm.test.ts +121 -0
- package/telegram-plugin/uat/scenarios/vault-request-access-concurrent-dm.test.ts +161 -0
- package/telegram-plugin/uat/scenarios/vault-request-access-end-to-end-dm.test.ts +158 -0
- package/telegram-plugin/uat/scenarios/voice-inbound-dm.test.ts +65 -0
- package/telegram-plugin/vault-approval-posture.ts +42 -0
- package/telegram-plugin/welcome-text.ts +1 -0
- package/telegram-plugin/active-pins-sweep.ts +0 -204
- package/telegram-plugin/active-pins.ts +0 -146
- package/telegram-plugin/auth-dashboard.ts +0 -1104
- package/telegram-plugin/auth-slot-parser.ts +0 -497
- package/telegram-plugin/card-event-log.ts +0 -138
- package/telegram-plugin/dist/foreman/foreman.js +0 -31106
- package/telegram-plugin/docs/multi-agent-card-design.md +0 -847
- package/telegram-plugin/docs/pinned-progress-card-reliability.md +0 -144
- package/telegram-plugin/foreman/foreman-create-flow.ts +0 -202
- package/telegram-plugin/foreman/foreman-handlers.ts +0 -493
- package/telegram-plugin/foreman/foreman.ts +0 -1165
- package/telegram-plugin/foreman/setup-flow.ts +0 -345
- package/telegram-plugin/foreman/setup-state.ts +0 -239
- package/telegram-plugin/foreman/state.ts +0 -203
- package/telegram-plugin/pin-event-log.ts +0 -76
- package/telegram-plugin/progress-card-driver.ts +0 -2886
- package/telegram-plugin/progress-card-pin-manager.ts +0 -589
- package/telegram-plugin/progress-card-pin-watchdog.ts +0 -98
- package/telegram-plugin/progress-card.ts +0 -1409
- package/telegram-plugin/tests/HARNESS.md +0 -340
- package/telegram-plugin/tests/_progress-card-harness.ts +0 -109
- package/telegram-plugin/tests/active-pins-boot-reaper.test.ts +0 -211
- package/telegram-plugin/tests/active-pins-sweep.test.ts +0 -309
- package/telegram-plugin/tests/active-pins.test.ts +0 -187
- package/telegram-plugin/tests/auth-account-identity-surface.test.ts +0 -118
- package/telegram-plugin/tests/auth-dashboard-edge-cases.test.ts +0 -260
- package/telegram-plugin/tests/auth-dashboard-restart-flow.test.ts +0 -140
- package/telegram-plugin/tests/auth-dashboard-v3b.test.ts +0 -559
- package/telegram-plugin/tests/auth-dashboard.test.ts +0 -1045
- package/telegram-plugin/tests/auth-slot-commands.test.ts +0 -640
- package/telegram-plugin/tests/bg-agent-progress-card-757.test.ts +0 -201
- package/telegram-plugin/tests/boot-card-account-quota.test.ts +0 -137
- package/telegram-plugin/tests/card-event-log.test.ts +0 -145
- package/telegram-plugin/tests/first-paint.test.ts +0 -257
- package/telegram-plugin/tests/foreman-create-flow.test.ts +0 -359
- package/telegram-plugin/tests/foreman-handlers.test.ts +0 -347
- package/telegram-plugin/tests/foreman-state.test.ts +0 -164
- package/telegram-plugin/tests/foreman-write-ops.test.ts +0 -214
- package/telegram-plugin/tests/harness-ordering-invariants.test.ts +0 -243
- package/telegram-plugin/tests/pin-event-log.test.ts +0 -124
- package/telegram-plugin/tests/progress-card-api-failure-during-deferred.test.ts +0 -73
- package/telegram-plugin/tests/progress-card-close-paths-converge.test.ts +0 -272
- package/telegram-plugin/tests/progress-card-cross-turn.test.ts +0 -258
- package/telegram-plugin/tests/progress-card-delay-842.test.ts +0 -160
- package/telegram-plugin/tests/progress-card-dispose-preservepending.test.ts +0 -81
- package/telegram-plugin/tests/progress-card-draft-flag.test.ts +0 -80
- package/telegram-plugin/tests/progress-card-driver-eviction.test.ts +0 -215
- package/telegram-plugin/tests/progress-card-driver-fleet-shadow.test.ts +0 -123
- package/telegram-plugin/tests/progress-card-driver-force-complete-parent-done.test.ts +0 -76
- package/telegram-plugin/tests/progress-card-edit-timestamps-budget.test.ts +0 -62
- package/telegram-plugin/tests/progress-card-memory-bounds.test.ts +0 -84
- package/telegram-plugin/tests/progress-card-pin-failure-paths.test.ts +0 -139
- package/telegram-plugin/tests/progress-card-pin-manager.test.ts +0 -773
- package/telegram-plugin/tests/progress-card-pin-race-fast-turn.test.ts +0 -66
- package/telegram-plugin/tests/progress-card-pin-sidecar-partial-write.test.ts +0 -64
- package/telegram-plugin/tests/progress-card-pin-watchdog.test.ts +0 -190
- package/telegram-plugin/tests/progress-card-sigterm-pin-flush.test.ts +0 -146
- package/telegram-plugin/tests/real-gateway-f1-ladder-integrity.test.ts +0 -123
- package/telegram-plugin/tests/real-gateway-f2-instant-draft.test.ts +0 -82
- package/telegram-plugin/tests/real-gateway-f3-late-card.test.ts +0 -114
- package/telegram-plugin/tests/real-gateway-harness.ts +0 -699
- package/telegram-plugin/tests/real-gateway-i6-turn-flush-replay-dedup.test.ts +0 -313
- package/telegram-plugin/tests/real-gateway-ipc-lifecycle.test.ts +0 -299
- package/telegram-plugin/tests/real-gateway-spec.test.ts +0 -487
- package/telegram-plugin/tests/real-gateway.smoke.test.ts +0 -101
- package/telegram-plugin/tests/setup-flow.test.ts +0 -510
- package/telegram-plugin/tests/setup-state.test.ts +0 -146
- package/telegram-plugin/tests/sync-chat-running-subagents.test.ts +0 -116
- package/telegram-plugin/tests/turn-end-regressions.test.ts +0 -489
- package/telegram-plugin/tests/turn-flush-card-takeover.test.ts +0 -218
- package/telegram-plugin/tests/turn-flush-prose-recovery.test.ts +0 -78
- package/telegram-plugin/tests/two-zone-bg-carry-full-lifecycle.test.ts +0 -131
- package/telegram-plugin/tests/two-zone-bg-detection.test.ts +0 -120
- package/telegram-plugin/tests/two-zone-bg-done-when-all-terminal.test.ts +0 -116
- package/telegram-plugin/tests/two-zone-bg-early-turn-end.test.ts +0 -87
- package/telegram-plugin/tests/two-zone-bg-survives-next-turn.test.ts +0 -211
- package/telegram-plugin/tests/two-zone-card-cap.test.ts +0 -62
- package/telegram-plugin/tests/two-zone-card-fleet-row.test.ts +0 -101
- package/telegram-plugin/tests/two-zone-card-header-phases.test.ts +0 -78
- package/telegram-plugin/tests/two-zone-card-html-balance.test.ts +0 -110
- package/telegram-plugin/tests/two-zone-card-lifecycle.test.ts +0 -128
- package/telegram-plugin/tests/two-zone-card-sanitise.test.ts +0 -58
- package/telegram-plugin/tests/two-zone-card-snapshot.test.ts +0 -133
- package/telegram-plugin/tests/two-zone-concurrent-turns-isolation.test.ts +0 -155
- package/telegram-plugin/tests/two-zone-phasefor-precedence.test.ts +0 -117
- package/telegram-plugin/tests/two-zone-snapshot-extras.test.ts +0 -187
- package/telegram-plugin/tests/two-zone-stuck-edit-throttle.test.ts +0 -149
- package/telegram-plugin/tests/two-zone-stuck-header-escalation.test.ts +0 -101
- package/telegram-plugin/tests/two-zone-stuck-per-member.test.ts +0 -114
- package/telegram-plugin/tests/two-zone-stuck-recovery.test.ts +0 -105
- package/telegram-plugin/tests/waiting-ux-harness.ts +0 -381
- package/telegram-plugin/tests/waiting-ux.e2e.test.ts +0 -233
- package/telegram-plugin/turn-flush-prose-recovery.ts +0 -40
- package/telegram-plugin/two-zone-card.ts +0 -269
- package/telegram-plugin/uat/scenarios/smoke-clerk-reply.test.ts +0 -61
|
@@ -1,487 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Waiting-UX v2 spec — RED tests pinning the new three-class contract.
|
|
3
|
-
*
|
|
4
|
-
* This is PR 1 of the #553 series. All `describe` blocks here are
|
|
5
|
-
* `describe.skip`'d on purpose — these tests author the contract for
|
|
6
|
-
* the rewrite, but the production fixes that turn them green land in
|
|
7
|
-
* subsequent PRs (2 through 5). Each block carries a `// TODO(#553-PR-N)`
|
|
8
|
-
* marker for which PR un-skips it.
|
|
9
|
-
*
|
|
10
|
-
* Spec contract — three turn classes, gated on tools and elapsed time:
|
|
11
|
-
*
|
|
12
|
-
* Class A — instant (<2s, NO tools):
|
|
13
|
-
* 👀 reaction → answer text. No placeholder. No progress card.
|
|
14
|
-
*
|
|
15
|
-
* Class B — short (2–60s, tools, NO sub-agents):
|
|
16
|
-
* 👀 → ladder reactions (🤔, 🔥, etc.) → answer text streams.
|
|
17
|
-
* No placeholder. No progress card.
|
|
18
|
-
*
|
|
19
|
-
* Class C — long-running (>60s OR sub-agents/background workers):
|
|
20
|
-
* 👀 → ladder → progress card appears once
|
|
21
|
-
* `(elapsed >= 60s) OR (any sub-agent has appeared)`. Card stays
|
|
22
|
-
* pinned-feel until ALL work terminal.
|
|
23
|
-
*
|
|
24
|
-
* Key invariants:
|
|
25
|
-
* - A "background worker" ≡ a sub-agent dispatched with
|
|
26
|
-
* `Agent({ run_in_background: true })` — there is no separate concept.
|
|
27
|
-
* - The card is gated on `(elapsed >= 60s) OR (any sub-agent appeared)`.
|
|
28
|
-
* Tool-use alone NEVER triggers the card.
|
|
29
|
-
* - The placeholder strings (`🔵 thinking`, `📚 recalling memories`,
|
|
30
|
-
* `💭 thinking`) are removed entirely in PR 5 — none should appear
|
|
31
|
-
* in any payload, ever.
|
|
32
|
-
* - First-answer-text deadline: <800ms for Class A, TBD by PR 3 for
|
|
33
|
-
* Class B/C.
|
|
34
|
-
* - Sub-agent header count must equal rendered-list-length (no drift).
|
|
35
|
-
*
|
|
36
|
-
* RED-state intent: each `it(...)` is authored so that, when un-skipped
|
|
37
|
-
* against current main, it FAILS. That failure is the bug. PRs 2–5
|
|
38
|
-
* make the failure go away.
|
|
39
|
-
*
|
|
40
|
-
* PR 2 — kill instant-draft placeholder + early 👀 path
|
|
41
|
-
* → un-skips Class A and the ladder/no-placeholder bits of B
|
|
42
|
-
* PR 3 — first-answer-text deadline (Class B/C TBD value)
|
|
43
|
-
* → un-skips the answer-text-deadline assertions
|
|
44
|
-
* PR 4 — card-gate rewrite: `(>=60s) OR (sub-agent appeared)`
|
|
45
|
-
* → un-skips Class C card-gate tests + Class B "no card" test
|
|
46
|
-
* PR 5 — remove placeholder strings entirely + sub-agent header
|
|
47
|
-
* count = list length
|
|
48
|
-
* → un-skips the "no placeholder" assertions repo-wide and
|
|
49
|
-
* the sub-agent count = list length test
|
|
50
|
-
*
|
|
51
|
-
* Tracking: #553 (parent series), waiting-ux-spec.md (contract source).
|
|
52
|
-
*/
|
|
53
|
-
|
|
54
|
-
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
|
|
55
|
-
import { createRealGatewayHarness } from './real-gateway-harness.js'
|
|
56
|
-
|
|
57
|
-
const CHAT = '8248703757'
|
|
58
|
-
const INBOUND_MSG = 100
|
|
59
|
-
|
|
60
|
-
// First-answer-text deadlines per spec. Class A is pinned at 1500ms
|
|
61
|
-
// (covers the 800ms 👀 deadline + token-stream first chunk). Class
|
|
62
|
-
// B/C is pinned at 3000ms in #553 PR 3 — budget = 500ms inbound
|
|
63
|
-
// coalesce + ~1s minInitialChars-driven first send + ~1.5s model
|
|
64
|
-
// TTFT for short replies.
|
|
65
|
-
const CLASS_A_ANSWER_TEXT_DEADLINE_MS = 1500
|
|
66
|
-
const CLASS_BC_ANSWER_TEXT_DEADLINE_MS = 3_000
|
|
67
|
-
|
|
68
|
-
beforeEach(() => { vi.useFakeTimers() })
|
|
69
|
-
afterEach(() => { vi.useRealTimers() })
|
|
70
|
-
|
|
71
|
-
// ─── PR 3 — first-answer-text deadlines (Class A & B) ─────────────────────
|
|
72
|
-
//
|
|
73
|
-
// These two tests are extracted from the Class A / Class B describe.skip
|
|
74
|
-
// blocks below and un-skipped in #553 PR 3. The other tests in those
|
|
75
|
-
// blocks (no-placeholder, no-card, ladder integrity) remain skipped
|
|
76
|
-
// pending PRs 4 & 5. Once those land, the duplicates here can be
|
|
77
|
-
// folded back into the parent describes.
|
|
78
|
-
describe('v2 spec — PR 3: first-answer-text deadlines', () => {
|
|
79
|
-
it(`Class A — first answer text lands within ${CLASS_A_ANSWER_TEXT_DEADLINE_MS}ms of inbound`, async () => {
|
|
80
|
-
const h = createRealGatewayHarness({ gapMs: 0 })
|
|
81
|
-
const inboundAt = h.clock.now()
|
|
82
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
|
|
83
|
-
h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'hi' })
|
|
84
|
-
await h.clock.advance(200)
|
|
85
|
-
h.feedSessionEvent({ kind: 'thinking' })
|
|
86
|
-
await h.clock.advance(300)
|
|
87
|
-
await h.streamReply({ chat_id: CHAT, text: 'hello back', done: true })
|
|
88
|
-
await h.clock.advance(50)
|
|
89
|
-
|
|
90
|
-
const answerAt = h.firstAnswerTextMs(CHAT)
|
|
91
|
-
expect(answerAt, 'no answer text recorded').not.toBeNull()
|
|
92
|
-
expect((answerAt ?? Infinity) - inboundAt).toBeLessThan(CLASS_A_ANSWER_TEXT_DEADLINE_MS)
|
|
93
|
-
h.feedSessionEvent({ kind: 'turn_end', durationMs: 600 })
|
|
94
|
-
await h.clock.advance(500)
|
|
95
|
-
h.finalize()
|
|
96
|
-
})
|
|
97
|
-
|
|
98
|
-
it(`Class B — first answer text lands within ${CLASS_BC_ANSWER_TEXT_DEADLINE_MS}ms of inbound`, async () => {
|
|
99
|
-
// Use the production default gapMs (500ms after PR 3) so the
|
|
100
|
-
// deadline reflects what real users see, not a coalesce-disabled
|
|
101
|
-
// best-case.
|
|
102
|
-
const h = createRealGatewayHarness({ gapMs: 500 })
|
|
103
|
-
const inboundAt = h.clock.now()
|
|
104
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'short tool' })
|
|
105
|
-
// Coalesce window flush.
|
|
106
|
-
await h.clock.advance(500)
|
|
107
|
-
h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'short tool' })
|
|
108
|
-
await h.clock.advance(200)
|
|
109
|
-
h.feedSessionEvent({ kind: 'thinking' })
|
|
110
|
-
await h.clock.advance(300)
|
|
111
|
-
h.feedSessionEvent({ kind: 'tool_use', toolName: 'Bash', toolUseId: 't1' })
|
|
112
|
-
await h.clock.advance(1_000)
|
|
113
|
-
h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't1', toolName: 'Bash' })
|
|
114
|
-
// Answer text begins streaming as soon as the model resumes.
|
|
115
|
-
await h.streamReply({ chat_id: CHAT, text: 'partial...', done: false })
|
|
116
|
-
await h.clock.advance(50)
|
|
117
|
-
|
|
118
|
-
const answerAt = h.firstAnswerTextMs(CHAT)
|
|
119
|
-
expect(answerAt, 'no answer text recorded').not.toBeNull()
|
|
120
|
-
expect((answerAt ?? Infinity) - inboundAt).toBeLessThan(CLASS_BC_ANSWER_TEXT_DEADLINE_MS)
|
|
121
|
-
|
|
122
|
-
await h.streamReply({ chat_id: CHAT, text: 'partial... done', done: true })
|
|
123
|
-
h.feedSessionEvent({ kind: 'turn_end', durationMs: 3_000 })
|
|
124
|
-
await h.clock.advance(500)
|
|
125
|
-
h.finalize()
|
|
126
|
-
})
|
|
127
|
-
})
|
|
128
|
-
|
|
129
|
-
// ─── PR 4 — card-gate rewrite (Class B no-card; Class C card-gate) ────────
|
|
130
|
-
//
|
|
131
|
-
// Extracted from the Class B / Class C describe.skip blocks below and
|
|
132
|
-
// un-skipped in #553 PR 4. The other tests in those blocks (no-placeholder,
|
|
133
|
-
// ladder integrity, sub-agent header count) remain skipped pending PR 5.
|
|
134
|
-
// Once PR 5 lands, the duplicates here can be folded back into the parent
|
|
135
|
-
// describes.
|
|
136
|
-
describe('v2 spec — PR 4: card gate (>=60s) OR (sub-agent appeared)', () => {
|
|
137
|
-
it('Class B — emits NO progress card (turn under 60s, no sub-agents)', async () => {
|
|
138
|
-
const h = createRealGatewayHarness({ gapMs: 0 })
|
|
139
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'short tool turn' })
|
|
140
|
-
h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'short tool turn' })
|
|
141
|
-
await h.clock.advance(200)
|
|
142
|
-
h.feedSessionEvent({ kind: 'thinking' })
|
|
143
|
-
await h.clock.advance(300)
|
|
144
|
-
// Two tools, total turn ~10s — well under 60s, no sub-agents.
|
|
145
|
-
h.feedSessionEvent({ kind: 'tool_use', toolName: 'Read', toolUseId: 't1' })
|
|
146
|
-
await h.clock.advance(3_000)
|
|
147
|
-
h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't1', toolName: 'Read' })
|
|
148
|
-
h.feedSessionEvent({ kind: 'tool_use', toolName: 'Bash', toolUseId: 't2' })
|
|
149
|
-
await h.clock.advance(5_000)
|
|
150
|
-
h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't2', toolName: 'Bash' })
|
|
151
|
-
await h.streamReply({ chat_id: CHAT, text: 'done', done: true })
|
|
152
|
-
h.feedSessionEvent({ kind: 'turn_end', durationMs: 9_000 })
|
|
153
|
-
await h.clock.advance(500)
|
|
154
|
-
|
|
155
|
-
expect(h.expectNoCardSent(CHAT)).toBeNull()
|
|
156
|
-
h.finalize()
|
|
157
|
-
})
|
|
158
|
-
|
|
159
|
-
it('Class C — progress card appears when a sub-agent dispatches (regardless of elapsed time)', async () => {
|
|
160
|
-
const h = createRealGatewayHarness({ gapMs: 0 })
|
|
161
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'spawn a worker' })
|
|
162
|
-
h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'spawn a worker' })
|
|
163
|
-
await h.clock.advance(200)
|
|
164
|
-
h.feedSessionEvent({ kind: 'thinking' })
|
|
165
|
-
await h.clock.advance(300)
|
|
166
|
-
// Sub-agent appears well under the 60s elapsed threshold — the
|
|
167
|
-
// card MUST still render because of the sub-agent gate.
|
|
168
|
-
h.feedSessionEvent({ kind: 'sub_agent_started', agentId: 'a1', firstPromptText: 'do work' })
|
|
169
|
-
await h.clock.advance(2_000)
|
|
170
|
-
h.feedSessionEvent({ kind: 'sub_agent_turn_end', agentId: 'a1' })
|
|
171
|
-
await h.clock.advance(500)
|
|
172
|
-
|
|
173
|
-
expect(h.expectNoCardSent(CHAT), 'card MUST render when a sub-agent dispatches').not.toBeNull()
|
|
174
|
-
|
|
175
|
-
await h.streamReply({ chat_id: CHAT, text: 'done', done: true })
|
|
176
|
-
h.feedSessionEvent({ kind: 'turn_end', durationMs: 3_000 })
|
|
177
|
-
await h.clock.advance(500)
|
|
178
|
-
h.finalize()
|
|
179
|
-
})
|
|
180
|
-
|
|
181
|
-
it('Class C — progress card appears when elapsed >= 60s even without a sub-agent', async () => {
|
|
182
|
-
const h = createRealGatewayHarness({ gapMs: 0 })
|
|
183
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'long single tool' })
|
|
184
|
-
h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'long single tool' })
|
|
185
|
-
await h.clock.advance(200)
|
|
186
|
-
h.feedSessionEvent({ kind: 'thinking' })
|
|
187
|
-
await h.clock.advance(300)
|
|
188
|
-
h.feedSessionEvent({ kind: 'tool_use', toolName: 'Bash', toolUseId: 't1' })
|
|
189
|
-
// Cross the 60s threshold.
|
|
190
|
-
await h.clock.advance(61_000)
|
|
191
|
-
h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't1', toolName: 'Bash' })
|
|
192
|
-
await h.clock.advance(500)
|
|
193
|
-
|
|
194
|
-
expect(h.expectNoCardSent(CHAT), 'card MUST render after 60s elapsed').not.toBeNull()
|
|
195
|
-
|
|
196
|
-
await h.streamReply({ chat_id: CHAT, text: 'done', done: true })
|
|
197
|
-
h.feedSessionEvent({ kind: 'turn_end', durationMs: 62_000 })
|
|
198
|
-
await h.clock.advance(500)
|
|
199
|
-
h.finalize()
|
|
200
|
-
})
|
|
201
|
-
})
|
|
202
|
-
|
|
203
|
-
// ─── Class A — instant (<2s, NO tools) ───────────────────────────────────
|
|
204
|
-
//
|
|
205
|
-
// Un-skipped in #553 PR 5 — the no-placeholder assertions go green
|
|
206
|
-
// after PR 5 deletes the production code that emitted placeholder
|
|
207
|
-
// strings.
|
|
208
|
-
describe('v2 spec — Class A (instant, <2s, no tools)', () => {
|
|
209
|
-
it('emits NO placeholder text edits at any point', async () => {
|
|
210
|
-
const h = createRealGatewayHarness({ gapMs: 0 })
|
|
211
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
|
|
212
|
-
h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'hi' })
|
|
213
|
-
await h.clock.advance(200)
|
|
214
|
-
h.feedSessionEvent({ kind: 'thinking' })
|
|
215
|
-
await h.clock.advance(300)
|
|
216
|
-
await h.streamReply({ chat_id: CHAT, text: 'hello back', done: true })
|
|
217
|
-
h.feedSessionEvent({ kind: 'turn_end', durationMs: 600 })
|
|
218
|
-
await h.clock.advance(500)
|
|
219
|
-
|
|
220
|
-
expect(h.expectNoPlaceholderEdits(CHAT)).toEqual([])
|
|
221
|
-
h.finalize()
|
|
222
|
-
})
|
|
223
|
-
|
|
224
|
-
it('emits NO progress card', async () => {
|
|
225
|
-
const h = createRealGatewayHarness({ gapMs: 0 })
|
|
226
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
|
|
227
|
-
h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'hi' })
|
|
228
|
-
await h.clock.advance(200)
|
|
229
|
-
h.feedSessionEvent({ kind: 'thinking' })
|
|
230
|
-
await h.clock.advance(300)
|
|
231
|
-
await h.streamReply({ chat_id: CHAT, text: 'hello back', done: true })
|
|
232
|
-
h.feedSessionEvent({ kind: 'turn_end', durationMs: 600 })
|
|
233
|
-
await h.clock.advance(500)
|
|
234
|
-
|
|
235
|
-
expect(h.expectNoCardSent(CHAT)).toBeNull()
|
|
236
|
-
h.finalize()
|
|
237
|
-
})
|
|
238
|
-
|
|
239
|
-
it('👀 reaction lands within 800ms of inbound', async () => {
|
|
240
|
-
const h = createRealGatewayHarness({ gapMs: 1500 })
|
|
241
|
-
const inboundAt = h.clock.now()
|
|
242
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
|
|
243
|
-
await h.clock.advance(800)
|
|
244
|
-
|
|
245
|
-
const firstReactionMs = h.recorder.firstReactionMs(CHAT)
|
|
246
|
-
expect(firstReactionMs).not.toBeNull()
|
|
247
|
-
expect((firstReactionMs ?? Infinity) - inboundAt).toBeLessThan(800)
|
|
248
|
-
h.finalize()
|
|
249
|
-
})
|
|
250
|
-
|
|
251
|
-
it(`first answer text lands within ${CLASS_A_ANSWER_TEXT_DEADLINE_MS}ms of inbound`, async () => {
|
|
252
|
-
const h = createRealGatewayHarness({ gapMs: 0 })
|
|
253
|
-
const inboundAt = h.clock.now()
|
|
254
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
|
|
255
|
-
h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'hi' })
|
|
256
|
-
await h.clock.advance(200)
|
|
257
|
-
h.feedSessionEvent({ kind: 'thinking' })
|
|
258
|
-
await h.clock.advance(300)
|
|
259
|
-
await h.streamReply({ chat_id: CHAT, text: 'hello back', done: true })
|
|
260
|
-
await h.clock.advance(50)
|
|
261
|
-
|
|
262
|
-
const answerAt = h.firstAnswerTextMs(CHAT)
|
|
263
|
-
expect(answerAt, 'no answer text recorded').not.toBeNull()
|
|
264
|
-
expect((answerAt ?? Infinity) - inboundAt).toBeLessThan(CLASS_A_ANSWER_TEXT_DEADLINE_MS)
|
|
265
|
-
h.feedSessionEvent({ kind: 'turn_end', durationMs: 600 })
|
|
266
|
-
await h.clock.advance(500)
|
|
267
|
-
h.finalize()
|
|
268
|
-
})
|
|
269
|
-
|
|
270
|
-
it('emits NO `sendMessageDraft`-style placeholder draft sends', async () => {
|
|
271
|
-
// Currently the production "instant draft" flow can `sendMessage`
|
|
272
|
-
// a placeholder body that gets edited later. The v2 contract
|
|
273
|
-
// bans that — the first sendMessage to the user MUST be real
|
|
274
|
-
// answer text. We assert this by re-using the placeholder
|
|
275
|
-
// helper: any placeholder sendMessage is a draft send.
|
|
276
|
-
const h = createRealGatewayHarness({ gapMs: 0 })
|
|
277
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
|
|
278
|
-
h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'hi' })
|
|
279
|
-
await h.clock.advance(200)
|
|
280
|
-
h.feedSessionEvent({ kind: 'thinking' })
|
|
281
|
-
await h.clock.advance(300)
|
|
282
|
-
await h.streamReply({ chat_id: CHAT, text: 'hello back', done: true })
|
|
283
|
-
h.feedSessionEvent({ kind: 'turn_end', durationMs: 600 })
|
|
284
|
-
await h.clock.advance(500)
|
|
285
|
-
|
|
286
|
-
const draftSends = h
|
|
287
|
-
.expectNoPlaceholderEdits(CHAT)
|
|
288
|
-
.filter((c) => c.kind === 'sendMessage')
|
|
289
|
-
expect(draftSends).toEqual([])
|
|
290
|
-
h.finalize()
|
|
291
|
-
})
|
|
292
|
-
})
|
|
293
|
-
|
|
294
|
-
// ─── Class B — short (2–60s, tools, no sub-agents) ───────────────────────
|
|
295
|
-
//
|
|
296
|
-
// Un-skipped in #553 PR 5. The no-placeholder + ladder-integrity
|
|
297
|
-
// assertions go green once the placeholder code is deleted (PR 5);
|
|
298
|
-
// no-card and answer-text-deadline assertions were already covered
|
|
299
|
-
// by the PR-3 / PR-4 describe blocks above.
|
|
300
|
-
describe('v2 spec — Class B (short, 2–60s, tools, no sub-agents)', () => {
|
|
301
|
-
it('emits NO placeholder text edits', async () => {
|
|
302
|
-
const h = createRealGatewayHarness({ gapMs: 0 })
|
|
303
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'do a thing' })
|
|
304
|
-
h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'do a thing' })
|
|
305
|
-
await h.clock.advance(200)
|
|
306
|
-
h.feedSessionEvent({ kind: 'thinking' })
|
|
307
|
-
await h.clock.advance(300)
|
|
308
|
-
h.feedSessionEvent({ kind: 'tool_use', toolName: 'Bash', toolUseId: 't1' })
|
|
309
|
-
await h.clock.advance(3_000)
|
|
310
|
-
h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't1', toolName: 'Bash' })
|
|
311
|
-
await h.streamReply({ chat_id: CHAT, text: 'all done', done: true })
|
|
312
|
-
h.feedSessionEvent({ kind: 'turn_end', durationMs: 4_000 })
|
|
313
|
-
await h.clock.advance(500)
|
|
314
|
-
|
|
315
|
-
expect(h.expectNoPlaceholderEdits(CHAT)).toEqual([])
|
|
316
|
-
h.finalize()
|
|
317
|
-
})
|
|
318
|
-
|
|
319
|
-
it('emits NO progress card (turn under 60s, no sub-agents)', async () => {
|
|
320
|
-
const h = createRealGatewayHarness({ gapMs: 0 })
|
|
321
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'short tool turn' })
|
|
322
|
-
h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'short tool turn' })
|
|
323
|
-
await h.clock.advance(200)
|
|
324
|
-
h.feedSessionEvent({ kind: 'thinking' })
|
|
325
|
-
await h.clock.advance(300)
|
|
326
|
-
// Two tools, total turn ~10s — well under 60s, no sub-agents.
|
|
327
|
-
h.feedSessionEvent({ kind: 'tool_use', toolName: 'Read', toolUseId: 't1' })
|
|
328
|
-
await h.clock.advance(3_000)
|
|
329
|
-
h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't1', toolName: 'Read' })
|
|
330
|
-
h.feedSessionEvent({ kind: 'tool_use', toolName: 'Bash', toolUseId: 't2' })
|
|
331
|
-
await h.clock.advance(5_000)
|
|
332
|
-
h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't2', toolName: 'Bash' })
|
|
333
|
-
await h.streamReply({ chat_id: CHAT, text: 'done', done: true })
|
|
334
|
-
h.feedSessionEvent({ kind: 'turn_end', durationMs: 9_000 })
|
|
335
|
-
await h.clock.advance(500)
|
|
336
|
-
|
|
337
|
-
expect(h.expectNoCardSent(CHAT)).toBeNull()
|
|
338
|
-
h.finalize()
|
|
339
|
-
})
|
|
340
|
-
|
|
341
|
-
it('ladder integrity: 👀 → at least one tool reaction → 👍 (no straight-to-👍 collapse)', async () => {
|
|
342
|
-
const h = createRealGatewayHarness({ gapMs: 0 })
|
|
343
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'ladder' })
|
|
344
|
-
h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'ladder' })
|
|
345
|
-
await h.clock.advance(200)
|
|
346
|
-
h.feedSessionEvent({ kind: 'thinking' })
|
|
347
|
-
await h.clock.advance(300)
|
|
348
|
-
h.feedSessionEvent({ kind: 'tool_use', toolName: 'Bash', toolUseId: 't1' })
|
|
349
|
-
await h.clock.advance(3_000)
|
|
350
|
-
h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't1', toolName: 'Bash' })
|
|
351
|
-
await h.streamReply({ chat_id: CHAT, text: 'done', done: true })
|
|
352
|
-
h.feedSessionEvent({ kind: 'turn_end', durationMs: 4_000 })
|
|
353
|
-
await h.clock.advance(1_500)
|
|
354
|
-
|
|
355
|
-
const seq = h.recorder.reactionSequence()
|
|
356
|
-
// Dedupe consecutive duplicates (early-ack + setQueued both emit 👀).
|
|
357
|
-
const ladder: string[] = []
|
|
358
|
-
for (const e of seq) if (ladder[ladder.length - 1] !== e) ladder.push(e)
|
|
359
|
-
expect(ladder[0]).toBe('👀')
|
|
360
|
-
expect(ladder[ladder.length - 1]).toBe('👍')
|
|
361
|
-
expect(ladder.length).toBeGreaterThanOrEqual(3)
|
|
362
|
-
h.finalize()
|
|
363
|
-
})
|
|
364
|
-
|
|
365
|
-
it(`first answer text lands within ${CLASS_BC_ANSWER_TEXT_DEADLINE_MS}ms of inbound`, async () => {
|
|
366
|
-
const h = createRealGatewayHarness({ gapMs: 0 })
|
|
367
|
-
const inboundAt = h.clock.now()
|
|
368
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'short tool' })
|
|
369
|
-
h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'short tool' })
|
|
370
|
-
await h.clock.advance(200)
|
|
371
|
-
h.feedSessionEvent({ kind: 'thinking' })
|
|
372
|
-
await h.clock.advance(300)
|
|
373
|
-
h.feedSessionEvent({ kind: 'tool_use', toolName: 'Bash', toolUseId: 't1' })
|
|
374
|
-
await h.clock.advance(2_000)
|
|
375
|
-
h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't1', toolName: 'Bash' })
|
|
376
|
-
// Answer text begins streaming as soon as the model resumes — pin
|
|
377
|
-
// the deadline to the spec value (TBD: PR 3 may tighten).
|
|
378
|
-
await h.streamReply({ chat_id: CHAT, text: 'partial...', done: false })
|
|
379
|
-
await h.clock.advance(50)
|
|
380
|
-
|
|
381
|
-
const answerAt = h.firstAnswerTextMs(CHAT)
|
|
382
|
-
expect(answerAt, 'no answer text recorded').not.toBeNull()
|
|
383
|
-
expect((answerAt ?? Infinity) - inboundAt).toBeLessThan(CLASS_BC_ANSWER_TEXT_DEADLINE_MS)
|
|
384
|
-
|
|
385
|
-
await h.streamReply({ chat_id: CHAT, text: 'partial... done', done: true })
|
|
386
|
-
h.feedSessionEvent({ kind: 'turn_end', durationMs: 3_000 })
|
|
387
|
-
await h.clock.advance(500)
|
|
388
|
-
h.finalize()
|
|
389
|
-
})
|
|
390
|
-
})
|
|
391
|
-
|
|
392
|
-
// ─── Class C — long-running (>60s OR sub-agents/background workers) ───────
|
|
393
|
-
//
|
|
394
|
-
// Un-skipped in #553 PR 5. No-placeholder assertions go green via the
|
|
395
|
-
// PR-5 deletion of placeholder code; sub-agent header == list length
|
|
396
|
-
// goes green via the PR-2 sub-agent count fix (#580).
|
|
397
|
-
describe('v2 spec — Class C (long-running OR sub-agents)', () => {
|
|
398
|
-
it('progress card appears when a sub-agent dispatches (regardless of elapsed time)', async () => {
|
|
399
|
-
const h = createRealGatewayHarness({ gapMs: 0 })
|
|
400
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'spawn a worker' })
|
|
401
|
-
h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'spawn a worker' })
|
|
402
|
-
await h.clock.advance(200)
|
|
403
|
-
h.feedSessionEvent({ kind: 'thinking' })
|
|
404
|
-
await h.clock.advance(300)
|
|
405
|
-
// Sub-agent appears well under the 60s elapsed threshold — the
|
|
406
|
-
// card MUST still render because of the sub-agent gate.
|
|
407
|
-
h.feedSessionEvent({ kind: 'sub_agent_started', agentId: 'a1', firstPromptText: 'do work' })
|
|
408
|
-
await h.clock.advance(2_000)
|
|
409
|
-
h.feedSessionEvent({ kind: 'sub_agent_turn_end', agentId: 'a1' })
|
|
410
|
-
await h.clock.advance(500)
|
|
411
|
-
|
|
412
|
-
expect(h.expectNoCardSent(CHAT), 'card MUST render when a sub-agent dispatches').not.toBeNull()
|
|
413
|
-
|
|
414
|
-
await h.streamReply({ chat_id: CHAT, text: 'done', done: true })
|
|
415
|
-
h.feedSessionEvent({ kind: 'turn_end', durationMs: 3_000 })
|
|
416
|
-
await h.clock.advance(500)
|
|
417
|
-
h.finalize()
|
|
418
|
-
})
|
|
419
|
-
|
|
420
|
-
it('progress card appears when elapsed >= 60s even without a sub-agent', async () => {
|
|
421
|
-
const h = createRealGatewayHarness({ gapMs: 0 })
|
|
422
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'long single tool' })
|
|
423
|
-
h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'long single tool' })
|
|
424
|
-
await h.clock.advance(200)
|
|
425
|
-
h.feedSessionEvent({ kind: 'thinking' })
|
|
426
|
-
await h.clock.advance(300)
|
|
427
|
-
h.feedSessionEvent({ kind: 'tool_use', toolName: 'Bash', toolUseId: 't1' })
|
|
428
|
-
// Cross the 60s threshold.
|
|
429
|
-
await h.clock.advance(61_000)
|
|
430
|
-
h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't1', toolName: 'Bash' })
|
|
431
|
-
await h.clock.advance(500)
|
|
432
|
-
|
|
433
|
-
expect(h.expectNoCardSent(CHAT), 'card MUST render after 60s elapsed').not.toBeNull()
|
|
434
|
-
|
|
435
|
-
await h.streamReply({ chat_id: CHAT, text: 'done', done: true })
|
|
436
|
-
h.feedSessionEvent({ kind: 'turn_end', durationMs: 62_000 })
|
|
437
|
-
await h.clock.advance(500)
|
|
438
|
-
h.finalize()
|
|
439
|
-
})
|
|
440
|
-
|
|
441
|
-
it('card stays pinned-feel: not marked Done while any sub-agent is in flight', async () => {
|
|
442
|
-
const h = createRealGatewayHarness({ gapMs: 0 })
|
|
443
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'fanout' })
|
|
444
|
-
h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'fanout' })
|
|
445
|
-
await h.clock.advance(200)
|
|
446
|
-
h.feedSessionEvent({ kind: 'thinking' })
|
|
447
|
-
await h.clock.advance(300)
|
|
448
|
-
h.feedSessionEvent({ kind: 'sub_agent_started', agentId: 'a1', firstPromptText: 'first' })
|
|
449
|
-
h.feedSessionEvent({ kind: 'sub_agent_started', agentId: 'a2', firstPromptText: 'second' })
|
|
450
|
-
await h.clock.advance(2_000)
|
|
451
|
-
h.feedSessionEvent({ kind: 'sub_agent_turn_end', agentId: 'a1' })
|
|
452
|
-
// a2 still in flight — the card must NOT show Done yet, even though
|
|
453
|
-
// the parent turn could complete.
|
|
454
|
-
await h.clock.advance(500)
|
|
455
|
-
const editsBeforeA2Done = h.recorder.edits(CHAT).map((e) => e.payload ?? '')
|
|
456
|
-
const sawPrematureDone = editsBeforeA2Done.some((p) => /done/i.test(p) && !/working/i.test(p))
|
|
457
|
-
expect(sawPrematureDone, 'card marked Done while a sub-agent was still running').toBe(false)
|
|
458
|
-
|
|
459
|
-
h.feedSessionEvent({ kind: 'sub_agent_turn_end', agentId: 'a2' })
|
|
460
|
-
await h.streamReply({ chat_id: CHAT, text: 'all done', done: true })
|
|
461
|
-
h.feedSessionEvent({ kind: 'turn_end', durationMs: 3_000 })
|
|
462
|
-
await h.clock.advance(500)
|
|
463
|
-
h.finalize()
|
|
464
|
-
})
|
|
465
|
-
|
|
466
|
-
it('emits NO placeholder text edits across the full turn', async () => {
|
|
467
|
-
const h = createRealGatewayHarness({ gapMs: 0 })
|
|
468
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'long with workers' })
|
|
469
|
-
h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'long with workers' })
|
|
470
|
-
await h.clock.advance(200)
|
|
471
|
-
h.feedSessionEvent({ kind: 'thinking' })
|
|
472
|
-
await h.clock.advance(300)
|
|
473
|
-
h.feedSessionEvent({ kind: 'sub_agent_started', agentId: 'a1', firstPromptText: 'work' })
|
|
474
|
-
await h.clock.advance(2_000)
|
|
475
|
-
h.feedSessionEvent({ kind: 'sub_agent_turn_end', agentId: 'a1' })
|
|
476
|
-
await h.streamReply({ chat_id: CHAT, text: 'done', done: true })
|
|
477
|
-
h.feedSessionEvent({ kind: 'turn_end', durationMs: 3_000 })
|
|
478
|
-
await h.clock.advance(500)
|
|
479
|
-
|
|
480
|
-
expect(h.expectNoPlaceholderEdits(CHAT)).toEqual([])
|
|
481
|
-
h.finalize()
|
|
482
|
-
})
|
|
483
|
-
|
|
484
|
-
// P4 cutover (#662): legacy "<blockquote expandable>" row-count test deleted.
|
|
485
|
-
// The two-zone v2 renderer's row-count invariant is covered by
|
|
486
|
-
// tests/two-zone-card-cap.test.ts and tests/two-zone-card-snapshot.test.ts.
|
|
487
|
-
})
|
|
@@ -1,101 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Real-gateway harness — smoke tests.
|
|
3
|
-
*
|
|
4
|
-
* Pin the wiring of `real-gateway-harness.ts` works end-to-end before
|
|
5
|
-
* the F1–F4 tests build on it. These tests assert behaviour the harness
|
|
6
|
-
* MUST exhibit for the F-tests to be meaningful:
|
|
7
|
-
*
|
|
8
|
-
* 1. inbound() routes through the real coalescer (👀 fires only after
|
|
9
|
-
* the gap window, not synchronously).
|
|
10
|
-
* 2. gapMs=0 bypasses the buffer (👀 fires immediately).
|
|
11
|
-
* 3. Multiple inbounds within the gap merge into a single flush.
|
|
12
|
-
* 4. Controller + driver still work for session-event feeds (Phase 1
|
|
13
|
-
* contract still holds).
|
|
14
|
-
*
|
|
15
|
-
* Same fake-timers + recorder pattern as `waiting-ux.e2e.test.ts`.
|
|
16
|
-
*/
|
|
17
|
-
|
|
18
|
-
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
|
|
19
|
-
import { createRealGatewayHarness } from './real-gateway-harness.js'
|
|
20
|
-
|
|
21
|
-
const CHAT = '8248703757'
|
|
22
|
-
const INBOUND_MSG = 100
|
|
23
|
-
|
|
24
|
-
beforeEach(() => { vi.useFakeTimers() })
|
|
25
|
-
afterEach(() => { vi.useRealTimers() })
|
|
26
|
-
|
|
27
|
-
describe('real-gateway harness — smoke', () => {
|
|
28
|
-
it('inbound() fires 👀 immediately on raw arrival (F2 early-ack), even with coalesce wait pending', async () => {
|
|
29
|
-
const h = createRealGatewayHarness({ gapMs: 1500 })
|
|
30
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
|
|
31
|
-
// Microtask flush only — no real time has passed beyond the void
|
|
32
|
-
// setMessageReaction Promise resolving on the next microtask.
|
|
33
|
-
await h.clock.advance(0)
|
|
34
|
-
expect(h.recorder.firstReactionMs(CHAT)).not.toBeNull()
|
|
35
|
-
expect(h.recorder.reactionSequence()[0]).toBe('👀')
|
|
36
|
-
// Coalesce buffer still holds the message — only the reaction fired
|
|
37
|
-
// early; the actual handleInbound dispatch waits for the gap.
|
|
38
|
-
expect(h.coalesceBufferSize()).toBe(1)
|
|
39
|
-
h.finalize()
|
|
40
|
-
})
|
|
41
|
-
|
|
42
|
-
it('after gapMs elapses, the flush fires controller.setQueued (Telegram dedupes the duplicate 👀)', async () => {
|
|
43
|
-
const h = createRealGatewayHarness({ gapMs: 1500 })
|
|
44
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
|
|
45
|
-
await h.clock.advance(1500)
|
|
46
|
-
expect(h.recorder.firstReactionMs(CHAT)).not.toBeNull()
|
|
47
|
-
// Reaction sequence carries TWO 👀: the early-ack + the controller's
|
|
48
|
-
// post-flush setQueued(). Real Telegram dedupes (same emoji = no
|
|
49
|
-
// visible change). Tests asserting ladder integrity should dedupe
|
|
50
|
-
// consecutive duplicates before checking the sequence.
|
|
51
|
-
expect(h.recorder.reactionSequence()[0]).toBe('👀')
|
|
52
|
-
expect(h.coalesceBufferSize()).toBe(0)
|
|
53
|
-
h.finalize()
|
|
54
|
-
})
|
|
55
|
-
|
|
56
|
-
it('gapMs=0 bypasses the buffer (👀 fires immediately on first paint)', async () => {
|
|
57
|
-
const h = createRealGatewayHarness({ gapMs: 0 })
|
|
58
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
|
|
59
|
-
await h.clock.advance(0)
|
|
60
|
-
expect(h.recorder.firstReactionMs(CHAT)).not.toBeNull()
|
|
61
|
-
expect(h.coalesceBufferSize()).toBe(0)
|
|
62
|
-
h.finalize()
|
|
63
|
-
})
|
|
64
|
-
|
|
65
|
-
it('multiple inbounds within the gap window merge into one flush (sliding timer resets)', async () => {
|
|
66
|
-
const h = createRealGatewayHarness({ gapMs: 1500 })
|
|
67
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'one' })
|
|
68
|
-
await h.clock.advance(1000)
|
|
69
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG + 1, text: 'two' })
|
|
70
|
-
// First inbound's early-ack already fired 👀 by here — that's the F2 win.
|
|
71
|
-
expect(h.recorder.firstReactionMs(CHAT)).not.toBeNull()
|
|
72
|
-
await h.clock.advance(1000) // 1s after 'two' — still buffered
|
|
73
|
-
expect(h.coalesceBufferSize()).toBe(1)
|
|
74
|
-
await h.clock.advance(500) // 1.5s after 'two' — flush
|
|
75
|
-
expect(h.coalesceBufferSize()).toBe(0)
|
|
76
|
-
// The mid-turn 'two' inbound is suppressed by the activeTurns gate
|
|
77
|
-
// (turn started on flush of 'one'... but here the flush is at the
|
|
78
|
-
// END so 'one' alone never had a flush; both are coalesced into one
|
|
79
|
-
// turn). So only the FIRST inbound's early-ack fires; 'two' lands
|
|
80
|
-
// before any turn started, but the early-ack still counts it as a
|
|
81
|
-
// fresh-turn ack on the same key. Only one 👀 emoji per coalesce
|
|
82
|
-
// turn after the controller dedupes. Test simplifies to: at least one
|
|
83
|
-
// 👀 fired, but multiple are tolerated (Telegram dedupes by emoji).
|
|
84
|
-
expect(h.recorder.reactionSequence().filter((e) => e === '👀').length).toBeGreaterThanOrEqual(1)
|
|
85
|
-
h.finalize()
|
|
86
|
-
})
|
|
87
|
-
|
|
88
|
-
it('Phase 1 contract still holds — feedSessionEvent drives controller transitions', async () => {
|
|
89
|
-
const h = createRealGatewayHarness({ gapMs: 0 }) // bypass coalesce for this isolation test
|
|
90
|
-
h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
|
|
91
|
-
await h.clock.advance(0)
|
|
92
|
-
h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'hi' })
|
|
93
|
-
await h.clock.advance(50)
|
|
94
|
-
h.feedSessionEvent({ kind: 'thinking' })
|
|
95
|
-
await h.clock.advance(50)
|
|
96
|
-
// Status reaction debounce (default 700ms) must elapse for transitions to land.
|
|
97
|
-
await h.clock.advance(800)
|
|
98
|
-
expect(h.recorder.reactionSequence()).toContain('👀')
|
|
99
|
-
h.finalize()
|
|
100
|
-
})
|
|
101
|
-
})
|