switchroom 0.7.15 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -59
- package/bin/run-hook.sh +27 -11
- package/bin/timezone-hook.sh +9 -7
- package/dist/agent-scheduler/index.js +410 -133
- package/dist/auth-broker/index.js +13932 -0
- package/dist/cli/switchroom.js +26937 -5601
- package/dist/host-control/main.js +12702 -0
- package/dist/vault/approvals/kernel-server.js +467 -184
- package/dist/vault/broker/server.js +1430 -724
- package/examples/minimal.yaml +63 -0
- package/examples/personal-google-workspace-mcp/.env.example +34 -0
- package/examples/personal-google-workspace-mcp/README.md +194 -0
- package/examples/personal-google-workspace-mcp/compose.yaml +66 -0
- package/examples/switchroom.yaml +220 -0
- package/package.json +7 -4
- package/profiles/_base/settings.json.hbs +20 -5
- package/profiles/_base/start.sh.hbs +16 -3
- package/profiles/_shared/agent-self-service.md.hbs +126 -0
- package/profiles/_shared/telegram-style.md.hbs +20 -90
- package/profiles/_shared/vault-protocol.md.hbs +68 -0
- package/profiles/default/CLAUDE.md +50 -96
- package/profiles/default/CLAUDE.md.hbs +36 -6
- package/profiles/default/workspace/SOUL.md.hbs +12 -5
- package/skills/buildkite-agent-infrastructure/SKILL.md +30 -11
- package/skills/buildkite-agent-runtime/SKILL.md +44 -11
- package/skills/buildkite-api/SKILL.md +31 -8
- package/skills/buildkite-cli/SKILL.md +27 -9
- package/skills/buildkite-migration/SKILL.md +22 -9
- package/skills/buildkite-pipelines/SKILL.md +26 -9
- package/skills/buildkite-secure-delivery/SKILL.md +23 -9
- package/skills/buildkite-test-engine/SKILL.md +25 -8
- package/skills/docx/SKILL.md +1 -1
- package/skills/docx/scripts/office/validators/__pycache__/__init__.cpython-313.pyc +0 -0
- package/skills/docx/scripts/office/validators/__pycache__/base.cpython-313.pyc +0 -0
- package/skills/file-bug/SKILL.md +34 -6
- package/skills/humanizer/SKILL.md +15 -0
- package/skills/humanizer-calibrate/SKILL.md +7 -1
- package/skills/mcp-builder/SKILL.md +1 -1
- package/skills/pdf/SKILL.md +1 -1
- package/skills/pptx/SKILL.md +1 -1
- package/skills/skill-creator/SKILL.md +21 -1
- package/skills/skill-creator/scripts/__pycache__/__init__.cpython-313.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-313.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-313.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-313.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-313.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/utils.cpython-313.pyc +0 -0
- package/skills/switchroom-cli/SKILL.md +63 -64
- package/skills/switchroom-health/SKILL.md +23 -10
- package/skills/switchroom-install/SKILL.md +3 -3
- package/skills/switchroom-manage/SKILL.md +26 -19
- package/skills/switchroom-runtime/SKILL.md +191 -0
- package/skills/switchroom-status/SKILL.md +27 -2
- package/skills/telegram-test-harness/SKILL.md +3 -0
- package/skills/token-helpers/SKILL.md +24 -1
- package/skills/webapp-testing/SKILL.md +31 -1
- package/skills/xlsx/SKILL.md +1 -1
- package/telegram-plugin/admin-commands/index.ts +7 -5
- package/telegram-plugin/analytics-posthog.ts +191 -0
- package/telegram-plugin/bridge/bridge.ts +69 -0
- package/telegram-plugin/bridge/ipc-client.ts +4 -1
- package/telegram-plugin/dist/bridge/bridge.js +194 -119
- package/telegram-plugin/dist/gateway/gateway.js +23611 -19671
- package/telegram-plugin/dist/server.js +245 -189
- package/telegram-plugin/first-paint.ts +3 -24
- package/telegram-plugin/gateway/auth-add-flow.ts +326 -0
- package/telegram-plugin/gateway/auth-broker-client.ts +75 -0
- package/telegram-plugin/gateway/auth-command.ts +794 -0
- package/telegram-plugin/gateway/auth-line.ts +123 -0
- package/telegram-plugin/gateway/boot-card.ts +169 -40
- package/telegram-plugin/gateway/boot-issue-cache.ts +308 -0
- package/telegram-plugin/gateway/boot-probes.ts +166 -123
- package/telegram-plugin/gateway/boot-reason.ts +41 -7
- package/telegram-plugin/gateway/boot-version.ts +66 -0
- package/telegram-plugin/gateway/gateway.ts +3499 -1885
- package/telegram-plugin/gateway/hostd-dispatch.ts +117 -0
- package/telegram-plugin/gateway/ipc-protocol.ts +18 -0
- package/telegram-plugin/gateway/pending-inbound-buffer.ts +106 -0
- package/telegram-plugin/gateway/quarantine.ts +69 -0
- package/telegram-plugin/gateway/quota-cache.ts +9 -4
- package/telegram-plugin/gateway/reaction-trigger.ts +401 -0
- package/telegram-plugin/gateway/recent-denials.test.ts +103 -0
- package/telegram-plugin/gateway/recent-denials.ts +77 -0
- package/telegram-plugin/gateway/startup-network-retry.ts +109 -31
- package/telegram-plugin/gateway/vault-grant-inbound-builders.ts +125 -0
- package/telegram-plugin/history.ts +91 -0
- package/telegram-plugin/hooks/hooks.json +10 -0
- package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +130 -0
- package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +19 -2
- package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +22 -2
- package/telegram-plugin/hooks/tool-label-pretool.mjs +11 -0
- package/telegram-plugin/hooks/wedge-detect-posttool.mjs +303 -0
- package/telegram-plugin/inbound-classifier.ts +50 -0
- package/telegram-plugin/inline-keyboard-callbacks.ts +136 -0
- package/telegram-plugin/node_modules/.vite/vitest/da39a3ee5e6b4b0d3255bfef95601890afd80709/results.json +1 -0
- package/telegram-plugin/package.json +4 -2
- package/telegram-plugin/permission-rule.ts +51 -0
- package/telegram-plugin/permission-title.ts +56 -0
- package/telegram-plugin/quota-check.ts +19 -41
- package/telegram-plugin/registry/reaper.ts +223 -0
- package/telegram-plugin/retry-api-call.ts +80 -0
- package/telegram-plugin/runtime-metrics.ts +177 -0
- package/telegram-plugin/scripts/build.mjs +0 -1
- package/telegram-plugin/secret-detect/index.ts +24 -0
- package/telegram-plugin/secret-detect/vault-error.test.ts +64 -12
- package/telegram-plugin/secret-detect/vault-error.ts +78 -11
- package/telegram-plugin/secret-detect/vault-write.ts +14 -2
- package/telegram-plugin/server.js +41795 -0
- package/telegram-plugin/session-tail.ts +6 -1
- package/telegram-plugin/shared/bot-runtime.ts +5 -4
- package/telegram-plugin/silence-poke.ts +420 -0
- package/telegram-plugin/silent-end.ts +174 -0
- package/telegram-plugin/stream-controller.ts +13 -0
- package/telegram-plugin/stream-reply-handler.ts +7 -0
- package/telegram-plugin/subagent-watcher.ts +213 -4
- package/telegram-plugin/tests/auth-add-flow.test.ts +559 -0
- package/telegram-plugin/tests/auth-code-redact.test.ts +8 -4
- package/telegram-plugin/tests/auth-command-vernacular.test.ts +531 -0
- package/telegram-plugin/tests/boot-card-issue-dedup.test.ts +247 -0
- package/telegram-plugin/tests/boot-card-reason-to-render.test.ts +182 -0
- package/telegram-plugin/tests/boot-card-reason.test.ts +65 -2
- package/telegram-plugin/tests/boot-card-render.test.ts +146 -0
- package/telegram-plugin/tests/boot-card-silent-on-operator.test.ts +103 -0
- package/telegram-plugin/tests/boot-probes.test.ts +216 -10
- package/telegram-plugin/tests/boot-version-string.test.ts +0 -0
- package/telegram-plugin/tests/finalize-callback.test.ts +190 -0
- package/telegram-plugin/tests/gateway-message-validator.test.ts +26 -0
- package/telegram-plugin/tests/gateway-secret-detect.test.ts +12 -3
- package/telegram-plugin/tests/gateway-startup-network-retry.test.ts +104 -0
- package/telegram-plugin/tests/history-reaper.test.ts +378 -0
- package/telegram-plugin/tests/hostd-dispatch.test.ts +129 -0
- package/telegram-plugin/tests/inbound-classifier.test.ts +76 -0
- package/telegram-plugin/tests/inbound-message-types.test.ts +267 -0
- package/telegram-plugin/tests/issues-card.test.ts +49 -0
- package/telegram-plugin/tests/pending-inbound-buffer.test.ts +132 -0
- package/telegram-plugin/tests/permission-rule.test.ts +80 -1
- package/telegram-plugin/tests/permission-title.test.ts +31 -0
- package/telegram-plugin/tests/quota-check.test.ts +5 -35
- package/telegram-plugin/tests/races.test.ts +179 -0
- package/telegram-plugin/tests/reaction-trigger-flow.test.ts +353 -0
- package/telegram-plugin/tests/reaction-trigger.test.ts +397 -0
- package/telegram-plugin/tests/retry-api-call.test.ts +152 -1
- package/telegram-plugin/tests/runtime-metrics.test.ts +145 -0
- package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +155 -0
- package/telegram-plugin/tests/secret-detect-delete-must-surface-failures.test.ts +133 -0
- package/telegram-plugin/tests/secret-detect-false-positives.test.ts +137 -0
- package/telegram-plugin/tests/silence-poke.test.ts +493 -0
- package/telegram-plugin/tests/silent-end.test.ts +206 -0
- package/telegram-plugin/tests/subagent-tracker-hooks.test.ts +107 -0
- package/telegram-plugin/tests/subagent-watcher-env-thresholds.test.ts +224 -0
- package/telegram-plugin/tests/subagent-watcher-stall-terminal.test.ts +316 -0
- package/telegram-plugin/tests/subagent-watcher.test.ts +263 -0
- package/telegram-plugin/tests/turn-signal-tracker.test.ts +81 -0
- package/telegram-plugin/tests/vault-approval-posture.test.ts +256 -0
- package/telegram-plugin/tests/vault-grant-auto-resume.test.ts +73 -0
- package/telegram-plugin/tests/vault-grant-inbound-builders.test.ts +226 -0
- package/telegram-plugin/tests/vault-grant-union.test.ts +130 -0
- package/telegram-plugin/tests/vault-key-regex-allows-slash.test.ts +140 -0
- package/telegram-plugin/tests/vault-posture-quarantine.test.ts +104 -0
- package/telegram-plugin/tests/vault-request-access-tool.test.ts +114 -0
- package/telegram-plugin/tests/vault-request-access-unlock-resume.test.ts +106 -0
- package/telegram-plugin/turn-signal-tracker.ts +100 -24
- package/telegram-plugin/uat/SETUP.md +210 -35
- package/telegram-plugin/uat/assertions.ts +264 -37
- package/telegram-plugin/uat/driver-info.ts +57 -0
- package/telegram-plugin/uat/driver.ts +590 -51
- package/telegram-plugin/uat/harness.ts +140 -94
- package/telegram-plugin/uat/load-env.test.ts +72 -0
- package/telegram-plugin/uat/load-env.ts +48 -0
- package/telegram-plugin/uat/login.ts +96 -53
- package/telegram-plugin/uat/runners/agent-self-sufficiency.ts +457 -0
- package/telegram-plugin/uat/runners/paraphrases.ts +231 -0
- package/telegram-plugin/uat/runners/report.ts +150 -0
- package/telegram-plugin/uat/runners/run-agent-self-sufficiency.sh +50 -0
- package/telegram-plugin/uat/runners/scorer.test.ts +196 -0
- package/telegram-plugin/uat/runners/scorer.ts +106 -0
- package/telegram-plugin/uat/runners/skill-coverage.test.ts +100 -0
- package/telegram-plugin/uat/runners/skill-coverage.ts +620 -0
- package/telegram-plugin/uat/scenarios/ask-user-button-tap-dm.test.ts +141 -0
- package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +191 -0
- package/telegram-plugin/uat/scenarios/fuzz-extended-dm.test.ts +255 -0
- package/telegram-plugin/uat/scenarios/fuzz-human-style-dm.test.ts +275 -0
- package/telegram-plugin/uat/scenarios/fuzz-random-prompts-dm.test.ts +146 -0
- package/telegram-plugin/uat/scenarios/fuzz-status-ask-dm.test.ts +486 -0
- package/telegram-plugin/uat/scenarios/jtbd-interrupt-marker-dm.test.ts +67 -0
- package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +100 -0
- package/telegram-plugin/uat/scenarios/jtbd-soft-commit-dm.test.ts +67 -0
- package/telegram-plugin/uat/scenarios/jtbd-status-query-dm.test.ts +49 -0
- package/telegram-plugin/uat/scenarios/location-inbound-dm.test.ts +65 -0
- package/telegram-plugin/uat/scenarios/midturn-silent-dm.test.ts +175 -0
- package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +142 -0
- package/telegram-plugin/uat/scenarios/reactions-trigger-turn-dm.test.ts +96 -0
- package/telegram-plugin/uat/scenarios/secret-redaction-deletes-original-dm.test.ts +123 -0
- package/telegram-plugin/uat/scenarios/secret-redaction-no-false-positive-dm.test.ts +87 -0
- package/telegram-plugin/uat/scenarios/silence-poke-soft-dm.test.ts +155 -0
- package/telegram-plugin/uat/scenarios/silent-end-recovery-dm.test.ts +95 -0
- package/telegram-plugin/uat/scenarios/smoke-dm-reply.test.ts +57 -0
- package/telegram-plugin/uat/scenarios/subagent-watcher-no-rerun-dm.test.ts +135 -0
- package/telegram-plugin/uat/scenarios/vault-approval-posture-telegram-id-dm.test.ts +191 -0
- package/telegram-plugin/uat/scenarios/vault-audit-allow-dm.test.ts +108 -0
- package/telegram-plugin/uat/scenarios/vault-grant-auto-resume-dm.test.ts +121 -0
- package/telegram-plugin/uat/scenarios/vault-request-access-concurrent-dm.test.ts +161 -0
- package/telegram-plugin/uat/scenarios/vault-request-access-end-to-end-dm.test.ts +158 -0
- package/telegram-plugin/uat/scenarios/voice-inbound-dm.test.ts +65 -0
- package/telegram-plugin/vault-approval-posture.ts +42 -0
- package/telegram-plugin/welcome-text.ts +1 -0
- package/telegram-plugin/active-pins-sweep.ts +0 -204
- package/telegram-plugin/active-pins.ts +0 -146
- package/telegram-plugin/auth-dashboard.ts +0 -1104
- package/telegram-plugin/auth-slot-parser.ts +0 -497
- package/telegram-plugin/card-event-log.ts +0 -138
- package/telegram-plugin/dist/foreman/foreman.js +0 -31106
- package/telegram-plugin/docs/multi-agent-card-design.md +0 -847
- package/telegram-plugin/docs/pinned-progress-card-reliability.md +0 -144
- package/telegram-plugin/foreman/foreman-create-flow.ts +0 -202
- package/telegram-plugin/foreman/foreman-handlers.ts +0 -493
- package/telegram-plugin/foreman/foreman.ts +0 -1165
- package/telegram-plugin/foreman/setup-flow.ts +0 -345
- package/telegram-plugin/foreman/setup-state.ts +0 -239
- package/telegram-plugin/foreman/state.ts +0 -203
- package/telegram-plugin/pin-event-log.ts +0 -76
- package/telegram-plugin/progress-card-driver.ts +0 -2886
- package/telegram-plugin/progress-card-pin-manager.ts +0 -589
- package/telegram-plugin/progress-card-pin-watchdog.ts +0 -98
- package/telegram-plugin/progress-card.ts +0 -1409
- package/telegram-plugin/tests/HARNESS.md +0 -340
- package/telegram-plugin/tests/_progress-card-harness.ts +0 -109
- package/telegram-plugin/tests/active-pins-boot-reaper.test.ts +0 -211
- package/telegram-plugin/tests/active-pins-sweep.test.ts +0 -309
- package/telegram-plugin/tests/active-pins.test.ts +0 -187
- package/telegram-plugin/tests/auth-account-identity-surface.test.ts +0 -118
- package/telegram-plugin/tests/auth-dashboard-edge-cases.test.ts +0 -260
- package/telegram-plugin/tests/auth-dashboard-restart-flow.test.ts +0 -140
- package/telegram-plugin/tests/auth-dashboard-v3b.test.ts +0 -559
- package/telegram-plugin/tests/auth-dashboard.test.ts +0 -1045
- package/telegram-plugin/tests/auth-slot-commands.test.ts +0 -640
- package/telegram-plugin/tests/bg-agent-progress-card-757.test.ts +0 -201
- package/telegram-plugin/tests/boot-card-account-quota.test.ts +0 -137
- package/telegram-plugin/tests/card-event-log.test.ts +0 -145
- package/telegram-plugin/tests/first-paint.test.ts +0 -257
- package/telegram-plugin/tests/foreman-create-flow.test.ts +0 -359
- package/telegram-plugin/tests/foreman-handlers.test.ts +0 -347
- package/telegram-plugin/tests/foreman-state.test.ts +0 -164
- package/telegram-plugin/tests/foreman-write-ops.test.ts +0 -214
- package/telegram-plugin/tests/harness-ordering-invariants.test.ts +0 -243
- package/telegram-plugin/tests/pin-event-log.test.ts +0 -124
- package/telegram-plugin/tests/progress-card-api-failure-during-deferred.test.ts +0 -73
- package/telegram-plugin/tests/progress-card-close-paths-converge.test.ts +0 -272
- package/telegram-plugin/tests/progress-card-cross-turn.test.ts +0 -258
- package/telegram-plugin/tests/progress-card-delay-842.test.ts +0 -160
- package/telegram-plugin/tests/progress-card-dispose-preservepending.test.ts +0 -81
- package/telegram-plugin/tests/progress-card-draft-flag.test.ts +0 -80
- package/telegram-plugin/tests/progress-card-driver-eviction.test.ts +0 -215
- package/telegram-plugin/tests/progress-card-driver-fleet-shadow.test.ts +0 -123
- package/telegram-plugin/tests/progress-card-driver-force-complete-parent-done.test.ts +0 -76
- package/telegram-plugin/tests/progress-card-edit-timestamps-budget.test.ts +0 -62
- package/telegram-plugin/tests/progress-card-memory-bounds.test.ts +0 -84
- package/telegram-plugin/tests/progress-card-pin-failure-paths.test.ts +0 -139
- package/telegram-plugin/tests/progress-card-pin-manager.test.ts +0 -773
- package/telegram-plugin/tests/progress-card-pin-race-fast-turn.test.ts +0 -66
- package/telegram-plugin/tests/progress-card-pin-sidecar-partial-write.test.ts +0 -64
- package/telegram-plugin/tests/progress-card-pin-watchdog.test.ts +0 -190
- package/telegram-plugin/tests/progress-card-sigterm-pin-flush.test.ts +0 -146
- package/telegram-plugin/tests/real-gateway-f1-ladder-integrity.test.ts +0 -123
- package/telegram-plugin/tests/real-gateway-f2-instant-draft.test.ts +0 -82
- package/telegram-plugin/tests/real-gateway-f3-late-card.test.ts +0 -114
- package/telegram-plugin/tests/real-gateway-harness.ts +0 -699
- package/telegram-plugin/tests/real-gateway-i6-turn-flush-replay-dedup.test.ts +0 -313
- package/telegram-plugin/tests/real-gateway-ipc-lifecycle.test.ts +0 -299
- package/telegram-plugin/tests/real-gateway-spec.test.ts +0 -487
- package/telegram-plugin/tests/real-gateway.smoke.test.ts +0 -101
- package/telegram-plugin/tests/setup-flow.test.ts +0 -510
- package/telegram-plugin/tests/setup-state.test.ts +0 -146
- package/telegram-plugin/tests/sync-chat-running-subagents.test.ts +0 -116
- package/telegram-plugin/tests/turn-end-regressions.test.ts +0 -489
- package/telegram-plugin/tests/turn-flush-card-takeover.test.ts +0 -218
- package/telegram-plugin/tests/turn-flush-prose-recovery.test.ts +0 -78
- package/telegram-plugin/tests/two-zone-bg-carry-full-lifecycle.test.ts +0 -131
- package/telegram-plugin/tests/two-zone-bg-detection.test.ts +0 -120
- package/telegram-plugin/tests/two-zone-bg-done-when-all-terminal.test.ts +0 -116
- package/telegram-plugin/tests/two-zone-bg-early-turn-end.test.ts +0 -87
- package/telegram-plugin/tests/two-zone-bg-survives-next-turn.test.ts +0 -211
- package/telegram-plugin/tests/two-zone-card-cap.test.ts +0 -62
- package/telegram-plugin/tests/two-zone-card-fleet-row.test.ts +0 -101
- package/telegram-plugin/tests/two-zone-card-header-phases.test.ts +0 -78
- package/telegram-plugin/tests/two-zone-card-html-balance.test.ts +0 -110
- package/telegram-plugin/tests/two-zone-card-lifecycle.test.ts +0 -128
- package/telegram-plugin/tests/two-zone-card-sanitise.test.ts +0 -58
- package/telegram-plugin/tests/two-zone-card-snapshot.test.ts +0 -133
- package/telegram-plugin/tests/two-zone-concurrent-turns-isolation.test.ts +0 -155
- package/telegram-plugin/tests/two-zone-phasefor-precedence.test.ts +0 -117
- package/telegram-plugin/tests/two-zone-snapshot-extras.test.ts +0 -187
- package/telegram-plugin/tests/two-zone-stuck-edit-throttle.test.ts +0 -149
- package/telegram-plugin/tests/two-zone-stuck-header-escalation.test.ts +0 -101
- package/telegram-plugin/tests/two-zone-stuck-per-member.test.ts +0 -114
- package/telegram-plugin/tests/two-zone-stuck-recovery.test.ts +0 -105
- package/telegram-plugin/tests/waiting-ux-harness.ts +0 -381
- package/telegram-plugin/tests/waiting-ux.e2e.test.ts +0 -233
- package/telegram-plugin/turn-flush-prose-recovery.ts +0 -40
- package/telegram-plugin/two-zone-card.ts +0 -269
- package/telegram-plugin/uat/scenarios/smoke-clerk-reply.test.ts +0 -61
|
@@ -1,381 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Waiting-UX E2E harness — Phase 1 of #545.
|
|
3
|
-
*
|
|
4
|
-
* Wires the production status-reaction controller, progress-card driver,
|
|
5
|
-
* and a recording fake-bot under vitest fake timers. The goal is to make
|
|
6
|
-
* the four observed waiting-UX failure modes catchable in CI by asserting
|
|
7
|
-
* the wall-clock contract that varies by turn class:
|
|
8
|
-
* A — instant reply (no tools, <2s)
|
|
9
|
-
* B — short turn (1–3 tools, <15s)
|
|
10
|
-
* C — long / multi-agent (sub-agents, background workers)
|
|
11
|
-
*
|
|
12
|
-
* The harness simulates the slice of server.ts that determines the
|
|
13
|
-
* user-perceived timing:
|
|
14
|
-
* inbound update → setQueued() (👀) + progressDriver.startTurn()
|
|
15
|
-
* session 'thinking' → setThinking() (🤔)
|
|
16
|
-
* session 'tool_use' → setTool(name) (🔥/✍/👨💻/⚡)
|
|
17
|
-
* stream_reply → editMessageText / sendMessage on bot.api
|
|
18
|
-
* session 'turn_end' → setDone() (👍) + driver flush + onTurnComplete
|
|
19
|
-
*
|
|
20
|
-
* Anything not on that path (auth, history, ipc, foreman) is intentionally
|
|
21
|
-
* out of scope — those don't influence the four failures.
|
|
22
|
-
*
|
|
23
|
-
* Time control is via `vi.useFakeTimers()`; tests advance time with
|
|
24
|
-
* `clock.advance(ms)` which delegates to `vi.advanceTimersByTimeAsync`.
|
|
25
|
-
* Every recorded outbound API call is timestamped with the simulated
|
|
26
|
-
* `Date.now()` at the moment of the call.
|
|
27
|
-
*/
|
|
28
|
-
|
|
29
|
-
import { vi, type MockInstance } from 'vitest'
|
|
30
|
-
import { StatusReactionController } from '../status-reactions.js'
|
|
31
|
-
import { createProgressDriver, type ProgressDriver } from '../progress-card-driver.js'
|
|
32
|
-
import type { SessionEvent } from '../session-tail.js'
|
|
33
|
-
|
|
34
|
-
// ─── Recorder ────────────────────────────────────────────────────────────
|
|
35
|
-
|
|
36
|
-
export type RecordedKind =
|
|
37
|
-
| 'sendMessage'
|
|
38
|
-
| 'editMessageText'
|
|
39
|
-
| 'setMessageReaction'
|
|
40
|
-
| 'sendChatAction'
|
|
41
|
-
| 'deleteMessage'
|
|
42
|
-
| 'pinChatMessage'
|
|
43
|
-
|
|
44
|
-
export interface RecordedCall {
|
|
45
|
-
ts: number
|
|
46
|
-
kind: RecordedKind
|
|
47
|
-
chat_id: string
|
|
48
|
-
message_id?: number
|
|
49
|
-
payload?: string
|
|
50
|
-
args: unknown[]
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
export interface Recorder {
|
|
54
|
-
calls: RecordedCall[]
|
|
55
|
-
reactionSequence(): string[]
|
|
56
|
-
sentTexts(chat_id: string): string[]
|
|
57
|
-
edits(chat_id: string): RecordedCall[]
|
|
58
|
-
/**
|
|
59
|
-
* Detects the progress card sendMessage by payload heuristic
|
|
60
|
-
* (Working… / ⚙️ / ⏳ glyphs that the production card uses).
|
|
61
|
-
*/
|
|
62
|
-
progressCardSendMs(chat_id: string): number | null
|
|
63
|
-
firstReactionMs(chat_id: string): number | null
|
|
64
|
-
lastReactionEmoji(chat_id: string): string | null
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
// ─── Clock ───────────────────────────────────────────────────────────────
|
|
68
|
-
|
|
69
|
-
export interface HarnessClock {
|
|
70
|
-
now(): number
|
|
71
|
-
advance(ms: number): Promise<void>
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
// ─── Fake bot.api with recording ─────────────────────────────────────────
|
|
75
|
-
|
|
76
|
-
type Method = (...args: unknown[]) => Promise<unknown>
|
|
77
|
-
|
|
78
|
-
export interface FakeBotApi {
|
|
79
|
-
sendMessage: MockInstance<Method>
|
|
80
|
-
editMessageText: MockInstance<Method>
|
|
81
|
-
setMessageReaction: MockInstance<Method>
|
|
82
|
-
sendChatAction: MockInstance<Method>
|
|
83
|
-
deleteMessage: MockInstance<Method>
|
|
84
|
-
pinChatMessage: MockInstance<Method>
|
|
85
|
-
unpinChatMessage: MockInstance<Method>
|
|
86
|
-
editMessageReplyMarkup: MockInstance<Method>
|
|
87
|
-
getFile: MockInstance<Method>
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
export interface HarnessHandle {
|
|
91
|
-
bot: { api: FakeBotApi }
|
|
92
|
-
clock: HarnessClock
|
|
93
|
-
recorder: Recorder
|
|
94
|
-
controller: StatusReactionController
|
|
95
|
-
driver: ProgressDriver
|
|
96
|
-
inbound(opts: { chatId: string; messageId: number; text?: string }): void
|
|
97
|
-
feedSessionEvent(ev: SessionEvent): void
|
|
98
|
-
/** Convenience for class-A direct stream_reply path. */
|
|
99
|
-
streamReply(opts: { chat_id: string; text: string; done?: boolean }): Promise<void>
|
|
100
|
-
finalize(): void
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
function makeRecorderAndApi(): { recorder: Recorder; api: FakeBotApi } {
|
|
104
|
-
let nextId = 5000
|
|
105
|
-
const calls: RecordedCall[] = []
|
|
106
|
-
|
|
107
|
-
const sendMessage = vi.fn(async (...args: unknown[]) => {
|
|
108
|
-
const message_id = nextId++
|
|
109
|
-
calls.push({
|
|
110
|
-
ts: Date.now(),
|
|
111
|
-
kind: 'sendMessage',
|
|
112
|
-
chat_id: String(args[0]),
|
|
113
|
-
message_id,
|
|
114
|
-
payload: String(args[1] ?? ''),
|
|
115
|
-
args,
|
|
116
|
-
})
|
|
117
|
-
return { message_id }
|
|
118
|
-
}) as unknown as MockInstance<Method>
|
|
119
|
-
|
|
120
|
-
const editMessageText = vi.fn(async (...args: unknown[]) => {
|
|
121
|
-
calls.push({
|
|
122
|
-
ts: Date.now(),
|
|
123
|
-
kind: 'editMessageText',
|
|
124
|
-
chat_id: String(args[0]),
|
|
125
|
-
message_id: Number(args[1]),
|
|
126
|
-
payload: String(args[2] ?? ''),
|
|
127
|
-
args,
|
|
128
|
-
})
|
|
129
|
-
return true
|
|
130
|
-
}) as unknown as MockInstance<Method>
|
|
131
|
-
|
|
132
|
-
const setMessageReaction = vi.fn(async (...args: unknown[]) => {
|
|
133
|
-
const reactions = args[2] as Array<{ emoji?: string }> | undefined
|
|
134
|
-
const emoji = reactions?.[0]?.emoji
|
|
135
|
-
calls.push({
|
|
136
|
-
ts: Date.now(),
|
|
137
|
-
kind: 'setMessageReaction',
|
|
138
|
-
chat_id: String(args[0]),
|
|
139
|
-
message_id: Number(args[1]),
|
|
140
|
-
payload: emoji,
|
|
141
|
-
args,
|
|
142
|
-
})
|
|
143
|
-
return true
|
|
144
|
-
}) as unknown as MockInstance<Method>
|
|
145
|
-
|
|
146
|
-
const sendChatAction = vi.fn(async (...args: unknown[]) => {
|
|
147
|
-
calls.push({
|
|
148
|
-
ts: Date.now(),
|
|
149
|
-
kind: 'sendChatAction',
|
|
150
|
-
chat_id: String(args[0]),
|
|
151
|
-
payload: String(args[1] ?? ''),
|
|
152
|
-
args,
|
|
153
|
-
})
|
|
154
|
-
return true
|
|
155
|
-
}) as unknown as MockInstance<Method>
|
|
156
|
-
|
|
157
|
-
const deleteMessage = vi.fn(async (...args: unknown[]) => {
|
|
158
|
-
calls.push({
|
|
159
|
-
ts: Date.now(),
|
|
160
|
-
kind: 'deleteMessage',
|
|
161
|
-
chat_id: String(args[0]),
|
|
162
|
-
message_id: Number(args[1]),
|
|
163
|
-
args,
|
|
164
|
-
})
|
|
165
|
-
return true
|
|
166
|
-
}) as unknown as MockInstance<Method>
|
|
167
|
-
|
|
168
|
-
const pinChatMessage = vi.fn(async (...args: unknown[]) => {
|
|
169
|
-
calls.push({
|
|
170
|
-
ts: Date.now(),
|
|
171
|
-
kind: 'pinChatMessage',
|
|
172
|
-
chat_id: String(args[0]),
|
|
173
|
-
message_id: Number(args[1]),
|
|
174
|
-
args,
|
|
175
|
-
})
|
|
176
|
-
return true
|
|
177
|
-
}) as unknown as MockInstance<Method>
|
|
178
|
-
|
|
179
|
-
const unpinChatMessage = vi.fn(async () => true) as unknown as MockInstance<Method>
|
|
180
|
-
const editMessageReplyMarkup = vi.fn(async () => true) as unknown as MockInstance<Method>
|
|
181
|
-
const getFile = vi.fn(async () => ({ file_path: 'x' })) as unknown as MockInstance<Method>
|
|
182
|
-
|
|
183
|
-
const api: FakeBotApi = {
|
|
184
|
-
sendMessage,
|
|
185
|
-
editMessageText,
|
|
186
|
-
setMessageReaction,
|
|
187
|
-
sendChatAction,
|
|
188
|
-
deleteMessage,
|
|
189
|
-
pinChatMessage,
|
|
190
|
-
unpinChatMessage,
|
|
191
|
-
editMessageReplyMarkup,
|
|
192
|
-
getFile,
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
const isCardPayload = (text: string | undefined): boolean =>
|
|
196
|
-
text != null &&
|
|
197
|
-
(text.includes('Working') ||
|
|
198
|
-
text.includes('⚙') ||
|
|
199
|
-
text.includes('⏳') ||
|
|
200
|
-
text.includes('• '))
|
|
201
|
-
|
|
202
|
-
const recorder: Recorder = {
|
|
203
|
-
calls,
|
|
204
|
-
reactionSequence: () =>
|
|
205
|
-
calls.filter((c) => c.kind === 'setMessageReaction').map((c) => c.payload ?? ''),
|
|
206
|
-
sentTexts: (chat_id) =>
|
|
207
|
-
calls
|
|
208
|
-
.filter((c) => c.kind === 'sendMessage' && c.chat_id === chat_id)
|
|
209
|
-
.map((c) => c.payload ?? ''),
|
|
210
|
-
edits: (chat_id) => calls.filter((c) => c.kind === 'editMessageText' && c.chat_id === chat_id),
|
|
211
|
-
progressCardSendMs: (chat_id) => {
|
|
212
|
-
const hit = calls.find(
|
|
213
|
-
(c) => c.kind === 'sendMessage' && c.chat_id === chat_id && isCardPayload(c.payload),
|
|
214
|
-
)
|
|
215
|
-
return hit ? hit.ts : null
|
|
216
|
-
},
|
|
217
|
-
firstReactionMs: (chat_id) => {
|
|
218
|
-
const hit = calls.find((c) => c.kind === 'setMessageReaction' && c.chat_id === chat_id)
|
|
219
|
-
return hit ? hit.ts : null
|
|
220
|
-
},
|
|
221
|
-
lastReactionEmoji: (chat_id) => {
|
|
222
|
-
const hits = calls.filter((c) => c.kind === 'setMessageReaction' && c.chat_id === chat_id)
|
|
223
|
-
return hits.length === 0 ? null : (hits[hits.length - 1].payload ?? null)
|
|
224
|
-
},
|
|
225
|
-
}
|
|
226
|
-
|
|
227
|
-
return { recorder, api }
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
// ─── Public factory ──────────────────────────────────────────────────────
|
|
231
|
-
|
|
232
|
-
export interface CreateHarnessOpts {
|
|
233
|
-
allowedReactions?: Set<string> | null
|
|
234
|
-
debounceMs?: number
|
|
235
|
-
driverCoalesceMs?: number
|
|
236
|
-
driverMinIntervalMs?: number
|
|
237
|
-
/**
|
|
238
|
-
* Progress-card initial-delay-ms. Production default is 30s (cards are
|
|
239
|
-
* suppressed for fast turns). Tests for class B/C should set this small
|
|
240
|
-
* (e.g. 0–500) so the deferred first emit can fire inside the test.
|
|
241
|
-
*/
|
|
242
|
-
driverInitialDelayMs?: number
|
|
243
|
-
/** Heartbeat ms; pass 0 to disable. */
|
|
244
|
-
driverHeartbeatMs?: number
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
export function createWaitingUxHarness(opts: CreateHarnessOpts = {}): HarnessHandle {
  // Builds a self-contained test rig: a recording fake bot API, a
  // StatusReactionController that reacts on the inbound message, and a
  // progress driver whose emits are rendered as one card message per chat.
  //
  // Precondition: the calling test has already installed fake timers with
  // vi.useFakeTimers() (and restores them itself, e.g. in afterEach).
  const recorderAndApi = makeRecorderAndApi()
  const recorder = recorderAndApi.recorder
  const api = recorderAndApi.api
  const bot = { api }

  // Message that status reactions are attached to (set by `inbound`).
  let reactionChatId: string | null = null
  let reactionMessageId: number | null = null
  // Chat id most recently announced by an 'enqueue' session event.
  let activeChatId: string | null = null

  const controller = new StatusReactionController(
    async (emoji) => {
      const chatId = reactionChatId
      const messageId = reactionMessageId
      // Nothing to react to until an inbound message has been registered.
      if (chatId == null || messageId == null) return
      await api.setMessageReaction(chatId, messageId, [{ type: 'emoji', emoji }])
    },
    opts.allowedReactions ?? null,
    { debounceMs: opts.debounceMs ?? 700 },
  )

  // chatId -> message id of the progress card already sent to that chat.
  const cardIdByChat = new Map<string, number>()

  // First emit for a chat sends a new message; later emits edit it in place.
  const renderCard = async (a: { chatId: string; html: string; done: boolean; isFirstEmit: boolean }): Promise<void> => {
    const priorId = cardIdByChat.get(a.chatId)
    if (priorId != null) {
      await api.editMessageText(a.chatId, priorId, a.html, { parse_mode: 'HTML' })
      return
    }
    const sent = (await api.sendMessage(a.chatId, a.html, { parse_mode: 'HTML' })) as { message_id: number }
    cardIdByChat.set(a.chatId, sent.message_id)
  }

  const driver = createProgressDriver({
    // Rendering is fire-and-forget: the driver does not wait for the API call.
    emit: (a) => void renderCard(a),
    coalesceMs: opts.driverCoalesceMs ?? 400,
    minIntervalMs: opts.driverMinIntervalMs ?? 500,
    initialDelayMs: opts.driverInitialDelayMs ?? 60_000,
    heartbeatMs: opts.driverHeartbeatMs,
  })

  // Routes a session event to the reaction controller, then always forwards
  // it to the progress driver together with the current chat id.
  function feedSessionEvent(ev: SessionEvent): void {
    if (ev.kind === 'enqueue') {
      if (ev.chatId) activeChatId = ev.chatId
    } else if (ev.kind === 'thinking') {
      controller.setThinking()
    } else if (ev.kind === 'tool_use') {
      // Telegram reply/edit tools do not drive the tool reaction.
      if (!isTelegramSurfaceTool(ev.toolName)) controller.setTool(ev.toolName)
    } else if (ev.kind === 'turn_end') {
      controller.setDone()
    }
    driver.ingest(ev, activeChatId, undefined)
  }

  // Registers the inbound user message, queues the first reaction and
  // starts the driver's turn synchronously.
  function inbound(args: { chatId: string; messageId: number; text?: string }): void {
    const { chatId, messageId, text } = args
    reactionChatId = chatId
    reactionMessageId = messageId
    // 👀 immediately — same line as server.ts:6118.
    controller.setQueued()
    // Prime the progress card synchronously, same as server.ts:6147.
    driver.startTurn({ chatId, userText: text ?? '' })
  }

  // chat_id -> message id of the streamed reply already sent to that chat.
  const streamIdByChat = new Map<string, number>()

  // Sends the streamed reply on first call per chat, edits it afterwards;
  // a call with `done: true` additionally marks the reaction as done.
  async function streamReply(args: { chat_id: string; text: string; done?: boolean }): Promise<void> {
    const { chat_id: chatId, text } = args
    const priorId = streamIdByChat.get(chatId)
    if (priorId != null) {
      await api.editMessageText(chatId, priorId, text, { parse_mode: 'HTML' })
    } else {
      const sent = (await api.sendMessage(chatId, text, { parse_mode: 'HTML' })) as { message_id: number }
      streamIdByChat.set(chatId, sent.message_id)
    }
    if (args.done === true) controller.setDone()
  }

  // Best-effort teardown of the driver; disposal errors are ignored on purpose.
  function finalize(): void {
    try {
      driver.dispose?.()
    } catch {
      /* ignore */
    }
  }

  const clock: HarnessClock = {
    now: () => Date.now(),
    advance: async (ms) => {
      // vi.advanceTimersByTimeAsync isn't implemented by Bun's vitest shim,
      // so when it is missing use the sync variant plus a short microtask
      // flush. This lets the harness run under both vitest and `bun test`.
      const viCompat = vi as { advanceTimersByTimeAsync?: (ms: number) => Promise<void> }
      if (typeof viCompat.advanceTimersByTimeAsync !== 'function') {
        vi.advanceTimersByTime(ms)
        let flushesLeft = 5
        while (flushesLeft-- > 0) await Promise.resolve()
        return
      }
      await viCompat.advanceTimersByTimeAsync(ms)
    },
  }

  return {
    bot,
    clock,
    recorder,
    controller,
    driver,
    inbound,
    feedSessionEvent,
    streamReply,
    finalize,
  }
}
|
|
371
|
-
|
|
372
|
-
/**
 * Reports whether a tool name refers to one of the Telegram reply/edit tools.
 *
 * Matching is case-insensitive. A name matches when it is exactly `reply` or
 * `stream_reply`, or when it ends with `__reply`, `__stream_reply` or
 * `__edit_message`.
 *
 * @param name - Tool name as reported by a `tool_use` event.
 * @returns `true` for a Telegram surface tool, otherwise `false`.
 */
function isTelegramSurfaceTool(name: string): boolean {
  const lowered = name.toLowerCase()
  const bareNames = ['reply', 'stream_reply']
  if (bareNames.includes(lowered)) return true
  const namespacedSuffixes = ['__reply', '__stream_reply', '__edit_message']
  return namespacedSuffixes.some((suffix) => lowered.endsWith(suffix))
}
|
|
@@ -1,233 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Waiting-UX E2E contract tests — Phase 1 of #545 (RED).
|
|
3
|
-
*
|
|
4
|
-
* These tests assert the deterministic time-sequence contract for the
|
|
5
|
-
* three turn classes specified in #545. They are intentionally RED on
|
|
6
|
-
* `main` — each one catches one of the four observed failure modes from
|
|
7
|
-
* the live demo:
|
|
8
|
-
*
|
|
9
|
-
* F1. Status reaction collapses straight to 👍 (skips 👀→🤔→🔥).
|
|
10
|
-
* F2. No instant draft/typing signal — silence "for ages" after inbound.
|
|
11
|
-
* F3. Progress card renders late.
|
|
12
|
-
* F4. Pre-tool interim text is static — no refresh on step transitions.
|
|
13
|
-
*
|
|
14
|
-
* Phase 1 scope is tests-only — no production fixes. Once these go green
|
|
15
|
-
* we know the underlying behaviour matches the spec.
|
|
16
|
-
*
|
|
17
|
-
* All time control is via `vi.useFakeTimers()`. The harness records every
|
|
18
|
-
* outbound bot.api call with `Date.now()` at invocation time, so first-
|
|
19
|
-
* paint and ladder assertions are wall-clock deterministic.
|
|
20
|
-
*/
|
|
21
|
-
|
|
22
|
-
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'
|
|
23
|
-
import { createWaitingUxHarness, type HarnessHandle } from './waiting-ux-harness.js'
|
|
24
|
-
import type { SessionEvent } from '../session-tail.js'
|
|
25
|
-
|
|
26
|
-
// Chat id and inbound message id shared by every scenario in this file.
const CHAT = '8248703757'
const INBOUND_MSG = 100

// Each test runs on fake timers; real timers are restored afterwards so a
// failing test cannot leave the fake clock installed for the next one.
beforeEach(() => void vi.useFakeTimers())
afterEach(() => void vi.useRealTimers())
|
|
36
|
-
|
|
37
|
-
// ─── Class A — Instant reply (no tool calls, <2s) ────────────────────────
|
|
38
|
-
|
|
39
|
-
describe('Class A — instant reply', () => {
  // Fresh copies of the inbound message and its enqueue event for 'hi'.
  const hiInbound = () => ({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
  const hiEnqueue = (): SessionEvent => ({
    kind: 'enqueue',
    chatId: CHAT,
    messageId: '1',
    threadId: null,
    rawContent: 'hi',
  })

  it('first-paint deadline: 👀 reaction lands within 800ms of inbound (catches F2)', async () => {
    const harness = createWaitingUxHarness()
    const startedAt = harness.clock.now()
    harness.inbound(hiInbound())
    // Give microtasks and the controller's immediate (non-debounced) queued
    // reaction a chance to run.
    await harness.clock.advance(50)

    const reactedAt = harness.recorder.firstReactionMs(CHAT)
    expect(reactedAt).not.toBeNull()
    const latencyMs = (reactedAt ?? Infinity) - startedAt
    expect(latencyMs).toBeLessThan(800)
    const openingEmoji = harness.recorder.reactionSequence()[0]
    expect(openingEmoji).toBe('👀')
    harness.finalize()
  })

  it('no progress card is sent for an instant turn (catches F3 / spec class A)', async () => {
    const harness = createWaitingUxHarness({ driverInitialDelayMs: 30_000 })
    harness.inbound(hiInbound())
    // Class A timeline: enqueue → brief thinking → reply → turn_end, all <2s.
    harness.feedSessionEvent(hiEnqueue())
    await harness.clock.advance(100)
    harness.feedSessionEvent({ kind: 'thinking' })
    await harness.clock.advance(200)
    await harness.streamReply({ chat_id: CHAT, text: 'hello back', done: true })
    harness.feedSessionEvent({ kind: 'turn_end', durationMs: 1500 })
    await harness.clock.advance(2_000)

    const cardSentAt = harness.recorder.progressCardSendMs(CHAT)
    expect(cardSentAt).toBeNull()
    harness.finalize()
  })

  it('terminates with 👍 and no spurious intermediate states', async () => {
    const harness = createWaitingUxHarness()
    harness.inbound(hiInbound())
    harness.feedSessionEvent(hiEnqueue())
    await harness.clock.advance(50)
    await harness.streamReply({ chat_id: CHAT, text: 'hello back', done: true })
    harness.feedSessionEvent({ kind: 'turn_end', durationMs: 800 })
    await harness.clock.advance(1_500)

    const closingEmoji = harness.recorder.lastReactionEmoji(CHAT)
    expect(closingEmoji).toBe('👍')
    harness.finalize()
  })
})
|
|
80
|
-
|
|
81
|
-
// ─── Class B — short turn (1–3 tools, <15s) ──────────────────────────────
|
|
82
|
-
|
|
83
|
-
describe('Class B — short turn', () => {
  // Emojis that count as a "thinking/working" rung of the reaction ladder.
  // The man-technologist entry is spelled with explicit escapes
  // (U+1F468 ZERO-WIDTH-JOINER U+1F4BB) so the joiner cannot be lost by an
  // editor or copy/paste; without it the entry is two separate emojis and
  // can never equal a single reaction value.
  const WORKING_EMOJIS = ['🤔', '🤓', '✍', '⚡', '👌', '\u{1F468}\u200D\u{1F4BB}', '🔥']

  it('ladder integrity: 👀 → (🤔 or working glyph) before 👍 — catches F1 (straight-to-👍 collapse)', async () => {
    const h = createWaitingUxHarness({ debounceMs: 700 })
    h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'read foo.txt' })
    h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'read foo.txt' })
    // 200ms in — model starts thinking
    await h.clock.advance(200)
    h.feedSessionEvent({ kind: 'thinking' })
    // 1s in — Read tool (debounced by 700ms — should still land before turn_end)
    await h.clock.advance(800)
    h.feedSessionEvent({ kind: 'tool_use', toolName: 'Read', toolUseId: 't1' })
    // Wait long enough for the tool reaction to flush past debounce.
    await h.clock.advance(1_500)
    h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't1', toolName: 'Read' })
    await h.streamReply({ chat_id: CHAT, text: 'contents: ...', done: true })
    h.feedSessionEvent({ kind: 'turn_end', durationMs: 4_000 })
    await h.clock.advance(2_000)

    const seq = h.recorder.reactionSequence()
    // Must start with 👀
    expect(seq[0]).toBe('👀')
    // Must NOT collapse straight to 👍 — at least one intermediate before final.
    expect(seq.length).toBeGreaterThanOrEqual(3)
    const finalIdx = seq.length - 1
    expect(seq[finalIdx]).toBe('👍')
    // Intermediate states must include a thinking/working emoji, not just 👀.
    const intermediates = seq.slice(1, finalIdx)
    const hasIntermediate = intermediates.some((e) => WORKING_EMOJIS.includes(e))
    expect(hasIntermediate, `ladder collapsed: ${JSON.stringify(seq)}`).toBe(true)
    h.finalize()
  })

  it('interim refresh: pre-tool preamble updates ≥1× across step transitions (catches F4)', async () => {
    const h = createWaitingUxHarness()
    h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'do thing' })
    h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'do thing' })
    // Initial preamble before any tool runs.
    await h.streamReply({ chat_id: CHAT, text: 'looking…' })
    await h.clock.advance(500)
    // Step transition #1 — tool_use lands.
    h.feedSessionEvent({ kind: 'tool_use', toolName: 'Read', toolUseId: 't1' })
    await h.clock.advance(500)
    // Step transition #2 — second different tool category.
    h.feedSessionEvent({ kind: 'tool_use', toolName: 'WebFetch', toolUseId: 't2' })
    await h.clock.advance(500)
    await h.streamReply({ chat_id: CHAT, text: 'final answer', done: true })
    h.feedSessionEvent({ kind: 'turn_end', durationMs: 3_000 })

    // Across the two step transitions we must see ≥1 update to the
    // pre-tool preamble surface (sendMessage or editMessageText for the
    // active stream). Today's behaviour: a single static preamble then
    // silence — this assertion catches that.
    const edits = h.recorder.edits(CHAT)
    expect(edits.length, 'pre-tool preamble never refreshed').toBeGreaterThanOrEqual(1)
    h.finalize()
  })
})
|
|
142
|
-
|
|
143
|
-
// ─── Class C — long / multi-agent ────────────────────────────────────────
|
|
144
|
-
|
|
145
|
-
describe('Class C — long / multi-agent', () => {
  // Driver cadence shared by the card-timing scenarios below.
  const fastCadence = { driverCoalesceMs: 100, driverMinIntervalMs: 100 }

  // Builds the enqueue event for the inbound text of a scenario.
  const enqueueOf = (rawContent: string): SessionEvent => ({
    kind: 'enqueue',
    chatId: CHAT,
    messageId: '1',
    threadId: null,
    rawContent,
  })

  it('progress card renders early, before turn_end, for a multi-second turn (catches F3)', async () => {
    // production tunes the initial delay; the harness asserts the contract
    const harness = createWaitingUxHarness({ driverInitialDelayMs: 500, ...fastCadence })
    const startedAt = harness.clock.now()
    harness.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'big task' })
    harness.feedSessionEvent(enqueueOf('big task'))
    await harness.clock.advance(200)
    harness.feedSessionEvent({ kind: 'thinking' })
    await harness.clock.advance(300)
    harness.feedSessionEvent({ kind: 'tool_use', toolName: 'Bash', toolUseId: 't1' })
    // The card MUST already be visible by the 2s mark, not only at turn_end.
    await harness.clock.advance(1_500)

    const cardSentAt = harness.recorder.progressCardSendMs(CHAT)
    expect(cardSentAt, 'progress card never rendered').not.toBeNull()
    const cardLatencyMs = (cardSentAt ?? Infinity) - startedAt
    expect(cardLatencyMs).toBeLessThan(2_500)

    // Play out the remainder of the turn so no timers are left pending.
    harness.feedSessionEvent({ kind: 'tool_result', toolUseId: 't1', toolName: 'Bash' })
    await harness.streamReply({ chat_id: CHAT, text: 'done', done: true })
    harness.feedSessionEvent({ kind: 'turn_end', durationMs: 5_000 })
    await harness.clock.advance(2_000)
    harness.finalize()
  })

  it('card stays stable until ALL background work hits terminal — Done ≥ last sub-agent terminal', async () => {
    const harness = createWaitingUxHarness({ driverInitialDelayMs: 200, ...fastCadence })
    harness.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'multi-agent' })
    harness.feedSessionEvent(enqueueOf('multi-agent'))
    await harness.clock.advance(300)

    // Two sub-agents start back to back.
    for (const agentId of ['a1', 'a2']) {
      harness.feedSessionEvent({ kind: 'sub_agent_started', agentId, firstPromptText: agentId })
    }
    await harness.clock.advance(1_000)

    // a1 is the early finisher.
    harness.feedSessionEvent({ kind: 'sub_agent_turn_end', agentId: 'a1' })
    const firstTerminalAt = harness.clock.now()
    await harness.clock.advance(2_000)

    // The main turn_end lands while a2 is still running; per the spec the
    // card must stay un-Done until every worker is terminal.
    harness.feedSessionEvent({ kind: 'turn_end', durationMs: 3_000 })
    await harness.clock.advance(500)

    // a2 finishing is the real terminal point of the turn.
    harness.feedSessionEvent({ kind: 'sub_agent_turn_end', agentId: 'a2' })
    const lastTerminalAt = harness.clock.now()
    await harness.clock.advance(2_000)

    // Locate message sends/edits in this chat whose payload carries a Done
    // marker ("Done", "✅" or "✓").
    const doneMarkers = ['Done', '✅', '✓']
    const doneOps = harness.recorder.calls.filter((call) => {
      const isMessageOp = call.kind === 'sendMessage' || call.kind === 'editMessageText'
      if (!isMessageOp || call.chat_id !== CHAT) return false
      return doneMarkers.some((marker) => call.payload?.includes(marker) === true)
    })
    expect(doneOps.length, 'card never reached a Done state').toBeGreaterThan(0)

    const doneAt = doneOps[doneOps.length - 1].ts
    expect(
      doneAt,
      `card Done (${doneAt}) fired before last sub-agent terminal (${lastTerminalAt})`,
    ).toBeGreaterThanOrEqual(lastTerminalAt)
    // Sanity check on the scenario itself: a1 ended before a2.
    expect(firstTerminalAt).toBeLessThan(lastTerminalAt)
    harness.finalize()
  })

  it('first-paint deadline still ≤800ms even on long turns', async () => {
    const harness = createWaitingUxHarness({ driverInitialDelayMs: 500 })
    const startedAt = harness.clock.now()
    harness.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'long' })
    await harness.clock.advance(50)

    const reactedAt = harness.recorder.firstReactionMs(CHAT)
    expect(reactedAt).not.toBeNull()
    const latencyMs = (reactedAt ?? Infinity) - startedAt
    expect(latencyMs).toBeLessThan(800)

    // Cleanup: close the turn and let pending timers run out.
    harness.feedSessionEvent(enqueueOf('long'))
    harness.feedSessionEvent({ kind: 'turn_end', durationMs: 100 })
    await harness.clock.advance(2_000)
    harness.finalize()
  })
})
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Turn-flush prose recovery for #51.
|
|
3
|
-
*
|
|
4
|
-
* The gateway's `currentTurnCapturedText` accumulator gates on
|
|
5
|
-
* `currentSessionChatId != null`, while the progress-card driver's
|
|
6
|
-
* `ingest` uses the `chatId` from the IPC envelope (chatHint). When those
|
|
7
|
-
* two views of "is this turn one we're tracking" disagree — e.g., text
|
|
8
|
-
* arrives before enqueue's chatId is captured, or after a mid-turn reset
|
|
9
|
-
* — the progress card renders the assistant prose as narrative steps but
|
|
10
|
-
* `capturedText` stays empty, so `decideTurnFlush` returns `'empty-text'`
|
|
11
|
-
* and the turn-flush backstop never sends the prose to Telegram. The
|
|
12
|
-
* user sees a step bullet on the card and nothing in their chat.
|
|
13
|
-
*
|
|
14
|
-
* This helper bridges the gap: at turn_end, if the gateway has no
|
|
15
|
-
* captured text, peek the progress-card state and recover the assistant
|
|
16
|
-
* prose from the narrative steps. Pure for testability — the gateway
|
|
17
|
-
* is responsible for actually wiring the recovered text into the flush
|
|
18
|
-
* decision.
|
|
19
|
-
*/
|
|
20
|
-
|
|
21
|
-
import type { ProgressCardState } from './progress-card.js'
|
|
22
|
-
|
|
23
|
-
/**
 * Returns the assistant prose recorded as narrative steps in the
 * progress-card state: every non-empty string `text` is collected in order,
 * joined with newlines, and the joined result is trimmed.
 *
 * Never throws on malformed input: an absent state, a state without a
 * `narratives` array, and `null`/`undefined` entries inside the array are
 * all tolerated and simply contribute nothing.
 *
 * @param state - Progress-card state to read, or `undefined` when none exists.
 * @returns The recovered prose, or an empty string when there is none.
 */
export function recoverProseFromProgressCard(
  state: ProgressCardState | undefined,
): string {
  if (state == null) return ''
  // Defensive: older state shapes (e.g. partial persisted state, mocks
  // in tests) may lack the `narratives` field. Don't throw.
  const narratives: unknown = state.narratives
  if (!Array.isArray(narratives)) return ''
  const parts: string[] = []
  for (const entry of narratives as unknown[]) {
    // Same defensive stance for the entries themselves: a null/undefined
    // slot is skipped instead of crashing on the property access.
    const text = (entry as { text?: unknown } | null | undefined)?.text
    if (typeof text === 'string' && text.length > 0) parts.push(text)
  }
  return parts.join('\n').trim()
}
|