switchroom 0.14.66 → 0.14.67

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,82 @@
1
+ /**
2
+ * Real-work UAT (channel) — the DM real-work suite, in a forum supergroup.
3
+ * Proves the status surface (activity/worker feed) AND the answer land IN the
4
+ * channel under genuine work — not leaked to the owner DM — and that a late
5
+ * reply after a long tool turn doesn't escape the channel. Self-skips green when
6
+ * SWITCHROOM_UAT_CHAT_ID is unset or the chat isn't a resolvable supergroup.
7
+ *
8
+ * mtcute has no forum-topic API, so this uses the supergroup's General topic: it
9
+ * proves DM-vs-channel routing, not correct-topic-among-many (the gateway unit
10
+ * thread-assertions pin that). See real-work-prompts.ts.
11
+ */
12
+ import { describe, it, expect, beforeAll } from "vitest";
13
+ import { spinUp, type Scenario } from "../harness.js";
14
+ import {
15
+ REAL_WORK_CASES,
16
+ collectTurn,
17
+ analyzeTurn,
18
+ summarizeTurn,
19
+ } from "../real-work-prompts.js";
20
+
21
/**
 * Chat id of the target supergroup, parsed base-10 from the
 * SWITCHROOM_UAT_CHAT_ID environment variable. Evaluates to NaN when the
 * variable is unset or does not start with a number.
 */
const SUPERGROUP_ID = Number.parseInt(process.env["SWITCHROOM_UAT_CHAT_ID"] ?? "", 10);
22
+
23
/**
 * Channel variant of the real-work suite: each entry of REAL_WORK_CASES is
 * sent to the chat identified by SUPERGROUP_ID, and every observed bot message
 * and edit is asserted to carry that chat id.
 *
 * A single scenario is spun up once for the whole suite and shared by all
 * cases. When SUPERGROUP_ID is not a finite number, or the driver reports it
 * cannot resolve that chat, each case returns early (and therefore passes).
 */
describe("uat: real-work channel — status + answer land in the supergroup", () => {
  let sc: Scenario | null = null;
  let postable = false;

  beforeAll(async () => {
    if (!Number.isFinite(SUPERGROUP_ID)) {
      console.warn("[uat] SWITCHROOM_UAT_CHAT_ID unset — skipping real-work channel suite");
      return undefined;
    }
    const scenario = await spinUp({ agent: "test-harness" });
    try {
      await scenario.driver.primeDialogs();
      postable = await scenario.driver.canResolve(SUPERGROUP_ID);
    } catch (err) {
      // Setup failed after the scenario was created. No teardown function is
      // handed to vitest on this path, so release the scenario here.
      await scenario.tearDown();
      throw err;
    }
    sc = scenario;
    if (!postable) {
      console.warn(`[uat] supergroup ${SUPERGROUP_ID} not resolvable — skipping real-work channel suite`);
    }
    // A function returned from beforeAll is run by vitest once after all tests
    // in the suite, so the shared scenario is released even when the suite
    // self-skips because the chat is not resolvable.
    return async () => {
      await scenario.tearDown();
    };
  });

  for (const fc of REAL_WORK_CASES) {
    it(
      `[real-work-sg] ${fc.name} (${fc.kind}) — answer + surface land in the channel`,
      async () => {
        if (sc == null || !postable) return; // self-skip green
        await sc.driver.primeDialogs();
        const obs = await collectTurn(
          sc.driver,
          SUPERGROUP_ID,
          sc.driverUserId,
          fc.prompt,
          { timeoutMs: fc.timeoutMs, minAnswerChars: fc.minAnswerChars },
        );
        // Log the turn summary and an answer excerpt even when the case passes.
        console.log(summarizeTurn(`sg:${fc.name}`, obs));
        if (obs.answer != null) {
          console.log(
            `[real-work-sg] ${fc.name} answer: ${JSON.stringify(obs.answer.text.slice(0, 180))}`,
          );
        }

        const { violations, warnings } = analyzeTurn(obs, {
          requireSurface: fc.requireSurface,
          chatId: SUPERGROUP_ID, // wrong-surface detector = leaked out of the channel
        });
        // Warnings are reported but never fail the case.
        for (const w of warnings) {
          console.warn(`[real-work-sg] ${fc.name}: WARN ${w.code}: ${w.detail}`);
        }
        // Violations fail the case with one line per violated invariant.
        if (violations.length > 0) {
          throw new Error(
            `[real-work-sg] ${fc.name}: ${violations.length} invariant violation(s):\n` +
              violations.map((x) => ` - ${x.code}: ${x.detail}`).join("\n"),
          );
        }
        // Every observed bot message must be in the channel (the routing proof).
        for (const m of [...obs.botMessages, ...obs.edits]) {
          expect(m.chatId).toBe(SUPERGROUP_ID);
        }
        expect(obs.answer).not.toBeNull();
      },
      // Per-test timeout: the case's own budget plus 45 s of headroom.
      fc.timeoutMs + 45_000,
    );
  }
});
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Real-work UAT (DM) — human-style prompts that trigger genuine work
3
+ * (multi-tool / web research / sub-agents / background workers), asserting the
4
+ * status-surface + reply-ordering invariants the conversational fuzz never
5
+ * exercised. The status-dark, orphaned-reply-fragment, and late-reply bugs only
6
+ * appear when the agent actually does work; these prompts provoke it in a human
7
+ * voice, `collectTurn` captures the whole bot-message sequence, and `analyzeTurn`
8
+ * flags the known bug signatures. See real-work-prompts.ts for rationale + the
9
+ * mtcute harness limits.
10
+ */
11
+ import { describe, it, expect } from "vitest";
12
+ import { spinUp } from "../harness.js";
13
+ import {
14
+ REAL_WORK_CASES,
15
+ collectTurn,
16
+ analyzeTurn,
17
+ summarizeTurn,
18
+ } from "../real-work-prompts.js";
19
+
20
/**
 * DM variant of the real-work suite: registers one test per entry of
 * REAL_WORK_CASES. Each test spins up its own scenario, collects a full turn
 * against the bot's user id, logs a summary, fails on any invariant violation
 * reported by analyzeTurn, and always tears the scenario down afterwards.
 */
describe("uat: real-work DM — status surface + ordering under genuine work", () => {
  for (const workCase of REAL_WORK_CASES) {
    const title = `[real-work] ${workCase.name} (${workCase.kind}) — answer lands, surface holds`;

    const runCase = async (): Promise<void> => {
      const scenario = await spinUp({ agent: "test-harness" });
      try {
        const turnOptions = {
          timeoutMs: workCase.timeoutMs,
          minAnswerChars: workCase.minAnswerChars,
        };
        const turn = await collectTurn(
          scenario.driver,
          scenario.botUserId,
          scenario.driverUserId,
          workCase.prompt,
          turnOptions,
        );

        // Forensic output: printed for passing cases too, so dark feeds, late
        // fragments, and surface gaps can be spotted from the logs.
        console.log(summarizeTurn(workCase.name, turn));
        if (turn.answer != null) {
          const excerpt = JSON.stringify(turn.answer.text.slice(0, 180));
          console.log(`[real-work] ${workCase.name} answer: ${excerpt}`);
        }

        const report = analyzeTurn(turn, {
          requireSurface: workCase.requireSurface,
          chatId: scenario.botUserId,
        });

        // Warnings are surfaced in the log only.
        for (const warning of report.warnings) {
          console.warn(`[real-work] ${workCase.name}: WARN ${warning.code}: ${warning.detail}`);
        }

        // Any violation fails the case, listing each one on its own line.
        const violationCount = report.violations.length;
        if (violationCount > 0) {
          const bullets = report.violations.map((v) => ` - ${v.code}: ${v.detail}`);
          throw new Error(
            `[real-work] ${workCase.name}: ${violationCount} invariant violation(s):\n${bullets.join("\n")}`,
          );
        }

        expect(turn.answer).not.toBeNull();
      } finally {
        await scenario.tearDown();
      }
    };

    // Per-test timeout: the case's own budget plus 45 s of headroom.
    it(title, runCase, workCase.timeoutMs + 45_000);
  }
});