switchroom 0.15.45 → 0.16.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +122 -88
- package/dist/auth-broker/index.js +463 -177
- package/dist/cli/autoaccept-poll.js +4842 -35
- package/dist/cli/drive-write-pretool.mjs +17 -14
- package/dist/cli/notion-write-pretool.mjs +117 -86
- package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
- package/dist/cli/self-improve-stop.mjs +428 -0
- package/dist/cli/skill-validate-pretool.mjs +72 -72
- package/dist/cli/switchroom.js +3158 -1178
- package/dist/host-control/main.js +2833 -355
- package/dist/vault/approvals/kernel-server.js +7479 -7439
- package/dist/vault/broker/server.js +11312 -11272
- package/examples/minimal.yaml +1 -0
- package/examples/switchroom.yaml +1 -0
- package/package.json +3 -3
- package/profiles/_base/start.sh.hbs +88 -1
- package/profiles/_shared/execution-discipline.md.hbs +18 -0
- package/profiles/default/CLAUDE.md.hbs +0 -19
- package/telegram-plugin/.claude-plugin/plugin.json +2 -2
- package/telegram-plugin/answer-stream-flag.ts +12 -49
- package/telegram-plugin/answer-stream.ts +5 -150
- package/telegram-plugin/auth-snapshot-format.ts +280 -48
- package/telegram-plugin/auto-fallback-fleet.ts +44 -1
- package/telegram-plugin/context-exhaustion.ts +12 -0
- package/telegram-plugin/demo-mask.ts +154 -0
- package/telegram-plugin/dist/bridge/bridge.js +167 -124
- package/telegram-plugin/dist/gateway/gateway.js +3039 -1159
- package/telegram-plugin/dist/server.js +215 -172
- package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
- package/telegram-plugin/draft-stream.ts +47 -410
- package/telegram-plugin/final-answer-detect.ts +17 -12
- package/telegram-plugin/fleet-fallback-resume.ts +131 -0
- package/telegram-plugin/format.ts +56 -19
- package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
- package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
- package/telegram-plugin/gateway/auth-command.ts +70 -14
- package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
- package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
- package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
- package/telegram-plugin/gateway/current-turn-map.ts +188 -0
- package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
- package/telegram-plugin/gateway/effort-command.ts +8 -3
- package/telegram-plugin/gateway/emission-authority.ts +369 -0
- package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
- package/telegram-plugin/gateway/gateway.ts +1837 -291
- package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
- package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
- package/telegram-plugin/gateway/represent-guard.ts +72 -0
- package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
- package/telegram-plugin/gateway/status-surface-log.ts +14 -3
- package/telegram-plugin/history.ts +33 -11
- package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
- package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
- package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
- package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
- package/telegram-plugin/issues-card.ts +4 -0
- package/telegram-plugin/model-unavailable.ts +124 -0
- package/telegram-plugin/narrative-dedup.ts +69 -0
- package/telegram-plugin/over-ping-safety-net.ts +70 -4
- package/telegram-plugin/package.json +3 -3
- package/telegram-plugin/pending-work-progress.ts +12 -0
- package/telegram-plugin/permission-rule.ts +32 -5
- package/telegram-plugin/permission-title.ts +152 -9
- package/telegram-plugin/quota-check.ts +13 -0
- package/telegram-plugin/quota-watch.ts +135 -7
- package/telegram-plugin/registry/turns-schema.test.ts +24 -0
- package/telegram-plugin/registry/turns-schema.ts +9 -0
- package/telegram-plugin/runtime-metrics.ts +13 -0
- package/telegram-plugin/session-tail.ts +96 -11
- package/telegram-plugin/silence-poke.ts +170 -24
- package/telegram-plugin/slot-banner-driver.ts +3 -0
- package/telegram-plugin/status-no-truncate.ts +44 -0
- package/telegram-plugin/status-reactions.ts +20 -3
- package/telegram-plugin/stream-controller.ts +4 -23
- package/telegram-plugin/stream-reply-handler.ts +6 -24
- package/telegram-plugin/streaming-metrics.ts +91 -0
- package/telegram-plugin/subagent-watcher.ts +212 -66
- package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
- package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
- package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
- package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
- package/telegram-plugin/tests/answer-stream.test.ts +2 -411
- package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
- package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
- package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
- package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
- package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
- package/telegram-plugin/tests/demo-mask.test.ts +127 -0
- package/telegram-plugin/tests/draft-stream.test.ts +0 -827
- package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
- package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
- package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
- package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
- package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
- package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
- package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
- package/telegram-plugin/tests/feed-survival.test.ts +526 -0
- package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
- package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
- package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
- package/telegram-plugin/tests/history.test.ts +60 -0
- package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
- package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
- package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
- package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
- package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
- package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
- package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
- package/telegram-plugin/tests/permission-rule.test.ts +17 -0
- package/telegram-plugin/tests/permission-title.test.ts +206 -17
- package/telegram-plugin/tests/quota-watch.test.ts +252 -9
- package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
- package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
- package/telegram-plugin/tests/represent-guard.test.ts +162 -0
- package/telegram-plugin/tests/session-tail.test.ts +147 -3
- package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
- package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
- package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
- package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
- package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
- package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
- package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
- package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
- package/telegram-plugin/tests/telegram-format.test.ts +101 -6
- package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
- package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
- package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
- package/telegram-plugin/tests/tool-labels.test.ts +67 -0
- package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
- package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
- package/telegram-plugin/tests/welcome-text.test.ts +32 -3
- package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
- package/telegram-plugin/tool-activity-summary.ts +375 -58
- package/telegram-plugin/turn-liveness-floor.ts +240 -0
- package/telegram-plugin/uat/assertions.ts +115 -0
- package/telegram-plugin/uat/driver.ts +68 -0
- package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
- package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
- package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
- package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
- package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
- package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
- package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
- package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
- package/telegram-plugin/welcome-text.ts +13 -1
- package/telegram-plugin/worker-activity-feed.ts +157 -82
- package/telegram-plugin/draft-transport.ts +0 -122
- package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
- package/telegram-plugin/tests/draft-transport.test.ts +0 -211
|
@@ -152,12 +152,13 @@ describe('handleInjectCommand — outcome=ok_no_output', () => {
|
|
|
152
152
|
expect(replies[0].text).toContain('empty capture')
|
|
153
153
|
})
|
|
154
154
|
|
|
155
|
-
it('
|
|
155
|
+
it('uses silentNote for /clear (context cleared — fresh slate)', async () => {
|
|
156
156
|
const inject = vi.fn().mockResolvedValue(noOutputResult('/clear'))
|
|
157
157
|
const { deps, replies } = makeDeps({ getArgs: () => '/clear', inject })
|
|
158
158
|
await handleInjectCommand(fakeCtx(), deps)
|
|
159
159
|
expect(replies[0].opts?.accent).toBe('done')
|
|
160
160
|
expect(replies[0].text).toContain('<code>/clear</code>')
|
|
161
|
+
expect(replies[0].text).toContain('context cleared')
|
|
161
162
|
expect(replies[0].text).not.toContain('empty capture')
|
|
162
163
|
expect(replies[0].text).not.toContain('<pre>')
|
|
163
164
|
})
|
|
@@ -26,7 +26,7 @@ describe("validateMs365Preview", () => {
|
|
|
26
26
|
toolName: "mcp__ms-365__upload-file-content",
|
|
27
27
|
itemId: "01ABCDEFG",
|
|
28
28
|
itemDisplayName: "Q3-Strategy.docx",
|
|
29
|
-
accountEmail: "
|
|
29
|
+
accountEmail: "bob@example.com",
|
|
30
30
|
};
|
|
31
31
|
|
|
32
32
|
it("accepts a minimal valid preview", () => {
|
|
@@ -93,7 +93,7 @@ describe("buildMs365CardText", () => {
|
|
|
93
93
|
toolName: "mcp__ms-365__upload-file-content",
|
|
94
94
|
itemId: "01ABCDEFG",
|
|
95
95
|
itemDisplayName: "Q3-Strategy.docx",
|
|
96
|
-
accountEmail: "
|
|
96
|
+
accountEmail: "bob@example.com",
|
|
97
97
|
};
|
|
98
98
|
|
|
99
99
|
it("includes agent, tool, item, account", () => {
|
|
@@ -102,7 +102,7 @@ describe("buildMs365CardText", () => {
|
|
|
102
102
|
expect(text).toContain("ms-365__upload-file-content");
|
|
103
103
|
expect(text).toContain("Q3-Strategy.docx");
|
|
104
104
|
expect(text).toContain("01ABCDEFG");
|
|
105
|
-
expect(text).toContain("
|
|
105
|
+
expect(text).toContain("bob@example.com");
|
|
106
106
|
});
|
|
107
107
|
|
|
108
108
|
it("omits ID line for new files", () => {
|
|
@@ -183,7 +183,7 @@ function makeMsg(overrides: Partial<RequestMs365ApprovalMessage> = {}): RequestM
|
|
|
183
183
|
toolName: "mcp__ms-365__upload-file-content",
|
|
184
184
|
itemId: "01ABC",
|
|
185
185
|
itemDisplayName: "Strategy.docx",
|
|
186
|
-
accountEmail: "
|
|
186
|
+
accountEmail: "bob@example.com",
|
|
187
187
|
},
|
|
188
188
|
ttlMs: 5 * 60 * 1000,
|
|
189
189
|
...overrides,
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* represent-guard.ts — the duplicate-represent guard for the obligation sweep,
|
|
3
|
+
* extracted from obligationSweep so the "satisfied-but-misdetected obligation
|
|
4
|
+
* must NOT re-fire" decision (#2472) is EXECUTABLE in a pure unit test.
|
|
5
|
+
*
|
|
6
|
+
* The bug (#2472): obligation_represent re-fired for the same origin_turn_id even
|
|
7
|
+
* after the agent had already answered represent_count=1 with a reply tool call,
|
|
8
|
+
* producing a second near-identical message. The reply landed but its routing did
|
|
9
|
+
* not resolve back to the origin, so the ledger's normal close path missed it —
|
|
10
|
+
* and the represent branch (unlike the escalate branch) had no belt-and-braces
|
|
11
|
+
* outbound-history check before re-firing.
|
|
12
|
+
*
|
|
13
|
+
* This helper is the decision the sweep's represent branch now consults. PURE —
|
|
14
|
+
* no Telegram, no SQLite; the gateway injects `hasOutboundDeliveredSince` as a
|
|
15
|
+
* predicate. The single load-bearing subtlety lives here in one testable place:
|
|
16
|
+
*
|
|
17
|
+
* The cutoff is `lastRepresentedAt` (the time of the PREVIOUS represent), NOT
|
|
18
|
+
* `openedAt`. On the FIRST represent (`lastRepresentedAt` undefined) the guard
|
|
19
|
+
* is a no-op, so the genuine "agent wrote a plain-text answer and never called
|
|
20
|
+
* the reply tool" case still re-presents ONCE. Only the SECOND-and-later
|
|
21
|
+
* represent is gated — exactly where a reply that landed BETWEEN fires must
|
|
22
|
+
* suppress the re-ask. A reply that predates the last represent (e.g. the
|
|
23
|
+
* original plain-text answer) does not count, because it is not evidence the
|
|
24
|
+
* most recent represent was answered.
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
/** The obligation fields the represent guard inspects. */
|
|
28
|
+
export interface RepresentGuardObligation {
|
|
29
|
+
readonly originTurnId: string
|
|
30
|
+
readonly chatId: string
|
|
31
|
+
readonly threadId?: number
|
|
32
|
+
/** Wall-clock ms this obligation was most recently re-presented, if ever. */
|
|
33
|
+
readonly lastRepresentedAt?: number
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
export interface RepresentGuardDeps {
|
|
37
|
+
/** True when history is available to query (else the guard never suppresses). */
|
|
38
|
+
historyEnabled: boolean
|
|
39
|
+
/**
|
|
40
|
+
* Has a genuine assistant reply been delivered to this chat (optionally scoped
|
|
41
|
+
* to thread) at or after `sinceMs`? Wraps history.hasOutboundDeliveredSince.
|
|
42
|
+
*
|
|
43
|
+
* For the represent guard the gateway binds this with a LOW minChars (#2474
|
|
44
|
+
* follow-up): ANY real reply to the turn — even a terse "Yes — done." — means
|
|
45
|
+
* the user was answered and the duplicate represent must be suppressed. The
|
|
46
|
+
* 200-char "substantive" proxy is the ESCALATE branch's concern, not this one;
|
|
47
|
+
* applying it here left short-but-real replies failing to suppress the duplicate
|
|
48
|
+
* (the #2472 gap). The underlying query only counts recordOutbound rows, so
|
|
49
|
+
* typing indicators / progress-card edits are never miscounted as a reply.
|
|
50
|
+
*/
|
|
51
|
+
hasOutboundDeliveredSince: (chatId: string, sinceMs: number, threadId?: number) => boolean
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* Decide whether a represent for `o` should be SUPPRESSED because the agent has
|
|
56
|
+
* already delivered a reply since the obligation was last re-presented.
|
|
57
|
+
*
|
|
58
|
+
* Returns true ⇒ the obligation is satisfied-but-misdetected; the caller closes
|
|
59
|
+
* it silently and does NOT re-fire. Returns false ⇒ proceed with the represent
|
|
60
|
+
* (first represent always proceeds; a represent with no reply since the last one
|
|
61
|
+
* proceeds; an unavailable history proceeds — never suppress on doubt).
|
|
62
|
+
*/
|
|
63
|
+
export function shouldSuppressRepresent(
|
|
64
|
+
o: RepresentGuardObligation,
|
|
65
|
+
deps: RepresentGuardDeps,
|
|
66
|
+
): boolean {
|
|
67
|
+
if (!deps.historyEnabled) return false
|
|
68
|
+
// First represent: nothing to compare against — let the single re-ask fire so
|
|
69
|
+
// the genuine plain-text-no-reply case is preserved.
|
|
70
|
+
if (o.lastRepresentedAt == null) return false
|
|
71
|
+
return deps.hasOutboundDeliveredSince(o.chatId, o.lastRepresentedAt, o.threadId)
|
|
72
|
+
}
|
|
@@ -12,6 +12,7 @@ function turn(overrides: Partial<StatusSurfaceTurnView> = {}): StatusSurfaceTurn
|
|
|
12
12
|
sessionThreadId: undefined,
|
|
13
13
|
startedAt: 1_780_000_000_000,
|
|
14
14
|
toolCallCount: 0,
|
|
15
|
+
labeledToolCount: 0,
|
|
15
16
|
activityMessageId: null,
|
|
16
17
|
activityEverOpened: false,
|
|
17
18
|
activityDrainFailures: 0,
|
|
@@ -35,7 +36,7 @@ describe('formatTurnLifecycle', () => {
|
|
|
35
36
|
const line = formatTurnLifecycle(
|
|
36
37
|
'clear',
|
|
37
38
|
'turn_end',
|
|
38
|
-
turn({ sessionThreadId: 3, toolCallCount: 5, activityMessageId: 42, activityEverOpened: true, replyCalled: true, finalAnswerDelivered: true }),
|
|
39
|
+
turn({ sessionThreadId: 3, toolCallCount: 5, labeledToolCount: 5, activityMessageId: 42, activityEverOpened: true, replyCalled: true, finalAnswerDelivered: true }),
|
|
39
40
|
1_780_000_300_000, // +300s
|
|
40
41
|
)
|
|
41
42
|
expect(line).toContain('turn-lifecycle clear reason=turn_end')
|
|
@@ -63,7 +64,7 @@ describe('formatTurnLifecycle', () => {
|
|
|
63
64
|
describe('detectStatusSurfaceDegraded', () => {
|
|
64
65
|
it('flags a turn that did tool work but never opened the feed due to send failures (the resume-400 signature)', () => {
|
|
65
66
|
const d = detectStatusSurfaceDegraded(
|
|
66
|
-
turn({ toolCallCount: 3, activityEverOpened: false, activityDrainFailures: 10 }),
|
|
67
|
+
turn({ toolCallCount: 3, labeledToolCount: 3, activityEverOpened: false, activityDrainFailures: 10 }),
|
|
67
68
|
)
|
|
68
69
|
expect(d).not.toBeNull()
|
|
69
70
|
expect(d!.reason).toBe('feed-never-opened')
|
|
@@ -75,7 +76,7 @@ describe('detectStatusSurfaceDegraded', () => {
|
|
|
75
76
|
// the sticky activityEverOpened keeps this from false-positiving.
|
|
76
77
|
expect(
|
|
77
78
|
detectStatusSurfaceDegraded(
|
|
78
|
-
turn({ toolCallCount: 4, activityMessageId: null, activityEverOpened: true, activityDrainFailures: 0 }),
|
|
79
|
+
turn({ toolCallCount: 4, labeledToolCount: 4, activityMessageId: null, activityEverOpened: true, activityDrainFailures: 0 }),
|
|
79
80
|
),
|
|
80
81
|
).toBeNull()
|
|
81
82
|
})
|
|
@@ -83,7 +84,7 @@ describe('detectStatusSurfaceDegraded', () => {
|
|
|
83
84
|
it('does NOT flag a turn that never attempted a feed send (e.g. ack-first suppression)', () => {
|
|
84
85
|
expect(
|
|
85
86
|
detectStatusSurfaceDegraded(
|
|
86
|
-
turn({ toolCallCount: 2, activityEverOpened: false, activityDrainFailures: 0 }),
|
|
87
|
+
turn({ toolCallCount: 2, labeledToolCount: 2, activityEverOpened: false, activityDrainFailures: 0 }),
|
|
87
88
|
),
|
|
88
89
|
).toBeNull()
|
|
89
90
|
})
|
|
@@ -30,6 +30,17 @@ export interface StatusSurfaceTurnView {
|
|
|
30
30
|
sessionThreadId: number | undefined
|
|
31
31
|
startedAt: number
|
|
32
32
|
toolCallCount: number
|
|
33
|
+
/**
|
|
34
|
+
* Count of tool_label events that passed the surface-tool guard this turn —
|
|
35
|
+
* i.e. the number of surfaced tool steps (Telegram surface tools and suppressed tools excluded). This
|
|
36
|
+
* is the deterministic single source of truth for the `tools=` lifecycle
|
|
37
|
+
* field and the `✓ N steps` activity-feed total. Incremented in
|
|
38
|
+
* `case 'tool_label':` AFTER the `isTelegramSurfaceTool` guard so that
|
|
39
|
+
* reply/stream_reply/edit_message/react are never counted. send_typing and
|
|
40
|
+
* sync_retain are suppressed at the hook level (computeLabel returns null)
|
|
41
|
+
* and never arrive as tool_label events, so they are excluded automatically.
|
|
42
|
+
*/
|
|
43
|
+
labeledToolCount: number
|
|
33
44
|
/** Live activity-feed message id; null until the first send captures it. */
|
|
34
45
|
activityMessageId: number | null
|
|
35
46
|
/**
|
|
@@ -67,7 +78,7 @@ export function formatTurnLifecycle(
|
|
|
67
78
|
return (
|
|
68
79
|
`turn-lifecycle ${action} reason=${reason} turnId=${t.turnId} ` +
|
|
69
80
|
`chat=${t.sessionChatId} thread=${t.sessionThreadId ?? '-'} ` +
|
|
70
|
-
`tools=${t.
|
|
81
|
+
`tools=${t.labeledToolCount} activityMsgId=${t.activityMessageId ?? 'none'} ` +
|
|
71
82
|
`feedOpened=${t.activityEverOpened} drainFailures=${t.activityDrainFailures} ` +
|
|
72
83
|
`replyCalled=${t.replyCalled} finalAnswer=${t.finalAnswerDelivered} age_ms=${ageMs}`
|
|
73
84
|
)
|
|
@@ -89,13 +100,13 @@ export function formatTurnLifecycle(
|
|
|
89
100
|
export function detectStatusSurfaceDegraded(
|
|
90
101
|
t: StatusSurfaceTurnView,
|
|
91
102
|
): { reason: string; detail: string } | null {
|
|
92
|
-
if (t.
|
|
103
|
+
if (t.labeledToolCount === 0) return null
|
|
93
104
|
if (t.activityEverOpened) return null
|
|
94
105
|
if (t.activityDrainFailures === 0) return null
|
|
95
106
|
return {
|
|
96
107
|
reason: 'feed-never-opened',
|
|
97
108
|
detail:
|
|
98
|
-
`tools=${t.
|
|
109
|
+
`tools=${t.labeledToolCount} drainFailures=${t.activityDrainFailures} ` +
|
|
99
110
|
`activityMsgId=none — the live activity feed failed every send this turn ` +
|
|
100
111
|
`(card was dark despite tool work)`,
|
|
101
112
|
}
|
|
@@ -557,11 +557,26 @@ export function getRecentOutboundCount(
|
|
|
557
557
|
* SUBSTANTIVE: we never suppress escalation on a bare ack ("on it", "give me a
|
|
558
558
|
* sec") — an agent that acks then ghosts must still escalate. The history schema
|
|
559
559
|
* does not store a done/substantive flag, so we approximate: a row counts only
|
|
560
|
-
* when LENGTH(text) >= 200
|
|
561
|
-
* final-answer-detect.ts). This is false-negative-safe
|
|
562
|
-
* answer that happens to be < 200 chars
|
|
563
|
-
* the conservative (safe) outcome. A
|
|
564
|
-
*
|
|
560
|
+
* when LENGTH(text) >= `minChars` (default 200, the FINAL_ANSWER_MIN_CHARS
|
|
561
|
+
* constant from final-answer-detect.ts). This is false-negative-safe for the
|
|
562
|
+
* escalate branch: a genuine substantive answer that happens to be < 200 chars
|
|
563
|
+
* will still fire an escalation, which is the conservative (safe) outcome. A
|
|
564
|
+
* schema column would be more precise but is disproportionate for this predicate;
|
|
565
|
+
* the reviewer accepted this approach.
|
|
566
|
+
*
|
|
567
|
+
* `minChars` semantics (decoupled per caller, #2474 follow-up):
|
|
568
|
+
* - The ESCALATE branch (Fix 4) keeps the 200 default: it must not stand down an
|
|
569
|
+
* escalation on a mere ack, so it still demands a substantive-LENGTH outbound.
|
|
570
|
+
* - The duplicate-represent GUARD (#2472) passes a LOW value (1): for that path
|
|
571
|
+
* ANY genuine assistant reply to the turn — even a terse "Yes — done." or
|
|
572
|
+
* "Merged, all three landed." — means the user was answered, so the duplicate
|
|
573
|
+
* represent must be suppressed. The 200-char proxy was borrowed from the
|
|
574
|
+
* escalate branch and is WRONG for the guard: a short-but-real reply left the
|
|
575
|
+
* duplicate-represent bug (#2472) alive. This is safe because the rows this
|
|
576
|
+
* query counts (role='assistant') are ONLY ever written by recordOutbound —
|
|
577
|
+
* i.e. real bot→user messages (reply / stream_reply / silent-anchor content /
|
|
578
|
+
* command acks). Typing indicators and progress-card edits NEVER call
|
|
579
|
+
* recordOutbound, so they cannot be miscounted as "the user was answered".
|
|
565
580
|
*
|
|
566
581
|
* `threadId` semantics:
|
|
567
582
|
* - undefined → any message in the chat regardless of thread (DMs + supergroups)
|
|
@@ -575,16 +590,23 @@ export function hasOutboundDeliveredSince(
|
|
|
575
590
|
chatId: string,
|
|
576
591
|
sinceMs: number,
|
|
577
592
|
threadId?: number | null,
|
|
593
|
+
minChars = 200,
|
|
578
594
|
): boolean {
|
|
579
595
|
try {
|
|
580
596
|
const cutoffSec = Math.floor(sinceMs / 1000)
|
|
581
|
-
|
|
582
|
-
//
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
//
|
|
597
|
+
// Clamp to >= 1 so the predicate never counts an empty/whitespace-only row
|
|
598
|
+
// (a degenerate outbound) as a delivered reply, even if a caller passes 0.
|
|
599
|
+
const minLen = Math.max(1, Math.floor(minChars))
|
|
600
|
+
const params: unknown[] = [chatId, cutoffSec, minLen]
|
|
601
|
+
// LENGTH(text) >= minChars scopes to replies of at least the caller's
|
|
602
|
+
// threshold. ESCALATE passes the default 200 (substantive-only — never stand
|
|
603
|
+
// down on a mere ack). The duplicate-represent GUARD passes a low value so a
|
|
604
|
+
// terse-but-real reply counts (#2472/#2474). The `done` flag is not stored in
|
|
605
|
+
// the history schema, so length is the closest available proxy; rows here are
|
|
606
|
+
// only ever recordOutbound writes (real bot→user sends), so progress-card
|
|
607
|
+
// edits / typing indicators are structurally excluded.
|
|
586
608
|
let sql =
|
|
587
|
-
"SELECT 1 FROM messages WHERE chat_id = ? AND role = 'assistant' AND ts >= ? AND LENGTH(text) >=
|
|
609
|
+
"SELECT 1 FROM messages WHERE chat_id = ? AND role = 'assistant' AND ts >= ? AND LENGTH(text) >= ?"
|
|
588
610
|
if (threadId !== undefined) {
|
|
589
611
|
if (threadId === null) {
|
|
590
612
|
sql += ' AND thread_id IS NULL'
|
|
@@ -297,6 +297,32 @@ async function main() {
|
|
|
297
297
|
const markerPath = findNearestMarker(targetDir)
|
|
298
298
|
if (markerPath == null) process.exit(0)
|
|
299
299
|
|
|
300
|
+
// Own-agent marker guard: suppress the agent's own CLAUDE.md / AGENTS.md /
|
|
301
|
+
// AGENT.md so it is never injected as additionalContext. The agent's own
|
|
302
|
+
// marker is already in the system prompt (baked by start.sh via
|
|
303
|
+
// --append-system-prompt); re-injecting it wastes ~30KB per session.
|
|
304
|
+
//
|
|
305
|
+
// The existing isUnderAgentWorkspace guard only blocks paths under the
|
|
306
|
+
// agent's workspace/ subdirectory. It misses the agent's start cwd
|
|
307
|
+
// (/home/.../.switchroom/agents/<name>) because that guard computes against
|
|
308
|
+
// workspace/, not agentDir itself. This marker-path check closes that gap.
|
|
309
|
+
//
|
|
310
|
+
// We do NOT add a "targetDir under startCwd" directory guard because that
|
|
311
|
+
// would wrongly suppress a legitimate worktree repo the operator has checked
|
|
312
|
+
// out inside the agent dir (e.g. agentDir/workspace/ repos) — the directory
|
|
313
|
+
// guard would catch those too. The marker-path equality check is surgical:
|
|
314
|
+
// only the exact CLAUDE.md / AGENTS.md / AGENT.md at agentDir root is blocked;
|
|
315
|
+
// any nested repo's marker injects normally.
|
|
316
|
+
if (agentName) {
|
|
317
|
+
const startCwd = normalize(
|
|
318
|
+
process.env.SWITCHROOM_AGENT_START_CWD ??
|
|
319
|
+
join(home, '.switchroom', 'agents', agentName),
|
|
320
|
+
)
|
|
321
|
+
for (const m of MARKER_FILES) {
|
|
322
|
+
if (markerPath === join(startCwd, m)) process.exit(0)
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
|
|
300
326
|
const state = readSessionState(sessionId)
|
|
301
327
|
|
|
302
328
|
// Already-loaded dedup — the load-once-per-repo-per-session invariant.
|
|
@@ -313,6 +313,11 @@ function updateRow(dbPath, { id, status, resultSummary, now, asyncLaunch }, done
|
|
|
313
313
|
setImmediate(() => {
|
|
314
314
|
try {
|
|
315
315
|
const db = new SnapDatabaseSync(snapDbPath)
|
|
316
|
+
// Concurrency: per-connection busy_timeout so this hook's writes
|
|
317
|
+
// wait-and-retry instead of failing with SQLITE_BUSY under concurrent
|
|
318
|
+
// sub-agent dispatch. Set on the real open so BOTH the node:sqlite
|
|
319
|
+
// (production) and bun:sqlite branches are armed (#2535 review).
|
|
320
|
+
try { db.exec('PRAGMA busy_timeout = 5000') } catch { /* best-effort */ }
|
|
316
321
|
const row = db.prepare(SELECT_SQL).get(snapId)
|
|
317
322
|
const isBackground = row != null && row.background === 1
|
|
318
323
|
if (isBackground) {
|
|
@@ -184,6 +184,14 @@ function writeRow(dbPath, { id, parentSessionId, parentTurnKey, agentType, descr
|
|
|
184
184
|
setImmediate(() => {
|
|
185
185
|
try {
|
|
186
186
|
const db = new SnapDatabaseSync(snapDbPath)
|
|
187
|
+
// Concurrency: this hook writes registry.db from a separate process
|
|
188
|
+
// that contends with the gateway's subagent-watcher + the PostToolUse
|
|
189
|
+
// hook. Without a busy_timeout, the contending write fails IMMEDIATELY
|
|
190
|
+
// with SQLITE_BUSY ("database is locked") when several sub-agents
|
|
191
|
+
// dispatch at once, dropping the row → NULL jsonl_agent_id/parent_turn_key.
|
|
192
|
+
// Per-connection PRAGMA, set on the real open so BOTH the node:sqlite
|
|
193
|
+
// (production) and bun:sqlite branches are armed.
|
|
194
|
+
try { db.exec('PRAGMA busy_timeout = 5000') } catch { /* best-effort */ }
|
|
187
195
|
db.exec(snapSchemaSql)
|
|
188
196
|
// Migrate older DBs that pre-date jsonl_agent_id.
|
|
189
197
|
const hasJsonlCol = db.prepare(snapMigrateSql).get()
|
|
@@ -149,30 +149,42 @@ export function computeLabel(toolName, input) {
|
|
|
149
149
|
// the progress card path that used to surface this was retired
|
|
150
150
|
// when `progressDriver` was nulled out in #1122 PR3.
|
|
151
151
|
const slug = clip(asText(i.skill), 64)
|
|
152
|
+
// Empty-slug Skill stays suppressed (degenerate/malformed call): the
|
|
153
|
+
// liveness feed-open backstops visibility for a tool-less turn, so this
|
|
154
|
+
// does not need a label. Keeps the #2111 sidecar contract.
|
|
152
155
|
return slug ? `Running skill ${slug}` : null
|
|
153
156
|
}
|
|
154
157
|
}
|
|
155
158
|
|
|
156
159
|
// MCP tools.
|
|
157
160
|
if (typeof toolName === 'string' && toolName.startsWith('mcp__')) {
|
|
158
|
-
//
|
|
161
|
+
// Telegram-plugin tools: matched by the key-agnostic regex so renames/forks work.
|
|
162
|
+
// Strip the `mcp__<server>__` prefix to get just the tool suffix.
|
|
163
|
+
const TELEGRAM_PREFIX_RE = /^mcp__[^_].*?telegram__/
|
|
164
|
+
const telegramMatch = TELEGRAM_PREFIX_RE.exec(toolName)
|
|
165
|
+
if (telegramMatch) {
|
|
166
|
+
const suffix = toolName.slice(telegramMatch[0].length)
|
|
167
|
+
// Surface tools (reply, stream_reply, edit_message, react) are the
|
|
168
|
+
// conversation itself — suppress them from the activity feed entirely.
|
|
169
|
+
// Mirrors isTelegramSurfaceTool in tool-names.ts.
|
|
170
|
+
if (
|
|
171
|
+
suffix === 'reply' ||
|
|
172
|
+
suffix === 'stream_reply' ||
|
|
173
|
+
suffix === 'edit_message' ||
|
|
174
|
+
suffix === 'react'
|
|
175
|
+
) return null
|
|
176
|
+
if (suffix === 'get_recent_messages') return 'Reading chat history'
|
|
177
|
+
// send_typing and all other surface/control tools: suppress.
|
|
178
|
+
return null
|
|
179
|
+
}
|
|
180
|
+
// Explicit labels / suppressions for the hindsight server.
|
|
159
181
|
switch (toolName) {
|
|
160
|
-
case 'mcp__switchroom-telegram__reply':
|
|
161
|
-
case 'mcp__switchroom-telegram__stream_reply':
|
|
162
|
-
return 'Replying'
|
|
163
|
-
case 'mcp__switchroom-telegram__react': {
|
|
164
|
-
const emoji = clip(asText(i.emoji), 8)
|
|
165
|
-
return emoji ? `Reacting ${emoji}` : 'Reacting'
|
|
166
|
-
}
|
|
167
|
-
case 'mcp__switchroom-telegram__get_recent_messages':
|
|
168
|
-
return 'Reading chat history'
|
|
169
182
|
case 'mcp__hindsight__recall':
|
|
170
183
|
case 'mcp__hindsight__reflect':
|
|
171
184
|
return 'Searching memory'
|
|
172
185
|
case 'mcp__hindsight__retain':
|
|
173
186
|
return 'Saving memory'
|
|
174
187
|
// Explicit suppressions — return null so we don't emit a sidecar line.
|
|
175
|
-
case 'mcp__switchroom-telegram__send_typing':
|
|
176
188
|
case 'mcp__hindsight__sync_retain':
|
|
177
189
|
return null
|
|
178
190
|
}
|
|
@@ -182,13 +194,17 @@ export function computeLabel(toolName, input) {
|
|
|
182
194
|
// entirely by MCP tools read as pure silence (only a typing dot + the
|
|
183
195
|
// 👀 reaction) — the "I can't see what it's doing" report. Mirror the
|
|
184
196
|
// gateway's describeToolUse: friendly per-server labels, else a
|
|
185
|
-
// model-authored field, else a humanized tool name. NEVER label
|
|
186
|
-
//
|
|
197
|
+
// model-authored field, else a humanized tool name. NEVER label any
|
|
198
|
+
// Telegram surface/control tools (they ARE the conversation). Use the
|
|
199
|
+
// same regex predicate as isTelegramSurfaceTool in tool-names.ts so
|
|
200
|
+
// this works regardless of the plugin's registration key (clerk-telegram,
|
|
201
|
+
// switchroom-telegram, custom fork, …).
|
|
202
|
+
const TELEGRAM_SURFACE_RE = /^mcp__[^_].*?telegram__/
|
|
203
|
+
if (TELEGRAM_SURFACE_RE.test(toolName)) return null
|
|
187
204
|
const m = /^mcp__(.+?)__(.+)$/.exec(toolName)
|
|
188
205
|
if (!m) return null
|
|
189
206
|
const server = m[1].toLowerCase()
|
|
190
207
|
const tool = m[2].toLowerCase()
|
|
191
|
-
if (server === 'switchroom-telegram') return null
|
|
192
208
|
if (server === 'hindsight') return 'Working with memory'
|
|
193
209
|
if (server === 'google-workspace' || server === 'claude_ai_google_calendar')
|
|
194
210
|
return 'Checking your calendar'
|
|
@@ -213,7 +229,15 @@ export function computeLabel(toolName, input) {
|
|
|
213
229
|
return `Using ${tool.replace(/[-_]+/g, ' ')}`
|
|
214
230
|
}
|
|
215
231
|
|
|
216
|
-
|
|
232
|
+
// Never-null fallthrough: any unrecognized BUILT-IN tool (no mcp__ prefix,
|
|
233
|
+
// not matched above) gets a generic label rather than dropping its sidecar
|
|
234
|
+
// line. A null here was the dark-turn mechanism — if such a tool was a
|
|
235
|
+
// turn's first/only tool, no tool_label event fired, the activity feed
|
|
236
|
+
// never opened, and a working turn read as pure silence. Surface tools
|
|
237
|
+
// (reply/react/send_typing/sync_retain) return earlier and are also
|
|
238
|
+
// suppressed at the gateway's isTelegramSurfaceTool guard, so this does
|
|
239
|
+
// not resurface them.
|
|
240
|
+
return 'Working…'
|
|
217
241
|
}
|
|
218
242
|
|
|
219
243
|
function main() {
|
|
@@ -328,6 +328,10 @@ export function createIssuesCardHandle(
|
|
|
328
328
|
const sendOpts: Record<string, unknown> = {
|
|
329
329
|
parse_mode: "HTML",
|
|
330
330
|
disable_web_page_preview: true,
|
|
331
|
+
// Status card, not the user's answer — silence the open ping.
|
|
332
|
+
// (editMessageText ignores disable_notification, so the shared
|
|
333
|
+
// edit path below is unaffected.)
|
|
334
|
+
disable_notification: true,
|
|
331
335
|
...(opts.threadId != null ? { message_thread_id: opts.threadId } : {}),
|
|
332
336
|
};
|
|
333
337
|
|
|
@@ -84,6 +84,13 @@ export function detectModelUnavailable(
|
|
|
84
84
|
// resets 8:50am (Australia/Melbourne)".
|
|
85
85
|
'hit your limit',
|
|
86
86
|
'hit the limit',
|
|
87
|
+
// SESSION-cap wording: "You've hit your session limit · resets 5pm".
|
|
88
|
+
// A session cap is a quota exhaustion that frees in HOURS (its reset is a
|
|
89
|
+
// bare time-of-day, see parseResetTime's time-only branch) — recognising
|
|
90
|
+
// it here is what lets the time-only reset parse fire and keeps a
|
|
91
|
+
// session-capped account from the +7d weekly bench.
|
|
92
|
+
'session limit',
|
|
93
|
+
'session cap',
|
|
87
94
|
]
|
|
88
95
|
if (quotaSignals.some(s => lower.includes(s))) {
|
|
89
96
|
const resetAt = parseResetTime(sample)
|
|
@@ -192,9 +199,126 @@ function parseResetTime(text: string, parseTimeNow: Date = new Date()): Date | u
|
|
|
192
199
|
if (!Number.isNaN(d.getTime())) return d
|
|
193
200
|
}
|
|
194
201
|
|
|
202
|
+
// "resets 5pm (Australia/Melbourne)" / "resets 8:50am" / "resets 17:00 (UTC)"
|
|
203
|
+
// SESSION-cap wording: a time of day with NO month/day. This frees in
|
|
204
|
+
// HOURS, not a week — without this branch it falls through to undefined,
|
|
205
|
+
// and the 429 inference path then applies resolveExhaustUntil's +7d weekly
|
|
206
|
+
// floor, benching a session-capped account for a week. Must sit AFTER the
|
|
207
|
+
// calendar branch so "resets May 3, 11am" never matches here. The leading
|
|
208
|
+
// negative lookahead `(?!...)` rejects a month name so a date-bearing
|
|
209
|
+
// string can't fall into this time-only branch.
|
|
210
|
+
const timeOnly = text.match(
|
|
211
|
+
/resets?\s+(?:at\s+)?(?!(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[a-z]*\b)(\d{1,2})(?::(\d{2}))?\s*(am|pm)?\s*(?:\(([^)]+)\))?/i,
|
|
212
|
+
)
|
|
213
|
+
if (timeOnly) {
|
|
214
|
+
const d = resolveNextWallClock(
|
|
215
|
+
Number(timeOnly[1]),
|
|
216
|
+
timeOnly[2] ? Number(timeOnly[2]) : 0,
|
|
217
|
+
timeOnly[3]?.toLowerCase(),
|
|
218
|
+
timeOnly[4]?.trim(),
|
|
219
|
+
parseTimeNow,
|
|
220
|
+
)
|
|
221
|
+
if (d != null) return d
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
return undefined
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
/**
 * Resolve a bare wall-clock time ("5pm", "8:50am", "17:00") to the NEXT
 * occurrence of that time, tz-aware.
 *
 * @param hour12or24 Hour as written: 12-hour when `ampm` is given, else 24-hour.
 * @param minute Minute of the hour (0-59).
 * @param ampm Lower-cased 'am' / 'pm', or undefined for a 24-hour time.
 * @param tz IANA time zone name (e.g. "Australia/Melbourne"). When omitted
 *   the time is interpreted in UTC (best-effort).
 * @param nowDate The reference "now"; the result is strictly after it.
 * @returns The soonest future Date at that wall-clock time (rolls to the next
 *   calendar day when the time has already passed today), or `undefined` on
 *   bad input (out-of-range hour/minute, invalid `nowDate`, or an unknown tz).
 */
function resolveNextWallClock(
  hour12or24: number,
  minute: number,
  ampm: string | undefined,
  tz: string | undefined,
  nowDate: Date,
): Date | undefined {
  let hour = hour12or24
  if (ampm === 'pm' && hour < 12) hour += 12
  if (ampm === 'am' && hour === 12) hour = 0
  if (!Number.isFinite(hour) || hour > 23 || hour < 0) return undefined
  if (!Number.isFinite(minute) || minute > 59 || minute < 0) return undefined
  const nowMs = nowDate.getTime()
  if (!Number.isFinite(nowMs)) return undefined

  // Take "today" ONCE as the calendar date in the target tz, then step by
  // calendar days. Stepping by `now + k * 24h` and re-deriving the date can
  // skip a whole calendar date on a 23-hour (spring-forward) day, which would
  // return an occurrence 24h later than the true next one.
  const today = tzDateParts(nowDate, tz)
  if (today == null) return undefined

  // Today, tomorrow, and one spare day; `day + dayOffset` may overflow the
  // month — Date.UTC (inside wallClockToEpoch) normalizes that.
  for (let dayOffset = 0; dayOffset <= 2; dayOffset++) {
    const epoch = wallClockToEpoch(
      today.year, today.month, today.day + dayOffset, hour, minute, tz,
    )
    if (epoch != null && epoch > nowMs) return new Date(epoch)
  }
  // Shouldn't happen for valid input, but keep the function total.
  return undefined
}

/**
 * The y/m/d of `d` as seen in `tz` (UTC when tz omitted). `month` is
 * zero-based, matching Date.UTC. Null on a bad tz (or an invalid date, which
 * makes Intl formatting throw).
 */
function tzDateParts(
  d: Date,
  tz: string | undefined,
): { year: number; month: number; day: number } | null {
  if (!tz) {
    return { year: d.getUTCFullYear(), month: d.getUTCMonth(), day: d.getUTCDate() }
  }
  try {
    const fmt = new Intl.DateTimeFormat('en-US', {
      timeZone: tz, year: 'numeric', month: '2-digit', day: '2-digit',
    })
    const parts = Object.fromEntries(
      fmt.formatToParts(d).filter((p) => p.type !== 'literal').map((p) => [p.type, p.value]),
    )
    return {
      year: Number(parts.year),
      month: Number(parts.month) - 1, // Intl months are 1-based
      day: Number(parts.day),
    }
  } catch {
    return null
  }
}

/**
 * Convert a wall-clock time in an IANA tz to epoch-ms (null if the tz is
 * unknown). Resolves the tz's offset via Intl — NOT `new Date(localString)`,
 * which assumes the container TZ.
 *
 * The offset is resolved in two passes: the first pass samples the offset at
 * the wall time read as UTC, which can sit on the other side of a DST
 * transition from the real instant; the second pass re-samples the offset at
 * the first estimate so times within a few hours of a transition resolve
 * correctly. Kept local so this module stays dependency-free / pure-testable.
 *
 * `month` is zero-based; out-of-range `day` values roll over as in Date.UTC.
 */
function wallClockToEpoch(
  year: number, month: number, day: number, hour: number, minute: number, tz: string | undefined,
): number | null {
  const asUtc = Date.UTC(year, month, day, hour, minute, 0)
  if (!tz) return asUtc // no tz given → best-effort UTC
  try {
    const fmt = new Intl.DateTimeFormat('en-US', {
      timeZone: tz, year: 'numeric', month: '2-digit', day: '2-digit',
      hour: '2-digit', minute: '2-digit', second: '2-digit', hour12: false,
    })
    // How far ahead of UTC the tz's wall clock is at the given instant.
    const offsetAt = (instantMs: number): number => {
      const parts = Object.fromEntries(
        fmt.formatToParts(new Date(instantMs))
          .filter((p) => p.type !== 'literal')
          .map((p) => [p.type, p.value]),
      )
      const shown = Date.UTC(
        Number(parts.year), Number(parts.month) - 1, Number(parts.day),
        // `% 24`: some runtimes render midnight as hour "24" with hour12:false.
        Number(parts.hour) % 24, Number(parts.minute), Number(parts.second),
      )
      return shown - instantMs
    }
    const firstEstimate = asUtc - offsetAt(asUtc)
    return asUtc - offsetAt(firstEstimate)
  } catch {
    return null // unknown tz
  }
}
|
|
321
|
+
|
|
198
322
|
function parseRelativeDuration(s: string): number | null {
|
|
199
323
|
// "2h 15m" / "30m" / "45 seconds"
|
|
200
324
|
let total = 0
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Reducer-side narrative dedup gate (the correctness core of the
|
|
3
|
+
* JSONL-text-narrative primitive).
|
|
4
|
+
*
|
|
5
|
+
* A `text` / `sub_agent_text` JSONL block is one of two things:
|
|
6
|
+
*
|
|
7
|
+
* 1. DRAFT-THEN-SEND — the model composing its answer just before it
|
|
8
|
+
* calls `reply` / `stream_reply` with near-identical text. Surfacing
|
|
9
|
+
* it would double-print the answer (once as a transient narrative
|
|
10
|
+
* step, once as the canonical reply). It MUST be suppressed.
|
|
11
|
+
* 2. WORKING NARRATION — the agent's own commentary that is never sent
|
|
12
|
+
* to the user ("On it. Let me find the repo…", "Sent. Waiting on the
|
|
13
|
+
* build…"). It SHOULD be surfaced as a transient liveness step.
|
|
14
|
+
*
|
|
15
|
+
* A projector sees one JSONL line at a time and cannot know whether a
|
|
16
|
+
* later line is a reply tool_use, so the SHOW/SUPPRESS decision is a
|
|
17
|
+
* stateful, one-step-deferred decision made reducer-side (the gateway for
|
|
18
|
+
* the main agent, the subagent-watcher for sub/worker). This module is the
|
|
19
|
+
* pure, fully-unit-testable kernel of that decision — no I/O, no state of
|
|
20
|
+
* its own; the caller owns the `pendingNarrative` slot.
|
|
21
|
+
*
|
|
22
|
+
* The threshold heuristic deliberately matches the spirit of the #546
|
|
23
|
+
* outbound dedup (trim + lowercase + whitespace-collapse) so a draft and
|
|
24
|
+
* its reply compare equal the same way #546 considers them the same
|
|
25
|
+
* message.
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
/** Tools whose `input.text` IS the canonical answer surface. */
|
|
29
|
+
export const REPLY_TOOLS = new Set(['reply', 'stream_reply'])
|
|
30
|
+
|
|
31
|
+
/**
 * Canonicalize text before the prefix comparison: drop the markdown
 * emphasis / heading / quote characters (`*`, `_`, backtick, `>`, `#`, `~`),
 * squeeze every whitespace run to a single space, trim the ends, and
 * lowercase — so a markdown-decorated draft and its plain reply compare
 * equal.
 */
export function normalizeNarrative(s: string): string {
  const withoutMarks = s.replace(/[*_`>#~]/g, '')
  const singleSpaced = withoutMarks.replace(/\s+/g, ' ')
  return singleSpaced.trim().toLowerCase()
}
|
|
44
|
+
|
|
45
|
+
/**
 * Ratio of the longest common prefix to the length of the SHORTER of the two
 * normalized strings. Returns 0 when either side normalizes to the empty
 * string; otherwise a value in (0..1], or 0 when the first characters differ.
 */
export function prefixSimilarity(a: string, b: string): number {
  const left = normalizeNarrative(a)
  const right = normalizeNarrative(b)
  const shorter = Math.min(left.length, right.length)
  if (shorter === 0) return 0
  let shared = 0
  for (; shared < shorter; shared++) {
    if (left[shared] !== right[shared]) break
  }
  return shared / shorter
}
|
|
55
|
+
|
|
56
|
+
/**
|
|
57
|
+
* The single tunable. Longest-common-PREFIX ratio (not Levenshtein) is
|
|
58
|
+
* deliberate: a draft shares a long head with the sent answer even when the
|
|
59
|
+
* model trims a trailing sentence before sending. 0.8 over the shorter
|
|
60
|
+
* string tolerates that trim while rejecting the "Sent. Waiting…" +
|
|
61
|
+
* different-reply case (short string, near-zero shared prefix). Exported so
|
|
62
|
+
* the test pins it — a silent retune breaks a test.
|
|
63
|
+
*/
|
|
64
|
+
export const DRAFT_SUPPRESS_THRESHOLD = 0.8
|
|
65
|
+
|
|
66
|
+
/**
 * Decide whether `textBlock` is a draft-then-send of `replyText`.
 *
 * @returns TRUE when the prefix similarity of the two texts reaches
 *   DRAFT_SUPPRESS_THRESHOLD — the caller should SUPPRESS the text block.
 */
export function isDraftOfReply(textBlock: string, replyText: string): boolean {
  const sharedRatio = prefixSimilarity(textBlock, replyText)
  return sharedRatio >= DRAFT_SUPPRESS_THRESHOLD
}
|