@a1hvdy/cc-openclaw 0.27.4 → 0.27.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/channels/telegram-mirror/card-renderer.d.ts +7 -0
- package/dist/src/channels/telegram-mirror/card-renderer.js +61 -8
- package/dist/src/channels/telegram-mirror/commands.d.ts +13 -0
- package/dist/src/channels/telegram-mirror/commands.js +26 -0
- package/dist/src/channels/telegram-mirror/index.js +44 -1
- package/dist/src/channels/telegram-mirror/sync-commands.d.ts +26 -0
- package/dist/src/channels/telegram-mirror/sync-commands.js +18 -1
- package/dist/src/channels/telegram-mirror/turn-bridge.d.ts +1 -1
- package/dist/src/channels/telegram-mirror/turn-bridge.js +10 -2
- package/dist/src/constants.d.ts +17 -0
- package/dist/src/constants.js +18 -0
- package/dist/src/engines/persistent-session.d.ts +5 -0
- package/dist/src/engines/persistent-session.js +27 -0
- package/dist/src/lib/html-render.d.ts +15 -0
- package/dist/src/lib/html-render.js +36 -0
- package/dist/src/openai-compat/non-streaming-handler.js +18 -1
- package/dist/src/openai-compat/openai-compat.js +49 -1
- package/dist/src/openai-compat/request-coalescer.d.ts +77 -0
- package/dist/src/openai-compat/request-coalescer.js +157 -0
- package/dist/src/openai-compat/streaming-handler.d.ts +9 -1
- package/dist/src/openai-compat/streaming-handler.js +33 -4
- package/dist/src/session/watchdogs.d.ts +3 -0
- package/dist/src/session/watchdogs.js +6 -0
- package/dist/src/types.d.ts +4 -0
- package/package.json +1 -1
- package/dist/src/config/drift-detector.d.ts +0 -28
- package/dist/src/config/drift-detector.js +0 -74
- package/dist/src/lib/stale-pid-files.d.ts +0 -17
- package/dist/src/lib/stale-pid-files.js +0 -39
- package/dist/src/persistence/snapshot.d.ts +0 -18
- package/dist/src/persistence/snapshot.js +0 -31
- package/dist/src/persistence/wal.d.ts +0 -17
- package/dist/src/persistence/wal.js +0 -31
- package/dist/src/types/index.d.ts +0 -15
- package/dist/src/types/index.js +0 -15
- package/dist/src/types/session.d.ts +0 -48
- package/dist/src/types/session.js +0 -19
|
@@ -32,6 +32,8 @@ import { TTS_RULE } from './tts-rule.js';
|
|
|
32
32
|
import { extractUserMessage, } from './message-extractor.js';
|
|
33
33
|
import { handleNonStreaming } from './non-streaming-handler.js';
|
|
34
34
|
import { handleStreaming } from './streaming-handler.js';
|
|
35
|
+
import { getDedupWindowMs, computeSignature, findInFlight, registerLeader, awaitLeader, replayCoalesced, } from './request-coalescer.js';
|
|
36
|
+
import { resolveTurnTimeoutMs } from '../lib/env-overrides.js';
|
|
35
37
|
// Re-export for backward compat — Cluster B extracted these to dedicated
|
|
36
38
|
// modules; keep the original import surface stable for any external caller.
|
|
37
39
|
// See src/openai-compat/{mode-flags,session-key-resolver,prompts,tool-calls-parser,tool-results-serializer}.ts.
|
|
@@ -416,13 +418,48 @@ export async function handleChatCompletion(manager, body, headers, res) {
|
|
|
416
418
|
userMessage = `${toolBlock}\n\n${userMessage}`;
|
|
417
419
|
}
|
|
418
420
|
const completionId = `chatcmpl-${randomUUID().replace(/-/g, '').slice(0, 29)}`;
|
|
421
|
+
// ── v0.27.5 single-flight request coalescing (streaming path) ─────────────
|
|
422
|
+
// When OpenClaw retries a request whose stream it perceived as dead (the
|
|
423
|
+
// 2026-05-22 OOM/exit-137 incident), the retry carries a byte-identical body.
|
|
424
|
+
// Without this guard, session-manager's per-session send-chain SERIALIZES the
|
|
425
|
+
// retry into a SECOND full model run → a duplicate Telegram message. Here the
|
|
426
|
+
// first such request is the leader (runs once); a duplicate within the dedup
|
|
427
|
+
// window is a follower that replays the leader's result. FAIL-OPEN: any error,
|
|
428
|
+
// an empty/failed leader, or a leader that exceeds the turn timeout all fall
|
|
429
|
+
// THROUGH to a normal dispatch — this can never drop a real reply.
|
|
430
|
+
const dedupWindowMs = getDedupWindowMs();
|
|
431
|
+
let coalesceLeader;
|
|
432
|
+
if (isStreaming && dedupWindowMs > 0) {
|
|
433
|
+
try {
|
|
434
|
+
const sig = computeSignature(sessionName, sendInput);
|
|
435
|
+
const existing = findInFlight(sig, dedupWindowMs);
|
|
436
|
+
if (existing) {
|
|
437
|
+
const leaderResult = await awaitLeader(existing, resolveTurnTimeoutMs());
|
|
438
|
+
if (leaderResult && leaderResult.text.length > 0) {
|
|
439
|
+
replayCoalesced(res, completionId, resolvedModel, leaderResult);
|
|
440
|
+
emitTrajectory('response_complete', { engine, model: resolvedModel, latencyMs: Date.now() - _t0, ok: true, coalesced: true }, sessionName);
|
|
441
|
+
return; // duplicate served from the leader — no second model run
|
|
442
|
+
}
|
|
443
|
+
// leader failed / produced no text / timed out → fail-open below
|
|
444
|
+
}
|
|
445
|
+
else {
|
|
446
|
+
coalesceLeader = registerLeader(sig);
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
catch {
|
|
450
|
+
coalesceLeader = undefined; // any coalescer fault → behave as before
|
|
451
|
+
}
|
|
452
|
+
}
|
|
419
453
|
// Pillar B v0.4.1: bracket dispatch with try/finally so response_complete
|
|
420
454
|
// fires for both success and failure (the latter relabelled). Latency is
|
|
421
455
|
// measured from request_in's _t0 above.
|
|
422
456
|
let _ok = true;
|
|
423
457
|
try {
|
|
424
458
|
if (isStreaming) {
|
|
425
|
-
await handleStreaming(manager, sessionName, resolvedModel, sendInput, completionId, res, hasTools, extracted.slashCommand
|
|
459
|
+
await handleStreaming(manager, sessionName, resolvedModel, sendInput, completionId, res, hasTools, extracted.slashCommand,
|
|
460
|
+
// v0.27.5: capture this leader's result so a coalesced follower can
|
|
461
|
+
// replay it. handleStreaming calls this only on a successful turn.
|
|
462
|
+
coalesceLeader ? (r) => coalesceLeader.resolve(r) : undefined);
|
|
426
463
|
}
|
|
427
464
|
else {
|
|
428
465
|
await handleNonStreaming(manager, sessionName, resolvedModel, sendInput, completionId, res, hasTools, extracted.slashCommand);
|
|
@@ -433,6 +470,17 @@ export async function handleChatCompletion(manager, body, headers, res) {
|
|
|
433
470
|
throw err;
|
|
434
471
|
}
|
|
435
472
|
finally {
|
|
473
|
+
// v0.27.5: ALWAYS settle the leader so followers never hang. After a
|
|
474
|
+
// successful capture this is a no-op (resolve is idempotent); on error or
|
|
475
|
+
// an empty turn it resolves null → followers fail-open to a fresh run.
|
|
476
|
+
if (coalesceLeader) {
|
|
477
|
+
try {
|
|
478
|
+
coalesceLeader.resolve(null);
|
|
479
|
+
}
|
|
480
|
+
catch {
|
|
481
|
+
/* already settled */
|
|
482
|
+
}
|
|
483
|
+
}
|
|
436
484
|
let tokensIn;
|
|
437
485
|
let tokensOut;
|
|
438
486
|
try {
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Single-flight request coalescer for the openai-compat streaming path (v0.27.5).
|
|
3
|
+
*
|
|
4
|
+
* THE PROBLEM (2026-05-22 incident)
|
|
5
|
+
* ─────────────────────────────────
|
|
6
|
+
* When a turn is OOM-killed mid-stream (exit 137) — or simply runs long enough
|
|
7
|
+
* that OpenClaw's HTTP client perceives the SSE stream as dead — OpenClaw fires
|
|
8
|
+
* a RETRY with a byte-identical request body. session-manager.sendMessage()
|
|
9
|
+
* *serializes* concurrent same-session sends via a per-session promise chain
|
|
10
|
+
* (session-manager.ts:437-453) rather than coalescing them, so the retry runs
|
|
11
|
+
* the FULL turn a second time and delivers a second, identical Telegram message
|
|
12
|
+
* (the "two identical messages" the user reported).
|
|
13
|
+
*
|
|
14
|
+
* THE FIX
|
|
15
|
+
* ───────
|
|
16
|
+
* Classic single-flight (a.k.a. request coalescing): the FIRST request for a
|
|
17
|
+
* given signature becomes the "leader" and runs the model once; any duplicate
|
|
18
|
+
* arriving within DEDUP_WINDOW_MS becomes a "follower" that AWAITS the leader's
|
|
19
|
+
* result and replays it — no second subprocess, no divergent second generation.
|
|
20
|
+
* In the common retry-after-perceived-death case, OpenClaw has already abandoned
|
|
21
|
+
* the leader's connection, so only the follower delivers → exactly one message.
|
|
22
|
+
*
|
|
23
|
+
* SAFETY: FAIL-OPEN BY CONSTRUCTION
|
|
24
|
+
* ─────────────────────────────────
|
|
25
|
+
* The caller wraps every coalescer interaction in try/catch and, on ANY error
|
|
26
|
+
* (or a leader that produced empty text, or a leader that exceeds the turn
|
|
27
|
+
* timeout), falls THROUGH to a normal dispatch. The worst case this can produce
|
|
28
|
+
* is the prior behavior (a possible duplicate) — it can NEVER drop a real reply.
|
|
29
|
+
* That property is the whole point: the user's deepest pain is missing messages,
|
|
30
|
+
* so the duplicate defense must not be able to cause a miss.
|
|
31
|
+
*/
|
|
32
|
+
import type * as http from 'node:http';
|
|
33
|
+
/** The leader's captured turn output, replayed verbatim to followers. */
|
|
34
|
+
export interface CoalescedResult {
|
|
35
|
+
text: string;
|
|
36
|
+
finishReason: 'stop' | 'tool_calls';
|
|
37
|
+
usage?: {
|
|
38
|
+
prompt_tokens: number;
|
|
39
|
+
completion_tokens: number;
|
|
40
|
+
total_tokens: number;
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
interface InFlightEntry {
|
|
44
|
+
startedAt: number;
|
|
45
|
+
/** Resolves when the leader's turn completes. `null` ⇒ leader failed or
|
|
46
|
+
* produced no replayable text; followers must fail-open to a fresh run. */
|
|
47
|
+
resultPromise: Promise<CoalescedResult | null>;
|
|
48
|
+
}
|
|
49
|
+
/** Resolve the dedup window. CC_OPENCLAW_DEDUP_WINDOW_MS=0 disables coalescing
|
|
50
|
+
* entirely (the caller then never enters the leader/follower branches). */
|
|
51
|
+
export declare function getDedupWindowMs(): number;
|
|
52
|
+
/** Stable signature for "the same turn". Session-scoped so two chats sending
|
|
53
|
+
* identical text never collide. SHA-256 of sessionName + NUL + serialized
|
|
54
|
+
* input keeps the key bounded regardless of prompt size. */
|
|
55
|
+
export declare function computeSignature(sessionName: string, input: unknown): string;
|
|
56
|
+
/** Return a live (within-window) in-flight entry for `sig`, or undefined.
|
|
57
|
+
* Prunes a stale entry as a side effect so the map self-heals. */
|
|
58
|
+
export declare function findInFlight(sig: string, windowMs: number): InFlightEntry | undefined;
|
|
59
|
+
/** Register the current request as the leader for `sig`. Returns a `resolve`
|
|
60
|
+
* the caller MUST invoke in a finally block with the captured result (or
|
|
61
|
+
* `null` on failure) so followers never hang. The entry is retained for the
|
|
62
|
+
* window after resolution, then evicted. */
|
|
63
|
+
export declare function registerLeader(sig: string): {
|
|
64
|
+
resolve: (r: CoalescedResult | null) => void;
|
|
65
|
+
};
|
|
66
|
+
/** Await a leader's result with a hard cap so a wedged leader can't hang the
|
|
67
|
+
* follower forever. On timeout returns `null` ⇒ caller fails open. */
|
|
68
|
+
export declare function awaitLeader(entry: InFlightEntry, timeoutMs: number): Promise<CoalescedResult | null>;
|
|
69
|
+
/** Replay a leader's captured result to a follower's response as a complete,
|
|
70
|
+
* well-formed SSE stream (role chunk → content chunk → final chunk → [DONE]).
|
|
71
|
+
* Mirrors the shape handleStreaming emits so OpenClaw sees an ordinary, valid
|
|
72
|
+
* completion. Best-effort writes: a disconnected follower socket is harmless. */
|
|
73
|
+
export declare function replayCoalesced(res: http.ServerResponse, completionId: string, model: string, result: CoalescedResult): void;
|
|
74
|
+
/** Test-only helpers. */
|
|
75
|
+
export declare function _resetForTest(): void;
|
|
76
|
+
export declare function _size(): number;
|
|
77
|
+
export {};
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Single-flight request coalescer for the openai-compat streaming path (v0.27.5).
|
|
3
|
+
*
|
|
4
|
+
* THE PROBLEM (2026-05-22 incident)
|
|
5
|
+
* ─────────────────────────────────
|
|
6
|
+
* When a turn is OOM-killed mid-stream (exit 137) — or simply runs long enough
|
|
7
|
+
* that OpenClaw's HTTP client perceives the SSE stream as dead — OpenClaw fires
|
|
8
|
+
* a RETRY with a byte-identical request body. session-manager.sendMessage()
|
|
9
|
+
* *serializes* concurrent same-session sends via a per-session promise chain
|
|
10
|
+
* (session-manager.ts:437-453) rather than coalescing them, so the retry runs
|
|
11
|
+
* the FULL turn a second time and delivers a second, identical Telegram message
|
|
12
|
+
* (the "two identical messages" the user reported).
|
|
13
|
+
*
|
|
14
|
+
* THE FIX
|
|
15
|
+
* ───────
|
|
16
|
+
* Classic single-flight (a.k.a. request coalescing): the FIRST request for a
|
|
17
|
+
* given signature becomes the "leader" and runs the model once; any duplicate
|
|
18
|
+
* arriving within DEDUP_WINDOW_MS becomes a "follower" that AWAITS the leader's
|
|
19
|
+
* result and replays it — no second subprocess, no divergent second generation.
|
|
20
|
+
* In the common retry-after-perceived-death case, OpenClaw has already abandoned
|
|
21
|
+
* the leader's connection, so only the follower delivers → exactly one message.
|
|
22
|
+
*
|
|
23
|
+
* SAFETY: FAIL-OPEN BY CONSTRUCTION
|
|
24
|
+
* ─────────────────────────────────
|
|
25
|
+
* The caller wraps every coalescer interaction in try/catch and, on ANY error
|
|
26
|
+
* (or a leader that produced empty text, or a leader that exceeds the turn
|
|
27
|
+
* timeout), falls THROUGH to a normal dispatch. The worst case this can produce
|
|
28
|
+
* is the prior behavior (a possible duplicate) — it can NEVER drop a real reply.
|
|
29
|
+
* That property is the whole point: the user's deepest pain is missing messages,
|
|
30
|
+
* so the duplicate defense must not be able to cause a miss.
|
|
31
|
+
*/
|
|
32
|
+
import { createHash } from 'node:crypto';
|
|
33
|
+
import { DEDUP_WINDOW_MS } from '../constants.js';
|
|
34
|
+
import { formatCompletionChunk } from './response-formatter.js';
|
|
35
|
+
/** Module-scoped registry. Keyed by signature. Entries linger for the window
|
|
36
|
+
* after completion so a late retry (arriving just after the leader finished)
|
|
37
|
+
* still coalesces against the already-resolved result. */
|
|
38
|
+
const inFlight = new Map();
|
|
39
|
+
/**
 * Resolve the coalescing window, in milliseconds.
 *
 * The `CC_OPENCLAW_DEDUP_WINDOW_MS` environment variable wins when it parses
 * (base 10) to a non-negative integer; `0` is a valid override and is how
 * coalescing gets switched off. Anything else — unset, non-numeric, or
 * negative — falls back to the compiled-in `DEDUP_WINDOW_MS` constant.
 *
 * @returns the window length in milliseconds (>= 0 when taken from the env).
 */
export function getDedupWindowMs() {
    const override = process.env.CC_OPENCLAW_DEDUP_WINDOW_MS;
    if (override === undefined) {
        return DEDUP_WINDOW_MS;
    }
    // parseInt never yields ±Infinity, so "not NaN" is the whole validity check.
    const parsed = Number.parseInt(override, 10);
    const usable = !Number.isNaN(parsed) && parsed >= 0;
    return usable ? parsed : DEDUP_WINDOW_MS;
}
|
|
48
|
+
/**
 * Build the lookup key that identifies "the same turn".
 *
 * The key is the hex SHA-256 of `sessionName`, a NUL separator, and the input
 * (used verbatim when it is already a string, otherwise its JSON
 * serialization). Including the session name keeps identical text sent in two
 * different sessions from sharing a key, and hashing keeps the key a fixed
 * size however large the prompt is.
 *
 * @param sessionName session the request belongs to.
 * @param input the turn payload — a string or any JSON-serializable value.
 * @returns 64-character lowercase hex digest.
 */
export function computeSignature(sessionName, input) {
    const serialized = typeof input === 'string' ? input : JSON.stringify(input);
    const hasher = createHash('sha256');
    // Feed the three parts in order; the NUL keeps name/body boundaries distinct.
    for (const part of [sessionName, '\0', serialized]) {
        hasher.update(part);
    }
    return hasher.digest('hex');
}
|
|
55
|
+
/**
 * Look up the registry entry for `sig` if it is still inside the window.
 *
 * An entry whose age has reached `windowMs` is deleted from the registry as a
 * side effect and reported as absent, so stale keys do not accumulate.
 *
 * @param sig signature produced by `computeSignature`.
 * @param windowMs maximum age, in milliseconds, for an entry to count as live.
 * @returns the live entry, or `undefined` when none exists or it has expired.
 */
export function findInFlight(sig, windowMs) {
    const candidate = inFlight.get(sig);
    if (!candidate) {
        return undefined;
    }
    const ageMs = Date.now() - candidate.startedAt;
    // Deliberately `>=` rather than `>`: with a window of 0 (coalescing
    // disabled) even a same-millisecond lookup (age 0) must count as expired.
    // Real windows are unaffected — age 0 never reaches a positive window.
    const expired = ageMs >= windowMs;
    if (!expired) {
        return candidate;
    }
    inFlight.delete(sig);
    return undefined;
}
|
|
72
|
+
/**
 * Record the current request as the leader for `sig` and hand back the
 * function that publishes its outcome.
 *
 * The registry entry is stored immediately (replacing any previous entry for
 * the same signature). The returned `resolve` settles the entry's promise with
 * the captured result, or with `null` to signal "nothing replayable". It is
 * idempotent: only the first call has any effect, so a success-path capture
 * followed by a finally-block `resolve(null)` backstop is safe.
 *
 * After the first `resolve`, the entry stays in the registry for one more
 * dedup window and is then evicted — unless a newer entry has taken over the
 * key in the meantime, in which case the newer entry is left alone.
 *
 * @param sig signature produced by `computeSignature`.
 * @returns an object whose `resolve(result | null)` settles the leader.
 */
export function registerLeader(sig) {
    let settle;
    const resultPromise = new Promise((fulfil) => {
        settle = fulfil;
    });
    const entry = { startedAt: Date.now(), resultPromise };
    inFlight.set(sig, entry);

    // Only remove the key if it still points at the entry created above.
    const evictIfStillOurs = () => {
        if (inFlight.get(sig) === entry) {
            inFlight.delete(sig);
        }
    };

    let done = false;
    const resolve = (r) => {
        if (done) {
            return;
        }
        done = true;
        settle(r);
        const evictionTimer = setTimeout(evictIfStillOurs, getDedupWindowMs());
        // The pending eviction must not keep the process alive on its own.
        evictionTimer.unref?.();
    };
    return { resolve };
}
|
|
102
|
+
/**
 * Wait for a leader's outcome, but never longer than `timeoutMs`.
 *
 * Races the entry's `resultPromise` against a timer that yields `null`, so a
 * leader that never settles cannot block the follower indefinitely. The timer
 * is unref'd and is always cleared once the race is decided.
 *
 * @param entry registry entry exposing `resultPromise`.
 * @param timeoutMs upper bound on the wait, in milliseconds.
 * @returns the leader's result, or `null` if the leader resolved `null` or the
 *          cap elapsed first.
 */
export async function awaitLeader(entry, timeoutMs) {
    let capTimer;
    const cap = new Promise((giveUp) => {
        // Extra setTimeout argument is passed to the callback → resolves null.
        capTimer = setTimeout(giveUp, timeoutMs, null);
        capTimer.unref?.();
    });
    try {
        const outcome = await Promise.race([entry.resultPromise, cap]);
        return outcome;
    }
    finally {
        if (capTimer) {
            clearTimeout(capTimer);
        }
    }
}
|
|
118
|
+
/**
 * Send a leader's captured result to a follower as a complete SSE stream.
 *
 * Writes the event-stream headers, then four frames in order: a role chunk,
 * one content chunk carrying the whole text, a final chunk with the finish
 * reason (plus `usage` when the leader captured it), and the `[DONE]`
 * sentinel; finally the response is ended. Individual `write`/`end` failures
 * are swallowed because a follower that has gone away has nothing to receive.
 * `writeHead` itself is not guarded and may throw to the caller.
 *
 * @param res the follower's HTTP response.
 * @param completionId completion id to stamp on every chunk.
 * @param model model name to stamp on every chunk.
 * @param result the leader's captured text, finish reason and optional usage.
 */
export function replayCoalesced(res, completionId, model, result) {
    const sseHeaders = {
        'Content-Type': 'text/event-stream',
        'Cache-Control': 'no-cache',
        Connection: 'keep-alive',
        'X-Accel-Buffering': 'no',
    };
    res.writeHead(200, sseHeaders);

    // Best-effort frame writer: a dead socket must not abort the replay.
    const sendFrame = (payload) => {
        try {
            res.write(`data: ${payload}\n\n`);
        }
        catch {
            /* follower disconnected — nothing to deliver, safe to ignore */
        }
    };
    const sendChunk = (chunk) => sendFrame(JSON.stringify(chunk));

    sendChunk(formatCompletionChunk(completionId, model, { role: 'assistant' }, null));
    sendChunk(formatCompletionChunk(completionId, model, { content: result.text }, null));

    const closingChunk = formatCompletionChunk(completionId, model, {}, result.finishReason);
    if (result.usage) {
        closingChunk.usage = result.usage;
    }
    sendChunk(closingChunk);
    sendFrame('[DONE]');

    try {
        res.end();
    }
    catch {
        /* already closed */
    }
}
|
|
151
|
+
/**
 * Test-only helper: empty the module-level registry so each test starts from
 * a clean slate. Promises and eviction timers belonging to removed entries are
 * not touched.
 */
export function _resetForTest() {
    inFlight.clear();
}
|
|
155
|
+
/**
 * Test-only helper: report how many signatures the registry currently holds
 * (entries past their window but not yet pruned are still counted).
 */
export function _size() {
    const { size } = inFlight;
    return size;
}
|
|
@@ -41,4 +41,12 @@ import type { UserMessageBlock } from './message-extractor.js';
|
|
|
41
41
|
export declare function handleStreaming(manager: SessionManagerLike, sessionName: string, model: string, userMessage: string | UserMessageBlock[], completionId: string, res: http.ServerResponse, hasTools: boolean, slashCommand?: {
|
|
42
42
|
cmd: string;
|
|
43
43
|
mode?: string;
|
|
44
|
-
}
|
|
44
|
+
}, onFinalText?: (result: {
|
|
45
|
+
text: string;
|
|
46
|
+
finishReason: 'stop' | 'tool_calls';
|
|
47
|
+
usage?: {
|
|
48
|
+
prompt_tokens: number;
|
|
49
|
+
completion_tokens: number;
|
|
50
|
+
total_tokens: number;
|
|
51
|
+
};
|
|
52
|
+
}) => void): Promise<void>;
|
|
@@ -69,7 +69,12 @@ userMessage, completionId, res, hasTools,
|
|
|
69
69
|
// v0.19.1 M3: slash command captured by extractUserMessage, threaded to
|
|
70
70
|
// the patched sendMessage so the live-card pill renders the original
|
|
71
71
|
// /<slash> even when maybeInlineSkill replaced the message body.
|
|
72
|
-
slashCommand
|
|
72
|
+
slashCommand,
|
|
73
|
+
// v0.27.5: leader-result capture for the single-flight request coalescer.
|
|
74
|
+
// Invoked exactly once on a SUCCESSFUL turn (never on the error path) with
|
|
75
|
+
// the final assistant text + finish reason + usage, so a coalesced follower
|
|
76
|
+
// can replay this turn's output instead of running the model a second time.
|
|
77
|
+
onFinalText) {
|
|
73
78
|
// v0.26.1 observability: confirm the wired handler runs AND how many mirror
|
|
74
79
|
// cards THIS module instance sees. If cards=0 here while the inbound handler
|
|
75
80
|
// logged a registered card, the cardState singleton split across instances
|
|
@@ -125,7 +130,14 @@ slashCommand) {
|
|
|
125
130
|
};
|
|
126
131
|
// Initial chunk with role
|
|
127
132
|
writeSSE(JSON.stringify(formatCompletionChunk(completionId, model, { role: 'assistant' }, null)));
|
|
128
|
-
// SSE keepalive heartbeat
|
|
133
|
+
// SSE keepalive heartbeat. v0.27.5: 30s → 15s. A long quiet phase (Claude
|
|
134
|
+
// CLI thinking, a slow Bash/tool step) with no SSE write can make OpenClaw's
|
|
135
|
+
// HTTP client perceive the stream as dead and fire a RETRY — which the
|
|
136
|
+
// session-manager send-chain serializes into a SECOND full turn → duplicate
|
|
137
|
+
// Telegram message (2026-05-22 incident). A tighter heartbeat keeps the
|
|
138
|
+
// connection demonstrably alive between content events, cutting spurious
|
|
139
|
+
// retries at the source. (The request-coalescer is the second line of
|
|
140
|
+
// defense for the retries that still slip through.)
|
|
129
141
|
const heartbeatTimer = setInterval(() => {
|
|
130
142
|
if (!clientDisconnected) {
|
|
131
143
|
try {
|
|
@@ -135,7 +147,7 @@ slashCommand) {
|
|
|
135
147
|
clientDisconnected = true;
|
|
136
148
|
}
|
|
137
149
|
}
|
|
138
|
-
},
|
|
150
|
+
}, 15_000);
|
|
139
151
|
// Phase 2 R1+R2: in tool-stream mode, bridge session-manager's pre-parsed
|
|
140
152
|
// tool_use events directly to OpenAI tool_calls SSE deltas. Skips the
|
|
141
153
|
// legacy "buffer text + regex-parse <tool_calls> XML" path entirely.
|
|
@@ -534,6 +546,18 @@ slashCommand) {
|
|
|
534
546
|
tool_calls: toolCallsEmitted,
|
|
535
547
|
bytes_out: accumulatedText.length,
|
|
536
548
|
});
|
|
549
|
+
// v0.27.5: hand the captured turn output to the coalescer (success only).
|
|
550
|
+
// A coalesced follower replays exactly this — so the model runs once even
|
|
551
|
+
// when OpenClaw retries. Guarded: a capture-callback throw must never break
|
|
552
|
+
// the SSE response that just succeeded.
|
|
553
|
+
if (onFinalText) {
|
|
554
|
+
try {
|
|
555
|
+
onFinalText({ text: accumulatedText, finishReason: traceFinishReason, usage });
|
|
556
|
+
}
|
|
557
|
+
catch {
|
|
558
|
+
/* capture is best-effort; the real reply already streamed */
|
|
559
|
+
}
|
|
560
|
+
}
|
|
537
561
|
writeSSE('[DONE]');
|
|
538
562
|
}
|
|
539
563
|
catch (err) {
|
|
@@ -603,7 +627,12 @@ slashCommand) {
|
|
|
603
627
|
// turn. reply_dispatch (gateway) fires too early to own this; the handler
|
|
604
628
|
// does. Best-effort: a finalize failure must not break the SSE response.
|
|
605
629
|
try {
|
|
606
|
-
|
|
630
|
+
// v0.27.6 — report-drop fix (Killer #2): when the gateway socket died
|
|
631
|
+
// mid-turn (clientDisconnected), the gateway delivers nothing separately,
|
|
632
|
+
// so pass the accumulated text and the finalized card KEEPS it as the
|
|
633
|
+
// sole delivery channel. Happy path (connected) passes undefined → card
|
|
634
|
+
// wiped → gateway delivers the reply (no duplicate).
|
|
635
|
+
await mirrorFinalizeActiveCards(clientDisconnected ? accumulatedText : undefined);
|
|
607
636
|
}
|
|
608
637
|
catch {
|
|
609
638
|
/* finalize is cosmetic; never propagate */
|
|
@@ -27,6 +27,9 @@ export interface WatchdogManagedSession {
|
|
|
27
27
|
getStats(): {
|
|
28
28
|
lastActivity?: string | null | undefined;
|
|
29
29
|
lastProgressAt?: string | null | undefined;
|
|
30
|
+
/** v0.27.6 — count of in-flight tool calls; > 0 means a tool is running
|
|
31
|
+
* (alive), so the stalled check is skipped no matter how quiet it is. */
|
|
32
|
+
inFlightTools?: number | undefined;
|
|
30
33
|
};
|
|
31
34
|
stop(): void;
|
|
32
35
|
};
|
|
@@ -46,6 +46,12 @@ export function watchStalledSessions(opts) {
|
|
|
46
46
|
if (!managed.session.isBusy)
|
|
47
47
|
continue;
|
|
48
48
|
const stats = managed.session.getStats();
|
|
49
|
+
// v0.27.6 (Killer #1) — a session with a tool in flight is ALIVE, not
|
|
50
|
+
// stalled, no matter how long the tool runs quiet (a 40-min build/test emits
|
|
51
|
+
// no stream events while it works). Skip it entirely; the age/threshold
|
|
52
|
+
// check below only governs a genuine silent wedge with NO tool running.
|
|
53
|
+
if ((stats.inFlightTools ?? 0) > 0)
|
|
54
|
+
continue;
|
|
49
55
|
// v0.27.x — prefer the PROGRESS timestamp (real output: text/tool/result),
|
|
50
56
|
// which excludes `system/api_retry` pings. Keying off lastActivity let a
|
|
51
57
|
// retry-storm reset the clock forever so the watchdog never fired. Fall back
|
package/dist/src/types.d.ts
CHANGED
|
@@ -197,6 +197,10 @@ export interface SessionStats {
|
|
|
197
197
|
* events like `system/api_retry`. The stalled-session watchdog keys off this
|
|
198
198
|
* so an API retry-storm (no output) is fast-failed instead of looking busy. */
|
|
199
199
|
lastProgressAt: string | null;
|
|
200
|
+
/** v0.27.6 — tool calls currently in flight (dispatched without a matching
|
|
201
|
+
* result yet). Optional: only the Claude persistent-session engine populates
|
|
202
|
+
* it; the stalled-session watchdog treats > 0 as "alive, don't kill". */
|
|
203
|
+
inFlightTools?: number;
|
|
200
204
|
/**
|
|
201
205
|
* Approximate context window utilization (0-100).
|
|
202
206
|
* Estimated as (tokensIn + tokensOut) / 200,000 * 100.
|
package/package.json
CHANGED
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* drift-detector — snapshots the parsed Config to disk on a healthy boot;
|
|
3
|
-
* compares on subsequent boots and returns a delta list.
|
|
4
|
-
*
|
|
5
|
-
* Primary purpose: detect keys pruned by `openclaw doctor --fix`
|
|
6
|
-
* (OF5 — feedback_cc_openclaw_provider_timeout.md).
|
|
7
|
-
*
|
|
8
|
-
* Snapshot path: ~/.openclaw/workspace/memory/cc-openclaw-config-snapshot.json
|
|
9
|
-
 * The file is NOT written atomically (write-then-rename is not available in pure
|
|
10
|
-
* Node ESM without extra deps; we write directly since the worst case is a
|
|
11
|
-
* corrupted snapshot — non-fatal; falls back to empty baseline).
|
|
12
|
-
*/
|
|
13
|
-
import type { Config } from './schema.js';
|
|
14
|
-
export interface ConfigDelta {
|
|
15
|
-
path: string;
|
|
16
|
-
previous: unknown;
|
|
17
|
-
current: unknown;
|
|
18
|
-
}
|
|
19
|
-
/**
|
|
20
|
-
* Compare current config against the on-disk snapshot.
|
|
21
|
-
* Returns an array of changed paths (empty = no drift).
|
|
22
|
-
*/
|
|
23
|
-
export declare function detectDrift(current: Config): ConfigDelta[];
|
|
24
|
-
/**
|
|
25
|
-
* Persist current config as the baseline snapshot for future drift detection.
|
|
26
|
-
* Called at the end of a successful boot (after phase 7 / ready state).
|
|
27
|
-
*/
|
|
28
|
-
export declare function saveSnapshot(current: Config): void;
|
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* drift-detector — snapshots the parsed Config to disk on a healthy boot;
|
|
3
|
-
* compares on subsequent boots and returns a delta list.
|
|
4
|
-
*
|
|
5
|
-
* Primary purpose: detect keys pruned by `openclaw doctor --fix`
|
|
6
|
-
* (OF5 — feedback_cc_openclaw_provider_timeout.md).
|
|
7
|
-
*
|
|
8
|
-
* Snapshot path: ~/.openclaw/workspace/memory/cc-openclaw-config-snapshot.json
|
|
9
|
-
 * The file is NOT written atomically (write-then-rename is not available in pure
|
|
10
|
-
* Node ESM without extra deps; we write directly since the worst case is a
|
|
11
|
-
* corrupted snapshot — non-fatal; falls back to empty baseline).
|
|
12
|
-
*/
|
|
13
|
-
import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs';
|
|
14
|
-
import { homedir } from 'os';
|
|
15
|
-
import { join, dirname } from 'path';
|
|
16
|
-
const SNAPSHOT_PATH = join(homedir(), '.openclaw', 'workspace', 'memory', 'cc-openclaw-config-snapshot.json');
|
|
17
|
-
// ── Internal helpers ──────────────────────────────────────────────────────────
|
|
18
|
-
/**
 * Flatten a nested config object into a single-level map keyed by dotted path.
 *
 * Plain nested objects are descended into; every other value — primitives,
 * `null`, and arrays — is stored as a leaf under its dotted path. A nested
 * object with no keys contributes nothing. When the top-level argument is
 * itself not an object (or is `null`), the result is a one-entry map from
 * `prefix` to that value.
 *
 * @param obj value to flatten.
 * @param prefix dotted path to prepend to every key (empty for the root).
 * @returns map from dotted path to leaf value, in depth-first key order.
 */
function flattenConfig(obj, prefix = '') {
    if (obj === null || typeof obj !== 'object') {
        return { [prefix]: obj };
    }
    const flat = {};
    const walk = (node, path) => {
        for (const name of Object.keys(node)) {
            const child = node[name];
            const childPath = path ? `${path}.${name}` : name;
            const isBranch = typeof child === 'object' && child !== null && !Array.isArray(child);
            if (isBranch) {
                walk(child, childPath);
            }
            else {
                flat[childPath] = child;
            }
        }
    };
    walk(obj, prefix);
    return flat;
}
|
|
34
|
-
/**
 * Load the flattened config baseline from `SNAPSHOT_PATH`.
 *
 * Returns an empty baseline (`{}`) when the file is missing, cannot be read,
 * is not valid JSON, or parses to something other than a plain object. The
 * last case matters: a file containing e.g. `null` or `5` is valid JSON, and
 * handing that to a caller that applies the `in` operator to the result would
 * throw a TypeError instead of degrading to "no baseline".
 *
 * @returns map from dotted config path to the previously saved value.
 */
function readSnapshot() {
    if (!existsSync(SNAPSHOT_PATH))
        return {};
    try {
        const parsed = JSON.parse(readFileSync(SNAPSHOT_PATH, 'utf8'));
        const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
        // Anything but a key/value map is treated as a corrupted snapshot.
        return isPlainObject ? parsed : {};
    }
    catch {
        // Unreadable or malformed snapshot — non-fatal, start from no baseline.
        return {};
    }
}
|
|
45
|
-
// ── Public API ────────────────────────────────────────────────────────────────
|
|
46
|
-
/**
 * Compare the current config against the on-disk snapshot.
 *
 * Both sides are compared in flattened (dotted-path) form. A path is reported
 * only when it exists in the snapshot AND its JSON serialization differs from
 * the current value; paths that are new in `current` are not reported.
 *
 * @param current the config to check.
 * @returns one `{ path, previous, current }` record per changed path
 *          (empty array = no drift).
 */
export function detectDrift(current) {
    const baseline = readSnapshot();
    return Object.entries(flattenConfig(current))
        .filter(([path, value]) => path in baseline && JSON.stringify(baseline[path]) !== JSON.stringify(value))
        .map(([path, value]) => ({ path, previous: baseline[path], current: value }));
}
|
|
61
|
-
/**
 * Persist the current config, in flattened form, as the baseline for future
 * drift detection.
 *
 * Creates the snapshot directory if needed and writes pretty-printed JSON
 * (2-space indent) to `SNAPSHOT_PATH`. Every failure is swallowed on purpose:
 * a missing snapshot only means the next comparison has no baseline.
 *
 * @param current the config to record.
 */
export function saveSnapshot(current) {
    try {
        const snapshotDir = dirname(SNAPSHOT_PATH);
        mkdirSync(snapshotDir, { recursive: true });
        const serialized = JSON.stringify(flattenConfig(current), null, 2);
        writeFileSync(SNAPSHOT_PATH, serialized, 'utf8');
    }
    catch {
        // Non-fatal: drift detection degrades to "no baseline" on next boot.
    }
}
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Stale PID-keyed file cleanup — for status/sentinel files named `<pid>.json`
|
|
3
|
-
* in a tee directory, removes entries whose PID is no longer alive.
|
|
4
|
-
*
|
|
5
|
-
* Extracted from `live-card.ts` 2026-05-14. Stateless utility with a
|
|
6
|
-
* dir-parameter shape so future callers (other PID-keyed sentinel patterns)
|
|
7
|
-
* can reuse without duplicating the kill(pid, 0) liveness probe.
|
|
8
|
-
*
|
|
9
|
-
* The original file-extension was `.json`; expose it as a parameter so this
|
|
10
|
-
* is forward-compatible with `.sock`, `.lock`, etc.
|
|
11
|
-
*/
|
|
12
|
-
/**
 * Remove `<pid><ext>` files in `dir` whose PID fails the liveness probe.
 * Silent on missing directory. Per-file errors are swallowed — this is
 * best-effort cleanup, not a correctness guarantee.
 *
 * @param dir Directory to scan (not recursive).
 * @param ext File-name suffix that marks a PID file; the implementation
 *            defaults it to `.json` when omitted.
 */
|
|
17
|
-
export declare function cleanStalePidFiles(dir: string, ext?: string): void;
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Stale PID-keyed file cleanup — for status/sentinel files named `<pid>.json`
|
|
3
|
-
* in a tee directory, removes entries whose PID is no longer alive.
|
|
4
|
-
*
|
|
5
|
-
* Extracted from `live-card.ts` 2026-05-14. Stateless utility with a
|
|
6
|
-
* dir-parameter shape so future callers (other PID-keyed sentinel patterns)
|
|
7
|
-
* can reuse without duplicating the kill(pid, 0) liveness probe.
|
|
8
|
-
*
|
|
9
|
-
* The original file-extension was `.json`; expose it as a parameter so this
|
|
10
|
-
* is forward-compatible with `.sock`, `.lock`, etc.
|
|
11
|
-
*/
|
|
12
|
-
import { readdirSync, unlinkSync } from 'node:fs';
|
|
13
|
-
/**
 * Remove `<pid><ext>` files in `dir` whose PID no longer exists.
 *
 * A file is considered only when its name is exactly a positive decimal PID
 * followed by `ext`; names such as `123abc.json` or `-1.json` are left
 * untouched. Liveness is probed with `process.kill(pid, 0)`, and the file is
 * deleted only when the probe fails with `ESRCH` ("no such process"). Any
 * other failure, notably `EPERM` (the process exists but belongs to another
 * user), leaves the file in place.
 *
 * Silent on missing directory. Per-file errors are swallowed — this is
 * best-effort cleanup, not a correctness guarantee.
 *
 * @param dir Directory to scan (not recursive).
 * @param ext File-name suffix that marks a PID file. Defaults to `.json`.
 */
export function cleanStalePidFiles(dir, ext = '.json') {
    const pidPattern = /^[1-9]\d*$/;
    try {
        for (const file of readdirSync(dir)) {
            if (!file.endsWith(ext))
                continue;
            // Strip the suffix itself, not the first occurrence of `ext`.
            const stem = file.slice(0, file.length - ext.length);
            // Strict match: parseInt would accept "123abc" and "-1".
            if (!pidPattern.test(stem))
                continue;
            const pid = Number(stem);
            try {
                process.kill(pid, 0);
            }
            catch (err) {
                const processGone = typeof err === 'object' && err !== null && 'code' in err && err.code === 'ESRCH';
                // EPERM and friends mean the PID may still be alive; keep its file.
                if (!processGone)
                    continue;
                try {
                    unlinkSync(`${dir}/${file}`);
                }
                catch { /* ignore */ }
            }
        }
    }
    catch { /* directory doesn't exist — nothing to clean */ }
}
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* SnapshotWriter — periodic full-state snapshot of the SessionRegistry.
|
|
3
|
-
*
|
|
4
|
-
* Snapshot file: ~/.openclaw/workspace/memory/cc-openclaw-session-snapshot.json
|
|
5
|
-
* Written every N minutes from phase 7 (schedule-jobs).
|
|
6
|
-
* Phase E wires SessionRegistry.snapshot() → SnapshotWriter.write().
|
|
7
|
-
* Stub body in Phase D.
|
|
8
|
-
*/
|
|
9
|
-
import type { SessionState } from './session-registry.js';
|
|
10
|
-
/** Writes an array of session states to a JSON snapshot file. */
export declare class SnapshotWriter {
|
|
11
|
-
/** Destination file path; set once by the constructor. */
private readonly path;
|
|
12
|
-
/**
 * @param path Snapshot file location. When omitted, the implementation uses
 *             ~/.openclaw/workspace/memory/cc-openclaw-session-snapshot.json.
 */
constructor(path?: string);
|
|
13
|
-
/**
 * Write the full session state array as a JSON file.
 * Phase E: write to a temp file then rename for atomicity.
 *
 * NOTE(review): the implementation shipped alongside this declaration
 * writes the target file directly (no temp file / rename) and swallows
 * write errors, so the returned promise does not signal failure.
 *
 * @param states Session states to serialize.
 */
|
|
17
|
-
write(states: SessionState[]): Promise<void>;
|
|
18
|
-
}
|
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* SnapshotWriter — periodic full-state snapshot of the SessionRegistry.
|
|
3
|
-
*
|
|
4
|
-
* Snapshot file: ~/.openclaw/workspace/memory/cc-openclaw-session-snapshot.json
|
|
5
|
-
* Written every N minutes from phase 7 (schedule-jobs).
|
|
6
|
-
* Phase E wires SessionRegistry.snapshot() → SnapshotWriter.write().
|
|
7
|
-
* Stub body in Phase D.
|
|
8
|
-
*/
|
|
9
|
-
import { mkdirSync, writeFileSync } from 'fs';
|
|
10
|
-
import { homedir } from 'os';
|
|
11
|
-
import { join, dirname } from 'path';
|
|
12
|
-
// Default snapshot location under the user's home directory.
const SNAPSHOT_PATH = join(
    homedir(),
    '.openclaw',
    'workspace',
    'memory',
    'cc-openclaw-session-snapshot.json',
);
/**
 * Serializes session states to a JSON snapshot file.
 */
export class SnapshotWriter {
    /** Destination file for the serialized states. */
    path;
    /**
     * @param path Target file; falls back to SNAPSHOT_PATH when omitted.
     */
    constructor(path = SNAPSHOT_PATH) {
        this.path = path;
    }
    /**
     * Dump `states` as pretty-printed (2-space) JSON to the configured path,
     * creating parent directories as needed. The work is done with
     * synchronous fs calls and every failure is swallowed, so the returned
     * promise resolves whether or not the file was written.
     * Phase E: write to a temp file then rename for atomicity.
     */
    async write(states) {
        const target = this.path;
        try {
            mkdirSync(dirname(target), { recursive: true });
            const payload = JSON.stringify(states, null, 2);
            writeFileSync(target, payload, 'utf8');
        }
        catch {
            // Non-fatal stub; Phase E promotes to logged error.
        }
    }
}
|