@cordfuse/crosstalk 5.0.0-alpha.6 → 6.0.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/crosstalk.js +34 -78
- package/package.json +4 -4
- package/src/activation.ts +104 -0
- package/src/attach.ts +1 -1
- package/src/channel.ts +8 -21
- package/src/chat.ts +52 -115
- package/src/dispatch.ts +252 -570
- package/src/dlq.ts +68 -136
- package/src/init.ts +17 -41
- package/src/open.ts +55 -31
- package/src/replies.ts +59 -0
- package/src/send.ts +48 -67
- package/src/state.ts +143 -0
- package/src/status.ts +18 -57
- package/src/transport.ts +68 -198
- package/src/turnq.ts +64 -32
- package/src/upgrade.ts +9 -11
- package/src/wake.ts +5 -6
- package/src/cursor.ts +0 -48
- package/template/.amazonq/rules/crosstalk.md +0 -2
- package/template/.continue/rules/crosstalk.md +0 -7
- package/template/.cursor/rules/crosstalk.mdc +0 -7
- package/template/.github/copilot-instructions.md +0 -2
- package/template/.windsurfrules +0 -2
- package/template/AGENTS.md +0 -2
- package/template/ANTIGRAVITY.md +0 -2
- package/template/CLAUDE.md +0 -2
- package/template/GEMINI.md +0 -2
- package/template/OPENCODE.md +0 -2
- package/template/QWEN.md +0 -2
- package/template/README.md +0 -22
- package/template/local/CROSSTALK.md +0 -4
- package/template/upstream/CROSSTALK-VERSION +0 -1
- package/template/upstream/CROSSTALK.md +0 -589
- package/template/upstream/JITTER.md +0 -24
- package/template/upstream/OPERATOR.md +0 -60
- package/template/upstream/PROTOCOL.md +0 -239
- package/template/upstream/actors/cloud-architect.md +0 -83
- package/template/upstream/actors/concierge.md +0 -125
- package/template/upstream/actors/devops-engineer.md +0 -83
- package/template/upstream/actors/documentation-engineer.md +0 -107
- package/template/upstream/actors/infrastructure-engineer.md +0 -83
- package/template/upstream/actors/junior-developer.md +0 -83
- package/template/upstream/actors/precise-generalist.md +0 -48
- package/template/upstream/actors/product-manager.md +0 -83
- package/template/upstream/actors/qa-engineer.md +0 -83
- package/template/upstream/actors/security-engineer.md +0 -92
- package/template/upstream/actors/senior-generalist-engineer.md +0 -111
- package/template/upstream/actors/senior-software-engineer.md +0 -94
- package/template/upstream/actors/skeptic.md +0 -89
- package/template/upstream/actors/technical-writer.md +0 -89
- package/template/upstream/actors/ux-designer.md +0 -83
package/src/dispatch.ts
CHANGED
|
@@ -1,30 +1,19 @@
|
|
|
1
|
+
// crosstalk dispatch — the loop.
|
|
2
|
+
//
|
|
3
|
+
// Tick: pull → for each local actor, scan channels for messages past the
|
|
4
|
+
// cursor → decideWake (activation.ts, the one rule) → invoke the actor's
|
|
5
|
+
// CLI per batch → write replies (re: linked per sender) → commit+push.
|
|
6
|
+
//
|
|
7
|
+
// Only the commit+push is locked, and the lock is advisory (turnq.ts) —
|
|
8
|
+
// git arbitrates correctness. Cursors, DLQ, heartbeat and the error log
|
|
9
|
+
// live in the machine-local state dir (state.ts), so a tick's commit only
|
|
10
|
+
// ever contains data/ and there is no self-inflicted git deadlock to heal.
|
|
11
|
+
|
|
1
12
|
import { resolve, join, dirname } from 'path';
|
|
2
13
|
import { spawn } from 'child_process';
|
|
3
|
-
import {
|
|
4
|
-
mkdirSync,
|
|
5
|
-
writeFileSync,
|
|
6
|
-
readFileSync,
|
|
7
|
-
existsSync,
|
|
8
|
-
appendFileSync,
|
|
9
|
-
openSync,
|
|
10
|
-
closeSync,
|
|
11
|
-
} from 'fs';
|
|
14
|
+
import { mkdirSync, writeFileSync, readFileSync, existsSync, appendFileSync } from 'fs';
|
|
12
15
|
import { watch } from 'fs/promises';
|
|
13
16
|
import { fileURLToPath } from 'url';
|
|
14
|
-
|
|
15
|
-
// Read runtime version from the installed package's package.json at startup
|
|
16
|
-
// so dispatch_start logs and heartbeat content always match the actual
|
|
17
|
-
// installed @cordfuse/crosstalk version. Avoids hand-editing on every release.
|
|
18
|
-
const RUNTIME_VERSION: string = (() => {
|
|
19
|
-
try {
|
|
20
|
-
const thisFileDir = dirname(fileURLToPath(import.meta.url));
|
|
21
|
-
const pkgPath = join(thisFileDir, '..', 'package.json');
|
|
22
|
-
const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8')) as { version?: string };
|
|
23
|
-
return pkg.version ?? 'unknown';
|
|
24
|
-
} catch {
|
|
25
|
-
return 'unknown';
|
|
26
|
-
}
|
|
27
|
-
})();
|
|
28
17
|
import {
|
|
29
18
|
findHostFile,
|
|
30
19
|
loadActorProfile,
|
|
@@ -38,15 +27,31 @@ import {
|
|
|
38
27
|
listChannelMessages,
|
|
39
28
|
gitPull,
|
|
40
29
|
gitCommitAndPush,
|
|
41
|
-
|
|
42
|
-
|
|
30
|
+
cursorBaseline,
|
|
31
|
+
newFilesSince,
|
|
43
32
|
type ChannelMessage,
|
|
44
33
|
} from './transport.js';
|
|
45
|
-
import {
|
|
34
|
+
import {
|
|
35
|
+
stateDir,
|
|
36
|
+
readCursor,
|
|
37
|
+
writeCursor,
|
|
38
|
+
writeHeartbeat,
|
|
39
|
+
logError,
|
|
40
|
+
} from './state.js';
|
|
41
|
+
import { recipients, reList, decideWake, splitForConcurrency } from './activation.js';
|
|
46
42
|
import { now, messageFilename } from './filenames.js';
|
|
47
43
|
import { serializeFrontmatter } from './frontmatter.js';
|
|
48
44
|
import { withLock } from './turnq.js';
|
|
49
|
-
import { writeDlqEntry, isQuarantined
|
|
45
|
+
import { writeDlqEntry, isQuarantined } from './dlq.js';
|
|
46
|
+
|
|
47
|
+
const RUNTIME_VERSION: string = (() => {
|
|
48
|
+
try {
|
|
49
|
+
const pkgPath = join(dirname(fileURLToPath(import.meta.url)), '..', 'package.json');
|
|
50
|
+
return (JSON.parse(readFileSync(pkgPath, 'utf-8')) as { version?: string }).version ?? 'unknown';
|
|
51
|
+
} catch {
|
|
52
|
+
return 'unknown';
|
|
53
|
+
}
|
|
54
|
+
})();
|
|
50
55
|
|
|
51
56
|
const transportRoot = resolve(process.cwd());
|
|
52
57
|
const argv = process.argv.slice(2);
|
|
@@ -63,27 +68,9 @@ const hostOverride = flag('--host');
|
|
|
63
68
|
const pollSeconds = Number(flag('--poll')) || 30;
|
|
64
69
|
const logFile = flag('--log-file');
|
|
65
70
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
const
|
|
69
|
-
const BACKOFF_GRACE = 2; // first N failures don't trigger backoff
|
|
70
|
-
|
|
71
|
-
// Per-tick heal: when N consecutive infra failures pile up, the dispatch
|
|
72
|
-
// loop is stuck in a deadlock that entrypoint's boot-time auto-recovery
|
|
73
|
-
// can't break (because dispatch is already running). At HEAL_THRESHOLD
|
|
74
|
-
// consecutive failures, attempt a `git fetch && reset --hard origin/<branch>
|
|
75
|
-
// && clean -fd` from inside the tick loop. Mirrors the entrypoint logic.
|
|
76
|
-
// Throttled — won't reattempt until fully BACKOFF_GRACE+HEAL_THRESHOLD more
|
|
77
|
-
// failures pile up after a heal, to avoid heal-loop-storms.
|
|
78
|
-
const HEAL_THRESHOLD = 5;
|
|
79
|
-
let lastHealAtFailureCount = 0;
|
|
80
|
-
|
|
81
|
-
// Stale-read-receipt sweep config — runs at most every SWEEP_INTERVAL_MS
|
|
82
|
-
// of wall-clock to surface read receipts that never produced a reply
|
|
83
|
-
// (indicates dispatch crashed mid-tick or CLI hung silently).
|
|
84
|
-
const SWEEP_INTERVAL_MS = 5 * 60_000;
|
|
85
|
-
const STALE_RECEIPT_THRESHOLD_MS = 5 * 60_000;
|
|
86
|
-
let lastSweepAt = 0;
|
|
71
|
+
const CLI_TIMEOUT_MS = 5 * 60_000;
|
|
72
|
+
const MAX_BACKOFF_MULTIPLIER = 10;
|
|
73
|
+
const BACKOFF_GRACE = 2;
|
|
87
74
|
|
|
88
75
|
function log(event: string, fields: Record<string, unknown> = {}): void {
|
|
89
76
|
let line: string;
|
|
@@ -101,114 +88,24 @@ function log(event: string, fields: Record<string, unknown> = {}): void {
|
|
|
101
88
|
}
|
|
102
89
|
}
|
|
103
90
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
91
|
+
// Config errors (bad host file, bad actor profile) repeat every tick until
|
|
92
|
+
// fixed — log each distinct one once per process run, not once per tick.
|
|
93
|
+
const loggedConfigErrors = new Set<string>();
|
|
94
|
+
function logConfigError(scope: string, message: string): void {
|
|
95
|
+
const key = `${scope}::${message}`;
|
|
96
|
+
if (loggedConfigErrors.has(key)) return;
|
|
97
|
+
loggedConfigErrors.add(key);
|
|
98
|
+
logError(transportRoot, 'parse', `${scope}: ${message}`);
|
|
99
|
+
log('config_error', { scope, message: message.slice(0, 200) });
|
|
111
100
|
}
|
|
112
101
|
|
|
113
|
-
|
|
102
|
+
const protocolPrompt = (() => {
|
|
114
103
|
const p = join(transportRoot, 'upstream', 'PROTOCOL.md');
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
const protocolPrompt = loadProtocolPrompt();
|
|
120
|
-
|
|
121
|
-
function recipients(toField: unknown): string[] {
|
|
122
|
-
if (Array.isArray(toField)) return toField.map(String);
|
|
123
|
-
if (typeof toField === 'string') return [toField];
|
|
124
|
-
return [];
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
// Declared lifecycle kind for a message. `work` (default for legacy messages
|
|
128
|
-
// without the field) is the as-tagged intent. The runtime does NOT trust this
|
|
129
|
-
// value directly for the activation decision — see effectiveKind() below.
|
|
130
|
-
// Kept for use as the seed of the effective-kind computation.
|
|
131
|
-
function messageKind(msg: ChannelMessage): 'work' | 'result' {
|
|
132
|
-
const raw = msg.data['kind'];
|
|
133
|
-
return raw === 'result' ? 'result' : 'work';
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
// Is `msg` causally a reply to a prior ask? True iff some message strictly
|
|
137
|
-
// before `msg` was sent FROM one of `msg`'s recipients TO `msg`'s sender with
|
|
138
|
-
// declared kind `work`. If so, `msg` is that recipient's answer coming back —
|
|
139
|
-
// regardless of how its sender (a fallible LLM actor, or `crosstalk send`'s
|
|
140
|
-
// `work` default) labelled it.
|
|
141
|
-
//
|
|
142
|
-
// Conservative on multi-recipient `to:` lists: if ANY recipient previously
|
|
143
|
-
// tasked the sender, the message is treated as causally a reply for all
|
|
144
|
-
// recipients. The per-addressee asymmetry in hasPriorWork (below) compensates
|
|
145
|
-
// — only the recipient that actually asked wakes on it. Known v1 limitation:
|
|
146
|
-
// genuine multi-recipient fan-out where one recipient happens to have prior
|
|
147
|
-
// unrelated work to the sender will be demoted to result and suppress wakes
|
|
148
|
-
// for the other recipients. Not observed in Monte Carlo; revisit if it
|
|
149
|
-
// surfaces.
|
|
150
|
-
function isCausalReply(channelMessages: ChannelMessage[], msg: ChannelMessage): boolean {
|
|
151
|
-
const sender = typeof msg.data['from'] === 'string' ? msg.data['from'] : '';
|
|
152
|
-
if (!sender) return false;
|
|
153
|
-
const toList = recipients(msg.data['to']);
|
|
154
|
-
for (const m of channelMessages) {
|
|
155
|
-
if (m.relPath >= msg.relPath) break;
|
|
156
|
-
const mFrom = typeof m.data['from'] === 'string' ? m.data['from'] : '';
|
|
157
|
-
if (!toList.includes(mFrom)) continue;
|
|
158
|
-
if ((m.data['kind'] ?? 'work') === 'result') continue;
|
|
159
|
-
if (recipients(m.data['to']).includes(sender)) return true;
|
|
160
|
-
}
|
|
161
|
-
return false;
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
// Effective lifecycle kind. The runtime INFERS kind from the causality graph
|
|
165
|
-
// rather than trusting the declared field: a message that is causally a reply
|
|
166
|
-
// is a `result` even if it was labelled `work` (actors routinely report
|
|
167
|
-
// results via `crosstalk send`, which defaults to `work`, and that mislabel
|
|
168
|
-
// forges false reply-causality edges → wake-up loops). Genuine unsolicited
|
|
169
|
-
// tasks (kickoffs, fresh dispatches) have no prior opposite-direction work
|
|
170
|
-
// and keep their `work` kind. See PROTOCOL.md "Message kinds".
|
|
171
|
-
//
|
|
172
|
-
// This is the load-bearing principle the rest of the activation rule rides
|
|
173
|
-
// on: the dispatcher derives semantics from the interaction graph; it never
|
|
174
|
-
// trusts an actor's declaration.
|
|
175
|
-
function effectiveKind(channelMessages: ChannelMessage[], msg: ChannelMessage): 'work' | 'result' {
|
|
176
|
-
if (messageKind(msg) === 'result') return 'result';
|
|
177
|
-
return isCausalReply(channelMessages, msg) ? 'result' : 'work';
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
// Reply causality — does `addressee` have a prior `kind: work` outbound to
|
|
181
|
-
// `sender` somewhere in the channel's history strictly before `before`? If
|
|
182
|
-
// yes, an inbound `kind: result` from `sender` to `addressee` is the answer
|
|
183
|
-
// to that ask, and the addressee should wake on it. If no, the result is
|
|
184
|
-
// unsolicited from addressee's POV and is informational only.
|
|
185
|
-
//
|
|
186
|
-
// Uses effectiveKind (not messageKind) when checking prior messages — a
|
|
187
|
-
// mislabeled "work" reply from a prior peer would otherwise forge a false
|
|
188
|
-
// causality edge here, which was the ping-pong root.
|
|
189
|
-
//
|
|
190
|
-
// The channel is already sorted by relPath ascending in
|
|
191
|
-
// listChannelMessages(), so the scan walks chronologically.
|
|
192
|
-
function hasPriorWork(
|
|
193
|
-
channelMessages: ChannelMessage[],
|
|
194
|
-
addressee: string,
|
|
195
|
-
sender: string,
|
|
196
|
-
before: string,
|
|
197
|
-
): boolean {
|
|
198
|
-
for (const m of channelMessages) {
|
|
199
|
-
if (m.relPath >= before) break;
|
|
200
|
-
if (typeof m.data['from'] !== 'string' || m.data['from'] !== addressee) continue;
|
|
201
|
-
if (effectiveKind(channelMessages, m) !== 'work') continue;
|
|
202
|
-
const toList = recipients(m.data['to']);
|
|
203
|
-
if (toList.includes(sender)) return true;
|
|
204
|
-
}
|
|
205
|
-
return false;
|
|
206
|
-
}
|
|
104
|
+
return existsSync(p) ? readFileSync(p, 'utf-8').trim() : '';
|
|
105
|
+
})();
|
|
207
106
|
|
|
208
107
|
function composeSystemPrompt(actorPrompt: string): string {
|
|
209
|
-
return [protocolPrompt, actorPrompt]
|
|
210
|
-
.filter((p) => p.length > 0)
|
|
211
|
-
.join('\n\n---\n\n');
|
|
108
|
+
return [protocolPrompt, actorPrompt].filter((p) => p.length > 0).join('\n\n---\n\n');
|
|
212
109
|
}
|
|
213
110
|
|
|
214
111
|
function actorConcurrency(tiers: HostActorTiers): number {
|
|
@@ -220,6 +117,10 @@ function actorConcurrency(tiers: HostActorTiers): number {
|
|
|
220
117
|
return 1;
|
|
221
118
|
}
|
|
222
119
|
|
|
120
|
+
function messageSender(msg: ChannelMessage): string {
|
|
121
|
+
return typeof msg.data['from'] === 'string' ? (msg.data['from'] as string) : 'unknown';
|
|
122
|
+
}
|
|
123
|
+
|
|
223
124
|
interface CliResult {
|
|
224
125
|
status: number;
|
|
225
126
|
stdout: string;
|
|
@@ -230,7 +131,7 @@ function invokeCli(
|
|
|
230
131
|
cli: string,
|
|
231
132
|
systemPrompt: string,
|
|
232
133
|
userMessage: string,
|
|
233
|
-
|
|
134
|
+
env: Record<string, string>,
|
|
234
135
|
): Promise<CliResult> {
|
|
235
136
|
return new Promise((res) => {
|
|
236
137
|
const fullPrompt = `${systemPrompt}\n\n---\n\n${userMessage}`;
|
|
@@ -239,15 +140,13 @@ function invokeCli(
|
|
|
239
140
|
res({ status: 1, stdout: '', stderr: 'tokenized cli is empty' });
|
|
240
141
|
return;
|
|
241
142
|
}
|
|
242
|
-
// detached:
|
|
243
|
-
//
|
|
244
|
-
//
|
|
245
|
-
|
|
246
|
-
// the dispatched actor invokes `crosstalk send` without explicit --from.
|
|
247
|
-
const child = spawn(parts[0], parts.slice(1), {
|
|
143
|
+
// detached: new process group, so the timeout SIGKILL takes the actor's
|
|
144
|
+
// children with it — orphans writing to the transport after a timeout
|
|
145
|
+
// was an observed v5 hazard.
|
|
146
|
+
const child = spawn(parts[0]!, parts.slice(1), {
|
|
248
147
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
249
148
|
detached: true,
|
|
250
|
-
env: { ...process.env,
|
|
149
|
+
env: { ...process.env, ...env },
|
|
251
150
|
});
|
|
252
151
|
let stdout = '';
|
|
253
152
|
let stderr = '';
|
|
@@ -255,20 +154,14 @@ function invokeCli(
|
|
|
255
154
|
const timeout = setTimeout(() => {
|
|
256
155
|
if (resolved) return;
|
|
257
156
|
resolved = true;
|
|
258
|
-
// SIGKILL the process group (negative pid) so any children the actor
|
|
259
|
-
// spawned (e.g. crosstalk send subprocesses) die with the parent.
|
|
260
|
-
// Fallback to single-pid kill if the group signal fails (some envs).
|
|
261
157
|
try {
|
|
262
|
-
if (typeof child.pid === 'number')
|
|
263
|
-
|
|
264
|
-
} else {
|
|
265
|
-
child.kill('SIGKILL');
|
|
266
|
-
}
|
|
158
|
+
if (typeof child.pid === 'number') process.kill(-child.pid, 'SIGKILL');
|
|
159
|
+
else child.kill('SIGKILL');
|
|
267
160
|
} catch {
|
|
268
161
|
try { child.kill('SIGKILL'); } catch { /* already dead */ }
|
|
269
162
|
}
|
|
270
163
|
res({ status: 124, stdout, stderr: stderr + '\n[timeout]' });
|
|
271
|
-
},
|
|
164
|
+
}, CLI_TIMEOUT_MS);
|
|
272
165
|
child.stdout.on('data', (d) => { stdout += d.toString(); });
|
|
273
166
|
child.stderr.on('data', (d) => { stderr += d.toString(); });
|
|
274
167
|
child.on('close', (code) => {
|
|
@@ -283,158 +176,53 @@ function invokeCli(
|
|
|
283
176
|
clearTimeout(timeout);
|
|
284
177
|
res({ status: 1, stdout, stderr: stderr + '\n' + err.message });
|
|
285
178
|
});
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
child.stdin.on('error', () => { /* EPIPE/etc. — child closed stdin */ });
|
|
290
|
-
try {
|
|
291
|
-
child.stdin.write(fullPrompt);
|
|
292
|
-
} catch { /* same: child closed stdin before we could write */ }
|
|
293
|
-
try {
|
|
294
|
-
child.stdin.end();
|
|
295
|
-
} catch { /* ignore */ }
|
|
179
|
+
child.stdin.on('error', () => { /* child closed stdin */ });
|
|
180
|
+
try { child.stdin.write(fullPrompt); } catch { /* same */ }
|
|
181
|
+
try { child.stdin.end(); } catch { /* ignore */ }
|
|
296
182
|
});
|
|
297
183
|
}
|
|
298
184
|
|
|
299
185
|
function writeReply(
|
|
300
186
|
channelUuid: string,
|
|
301
187
|
fromActor: string,
|
|
302
|
-
toActor: string
|
|
188
|
+
toActor: string,
|
|
189
|
+
re: string | string[],
|
|
303
190
|
body: string,
|
|
304
191
|
): void {
|
|
305
192
|
const ts = now();
|
|
306
193
|
const dir = join(transportRoot, 'data', 'channels', channelUuid, ts.pathDate);
|
|
307
194
|
mkdirSync(dir, { recursive: true });
|
|
308
|
-
// Auto-replies emitted via stdout are `kind: result` by default — the actor
|
|
309
|
-
// is answering, not initiating new work. Recipients only wake on a result if
|
|
310
|
-
// they previously asked the sender for work in this channel (reply
|
|
311
|
-
// causality, see activation rule below). Actors that want to dispatch new
|
|
312
|
-
// work do so explicitly via `crosstalk send --kind work`.
|
|
313
195
|
const content = serializeFrontmatter(
|
|
314
|
-
{ from: fromActor, to: toActor, type: 'text',
|
|
196
|
+
{ from: fromActor, to: toActor, type: 'text', timestamp: ts.iso, re },
|
|
315
197
|
body,
|
|
316
198
|
);
|
|
317
199
|
writeFileSync(join(dir, messageFilename(ts)), content);
|
|
318
200
|
}
|
|
319
201
|
|
|
320
|
-
function writeReadReceipt(
|
|
321
|
-
channelUuid: string,
|
|
322
|
-
fromActor: string,
|
|
323
|
-
toActor: string,
|
|
324
|
-
ref: string,
|
|
325
|
-
): void {
|
|
326
|
-
const ts = now();
|
|
327
|
-
const dir = join(transportRoot, 'data', 'channels', channelUuid, ts.pathDate);
|
|
328
|
-
mkdirSync(dir, { recursive: true });
|
|
329
|
-
const content = serializeFrontmatter(
|
|
330
|
-
{ from: fromActor, to: toActor, type: 'read', ref, timestamp: ts.iso },
|
|
331
|
-
'',
|
|
332
|
-
);
|
|
333
|
-
writeFileSync(join(dir, messageFilename(ts)), content);
|
|
334
|
-
}
|
|
335
|
-
|
|
336
|
-
interface PendingDispatch {
|
|
337
|
-
actorName: string;
|
|
338
|
-
channelUuid: string;
|
|
339
|
-
msgs: ChannelMessage[]; // all unread messages addressed to this actor in this channel
|
|
340
|
-
tiers: HostActorTiers;
|
|
341
|
-
}
|
|
342
|
-
|
|
343
|
-
function messageSender(msg: ChannelMessage): string {
|
|
344
|
-
return typeof msg.data['from'] === 'string' ? msg.data['from'] : 'unknown';
|
|
345
|
-
}
|
|
346
|
-
|
|
347
202
|
function formatBatchedUserMessage(msgs: ChannelMessage[]): string {
|
|
348
|
-
if (msgs.length === 1) return msgs[0]
|
|
349
|
-
const
|
|
350
|
-
const parts: string[] = [header];
|
|
203
|
+
if (msgs.length === 1) return msgs[0]!.body;
|
|
204
|
+
const parts = [`You have ${msgs.length} new messages in this channel. Process them collectively and reply once.`];
|
|
351
205
|
for (let i = 0; i < msgs.length; i++) {
|
|
352
|
-
const m = msgs[i]
|
|
353
|
-
const
|
|
354
|
-
|
|
355
|
-
parts.push(`--- Message ${i + 1} of ${msgs.length} (from: ${from}, ref: ${m.relPath}${ts ? `, ts: ${ts}` : ''}) ---`);
|
|
206
|
+
const m = msgs[i]!;
|
|
207
|
+
const ts = typeof m.data['timestamp'] === 'string' ? `, ts: ${m.data['timestamp']}` : '';
|
|
208
|
+
parts.push(`--- Message ${i + 1} of ${msgs.length} (from: ${messageSender(m)}, ref: ${m.relPath}${ts}) ---`);
|
|
356
209
|
parts.push(m.body);
|
|
357
210
|
}
|
|
358
211
|
return parts.join('\n\n');
|
|
359
212
|
}
|
|
360
213
|
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
//
|
|
367
|
-
// When pending fits within concurrency, every batch is a single message
|
|
368
|
-
// (preserves parallel fan-out — junior-developer with count: 10 and 10
|
|
369
|
-
// pending fan-out messages dispatches 10 parallel CLI invocations of 1
|
|
370
|
-
// message each). When pending exceeds concurrency, batches collapse pending
|
|
371
|
-
// into ~concurrency parallel invocations, each handling ceil(N/concurrency)
|
|
372
|
-
// messages (preserves the fan-in collapse — concierge with count: 1 and 10
|
|
373
|
-
// pending replies dispatches 1 invocation of 10 messages).
|
|
374
|
-
function splitForConcurrency(
|
|
375
|
-
msgs: ChannelMessage[],
|
|
376
|
-
concurrency: number,
|
|
377
|
-
): ChannelMessage[][] {
|
|
378
|
-
if (concurrency <= 1 || msgs.length <= 1) return [msgs];
|
|
379
|
-
const chunkSize = Math.max(1, Math.ceil(msgs.length / concurrency));
|
|
380
|
-
const out: ChannelMessage[][] = [];
|
|
381
|
-
for (let i = 0; i < msgs.length; i += chunkSize) {
|
|
382
|
-
out.push(msgs.slice(i, i + chunkSize));
|
|
383
|
-
}
|
|
384
|
-
return out;
|
|
385
|
-
}
|
|
386
|
-
|
|
387
|
-
function distinctSenders(msgs: ChannelMessage[]): string[] {
|
|
388
|
-
const seen = new Set<string>();
|
|
389
|
-
const out: string[] = [];
|
|
390
|
-
for (const m of msgs) {
|
|
391
|
-
const s = messageSender(m);
|
|
392
|
-
if (s !== 'unknown' && !seen.has(s)) {
|
|
393
|
-
seen.add(s);
|
|
394
|
-
out.push(s);
|
|
395
|
-
}
|
|
396
|
-
}
|
|
397
|
-
return out;
|
|
214
|
+
interface PendingDispatch {
|
|
215
|
+
actorName: string;
|
|
216
|
+
channelUuid: string;
|
|
217
|
+
msgs: ChannelMessage[];
|
|
218
|
+
tiers: HostActorTiers;
|
|
398
219
|
}
|
|
399
220
|
|
|
400
221
|
async function dispatchOne(p: PendingDispatch): Promise<boolean> {
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
const firstMsg = p.msgs[0];
|
|
406
|
-
const lastMsg = p.msgs[p.msgs.length - 1];
|
|
407
|
-
const preferredTier = typeof firstMsg.data['tier'] === 'string'
|
|
408
|
-
? (firstMsg.data['tier'] as string)
|
|
409
|
-
: undefined;
|
|
410
|
-
let resolved;
|
|
411
|
-
try {
|
|
412
|
-
resolved = pickTier(p.tiers, preferredTier);
|
|
413
|
-
} catch (err) {
|
|
414
|
-
const r = writeDlqEntry(
|
|
415
|
-
transportRoot,
|
|
416
|
-
'config',
|
|
417
|
-
p.actorName,
|
|
418
|
-
'(config)',
|
|
419
|
-
'(config)',
|
|
420
|
-
`tier selection failed: ${(err as Error).message}`,
|
|
421
|
-
);
|
|
422
|
-
log('actor_config_error', {
|
|
423
|
-
actor: p.actorName,
|
|
424
|
-
dlq_id: r.id,
|
|
425
|
-
attempts: r.attempts,
|
|
426
|
-
quarantined: r.quarantined,
|
|
427
|
-
});
|
|
428
|
-
return false;
|
|
429
|
-
}
|
|
430
|
-
const cli = resolved.cli;
|
|
431
|
-
|
|
432
|
-
// Quarantine check uses the LAST message's relPath as the batch's identity.
|
|
433
|
-
// Per-message quarantine semantics are preserved because batch boundaries
|
|
434
|
-
// align with cursor checkpoints; if a single message in a batch keeps
|
|
435
|
-
// failing, the cursor never advances past it and it surfaces as a singleton
|
|
436
|
-
// batch on the next tick.
|
|
437
|
-
if (isQuarantined(transportRoot, 'dispatch', p.actorName, p.channelUuid, lastMsg.relPath)) {
|
|
222
|
+
const firstMsg = p.msgs[0]!;
|
|
223
|
+
const lastMsg = p.msgs[p.msgs.length - 1]!;
|
|
224
|
+
|
|
225
|
+
if (isQuarantined(transportRoot, p.actorName, p.channelUuid, lastMsg.relPath)) {
|
|
438
226
|
log('dispatch_skipped_quarantined', {
|
|
439
227
|
actor: p.actorName,
|
|
440
228
|
channel: p.channelUuid.slice(0, 8),
|
|
@@ -443,6 +231,17 @@ async function dispatchOne(p: PendingDispatch): Promise<boolean> {
|
|
|
443
231
|
return false;
|
|
444
232
|
}
|
|
445
233
|
|
|
234
|
+
const preferredTier = typeof firstMsg.data['tier'] === 'string' ? (firstMsg.data['tier'] as string) : undefined;
|
|
235
|
+
let cli: string;
|
|
236
|
+
let profile;
|
|
237
|
+
try {
|
|
238
|
+
cli = pickTier(p.tiers, preferredTier).cli;
|
|
239
|
+
profile = loadActorProfile(transportRoot, p.actorName);
|
|
240
|
+
} catch (err) {
|
|
241
|
+
logConfigError(`actor:${p.actorName}`, (err as Error).message);
|
|
242
|
+
return false;
|
|
243
|
+
}
|
|
244
|
+
|
|
446
245
|
log('dispatch', {
|
|
447
246
|
actor: p.actorName,
|
|
448
247
|
channel: p.channelUuid.slice(0, 8),
|
|
@@ -451,42 +250,22 @@ async function dispatchOne(p: PendingDispatch): Promise<boolean> {
|
|
|
451
250
|
last_msg: lastMsg.relPath,
|
|
452
251
|
});
|
|
453
252
|
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
transportRoot,
|
|
467
|
-
'config',
|
|
468
|
-
p.actorName,
|
|
469
|
-
'(config)',
|
|
470
|
-
'(config)',
|
|
471
|
-
`actor profile load failed: ${(err as Error).message}`,
|
|
472
|
-
);
|
|
473
|
-
log('dispatch_config_error', {
|
|
474
|
-
actor: p.actorName,
|
|
475
|
-
dlq_id: r.id,
|
|
476
|
-
attempts: r.attempts,
|
|
477
|
-
quarantined: r.quarantined,
|
|
478
|
-
});
|
|
479
|
-
return false;
|
|
480
|
-
}
|
|
481
|
-
|
|
482
|
-
const systemPrompt = composeSystemPrompt(profile.systemPrompt);
|
|
483
|
-
const userMessage = formatBatchedUserMessage(p.msgs);
|
|
484
|
-
const result = await invokeCli(cli, systemPrompt, userMessage, p.actorName);
|
|
253
|
+
const result = await invokeCli(
|
|
254
|
+
cli,
|
|
255
|
+
composeSystemPrompt(profile.systemPrompt),
|
|
256
|
+
formatBatchedUserMessage(p.msgs),
|
|
257
|
+
{
|
|
258
|
+
CROSSTALK_DISPATCH_ACTOR: p.actorName,
|
|
259
|
+
CROSSTALK_DISPATCH_CHANNEL: p.channelUuid,
|
|
260
|
+
// Every relPath in the batch — `crosstalk send` records them all as
|
|
261
|
+
// the reply's re: list, so batching never loses an answered message.
|
|
262
|
+
CROSSTALK_DISPATCH_RE: p.msgs.map((m) => m.relPath).join(','),
|
|
263
|
+
},
|
|
264
|
+
);
|
|
485
265
|
|
|
486
266
|
if (result.status !== 0) {
|
|
487
267
|
const r = writeDlqEntry(
|
|
488
268
|
transportRoot,
|
|
489
|
-
'dispatch',
|
|
490
269
|
p.actorName,
|
|
491
270
|
p.channelUuid,
|
|
492
271
|
lastMsg.relPath,
|
|
@@ -506,45 +285,26 @@ async function dispatchOne(p: PendingDispatch): Promise<boolean> {
|
|
|
506
285
|
|
|
507
286
|
const reply = result.stdout.trim();
|
|
508
287
|
if (reply.length === 0) {
|
|
509
|
-
//
|
|
510
|
-
//
|
|
511
|
-
//
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
if (p.msgs.length > 1) {
|
|
515
|
-
log('dispatch_batch_silent_ok', {
|
|
516
|
-
actor: p.actorName,
|
|
517
|
-
channel: p.channelUuid.slice(0, 8),
|
|
518
|
-
batch_size: p.msgs.length,
|
|
519
|
-
});
|
|
520
|
-
return true;
|
|
521
|
-
}
|
|
522
|
-
const r = writeDlqEntry(
|
|
523
|
-
transportRoot,
|
|
524
|
-
'dispatch',
|
|
525
|
-
p.actorName,
|
|
526
|
-
p.channelUuid,
|
|
527
|
-
lastMsg.relPath,
|
|
528
|
-
'cli returned empty reply',
|
|
529
|
-
);
|
|
530
|
-
log('dispatch_empty_reply', {
|
|
531
|
-
actor: p.actorName,
|
|
532
|
-
channel: p.channelUuid.slice(0, 8),
|
|
533
|
-
dlq_id: r.id,
|
|
534
|
-
attempts: r.attempts,
|
|
535
|
-
quarantined: r.quarantined,
|
|
536
|
-
});
|
|
537
|
-
return false;
|
|
288
|
+
// Legitimate: the actor routed its answer via `crosstalk send` (which
|
|
289
|
+
// auto-links re:). If it truly did nothing, the asker's `crosstalk
|
|
290
|
+
// replies` stays PENDING — visible, not silently lost.
|
|
291
|
+
log('dispatch_silent', { actor: p.actorName, channel: p.channelUuid.slice(0, 8), batch_size: p.msgs.length });
|
|
292
|
+
return true;
|
|
538
293
|
}
|
|
539
294
|
|
|
540
|
-
//
|
|
541
|
-
//
|
|
542
|
-
//
|
|
543
|
-
const
|
|
544
|
-
const
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
295
|
+
// One reply per distinct sender, re:-linked to EVERY message that sender
|
|
296
|
+
// had in the batch — the asker's activation rule fires, and `crosstalk
|
|
297
|
+
// replies` sees each individual message as answered.
|
|
298
|
+
const bySender = new Map<string, string[]>();
|
|
299
|
+
for (const m of p.msgs) {
|
|
300
|
+
const sender = messageSender(m);
|
|
301
|
+
bySender.set(sender, [...(bySender.get(sender) ?? []), m.relPath]);
|
|
302
|
+
}
|
|
303
|
+
bySender.delete('unknown');
|
|
304
|
+
if (bySender.size === 0) bySender.set(messageSender(firstMsg), [firstMsg.relPath]);
|
|
305
|
+
for (const [sender, relPaths] of bySender) {
|
|
306
|
+
writeReply(p.channelUuid, p.actorName, sender, relPaths.length === 1 ? relPaths[0]! : relPaths, reply);
|
|
307
|
+
}
|
|
548
308
|
return true;
|
|
549
309
|
}
|
|
550
310
|
|
|
@@ -554,258 +314,180 @@ interface TickResult {
|
|
|
554
314
|
}
|
|
555
315
|
|
|
556
316
|
async function dispatchTick(): Promise<TickResult> {
|
|
557
|
-
writeHeartbeat();
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
// gives operators full diagnostic info via stdout/json logs.
|
|
570
|
-
log('git_pull_failed', { error: pullResult.error.slice(0, 200) });
|
|
571
|
-
infraOk = false;
|
|
572
|
-
}
|
|
573
|
-
|
|
574
|
-
let host: HostFile;
|
|
575
|
-
try {
|
|
576
|
-
host = findHostFile(transportRoot, hostOverride);
|
|
577
|
-
} catch (err) {
|
|
578
|
-
const r = writeDlqEntry(
|
|
579
|
-
transportRoot,
|
|
580
|
-
'config',
|
|
581
|
-
'(host)',
|
|
582
|
-
'(config)',
|
|
583
|
-
'(config)',
|
|
584
|
-
`host file load failed: ${(err as Error).message}`,
|
|
585
|
-
);
|
|
586
|
-
log('tick_config_error', {
|
|
587
|
-
scope: 'host',
|
|
588
|
-
dlq_id: r.id,
|
|
589
|
-
attempts: r.attempts,
|
|
590
|
-
quarantined: r.quarantined,
|
|
591
|
-
});
|
|
592
|
-
return { didWork: false, infraOk };
|
|
593
|
-
}
|
|
594
|
-
|
|
595
|
-
let didWork = false;
|
|
317
|
+
writeHeartbeat(transportRoot, RUNTIME_VERSION);
|
|
318
|
+
let infraOk = true;
|
|
319
|
+
|
|
320
|
+
const pullResult = gitPull(transportRoot);
|
|
321
|
+
if (!pullResult.ok) {
|
|
322
|
+
// Skip the whole tick: a failed pull can leave origin/HEAD (the cursor
|
|
323
|
+
// baseline) ahead of the working tree, and scanning against that would
|
|
324
|
+
// advance cursors past messages that never materialized.
|
|
325
|
+
logError(transportRoot, 'git_pull', pullResult.error ?? 'unknown');
|
|
326
|
+
log('git_pull_failed', { error: (pullResult.error ?? '').slice(0, 200) });
|
|
327
|
+
return { didWork: false, infraOk: false };
|
|
328
|
+
}
|
|
596
329
|
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
330
|
+
let host: HostFile;
|
|
331
|
+
try {
|
|
332
|
+
host = findHostFile(transportRoot, hostOverride);
|
|
333
|
+
} catch (err) {
|
|
334
|
+
logConfigError('host', (err as Error).message);
|
|
335
|
+
return { didWork: false, infraOk };
|
|
336
|
+
}
|
|
602
337
|
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
// causally a reply is treated as `result` even when an actor (or
|
|
642
|
-
// `crosstalk send`'s default) labelled it `work`, so a fan-in peer
|
|
643
|
-
// mislabeling its reply can't forge a wake-up loop. See PROTOCOL.md
|
|
644
|
-
// "Message kinds".
|
|
645
|
-
const kind = effectiveKind(messages, msg);
|
|
646
|
-
if (kind === 'result' && !hasPriorWork(messages, actorName, from, msg.relPath)) {
|
|
647
|
-
writeCursor(transportRoot, actorName, channelUuid, msg.relPath);
|
|
648
|
-
continue;
|
|
338
|
+
// Cursors are commit hashes, not relPaths: filenames order by sender
|
|
339
|
+
// timestamp but arrive in push order, so a relPath cursor can advance
|
|
340
|
+
// past a slower writer's earlier-stamped message and lose it forever.
|
|
341
|
+
// "New since cursor" is asked of git, which records arrival truthfully.
|
|
342
|
+
const head = cursorBaseline(transportRoot);
|
|
343
|
+
if (!head) {
|
|
344
|
+
logError(transportRoot, 'other', 'git rev-parse failed for origin/HEAD and HEAD — skipping tick');
|
|
345
|
+
return { didWork: false, infraOk: false };
|
|
346
|
+
}
|
|
347
|
+
// diff results keyed by cursor commit (shared across actors on the same
|
|
348
|
+
// cursor); null = commit unknown to this clone -> full re-scan.
|
|
349
|
+
const addedSince = new Map<string, Set<string> | null>();
|
|
350
|
+
|
|
351
|
+
let didWork = false;
|
|
352
|
+
const channels = discoverChannels(transportRoot);
|
|
353
|
+
|
|
354
|
+
for (const actorName of Object.keys(host.actors)) {
|
|
355
|
+
const tiers = host.actors[actorName]!;
|
|
356
|
+
const concurrency = actorConcurrency(tiers);
|
|
357
|
+
const pending: PendingDispatch[] = [];
|
|
358
|
+
|
|
359
|
+
for (const channelUuid of channels) {
|
|
360
|
+
const cursor = readCursor(transportRoot, actorName, channelUuid);
|
|
361
|
+
if (cursor === head) continue;
|
|
362
|
+
|
|
363
|
+
const messages = listChannelMessages(transportRoot, channelUuid);
|
|
364
|
+
const senderByRelPath = new Map(messages.map((m) => [m.relPath, messageSender(m)]));
|
|
365
|
+
const senderOf = (relPath: string) => senderByRelPath.get(relPath);
|
|
366
|
+
|
|
367
|
+
let post = messages;
|
|
368
|
+
if (cursor) {
|
|
369
|
+
let added = addedSince.get(cursor);
|
|
370
|
+
if (added === undefined) {
|
|
371
|
+
const files = newFilesSince(transportRoot, cursor);
|
|
372
|
+
added = files === null ? null : new Set(files);
|
|
373
|
+
addedSince.set(cursor, added);
|
|
374
|
+
if (added === null) {
|
|
375
|
+
logError(transportRoot, 'other', `cursor commit ${cursor.slice(0, 12)} unknown to this clone — full channel re-scan`);
|
|
649
376
|
}
|
|
650
|
-
channelBatch.push(msg);
|
|
651
377
|
}
|
|
652
|
-
if (
|
|
653
|
-
const
|
|
654
|
-
|
|
655
|
-
pending.push({ actorName, channelUuid, msgs: g, tiers });
|
|
656
|
-
}
|
|
378
|
+
if (added !== null) {
|
|
379
|
+
const prefix = `data/channels/${channelUuid}/`;
|
|
380
|
+
post = messages.filter((m) => added.has(prefix + m.relPath));
|
|
657
381
|
}
|
|
658
382
|
}
|
|
383
|
+
if (post.length === 0) {
|
|
384
|
+
writeCursor(transportRoot, actorName, channelUuid, head);
|
|
385
|
+
continue;
|
|
386
|
+
}
|
|
659
387
|
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
388
|
+
const batch: ChannelMessage[] = [];
|
|
389
|
+
for (const msg of post) {
|
|
390
|
+
if (msg.data['type'] !== 'text') continue;
|
|
391
|
+
const decision = decideWake(
|
|
392
|
+
{
|
|
393
|
+
from: messageSender(msg),
|
|
394
|
+
to: recipients(msg.data['to']),
|
|
395
|
+
re: reList(msg.data['re']),
|
|
396
|
+
},
|
|
397
|
+
actorName,
|
|
398
|
+
host.alias,
|
|
399
|
+
senderOf,
|
|
400
|
+
);
|
|
401
|
+
if (decision === 'wake') {
|
|
402
|
+
batch.push(msg);
|
|
403
|
+
} else if (decision === 'wrong-host') {
|
|
404
|
+
log('host_routing_mismatch', {
|
|
405
|
+
actor: actorName,
|
|
406
|
+
this_host: host.alias,
|
|
407
|
+
channel: channelUuid.slice(0, 8),
|
|
408
|
+
msg: msg.relPath,
|
|
409
|
+
to: recipients(msg.data['to']),
|
|
410
|
+
});
|
|
673
411
|
}
|
|
674
412
|
}
|
|
413
|
+
|
|
414
|
+
if (batch.length === 0) {
|
|
415
|
+
writeCursor(transportRoot, actorName, channelUuid, head);
|
|
416
|
+
continue;
|
|
417
|
+
}
|
|
418
|
+
for (const g of splitForConcurrency(batch, concurrency)) {
|
|
419
|
+
pending.push({ actorName, channelUuid, msgs: g, tiers });
|
|
420
|
+
}
|
|
675
421
|
}
|
|
676
422
|
|
|
677
|
-
//
|
|
678
|
-
//
|
|
679
|
-
//
|
|
680
|
-
//
|
|
681
|
-
//
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
const
|
|
688
|
-
|
|
689
|
-
// Same rationale as the pull case above: no writeErrorLog.
|
|
690
|
-
// Repeated push failures shouldn't flood errors/ since that
|
|
691
|
-
// contributes to the same git-deadlock-feedback that pull does.
|
|
692
|
-
const kind = pushResult.committed ? 'git_push' : 'git_commit';
|
|
693
|
-
log('git_push_failed', {
|
|
694
|
-
kind,
|
|
695
|
-
committed_locally: pushResult.committed,
|
|
696
|
-
error: pushResult.error.slice(0, 200),
|
|
697
|
-
});
|
|
698
|
-
infraOk = false;
|
|
423
|
+
// Waves of `concurrency` parallel CLI invocations. The cursor advances
|
|
424
|
+
// to the scanned commit whether each batch succeeded or DLQ'd —
|
|
425
|
+
// at-least-once was attempted; `crosstalk dlq --retry` rewinds the
|
|
426
|
+
// cursor explicitly. A crash mid-wave leaves the cursor behind, so the
|
|
427
|
+
// whole span replays next tick (at-least-once, never lost).
|
|
428
|
+
for (let i = 0; i < pending.length; i += concurrency) {
|
|
429
|
+
const wave = pending.slice(i, i + concurrency);
|
|
430
|
+
const results = await Promise.all(wave.map((p) => dispatchOne(p)));
|
|
431
|
+
if (results.some(Boolean)) didWork = true;
|
|
432
|
+
}
|
|
433
|
+
for (const p of pending) {
|
|
434
|
+
writeCursor(transportRoot, p.actorName, p.channelUuid, head);
|
|
699
435
|
}
|
|
436
|
+
}
|
|
700
437
|
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
}
|
|
438
|
+
if (didWork) {
|
|
439
|
+
const pushResult = await withLock(transportRoot, 'git', async () =>
|
|
440
|
+
gitCommitAndPush(transportRoot, `dispatch: replies ${new Date().toISOString()}`),
|
|
441
|
+
);
|
|
442
|
+
if (!pushResult.ok && pushResult.error) {
|
|
443
|
+
logError(transportRoot, pushResult.committed ? 'git_push' : 'git_commit', pushResult.error);
|
|
444
|
+
log('git_push_failed', { committed_locally: pushResult.committed, error: pushResult.error.slice(0, 200) });
|
|
445
|
+
infraOk = false;
|
|
708
446
|
}
|
|
447
|
+
}
|
|
709
448
|
|
|
710
|
-
|
|
711
|
-
});
|
|
449
|
+
return { didWork, infraOk };
|
|
712
450
|
}
|
|
713
451
|
|
|
714
|
-
async function waitForWakeOrTimeout(ms: number): Promise<
|
|
715
|
-
const
|
|
716
|
-
mkdirSync(wakeDir, { recursive: true });
|
|
452
|
+
async function waitForWakeOrTimeout(ms: number): Promise<void> {
|
|
453
|
+
const dir = stateDir(transportRoot);
|
|
717
454
|
const ac = new AbortController();
|
|
718
455
|
const timer = setTimeout(() => ac.abort(), ms);
|
|
719
456
|
try {
|
|
720
|
-
const watcher = watch(
|
|
457
|
+
const watcher = watch(dir, { signal: ac.signal });
|
|
721
458
|
for await (const ev of watcher) {
|
|
722
|
-
if (ev.filename === 'wake.signal')
|
|
723
|
-
clearTimeout(timer);
|
|
724
|
-
return 'wake';
|
|
725
|
-
}
|
|
459
|
+
if (ev.filename === 'wake.signal') return;
|
|
726
460
|
}
|
|
727
|
-
return 'timeout';
|
|
728
461
|
} catch {
|
|
729
|
-
|
|
462
|
+
/* abort = timeout */
|
|
730
463
|
} finally {
|
|
731
464
|
clearTimeout(timer);
|
|
732
465
|
}
|
|
733
466
|
}
|
|
734
467
|
|
|
735
468
|
async function main(): Promise<void> {
|
|
736
|
-
log('dispatch_start', {
|
|
737
|
-
transport: transportRoot,
|
|
738
|
-
version: RUNTIME_VERSION,
|
|
739
|
-
log_file: logFile ?? null,
|
|
740
|
-
});
|
|
469
|
+
log('dispatch_start', { transport: transportRoot, version: RUNTIME_VERSION, state_dir: stateDir(transportRoot) });
|
|
741
470
|
if (onceMode) {
|
|
742
471
|
await dispatchTick();
|
|
743
|
-
|
|
472
|
+
process.exit(0);
|
|
744
473
|
}
|
|
745
|
-
log('
|
|
474
|
+
log('dispatch_running', { quiet_poll_s: pollSeconds });
|
|
746
475
|
|
|
747
476
|
let consecutiveInfraFailures = 0;
|
|
748
|
-
|
|
749
477
|
while (true) {
|
|
750
478
|
try {
|
|
751
479
|
const r = await dispatchTick();
|
|
752
480
|
if (r.infraOk) {
|
|
753
|
-
if (consecutiveInfraFailures > 0) {
|
|
754
|
-
log('backoff_cleared', { previous_consecutive_failures: consecutiveInfraFailures });
|
|
755
|
-
}
|
|
481
|
+
if (consecutiveInfraFailures > 0) log('backoff_cleared', { previous_failures: consecutiveInfraFailures });
|
|
756
482
|
consecutiveInfraFailures = 0;
|
|
757
483
|
} else {
|
|
758
484
|
consecutiveInfraFailures++;
|
|
759
485
|
}
|
|
760
|
-
|
|
761
|
-
// Backoff kicks in only after a grace period of failures.
|
|
762
486
|
const beyondGrace = Math.max(0, consecutiveInfraFailures - BACKOFF_GRACE);
|
|
763
487
|
const backoffFactor = Math.min(MAX_BACKOFF_MULTIPLIER, 2 ** beyondGrace);
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
log('backoff_active', {
|
|
767
|
-
consecutive_failures: consecutiveInfraFailures,
|
|
768
|
-
factor: backoffFactor,
|
|
769
|
-
});
|
|
488
|
+
if (backoffFactor > 1) {
|
|
489
|
+
log('backoff_active', { consecutive_failures: consecutiveInfraFailures, factor: backoffFactor });
|
|
770
490
|
}
|
|
771
|
-
|
|
772
|
-
// Per-tick heal: deadlock-break when the dispatch loop has been
|
|
773
|
-
// failing for HEAL_THRESHOLD consecutive ticks AND we haven't healed
|
|
774
|
-
// recently. Hard-resets the working tree to origin/<current branch>.
|
|
775
|
-
// Trades any uncommitted local state for forward progress — acceptable
|
|
776
|
-
// because messages/cursors/dlq are pulled back from origin and
|
|
777
|
-
// .turnq/errors are regenerated.
|
|
778
|
-
if (
|
|
779
|
-
consecutiveInfraFailures >= HEAL_THRESHOLD &&
|
|
780
|
-
consecutiveInfraFailures - lastHealAtFailureCount >= HEAL_THRESHOLD
|
|
781
|
-
) {
|
|
782
|
-
try {
|
|
783
|
-
const branchProc = spawn('git', ['rev-parse', '--abbrev-ref', 'HEAD'], {
|
|
784
|
-
cwd: transportRoot,
|
|
785
|
-
stdio: ['ignore', 'pipe', 'ignore'],
|
|
786
|
-
});
|
|
787
|
-
let branchName = '';
|
|
788
|
-
branchProc.stdout.on('data', (d) => { branchName += d.toString(); });
|
|
789
|
-
await new Promise<void>((res) => branchProc.on('close', () => res()));
|
|
790
|
-
const branch = branchName.trim() || 'main';
|
|
791
|
-
log('per_tick_heal_start', {
|
|
792
|
-
consecutive_failures: consecutiveInfraFailures,
|
|
793
|
-
target: `origin/${branch}`,
|
|
794
|
-
});
|
|
795
|
-
await new Promise<void>((res) => {
|
|
796
|
-
const p = spawn('sh', [
|
|
797
|
-
'-c',
|
|
798
|
-
`git rebase --abort 2>/dev/null; git fetch --quiet origin '${branch}' && git reset --hard --quiet 'origin/${branch}' && git clean -fdq`,
|
|
799
|
-
], { cwd: transportRoot, stdio: 'inherit' });
|
|
800
|
-
p.on('close', () => res());
|
|
801
|
-
});
|
|
802
|
-
log('per_tick_heal_done', { target: `origin/${branch}` });
|
|
803
|
-
lastHealAtFailureCount = consecutiveInfraFailures;
|
|
804
|
-
} catch (err) {
|
|
805
|
-
log('per_tick_heal_failed', { error: (err as Error).message });
|
|
806
|
-
}
|
|
807
|
-
}
|
|
808
|
-
|
|
809
491
|
if (r.didWork) {
|
|
810
492
|
await new Promise((res) => setTimeout(res, 1_000 * backoffFactor));
|
|
811
493
|
} else {
|
|
@@ -813,7 +495,7 @@ async function main(): Promise<void> {
|
|
|
813
495
|
}
|
|
814
496
|
} catch (err) {
|
|
815
497
|
const msg = (err as Error).message;
|
|
816
|
-
|
|
498
|
+
logError(transportRoot, 'other', `tick error: ${msg}`);
|
|
817
499
|
log('tick_error', { message: msg });
|
|
818
500
|
consecutiveInfraFailures++;
|
|
819
501
|
await new Promise((res) => setTimeout(res, pollSeconds * 1_000));
|