@cordfuse/crosstalk 5.0.0-alpha.7 → 6.0.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/crosstalk.js +34 -78
- package/package.json +4 -4
- package/src/activation.ts +104 -0
- package/src/attach.ts +1 -1
- package/src/channel.ts +8 -21
- package/src/chat.ts +52 -115
- package/src/dispatch.ts +252 -661
- package/src/dlq.ts +68 -136
- package/src/init.ts +17 -41
- package/src/open.ts +55 -31
- package/src/replies.ts +59 -0
- package/src/send.ts +48 -67
- package/src/state.ts +143 -0
- package/src/status.ts +18 -57
- package/src/transport.ts +68 -198
- package/src/turnq.ts +64 -32
- package/src/upgrade.ts +9 -11
- package/src/wake.ts +5 -6
- package/src/cursor.ts +0 -48
- package/template/.amazonq/rules/crosstalk.md +0 -2
- package/template/.continue/rules/crosstalk.md +0 -7
- package/template/.cursor/rules/crosstalk.mdc +0 -7
- package/template/.github/copilot-instructions.md +0 -2
- package/template/.windsurfrules +0 -2
- package/template/AGENTS.md +0 -2
- package/template/ANTIGRAVITY.md +0 -2
- package/template/CLAUDE.md +0 -2
- package/template/GEMINI.md +0 -2
- package/template/OPENCODE.md +0 -2
- package/template/QWEN.md +0 -2
- package/template/README.md +0 -22
- package/template/local/CROSSTALK.md +0 -4
- package/template/upstream/CROSSTALK-VERSION +0 -1
- package/template/upstream/CROSSTALK.md +0 -589
- package/template/upstream/JITTER.md +0 -24
- package/template/upstream/OPERATOR.md +0 -60
- package/template/upstream/PROTOCOL.md +0 -260
- package/template/upstream/actors/cloud-architect.md +0 -83
- package/template/upstream/actors/concierge.md +0 -130
- package/template/upstream/actors/devops-engineer.md +0 -83
- package/template/upstream/actors/documentation-engineer.md +0 -107
- package/template/upstream/actors/infrastructure-engineer.md +0 -83
- package/template/upstream/actors/junior-developer.md +0 -83
- package/template/upstream/actors/precise-generalist.md +0 -48
- package/template/upstream/actors/product-manager.md +0 -83
- package/template/upstream/actors/qa-engineer.md +0 -83
- package/template/upstream/actors/security-engineer.md +0 -92
- package/template/upstream/actors/senior-generalist-engineer.md +0 -111
- package/template/upstream/actors/senior-software-engineer.md +0 -94
- package/template/upstream/actors/skeptic.md +0 -89
- package/template/upstream/actors/technical-writer.md +0 -89
- package/template/upstream/actors/ux-designer.md +0 -83
package/src/dispatch.ts
CHANGED
|
@@ -1,30 +1,19 @@
|
|
|
1
|
+
// crosstalk dispatch — the loop.
|
|
2
|
+
//
|
|
3
|
+
// Tick: pull → for each local actor, scan channels for messages past the
|
|
4
|
+
// cursor → decideWake (activation.ts, the one rule) → invoke the actor's
|
|
5
|
+
// CLI per batch → write replies (re: linked per sender) → commit+push.
|
|
6
|
+
//
|
|
7
|
+
// Only the commit+push is locked, and the lock is advisory (turnq.ts) —
|
|
8
|
+
// git arbitrates correctness. Cursors, DLQ, heartbeat and the error log
|
|
9
|
+
// live in the machine-local state dir (state.ts), so a tick's commit only
|
|
10
|
+
// ever contains data/ and there is no self-inflicted git deadlock to heal.
|
|
11
|
+
|
|
1
12
|
import { resolve, join, dirname } from 'path';
|
|
2
13
|
import { spawn } from 'child_process';
|
|
3
|
-
import {
|
|
4
|
-
mkdirSync,
|
|
5
|
-
writeFileSync,
|
|
6
|
-
readFileSync,
|
|
7
|
-
existsSync,
|
|
8
|
-
appendFileSync,
|
|
9
|
-
openSync,
|
|
10
|
-
closeSync,
|
|
11
|
-
} from 'fs';
|
|
14
|
+
import { mkdirSync, writeFileSync, readFileSync, existsSync, appendFileSync } from 'fs';
|
|
12
15
|
import { watch } from 'fs/promises';
|
|
13
16
|
import { fileURLToPath } from 'url';
|
|
14
|
-
|
|
15
|
-
// Read runtime version from the installed package's package.json at startup
|
|
16
|
-
// so dispatch_start logs and heartbeat content always match the actual
|
|
17
|
-
// installed @cordfuse/crosstalk version. Avoids hand-editing on every release.
|
|
18
|
-
const RUNTIME_VERSION: string = (() => {
|
|
19
|
-
try {
|
|
20
|
-
const thisFileDir = dirname(fileURLToPath(import.meta.url));
|
|
21
|
-
const pkgPath = join(thisFileDir, '..', 'package.json');
|
|
22
|
-
const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8')) as { version?: string };
|
|
23
|
-
return pkg.version ?? 'unknown';
|
|
24
|
-
} catch {
|
|
25
|
-
return 'unknown';
|
|
26
|
-
}
|
|
27
|
-
})();
|
|
28
17
|
import {
|
|
29
18
|
findHostFile,
|
|
30
19
|
loadActorProfile,
|
|
@@ -38,15 +27,31 @@ import {
|
|
|
38
27
|
listChannelMessages,
|
|
39
28
|
gitPull,
|
|
40
29
|
gitCommitAndPush,
|
|
41
|
-
|
|
42
|
-
|
|
30
|
+
cursorBaseline,
|
|
31
|
+
newFilesSince,
|
|
43
32
|
type ChannelMessage,
|
|
44
33
|
} from './transport.js';
|
|
45
|
-
import {
|
|
34
|
+
import {
|
|
35
|
+
stateDir,
|
|
36
|
+
readCursor,
|
|
37
|
+
writeCursor,
|
|
38
|
+
writeHeartbeat,
|
|
39
|
+
logError,
|
|
40
|
+
} from './state.js';
|
|
41
|
+
import { recipients, reList, decideWake, splitForConcurrency } from './activation.js';
|
|
46
42
|
import { now, messageFilename } from './filenames.js';
|
|
47
43
|
import { serializeFrontmatter } from './frontmatter.js';
|
|
48
44
|
import { withLock } from './turnq.js';
|
|
49
|
-
import { writeDlqEntry, isQuarantined
|
|
45
|
+
import { writeDlqEntry, isQuarantined } from './dlq.js';
|
|
46
|
+
|
|
47
|
+
const RUNTIME_VERSION: string = (() => {
|
|
48
|
+
try {
|
|
49
|
+
const pkgPath = join(dirname(fileURLToPath(import.meta.url)), '..', 'package.json');
|
|
50
|
+
return (JSON.parse(readFileSync(pkgPath, 'utf-8')) as { version?: string }).version ?? 'unknown';
|
|
51
|
+
} catch {
|
|
52
|
+
return 'unknown';
|
|
53
|
+
}
|
|
54
|
+
})();
|
|
50
55
|
|
|
51
56
|
const transportRoot = resolve(process.cwd());
|
|
52
57
|
const argv = process.argv.slice(2);
|
|
@@ -63,27 +68,9 @@ const hostOverride = flag('--host');
|
|
|
63
68
|
const pollSeconds = Number(flag('--poll')) || 30;
|
|
64
69
|
const logFile = flag('--log-file');
|
|
65
70
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
const
|
|
69
|
-
const BACKOFF_GRACE = 2; // first N failures don't trigger backoff
|
|
70
|
-
|
|
71
|
-
// Per-tick heal: when N consecutive infra failures pile up, the dispatch
|
|
72
|
-
// loop is stuck in a deadlock that entrypoint's boot-time auto-recovery
|
|
73
|
-
// can't break (because dispatch is already running). At HEAL_THRESHOLD
|
|
74
|
-
// consecutive failures, attempt a `git fetch && reset --hard origin/<branch>
|
|
75
|
-
// && clean -fd` from inside the tick loop. Mirrors the entrypoint logic.
|
|
76
|
-
// Throttled — won't reattempt until fully BACKOFF_GRACE+HEAL_THRESHOLD more
|
|
77
|
-
// failures pile up after a heal, to avoid heal-loop-storms.
|
|
78
|
-
const HEAL_THRESHOLD = 5;
|
|
79
|
-
let lastHealAtFailureCount = 0;
|
|
80
|
-
|
|
81
|
-
// Stale-read-receipt sweep config — runs at most every SWEEP_INTERVAL_MS
|
|
82
|
-
// of wall-clock to surface read receipts that never produced a reply
|
|
83
|
-
// (indicates dispatch crashed mid-tick or CLI hung silently).
|
|
84
|
-
const SWEEP_INTERVAL_MS = 5 * 60_000;
|
|
85
|
-
const STALE_RECEIPT_THRESHOLD_MS = 5 * 60_000;
|
|
86
|
-
let lastSweepAt = 0;
|
|
71
|
+
const CLI_TIMEOUT_MS = 5 * 60_000;
|
|
72
|
+
const MAX_BACKOFF_MULTIPLIER = 10;
|
|
73
|
+
const BACKOFF_GRACE = 2;
|
|
87
74
|
|
|
88
75
|
function log(event: string, fields: Record<string, unknown> = {}): void {
|
|
89
76
|
let line: string;
|
|
@@ -101,185 +88,24 @@ function log(event: string, fields: Record<string, unknown> = {}): void {
|
|
|
101
88
|
}
|
|
102
89
|
}
|
|
103
90
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
91
|
+
// Config errors (bad host file, bad actor profile) repeat every tick until
|
|
92
|
+
// fixed — log each distinct one once per process run, not once per tick.
|
|
93
|
+
const loggedConfigErrors = new Set<string>();
|
|
94
|
+
function logConfigError(scope: string, message: string): void {
|
|
95
|
+
const key = `${scope}::${message}`;
|
|
96
|
+
if (loggedConfigErrors.has(key)) return;
|
|
97
|
+
loggedConfigErrors.add(key);
|
|
98
|
+
logError(transportRoot, 'parse', `${scope}: ${message}`);
|
|
99
|
+
log('config_error', { scope, message: message.slice(0, 200) });
|
|
111
100
|
}
|
|
112
101
|
|
|
113
|
-
|
|
102
|
+
const protocolPrompt = (() => {
|
|
114
103
|
const p = join(transportRoot, 'upstream', 'PROTOCOL.md');
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
const protocolPrompt = loadProtocolPrompt();
|
|
120
|
-
|
|
121
|
-
function recipients(toField: unknown): string[] {
|
|
122
|
-
if (Array.isArray(toField)) return toField.map(String);
|
|
123
|
-
if (typeof toField === 'string') return [toField];
|
|
124
|
-
return [];
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
// A `to:` recipient is either a bare actor name (`junior-developer`) or
|
|
128
|
-
// an actor@host pair (`junior-developer@cachy`). Bare names broadcast to
|
|
129
|
-
// every host that declares the actor; @host narrows to one host.
|
|
130
|
-
//
|
|
131
|
-
// Documented in concierge.md "Host-aware routing"; honored by the runtime
|
|
132
|
-
// as of alpha.7 step 1. Prior to this, the recipient string was matched
|
|
133
|
-
// verbatim against the actor name, so `junior-developer@cachy` never
|
|
134
|
-
// matched the cachy dispatcher's `junior-developer` actor declaration —
|
|
135
|
-
// the harness's first cross-host bug.
|
|
136
|
-
function extractActor(recipient: string): string {
|
|
137
|
-
const at = recipient.indexOf('@');
|
|
138
|
-
return at === -1 ? recipient : recipient.slice(0, at);
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
function targetHost(recipient: string): string | null {
|
|
142
|
-
const at = recipient.indexOf('@');
|
|
143
|
-
return at === -1 ? null : recipient.slice(at + 1);
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
// Does `recipientList` address `actorName` on `thisHost`? Returns the match
|
|
147
|
-
// outcome plus a flag for "actor was named but every instance targeted a
|
|
148
|
-
// different host" — useful as a diagnostic so silent wrong-host routes are
|
|
149
|
-
// logged rather than dropped without trace.
|
|
150
|
-
function matchHostRouting(
|
|
151
|
-
recipientList: string[],
|
|
152
|
-
actorName: string,
|
|
153
|
-
thisHost: string,
|
|
154
|
-
): { addressed: boolean; wrongHost: boolean } {
|
|
155
|
-
let addressed = false;
|
|
156
|
-
let actorNamedAtAll = false;
|
|
157
|
-
for (const r of recipientList) {
|
|
158
|
-
if (extractActor(r) !== actorName) continue;
|
|
159
|
-
actorNamedAtAll = true;
|
|
160
|
-
const host = targetHost(r);
|
|
161
|
-
if (host === null || host === thisHost) {
|
|
162
|
-
addressed = true;
|
|
163
|
-
break;
|
|
164
|
-
}
|
|
165
|
-
}
|
|
166
|
-
return { addressed, wrongHost: !addressed && actorNamedAtAll };
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
// Host-agnostic actor name check, used by causality scans (isCausalReply,
|
|
170
|
-
// hasPriorWork) where the question is "does this recipient list name actor
|
|
171
|
-
// X at all?" — host doesn't matter because the `from` field of replies
|
|
172
|
-
// doesn't carry a host suffix either.
|
|
173
|
-
function namesActor(recipientList: string[], actorName: string): boolean {
|
|
174
|
-
for (const r of recipientList) {
|
|
175
|
-
if (extractActor(r) === actorName) return true;
|
|
176
|
-
}
|
|
177
|
-
return false;
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
// Declared lifecycle kind for a message. `work` (default for legacy messages
|
|
181
|
-
// without the field) is the as-tagged intent. The runtime does NOT trust this
|
|
182
|
-
// value directly for the activation decision — see effectiveKind() below.
|
|
183
|
-
// Kept for use as the seed of the effective-kind computation.
|
|
184
|
-
function messageKind(msg: ChannelMessage): 'work' | 'result' {
|
|
185
|
-
const raw = msg.data['kind'];
|
|
186
|
-
return raw === 'result' ? 'result' : 'work';
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
// Is `msg` causally a reply to a prior ask? True iff some message strictly
|
|
190
|
-
// before `msg` was sent FROM one of `msg`'s recipients TO `msg`'s sender with
|
|
191
|
-
// declared kind `work`. If so, `msg` is that recipient's answer coming back —
|
|
192
|
-
// regardless of how its sender (a fallible LLM actor, or `crosstalk send`'s
|
|
193
|
-
// `work` default) labelled it.
|
|
194
|
-
//
|
|
195
|
-
// Conservative on multi-recipient `to:` lists: if ANY recipient previously
|
|
196
|
-
// tasked the sender, the message is treated as causally a reply for all
|
|
197
|
-
// recipients. The per-addressee asymmetry in hasPriorWork (below) compensates
|
|
198
|
-
// — only the recipient that actually asked wakes on it. Known v1 limitation:
|
|
199
|
-
// genuine multi-recipient fan-out where one recipient happens to have prior
|
|
200
|
-
// unrelated work to the sender will be demoted to result and suppress wakes
|
|
201
|
-
// for the other recipients. Not observed in Monte Carlo; revisit if it
|
|
202
|
-
// surfaces.
|
|
203
|
-
function isCausalReply(channelMessages: ChannelMessage[], msg: ChannelMessage): boolean {
|
|
204
|
-
const sender = typeof msg.data['from'] === 'string' ? msg.data['from'] : '';
|
|
205
|
-
if (!sender) return false;
|
|
206
|
-
const toList = recipients(msg.data['to']);
|
|
207
|
-
for (const m of channelMessages) {
|
|
208
|
-
if (m.relPath >= msg.relPath) break;
|
|
209
|
-
// Read receipts are bookkeeping, never causal evidence. The activation
|
|
210
|
-
// scan already filters them out before considering a message for
|
|
211
|
-
// dispatch — this filter is the same guard at the causality-helper
|
|
212
|
-
// level, so a receipt from one of msg's recipients to msg's sender
|
|
213
|
-
// can't forge a false causal-reply edge (which would then demote a
|
|
214
|
-
// legitimate `work` to `result` and silently skip it). This was the
|
|
215
|
-
// alpha.7 step 2 finding from the cross-host harness — receipts
|
|
216
|
-
// pre-existing in the channel from cachy's first dispatch burst
|
|
217
|
-
// misclassified mac's subsequent fan-out msgs as replies.
|
|
218
|
-
if (m.data['type'] === 'read') continue;
|
|
219
|
-
const mFrom = typeof m.data['from'] === 'string' ? m.data['from'] : '';
|
|
220
|
-
// Host-agnostic actor name match: `from` fields are bare actor names,
|
|
221
|
-
// but `to` fields may include `@host` suffixes that don't change
|
|
222
|
-
// causal semantics.
|
|
223
|
-
if (!namesActor(toList, mFrom)) continue;
|
|
224
|
-
if ((m.data['kind'] ?? 'work') === 'result') continue;
|
|
225
|
-
if (namesActor(recipients(m.data['to']), sender)) return true;
|
|
226
|
-
}
|
|
227
|
-
return false;
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
// Effective lifecycle kind. The runtime INFERS kind from the causality graph
|
|
231
|
-
// rather than trusting the declared field: a message that is causally a reply
|
|
232
|
-
// is a `result` even if it was labelled `work` (actors routinely report
|
|
233
|
-
// results via `crosstalk send`, which defaults to `work`, and that mislabel
|
|
234
|
-
// forges false reply-causality edges → wake-up loops). Genuine unsolicited
|
|
235
|
-
// tasks (kickoffs, fresh dispatches) have no prior opposite-direction work
|
|
236
|
-
// and keep their `work` kind. See PROTOCOL.md "Message kinds".
|
|
237
|
-
//
|
|
238
|
-
// This is the load-bearing principle the rest of the activation rule rides
|
|
239
|
-
// on: the dispatcher derives semantics from the interaction graph; it never
|
|
240
|
-
// trusts an actor's declaration.
|
|
241
|
-
function effectiveKind(channelMessages: ChannelMessage[], msg: ChannelMessage): 'work' | 'result' {
|
|
242
|
-
if (messageKind(msg) === 'result') return 'result';
|
|
243
|
-
return isCausalReply(channelMessages, msg) ? 'result' : 'work';
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
// Reply causality — does `addressee` have a prior `kind: work` outbound to
|
|
247
|
-
// `sender` somewhere in the channel's history strictly before `before`? If
|
|
248
|
-
// yes, an inbound `kind: result` from `sender` to `addressee` is the answer
|
|
249
|
-
// to that ask, and the addressee should wake on it. If no, the result is
|
|
250
|
-
// unsolicited from addressee's POV and is informational only.
|
|
251
|
-
//
|
|
252
|
-
// Uses effectiveKind (not messageKind) when checking prior messages — a
|
|
253
|
-
// mislabeled "work" reply from a prior peer would otherwise forge a false
|
|
254
|
-
// causality edge here, which was the ping-pong root.
|
|
255
|
-
//
|
|
256
|
-
// The channel is already sorted by relPath ascending in
|
|
257
|
-
// listChannelMessages(), so the scan walks chronologically.
|
|
258
|
-
function hasPriorWork(
|
|
259
|
-
channelMessages: ChannelMessage[],
|
|
260
|
-
addressee: string,
|
|
261
|
-
sender: string,
|
|
262
|
-
before: string,
|
|
263
|
-
): boolean {
|
|
264
|
-
for (const m of channelMessages) {
|
|
265
|
-
if (m.relPath >= before) break;
|
|
266
|
-
// Same receipt filter as isCausalReply — a receipt from `addressee`
|
|
267
|
-
// to `sender` would otherwise look like a prior work outbound and
|
|
268
|
-
// forge a false causal edge here too. Defense against the same
|
|
269
|
-
// bug class at every causality-walking helper.
|
|
270
|
-
if (m.data['type'] === 'read') continue;
|
|
271
|
-
if (typeof m.data['from'] !== 'string' || m.data['from'] !== addressee) continue;
|
|
272
|
-
if (effectiveKind(channelMessages, m) !== 'work') continue;
|
|
273
|
-
const toList = recipients(m.data['to']);
|
|
274
|
-
if (namesActor(toList, sender)) return true;
|
|
275
|
-
}
|
|
276
|
-
return false;
|
|
277
|
-
}
|
|
104
|
+
return existsSync(p) ? readFileSync(p, 'utf-8').trim() : '';
|
|
105
|
+
})();
|
|
278
106
|
|
|
279
107
|
function composeSystemPrompt(actorPrompt: string): string {
|
|
280
|
-
return [protocolPrompt, actorPrompt]
|
|
281
|
-
.filter((p) => p.length > 0)
|
|
282
|
-
.join('\n\n---\n\n');
|
|
108
|
+
return [protocolPrompt, actorPrompt].filter((p) => p.length > 0).join('\n\n---\n\n');
|
|
283
109
|
}
|
|
284
110
|
|
|
285
111
|
function actorConcurrency(tiers: HostActorTiers): number {
|
|
@@ -291,6 +117,10 @@ function actorConcurrency(tiers: HostActorTiers): number {
|
|
|
291
117
|
return 1;
|
|
292
118
|
}
|
|
293
119
|
|
|
120
|
+
function messageSender(msg: ChannelMessage): string {
|
|
121
|
+
return typeof msg.data['from'] === 'string' ? (msg.data['from'] as string) : 'unknown';
|
|
122
|
+
}
|
|
123
|
+
|
|
294
124
|
interface CliResult {
|
|
295
125
|
status: number;
|
|
296
126
|
stdout: string;
|
|
@@ -301,7 +131,7 @@ function invokeCli(
|
|
|
301
131
|
cli: string,
|
|
302
132
|
systemPrompt: string,
|
|
303
133
|
userMessage: string,
|
|
304
|
-
|
|
134
|
+
env: Record<string, string>,
|
|
305
135
|
): Promise<CliResult> {
|
|
306
136
|
return new Promise((res) => {
|
|
307
137
|
const fullPrompt = `${systemPrompt}\n\n---\n\n${userMessage}`;
|
|
@@ -310,15 +140,13 @@ function invokeCli(
|
|
|
310
140
|
res({ status: 1, stdout: '', stderr: 'tokenized cli is empty' });
|
|
311
141
|
return;
|
|
312
142
|
}
|
|
313
|
-
// detached:
|
|
314
|
-
//
|
|
315
|
-
//
|
|
316
|
-
|
|
317
|
-
// the dispatched actor invokes `crosstalk send` without explicit --from.
|
|
318
|
-
const child = spawn(parts[0], parts.slice(1), {
|
|
143
|
+
// detached: new process group, so the timeout SIGKILL takes the actor's
|
|
144
|
+
// children with it — orphans writing to the transport after a timeout
|
|
145
|
+
// was an observed v5 hazard.
|
|
146
|
+
const child = spawn(parts[0]!, parts.slice(1), {
|
|
319
147
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
320
148
|
detached: true,
|
|
321
|
-
env: { ...process.env,
|
|
149
|
+
env: { ...process.env, ...env },
|
|
322
150
|
});
|
|
323
151
|
let stdout = '';
|
|
324
152
|
let stderr = '';
|
|
@@ -326,20 +154,14 @@ function invokeCli(
|
|
|
326
154
|
const timeout = setTimeout(() => {
|
|
327
155
|
if (resolved) return;
|
|
328
156
|
resolved = true;
|
|
329
|
-
// SIGKILL the process group (negative pid) so any children the actor
|
|
330
|
-
// spawned (e.g. crosstalk send subprocesses) die with the parent.
|
|
331
|
-
// Fallback to single-pid kill if the group signal fails (some envs).
|
|
332
157
|
try {
|
|
333
|
-
if (typeof child.pid === 'number')
|
|
334
|
-
|
|
335
|
-
} else {
|
|
336
|
-
child.kill('SIGKILL');
|
|
337
|
-
}
|
|
158
|
+
if (typeof child.pid === 'number') process.kill(-child.pid, 'SIGKILL');
|
|
159
|
+
else child.kill('SIGKILL');
|
|
338
160
|
} catch {
|
|
339
161
|
try { child.kill('SIGKILL'); } catch { /* already dead */ }
|
|
340
162
|
}
|
|
341
163
|
res({ status: 124, stdout, stderr: stderr + '\n[timeout]' });
|
|
342
|
-
},
|
|
164
|
+
}, CLI_TIMEOUT_MS);
|
|
343
165
|
child.stdout.on('data', (d) => { stdout += d.toString(); });
|
|
344
166
|
child.stderr.on('data', (d) => { stderr += d.toString(); });
|
|
345
167
|
child.on('close', (code) => {
|
|
@@ -354,158 +176,53 @@ function invokeCli(
|
|
|
354
176
|
clearTimeout(timeout);
|
|
355
177
|
res({ status: 1, stdout, stderr: stderr + '\n' + err.message });
|
|
356
178
|
});
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
child.stdin.on('error', () => { /* EPIPE/etc. — child closed stdin */ });
|
|
361
|
-
try {
|
|
362
|
-
child.stdin.write(fullPrompt);
|
|
363
|
-
} catch { /* same: child closed stdin before we could write */ }
|
|
364
|
-
try {
|
|
365
|
-
child.stdin.end();
|
|
366
|
-
} catch { /* ignore */ }
|
|
179
|
+
child.stdin.on('error', () => { /* child closed stdin */ });
|
|
180
|
+
try { child.stdin.write(fullPrompt); } catch { /* same */ }
|
|
181
|
+
try { child.stdin.end(); } catch { /* ignore */ }
|
|
367
182
|
});
|
|
368
183
|
}
|
|
369
184
|
|
|
370
185
|
function writeReply(
|
|
371
186
|
channelUuid: string,
|
|
372
187
|
fromActor: string,
|
|
373
|
-
toActor: string
|
|
188
|
+
toActor: string,
|
|
189
|
+
re: string | string[],
|
|
374
190
|
body: string,
|
|
375
191
|
): void {
|
|
376
192
|
const ts = now();
|
|
377
193
|
const dir = join(transportRoot, 'data', 'channels', channelUuid, ts.pathDate);
|
|
378
194
|
mkdirSync(dir, { recursive: true });
|
|
379
|
-
// Auto-replies emitted via stdout are `kind: result` by default — the actor
|
|
380
|
-
// is answering, not initiating new work. Recipients only wake on a result if
|
|
381
|
-
// they previously asked the sender for work in this channel (reply
|
|
382
|
-
// causality, see activation rule below). Actors that want to dispatch new
|
|
383
|
-
// work do so explicitly via `crosstalk send --kind work`.
|
|
384
195
|
const content = serializeFrontmatter(
|
|
385
|
-
{ from: fromActor, to: toActor, type: 'text',
|
|
196
|
+
{ from: fromActor, to: toActor, type: 'text', timestamp: ts.iso, re },
|
|
386
197
|
body,
|
|
387
198
|
);
|
|
388
199
|
writeFileSync(join(dir, messageFilename(ts)), content);
|
|
389
200
|
}
|
|
390
201
|
|
|
391
|
-
function writeReadReceipt(
|
|
392
|
-
channelUuid: string,
|
|
393
|
-
fromActor: string,
|
|
394
|
-
toActor: string,
|
|
395
|
-
ref: string,
|
|
396
|
-
): void {
|
|
397
|
-
const ts = now();
|
|
398
|
-
const dir = join(transportRoot, 'data', 'channels', channelUuid, ts.pathDate);
|
|
399
|
-
mkdirSync(dir, { recursive: true });
|
|
400
|
-
const content = serializeFrontmatter(
|
|
401
|
-
{ from: fromActor, to: toActor, type: 'read', ref, timestamp: ts.iso },
|
|
402
|
-
'',
|
|
403
|
-
);
|
|
404
|
-
writeFileSync(join(dir, messageFilename(ts)), content);
|
|
405
|
-
}
|
|
406
|
-
|
|
407
|
-
interface PendingDispatch {
|
|
408
|
-
actorName: string;
|
|
409
|
-
channelUuid: string;
|
|
410
|
-
msgs: ChannelMessage[]; // all unread messages addressed to this actor in this channel
|
|
411
|
-
tiers: HostActorTiers;
|
|
412
|
-
}
|
|
413
|
-
|
|
414
|
-
function messageSender(msg: ChannelMessage): string {
|
|
415
|
-
return typeof msg.data['from'] === 'string' ? msg.data['from'] : 'unknown';
|
|
416
|
-
}
|
|
417
|
-
|
|
418
202
|
function formatBatchedUserMessage(msgs: ChannelMessage[]): string {
|
|
419
|
-
if (msgs.length === 1) return msgs[0]
|
|
420
|
-
const
|
|
421
|
-
const parts: string[] = [header];
|
|
203
|
+
if (msgs.length === 1) return msgs[0]!.body;
|
|
204
|
+
const parts = [`You have ${msgs.length} new messages in this channel. Process them collectively and reply once.`];
|
|
422
205
|
for (let i = 0; i < msgs.length; i++) {
|
|
423
|
-
const m = msgs[i]
|
|
424
|
-
const
|
|
425
|
-
|
|
426
|
-
parts.push(`--- Message ${i + 1} of ${msgs.length} (from: ${from}, ref: ${m.relPath}${ts ? `, ts: ${ts}` : ''}) ---`);
|
|
206
|
+
const m = msgs[i]!;
|
|
207
|
+
const ts = typeof m.data['timestamp'] === 'string' ? `, ts: ${m.data['timestamp']}` : '';
|
|
208
|
+
parts.push(`--- Message ${i + 1} of ${msgs.length} (from: ${messageSender(m)}, ref: ${m.relPath}${ts}) ---`);
|
|
427
209
|
parts.push(m.body);
|
|
428
210
|
}
|
|
429
211
|
return parts.join('\n\n');
|
|
430
212
|
}
|
|
431
213
|
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
//
|
|
438
|
-
// When pending fits within concurrency, every batch is a single message
|
|
439
|
-
// (preserves parallel fan-out — junior-developer with count: 10 and 10
|
|
440
|
-
// pending fan-out messages dispatches 10 parallel CLI invocations of 1
|
|
441
|
-
// message each). When pending exceeds concurrency, batches collapse pending
|
|
442
|
-
// into ~concurrency parallel invocations, each handling ceil(N/concurrency)
|
|
443
|
-
// messages (preserves the fan-in collapse — concierge with count: 1 and 10
|
|
444
|
-
// pending replies dispatches 1 invocation of 10 messages).
|
|
445
|
-
function splitForConcurrency(
|
|
446
|
-
msgs: ChannelMessage[],
|
|
447
|
-
concurrency: number,
|
|
448
|
-
): ChannelMessage[][] {
|
|
449
|
-
if (concurrency <= 1 || msgs.length <= 1) return [msgs];
|
|
450
|
-
const chunkSize = Math.max(1, Math.ceil(msgs.length / concurrency));
|
|
451
|
-
const out: ChannelMessage[][] = [];
|
|
452
|
-
for (let i = 0; i < msgs.length; i += chunkSize) {
|
|
453
|
-
out.push(msgs.slice(i, i + chunkSize));
|
|
454
|
-
}
|
|
455
|
-
return out;
|
|
456
|
-
}
|
|
457
|
-
|
|
458
|
-
function distinctSenders(msgs: ChannelMessage[]): string[] {
|
|
459
|
-
const seen = new Set<string>();
|
|
460
|
-
const out: string[] = [];
|
|
461
|
-
for (const m of msgs) {
|
|
462
|
-
const s = messageSender(m);
|
|
463
|
-
if (s !== 'unknown' && !seen.has(s)) {
|
|
464
|
-
seen.add(s);
|
|
465
|
-
out.push(s);
|
|
466
|
-
}
|
|
467
|
-
}
|
|
468
|
-
return out;
|
|
214
|
+
interface PendingDispatch {
|
|
215
|
+
actorName: string;
|
|
216
|
+
channelUuid: string;
|
|
217
|
+
msgs: ChannelMessage[];
|
|
218
|
+
tiers: HostActorTiers;
|
|
469
219
|
}
|
|
470
220
|
|
|
471
221
|
async function dispatchOne(p: PendingDispatch): Promise<boolean> {
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
const firstMsg = p.msgs[0];
|
|
477
|
-
const lastMsg = p.msgs[p.msgs.length - 1];
|
|
478
|
-
const preferredTier = typeof firstMsg.data['tier'] === 'string'
|
|
479
|
-
? (firstMsg.data['tier'] as string)
|
|
480
|
-
: undefined;
|
|
481
|
-
let resolved;
|
|
482
|
-
try {
|
|
483
|
-
resolved = pickTier(p.tiers, preferredTier);
|
|
484
|
-
} catch (err) {
|
|
485
|
-
const r = writeDlqEntry(
|
|
486
|
-
transportRoot,
|
|
487
|
-
'config',
|
|
488
|
-
p.actorName,
|
|
489
|
-
'(config)',
|
|
490
|
-
'(config)',
|
|
491
|
-
`tier selection failed: ${(err as Error).message}`,
|
|
492
|
-
);
|
|
493
|
-
log('actor_config_error', {
|
|
494
|
-
actor: p.actorName,
|
|
495
|
-
dlq_id: r.id,
|
|
496
|
-
attempts: r.attempts,
|
|
497
|
-
quarantined: r.quarantined,
|
|
498
|
-
});
|
|
499
|
-
return false;
|
|
500
|
-
}
|
|
501
|
-
const cli = resolved.cli;
|
|
502
|
-
|
|
503
|
-
// Quarantine check uses the LAST message's relPath as the batch's identity.
|
|
504
|
-
// Per-message quarantine semantics are preserved because batch boundaries
|
|
505
|
-
// align with cursor checkpoints; if a single message in a batch keeps
|
|
506
|
-
// failing, the cursor never advances past it and it surfaces as a singleton
|
|
507
|
-
// batch on the next tick.
|
|
508
|
-
if (isQuarantined(transportRoot, 'dispatch', p.actorName, p.channelUuid, lastMsg.relPath)) {
|
|
222
|
+
const firstMsg = p.msgs[0]!;
|
|
223
|
+
const lastMsg = p.msgs[p.msgs.length - 1]!;
|
|
224
|
+
|
|
225
|
+
if (isQuarantined(transportRoot, p.actorName, p.channelUuid, lastMsg.relPath)) {
|
|
509
226
|
log('dispatch_skipped_quarantined', {
|
|
510
227
|
actor: p.actorName,
|
|
511
228
|
channel: p.channelUuid.slice(0, 8),
|
|
@@ -514,6 +231,17 @@ async function dispatchOne(p: PendingDispatch): Promise<boolean> {
|
|
|
514
231
|
return false;
|
|
515
232
|
}
|
|
516
233
|
|
|
234
|
+
const preferredTier = typeof firstMsg.data['tier'] === 'string' ? (firstMsg.data['tier'] as string) : undefined;
|
|
235
|
+
let cli: string;
|
|
236
|
+
let profile;
|
|
237
|
+
try {
|
|
238
|
+
cli = pickTier(p.tiers, preferredTier).cli;
|
|
239
|
+
profile = loadActorProfile(transportRoot, p.actorName);
|
|
240
|
+
} catch (err) {
|
|
241
|
+
logConfigError(`actor:${p.actorName}`, (err as Error).message);
|
|
242
|
+
return false;
|
|
243
|
+
}
|
|
244
|
+
|
|
517
245
|
log('dispatch', {
|
|
518
246
|
actor: p.actorName,
|
|
519
247
|
channel: p.channelUuid.slice(0, 8),
|
|
@@ -522,42 +250,22 @@ async function dispatchOne(p: PendingDispatch): Promise<boolean> {
|
|
|
522
250
|
last_msg: lastMsg.relPath,
|
|
523
251
|
});
|
|
524
252
|
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
transportRoot,
|
|
538
|
-
'config',
|
|
539
|
-
p.actorName,
|
|
540
|
-
'(config)',
|
|
541
|
-
'(config)',
|
|
542
|
-
`actor profile load failed: ${(err as Error).message}`,
|
|
543
|
-
);
|
|
544
|
-
log('dispatch_config_error', {
|
|
545
|
-
actor: p.actorName,
|
|
546
|
-
dlq_id: r.id,
|
|
547
|
-
attempts: r.attempts,
|
|
548
|
-
quarantined: r.quarantined,
|
|
549
|
-
});
|
|
550
|
-
return false;
|
|
551
|
-
}
|
|
552
|
-
|
|
553
|
-
const systemPrompt = composeSystemPrompt(profile.systemPrompt);
|
|
554
|
-
const userMessage = formatBatchedUserMessage(p.msgs);
|
|
555
|
-
const result = await invokeCli(cli, systemPrompt, userMessage, p.actorName);
|
|
253
|
+
const result = await invokeCli(
|
|
254
|
+
cli,
|
|
255
|
+
composeSystemPrompt(profile.systemPrompt),
|
|
256
|
+
formatBatchedUserMessage(p.msgs),
|
|
257
|
+
{
|
|
258
|
+
CROSSTALK_DISPATCH_ACTOR: p.actorName,
|
|
259
|
+
CROSSTALK_DISPATCH_CHANNEL: p.channelUuid,
|
|
260
|
+
// Every relPath in the batch — `crosstalk send` records them all as
|
|
261
|
+
// the reply's re: list, so batching never loses an answered message.
|
|
262
|
+
CROSSTALK_DISPATCH_RE: p.msgs.map((m) => m.relPath).join(','),
|
|
263
|
+
},
|
|
264
|
+
);
|
|
556
265
|
|
|
557
266
|
if (result.status !== 0) {
|
|
558
267
|
const r = writeDlqEntry(
|
|
559
268
|
transportRoot,
|
|
560
|
-
'dispatch',
|
|
561
269
|
p.actorName,
|
|
562
270
|
p.channelUuid,
|
|
563
271
|
lastMsg.relPath,
|
|
@@ -577,45 +285,26 @@ async function dispatchOne(p: PendingDispatch): Promise<boolean> {
|
|
|
577
285
|
|
|
578
286
|
const reply = result.stdout.trim();
|
|
579
287
|
if (reply.length === 0) {
|
|
580
|
-
//
|
|
581
|
-
//
|
|
582
|
-
//
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
if (p.msgs.length > 1) {
|
|
586
|
-
log('dispatch_batch_silent_ok', {
|
|
587
|
-
actor: p.actorName,
|
|
588
|
-
channel: p.channelUuid.slice(0, 8),
|
|
589
|
-
batch_size: p.msgs.length,
|
|
590
|
-
});
|
|
591
|
-
return true;
|
|
592
|
-
}
|
|
593
|
-
const r = writeDlqEntry(
|
|
594
|
-
transportRoot,
|
|
595
|
-
'dispatch',
|
|
596
|
-
p.actorName,
|
|
597
|
-
p.channelUuid,
|
|
598
|
-
lastMsg.relPath,
|
|
599
|
-
'cli returned empty reply',
|
|
600
|
-
);
|
|
601
|
-
log('dispatch_empty_reply', {
|
|
602
|
-
actor: p.actorName,
|
|
603
|
-
channel: p.channelUuid.slice(0, 8),
|
|
604
|
-
dlq_id: r.id,
|
|
605
|
-
attempts: r.attempts,
|
|
606
|
-
quarantined: r.quarantined,
|
|
607
|
-
});
|
|
608
|
-
return false;
|
|
288
|
+
// Legitimate: the actor routed its answer via `crosstalk send` (which
|
|
289
|
+
// auto-links re:). If it truly did nothing, the asker's `crosstalk
|
|
290
|
+
// replies` stays PENDING — visible, not silently lost.
|
|
291
|
+
log('dispatch_silent', { actor: p.actorName, channel: p.channelUuid.slice(0, 8), batch_size: p.msgs.length });
|
|
292
|
+
return true;
|
|
609
293
|
}
|
|
610
294
|
|
|
611
|
-
//
|
|
612
|
-
//
|
|
613
|
-
//
|
|
614
|
-
const
|
|
615
|
-
const
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
295
|
+
// One reply per distinct sender, re:-linked to EVERY message that sender
|
|
296
|
+
// had in the batch — the asker's activation rule fires, and `crosstalk
|
|
297
|
+
// replies` sees each individual message as answered.
|
|
298
|
+
const bySender = new Map<string, string[]>();
|
|
299
|
+
for (const m of p.msgs) {
|
|
300
|
+
const sender = messageSender(m);
|
|
301
|
+
bySender.set(sender, [...(bySender.get(sender) ?? []), m.relPath]);
|
|
302
|
+
}
|
|
303
|
+
bySender.delete('unknown');
|
|
304
|
+
if (bySender.size === 0) bySender.set(messageSender(firstMsg), [firstMsg.relPath]);
|
|
305
|
+
for (const [sender, relPaths] of bySender) {
|
|
306
|
+
writeReply(p.channelUuid, p.actorName, sender, relPaths.length === 1 ? relPaths[0]! : relPaths, reply);
|
|
307
|
+
}
|
|
619
308
|
return true;
|
|
620
309
|
}
|
|
621
310
|
|
|
@@ -625,278 +314,180 @@ interface TickResult {
|
|
|
625
314
|
}
|
|
626
315
|
|
|
627
316
|
async function dispatchTick(): Promise<TickResult> {
|
|
628
|
-
writeHeartbeat();
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
// gives operators full diagnostic info via stdout/json logs.
|
|
641
|
-
log('git_pull_failed', { error: pullResult.error.slice(0, 200) });
|
|
642
|
-
infraOk = false;
|
|
643
|
-
}
|
|
644
|
-
|
|
645
|
-
let host: HostFile;
|
|
646
|
-
try {
|
|
647
|
-
host = findHostFile(transportRoot, hostOverride);
|
|
648
|
-
} catch (err) {
|
|
649
|
-
const r = writeDlqEntry(
|
|
650
|
-
transportRoot,
|
|
651
|
-
'config',
|
|
652
|
-
'(host)',
|
|
653
|
-
'(config)',
|
|
654
|
-
'(config)',
|
|
655
|
-
`host file load failed: ${(err as Error).message}`,
|
|
656
|
-
);
|
|
657
|
-
log('tick_config_error', {
|
|
658
|
-
scope: 'host',
|
|
659
|
-
dlq_id: r.id,
|
|
660
|
-
attempts: r.attempts,
|
|
661
|
-
quarantined: r.quarantined,
|
|
662
|
-
});
|
|
663
|
-
return { didWork: false, infraOk };
|
|
664
|
-
}
|
|
665
|
-
|
|
666
|
-
let didWork = false;
|
|
317
|
+
writeHeartbeat(transportRoot, RUNTIME_VERSION);
|
|
318
|
+
let infraOk = true;
|
|
319
|
+
|
|
320
|
+
const pullResult = gitPull(transportRoot);
|
|
321
|
+
if (!pullResult.ok) {
|
|
322
|
+
// Skip the whole tick: a failed pull can leave origin/HEAD (the cursor
|
|
323
|
+
// baseline) ahead of the working tree, and scanning against that would
|
|
324
|
+
// advance cursors past messages that never materialized.
|
|
325
|
+
logError(transportRoot, 'git_pull', pullResult.error ?? 'unknown');
|
|
326
|
+
log('git_pull_failed', { error: (pullResult.error ?? '').slice(0, 200) });
|
|
327
|
+
return { didWork: false, infraOk: false };
|
|
328
|
+
}
|
|
667
329
|
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
330
|
+
let host: HostFile;
|
|
331
|
+
try {
|
|
332
|
+
host = findHostFile(transportRoot, hostOverride);
|
|
333
|
+
} catch (err) {
|
|
334
|
+
logConfigError('host', (err as Error).message);
|
|
335
|
+
return { didWork: false, infraOk };
|
|
336
|
+
}
|
|
673
337
|
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
// surfaced rather than dropped without trace. See concierge.md
|
|
713
|
-
// "Host-aware routing" + PROTOCOL.md.
|
|
714
|
-
const routing = matchHostRouting(to, actorName, host.alias);
|
|
715
|
-
if (!routing.addressed || from === actorName || msgType === 'read') {
|
|
716
|
-
if (routing.wrongHost) {
|
|
717
|
-
log('host_routing_mismatch', {
|
|
718
|
-
actor: actorName,
|
|
719
|
-
this_host: host.alias,
|
|
720
|
-
channel: channelUuid.slice(0, 8),
|
|
721
|
-
msg: msg.relPath,
|
|
722
|
-
to,
|
|
723
|
-
});
|
|
724
|
-
}
|
|
725
|
-
writeCursor(transportRoot, actorName, channelUuid, msg.relPath);
|
|
726
|
-
continue;
|
|
727
|
-
}
|
|
728
|
-
// Lifecycle activation rule. `work` always wakes. `result` wakes
|
|
729
|
-
// only if reply-causal — actor previously sent the sender a `work`
|
|
730
|
-
// in this channel. The kind used here is the runtime's INFERRED
|
|
731
|
-
// effective kind, not the actor's declared kind: a message that's
|
|
732
|
-
// causally a reply is treated as `result` even when an actor (or
|
|
733
|
-
// `crosstalk send`'s default) labelled it `work`, so a fan-in peer
|
|
734
|
-
// mislabeling its reply can't forge a wake-up loop. See PROTOCOL.md
|
|
735
|
-
// "Message kinds".
|
|
736
|
-
const kind = effectiveKind(messages, msg);
|
|
737
|
-
if (kind === 'result' && !hasPriorWork(messages, actorName, from, msg.relPath)) {
|
|
738
|
-
writeCursor(transportRoot, actorName, channelUuid, msg.relPath);
|
|
739
|
-
continue;
|
|
338
|
+
// Cursors are commit hashes, not relPaths: filenames order by sender
|
|
339
|
+
// timestamp but arrive in push order, so a relPath cursor can advance
|
|
340
|
+
// past a slower writer's earlier-stamped message and lose it forever.
|
|
341
|
+
// "New since cursor" is asked of git, which records arrival truthfully.
|
|
342
|
+
const head = cursorBaseline(transportRoot);
|
|
343
|
+
if (!head) {
|
|
344
|
+
logError(transportRoot, 'other', 'git rev-parse failed for origin/HEAD and HEAD — skipping tick');
|
|
345
|
+
return { didWork: false, infraOk: false };
|
|
346
|
+
}
|
|
347
|
+
// diff results keyed by cursor commit (shared across actors on the same
|
|
348
|
+
// cursor); null = commit unknown to this clone -> full re-scan.
|
|
349
|
+
const addedSince = new Map<string, Set<string> | null>();
|
|
350
|
+
|
|
351
|
+
let didWork = false;
|
|
352
|
+
const channels = discoverChannels(transportRoot);
|
|
353
|
+
|
|
354
|
+
for (const actorName of Object.keys(host.actors)) {
|
|
355
|
+
const tiers = host.actors[actorName]!;
|
|
356
|
+
const concurrency = actorConcurrency(tiers);
|
|
357
|
+
const pending: PendingDispatch[] = [];
|
|
358
|
+
|
|
359
|
+
for (const channelUuid of channels) {
|
|
360
|
+
const cursor = readCursor(transportRoot, actorName, channelUuid);
|
|
361
|
+
if (cursor === head) continue;
|
|
362
|
+
|
|
363
|
+
const messages = listChannelMessages(transportRoot, channelUuid);
|
|
364
|
+
const senderByRelPath = new Map(messages.map((m) => [m.relPath, messageSender(m)]));
|
|
365
|
+
const senderOf = (relPath: string) => senderByRelPath.get(relPath);
|
|
366
|
+
|
|
367
|
+
let post = messages;
|
|
368
|
+
if (cursor) {
|
|
369
|
+
let added = addedSince.get(cursor);
|
|
370
|
+
if (added === undefined) {
|
|
371
|
+
const files = newFilesSince(transportRoot, cursor);
|
|
372
|
+
added = files === null ? null : new Set(files);
|
|
373
|
+
addedSince.set(cursor, added);
|
|
374
|
+
if (added === null) {
|
|
375
|
+
logError(transportRoot, 'other', `cursor commit ${cursor.slice(0, 12)} unknown to this clone — full channel re-scan`);
|
|
740
376
|
}
|
|
741
|
-
channelBatch.push(msg);
|
|
742
377
|
}
|
|
743
|
-
if (
|
|
744
|
-
const
|
|
745
|
-
|
|
746
|
-
pending.push({ actorName, channelUuid, msgs: g, tiers });
|
|
747
|
-
}
|
|
378
|
+
if (added !== null) {
|
|
379
|
+
const prefix = `data/channels/${channelUuid}/`;
|
|
380
|
+
post = messages.filter((m) => added.has(prefix + m.relPath));
|
|
748
381
|
}
|
|
749
382
|
}
|
|
383
|
+
if (post.length === 0) {
|
|
384
|
+
writeCursor(transportRoot, actorName, channelUuid, head);
|
|
385
|
+
continue;
|
|
386
|
+
}
|
|
750
387
|
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
388
|
+
const batch: ChannelMessage[] = [];
|
|
389
|
+
for (const msg of post) {
|
|
390
|
+
if (msg.data['type'] !== 'text') continue;
|
|
391
|
+
const decision = decideWake(
|
|
392
|
+
{
|
|
393
|
+
from: messageSender(msg),
|
|
394
|
+
to: recipients(msg.data['to']),
|
|
395
|
+
re: reList(msg.data['re']),
|
|
396
|
+
},
|
|
397
|
+
actorName,
|
|
398
|
+
host.alias,
|
|
399
|
+
senderOf,
|
|
400
|
+
);
|
|
401
|
+
if (decision === 'wake') {
|
|
402
|
+
batch.push(msg);
|
|
403
|
+
} else if (decision === 'wrong-host') {
|
|
404
|
+
log('host_routing_mismatch', {
|
|
405
|
+
actor: actorName,
|
|
406
|
+
this_host: host.alias,
|
|
407
|
+
channel: channelUuid.slice(0, 8),
|
|
408
|
+
msg: msg.relPath,
|
|
409
|
+
to: recipients(msg.data['to']),
|
|
410
|
+
});
|
|
764
411
|
}
|
|
765
412
|
}
|
|
413
|
+
|
|
414
|
+
if (batch.length === 0) {
|
|
415
|
+
writeCursor(transportRoot, actorName, channelUuid, head);
|
|
416
|
+
continue;
|
|
417
|
+
}
|
|
418
|
+
for (const g of splitForConcurrency(batch, concurrency)) {
|
|
419
|
+
pending.push({ actorName, channelUuid, msgs: g, tiers });
|
|
420
|
+
}
|
|
766
421
|
}
|
|
767
422
|
|
|
768
|
-
//
|
|
769
|
-
//
|
|
770
|
-
//
|
|
771
|
-
//
|
|
772
|
-
//
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
: `dispatch: cursor advance ${new Date().toISOString()}`;
|
|
778
|
-
const pushResult = gitCommitAndPush(transportRoot, commitMsg);
|
|
779
|
-
if (!pushResult.ok && pushResult.error) {
|
|
780
|
-
// Same rationale as the pull case above: no writeErrorLog.
|
|
781
|
-
// Repeated push failures shouldn't flood errors/ since that
|
|
782
|
-
// contributes to the same git-deadlock-feedback that pull does.
|
|
783
|
-
const kind = pushResult.committed ? 'git_push' : 'git_commit';
|
|
784
|
-
log('git_push_failed', {
|
|
785
|
-
kind,
|
|
786
|
-
committed_locally: pushResult.committed,
|
|
787
|
-
error: pushResult.error.slice(0, 200),
|
|
788
|
-
});
|
|
789
|
-
infraOk = false;
|
|
423
|
+
// Waves of `concurrency` parallel CLI invocations. The cursor advances
|
|
424
|
+
// to the scanned commit whether each batch succeeded or DLQ'd —
|
|
425
|
+
// at-least-once was attempted; `crosstalk dlq --retry` rewinds the
|
|
426
|
+
// cursor explicitly. A crash mid-wave leaves the cursor behind, so the
|
|
427
|
+
// whole span replays next tick (at-least-once, never lost).
|
|
428
|
+
for (let i = 0; i < pending.length; i += concurrency) {
|
|
429
|
+
const wave = pending.slice(i, i + concurrency);
|
|
430
|
+
const results = await Promise.all(wave.map((p) => dispatchOne(p)));
|
|
431
|
+
if (results.some(Boolean)) didWork = true;
|
|
790
432
|
}
|
|
433
|
+
for (const p of pending) {
|
|
434
|
+
writeCursor(transportRoot, p.actorName, p.channelUuid, head);
|
|
435
|
+
}
|
|
436
|
+
}
|
|
791
437
|
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
}
|
|
438
|
+
if (didWork) {
|
|
439
|
+
const pushResult = await withLock(transportRoot, 'git', async () =>
|
|
440
|
+
gitCommitAndPush(transportRoot, `dispatch: replies ${new Date().toISOString()}`),
|
|
441
|
+
);
|
|
442
|
+
if (!pushResult.ok && pushResult.error) {
|
|
443
|
+
logError(transportRoot, pushResult.committed ? 'git_push' : 'git_commit', pushResult.error);
|
|
444
|
+
log('git_push_failed', { committed_locally: pushResult.committed, error: pushResult.error.slice(0, 200) });
|
|
445
|
+
infraOk = false;
|
|
799
446
|
}
|
|
447
|
+
}
|
|
800
448
|
|
|
801
|
-
|
|
802
|
-
});
|
|
449
|
+
return { didWork, infraOk };
|
|
803
450
|
}
|
|
804
451
|
|
|
805
|
-
async function waitForWakeOrTimeout(ms: number): Promise<
|
|
806
|
-
const
|
|
807
|
-
mkdirSync(wakeDir, { recursive: true });
|
|
452
|
+
async function waitForWakeOrTimeout(ms: number): Promise<void> {
|
|
453
|
+
const dir = stateDir(transportRoot);
|
|
808
454
|
const ac = new AbortController();
|
|
809
455
|
const timer = setTimeout(() => ac.abort(), ms);
|
|
810
456
|
try {
|
|
811
|
-
const watcher = watch(
|
|
457
|
+
const watcher = watch(dir, { signal: ac.signal });
|
|
812
458
|
for await (const ev of watcher) {
|
|
813
|
-
if (ev.filename === 'wake.signal')
|
|
814
|
-
clearTimeout(timer);
|
|
815
|
-
return 'wake';
|
|
816
|
-
}
|
|
459
|
+
if (ev.filename === 'wake.signal') return;
|
|
817
460
|
}
|
|
818
|
-
return 'timeout';
|
|
819
461
|
} catch {
|
|
820
|
-
|
|
462
|
+
/* abort = timeout */
|
|
821
463
|
} finally {
|
|
822
464
|
clearTimeout(timer);
|
|
823
465
|
}
|
|
824
466
|
}
|
|
825
467
|
|
|
826
468
|
async function main(): Promise<void> {
|
|
827
|
-
log('dispatch_start', {
|
|
828
|
-
transport: transportRoot,
|
|
829
|
-
version: RUNTIME_VERSION,
|
|
830
|
-
log_file: logFile ?? null,
|
|
831
|
-
});
|
|
469
|
+
log('dispatch_start', { transport: transportRoot, version: RUNTIME_VERSION, state_dir: stateDir(transportRoot) });
|
|
832
470
|
if (onceMode) {
|
|
833
471
|
await dispatchTick();
|
|
834
|
-
|
|
472
|
+
process.exit(0);
|
|
835
473
|
}
|
|
836
|
-
log('
|
|
474
|
+
log('dispatch_running', { quiet_poll_s: pollSeconds });
|
|
837
475
|
|
|
838
476
|
let consecutiveInfraFailures = 0;
|
|
839
|
-
|
|
840
477
|
while (true) {
|
|
841
478
|
try {
|
|
842
479
|
const r = await dispatchTick();
|
|
843
480
|
if (r.infraOk) {
|
|
844
|
-
if (consecutiveInfraFailures > 0) {
|
|
845
|
-
log('backoff_cleared', { previous_consecutive_failures: consecutiveInfraFailures });
|
|
846
|
-
}
|
|
481
|
+
if (consecutiveInfraFailures > 0) log('backoff_cleared', { previous_failures: consecutiveInfraFailures });
|
|
847
482
|
consecutiveInfraFailures = 0;
|
|
848
483
|
} else {
|
|
849
484
|
consecutiveInfraFailures++;
|
|
850
485
|
}
|
|
851
|
-
|
|
852
|
-
// Backoff kicks in only after a grace period of failures.
|
|
853
486
|
const beyondGrace = Math.max(0, consecutiveInfraFailures - BACKOFF_GRACE);
|
|
854
487
|
const backoffFactor = Math.min(MAX_BACKOFF_MULTIPLIER, 2 ** beyondGrace);
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
log('backoff_active', {
|
|
858
|
-
consecutive_failures: consecutiveInfraFailures,
|
|
859
|
-
factor: backoffFactor,
|
|
860
|
-
});
|
|
861
|
-
}
|
|
862
|
-
|
|
863
|
-
// Per-tick heal: deadlock-break when the dispatch loop has been
|
|
864
|
-
// failing for HEAL_THRESHOLD consecutive ticks AND we haven't healed
|
|
865
|
-
// recently. Hard-resets the working tree to origin/<current branch>.
|
|
866
|
-
// Trades any uncommitted local state for forward progress — acceptable
|
|
867
|
-
// because messages/cursors/dlq are pulled back from origin and
|
|
868
|
-
// .turnq/errors are regenerated.
|
|
869
|
-
if (
|
|
870
|
-
consecutiveInfraFailures >= HEAL_THRESHOLD &&
|
|
871
|
-
consecutiveInfraFailures - lastHealAtFailureCount >= HEAL_THRESHOLD
|
|
872
|
-
) {
|
|
873
|
-
try {
|
|
874
|
-
const branchProc = spawn('git', ['rev-parse', '--abbrev-ref', 'HEAD'], {
|
|
875
|
-
cwd: transportRoot,
|
|
876
|
-
stdio: ['ignore', 'pipe', 'ignore'],
|
|
877
|
-
});
|
|
878
|
-
let branchName = '';
|
|
879
|
-
branchProc.stdout.on('data', (d) => { branchName += d.toString(); });
|
|
880
|
-
await new Promise<void>((res) => branchProc.on('close', () => res()));
|
|
881
|
-
const branch = branchName.trim() || 'main';
|
|
882
|
-
log('per_tick_heal_start', {
|
|
883
|
-
consecutive_failures: consecutiveInfraFailures,
|
|
884
|
-
target: `origin/${branch}`,
|
|
885
|
-
});
|
|
886
|
-
await new Promise<void>((res) => {
|
|
887
|
-
const p = spawn('sh', [
|
|
888
|
-
'-c',
|
|
889
|
-
`git rebase --abort 2>/dev/null; git fetch --quiet origin '${branch}' && git reset --hard --quiet 'origin/${branch}' && git clean -fdq`,
|
|
890
|
-
], { cwd: transportRoot, stdio: 'inherit' });
|
|
891
|
-
p.on('close', () => res());
|
|
892
|
-
});
|
|
893
|
-
log('per_tick_heal_done', { target: `origin/${branch}` });
|
|
894
|
-
lastHealAtFailureCount = consecutiveInfraFailures;
|
|
895
|
-
} catch (err) {
|
|
896
|
-
log('per_tick_heal_failed', { error: (err as Error).message });
|
|
897
|
-
}
|
|
488
|
+
if (backoffFactor > 1) {
|
|
489
|
+
log('backoff_active', { consecutive_failures: consecutiveInfraFailures, factor: backoffFactor });
|
|
898
490
|
}
|
|
899
|
-
|
|
900
491
|
if (r.didWork) {
|
|
901
492
|
await new Promise((res) => setTimeout(res, 1_000 * backoffFactor));
|
|
902
493
|
} else {
|
|
@@ -904,7 +495,7 @@ async function main(): Promise<void> {
|
|
|
904
495
|
}
|
|
905
496
|
} catch (err) {
|
|
906
497
|
const msg = (err as Error).message;
|
|
907
|
-
|
|
498
|
+
logError(transportRoot, 'other', `tick error: ${msg}`);
|
|
908
499
|
log('tick_error', { message: msg });
|
|
909
500
|
consecutiveInfraFailures++;
|
|
910
501
|
await new Promise((res) => setTimeout(res, pollSeconds * 1_000));
|