@cordfuse/crosstalk 5.0.0-alpha.3 → 5.0.0-alpha.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/chat.ts +6 -4
- package/src/dispatch.ts +77 -8
- package/src/send.ts +13 -10
- package/src/transport.ts +29 -2
- package/template/upstream/PROTOCOL.md +14 -14
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@cordfuse/crosstalk",
|
|
3
|
-
"version": "5.0.0-alpha.
|
|
3
|
+
"version": "5.0.0-alpha.5",
|
|
4
4
|
"description": "Crosstalk runtime — async messaging between agents over git. The crosstalk CLI plus dispatch, send, attach, chat, and supporting tools.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
package/src/chat.ts
CHANGED
|
@@ -25,7 +25,7 @@ import { createInterface } from 'readline/promises';
|
|
|
25
25
|
import { spawnSync } from 'child_process';
|
|
26
26
|
import { now, messageFilename } from './filenames.js';
|
|
27
27
|
import { serializeFrontmatter, parseFrontmatter } from './frontmatter.js';
|
|
28
|
-
import { gitCommitAndPush
|
|
28
|
+
import { gitCommitAndPush } from './transport.js';
|
|
29
29
|
import { withLock } from './turnq.js';
|
|
30
30
|
|
|
31
31
|
const transportRoot = resolve(process.cwd());
|
|
@@ -132,9 +132,11 @@ async function sendMessage(body: string): Promise<void> {
|
|
|
132
132
|
`chat: ${fromName} -> ${toActor} in ${channelUuid!.slice(0, 8)}`,
|
|
133
133
|
);
|
|
134
134
|
if (!r.ok && r.error) {
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
135
|
+
// Same anti-pattern as send.ts: writing to errors/ from an operator-
|
|
136
|
+
// side command dirties the working tree and breaks subsequent
|
|
137
|
+
// git pull --rebase. Stay on stderr only.
|
|
138
|
+
const kind = r.committed ? 'push' : 'commit';
|
|
139
|
+
console.error(`(${kind} failed: ${r.error.slice(0, 200)} — message is local-only)`);
|
|
138
140
|
}
|
|
139
141
|
});
|
|
140
142
|
}
|
package/src/dispatch.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { resolve, join } from 'path';
|
|
1
|
+
import { resolve, join, dirname } from 'path';
|
|
2
2
|
import { spawn } from 'child_process';
|
|
3
3
|
import {
|
|
4
4
|
mkdirSync,
|
|
@@ -10,6 +10,21 @@ import {
|
|
|
10
10
|
closeSync,
|
|
11
11
|
} from 'fs';
|
|
12
12
|
import { watch } from 'fs/promises';
|
|
13
|
+
import { fileURLToPath } from 'url';
|
|
14
|
+
|
|
15
|
+
// Read runtime version from the installed package's package.json at startup
|
|
16
|
+
// so dispatch_start logs and heartbeat content always match the actual
|
|
17
|
+
// installed @cordfuse/crosstalk version. Avoids hand-editing on every release.
|
|
18
|
+
const RUNTIME_VERSION: string = (() => {
|
|
19
|
+
try {
|
|
20
|
+
const thisFileDir = dirname(fileURLToPath(import.meta.url));
|
|
21
|
+
const pkgPath = join(thisFileDir, '..', 'package.json');
|
|
22
|
+
const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8')) as { version?: string };
|
|
23
|
+
return pkg.version ?? 'unknown';
|
|
24
|
+
} catch {
|
|
25
|
+
return 'unknown';
|
|
26
|
+
}
|
|
27
|
+
})();
|
|
13
28
|
import {
|
|
14
29
|
findHostFile,
|
|
15
30
|
loadActorProfile,
|
|
@@ -53,6 +68,16 @@ const logFile = flag('--log-file');
|
|
|
53
68
|
const MAX_BACKOFF_MULTIPLIER = 10; // cap: pollSeconds * 10
|
|
54
69
|
const BACKOFF_GRACE = 2; // first N failures don't trigger backoff
|
|
55
70
|
|
|
71
|
+
// Per-tick heal: when N consecutive infra failures pile up, the dispatch
|
|
72
|
+
// loop is stuck in a deadlock that entrypoint's boot-time auto-recovery
|
|
73
|
+
// can't break (because dispatch is already running). At HEAL_THRESHOLD
|
|
74
|
+
// consecutive failures, attempt a `git fetch && reset --hard origin/<branch>
|
|
75
|
+
// && clean -fd` from inside the tick loop. Mirrors the entrypoint logic.
|
|
76
|
+
// Throttled — won't reattempt until at least HEAL_THRESHOLD more consecutive
|
|
77
|
+
// failures pile up after a heal, to avoid heal-loop-storms.
|
|
78
|
+
const HEAL_THRESHOLD = 5;
|
|
79
|
+
let lastHealAtFailureCount = 0;
|
|
80
|
+
|
|
56
81
|
// Stale-read-receipt sweep config — runs at most every SWEEP_INTERVAL_MS
|
|
57
82
|
// of wall-clock to surface read receipts that never produced a reply
|
|
58
83
|
// (indicates dispatch crashed mid-tick or CLI hung silently).
|
|
@@ -80,7 +105,7 @@ function writeHeartbeat(): void {
|
|
|
80
105
|
try {
|
|
81
106
|
const dir = join(transportRoot, '.turnq');
|
|
82
107
|
mkdirSync(dir, { recursive: true });
|
|
83
|
-
const data = { ts: new Date().toISOString(), pid: process.pid, version:
|
|
108
|
+
const data = { ts: new Date().toISOString(), pid: process.pid, version: RUNTIME_VERSION };
|
|
84
109
|
writeFileSync(join(dir, 'heartbeat'), JSON.stringify(data) + '\n');
|
|
85
110
|
} catch { /* best-effort */ }
|
|
86
111
|
}
|
|
@@ -330,8 +355,13 @@ async function dispatchTick(): Promise<TickResult> {
|
|
|
330
355
|
|
|
331
356
|
const pullResult = gitPull(transportRoot);
|
|
332
357
|
if (!pullResult.ok && pullResult.error) {
|
|
333
|
-
|
|
334
|
-
|
|
358
|
+
// Note: deliberately NOT calling writeErrorLog here. Repeated pull
|
|
359
|
+
// failures (deadlock loop) would otherwise write a new errors/*.md
|
|
360
|
+
// every tick, which dispatch then has to commit, which the next
|
|
361
|
+
// pull then chokes on — a positive feedback that contributed to
|
|
362
|
+
// the alpha.3/alpha.4 Mac UAT wedge. The structured log line below
|
|
363
|
+
// gives operators full diagnostic info via stdout/json logs.
|
|
364
|
+
log('git_pull_failed', { error: pullResult.error.slice(0, 200) });
|
|
335
365
|
infraOk = false;
|
|
336
366
|
}
|
|
337
367
|
|
|
@@ -420,12 +450,14 @@ async function dispatchTick(): Promise<TickResult> {
|
|
|
420
450
|
: `dispatch: cursor advance ${new Date().toISOString()}`;
|
|
421
451
|
const pushResult = gitCommitAndPush(transportRoot, commitMsg);
|
|
422
452
|
if (!pushResult.ok && pushResult.error) {
|
|
453
|
+
// Same rationale as the pull case above: no writeErrorLog.
|
|
454
|
+
// Repeated push failures shouldn't flood errors/ since that
|
|
455
|
+
// contributes to the same git-deadlock-feedback that pull does.
|
|
423
456
|
const kind = pushResult.committed ? 'git_push' : 'git_commit';
|
|
424
|
-
const errId = writeErrorLog(transportRoot, kind, pushResult.error);
|
|
425
457
|
log('git_push_failed', {
|
|
426
|
-
|
|
458
|
+
kind,
|
|
427
459
|
committed_locally: pushResult.committed,
|
|
428
|
-
error: pushResult.error.slice(0,
|
|
460
|
+
error: pushResult.error.slice(0, 200),
|
|
429
461
|
});
|
|
430
462
|
infraOk = false;
|
|
431
463
|
}
|
|
@@ -467,7 +499,7 @@ async function waitForWakeOrTimeout(ms: number): Promise<'wake' | 'timeout'> {
|
|
|
467
499
|
async function main(): Promise<void> {
|
|
468
500
|
log('dispatch_start', {
|
|
469
501
|
transport: transportRoot,
|
|
470
|
-
version:
|
|
502
|
+
version: RUNTIME_VERSION,
|
|
471
503
|
log_file: logFile ?? null,
|
|
472
504
|
});
|
|
473
505
|
if (onceMode) {
|
|
@@ -501,6 +533,43 @@ async function main(): Promise<void> {
|
|
|
501
533
|
});
|
|
502
534
|
}
|
|
503
535
|
|
|
536
|
+
// Per-tick heal: deadlock-break when the dispatch loop has been
|
|
537
|
+
// failing for HEAL_THRESHOLD consecutive ticks AND we haven't healed
|
|
538
|
+
// recently. Hard-resets the working tree to origin/<current branch>.
|
|
539
|
+
// Trades any uncommitted local state for forward progress — acceptable
|
|
540
|
+
// because messages/cursors/dlq are pulled back from origin and
|
|
541
|
+
// .turnq/errors are regenerated.
|
|
542
|
+
if (
|
|
543
|
+
consecutiveInfraFailures >= HEAL_THRESHOLD &&
|
|
544
|
+
consecutiveInfraFailures - lastHealAtFailureCount >= HEAL_THRESHOLD
|
|
545
|
+
) {
|
|
546
|
+
try {
|
|
547
|
+
const branchProc = spawn('git', ['rev-parse', '--abbrev-ref', 'HEAD'], {
|
|
548
|
+
cwd: transportRoot,
|
|
549
|
+
stdio: ['ignore', 'pipe', 'ignore'],
|
|
550
|
+
});
|
|
551
|
+
let branchName = '';
|
|
552
|
+
branchProc.stdout.on('data', (d) => { branchName += d.toString(); });
|
|
553
|
+
await new Promise<void>((res) => branchProc.on('close', () => res()));
|
|
554
|
+
const branch = branchName.trim() || 'main';
|
|
555
|
+
log('per_tick_heal_start', {
|
|
556
|
+
consecutive_failures: consecutiveInfraFailures,
|
|
557
|
+
target: `origin/${branch}`,
|
|
558
|
+
});
|
|
559
|
+
await new Promise<void>((res) => {
|
|
560
|
+
const p = spawn('sh', [
|
|
561
|
+
'-c',
|
|
562
|
+
`git rebase --abort 2>/dev/null; git fetch --quiet origin '${branch}' && git reset --hard --quiet 'origin/${branch}' && git clean -fdq`,
|
|
563
|
+
], { cwd: transportRoot, stdio: 'inherit' });
|
|
564
|
+
p.on('close', () => res());
|
|
565
|
+
});
|
|
566
|
+
log('per_tick_heal_done', { target: `origin/${branch}` });
|
|
567
|
+
lastHealAtFailureCount = consecutiveInfraFailures;
|
|
568
|
+
} catch (err) {
|
|
569
|
+
log('per_tick_heal_failed', { error: (err as Error).message });
|
|
570
|
+
}
|
|
571
|
+
}
|
|
572
|
+
|
|
504
573
|
if (r.didWork) {
|
|
505
574
|
await new Promise((res) => setTimeout(res, 1_000 * backoffFactor));
|
|
506
575
|
} else {
|
package/src/send.ts
CHANGED
|
@@ -2,7 +2,7 @@ import { resolve, join } from 'path';
|
|
|
2
2
|
import { mkdirSync, writeFileSync } from 'fs';
|
|
3
3
|
import { now, messageFilename } from './filenames.js';
|
|
4
4
|
import { serializeFrontmatter } from './frontmatter.js';
|
|
5
|
-
import { gitCommitAndPush
|
|
5
|
+
import { gitCommitAndPush } from './transport.js';
|
|
6
6
|
import { withLock } from './turnq.js';
|
|
7
7
|
|
|
8
8
|
const transportRoot = resolve(process.cwd());
|
|
@@ -60,16 +60,19 @@ async function main(): Promise<void> {
|
|
|
60
60
|
}
|
|
61
61
|
|
|
62
62
|
if (!pushResult.ok && pushResult.error) {
|
|
63
|
-
|
|
64
|
-
|
|
63
|
+
// Note: deliberately NOT writing to errors/. That directory is dispatcher-
|
|
64
|
+
// owned state, and operator-side writes from `crosstalk send` were
|
|
65
|
+
// dirtying the working tree, causing subsequent `git pull --rebase` to
|
|
66
|
+
// fail with "unstaged changes". Surface the error to stderr only.
|
|
67
|
+
const kind = pushResult.committed ? 'push' : 'commit';
|
|
65
68
|
console.error(`Wrote locally: ${join(ts.pathDate, filename)}`);
|
|
66
|
-
console.error(
|
|
67
|
-
|
|
68
|
-
);
|
|
69
|
-
console.error(
|
|
70
|
-
console.error(
|
|
71
|
-
|
|
72
|
-
);
|
|
69
|
+
console.error(`but git ${kind} FAILED:`);
|
|
70
|
+
console.error(` ${pushResult.error.slice(0, 300)}`);
|
|
71
|
+
console.error('');
|
|
72
|
+
console.error('Your message is in your local clone but not on origin.');
|
|
73
|
+
console.error('Recover with:');
|
|
74
|
+
console.error(' git pull --rebase');
|
|
75
|
+
console.error(' git push');
|
|
73
76
|
process.exit(3);
|
|
74
77
|
}
|
|
75
78
|
|
package/src/transport.ts
CHANGED
|
@@ -70,13 +70,40 @@ export function gitCommitAndPush(transportRoot: string, message: string): GitPus
|
|
|
70
70
|
return { ok: true, committed: false, pushed: false };
|
|
71
71
|
}
|
|
72
72
|
|
|
73
|
-
|
|
74
|
-
|
|
73
|
+
// Stage everything EXCEPT .turnq/ (machine-local runtime state; commits of
|
|
74
|
+
// this directory cause modify/delete conflicts the moment another clone
|
|
75
|
+
// untracks it via gitignore). Pathspec exclusion is independent of the
|
|
76
|
+
// transport's .gitignore — defensive against gitignore drift.
|
|
77
|
+
//
|
|
78
|
+
// Edge: `git add -A . :(exclude).turnq` exits non-zero with "The following
|
|
79
|
+
// paths are ignored by one of your .gitignore files: .turnq" because the
|
|
80
|
+
// exclude pathspec itself matches a gitignored path. The add still stages
|
|
81
|
+
// every other change correctly — only the exit code is misleading. So we
|
|
82
|
+
// treat that specific failure-pattern as benign and let the subsequent
|
|
83
|
+
// commit step decide whether anything actually got staged.
|
|
84
|
+
const add = captureGit(transportRoot, ['add', '-A', '.', ':(exclude).turnq']);
|
|
85
|
+
const addBenignIgnoredPath = add.status !== 0 &&
|
|
86
|
+
/paths are ignored/.test(add.stderr);
|
|
87
|
+
if (add.status !== 0 && !addBenignIgnoredPath) {
|
|
75
88
|
return { ok: false, committed: false, pushed: false, error: add.stderr.trim().slice(0, 500) };
|
|
76
89
|
}
|
|
77
90
|
|
|
91
|
+
// If .turnq/ was previously committed (pre-alpha.4 transport), the index
|
|
92
|
+
// may still hold tracked .turnq/* entries. Untrack them here so subsequent
|
|
93
|
+
// pulls don't fight with operator clones that have untracked .turnq/. This
|
|
94
|
+
// is a one-time-per-transport heal; on a clean transport it's a no-op.
|
|
95
|
+
const indexedTurnq = captureGit(transportRoot, ['ls-files', '.turnq']);
|
|
96
|
+
if (indexedTurnq.status === 0 && indexedTurnq.stdout.trim().length > 0) {
|
|
97
|
+
captureGit(transportRoot, ['rm', '-r', '--cached', '--quiet', '.turnq']);
|
|
98
|
+
}
|
|
99
|
+
|
|
78
100
|
const commit = captureGit(transportRoot, ['commit', '-m', message]);
|
|
79
101
|
if (commit.status !== 0) {
|
|
102
|
+
// Empty commit ("nothing to commit") is fine — the exclusion may have
|
|
103
|
+
// dropped the only change. Treat a "nothing to commit" failure as a no-op.
|
|
104
|
+
const noop = commit.stdout.includes('nothing to commit') ||
|
|
105
|
+
commit.stderr.includes('nothing to commit');
|
|
106
|
+
if (noop) return { ok: true, committed: false, pushed: false };
|
|
80
107
|
return { ok: false, committed: false, pushed: false, error: commit.stderr.trim().slice(0, 500) };
|
|
81
108
|
}
|
|
82
109
|
|
|
@@ -54,13 +54,13 @@ If you are *authoring* an actor profile for a compute role, write the system pro
|
|
|
54
54
|
|
|
55
55
|
## Available tools
|
|
56
56
|
|
|
57
|
-
You have shell access. You can invoke these tools any time you decide they help with your reply. All of them run from the transport root (the current working directory). The tools are documented here so you can pick the right one from natural-language intent — e.g. "check what the dispatch state looks like" → `
|
|
57
|
+
You have shell access. You can invoke these tools any time you decide they help with your reply. All of them run from the transport root (the current working directory). The tools are documented here so you can pick the right one from natural-language intent — e.g. "check what the dispatch state looks like" → `crosstalk status`.
|
|
58
58
|
|
|
59
59
|
### `send` — initiate a message to another actor
|
|
60
60
|
|
|
61
61
|
Use this when you want to **proactively** message someone, not just reply to the prompt you're processing. (If you only want to reply to what you received, just answer — do not call `send`.)
|
|
62
62
|
|
|
63
|
-
|
|
63
|
+
crosstalk send --channel <channel-uuid> --to <actor> [--from <your-name>] [--tier <tier-name>] "<message body>"
|
|
64
64
|
|
|
65
65
|
`send` also pokes dispatch to tick immediately so the recipient sees the message right away.
|
|
66
66
|
|
|
@@ -70,13 +70,13 @@ Use this when you want to **proactively** message someone, not just reply to the
|
|
|
70
70
|
|
|
71
71
|
Use this to bypass the quiet-poll interval. Rarely needed manually — `send` already pokes dispatch automatically. Use this if you've directly written a message file and want dispatch to notice it now.
|
|
72
72
|
|
|
73
|
-
|
|
73
|
+
crosstalk wake
|
|
74
74
|
|
|
75
75
|
### `status` — inspect transport state
|
|
76
76
|
|
|
77
77
|
Use this when an operator asks "what's happening?" or before deciding whether to retry something.
|
|
78
78
|
|
|
79
|
-
|
|
79
|
+
crosstalk status
|
|
80
80
|
|
|
81
81
|
Outputs: host file summary, per-actor cursors, turnq lock state, channel list with message counts, DLQ entry count.
|
|
82
82
|
|
|
@@ -89,24 +89,24 @@ Use this when you want to inspect or retry failures. DLQ entries have one of two
|
|
|
89
89
|
|
|
90
90
|
Entries also carry an `attempts` count and a `quarantined: true|false` flag. If the same failure repeats 4+ times within an hour, the entry is quarantined: dispatch starts skipping that message (for `dispatch` kind) or that actor (for `config` kind). The retry command clears the quarantine and lets the next dispatch tick try again.
|
|
91
91
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
92
|
+
crosstalk dlq # same as --list
|
|
93
|
+
crosstalk dlq --list # list all DLQ entries (incl. quarantine markers + counts by kind)
|
|
94
|
+
crosstalk dlq --show <id> # show full details of one entry
|
|
95
|
+
crosstalk dlq --retry <id> # for dispatch: rewind cursor; for config: clear quarantine
|
|
96
|
+
crosstalk dlq --clear # delete all entries (destructive)
|
|
97
97
|
|
|
98
98
|
### `init` — scaffold a new transport
|
|
99
99
|
|
|
100
100
|
Use this only when an operator is setting up a fresh transport directory. Creates a default host file (for the current hostname), a `general` channel, and the empty `custom/actors/`, `cursors/`, and `dlq/` directories.
|
|
101
101
|
|
|
102
|
-
|
|
103
|
-
|
|
102
|
+
crosstalk init
|
|
103
|
+
crosstalk init --force # overwrite existing files
|
|
104
104
|
|
|
105
105
|
### `channel` — create a new channel or subchannel
|
|
106
106
|
|
|
107
107
|
Use this when you want to spin up a new conversation space — either a top-level channel or a focused subchannel of an existing one. Generates a UUID and writes `data/channels/<uuid>/CHANNEL.md`.
|
|
108
108
|
|
|
109
|
-
|
|
109
|
+
crosstalk channel --name <name> [--parent <parent-uuid>] [--created-by <name>]
|
|
110
110
|
|
|
111
111
|
Prints the new channel UUID. Use that UUID in subsequent `send` calls.
|
|
112
112
|
|
|
@@ -146,7 +146,7 @@ There are two persistent failure logs in the transport:
|
|
|
146
146
|
- **`dlq/`** — failed dispatches and config errors. Per-message and per-actor. Use the `dlq` tool to inspect/retry.
|
|
147
147
|
- **`errors/`** — infrastructure failures (git pull/push/commit, filesystem, message parse). Deduped by signature with a `count` field. If you see something not working as expected (replies aren't reaching origin, dispatch keeps reporting `skip_tick_locked`, etc.), check this directory — operator-hostile state often surfaces here first.
|
|
148
148
|
|
|
149
|
-
`
|
|
149
|
+
`crosstalk status` shows counts for both at a glance, plus a **dispatch heartbeat** line — the timestamp of the most recent tick. If the heartbeat is fresh (under 2 min old), dispatch is running. If stale (over 5 min), dispatch has stopped or hung; check `errors/` and the process state.
|
|
150
150
|
|
|
151
151
|
**Persistent infrastructure failures trigger exponential backoff.** After 2+ consecutive ticks with failed git pull or push, dispatch doubles its poll interval each tick, capped at 10× the configured quiet poll. The `backoff_active` log event fires when active; `backoff_cleared` fires when a tick succeeds again.
|
|
152
152
|
|
|
@@ -177,4 +177,4 @@ For idempotent work (information lookup, calculation, advice), duplicates are ha
|
|
|
177
177
|
- **Do not modify `errors/` directly.** Same reasoning — entries are deduped by signature and the count field matters.
|
|
178
178
|
- **Do not modify `.turnq/`.** That holds turnq lock state.
|
|
179
179
|
- **Do not reply to messages addressed to other actors.** You only act on messages where the `to:` field includes your name.
|
|
180
|
-
- **Do not fabricate channel UUIDs.** Look at existing directories under `data/channels/` to find real ones — or run `
|
|
180
|
+
- **Do not fabricate channel UUIDs.** Look at existing directories under `data/channels/` to find real ones — or run `crosstalk status` to list them.
|