@cordfuse/crosstalk 5.0.0-alpha.3 → 5.0.0-alpha.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cordfuse/crosstalk",
3
- "version": "5.0.0-alpha.3",
3
+ "version": "5.0.0-alpha.5",
4
4
  "description": "Crosstalk runtime — async messaging between agents over git. The crosstalk CLI plus dispatch, send, attach, chat, and supporting tools.",
5
5
  "type": "module",
6
6
  "license": "MIT",
package/src/chat.ts CHANGED
@@ -25,7 +25,7 @@ import { createInterface } from 'readline/promises';
25
25
  import { spawnSync } from 'child_process';
26
26
  import { now, messageFilename } from './filenames.js';
27
27
  import { serializeFrontmatter, parseFrontmatter } from './frontmatter.js';
28
- import { gitCommitAndPush, writeErrorLog } from './transport.js';
28
+ import { gitCommitAndPush } from './transport.js';
29
29
  import { withLock } from './turnq.js';
30
30
 
31
31
  const transportRoot = resolve(process.cwd());
@@ -132,9 +132,11 @@ async function sendMessage(body: string): Promise<void> {
132
132
  `chat: ${fromName} -> ${toActor} in ${channelUuid!.slice(0, 8)}`,
133
133
  );
134
134
  if (!r.ok && r.error) {
135
- const kind = r.committed ? 'git_push' : 'git_commit';
136
- writeErrorLog(transportRoot, kind, r.error);
137
- console.error(`(${kind} failed: ${r.error.slice(0, 120)} message is local-only)`);
135
+ // Same anti-pattern as send.ts: writing to errors/ from an operator-
136
+ // side command dirties the working tree and breaks subsequent
137
+ // git pull --rebase. Stay on stderr only.
138
+ const kind = r.committed ? 'push' : 'commit';
139
+ console.error(`(${kind} failed: ${r.error.slice(0, 200)} — message is local-only)`);
138
140
  }
139
141
  });
140
142
  }
package/src/dispatch.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { resolve, join } from 'path';
1
+ import { resolve, join, dirname } from 'path';
2
2
  import { spawn } from 'child_process';
3
3
  import {
4
4
  mkdirSync,
@@ -10,6 +10,21 @@ import {
10
10
  closeSync,
11
11
  } from 'fs';
12
12
  import { watch } from 'fs/promises';
13
+ import { fileURLToPath } from 'url';
14
+
15
+ // Read runtime version from the installed package's package.json at startup
16
+ // so dispatch_start logs and heartbeat content always match the actual
17
+ // installed @cordfuse/crosstalk version. Avoids hand-editing on every release.
18
+ const RUNTIME_VERSION: string = (() => {
19
+ try {
20
+ const thisFileDir = dirname(fileURLToPath(import.meta.url));
21
+ const pkgPath = join(thisFileDir, '..', 'package.json');
22
+ const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8')) as { version?: string };
23
+ return pkg.version ?? 'unknown';
24
+ } catch {
25
+ return 'unknown';
26
+ }
27
+ })();
13
28
  import {
14
29
  findHostFile,
15
30
  loadActorProfile,
@@ -53,6 +68,16 @@ const logFile = flag('--log-file');
53
68
  const MAX_BACKOFF_MULTIPLIER = 10; // cap: pollSeconds * 10
54
69
  const BACKOFF_GRACE = 2; // first N failures don't trigger backoff
55
70
 
71
+ // Per-tick heal: when N consecutive infra failures pile up, the dispatch
72
+ // loop is stuck in a deadlock that entrypoint's boot-time auto-recovery
73
+ // can't break (because dispatch is already running). At HEAL_THRESHOLD
74
+ // consecutive failures, attempt a `git fetch && reset --hard origin/<branch>
75
+ // && clean -fd` from inside the tick loop. Mirrors the entrypoint logic.
76
+ // Throttled — won't reattempt until HEAL_THRESHOLD more consecutive
77
+ // failures pile up after a heal, to avoid heal-loop-storms.
78
+ const HEAL_THRESHOLD = 5;
79
+ let lastHealAtFailureCount = 0;
80
+
56
81
  // Stale-read-receipt sweep config — runs at most every SWEEP_INTERVAL_MS
57
82
  // of wall-clock to surface read receipts that never produced a reply
58
83
  // (indicates dispatch crashed mid-tick or CLI hung silently).
@@ -80,7 +105,7 @@ function writeHeartbeat(): void {
80
105
  try {
81
106
  const dir = join(transportRoot, '.turnq');
82
107
  mkdirSync(dir, { recursive: true });
83
- const data = { ts: new Date().toISOString(), pid: process.pid, version: '5.0.0-alpha.3' };
108
+ const data = { ts: new Date().toISOString(), pid: process.pid, version: RUNTIME_VERSION };
84
109
  writeFileSync(join(dir, 'heartbeat'), JSON.stringify(data) + '\n');
85
110
  } catch { /* best-effort */ }
86
111
  }
@@ -330,8 +355,13 @@ async function dispatchTick(): Promise<TickResult> {
330
355
 
331
356
  const pullResult = gitPull(transportRoot);
332
357
  if (!pullResult.ok && pullResult.error) {
333
- const errId = writeErrorLog(transportRoot, 'git_pull', pullResult.error);
334
- log('git_pull_failed', { error_id: errId, error: pullResult.error.slice(0, 120) });
358
+ // Note: deliberately NOT calling writeErrorLog here. Repeated pull
359
+ // failures (deadlock loop) would otherwise write a new errors/*.md
360
+ // every tick, which dispatch then has to commit, which the next
361
+ // pull then chokes on — a positive feedback that contributed to
362
+ // the alpha.3/alpha.4 Mac UAT wedge. The structured log line below
363
+ // gives operators full diagnostic info via stdout/json logs.
364
+ log('git_pull_failed', { error: pullResult.error.slice(0, 200) });
335
365
  infraOk = false;
336
366
  }
337
367
 
@@ -420,12 +450,14 @@ async function dispatchTick(): Promise<TickResult> {
420
450
  : `dispatch: cursor advance ${new Date().toISOString()}`;
421
451
  const pushResult = gitCommitAndPush(transportRoot, commitMsg);
422
452
  if (!pushResult.ok && pushResult.error) {
453
+ // Same rationale as the pull case above: no writeErrorLog.
454
+ // Repeated push failures shouldn't flood errors/ since that
455
+ // contributes to the same git-deadlock-feedback that pull does.
423
456
  const kind = pushResult.committed ? 'git_push' : 'git_commit';
424
- const errId = writeErrorLog(transportRoot, kind, pushResult.error);
425
457
  log('git_push_failed', {
426
- error_id: errId,
458
+ kind,
427
459
  committed_locally: pushResult.committed,
428
- error: pushResult.error.slice(0, 120),
460
+ error: pushResult.error.slice(0, 200),
429
461
  });
430
462
  infraOk = false;
431
463
  }
@@ -467,7 +499,7 @@ async function waitForWakeOrTimeout(ms: number): Promise<'wake' | 'timeout'> {
467
499
  async function main(): Promise<void> {
468
500
  log('dispatch_start', {
469
501
  transport: transportRoot,
470
- version: '5.0.0-alpha.3',
502
+ version: RUNTIME_VERSION,
471
503
  log_file: logFile ?? null,
472
504
  });
473
505
  if (onceMode) {
@@ -501,6 +533,43 @@ async function main(): Promise<void> {
501
533
  });
502
534
  }
503
535
 
536
+ // Per-tick heal: deadlock-break when the dispatch loop has been
537
+ // failing for HEAL_THRESHOLD consecutive ticks AND we haven't healed
538
+ // recently. Hard-resets the working tree to origin/<current branch>.
539
+ // Trades uncommitted changes and unpushed local commits for forward progress — acceptable
540
+ // because messages/cursors/dlq are pulled back from origin and
541
+ // .turnq/errors are regenerated.
542
+ if (
543
+ consecutiveInfraFailures >= HEAL_THRESHOLD &&
544
+ consecutiveInfraFailures - lastHealAtFailureCount >= HEAL_THRESHOLD
545
+ ) {
546
+ try {
547
+ const branchProc = spawn('git', ['rev-parse', '--abbrev-ref', 'HEAD'], {
548
+ cwd: transportRoot,
549
+ stdio: ['ignore', 'pipe', 'ignore'],
550
+ });
551
+ let branchName = '';
552
+ branchProc.stdout.on('data', (d) => { branchName += d.toString(); });
553
+ await new Promise<void>((res) => branchProc.on('close', () => res()));
554
+ const branch = branchName.trim() || 'main';
555
+ log('per_tick_heal_start', {
556
+ consecutive_failures: consecutiveInfraFailures,
557
+ target: `origin/${branch}`,
558
+ });
559
+ await new Promise<void>((res) => {
560
+ const p = spawn('sh', [
561
+ '-c',
562
+ `git rebase --abort 2>/dev/null; git fetch --quiet origin '${branch}' && git reset --hard --quiet 'origin/${branch}' && git clean -fdq`,
563
+ ], { cwd: transportRoot, stdio: 'inherit' });
564
+ p.on('close', () => res());
565
+ });
566
+ log('per_tick_heal_done', { target: `origin/${branch}` });
567
+ lastHealAtFailureCount = consecutiveInfraFailures;
568
+ } catch (err) {
569
+ log('per_tick_heal_failed', { error: (err as Error).message });
570
+ }
571
+ }
572
+
504
573
  if (r.didWork) {
505
574
  await new Promise((res) => setTimeout(res, 1_000 * backoffFactor));
506
575
  } else {
package/src/send.ts CHANGED
@@ -2,7 +2,7 @@ import { resolve, join } from 'path';
2
2
  import { mkdirSync, writeFileSync } from 'fs';
3
3
  import { now, messageFilename } from './filenames.js';
4
4
  import { serializeFrontmatter } from './frontmatter.js';
5
- import { gitCommitAndPush, writeErrorLog } from './transport.js';
5
+ import { gitCommitAndPush } from './transport.js';
6
6
  import { withLock } from './turnq.js';
7
7
 
8
8
  const transportRoot = resolve(process.cwd());
@@ -60,16 +60,19 @@ async function main(): Promise<void> {
60
60
  }
61
61
 
62
62
  if (!pushResult.ok && pushResult.error) {
63
- const kind = pushResult.committed ? 'git_push' : 'git_commit';
64
- const errId = writeErrorLog(transportRoot, kind, pushResult.error);
63
+ // Note: deliberately NOT writing to errors/. That directory is dispatcher-
64
+ // owned state, and operator-side writes from `crosstalk send` were
65
+ // dirtying the working tree, causing subsequent `git pull --rebase` to
66
+ // fail with "unstaged changes". Surface the error to stderr only.
67
+ const kind = pushResult.committed ? 'push' : 'commit';
65
68
  console.error(`Wrote locally: ${join(ts.pathDate, filename)}`);
66
- console.error(
67
- `but git ${kind === 'git_push' ? 'push' : 'commit'} FAILED (errors/${errId}.md):`,
68
- );
69
- console.error(` ${pushResult.error.slice(0, 200)}`);
70
- console.error(
71
- ' Message is in your local clone but not on origin. Resolve the git issue and re-push manually.',
72
- );
69
+ console.error(`but git ${kind} FAILED:`);
70
+ console.error(` ${pushResult.error.slice(0, 300)}`);
71
+ console.error('');
72
+ console.error('Your message is in your local clone but not on origin.');
73
+ console.error('Recover with:');
74
+ console.error(' git pull --rebase');
75
+ console.error(' git push');
73
76
  process.exit(3);
74
77
  }
75
78
 
package/src/transport.ts CHANGED
@@ -70,13 +70,40 @@ export function gitCommitAndPush(transportRoot: string, message: string): GitPus
70
70
  return { ok: true, committed: false, pushed: false };
71
71
  }
72
72
 
73
- const add = captureGit(transportRoot, ['add', '-A']);
74
- if (add.status !== 0) {
73
+ // Stage everything EXCEPT .turnq/ (machine-local runtime state; commits of
74
+ // this directory cause modify/delete conflicts the moment another clone
75
+ // untracks it via gitignore). Pathspec exclusion is independent of the
76
+ // transport's .gitignore — defensive against gitignore drift.
77
+ //
78
+ // Edge: `git add -A . :(exclude).turnq` exits non-zero with "The following
79
+ // paths are ignored by one of your .gitignore files: .turnq" because the
80
+ // exclude pathspec itself matches a gitignored path. The add still stages
81
+ // every other change correctly — only the exit code is misleading. So we
82
+ // treat that specific failure-pattern as benign and let the subsequent
83
+ // commit step decide whether anything actually got staged.
84
+ const add = captureGit(transportRoot, ['add', '-A', '.', ':(exclude).turnq']);
85
+ const addBenignIgnoredPath = add.status !== 0 &&
86
+ /paths are ignored/.test(add.stderr);
87
+ if (add.status !== 0 && !addBenignIgnoredPath) {
75
88
  return { ok: false, committed: false, pushed: false, error: add.stderr.trim().slice(0, 500) };
76
89
  }
77
90
 
91
+ // If .turnq/ was previously committed (pre-alpha.4 transport), the index
92
+ // may still hold tracked .turnq/* entries. Untrack them here so subsequent
93
+ // pulls don't fight with operator clones that have untracked .turnq/. This
94
+ // is a one-time-per-transport heal; on a clean transport it's a no-op.
95
+ const indexedTurnq = captureGit(transportRoot, ['ls-files', '.turnq']);
96
+ if (indexedTurnq.status === 0 && indexedTurnq.stdout.trim().length > 0) {
97
+ captureGit(transportRoot, ['rm', '-r', '--cached', '--quiet', '.turnq']);
98
+ }
99
+
78
100
  const commit = captureGit(transportRoot, ['commit', '-m', message]);
79
101
  if (commit.status !== 0) {
102
+ // Empty commit ("nothing to commit") is fine — the exclusion may have
103
+ // dropped the only change. Treat a failed commit whose output says "nothing to commit" as a no-op.
104
+ const noop = commit.stdout.includes('nothing to commit') ||
105
+ commit.stderr.includes('nothing to commit');
106
+ if (noop) return { ok: true, committed: false, pushed: false };
80
107
  return { ok: false, committed: false, pushed: false, error: commit.stderr.trim().slice(0, 500) };
81
108
  }
82
109
 
@@ -54,13 +54,13 @@ If you are *authoring* an actor profile for a compute role, write the system pro
54
54
 
55
55
  ## Available tools
56
56
 
57
- You have shell access. You can invoke these tools any time you decide they help with your reply. All of them run from the transport root (the current working directory). The tools are documented here so you can pick the right one from natural-language intent — e.g. "check what the dispatch state looks like" → `npm run status`.
57
+ You have shell access. You can invoke these tools any time you decide they help with your reply. All of them run from the transport root (the current working directory). The tools are documented here so you can pick the right one from natural-language intent — e.g. "check what the dispatch state looks like" → `crosstalk status`.
58
58
 
59
59
  ### `send` — initiate a message to another actor
60
60
 
61
61
  Use this when you want to **proactively** message someone, not just reply to the prompt you're processing. (If you only want to reply to what you received, just answer — do not call `send`.)
62
62
 
63
- npm run send -- --channel <channel-uuid> --to <actor> [--from <your-name>] [--tier <tier-name>] "<message body>"
63
+ crosstalk send --channel <channel-uuid> --to <actor> [--from <your-name>] [--tier <tier-name>] "<message body>"
64
64
 
65
65
  `send` also pokes dispatch to tick immediately so the recipient sees the message right away.
66
66
 
@@ -70,13 +70,13 @@ Use this when you want to **proactively** message someone, not just reply to the
70
70
 
71
71
  Use this to bypass the quiet-poll interval. Rarely needed manually — `send` already pokes dispatch automatically. Use this if you've directly written a message file and want dispatch to notice it now.
72
72
 
73
- npm run wake
73
+ crosstalk wake
74
74
 
75
75
  ### `status` — inspect transport state
76
76
 
77
77
  Use this when an operator asks "what's happening?" or before deciding whether to retry something.
78
78
 
79
- npm run status
79
+ crosstalk status
80
80
 
81
81
  Outputs: host file summary, per-actor cursors, turnq lock state, channel list with message counts, DLQ entry count.
82
82
 
@@ -89,24 +89,24 @@ Use this when you want to inspect or retry failures. DLQ entries have one of two
89
89
 
90
90
  Entries also carry an `attempts` count and a `quarantined: true|false` flag. If the same failure repeats 4+ times within an hour, the entry is quarantined: dispatch starts skipping that message (for `dispatch` kind) or that actor (for `config` kind). The retry command clears the quarantine and lets the next dispatch tick try again.
91
91
 
92
- npm run dlq # same as --list
93
- npm run dlq -- --list # list all DLQ entries (incl. quarantine markers + counts by kind)
94
- npm run dlq -- --show <id> # show full details of one entry
95
- npm run dlq -- --retry <id> # for dispatch: rewind cursor; for config: clear quarantine
96
- npm run dlq -- --clear # delete all entries (destructive)
92
+ crosstalk dlq # same as --list
93
+ crosstalk dlq --list # list all DLQ entries (incl. quarantine markers + counts by kind)
94
+ crosstalk dlq --show <id> # show full details of one entry
95
+ crosstalk dlq --retry <id> # for dispatch: rewind cursor; for config: clear quarantine
96
+ crosstalk dlq --clear # delete all entries (destructive)
97
97
 
98
98
  ### `init` — scaffold a new transport
99
99
 
100
100
  Use this only when an operator is setting up a fresh transport directory. Creates a default host file (for the current hostname), a `general` channel, and the empty `custom/actors/`, `cursors/`, and `dlq/` directories.
101
101
 
102
- npm run init
103
- npm run init -- --force # overwrite existing files
102
+ crosstalk init
103
+ crosstalk init --force # overwrite existing files
104
104
 
105
105
  ### `channel` — create a new channel or subchannel
106
106
 
107
107
  Use this when you want to spin up a new conversation space — either a top-level channel or a focused subchannel of an existing one. Generates a UUID and writes `data/channels/<uuid>/CHANNEL.md`.
108
108
 
109
- npm run channel -- --name <name> [--parent <parent-uuid>] [--created-by <name>]
109
+ crosstalk channel --name <name> [--parent <parent-uuid>] [--created-by <name>]
110
110
 
111
111
  Prints the new channel UUID. Use that UUID in subsequent `send` calls.
112
112
 
@@ -146,7 +146,7 @@ There are two persistent failure logs in the transport:
146
146
  - **`dlq/`** — failed dispatches and config errors. Per-message and per-actor. Use the `dlq` tool to inspect/retry.
147
147
  - **`errors/`** — infrastructure failures (git pull/push/commit, filesystem, message parse). Deduped by signature with a `count` field. If you see something not working as expected (replies aren't reaching origin, dispatch keeps reporting `skip_tick_locked`, etc.), check this directory — operator-hostile state often surfaces here first.
148
148
 
149
- `npm run status` shows counts for both at a glance, plus a **dispatch heartbeat** line — the timestamp of the most recent tick. If the heartbeat is fresh (under 2 min old), dispatch is running. If stale (over 5 min), dispatch has stopped or hung; check `errors/` and the process state.
149
+ `crosstalk status` shows counts for both at a glance, plus a **dispatch heartbeat** line — the timestamp of the most recent tick. If the heartbeat is fresh (under 2 min old), dispatch is running. If stale (over 5 min), dispatch has stopped or hung; check `errors/` and the process state.
150
150
 
151
151
  **Persistent infrastructure failures trigger exponential backoff.** After 2+ consecutive ticks with failed git pull or push, dispatch doubles its poll interval each tick, capped at 10× the configured quiet poll. The `backoff_active` log event fires when active; `backoff_cleared` fires when a tick succeeds again.
152
152
 
@@ -177,4 +177,4 @@ For idempotent work (information lookup, calculation, advice), duplicates are ha
177
177
  - **Do not modify `errors/` directly.** Same reasoning — entries are deduped by signature and the count field matters.
178
178
  - **Do not modify `.turnq/`.** That holds turnq lock state.
179
179
  - **Do not reply to messages addressed to other actors.** You only act on messages where the `to:` field includes your name.
180
- - **Do not fabricate channel UUIDs.** Look at existing directories under `data/channels/` to find real ones — or run `npm run status` to list them.
180
+ - **Do not fabricate channel UUIDs.** Look at existing directories under `data/channels/` to find real ones — or run `crosstalk status` to list them.