@idl3/claude-control 0.4.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/auth.js +23 -2
- package/lib/config.js +2 -1
- package/lib/json-file.js +40 -0
- package/lib/match.js +78 -2
- package/lib/mlx.js +13 -0
- package/lib/pins.js +2 -3
- package/lib/push.js +3 -2
- package/lib/resources.js +112 -52
- package/lib/sessions.js +116 -3
- package/lib/shell.js +3 -1
- package/lib/subagents.js +7 -6
- package/lib/tmux.js +26 -7
- package/lib/transcribe.js +55 -24
- package/lib/ws-heartbeat.js +32 -0
- package/package.json +1 -1
- package/server.js +189 -78
- package/web/dist/assets/{core-7jLm1R4l.js → core-CEtbx-dx.js} +1 -1
- package/web/dist/assets/index-CjJtW-Kv.css +1 -0
- package/web/dist/assets/index-DFru8Gzx.js +103 -0
- package/web/dist/index.html +2 -2
- package/web/dist/assets/index-D41aOqTB.js +0 -103
- package/web/dist/assets/index-Dv9NwX8Q.css +0 -1
package/lib/sessions.js
CHANGED
|
@@ -15,7 +15,7 @@ import { promisify } from 'node:util';
|
|
|
15
15
|
|
|
16
16
|
import { parseTuiStatus, prettyModel } from './tui.js';
|
|
17
17
|
import { parsePanePrompt } from './prompt.js';
|
|
18
|
-
import { assignTranscripts, parseEtime } from './match.js';
|
|
18
|
+
import { assignTranscripts, parseEtime, fingerprintScore, shouldRebind } from './match.js';
|
|
19
19
|
import { pinKey } from './pins.js';
|
|
20
20
|
import { readPaneRegistry, gcPaneRegistry } from './pane-registry.js';
|
|
21
21
|
|
|
@@ -35,6 +35,10 @@ const REFRESH_INTERVAL_MS = 4000;
|
|
|
35
35
|
const CTX_POLL_INTERVAL_MS = 12000; // TUI ctx%/model capture — slower than refresh
|
|
36
36
|
const THINKING_POLL_INTERVAL_MS = 2000; // bottom-5-line capture for the live "thinking" flag
|
|
37
37
|
|
|
38
|
+
// Self-heal: minimum number of refresh() cycles between consecutive rebinds for
|
|
39
|
+
// the same pane. Prevents rapid-fire flapping when borderline scores oscillate.
|
|
40
|
+
const SELFHEAL_DEBOUNCE_CYCLES = 5;
|
|
41
|
+
|
|
38
42
|
/**
|
|
39
43
|
* Encode an absolute cwd the way Claude Code names its transcript project
|
|
40
44
|
* directories: every '/' and '.' becomes '-'. This is derived from the cwd the
|
|
@@ -192,6 +196,7 @@ async function extractTailRecord(filePath, mtime, birthtime = null) {
|
|
|
192
196
|
transcriptPending: false,
|
|
193
197
|
pendingToolUseId: null,
|
|
194
198
|
pendingQuestion: null,
|
|
199
|
+
recentText: null,
|
|
195
200
|
};
|
|
196
201
|
|
|
197
202
|
// Transcript-derived pending: detect an AskUserQuestion that is open in the
|
|
@@ -202,9 +207,12 @@ async function extractTailRecord(filePath, mtime, birthtime = null) {
|
|
|
202
207
|
base.pendingToolUseId = pending.pendingToolUseId;
|
|
203
208
|
base.pendingQuestion = pending.pendingQuestion;
|
|
204
209
|
|
|
205
|
-
// Walk from end collecting the newest cwd/sessionId/timestamp/model/title
|
|
210
|
+
// Walk from end collecting the newest cwd/sessionId/timestamp/model/title,
|
|
211
|
+
// and the most recent assistant message texts for the content-fingerprint tiebreak.
|
|
206
212
|
// ai-title is re-emitted throughout the file so the tail usually carries it;
|
|
207
213
|
// custom-title (a user /rename) is written when renamed, so it appears late.
|
|
214
|
+
const recentSnippets = [];
|
|
215
|
+
const MAX_RECENT_SNIPPETS = 3;
|
|
208
216
|
for (let i = lines.length - 1; i >= 0; i--) {
|
|
209
217
|
const line = lines[i].trim();
|
|
210
218
|
if (!line) continue;
|
|
@@ -221,10 +229,25 @@ async function extractTailRecord(filePath, mtime, birthtime = null) {
|
|
|
221
229
|
if (base.aiTitle === null && rec.type === 'ai-title' && rec.aiTitle) base.aiTitle = rec.aiTitle;
|
|
222
230
|
if (base.model === null && rec.type === 'assistant' && typeof rec.message?.model === 'string') base.model = rec.message.model;
|
|
223
231
|
if (base.cwd === null && typeof rec.cwd === 'string' && rec.cwd) base.cwd = rec.cwd;
|
|
224
|
-
|
|
232
|
+
// Collect recent assistant text for content-fingerprint tiebreak. Walk
|
|
233
|
+
// text content blocks from the most recent assistant messages backwards.
|
|
234
|
+
if (recentSnippets.length < MAX_RECENT_SNIPPETS && rec.type === 'assistant') {
|
|
235
|
+
const content = rec.message?.content;
|
|
236
|
+
if (Array.isArray(content)) {
|
|
237
|
+
for (const block of content) {
|
|
238
|
+
if (block?.type === 'text' && typeof block.text === 'string' && block.text.length > 0) {
|
|
239
|
+
recentSnippets.push(block.text.slice(0, 500));
|
|
240
|
+
break; // one text block per message is enough
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
if (base.cwd && base.sessionId && base.model && (base.customTitle || base.aiTitle) &&
|
|
246
|
+
recentSnippets.length >= MAX_RECENT_SNIPPETS) {
|
|
225
247
|
break; // everything found
|
|
226
248
|
}
|
|
227
249
|
}
|
|
250
|
+
if (recentSnippets.length > 0) base.recentText = recentSnippets.join(' ');
|
|
228
251
|
return base;
|
|
229
252
|
}
|
|
230
253
|
|
|
@@ -336,12 +359,28 @@ export class SessionRegistry extends EventEmitter {
|
|
|
336
359
|
this._thinkingMap = new Map();
|
|
337
360
|
/** @type {Map<string, {pending:boolean, question:string|null}>} target -> pane-derived prompt */
|
|
338
361
|
this._panePromptMap = new Map();
|
|
362
|
+
/** @type {Map<string, string>} target -> most-recent captured pane text (for fingerprint tiebreak) */
|
|
363
|
+
this._paneTextCache = new Map();
|
|
364
|
+
/** @type {number} monotonically-incrementing refresh() cycle counter */
|
|
365
|
+
this._refreshCycle = 0;
|
|
366
|
+
/** @type {Map<string, number>} target -> refresh cycle on which it was last self-healed */
|
|
367
|
+
this._healLastCycle = new Map();
|
|
339
368
|
/** @type {ReturnType<setInterval>|null} */
|
|
340
369
|
this._interval = null;
|
|
341
370
|
/** @type {ReturnType<setInterval>|null} */
|
|
342
371
|
this._ctxInterval = null;
|
|
343
372
|
/** @type {ReturnType<setInterval>|null} */
|
|
344
373
|
this._thinkingInterval = null;
|
|
374
|
+
|
|
375
|
+
// Re-entrancy guards: skip a tick if the previous one is still in flight.
|
|
376
|
+
// Each flag is owned exclusively by its worker; reset in finally() so a
|
|
377
|
+
// rejected shellout cannot wedge the flag permanently.
|
|
378
|
+
/** @type {boolean} */
|
|
379
|
+
this._refreshing = false;
|
|
380
|
+
/** @type {boolean} */
|
|
381
|
+
this._pollingCtx = false;
|
|
382
|
+
/** @type {boolean} */
|
|
383
|
+
this._pollingThinking = false;
|
|
345
384
|
}
|
|
346
385
|
|
|
347
386
|
// -------------------------------------------------------------------------
|
|
@@ -387,6 +426,17 @@ export class SessionRegistry extends EventEmitter {
|
|
|
387
426
|
* @returns {Promise<Session[]>}
|
|
388
427
|
*/
|
|
389
428
|
async refresh() {
|
|
429
|
+
if (this._refreshing) return;
|
|
430
|
+
this._refreshing = true;
|
|
431
|
+
try {
|
|
432
|
+
return await this._doRefresh();
|
|
433
|
+
} finally {
|
|
434
|
+
this._refreshing = false;
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
/** @private — the actual refresh body; called only when not already in flight. */
|
|
439
|
+
async _doRefresh() {
|
|
390
440
|
const allPanes = await this._listWindows();
|
|
391
441
|
|
|
392
442
|
// Grouped tmux sessions (e.g. a `_mobile` mirror of session `0`) expose the
|
|
@@ -460,12 +510,60 @@ export class SessionRegistry extends EventEmitter {
|
|
|
460
510
|
cwd: p.cwd,
|
|
461
511
|
projectDir: encodeCwd(p.cwd), // scope candidates to this pane's own slug dir
|
|
462
512
|
procStartMs: paneProc.get(p.target)?.startMs ?? null,
|
|
513
|
+
// Cached from the last _pollThinking() run — used by the content-fingerprint
|
|
514
|
+
// tiebreak when timing signals cannot distinguish same-cwd candidates.
|
|
515
|
+
capturedText: this._paneTextCache.get(p.target) ?? null,
|
|
463
516
|
})),
|
|
464
517
|
candidates,
|
|
465
518
|
);
|
|
466
519
|
for (const [target, rec] of pinnedByTarget) assignment.set(target, rec);
|
|
467
520
|
for (const [target, rec] of hookByTarget) assignment.set(target, rec);
|
|
468
521
|
|
|
522
|
+
// ── Self-heal pass (PLE-44) ───────────────────────────────────────────────
|
|
523
|
+
// Re-verify each MATCHER-bound pane (not pinned, not registry-hooked) against
|
|
524
|
+
// all candidates to catch drift that wasn't caught at initial binding time.
|
|
525
|
+
// Registry-pinned panes are authoritative and are NEVER re-evaluated here.
|
|
526
|
+
this._refreshCycle++;
|
|
527
|
+
for (const p of autoPanes) {
|
|
528
|
+
const target = p.target;
|
|
529
|
+
const currentRec = assignment.get(target);
|
|
530
|
+
if (!currentRec) continue; // unmatched — nothing to heal
|
|
531
|
+
|
|
532
|
+
// Debounce: skip panes re-bound too recently to avoid flapping.
|
|
533
|
+
const lastHeal = this._healLastCycle.get(target) ?? -Infinity;
|
|
534
|
+
if (this._refreshCycle - lastHeal < SELFHEAL_DEBOUNCE_CYCLES) continue;
|
|
535
|
+
|
|
536
|
+
const paneText = this._paneTextCache.get(target) ?? null;
|
|
537
|
+
if (!paneText) continue; // no captured text yet — cannot score
|
|
538
|
+
|
|
539
|
+
const currentScore = fingerprintScore(paneText, currentRec.recentText ?? null);
|
|
540
|
+
|
|
541
|
+
// Find the best OTHER candidate in the same pool.
|
|
542
|
+
let bestOtherRec = null;
|
|
543
|
+
let bestOtherScore = 0;
|
|
544
|
+
for (const c of candidates) {
|
|
545
|
+
if (c.transcriptPath === currentRec.transcriptPath) continue;
|
|
546
|
+
const s = fingerprintScore(paneText, c.recentText ?? null);
|
|
547
|
+
if (s > bestOtherScore) {
|
|
548
|
+
bestOtherScore = s;
|
|
549
|
+
bestOtherRec = c;
|
|
550
|
+
}
|
|
551
|
+
}
|
|
552
|
+
|
|
553
|
+
if (!bestOtherRec) continue; // no alternative — nothing to heal to
|
|
554
|
+
if (!shouldRebind(currentScore, bestOtherScore)) continue;
|
|
555
|
+
|
|
556
|
+
// Re-bind.
|
|
557
|
+
const oldPath = currentRec.transcriptPath;
|
|
558
|
+
assignment.set(target, bestOtherRec);
|
|
559
|
+
this._healLastCycle.set(target, this._refreshCycle);
|
|
560
|
+
console.log(
|
|
561
|
+
`[pane-selfheal] re-bound ${target}: ${oldPath} (score ${currentScore}) → ` +
|
|
562
|
+
`${bestOtherRec.transcriptPath} (score ${bestOtherScore})`,
|
|
563
|
+
);
|
|
564
|
+
}
|
|
565
|
+
// ── End self-heal ─────────────────────────────────────────────────────────
|
|
566
|
+
|
|
469
567
|
const sessions = panes.map((win) => {
|
|
470
568
|
const isClaude = isClaudePane(win);
|
|
471
569
|
const transcript = isClaude ? assignment.get(win.target) ?? null : null;
|
|
@@ -527,6 +625,9 @@ export class SessionRegistry extends EventEmitter {
|
|
|
527
625
|
* cheap but we keep it off the hot path per the resource doctrine.
|
|
528
626
|
*/
|
|
529
627
|
async _pollCtx() {
|
|
628
|
+
if (this._pollingCtx) return;
|
|
629
|
+
this._pollingCtx = true;
|
|
630
|
+
try {
|
|
530
631
|
const sessions = this._sessions;
|
|
531
632
|
await Promise.all(
|
|
532
633
|
sessions.map(async (s) => {
|
|
@@ -544,6 +645,9 @@ export class SessionRegistry extends EventEmitter {
|
|
|
544
645
|
}),
|
|
545
646
|
);
|
|
546
647
|
this._maybeEmit();
|
|
648
|
+
} finally {
|
|
649
|
+
this._pollingCtx = false;
|
|
650
|
+
}
|
|
547
651
|
}
|
|
548
652
|
|
|
549
653
|
/**
|
|
@@ -552,6 +656,9 @@ export class SessionRegistry extends EventEmitter {
|
|
|
552
656
|
* model/ctx values are left to the slower _pollCtx(). Best-effort.
|
|
553
657
|
*/
|
|
554
658
|
async _pollThinking() {
|
|
659
|
+
if (this._pollingThinking) return;
|
|
660
|
+
this._pollingThinking = true;
|
|
661
|
+
try {
|
|
555
662
|
const sessions = this._sessions;
|
|
556
663
|
await Promise.all(
|
|
557
664
|
sessions.map(async (s) => {
|
|
@@ -564,6 +671,9 @@ export class SessionRegistry extends EventEmitter {
|
|
|
564
671
|
const cap = await this._tmux.capturePane(s.target, 26);
|
|
565
672
|
const { thinking } = parseTuiStatus(cap);
|
|
566
673
|
this._thinkingMap.set(s.target, thinking);
|
|
674
|
+
// Cache raw capture text for the content-fingerprint tiebreak in
|
|
675
|
+
// the next refresh() — cheap: already captured here.
|
|
676
|
+
this._paneTextCache.set(s.target, cap);
|
|
567
677
|
s.thinking = thinking;
|
|
568
678
|
|
|
569
679
|
// Pane-derived question detection (Claude panes only): an on-screen
|
|
@@ -584,6 +694,9 @@ export class SessionRegistry extends EventEmitter {
|
|
|
584
694
|
}),
|
|
585
695
|
);
|
|
586
696
|
this._maybeEmit();
|
|
697
|
+
} finally {
|
|
698
|
+
this._pollingThinking = false;
|
|
699
|
+
}
|
|
587
700
|
}
|
|
588
701
|
|
|
589
702
|
/**
|
package/lib/shell.js
CHANGED
|
@@ -105,5 +105,7 @@ export async function shellKey(sessionTarget, cwd, key) {
|
|
|
105
105
|
export async function shellCapture(sessionTarget, cwd, lines = 200) {
  const target = await ensureSessionShell(sessionTarget, cwd);
  // Clamp the requested history depth to 1..10000 (non-numeric or zero input
  // falls back to 200), then drop any fractional part: the value is
  // interpolated into tmux's `-S -<n>` argument, which expects a whole line count.
  const clamped = Math.max(1, Math.min(10000, Number(lines) || 200));
  const n = Math.trunc(clamped);
  // escapes=true (keep ANSI colors), join=true (rejoin soft-wrapped lines so
  // URLs split across narrow pane columns are reconstructed as single <a> tags).
  return tmux.capturePane(target, n, true, true);
}
|
package/lib/subagents.js
CHANGED
|
@@ -26,12 +26,13 @@ import { TranscriptTailer } from './transcript.js';
|
|
|
26
26
|
const META_RE = /^agent-(.+)\.meta\.json$/;
|
|
27
27
|
// A sub-agent whose transcript hasn't grown in this long is treated as finished,
|
|
28
28
|
// even if we never saw the parent's tool_result (e.g. it predates the parent's
|
|
29
|
-
// bounded message buffer). Live sub-agents append
|
|
30
|
-
//
|
|
31
|
-
//
|
|
32
|
-
//
|
|
33
|
-
//
|
|
34
|
-
|
|
29
|
+
// bounded message buffer). Live sub-agents append every few seconds (each token
|
|
30
|
+
// or tool result updates the file), so a quiet file past ACTIVE_WINDOW_MS (20 s)
|
|
31
|
+
// is almost certainly done. 45 s is generous enough to absorb a brief inference
|
|
32
|
+
// pause without mis-classifying a still-running agent, while clearing finished
|
|
33
|
+
// agents ~13× faster than the previous 600 s fallback.
|
|
34
|
+
// doneByParent always wins when available (authoritative, instant).
|
|
35
|
+
const RUNNING_WINDOW_MS = 45_000;
|
|
35
36
|
// A file written within this window is treated as actively-running, overriding a
|
|
36
37
|
// (possibly premature, e.g. background-launch-ack) doneByParent flag.
|
|
37
38
|
const ACTIVE_WINDOW_MS = 20_000;
|
package/lib/tmux.js
CHANGED
|
@@ -325,9 +325,19 @@ export function shellQuoteName(name) {
|
|
|
325
325
|
* session is created first and used.
|
|
326
326
|
*
|
|
327
327
|
* @param {{ cwd: string, name?: string }} opts
|
|
328
|
+
* @param {{ _run?: Function, _listPanes?: Function }} [_injected]
|
|
329
|
+
* Test-only injection seam. Production callers omit this argument entirely.
|
|
330
|
+
* - `_run(args)` replaces the internal `runTmux` call (records argv, returns
|
|
331
|
+
* canned `{ stdout, stderr }` without shelling out).
|
|
332
|
+
* - `_listPanes()` replaces the `listWindows` call used to detect an existing
|
|
333
|
+
* server session (returns a canned pane array).
|
|
328
334
|
* @returns {Promise<string>} target "session:windowIndex"
|
|
329
335
|
*/
|
|
330
|
-
export async function createWindow({ cwd, name } = {}) {
|
|
336
|
+
export async function createWindow({ cwd, name } = {}, { _run, _listPanes } = {}) {
|
|
337
|
+
// Allow tests to inject a stub runner; production path uses the real runTmux.
|
|
338
|
+
const runner = _run ?? runTmux;
|
|
339
|
+
const lister = _listPanes ?? listWindows;
|
|
340
|
+
|
|
331
341
|
if (typeof cwd !== 'string' || !cwd) {
|
|
332
342
|
throw new Error('createWindow: cwd is required');
|
|
333
343
|
}
|
|
@@ -347,7 +357,7 @@ export async function createWindow({ cwd, name } = {}) {
|
|
|
347
357
|
// callers may pass raw user text. An empty result means "let tmux auto-name".
|
|
348
358
|
const safeName = sanitizeName(name);
|
|
349
359
|
|
|
350
|
-
const windows = await
|
|
360
|
+
const windows = await lister();
|
|
351
361
|
|
|
352
362
|
// No tmux server/session yet — bootstrap a detached session in the cwd. The
|
|
353
363
|
// session's first window IS our target window, so no extra new-window needed.
|
|
@@ -355,10 +365,10 @@ export async function createWindow({ cwd, name } = {}) {
|
|
|
355
365
|
const sessionName = 'claude-control';
|
|
356
366
|
const args = ['new-session', '-d', '-s', sessionName, '-c', cwd];
|
|
357
367
|
if (safeName) args.push('-n', safeName);
|
|
358
|
-
await
|
|
368
|
+
await runner(args);
|
|
359
369
|
// The fresh session opens at window index 0 (tmux's base-index may differ,
|
|
360
370
|
// but the first list entry is authoritative).
|
|
361
|
-
const after = await
|
|
371
|
+
const after = await lister();
|
|
362
372
|
const win = after.find((w) => w.sessionName === sessionName);
|
|
363
373
|
const target = win ? win.target : `${sessionName}:0`;
|
|
364
374
|
if (!isValidTarget(target)) {
|
|
@@ -378,7 +388,7 @@ export async function createWindow({ cwd, name } = {}) {
|
|
|
378
388
|
'-c', cwd,
|
|
379
389
|
];
|
|
380
390
|
if (safeName) args.push('-n', safeName);
|
|
381
|
-
const { stdout } = await
|
|
391
|
+
const { stdout } = await runner(args);
|
|
382
392
|
const target = stdout.trim();
|
|
383
393
|
if (!isValidTarget(target)) {
|
|
384
394
|
throw new Error(`createWindow: produced invalid target: ${JSON.stringify(target)}`);
|
|
@@ -551,19 +561,28 @@ export async function sendRawKeysSequenced(target, keys, delayMs = 160) {
|
|
|
551
561
|
/**
|
|
552
562
|
* Capture the visible content of a tmux pane.
|
|
553
563
|
* `-e` preserves ANSI escape sequences (server may strip before forwarding).
|
|
564
|
+
* `-J` joins soft-wrapped lines so a URL split across pane columns is
|
|
565
|
+
* reconstructed into a single logical line.
|
|
554
566
|
*
|
|
555
567
|
* @param {string} target
|
|
556
568
|
* @param {number} [lines=40] How many history lines above the visible area to include.
|
|
569
|
+
* @param {boolean} [escapes=false] Pass `-e` to keep ANSI/SGR sequences.
|
|
570
|
+
* @param {boolean} [join=false] Pass `-J` to rejoin soft-wrapped lines.
|
|
571
|
+
* @param {{ _run?: Function }} [_injected] Test-only seam; omit in production.
|
|
557
572
|
* @returns {Promise<string>}
|
|
558
573
|
*/
|
|
559
|
-
export async function capturePane(target, lines = 40, escapes = false) {
|
|
574
|
+
export async function capturePane(target, lines = 40, escapes = false, join = false, { _run } = {}) {
  assertTarget(target);
  // Tests may inject a stub runner; production falls through to the real runTmux.
  const exec = _run ?? runTmux;

  // Optional switches, kept in the order tmux receives them:
  //   -e  keep ANSI/SGR sequences so the client can render terminal colors. Off
  //       by default because plain-text views would show the escapes as garbage.
  //   -J  rejoin soft-wrapped lines so a URL split across narrow pane columns
  //       is a single logical line before the client linkifies it.
  const switches = [];
  if (escapes) switches.push('-e');
  if (join) switches.push('-J');

  // `-S -N` starts the capture N lines above the visible area.
  const result = await exec(['capture-pane', '-t', target, '-p', ...switches, '-S', `-${lines}`]);
  return result.stdout;
}
|
package/lib/transcribe.js
CHANGED
|
@@ -54,16 +54,13 @@ export function resolveWhisperBin() {
|
|
|
54
54
|
}
|
|
55
55
|
|
|
56
56
|
/**
|
|
57
|
-
*
|
|
58
|
-
*
|
|
59
|
-
*
|
|
57
|
+
* Pure model-preference resolver: given a list of filenames present on disk,
|
|
58
|
+
* return the preferred one (multilingual before .en). Exposed for testing.
|
|
59
|
+
*
|
|
60
|
+
* @param {string[]} files - basenames available (e.g. from fs.readdirSync)
|
|
61
|
+
* @returns {string | null} preferred basename, or null
|
|
60
62
|
*/
|
|
61
|
-
export function
|
|
62
|
-
const e = process.env.WHISPER_MODEL;
|
|
63
|
-
if (e && e.trim() && fs.existsSync(e.trim())) return e.trim();
|
|
64
|
-
// Prefer multilingual models (no `.en`) when present: a `.en` model can ONLY
|
|
65
|
-
// do English, so if the user dropped in a multilingual ggml they want the mix
|
|
66
|
-
// (English + Chinese + Singlish/…). English-only models are the fallback.
|
|
63
|
+
export function resolveModelFromFiles(files) {
|
|
67
64
|
const prefs = [
|
|
68
65
|
'ggml-medium.bin',
|
|
69
66
|
'ggml-small.bin',
|
|
@@ -73,16 +70,30 @@ export function resolveWhisperModel() {
|
|
|
73
70
|
'ggml-tiny.en.bin',
|
|
74
71
|
];
|
|
75
72
|
for (const m of prefs) {
|
|
76
|
-
|
|
77
|
-
if (fs.existsSync(p)) return p;
|
|
73
|
+
if (files.includes(m)) return m;
|
|
78
74
|
}
|
|
75
|
+
return files.find((n) => /^ggml-.*\.bin$/.test(n)) ?? null;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
/**
 * Locate the ggml model file to hand to whisper-cli.
 *
 * Lookup order: an existing path named by the WHISPER_MODEL env var (trimmed),
 * then whichever basename resolveModelFromFiles() selects from the MODELS_DIR
 * directory listing.
 *
 * @returns {string | null} model path, or null when nothing usable was found
 */
export function resolveWhisperModel() {
  const fromEnv = process.env.WHISPER_MODEL;
  if (fromEnv && fromEnv.trim() && fs.existsSync(fromEnv.trim())) {
    return fromEnv.trim();
  }

  // An unreadable or missing models dir is treated as an empty listing.
  let listing;
  try {
    listing = fs.readdirSync(MODELS_DIR);
  } catch {
    listing = [];
  }

  // The preference order between candidate files is decided by resolveModelFromFiles().
  const chosen = resolveModelFromFiles(listing);
  if (!chosen) return null;
  return path.join(MODELS_DIR, chosen);
}
|
|
87
98
|
|
|
88
99
|
/**
|
|
@@ -124,6 +135,22 @@ function run(bin, args) {
|
|
|
124
135
|
});
|
|
125
136
|
}
|
|
126
137
|
|
|
138
|
+
/**
 * Derive the whisper-cli language flags from the resolved model path and call
 * options. Pure function — no I/O. Exposed for testing.
 *
 * The source language is the first non-blank value of: `opts.lang`,
 * `env.WHISPER_LANG`, then a default of 'en' for an English-only model
 * (basename ending in `.en.bin`, case-insensitive) or 'auto' otherwise.
 * Values are trimmed, so a whitespace-only setting counts as unset instead of
 * being forwarded verbatim to whisper-cli's `-l` flag.
 *
 * @param {string} modelPath - resolved model file path (used for its basename)
 * @param {{ lang?: string }} [opts]
 * @param {NodeJS.ProcessEnv} [env] - defaults to process.env
 * @returns {{ effLang: string, translate: boolean }}
 */
export function buildWhisperFlags(modelPath, { lang } = {}, env = process.env) {
  const englishOnly = /\.en\.bin$/i.test(path.basename(modelPath));
  // Normalise an optional setting: non-strings and blank strings become ''.
  const clean = (value) => (typeof value === 'string' ? value.trim() : '');
  const effLang = clean(lang) || clean(env.WHISPER_LANG) || (englishOnly ? 'en' : 'auto');
  // Only non-`.en` models get translate=true (→ always-English output).
  const translate = !englishOnly;
  return { effLang, translate };
}
|
|
153
|
+
|
|
127
154
|
/**
|
|
128
155
|
* Transcribe an audio file (any ffmpeg-readable format) to text — always in
|
|
129
156
|
* English. A multilingual model uses Whisper's TRANSLATE task, so Chinese,
|
|
@@ -131,13 +158,19 @@ function run(bin, args) {
|
|
|
131
158
|
* models are already English; nothing to translate.
|
|
132
159
|
*
|
|
133
160
|
* @param {string} inputPath - path to the recorded audio file.
|
|
134
|
-
* @param {{ lang?: string }} [opts]
|
|
161
|
+
* @param {{ lang?: string, _resolvers?: object, _run?: Function }} [opts]
|
|
135
162
|
* @returns {Promise<string>}
|
|
136
163
|
*/
|
|
137
|
-
export async function transcribe(inputPath, { lang } = {}) {
|
|
138
|
-
const
|
|
139
|
-
const
|
|
140
|
-
const
|
|
164
|
+
export async function transcribe(inputPath, { lang, _resolvers, _run } = {}) {
|
|
165
|
+
const resolvers = _resolvers ?? {};
|
|
166
|
+
const ffmpegFn = resolvers.resolveFfmpeg ?? resolveFfmpeg;
|
|
167
|
+
const whisperFn = resolvers.resolveWhisperBin ?? resolveWhisperBin;
|
|
168
|
+
const modelFn = resolvers.resolveWhisperModel ?? resolveWhisperModel;
|
|
169
|
+
const runFn = _run ?? run;
|
|
170
|
+
|
|
171
|
+
const ffmpeg = ffmpegFn();
|
|
172
|
+
const whisper = whisperFn();
|
|
173
|
+
const model = modelFn();
|
|
141
174
|
if (!ffmpeg) throw new Error('ffmpeg not found (brew install ffmpeg)');
|
|
142
175
|
if (!whisper) throw new Error('whisper-cli not found (brew install whisper-cpp)');
|
|
143
176
|
if (!model) throw new Error(`no whisper model found in ${MODELS_DIR}`);
|
|
@@ -145,22 +178,20 @@ export async function transcribe(inputPath, { lang } = {}) {
|
|
|
145
178
|
// `.en` models do English only; multilingual models auto-detect the source then
|
|
146
179
|
// translate it to English. Source language is overridable (lang / WHISPER_LANG)
|
|
147
180
|
// for the rare case you want to pin detection; output stays English.
|
|
148
|
-
const
|
|
149
|
-
const effLang = lang || process.env.WHISPER_LANG || (englishOnly ? 'en' : 'auto');
|
|
150
|
-
const translate = !englishOnly; // → always-English output
|
|
181
|
+
const { effLang, translate } = buildWhisperFlags(model, { lang });
|
|
151
182
|
|
|
152
183
|
const wav = path.join(
|
|
153
184
|
os.tmpdir(),
|
|
154
185
|
`cc-stt-${Date.now()}-${process.pid}.wav`,
|
|
155
186
|
);
|
|
156
187
|
try {
|
|
157
|
-
await
|
|
188
|
+
await runFn(ffmpeg, [
|
|
158
189
|
'-nostdin', '-y',
|
|
159
190
|
'-i', inputPath,
|
|
160
191
|
'-ar', '16000', '-ac', '1', '-c:a', 'pcm_s16le',
|
|
161
192
|
'-f', 'wav', wav,
|
|
162
193
|
]);
|
|
163
|
-
const { stdout } = await
|
|
194
|
+
const { stdout } = await runFn(whisper, [
|
|
164
195
|
'-m', model, '-f', wav, '-np', '-nt', '-l', effLang,
|
|
165
196
|
...(translate ? ['--translate'] : []),
|
|
166
197
|
]);
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WebSocket ping/pong heartbeat helpers.
|
|
3
|
+
*
|
|
4
|
+
* Extracted from server.js so tests can import pruneDeadClients without
|
|
5
|
+
* booting the HTTP/WS server.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * Prune dead WebSocket clients using the ping/pong aliveness flag.
 *
 * For each client: if `isAlive` is strictly `false` (it never answered the
 * previous sweep's ping) the client is terminated. Otherwise its flag is reset
 * to `false` and it is pinged; the caller's `pong` handler is expected to set
 * `isAlive = true` again before the next sweep. A client whose flag was never
 * set (`undefined`) is treated as alive.
 *
 * Failures are isolated per client: a `ping()` that throws marks that client
 * for termination, and a throwing `terminate()` is ignored, so one broken
 * socket can neither skip the rest of the sweep nor propagate an exception
 * into the heartbeat timer.
 *
 * @param {Iterable<{isAlive:boolean,terminate:()=>void,ping:()=>void}>} clients
 */
export function pruneDeadClients(clients) {
  for (const ws of clients) {
    if (ws.isAlive === false) {
      terminateQuietly(ws);
      continue;
    }
    ws.isAlive = false;
    try {
      ws.ping();
    } catch {
      // A socket that cannot be pinged can never answer with a pong; drop it
      // now rather than waiting for the next sweep.
      terminateQuietly(ws);
    }
  }
}

/** Terminate a client, ignoring errors from a socket that is already torn down. */
function terminateQuietly(ws) {
  try {
    ws.terminate();
  } catch {
    /* best-effort: nothing further can be done with a socket that won't terminate */
  }
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@idl3/claude-control",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "1.0.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Local web UI to watch and drive your Claude Code sessions running in tmux — live transcripts, reply, answer AskUserQuestion, attach files, from a browser or phone.",
|
|
6
6
|
"keywords": [
|