@idl3/claude-control 0.1.16 → 0.1.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/answer.js +335 -9
- package/lib/claude-cli.js +170 -0
- package/lib/config.js +47 -3
- package/lib/match.js +13 -0
- package/lib/optimize.js +222 -0
- package/lib/push.js +14 -1
- package/lib/skills.js +147 -0
- package/lib/subagents.js +153 -2
- package/lib/transcribe.js +156 -0
- package/lib/transcript.js +33 -4
- package/package.json +1 -1
- package/server.js +377 -17
- package/web/dist/apple-touch-icon.png +0 -0
- package/web/dist/assets/{core-D48qhH71.js → core-CyYMg33t.js} +1 -1
- package/web/dist/assets/index-BeJg6Cs1.js +85 -0
- package/web/dist/assets/index-Dn7NDGPq.css +1 -0
- package/web/dist/icon-192.png +0 -0
- package/web/dist/icon-512.png +0 -0
- package/web/dist/icon.svg +12 -0
- package/web/dist/index.html +3 -3
- package/web/dist/manifest.webmanifest +2 -2
- package/web/dist/sw.js +4 -1
- package/web/dist/assets/index-DpbKPoQC.css +0 -1
- package/web/dist/assets/index-krh1COBO.js +0 -78
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/transcribe.js — local speech-to-text via ffmpeg + whisper.cpp.
|
|
3
|
+
*
|
|
4
|
+
* No API key, no cloud: transcodes the uploaded audio to 16kHz mono WAV with
|
|
5
|
+
* ffmpeg, then runs the whisper-cli binary (brew install whisper-cpp) against a
|
|
6
|
+
* local ggml model. Works for any browser that can record audio (incl. iOS
|
|
7
|
+
* Safari), which the Web Speech API does not.
|
|
8
|
+
*
|
|
9
|
+
* Exports:
|
|
10
|
+
* - resolveFfmpeg() / resolveWhisperBin() / resolveWhisperModel() → string | null
|
|
11
|
+
* - cleanTranscript(raw) → string (pure; strips timestamps/blank markers)
|
|
12
|
+
* - transcribe(inputPath, { lang }) → Promise<string>
|
|
13
|
+
*
|
|
14
|
+
* Binary/model resolution is overridable via env (FFMPEG_BIN, WHISPER_BIN,
|
|
15
|
+
* WHISPER_MODEL) and defaults to Homebrew + ~/.claude-control/models.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import fs from 'node:fs';
|
|
19
|
+
import path from 'node:path';
|
|
20
|
+
import os from 'node:os';
|
|
21
|
+
import { spawn, execFileSync } from 'node:child_process';
|
|
22
|
+
|
|
23
|
+
// Default directory searched for whisper ggml model files (~/.claude-control/models).
const MODELS_DIR = path.join(os.homedir(), '.claude-control', 'models');
|
|
24
|
+
|
|
25
|
+
/**
 * Resolve a binary: env override → `which` → known fallbacks.
 *
 * @param {string} name - executable name looked up on PATH via `which`.
 * @param {string} envVar - name of the environment variable that may hold an
 *   explicit path; surrounding whitespace is ignored, and the value is used
 *   only if that path exists.
 * @param {string[]} fallbacks - paths probed in order when neither the override
 *   nor the PATH lookup yields an existing file.
 * @returns {string | null} the first existing candidate, or null when none exists.
 */
function resolveBin(name, envVar, fallbacks) {
  const override = process.env[envVar]?.trim();
  if (override && fs.existsSync(override)) return override;
  try {
    // stderr is discarded on purpose: without an explicit `stdio`, execFileSync
    // forwards the child's stderr to this process, so some `which`
    // implementations would print a "no <name> in PATH" line on every miss.
    const found = execFileSync('which', [name], {
      encoding: 'utf8',
      stdio: ['ignore', 'pipe', 'ignore'],
    }).trim();
    if (found && fs.existsSync(found)) return found;
  } catch {
    /* not on PATH (or `which` itself could not be run) */
  }
  return fallbacks.find((candidate) => fs.existsSync(candidate)) ?? null;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Locate the ffmpeg executable (FFMPEG_BIN override, then PATH, then common
 * install locations).
 *
 * @returns {string | null}
 */
export function resolveFfmpeg() {
  const knownLocations = [
    '/opt/homebrew/bin/ffmpeg',
    '/usr/local/bin/ffmpeg',
    '/usr/bin/ffmpeg',
  ];
  return resolveBin('ffmpeg', 'FFMPEG_BIN', knownLocations);
}
|
|
47
|
+
|
|
48
|
+
/**
 * Locate the whisper-cli executable (WHISPER_BIN override, then PATH, then
 * common install locations).
 *
 * @returns {string | null}
 */
export function resolveWhisperBin() {
  const knownLocations = [
    '/opt/homebrew/bin/whisper-cli',
    '/usr/local/bin/whisper-cli',
  ];
  return resolveBin('whisper-cli', 'WHISPER_BIN', knownLocations);
}
|
|
55
|
+
|
|
56
|
+
/**
 * Pick the ggml model file to hand to whisper-cli.
 *
 * Lookup order: the WHISPER_MODEL env path (if it exists) → the first preferred
 * file name present in the models dir → any `ggml-*.bin` found in that dir.
 *
 * @returns {string | null} path to a model file, or null when none is found.
 */
export function resolveWhisperModel() {
  const fromEnv = (process.env.WHISPER_MODEL ?? '').trim();
  if (fromEnv !== '' && fs.existsSync(fromEnv)) return fromEnv;

  const preferred = [
    'ggml-base.en.bin',
    'ggml-small.en.bin',
    'ggml-base.bin',
    'ggml-small.bin',
    'ggml-tiny.en.bin',
  ]
    .map((file) => path.join(MODELS_DIR, file))
    .find((candidate) => fs.existsSync(candidate));
  if (preferred !== undefined) return preferred;

  let entries;
  try {
    entries = fs.readdirSync(MODELS_DIR);
  } catch {
    /* dir missing */
    return null;
  }
  const anyModel = entries.find((entry) => /^ggml-.*\.bin$/.test(entry));
  return anyModel ? path.join(MODELS_DIR, anyModel) : null;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Clean whisper-cli stdout into a single line: strip a leading segment
 * timestamp (`[00:00:00.000 --> 00:00:02.000]`) from each line, drop blank
 * lines, drop bracketed-only markers ([BLANK_AUDIO], (silence)), collapse
 * whitespace.
 *
 * Pure: no I/O, the input is coerced with String().
 *
 * @param {string} raw
 * @returns {string}
 */
export function cleanTranscript(raw) {
  // Leading "[hh:mm:ss.mmm --> hh:mm:ss.mmm]" prefix. Only this exact shape is
  // removed, so ordinary bracketed text inside a sentence is left alone.
  const timestampPrefix =
    /^\[\d{2,}:\d{2}:\d{2}[.,]\d{3}\s*-->\s*\d{2,}:\d{2}:\d{2}[.,]\d{3}\]\s*/;
  return String(raw)
    .split('\n')
    .map((l) => l.trim().replace(timestampPrefix, ''))
    .filter(Boolean)
    // A line that is nothing but one bracketed/parenthesised token is a
    // non-speech marker, not transcript text.
    .filter((l) => !/^[[(][^\])]*[\])]$/.test(l))
    .join(' ')
    .replace(/\s+/g, ' ')
    .trim();
}
|
|
101
|
+
|
|
102
|
+
/**
 * Spawn a binary, capture stdout/stderr, resolve on exit 0.
 *
 * @param {string} bin - path of the executable to spawn (no shell involved).
 * @param {string[]} args - argument vector passed to the child as-is.
 * @returns {Promise<{ stdout: string, stderr: string }>} resolves when the
 *   child closes with exit code 0; rejects with the spawn error, or with an
 *   Error carrying the exit code (or terminating signal) and the first 500
 *   characters of stderr.
 */
function run(bin, args) {
  return new Promise((resolve, reject) => {
    const child = spawn(bin, args, { stdio: ['ignore', 'pipe', 'pipe'] });
    // Decode at the stream level: appending raw Buffer chunks to a string
    // decodes each chunk on its own, which corrupts any multi-byte UTF-8
    // character that straddles a chunk boundary.
    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');
    let out = '';
    let err = '';
    child.stdout.on('data', (chunk) => {
      out += chunk;
    });
    child.stderr.on('data', (chunk) => {
      err += chunk;
    });
    child.on('error', reject);
    child.on('close', (code, signal) => {
      if (code === 0) {
        resolve({ stdout: out, stderr: err });
        return;
      }
      // `code` is null when the child was terminated by a signal.
      const status = code ?? signal;
      reject(new Error(`${path.basename(bin)} exited ${status}: ${err.slice(0, 500)}`));
    });
  });
}
|
|
122
|
+
|
|
123
|
+
/**
 * Transcribe an audio file (any ffmpeg-readable format) to text.
 *
 * Steps: resolve ffmpeg, whisper-cli and a model; transcode the input to a
 * 16 kHz mono PCM WAV; run whisper-cli on it; clean its stdout.
 *
 * @param {string} inputPath - path to the recorded audio file.
 * @param {{ lang?: string }} [opts] - `lang` is passed to whisper-cli's `-l`
 *   flag (default 'en').
 * @returns {Promise<string>} the cleaned transcript (may be empty).
 * @throws {Error} when a binary or model cannot be found, or when either
 *   child process fails.
 */
export async function transcribe(inputPath, { lang = 'en' } = {}) {
  const ffmpeg = resolveFfmpeg();
  const whisper = resolveWhisperBin();
  const model = resolveWhisperModel();
  if (!ffmpeg) throw new Error('ffmpeg not found (brew install ffmpeg)');
  if (!whisper) throw new Error('whisper-cli not found (brew install whisper-cpp)');
  if (!model) throw new Error(`no whisper model found in ${MODELS_DIR}`);

  // Each call gets its own freshly created scratch directory. A
  // `Date.now()-pid` file name is shared by two calls landing in the same
  // millisecond (ffmpeg runs with -y, so one would overwrite the other's WAV)
  // and is predictable inside a shared tmpdir.
  const workDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), 'cc-stt-'));
  const wav = path.join(workDir, 'audio.wav');
  try {
    await run(ffmpeg, [
      '-nostdin', '-y',
      '-i', inputPath,
      '-ar', '16000', '-ac', '1', '-c:a', 'pcm_s16le',
      '-f', 'wav', wav,
    ]);
    const { stdout } = await run(whisper, [
      '-m', model, '-f', wav, '-np', '-nt', '-l', lang,
    ]);
    return cleanTranscript(stdout);
  } finally {
    // Best-effort cleanup; a failure to remove the scratch dir must not mask
    // the transcription result or error.
    fs.promises.rm(workDir, { recursive: true, force: true }).catch(() => {});
  }
}
|
package/lib/transcript.js
CHANGED
|
@@ -1,12 +1,41 @@
|
|
|
1
1
|
// lib/transcript.js — bounded transcript tailing for claude-cockpit.
|
|
2
2
|
// Resource doctrine: NEVER read a whole file. Initial load reads only the last
|
|
3
|
-
// min(size,
|
|
4
|
-
// Files can be 200 MB+; whole-file reads will blow RAM.
|
|
3
|
+
// min(size, TAIL_MAX_BYTES) bytes (tail), then watches and reads ONLY new bytes
|
|
4
|
+
// via offset. Files can be 200 MB+; whole-file reads will blow RAM.
|
|
5
5
|
|
|
6
6
|
import fs from 'node:fs';
|
|
7
7
|
import { EventEmitter } from 'node:events';
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
// Env lookup mirroring server.js: prefer CLAUDE_CONTROL_<X>, fall back to the
// legacy COCKPIT_<X> so existing launchers keep working after the rename.
//
// Returns a positive integer, or null when the variable is unset, not a finite
// number, or not at least 1. Fractional values are truncated (e.g. "1500.7" →
// 1500) because callers use the result as a byte count / message count.
function envInt(name) {
  const raw =
    process.env[`CLAUDE_CONTROL_${name}`] ?? process.env[`COCKPIT_${name}`];
  if (raw == null) return null;
  const parsed = Number(raw);
  if (!Number.isFinite(parsed)) return null;
  const whole = Math.floor(parsed);
  return whole >= 1 ? whole : null;
}
|
|
18
|
+
|
|
19
|
+
// Initial-tail byte cap. A fresh subscribe reads only the last min(size,
|
|
20
|
+
// TAIL_MAX_BYTES) bytes of the JSONL (NEVER the whole file — transcripts reach
|
|
21
|
+
// 200 MB+). In busy sessions a single assistant turn can carry hundreds of KB
|
|
22
|
+
// of tool output, so the old 1 MB window held only a handful of messages and
|
|
23
|
+
// the user's own recent turns fell outside it — they vanished on reload.
|
|
24
|
+
//
|
|
25
|
+
// 8 MB is the balance point: at a few KB/record it yields several hundred to a
|
|
26
|
+
// few thousand messages (enough that the message-count cap, not bytes, governs
|
|
27
|
+
// what a fresh subscribe serves), while staying ~25x below the largest real
|
|
28
|
+
// files and bounded per open session. A phone renders a capped subset anyway.
|
|
29
|
+
// Override with CLAUDE_CONTROL_TAIL_BYTES (legacy: COCKPIT_TAIL_BYTES).
|
|
30
|
+
const TAIL_MAX_BYTES = envInt('TAIL_BYTES') ?? 8 * 1024 * 1024; // 8 MB
|
|
31
|
+
|
|
32
|
+
// Default message-count cap for the in-memory buffer. Raised 1500 → 4000 so a
|
|
33
|
+
// fresh subscribe serves deeper scrollback (within the 8 MB tail window the
|
|
34
|
+
// count cap, not the byte window, governs how much history is served). At a few
|
|
35
|
+
// hundred bytes/normalized message this is a few MB resident per open session,
|
|
36
|
+
// well within the server's RSS budget.
|
|
37
|
+
// Override with CLAUDE_CONTROL_MAX_BUFFER (legacy: COCKPIT_MAX_BUFFER).
|
|
38
|
+
const DEFAULT_MAX_BUFFER = envInt('MAX_BUFFER') ?? 4000;
|
|
10
39
|
|
|
11
40
|
// ---------------------------------------------------------------------------
|
|
12
41
|
// Internal helper: read the last `maxBytes` of a file without loading it all.
|
|
@@ -167,7 +196,7 @@ export class TranscriptTailer extends EventEmitter {
|
|
|
167
196
|
* @param {string} filePath
|
|
168
197
|
* @param {{ maxBuffer?: number, debounceMs?: number }} options
|
|
169
198
|
*/
|
|
170
|
-
constructor(filePath, { maxBuffer =
|
|
199
|
+
constructor(filePath, { maxBuffer = DEFAULT_MAX_BUFFER, debounceMs = 150 } = {}) {
|
|
171
200
|
super();
|
|
172
201
|
this._filePath = filePath;
|
|
173
202
|
this._maxBuffer = maxBuffer;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@idl3/claude-control",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.21",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Local web UI to watch and drive your Claude Code sessions running in tmux — live transcripts, reply, answer AskUserQuestion, attach files, from a browser or phone.",
|
|
6
6
|
"keywords": [
|
package/server.js
CHANGED
|
@@ -20,11 +20,15 @@ import { parsePanePrompt } from './lib/prompt.js';
|
|
|
20
20
|
import { SessionRegistry, listRecentTranscripts } from './lib/sessions.js';
|
|
21
21
|
import { loadPins, savePins, validateTranscriptPath, pinKey } from './lib/pins.js';
|
|
22
22
|
import { ResourceMonitor } from './lib/resources.js';
|
|
23
|
-
import { buildAnswerProgram } from './lib/answer.js';
|
|
23
|
+
import { buildAnswerProgram, parsePicker, planStep } from './lib/answer.js';
|
|
24
24
|
import { sweepUploads, resolveUploadPath } from './lib/uploads.js';
|
|
25
25
|
import { getVersionInfo, currentVersion } from './lib/version.js';
|
|
26
26
|
import * as push from './lib/push.js';
|
|
27
27
|
import { readConfig, writeConfig } from './lib/config.js';
|
|
28
|
+
import { optimizePrompt } from './lib/optimize.js';
|
|
29
|
+
import { complete as claudeCliComplete } from './lib/claude-cli.js';
|
|
30
|
+
import { transcribe } from './lib/transcribe.js';
|
|
31
|
+
import { listSkills } from './lib/skills.js';
|
|
28
32
|
// Note: the client offers [WS_PROTOCOL, token] as subprotocols; the `ws`
|
|
29
33
|
// library auto-selects the FIRST offered one (the non-secret WS_PROTOCOL label)
|
|
30
34
|
// and echoes it, so we never reflect the raw token back and need no custom
|
|
@@ -67,13 +71,21 @@ const CONFIG = {
|
|
|
67
71
|
// tripped "over limit" permanently. Override with CLAUDE_CONTROL_RSS_LIMIT_MB.
|
|
68
72
|
rssLimitMB: Number(env('RSS_LIMIT_MB')) || 768,
|
|
69
73
|
token: env('TOKEN') || readPersistedToken() || null,
|
|
70
|
-
|
|
74
|
+
// 4000: within lib/transcript's 8 MB byte tail, the message-count cap governs
|
|
75
|
+
// how much history a fresh subscribe serves. Raised 1500 → 4000 for deeper
|
|
76
|
+
// scrollback. Shares the CLAUDE_CONTROL_MAX_BUFFER override with lib/transcript.
|
|
77
|
+
maxBuffer: Number(env('MAX_BUFFER')) || 4000,
|
|
71
78
|
maxUploadMB: Number(env('MAX_UPLOAD_MB')) || 25,
|
|
72
79
|
uploadsDir:
|
|
73
80
|
env('UPLOADS') || path.join(os.homedir(), '.claude-control', 'uploads'),
|
|
74
81
|
uploadTtlHours: Number(env('UPLOAD_TTL_HOURS')) || 24,
|
|
75
82
|
pinsFile:
|
|
76
83
|
env('PINS') || path.join(os.homedir(), '.claude-control', 'pins.json'),
|
|
84
|
+
// Custom PWA home-screen icon (PNG). When present it overrides the bundled
|
|
85
|
+
// default robot logo for the manifest icons + apple-touch-icon. Uploaded via
|
|
86
|
+
// POST /api/icon, removed via DELETE /api/icon.
|
|
87
|
+
iconFile:
|
|
88
|
+
env('ICON') || path.join(os.homedir(), '.claude-control', 'icon.png'),
|
|
77
89
|
};
|
|
78
90
|
|
|
79
91
|
const MIME = {
|
|
@@ -164,6 +176,10 @@ const server = http.createServer((req, res) => {
|
|
|
164
176
|
if (!checkToken(req)) return endJson(res, 401, { error: 'unauthorized' });
|
|
165
177
|
return endJson(res, 200, { sessions: registry.getSessions() });
|
|
166
178
|
}
|
|
179
|
+
if (u.pathname === '/api/skills') {
|
|
180
|
+
if (!checkToken(req)) return endJson(res, 401, { error: 'unauthorized' });
|
|
181
|
+
return endJson(res, 200, { skills: listSkills() });
|
|
182
|
+
}
|
|
167
183
|
if (u.pathname === '/api/health') {
|
|
168
184
|
if (!checkToken(req)) return endJson(res, 401, { error: 'unauthorized' });
|
|
169
185
|
return endJson(res, 200, { ok: true, snapshot: resources.snapshot() });
|
|
@@ -234,6 +250,16 @@ const server = http.createServer((req, res) => {
|
|
|
234
250
|
if (req.method === 'POST') return handleConfigSave(req, res);
|
|
235
251
|
return endJson(res, 405, { error: 'method not allowed' });
|
|
236
252
|
}
|
|
253
|
+
if (u.pathname === '/api/optimize') {
|
|
254
|
+
if (req.method !== 'POST') return endJson(res, 405, { error: 'method not allowed' });
|
|
255
|
+
if (!checkToken(req)) return endJson(res, 401, { error: 'unauthorized' });
|
|
256
|
+
return handleOptimize(req, res);
|
|
257
|
+
}
|
|
258
|
+
if (u.pathname === '/api/transcribe') {
|
|
259
|
+
if (req.method !== 'POST') return endJson(res, 405, { error: 'method not allowed' });
|
|
260
|
+
if (!checkToken(req)) return endJson(res, 401, { error: 'unauthorized' });
|
|
261
|
+
return handleTranscribe(req, res, u);
|
|
262
|
+
}
|
|
237
263
|
if (u.pathname === '/api/session/new') {
|
|
238
264
|
if (req.method !== 'POST') return endJson(res, 405, { error: 'method not allowed' });
|
|
239
265
|
if (!checkToken(req)) return endJson(res, 401, { error: 'unauthorized' });
|
|
@@ -253,6 +279,22 @@ const server = http.createServer((req, res) => {
|
|
|
253
279
|
return handleServeUpload(req, res, u);
|
|
254
280
|
}
|
|
255
281
|
|
|
282
|
+
// PWA home-screen icon. GET is token-FREE: the OS fetches manifest icons and
|
|
283
|
+
// the apple-touch-icon with no Authorization header, so this surface must be
|
|
284
|
+
// open (it only ever returns an image). POST/DELETE (replace/reset the custom
|
|
285
|
+
// icon) are token-gated.
|
|
286
|
+
if (u.pathname === '/api/icon') {
|
|
287
|
+
if (req.method === 'POST') {
|
|
288
|
+
if (!checkToken(req)) return endJson(res, 401, { error: 'unauthorized' });
|
|
289
|
+
return handleIconUpload(req, res);
|
|
290
|
+
}
|
|
291
|
+
if (req.method === 'DELETE') {
|
|
292
|
+
if (!checkToken(req)) return endJson(res, 401, { error: 'unauthorized' });
|
|
293
|
+
return handleIconReset(res);
|
|
294
|
+
}
|
|
295
|
+
return handleServeIcon(res, u);
|
|
296
|
+
}
|
|
297
|
+
|
|
256
298
|
// Raw-terminal escape hatch: token-gated reverse proxy to an on-demand,
|
|
257
299
|
// loopback-bound ttyd attached to this session's tmux pane. ttyd itself runs
|
|
258
300
|
// with no auth; this branch (and the matching upgrade branch) is the gate.
|
|
@@ -438,6 +480,73 @@ async function handleConfigSave(req, res) {
|
|
|
438
480
|
}
|
|
439
481
|
}
|
|
440
482
|
|
|
483
|
+
// POST /api/optimize — token-gated prompt optimiser. Accepts { text, intent }
// and returns { optimized, rationale, changes, mode } from optimizePrompt.
// Falls back to rules-based optimization when the Claude CLI is unavailable.
//
// Responses: 400 for an unreadable body, missing/blank text, or text longer
// than 8000 characters; 500 with the error message when optimizePrompt
// rejects; otherwise 200 with optimizePrompt's result.
async function handleOptimize(req, res) {
  let body;
  try {
    body = await readJsonBody(req);
  } catch {
    return endJson(res, 400, { error: 'invalid JSON body' });
  }
  // A body such as `null` is valid JSON but not an object. Optional chaining
  // turns it into the normal "text required" 400 instead of a TypeError that
  // would reject this handler's promise with no response sent.
  // NOTE(review): assumes readJsonBody can hand back non-object values — confirm.
  const text = typeof body?.text === 'string' ? body.text : '';
  if (!text.trim()) return endJson(res, 400, { error: 'text required' });
  if (text.length > 8000) return endJson(res, 400, { error: 'text exceeds 8000 character limit' });
  const intent = typeof body?.intent === 'string' ? body.intent : undefined;
  try {
    const result = await optimizePrompt(text, { complete: claudeCliComplete, intent });
    return endJson(res, 200, result);
  } catch (err) {
    return endJson(res, 500, { error: String(err?.message || err) });
  }
}
|
|
504
|
+
|
|
505
|
+
// POST /api/transcribe — local speech-to-text. Accepts a raw audio body (the
// MediaRecorder blob from the voice dialog; ?ext=webm|mp4|wav names the format),
// caps the size, writes it to a temp file, and runs ffmpeg→whisper.cpp via
// lib/transcribe. Returns { ok, text }. No key, no cloud — fully local.
//
// Responses: 413 when the body exceeds CONFIG.maxUploadMB, 400 for an empty
// body or a request stream error, 500 when writing or transcribing fails.
function handleTranscribe(req, res, u) {
  const maxBytes = CONFIG.maxUploadMB * 1024 * 1024;
  // The extension comes from the query string, so it is reduced to at most
  // five [a-z0-9] characters before it becomes part of a file name.
  const ext =
    (u.searchParams.get('ext') || 'webm').toLowerCase().replace(/[^a-z0-9]/g, '').slice(0, 5) ||
    'webm';
  const chunks = [];
  let size = 0;
  let aborted = false;

  req.on('data', (c) => {
    if (aborted) return;
    size += c.length;
    if (size > maxBytes) {
      aborted = true;
      endJson(res, 413, { error: `audio exceeds ${CONFIG.maxUploadMB} MB limit` });
      req.destroy();
      return;
    }
    chunks.push(c);
  });

  req.on('end', async () => {
    if (aborted) return;
    if (size === 0) return endJson(res, 400, { error: 'empty audio' });
    // Per-request scratch directory: a `Date.now()-pid` name is shared by two
    // uploads arriving in the same millisecond and is predictable in a shared
    // tmpdir; mkdtemp yields a fresh, uniquely named directory instead.
    let workDir;
    try {
      workDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), 'cc-stt-in-'));
      const tmp = path.join(workDir, `audio.${ext}`);
      await fs.promises.writeFile(tmp, Buffer.concat(chunks), { mode: 0o600 });
      const text = await transcribe(tmp);
      endJson(res, 200, { ok: true, text });
    } catch (err) {
      endJson(res, 500, { error: String(err?.message || err) });
    } finally {
      // Best-effort cleanup; never let it affect the response.
      if (workDir) fs.promises.rm(workDir, { recursive: true, force: true }).catch(() => {});
    }
  });

  req.on('error', () => {
    // Skip if a response has already been started (e.g. the 413 path).
    if (!aborted && !res.headersSent) endJson(res, 400, { error: 'audio stream error' });
  });
}
|
|
549
|
+
|
|
441
550
|
// POST /api/session/new — create a new tmux window in the configured (or
|
|
442
551
|
// body-overridden) cwd, then type the launch command into it via send-keys so
|
|
443
552
|
// the interactive shell resolves aliases. Security: the command is operator
|
|
@@ -594,6 +703,70 @@ function handleServeFile(res, u) {
|
|
|
594
703
|
});
|
|
595
704
|
}
|
|
596
705
|
|
|
706
|
+
// 8-byte PNG file signature.
|
|
707
|
+
const PNG_SIG = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]);
|
|
708
|
+
|
|
709
|
+
// GET /api/icon[?size=192|512] — serve the custom icon if one was uploaded,
// else the bundled default robot logo at the closest bundled size. Token-free
// (see the route guard) because the OS fetches it without auth headers.
//
// The custom icon is read directly and ANY read failure (missing, removed by a
// concurrent DELETE, unreadable) falls through to the bundled default, rather
// than probing with existsSync first and answering 404 when the subsequent
// read fails. 404 is returned only when the bundled file cannot be read either.
function handleServeIcon(res, u) {
  const size = Number(u.searchParams.get('size')) || 192;
  const fallback = path.join(PUBLIC_DIR, size >= 512 ? 'icon-512.png' : 'icon-192.png');

  const sendPng = (data) => {
    res.writeHead(200, {
      'content-type': 'image/png',
      // The home-screen icon may change at runtime; never let the phone pin a
      // stale one (it already re-reads the manifest on reinstall).
      'cache-control': 'no-store, must-revalidate',
    });
    res.end(data);
  };

  fs.readFile(CONFIG.iconFile, (customErr, custom) => {
    if (!customErr) return sendPng(custom);
    fs.readFile(fallback, (err, data) => {
      if (err) { res.writeHead(404); return res.end('not found'); }
      sendPng(data);
    });
  });
}
|
|
727
|
+
|
|
728
|
+
// POST /api/icon — replace the custom home-screen icon with the raw PNG body.
// PNG-only (validated by signature) so handleServeIcon's image/png is honest.
//
// Responses: 413 above 4 MB, 400 when the body does not start with the PNG
// signature or the request stream errors, 500 when the file cannot be written,
// 200 { ok: true, custom: true } on success.
function handleIconUpload(req, res) {
  const maxBytes = 4 * 1024 * 1024;
  const chunks = [];
  let size = 0;
  let aborted = false;
  req.on('data', (c) => {
    if (aborted) return;
    size += c.length;
    if (size > maxBytes) {
      aborted = true;
      endJson(res, 413, { error: 'icon exceeds 4 MB limit' });
      req.destroy();
      return;
    }
    chunks.push(c);
  });
  req.on('end', async () => {
    if (aborted) return;
    const buf = Buffer.concat(chunks);
    if (buf.length < 8 || !buf.subarray(0, 8).equals(PNG_SIG)) {
      return endJson(res, 400, { error: 'icon must be a PNG image' });
    }
    const iconDir = path.dirname(CONFIG.iconFile);
    let staging;
    try {
      await fs.promises.mkdir(iconDir, { recursive: true });
      // Stage next to the destination and rename into place: the icon is
      // served token-free and read while uploads may be in flight, so writing
      // it in place could expose a truncated PNG to a concurrent GET.
      staging = await fs.promises.mkdtemp(path.join(iconDir, '.icon-'));
      const staged = path.join(staging, 'icon.png');
      await fs.promises.writeFile(staged, buf, { mode: 0o600 });
      await fs.promises.rename(staged, CONFIG.iconFile);
      endJson(res, 200, { ok: true, custom: true });
    } catch (err) {
      endJson(res, 500, { error: String(err?.message || err) });
    } finally {
      // Best-effort removal of the (now empty, or failed) staging directory.
      if (staging) fs.promises.rm(staging, { recursive: true, force: true }).catch(() => {});
    }
  });
  req.on('error', () => {
    // Mirrors handleTranscribe: answer a broken upload stream unless a
    // response has already been started.
    if (!aborted && !res.headersSent) endJson(res, 400, { error: 'icon stream error' });
  });
}
|
|
761
|
+
|
|
762
|
+
// DELETE /api/icon — remove the uploaded icon so the bundled default is served
// again. `force: true` makes removing an already-absent file a success.
function handleIconReset(res) {
  const onRemoved = () => endJson(res, 200, { ok: true, custom: false });
  const onFailed = (err) => endJson(res, 500, { error: String(err?.message || err) });
  fs.promises.rm(CONFIG.iconFile, { force: true }).then(onRemoved).catch(onFailed);
}
|
|
769
|
+
|
|
597
770
|
// Set or clear a manual transcript pin. Body: { id, transcriptPath }.
|
|
598
771
|
// transcriptPath null/empty clears the pin. The pin is keyed by the session's
|
|
599
772
|
// stable windowId.paneIndex so it survives tmux window renumbering.
|
|
@@ -956,25 +1129,212 @@ async function handleClientMessage(ws, msg) {
|
|
|
956
1129
|
if (msg.toolUseId !== pending.toolUseId) {
|
|
957
1130
|
throw new Error('stale question (already answered or changed)');
|
|
958
1131
|
}
|
|
959
|
-
|
|
960
|
-
//
|
|
961
|
-
//
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
1132
|
+
|
|
1133
|
+
// ── Capture-driven path ──────────────────────────────────────────────
|
|
1134
|
+
// Attempt to navigate by parsing the live picker render. Falls back to
|
|
1135
|
+
// the static buildAnswerProgram on ANY parse failure, unknown label, or
|
|
1136
|
+
// post-send verification mismatch — so it can NEVER regress the working path.
|
|
1137
|
+
//
|
|
1138
|
+
// Constants:
|
|
1139
|
+
const SETTLE_MS = 300; // ms to wait after sending keys before re-capture
|
|
1140
|
+
const MAX_RETRIES = 1; // retry attempts per question on verification failure
|
|
1141
|
+
|
|
1142
|
+
let usedDynamic = false;
|
|
1143
|
+
// Tracks whether the dynamic path has injected ANY keystroke. Once true,
|
|
1144
|
+
// the picker is in a partial/unknown state and the from-scratch static
|
|
1145
|
+
// fallback would corrupt it — so a later failure must fail loud, not retry.
|
|
1146
|
+
let sentAny = false;
|
|
965
1147
|
try {
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
1148
|
+
const questions = pending?.questions || [];
|
|
1149
|
+
const selections = msg.selections || [];
|
|
1150
|
+
|
|
1151
|
+
if (questions.length > 0) {
|
|
1152
|
+
let dynamicOk = true; // will be set false to fall back
|
|
1153
|
+
|
|
1154
|
+
for (let qi = 0; qi < questions.length && dynamicOk; qi += 1) {
|
|
1155
|
+
const question = questions[qi];
|
|
1156
|
+
const selectedLabels = selections[qi] || [];
|
|
1157
|
+
|
|
1158
|
+
let attempt = 0;
|
|
1159
|
+
let stepOk = false;
|
|
1160
|
+
|
|
1161
|
+
while (attempt <= MAX_RETRIES && !stepOk) {
|
|
1162
|
+
// 1. Capture current picker state.
|
|
1163
|
+
let capture;
|
|
1164
|
+
try {
|
|
1165
|
+
capture = await tmux.capturePane(session.target);
|
|
1166
|
+
} catch (captureErr) {
|
|
1167
|
+
console.log(`[answer/dynamic] capture failed q${qi}: ${captureErr?.message}`);
|
|
1168
|
+
dynamicOk = false;
|
|
1169
|
+
break;
|
|
1170
|
+
}
|
|
1171
|
+
|
|
1172
|
+
// 2. Parse.
|
|
1173
|
+
const parsed = parsePicker(capture);
|
|
1174
|
+
if (parsed.confidence !== 'ok') {
|
|
1175
|
+
console.log(`[answer/dynamic] low confidence on q${qi} — falling back`);
|
|
1176
|
+
dynamicOk = false;
|
|
1177
|
+
break;
|
|
1178
|
+
}
|
|
1179
|
+
|
|
1180
|
+
// 3. Handle the review screen (multi-question final step).
|
|
1181
|
+
if (parsed.isReview) {
|
|
1182
|
+
// We expect to be here only after the last question's action Enter.
|
|
1183
|
+
// Send Enter to confirm "Submit answers".
|
|
1184
|
+
console.log(`[answer/dynamic] review screen — sending Enter`);
|
|
1185
|
+
sentAny = true;
|
|
1186
|
+
await tmux.sendRawKeysSequenced(session.target, ['Enter'], SETTLE_MS);
|
|
1187
|
+
await new Promise((r) => setTimeout(r, SETTLE_MS));
|
|
1188
|
+
// Verify: the review screen should be gone.
|
|
1189
|
+
const afterReview = await tmux.capturePane(session.target);
|
|
1190
|
+
const reparse = parsePicker(afterReview);
|
|
1191
|
+
if (reparse.isReview) {
|
|
1192
|
+
console.log(`[answer/dynamic] review screen still up after Enter — falling back`);
|
|
1193
|
+
dynamicOk = false;
|
|
1194
|
+
}
|
|
1195
|
+
// Whether verified or not, we break out of the question loop —
|
|
1196
|
+
// we've processed all questions.
|
|
1197
|
+
break;
|
|
1198
|
+
}
|
|
1199
|
+
|
|
1200
|
+
// 4. Plan keystrokes for this question.
|
|
1201
|
+
const keys = planStep(parsed, question, selectedLabels);
|
|
1202
|
+
if (!keys) {
|
|
1203
|
+
console.log(`[answer/dynamic] planStep null on q${qi} — falling back`);
|
|
1204
|
+
dynamicOk = false;
|
|
1205
|
+
break;
|
|
1206
|
+
}
|
|
1207
|
+
|
|
1208
|
+
console.log(
|
|
1209
|
+
`[answer/dynamic] q${qi} attempt=${attempt} keys=${JSON.stringify(keys)}`,
|
|
1210
|
+
);
|
|
1211
|
+
|
|
1212
|
+
// 5. Send keys.
|
|
1213
|
+
sentAny = true;
|
|
1214
|
+
await tmux.sendRawKeysSequenced(session.target, keys, SETTLE_MS);
|
|
1215
|
+
|
|
1216
|
+
// 6. Settle then verify.
|
|
1217
|
+
await new Promise((r) => setTimeout(r, SETTLE_MS));
|
|
1218
|
+
let afterCapture;
|
|
1219
|
+
try {
|
|
1220
|
+
afterCapture = await tmux.capturePane(session.target);
|
|
1221
|
+
} catch (captureErr) {
|
|
1222
|
+
console.log(`[answer/dynamic] post-send capture failed q${qi}: ${captureErr?.message}`);
|
|
1223
|
+
dynamicOk = false;
|
|
1224
|
+
break;
|
|
1225
|
+
}
|
|
1226
|
+
|
|
1227
|
+
const afterParsed = parsePicker(afterCapture);
|
|
1228
|
+
|
|
1229
|
+
if (question.multiSelect) {
|
|
1230
|
+
// Verify: all intended labels are now checked in the re-parsed picker.
|
|
1231
|
+
// If we advanced (Next/Submit pressed), the screen changes — that's
|
|
1232
|
+
// also acceptable (confidence goes low = we moved on).
|
|
1233
|
+
if (afterParsed.confidence === 'ok' && !afterParsed.isReview) {
|
|
1234
|
+
const uncheckedTargets = selectedLabels.filter((label) =>
|
|
1235
|
+
afterParsed.rows.some(
|
|
1236
|
+
(r) => r.kind === 'option' && r.label === label && !r.checked,
|
|
1237
|
+
),
|
|
1238
|
+
);
|
|
1239
|
+
if (uncheckedTargets.length > 0) {
|
|
1240
|
+
console.log(
|
|
1241
|
+
`[answer/dynamic] verify failed q${qi}: still unchecked=${JSON.stringify(uncheckedTargets)} attempt=${attempt}`,
|
|
1242
|
+
);
|
|
1243
|
+
attempt += 1;
|
|
1244
|
+
continue; // retry
|
|
1245
|
+
}
|
|
1246
|
+
}
|
|
1247
|
+
// Either confidence is low (screen advanced) or all checked — either
|
|
1248
|
+
// way, treat the step as done and move to the next question.
|
|
1249
|
+
stepOk = true;
|
|
1250
|
+
} else {
|
|
1251
|
+
// Single-select: after Enter, picker should advance (screen changes).
|
|
1252
|
+
// If the exact same option is still shown as selected (cursor on it),
|
|
1253
|
+
// something went wrong. Accept any screen change as advancement.
|
|
1254
|
+
if (
|
|
1255
|
+
afterParsed.confidence === 'ok' &&
|
|
1256
|
+
!afterParsed.isReview &&
|
|
1257
|
+
afterParsed.rows.some(
|
|
1258
|
+
(r) => r.cursor && r.kind === 'option' && r.label === selectedLabels[0],
|
|
1259
|
+
)
|
|
1260
|
+
) {
|
|
1261
|
+
console.log(`[answer/dynamic] single-select stuck on q${qi} attempt=${attempt}`);
|
|
1262
|
+
attempt += 1;
|
|
1263
|
+
continue;
|
|
1264
|
+
}
|
|
1265
|
+
stepOk = true;
|
|
1266
|
+
}
|
|
1267
|
+
}
|
|
1268
|
+
|
|
1269
|
+
if (!stepOk && attempt > MAX_RETRIES) {
|
|
1270
|
+
console.log(`[answer/dynamic] max retries exceeded on q${qi} — falling back`);
|
|
1271
|
+
dynamicOk = false;
|
|
1272
|
+
}
|
|
1273
|
+
}
|
|
1274
|
+
|
|
1275
|
+
// After processing all questions via dynamic path, check if we need to
|
|
1276
|
+
// handle the review screen (multi-question pickers).
|
|
1277
|
+
if (dynamicOk && questions.length > 1) {
|
|
1278
|
+
// Capture and check: we may already be on the review screen (handled
|
|
1279
|
+
// in the loop above) or may need to check.
|
|
1280
|
+
try {
|
|
1281
|
+
const finalCapture = await tmux.capturePane(session.target);
|
|
1282
|
+
const finalParsed = parsePicker(finalCapture);
|
|
1283
|
+
if (finalParsed.isReview && finalParsed.confidence === 'ok') {
|
|
1284
|
+
// Submit the review screen.
|
|
1285
|
+
console.log(`[answer/dynamic] post-loop review screen — sending Enter`);
|
|
1286
|
+
sentAny = true;
|
|
1287
|
+
await tmux.sendRawKeysSequenced(session.target, ['Enter'], SETTLE_MS);
|
|
1288
|
+
}
|
|
1289
|
+
} catch (captureErr) {
|
|
1290
|
+
// Non-fatal: we already sent the question answers; review Enter is best-effort.
|
|
1291
|
+
console.log(`[answer/dynamic] final review capture failed: ${captureErr?.message}`);
|
|
1292
|
+
}
|
|
1293
|
+
}
|
|
1294
|
+
|
|
1295
|
+
if (dynamicOk) {
|
|
1296
|
+
usedDynamic = true;
|
|
1297
|
+
}
|
|
1298
|
+
}
|
|
1299
|
+
} catch (dynamicErr) {
|
|
1300
|
+
// Any unexpected error in the dynamic path — log and fall back.
|
|
1301
|
+
console.log(`[answer/dynamic] unexpected error: ${dynamicErr?.message} — falling back`);
|
|
1302
|
+
}
|
|
1303
|
+
|
|
1304
|
+
// ── Static fallback ──────────────────────────────────────────────────
|
|
1305
|
+
// Only safe when the dynamic path sent NOTHING (picker still pristine). If
|
|
1306
|
+
// dynamic already injected keys then failed, the picker is in a partial
|
|
1307
|
+
// state — replaying the from-scratch static program would mis-navigate a
|
|
1308
|
+
// dirty picker and corrupt the answer. Fail loud so the user can retry.
|
|
1309
|
+
if (!usedDynamic && sentAny) {
|
|
972
1310
|
console.error(
|
|
973
|
-
`[answer]
|
|
1311
|
+
`[answer] dynamic path failed AFTER sending keys; NOT running static fallback (picker dirty) toolUseId=${msg.toolUseId}`,
|
|
974
1312
|
);
|
|
975
|
-
|
|
1313
|
+
return send(ws, {
|
|
1314
|
+
type: 'ack',
|
|
1315
|
+
op: 'answer',
|
|
1316
|
+
ok: false,
|
|
1317
|
+
error: 'answer injection failed mid-picker — please retry',
|
|
1318
|
+
});
|
|
976
1319
|
}
|
|
977
|
-
|
|
1320
|
+
if (!usedDynamic) {
|
|
1321
|
+
const keys = buildAnswerProgram(pending, msg.selections || []);
|
|
1322
|
+
console.log(
|
|
1323
|
+
`[answer] toolUseId=${msg.toolUseId} target=${session.target} keys=${JSON.stringify(keys)} (static fallback)`,
|
|
1324
|
+
);
|
|
1325
|
+
try {
|
|
1326
|
+
await tmux.sendRawKeysSequenced(session.target, keys);
|
|
1327
|
+
} catch (err) {
|
|
1328
|
+
console.error(
|
|
1329
|
+
`[answer] FAILED toolUseId=${msg.toolUseId} target=${session.target}: ${String(err?.message || err)}`,
|
|
1330
|
+
);
|
|
1331
|
+
throw err;
|
|
1332
|
+
}
|
|
1333
|
+
console.log(`[answer] sent toolUseId=${msg.toolUseId} (${keys.length} keys)`);
|
|
1334
|
+
} else {
|
|
1335
|
+
console.log(`[answer] sent toolUseId=${msg.toolUseId} via dynamic path`);
|
|
1336
|
+
}
|
|
1337
|
+
|
|
978
1338
|
return send(ws, { type: 'ack', op: 'answer', ok: true });
|
|
979
1339
|
}
|
|
980
1340
|
case 'capture': {
|