mod8-cli 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +87 -0
- package/LICENSE +21 -0
- package/README.md +239 -0
- package/bin/mod8.js +2 -0
- package/dist/cli.js +302 -0
- package/dist/commands/addProvider.js +105 -0
- package/dist/commands/all.js +158 -0
- package/dist/commands/chat.js +855 -0
- package/dist/commands/config.js +29 -0
- package/dist/commands/devAuthStatus.js +34 -0
- package/dist/commands/devHostAsk.js +51 -0
- package/dist/commands/devHostSystem.js +15 -0
- package/dist/commands/devResolve.js +54 -0
- package/dist/commands/devSimulate.js +235 -0
- package/dist/commands/devWorkAsk.js +55 -0
- package/dist/commands/intentRouting.js +280 -0
- package/dist/commands/keys.js +55 -0
- package/dist/commands/list.js +27 -0
- package/dist/commands/login.js +147 -0
- package/dist/commands/logout.js +17 -0
- package/dist/commands/prompt.js +63 -0
- package/dist/commands/providers.js +30 -0
- package/dist/commands/verify.js +5 -0
- package/dist/input/compose.js +37 -0
- package/dist/input/files.js +49 -0
- package/dist/input/stdin.js +14 -0
- package/dist/providers/anthropic.js +115 -0
- package/dist/providers/displayName.js +25 -0
- package/dist/providers/errorHints.js +175 -0
- package/dist/providers/generic.js +331 -0
- package/dist/providers/genericChat.js +265 -0
- package/dist/providers/google.js +63 -0
- package/dist/providers/hostSystem.js +173 -0
- package/dist/providers/index.js +38 -0
- package/dist/providers/mock.js +87 -0
- package/dist/providers/modelResolution.js +42 -0
- package/dist/providers/openai.js +75 -0
- package/dist/providers/pricing.js +47 -0
- package/dist/providers/proxy.js +148 -0
- package/dist/providers/registry.js +196 -0
- package/dist/providers/types.js +1 -0
- package/dist/providers/workSystem.js +33 -0
- package/dist/storage/auth.js +65 -0
- package/dist/storage/config.js +35 -0
- package/dist/storage/keys.js +59 -0
- package/dist/storage/providers.js +337 -0
- package/dist/storage/sessions.js +150 -0
- package/dist/types.js +9 -0
- package/dist/util/debug.js +79 -0
- package/dist/util/errors.js +157 -0
- package/dist/util/prompt.js +111 -0
- package/dist/util/secrets.js +110 -0
- package/dist/util/text.js +53 -0
- package/dist/util/time.js +25 -0
- package/dist/verify/runner.js +437 -0
- package/package.json +69 -0
- package/specs/all-mode.yaml +44 -0
- package/specs/behavior/auto-fallback.yaml +49 -0
- package/specs/behavior/bare-name-routing.yaml +223 -0
- package/specs/behavior/bare-paste-confirm.yaml +125 -0
- package/specs/behavior/env-var-respected.yaml +108 -0
- package/specs/behavior/error-fidelity.yaml +92 -0
- package/specs/behavior/error-hints.yaml +160 -0
- package/specs/behavior/fresh-vs-resume.yaml +94 -0
- package/specs/behavior/fuzzy-match.yaml +208 -0
- package/specs/behavior/host-self-knowledge-fresh.yaml +66 -0
- package/specs/behavior/intent-no-mismatch.yaml +115 -0
- package/specs/behavior/login-logout.yaml +97 -0
- package/specs/behavior/no-model-allowlist.yaml +80 -0
- package/specs/behavior/paste-key.yaml +342 -0
- package/specs/behavior/provider-switching.yaml +186 -0
- package/specs/behavior/providers-json-respected.yaml +106 -0
- package/specs/behavior/self-knowledge.yaml +119 -0
- package/specs/behavior/stress-session.yaml +226 -0
- package/specs/behavior/switch-back-when-failing.yaml +90 -0
- package/specs/behavior/work-character.yaml +109 -0
- package/specs/chat-meta.yaml +349 -0
- package/specs/chat-startup.yaml +148 -0
- package/specs/chat.yaml +91 -0
- package/specs/config.yaml +42 -0
- package/specs/install.yaml +112 -0
- package/specs/keys.yaml +81 -0
- package/specs/one-shot.yaml +65 -0
- package/specs/pipe-and-files.yaml +40 -0
- package/specs/providers.yaml +172 -0
- package/specs/sessions.yaml +115 -0
|
@@ -0,0 +1,437 @@
|
|
|
1
|
+
import { spawn } from 'child_process';
|
|
2
|
+
import { promises as fs } from 'fs';
|
|
3
|
+
import { join, dirname, resolve } from 'path';
|
|
4
|
+
import { tmpdir } from 'os';
|
|
5
|
+
import { randomBytes } from 'crypto';
|
|
6
|
+
import { fileURLToPath } from 'url';
|
|
7
|
+
import yaml from 'js-yaml';
|
|
8
|
+
import chalk from 'chalk';
|
|
9
|
+
// Locate the mod8 package root relative to this compiled module
// (dist/verify/runner.js lives two directories below it).
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const MOD8_ROOT = resolve(__dirname, '..', '..');
// Entry script that each sandbox exposes on PATH as `mod8`.
const MOD8_BIN_TARGET = join(MOD8_ROOT, 'bin', 'mod8.js');
// ESC [ <digits and semicolons> <letter> — colour/cursor escape sequences.
const ANSI_RE = /\x1B\[[0-9;]*[a-zA-Z]/g;

/** Remove ANSI escape sequences so assertions compare plain text. */
function stripAnsi(s) {
    return s.replace(ANSI_RE, '');
}

/** Return a promise that resolves after `ms` milliseconds. */
function sleep(ms) {
    return new Promise((wake) => setTimeout(wake, ms));
}
|
|
17
|
+
/**
 * Create an isolated, throw-away environment for a single test.
 *
 * Makes a fresh private temp directory that doubles as MOD8_CONFIG_DIR,
 * symlinks the packaged CLI into `<dir>/bin/mod8`, and builds a child
 * environment whose PATH resolves `mod8` to that symlink first.
 *
 * @returns {Promise<{dir: string, binDir: string, env: Object}>} Sandbox
 *   paths plus the environment to hand to spawned commands.
 * @throws Whatever the filesystem call that failed threw; a partially
 *   created sandbox directory is removed before the error propagates.
 */
async function setupSandbox() {
    // mkdtemp chooses the unique suffix itself and fails rather than reusing
    // an existing path, unlike mkdir({ recursive: true }) on a guessed name.
    const dir = await fs.mkdtemp(join(tmpdir(), 'mod8-verify-'));
    const binDir = join(dir, 'bin');
    try {
        // Keep the sandbox private to the current user.
        await fs.chmod(dir, 0o700);
        await fs.mkdir(binDir, { recursive: true });
        await fs.symlink(MOD8_BIN_TARGET, join(binDir, 'mod8'));
    }
    catch (err) {
        // The caller only tears down sandboxes it actually received, so a
        // half-built one has to be removed here or it is leaked.
        await fs.rm(dir, { recursive: true, force: true });
        throw err;
    }
    const env = {
        ...process.env,
        MOD8_CONFIG_DIR: dir,
        PATH: `${binDir}:${process.env.PATH ?? ''}`,
    };
    // API key: explicit verify key only. Strip the user's real keys so
    // tests can never accidentally hit prod credentials.
    // NOTE(review): only these four variables are removed; if the CLI reads
    // key variables for other providers they would still leak through —
    // confirm against the provider registry.
    for (const name of ['ANTHROPIC_API_KEY', 'OPENAI_API_KEY', 'GOOGLE_API_KEY', 'GEMINI_API_KEY']) {
        delete env[name];
    }
    if (process.env.MOD8_VERIFY_KEY) {
        env.ANTHROPIC_API_KEY = process.env.MOD8_VERIFY_KEY;
    }
    // Ensure consent gate doesn't block --all tests.
    env.MOD8_AUTO_CONFIRM = '1';
    return { dir, binDir, env };
}
|
|
42
|
+
/**
 * Delete a sandbox directory and everything inside it.
 *
 * Uses `force: true`, so a directory that is already gone is not an error.
 *
 * @param {{dir: string}} sandbox  Sandbox whose `dir` should be removed.
 * @returns {Promise<void>}
 */
async function teardownSandbox(sandbox) {
    const removalOptions = { recursive: true, force: true };
    await fs.rm(sandbox.dir, removalOptions);
}
|
|
45
|
+
/**
 * Run a command line through `bash -c` and collect its output.
 *
 * @param {string} command  Shell command line.
 * @param {string|undefined} stdin  Text written to the child's stdin before
 *   it is closed; `undefined` just closes stdin.
 * @param {Object} env  Environment for the child process.
 * @param {number} [timeoutMs=15000]  After this long the child is SIGKILLed.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>}
 *   `exitCode` is the child's exit status, or -1 when it was terminated by a
 *   signal (which includes the timeout kill).
 * @throws Rejects with the spawn error if the process could not be started.
 */
function runShell(command, stdin, env, timeoutMs = 15000) {
    return new Promise((resolveP, rejectP) => {
        const child = spawn('bash', ['-c', command], { env });
        let stdout = '';
        let stderr = '';
        const timer = setTimeout(() => child.kill('SIGKILL'), timeoutMs);
        child.stdout.on('data', (d) => {
            stdout += d.toString();
        });
        child.stderr.on('data', (d) => {
            stderr += d.toString();
        });
        child.on('error', (err) => {
            clearTimeout(timer);
            rejectP(err);
        });
        child.on('close', (code) => {
            clearTimeout(timer);
            // `code` is null when the child died from a signal. Mapping that
            // to 0 would make a timed-out or killed command look successful,
            // so report -1 instead.
            resolveP({ stdout, stderr, exitCode: code ?? -1 });
        });
        // A command that exits without draining stdin makes the write fail
        // (EPIPE). Without a listener that 'error' event would crash the
        // whole runner; the command's own exit status is what matters here.
        child.stdin.on('error', () => { });
        if (stdin !== undefined)
            child.stdin.write(stdin);
        child.stdin.end();
    });
}
|
|
70
|
+
/**
 * Drive an interactive command through a plain stdin pipe and capture its
 * output.
 *
 * Starts `command` with `bash -c` in its own process group, waits 300 ms,
 * then replays `inputs` in order: a step may carry `send` (text written to
 * the child's stdin) and/or `delay_ms` (pause after the step). Replay stops
 * early once the child has exited. stdin is then closed and the function
 * waits for the child's 'close' event or for the timeout fallback.
 *
 * Spawn and stdin 'error' events are deliberately ignored, so failures show
 * up only through the captured output and exit code.
 *
 * @param {string} command  Shell command line passed to `bash -c`.
 * @param {Array<{send?: string, delay_ms?: number}>} inputs  Scripted steps.
 * @param {Object} env  Environment for the child process.
 * @param {number} timeoutMs  Delay before the process group is SIGKILLed.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>}
 *   `exitCode` is -1 when the timeout fallback resolved the wait.
 */
async function runRepl(command, inputs, env, timeoutMs) {
    // Spawn directly via bash; stdin is a regular pipe. Ink-driven REPLs that
    // require raw-mode TTY will crash on entry — that's expected, the runner just
    // captures whatever stdout/stderr was emitted before the crash + SIGKILL on
    // timeout, so spec assertions can still match against the early output.
    const child = spawn('bash', ['-c', command], {
        env,
        detached: true, // own process group so we can SIGKILL grandchildren too
    });
    let stdout = '';
    let stderr = '';
    // Set by the 'exit' event; checked before each scripted step.
    let exited = false;
    // Resolver for closePromise, captured so both the 'close' handler and the
    // timeout fallback can settle the same promise (first call wins).
    let resolveClose;
    const closePromise = new Promise((res) => {
        resolveClose = res;
    });
    // Kill the whole process group (negative pid), falling back to killing
    // just the child, then drop our ends of the output pipes.
    const killEverything = () => {
        try {
            if (child.pid)
                process.kill(-child.pid, 'SIGKILL');
        }
        catch {
            try {
                child.kill('SIGKILL');
            }
            catch {
                /* already gone */
            }
        }
        try {
            child.stdout?.destroy();
        }
        catch { /* ignore */ }
        try {
            child.stderr?.destroy();
        }
        catch { /* ignore */ }
    };
    const timer = setTimeout(() => {
        killEverything();
        // Fallback: even if 'close' never fires (orphaned grandchild holding
        // pipes open elsewhere), resolve with -1 so the runner can move on.
        setTimeout(() => resolveClose(-1), 500);
    }, timeoutMs);
    child.stdout.on('data', (d) => { stdout += d.toString(); });
    child.stderr.on('data', (d) => { stderr += d.toString(); });
    // Swallow spawn/stdin errors; see the function comment.
    child.on('error', () => { });
    child.stdin.on('error', () => { });
    child.on('exit', () => { exited = true; });
    // NOTE(review): `code` is null when the child is terminated by a signal,
    // so a child that is SIGKILLed on timeout and closes within the 500 ms
    // grace period reports exit code 0 rather than -1 — confirm this is the
    // intended result for specs that assert on exit_code.
    child.on('close', (code) => resolveClose(code ?? 0));
    // Give the child a moment to start before sending the first input.
    await sleep(300);
    for (const step of inputs) {
        if (exited)
            break;
        if (step.send !== undefined) {
            try {
                child.stdin.write(step.send);
            }
            catch { /* ignore — child gone */ }
        }
        if (step.delay_ms !== undefined) {
            await sleep(step.delay_ms);
        }
    }
    try {
        child.stdin.end();
    }
    catch { /* ignore */ }
    const exitCode = await closePromise;
    clearTimeout(timer);
    // Best-effort cleanup if we returned via the fallback timer.
    if (exitCode === -1)
        killEverything();
    return { stdout, stderr, exitCode };
}
|
|
145
|
+
/**
 * Normalize an optional "one value or a list of values" spec field to an
 * array.
 *
 * @param {*} v  The raw field value.
 * @returns {Array} `[]` when the field is absent or empty, the array itself
 *   when it already is one, otherwise a single-element array.
 */
function asArray(v) {
    // `== null` matches both undefined (key absent) and null (a YAML key
    // written with no value). Wrapping null as [null] would make callers
    // search output for the literal text "null".
    if (v == null)
        return [];
    return Array.isArray(v) ? v : [v];
}
|
|
150
|
+
/**
 * Check a command result against a spec's `expect` block.
 *
 * Checks run in a fixed order and the first failure is returned:
 * `stdout_contains`, `stdout_matches`, `stdout_omits`, `stderr_contains`,
 * `exit_code`, `file_exists`, `file_mode`. Output is compared with ANSI
 * escape sequences removed.
 *
 * @param {Object|undefined} expect  Expectations; a falsy value always passes.
 * @param {{stdout: string, stderr: string, exitCode: number}} result
 * @param {Object} env  Variables used to expand `$NAME` in file paths.
 * @returns {Promise<{ok: true}|{ok: false, reason: string}>}
 * @throws If a `stdout_matches` pattern is not a valid regular expression.
 */
async function matchExpect(expect, result, env) {
    if (!expect)
        return { ok: true };
    const cleanStdout = stripAnsi(result.stdout);
    const cleanStderr = stripAnsi(result.stderr);
    for (const needle of asArray(expect.stdout_contains)) {
        if (!cleanStdout.includes(needle)) {
            return {
                ok: false,
                reason: `stdout missing ${JSON.stringify(needle)}\n--- actual stdout ---\n${cleanStdout || '(empty)'}\n--- actual stderr ---\n${cleanStderr || '(empty)'}`,
            };
        }
    }
    for (const pattern of asArray(expect.stdout_matches)) {
        const re = compileRegex(pattern);
        if (!re.test(cleanStdout)) {
            return {
                ok: false,
                reason: `stdout doesn't match /${pattern}/\n--- actual stdout ---\n${cleanStdout || '(empty)'}`,
            };
        }
    }
    for (const needle of asArray(expect.stdout_omits)) {
        if (cleanStdout.includes(needle)) {
            return {
                ok: false,
                reason: `stdout should NOT contain ${JSON.stringify(needle)}\n--- actual stdout ---\n${cleanStdout}`,
            };
        }
    }
    for (const needle of asArray(expect.stderr_contains)) {
        if (!cleanStderr.includes(needle)) {
            return {
                ok: false,
                reason: `stderr missing ${JSON.stringify(needle)}\n--- actual stderr ---\n${cleanStderr || '(empty)'}`,
            };
        }
    }
    if (expect.exit_code !== undefined && result.exitCode !== expect.exit_code) {
        return {
            ok: false,
            reason: `expected exit code ${expect.exit_code}, got ${result.exitCode}\n--- stdout ---\n${cleanStdout}\n--- stderr ---\n${cleanStderr}`,
        };
    }
    if (expect.file_exists !== undefined) {
        const path = expandEnv(expect.file_exists, env);
        try {
            await fs.access(path);
        }
        catch {
            return { ok: false, reason: `file does not exist: ${path}` };
        }
    }
    if (expect.file_mode !== undefined) {
        // Format is "<path>:<octal mode>". Split on the LAST colon so a path
        // that itself contains ':' is not cut short.
        const modeSpec = String(expect.file_mode);
        const sep = modeSpec.lastIndexOf(':');
        const pathRaw = sep === -1 ? modeSpec : modeSpec.slice(0, sep);
        const expectedMode = sep === -1 ? '' : modeSpec.slice(sep + 1);
        if (!/^[0-7]{1,4}$/.test(expectedMode)) {
            return {
                ok: false,
                reason: `file_mode must look like "<path>:<octal mode>", got ${JSON.stringify(modeSpec)}`,
            };
        }
        const path = expandEnv(pathRaw, env);
        try {
            const stat = await fs.stat(path);
            // Compare permission bits numerically so "0600" and "600" agree.
            const actualBits = stat.mode & 0o777;
            if (actualBits !== Number.parseInt(expectedMode, 8)) {
                return { ok: false, reason: `file ${path} mode is ${actualBits.toString(8)}, expected ${expectedMode}` };
            }
        }
        catch (err) {
            return { ok: false, reason: `cannot stat ${path}: ${err.message}` };
        }
    }
    return { ok: true };
}
|
|
219
|
+
/**
 * Walk `root` recursively and list every regular file whose name ends in
 * `.yaml`.
 *
 * Paths are returned relative to the starting directory and joined with '/',
 * which keeps display lines short ("behavior/foo.yaml" instead of an
 * absolute path).
 *
 * @param {string} root  Directory to scan.
 * @param {string} [relBase='']  Prefix already accumulated for entries of
 *   `root`; used by the recursive calls.
 * @returns {Promise<string[]>} Relative paths in directory-listing order.
 */
async function collectSpecFiles(root, relBase = '') {
    const found = [];
    const listing = await fs.readdir(root, { withFileTypes: true });
    for (const entry of listing) {
        const relPath = relBase ? `${relBase}/${entry.name}` : entry.name;
        if (entry.isDirectory()) {
            const nested = await collectSpecFiles(join(root, entry.name), relPath);
            found.push(...nested);
            continue;
        }
        if (entry.isFile() && entry.name.endsWith('.yaml')) {
            found.push(relPath);
        }
    }
    return found;
}
|
|
238
|
+
/**
 * Build a RegExp from a spec string.
 *
 * JavaScript's RegExp constructor takes flags as a separate argument rather
 * than inline, so a leading `(?flags)` group made of the letters i, m, s, u,
 * y (e.g. `(?i)…` for case-insensitive) is peeled off the pattern and passed
 * to the constructor as its flags.
 *
 * @param {string} pattern  Regex source, optionally prefixed with `(?flags)`.
 * @returns {RegExp}
 * @throws {SyntaxError} If the pattern or flag combination is invalid.
 */
function compileRegex(pattern) {
    const inlineFlags = pattern.match(/^\(\?([imsuy]+)\)([\s\S]*)$/);
    if (!inlineFlags) {
        return new RegExp(pattern);
    }
    const [, flags, body] = inlineFlags;
    return new RegExp(body, flags);
}
|
|
249
|
+
/**
 * Replace `$NAME` references in a string with values from `env`.
 *
 * A name starts with an upper-case letter or underscore and continues with
 * upper-case letters, digits or underscores. Names that are missing from
 * `env` expand to the empty string.
 *
 * @param {string} s  Text that may contain `$NAME` references.
 * @param {Object} env  Variable values keyed by name.
 * @returns {string} The expanded text.
 */
function expandEnv(s, env) {
    const substitute = (_whole, varName) => {
        const value = env[varName];
        return value ?? '';
    };
    return s.replace(/\$([A-Z_][A-Z0-9_]*)/g, substitute);
}
|
|
252
|
+
/**
 * Execute a single test from a spec inside a fresh sandbox.
 *
 * A test that needs an API key is skipped when MOD8_VERIFY_KEY is unset.
 * Otherwise the optional `setup` commands run first (each must exit 0), then
 * exactly one of these shapes is executed, checked in this order: `repl`,
 * `steps`, or a single `run`/`shell` command. Any exception raised along the
 * way is reported as a failed test, and the sandbox is always removed.
 *
 * @param {Object} test  One entry of the spec's `tests` list.
 * @param {Object} spec  The parsed spec file (supplies the default for
 *   `requires_api_key`).
 * @returns {Promise<{name: string, status: 'pass'|'fail'|'skipped',
 *   durationMs: number, reason?: string}>}
 */
async function runTest(test, spec) {
    const start = Date.now();
    // Result builder shared by every outcome that reports elapsed time.
    const outcome = (status, reason) => ({
        name: test.name,
        status,
        durationMs: Date.now() - start,
        reason,
    });
    // Turns a matchExpect() verdict into a final result.
    const fromMatch = (match) => outcome(match.ok ? 'pass' : 'fail', match.ok ? undefined : match.reason);

    const requiresKey = test.requires_api_key ?? spec.requires_api_key ?? false;
    if (requiresKey && !process.env.MOD8_VERIFY_KEY) {
        return {
            name: test.name,
            status: 'skipped',
            durationMs: 0,
            reason: 'no MOD8_VERIFY_KEY',
        };
    }

    let sandbox = null;
    try {
        sandbox = await setupSandbox();

        // Preparation commands: any missing command or non-zero exit fails
        // the test before the real body runs.
        const setupSteps = test.setup;
        if (setupSteps) {
            for (let idx = 0; idx < setupSteps.length; idx += 1) {
                const setupStep = setupSteps[idx];
                const setupCmd = setupStep.run ?? setupStep.shell;
                if (!setupCmd) {
                    return outcome('fail', `setup step ${idx + 1}: missing 'run' or 'shell'`);
                }
                const setupResult = await runShell(setupCmd, setupStep.stdin, sandbox.env);
                if (setupResult.exitCode !== 0) {
                    return outcome('fail', `setup step ${idx + 1} (${setupCmd}) exited ${setupResult.exitCode}\n--- stdout ---\n${stripAnsi(setupResult.stdout)}\n--- stderr ---\n${stripAnsi(setupResult.stderr)}`);
                }
            }
        }

        // Shape 1: scripted interactive session.
        if (test.repl) {
            const { run, inputs, timeout_ms: timeoutMs } = test.repl;
            const replResult = await runRepl(run, inputs ?? [], sandbox.env, timeoutMs ?? 30000);
            return fromMatch(await matchExpect(test.expect, replResult, sandbox.env));
        }

        // Shape 2: a sequence of commands, each with its own expectations.
        const bodySteps = test.steps;
        if (bodySteps) {
            for (let idx = 0; idx < bodySteps.length; idx += 1) {
                const bodyStep = bodySteps[idx];
                const stepCmd = bodyStep.run ?? bodyStep.shell;
                if (!stepCmd) {
                    return outcome('fail', `step ${idx + 1}: missing 'run' or 'shell'`);
                }
                const stepResult = await runShell(stepCmd, bodyStep.stdin, sandbox.env);
                const stepMatch = await matchExpect(bodyStep.expect, stepResult, sandbox.env);
                if (!stepMatch.ok) {
                    return outcome('fail', `step ${idx + 1} (${stepCmd}): ${stepMatch.reason}`);
                }
            }
            return { name: test.name, status: 'pass', durationMs: Date.now() - start };
        }

        // Shape 3: one command.
        if (test.run || test.shell) {
            const singleCmd = test.run ?? test.shell;
            const singleResult = await runShell(singleCmd, test.stdin, sandbox.env);
            return fromMatch(await matchExpect(test.expect, singleResult, sandbox.env));
        }

        return outcome('fail', 'test has no run/shell/steps/repl');
    }
    catch (err) {
        return outcome('fail', `runner error: ${err.message}`);
    }
    finally {
        if (sandbox)
            await teardownSandbox(sandbox);
    }
}
|
|
355
|
+
/**
 * Print one test result line to stdout.
 *
 * Passing tests show a green check and the duration, skipped tests a dimmed
 * line with the skip reason, and anything else a red cross followed by the
 * failure reason, one indented red line per line of the reason.
 *
 * @param {{name: string, status: string, durationMs: number, reason?: string}} result
 * @returns {void}
 */
function printResult(result) {
    const { name, status, durationMs, reason } = result;
    const dur = `${(durationMs / 1000).toFixed(2)}s`;
    switch (status) {
        case 'pass':
            console.log(` ${chalk.green('✓')} ${name} ${chalk.dim(dur)}`);
            return;
        case 'skipped':
            console.log(` ${chalk.yellow('⊘')} ${chalk.dim(name)} ${chalk.dim(`(skipped — ${reason})`)}`);
            return;
        default: {
            console.log(` ${chalk.red('✗')} ${name} ${chalk.dim(dur)}`);
            const reasonLines = (reason ?? '').split('\n');
            const indented = reasonLines.map((line) => ' ' + line).join('\n');
            console.log(chalk.red(indented));
        }
    }
}
|
|
369
|
+
/**
 * Discover every YAML spec under the specs directory, run its tests one at a
 * time, print a line per test and a summary, and return the totals.
 *
 * A spec file that cannot be read or parsed, or whose content is not a
 * mapping with an optional `tests` list, is reported and counted as one
 * failure instead of aborting the run.
 *
 * @param {{specsDir?: string}} [opts]  `specsDir` overrides the packaged
 *   `specs/` directory.
 * @returns {Promise<{pass: number, fail: number, skipped: number,
 *   durationMs: number, specs: Array<{file: string, name: string,
 *   results: Array}>}>}
 */
export async function runVerify(opts = {}) {
    const specsDir = opts.specsDir ?? join(MOD8_ROOT, 'specs');
    const start = Date.now();
    let files;
    try {
        files = (await collectSpecFiles(specsDir)).sort();
    }
    catch {
        console.error(chalk.red(`mod8 verify: no specs/ directory found at ${specsDir}`));
        return { pass: 0, fail: 0, skipped: 0, durationMs: 0, specs: [] };
    }
    if (files.length === 0) {
        console.error(chalk.dim(`mod8 verify: no .yaml specs found in ${specsDir}`));
        return { pass: 0, fail: 0, skipped: 0, durationMs: 0, specs: [] };
    }
    const hasKey = !!process.env.MOD8_VERIFY_KEY;
    console.log();
    console.log(chalk.bold('mod8 verify') +
        chalk.dim(` · ${files.length} spec file${files.length === 1 ? '' : 's'}`) +
        (hasKey ? chalk.dim(' · MOD8_VERIFY_KEY set') : chalk.dim(' · no MOD8_VERIFY_KEY (api tests will skip)')));
    const specResults = [];
    let totalPass = 0;
    let totalFail = 0;
    let totalSkipped = 0;
    // Report a spec file that could not be used and count it as one failure.
    const rejectSpec = (relPath, label, message) => {
        console.log();
        console.log(chalk.bold(relPath) + chalk.red(label));
        console.log(' ' + chalk.red(message));
        totalFail++;
    };
    for (const relPath of files) {
        const path = join(specsDir, relPath);
        let spec;
        try {
            // Reading sits inside the try so an unreadable file fails this
            // spec only, not the whole run.
            const data = await fs.readFile(path, 'utf8');
            spec = yaml.load(data);
        }
        catch (err) {
            const label = err?.name === 'YAMLException' ? ' (parse error)' : ' (read error)';
            rejectSpec(relPath, label, err.message);
            continue;
        }
        // An empty document loads as undefined and a scalar/list document is
        // not a spec either; dereferencing `spec.name` on those would throw.
        const isMapping = spec !== null && typeof spec === 'object' && !Array.isArray(spec);
        if (!isMapping || (spec.tests != null && !Array.isArray(spec.tests))) {
            rejectSpec(relPath, ' (invalid spec)', 'expected a mapping with an optional `tests` list');
            continue;
        }
        console.log();
        console.log(chalk.bold(relPath) + chalk.dim(` — ${spec.name}`));
        const results = [];
        for (const test of spec.tests ?? []) {
            // A non-mapping entry (e.g. a stray `-` in the YAML) cannot be
            // run; record it as a failure rather than letting runTest throw.
            const result = test !== null && typeof test === 'object'
                ? await runTest(test, spec)
                : { name: String(test), status: 'fail', durationMs: 0, reason: 'test entry is not a mapping' };
            results.push(result);
            printResult(result);
            if (result.status === 'pass')
                totalPass++;
            else if (result.status === 'fail')
                totalFail++;
            else
                totalSkipped++;
        }
        specResults.push({ file: relPath, name: spec.name, results });
    }
    const durationMs = Date.now() - start;
    console.log();
    console.log(chalk.dim('─'.repeat(60)));
    const total = totalPass + totalFail + totalSkipped;
    console.log(`${chalk.green(totalPass + ' pass')} · ` +
        `${totalFail > 0 ? chalk.red(totalFail + ' fail') : chalk.dim(totalFail + ' fail')} · ` +
        `${chalk.dim(totalSkipped + ' skipped')} · ` +
        `${chalk.dim(total + ' total · ' + (durationMs / 1000).toFixed(2) + 's')}`);
    console.log();
    return {
        pass: totalPass,
        fail: totalFail,
        skipped: totalSkipped,
        durationMs,
        specs: specResults,
    };
}
|
package/package.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "mod8-cli",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"description": "Talk to any LLM from your terminal — Claude, GPT, Gemini, DeepSeek, Mistral, Groq, anything OpenAI-compatible. BYOK or `mod8 login` to use the hosted proxy.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"mod8": "./bin/mod8.js"
|
|
8
|
+
},
|
|
9
|
+
"files": [
|
|
10
|
+
"bin",
|
|
11
|
+
"dist",
|
|
12
|
+
"specs",
|
|
13
|
+
"README.md",
|
|
14
|
+
"LICENSE",
|
|
15
|
+
"CHANGELOG.md"
|
|
16
|
+
],
|
|
17
|
+
"scripts": {
|
|
18
|
+
"build": "tsc",
|
|
19
|
+
"dev": "tsx src/cli.ts",
|
|
20
|
+
"verify": "node ./bin/mod8.js verify",
|
|
21
|
+
"prepublishOnly": "npm run build"
|
|
22
|
+
},
|
|
23
|
+
"engines": {
|
|
24
|
+
"node": ">=20"
|
|
25
|
+
},
|
|
26
|
+
"keywords": [
|
|
27
|
+
"cli",
|
|
28
|
+
"llm",
|
|
29
|
+
"ai",
|
|
30
|
+
"chat",
|
|
31
|
+
"byok",
|
|
32
|
+
"claude",
|
|
33
|
+
"anthropic",
|
|
34
|
+
"openai",
|
|
35
|
+
"gpt",
|
|
36
|
+
"gemini",
|
|
37
|
+
"google",
|
|
38
|
+
"deepseek",
|
|
39
|
+
"mistral",
|
|
40
|
+
"groq",
|
|
41
|
+
"openrouter",
|
|
42
|
+
"xai",
|
|
43
|
+
"grok",
|
|
44
|
+
"together",
|
|
45
|
+
"compare",
|
|
46
|
+
"openai-compatible"
|
|
47
|
+
],
|
|
48
|
+
"license": "MIT",
|
|
49
|
+
"author": "Yonatan Zlit",
|
|
50
|
+
"dependencies": {
|
|
51
|
+
"@anthropic-ai/sdk": "^0.94.0",
|
|
52
|
+
"@google/generative-ai": "^0.24.1",
|
|
53
|
+
"chalk": "^5.6.2",
|
|
54
|
+
"commander": "^14.0.3",
|
|
55
|
+
"ink": "^7.0.2",
|
|
56
|
+
"ink-text-input": "^6.0.0",
|
|
57
|
+
"js-yaml": "^4.1.1",
|
|
58
|
+
"openai": "^6.36.0",
|
|
59
|
+
"react": "^19.2.5"
|
|
60
|
+
},
|
|
61
|
+
"devDependencies": {
|
|
62
|
+
"@types/js-yaml": "^4.0.9",
|
|
63
|
+
"@types/node": "^25.6.0",
|
|
64
|
+
"@types/react": "^19.2.14",
|
|
65
|
+
"ink-testing-library": "^4.0.0",
|
|
66
|
+
"tsx": "^4.21.0",
|
|
67
|
+
"typescript": "^6.0.3"
|
|
68
|
+
}
|
|
69
|
+
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
name: --all parallel mode
|
|
2
|
+
description: --all renders three speaker blocks plus Total summary
|
|
3
|
+
|
|
4
|
+
tests:
|
|
5
|
+
- name: all three providers respond, total line shows 3/3
|
|
6
|
+
shell: "MOD8_MOCK=1 mod8 --all 'compare in 5 words'"
|
|
7
|
+
expect:
|
|
8
|
+
stdout_contains:
|
|
9
|
+
- "Anthropic (Claude)"
|
|
10
|
+
- "OpenAI (GPT)"
|
|
11
|
+
- "Google (Gemini)"
|
|
12
|
+
- "Total:"
|
|
13
|
+
- "3/3 ok"
|
|
14
|
+
exit_code: 0
|
|
15
|
+
|
|
16
|
+
- name: partial failure renders friendly per-block error and partial total
|
|
17
|
+
shell: "MOD8_MOCK=1 MOD8_MOCK_FAIL=openai mod8 --all 'test'"
|
|
18
|
+
expect:
|
|
19
|
+
stdout_contains:
|
|
20
|
+
- "Anthropic (Claude)"
|
|
21
|
+
- "✗ failed"
|
|
22
|
+
- "Google (Gemini)"
|
|
23
|
+
- "2/3 ok"
|
|
24
|
+
|
|
25
|
+
- name: 401 error in --all is classified per-block
|
|
26
|
+
shell: "MOD8_MOCK=1 MOD8_MOCK_ERROR=401 MOD8_MOCK_ERROR_PROVIDER=anthropic mod8 --all 'test'"
|
|
27
|
+
expect:
|
|
28
|
+
stdout_contains:
|
|
29
|
+
- "✗ failed"
|
|
30
|
+
- "invalid API key"
|
|
31
|
+
- "2/3 ok"
|
|
32
|
+
|
|
33
|
+
- name: cost displayed in Total
|
|
34
|
+
shell: "MOD8_MOCK=1 mod8 --all 'hi'"
|
|
35
|
+
expect:
|
|
36
|
+
stdout_matches: "Total:.*tok.*\\$"
|
|
37
|
+
|
|
38
|
+
- name: consent gate refuses piped stdin without auto-confirm
|
|
39
|
+
shell: "echo 'n' | MOD8_AUTO_CONFIRM= MOD8_MOCK=1 mod8 --all 'test'"
|
|
40
|
+
expect:
|
|
41
|
+
stderr_contains:
|
|
42
|
+
- "first-run confirmation"
|
|
43
|
+
- "MOD8_AUTO_CONFIRM=1"
|
|
44
|
+
exit_code: 1
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
name: behavior — auto-fallback after consecutive work errors
|
|
2
|
+
description: |
|
|
3
|
+
When the work-mode provider keeps failing, mod8 should auto-fallback to
|
|
4
|
+
host so the user is never trapped. The DECISION (warn vs fallback) is a
|
|
5
|
+
pure function in src/commands/intentRouting.ts: fallbackDecision(count).
|
|
6
|
+
These tests pin the threshold and the warn/fallback transition.
|
|
7
|
+
|
|
8
|
+
The runtime side of the fix (chat.tsx incrementing the counter on each
|
|
9
|
+
error, calling fallbackDecision, and switching modes) is covered by code
|
|
10
|
+
review — driving 3 consecutive turns through Ink would require a real PTY,
|
|
11
|
+
which the verify sandbox doesn't have. The pure decision function is the
|
|
12
|
+
load-bearing piece and IS verifiable here.
|
|
13
|
+
|
|
14
|
+
tests:
|
|
15
|
+
- name: 0 errors → ok (no banner, nothing to do)
|
|
16
|
+
shell: 'mod8 dev:check-fallback 0'
|
|
17
|
+
expect:
|
|
18
|
+
stdout_contains: "decision=ok"
|
|
19
|
+
|
|
20
|
+
- name: 1 error → warn (suggest switch-back, don't force)
|
|
21
|
+
shell: 'mod8 dev:check-fallback 1'
|
|
22
|
+
expect:
|
|
23
|
+
stdout_contains: "decision=warn"
|
|
24
|
+
|
|
25
|
+
- name: 2 errors → still warn (one shy of threshold)
|
|
26
|
+
shell: 'mod8 dev:check-fallback 2'
|
|
27
|
+
expect:
|
|
28
|
+
stdout_contains: "decision=warn"
|
|
29
|
+
|
|
30
|
+
- name: 3 errors → fallback (auto-switch to host)
|
|
31
|
+
shell: 'mod8 dev:check-fallback 3'
|
|
32
|
+
expect:
|
|
33
|
+
stdout_contains: "decision=fallback"
|
|
34
|
+
|
|
35
|
+
- name: 4+ errors → still fallback (decision is monotonic past threshold)
|
|
36
|
+
shell: 'mod8 dev:check-fallback 7'
|
|
37
|
+
expect:
|
|
38
|
+
stdout_contains: "decision=fallback"
|
|
39
|
+
|
|
40
|
+
- name: threshold is published as 3 (don't lower it without thinking)
|
|
41
|
+
shell: 'mod8 dev:check-fallback 0'
|
|
42
|
+
expect:
|
|
43
|
+
stdout_contains: "threshold=3"
|
|
44
|
+
|
|
45
|
+
- name: negative count is rejected
|
|
46
|
+
shell: 'mod8 dev:check-fallback -1'
|
|
47
|
+
expect:
|
|
48
|
+
stderr_contains: "non-negative"
|
|
49
|
+
exit_code: 1
|