aiden-runtime 4.6.0 → 4.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +488 -265
- package/dist/cli/v4/aidenCLI.js +26 -1
- package/dist/cli/v4/chatSession.js +13 -0
- package/dist/cli/v4/commands/help.js +2 -0
- package/dist/cli/v4/commands/index.js +6 -1
- package/dist/cli/v4/commands/walkthrough.js +140 -0
- package/dist/cli/v4/daemonAgentBuilder.js +12 -4
- package/dist/cli/v4/onboarding/disclaimer.js +162 -0
- package/dist/cli/v4/onboarding/loading.js +208 -0
- package/dist/cli/v4/onboarding/providerPicker.js +126 -0
- package/dist/cli/v4/onboarding/successScreen.js +68 -0
- package/dist/cli/v4/repl/firstRunHint.js +107 -0
- package/dist/cli/v4/setupWizard.js +201 -31
- package/dist/core/v4/aidenAgent.js +19 -14
- package/dist/core/v4/providers/modelFetch.js +179 -0
- package/dist/core/v4/providers/probe.js +275 -0
- package/dist/core/v4/sandboxFs.js +1 -1
- package/dist/core/v4/subagent/childBuilder.js +12 -4
- package/dist/core/v4/ui/banner.js +133 -0
- package/dist/core/v4/ui/theme.js +164 -0
- package/dist/core/version.js +1 -1
- package/dist/moat/honestyEnforcement.js +143 -241
- package/dist/tools/v4/ui/_uiSmokeTool.js +60 -0
- package/package.json +10 -4
|
@@ -372,25 +372,21 @@ class AidenAgent {
|
|
|
372
372
|
// 8. Run the tool-calling loop.
|
|
373
373
|
const loopResult = await this.runTurnLoop(messages, narrowedTools, trackers, options);
|
|
374
374
|
// 9. Honesty post-loop scan (only if loop ended with a normal stop).
|
|
375
|
+
//
|
|
376
|
+
// v4.7.0 Phase 2.3 — the verifier now records deterministic
|
|
377
|
+
// outcome events from `toolCallTrace` (not regex over the
|
|
378
|
+
// assistant's text). When `findings.length > 0` AND mode is
|
|
379
|
+
// `enforce`, it returns an append-only `footer` we concatenate
|
|
380
|
+
// to `finalContent`. The model's text is NEVER rewritten —
|
|
381
|
+
// that was the v4.6.x failure mode this verifier replaces.
|
|
375
382
|
let honestyFindings;
|
|
376
383
|
let finalContent = loopResult.finalContent;
|
|
377
384
|
if (this.honestyEnforcement && loopResult.finishReason === 'stop') {
|
|
378
385
|
try {
|
|
379
386
|
const scan = await this.honestyEnforcement.check(finalContent, loopResult.messages, loopResult.toolCallTrace);
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
finalContent = scan.correctedResponse;
|
|
384
|
-
// Reflect the corrected text in the message history too so
|
|
385
|
-
// /debug-prompt and /usage agree on the final string.
|
|
386
|
-
for (let i = loopResult.messages.length - 1; i >= 0; i--) {
|
|
387
|
-
const m = loopResult.messages[i];
|
|
388
|
-
if (m.role === 'assistant' && (!m.toolCalls || m.toolCalls.length === 0)) {
|
|
389
|
-
loopResult.messages[i].content = finalContent;
|
|
390
|
-
break;
|
|
391
|
-
}
|
|
392
|
-
}
|
|
393
|
-
}
|
|
387
|
+
honestyFindings = scan.findings;
|
|
388
|
+
if (scan.footer) {
|
|
389
|
+
finalContent = `${finalContent}\n\n${scan.footer}`;
|
|
394
390
|
}
|
|
395
391
|
}
|
|
396
392
|
catch {
|
|
@@ -970,6 +966,15 @@ class AidenAgent {
|
|
|
970
966
|
result: result.result,
|
|
971
967
|
error: result.error,
|
|
972
968
|
verified: this.resolveVerifiedFlag?.(result),
|
|
969
|
+
// v4.7.0 Phase 2.3 — stamp the handler's `mutates` flag
|
|
970
|
+
// at dispatch time so the post-loop honesty verifier can
|
|
971
|
+
// distinguish mutating vs read-only failures without
|
|
972
|
+
// needing a registry handle. Defaults to `false` for
|
|
973
|
+
// unknown tools (the resolver returns undefined) — read-
|
|
974
|
+
// only tools that error are surfaced via the tool-trail
|
|
975
|
+
// row already; the verifier deliberately stays quiet
|
|
976
|
+
// about them.
|
|
977
|
+
handlerMutates: this.resolveMutates?.(call.name) ?? false,
|
|
973
978
|
// v4.2 Phase 1 — verification surfaces alongside the trace
|
|
974
979
|
// entry for downstream callers (chatSession, loopTrace,
|
|
975
980
|
// future RecoveryReport). Undefined when TCE is off.
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) 2026 Shiva Deore (Taracod).
|
|
4
|
+
* Licensed under AGPL-3.0. See LICENSE for details.
|
|
5
|
+
*
|
|
6
|
+
* Aiden — local-first agent.
|
|
7
|
+
*/
|
|
8
|
+
/**
|
|
9
|
+
* core/v4/providers/modelFetch.ts — ONB1 slice 6.
|
|
10
|
+
*
|
|
11
|
+
* Live `/models` enumeration used by the onboarding model picker.
|
|
12
|
+
* Six providers have first-class live-fetch implementations:
|
|
13
|
+
* - anthropic GET https://api.anthropic.com/v1/models
|
|
14
|
+
* - openai GET https://api.openai.com/v1/models
|
|
15
|
+
* - groq GET https://api.groq.com/openai/v1/models
|
|
16
|
+
* - openrouter GET https://openrouter.ai/api/v1/models
|
|
17
|
+
* - gemini GET https://generativelanguage.googleapis.com/v1beta/models
|
|
18
|
+
* - ollama GET http://localhost:11434/api/tags
|
|
19
|
+
*
|
|
20
|
+
* Every other provider falls through to the curated MODEL_CATALOG
|
|
21
|
+
* static list (providers/v4/modelCatalog.ts).
|
|
22
|
+
*
|
|
23
|
+
* Behaviour contract:
|
|
24
|
+
* - 5-second hard timeout per request (configurable).
|
|
25
|
+
* - On any failure (network, non-2xx, malformed body) we return the
|
|
26
|
+
* static fallback with `{ source: 'fallback', reason }` so the
|
|
27
|
+
* picker can show the muted "Couldn't reach API" hint.
|
|
28
|
+
* - Results are sorted with "recommended" / default models first,
|
|
29
|
+
* then by display name.
|
|
30
|
+
* - No client-side cost-tier annotation — the curated catalog owns
|
|
31
|
+
* pricing where it's known; the picker shows "$" tiers from the
|
|
32
|
+
* fallback only.
|
|
33
|
+
*/
|
|
34
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
35
|
+
exports.fetchModels = fetchModels;
|
|
36
|
+
const modelCatalog_1 = require("../../../providers/v4/modelCatalog");
|
|
37
|
+
const DEFAULT_TIMEOUT_MS = 5000;
|
|
38
|
+
/**
 * Bucket a catalog pricing record into the coarse cost tier shown by the
 * model picker.
 *
 * The tier is derived from the mean of the input and output price per
 * million tokens: non-positive → 'free', below 2 → '$', below 10 → '$$',
 * anything else → '$$$'.
 *
 * @param {{ inputPerM: number, outputPerM: number } | null | undefined} p
 *   Pricing record; may be absent.
 * @returns {'free' | '$' | '$$' | '$$$' | undefined} The tier, or `undefined`
 *   when there is no usable pricing.
 */
function tierFromPricing(p) {
    if (!p)
        return undefined;
    const avg = (p.inputPerM + p.outputPerM) / 2;
    // A record with a missing or non-numeric price averages to NaN. NaN fails
    // every comparison below, so without this guard the model would be
    // labelled with the most expensive tier instead of "unknown".
    if (Number.isNaN(avg))
        return undefined;
    if (avg <= 0)
        return 'free';
    if (avg < 2)
        return '$';
    if (avg < 10)
        return '$$';
    return '$$$';
}
|
|
50
|
+
/**
 * Build the static model list for a provider from the curated catalog.
 *
 * Entries are ordered with default models first, then alphabetically by
 * display name.
 *
 * @param {string} providerId Provider whose catalog entries are returned.
 * @param {string} [reason] Why the live listing was not used; passed through
 *   unchanged on the result.
 * @returns {{ models: object[], source: 'fallback', reason: (string|undefined) }}
 */
function fallbackFor(providerId, reason) {
    // `filter` returns a fresh array, so the in-place sort below never
    // reorders the shared catalog.
    const entries = modelCatalog_1.MODEL_CATALOG.filter((m) => m.providerId === providerId);
    // Coerce through Boolean first: Number(undefined) is NaN, and a NaN
    // difference would silently skip the "defaults first" term for any entry
    // that leaves `isDefault` unset.
    const rank = (m) => Number(Boolean(m.isDefault));
    const models = entries
        .sort((a, b) => rank(b) - rank(a) || a.displayName.localeCompare(b.displayName))
        .map((m) => ({
        id: m.id,
        displayName: m.displayName,
        contextLength: m.contextLength,
        recommended: m.isDefault,
        tier: tierFromPricing(m.pricing),
    }));
    return { models, source: 'fallback', reason };
}
|
|
63
|
+
/**
 * Settle with `p`, or reject once `ms` milliseconds have elapsed, whichever
 * happens first. The timer is cleared as soon as `p` settles.
 *
 * @param {Promise<*>} p Promise to guard.
 * @param {number} ms Time limit in milliseconds.
 * @returns {Promise<*>} Mirrors `p`, or rejects with a "Timed out" error.
 */
function withTimeout(p, ms) {
    let timer;
    // Wrap a settle callback so the pending timer is always cancelled first.
    const cancelThen = (settle) => (value) => {
        clearTimeout(timer);
        settle(value);
    };
    return new Promise((resolve, reject) => {
        timer = setTimeout(() => {
            reject(new Error(`Timed out after ${ms}ms`));
        }, ms);
        p.then(cancelThen(resolve), cancelThen(reject));
    });
}
|
|
69
|
+
/**
 * Convert raw live-listing entries into picker models.
 *
 * Each entry is cross-referenced with the static catalog, which supplies the
 * friendly display name, context length, recommended flag and pricing tier
 * when it knows the model. Entries without a non-empty string `id` are
 * dropped. The result is ordered with recommended models first, then by
 * display name.
 *
 * @param {string} providerId Provider the entries belong to.
 * @param {Array<object>} raws Raw entries (`id`, optional `display_name`,
 *   `name`, `context_length`).
 * @returns {Array<object>} Normalised models.
 */
function normalise(providerId, raws) {
    const cat = new Map(modelCatalog_1.MODEL_CATALOG.filter((m) => m.providerId === providerId).map((m) => [m.id, m]));
    // Models missing from the catalog carry `recommended: undefined`.
    // Number(undefined) is NaN, which used to poison the comparator and leave
    // recommended models sorted by name only; coerce through Boolean so an
    // unset flag ranks the same as `false`.
    const rank = (m) => Number(Boolean(m.recommended));
    return raws
        .filter((m) => m && typeof m.id === 'string' && m.id.length > 0)
        .map((m) => {
        const c = cat.get(m.id);
        return {
            id: m.id,
            displayName: c?.displayName ?? m.display_name ?? m.name ?? m.id,
            contextLength: c?.contextLength ?? m.context_length,
            recommended: c?.isDefault,
            tier: tierFromPricing(c?.pricing),
        };
    })
        .sort((a, b) => rank(b) - rank(a) || a.displayName.localeCompare(b.displayName));
}
|
|
87
|
+
/**
 * List models from Anthropic's `/v1/models` endpoint.
 *
 * @param {{ apiKey: string, timeoutMs: number, fetchImpl: Function }} o
 *   Key sent as `x-api-key`, time limit for the request, and the fetch
 *   implementation to call.
 * @returns {Promise<Array<object>>} The response's `data` array, or `[]`
 *   when the field is absent.
 * @throws {Error} `HTTP <status>` on a non-OK response; timeout and fetch
 *   errors propagate.
 */
async function fetchAnthropic(o) {
    const headers = { 'x-api-key': o.apiKey, 'anthropic-version': '2023-06-01' };
    const pending = o.fetchImpl('https://api.anthropic.com/v1/models', { headers });
    const response = await withTimeout(pending, o.timeoutMs);
    if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
    }
    const payload = await response.json();
    return payload.data ?? [];
}
|
|
96
|
+
/**
 * List models from an OpenAI-compatible `/models` endpoint using bearer
 * authentication.
 *
 * @param {string} url Full URL of the listing endpoint.
 * @param {{ apiKey: string, timeoutMs: number, fetchImpl: Function }} o
 *   Bearer token, time limit for the request, and the fetch implementation.
 * @returns {Promise<Array<object>>} The response's `data` array, or `[]`
 *   when the field is absent.
 * @throws {Error} `HTTP <status>` on a non-OK response; timeout and fetch
 *   errors propagate.
 */
async function fetchOpenAICompat(url, o) {
    const init = { headers: { Authorization: `Bearer ${o.apiKey}` } };
    const pending = o.fetchImpl(url, init);
    const response = await withTimeout(pending, o.timeoutMs);
    if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
    }
    const payload = await response.json();
    return payload.data ?? [];
}
|
|
105
|
+
/**
 * List models from the Gemini `v1beta/models` endpoint.
 *
 * The API key travels as the `key` query parameter. Returned names have
 * their `models/` prefix stripped so they can be used directly as model ids.
 *
 * @param {{ apiKey: string, timeoutMs: number, fetchImpl: Function }} o
 *   API key, time limit for the request, and the fetch implementation.
 * @returns {Promise<Array<{ id: string, display_name: *, context_length: * }>>}
 *   One raw entry per listed model that has a string `name`.
 * @throws {Error} `HTTP <status>` on a non-OK response; timeout and fetch
 *   errors propagate.
 */
async function fetchGemini(o) {
    // The listing is paginated; request the largest page so the picker is not
    // cut off after the default first page.
    const url = `https://generativelanguage.googleapis.com/v1beta/models?key=${encodeURIComponent(o.apiKey)}&pageSize=1000`;
    const res = await withTimeout(o.fetchImpl(url), o.timeoutMs);
    if (!res.ok)
        throw new Error(`HTTP ${res.status}`);
    const body = await res.json();
    return (body.models ?? [])
        // A single entry without a string name must not throw and discard the
        // whole live list.
        .filter((m) => m && typeof m.name === 'string')
        // Gemini ids come back as "models/gemini-2.0-flash" — strip the prefix.
        .map((m) => ({
        id: m.name.replace(/^models\//, ''),
        display_name: m.displayName,
        context_length: m.inputTokenLimit,
    }));
}
|
|
118
|
+
/**
 * List models from an Ollama server's `/api/tags` endpoint.
 *
 * @param {string} baseUrl Server root; trailing slashes are ignored.
 * @param {{ timeoutMs: number, fetchImpl: Function }} o Time limit for the
 *   request and the fetch implementation.
 * @returns {Promise<Array<{ id: *, display_name: * }>>} One raw entry per
 *   item in the response's `models` array, using its `name` for both fields.
 * @throws {Error} `HTTP <status>` on a non-OK response; timeout and fetch
 *   errors propagate.
 */
async function fetchOllama(baseUrl, o) {
    const root = baseUrl.replace(/\/+$/, '');
    const pending = o.fetchImpl(`${root}/api/tags`);
    const response = await withTimeout(pending, o.timeoutMs);
    if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
    }
    const payload = await response.json();
    const tags = payload.models ?? [];
    return tags.map((tag) => ({ id: tag.name, display_name: tag.name }));
}
|
|
125
|
+
/**
 * Resolve the model list for `opts.providerId`.
 *
 * anthropic, openai, groq and gemini are queried live when an API key is
 * supplied and fall back with reason "no API key" otherwise. openrouter and
 * ollama are queried without requiring a key. Any other provider goes
 * straight to the curated catalog. A thrown error or an empty normalised
 * list also yields the catalog fallback, carrying the failure reason.
 *
 * @param {object} opts `providerId` plus optional `apiKey`, `baseUrl`
 *   (Ollama), `timeoutMs` and `fetchImpl`.
 * @returns {Promise<object>} `{ models, source: 'live' }` on success,
 *   otherwise whatever `fallbackFor` returns.
 */
async function fetchModels(opts) {
    const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
    const fetchImpl = opts.fetchImpl ?? fetch;
    const apiKey = opts.apiKey ?? '';
    const { providerId } = opts;
    // Providers whose listing is only attempted when a key is present.
    const keyedFetchers = new Map([
        ['anthropic', () => fetchAnthropic({ apiKey, timeoutMs, fetchImpl })],
        ['openai', () => fetchOpenAICompat('https://api.openai.com/v1/models', { apiKey, timeoutMs, fetchImpl })],
        ['groq', () => fetchOpenAICompat('https://api.groq.com/openai/v1/models', { apiKey, timeoutMs, fetchImpl })],
        ['gemini', () => fetchGemini({ apiKey, timeoutMs, fetchImpl })],
    ]);
    try {
        let raws;
        const keyedFetch = keyedFetchers.get(providerId);
        if (keyedFetch) {
            if (!apiKey) {
                return fallbackFor(providerId, 'no API key');
            }
            raws = await keyedFetch();
        }
        else if (providerId === 'openrouter') {
            // OpenRouter exposes /models without auth, but auth gives the user's
            // available subset — we use the public list to populate the picker.
            raws = await fetchOpenAICompat('https://openrouter.ai/api/v1/models', { apiKey: apiKey || 'anon', timeoutMs, fetchImpl });
        }
        else if (providerId === 'ollama') {
            raws = await fetchOllama(opts.baseUrl ?? 'http://localhost:11434', { timeoutMs, fetchImpl });
        }
        else {
            // Every other provider — together, nvidia, deepseek, mistral, custom,
            // claude-pro, chatgpt-plus, etc. — uses the curated catalog.
            return fallbackFor(providerId);
        }
        const models = normalise(providerId, raws);
        return models.length === 0
            ? fallbackFor(providerId, 'empty live response')
            : { models, source: 'live' };
    }
    catch (err) {
        return fallbackFor(providerId, err instanceof Error ? err.message : String(err));
    }
}
|
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) 2026 Shiva Deore (Taracod).
|
|
4
|
+
* Licensed under AGPL-3.0. See LICENSE for details.
|
|
5
|
+
*
|
|
6
|
+
* Aiden — local-first agent.
|
|
7
|
+
*/
|
|
8
|
+
/**
|
|
9
|
+
* core/v4/providers/probe.ts — ONB1 slice 7.
|
|
10
|
+
*
|
|
11
|
+
* Three-step connection validator run after the user enters an API
|
|
12
|
+
* key during the onboarding flow. Replaces the wizard's single
|
|
13
|
+
* `validateProviderKey` round-trip with discrete probes so the user
|
|
14
|
+
* sees exactly which capability fails:
|
|
15
|
+
*
|
|
16
|
+
* Step 1 Sending test request → key + auth header accepted
|
|
17
|
+
* Step 2 Verifying model access → chosen model is reachable
|
|
18
|
+
* Step 3 Checking tool calls → tool_use is supported
|
|
19
|
+
*
|
|
20
|
+
* Each step returns a `ProbeStepResult` independently; the runner
|
|
21
|
+
* stops on the first failure. The error envelope is categorised so
|
|
22
|
+
* the UX can branch: auth → "key was rejected"; rate-limit → "wait
|
|
23
|
+
* or try another provider"; model-not-found → "model not on this
|
|
24
|
+
* key's allow-list"; network → "couldn't reach API".
|
|
25
|
+
*
|
|
26
|
+
* No client-side cost: each probe uses the cheapest call available
|
|
27
|
+
* (max_tokens=1, GET /models, or a no-op tool-definition send).
|
|
28
|
+
*/
|
|
29
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
30
|
+
exports.runProbe = runProbe;
|
|
31
|
+
const DEFAULT_TIMEOUT_MS = 8000;
|
|
32
|
+
/**
 * Settle with `p`, or reject once `ms` milliseconds have elapsed, whichever
 * happens first. The timeout error carries `code: 'TIMEOUT'` so callers can
 * tell it apart from other failures. The timer is cleared as soon as `p`
 * settles.
 *
 * @param {Promise<*>} p Promise to guard.
 * @param {number} ms Time limit in milliseconds.
 * @returns {Promise<*>} Mirrors `p`, or rejects with the timeout error.
 */
function withTimeout(p, ms) {
    let timer;
    // Wrap a settle callback so the pending timer is always cancelled first.
    const cancelThen = (settle) => (value) => {
        clearTimeout(timer);
        settle(value);
    };
    return new Promise((resolve, reject) => {
        timer = setTimeout(() => {
            const timeoutError = new Error(`timed out after ${ms}ms`);
            reject(Object.assign(timeoutError, { code: 'TIMEOUT' }));
        }, ms);
        p.then(cancelThen(resolve), cancelThen(reject));
    });
}
|
|
38
|
+
/**
 * Convert a `Retry-After` header value into whole seconds.
 *
 * The header may carry either a number of seconds or an HTTP-date. Values
 * that start with digits are read as seconds; anything else is tried as a
 * date and converted to the time remaining from now, never negative.
 *
 * @param {string | null | undefined} value Raw header value.
 * @returns {number | undefined} Seconds to wait, or `undefined` when the
 *   value is absent or unparseable.
 */
function retryAfterToSeconds(value) {
    if (!value)
        return undefined;
    const text = String(value).trim();
    const seconds = Number.parseInt(text, 10);
    if (Number.isFinite(seconds))
        return seconds;
    const at = Date.parse(text);
    if (Number.isNaN(at))
        return undefined;
    return Math.max(0, Math.ceil((at - Date.now()) / 1000));
}
/**
 * Map a non-2xx HTTP status to a probe failure envelope.
 *
 * 401/403 → auth, 404 → model-not-found, 429 → rate-limit (with
 * `retryAfterSec` when the header could be read), 5xx → network, anything
 * else → unknown.
 *
 * @param {number} status HTTP status code.
 * @param {string | null} [retryAfter] Raw `Retry-After` header, if any.
 * @returns {{ category: string, reason: string, retryAfterSec?: number }}
 */
function classifyStatus(status, retryAfter) {
    if (status === 401 || status === 403)
        return { category: 'auth', reason: 'API key rejected' };
    if (status === 404)
        return { category: 'model-not-found', reason: 'Model not on this key\'s allow-list' };
    if (status === 429) {
        return { category: 'rate-limit', reason: 'Rate-limited by provider', retryAfterSec: retryAfterToSeconds(retryAfter) };
    }
    if (status >= 500)
        return { category: 'network', reason: `Upstream error (HTTP ${status})` };
    return { category: 'unknown', reason: `HTTP ${status}` };
}
|
|
51
|
+
/**
 * Map a thrown value to a probe failure envelope.
 *
 * Non-object values are reported as `unknown` with their string form.
 * Objects flagged as a timeout (`code === 'TIMEOUT'`) or an abort
 * (`name === 'AbortError'`) become a fixed "Request timed out" network
 * failure; every other object is a network failure carrying its message,
 * shortened to 160 characters.
 *
 * @param {*} err The caught value.
 * @returns {{ category: string, reason: string }}
 */
function classifyError(err) {
    if (!err || typeof err !== 'object') {
        return { category: 'unknown', reason: String(err) };
    }
    const timedOut = err.code === 'TIMEOUT' || err.name === 'AbortError';
    if (timedOut) {
        return { category: 'network', reason: 'Request timed out' };
    }
    const text = err.message ?? String(err);
    // Keep the reason short enough for a single status line.
    const reason = text.length > 160 ? text.slice(0, 157) + '...' : text;
    return { category: 'network', reason };
}
|
|
61
|
+
/**
 * Step 1 — key works. Build a GET request that exercises the key for the
 * given provider: the provider's model listing for most, `/auth/key` for
 * OpenRouter, and `/api/tags` for Ollama (which sends no credentials).
 *
 * @param {object} o Probe options; reads `providerId`, `apiKey` and, for
 *   `ollama` / `custom`, `baseUrl`.
 * @returns {{ url: string, method: 'GET', headers: object } | null} The
 *   request, or `null` when the provider is not recognised or `custom` has
 *   no base URL.
 */
function buildAuthRequest(o) {
    const { apiKey, providerId } = o;
    const get = (url, headers = {}) => ({ url, method: 'GET', headers });
    const bearer = () => ({ Authorization: `Bearer ${apiKey}` });
    const trimSlashes = (text) => text.replace(/\/+$/, '');
    // Providers probed with a plain bearer-authenticated GET.
    const bearerEndpoints = new Map([
        ['openai', 'https://api.openai.com/v1/models'],
        ['groq', 'https://api.groq.com/openai/v1/models'],
        ['openrouter', 'https://openrouter.ai/api/v1/auth/key'],
        ['together', 'https://api.together.xyz/v1/models'],
        ['nvidia', 'https://integrate.api.nvidia.com/v1/models'],
    ]);
    const endpoint = bearerEndpoints.get(providerId);
    if (endpoint !== undefined) {
        return get(endpoint, bearer());
    }
    if (providerId === 'anthropic') {
        return get('https://api.anthropic.com/v1/models', { 'x-api-key': apiKey, 'anthropic-version': '2023-06-01' });
    }
    if (providerId === 'gemini') {
        return get(`https://generativelanguage.googleapis.com/v1beta/models?key=${encodeURIComponent(apiKey)}`);
    }
    if (providerId === 'ollama') {
        const root = trimSlashes(o.baseUrl ?? 'http://localhost:11434');
        return get(`${root}/api/tags`);
    }
    if (providerId === 'custom') {
        const root = trimSlashes(o.baseUrl ?? '');
        return root ? get(`${root}/models`, bearer()) : null;
    }
    return null;
}
|
|
101
|
+
/**
 * Step 2 — model access. Build the dedicated model-access request, if the
 * provider needs one.
 *
 * Only Anthropic gets a request here: a one-token `/v1/messages` call
 * against the chosen model. For every other provider this returns `null`,
 * which tells the runner to check the model against the listing it already
 * fetched in step 1.
 *
 * @param {object} o Probe options; reads `providerId`, `apiKey`, `modelId`.
 * @returns {{ url: string, method: 'POST', headers: object, body: string } | null}
 */
function buildModelCheckRequest(o) {
    if (o.providerId !== 'anthropic') {
        return null;
    }
    const payload = {
        model: o.modelId,
        max_tokens: 1,
        messages: [{ role: 'user', content: 'ping' }],
    };
    const headers = {
        'x-api-key': o.apiKey,
        'anthropic-version': '2023-06-01',
        'content-type': 'application/json',
    };
    return {
        url: 'https://api.anthropic.com/v1/messages',
        method: 'POST',
        headers,
        body: JSON.stringify(payload),
    };
}
|
|
127
|
+
/**
 * Step 3 — tool support. Build a one-token completion request that declares
 * a single no-op tool, so the provider's response shows whether the chosen
 * model accepts tool definitions.
 *
 * Anthropic uses its `/v1/messages` shape; openai, groq, openrouter,
 * together and nvidia use the OpenAI-style `/chat/completions` shape. Any
 * other provider returns `null`, meaning the runner skips this step.
 *
 * @param {object} o Probe options; reads `providerId`, `apiKey`, `modelId`.
 * @returns {{ url: string, method: 'POST', headers: object, body: string } | null}
 */
function buildToolCheckRequest(o) {
    const { providerId, apiKey, modelId } = o;
    if (providerId === 'anthropic') {
        const payload = {
            model: modelId,
            max_tokens: 1,
            tools: [{ name: 'noop', description: 'noop', input_schema: { type: 'object', properties: {} } }],
            messages: [{ role: 'user', content: 'noop' }],
        };
        return {
            url: 'https://api.anthropic.com/v1/messages',
            method: 'POST',
            headers: { 'x-api-key': apiKey, 'anthropic-version': '2023-06-01', 'content-type': 'application/json' },
            body: JSON.stringify(payload),
        };
    }
    // Chat-completions endpoint per OpenAI-compatible provider.
    const chatEndpoints = new Map([
        ['openai', 'https://api.openai.com/v1/chat/completions'],
        ['groq', 'https://api.groq.com/openai/v1/chat/completions'],
        ['openrouter', 'https://openrouter.ai/api/v1/chat/completions'],
        ['together', 'https://api.together.xyz/v1/chat/completions'],
        ['nvidia', 'https://integrate.api.nvidia.com/v1/chat/completions'],
    ]);
    const url = chatEndpoints.get(providerId);
    if (url === undefined) {
        // Local (Ollama) and providers without tool_use support — skip.
        return null;
    }
    const payload = {
        model: modelId,
        max_tokens: 1,
        messages: [{ role: 'user', content: 'noop' }],
        tools: [{ type: 'function', function: { name: 'noop', parameters: { type: 'object', properties: {} } } }],
    };
    return {
        url,
        method: 'POST',
        headers: { Authorization: `Bearer ${apiKey}`, 'content-type': 'application/json' },
        body: JSON.stringify(payload),
    };
}
|
|
170
|
+
/**
 * Send one probe request and capture what the classifier needs from the
 * response.
 *
 * @param {{ url: string, method: string, headers: object, body?: string }} req
 *   Request to send.
 * @param {object} o Probe options; reads optional `fetchImpl` (defaults to
 *   the global `fetch`) and `timeoutMs` (defaults to `DEFAULT_TIMEOUT_MS`).
 * @returns {Promise<{ status: number, bodyText: string, retryAfter: (string|null) }>}
 *   Status code, response text (empty when reading the body fails) and the
 *   raw `retry-after` header.
 */
async function runRequest(req, o) {
    const doFetch = o.fetchImpl ?? fetch;
    const limitMs = o.timeoutMs ?? DEFAULT_TIMEOUT_MS;
    const { url, method, headers, body } = req;
    const response = await withTimeout(doFetch(url, { method, headers, body }), limitMs);
    const retryAfter = response.headers.get('retry-after');
    // An unreadable body must not mask the status code we already have.
    const bodyText = await response.text().catch(() => '');
    return { status: response.status, bodyText, retryAfter };
}
|
|
182
|
+
/**
 * Decide whether `modelId` appears in a provider's listing response.
 *
 * Understands both listing shapes the probe endpoints return: an array under
 * `data` whose entries carry `id`, and an array under `models` whose entries
 * carry `name` (optionally prefixed with `models/`). An entry named
 * `<modelId>:latest` also counts as a match, since an untagged model name
 * refers to the `latest` tag.
 *
 * @param {string} bodyText Raw response body from the step-1 request.
 * @param {string} modelId Model the user picked.
 * @returns {'listed' | 'missing' | 'unknown'} `unknown` means the body is
 *   valid JSON but cannot settle the question: it holds no model array
 *   (e.g. a key-info response), or the listing is paginated and the model is
 *   not on this page.
 */
function lookupModelInListing(bodyText, modelId) {
    let body;
    try {
        body = JSON.parse(bodyText);
    }
    catch {
        // Unparseable listing — reported as missing, as before.
        return 'missing';
    }
    if (!body || typeof body !== 'object')
        return 'missing';
    const entries = Array.isArray(body.data) ? body.data
        : Array.isArray(body.models) ? body.models
            : null;
    if (!entries)
        return 'unknown';
    const stripPrefix = (text) => text.replace(/^models\//, '');
    const wanted = typeof modelId === 'string' ? stripPrefix(modelId) : modelId;
    const listed = entries.some((entry) => {
        const raw = entry?.id ?? entry?.name;
        if (typeof raw !== 'string')
            return false;
        const id = stripPrefix(raw);
        return id === wanted || id === `${wanted}:latest`;
    });
    if (listed)
        return 'listed';
    return body.nextPageToken ? 'unknown' : 'missing';
}
/**
 * Run the 3-step probe (auth → model → tools). Stops on first failure and
 * returns the partial trace so the UX can render which step turned red.
 *
 * Step 2 uses a dedicated request when the provider has one; otherwise it
 * checks the step-1 response body for the model. When that body cannot
 * answer the question, the step passes rather than blocking on a false
 * "model not found". Step 3 is skipped (and passes) for providers without a
 * tool-check request.
 *
 * @param {object} o Probe options (`providerId`, `apiKey`, `modelId`, and
 *   optionally `baseUrl`, `fetchImpl`, `timeoutMs`).
 * @returns {Promise<{ ok: boolean, steps: Array<object> }>} Overall verdict
 *   plus one `{ step, ok, category?, reason?, retryAfterSec? }` entry per
 *   step that ran.
 */
async function runProbe(o) {
    const steps = [];
    const failed = () => ({ ok: false, steps });
    // Step 1 — auth
    const authReq = buildAuthRequest(o);
    if (!authReq) {
        steps.push({ step: 'auth', ok: false, category: 'unknown', reason: 'No probe endpoint for this provider' });
        return failed();
    }
    let listingBody = '';
    try {
        const r = await runRequest(authReq, o);
        if (r.status < 200 || r.status >= 300) {
            steps.push({ step: 'auth', ok: false, ...classifyStatus(r.status, r.retryAfter) });
            return failed();
        }
        steps.push({ step: 'auth', ok: true });
        listingBody = r.bodyText;
    }
    catch (err) {
        steps.push({ step: 'auth', ok: false, ...classifyError(err) });
        return failed();
    }
    // Step 2 — model access
    const modelReq = buildModelCheckRequest(o);
    if (modelReq) {
        // Provider needs a real completion call (e.g. Anthropic).
        try {
            const r = await runRequest(modelReq, o);
            if (r.status < 200 || r.status >= 300) {
                steps.push({ step: 'model', ok: false, ...classifyStatus(r.status, r.retryAfter) });
                return failed();
            }
            steps.push({ step: 'model', ok: true });
        }
        catch (err) {
            steps.push({ step: 'model', ok: false, ...classifyError(err) });
            return failed();
        }
    }
    else if (lookupModelInListing(listingBody, o.modelId) === 'missing') {
        steps.push({ step: 'model', ok: false, category: 'model-not-found', reason: `Model '${o.modelId}' not in this key's catalog` });
        return failed();
    }
    else {
        // Listed, or the step-1 body cannot tell us either way.
        steps.push({ step: 'model', ok: true });
    }
    // Step 3 — tool support
    const toolReq = buildToolCheckRequest(o);
    if (!toolReq) {
        steps.push({ step: 'tools', ok: true });
        return { ok: true, steps };
    }
    try {
        const r = await runRequest(toolReq, o);
        if (r.status >= 200 && r.status < 300) {
            steps.push({ step: 'tools', ok: true });
            return { ok: true, steps };
        }
        // 400 with a body that mentions tools is the typical "model doesn't
        // support tool_use" signature — we categorise as tool-unsupported
        // rather than generic auth.
        if (r.status === 400 && /tool/i.test(r.bodyText)) {
            steps.push({ step: 'tools', ok: false, category: 'tool-unsupported', reason: 'Model does not support tool calls' });
        }
        else {
            steps.push({ step: 'tools', ok: false, ...classifyStatus(r.status, r.retryAfter) });
        }
        return failed();
    }
    catch (err) {
        steps.push({ step: 'tools', ok: false, ...classifyError(err) });
        return failed();
    }
}
|
|
@@ -94,7 +94,7 @@ function expandPathInline(input, cwd) {
|
|
|
94
94
|
}
|
|
95
95
|
/**
|
|
96
96
|
* Boundary-aware containment check. `path.relative` avoids the
|
|
97
|
-
*
|
|
97
|
+
* `<root>/user-evil` vs `<root>/user` false positive that a naive
|
|
98
98
|
* `startsWith` would produce.
|
|
99
99
|
*/
|
|
100
100
|
function isWithin(child, parent) {
|
|
@@ -33,6 +33,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
33
33
|
exports.ProviderNotFoundError = exports.SUBAGENT_BLOCKED_TOOL_NAMES = void 0;
|
|
34
34
|
exports.buildChildAgent = buildChildAgent;
|
|
35
35
|
const approvalEngine_1 = require("../../../moat/approvalEngine");
|
|
36
|
+
const honestyEnforcement_1 = require("../../../moat/honestyEnforcement");
|
|
36
37
|
const aidenAgent_1 = require("../aidenAgent");
|
|
37
38
|
const providerFallback_1 = require("../providerFallback");
|
|
38
39
|
// ── Hard-coded blocklist (Q5 from design doc §2) ────────────────────────────
|
|
@@ -177,10 +178,16 @@ function buildChildAgent(deps, input) {
|
|
|
177
178
|
// Pure no-op when runStore is absent (unit tests of buildChildAgent).
|
|
178
179
|
const onToolCall = buildOnToolCall(deps);
|
|
179
180
|
// ── 7. Build the child agent ─────────────────────────────────────────────
|
|
180
|
-
// Focused worker config: omit plannerGuard,
|
|
181
|
-
//
|
|
182
|
-
//
|
|
183
|
-
//
|
|
181
|
+
// Focused worker config: omit plannerGuard, skillTeacher, skillMiner,
|
|
182
|
+
// contextCompressor, promptCaching, promptBuilder. Match the daemon
|
|
183
|
+
// agent's "act on the task, don't self-improve" shape.
|
|
184
|
+
//
|
|
185
|
+
// v4.7.0: HonestyEnforcement is now structural (reads tool trace only,
|
|
186
|
+
// no natural-language scanning) and cheap enough to run in subagents.
|
|
187
|
+
// Mode is 'detect' here — events are captured into the child's run
|
|
188
|
+
// record but never produce user-visible output (subagents have no
|
|
189
|
+
// chat surface; the parent assembles their summary).
|
|
190
|
+
const childHonestyEnforcement = new honestyEnforcement_1.HonestyEnforcement('detect');
|
|
184
191
|
const agent = new aidenAgent_1.AidenAgent({
|
|
185
192
|
provider: childProvider,
|
|
186
193
|
tools: childTools,
|
|
@@ -192,6 +199,7 @@ function buildChildAgent(deps, input) {
|
|
|
192
199
|
resolveVerifiedFlag: deps.resolveVerifiedFlag,
|
|
193
200
|
resolveToolset: deps.resolveToolset,
|
|
194
201
|
resolveMutates: deps.resolveMutates,
|
|
202
|
+
honestyEnforcement: childHonestyEnforcement,
|
|
195
203
|
onToolCall,
|
|
196
204
|
// iterationBudgetInjection inherits the default (true) — child
|
|
197
205
|
// sees its own remaining-budget hint near the end of the run.
|