nubos-pilot 1.2.4 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -1
- package/README.md +2 -1
- package/SECURITY.md +3 -4
- package/bin/np-tools/_commands.cjs +1 -0
- package/bin/np-tools/learnings.cjs +1 -1
- package/bin/np-tools/resolve-model.cjs +55 -1
- package/bin/np-tools/resolve-model.test.cjs +139 -0
- package/bin/np-tools/security.cjs +1 -1
- package/bin/np-tools/spawn-headless.cjs +100 -1
- package/bin/np-tools/spawn-headless.test.cjs +108 -58
- package/bin/np-tools/spawn-offhost.cjs +93 -0
- package/bin/np-tools/spawn-offhost.test.cjs +38 -0
- package/lib/agents.cjs +16 -2
- package/lib/config-schema.cjs +5 -1
- package/lib/learnings/extract.cjs +4 -4
- package/lib/learnings/extract.test.cjs +8 -8
- package/lib/model-providers.cjs +118 -0
- package/lib/model-providers.test.cjs +85 -0
- package/lib/runtime/agent-loop.cjs +64 -0
- package/lib/runtime/agent-loop.test.cjs +135 -0
- package/lib/runtime/dispatch.cjs +174 -0
- package/lib/runtime/dispatch.test.cjs +193 -0
- package/lib/runtime/preflight.cjs +68 -0
- package/lib/runtime/preflight.test.cjs +62 -0
- package/lib/runtime/providers/openai-compat.cjs +102 -0
- package/lib/runtime/providers/openai-compat.test.cjs +103 -0
- package/lib/runtime/tools/index.cjs +415 -0
- package/lib/runtime/tools/index.test.cjs +230 -0
- package/lib/security/review.cjs +4 -4
- package/lib/security/review.test.cjs +6 -6
- package/np-tools.cjs +1 -0
- package/package.json +1 -1
- package/workflows/add-tests.md +41 -0
- package/workflows/architect-phase.md +19 -0
- package/workflows/discuss-phase.md +29 -10
- package/workflows/execute-phase.md +93 -4
- package/workflows/plan-phase.md +57 -16
- package/workflows/research-phase.md +45 -0
- package/workflows/scan-codebase.md +21 -3
- package/workflows/validate-phase.md +30 -13
- package/workflows/verify-work.md +17 -0
|
@@ -124,7 +124,7 @@ function parseExtractorOutput(raw) {
|
|
|
124
124
|
return { candidates, parse_ok: true };
|
|
125
125
|
}
|
|
126
126
|
|
|
127
|
-
function _defaultSpawn(promptText, opts) {
|
|
127
|
+
async function _defaultSpawn(promptText, opts) {
|
|
128
128
|
const spawnHeadless = require('../../bin/np-tools/spawn-headless.cjs');
|
|
129
129
|
const tmp = os.tmpdir();
|
|
130
130
|
const tag = process.pid + '-' + crypto.randomBytes(4).toString('hex');
|
|
@@ -132,7 +132,7 @@ function _defaultSpawn(promptText, opts) {
|
|
|
132
132
|
const outputPath = path.join(tmp, 'np-learn-out-' + tag + '.json');
|
|
133
133
|
fs.writeFileSync(promptPath, promptText, 'utf-8');
|
|
134
134
|
try {
|
|
135
|
-
spawnHeadless.run(
|
|
135
|
+
await spawnHeadless.run(
|
|
136
136
|
['--agent', EXTRACTOR_AGENT, '--prompt-path', promptPath, '--output-path', outputPath,
|
|
137
137
|
'--timeout-ms', String(opts.timeoutMs)],
|
|
138
138
|
{ cwd: opts.cwd, stdout: { write: () => {} } },
|
|
@@ -144,7 +144,7 @@ function _defaultSpawn(promptText, opts) {
|
|
|
144
144
|
}
|
|
145
145
|
}
|
|
146
146
|
|
|
147
|
-
function runExtract(opts) {
|
|
147
|
+
async function runExtract(opts) {
|
|
148
148
|
const o = opts || {};
|
|
149
149
|
const cwd = o.cwd || process.cwd();
|
|
150
150
|
const config = o.config || {};
|
|
@@ -164,7 +164,7 @@ function runExtract(opts) {
|
|
|
164
164
|
const promptText = buildExtractorPrompt(diff);
|
|
165
165
|
let raw = '';
|
|
166
166
|
try {
|
|
167
|
-
raw = spawn(promptText, { cwd, timeoutMs: config.timeout_ms || 120000 });
|
|
167
|
+
raw = await spawn(promptText, { cwd, timeoutMs: config.timeout_ms || 120000 });
|
|
168
168
|
} catch {
|
|
169
169
|
return { ran: true, logged: 0, reason: 'spawn-failed' };
|
|
170
170
|
}
|
|
@@ -67,31 +67,31 @@ test('EX-6: non-JSON output → parse_ok false', () => {
|
|
|
67
67
|
assert.strictEqual(extract.parseExtractorOutput('').parse_ok, false);
|
|
68
68
|
});
|
|
69
69
|
|
|
70
|
-
test('EX-7: runExtract on a non-repo returns not-a-repo, logs nothing', () => {
|
|
70
|
+
test('EX-7: runExtract on a non-repo returns not-a-repo, logs nothing', async () => {
|
|
71
71
|
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'np-norepo-'));
|
|
72
72
|
try {
|
|
73
73
|
const logged = [];
|
|
74
|
-
const r = extract.runExtract({ cwd: dir, spawnImpl: () => '{}', logImpl: (c) => logged.push(c) });
|
|
74
|
+
const r = await extract.runExtract({ cwd: dir, spawnImpl: () => '{}', logImpl: (c) => logged.push(c) });
|
|
75
75
|
assert.strictEqual(r.ran, false);
|
|
76
76
|
assert.strictEqual(r.reason, 'not-a-repo');
|
|
77
77
|
assert.strictEqual(logged.length, 0);
|
|
78
78
|
} finally { fs.rmSync(dir, { recursive: true, force: true }); }
|
|
79
79
|
});
|
|
80
80
|
|
|
81
|
-
test('EX-8: runExtract on empty repo (no commit, no changes) → empty-diff', () => {
|
|
81
|
+
test('EX-8: runExtract on empty repo (no commit, no changes) → empty-diff', async () => {
|
|
82
82
|
const dir = _gitRepo(false);
|
|
83
83
|
try {
|
|
84
|
-
const r = extract.runExtract({ cwd: dir, spawnImpl: () => '{}', logImpl: () => {} });
|
|
84
|
+
const r = await extract.runExtract({ cwd: dir, spawnImpl: () => '{}', logImpl: () => {} });
|
|
85
85
|
assert.strictEqual(r.ran, true);
|
|
86
86
|
assert.strictEqual(r.reason, 'empty-diff');
|
|
87
87
|
} finally { fs.rmSync(dir, { recursive: true, force: true }); }
|
|
88
88
|
});
|
|
89
89
|
|
|
90
|
-
test('EX-9: runExtract over a commit logs parsed candidates', () => {
|
|
90
|
+
test('EX-9: runExtract over a commit logs parsed candidates', async () => {
|
|
91
91
|
const dir = _gitRepo(true);
|
|
92
92
|
try {
|
|
93
93
|
const logged = [];
|
|
94
|
-
const r = extract.runExtract({
|
|
94
|
+
const r = await extract.runExtract({
|
|
95
95
|
cwd: dir,
|
|
96
96
|
spawnImpl: () => JSON.stringify({ result: JSON.stringify({ learnings: [
|
|
97
97
|
{ pattern: 'keep add() pure and total', outcome: 'verified' },
|
|
@@ -104,11 +104,11 @@ test('EX-9: runExtract over a commit logs parsed candidates', () => {
|
|
|
104
104
|
} finally { fs.rmSync(dir, { recursive: true, force: true }); }
|
|
105
105
|
});
|
|
106
106
|
|
|
107
|
-
test('EX-10: runExtract with unparseable spawn output → parse-failed, no log', () => {
|
|
107
|
+
test('EX-10: runExtract with unparseable spawn output → parse-failed, no log', async () => {
|
|
108
108
|
const dir = _gitRepo(true);
|
|
109
109
|
try {
|
|
110
110
|
const logged = [];
|
|
111
|
-
const r = extract.runExtract({ cwd: dir, spawnImpl: () => 'garbage', logImpl: (c) => logged.push(c) });
|
|
111
|
+
const r = await extract.runExtract({ cwd: dir, spawnImpl: () => 'garbage', logImpl: (c) => logged.push(c) });
|
|
112
112
|
assert.strictEqual(r.reason, 'parse-failed');
|
|
113
113
|
assert.strictEqual(logged.length, 0);
|
|
114
114
|
} finally { fs.rmSync(dir, { recursive: true, force: true }); }
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { NubosPilotError } = require('./core.cjs');
|
|
4
|
+
|
|
5
|
+
const VALID_PROVIDER_KINDS = Object.freeze(['native', 'openai-compat']);
|
|
6
|
+
const DEFAULT_PROVIDER = 'claude';
|
|
7
|
+
|
|
8
|
+
/**
 * Find the agent_routing entry that applies to an agent name.
 *
 * An own key that equals the agent name wins outright ("exact"). Otherwise
 * every key longer than one character that ends in "*" is treated as a prefix
 * pattern, and the pattern with the longest matching prefix is returned
 * ("glob"). A bare "*" key is never considered.
 *
 * @param {string} agentName - Agent to look up; a falsy value yields null.
 * @param {object} routing - Map of routing key to routing entry.
 * @returns {{key: string, entry: *, match: string}|null} The winning entry, or
 *   null when nothing applies.
 */
function matchRouting(agentName, routing) {
  const usable = agentName && routing && typeof routing === 'object';
  if (!usable) return null;

  if (Object.prototype.hasOwnProperty.call(routing, agentName)) {
    return { key: agentName, entry: routing[agentName], match: 'exact' };
  }

  let winner = null;
  let winnerLen = -1; // shorter than any real prefix, so the first hit always wins
  Object.keys(routing).forEach((pattern) => {
    const isGlob = pattern.length > 1 && pattern.endsWith('*');
    if (!isGlob) return;
    const stem = pattern.slice(0, -1);
    if (agentName.startsWith(stem) && stem.length > winnerLen) {
      winner = { key: pattern, entry: routing[pattern], match: 'glob' };
      winnerLen = stem.length;
    }
  });
  return winner;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Resolve which provider, provider kind and model an agent should run on.
 *
 * Provider selection, in priority order:
 *   1. the agent_routing entry matched for `agentName` (which may also pin a model),
 *   2. `model_providers.default` when it is a non-empty string,
 *   3. the built-in DEFAULT_PROVIDER.
 *
 * The built-in default needs no definition: when it is selected and
 * `model_providers` does not define it, it is treated as `{ kind: 'native' }`.
 * Any other provider must be defined as an own property of `model_providers`.
 *
 * Model selection: a native provider uses the pinned model or null. Any other
 * kind uses the pinned model, else `models[tier]` from the provider definition.
 *
 * @param {object} args
 * @param {string} [args.agentName] - Agent being resolved.
 * @param {string} [args.tier] - Tier used to index the provider's `models` map.
 * @param {object} [args.config] - Config holding `model_providers` / `agent_routing`.
 * @returns {{provider: string, kind: string, model: (string|null),
 *   baseUrl: (string|null), apiKeyEnv: (string|null), routed: boolean,
 *   source: string}}
 * @throws {NubosPilotError} agent-routing-invalid-entry, agent-routing-missing-provider,
 *   provider-undefined, provider-invalid-kind or provider-model-unresolved.
 */
function resolveProvider({ agentName, tier, config }) {
  const cfg = config || {};
  const providers = cfg.model_providers;
  const routing = cfg.agent_routing;

  const matched = matchRouting(agentName || null, routing);

  let providerName;
  let pinnedModel = null;
  let source;
  if (matched) {
    const entry = matched.entry;
    if (!entry || typeof entry !== 'object') {
      throw new NubosPilotError(
        'agent-routing-invalid-entry',
        'agent_routing["' + matched.key + '"] must be an object with a "provider" field',
        { key: matched.key },
      );
    }
    providerName = entry.provider;
    if (typeof providerName !== 'string' || !providerName) {
      throw new NubosPilotError(
        'agent-routing-missing-provider',
        'agent_routing["' + matched.key + '"] has no "provider" field',
        { key: matched.key },
      );
    }
    if (typeof entry.model === 'string' && entry.model) pinnedModel = entry.model;
    source = 'agent_routing["' + matched.key + '"]';
  } else if (providers && typeof providers.default === 'string' && providers.default) {
    providerName = providers.default;
    source = 'model_providers.default';
  } else {
    providerName = DEFAULT_PROVIDER;
    source = 'default';
  }

  // Only own properties count as provider definitions. A plain `providers[name]`
  // read would let a name such as "__proto__" resolve to Object.prototype and be
  // accepted as an implicit native provider instead of failing loudly.
  const definesProvider = !!providers && typeof providers === 'object'
    && Object.prototype.hasOwnProperty.call(providers, providerName)
    && !!providers[providerName]
    && typeof providers[providerName] === 'object';

  let def;
  if (providerName === DEFAULT_PROVIDER && (!providers || !providers[DEFAULT_PROVIDER])) {
    def = { kind: 'native' };
  } else if (definesProvider) {
    def = providers[providerName];
  } else {
    throw new NubosPilotError(
      'provider-undefined',
      source + ' references provider "' + providerName
        + '", but model_providers.' + providerName + ' is not defined',
      { provider: providerName, source },
    );
  }

  // A definition without an explicit kind is treated as native.
  const kind = def.kind || 'native';
  if (!VALID_PROVIDER_KINDS.includes(kind)) {
    throw new NubosPilotError(
      'provider-invalid-kind',
      'model_providers.' + providerName + '.kind must be one of ' + VALID_PROVIDER_KINDS.join('/'),
      { provider: providerName, got: kind, allowed: VALID_PROVIDER_KINDS.slice() },
    );
  }

  let model = null;
  if (kind === 'native') {
    model = pinnedModel || null;
  } else if (pinnedModel) {
    model = pinnedModel;
  } else if (def.models && typeof def.models === 'object' && typeof def.models[tier] === 'string' && def.models[tier]) {
    model = def.models[tier];
  } else {
    throw new NubosPilotError(
      'provider-model-unresolved',
      'cannot resolve a model for provider "' + providerName + '" at tier "' + tier
        + '": no pinned model in agent_routing and no model_providers.' + providerName + '.models.' + tier,
      { provider: providerName, tier },
    );
  }

  return {
    provider: providerName,
    kind,
    model,
    baseUrl: (typeof def.base_url === 'string' && def.base_url) ? def.base_url : null,
    apiKeyEnv: (typeof def.api_key_env === 'string' && def.api_key_env) ? def.api_key_env : null,
    routed: !!matched,
    source,
  };
}
|
|
112
|
+
|
|
113
|
+
module.exports = {
|
|
114
|
+
matchRouting,
|
|
115
|
+
resolveProvider,
|
|
116
|
+
VALID_PROVIDER_KINDS,
|
|
117
|
+
DEFAULT_PROVIDER,
|
|
118
|
+
};
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
const { test } = require('node:test');
|
|
2
|
+
const assert = require('node:assert/strict');
|
|
3
|
+
|
|
4
|
+
const { matchRouting, resolveProvider, VALID_PROVIDER_KINDS, DEFAULT_PROVIDER } = require('./model-providers.cjs');
|
|
5
|
+
|
|
6
|
+
test('MPV-1: exact routing key beats glob', () => {
|
|
7
|
+
const r = { 'np-critic': { provider: 'a' }, 'np-critic*': { provider: 'b' } };
|
|
8
|
+
assert.equal(matchRouting('np-critic', r).match, 'exact');
|
|
9
|
+
assert.equal(matchRouting('np-critic', r).entry.provider, 'a');
|
|
10
|
+
});
|
|
11
|
+
|
|
12
|
+
test('MPV-2: trailing-* glob matches by prefix, longest prefix wins', () => {
|
|
13
|
+
const r = { 'np-*': { provider: 'wide' }, 'np-critic*': { provider: 'narrow' } };
|
|
14
|
+
assert.equal(matchRouting('np-critic-style', r).entry.provider, 'narrow');
|
|
15
|
+
assert.equal(matchRouting('np-planner', r).entry.provider, 'wide');
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
test('MPV-3: no match returns null; empty agentName returns null', () => {
|
|
19
|
+
assert.equal(matchRouting('np-x', { 'np-y*': {} }), null);
|
|
20
|
+
assert.equal(matchRouting(null, { 'np-y*': {} }), null);
|
|
21
|
+
assert.equal(matchRouting('np-x', null), null);
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
test('MPV-4: absent config resolves to implicit claude-native default', () => {
|
|
25
|
+
const out = resolveProvider({ agentName: 'np-planner', tier: 'opus', config: {} });
|
|
26
|
+
assert.deepEqual(
|
|
27
|
+
{ provider: out.provider, kind: out.kind, model: out.model, routed: out.routed },
|
|
28
|
+
{ provider: DEFAULT_PROVIDER, kind: 'native', model: null, routed: false },
|
|
29
|
+
);
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
test('MPV-5: openai-compat resolves models[tier] when unpinned', () => {
|
|
33
|
+
const config = {
|
|
34
|
+
model_providers: { ollama: { kind: 'openai-compat', base_url: 'http://x/v1', models: { sonnet: 'm-s', opus: 'm-o' } } },
|
|
35
|
+
agent_routing: { 'np-executor': { provider: 'ollama' } },
|
|
36
|
+
};
|
|
37
|
+
assert.equal(resolveProvider({ agentName: 'np-executor', tier: 'sonnet', config }).model, 'm-s');
|
|
38
|
+
assert.equal(resolveProvider({ agentName: 'np-executor', tier: 'opus', config }).model, 'm-o');
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
test('MPV-6: undefined provider reference throws provider-undefined', () => {
|
|
42
|
+
let thrown = null;
|
|
43
|
+
try {
|
|
44
|
+
resolveProvider({
|
|
45
|
+
agentName: 'np-executor', tier: 'opus',
|
|
46
|
+
config: { model_providers: { claude: { kind: 'native' } }, agent_routing: { 'np-executor': { provider: 'ghost' } } },
|
|
47
|
+
});
|
|
48
|
+
} catch (e) { thrown = e; }
|
|
49
|
+
assert.equal(thrown && thrown.code, 'provider-undefined');
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
test('MPV-7: invalid kind throws provider-invalid-kind', () => {
|
|
53
|
+
let thrown = null;
|
|
54
|
+
try {
|
|
55
|
+
resolveProvider({
|
|
56
|
+
agentName: 'np-executor', tier: 'opus',
|
|
57
|
+
config: { model_providers: { weird: { kind: 'grpc' } }, agent_routing: { 'np-executor': { provider: 'weird' } } },
|
|
58
|
+
});
|
|
59
|
+
} catch (e) { thrown = e; }
|
|
60
|
+
assert.equal(thrown && thrown.code, 'provider-invalid-kind');
|
|
61
|
+
});
|
|
62
|
+
|
|
63
|
+
test('MPV-8: routing entry without provider throws agent-routing-missing-provider', () => {
|
|
64
|
+
let thrown = null;
|
|
65
|
+
try {
|
|
66
|
+
resolveProvider({ agentName: 'np-executor', tier: 'opus', config: { agent_routing: { 'np-executor': { model: 'x' } } } });
|
|
67
|
+
} catch (e) { thrown = e; }
|
|
68
|
+
assert.equal(thrown && thrown.code, 'agent-routing-missing-provider');
|
|
69
|
+
});
|
|
70
|
+
|
|
71
|
+
test('MPV-9: baseUrl + apiKeyEnv surfaced for openai-compat', () => {
|
|
72
|
+
const out = resolveProvider({
|
|
73
|
+
agentName: 'np-executor', tier: 'opus',
|
|
74
|
+
config: {
|
|
75
|
+
model_providers: { openai: { kind: 'openai-compat', base_url: 'https://api.openai.com/v1', api_key_env: 'OPENAI_API_KEY', models: { opus: 'gpt-4.1' } } },
|
|
76
|
+
agent_routing: { 'np-executor': { provider: 'openai' } },
|
|
77
|
+
},
|
|
78
|
+
});
|
|
79
|
+
assert.equal(out.baseUrl, 'https://api.openai.com/v1');
|
|
80
|
+
assert.equal(out.apiKeyEnv, 'OPENAI_API_KEY');
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
test('MPV-10: VALID_PROVIDER_KINDS is the closed set [native, openai-compat]', () => {
|
|
84
|
+
assert.deepEqual(VALID_PROVIDER_KINDS, ['native', 'openai-compat']);
|
|
85
|
+
});
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { NubosPilotError } = require('../core.cjs');
|
|
4
|
+
|
|
5
|
+
const DEFAULT_MAX_ITERATIONS = 25;
|
|
6
|
+
|
|
7
|
+
/**
 * Drive a chat model through a tool-calling loop until it gives a final answer
 * or the iteration cap is reached.
 *
 * Each iteration sends the running message list to `chat`. A response without
 * tool calls ends the loop. Otherwise the assistant turn is appended in OpenAI
 * wire shape, every requested tool is executed through `toolset.execute`, and
 * each result is appended as a `tool` message before the next iteration.
 *
 * @param {object} a
 * @param {string} [a.systemPrompt] - Sent as the system message when truthy.
 * @param {*} [a.task] - User message; null/undefined becomes an empty string.
 * @param {{execute: Function, schemas?: Array}} a.toolset - Tool executor and schemas.
 * @param {{model: string}} a.provider - Spread into every chat request.
 * @param {string} [a.cwd] - Passed to tools; defaults to process.cwd().
 * @param {number} [a.maxIterations] - Cap on chat round-trips (minimum 1).
 * @param {Function} [a.chatImpl] - Chat function; defaults to the openai-compat provider.
 * @returns {Promise<{content: string, iterations: number, stopped: string, toolLog: Array}>}
 *   `stopped` is 'final' or 'max-iterations'. On 'max-iterations' `content` is
 *   the content of the last message in the conversation.
 * @throws {NubosPilotError} agent-loop-no-toolset or agent-loop-no-provider.
 */
async function runAgentLoop(a) {
  const {
    systemPrompt, task, toolset, provider, cwd,
    maxIterations, chatImpl,
  } = a || {};
  if (!toolset || typeof toolset.execute !== 'function') {
    throw new NubosPilotError('agent-loop-no-toolset', 'runAgentLoop requires a toolset with execute()', {});
  }
  if (!provider || typeof provider.model !== 'string') {
    throw new NubosPilotError('agent-loop-no-provider', 'runAgentLoop requires a provider with a model', {});
  }
  // Loaded lazily so callers that inject chatImpl never touch the real provider.
  const chat = chatImpl || require('./providers/openai-compat.cjs').chat;

  // A non-numeric maxIterations would make the cap NaN; the loop would then run
  // zero times and report `iterations: NaN`. Fall back to the default instead.
  const requested = Math.max(1, maxIterations || DEFAULT_MAX_ITERATIONS);
  const max = Number.isNaN(requested) ? DEFAULT_MAX_ITERATIONS : requested;

  // Omit `tools` entirely when the toolset advertises none.
  const schemas = (toolset.schemas && toolset.schemas.length) ? toolset.schemas : undefined;

  const messages = [];
  if (systemPrompt) messages.push({ role: 'system', content: String(systemPrompt) });
  messages.push({ role: 'user', content: String(task == null ? '' : task) });

  const toolLog = [];

  for (let i = 0; i < max; i++) {
    const resp = await chat({ ...provider, messages, tools: schemas });

    if (!resp.toolCalls || resp.toolCalls.length === 0) {
      return { content: resp.content || '', iterations: i + 1, stopped: 'final', toolLog };
    }

    // Echo the assistant turn back in OpenAI wire shape; arguments must be a string.
    messages.push({
      role: 'assistant',
      content: resp.content || '',
      tool_calls: resp.toolCalls.map((tc) => ({
        id: tc.id,
        type: 'function',
        function: {
          name: tc.name,
          arguments: typeof tc.arguments === 'string' ? tc.arguments : JSON.stringify(tc.arguments || {}),
        },
      })),
    });

    for (const tc of resp.toolCalls) {
      // Awaiting is a no-op for synchronous tools and keeps an asynchronous tool
      // from being stringified as "[object Promise]".
      const result = await toolset.execute(tc.name, tc.arguments, { cwd: cwd || process.cwd() });
      // A result starting with "Error:" is logged as a failed call.
      toolLog.push({ name: tc.name, ok: !String(result).startsWith('Error:') });
      messages.push({ role: 'tool', tool_call_id: tc.id, content: String(result) });
    }
  }

  const last = messages[messages.length - 1];
  return {
    content: (last && typeof last.content === 'string') ? last.content : '',
    iterations: max,
    stopped: 'max-iterations',
    toolLog,
  };
}
|
|
63
|
+
|
|
64
|
+
module.exports = { runAgentLoop, DEFAULT_MAX_ITERATIONS };
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
const fs = require('node:fs');
|
|
2
|
+
const os = require('node:os');
|
|
3
|
+
const path = require('node:path');
|
|
4
|
+
const { test, afterEach } = require('node:test');
|
|
5
|
+
const assert = require('node:assert/strict');
|
|
6
|
+
|
|
7
|
+
const { runAgentLoop, DEFAULT_MAX_ITERATIONS } = require('./agent-loop.cjs');
|
|
8
|
+
const { toolsetFor } = require('./tools/index.cjs');
|
|
9
|
+
|
|
10
|
+
const _dirs = [];
|
|
11
|
+
// Create a throwaway workspace under the OS temp dir, seed it with `files`
// (relative path -> text content), register it in `_dirs`, and return its
// real (symlink-resolved) path.
function _ws(files) {
  const tmpBase = path.join(os.tmpdir(), 'np-loop-');
  const root = fs.realpathSync(fs.mkdtempSync(tmpBase));
  const seed = files || {};
  Object.entries(seed).forEach(([rel, content]) => {
    const target = path.join(root, rel);
    // Nested relative paths need their parent directories first.
    fs.mkdirSync(path.dirname(target), { recursive: true });
    fs.writeFileSync(target, content, 'utf-8');
  });
  _dirs.push(root);
  return root;
}
|
|
21
|
+
afterEach(() => { while (_dirs.length) { try { fs.rmSync(_dirs.pop(), { recursive: true, force: true }); } catch {} } });
|
|
22
|
+
|
|
23
|
+
// Build a fake chat function that replays `turns` in order and keeps replaying
// the last turn once the script runs out. A deep copy of the messages passed to
// every call is recorded on `fn.seen`.
function _scriptedChat(turns) {
  const seen = [];
  let cursor = 0;

  const fn = async ({ messages }) => {
    // Snapshot now: the caller keeps mutating the same array afterwards.
    seen.push(JSON.parse(JSON.stringify(messages)));
    const turn = turns[Math.min(cursor, turns.length - 1)];
    cursor += 1;

    if (!turn.toolCalls) {
      return {
        content: turn.content,
        toolCalls: [],
        finishReason: 'stop',
        raw: { role: 'assistant', content: turn.content },
      };
    }

    const text = turn.content || '';
    const wireCalls = turn.toolCalls.map((c) => ({
      id: c.id,
      function: { name: c.name, arguments: c.arguments },
    }));
    return {
      content: text,
      toolCalls: turn.toolCalls,
      finishReason: 'tool_calls',
      raw: { role: 'assistant', content: text, tool_calls: wireCalls },
    };
  };

  fn.seen = seen;
  return fn;
}
|
|
38
|
+
|
|
39
|
+
test('AL-1: a final-answer turn returns immediately, stopped=final', async () => {
|
|
40
|
+
const chatImpl = _scriptedChat([{ content: 'done' }]);
|
|
41
|
+
const out = await runAgentLoop({
|
|
42
|
+
systemPrompt: 'you are x', task: 'do it',
|
|
43
|
+
toolset: toolsetFor(['Read']), provider: { baseUrl: 'http://x/v1', model: 'm' }, chatImpl,
|
|
44
|
+
});
|
|
45
|
+
assert.equal(out.content, 'done');
|
|
46
|
+
assert.equal(out.stopped, 'final');
|
|
47
|
+
assert.equal(out.iterations, 1);
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
test('AL-2: a tool call is executed in the workspace and fed back, then a final answer', async () => {
|
|
51
|
+
const cwd = _ws({ 'data.txt': 'hello' });
|
|
52
|
+
const chatImpl = _scriptedChat([
|
|
53
|
+
{ toolCalls: [{ id: 't1', name: 'Read', arguments: '{"path":"data.txt"}' }] },
|
|
54
|
+
{ content: 'the file says hello' },
|
|
55
|
+
]);
|
|
56
|
+
const out = await runAgentLoop({
|
|
57
|
+
systemPrompt: 's', task: 'read data.txt', cwd,
|
|
58
|
+
toolset: toolsetFor(['Read']), provider: { baseUrl: 'http://x/v1', model: 'm' }, chatImpl,
|
|
59
|
+
});
|
|
60
|
+
assert.equal(out.stopped, 'final');
|
|
61
|
+
assert.equal(out.iterations, 2);
|
|
62
|
+
assert.deepEqual(out.toolLog, [{ name: 'Read', ok: true }]);
|
|
63
|
+
const lastTurnMsgs = chatImpl.seen[1];
|
|
64
|
+
const toolMsg = lastTurnMsgs.find((m) => m.role === 'tool');
|
|
65
|
+
assert.equal(toolMsg.tool_call_id, 't1');
|
|
66
|
+
assert.equal(toolMsg.content, '1\thello');
|
|
67
|
+
});
|
|
68
|
+
|
|
69
|
+
test('AL-3: a failing tool call returns an error string, ok=false, loop continues', async () => {
|
|
70
|
+
const cwd = _ws({});
|
|
71
|
+
const chatImpl = _scriptedChat([
|
|
72
|
+
{ toolCalls: [{ id: 't1', name: 'Read', arguments: '{"path":"missing.txt"}' }] },
|
|
73
|
+
{ content: 'could not read' },
|
|
74
|
+
]);
|
|
75
|
+
const out = await runAgentLoop({
|
|
76
|
+
systemPrompt: 's', task: 't', cwd,
|
|
77
|
+
toolset: toolsetFor(['Read']), provider: { baseUrl: 'http://x/v1', model: 'm' }, chatImpl,
|
|
78
|
+
});
|
|
79
|
+
assert.equal(out.toolLog[0].ok, false);
|
|
80
|
+
assert.equal(out.stopped, 'final');
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
test('AL-4: a model that never stops hits the iteration cap', async () => {
|
|
84
|
+
const cwd = _ws({ 'a.txt': 'x' });
|
|
85
|
+
const chatImpl = _scriptedChat([{ toolCalls: [{ id: 't', name: 'Read', arguments: '{"path":"a.txt"}' }] }]);
|
|
86
|
+
const out = await runAgentLoop({
|
|
87
|
+
systemPrompt: 's', task: 't', cwd, maxIterations: 3,
|
|
88
|
+
toolset: toolsetFor(['Read']), provider: { baseUrl: 'http://x/v1', model: 'm' }, chatImpl,
|
|
89
|
+
});
|
|
90
|
+
assert.equal(out.stopped, 'max-iterations');
|
|
91
|
+
assert.equal(out.iterations, 3);
|
|
92
|
+
assert.equal(out.toolLog.length, 3);
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
test('AL-5: missing toolset / provider throw loud', async () => {
|
|
96
|
+
let a = null; try { await runAgentLoop({ provider: { model: 'm' } }); } catch (e) { a = e; }
|
|
97
|
+
assert.equal(a.code, 'agent-loop-no-toolset');
|
|
98
|
+
let b = null; try { await runAgentLoop({ toolset: toolsetFor(['Read']) }); } catch (e) { b = e; }
|
|
99
|
+
assert.equal(b.code, 'agent-loop-no-provider');
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
test('AL-6: DEFAULT_MAX_ITERATIONS is a sane positive cap', () => {
|
|
103
|
+
assert.ok(DEFAULT_MAX_ITERATIONS >= 1 && DEFAULT_MAX_ITERATIONS <= 100);
|
|
104
|
+
});
|
|
105
|
+
|
|
106
|
+
test('AL-7: assistant echo is rebuilt in OpenAI wire shape; ids round-trip even if provider omits them', async () => {
|
|
107
|
+
const cwd = _ws({ 'a.txt': 'A', 'b.txt': 'B' });
|
|
108
|
+
const chatImpl = async ({ messages }) => {
|
|
109
|
+
chatImpl.seen = (chatImpl.seen || []).concat([JSON.parse(JSON.stringify(messages))]);
|
|
110
|
+
if (!chatImpl.called) {
|
|
111
|
+
chatImpl.called = true;
|
|
112
|
+
return {
|
|
113
|
+
content: '', finishReason: 'tool_calls',
|
|
114
|
+
toolCalls: [
|
|
115
|
+
{ id: 'call_0', name: 'Read', arguments: '{"path":"a.txt"}' },
|
|
116
|
+
{ id: 'call_1', name: 'Read', arguments: '{"path":"b.txt"}' },
|
|
117
|
+
],
|
|
118
|
+
raw: { role: 'assistant', content: '', tool_calls: [{ function: { name: 'Read' } }] },
|
|
119
|
+
};
|
|
120
|
+
}
|
|
121
|
+
return { content: 'done', toolCalls: [], finishReason: 'stop', raw: { role: 'assistant', content: 'done' } };
|
|
122
|
+
};
|
|
123
|
+
const out = await runAgentLoop({
|
|
124
|
+
systemPrompt: 's', task: 't', cwd,
|
|
125
|
+
toolset: toolsetFor(['Read']), provider: { baseUrl: 'http://x/v1', model: 'm' }, chatImpl,
|
|
126
|
+
});
|
|
127
|
+
assert.equal(out.stopped, 'final');
|
|
128
|
+
const secondTurn = chatImpl.seen[1];
|
|
129
|
+
const assistant = secondTurn.find((m) => m.role === 'assistant' && m.tool_calls);
|
|
130
|
+
assert.equal(assistant.tool_calls[0].type, 'function');
|
|
131
|
+
assert.equal(assistant.tool_calls[0].function.name, 'Read');
|
|
132
|
+
assert.deepEqual(assistant.tool_calls.map((c) => c.id), ['call_0', 'call_1']);
|
|
133
|
+
const toolMsgs = secondTurn.filter((m) => m.role === 'tool');
|
|
134
|
+
assert.deepEqual(toolMsgs.map((m) => m.tool_call_id), ['call_0', 'call_1']);
|
|
135
|
+
});
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const path = require('node:path');
|
|
4
|
+
const { NubosPilotError } = require('../core.cjs');
|
|
5
|
+
const { loadAgentSource } = require('../agents.cjs');
|
|
6
|
+
const { resolveFromConfig } = require('../../bin/np-tools/resolve-model.cjs');
|
|
7
|
+
const { assertPreflight } = require('./preflight.cjs');
|
|
8
|
+
const { runAgentLoop } = require('./agent-loop.cjs');
|
|
9
|
+
const { toolsetFor } = require('./tools/index.cjs');
|
|
10
|
+
const { AUDITED_AGENTS, auditToolUse } = require('../nubosloop-audit.cjs');
|
|
11
|
+
const { TASK_ID_RE } = require('../ids.cjs');
|
|
12
|
+
const metrics = require('../metrics.cjs');
|
|
13
|
+
|
|
14
|
+
/**
 * Lint agent output against a named schema, never throwing.
 *
 * @param {*} content - Agent output; null/undefined is linted as an empty string.
 * @param {string} [schemaName] - Schema to lint against; falsy skips linting.
 * @returns {null|{ok: boolean, schema: string, violations: Array}|{ok: false, schema: string, error: string}}
 *   null when no schema was requested, the lint verdict otherwise, or an error
 *   record when loading the schema or linting throws.
 */
function _lintOutput(content, schemaName) {
  if (!schemaName) return null;

  try {
    // Both modules are loaded lazily, only when linting was requested.
    const { getSchema: schemaFor } = require('../schemas/index.cjs');
    const { lintContent: lint } = require('../output-lint.cjs');
    const text = content == null ? '' : String(content);
    const report = lint(text, schemaFor(schemaName));
    return {
      ok: Boolean(report.ok),
      schema: schemaName,
      violations: report.violations || [],
    };
  } catch (failure) {
    const code = (failure && failure.code) || 'output-lint-failed';
    return { ok: false, schema: schemaName, error: code };
  }
}
|
|
25
|
+
|
|
26
|
+
/**
 * Report whether `cwd` is a slice worktree root or lies inside one.
 *
 * @param {string} cwd - Directory to test.
 * @returns {boolean} false when no listed worktree contains `cwd`, and also when
 *   loading or querying the worktree module throws.
 */
function _defaultInWorktree(cwd) {
  try {
    const { listSliceWorktrees } = require('../worktree.cjs');
    const worktrees = listSliceWorktrees(cwd);
    // The trailing separator stops "/a/wt-2" from matching worktree "/a/wt".
    const contains = (root) => cwd === root || cwd.startsWith(root + path.sep);
    return worktrees.some((tree) => contains(tree.path));
  } catch {
    return false;
  }
}
|
|
32
|
+
|
|
33
|
+
/**
 * Normalise an agent's `tools` frontmatter field into a list of tool names.
 *
 * @param {Array|string|*} toolsField - An array of names or a comma-separated string.
 * @returns {string[]} Trimmed, non-empty names; empty for any other input type.
 */
function _parseTools(toolsField) {
  let rawNames;
  if (Array.isArray(toolsField)) {
    rawNames = toolsField.map((item) => String(item));
  } else if (typeof toolsField === 'string') {
    rawNames = toolsField.split(',');
  } else {
    return [];
  }
  return rawNames.map((name) => name.trim()).filter(Boolean);
}
|
|
38
|
+
|
|
39
|
+
/**
 * Run one agent against an openai-compat provider and report the outcome.
 *
 * Steps, in order: resolve the agent's provider, refuse anything that is not
 * openai-compat, enforce the audited-agent and Bash preconditions, load the
 * agent source, build its toolset, preflight the provider, run the agent loop,
 * append a metrics record (best effort), audit tool use for audited agents,
 * and lint the output when a schema was requested.
 *
 * @param {object} o
 * @param {string} o.agent - Agent name (required).
 * @param {*} [o.task] - Task text handed to the agent loop.
 * @param {string} [o.cwd] - Working directory; defaults to process.cwd().
 * @param {string} [o.taskId] - Task id; only used as task context when it matches TASK_ID_RE.
 * @param {boolean} [o.allowBash] - Request Bash; refused unless `isInWorktree(cwd)` is truthy.
 * @param {boolean} [o.readOnly] - Forwarded to the toolset.
 * @param {boolean} [o.skipAudit] - Skip the tool-use audit.
 * @param {string} [o.outputSchema] - Schema name to lint the final content against.
 * @param {number} [o.maxIterations] - Forwarded to the agent loop.
 * @param {object} [o.deps] - Overrides: resolve, preflight, loadSource, runLoop, isInWorktree, now.
 * @returns {Promise<object>} Summary with agent, provider, model, content, stopped,
 *   iterations, toolLog, tools, rule9, capability, output_lint and metrics_recorded.
 * @throws {NubosPilotError} dispatch-no-agent, dispatch-not-offhost,
 *   offhost-audited-agent-unsupported, offhost-bash-requires-sandbox, or the
 *   code/message of a failure raised by the agent loop.
 */
async function dispatchOffHost(o) {
  const opts = o || {};
  const cwd = opts.cwd || process.cwd();
  const deps = opts.deps || {};

  // Every collaborator can be swapped through opts.deps; the imports are the defaults.
  const resolveModel = deps.resolve || resolveFromConfig;
  const checkProvider = deps.preflight || assertPreflight;
  const readAgentSource = deps.loadSource || loadAgentSource;
  const loop = deps.runLoop || runAgentLoop;
  const insideWorktree = deps.isInWorktree || _defaultInWorktree;
  const timestamp = deps.now || (() => new Date().toISOString());

  const agent = opts.agent;
  if (typeof agent !== 'string' || !agent) {
    throw new NubosPilotError('dispatch-no-agent', 'dispatchOffHost requires an agent name', {});
  }

  const res = resolveModel({ agentOrTier: agent, cwd });
  if (res.kind !== 'openai-compat') {
    throw new NubosPilotError(
      'dispatch-not-offhost',
      'agent "' + agent + '" resolves to provider "' + res.provider + '" (kind ' + res.kind
        + ') — dispatchOffHost only runs openai-compat providers',
      { provider: res.provider, kind: res.kind },
    );
  }

  // Audited agents are only allowed off-host with a well-formed task id.
  const taskId = opts.taskId;
  const audited = AUDITED_AGENTS.includes(agent);
  const hasTaskCtx = typeof taskId === 'string' && TASK_ID_RE.test(taskId);
  if (audited && !hasTaskCtx) {
    throw new NubosPilotError(
      'offhost-audited-agent-unsupported',
      'agent "' + agent + '" is Rule-9-audited and needs a task context off-host — pass --task-id '
        + 'M<NNN>-S<NNN>-T<NNNN> so the search-evidence ledger + audit apply. (Wired into execute-phase in ADR-0021 Slice 4b.)',
      { agent, audited: AUDITED_AGENTS.slice() },
    );
  }

  if (opts.allowBash && !insideWorktree(cwd)) {
    throw new NubosPilotError(
      'offhost-bash-requires-sandbox',
      'off-host Bash needs worktree isolation — run inside a slice worktree (workflow.worktree_isolation) so model-driven shell is confined. Refused outside one.',
      // Only the directory name is reported, not the full path.
      { cwd: path.basename(cwd) },
    );
  }

  const src = readAgentSource(agent, cwd);
  const declaredTools = _parseTools(src.frontmatter && src.frontmatter.tools);
  const toolset = toolsetFor(declaredTools, {
    readOnly: !!opts.readOnly,
    allowBash: !!opts.allowBash,
    withSearch: audited,
    ctx: { taskId: hasTaskCtx ? taskId : null, customRulesPath: opts.customRulesPath },
  });
  const provider = { baseUrl: res.baseUrl, apiKeyEnv: res.apiKeyEnv, model: res.model };

  await checkProvider(provider);

  // Run the loop, capturing a failure so the metrics record below is still
  // written before the failure is rethrown.
  const started = timestamp();
  let result = null;
  let failure = null;
  try {
    result = await loop({
      systemPrompt: src.body,
      task: opts.task,
      toolset,
      provider,
      cwd,
      maxIterations: opts.maxIterations,
    });
  } catch (err) {
    failure = {
      code: (err && err.code) || 'dispatch-loop-failed',
      message: (err && err.message) || 'loop failed',
    };
  }
  const ended = timestamp();
  const status = failure ? 'error' : 'ok';

  // Metrics are best effort: a failure here only leaves the flag false.
  let metricsRecorded = false;
  try {
    const record = metrics.buildRecord({
      agent,
      tier: res.tier,
      resolved_model: res.model,
      phase: opts.phase || '',
      plan: opts.plan || 'offhost',
      task: opts.taskId || 'adhoc',
      started_at: started,
      ended_at: ended,
      status,
      runtime: res.provider,
      error: failure,
    });
    metrics.appendRecord(record, { cwd });
    metricsRecorded = true;
  } catch {}

  if (failure) {
    throw new NubosPilotError(failure.code, failure.message, { agent, provider: res.provider });
  }

  let rule9 = null;
  if (audited && hasTaskCtx && !opts.skipAudit) {
    try {
      const usedTools = (result.toolLog || []).map((entry) => entry.name);
      rule9 = auditToolUse(taskId, agent, usedTools, cwd);
    } catch (err) {
      rule9 = { ok: false, error: (err && err.code) || 'audit-failed' };
    }
  }

  // Capability hint: tools were advertised yet the model never called one. The
  // loop treats a tool-less turn as a final answer, so this would otherwise go
  // unnoticed. It is a hint rather than an error; `mutating` tells the caller
  // whether the toolset included Write, Edit or Bash.
  const toolsAdvertised = (toolset.schemas || []).length;
  const toolCalls = (result.toolLog || []).length;
  const isMutatingTool = (n) => n === 'Write' || n === 'Edit' || n === 'Bash';
  const capability = {
    toolsAdvertised,
    toolCalls,
    mutating: toolset.names.some(isMutatingTool),
    ok: !(toolsAdvertised > 0 && toolCalls === 0),
  };

  return {
    agent,
    provider: res.provider,
    model: res.model,
    content: result.content,
    stopped: result.stopped,
    iterations: result.iterations,
    toolLog: result.toolLog,
    tools: toolset.names,
    rule9,
    capability,
    output_lint: _lintOutput(result.content, opts.outputSchema),
    metrics_recorded: metricsRecorded,
  };
}
|
|
173
|
+
|
|
174
|
+
module.exports = { dispatchOffHost, _parseTools };
|