nubos-pilot 1.3.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/CHANGELOG.md +10 -0
  2. package/bin/np-tools/_commands.cjs +2 -0
  3. package/bin/np-tools/_elision-proxy-entry.cjs +13 -0
  4. package/bin/np-tools/doctor.cjs +25 -3
  5. package/bin/np-tools/elision-bench.cjs +67 -0
  6. package/bin/np-tools/elision-get.cjs +48 -0
  7. package/bin/np-tools/elision-get.test.cjs +66 -0
  8. package/bin/np-tools/loop-run-round.cjs +25 -11
  9. package/bin/np-tools/plan-milestone.cjs +1 -0
  10. package/bin/np-tools/research-phase.cjs +1 -1
  11. package/bin/np-tools/resume-work.cjs +9 -0
  12. package/bin/np-tools/resume-work.test.cjs +21 -1
  13. package/bin/np-tools/spawn-headless.cjs +62 -9
  14. package/lib/cache-align.cjs +78 -0
  15. package/lib/cache-align.test.cjs +69 -0
  16. package/lib/checkpoint-reconcile.cjs +42 -0
  17. package/lib/checkpoint-reconcile.test.cjs +106 -0
  18. package/lib/compress.cjs +495 -0
  19. package/lib/compress.test.cjs +267 -0
  20. package/lib/config-defaults.cjs +39 -0
  21. package/lib/config-schema.cjs +40 -4
  22. package/lib/elision-bench.cjs +409 -0
  23. package/lib/elision-bench.test.cjs +89 -0
  24. package/lib/elision-proxy.cjs +158 -0
  25. package/lib/elision-proxy.test.cjs +243 -0
  26. package/lib/elision.cjs +163 -0
  27. package/lib/elision.test.cjs +143 -0
  28. package/lib/git.cjs +4 -2
  29. package/lib/nubosloop.cjs +1 -1
  30. package/lib/output-steering.cjs +68 -0
  31. package/lib/output-steering.test.cjs +74 -0
  32. package/lib/researcher-swarm.cjs +14 -3
  33. package/lib/runtime/agent-loop.cjs +36 -6
  34. package/lib/runtime/agent-loop.test.cjs +105 -0
  35. package/lib/runtime/dispatch.cjs +6 -6
  36. package/lib/runtime/dispatch.test.cjs +17 -3
  37. package/lib/runtime/providers/openai-compat.cjs +2 -1
  38. package/lib/runtime/providers/openai-compat.test.cjs +9 -0
  39. package/lib/runtime/tools/index.cjs +33 -1
  40. package/lib/runtime/tools/index.test.cjs +24 -0
  41. package/lib/schemas/data/elision-entry.v1.json +16 -0
  42. package/lib/token-cost.cjs +46 -0
  43. package/lib/token-cost.test.cjs +42 -0
  44. package/np-tools.cjs +2 -0
  45. package/package.json +1 -1
  46. package/workflows/execute-phase.md +10 -2
@@ -0,0 +1,74 @@
1
+ 'use strict';
2
+
3
+ const { test } = require('node:test');
4
+ const assert = require('node:assert/strict');
5
+
6
+ const steering = require('./output-steering.cjs');
7
+
8
// OS-1 — the 'balanced' profile, an unrecognised profile name and an undefined profile
// must all hand the prompt back unchanged.
test('OS-1: balanced/unknown profile is a no-op (no shaping block)', () => {
  const prompt = 'You are an agent.';
  for (const profile of ['balanced', 'nonsense', undefined]) {
    assert.equal(steering.enrichSystemPrompt(prompt, profile), prompt);
  }
});

// OS-2 — a shaping profile keeps the original prompt as prefix, ends with the tagged
// block, and yields the same string on a repeated call.
test('OS-2: a real profile appends one tagged, byte-stable block', () => {
  const base = 'You are an agent.';
  const first = steering.enrichSystemPrompt(base, 'terse');
  assert.match(first, /^You are an agent\./);
  assert.match(first, /<nubos_output_shaping>[\s\S]*<\/nubos_output_shaping>$/);
  const second = steering.enrichSystemPrompt(base, 'terse');
  assert.equal(first, second, 'deterministic');
});

// OS-3 — enriching an already-enriched prompt must never leave more than one block,
// whether the profile stays the same, changes, or goes back to 'balanced'.
test('OS-3: enrichment is idempotent and profile-switchable (always exactly one block)', () => {
  const once = steering.enrichSystemPrompt('base', 'terse');

  const again = steering.enrichSystemPrompt(once, 'terse');
  assert.equal(again, once, 're-enriching with same profile converges');

  const switched = steering.enrichSystemPrompt(once, 'minimal');
  const openTags = switched.match(/<nubos_output_shaping>/g) || [];
  assert.equal(openTags.length, 1, 'never stacks blocks');
  assert.match(switched, /Minimum tokens/);

  const stripped = steering.enrichSystemPrompt(once, 'balanced');
  assert.equal(stripped, 'base', 'balanced strips back to bare prompt');
});

// OS-4 — a history ending in a user message, and an empty history, both classify as a new ask.
test('OS-4: classifyTurn — fresh user ask', () => {
  const history = [
    { role: 'system', content: 's' },
    { role: 'user', content: 'do x' },
  ];
  assert.equal(steering.classifyTurn(history), 'new_user_ask');
  assert.equal(steering.classifyTurn([]), 'new_user_ask');
});

// OS-5 — trailing tool results without an error marker classify as mechanical.
test('OS-5: classifyTurn — clean tool results are a mechanical continuation', () => {
  const history = [
    { role: 'user', content: 'do x' },
    { role: 'assistant', content: '', tool_calls: [] },
    { role: 'tool', tool_call_id: 'a', content: 'file written ok' },
    { role: 'tool', tool_call_id: 'b', content: 'ok' },
  ];
  assert.equal(steering.classifyTurn(history), 'mechanical_continuation');
});

// OS-6 — one 'Error:' tool result among the trailing tool results changes the classification.
test('OS-6: classifyTurn — an error tool result forces full effort', () => {
  const history = [
    { role: 'user', content: 'do x' },
    { role: 'assistant', content: '' },
    { role: 'tool', tool_call_id: 'a', content: 'ok' },
    { role: 'tool', tool_call_id: 'b', content: 'Error: file not found' },
  ];
  assert.equal(steering.classifyTurn(history), 'error_continuation');
});

// OS-8 — in this fixture the trailing tool result is directly preceded by a user message.
test('OS-8: classifyTurn — a fresh user message after a tool turn is a new ask', () => {
  const history = [
    { role: 'user', content: 'do x' },
    { role: 'assistant', content: '' },
    { role: 'user', content: 'actually do y' },
    { role: 'tool', tool_call_id: 'a', content: 'ok' },
  ];
  assert.equal(steering.classifyTurn(history), 'new_user_ask');
});

// OS-7 — effort routing table: (current effort, turn class, options) -> expected effort.
test('OS-7: routeEffort downgrades only on mechanical turns, never injects or upgrades', () => {
  const lowMechanical = { mechanicalEffort: 'low' };
  const highMechanical = { mechanicalEffort: 'high' };

  assert.equal(steering.routeEffort(undefined, 'mechanical_continuation', {}), undefined);
  assert.equal(steering.routeEffort('high', 'mechanical_continuation', lowMechanical), 'low');
  assert.equal(steering.routeEffort('high', 'new_user_ask', lowMechanical), 'high');
  assert.equal(steering.routeEffort('high', 'error_continuation', lowMechanical), 'high');
  assert.equal(steering.routeEffort('low', 'mechanical_continuation', highMechanical), 'low');
});
@@ -4,6 +4,7 @@ const crypto = require('node:crypto');
4
4
 
5
5
  const { DEFAULT_THRESHOLD, DEFAULT_MIN_OCCURRENCE } = require('./knowledge-adapter.cjs');
6
6
  const config = require('./config.cjs');
7
+ const elision = require('./elision.cjs');
7
8
  const { normalizeText } = require('./core.cjs');
8
9
 
9
10
  const DEFAULT_K = 3;
@@ -50,19 +51,29 @@ function resolveSwarmOpts(cwd, override) {
50
51
  return { k, threshold, minOccurrence };
51
52
  }
52
53
 
53
- function buildSpawnSpecs(input, k) {
54
/**
 * Best-effort: serialise the swarm input once and park it in the Elision store so
 * every spawn spec can carry a reference to it.
 *
 * The reference is optional (the caller only attaches it when it is truthy), so a
 * failure here must never prevent the specs from being built. Anything that goes
 * wrong (no workspace, no usable store, an input that cannot be serialised, a
 * store that throws) yields `null`.
 *
 * NOTE(review): this only checks that a `store` function exists, not `cx.enabled`;
 * confirm that `compressionContext` omits `store` when compression is disabled.
 *
 * @param {object} input - Swarm input shared by all specs.
 * @param {string} [cwd] - Workspace root; without it no store can be resolved.
 * @returns {*} Whatever the store returns as reference (a truthy value), else `null`.
 */
function _dedupInputRef(input, cwd) {
  if (!cwd) return null;
  try {
    const cx = elision.compressionContext(cwd);
    if (!cx || typeof cx.store !== 'function') return null;
    // JSON.stringify throws on cyclic structures / BigInt and may yield undefined
    // (e.g. a toJSON() that returns undefined); neither is storable.
    const serialized = JSON.stringify(input);
    if (typeof serialized !== 'string') return null;
    return cx.store(serialized, 'json-array') || null;
  } catch (_err) {
    return null;
  }
}
59
+
60
/**
 * Build one spawn spec per swarm member.
 *
 * Each spec carries its position (`index`, `seed_delta`), a seed nudge picked
 * round-robin from SEED_DELTAS, and the shared `input` object (same reference in
 * every spec). When `_dedupInputRef` returns a truthy reference for `opts.cwd`, it
 * is attached to every spec as `input_ref`.
 *
 * @param {object} input - Shared swarm input; must be a non-null object.
 * @param {*} k - Requested swarm size, normalised by `_coerceK`.
 * @param {{cwd?: string}} [opts] - Optional; `cwd` is passed to `_dedupInputRef`.
 * @returns {object[]} The spawn specs, in index order.
 * @throws {TypeError} When `input` is missing or not an object.
 */
function buildSpawnSpecs(input, k, opts) {
  const usableInput = Boolean(input) && typeof input === 'object';
  if (!usableInput) {
    throw new TypeError('buildSpawnSpecs: input object is required');
  }

  const count = _coerceK(k);
  const { cwd } = opts || {};
  const sharedRef = _dedupInputRef(input, cwd);

  const out = [];
  for (let slot = 0; slot < count; slot += 1) {
    const entry = {
      index: slot,
      seed_delta: slot,
      seed_nudge: SEED_DELTAS[slot % SEED_DELTAS.length],
      input,
    };
    if (sharedRef) entry.input_ref = sharedRef;
    out.push(entry);
  }
  return out;
}
@@ -1,9 +1,23 @@
1
1
  'use strict';
2
2
 
3
3
  const { NubosPilotError } = require('../core.cjs');
4
+ const compress = require('../compress.cjs');
5
+ const elision = require('../elision.cjs');
6
+ const steering = require('../output-steering.cjs');
7
+ const { EXPAND_TOOL_NAME } = require('./tools/index.cjs');
4
8
 
5
9
  const DEFAULT_MAX_ITERATIONS = 25;
6
10
 
11
/**
 * Best-effort compression of one tool result before it enters the message history.
 *
 * Returns the compressed text only when compression is enabled, a store function is
 * available, the compressor reports a change, and the replacement is a string.
 * In every other case, including any exception raised by the compressor or the
 * store, the raw text is returned unchanged, so compression cannot break the loop.
 *
 * @param {string} text - Raw tool output.
 * @param {{enabled?: boolean, store?: Function, minBlockBytes?: number}} [cx]
 *   Compression context; `store` is handed to the compressor.
 * @returns {string} The text to store in history.
 */
function _compressToolResult(text, cx) {
  if (!cx || !cx.enabled || typeof cx.store !== 'function') return text;
  try {
    const res = compress.compressBlock(text, { minBlockBytes: cx.minBlockBytes, store: cx.store });
    // The caller measures the result with Buffer.byteLength and uses it as message
    // content, so a "changed" result without a string payload must not be used.
    const usable = Boolean(res) && Boolean(res.changed) && typeof res.compressed === 'string';
    return usable ? res.compressed : text;
  } catch {
    // Deliberate best-effort: a compressor/store failure falls back to the raw text.
    return text;
  }
}
20
+
7
21
  async function runAgentLoop(a) {
8
22
  const {
9
23
  systemPrompt, task, toolset, provider, cwd,
@@ -19,17 +33,27 @@ async function runAgentLoop(a) {
19
33
  const max = Math.max(1, maxIterations || DEFAULT_MAX_ITERATIONS);
20
34
  const schemas = (toolset.schemas && toolset.schemas.length) ? toolset.schemas : undefined;
21
35
 
36
+ const cx = elision.compressionContext(cwd);
37
+ const os = cx.outputSteering || { enabled: false, effortRouting: false };
38
+ const compression = { tool_results: 0, blocks_compressed: 0, bytes_before: 0, bytes_after: 0 };
39
+
22
40
  const messages = [];
23
- if (systemPrompt) messages.push({ role: 'system', content: String(systemPrompt) });
41
+ if (systemPrompt) {
42
+ const sys = os.enabled ? steering.enrichSystemPrompt(String(systemPrompt), os.profile) : String(systemPrompt);
43
+ messages.push({ role: 'system', content: sys });
44
+ }
24
45
  messages.push({ role: 'user', content: String(task == null ? '' : task) });
25
46
 
26
47
  const toolLog = [];
27
48
 
28
49
  for (let i = 0; i < max; i++) {
29
- const resp = await chat({ ...provider, messages, tools: schemas });
50
+ const turnProvider = os.effortRouting
51
+ ? { ...provider, effort: steering.routeEffort(provider.effort, steering.classifyTurn(messages), { mechanicalEffort: os.mechanicalEffort }) }
52
+ : provider;
53
+ const resp = await chat({ ...turnProvider, messages, tools: schemas });
30
54
 
31
55
  if (!resp.toolCalls || resp.toolCalls.length === 0) {
32
- return { content: resp.content || '', iterations: i + 1, stopped: 'final', toolLog };
56
+ return { content: resp.content || '', iterations: i + 1, stopped: 'final', toolLog, compression };
33
57
  }
34
58
 
35
59
  messages.push({
@@ -46,9 +70,14 @@ async function runAgentLoop(a) {
46
70
  });
47
71
 
48
72
  for (const tc of resp.toolCalls) {
49
- const result = toolset.execute(tc.name, tc.arguments, { cwd: cwd || process.cwd() });
50
- toolLog.push({ name: tc.name, ok: !String(result).startsWith('Error:') });
51
- messages.push({ role: 'tool', tool_call_id: tc.id, content: String(result) });
73
+ const raw = String(toolset.execute(tc.name, tc.arguments, { cwd: cwd || process.cwd() }));
74
+ toolLog.push({ name: tc.name, ok: !raw.startsWith('Error:') });
75
+ const stored = tc.name === EXPAND_TOOL_NAME ? raw : _compressToolResult(raw, cx);
76
+ compression.tool_results += 1;
77
+ compression.bytes_before += Buffer.byteLength(raw, 'utf-8');
78
+ compression.bytes_after += Buffer.byteLength(stored, 'utf-8');
79
+ if (stored !== raw) compression.blocks_compressed += 1;
80
+ messages.push({ role: 'tool', tool_call_id: tc.id, content: stored });
52
81
  }
53
82
  }
54
83
 
@@ -58,6 +87,7 @@ async function runAgentLoop(a) {
58
87
  iterations: max,
59
88
  stopped: 'max-iterations',
60
89
  toolLog,
90
+ compression,
61
91
  };
62
92
  }
63
93
 
@@ -6,6 +6,16 @@ const assert = require('node:assert/strict');
6
6
 
7
7
  const { runAgentLoop, DEFAULT_MAX_ITERATIONS } = require('./agent-loop.cjs');
8
8
  const { toolsetFor } = require('./tools/index.cjs');
9
+ const elision = require('../elision.cjs');
10
+
11
/**
 * Fixture: a 300-line log joined with '\n'. Every line whose index is a multiple
 * of 73 is an 'ERROR: boom at module_<i>' line; all others are '[info]' lines that
 * end in a 30-character run of 'x'.
 *
 * @returns {string} The log text.
 */
function _bigLog() {
  const filler = 'x'.repeat(30);
  const rows = Array.from({ length: 300 }, (_unused, idx) => {
    if (idx % 73 === 0) return 'ERROR: boom at module_' + idx;
    return '[info] step ' + idx + ' ok processed record ' + (idx * 7) + ' ' + filler;
  });
  return rows.join('\n');
}
9
19
 
10
20
  const _dirs = [];
11
21
  function _ws(files) {
@@ -103,6 +113,73 @@ test('AL-6: DEFAULT_MAX_ITERATIONS is a sane positive cap', () => {
103
113
  assert.ok(DEFAULT_MAX_ITERATIONS >= 1 && DEFAULT_MAX_ITERATIONS <= 100);
104
114
  });
105
115
 
116
// AL-8 — workspace without a compression config: the Read result must reach the
// second chat turn without an elision marker and the byte counters must match.
test('AL-8: compression default OFF — tool result enters history verbatim, no blocks compressed', async () => {
  const workspace = _ws({ 'log.txt': _bigLog() });
  const chatImpl = _scriptedChat([
    { toolCalls: [{ id: 't1', name: 'Read', arguments: '{"path":"log.txt"}' }] },
    { content: 'done' },
  ]);

  const out = await runAgentLoop({
    systemPrompt: 's',
    task: 't',
    cwd: workspace,
    toolset: toolsetFor(['Read']),
    provider: { baseUrl: 'http://x/v1', model: 'm' },
    chatImpl,
  });

  const toolMsg = chatImpl.seen[1].find((msg) => msg.role === 'tool');
  assert.ok(!toolMsg.content.includes('⟦elided:'), 'no marker when compression off');
  assert.equal(out.compression.blocks_compressed, 0);
  assert.equal(out.compression.bytes_after, out.compression.bytes_before);
});

// AL-9 — compression enabled via .nubos-pilot/config.json: history carries a marker
// with a 12-hex hash and the original is retrievable from the Elision store.
test('AL-9: compression ON — large tool result is crushed in history, original retrievable from Elision store', async () => {
  const workspace = _ws({
    'log.txt': _bigLog(),
    '.nubos-pilot/config.json': JSON.stringify({ compression: { enabled: true } }),
  });
  const chatImpl = _scriptedChat([
    { toolCalls: [{ id: 't1', name: 'Read', arguments: '{"path":"log.txt"}' }] },
    { content: 'done' },
  ]);

  const out = await runAgentLoop({
    systemPrompt: 's',
    task: 't',
    cwd: workspace,
    toolset: toolsetFor(['Read']),
    provider: { baseUrl: 'http://x/v1', model: 'm' },
    chatImpl,
  });

  const toolMsg = chatImpl.seen[1].find((msg) => msg.role === 'tool');
  assert.equal(out.compression.blocks_compressed, 1);
  assert.ok(out.compression.bytes_after < out.compression.bytes_before, 'history shrank');

  const marker = toolMsg.content.match(/⟦elided:([a-f0-9]{12})/);
  assert.ok(marker, 'marker with hash present in history');

  const back = elision.retrieve(marker[1], workspace);
  assert.equal(back.status, 'ok');
  assert.ok(back.original.includes('ERROR: boom at module_0'), 'original recoverable byte-for-byte');
});

// AL-10 — three scripted turns: Read the log, call context-expand with the hash
// taken from the marker, then finish. The expanded tool result is captured on turn 3.
test('AL-10: end-to-end — model retrieves an elided original mid-loop via context-expand', async () => {
  const workspace = _ws({
    'log.txt': _bigLog(),
    '.nubos-pilot/config.json': JSON.stringify({ compression: { enabled: true } }),
  });

  const lastToolMessage = (messages) => messages.filter((msg) => msg.role === 'tool').pop();
  let expanded = null;
  let turn = 0;

  const chat = async ({ messages }) => {
    turn += 1;
    if (turn === 1) {
      return { content: '', finishReason: 'tool_calls', toolCalls: [{ id: 'r1', name: 'Read', arguments: '{"path":"log.txt"}' }] };
    }
    if (turn === 2) {
      const hash = lastToolMessage(messages).content.match(/⟦elided:([a-f0-9]{12})/)[1];
      return { content: '', finishReason: 'tool_calls', toolCalls: [{ id: 'r2', name: 'context-expand', arguments: JSON.stringify({ hash }) }] };
    }
    expanded = lastToolMessage(messages).content;
    return { content: 'done', finishReason: 'stop', toolCalls: [] };
  };

  const out = await runAgentLoop({
    systemPrompt: 's',
    task: 't',
    cwd: workspace,
    toolset: toolsetFor(['Read'], { withExpand: true }),
    provider: { baseUrl: 'http://x/v1', model: 'm' },
    chatImpl: chat,
  });

  assert.equal(out.stopped, 'final');
  assert.ok(expanded.includes('ERROR: boom at module_0'), 'model recovered the full original byte-for-byte');
  assert.ok(!expanded.includes('⟦elided:'), 'the expanded original carries no marker');
});
182
+
106
183
  test('AL-7: assistant echo is rebuilt in OpenAI wire shape; ids round-trip even if provider omits them', async () => {
107
184
  const cwd = _ws({ 'a.txt': 'A', 'b.txt': 'B' });
108
185
  const chatImpl = async ({ messages }) => {
@@ -133,3 +210,31 @@ test('AL-7: assistant echo is rebuilt in OpenAI wire shape; ids round-trip even
133
210
  const toolMsgs = secondTurn.filter((m) => m.role === 'tool');
134
211
  assert.deepEqual(toolMsgs.map((m) => m.tool_call_id), ['call_0', 'call_1']);
135
212
  });
213
+
214
// AL-12 — with output steering and effort routing configured, the chat stub records
// the effort and system prompt of each turn: turn 1 follows the user task, turn 2
// follows a clean Read result.
test('AL-12: output_steering ON — system prompt is enriched and mechanical turns downgrade effort', async () => {
  const config = {
    compression: {
      enabled: true,
      output_steering: { enabled: true, verbosity_profile: 'terse', effort_routing: { enabled: true, base_effort: 'high', mechanical_effort: 'low' } },
    },
  };
  const workspace = _ws({
    'a.txt': 'A',
    '.nubos-pilot/config.json': JSON.stringify(config),
  });

  const turns = [];
  const chatImpl = async (args) => {
    const systemMsg = args.messages.find((msg) => msg.role === 'system') || {};
    turns.push({ effort: args.effort, system: systemMsg.content });
    if (turns.length === 1) {
      return { content: '', finishReason: 'tool_calls', toolCalls: [{ id: 'c0', name: 'Read', arguments: '{"path":"a.txt"}' }], raw: { role: 'assistant', content: '' } };
    }
    return { content: 'done', toolCalls: [], finishReason: 'stop', raw: { role: 'assistant', content: 'done' } };
  };

  const out = await runAgentLoop({
    systemPrompt: 'you are x',
    task: 't',
    cwd: workspace,
    toolset: toolsetFor(['Read']),
    provider: { baseUrl: 'http://x/v1', model: 'm', effort: 'high' },
    chatImpl,
  });

  assert.equal(out.stopped, 'final');
  assert.match(turns[0].system, /<nubos_output_shaping>[\s\S]*<\/nubos_output_shaping>$/, 'system prompt carries the shaping block');
  assert.equal(turns[0].effort, 'high', 'first turn (new user ask) keeps full effort');
  assert.equal(turns[1].effort, 'low', 'second turn (clean tool result) downgrades to low');
});
@@ -7,6 +7,7 @@ const { resolveFromConfig } = require('../../bin/np-tools/resolve-model.cjs');
7
7
  const { assertPreflight } = require('./preflight.cjs');
8
8
  const { runAgentLoop } = require('./agent-loop.cjs');
9
9
  const { toolsetFor } = require('./tools/index.cjs');
10
+ const elision = require('../elision.cjs');
10
11
  const { AUDITED_AGENTS, auditToolUse } = require('../nubosloop-audit.cjs');
11
12
  const { TASK_ID_RE } = require('../ids.cjs');
12
13
  const metrics = require('../metrics.cjs');
@@ -81,13 +82,17 @@ async function dispatchOffHost(o) {
81
82
 
82
83
  const src = loadSource(opts.agent, cwd);
83
84
  const declared = _parseTools(src.frontmatter && src.frontmatter.tools);
85
+ const cx = elision.compressionContext(cwd);
84
86
  const toolset = toolsetFor(declared, {
85
87
  readOnly: !!opts.readOnly,
86
88
  allowBash: !!opts.allowBash,
87
89
  withSearch: audited,
90
+ withExpand: cx.enabled,
88
91
  ctx: { taskId: hasTaskCtx ? opts.taskId : null, customRulesPath: opts.customRulesPath },
89
92
  });
90
93
  const provider = { baseUrl: res.baseUrl, apiKeyEnv: res.apiKeyEnv, model: res.model };
94
+ const _os = cx.outputSteering;
95
+ if (_os && _os.effortRouting && _os.baseEffort) provider.effort = _os.baseEffort;
91
96
 
92
97
  await preflight(provider);
93
98
 
@@ -140,12 +145,6 @@ async function dispatchOffHost(o) {
140
145
  } catch (err) { rule9 = { ok: false, error: (err && err.code) || 'audit-failed' }; }
141
146
  }
142
147
 
143
- // Tool-calling capability signal: if the toolset advertised tools but the model
144
- // made zero tool calls across the whole loop, the provider/model very likely does
145
- // not support OpenAI function-calling. The loop does not crash on this (a tool-less
146
- // turn is treated as a final answer), so it must be surfaced loudly — an off-host
147
- // editor in this state silently produces no edits. Not fatal: read-only emit-only
148
- // agents legitimately call no tools, so this is a hint, scoped by `mutating`.
149
148
  const toolsAdvertised = (toolset.schemas || []).length;
150
149
  const toolCalls = (result.toolLog || []).length;
151
150
  const capability = {
@@ -168,6 +167,7 @@ async function dispatchOffHost(o) {
168
167
  capability,
169
168
  output_lint: _lintOutput(result.content, opts.outputSchema),
170
169
  metrics_recorded: metricsRecorded,
170
+ compression: result.compression || null,
171
171
  };
172
172
  }
173
173
 
@@ -42,6 +42,23 @@ test('DSP-1: happy path returns the envelope and records a metrics row', async (
42
42
  assert.equal(rec.status, 'ok');
43
43
  });
44
44
 
45
// DSP-1c — runs the dispatcher twice with a stubbed loop that captures the provider
// it receives: once in a bare workspace, once with effort routing configured.
test('DSP-1c: provider.effort is seeded from base_effort only when effort routing is opted in', async () => {
  const loopResult = () => ({ content: 'x', stopped: 'final', iterations: 1, toolLog: [] });

  const bare = _root();
  let seenBare;
  await dispatchOffHost({
    agent: 'np-architect',
    task: 't',
    cwd: bare,
    deps: _deps({ runLoop: async ({ provider }) => { seenBare = provider; return loopResult(); } }),
  });
  assert.equal(seenBare.effort, undefined, 'no effort field absent opt-in (providers without support unaffected)');

  const on = _root();
  const routedConfig = {
    compression: { enabled: true, output_steering: { enabled: true, effort_routing: { enabled: true, base_effort: 'high', mechanical_effort: 'low' } } },
  };
  fs.writeFileSync(path.join(on, '.nubos-pilot', 'config.json'), JSON.stringify(routedConfig));
  let seenOn;
  await dispatchOffHost({
    agent: 'np-architect',
    task: 't',
    cwd: on,
    deps: _deps({ runLoop: async ({ provider }) => { seenOn = provider; return loopResult(); } }),
  });
  assert.equal(seenOn.effort, 'high', 'base_effort seeds the provider effort');
});
61
+
45
62
  test('DSP-2: a native-kind agent is refused (dispatch-not-offhost)', async () => {
46
63
  const cwd = _root();
47
64
  const deps = _deps({ resolve: () => ({ kind: 'native', provider: 'claude', model: null, tier: 'opus' }) });
@@ -166,7 +183,6 @@ test('DSP-13: outputSchema lints the result and rides the envelope (null when un
166
183
 
167
184
  test('DSP-14: capability flags zero tool-calls despite an advertised toolset (tool-calling unsupported signal)', async () => {
168
185
  const cwd = _root();
169
- // model made no tool calls but tools were advertised → not ok, mutating true (Write/Bash in toolset)
170
186
  const noTools = _deps({ runLoop: async () => ({ content: 'just text', stopped: 'final', iterations: 1, toolLog: [] }) });
171
187
  const out1 = await dispatchOffHost({ agent: 'np-architect', task: 't', cwd, deps: noTools });
172
188
  assert.equal(out1.capability.ok, false);
@@ -174,12 +190,10 @@ test('DSP-14: capability flags zero tool-calls despite an advertised toolset (to
174
190
  assert.ok(out1.capability.toolsAdvertised > 0);
175
191
  assert.equal(out1.capability.mutating, true);
176
192
 
177
- // model used a tool → ok
178
193
  const usedTool = _deps({ runLoop: async () => ({ content: 'x', stopped: 'final', iterations: 2, toolLog: [{ name: 'Read', ok: true }] }) });
179
194
  const out2 = await dispatchOffHost({ agent: 'np-architect', task: 't', cwd, deps: usedTool });
180
195
  assert.equal(out2.capability.ok, true);
181
196
 
182
- // read-only emitter with no tool calls → not ok but mutating false (softer hint)
183
197
  const ro = _deps({ runLoop: async () => ({ content: 'x', stopped: 'final', iterations: 1, toolLog: [] }) });
184
198
  const out3 = await dispatchOffHost({ agent: 'np-architect', task: 't', cwd, deps: ro, readOnly: true });
185
199
  assert.equal(out3.capability.ok, false);
@@ -31,7 +31,7 @@ function _parse(json) {
31
31
  };
32
32
  }
33
33
 
34
- async function chat({ baseUrl, apiKeyEnv, model, messages, tools, timeoutMs, fetchImpl, env }) {
34
+ async function chat({ baseUrl, apiKeyEnv, model, messages, tools, effort, timeoutMs, fetchImpl, env }) {
35
35
  if (typeof baseUrl !== 'string' || !baseUrl) {
36
36
  throw new NubosPilotError('provider-no-base-url', 'openai-compat chat requires a base_url', {});
37
37
  }
@@ -61,6 +61,7 @@ async function chat({ baseUrl, apiKeyEnv, model, messages, tools, timeoutMs, fet
61
61
  body.tools = tools;
62
62
  body.tool_choice = 'auto';
63
63
  }
64
+ if (typeof effort === 'string' && effort) body.reasoning_effort = effort;
64
65
 
65
66
  const url = baseUrl.replace(/\/+$/, '') + '/chat/completions';
66
67
  const host = _hostOf(url);
@@ -49,6 +49,15 @@ test('OAC-3: chat POSTs to <base>/chat/completions with model + tools and parses
49
49
  assert.equal(out.content, 'hi');
50
50
  });
51
51
 
52
// OAC-3b — the fetch stub keeps the parsed body of the most recent request; the
// first call passes effort 'low', the second passes none.
test('OAC-3b: chat forwards effort as reasoning_effort only when set', async () => {
  let lastBody = null;
  const fetchImpl = async (_url, opts) => {
    lastBody = JSON.parse(opts.body);
    return _res({ json: { choices: [{ message: { content: 'ok' } }] } });
  };
  const messages = () => [{ role: 'user', content: 'x' }];

  await chat({ baseUrl: 'http://x/v1', model: 'm', messages: messages(), effort: 'low', fetchImpl });
  assert.equal(lastBody.reasoning_effort, 'low', 'a set effort reaches the request body');

  await chat({ baseUrl: 'http://x/v1', model: 'm', messages: messages(), fetchImpl });
  assert.ok(!('reasoning_effort' in lastBody), 'absent effort is never sent (providers without support unaffected)');
});
60
+
52
61
  test('OAC-4: api_key_env adds a bearer header; missing key throws provider-missing-api-key', async () => {
53
62
  let auth = null;
54
63
  const fetchImpl = async (_url, opts) => { auth = opts.headers.authorization; return _res({ json: { choices: [{ message: { content: 'ok' } }] } }); };
@@ -8,6 +8,7 @@ const { assertInsideBase } = require('../../safe-path.cjs');
8
8
  const { scanContent, _looksCatastrophic } = require('../../security/scan.cjs');
9
9
  const { search: knowledgeSearch } = require('../../knowledge.cjs');
10
10
  const { recordSearchEvidence } = require('../../nubosloop-audit.cjs');
11
+ const elision = require('../../elision.cjs');
11
12
 
12
13
  const MAX_FILE_BYTES = 1024 * 1024;
13
14
  const MAX_READ_LINES = 2000;
@@ -226,7 +227,35 @@ function _knowledgeSearch(args, ctx) {
226
227
  return hits.map((h) => h.rel_path + ':' + h.line_start + ' (score ' + h.score + ')\n ' + String(h.preview || '').slice(0, 200)).join('\n');
227
228
  }
228
229
 
230
/**
 * Tool handler for `context-expand`: return the stored original behind an elision
 * marker hash.
 *
 * The hash is supplied by the model, so it is checked against the 12-character
 * lowercase-hex marker format before the Elision store is consulted; a malformed
 * value is rejected as bad arguments instead of being handed to the store lookup.
 *
 * @param {{hash?: string}} args - Tool arguments; `hash` is required.
 * @param {{cwd?: string}} [ctx] - Execution context; `cwd` is passed to the store lookup.
 * @returns {string} The original text on success, otherwise an 'Error: ...' message
 *   for an expired or unknown hash.
 * @throws {NubosPilotError} 'tool-bad-args' when `hash` is missing, empty, not a
 *   string, or not in marker format.
 */
function _contextExpand(args, ctx) {
  const hash = args && args.hash;
  if (typeof hash !== 'string' || !hash) throw new NubosPilotError('tool-bad-args', 'context-expand requires a "hash"', {});
  if (!/^[a-f0-9]{12}$/.test(hash)) {
    throw new NubosPilotError('tool-bad-args', 'context-expand "hash" must be the 12-character lowercase hex id from an elided marker', {});
  }
  const res = elision.retrieve(hash, ctx && ctx.cwd);
  if (res.status === 'ok') return res.original;
  if (res.status === 'expired') {
    return 'Error: context-expand: marker ' + hash + ' has expired (its retention window elapsed) and is no longer retrievable';
  }
  return 'Error: context-expand: no stored original for ' + hash;
}
240
+
229
241
  const TOOLS = {
242
+ 'context-expand': {
243
+ run: _contextExpand,
244
+ schema: {
245
+ type: 'function',
246
+ function: {
247
+ name: 'context-expand',
248
+ description: 'Retrieve the full original text behind a ⟦elided:<hash>⟧ marker that appeared in an earlier tool result (large outputs are compressed in place). Pass the 12-char hash. Only call this when you actually need the elided detail.',
249
+ parameters: {
250
+ type: 'object',
251
+ properties: {
252
+ hash: { type: 'string', description: 'The 12-character hash from a ⟦elided:<hash>⟧ marker.' },
253
+ },
254
+ required: ['hash'],
255
+ },
256
+ },
257
+ },
258
+ },
230
259
  'knowledge-search': {
231
260
  run: _knowledgeSearch,
232
261
  schema: {
@@ -359,7 +388,8 @@ const TOOLS = {
359
388
  const READ_ONLY_TOOL_NAMES = Object.freeze(['Read', 'Glob', 'Grep']);
360
389
  const MUTATING_TOOL_NAMES = Object.freeze(['Write', 'Edit']);
361
390
  const SEARCH_TOOL_NAME = 'knowledge-search';
362
- const IMPLEMENTED_TOOL_NAMES = Object.freeze([...READ_ONLY_TOOL_NAMES, ...MUTATING_TOOL_NAMES, 'Bash', SEARCH_TOOL_NAME]);
391
+ const EXPAND_TOOL_NAME = 'context-expand';
392
+ const IMPLEMENTED_TOOL_NAMES = Object.freeze([...READ_ONLY_TOOL_NAMES, ...MUTATING_TOOL_NAMES, 'Bash', SEARCH_TOOL_NAME, EXPAND_TOOL_NAME]);
363
393
 
364
394
  function toolsetFor(declaredNames, opts) {
365
395
  const o = opts || {};
@@ -371,6 +401,7 @@ function toolsetFor(declaredNames, opts) {
371
401
  }
372
402
  const names = declared.filter((n) => allowed.includes(n));
373
403
  if (o.withSearch && !names.includes(SEARCH_TOOL_NAME)) names.push(SEARCH_TOOL_NAME);
404
+ if (o.withExpand && !names.includes(EXPAND_TOOL_NAME)) names.push(EXPAND_TOOL_NAME);
374
405
  const extraCtx = o.ctx || {};
375
406
  return {
376
407
  names,
@@ -407,6 +438,7 @@ module.exports = {
407
438
  READ_ONLY_TOOL_NAMES,
408
439
  MUTATING_TOOL_NAMES,
409
440
  SEARCH_TOOL_NAME,
441
+ EXPAND_TOOL_NAME,
410
442
  IMPLEMENTED_TOOL_NAMES,
411
443
  BASH_DENYLIST,
412
444
  toolsetFor,
@@ -228,3 +228,27 @@ test('TOOL-29: knowledge-search records search evidence when a taskId is in ctx'
228
228
  const { searchEvidenceForRound } = require('../../nubosloop-audit.cjs');
229
229
  assert.ok(searchEvidenceForRound(taskId, 1, cwd).length > 0, 'evidence must be recorded for the round');
230
230
  });
231
+
232
// TOOL-30 — the tool is absent by default and present (name and schema) with withExpand.
test('TOOL-30: withExpand injects context-expand only on demand', () => {
  const plain = toolsetFor(['Read']);
  assert.ok(!plain.names.includes('context-expand'));

  const expandable = toolsetFor(['Read'], { withExpand: true });
  assert.ok(expandable.names.includes('context-expand'));
  assert.ok(expandable.schemas.some((schema) => schema.function.name === 'context-expand'));
});

// TOOL-31 — stores a text directly in the Elision store, then exercises the tool with
// the returned hash, an unknown hash, and no hash at all.
test('TOOL-31: context-expand returns the stored original for a known hash, error for unknown', () => {
  const elision = require('../../elision.cjs');
  const cwd = _ws({});
  const original = 'the full uncompressed tool output\nwith many lines\n'.repeat(20);
  const hash = elision.store(original, { type: 'log' }, cwd);
  const toolset = toolsetFor(['Read'], { withExpand: true });
  const run = (args) => toolset.execute('context-expand', args, { cwd });

  assert.equal(run({ hash }), original);
  assert.match(run({ hash: 'ffffffffffff' }), /no stored original/);
  assert.match(run({}), /requires a "hash"/);
});

// TOOL-32 — a toolset built without withExpand refuses the call.
test('TOOL-32: context-expand is not callable unless it was injected (allow-list)', () => {
  const cwd = _ws({});
  const toolset = toolsetFor(['Read']);
  const reply = toolset.execute('context-expand', { hash: 'aaaaaaaaaaaa' }, { cwd });
  assert.match(reply, /not available to this agent/);
});
@@ -0,0 +1,16 @@
1
+ {
2
+ "$id": "elision-entry.v1",
3
+ "title": "Elision cache entry (.nubos-pilot/elision/<hash>.json)",
4
+ "type": "object",
5
+ "required": ["version", "hash", "original", "type", "created_at", "ttl_ms", "original_bytes", "compressed_bytes"],
6
+ "properties": {
7
+ "version": { "type": "integer", "minimum": 1 },
8
+ "hash": { "type": "string", "pattern": "^[a-f0-9]{12}$" },
9
+ "original": { "type": "string", "maxBytes": 8388608 },
10
+ "type": { "type": "string", "maxBytes": 64 },
11
+ "created_at": { "type": "string" },
12
+ "ttl_ms": { "type": "integer", "minimum": 0 },
13
+ "original_bytes": { "type": "integer", "minimum": 0 },
14
+ "compressed_bytes": { "type": "integer", "minimum": 0 }
15
+ }
16
+ }
@@ -0,0 +1,46 @@
1
+ 'use strict';
2
+
3
+ const DEFAULT_CHARS_PER_TOKEN = 4;
4
+
5
/**
 * Round to the nearest integer, mapping anything that is not a finite number
 * (NaN, ±Infinity, non-numbers) to 0.
 *
 * @param {*} n - Value to round.
 * @returns {number} The rounded value, or 0.
 */
function _round(n) {
  if (!Number.isFinite(n)) return 0;
  return Math.round(n);
}
8
+
9
/**
 * Estimate a token count from a byte count.
 *
 * @param {*} bytes - Byte count; values that do not convert to a non-zero number count as 0.
 * @param {number} [charsPerToken] - Divisor; anything that is not a finite number
 *   greater than 0 falls back to DEFAULT_CHARS_PER_TOKEN.
 * @returns {number} Rounded estimate, never below 0.
 */
function estimateTokens(bytes, charsPerToken) {
  const divisorUsable = Number.isFinite(charsPerToken) && charsPerToken > 0;
  const divisor = divisorUsable ? charsPerToken : DEFAULT_CHARS_PER_TOKEN;
  const byteCount = Number(bytes) || 0;
  const estimate = _round(byteCount / divisor);
  return Math.max(0, estimate);
}
13
+
14
/**
 * Estimate the cost of a token count at a price per million tokens.
 *
 * @param {*} tokens - Token count; values that do not convert to a non-zero number count as 0.
 * @param {number} pricePerMTok - Price per 1,000,000 tokens.
 * @returns {number|null} The estimated cost, or null when the price is not a
 *   finite number greater than 0.
 */
function estimateCost(tokens, pricePerMTok) {
  const priced = Number.isFinite(pricePerMTok) && pricePerMTok > 0;
  if (!priced) return null;
  const count = Number(tokens) || 0;
  return count / 1e6 * pricePerMTok;
}
18
+
19
/**
 * Summarise what a before/after byte reduction saved, in bytes, estimated tokens
 * and (when a price is supplied) estimated cost.
 *
 * Byte counts that are negative, non-finite or not numeric are treated as 0, so the
 * summary never reports an infinite `bytes_saved` or a `saved_pct` outside 0..100.
 *
 * @param {object} [opts]
 * @param {number} [opts.bytesBefore] - Size before the reduction.
 * @param {number} [opts.bytesAfter] - Size after the reduction.
 * @param {number} [opts.charsPerToken] - Divisor for the token estimate; falls back
 *   to DEFAULT_CHARS_PER_TOKEN unless it is a finite number greater than 0.
 * @param {number} [opts.pricePerMTok] - Price per million tokens; cost fields are
 *   only emitted when `estimateCost` returns a non-null value for it.
 * @param {string} [opts.currency] - Currency label; 'USD' when absent or empty.
 * @returns {object} `{ bytes_saved, chars_per_token, tokens_saved_est, saved_pct }`
 *   plus `{ price_per_mtok, currency, cost_saved_est }` when priced.
 */
function summarizeSavings(opts) {
  const o = opts || {};
  // Anything that is not a positive finite number counts as zero bytes.
  const toByteCount = (value) => {
    const n = Number(value);
    return Number.isFinite(n) && n > 0 ? n : 0;
  };
  const before = toByteCount(o.bytesBefore);
  const after = toByteCount(o.bytesAfter);
  const bytesSaved = Math.max(0, before - after);
  const cpt = Number.isFinite(o.charsPerToken) && o.charsPerToken > 0 ? o.charsPerToken : DEFAULT_CHARS_PER_TOKEN;
  const tokensSaved = estimateTokens(bytesSaved, cpt);
  const cost = estimateCost(tokensSaved, o.pricePerMTok);
  const out = {
    bytes_saved: bytesSaved,
    chars_per_token: cpt,
    tokens_saved_est: tokensSaved,
    saved_pct: before > 0 ? Math.round((bytesSaved / before) * 100) : 0,
  };
  if (cost !== null) {
    out.price_per_mtok = o.pricePerMTok;
    out.currency = typeof o.currency === 'string' && o.currency ? o.currency : 'USD';
    // Keep four decimal places in the reported estimate.
    out.cost_saved_est = Math.round(cost * 10000) / 10000;
  }
  return out;
}
40
+
41
+ module.exports = {
42
+ DEFAULT_CHARS_PER_TOKEN,
43
+ estimateTokens,
44
+ estimateCost,
45
+ summarizeSavings,
46
+ };
@@ -0,0 +1,42 @@
1
+ 'use strict';
2
+
3
+ const { test } = require('node:test');
4
+ const assert = require('node:assert/strict');
5
+
6
+ const tc = require('./token-cost.cjs');
7
+
8
// TC-1 — explicit divisor, default divisor, zero bytes, and an unusable divisor (0).
test('TC-1: estimateTokens uses chars-per-token and defaults sanely', () => {
  const cases = [
    { args: [4000, 4], expected: 1000 },
    { args: [4000], expected: 1000 },
    { args: [0, 4], expected: 0 },
    { args: [100, 0], expected: 25 },
  ];
  for (const { args, expected } of cases) {
    assert.equal(tc.estimateTokens(...args), expected);
  }
});

// TC-2 — a null or zero price yields null; a positive price yields a number.
test('TC-2: estimateCost returns null without a positive price', () => {
  const oneMillion = 1_000_000;
  assert.equal(tc.estimateCost(oneMillion, null), null);
  assert.equal(tc.estimateCost(oneMillion, 0), null);
  assert.equal(tc.estimateCost(oneMillion, 3), 3);
});

// TC-3 — without pricePerMTok the summary carries no cost or currency fields.
test('TC-3: summarizeSavings reports tokens only when no price is given', () => {
  const summary = tc.summarizeSavings({ bytesBefore: 8000, bytesAfter: 2000, charsPerToken: 4 });
  assert.equal(summary.bytes_saved, 6000);
  assert.equal(summary.tokens_saved_est, 1500);
  assert.equal(summary.saved_pct, 75);
  assert.equal(summary.cost_saved_est, undefined, 'no cost without a price');
  assert.equal(summary.currency, undefined);
});

// TC-4 — with a price and currency the summary echoes both and adds the cost estimate.
test('TC-4: summarizeSavings adds a cost estimate + currency when priced', () => {
  const summary = tc.summarizeSavings({
    bytesBefore: 8000,
    bytesAfter: 2000,
    charsPerToken: 4,
    pricePerMTok: 3,
    currency: 'EUR',
  });
  assert.equal(summary.tokens_saved_est, 1500);
  assert.equal(summary.cost_saved_est, 0.0045);
  assert.equal(summary.currency, 'EUR');
  assert.equal(summary.price_per_mtok, 3);
});

// TC-5 — an "after" size above the "before" size reports zero savings.
test('TC-5: never negative when output is somehow larger than input', () => {
  const summary = tc.summarizeSavings({ bytesBefore: 100, bytesAfter: 400 });
  assert.equal(summary.bytes_saved, 0);
  assert.equal(summary.tokens_saved_est, 0);
});
package/np-tools.cjs CHANGED
@@ -91,6 +91,8 @@ const topLevelCommands = {
91
91
  'knowledge-index': require('./bin/np-tools/knowledge-index.cjs'),
92
92
  'knowledge-search': require('./bin/np-tools/knowledge-search.cjs'),
93
93
  'knowledge-stats': require('./bin/np-tools/knowledge-stats.cjs'),
94
+ 'elision-get': require('./bin/np-tools/elision-get.cjs'),
95
+ 'elision-bench': require('./bin/np-tools/elision-bench.cjs'),
94
96
  'context-stats': require('./bin/np-tools/context-stats.cjs'),
95
97
  'session-snapshot-write': require('./bin/np-tools/session-snapshot-write.cjs'),
96
98
  'session-snapshot-read': require('./bin/np-tools/session-snapshot-read.cjs'),