iosm-cli 0.2.9 → 0.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +67 -0
- package/README.md +11 -2
- package/dist/core/agent-session.d.ts +9 -0
- package/dist/core/agent-session.d.ts.map +1 -1
- package/dist/core/agent-session.js +425 -50
- package/dist/core/agent-session.js.map +1 -1
- package/dist/core/background-processes.d.ts +31 -0
- package/dist/core/background-processes.d.ts.map +1 -0
- package/dist/core/background-processes.js +241 -0
- package/dist/core/background-processes.js.map +1 -0
- package/dist/core/bash-executor.d.ts +6 -0
- package/dist/core/bash-executor.d.ts.map +1 -1
- package/dist/core/bash-executor.js.map +1 -1
- package/dist/core/extensions/types.d.ts +3 -0
- package/dist/core/extensions/types.d.ts.map +1 -1
- package/dist/core/extensions/types.js.map +1 -1
- package/dist/core/extensions/wrapper.d.ts +19 -2
- package/dist/core/extensions/wrapper.d.ts.map +1 -1
- package/dist/core/extensions/wrapper.js +23 -4
- package/dist/core/extensions/wrapper.js.map +1 -1
- package/dist/core/mcp/runtime.d.ts.map +1 -1
- package/dist/core/mcp/runtime.js +2 -0
- package/dist/core/mcp/runtime.js.map +1 -1
- package/dist/core/messages.d.ts +1 -1
- package/dist/core/messages.d.ts.map +1 -1
- package/dist/core/messages.js +2 -1
- package/dist/core/messages.js.map +1 -1
- package/dist/core/sdk.d.ts.map +1 -1
- package/dist/core/sdk.js +30 -2
- package/dist/core/sdk.js.map +1 -1
- package/dist/core/settings-manager.d.ts +17 -0
- package/dist/core/settings-manager.d.ts.map +1 -1
- package/dist/core/settings-manager.js +29 -0
- package/dist/core/settings-manager.js.map +1 -1
- package/dist/core/slash-commands.d.ts.map +1 -1
- package/dist/core/slash-commands.js +8 -0
- package/dist/core/slash-commands.js.map +1 -1
- package/dist/core/subagents.d.ts +10 -2
- package/dist/core/subagents.d.ts.map +1 -1
- package/dist/core/subagents.js +66 -17
- package/dist/core/subagents.js.map +1 -1
- package/dist/core/system-prompt.d.ts +25 -0
- package/dist/core/system-prompt.d.ts.map +1 -1
- package/dist/core/system-prompt.js +146 -14
- package/dist/core/system-prompt.js.map +1 -1
- package/dist/core/tools/bash.d.ts +5 -0
- package/dist/core/tools/bash.d.ts.map +1 -1
- package/dist/core/tools/bash.js +28 -2
- package/dist/core/tools/bash.js.map +1 -1
- package/dist/core/tools/index.d.ts +2 -1
- package/dist/core/tools/index.d.ts.map +1 -1
- package/dist/core/tools/index.js.map +1 -1
- package/dist/core/tools/permissions.d.ts +3 -0
- package/dist/core/tools/permissions.d.ts.map +1 -1
- package/dist/core/tools/permissions.js.map +1 -1
- package/dist/core/tools/task.d.ts +3 -1
- package/dist/core/tools/task.d.ts.map +1 -1
- package/dist/core/tools/task.js +48 -23
- package/dist/core/tools/task.js.map +1 -1
- package/dist/core/ultrathink.d.ts +122 -0
- package/dist/core/ultrathink.d.ts.map +1 -0
- package/dist/core/ultrathink.js +621 -0
- package/dist/core/ultrathink.js.map +1 -0
- package/dist/main.d.ts.map +1 -1
- package/dist/main.js +93 -1
- package/dist/main.js.map +1 -1
- package/dist/modes/interactive/interactive-mode.d.ts +6 -0
- package/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
- package/dist/modes/interactive/interactive-mode.js +281 -24
- package/dist/modes/interactive/interactive-mode.js.map +1 -1
- package/docs/cli-reference.md +10 -0
- package/docs/configuration.md +57 -1
- package/docs/development-and-testing.md +3 -2
- package/docs/extensions-packages-themes.md +27 -0
- package/docs/interactive-mode.md +7 -0
- package/docs/orchestration-and-subagents.md +16 -0
- package/docs/rpc-json-sdk.md +14 -0
- package/docs/sessions-traces-export.md +7 -0
- package/package.json +1 -1
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
*/
|
|
15
15
|
import { appendFileSync, mkdirSync, readFileSync } from "node:fs";
|
|
16
16
|
import { basename, dirname, join } from "node:path";
|
|
17
|
-
import { isContextOverflow, modelsAreEqual, resetApiProviders, supportsXhigh } from "@mariozechner/pi-ai";
|
|
17
|
+
import { completeSimple, isContextOverflow, modelsAreEqual, resetApiProviders, supportsXhigh } from "@mariozechner/pi-ai";
|
|
18
18
|
import { getDocsPath, getSessionTracePath, isSessionTraceEnabled } from "../config.js";
|
|
19
19
|
import { buildIosmRuntimeDirective, prepareIosmRuntimeContext } from "../iosm/runtime-context.js";
|
|
20
20
|
import { theme } from "../modes/interactive/theme/theme.js";
|
|
@@ -28,6 +28,7 @@ import { createToolHtmlRenderer } from "./export-html/tool-renderer.js";
|
|
|
28
28
|
import { ExtensionRunner, wrapRegisteredTools, wrapToolsWithExtensions, } from "./extensions/index.js";
|
|
29
29
|
import { INTERNAL_UI_META_CUSTOM_TYPE } from "./messages.js";
|
|
30
30
|
import { expandPromptTemplate } from "./prompt-templates.js";
|
|
31
|
+
import { startBackgroundProcess } from "./background-processes.js";
|
|
31
32
|
import { getLatestCompactionEntry } from "./session-manager.js";
|
|
32
33
|
import { BUILTIN_SLASH_COMMANDS } from "./slash-commands.js";
|
|
33
34
|
import { buildSystemPrompt } from "./system-prompt.js";
|
|
@@ -35,6 +36,7 @@ import { isReadOnlyProfileName } from "./agent-profiles.js";
|
|
|
35
36
|
import { applyPostToolUseHooks, applyPreToolUseHooks, applyStopHooks, applyUserPromptSubmitHooks, emptyHooksConfig, } from "./hooks.js";
|
|
36
37
|
import { extractTaskPlanFromAssistantMessage, formatTaskPlanMessageContent, taskPlanSignature, TASK_PLAN_CUSTOM_TYPE, } from "./task-plan.js";
|
|
37
38
|
import { createAllTools, getAllowedFetchMethodsForProfile } from "./tools/index.js";
|
|
39
|
+
import { ULTRATHINK_CHECKPOINT_COMPRESSION_SYSTEM_PROMPT, ULTRATHINK_MAX_CHECKPOINT_CHARS, ULTRATHINK_MAX_ITERATION_INPUT_TOKENS, ULTRATHINK_MAX_RUN_COST, ULTRATHINK_MAX_RUN_INPUT_TOKENS, ULTRATHINK_MAX_RUN_TOTAL_TOKENS, ULTRATHINK_STAGNATION_LIMIT, ULTRATHINK_VISIBLE_PROMPT_PREFIX, buildUltrathinkCheckpointCompressionPrompt, buildUltrathinkBudgetStatusLine, buildUltrathinkComplianceRepairPrompt, buildUltrathinkContextTail, buildUltrathinkEvidenceCatalog, buildUltrathinkIterationPrompt, buildUltrathinkToolGroundingPrompt, buildUltrathinkVisibleIterationPrompt, createInitialUltrathinkCheckpoint, evaluateUltrathinkEvidencePolicy, extractUltrathinkCheckpoint, extractUltrathinkIterationSummary, extractUltrathinkToolEvidence, findLastMeaningfulUserIntent, getUltrathinkPhase, hasUltrathinkEvidenceViolations, isUltrathinkStagnated, normalizeUltrathinkCheckpoint, parseUltrathinkCommand, resolveUltrathinkReadOnlyTools, shouldUltrathinkForceToolGrounding, truncateUltrathinkCheckpoint, ULTRATHINK_USAGE, } from "./ultrathink.js";
|
|
38
40
|
/**
|
|
39
41
|
* Parse a skill block from message text.
|
|
40
42
|
* Returns null if the text doesn't contain a skill block.
|
|
@@ -252,6 +254,16 @@ const THINKING_LEVELS = ["off", "minimal", "low", "medium", "high"];
|
|
|
252
254
|
const MAX_PROMPT_PROTOCOL_AUTO_REPAIR_ATTEMPTS = 2;
|
|
253
255
|
/** Thinking levels including xhigh (for supported models) */
|
|
254
256
|
const THINKING_LEVELS_WITH_XHIGH = ["off", "minimal", "low", "medium", "high", "xhigh"];
|
|
257
|
+
/**
 * Permission level name required by each built-in tool, keyed by tool name.
 *
 * The tool registry refresh copies these entries into the session's
 * per-tool permission map, and the permission evaluation falls back to that
 * map when a request does not carry its own `requiredPermission`.
 *
 * Frozen because it is a shared module-level lookup table: a stray write
 * would otherwise change the permission reported for a tool on the next
 * registry refresh.
 */
const BUILTIN_TOOL_REQUIRED_PERMISSIONS = Object.freeze({
    bash: "danger-full-access",
    edit: "workspace-write",
    write: "workspace-write",
    fetch: "read-only",
    web_search: "read-only",
    git_write: "workspace-write",
    fs_ops: "workspace-write",
    db_run: "workspace-write",
});
|
|
255
267
|
// ============================================================================
|
|
256
268
|
// AgentSession Class
|
|
257
269
|
// ============================================================================
|
|
@@ -291,6 +303,7 @@ export class AgentSession {
|
|
|
291
303
|
this._toolRegistry = new Map();
|
|
292
304
|
this._toolPromptSnippets = new Map();
|
|
293
305
|
this._toolPromptGuidelines = new Map();
|
|
306
|
+
this._toolRequiredPermissions = new Map();
|
|
294
307
|
this._subagentStartMeta = new Map();
|
|
295
308
|
// Base system prompt (without extension appends) - used to apply fresh appends each turn
|
|
296
309
|
this._baseSystemPrompt = "";
|
|
@@ -299,6 +312,7 @@ export class AgentSession {
|
|
|
299
312
|
this._pendingHookNotices = [];
|
|
300
313
|
this._sessionTraceEnabled = isSessionTraceEnabled();
|
|
301
314
|
this._protocolAutoRepairActive = false;
|
|
315
|
+
this._ultrathinkActive = false;
|
|
302
316
|
// Track last assistant message for auto-compaction check
|
|
303
317
|
this._lastAssistantMessage = undefined;
|
|
304
318
|
this._lastTaskPlanSignature = undefined;
|
|
@@ -1002,6 +1016,32 @@ export class AgentSession {
|
|
|
1002
1016
|
}
|
|
1003
1017
|
return Array.from(unique);
|
|
1004
1018
|
}
|
|
1019
|
+
_summarizeToolPermissionInput(input) {
|
|
1020
|
+
let preview = "{}";
|
|
1021
|
+
try {
|
|
1022
|
+
preview = JSON.stringify(input);
|
|
1023
|
+
}
|
|
1024
|
+
catch {
|
|
1025
|
+
preview = "{...}";
|
|
1026
|
+
}
|
|
1027
|
+
if (preview.length > 200) {
|
|
1028
|
+
preview = `${preview.slice(0, 197)}...`;
|
|
1029
|
+
}
|
|
1030
|
+
return preview;
|
|
1031
|
+
}
|
|
1032
|
+
async _evaluateToolPermission(request) {
|
|
1033
|
+
const normalizedRequest = {
|
|
1034
|
+
...request,
|
|
1035
|
+
requiredPermission: request.requiredPermission ?? this._toolRequiredPermissions.get(request.toolName) ?? undefined,
|
|
1036
|
+
toolSource: request.toolSource ?? "builtin",
|
|
1037
|
+
};
|
|
1038
|
+
const hookResult = applyPreToolUseHooks(this._hooksConfig, normalizedRequest);
|
|
1039
|
+
this._queueHookNotices("PreToolUse", hookResult.notices);
|
|
1040
|
+
if (!hookResult.allowed) {
|
|
1041
|
+
throw new Error(hookResult.message ?? "Tool blocked by PreToolUse hook.");
|
|
1042
|
+
}
|
|
1043
|
+
return this._toolPermissionHandler ? this._toolPermissionHandler(normalizedRequest) : true;
|
|
1044
|
+
}
|
|
1005
1045
|
_rebuildSystemPrompt(toolNames) {
|
|
1006
1046
|
const validToolNames = toolNames.filter((name) => this._toolRegistry.has(name));
|
|
1007
1047
|
const toolSnippets = {};
|
|
@@ -1033,15 +1073,35 @@ export class AgentSession {
|
|
|
1033
1073
|
return true;
|
|
1034
1074
|
return this._iosmAutopilotEnabled ? isIosmPlaybook : isAgentsPlaybook;
|
|
1035
1075
|
});
|
|
1076
|
+
const promptContextOptions = {
|
|
1077
|
+
enableContextDedupe: this.settingsManager.getPromptContextEnableDedupe(),
|
|
1078
|
+
maxContextCharsPerFile: this.settingsManager.getPromptContextMaxCharsPerFile(),
|
|
1079
|
+
maxTotalContextChars: this.settingsManager.getPromptContextMaxTotalChars(),
|
|
1080
|
+
enableGitSnapshotContext: this.settingsManager.getPromptContextEnableGitSnapshotContext(),
|
|
1081
|
+
};
|
|
1036
1082
|
return buildSystemPrompt({
|
|
1037
1083
|
cwd: this._cwd,
|
|
1038
1084
|
skills: loadedSkills,
|
|
1039
1085
|
contextFiles: loadedContextFiles,
|
|
1086
|
+
contextProcessing: promptContextOptions,
|
|
1040
1087
|
customPrompt: loaderSystemPrompt,
|
|
1041
1088
|
appendSystemPrompt,
|
|
1042
1089
|
selectedTools: validToolNames,
|
|
1043
1090
|
toolSnippets,
|
|
1044
1091
|
promptGuidelines,
|
|
1092
|
+
onContextProcessed: (stats) => {
|
|
1093
|
+
this._appendSessionTrace({
|
|
1094
|
+
type: "system_prompt_context_compose",
|
|
1095
|
+
context_before_chars: stats.contextBeforeChars,
|
|
1096
|
+
context_after_chars: stats.contextAfterChars,
|
|
1097
|
+
dedupe_hits: stats.dedupeHits,
|
|
1098
|
+
truncated_files: stats.truncatedFiles,
|
|
1099
|
+
dropped_files: stats.droppedFiles,
|
|
1100
|
+
included_files: stats.includedFiles,
|
|
1101
|
+
total_files: stats.totalFiles,
|
|
1102
|
+
git_snapshot_included: stats.gitSnapshotIncluded,
|
|
1103
|
+
});
|
|
1104
|
+
},
|
|
1045
1105
|
});
|
|
1046
1106
|
}
|
|
1047
1107
|
// =========================================================================
|
|
@@ -1066,6 +1126,22 @@ export class AgentSession {
|
|
|
1066
1126
|
source: inputSource,
|
|
1067
1127
|
imageCount: options?.images?.length ?? 0,
|
|
1068
1128
|
});
|
|
1129
|
+
if (!options?.skipUltrathinkCommand) {
|
|
1130
|
+
const ultrathinkParseResult = parseUltrathinkCommand(text);
|
|
1131
|
+
if (ultrathinkParseResult?.kind === "error") {
|
|
1132
|
+
throw new Error(`${ultrathinkParseResult.error}\n\n${ultrathinkParseResult.usage}`);
|
|
1133
|
+
}
|
|
1134
|
+
if (ultrathinkParseResult?.kind === "command") {
|
|
1135
|
+
await this._runUltrathinkCommand(ultrathinkParseResult.command, { source: inputSource });
|
|
1136
|
+
this._appendSessionTrace({
|
|
1137
|
+
type: "prompt_handled_by_command",
|
|
1138
|
+
text,
|
|
1139
|
+
command: "ultrathink",
|
|
1140
|
+
iterations: ultrathinkParseResult.command.iterations,
|
|
1141
|
+
});
|
|
1142
|
+
return;
|
|
1143
|
+
}
|
|
1144
|
+
}
|
|
1069
1145
|
// Handle extension commands first (execute immediately, even during streaming)
|
|
1070
1146
|
// Extension commands manage their own LLM interaction via iosm.sendMessage()
|
|
1071
1147
|
if (expandPromptTemplates && text.startsWith("/")) {
|
|
@@ -1313,6 +1389,289 @@ export class AgentSession {
|
|
|
1313
1389
|
}
|
|
1314
1390
|
}
|
|
1315
1391
|
}
|
|
1392
|
+
async _runUltrathinkCommand(command, options) {
|
|
1393
|
+
if (this.isStreaming) {
|
|
1394
|
+
throw new Error("Cannot start /ultrathink while the agent is processing another request.");
|
|
1395
|
+
}
|
|
1396
|
+
if (this.isCompacting) {
|
|
1397
|
+
throw new Error("Cannot start /ultrathink while compaction is running.");
|
|
1398
|
+
}
|
|
1399
|
+
if (this._ultrathinkActive) {
|
|
1400
|
+
throw new Error("An /ultrathink run is already in progress.");
|
|
1401
|
+
}
|
|
1402
|
+
if (!this.model) {
|
|
1403
|
+
throw new Error("No model selected.\n\n" +
|
|
1404
|
+
`Use /login or set an API key environment variable. See ${join(getDocsPath(), "providers.md")}\n\n` +
|
|
1405
|
+
"Then use /model to select a model.");
|
|
1406
|
+
}
|
|
1407
|
+
const apiKey = await this._modelRegistry.getApiKey(this.model);
|
|
1408
|
+
if (!apiKey) {
|
|
1409
|
+
const isOAuth = this._modelRegistry.isUsingOAuth(this.model);
|
|
1410
|
+
if (isOAuth) {
|
|
1411
|
+
throw new Error(`Authentication failed for "${this.model.provider}". ` +
|
|
1412
|
+
`Credentials may have expired or network is unavailable. ` +
|
|
1413
|
+
`Run '/login ${this.model.provider}' to re-authenticate.`);
|
|
1414
|
+
}
|
|
1415
|
+
throw new Error(`No API key found for ${this.model.provider}.\n\n` +
|
|
1416
|
+
`Use /login or set an API key environment variable. See ${join(getDocsPath(), "providers.md")}`);
|
|
1417
|
+
}
|
|
1418
|
+
const objective = command.query?.trim() || findLastMeaningfulUserIntent(this.messages);
|
|
1419
|
+
if (!objective) {
|
|
1420
|
+
throw new Error([
|
|
1421
|
+
"Cannot infer an objective for /ultrathink from session context.",
|
|
1422
|
+
"Provide a query explicitly or send a regular user request first.",
|
|
1423
|
+
"",
|
|
1424
|
+
ULTRATHINK_USAGE,
|
|
1425
|
+
].join("\n"));
|
|
1426
|
+
}
|
|
1427
|
+
const originalTools = this.getActiveToolNames();
|
|
1428
|
+
const availableToolNames = this.getAllTools().map((tool) => tool.name);
|
|
1429
|
+
const readOnlyTools = resolveUltrathinkReadOnlyTools(availableToolNames);
|
|
1430
|
+
if (readOnlyTools.length === 0) {
|
|
1431
|
+
throw new Error([
|
|
1432
|
+
"Cannot start /ultrathink: no read-only tools are currently active.",
|
|
1433
|
+
"Enable at least one analysis tool (for example read/rg/find/semantic_search/fetch/git_read) and retry.",
|
|
1434
|
+
].join("\n"));
|
|
1435
|
+
}
|
|
1436
|
+
const contextTail = buildUltrathinkContextTail(this.messages);
|
|
1437
|
+
let checkpoint = createInitialUltrathinkCheckpoint(objective);
|
|
1438
|
+
let previousSummary;
|
|
1439
|
+
let accumulatedInputTokens = 0;
|
|
1440
|
+
let accumulatedTotalTokens = 0;
|
|
1441
|
+
let accumulatedCost = 0;
|
|
1442
|
+
let stagnationCount = 0;
|
|
1443
|
+
let targetIterations = command.iterations;
|
|
1444
|
+
let enforceEvidencePolicy = true;
|
|
1445
|
+
const evidenceById = new Map();
|
|
1446
|
+
const mergeEvidence = (messages) => {
|
|
1447
|
+
const evidence = extractUltrathinkToolEvidence(messages);
|
|
1448
|
+
for (const entry of evidence) {
|
|
1449
|
+
evidenceById.set(entry.toolCallId, entry);
|
|
1450
|
+
}
|
|
1451
|
+
return evidence.length;
|
|
1452
|
+
};
|
|
1453
|
+
const registerUsage = (assistantMessage) => {
|
|
1454
|
+
const usage = assistantMessage?.usage;
|
|
1455
|
+
const inputTokens = Number(usage?.input ?? 0);
|
|
1456
|
+
const totalTokens = Number(usage?.totalTokens ?? inputTokens + Number(usage?.output ?? 0));
|
|
1457
|
+
const costTotal = Number(usage?.cost?.total ?? 0);
|
|
1458
|
+
accumulatedInputTokens += inputTokens;
|
|
1459
|
+
accumulatedTotalTokens += totalTokens;
|
|
1460
|
+
accumulatedCost += costTotal;
|
|
1461
|
+
return { inputTokens, totalTokens, costTotal };
|
|
1462
|
+
};
|
|
1463
|
+
const exceedsBudget = (iterationInputTokens) => iterationInputTokens > ULTRATHINK_MAX_ITERATION_INPUT_TOKENS ||
|
|
1464
|
+
accumulatedInputTokens > ULTRATHINK_MAX_RUN_INPUT_TOKENS ||
|
|
1465
|
+
accumulatedTotalTokens > ULTRATHINK_MAX_RUN_TOTAL_TOKENS ||
|
|
1466
|
+
accumulatedCost > ULTRATHINK_MAX_RUN_COST;
|
|
1467
|
+
const runUltrathinkInternalPrompt = async (rawPrompt, displayText) => {
|
|
1468
|
+
this._appendCustomMessageLocally({
|
|
1469
|
+
customType: INTERNAL_UI_META_CUSTOM_TYPE,
|
|
1470
|
+
content: "",
|
|
1471
|
+
display: false,
|
|
1472
|
+
details: {
|
|
1473
|
+
kind: "orchestration_context",
|
|
1474
|
+
rawPrompt,
|
|
1475
|
+
displayText,
|
|
1476
|
+
},
|
|
1477
|
+
});
|
|
1478
|
+
const messageCountBefore = this.messages.length;
|
|
1479
|
+
await this.prompt(rawPrompt, {
|
|
1480
|
+
expandPromptTemplates: false,
|
|
1481
|
+
skipIosmAutopilot: true,
|
|
1482
|
+
skipOrchestrationDirective: true,
|
|
1483
|
+
skipUltrathinkCommand: true,
|
|
1484
|
+
source: options.source,
|
|
1485
|
+
});
|
|
1486
|
+
return this.messages.slice(messageCountBefore);
|
|
1487
|
+
};
|
|
1488
|
+
this._ultrathinkActive = true;
|
|
1489
|
+
this.setActiveToolsByName(readOnlyTools);
|
|
1490
|
+
try {
|
|
1491
|
+
for (let iteration = 1; iteration <= targetIterations; iteration++) {
|
|
1492
|
+
const phase = iteration === targetIterations ? "Synthesis" : getUltrathinkPhase(iteration, targetIterations);
|
|
1493
|
+
const evidenceCatalog = buildUltrathinkEvidenceCatalog([...evidenceById.values()]);
|
|
1494
|
+
const budgetStatus = buildUltrathinkBudgetStatusLine({
|
|
1495
|
+
accumulatedInputTokens,
|
|
1496
|
+
accumulatedTotalTokens,
|
|
1497
|
+
accumulatedCost,
|
|
1498
|
+
});
|
|
1499
|
+
const iterationPrompt = buildUltrathinkIterationPrompt({
|
|
1500
|
+
iteration,
|
|
1501
|
+
totalIterations: targetIterations,
|
|
1502
|
+
phase,
|
|
1503
|
+
objective,
|
|
1504
|
+
checkpoint,
|
|
1505
|
+
previousSummary,
|
|
1506
|
+
contextTail: iteration === 1 ? contextTail : undefined,
|
|
1507
|
+
evidenceCatalog,
|
|
1508
|
+
budgetStatus,
|
|
1509
|
+
});
|
|
1510
|
+
const visibleIterationPrompt = buildUltrathinkVisibleIterationPrompt({
|
|
1511
|
+
iteration,
|
|
1512
|
+
totalIterations: targetIterations,
|
|
1513
|
+
phase,
|
|
1514
|
+
objective,
|
|
1515
|
+
});
|
|
1516
|
+
const iterationMessages = await runUltrathinkInternalPrompt(iterationPrompt, visibleIterationPrompt);
|
|
1517
|
+
let toolChecksThisIteration = mergeEvidence(iterationMessages);
|
|
1518
|
+
let assistantMessage = this._findLastAssistantMessage();
|
|
1519
|
+
let assistantText = this.getLastAssistantText() ?? "";
|
|
1520
|
+
let iterationUsage = registerUsage(assistantMessage);
|
|
1521
|
+
let iterationInputTokens = iterationUsage.inputTokens;
|
|
1522
|
+
const evaluatePolicy = () => evaluateUltrathinkEvidencePolicy({
|
|
1523
|
+
text: assistantText,
|
|
1524
|
+
phase,
|
|
1525
|
+
toolChecksThisIteration,
|
|
1526
|
+
knownEvidenceIds: [...evidenceById.keys()],
|
|
1527
|
+
});
|
|
1528
|
+
const shouldGround = shouldUltrathinkForceToolGrounding({
|
|
1529
|
+
phase,
|
|
1530
|
+
cumulativeEvidenceCount: evidenceById.size,
|
|
1531
|
+
toolChecksThisIteration,
|
|
1532
|
+
});
|
|
1533
|
+
if (shouldGround) {
|
|
1534
|
+
const groundingPrompt = buildUltrathinkToolGroundingPrompt({
|
|
1535
|
+
iteration,
|
|
1536
|
+
totalIterations: targetIterations,
|
|
1537
|
+
phase,
|
|
1538
|
+
objective,
|
|
1539
|
+
checkpoint,
|
|
1540
|
+
availableReadOnlyTools: readOnlyTools,
|
|
1541
|
+
evidenceCatalog: buildUltrathinkEvidenceCatalog([...evidenceById.values()]),
|
|
1542
|
+
});
|
|
1543
|
+
const groundingDisplayText = `${ULTRATHINK_VISIBLE_PROMPT_PREFIX} ${iteration}/${targetIterations} (${phase}) grounding retry. Performing live workspace probes with read-only tools.`;
|
|
1544
|
+
const groundingMessages = await runUltrathinkInternalPrompt(groundingPrompt, groundingDisplayText);
|
|
1545
|
+
toolChecksThisIteration += mergeEvidence(groundingMessages);
|
|
1546
|
+
assistantMessage = this._findLastAssistantMessage();
|
|
1547
|
+
assistantText = this.getLastAssistantText() ?? "";
|
|
1548
|
+
iterationUsage = registerUsage(assistantMessage);
|
|
1549
|
+
iterationInputTokens += iterationUsage.inputTokens;
|
|
1550
|
+
}
|
|
1551
|
+
if (enforceEvidencePolicy) {
|
|
1552
|
+
let evidencePolicy = evaluatePolicy();
|
|
1553
|
+
if (hasUltrathinkEvidenceViolations(evidencePolicy)) {
|
|
1554
|
+
const policyIssues = [];
|
|
1555
|
+
if (evidencePolicy.missingEvidenceForNumbers) {
|
|
1556
|
+
policyIssues.push("Quantitative claims are missing `[evidence:<toolCallId>]` tags.");
|
|
1557
|
+
}
|
|
1558
|
+
if (evidencePolicy.invalidEvidenceTags.length > 0) {
|
|
1559
|
+
policyIssues.push(`Unknown evidence tags: ${evidencePolicy.invalidEvidenceTags.join(", ")}`);
|
|
1560
|
+
}
|
|
1561
|
+
if (evidencePolicy.needsNoNewEvidenceMarker && !evidencePolicy.hasNoNewEvidenceMarker) {
|
|
1562
|
+
policyIssues.push("Verify/Synthesis response with no new tool checks must include [NO_NEW_EVIDENCE_OK].");
|
|
1563
|
+
}
|
|
1564
|
+
const repairPrompt = buildUltrathinkComplianceRepairPrompt({
|
|
1565
|
+
iteration,
|
|
1566
|
+
totalIterations: targetIterations,
|
|
1567
|
+
phase,
|
|
1568
|
+
objective,
|
|
1569
|
+
originalResponse: assistantText,
|
|
1570
|
+
issues: policyIssues,
|
|
1571
|
+
checkpoint,
|
|
1572
|
+
evidenceCatalog: buildUltrathinkEvidenceCatalog([...evidenceById.values()]),
|
|
1573
|
+
});
|
|
1574
|
+
const repairDisplayText = `${ULTRATHINK_VISIBLE_PROMPT_PREFIX} ${iteration}/${targetIterations} (${phase}) policy repair. Normalizing evidence links and checkpoint format.`;
|
|
1575
|
+
const repairMessages = await runUltrathinkInternalPrompt(repairPrompt, repairDisplayText);
|
|
1576
|
+
toolChecksThisIteration += mergeEvidence(repairMessages);
|
|
1577
|
+
assistantMessage = this._findLastAssistantMessage();
|
|
1578
|
+
assistantText = this.getLastAssistantText() ?? "";
|
|
1579
|
+
iterationUsage = registerUsage(assistantMessage);
|
|
1580
|
+
iterationInputTokens += iterationUsage.inputTokens;
|
|
1581
|
+
evidencePolicy = evaluatePolicy();
|
|
1582
|
+
if (hasUltrathinkEvidenceViolations(evidencePolicy)) {
|
|
1583
|
+
// Do not fail the entire run; keep the latest usable answer and finish gracefully.
|
|
1584
|
+
enforceEvidencePolicy = false;
|
|
1585
|
+
if (iteration < targetIterations) {
|
|
1586
|
+
targetIterations = Math.min(targetIterations, iteration + 1);
|
|
1587
|
+
}
|
|
1588
|
+
}
|
|
1589
|
+
}
|
|
1590
|
+
}
|
|
1591
|
+
previousSummary = extractUltrathinkIterationSummary(assistantText);
|
|
1592
|
+
const checkpointBeforeIteration = checkpoint;
|
|
1593
|
+
const extractedCheckpoint = extractUltrathinkCheckpoint(assistantText);
|
|
1594
|
+
if (extractedCheckpoint && extractedCheckpoint.trim()) {
|
|
1595
|
+
checkpoint = normalizeUltrathinkCheckpoint(extractedCheckpoint, objective);
|
|
1596
|
+
}
|
|
1597
|
+
if (checkpoint.length > ULTRATHINK_MAX_CHECKPOINT_CHARS) {
|
|
1598
|
+
checkpoint = await this._compressUltrathinkCheckpoint(checkpoint, objective, apiKey);
|
|
1599
|
+
}
|
|
1600
|
+
const stagnated = isUltrathinkStagnated({
|
|
1601
|
+
previousCheckpoint: checkpointBeforeIteration,
|
|
1602
|
+
nextCheckpoint: checkpoint,
|
|
1603
|
+
toolChecksThisIteration,
|
|
1604
|
+
});
|
|
1605
|
+
if (stagnated && iteration < targetIterations) {
|
|
1606
|
+
stagnationCount += 1;
|
|
1607
|
+
if (stagnationCount >= ULTRATHINK_STAGNATION_LIMIT) {
|
|
1608
|
+
targetIterations = Math.min(targetIterations, iteration + 1);
|
|
1609
|
+
}
|
|
1610
|
+
}
|
|
1611
|
+
else {
|
|
1612
|
+
stagnationCount = 0;
|
|
1613
|
+
}
|
|
1614
|
+
if (iteration < targetIterations && exceedsBudget(iterationInputTokens)) {
|
|
1615
|
+
targetIterations = Math.min(targetIterations, iteration + 1);
|
|
1616
|
+
}
|
|
1617
|
+
}
|
|
1618
|
+
}
|
|
1619
|
+
finally {
|
|
1620
|
+
this._ultrathinkActive = false;
|
|
1621
|
+
const currentTools = this.getActiveToolNames();
|
|
1622
|
+
const shouldRestore = currentTools.length !== originalTools.length ||
|
|
1623
|
+
currentTools.some((toolName, index) => toolName !== originalTools[index]);
|
|
1624
|
+
if (shouldRestore) {
|
|
1625
|
+
this.setActiveToolsByName(originalTools);
|
|
1626
|
+
}
|
|
1627
|
+
}
|
|
1628
|
+
}
|
|
1629
|
+
async _compressUltrathinkCheckpoint(checkpoint, objective, apiKey) {
|
|
1630
|
+
const model = this.model;
|
|
1631
|
+
const fallback = truncateUltrathinkCheckpoint(normalizeUltrathinkCheckpoint(checkpoint, objective), ULTRATHINK_MAX_CHECKPOINT_CHARS);
|
|
1632
|
+
if (!model)
|
|
1633
|
+
return fallback;
|
|
1634
|
+
const reserveTokens = this.settingsManager.getCompactionReserveTokens();
|
|
1635
|
+
const maxTokens = Math.max(256, Math.min(2048, Math.floor(reserveTokens * 0.4)));
|
|
1636
|
+
try {
|
|
1637
|
+
const response = await completeSimple(model, {
|
|
1638
|
+
systemPrompt: ULTRATHINK_CHECKPOINT_COMPRESSION_SYSTEM_PROMPT,
|
|
1639
|
+
messages: [
|
|
1640
|
+
{
|
|
1641
|
+
role: "user",
|
|
1642
|
+
content: [
|
|
1643
|
+
{
|
|
1644
|
+
type: "text",
|
|
1645
|
+
text: buildUltrathinkCheckpointCompressionPrompt({
|
|
1646
|
+
objective,
|
|
1647
|
+
checkpoint,
|
|
1648
|
+
maxChars: ULTRATHINK_MAX_CHECKPOINT_CHARS,
|
|
1649
|
+
}),
|
|
1650
|
+
},
|
|
1651
|
+
],
|
|
1652
|
+
timestamp: Date.now(),
|
|
1653
|
+
},
|
|
1654
|
+
],
|
|
1655
|
+
}, model.reasoning
|
|
1656
|
+
? { maxTokens, apiKey, reasoning: "high" }
|
|
1657
|
+
: { maxTokens, apiKey });
|
|
1658
|
+
if (response.stopReason === "error") {
|
|
1659
|
+
return fallback;
|
|
1660
|
+
}
|
|
1661
|
+
const text = response.content
|
|
1662
|
+
.filter((part) => part.type === "text")
|
|
1663
|
+
.map((part) => part.text)
|
|
1664
|
+
.join("\n")
|
|
1665
|
+
.trim();
|
|
1666
|
+
if (!text) {
|
|
1667
|
+
return fallback;
|
|
1668
|
+
}
|
|
1669
|
+
return truncateUltrathinkCheckpoint(normalizeUltrathinkCheckpoint(text, objective), ULTRATHINK_MAX_CHECKPOINT_CHARS);
|
|
1670
|
+
}
|
|
1671
|
+
catch {
|
|
1672
|
+
return fallback;
|
|
1673
|
+
}
|
|
1674
|
+
}
|
|
1316
1675
|
/**
|
|
1317
1676
|
* Try to execute an extension command. Returns true if command was found and executed.
|
|
1318
1677
|
*/
|
|
@@ -2466,11 +2825,18 @@ export class AgentSession {
|
|
|
2466
2825
|
_refreshToolRegistry(options) {
|
|
2467
2826
|
const previousRegistryNames = new Set(this._toolRegistry.keys());
|
|
2468
2827
|
const previousActiveToolNames = this.getActiveToolNames();
|
|
2828
|
+
const enforceExtensionToolPermissions = this.settingsManager.getPermissionExtensionToolEnforcement();
|
|
2469
2829
|
const registeredTools = this._extensionRunner?.getAllRegisteredTools() ?? [];
|
|
2470
2830
|
const allCustomTools = [
|
|
2471
2831
|
...registeredTools,
|
|
2472
2832
|
...this._customTools.map((def) => ({ definition: def, extensionPath: "<sdk>" })),
|
|
2473
2833
|
];
|
|
2834
|
+
const requiredPermissionEntries = Object.entries(BUILTIN_TOOL_REQUIRED_PERMISSIONS).map(([name, permission]) => [name, permission]);
|
|
2835
|
+
this._toolRequiredPermissions = new Map(requiredPermissionEntries);
|
|
2836
|
+
for (const registeredTool of allCustomTools) {
|
|
2837
|
+
const existing = this._toolRequiredPermissions.get(registeredTool.definition.name);
|
|
2838
|
+
this._toolRequiredPermissions.set(registeredTool.definition.name, registeredTool.definition.requiredPermission ?? existing);
|
|
2839
|
+
}
|
|
2474
2840
|
this._toolPromptSnippets = new Map(allCustomTools
|
|
2475
2841
|
.map((registeredTool) => {
|
|
2476
2842
|
const snippet = this._normalizePromptSnippet(registeredTool.definition.promptSnippet ?? registeredTool.definition.description);
|
|
@@ -2484,7 +2850,24 @@ export class AgentSession {
|
|
|
2484
2850
|
})
|
|
2485
2851
|
.filter((entry) => entry !== undefined));
|
|
2486
2852
|
const wrappedExtensionTools = this._extensionRunner
|
|
2487
|
-
? wrapRegisteredTools(allCustomTools, this._extensionRunner)
|
|
2853
|
+
? wrapRegisteredTools(allCustomTools, this._extensionRunner, (registeredTool) => {
|
|
2854
|
+
const toolSource = registeredTool.extensionPath === "<sdk>" ? "custom" : "extension";
|
|
2855
|
+
return {
|
|
2856
|
+
toolSource,
|
|
2857
|
+
permissionPolicy: {
|
|
2858
|
+
enabled: enforceExtensionToolPermissions && toolSource === "extension",
|
|
2859
|
+
guard: async (request) => this._evaluateToolPermission(request),
|
|
2860
|
+
createRequest: ({ toolName, params, requiredPermission, toolSource: source }) => ({
|
|
2861
|
+
toolName,
|
|
2862
|
+
cwd: this._cwd,
|
|
2863
|
+
input: params,
|
|
2864
|
+
summary: `Extension tool ${toolName} ${this._summarizeToolPermissionInput(params)}`,
|
|
2865
|
+
requiredPermission,
|
|
2866
|
+
toolSource: source,
|
|
2867
|
+
}),
|
|
2868
|
+
},
|
|
2869
|
+
};
|
|
2870
|
+
})
|
|
2488
2871
|
: [];
|
|
2489
2872
|
const toolRegistry = new Map(this._baseToolRegistry);
|
|
2490
2873
|
for (const tool of wrappedExtensionTools) {
|
|
@@ -2517,46 +2900,26 @@ export class AgentSession {
|
|
|
2517
2900
|
_buildRuntime(options) {
|
|
2518
2901
|
const autoResizeImages = this.settingsManager.getImageAutoResize();
|
|
2519
2902
|
const shellCommandPrefix = this.settingsManager.getShellCommandPrefix();
|
|
2520
|
-
const evaluatePreToolHooks = (request) => {
|
|
2521
|
-
const hookResult = applyPreToolUseHooks(this._hooksConfig, request);
|
|
2522
|
-
this._queueHookNotices("PreToolUse", hookResult.notices);
|
|
2523
|
-
if (!hookResult.allowed) {
|
|
2524
|
-
throw new Error(hookResult.message ?? "Tool blocked by PreToolUse hook.");
|
|
2525
|
-
}
|
|
2526
|
-
return true;
|
|
2527
|
-
};
|
|
2528
2903
|
const baseTools = this._baseToolsOverride
|
|
2529
2904
|
? this._baseToolsOverride
|
|
2530
2905
|
: createAllTools(this._cwd, {
|
|
2531
2906
|
read: { autoResizeImages },
|
|
2532
2907
|
bash: {
|
|
2533
2908
|
commandPrefix: shellCommandPrefix,
|
|
2534
|
-
permissionGuard: async (request) =>
|
|
2535
|
-
evaluatePreToolHooks(request);
|
|
2536
|
-
return this._toolPermissionHandler ? this._toolPermissionHandler(request) : true;
|
|
2537
|
-
},
|
|
2909
|
+
permissionGuard: async (request) => this._evaluateToolPermission(request),
|
|
2538
2910
|
},
|
|
2539
2911
|
edit: {
|
|
2540
|
-
permissionGuard: async (request) =>
|
|
2541
|
-
evaluatePreToolHooks(request);
|
|
2542
|
-
return this._toolPermissionHandler ? this._toolPermissionHandler(request) : true;
|
|
2543
|
-
},
|
|
2912
|
+
permissionGuard: async (request) => this._evaluateToolPermission(request),
|
|
2544
2913
|
},
|
|
2545
2914
|
write: {
|
|
2546
|
-
permissionGuard: async (request) =>
|
|
2547
|
-
evaluatePreToolHooks(request);
|
|
2548
|
-
return this._toolPermissionHandler ? this._toolPermissionHandler(request) : true;
|
|
2549
|
-
},
|
|
2915
|
+
permissionGuard: async (request) => this._evaluateToolPermission(request),
|
|
2550
2916
|
},
|
|
2551
2917
|
semantic: {
|
|
2552
2918
|
authStorage: this._modelRegistry.authStorage,
|
|
2553
2919
|
},
|
|
2554
2920
|
fetch: {
|
|
2555
2921
|
resolveAllowedMethods: () => getAllowedFetchMethodsForProfile(this._profileName),
|
|
2556
|
-
permissionGuard: async (request) =>
|
|
2557
|
-
evaluatePreToolHooks(request);
|
|
2558
|
-
return this._toolPermissionHandler ? this._toolPermissionHandler(request) : true;
|
|
2559
|
-
},
|
|
2922
|
+
permissionGuard: async (request) => this._evaluateToolPermission(request),
|
|
2560
2923
|
},
|
|
2561
2924
|
webSearch: {
|
|
2562
2925
|
resolveRuntimeConfig: () => ({
|
|
@@ -2569,33 +2932,21 @@ export class AgentSession {
|
|
|
2569
2932
|
}),
|
|
2570
2933
|
resolveTavilyApiKey: () => this.settingsManager.getWebSearchTavilyApiKey(),
|
|
2571
2934
|
resolveSearxngBaseUrl: () => this.settingsManager.getWebSearchSearxngUrl(),
|
|
2572
|
-
permissionGuard: async (request) =>
|
|
2573
|
-
evaluatePreToolHooks(request);
|
|
2574
|
-
return this._toolPermissionHandler ? this._toolPermissionHandler(request) : true;
|
|
2575
|
-
},
|
|
2935
|
+
permissionGuard: async (request) => this._evaluateToolPermission(request),
|
|
2576
2936
|
},
|
|
2577
2937
|
gitWrite: {
|
|
2578
2938
|
resolveRuntimeConfig: () => ({
|
|
2579
2939
|
networkEnabled: this.settingsManager.getGithubToolsNetworkEnabled(),
|
|
2580
2940
|
}),
|
|
2581
2941
|
resolveGithubToken: () => this.settingsManager.getGithubToolsToken(),
|
|
2582
|
-
permissionGuard: async (request) =>
|
|
2583
|
-
evaluatePreToolHooks(request);
|
|
2584
|
-
return this._toolPermissionHandler ? this._toolPermissionHandler(request) : true;
|
|
2585
|
-
},
|
|
2942
|
+
permissionGuard: async (request) => this._evaluateToolPermission(request),
|
|
2586
2943
|
},
|
|
2587
2944
|
fsOps: {
|
|
2588
|
-
permissionGuard: async (request) =>
|
|
2589
|
-
evaluatePreToolHooks(request);
|
|
2590
|
-
return this._toolPermissionHandler ? this._toolPermissionHandler(request) : true;
|
|
2591
|
-
},
|
|
2945
|
+
permissionGuard: async (request) => this._evaluateToolPermission(request),
|
|
2592
2946
|
},
|
|
2593
2947
|
dbRun: {
|
|
2594
2948
|
resolveRuntimeConfig: () => this.settingsManager.getDbToolsSettings(),
|
|
2595
|
-
permissionGuard: async (request) =>
|
|
2596
|
-
evaluatePreToolHooks(request);
|
|
2597
|
-
return this._toolPermissionHandler ? this._toolPermissionHandler(request) : true;
|
|
2598
|
-
},
|
|
2949
|
+
permissionGuard: async (request) => this._evaluateToolPermission(request),
|
|
2599
2950
|
},
|
|
2600
2951
|
});
|
|
2601
2952
|
this._baseToolRegistry = new Map(Object.entries(baseTools).map(([name, tool]) => [name, tool]));
|
|
@@ -2801,16 +3152,39 @@ export class AgentSession {
|
|
|
2801
3152
|
const prefix = this.settingsManager.getShellCommandPrefix();
|
|
2802
3153
|
const resolvedCommand = prefix ? `${prefix}\n${command}` : command;
|
|
2803
3154
|
try {
|
|
2804
|
-
|
|
2805
|
-
|
|
2806
|
-
|
|
2807
|
-
|
|
2808
|
-
}
|
|
2809
|
-
|
|
3155
|
+
let result;
|
|
3156
|
+
if (options?.runInBackground) {
|
|
3157
|
+
if (options.operations) {
|
|
3158
|
+
throw new Error("Background bash is not supported with custom execution operations.");
|
|
3159
|
+
}
|
|
3160
|
+
const background = startBackgroundProcess({
|
|
3161
|
+
rootCwd: this._cwd,
|
|
2810
3162
|
cwd: this._cwd,
|
|
2811
|
-
|
|
2812
|
-
|
|
3163
|
+
command: resolvedCommand,
|
|
3164
|
+
source: "interactive",
|
|
2813
3165
|
});
|
|
3166
|
+
result = {
|
|
3167
|
+
output: `Started background process ${background.id} (pid ${background.pid}). Use /bg status ${background.id}, /bg logs ${background.id}, or /bg stop ${background.id}.`,
|
|
3168
|
+
exitCode: undefined,
|
|
3169
|
+
cancelled: false,
|
|
3170
|
+
truncated: false,
|
|
3171
|
+
backgroundTaskId: background.id,
|
|
3172
|
+
backgroundStatusPath: background.metaPath,
|
|
3173
|
+
backgroundLogPath: background.logPath,
|
|
3174
|
+
};
|
|
3175
|
+
}
|
|
3176
|
+
else {
|
|
3177
|
+
result = options?.operations
|
|
3178
|
+
? await executeBashWithOperations(resolvedCommand, this._cwd, options.operations, {
|
|
3179
|
+
onChunk,
|
|
3180
|
+
signal: this._bashAbortController.signal,
|
|
3181
|
+
})
|
|
3182
|
+
: await executeBashCommand(resolvedCommand, {
|
|
3183
|
+
cwd: this._cwd,
|
|
3184
|
+
onChunk,
|
|
3185
|
+
signal: this._bashAbortController.signal,
|
|
3186
|
+
});
|
|
3187
|
+
}
|
|
2814
3188
|
this.recordBashResult(command, result, options);
|
|
2815
3189
|
this._appendSessionTrace({
|
|
2816
3190
|
type: "bash_end",
|
|
@@ -2818,6 +3192,7 @@ export class AgentSession {
|
|
|
2818
3192
|
exitCode: result.exitCode,
|
|
2819
3193
|
cancelled: result.cancelled,
|
|
2820
3194
|
truncated: result.truncated,
|
|
3195
|
+
backgroundTaskId: result.backgroundTaskId ?? null,
|
|
2821
3196
|
});
|
|
2822
3197
|
return result;
|
|
2823
3198
|
}
|