@nomad-e/bluma-cli 0.1.75 → 0.1.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.js +236 -200
- package/package.json +1 -1
package/dist/main.js
CHANGED
|
@@ -299,21 +299,22 @@ function assessCommandSafety(command, policy = getSandboxPolicy()) {
|
|
|
299
299
|
return { allowed: false, risk: "blocked", reason: entry.reason };
|
|
300
300
|
}
|
|
301
301
|
}
|
|
302
|
+
const skipConfirmation = ruleDecision === "allow";
|
|
302
303
|
if (HIGH_RISK_COMMAND_PATTERNS.some((pattern) => pattern.test(trimmed))) {
|
|
303
304
|
return {
|
|
304
305
|
allowed: true,
|
|
305
306
|
risk: policy.isSandbox ? "high" : "high",
|
|
306
|
-
reason: policy.isSandbox ? "High-risk command allowed inside the workspace sandbox." : "High-risk command requires explicit approval outside sandbox mode."
|
|
307
|
+
reason: skipConfirmation ? "Command allowed by permission rules engine." : policy.isSandbox ? "High-risk command allowed inside the workspace sandbox." : "High-risk command requires explicit approval outside sandbox mode."
|
|
307
308
|
};
|
|
308
309
|
}
|
|
309
310
|
if (MODERATE_RISK_COMMAND_PATTERNS.some((pattern) => pattern.test(trimmed))) {
|
|
310
311
|
return {
|
|
311
312
|
allowed: true,
|
|
312
313
|
risk: "moderate",
|
|
313
|
-
reason: policy.isSandbox ? "Workspace mutation command allowed inside the sandbox." : "Workspace mutation command requires confirmation outside sandbox mode."
|
|
314
|
+
reason: skipConfirmation ? "Command allowed by permission rules engine." : policy.isSandbox ? "Workspace mutation command allowed inside the sandbox." : "Workspace mutation command requires confirmation outside sandbox mode."
|
|
314
315
|
};
|
|
315
316
|
}
|
|
316
|
-
if (
|
|
317
|
+
if (skipConfirmation) {
|
|
317
318
|
return { allowed: true, risk: "safe", reason: "Command allowed by permission rules engine." };
|
|
318
319
|
}
|
|
319
320
|
return { allowed: true, risk: "safe" };
|
|
@@ -327,7 +328,7 @@ var init_sandbox_policy = __esm({
|
|
|
327
328
|
BLOCKED_COMMAND_PATTERNS = [
|
|
328
329
|
{ pattern: /\bsudo\b/, reason: "Privilege escalation is not allowed." },
|
|
329
330
|
{ pattern: /\bsu\b\s/, reason: "User switching is not allowed." },
|
|
330
|
-
{ pattern:
|
|
331
|
+
{ pattern: /\brm\s+-rf\s+\/(?:\s*(?:$|[;&|]))/, reason: "Deleting root filesystem is blocked." },
|
|
331
332
|
{ pattern: /\bcurl\b.*\|\s*(bash|sh|zsh)/i, reason: "Pipe-to-shell execution is blocked." },
|
|
332
333
|
{ pattern: /\bwget\b.*\|\s*(bash|sh|zsh)/i, reason: "Pipe-to-shell execution is blocked." },
|
|
333
334
|
{ pattern: /\beval\b\s*\(/, reason: "Eval execution is blocked." },
|
|
@@ -2367,7 +2368,7 @@ var getSlashCommands = () => [
|
|
|
2367
2368
|
},
|
|
2368
2369
|
{
|
|
2369
2370
|
name: "/review",
|
|
2370
|
-
description: "review
|
|
2371
|
+
description: "review changes directly or use /review mason for parallel specialist reviewers (slower, deeper)",
|
|
2371
2372
|
category: "agent"
|
|
2372
2373
|
},
|
|
2373
2374
|
{
|
|
@@ -4770,8 +4771,12 @@ var renderAskUserQuestion = ({ args }) => {
|
|
|
4770
4771
|
const parsed = parseArgs(args);
|
|
4771
4772
|
const qs = Array.isArray(parsed.questions) ? parsed.questions : [];
|
|
4772
4773
|
const q0 = qs[0];
|
|
4773
|
-
const
|
|
4774
|
-
return /* @__PURE__ */ jsx8(Box8, { flexDirection: "column", children: /* @__PURE__ */
|
|
4774
|
+
const options = Array.isArray(q0?.options) ? q0.options.length : 0;
|
|
4775
|
+
return /* @__PURE__ */ jsx8(Box8, { flexDirection: "column", children: /* @__PURE__ */ jsxs8(Text8, { dimColor: true, wrap: "wrap", children: [
|
|
4776
|
+
"Awaiting user answer",
|
|
4777
|
+
qs.length > 0 ? ` \xB7 ${qs.length} question${qs.length === 1 ? "" : "s"}` : "",
|
|
4778
|
+
options > 0 ? ` \xB7 ${options} option${options === 1 ? "" : "s"}` : ""
|
|
4779
|
+
] }) });
|
|
4775
4780
|
};
|
|
4776
4781
|
var renderPlanMode = ({ args }) => {
|
|
4777
4782
|
const parsed = parseArgs(args);
|
|
@@ -14739,6 +14744,46 @@ Next steps: ${anchor.nextSteps}`;
|
|
|
14739
14744
|
}
|
|
14740
14745
|
|
|
14741
14746
|
// src/app/agent/core/context-api/context_manager.ts
|
|
14747
|
+
/**
 * Reports whether `value` is a string that `JSON.parse` accepts.
 *
 * @param {unknown} value - Candidate `function.arguments` payload of a tool call.
 * @returns {boolean} `true` only for strings that parse as JSON; `false` for
 *   non-strings and for strings that make `JSON.parse` throw.
 */
function isValidJsonArguments(value) {
  if (typeof value !== "string") return false;
  try {
    JSON.parse(value);
    return true;
  } catch {
    return false;
  }
}

/**
 * Removes assistant messages whose `tool_calls` carry unparseable JSON
 * arguments, together with the messages that only make sense next to them.
 *
 * Dropped entries:
 *  - an assistant message with at least one tool call whose
 *    `function.arguments` fails `isValidJsonArguments`;
 *  - every assistant message that immediately follows it (until the first
 *    message with another role);
 *  - every `tool` message whose `tool_call_id` answers a call of a dropped
 *    assistant message. Keeping those would leave tool results without the
 *    assistant `tool_calls` entry they reply to.
 *
 * The input array and its message objects are not mutated; surviving messages
 * are passed through by reference.
 *
 * @param {Array<object>} conversationHistory - Chat messages in order. A
 *   non-array value is treated as an empty history.
 * @returns {{messages: Array<object>, issues: Array<{index: number, reason: string, toolNames: string[]}>}}
 *   `messages` is the cleaned history; `issues` has one entry per dropped
 *   corrupt assistant message (`index` refers to the input array).
 */
function sanitizeConversationForProvider(conversationHistory) {
  const cleaned = [];
  const issues = [];
  if (!Array.isArray(conversationHistory)) {
    return { messages: cleaned, issues };
  }
  // Ids of tool calls that belonged to dropped assistant messages.
  const droppedCallIds = new Set();
  let droppingCorruptTurn = false;
  for (const [index, msg] of conversationHistory.entries()) {
    if (droppingCorruptTurn) {
      if (msg?.role === "assistant") {
        continue;
      }
      droppingCorruptTurn = false;
    }
    // A tool result that answers a dropped call must go as well.
    if (msg?.role === "tool" && droppedCallIds.has(msg.tool_call_id)) {
      continue;
    }
    const toolCalls = Array.isArray(msg?.tool_calls) ? msg.tool_calls : null;
    if (msg?.role === "assistant" && toolCalls && toolCalls.length > 0) {
      const invalidCalls = toolCalls.filter(
        (call) => !isValidJsonArguments(call?.function?.arguments)
      );
      if (invalidCalls.length > 0) {
        issues.push({
          index,
          reason: "assistant tool_calls had invalid JSON arguments",
          toolNames: invalidCalls.map((call) => String(call?.function?.name ?? "unknown"))
        });
        // The whole message is removed, so results of its valid calls are orphaned too.
        for (const call of toolCalls) {
          if (typeof call?.id === "string") {
            droppedCallIds.add(call.id);
          }
        }
        droppingCorruptTurn = true;
        continue;
      }
    }
    cleaned.push(msg);
  }
  return { messages: cleaned, issues };
}
|
|
14742
14787
|
function partitionConversationIntoTurnSlices(conversationHistory) {
|
|
14743
14788
|
const turns = [];
|
|
14744
14789
|
let current = [];
|
|
@@ -14774,13 +14819,15 @@ async function createApiContextWindow(fullHistory, currentAnchor, compressedTurn
|
|
|
14774
14819
|
const tokenBudget = options?.tokenBudget ?? CONTEXT_TOKEN_BUDGET;
|
|
14775
14820
|
const compressThreshold = options?.compressThreshold ?? COMPRESS_THRESHOLD;
|
|
14776
14821
|
const keepRecentTurns = options?.keepRecentTurns ?? KEEP_RECENT_TURNS;
|
|
14822
|
+
const sanitized = sanitizeConversationForProvider(fullHistory);
|
|
14823
|
+
const safeHistory = sanitized.messages;
|
|
14777
14824
|
const systemMessages = [];
|
|
14778
14825
|
let historyStartIndex = 0;
|
|
14779
|
-
while (historyStartIndex <
|
|
14780
|
-
systemMessages.push(
|
|
14826
|
+
while (historyStartIndex < safeHistory.length && safeHistory[historyStartIndex].role === "system") {
|
|
14827
|
+
systemMessages.push(safeHistory[historyStartIndex]);
|
|
14781
14828
|
historyStartIndex++;
|
|
14782
14829
|
}
|
|
14783
|
-
const conversationHistory =
|
|
14830
|
+
const conversationHistory = safeHistory.slice(historyStartIndex);
|
|
14784
14831
|
const turnSlices = partitionConversationIntoTurnSlices(conversationHistory);
|
|
14785
14832
|
const n = turnSlices.length;
|
|
14786
14833
|
const recentStart = Math.max(0, n - keepRecentTurns);
|
|
@@ -14920,6 +14967,41 @@ function buildFactorHeaders(ctx) {
|
|
|
14920
14967
|
"X-Company-Name": encodeHeader(ctx.companyName)
|
|
14921
14968
|
};
|
|
14922
14969
|
}
|
|
14970
|
+
// Thinking-token budget sent for each supported reasoning effort.
// Kept as `var` to match the bundle's other top-level bindings.
var THINKING_TOKEN_BUDGET_BY_EFFORT = {
  low: 256,
  medium: 1024,
  high: 2048
};

/**
 * Looks up the thinking-token budget for a reasoning effort.
 *
 * Only the table's own keys are consulted, so names that exist on
 * `Object.prototype` (for example "constructor" or "__proto__") yield
 * `undefined` instead of an inherited member.
 *
 * @param {string | undefined} effort - Reasoning effort name.
 * @returns {number | undefined} The budget, or `undefined` when `effort` is
 *   falsy or not listed in the table.
 */
function getThinkingTokenBudgetForEffort(effort) {
  if (!effort) return void 0;
  const isKnownEffort = Object.prototype.hasOwnProperty.call(THINKING_TOKEN_BUDGET_BY_EFFORT, effort);
  return isKnownEffort ? THINKING_TOKEN_BUDGET_BY_EFFORT[effort] : void 0;
}

/**
 * Builds the reasoning-related fields of a chat completion request.
 *
 * @param {string | undefined} effort - Reasoning effort name.
 * @returns {{reasoning: {effort: string}, extra_body: {thinking_token_budget: number | undefined}} | undefined}
 *   `undefined` when `effort` is falsy; otherwise the fields to spread into
 *   the request body. `thinking_token_budget` is `undefined` for efforts that
 *   are not in the budget table.
 */
function buildVllmReasoningPayload(effort) {
  if (!effort) return void 0;
  return {
    reasoning: { effort },
    extra_body: {
      thinking_token_budget: getThinkingTokenBudgetForEffort(effort)
    }
  };
}

/**
 * Assembles the request body shared by the streaming and non-streaming chat
 * completion calls.
 *
 * @param {object} params - Call parameters; reads `model`, `messages`,
 *   `tools`, `parallel_tool_calls`, `temperature`, `reasoning.effort` and
 *   `max_tokens`.
 * @param {object} [runtimeConfig] - Fallback source for `model` and
 *   `reasoningEffort`; defaults to `getRuntimeConfig()`.
 * @param {boolean} [stream=false] - When truthy, adds `stream: true`.
 * @returns {object} The request body. `tools` and `tool_choice` are
 *   `undefined` unless `params.tools` is a non-empty array.
 */
function buildChatCompletionRequestBody(params, runtimeConfig = getRuntimeConfig(), stream = false) {
  const tools = params.tools;
  const hasTools = Array.isArray(tools) && tools.length > 0;
  // A per-call effort wins over the configured default.
  const effort = params.reasoning?.effort ?? runtimeConfig.reasoningEffort;
  const reasoningPayload = buildVllmReasoningPayload(effort);
  return {
    model: params.model || runtimeConfig.model || "auto",
    messages: params.messages,
    tools: hasTools ? tools : void 0,
    tool_choice: hasTools ? "auto" : void 0,
    parallel_tool_calls: params.parallel_tool_calls ?? false,
    temperature: params.temperature ?? 0,
    ...reasoningPayload ?? {},
    max_tokens: params.max_tokens,
    ...stream ? { stream: true } : {}
  };
}
|
|
14923
15005
|
function applyDeltaToolCallsToAccumulator(toolCallsAccumulator, deltaToolCalls) {
|
|
14924
15006
|
if (!deltaToolCalls?.length) {
|
|
14925
15007
|
return false;
|
|
@@ -14997,20 +15079,9 @@ var LLMService = class {
|
|
|
14997
15079
|
if (!params.userContext) {
|
|
14998
15080
|
throw new Error("LLMService.chatCompletion: userContext \xE9 obrigat\xF3rio");
|
|
14999
15081
|
}
|
|
15000
|
-
const tools = params.tools;
|
|
15001
|
-
const hasTools = Array.isArray(tools) && tools.length > 0;
|
|
15002
15082
|
const runtimeConfig = getRuntimeConfig();
|
|
15003
15083
|
const resp = await this.client.chat.completions.create(
|
|
15004
|
-
|
|
15005
|
-
model: params.model || runtimeConfig.model || "auto",
|
|
15006
|
-
messages: params.messages,
|
|
15007
|
-
tools: hasTools ? tools : void 0,
|
|
15008
|
-
tool_choice: hasTools ? "auto" : void 0,
|
|
15009
|
-
parallel_tool_calls: params.parallel_tool_calls ?? false,
|
|
15010
|
-
temperature: params.temperature ?? 0,
|
|
15011
|
-
reasoning: params.reasoning ?? (runtimeConfig.reasoningEffort ? { effort: runtimeConfig.reasoningEffort } : void 0),
|
|
15012
|
-
max_tokens: params.max_tokens
|
|
15013
|
-
},
|
|
15084
|
+
buildChatCompletionRequestBody(params, runtimeConfig, false),
|
|
15014
15085
|
{ headers: this.requestHeaders(params.userContext) }
|
|
15015
15086
|
);
|
|
15016
15087
|
return resp;
|
|
@@ -15022,21 +15093,9 @@ var LLMService = class {
|
|
|
15022
15093
|
if (!params.userContext) {
|
|
15023
15094
|
throw new Error("LLMService.chatCompletionStream: userContext \xE9 obrigat\xF3rio");
|
|
15024
15095
|
}
|
|
15025
|
-
const tools = params.tools;
|
|
15026
|
-
const hasTools = Array.isArray(tools) && tools.length > 0;
|
|
15027
15096
|
const runtimeConfig = getRuntimeConfig();
|
|
15028
15097
|
const stream = await this.client.chat.completions.create(
|
|
15029
|
-
|
|
15030
|
-
model: params.model || runtimeConfig.model || "auto",
|
|
15031
|
-
messages: params.messages,
|
|
15032
|
-
tools: hasTools ? tools : void 0,
|
|
15033
|
-
tool_choice: hasTools ? "auto" : void 0,
|
|
15034
|
-
parallel_tool_calls: params.parallel_tool_calls ?? false,
|
|
15035
|
-
temperature: params.temperature ?? 0,
|
|
15036
|
-
reasoning: params.reasoning ?? (runtimeConfig.reasoningEffort ? { effort: runtimeConfig.reasoningEffort } : void 0),
|
|
15037
|
-
max_tokens: params.max_tokens,
|
|
15038
|
-
stream: true
|
|
15039
|
-
},
|
|
15098
|
+
buildChatCompletionRequestBody(params, runtimeConfig, true),
|
|
15040
15099
|
{ headers: this.requestHeaders(params.userContext) }
|
|
15041
15100
|
);
|
|
15042
15101
|
const toolCallsAccumulator = /* @__PURE__ */ new Map();
|
|
@@ -15094,8 +15153,9 @@ function formatLlmUiError(error) {
|
|
|
15094
15153
|
}
|
|
15095
15154
|
return {
|
|
15096
15155
|
message: message2,
|
|
15097
|
-
details:
|
|
15098
|
-
hint
|
|
15156
|
+
details: "See server logs for technical details.",
|
|
15157
|
+
hint,
|
|
15158
|
+
rawMessage
|
|
15099
15159
|
};
|
|
15100
15160
|
}
|
|
15101
15161
|
|
|
@@ -15254,7 +15314,15 @@ var ToolCallNormalizer = class {
|
|
|
15254
15314
|
* Valida se um tool call normalizado é válido
|
|
15255
15315
|
*/
|
|
15256
15316
|
static isValidToolCall(call) {
|
|
15257
|
-
|
|
15317
|
+
if (!(call.id && call.type === "function" && call.function?.name && typeof call.function.arguments === "string")) {
|
|
15318
|
+
return false;
|
|
15319
|
+
}
|
|
15320
|
+
try {
|
|
15321
|
+
JSON.parse(call.function.arguments);
|
|
15322
|
+
return true;
|
|
15323
|
+
} catch {
|
|
15324
|
+
return false;
|
|
15325
|
+
}
|
|
15258
15326
|
}
|
|
15259
15327
|
};
|
|
15260
15328
|
|
|
@@ -15538,7 +15606,8 @@ function buildTurnStartBackendMessage(params) {
|
|
|
15538
15606
|
}
|
|
15539
15607
|
|
|
15540
15608
|
// src/app/agent/bluma/core/bluma.ts
|
|
15541
|
-
var BluMaAgent = class {
|
|
15609
|
+
var BluMaAgent = class _BluMaAgent {
|
|
15610
|
+
static MAX_INVALID_TOOL_CALL_RETRIES = 3;
|
|
15542
15611
|
llm;
|
|
15543
15612
|
sessionId;
|
|
15544
15613
|
sessionFile = "";
|
|
@@ -15557,6 +15626,8 @@ var BluMaAgent = class {
|
|
|
15557
15626
|
factorRouterTurnClosed = false;
|
|
15558
15627
|
/** Passos seguidos sem tool_calls nem texto visível (só raciocínio) — evita loop lento no mesmo turno. */
|
|
15559
15628
|
emptyAssistantReplySteps = 0;
|
|
15629
|
+
/** Reintentos consecutivos por tool call inválido. */
|
|
15630
|
+
invalidToolCallRetrySteps = 0;
|
|
15560
15631
|
/** Deduplicação de reasoning chunks no streaming — evita repetição. */
|
|
15561
15632
|
lastReasoningChunkRef = "";
|
|
15562
15633
|
constructor(sessionId, eventBus, llm, mcpClient, feedbackSystem) {
|
|
@@ -15601,6 +15672,33 @@ var BluMaAgent = class {
|
|
|
15601
15672
|
if (!this.sessionFile) return;
|
|
15602
15673
|
void saveSessionHistory(this.sessionFile, this.history, this.getMemorySnapshot());
|
|
15603
15674
|
}
|
|
15675
|
+
async handleInvalidToolCallRetry(message2) {
|
|
15676
|
+
this.invalidToolCallRetrySteps += 1;
|
|
15677
|
+
if (this.history[this.history.length - 1] === message2) {
|
|
15678
|
+
this.history.pop();
|
|
15679
|
+
}
|
|
15680
|
+
if (this.invalidToolCallRetrySteps >= _BluMaAgent.MAX_INVALID_TOOL_CALL_RETRIES) {
|
|
15681
|
+
this.eventBus.emit("backend_message", {
|
|
15682
|
+
type: "error",
|
|
15683
|
+
message: "The model kept returning invalid tool calls. Closing the turn to avoid a retry loop."
|
|
15684
|
+
});
|
|
15685
|
+
this.eventBus.emit("backend_message", {
|
|
15686
|
+
type: "log",
|
|
15687
|
+
message: "Invalid tool call retry limit reached",
|
|
15688
|
+
payload: String(this.invalidToolCallRetrySteps)
|
|
15689
|
+
});
|
|
15690
|
+
await this.notifyFactorTurnEndIfNeeded("invalid_tool_calls_exhausted");
|
|
15691
|
+
this.eventBus.emit("backend_message", { type: "done", status: "failed" });
|
|
15692
|
+
this.invalidToolCallRetrySteps = 0;
|
|
15693
|
+
return;
|
|
15694
|
+
}
|
|
15695
|
+
this.history.push({
|
|
15696
|
+
role: "system",
|
|
15697
|
+
content: "Previous assistant tool_calls were invalid. Retry with valid JSON arguments only, or answer without tools."
|
|
15698
|
+
});
|
|
15699
|
+
this.persistSession();
|
|
15700
|
+
await this._continueConversation();
|
|
15701
|
+
}
|
|
15604
15702
|
async initialize() {
|
|
15605
15703
|
await this.mcpClient.nativeToolInvoker.initialize();
|
|
15606
15704
|
await this.mcpClient.initialize();
|
|
@@ -15709,6 +15807,7 @@ var BluMaAgent = class {
|
|
|
15709
15807
|
const userContent = buildUserMessageContent(inputText, process.cwd());
|
|
15710
15808
|
this.history.push({ role: "user", content: userContent });
|
|
15711
15809
|
this.emptyAssistantReplySteps = 0;
|
|
15810
|
+
this.invalidToolCallRetrySteps = 0;
|
|
15712
15811
|
this.eventBus.emit(
|
|
15713
15812
|
"backend_message",
|
|
15714
15813
|
buildTurnStartBackendMessage({
|
|
@@ -16145,6 +16244,11 @@ ${editData.error.display}`;
|
|
|
16145
16244
|
message: `Received follow-up from coordinator (priority: ${mailboxUpdate.followUp.priority})`
|
|
16146
16245
|
});
|
|
16147
16246
|
}
|
|
16247
|
+
const sanitized = sanitizeConversationForProvider(this.history);
|
|
16248
|
+
if (sanitized.issues.length > 0) {
|
|
16249
|
+
this.history = sanitized.messages;
|
|
16250
|
+
this.persistSession();
|
|
16251
|
+
}
|
|
16148
16252
|
const { messages: contextWindow, newAnchor, newCompressedTurnSliceCount } = await createApiContextWindow(
|
|
16149
16253
|
this.history,
|
|
16150
16254
|
this.sessionAnchor,
|
|
@@ -16172,7 +16276,7 @@ ${editData.error.display}`;
|
|
|
16172
16276
|
this.eventBus.emit("backend_message", {
|
|
16173
16277
|
type: "log",
|
|
16174
16278
|
message: "LLM request failed",
|
|
16175
|
-
payload: uiError.
|
|
16279
|
+
payload: uiError.rawMessage
|
|
16176
16280
|
});
|
|
16177
16281
|
await this.notifyFactorTurnEndIfNeeded("llm_error");
|
|
16178
16282
|
this.eventBus.emit("backend_message", { type: "done", status: "failed" });
|
|
@@ -16285,15 +16389,12 @@ ${editData.error.display}`;
|
|
|
16285
16389
|
this.history.push(normalizedMessage);
|
|
16286
16390
|
if (normalizedMessage.tool_calls && normalizedMessage.tool_calls.length > 0) {
|
|
16287
16391
|
this.emptyAssistantReplySteps = 0;
|
|
16392
|
+
this.invalidToolCallRetrySteps = 0;
|
|
16288
16393
|
const validToolCalls = normalizedMessage.tool_calls.filter(
|
|
16289
16394
|
(call) => ToolCallNormalizer.isValidToolCall(call)
|
|
16290
16395
|
);
|
|
16291
16396
|
if (validToolCalls.length === 0) {
|
|
16292
|
-
this.
|
|
16293
|
-
type: "error",
|
|
16294
|
-
message: "Model returned invalid tool calls. Retrying..."
|
|
16295
|
-
});
|
|
16296
|
-
await this._continueConversation();
|
|
16397
|
+
await this.handleInvalidToolCallRetry(normalizedMessage);
|
|
16297
16398
|
return;
|
|
16298
16399
|
}
|
|
16299
16400
|
const needsConfirmation = validToolCalls.some(
|
|
@@ -16325,9 +16426,6 @@ ${editData.error.display}`;
|
|
|
16325
16426
|
} else if (trimmedText) {
|
|
16326
16427
|
this.emptyAssistantReplySteps = 0;
|
|
16327
16428
|
this.eventBus.emit("backend_message", { type: "assistant_message", content: accumulatedContent });
|
|
16328
|
-
this.eventBus.emit("info", {
|
|
16329
|
-
message: "Assistant returned plain text without tool_calls. Closing the turn to avoid protocol drift."
|
|
16330
|
-
});
|
|
16331
16429
|
await this.notifyFactorTurnEndIfNeeded("assistant_text_without_tool_call");
|
|
16332
16430
|
this.emitTurnCompleted();
|
|
16333
16431
|
return;
|
|
@@ -16360,15 +16458,12 @@ ${editData.error.display}`;
|
|
|
16360
16458
|
this.history.push(message2);
|
|
16361
16459
|
if (message2.tool_calls && message2.tool_calls.length > 0) {
|
|
16362
16460
|
this.emptyAssistantReplySteps = 0;
|
|
16461
|
+
this.invalidToolCallRetrySteps = 0;
|
|
16363
16462
|
const validToolCalls = message2.tool_calls.filter(
|
|
16364
16463
|
(call) => ToolCallNormalizer.isValidToolCall(call)
|
|
16365
16464
|
);
|
|
16366
16465
|
if (validToolCalls.length === 0) {
|
|
16367
|
-
this.
|
|
16368
|
-
type: "error",
|
|
16369
|
-
message: "Model returned invalid tool calls. Retrying..."
|
|
16370
|
-
});
|
|
16371
|
-
await this._continueConversation();
|
|
16466
|
+
await this.handleInvalidToolCallRetry(message2);
|
|
16372
16467
|
return;
|
|
16373
16468
|
}
|
|
16374
16469
|
const needsConfirmation = validToolCalls.some(
|
|
@@ -16399,10 +16494,8 @@ ${editData.error.display}`;
|
|
|
16399
16494
|
}
|
|
16400
16495
|
} else if (typeof message2.content === "string" && message2.content.trim()) {
|
|
16401
16496
|
this.emptyAssistantReplySteps = 0;
|
|
16497
|
+
this.invalidToolCallRetrySteps = 0;
|
|
16402
16498
|
this.eventBus.emit("backend_message", { type: "assistant_message", content: message2.content });
|
|
16403
|
-
this.eventBus.emit("info", {
|
|
16404
|
-
message: "Assistant returned plain text without tool_calls. Closing the turn to avoid protocol drift."
|
|
16405
|
-
});
|
|
16406
16499
|
await this.notifyFactorTurnEndIfNeeded("assistant_text_without_tool_call");
|
|
16407
16500
|
this.emitTurnCompleted();
|
|
16408
16501
|
return;
|
|
@@ -18780,23 +18873,12 @@ var ToolResultDisplayComponent = ({
|
|
|
18780
18873
|
if (toolName.includes("ask_user_question")) {
|
|
18781
18874
|
const success = parsed?.success === true;
|
|
18782
18875
|
const selectedLabel = typeof parsed?.selected_label === "string" ? parsed.selected_label : "";
|
|
18783
|
-
const selectedIndex = typeof parsed?.selected_index === "number" ? parsed.selected_index : null;
|
|
18784
|
-
const questionIndex = typeof parsed?.question_index === "number" ? parsed.question_index : 0;
|
|
18785
|
-
const qs = Array.isArray(args?.questions) ? args.questions : [];
|
|
18786
|
-
const q = qs[questionIndex];
|
|
18787
|
-
const questionText = typeof q?.question === "string" ? q.question : "";
|
|
18788
18876
|
if (success && selectedLabel) {
|
|
18789
|
-
return /* @__PURE__ */ jsx13(ResultGutter, { children: /* @__PURE__ */
|
|
18790
|
-
/* @__PURE__ */
|
|
18791
|
-
|
|
18792
|
-
|
|
18793
|
-
|
|
18794
|
-
] }),
|
|
18795
|
-
questionText ? /* @__PURE__ */ jsxs13(Text13, { dimColor: true, wrap: "wrap", children: [
|
|
18796
|
-
truncate3(questionText, 140),
|
|
18797
|
-
selectedIndex !== null ? ` \xB7 option ${selectedIndex + 1}` : ""
|
|
18798
|
-
] }) : null
|
|
18799
|
-
] }) });
|
|
18877
|
+
return /* @__PURE__ */ jsx13(ResultGutter, { children: /* @__PURE__ */ jsx13(Box13, { flexDirection: "column", children: /* @__PURE__ */ jsxs13(Text13, { dimColor: true, children: [
|
|
18878
|
+
/* @__PURE__ */ jsx13(Text13, { bold: true, children: "Response" }),
|
|
18879
|
+
" \xB7 ",
|
|
18880
|
+
selectedLabel
|
|
18881
|
+
] }) }) });
|
|
18800
18882
|
}
|
|
18801
18883
|
if (parsed?.cancelled === true) {
|
|
18802
18884
|
return /* @__PURE__ */ jsx13(ResultGutter, { children: /* @__PURE__ */ jsxs13(Text13, { dimColor: true, children: [
|
|
@@ -20963,183 +21045,137 @@ Report the release version, tag, changelog summary, and verification results whe
|
|
|
20963
21045
|
);
|
|
20964
21046
|
}
|
|
20965
21047
|
if (cmd === "review") {
|
|
20966
|
-
const
|
|
21048
|
+
const normalizedArgs = args.map((a) => a.toLowerCase());
|
|
21049
|
+
const hasMasonPrefix = normalizedArgs[0] === "mason" || normalizedArgs[0] === "with" && normalizedArgs[1] === "mason";
|
|
21050
|
+
const reviewMode = hasMasonPrefix ? "mason" : "direct";
|
|
21051
|
+
const targetArgs = hasMasonPrefix ? normalizedArgs[0] === "mason" ? args.slice(1) : args.slice(2) : args;
|
|
21052
|
+
const target = targetArgs.join(" ") || "";
|
|
20967
21053
|
const isPR = target && /^\d+$/.test(target);
|
|
20968
21054
|
(async () => {
|
|
20969
21055
|
try {
|
|
20970
21056
|
const reviewTarget = isPR ? `PR #${target}` : target === "local" || target === "local changes" ? "current local changes (git diff HEAD)" : target ? `the file/module: ${target}` : "current local changes (git diff HEAD)";
|
|
20971
|
-
|
|
20972
|
-
content: `## REVIEW COORDINATOR MODE \u2014 Lead a Team of Senior QA Reviewers
|
|
21057
|
+
const reviewPrompt = reviewMode === "mason" ? `## REVIEW COORDINATOR MODE \u2014 Mason Specialists
|
|
20973
21058
|
|
|
20974
|
-
You are now the **Review Coordinator**
|
|
21059
|
+
You are now the **Review Coordinator** for a slower, deeper pass with Mason senior specialists.
|
|
20975
21060
|
|
|
20976
|
-
|
|
21061
|
+
This mode is intentionally heavier:
|
|
21062
|
+
- You may coordinate specialized reviewers in parallel
|
|
21063
|
+
- Each reviewer should focus on one area of risk
|
|
21064
|
+
- This can take longer, but it should surface deeper issues
|
|
20977
21065
|
|
|
20978
21066
|
**Review Target:** ${reviewTarget}
|
|
20979
21067
|
|
|
20980
21068
|
### COORDINATOR REVIEW WORKFLOW
|
|
20981
21069
|
|
|
20982
|
-
#### Step 1: Triage
|
|
21070
|
+
#### Step 1: Triage
|
|
20983
21071
|
1. Gather the diff/changes:
|
|
20984
21072
|
${isPR ? `- Run \`gh pr view ${target}\` for PR details` : ""}
|
|
20985
21073
|
${isPR ? `- Run \`gh pr diff ${target}\` for the full diff` : ""}
|
|
20986
21074
|
${!isPR && target !== "local" && target !== "local changes" ? `- Read the file: ${target}` : ""}
|
|
20987
21075
|
${target === "local" || target === "local changes" ? `- Run \`git diff HEAD\` for unstaged changes` : ""}
|
|
20988
21076
|
${target === "local" || target === "local changes" ? `- Run \`git diff --cached HEAD\` for staged changes` : ""}
|
|
20989
|
-
2.
|
|
21077
|
+
2. Identify the risk surface and decide which specialist areas are worth parallelizing
|
|
20990
21078
|
|
|
20991
|
-
#### Step 2:
|
|
20992
|
-
|
|
21079
|
+
#### Step 2: Parallel Specialists
|
|
21080
|
+
If the scope justifies it, spawn specialized reviewers in parallel:
|
|
21081
|
+
- Security
|
|
21082
|
+
- Logic & Correctness
|
|
21083
|
+
- Code Quality
|
|
20993
21084
|
|
|
20994
|
-
|
|
21085
|
+
If the scope is small, do not force parallelism. Use judgment.
|
|
20995
21086
|
|
|
20996
|
-
|
|
20997
|
-
|
|
20998
|
-
spawn_agent({
|
|
20999
|
-
task: "SECURITY REVIEW: Thoroughly review ${reviewTarget} for security vulnerabilities.
|
|
21000
|
-
|
|
21001
|
-
You are a Senior Security Engineer. Read EVERY changed file line by line. Do NOT report until you have examined all files.
|
|
21002
|
-
|
|
21003
|
-
Look for:
|
|
21004
|
-
- Injection vulnerabilities (SQL, XSS, command injection, template injection)
|
|
21005
|
-
- Authentication/authorization flaws (missing auth checks, privilege escalation)
|
|
21006
|
-
- Sensitive data exposure (secrets in logs, PII leaks, hardcoded credentials)
|
|
21007
|
-
- Insecure defaults (missing TLS, weak crypto, permissive CORS)
|
|
21008
|
-
- Input validation gaps (missing sanitization, type confusion)
|
|
21009
|
-
- Dependency vulnerabilities (outdated packages, known CVEs)
|
|
21010
|
-
- Path traversal, SSRF, CSRF, race conditions
|
|
21011
|
-
|
|
21012
|
-
For EACH issue found:
|
|
21013
|
-
- Severity: CRITICAL / HIGH / MEDIUM / LOW
|
|
21014
|
-
- File:line number
|
|
21015
|
-
- Exact code snippet
|
|
21016
|
-
- Why it's vulnerable
|
|
21017
|
-
- How to exploit it (brief)
|
|
21018
|
-
- Recommended fix
|
|
21019
|
-
|
|
21020
|
-
Be PICKY. If something looks suspicious, flag it.
|
|
21021
|
-
|
|
21022
|
-
Do NOT modify files. Report only.
|
|
21023
|
-
|
|
21024
|
-
At the end of your report, list ALL files you reviewed.",
|
|
21025
|
-
title: "Security Review",
|
|
21026
|
-
agent_type: "reviewer"
|
|
21027
|
-
})
|
|
21028
|
-
\`\`\`
|
|
21087
|
+
#### Step 3: Synthesize
|
|
21088
|
+
Wait for all reviewers that you spawned, then synthesize the findings into a single review report.
|
|
21029
21089
|
|
|
21030
|
-
|
|
21031
|
-
|
|
21032
|
-
spawn_agent({
|
|
21033
|
-
task: "LOGIC REVIEW: Thoroughly review ${reviewTarget} for bugs and logic errors.
|
|
21034
|
-
|
|
21035
|
-
You are a Senior QA Engineer who finds bugs for a living. Read EVERY changed file line by line. Do NOT report until you have examined all files.
|
|
21036
|
-
|
|
21037
|
-
Look for:
|
|
21038
|
-
- Logic errors (wrong conditions, off-by-one, inverted boolean, wrong operator)
|
|
21039
|
-
- Null/undefined handling (missing null checks, unsafe property access)
|
|
21040
|
-
- State management issues (stale state, missing initialization, race conditions)
|
|
21041
|
-
- Async bugs (unawaited promises, missing error handling, promise rejections)
|
|
21042
|
-
- Edge cases (empty arrays, zero values, negative numbers, boundary conditions)
|
|
21043
|
-
- Wrong assumptions (code assumes X but Y can happen)
|
|
21044
|
-
- Dead code (unreachable branches, unused variables, commented-out logic)
|
|
21045
|
-
- Error handling gaps (swallowed errors, missing catch blocks, generic catches)
|
|
21046
|
-
|
|
21047
|
-
For EACH issue found:
|
|
21048
|
-
- Severity: BLOCKER / MAJOR / MINOR
|
|
21049
|
-
- File:line number
|
|
21050
|
-
- What the code does vs what it SHOULD do
|
|
21051
|
-
- How to trigger the bug
|
|
21052
|
-
- Recommended fix
|
|
21053
|
-
|
|
21054
|
-
Be RELENTLESS. Question every assumption.
|
|
21055
|
-
|
|
21056
|
-
Do NOT modify files. Report only.
|
|
21057
|
-
|
|
21058
|
-
At the end of your report, list ALL files you reviewed.",
|
|
21059
|
-
title: "Logic & Correctness Review",
|
|
21060
|
-
agent_type: "reviewer"
|
|
21061
|
-
})
|
|
21062
|
-
\`\`\`
|
|
21090
|
+
#### Step 4: Produce the Review Report
|
|
21091
|
+
Compile a comprehensive review report:
|
|
21063
21092
|
|
|
21064
|
-
**
|
|
21065
|
-
|
|
21066
|
-
|
|
21067
|
-
|
|
21093
|
+
**REVIEW REPORT for ${reviewTarget}**
|
|
21094
|
+
|
|
21095
|
+
\u{1F534} CRITICAL / BLOCKER (must fix before merge):
|
|
21096
|
+
- [List critical findings]
|
|
21068
21097
|
|
|
21069
|
-
|
|
21098
|
+
\u{1F7E1} HIGH / MAJOR (should fix):
|
|
21099
|
+
- [List high findings]
|
|
21070
21100
|
|
|
21071
|
-
|
|
21072
|
-
-
|
|
21073
|
-
- Function length and complexity (too long, too many responsibilities, deep nesting)
|
|
21074
|
-
- DRY violations (duplicated logic that should be extracted)
|
|
21075
|
-
- SOLID violations (tight coupling, god classes, leaking abstractions)
|
|
21076
|
-
- Style inconsistencies (formatting, import order, naming conventions)
|
|
21077
|
-
- Missing or wrong comments (no docs for complex logic, outdated comments)
|
|
21078
|
-
- Type safety issues (any usage, missing type annotations, wrong types)
|
|
21079
|
-
- Error message quality (unhelpful messages, missing context)
|
|
21080
|
-
- API design (inconsistent interfaces, breaking changes, missing deprecation)
|
|
21101
|
+
\u{1F7E2} MEDIUM / MINOR (nice to fix):
|
|
21102
|
+
- [List medium findings]
|
|
21081
21103
|
|
|
21082
|
-
|
|
21083
|
-
-
|
|
21084
|
-
- What's wrong
|
|
21085
|
-
- Suggested improvement with before/after code
|
|
21104
|
+
\u2139\uFE0F OBSERVATIONS (no action needed):
|
|
21105
|
+
- [List observations]
|
|
21086
21106
|
|
|
21087
|
-
|
|
21107
|
+
\u2705 POSITIVE FINDINGS:
|
|
21108
|
+
- [List strong points]
|
|
21088
21109
|
|
|
21089
|
-
|
|
21110
|
+
**Review Summary:**
|
|
21111
|
+
- Total issues found: X critical, Y high, Z medium
|
|
21112
|
+
- Reviewers used: [list workers or "direct review"]
|
|
21113
|
+
- Recommendation: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES
|
|
21114
|
+
- Confidence level: HIGH / MEDIUM / LOW
|
|
21090
21115
|
|
|
21091
|
-
|
|
21092
|
-
|
|
21093
|
-
|
|
21094
|
-
|
|
21095
|
-
|
|
21116
|
+
### COORDINATOR RULES
|
|
21117
|
+
- Be selective: do not spawn workers unless the scope justifies it
|
|
21118
|
+
- If workers fail, finish the review yourself
|
|
21119
|
+
- Never rubber-stamp
|
|
21120
|
+
- Never fabricate results
|
|
21096
21121
|
|
|
21097
|
-
|
|
21098
|
-
Wait for ALL 3 workers to complete. Use wait_agent with a large timeout (600000ms).
|
|
21122
|
+
Start coordinating now.` : `## REVIEW MODE \u2014 Direct Senior Review
|
|
21099
21123
|
|
|
21100
|
-
|
|
21101
|
-
- This can happen with fast-completing workers
|
|
21102
|
-
- Simply perform the review yourself by reading the changed files
|
|
21103
|
-
- Report: "Workers completed/unavailable \u2014 performing review directly"
|
|
21104
|
-
- Do NOT waste time retrying \u2014 just do the review
|
|
21124
|
+
You are a senior engineer performing a direct code review. Do the review yourself using the available tools and your own judgment.
|
|
21105
21125
|
|
|
21106
|
-
**
|
|
21107
|
-
**ALWAYS synthesize**: Group findings by severity, cross-reference between reviewers, identify patterns.
|
|
21126
|
+
**Do not spawn parallel reviewers by default.** Only use extra agents if the scope is genuinely large and you need them.
|
|
21108
21127
|
|
|
21109
|
-
|
|
21110
|
-
|
|
21128
|
+
**Review Target:** ${reviewTarget}
|
|
21129
|
+
|
|
21130
|
+
### REVIEW WORKFLOW
|
|
21131
|
+
|
|
21132
|
+
#### Step 1: Triage
|
|
21133
|
+
1. Gather the diff/changes:
|
|
21134
|
+
${isPR ? `- Run \`gh pr view ${target}\` for PR details` : ""}
|
|
21135
|
+
${isPR ? `- Run \`gh pr diff ${target}\` for the full diff` : ""}
|
|
21136
|
+
${!isPR && target !== "local" && target !== "local changes" ? `- Read the file: ${target}` : ""}
|
|
21137
|
+
${target === "local" || target === "local changes" ? `- Run \`git diff HEAD\` for unstaged changes` : ""}
|
|
21138
|
+
${target === "local" || target === "local changes" ? `- Run \`git diff --cached HEAD\` for staged changes` : ""}
|
|
21139
|
+
2. Understand the scope and the main risk areas
|
|
21140
|
+
|
|
21141
|
+
#### Step 2: Review Directly
|
|
21142
|
+
Read the changed files carefully yourself. Focus on:
|
|
21143
|
+
- Correctness and regressions
|
|
21144
|
+
- Security and data handling
|
|
21145
|
+
- Tests and edge cases
|
|
21146
|
+
- Clarity and maintainability
|
|
21147
|
+
|
|
21148
|
+
If the diff is large, you may use helpers, but keep the review centered on your own synthesis.
|
|
21149
|
+
|
|
21150
|
+
#### Step 3: Produce the Review Report
|
|
21151
|
+
Compile a concise but rigorous review report:
|
|
21111
21152
|
|
|
21112
21153
|
**REVIEW REPORT for ${reviewTarget}**
|
|
21113
21154
|
|
|
21114
21155
|
\u{1F534} CRITICAL / BLOCKER (must fix before merge):
|
|
21115
|
-
- [List
|
|
21156
|
+
- [List critical findings]
|
|
21116
21157
|
|
|
21117
21158
|
\u{1F7E1} HIGH / MAJOR (should fix):
|
|
21118
|
-
- [List
|
|
21159
|
+
- [List high findings]
|
|
21119
21160
|
|
|
21120
21161
|
\u{1F7E2} MEDIUM / MINOR (nice to fix):
|
|
21121
|
-
- [List
|
|
21162
|
+
- [List medium findings]
|
|
21122
21163
|
|
|
21123
21164
|
\u2139\uFE0F OBSERVATIONS (no action needed):
|
|
21124
|
-
- [List observations
|
|
21165
|
+
- [List observations]
|
|
21125
21166
|
|
|
21126
|
-
\u2705 POSITIVE FINDINGS
|
|
21127
|
-
- [List
|
|
21167
|
+
\u2705 POSITIVE FINDINGS:
|
|
21168
|
+
- [List strong points]
|
|
21128
21169
|
|
|
21129
21170
|
**Review Summary:**
|
|
21130
21171
|
- Total issues found: X critical, Y high, Z medium
|
|
21131
|
-
- Reviewers used:
|
|
21172
|
+
- Reviewers used: direct review
|
|
21132
21173
|
- Recommendation: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES
|
|
21133
21174
|
- Confidence level: HIGH / MEDIUM / LOW
|
|
21134
21175
|
|
|
21135
|
-
|
|
21136
|
-
|
|
21137
|
-
|
|
21138
|
-
- **If workers fail, do the review yourself** \u2014 no drama, just deliver
|
|
21139
|
-
- **NEVER rubber-stamp** \u2014 your job is to find issues
|
|
21140
|
-
- **NEVER fabricate results** \u2014 report truth
|
|
21141
|
-
|
|
21142
|
-
Start coordinating now. Triage the changes, then spawn your 3 reviewers.`
|
|
21176
|
+
Start the review now.`;
|
|
21177
|
+
await agentRef.current?.processTurn({
|
|
21178
|
+
content: reviewPrompt
|
|
21143
21179
|
});
|
|
21144
21180
|
} catch (e) {
|
|
21145
21181
|
setHistory((prev) => prev.concat({
|