@nomad-e/bluma-cli 0.1.74 → 0.1.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -299,21 +299,22 @@ function assessCommandSafety(command, policy = getSandboxPolicy()) {
299
299
  return { allowed: false, risk: "blocked", reason: entry.reason };
300
300
  }
301
301
  }
302
+ const skipConfirmation = ruleDecision === "allow";
302
303
  if (HIGH_RISK_COMMAND_PATTERNS.some((pattern) => pattern.test(trimmed))) {
303
304
  return {
304
305
  allowed: true,
305
306
  risk: policy.isSandbox ? "high" : "high",
306
- reason: policy.isSandbox ? "High-risk command allowed inside the workspace sandbox." : "High-risk command requires explicit approval outside sandbox mode."
307
+ reason: skipConfirmation ? "Command allowed by permission rules engine." : policy.isSandbox ? "High-risk command allowed inside the workspace sandbox." : "High-risk command requires explicit approval outside sandbox mode."
307
308
  };
308
309
  }
309
310
  if (MODERATE_RISK_COMMAND_PATTERNS.some((pattern) => pattern.test(trimmed))) {
310
311
  return {
311
312
  allowed: true,
312
313
  risk: "moderate",
313
- reason: policy.isSandbox ? "Workspace mutation command allowed inside the sandbox." : "Workspace mutation command requires confirmation outside sandbox mode."
314
+ reason: skipConfirmation ? "Command allowed by permission rules engine." : policy.isSandbox ? "Workspace mutation command allowed inside the sandbox." : "Workspace mutation command requires confirmation outside sandbox mode."
314
315
  };
315
316
  }
316
- if (ruleDecision === "allow") {
317
+ if (skipConfirmation) {
317
318
  return { allowed: true, risk: "safe", reason: "Command allowed by permission rules engine." };
318
319
  }
319
320
  return { allowed: true, risk: "safe" };
@@ -327,7 +328,7 @@ var init_sandbox_policy = __esm({
327
328
  BLOCKED_COMMAND_PATTERNS = [
328
329
  { pattern: /\bsudo\b/, reason: "Privilege escalation is not allowed." },
329
330
  { pattern: /\bsu\b\s/, reason: "User switching is not allowed." },
330
- { pattern: /\brm\s+-rf\s+\/\b/, reason: "Deleting root filesystem is blocked." },
331
+ { pattern: /\brm\s+-rf\s+\/(?:\s*(?:$|[;&|]))/, reason: "Deleting root filesystem is blocked." },
331
332
  { pattern: /\bcurl\b.*\|\s*(bash|sh|zsh)/i, reason: "Pipe-to-shell execution is blocked." },
332
333
  { pattern: /\bwget\b.*\|\s*(bash|sh|zsh)/i, reason: "Pipe-to-shell execution is blocked." },
333
334
  { pattern: /\beval\b\s*\(/, reason: "Eval execution is blocked." },
@@ -2367,7 +2368,7 @@ var getSlashCommands = () => [
2367
2368
  },
2368
2369
  {
2369
2370
  name: "/review",
2370
- description: "review coordinator \u2014 spawn specialized QA reviewers in parallel (security, logic, perf, quality, tests, architecture)",
2371
+ description: "review changes directly or use /review mason for parallel specialist reviewers (slower, deeper)",
2371
2372
  category: "agent"
2372
2373
  },
2373
2374
  {
@@ -4770,8 +4771,12 @@ var renderAskUserQuestion = ({ args }) => {
4770
4771
  const parsed = parseArgs(args);
4771
4772
  const qs = Array.isArray(parsed.questions) ? parsed.questions : [];
4772
4773
  const q0 = qs[0];
4773
- const qtext = typeof q0?.question === "string" ? truncate2(q0.question, 100) : "(question)";
4774
- return /* @__PURE__ */ jsx8(Box8, { flexDirection: "column", children: /* @__PURE__ */ jsx8(Text8, { dimColor: true, wrap: "wrap", children: qtext }) });
4774
+ const options = Array.isArray(q0?.options) ? q0.options.length : 0;
4775
+ return /* @__PURE__ */ jsx8(Box8, { flexDirection: "column", children: /* @__PURE__ */ jsxs8(Text8, { dimColor: true, wrap: "wrap", children: [
4776
+ "Awaiting user answer",
4777
+ qs.length > 0 ? ` \xB7 ${qs.length} question${qs.length === 1 ? "" : "s"}` : "",
4778
+ options > 0 ? ` \xB7 ${options} option${options === 1 ? "" : "s"}` : ""
4779
+ ] }) });
4775
4780
  };
4776
4781
  var renderPlanMode = ({ args }) => {
4777
4782
  const parsed = parseArgs(args);
@@ -6980,9 +6985,106 @@ async function readArtifact(args) {
6980
6985
  import https from "https";
6981
6986
  import http from "http";
6982
6987
  var DEFAULT_SOURCES = ["reddit", "github", "stackoverflow"];
6983
- var MAX_RESULTS_DEFAULT = 5;
6988
+ var MAX_RESULTS_DEFAULT = 10;
6989
+ var MAX_USES_DEFAULT = 8;
6984
6990
  var REQUEST_TIMEOUT = 15e3;
6985
6991
  var MAX_CONTENT_LENGTH = 4e3;
6992
/**
 * Validates the arguments of a search request before any network call is made.
 *
 * Checks run in a fixed order and the first failure is reported:
 * query shape, mutually exclusive domain lists, source names, numeric limits,
 * and finally the format of every domain entry.
 *
 * Compared with the previous revision this version:
 *  - rejects NaN and fractional values for `max_results` / `max_uses`
 *    (the error text always said "positive integer", but only `< 1` was tested);
 *  - rejects non-array `sources`, `allowed_domains` and `blocked_domains`
 *    (a string used to be iterated character by character);
 *  - rejects non-string domain entries instead of letting the regex coerce them;
 *  - returns a validation error instead of throwing when `args` itself is missing.
 *
 * @param {object} args - Raw tool arguments (`query`, `sources`, `max_results`,
 *   `max_uses`, `allowed_domains`, `blocked_domains`).
 * @returns {{valid: true} | {valid: false, error: string}} Validation outcome.
 */
function validateInput(args) {
  const fail = (error) => ({ valid: false, error });

  if (!args || !args.query || typeof args.query !== "string") {
    return fail("query is required and must be a string");
  }
  if (args.query.trim().length < 2) {
    return fail("query must be at least 2 characters long");
  }
  if (args.query.length > 500) {
    return fail("query must be less than 500 characters");
  }

  // An allow-list and a block-list cannot be combined in one request.
  if (
    args.allowed_domains &&
    args.blocked_domains &&
    args.allowed_domains.length > 0 &&
    args.blocked_domains.length > 0
  ) {
    return fail("Cannot specify both allowed_domains and blocked_domains in the same request");
  }

  // null/undefined mean "not specified"; anything else must be a list of known names.
  if (args.sources !== undefined && args.sources !== null) {
    if (!Array.isArray(args.sources)) {
      return fail("sources must be an array of source names");
    }
    const validSources = ["reddit", "github", "stackoverflow", "x"];
    for (const source of args.sources) {
      if (!validSources.includes(source)) {
        return fail(`Invalid source: ${source}. Valid sources are: ${validSources.join(", ")}`);
      }
    }
  }

  if (args.max_results !== undefined) {
    // Number.isInteger is false for NaN, Infinity, fractions and non-numbers.
    if (!Number.isInteger(args.max_results) || args.max_results < 1) {
      return fail("max_results must be a positive integer");
    }
    if (args.max_results > 50) {
      return fail("max_results cannot exceed 50");
    }
  }
  if (args.max_uses !== undefined) {
    if (!Number.isInteger(args.max_uses) || args.max_uses < 1) {
      return fail("max_uses must be a positive integer");
    }
    if (args.max_uses > 20) {
      return fail("max_uses cannot exceed 20");
    }
  }

  const domainPattern = /^[a-zA-Z0-9][a-zA-Z0-9.-]*\.[a-zA-Z]{2,}$/;
  for (const key of ["allowed_domains", "blocked_domains"]) {
    const list = args[key];
    if (!list) {
      continue; // falsy values are treated as "no filter", as before
    }
    if (!Array.isArray(list)) {
      return fail(`${key} must be an array of domain names`);
    }
    for (const domain of list) {
      if (typeof domain !== "string" || !domainPattern.test(domain)) {
        return fail(`Invalid domain format: ${domain}`);
      }
    }
  }

  return { valid: true };
}
7052
/**
 * Reduces a URL to the last two dot-separated labels of its lower-cased hostname.
 *
 * Hostnames with fewer than two labels (e.g. "localhost") are returned whole.
 * Because only the last two labels are kept, a host such as "news.bbc.co.uk"
 * yields "co.uk".
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {string|null} The shortened hostname, or null when `url` cannot be parsed.
 */
function extractDomain(url) {
  let host;
  try {
    host = new URL(url).hostname.toLowerCase();
  } catch {
    return null;
  }
  const labels = host.split(".");
  return labels.length >= 2 ? labels.slice(-2).join(".") : host;
}
7065
/**
 * Decides whether a URL is permitted by an allow-list and/or a block-list of domains.
 *
 * A list entry matches when the URL's hostname equals the entry or is a
 * subdomain of it (case-insensitive), so "reddit.com" covers "www.reddit.com"
 * but not "notreddit.com".
 *
 * The previous revision compared against the hostname truncated to its last
 * two labels. That made the subdomain branch unreachable and meant entries
 * such as "api.github.com", "old.reddit.com" or "bbc.co.uk" could never match.
 * Matching on the full hostname accepts every case the old matching accepted
 * and additionally makes those entries work. The hostname is parsed here
 * directly so the filter does not inherit that truncation.
 *
 * @param {string} url - Absolute URL to check.
 * @param {string[]} [allowedDomains] - When non-empty, the host must match one entry.
 * @param {string[]} [blockedDomains] - When non-empty, the host must match no entry.
 * @returns {{passes: true} | {passes: false, reason: string}} Filter verdict.
 */
function passesDomainFilter(url, allowedDomains, blockedDomains) {
  let host = "";
  try {
    host = new URL(url).hostname.toLowerCase();
  } catch {
    // Unparseable URL: handled by the empty-host check below.
  }
  if (!host) {
    return { passes: false, reason: "Could not extract domain from URL" };
  }

  const matchesHost = (entry) => {
    const candidate = String(entry).toLowerCase();
    return host === candidate || host.endsWith(`.${candidate}`);
  };

  if (allowedDomains && allowedDomains.length > 0 && !allowedDomains.some(matchesHost)) {
    return { passes: false, reason: `Domain ${host} not in allowed list` };
  }
  if (blockedDomains && blockedDomains.length > 0 && blockedDomains.some(matchesHost)) {
    return { passes: false, reason: `Domain ${host} is in blocked list` };
  }
  return { passes: true };
}
6986
7088
  function httpGet(url, customHeaders) {
6987
7089
  return new Promise((resolve2, reject) => {
6988
7090
  const protocol = url.startsWith("https") ? https : http;
@@ -7028,17 +7130,29 @@ function cleanContent(text, maxLength = MAX_CONTENT_LENGTH) {
7028
7130
  }
7029
7131
  return cleaned;
7030
7132
  }
7031
- async function searchReddit(query, limit) {
7133
+ async function searchReddit(query, limit, allowedDomains, blockedDomains) {
7032
7134
  const results = [];
7135
+ const warnings = [];
7033
7136
  try {
7034
7137
  const subreddits = "programming+webdev+javascript+typescript+python+node+reactjs+learnprogramming+rust+golang+devops";
7035
7138
  const encodedQuery = encodeURIComponent(query);
7036
- const url = `https://www.reddit.com/r/${subreddits}/search.json?q=${encodedQuery}&sort=relevance&limit=${limit}&restrict_sr=on`;
7139
+ const url = `https://www.reddit.com/r/${subreddits}/search.json?q=${encodedQuery}&sort=relevance&limit=${limit * 2}&restrict_sr=on`;
7140
+ const domainCheck = passesDomainFilter(url, allowedDomains, blockedDomains);
7141
+ if (!domainCheck.passes) {
7142
+ warnings.push(`Reddit search skipped: ${domainCheck.reason}`);
7143
+ return results;
7144
+ }
7037
7145
  const response = await httpGet(url);
7038
7146
  const data = JSON.parse(response);
7039
7147
  if (data.data?.children) {
7040
- for (const child of data.data.children.slice(0, limit)) {
7148
+ for (const child of data.data.children.slice(0, limit * 2)) {
7149
+ if (results.length >= limit) break;
7041
7150
  const post = child.data;
7151
+ const postUrl = `https://reddit.com${post.permalink}`;
7152
+ const urlCheck = passesDomainFilter(postUrl, allowedDomains, blockedDomains);
7153
+ if (!urlCheck.passes) {
7154
+ continue;
7155
+ }
7042
7156
  let content = `# ${post.title}
7043
7157
 
7044
7158
  `;
@@ -7070,7 +7184,7 @@ ${cleanContent(post.selftext, 2e3)}
7070
7184
  }
7071
7185
  results.push({
7072
7186
  title: post.title || "",
7073
- url: `https://reddit.com${post.permalink}`,
7187
+ url: postUrl,
7074
7188
  source: "reddit",
7075
7189
  content: cleanContent(content),
7076
7190
  score: post.score,
@@ -7084,18 +7198,34 @@ ${cleanContent(post.selftext, 2e3)}
7084
7198
  }
7085
7199
  } catch (error) {
7086
7200
  console.error(`[search_web] Reddit error: ${error.message}`);
7201
+ if (error.message.includes("403") || error.message.includes("429")) {
7202
+ warnings.push("Reddit rate limit encountered - results may be incomplete");
7203
+ }
7087
7204
  }
7088
7205
  return results;
7089
7206
  }
7090
- async function searchGitHub(query, limit) {
7207
+ async function searchGitHub(query, limit, allowedDomains, blockedDomains) {
7091
7208
  const results = [];
7209
+ const warnings = [];
7092
7210
  try {
7093
7211
  const encodedQuery = encodeURIComponent(query);
7094
- const url = `https://api.github.com/search/issues?q=${encodedQuery}+is:issue&sort=reactions&order=desc&per_page=${limit}`;
7095
- const response = await httpGet(url);
7212
+ const url = `https://api.github.com/search/issues?q=${encodedQuery}+is:issue&sort=reactions&order=desc&per_page=${limit * 2}`;
7213
+ const domainCheck = passesDomainFilter(url, allowedDomains, blockedDomains);
7214
+ if (!domainCheck.passes) {
7215
+ warnings.push(`GitHub search skipped: ${domainCheck.reason}`);
7216
+ return results;
7217
+ }
7218
+ const response = await httpGet(url, {
7219
+ "Accept": "application/vnd.github+json"
7220
+ });
7096
7221
  const data = JSON.parse(response);
7097
7222
  if (data.items) {
7098
- for (const item of data.items.slice(0, limit)) {
7223
+ for (const item of data.items.slice(0, limit * 2)) {
7224
+ if (results.length >= limit) break;
7225
+ const urlCheck = passesDomainFilter(item.html_url, allowedDomains, blockedDomains);
7226
+ if (!urlCheck.passes) {
7227
+ continue;
7228
+ }
7099
7229
  let content = `# ${item.title}
7100
7230
 
7101
7231
  `;
@@ -7130,18 +7260,32 @@ ${cleanContent(item.body, 2500)}
7130
7260
  }
7131
7261
  } catch (error) {
7132
7262
  console.error(`[search_web] GitHub error: ${error.message}`);
7263
+ if (error.message.includes("403")) {
7264
+ warnings.push("GitHub API rate limit may have been reached");
7265
+ }
7133
7266
  }
7134
7267
  return results;
7135
7268
  }
7136
- async function searchStackOverflow(query, limit) {
7269
+ async function searchStackOverflow(query, limit, allowedDomains, blockedDomains) {
7137
7270
  const results = [];
7271
+ const warnings = [];
7138
7272
  try {
7139
7273
  const encodedQuery = encodeURIComponent(query);
7140
- const url = `https://api.stackexchange.com/2.3/search/advanced?order=desc&sort=relevance&q=${encodedQuery}&site=stackoverflow&pagesize=${limit}&filter=withbody`;
7274
+ const url = `https://api.stackexchange.com/2.3/search/advanced?order=desc&sort=relevance&q=${encodedQuery}&site=stackoverflow&pagesize=${limit * 2}&filter=withbody`;
7275
+ const domainCheck = passesDomainFilter(url, allowedDomains, blockedDomains);
7276
+ if (!domainCheck.passes) {
7277
+ warnings.push(`StackOverflow search skipped: ${domainCheck.reason}`);
7278
+ return results;
7279
+ }
7141
7280
  const response = await httpGet(url);
7142
7281
  const data = JSON.parse(response);
7143
7282
  if (data.items) {
7144
- for (const item of data.items.slice(0, limit)) {
7283
+ for (const item of data.items.slice(0, limit * 2)) {
7284
+ if (results.length >= limit) break;
7285
+ const urlCheck = passesDomainFilter(item.link, allowedDomains, blockedDomains);
7286
+ if (!urlCheck.passes) {
7287
+ continue;
7288
+ }
7145
7289
  let content = `# ${item.title}
7146
7290
 
7147
7291
  `;
@@ -7195,64 +7339,96 @@ ${cleanContent(cleanAnswer, 2e3)}
7195
7339
  }
7196
7340
  return results;
7197
7341
  }
7198
- async function searchWeb(args) {
7342
+ async function searchX(query, limit, allowedDomains, blockedDomains) {
7343
+ const results = [];
7344
+ const warnings = [];
7345
+ warnings.push("X (Twitter) search is not available - API requires authentication");
7199
7346
  try {
7200
- const {
7201
- query,
7202
- sources = DEFAULT_SOURCES,
7203
- max_results = MAX_RESULTS_DEFAULT
7204
- } = args;
7205
- if (!query || typeof query !== "string") {
7206
- return {
7207
- success: false,
7208
- query: query || "",
7209
- results: [],
7210
- sources_searched: [],
7211
- total_results: 0,
7212
- error: "query is required and must be a string"
7213
- };
7214
- }
7215
- const allResults = [];
7216
- const sourcesSearched = [];
7217
- const resultsPerSource = Math.ceil(max_results / sources.length);
7218
- const searches = [];
7219
- for (const source of sources) {
7220
- sourcesSearched.push(source);
7221
- switch (source) {
7222
- case "reddit":
7223
- searches.push(searchReddit(query, resultsPerSource));
7224
- break;
7225
- case "github":
7226
- searches.push(searchGitHub(query, resultsPerSource));
7227
- break;
7228
- case "stackoverflow":
7229
- searches.push(searchStackOverflow(query, resultsPerSource));
7230
- break;
7231
- }
7232
- }
7233
- const searchResults = await Promise.all(searches);
7234
- for (const results of searchResults) {
7235
- allResults.push(...results);
7347
+ const encodedQuery = encodeURIComponent(`${query} site:twitter.com OR site:x.com`);
7348
+ const url = `https://www.google.com/search?q=${encodedQuery}&num=${limit}`;
7349
+ const domainCheck = passesDomainFilter(url, allowedDomains, blockedDomains);
7350
+ if (!domainCheck.passes) {
7351
+ warnings.push(`X search skipped: ${domainCheck.reason}`);
7352
+ return results;
7236
7353
  }
7237
- allResults.sort((a, b) => (b.score || 0) - (a.score || 0));
7238
- const limitedResults = allResults.slice(0, max_results);
7239
- return {
7240
- success: true,
7241
- query,
7242
- results: limitedResults,
7243
- sources_searched: sourcesSearched,
7244
- total_results: limitedResults.length
7245
- };
7246
7354
  } catch (error) {
7355
+ console.error(`[search_web] X error: ${error.message}`);
7356
+ }
7357
+ return results;
7358
+ }
7359
/**
 * Runs a developer-focused web search across the requested sources and merges the results.
 *
 * Flow: validate the arguments, cap the number of sources at `max_uses`, split
 * `max_results` evenly across the remaining sources, run all source searches
 * concurrently, then sort the combined results by `score` (descending) and
 * truncate to `max_results`.
 *
 * Fixes relative to the previous revision:
 *  - The function resolves with `success: false` instead of rejecting when
 *    something unexpected throws (the try/catch present in the older
 *    implementation had been dropped).
 *  - Source searches are awaited with `Promise.allSettled`, so one rejecting
 *    source no longer discards the results of the others; the failure is
 *    reported in `warnings`.
 *  - A missing, null or empty `sources` list falls back to the default sources
 *    instead of dividing `max_results` by zero.
 *
 * Note: only warnings produced in this function are returned; the source
 * helpers return their result arrays only.
 *
 * @param {object} args - Tool arguments: `query`, optional `sources`,
 *   `max_results`, `max_uses`, `allowed_domains`, `blocked_domains`.
 * @returns {Promise<object>} Result envelope with `success`, `query`, `results`,
 *   `sources_searched`, `total_results`, `duration_seconds`,
 *   `searches_performed`, and either `warnings` or `error`.
 */
async function searchWeb(args) {
  const startTime = performance.now();
  const warnings = [];

  // Shared shape for every unsuccessful outcome.
  const failure = (error, durationSeconds) => ({
    success: false,
    query: args?.query || "",
    results: [],
    sources_searched: [],
    total_results: 0,
    duration_seconds: durationSeconds,
    searches_performed: 0,
    error
  });

  try {
    const validation = validateInput(args);
    if (!validation.valid) {
      return failure(validation.error, 0);
    }

    const {
      query,
      max_results = MAX_RESULTS_DEFAULT,
      allowed_domains,
      blocked_domains,
      max_uses = MAX_USES_DEFAULT
    } = args;
    const sources = Array.isArray(args.sources) && args.sources.length > 0 ? args.sources : DEFAULT_SOURCES;

    if (sources.length > max_uses) {
      warnings.push(`Requested ${sources.length} sources but max_uses is ${max_uses} - limiting to ${max_uses} sources`);
    }
    const limitedSources = sources.slice(0, max_uses);
    const resultsPerSource = Math.ceil(max_results / limitedSources.length);

    const searchers = new Map([
      ["reddit", searchReddit],
      ["github", searchGitHub],
      ["stackoverflow", searchStackOverflow],
      ["x", searchX]
    ]);

    const sourcesSearched = [];
    const launchedSources = [];
    const searches = [];
    for (const source of limitedSources) {
      sourcesSearched.push(source);
      const runSearch = searchers.get(source);
      if (!runSearch) {
        continue; // unknown names are rejected by validation; ignored here as before
      }
      launchedSources.push(source);
      searches.push(runSearch(query, resultsPerSource, allowed_domains, blocked_domains));
    }

    const outcomes = await Promise.allSettled(searches);
    const searchesPerformed = outcomes.length;

    const allResults = [];
    outcomes.forEach((outcome, position) => {
      if (outcome.status === "fulfilled") {
        allResults.push(...outcome.value);
        return;
      }
      const detail = outcome.reason instanceof Error ? outcome.reason.message : String(outcome.reason);
      warnings.push(`${launchedSources[position]} search failed: ${detail}`);
    });

    allResults.sort((a, b) => (b.score || 0) - (a.score || 0));
    const limitedResults = allResults.slice(0, max_results);

    const durationSeconds = (performance.now() - startTime) / 1e3;
    if (limitedResults.length === 0) {
      warnings.push("No results found - try adjusting your query or domain filters");
    }

    return {
      success: true,
      query,
      results: limitedResults,
      sources_searched: sourcesSearched,
      total_results: limitedResults.length,
      // Rounded to 2 decimal places.
      duration_seconds: Math.round(durationSeconds * 100) / 100,
      searches_performed: searchesPerformed,
      warnings: warnings.length > 0 ? warnings : void 0
    };
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    const durationSeconds = (performance.now() - startTime) / 1e3;
    return failure(`Unexpected error: ${detail}`, Math.round(durationSeconds * 100) / 100);
  }
}
7257
7433
 
7258
7434
  // src/app/agent/tools/natives/load_skill.ts
@@ -13036,7 +13212,7 @@ You are the **BluMa Coordinator** \u2014 a **Product Owner + Engineering Manager
13036
13212
 
13037
13213
  ## 0. Core Philosophy: Team > Solo
13038
13214
 
13039
- **One AI is good. A coordinated team of 3-7 AIs is exponentially better.**
13215
+ **One AI is good. A coordinated team of 3-7 AIs can be better when the task truly benefits from delegation.**
13040
13216
 
13041
13217
  Think of yourself as a **rigorous PO** who:
13042
13218
  - Receives a request from the user (the "client")
@@ -13044,7 +13220,7 @@ Think of yourself as a **rigorous PO** who:
13044
13220
  - Assigns each task to the right specialist worker
13045
13221
  - Coordinates their work in parallel
13046
13222
  - Verifies quality before delivering to the client
13047
- - **Always prefers team execution over solo work** for anything non-trivial
13223
+ - **Prefer team execution** when the task is non-trivial, parallelizable, risky, or needs independent verification
13048
13224
 
13049
13225
  **Why this matters:**
13050
13226
  - **Quality**: Each worker focuses deeply on one aspect \u2192 fewer mistakes
@@ -13054,14 +13230,15 @@ Think of yourself as a **rigorous PO** who:
13054
13230
  - **CEO appreciation**: Systematic, professional approach that scales
13055
13231
 
13056
13232
  **Default behavior**: When a task arrives, your first instinct should be:
13057
- > "How can I break this into parallel worker tasks?"
13233
+ > "Can I answer or handle this directly?"
13058
13234
 
13059
- Not: "How do I do this myself?"
13235
+ Only if the answer is no, ask:
13236
+ > "How can I break this into parallel worker tasks?"
13060
13237
 
13061
13238
  ## 1. Your Role
13062
13239
 
13063
13240
  You do **NOT execute tasks directly** (except trivial reads). Your job is to:
13064
- - **Orchestrate workers** to research, implement, and verify changes
13241
+ - **Orchestrate workers** to research, implement, and verify changes when that materially improves speed, quality, or confidence
13065
13242
  - **Synthesize results** and communicate with the user
13066
13243
  - **Answer questions directly** when possible \u2014 don't delegate work you can handle without tools
13067
13244
  - **Read-only tools** (\`read_file_lines\`, \`grep\`, etc.) are fine for **light** coordinator checks (e.g. verify a path before writing a worker spec); heavy exploration belongs in workers
@@ -14233,7 +14410,7 @@ Use **both** API **reasoning** (when available) **and** the \`message\` tool. Re
14233
14410
  - Never claim success without tool output that proves it.
14234
14411
  - **Stay audible:** Your **default** in multi-step work is to call \`message\` with \`message_type: "info"\` **early and often** \u2014 not optional polish. **Bias toward sending \`info\`** after discoveries, failures, and before long tool chains; **several \`info\` calls per turn** is normal and expected. Do **not** hide behind tools or reasoning only; \`info\` is how the user follows along.
14235
14412
  - **Ask when uncertain:** Use \`ask_user_question\` when you encounter ambiguity, need clarification, or face multiple valid approaches. Do not assume \u2014 ask the user to make decisions about their preferences, requirements, or implementation choices. This tool is your primary mechanism for resolving uncertainty.
14236
- - **Team-first mindset:** For any non-trivial task, prefer spawning parallel workers over doing it yourself. One AI is good; a coordinated team of 3-7 workers is exponentially better. Break work into research \u2192 implementation \u2192 verification phases, each handled by specialist workers. You are the PO \u2014 orchestrate, synthesize, verify, deliver.
14413
+ - **Worker policy:** Use workers surgically, not by default. Do the work directly when the task is simple, local, or already well-scoped. Spawn workers when the task is genuinely non-trivial, parallelizable, risky, or needs independent verification. Break larger efforts into research \u2192 implementation \u2192 verification phases when that creates real value. You are the PO \u2014 orchestrate when it helps, synthesize, verify, deliver.
14237
14414
  - **Engineer mindset \u2014 question anomalies:** When something seems wrong (memory loss, unexpected behavior, aggressive defaults), **investigate deeply**. Do not accept "it's working as designed". Trace the code, find the root cause, and **have courage to revert** if a feature breaks core functionality. Protect the session, memory, and stability above all.
14238
14415
  - **Courage to reverse:** If you discover a path is wrong (e.g., a "feature" that destroys context, a default that's too aggressive), **stop immediately**, explain why it's broken, and revert/remove it. Better to undo a bad change than to let it cause harm. This is what separates a **thinking engineer** from a **blind executor**.
14239
14416
  - Large efforts: \`todo\`; parallel subtasks: \`spawn_agent\` with a clear scope + \`wait_agent\` / \`list_agents\`.
@@ -14567,6 +14744,46 @@ Next steps: ${anchor.nextSteps}`;
14567
14744
  }
14568
14745
 
14569
14746
  // src/app/agent/core/context-api/context_manager.ts
14747
/**
 * Reports whether a tool-call `arguments` payload is a string containing parseable JSON.
 *
 * @param {unknown} value - Candidate arguments payload.
 * @returns {boolean} True only for strings that `JSON.parse` accepts.
 */
function isValidJsonArguments(value) {
  if (typeof value === "string") {
    try {
      JSON.parse(value);
      return true;
    } catch {
      // Not parseable: fall through to the shared `false` result.
    }
  }
  return false;
}
14756
/**
 * Removes assistant turns whose tool calls carry unparseable JSON arguments.
 *
 * When an assistant message has at least one tool call with invalid JSON
 * arguments, that message is dropped and recorded in `issues`, together with:
 *  - any assistant messages that immediately follow it, and
 *  - the run of `tool` messages that follows.
 *
 * The second point is the fix in this revision: previously the `tool` messages
 * were kept even though the assistant message that requested them had been
 * removed, leaving tool results with no preceding `tool_calls`.
 *
 * The input array is not modified, kept messages are passed through by
 * reference, and `issues` still contains one entry per dropped corrupt
 * assistant message.
 *
 * @param {Array<object>} conversationHistory - Messages in chat-completions shape.
 * @returns {{messages: Array<object>, issues: Array<{index: number, reason: string, toolNames: string[]}>}}
 */
function sanitizeConversationForProvider(conversationHistory) {
  const cleaned = [];
  const issues = [];
  // True while skipping assistant messages directly after a corrupt one.
  let droppingCorruptTurn = false;
  // True while skipping the tool results that belonged to the dropped assistant message(s).
  let droppingOrphanedResults = false;

  for (let index = 0; index < conversationHistory.length; index += 1) {
    const msg = conversationHistory[index];

    if (droppingCorruptTurn) {
      if (msg?.role === "assistant") {
        continue;
      }
      droppingCorruptTurn = false;
      droppingOrphanedResults = msg?.role === "tool";
    }
    if (droppingOrphanedResults) {
      if (msg?.role === "tool") {
        continue;
      }
      droppingOrphanedResults = false;
    }

    const toolCalls = Array.isArray(msg?.tool_calls) ? msg.tool_calls : null;
    if (msg?.role === "assistant" && toolCalls && toolCalls.length > 0) {
      const invalidCalls = toolCalls.filter(
        (call) => !isValidJsonArguments(call?.function?.arguments)
      );
      if (invalidCalls.length > 0) {
        issues.push({
          index,
          reason: "assistant tool_calls had invalid JSON arguments",
          toolNames: invalidCalls.map((call) => String(call?.function?.name ?? "unknown"))
        });
        droppingCorruptTurn = true;
        continue;
      }
    }

    cleaned.push(msg);
  }

  return { messages: cleaned, issues };
}
14570
14787
  function partitionConversationIntoTurnSlices(conversationHistory) {
14571
14788
  const turns = [];
14572
14789
  let current = [];
@@ -14602,13 +14819,15 @@ async function createApiContextWindow(fullHistory, currentAnchor, compressedTurn
14602
14819
  const tokenBudget = options?.tokenBudget ?? CONTEXT_TOKEN_BUDGET;
14603
14820
  const compressThreshold = options?.compressThreshold ?? COMPRESS_THRESHOLD;
14604
14821
  const keepRecentTurns = options?.keepRecentTurns ?? KEEP_RECENT_TURNS;
14822
+ const sanitized = sanitizeConversationForProvider(fullHistory);
14823
+ const safeHistory = sanitized.messages;
14605
14824
  const systemMessages = [];
14606
14825
  let historyStartIndex = 0;
14607
- while (historyStartIndex < fullHistory.length && fullHistory[historyStartIndex].role === "system") {
14608
- systemMessages.push(fullHistory[historyStartIndex]);
14826
+ while (historyStartIndex < safeHistory.length && safeHistory[historyStartIndex].role === "system") {
14827
+ systemMessages.push(safeHistory[historyStartIndex]);
14609
14828
  historyStartIndex++;
14610
14829
  }
14611
- const conversationHistory = fullHistory.slice(historyStartIndex);
14830
+ const conversationHistory = safeHistory.slice(historyStartIndex);
14612
14831
  const turnSlices = partitionConversationIntoTurnSlices(conversationHistory);
14613
14832
  const n = turnSlices.length;
14614
14833
  const recentStart = Math.max(0, n - keepRecentTurns);
@@ -14654,6 +14873,29 @@ async function createApiContextWindow(fullHistory, currentAnchor, compressedTurn
14654
14873
  init_runtime_config();
14655
14874
  import os23 from "os";
14656
14875
  import OpenAI from "openai";
14876
+
14877
+ // src/app/agent/core/llm/streaming_delta.ts
14878
/**
 * Computes the portion of an incoming streaming chunk that is not already
 * covered by the text accumulated so far.
 *
 * Resolution order:
 *  1. empty incoming chunk        -> ""
 *  2. nothing accumulated yet     -> the whole chunk
 *  3. chunk extends accumulated   -> only the new suffix
 *  4. chunk is a prefix of it     -> ""
 *  5. tail of accumulated overlaps the head of the chunk -> the chunk minus
 *     the longest such overlap
 *  6. otherwise                   -> the whole chunk
 *
 * NOTE(review): cases 4 and 5 also apply to a genuinely new chunk that happens
 * to repeat already-seen text, which is then shortened or dropped. Confirm this
 * is acceptable for providers that send pure incremental deltas.
 *
 * @param {unknown} previous - Text accumulated so far (null/undefined count as "").
 * @param {unknown} next - Newly received chunk (null/undefined count as "").
 * @returns {string} The text to append.
 */
function extractStreamingDelta(previous, next) {
  const seen = String(previous ?? "");
  const incoming = String(next ?? "");

  if (incoming === "") return "";
  if (seen === "") return incoming;
  if (incoming.startsWith(seen)) return incoming.slice(seen.length);
  if (seen.startsWith(incoming)) return "";

  // Look for the longest head of `incoming` that `seen` already ends with.
  let overlap = Math.min(seen.length, incoming.length);
  while (overlap > 0) {
    if (seen.endsWith(incoming.slice(0, overlap))) {
      return incoming.slice(overlap);
    }
    overlap -= 1;
  }
  return incoming;
}
14897
+
14898
+ // src/app/agent/core/llm/llm.ts
14657
14899
  function defaultBlumaUserContextInput(sessionId, userMessage) {
14658
14900
  const msg = String(userMessage || "").slice(0, 300);
14659
14901
  return {
@@ -14845,12 +15087,17 @@ var LLMService = class {
14845
15087
  { headers: this.requestHeaders(params.userContext) }
14846
15088
  );
14847
15089
  const toolCallsAccumulator = /* @__PURE__ */ new Map();
15090
+ let reasoningSnapshot = "";
14848
15091
  for await (const chunk of stream) {
14849
15092
  const choice = chunk.choices[0];
14850
15093
  if (!choice) continue;
14851
15094
  const delta = choice.delta;
14852
15095
  applyDeltaToolCallsToAccumulator(toolCallsAccumulator, delta?.tool_calls);
14853
- const reasoning = delta?.reasoning_content || delta?.reasoning || "";
15096
+ const rawReasoning = delta?.reasoning_content || delta?.reasoning || "";
15097
+ const reasoning = extractStreamingDelta(reasoningSnapshot, rawReasoning);
15098
+ if (reasoning) {
15099
+ reasoningSnapshot += reasoning;
15100
+ }
14854
15101
  const fullToolCalls = choice.finish_reason === "tool_calls" ? Array.from(toolCallsAccumulator.values()) : void 0;
14855
15102
  yield {
14856
15103
  delta: delta?.content || "",
@@ -14873,6 +15120,33 @@ var LLMService = class {
14873
15120
  }
14874
15121
  };
14875
15122
 
15123
+ // src/app/agent/core/llm/llm_errors.ts
15124
/**
 * Maps an LLM request failure to a user-facing message and hint.
 *
 * The raw message is taken from an Error's `message`, from the value itself when
 * it is a string, or from a fixed placeholder otherwise. It is then lower-cased
 * and matched against keyword groups in priority order: timeout, connectivity,
 * authentication/authorization, generic "api". The first group with a hit
 * selects the texts; with no hit the generic fallback texts are used.
 *
 * @param {unknown} error - The thrown value.
 * @returns {{message: string, details: string, hint: string, rawMessage: string}}
 */
function formatLlmUiError(error) {
  let rawMessage;
  if (error instanceof Error) {
    rawMessage = error.message;
  } else if (typeof error === "string") {
    rawMessage = error;
  } else {
    rawMessage = "Unknown error during LLM request.";
  }
  const haystack = rawMessage.toLowerCase();

  // Ordered by priority: the first rule with a matching keyword wins.
  const rules = [
    {
      keywords: ["timeout", "etimedout", "upstream_timeout"],
      message: "O BluMa demorou demasiado a responder.",
      hint: "Aumente o timeout do pedido ou tente novamente."
    },
    {
      keywords: ["connection", "econnrefused", "ehostunreach", "enotfound"],
      message: "N\xE3o foi poss\xEDvel conectar ao servi\xE7o do modelo.",
      hint: "Verifique a rede, o FactorRouter_URL e o estado do gateway."
    },
    {
      keywords: ["401", "403", "unauthorized", "forbidden"],
      message: "Falha de autentica\xE7\xE3o/autoriza\xE7\xE3o ao contactar o modelo.",
      hint: "Verifique o FactorRouter_KEY e as permiss\xF5es da conta."
    },
    {
      keywords: ["api"],
      message: "Erro de comunica\xE7\xE3o com a API do modelo.",
      hint: "Verifique credenciais e o estado do servi\xE7o upstream."
    }
  ];
  const fallback = {
    message: "Ocorreu um erro inesperado ao contactar o modelo.",
    hint: "Tente novamente. Se continuar, verifique a liga\xE7\xE3o ao FactorRouter."
  };

  const selected = rules.find((rule) => rule.keywords.some((keyword) => haystack.includes(keyword))) ?? fallback;

  return {
    message: selected.message,
    details: "See server logs for technical details.",
    hint: selected.hint,
    rawMessage
  };
}
15149
+
14876
15150
  // src/app/agent/core/llm/tool_call_normalizer.ts
14877
15151
  import { randomUUID } from "crypto";
14878
15152
  var ToolCallNormalizer = class {
@@ -15028,7 +15302,15 @@ var ToolCallNormalizer = class {
15028
15302
  * Valida se um tool call normalizado é válido
15029
15303
  */
15030
15304
  static isValidToolCall(call) {
15031
- return !!(call.id && call.type === "function" && call.function?.name && typeof call.function.arguments === "string");
15305
+ if (!(call.id && call.type === "function" && call.function?.name && typeof call.function.arguments === "string")) {
15306
+ return false;
15307
+ }
15308
+ try {
15309
+ JSON.parse(call.function.arguments);
15310
+ return true;
15311
+ } catch {
15312
+ return false;
15313
+ }
15032
15314
  }
15033
15315
  };
15034
15316
 
@@ -15312,7 +15594,8 @@ function buildTurnStartBackendMessage(params) {
15312
15594
  }
15313
15595
 
15314
15596
  // src/app/agent/bluma/core/bluma.ts
15315
- var BluMaAgent = class {
15597
+ var BluMaAgent = class _BluMaAgent {
15598
+ static MAX_INVALID_TOOL_CALL_RETRIES = 3;
15316
15599
  llm;
15317
15600
  sessionId;
15318
15601
  sessionFile = "";
@@ -15331,8 +15614,10 @@ var BluMaAgent = class {
15331
15614
  factorRouterTurnClosed = false;
15332
15615
  /** Passos seguidos sem tool_calls nem texto visível (só raciocínio) — evita loop lento no mesmo turno. */
15333
15616
  emptyAssistantReplySteps = 0;
15334
- /** Passos seguidos com texto do assistente sem tool_calls (violação de protocolo) — evita loop até timeout do job. */
15335
- directTextProtocolSteps = 0;
15617
+ /** Reintentos consecutivos por tool call inválido. */
15618
+ invalidToolCallRetrySteps = 0;
15619
+ /** Deduplicação de reasoning chunks no streaming — evita repetição. */
15620
+ lastReasoningChunkRef = "";
15336
15621
  constructor(sessionId, eventBus, llm, mcpClient, feedbackSystem) {
15337
15622
  this.sessionId = sessionId;
15338
15623
  this.eventBus = eventBus;
@@ -15375,6 +15660,33 @@ var BluMaAgent = class {
15375
15660
  if (!this.sessionFile) return;
15376
15661
  void saveSessionHistory(this.sessionFile, this.history, this.getMemorySnapshot());
15377
15662
  }
15663
+ async handleInvalidToolCallRetry(message2) {
15664
+ this.invalidToolCallRetrySteps += 1;
15665
+ if (this.history[this.history.length - 1] === message2) {
15666
+ this.history.pop();
15667
+ }
15668
+ if (this.invalidToolCallRetrySteps >= _BluMaAgent.MAX_INVALID_TOOL_CALL_RETRIES) {
15669
+ this.eventBus.emit("backend_message", {
15670
+ type: "error",
15671
+ message: "The model kept returning invalid tool calls. Closing the turn to avoid a retry loop."
15672
+ });
15673
+ this.eventBus.emit("backend_message", {
15674
+ type: "log",
15675
+ message: "Invalid tool call retry limit reached",
15676
+ payload: String(this.invalidToolCallRetrySteps)
15677
+ });
15678
+ await this.notifyFactorTurnEndIfNeeded("invalid_tool_calls_exhausted");
15679
+ this.eventBus.emit("backend_message", { type: "done", status: "failed" });
15680
+ this.invalidToolCallRetrySteps = 0;
15681
+ return;
15682
+ }
15683
+ this.history.push({
15684
+ role: "system",
15685
+ content: "Previous assistant tool_calls were invalid. Retry with valid JSON arguments only, or answer without tools."
15686
+ });
15687
+ this.persistSession();
15688
+ await this._continueConversation();
15689
+ }
15378
15690
  async initialize() {
15379
15691
  await this.mcpClient.nativeToolInvoker.initialize();
15380
15692
  await this.mcpClient.initialize();
@@ -15483,7 +15795,7 @@ var BluMaAgent = class {
15483
15795
  const userContent = buildUserMessageContent(inputText, process.cwd());
15484
15796
  this.history.push({ role: "user", content: userContent });
15485
15797
  this.emptyAssistantReplySteps = 0;
15486
- this.directTextProtocolSteps = 0;
15798
+ this.invalidToolCallRetrySteps = 0;
15487
15799
  this.eventBus.emit(
15488
15800
  "backend_message",
15489
15801
  buildTurnStartBackendMessage({
@@ -15529,15 +15841,18 @@ var BluMaAgent = class {
15529
15841
  }
15530
15842
  } catch (parseError) {
15531
15843
  this.eventBus.emit("backend_message", {
15532
- type: "error",
15533
- message: `Failed to parse tool arguments: ${parseError.message}`
15844
+ type: "info",
15845
+ message: "O BluMa encontrou um erro ao processar. A tentar recuperar a sess\xE3o..."
15534
15846
  });
15535
15847
  toolResultContent = JSON.stringify({
15536
- error: "Invalid tool arguments format",
15537
- details: `The arguments could not be parsed as JSON: ${parseError.message}`,
15538
- raw_arguments: toolCall.function.arguments
15848
+ error: "Tool arguments could not be parsed",
15849
+ recovery: "Session recovered automatically"
15539
15850
  });
15540
15851
  this.history.push({ role: "tool", tool_call_id: toolCall.id, content: toolResultContent });
15852
+ this.history.push({
15853
+ role: "system",
15854
+ content: "The previous tool call had invalid JSON arguments. Please retry with properly formatted JSON arguments."
15855
+ });
15541
15856
  this.persistSession();
15542
15857
  return true;
15543
15858
  }
@@ -15697,13 +16012,12 @@ var BluMaAgent = class {
15697
16012
  parsed.push({ toolCall, toolName: toolCall.function.name, toolArgs });
15698
16013
  } catch (parseError) {
15699
16014
  const toolResultContent = JSON.stringify({
15700
- error: "Invalid tool arguments format",
15701
- details: String(parseError?.message || parseError),
15702
- raw_arguments: toolCall.function.arguments
16015
+ error: "Tool arguments could not be parsed",
16016
+ recovery: "Session recovered automatically"
15703
16017
  });
15704
16018
  this.eventBus.emit("backend_message", {
15705
- type: "error",
15706
- message: `Failed to parse tool arguments: ${parseError.message}`
16019
+ type: "info",
16020
+ message: "O BluMa encontrou um erro ao processar. A tentar recuperar a sess\xE3o..."
15707
16021
  });
15708
16022
  this.history.push({ role: "tool", tool_call_id: toolCall.id, content: toolResultContent });
15709
16023
  this.persistSession();
@@ -15918,6 +16232,11 @@ ${editData.error.display}`;
15918
16232
  message: `Received follow-up from coordinator (priority: ${mailboxUpdate.followUp.priority})`
15919
16233
  });
15920
16234
  }
16235
+ const sanitized = sanitizeConversationForProvider(this.history);
16236
+ if (sanitized.issues.length > 0) {
16237
+ this.history = sanitized.messages;
16238
+ this.persistSession();
16239
+ }
15921
16240
  const { messages: contextWindow, newAnchor, newCompressedTurnSliceCount } = await createApiContextWindow(
15922
16241
  this.history,
15923
16242
  this.sessionAnchor,
@@ -15935,8 +16254,18 @@ ${editData.error.display}`;
15935
16254
  await this._handleNonStreamingResponse(contextWindow);
15936
16255
  }
15937
16256
  } catch (error) {
15938
- const errorMessage = error instanceof Error ? error.message : "An unknown API error occurred.";
15939
- this.eventBus.emit("backend_message", { type: "error", message: errorMessage });
16257
+ const uiError = formatLlmUiError(error);
16258
+ this.eventBus.emit("backend_message", {
16259
+ type: "error",
16260
+ message: uiError.message,
16261
+ details: uiError.details,
16262
+ hint: uiError.hint
16263
+ });
16264
+ this.eventBus.emit("backend_message", {
16265
+ type: "log",
16266
+ message: "LLM request failed",
16267
+ payload: uiError.rawMessage
16268
+ });
15940
16269
  await this.notifyFactorTurnEndIfNeeded("llm_error");
15941
16270
  this.eventBus.emit("backend_message", { type: "done", status: "failed" });
15942
16271
  } finally {
@@ -15956,13 +16285,12 @@ ${editData.error.display}`;
15956
16285
  });
15957
16286
  } else if (this.emptyAssistantReplySteps >= 6) {
15958
16287
  this.eventBus.emit("backend_message", {
15959
- type: "error",
15960
- message: "The assistant produced no tool calls or visible text after several steps. Try again or use /effort low."
16288
+ type: "info",
16289
+ message: "O BluMa est\xE1 a ter dificuldade em processar. Tente novamente ou use /effort low para respostas mais r\xE1pidas."
15961
16290
  });
15962
16291
  await this.notifyFactorTurnEndIfNeeded("empty_reply_exhausted");
15963
16292
  this.eventBus.emit("backend_message", { type: "done", status: "failed" });
15964
16293
  this.emptyAssistantReplySteps = 0;
15965
- this.directTextProtocolSteps = 0;
15966
16294
  return;
15967
16295
  }
15968
16296
  await this._continueConversation();
@@ -15994,6 +16322,7 @@ ${editData.error.display}`;
15994
16322
  parallel_tool_calls: true,
15995
16323
  userContext: this.getLlmUserContext()
15996
16324
  });
16325
+ this.lastReasoningChunkRef = "";
15997
16326
  for await (const chunk of stream) {
15998
16327
  if (this.isInterrupted) {
15999
16328
  this.eventBus.emit("stream_end", {});
@@ -16005,7 +16334,11 @@ ${editData.error.display}`;
16005
16334
  this.eventBus.emit("stream_start", {});
16006
16335
  hasEmittedStart = true;
16007
16336
  }
16008
- this.eventBus.emit("stream_reasoning_chunk", { delta: chunk.reasoning });
16337
+ const reasoningKey = chunk.reasoning.trim().replace(/\s+/g, " ");
16338
+ if (reasoningKey !== this.lastReasoningChunkRef) {
16339
+ this.lastReasoningChunkRef = reasoningKey;
16340
+ this.eventBus.emit("stream_reasoning_chunk", { delta: chunk.reasoning });
16341
+ }
16009
16342
  }
16010
16343
  if (chunk.delta) {
16011
16344
  if (!hasEmittedStart) {
@@ -16044,16 +16377,12 @@ ${editData.error.display}`;
16044
16377
  this.history.push(normalizedMessage);
16045
16378
  if (normalizedMessage.tool_calls && normalizedMessage.tool_calls.length > 0) {
16046
16379
  this.emptyAssistantReplySteps = 0;
16047
- this.directTextProtocolSteps = 0;
16380
+ this.invalidToolCallRetrySteps = 0;
16048
16381
  const validToolCalls = normalizedMessage.tool_calls.filter(
16049
16382
  (call) => ToolCallNormalizer.isValidToolCall(call)
16050
16383
  );
16051
16384
  if (validToolCalls.length === 0) {
16052
- this.eventBus.emit("backend_message", {
16053
- type: "error",
16054
- message: "Model returned invalid tool calls. Retrying..."
16055
- });
16056
- await this._continueConversation();
16385
+ await this.handleInvalidToolCallRetry(normalizedMessage);
16057
16386
  return;
16058
16387
  }
16059
16388
  const needsConfirmation = validToolCalls.some(
@@ -16084,28 +16413,10 @@ ${editData.error.display}`;
16084
16413
  }
16085
16414
  } else if (trimmedText) {
16086
16415
  this.emptyAssistantReplySteps = 0;
16087
- this.directTextProtocolSteps += 1;
16088
- const MAX_DIRECT_TEXT_PROTOCOL = 3;
16089
- if (!hasEmittedStart) {
16090
- this.eventBus.emit("backend_message", { type: "assistant_message", content: accumulatedContent });
16091
- }
16092
- if (this.directTextProtocolSteps >= MAX_DIRECT_TEXT_PROTOCOL) {
16093
- this.eventBus.emit("backend_message", {
16094
- type: "error",
16095
- message: 'Agent kept answering with plain assistant text instead of the `message` tool with message_type "result". Turn forcibly closed to avoid job timeout; fix prompts or model routing.'
16096
- });
16097
- await this.notifyFactorTurnEndIfNeeded("protocol_direct_text_exhausted");
16098
- this.emitTurnCompleted();
16099
- this.emptyAssistantReplySteps = 0;
16100
- this.directTextProtocolSteps = 0;
16101
- return;
16102
- }
16103
- const feedback = this.feedbackSystem.generateFeedback({
16104
- event: "protocol_violation_direct_text",
16105
- details: { violationContent: accumulatedContent }
16106
- });
16107
- this.history.push({ role: "system", content: feedback.correction });
16108
- await this._continueConversation();
16416
+ this.eventBus.emit("backend_message", { type: "assistant_message", content: accumulatedContent });
16417
+ await this.notifyFactorTurnEndIfNeeded("assistant_text_without_tool_call");
16418
+ this.emitTurnCompleted();
16419
+ return;
16109
16420
  } else {
16110
16421
  await this.continueAfterEmptyAssistantResponse();
16111
16422
  }
@@ -16135,16 +16446,12 @@ ${editData.error.display}`;
16135
16446
  this.history.push(message2);
16136
16447
  if (message2.tool_calls && message2.tool_calls.length > 0) {
16137
16448
  this.emptyAssistantReplySteps = 0;
16138
- this.directTextProtocolSteps = 0;
16449
+ this.invalidToolCallRetrySteps = 0;
16139
16450
  const validToolCalls = message2.tool_calls.filter(
16140
16451
  (call) => ToolCallNormalizer.isValidToolCall(call)
16141
16452
  );
16142
16453
  if (validToolCalls.length === 0) {
16143
- this.eventBus.emit("backend_message", {
16144
- type: "error",
16145
- message: "Model returned invalid tool calls. Retrying..."
16146
- });
16147
- await this._continueConversation();
16454
+ await this.handleInvalidToolCallRetry(message2);
16148
16455
  return;
16149
16456
  }
16150
16457
  const needsConfirmation = validToolCalls.some(
@@ -16175,27 +16482,11 @@ ${editData.error.display}`;
16175
16482
  }
16176
16483
  } else if (typeof message2.content === "string" && message2.content.trim()) {
16177
16484
  this.emptyAssistantReplySteps = 0;
16178
- this.directTextProtocolSteps += 1;
16179
- const MAX_DIRECT_TEXT_PROTOCOL = 3;
16485
+ this.invalidToolCallRetrySteps = 0;
16180
16486
  this.eventBus.emit("backend_message", { type: "assistant_message", content: message2.content });
16181
- if (this.directTextProtocolSteps >= MAX_DIRECT_TEXT_PROTOCOL) {
16182
- this.eventBus.emit("backend_message", {
16183
- type: "error",
16184
- message: 'Agent kept answering with plain assistant text instead of the `message` tool with message_type "result". Turn forcibly closed to avoid job timeout.'
16185
- });
16186
- await this.notifyFactorTurnEndIfNeeded("protocol_direct_text_exhausted");
16187
- this.emitTurnCompleted();
16188
- this.emptyAssistantReplySteps = 0;
16189
- this.directTextProtocolSteps = 0;
16190
- return;
16191
- }
16192
- const feedback = this.feedbackSystem.generateFeedback({
16193
- event: "protocol_violation_direct_text",
16194
- details: { violationContent: message2.content }
16195
- });
16196
- this.eventBus.emit("backend_message", { type: "protocol_violation", message: feedback.message, content: message2.content });
16197
- this.history.push({ role: "system", content: feedback.correction });
16198
- await this._continueConversation();
16487
+ await this.notifyFactorTurnEndIfNeeded("assistant_text_without_tool_call");
16488
+ this.emitTurnCompleted();
16489
+ return;
16199
16490
  } else {
16200
16491
  await this.continueAfterEmptyAssistantResponse();
16201
16492
  }
@@ -16993,11 +17284,13 @@ var BaseLLMSubAgent = class {
16993
17284
  /** Um turnId por execute(); reutilizado em todo o loop de tools do subagente. */
16994
17285
  subagentTurnContext = null;
16995
17286
  lastActivityTimestamp = Date.now();
17287
+ terminalEventEmitted = false;
16996
17288
  async execute(input, ctx) {
16997
17289
  workerLog.info("Worker started", { id: this.id, pid: process.pid });
16998
17290
  this.emitEvent("worker_heartbeat", { status: "started", timestamp: Date.now(), pid: process.pid, id: this.id });
16999
17291
  this.ctx = ctx;
17000
17292
  this.isInterrupted = false;
17293
+ this.terminalEventEmitted = false;
17001
17294
  this.ctx.eventBus.on("user_interrupt", () => {
17002
17295
  this.isInterrupted = true;
17003
17296
  });
@@ -17060,13 +17353,15 @@ var BaseLLMSubAgent = class {
17060
17353
  this.emitEvent("error", {
17061
17354
  message: `Subagent tool "${message2.tool_calls[0].function.name}" requires confirmation outside sandbox mode.`
17062
17355
  });
17063
- this.emitEvent("done", { status: "blocked_confirmation" });
17356
+ this.emitDoneOnce("blocked_confirmation");
17064
17357
  break;
17065
17358
  }
17066
17359
  await this._handleToolExecution({ type: "user_decision_execute", tool_calls: message2.tool_calls });
17067
- } else if (message2.content) {
17360
+ } else if (typeof message2.content === "string" && message2.content.trim()) {
17068
17361
  this.emitEvent("assistant_message", { content: message2.content });
17069
- this.emitEvent("protocol_violation", { message: "Direct text emission detected from subagent.", content: message2.content });
17362
+ this.emitEvent("info", { message: "SubAgent returned plain text without tool_calls. Closing turn." });
17363
+ this.emitDoneOnce("completed");
17364
+ break;
17070
17365
  } else {
17071
17366
  this.emitEvent("info", { message: "SubAgent is thinking... continuing reasoning cycle." });
17072
17367
  }
@@ -17074,8 +17369,9 @@ var BaseLLMSubAgent = class {
17074
17369
  }
17075
17370
  if (turnCount >= MAX_TURNS) {
17076
17371
  this.emitEvent("info", { message: `Worker reached max turns limit (${MAX_TURNS}).` });
17077
- this.emitEvent("done", { status: "max_turns_reached" });
17372
+ this.emitDoneOnce("max_turns_reached");
17078
17373
  }
17374
+ this.emitDoneOnce("completed");
17079
17375
  return { history: this.history, turns: turnCount, status: this.isInterrupted ? "cancelled" : "completed" };
17080
17376
  };
17081
17377
  const timeoutPromise = new Promise((_, reject) => {
@@ -17088,7 +17384,7 @@ var BaseLLMSubAgent = class {
17088
17384
  if (error.message?.includes("timed out")) {
17089
17385
  workerLog.warn("Worker timed out", { id: this.id, turns: turnCount });
17090
17386
  this.emitEvent("error", { message: error.message });
17091
- this.emitEvent("done", { status: "timeout" });
17387
+ this.emitDoneOnce("timeout");
17092
17388
  } else {
17093
17389
  this.emitEvent("error", { message: error.message });
17094
17390
  }
@@ -17189,9 +17485,11 @@ ${editData.error.display}`;
17189
17485
  if (!lastToolName.includes("agent_end_turn") && !this.isInterrupted) {
17190
17486
  await this._continueConversation();
17191
17487
  }
17192
- } else if (message2.content) {
17488
+ } else if (typeof message2.content === "string" && message2.content.trim()) {
17193
17489
  this.emitEvent("assistant_message", { content: message2.content });
17194
- this.emitEvent("protocol_violation", { message: "Direct text emission detected from subagent.", content: message2.content });
17490
+ this.emitEvent("info", { message: "SubAgent returned plain text without tool_calls. Closing turn." });
17491
+ this.emitEvent("done", { status: "completed" });
17492
+ return;
17195
17493
  } else {
17196
17494
  this.emitEvent("info", { message: "SubAgent is thinking... continuing reasoning cycle." });
17197
17495
  }
@@ -17247,7 +17545,7 @@ ${editData.error.display}`;
17247
17545
  result: toolResultContent
17248
17546
  });
17249
17547
  if (toolName.includes("agent_end_turn")) {
17250
- this.emitEvent("done", { status: "completed" });
17548
+ this.emitDoneOnce("completed");
17251
17549
  }
17252
17550
  } else {
17253
17551
  toolResultContent = "Tool execution was declined.";
@@ -17286,7 +17584,12 @@ ${editData.error.display}`;
17286
17584
  } catch {
17287
17585
  }
17288
17586
  this.isInterrupted = true;
17289
- this.emitEvent("done", { status: "shutdown", reason });
17587
+ this.emitDoneOnce("shutdown", { reason });
17588
+ }
17589
+ emitDoneOnce(status, extra = {}) {
17590
+ if (this.terminalEventEmitted) return;
17591
+ this.terminalEventEmitted = true;
17592
+ this.emitEvent("done", { status, ...extra });
17290
17593
  }
17291
17594
  /**
17292
17595
  * Verifica mailbox por follow-ups do coordinator
@@ -18558,23 +18861,12 @@ var ToolResultDisplayComponent = ({
18558
18861
  if (toolName.includes("ask_user_question")) {
18559
18862
  const success = parsed?.success === true;
18560
18863
  const selectedLabel = typeof parsed?.selected_label === "string" ? parsed.selected_label : "";
18561
- const selectedIndex = typeof parsed?.selected_index === "number" ? parsed.selected_index : null;
18562
- const questionIndex = typeof parsed?.question_index === "number" ? parsed.question_index : 0;
18563
- const qs = Array.isArray(args?.questions) ? args.questions : [];
18564
- const q = qs[questionIndex];
18565
- const questionText = typeof q?.question === "string" ? q.question : "";
18566
18864
  if (success && selectedLabel) {
18567
- return /* @__PURE__ */ jsx13(ResultGutter, { children: /* @__PURE__ */ jsxs13(Box13, { flexDirection: "column", children: [
18568
- /* @__PURE__ */ jsxs13(Text13, { dimColor: true, children: [
18569
- /* @__PURE__ */ jsx13(Text13, { bold: true, children: "Response" }),
18570
- " \xB7 ",
18571
- selectedLabel
18572
- ] }),
18573
- questionText ? /* @__PURE__ */ jsxs13(Text13, { dimColor: true, wrap: "wrap", children: [
18574
- truncate3(questionText, 140),
18575
- selectedIndex !== null ? ` \xB7 option ${selectedIndex + 1}` : ""
18576
- ] }) : null
18577
- ] }) });
18865
+ return /* @__PURE__ */ jsx13(ResultGutter, { children: /* @__PURE__ */ jsx13(Box13, { flexDirection: "column", children: /* @__PURE__ */ jsxs13(Text13, { dimColor: true, children: [
18866
+ /* @__PURE__ */ jsx13(Text13, { bold: true, children: "Response" }),
18867
+ " \xB7 ",
18868
+ selectedLabel
18869
+ ] }) }) });
18578
18870
  }
18579
18871
  if (parsed?.cancelled === true) {
18580
18872
  return /* @__PURE__ */ jsx13(ResultGutter, { children: /* @__PURE__ */ jsxs13(Text13, { dimColor: true, children: [
@@ -20741,183 +21033,137 @@ Report the release version, tag, changelog summary, and verification results whe
20741
21033
  );
20742
21034
  }
20743
21035
  if (cmd === "review") {
20744
- const target = args.join(" ") || "";
21036
+ const normalizedArgs = args.map((a) => a.toLowerCase());
21037
+ const hasMasonPrefix = normalizedArgs[0] === "mason" || normalizedArgs[0] === "with" && normalizedArgs[1] === "mason";
21038
+ const reviewMode = hasMasonPrefix ? "mason" : "direct";
21039
+ const targetArgs = hasMasonPrefix ? normalizedArgs[0] === "mason" ? args.slice(1) : args.slice(2) : args;
21040
+ const target = targetArgs.join(" ") || "";
20745
21041
  const isPR = target && /^\d+$/.test(target);
20746
21042
  (async () => {
20747
21043
  try {
20748
21044
  const reviewTarget = isPR ? `PR #${target}` : target === "local" || target === "local changes" ? "current local changes (git diff HEAD)" : target ? `the file/module: ${target}` : "current local changes (git diff HEAD)";
20749
- await agentRef.current?.processTurn({
20750
- content: `## REVIEW COORDINATOR MODE \u2014 Lead a Team of Senior QA Reviewers
21045
+ const reviewPrompt = reviewMode === "mason" ? `## REVIEW COORDINATOR MODE \u2014 Mason Specialists
20751
21046
 
20752
- You are now the **Review Coordinator** \u2014 a Principal Engineer leading a team of senior, picky QA reviewers. Your job is to orchestrate a **thorough, line-by-line code review** where NOTHING slips through.
21047
+ You are now the **Review Coordinator** for a slower, deeper pass with Mason senior specialists.
20753
21048
 
20754
- **NEVER be afraid to coordinate.** Spawning specialized reviewers is how you catch bugs that a single reviewer would miss.
21049
+ This mode is intentionally heavier:
21050
+ - You may coordinate specialized reviewers in parallel
21051
+ - Each reviewer should focus on one area of risk
21052
+ - This can take longer, but it should surface deeper issues
20755
21053
 
20756
21054
  **Review Target:** ${reviewTarget}
20757
21055
 
20758
21056
  ### COORDINATOR REVIEW WORKFLOW
20759
21057
 
20760
- #### Step 1: Triage (you do this \u2014 quick, ~30s)
21058
+ #### Step 1: Triage
20761
21059
  1. Gather the diff/changes:
20762
21060
  ${isPR ? `- Run \`gh pr view ${target}\` for PR details` : ""}
20763
21061
  ${isPR ? `- Run \`gh pr diff ${target}\` for the full diff` : ""}
20764
21062
  ${!isPR && target !== "local" && target !== "local changes" ? `- Read the file: ${target}` : ""}
20765
21063
  ${target === "local" || target === "local changes" ? `- Run \`git diff HEAD\` for unstaged changes` : ""}
20766
21064
  ${target === "local" || target === "local changes" ? `- Run \`git diff --cached HEAD\` for staged changes` : ""}
20767
- 2. Understand the SCOPE: how many files changed, what areas are affected
21065
+ 2. Identify the risk surface and decide which specialist areas are worth parallelizing
20768
21066
 
20769
- #### Step 2: Spawn 3 Parallel Review Workers
20770
- Launch exactly **3 workers in parallel** \u2014 one for each core area.
21067
+ #### Step 2: Parallel Specialists
21068
+ If the scope justifies it, spawn specialized reviewers in parallel:
21069
+ - Security
21070
+ - Logic & Correctness
21071
+ - Code Quality
20771
21072
 
20772
- **IMPORTANT:** Each worker MUST read EVERY changed file line by line. Do NOT report until you have examined all files. List each file you reviewed in your report.
21073
+ If the scope is small, do not force parallelism. Use judgment.
20773
21074
 
20774
- **Worker 1 \u2014 Security Reviewer:**
20775
- \`\`\`
20776
- spawn_agent({
20777
- task: "SECURITY REVIEW: Thoroughly review ${reviewTarget} for security vulnerabilities.
20778
-
20779
- You are a Senior Security Engineer. Read EVERY changed file line by line. Do NOT report until you have examined all files.
20780
-
20781
- Look for:
20782
- - Injection vulnerabilities (SQL, XSS, command injection, template injection)
20783
- - Authentication/authorization flaws (missing auth checks, privilege escalation)
20784
- - Sensitive data exposure (secrets in logs, PII leaks, hardcoded credentials)
20785
- - Insecure defaults (missing TLS, weak crypto, permissive CORS)
20786
- - Input validation gaps (missing sanitization, type confusion)
20787
- - Dependency vulnerabilities (outdated packages, known CVEs)
20788
- - Path traversal, SSRF, CSRF, race conditions
20789
-
20790
- For EACH issue found:
20791
- - Severity: CRITICAL / HIGH / MEDIUM / LOW
20792
- - File:line number
20793
- - Exact code snippet
20794
- - Why it's vulnerable
20795
- - How to exploit it (brief)
20796
- - Recommended fix
20797
-
20798
- Be PICKY. If something looks suspicious, flag it.
20799
-
20800
- Do NOT modify files. Report only.
20801
-
20802
- At the end of your report, list ALL files you reviewed.",
20803
- title: "Security Review",
20804
- agent_type: "reviewer"
20805
- })
20806
- \`\`\`
21075
+ #### Step 3: Synthesize
21076
+ Wait for all reviewers that you spawned, then synthesize the findings into a single review report.
20807
21077
 
20808
- **Worker 2 \u2014 Logic & Correctness:**
20809
- \`\`\`
20810
- spawn_agent({
20811
- task: "LOGIC REVIEW: Thoroughly review ${reviewTarget} for bugs and logic errors.
20812
-
20813
- You are a Senior QA Engineer who finds bugs for a living. Read EVERY changed file line by line. Do NOT report until you have examined all files.
20814
-
20815
- Look for:
20816
- - Logic errors (wrong conditions, off-by-one, inverted boolean, wrong operator)
20817
- - Null/undefined handling (missing null checks, unsafe property access)
20818
- - State management issues (stale state, missing initialization, race conditions)
20819
- - Async bugs (unawaited promises, missing error handling, promise rejections)
20820
- - Edge cases (empty arrays, zero values, negative numbers, boundary conditions)
20821
- - Wrong assumptions (code assumes X but Y can happen)
20822
- - Dead code (unreachable branches, unused variables, commented-out logic)
20823
- - Error handling gaps (swallowed errors, missing catch blocks, generic catches)
20824
-
20825
- For EACH issue found:
20826
- - Severity: BLOCKER / MAJOR / MINOR
20827
- - File:line number
20828
- - What the code does vs what it SHOULD do
20829
- - How to trigger the bug
20830
- - Recommended fix
20831
-
20832
- Be RELENTLESS. Question every assumption.
20833
-
20834
- Do NOT modify files. Report only.
20835
-
20836
- At the end of your report, list ALL files you reviewed.",
20837
- title: "Logic & Correctness Review",
20838
- agent_type: "reviewer"
20839
- })
20840
- \`\`\`
21078
+ #### Step 4: Produce the Review Report
21079
+ Compile a comprehensive review report:
20841
21080
 
20842
- **Worker 3 \u2014 Code Quality:**
20843
- \`\`\`
20844
- spawn_agent({
20845
- task: "CODE QUALITY REVIEW: Thoroughly review ${reviewTarget} for code quality and convention violations.
21081
+ **REVIEW REPORT for ${reviewTarget}**
20846
21082
 
20847
- You are a Staff Engineer obsessed with clean code. Read EVERY changed file line by line. Do NOT report until you have examined all files.
21083
+ \u{1F534} CRITICAL / BLOCKER (must fix before merge):
21084
+ - [List critical findings]
20848
21085
 
20849
- Look for:
20850
- - Naming issues (misleading names, abbreviations, inconsistent casing)
20851
- - Function length and complexity (too long, too many responsibilities, deep nesting)
20852
- - DRY violations (duplicated logic that should be extracted)
20853
- - SOLID violations (tight coupling, god classes, leaking abstractions)
20854
- - Style inconsistencies (formatting, import order, naming conventions)
20855
- - Missing or wrong comments (no docs for complex logic, outdated comments)
20856
- - Type safety issues (any usage, missing type annotations, wrong types)
20857
- - Error message quality (unhelpful messages, missing context)
20858
- - API design (inconsistent interfaces, breaking changes, missing deprecation)
21086
+ \u{1F7E1} HIGH / MAJOR (should fix):
21087
+ - [List high findings]
20859
21088
 
20860
- For EACH issue found:
20861
- - File:line number
20862
- - What's wrong
20863
- - Suggested improvement with before/after code
21089
+ \u{1F7E2} MEDIUM / MINOR (nice to fix):
21090
+ - [List medium findings]
20864
21091
 
20865
- Be PICKY about readability. Code is read 10x more than written.
21092
+ \u2139\uFE0F OBSERVATIONS (no action needed):
21093
+ - [List observations]
20866
21094
 
20867
- Do NOT modify files. Report only.
21095
+ \u2705 POSITIVE FINDINGS:
21096
+ - [List strong points]
20868
21097
 
20869
- At the end of your report, list ALL files you reviewed.",
20870
- title: "Code Quality Review",
20871
- agent_type: "reviewer"
20872
- })
20873
- \`\`\`
21098
+ **Review Summary:**
21099
+ - Total issues found: X critical, Y high, Z medium
21100
+ - Reviewers used: [list workers or "direct review"]
21101
+ - Recommendation: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES
21102
+ - Confidence level: HIGH / MEDIUM / LOW
21103
+
21104
+ ### COORDINATOR RULES
21105
+ - Be selective: do not spawn workers unless the scope justifies it
21106
+ - If workers fail, finish the review yourself
21107
+ - Never rubber-stamp
21108
+ - Never fabricate results
20874
21109
 
20875
- #### Step 3: Wait for Workers + Synthesize
20876
- Wait for ALL 3 workers to complete. Use wait_agent with a large timeout (600000ms).
21110
+ Start coordinating now.` : `## REVIEW MODE \u2014 Direct Senior Review
20877
21111
 
20878
- **If workers fail or sessions disappear:**
20879
- - This can happen with fast-completing workers
20880
- - Simply perform the review yourself by reading the changed files
20881
- - Report: "Workers completed/unavailable \u2014 performing review directly"
20882
- - Do NOT waste time retrying \u2014 just do the review
21112
+ You are a senior engineer performing a direct code review. Do the review yourself using the available tools and your own judgment.
20883
21113
 
20884
- **NEVER write** "the review looks good" \u2014 that's lazy.
20885
- **ALWAYS synthesize**: Group findings by severity, cross-reference between reviewers, identify patterns.
21114
+ **Do not spawn parallel reviewers by default.** Only use extra agents if the scope is genuinely large and you need them.
20886
21115
 
20887
- #### Step 4: Produce the Review Report
20888
- Compile a comprehensive review report:
21116
+ **Review Target:** ${reviewTarget}
21117
+
21118
+ ### REVIEW WORKFLOW
21119
+
21120
+ #### Step 1: Triage
21121
+ 1. Gather the diff/changes:
21122
+ ${isPR ? `- Run \`gh pr view ${target}\` for PR details` : ""}
21123
+ ${isPR ? `- Run \`gh pr diff ${target}\` for the full diff` : ""}
21124
+ ${!isPR && target !== "local" && target !== "local changes" ? `- Read the file: ${target}` : ""}
21125
+ ${target === "local" || target === "local changes" ? `- Run \`git diff HEAD\` for unstaged changes` : ""}
21126
+ ${target === "local" || target === "local changes" ? `- Run \`git diff --cached HEAD\` for staged changes` : ""}
21127
+ 2. Understand the scope and the main risk areas
21128
+
21129
+ #### Step 2: Review Directly
21130
+ Read the changed files carefully yourself. Focus on:
21131
+ - Correctness and regressions
21132
+ - Security and data handling
21133
+ - Tests and edge cases
21134
+ - Clarity and maintainability
21135
+
21136
+ If the diff is large, you may use helpers, but keep the review centered on your own synthesis.
21137
+
21138
+ #### Step 3: Produce the Review Report
21139
+ Compile a concise but rigorous review report:
20889
21140
 
20890
21141
  **REVIEW REPORT for ${reviewTarget}**
20891
21142
 
20892
21143
  \u{1F534} CRITICAL / BLOCKER (must fix before merge):
20893
- - [List all critical findings]
21144
+ - [List critical findings]
20894
21145
 
20895
21146
  \u{1F7E1} HIGH / MAJOR (should fix):
20896
- - [List all high findings]
21147
+ - [List high findings]
20897
21148
 
20898
21149
  \u{1F7E2} MEDIUM / MINOR (nice to fix):
20899
- - [List all medium findings]
21150
+ - [List medium findings]
20900
21151
 
20901
21152
  \u2139\uFE0F OBSERVATIONS (no action needed):
20902
- - [List observations, style notes]
21153
+ - [List observations]
20903
21154
 
20904
- \u2705 POSITIVE FINDINGS (what's good):
20905
- - [List well-written code, good patterns]
21155
+ \u2705 POSITIVE FINDINGS:
21156
+ - [List strong points]
20906
21157
 
20907
21158
  **Review Summary:**
20908
21159
  - Total issues found: X critical, Y high, Z medium
20909
- - Reviewers used: [list workers or "direct review"]
21160
+ - Reviewers used: direct review
20910
21161
  - Recommendation: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES
20911
21162
  - Confidence level: HIGH / MEDIUM / LOW
20912
21163
 
20913
- ### COORDINATOR RULES
20914
- - **You are the brain, reviewers are the eyes** \u2014 synthesize, don't just copy-paste
20915
- - **Spawn 3 workers in parallel** \u2014 Security, Logic, Code Quality
20916
- - **If workers fail, do the review yourself** \u2014 no drama, just deliver
20917
- - **NEVER rubber-stamp** \u2014 your job is to find issues
20918
- - **NEVER fabricate results** \u2014 report truth
20919
-
20920
- Start coordinating now. Triage the changes, then spawn your 3 reviewers.`
21164
+ Start the review now.`;
21165
+ await agentRef.current?.processTurn({
21166
+ content: reviewPrompt
20921
21167
  });
20922
21168
  } catch (e) {
20923
21169
  setHistory((prev) => prev.concat({