open-agents-ai 0.187.468 → 0.187.470

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -250490,6 +250490,11 @@ import { execSync as execSync20, exec } from "node:child_process";
250490
250490
  import { existsSync as existsSync24, writeFileSync as writeFileSync9, mkdirSync as mkdirSync8 } from "node:fs";
250491
250491
  import { join as join37, dirname as dirname11 } from "node:path";
250492
250492
  import { homedir as homedir9 } from "node:os";
250493
/**
 * Append `item` to `buf`, then discard the oldest entries so the buffer
 * never holds more than `limit` items.
 *
 * @param {Array} buf - Buffer to append to; mutated in place.
 * @param {*} item - Entry to append.
 * @param {number} [limit=MAX_BUFFER] - Maximum number of entries to retain.
 *   Defaults to the module-level MAX_BUFFER, so existing two-argument callers
 *   keep the same behavior.
 */
function pushBounded(buf, item, limit = MAX_BUFFER) {
  buf.push(item);
  const overflow = buf.length - limit;
  if (overflow > 0) {
    // Trim from the front so the most recent entries are the ones kept.
    buf.splice(0, overflow);
  }
}
250493
250498
  async function ensurePlaywright() {
250494
250499
  if (pw)
250495
250500
  return null;
@@ -250529,28 +250534,78 @@ async function ensureBrowser() {
250529
250534
  userAgent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
250530
250535
  });
250531
250536
  page = await context.newPage();
250532
- page.on("dialog", async (dialog) => {
250533
- try {
250534
- await dialog.dismiss();
250535
- } catch {
250536
- }
250537
- });
250537
+ attachDiagnosticListeners(page);
250538
250538
  } catch (err) {
250539
250539
  return `Failed to launch browser: ${err instanceof Error ? err.message : String(err)}`;
250540
250540
  }
250541
250541
  }
250542
250542
  if (!page || page.isClosed?.()) {
250543
250543
  page = await context.newPage();
250544
+ attachDiagnosticListeners(page);
250544
250545
  }
250545
250546
  return null;
250546
250547
  }
250548
/**
 * Attach the dialog auto-dismiss handler and the diagnostic capture
 * listeners to a page.
 *
 * Captured events are appended through pushBounded to the module-level
 * buffers:
 *   - "console"                    -> consoleBuffer
 *   - "pageerror"                  -> errorBuffer
 *   - "requestfailed", "response"  -> networkBuffer
 *
 * Every capture listener is best-effort: if building or storing an entry
 * throws, the entry is dropped instead of letting the exception escape the
 * listener. Previously only the "console" and "response" listeners were
 * guarded; "pageerror" and "requestfailed" now follow the same policy.
 *
 * @param {object} p2 - Page-like object exposing `on(eventName, listener)`.
 */
function attachDiagnosticListeners(p2) {
  // Wrap a capture routine so a failure while recording never propagates to
  // whatever emitted the event.
  const bestEffort = (record) => (payload) => {
    try {
      record(payload);
    } catch {
      // Diagnostics are optional; losing one entry is preferable to a throw.
    }
  };

  p2.on("dialog", async (dialog) => {
    try {
      await dialog.dismiss();
    } catch {
      // Dismissal failed; there is nothing further to do for this dialog.
    }
  });

  p2.on("console", bestEffort((msg) => {
    const loc = msg.location?.();
    pushBounded(consoleBuffer, {
      ts: Date.now(),
      type: String(msg.type?.() ?? "log"),
      text: String(msg.text?.() ?? "").slice(0, 600),
      loc: loc?.url ? `${loc.url}:${loc.lineNumber ?? 0}:${loc.columnNumber ?? 0}` : void 0
    });
  }));

  p2.on("pageerror", bestEffort((err) => {
    pushBounded(errorBuffer, {
      ts: Date.now(),
      message: String(err?.message ?? err).slice(0, 600),
      stack: typeof err?.stack === "string" ? err.stack.slice(0, 1500) : void 0
    });
  }));

  p2.on("requestfailed", bestEffort((req2) => {
    pushBounded(networkBuffer, {
      ts: Date.now(),
      method: String(req2.method?.() ?? "GET"),
      url: String(req2.url?.() ?? "").slice(0, 600),
      failure: String(req2.failure?.()?.errorText ?? "request failed").slice(0, 200),
      ok: false
    });
  }));

  p2.on("response", bestEffort((resp) => {
    const status = Number(resp.status?.() ?? 0);
    pushBounded(networkBuffer, {
      ts: Date.now(),
      method: String(resp.request?.()?.method?.() ?? "GET"),
      url: String(resp.url?.() ?? "").slice(0, 600),
      status,
      // 2xx and 3xx count as successful responses.
      ok: status >= 200 && status < 400
    });
  }));
}
250597
/**
 * Reset the console, network and error diagnostic buffers.
 * Each module-level buffer variable is rebound to its own fresh empty array;
 * the previous arrays are left untouched.
 */
function clearDiagnosticBuffers() {
  [consoleBuffer, networkBuffer, errorBuffer] = [[], [], []];
}
250547
250602
  function ok(output, start2) {
250548
250603
  return { success: true, output, durationMs: Date.now() - start2 };
250549
250604
  }
250550
250605
  function fail(error, start2) {
250551
250606
  return { success: false, output: "", error, durationMs: Date.now() - start2 };
250552
250607
  }
250553
- var pw, browser2, context, page, dynamicImport, PlaywrightBrowserTool;
250608
+ var pw, browser2, context, page, MAX_BUFFER, consoleBuffer, networkBuffer, errorBuffer, dynamicImport, PlaywrightBrowserTool;
250554
250609
  var init_playwright_browser = __esm({
250555
250610
  "packages/execution/dist/tools/playwright-browser.js"() {
250556
250611
  "use strict";
@@ -250558,10 +250613,14 @@ var init_playwright_browser = __esm({
250558
250613
  browser2 = null;
250559
250614
  context = null;
250560
250615
  page = null;
250616
+ MAX_BUFFER = 200;
250617
+ consoleBuffer = [];
250618
+ networkBuffer = [];
250619
+ errorBuffer = [];
250561
250620
  dynamicImport = new Function("mod", "return import(mod)");
250562
250621
  PlaywrightBrowserTool = class {
250563
250622
  name = "playwright_browser";
250564
- description = "Full-scope Playwright browser automation. Launches a persistent headless Chromium session for interactive web tasks. Use this to: navigate pages, click elements, fill forms, take screenshots, evaluate JavaScript, wait for elements, extract content, and run end-to-end test assertions against live websites. Auto-installs Playwright + Chromium on first use. Actions: navigate, click, fill, type, press, screenshot, evaluate, content, select, check, wait, goto, title, url, pdf, close. Start with navigate, then use content/screenshot to see the page, then click/fill/type to interact.";
250623
+ description = "Full-scope Playwright browser automation + diagnostic capture. Launches a persistent headless Chromium session. Beyond navigation/interaction, this tool buffers everything the running app emits (console messages, network requests, JS exceptions, accessibility tree) so the agent can verify what is ACTUALLY happening not just what the build/test reports. Auto-installs Playwright + Chromium on first use. Diagnostic actions: console_logs, network_log, page_errors, a11y_snapshot, bounding_box, query_all, performance, cookies, storage, viewport, clear_diagnostics. Interaction actions: navigate, click, fill, type, press, select, check, hover. Capture actions: screenshot, pdf, content, innerText, innerHTML, getAttribute, evaluate. Workflow: navigate screenshot or content → console_logs/network_log/page_errors to verify state click/fill/type to drive UI → repeat.";
250565
250624
  parameters = {
250566
250625
  type: "object",
250567
250626
  properties: {
@@ -250593,6 +250652,18 @@ var init_playwright_browser = __esm({
250593
250652
  "goBack",
250594
250653
  "goForward",
250595
250654
  "reload",
250655
+ // FEAT-1: diagnostic capture actions
250656
+ "console_logs",
250657
+ "network_log",
250658
+ "page_errors",
250659
+ "a11y_snapshot",
250660
+ "bounding_box",
250661
+ "query_all",
250662
+ "performance",
250663
+ "cookies",
250664
+ "storage",
250665
+ "viewport",
250666
+ "clear_diagnostics",
250596
250667
  "close"
250597
250668
  ],
250598
250669
  description: "Action to perform:\n- navigate: go to a URL\n- click: click element by selector\n- fill: clear input and type text (for form fields)\n- type: type text character by character (simulates real typing)\n- press: press a key (Enter, Tab, Escape, etc.)\n- screenshot: capture page screenshot (saves to file)\n- evaluate: run JavaScript in page context\n- content: get page text content (readable, stripped)\n- innerText: get innerText of a specific element\n- select: select dropdown option by value\n- check/uncheck: toggle checkbox\n- hover: hover over element\n- wait: wait for a selector to appear\n- waitForNavigation: wait for page navigation to complete\n- waitForSelector: wait for element matching selector\n- title: get page title\n- url: get current URL\n- getAttribute: get element attribute value\n- innerHTML: get element's innerHTML\n- textContent: get element's textContent\n- goBack/goForward/reload: browser navigation\n- pdf: save page as PDF\n- close: close browser session"
@@ -250641,6 +250712,7 @@ var init_playwright_browser = __esm({
250641
250712
  page = null;
250642
250713
  context = null;
250643
250714
  browser2 = null;
250715
+ clearDiagnosticBuffers();
250644
250716
  return { success: true, output: "Browser closed.", durationMs: Date.now() - start2 };
250645
250717
  }
250646
250718
  const err = await ensureBrowser();
@@ -250831,6 +250903,171 @@ var init_playwright_browser = __esm({
250831
250903
  await page.pdf({ path: pdfPath, format: "A4" });
250832
250904
  return ok(`PDF saved to ${pdfPath}`, start2);
250833
250905
  }
250906
+ // ── FEAT-1: Diagnostic capture ────────────────────────────────────
250907
+ case "console_logs": {
250908
+ const filter2 = (text || "").toLowerCase();
250909
+ const entries = filter2 ? consoleBuffer.filter((e2) => e2.type.toLowerCase().includes(filter2)) : consoleBuffer;
250910
+ const tail = entries.slice(-50);
250911
+ if (tail.length === 0) {
250912
+ return ok(`No console messages buffered yet${filter2 ? ` (filter="${filter2}")` : ""}. Buffer holds last 200 messages.`, start2);
250913
+ }
250914
+ const lines = tail.map((e2) => {
250915
+ const at = new Date(e2.ts).toISOString().slice(11, 19);
250916
+ return `[${at}] ${e2.type.toUpperCase()}: ${e2.text}${e2.loc ? ` (${e2.loc})` : ""}`;
250917
+ });
250918
+ return ok(`Console messages (${tail.length}/${entries.length} shown${filter2 ? `, filter="${filter2}"` : ""}):
250919
+ ${lines.join("\n")}`, start2);
250920
+ }
250921
+ case "network_log": {
250922
+ const mode = (text || "").toLowerCase();
250923
+ let entries = networkBuffer;
250924
+ if (mode === "failed" || mode === "errors") {
250925
+ entries = entries.filter((e2) => e2.ok === false);
250926
+ }
250927
+ const tail = entries.slice(-50);
250928
+ if (tail.length === 0) {
250929
+ return ok(`No network entries buffered${mode ? ` (mode="${mode}")` : ""}. Navigate to a URL to populate.`, start2);
250930
+ }
250931
+ const lines = tail.map((e2) => {
250932
+ const at = new Date(e2.ts).toISOString().slice(11, 19);
250933
+ const tag = e2.failure ? `FAILED (${e2.failure})` : `${e2.status ?? "?"}`;
250934
+ return `[${at}] ${e2.method} ${tag} ${e2.url}`;
250935
+ });
250936
+ return ok(`Network log (${tail.length}/${entries.length} shown${mode ? `, mode="${mode}"` : ""}):
250937
+ ${lines.join("\n")}`, start2);
250938
+ }
250939
+ case "page_errors": {
250940
+ if (errorBuffer.length === 0) {
250941
+ return ok("No uncaught page errors recorded.", start2);
250942
+ }
250943
+ const lines = errorBuffer.map((e2) => {
250944
+ const at = new Date(e2.ts).toISOString().slice(11, 19);
250945
+ return `[${at}] ${e2.message}${e2.stack ? `
250946
+ ${e2.stack.split("\n").slice(0, 4).join("\n ")}` : ""}`;
250947
+ });
250948
+ return ok(`Uncaught page errors (${errorBuffer.length}):
250949
+ ${lines.join("\n\n")}`, start2);
250950
+ }
250951
+ case "a11y_snapshot": {
250952
+ const root = selector ? await page.locator(selector).first().elementHandle() : null;
250953
+ const tree2 = await page.accessibility.snapshot({ root: root ?? void 0, interestingOnly: true });
250954
+ if (!tree2)
250955
+ return ok("No accessibility tree (page may be empty).", start2);
250956
+ const render2 = (n2, depth = 0) => {
250957
+ const pad = " ".repeat(depth);
250958
+ const role = n2.role ?? "?";
250959
+ const name10 = n2.name ? ` "${String(n2.name).slice(0, 80)}"` : "";
250960
+ const value3 = n2.value ? ` value="${String(n2.value).slice(0, 60)}"` : "";
250961
+ const checked = n2.checked != null ? ` checked=${n2.checked}` : "";
250962
+ const lines = [`${pad}${role}${name10}${value3}${checked}`];
250963
+ for (const child of (n2.children ?? []).slice(0, 30)) {
250964
+ lines.push(render2(child, depth + 1));
250965
+ }
250966
+ return lines.join("\n");
250967
+ };
250968
+ const out = render2(tree2).slice(0, 6e3);
250969
+ return ok(`Accessibility snapshot:
250970
+ ${out}`, start2);
250971
+ }
250972
+ case "bounding_box": {
250973
+ if (!selector)
250974
+ return fail("selector is required", start2);
250975
+ const loc = page.locator(selector).first();
250976
+ const box = await loc.boundingBox();
250977
+ if (!box)
250978
+ return fail(`Element ${selector} has no bounding box (not rendered or detached)`, start2);
250979
+ return ok(JSON.stringify(box), start2);
250980
+ }
250981
+ case "query_all": {
250982
+ if (!selector)
250983
+ return fail("selector is required", start2);
250984
+ const handles = await page.locator(selector).all();
250985
+ if (handles.length === 0)
250986
+ return ok(`No elements matched ${selector}.`, start2);
250987
+ const max = Math.min(handles.length, 30);
250988
+ const lines = [];
250989
+ for (let i2 = 0; i2 < max; i2++) {
250990
+ try {
250991
+ const h = handles[i2];
250992
+ const txt = (await h.textContent())?.trim().slice(0, 120) ?? "";
250993
+ const tag = await h.evaluate((el) => (el.tagName || "?").toLowerCase());
250994
+ const id = await h.evaluate((el) => el.id || "");
250995
+ const cls = await h.evaluate((el) => (el.className || "").toString().slice(0, 80));
250996
+ lines.push(`[${i2}] <${tag}${id ? ` id="${id}"` : ""}${cls ? ` class="${cls}"` : ""}> ${txt}`);
250997
+ } catch (e2) {
250998
+ lines.push(`[${i2}] (error: ${e2 instanceof Error ? e2.message.slice(0, 80) : String(e2).slice(0, 80)})`);
250999
+ }
251000
+ }
251001
+ if (handles.length > max)
251002
+ lines.push(`... +${handles.length - max} more`);
251003
+ return ok(`Matched ${handles.length} element(s) for ${selector}:
251004
+ ${lines.join("\n")}`, start2);
251005
+ }
251006
+ case "performance": {
251007
+ const data = await page.evaluate(`(() => {
251008
+ const perf = window.performance;
251009
+ const nav = (perf.getEntriesByType('navigation') || [])[0];
251010
+ const paints = (perf.getEntriesByType('paint') || []);
251011
+ return {
251012
+ url: window.location.href,
251013
+ domContentLoaded: nav ? nav.domContentLoadedEventEnd : null,
251014
+ loadComplete: nav ? nav.loadEventEnd : null,
251015
+ firstPaint: (paints.find(p => p.name === 'first-paint') || {}).startTime || null,
251016
+ firstContentfulPaint: (paints.find(p => p.name === 'first-contentful-paint') || {}).startTime || null,
251017
+ transferSize: nav ? nav.transferSize : null,
251018
+ encodedBodySize: nav ? nav.encodedBodySize : null,
251019
+ decodedBodySize: nav ? nav.decodedBodySize : null,
251020
+ };
251021
+ })()`);
251022
+ return ok(`Performance metrics:
251023
+ ${JSON.stringify(data, null, 2)}`, start2);
251024
+ }
251025
+ case "cookies": {
251026
+ const ck = await context.cookies();
251027
+ if (!ck || ck.length === 0)
251028
+ return ok("No cookies set.", start2);
251029
+ const lines = ck.slice(0, 50).map((c9) => ` ${c9.name}=${String(c9.value).slice(0, 80)} (domain=${c9.domain}, path=${c9.path}${c9.httpOnly ? ", httpOnly" : ""}${c9.secure ? ", secure" : ""})`);
251030
+ return ok(`Cookies (${ck.length}):
251031
+ ${lines.join("\n")}`, start2);
251032
+ }
251033
+ case "storage": {
251034
+ const data = await page.evaluate(`(() => {
251035
+ const dump = function(s) {
251036
+ var o = {};
251037
+ for (var i = 0; i < s.length; i++) {
251038
+ var k = s.key(i);
251039
+ if (k) o[k] = String(s.getItem(k) || '').slice(0, 200);
251040
+ }
251041
+ return o;
251042
+ };
251043
+ return {
251044
+ localStorage: dump(window.localStorage),
251045
+ sessionStorage: dump(window.sessionStorage),
251046
+ };
251047
+ })()`);
251048
+ return ok(`Storage:
251049
+ ${JSON.stringify(data, null, 2)}`, start2);
251050
+ }
251051
+ case "viewport": {
251052
+ const v = (text || value2 || "").trim();
251053
+ const m2 = v.match(/^(\d+)\s*[x×]\s*(\d+)$/);
251054
+ if (m2) {
251055
+ const w = parseInt(m2[1], 10), h = parseInt(m2[2], 10);
251056
+ await page.setViewportSize({ width: w, height: h });
251057
+ return ok(`Viewport set to ${w}x${h}`, start2);
251058
+ }
251059
+ const dev = pw?.devices?.[v];
251060
+ if (dev) {
251061
+ await page.setViewportSize(dev.viewport);
251062
+ return ok(`Viewport set to ${v} (${dev.viewport.width}x${dev.viewport.height})`, start2);
251063
+ }
251064
+ return fail(`viewport: provide "WxH" (e.g. "375x667") or a Playwright device name (e.g. "iPhone 13")`, start2);
251065
+ }
251066
+ case "clear_diagnostics": {
251067
+ const sizes = `console=${consoleBuffer.length} network=${networkBuffer.length} errors=${errorBuffer.length}`;
251068
+ clearDiagnosticBuffers();
251069
+ return ok(`Cleared diagnostic buffers (${sizes}).`, start2);
251070
+ }
250834
251071
  default:
250835
251072
  return fail(`Unknown action: ${action}`, start2);
250836
251073
  }
@@ -518601,41 +518838,98 @@ ${body}`;
518601
518838
  if (!output || typeof output !== "string")
518602
518839
  return null;
518603
518840
  const text = output;
518604
- const tail = text.slice(-3e3);
518605
- if (/\bTS\d{3,5}\b.*?: /m.test(tail) && /\berror\s+TS\d{3,5}\b/i.test(tail)) {
518841
+ const tail = text.slice(-4e3);
518842
+ if (/\berror\s+TS\d{3,5}\b/i.test(tail)) {
518606
518843
  const m2 = tail.match(/error\s+TS\d{3,5}[^\n]{0,200}/i);
518607
- return `TypeScript build error detected: ${(m2?.[0] ?? "").slice(0, 200)}`;
518844
+ return `Compile error: ${(m2?.[0] ?? "").slice(0, 240)}`;
518845
+ }
518846
+ if (/\berror\[E\d+\]/i.test(tail)) {
518847
+ const m2 = tail.match(/error\[E\d+\][^\n]{0,200}/i);
518848
+ return `Compile error: ${(m2?.[0] ?? "").slice(0, 240)}`;
518849
+ }
518850
+ if (/\berror\s+CS\d{3,5}\b/i.test(tail)) {
518851
+ const m2 = tail.match(/error\s+CS\d{3,5}[^\n]{0,200}/i);
518852
+ return `Compile error: ${(m2?.[0] ?? "").slice(0, 240)}`;
518853
+ }
518854
+ if (/(^|\n)\S+:\d+:\d+:\s+error:/i.test(tail)) {
518855
+ const m2 = tail.match(/\S+:\d+:\d+:\s+error:[^\n]{0,200}/i);
518856
+ return `Compile error: ${(m2?.[0] ?? "").slice(0, 240)}`;
518608
518857
  }
518609
- if (/\b(Failed to compile|Compilation failed|Build failed)\b/i.test(tail)) {
518610
- const m2 = tail.match(/(Failed to compile|Compilation failed|Build failed)[^\n]{0,300}/i);
518611
- return `Build failed: ${(m2?.[0] ?? "").slice(0, 200)}`;
518858
+ if (/\bundefined\s+reference\s+to\b|\bld:\s+error:/i.test(tail)) {
518859
+ const m2 = tail.match(/(undefined\s+reference\s+to|ld:\s+error:)[^\n]{0,200}/i);
518860
+ return `Linker error: ${(m2?.[0] ?? "").slice(0, 240)}`;
518612
518861
  }
518613
- if (/\bError:\s+Build failed\b/i.test(tail)) {
518614
- return "Next.js build failed (webpack/rspack errors)";
518862
+ if (/\b(Failed to compile|Compilation failed|Build failed|BUILD\s+FAIL(ED|URE))\b/i.test(tail)) {
518863
+ const m2 = tail.match(/(Failed to compile|Compilation failed|Build failed|BUILD\s+FAIL(?:ED|URE))[^\n]{0,300}/i);
518864
+ return `Build failed: ${(m2?.[0] ?? "").slice(0, 240)}`;
518615
518865
  }
518616
- if (/\b\d+\s+errors? found\b/i.test(tail) && /\b(error|failed)\b/i.test(tail)) {
518617
- const m2 = tail.match(/\d+\s+errors? found[^\n]{0,200}/i);
518618
- return `Build reported errors: ${(m2?.[0] ?? "").slice(0, 200)}`;
518866
+ if (/\b\d+\s+errors?\s+(found|generated)\b/i.test(tail)) {
518867
+ const m2 = tail.match(/\d+\s+errors?\s+(?:found|generated)[^\n]{0,200}/i);
518868
+ return `Build reported errors: ${(m2?.[0] ?? "").slice(0, 240)}`;
518619
518869
  }
518620
518870
  if (/\bnpm\s+ERR!|\bnpm error code\b/i.test(tail)) {
518621
518871
  const m2 = tail.match(/(npm\s+ERR!|npm error code)[^\n]{0,300}/i);
518622
- return `npm error: ${(m2?.[0] ?? "").slice(0, 200)}`;
518872
+ return `Package install error: ${(m2?.[0] ?? "").slice(0, 240)}`;
518623
518873
  }
518624
- if (/\bTests:\s+\d+\s+failed\b/i.test(tail) || /\b\d+\s+failed\b.*\bof\s+\d+\b/i.test(tail)) {
518625
- const m2 = tail.match(/Tests:[^\n]{0,200}|\d+\s+failed[^\n]{0,200}/i);
518626
- return `Test failures detected: ${(m2?.[0] ?? "").slice(0, 200)}`;
518874
+ if (/\bERROR:\s+Could not (install|find|build)\b/i.test(tail)) {
518875
+ const m2 = tail.match(/ERROR:\s+Could not[^\n]{0,200}/i);
518876
+ return `Package install error: ${(m2?.[0] ?? "").slice(0, 240)}`;
518627
518877
  }
518628
- if (/^Traceback \(most recent call last\):/m.test(tail) && /\bError:|Exception:/m.test(tail)) {
518629
- const m2 = tail.match(/(\w+(?:Error|Exception)):[^\n]{0,200}/);
518630
- return `Python exception: ${(m2?.[0] ?? "").slice(0, 200)}`;
518878
+ if (/\b(E:\s+(Unable to|Could not)|apt-get:\s+error|dnf:\s+(?:Error|Failed)|pacman:\s+error)/i.test(tail)) {
518879
+ const m2 = tail.match(/(E:\s+|apt-get:\s+error|dnf:\s+|pacman:\s+error)[^\n]{0,200}/i);
518880
+ return `System package error: ${(m2?.[0] ?? "").slice(0, 240)}`;
518631
518881
  }
518632
- if (/\berror\[E\d+\]:/i.test(tail) || /\b(\.\/[\w./-]+):\d+:\d+:\s+error:/i.test(tail)) {
518633
- const m2 = tail.match(/error\[E\d+\][^\n]{0,200}|\.\/[\w./-]+:\d+:\d+:\s+error:[^\n]{0,200}/i);
518634
- return `Compiler error: ${(m2?.[0] ?? "").slice(0, 200)}`;
518882
+ if (/\bTests?:\s+\d+\s+failed\b/i.test(tail)) {
518883
+ const m2 = tail.match(/Tests?:[^\n]{0,200}/i);
518884
+ return `Test failures: ${(m2?.[0] ?? "").slice(0, 240)}`;
518885
+ }
518886
+ if (/^FAILED\s+\S+::/m.test(tail) || /=+\s*\d+\s+failed/i.test(tail)) {
518887
+ const m2 = tail.match(/(FAILED\s+\S+|=+\s*\d+\s+failed)[^\n]{0,200}/i);
518888
+ return `Test failures: ${(m2?.[0] ?? "").slice(0, 240)}`;
518889
+ }
518890
+ if (/\b\d+\s+failing\b/i.test(tail) && /\bpassing\b/i.test(tail)) {
518891
+ const m2 = tail.match(/\d+\s+failing[^\n]{0,200}/i);
518892
+ return `Test failures: ${(m2?.[0] ?? "").slice(0, 240)}`;
518893
+ }
518894
+ if (/^---\s+FAIL:/m.test(tail) || /^FAIL\t\S+/m.test(tail)) {
518895
+ const m2 = tail.match(/(---\s+FAIL:[^\n]{0,200}|FAIL\t\S+[^\n]{0,200})/i);
518896
+ return `Test failures: ${(m2?.[0] ?? "").slice(0, 240)}`;
518897
+ }
518898
+ if (/test\s+result:\s+FAILED/i.test(tail)) {
518899
+ const m2 = tail.match(/test\s+result:\s+FAILED[^\n]{0,200}/i);
518900
+ return `Test failures: ${(m2?.[0] ?? "").slice(0, 240)}`;
518901
+ }
518902
+ if (/\b\d+\s+failed\s+(of|out\s+of)\s+\d+\b/i.test(tail)) {
518903
+ const m2 = tail.match(/\d+\s+failed\s+(?:of|out\s+of)\s+\d+[^\n]{0,200}/i);
518904
+ return `Test failures: ${(m2?.[0] ?? "").slice(0, 240)}`;
518905
+ }
518906
+ if (/^Traceback \(most recent call last\):/m.test(tail) && /\w+(?:Error|Exception):\s/.test(tail)) {
518907
+ const m2 = tail.match(/\w+(?:Error|Exception):[^\n]{0,200}/);
518908
+ return `Runtime exception: ${(m2?.[0] ?? "").slice(0, 240)}`;
518909
+ }
518910
+ if (/\b(UnhandledPromiseRejection|throw\s+new\s+\w+|Uncaught\s+\w+Error)\b/i.test(tail)) {
518911
+ const m2 = tail.match(/(UnhandledPromiseRejection|throw\s+new\s+\w+|Uncaught[^\n]{0,200})/i);
518912
+ return `Runtime exception: ${(m2?.[0] ?? "").slice(0, 240)}`;
518913
+ }
518914
+ if (/\b(Exception\s+in\s+thread|java\.\w+(?:Error|Exception):)/i.test(tail)) {
518915
+ const m2 = tail.match(/(Exception\s+in\s+thread[^\n]{0,200}|java\.\w+(?:Error|Exception):[^\n]{0,200})/i);
518916
+ return `Runtime exception: ${(m2?.[0] ?? "").slice(0, 240)}`;
518917
+ }
518918
+ if (/^[\w./:-]+:\d+:in\s+`[^']*':\s+/m.test(tail) && /\b\w+Error\b/.test(tail)) {
518919
+ const m2 = tail.match(/\b\w+Error[^\n]{0,200}/);
518920
+ return `Runtime exception: ${(m2?.[0] ?? "").slice(0, 240)}`;
518921
+ }
518922
+ if (/\bthread\s+'[^']*'\s+panicked\s+at/i.test(tail)) {
518923
+ const m2 = tail.match(/thread\s+'[^']*'\s+panicked\s+at[^\n]{0,200}/i);
518924
+ return `Runtime panic: ${(m2?.[0] ?? "").slice(0, 240)}`;
518635
518925
  }
518636
518926
  if (/^(fatal|FATAL|panic):\s/m.test(tail)) {
518637
518927
  const m2 = tail.match(/^(fatal|FATAL|panic):\s[^\n]{0,200}/m);
518638
- return `Fatal error: ${(m2?.[0] ?? "").slice(0, 200)}`;
518928
+ return `Fatal error: ${(m2?.[0] ?? "").slice(0, 240)}`;
518929
+ }
518930
+ if (/\b(Segmentation\s+fault|core\s+dumped|Abort(?:ed)?\s+\(core dumped\)|Killed)\b/i.test(tail)) {
518931
+ const m2 = tail.match(/(Segmentation\s+fault|core\s+dumped|Abort(?:ed)?\s+\(core dumped\)|Killed)[^\n]{0,80}/i);
518932
+ return `Process terminated abnormally: ${(m2?.[0] ?? "").slice(0, 240)}`;
518639
518933
  }
518640
518934
  return null;
518641
518935
  }
@@ -518692,6 +518986,7 @@ ${body}`;
518692
518986
  if (/(^|[^&\d])(>|>>)\s*\S/.test(cmd))
518693
518987
  return false;
518694
518988
  const MUTATE_BINS = [
518989
+ // POSIX file/process mutators
518695
518990
  "rm",
518696
518991
  "mv",
518697
518992
  "cp",
@@ -518699,56 +518994,158 @@ ${body}`;
518699
518994
  "rmdir",
518700
518995
  "chmod",
518701
518996
  "chown",
518997
+ "chgrp",
518702
518998
  "touch",
518703
518999
  "tee",
518704
519000
  "dd",
518705
519001
  "truncate",
518706
519002
  "ln",
519003
+ "install",
518707
519004
  "kill",
518708
519005
  "pkill",
518709
519006
  "killall",
518710
519007
  "reboot",
518711
519008
  "shutdown",
519009
+ "halt",
519010
+ "poweroff",
518712
519011
  "fakeroot",
518713
519012
  "sudo",
519013
+ "doas",
518714
519014
  "nohup",
518715
519015
  "setsid",
519016
+ "su",
519017
+ // Build orchestrators (always trigger compile/test side-effects)
518716
519018
  "make",
519019
+ "gmake",
519020
+ "ninja",
518717
519021
  "gradle",
519022
+ "gradlew",
518718
519023
  "mvn",
519024
+ "mvnw",
519025
+ "ant",
519026
+ "bazel",
519027
+ "buck",
519028
+ "buck2",
519029
+ "cmake",
519030
+ "meson",
519031
+ "scons",
519032
+ "rake",
519033
+ "leiningen",
519034
+ "lein",
519035
+ "sbt",
519036
+ "stack",
519037
+ // Configuration-management / orchestration
518719
519038
  "ansible",
518720
- "systemd-run"
519039
+ "ansible-playbook",
519040
+ "puppet",
519041
+ "chef-client",
519042
+ "salt",
519043
+ "salt-call",
519044
+ "systemd-run",
519045
+ "systemd-analyze",
519046
+ // System package managers
519047
+ "apt",
519048
+ "apt-get",
519049
+ "yum",
519050
+ "dnf",
519051
+ "rpm",
519052
+ "pacman",
519053
+ "zypper",
519054
+ "emerge",
519055
+ "snap",
519056
+ "flatpak",
519057
+ "brew",
519058
+ "port",
519059
+ "pkg",
519060
+ "apk",
519061
+ "choco",
519062
+ "scoop",
519063
+ "winget",
519064
+ // Helm / k8s-flavored
519065
+ "helm",
519066
+ "helmfile",
519067
+ "kustomize"
518721
519068
  ];
518722
519069
  const mutateBinsRe = new RegExp(`\\b(${MUTATE_BINS.join("|")})\\b`, "i");
518723
519070
  if (mutateBinsRe.test(cmd))
518724
519071
  return false;
518725
519072
  if (/\bsed\s+(-i|--in-place)\b/.test(cmd))
518726
519073
  return false;
518727
- if (/\bsystemctl\s+(?!status\b|show\b|is-)/i.test(cmd))
519074
+ if (/\bsystemctl\s+(?!status\b|show\b|is-|cat\b|list-|get-)/i.test(cmd))
518728
519075
  return false;
518729
519076
  if (/\bservice\s+\S+\s+(?!status\b)/i.test(cmd))
518730
519077
  return false;
518731
519078
  if (/\bcrontab\s+-(e|d|r)\b/.test(cmd))
518732
519079
  return false;
518733
- if (/\bnpm\s+(install|uninstall|update|run|test|exec|publish|init|link|unlink|version|cache\s+clean|ci|audit\s+fix)\b/i.test(cmd))
519080
+ if (/\bnpm\s+(install|i\b|uninstall|un\b|update|up\b|run\s|test\b|exec|publish|init|link|unlink|version|cache\s+clean|ci\b|audit\s+fix|prune|rebuild|adduser|login|logout|deprecate)\b/i.test(cmd))
518734
519081
  return false;
518735
- if (/\bpnpm\s+(install|update|add|remove|run|test|exec|publish|init|link|unlink|version)\b/i.test(cmd))
519082
+ if (/\bpnpm\s+(install|i\b|update|up\b|add|remove|rm\b|run\s|test\b|exec|publish|init|link|unlink|version|prune|rebuild)\b/i.test(cmd))
518736
519083
  return false;
518737
- if (/\byarn\s+(install|add|remove|upgrade|run|test|exec|publish|init|link|unlink|version)\b/i.test(cmd))
518738
- return false;
518739
- if (/\bpip\s+(install|uninstall|wheel)\b/i.test(cmd))
519084
+ if (/\byarn\s+(install|add|remove|upgrade|run\s|test\b|exec|publish|init|link|unlink|version)\b/i.test(cmd))
518740
519085
  return false;
518741
519086
  if (/\bnpx\b/.test(cmd))
518742
519087
  return false;
518743
- if (/\bcargo\s+(build|run|test|update|publish|install|uninstall|fmt|fix)\b/i.test(cmd))
519088
+ if (/\bpip3?\s+(install|uninstall|wheel|download)\b/i.test(cmd))
519089
+ return false;
519090
+ if (/\bpipenv\s+(install|uninstall|run|update|sync)\b/i.test(cmd))
519091
+ return false;
519092
+ if (/\bpoetry\s+(add|install|run|update|remove|build|publish|init|new|export|lock)\b/i.test(cmd))
519093
+ return false;
519094
+ if (/\bconda\s+(install|remove|update|create|env\s+(create|update|remove))\b/i.test(cmd))
519095
+ return false;
519096
+ if (/\buv\s+(add|remove|sync|install|run|build|publish|pip\s+install)\b/i.test(cmd))
519097
+ return false;
519098
+ if (/\bbundle\s+(install|update|exec|add|remove|init)\b/i.test(cmd))
519099
+ return false;
519100
+ if (/\bgem\s+(install|uninstall|update|build|push|owner)\b/i.test(cmd))
519101
+ return false;
519102
+ if (/\bcpan\b|\bcpanm\b|\bperl\s+-MCPAN\b/i.test(cmd))
519103
+ return false;
519104
+ if (/\bcomposer\s+(install|update|require|remove|create-project|dump-autoload|run-script)\b/i.test(cmd))
519105
+ return false;
519106
+ if (/\bdotnet\s+(build|run|test|publish|pack|restore|add|remove|new|tool\s+install)\b/i.test(cmd))
519107
+ return false;
519108
+ if (/\bcargo\s+(build|run|test|bench|update|publish|install|uninstall|fmt|fix|clippy\s+--fix|new|init|add|remove|generate-lockfile)\b/i.test(cmd))
519109
+ return false;
519110
+ if (/\bgo\s+(build|run|test|get|install|generate|mod\s+(tidy|download|init|edit|vendor)|work\s+)\b/i.test(cmd))
519111
+ return false;
519112
+ if (/\bdocker\s+(build|run|push|pull|exec|kill|stop|rm|rmi|tag|create|cp|commit|save|load|import|export|network\s+create|volume\s+create|compose\s+(up|down|build|run|exec|restart))\b/i.test(cmd))
519113
+ return false;
519114
+ if (/\bpodman\s+(build|run|push|pull|exec|kill|stop|rm|rmi|tag|create|commit)\b/i.test(cmd))
519115
+ return false;
519116
+ if (/\bkubectl\s+(apply|delete|create|edit|patch|scale|rollout|exec|cp|drain|cordon|uncordon|taint|label|annotate|set\s)\b/i.test(cmd))
519117
+ return false;
519118
+ if (/\bterraform\s+(apply|destroy|init|plan|import|taint|untaint|workspace\s+(new|delete)|state\s+(rm|mv|push|replace-provider))\b/i.test(cmd))
519119
+ return false;
519120
+ if (/\bpulumi\s+(up|destroy|new|stack\s+(rm|init)|config\s+set|policy\s+enable|import)\b/i.test(cmd))
518744
519121
  return false;
518745
- if (/\bgo\s+(build|run|test|get|install)\b/i.test(cmd))
519122
+ if (/\baws\s+(?:\S+\s+)+(create|delete|put|update|run|start|stop|terminate|attach|detach|cp|sync|mv)\b/i.test(cmd))
518746
519123
  return false;
518747
- if (/\bdocker\s+(build|run|push|pull|exec|kill|stop|rm|rmi|tag)\b/i.test(cmd))
519124
+ if (/\bgcloud\s+(?:\S+\s+)+(create|delete|update|deploy|apply|set|enable|disable|attach|detach|reset|move|sign-in|sign-out)\b/i.test(cmd))
518748
519125
  return false;
518749
- if (/\bkubectl\s+(apply|delete|create|edit|patch|scale|rollout|exec)\b/i.test(cmd))
519126
+ if (/\baz\s+(?:\S+\s+)+(create|delete|update|deploy|set|attach|detach|reset|move)\b/i.test(cmd))
518750
519127
  return false;
518751
- if (/\bterraform\s+(apply|destroy|init|plan|import)\b/i.test(cmd))
519128
+ if (/\beslint\s+[^|;&]*--fix\b/i.test(cmd))
519129
+ return false;
519130
+ if (/\bprettier\s+[^|;&]*--write\b/i.test(cmd))
519131
+ return false;
519132
+ if (/\bblack\s+(?!--check\b)/i.test(cmd))
519133
+ return false;
519134
+ if (/\bisort\s+(?!--check\b|--diff\b)/i.test(cmd))
519135
+ return false;
519136
+ if (/\bruff\s+(?:format\b|check\s+[^|;&]*--fix\b)/i.test(cmd))
519137
+ return false;
519138
+ if (/\bgofmt\s+-w\b/i.test(cmd))
519139
+ return false;
519140
+ if (/\brustfmt\s+(?!--check\b)/i.test(cmd))
519141
+ return false;
519142
+ if (/\b(node|python\d?|ruby|perl|bash|sh|zsh|dash|fish)\s+-(e|c)\b/.test(cmd))
519143
+ return false;
519144
+ if (/\bcurl\s+[^|]*\|\s*(bash|sh|zsh|fish|python\d?|ruby|node|perl)\b/i.test(cmd))
519145
+ return false;
519146
+ if (/\bgit\s+(add|commit|push|pull|fetch|clone|init|checkout|switch|restore|reset|rm|mv|merge|rebase|cherry-pick|revert|stash|apply|am|tag(?:\s+\S)?|notes|worktree\s+(add|remove)|gc|prune|repack|filter-branch|filter-repo|reflog\s+(delete|expire)|update-ref|update-index|symbolic-ref|hash-object\s+-w)\b/i.test(cmd))
519147
+ return false;
519148
+ if (/\bollama\s+(pull|push|run|create|rm|cp|serve)\b/i.test(cmd))
518752
519149
  return false;
518753
519150
  const READ_ONLY_BINS = /* @__PURE__ */ new Set([
518754
519151
  "cd",
@@ -518758,79 +519155,128 @@ ${body}`;
518758
519155
  "fgrep",
518759
519156
  "rg",
518760
519157
  "ag",
519158
+ "ack",
518761
519159
  "cat",
518762
519160
  "head",
518763
519161
  "tail",
518764
519162
  "less",
518765
519163
  "more",
519164
+ "bat",
519165
+ "tac",
518766
519166
  "ls",
518767
519167
  "ll",
518768
519168
  "la",
519169
+ "tree",
518769
519170
  "find",
518770
- // ALLOWED only if no -delete/-exec mutating action — pre-filtered above
519171
+ "fd",
519172
+ // mutating actions pre-filtered above
518771
519173
  "wc",
518772
519174
  "awk",
518773
519175
  "gawk",
519176
+ "mawk",
519177
+ "nawk",
518774
519178
  "sort",
518775
519179
  "uniq",
519180
+ "shuf",
518776
519181
  "tr",
518777
519182
  "cut",
518778
519183
  "paste",
518779
519184
  "join",
518780
519185
  "comm",
519186
+ "column",
519187
+ "expand",
519188
+ "unexpand",
518781
519189
  "diff",
518782
519190
  "cmp",
519191
+ "patch",
519192
+ // patch rewrites files by default (with or without -R); only a --dry-run invocation is read-only
518783
519193
  "echo",
518784
519194
  "printf",
518785
519195
  "pwd",
518786
519196
  "which",
518787
519197
  "type",
518788
519198
  "command",
518789
- "node",
518790
- "python",
518791
- "python3",
518792
- "ruby",
518793
- "perl",
519199
+ "whereis",
518794
519200
  "git",
518795
- // git log/show/diff/status are read; but git add/commit/push/pull are writes — pre-filtered above
519201
+ // pre-filtered above for mutating subcommands
518796
519202
  "ollama",
518797
- // ollama show/list are read; ollama pull/run/create are writes — pre-filtered above
519203
+ // pre-filtered above
518798
519204
  "cargo",
518799
- // pre-filtered above for build/run/etc.
518800
519205
  "go",
518801
- // pre-filtered above for build/run/etc.
519206
+ "rustc",
519207
+ // bin-only forms are read-only; mutating subcommands are pre-filtered
518802
519208
  "stat",
518803
519209
  "file",
518804
519210
  "du",
518805
519211
  "df",
519212
+ "lsof",
519213
+ "fuser",
518806
519214
  "date",
518807
519215
  "uname",
518808
519216
  "id",
518809
519217
  "whoami",
518810
519218
  "hostname",
518811
519219
  "uptime",
519220
+ "tty",
518812
519221
  "env",
518813
519222
  "printenv",
519223
+ "set",
518814
519224
  "test",
518815
519225
  "[",
518816
519226
  "true",
518817
519227
  "false",
519228
+ "yes",
519229
+ "seq",
518818
519230
  "tsc",
518819
519231
  "eslint",
518820
519232
  "prettier",
518821
- // these emit but mostly read
518822
- "head",
518823
- "tail",
519233
+ "ruff",
519234
+ "black",
519235
+ "isort",
519236
+ "rustfmt",
519237
+ "gofmt",
519238
+ // Linters/formatters: mutating modes pre-filtered above (--fix/--write/-w/-i),
519239
+ // so reaching here means we have a check-only invocation.
518824
519240
  "jq",
518825
519241
  "yq",
518826
519242
  "xq",
519243
+ "tomlq",
519244
+ "fx",
518827
519245
  "base64",
518828
519246
  "md5sum",
518829
519247
  "sha256sum",
518830
519248
  "sha1sum",
519249
+ "sha512sum",
518831
519250
  "tldr",
518832
519251
  "man",
518833
- "info"
519252
+ "info",
519253
+ "help",
519254
+ "pip",
519255
+ "pip3",
519256
+ "pipenv",
519257
+ "poetry",
519258
+ "uv",
519259
+ "conda",
519260
+ // pre-filtered above for mutating subcommands
519261
+ "npm",
519262
+ "pnpm",
519263
+ "yarn",
519264
+ // pre-filtered above
519265
+ "kubectl",
519266
+ "terraform",
519267
+ "pulumi",
519268
+ "aws",
519269
+ "gcloud",
519270
+ "az",
519271
+ // pre-filtered above
519272
+ "docker",
519273
+ "podman",
519274
+ // pre-filtered above
519275
+ "composer",
519276
+ "dotnet",
519277
+ "bundle",
519278
+ "gem"
519279
+ // pre-filtered above
518834
519280
  ]);
518835
519281
  if (/\bfind\b[\s\S]*?(-delete|-exec\s+(rm|mv|cp|chmod|chown|sed\s+-i)|--?ok\s+(rm|mv))/i.test(cmd))
518836
519282
  return false;
@@ -518839,11 +519285,42 @@ ${body}`;
518839
519285
  const segments = cmd.split(/(?:\|\||&&|;)/).map((s2) => s2.trim()).filter(Boolean);
518840
519286
  if (segments.length === 0)
518841
519287
  return false;
519288
+ const SAFE_INTERP_FLAGS = /^(?:--version|--help|-V|-v|-h|--vers|version)$/;
519289
+ const INTERPRETER_BINS = /* @__PURE__ */ new Set([
519290
+ "node",
519291
+ "python",
519292
+ "python2",
519293
+ "python3",
519294
+ "ruby",
519295
+ "perl",
519296
+ "php",
519297
+ "lua",
519298
+ "bash",
519299
+ "sh",
519300
+ "zsh",
519301
+ "dash",
519302
+ "fish",
519303
+ "ksh",
519304
+ "tcsh",
519305
+ "java",
519306
+ "kotlin",
519307
+ "scala",
519308
+ "groovy"
519309
+ ]);
518842
519310
  for (const seg of segments) {
518843
519311
  const stripped = seg.replace(/^cd\s+\S+\s*$/i, "true").replace(/^!/, "");
518844
- const firstTok = stripped.split(/\s+/)[0]?.replace(/^.*\//, "") || "";
519312
+ const tokens = stripped.split(/\s+/).filter(Boolean);
519313
+ const firstTok = (tokens[0] ?? "").replace(/^.*\//, "");
518845
519314
  if (!firstTok)
518846
519315
  continue;
519316
+ if (INTERPRETER_BINS.has(firstTok)) {
519317
+ const restToks = tokens.slice(1);
519318
+ if (restToks.length === 0)
519319
+ return false;
519320
+ if (!restToks.every((t2) => SAFE_INTERP_FLAGS.test(t2)))
519321
+ return false;
519322
+ continue;
519323
+ }
518847
519324
  if (!READ_ONLY_BINS.has(firstTok))
518848
519325
  return false;
518849
519326
  }
@@ -518852,10 +519329,11 @@ ${body}`;
518852
519329
  /**
518853
519330
  * REG-5: Render the recent-failures block so the agent SEES its own error
518854
519331
  * output before deciding what to do next. Detects same-fingerprint failure
518855
- * repetition and escalates the warning. Without this, the agent runs
518856
- * `npx next build`, gets a 200-line TypeScript error, ignores the specific
518857
- * error and blindly retries with `npm install --force`. Caching the failure
518858
- * + injecting it pre-LLM forces the model to confront what actually broke.
519332
+ * repetition and escalates the warning. Without this, the agent runs a
519333
+ * build/test/install command, gets a long error stream from the
519334
+ * underlying compiler/runner, ignores the specific error, and blindly
519335
+ * retries with a different flag combination. Caching the failure +
519336
+ * injecting it pre-LLM forces the model to confront what actually broke.
518859
519337
  */
518860
519338
  _renderRecentFailuresBlock(turn) {
518861
519339
  const fails = this._recentFailures;
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.187.468",
3
+ "version": "0.187.470",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "open-agents-ai",
9
- "version": "0.187.468",
9
+ "version": "0.187.470",
10
10
  "hasInstallScript": true,
11
11
  "license": "CC-BY-NC-4.0",
12
12
  "dependencies": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.187.468",
3
+ "version": "0.187.470",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",