@charzhu/openjaw-agent 0.2.8 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -4005,7 +4005,7 @@ function categoryForTool(toolName) {
4005
4005
  if (toolName.startsWith("word_") || toolName.startsWith("excel_") || toolName.startsWith("powerpoint_") || toolName.startsWith("office_")) return "office";
4006
4006
  if (toolName.startsWith("memory_") || toolName === "todo_write") return "memory";
4007
4007
  if (toolName.startsWith("file_") || toolName.startsWith("image_") || toolName === "grep" || toolName === "glob") return "files";
4008
- if (toolName.startsWith("system_") || toolName.startsWith("clipboard_") || ["code_execute", "web_fetch", "web_search", "notify", "sleep", "ask_user", "config"].includes(toolName)) return "system";
4008
+ if (toolName.startsWith("system_") || toolName.startsWith("clipboard_") || ["code_execute", "web_fetch", "web_search", "web_extract", "notify", "sleep", "ask_user", "config"].includes(toolName)) return "system";
4009
4009
  return "mcp";
4010
4010
  }
4011
4011
  var DEFAULT_OPENAI_MAX_TOOLS, MCP_AUTO_GROW_HARD_CAP, BUILTIN_HEADROOM, FOUNDATION_TOOL_NAMES, PROFILE_CATEGORIES, CATEGORY_KEYWORDS;
@@ -4031,9 +4031,9 @@ var init_tool_exposure = __esm({
4031
4031
  CATEGORY_KEYWORDS = [
4032
4032
  { category: "email", patterns: [/\b(email|mail|outlook|inbox|calendar|schedule|meeting|invite|today|tomorrow)\b/i] },
4033
4033
  { category: "teams", patterns: [/\b(teams|chat|channel|message|dm|meeting|standup|today|mention)\b/i] },
4034
- { category: "browser", patterns: [/\b(browser|page|website|web|navigate|click|screenshot|search online)\b/i] },
4034
+ { category: "browser", patterns: [/\b(browser|page|website|web|navigate|click|screenshot|snapshot|console|image|search online)\b/i] },
4035
4035
  { category: "files", patterns: [/\b(file|folder|directory|read|write|edit|grep|glob|find in repo|codebase)\b/i] },
4036
- { category: "system", patterns: [/\b(shell|command|terminal|run|execute|clipboard|notify|sleep|web search|fetch url)\b/i] },
4036
+ { category: "system", patterns: [/\b(shell|command|terminal|run|execute|clipboard|notify|sleep|web search|fetch url|extract url|read url|article|docs?|paper|source page|news|latest|headlines|current events|breaking news)\b/i] },
4037
4037
  { category: "office", patterns: [/\b(word|excel|powerpoint|spreadsheet|document|presentation|slide)\b/i] },
4038
4038
  { category: "wechat", patterns: [/\b(wechat|weixin)\b/i] },
4039
4039
  { category: "memory", patterns: [/\b(memory|remember|recall|todo|preference)\b/i] }
@@ -5833,18 +5833,135 @@ var init_logger = __esm({
5833
5833
  }
5834
5834
  });
5835
5835
 
5836
+ // ../openjaw-mcp/dist/tools/url-safety.js
5837
+ import { isIP } from "node:net";
5838
/**
 * Reports whether a string appears to embed an API key, token, or password.
 *
 * Each pattern in TOKEN_PATTERNS is tried against the raw value and against
 * its percent-decoded form, so a secret hidden behind URL encoding still matches.
 *
 * @param {string} value - Text to scan (validateHttpUrl passes the full href).
 * @returns {boolean} True when any pattern matches either form.
 */
function hasTokenLikeSecret(value) {
  const decoded = safeDecode(value);
  for (const pattern of TOKEN_PATTERNS) {
    if (pattern.test(value) || pattern.test(decoded)) {
      return true;
    }
  }
  return false;
}
5842
/**
 * Percent-decodes a string without ever throwing.
 *
 * @param {string} value - Possibly percent-encoded text.
 * @returns {string} The decoded text, or the input unchanged when it contains
 *   a malformed escape sequence that decodeURIComponent rejects.
 */
function safeDecode(value) {
  let decoded = value;
  try {
    decoded = decodeURIComponent(value);
  } catch {
    // Malformed escapes raise URIError; the raw input is the best we can offer.
  }
  return decoded;
}
5849
/**
 * Canonicalizes a hostname for comparison: strips one leading "[" and one
 * trailing "]" (IPv6 literal brackets), drops trailing dots, and lowercases.
 *
 * @param {string} hostname - Hostname as found on a URL object.
 * @returns {string} The normalized hostname.
 */
function normalizeHostname(hostname) {
  const withoutBrackets = hostname.replace(/^\[|\]$/g, "");
  const withoutTrailingDots = withoutBrackets.replace(/\.+$/g, "");
  return withoutTrailingDots.toLowerCase();
}
5852
/**
 * Tests whether a dotted-quad IPv4 string falls in a non-public range:
 * 0.0.0.0/8, 10.0.0.0/8, 127.0.0.0/8, 169.254.0.0/16, 172.16.0.0/12
 * or 192.168.0.0/16.
 *
 * @param {string} host - Candidate IPv4 address.
 * @returns {boolean} True for an address in one of the ranges above; false
 *   otherwise, including when the input is not four octets in 0..255.
 */
function isPrivateIPv4(host) {
  const octets = host.split(".").map((segment) => Number(segment));
  if (octets.length !== 4) {
    return false;
  }
  const isOctet = (n) => Number.isInteger(n) && n >= 0 && n <= 255;
  if (!octets.every(isOctet)) {
    return false;
  }
  const [first, second] = octets;
  switch (first) {
    case 0:
    case 10:
    case 127:
      return true;
    case 169:
      return second === 254;
    case 172:
      return second >= 16 && second <= 31;
    case 192:
      return second === 168;
    default:
      return false;
  }
}
5859
/**
 * Extracts the embedded IPv4 address from an IPv4-mapped IPv6 literal.
 *
 * Two spellings are recognised: the dotted form ("::ffff:10.0.0.1") and the
 * two-hextet form ("::ffff:0a00:1").
 *
 * @param {string} host - IPv6 literal without brackets; case-insensitive.
 * @returns {string|undefined} The dotted-quad IPv4 address, or undefined when
 *   the input is not an IPv4-mapped address in one of the two forms.
 */
function ipv4FromMappedIPv6(host) {
  const lower = host.toLowerCase();
  const dottedMatch = /^::ffff:(\d{1,3}(?:\.\d{1,3}){3})$/.exec(lower);
  if (dottedMatch) {
    return dottedMatch[1];
  }
  const hexMatch = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(lower);
  if (!hexMatch) {
    return undefined;
  }
  const [high, low] = [hexMatch[1], hexMatch[2]].map((group) => Number.parseInt(group, 16));
  if (!Number.isFinite(high) || !Number.isFinite(low)) {
    return undefined;
  }
  // Each 16-bit hextet carries two octets: high byte first, then low byte.
  const octets = [high >> 8, high & 255, low >> 8, low & 255];
  return octets.join(".");
}
5873
/**
 * Tests whether an IPv6 literal refers to a loopback, unspecified,
 * unique-local, or link-local address. IPv4-mapped literals are judged by
 * their embedded IPv4 address.
 *
 * @param {string} host - IPv6 literal without brackets; case-insensitive.
 * @returns {boolean} True when the address should be treated as non-public.
 */
function isPrivateIPv6(host) {
  const lower = host.toLowerCase();
  const mapped = ipv4FromMappedIPv6(lower);
  if (mapped) {
    return isPrivateIPv4(mapped);
  }
  // "::1" is loopback; "::" is the unspecified address, the IPv6 counterpart
  // of 0.0.0.0, which the IPv4 check already rejects.
  if (lower === "::1" || lower === "::") {
    return true;
  }
  // Unique-local fc00::/7. The prefix test is deliberately kept as broad as
  // before so that nothing previously blocked becomes allowed.
  if (lower.startsWith("fc") || lower.startsWith("fd")) {
    return true;
  }
  // Link-local is fe80::/10, i.e. first hextet fe80 through febf, not only
  // the literal "fe80:" prefix.
  return /^fe[89ab][0-9a-f]:/.test(lower);
}
5880
/**
 * Tests whether a URL points at a known cloud metadata endpoint, by hostname
 * (METADATA_HOSTS) or by IP address (METADATA_IPS), including IPv4-mapped
 * IPv6 spellings of those IPs.
 *
 * @param {string|URL} input - URL string or URL object. A string is parsed
 *   with `new URL`, which throws on invalid input.
 * @returns {boolean} True when the host is a listed metadata endpoint.
 */
function isMetadataUrl(input) {
  const parsed = typeof input === "string" ? new URL(input) : input;
  const host = normalizeHostname(parsed.hostname);
  if (METADATA_HOSTS.has(host) || METADATA_IPS.has(host)) {
    return true;
  }
  const mapped = ipv4FromMappedIPv6(host);
  return mapped ? METADATA_IPS.has(mapped) : false;
}
5886
/**
 * Tests whether a hostname is local or private: "localhost", any
 * "*.localhost" name, or an IP literal in a private/loopback range.
 * Other DNS names are not resolved and are reported as not private.
 *
 * @param {string} hostname - Hostname, optionally a bracketed IPv6 literal.
 * @returns {boolean} True when the host should be treated as internal.
 */
function isPrivateHost(hostname) {
  const host = normalizeHostname(hostname);
  if (host === "localhost" || host.endsWith(".localhost")) {
    return true;
  }
  // isIP returns 4 or 6 for IP literals and 0 for anything else.
  switch (isIP(host)) {
    case 4:
      return isPrivateIPv4(host);
    case 6:
      return isPrivateIPv6(host);
    default:
      return false;
  }
}
5897
/**
 * Parses and vets a URL before it is fetched or navigated to.
 *
 * Checks run in order: parseable, http/https scheme, no embedded
 * credentials, no token-like secret in the href, not a cloud metadata
 * endpoint, and (unless allowed) not a private or loopback host.
 *
 * @param {string} input - Candidate URL.
 * @param {{allowPrivate?: boolean}} [options] - Set allowPrivate to skip the
 *   private-host check; metadata endpoints stay blocked regardless.
 * @returns {{ok: true, url: URL} | {ok: false, error: string}} The parsed URL
 *   on success, otherwise a human-readable reason.
 */
function validateHttpUrl(input, options = {}) {
  const reject = (error) => ({ ok: false, error });

  let url;
  try {
    url = new URL(input);
  } catch {
    return reject("Invalid URL");
  }

  const isHttp = url.protocol === "http:" || url.protocol === "https:";
  if (!isHttp) {
    return reject("Only http:// and https:// URLs are supported");
  }
  if (url.username || url.password) {
    return reject("Blocked: URL contains embedded credentials");
  }
  if (hasTokenLikeSecret(url.href)) {
    return reject("Blocked: URL contains what appears to be an API key, token, password, or secret");
  }
  if (isMetadataUrl(url)) {
    return reject("Blocked: URL targets a cloud metadata endpoint");
  }
  if (!options.allowPrivate && isPrivateHost(url.hostname)) {
    return reject("Blocked: URL targets a private, loopback, or internal network address");
  }
  return { ok: true, url };
}
5921
// Module-level tables for the URL safety checks. They are declared here and
// assigned inside the init_url_safety initializer below.
+ var TOKEN_PATTERNS, METADATA_HOSTS, METADATA_IPS;
5922
// Initializer for the bundled url-safety module.
// NOTE(review): __esm looks like the bundler's lazy module wrapper — confirm
// it runs the body only once before relying on that.
+ var init_url_safety = __esm({
5923
+ "../openjaw-mcp/dist/tools/url-safety.js"() {
5924
+ "use strict";
5925
// Patterns tested by hasTokenLikeSecret against the raw and decoded URL.
// The first matches a known key prefix followed by 12+ key characters; the
// second matches a credential-named query parameter with a value of 8+ chars.
// NOTE(review): the first pattern is case-insensitive and includes the short
// prefix "sk", so ordinary long words starting with "sk" may match — verify
// the false-positive rate is acceptable.
+ TOKEN_PATTERNS = [
5926
+ /\b(?:sk|ghp|github_pat|gho|ghu|ghs|glpat|xox[baprs]|ya29|AIza)[A-Za-z0-9_\-]{12,}\b/i,
5927
+ /(?:api[_-]?key|access[_-]?token|auth[_-]?token|bearer|secret|password|passwd|pwd|token)=([^&\s]{8,})/i
5928
+ ];
5929
// Hostnames that isMetadataUrl treats as cloud metadata endpoints.
+ METADATA_HOSTS = /* @__PURE__ */ new Set([
5930
+ "metadata.google.internal",
5931
+ "metadata.azure.internal"
5932
+ ]);
5933
// IP addresses that isMetadataUrl treats as cloud metadata endpoints
// (also matched when written as IPv4-mapped IPv6).
+ METADATA_IPS = /* @__PURE__ */ new Set([
5934
+ "169.254.169.254",
5935
+ "100.100.100.200"
5936
+ ]);
5937
// NOTE(review): __name presumably pins each function's name for the bundle —
// confirm against the bundler helper definition.
+ __name(hasTokenLikeSecret, "hasTokenLikeSecret");
5938
+ __name(safeDecode, "safeDecode");
5939
+ __name(normalizeHostname, "normalizeHostname");
5940
+ __name(isPrivateIPv4, "isPrivateIPv4");
5941
+ __name(ipv4FromMappedIPv6, "ipv4FromMappedIPv6");
5942
+ __name(isPrivateIPv6, "isPrivateIPv6");
5943
+ __name(isMetadataUrl, "isMetadataUrl");
5944
+ __name(isPrivateHost, "isPrivateHost");
5945
+ __name(validateHttpUrl, "validateHttpUrl");
5946
+ }
5947
+ });
5948
+
5836
5949
  // ../openjaw-mcp/dist/channels/browser.js
5837
5950
  import * as chromeLauncher from "chrome-launcher";
5838
5951
  import CDP from "chrome-remote-interface";
5839
5952
  function escapeForJs(str) {
5840
5953
  return str.replace(/\\/g, "\\\\").replace(/'/g, "\\'").replace(/"/g, '\\"').replace(/`/g, "\\`").replace(/\$/g, "\\$").replace(/\n/g, "\\n").replace(/\r/g, "\\r").replace(/\0/g, "\\0").replace(/</g, "\\x3c").replace(/>/g, "\\x3e");
5841
5954
  }
5842
- var BrowserChannel;
5955
+ var MAX_CONSOLE_MESSAGES, MAX_JS_ERRORS, MAX_CONSOLE_TEXT, BrowserChannel;
5843
5956
  var init_browser = __esm({
5844
5957
  "../openjaw-mcp/dist/channels/browser.js"() {
5845
5958
  "use strict";
5846
5959
  init_logger();
5960
+ init_url_safety();
5847
5961
  __name(escapeForJs, "escapeForJs");
5962
+ MAX_CONSOLE_MESSAGES = 200;
5963
+ MAX_JS_ERRORS = 100;
5964
+ MAX_CONSOLE_TEXT = 4e3;
5848
5965
  BrowserChannel = class {
5849
5966
  static {
5850
5967
  __name(this, "BrowserChannel");
@@ -5854,6 +5971,10 @@ var init_browser = __esm({
5854
5971
  client = null;
5855
5972
  page = null;
5856
5973
  launchPromise = null;
5974
+ currentSnapshotId = null;
5975
+ currentRefs = /* @__PURE__ */ new Map();
5976
+ consoleBuffer = [];
5977
+ jsErrorBuffer = [];
5857
5978
  constructor(config) {
5858
5979
  this.config = config;
5859
5980
  }
@@ -5900,6 +6021,8 @@ var init_browser = __esm({
5900
6021
  this.client = null;
5901
6022
  this.page = null;
5902
6023
  this.launchPromise = null;
6024
+ this.consoleBuffer = [];
6025
+ this.jsErrorBuffer = [];
5903
6026
  logger_default.info("Browser connection state reset");
5904
6027
  }
5905
6028
  /**
@@ -5952,13 +6075,71 @@ var init_browser = __esm({
5952
6075
  DOM.enable(),
5953
6076
  Network.enable()
5954
6077
  ]);
6078
+ this.attachRuntimeEventBuffers(Runtime);
6079
+ await this.attachRequestSafetyInterception(this.client.Fetch);
5955
6080
  this.page = { Runtime, Page, DOM, Input };
5956
6081
  logger_default.info("Browser launched", { port: this.chrome.port });
5957
6082
  }
6083
+ async attachRequestSafetyInterception(Fetch) {
6084
+ const fetchDomain = Fetch;
6085
+ if (!fetchDomain?.enable || !fetchDomain.requestPaused || !fetchDomain.failRequest || !fetchDomain.continueRequest) {
6086
+ logger_default.debug("Browser Fetch domain unavailable; navigation safety falls back to pre/final URL checks");
6087
+ return;
6088
+ }
6089
+ fetchDomain.requestPaused((event) => {
6090
+ void (async () => {
6091
+ try {
6092
+ const url = new URL(event.request.url);
6093
+ if (url.protocol === "http:" || url.protocol === "https:") {
6094
+ const safety = validateHttpUrl(url.href, { allowPrivate: true });
6095
+ if (!safety.ok) {
6096
+ await fetchDomain.failRequest({ requestId: event.requestId, errorReason: "BlockedByClient" });
6097
+ return;
6098
+ }
6099
+ }
6100
+ await fetchDomain.continueRequest({ requestId: event.requestId });
6101
+ } catch {
6102
+ await fetchDomain.continueRequest({ requestId: event.requestId }).catch(() => void 0);
6103
+ }
6104
+ })();
6105
+ });
6106
+ await fetchDomain.enable({ patterns: [{ urlPattern: "*", requestStage: "Request" }] });
6107
+ }
6108
+ attachRuntimeEventBuffers(Runtime) {
6109
+ const runtime = Runtime;
6110
+ runtime.consoleAPICalled?.((event) => {
6111
+ const text = (event.args ?? []).map((arg) => String(arg.value ?? arg.description ?? "")).join(" ").slice(0, MAX_CONSOLE_TEXT);
6112
+ this.consoleBuffer.push({
6113
+ type: event.type ?? "log",
6114
+ text,
6115
+ timestamp: event.timestamp ?? Date.now()
6116
+ });
6117
+ if (this.consoleBuffer.length > MAX_CONSOLE_MESSAGES) {
6118
+ this.consoleBuffer.splice(0, this.consoleBuffer.length - MAX_CONSOLE_MESSAGES);
6119
+ }
6120
+ });
6121
+ runtime.exceptionThrown?.((event) => {
6122
+ const details = event.exceptionDetails ?? {};
6123
+ this.jsErrorBuffer.push({
6124
+ message: String(details.exception?.description ?? details.exception?.value ?? details.text ?? "JavaScript exception").slice(0, MAX_CONSOLE_TEXT),
6125
+ url: details.url,
6126
+ line: details.lineNumber,
6127
+ column: details.columnNumber,
6128
+ timestamp: event.timestamp ?? Date.now()
6129
+ });
6130
+ if (this.jsErrorBuffer.length > MAX_JS_ERRORS) {
6131
+ this.jsErrorBuffer.splice(0, this.jsErrorBuffer.length - MAX_JS_ERRORS);
6132
+ }
6133
+ });
6134
+ }
5958
6135
  async navigate(options) {
5959
6136
  await this.ensureBrowser();
5960
6137
  const { Page, Runtime } = this.page;
5961
- await Page.navigate({ url: options.url });
6138
+ const initialSafety = validateHttpUrl(options.url, { allowPrivate: true });
6139
+ if (!initialSafety.ok) {
6140
+ return { url: options.url, title: "", snapshot: "", error: initialSafety.error };
6141
+ }
6142
+ await Page.navigate({ url: initialSafety.url.href });
5962
6143
  if (options.waitFor === "load") {
5963
6144
  await Page.loadEventFired();
5964
6145
  } else if (options.waitFor === "domcontentloaded") {
@@ -5970,15 +6151,23 @@ var init_browser = __esm({
5970
6151
  const result = await Runtime.evaluate({
5971
6152
  expression: "document.title"
5972
6153
  });
5973
- return {
5974
- url: options.url,
5975
- title: result.result.value
5976
- };
6154
+ const url = await this.getCurrentUrl() ?? initialSafety.url.href;
6155
+ const finalSafety = validateHttpUrl(url, { allowPrivate: true });
6156
+ if (!finalSafety.ok) {
6157
+ return { url, title: result.result.value, snapshot: "", error: `Blocked final URL: ${finalSafety.error}` };
6158
+ }
6159
+ const title = result.result.value;
6160
+ const snapshot = await this.snapshot({ full: false });
6161
+ return { ...snapshot, url, title };
5977
6162
  }
5978
6163
  async click(options) {
5979
6164
  await this.ensureBrowser();
5980
6165
  const { Runtime } = this.page;
5981
- const findExpr = options.selector ? `document.querySelector('${escapeForJs(options.selector)}')` : options.text ? `(() => {
6166
+ const resolvedSelector = options.ref ? this.resolveRef(options.ref, options.snapshotId) : options.selector;
6167
+ if (options.ref && !resolvedSelector) {
6168
+ return { success: false, element: `stale or unknown ref ${options.ref}; call browser_snapshot again` };
6169
+ }
6170
+ const findExpr = resolvedSelector ? `document.querySelector('${escapeForJs(resolvedSelector)}')` : options.text ? `(() => {
5982
6171
  const isVis = (e) => {
5983
6172
  if (!e.offsetParent && e.tagName !== 'BODY' && e.tagName !== 'HTML') return false;
5984
6173
  const s = getComputedStyle(e);
@@ -6102,9 +6291,13 @@ var init_browser = __esm({
6102
6291
  async type(options) {
6103
6292
  await this.ensureBrowser();
6104
6293
  const { Runtime } = this.page;
6294
+ const resolvedSelector = options.ref ? this.resolveRef(options.ref, options.snapshotId) : options.selector;
6295
+ if (!resolvedSelector) {
6296
+ return { success: false, error: options.ref ? `stale or unknown ref ${options.ref}; call browser_snapshot again` : "No selector or ref provided" };
6297
+ }
6105
6298
  const script = `
6106
6299
  (() => {
6107
- const el = document.querySelector('${escapeForJs(options.selector)}');
6300
+ const el = document.querySelector('${escapeForJs(resolvedSelector)}');
6108
6301
  if (el) {
6109
6302
  ${options.clear ? "el.value = '';" : ""}
6110
6303
  el.value = '${escapeForJs(options.text)}';
@@ -6186,6 +6379,141 @@ var init_browser = __esm({
6186
6379
  const result = await Runtime.evaluate({ expression: script, returnByValue: true });
6187
6380
  return { content: result.result.value };
6188
6381
  }
6382
+ async snapshot(options) {
6383
+ await this.ensureBrowser();
6384
+ const { Runtime } = this.page;
6385
+ const scope = options?.selector ? `'${escapeForJs(options.selector)}'` : `'body'`;
6386
+ const maxElements = Math.min(Math.max(options?.maxElements ?? 80, 1), 200);
6387
+ const textLimit = options?.full ? 8e3 : 1200;
6388
+ const script = `(() => {
6389
+ const root = document.querySelector(${scope});
6390
+ const cssEscape = (value) => globalThis.CSS?.escape
6391
+ ? globalThis.CSS.escape(String(value))
6392
+ : String(value).replace(/[^a-zA-Z0-9_-]/g, ch => '\\\\' + ch.charCodeAt(0).toString(16) + ' ');
6393
+ const selectorFor = (el) => {
6394
+ if (el.id) return '#' + cssEscape(el.id);
6395
+ const parts = [];
6396
+ let cur = el;
6397
+ while (cur && cur.nodeType === 1 && cur !== document.body) {
6398
+ let part = cur.tagName.toLowerCase();
6399
+ const parent = cur.parentElement;
6400
+ if (!parent) break;
6401
+ const same = Array.from(parent.children).filter(child => child.tagName === cur.tagName);
6402
+ if (same.length > 1) part += ':nth-of-type(' + (same.indexOf(cur) + 1) + ')';
6403
+ parts.unshift(part);
6404
+ cur = parent;
6405
+ }
6406
+ return parts.length ? parts.join(' > ') : 'body';
6407
+ };
6408
+ const isVisible = (el) => {
6409
+ const rect = el.getBoundingClientRect();
6410
+ const style = getComputedStyle(el);
6411
+ return rect.width > 0 && rect.height > 0 && style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0';
6412
+ };
6413
+ const disabled = (el) => Boolean(el.disabled || el.getAttribute('aria-disabled') === 'true');
6414
+ if (!root) return JSON.stringify({ url: location.href, title: document.title, elements: [], text: '' });
6415
+ const nodes = Array.from(root.querySelectorAll('button, a[href], input, select, textarea, summary, [role="button"], [role="link"], [role="textbox"], [onclick], [tabindex]'));
6416
+ const elements = [];
6417
+ for (const el of nodes) {
6418
+ if (elements.length >= ${maxElements}) break;
6419
+ const tag = el.tagName.toLowerCase();
6420
+ const type = el.getAttribute('type') || '';
6421
+ const role = el.getAttribute('role') || (type ? tag + '[' + type + ']' : tag);
6422
+ const text = (el.innerText || el.textContent || '').replace(/s+/g, ' ').trim().slice(0, 120);
6423
+ const label = (el.getAttribute('aria-label') || el.getAttribute('title') || el.getAttribute('placeholder') || text || el.getAttribute('value') || '').replace(/s+/g, ' ').trim().slice(0, 120);
6424
+ elements.push({
6425
+ ref: '@e' + (elements.length + 1),
6426
+ selector: selectorFor(el),
6427
+ role,
6428
+ tag,
6429
+ label,
6430
+ text,
6431
+ visible: isVisible(el),
6432
+ disabled: disabled(el),
6433
+ });
6434
+ }
6435
+ const pageText = (root.innerText || root.textContent || '').replace(/s+/g, ' ').trim().slice(0, ${textLimit});
6436
+ return JSON.stringify({ url: location.href, title: document.title, elements, text: pageText });
6437
+ })()`;
6438
+ const result = await Runtime.evaluate({ expression: script, returnByValue: true });
6439
+ const parsed = JSON.parse(result.result.value || "{}");
6440
+ const snapshotId = `snap_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 8)}`;
6441
+ const elements = parsed.elements ?? [];
6442
+ const refs = {};
6443
+ this.currentRefs.clear();
6444
+ for (const element of elements) {
6445
+ refs[element.ref] = element.selector;
6446
+ this.currentRefs.set(element.ref, element.selector);
6447
+ }
6448
+ this.currentSnapshotId = snapshotId;
6449
+ const lines = [
6450
+ `URL: ${parsed.url ?? ""}`,
6451
+ `Title: ${parsed.title ?? ""}`,
6452
+ `Snapshot: ${snapshotId}`,
6453
+ "",
6454
+ `Interactive elements (${elements.length}):`,
6455
+ ...elements.map((element) => `${element.ref} <${element.role}>${element.visible ? "" : " [hidden]"}${element.disabled ? " [disabled]" : ""} ${JSON.stringify(element.label || element.text || element.selector)}`),
6456
+ "",
6457
+ "Visible text:",
6458
+ parsed.text ?? ""
6459
+ ];
6460
+ return {
6461
+ url: parsed.url ?? "",
6462
+ title: parsed.title ?? "",
6463
+ snapshot: lines.join("\n").trim(),
6464
+ snapshot_id: snapshotId,
6465
+ refs,
6466
+ elements,
6467
+ text: parsed.text ?? "",
6468
+ element_count: elements.length
6469
+ };
6470
+ }
6471
+ resolveRef(ref, snapshotId) {
6472
+ if (snapshotId && this.currentSnapshotId && snapshotId !== this.currentSnapshotId)
6473
+ return void 0;
6474
+ return this.currentRefs.get(ref);
6475
+ }
6476
+ async back() {
6477
+ await this.ensureBrowser();
6478
+ const { Page, Runtime } = this.page;
6479
+ const history = await Page.getNavigationHistory();
6480
+ if (history.currentIndex > 0) {
6481
+ await Page.navigateToHistoryEntry({ entryId: history.entries[history.currentIndex - 1].id });
6482
+ await Page.loadEventFired().catch(() => void 0);
6483
+ }
6484
+ const title = await Runtime.evaluate({ expression: "document.title", returnByValue: true });
6485
+ return { url: await this.getCurrentUrl(), title: title.result.value };
6486
+ }
6487
+ async press(key) {
6488
+ await this.sendKey(key);
6489
+ return { success: true, key };
6490
+ }
6491
+ async getImages() {
6492
+ await this.ensureBrowser();
6493
+ const { Runtime } = this.page;
6494
+ const script = `JSON.stringify(Array.from(document.images).slice(0, 100).map(img => {
6495
+ const rect = img.getBoundingClientRect();
6496
+ return { src: img.currentSrc || img.src, alt: img.alt || '', width: img.naturalWidth || rect.width || 0, height: img.naturalHeight || rect.height || 0, visible: rect.width > 0 && rect.height > 0 };
6497
+ }).filter(img => img.src && !img.src.startsWith('data:')))`;
6498
+ const result = await Runtime.evaluate({ expression: script, returnByValue: true });
6499
+ const images = JSON.parse(result.result.value || "[]");
6500
+ return { images, count: images.length };
6501
+ }
6502
+ async consoleMessages(clear2 = false) {
6503
+ const consoleMessages = [...this.consoleBuffer];
6504
+ const jsErrors = [...this.jsErrorBuffer];
6505
+ if (clear2) {
6506
+ this.consoleBuffer = [];
6507
+ this.jsErrorBuffer = [];
6508
+ }
6509
+ return {
6510
+ console_messages: consoleMessages,
6511
+ js_errors: jsErrors,
6512
+ total_messages: consoleMessages.length,
6513
+ total_errors: jsErrors.length,
6514
+ note: clear2 ? "Returned and cleared buffered console messages." : "Returned buffered console messages."
6515
+ };
6516
+ }
6189
6517
  async evaluate(script) {
6190
6518
  await this.ensureBrowser();
6191
6519
  const { Runtime } = this.page;
@@ -6620,18 +6948,48 @@ function createBrowseTools(config, sharedBrowser) {
6620
6948
  required: ["url"]
6621
6949
  },
6622
6950
  execute: /* @__PURE__ */ __name(async (input) => {
6951
+ const safety = validateHttpUrl(input.url, { allowPrivate: true });
6952
+ if (!safety.ok)
6953
+ return { error: safety.error, url: input.url };
6623
6954
  return await browser.navigate({
6624
6955
  url: input.url,
6625
6956
  waitFor: input.wait_for ?? "load"
6626
6957
  });
6627
6958
  }, "execute")
6628
6959
  },
6960
+ {
6961
+ name: "browser_snapshot",
6962
+ description: "Get a compact snapshot of the current page with stable refs like @e1 for browser_click and browser_type. Use after navigation or after interactions that changed the page.",
6963
+ parameters: {
6964
+ type: "object",
6965
+ properties: {
6966
+ full: { type: "boolean", description: "Include more visible page text (default false)", default: false },
6967
+ selector: { type: "string", description: "Optional CSS selector to scope the snapshot" },
6968
+ max_elements: { type: "number", description: "Maximum interactive elements to include (default 80, max 200)" }
6969
+ }
6970
+ },
6971
+ execute: /* @__PURE__ */ __name(async (input) => {
6972
+ return await browser.snapshot({
6973
+ full: input.full === true,
6974
+ selector: input.selector,
6975
+ maxElements: input.max_elements
6976
+ });
6977
+ }, "execute")
6978
+ },
6629
6979
  {
6630
6980
  name: "browser_click",
6631
- description: "Click an element on the page",
6981
+ description: "Click an element on the page. Prefer ref from browser_snapshot (e.g. @e5). Legacy selector/text targeting is still supported.",
6632
6982
  parameters: {
6633
6983
  type: "object",
6634
6984
  properties: {
6985
+ ref: {
6986
+ type: "string",
6987
+ description: "Element ref from browser_snapshot, e.g. @e5"
6988
+ },
6989
+ snapshot_id: {
6990
+ type: "string",
6991
+ description: "Optional snapshot_id associated with the ref. If stale, refresh with browser_snapshot."
6992
+ },
6635
6993
  selector: {
6636
6994
  type: "string",
6637
6995
  description: "CSS selector of element to click"
@@ -6644,6 +7002,8 @@ function createBrowseTools(config, sharedBrowser) {
6644
7002
  },
6645
7003
  execute: /* @__PURE__ */ __name(async (input) => {
6646
7004
  return await browser.click({
7005
+ ref: input.ref,
7006
+ snapshotId: input.snapshot_id,
6647
7007
  selector: input.selector,
6648
7008
  text: input.text
6649
7009
  });
@@ -6651,10 +7011,18 @@ function createBrowseTools(config, sharedBrowser) {
6651
7011
  },
6652
7012
  {
6653
7013
  name: "browser_type",
6654
- description: "Type text into an input element",
7014
+ description: "Type text into an input element. Prefer ref from browser_snapshot (e.g. @e3). Legacy selector targeting is still supported.",
6655
7015
  parameters: {
6656
7016
  type: "object",
6657
7017
  properties: {
7018
+ ref: {
7019
+ type: "string",
7020
+ description: "Element ref from browser_snapshot, e.g. @e3"
7021
+ },
7022
+ snapshot_id: {
7023
+ type: "string",
7024
+ description: "Optional snapshot_id associated with the ref. If stale, refresh with browser_snapshot."
7025
+ },
6658
7026
  selector: {
6659
7027
  type: "string",
6660
7028
  description: "CSS selector of input element"
@@ -6674,10 +7042,12 @@ function createBrowseTools(config, sharedBrowser) {
6674
7042
  default: false
6675
7043
  }
6676
7044
  },
6677
- required: ["selector", "text"]
7045
+ required: ["text"]
6678
7046
  },
6679
7047
  execute: /* @__PURE__ */ __name(async (input) => {
6680
7048
  return await browser.type({
7049
+ ref: input.ref,
7050
+ snapshotId: input.snapshot_id,
6681
7051
  selector: input.selector,
6682
7052
  text: input.text,
6683
7053
  clear: input.clear ?? true,
@@ -6685,6 +7055,24 @@ function createBrowseTools(config, sharedBrowser) {
6685
7055
  });
6686
7056
  }, "execute")
6687
7057
  },
7058
+ {
7059
+ name: "browser_back",
7060
+ description: "Navigate back to the previous page in browser history.",
7061
+ parameters: { type: "object", properties: {} },
7062
+ execute: /* @__PURE__ */ __name(async () => await browser.back(), "execute")
7063
+ },
7064
+ {
7065
+ name: "browser_press",
7066
+ description: "Press a keyboard key in the browser, e.g. Enter, Tab, Escape, ArrowDown.",
7067
+ parameters: {
7068
+ type: "object",
7069
+ properties: {
7070
+ key: { type: "string", description: "Key to press, e.g. Enter, Tab, Escape, ArrowDown" }
7071
+ },
7072
+ required: ["key"]
7073
+ },
7074
+ execute: /* @__PURE__ */ __name(async (input) => await browser.press(input.key), "execute")
7075
+ },
6688
7076
  {
6689
7077
  name: "browser_extract",
6690
7078
  description: "Extract text content from the page",
@@ -6729,6 +7117,51 @@ function createBrowseTools(config, sharedBrowser) {
6729
7117
  return await browser.evaluate(input.script);
6730
7118
  }, "execute")
6731
7119
  },
7120
+ {
7121
+ name: "browser_console",
7122
+ description: "Read browser console logs and JavaScript errors. This tool is read-only; use browser_evaluate for JavaScript execution.",
7123
+ parameters: {
7124
+ type: "object",
7125
+ properties: {
7126
+ clear: { type: "boolean", description: "Clear buffered logs after reading, if buffering is active", default: false }
7127
+ }
7128
+ },
7129
+ execute: /* @__PURE__ */ __name(async (input) => await browser.consoleMessages(input.clear === true), "execute")
7130
+ },
7131
+ {
7132
+ name: "browser_get_images",
7133
+ description: "List images on the current page with URLs, alt text, dimensions, and visibility. Use before visual analysis or downloading images.",
7134
+ parameters: { type: "object", properties: {} },
7135
+ execute: /* @__PURE__ */ __name(async () => await browser.getImages(), "execute")
7136
+ },
7137
+ {
7138
+ name: "browser_vision",
7139
+ description: "Take a browser screenshot for visual inspection. Returns a screenshot path plus page metadata. Use for visual layouts, CAPTCHAs, screenshots, or image-heavy pages.",
7140
+ parameters: {
7141
+ type: "object",
7142
+ properties: {
7143
+ question: { type: "string", description: "What you want to inspect visually" },
7144
+ annotate: { type: "boolean", description: "Reserved for future ref overlays; currently returns a normal screenshot", default: false }
7145
+ }
7146
+ },
7147
+ execute: /* @__PURE__ */ __name(async (input) => {
7148
+ const { join: join47 } = await import("node:path");
7149
+ const { tmpdir: tmpdir13 } = await import("node:os");
7150
+ const { randomUUID: randomUUID14 } = await import("node:crypto");
7151
+ const screenshotPath = join47(tmpdir13(), `openjaw-browser-${randomUUID14().slice(0, 8)}.png`);
7152
+ const screenshot = await browser.screenshot({ fullPage: false, path: screenshotPath });
7153
+ const snapshot = await browser.snapshot({ full: false });
7154
+ return {
7155
+ message: "Browser screenshot captured for visual inspection",
7156
+ question: input.question,
7157
+ snapshot_id: snapshot.snapshot_id,
7158
+ url: snapshot.url,
7159
+ title: snapshot.title,
7160
+ screenshotPath: screenshot.path ?? screenshotPath,
7161
+ imagePayload: "not_attached: screenshot path returned to avoid sending images to non-vision providers"
7162
+ };
7163
+ }, "execute")
7164
+ },
6732
7165
  {
6733
7166
  name: "browser_wait",
6734
7167
  description: "Wait for an element to appear, text to change, or element to hide. Use before interacting with dynamic/slow-loading pages.",
@@ -6814,6 +7247,7 @@ var init_browse = __esm({
6814
7247
  "../openjaw-mcp/dist/tools/browse.js"() {
6815
7248
  "use strict";
6816
7249
  init_browser();
7250
+ init_url_safety();
6817
7251
  __name(createBrowseTools, "createBrowseTools");
6818
7252
  }
6819
7253
  });
@@ -15745,6 +16179,175 @@ function urlMatchesDomain(url, domain) {
15745
16179
  return false;
15746
16180
  }
15747
16181
  }
16182
/**
 * Cap a search result list and give every kept entry a uniform shape.
 *
 * Each returned entry is a shallow copy of the input entry with:
 *   - `snippet`     set to the entry's snippet, or "" when it is null/undefined
 *   - `description` set to that same snippet text (it mirrors `snippet`)
 *   - `position`    set to the entry's 1-based rank in the returned list
 *
 * @param {Array<object>} results - Raw result entries.
 * @param {number} limit - Maximum number of entries to keep.
 * @returns {Array<object>} New array; the input entries are not mutated.
 */
function normalizeSearchResults(results, limit) {
  const toNormalized = (entry, zeroBasedIndex) => {
    const snippetText = entry.snippet ?? "";
    return {
      ...entry,
      snippet: snippetText,
      description: snippetText,
      position: zeroBasedIndex + 1
    };
  };
  const kept = results.slice(0, limit);
  return kept.map(toNormalized);
}
16190
/**
 * Coerce a loosely-typed tool argument to an integer inside [min, max].
 *
 * Only real numbers, bigints and non-blank numeric strings are treated as a
 * supplied value. Everything else (undefined, null, "", booleans, objects)
 * yields `fallback`. This matters because `Number(null)`, `Number("")` and
 * `Number(false)` are all 0, which would otherwise be clamped up to `min`
 * instead of falling back to the caller's default.
 *
 * @param {unknown} value - Caller-supplied value (often straight from tool input).
 * @param {number} fallback - Returned as-is when `value` is missing or not numeric.
 * @param {number} min - Inclusive lower bound.
 * @param {number} max - Inclusive upper bound.
 * @returns {number} The truncated, clamped value, or `fallback`.
 */
function clampNumber(value, fallback, min, max) {
  let numberValue;
  if (typeof value === "number") {
    numberValue = value;
  } else if (typeof value === "bigint") {
    numberValue = Number(value);
  } else if (typeof value === "string" && value.trim() !== "") {
    numberValue = Number(value);
  } else {
    return fallback;
  }
  if (!Number.isFinite(numberValue)) {
    return fallback;
  }
  return Math.min(Math.max(Math.trunc(numberValue), min), max);
}
16196
/**
 * Pull the text of the first <title> element out of an HTML document.
 *
 * Runs of whitespace inside the title are collapsed to single spaces, the
 * result is trimmed and passed through DECODE_ENTITIES.
 *
 * @param {string} html - Raw HTML source.
 * @returns {string|undefined} The decoded title, or undefined when there is
 *   no <title> element or the decoded title is empty.
 */
function extractHtmlTitle(html) {
  const titlePattern = /<title[^>]*>([\s\S]*?)<\/title>/i;
  const found = titlePattern.exec(html);
  if (found === null) {
    return undefined;
  }
  const collapsed = found[1].replace(/\s+/g, " ").trim();
  const decoded = DECODE_ENTITIES(collapsed);
  return decoded ? decoded : undefined;
}
16202
/**
 * Remove non-content elements (with their contents) from an HTML string
 * before it is converted to markdown or plain text.
 *
 * Removed elements, in order: script, style, noscript, svg, nav, footer,
 * header. Matching is case-insensitive.
 *
 * Two details make the match tag-exact:
 *   - the opening tag name must be followed by whitespace, "/" or ">", so
 *     elements that merely start with the same letters (e.g. <nav-item>,
 *     <headers>) are left alone instead of swallowing everything up to the
 *     next real closing tag;
 *   - the closing tag may carry trailing whitespace or junk before ">"
 *     (e.g. "</script >"), which HTML parsers accept as a valid end tag.
 *
 * @param {string} html - Raw HTML source.
 * @returns {string} The HTML with the listed elements removed.
 */
function cleanHtmlForMarkdown(html) {
  const strippedTags = ["script", "style", "noscript", "svg", "nav", "footer", "header"];
  return strippedTags.reduce((remaining, tag) => {
    const element = new RegExp(
      `<${tag}(?=[\\s/>])[^>]*>[\\s\\S]*?<\\/${tag}(?:\\s[^>]*)?>`,
      "gi"
    );
    return remaining.replace(element, "");
  }, html);
}
16205
/**
 * Convert HTML to readable plain text.
 *
 * Steps: strip non-content elements via cleanHtmlForMarkdown, turn <br> into
 * a newline and </p> / </h1>-</h6> into a blank line, drop all remaining
 * tags, tidy whitespace, then decode entities with DECODE_ENTITIES.
 *
 * Whitespace tidying only removes spaces/tabs that touch a newline. Using
 * `\s` there would also consume the newlines themselves, flattening every
 * paragraph break to a single line break and making the "at most one blank
 * line" rule unreachable.
 *
 * @param {string} html - Raw HTML source.
 * @returns {string} Plain text with paragraphs separated by one blank line.
 */
function htmlToText(html) {
  const withLineBreaks = cleanHtmlForMarkdown(html)
    .replace(/\r\n?/g, "\n") // normalise CRLF/CR so later rules see only "\n"
    .replace(/<br\s*\/?>/gi, "\n")
    .replace(/<\/p>/gi, "\n\n")
    .replace(/<\/h[1-6]>/gi, "\n\n")
    .replace(/<[^>]+>/g, " ");
  const tidied = withLineBreaks
    .replace(/[ \t]+/g, " ")
    .replace(/ ?\n ?/g, "\n") // trim the (already collapsed) space around each newline
    .replace(/\n{3,}/g, "\n\n")
    .trim();
  return DECODE_ENTITIES(tidied);
}
16208
/**
 * Decide whether a fetched resource should be treated as a PDF.
 *
 * True when the content type mentions "application/pdf" (any case), or when
 * the URL path ends in ".pdf" (any case). If the URL cannot be parsed, the
 * raw string is checked for ".pdf" at its end or right before "?" / "#".
 *
 * @param {string} url - The final URL of the resource.
 * @param {string} contentType - The response Content-Type header value.
 * @returns {boolean}
 */
function isPdfLike(url, contentType) {
  const declaredAsPdf = /application\/pdf/i.test(contentType);
  if (declaredAsPdf) {
    return true;
  }
  let pathname;
  try {
    pathname = new URL(url).pathname;
  } catch {
    return /\.pdf(?:$|[?#])/i.test(url);
  }
  return pathname.toLowerCase().endsWith(".pdf");
}
16217
/**
 * Convert HTML to markdown using the optional "turndown" package.
 *
 * The HTML is first reduced with cleanHtmlForMarkdown. If importing turndown,
 * constructing the converter or the conversion itself throws, the function
 * falls back to stripping all tags and collapsing whitespace to single spaces.
 *
 * @param {string} html - Raw HTML source.
 * @returns {Promise<string>} Markdown, or flattened plain text on fallback.
 */
async function htmlToMarkdown(html) {
  try {
    const turndownModule = await import("turndown");
    const Converter = turndownModule.default || turndownModule;
    const converter = new Converter({ headingStyle: "atx", codeBlockStyle: "fenced" });
    const sanitized = cleanHtmlForMarkdown(html);
    return converter.turndown(sanitized);
  } catch {
    // Best-effort fallback: no markdown structure, just the visible text.
    const sanitized = cleanHtmlForMarkdown(html);
    const withoutTags = sanitized.replace(/<[^>]+>/g, " ");
    return withoutTags.replace(/\s+/g, " ").trim();
  }
}
16227
/**
 * Cut one page out of a text for paginated tool output.
 *
 * `startIndex` is clamped to [0, text.length] (default 0) and `maxLength` to
 * [1, 100000] (default 8000), both via clampNumber.
 *
 * @param {string} text - Full extracted content.
 * @param {unknown} startIndex - Requested offset into `text`.
 * @param {unknown} maxLength - Requested maximum page size in characters.
 * @returns {{content: string, contentLength: number, truncated: boolean,
 *   startIndex: number, nextStartIndex?: number}} `nextStartIndex` is present
 *   only when text remains after this page.
 */
function paginateContent(text, startIndex, maxLength) {
  const totalLength = text.length;
  const from = clampNumber(startIndex, 0, 0, Math.max(0, totalLength));
  const pageSize = clampNumber(maxLength, 8e3, 1, 1e5);
  const content = text.slice(from, from + pageSize);
  const end = from + content.length;
  const hasMore = end < totalLength;
  const page = {
    content,
    contentLength: totalLength,
    truncated: hasMore,
    startIndex: from
  };
  if (hasMore) {
    page.nextStartIndex = end;
  }
  return page;
}
16240
/**
 * Fetch a URL while following redirects manually, re-validating every hop
 * with validateHttpUrl before it is requested.
 *
 * @param {URL} startUrl - Already-validated URL to request first.
 * @param {object} options
 * @param {AbortSignal} [options.signal] - Passed to every fetch call.
 * @param {object} [options.headers] - Request headers sent on every hop.
 * @param {boolean} [options.allowPrivate] - Forwarded to validateHttpUrl.
 * @param {number} [options.maxRedirects=10] - Redirects followed before giving up.
 * @returns {Promise<{response?: Response, finalUrl: string, error?: string}>}
 *   `response` is set when a non-redirect response (or a 3xx without a
 *   Location header) was reached; otherwise `error` explains why the chain
 *   was abandoned.
 */
async function fetchWithValidatedRedirects(startUrl, options) {
  let current = startUrl;
  const maxRedirects = options.maxRedirects ?? 10;
  for (let redirects = 0; redirects <= maxRedirects; redirects++) {
    const response = await fetch(current.href, {
      signal: options.signal,
      headers: options.headers,
      redirect: "manual"
    });
    if (response.status < 300 || response.status >= 400) {
      return { response, finalUrl: current.href };
    }
    const location = response.headers.get("location");
    if (!location) {
      return { response, finalUrl: current.href };
    }
    // The redirect body is never read; cancel it so the connection can be
    // released instead of waiting for garbage collection.
    try {
      await response.body?.cancel();
    } catch {
      // Best-effort cleanup only: a failed cancel must not abort the redirect chain.
    }
    let nextUrl;
    try {
      nextUrl = new URL(location, current.href);
    } catch {
      // A malformed Location header is reported like any other rejected hop
      // rather than surfacing as a thrown TypeError.
      return { finalUrl: current.href, error: `Blocked redirect: invalid Location header "${location}"` };
    }
    const nextSafety = validateHttpUrl(nextUrl.href, { allowPrivate: options.allowPrivate });
    if (!nextSafety.ok) {
      return { finalUrl: nextUrl.href, error: `Blocked redirect: ${nextSafety.error}` };
    }
    current = nextSafety.url;
  }
  return { finalUrl: current.href, error: `Too many redirects (>${maxRedirects})` };
}
16265
/**
 * Fetch a URL and return one page of its readable content.
 *
 * Flow: validate the URL, fetch it with per-hop redirect validation under a
 * timeout, reject non-2xx and PDF responses, convert HTML to markdown (or to
 * text when `options.format === "text"`), leave other content types as-is,
 * then paginate with paginateContent.
 *
 * This function does not throw for fetch failures: every failure is reported
 * as a result object with empty `content` and an `error` message.
 *
 * @param {string} inputUrl - URL requested by the caller.
 * @param {object} options
 * @param {boolean} [options.allowPrivate] - Forwarded to validateHttpUrl.
 * @param {number} [options.timeoutMs=20000] - Abort the request after this long.
 * @param {"markdown"|"text"} [options.format] - Output format for HTML pages.
 * @param {number} [options.startIndex] - Pagination offset.
 * @param {number} [options.maxLength] - Pagination page size.
 * @returns {Promise<object>} `{ url, finalUrl, title?, contentType, content,
 *   contentLength, truncated, startIndex, nextStartIndex?, error? }`.
 */
async function extractUrlContent(inputUrl, options) {
  // Single place that builds the failure shape shared by every error path.
  const failure = (finalUrl, contentType, error) => ({
    url: inputUrl,
    finalUrl,
    contentType,
    content: "",
    contentLength: 0,
    truncated: false,
    startIndex: options.startIndex,
    error
  });
  // Release a body that will never be read so the connection is not held open.
  const discardBody = async (response) => {
    try {
      await response.body?.cancel();
    } catch {
      // Best-effort cleanup only: the error result is returned regardless.
    }
  };

  const initial = validateHttpUrl(inputUrl, { allowPrivate: options.allowPrivate });
  if (!initial.ok) {
    return failure(inputUrl, "", initial.error);
  }
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), options.timeoutMs ?? 2e4);
  try {
    const fetched = await fetchWithValidatedRedirects(initial.url, {
      signal: controller.signal,
      headers: { "User-Agent": "OpenJaw-Agent/1.0" },
      allowPrivate: options.allowPrivate
    });
    const finalUrl = fetched.finalUrl;
    const response = fetched.response;
    if (fetched.error || !response) {
      return failure(finalUrl, "", fetched.error ?? "Fetch failed before response");
    }
    const contentType = response.headers.get("content-type") || "";
    if (!response.ok) {
      await discardBody(response);
      return failure(finalUrl, contentType, `HTTP ${response.status}: ${response.statusText}`);
    }
    if (isPdfLike(finalUrl, contentType)) {
      await discardBody(response);
      return failure(finalUrl, contentType, "PDF extraction is not yet supported");
    }
    const rawText = await response.text();
    // Media types are case-insensitive, so "TEXT/HTML" must count as HTML too.
    const isHtml = /html/i.test(contentType);
    let title;
    let text = rawText;
    if (isHtml) {
      title = extractHtmlTitle(rawText);
      text = options.format === "text" ? htmlToText(rawText) : await htmlToMarkdown(rawText);
    }
    return {
      url: inputUrl,
      finalUrl,
      title,
      contentType,
      ...paginateContent(text, options.startIndex, options.maxLength)
    };
  } catch (err) {
    return failure(inputUrl, "", `Fetch failed: ${err instanceof Error ? err.message : String(err)}`);
  } finally {
    clearTimeout(timeout);
  }
}
15748
16351
  function createShellTools(_config, hooks) {
15749
16352
  return [
15750
16353
  {
@@ -16011,58 +16614,86 @@ function createShellTools(_config, hooks) {
16011
16614
  // ─── Web Fetch tool (headless URL content extraction) ───
16012
16615
  {
16013
16616
  name: "web_fetch",
16014
- description: "Fetch content from a URL and return it as text. Use this for quick content retrieval without opening a browser. Supports HTML pages (converted to markdown), JSON APIs, and plain text.",
16617
+ description: "Fetch raw content from a URL. Use for APIs, JSON, plain text, and small pages. For articles/docs/pages that need cleanup, prefer web_extract.",
16015
16618
  parameters: {
16016
16619
  type: "object",
16017
16620
  properties: {
16018
16621
  url: { type: "string", description: "The URL to fetch" },
16019
16622
  max_length: { type: "number", description: "Maximum characters to return (default: 5000)" },
16020
- start_index: { type: "number", description: "Start index for content pagination (default: 0). Use to continue reading truncated content." }
16623
+ start_index: { type: "number", description: "Start index for content pagination (default: 0). Use to continue reading truncated content." },
16624
+ allow_private: { type: "boolean", description: "Allow private, loopback, or internal network URLs. Default false." }
16021
16625
  },
16022
16626
  required: ["url"]
16023
16627
  },
16024
16628
  execute: /* @__PURE__ */ __name(async (input) => {
16025
16629
  const url = input.url;
16026
- const maxLength = input.max_length || 5e3;
16027
- const startIndex = input.start_index || 0;
16028
- try {
16029
- const controller = new AbortController();
16030
- const timeout = setTimeout(() => controller.abort(), 15e3);
16031
- const response = await fetch(url, {
16032
- signal: controller.signal,
16033
- headers: { "User-Agent": "OpenJaw-Agent/1.0" }
16034
- });
16035
- clearTimeout(timeout);
16036
- if (!response.ok) {
16037
- return { error: `HTTP ${response.status}: ${response.statusText}`, url };
16038
- }
16039
- const contentType = response.headers.get("content-type") || "";
16040
- let text = await response.text();
16041
- if (contentType.includes("html")) {
16042
- try {
16043
- const TurndownModule = await import("turndown");
16044
- const TurndownService = TurndownModule.default || TurndownModule;
16045
- const turndown = new TurndownService({
16046
- headingStyle: "atx",
16047
- codeBlockStyle: "fenced"
16048
- });
16049
- const cleaned = text.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, "").replace(/<style[^>]*>[\s\S]*?<\/style>/gi, "").replace(/<nav[^>]*>[\s\S]*?<\/nav>/gi, "").replace(/<footer[^>]*>[\s\S]*?<\/footer>/gi, "");
16050
- text = turndown.turndown(cleaned);
16051
- } catch {
16052
- text = text.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, "").replace(/<style[^>]*>[\s\S]*?<\/style>/gi, "").replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
16053
- }
16054
- }
16055
- if (startIndex > 0) {
16056
- text = text.slice(startIndex);
16057
- }
16058
- if (text.length > maxLength) {
16059
- text = text.slice(0, maxLength);
16060
- return { url, contentType, length: text.length, content: text, truncated: true, nextIndex: startIndex + maxLength, hint: `Content truncated. Use start_index: ${startIndex + maxLength} to continue reading.` };
16630
+ const maxLength = clampNumber(input.max_length, 5e3, 1, 1e5);
16631
+ const startIndex = clampNumber(input.start_index, 0, 0, Number.MAX_SAFE_INTEGER);
16632
+ const allowPrivate = input.allow_private === true;
16633
+ const extracted = await extractUrlContent(url, { maxLength, startIndex, allowPrivate, timeoutMs: 15e3, format: "markdown" });
16634
+ if (extracted.error)
16635
+ return { error: extracted.error, url: extracted.url, finalUrl: extracted.finalUrl };
16636
+ return {
16637
+ url: extracted.url,
16638
+ finalUrl: extracted.finalUrl,
16639
+ contentType: extracted.contentType,
16640
+ title: extracted.title,
16641
+ length: extracted.content.length,
16642
+ contentLength: extracted.contentLength,
16643
+ content: extracted.content,
16644
+ truncated: extracted.truncated,
16645
+ ...extracted.nextStartIndex !== void 0 && {
16646
+ nextIndex: extracted.nextStartIndex,
16647
+ nextStartIndex: extracted.nextStartIndex,
16648
+ hint: `Content truncated. Use start_index: ${extracted.nextStartIndex} to continue reading.`
16061
16649
  }
16062
- return { url, contentType, length: text.length, content: text };
16063
- } catch (err) {
16064
- return { error: `Fetch failed: ${err instanceof Error ? err.message : String(err)}`, url };
16065
- }
16650
+ };
16651
+ }, "execute")
16652
+ },
16653
+ // ─── Web Extract tool (selected-source reading) ───
16654
+ {
16655
+ name: "web_extract",
16656
+ description: "Extract readable content from selected web page URLs. Use after web_search when you need to read source pages, articles, docs, or papers. Returns markdown/text with metadata and pagination. For raw APIs or plain JSON, web_fetch is lighter.",
16657
+ parameters: {
16658
+ type: "object",
16659
+ properties: {
16660
+ urls: { type: "array", items: { type: "string" }, description: "List of URLs to extract content from (max 5)" },
16661
+ format: { type: "string", enum: ["markdown", "text"], description: "Output format. HTML is converted to markdown by default.", default: "markdown" },
16662
+ max_length: { type: "number", description: "Maximum characters per URL to return (default: 8000, max: 100000)" },
16663
+ start_index: { type: "number", description: "Start index for content pagination (default: 0). Applies to each URL." },
16664
+ allow_private: { type: "boolean", description: "Allow private, loopback, or internal network URLs. Default false." },
16665
+ use_llm_processing: { type: "boolean", description: "Reserved for future LLM summarization. Currently deterministic extraction only." }
16666
+ },
16667
+ required: ["urls"]
16668
+ },
16669
+ execute: /* @__PURE__ */ __name(async (input) => {
16670
+ const rawUrls = Array.isArray(input.urls) ? input.urls.filter((url) => typeof url === "string") : [];
16671
+ const urls = rawUrls.slice(0, 5);
16672
+ if (urls.length === 0)
16673
+ return { error: "web_extract requires at least one URL", results: [] };
16674
+ const maxLength = clampNumber(input.max_length, 8e3, 1, 1e5);
16675
+ const startIndex = clampNumber(input.start_index, 0, 0, Number.MAX_SAFE_INTEGER);
16676
+ const allowPrivate = input.allow_private === true;
16677
+ const format2 = input.format === "text" ? "text" : "markdown";
16678
+ const results = await Promise.all(urls.map((url) => extractUrlContent(url, { maxLength, startIndex, allowPrivate, format: format2 })));
16679
+ return {
16680
+ results: results.map((result) => ({
16681
+ url: result.url,
16682
+ finalUrl: result.finalUrl,
16683
+ title: result.title,
16684
+ contentType: result.contentType,
16685
+ format: format2,
16686
+ content: result.content,
16687
+ contentLength: result.contentLength,
16688
+ truncated: result.truncated,
16689
+ startIndex: result.startIndex,
16690
+ ...result.nextStartIndex !== void 0 && { nextStartIndex: result.nextStartIndex },
16691
+ ...result.error && { error: result.error }
16692
+ })),
16693
+ count: results.length,
16694
+ ...rawUrls.length > urls.length && { warning: `Only the first ${urls.length} URLs were extracted (max 5 per call).` },
16695
+ ...input.use_llm_processing === true && { llmProcessing: "unavailable: deterministic extraction only in this build" }
16696
+ };
16066
16697
  }, "execute")
16067
16698
  },
16068
16699
  // ─── Sleep tool ───
@@ -16157,12 +16788,14 @@ function createShellTools(_config, hooks) {
16157
16788
  signal: controller.signal
16158
16789
  });
16159
16790
  if (native && Array.isArray(native.results) && native.results.length > 0) {
16791
+ const results = normalizeSearchResults(native.results, maxResults);
16160
16792
  return {
16161
16793
  query,
16162
- resultCount: native.results.length,
16163
- results: native.results,
16794
+ resultCount: results.length,
16795
+ results,
16164
16796
  summary: native.summary,
16165
16797
  provider: native.provider,
16798
+ backend: native.provider,
16166
16799
  durationSeconds: native.durationSeconds
16167
16800
  };
16168
16801
  }
@@ -16213,11 +16846,13 @@ function createShellTools(_config, hooks) {
16213
16846
  results = results.filter((r) => !blockedDomains.some((d) => urlMatchesDomain(r.url, d)));
16214
16847
  }
16215
16848
  if (results.length > 0) {
16849
+ const normalizedResults = normalizeSearchResults(results, maxResults);
16216
16850
  return {
16217
16851
  query,
16218
- resultCount: results.length,
16219
- results,
16852
+ resultCount: normalizedResults.length,
16853
+ results: normalizedResults,
16220
16854
  provider: `duckduckgo-${ep.kind}`,
16855
+ backend: `duckduckgo-${ep.kind}`,
16221
16856
  ...nativeError ? { nativeSearchError: nativeError } : {}
16222
16857
  };
16223
16858
  }
@@ -16282,12 +16917,23 @@ var init_shell = __esm({
16282
16917
  "../openjaw-mcp/dist/tools/shell.js"() {
16283
16918
  "use strict";
16284
16919
  init_web_search_types();
16920
+ init_url_safety();
16285
16921
  __name(truncateOutput, "truncateOutput");
16286
16922
  DECODE_ENTITIES = /* @__PURE__ */ __name((s) => s.replace(/&amp;/g, "&").replace(/&nbsp;/g, " ").replace(/&#x27;/g, "'").replace(/&quot;/g, '"').replace(/&lt;/g, "<").replace(/&gt;/g, ">"), "DECODE_ENTITIES");
16287
16923
  __name(parseLiteResults, "parseLiteResults");
16288
16924
  __name(parseHtmlResults, "parseHtmlResults");
16289
16925
  __name(isAnomalyPage, "isAnomalyPage");
16290
16926
  __name(urlMatchesDomain, "urlMatchesDomain");
16927
+ __name(normalizeSearchResults, "normalizeSearchResults");
16928
+ __name(clampNumber, "clampNumber");
16929
+ __name(extractHtmlTitle, "extractHtmlTitle");
16930
+ __name(cleanHtmlForMarkdown, "cleanHtmlForMarkdown");
16931
+ __name(htmlToText, "htmlToText");
16932
+ __name(isPdfLike, "isPdfLike");
16933
+ __name(htmlToMarkdown, "htmlToMarkdown");
16934
+ __name(paginateContent, "paginateContent");
16935
+ __name(fetchWithValidatedRedirects, "fetchWithValidatedRedirects");
16936
+ __name(extractUrlContent, "extractUrlContent");
16291
16937
  __name(createShellTools, "createShellTools");
16292
16938
  }
16293
16939
  });
@@ -19662,6 +20308,7 @@ var init_categories = __esm({
19662
20308
  ["clipboard_", "system"],
19663
20309
  ["web_fetch", "system"],
19664
20310
  ["web_search", "system"],
20311
+ ["web_extract", "system"],
19665
20312
  ["notify", "system"],
19666
20313
  ["sleep", "system"],
19667
20314
  ["ask_user", "system"],
@@ -20243,6 +20890,8 @@ function parseSkillFile(content, filename) {
20243
20890
  whenToUse: meta.whenToUse || meta.when_to_use,
20244
20891
  tools: Array.isArray(meta.tools) ? meta.tools : meta.tools ? parseYamlArray(meta.tools) : void 0,
20245
20892
  model: meta.model,
20893
+ execution: meta.execution === "fork" ? "fork" : meta.execution === "inline" ? "inline" : void 0,
20894
+ timeoutMs: typeof meta.timeoutMs === "number" ? meta.timeoutMs : typeof meta.timeout_ms === "number" ? meta.timeout_ms : void 0,
20246
20895
  version: meta.version,
20247
20896
  author: meta.author,
20248
20897
  platforms: Array.isArray(meta.platforms) ? meta.platforms : void 0,
@@ -20522,8 +21171,10 @@ function getSkillsSection() {
20522
21171
  const content = `# Available Skills
20523
21172
 
20524
21173
  Use the \`invoke_skill\` tool to execute any skill listed below.
20525
- When a skill matches the user's request, invoke it instead of doing the task manually.
20526
- Skills run in an isolated context with their own conversation \u2014 they won't pollute your main conversation.
21174
+ Skills are for specialized, multi-step workflows that materially benefit from the skill's process.
21175
+ Do NOT invoke a skill for simple one-off questions, quick factual/current-info lookups, basic summaries, or tasks a visible built-in/MCP tool can answer in 1-2 calls.
21176
+ For news/latest/current events, use \`web_search\` directly. For selected URLs, use \`web_extract\` directly.
21177
+ When a skill truly matches a complex workflow, invoke it. Most skills load inline into the current turn so you can continue with visible tools. Only skills marked for fork execution run as isolated sub-agents and may take longer than ordinary tools.
20527
21178
 
20528
21179
  To create new skills or improve existing ones, use \`invoke_skill("skill-creator")\`.
20529
21180
  New skills must be saved to \`~/.openjaw-agent/skills/\` (user skills directory).
@@ -26864,10 +27515,38 @@ __export(skill_tool_exports, {
26864
27515
  createSkillTool: () => createSkillTool
26865
27516
  });
26866
27517
  import { readFileSync as readFileSync23, writeFileSync as writeFileSync15 } from "node:fs";
27518
/**
 * Resolve the global timeout for forked skills.
 *
 * Reads the OPENJAW_SKILL_TIMEOUT_MS environment variable. A positive finite
 * number is floored and clamped to [10000, 600000] ms; anything else yields
 * DEFAULT_SKILL_TIMEOUT_MS.
 *
 * @returns {number} Timeout in milliseconds.
 */
function resolveSkillTimeoutMs() {
  const envValue = process.env["OPENJAW_SKILL_TIMEOUT_MS"];
  if (envValue) {
    const requested = Number(envValue);
    if (Number.isFinite(requested) && requested > 0) {
      const wholeMs = Math.floor(requested);
      const atLeastMinimum = Math.max(wholeMs, 1e4);
      return Math.min(atLeastMinimum, 6e5);
    }
  }
  return DEFAULT_SKILL_TIMEOUT_MS;
}
27526
/**
 * Resolve the timeout for one forked skill run.
 *
 * A positive finite `skill.meta.timeoutMs` wins and is floored and clamped to
 * [10000, 600000] ms; otherwise the value from resolveSkillTimeoutMs is used.
 *
 * @param {{meta: {timeoutMs?: number}}} skill - Skill whose metadata is consulted.
 * @returns {number} Timeout in milliseconds.
 */
function resolveForkTimeoutMs(skill) {
  const { timeoutMs } = skill.meta;
  const isUsable = typeof timeoutMs === "number" && Number.isFinite(timeoutMs) && timeoutMs > 0;
  if (!isUsable) {
    return resolveSkillTimeoutMs();
  }
  const atLeastMinimum = Math.max(Math.floor(timeoutMs), 1e4);
  return Math.min(atLeastMinimum, 6e5);
}
27533
/**
 * Await a promise for at most `ms` milliseconds.
 *
 * Resolves with the promise's value if it settles first, or with the
 * SKILL_TIMEOUT sentinel if the timer fires first. A rejection of `promise`
 * that wins the race propagates to the caller. The timer is always cleared.
 *
 * @param {Promise<*>} promise - Work to wait for.
 * @param {number} ms - Maximum wait in milliseconds.
 * @returns {Promise<*>} The promise's value or SKILL_TIMEOUT.
 */
async function withTimeout(promise, ms) {
  let timer;
  try {
    const expiry = new Promise((settle) => {
      timer = setTimeout(() => settle(SKILL_TIMEOUT), ms);
    });
    const winner = await Promise.race([promise, expiry]);
    return winner;
  } finally {
    if (timer) clearTimeout(timer);
  }
}
26867
27546
  function createSkillTool(config, toolRegistry, systemPromptFn) {
26868
27547
  return {
26869
27548
  name: "invoke_skill",
26870
- description: "Execute a skill from the available skills list. Skills provide specialized multi-step workflows. Use this when the user's request matches a listed skill.",
27549
+ description: "Execute a skill from the available skills list. Skills are for specialized multi-step workflows. Do not use for simple one-off lookups, quick current-news/factual questions, or tasks a direct built-in tool can answer in 1-2 calls.",
26871
27550
  parameters: {
26872
27551
  type: "object",
26873
27552
  properties: {
@@ -26878,6 +27557,11 @@ function createSkillTool(config, toolRegistry, systemPromptFn) {
26878
27557
  args: {
26879
27558
  type: "string",
26880
27559
  description: "Optional arguments or context for the skill"
27560
+ },
27561
+ mode: {
27562
+ type: "string",
27563
+ enum: ["inline", "fork"],
27564
+ description: "Execution mode. Default inline loads instructions into this turn; fork runs an isolated sub-agent for skills that explicitly support long-running execution."
26881
27565
  }
26882
27566
  },
26883
27567
  required: ["skill"]
@@ -26885,6 +27569,7 @@ function createSkillTool(config, toolRegistry, systemPromptFn) {
26885
27569
  execute: /* @__PURE__ */ __name(async (input) => {
26886
27570
  const skillName = input.skill.trim().replace(/^\//, "");
26887
27571
  const args = input.args || "";
27572
+ const requestedMode = input.mode === "fork" ? "fork" : input.mode === "inline" ? "inline" : void 0;
26888
27573
  const skill = findSkill(skillName);
26889
27574
  if (!skill) {
26890
27575
  clearSkillRegistry();
@@ -26892,17 +27577,39 @@ function createSkillTool(config, toolRegistry, systemPromptFn) {
26892
27577
  if (!retrySkill) {
26893
27578
  return { success: false, error: `Skill "${skillName}" not found. Check /skills for available skills.` };
26894
27579
  }
26895
- return await executeSkill(retrySkill.name, args, config, toolRegistry, systemPromptFn);
27580
+ return await executeSkill(retrySkill, args, requestedMode, config, toolRegistry, systemPromptFn);
26896
27581
  }
26897
- return await executeSkill(skill.name, args, config, toolRegistry, systemPromptFn);
27582
+ return await executeSkill(skill, args, requestedMode, config, toolRegistry, systemPromptFn);
26898
27583
  }, "execute")
26899
27584
  };
26900
27585
  }
26901
- async function executeSkill(skillName, args, config, toolRegistry, systemPromptFn) {
27586
/**
 * Run a skill either inline or as a forked sub-agent.
 *
 * The mode is the explicitly requested one, else the skill's own
 * `meta.execution`, else "inline". Fork mode delegates to executeForkedSkill;
 * inline mode returns the result of loadInlineSkill.
 *
 * @param {object} skill - Resolved skill record (uses `name` and `meta.execution`).
 * @param {string} args - Optional user arguments for the skill.
 * @param {"inline"|"fork"|undefined} requestedMode - Mode asked for by the caller.
 * @param {object} config - Passed through to executeForkedSkill.
 * @param {object} toolRegistry - Passed through to executeForkedSkill.
 * @param {Function} systemPromptFn - Passed through to executeForkedSkill.
 * @returns {Promise<object>} The skill result, or `{ success: false, error }`
 *   when the skill prompt could not be loaded.
 */
async function executeSkill(skill, args, requestedMode, config, toolRegistry, systemPromptFn) {
  const { name: skillName } = skill;
  const body = loadSkillPrompt(skillName);
  if (!body) {
    return { success: false, error: `Could not load skill content for "${skillName}"` };
  }
  const mode = requestedMode ?? skill.meta.execution ?? "inline";
  return mode === "fork"
    ? await executeForkedSkill(skill, body, args, config, toolRegistry, systemPromptFn)
    : loadInlineSkill(skill, body, args);
}
27598
/**
 * Build the tool result that hands a skill's instructions to the current
 * conversation instead of running it as a sub-agent.
 *
 * When `args` is non-empty it is appended to the skill body under a
 * "# User instructions for this skill" heading.
 *
 * @param {{name: string}} skill - Skill being loaded.
 * @param {string} body - The skill's prompt text.
 * @param {string} args - Optional user arguments for the skill.
 * @returns {{success: true, skill: string, mode: "inline", message: string,
 *   instructions: string}}
 */
function loadInlineSkill(skill, body, args) {
  let instructions = body;
  if (args) {
    instructions = `${body}\n\n# User instructions for this skill\n${args}`;
  }
  return {
    success: true,
    skill: skill.name,
    mode: "inline",
    message: "Skill instructions loaded inline. Continue the task in the current conversation using these instructions and visible tools.",
    instructions
  };
}
27611
+ async function executeForkedSkill(skill, body, args, config, toolRegistry, systemPromptFn) {
27612
+ const skillName = skill.name;
26906
27613
  const baseSections = await systemPromptFn();
26907
27614
  const staticSections = baseSections.slice(0, 4).filter(Boolean);
26908
27615
  const skillPrompt = [...staticSections, body].join("\n\n");
@@ -26912,7 +27619,34 @@ async function executeSkill(skillName, args, config, toolRegistry, systemPromptF
26912
27619
  let lastAnswer = "";
26913
27620
  let thinking = "";
26914
27621
  const allChunks = [];
26915
- for await (const chunk of forkLoop.run(userMessage, skillPrompt)) {
27622
+ const timeoutMs = resolveForkTimeoutMs(skill);
27623
+ const deadline = Date.now() + timeoutMs;
27624
+ const iterator = forkLoop.run(userMessage, skillPrompt)[Symbol.asyncIterator]();
27625
+ while (true) {
27626
+ const remaining = deadline - Date.now();
27627
+ if (remaining <= 0) {
27628
+ forkLoop.abort();
27629
+ return {
27630
+ success: false,
27631
+ skill: skillName,
27632
+ timeoutMs,
27633
+ error: `Skill "${skillName}" exceeded ${Math.round(timeoutMs / 1e3)}s. Use direct tools for simple requests, or rerun with narrower instructions.`,
27634
+ partial: lastAnswer || thinking || void 0
27635
+ };
27636
+ }
27637
+ const next = await withTimeout(iterator.next(), remaining);
27638
+ if (next === SKILL_TIMEOUT) {
27639
+ forkLoop.abort();
27640
+ return {
27641
+ success: false,
27642
+ skill: skillName,
27643
+ timeoutMs,
27644
+ error: `Skill "${skillName}" exceeded ${Math.round(timeoutMs / 1e3)}s. Use direct tools for simple requests, or rerun with narrower instructions.`,
27645
+ partial: lastAnswer || thinking || void 0
27646
+ };
27647
+ }
27648
+ if (next.done) break;
27649
+ const chunk = next.value;
26916
27650
  if (chunk.type === "answer" && chunk.content) lastAnswer = chunk.content;
26917
27651
  if (chunk.type === "thinking" && chunk.content) thinking += chunk.content;
26918
27652
  allChunks.push({ type: chunk.type, content: chunk.content });
@@ -26920,14 +27654,14 @@ async function executeSkill(skillName, args, config, toolRegistry, systemPromptF
26920
27654
  const result = lastAnswer || thinking || "Skill completed (no output)";
26921
27655
  const compressed = result.length > 2e3 ? result.slice(0, 2e3) + `
26922
27656
  ...(${result.length} chars total, truncated)` : result;
26923
- const skill = findSkill(skillName);
26924
- if (skill?.filePath && skill.source === "user") {
27657
+ const currentSkill = findSkill(skillName);
27658
+ if (currentSkill?.filePath && currentSkill.source === "user") {
26925
27659
  const lessons = extractLessons(allChunks);
26926
27660
  if (lessons) {
26927
- appendLessonsLearned(skill.filePath, lessons);
27661
+ appendLessonsLearned(currentSkill.filePath, lessons);
26928
27662
  }
26929
27663
  }
26930
- return { success: true, skill: skillName, result: compressed };
27664
+ return { success: true, skill: skillName, mode: "fork", result: compressed };
26931
27665
  } catch (err) {
26932
27666
  return { success: false, skill: skillName, error: err instanceof Error ? err.message : String(err) };
26933
27667
  }
@@ -26968,13 +27702,21 @@ function appendLessonsLearned(skillPath, lessons) {
26968
27702
  } catch {
26969
27703
  }
26970
27704
  }
27705
+ var DEFAULT_SKILL_TIMEOUT_MS, SKILL_TIMEOUT;
26971
27706
  var init_skill_tool = __esm({
26972
27707
  "src/tools/skill-tool.ts"() {
26973
27708
  "use strict";
26974
27709
  init_agent_loop();
26975
27710
  init_registry2();
27711
+ DEFAULT_SKILL_TIMEOUT_MS = 12e4;
27712
+ SKILL_TIMEOUT = /* @__PURE__ */ Symbol("skill-timeout");
27713
+ __name(resolveSkillTimeoutMs, "resolveSkillTimeoutMs");
27714
+ __name(resolveForkTimeoutMs, "resolveForkTimeoutMs");
27715
+ __name(withTimeout, "withTimeout");
26976
27716
  __name(createSkillTool, "createSkillTool");
26977
27717
  __name(executeSkill, "executeSkill");
27718
+ __name(loadInlineSkill, "loadInlineSkill");
27719
+ __name(executeForkedSkill, "executeForkedSkill");
26978
27720
  __name(extractLessons, "extractLessons");
26979
27721
  __name(appendLessonsLearned, "appendLessonsLearned");
26980
27722
  }
@@ -27827,13 +28569,13 @@ var init_feishu = __esm({
27827
28569
  if (stat2.isFile() && stat2.size < 30 * 1024 * 1024) {
27828
28570
  const fileType = this.getFeishuFileType(fileName);
27829
28571
  this.emit({ type: "system", content: `\u{1F4E4} Uploading to Feishu: ${fileName} (${(stat2.size / 1024).toFixed(0)}KB, type=${fileType})` });
27830
- const withTimeout = /* @__PURE__ */ __name((promise, ms, label) => Promise.race([
28572
+ const withTimeout2 = /* @__PURE__ */ __name((promise, ms, label) => Promise.race([
27831
28573
  promise,
27832
28574
  new Promise((_, reject) => setTimeout(() => reject(new Error(`${label} timed out after ${ms / 1e3}s`)), ms))
27833
28575
  ]), "withTimeout");
27834
28576
  const { createReadStream: crs } = await import("node:fs");
27835
28577
  const t0 = Date.now();
27836
- const uploadResp = await withTimeout(
28578
+ const uploadResp = await withTimeout2(
27837
28579
  this.client.im.file.create({
27838
28580
  data: {
27839
28581
  file_type: fileType,
@@ -27851,7 +28593,7 @@ var init_feishu = __esm({
27851
28593
  this.emit({ type: "system", content: `\u{1F4E4} Upload done in ${(uploadMs / 1e3).toFixed(1)}s: code=${respCode}, file_key=${fileKey ? "\u2713" : "\u2717"}` });
27852
28594
  if (fileKey) {
27853
28595
  const t1 = Date.now();
27854
- const sendResp = await withTimeout(
28596
+ const sendResp = await withTimeout2(
27855
28597
  this.client.im.message.create({
27856
28598
  params: { receive_id_type: "chat_id" },
27857
28599
  data: {
@@ -59428,7 +60170,7 @@ var init_emoji = __esm({
59428
60170
  });
59429
60171
 
59430
60172
  // src/lib/externalLink.ts
59431
- import { isIP } from "node:net";
60173
+ import { isIP as isIP2 } from "node:net";
59432
60174
  import { useEffect as useEffect21, useMemo as useMemo13, useState as useState25 } from "react";
59433
60175
  function normalizeExternalUrl(value) {
59434
60176
  const trimmed = value.trim();
@@ -59534,13 +60276,13 @@ function isPrivateIpv6(value) {
59534
60276
  }
59535
60277
  return false;
59536
60278
  }
59537
- function normalizeHostname(value) {
60279
+ function normalizeHostname2(value) {
59538
60280
  const withoutBrackets = value.replace(/^\[/, "").replace(/\]$/, "");
59539
60281
  const withoutZoneId = withoutBrackets.split("%", 1)[0];
59540
60282
  return withoutZoneId.replace(/\.$/, "").toLowerCase();
59541
60283
  }
59542
60284
  function isPrivateOrLocalHost(hostname) {
59543
- const normalized = normalizeHostname(hostname);
60285
+ const normalized = normalizeHostname2(hostname);
59544
60286
  if (!normalized) {
59545
60287
  return true;
59546
60288
  }
@@ -59550,7 +60292,7 @@ function isPrivateOrLocalHost(hostname) {
59550
60292
  if (LOCAL_HOST_SUFFIXES.some((suffix) => normalized.endsWith(suffix))) {
59551
60293
  return true;
59552
60294
  }
59553
- const ipVersion = isIP(normalized);
60295
+ const ipVersion = isIP2(normalized);
59554
60296
  if (ipVersion === 4) {
59555
60297
  return isPrivateIpv4(normalized);
59556
60298
  }
@@ -59734,7 +60476,7 @@ var init_externalLink = __esm({
59734
60476
  __name(parseIpv4Octets, "parseIpv4Octets");
59735
60477
  __name(isPrivateIpv4, "isPrivateIpv4");
59736
60478
  __name(isPrivateIpv6, "isPrivateIpv6");
59737
- __name(normalizeHostname, "normalizeHostname");
60479
+ __name(normalizeHostname2, "normalizeHostname");
59738
60480
  __name(isPrivateOrLocalHost, "isPrivateOrLocalHost");
59739
60481
  __name(isTitleFetchable, "isTitleFetchable");
59740
60482
  __name(decodeHtmlEntities, "decodeHtmlEntities");