@everworker/oneringai 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -5,6 +5,8 @@ var jose = require('jose');
5
5
  var fs16 = require('fs');
6
6
  var eventemitter3 = require('eventemitter3');
7
7
  var path2 = require('path');
8
+ var TurndownService = require('turndown');
9
+ var readability = require('@mozilla/readability');
8
10
  var os2 = require('os');
9
11
  var OpenAI3 = require('openai');
10
12
  var Anthropic = require('@anthropic-ai/sdk');
@@ -12,6 +14,7 @@ var genai = require('@google/genai');
12
14
  require('zod/v3');
13
15
  var z4mini = require('zod/v4-mini');
14
16
  var z = require('zod/v4');
17
+ var spawn = require('cross-spawn');
15
18
  var process2 = require('process');
16
19
  var stream = require('stream');
17
20
  var fs15 = require('fs/promises');
@@ -19,8 +22,6 @@ var simpleIcons = require('simple-icons');
19
22
  var child_process = require('child_process');
20
23
  var util = require('util');
21
24
  var cheerio = require('cheerio');
22
- var TurndownService = require('turndown');
23
- var readability = require('@mozilla/readability');
24
25
  var vm = require('vm');
25
26
 
26
27
  function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
@@ -46,15 +47,16 @@ function _interopNamespace(e) {
46
47
  var crypto2__namespace = /*#__PURE__*/_interopNamespace(crypto2);
47
48
  var fs16__namespace = /*#__PURE__*/_interopNamespace(fs16);
48
49
  var path2__namespace = /*#__PURE__*/_interopNamespace(path2);
50
+ var TurndownService__default = /*#__PURE__*/_interopDefault(TurndownService);
49
51
  var os2__namespace = /*#__PURE__*/_interopNamespace(os2);
50
52
  var OpenAI3__default = /*#__PURE__*/_interopDefault(OpenAI3);
51
53
  var Anthropic__default = /*#__PURE__*/_interopDefault(Anthropic);
52
54
  var z4mini__namespace = /*#__PURE__*/_interopNamespace(z4mini);
53
55
  var z__namespace = /*#__PURE__*/_interopNamespace(z);
56
+ var spawn__default = /*#__PURE__*/_interopDefault(spawn);
54
57
  var process2__default = /*#__PURE__*/_interopDefault(process2);
55
58
  var fs15__namespace = /*#__PURE__*/_interopNamespace(fs15);
56
59
  var simpleIcons__namespace = /*#__PURE__*/_interopNamespace(simpleIcons);
57
- var TurndownService__default = /*#__PURE__*/_interopDefault(TurndownService);
58
60
  var vm__namespace = /*#__PURE__*/_interopNamespace(vm);
59
61
 
60
62
  var __create = Object.create;
@@ -2262,6 +2264,66 @@ var init_Connector = __esm({
2262
2264
  }
2263
2265
  });
2264
2266
 
2267
+ // src/core/constants.ts
2268
+ var AGENT_DEFAULTS, TOKEN_ESTIMATION, DOCUMENT_DEFAULTS;
2269
+ var init_constants = __esm({
2270
+ "src/core/constants.ts"() {
2271
+ AGENT_DEFAULTS = {
2272
+ /** Default maximum iterations for agentic loop */
2273
+ MAX_ITERATIONS: 50,
2274
+ /** Default temperature for LLM calls */
2275
+ DEFAULT_TEMPERATURE: 0.7,
2276
+ /** Message injected when max iterations is reached */
2277
+ MAX_ITERATIONS_MESSAGE: `You have reached the maximum iteration limit for this execution. Please:
2278
+ 1. Summarize what you have accomplished so far
2279
+ 2. Explain what remains to be done (if anything)
2280
+ 3. Ask the user if they would like you to continue
2281
+
2282
+ Do NOT use any tools in this response - just provide a clear summary and ask for confirmation to proceed.`
2283
+ };
2284
+ TOKEN_ESTIMATION = {
2285
+ /** Characters per token for code */
2286
+ CODE_CHARS_PER_TOKEN: 3,
2287
+ /** Characters per token for prose */
2288
+ PROSE_CHARS_PER_TOKEN: 4,
2289
+ /** Characters per token for mixed content */
2290
+ MIXED_CHARS_PER_TOKEN: 3.5,
2291
+ /** Default characters per token */
2292
+ DEFAULT_CHARS_PER_TOKEN: 4
2293
+ };
2294
+ DOCUMENT_DEFAULTS = {
2295
+ /** Maximum estimated tokens in output */
2296
+ MAX_OUTPUT_TOKENS: 1e5,
2297
+ /** Maximum output size in bytes (5MB) */
2298
+ MAX_OUTPUT_BYTES: 5 * 1024 * 1024,
2299
+ /** Maximum download size for URL sources (50MB) */
2300
+ MAX_DOWNLOAD_SIZE_BYTES: 50 * 1024 * 1024,
2301
+ /** Download timeout for URL sources */
2302
+ DOWNLOAD_TIMEOUT_MS: 6e4,
2303
+ /** Maximum extracted images from a single document */
2304
+ MAX_EXTRACTED_IMAGES: 50,
2305
+ /** Maximum Excel rows per sheet */
2306
+ MAX_EXCEL_ROWS: 1e3,
2307
+ /** Maximum Excel columns per sheet */
2308
+ MAX_EXCEL_COLUMNS: 50,
2309
+ /** Maximum HTML content length */
2310
+ MAX_HTML_LENGTH: 5e4,
2311
+ /** Characters per token estimate */
2312
+ CHARS_PER_TOKEN: 4,
2313
+ /** Estimated tokens for an image with auto detail */
2314
+ IMAGE_TOKENS_AUTO: 765,
2315
+ /** Estimated tokens for an image with low detail */
2316
+ IMAGE_TOKENS_LOW: 85,
2317
+ /** Image filter defaults */
2318
+ IMAGE_FILTER: {
2319
+ MIN_WIDTH: 50,
2320
+ MIN_HEIGHT: 50,
2321
+ MIN_SIZE_BYTES: 1024
2322
+ }
2323
+ };
2324
+ }
2325
+ });
2326
+
2265
2327
  // node_modules/@modelcontextprotocol/sdk/node_modules/ajv/dist/compile/codegen/code.js
2266
2328
  var require_code = __commonJS({
2267
2329
  "node_modules/@modelcontextprotocol/sdk/node_modules/ajv/dist/compile/codegen/code.js"(exports$1) {
@@ -14575,491 +14637,961 @@ var require_dist = __commonJS({
14575
14637
  }
14576
14638
  });
14577
14639
 
14578
- // node_modules/isexe/windows.js
14579
- var require_windows = __commonJS({
14580
- "node_modules/isexe/windows.js"(exports$1, module) {
14581
- module.exports = isexe;
14582
- isexe.sync = sync;
14583
- var fs17 = __require("fs");
14584
- function checkPathExt(path6, options) {
14585
- var pathext = options.pathExt !== void 0 ? options.pathExt : process.env.PATHEXT;
14586
- if (!pathext) {
14587
- return true;
14588
- }
14589
- pathext = pathext.split(";");
14590
- if (pathext.indexOf("") !== -1) {
14591
- return true;
14640
+ // src/capabilities/documents/handlers/TextHandler.ts
14641
+ var CODE_FENCE_FORMATS, TextHandler;
14642
+ var init_TextHandler = __esm({
14643
+ "src/capabilities/documents/handlers/TextHandler.ts"() {
14644
+ init_constants();
14645
+ CODE_FENCE_FORMATS = {
14646
+ json: "json",
14647
+ xml: "xml",
14648
+ yaml: "yaml",
14649
+ yml: "yaml"
14650
+ };
14651
+ TextHandler = class {
14652
+ name = "TextHandler";
14653
+ supportedFormats = ["txt", "md", "json", "xml", "yaml", "yml"];
14654
+ async handle(buffer, filename, format, _options) {
14655
+ const text = buffer.toString("utf-8");
14656
+ const fenceLanguage = CODE_FENCE_FORMATS[format];
14657
+ const content = fenceLanguage ? `\`\`\`${fenceLanguage}
14658
+ ${text}
14659
+ \`\`\`` : text;
14660
+ const sizeBytes = Buffer.byteLength(content, "utf-8");
14661
+ return [
14662
+ {
14663
+ type: "text",
14664
+ content,
14665
+ metadata: {
14666
+ sourceFilename: filename,
14667
+ format,
14668
+ index: 0,
14669
+ sizeBytes,
14670
+ estimatedTokens: Math.ceil(sizeBytes / DOCUMENT_DEFAULTS.CHARS_PER_TOKEN)
14671
+ }
14672
+ }
14673
+ ];
14592
14674
  }
14593
- for (var i = 0; i < pathext.length; i++) {
14594
- var p = pathext[i].toLowerCase();
14595
- if (p && path6.substr(-p.length).toLowerCase() === p) {
14596
- return true;
14675
+ };
14676
+ }
14677
+ });
14678
+
14679
+ // src/capabilities/documents/handlers/ImageHandler.ts
14680
+ var MIME_TYPES, ImageHandler;
14681
+ var init_ImageHandler = __esm({
14682
+ "src/capabilities/documents/handlers/ImageHandler.ts"() {
14683
+ init_constants();
14684
+ MIME_TYPES = {
14685
+ png: "image/png",
14686
+ jpg: "image/jpeg",
14687
+ jpeg: "image/jpeg",
14688
+ gif: "image/gif",
14689
+ webp: "image/webp",
14690
+ svg: "image/svg+xml"
14691
+ };
14692
+ ImageHandler = class {
14693
+ name = "ImageHandler";
14694
+ supportedFormats = ["png", "jpg", "jpeg", "gif", "webp", "svg"];
14695
+ async handle(buffer, filename, format, _options) {
14696
+ const pieces = [];
14697
+ const mimeType = MIME_TYPES[format] || "application/octet-stream";
14698
+ pieces.push({
14699
+ type: "image",
14700
+ base64: buffer.toString("base64"),
14701
+ mimeType,
14702
+ metadata: {
14703
+ sourceFilename: filename,
14704
+ format,
14705
+ index: 0,
14706
+ sizeBytes: buffer.length,
14707
+ estimatedTokens: DOCUMENT_DEFAULTS.IMAGE_TOKENS_AUTO,
14708
+ label: filename
14709
+ }
14710
+ });
14711
+ if (format === "svg") {
14712
+ const svgText = buffer.toString("utf-8");
14713
+ const sizeBytes = Buffer.byteLength(svgText, "utf-8");
14714
+ pieces.push({
14715
+ type: "text",
14716
+ content: `\`\`\`svg
14717
+ ${svgText}
14718
+ \`\`\``,
14719
+ metadata: {
14720
+ sourceFilename: filename,
14721
+ format,
14722
+ index: 1,
14723
+ section: "SVG source",
14724
+ sizeBytes,
14725
+ estimatedTokens: Math.ceil(sizeBytes / DOCUMENT_DEFAULTS.CHARS_PER_TOKEN)
14726
+ }
14727
+ });
14597
14728
  }
14729
+ return pieces;
14598
14730
  }
14599
- return false;
14600
- }
14601
- function checkStat(stat6, path6, options) {
14602
- if (!stat6.isSymbolicLink() && !stat6.isFile()) {
14603
- return false;
14604
- }
14605
- return checkPathExt(path6, options);
14606
- }
14607
- function isexe(path6, options, cb) {
14608
- fs17.stat(path6, function(er, stat6) {
14609
- cb(er, er ? false : checkStat(stat6, path6, options));
14610
- });
14611
- }
14612
- function sync(path6, options) {
14613
- return checkStat(fs17.statSync(path6), path6, options);
14614
- }
14731
+ };
14615
14732
  }
14616
14733
  });
14617
-
14618
- // node_modules/isexe/mode.js
14619
- var require_mode = __commonJS({
14620
- "node_modules/isexe/mode.js"(exports$1, module) {
14621
- module.exports = isexe;
14622
- isexe.sync = sync;
14623
- var fs17 = __require("fs");
14624
- function isexe(path6, options, cb) {
14625
- fs17.stat(path6, function(er, stat6) {
14626
- cb(er, er ? false : checkStat(stat6, options));
14627
- });
14628
- }
14629
- function sync(path6, options) {
14630
- return checkStat(fs17.statSync(path6), options);
14734
+ async function getJSDOM() {
14735
+ if (!JSDOM) {
14736
+ const jsdom = await import('jsdom');
14737
+ JSDOM = jsdom.JSDOM;
14738
+ }
14739
+ return JSDOM;
14740
+ }
14741
+ async function htmlToMarkdown(html, url2, maxLength = 5e4) {
14742
+ const JSDOMClass = await getJSDOM();
14743
+ const dom = new JSDOMClass(html, { url: url2 });
14744
+ const document = dom.window.document;
14745
+ let title = document.title || "";
14746
+ let byline;
14747
+ let excerpt;
14748
+ let contentHtml = html;
14749
+ let wasReadabilityUsed = false;
14750
+ try {
14751
+ const clonedDoc = document.cloneNode(true);
14752
+ const reader = new readability.Readability(clonedDoc);
14753
+ const article = reader.parse();
14754
+ if (article && article.content && article.content.length > 100) {
14755
+ contentHtml = article.content;
14756
+ title = article.title || title;
14757
+ byline = article.byline || void 0;
14758
+ excerpt = article.excerpt || void 0;
14759
+ wasReadabilityUsed = true;
14631
14760
  }
14632
- function checkStat(stat6, options) {
14633
- return stat6.isFile() && checkMode(stat6, options);
14761
+ } catch {
14762
+ }
14763
+ const turndown = new TurndownService__default.default({
14764
+ headingStyle: "atx",
14765
+ codeBlockStyle: "fenced",
14766
+ bulletListMarker: "-",
14767
+ emDelimiter: "_"
14768
+ });
14769
+ turndown.remove(["script", "style", "nav", "footer", "aside", "iframe", "noscript"]);
14770
+ turndown.addRule("pre", {
14771
+ filter: ["pre"],
14772
+ replacement: (content, node) => {
14773
+ const element = node;
14774
+ const code = element.querySelector?.("code");
14775
+ const lang = code?.className?.match(/language-(\w+)/)?.[1] || "";
14776
+ const text = code?.textContent || content;
14777
+ return `
14778
+ \`\`\`${lang}
14779
+ ${text}
14780
+ \`\`\`
14781
+ `;
14634
14782
  }
14635
- function checkMode(stat6, options) {
14636
- var mod = stat6.mode;
14637
- var uid = stat6.uid;
14638
- var gid = stat6.gid;
14639
- var myUid = options.uid !== void 0 ? options.uid : process.getuid && process.getuid();
14640
- var myGid = options.gid !== void 0 ? options.gid : process.getgid && process.getgid();
14641
- var u = parseInt("100", 8);
14642
- var g = parseInt("010", 8);
14643
- var o = parseInt("001", 8);
14644
- var ug = u | g;
14645
- var ret = mod & o || mod & g && gid === myGid || mod & u && uid === myUid || mod & ug && myUid === 0;
14646
- return ret;
14783
+ });
14784
+ let markdown = turndown.turndown(contentHtml);
14785
+ markdown = markdown.replace(/\n{3,}/g, "\n\n").replace(/^\s+|\s+$/g, "").replace(/[ \t]+$/gm, "");
14786
+ let wasTruncated = false;
14787
+ if (markdown.length > maxLength) {
14788
+ const truncateAt = markdown.lastIndexOf("\n\n", maxLength);
14789
+ if (truncateAt > maxLength * 0.5) {
14790
+ markdown = markdown.slice(0, truncateAt) + "\n\n...[content truncated]";
14791
+ } else {
14792
+ markdown = markdown.slice(0, maxLength) + "...[truncated]";
14647
14793
  }
14794
+ wasTruncated = true;
14795
+ }
14796
+ return {
14797
+ markdown,
14798
+ title,
14799
+ byline,
14800
+ excerpt,
14801
+ wasReadabilityUsed,
14802
+ wasTruncated
14803
+ };
14804
+ }
14805
+ var JSDOM;
14806
+ var init_htmlToMarkdown = __esm({
14807
+ "src/tools/web/htmlToMarkdown.ts"() {
14808
+ JSDOM = null;
14809
+ }
14810
+ });
14811
+
14812
+ // src/capabilities/documents/handlers/HTMLHandler.ts
14813
+ var HTMLHandler;
14814
+ var init_HTMLHandler = __esm({
14815
+ "src/capabilities/documents/handlers/HTMLHandler.ts"() {
14816
+ init_constants();
14817
+ init_htmlToMarkdown();
14818
+ HTMLHandler = class {
14819
+ name = "HTMLHandler";
14820
+ supportedFormats = ["html"];
14821
+ async handle(buffer, filename, format, options) {
14822
+ const html = buffer.toString("utf-8");
14823
+ const maxLength = options.formatOptions?.html?.maxLength ?? DOCUMENT_DEFAULTS.MAX_HTML_LENGTH;
14824
+ const result = await htmlToMarkdown(html, `file://${filename}`, maxLength);
14825
+ const content = result.markdown;
14826
+ const sizeBytes = Buffer.byteLength(content, "utf-8");
14827
+ return [
14828
+ {
14829
+ type: "text",
14830
+ content,
14831
+ metadata: {
14832
+ sourceFilename: filename,
14833
+ format,
14834
+ index: 0,
14835
+ sizeBytes,
14836
+ estimatedTokens: Math.ceil(sizeBytes / DOCUMENT_DEFAULTS.CHARS_PER_TOKEN),
14837
+ label: result.title || void 0
14838
+ }
14839
+ }
14840
+ ];
14841
+ }
14842
+ };
14648
14843
  }
14649
14844
  });
14650
14845
 
14651
- // node_modules/isexe/index.js
14652
- var require_isexe = __commonJS({
14653
- "node_modules/isexe/index.js"(exports$1, module) {
14654
- __require("fs");
14655
- var core;
14656
- if (process.platform === "win32" || global.TESTING_WINDOWS) {
14657
- core = require_windows();
14658
- } else {
14659
- core = require_mode();
14660
- }
14661
- module.exports = isexe;
14662
- isexe.sync = sync;
14663
- function isexe(path6, options, cb) {
14664
- if (typeof options === "function") {
14665
- cb = options;
14666
- options = {};
14667
- }
14668
- if (!cb) {
14669
- if (typeof Promise !== "function") {
14670
- throw new TypeError("callback not provided");
14671
- }
14672
- return new Promise(function(resolve4, reject) {
14673
- isexe(path6, options || {}, function(er, is) {
14674
- if (er) {
14675
- reject(er);
14676
- } else {
14677
- resolve4(is);
14678
- }
14679
- });
14846
+ // src/capabilities/documents/handlers/OfficeHandler.ts
14847
+ async function getParseOffice() {
14848
+ if (!parseOffice) {
14849
+ const mod = await import('officeparser');
14850
+ parseOffice = mod.parseOffice;
14851
+ }
14852
+ return parseOffice;
14853
+ }
14854
+ var parseOffice, OfficeHandler;
14855
+ var init_OfficeHandler = __esm({
14856
+ "src/capabilities/documents/handlers/OfficeHandler.ts"() {
14857
+ init_constants();
14858
+ parseOffice = null;
14859
+ OfficeHandler = class {
14860
+ name = "OfficeHandler";
14861
+ supportedFormats = ["docx", "pptx", "odt", "odp", "ods", "rtf"];
14862
+ async handle(buffer, filename, format, options) {
14863
+ const parse = await getParseOffice();
14864
+ const extractImages = options.extractImages !== false;
14865
+ const includeSpeakerNotes = options.formatOptions?.office?.includeSpeakerNotes !== false;
14866
+ const ast = await parse(buffer, {
14867
+ extractAttachments: extractImages,
14868
+ ignoreNotes: !includeSpeakerNotes
14680
14869
  });
14870
+ const pieces = [];
14871
+ let pieceIndex = 0;
14872
+ const content = ast.content || [];
14873
+ const markdown = this.astToMarkdown(content, format);
14874
+ if (format === "pptx" || format === "odp") {
14875
+ const slides = this.splitBySlides(content);
14876
+ for (let i = 0; i < slides.length; i++) {
14877
+ const slideContent = this.astToMarkdown(slides[i] ?? [], format);
14878
+ if (slideContent.trim()) {
14879
+ const sizeBytes = Buffer.byteLength(slideContent, "utf-8");
14880
+ pieces.push({
14881
+ type: "text",
14882
+ content: slideContent,
14883
+ metadata: {
14884
+ sourceFilename: filename,
14885
+ format,
14886
+ index: pieceIndex++,
14887
+ section: `Slide ${i + 1}`,
14888
+ sizeBytes,
14889
+ estimatedTokens: Math.ceil(sizeBytes / DOCUMENT_DEFAULTS.CHARS_PER_TOKEN)
14890
+ }
14891
+ });
14892
+ }
14893
+ }
14894
+ } else {
14895
+ if (markdown.trim()) {
14896
+ const sizeBytes = Buffer.byteLength(markdown, "utf-8");
14897
+ pieces.push({
14898
+ type: "text",
14899
+ content: markdown,
14900
+ metadata: {
14901
+ sourceFilename: filename,
14902
+ format,
14903
+ index: pieceIndex++,
14904
+ sizeBytes,
14905
+ estimatedTokens: Math.ceil(sizeBytes / DOCUMENT_DEFAULTS.CHARS_PER_TOKEN)
14906
+ }
14907
+ });
14908
+ }
14909
+ }
14910
+ if (extractImages && ast.attachments?.length > 0) {
14911
+ for (const attachment of ast.attachments) {
14912
+ if (attachment.type === "image" && attachment.data) {
14913
+ const imageData = attachment.data;
14914
+ const sizeBytes = Math.ceil(imageData.length * 0.75);
14915
+ pieces.push({
14916
+ type: "image",
14917
+ base64: imageData,
14918
+ mimeType: attachment.mimeType || "image/png",
14919
+ metadata: {
14920
+ sourceFilename: filename,
14921
+ format,
14922
+ index: pieceIndex++,
14923
+ sizeBytes,
14924
+ estimatedTokens: DOCUMENT_DEFAULTS.IMAGE_TOKENS_AUTO,
14925
+ label: attachment.altText || attachment.name || void 0
14926
+ }
14927
+ });
14928
+ }
14929
+ }
14930
+ }
14931
+ return pieces;
14681
14932
  }
14682
- core(path6, options || {}, function(er, is) {
14683
- if (er) {
14684
- if (er.code === "EACCES" || options && options.ignoreErrors) {
14685
- er = null;
14686
- is = false;
14933
+ /**
14934
+ * Split AST content into slide groups
14935
+ */
14936
+ splitBySlides(content) {
14937
+ const slides = [];
14938
+ let currentSlide = [];
14939
+ for (const node of content) {
14940
+ if (node.type === "slide") {
14941
+ if (currentSlide.length > 0) {
14942
+ slides.push(currentSlide);
14943
+ }
14944
+ currentSlide = [node];
14945
+ } else {
14946
+ currentSlide.push(node);
14687
14947
  }
14688
14948
  }
14689
- cb(er, is);
14690
- });
14691
- }
14692
- function sync(path6, options) {
14693
- try {
14694
- return core.sync(path6, options || {});
14695
- } catch (er) {
14696
- if (options && options.ignoreErrors || er.code === "EACCES") {
14697
- return false;
14698
- } else {
14699
- throw er;
14949
+ if (currentSlide.length > 0) {
14950
+ slides.push(currentSlide);
14951
+ }
14952
+ if (slides.length === 0 && content.length > 0) {
14953
+ slides.push(content);
14700
14954
  }
14955
+ return slides;
14701
14956
  }
14702
- }
14703
- }
14704
- });
14705
-
14706
- // node_modules/which/which.js
14707
- var require_which = __commonJS({
14708
- "node_modules/which/which.js"(exports$1, module) {
14709
- var isWindows = process.platform === "win32" || process.env.OSTYPE === "cygwin" || process.env.OSTYPE === "msys";
14710
- var path6 = __require("path");
14711
- var COLON = isWindows ? ";" : ":";
14712
- var isexe = require_isexe();
14713
- var getNotFoundError = (cmd) => Object.assign(new Error(`not found: ${cmd}`), { code: "ENOENT" });
14714
- var getPathInfo = (cmd, opt) => {
14715
- const colon = opt.colon || COLON;
14716
- const pathEnv = cmd.match(/\//) || isWindows && cmd.match(/\\/) ? [""] : [
14717
- // windows always checks the cwd first
14718
- ...isWindows ? [process.cwd()] : [],
14719
- ...(opt.path || process.env.PATH || /* istanbul ignore next: very unusual */
14720
- "").split(colon)
14721
- ];
14722
- const pathExtExe = isWindows ? opt.pathExt || process.env.PATHEXT || ".EXE;.CMD;.BAT;.COM" : "";
14723
- const pathExt = isWindows ? pathExtExe.split(colon) : [""];
14724
- if (isWindows) {
14725
- if (cmd.indexOf(".") !== -1 && pathExt[0] !== "")
14726
- pathExt.unshift("");
14957
+ /**
14958
+ * Convert AST nodes to markdown
14959
+ */
14960
+ astToMarkdown(nodes, format) {
14961
+ const parts = [];
14962
+ for (const node of nodes) {
14963
+ const md = this.nodeToMarkdown(node, format);
14964
+ if (md) parts.push(md);
14965
+ }
14966
+ return parts.join("\n\n");
14727
14967
  }
14728
- return {
14729
- pathEnv,
14730
- pathExt,
14731
- pathExtExe
14732
- };
14733
- };
14734
- var which = (cmd, opt, cb) => {
14735
- if (typeof opt === "function") {
14736
- cb = opt;
14737
- opt = {};
14738
- }
14739
- if (!opt)
14740
- opt = {};
14741
- const { pathEnv, pathExt, pathExtExe } = getPathInfo(cmd, opt);
14742
- const found = [];
14743
- const step = (i) => new Promise((resolve4, reject) => {
14744
- if (i === pathEnv.length)
14745
- return opt.all && found.length ? resolve4(found) : reject(getNotFoundError(cmd));
14746
- const ppRaw = pathEnv[i];
14747
- const pathPart = /^".*"$/.test(ppRaw) ? ppRaw.slice(1, -1) : ppRaw;
14748
- const pCmd = path6.join(pathPart, cmd);
14749
- const p = !pathPart && /^\.[\\\/]/.test(cmd) ? cmd.slice(0, 2) + pCmd : pCmd;
14750
- resolve4(subStep(p, i, 0));
14751
- });
14752
- const subStep = (p, i, ii) => new Promise((resolve4, reject) => {
14753
- if (ii === pathExt.length)
14754
- return resolve4(step(i + 1));
14755
- const ext = pathExt[ii];
14756
- isexe(p + ext, { pathExt: pathExtExe }, (er, is) => {
14757
- if (!er && is) {
14758
- if (opt.all)
14759
- found.push(p + ext);
14760
- else
14761
- return resolve4(p + ext);
14968
+ /**
14969
+ * Convert a single AST node to markdown
14970
+ */
14971
+ nodeToMarkdown(node, format) {
14972
+ if (!node) return "";
14973
+ switch (node.type) {
14974
+ case "heading": {
14975
+ const level = node.metadata?.level || 1;
14976
+ const prefix = "#".repeat(Math.min(level, 6));
14977
+ return `${prefix} ${node.text || ""}`;
14978
+ }
14979
+ case "paragraph":
14980
+ return this.formatText(node);
14981
+ case "text":
14982
+ return this.formatText(node);
14983
+ case "list": {
14984
+ const items = node.children || [];
14985
+ return items.map((item, i) => {
14986
+ const indent = " ".repeat(node.metadata?.indentation || 0);
14987
+ const prefix = node.metadata?.listType === "ordered" ? `${i + 1}.` : "-";
14988
+ return `${indent}${prefix} ${item.text || this.getNodeText(item)}`;
14989
+ }).join("\n");
14990
+ }
14991
+ case "table": {
14992
+ return this.tableToMarkdown(node);
14993
+ }
14994
+ case "slide": {
14995
+ const slideNum = node.metadata?.slideNumber || "";
14996
+ const childContent = node.children ? node.children.map((c) => this.nodeToMarkdown(c, format)).filter(Boolean).join("\n\n") : node.text || "";
14997
+ return slideNum ? `### Slide ${slideNum}
14998
+
14999
+ ${childContent}` : childContent;
15000
+ }
15001
+ case "note":
15002
+ return `> **Note:** ${node.text || this.getNodeText(node)}`;
15003
+ case "sheet": {
15004
+ const sheetName = node.metadata?.sheetName || "Sheet";
15005
+ const childContent = node.children ? node.children.map((c) => this.nodeToMarkdown(c, format)).filter(Boolean).join("\n") : "";
15006
+ return `## Sheet: ${sheetName}
15007
+
15008
+ ${childContent}`;
15009
+ }
15010
+ case "page": {
15011
+ const pageNum = node.metadata?.pageNumber || "";
15012
+ const childContent = node.children ? node.children.map((c) => this.nodeToMarkdown(c, format)).filter(Boolean).join("\n\n") : node.text || "";
15013
+ return pageNum ? `--- Page ${pageNum} ---
15014
+
15015
+ ${childContent}` : childContent;
15016
+ }
15017
+ case "image":
15018
+ return `[Image: ${node.metadata?.altText || node.metadata?.attachmentName || "embedded image"}]`;
15019
+ case "chart":
15020
+ return `[Chart: ${node.metadata?.attachmentName || "embedded chart"}]`;
15021
+ default:
15022
+ return node.text || this.getNodeText(node);
15023
+ }
15024
+ }
15025
+ /**
15026
+ * Get text from a node recursively
15027
+ */
15028
+ getNodeText(node) {
15029
+ if (node.text) return node.text;
15030
+ if (node.children) {
15031
+ return node.children.map((c) => this.getNodeText(c)).join("");
15032
+ }
15033
+ return "";
15034
+ }
15035
+ /**
15036
+ * Format text with markdown formatting
15037
+ */
15038
+ formatText(node) {
15039
+ if (!node.children || node.children.length === 0) {
15040
+ return node.text || "";
15041
+ }
15042
+ return node.children.map((child) => {
15043
+ let text = child.text || this.getNodeText(child);
15044
+ if (!text) return "";
15045
+ const fmt = child.formatting;
15046
+ if (fmt) {
15047
+ if (fmt.bold) text = `**${text}**`;
15048
+ if (fmt.italic) text = `_${text}_`;
15049
+ if (fmt.strikethrough) text = `~~${text}~~`;
14762
15050
  }
14763
- return resolve4(subStep(p, i, ii + 1));
14764
- });
14765
- });
14766
- return cb ? step(0).then((res) => cb(null, res), cb) : step(0);
14767
- };
14768
- var whichSync = (cmd, opt) => {
14769
- opt = opt || {};
14770
- const { pathEnv, pathExt, pathExtExe } = getPathInfo(cmd, opt);
14771
- const found = [];
14772
- for (let i = 0; i < pathEnv.length; i++) {
14773
- const ppRaw = pathEnv[i];
14774
- const pathPart = /^".*"$/.test(ppRaw) ? ppRaw.slice(1, -1) : ppRaw;
14775
- const pCmd = path6.join(pathPart, cmd);
14776
- const p = !pathPart && /^\.[\\\/]/.test(cmd) ? cmd.slice(0, 2) + pCmd : pCmd;
14777
- for (let j = 0; j < pathExt.length; j++) {
14778
- const cur = p + pathExt[j];
14779
- try {
14780
- const is = isexe.sync(cur, { pathExt: pathExtExe });
14781
- if (is) {
14782
- if (opt.all)
14783
- found.push(cur);
14784
- else
14785
- return cur;
14786
- }
14787
- } catch (ex) {
15051
+ if (child.metadata?.link && child.metadata?.linkType === "external") {
15052
+ text = `[${text}](${child.metadata.link})`;
14788
15053
  }
14789
- }
15054
+ return text;
15055
+ }).join("");
14790
15056
  }
14791
- if (opt.all && found.length)
14792
- return found;
14793
- if (opt.nothrow)
14794
- return null;
14795
- throw getNotFoundError(cmd);
14796
- };
14797
- module.exports = which;
14798
- which.sync = whichSync;
14799
- }
14800
- });
15057
+ /**
15058
+ * Convert table node to markdown table
15059
+ */
15060
+ tableToMarkdown(node) {
15061
+ if (!node.children || node.children.length === 0) return "";
15062
+ const rows = [];
15063
+ for (const row of node.children) {
15064
+ if (row.type === "row" && row.children) {
15065
+ rows.push(row.children.map((cell) => {
15066
+ const text = cell.text || this.getNodeText(cell);
15067
+ return text.replace(/\|/g, "\\|").trim();
15068
+ }));
15069
+ }
15070
+ }
15071
+ if (rows.length === 0) return "";
15072
+ const maxCols = Math.max(...rows.map((r) => r.length));
15073
+ const normalizedRows = rows.map((r) => {
15074
+ while (r.length < maxCols) r.push("");
15075
+ return r;
15076
+ });
15077
+ const firstRow = normalizedRows[0] ?? [];
15078
+ const header = `| ${firstRow.join(" | ")} |`;
15079
+ const separator = `| ${firstRow.map(() => "---").join(" | ")} |`;
15080
+ const body = normalizedRows.slice(1).map((r) => `| ${r.join(" | ")} |`).join("\n");
15081
+ return body ? `${header}
15082
+ ${separator}
15083
+ ${body}` : `${header}
15084
+ ${separator}`;
15085
+ }
15086
+ };
15087
+ }
15088
+ });
15089
+
15090
+ // src/capabilities/documents/handlers/ExcelHandler.ts
15091
+ async function getExcelJS() {
15092
+ if (!ExcelJS) {
15093
+ ExcelJS = await import('exceljs');
15094
+ }
15095
+ return ExcelJS;
15096
+ }
15097
+ var ExcelJS, ExcelHandler;
15098
+ var init_ExcelHandler = __esm({
15099
+ "src/capabilities/documents/handlers/ExcelHandler.ts"() {
15100
+ init_constants();
15101
+ ExcelJS = null;
15102
+ ExcelHandler = class {
15103
+ name = "ExcelHandler";
15104
+ supportedFormats = ["xlsx", "csv"];
15105
+ async handle(buffer, filename, format, options) {
15106
+ const exceljs = await getExcelJS();
15107
+ const Workbook = exceljs.Workbook || exceljs.default?.Workbook;
15108
+ const excelOpts = {
15109
+ maxRows: options.formatOptions?.excel?.maxRows ?? DOCUMENT_DEFAULTS.MAX_EXCEL_ROWS,
15110
+ maxColumns: options.formatOptions?.excel?.maxColumns ?? DOCUMENT_DEFAULTS.MAX_EXCEL_COLUMNS,
15111
+ tableFormat: options.formatOptions?.excel?.tableFormat ?? "markdown",
15112
+ includeFormulas: options.formatOptions?.excel?.includeFormulas ?? false
15113
+ };
15114
+ const workbook = new Workbook();
15115
+ if (format === "csv") {
15116
+ await workbook.csv.read(
15117
+ new (await import('stream')).Readable({
15118
+ read() {
15119
+ this.push(buffer);
15120
+ this.push(null);
15121
+ }
15122
+ })
15123
+ );
15124
+ } else {
15125
+ await workbook.xlsx.load(buffer);
15126
+ }
15127
+ const pieces = [];
15128
+ let pieceIndex = 0;
15129
+ const requestedSheets = options.pages;
15130
+ workbook.eachSheet((worksheet, sheetId) => {
15131
+ if (requestedSheets && requestedSheets.length > 0) {
15132
+ const isRequested = requestedSheets.some((p) => {
15133
+ if (typeof p === "number") return sheetId === p;
15134
+ return worksheet.name === p || String(sheetId) === p;
15135
+ });
15136
+ if (!isRequested) return;
15137
+ }
15138
+ const content = this.sheetToContent(worksheet, excelOpts);
15139
+ if (!content.trim()) return;
15140
+ const sheetContent = format === "csv" ? content : `## Sheet: ${worksheet.name}
14801
15141
 
14802
- // node_modules/path-key/index.js
14803
- var require_path_key = __commonJS({
14804
- "node_modules/path-key/index.js"(exports$1, module) {
14805
- var pathKey = (options = {}) => {
14806
- const environment = options.env || process.env;
14807
- const platform2 = options.platform || process.platform;
14808
- if (platform2 !== "win32") {
14809
- return "PATH";
15142
+ ${content}`;
15143
+ const sizeBytes = Buffer.byteLength(sheetContent, "utf-8");
15144
+ pieces.push({
15145
+ type: "text",
15146
+ content: sheetContent,
15147
+ metadata: {
15148
+ sourceFilename: filename,
15149
+ format,
15150
+ index: pieceIndex++,
15151
+ section: format === "csv" ? void 0 : worksheet.name,
15152
+ sizeBytes,
15153
+ estimatedTokens: Math.ceil(sizeBytes / DOCUMENT_DEFAULTS.CHARS_PER_TOKEN)
15154
+ }
15155
+ });
15156
+ });
15157
+ return pieces;
14810
15158
  }
14811
- return Object.keys(environment).reverse().find((key) => key.toUpperCase() === "PATH") || "Path";
14812
- };
14813
- module.exports = pathKey;
14814
- module.exports.default = pathKey;
14815
- }
14816
- });
14817
-
14818
- // node_modules/cross-spawn/lib/util/resolveCommand.js
14819
- var require_resolveCommand = __commonJS({
14820
- "node_modules/cross-spawn/lib/util/resolveCommand.js"(exports$1, module) {
14821
- var path6 = __require("path");
14822
- var which = require_which();
14823
- var getPathKey = require_path_key();
14824
- function resolveCommandAttempt(parsed, withoutPathExt) {
14825
- const env = parsed.options.env || process.env;
14826
- const cwd = process.cwd();
14827
- const hasCustomCwd = parsed.options.cwd != null;
14828
- const shouldSwitchCwd = hasCustomCwd && process.chdir !== void 0 && !process.chdir.disabled;
14829
- if (shouldSwitchCwd) {
14830
- try {
14831
- process.chdir(parsed.options.cwd);
14832
- } catch (err) {
15159
+ /**
15160
+ * Convert a worksheet to the configured format
15161
+ */
15162
+ sheetToContent(worksheet, opts) {
15163
+ switch (opts.tableFormat) {
15164
+ case "csv":
15165
+ return this.sheetToCSV(worksheet, opts);
15166
+ case "json":
15167
+ return this.sheetToJSON(worksheet, opts);
15168
+ default:
15169
+ return this.sheetToMarkdownTable(worksheet, opts);
14833
15170
  }
14834
15171
  }
14835
- let resolved;
14836
- try {
14837
- resolved = which.sync(parsed.command, {
14838
- path: env[getPathKey({ env })],
14839
- pathExt: withoutPathExt ? path6.delimiter : void 0
15172
+ /**
15173
+ * Convert worksheet to markdown table
15174
+ */
15175
+ sheetToMarkdownTable(worksheet, opts) {
15176
+ const rows = this.extractRows(worksheet, opts);
15177
+ if (rows.length === 0) return "";
15178
+ const maxCols = Math.min(
15179
+ Math.max(...rows.map((r) => r.length)),
15180
+ opts.maxColumns
15181
+ );
15182
+ const normalizedRows = rows.map((r) => {
15183
+ const truncated = r.slice(0, maxCols);
15184
+ while (truncated.length < maxCols) truncated.push("");
15185
+ return truncated;
14840
15186
  });
14841
- } catch (e) {
14842
- } finally {
14843
- if (shouldSwitchCwd) {
14844
- process.chdir(cwd);
15187
+ const firstRow = normalizedRows[0] ?? [];
15188
+ const header = `| ${firstRow.join(" | ")} |`;
15189
+ const separator = `| ${firstRow.map(() => "---").join(" | ")} |`;
15190
+ const body = normalizedRows.slice(1).map((r) => `| ${r.join(" | ")} |`).join("\n");
15191
+ let result = `${header}
15192
+ ${separator}`;
15193
+ if (body) result += `
15194
+ ${body}`;
15195
+ if (worksheet.rowCount > opts.maxRows) {
15196
+ result += `
15197
+
15198
+ _...truncated (${worksheet.rowCount - opts.maxRows} more rows)_`;
14845
15199
  }
15200
+ return result;
14846
15201
  }
14847
- if (resolved) {
14848
- resolved = path6.resolve(hasCustomCwd ? parsed.options.cwd : "", resolved);
15202
+ /**
15203
+ * Convert worksheet to CSV
15204
+ */
15205
+ sheetToCSV(worksheet, opts) {
15206
+ const rows = this.extractRows(worksheet, opts);
15207
+ return rows.map(
15208
+ (row) => row.slice(0, opts.maxColumns).map((cell) => {
15209
+ if (cell.includes(",") || cell.includes('"') || cell.includes("\n")) {
15210
+ return `"${cell.replace(/"/g, '""')}"`;
15211
+ }
15212
+ return cell;
15213
+ }).join(",")
15214
+ ).join("\n");
14849
15215
  }
14850
- return resolved;
14851
- }
14852
- function resolveCommand(parsed) {
14853
- return resolveCommandAttempt(parsed) || resolveCommandAttempt(parsed, true);
14854
- }
14855
- module.exports = resolveCommand;
14856
- }
14857
- });
14858
-
14859
- // node_modules/cross-spawn/lib/util/escape.js
14860
- var require_escape = __commonJS({
14861
- "node_modules/cross-spawn/lib/util/escape.js"(exports$1, module) {
14862
- var metaCharsRegExp = /([()\][%!^"`<>&|;, *?])/g;
14863
- function escapeCommand(arg) {
14864
- arg = arg.replace(metaCharsRegExp, "^$1");
14865
- return arg;
14866
- }
14867
- function escapeArgument(arg, doubleEscapeMetaChars) {
14868
- arg = `${arg}`;
14869
- arg = arg.replace(/(?=(\\+?)?)\1"/g, '$1$1\\"');
14870
- arg = arg.replace(/(?=(\\+?)?)\1$/, "$1$1");
14871
- arg = `"${arg}"`;
14872
- arg = arg.replace(metaCharsRegExp, "^$1");
14873
- if (doubleEscapeMetaChars) {
14874
- arg = arg.replace(metaCharsRegExp, "^$1");
15216
+ /**
15217
+ * Convert worksheet to JSON
15218
+ */
15219
+ sheetToJSON(worksheet, opts) {
15220
+ const rows = this.extractRows(worksheet, opts);
15221
+ if (rows.length < 2) return "[]";
15222
+ const headers = (rows[0] ?? []).slice(0, opts.maxColumns);
15223
+ const data = rows.slice(1).map((row) => {
15224
+ const obj = {};
15225
+ headers.forEach((header, i) => {
15226
+ if (header && i < row.length) {
15227
+ obj[header] = row[i] ?? "";
15228
+ }
15229
+ });
15230
+ return obj;
15231
+ });
15232
+ return "```json\n" + JSON.stringify(data, null, 2) + "\n```";
14875
15233
  }
14876
- return arg;
14877
- }
14878
- module.exports.command = escapeCommand;
14879
- module.exports.argument = escapeArgument;
14880
- }
14881
- });
14882
-
14883
- // node_modules/shebang-regex/index.js
14884
- var require_shebang_regex = __commonJS({
14885
- "node_modules/shebang-regex/index.js"(exports$1, module) {
14886
- module.exports = /^#!(.*)/;
14887
- }
14888
- });
14889
-
14890
- // node_modules/shebang-command/index.js
14891
- var require_shebang_command = __commonJS({
14892
- "node_modules/shebang-command/index.js"(exports$1, module) {
14893
- var shebangRegex = require_shebang_regex();
14894
- module.exports = (string3 = "") => {
14895
- const match = string3.match(shebangRegex);
14896
- if (!match) {
14897
- return null;
15234
+ /**
15235
+ * Extract rows from worksheet as string arrays
15236
+ */
15237
+ extractRows(worksheet, opts) {
15238
+ const rows = [];
15239
+ let rowCount = 0;
15240
+ worksheet.eachRow({ includeEmpty: false }, (row) => {
15241
+ if (rowCount >= opts.maxRows) return;
15242
+ rowCount++;
15243
+ const cells = [];
15244
+ row.eachCell({ includeEmpty: true }, (cell, colNumber) => {
15245
+ if (colNumber > opts.maxColumns) return;
15246
+ let value = "";
15247
+ if (opts.includeFormulas && cell.formula) {
15248
+ value = `${this.getCellValue(cell)} (=${cell.formula})`;
15249
+ } else {
15250
+ value = this.getCellValue(cell);
15251
+ }
15252
+ while (cells.length < colNumber - 1) cells.push("");
15253
+ cells.push(value);
15254
+ });
15255
+ rows.push(cells);
15256
+ });
15257
+ return rows;
14898
15258
  }
14899
- const [path6, argument] = match[0].replace(/#! ?/, "").split(" ");
14900
- const binary = path6.split("/").pop();
14901
- if (binary === "env") {
14902
- return argument;
15259
+ /**
15260
+ * Get cell value as string
15261
+ */
15262
+ getCellValue(cell) {
15263
+ if (cell.value === null || cell.value === void 0) return "";
15264
+ if (typeof cell.value === "object") {
15265
+ if (cell.value.richText) {
15266
+ return cell.value.richText.map((rt) => rt.text || "").join("");
15267
+ }
15268
+ if (cell.value.hyperlink) {
15269
+ return cell.value.text || cell.value.hyperlink;
15270
+ }
15271
+ if ("result" in cell.value) {
15272
+ return String(cell.value.result ?? "");
15273
+ }
15274
+ if (cell.value instanceof Date) {
15275
+ return cell.value.toISOString().split("T")[0];
15276
+ }
15277
+ return String(cell.value);
15278
+ }
15279
+ return String(cell.value).replace(/\|/g, "\\|");
14903
15280
  }
14904
- return argument ? `${binary} ${argument}` : binary;
14905
15281
  };
14906
15282
  }
14907
15283
  });
14908
15284
 
14909
- // node_modules/cross-spawn/lib/util/readShebang.js
14910
- var require_readShebang = __commonJS({
14911
- "node_modules/cross-spawn/lib/util/readShebang.js"(exports$1, module) {
14912
- var fs17 = __require("fs");
14913
- var shebangCommand = require_shebang_command();
14914
- function readShebang(command) {
14915
- const size = 150;
14916
- const buffer = Buffer.alloc(size);
14917
- let fd;
14918
- try {
14919
- fd = fs17.openSync(command, "r");
14920
- fs17.readSync(fd, buffer, 0, size, 0);
14921
- fs17.closeSync(fd);
14922
- } catch (e) {
14923
- }
14924
- return shebangCommand(buffer.toString());
14925
- }
14926
- module.exports = readShebang;
14927
- }
14928
- });
14929
-
14930
- // node_modules/cross-spawn/lib/parse.js
14931
- var require_parse = __commonJS({
14932
- "node_modules/cross-spawn/lib/parse.js"(exports$1, module) {
14933
- var path6 = __require("path");
14934
- var resolveCommand = require_resolveCommand();
14935
- var escape2 = require_escape();
14936
- var readShebang = require_readShebang();
14937
- var isWin = process.platform === "win32";
14938
- var isExecutableRegExp = /\.(?:com|exe)$/i;
14939
- var isCmdShimRegExp = /node_modules[\\/].bin[\\/][^\\/]+\.cmd$/i;
14940
- function detectShebang(parsed) {
14941
- parsed.file = resolveCommand(parsed);
14942
- const shebang = parsed.file && readShebang(parsed.file);
14943
- if (shebang) {
14944
- parsed.args.unshift(parsed.file);
14945
- parsed.command = shebang;
14946
- return resolveCommand(parsed);
14947
- }
14948
- return parsed.file;
14949
- }
14950
- function parseNonShell(parsed) {
14951
- if (!isWin) {
14952
- return parsed;
14953
- }
14954
- const commandFile = detectShebang(parsed);
14955
- const needsShell = !isExecutableRegExp.test(commandFile);
14956
- if (parsed.options.forceShell || needsShell) {
14957
- const needsDoubleEscapeMetaChars = isCmdShimRegExp.test(commandFile);
14958
- parsed.command = path6.normalize(parsed.command);
14959
- parsed.command = escape2.command(parsed.command);
14960
- parsed.args = parsed.args.map((arg) => escape2.argument(arg, needsDoubleEscapeMetaChars));
14961
- const shellCommand = [parsed.command].concat(parsed.args).join(" ");
14962
- parsed.args = ["/d", "/s", "/c", `"${shellCommand}"`];
14963
- parsed.command = process.env.comspec || "cmd.exe";
14964
- parsed.options.windowsVerbatimArguments = true;
15285
+ // src/capabilities/documents/handlers/PDFHandler.ts
15286
+ async function getUnpdf() {
15287
+ if (!unpdfModule) {
15288
+ const mod = await import('unpdf');
15289
+ unpdfModule = {
15290
+ extractText: mod.extractText,
15291
+ extractImages: mod.extractImages,
15292
+ getMeta: mod.getMeta
15293
+ };
15294
+ }
15295
+ return unpdfModule;
15296
+ }
15297
+ var unpdfModule, PDFHandler;
15298
+ var init_PDFHandler = __esm({
15299
+ "src/capabilities/documents/handlers/PDFHandler.ts"() {
15300
+ init_constants();
15301
+ unpdfModule = null;
15302
+ PDFHandler = class {
15303
+ name = "PDFHandler";
15304
+ supportedFormats = ["pdf"];
15305
+ async handle(buffer, filename, format, options) {
15306
+ const unpdf = await getUnpdf();
15307
+ const pieces = [];
15308
+ let pieceIndex = 0;
15309
+ let metadata = {};
15310
+ const includeMetadata = options.formatOptions?.pdf?.includeMetadata !== false;
15311
+ if (includeMetadata) {
15312
+ try {
15313
+ metadata = await unpdf.getMeta(buffer);
15314
+ } catch {
15315
+ }
15316
+ }
15317
+ const textResult = await unpdf.extractText(buffer, { mergePages: false });
15318
+ const pages = textResult?.pages || textResult?.text ? Array.isArray(textResult.text) ? textResult.text : [textResult.text] : [];
15319
+ const requestedPages = options.pages;
15320
+ const pageEntries = pages.map((text, i) => ({ text, pageNum: i + 1 }));
15321
+ const filteredPages = requestedPages && requestedPages.length > 0 ? pageEntries.filter(
15322
+ (p) => requestedPages.some((rp) => {
15323
+ const num = typeof rp === "string" ? parseInt(rp, 10) : rp;
15324
+ return num === p.pageNum;
15325
+ })
15326
+ ) : pageEntries;
15327
+ if (includeMetadata && metadata?.info) {
15328
+ const metaParts = [];
15329
+ if (metadata.info.Title) metaParts.push(`**Title:** ${metadata.info.Title}`);
15330
+ if (metadata.info.Author) metaParts.push(`**Author:** ${metadata.info.Author}`);
15331
+ if (metadata.info.Subject) metaParts.push(`**Subject:** ${metadata.info.Subject}`);
15332
+ if (metadata.info.Creator) metaParts.push(`**Creator:** ${metadata.info.Creator}`);
15333
+ if (pages.length) metaParts.push(`**Pages:** ${pages.length}`);
15334
+ if (metaParts.length > 0) {
15335
+ const metaContent = metaParts.join("\n");
15336
+ const sizeBytes = Buffer.byteLength(metaContent, "utf-8");
15337
+ pieces.push({
15338
+ type: "text",
15339
+ content: metaContent,
15340
+ metadata: {
15341
+ sourceFilename: filename,
15342
+ format,
15343
+ index: pieceIndex++,
15344
+ section: "Metadata",
15345
+ sizeBytes,
15346
+ estimatedTokens: Math.ceil(sizeBytes / DOCUMENT_DEFAULTS.CHARS_PER_TOKEN)
15347
+ }
15348
+ });
15349
+ }
15350
+ }
15351
+ for (const page of filteredPages) {
15352
+ const text = page.text.trim();
15353
+ if (!text) continue;
15354
+ const sizeBytes = Buffer.byteLength(text, "utf-8");
15355
+ pieces.push({
15356
+ type: "text",
15357
+ content: text,
15358
+ metadata: {
15359
+ sourceFilename: filename,
15360
+ format,
15361
+ index: pieceIndex++,
15362
+ section: `Page ${page.pageNum}`,
15363
+ sizeBytes,
15364
+ estimatedTokens: Math.ceil(sizeBytes / DOCUMENT_DEFAULTS.CHARS_PER_TOKEN)
15365
+ }
15366
+ });
15367
+ }
15368
+ if (options.extractImages !== false) {
15369
+ try {
15370
+ const imagesResult = await unpdf.extractImages(buffer, {});
15371
+ const images = imagesResult?.images || [];
15372
+ for (const img of images) {
15373
+ if (!img.data) continue;
15374
+ const base64 = typeof img.data === "string" ? img.data : Buffer.from(img.data).toString("base64");
15375
+ const sizeBytes = Math.ceil(base64.length * 0.75);
15376
+ pieces.push({
15377
+ type: "image",
15378
+ base64,
15379
+ mimeType: img.mimeType || "image/png",
15380
+ width: img.width,
15381
+ height: img.height,
15382
+ metadata: {
15383
+ sourceFilename: filename,
15384
+ format,
15385
+ index: pieceIndex++,
15386
+ sizeBytes,
15387
+ estimatedTokens: DOCUMENT_DEFAULTS.IMAGE_TOKENS_AUTO,
15388
+ label: img.name || void 0
15389
+ }
15390
+ });
15391
+ }
15392
+ } catch {
15393
+ }
15394
+ }
15395
+ return pieces;
15396
+ }
15397
+ };
15398
+ }
15399
+ });
15400
+
15401
+ // src/capabilities/documents/handlers/index.ts
15402
+ var handlers_exports = {};
15403
+ __export(handlers_exports, {
15404
+ ExcelHandler: () => ExcelHandler,
15405
+ HTMLHandler: () => HTMLHandler,
15406
+ ImageHandler: () => ImageHandler,
15407
+ OfficeHandler: () => OfficeHandler,
15408
+ PDFHandler: () => PDFHandler,
15409
+ TextHandler: () => TextHandler,
15410
+ getDefaultHandlers: () => getDefaultHandlers
15411
+ });
15412
+ function getDefaultHandlers() {
15413
+ return /* @__PURE__ */ new Map([
15414
+ ["text", new TextHandler()],
15415
+ ["image", new ImageHandler()],
15416
+ ["html", new HTMLHandler()],
15417
+ ["office", new OfficeHandler()],
15418
+ ["spreadsheet", new ExcelHandler()],
15419
+ ["pdf", new PDFHandler()]
15420
+ ]);
15421
+ }
15422
+ var init_handlers = __esm({
15423
+ "src/capabilities/documents/handlers/index.ts"() {
15424
+ init_TextHandler();
15425
+ init_ImageHandler();
15426
+ init_HTMLHandler();
15427
+ init_OfficeHandler();
15428
+ init_ExcelHandler();
15429
+ init_PDFHandler();
15430
+ init_TextHandler();
15431
+ init_ImageHandler();
15432
+ init_HTMLHandler();
15433
+ init_OfficeHandler();
15434
+ init_ExcelHandler();
15435
+ init_PDFHandler();
15436
+ }
15437
+ });
15438
+
15439
+ // src/capabilities/documents/transformers/DefaultTransformers.ts
15440
+ function normalizeTable(table) {
15441
+ const lines = table.trim().split("\n");
15442
+ if (lines.length < 2) return table;
15443
+ const rows = lines.map(
15444
+ (line) => line.split("|").slice(1, -1).map((cell) => cell.trim())
15445
+ );
15446
+ const colCount = Math.max(...rows.map((r) => r.length));
15447
+ const colWidths = new Array(colCount).fill(3);
15448
+ for (const row of rows) {
15449
+ for (let i = 0; i < row.length; i++) {
15450
+ const cell = row[i] ?? "";
15451
+ if (cell.match(/^[-:]+$/)) continue;
15452
+ colWidths[i] = Math.max(colWidths[i] ?? 3, cell.length);
15453
+ }
15454
+ }
15455
+ const result = rows.map((row, rowIndex) => {
15456
+ const cells = [];
15457
+ for (let i = 0; i < colCount; i++) {
15458
+ const cell = row[i] || "";
15459
+ if (rowIndex === 1) {
15460
+ cells.push("-".repeat(colWidths[i]));
15461
+ } else {
15462
+ cells.push(cell.padEnd(colWidths[i]));
14965
15463
  }
14966
- return parsed;
14967
15464
  }
14968
- function parse(command, args, options) {
14969
- if (args && !Array.isArray(args)) {
14970
- options = args;
14971
- args = null;
15465
+ return `| ${cells.join(" | ")} |`;
15466
+ });
15467
+ return result.join("\n");
15468
+ }
15469
+ function getDefaultTransformers() {
15470
+ return [
15471
+ documentHeaderTransformer,
15472
+ tableFormattingTransformer,
15473
+ truncationTransformer
15474
+ ];
15475
+ }
15476
+ var documentHeaderTransformer, tableFormattingTransformer, truncationTransformer;
15477
+ var init_DefaultTransformers = __esm({
15478
+ "src/capabilities/documents/transformers/DefaultTransformers.ts"() {
15479
+ init_constants();
15480
+ documentHeaderTransformer = {
15481
+ name: "documentHeaderTransformer",
15482
+ appliesTo: [],
15483
+ // applies to all formats
15484
+ priority: 10,
15485
+ async transform(pieces, context) {
15486
+ if (pieces.length === 0) return pieces;
15487
+ const totalSize = pieces.reduce((sum, p) => sum + p.metadata.sizeBytes, 0);
15488
+ const sizeStr = totalSize > 1024 * 1024 ? `${(totalSize / 1024 / 1024).toFixed(1)}MB` : `${(totalSize / 1024).toFixed(1)}KB`;
15489
+ const header = `# Document: ${context.filename}
15490
+ _Format: ${context.format.toUpperCase()} | Size: ${sizeStr}_`;
15491
+ const headerBytes = Buffer.byteLength(header, "utf-8");
15492
+ const headerPiece = {
15493
+ type: "text",
15494
+ content: header,
15495
+ metadata: {
15496
+ sourceFilename: context.filename,
15497
+ format: context.format,
15498
+ index: -1,
15499
+ // will be re-indexed
15500
+ section: "Header",
15501
+ sizeBytes: headerBytes,
15502
+ estimatedTokens: Math.ceil(headerBytes / DOCUMENT_DEFAULTS.CHARS_PER_TOKEN)
15503
+ }
15504
+ };
15505
+ const result = [headerPiece, ...pieces];
15506
+ result.forEach((p, i) => {
15507
+ p.metadata.index = i;
15508
+ });
15509
+ return result;
14972
15510
  }
14973
- args = args ? args.slice(0) : [];
14974
- options = Object.assign({}, options);
14975
- const parsed = {
14976
- command,
14977
- args,
14978
- options,
14979
- file: void 0,
14980
- original: {
14981
- command,
14982
- args
14983
- }
14984
- };
14985
- return options.shell ? parsed : parseNonShell(parsed);
14986
- }
14987
- module.exports = parse;
14988
- }
14989
- });
14990
-
14991
- // node_modules/cross-spawn/lib/enoent.js
14992
- var require_enoent = __commonJS({
14993
- "node_modules/cross-spawn/lib/enoent.js"(exports$1, module) {
14994
- var isWin = process.platform === "win32";
14995
- function notFoundError(original, syscall) {
14996
- return Object.assign(new Error(`${syscall} ${original.command} ENOENT`), {
14997
- code: "ENOENT",
14998
- errno: "ENOENT",
14999
- syscall: `${syscall} ${original.command}`,
15000
- path: original.command,
15001
- spawnargs: original.args
15002
- });
15003
- }
15004
- function hookChildProcess(cp, parsed) {
15005
- if (!isWin) {
15006
- return;
15511
+ };
15512
+ tableFormattingTransformer = {
15513
+ name: "tableFormattingTransformer",
15514
+ appliesTo: ["xlsx", "csv"],
15515
+ priority: 50,
15516
+ async transform(pieces, _context) {
15517
+ return pieces.map((piece) => {
15518
+ if (piece.type !== "text") return piece;
15519
+ let content = piece.content;
15520
+ content = content.replace(
15521
+ /(\|[^\n]+\|\n\|[\s\-:|]+\|\n(?:\|[^\n]+\|\n?)*)/g,
15522
+ (table) => normalizeTable(table)
15523
+ );
15524
+ if (content === piece.content) return piece;
15525
+ const sizeBytes = Buffer.byteLength(content, "utf-8");
15526
+ return {
15527
+ ...piece,
15528
+ content,
15529
+ metadata: {
15530
+ ...piece.metadata,
15531
+ sizeBytes,
15532
+ estimatedTokens: Math.ceil(sizeBytes / DOCUMENT_DEFAULTS.CHARS_PER_TOKEN)
15533
+ }
15534
+ };
15535
+ });
15007
15536
  }
15008
- const originalEmit = cp.emit;
15009
- cp.emit = function(name, arg1) {
15010
- if (name === "exit") {
15011
- const err = verifyENOENT(arg1, parsed);
15012
- if (err) {
15013
- return originalEmit.call(cp, "error", err);
15537
+ };
15538
+ truncationTransformer = {
15539
+ name: "truncationTransformer",
15540
+ appliesTo: [],
15541
+ // applies to all formats
15542
+ priority: 1e3,
15543
+ // runs last
15544
+ async transform(pieces, context) {
15545
+ const maxTokens = context.options.maxTokens ?? DOCUMENT_DEFAULTS.MAX_OUTPUT_TOKENS;
15546
+ const maxBytes = context.options.maxOutputBytes ?? DOCUMENT_DEFAULTS.MAX_OUTPUT_BYTES;
15547
+ let totalTokens = 0;
15548
+ let totalBytes = 0;
15549
+ const result = [];
15550
+ for (const piece of pieces) {
15551
+ totalTokens += piece.metadata.estimatedTokens;
15552
+ totalBytes += piece.metadata.sizeBytes;
15553
+ if (totalTokens > maxTokens || totalBytes > maxBytes) {
15554
+ if (piece.type === "text") {
15555
+ const remainingTokens = maxTokens - (totalTokens - piece.metadata.estimatedTokens);
15556
+ const remainingChars = remainingTokens * DOCUMENT_DEFAULTS.CHARS_PER_TOKEN;
15557
+ if (remainingChars > 100) {
15558
+ const content = piece.content;
15559
+ const truncateAt = content.lastIndexOf("\n\n", remainingChars);
15560
+ const cutPoint = truncateAt > remainingChars * 0.3 ? truncateAt : remainingChars;
15561
+ const truncated = content.slice(0, cutPoint) + "\n\n..._[content truncated]_";
15562
+ const sizeBytes = Buffer.byteLength(truncated, "utf-8");
15563
+ result.push({
15564
+ ...piece,
15565
+ content: truncated,
15566
+ metadata: {
15567
+ ...piece.metadata,
15568
+ sizeBytes,
15569
+ estimatedTokens: Math.ceil(sizeBytes / DOCUMENT_DEFAULTS.CHARS_PER_TOKEN)
15570
+ }
15571
+ });
15572
+ }
15573
+ }
15574
+ break;
15014
15575
  }
15576
+ result.push(piece);
15015
15577
  }
15016
- return originalEmit.apply(cp, arguments);
15017
- };
15018
- }
15019
- function verifyENOENT(status, parsed) {
15020
- if (isWin && status === 1 && !parsed.file) {
15021
- return notFoundError(parsed.original, "spawn");
15022
- }
15023
- return null;
15024
- }
15025
- function verifyENOENTSync(status, parsed) {
15026
- if (isWin && status === 1 && !parsed.file) {
15027
- return notFoundError(parsed.original, "spawnSync");
15578
+ return result;
15028
15579
  }
15029
- return null;
15030
- }
15031
- module.exports = {
15032
- hookChildProcess,
15033
- verifyENOENT,
15034
- verifyENOENTSync,
15035
- notFoundError
15036
- };
15037
- }
15038
- });
15039
-
15040
- // node_modules/cross-spawn/index.js
15041
- var require_cross_spawn = __commonJS({
15042
- "node_modules/cross-spawn/index.js"(exports$1, module) {
15043
- var cp = __require("child_process");
15044
- var parse = require_parse();
15045
- var enoent = require_enoent();
15046
- function spawn3(command, args, options) {
15047
- const parsed = parse(command, args, options);
15048
- const spawned = cp.spawn(parsed.command, parsed.args, parsed.options);
15049
- enoent.hookChildProcess(spawned, parsed);
15050
- return spawned;
15051
- }
15052
- function spawnSync(command, args, options) {
15053
- const parsed = parse(command, args, options);
15054
- const result = cp.spawnSync(parsed.command, parsed.args, parsed.options);
15055
- result.error = result.error || enoent.verifyENOENTSync(result.status, parsed);
15056
- return result;
15057
- }
15058
- module.exports = spawn3;
15059
- module.exports.spawn = spawn3;
15060
- module.exports.sync = spawnSync;
15061
- module.exports._parse = parse;
15062
- module.exports._enoent = enoent;
15580
+ };
15581
+ }
15582
+ });
15583
+
15584
+ // src/capabilities/documents/transformers/index.ts
15585
+ var transformers_exports = {};
15586
+ __export(transformers_exports, {
15587
+ documentHeaderTransformer: () => documentHeaderTransformer,
15588
+ getDefaultTransformers: () => getDefaultTransformers,
15589
+ tableFormattingTransformer: () => tableFormattingTransformer,
15590
+ truncationTransformer: () => truncationTransformer
15591
+ });
15592
+ var init_transformers = __esm({
15593
+ "src/capabilities/documents/transformers/index.ts"() {
15594
+ init_DefaultTransformers();
15063
15595
  }
15064
15596
  });
15065
15597
 
@@ -15774,6 +16306,32 @@ var ParallelTasksError = class _ParallelTasksError extends AIError {
15774
16306
  return this.failures.map((f) => f.taskId);
15775
16307
  }
15776
16308
  };
16309
+ var DocumentReadError = class _DocumentReadError extends AIError {
16310
+ constructor(source, message, originalError) {
16311
+ super(
16312
+ `Failed to read document '${source}': ${message}`,
16313
+ "DOCUMENT_READ_ERROR",
16314
+ 500,
16315
+ originalError
16316
+ );
16317
+ this.source = source;
16318
+ this.name = "DocumentReadError";
16319
+ Object.setPrototypeOf(this, _DocumentReadError.prototype);
16320
+ }
16321
+ };
16322
+ var UnsupportedFormatError = class _UnsupportedFormatError extends AIError {
16323
+ constructor(format, family) {
16324
+ super(
16325
+ `Unsupported document format: '${format}'${family ? ` (family: ${family})` : ""}`,
16326
+ "UNSUPPORTED_FORMAT",
16327
+ 400
16328
+ );
16329
+ this.format = format;
16330
+ this.family = family;
16331
+ this.name = "UnsupportedFormatError";
16332
+ Object.setPrototypeOf(this, _UnsupportedFormatError.prototype);
16333
+ }
16334
+ };
15777
16335
  var ContextOverflowError = class _ContextOverflowError extends AIError {
15778
16336
  constructor(message, budget) {
15779
16337
  super(
@@ -18177,24 +18735,9 @@ var ContentType = /* @__PURE__ */ ((ContentType2) => {
18177
18735
  ContentType2["TOOL_RESULT"] = "tool_result";
18178
18736
  return ContentType2;
18179
18737
  })(ContentType || {});
18180
- var AGENT_DEFAULTS = {
18181
- /** Default maximum iterations for agentic loop */
18182
- MAX_ITERATIONS: 50,
18183
- /** Default temperature for LLM calls */
18184
- DEFAULT_TEMPERATURE: 0.7,
18185
- /** Message injected when max iterations is reached */
18186
- MAX_ITERATIONS_MESSAGE: `You have reached the maximum iteration limit for this execution. Please:
18187
- 1. Summarize what you have accomplished so far
18188
- 2. Explain what remains to be done (if anything)
18189
- 3. Ask the user if they would like you to continue
18190
-
18191
- Do NOT use any tools in this response - just provide a clear summary and ask for confirmation to proceed.`
18192
- };
18193
- var TOKEN_ESTIMATION = {
18194
- /** Characters per token for mixed content */
18195
- MIXED_CHARS_PER_TOKEN: 3.5};
18196
18738
 
18197
18739
  // src/core/context-nextgen/BasePluginNextGen.ts
18740
+ init_constants();
18198
18741
  var simpleTokenEstimator = {
18199
18742
  estimateTokens(text) {
18200
18743
  if (!text || text.length === 0) return 0;
@@ -18203,6 +18746,14 @@ var simpleTokenEstimator = {
18203
18746
  estimateDataTokens(data) {
18204
18747
  const text = typeof data === "string" ? data : JSON.stringify(data);
18205
18748
  return this.estimateTokens(text);
18749
+ },
18750
+ estimateImageTokens(width, height, detail) {
18751
+ if (detail === "low") return 85;
18752
+ if (width && height) {
18753
+ const tiles = Math.ceil(width / 512) * Math.ceil(height / 512);
18754
+ return 85 + 170 * tiles;
18755
+ }
18756
+ return 1e3;
18206
18757
  }
18207
18758
  };
18208
18759
  var BasePluginNextGen = class {
@@ -21450,12 +22001,26 @@ var AgentContextNextGen = class _AgentContextNextGen extends eventemitter3.Event
21450
22001
  return "";
21451
22002
  }
21452
22003
  const id = this.generateId();
21453
- const contentArray = results.map((r) => ({
21454
- type: "tool_result" /* TOOL_RESULT */,
21455
- tool_use_id: r.tool_use_id,
21456
- content: typeof r.content === "string" ? r.content : JSON.stringify(r.content),
21457
- error: r.error
21458
- }));
22004
+ const contentArray = results.map((r) => {
22005
+ let contentStr;
22006
+ let images;
22007
+ if (typeof r.content === "string") {
22008
+ contentStr = r.content;
22009
+ } else if (r.content && Array.isArray(r.content.__images) && r.content.__images.length > 0) {
22010
+ images = r.content.__images;
22011
+ const { __images: _, base64: __, ...rest } = r.content;
22012
+ contentStr = JSON.stringify(rest);
22013
+ } else {
22014
+ contentStr = JSON.stringify(r.content);
22015
+ }
22016
+ return {
22017
+ type: "tool_result" /* TOOL_RESULT */,
22018
+ tool_use_id: r.tool_use_id,
22019
+ content: contentStr,
22020
+ error: r.error,
22021
+ ...images ? { __images: images } : {}
22022
+ };
22023
+ });
21459
22024
  const message = {
21460
22025
  type: "message",
21461
22026
  id,
@@ -21716,12 +22281,29 @@ ${content}`);
21716
22281
  total += this._estimator.estimateDataTokens(c.input || {});
21717
22282
  } else if (c.type === "tool_result" /* TOOL_RESULT */) {
21718
22283
  total += this._estimator.estimateTokens(c.content || "");
22284
+ const images = c.__images;
22285
+ if (images?.length) {
22286
+ for (const _img of images) {
22287
+ total += this._estimateImageTokens();
22288
+ }
22289
+ }
21719
22290
  } else if (c.type === "input_image_url" /* INPUT_IMAGE_URL */) {
21720
- total += 200;
22291
+ const imgContent = c;
22292
+ const detail = imgContent.image_url?.detail;
22293
+ total += this._estimateImageTokens(void 0, void 0, detail);
21721
22294
  }
21722
22295
  }
21723
22296
  return total;
21724
22297
  }
22298
+ /**
22299
+ * Estimate tokens for a single image, using the estimator's image method if available.
22300
+ */
22301
+ _estimateImageTokens(width, height, detail) {
22302
+ if (this._estimator.estimateImageTokens) {
22303
+ return this._estimator.estimateImageTokens(width, height, detail);
22304
+ }
22305
+ return 1e3;
22306
+ }
21725
22307
  // ============================================================================
21726
22308
  // Compaction
21727
22309
  // ============================================================================
@@ -21954,7 +22536,8 @@ ${content}`);
21954
22536
  if (c.type === "tool_result" /* TOOL_RESULT */) {
21955
22537
  const toolResult = c;
21956
22538
  const content = toolResult.content || "";
21957
- if (this.isBinaryContent(content)) {
22539
+ const images = toolResult.__images;
22540
+ if (!images?.length && this.isBinaryContent(content)) {
21958
22541
  truncatedContent.push({
21959
22542
  type: "tool_result" /* TOOL_RESULT */,
21960
22543
  tool_use_id: toolResult.tool_use_id,
@@ -21971,7 +22554,9 @@ ${content}`);
21971
22554
  tool_use_id: toolResult.tool_use_id,
21972
22555
  content: `${truncated}
21973
22556
 
21974
- [TRUNCATED: Original output was ${Math.round(content.length / 1024)}KB. Only first ${Math.round(availableChars / 1024)}KB shown. Consider using more targeted queries.]`
22557
+ [TRUNCATED: Original output was ${Math.round(content.length / 1024)}KB. Only first ${Math.round(availableChars / 1024)}KB shown. Consider using more targeted queries.]`,
22558
+ // Preserve images even when text is truncated — they're handled natively by providers
22559
+ ...images ? { __images: images } : {}
21975
22560
  });
21976
22561
  totalCharsUsed += truncated.length + 150;
21977
22562
  } else if (availableChars > 0) {
@@ -21982,7 +22567,9 @@ ${content}`);
21982
22567
  type: "tool_result" /* TOOL_RESULT */,
21983
22568
  tool_use_id: toolResult.tool_use_id,
21984
22569
  content: "[Output too large - skipped due to context limits. Try a more targeted query.]",
21985
- error: "Output too large"
22570
+ error: "Output too large",
22571
+ // Preserve images even when text is dropped
22572
+ ...images ? { __images: images } : {}
21986
22573
  });
21987
22574
  totalCharsUsed += 100;
21988
22575
  }
@@ -22511,14 +23098,41 @@ var OpenAIResponsesConverter = class {
22511
23098
  arguments: content.arguments
22512
23099
  });
22513
23100
  break;
22514
- case "tool_result":
22515
- const output = typeof content.content === "string" ? content.content : JSON.stringify(content.content);
23101
+ case "tool_result": {
23102
+ const contentImages = content.__images;
23103
+ let outputText;
23104
+ let images;
23105
+ if (contentImages?.length) {
23106
+ outputText = typeof content.content === "string" ? content.content : JSON.stringify(content.content);
23107
+ images = contentImages;
23108
+ } else {
23109
+ const rawOutput = typeof content.content === "string" ? content.content : JSON.stringify(content.content);
23110
+ const extracted = this.extractImagesFromOutput(rawOutput);
23111
+ outputText = extracted.text;
23112
+ images = extracted.images;
23113
+ }
22516
23114
  items.push({
22517
23115
  type: "function_call_output",
22518
23116
  call_id: content.tool_use_id,
22519
- output
23117
+ output: outputText
22520
23118
  });
23119
+ if (images.length > 0) {
23120
+ const imageContent = images.map((img) => ({
23121
+ type: "input_image",
23122
+ image_url: `data:${img.mediaType};base64,${img.base64}`
23123
+ }));
23124
+ items.push({
23125
+ type: "message",
23126
+ role: "user",
23127
+ content: [
23128
+ { type: "input_text", text: "[Screenshot from tool result]" },
23129
+ ...imageContent
23130
+ ],
23131
+ status: "completed"
23132
+ });
23133
+ }
22521
23134
  break;
23135
+ }
22522
23136
  }
22523
23137
  }
22524
23138
  if (messageContent.length > 0) {
@@ -22686,6 +23300,22 @@ var OpenAIResponsesConverter = class {
22686
23300
  }
22687
23301
  };
22688
23302
  }
23303
+ /**
23304
+ * Extract __images from a JSON tool result and return cleaned text + images.
23305
+ * Used by the __images convention for multimodal tool results.
23306
+ */
23307
+ extractImagesFromOutput(output) {
23308
+ try {
23309
+ const parsed = JSON.parse(output);
23310
+ if (parsed && Array.isArray(parsed.__images) && parsed.__images.length > 0) {
23311
+ const images = parsed.__images;
23312
+ const { __images: _, base64: __, ...rest } = parsed;
23313
+ return { text: JSON.stringify(rest), images };
23314
+ }
23315
+ } catch {
23316
+ }
23317
+ return { text: output, images: [] };
23318
+ }
22689
23319
  };
22690
23320
 
22691
23321
  // src/domain/entities/StreamEvent.ts
@@ -23491,6 +24121,7 @@ var AnthropicConverter = class extends BaseConverter {
23491
24121
  /**
23492
24122
  * Convert tool result to Anthropic block
23493
24123
  * Anthropic requires non-empty content when is_error is true
24124
+ * Supports __images convention: tool results with __images get multimodal content
23494
24125
  */
23495
24126
  convertToolResultToAnthropicBlock(resultContent) {
23496
24127
  const isError = !!resultContent.error;
@@ -23503,6 +24134,30 @@ var AnthropicConverter = class extends BaseConverter {
23503
24134
  if (isError && !toolResultContent) {
23504
24135
  toolResultContent = resultContent.error || "Tool execution failed";
23505
24136
  }
24137
+ const images = resultContent.__images?.length ? resultContent.__images : this.extractImages(toolResultContent);
24138
+ if (images) {
24139
+ const textContent = resultContent.__images?.length ? toolResultContent : this.stripImagesFromContent(toolResultContent);
24140
+ const contentBlocks = [];
24141
+ if (textContent.trim()) {
24142
+ contentBlocks.push({ type: "text", text: textContent });
24143
+ }
24144
+ for (const img of images) {
24145
+ contentBlocks.push({
24146
+ type: "image",
24147
+ source: {
24148
+ type: "base64",
24149
+ media_type: img.mediaType || "image/png",
24150
+ data: img.base64
24151
+ }
24152
+ });
24153
+ }
24154
+ return {
24155
+ type: "tool_result",
24156
+ tool_use_id: resultContent.tool_use_id,
24157
+ content: contentBlocks.length > 0 ? contentBlocks : textContent,
24158
+ is_error: isError
24159
+ };
24160
+ }
23506
24161
  return {
23507
24162
  type: "tool_result",
23508
24163
  tool_use_id: resultContent.tool_use_id,
@@ -23510,6 +24165,32 @@ var AnthropicConverter = class extends BaseConverter {
23510
24165
  is_error: isError
23511
24166
  };
23512
24167
  }
24168
+ /**
24169
+ * Extract __images from a JSON-stringified tool result content.
24170
+ * Returns null if no images found.
24171
+ */
24172
+ extractImages(content) {
24173
+ try {
24174
+ const parsed = JSON.parse(content);
24175
+ if (parsed && Array.isArray(parsed.__images) && parsed.__images.length > 0) {
24176
+ return parsed.__images;
24177
+ }
24178
+ } catch {
24179
+ }
24180
+ return null;
24181
+ }
24182
+ /**
24183
+ * Strip __images and base64 fields from JSON content to reduce token usage in text.
24184
+ */
24185
+ stripImagesFromContent(content) {
24186
+ try {
24187
+ const parsed = JSON.parse(content);
24188
+ const { __images: _, base64: __, ...rest } = parsed;
24189
+ return JSON.stringify(rest);
24190
+ } catch {
24191
+ return content;
24192
+ }
24193
+ }
23513
24194
  /**
23514
24195
  * Convert our Tool[] -> Anthropic tools
23515
24196
  * Uses shared conversion utilities (DRY)
@@ -24237,18 +24918,38 @@ var GoogleConverter = class {
24237
24918
  }
24238
24919
  parts.push(functionCallPart);
24239
24920
  break;
24240
- case "tool_result" /* TOOL_RESULT */:
24921
+ case "tool_result" /* TOOL_RESULT */: {
24241
24922
  const functionName = this.toolCallMapping.get(c.tool_use_id) || this.extractToolName(c.tool_use_id);
24923
+ const contentImages = c.__images;
24924
+ let resultText;
24925
+ let resultImages;
24926
+ if (contentImages?.length) {
24927
+ resultText = typeof c.content === "string" ? c.content : JSON.stringify(c.content);
24928
+ resultImages = contentImages;
24929
+ } else {
24930
+ const resultStr = typeof c.content === "string" ? c.content : JSON.stringify(c.content);
24931
+ const extracted = this.extractImagesFromResult(resultStr);
24932
+ resultText = extracted.text;
24933
+ resultImages = extracted.images;
24934
+ }
24242
24935
  parts.push({
24243
24936
  functionResponse: {
24244
24937
  name: functionName,
24245
- // Use actual function name from mapping
24246
24938
  response: {
24247
- result: typeof c.content === "string" ? c.content : c.content
24939
+ result: resultText
24248
24940
  }
24249
24941
  }
24250
24942
  });
24943
+ for (const img of resultImages) {
24944
+ parts.push({
24945
+ inlineData: {
24946
+ mimeType: img.mediaType || "image/png",
24947
+ data: img.base64
24948
+ }
24949
+ });
24950
+ }
24251
24951
  break;
24952
+ }
24252
24953
  }
24253
24954
  }
24254
24955
  return parts;
@@ -24384,6 +25085,22 @@ var GoogleConverter = class {
24384
25085
  reset() {
24385
25086
  this.clearMappings();
24386
25087
  }
25088
+ /**
25089
+ * Extract __images from a JSON tool result and return cleaned text + images.
25090
+ * Used by the __images convention for multimodal tool results.
25091
+ */
25092
+ extractImagesFromResult(content) {
25093
+ try {
25094
+ const parsed = JSON.parse(content);
25095
+ if (parsed && Array.isArray(parsed.__images) && parsed.__images.length > 0) {
25096
+ const images = parsed.__images;
25097
+ const { __images: _, base64: __, ...rest } = parsed;
25098
+ return { text: JSON.stringify(rest), images };
25099
+ }
25100
+ } catch {
25101
+ }
25102
+ return { text: content, images: [] };
25103
+ }
24387
25104
  };
24388
25105
  var GoogleStreamConverter = class {
24389
25106
  responseId = "";
@@ -26346,6 +27063,7 @@ function assertNotDestroyed(obj, operation) {
26346
27063
 
26347
27064
  // src/core/Agent.ts
26348
27065
  init_Metrics();
27066
+ init_constants();
26349
27067
  var Agent = class _Agent extends BaseAgent {
26350
27068
  // ===== Agent-specific State =====
26351
27069
  hookManager;
@@ -27553,6 +28271,9 @@ var Agent = class _Agent extends BaseAgent {
27553
28271
  this._logger.debug("Agent destroyed");
27554
28272
  }
27555
28273
  };
28274
+
28275
+ // src/core/index.ts
28276
+ init_constants();
27556
28277
  (class {
27557
28278
  static DEFAULT_PATHS = [
27558
28279
  "./oneringai.config.json",
@@ -30962,9 +31683,6 @@ var Client = class extends Protocol {
30962
31683
  }
30963
31684
  };
30964
31685
 
30965
- // node_modules/@modelcontextprotocol/sdk/dist/esm/client/stdio.js
30966
- var import_cross_spawn = __toESM(require_cross_spawn());
30967
-
30968
31686
  // node_modules/@modelcontextprotocol/sdk/dist/esm/shared/stdio.js
30969
31687
  var ReadBuffer = class {
30970
31688
  append(chunk) {
@@ -31042,7 +31760,7 @@ var StdioClientTransport = class {
31042
31760
  throw new Error("StdioClientTransport already started! If using Client class, note that connect() calls start() automatically.");
31043
31761
  }
31044
31762
  return new Promise((resolve4, reject) => {
31045
- this._process = (0, import_cross_spawn.default)(this._serverParams.command, this._serverParams.args ?? [], {
31763
+ this._process = spawn__default.default(this._serverParams.command, this._serverParams.args ?? [], {
31046
31764
  // merge default env with server env because mcp server needs some env vars
31047
31765
  env: {
31048
31766
  ...getDefaultEnvironment(),
@@ -38591,6 +39309,494 @@ var ZenRowsProvider = class {
38591
39309
  };
38592
39310
  registerScrapeProvider("zenrows", ZenRowsProvider);
38593
39311
 
39312
+ // src/capabilities/documents/DocumentReader.ts
39313
+ init_constants();
39314
+
39315
+ // src/capabilities/documents/FormatDetector.ts
39316
+ var EXTENSION_MAP = {
39317
+ // Office
39318
+ ".docx": { format: "docx", family: "office", mimeType: "application/vnd.openxmlformats-officedocument.wordprocessingml.document" },
39319
+ ".pptx": { format: "pptx", family: "office", mimeType: "application/vnd.openxmlformats-officedocument.presentationml.presentation" },
39320
+ ".odt": { format: "odt", family: "office", mimeType: "application/vnd.oasis.opendocument.text" },
39321
+ ".odp": { format: "odp", family: "office", mimeType: "application/vnd.oasis.opendocument.presentation" },
39322
+ ".ods": { format: "ods", family: "office", mimeType: "application/vnd.oasis.opendocument.spreadsheet" },
39323
+ ".rtf": { format: "rtf", family: "office", mimeType: "application/rtf" },
39324
+ // Spreadsheet
39325
+ ".xlsx": { format: "xlsx", family: "spreadsheet", mimeType: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" },
39326
+ ".csv": { format: "csv", family: "spreadsheet", mimeType: "text/csv" },
39327
+ // PDF
39328
+ ".pdf": { format: "pdf", family: "pdf", mimeType: "application/pdf" },
39329
+ // HTML
39330
+ ".html": { format: "html", family: "html", mimeType: "text/html" },
39331
+ ".htm": { format: "html", family: "html", mimeType: "text/html" },
39332
+ // Text
39333
+ ".txt": { format: "txt", family: "text", mimeType: "text/plain" },
39334
+ ".md": { format: "md", family: "text", mimeType: "text/markdown" },
39335
+ ".json": { format: "json", family: "text", mimeType: "application/json" },
39336
+ ".xml": { format: "xml", family: "text", mimeType: "application/xml" },
39337
+ ".yaml": { format: "yaml", family: "text", mimeType: "application/yaml" },
39338
+ ".yml": { format: "yml", family: "text", mimeType: "application/yaml" },
39339
+ // Image
39340
+ ".png": { format: "png", family: "image", mimeType: "image/png" },
39341
+ ".jpg": { format: "jpg", family: "image", mimeType: "image/jpeg" },
39342
+ ".jpeg": { format: "jpeg", family: "image", mimeType: "image/jpeg" },
39343
+ ".gif": { format: "gif", family: "image", mimeType: "image/gif" },
39344
+ ".webp": { format: "webp", family: "image", mimeType: "image/webp" },
39345
+ ".svg": { format: "svg", family: "image", mimeType: "image/svg+xml" }
39346
+ };
39347
+ var MIME_MAP = {
39348
+ "application/pdf": { format: "pdf", family: "pdf" },
39349
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document": { format: "docx", family: "office" },
39350
+ "application/vnd.openxmlformats-officedocument.presentationml.presentation": { format: "pptx", family: "office" },
39351
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": { format: "xlsx", family: "spreadsheet" },
39352
+ "application/vnd.oasis.opendocument.text": { format: "odt", family: "office" },
39353
+ "application/vnd.oasis.opendocument.presentation": { format: "odp", family: "office" },
39354
+ "application/vnd.oasis.opendocument.spreadsheet": { format: "ods", family: "office" },
39355
+ "application/rtf": { format: "rtf", family: "office" },
39356
+ "text/rtf": { format: "rtf", family: "office" },
39357
+ "text/csv": { format: "csv", family: "spreadsheet" },
39358
+ "application/csv": { format: "csv", family: "spreadsheet" }
39359
+ };
39360
+ var BINARY_DOCUMENT_EXTENSIONS = /* @__PURE__ */ new Set([
39361
+ ".docx",
39362
+ ".pptx",
39363
+ ".xlsx",
39364
+ ".odt",
39365
+ ".odp",
39366
+ ".ods",
39367
+ ".pdf",
39368
+ ".png",
39369
+ ".jpg",
39370
+ ".jpeg",
39371
+ ".gif",
39372
+ ".webp"
39373
+ ]);
39374
+ var FormatDetector = class _FormatDetector {
39375
+ /**
39376
+ * Detect format from filename and optional buffer
39377
+ */
39378
+ static detect(filename, _buffer) {
39379
+ const ext = _FormatDetector.getExtension(filename);
39380
+ const entry = EXTENSION_MAP[ext];
39381
+ if (!entry) {
39382
+ return {
39383
+ format: "txt",
39384
+ family: "text",
39385
+ mimeType: "text/plain",
39386
+ confidence: "low"
39387
+ };
39388
+ }
39389
+ return {
39390
+ format: entry.format,
39391
+ family: entry.family,
39392
+ mimeType: entry.mimeType,
39393
+ confidence: "high"
39394
+ };
39395
+ }
39396
+ /**
39397
+ * Check if an extension is a supported document format
39398
+ * Used by readFile to detect when to use DocumentReader
39399
+ */
39400
+ static isDocumentFormat(ext) {
39401
+ const normalizedExt = ext.startsWith(".") ? ext.toLowerCase() : `.${ext.toLowerCase()}`;
39402
+ return normalizedExt in EXTENSION_MAP;
39403
+ }
39404
+ /**
39405
+ * Check if an extension is a binary document format
39406
+ * (i.e., cannot be read as UTF-8)
39407
+ */
39408
+ static isBinaryDocumentFormat(ext) {
39409
+ const normalizedExt = ext.startsWith(".") ? ext.toLowerCase() : `.${ext.toLowerCase()}`;
39410
+ return BINARY_DOCUMENT_EXTENSIONS.has(normalizedExt);
39411
+ }
39412
+ /**
39413
+ * Check if a Content-Type header indicates a document format
39414
+ * Used by webFetch to detect downloadable documents
39415
+ */
39416
+ static isDocumentMimeType(contentType) {
39417
+ const mime = (contentType.split(";")[0] ?? "").trim().toLowerCase();
39418
+ return mime in MIME_MAP;
39419
+ }
39420
+ /**
39421
+ * Detect format from Content-Type header
39422
+ */
39423
+ static detectFromMimeType(contentType) {
39424
+ const mime = (contentType.split(";")[0] ?? "").trim().toLowerCase();
39425
+ const entry = MIME_MAP[mime];
39426
+ if (!entry) return null;
39427
+ const extEntry = Object.values(EXTENSION_MAP).find(
39428
+ (e) => e.format === entry.format
39429
+ );
39430
+ return {
39431
+ format: entry.format,
39432
+ family: entry.family,
39433
+ mimeType: extEntry?.mimeType || mime,
39434
+ confidence: "high"
39435
+ };
39436
+ }
39437
+ /**
39438
+ * Get all supported document extensions
39439
+ */
39440
+ static getSupportedExtensions() {
39441
+ return Object.keys(EXTENSION_MAP);
39442
+ }
39443
+ /**
39444
+ * Get the normalized extension from a filename
39445
+ */
39446
+ static getExtension(filename) {
39447
+ const lastDot = filename.lastIndexOf(".");
39448
+ if (lastDot === -1 || lastDot === filename.length - 1) return "";
39449
+ return filename.slice(lastDot).toLowerCase();
39450
+ }
39451
+ };
39452
+
39453
+ // src/capabilities/documents/DocumentReader.ts
39454
+ var DocumentReader = class _DocumentReader {
39455
+ handlers;
39456
+ config;
39457
+ constructor(config = {}) {
39458
+ this.config = config;
39459
+ this.handlers = config.handlers ? new Map(config.handlers) : /* @__PURE__ */ new Map();
39460
+ }
39461
+ /**
39462
+ * Create a new DocumentReader instance
39463
+ */
39464
+ static create(config = {}) {
39465
+ const reader = new _DocumentReader(config);
39466
+ reader.registerDefaultHandlers();
39467
+ return reader;
39468
+ }
39469
+ /**
39470
+ * Register all default format handlers (lazy-loaded)
39471
+ */
39472
+ registerDefaultHandlers() {
39473
+ }
39474
+ /**
39475
+ * Register a custom format handler
39476
+ */
39477
+ registerHandler(family, handler) {
39478
+ this.handlers.set(family, handler);
39479
+ }
39480
+ /**
39481
+ * Read a document from any source
39482
+ */
39483
+ async read(source, options = {}) {
39484
+ const startTime = Date.now();
39485
+ const warnings = [];
39486
+ const mergedOptions = {
39487
+ ...this.config.defaults,
39488
+ ...options,
39489
+ formatOptions: {
39490
+ ...this.config.defaults?.formatOptions,
39491
+ ...options.formatOptions
39492
+ },
39493
+ imageFilter: {
39494
+ ...this.config.defaults?.imageFilter,
39495
+ ...options.imageFilter
39496
+ }
39497
+ };
39498
+ try {
39499
+ const { buffer, filename } = await this.resolveSource(
39500
+ typeof source === "string" ? this.parseStringSource(source) : source
39501
+ );
39502
+ const detection = FormatDetector.detect(filename, buffer);
39503
+ const handler = await this.getHandler(detection.family);
39504
+ if (!handler) {
39505
+ throw new UnsupportedFormatError(detection.format, detection.family);
39506
+ }
39507
+ let pieces = await handler.handle(buffer, filename, detection.format, mergedOptions);
39508
+ if (mergedOptions.extractImages !== false) {
39509
+ pieces = this.filterImages(pieces, mergedOptions.imageFilter);
39510
+ } else {
39511
+ pieces = pieces.filter((p) => p.type !== "image");
39512
+ }
39513
+ const transformerContext = {
39514
+ filename,
39515
+ format: detection.format,
39516
+ family: detection.family,
39517
+ options: mergedOptions
39518
+ };
39519
+ pieces = await this.runTransformers(pieces, transformerContext, mergedOptions);
39520
+ const metadata = this.assembleMetadata(pieces, filename, detection, startTime);
39521
+ return {
39522
+ success: true,
39523
+ pieces,
39524
+ metadata,
39525
+ warnings
39526
+ };
39527
+ } catch (error) {
39528
+ if (error instanceof DocumentReadError || error instanceof UnsupportedFormatError) {
39529
+ throw error;
39530
+ }
39531
+ throw new DocumentReadError(
39532
+ typeof source === "string" ? source : "path" in source ? source.path : "filename" in source ? source.filename : "unknown",
39533
+ error instanceof Error ? error.message : String(error),
39534
+ error instanceof Error ? error : void 0
39535
+ );
39536
+ }
39537
+ }
39538
+ /**
39539
+ * Parse a string source (auto-detect path vs URL)
39540
+ */
39541
+ parseStringSource(source) {
39542
+ if (source.startsWith("http://") || source.startsWith("https://")) {
39543
+ return { type: "url", url: source };
39544
+ }
39545
+ return { type: "file", path: source };
39546
+ }
39547
+ /**
39548
+ * Resolve any source to a buffer and filename
39549
+ */
39550
+ async resolveSource(source) {
39551
+ switch (source.type) {
39552
+ case "file": {
39553
+ const buffer = await fs15.readFile(source.path);
39554
+ const filename = source.path.split("/").pop() || source.path;
39555
+ return { buffer, filename };
39556
+ }
39557
+ case "url": {
39558
+ const maxSize = this.config.maxDownloadSizeBytes ?? DOCUMENT_DEFAULTS.MAX_DOWNLOAD_SIZE_BYTES;
39559
+ const timeout = this.config.downloadTimeoutMs ?? DOCUMENT_DEFAULTS.DOWNLOAD_TIMEOUT_MS;
39560
+ const controller = new AbortController();
39561
+ const timeoutId = setTimeout(() => controller.abort(), timeout);
39562
+ try {
39563
+ const response = await fetch(source.url, {
39564
+ headers: {
39565
+ ...source.headers,
39566
+ "User-Agent": "OneRingAI-DocumentReader/1.0"
39567
+ },
39568
+ signal: controller.signal
39569
+ });
39570
+ clearTimeout(timeoutId);
39571
+ if (!response.ok) {
39572
+ throw new Error(`HTTP ${response.status}: ${response.statusText}`);
39573
+ }
39574
+ const contentLength = response.headers.get("content-length");
39575
+ if (contentLength && parseInt(contentLength, 10) > maxSize) {
39576
+ throw new Error(`File too large: ${contentLength} bytes (max: ${maxSize})`);
39577
+ }
39578
+ const arrayBuffer = await response.arrayBuffer();
39579
+ if (arrayBuffer.byteLength > maxSize) {
39580
+ throw new Error(`Downloaded file too large: ${arrayBuffer.byteLength} bytes (max: ${maxSize})`);
39581
+ }
39582
+ const filename = this.extractFilenameFromURL(source.url, response);
39583
+ return { buffer: Buffer.from(arrayBuffer), filename };
39584
+ } catch (error) {
39585
+ clearTimeout(timeoutId);
39586
+ if (error.name === "AbortError") {
39587
+ throw new Error(`Download timed out after ${timeout}ms`);
39588
+ }
39589
+ throw error;
39590
+ }
39591
+ }
39592
+ case "buffer": {
39593
+ const buffer = Buffer.isBuffer(source.buffer) ? source.buffer : Buffer.from(source.buffer);
39594
+ return { buffer, filename: source.filename };
39595
+ }
39596
+ case "blob": {
39597
+ const arrayBuffer = await source.blob.arrayBuffer();
39598
+ return { buffer: Buffer.from(arrayBuffer), filename: source.filename };
39599
+ }
39600
+ }
39601
+ }
39602
+ /**
39603
+ * Extract filename from URL and response headers
39604
+ */
39605
+ extractFilenameFromURL(url2, response) {
39606
+ const disposition = response.headers.get("content-disposition");
39607
+ if (disposition) {
39608
+ const match = disposition.match(/filename[^;=\n]*=(['"]?)([^'"\n;]*)\1/);
39609
+ if (match?.[2]) return match[2];
39610
+ }
39611
+ try {
39612
+ const pathname = new URL(url2).pathname;
39613
+ const basename = pathname.split("/").pop();
39614
+ if (basename && basename.includes(".")) return basename;
39615
+ } catch {
39616
+ }
39617
+ return "document";
39618
+ }
39619
+ /**
39620
+ * Get the handler for a format family, loading defaults lazily
39621
+ */
39622
+ async getHandler(family) {
39623
+ if (this.handlers.has(family)) {
39624
+ return this.handlers.get(family);
39625
+ }
39626
+ try {
39627
+ const { getDefaultHandlers: getDefaultHandlers2 } = await Promise.resolve().then(() => (init_handlers(), handlers_exports));
39628
+ const defaults = getDefaultHandlers2();
39629
+ const handler = defaults.get(family);
39630
+ if (handler) {
39631
+ this.handlers.set(family, handler);
39632
+ return handler;
39633
+ }
39634
+ } catch {
39635
+ }
39636
+ return null;
39637
+ }
39638
+ /**
39639
+ * Filter images based on options
39640
+ */
39641
+ filterImages(pieces, filterOptions) {
39642
+ const minWidth = filterOptions?.minWidth ?? DOCUMENT_DEFAULTS.IMAGE_FILTER.MIN_WIDTH;
39643
+ const minHeight = filterOptions?.minHeight ?? DOCUMENT_DEFAULTS.IMAGE_FILTER.MIN_HEIGHT;
39644
+ const minSizeBytes = filterOptions?.minSizeBytes ?? DOCUMENT_DEFAULTS.IMAGE_FILTER.MIN_SIZE_BYTES;
39645
+ const maxImages = filterOptions?.maxImages ?? DOCUMENT_DEFAULTS.MAX_EXTRACTED_IMAGES;
39646
+ const excludePatterns = filterOptions?.excludePatterns ?? [];
39647
+ let imageCount = 0;
39648
+ return pieces.filter((piece) => {
39649
+ if (piece.type !== "image") return true;
39650
+ const img = piece;
39651
+ if (img.width !== void 0 && img.width < minWidth) return false;
39652
+ if (img.height !== void 0 && img.height < minHeight) return false;
39653
+ if (img.metadata.sizeBytes < minSizeBytes) return false;
39654
+ const label = img.metadata.label || "";
39655
+ if (excludePatterns.some((p) => p.test(label))) return false;
39656
+ imageCount++;
39657
+ if (imageCount > maxImages) return false;
39658
+ return true;
39659
+ });
39660
+ }
39661
+ /**
39662
+ * Run the transformer pipeline
39663
+ */
39664
+ async runTransformers(pieces, context, options) {
39665
+ const transformers = [];
39666
+ if (!options.skipDefaultTransformers) {
39667
+ try {
39668
+ const { getDefaultTransformers: getDefaultTransformers2 } = await Promise.resolve().then(() => (init_transformers(), transformers_exports));
39669
+ transformers.push(...getDefaultTransformers2());
39670
+ } catch {
39671
+ }
39672
+ }
39673
+ if (options.transformers) {
39674
+ transformers.push(...options.transformers);
39675
+ }
39676
+ transformers.sort((a, b) => (a.priority ?? 100) - (b.priority ?? 100));
39677
+ let result = pieces;
39678
+ for (const transformer of transformers) {
39679
+ if (transformer.appliesTo.length === 0 || transformer.appliesTo.includes(context.format)) {
39680
+ result = await transformer.transform(result, context);
39681
+ }
39682
+ }
39683
+ return result;
39684
+ }
39685
+ /**
39686
+ * Assemble metadata from pieces
39687
+ */
39688
+ assembleMetadata(pieces, filename, detection, startTime) {
39689
+ const textPieces = pieces.filter((p) => p.type === "text");
39690
+ const imagePieces = pieces.filter((p) => p.type === "image");
39691
+ const totalSizeBytes = pieces.reduce((sum, p) => sum + p.metadata.sizeBytes, 0);
39692
+ const estimatedTokens = pieces.reduce((sum, p) => sum + p.metadata.estimatedTokens, 0);
39693
+ return {
39694
+ filename,
39695
+ format: detection.format,
39696
+ family: detection.family,
39697
+ mimeType: detection.mimeType,
39698
+ totalPieces: pieces.length,
39699
+ totalTextPieces: textPieces.length,
39700
+ totalImagePieces: imagePieces.length,
39701
+ totalSizeBytes,
39702
+ estimatedTokens,
39703
+ processingTimeMs: Date.now() - startTime
39704
+ };
39705
+ }
39706
+ };
39707
+ function mergeTextPieces(pieces) {
39708
+ return pieces.filter((p) => p.type === "text").map((p) => p.content).join("\n\n");
39709
+ }
39710
+
39711
+ // src/capabilities/documents/index.ts
39712
+ init_handlers();
39713
+ init_transformers();
39714
+
39715
+ // src/utils/documentContentBridge.ts
39716
+ function documentToContent(result, options = {}) {
39717
+ const {
39718
+ imageDetail = "auto",
39719
+ imageFilter,
39720
+ maxImages = 20,
39721
+ mergeAdjacentText = true
39722
+ } = options;
39723
+ const minWidth = imageFilter?.minWidth ?? 0;
39724
+ const minHeight = imageFilter?.minHeight ?? 0;
39725
+ const minSizeBytes = imageFilter?.minSizeBytes ?? 0;
39726
+ const excludePatterns = imageFilter?.excludePatterns ?? [];
39727
+ const contents = [];
39728
+ let imageCount = 0;
39729
+ let pendingText = [];
39730
+ const flushText = () => {
39731
+ if (pendingText.length > 0) {
39732
+ const text = {
39733
+ type: "input_text" /* INPUT_TEXT */,
39734
+ text: pendingText.join("\n\n")
39735
+ };
39736
+ contents.push(text);
39737
+ pendingText = [];
39738
+ }
39739
+ };
39740
+ for (const piece of result.pieces) {
39741
+ if (piece.type === "text") {
39742
+ if (mergeAdjacentText) {
39743
+ pendingText.push(piece.content);
39744
+ } else {
39745
+ const text = {
39746
+ type: "input_text" /* INPUT_TEXT */,
39747
+ text: piece.content
39748
+ };
39749
+ contents.push(text);
39750
+ }
39751
+ } else if (piece.type === "image") {
39752
+ if (piece.width !== void 0 && piece.width < minWidth) continue;
39753
+ if (piece.height !== void 0 && piece.height < minHeight) continue;
39754
+ if (piece.metadata.sizeBytes < minSizeBytes) continue;
39755
+ const label = piece.metadata.label || "";
39756
+ if (excludePatterns.some((p) => p.test(label))) continue;
39757
+ imageCount++;
39758
+ if (imageCount > maxImages) continue;
39759
+ flushText();
39760
+ const imageContent = {
39761
+ type: "input_image_url" /* INPUT_IMAGE_URL */,
39762
+ image_url: {
39763
+ url: `data:${piece.mimeType};base64,${piece.base64}`,
39764
+ detail: imageDetail
39765
+ }
39766
+ };
39767
+ contents.push(imageContent);
39768
+ }
39769
+ }
39770
+ flushText();
39771
+ return contents;
39772
+ }
39773
+ async function readDocumentAsContent(source, options = {}) {
39774
+ const {
39775
+ imageDetail,
39776
+ maxImages,
39777
+ mergeAdjacentText,
39778
+ // imageFilter is shared between both
39779
+ ...readOptions
39780
+ } = options;
39781
+ const contentOptions = {
39782
+ imageDetail,
39783
+ imageFilter: options.imageFilter,
39784
+ maxImages,
39785
+ mergeAdjacentText
39786
+ };
39787
+ const reader = DocumentReader.create();
39788
+ const result = await reader.read(source, readOptions);
39789
+ if (!result.success) {
39790
+ return [
39791
+ {
39792
+ type: "input_text" /* INPUT_TEXT */,
39793
+ text: `[Document read error: ${result.error || "Unknown error"}]`
39794
+ }
39795
+ ];
39796
+ }
39797
+ return documentToContent(result, contentOptions);
39798
+ }
39799
+
38594
39800
  // src/domain/interfaces/IContextStorage.ts
38595
39801
  var CONTEXT_SESSION_FORMAT_VERSION = 1;
38596
39802
 
@@ -40531,6 +41737,21 @@ var ApproximateTokenEstimator = class {
40531
41737
  return 100;
40532
41738
  }
40533
41739
  }
41740
+ /**
41741
+ * Estimate tokens for an image using tile-based model (matches OpenAI pricing).
41742
+ *
41743
+ * - detail='low': 85 tokens
41744
+ * - detail='high' with known dimensions: 85 + 170 per 512×512 tile
41745
+ * - Unknown dimensions: 1000 tokens (conservative default)
41746
+ */
41747
+ estimateImageTokens(width, height, detail) {
41748
+ if (detail === "low") return 85;
41749
+ if (width && height) {
41750
+ const tiles = Math.ceil(width / 512) * Math.ceil(height / 512);
41751
+ return 85 + 170 * tiles;
41752
+ }
41753
+ return 1e3;
41754
+ }
40534
41755
  };
40535
41756
 
40536
41757
  // src/infrastructure/context/estimators/index.ts
@@ -40758,10 +41979,17 @@ var FileContextStorage = class {
40758
41979
  }
40759
41980
  /**
40760
41981
  * Get the storage path (for display/debugging)
41982
+ * @deprecated Use getLocation() instead
40761
41983
  */
40762
41984
  getPath() {
40763
41985
  return this.sessionsDirectory;
40764
41986
  }
41987
+ /**
41988
+ * Get a human-readable storage location string (for display/debugging)
41989
+ */
41990
+ getLocation() {
41991
+ return this.sessionsDirectory;
41992
+ }
40765
41993
  /**
40766
41994
  * Get the agent ID
40767
41995
  */
@@ -41133,7 +42361,7 @@ var FileAgentDefinitionStorage = class {
41133
42361
  function createFileAgentDefinitionStorage(config) {
41134
42362
  return new FileAgentDefinitionStorage(config);
41135
42363
  }
41136
- var MIME_TYPES = {
42364
+ var MIME_TYPES2 = {
41137
42365
  png: "image/png",
41138
42366
  jpeg: "image/jpeg",
41139
42367
  jpg: "image/jpeg",
@@ -41163,7 +42391,7 @@ var FileMediaStorage = class {
41163
42391
  const filePath = path2__namespace.join(dir, filename);
41164
42392
  await fs15__namespace.writeFile(filePath, data);
41165
42393
  const format = metadata.format.toLowerCase();
41166
- const mimeType = MIME_TYPES[format] ?? "application/octet-stream";
42394
+ const mimeType = MIME_TYPES2[format] ?? "application/octet-stream";
41167
42395
  return {
41168
42396
  location: filePath,
41169
42397
  mimeType,
@@ -41208,7 +42436,7 @@ var FileMediaStorage = class {
41208
42436
  const stat6 = await fs15__namespace.stat(filePath);
41209
42437
  if (!stat6.isFile()) continue;
41210
42438
  const ext = path2__namespace.extname(file).slice(1).toLowerCase();
41211
- const mimeType = MIME_TYPES[ext] ?? "application/octet-stream";
42439
+ const mimeType = MIME_TYPES2[ext] ?? "application/octet-stream";
41212
42440
  let type;
41213
42441
  for (const prefix of MEDIA_TYPE_PREFIXES) {
41214
42442
  if (file.startsWith(`${prefix}_`)) {
@@ -45485,13 +46713,30 @@ function extractNumber(text, patterns = [
45485
46713
  var tools_exports = {};
45486
46714
  __export(tools_exports, {
45487
46715
  ConnectorTools: () => ConnectorTools,
46716
+ DEFAULT_DESKTOP_CONFIG: () => DEFAULT_DESKTOP_CONFIG,
45488
46717
  DEFAULT_FILESYSTEM_CONFIG: () => DEFAULT_FILESYSTEM_CONFIG,
45489
46718
  DEFAULT_SHELL_CONFIG: () => DEFAULT_SHELL_CONFIG,
46719
+ DESKTOP_TOOL_NAMES: () => DESKTOP_TOOL_NAMES,
46720
+ DocumentReader: () => DocumentReader,
45490
46721
  FileMediaOutputHandler: () => FileMediaStorage,
46722
+ FormatDetector: () => FormatDetector,
46723
+ NutTreeDriver: () => NutTreeDriver,
45491
46724
  ToolRegistry: () => ToolRegistry,
46725
+ applyHumanDelay: () => applyHumanDelay,
45492
46726
  bash: () => bash,
45493
46727
  createBashTool: () => createBashTool,
45494
46728
  createCreatePRTool: () => createCreatePRTool,
46729
+ createDesktopGetCursorTool: () => createDesktopGetCursorTool,
46730
+ createDesktopGetScreenSizeTool: () => createDesktopGetScreenSizeTool,
46731
+ createDesktopKeyboardKeyTool: () => createDesktopKeyboardKeyTool,
46732
+ createDesktopKeyboardTypeTool: () => createDesktopKeyboardTypeTool,
46733
+ createDesktopMouseClickTool: () => createDesktopMouseClickTool,
46734
+ createDesktopMouseDragTool: () => createDesktopMouseDragTool,
46735
+ createDesktopMouseMoveTool: () => createDesktopMouseMoveTool,
46736
+ createDesktopMouseScrollTool: () => createDesktopMouseScrollTool,
46737
+ createDesktopScreenshotTool: () => createDesktopScreenshotTool,
46738
+ createDesktopWindowFocusTool: () => createDesktopWindowFocusTool,
46739
+ createDesktopWindowListTool: () => createDesktopWindowListTool,
45495
46740
  createEditFileTool: () => createEditFileTool,
45496
46741
  createExecuteJavaScriptTool: () => createExecuteJavaScriptTool,
45497
46742
  createGetPRTool: () => createGetPRTool,
@@ -45511,12 +46756,25 @@ __export(tools_exports, {
45511
46756
  createWebScrapeTool: () => createWebScrapeTool,
45512
46757
  createWebSearchTool: () => createWebSearchTool,
45513
46758
  createWriteFileTool: () => createWriteFileTool,
46759
+ desktopGetCursor: () => desktopGetCursor,
46760
+ desktopGetScreenSize: () => desktopGetScreenSize,
46761
+ desktopKeyboardKey: () => desktopKeyboardKey,
46762
+ desktopKeyboardType: () => desktopKeyboardType,
46763
+ desktopMouseClick: () => desktopMouseClick,
46764
+ desktopMouseDrag: () => desktopMouseDrag,
46765
+ desktopMouseMove: () => desktopMouseMove,
46766
+ desktopMouseScroll: () => desktopMouseScroll,
46767
+ desktopScreenshot: () => desktopScreenshot,
46768
+ desktopTools: () => desktopTools,
46769
+ desktopWindowFocus: () => desktopWindowFocus,
46770
+ desktopWindowList: () => desktopWindowList,
45514
46771
  developerTools: () => developerTools,
45515
46772
  editFile: () => editFile,
45516
46773
  executeJavaScript: () => executeJavaScript,
45517
46774
  expandTilde: () => expandTilde,
45518
46775
  getAllBuiltInTools: () => getAllBuiltInTools,
45519
46776
  getBackgroundOutput: () => getBackgroundOutput,
46777
+ getDesktopDriver: () => getDesktopDriver,
45520
46778
  getMediaOutputHandler: () => getMediaOutputHandler,
45521
46779
  getMediaStorage: () => getMediaStorage,
45522
46780
  getToolByName: () => getToolByName,
@@ -45531,8 +46789,11 @@ __export(tools_exports, {
45531
46789
  jsonManipulator: () => jsonManipulator,
45532
46790
  killBackgroundProcess: () => killBackgroundProcess,
45533
46791
  listDirectory: () => listDirectory,
46792
+ mergeTextPieces: () => mergeTextPieces,
46793
+ parseKeyCombo: () => parseKeyCombo,
45534
46794
  parseRepository: () => parseRepository,
45535
46795
  readFile: () => readFile5,
46796
+ resetDefaultDriver: () => resetDefaultDriver,
45536
46797
  resolveRepository: () => resolveRepository,
45537
46798
  setMediaOutputHandler: () => setMediaOutputHandler,
45538
46799
  setMediaStorage: () => setMediaStorage,
@@ -45578,13 +46839,11 @@ var DEFAULT_FILESYSTEM_CONFIG = {
45578
46839
  ".avi",
45579
46840
  ".mov",
45580
46841
  ".mkv",
45581
- ".pdf",
46842
+ // Note: .pdf, .docx, .xlsx, .pptx are NOT excluded — DocumentReader handles them
45582
46843
  ".doc",
45583
- ".docx",
45584
46844
  ".xls",
45585
- ".xlsx",
45586
46845
  ".ppt",
45587
- ".pptx",
46846
+ // Legacy Office formats not yet supported
45588
46847
  ".woff",
45589
46848
  ".woff2",
45590
46849
  ".ttf",
@@ -45592,6 +46851,9 @@ var DEFAULT_FILESYSTEM_CONFIG = {
45592
46851
  ".otf"
45593
46852
  ]
45594
46853
  };
46854
+ function toForwardSlash(p) {
46855
+ return path2.sep === "\\" ? p.replace(/\\/g, "/") : p;
46856
+ }
45595
46857
  function validatePath(inputPath, config = {}) {
45596
46858
  const workingDir = config.workingDirectory || process.cwd();
45597
46859
  const allowedDirs = config.allowedDirectories || [];
@@ -45608,7 +46870,8 @@ function validatePath(inputPath, config = {}) {
45608
46870
  } else {
45609
46871
  resolvedPath = path2.resolve(workingDir, expandedPath);
45610
46872
  }
45611
- const pathSegments = resolvedPath.split("/").filter(Boolean);
46873
+ const normalizedResolved = toForwardSlash(resolvedPath);
46874
+ const pathSegments = normalizedResolved.split("/").filter(Boolean);
45612
46875
  for (const blocked of blockedDirs) {
45613
46876
  if (!blocked.includes("/")) {
45614
46877
  if (pathSegments.includes(blocked)) {
@@ -45619,8 +46882,8 @@ function validatePath(inputPath, config = {}) {
45619
46882
  };
45620
46883
  }
45621
46884
  } else {
45622
- const blockedPath = path2.isAbsolute(blocked) ? blocked : path2.resolve(workingDir, blocked);
45623
- if (resolvedPath.startsWith(blockedPath + "/") || resolvedPath === blockedPath) {
46885
+ const blockedPath = toForwardSlash(path2.isAbsolute(blocked) ? blocked : path2.resolve(workingDir, blocked));
46886
+ if (normalizedResolved.startsWith(blockedPath + "/") || normalizedResolved === blockedPath) {
45624
46887
  return {
45625
46888
  valid: false,
45626
46889
  resolvedPath,
@@ -45632,8 +46895,8 @@ function validatePath(inputPath, config = {}) {
45632
46895
  if (allowedDirs.length > 0) {
45633
46896
  let isAllowed = false;
45634
46897
  for (const allowed of allowedDirs) {
45635
- const allowedPath = path2.isAbsolute(allowed) ? allowed : path2.resolve(workingDir, allowed);
45636
- if (resolvedPath.startsWith(allowedPath + "/") || resolvedPath === allowedPath) {
46898
+ const allowedPath = toForwardSlash(path2.isAbsolute(allowed) ? allowed : path2.resolve(workingDir, allowed));
46899
+ if (normalizedResolved.startsWith(allowedPath + "/") || normalizedResolved === allowedPath) {
45637
46900
  isAllowed = true;
45638
46901
  break;
45639
46902
  }
@@ -45667,7 +46930,7 @@ function createReadFileTool(config = {}) {
45667
46930
  type: "function",
45668
46931
  function: {
45669
46932
  name: "read_file",
45670
- description: `Read content from a file on the local filesystem.
46933
+ description: `Read content from a file on the local filesystem. Supports text files AND binary document formats \u2014 PDF, DOCX, PPTX, XLSX, ODS, ODT, ODP, and images (PNG, JPG, GIF, WEBP) are automatically converted to markdown text.
45671
46934
 
45672
46935
  USAGE:
45673
46936
  - The file_path parameter must be an absolute path, not a relative path
@@ -45676,20 +46939,34 @@ USAGE:
45676
46939
  - Any lines longer than 2000 characters will be truncated
45677
46940
  - Results are returned with line numbers starting at 1
45678
46941
 
46942
+ DOCUMENT SUPPORT:
46943
+ - PDF files: extracted as markdown text with per-page sections
46944
+ - Word documents (.docx): converted to markdown preserving headings, lists, tables
46945
+ - PowerPoint (.pptx): extracted slide-by-slide as markdown
46946
+ - Excel (.xlsx) / CSV / ODS: tables converted to markdown tables
46947
+ - OpenDocument (.odt, .odp, .ods): converted like their MS Office equivalents
46948
+ - Images (.png, .jpg, .gif, .webp): described as image metadata
46949
+ - Binary documents are auto-detected by extension \u2014 just pass the file path
46950
+
45679
46951
  WHEN TO USE:
45680
46952
  - To read source code files before making edits
45681
46953
  - To understand file contents and structure
45682
46954
  - To read configuration files
45683
46955
  - To examine log files or data files
46956
+ - To read PDF, Word, Excel, PowerPoint, or other document files as text
45684
46957
 
45685
46958
  IMPORTANT:
45686
46959
  - Always read a file before attempting to edit it
45687
46960
  - Use offset/limit for very large files to read in chunks
45688
46961
  - The tool will return an error if the file doesn't exist
46962
+ - offset/limit are ignored for binary document formats (full document is always returned)
45689
46963
 
45690
46964
  EXAMPLES:
45691
46965
  - Read entire file: { "file_path": "/path/to/file.ts" }
45692
- - Read lines 100-200: { "file_path": "/path/to/file.ts", "offset": 100, "limit": 100 }`,
46966
+ - Read lines 100-200: { "file_path": "/path/to/file.ts", "offset": 100, "limit": 100 }
46967
+ - Read a PDF: { "file_path": "/path/to/report.pdf" }
46968
+ - Read an Excel file: { "file_path": "/path/to/data.xlsx" }
46969
+ - Read a Word doc: { "file_path": "/path/to/document.docx" }`,
45693
46970
  parameters: {
45694
46971
  type: "object",
45695
46972
  properties: {
@@ -45751,6 +47028,32 @@ EXAMPLES:
45751
47028
  size: stats.size
45752
47029
  };
45753
47030
  }
47031
+ const ext = path2.extname(resolvedPath).toLowerCase();
47032
+ if (FormatDetector.isBinaryDocumentFormat(ext)) {
47033
+ try {
47034
+ const reader = DocumentReader.create(mergedConfig.documentReaderConfig);
47035
+ const result2 = await reader.read(
47036
+ { type: "file", path: resolvedPath },
47037
+ {
47038
+ extractImages: false,
47039
+ ...mergedConfig.documentReaderConfig?.defaults
47040
+ }
47041
+ );
47042
+ if (result2.success) {
47043
+ const content2 = mergeTextPieces(result2.pieces);
47044
+ return {
47045
+ success: true,
47046
+ content: content2,
47047
+ lines: content2.split("\n").length,
47048
+ truncated: false,
47049
+ encoding: "document",
47050
+ size: stats.size,
47051
+ path: file_path
47052
+ };
47053
+ }
47054
+ } catch {
47055
+ }
47056
+ }
45754
47057
  const content = await fs15.readFile(resolvedPath, "utf-8");
45755
47058
  const allLines = content.split("\n");
45756
47059
  const totalLines = allLines.length;
@@ -46072,7 +47375,7 @@ async function findFiles(dir, pattern, baseDir, config, results = [], depth = 0)
46072
47375
  for (const entry of entries) {
46073
47376
  if (results.length >= config.maxResults) break;
46074
47377
  const fullPath = path2.join(dir, entry.name);
46075
- const relativePath = path2.relative(baseDir, fullPath);
47378
+ const relativePath = toForwardSlash(path2.relative(baseDir, fullPath));
46076
47379
  if (entry.isDirectory()) {
46077
47380
  const isBlocked = config.blockedDirectories.some(
46078
47381
  (blocked) => entry.name === blocked || relativePath.includes(`/${blocked}/`) || relativePath.startsWith(`${blocked}/`)
@@ -46435,7 +47738,7 @@ WHEN TO USE:
46435
47738
  );
46436
47739
  if (matches.length > 0) {
46437
47740
  filesMatched++;
46438
- const relativePath = path2.relative(resolvedPath, file) || file;
47741
+ const relativePath = toForwardSlash(path2.relative(resolvedPath, file)) || file;
46439
47742
  for (const match of matches) {
46440
47743
  match.file = relativePath;
46441
47744
  }
@@ -46501,7 +47804,7 @@ async function listDir(dir, baseDir, config, recursive, filter, maxDepth = 3, cu
46501
47804
  for (const entry of dirEntries) {
46502
47805
  if (entries.length >= config.maxResults) break;
46503
47806
  const fullPath = path2.join(dir, entry.name);
46504
- const relativePath = path2.relative(baseDir, fullPath);
47807
+ const relativePath = toForwardSlash(path2.relative(baseDir, fullPath));
46505
47808
  if (entry.isDirectory() && config.blockedDirectories.includes(entry.name)) {
46506
47809
  continue;
46507
47810
  }
@@ -46716,6 +48019,8 @@ function createBashTool(config = {}) {
46716
48019
  name: "bash",
46717
48020
  description: `Execute shell commands with optional timeout.
46718
48021
 
48022
+ SHELL: This tool uses ${mergedConfig.shell}${process.platform === "win32" ? " (Windows). Use Windows command syntax (dir, type, del, etc.), NOT Unix commands (ls, cat, rm, etc.). Use \\ as path separator or quote paths with forward slashes." : " (Unix). Use standard Unix command syntax."}
48023
+
46719
48024
  USAGE:
46720
48025
  - Execute any shell command
46721
48026
  - Working directory persists between commands
@@ -46732,9 +48037,11 @@ For file operations, prefer dedicated tools:
46732
48037
 
46733
48038
  BEST PRACTICES:
46734
48039
  - Always quote file paths with spaces: cd "/path with spaces"
46735
- - Use absolute paths when possible
48040
+ - Use absolute paths when possible${process.platform === "win32" ? `
48041
+ - Chain dependent commands with &&: git add . && git commit -m "msg"
48042
+ - Use PowerShell syntax if cmd.exe is insufficient` : `
46736
48043
  - Chain dependent commands with &&: git add . && git commit -m "msg"
46737
- - Use ; only when you don't care if earlier commands fail
48044
+ - Use ; only when you don't care if earlier commands fail`}
46738
48045
  - Avoid interactive commands (no -i flags)
46739
48046
 
46740
48047
  GIT SAFETY:
@@ -47451,92 +48758,30 @@ function detectContentQuality(html, text, $) {
47451
48758
  issues
47452
48759
  };
47453
48760
  }
47454
- var JSDOM = null;
47455
- async function getJSDOM() {
47456
- if (!JSDOM) {
47457
- const jsdom = await import('jsdom');
47458
- JSDOM = jsdom.JSDOM;
47459
- }
47460
- return JSDOM;
47461
- }
47462
- async function htmlToMarkdown(html, url2, maxLength = 5e4) {
47463
- const JSDOMClass = await getJSDOM();
47464
- const dom = new JSDOMClass(html, { url: url2 });
47465
- const document = dom.window.document;
47466
- let title = document.title || "";
47467
- let byline;
47468
- let excerpt;
47469
- let contentHtml = html;
47470
- let wasReadabilityUsed = false;
47471
- try {
47472
- const clonedDoc = document.cloneNode(true);
47473
- const reader = new readability.Readability(clonedDoc);
47474
- const article = reader.parse();
47475
- if (article && article.content && article.content.length > 100) {
47476
- contentHtml = article.content;
47477
- title = article.title || title;
47478
- byline = article.byline || void 0;
47479
- excerpt = article.excerpt || void 0;
47480
- wasReadabilityUsed = true;
47481
- }
47482
- } catch {
47483
- }
47484
- const turndown = new TurndownService__default.default({
47485
- headingStyle: "atx",
47486
- codeBlockStyle: "fenced",
47487
- bulletListMarker: "-",
47488
- emDelimiter: "_"
47489
- });
47490
- turndown.remove(["script", "style", "nav", "footer", "aside", "iframe", "noscript"]);
47491
- turndown.addRule("pre", {
47492
- filter: ["pre"],
47493
- replacement: (content, node) => {
47494
- const element = node;
47495
- const code = element.querySelector?.("code");
47496
- const lang = code?.className?.match(/language-(\w+)/)?.[1] || "";
47497
- const text = code?.textContent || content;
47498
- return `
47499
- \`\`\`${lang}
47500
- ${text}
47501
- \`\`\`
47502
- `;
47503
- }
47504
- });
47505
- let markdown = turndown.turndown(contentHtml);
47506
- markdown = markdown.replace(/\n{3,}/g, "\n\n").replace(/^\s+|\s+$/g, "").replace(/[ \t]+$/gm, "");
47507
- let wasTruncated = false;
47508
- if (markdown.length > maxLength) {
47509
- const truncateAt = markdown.lastIndexOf("\n\n", maxLength);
47510
- if (truncateAt > maxLength * 0.5) {
47511
- markdown = markdown.slice(0, truncateAt) + "\n\n...[content truncated]";
47512
- } else {
47513
- markdown = markdown.slice(0, maxLength) + "...[truncated]";
47514
- }
47515
- wasTruncated = true;
47516
- }
47517
- return {
47518
- markdown,
47519
- title,
47520
- byline,
47521
- excerpt,
47522
- wasReadabilityUsed,
47523
- wasTruncated
47524
- };
47525
- }
47526
48761
 
47527
48762
  // src/tools/web/webFetch.ts
48763
+ init_htmlToMarkdown();
47528
48764
  var webFetch = {
47529
48765
  definition: {
47530
48766
  type: "function",
47531
48767
  function: {
47532
48768
  name: "web_fetch",
47533
- description: `Fetch and extract text content from a web page URL.
48769
+ description: `Fetch and extract content from a URL \u2014 works with web pages AND document files (PDF, DOCX, XLSX, PPTX, etc.). Document URLs are automatically detected and converted to markdown text.
47534
48770
 
47535
- IMPORTANT: This tool performs a simple HTTP fetch and HTML parsing. It works well for:
48771
+ WEB PAGES:
48772
+ This tool performs HTTP fetch and HTML parsing. It works well for:
47536
48773
  - Static websites (blogs, documentation, articles)
47537
48774
  - Server-rendered HTML pages
47538
48775
  - Content that doesn't require JavaScript
47539
48776
 
48777
+ DOCUMENT URLs:
48778
+ When the URL points to a document file (detected via Content-Type header or URL extension), the document is automatically downloaded and converted to markdown:
48779
+ - PDF files: extracted as markdown with per-page sections
48780
+ - Word (.docx), PowerPoint (.pptx): converted to structured markdown
48781
+ - Excel (.xlsx), CSV, ODS: tables converted to markdown tables
48782
+ - OpenDocument formats (.odt, .odp, .ods): converted like MS Office equivalents
48783
+ - Returns contentType: "document" and includes documentMetadata in the result
48784
+
47540
48785
  LIMITATIONS:
47541
48786
  - Cannot execute JavaScript
47542
48787
  - May fail on React/Vue/Angular sites (will return low quality score)
@@ -47556,8 +48801,8 @@ RETURNS:
47556
48801
  success: boolean,
47557
48802
  url: string,
47558
48803
  title: string,
47559
- content: string, // Clean markdown (converted from HTML via Readability + Turndown)
47560
- contentType: string, // 'html' | 'json' | 'text' | 'error'
48804
+ content: string, // Clean markdown (converted from HTML or document)
48805
+ contentType: string, // 'html' | 'json' | 'text' | 'document' | 'error'
47561
48806
  qualityScore: number, // 0-100 (quality of extraction)
47562
48807
  requiresJS: boolean, // True if site likely needs JavaScript
47563
48808
  suggestedAction: string, // Suggestion if quality is low
@@ -47565,20 +48810,24 @@ RETURNS:
47565
48810
  excerpt: string, // Short summary excerpt (if extracted)
47566
48811
  byline: string, // Author info (if extracted)
47567
48812
  wasTruncated: boolean, // True if content was truncated
48813
+ documentMetadata: object, // Document metadata (format, pages, etc.) \u2014 only for document URLs
47568
48814
  error: string // Error message if failed
47569
48815
  }
47570
48816
 
47571
- EXAMPLE:
47572
- To fetch a blog post:
48817
+ EXAMPLES:
48818
+ Fetch a blog post:
47573
48819
  {
47574
48820
  url: "https://example.com/blog/article"
47575
48821
  }
47576
48822
 
47577
- With custom user agent:
48823
+ Fetch a PDF document:
48824
+ {
48825
+ url: "https://example.com/reports/q4-2025.pdf"
48826
+ }
48827
+
48828
+ Fetch an Excel spreadsheet:
47578
48829
  {
47579
- url: "https://example.com/page",
47580
- userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64)...",
47581
- timeout: 15000
48830
+ url: "https://example.com/data/metrics.xlsx"
47582
48831
  }`,
47583
48832
  parameters: {
47584
48833
  type: "object",
@@ -47642,6 +48891,51 @@ With custom user agent:
47642
48891
  };
47643
48892
  }
47644
48893
  const contentType = response.headers.get("content-type") || "";
48894
+ const urlExt = (() => {
48895
+ try {
48896
+ const pathname = new URL(args.url).pathname;
48897
+ const ext = pathname.split(".").pop()?.toLowerCase();
48898
+ return ext ? `.${ext}` : "";
48899
+ } catch {
48900
+ return "";
48901
+ }
48902
+ })();
48903
+ if (FormatDetector.isDocumentMimeType(contentType) || FormatDetector.isBinaryDocumentFormat(urlExt)) {
48904
+ try {
48905
+ const arrayBuffer = await response.arrayBuffer();
48906
+ const buffer = Buffer.from(arrayBuffer);
48907
+ const disposition = response.headers.get("content-disposition");
48908
+ let filename = "document";
48909
+ if (disposition) {
48910
+ const match = disposition.match(/filename[^;=\n]*=(['"]?)([^'"\n;]*)\1/);
48911
+ if (match?.[2]) filename = match[2];
48912
+ } else {
48913
+ try {
48914
+ const basename = new URL(args.url).pathname.split("/").pop();
48915
+ if (basename && basename.includes(".")) filename = basename;
48916
+ } catch {
48917
+ }
48918
+ }
48919
+ const reader = DocumentReader.create();
48920
+ const result = await reader.read(
48921
+ { type: "buffer", buffer, filename },
48922
+ { extractImages: false }
48923
+ );
48924
+ if (result.success) {
48925
+ return {
48926
+ success: true,
48927
+ url: args.url,
48928
+ title: `Document: ${filename}`,
48929
+ content: mergeTextPieces(result.pieces),
48930
+ contentType: "document",
48931
+ qualityScore: 100,
48932
+ requiresJS: false,
48933
+ documentMetadata: result.metadata
48934
+ };
48935
+ }
48936
+ } catch {
48937
+ }
48938
+ }
47645
48939
  if (contentType.includes("application/json")) {
47646
48940
  const json = await response.json();
47647
48941
  return {
@@ -47908,6 +49202,10 @@ For JS-heavy sites:
47908
49202
  const api = await tryAPI(args, startTime, attemptedMethods);
47909
49203
  if (api.success) return api;
47910
49204
  if (native.success) return native;
49205
+ const errors = [];
49206
+ if (native.error) errors.push(`native: ${native.error}`);
49207
+ if (api.error) errors.push(`api(${connector.name}): ${api.error}`);
49208
+ const detail = errors.length > 0 ? errors.join(" | ") : "Unknown failure";
47911
49209
  return {
47912
49210
  success: false,
47913
49211
  url: args.url,
@@ -47916,7 +49214,7 @@ For JS-heavy sites:
47916
49214
  content: "",
47917
49215
  durationMs: Date.now() - startTime,
47918
49216
  attemptedMethods,
47919
- error: "All scraping methods failed. Site may have bot protection."
49217
+ error: `All scraping methods failed. ${detail}`
47920
49218
  };
47921
49219
  },
47922
49220
  describeCall: (args) => args.url
@@ -49580,6 +50878,819 @@ function registerGitHubTools() {
49580
50878
  // src/tools/github/index.ts
49581
50879
  registerGitHubTools();
49582
50880
 
50881
+ // src/tools/desktop/types.ts
50882
+ var DEFAULT_DESKTOP_CONFIG = {
50883
+ driver: null,
50884
+ // Lazy-initialized
50885
+ humanDelay: [50, 150],
50886
+ humanizeMovement: false
50887
+ };
50888
+ async function applyHumanDelay(config) {
50889
+ const [min, max] = config.humanDelay ?? DEFAULT_DESKTOP_CONFIG.humanDelay;
50890
+ if (min === 0 && max === 0) return;
50891
+ const delay = min + Math.random() * (max - min);
50892
+ await new Promise((resolve4) => setTimeout(resolve4, delay));
50893
+ }
50894
+ var DESKTOP_TOOL_NAMES = [
50895
+ "desktop_screenshot",
50896
+ "desktop_mouse_move",
50897
+ "desktop_mouse_click",
50898
+ "desktop_mouse_drag",
50899
+ "desktop_mouse_scroll",
50900
+ "desktop_get_cursor",
50901
+ "desktop_keyboard_type",
50902
+ "desktop_keyboard_key",
50903
+ "desktop_get_screen_size",
50904
+ "desktop_window_list",
50905
+ "desktop_window_focus"
50906
+ ];
50907
+
50908
+ // src/tools/desktop/driver/NutTreeDriver.ts
50909
+ var KEY_MAP = {
50910
+ // Modifiers
50911
+ ctrl: "LeftControl",
50912
+ control: "LeftControl",
50913
+ cmd: "LeftCmd",
50914
+ command: "LeftCmd",
50915
+ meta: "LeftCmd",
50916
+ super: "LeftCmd",
50917
+ alt: "LeftAlt",
50918
+ option: "LeftAlt",
50919
+ shift: "LeftShift",
50920
+ // Navigation
50921
+ enter: "Return",
50922
+ return: "Return",
50923
+ tab: "Tab",
50924
+ escape: "Escape",
50925
+ esc: "Escape",
50926
+ backspace: "Backspace",
50927
+ delete: "Delete",
50928
+ space: "Space",
50929
+ // Arrow keys
50930
+ up: "Up",
50931
+ down: "Down",
50932
+ left: "Left",
50933
+ right: "Right",
50934
+ // Function keys
50935
+ f1: "F1",
50936
+ f2: "F2",
50937
+ f3: "F3",
50938
+ f4: "F4",
50939
+ f5: "F5",
50940
+ f6: "F6",
50941
+ f7: "F7",
50942
+ f8: "F8",
50943
+ f9: "F9",
50944
+ f10: "F10",
50945
+ f11: "F11",
50946
+ f12: "F12",
50947
+ // Other
50948
+ home: "Home",
50949
+ end: "End",
50950
+ pageup: "PageUp",
50951
+ pagedown: "PageDown",
50952
+ insert: "Insert",
50953
+ printscreen: "Print",
50954
+ capslock: "CapsLock",
50955
+ numlock: "NumLock",
50956
+ scrolllock: "ScrollLock"
50957
+ };
50958
+ function parseKeyCombo(keys, KeyEnum) {
50959
+ const parts = keys.toLowerCase().split("+").map((k) => k.trim());
50960
+ const result = [];
50961
+ for (const part of parts) {
50962
+ const mapped = KEY_MAP[part];
50963
+ if (mapped && KeyEnum[mapped] !== void 0) {
50964
+ result.push(KeyEnum[mapped]);
50965
+ continue;
50966
+ }
50967
+ if (part.length === 1) {
50968
+ const upper = part.toUpperCase();
50969
+ if (KeyEnum[upper] !== void 0) {
50970
+ result.push(KeyEnum[upper]);
50971
+ continue;
50972
+ }
50973
+ }
50974
+ const pascal = part.charAt(0).toUpperCase() + part.slice(1);
50975
+ if (KeyEnum[pascal] !== void 0) {
50976
+ result.push(KeyEnum[pascal]);
50977
+ continue;
50978
+ }
50979
+ if (KeyEnum[part] !== void 0) {
50980
+ result.push(KeyEnum[part]);
50981
+ continue;
50982
+ }
50983
+ throw new Error(`Unknown key: "${part}". Available modifiers: ctrl, cmd, alt, shift. Common keys: enter, tab, escape, space, up, down, left, right, f1-f12, a-z, 0-9`);
50984
+ }
50985
+ return result;
50986
+ }
50987
+ async function encodeRGBAToPNG(data, width, height) {
50988
+ const { PNG } = await import('pngjs');
50989
+ const png = new PNG({ width, height });
50990
+ const sourceBuffer = Buffer.isBuffer(data) ? data : Buffer.from(data);
50991
+ sourceBuffer.copy(png.data, 0, 0, width * height * 4);
50992
+ return PNG.sync.write(png);
50993
+ }
50994
+ var NutTreeDriver = class {
50995
+ _isInitialized = false;
50996
+ _scaleFactor = 1;
50997
+ // Lazy-loaded nut-tree modules
50998
+ _nut = null;
50999
+ // Cache of Window objects keyed by windowHandle, populated by getWindowList()
51000
+ _windowCache = /* @__PURE__ */ new Map();
51001
+ get isInitialized() {
51002
+ return this._isInitialized;
51003
+ }
51004
+ get scaleFactor() {
51005
+ return this._scaleFactor;
51006
+ }
51007
+ async initialize() {
51008
+ if (this._isInitialized) return;
51009
+ try {
51010
+ this._nut = await import('@nut-tree-fork/nut-js');
51011
+ } catch {
51012
+ throw new Error(
51013
+ "@nut-tree-fork/nut-js is not installed. Install it to use desktop automation tools:\n npm install @nut-tree-fork/nut-js"
51014
+ );
51015
+ }
51016
+ try {
51017
+ const { mouse, keyboard } = this._nut;
51018
+ if (mouse.config) {
51019
+ mouse.config.mouseSpeed = 1e4;
51020
+ mouse.config.autoDelayMs = 0;
51021
+ }
51022
+ if (keyboard.config) {
51023
+ keyboard.config.autoDelayMs = 0;
51024
+ }
51025
+ } catch {
51026
+ }
51027
+ try {
51028
+ const { screen } = this._nut;
51029
+ const logicalWidth = await screen.width();
51030
+ const screenshotImage = await screen.grab();
51031
+ const physicalWidth = screenshotImage.width;
51032
+ this._scaleFactor = physicalWidth / logicalWidth;
51033
+ } catch (err) {
51034
+ if (err.message?.includes("permission") || err.message?.includes("accessibility")) {
51035
+ throw new Error(
51036
+ "Desktop automation requires accessibility permissions.\nOn macOS: System Settings \u2192 Privacy & Security \u2192 Accessibility \u2192 Enable your terminal app."
51037
+ );
51038
+ }
51039
+ this._scaleFactor = 1;
51040
+ }
51041
+ this._isInitialized = true;
51042
+ }
51043
+ assertInitialized() {
51044
+ if (!this._isInitialized) {
51045
+ throw new Error("NutTreeDriver not initialized. Call initialize() first.");
51046
+ }
51047
+ }
51048
+ /** Convert physical (screenshot) coords to logical (OS) coords */
51049
+ toLogical(x, y) {
51050
+ return {
51051
+ x: Math.round(x / this._scaleFactor),
51052
+ y: Math.round(y / this._scaleFactor)
51053
+ };
51054
+ }
51055
+ /** Convert logical (OS) coords to physical (screenshot) coords */
51056
+ toPhysical(x, y) {
51057
+ return {
51058
+ x: Math.round(x * this._scaleFactor),
51059
+ y: Math.round(y * this._scaleFactor)
51060
+ };
51061
+ }
51062
+ // ===== Screen =====
51063
+ async screenshot(region) {
51064
+ this.assertInitialized();
51065
+ const { screen } = this._nut;
51066
+ let image;
51067
+ if (region) {
51068
+ const { Region } = this._nut;
51069
+ const logTopLeft = this.toLogical(region.x, region.y);
51070
+ const logicalWidth = Math.round(region.width / this._scaleFactor);
51071
+ const logicalHeight = Math.round(region.height / this._scaleFactor);
51072
+ const nutRegion = new Region(logTopLeft.x, logTopLeft.y, logicalWidth, logicalHeight);
51073
+ image = await screen.grabRegion(nutRegion);
51074
+ } else {
51075
+ image = await screen.grab();
51076
+ }
51077
+ const pngBuffer = await encodeRGBAToPNG(image.data, image.width, image.height);
51078
+ const base64 = pngBuffer.toString("base64");
51079
+ return {
51080
+ base64,
51081
+ width: image.width,
51082
+ height: image.height
51083
+ };
51084
+ }
51085
+ async getScreenSize() {
51086
+ this.assertInitialized();
51087
+ const { screen } = this._nut;
51088
+ const logicalWidth = await screen.width();
51089
+ const logicalHeight = await screen.height();
51090
+ return {
51091
+ physicalWidth: Math.round(logicalWidth * this._scaleFactor),
51092
+ physicalHeight: Math.round(logicalHeight * this._scaleFactor),
51093
+ logicalWidth,
51094
+ logicalHeight,
51095
+ scaleFactor: this._scaleFactor
51096
+ };
51097
+ }
51098
+ // ===== Mouse =====
51099
+ async mouseMove(x, y) {
51100
+ this.assertInitialized();
51101
+ const { mouse, straightTo, Point } = this._nut;
51102
+ const logical = this.toLogical(x, y);
51103
+ await mouse.move(straightTo(new Point(logical.x, logical.y)));
51104
+ }
51105
+ async mouseClick(x, y, button, clickCount) {
51106
+ this.assertInitialized();
51107
+ const { mouse, straightTo, Point, Button } = this._nut;
51108
+ const nutButton = button === "right" ? Button.RIGHT : button === "middle" ? Button.MIDDLE : Button.LEFT;
51109
+ const logical = this.toLogical(x, y);
51110
+ await mouse.move(straightTo(new Point(logical.x, logical.y)));
51111
+ for (let i = 0; i < clickCount; i++) {
51112
+ await mouse.click(nutButton);
51113
+ }
51114
+ }
51115
+ async mouseDrag(startX, startY, endX, endY, button) {
51116
+ this.assertInitialized();
51117
+ const { mouse, straightTo, Point, Button } = this._nut;
51118
+ const nutButton = button === "right" ? Button.RIGHT : button === "middle" ? Button.MIDDLE : Button.LEFT;
51119
+ const logicalStart = this.toLogical(startX, startY);
51120
+ const logicalEnd = this.toLogical(endX, endY);
51121
+ await mouse.move(straightTo(new Point(logicalStart.x, logicalStart.y)));
51122
+ await mouse.pressButton(nutButton);
51123
+ await mouse.move(straightTo(new Point(logicalEnd.x, logicalEnd.y)));
51124
+ await mouse.releaseButton(nutButton);
51125
+ }
51126
+ async mouseScroll(deltaX, deltaY, x, y) {
51127
+ this.assertInitialized();
51128
+ const { mouse, straightTo, Point } = this._nut;
51129
+ if (x !== void 0 && y !== void 0) {
51130
+ const logical = this.toLogical(x, y);
51131
+ await mouse.move(straightTo(new Point(logical.x, logical.y)));
51132
+ }
51133
+ if (deltaY !== 0) {
51134
+ if (deltaY > 0) {
51135
+ await mouse.scrollDown(Math.abs(deltaY));
51136
+ } else {
51137
+ await mouse.scrollUp(Math.abs(deltaY));
51138
+ }
51139
+ }
51140
+ if (deltaX !== 0) {
51141
+ if (deltaX > 0) {
51142
+ await mouse.scrollRight(Math.abs(deltaX));
51143
+ } else {
51144
+ await mouse.scrollLeft(Math.abs(deltaX));
51145
+ }
51146
+ }
51147
+ }
51148
+ async getCursorPosition() {
51149
+ this.assertInitialized();
51150
+ const { mouse } = this._nut;
51151
+ const pos = await mouse.getPosition();
51152
+ return this.toPhysical(pos.x, pos.y);
51153
+ }
51154
+ // ===== Keyboard =====
51155
+ async keyboardType(text, delay) {
51156
+ this.assertInitialized();
51157
+ const { keyboard } = this._nut;
51158
+ const prevDelay = keyboard.config.autoDelayMs;
51159
+ if (delay !== void 0) {
51160
+ keyboard.config.autoDelayMs = delay;
51161
+ }
51162
+ try {
51163
+ await keyboard.type(text);
51164
+ } finally {
51165
+ if (delay !== void 0) {
51166
+ keyboard.config.autoDelayMs = prevDelay;
51167
+ }
51168
+ }
51169
+ }
51170
+ async keyboardKey(keys) {
51171
+ this.assertInitialized();
51172
+ const { keyboard, Key } = this._nut;
51173
+ const parsedKeys = parseKeyCombo(keys, Key);
51174
+ if (parsedKeys.length === 1) {
51175
+ await keyboard.pressKey(parsedKeys[0]);
51176
+ await keyboard.releaseKey(parsedKeys[0]);
51177
+ } else {
51178
+ for (const key of parsedKeys) {
51179
+ await keyboard.pressKey(key);
51180
+ }
51181
+ for (const key of [...parsedKeys].reverse()) {
51182
+ await keyboard.releaseKey(key);
51183
+ }
51184
+ }
51185
+ }
51186
+ // ===== Windows =====
51187
+ async getWindowList() {
51188
+ this.assertInitialized();
51189
+ const { getWindows } = this._nut;
51190
+ try {
51191
+ const windows = await getWindows();
51192
+ const result = [];
51193
+ this._windowCache.clear();
51194
+ for (const win of windows) {
51195
+ try {
51196
+ const handle = win.windowHandle;
51197
+ if (handle === void 0 || handle === null) continue;
51198
+ const title = await win.title;
51199
+ const region = await win.region;
51200
+ this._windowCache.set(handle, win);
51201
+ result.push({
51202
+ id: handle,
51203
+ title: title || "",
51204
+ bounds: region ? {
51205
+ x: Math.round(region.left * this._scaleFactor),
51206
+ y: Math.round(region.top * this._scaleFactor),
51207
+ width: Math.round(region.width * this._scaleFactor),
51208
+ height: Math.round(region.height * this._scaleFactor)
51209
+ } : void 0
51210
+ });
51211
+ } catch {
51212
+ }
51213
+ }
51214
+ return result;
51215
+ } catch {
51216
+ return [];
51217
+ }
51218
+ }
51219
+ async focusWindow(windowId) {
51220
+ this.assertInitialized();
51221
+ let target = this._windowCache.get(windowId);
51222
+ if (!target) {
51223
+ const { getWindows } = this._nut;
51224
+ const windows = await getWindows();
51225
+ target = windows.find((w) => w.windowHandle === windowId);
51226
+ }
51227
+ if (!target) {
51228
+ throw new Error(`Window with ID ${windowId} not found. Call desktop_window_list first to get current window IDs.`);
51229
+ }
51230
+ await target.focus();
51231
+ }
51232
+ };
51233
+
51234
+ // src/tools/desktop/getDriver.ts
51235
// Lazily created driver shared by every caller that does not supply its own.
var defaultDriver = null;

/**
 * Resolve the desktop driver for a call, initializing it on demand.
 *
 * A driver supplied as `config.driver` takes precedence; otherwise the shared
 * default NutTreeDriver is created on first use. The driver's `initialize()`
 * is awaited when `isInitialized` is falsy.
 *
 * @param {Object} [config] - Optional config; only `config.driver` is read.
 * @returns {Promise<Object>} The initialized driver.
 */
async function getDesktopDriver(config) {
  let driver = config?.driver;
  if (!driver) {
    if (!defaultDriver) {
      defaultDriver = new NutTreeDriver();
    }
    driver = defaultDriver;
  }
  if (!driver.isInitialized) {
    await driver.initialize();
  }
  // Return the instance captured before the await: resetDefaultDriver() may
  // run while initialize() is pending, and re-reading the module variable
  // at this point would hand the caller null.
  return driver;
}

/**
 * Forget the shared default driver so the next getDesktopDriver() call
 * without a custom driver creates a new one.
 */
function resetDefaultDriver() {
  defaultDriver = null;
}
51254
+
51255
+ // src/tools/desktop/screenshot.ts
51256
/**
 * Build the `desktop_screenshot` tool.
 *
 * @param {Object} [config] - Passed through to getDesktopDriver.
 * @returns {{definition: Object, describeCall: Function, execute: Function}}
 *   `execute` resolves to `{ success: true, width, height, base64, __images }`
 *   (plus `regionOffsetX`/`regionOffsetY` when a region was requested) or to
 *   `{ success: false, error }` when anything throws.
 */
function createDesktopScreenshotTool(config) {
  const numberParam = (description) => ({ type: "number", description });

  const definition = {
    type: "function",
    function: {
      name: "desktop_screenshot",
      description: `Take a screenshot of the entire screen or a specific region. Returns the screenshot image for visual analysis. Use this to see what's on screen before performing actions. IMPORTANT: If you capture a region, element positions in the image are relative to the region's top-left corner. To click an element at image position (ix, iy), you must use screen coordinates (ix + region.x, iy + region.y). Prefer full-screen screenshots to avoid coordinate offset errors.`,
      parameters: {
        type: "object",
        properties: {
          region: {
            type: "object",
            description: "Optional region to capture (in physical pixel coordinates). Omit to capture full screen.",
            properties: {
              x: numberParam("Left edge X coordinate"),
              y: numberParam("Top edge Y coordinate"),
              width: numberParam("Width in pixels"),
              height: numberParam("Height in pixels")
            },
            required: ["x", "y", "width", "height"]
          }
        },
        required: []
      }
    }
  };

  const describeCall = (args) => {
    const { region } = args;
    if (!region) {
      return "full screen";
    }
    return `region (${region.x},${region.y}) ${region.width}x${region.height}`;
  };

  const execute = async (args) => {
    try {
      const driver = await getDesktopDriver(config);
      const { width, height, base64 } = await driver.screenshot(args.region);
      const result = {
        success: true,
        width,
        height,
        base64,
        __images: [{ base64, mediaType: "image/png" }]
      };
      if (args.region) {
        // Offsets let the caller map image coordinates back to the screen:
        // screen_x = image_x + regionOffsetX, screen_y = image_y + regionOffsetY
        result.regionOffsetX = args.region.x;
        result.regionOffsetY = args.region.y;
      }
      return result;
    } catch (err) {
      return { success: false, error: err.message };
    }
  };

  return { definition, describeCall, execute };
}

// Default instance, created without a config.
var desktopScreenshot = createDesktopScreenshotTool();
51309
+
51310
+ // src/tools/desktop/mouseMove.ts
51311
/**
 * Build the `desktop_mouse_move` tool.
 *
 * `execute` moves the cursor through the driver, hands the config (or an
 * empty object) to applyHumanDelay, then reads the cursor position back.
 *
 * @param {Object} [config] - Passed to getDesktopDriver and applyHumanDelay.
 * @returns {{definition: Object, describeCall: Function, execute: Function}}
 *   `execute` resolves to `{ success: true, x, y }` or `{ success: false, error }`.
 */
function createDesktopMouseMoveTool(config) {
  const axisParam = (axis) => ({ type: "number", description: `${axis} coordinate (in screenshot pixels)` });

  const definition = {
    type: "function",
    function: {
      name: "desktop_mouse_move",
      description: `Move the mouse cursor to the specified (x, y) position. Coordinates are in screenshot pixel space (full screen). Returns the actual cursor position after the move for verification.`,
      parameters: {
        type: "object",
        properties: { x: axisParam("X"), y: axisParam("Y") },
        required: ["x", "y"]
      }
    }
  };

  const execute = async (args) => {
    try {
      const driver = await getDesktopDriver(config);
      await driver.mouseMove(args.x, args.y);
      await applyHumanDelay(config ?? {});
      // Report where the cursor actually ended up, not where it was sent.
      const { x, y } = await driver.getCursorPosition();
      return { success: true, x, y };
    } catch (err) {
      return { success: false, error: err.message };
    }
  };

  return {
    definition,
    describeCall: (args) => `(${args.x}, ${args.y})`,
    execute
  };
}

// Default instance, created without a config.
var desktopMouseMove = createDesktopMouseMoveTool();
51343
+
51344
+ // src/tools/desktop/mouseClick.ts
51345
/**
 * Build the `desktop_mouse_click` tool.
 *
 * A click targets the given coordinates only when BOTH `x` and `y` are
 * supplied; otherwise it happens at the current cursor position. The
 * human-readable call description uses the same rule, so it always names the
 * location `execute` will actually click.
 *
 * @param {Object} [config] - Passed to getDesktopDriver and applyHumanDelay.
 * @returns {{definition: Object, describeCall: Function, execute: Function}}
 *   `execute` resolves to `{ success: true, x, y, button, clickCount }` with
 *   the cursor position read back after the click, or `{ success: false, error }`.
 */
function createDesktopMouseClickTool(config) {
  // Single source of truth for "did the caller give an explicit target?".
  const hasExplicitTarget = (args) => args.x !== void 0 && args.y !== void 0;

  return {
    definition: {
      type: "function",
      function: {
        name: "desktop_mouse_click",
        description: `Click the mouse at the specified position or at the current cursor position. Supports left/right/middle button and single/double/triple click.`,
        parameters: {
          type: "object",
          properties: {
            x: { type: "number", description: "X coordinate to click (in screenshot pixels). Omit to click at current position." },
            y: { type: "number", description: "Y coordinate to click (in screenshot pixels). Omit to click at current position." },
            button: {
              type: "string",
              enum: ["left", "right", "middle"],
              description: 'Mouse button to click. Default: "left"'
            },
            clickCount: {
              type: "number",
              description: "Number of clicks (1=single, 2=double, 3=triple). Default: 1"
            }
          },
          required: []
        }
      }
    },
    describeCall: (args) => {
      // Mirror execute(): a lone x or y falls back to the current position.
      const pos = hasExplicitTarget(args) ? `(${args.x}, ${args.y})` : "current position";
      const btn = args.button && args.button !== "left" ? ` ${args.button}` : "";
      const count = args.clickCount && args.clickCount > 1 ? ` x${args.clickCount}` : "";
      return `${pos}${btn}${count}`;
    },
    execute: async (args) => {
      try {
        const driver = await getDesktopDriver(config);
        const button = args.button ?? "left";
        const clickCount = args.clickCount ?? 1;
        if (hasExplicitTarget(args)) {
          await driver.mouseClick(args.x, args.y, button, clickCount);
        } else {
          const pos = await driver.getCursorPosition();
          await driver.mouseClick(pos.x, pos.y, button, clickCount);
        }
        await applyHumanDelay(config ?? {});
        // Read the position back so the caller can verify where the click landed.
        const actual = await driver.getCursorPosition();
        return { success: true, x: actual.x, y: actual.y, button, clickCount };
      } catch (err) {
        return { success: false, error: err.message };
      }
    }
  };
}

// Default instance, created without a config.
var desktopMouseClick = createDesktopMouseClickTool();
51398
+
51399
+ // src/tools/desktop/mouseDrag.ts
51400
/**
 * Build the `desktop_mouse_drag` tool.
 *
 * `execute` forwards the start/end coordinates and the button (defaulting to
 * "left") to the driver's mouseDrag, then hands the config (or an empty
 * object) to applyHumanDelay.
 *
 * @param {Object} [config] - Passed to getDesktopDriver and applyHumanDelay.
 * @returns {{definition: Object, describeCall: Function, execute: Function}}
 *   `execute` resolves to `{ success: true }` or `{ success: false, error }`.
 */
function createDesktopMouseDragTool(config) {
  const pixelParam = (label) => ({ type: "number", description: `${label} coordinate (in screenshot pixels)` });

  const parameters = {
    type: "object",
    properties: {
      startX: pixelParam("Start X"),
      startY: pixelParam("Start Y"),
      endX: pixelParam("End X"),
      endY: pixelParam("End Y"),
      button: {
        type: "string",
        enum: ["left", "right", "middle"],
        description: 'Mouse button to use for dragging. Default: "left"'
      }
    },
    required: ["startX", "startY", "endX", "endY"]
  };

  const describeCall = (args) => `(${args.startX},${args.startY}) \u2192 (${args.endX},${args.endY})`;

  const execute = async (args) => {
    try {
      const driver = await getDesktopDriver(config);
      const button = args.button ?? "left";
      await driver.mouseDrag(args.startX, args.startY, args.endX, args.endY, button);
      await applyHumanDelay(config ?? {});
      return { success: true };
    } catch (err) {
      return { success: false, error: err.message };
    }
  };

  return {
    definition: {
      type: "function",
      function: {
        name: "desktop_mouse_drag",
        description: `Drag the mouse from one position to another. Presses the button at the start position, moves to the end position, then releases.`,
        parameters
      }
    },
    describeCall,
    execute
  };
}

// Default instance, created without a config.
var desktopMouseDrag = createDesktopMouseDragTool();
51438
+
51439
+ // src/tools/desktop/mouseScroll.ts
51440
/**
 * Build the `desktop_mouse_scroll` tool.
 *
 * `execute` forwards both deltas (each defaulting to 0) and the optional
 * position to the driver's mouseScroll, then hands the config (or an empty
 * object) to applyHumanDelay.
 *
 * @param {Object} [config] - Passed to getDesktopDriver and applyHumanDelay.
 * @returns {{definition: Object, describeCall: Function, execute: Function}}
 *   `execute` resolves to `{ success: true }` or `{ success: false, error }`.
 */
function createDesktopMouseScrollTool(config) {
  // Describe one scroll axis; a zero/missing delta contributes nothing.
  const describeAxis = (delta, forward, backward) => {
    if (!delta) {
      return [];
    }
    return [delta > 0 ? `${forward} ${delta}` : `${backward} ${Math.abs(delta)}`];
  };

  const describeCall = (args) => {
    const parts = [
      ...describeAxis(args.deltaY, "down", "up"),
      ...describeAxis(args.deltaX, "right", "left")
    ];
    if (args.x !== void 0) {
      parts.push(`at (${args.x},${args.y})`);
    }
    const summary = parts.join(", ");
    return summary || "no-op";
  };

  const execute = async (args) => {
    try {
      const driver = await getDesktopDriver(config);
      const horizontal = args.deltaX ?? 0;
      const vertical = args.deltaY ?? 0;
      await driver.mouseScroll(horizontal, vertical, args.x, args.y);
      await applyHumanDelay(config ?? {});
      return { success: true };
    } catch (err) {
      return { success: false, error: err.message };
    }
  };

  return {
    definition: {
      type: "function",
      function: {
        name: "desktop_mouse_scroll",
        description: `Scroll the mouse wheel. Positive deltaY scrolls down, negative scrolls up. Positive deltaX scrolls right, negative scrolls left. Optionally specify position to scroll at.`,
        parameters: {
          type: "object",
          properties: {
            deltaX: { type: "number", description: "Horizontal scroll amount. Positive=right, negative=left. Default: 0" },
            deltaY: { type: "number", description: "Vertical scroll amount. Positive=down, negative=up. Default: 0" },
            x: { type: "number", description: "X coordinate to scroll at (in screenshot pixels). Omit to scroll at current position." },
            y: { type: "number", description: "Y coordinate to scroll at (in screenshot pixels). Omit to scroll at current position." }
          },
          required: []
        }
      }
    },
    describeCall,
    execute
  };
}

// Default instance, created without a config.
var desktopMouseScroll = createDesktopMouseScrollTool();
51479
+
51480
+ // src/tools/desktop/getCursor.ts
51481
/**
 * Build the `desktop_get_cursor` tool, which reports the cursor position
 * returned by the driver's getCursorPosition.
 *
 * @param {Object} [config] - Passed through to getDesktopDriver.
 * @returns {{definition: Object, describeCall: Function, execute: Function}}
 *   `execute` resolves to `{ success: true, x, y }` or `{ success: false, error }`.
 */
function createDesktopGetCursorTool(config) {
  const definition = {
    type: "function",
    function: {
      name: "desktop_get_cursor",
      description: `Get the current mouse cursor position in screenshot pixel coordinates.`,
      parameters: { type: "object", properties: {}, required: [] }
    }
  };

  const execute = async () => {
    try {
      const driver = await getDesktopDriver(config);
      const { x, y } = await driver.getCursorPosition();
      return { success: true, x, y };
    } catch (err) {
      return { success: false, error: err.message };
    }
  };

  return {
    definition,
    describeCall: () => "get cursor position",
    execute
  };
}

// Default instance, created without a config.
var desktopGetCursor = createDesktopGetCursorTool();
51508
+
51509
+ // src/tools/desktop/keyboardType.ts
51510
/**
 * Build the `desktop_keyboard_type` tool.
 *
 * `execute` forwards the text and optional delay to the driver's
 * keyboardType, then hands the config (or an empty object) to
 * applyHumanDelay. The call description quotes the text, shortened to 27
 * characters plus "..." when it is longer than 30.
 *
 * @param {Object} [config] - Passed to getDesktopDriver and applyHumanDelay.
 * @returns {{definition: Object, describeCall: Function, execute: Function}}
 *   `execute` resolves to `{ success: true }` or `{ success: false, error }`.
 */
function createDesktopKeyboardTypeTool(config) {
  const describeCall = (args) => {
    const { text } = args;
    let preview = text;
    if (text.length > 30) {
      preview = `${text.slice(0, 27)}...`;
    }
    return `"${preview}"`;
  };

  const execute = async (args) => {
    try {
      const driver = await getDesktopDriver(config);
      await driver.keyboardType(args.text, args.delay);
      await applyHumanDelay(config ?? {});
      return { success: true };
    } catch (err) {
      return { success: false, error: err.message };
    }
  };

  return {
    definition: {
      type: "function",
      function: {
        name: "desktop_keyboard_type",
        description: `Type text using the keyboard. Each character is typed as a keypress. Use this for entering text into focused input fields.`,
        parameters: {
          type: "object",
          properties: {
            text: { type: "string", description: "The text to type" },
            delay: { type: "number", description: "Delay in ms between each keystroke. Default: uses system default." }
          },
          required: ["text"]
        }
      }
    },
    describeCall,
    execute
  };
}

// Default instance, created without a config.
var desktopKeyboardType = createDesktopKeyboardTypeTool();
51544
+
51545
+ // src/tools/desktop/keyboardKey.ts
51546
/**
 * Build the `desktop_keyboard_key` tool.
 *
 * `execute` forwards the combo string to the driver's keyboardKey, then
 * hands the config (or an empty object) to applyHumanDelay.
 *
 * @param {Object} [config] - Passed to getDesktopDriver and applyHumanDelay.
 * @returns {{definition: Object, describeCall: Function, execute: Function}}
 *   `execute` resolves to `{ success: true }` or `{ success: false, error }`.
 */
function createDesktopKeyboardKeyTool(config) {
  const keysParam = {
    type: "string",
    description: 'Key combo string (e.g., "ctrl+c", "enter", "cmd+shift+s")'
  };

  const execute = async (args) => {
    try {
      const driver = await getDesktopDriver(config);
      await driver.keyboardKey(args.keys);
      await applyHumanDelay(config ?? {});
      return { success: true };
    } catch (err) {
      return { success: false, error: err.message };
    }
  };

  return {
    definition: {
      type: "function",
      function: {
        name: "desktop_keyboard_key",
        description: `Press a keyboard shortcut or special key. Use "+" to combine keys (e.g., "ctrl+c", "cmd+shift+s", "enter", "tab", "escape"). Modifiers: ctrl, cmd/command, alt/option, shift. Special keys: enter, tab, escape, backspace, delete, space, up, down, left, right, f1-f12, home, end, pageup, pagedown.`,
        parameters: {
          type: "object",
          properties: { keys: keysParam },
          required: ["keys"]
        }
      }
    },
    // The combo string is already a readable summary of the call.
    describeCall: (args) => args.keys,
    execute
  };
}

// Default instance, created without a config.
var desktopKeyboardKey = createDesktopKeyboardKeyTool();
51579
+
51580
+ // src/tools/desktop/getScreenSize.ts
51581
/**
 * Build the `desktop_get_screen_size` tool, which relays the size fields
 * returned by the driver's getScreenSize.
 *
 * @param {Object} [config] - Passed through to getDesktopDriver.
 * @returns {{definition: Object, describeCall: Function, execute: Function}}
 *   `execute` resolves to `{ success: true, physicalWidth, physicalHeight,
 *   logicalWidth, logicalHeight, scaleFactor }` or `{ success: false, error }`.
 */
function createDesktopGetScreenSizeTool(config) {
  const execute = async () => {
    try {
      const driver = await getDesktopDriver(config);
      const {
        physicalWidth,
        physicalHeight,
        logicalWidth,
        logicalHeight,
        scaleFactor
      } = await driver.getScreenSize();
      // Only the five known fields are relayed, in a fixed order.
      return { success: true, physicalWidth, physicalHeight, logicalWidth, logicalHeight, scaleFactor };
    } catch (err) {
      return { success: false, error: err.message };
    }
  };

  return {
    definition: {
      type: "function",
      function: {
        name: "desktop_get_screen_size",
        description: `Get the screen dimensions. Returns physical pixel size (screenshot space), logical size (OS coordinates), and the scale factor (e.g., 2.0 on Retina displays). All desktop tool coordinates use physical pixel space.`,
        parameters: { type: "object", properties: {}, required: [] }
      }
    },
    describeCall: () => "get screen size",
    execute
  };
}

// Default instance, created without a config.
var desktopGetScreenSize = createDesktopGetScreenSizeTool();
51615
+
51616
+ // src/tools/desktop/windowList.ts
51617
/**
 * Build the `desktop_window_list` tool, which returns whatever the driver's
 * getWindowList resolves to under the `windows` key.
 *
 * @param {Object} [config] - Passed through to getDesktopDriver.
 * @returns {{definition: Object, describeCall: Function, execute: Function}}
 *   `execute` resolves to `{ success: true, windows }` or `{ success: false, error }`.
 */
function createDesktopWindowListTool(config) {
  const definition = {
    type: "function",
    function: {
      name: "desktop_window_list",
      description: `List all visible windows on the desktop. Returns window IDs, titles, application names, and bounds. Use the window ID with desktop_window_focus to bring a window to the foreground.`,
      parameters: { type: "object", properties: {}, required: [] }
    }
  };

  const execute = async () => {
    try {
      const driver = await getDesktopDriver(config);
      return { success: true, windows: await driver.getWindowList() };
    } catch (err) {
      return { success: false, error: err.message };
    }
  };

  return {
    definition,
    describeCall: () => "list windows",
    execute
  };
}

// Default instance, created without a config.
var desktopWindowList = createDesktopWindowListTool();
51644
+
51645
+ // src/tools/desktop/windowFocus.ts
51646
/**
 * Build the `desktop_window_focus` tool, which forwards the window ID to the
 * driver's focusWindow.
 *
 * @param {Object} [config] - Passed through to getDesktopDriver.
 * @returns {{definition: Object, describeCall: Function, execute: Function}}
 *   `execute` resolves to `{ success: true }` or `{ success: false, error }`.
 */
function createDesktopWindowFocusTool(config) {
  const windowIdParam = {
    type: "number",
    description: "The window ID from desktop_window_list"
  };

  const execute = async (args) => {
    try {
      const driver = await getDesktopDriver(config);
      await driver.focusWindow(args.windowId);
      return { success: true };
    } catch (err) {
      return { success: false, error: err.message };
    }
  };

  return {
    definition: {
      type: "function",
      function: {
        name: "desktop_window_focus",
        description: `Focus (bring to front) a window by its ID. Use desktop_window_list to get available window IDs.`,
        parameters: {
          type: "object",
          properties: { windowId: windowIdParam },
          required: ["windowId"]
        }
      }
    },
    describeCall: (args) => `window ${args.windowId}`,
    execute
  };
}

// Default instance, created without a config.
var desktopWindowFocus = createDesktopWindowFocusTool();
51678
+
51679
+ // src/tools/desktop/index.ts
51680
// Every desktop tool instance in one list, grouped by what it drives.
var desktopTools = [
  // Screen capture
  desktopScreenshot,
  // Mouse
  desktopMouseMove, desktopMouseClick, desktopMouseDrag, desktopMouseScroll, desktopGetCursor,
  // Keyboard
  desktopKeyboardType, desktopKeyboardKey,
  // Screen and windows
  desktopGetScreenSize, desktopWindowList, desktopWindowFocus
];
51693
+
49583
51694
  // src/tools/registry.generated.ts
49584
51695
  var toolRegistry = [
49585
51696
  {
@@ -49591,6 +51702,105 @@ var toolRegistry = [
49591
51702
  tool: executeJavaScript,
49592
51703
  safeByDefault: false
49593
51704
  },
51705
+ {
51706
+ name: "desktop_get_cursor",
51707
+ exportName: "desktopGetCursor",
51708
+ displayName: "Desktop Get Cursor",
51709
+ category: "desktop",
51710
+ description: "Get the current mouse cursor position in screenshot pixel coordinates.",
51711
+ tool: desktopGetCursor,
51712
+ safeByDefault: false
51713
+ },
51714
+ {
51715
+ name: "desktop_get_screen_size",
51716
+ exportName: "desktopGetScreenSize",
51717
+ displayName: "Desktop Get Screen Size",
51718
+ category: "desktop",
51719
+ description: "Get the screen dimensions. Returns physical pixel size (screenshot space), logical size (OS coordinates), and the scale factor (e.g., 2.0 on Retina displays). All desktop tool coordinates use physical",
51720
+ tool: desktopGetScreenSize,
51721
+ safeByDefault: false
51722
+ },
51723
+ {
51724
+ name: "desktop_keyboard_key",
51725
+ exportName: "desktopKeyboardKey",
51726
+ displayName: "Desktop Keyboard Key",
51727
+ category: "desktop",
51728
+ description: 'Press a keyboard shortcut or special key. Use "+" to combine keys (e.g., "ctrl+c", "cmd+shift+s", "enter", "tab", "escape"). Modifiers: ctrl, cmd/command, alt/option, shift. Special keys: enter, tab, ',
51729
+ tool: desktopKeyboardKey,
51730
+ safeByDefault: false
51731
+ },
51732
+ {
51733
+ name: "desktop_keyboard_type",
51734
+ exportName: "desktopKeyboardType",
51735
+ displayName: "Desktop Keyboard Type",
51736
+ category: "desktop",
51737
+ description: "Type text using the keyboard. Each character is typed as a keypress. Use this for entering text into focused input fields.",
51738
+ tool: desktopKeyboardType,
51739
+ safeByDefault: false
51740
+ },
51741
+ {
51742
+ name: "desktop_mouse_click",
51743
+ exportName: "desktopMouseClick",
51744
+ displayName: "Desktop Mouse Click",
51745
+ category: "desktop",
51746
+ description: "Click the mouse at the specified position or at the current cursor position. Supports left/right/middle button and single/double/triple click.",
51747
+ tool: desktopMouseClick,
51748
+ safeByDefault: false
51749
+ },
51750
+ {
51751
+ name: "desktop_mouse_drag",
51752
+ exportName: "desktopMouseDrag",
51753
+ displayName: "Desktop Mouse Drag",
51754
+ category: "desktop",
51755
+ description: "Drag the mouse from one position to another. Presses the button at the start position, moves to the end position, then releases.",
51756
+ tool: desktopMouseDrag,
51757
+ safeByDefault: false
51758
+ },
51759
+ {
51760
+ name: "desktop_mouse_move",
51761
+ exportName: "desktopMouseMove",
51762
+ displayName: "Desktop Mouse Move",
51763
+ category: "desktop",
51764
+ description: "Move the mouse cursor to the specified (x, y) position. Coordinates are in screenshot pixel space (full screen). Returns the actual cursor position after the move for verification.",
51765
+ tool: desktopMouseMove,
51766
+ safeByDefault: false
51767
+ },
51768
+ {
51769
+ name: "desktop_mouse_scroll",
51770
+ exportName: "desktopMouseScroll",
51771
+ displayName: "Desktop Mouse Scroll",
51772
+ category: "desktop",
51773
+ description: "Scroll the mouse wheel. Positive deltaY scrolls down, negative scrolls up. Positive deltaX scrolls right, negative scrolls left. Optionally specify position to scroll at.",
51774
+ tool: desktopMouseScroll,
51775
+ safeByDefault: false
51776
+ },
51777
+ {
51778
+ name: "desktop_screenshot",
51779
+ exportName: "desktopScreenshot",
51780
+ displayName: "Desktop Screenshot",
51781
+ category: "desktop",
51782
+ description: "Take a screenshot of the entire screen or a specific region. Returns the screenshot image for visual analysis. Use this to see what's on screen before performing actions. IMPORTANT: If you capture a r",
51783
+ tool: desktopScreenshot,
51784
+ safeByDefault: false
51785
+ },
51786
+ {
51787
+ name: "desktop_window_focus",
51788
+ exportName: "desktopWindowFocus",
51789
+ displayName: "Desktop Window Focus",
51790
+ category: "desktop",
51791
+ description: "Focus (bring to front) a window by its ID. Use desktop_window_list to get available window IDs.",
51792
+ tool: desktopWindowFocus,
51793
+ safeByDefault: false
51794
+ },
51795
+ {
51796
+ name: "desktop_window_list",
51797
+ exportName: "desktopWindowList",
51798
+ displayName: "Desktop Window List",
51799
+ category: "desktop",
51800
+ description: "List all visible windows on the desktop. Returns window IDs, titles, application names, and bounds. Use the window ID with desktop_window_focus to bring a window to the foreground.",
51801
+ tool: desktopWindowList,
51802
+ safeByDefault: false
51803
+ },
49594
51804
  {
49595
51805
  name: "edit_file",
49596
51806
  exportName: "editFile",
@@ -49632,7 +51842,7 @@ var toolRegistry = [
49632
51842
  exportName: "readFile",
49633
51843
  displayName: "Read File",
49634
51844
  category: "filesystem",
49635
- description: "Read content from a file on the local filesystem.",
51845
+ description: "Read content from a file on the local filesystem. Supports text files AND binary document formats \u2014 PDF, DOCX, PPTX, XLSX, ODS, ODT, ODP, and images (PNG, JPG, GIF, WEBP) are automatically converted t",
49636
51846
  tool: readFile5,
49637
51847
  safeByDefault: true
49638
51848
  },
@@ -49668,7 +51878,7 @@ var toolRegistry = [
49668
51878
  exportName: "webFetch",
49669
51879
  displayName: "Web Fetch",
49670
51880
  category: "web",
49671
- description: "Fetch and extract text content from a web page URL.",
51881
+ description: "Fetch and extract content from a URL \u2014 works with web pages AND document files (PDF, DOCX, XLSX, PPTX, etc.). Document URLs are automatically detected and converted to markdown text.",
49672
51882
  tool: webFetch,
49673
51883
  safeByDefault: true
49674
51884
  }
@@ -50050,6 +52260,7 @@ exports.DEFAULT_ALLOWLIST = DEFAULT_ALLOWLIST;
50050
52260
  exports.DEFAULT_CHECKPOINT_STRATEGY = DEFAULT_CHECKPOINT_STRATEGY;
50051
52261
  exports.DEFAULT_CONFIG = DEFAULT_CONFIG2;
50052
52262
  exports.DEFAULT_CONTEXT_CONFIG = DEFAULT_CONTEXT_CONFIG;
52263
+ exports.DEFAULT_DESKTOP_CONFIG = DEFAULT_DESKTOP_CONFIG;
50053
52264
  exports.DEFAULT_FEATURES = DEFAULT_FEATURES;
50054
52265
  exports.DEFAULT_FILESYSTEM_CONFIG = DEFAULT_FILESYSTEM_CONFIG;
50055
52266
  exports.DEFAULT_HISTORY_MANAGER_CONFIG = DEFAULT_HISTORY_MANAGER_CONFIG;
@@ -50057,8 +52268,10 @@ exports.DEFAULT_MEMORY_CONFIG = DEFAULT_MEMORY_CONFIG;
50057
52268
  exports.DEFAULT_PERMISSION_CONFIG = DEFAULT_PERMISSION_CONFIG;
50058
52269
  exports.DEFAULT_RATE_LIMITER_CONFIG = DEFAULT_RATE_LIMITER_CONFIG;
50059
52270
  exports.DEFAULT_SHELL_CONFIG = DEFAULT_SHELL_CONFIG;
52271
+ exports.DESKTOP_TOOL_NAMES = DESKTOP_TOOL_NAMES;
50060
52272
  exports.DefaultCompactionStrategy = DefaultCompactionStrategy;
50061
52273
  exports.DependencyCycleError = DependencyCycleError;
52274
+ exports.DocumentReader = DocumentReader;
50062
52275
  exports.ErrorHandler = ErrorHandler;
50063
52276
  exports.ExecutionContext = ExecutionContext;
50064
52277
  exports.ExternalDependencyHandler = ExternalDependencyHandler;
@@ -50069,6 +52282,7 @@ exports.FileMediaOutputHandler = FileMediaStorage;
50069
52282
  exports.FileMediaStorage = FileMediaStorage;
50070
52283
  exports.FilePersistentInstructionsStorage = FilePersistentInstructionsStorage;
50071
52284
  exports.FileStorage = FileStorage;
52285
+ exports.FormatDetector = FormatDetector;
50072
52286
  exports.HookManager = HookManager;
50073
52287
  exports.IMAGE_MODELS = IMAGE_MODELS;
50074
52288
  exports.IMAGE_MODEL_REGISTRY = IMAGE_MODEL_REGISTRY;
@@ -50097,6 +52311,7 @@ exports.MemoryEvictionCompactor = MemoryEvictionCompactor;
50097
52311
  exports.MessageBuilder = MessageBuilder;
50098
52312
  exports.MessageRole = MessageRole;
50099
52313
  exports.ModelNotSupportedError = ModelNotSupportedError;
52314
+ exports.NutTreeDriver = NutTreeDriver;
50100
52315
  exports.ParallelTasksError = ParallelTasksError;
50101
52316
  exports.PersistentInstructionsPluginNextGen = PersistentInstructionsPluginNextGen;
50102
52317
  exports.PlanningAgent = PlanningAgent;
@@ -50173,6 +52388,17 @@ exports.createAuthenticatedFetch = createAuthenticatedFetch;
50173
52388
  exports.createBashTool = createBashTool;
50174
52389
  exports.createConnectorFromTemplate = createConnectorFromTemplate;
50175
52390
  exports.createCreatePRTool = createCreatePRTool;
52391
+ exports.createDesktopGetCursorTool = createDesktopGetCursorTool;
52392
+ exports.createDesktopGetScreenSizeTool = createDesktopGetScreenSizeTool;
52393
+ exports.createDesktopKeyboardKeyTool = createDesktopKeyboardKeyTool;
52394
+ exports.createDesktopKeyboardTypeTool = createDesktopKeyboardTypeTool;
52395
+ exports.createDesktopMouseClickTool = createDesktopMouseClickTool;
52396
+ exports.createDesktopMouseDragTool = createDesktopMouseDragTool;
52397
+ exports.createDesktopMouseMoveTool = createDesktopMouseMoveTool;
52398
+ exports.createDesktopMouseScrollTool = createDesktopMouseScrollTool;
52399
+ exports.createDesktopScreenshotTool = createDesktopScreenshotTool;
52400
+ exports.createDesktopWindowFocusTool = createDesktopWindowFocusTool;
52401
+ exports.createDesktopWindowListTool = createDesktopWindowListTool;
50176
52402
  exports.createEditFileTool = createEditFileTool;
50177
52403
  exports.createEstimator = createEstimator;
50178
52404
  exports.createExecuteJavaScriptTool = createExecuteJavaScriptTool;
@@ -50203,9 +52429,22 @@ exports.createVideoProvider = createVideoProvider;
50203
52429
  exports.createVideoTools = createVideoTools;
50204
52430
  exports.createWriteFileTool = createWriteFileTool;
50205
52431
  exports.defaultDescribeCall = defaultDescribeCall;
52432
+ exports.desktopGetCursor = desktopGetCursor;
52433
+ exports.desktopGetScreenSize = desktopGetScreenSize;
52434
+ exports.desktopKeyboardKey = desktopKeyboardKey;
52435
+ exports.desktopKeyboardType = desktopKeyboardType;
52436
+ exports.desktopMouseClick = desktopMouseClick;
52437
+ exports.desktopMouseDrag = desktopMouseDrag;
52438
+ exports.desktopMouseMove = desktopMouseMove;
52439
+ exports.desktopMouseScroll = desktopMouseScroll;
52440
+ exports.desktopScreenshot = desktopScreenshot;
52441
+ exports.desktopTools = desktopTools;
52442
+ exports.desktopWindowFocus = desktopWindowFocus;
52443
+ exports.desktopWindowList = desktopWindowList;
50206
52444
  exports.detectDependencyCycle = detectDependencyCycle;
50207
52445
  exports.detectServiceFromURL = detectServiceFromURL;
50208
52446
  exports.developerTools = developerTools;
52447
+ exports.documentToContent = documentToContent;
50209
52448
  exports.editFile = editFile;
50210
52449
  exports.evaluateCondition = evaluateCondition;
50211
52450
  exports.extractJSON = extractJSON;
@@ -50229,6 +52468,7 @@ exports.getAllVendorTemplates = getAllVendorTemplates;
50229
52468
  exports.getBackgroundOutput = getBackgroundOutput;
50230
52469
  exports.getConnectorTools = getConnectorTools;
50231
52470
  exports.getCredentialsSetupURL = getCredentialsSetupURL;
52471
+ exports.getDesktopDriver = getDesktopDriver;
50232
52472
  exports.getDocsURL = getDocsURL;
50233
52473
  exports.getImageModelInfo = getImageModelInfo;
50234
52474
  exports.getImageModelsByVendor = getImageModelsByVendor;
@@ -50296,10 +52536,14 @@ exports.listVendors = listVendors;
50296
52536
  exports.listVendorsByAuthType = listVendorsByAuthType;
50297
52537
  exports.listVendorsByCategory = listVendorsByCategory;
50298
52538
  exports.listVendorsWithLogos = listVendorsWithLogos;
52539
+ exports.mergeTextPieces = mergeTextPieces;
52540
+ exports.parseKeyCombo = parseKeyCombo;
50299
52541
  exports.parseRepository = parseRepository;
50300
52542
  exports.readClipboardImage = readClipboardImage;
52543
+ exports.readDocumentAsContent = readDocumentAsContent;
50301
52544
  exports.readFile = readFile5;
50302
52545
  exports.registerScrapeProvider = registerScrapeProvider;
52546
+ exports.resetDefaultDriver = resetDefaultDriver;
50303
52547
  exports.resolveConnector = resolveConnector;
50304
52548
  exports.resolveDependencies = resolveDependencies;
50305
52549
  exports.resolveRepository = resolveRepository;