browser-use 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. package/README.md +295 -686
  2. package/dist/actor/element.d.ts +19 -0
  3. package/dist/actor/element.js +46 -0
  4. package/dist/actor/index.d.ts +4 -0
  5. package/dist/actor/index.js +4 -0
  6. package/dist/actor/mouse.d.ts +19 -0
  7. package/dist/actor/mouse.js +39 -0
  8. package/dist/actor/page.d.ts +29 -0
  9. package/dist/actor/page.js +88 -0
  10. package/dist/actor/utils.d.ts +4 -0
  11. package/dist/actor/utils.js +35 -0
  12. package/dist/agent/cloud-events.d.ts +18 -0
  13. package/dist/agent/cloud-events.js +65 -2
  14. package/dist/agent/gif.d.ts +1 -0
  15. package/dist/agent/gif.js +24 -2
  16. package/dist/agent/judge.d.ts +17 -0
  17. package/dist/agent/judge.js +197 -0
  18. package/dist/agent/message-manager/service.d.ts +12 -4
  19. package/dist/agent/message-manager/service.js +205 -39
  20. package/dist/agent/message-manager/utils.js +0 -1
  21. package/dist/agent/message-manager/views.d.ts +4 -0
  22. package/dist/agent/message-manager/views.js +11 -7
  23. package/dist/agent/prompts.d.ts +24 -3
  24. package/dist/agent/prompts.js +274 -59
  25. package/dist/agent/service.d.ts +99 -41
  26. package/dist/agent/service.js +2266 -472
  27. package/dist/agent/variable-detector.d.ts +12 -0
  28. package/dist/agent/variable-detector.js +211 -0
  29. package/dist/agent/views.d.ts +237 -18
  30. package/dist/agent/views.js +446 -33
  31. package/dist/browser/cloud/cloud.d.ts +20 -0
  32. package/dist/browser/cloud/cloud.js +129 -0
  33. package/dist/browser/cloud/index.d.ts +2 -0
  34. package/dist/browser/cloud/index.js +2 -0
  35. package/dist/browser/cloud/views.d.ts +41 -0
  36. package/dist/browser/cloud/views.js +35 -0
  37. package/dist/browser/events.d.ts +345 -0
  38. package/dist/browser/events.js +566 -0
  39. package/dist/browser/extensions.js +17 -17
  40. package/dist/browser/index.d.ts +4 -0
  41. package/dist/browser/index.js +4 -0
  42. package/dist/browser/profile.d.ts +8 -2
  43. package/dist/browser/profile.js +79 -12
  44. package/dist/browser/session-manager.d.ts +85 -0
  45. package/dist/browser/session-manager.js +208 -0
  46. package/dist/browser/session.d.ts +100 -8
  47. package/dist/browser/session.js +1097 -58
  48. package/dist/browser/types.d.ts +0 -2
  49. package/dist/browser/views.d.ts +39 -0
  50. package/dist/browser/views.js +32 -0
  51. package/dist/browser/watchdogs/aboutblank-watchdog.d.ts +12 -0
  52. package/dist/browser/watchdogs/aboutblank-watchdog.js +131 -0
  53. package/dist/browser/watchdogs/base.d.ts +21 -0
  54. package/dist/browser/watchdogs/base.js +81 -0
  55. package/dist/browser/watchdogs/cdp-session-watchdog.d.ts +14 -0
  56. package/dist/browser/watchdogs/cdp-session-watchdog.js +177 -0
  57. package/dist/browser/watchdogs/crash-watchdog.d.ts +38 -0
  58. package/dist/browser/watchdogs/crash-watchdog.js +296 -0
  59. package/dist/browser/watchdogs/default-action-watchdog.d.ts +49 -0
  60. package/dist/browser/watchdogs/default-action-watchdog.js +212 -0
  61. package/dist/browser/watchdogs/dom-watchdog.d.ts +8 -0
  62. package/dist/browser/watchdogs/dom-watchdog.js +31 -0
  63. package/dist/browser/watchdogs/downloads-watchdog.d.ts +77 -0
  64. package/dist/browser/watchdogs/downloads-watchdog.js +409 -0
  65. package/dist/browser/watchdogs/har-recording-watchdog.d.ts +19 -0
  66. package/dist/browser/watchdogs/har-recording-watchdog.js +317 -0
  67. package/dist/browser/watchdogs/index.d.ts +15 -0
  68. package/dist/browser/watchdogs/index.js +15 -0
  69. package/dist/browser/watchdogs/local-browser-watchdog.d.ts +10 -0
  70. package/dist/browser/watchdogs/local-browser-watchdog.js +32 -0
  71. package/dist/browser/watchdogs/permissions-watchdog.d.ts +8 -0
  72. package/dist/browser/watchdogs/permissions-watchdog.js +73 -0
  73. package/dist/browser/watchdogs/popups-watchdog.d.ts +13 -0
  74. package/dist/browser/watchdogs/popups-watchdog.js +77 -0
  75. package/dist/browser/watchdogs/recording-watchdog.d.ts +27 -0
  76. package/dist/browser/watchdogs/recording-watchdog.js +249 -0
  77. package/dist/browser/watchdogs/screenshot-watchdog.d.ts +6 -0
  78. package/dist/browser/watchdogs/screenshot-watchdog.js +13 -0
  79. package/dist/browser/watchdogs/security-watchdog.d.ts +10 -0
  80. package/dist/browser/watchdogs/security-watchdog.js +84 -0
  81. package/dist/browser/watchdogs/storage-state-watchdog.d.ts +24 -0
  82. package/dist/browser/watchdogs/storage-state-watchdog.js +288 -0
  83. package/dist/cli.d.ts +7 -2
  84. package/dist/cli.js +182 -25
  85. package/dist/code-use/formatting.d.ts +3 -0
  86. package/dist/code-use/formatting.js +18 -0
  87. package/dist/code-use/index.d.ts +6 -0
  88. package/dist/code-use/index.js +6 -0
  89. package/dist/code-use/namespace.d.ts +5 -0
  90. package/dist/code-use/namespace.js +81 -0
  91. package/dist/code-use/notebook-export.d.ts +3 -0
  92. package/dist/code-use/notebook-export.js +56 -0
  93. package/dist/code-use/service.d.ts +24 -0
  94. package/dist/code-use/service.js +104 -0
  95. package/dist/code-use/utils.d.ts +4 -0
  96. package/dist/code-use/utils.js +98 -0
  97. package/dist/code-use/views.d.ts +108 -0
  98. package/dist/code-use/views.js +165 -0
  99. package/dist/config.d.ts +13 -0
  100. package/dist/config.js +69 -3
  101. package/dist/controller/registry/service.d.ts +10 -1
  102. package/dist/controller/registry/service.js +266 -10
  103. package/dist/controller/registry/views.d.ts +4 -1
  104. package/dist/controller/registry/views.js +25 -2
  105. package/dist/controller/service.d.ts +10 -1
  106. package/dist/controller/service.js +1807 -268
  107. package/dist/controller/views.d.ts +78 -155
  108. package/dist/controller/views.js +61 -12
  109. package/dist/dom/history-tree-processor/service.d.ts +5 -0
  110. package/dist/dom/history-tree-processor/service.js +169 -14
  111. package/dist/dom/history-tree-processor/view.d.ts +7 -1
  112. package/dist/dom/history-tree-processor/view.js +10 -1
  113. package/dist/dom/markdown-extractor.d.ts +37 -0
  114. package/dist/dom/markdown-extractor.js +345 -0
  115. package/dist/dom/service.d.ts +3 -1
  116. package/dist/dom/service.js +76 -0
  117. package/dist/dom/views.d.ts +1 -0
  118. package/dist/dom/views.js +45 -0
  119. package/dist/event-bus.d.ts +107 -7
  120. package/dist/event-bus.js +313 -10
  121. package/dist/exceptions.d.ts +0 -3
  122. package/dist/exceptions.js +0 -7
  123. package/dist/filesystem/file-system.d.ts +18 -0
  124. package/dist/filesystem/file-system.js +503 -42
  125. package/dist/index.d.ts +7 -0
  126. package/dist/index.js +6 -0
  127. package/dist/integrations/gmail/actions.d.ts +3 -3
  128. package/dist/integrations/gmail/actions.js +4 -4
  129. package/dist/llm/anthropic/chat.d.ts +18 -1
  130. package/dist/llm/anthropic/chat.js +123 -55
  131. package/dist/llm/anthropic/serializer.d.ts +2 -0
  132. package/dist/llm/anthropic/serializer.js +81 -9
  133. package/dist/llm/aws/chat-anthropic.d.ts +17 -0
  134. package/dist/llm/aws/chat-anthropic.js +126 -26
  135. package/dist/llm/aws/chat-bedrock.d.ts +28 -1
  136. package/dist/llm/aws/chat-bedrock.js +161 -34
  137. package/dist/llm/aws/serializer.d.ts +13 -1
  138. package/dist/llm/aws/serializer.js +56 -17
  139. package/dist/llm/azure/chat.d.ts +53 -2
  140. package/dist/llm/azure/chat.js +366 -54
  141. package/dist/llm/base.d.ts +2 -0
  142. package/dist/llm/browser-use/chat.d.ts +40 -0
  143. package/dist/llm/browser-use/chat.js +305 -0
  144. package/dist/llm/browser-use/index.d.ts +1 -0
  145. package/dist/llm/browser-use/index.js +1 -0
  146. package/dist/llm/cerebras/chat.d.ts +39 -0
  147. package/dist/llm/cerebras/chat.js +178 -0
  148. package/dist/llm/cerebras/index.d.ts +2 -0
  149. package/dist/llm/cerebras/index.js +2 -0
  150. package/dist/llm/cerebras/serializer.d.ts +7 -0
  151. package/dist/llm/cerebras/serializer.js +82 -0
  152. package/dist/llm/deepseek/chat.d.ts +19 -2
  153. package/dist/llm/deepseek/chat.js +138 -25
  154. package/dist/llm/google/chat.d.ts +46 -2
  155. package/dist/llm/google/chat.js +267 -64
  156. package/dist/llm/google/serializer.d.ts +9 -1
  157. package/dist/llm/google/serializer.js +141 -34
  158. package/dist/llm/groq/chat.d.ts +21 -2
  159. package/dist/llm/groq/chat.js +125 -26
  160. package/dist/llm/groq/parser.js +3 -1
  161. package/dist/llm/mistral/chat.d.ts +43 -0
  162. package/dist/llm/mistral/chat.js +154 -0
  163. package/dist/llm/mistral/index.d.ts +2 -0
  164. package/dist/llm/mistral/index.js +2 -0
  165. package/dist/llm/mistral/schema.d.ts +8 -0
  166. package/dist/llm/mistral/schema.js +27 -0
  167. package/dist/llm/models.d.ts +2 -0
  168. package/dist/llm/models.js +317 -0
  169. package/dist/llm/ollama/chat.d.ts +13 -1
  170. package/dist/llm/ollama/chat.js +110 -19
  171. package/dist/llm/ollama/serializer.d.ts +1 -0
  172. package/dist/llm/ollama/serializer.js +34 -12
  173. package/dist/llm/openai/chat.d.ts +16 -0
  174. package/dist/llm/openai/chat.js +94 -44
  175. package/dist/llm/openai/like.d.ts +5 -3
  176. package/dist/llm/openai/like.js +7 -3
  177. package/dist/llm/openai/responses-serializer.d.ts +18 -0
  178. package/dist/llm/openai/responses-serializer.js +72 -0
  179. package/dist/llm/openrouter/chat.d.ts +28 -2
  180. package/dist/llm/openrouter/chat.js +115 -29
  181. package/dist/llm/schema.d.ts +11 -1
  182. package/dist/llm/schema.js +81 -1
  183. package/dist/llm/vercel/chat.d.ts +50 -0
  184. package/dist/llm/vercel/chat.js +276 -0
  185. package/dist/llm/vercel/index.d.ts +1 -0
  186. package/dist/llm/vercel/index.js +1 -0
  187. package/dist/llm/vercel/serializer.d.ts +5 -0
  188. package/dist/llm/vercel/serializer.js +7 -0
  189. package/dist/llm/views.d.ts +2 -1
  190. package/dist/llm/views.js +3 -1
  191. package/dist/logging-config.d.ts +2 -0
  192. package/dist/logging-config.js +82 -29
  193. package/dist/mcp/client.d.ts +10 -5
  194. package/dist/mcp/client.js +14 -9
  195. package/dist/mcp/controller.d.ts +42 -3
  196. package/dist/mcp/controller.js +56 -31
  197. package/dist/mcp/server.d.ts +14 -0
  198. package/dist/mcp/server.js +255 -52
  199. package/dist/observability.js +10 -4
  200. package/dist/sandbox/index.d.ts +2 -0
  201. package/dist/sandbox/index.js +2 -0
  202. package/dist/sandbox/sandbox.d.ts +19 -0
  203. package/dist/sandbox/sandbox.js +140 -0
  204. package/dist/sandbox/views.d.ts +67 -0
  205. package/dist/sandbox/views.js +121 -0
  206. package/dist/skill-cli/index.d.ts +3 -0
  207. package/dist/skill-cli/index.js +3 -0
  208. package/dist/skill-cli/protocol.d.ts +30 -0
  209. package/dist/skill-cli/protocol.js +48 -0
  210. package/dist/skill-cli/server.d.ts +11 -0
  211. package/dist/skill-cli/server.js +85 -0
  212. package/dist/skill-cli/sessions.d.ts +24 -0
  213. package/dist/skill-cli/sessions.js +47 -0
  214. package/dist/skills/index.d.ts +3 -0
  215. package/dist/skills/index.js +3 -0
  216. package/dist/skills/service.d.ts +27 -0
  217. package/dist/skills/service.js +266 -0
  218. package/dist/skills/utils.d.ts +6 -0
  219. package/dist/skills/utils.js +53 -0
  220. package/dist/skills/views.d.ts +40 -0
  221. package/dist/skills/views.js +10 -0
  222. package/dist/sync/auth.js +8 -3
  223. package/dist/sync/service.d.ts +6 -6
  224. package/dist/sync/service.js +54 -89
  225. package/dist/telemetry/views.d.ts +20 -6
  226. package/dist/telemetry/views.js +23 -5
  227. package/dist/tokens/custom-pricing.d.ts +2 -0
  228. package/dist/tokens/custom-pricing.js +22 -0
  229. package/dist/tokens/index.d.ts +2 -0
  230. package/dist/tokens/index.js +2 -0
  231. package/dist/tokens/mappings.d.ts +1 -0
  232. package/dist/tokens/mappings.js +3 -0
  233. package/dist/tokens/service.js +27 -8
  234. package/dist/tools/extraction/index.d.ts +2 -0
  235. package/dist/tools/extraction/index.js +2 -0
  236. package/dist/tools/extraction/schema-utils.d.ts +6 -0
  237. package/dist/tools/extraction/schema-utils.js +237 -0
  238. package/dist/tools/extraction/views.d.ts +7 -0
  239. package/dist/tools/index.d.ts +5 -0
  240. package/dist/tools/index.js +5 -0
  241. package/dist/tools/registry/index.d.ts +2 -0
  242. package/dist/tools/registry/index.js +2 -0
  243. package/dist/tools/registry/service.d.ts +1 -0
  244. package/dist/tools/registry/service.js +1 -0
  245. package/dist/tools/registry/views.d.ts +1 -0
  246. package/dist/tools/registry/views.js +1 -0
  247. package/dist/tools/service.d.ts +2 -0
  248. package/dist/tools/service.js +1 -0
  249. package/dist/tools/utils.d.ts +2 -0
  250. package/dist/tools/utils.js +57 -0
  251. package/dist/tools/views.d.ts +1 -0
  252. package/dist/tools/views.js +1 -0
  253. package/dist/utils.d.ts +10 -1
  254. package/dist/utils.js +70 -3
  255. package/package.json +87 -26
  256. package/dist/dom/playground/process-dom.js +0 -5
  257. package/dist/dom/playground/test-accessibility.d.ts +0 -44
  258. package/dist/dom/playground/test-accessibility.js +0 -111
  259. /package/dist/{dom/playground/process-dom.d.ts → tools/extraction/views.js} +0 -0
@@ -1,11 +1,12 @@
1
1
  import fsSync from 'node:fs';
2
2
  import { promises as fsp } from 'node:fs';
3
3
  import path from 'node:path';
4
+ import AdmZip from 'adm-zip';
4
5
  import PDFDocument from 'pdfkit';
5
6
  import { createRequire } from 'node:module';
6
7
  import { spawnSync } from 'node:child_process';
7
8
  const require = createRequire(import.meta.url);
8
- async function extractPdfText(buffer) {
9
+ export async function extractPdfText(buffer) {
9
10
  const pdfParseModule = (await import('pdf-parse'));
10
11
  if (typeof pdfParseModule.default === 'function') {
11
12
  const legacyParser = pdfParseModule.default;
@@ -33,10 +34,200 @@ async function extractPdfText(buffer) {
33
34
  }
34
35
  throw new FileSystemError("Error: Could not parse PDF file due to unsupported 'pdf-parse' module format.");
35
36
  }
37
/**
 * Extract the text of a PDF page by page.
 *
 * When the installed `pdf-parse` module exposes a `PDFParse` class, each page
 * is read individually. Otherwise the whole document is parsed through
 * `extractPdfText` and returned as a single entry in `pageTexts`.
 *
 * @param {Buffer} buffer - Raw bytes of the PDF file.
 * @returns {Promise<{numPages: number, pageTexts: string[], totalChars: number}>}
 *   `numPages` is always a finite number >= 1; `totalChars` is the summed
 *   length of the strings in `pageTexts`.
 */
export async function extractPdfTextByPage(buffer) {
    const pdfParseModule = (await import('pdf-parse'));
    if (typeof pdfParseModule.PDFParse !== 'function') {
        // No per-page API available: fall back to the whole-document parser.
        const parsed = await extractPdfText(buffer);
        const text = typeof parsed?.text === 'string' ? parsed.text : '';
        // Math.max(undefined, 1) is NaN, so validate the reported page count
        // explicitly instead of trusting the parser to always provide it.
        const reportedPages = Number(parsed?.totalPages);
        return {
            numPages: Number.isFinite(reportedPages) && reportedPages >= 1 ? reportedPages : 1,
            pageTexts: [text],
            totalChars: text.length,
        };
    }
    const Parser = pdfParseModule.PDFParse;
    const parser = new Parser({ data: buffer });
    try {
        let numPages = 0;
        try {
            const info = await parser.getInfo?.({ parsePageInfo: false });
            numPages = Number(info?.total ?? 0);
        }
        catch {
            // Best effort: an unreadable info block just means we cannot go
            // page by page; the whole-document path below handles it.
            numPages = 0;
        }
        if (!Number.isFinite(numPages) || numPages <= 0) {
            // Page count unknown: return the whole text as one pseudo-page.
            const full = await parser.getText();
            const text = typeof full?.text === 'string' ? full.text : '';
            return {
                numPages: 1,
                pageTexts: [text],
                totalChars: text.length,
            };
        }
        const pageTexts = [];
        let totalChars = 0;
        for (let pageNumber = 1; pageNumber <= numPages; pageNumber += 1) {
            const pageResult = await parser.getText({ partial: [pageNumber] });
            const text = typeof pageResult?.text === 'string' ? pageResult.text : '';
            pageTexts.push(text);
            totalChars += text.length;
        }
        return {
            numPages,
            pageTexts,
            totalChars,
        };
    }
    finally {
        // Release parser resources whether extraction succeeded or threw.
        if (typeof parser.destroy === 'function') {
            await parser.destroy();
        }
    }
}
36
88
  export const INVALID_FILENAME_ERROR_MESSAGE = 'Error: Invalid filename format. Must be alphanumeric with supported extension.';
37
89
  export const DEFAULT_FILE_SYSTEM_PATH = 'browseruse_agent_data';
38
- const DEFAULT_EXTENSIONS = ['md', 'txt', 'json', 'csv', 'pdf'];
39
- const filenameRegex = new RegExp(`^[a-zA-Z0-9_\\-]+\\.(${DEFAULT_EXTENSIONS.join('|')})$`);
90
// Extensions that buildFilenameErrorMessage reports with the dedicated
// "binary/image file" error instead of the generic "unsupported extension" one.
const UNSUPPORTED_BINARY_EXTENSIONS = new Set([
    // images
    'png', 'jpg', 'jpeg', 'gif', 'bmp', 'svg', 'webp', 'ico',
    // audio / video
    'mp3', 'mp4', 'wav', 'avi', 'mov',
    // archives
    'zip', 'tar', 'gz', 'rar',
    // executables / native libraries
    'exe', 'bin', 'dll', 'so',
]);
// NOTE(review): presumably the default allow-list behind
// get_allowed_extensions(); that method is outside this view - confirm.
const DEFAULT_EXTENSIONS = ['md', 'txt', 'json', 'jsonl', 'csv', 'pdf', 'docx', 'html', 'xml'];
124
/**
 * Backslash-escape every regular-expression metacharacter in `value` so it
 * can be embedded in a pattern as a literal.
 */
function escapeRegex(value) {
    return value.replace(/[.*+?^${}()|[\]\\]/g, (metaChar) => `\\${metaChar}`);
}
/**
 * Build the anchored pattern a file's base name must match: one or more
 * allowed stem characters (ASCII letters, digits, `_`, `-`, `.`, parentheses,
 * space, U+4E00-U+9FFF) followed by a dot and one of `extensions`.
 */
function buildFilenameRegex(extensions) {
    const stemChars = '[a-zA-Z0-9_\\-.() \\u4e00-\\u9fff]';
    const alternatives = extensions.map((ext) => escapeRegex(ext)).join('|');
    return new RegExp(`^${stemChars}+\\.(${alternatives})$`);
}
126
/**
 * Compose the error text for a rejected filename.
 *
 * Checks are applied to the base name in this order: no extension at all,
 * a known binary/media extension, an extension missing from
 * `supportedExtensions`, and finally a generic "invalid filename" message.
 *
 * @param {string} fileName - Name or path as supplied by the caller.
 * @param {string[]} supportedExtensions - Allowed extensions without the dot.
 * @returns {string} Message starting with "Error: ".
 */
const buildFilenameErrorMessage = (fileName, supportedExtensions) => {
    const base = path.basename(fileName);
    const supported = supportedExtensions.map((ext) => `.${ext}`).join(', ');
    const dotIndex = base.lastIndexOf('.');
    if (dotIndex === -1) {
        return [
            `Error: Filename '${base}' has no extension. `,
            `Please add a supported extension: ${supported}.`,
        ].join('');
    }
    const ext = base.slice(dotIndex + 1).toLowerCase();
    if (UNSUPPORTED_BINARY_EXTENSIONS.has(ext)) {
        return [
            `Error: Cannot write binary/image file '${base}'. `,
            'The write_file tool only supports text-based files. ',
            `Supported extensions: ${supported}. `,
            'For screenshots, the browser automatically captures them - do not try to save screenshots as files.',
        ].join('');
    }
    if (!supportedExtensions.includes(ext)) {
        return [
            `Error: Unsupported file extension '.${ext}' in '${base}'. `,
            `Supported extensions: ${supported}. `,
            'Please rename the file to use a supported extension.',
        ].join('');
    }
    // The extension is fine, so the stem must contain a disallowed character.
    return [
        `Error: Invalid filename '${base}'. `,
        'Filenames must contain only letters, numbers, underscores, hyphens, dots, parentheses, and spaces. ',
        `Supported extensions: ${supported}.`,
    ].join('');
};
151
/**
 * Make `value` safe to embed as XML character data or an attribute value.
 *
 * Characters that XML 1.0 forbids outright (C0 controls other than tab, LF
 * and CR, plus U+FFFE/U+FFFF) are removed first: they cannot be escaped, and
 * leaving them in yields a document that strict XML parsers reject. The five
 * markup characters are then replaced by their predefined entities, with `&`
 * handled first so the ampersands of the other entities are not re-escaped.
 *
 * @param {string} value - Raw text.
 * @returns {string} Escaped text containing only legal XML characters.
 */
const escapeXmlText = (value) => value
    .replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\uFFFE\uFFFF]/g, '')
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
157
/**
 * Decode XML entity references in `value`.
 *
 * Handles the five predefined entities and decimal/hexadecimal character
 * references (`&#65;`, `&#x4e2d;`). Decoding is done in a single pass, so
 * text produced by one substitution is never decoded a second time
 * (`&amp;lt;` becomes `&lt;`, not `<`). Unknown entities and references
 * outside the Unicode range are left untouched.
 *
 * @param {string} value - XML character data.
 * @returns {string} Decoded text.
 */
const decodeXmlText = (value) => {
    const namedEntities = { apos: "'", quot: '"', gt: '>', lt: '<', amp: '&' };
    return value.replace(/&(?:#x([0-9a-fA-F]+)|#([0-9]+)|(apos|quot|gt|lt|amp));/g, (reference, hex, dec, name) => {
        if (name !== undefined) {
            return namedEntities[name];
        }
        const codePoint = hex !== undefined
            ? Number.parseInt(hex, 16)
            : Number.parseInt(dec, 10);
        // String.fromCodePoint throws on out-of-range values; keep them verbatim.
        if (!Number.isFinite(codePoint) || codePoint > 0x10ffff) {
            return reference;
        }
        return String.fromCodePoint(codePoint);
    });
};
163
+ const DOCX_CONTENT_TYPES_XML = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
164
+ <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
165
+ <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
166
+ <Default Extension="xml" ContentType="application/xml"/>
167
+ <Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
168
+ </Types>`;
169
+ const DOCX_ROOT_RELS_XML = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
170
+ <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
171
+ <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/>
172
+ </Relationships>`;
173
+ const DOCX_DOCUMENT_RELS_XML = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
174
+ <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"></Relationships>`;
175
+ const buildDocxDocumentXml = (content) => {
176
+ const lines = content.split(/\r?\n/);
177
+ const paragraphs = lines
178
+ .map((line) => {
179
+ if (!line) {
180
+ return '<w:p/>';
181
+ }
182
+ return `<w:p><w:r><w:t xml:space="preserve">${escapeXmlText(line)}</w:t></w:r></w:p>`;
183
+ })
184
+ .join('');
185
+ return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
186
+ <w:document xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas"
187
+ xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
188
+ xmlns:o="urn:schemas-microsoft-com:office:office"
189
+ xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
190
+ xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"
191
+ xmlns:v="urn:schemas-microsoft-com:vml"
192
+ xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing"
193
+ xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
194
+ xmlns:w10="urn:schemas-microsoft-com:office:word"
195
+ xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
196
+ xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"
197
+ xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup"
198
+ xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk"
199
+ xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml"
200
+ xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape"
201
+ mc:Ignorable="w14 wp14">
202
+ <w:body>${paragraphs}<w:sectPr><w:pgSz w:w="12240" w:h="15840"/><w:pgMar w:top="1440" w:right="1440" w:bottom="1440" w:left="1440" w:header="708" w:footer="708" w:gutter="0"/></w:sectPr></w:body>
203
+ </w:document>`;
204
+ };
205
/**
 * Package `content` as a minimal DOCX archive and return its bytes.
 *
 * The archive holds four parts: the content-types manifest, the root
 * relationships, the (empty) document relationships, and the main document
 * generated by buildDocxDocumentXml.
 *
 * @param {string} content - Plain text to store in the document.
 * @returns {Buffer} Zip archive bytes.
 */
const buildDocxBuffer = (content) => {
    const archive = new AdmZip();
    const addXmlPart = (entryName, xml) => {
        archive.addFile(entryName, Buffer.from(xml, 'utf-8'));
    };
    addXmlPart('[Content_Types].xml', DOCX_CONTENT_TYPES_XML);
    addXmlPart('_rels/.rels', DOCX_ROOT_RELS_XML);
    addXmlPart('word/_rels/document.xml.rels', DOCX_DOCUMENT_RELS_XML);
    addXmlPart('word/document.xml', buildDocxDocumentXml(content));
    return archive.toBuffer();
};
213
/**
 * Extract the plain text of a DOCX file.
 *
 * Reads `word/document.xml` from the archive and returns one line per
 * `<w:p>` paragraph. Within a paragraph, `<w:t>` text is entity-decoded,
 * a bare `<w:tab/>` becomes a tab and `<w:br/>` / `<w:cr/>` become a newline,
 * so words separated only by those elements do not run together.
 *
 * @param {Buffer} fileBuffer - Raw bytes of the .docx file.
 * @returns {string} Paragraph texts joined with "\n", trimmed at both ends.
 * @throws {FileSystemError} If the archive has no `word/document.xml` entry.
 */
const readDocxText = (fileBuffer) => {
    const zip = new AdmZip(fileBuffer);
    const documentEntry = zip.getEntry('word/document.xml');
    if (!documentEntry) {
        throw new FileSystemError('Error: Could not parse DOCX file content.');
    }
    const xml = documentEntry.getData().toString('utf-8');
    const normalizedXml = xml
        // A self-closing <w:t .../> carries no text; drop it so the text
        // pattern below cannot pair it with a later, unrelated </w:t>.
        .replace(/<w:t(?:\s[^>]*)?\/>/g, '')
        // Expand empty paragraphs so every paragraph has a closing tag.
        .replace(/<w:p\b([^>]*)\/>/g, '<w:p$1></w:p>');
    // \b keeps <w:pPr>, <w:pgSz> and similar tags from opening a match.
    const paragraphMatches = normalizedXml.match(/<w:p\b[^>]*>[\s\S]*?<\/w:p>/g) ?? [];
    // Only an attribute-less <w:tab/> is a tab character; <w:tab w:val=... />
    // inside <w:tabs> defines a tab stop and must not produce output.
    const runContentPattern = /<w:t(?:\s[^>]*)?>([\s\S]*?)<\/w:t>|<w:tab\s*\/>|<w:(?:br|cr)\b[^>]*\/>/g;
    const lines = paragraphMatches.map((paragraph) => Array.from(paragraph.matchAll(runContentPattern), (match) => {
        if (match[1] !== undefined) {
            return decodeXmlText(match[1]);
        }
        return match[0].startsWith('<w:tab') ? '\t' : '\n';
    }).join(''));
    return lines.join('\n').trim();
};
40
231
  export class FileSystemError extends Error {
41
232
  }
42
233
  class BaseFile {
@@ -105,6 +296,11 @@ class JsonFile extends BaseFile {
105
296
  return 'json';
106
297
  }
107
298
  }
299
/** BaseFile subclass for `.jsonl` files; only the extension is overridden. */
class JsonlFile extends BaseFile {
    /** @returns {string} Extension without the leading dot. */
    get extension() {
        return 'jsonl';
    }
}
108
304
  class CsvFile extends BaseFile {
109
305
  get extension() {
110
306
  return 'csv';
@@ -154,19 +350,52 @@ stream.on('error', (err) => {
154
350
  }
155
351
  }
156
352
  }
353
/**
 * BaseFile subclass for `.docx` files. The text in `this.content` is wrapped
 * in a DOCX archive by buildDocxBuffer when written to disk.
 */
class DocxFile extends BaseFile {
    /** @returns {string} Extension without the leading dot. */
    get extension() {
        return 'docx';
    }
    /**
     * Write the file into `dir` asynchronously.
     * @param {string} dir - Target directory.
     */
    async syncToDisk(dir) {
        const target = path.join(dir, this.fullName);
        await fsp.writeFile(target, buildDocxBuffer(this.content || ''));
    }
    /**
     * Write the file into `dir` synchronously.
     * @param {string} dir - Target directory.
     */
    syncToDiskSync(dir) {
        const target = path.join(dir, this.fullName);
        fsSync.writeFileSync(target, buildDocxBuffer(this.content || ''));
    }
}
368
/** BaseFile subclass for `.html` files; only the extension is overridden. */
class HtmlFile extends BaseFile {
    /** @returns {string} Extension without the leading dot. */
    get extension() {
        return 'html';
    }
}
373
/** BaseFile subclass for `.xml` files; only the extension is overridden. */
class XmlFile extends BaseFile {
    /** @returns {string} Extension without the leading dot. */
    get extension() {
        return 'xml';
    }
}
157
378
// File class for each supported extension (key is the extension without the dot).
const FILE_TYPES = {
    md: MarkdownFile,
    txt: TxtFile,
    json: JsonFile,
    jsonl: JsonlFile,
    csv: CsvFile,
    pdf: PdfFile,
    docx: DocxFile,
    html: HtmlFile,
    xml: XmlFile,
};
// File class for each class name (key is the class's own identifier).
const TYPE_NAME_MAP = {
    MarkdownFile: MarkdownFile,
    TxtFile: TxtFile,
    JsonFile: JsonFile,
    JsonlFile: JsonlFile,
    CsvFile: CsvFile,
    PdfFile: PdfFile,
    DocxFile: DocxFile,
    HtmlFile: HtmlFile,
    XmlFile: XmlFile,
};
171
400
  export class FileSystem {
172
401
  files = new Map();
@@ -194,7 +423,45 @@ export class FileSystem {
194
423
  }
195
424
  }
196
425
  isValidFilename(filename) {
197
- return filenameRegex.test(filename);
426
+ const base = path.basename(filename);
427
+ const regex = buildFilenameRegex(this.get_allowed_extensions());
428
+ if (!regex.test(base)) {
429
+ return false;
430
+ }
431
+ const idx = base.lastIndexOf('.');
432
+ if (idx <= 0) {
433
+ return false;
434
+ }
435
+ return base.slice(0, idx).trim().length > 0;
436
+ }
437
+ static sanitize_filename(fileName) {
438
+ const base = path.basename(fileName);
439
+ const idx = base.lastIndexOf('.');
440
+ if (idx === -1) {
441
+ return base;
442
+ }
443
+ const ext = base.slice(idx + 1).toLowerCase();
444
+ let namePart = base.slice(0, idx);
445
+ namePart = namePart.replace(/ /g, '-');
446
+ namePart = namePart.replace(/[^a-zA-Z0-9_\-.()\u4e00-\u9fff]/g, '');
447
+ namePart = namePart.replace(/-{2,}/g, '-');
448
+ namePart = namePart.replace(/^[-.]+|[-.]+$/g, '');
449
+ if (!namePart) {
450
+ namePart = 'file';
451
+ }
452
+ return `${namePart}.${ext}`;
453
+ }
454
+ resolveFilename(filename) {
455
+ const base = path.basename(filename);
456
+ const wasChanged = base !== filename;
457
+ if (this.isValidFilename(base)) {
458
+ return [base, wasChanged];
459
+ }
460
+ const sanitized = FileSystem.sanitize_filename(base);
461
+ if (sanitized !== base && this.isValidFilename(sanitized)) {
462
+ return [sanitized, true];
463
+ }
464
+ return [base, wasChanged];
198
465
  }
199
466
  parseFilename(filename) {
200
467
  const idx = filename.lastIndexOf('.');
@@ -223,113 +490,307 @@ export class FileSystem {
223
490
  return this.dataDir;
224
491
  }
225
492
  get_file(filename) {
226
- return this.files.get(filename) ?? null;
493
+ const [resolved] = this.resolveFilename(filename);
494
+ if (!this.isValidFilename(resolved)) {
495
+ return null;
496
+ }
497
+ return this.files.get(resolved) ?? null;
227
498
  }
228
499
  list_files() {
229
500
  return Array.from(this.files.values()).map((file) => file.fullName);
230
501
  }
231
502
  display_file(filename) {
232
- if (!this.isValidFilename(filename)) {
503
+ const [resolved] = this.resolveFilename(filename);
504
+ if (!this.isValidFilename(resolved)) {
233
505
  return null;
234
506
  }
235
- const file = this.get_file(filename);
507
+ const file = this.files.get(resolved) ?? null;
236
508
  return file ? file.read() : null;
237
509
  }
238
- async read_file(filename, externalFile = false) {
510
+ async read_file_structured(filename, externalFile = false) {
511
+ const result = {
512
+ message: '',
513
+ images: null,
514
+ };
239
515
  if (externalFile) {
240
516
  try {
241
- const [, extension] = this.parseFilename(filename);
242
- if (['md', 'txt', 'json', 'csv'].includes(extension)) {
517
+ const base = path.basename(filename);
518
+ const idx = base.lastIndexOf('.');
519
+ if (idx === -1) {
520
+ result.message =
521
+ `Error: Invalid filename format ${filename}. ` +
522
+ 'Must be alphanumeric with a supported extension.';
523
+ return result;
524
+ }
525
+ const extension = base.slice(idx + 1).toLowerCase();
526
+ const specialExtensions = new Set([
527
+ 'docx',
528
+ 'pdf',
529
+ 'jpg',
530
+ 'jpeg',
531
+ 'png',
532
+ ]);
533
+ const textExtensions = this.get_allowed_extensions().filter((ext) => !specialExtensions.has(ext));
534
+ if (textExtensions.includes(extension)) {
243
535
  const content = await fsp.readFile(filename, 'utf-8');
244
- return `Read from file ${filename}.\n<content>\n${content}\n</content>`;
536
+ result.message = `Read from file ${filename}.\n<content>\n${content}\n</content>`;
537
+ return result;
245
538
  }
246
539
  if (extension === 'pdf') {
540
+ const MAX_CHARS = 60000;
247
541
  const buffer = await fsp.readFile(filename);
248
- const parsed = await extractPdfText(buffer);
249
- const totalPages = parsed.totalPages;
250
- const extraPages = Math.max(0, totalPages - 10);
251
- const snippet = parsed.text.trim();
252
- const preview = snippet
253
- .split(/\n{2,}/)
254
- .slice(0, 10)
542
+ const pdf = await extractPdfTextByPage(buffer);
543
+ const numPages = pdf.numPages;
544
+ const pageTexts = pdf.pageTexts;
545
+ const totalChars = pdf.totalChars;
546
+ if (totalChars <= MAX_CHARS) {
547
+ const contentParts = [];
548
+ for (let pageNumber = 1; pageNumber <= pageTexts.length; pageNumber += 1) {
549
+ const text = pageTexts[pageNumber - 1] ?? '';
550
+ if (!text.trim()) {
551
+ continue;
552
+ }
553
+ contentParts.push(`--- Page ${pageNumber} ---\n${text}`);
554
+ }
555
+ result.message =
556
+ `Read from file ${filename} (${numPages} pages, ${totalChars.toLocaleString()} chars).\n` +
557
+ `<content>\n${contentParts.join('\n\n')}\n</content>`;
558
+ return result;
559
+ }
560
+ const wordToPages = new Map();
561
+ const pageWords = new Map();
562
+ for (let pageNumber = 1; pageNumber <= pageTexts.length; pageNumber += 1) {
563
+ const text = pageTexts[pageNumber - 1] ?? '';
564
+ const words = new Set((text.toLowerCase().match(/\b[a-zA-Z]{4,}\b/g) ?? []).map((word) => word));
565
+ pageWords.set(pageNumber, words);
566
+ for (const word of words) {
567
+ if (!wordToPages.has(word)) {
568
+ wordToPages.set(word, new Set());
569
+ }
570
+ wordToPages.get(word).add(pageNumber);
571
+ }
572
+ }
573
+ const pageScores = new Map();
574
+ for (const [pageNumber, words] of pageWords.entries()) {
575
+ let score = 0;
576
+ for (const word of words) {
577
+ const pagesWithWord = wordToPages.get(word)?.size ?? 1;
578
+ score += Math.log(Math.max(numPages, 1) / pagesWithWord);
579
+ }
580
+ pageScores.set(pageNumber, score);
581
+ }
582
+ const priorityPages = [1];
583
+ const sortedPages = Array.from(pageScores.entries()).sort((a, b) => b[1] - a[1]);
584
+ for (const [pageNumber] of sortedPages) {
585
+ if (!priorityPages.includes(pageNumber)) {
586
+ priorityPages.push(pageNumber);
587
+ }
588
+ }
589
+ for (let pageNumber = 1; pageNumber <= numPages; pageNumber += 1) {
590
+ if (!priorityPages.includes(pageNumber)) {
591
+ priorityPages.push(pageNumber);
592
+ }
593
+ }
594
+ const contentParts = [];
595
+ let charsUsed = 0;
596
+ const pagesIncluded = [];
597
+ const pagesIncludedSet = new Set();
598
+ for (const pageNumber of priorityPages) {
599
+ const text = pageTexts[pageNumber - 1] ?? '';
600
+ if (!text.trim()) {
601
+ continue;
602
+ }
603
+ const pageHeader = `--- Page ${pageNumber} ---\n`;
604
+ const truncationSuffix = '\n[...truncated]';
605
+ const remaining = MAX_CHARS - charsUsed;
606
+ const minUseful = pageHeader.length + truncationSuffix.length + 50;
607
+ if (remaining < minUseful) {
608
+ break;
609
+ }
610
+ let pageContent = `${pageHeader}${text}`;
611
+ if (pageContent.length > remaining) {
612
+ pageContent =
613
+ pageContent.slice(0, Math.max(0, remaining - truncationSuffix.length)) + truncationSuffix;
614
+ }
615
+ contentParts.push({ pageNumber, content: pageContent });
616
+ charsUsed += pageContent.length;
617
+ pagesIncluded.push(pageNumber);
618
+ pagesIncludedSet.add(pageNumber);
619
+ if (charsUsed >= MAX_CHARS) {
620
+ break;
621
+ }
622
+ }
623
+ contentParts.sort((a, b) => a.pageNumber - b.pageNumber);
624
+ const extractedText = contentParts
625
+ .map((part) => part.content)
255
626
  .join('\n\n');
256
- const suffix = extraPages > 0 ? `\n${extraPages} more pages...` : '';
257
- return `Read from file ${filename}.\n<content>\n${preview}${suffix}\n</content>`;
627
+ let truncationNote = '';
628
+ const pagesNotShown = numPages - pagesIncluded.length;
629
+ if (pagesNotShown > 0) {
630
+ const skipped = [];
631
+ for (let pageNumber = 1; pageNumber <= numPages; pageNumber += 1) {
632
+ if (!pagesIncludedSet.has(pageNumber)) {
633
+ skipped.push(pageNumber);
634
+ }
635
+ }
636
+ const skippedPreview = skipped.slice(0, 10).join(', ');
637
+ const skippedSuffix = skipped.length > 10 ? ', ...' : '';
638
+ truncationNote =
639
+ `\n\n[Showing ${pagesIncluded.length} of ${numPages} pages. ` +
640
+ `Skipped pages: [${skippedPreview}${skippedSuffix}]. ` +
641
+ 'Use read_long_content with a specific goal to find relevant sections.]';
642
+ }
643
+ result.message =
644
+ `Read from file ${filename} (${numPages} pages, ${totalChars.toLocaleString()} chars total).\n` +
645
+ `<content>\n${extractedText}${truncationNote}\n</content>`;
646
+ return result;
647
+ }
648
+ if (extension === 'docx') {
649
+ const fileBuffer = await fsp.readFile(filename);
650
+ const content = readDocxText(fileBuffer);
651
+ result.message = `Read from file ${filename}.\n<content>\n${content}\n</content>`;
652
+ return result;
258
653
  }
259
- return `Error: Cannot read file ${filename} as ${extension} extension is not supported.`;
654
+ if (extension === 'jpg' ||
655
+ extension === 'jpeg' ||
656
+ extension === 'png') {
657
+ const fileBuffer = await fsp.readFile(filename);
658
+ result.message = `Read image file ${filename}.`;
659
+ result.images = [
660
+ {
661
+ name: base,
662
+ data: fileBuffer.toString('base64'),
663
+ },
664
+ ];
665
+ return result;
666
+ }
667
+ result.message = `Error: Cannot read file ${filename} as ${extension} extension is not supported.`;
668
+ return result;
260
669
  }
261
670
  catch (error) {
262
671
  if (error?.code === 'ENOENT') {
263
- return `Error: File '${filename}' not found.`;
672
+ result.message = `Error: File '${filename}' not found.`;
673
+ return result;
264
674
  }
265
675
  if (error?.code === 'EACCES') {
266
- return `Error: Permission denied to read file '${filename}'.`;
676
+ result.message = `Error: Permission denied to read file '${filename}'.`;
677
+ return result;
267
678
  }
268
- return `Error: Could not read file '${filename}'.`;
679
+ result.message =
680
+ `Error: Could not read file '${filename}'. ${error instanceof Error ? error.message : ''}`.trim();
681
+ return result;
269
682
  }
270
683
  }
271
- if (!this.isValidFilename(filename)) {
272
- return INVALID_FILENAME_ERROR_MESSAGE;
684
+ const originalFilename = filename;
685
+ const [resolved, wasSanitized] = this.resolveFilename(filename);
686
+ if (!this.isValidFilename(resolved)) {
687
+ result.message = buildFilenameErrorMessage(filename, this.get_allowed_extensions());
688
+ return result;
273
689
  }
274
- const file = this.get_file(filename);
690
+ const file = this.files.get(resolved) ?? null;
275
691
  if (!file) {
276
- return `File '${filename}' not found.`;
692
+ if (wasSanitized) {
693
+ result.message =
694
+ `File '${resolved}' not found. ` +
695
+ `(Filename was auto-corrected from '${originalFilename}')`;
696
+ }
697
+ else {
698
+ result.message = `File '${originalFilename}' not found.`;
699
+ }
700
+ return result;
277
701
  }
278
702
  try {
279
703
  const content = file.read();
280
- return `Read from file ${filename}.\n<content>\n${content}\n</content>`;
704
+ const sanitizeNote = wasSanitized
705
+ ? `Note: filename was auto-corrected from '${originalFilename}' to '${resolved}'. `
706
+ : '';
707
+ result.message = `${sanitizeNote}Read from file ${resolved}.\n<content>\n${content}\n</content>`;
708
+ return result;
281
709
  }
282
710
  catch (error) {
283
- return error instanceof FileSystemError
284
- ? error.message
285
- : `Error: Could not read file '${filename}'.`;
711
+ result.message =
712
+ error instanceof FileSystemError
713
+ ? error.message
714
+ : `Error: Could not read file '${originalFilename}'.`;
715
+ return result;
286
716
  }
287
717
  }
718
+ async read_file(filename, externalFile = false) {
719
+ const result = await this.read_file_structured(filename, externalFile);
720
+ return result.message;
721
+ }
288
722
  async write_file(filename, content) {
289
- if (!this.isValidFilename(filename)) {
290
- return INVALID_FILENAME_ERROR_MESSAGE;
723
+ const originalFilename = filename;
724
+ const [resolved, wasSanitized] = this.resolveFilename(filename);
725
+ if (!this.isValidFilename(resolved)) {
726
+ return buildFilenameErrorMessage(filename, this.get_allowed_extensions());
291
727
  }
728
+ filename = resolved;
292
729
  const file = this.files.get(filename) ?? this.instantiateFile(filename);
293
730
  this.files.set(filename, file);
294
731
  try {
295
732
  await file.write(content, this.dataDir);
296
- return `Data written to file ${filename} successfully.`;
733
+ const sanitizeNote = wasSanitized
734
+ ? ` (auto-corrected from '${originalFilename}')`
735
+ : '';
736
+ return `Data written to file ${filename} successfully.${sanitizeNote}`;
297
737
  }
298
738
  catch (error) {
299
739
  return `Error: Could not write to file '${filename}'. ${error.message}`;
300
740
  }
301
741
  }
302
742
  async append_file(filename, content) {
303
- if (!this.isValidFilename(filename)) {
304
- return INVALID_FILENAME_ERROR_MESSAGE;
743
+ const originalFilename = filename;
744
+ const [resolved, wasSanitized] = this.resolveFilename(filename);
745
+ if (!this.isValidFilename(resolved)) {
746
+ return buildFilenameErrorMessage(filename, this.get_allowed_extensions());
305
747
  }
748
+ filename = resolved;
306
749
  const file = this.get_file(filename);
307
750
  if (!file) {
751
+ if (wasSanitized) {
752
+ return (`File '${filename}' not found. ` +
753
+ `(Filename was auto-corrected from '${originalFilename}')`);
754
+ }
308
755
  return `File '${filename}' not found.`;
309
756
  }
310
757
  try {
311
758
  await file.append(content, this.dataDir);
312
- return `Data appended to file ${filename} successfully.`;
759
+ const sanitizeNote = wasSanitized
760
+ ? ` (auto-corrected from '${originalFilename}')`
761
+ : '';
762
+ return `Data appended to file ${filename} successfully.${sanitizeNote}`;
313
763
  }
314
764
  catch (error) {
315
765
  return `Error: Could not append to file '${filename}'. ${error.message}`;
316
766
  }
317
767
  }
318
768
  async replace_file_str(filename, oldStr, newStr) {
319
- if (!this.isValidFilename(filename)) {
320
- return INVALID_FILENAME_ERROR_MESSAGE;
769
+ const originalFilename = filename;
770
+ const [resolved, wasSanitized] = this.resolveFilename(filename);
771
+ if (!this.isValidFilename(resolved)) {
772
+ return buildFilenameErrorMessage(filename, this.get_allowed_extensions());
321
773
  }
774
+ filename = resolved;
322
775
  if (!oldStr) {
323
776
  return 'Error: Cannot replace empty string. Please provide a non-empty string to replace.';
324
777
  }
325
778
  const file = this.get_file(filename);
326
779
  if (!file) {
780
+ if (wasSanitized) {
781
+ return (`File '${filename}' not found. ` +
782
+ `(Filename was auto-corrected from '${originalFilename}')`);
783
+ }
327
784
  return `File '${filename}' not found.`;
328
785
  }
329
786
  try {
330
787
  const content = file.read().replaceAll(oldStr, newStr);
331
788
  await file.write(content, this.dataDir);
332
- return `Successfully replaced all occurrences of "${oldStr}" with "${newStr}" in file ${filename}`;
789
+ const sanitizeNote = wasSanitized
790
+ ? ` (auto-corrected from '${originalFilename}')`
791
+ : '';
792
+ return (`Successfully replaced all occurrences of "${oldStr}" with "${newStr}" in file ${filename}` +
793
+ sanitizeNote);
333
794
  }
334
795
  catch (error) {
335
796
  return `Error: Could not replace string in file '${filename}'. ${error.message}`;
@@ -341,7 +802,7 @@ export class FileSystem {
341
802
  await file.write(content, this.dataDir);
342
803
  this.files.set(filename, file);
343
804
  this.extractedContentCount += 1;
344
- return `Extracted content saved to file ${filename} successfully.`;
805
+ return filename;
345
806
  }
346
807
  describe() {
347
808
  const DISPLAY_CHARS = 400;