browser-use 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +295 -686
- package/dist/actor/element.d.ts +19 -0
- package/dist/actor/element.js +46 -0
- package/dist/actor/index.d.ts +4 -0
- package/dist/actor/index.js +4 -0
- package/dist/actor/mouse.d.ts +19 -0
- package/dist/actor/mouse.js +39 -0
- package/dist/actor/page.d.ts +29 -0
- package/dist/actor/page.js +88 -0
- package/dist/actor/utils.d.ts +4 -0
- package/dist/actor/utils.js +35 -0
- package/dist/agent/cloud-events.d.ts +18 -0
- package/dist/agent/cloud-events.js +65 -2
- package/dist/agent/gif.d.ts +1 -0
- package/dist/agent/gif.js +24 -2
- package/dist/agent/judge.d.ts +17 -0
- package/dist/agent/judge.js +197 -0
- package/dist/agent/message-manager/service.d.ts +12 -4
- package/dist/agent/message-manager/service.js +205 -39
- package/dist/agent/message-manager/utils.js +0 -1
- package/dist/agent/message-manager/views.d.ts +4 -0
- package/dist/agent/message-manager/views.js +11 -7
- package/dist/agent/prompts.d.ts +24 -3
- package/dist/agent/prompts.js +274 -59
- package/dist/agent/service.d.ts +99 -41
- package/dist/agent/service.js +2266 -472
- package/dist/agent/variable-detector.d.ts +12 -0
- package/dist/agent/variable-detector.js +211 -0
- package/dist/agent/views.d.ts +237 -18
- package/dist/agent/views.js +446 -33
- package/dist/browser/cloud/cloud.d.ts +20 -0
- package/dist/browser/cloud/cloud.js +129 -0
- package/dist/browser/cloud/index.d.ts +2 -0
- package/dist/browser/cloud/index.js +2 -0
- package/dist/browser/cloud/views.d.ts +41 -0
- package/dist/browser/cloud/views.js +35 -0
- package/dist/browser/events.d.ts +345 -0
- package/dist/browser/events.js +566 -0
- package/dist/browser/extensions.js +17 -17
- package/dist/browser/index.d.ts +4 -0
- package/dist/browser/index.js +4 -0
- package/dist/browser/profile.d.ts +8 -2
- package/dist/browser/profile.js +79 -12
- package/dist/browser/session-manager.d.ts +85 -0
- package/dist/browser/session-manager.js +208 -0
- package/dist/browser/session.d.ts +100 -8
- package/dist/browser/session.js +1097 -58
- package/dist/browser/types.d.ts +0 -2
- package/dist/browser/views.d.ts +39 -0
- package/dist/browser/views.js +32 -0
- package/dist/browser/watchdogs/aboutblank-watchdog.d.ts +12 -0
- package/dist/browser/watchdogs/aboutblank-watchdog.js +131 -0
- package/dist/browser/watchdogs/base.d.ts +21 -0
- package/dist/browser/watchdogs/base.js +81 -0
- package/dist/browser/watchdogs/cdp-session-watchdog.d.ts +14 -0
- package/dist/browser/watchdogs/cdp-session-watchdog.js +177 -0
- package/dist/browser/watchdogs/crash-watchdog.d.ts +38 -0
- package/dist/browser/watchdogs/crash-watchdog.js +296 -0
- package/dist/browser/watchdogs/default-action-watchdog.d.ts +49 -0
- package/dist/browser/watchdogs/default-action-watchdog.js +212 -0
- package/dist/browser/watchdogs/dom-watchdog.d.ts +8 -0
- package/dist/browser/watchdogs/dom-watchdog.js +31 -0
- package/dist/browser/watchdogs/downloads-watchdog.d.ts +77 -0
- package/dist/browser/watchdogs/downloads-watchdog.js +409 -0
- package/dist/browser/watchdogs/har-recording-watchdog.d.ts +19 -0
- package/dist/browser/watchdogs/har-recording-watchdog.js +317 -0
- package/dist/browser/watchdogs/index.d.ts +15 -0
- package/dist/browser/watchdogs/index.js +15 -0
- package/dist/browser/watchdogs/local-browser-watchdog.d.ts +10 -0
- package/dist/browser/watchdogs/local-browser-watchdog.js +32 -0
- package/dist/browser/watchdogs/permissions-watchdog.d.ts +8 -0
- package/dist/browser/watchdogs/permissions-watchdog.js +73 -0
- package/dist/browser/watchdogs/popups-watchdog.d.ts +13 -0
- package/dist/browser/watchdogs/popups-watchdog.js +77 -0
- package/dist/browser/watchdogs/recording-watchdog.d.ts +27 -0
- package/dist/browser/watchdogs/recording-watchdog.js +249 -0
- package/dist/browser/watchdogs/screenshot-watchdog.d.ts +6 -0
- package/dist/browser/watchdogs/screenshot-watchdog.js +13 -0
- package/dist/browser/watchdogs/security-watchdog.d.ts +10 -0
- package/dist/browser/watchdogs/security-watchdog.js +84 -0
- package/dist/browser/watchdogs/storage-state-watchdog.d.ts +24 -0
- package/dist/browser/watchdogs/storage-state-watchdog.js +288 -0
- package/dist/cli.d.ts +7 -2
- package/dist/cli.js +182 -25
- package/dist/code-use/formatting.d.ts +3 -0
- package/dist/code-use/formatting.js +18 -0
- package/dist/code-use/index.d.ts +6 -0
- package/dist/code-use/index.js +6 -0
- package/dist/code-use/namespace.d.ts +5 -0
- package/dist/code-use/namespace.js +81 -0
- package/dist/code-use/notebook-export.d.ts +3 -0
- package/dist/code-use/notebook-export.js +56 -0
- package/dist/code-use/service.d.ts +24 -0
- package/dist/code-use/service.js +104 -0
- package/dist/code-use/utils.d.ts +4 -0
- package/dist/code-use/utils.js +98 -0
- package/dist/code-use/views.d.ts +108 -0
- package/dist/code-use/views.js +165 -0
- package/dist/config.d.ts +13 -0
- package/dist/config.js +69 -3
- package/dist/controller/registry/service.d.ts +10 -1
- package/dist/controller/registry/service.js +266 -10
- package/dist/controller/registry/views.d.ts +4 -1
- package/dist/controller/registry/views.js +25 -2
- package/dist/controller/service.d.ts +10 -1
- package/dist/controller/service.js +1807 -268
- package/dist/controller/views.d.ts +78 -155
- package/dist/controller/views.js +61 -12
- package/dist/dom/history-tree-processor/service.d.ts +5 -0
- package/dist/dom/history-tree-processor/service.js +169 -14
- package/dist/dom/history-tree-processor/view.d.ts +7 -1
- package/dist/dom/history-tree-processor/view.js +10 -1
- package/dist/dom/markdown-extractor.d.ts +37 -0
- package/dist/dom/markdown-extractor.js +345 -0
- package/dist/dom/service.d.ts +3 -1
- package/dist/dom/service.js +76 -0
- package/dist/dom/views.d.ts +1 -0
- package/dist/dom/views.js +45 -0
- package/dist/event-bus.d.ts +107 -7
- package/dist/event-bus.js +313 -10
- package/dist/exceptions.d.ts +0 -3
- package/dist/exceptions.js +0 -7
- package/dist/filesystem/file-system.d.ts +18 -0
- package/dist/filesystem/file-system.js +503 -42
- package/dist/index.d.ts +7 -0
- package/dist/index.js +6 -0
- package/dist/integrations/gmail/actions.d.ts +3 -3
- package/dist/integrations/gmail/actions.js +4 -4
- package/dist/llm/anthropic/chat.d.ts +18 -1
- package/dist/llm/anthropic/chat.js +123 -55
- package/dist/llm/anthropic/serializer.d.ts +2 -0
- package/dist/llm/anthropic/serializer.js +81 -9
- package/dist/llm/aws/chat-anthropic.d.ts +17 -0
- package/dist/llm/aws/chat-anthropic.js +126 -26
- package/dist/llm/aws/chat-bedrock.d.ts +28 -1
- package/dist/llm/aws/chat-bedrock.js +161 -34
- package/dist/llm/aws/serializer.d.ts +13 -1
- package/dist/llm/aws/serializer.js +56 -17
- package/dist/llm/azure/chat.d.ts +53 -2
- package/dist/llm/azure/chat.js +366 -54
- package/dist/llm/base.d.ts +2 -0
- package/dist/llm/browser-use/chat.d.ts +40 -0
- package/dist/llm/browser-use/chat.js +305 -0
- package/dist/llm/browser-use/index.d.ts +1 -0
- package/dist/llm/browser-use/index.js +1 -0
- package/dist/llm/cerebras/chat.d.ts +39 -0
- package/dist/llm/cerebras/chat.js +178 -0
- package/dist/llm/cerebras/index.d.ts +2 -0
- package/dist/llm/cerebras/index.js +2 -0
- package/dist/llm/cerebras/serializer.d.ts +7 -0
- package/dist/llm/cerebras/serializer.js +82 -0
- package/dist/llm/deepseek/chat.d.ts +19 -2
- package/dist/llm/deepseek/chat.js +138 -25
- package/dist/llm/google/chat.d.ts +46 -2
- package/dist/llm/google/chat.js +267 -64
- package/dist/llm/google/serializer.d.ts +9 -1
- package/dist/llm/google/serializer.js +141 -34
- package/dist/llm/groq/chat.d.ts +21 -2
- package/dist/llm/groq/chat.js +125 -26
- package/dist/llm/groq/parser.js +3 -1
- package/dist/llm/mistral/chat.d.ts +43 -0
- package/dist/llm/mistral/chat.js +154 -0
- package/dist/llm/mistral/index.d.ts +2 -0
- package/dist/llm/mistral/index.js +2 -0
- package/dist/llm/mistral/schema.d.ts +8 -0
- package/dist/llm/mistral/schema.js +27 -0
- package/dist/llm/models.d.ts +2 -0
- package/dist/llm/models.js +317 -0
- package/dist/llm/ollama/chat.d.ts +13 -1
- package/dist/llm/ollama/chat.js +110 -19
- package/dist/llm/ollama/serializer.d.ts +1 -0
- package/dist/llm/ollama/serializer.js +34 -12
- package/dist/llm/openai/chat.d.ts +16 -0
- package/dist/llm/openai/chat.js +94 -44
- package/dist/llm/openai/like.d.ts +5 -3
- package/dist/llm/openai/like.js +7 -3
- package/dist/llm/openai/responses-serializer.d.ts +18 -0
- package/dist/llm/openai/responses-serializer.js +72 -0
- package/dist/llm/openrouter/chat.d.ts +28 -2
- package/dist/llm/openrouter/chat.js +115 -29
- package/dist/llm/schema.d.ts +11 -1
- package/dist/llm/schema.js +81 -1
- package/dist/llm/vercel/chat.d.ts +50 -0
- package/dist/llm/vercel/chat.js +276 -0
- package/dist/llm/vercel/index.d.ts +1 -0
- package/dist/llm/vercel/index.js +1 -0
- package/dist/llm/vercel/serializer.d.ts +5 -0
- package/dist/llm/vercel/serializer.js +7 -0
- package/dist/llm/views.d.ts +2 -1
- package/dist/llm/views.js +3 -1
- package/dist/logging-config.d.ts +2 -0
- package/dist/logging-config.js +82 -29
- package/dist/mcp/client.d.ts +10 -5
- package/dist/mcp/client.js +14 -9
- package/dist/mcp/controller.d.ts +42 -3
- package/dist/mcp/controller.js +56 -31
- package/dist/mcp/server.d.ts +14 -0
- package/dist/mcp/server.js +255 -52
- package/dist/observability.js +10 -4
- package/dist/sandbox/index.d.ts +2 -0
- package/dist/sandbox/index.js +2 -0
- package/dist/sandbox/sandbox.d.ts +19 -0
- package/dist/sandbox/sandbox.js +140 -0
- package/dist/sandbox/views.d.ts +67 -0
- package/dist/sandbox/views.js +121 -0
- package/dist/skill-cli/index.d.ts +3 -0
- package/dist/skill-cli/index.js +3 -0
- package/dist/skill-cli/protocol.d.ts +30 -0
- package/dist/skill-cli/protocol.js +48 -0
- package/dist/skill-cli/server.d.ts +11 -0
- package/dist/skill-cli/server.js +85 -0
- package/dist/skill-cli/sessions.d.ts +24 -0
- package/dist/skill-cli/sessions.js +47 -0
- package/dist/skills/index.d.ts +3 -0
- package/dist/skills/index.js +3 -0
- package/dist/skills/service.d.ts +27 -0
- package/dist/skills/service.js +266 -0
- package/dist/skills/utils.d.ts +6 -0
- package/dist/skills/utils.js +53 -0
- package/dist/skills/views.d.ts +40 -0
- package/dist/skills/views.js +10 -0
- package/dist/sync/auth.js +8 -3
- package/dist/sync/service.d.ts +6 -6
- package/dist/sync/service.js +54 -89
- package/dist/telemetry/views.d.ts +20 -6
- package/dist/telemetry/views.js +23 -5
- package/dist/tokens/custom-pricing.d.ts +2 -0
- package/dist/tokens/custom-pricing.js +22 -0
- package/dist/tokens/index.d.ts +2 -0
- package/dist/tokens/index.js +2 -0
- package/dist/tokens/mappings.d.ts +1 -0
- package/dist/tokens/mappings.js +3 -0
- package/dist/tokens/service.js +27 -8
- package/dist/tools/extraction/index.d.ts +2 -0
- package/dist/tools/extraction/index.js +2 -0
- package/dist/tools/extraction/schema-utils.d.ts +6 -0
- package/dist/tools/extraction/schema-utils.js +237 -0
- package/dist/tools/extraction/views.d.ts +7 -0
- package/dist/tools/index.d.ts +5 -0
- package/dist/tools/index.js +5 -0
- package/dist/tools/registry/index.d.ts +2 -0
- package/dist/tools/registry/index.js +2 -0
- package/dist/tools/registry/service.d.ts +1 -0
- package/dist/tools/registry/service.js +1 -0
- package/dist/tools/registry/views.d.ts +1 -0
- package/dist/tools/registry/views.js +1 -0
- package/dist/tools/service.d.ts +2 -0
- package/dist/tools/service.js +1 -0
- package/dist/tools/utils.d.ts +2 -0
- package/dist/tools/utils.js +57 -0
- package/dist/tools/views.d.ts +1 -0
- package/dist/tools/views.js +1 -0
- package/dist/utils.d.ts +10 -1
- package/dist/utils.js +70 -3
- package/package.json +87 -26
- package/dist/dom/playground/process-dom.js +0 -5
- package/dist/dom/playground/test-accessibility.d.ts +0 -44
- package/dist/dom/playground/test-accessibility.js +0 -111
- /package/dist/{dom/playground/process-dom.d.ts → tools/extraction/views.js} +0 -0
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import fsSync from 'node:fs';
|
|
2
2
|
import { promises as fsp } from 'node:fs';
|
|
3
3
|
import path from 'node:path';
|
|
4
|
+
import AdmZip from 'adm-zip';
|
|
4
5
|
import PDFDocument from 'pdfkit';
|
|
5
6
|
import { createRequire } from 'node:module';
|
|
6
7
|
import { spawnSync } from 'node:child_process';
|
|
7
8
|
const require = createRequire(import.meta.url);
|
|
8
|
-
async function extractPdfText(buffer) {
|
|
9
|
+
export async function extractPdfText(buffer) {
|
|
9
10
|
const pdfParseModule = (await import('pdf-parse'));
|
|
10
11
|
if (typeof pdfParseModule.default === 'function') {
|
|
11
12
|
const legacyParser = pdfParseModule.default;
|
|
@@ -33,10 +34,200 @@ async function extractPdfText(buffer) {
|
|
|
33
34
|
}
|
|
34
35
|
throw new FileSystemError("Error: Could not parse PDF file due to unsupported 'pdf-parse' module format.");
|
|
35
36
|
}
|
|
37
|
+
/**
 * Extract the text of a PDF one page at a time.
 *
 * When the loaded `pdf-parse` module exposes a `PDFParse` class, the page
 * count is read through `getInfo()` and every page is fetched separately with
 * `getText({ partial: [pageNumber] })`. If the page count cannot be obtained,
 * the whole document is returned as a single entry. When `PDFParse` is not
 * available, the function falls back to `extractPdfText()` and likewise
 * returns the whole document as one entry.
 *
 * @param {*} buffer - PDF bytes, handed to the parser as `data`
 *   (presumably a Buffer/Uint8Array - confirm against callers).
 * @returns {Promise<{ numPages: number, pageTexts: string[], totalChars: number }>}
 *   `totalChars` is the sum of the lengths of `pageTexts`.
 */
export async function extractPdfTextByPage(buffer) {
    const pdfParseModule = (await import('pdf-parse'));
    if (typeof pdfParseModule.PDFParse === 'function') {
        const Parser = pdfParseModule.PDFParse;
        const parser = new Parser({ data: buffer });
        try {
            let numPages = 0;
            try {
                const info = await parser.getInfo?.({ parsePageInfo: false });
                numPages = Number(info?.total ?? 0);
            }
            catch {
                // The page count is best-effort: a failure here only means we
                // fall through to the single-entry path below.
                numPages = 0;
            }
            if (!Number.isFinite(numPages) || numPages <= 0) {
                const full = await parser.getText();
                const text = typeof full?.text === 'string' ? full.text : '';
                return {
                    numPages: 1,
                    pageTexts: [text],
                    totalChars: text.length,
                };
            }
            const pageTexts = [];
            let totalChars = 0;
            for (let pageNumber = 1; pageNumber <= numPages; pageNumber += 1) {
                const pageResult = await parser.getText({ partial: [pageNumber] });
                const text = typeof pageResult?.text === 'string' ? pageResult.text : '';
                pageTexts.push(text);
                totalChars += text.length;
            }
            return {
                numPages,
                pageTexts,
                totalChars,
            };
        }
        finally {
            // Release parser resources whether or not extraction succeeded.
            if (typeof parser.destroy === 'function') {
                await parser.destroy();
            }
        }
    }
    const parsed = await extractPdfText(buffer);
    const text = typeof parsed?.text === 'string' ? parsed.text : '';
    // A missing or non-numeric totalPages would make Math.max() return NaN,
    // so coerce and validate before reporting the page count.
    const reportedPages = Number(parsed?.totalPages);
    return {
        numPages: Number.isFinite(reportedPages) && reportedPages >= 1 ? reportedPages : 1,
        pageTexts: [text],
        totalChars: text.length,
    };
}
|
|
36
88
|
export const INVALID_FILENAME_ERROR_MESSAGE = 'Error: Invalid filename format. Must be alphanumeric with supported extension.';
|
|
37
89
|
export const DEFAULT_FILE_SYSTEM_PATH = 'browseruse_agent_data';
|
|
38
|
-
const
|
|
39
|
-
|
|
90
|
+
// Extensions that buildFilenameErrorMessage() reports with the dedicated
// "binary/image file" error text instead of the generic unsupported message.
const UNSUPPORTED_BINARY_EXTENSIONS = new Set([
    'png',
    'jpg',
    'jpeg',
    'gif',
    'bmp',
    'svg',
    'webp',
    'ico',
    'mp3',
    'mp4',
    'wav',
    'avi',
    'mov',
    'zip',
    'tar',
    'gz',
    'rar',
    'exe',
    'bin',
    'dll',
    'so',
]);
// Default list of file extensions (without the leading dot).
const DEFAULT_EXTENSIONS = [
    'md',
    'txt',
    'json',
    'jsonl',
    'csv',
    'pdf',
    'docx',
    'html',
    'xml',
];
|
|
124
|
+
/**
 * Escape every regular-expression metacharacter in `value` so it can be
 * embedded in a pattern as literal text.
 *
 * @param {string} value
 * @returns {string}
 */
const escapeRegex = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
/**
 * Build the anchored pattern used to validate a bare filename: one or more
 * allowed name characters (ASCII letters, digits, `_`, `-`, `.`, parentheses,
 * space, and U+4E00-U+9FFF), a dot, then one of `extensions`.
 *
 * The extension match is case-sensitive.
 *
 * @param {string[]} extensions - Allowed extensions without the leading dot.
 * @returns {RegExp}
 */
const buildFilenameRegex = (extensions) => {
    const alternatives = extensions
        .map(escapeRegex)
        .filter((ext) => ext.length > 0);
    // With nothing to allow, an empty group "()" would accept names that end
    // in a bare dot ("foo."); "(?!)" never matches, so nothing validates.
    const extensionPattern = alternatives.length > 0 ? alternatives.join('|') : '(?!)';
    return new RegExp(`^[a-zA-Z0-9_\\-.() \\u4e00-\\u9fff]+\\.(${extensionPattern})$`);
};
|
|
126
|
+
/**
 * Build the error text for a filename that failed validation.
 *
 * Only the basename of `fileName` is inspected. The message depends on the
 * (lower-cased) text after the last dot:
 *   - no extension, or a trailing dot  -> "has no extension"
 *   - a known binary/image extension   -> "Cannot write binary/image file"
 *   - any other unsupported extension  -> "Unsupported file extension"
 *   - a supported extension            -> "Invalid filename" (bad characters)
 *
 * @param {string} fileName - Name or path as supplied by the caller.
 * @param {string[]} supportedExtensions - Allowed extensions, no leading dot.
 * @returns {string} Human-readable error message.
 */
const buildFilenameErrorMessage = (fileName, supportedExtensions) => {
    const base = path.basename(fileName);
    const supported = supportedExtensions.map((ext) => `.${ext}`).join(', ');
    const dotIndex = base.lastIndexOf('.');
    const ext = dotIndex === -1 ? '' : base.slice(dotIndex + 1).toLowerCase();
    if (ext === '') {
        // Covers both "README" and "notes.": a trailing dot carries no
        // extension, so do not report it as the unsupported extension '.'.
        return (`Error: Filename '${base}' has no extension. ` +
            `Please add a supported extension: ${supported}.`);
    }
    if (UNSUPPORTED_BINARY_EXTENSIONS.has(ext)) {
        return (`Error: Cannot write binary/image file '${base}'. ` +
            'The write_file tool only supports text-based files. ' +
            `Supported extensions: ${supported}. ` +
            'For screenshots, the browser automatically captures them - do not try to save screenshots as files.');
    }
    if (!supportedExtensions.includes(ext)) {
        return (`Error: Unsupported file extension '.${ext}' in '${base}'. ` +
            `Supported extensions: ${supported}. ` +
            'Please rename the file to use a supported extension.');
    }
    // The extension is fine, so the name part must contain a bad character.
    return (`Error: Invalid filename '${base}'. ` +
        'Filenames must contain only letters, numbers, underscores, hyphens, dots, parentheses, and spaces. ' +
        `Supported extensions: ${supported}.`);
};
|
|
151
|
+
/**
 * Escape the five XML special characters in `value` using the predefined
 * XML entities, so the text can be placed inside an XML element.
 *
 * `&` is replaced first so the ampersands introduced by the later
 * replacements are not escaped a second time.
 *
 * @param {string} value
 * @returns {string}
 */
const escapeXmlText = (value) => value
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
|
|
157
|
+
/**
 * Replace the five predefined XML entities in `value` with the characters
 * they stand for.
 *
 * `&amp;` is decoded last so that an escaped entity such as `&amp;lt;`
 * becomes the literal text `&lt;` rather than `<`.
 *
 * @param {string} value
 * @returns {string}
 */
const decodeXmlText = (value) => value
    .replace(/&apos;/g, "'")
    .replace(/&quot;/g, '"')
    .replace(/&gt;/g, '>')
    .replace(/&lt;/g, '<')
    .replace(/&amp;/g, '&');
|
|
163
|
+
// Static package parts written into every generated .docx archive by
// buildDocxBuffer(). Each string must start with the XML declaration, so the
// template literals begin on the declaration line itself.

// "[Content_Types].xml": content types for .rels/.xml parts and the main document.
const DOCX_CONTENT_TYPES_XML = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
<Default Extension="xml" ContentType="application/xml"/>
<Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
</Types>`;
// "_rels/.rels": points the package at word/document.xml.
const DOCX_ROOT_RELS_XML = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/>
</Relationships>`;
// "word/_rels/document.xml.rels": the document has no outgoing relationships.
const DOCX_DOCUMENT_RELS_XML = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"></Relationships>`;
|
|
175
|
+
/**
 * Render plain text as the XML of a WordprocessingML main document part.
 *
 * `content` is split on LF / CRLF. Every line becomes one `<w:p>` paragraph
 * holding a single run; empty lines become an empty `<w:p/>`. Line text is
 * XML-escaped and emitted with `xml:space="preserve"`. A fixed `<w:sectPr>`
 * (page size and margins) closes the body.
 *
 * @param {string} content - Text to convert.
 * @returns {string} Complete document XML, starting with the XML declaration.
 */
const buildDocxDocumentXml = (content) => {
    const lines = content.split(/\r?\n/);
    const paragraphs = lines
        .map((line) => {
            if (!line) {
                return '<w:p/>';
            }
            return `<w:p><w:r><w:t xml:space="preserve">${escapeXmlText(line)}</w:t></w:r></w:p>`;
        })
        .join('');
    return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:document xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas"
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"
xmlns:v="urn:schemas-microsoft-com:vml"
xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing"
xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
xmlns:w10="urn:schemas-microsoft-com:office:word"
xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"
xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup"
xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk"
xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml"
xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape"
mc:Ignorable="w14 wp14">
<w:body>${paragraphs}<w:sectPr><w:pgSz w:w="12240" w:h="15840"/><w:pgMar w:top="1440" w:right="1440" w:bottom="1440" w:left="1440" w:header="708" w:footer="708" w:gutter="0"/></w:sectPr></w:body>
</w:document>`;
};
|
|
205
|
+
/**
 * Package plain text as a minimal .docx archive.
 *
 * The archive holds four parts: `[Content_Types].xml`, `_rels/.rels`,
 * `word/_rels/document.xml.rels`, and `word/document.xml` (rendered from
 * `content` by buildDocxDocumentXml()).
 *
 * @param {string} content - Text for the document body.
 * @returns {Buffer} The zipped archive, as returned by `AdmZip#toBuffer()`.
 */
const buildDocxBuffer = (content) => {
    const zip = new AdmZip();
    zip.addFile('[Content_Types].xml', Buffer.from(DOCX_CONTENT_TYPES_XML, 'utf-8'));
    zip.addFile('_rels/.rels', Buffer.from(DOCX_ROOT_RELS_XML, 'utf-8'));
    zip.addFile('word/_rels/document.xml.rels', Buffer.from(DOCX_DOCUMENT_RELS_XML, 'utf-8'));
    zip.addFile('word/document.xml', Buffer.from(buildDocxDocumentXml(content), 'utf-8'));
    return zip.toBuffer();
};
|
|
213
|
+
/**
 * Extract the plain text of a .docx archive.
 *
 * Reads `word/document.xml`, takes every `<w:p>` paragraph in document
 * order, concatenates the decoded contents of its `<w:t>` runs, and joins
 * the paragraphs with newlines. Paragraphs without text yield an empty line.
 * The final result is trimmed.
 *
 * @param {Buffer} fileBuffer - Raw bytes of the .docx file.
 * @returns {string} Extracted text.
 * @throws {FileSystemError} When the archive has no `word/document.xml`.
 */
const readDocxText = (fileBuffer) => {
    const zip = new AdmZip(fileBuffer);
    const documentEntry = zip.getEntry('word/document.xml');
    if (!documentEntry) {
        throw new FileSystemError('Error: Could not parse DOCX file content.');
    }
    const xml = documentEntry.getData().toString('utf-8');
    // Expand self-closing paragraphs so the paragraph regex below sees them.
    const normalizedXml = xml.replace(/<w:p\b([^>]*)\/>/g, '<w:p$1></w:p>');
    // \b keeps the match from starting on other tags whose name merely
    // begins with "w:p" (w:pPr, w:pgSz, w:permStart, ...).
    const paragraphMatches = normalizedXml.match(/<w:p\b[\s\S]*?<\/w:p>/g) ?? [];
    const lines = paragraphMatches.map((paragraph) => {
        const textMatches = Array.from(paragraph.matchAll(/<w:t(?:\s[^>]*)?>([\s\S]*?)<\/w:t>/g));
        if (!textMatches.length) {
            return '';
        }
        return textMatches.map((match) => decodeXmlText(match[1] ?? '')).join('');
    });
    return lines.join('\n').trim();
};
|
|
40
231
|
/**
 * Error type thrown by the file-system helpers in this module, e.g. when a
 * PDF or DOCX file cannot be parsed. Adds no behaviour on top of `Error`;
 * it exists so callers can tell these failures apart with `instanceof`.
 */
export class FileSystemError extends Error {
}
|
|
42
233
|
class BaseFile {
|
|
@@ -105,6 +296,11 @@ class JsonFile extends BaseFile {
|
|
|
105
296
|
return 'json';
|
|
106
297
|
}
|
|
107
298
|
}
|
|
299
|
+
/** File type for `.jsonl` files; everything except the extension comes from BaseFile. */
class JsonlFile extends BaseFile {
    /** @returns {string} Extension without the leading dot. */
    get extension() {
        return 'jsonl';
    }
}
|
|
108
304
|
class CsvFile extends BaseFile {
|
|
109
305
|
get extension() {
|
|
110
306
|
return 'csv';
|
|
@@ -154,19 +350,52 @@ stream.on('error', (err) => {
|
|
|
154
350
|
}
|
|
155
351
|
}
|
|
156
352
|
}
|
|
353
|
+
/**
 * File type for `.docx` documents.
 *
 * Unlike the plain-text file types, the content is not written verbatim:
 * both sync methods wrap `this.content` in a .docx archive built by
 * buildDocxBuffer() and write the resulting bytes.
 */
class DocxFile extends BaseFile {
    /** @returns {string} Extension without the leading dot. */
    get extension() {
        return 'docx';
    }
    /**
     * Write the document into `dir` asynchronously.
     *
     * @param {string} dir - Target directory; the file is named `this.fullName`.
     * @returns {Promise<void>}
     */
    async syncToDisk(dir) {
        const filePath = path.join(dir, this.fullName);
        // Falsy content (e.g. undefined) is written as an empty document.
        const docxBuffer = buildDocxBuffer(this.content || '');
        await fsp.writeFile(filePath, docxBuffer);
    }
    /**
     * Synchronous counterpart of syncToDisk().
     *
     * @param {string} dir - Target directory; the file is named `this.fullName`.
     */
    syncToDiskSync(dir) {
        const filePath = path.join(dir, this.fullName);
        const docxBuffer = buildDocxBuffer(this.content || '');
        fsSync.writeFileSync(filePath, docxBuffer);
    }
}
|
|
368
|
+
/** File type for `.html` files; everything except the extension comes from BaseFile. */
class HtmlFile extends BaseFile {
    /** @returns {string} Extension without the leading dot. */
    get extension() {
        return 'html';
    }
}
|
|
373
|
+
/** File type for `.xml` files; everything except the extension comes from BaseFile. */
class XmlFile extends BaseFile {
    /** @returns {string} Extension without the leading dot. */
    get extension() {
        return 'xml';
    }
}
|
|
157
378
|
// Maps a file extension (without the dot) to the class that models it.
const FILE_TYPES = {
    md: MarkdownFile,
    txt: TxtFile,
    json: JsonFile,
    jsonl: JsonlFile,
    csv: CsvFile,
    pdf: PdfFile,
    docx: DocxFile,
    html: HtmlFile,
    xml: XmlFile,
};
|
|
164
389
|
// Maps a file class name to the class itself (keys equal the class names).
const TYPE_NAME_MAP = {
    MarkdownFile,
    TxtFile,
    JsonFile,
    JsonlFile,
    CsvFile,
    PdfFile,
    DocxFile,
    HtmlFile,
    XmlFile,
};
|
|
171
400
|
export class FileSystem {
|
|
172
401
|
files = new Map();
|
|
@@ -194,7 +423,45 @@ export class FileSystem {
|
|
|
194
423
|
}
|
|
195
424
|
}
|
|
196
425
|
isValidFilename(filename) {
|
|
197
|
-
|
|
426
|
+
const base = path.basename(filename);
|
|
427
|
+
const regex = buildFilenameRegex(this.get_allowed_extensions());
|
|
428
|
+
if (!regex.test(base)) {
|
|
429
|
+
return false;
|
|
430
|
+
}
|
|
431
|
+
const idx = base.lastIndexOf('.');
|
|
432
|
+
if (idx <= 0) {
|
|
433
|
+
return false;
|
|
434
|
+
}
|
|
435
|
+
return base.slice(0, idx).trim().length > 0;
|
|
436
|
+
}
|
|
437
|
+
static sanitize_filename(fileName) {
|
|
438
|
+
const base = path.basename(fileName);
|
|
439
|
+
const idx = base.lastIndexOf('.');
|
|
440
|
+
if (idx === -1) {
|
|
441
|
+
return base;
|
|
442
|
+
}
|
|
443
|
+
const ext = base.slice(idx + 1).toLowerCase();
|
|
444
|
+
let namePart = base.slice(0, idx);
|
|
445
|
+
namePart = namePart.replace(/ /g, '-');
|
|
446
|
+
namePart = namePart.replace(/[^a-zA-Z0-9_\-.()\u4e00-\u9fff]/g, '');
|
|
447
|
+
namePart = namePart.replace(/-{2,}/g, '-');
|
|
448
|
+
namePart = namePart.replace(/^[-.]+|[-.]+$/g, '');
|
|
449
|
+
if (!namePart) {
|
|
450
|
+
namePart = 'file';
|
|
451
|
+
}
|
|
452
|
+
return `${namePart}.${ext}`;
|
|
453
|
+
}
|
|
454
|
+
resolveFilename(filename) {
|
|
455
|
+
const base = path.basename(filename);
|
|
456
|
+
const wasChanged = base !== filename;
|
|
457
|
+
if (this.isValidFilename(base)) {
|
|
458
|
+
return [base, wasChanged];
|
|
459
|
+
}
|
|
460
|
+
const sanitized = FileSystem.sanitize_filename(base);
|
|
461
|
+
if (sanitized !== base && this.isValidFilename(sanitized)) {
|
|
462
|
+
return [sanitized, true];
|
|
463
|
+
}
|
|
464
|
+
return [base, wasChanged];
|
|
198
465
|
}
|
|
199
466
|
parseFilename(filename) {
|
|
200
467
|
const idx = filename.lastIndexOf('.');
|
|
@@ -223,113 +490,307 @@ export class FileSystem {
|
|
|
223
490
|
return this.dataDir;
|
|
224
491
|
}
|
|
225
492
|
get_file(filename) {
|
|
226
|
-
|
|
493
|
+
const [resolved] = this.resolveFilename(filename);
|
|
494
|
+
if (!this.isValidFilename(resolved)) {
|
|
495
|
+
return null;
|
|
496
|
+
}
|
|
497
|
+
return this.files.get(resolved) ?? null;
|
|
227
498
|
}
|
|
228
499
|
list_files() {
|
|
229
500
|
return Array.from(this.files.values()).map((file) => file.fullName);
|
|
230
501
|
}
|
|
231
502
|
display_file(filename) {
|
|
232
|
-
|
|
503
|
+
const [resolved] = this.resolveFilename(filename);
|
|
504
|
+
if (!this.isValidFilename(resolved)) {
|
|
233
505
|
return null;
|
|
234
506
|
}
|
|
235
|
-
const file = this.
|
|
507
|
+
const file = this.files.get(resolved) ?? null;
|
|
236
508
|
return file ? file.read() : null;
|
|
237
509
|
}
|
|
238
|
-
async
|
|
510
|
+
async read_file_structured(filename, externalFile = false) {
|
|
511
|
+
const result = {
|
|
512
|
+
message: '',
|
|
513
|
+
images: null,
|
|
514
|
+
};
|
|
239
515
|
if (externalFile) {
|
|
240
516
|
try {
|
|
241
|
-
const
|
|
242
|
-
|
|
517
|
+
const base = path.basename(filename);
|
|
518
|
+
const idx = base.lastIndexOf('.');
|
|
519
|
+
if (idx === -1) {
|
|
520
|
+
result.message =
|
|
521
|
+
`Error: Invalid filename format ${filename}. ` +
|
|
522
|
+
'Must be alphanumeric with a supported extension.';
|
|
523
|
+
return result;
|
|
524
|
+
}
|
|
525
|
+
const extension = base.slice(idx + 1).toLowerCase();
|
|
526
|
+
const specialExtensions = new Set([
|
|
527
|
+
'docx',
|
|
528
|
+
'pdf',
|
|
529
|
+
'jpg',
|
|
530
|
+
'jpeg',
|
|
531
|
+
'png',
|
|
532
|
+
]);
|
|
533
|
+
const textExtensions = this.get_allowed_extensions().filter((ext) => !specialExtensions.has(ext));
|
|
534
|
+
if (textExtensions.includes(extension)) {
|
|
243
535
|
const content = await fsp.readFile(filename, 'utf-8');
|
|
244
|
-
|
|
536
|
+
result.message = `Read from file ${filename}.\n<content>\n${content}\n</content>`;
|
|
537
|
+
return result;
|
|
245
538
|
}
|
|
246
539
|
if (extension === 'pdf') {
|
|
540
|
+
const MAX_CHARS = 60000;
|
|
247
541
|
const buffer = await fsp.readFile(filename);
|
|
248
|
-
const
|
|
249
|
-
const
|
|
250
|
-
const
|
|
251
|
-
const
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
542
|
+
const pdf = await extractPdfTextByPage(buffer);
|
|
543
|
+
const numPages = pdf.numPages;
|
|
544
|
+
const pageTexts = pdf.pageTexts;
|
|
545
|
+
const totalChars = pdf.totalChars;
|
|
546
|
+
if (totalChars <= MAX_CHARS) {
|
|
547
|
+
const contentParts = [];
|
|
548
|
+
for (let pageNumber = 1; pageNumber <= pageTexts.length; pageNumber += 1) {
|
|
549
|
+
const text = pageTexts[pageNumber - 1] ?? '';
|
|
550
|
+
if (!text.trim()) {
|
|
551
|
+
continue;
|
|
552
|
+
}
|
|
553
|
+
contentParts.push(`--- Page ${pageNumber} ---\n${text}`);
|
|
554
|
+
}
|
|
555
|
+
result.message =
|
|
556
|
+
`Read from file ${filename} (${numPages} pages, ${totalChars.toLocaleString()} chars).\n` +
|
|
557
|
+
`<content>\n${contentParts.join('\n\n')}\n</content>`;
|
|
558
|
+
return result;
|
|
559
|
+
}
|
|
560
|
+
const wordToPages = new Map();
|
|
561
|
+
const pageWords = new Map();
|
|
562
|
+
for (let pageNumber = 1; pageNumber <= pageTexts.length; pageNumber += 1) {
|
|
563
|
+
const text = pageTexts[pageNumber - 1] ?? '';
|
|
564
|
+
const words = new Set((text.toLowerCase().match(/\b[a-zA-Z]{4,}\b/g) ?? []).map((word) => word));
|
|
565
|
+
pageWords.set(pageNumber, words);
|
|
566
|
+
for (const word of words) {
|
|
567
|
+
if (!wordToPages.has(word)) {
|
|
568
|
+
wordToPages.set(word, new Set());
|
|
569
|
+
}
|
|
570
|
+
wordToPages.get(word).add(pageNumber);
|
|
571
|
+
}
|
|
572
|
+
}
|
|
573
|
+
const pageScores = new Map();
|
|
574
|
+
for (const [pageNumber, words] of pageWords.entries()) {
|
|
575
|
+
let score = 0;
|
|
576
|
+
for (const word of words) {
|
|
577
|
+
const pagesWithWord = wordToPages.get(word)?.size ?? 1;
|
|
578
|
+
score += Math.log(Math.max(numPages, 1) / pagesWithWord);
|
|
579
|
+
}
|
|
580
|
+
pageScores.set(pageNumber, score);
|
|
581
|
+
}
|
|
582
|
+
const priorityPages = [1];
|
|
583
|
+
const sortedPages = Array.from(pageScores.entries()).sort((a, b) => b[1] - a[1]);
|
|
584
|
+
for (const [pageNumber] of sortedPages) {
|
|
585
|
+
if (!priorityPages.includes(pageNumber)) {
|
|
586
|
+
priorityPages.push(pageNumber);
|
|
587
|
+
}
|
|
588
|
+
}
|
|
589
|
+
for (let pageNumber = 1; pageNumber <= numPages; pageNumber += 1) {
|
|
590
|
+
if (!priorityPages.includes(pageNumber)) {
|
|
591
|
+
priorityPages.push(pageNumber);
|
|
592
|
+
}
|
|
593
|
+
}
|
|
594
|
+
const contentParts = [];
|
|
595
|
+
let charsUsed = 0;
|
|
596
|
+
const pagesIncluded = [];
|
|
597
|
+
const pagesIncludedSet = new Set();
|
|
598
|
+
for (const pageNumber of priorityPages) {
|
|
599
|
+
const text = pageTexts[pageNumber - 1] ?? '';
|
|
600
|
+
if (!text.trim()) {
|
|
601
|
+
continue;
|
|
602
|
+
}
|
|
603
|
+
const pageHeader = `--- Page ${pageNumber} ---\n`;
|
|
604
|
+
const truncationSuffix = '\n[...truncated]';
|
|
605
|
+
const remaining = MAX_CHARS - charsUsed;
|
|
606
|
+
const minUseful = pageHeader.length + truncationSuffix.length + 50;
|
|
607
|
+
if (remaining < minUseful) {
|
|
608
|
+
break;
|
|
609
|
+
}
|
|
610
|
+
let pageContent = `${pageHeader}${text}`;
|
|
611
|
+
if (pageContent.length > remaining) {
|
|
612
|
+
pageContent =
|
|
613
|
+
pageContent.slice(0, Math.max(0, remaining - truncationSuffix.length)) + truncationSuffix;
|
|
614
|
+
}
|
|
615
|
+
contentParts.push({ pageNumber, content: pageContent });
|
|
616
|
+
charsUsed += pageContent.length;
|
|
617
|
+
pagesIncluded.push(pageNumber);
|
|
618
|
+
pagesIncludedSet.add(pageNumber);
|
|
619
|
+
if (charsUsed >= MAX_CHARS) {
|
|
620
|
+
break;
|
|
621
|
+
}
|
|
622
|
+
}
|
|
623
|
+
contentParts.sort((a, b) => a.pageNumber - b.pageNumber);
|
|
624
|
+
const extractedText = contentParts
|
|
625
|
+
.map((part) => part.content)
|
|
255
626
|
.join('\n\n');
|
|
256
|
-
|
|
257
|
-
|
|
627
|
+
let truncationNote = '';
|
|
628
|
+
const pagesNotShown = numPages - pagesIncluded.length;
|
|
629
|
+
if (pagesNotShown > 0) {
|
|
630
|
+
const skipped = [];
|
|
631
|
+
for (let pageNumber = 1; pageNumber <= numPages; pageNumber += 1) {
|
|
632
|
+
if (!pagesIncludedSet.has(pageNumber)) {
|
|
633
|
+
skipped.push(pageNumber);
|
|
634
|
+
}
|
|
635
|
+
}
|
|
636
|
+
const skippedPreview = skipped.slice(0, 10).join(', ');
|
|
637
|
+
const skippedSuffix = skipped.length > 10 ? ', ...' : '';
|
|
638
|
+
truncationNote =
|
|
639
|
+
`\n\n[Showing ${pagesIncluded.length} of ${numPages} pages. ` +
|
|
640
|
+
`Skipped pages: [${skippedPreview}${skippedSuffix}]. ` +
|
|
641
|
+
'Use read_long_content with a specific goal to find relevant sections.]';
|
|
642
|
+
}
|
|
643
|
+
result.message =
|
|
644
|
+
`Read from file ${filename} (${numPages} pages, ${totalChars.toLocaleString()} chars total).\n` +
|
|
645
|
+
`<content>\n${extractedText}${truncationNote}\n</content>`;
|
|
646
|
+
return result;
|
|
647
|
+
}
|
|
648
|
+
if (extension === 'docx') {
|
|
649
|
+
const fileBuffer = await fsp.readFile(filename);
|
|
650
|
+
const content = readDocxText(fileBuffer);
|
|
651
|
+
result.message = `Read from file ${filename}.\n<content>\n${content}\n</content>`;
|
|
652
|
+
return result;
|
|
258
653
|
}
|
|
259
|
-
|
|
654
|
+
if (extension === 'jpg' ||
|
|
655
|
+
extension === 'jpeg' ||
|
|
656
|
+
extension === 'png') {
|
|
657
|
+
const fileBuffer = await fsp.readFile(filename);
|
|
658
|
+
result.message = `Read image file ${filename}.`;
|
|
659
|
+
result.images = [
|
|
660
|
+
{
|
|
661
|
+
name: base,
|
|
662
|
+
data: fileBuffer.toString('base64'),
|
|
663
|
+
},
|
|
664
|
+
];
|
|
665
|
+
return result;
|
|
666
|
+
}
|
|
667
|
+
result.message = `Error: Cannot read file ${filename} as ${extension} extension is not supported.`;
|
|
668
|
+
return result;
|
|
260
669
|
}
|
|
261
670
|
catch (error) {
|
|
262
671
|
if (error?.code === 'ENOENT') {
|
|
263
|
-
|
|
672
|
+
result.message = `Error: File '${filename}' not found.`;
|
|
673
|
+
return result;
|
|
264
674
|
}
|
|
265
675
|
if (error?.code === 'EACCES') {
|
|
266
|
-
|
|
676
|
+
result.message = `Error: Permission denied to read file '${filename}'.`;
|
|
677
|
+
return result;
|
|
267
678
|
}
|
|
268
|
-
|
|
679
|
+
result.message =
|
|
680
|
+
`Error: Could not read file '${filename}'. ${error instanceof Error ? error.message : ''}`.trim();
|
|
681
|
+
return result;
|
|
269
682
|
}
|
|
270
683
|
}
|
|
271
|
-
|
|
272
|
-
|
|
684
|
+
const originalFilename = filename;
|
|
685
|
+
const [resolved, wasSanitized] = this.resolveFilename(filename);
|
|
686
|
+
if (!this.isValidFilename(resolved)) {
|
|
687
|
+
result.message = buildFilenameErrorMessage(filename, this.get_allowed_extensions());
|
|
688
|
+
return result;
|
|
273
689
|
}
|
|
274
|
-
const file = this.
|
|
690
|
+
const file = this.files.get(resolved) ?? null;
|
|
275
691
|
if (!file) {
|
|
276
|
-
|
|
692
|
+
if (wasSanitized) {
|
|
693
|
+
result.message =
|
|
694
|
+
`File '${resolved}' not found. ` +
|
|
695
|
+
`(Filename was auto-corrected from '${originalFilename}')`;
|
|
696
|
+
}
|
|
697
|
+
else {
|
|
698
|
+
result.message = `File '${originalFilename}' not found.`;
|
|
699
|
+
}
|
|
700
|
+
return result;
|
|
277
701
|
}
|
|
278
702
|
try {
|
|
279
703
|
const content = file.read();
|
|
280
|
-
|
|
704
|
+
const sanitizeNote = wasSanitized
|
|
705
|
+
? `Note: filename was auto-corrected from '${originalFilename}' to '${resolved}'. `
|
|
706
|
+
: '';
|
|
707
|
+
result.message = `${sanitizeNote}Read from file ${resolved}.\n<content>\n${content}\n</content>`;
|
|
708
|
+
return result;
|
|
281
709
|
}
|
|
282
710
|
catch (error) {
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
711
|
+
result.message =
|
|
712
|
+
error instanceof FileSystemError
|
|
713
|
+
? error.message
|
|
714
|
+
: `Error: Could not read file '${originalFilename}'.`;
|
|
715
|
+
return result;
|
|
286
716
|
}
|
|
287
717
|
}
|
|
718
|
+
async read_file(filename, externalFile = false) {
|
|
719
|
+
const result = await this.read_file_structured(filename, externalFile);
|
|
720
|
+
return result.message;
|
|
721
|
+
}
|
|
288
722
|
async write_file(filename, content) {
|
|
289
|
-
|
|
290
|
-
|
|
723
|
+
const originalFilename = filename;
|
|
724
|
+
const [resolved, wasSanitized] = this.resolveFilename(filename);
|
|
725
|
+
if (!this.isValidFilename(resolved)) {
|
|
726
|
+
return buildFilenameErrorMessage(filename, this.get_allowed_extensions());
|
|
291
727
|
}
|
|
728
|
+
filename = resolved;
|
|
292
729
|
const file = this.files.get(filename) ?? this.instantiateFile(filename);
|
|
293
730
|
this.files.set(filename, file);
|
|
294
731
|
try {
|
|
295
732
|
await file.write(content, this.dataDir);
|
|
296
|
-
|
|
733
|
+
const sanitizeNote = wasSanitized
|
|
734
|
+
? ` (auto-corrected from '${originalFilename}')`
|
|
735
|
+
: '';
|
|
736
|
+
return `Data written to file ${filename} successfully.${sanitizeNote}`;
|
|
297
737
|
}
|
|
298
738
|
catch (error) {
|
|
299
739
|
return `Error: Could not write to file '${filename}'. ${error.message}`;
|
|
300
740
|
}
|
|
301
741
|
}
|
|
302
742
|
async append_file(filename, content) {
|
|
303
|
-
|
|
304
|
-
|
|
743
|
+
const originalFilename = filename;
|
|
744
|
+
const [resolved, wasSanitized] = this.resolveFilename(filename);
|
|
745
|
+
if (!this.isValidFilename(resolved)) {
|
|
746
|
+
return buildFilenameErrorMessage(filename, this.get_allowed_extensions());
|
|
305
747
|
}
|
|
748
|
+
filename = resolved;
|
|
306
749
|
const file = this.get_file(filename);
|
|
307
750
|
if (!file) {
|
|
751
|
+
if (wasSanitized) {
|
|
752
|
+
return (`File '${filename}' not found. ` +
|
|
753
|
+
`(Filename was auto-corrected from '${originalFilename}')`);
|
|
754
|
+
}
|
|
308
755
|
return `File '${filename}' not found.`;
|
|
309
756
|
}
|
|
310
757
|
try {
|
|
311
758
|
await file.append(content, this.dataDir);
|
|
312
|
-
|
|
759
|
+
const sanitizeNote = wasSanitized
|
|
760
|
+
? ` (auto-corrected from '${originalFilename}')`
|
|
761
|
+
: '';
|
|
762
|
+
return `Data appended to file ${filename} successfully.${sanitizeNote}`;
|
|
313
763
|
}
|
|
314
764
|
catch (error) {
|
|
315
765
|
return `Error: Could not append to file '${filename}'. ${error.message}`;
|
|
316
766
|
}
|
|
317
767
|
}
|
|
318
768
|
async replace_file_str(filename, oldStr, newStr) {
|
|
319
|
-
|
|
320
|
-
|
|
769
|
+
const originalFilename = filename;
|
|
770
|
+
const [resolved, wasSanitized] = this.resolveFilename(filename);
|
|
771
|
+
if (!this.isValidFilename(resolved)) {
|
|
772
|
+
return buildFilenameErrorMessage(filename, this.get_allowed_extensions());
|
|
321
773
|
}
|
|
774
|
+
filename = resolved;
|
|
322
775
|
if (!oldStr) {
|
|
323
776
|
return 'Error: Cannot replace empty string. Please provide a non-empty string to replace.';
|
|
324
777
|
}
|
|
325
778
|
const file = this.get_file(filename);
|
|
326
779
|
if (!file) {
|
|
780
|
+
if (wasSanitized) {
|
|
781
|
+
return (`File '${filename}' not found. ` +
|
|
782
|
+
`(Filename was auto-corrected from '${originalFilename}')`);
|
|
783
|
+
}
|
|
327
784
|
return `File '${filename}' not found.`;
|
|
328
785
|
}
|
|
329
786
|
try {
|
|
330
787
|
const content = file.read().replaceAll(oldStr, newStr);
|
|
331
788
|
await file.write(content, this.dataDir);
|
|
332
|
-
|
|
789
|
+
const sanitizeNote = wasSanitized
|
|
790
|
+
? ` (auto-corrected from '${originalFilename}')`
|
|
791
|
+
: '';
|
|
792
|
+
return (`Successfully replaced all occurrences of "${oldStr}" with "${newStr}" in file ${filename}` +
|
|
793
|
+
sanitizeNote);
|
|
333
794
|
}
|
|
334
795
|
catch (error) {
|
|
335
796
|
return `Error: Could not replace string in file '${filename}'. ${error.message}`;
|
|
@@ -341,7 +802,7 @@ export class FileSystem {
|
|
|
341
802
|
await file.write(content, this.dataDir);
|
|
342
803
|
this.files.set(filename, file);
|
|
343
804
|
this.extractedContentCount += 1;
|
|
344
|
-
return
|
|
805
|
+
return filename;
|
|
345
806
|
}
|
|
346
807
|
describe() {
|
|
347
808
|
const DISPLAY_CHARS = 400;
|