protoagent 0.1.13 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -4
- package/dist/App.js +77 -442
- package/dist/agentic-loop/errors.js +198 -0
- package/dist/agentic-loop/executor.js +108 -0
- package/dist/agentic-loop/stream.js +109 -0
- package/dist/agentic-loop.js +67 -593
- package/dist/components/ApprovalPrompt.js +18 -0
- package/dist/components/CommandFilter.js +19 -0
- package/dist/components/InlineSetup.js +33 -0
- package/dist/components/UsageDisplay.js +10 -0
- package/dist/config.js +52 -51
- package/dist/hooks/useAgentEventHandler.js +356 -0
- package/dist/mcp.js +3 -0
- package/dist/runtime-config.js +64 -33
- package/dist/skills.js +3 -1
- package/dist/sub-agent.js +11 -16
- package/dist/tools/bash.js +37 -11
- package/dist/tools/edit-file.js +8 -49
- package/dist/tools/read-file.js +3 -66
- package/dist/tools/search-files.js +70 -12
- package/dist/tools/webfetch.js +77 -62
- package/dist/tools/write-file.js +39 -3
- package/dist/utils/approval.js +2 -0
- package/dist/utils/compactor.js +2 -1
- package/dist/utils/cost-tracker.js +5 -2
- package/dist/utils/format-message.js +13 -0
- package/dist/utils/logger.js +16 -3
- package/dist/utils/path-suggestions.js +74 -0
- package/dist/utils/path-validation.js +2 -5
- package/dist/utils/tool-display.js +53 -0
- package/package.json +11 -4
- package/dist/components/CollapsibleBox.js +0 -27
- package/dist/components/ConfigDialog.js +0 -42
- package/dist/components/ConsolidatedToolMessage.js +0 -34
- package/dist/components/FormattedMessage.js +0 -170
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* Falls back to a pure JS recursive directory walk if rg is not found.
|
|
6
6
|
*/
|
|
7
7
|
import fs from 'node:fs/promises';
|
|
8
|
-
import {
|
|
8
|
+
import { stat } from 'node:fs/promises';
|
|
9
9
|
import path from 'node:path';
|
|
10
10
|
import { execFileSync } from 'node:child_process';
|
|
11
11
|
import { validatePath } from '../utils/path-validation.js';
|
|
@@ -40,21 +40,48 @@ catch {
|
|
|
40
40
|
// ripgrep not available, will use JS fallback
|
|
41
41
|
}
|
|
42
42
|
const MAX_RESULTS = 100;
|
|
43
|
+
const MAX_PATTERN_LENGTH = 1000;
|
|
44
|
+
// Directories to skip during recursive search
|
|
45
|
+
const SKIP_DIRS = new Set([
|
|
46
|
+
'node_modules',
|
|
47
|
+
'.git',
|
|
48
|
+
'dist',
|
|
49
|
+
'build',
|
|
50
|
+
'coverage',
|
|
51
|
+
'__pycache__',
|
|
52
|
+
'.venv',
|
|
53
|
+
'venv',
|
|
54
|
+
'.tox',
|
|
55
|
+
'.nox',
|
|
56
|
+
'.pytest_cache',
|
|
57
|
+
'.mypy_cache',
|
|
58
|
+
'.ruff_cache',
|
|
59
|
+
'.hypothesis',
|
|
60
|
+
'.next',
|
|
61
|
+
'out',
|
|
62
|
+
'.turbo',
|
|
63
|
+
'.cache',
|
|
64
|
+
]);
|
|
43
65
|
export async function searchFiles(searchTerm, directoryPath = '.', caseSensitive = true, fileExtensions) {
|
|
44
66
|
const validated = await validatePath(directoryPath);
|
|
67
|
+
// Security: Validate pattern to prevent ReDoS (Catastrophic Backtracking)
|
|
68
|
+
// Attack: Pattern (a+)+$ with input 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!' causes exponential backtracking
|
|
69
|
+
// In JS fallback, this hangs the process for minutes/hours with 100% CPU
|
|
70
|
+
if (searchTerm.length > MAX_PATTERN_LENGTH) {
|
|
71
|
+
return `Error: Pattern too long (${searchTerm.length} chars, max ${MAX_PATTERN_LENGTH})`;
|
|
72
|
+
}
|
|
45
73
|
if (hasRipgrep) {
|
|
46
74
|
return searchWithRipgrep(searchTerm, validated, directoryPath, caseSensitive, fileExtensions);
|
|
47
75
|
}
|
|
48
76
|
return searchWithJs(searchTerm, validated, directoryPath, caseSensitive, fileExtensions);
|
|
49
77
|
}
|
|
50
78
|
// ─── Ripgrep implementation ───
|
|
51
|
-
function searchWithRipgrep(searchTerm, validated, directoryPath, caseSensitive, fileExtensions) {
|
|
79
|
+
async function searchWithRipgrep(searchTerm, validated, directoryPath, caseSensitive, fileExtensions) {
|
|
52
80
|
const args = [
|
|
53
81
|
'--line-number',
|
|
54
82
|
'--with-filename',
|
|
55
83
|
'--no-heading',
|
|
56
84
|
'--color=never',
|
|
57
|
-
'--max-count=1',
|
|
58
85
|
'--max-filesize=1M',
|
|
59
86
|
];
|
|
60
87
|
if (!caseSensitive) {
|
|
@@ -79,10 +106,13 @@ function searchWithRipgrep(searchTerm, validated, directoryPath, caseSensitive,
|
|
|
79
106
|
return `No matches found for "${searchTerm}" in ${directoryPath}`;
|
|
80
107
|
}
|
|
81
108
|
// Parse rg output and sort by mtime
|
|
82
|
-
const parsed =
|
|
109
|
+
const parsed = [];
|
|
110
|
+
for (const line of lines.slice(0, MAX_RESULTS)) {
|
|
83
111
|
// rg output: filepath:linenum:content
|
|
84
112
|
const firstColon = line.indexOf(':');
|
|
85
113
|
const secondColon = line.indexOf(':', firstColon + 1);
|
|
114
|
+
if (firstColon === -1 || secondColon === -1)
|
|
115
|
+
continue;
|
|
86
116
|
const filePath = line.slice(0, firstColon);
|
|
87
117
|
const lineNum = line.slice(firstColon + 1, secondColon);
|
|
88
118
|
let content = line.slice(secondColon + 1).trim();
|
|
@@ -92,11 +122,12 @@ function searchWithRipgrep(searchTerm, validated, directoryPath, caseSensitive,
|
|
|
92
122
|
const relativePath = path.relative(validated, filePath);
|
|
93
123
|
let mtime = 0;
|
|
94
124
|
try {
|
|
95
|
-
|
|
125
|
+
const stats = await stat(filePath);
|
|
126
|
+
mtime = stats.mtimeMs;
|
|
96
127
|
}
|
|
97
|
-
catch { /* ignore */ }
|
|
98
|
-
|
|
99
|
-
}
|
|
128
|
+
catch { /* ignore stat errors */ }
|
|
129
|
+
parsed.push({ display: `${relativePath}:${lineNum}: ${content}`, mtime });
|
|
130
|
+
}
|
|
100
131
|
// Sort by mtime descending (most recently modified first)
|
|
101
132
|
parsed.sort((a, b) => b.mtime - a.mtime);
|
|
102
133
|
const results = parsed.map(r => r.display);
|
|
@@ -114,7 +145,7 @@ function searchWithRipgrep(searchTerm, validated, directoryPath, caseSensitive,
|
|
|
114
145
|
return `Error: ripgrep error: ${msg}`;
|
|
115
146
|
}
|
|
116
147
|
// Fall back to JS search on any other error
|
|
117
|
-
return
|
|
148
|
+
return searchWithJs(searchTerm, validated, directoryPath, caseSensitive, fileExtensions);
|
|
118
149
|
}
|
|
119
150
|
}
|
|
120
151
|
// ─── JS fallback implementation ───
|
|
@@ -129,6 +160,7 @@ async function searchWithJs(searchTerm, validated, directoryPath, caseSensitive,
|
|
|
129
160
|
return `Error: invalid regex pattern "${searchTerm}": ${message}`;
|
|
130
161
|
}
|
|
131
162
|
const results = [];
|
|
163
|
+
const visitedInodes = new Set();
|
|
132
164
|
async function search(dir) {
|
|
133
165
|
if (results.length >= MAX_RESULTS)
|
|
134
166
|
return;
|
|
@@ -137,13 +169,32 @@ async function searchWithJs(searchTerm, validated, directoryPath, caseSensitive,
|
|
|
137
169
|
if (results.length >= MAX_RESULTS)
|
|
138
170
|
break;
|
|
139
171
|
const fullPath = path.join(dir, entry.name);
|
|
172
|
+
// Skip symlinks to prevent cycles
|
|
173
|
+
if (entry.isSymbolicLink()) {
|
|
174
|
+
continue;
|
|
175
|
+
}
|
|
140
176
|
// Skip common non-useful directories
|
|
141
177
|
if (entry.isDirectory()) {
|
|
142
|
-
if (
|
|
178
|
+
if (SKIP_DIRS.has(entry.name))
|
|
143
179
|
continue;
|
|
180
|
+
// Track inode to detect hardlink cycles
|
|
181
|
+
try {
|
|
182
|
+
const stats = await fs.stat(fullPath);
|
|
183
|
+
const inodeKey = `${stats.dev}:${stats.ino}`;
|
|
184
|
+
if (visitedInodes.has(inodeKey)) {
|
|
185
|
+
continue; // Already visited this directory
|
|
186
|
+
}
|
|
187
|
+
visitedInodes.add(inodeKey);
|
|
188
|
+
}
|
|
189
|
+
catch {
|
|
190
|
+
// If we can't stat, skip to be safe
|
|
191
|
+
continue;
|
|
192
|
+
}
|
|
144
193
|
await search(fullPath);
|
|
145
194
|
continue;
|
|
146
195
|
}
|
|
196
|
+
if (!entry.isFile())
|
|
197
|
+
continue;
|
|
147
198
|
// Filter by extension
|
|
148
199
|
if (fileExtensions && fileExtensions.length > 0) {
|
|
149
200
|
const ext = path.extname(entry.name);
|
|
@@ -152,6 +203,7 @@ async function searchWithJs(searchTerm, validated, directoryPath, caseSensitive,
|
|
|
152
203
|
}
|
|
153
204
|
try {
|
|
154
205
|
const content = await fs.readFile(fullPath, 'utf8');
|
|
206
|
+
const stats = await stat(fullPath);
|
|
155
207
|
const lines = content.split('\n');
|
|
156
208
|
for (let i = 0; i < lines.length && results.length < MAX_RESULTS; i++) {
|
|
157
209
|
if (regex.test(lines[i])) {
|
|
@@ -161,7 +213,10 @@ async function searchWithJs(searchTerm, validated, directoryPath, caseSensitive,
|
|
|
161
213
|
if (lineContent.length > 500) {
|
|
162
214
|
lineContent = lineContent.slice(0, 500) + '... (truncated)';
|
|
163
215
|
}
|
|
164
|
-
results.push(
|
|
216
|
+
results.push({
|
|
217
|
+
display: `${relativePath}:${i + 1}: ${lineContent}`,
|
|
218
|
+
mtime: stats.mtimeMs,
|
|
219
|
+
});
|
|
165
220
|
}
|
|
166
221
|
regex.lastIndex = 0; // reset regex state
|
|
167
222
|
}
|
|
@@ -175,6 +230,9 @@ async function searchWithJs(searchTerm, validated, directoryPath, caseSensitive,
|
|
|
175
230
|
if (results.length === 0) {
|
|
176
231
|
return `No matches found for "${searchTerm}" in ${directoryPath}`;
|
|
177
232
|
}
|
|
233
|
+
// Sort by mtime descending (most recently modified first)
|
|
234
|
+
results.sort((a, b) => b.mtime - a.mtime);
|
|
235
|
+
const displayResults = results.map(r => r.display);
|
|
178
236
|
const suffix = results.length >= MAX_RESULTS ? `\n(results truncated at ${MAX_RESULTS})` : '';
|
|
179
|
-
return `Found ${results.length} match(es) for "${searchTerm}":\n${
|
|
237
|
+
return `Found ${results.length} match(es) for "${searchTerm}":\n${displayResults.join('\n')}${suffix}`;
|
|
180
238
|
}
|
package/dist/tools/webfetch.js
CHANGED
|
@@ -12,51 +12,59 @@
|
|
|
12
12
|
* - Proper redirect limiting
|
|
13
13
|
* - Charset-aware content decoding
|
|
14
14
|
*/
|
|
15
|
-
import { convert } from
|
|
15
|
+
import { convert } from "html-to-text";
|
|
16
16
|
const MAX_RESPONSE_SIZE = 5 * 1024 * 1024; // 5MB
|
|
17
17
|
const MAX_OUTPUT_SIZE = 2 * 1024 * 1024; // 2MB
|
|
18
18
|
const MAX_REDIRECTS = 10;
|
|
19
19
|
const MAX_URL_LENGTH = 4096;
|
|
20
20
|
const FETCH_HEADERS = {
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
21
|
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
|
22
|
+
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
|
23
|
+
"Accept-Language": "en-US,en;q=0.9",
|
|
24
|
+
"Accept-Encoding": "gzip, deflate",
|
|
25
|
+
DNT: "1",
|
|
26
|
+
Connection: "keep-alive",
|
|
27
|
+
"Upgrade-Insecure-Requests": "1",
|
|
28
28
|
};
|
|
29
29
|
// Text-based MIME types that are safe to process
|
|
30
30
|
const TEXT_MIME_TYPES = [
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
31
|
+
"text/",
|
|
32
|
+
"application/json",
|
|
33
|
+
"application/xml",
|
|
34
|
+
"application/x-www-form-urlencoded",
|
|
35
|
+
"application/atom+xml",
|
|
36
|
+
"application/rss+xml",
|
|
37
|
+
"application/javascript",
|
|
38
|
+
"application/typescript",
|
|
39
39
|
];
|
|
40
|
-
// Lazy-loaded Turndown instance
|
|
40
|
+
// Lazy-loaded Turndown instance — converts HTML to Markdown
|
|
41
|
+
// We lazy-load because Turndown is a CommonJS module; dynamic import keeps our
|
|
42
|
+
// ESM output clean without forcing esbuild to bundle everything as CJS.
|
|
43
|
+
// Why Turndown? HTML → Markdown preserves document structure (headings, lists,
|
|
44
|
+
// links) in a readable format that LLMs handle better than raw HTML markup.
|
|
41
45
|
let _turndownService = null;
|
|
42
46
|
async function getTurndownService() {
|
|
43
47
|
if (!_turndownService) {
|
|
44
|
-
const { default: TurndownService } = await import(
|
|
48
|
+
const { default: TurndownService } = await import("turndown");
|
|
45
49
|
_turndownService = new TurndownService({
|
|
46
|
-
headingStyle:
|
|
47
|
-
codeBlockStyle:
|
|
48
|
-
bulletListMarker:
|
|
49
|
-
emDelimiter:
|
|
50
|
+
headingStyle: "atx", // # Heading, not underlined
|
|
51
|
+
codeBlockStyle: "fenced", // ```code```, not indented
|
|
52
|
+
bulletListMarker: "-",
|
|
53
|
+
emDelimiter: "*",
|
|
50
54
|
});
|
|
51
|
-
|
|
55
|
+
// Remove noise that doesn't help LLM understanding
|
|
56
|
+
_turndownService.remove(["script", "style", "meta", "link"]);
|
|
52
57
|
}
|
|
53
58
|
return _turndownService;
|
|
54
59
|
}
|
|
55
|
-
// Lazy-loaded he module
|
|
60
|
+
// Lazy-loaded 'he' module — decodes HTML entities like &lt; &gt; &amp;
|
|
61
|
+
// We lazy-load for the same CJS/ESM reason as Turndown.
|
|
62
|
+
// Why 'he'? Browsers and node don't have built-in HTML entity decoding that
|
|
63
|
+
// handles the full set (&nbsp;, &#x2713;, named entities, etc.) correctly.
|
|
56
64
|
let _he = null;
|
|
57
65
|
async function getHe() {
|
|
58
66
|
if (!_he) {
|
|
59
|
-
const { default: he } = await import(
|
|
67
|
+
const { default: he } = await import("he");
|
|
60
68
|
_he = he;
|
|
61
69
|
}
|
|
62
70
|
return _he;
|
|
@@ -72,7 +80,7 @@ function isTextMimeType(mimeType) {
|
|
|
72
80
|
*/
|
|
73
81
|
function detectHTML(content, contentType) {
|
|
74
82
|
// Header says HTML
|
|
75
|
-
if (contentType.includes(
|
|
83
|
+
if (contentType.includes("text/html")) {
|
|
76
84
|
return true;
|
|
77
85
|
}
|
|
78
86
|
// Sniff content for HTML signature
|
|
@@ -85,17 +93,17 @@ function detectHTML(content, contentType) {
|
|
|
85
93
|
function parseCharset(contentType) {
|
|
86
94
|
const match = contentType.match(/charset=([^\s;]+)/i);
|
|
87
95
|
if (match) {
|
|
88
|
-
const charset = match[1].replace(/['"]/g,
|
|
96
|
+
const charset = match[1].replace(/['"]/g, "");
|
|
89
97
|
// Validate charset is supported by TextDecoder
|
|
90
98
|
try {
|
|
91
99
|
new TextDecoder(charset);
|
|
92
100
|
return charset;
|
|
93
101
|
}
|
|
94
102
|
catch {
|
|
95
|
-
return
|
|
103
|
+
return "utf-8";
|
|
96
104
|
}
|
|
97
105
|
}
|
|
98
|
-
return
|
|
106
|
+
return "utf-8";
|
|
99
107
|
}
|
|
100
108
|
/**
|
|
101
109
|
* Truncate output if too large
|
|
@@ -109,28 +117,28 @@ function truncateOutput(output, maxSize) {
|
|
|
109
117
|
return output;
|
|
110
118
|
}
|
|
111
119
|
export const webfetchTool = {
|
|
112
|
-
type:
|
|
120
|
+
type: "function",
|
|
113
121
|
function: {
|
|
114
|
-
name:
|
|
115
|
-
description:
|
|
122
|
+
name: "webfetch",
|
|
123
|
+
description: "Fetch and process content from a web URL. Supports text (plain text extraction), markdown (HTML to markdown conversion), or html (raw HTML) output formats.",
|
|
116
124
|
parameters: {
|
|
117
|
-
type:
|
|
125
|
+
type: "object",
|
|
118
126
|
properties: {
|
|
119
127
|
url: {
|
|
120
|
-
type:
|
|
121
|
-
description:
|
|
128
|
+
type: "string",
|
|
129
|
+
description: "HTTP(S) URL to fetch (must start with http:// or https://)",
|
|
122
130
|
},
|
|
123
131
|
format: {
|
|
124
|
-
type:
|
|
125
|
-
enum: [
|
|
126
|
-
description:
|
|
132
|
+
type: "string",
|
|
133
|
+
enum: ["text", "markdown", "html"],
|
|
134
|
+
description: "Output format: text (plain text), markdown (HTML to markdown), or html (raw HTML)",
|
|
127
135
|
},
|
|
128
136
|
timeout: {
|
|
129
|
-
type:
|
|
130
|
-
description:
|
|
137
|
+
type: "number",
|
|
138
|
+
description: "Timeout in seconds (default 30, min 1, max 120)",
|
|
131
139
|
},
|
|
132
140
|
},
|
|
133
|
-
required: [
|
|
141
|
+
required: ["url", "format"],
|
|
134
142
|
},
|
|
135
143
|
},
|
|
136
144
|
};
|
|
@@ -142,21 +150,21 @@ function htmlToText(html) {
|
|
|
142
150
|
return convert(html, {
|
|
143
151
|
wordwrap: 120,
|
|
144
152
|
selectors: [
|
|
145
|
-
{ selector:
|
|
146
|
-
{ selector:
|
|
153
|
+
{ selector: "img", options: { ignoreHref: true } },
|
|
154
|
+
{ selector: "a", options: { ignoreHref: true } },
|
|
147
155
|
],
|
|
148
156
|
});
|
|
149
157
|
}
|
|
150
158
|
catch (error) {
|
|
151
159
|
// Fallback: basic regex if library fails
|
|
152
160
|
return html
|
|
153
|
-
.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi,
|
|
154
|
-
.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi,
|
|
155
|
-
.replace(/<[^>]+>/g,
|
|
156
|
-
.split(
|
|
161
|
+
.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, "")
|
|
162
|
+
.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, "")
|
|
163
|
+
.replace(/<[^>]+>/g, " ")
|
|
164
|
+
.split("\n")
|
|
157
165
|
.map((line) => line.trim())
|
|
158
166
|
.filter((line) => line.length > 0)
|
|
159
|
-
.join(
|
|
167
|
+
.join("\n");
|
|
160
168
|
}
|
|
161
169
|
}
|
|
162
170
|
/**
|
|
@@ -184,11 +192,11 @@ async function fetchWithRedirectLimit(url, signal) {
|
|
|
184
192
|
const response = await originalFetch(currentUrl, {
|
|
185
193
|
signal,
|
|
186
194
|
headers: FETCH_HEADERS,
|
|
187
|
-
redirect:
|
|
195
|
+
redirect: "manual", // Handle redirects manually to count them
|
|
188
196
|
});
|
|
189
197
|
// Check for redirect status
|
|
190
198
|
if (response.status >= 300 && response.status < 400) {
|
|
191
|
-
const location = response.headers.get(
|
|
199
|
+
const location = response.headers.get("location");
|
|
192
200
|
if (location) {
|
|
193
201
|
redirectCount++;
|
|
194
202
|
// Resolve relative URLs
|
|
@@ -211,20 +219,20 @@ async function fetchWithRedirectLimit(url, signal) {
|
|
|
211
219
|
*/
|
|
212
220
|
export async function webfetch(url, format, timeout) {
|
|
213
221
|
// Validate URL
|
|
214
|
-
if (!url.startsWith(
|
|
215
|
-
throw new Error(
|
|
222
|
+
if (!url.startsWith("http://") && !url.startsWith("https://")) {
|
|
223
|
+
throw new Error("Invalid URL format. Must start with http:// or https://");
|
|
216
224
|
}
|
|
217
225
|
if (url.length > MAX_URL_LENGTH) {
|
|
218
226
|
throw new Error(`URL too long (${url.length} characters, max ${MAX_URL_LENGTH})`);
|
|
219
227
|
}
|
|
220
228
|
// Validate format
|
|
221
|
-
if (![
|
|
229
|
+
if (!["text", "markdown", "html"].includes(format)) {
|
|
222
230
|
throw new Error("Invalid format. Must be 'text', 'markdown', or 'html'");
|
|
223
231
|
}
|
|
224
232
|
// Validate timeout
|
|
225
233
|
const timeoutSeconds = Math.min(timeout ?? 30, 120);
|
|
226
234
|
if (timeoutSeconds < 1) {
|
|
227
|
-
throw new Error(
|
|
235
|
+
throw new Error("Timeout must be between 1 and 120 seconds");
|
|
228
236
|
}
|
|
229
237
|
// Setup timeout for entire operation
|
|
230
238
|
const controller = new AbortController();
|
|
@@ -238,17 +246,22 @@ export async function webfetch(url, format, timeout) {
|
|
|
238
246
|
throw new Error(`HTTP ${response.status} error: ${response.statusText}`);
|
|
239
247
|
}
|
|
240
248
|
// Validate response size by header
|
|
241
|
-
const contentLength = response.headers.get(
|
|
249
|
+
const contentLength = response.headers.get("content-length");
|
|
242
250
|
if (contentLength && parseInt(contentLength) > MAX_RESPONSE_SIZE) {
|
|
243
251
|
throw new Error(`Response too large (exceeds 5MB limit). Content-Length: ${contentLength}`);
|
|
244
252
|
}
|
|
245
253
|
// Get content type
|
|
246
|
-
const contentType = response.headers.get(
|
|
254
|
+
const contentType = response.headers.get("content-type") ?? "text/plain";
|
|
247
255
|
// Check if content type is text-based
|
|
248
256
|
if (!isTextMimeType(contentType)) {
|
|
249
257
|
throw new Error(`Content type '${contentType}' is not supported. Only text-based formats are allowed.`);
|
|
250
258
|
}
|
|
251
|
-
// Get response as ArrayBuffer
|
|
259
|
+
// Get response as ArrayBuffer (not .text() or .blob()) because:
|
|
260
|
+
// 1. response.text() always decodes as UTF-8 — would corrupt non-UTF-8 pages
|
|
261
|
+
// (e.g., Shift_JIS, GB2312, windows-1251 sites)
|
|
262
|
+
// 2. ArrayBuffer preserves raw bytes so we can use TextDecoder with the
|
|
263
|
+
// CORRECT charset from the Content-Type header
|
|
264
|
+
// 3. We can check byteLength BEFORE decoding for security (5MB limit)
|
|
252
265
|
const arrayBuffer = await response.arrayBuffer();
|
|
253
266
|
// Check actual response size
|
|
254
267
|
if (arrayBuffer.byteLength > MAX_RESPONSE_SIZE) {
|
|
@@ -262,18 +275,20 @@ export async function webfetch(url, format, timeout) {
|
|
|
262
275
|
const isHTML = detectHTML(content, contentType);
|
|
263
276
|
// Format content based on requested format
|
|
264
277
|
let output;
|
|
265
|
-
if (format ===
|
|
278
|
+
if (format === "text") {
|
|
266
279
|
output = isHTML ? htmlToText(content) : content;
|
|
267
280
|
}
|
|
268
|
-
else if (format ===
|
|
269
|
-
output = isHTML
|
|
281
|
+
else if (format === "markdown") {
|
|
282
|
+
output = isHTML
|
|
283
|
+
? await htmlToMarkdown(content)
|
|
284
|
+
: `\`\`\`\n${content}\n\`\`\``;
|
|
270
285
|
}
|
|
271
286
|
else {
|
|
272
287
|
// format === 'html'
|
|
273
288
|
output = content;
|
|
274
289
|
}
|
|
275
290
|
// Decode HTML entities ONLY for text/markdown formats (not for raw HTML)
|
|
276
|
-
if (format !==
|
|
291
|
+
if (format !== "html") {
|
|
277
292
|
const he = await getHe();
|
|
278
293
|
output = he.decode(output);
|
|
279
294
|
}
|
|
@@ -294,7 +309,7 @@ export async function webfetch(url, format, timeout) {
|
|
|
294
309
|
}
|
|
295
310
|
catch (error) {
|
|
296
311
|
// Handle AbortError (timeout or cancellation)
|
|
297
|
-
if (error instanceof Error && error.name ===
|
|
312
|
+
if (error instanceof Error && error.name === "AbortError") {
|
|
298
313
|
throw new Error(`Fetch timeout after ${timeoutSeconds} seconds`);
|
|
299
314
|
}
|
|
300
315
|
// Re-throw our errors as-is
|
package/dist/tools/write-file.js
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
import fs from 'node:fs/promises';
|
|
5
5
|
import path from 'node:path';
|
|
6
6
|
import { validatePath } from '../utils/path-validation.js';
|
|
7
|
+
import { findSimilarPaths } from '../utils/path-suggestions.js';
|
|
7
8
|
import { requestApproval } from '../utils/approval.js';
|
|
8
9
|
import { recordRead } from '../utils/file-time.js';
|
|
9
10
|
export const writeFileTool = {
|
|
@@ -22,7 +23,22 @@ export const writeFileTool = {
|
|
|
22
23
|
},
|
|
23
24
|
};
|
|
24
25
|
export async function writeFile(filePath, content, sessionId) {
|
|
25
|
-
|
|
26
|
+
let validated;
|
|
27
|
+
try {
|
|
28
|
+
validated = await validatePath(filePath);
|
|
29
|
+
}
|
|
30
|
+
catch (err) {
|
|
31
|
+
// If file not found, try to suggest similar paths
|
|
32
|
+
if (err.message?.includes('does not exist') || err.code === 'ENOENT') {
|
|
33
|
+
const suggestions = await findSimilarPaths(filePath);
|
|
34
|
+
let msg = `File not found: '${filePath}'`;
|
|
35
|
+
if (suggestions.length > 0) {
|
|
36
|
+
msg += '\nDid you mean one of these?\n' + suggestions.map(s => ` ${s}`).join('\n');
|
|
37
|
+
}
|
|
38
|
+
return msg;
|
|
39
|
+
}
|
|
40
|
+
throw err;
|
|
41
|
+
}
|
|
26
42
|
// Request approval
|
|
27
43
|
const preview = content.length > 500
|
|
28
44
|
? `${content.slice(0, 250)}\n... (${content.length} chars total) ...\n${content.slice(-250)}`
|
|
@@ -40,12 +56,32 @@ export async function writeFile(filePath, content, sessionId) {
|
|
|
40
56
|
}
|
|
41
57
|
// Ensure parent directory exists
|
|
42
58
|
await fs.mkdir(path.dirname(validated), { recursive: true });
|
|
43
|
-
// Atomic write
|
|
59
|
+
// Security: Atomic write with symlink protection
|
|
60
|
+
// Uses O_CREAT|O_EXCL ('wx' flag) to prevent symlink attacks
|
|
44
61
|
const tmpPath = path.join(path.dirname(validated), `.protoagent-write-${process.pid}-${Date.now()}-${path.basename(validated)}`);
|
|
62
|
+
let fd;
|
|
45
63
|
try {
|
|
46
|
-
|
|
64
|
+
// Open with O_CREAT|O_EXCL - atomically creates or fails if exists
|
|
65
|
+
fd = await fs.open(tmpPath, 'wx', 0o600);
|
|
66
|
+
await fd.writeFile(content, 'utf8');
|
|
67
|
+
await fd.sync();
|
|
68
|
+
await fd.close();
|
|
69
|
+
fd = undefined;
|
|
47
70
|
await fs.rename(tmpPath, validated);
|
|
48
71
|
}
|
|
72
|
+
catch (err) {
|
|
73
|
+
if (fd !== undefined) {
|
|
74
|
+
try {
|
|
75
|
+
await fd.close();
|
|
76
|
+
}
|
|
77
|
+
catch { /* ignore */ }
|
|
78
|
+
}
|
|
79
|
+
try {
|
|
80
|
+
await fs.unlink(tmpPath);
|
|
81
|
+
}
|
|
82
|
+
catch { /* ignore */ }
|
|
83
|
+
throw err;
|
|
84
|
+
}
|
|
49
85
|
finally {
|
|
50
86
|
await fs.rm(tmpPath, { force: true }).catch(() => undefined);
|
|
51
87
|
}
|
package/dist/utils/approval.js
CHANGED
package/dist/utils/compactor.js
CHANGED
|
@@ -30,7 +30,8 @@ Be thorough but concise. Do not lose any information that would be needed to con
|
|
|
30
30
|
* Compact a conversation if it exceeds the context window threshold.
|
|
31
31
|
* Returns the original messages if compaction isn't needed or fails.
|
|
32
32
|
*/
|
|
33
|
-
export async function compactIfNeeded(client, model, messages, contextWindow,
|
|
33
|
+
export async function compactIfNeeded(client, model, messages, contextWindow, requestDefaults = {}, sessionId) {
|
|
34
|
+
const currentTokens = estimateConversationTokens(messages);
|
|
34
35
|
const utilisation = (currentTokens / contextWindow) * 100;
|
|
35
36
|
if (utilisation < 90)
|
|
36
37
|
return messages;
|
|
@@ -14,9 +14,12 @@ export function estimateMessageTokens(msg) {
|
|
|
14
14
|
if ('content' in msg && typeof msg.content === 'string') {
|
|
15
15
|
tokens += estimateTokens(msg.content);
|
|
16
16
|
}
|
|
17
|
-
if ('tool_calls' in msg && Array.isArray(msg.tool_calls)) {
|
|
17
|
+
if ('tool_calls' in msg && msg.role === 'assistant' && Array.isArray(msg.tool_calls)) {
|
|
18
18
|
for (const tc of msg.tool_calls) {
|
|
19
|
-
|
|
19
|
+
// Type guard for function tool calls
|
|
20
|
+
if (tc.type === 'function' && 'function' in tc) {
|
|
21
|
+
tokens += estimateTokens(tc.function.name || '') + estimateTokens(tc.function.arguments || '') + 10;
|
|
22
|
+
}
|
|
20
23
|
}
|
|
21
24
|
}
|
|
22
25
|
return tokens;
|
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
import { jsx as _jsx } from "react/jsx-runtime";
|
|
2
2
|
import { Text } from 'ink';
|
|
3
|
+
/**
|
|
4
|
+
* Normalize text for transcript display.
|
|
5
|
+
* - Collapses multiple consecutive newlines into a single newline
|
|
6
|
+
* - Trims leading/trailing whitespace
|
|
7
|
+
* - Returns empty string if text is empty/whitespace only
|
|
8
|
+
*/
|
|
9
|
+
export function normalizeTranscriptText(text) {
|
|
10
|
+
if (!text || !text.trim()) {
|
|
11
|
+
return '';
|
|
12
|
+
}
|
|
13
|
+
// Collapse multiple newlines to single, trim ends
|
|
14
|
+
return text.replace(/\n{2,}/g, '\n').trim();
|
|
15
|
+
}
|
|
3
16
|
function parseSegments(text) {
|
|
4
17
|
const segments = [];
|
|
5
18
|
// Strip markdown headers
|
package/dist/utils/logger.js
CHANGED
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
import { appendFileSync, mkdirSync, existsSync } from 'node:fs';
|
|
11
11
|
import { join } from 'node:path';
|
|
12
12
|
import { homedir } from 'node:os';
|
|
13
|
+
import stripAnsi from 'strip-ansi';
|
|
13
14
|
export var LogLevel;
|
|
14
15
|
(function (LogLevel) {
|
|
15
16
|
LogLevel[LogLevel["ERROR"] = 0] = "ERROR";
|
|
@@ -59,7 +60,8 @@ function writeToFile(message) {
|
|
|
59
60
|
appendFileSync(logFilePath, message);
|
|
60
61
|
}
|
|
61
62
|
catch (err) {
|
|
62
|
-
//
|
|
63
|
+
// Emit to stderr since we can't write to log file
|
|
64
|
+
process.stderr.write(`Failed to write to log file: ${err}\n`);
|
|
63
65
|
}
|
|
64
66
|
}
|
|
65
67
|
function timestamp() {
|
|
@@ -70,6 +72,14 @@ function timestamp() {
|
|
|
70
72
|
const ms = String(d.getMilliseconds()).padStart(3, '0');
|
|
71
73
|
return `${hh}:${mm}:${ss}.${ms}`;
|
|
72
74
|
}
|
|
75
|
+
function safeStringify(obj) {
|
|
76
|
+
try {
|
|
77
|
+
return JSON.stringify(obj);
|
|
78
|
+
}
|
|
79
|
+
catch {
|
|
80
|
+
return '[Object with circular references]';
|
|
81
|
+
}
|
|
82
|
+
}
|
|
73
83
|
function log(level, label, message, context) {
|
|
74
84
|
if (level > currentLevel)
|
|
75
85
|
return;
|
|
@@ -89,8 +99,11 @@ function log(level, label, message, context) {
|
|
|
89
99
|
// Notify listeners
|
|
90
100
|
logListeners.forEach(listener => listener(entry));
|
|
91
101
|
// Write to file
|
|
92
|
-
const ctx = context ? ` ${
|
|
93
|
-
|
|
102
|
+
const ctx = context ? ` ${safeStringify(context)}` : '';
|
|
103
|
+
// Security: Strip ANSI escape codes to prevent terminal injection attacks
|
|
104
|
+
const sanitizedMessage = stripAnsi(message);
|
|
105
|
+
const sanitizedCtx = stripAnsi(ctx);
|
|
106
|
+
writeToFile(`[${ts}] ${label.padEnd(5)} ${sanitizedMessage}${sanitizedCtx}\n`);
|
|
94
107
|
}
|
|
95
108
|
export const logger = {
|
|
96
109
|
error: (msg, ctx) => log(LogLevel.ERROR, 'ERROR', msg, ctx),
|