pi-agent-flow 1.8.1 → 1.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -30
- package/agents/audit.md +1 -2
- package/agents/build.md +1 -0
- package/agents/craft.md +12 -8
- package/agents/debug.md +2 -2
- package/agents/ideas.md +1 -0
- package/agents/scout.md +1 -0
- package/dist/agents.d.ts +41 -0
- package/dist/agents.d.ts.map +1 -0
- package/dist/agents.js +283 -0
- package/dist/agents.js.map +1 -0
- package/dist/batch/batch-bash.d.ts +87 -0
- package/dist/batch/batch-bash.d.ts.map +1 -0
- package/dist/batch/batch-bash.js +369 -0
- package/dist/batch/batch-bash.js.map +1 -0
- package/dist/batch/constants.d.ts +100 -0
- package/dist/batch/constants.d.ts.map +1 -0
- package/dist/batch/constants.js +15 -0
- package/dist/batch/constants.js.map +1 -0
- package/dist/batch/execute.d.ts +21 -0
- package/dist/batch/execute.d.ts.map +1 -0
- package/dist/batch/execute.js +440 -0
- package/dist/batch/execute.js.map +1 -0
- package/dist/batch/fuzzy-edit.d.ts +29 -0
- package/dist/batch/fuzzy-edit.d.ts.map +1 -0
- package/dist/batch/fuzzy-edit.js +257 -0
- package/dist/batch/fuzzy-edit.js.map +1 -0
- package/dist/batch/index.d.ts +85 -0
- package/dist/batch/index.d.ts.map +1 -0
- package/dist/batch/index.js +422 -0
- package/dist/batch/index.js.map +1 -0
- package/dist/batch/render.d.ts +14 -0
- package/dist/batch/render.d.ts.map +1 -0
- package/dist/batch/render.js +74 -0
- package/dist/batch/render.js.map +1 -0
- package/dist/batch/symbols.d.ts +9 -0
- package/dist/batch/symbols.d.ts.map +1 -0
- package/dist/batch/symbols.js +310 -0
- package/dist/batch/symbols.js.map +1 -0
- package/dist/batch.d.ts +12 -0
- package/dist/batch.d.ts.map +1 -0
- package/dist/batch.js +11 -0
- package/dist/batch.js.map +1 -0
- package/dist/cli-args.d.ts +27 -0
- package/dist/cli-args.d.ts.map +1 -0
- package/dist/cli-args.js +265 -0
- package/dist/cli-args.js.map +1 -0
- package/dist/config.d.ts +58 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +296 -0
- package/dist/config.js.map +1 -0
- package/dist/depth.d.ts +25 -0
- package/dist/depth.d.ts.map +1 -0
- package/dist/depth.js +160 -0
- package/dist/depth.js.map +1 -0
- package/dist/executor.d.ts +87 -0
- package/dist/executor.d.ts.map +1 -0
- package/dist/executor.js +295 -0
- package/dist/executor.js.map +1 -0
- package/dist/flow-prompt.d.ts +23 -0
- package/dist/flow-prompt.d.ts.map +1 -0
- package/dist/flow-prompt.js +99 -0
- package/dist/flow-prompt.js.map +1 -0
- package/dist/flow.d.ts +76 -0
- package/dist/flow.d.ts.map +1 -0
- package/dist/flow.js +704 -0
- package/dist/flow.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +327 -0
- package/dist/index.js.map +1 -0
- package/dist/reasoning-strip.d.ts +26 -0
- package/dist/reasoning-strip.d.ts.map +1 -0
- package/dist/reasoning-strip.js +58 -0
- package/dist/reasoning-strip.js.map +1 -0
- package/dist/render-utils.d.ts +42 -0
- package/dist/render-utils.d.ts.map +1 -0
- package/dist/render-utils.js +182 -0
- package/dist/render-utils.js.map +1 -0
- package/dist/render.d.ts +24 -0
- package/dist/render.d.ts.map +1 -0
- package/dist/render.js +409 -0
- package/dist/render.js.map +1 -0
- package/dist/runner-events.d.ts +59 -0
- package/dist/runner-events.d.ts.map +1 -0
- package/dist/runner-events.js +539 -0
- package/dist/runner-events.js.map +1 -0
- package/dist/session-mode.d.ts +10 -0
- package/dist/session-mode.d.ts.map +1 -0
- package/dist/session-mode.js +25 -0
- package/dist/session-mode.js.map +1 -0
- package/dist/settings-resolver.d.ts +28 -0
- package/dist/settings-resolver.d.ts.map +1 -0
- package/dist/settings-resolver.js +148 -0
- package/dist/settings-resolver.js.map +1 -0
- package/dist/sliding-prompt.d.ts +40 -0
- package/dist/sliding-prompt.d.ts.map +1 -0
- package/dist/sliding-prompt.js +121 -0
- package/dist/sliding-prompt.js.map +1 -0
- package/dist/snapshot.d.ts +29 -0
- package/dist/snapshot.d.ts.map +1 -0
- package/dist/snapshot.js +199 -0
- package/dist/snapshot.js.map +1 -0
- package/dist/structured-output.d.ts +36 -0
- package/dist/structured-output.d.ts.map +1 -0
- package/dist/structured-output.js +244 -0
- package/dist/structured-output.js.map +1 -0
- package/dist/timed-bash.d.ts +45 -0
- package/dist/timed-bash.d.ts.map +1 -0
- package/dist/timed-bash.js +219 -0
- package/dist/timed-bash.js.map +1 -0
- package/dist/tool-utils.d.ts +20 -0
- package/dist/tool-utils.d.ts.map +1 -0
- package/dist/tool-utils.js +38 -0
- package/dist/tool-utils.js.map +1 -0
- package/dist/transitions.d.ts +39 -0
- package/dist/transitions.d.ts.map +1 -0
- package/dist/transitions.js +59 -0
- package/dist/transitions.js.map +1 -0
- package/dist/types.d.ts +207 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +143 -0
- package/dist/types.js.map +1 -0
- package/dist/web-tool.d.ts +35 -0
- package/dist/web-tool.d.ts.map +1 -0
- package/dist/web-tool.js +545 -0
- package/dist/web-tool.js.map +1 -0
- package/package.json +7 -5
- package/src/agents.ts +0 -299
- package/src/ambient.d.ts +0 -107
- package/src/batch/batch-bash.ts +0 -443
- package/src/batch/constants.ts +0 -128
- package/src/batch/execute.ts +0 -551
- package/src/batch/fuzzy-edit.ts +0 -323
- package/src/batch/index.ts +0 -494
- package/src/batch/render.ts +0 -81
- package/src/batch/symbols.ts +0 -341
- package/src/batch.ts +0 -28
- package/src/cli-args.ts +0 -315
- package/src/config.ts +0 -391
- package/src/executor.ts +0 -445
- package/src/flow.ts +0 -834
- package/src/hooks.ts +0 -294
- package/src/index.ts +0 -1132
- package/src/render-utils.ts +0 -205
- package/src/render.ts +0 -524
- package/src/runner-events.ts +0 -692
- package/src/session-mode.ts +0 -33
- package/src/sliding-prompt.ts +0 -144
- package/src/structured-output.ts +0 -195
- package/src/timed-bash.ts +0 -270
- package/src/transitions.ts +0 -86
- package/src/types.ts +0 -386
- package/src/web-tool.ts +0 -663
package/src/web-tool.ts
DELETED
|
@@ -1,663 +0,0 @@
|
|
|
1
|
-
import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
|
|
2
|
-
import { DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES, truncateHead } from "@mariozechner/pi-coding-agent";
|
|
3
|
-
import { Type, type Static } from "@sinclair/typebox";
|
|
4
|
-
import { createHash } from "node:crypto";
|
|
5
|
-
import { execFile } from "node:child_process";
|
|
6
|
-
import { mkdir, writeFile } from "node:fs/promises";
|
|
7
|
-
import { join } from "node:path";
|
|
8
|
-
import { JSDOM } from "jsdom";
|
|
9
|
-
import TurndownService from "turndown";
|
|
10
|
-
|
|
11
|
-
// ---------------------------------------------------------------------------
|
|
12
|
-
// Schema
|
|
13
|
-
// ---------------------------------------------------------------------------
|
|
14
|
-
|
|
15
|
-
/** Shape of the rows produced by the search providers. */
type SearchResult = {
  title: string;
  url: string;
  snippet: string;
  source: "brave" | "duckduckgo";
};

/** Output formats accepted by a fetch op. */
const fetchFormat = Type.Union(
  [Type.Literal("markdown"), Type.Literal("text"), Type.Literal("html")],
  { description: "Output format (default: markdown)" },
);

/** `{ o: "search", q }` — run a web search for `q`. */
const searchOp = Type.Object({
  o: Type.Literal("search"),
  q: Type.String({ minLength: 1, description: "Search query" }),
});

/** `{ o: "fetch", u, f? }` — download `u`, optionally choosing the output format. */
const fetchOp = Type.Object({
  o: Type.Literal("fetch"),
  u: Type.String({ minLength: 1, description: "URL to fetch" }),
  f: Type.Optional(fetchFormat),
});

/** Parameters of the `web` tool: a non-empty list of search/fetch ops. */
export const webSchema = Type.Object({
  op: Type.Array(Type.Union([searchOp, fetchOp]), {
    minItems: 1,
    description: "Array of web operations to perform",
  }),
});

/** Static TypeScript view of `webSchema`. */
type WebParams = Static<typeof webSchema>;
|
|
45
|
-
|
|
46
|
-
// ---------------------------------------------------------------------------
|
|
47
|
-
// Constants
|
|
48
|
-
// ---------------------------------------------------------------------------
|
|
49
|
-
|
|
50
|
-
/** Upper bound on the number of results kept per search provider. */
const MAX_SEARCH_RESULTS = 4;

/** Search snippets are cut to this many characters. */
const MAX_SEARCH_SNIPPET_CHARS = 160;

/** Fetched documents longer than this are trimmed before being written out. */
const MAX_MARKDOWN_CHARS = 200 * 1000;

/** Size limit applied to fetched response bodies and to the curl output buffer. */
const MAX_FETCH_BYTES = 5 * 1000 * 1000;

/** Number of leading characters of a fetched document shown as a preview. */
const PREVIEW_CHARS = 500;

/** Content types (matched by substring) that the direct fetch tier accepts. */
const ALLOWED_CONTENT_TYPES = [
  "text/html",
  "application/xhtml+xml",
  "text/plain",
  "application/xml",
  "text/xml",
];
|
|
62
|
-
|
|
63
|
-
// ---------------------------------------------------------------------------
|
|
64
|
-
// Tool factory
|
|
65
|
-
// ---------------------------------------------------------------------------
|
|
66
|
-
|
|
67
|
-
/**
 * Builds the `web` tool definition: its metadata, prompt guidance, parameter
 * schema, and an `execute` handler that delegates to `runWebOps`.
 */
export function createWebTool() {
  const promptGuidelines = [
    "Pass ops as an array: [{ o: 'search', q: '<query>' }] to find pages.",
    "Pass ops as an array: [{ o: 'fetch', u: '<url>', f: 'markdown' }] to download a URL. Content is saved to a temp file — use the read tool to access it in chunks.",
    "The tool returns the file path, title, content length, and a short preview of the content when fetching.",
    "Do NOT ask the web tool a question directly. Search or fetch first, then read the results or file to find what you need.",
  ];

  return {
    name: "web",
    label: "Web",
    description:
      "Perform web operations: search or fetch pages. Pass an array of ops: [{ o: 'search', q: '...' }] or [{ o: 'fetch', u: '...', f: 'markdown' }].",
    promptSnippet: "Search the web or fetch a webpage when local files are insufficient",
    promptGuidelines,
    parameters: webSchema,

    // The tool-call id and the update callback are accepted but not used.
    async execute(
      _toolCallId: string,
      params: WebParams,
      signal: AbortSignal | undefined,
      _onUpdate: unknown,
      ctx: ExtensionContext,
    ) {
      return runWebOps(params, ctx, signal);
    },
  };
}
|
|
93
|
-
|
|
94
|
-
// ---------------------------------------------------------------------------
|
|
95
|
-
// Active tool management
|
|
96
|
-
// ---------------------------------------------------------------------------
|
|
97
|
-
|
|
98
|
-
/**
 * Adds the `web` tool to, or removes it from, the set of active tools.
 *
 * The current tool names are read from `pi`, de-duplicated, and written back
 * with `web` present only when `enabled` is true.
 */
export function setWebToolsActive(pi: ExtensionAPI, enabled: boolean) {
  const toolNames = new Set(pi.getActiveTools());
  if (!enabled) {
    toolNames.delete("web");
  } else {
    toolNames.add("web");
  }
  pi.setActiveTools(Array.from(toolNames));
}
|
|
104
|
-
|
|
105
|
-
// ---------------------------------------------------------------------------
|
|
106
|
-
// Core dispatch
|
|
107
|
-
// ---------------------------------------------------------------------------
|
|
108
|
-
|
|
109
|
-
/**
 * Runs every op in `params.op` sequentially and merges the outcomes.
 *
 * The text of each op is joined with a `---` separator and truncated with
 * `truncateHead`; the per-op details are returned under `details.ops`.
 */
async function runWebOps(params: WebParams, ctx: ExtensionContext, signal?: AbortSignal) {
  const sections: string[] = [];
  const opDetails: Array<Record<string, unknown>> = [];

  for (const op of params.op) {
    switch (op.o) {
      case "search": {
        const searched = await runWebSearch({ query: op.q }, ctx, signal);
        sections.push(searched.content[0].text);
        opDetails.push({ o: "search", q: op.q, ...searched.details });
        break;
      }
      default: {
        // Anything that is not a search op is handled as a fetch op.
        const fetched = await runWebFetch({ url: op.u, format: op.f }, ctx, signal);
        sections.push(fetched.content[0].text);
        opDetails.push({ o: "fetch", u: op.u, f: op.f, ...fetched.details });
        break;
      }
    }
  }

  const combined = sections.join("\n\n---\n\n");
  const { content: text } = truncateHead(combined, {
    maxLines: DEFAULT_MAX_LINES,
    maxBytes: DEFAULT_MAX_BYTES,
  });

  return {
    content: [{ type: "text" as const, text }],
    details: { ops: opDetails },
  };
}
|
|
136
|
-
|
|
137
|
-
// ---------------------------------------------------------------------------
|
|
138
|
-
// Search
|
|
139
|
-
// ---------------------------------------------------------------------------
|
|
140
|
-
|
|
141
|
-
/**
 * Performs one search and renders the outcome as text.
 *
 * Results become a numbered list (title, URL, optional snippet). With no
 * results the text is either the list of provider errors or a "no results"
 * notice. The text is truncated with `truncateHead` before being returned.
 */
async function runWebSearch(
  params: { query: string },
  _ctx: ExtensionContext,
  signal?: AbortSignal,
) {
  const { query } = params;
  const { results, errors } = await searchKeyless(query, signal);
  const hasErrors = errors.length > 0;

  let body: string;
  if (results.length === 0) {
    body = hasErrors
      ? `Search failed for: ${query}\n\nAll search providers returned errors:\n${errors.map((e) => `- ${e}`).join("\n")}`
      : `No results found for: ${query}`;
  } else {
    const entries: string[] = [];
    results.forEach((hit, position) => {
      const snippetLine = hit.snippet ? `\n ${hit.snippet}` : "";
      entries.push(`${position + 1}. ${hit.title}\n ${hit.url}${snippetLine}`);
    });
    body = entries.join("\n\n");
  }

  const text = truncateHead(body, {
    maxLines: DEFAULT_MAX_LINES,
    maxBytes: DEFAULT_MAX_BYTES,
  }).content;

  return {
    content: [{ type: "text" as const, text }],
    details: {
      query,
      results,
      errors: hasErrors ? errors : undefined,
    },
  };
}
|
|
173
|
-
|
|
174
|
-
/**
 * Tries the search providers in order (Brave, then DuckDuckGo) and returns
 * the first non-empty result list.
 *
 * A provider that throws contributes a `"<name>: <message>"` entry to the
 * error list. If no provider yields results, the collected errors are logged
 * with `console.warn` (when there are any) and returned with an empty list.
 */
async function searchKeyless(
  query: string,
  signal?: AbortSignal,
): Promise<{ results: SearchResult[]; errors: string[] }> {
  const providers: Array<
    [string, (q: string, s?: AbortSignal) => Promise<SearchResult[]>]
  > = [
    ["Brave", braveSearchHtml],
    ["DuckDuckGo", duckDuckGoHtmlSearch],
  ];

  const failures: string[] = [];

  for (const [label, search] of providers) {
    try {
      const hits = await search(query, signal);
      if (hits.length > 0) {
        // A successful provider discards errors from earlier providers.
        return { results: hits, errors: [] };
      }
    } catch (err) {
      failures.push(`${label}: ${err instanceof Error ? err.message : String(err)}`);
    }
  }

  if (failures.length > 0) {
    console.warn(`[web] searchKeyless failed:\n${failures.join("\n")}`);
  }

  return { results: [], errors: failures };
}
|
|
200
|
-
|
|
201
|
-
/**
 * Queries Brave's HTML search page and scrapes result links from it.
 *
 * Keeps at most `MAX_SEARCH_RESULTS` unique http(s) links that do not point
 * back at search.brave.com, each with a snippet taken from the text of the
 * nearest result container.
 *
 * @throws Error when the response status is not OK.
 */
async function braveSearchHtml(query: string, signal?: AbortSignal): Promise<SearchResult[]> {
  const endpoint = new URL("https://search.brave.com/search");
  endpoint.searchParams.set("q", query);
  endpoint.searchParams.set("source", "web");
  const pageUrl = endpoint.toString();

  const response = await fetch(pageUrl, {
    redirect: "follow",
    signal,
    headers: browserHeaders(),
  });
  if (!response.ok) {
    throw new Error(`Brave search failed with status ${response.status}`);
  }

  const markup = await response.text();
  const { document } = new JSDOM(markup, { url: pageUrl }).window;

  // Several selectors are tried at once; any anchor matching one is a candidate.
  const titleSelector = [
    "a[data-testid='result-title-a']",
    ".snippet.fdb a",
    ".result h2 a",
    "a.heading-serpresult",
  ].join(", ");
  const links: HTMLAnchorElement[] = Array.from(
    document.querySelectorAll<HTMLAnchorElement>(titleSelector),
  );

  // Keyed by URL so duplicates are skipped while insertion order is kept.
  const byUrl = new Map<string, SearchResult>();

  for (const link of links) {
    const target = link.href?.trim();
    const heading = (link.textContent ?? "").replace(/\s+/g, " ").trim();
    if (!target || !heading) continue;

    const isExternalHttp =
      /^https?:\/\//i.test(target) && !target.includes("search.brave.com");
    if (!isExternalHttp || byUrl.has(target)) continue;

    const container =
      link.closest("[data-type='web']") ??
      link.closest(".snippet") ??
      link.closest(".fdb") ??
      link.parentElement;

    byUrl.set(target, {
      title: heading,
      url: target,
      snippet: extractSnippet(container?.textContent ?? "", heading),
      source: "brave",
    });

    if (byUrl.size >= MAX_SEARCH_RESULTS) break;
  }

  return Array.from(byUrl.values());
}
|
|
264
|
-
|
|
265
|
-
/**
 * Queries DuckDuckGo's HTML endpoint and scrapes result entries from it.
 *
 * Each `.result` element contributes its title link and a snippet; at most
 * `MAX_SEARCH_RESULTS` unique http(s) links are kept.
 *
 * @throws Error when the response status is not OK.
 */
async function duckDuckGoHtmlSearch(query: string, signal?: AbortSignal): Promise<SearchResult[]> {
  const endpoint = new URL("https://html.duckduckgo.com/html/");
  endpoint.searchParams.set("q", query);
  const pageUrl = endpoint.toString();

  const response = await fetch(pageUrl, {
    method: "GET",
    redirect: "follow",
    signal,
    headers: browserHeaders(),
  });
  if (!response.ok) {
    throw new Error(`DuckDuckGo search failed with status ${response.status}`);
  }

  const markup = await response.text();
  const { document } = new JSDOM(markup, { url: pageUrl }).window;

  // Keyed by URL so duplicates are skipped while insertion order is kept.
  const byUrl = new Map<string, SearchResult>();
  const entries: Element[] = Array.from(document.querySelectorAll<Element>(".result"));

  for (const entry of entries) {
    const link = entry.querySelector(
      ".result__title a, a.result__a",
    ) as HTMLAnchorElement | null;
    if (link === null) continue;

    const target = link.href?.trim();
    const heading = (link.textContent ?? "").replace(/\s+/g, " ").trim();
    if (!target || !heading) continue;
    if (!/^https?:\/\//i.test(target) || byUrl.has(target)) continue;

    // The first of these nodes that exists supplies the snippet text.
    const snippetSource =
      entry.querySelector(".result__snippet") ??
      entry.querySelector(".result__body") ??
      entry.querySelector(".result__extras");

    byUrl.set(target, {
      title: heading,
      url: target,
      snippet: extractSnippet(snippetSource?.textContent ?? "", heading),
      source: "duckduckgo",
    });

    if (byUrl.size >= MAX_SEARCH_RESULTS) break;
  }

  return Array.from(byUrl.values());
}
|
|
322
|
-
|
|
323
|
-
// ---------------------------------------------------------------------------
|
|
324
|
-
// Fetch
|
|
325
|
-
// ---------------------------------------------------------------------------
|
|
326
|
-
|
|
327
|
-
/**
 * Downloads one URL, converts it to the requested format, stores the result
 * in a temp file under the session directory, and returns a short report.
 *
 * The report lists the file path, title (when present), content length and a
 * preview of the first `PREVIEW_CHARS` characters, preceded by a warning when
 * little or no content was extracted.
 *
 * @throws Error when URL validation or fetching fails.
 */
async function runWebFetch(
  params: { url: string; format?: string },
  ctx: ExtensionContext,
  signal?: AbortSignal,
) {
  validateFetchUrl(params.url);

  const format = params.format ?? "markdown";
  const html = await fetchHtml(params.url, signal);

  let title: string;
  let content: string;
  let ext: string;

  if (format === "html") {
    // Raw HTML is kept as-is; only the title is pulled out.
    title = extractTitle(html, params.url);
    content = html;
    ext = ".html";
  } else {
    // "text" is produced by stripping formatting from the markdown conversion.
    const converted = htmlToMarkdown(html, params.url);
    const asPlainText = format === "text";
    title = converted.title;
    content = asPlainText ? stripMarkdownFormatting(converted.markdown) : converted.markdown;
    ext = asPlainText ? ".txt" : ".md";
  }

  if (content.length > MAX_MARKDOWN_CHARS) {
    content = trimLargeDocument(content, MAX_MARKDOWN_CHARS);
  }

  const sessionDir = ctx.sessionManager.getSessionDir();
  const filePath = await writeTempFile(sessionDir, params.url, content, ext);
  const preview = content.slice(0, PREVIEW_CHARS).trim();

  const reportLines: string[] = [];
  if (content.length === 0) {
    reportLines.push("Warning: no readable content was extracted from this page.");
  } else if (content.length < 100) {
    reportLines.push("Warning: very little content was extracted from this page.");
  }
  reportLines.push(`File: ${filePath}`);
  if (title) {
    reportLines.push(`Title: ${title}`);
  }
  reportLines.push(`Content length: ${content.length} chars`, "", "Preview:", preview);

  const text = truncateHead(reportLines.join("\n"), {
    maxLines: DEFAULT_MAX_LINES,
    maxBytes: DEFAULT_MAX_BYTES,
  }).content;

  return {
    content: [{ type: "text" as const, text }],
    details: {
      url: params.url,
      title,
      filePath,
      contentLength: content.length,
      format,
    },
  };
}
|
|
400
|
-
|
|
401
|
-
/**
 * Retrieves the body of `url` as text, trying three tiers in order:
 * a direct `fetch`, the r.jina.ai extractor, and finally a `curl` subprocess.
 *
 * A tier's result is accepted only when it is at least 100 characters long
 * (and, for the first two tiers, no longer than `MAX_FETCH_BYTES`).
 *
 * Cancellation is honoured between tiers: once `signal` is aborted no further
 * tier is attempted and an `Error("Aborted")` is thrown.
 *
 * @param url    Address to download.
 * @param signal Optional abort signal forwarded to every tier.
 * @returns The response body of the first tier that produced usable text.
 * @throws Error when the request was aborted or when every tier failed.
 */
async function fetchHtml(url: string, signal?: AbortSignal): Promise<string> {
  // The per-tier catch blocks swallow errors so the next tier can run; this
  // check keeps a cancellation from being swallowed along with them.
  const abortIfCancelled = (): void => {
    if (signal?.aborted) throw new Error("Aborted");
  };

  // Tier 1: direct fetch
  try {
    const response = await fetch(url, {
      redirect: "follow",
      signal,
      headers: browserHeaders(),
    });

    if (response.ok) {
      const contentType = (response.headers.get("content-type") ?? "").toLowerCase();
      // A missing content type is tolerated; otherwise it must be allow-listed.
      if (!contentType || ALLOWED_CONTENT_TYPES.some((t) => contentType.includes(t))) {
        const contentLength = Number(response.headers.get("content-length") || "0");
        if (contentLength <= MAX_FETCH_BYTES) {
          const text = await response.text();
          if (text.length <= MAX_FETCH_BYTES && text.length >= 100) {
            return text;
          }
        }
      }
    }
  } catch {
    // Fall through to jina.ai
  }
  abortIfCancelled();

  // Tier 2: jina.ai summarizer/extractor
  try {
    const jinaUrl = `https://r.jina.ai/http://${url.replace(/^https?:\/\//, "")}`;
    const jinaResponse = await fetch(jinaUrl, {
      redirect: "follow",
      signal,
      headers: browserHeaders(),
    });

    if (jinaResponse.ok) {
      const text = await jinaResponse.text();
      if (text.length <= MAX_FETCH_BYTES && text.length >= 100) {
        return text;
      }
    }
  } catch {
    // Fall through to curl
  }
  abortIfCancelled();

  // Tier 3: curl subprocess (last effort)
  let curlFailure = "";
  try {
    const curlText = await fetchWithCurl(url, signal);
    if (curlText.length >= 100) {
      return curlText;
    }
  } catch (err) {
    abortIfCancelled();
    // Report the curl failure as part of the unified error below rather than
    // letting a raw process error escape.
    curlFailure = ` (curl: ${err instanceof Error ? err.message : String(err)})`;
  }

  throw new Error(
    `Failed to fetch URL (direct + jina.ai + curl all failed). URL: ${url}${curlFailure}`,
  );
}
|
|
453
|
-
|
|
454
|
-
/**
 * Downloads `url` by spawning `curl` (silent, following redirects, 30 s
 * limit) and resolves with its standard output.
 *
 * @param url    Address to download; passed after `--` so it can never be
 *               parsed by curl as an option.
 * @param signal Optional abort signal. If it is already aborted no process is
 *               started; if it aborts later the process receives SIGTERM.
 * @returns The captured standard output of curl.
 * @throws Error("Aborted") on cancellation, or the `execFile` error when curl
 *         fails or its output exceeds `MAX_FETCH_BYTES`.
 */
function fetchWithCurl(url: string, signal?: AbortSignal): Promise<string> {
  return new Promise((resolve, reject) => {
    // An already-aborted signal never fires "abort" again, so check up front.
    if (signal?.aborted) {
      reject(new Error("Aborted"));
      return;
    }

    const onAbort = (): void => {
      child.kill("SIGTERM");
      reject(new Error("Aborted"));
    };

    const child = execFile(
      "curl",
      [
        "-sL",
        "--max-time",
        "30",
        "-A",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
        "--",
        url,
      ],
      { maxBuffer: MAX_FETCH_BYTES },
      (error, stdout) => {
        // The process has finished; stop listening so the handler (and the
        // child it references) is not kept alive by a long-lived signal.
        signal?.removeEventListener("abort", onAbort);
        if (error) {
          reject(error);
          return;
        }
        resolve(stdout);
      },
    );

    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
|
|
477
|
-
|
|
478
|
-
// ---------------------------------------------------------------------------
|
|
479
|
-
// Utilities (exported for testing)
|
|
480
|
-
// ---------------------------------------------------------------------------
|
|
481
|
-
|
|
482
|
-
/**
 * Derives a short snippet from the raw text of a search-result container.
 *
 * Whitespace runs are collapsed, a leading copy of `title` is dropped, and
 * the remainder is cut to `MAX_SEARCH_SNIPPET_CHARS`. Returns `""` when the
 * text is empty or consists of nothing but the title.
 */
export function extractSnippet(raw: string, title: string): string {
  const collapsed = raw.replace(/\s+/g, " ").trim();
  if (collapsed.length === 0 || collapsed === title) {
    return "";
  }

  const withoutTitle = collapsed.startsWith(title)
    ? collapsed.slice(title.length).trim()
    : collapsed;

  return withoutTitle.length === 0 ? "" : withoutTitle.slice(0, MAX_SEARCH_SNIPPET_CHARS);
}
|
|
489
|
-
|
|
490
|
-
/**
 * Rejects URLs that must not be fetched.
 *
 * A URL passes only when it parses, uses `http:` or `https:`, and its host
 * does not look like a loopback, link-local, private-network or otherwise
 * internal address. The check is purely textual: host names are not resolved.
 *
 * @param url Candidate URL.
 * @throws Error when the URL is malformed, uses another scheme, or names a
 *         blocked host.
 */
export function validateFetchUrl(url: string): void {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    throw new Error(`Invalid URL: ${url}`);
  }

  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    throw new Error(
      `Unsupported URL scheme "${parsed.protocol}" — only http: and https: are allowed`,
    );
  }

  const hostname = parsed.hostname;
  if (isBlockedHost(hostname)) {
    throw new Error(`Blocked: "${hostname}" looks like a private or internal address`);
  }
}

/** True when `hostname` (as reported by `URL.hostname`) names an internal host. */
function isBlockedHost(hostname: string): boolean {
  // "localhost." with a trailing root dot names the same host as "localhost".
  const host = hostname.toLowerCase().replace(/\.$/, "");

  if (
    host === "localhost" ||
    host.endsWith(".localhost") ||
    host.endsWith(".local") ||
    host.endsWith(".internal")
  ) {
    return true;
  }

  // URL.hostname reports IPv6 literals wrapped in square brackets.
  if (host.startsWith("[") && host.endsWith("]")) {
    return isBlockedIpv6(host.slice(1, -1));
  }

  return isBlockedIpv4Text(host);
}

/** True when a dotted host string starts with a loopback/private IPv4 prefix. */
function isBlockedIpv4Text(host: string): boolean {
  return (
    host === "0.0.0.0" ||
    host.startsWith("127.") ||
    host.startsWith("169.254.") ||
    host.startsWith("10.") ||
    host.startsWith("192.168.") ||
    /^172\.(1[6-9]|2\d|3[01])\./.test(host)
  );
}

/** True for loopback, unspecified, unique-local, link-local and mapped-private IPv6. */
function isBlockedIpv6(address: string): boolean {
  if (address === "::1" || address === "::") return true;
  if (/^f[cd][0-9a-f]{2}:/.test(address)) return true; // fc00::/7 (unique local)
  if (/^fe[89ab][0-9a-f]:/.test(address)) return true; // fe80::/10 (link local)

  // IPv4-mapped addresses are serialized as two hex groups, e.g. ::ffff:7f00:1.
  const mapped = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(address);
  if (mapped !== null) {
    const [, highHex = "", lowHex = ""] = mapped;
    const high = parseInt(highHex, 16);
    const low = parseInt(lowHex, 16);
    return isBlockedIpv4Text(`${high >> 8}.${high & 255}.${low >> 8}.${low & 255}`);
  }

  return false;
}
|
|
520
|
-
|
|
521
|
-
/**
 * Converts an HTML page to markdown and reports the page title.
 *
 * Non-content elements (scripts, styles, navigation, forms, …) are removed,
 * then the first of `<main>`, `<article>`, `[role='main']` or `<body>` is
 * converted with Turndown. Runs of three or more newlines are collapsed.
 */
export function htmlToMarkdown(html: string, baseUrl: string): { title: string; markdown: string } {
  const { document } = new JSDOM(html, { url: baseUrl }).window;

  // One combined query collects every element that should be dropped.
  const noiseSelector = [
    "script",
    "style",
    "noscript",
    "iframe",
    "svg",
    "canvas",
    "form",
    "nav",
    "aside",
    "footer",
    "header",
  ].join(", ");
  document.querySelectorAll(noiseSelector).forEach((el: Element) => el.remove());

  const contentRoot =
    document.querySelector("main") ??
    document.querySelector("article") ??
    document.querySelector("[role='main']") ??
    document.body;

  const converter = new TurndownService({
    headingStyle: "atx",
    codeBlockStyle: "fenced",
    bulletListMarker: "-",
  });
  converter.remove(["script", "style", "noscript", "iframe", "canvas"]);

  const title = (document.title || "").trim();
  const rawMarkdown = converter.turndown(contentRoot?.innerHTML || "");
  const markdown = rawMarkdown.replace(/\n{3,}/g, "\n\n").trim();

  return { title, markdown };
}
|
|
563
|
-
|
|
564
|
-
/**
 * Shortens `markdown` to at most `maxChars` characters.
 *
 * Documents that already fit are returned unchanged. Otherwise the result
 * keeps the beginning (about 75% of the available space) and the end (the
 * rest), joined by a "[...content trimmed...]" marker. When `maxChars` leaves
 * no room for anything besides the marker, the document is simply cut to
 * `maxChars` characters (an empty string for zero or negative limits).
 *
 * @param markdown Document text.
 * @param maxChars Maximum length of the returned string.
 * @returns The original or trimmed text, never longer than `maxChars`
 *          (or empty when `maxChars` is not positive).
 */
export function trimLargeDocument(markdown: string, maxChars: number): string {
  if (markdown.length <= maxChars) return markdown;

  // Work with a non-negative integer so the slice offsets below stay sane.
  const limit = Math.max(0, Math.floor(maxChars));
  const marker = "\n\n[...content trimmed...]\n\n";
  const budget = limit - marker.length;

  // No room for head + marker + tail: a plain cut keeps real content instead
  // of returning only (part of) the marker. It also avoids slice(-0), which
  // would select the whole document.
  if (budget <= 0) return markdown.slice(0, limit);

  const headSize = Math.floor(budget * 0.75);
  const tailSize = budget - headSize; // always >= 1 when budget >= 1

  const head = markdown.slice(0, headSize).trimEnd();
  const tail = markdown.slice(-tailSize).trimStart();

  return `${head}${marker}${tail}`.slice(0, limit);
}
|
|
577
|
-
|
|
578
|
-
/**
 * Pulls the text of the first `<title>` element out of raw HTML, with
 * whitespace runs collapsed. Returns `""` when there is no title element.
 * The base URL argument is accepted but not used.
 */
function extractTitle(html: string, _baseUrl: string): string {
  const found = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html);
  if (found === null) {
    return "";
  }
  const rawTitle = found[1];
  return rawTitle.replace(/\s+/g, " ").trim();
}
|
|
582
|
-
|
|
583
|
-
/**
 * Reduces markdown to plain text.
 *
 * Removes heading, list, blockquote and rule markers; unwraps fenced code,
 * bold, italic, inline code, images (keeping alt text) and links (keeping
 * link text); flattens table rows into " — " separated cells; decodes the
 * basic HTML entities; and collapses runs of three or more newlines.
 *
 * @param markdown Markdown source.
 * @returns The text with markdown syntax stripped, trimmed at both ends.
 */
export function stripMarkdownFormatting(markdown: string): string {
  return markdown
    .replace(/^#{1,6}\s+/gm, "") // headings
    .replace(/```[\s\S]*?```/g, (m) => m.replace(/```\w*\n?/g, "").trim()) // fenced code blocks
    .replace(/\*\*([^*]+)\*\*/g, "$1") // bold
    .replace(/\*([^*]+)\*/g, "$1") // italic
    .replace(/`([^`]+)`/g, "$1") // inline code
    .replace(/!\[([^\]]*)\]\([^)]+\)/g, "$1") // images (must run before links)
    .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // links
    .replace(/^\d+\.\s+/gm, "") // numbered lists
    .replace(/^[-*+]\s+/gm, "") // unordered list markers
    .replace(/^>\s+/gm, "") // blockquotes
    .replace(/^---+$/gm, "") // horizontal rules
    .replace(/^\|.*\|$/gm, (row) =>
      // Separator rows (only pipes, dashes, colons, spaces) vanish; data rows
      // lose their outer pipes and get " — " between cells.
      /^[\s|:-]+$/.test(row)
        ? ""
        : row
            .replace(/^\||\|$/g, "")
            .replace(/\|/g, " — ")
            .trim(),
    ) // tables
    // HTML entities. "&amp;" is decoded last so that text such as "&amp;lt;"
    // becomes "&lt;" rather than being decoded twice into "<".
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    .replace(/&amp;/g, "&")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
}
|
|
612
|
-
|
|
613
|
-
/** Returns the first 12 hex characters of the SHA-256 digest of `url`. */
export function urlToHash(url: string): string {
  const hasher = createHash("sha256");
  hasher.update(url);
  const fullDigest = hasher.digest("hex");
  return fullDigest.slice(0, 12);
}
|
|
616
|
-
|
|
617
|
-
/**
 * Stores fetched content at `<sessionDir>/tmp/fetch-<hash><ext>`, where the
 * hash comes from `urlToHash(url)`. The `tmp` directory is created when
 * missing. Resolves with the path of the written file.
 */
async function writeTempFile(
  sessionDir: string,
  url: string,
  content: string,
  ext: string,
): Promise<string> {
  const tmpDir = join(sessionDir, "tmp");
  const fileName = `fetch-${urlToHash(url)}${ext}`;
  const target = join(tmpDir, fileName);

  await mkdir(tmpDir, { recursive: true });
  await writeFile(target, content, "utf-8");

  return target;
}
|
|
630
|
-
|
|
631
|
-
/**
 * Reports whether `prompt` contains something URL-like: an `http://` or
 * `https://` address, or text starting with `www.`. Empty or missing prompts
 * yield `false`.
 */
export function looksLikeUrlPrompt(prompt: string | undefined): boolean {
  return prompt ? /(https?:\/\/\S+|www\.\S+)/i.test(prompt) : false;
}
|
|
635
|
-
|
|
636
|
-
/**
 * Heuristic: does `prompt` read like a request that needs a web search?
 *
 * The prompt is lower-cased and tested against a fixed list of phrases
 * (explicit "search the web" wording, documentation and release lookups,
 * time-sensitive questions, …). Empty or missing prompts yield `false`.
 */
export function looksLikeWebSearchPrompt(prompt: string | undefined): boolean {
  if (!prompt) return false;

  const lowered = prompt.toLowerCase();
  const triggers: RegExp[] = [
    /\b(search the web|look online|find online|search online|web search)\b/,
    /\b(official documentation|official docs|api docs|api reference)\b/,
    /\b(latest version|latest release|release notes|what's new)\b/,
    /\b(current price|current status|today's|yesterday's|this week's)\b/,
    /\bnews about\b/,
    /\bwhat changed in\b/,
    /\bup to date\b/,
    /\bon the web\b/,
    /\bgoogle\s+(for|how|what|why|when)\b/,
    /\b(find|look up|check)\s+.{0,20}\b(online|on the web|on the internet)\b/,
  ];

  for (const trigger of triggers) {
    if (trigger.test(lowered)) {
      return true;
    }
  }
  return false;
}
|
|
655
|
-
|
|
656
|
-
/** Request headers that imitate a desktop Chrome browser on Linux. */
function browserHeaders(): HeadersInit {
  const userAgent =
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36";
  const accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
  const acceptLanguage = "en-US,en;q=0.9";

  return {
    "User-Agent": userAgent,
    Accept: accept,
    "Accept-Language": acceptLanguage,
  };
}
|