@aaroncql/pim-agent 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +212 -0
- package/bin/pim.ts +109 -0
- package/package.json +49 -0
- package/src/extensions/_init/index.ts +109 -0
- package/src/extensions/bash/capture.test.ts +126 -0
- package/src/extensions/bash/capture.ts +80 -0
- package/src/extensions/bash/format.test.ts +240 -0
- package/src/extensions/bash/format.ts +76 -0
- package/src/extensions/bash/index.ts +86 -0
- package/src/extensions/bash/run.test.ts +262 -0
- package/src/extensions/bash/run.ts +207 -0
- package/src/extensions/bash/schema.ts +54 -0
- package/src/extensions/command-picker/index.ts +52 -0
- package/src/extensions/command-picker/ranker.test.ts +46 -0
- package/src/extensions/command-picker/ranker.ts +17 -0
- package/src/extensions/edit/edit.test.ts +285 -0
- package/src/extensions/edit/edit.ts +382 -0
- package/src/extensions/edit/index.ts +54 -0
- package/src/extensions/edit/schema.ts +37 -0
- package/src/extensions/file-picker/catalog.test.ts +263 -0
- package/src/extensions/file-picker/catalog.ts +219 -0
- package/src/extensions/file-picker/index.test.ts +168 -0
- package/src/extensions/file-picker/index.ts +119 -0
- package/src/extensions/file-picker/ranker.test.ts +94 -0
- package/src/extensions/file-picker/ranker.ts +76 -0
- package/src/extensions/footer/git.test.ts +76 -0
- package/src/extensions/footer/git.ts +87 -0
- package/src/extensions/footer/index.test.ts +161 -0
- package/src/extensions/footer/index.ts +148 -0
- package/src/extensions/footer/powerline.ts +87 -0
- package/src/extensions/footer/segments.test.ts +164 -0
- package/src/extensions/footer/segments.ts +234 -0
- package/src/extensions/glob/glob.test.ts +171 -0
- package/src/extensions/glob/glob.ts +34 -0
- package/src/extensions/glob/index.test.ts +68 -0
- package/src/extensions/glob/index.ts +136 -0
- package/src/extensions/glob/render.test.ts +126 -0
- package/src/extensions/glob/render.ts +74 -0
- package/src/extensions/glob/schema.ts +52 -0
- package/src/extensions/grep/grep.test.ts +387 -0
- package/src/extensions/grep/grep.ts +215 -0
- package/src/extensions/grep/index.test.ts +68 -0
- package/src/extensions/grep/index.ts +158 -0
- package/src/extensions/grep/render.test.ts +269 -0
- package/src/extensions/grep/render.ts +243 -0
- package/src/extensions/grep/schema.ts +92 -0
- package/src/extensions/read/index.ts +84 -0
- package/src/extensions/read/read.test.ts +177 -0
- package/src/extensions/read/read.ts +206 -0
- package/src/extensions/read/render.test.ts +61 -0
- package/src/extensions/read/render.ts +33 -0
- package/src/extensions/read/schema.ts +27 -0
- package/src/extensions/subagent/index.test.ts +44 -0
- package/src/extensions/subagent/index.ts +30 -0
- package/src/extensions/subagent/render.test.ts +292 -0
- package/src/extensions/subagent/render.ts +359 -0
- package/src/extensions/subagent/schema.ts +9 -0
- package/src/extensions/subagent/subagent.test.ts +315 -0
- package/src/extensions/subagent/subagent.ts +418 -0
- package/src/extensions/system-prompt/index.ts +28 -0
- package/src/extensions/system-prompt/prompt.test.ts +64 -0
- package/src/extensions/system-prompt/prompt.ts +213 -0
- package/src/extensions/todo/index.test.ts +244 -0
- package/src/extensions/todo/index.ts +122 -0
- package/src/extensions/todo/render.test.ts +180 -0
- package/src/extensions/todo/render.ts +172 -0
- package/src/extensions/todo/schema.ts +24 -0
- package/src/extensions/todo/todo.test.ts +222 -0
- package/src/extensions/todo/todo.ts +188 -0
- package/src/extensions/tps/index.test.ts +254 -0
- package/src/extensions/tps/index.ts +136 -0
- package/src/extensions/web-fetch/JinaReaderClient.ts +230 -0
- package/src/extensions/web-fetch/WebViewFetchClient.ts +186 -0
- package/src/extensions/web-fetch/WebViewMarkdownSnapshot.test.ts +119 -0
- package/src/extensions/web-fetch/WebViewMarkdownSnapshot.ts +511 -0
- package/src/extensions/web-fetch/fetch.test.ts +244 -0
- package/src/extensions/web-fetch/fetch.ts +249 -0
- package/src/extensions/web-fetch/index.ts +107 -0
- package/src/extensions/web-fetch/render.test.ts +56 -0
- package/src/extensions/web-fetch/render.ts +39 -0
- package/src/extensions/web-fetch/schema.ts +23 -0
- package/src/extensions/web-search/ExaMcpClient.test.ts +143 -0
- package/src/extensions/web-search/ExaMcpClient.ts +258 -0
- package/src/extensions/web-search/index.ts +118 -0
- package/src/extensions/web-search/render.test.ts +21 -0
- package/src/extensions/web-search/render.ts +9 -0
- package/src/extensions/web-search/schema.ts +21 -0
- package/src/extensions/web-search/search.test.ts +53 -0
- package/src/extensions/web-search/search.ts +23 -0
- package/src/extensions/working-indicator/index.test.ts +21 -0
- package/src/extensions/working-indicator/index.ts +77 -0
- package/src/extensions/write/index.ts +76 -0
- package/src/extensions/write/render.test.ts +64 -0
- package/src/extensions/write/schema.ts +14 -0
- package/src/extensions/write/write.test.ts +108 -0
- package/src/extensions/write/write.ts +104 -0
- package/src/shared/DiffLines.test.ts +193 -0
- package/src/shared/DiffLines.ts +307 -0
- package/src/shared/DiffRenderer.test.ts +206 -0
- package/src/shared/DiffRenderer.ts +396 -0
- package/src/shared/DiffView.ts +199 -0
- package/src/shared/EditMatcher.test.ts +123 -0
- package/src/shared/EditMatcher.ts +826 -0
- package/src/shared/FileScanner.test.ts +158 -0
- package/src/shared/FileScanner.ts +41 -0
- package/src/shared/Fs.ts +46 -0
- package/src/shared/FsErrors.ts +72 -0
- package/src/shared/FuzzyMatcher.test.ts +114 -0
- package/src/shared/FuzzyMatcher.ts +73 -0
- package/src/shared/GitignoreFilter.test.ts +64 -0
- package/src/shared/GitignoreFilter.ts +142 -0
- package/src/shared/GlobExclusions.ts +23 -0
- package/src/shared/Levenshtein.ts +33 -0
- package/src/shared/Lines.test.ts +25 -0
- package/src/shared/Lines.ts +77 -0
- package/src/shared/McpClient.test.ts +235 -0
- package/src/shared/McpClient.ts +406 -0
- package/src/shared/OutputBudget.test.ts +99 -0
- package/src/shared/OutputBudget.ts +79 -0
- package/src/shared/Paths.test.ts +51 -0
- package/src/shared/Paths.ts +52 -0
- package/src/shared/PimSettings.test.ts +90 -0
- package/src/shared/PimSettings.ts +124 -0
- package/src/shared/Renderer.test.ts +190 -0
- package/src/shared/Renderer.ts +256 -0
- package/src/shared/SpillCache.test.ts +94 -0
- package/src/shared/SpillCache.ts +89 -0
- package/src/shared/Tools.test.ts +392 -0
- package/src/shared/Tools.ts +636 -0
- package/src/telegram/Bot.ts +198 -0
- package/src/telegram/Commands.ts +721 -0
- package/src/telegram/Config.test.ts +275 -0
- package/src/telegram/Config.ts +162 -0
- package/src/telegram/Markdown.test.ts +143 -0
- package/src/telegram/Markdown.ts +177 -0
- package/src/telegram/Message.ts +211 -0
- package/src/telegram/Renderer.test.ts +216 -0
- package/src/telegram/Renderer.ts +713 -0
- package/src/telegram/SendFileSchema.ts +19 -0
- package/src/telegram/SendFileTool.ts +94 -0
- package/src/telegram/Session.ts +579 -0
- package/src/telegram/SessionRegistry.test.ts +89 -0
- package/src/telegram/SessionRegistry.ts +170 -0
- package/src/telegram/Supervisor.ts +357 -0
- package/src/telegram/TaskScheduler.test.ts +278 -0
- package/src/telegram/TaskScheduler.ts +293 -0
- package/src/telegram/TaskSchema.ts +88 -0
- package/src/telegram/TaskStore.ts +73 -0
- package/src/telegram/TaskTool.test.ts +179 -0
- package/src/telegram/TaskTool.ts +159 -0
- package/src/telegram/TypingIndicator.ts +43 -0
- package/src/telegram/index.ts +32 -0
- package/src/themes/pim-dark.json +84 -0
- package/src/themes/pim-light.json +84 -0
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
import { afterAll, beforeAll, describe, expect, test } from "bun:test";
|
|
2
|
+
import { mkdtemp, rm } from "node:fs/promises";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { SpillCache } from "../../shared/SpillCache";
|
|
6
|
+
import {
|
|
7
|
+
executeFetch,
|
|
8
|
+
formatOutcome,
|
|
9
|
+
truncationFooter,
|
|
10
|
+
validatePublicUrl,
|
|
11
|
+
} from "./fetch";
|
|
12
|
+
import { WEB_FETCH_INLINE_BYTES } from "./schema";
|
|
13
|
+
import type { JinaReaderClient } from "./JinaReaderClient";
|
|
14
|
+
import type { WebViewFetchClient } from "./WebViewFetchClient";
|
|
15
|
+
|
|
16
|
+
// PIM_HOME_DIR as it was before this suite started, and the temporary
// directory that replaces it while the suite runs.
let previousPimHomeDir: string | undefined;
let testPimHomeDir: string | undefined;

// Point PIM_HOME_DIR at a freshly created temp directory.
beforeAll(async () => {
  previousPimHomeDir = process.env.PIM_HOME_DIR;
  const sandbox = await mkdtemp(join(tmpdir(), "pim-fetch-home-"));
  testPimHomeDir = sandbox;
  process.env.PIM_HOME_DIR = sandbox;
});

// Restore PIM_HOME_DIR (or unset it again) and delete the temp directory.
afterAll(async () => {
  if (previousPimHomeDir !== undefined) {
    process.env.PIM_HOME_DIR = previousPimHomeDir;
  } else {
    delete process.env.PIM_HOME_DIR;
  }

  if (!testPimHomeDir) {
    return;
  }
  await rm(testPimHomeDir, { recursive: true, force: true });
});
|
|
35
|
+
|
|
36
|
+
describe("validatePublicUrl", () => {
  // Asserts, in order, that each candidate is rejected with a message
  // matching `pattern`.
  const expectAllRejected = (
    candidates: readonly string[],
    pattern: RegExp
  ): void => {
    for (const candidate of candidates) {
      expect(() => validatePublicUrl(candidate)).toThrow(pattern);
    }
  };

  test("accepts http and https", () => {
    const httpsUrl = "https://example.com/path";
    expect(validatePublicUrl(httpsUrl)).toBe(httpsUrl);
    // A bare origin comes back with the trailing slash added.
    expect(validatePublicUrl("http://example.com")).toBe("http://example.com/");
  });

  test("rejects non-http schemes", () => {
    expectAllRejected(["ftp://example.com", "file:///etc/passwd"], /http/);
  });

  test("rejects malformed URLs", () => {
    expectAllRejected(["not a url"], /valid/);
  });

  test("rejects embedded credentials", () => {
    expectAllRejected(["https://user:pw@example.com"], /credentials/);
  });

  test("rejects localhost and .local", () => {
    expectAllRejected(["http://localhost/", "http://printer.local/"], /public/);
  });

  test("rejects RFC1918 IPv4", () => {
    expectAllRejected(
      [
        "http://10.0.0.1/",
        "http://192.168.1.1/",
        "http://172.16.0.1/",
        "http://127.0.0.1/",
        "http://169.254.0.1/",
      ],
      /public/
    );
  });

  test("rejects IPv6 loopback and link-local", () => {
    expectAllRejected(
      ["http://[::1]/", "http://[fe80::1]/", "http://[fc00::1]/"],
      /public/
    );
  });

  test("accepts public IPs", () => {
    const publicIpUrl = "http://8.8.8.8/";
    expect(validatePublicUrl(publicIpUrl)).toBe(publicIpUrl);
  });
});
|
|
82
|
+
|
|
83
|
+
describe("executeFetch", () => {
  const REQUEST_URL = "https://example.test/";

  type StubPage = { title: string; url: string; content: string };

  // Stub method that resolves with a fresh copy of `page` on every call.
  const resolvesWith =
    (page: StubPage) => async (): Promise<StubPage> => ({ ...page });

  // Stub method that rejects with `message` on every call.
  const rejectsWith = (message: string) => async (): Promise<never> => {
    throw new Error(message);
  };

  // The stubs carry only the methods a scenario exercises, hence the casts.
  const asJina = (stub: object): JinaReaderClient =>
    stub as unknown as JinaReaderClient;
  const asWebView = (stub: object): WebViewFetchClient =>
    stub as unknown as WebViewFetchClient;

  test("returns remote markdown when available", async () => {
    const outcome = await executeFetch({
      jina: asJina({
        fetchUrl: resolvesWith({
          title: "Remote",
          url: "https://example.test/remote",
          content: "remote markdown",
        }),
      }),
      webView: asWebView({
        fetchMarkdown: rejectsWith("Rendered markdown should not be attempted."),
      }),
      url: REQUEST_URL,
      format: "markdown",
    });

    expect(outcome.format).toBe("markdown");
    expect(outcome.text).toContain("remote markdown");
  });

  test("falls back to rendered markdown when remote markdown fails", async () => {
    const outcome = await executeFetch({
      jina: asJina({
        fetchUrl: rejectsWith("Request timed out after 20000ms."),
      }),
      webView: asWebView({
        fetchMarkdown: resolvesWith({
          title: "Rendered",
          url: "https://example.test/rendered",
          content: "# rendered markdown",
        }),
        fetchHtml: rejectsWith("HTML should not be attempted for markdown mode."),
      }),
      url: REQUEST_URL,
      format: "markdown",
    });

    expect(outcome.format).toBe("markdown");
    expect(outcome.text).toContain("# rendered markdown");
  });

  test("throws rendered markdown error when fallback fails", async () => {
    const attempt = executeFetch({
      jina: asJina({ fetchUrl: rejectsWith("remote unavailable") }),
      webView: asWebView({
        fetchMarkdown: rejectsWith("Request failed: unavailable"),
      }),
      url: REQUEST_URL,
      format: "markdown",
    });

    // Only the fallback's failure is surfaced in the final message.
    await expect(attempt).rejects.toThrow(
      "Failed to fetch: Request failed: unavailable"
    );
  });

  test("returns raw rendered HTML for HTML mode", async () => {
    const outcome = await executeFetch({
      jina: asJina({
        fetchUrl: rejectsWith("Remote markdown should not be attempted."),
      }),
      webView: asWebView({
        fetchHtml: resolvesWith({
          title: "HTML",
          url: "https://example.test/html",
          content: "<html><body>Hello</body></html>",
        }),
      }),
      url: REQUEST_URL,
      format: "html",
    });

    expect(outcome.format).toBe("html");
    expect(outcome.text).toContain("<html><body>Hello</body></html>");
  });
});
|
|
184
|
+
|
|
185
|
+
describe("formatOutcome", () => {
  // Small page that fits comfortably inside the inline budget.
  const page = {
    title: "Example",
    url: "https://example.test/page",
    content: "hello world",
  };

  test("formats untruncated page without spilling", async () => {
    const expectedText = [
      "title: Example",
      "url: https://example.test/page",
      "format: markdown",
      "content:",
      "hello world",
    ].join("\n");

    const outcome = await formatOutcome(page, "markdown");

    expect(outcome.text).toBe(expectedText);
    expect(outcome.truncated).toBe(false);
    expect(outcome.returnedBytes).toBe(11);
    expect(outcome.totalBytes).toBe(11);
    expect(outcome.format).toBe("markdown");
    expect(outcome.path).toBeNull();
  });

  test("spills the full body and points the footer at the resume line over the inline budget", async () => {
    // Forty newline-terminated lines of exactly 1 KiB each: a 32 KiB head
    // holds 32 whole lines, so the footer must resume at line 33.
    const oneKibLine = `${"x".repeat(1023)}\n`;
    const content = oneKibLine.repeat(40);

    const outcome = await formatOutcome({ ...page, content }, "html");

    expect(outcome.truncated).toBe(true);
    expect(outcome.returnedBytes).toBe(WEB_FETCH_INLINE_BYTES);
    expect(outcome.totalBytes).toBe(content.length);
    expect(outcome.path).toBeTruthy();

    const spillPath = outcome.path!;
    const expectedPrefix = join(SpillCache.dir(), "fetch-");
    expect(spillPath.startsWith(expectedPrefix)).toBe(true);
    expect(spillPath.endsWith(".html")).toBe(true);
    expect(outcome.text).toContain(
      `use read with path=${outcome.path} and start=33 for the rest.]`
    );
    // The spill file holds the complete, untruncated body.
    expect(await Bun.file(spillPath).text()).toBe(content);
  });
});
|
|
231
|
+
|
|
232
|
+
describe("truncationFooter", () => {
  test("points at the spill file with a resume line when one was written", () => {
    const footer = truncationFooter(100, 2000, "/tmp/pim/cache/fetch-abc.md", 7);

    expect(footer).toBe(
      "[web_fetch tool: showing first 100 bytes of 2000; use read with path=/tmp/pim/cache/fetch-abc.md and start=7 for the rest.]"
    );
  });

  test("signals the rest is unavailable when the spill failed", () => {
    // A null spill path means there is no file to point the reader at.
    const footer = truncationFooter(100, 2000, null, 7);

    expect(footer).toBe(
      "[web_fetch tool: showing first 100 bytes of 2000; full content unavailable.]"
    );
  });
});
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
import { isIP } from "node:net";
|
|
2
|
+
import { Lines } from "../../shared/Lines";
|
|
3
|
+
import { OutputBudget } from "../../shared/OutputBudget";
|
|
4
|
+
import { SpillCache } from "../../shared/SpillCache";
|
|
5
|
+
import type { JinaReaderClient } from "./JinaReaderClient";
|
|
6
|
+
import {
|
|
7
|
+
WEB_FETCH_INLINE_BYTES,
|
|
8
|
+
type WebFetchFormat,
|
|
9
|
+
type WebFetchResolvedFormat,
|
|
10
|
+
} from "./schema";
|
|
11
|
+
import type { WebViewFetchClient } from "./WebViewFetchClient";
|
|
12
|
+
|
|
13
|
+
/** A fetched page as handed to {@link formatOutcome} by a fetch client. */
export type WebFetchPage = {
  /** Printed on the `title:` line of the formatted output. */
  readonly title: string;
  /** Printed on the `url:` line of the formatted output. */
  readonly url: string;
  /** Page body; truncated to the inline budget when it is too large. */
  readonly content: string;
};
|
|
18
|
+
|
|
19
|
+
/** Result of formatting a fetched page for return to the caller. */
export type WebFetchOutcome = {
  /** Header lines, the (possibly truncated) body, and any truncation footer. */
  readonly text: string;
  /** Title copied from the fetched page. */
  readonly title: string;
  /** URL copied from the fetched page. */
  readonly url: string;
  /** Format the content was produced in. */
  readonly format: WebFetchResolvedFormat;
  /** Size of the body actually included in `text`, as reported by the output budget. */
  readonly returnedBytes: number;
  /** Size of the complete body, as reported by the output budget. */
  readonly totalBytes: number;
  /** Whether the body was cut down to fit the inline budget. */
  readonly truncated: boolean;
  /** Spill-file location of the full body when truncated; otherwise `null`. */
  readonly path: string | null;
};
|
|
29
|
+
|
|
30
|
+
/**
 * Builds the bracketed notice appended to a truncated fetch result.
 *
 * @param returnedBytes - Number of bytes shown inline.
 * @param totalBytes - Number of bytes in the full content.
 * @param spillPath - File holding the full content, or `null` (or empty) when
 *   no such file exists.
 * @param nextStart - Line number the reader should resume from in the spill file.
 * @returns The notice, pointing at the spill file when one exists and stating
 *   that the full content is unavailable otherwise.
 */
export function truncationFooter(
  returnedBytes: number,
  totalBytes: number,
  spillPath: string | null,
  nextStart: number
): string {
  const remainder = spillPath
    ? `use read with path=${spillPath} and start=${nextStart} for the rest.`
    : "full content unavailable.";

  return `[web_fetch tool: showing first ${returnedBytes} bytes of ${totalBytes}; ${remainder}]`;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Validates that `value` is an http(s) URL aimed at a public host.
 *
 * @param value - Candidate URL text.
 * @returns The parsed URL's normalized `href`.
 * @throws Error when the text does not parse as a URL, uses a scheme other
 *   than http/https, carries a username or password, or names a host that
 *   `isPublicHostname` refuses.
 */
export function validatePublicUrl(value: string): string {
  let parsed: URL;
  try {
    parsed = new URL(value);
  } catch {
    throw new Error(`URL must be a valid public HTTP or HTTPS URL: ${value}`);
  }

  const { protocol, username, password, hostname } = parsed;

  const isHttpScheme = protocol === "http:" || protocol === "https:";
  if (!isHttpScheme) {
    throw new Error(`URL must use http:// or https://: ${value}`);
  }

  const hasCredentials = username.length > 0 || password.length > 0;
  if (hasCredentials) {
    throw new Error("URL must not contain embedded credentials.");
  }

  if (!isPublicHostname(hostname)) {
    throw new Error(
      `URL must use a public hostname or IP address (no localhost, .local, or RFC1918 ranges): ${hostname}`
    );
  }

  return parsed.href;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Turns a fetched page into the text and metadata returned to the caller.
 *
 * The body is cut to `WEB_FETCH_INLINE_BYTES`. When that truncates it, the
 * complete content is written through the spill cache (extension `html` for
 * HTML, `md` otherwise) and a footer naming the spill path and resume line is
 * appended after a blank line.
 *
 * @param page - Page to format.
 * @param format - Format the page content is in.
 * @returns The formatted text plus size, truncation, and spill-path details.
 */
export async function formatOutcome(
  page: WebFetchPage,
  format: WebFetchResolvedFormat
): Promise<WebFetchOutcome> {
  const budgeted = OutputBudget.truncateUtf8(
    page.content,
    WEB_FETCH_INLINE_BYTES
  );
  const { body, returnedBytes, totalBytes, truncated } = budgeted;

  // Only oversized pages are spilled; the spill always holds the full content.
  let path: string | null = null;
  if (truncated) {
    const extension = format === "html" ? "html" : "md";
    path = await SpillCache.write("fetch", extension, page.content);
  }

  const sections = [
    `title: ${page.title}`,
    `url: ${page.url}`,
    `format: ${format}`,
    "content:",
    body,
  ];

  if (truncated) {
    const resumeLine = Lines.continuationLine(body);
    const footer = truncationFooter(returnedBytes, totalBytes, path, resumeLine);
    sections.push("", footer);
  }

  return {
    text: sections.join("\n"),
    title: page.title,
    url: page.url,
    format,
    returnedBytes,
    totalBytes,
    truncated,
    path,
  };
}
|
|
109
|
+
|
|
110
|
+
/** Arguments accepted by {@link executeFetch}. */
export type ExecuteFetchInput = {
  /** Client tried first for markdown requests. */
  readonly jina: JinaReaderClient;
  /** Client used for HTML requests and as the markdown fallback. */
  readonly webView: WebViewFetchClient;
  /** URL handed to the clients unchanged. */
  readonly url: string;
  /** Requested output format. */
  readonly format: WebFetchFormat;
  /** Optional abort signal, forwarded to the clients when present. */
  readonly signal?: AbortSignal;
};
|
|
117
|
+
|
|
118
|
+
/**
 * Fetches a page in the requested format and formats the result.
 *
 * HTML requests go straight to the web view client. Any other request tries
 * the Jina client first and, if that attempt (including formatting its
 * result) fails, falls back to the web view's markdown rendering.
 *
 * @param input - Clients, target URL, format, and optional abort signal.
 * @returns The formatted outcome of whichever source succeeded.
 * @throws The original error when the signal has been aborted; otherwise an
 *   `Error` prefixed with `Failed to fetch:` describing the fallback failure.
 *   Errors from the HTML path propagate unchanged.
 */
export async function executeFetch(
  input: ExecuteFetchInput
): Promise<WebFetchOutcome> {
  const { jina, webView, url, format, signal } = input;

  // Leave `signal` off the request entirely when the caller supplied none.
  const fetchInput = signal === undefined ? { url } : { url, signal };
  const wasAborted = (): boolean => signal?.aborted === true;

  if (format === "html") {
    const htmlPage = await webView.fetchHtml(fetchInput);
    return formatOutcome(htmlPage, "html");
  }

  try {
    const remotePage = await jina.fetchUrl(fetchInput);
    return await formatOutcome(remotePage, "markdown");
  } catch (remoteError) {
    // A non-abort failure here is deliberately dropped so the rendered
    // fallback below gets its chance.
    if (wasAborted()) {
      throw remoteError;
    }
  }

  try {
    const renderedPage = await webView.fetchMarkdown(fetchInput);
    return await formatOutcome(renderedPage, "markdown");
  } catch (renderedError) {
    if (wasAborted()) {
      throw renderedError;
    }
    throw new Error(`Failed to fetch: ${describeError(renderedError)}`);
  }
}
|
|
151
|
+
|
|
152
|
+
/**
 * Decides whether a URL hostname may be treated as a public destination.
 *
 * The hostname is normalized first (lower-cased, trailing dots and IPv6
 * brackets removed). `localhost`, any `*.localhost` name, and any `*.local`
 * name are refused; IP literals are delegated to the IPv4/IPv6 range checks;
 * every other non-empty name is accepted.
 *
 * This is a purely lexical check: nothing is resolved here, so a public name
 * that resolves to a private address is not detected.
 *
 * @param hostname - Hostname as reported by a parsed `URL`.
 * @returns `true` when the hostname looks public, `false` otherwise.
 */
function isPublicHostname(hostname: string): boolean {
  const normalized = normalizeHostname(hostname);

  // RFC 6761 reserves the whole `.localhost` tree for loopback, so
  // `foo.localhost` is just as local as `localhost` itself.
  const isLoopbackName =
    normalized === "localhost" || normalized.endsWith(".localhost");

  if (isLoopbackName || normalized.endsWith(".local")) {
    return false;
  }

  const version = isIP(normalized);

  if (version === 4) {
    return isPublicIpv4(normalized);
  }

  if (version === 6) {
    return isPublicIpv6(normalized);
  }

  // Not an IP literal: accept any name that is not empty after normalization.
  return normalized.length > 0;
}
|
|
171
|
+
|
|
172
|
+
/**
 * Canonicalizes a hostname for comparison: lower-cases it, drops any run of
 * trailing dots, and then removes one surrounding pair of square brackets
 * (the form IPv6 literals take in URLs).
 *
 * @param hostname - Raw hostname.
 * @returns The canonical form.
 */
function normalizeHostname(hostname: string): string {
  const withoutTrailingDots = hostname.toLowerCase().replace(/\.+$/u, "");

  const isBracketed =
    withoutTrailingDots.startsWith("[") && withoutTrailingDots.endsWith("]");

  return isBracketed ? withoutTrailingDots.slice(1, -1) : withoutTrailingDots;
}
|
|
181
|
+
|
|
182
|
+
/**
 * Reports whether a dotted-quad IPv4 address lies outside the non-public
 * ranges this module refuses.
 *
 * Refused: 0.0.0.0/8, 10.0.0.0/8, 127.0.0.0/8, 169.254.0.0/16,
 * 172.16.0.0/12, 192.168.0.0/16, 100.64.0.0/10, 192.0.0.0/24,
 * 198.18.0.0/15, and everything from 224.0.0.0 upward.
 *
 * @param ip - IPv4 address in dotted-decimal form.
 * @returns `true` when the address is outside every refused range.
 */
function isPublicIpv4(ip: string): boolean {
  const octets = ip.split(".").map((octet) => Number(octet));
  const [a = 0, b = 0, c = 0] = octets;

  if (a === 0 || a === 10 || a === 127) {
    return false;
  }
  if (a === 169 && b === 254) {
    return false;
  }
  if (a === 172 && b >= 16 && b <= 31) {
    return false;
  }
  if (a === 192 && b === 168) {
    return false;
  }
  if (a === 100 && b >= 64 && b <= 127) {
    return false;
  }
  if (a === 192 && b === 0 && c === 0) {
    return false;
  }
  if (a === 198 && (b === 18 || b === 19)) {
    return false;
  }

  return a < 224;
}

/**
 * Reports whether an IPv6 address (without brackets) is publicly routable.
 *
 * The address is expanded to its eight 16-bit groups first, so the verdict
 * does not depend on how it was spelled. That matters because a parsed `URL`
 * reports IPv6 hosts in hexadecimal only: `[::ffff:127.0.0.1]` arrives here
 * as `::ffff:7f00:1`, with no dotted quad to look for.
 *
 * Addresses that carry an IPv4 address (IPv4-mapped, IPv4-compatible, and the
 * NAT64 well-known prefix) are judged by that IPv4 address. This also covers
 * `::` and `::1`, which decode to 0.0.0.0 and 0.0.0.1. Otherwise unique-local
 * (fc00::/7), link-local (fe80::/10), and multicast (ff00::/8) are refused.
 *
 * @param ip - IPv6 address text.
 * @returns `true` when the address is considered public; `false` when it is
 *   not, or when the text cannot be parsed.
 */
function isPublicIpv6(ip: string): boolean {
  const groups = parseIpv6Groups(ip);

  if (groups === undefined) {
    // Fail closed: never call something public that we could not read.
    return false;
  }

  const embedded = readEmbeddedIpv4(groups);

  if (embedded !== undefined) {
    return isPublicIpv4(embedded);
  }

  const first = groups[0] ?? 0;

  if ((first & 0xfe00) === 0xfc00) {
    return false;
  }
  if ((first & 0xffc0) === 0xfe80) {
    return false;
  }

  return (first & 0xff00) !== 0xff00;
}

/**
 * Extracts the IPv4 address carried in the last 32 bits of an IPv6 address
 * when the address uses one of the recognized embedding layouts:
 * `::a.b.c.d` (IPv4-compatible), `::ffff:a.b.c.d` (IPv4-mapped), or
 * `64:ff9b::a.b.c.d` (NAT64 well-known prefix, RFC 6052).
 *
 * @param groups - The eight 16-bit groups of the address.
 * @returns The embedded address in dotted-decimal form, or `undefined` when
 *   the address uses none of those layouts.
 */
function readEmbeddedIpv4(groups: readonly number[]): string | undefined {
  const [g0 = 0, g1 = 0, g2 = 0, g3 = 0, g4 = 0, g5 = 0, g6 = 0, g7 = 0] =
    groups;

  const middleIsZero = g2 === 0 && g3 === 0 && g4 === 0;
  const isMappedOrCompatible =
    g0 === 0 && g1 === 0 && middleIsZero && (g5 === 0 || g5 === 0xffff);
  const isNat64 = g0 === 0x64 && g1 === 0xff9b && middleIsZero && g5 === 0;

  if (!isMappedOrCompatible && !isNat64) {
    return undefined;
  }

  return `${g6 >> 8}.${g6 & 0xff}.${g7 >> 8}.${g7 & 0xff}`;
}

/**
 * Expands IPv6 text into its eight 16-bit groups, accepting `::` compression
 * and a trailing dotted-quad.
 *
 * @param ip - IPv6 address text without brackets.
 * @returns The eight groups, or `undefined` when the text is not a
 *   well-formed address.
 */
function parseIpv6Groups(ip: string): number[] | undefined {
  let text = ip;

  // Rewrite a trailing dotted quad as the two hex groups it stands for.
  const quad = /(?:^|:)(\d{1,3}(?:\.\d{1,3}){3})$/u.exec(text)?.[1];

  if (quad !== undefined) {
    const [a = 0, b = 0, c = 0, d = 0] = quad.split(".").map(Number);

    if (a > 255 || b > 255 || c > 255 || d > 255) {
      return undefined;
    }

    const high = ((a << 8) | b).toString(16);
    const low = ((c << 8) | d).toString(16);
    text = `${text.slice(0, text.length - quad.length)}${high}:${low}`;
  }

  const halves = text.split("::");

  if (halves.length > 2) {
    return undefined;
  }

  const compressed = halves.length === 2;
  const head = parseHexGroups(halves[0] ?? "");
  const tail = compressed ? parseHexGroups(halves[1] ?? "") : [];

  if (head === undefined || tail === undefined) {
    return undefined;
  }

  // `::` must stand for at least one group; without it all eight are spelled out.
  const missing = 8 - head.length - tail.length;

  if (compressed ? missing < 1 : missing !== 0) {
    return undefined;
  }

  return [...head, ...Array.from({ length: missing }, () => 0), ...tail];
}

/**
 * Parses a colon-separated run of 1-4 digit hexadecimal groups.
 *
 * @param part - Text on one side of `::` (may be empty).
 * @returns The group values, or `undefined` when any group is malformed.
 */
function parseHexGroups(part: string): number[] | undefined {
  if (part === "") {
    return [];
  }

  const values: number[] = [];

  for (const piece of part.split(":")) {
    if (!/^[0-9a-f]{1,4}$/iu.test(piece)) {
      return undefined;
    }
    values.push(Number.parseInt(piece, 16));
  }

  return values;
}
|
|
246
|
+
|
|
247
|
+
/**
 * Produces display text for a caught value.
 *
 * @param error - Whatever was thrown.
 * @returns The `message` of an `Error`, or the string form of anything else.
 */
function describeError(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }

  return String(error);
}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
2
|
+
import { PimSettings } from "../../shared/PimSettings";
|
|
3
|
+
import { Renderer } from "../../shared/Renderer";
|
|
4
|
+
import { SpillCache } from "../../shared/SpillCache";
|
|
5
|
+
import { Tools } from "../../shared/Tools";
|
|
6
|
+
import { executeFetch, validatePublicUrl, type WebFetchOutcome } from "./fetch";
|
|
7
|
+
import { JinaReaderClient } from "./JinaReaderClient";
|
|
8
|
+
import { formatTitle, type WebFetchTitleOutcome } from "./render";
|
|
9
|
+
import { type WebFetchInput, webFetchSchema } from "./schema";
|
|
10
|
+
import { WebViewFetchClient } from "./WebViewFetchClient";
|
|
11
|
+
|
|
12
|
+
/** Value passed as `previewLines` when rendering a fetch result. */
const PREVIEW_LINES = 10;

/** Per-call render state shared between `renderCall` and `renderResult`. */
type WebFetchRenderState = {
  /** Format and size of the finished fetch; unset until a result is rendered. */
  outcome?: WebFetchTitleOutcome;
};
|
|
17
|
+
|
|
18
|
+
/**
 * Creates the Jina reader client, passing the API key from settings when one
 * is configured and no options otherwise.
 *
 * @returns The configured client.
 */
async function createJina(): Promise<JinaReaderClient> {
  const apiKey = await PimSettings.getJinaApiKey();
  const options = apiKey ? { apiKey } : {};

  return new JinaReaderClient(options);
}
|
|
22
|
+
|
|
23
|
+
/**
 * Registers the `web_fetch` tool with the extension host.
 *
 * Installs the spill-cache sweeper, creates one web view client, and builds
 * the Jina client on first use. The tool validates the requested URL, fetches
 * it as markdown (the default) or HTML, and returns the formatted text along
 * with size, truncation, and spill-path details.
 *
 * @param pi - Extension API the tool is registered on.
 */
export default function (pi: ExtensionAPI): void {
  SpillCache.installSweeper();

  let jinaPromise: Promise<JinaReaderClient> | undefined;

  // Build the Jina client once and share it between calls. A failed build is
  // forgotten rather than cached, so a later call can try again instead of
  // replaying the same rejection for the rest of the process.
  // NOTE(review): whether client construction can fail depends on
  // PimSettings.getJinaApiKey, which is not visible here.
  const getJina = (): Promise<JinaReaderClient> => {
    const cached = jinaPromise;
    if (cached !== undefined) {
      return cached;
    }

    const pending = createJina();
    jinaPromise = pending;
    void pending.catch(() => {
      if (jinaPromise === pending) {
        jinaPromise = undefined;
      }
    });

    // Callers still receive the original promise, rejection included.
    return pending;
  };

  const webView = new WebViewFetchClient();

  Tools.register(pi, {
    name: "web_fetch",
    label: "web_fetch",
    description: "Fetch a web page as markdown or HTML.",
    parameters: webFetchSchema,
    renderShell: "self",
    executionMode: "parallel",
    async execute(_id, params, signal) {
      const { url, format } = params as WebFetchInput;

      if (signal?.aborted) {
        throw new Error("Web fetch aborted before execution.");
      }

      // Reject non-public targets before any client is created or contacted.
      const safeUrl = validatePublicUrl(url);

      const jina = await getJina();
      const outcome = await executeFetch({
        jina,
        webView,
        url: safeUrl,
        format: format ?? "markdown",
        ...(signal === undefined ? {} : { signal }),
      });

      return {
        content: [{ type: "text", text: outcome.text }],
        details: {
          url: outcome.url,
          title: outcome.title,
          format: outcome.format,
          returnedBytes: outcome.returnedBytes,
          totalBytes: outcome.totalBytes,
          truncated: outcome.truncated,
          path: outcome.path,
        },
      };
    },
    renderCall(args, theme, context) {
      const input = (args ?? {}) as Partial<WebFetchInput>;
      const state = context.state as WebFetchRenderState;
      return Renderer.renderToolCallTitle({
        label: "Web Fetch",
        title: formatTitle(input.url, input.format, state.outcome),
        theme,
        context,
      });
    },
    renderResult(result, options, theme, context) {
      const state = context.state as WebFetchRenderState;

      // On the first final result, record format and size in the shared state
      // and invalidate so the call title can show them.
      if (!options.isPartial && state.outcome === undefined) {
        const details = result.details as
          | Pick<WebFetchOutcome, "format" | "totalBytes">
          | undefined;

        if (
          details?.format !== undefined &&
          typeof details.totalBytes === "number"
        ) {
          state.outcome = {
            format: details.format,
            totalBytes: details.totalBytes,
          };
          context.invalidate();
        }
      }

      return Renderer.renderBorderedResult({
        result,
        options,
        theme,
        context,
        previewLines: PREVIEW_LINES,
      });
    },
  });
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
import { formatTitle } from "./render";
|
|
3
|
+
|
|
4
|
+
describe("formatTitle", () => {
  const PAGE_URL = "https://example.com";

  test("returns placeholder and default format when url is undefined", () => {
    const title = formatTitle(undefined, undefined, undefined);

    expect(title).toBe("... (Markdown)");
  });

  test("renders default markdown pre-result", () => {
    const title = formatTitle(PAGE_URL, undefined, undefined);

    expect(title).toBe("https://example.com (Markdown)");
  });

  test("renders requested HTML pre-result", () => {
    const title = formatTitle(PAGE_URL, "html", undefined);

    expect(title).toBe("https://example.com (HTML)");
  });

  test("renders size + format label after result arrives", () => {
    const title = formatTitle(PAGE_URL, undefined, {
      format: "markdown",
      totalBytes: 23 * 1024,
    });

    expect(title).toBe("https://example.com (23KB Markdown)");
  });

  test("strips trailing zeros and supports two decimals", () => {
    const title = formatTitle(PAGE_URL, undefined, {
      format: "html",
      totalBytes: 5355,
    });

    expect(title).toBe("https://example.com (5.23KB HTML)");
  });

  test("renders bytes for tiny payloads", () => {
    const title = formatTitle(PAGE_URL, undefined, {
      format: "markdown",
      totalBytes: 512,
    });

    expect(title).toBe("https://example.com (512B Markdown)");
  });

  test("renders MB for large payloads", () => {
    const title = formatTitle(PAGE_URL, undefined, {
      format: "html",
      totalBytes: 2.5 * 1024 * 1024,
    });

    expect(title).toBe("https://example.com (2.5MB HTML)");
  });
});
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import type { WebFetchFormat, WebFetchResolvedFormat } from "./schema";
|
|
2
|
+
|
|
3
|
+
/** The parts of a finished fetch that the call title displays. */
export type WebFetchTitleOutcome = {
  /** Format the content came back in; takes precedence over the requested one. */
  readonly format: WebFetchResolvedFormat;
  /** Byte count rendered as the size in the title. */
  readonly totalBytes: number;
};
|
|
7
|
+
|
|
8
|
+
/**
 * Builds the title shown for a web fetch call.
 *
 * @param url - Requested URL; `...` is shown while it is not yet known.
 * @param format - Requested format, used until an outcome is available.
 * @param outcome - Format and size of the finished fetch, if any.
 * @returns `<url> (<label>)` before a result exists and
 *   `<url> (<size> <label>)` afterwards. The label defaults to Markdown.
 */
export function formatTitle(
  url: string | undefined,
  format: WebFetchFormat | undefined,
  outcome: WebFetchTitleOutcome | undefined
): string {
  const shownUrl = url ?? "...";
  const label = formatLabel(outcome?.format ?? format ?? "markdown");

  if (outcome === undefined) {
    return `${shownUrl} (${label})`;
  }

  const size = formatSize(outcome.totalBytes);
  return `${shownUrl} (${size} ${label})`;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Maps a format to its display label.
 *
 * @param format - Format to label.
 * @returns `HTML` for the `html` format and `Markdown` for anything else.
 */
function formatLabel(format: WebFetchResolvedFormat): string {
  if (format === "html") {
    return "HTML";
  }

  return "Markdown";
}
|
|
26
|
+
|
|
27
|
+
/**
 * Renders a byte count as a compact size label.
 *
 * Counts below 1024 are shown verbatim with a `B` suffix. Larger counts are
 * shown in `KB` or `MB` (powers of 1024) with at most two decimals and no
 * trailing zeros.
 *
 * @param bytes - Size in bytes.
 * @returns The label, e.g. `512B`, `5.23KB`, `2.5MB`.
 */
function formatSize(bytes: number): string {
  if (bytes < 1024) {
    return `${bytes}B`;
  }

  // Pick the unit from the rounded figure, not the raw byte count: a size
  // just under 1 MiB rounds to "1024.00" and reads better as 1MB than 1024KB.
  const kilobytes = (bytes / 1024).toFixed(2);
  if (Number(kilobytes) < 1024) {
    return `${trimZeros(kilobytes)}KB`;
  }

  return `${trimZeros((bytes / (1024 * 1024)).toFixed(2))}MB`;
}

/**
 * Drops insignificant trailing zeros from a decimal string, along with the
 * decimal point when nothing is left after it. Only the fractional part is
 * touched, so a value without a decimal point is returned unchanged.
 *
 * @param value - Decimal text such as the output of `toFixed`.
 * @returns The shortened text, e.g. `23.00` becomes `23` and `2.50` becomes `2.5`.
 */
function trimZeros(value: string): string {
  // First alternative keeps the significant fractional digits; the second
  // removes an all-zero fraction together with its decimal point.
  return value.replace(/(\.\d*?[1-9])0+$|\.0+$/u, "$1");
}
|