@j6e/pi-md-web-surfer 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +105 -0
- package/package.json +42 -0
- package/src/config.test.ts +69 -0
- package/src/config.ts +63 -0
- package/src/fetch-markdown.test.ts +272 -0
- package/src/fetch-markdown.ts +108 -0
- package/src/index.test.ts +73 -0
- package/src/index.ts +267 -0
- package/src/web-search.test.ts +221 -0
- package/src/web-search.ts +103 -0
package/README.md
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# pi-md-web-surfer
|
|
2
|
+
|
|
3
|
+
A [pi](https://github.com/nichochar/pi-coding-agent) extension that adds `fetch_markdown` and `web_search` tools — fetch web pages as clean markdown and search the web.
|
|
4
|
+
|
|
5
|
+
Inspired by the [pi-fetch-markdown](https://github.com/ruliana/pi-fetch-markdown) skill.
|
|
6
|
+
|
|
7
|
+
## Tools
|
|
8
|
+
|
|
9
|
+
### `fetch_markdown`
|
|
10
|
+
|
|
11
|
+
Fetch any web page as clean, token-efficient markdown.
|
|
12
|
+
|
|
13
|
+
Three strategies, tried in order:
|
|
14
|
+
|
|
15
|
+
1. **Content Negotiation** — Sends `Accept: text/markdown` header. Sites using Cloudflare's [Markdown for Agents](https://blog.cloudflare.com/markdown-for-agents/) return markdown directly (~80% fewer tokens than HTML).
|
|
16
|
+
2. **Direct text/plain** — For raw `.md` files, `text/plain` responses, etc. The body is already clean text, so we use it directly.
|
|
17
|
+
3. **Jina Reader** (fallback) — For sites that return HTML, routes through `r.jina.ai` which renders the page and extracts clean markdown.
|
|
18
|
+
|
|
19
|
+
**Usage:**
|
|
20
|
+
```
|
|
21
|
+
fetch_markdown(url, info?)
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
### `web_search`
|
|
25
|
+
|
|
26
|
+
Search the web and return results as clean markdown via Jina Search.
|
|
27
|
+
|
|
28
|
+
**Requires a Jina API key.** Get one free at [jina.ai/?newKey](https://jina.ai/?newKey), then configure it with `/md-web-surfer-config` or the `JINA_API_KEY` environment variable.
|
|
29
|
+
|
|
30
|
+
**Usage:**
|
|
31
|
+
```
|
|
32
|
+
web_search(query, num?, info?, retainImages?, withLinksSummary?)
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
- `query` — Search query
|
|
36
|
+
- `num` — Number of results (1–20, default 5)
|
|
37
|
+
- `info` — Include metadata (method, tokens)
|
|
38
|
+
- `retainImages` — Image retention mode: `"none"` (default), `"all"`, or `"alt_text"` (keep alt text with auto-generation)
|
|
39
|
+
- `withLinksSummary` — Include a summary section for hyperlinks (default: false)
|
|
40
|
+
|
|
41
|
+
## Configuration
|
|
42
|
+
|
|
43
|
+
### `/md-web-surfer-config`
|
|
44
|
+
|
|
45
|
+
Interactive command to set your Jina API key. Writes to `~/.config/pi-md-web-surfer/config.json` (chmod 0600).
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
/md-web-surfer-config # prompt for key
|
|
49
|
+
/md-web-surfer-config --show # show current key status (masked)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### API key resolution
|
|
53
|
+
|
|
54
|
+
First match wins:
|
|
55
|
+
|
|
56
|
+
1. `JINA_API_KEY` environment variable
|
|
57
|
+
2. `jinaApiKey` field in `~/.config/pi-md-web-surfer/config.json`
|
|
58
|
+
|
|
59
|
+
## Installation
|
|
60
|
+
|
|
61
|
+
### Quick test (one session)
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pi -e ./src/index.ts
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Persistent (auto-discovered)
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
ln -s "$(pwd)" ~/.pi/agent/extensions/pi-md-web-surfer
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Then run `pi` normally. The tools appear alongside `read`, `write`, `edit`, etc.
|
|
74
|
+
|
|
75
|
+
### As a pi package
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
pi install /path/to/pi-md-web-surfer
|
|
79
|
+
# or once published:
|
|
80
|
+
# pi install npm:@j6e/pi-md-web-surfer
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Development
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
npm install
|
|
87
|
+
npm test # run tests once
|
|
88
|
+
npm run test:watch # TDD mode
|
|
89
|
+
npm run check # typecheck
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Smoke test against real URLs:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
node --experimental-strip-types scripts/smoke-test.ts
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Requirements
|
|
99
|
+
|
|
100
|
+
- Node.js 22.6+ (for `--experimental-strip-types`)
|
|
101
|
+
- pi-coding-agent 0.74+
|
|
102
|
+
|
|
103
|
+
## License
|
|
104
|
+
|
|
105
|
+
MIT
|
package/package.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@j6e/pi-md-web-surfer",
|
|
3
|
+
"version": "0.9.1",
|
|
4
|
+
"description": "Pi extension: fetch web pages as markdown and search the web, powered by Jina AI",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"author": "Joan G. Esquerdo",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "git+https://github.com/j6e/pi-md-web-surfer.git"
|
|
10
|
+
},
|
|
11
|
+
"bugs": {
|
|
12
|
+
"url": "https://github.com/j6e/pi-md-web-surfer/issues"
|
|
13
|
+
},
|
|
14
|
+
"homepage": "https://github.com/j6e/pi-md-web-surfer#readme",
|
|
15
|
+
"keywords": [
|
|
16
|
+
"pi-package"
|
|
17
|
+
],
|
|
18
|
+
"type": "module",
|
|
19
|
+
"scripts": {
|
|
20
|
+
"test": "vitest run",
|
|
21
|
+
"test:watch": "vitest",
|
|
22
|
+
"check": "tsc --noEmit"
|
|
23
|
+
},
|
|
24
|
+
"peerDependencies": {
|
|
25
|
+
"@earendil-works/pi-coding-agent": "*",
|
|
26
|
+
"@earendil-works/pi-tui": "*",
|
|
27
|
+
"typebox": "*"
|
|
28
|
+
},
|
|
29
|
+
"devDependencies": {
|
|
30
|
+
"@types/node": "^22.0.0",
|
|
31
|
+
"typescript": "^5.6.0",
|
|
32
|
+
"vitest": "^2.1.0"
|
|
33
|
+
},
|
|
34
|
+
"files": [
|
|
35
|
+
"src"
|
|
36
|
+
],
|
|
37
|
+
"pi": {
|
|
38
|
+
"extensions": [
|
|
39
|
+
"./src/index.ts"
|
|
40
|
+
]
|
|
41
|
+
}
|
|
42
|
+
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
getConfigPath,
|
|
4
|
+
readConfig,
|
|
5
|
+
writeConfig,
|
|
6
|
+
clearConfig,
|
|
7
|
+
resolveJinaApiKey,
|
|
8
|
+
maskKey,
|
|
9
|
+
} from "./config.js";
|
|
10
|
+
|
|
11
|
+
describe("config", () => {
  const originalEnv = process.env.JINA_API_KEY;

  // These tests exercise the real config path returned by getConfigPath()
  // (clearConfig() unlinks it, writeConfig() overwrites it). Snapshot whatever
  // the developer has stored there so that running the suite does not wipe
  // their saved API key.
  const originalConfig = readConfig();
  const hadOriginalConfig = Object.keys(originalConfig ?? {}).length > 0;

  /** Puts the developer's pre-existing config back, or removes test leftovers. */
  function restoreOriginalConfig(): void {
    if (hadOriginalConfig) {
      writeConfig(originalConfig);
    } else {
      clearConfig();
    }
  }

  beforeEach(() => {
    // Every test starts with no env key and no config file.
    delete process.env.JINA_API_KEY;
    clearConfig();
  });

  afterEach(() => {
    if (originalEnv !== undefined) {
      process.env.JINA_API_KEY = originalEnv;
    } else {
      delete process.env.JINA_API_KEY;
    }
    restoreOriginalConfig();
  });

  it("reads empty config when file does not exist", () => {
    const config = readConfig();
    expect(config).toEqual({});
  });

  it("writes and reads config", () => {
    writeConfig({ jinaApiKey: "jina_test_123" });
    const config = readConfig();
    expect(config.jinaApiKey).toBe("jina_test_123");
  });

  it("overwrites existing config", () => {
    writeConfig({ jinaApiKey: "old" });
    writeConfig({ jinaApiKey: "new" });
    const config = readConfig();
    expect(config.jinaApiKey).toBe("new");
  });

  it("resolves from env var first", () => {
    process.env.JINA_API_KEY = "env_key";
    writeConfig({ jinaApiKey: "file_key" });
    expect(resolveJinaApiKey()).toBe("env_key");
  });

  it("falls back to config file when env is unset", () => {
    writeConfig({ jinaApiKey: "file_key" });
    expect(resolveJinaApiKey()).toBe("file_key");
  });

  it("returns null when no key is available", () => {
    expect(resolveJinaApiKey()).toBeNull();
  });

  it("masks keys correctly", () => {
    expect(maskKey("short")).toBe("****");
    expect(maskKey("jina_1234567890abcd")).toBe("jina...abcd");
  });

  it("config path includes package name", () => {
    expect(getConfigPath()).toContain("pi-md-web-surfer");
  });
});
|
package/src/config.ts
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import {
  chmodSync,
  existsSync,
  mkdirSync,
  readFileSync,
  unlinkSync,
  writeFileSync,
} from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
|
|
10
|
+
|
|
11
|
+
/** Directory holding this extension's settings, under the user's home directory. */
const CONFIG_DIR = join(homedir(), ".config", "pi-md-web-surfer");

/** JSON file inside {@link CONFIG_DIR} where the settings are persisted. */
const CONFIG_PATH = join(CONFIG_DIR, "config.json");

/** Shape of the persisted settings file. */
export interface Config {
  /** Jina API key; absent when the user has not configured one. */
  jinaApiKey?: string;
}

/**
 * @returns The absolute path of the directory that contains the config file.
 */
export function getConfigDir(): string {
  return CONFIG_DIR;
}

/**
 * @returns The absolute path of the config JSON file.
 */
export function getConfigPath(): string {
  return CONFIG_PATH;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Loads the persisted config from disk.
 *
 * Any failure to read or parse the file yields an empty config rather than an
 * error, so a missing file is treated as "nothing configured yet".
 *
 * The parsed JSON is validated before being returned: a top-level value that
 * is not a plain object (e.g. `null`, an array, a string) is treated as an
 * empty config, and a `jinaApiKey` that is not a string is dropped. Other
 * fields found in the file are passed through untouched.
 *
 * @returns The stored config, or `{}` when none is usable.
 */
export function readConfig(): Config {
  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(CONFIG_PATH, "utf8"));
  } catch {
    // Missing, unreadable, or malformed file: behave as if nothing is stored.
    return {};
  }

  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    return {};
  }

  const config: Record<string, unknown> = { ...parsed };
  if (typeof config.jinaApiKey !== "string") {
    // Callers rely on jinaApiKey being a string when present.
    delete config.jinaApiKey;
  }
  return config as Config;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Persists `config` as pretty-printed JSON, replacing any existing file.
 *
 * The config directory is created on demand with mode 0700. The file is
 * written with mode 0600; because that creation mode is only honoured when
 * the file does not exist yet, an already-existing file is explicitly
 * restricted to 0600 *before* the (possibly secret) content is written to it.
 *
 * @param config - Settings to store.
 * @throws If the directory or file cannot be created, re-permissioned, or written.
 */
export function writeConfig(config: Config): void {
  mkdirSync(CONFIG_DIR, { recursive: true, mode: 0o700 });

  // Tighten permissions first so the key never lands in a file that is
  // readable by other users, even briefly.
  if (existsSync(CONFIG_PATH)) {
    chmodSync(CONFIG_PATH, 0o600);
  }

  writeFileSync(CONFIG_PATH, JSON.stringify(config, null, 2), {
    mode: 0o600,
  });
}
|
|
41
|
+
|
|
42
|
+
/**
 * Deletes the config file on a best-effort basis.
 *
 * Every error raised by the removal (including the file not existing) is
 * deliberately swallowed, so this never throws.
 */
export function clearConfig(): void {
  try {
    unlinkSync(CONFIG_PATH);
  } catch {
    // Best-effort removal: nothing to do when the file cannot be deleted.
  }
}
|
|
49
|
+
|
|
50
|
+
/**
 * Determines which Jina API key to use. First non-empty match wins:
 *
 * 1. the `JINA_API_KEY` environment variable,
 * 2. the `jinaApiKey` field of the config file.
 *
 * The config file is only read when the environment variable is unset or empty.
 *
 * @returns The resolved key, or `null` when neither source provides one.
 */
export function resolveJinaApiKey(): string | null {
  const fromEnv = process.env.JINA_API_KEY;
  if (fromEnv) {
    return fromEnv;
  }

  const fromFile = readConfig().jinaApiKey;
  return fromFile ? fromFile : null;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Produces a display-safe version of an API key.
 *
 * Keys of eight characters or fewer are fully hidden as `"****"`; longer keys
 * keep their first and last four characters with `"..."` in between.
 *
 * @param key - The secret to mask.
 * @returns The masked representation.
 */
export function maskKey(key: string): string {
  const visibleChars = 4;
  const isTooShortToReveal = key.length <= visibleChars * 2;
  return isTooShortToReveal
    ? "****"
    : `${key.slice(0, visibleChars)}...${key.slice(-visibleChars)}`;
}
|
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
|
2
|
+
|
|
3
|
+
// The temp directory is pinned and file writes are stubbed so the
// truncation path never touches the real filesystem.
vi.mock("node:os", () => ({
  tmpdir: vi.fn(() => "/tmp"),
}));

vi.mock("node:fs/promises", () => ({
  writeFile: vi.fn(() => Promise.resolve()),
}));

// Stand-in for the host package: by default truncateHead reports that
// nothing was truncated and echoes the content back unchanged.
vi.mock("@earendil-works/pi-coding-agent", async () => ({
  truncateHead: vi.fn((content: string) => {
    const lineCount = content.split("\n").length;
    return {
      content,
      truncated: false,
      truncatedBy: undefined,
      firstLineExceedsLimit: false,
      lastLinePartial: false,
      maxLines: 2000,
      maxBytes: 50000,
      outputLines: lineCount,
      totalLines: lineCount,
      outputBytes: content.length,
      totalBytes: content.length,
    };
  }),
  formatSize: vi.fn((bytes: number) => `${bytes}B`),
  DEFAULT_MAX_LINES: 2000,
  DEFAULT_MAX_BYTES: 50000,
}));
|
|
26
|
+
|
|
27
|
+
import { fetchMarkdown } from "./fetch-markdown.js";
|
|
28
|
+
import { truncateHead } from "@earendil-works/pi-coding-agent";
|
|
29
|
+
import { writeFile } from "node:fs/promises";
|
|
30
|
+
|
|
31
|
+
describe("fetchMarkdown", () => {
  const SITE = "https://example.com";

  /** Builds a canned Response; headers are only attached when provided. */
  const reply = (
    body: string,
    status: number,
    headers?: Record<string, string>,
  ): Response =>
    new Response(
      body,
      headers ? { status, headers: new Headers(headers) } : { status },
    );

  /** Shorthand for a 200 response served as plain `text/markdown`. */
  const markdownReply = (body: string): Response =>
    reply(body, 200, { "content-type": "text/markdown" });

  beforeEach(() => {
    vi.stubGlobal("fetch", vi.fn());
  });

  afterEach(() => {
    vi.unstubAllGlobals();
    vi.clearAllMocks();
  });

  it("returns markdown via content negotiation when server supports it", async () => {
    vi.mocked(fetch).mockResolvedValueOnce(
      reply("# Hello\n\nWorld", 200, {
        "content-type": "text/markdown; charset=utf-8",
        "x-markdown-tokens": "42",
        "content-signal": "blog-post",
      }),
    );

    const { text, details } = await fetchMarkdown(SITE);

    expect(text).toBe("# Hello\n\nWorld");
    expect(details.method).toBe("content-negotiation");
    expect(details.url).toBe("https://example.com");
  });

  it("falls back to Jina Reader when content negotiation fails", async () => {
    vi.mocked(fetch)
      .mockResolvedValueOnce(reply("Not Found", 404))
      .mockResolvedValueOnce(markdownReply("# Jina Result"));

    const { text, details } = await fetchMarkdown(SITE);

    expect(text).toBe("# Jina Result");
    expect(details.method).toBe("jina-reader");
    expect(fetch).toHaveBeenNthCalledWith(
      2,
      "https://r.jina.ai/https://example.com",
      expect.any(Object),
    );
  });

  it("falls back when server returns HTML despite markdown content-type", async () => {
    vi.mocked(fetch)
      .mockResolvedValueOnce(
        markdownReply("<!DOCTYPE html><html><body>oops</body></html>"),
      )
      .mockResolvedValueOnce(markdownReply("# Clean Markdown"));

    const { text, details } = await fetchMarkdown(SITE);

    expect(text).toBe("# Clean Markdown");
    expect(details.method).toBe("jina-reader");
  });

  it("falls back when content-type is not text/markdown", async () => {
    vi.mocked(fetch)
      .mockResolvedValueOnce(
        reply("<html></html>", 200, { "content-type": "text/html" }),
      )
      .mockResolvedValueOnce(markdownReply("# Fallback"));

    const { text, details } = await fetchMarkdown(SITE);

    expect(text).toBe("# Fallback");
    expect(details.method).toBe("jina-reader");
  });

  it("throws when both methods fail", async () => {
    vi.mocked(fetch)
      .mockResolvedValueOnce(reply("Error", 500))
      .mockResolvedValueOnce(reply("Error", 500));

    await expect(fetchMarkdown(SITE)).rejects.toThrow(
      "Both content negotiation and Jina Reader failed for https://example.com",
    );
  });

  it("throws when Jina returns HTML", async () => {
    vi.mocked(fetch)
      .mockResolvedValueOnce(reply("Error", 500))
      .mockResolvedValueOnce(reply("<!DOCTYPE html><html></html>", 200));

    await expect(fetchMarkdown(SITE)).rejects.toThrow(
      "Jina Reader returned empty or HTML for https://example.com",
    );
  });

  it("throws when Jina returns empty body", async () => {
    vi.mocked(fetch)
      .mockResolvedValueOnce(reply("Error", 500))
      .mockResolvedValueOnce(reply("", 200));

    await expect(fetchMarkdown(SITE)).rejects.toThrow(
      "Jina Reader returned empty or HTML for https://example.com",
    );
  });

  it("includes metadata when info flag is set", async () => {
    vi.mocked(fetch).mockResolvedValueOnce(
      reply("# Content", 200, {
        "content-type": "text/markdown",
        "x-markdown-tokens": "1234",
        "content-signal": "blog-post",
      }),
    );

    const { details } = await fetchMarkdown(SITE, { info: true });

    expect(details.tokens).toBe("1234");
    expect(details.contentSignal).toBe("blog-post");
  });

  it("omits metadata when info flag is false", async () => {
    vi.mocked(fetch).mockResolvedValueOnce(
      reply("# Content", 200, {
        "content-type": "text/markdown",
        "x-markdown-tokens": "1234",
      }),
    );

    const { details } = await fetchMarkdown(SITE);

    expect(details.tokens).toBeUndefined();
    expect(details.contentSignal).toBeUndefined();
  });

  it("handles truncated output by writing temp file", async () => {
    vi.mocked(fetch).mockResolvedValueOnce(
      markdownReply("# Very long content"),
    );

    // Force the truncation branch for this one call.
    vi.mocked(truncateHead).mockReturnValueOnce({
      content: "# Very",
      truncated: true,
      truncatedBy: "bytes",
      firstLineExceedsLimit: false,
      lastLinePartial: false,
      maxLines: 2000,
      maxBytes: 50000,
      outputLines: 1,
      totalLines: 100,
      outputBytes: 6,
      totalBytes: 10000,
    });

    const { text, details } = await fetchMarkdown(SITE);

    expect(text).toContain("[Output truncated:");
    expect(text).toContain("Full output saved to:");
    expect(details.fullOutputPath).toMatch(/\.md$/);
    expect(writeFile).toHaveBeenCalledWith(
      expect.stringMatching(/\.md$/),
      "# Very long content",
      "utf8",
    );
  });

  it("uses text/plain responses directly without Jina", async () => {
    vi.mocked(fetch).mockResolvedValueOnce(
      reply("# Raw Markdown\n\nContent", 200, {
        "content-type": "text/plain; charset=utf-8",
      }),
    );

    const { text, details } = await fetchMarkdown(
      "https://raw.githubusercontent.com/user/repo/main/README.md",
    );

    expect(text).toBe("# Raw Markdown\n\nContent");
    expect(details.method).toBe("direct");
    expect(fetch).toHaveBeenCalledTimes(1); // no Jina fallback
  });

  it("uses text/plain for non-markdown URLs too", async () => {
    vi.mocked(fetch).mockResolvedValueOnce(
      reply("just plain text", 200, { "content-type": "text/plain" }),
    );

    const { text, details } = await fetchMarkdown(
      "https://example.com/notes.txt",
    );

    expect(text).toBe("just plain text");
    expect(details.method).toBe("direct");
    expect(fetch).toHaveBeenCalledTimes(1); // no Jina fallback
  });

  it("passes abort signal to fetch", async () => {
    const controller = new AbortController();
    vi.mocked(fetch).mockResolvedValueOnce(markdownReply("# Content"));

    await fetchMarkdown(SITE, { signal: controller.signal });

    expect(fetch).toHaveBeenCalledWith(
      "https://example.com",
      expect.objectContaining({ signal: controller.signal }),
    );
  });
});
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import {
|
|
2
|
+
truncateHead,
|
|
3
|
+
formatSize,
|
|
4
|
+
DEFAULT_MAX_BYTES,
|
|
5
|
+
DEFAULT_MAX_LINES,
|
|
6
|
+
} from "@earendil-works/pi-coding-agent";
|
|
7
|
+
import { tmpdir } from "node:os";
|
|
8
|
+
import { writeFile } from "node:fs/promises";
|
|
9
|
+
import { join } from "node:path";
|
|
10
|
+
|
|
11
|
+
/** Optional settings accepted by `fetchMarkdown`. */
export interface FetchOptions {
  /** When true, `tokens` and `contentSignal` are added to the result details. */
  info?: boolean;
  /** Forwarded to every `fetch` call so the request can be cancelled. */
  signal?: AbortSignal;
}

/** Outcome of a `fetchMarkdown` call. */
export interface FetchResult {
  /** The (possibly truncated) page content. */
  text: string;
  /** Metadata about how the content was obtained, e.g. `method` and `url`. */
  details: Record<string, unknown>;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Reports whether the first `limit` characters of `text` contain an HTML
 * doctype or an opening `<html` tag (case-insensitive).
 */
function looksLikeHtml(text: string, limit: number): boolean {
  return /<!DOCTYPE|<html/i.test(text.slice(0, limit));
}

/**
 * Fetches `url` and returns its content as markdown/plain text.
 *
 * Strategies, tried in order:
 * 1. Content negotiation: request the URL with `Accept: text/markdown`; a
 *    successful `text/markdown` response is used as-is
 *    (`method: "content-negotiation"`).
 * 2. Direct: a successful `text/plain` response is used as-is
 *    (`method: "direct"`).
 * 3. Jina Reader: otherwise the URL is requested through `https://r.jina.ai/`
 *    (`method: "jina-reader"`).
 *
 * A direct response is rejected (and the next strategy tried) when its body
 * is empty or its beginning looks like HTML. A network-level failure of the
 * direct request also falls through to Jina Reader, unless the caller's
 * signal has been aborted, in which case the error is rethrown.
 *
 * The content is passed through `truncateHead`; when that reports truncation,
 * the full body is written to a temp file and a notice with its path is
 * appended to the returned text.
 *
 * @param url - Address of the page to fetch.
 * @param options - See {@link FetchOptions}.
 * @returns The text plus `details` (`method`, `url`; `tokens` and
 *   `contentSignal` when `options.info` is set; `fullOutputPath` when truncated).
 * @throws Error when Jina Reader responds with a non-OK status, or returns an
 *   empty or HTML body. Errors from the Jina `fetch` call itself and from
 *   writing the temp file propagate unchanged.
 */
export async function fetchMarkdown(
  url: string,
  options: FetchOptions = {},
): Promise<FetchResult> {
  const { info, signal } = options;

  let body = "";
  let method = "jina-reader";
  let tokens: string | undefined;
  let contentSignal: string | undefined;

  // ─── Method 1: Content negotiation ───
  let negotiate: Response | undefined;
  try {
    negotiate = await fetch(url, {
      signal,
      headers: {
        Accept: "text/markdown, text/html;q=0.9",
        "User-Agent": "Mozilla/5.0 (compatible; pi-agent/1.0)",
      },
      redirect: "follow",
    });
  } catch (error: unknown) {
    // A cancelled request must stay cancelled; any other transport failure
    // still leaves the Jina Reader fallback worth trying.
    if (signal?.aborted) {
      throw error;
    }
  }

  if (negotiate?.ok) {
    // Header values are matched case-insensitively.
    const ct = (negotiate.headers.get("content-type") ?? "").toLowerCase();
    const isMarkdown = ct.includes("text/markdown");

    // ─── Method 1b: text/plain served directly ───
    const isPlain = ct.includes("text/plain");

    if (isMarkdown || isPlain) {
      const candidate = await negotiate.text();
      // Only commit to this strategy when the body is actually usable, so
      // `method` never claims a strategy whose content was discarded.
      if (candidate && !looksLikeHtml(candidate, 500)) {
        body = candidate;
        if (isMarkdown) {
          method = "content-negotiation";
          tokens = negotiate.headers.get("x-markdown-tokens") ?? undefined;
          contentSignal = negotiate.headers.get("content-signal") ?? undefined;
        } else {
          method = "direct";
        }
      }
    }
  }

  // Release the first response if its body was never read, so the underlying
  // connection is not held open while the fallback runs.
  if (!body && negotiate?.body && !negotiate.bodyUsed) {
    try {
      await negotiate.body.cancel();
    } catch {
      // Best-effort cleanup only.
    }
  }

  // ─── Method 2: Jina Reader fallback ───
  if (!body) {
    const jina = await fetch(`https://r.jina.ai/${url}`, {
      signal,
      headers: {
        Accept: "text/markdown",
        "User-Agent": "Mozilla/5.0 (compatible; pi-agent/1.0)",
      },
    });
    if (!jina.ok) {
      throw new Error(
        `Both content negotiation and Jina Reader failed for ${url} (Jina Reader responded with HTTP ${jina.status})`,
      );
    }
    const jinaBody = await jina.text();
    if (!jinaBody || looksLikeHtml(jinaBody, 300)) {
      throw new Error(`Jina Reader returned empty or HTML for ${url}`);
    }
    body = jinaBody;
    method = "jina-reader";
  }

  // ─── Truncate if needed ───
  const truncated = truncateHead(body, {
    maxLines: DEFAULT_MAX_LINES,
    maxBytes: DEFAULT_MAX_BYTES,
  });

  let text = truncated.content;
  const details: Record<string, unknown> = { method, url };

  if (info) {
    details.tokens = tokens ?? "unknown";
    details.contentSignal = contentSignal ?? "none";
  }

  if (truncated.truncated) {
    // The random suffix keeps concurrent calls within the same millisecond
    // from overwriting each other's file.
    const suffix = Math.random().toString(36).slice(2, 10);
    const tmp = join(tmpdir(), `fetch-markdown-${Date.now()}-${suffix}.md`);
    await writeFile(tmp, body, "utf8");
    text += `\n\n[Output truncated: ${truncated.outputLines} of ${truncated.totalLines} lines`;
    text += ` (${formatSize(truncated.outputBytes)} of ${formatSize(truncated.totalBytes)}).`;
    text += ` Full output saved to: ${tmp}]`;
    details.fullOutputPath = tmp;
  }

  return { text, details };
}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import { describe, it, expect, vi } from "vitest";
|
|
2
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
3
|
+
import extensionFactory from "./index.js";
|
|
4
|
+
|
|
5
|
+
describe("extension", () => {
  /** The subset of a registered tool definition these tests inspect. */
  type ToolDefinition = {
    name: string;
    label: string;
    description: string;
  };

  /**
   * Runs the extension factory against a stubbed API and returns what it
   * registered.
   */
  function loadExtension() {
    const registerTool = vi.fn();
    const registerCommand = vi.fn();
    const mockPi = {
      registerTool,
      registerCommand,
    } as unknown as ExtensionAPI;

    extensionFactory(mockPi);

    const tools = registerTool.mock.calls.map(
      (call) => call[0] as ToolDefinition,
    );
    return { tools, registerCommand };
  }

  it("registers fetch_markdown tool", () => {
    const { tools } = loadExtension();

    const markdownTool = tools.find((tool) => tool.name === "fetch_markdown");
    expect(markdownTool).toBeDefined();
    expect(markdownTool!.label).toBe("Fetch Markdown");
    expect(markdownTool!.description).toContain("content negotiation");
  });

  it("registers web_search tool", () => {
    const { tools } = loadExtension();

    const searchTool = tools.find((tool) => tool.name === "web_search");
    expect(searchTool).toBeDefined();
    expect(searchTool!.label).toBe("Web Search");
    expect(searchTool!.description).toContain("Jina Search");
  });

  it("registers md-web-surfer-config command", () => {
    const { registerCommand } = loadExtension();

    expect(registerCommand).toHaveBeenCalledWith(
      "md-web-surfer-config",
      expect.objectContaining({
        description: expect.stringContaining("Jina API key"),
      }),
    );
  });
});
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
import type { ExtensionAPI, Theme } from "@earendil-works/pi-coding-agent";
|
|
2
|
+
import { highlightCode, keyHint } from "@earendil-works/pi-coding-agent";
|
|
3
|
+
import { Text } from "@earendil-works/pi-tui";
|
|
4
|
+
import { Type } from "typebox";
|
|
5
|
+
import { fetchMarkdown } from "./fetch-markdown.js";
|
|
6
|
+
import { webSearch } from "./web-search.js";
|
|
7
|
+
import {
|
|
8
|
+
resolveJinaApiKey,
|
|
9
|
+
readConfig,
|
|
10
|
+
writeConfig,
|
|
11
|
+
maskKey,
|
|
12
|
+
} from "./config.js";
|
|
13
|
+
|
|
14
|
+
/**
 * Substitutes every tab character in `text` with a space.
 *
 * @param text - Input that may contain tabs.
 * @returns The text with all tabs replaced.
 */
function replaceTabs(text: string): string {
  return text.split("\t").join(" ");
}
|
|
17
|
+
|
|
18
|
+
/**
 * Drops the run of empty strings at the end of `lines`.
 *
 * Only exactly-empty entries (`""`) count; whitespace-only lines are kept.
 * The input array is not modified.
 *
 * @param lines - Lines to trim.
 * @returns A new array ending at the last non-empty line (empty if there is none).
 */
function trimTrailingEmptyLines(lines: string[]): string[] {
  for (let last = lines.length - 1; last >= 0; last--) {
    if (lines[last] !== "") {
      return lines.slice(0, last + 1);
    }
  }
  return [];
}
|
|
25
|
+
|
|
26
|
+
/**
 * Formats tool output for display as markdown-highlighted text.
 *
 * The output has its tabs replaced, is run through `highlightCode` with the
 * "markdown" language, and loses its trailing empty lines. When neither
 * `expanded` nor `isPartial` is set, only the first 10 lines are shown,
 * followed by a muted "... (N more lines, <expand key hint>)" footer.
 *
 * @param output - Raw text to render.
 * @param options - `expanded` / `isPartial`: either one disables the 10-line cap.
 * @param theme - Used to colour the "more lines" footer.
 * @returns The rendered text, always starting with a newline.
 */
function renderMarkdownOutput(
  output: string,
  options: { expanded: boolean; isPartial: boolean },
  theme: Theme,
): string {
  const allLines = trimTrailingEmptyLines(
    highlightCode(replaceTabs(output), "markdown"),
  );

  const showEverything = options.expanded || options.isPartial;
  const limit = showEverything ? allLines.length : 10;
  const hiddenCount = allLines.length - limit;

  const visible = allLines
    .slice(0, limit)
    .map((line) => replaceTabs(line))
    .join("\n");
  const rendered = `\n${visible}`;

  if (hiddenCount <= 0) {
    return rendered;
  }

  const prefix = theme.fg("muted", `\n... (${hiddenCount} more lines,`);
  const hint = keyHint("app.tools.expand", "to expand");
  return `${rendered}${prefix} ${hint})`;
}
|
|
46
|
+
|
|
47
|
+
export default function (pi: ExtensionAPI) {
|
|
48
|
+
// ─── fetch_markdown ───
|
|
49
|
+
pi.registerTool({
|
|
50
|
+
name: "fetch_markdown",
|
|
51
|
+
label: "Fetch Markdown",
|
|
52
|
+
description:
|
|
53
|
+
"Fetch any web page as clean, token-efficient markdown. Tries content negotiation (Accept: text/markdown) first, then falls back to Jina Reader.",
|
|
54
|
+
promptSnippet: "Fetch a URL and return its content as markdown",
|
|
55
|
+
promptGuidelines: [
|
|
56
|
+
"Use fetch_markdown when you need to read, summarize, or analyze web page content.",
|
|
57
|
+
"Use fetch_markdown instead of bash+curl for fetching web pages.",
|
|
58
|
+
],
|
|
59
|
+
parameters: Type.Object({
|
|
60
|
+
url: Type.String({ description: "URL to fetch" }),
|
|
61
|
+
info: Type.Optional(
|
|
62
|
+
Type.Boolean({
|
|
63
|
+
description: "Include metadata (method, tokens, content-signal)",
|
|
64
|
+
}),
|
|
65
|
+
),
|
|
66
|
+
}),
|
|
67
|
+
|
|
68
|
+
async execute(_toolCallId, params, signal) {
|
|
69
|
+
const result = await fetchMarkdown(params.url, {
|
|
70
|
+
info: params.info,
|
|
71
|
+
signal,
|
|
72
|
+
});
|
|
73
|
+
return {
|
|
74
|
+
content: [{ type: "text", text: result.text }],
|
|
75
|
+
details: result.details,
|
|
76
|
+
};
|
|
77
|
+
},
|
|
78
|
+
|
|
79
|
+
renderCall(args, theme, context) {
|
|
80
|
+
const text =
|
|
81
|
+
(context.lastComponent as Text | undefined) ?? new Text("", 0, 0);
|
|
82
|
+
const url = typeof args.url === "string" ? args.url : "";
|
|
83
|
+
const urlDisplay = url
|
|
84
|
+
? theme.fg("accent", url)
|
|
85
|
+
: theme.fg("error", "[invalid arg]");
|
|
86
|
+
text.setText(
|
|
87
|
+
`${theme.fg("toolTitle", theme.bold("fetch_markdown"))} ${urlDisplay}`,
|
|
88
|
+
);
|
|
89
|
+
return text;
|
|
90
|
+
},
|
|
91
|
+
|
|
92
|
+
renderResult(result, options, theme, context) {
|
|
93
|
+
const text =
|
|
94
|
+
(context.lastComponent as Text | undefined) ?? new Text("", 0, 0);
|
|
95
|
+
|
|
96
|
+
if (options.isPartial) {
|
|
97
|
+
text.setText(theme.fg("warning", "Fetching..."));
|
|
98
|
+
return text;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
const output = result.content
|
|
102
|
+
.filter((c): c is { type: "text"; text: string } => c.type === "text")
|
|
103
|
+
.map((c) => c.text)
|
|
104
|
+
.join("\n");
|
|
105
|
+
|
|
106
|
+
text.setText(
|
|
107
|
+
renderMarkdownOutput(output, options, theme),
|
|
108
|
+
);
|
|
109
|
+
return text;
|
|
110
|
+
},
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
// ─── web_search ───
|
|
114
|
+
pi.registerTool({
  name: "web_search",
  label: "Web Search",
  description:
    "Search the web and return results as clean markdown via Jina Search.",
  promptSnippet: "Search the web for current information",
  promptGuidelines: [
    "Use web_search when the user asks about recent events, current facts, or information that may not be in your training data.",
    "Use web_search before fetch_markdown when you don't know the exact URL.",
  ],
  parameters: Type.Object({
    query: Type.String({ description: "Search query" }),
    num: Type.Optional(
      Type.Number({
        description: "Number of results (1–20, default 5)",
        minimum: 1,
        maximum: 20,
      }),
    ),
    info: Type.Optional(
      Type.Boolean({
        description: "Include metadata (method, tokens)",
      }),
    ),
    retainImages: Type.Optional(
      Type.Union(
        [
          Type.Literal("none"),
          Type.Literal("all"),
          Type.Literal("alt_text"),
        ],
        {
          description:
            "Image retention mode: none (default), all, alt_text (keep alt text with auto-generation)",
        },
      ),
    ),
    withLinksSummary: Type.Optional(
      Type.Boolean({
        description: "Include a summary section for hyperlinks (default: false)",
      }),
    ),
  }),

  /**
   * Forwards the tool arguments and the abort signal to `webSearch` and wraps
   * the returned text as a single text content item, passing `details` through.
   */
  async execute(_toolCallId, params, signal) {
    const { query, num, info, retainImages, withLinksSummary } = params;
    const { text, details } = await webSearch(query, {
      num,
      info,
      signal,
      retainImages,
      withLinksSummary,
    });
    return {
      content: [{ type: "text", text }],
      details,
    };
  },

  /**
   * Renders the call header: the tool title followed by the quoted query, or
   * an "[invalid arg]" marker when `query` is not a non-empty string.
   * Reuses the previously rendered Text component when one exists.
   */
  renderCall(args, theme, context) {
    const component =
      (context.lastComponent as Text | undefined) ?? new Text("", 0, 0);

    let queryDisplay;
    if (typeof args.query === "string" && args.query.length > 0) {
      queryDisplay = theme.fg("accent", `"${args.query}"`);
    } else {
      queryDisplay = theme.fg("error", "[invalid arg]");
    }

    const title = theme.fg("toolTitle", theme.bold("web_search"));
    component.setText(`${title} ${queryDisplay}`);
    return component;
  },

  /**
   * Renders the tool result: a "Searching..." placeholder while the result is
   * partial, otherwise the text parts of the result joined by newlines and
   * passed through `renderMarkdownOutput`.
   */
  renderResult(result, options, theme, context) {
    const component =
      (context.lastComponent as Text | undefined) ?? new Text("", 0, 0);

    if (options.isPartial) {
      component.setText(theme.fg("warning", "Searching..."));
      return component;
    }

    // Only text parts are rendered; any other content kinds are ignored.
    const pieces: string[] = [];
    for (const part of result.content) {
      if (part.type === "text") {
        pieces.push(part.text);
      }
    }

    component.setText(renderMarkdownOutput(pieces.join("\n"), options, theme));
    return component;
  },
});
|
|
203
|
+
|
|
204
|
+
// ─── /md-web-surfer-config ───
|
|
205
|
+
pi.registerCommand("md-web-surfer-config", {
  description: "Configure Jina API key for web search",
  /**
   * Handler for `/md-web-surfer-config`.
   *
   * - `--show`: reports the masked key from the `JINA_API_KEY` environment
   *   variable and from the config file (via `console.log` without a UI,
   *   otherwise one notification per line).
   * - No UI and no `--show`: prints instructions and exits.
   * - Interactive: asks for confirmation when a key is already resolved,
   *   prompts for a new key, and stores it with `writeConfig`.
   */
  handler: async (args, ctx) => {
    const show = args.trim() === "--show";
    const envKey = process.env.JINA_API_KEY;
    const config = readConfig();
    const configKey = config.jinaApiKey;

    if (show) {
      const lines: string[] = [];
      if (envKey) {
        lines.push(`Env var JINA_API_KEY: ${maskKey(envKey)}`);
      } else {
        lines.push("Env var JINA_API_KEY: not set");
      }
      if (configKey) {
        lines.push(`Config file: ${maskKey(configKey)}`);
      } else {
        lines.push("Config file: no key stored");
      }
      if (!ctx.hasUI) {
        console.log(lines.join("\n"));
        return;
      }
      for (const line of lines) {
        ctx.ui.notify(line, "info");
      }
      return;
    }

    // Setting a key needs interactive prompts; without a UI we can only explain how.
    if (!ctx.hasUI) {
      console.log(
        "Run /md-web-surfer-config interactively to set your API key, or set the JINA_API_KEY environment variable.",
      );
      return;
    }

    // NOTE(review): the key shown here is whatever resolveJinaApiKey() returns,
    // which may come from the environment rather than the config file. Saving
    // below only writes the config file — confirm which source takes precedence.
    const currentKey = resolveJinaApiKey();
    if (currentKey) {
      const ok = await ctx.ui.confirm(
        "API Key",
        `Current key: ${maskKey(currentKey)}. Replace it?`,
      );
      if (!ok) {
        ctx.ui.notify("Key unchanged", "info");
        return;
      }
    }

    const entered = await ctx.ui.input(
      "Jina API Key",
      "Get one free at https://jina.ai/?newKey",
    );
    // Trim BEFORE the emptiness check so a whitespace-only entry is rejected
    // instead of being persisted as an empty API key.
    const key = entered?.trim();
    if (!key) {
      ctx.ui.notify("No key provided", "warning");
      return;
    }

    writeConfig({ jinaApiKey: key });
    ctx.ui.notify("API key saved", "info");
  },
});
|
|
267
|
+
}
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
|
2
|
+
|
|
3
|
+
// Module mocks for the webSearch tests. Each factory is self-contained because
// vi.mock calls are hoisted above the imports by vitest.

// Fixed temp directory so generated output paths are predictable.
vi.mock("node:os", () => {
  return { tmpdir: vi.fn(() => "/tmp") };
});

// Never touch the real file system when the truncated output is saved.
vi.mock("node:fs/promises", () => {
  return { writeFile: vi.fn(() => Promise.resolve()) };
});

// Stand-in for the agent helpers: by default nothing is truncated and the
// reported line/byte counts are derived from the input itself.
vi.mock("@earendil-works/pi-coding-agent", async () => {
  const truncateHead = vi.fn((content: string) => {
    const lineCount = content.split("\n").length;
    const byteCount = content.length;
    return {
      content,
      truncated: false,
      truncatedBy: undefined,
      firstLineExceedsLimit: false,
      lastLinePartial: false,
      maxLines: 2000,
      maxBytes: 50000,
      outputLines: lineCount,
      totalLines: lineCount,
      outputBytes: byteCount,
      totalBytes: byteCount,
    };
  });
  const formatSize = vi.fn((bytes: number) => `${bytes}B`);

  return {
    truncateHead,
    formatSize,
    DEFAULT_MAX_LINES: 2000,
    DEFAULT_MAX_BYTES: 50000,
  };
});

// The API key lookup is controlled per test through this mock.
vi.mock("./config.js", () => {
  return { resolveJinaApiKey: vi.fn() };
});
|
|
30
|
+
|
|
31
|
+
import { webSearch } from "./web-search.js";
|
|
32
|
+
import { resolveJinaApiKey } from "./config.js";
|
|
33
|
+
import { truncateHead } from "@earendil-works/pi-coding-agent";
|
|
34
|
+
|
|
35
|
+
describe("webSearch", () => {
  /** Builds a 200 response with a markdown content type plus optional extra headers. */
  const markdownResponse = (
    body: string,
    extraHeaders: Record<string, string> = {},
  ): Response =>
    new Response(body, {
      status: 200,
      headers: new Headers({ "content-type": "text/markdown", ...extraHeaders }),
    });

  /** The stubbed global fetch; resolved lazily because it is re-stubbed before each test. */
  const fetchMock = () => vi.mocked(fetch);

  /** First argument (the URL) of the n-th recorded fetch call. */
  const requestedUrl = (callIndex: number) => fetchMock().mock.calls[callIndex][0];

  beforeEach(() => {
    vi.stubGlobal("fetch", vi.fn());
    vi.mocked(resolveJinaApiKey).mockReturnValue("jina_test_key");
  });

  afterEach(() => {
    vi.unstubAllGlobals();
    vi.clearAllMocks();
  });

  it("returns search results with correct defaults", async () => {
    fetchMock().mockResolvedValueOnce(
      markdownResponse("# Search Results\n\n1. [Example](https://example.com)"),
    );

    const { text, details } = await webSearch("hello world");

    expect(text).toContain("Search Results");
    expect(details.method).toBe("jina-search");
    expect(details.query).toBe("hello world");
    expect(details.num).toBe(5);
    expect(details.retainImages).toBe("none");
    expect(details.withLinksSummary).toBe(false);

    const [requestUrl, requestInit] = fetchMock().mock.calls[0];
    expect(requestUrl).toContain("s.jina.ai/hello%20world");
    expect(requestUrl).toContain("num=5");
    expect(requestUrl).toContain("retainImages=none");
    expect(requestUrl).toContain("withLinksSummary=false");
    expect(requestUrl).not.toContain("withImagesSummary");
    expect(requestInit?.headers).toMatchObject({
      Authorization: "Bearer jina_test_key",
    });
  });

  it("throws when no API key is configured", async () => {
    vi.mocked(resolveJinaApiKey).mockReturnValue(null);

    const pending = webSearch("test");

    await expect(pending).rejects.toThrow("Jina Search requires an API key");
  });

  it("throws on non-ok response", async () => {
    const rateLimited = new Response("Rate limited", { status: 429 });
    fetchMock().mockResolvedValueOnce(rateLimited);

    await expect(webSearch("test")).rejects.toThrow("Jina Search failed: 429");
  });

  it("throws when response is HTML", async () => {
    const htmlPage = new Response("<!DOCTYPE html><html></html>", { status: 200 });
    fetchMock().mockResolvedValueOnce(htmlPage);

    await expect(webSearch("test")).rejects.toThrow(
      "Jina Search returned empty or HTML response",
    );
  });

  it("respects num parameter and clamps to 1-20", async () => {
    // Out-of-range requests, in call order, with the parameter expected in the URL.
    const cases = [
      { num: 50, expected: "num=20" },
      { num: 0, expected: "num=1" },
    ];

    for (const [callIndex, { num, expected }] of cases.entries()) {
      fetchMock().mockResolvedValueOnce(markdownResponse("# Results"));
      await webSearch("test", { num });
      expect(requestedUrl(callIndex)).toContain(expected);
    }
  });

  it("passes abort signal to fetch", async () => {
    const { signal } = new AbortController();
    fetchMock().mockResolvedValueOnce(markdownResponse("# Results"));

    await webSearch("test", { signal });

    const requestInit = fetchMock().mock.calls[0][1];
    expect(requestInit).toMatchObject({ signal });
  });

  it("includes metadata when info flag is set", async () => {
    fetchMock().mockResolvedValueOnce(
      markdownResponse("# Results", { "x-markdown-tokens": "15000" }),
    );

    const { details } = await webSearch("test", { info: true });

    expect(details.tokens).toBe("15000");
  });

  it("passes custom retainImages and withLinksSummary", async () => {
    fetchMock().mockResolvedValueOnce(markdownResponse("# Results"));

    const { details } = await webSearch("test", {
      retainImages: "all",
      withLinksSummary: true,
    });

    expect(details.retainImages).toBe("all");
    expect(details.withLinksSummary).toBe(true);

    const requestUrl = requestedUrl(0) as string;
    expect(requestUrl).toContain("retainImages=all");
    expect(requestUrl).toContain("withLinksSummary=true");
  });

  it("maps alt_text to alt_p for Jina API", async () => {
    fetchMock().mockResolvedValueOnce(markdownResponse("# Results"));

    const { details } = await webSearch("test", { retainImages: "alt_text" });

    // The caller-facing value is reported back unchanged...
    expect(details.retainImages).toBe("alt_text");

    // ...while the request itself carries the Jina spelling.
    const requestUrl = requestedUrl(0) as string;
    expect(requestUrl).toContain("retainImages=alt_p");
  });

  it("handles truncated output", async () => {
    fetchMock().mockResolvedValueOnce(markdownResponse("# Very long results"));

    // Force the truncation branch for this single call.
    vi.mocked(truncateHead).mockReturnValueOnce({
      content: "# Very",
      truncated: true,
      truncatedBy: "bytes",
      firstLineExceedsLimit: false,
      lastLinePartial: false,
      maxLines: 2000,
      maxBytes: 50000,
      outputLines: 1,
      totalLines: 100,
      outputBytes: 6,
      totalBytes: 10000,
    });

    const { text, details } = await webSearch("test");

    expect(text).toContain("[Output truncated:");
    expect(text).toContain("Full output saved to:");
    expect(details.fullOutputPath).toMatch(/\.md$/);
  });
});
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import {
|
|
2
|
+
truncateHead,
|
|
3
|
+
formatSize,
|
|
4
|
+
DEFAULT_MAX_BYTES,
|
|
5
|
+
DEFAULT_MAX_LINES,
|
|
6
|
+
} from "@earendil-works/pi-coding-agent";
|
|
7
|
+
import { tmpdir } from "node:os";
|
|
8
|
+
import { writeFile } from "node:fs/promises";
|
|
9
|
+
import { join } from "node:path";
|
|
10
|
+
import { resolveJinaApiKey } from "./config.js";
|
|
11
|
+
|
|
12
|
+
/** Optional settings accepted by `webSearch`. */
export interface SearchOptions {
  /** Requested number of results; `webSearch` defaults to 5 and clamps to 1–20. */
  num?: number;
  /** Image handling mode; `"alt_text"` is sent to Jina as `alt_p`. Defaults to `"none"`. */
  retainImages?: "none" | "all" | "alt_text";
  /** Sent as the `withLinksSummary` query parameter. Defaults to `false`. */
  withLinksSummary?: boolean;
  /** When truthy, the `x-markdown-tokens` response header is reported in `details.tokens`. */
  info?: boolean;
  /** Abort signal handed to the underlying `fetch` call. */
  signal?: AbortSignal;
}
|
|
19
|
+
|
|
20
|
+
/** Value resolved by `webSearch`. */
export interface SearchResult {
  /** Markdown returned by the search, plus a truncation notice when the output was cut. */
  text: string;
  /** Metadata about the request (method, query, effective options, optional extras). */
  details: Record<string, unknown>;
}
|
|
24
|
+
|
|
25
|
+
/** Result count used when `num` is omitted or is not a number. */
const DEFAULT_RESULT_COUNT = 5;
/** Smallest result count sent to Jina Search. */
const MIN_RESULT_COUNT = 1;
/** Largest result count sent to Jina Search. */
const MAX_RESULT_COUNT = 20;

/**
 * Coerces a requested result count into an integer within 1–20.
 * Fractions are truncated and `NaN` falls back to the default, so the request
 * never carries `num=NaN` or a fractional count.
 */
function normalizeResultCount(num: number): number {
  if (Number.isNaN(num)) {
    return DEFAULT_RESULT_COUNT;
  }
  return Math.min(Math.max(Math.trunc(num), MIN_RESULT_COUNT), MAX_RESULT_COUNT);
}

/**
 * Searches the web through Jina Search (`https://s.jina.ai/`) and returns the
 * markdown response.
 *
 * The response body is passed through `truncateHead` with the default line and
 * byte limits. When it is truncated, the full body is written to a temporary
 * `.md` file whose path is appended to the text and reported in
 * `details.fullOutputPath`.
 *
 * @param query - Search query; URL-encoded into the request path.
 * @param options - Optional result count, image/link settings, metadata flag and abort signal.
 * @returns The (possibly truncated) markdown plus request details: `method`,
 *   `query`, the effective `num`, `retainImages`, `withLinksSummary`, and
 *   `tokens` when `options.info` is set.
 * @throws Error when no Jina API key is configured, when the response status
 *   is not ok, or when the body is empty or looks like an HTML page.
 */
export async function webSearch(
  query: string,
  options: SearchOptions = {},
): Promise<SearchResult> {
  const {
    num = DEFAULT_RESULT_COUNT,
    info,
    signal,
    retainImages = "none",
    withLinksSummary = false,
  } = options;

  const apiKey = resolveJinaApiKey();
  if (!apiKey) {
    throw new Error(
      "Jina Search requires an API key. Run /md-web-surfer-config to set one, or set the JINA_API_KEY environment variable.",
    );
  }

  const count = normalizeResultCount(num);
  // The caller-facing "alt_text" mode is sent to Jina as "alt_p".
  const jinaRetain = retainImages === "alt_text" ? "alt_p" : retainImages;

  const params = new URLSearchParams({
    num: String(count),
    respondWith: "markdown",
    retainImages: jinaRetain,
    withLinksSummary: String(withLinksSummary),
  });
  const url = `https://s.jina.ai/${encodeURIComponent(query)}?${params.toString()}`;

  const response = await fetch(url, {
    signal,
    headers: {
      Accept: "text/markdown",
      Authorization: `Bearer ${apiKey}`,
      "User-Agent": "Mozilla/5.0 (compatible; pi-agent/1.0)",
    },
  });

  if (!response.ok) {
    throw new Error(
      `Jina Search failed: ${response.status} ${response.statusText}`,
    );
  }

  const body = await response.text();
  // Only the first 300 characters are inspected for an HTML document marker.
  if (!body || body.slice(0, 300).match(/<!DOCTYPE|<html/i)) {
    throw new Error("Jina Search returned empty or HTML response");
  }

  const truncated = truncateHead(body, {
    maxLines: DEFAULT_MAX_LINES,
    maxBytes: DEFAULT_MAX_BYTES,
  });

  let text = truncated.content;
  const details: Record<string, unknown> = {
    method: "jina-search",
    query,
    num: count,
    retainImages,
    withLinksSummary,
  };

  if (info) {
    details.tokens = response.headers.get("x-markdown-tokens") ?? "unknown";
  }

  if (truncated.truncated) {
    // A random suffix keeps two truncated searches in the same millisecond
    // from overwriting each other's saved output.
    const suffix = Math.random().toString(36).slice(2, 10);
    const tmp = join(tmpdir(), `fetch-search-${Date.now()}-${suffix}.md`);
    await writeFile(tmp, body, "utf8");
    text += `\n\n[Output truncated: ${truncated.outputLines} of ${truncated.totalLines} lines`;
    text += ` (${formatSize(truncated.outputBytes)} of ${formatSize(truncated.totalBytes)}).`;
    text += ` Full output saved to: ${tmp}]`;
    details.fullOutputPath = tmp;
  }

  return { text, details };
}
|