@ibeex/pi-fetch 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 ibeex
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,132 @@
1
+ # @ibeex/pi-fetch
2
+
3
+ Installable pi package that adds a `/fetch` slash command.
4
+
5
+ Published package name: `@ibeex/pi-fetch`
6
+
7
+ Repository: https://github.com/ibeex/pi-fetch
8
+
9
+ `/fetch` retrieves web content and injects it into pi's session context so the next prompt can use it.
10
+
11
+ ## Features
12
+
13
+ - a single command form: `/fetch <url>` (the URL is the only argument)
14
+ - uses `jina` for normal public URLs and `raw` for localhost, private-network, and other likely local URLs
15
+ - retries Jina once with auth on 401/403/429 responses and on rate-limit or auth-required style error messages
16
+ - loads Jina API key from `JINA_API_KEY`, `JINA_API_TOKEN`, or `pass api/jina`
17
+ - supports site-specific cleanup before truncation
18
+ - currently cleans Hacker News item pages so the model sees story metadata + comment text instead of repetitive UI noise
19
+ - shows a live footer status while fetching
20
+ - shows a visible fetch result row in chat; expand it with `Ctrl+O` to inspect what was captured
21
+ - truncates fetched content before injecting it into context
22
+ - when truncation happens, only the first `PI_FETCH_MAX_CONTEXT_CHARS` chars are added to context and the full fetched response is saved to a temp file
23
+ - default fetch context limit is `28000` chars, roughly `~7k` tokens for normal English text
24
+ - stores fetched content as a hidden custom session message, so it is available to the model without cluttering chat
25
+
26
+ ## Install
27
+
28
+ Because this repo now has a `package.json` with a `pi` manifest, you can install it as a pi package.
29
+
30
+ ### Install from npm
31
+
32
+ ```bash
33
+ pi install npm:@ibeex/pi-fetch
34
+ ```
35
+
36
+ ### Clone the repository
37
+
38
+ ```bash
39
+ git clone git@github.com:ibeex/pi-fetch.git
40
+ cd pi-fetch
41
+ ```
42
+
43
+ ### Install globally
44
+
45
+ ```bash
46
+ pi install /absolute/path/to/pi-fetch
47
+ ```
48
+
49
+ ### Install into another project
50
+
51
+ From the target project directory:
52
+
53
+ ```bash
54
+ pi install /absolute/path/to/pi-fetch -l
55
+ ```
56
+
57
+ Then start pi in that project, or run:
58
+
59
+ ```text
60
+ /reload
61
+ ```
62
+
63
+ ## Quick test without installing
64
+
65
+ From outside this repo:
66
+
67
+ ```bash
68
+ pi -e /absolute/path/to/pi-fetch
69
+ ```
70
+
71
+ If you test from inside this repo and already have another copy of `@ibeex/pi-fetch` auto-loaded, disable extension auto-discovery to avoid duplicate `/fetch` commands:
72
+
73
+ ```bash
74
+ pi --no-extensions -e .
75
+ ```
76
+
77
+ Or load the raw extension file directly:
78
+
79
+ ```bash
80
+ pi --no-extensions -e ./index.ts
81
+ ```
82
+
83
+ ## Usage
84
+
85
+ ```text
86
+ /fetch https://example.com/docs
87
+ /fetch http://localhost:3000/health
88
+ ```
89
+
90
+ After `/fetch`, ask a normal question in the next prompt, for example:
91
+
92
+ ```text
93
+ /fetch https://example.com/docs
94
+ What are the main points from that page?
95
+ ```
96
+
97
+ In interactive mode, `/fetch` also shows:
98
+
99
+ - a temporary footer spinner while the request is running
100
+ - a visible result row after completion
101
+ - a short collapsed preview of the captured content
102
+ - expandable details with `Ctrl+O`, including the exact content that was injected into context
103
+
104
+ ## Jina auth
105
+
106
+ Lookup order:
107
+
108
+ 1. `JINA_API_KEY`
109
+ 2. `JINA_API_TOKEN`
110
+ 3. `pass api/jina`
111
+
112
+ Example:
113
+
114
+ ```bash
115
+ pass insert api/jina
116
+ ```
117
+
118
+ ## Optional env vars
119
+
120
+ - `PI_FETCH_TIMEOUT_MS` default `30000`
121
+ - `PI_FETCH_MAX_CONTEXT_CHARS` default `28000` (roughly `~7k` tokens)
122
+ - `PI_FETCH_JINA_PASS_PATH` default `api/jina`
123
+
124
+ ## Notes
125
+
126
+ - This is intentionally slash-command based, not an LLM tool.
127
+ - `/fetch` adds context for the next prompt; it does not automatically ask the model a follow-up question.
128
+ - `PI_FETCH_MAX_CONTEXT_CHARS` only affects fetched-page context from this extension.
129
+ - `truncated` means the fetched response was larger than `PI_FETCH_MAX_CONTEXT_CHARS`, so only the first chunk was injected into model context.
130
+ - If truncated, the visible result row shows how many chars were kept vs omitted, and expanded details show the temp file path for the full response.
131
+ - The visible fetch result row is filtered out of model context; only the hidden fetched-content message is sent to the model.
132
+ - Jina reader is used only for public web URLs. Localhost, private-network, and other likely local URLs use raw fetch automatically.
@@ -0,0 +1,264 @@
1
+ import type { ContentCleaner } from "./index.ts";
2
+
3
/** Regex source for the Hacker News origin (http or https), embedded in the patterns below. */
const HN_URL_PATTERN = String.raw`https?:\/\/news\.ycombinator\.com`;

/** Matches a Hacker News vote URL; used to locate the line that carries the discussion. */
const HN_VOTE_LINK_RE = new RegExp(String.raw`${HN_URL_PATTERN}\/vote\?id=`);

/** Matches a markdown "N comments" link to an item page; `count` captures the label. */
const HN_ITEM_LINK_RE = new RegExp(
  String.raw`\[(?<count>\d+\s+comments?)\]\(${HN_URL_PATTERN}\/item\?id=\d+[^)]*\)`,
);

/**
 * Matches the start of a markdown comment segment: a user link immediately
 * followed by an age link. Captures `user`, `age` and the remaining `tail`.
 */
const HN_COMMENT_HEADER_RE = new RegExp(
  String.raw`^\[(?<user>[^\]]+)\]\(${HN_URL_PATTERN}\/user\?id=[^)]+\)\[(?<age>[^\]]+)\]\(${HN_URL_PATTERN}\/item\?id=\d+[^)]*\)(?<tail>[\s\S]*)$`,
);

/** Separator between segments: an optional spacer image followed by an empty-label vote link. */
const HN_SEGMENT_SPLIT_RE = new RegExp(
  String.raw`(?:!?\[Image \d+\]\(${HN_URL_PATTERN}\/s\.gif\))?\[\]\(${HN_URL_PATTERN}\/vote\?id=`,
);

/** Plain-text story stats line: "N points by USER AGE | ... N comments". */
const HN_PLAIN_STATS_RE =
  /^(?<points>\d+\s+points?)\s+by\s+(?<submitter>\S+)\s+(?<age>.+?)\s+\|\s+.*?(?<comments>\d+\s+comments?)\s*$/;

/** Plain-text comment header: "USER AGE ago | ... [x]". */
const HN_PLAIN_COMMENT_HEADER_RE = /^(?<user>\S+)\s+(?<age>.+?\bago)\s+\|\s+.+\[[^\]]+\]$/;
15
+
16
/**
 * Patterns for Hacker News navigation links ("reply", "parent", ...) that carry
 * no content. Compiled once instead of on every call; applied in this order.
 */
const HN_NAV_LINK_RES: readonly RegExp[] = [
  "reply",
  "parent",
  "next",
  "prev",
  "root",
  "hide",
  "favorite",
  "past",
  "help",
  "login",
].map((label) => new RegExp(`\\[${label}\\]\\([^)]+\\)`, "gi"));

/**
 * Reduces a chunk of markdown to readable plain text.
 *
 * Removes numbered images, toggle links pointing at `javascript:void(0)`,
 * empty-label links and navigation links, replaces every remaining link by its
 * trimmed label, turns ` | ` separators into ` • `, and collapses repeated
 * bullets, spaces/tabs and blank lines.
 *
 * @param value Markdown text to simplify.
 * @returns The simplified text, trimmed at both ends.
 */
function simplifyMarkdownText(value: string): string {
  let cleaned = value
    .replace(/!?\[Image \d+\]\([^)]*\)/g, "")
    .replace(/\[\[[^\]]*\]\]\(javascript:void\(0\)\)/g, "")
    .replace(/\[\]\([^)]*\)/g, "");

  for (const navLink of HN_NAV_LINK_RES) {
    cleaned = cleaned.replace(navLink, "");
  }

  // Keep only the visible label of each link; a whitespace-only label becomes "".
  cleaned = cleaned.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_match, label: string) => label.trim());

  return cleaned
    .replace(/\s+\|\s+/g, " • ")
    .replace(/(?:\s*•\s*){2,}/g, " • ")
    .replace(/[ \t]{2,}/g, " ")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
}
43
+
44
/**
 * Turns the first discussion segment (the story header) into labelled lines.
 *
 * Always starts with "Hacker News discussion". Title/Article, Site,
 * Points/Submitter/Posted and Comments lines are added only when the matching
 * markdown pieces are found; leftover text after the stats is emitted as "Links:".
 *
 * @param segment Markdown of the story header segment.
 * @returns Output lines, not yet joined.
 */
function formatStoryHeader(segment: string): string[] {
  const header: string[] = ["Hacker News discussion"];
  let remainder = segment;

  // Leading "[title](article-url)".
  const titleParts = /^\[(?<title>[^\]]+)\]\((?<link>[^)]+)\)(?<tail>[\s\S]*)$/.exec(remainder)?.groups;
  if (titleParts) {
    header.push("", `Title: ${titleParts.title}`, `Article: ${titleParts.link}`);
    remainder = titleParts.tail;
  }

  // Optional "([site](url))" directly after the title.
  const siteParts = /^\s*\(\[(?<site>[^\]]+)\]\([^)]+\)\)(?<tail>[\s\S]*)$/.exec(remainder)?.groups;
  if (siteParts) {
    header.push(`Site: ${siteParts.site}`);
    remainder = siteParts.tail;
  }

  const statsParts =
    /(?<points>\d+\s+points?)\s+by\s+\[(?<submitter>[^\]]+)\]\([^)]+\)\[(?<age>[^\]]+)\]\([^)]+\)/.exec(
      remainder,
    )?.groups;
  if (statsParts) {
    header.push(
      `Points: ${statsParts.points}`,
      `Submitter: ${statsParts.submitter}`,
      `Posted: ${statsParts.age}`,
    );
  }

  const commentParts = HN_ITEM_LINK_RE.exec(remainder)?.groups;
  if (commentParts) {
    header.push(`Comments: ${commentParts.count}`);
  }

  // Whatever text survives after the "... points by ... comments" run is listed as links.
  const leftover = simplifyMarkdownText(remainder)
    .replace(/^.*?\d+\s+points?\s+by\s+.+?comments/i, "")
    .trim()
    .replace(/^•\s*/, "")
    .trim();
  if (leftover) {
    header.push("", `Links: ${leftover}`);
  }

  return header;
}
83
+
84
/**
 * Reads the page title from the first line starting with "Title: ".
 *
 * A trailing " | Hacker News" suffix is removed.
 *
 * @param lines Lines of the fetched body.
 * @returns The title, or `undefined` when there is no such line or it is empty.
 */
function extractPageTitle(lines: string[]): string | undefined {
  const prefix = "Title: ";
  for (const line of lines) {
    if (!line.startsWith(prefix)) {
      continue;
    }
    // Only the first "Title: " line is considered.
    const stripped = line
      .slice(prefix.length)
      .replace(/\s+\|\s+Hacker News$/i, "")
      .trim();
    return stripped === "" ? undefined : stripped;
  }
  return undefined;
}
92
+
93
/**
 * Heuristic check for a story-header segment.
 *
 * @param segment Candidate discussion segment.
 * @returns `true` when the segment starts with a markdown link, contains
 *   "N points by [", or contains an "N comments" item link.
 */
function looksLikeStoryHeader(segment: string): boolean {
  if (/^\[[^\]]+\]\([^)]+\)/.test(segment)) {
    return true;
  }
  if (/\d+\s+points?\s+by\s+\[/.test(segment)) {
    return true;
  }
  return HN_ITEM_LINK_RE.test(segment);
}
96
+
97
/**
 * Formats one markdown comment segment as "N. user — age" followed by the body.
 *
 * Segments without a recognisable comment header fall back to the simplified
 * text prefixed with the index.
 *
 * @param segment Markdown of a single comment segment.
 * @param index Number to prefix the comment with.
 * @returns The formatted comment, or `undefined` when nothing readable remains.
 */
function formatComment(segment: string, index: number): string | undefined {
  const header = HN_COMMENT_HEADER_RE.exec(segment)?.groups;

  if (!header) {
    const plain = simplifyMarkdownText(segment);
    return plain ? `${index}. ${plain}` : undefined;
  }

  // Drop the leading "| [parent] | [next]" navigation and the collapse toggle.
  const withoutNavigation = header.tail
    .replace(/^(?:\s*\|\s*\[(?:parent|next|prev|root)\]\([^)]+\))+/gi, "")
    .replace(/^\s*\[\[[^\]]*\]\]\(javascript:void\(0\)\)\s*/i, "");

  const text = simplifyMarkdownText(withoutNavigation).replace(/^•\s*/, "").trim();
  if (text === "") {
    return undefined;
  }

  return `${index}. ${header.user} — ${header.age}\n${text}`;
}
114
+
115
/**
 * Normalises a plain-text line: non-breaking spaces (U+00A0) become regular
 * spaces and surrounding whitespace is removed.
 */
function normalizePlainLine(line: string): string {
  const withRegularSpaces = line.split("\u00a0").join(" ");
  return withRegularSpaces.trim();
}
118
+
119
/**
 * Joins consecutive non-empty lines into single-line paragraphs.
 *
 * Each line is normalised first; an empty line ends the current paragraph.
 *
 * @param lines Body lines of one comment.
 * @returns Paragraphs separated by a blank line, trimmed.
 */
function collapsePlainTextParagraphs(lines: string[]): string {
  const paragraphs: string[] = [];
  let pending: string[] = [];

  const flush = (): void => {
    if (pending.length === 0) {
      return;
    }
    paragraphs.push(pending.join(" "));
    pending = [];
  };

  for (const raw of lines) {
    const text = normalizePlainLine(raw);
    if (text) {
      pending.push(text);
    } else {
      flush();
    }
  }
  flush();

  return paragraphs.join("\n\n").trim();
}
141
+
142
/**
 * Cleans a Hacker News item page that arrived as plain text rather than markdown.
 *
 * Finds the "N points by ..." stats line, takes the nearest meaningful line
 * above it as the title, then walks the lines below it collecting comments:
 * a comment starts at a header line and ends at a "reply" line, at the next
 * header, or at the "Guidelines" footer (which also ends the whole scan).
 *
 * @param lines Lines of the fetched content.
 * @returns The cleaned text, or `undefined` when no stats line is found.
 */
function cleanPlainTextHackerNewsItemContent(lines: string[]): string | undefined {
  const rows = lines.map(normalizePlainLine);

  const statsRow = rows.findIndex((row) => HN_PLAIN_STATS_RE.test(row));
  if (statsRow < 0) {
    return undefined;
  }
  const stats = rows[statsRow].match(HN_PLAIN_STATS_RE)?.groups;
  if (!stats) {
    return undefined;
  }

  // Title: closest line above the stats that is not blank, a heading, site chrome or "help".
  const isNoise = (row: string): boolean =>
    !row || /^#/.test(row) || /^Hacker News/i.test(row) || row === "help";
  const title =
    rows
      .slice(0, statsRow)
      .reverse()
      .find((row) => !isNoise(row)) ?? "";

  const output = ["Hacker News discussion"];
  if (title) {
    // "Some title (example.com)" splits into title and site.
    const titleParts = title.match(/^(?<title>.+?)\s+\((?<site>[^()]+\.[^()]+)\)$/)?.groups;
    if (titleParts) {
      output.push("", `Title: ${titleParts.title}`, `Site: ${titleParts.site}`);
    } else {
      output.push("", `Title: ${title}`);
    }
  }

  output.push(
    `Points: ${stats.points}`,
    `Submitter: ${stats.submitter}`,
    `Posted: ${stats.age}`,
    `Comments: ${stats.comments}`,
  );

  const comments: string[] = [];
  let cursor = statsRow + 1;
  while (cursor < rows.length) {
    const header = rows[cursor].match(HN_PLAIN_COMMENT_HEADER_RE)?.groups;
    cursor += 1;
    if (!header) {
      continue;
    }

    const bodyRows: string[] = [];
    while (cursor < rows.length) {
      const row = rows[cursor];
      if (!row) {
        // Blank rows are kept as paragraph breaks.
        bodyRows.push("");
        cursor += 1;
        continue;
      }
      if (row === "reply") {
        // End of this comment; resume scanning after the "reply" row.
        cursor += 1;
        break;
      }
      if (/^Guidelines\b/i.test(row)) {
        // Footer reached: nothing after it is scanned.
        cursor = rows.length;
        break;
      }
      if (HN_PLAIN_COMMENT_HEADER_RE.test(row)) {
        // Next comment starts here; leave the cursor on its header.
        break;
      }
      bodyRows.push(row);
      cursor += 1;
    }

    const text = collapsePlainTextParagraphs(bodyRows);
    if (text) {
      comments.push(`${comments.length + 1}. ${header.user} — ${header.age}\n${text}`);
    }
  }

  if (comments.length > 0) {
    output.push("", "Comments:", "", comments.join("\n\n"));
  }

  return output.join("\n").trim() || undefined;
}
222
+
223
/**
 * Cleans the fetched body of a Hacker News item page.
 *
 * When a line containing a vote link is found (after an optional
 * "Markdown Content:" marker), that line is split into segments at each vote
 * link: the first segment is formatted as the story header if it looks like
 * one, the rest as comments. Otherwise the plain-text cleaner is tried.
 *
 * Comments are numbered consecutively in output order, so segments that yield
 * no readable text do not leave gaps in the numbering.
 *
 * @param body Raw fetched body.
 * @returns The cleaned text, or `body` unchanged when nothing could be extracted.
 */
function cleanHackerNewsItemContent(body: string): string {
  const lines = body.split(/\r?\n/).map((line) => line.trimEnd());
  const pageTitle = extractPageTitle(lines);
  const markdownIndex = lines.findIndex((line) => line.trim() === "Markdown Content:");
  const markdownLines = markdownIndex >= 0 ? lines.slice(markdownIndex + 1) : lines;

  const discussionLine = markdownLines.find((line) => HN_VOTE_LINK_RE.test(line));
  if (discussionLine) {
    const segments = discussionLine
      .split(HN_SEGMENT_SPLIT_RE)
      // Each split leaves the rest of the vote URL ("123&how=up&goto=...)") at the front.
      .map((segment) => segment.replace(/^\d+&how=up&goto=[^)]*\)/, "").trim())
      .filter(Boolean);

    if (segments.length > 0) {
      const hasStoryHeader = looksLikeStoryHeader(segments[0]);
      const output = hasStoryHeader ? formatStoryHeader(segments[0]) : ["Hacker News discussion"];
      if (!hasStoryHeader && pageTitle) {
        output.push("", `Title: ${pageTitle}`);
      }

      const commentSegments = hasStoryHeader ? segments.slice(1) : segments;
      const comments: string[] = [];
      for (const segment of commentSegments) {
        // Number by comments emitted so far, not by segment position.
        const comment = formatComment(segment, comments.length + 1);
        if (comment) {
          comments.push(comment);
        }
      }

      if (comments.length > 0) {
        output.push("", "Comments:", "", comments.join("\n\n"));
      }

      return output.join("\n").trim() || body;
    }
  }

  return cleanPlainTextHackerNewsItemContent(markdownLines) || body;
}
255
+
256
/**
 * Cleaner for Hacker News item pages: applies to URLs whose hostname is
 * exactly `news.ycombinator.com` and whose path is exactly `/item`.
 */
export const hackerNewsCleaner: ContentCleaner = {
  id: "hacker-news-item",
  matches: ({ hostname, pathname }) => hostname === "news.ycombinator.com" && pathname === "/item",
  clean: (body) => cleanHackerNewsItemContent(body),
};
@@ -0,0 +1,19 @@
1
+ import { hackerNewsCleaner } from "./hacker-news.ts";
2
+
3
/** A site-specific post-processor for fetched page bodies. */
export type ContentCleaner = {
  /** Identifier of the cleaner. */
  id: string;
  /** Returns `true` when this cleaner applies to the given URL. */
  matches(url: URL): boolean;
  /** Returns the cleaned version of `body`. */
  clean(body: string): string;
};

/** Registered cleaners, consulted in order; the first one that matches is used. */
const CLEANERS: ContentCleaner[] = [hackerNewsCleaner];
10
+
11
/**
 * Runs the first registered cleaner whose `matches` accepts the URL.
 *
 * Best effort: an unparsable URL, no matching cleaner, or any exception thrown
 * while matching or cleaning all result in the original body being returned.
 *
 * @param url URL the body was fetched from.
 * @param body Fetched body text.
 * @returns The cleaned body, or `body` unchanged.
 */
export function cleanFetchedBody(url: string, body: string): string {
  try {
    const target = new URL(url);
    for (const cleaner of CLEANERS) {
      if (cleaner.matches(target)) {
        return cleaner.clean(body);
      }
    }
    return body;
  } catch {
    return body;
  }
}
package/index.ts ADDED
@@ -0,0 +1,656 @@
1
+ import type { ExtensionAPI, ExtensionCommandContext } from "@mariozechner/pi-coding-agent";
2
+ import { keyHint } from "@mariozechner/pi-coding-agent";
3
+ import { Text } from "@mariozechner/pi-tui";
4
+ import { execFile } from "node:child_process";
5
+ import { mkdtemp, writeFile } from "node:fs/promises";
6
+ import { isIP } from "node:net";
7
+ import { tmpdir } from "node:os";
8
+ import { join } from "node:path";
9
+ import { promisify } from "node:util";
10
+ import { cleanFetchedBody } from "./cleaners/index.ts";
11
+
12
/** Promise-returning variant of `execFile`. */
const execFileAsync = promisify(execFile);

/** Base URL of the Jina reader service. */
const JINA_READER_URL = "https://r.jina.ai/";
/** User-Agent value identifying this extension. */
const DEFAULT_USER_AGENT = "pi-fetch/0.1";
/** Request timeout in milliseconds; `PI_FETCH_TIMEOUT_MS` overrides the 30 000 default. */
const DEFAULT_TIMEOUT_MS = parsePositiveInt(process.env.PI_FETCH_TIMEOUT_MS, 30_000);
/** Context size limit in characters; `PI_FETCH_MAX_CONTEXT_CHARS` overrides the 28 000 default. */
const DEFAULT_MAX_CONTEXT_CHARS = parsePositiveInt(process.env.PI_FETCH_MAX_CONTEXT_CHARS, 28_000);
/** `pass` entry holding the Jina API key; `PI_FETCH_JINA_PASS_PATH` overrides it when non-blank. */
const DEFAULT_PASS_PATH = process.env.PI_FETCH_JINA_PASS_PATH?.trim() || "api/jina";
/** Line appended to content that was cut down to the context limit. */
const CONTENT_TRUNCATED_MARKER = "[Content truncated for context]";
/** Custom message type of the hidden message carrying the fetched content. */
const FETCH_CONTEXT_CUSTOM_TYPE = "pi-fetch-context";
/** Custom message type of the visible result row. */
const FETCH_RESULT_CUSTOM_TYPE = "pi-fetch-result";
/** Key under which the footer status is set and cleared. */
const FETCH_STATUS_KEY = "pi-fetch";
/** Spinner glyphs, cycled one per tick. */
const FETCH_STATUS_FRAMES = [
  "⠋",
  "⠙",
  "⠹",
  "⠸",
  "⠼",
  "⠴",
  "⠦",
  "⠧",
  "⠇",
  "⠏",
];
/** Interval between spinner frames, in milliseconds. */
const FETCH_STATUS_TICK_MS = 80;

/** How a URL is fetched; "auto" is resolved to one of the other two by `resolveMode`. */
type FetchMode = "auto" | "raw" | "jina";
/** When fetched content reaches the model: with the next prompt, or after the running turn. */
type FetchDelivery = "nextPrompt" | "afterCurrentRun";
27
+
28
/** Outcome of a successful fetch, as passed to the context and display helpers. */
interface FetchResult {
  /** URL that was fetched. */
  url: string;
  /** Concrete mode used for the request (never "auto"). */
  mode: Exclude<FetchMode, "auto">;
  /** HTTP status code. */
  status: number;
  /** Content type reported for the response. */
  contentType: string;
  /** Text placed into model context (may end with the truncation marker). */
  content: string;
  /** Whether the content was cut to fit the context limit. */
  truncated: boolean;
  /** Number of characters retrieved before truncation. */
  rawLength: number;
  /** Number of characters kept in context. */
  contextLength: number;
  /** Number of characters left out of context. */
  omittedLength: number;
  /** Path of the file holding the full response, when one was written. */
  fullOutputPath?: string;
}
40
+
41
/** Details attached to the visible result row of a successful fetch. */
type FetchSuccessDetails = {
  ok: true;
  /** When the fetched content reaches the model. */
  delivery: FetchDelivery;
  url: string;
  mode: Exclude<FetchMode, "auto">;
  status: number;
  contentType: string;
  truncated: boolean;
  rawLength: number;
  contextLength: number;
  omittedLength: number;
  /** Exact content that was injected into context; shown when the row is expanded. */
  contextContent: string;
  fullOutputPath?: string;
};

/** Details attached to the visible result row of a failed fetch. */
type FetchFailureDetails = {
  ok: false;
  /** Error text shown to the user. */
  error: string;
  url?: string;
  mode?: Exclude<FetchMode, "auto">;
};

/** Payload of a result row, discriminated by `ok`. */
type FetchDisplayDetails = FetchSuccessDetails | FetchFailureDetails;
62
+
63
/**
 * Error for an HTTP response with a failing status.
 *
 * The message is "HTTP <status> <statusText>", followed by ": <detail>" when
 * `detail` is non-empty. `status` and `detail` stay available to callers.
 */
class HttpStatusError extends Error {
  /** HTTP status code of the response. */
  status: number;
  /** Extra detail about the failure; may be empty. */
  detail: string;

  constructor(status: number, statusText: string, detail: string) {
    super(`HTTP ${status} ${statusText}` + (detail ? `: ${detail}` : ""));
    this.name = "HttpStatusError";
    this.status = status;
    this.detail = detail;
  }
}
74
+
75
/**
 * Parses a positive integer setting, falling back when the input is unusable.
 *
 * Only plain decimal digits (optionally prefixed with "+", surrounding
 * whitespace ignored) are accepted. Values such as "1e3", "30s" or "12.5" are
 * rejected instead of being silently cut to their leading digits, and numbers
 * too large to be a safe integer are rejected as well.
 *
 * @param raw Raw text, typically an environment variable; may be undefined.
 * @param fallback Value returned when `raw` is missing, malformed, or not > 0.
 * @returns The parsed integer, or `fallback`.
 */
function parsePositiveInt(raw: string | undefined, fallback: number): number {
  const text = raw?.trim() ?? "";
  if (!/^\+?\d+$/.test(text)) {
    return fallback;
  }
  const value = Number(text);
  if (!Number.isSafeInteger(value) || value <= 0) {
    return fallback;
  }
  return value;
}
82
+
83
/** Formats a number with en-US digit grouping, e.g. 28000 becomes "28,000". */
function formatCount(value: number): string {
  return value.toLocaleString("en-US");
}
86
+
87
/**
 * Shortens a long string by replacing its middle with "...".
 *
 * The result never exceeds `maxLength` characters. When `maxLength` leaves no
 * room for anything besides the separator, the value is simply cut to
 * `maxLength` characters.
 *
 * @param value Text to shorten (typically a URL).
 * @param maxLength Maximum length of the result; defaults to 96.
 * @returns `value` unchanged when it already fits, otherwise the shortened form.
 */
function shortenUrl(value: string, maxLength = 96): string {
  if (value.length <= maxLength) {
    return value;
  }
  const separator = "...";
  const available = maxLength - separator.length;
  if (available <= 0) {
    return value.slice(0, Math.max(maxLength, 0));
  }
  const head = Math.ceil(available / 2);
  const tail = Math.floor(available / 2);
  // slice(-0) would return the whole string, so an empty tail is handled explicitly.
  const suffix = tail > 0 ? value.slice(-tail) : "";
  return `${value.slice(0, head)}${separator}${suffix}`;
}
97
+
98
/**
 * Cuts text down to at most `limit` characters for injection into context.
 *
 * The text is trimmed first (an empty result becomes "(empty response body)").
 * When it exceeds `limit`, the kept part is trimmed at the end and the
 * truncation marker is appended on its own line. The cut never falls between
 * the two halves of a surrogate pair, so no broken character is emitted.
 *
 * @param text Text to fit.
 * @param limit Maximum number of characters to keep.
 * @returns The content, whether it was truncated, how many characters were
 *   kept (marker excluded), and how many were omitted.
 */
function truncateForContext(
  text: string,
  limit: number,
): { content: string; truncated: boolean; contextLength: number; omittedLength: number } {
  const normalized = text.trim() || "(empty response body)";
  if (normalized.length <= limit) {
    return { content: normalized, truncated: false, contextLength: normalized.length, omittedLength: 0 };
  }

  let cut = limit;
  const lastKept = normalized.charCodeAt(cut - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
    // The last kept unit is a high surrogate whose partner would be cut off: drop it too.
    cut -= 1;
  }

  const content = normalized.slice(0, cut).trimEnd();
  return {
    content: `${content}\n${CONTENT_TRUNCATED_MARKER}`,
    truncated: true,
    contextLength: content.length,
    omittedLength: normalized.length - content.length,
  };
}
114
+
115
/**
 * Applies site-specific cleanup to a fetched body by delegating to `cleanFetchedBody`.
 *
 * @param url URL the body was fetched from.
 * @param body Fetched body text.
 * @returns The body as returned by `cleanFetchedBody`.
 */
function preprocessFetchedBody(url: string, body: string): string {
  const cleaned = cleanFetchedBody(url, body);
  return cleaned;
}
118
+
119
/**
 * Trims the value and removes one pair of matching quotes around it.
 *
 * Both double and single quotes are recognised; the unwrapped text is trimmed
 * again. Mismatched or unpaired quotes are left in place.
 */
function stripWrappingQuotes(value: string): string {
  const text = value.trim();
  if (text.length < 2) {
    return text;
  }
  const opener = text.charAt(0);
  const isQuote = opener === '"' || opener === "'";
  // The closing character must be the same quote as the opening one.
  return isQuote && text.endsWith(opener) ? text.slice(1, -1).trim() : text;
}
130
+
131
/**
 * Validates a user-supplied URL and returns its normalised form.
 *
 * Wrapping quotes are removed first. Only `http:` and `https:` URLs with a
 * hostname are accepted.
 *
 * @param value Raw URL text.
 * @returns The serialised URL.
 * @throws Error when the value is empty, uses another protocol, or lacks a
 *   hostname; the `URL` constructor's own error when the text cannot be parsed.
 */
function validateUrl(value: string): string {
  const candidate = stripWrappingQuotes(value);
  if (candidate === "") {
    throw new Error("URL must not be empty.");
  }

  const { protocol, hostname, href } = new URL(candidate);
  if (!["http:", "https:"].includes(protocol)) {
    throw new Error("URL must start with http:// or https://.");
  }
  if (!hostname) {
    throw new Error("URL must include a hostname.");
  }
  return href;
}
145
+
146
/**
 * Canonicalises a hostname for comparison: trims and lower-cases it, removes
 * the square brackets around an IPv6 literal, and drops one trailing dot.
 */
function normalizeHostname(hostname: string): string {
  let host = hostname.trim().toLowerCase();
  if (host.startsWith("[") && host.endsWith("]")) {
    host = host.slice(1, -1);
  }
  if (host.endsWith(".")) {
    host = host.slice(0, -1);
  }
  return host;
}
152
+
153
/**
 * Tells whether a dotted-quad IPv4 address is not publicly routable.
 *
 * Covered ranges: 0.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10 (RFC 6598 shared
 * address space, e.g. carrier-grade NAT), 127.0.0.0/8, 169.254.0.0/16,
 * 172.16.0.0/12 and 192.168.0.0/16.
 *
 * @param hostname Address text; anything that is not four decimal octets in
 *   0-255 yields `false`.
 * @returns `true` for addresses in one of the ranges above.
 */
function isPrivateIpv4(hostname: string): boolean {
  const labels = hostname.split(".");
  // Require plain decimal octets; parseInt alone would accept "1abc" as 1.
  if (labels.length !== 4 || labels.some((label) => !/^\d+$/.test(label))) {
    return false;
  }
  const octets = labels.map((label) => Number.parseInt(label, 10));
  if (octets.some((octet) => octet > 255)) {
    return false;
  }

  const a = octets[0] ?? -1;
  const b = octets[1] ?? -1;
  return (
    a === 0 ||
    a === 10 ||
    a === 127 ||
    (a === 100 && b >= 64 && b <= 127) ||
    (a === 169 && b === 254) ||
    (a === 172 && b >= 16 && b <= 31) ||
    (a === 192 && b === 168)
  );
}
169
+
170
/**
 * Tells whether an IPv6 address (lower-case, without brackets) is local.
 *
 * Covered: the unspecified address `::`, loopback `::1`, unique local
 * addresses fc00::/7, link-local addresses fe80::/10, and IPv4-mapped
 * addresses (`::ffff:a.b.c.d` or the hex form `::ffff:hhhh:hhhh`) whose
 * embedded IPv4 address is private.
 *
 * @param hostname IPv6 address text.
 * @returns `true` when the address falls in one of the ranges above.
 */
function isPrivateIpv6(hostname: string): boolean {
  if (hostname === "::" || hostname === "::1") {
    return true;
  }

  // IPv4-mapped form; URL parsing serialises "::ffff:127.0.0.1" as "::ffff:7f00:1".
  const mapped = /^::ffff:(?:(\d{1,3}(?:\.\d{1,3}){3})|([0-9a-f]{1,4}):([0-9a-f]{1,4}))$/.exec(hostname);
  if (mapped) {
    const [, dotted, highHex, lowHex] = mapped;
    if (dotted !== undefined) {
      return isPrivateIpv4(dotted);
    }
    const high = Number.parseInt(highHex ?? "0", 16);
    const low = Number.parseInt(lowHex ?? "0", 16);
    return isPrivateIpv4(`${high >> 8}.${high & 0xff}.${low >> 8}.${low & 0xff}`);
  }

  // Require a full four-digit first group so that "fc::1" (00fc::) is not mistaken for fc00::/7.
  return /^f[cd][0-9a-f]{2}:/.test(hostname) || /^fe[89ab][0-9a-f]:/.test(hostname);
}
179
+
180
/**
 * Guesses whether a normalised hostname refers to a local or private host.
 *
 * `localhost`, `*.localhost` and `*.local` are always local. IP literals are
 * judged by their address range. Any other name counts as local only when it
 * contains no dot (a single-label name).
 *
 * @param hostname Hostname, already normalised.
 * @returns `true` when the host is likely local; `false` for an empty hostname.
 */
function isLikelyPrivateHostname(hostname: string): boolean {
  if (hostname === "") {
    return false;
  }

  const isLocalName =
    hostname === "localhost" || [".localhost", ".local"].some((suffix) => hostname.endsWith(suffix));
  if (isLocalName) {
    return true;
  }

  switch (isIP(hostname)) {
    case 4:
      return isPrivateIpv4(hostname);
    case 6:
      return isPrivateIpv6(hostname);
    default:
      // Not an IP literal: single-label names are treated as local.
      return !hostname.includes(".");
  }
}
199
+
200
/**
 * Tells whether a URL points at a likely local or private host.
 *
 * @param value URL text; must be parsable by `URL`, which throws otherwise.
 */
function isLocalUrl(value: string): boolean {
  const { hostname } = new URL(value);
  return isLikelyPrivateHostname(normalizeHostname(hostname));
}
204
+
205
/**
 * Resolves the fetch mode to use for a URL.
 *
 * An explicit "raw" or "jina" is returned as is; otherwise local URLs use
 * "raw" and everything else uses "jina".
 */
function resolveMode(url: string, mode: FetchMode): Exclude<FetchMode, "auto"> {
  switch (mode) {
    case "raw":
    case "jina":
      return mode;
    default:
      return isLocalUrl(url) ? "raw" : "jina";
  }
}
211
+
212
/**
 * Decides whether a failed Jina request should be retried with an API key.
 *
 * @param status HTTP status of the failed response.
 * @param detail Error detail text; matched case-insensitively.
 * @returns `true` for status 401, 403 or 429, or when the detail mentions rate
 *   limiting or missing authentication.
 */
function shouldRetryWithJinaApiKey(status: number, detail: string): boolean {
  const haystack = detail.toLowerCase();
  if ([401, 403, 429].includes(status)) {
    return true;
  }

  const hints = [
    "rate limit",
    "too many requests",
    "authentication is required",
    "authenticationrequirederror",
    "provide a valid api key",
    "authorization header",
  ];
  return hints.some((hint) => haystack.includes(hint));
}
226
+
227
/** Returns the user-facing sentence explaining when the fetched content reaches the model. */
function describeDelivery(delivery: FetchDelivery): string {
  if (delivery === "nextPrompt") {
    return "Available to the model in your next prompt.";
  }
  return "Will be added after the current run finishes.";
}
232
+
233
/**
 * Builds the one-line summary of a successful fetch.
 *
 * For truncated results it reports retrieved, kept and omitted character
 * counts; otherwise only the number of characters added.
 */
function formatAddedNotice(result: FetchResult): string {
  const origin = `Fetched ${result.url} via ${result.mode}`;
  if (result.truncated) {
    const retrieved = formatCount(result.rawLength);
    const kept = formatCount(result.contextLength);
    const omitted = formatCount(result.omittedLength);
    return `${origin}. Retrieved ${retrieved} chars; kept ${kept} chars in context and omitted ${omitted}.`;
  }
  return `${origin} and added ${formatCount(result.contextLength)} chars to context.`;
}
239
+
240
/**
 * Parses the argument string of `/fetch`: the whole trimmed text is the URL.
 *
 * @param args Text following the command name.
 * @returns The validated URL.
 * @throws Error with the usage line when no argument was given; validation
 *   errors from `validateUrl` otherwise.
 */
function parseCommandArgs(args: string): string {
  const urlText = args.trim();
  if (urlText.length === 0) {
    throw new Error("Usage: /fetch <url>");
  }
  return validateUrl(urlText);
}
247
+
248
/**
 * Builds the text of the hidden context message: an intro line, a block of
 * "Name: value" metadata lines, a blank line, then the fetched content.
 * The path of the full response is listed only when one exists.
 */
function buildContextMessage(result: FetchResult): string {
  const metadata: Array<[string, string | number]> = [
    ["URL", result.url],
    ["Mode", result.mode],
    ["Status", result.status],
    ["Content-Type", result.contentType],
    ["Raw-Length", result.rawLength],
    ["Context-Length", result.contextLength],
    ["Omitted-Length", result.omittedLength],
  ];
  if (result.fullOutputPath) {
    metadata.push(["Full-Response-Path", result.fullOutputPath]);
  }

  return [
    "Fetched web content explicitly requested by the user:",
    ...metadata.map(([name, value]) => `${name}: ${value}`),
    "",
    result.content,
  ].join("\n");
}
265
+
266
/**
 * Sends the fetched content to the session as a hidden custom message.
 *
 * The idle state is read once, so the returned delivery label always matches
 * the path actually taken: when idle the message is sent directly, otherwise
 * it is sent as a follow-up.
 *
 * @param pi Extension API used to send the message.
 * @param ctx Command context; only its idle state is consulted.
 * @param result Fetch result to inject.
 * @returns "nextPrompt" when sent while idle, "afterCurrentRun" otherwise.
 */
function queueFetchedContext(
  pi: ExtensionAPI,
  ctx: ExtensionCommandContext,
  result: FetchResult,
): FetchDelivery {
  const idle = ctx.isIdle();
  const delivery: FetchDelivery = idle ? "nextPrompt" : "afterCurrentRun";
  const message = {
    customType: FETCH_CONTEXT_CUSTOM_TYPE,
    content: buildContextMessage(result),
    // Hidden from the chat view; the visible row is sent separately.
    display: false,
    details: {
      url: result.url,
      mode: result.mode,
      status: result.status,
      contentType: result.contentType,
      truncated: result.truncated,
      rawLength: result.rawLength,
      contextLength: result.contextLength,
      omittedLength: result.omittedLength,
      fullOutputPath: result.fullOutputPath,
    },
  };

  if (idle) {
    pi.sendMessage(message);
  } else {
    pi.sendMessage(message, { deliverAs: "followUp" });
  }
  return delivery;
}
297
+
298
/**
 * Sends the visible result row for a successful fetch.
 *
 * The row's details carry everything the renderer needs, including the exact
 * content injected into context. While a run is in progress the message is
 * sent with `deliverAs: "steer"`.
 *
 * @param pi Extension API used to send the message.
 * @param ctx Command context; only its idle state is consulted.
 * @param result Fetch result to report.
 * @param delivery Delivery mode reported in the row.
 */
function sendFetchResultMessage(
  pi: ExtensionAPI,
  ctx: ExtensionCommandContext,
  result: FetchResult,
  delivery: FetchDelivery,
): void {
  const { url, mode, status, contentType, truncated, rawLength, contextLength, omittedLength } = result;
  const details: FetchDisplayDetails = {
    ok: true,
    delivery,
    url,
    mode,
    status,
    contentType,
    truncated,
    rawLength,
    contextLength,
    omittedLength,
    contextContent: result.content,
    fullOutputPath: result.fullOutputPath,
  };
  const message = {
    customType: FETCH_RESULT_CUSTOM_TYPE,
    content: formatAddedNotice(result),
    display: true,
    details,
  };

  if (!ctx.isIdle()) {
    pi.sendMessage(message, { deliverAs: "steer" });
    return;
  }
  pi.sendMessage(message);
}
332
+
333
/**
 * Sends the visible result row for a failed fetch.
 *
 * The message text is "Fetch failed[ for <url>]: <error>". While a run is in
 * progress the message is sent with `deliverAs: "steer"`.
 *
 * @param pi Extension API used to send the message.
 * @param ctx Command context; only its idle state is consulted.
 * @param error Error text to show.
 * @param url URL that was being fetched, when known.
 * @param mode Fetch mode that was in use, when known.
 */
function sendFetchFailureMessage(
  pi: ExtensionAPI,
  ctx: ExtensionCommandContext,
  error: string,
  url?: string,
  mode?: Exclude<FetchMode, "auto">,
): void {
  const details: FetchDisplayDetails = { ok: false, error, url, mode };
  const subject = url ? `Fetch failed for ${url}` : "Fetch failed";
  const message = {
    customType: FETCH_RESULT_CUSTOM_TYPE,
    content: `${subject}: ${error}`,
    display: true,
    details,
  };

  if (!ctx.isIdle()) {
    pi.sendMessage(message, { deliverAs: "steer" });
    return;
  }
  pi.sendMessage(message);
}
361
+
362
/**
 * Shows an animated footer status while a fetch is running.
 *
 * Does nothing when the context has no UI. Otherwise the status is drawn
 * immediately and redrawn every tick with the next spinner frame.
 *
 * @param ctx Command context providing the UI.
 * @param url URL being fetched; shortened to 72 characters for display.
 * @param mode Fetch mode shown in the label.
 * @returns A function that stops the animation and clears the status.
 */
function startFetchStatus(
  ctx: ExtensionCommandContext,
  url: string,
  mode: Exclude<FetchMode, "auto">,
): () => void {
  if (!ctx.hasUI) {
    return () => {};
  }

  let tick = 0;
  const draw = (): void => {
    const glyph = FETCH_STATUS_FRAMES[tick % FETCH_STATUS_FRAMES.length]!;
    const spinner = ctx.ui.theme.fg("accent", glyph);
    const label = ctx.ui.theme.fg("dim", ` Fetching ${shortenUrl(url, 72)} via ${mode}...`);
    ctx.ui.setStatus(FETCH_STATUS_KEY, spinner + label);
    tick += 1;
  };

  draw();
  const timer = setInterval(draw, FETCH_STATUS_TICK_MS);

  return () => {
    clearInterval(timer);
    // Passing undefined removes the status entry.
    ctx.ui.setStatus(FETCH_STATUS_KEY, undefined);
  };
}
387
+
388
/**
 * Builds the short collapsed preview of captured content.
 *
 * Only a truncation marker at the very end of the content is stripped; the
 * same text appearing earlier in the page is kept as ordinary content. Blank
 * lines are skipped, at most `maxLines` lines are shown, and lines longer than
 * `maxCharsPerLine` are cut and suffixed with "…".
 *
 * @param content Content that was injected into context.
 * @param maxLines Maximum number of preview lines; defaults to 3.
 * @param maxCharsPerLine Maximum characters per preview line; defaults to 100.
 * @returns The preview text and whether anything was left out or shortened.
 */
function buildPreview(content: string, maxLines = 3, maxCharsPerLine = 100): { text: string; clipped: boolean } {
  const markerSuffix = `\n${CONTENT_TRUNCATED_MARKER}`;
  const withoutMarker = content.endsWith(markerSuffix) ? content.slice(0, -markerSuffix.length) : content;
  const normalized = withoutMarker.trim();
  if (!normalized) {
    return { text: "(empty response body)", clipped: false };
  }

  const sourceLines = normalized
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter(Boolean);
  const previewLines = sourceLines
    .slice(0, maxLines)
    .map((line) => (line.length <= maxCharsPerLine ? line : `${line.slice(0, maxCharsPerLine).trimEnd()}…`));

  // Clipped when lines were dropped or any shown line was shortened.
  const clipped =
    sourceLines.length > previewLines.length || previewLines.some((line, index) => line !== sourceLines[index]);
  return { text: previewLines.join("\n"), clipped };
}
408
+
409
/**
 * Renders the visible fetch result row.
 *
 * Without details the plain message content is shown. Failures list the URL,
 * mode and error. Successes show a header, the transport line, the size line
 * and the delivery note, followed by either a short preview (collapsed) or
 * the full captured content (expanded).
 *
 * @param message Message to render; `details` is read as `FetchDisplayDetails`.
 * @param expanded Whether the row is expanded.
 * @param theme Theme object; its `fg(color, text)` and `bold(text)` are used.
 * @returns A `Text` component holding the rendered row.
 */
function renderFetchResult(message: { content: string; details?: unknown }, expanded: boolean, theme: any): Text {
  const details = message.details as FetchDisplayDetails | undefined;
  if (!details) {
    return new Text(message.content, 0, 0);
  }

  const rows: string[] = [];

  if (!details.ok) {
    rows.push(theme.fg("error", "✗ fetch failed"));
    if (details.url) {
      rows.push(theme.fg("muted", shortenUrl(details.url)));
    }
    if (details.mode) {
      rows.push(theme.fg("dim", `Mode: ${details.mode}`));
    }
    rows.push(theme.fg("error", details.error));
    return new Text(rows.join("\n"), 0, 0);
  }

  rows.push(theme.fg("success", "✓ ") + theme.fg("accent", "fetch ") + theme.fg("muted", shortenUrl(details.url)));
  rows.push(theme.fg("dim", `via ${details.mode} • HTTP ${details.status} • ${details.contentType}`));
  rows.push(
    details.truncated
      ? theme.fg(
          "warning",
          `${formatCount(details.contextLength)} of ${formatCount(details.rawLength)} chars were added to context (${formatCount(details.omittedLength)} omitted)`,
        )
      : theme.fg("success", `${formatCount(details.contextLength)} chars added to context`),
  );
  rows.push(theme.fg("muted", describeDelivery(details.delivery)));

  if (!expanded) {
    const preview = buildPreview(details.contextContent);
    // The empty entry yields the blank line that separates summary and preview.
    rows.push("", theme.fg("dim", preview.text));
    if (preview.clipped || details.truncated) {
      rows.push(theme.fg("dim", "…"));
    }
    rows.push(theme.fg("muted", `(${keyHint("app.tools.expand", "to inspect captured content")})`));
    return new Text(rows.join("\n"), 0, 0);
  }

  if (details.truncated) {
    rows.push(
      "",
      theme.fg(
        "warning",
        `Only the first ${formatCount(details.contextLength)} chars below were injected into model context.`,
      ),
    );
  }
  if (details.fullOutputPath) {
    rows.push(theme.fg("muted", `Full response saved to: ${details.fullOutputPath}`));
  }
  rows.push("", theme.fg("accent", theme.bold("Captured context content")));
  rows.push(details.contextContent);

  return new Text(rows.join("\n"), 0, 0);
}
470
+
471
/**
 * Resolves the Jina API key used for authenticated reader requests.
 *
 * Lookup order:
 * 1. `JINA_API_KEY`, then `JINA_API_TOKEN`, from the environment (trimmed; blank values are ignored).
 * 2. The first non-blank line printed by `pass <DEFAULT_PASS_PATH>`.
 *
 * @returns The resolved key.
 * @throws Error when the environment has no key and the `pass` lookup fails or prints nothing;
 *   the underlying failure message is appended in parentheses.
 */
async function getJinaApiKey(): Promise<string> {
  const fromEnvironment = process.env.JINA_API_KEY?.trim() || process.env.JINA_API_TOKEN?.trim();
  if (fromEnvironment) {
    return fromEnvironment;
  }

  try {
    const { stdout } = await execFileAsync("pass", [DEFAULT_PASS_PATH]);
    // Only the first non-blank line of the pass output is treated as the secret.
    const secret = stdout
      .split(/\r?\n/)
      .map((rawLine) => rawLine.trim())
      .find((candidate) => candidate.length > 0);
    if (secret === undefined) {
      throw new Error(`pass ${DEFAULT_PASS_PATH} returned no secret.`);
    }
    return secret;
  } catch (failure) {
    // Both a failed `pass` invocation and the "no secret" error above end up here and are
    // folded into a single actionable message.
    const reason = failure instanceof Error ? failure.message : String(failure);
    throw new Error(
      `Jina API key not available. Set JINA_API_KEY or store it in pass at ${DEFAULT_PASS_PATH}. (${reason})`,
    );
  }
}
494
+
495
/**
 * Writes `text` to `response.txt` inside a freshly created temporary directory
 * (prefix `pi-fetch-` under the OS temp dir) and returns the file's path.
 *
 * Nothing in this function deletes the file or directory afterwards.
 *
 * @param text - Content to write, encoded as UTF-8.
 * @returns Path of the written file.
 */
async function persistFullOutput(text: string): Promise<string> {
  const directoryPrefix = join(tmpdir(), "pi-fetch-");
  const outputDirectory = await mkdtemp(directoryPrefix);
  const outputFile = join(outputDirectory, "response.txt");
  await writeFile(outputFile, text, "utf8");
  return outputFile;
}
501
+
502
/**
 * Performs a `fetch` with a hard timeout and reads the whole response body as text.
 *
 * The timeout covers both the request and the body read, because the timer is only
 * cleared once `response.text()` has settled.
 *
 * @param url - Absolute URL to request.
 * @param init - Fetch options; any `signal` in it is replaced by the internal timeout signal.
 * @param timeoutMs - Milliseconds to wait before the request is aborted.
 * @returns The response object and its trimmed body, or the placeholder
 *   `"(empty response body)"` when the trimmed body is empty.
 * @throws Error `Request timed out after <timeoutMs>ms.` when the timeout fired; any other
 *   fetch or body-read error is rethrown unchanged.
 */
async function fetchText(url: string, init: RequestInit, timeoutMs: number): Promise<{ response: Response; body: string }> {
  const controller = new AbortController();
  // Track the timeout ourselves: depending on the runtime and on where the abort lands
  // (request vs. body read), the rejection is not guaranteed to be named "AbortError".
  let timedOut = false;
  const timer = setTimeout(() => {
    timedOut = true;
    controller.abort();
  }, timeoutMs);
  try {
    const response = await fetch(url, {
      ...init,
      signal: controller.signal,
    });
    const body = (await response.text()).trim() || "(empty response body)";
    return { response, body };
  } catch (error) {
    if (timedOut || (error instanceof Error && error.name === "AbortError")) {
      throw new Error(`Request timed out after ${timeoutMs}ms.`);
    }
    throw error;
  } finally {
    clearTimeout(timer);
  }
}
521
+
522
/**
 * Fetches `url` directly (mode `"raw"`) and shapes the body for injection into context.
 *
 * The body is passed through `preprocessFetchedBody`, then cut by `truncateForContext`.
 * When truncation happened, the full preprocessed body is also written to disk via
 * `persistFullOutput`.
 *
 * @param url - URL to request.
 * @param timeoutMs - Timeout forwarded to `fetchText`.
 * @param maxContextChars - Limit forwarded to `truncateForContext`.
 * @returns The fetch result; `contentType` falls back to `"unknown"` when the header is absent.
 * @throws HttpStatusError when the response status is not OK.
 */
async function runRawRequest(url: string, timeoutMs: number, maxContextChars: number): Promise<FetchResult> {
  const requestInit = {
    headers: {
      "user-agent": DEFAULT_USER_AGENT,
      accept: "text/plain,text/html,application/json,application/xml;q=0.9,*/*;q=0.8",
      "accept-language": "en-US,en;q=0.9",
    },
  };
  const fetched = await fetchText(url, requestInit, timeoutMs);
  const httpResponse = fetched.response;
  if (!httpResponse.ok) {
    throw new HttpStatusError(httpResponse.status, httpResponse.statusText, fetched.body);
  }

  const cleanedBody = preprocessFetchedBody(url, fetched.body);
  const cut = truncateForContext(cleanedBody, maxContextChars);

  // The full body is only persisted when part of it was left out of the context.
  let fullOutputPath: string | undefined;
  if (cut.truncated) {
    fullOutputPath = await persistFullOutput(cleanedBody);
  }

  return {
    url,
    mode: "raw",
    status: httpResponse.status,
    contentType: httpResponse.headers.get("content-type") || "unknown",
    content: cut.content,
    truncated: cut.truncated,
    rawLength: cleanedBody.length,
    contextLength: cut.contextLength,
    omittedLength: cut.omittedLength,
    fullOutputPath,
  };
}
553
+
554
/**
 * Fetches `url` through the Jina reader (mode `"jina"`) by requesting
 * `JINA_READER_URL` with the target URL appended.
 *
 * Post-processing mirrors the raw path: `preprocessFetchedBody`, then
 * `truncateForContext`, and the full preprocessed body is written to disk only
 * when truncation happened.
 *
 * @param url - Target URL to read.
 * @param timeoutMs - Timeout forwarded to `fetchText`.
 * @param maxContextChars - Limit forwarded to `truncateForContext`.
 * @param apiKey - Optional key; when provided it is sent as a `Bearer` authorization header.
 * @returns The fetch result; `contentType` falls back to `"text/plain"` when the header is absent.
 * @throws HttpStatusError when the reader responds with a non-OK status.
 */
async function runJinaRequest(
  url: string,
  timeoutMs: number,
  maxContextChars: number,
  apiKey?: string,
): Promise<FetchResult> {
  const headers: Record<string, string> = {
    "user-agent": DEFAULT_USER_AGENT,
    accept: "text/plain",
    "accept-language": "en-US,en;q=0.9",
    // The authorization header is only present for keyed requests.
    ...(apiKey ? { authorization: `Bearer ${apiKey}` } : {}),
  };

  const readerUrl = `${JINA_READER_URL}${url}`;
  const fetched = await fetchText(readerUrl, { headers }, timeoutMs);
  const httpResponse = fetched.response;
  if (!httpResponse.ok) {
    throw new HttpStatusError(httpResponse.status, httpResponse.statusText, fetched.body);
  }

  const cleanedBody = preprocessFetchedBody(url, fetched.body);
  const cut = truncateForContext(cleanedBody, maxContextChars);

  let fullOutputPath: string | undefined;
  if (cut.truncated) {
    fullOutputPath = await persistFullOutput(cleanedBody);
  }

  return {
    url,
    mode: "jina",
    status: httpResponse.status,
    contentType: httpResponse.headers.get("content-type") || "text/plain",
    content: cut.content,
    truncated: cut.truncated,
    rawLength: cleanedBody.length,
    contextLength: cut.contextLength,
    omittedLength: cut.omittedLength,
    fullOutputPath,
  };
}
590
+
591
/**
 * Fetches `url` using the mode chosen by `resolveMode(url, mode)`.
 *
 * - `"raw"`: a single direct request.
 * - otherwise: an unauthenticated Jina reader request first. If that fails with an
 *   `HttpStatusError` that `shouldRetryWithJinaApiKey` accepts, the API key is resolved
 *   and the request is repeated once with it.
 *
 * @param url - URL to fetch.
 * @param mode - Requested fetch mode, resolved against the URL.
 * @param timeoutMs - Per-request timeout in milliseconds.
 * @param maxContextChars - Maximum number of characters to keep for context.
 * @returns The result of whichever request succeeded.
 * @throws The original error when no retry applies, or any error from key lookup or the retry.
 */
async function fetchUrl(
  url: string,
  mode: FetchMode,
  timeoutMs: number,
  maxContextChars: number,
): Promise<FetchResult> {
  if (resolveMode(url, mode) === "raw") {
    return runRawRequest(url, timeoutMs, maxContextChars);
  }

  try {
    return await runJinaRequest(url, timeoutMs, maxContextChars);
  } catch (failure) {
    const retryWithKey =
      failure instanceof HttpStatusError && shouldRetryWithJinaApiKey(failure.status, failure.detail);
    if (!retryWithKey) {
      throw failure;
    }
    // The key is looked up lazily, only after the anonymous attempt was rejected.
    const jinaKey = await getJinaApiKey();
    return runJinaRequest(url, timeoutMs, maxContextChars, jinaKey);
  }
}
612
+
613
/**
 * Extension entry point. Wires three things into pi:
 *
 * 1. A `context` hook that returns the event's messages without those whose `customType`
 *    is `FETCH_RESULT_CUSTOM_TYPE`.
 * 2. A message renderer for `FETCH_RESULT_CUSTOM_TYPE` that delegates to `renderFetchResult`.
 * 3. The `fetch` command, which fetches a URL in `"auto"` mode with the default timeout
 *    and context limit, queues the content, and posts a result or failure message.
 */
export default function (pi: ExtensionAPI) {
  pi.on("context", async (event) => {
    const messages = event.messages.filter((candidate) => {
      const { customType } = candidate as { customType?: string };
      return customType !== FETCH_RESULT_CUSTOM_TYPE;
    });
    return { messages };
  });

  pi.registerMessageRenderer(FETCH_RESULT_CUSTOM_TYPE, (message, options, theme) =>
    renderFetchResult(message as { content: string; details?: unknown }, options.expanded, theme),
  );

  pi.registerCommand("fetch", {
    description: "Fetch a URL and add the content to session context for the next prompt",
    handler: async (args, ctx) => {
      // Declared outside the try block so the failure message can report whatever was
      // resolved before the error occurred.
      let targetUrl: string | undefined;
      let resolvedMode: Exclude<FetchMode, "auto"> | undefined;
      let stopStatus: () => void = () => {};

      try {
        targetUrl = parseCommandArgs(args);
        resolvedMode = resolveMode(targetUrl, "auto");
        stopStatus = startFetchStatus(ctx, targetUrl, resolvedMode);

        const fetched = await fetchUrl(targetUrl, "auto", DEFAULT_TIMEOUT_MS, DEFAULT_MAX_CONTEXT_CHARS);
        const delivery = queueFetchedContext(pi, ctx, fetched);
        sendFetchResultMessage(pi, ctx, fetched, delivery);

        if (ctx.hasUI) {
          if (delivery === "afterCurrentRun") {
            ctx.ui.notify(`${formatAddedNotice(fetched)} It will be appended after the current run finishes.`, "info");
          }
        }
      } catch (failure) {
        const reason = failure instanceof Error ? failure.message : String(failure);
        sendFetchFailureMessage(pi, ctx, reason, targetUrl, resolvedMode);
        // An extra notification is raised only when a UI exists and the agent is not idle.
        if (ctx.hasUI) {
          if (!ctx.isIdle()) {
            ctx.ui.notify(`Fetch failed: ${reason}`, "warning");
          }
        }
      } finally {
        stopStatus();
      }
    },
  });
}
package/package.json ADDED
@@ -0,0 +1,29 @@
1
+ {
2
+ "name": "@ibeex/pi-fetch",
3
+ "version": "0.1.0",
4
+ "description": "pi extension that fetches web content into session context with Jina/raw fallback and cleaned previews",
5
+ "type": "module",
6
+ "keywords": ["pi-package"],
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "git@github.com:ibeex/pi-fetch.git"
10
+ },
11
+ "homepage": "https://github.com/ibeex/pi-fetch",
12
+ "publishConfig": {
13
+ "access": "public"
14
+ },
15
+ "scripts": {
16
+ "clean": "echo 'nothing to clean'",
17
+ "build": "echo 'nothing to build'",
18
+ "check": "echo 'nothing to check'"
19
+ },
20
+ "pi": {
21
+ "extensions": [
22
+ "./index.ts"
23
+ ]
24
+ },
25
+ "peerDependencies": {
26
+ "@mariozechner/pi-coding-agent": "*",
27
+ "@mariozechner/pi-tui": "*"
28
+ }
29
+ }