pi-simple-web 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +51 -0
- package/index.ts +76 -0
- package/package.json +64 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 David Tran
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# pi-simple-web
|
|
2
|
+
|
|
3
|
+
Strict separation-of-concerns web tools for the Pi coding agent.
|
|
4
|
+
|
|
5
|
+
`pi-simple-web` replaces broad, ambiguous URL tools with narrow tools that each do one job and reject the wrong input with an actionable suggestion.
|
|
6
|
+
|
|
7
|
+
## Tools
|
|
8
|
+
|
|
9
|
+
- `read_webpage` - ordinary webpages/articles/docs only. Rejects GitHub, YouTube, PDFs, media, and direct search tasks.
|
|
10
|
+
- `read_github_repo` - GitHub repository root/tree URLs only. Rejects GitHub file/blob/raw URLs.
|
|
11
|
+
- `read_github_file` - GitHub blob/raw file URLs only. Rejects repo roots and trees.
|
|
12
|
+
- `read_pdf` - direct remote PDF URLs only.
|
|
13
|
+
- `read_youtube_transcript` - YouTube transcript/caption extraction only. Requires `yt-dlp`.
|
|
14
|
+
- `inspect_video_frames` - extracts one image frame from a YouTube video at an explicit timestamp. Requires `yt-dlp` and `ffmpeg`.
|
|
15
|
+
- `search_web` - plain-language web search query only. Rejects direct URLs.
|
|
16
|
+
- `get_web_content` - retrieves cached full output using a `contentId` returned by another tool.
|
|
17
|
+
- `read_url` - deprecated compatibility alias that intentionally errors to reduce ambiguity.
|
|
18
|
+
|
|
19
|
+
## Install
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
pi install npm:pi-simple-web
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
The YouTube transcript and frame tools need `yt-dlp` and `ffmpeg` available on your `PATH`. With Homebrew:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
brew install yt-dlp ffmpeg
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Design
|
|
32
|
+
|
|
33
|
+
The package enforces routing in code, not just in descriptions:
|
|
34
|
+
|
|
35
|
+
- A GitHub URL passed to `read_webpage` is rejected with a suggestion to use `read_github_repo` or `read_github_file`.
|
|
36
|
+
- A direct URL passed to `search_web` is rejected with a suggestion to use a reader.
|
|
37
|
+
- A repo URL passed to `read_github_file` is rejected with a suggestion to use `read_github_repo`.
|
|
38
|
+
- A PDF passed to `read_webpage` is rejected with a suggestion to use `read_pdf`.
|
|
39
|
+
|
|
40
|
+
This keeps the agent from accidentally switching modes because of optional parameters.
|
|
41
|
+
|
|
42
|
+
## Publishing
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
npm pack --dry-run
|
|
46
|
+
npm publish --access public
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## License
|
|
50
|
+
|
|
51
|
+
MIT
|
package/index.ts
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
2
|
+
import { Type } from "typebox";
|
|
3
|
+
import { Text, truncateToWidth } from "@mariozechner/pi-tui";
|
|
4
|
+
import { Readability } from "@mozilla/readability";
|
|
5
|
+
import { parseHTML } from "linkedom";
|
|
6
|
+
import TurndownService from "turndown";
|
|
7
|
+
import { randomUUID } from "node:crypto";
|
|
8
|
+
import { execFileSync } from "node:child_process";
|
|
9
|
+
import { getDocumentProxy } from "unpdf";
|
|
10
|
+
|
|
11
|
+
// Per-request network timeout in milliseconds; fetchText and readPdf pass it to AbortSignal.timeout.
const TIMEOUT_MS = 30000;
|
|
12
|
+
// Maximum number of characters store() returns inline; longer output is cut and must be fetched via get_web_content.
const MAX_INLINE = 50000;
|
|
13
|
+
// Maximum number of directory entries readGitHubRepo lists before appending a truncation note.
const MAX_TREE = 200;
|
|
14
|
+
// Maximum number of pages readPdf extracts text from; later pages are skipped.
const MAX_PDF_PAGES = 100;
|
|
15
|
+
// Hostname for raw GitHub file contents; parseGitHub matches it and rawUrl builds URLs on it.
const RAW_GITHUB = "raw.githubusercontent.com";
|
|
16
|
+
// Shared HTML-to-Markdown converter (ATX headings, fenced code blocks) used by readWebpage.
const turndown = new TurndownService({ headingStyle: "atx", codeBlockStyle: "fenced" });
|
|
17
|
+
// In-memory store of full tool outputs keyed by contentId. store() adds entries and get_web_content reads them;
// NOTE(review): no eviction is visible in this file, so the map grows for the lifetime of the process.
const cache = new Map<string, Cached>();
|
|
18
|
+
|
|
19
|
+
/** Cache entry written by store(): `content` holds the full envelope text and `createdAt` is a Date.now() timestamp. */
type Cached = { id: string; type: string; title: string; url: string; content: string; method: string; createdAt: number };
|
|
20
|
+
/** Outcome of a reader or search function. Tool wrappers treat a truthy `error` as failure and forward `suggestedTool` in the error details. */
type Result = { type: string; url: string; title: string; content: string; method: string; error?: string; suggestedTool?: string };
|
|
21
|
+
/** Parsed form of a GitHub URL as produced by parseGitHub. `type` tells repo root, tree, blob and raw-file URLs apart from unsupported pages, for which `unsupportedReason` is filled in. */
type GitHubInfo = { owner: string; repo: string; ref?: string; path?: string; type: "root" | "tree" | "blob" | "raw" | "unsupported"; unsupportedReason?: string };
|
|
22
|
+
|
|
23
|
+
/**
 * Builds a successful tool result containing a single text part.
 *
 * @param text - Text returned as the only entry of `content`.
 * @param details - Structured metadata attached to the result; defaults to an empty object.
 * @returns An object with a one-element `content` array and the given `details`.
 */
function ok(text: string, details: Record<string, unknown> = {}) {
  const textPart = { type: "text", text };
  return { content: [textPart], details };
}
|
|
24
|
+
/**
 * Builds a failed tool result in the same shape as ok().
 *
 * The visible text is the message prefixed with "Error: ". The details object
 * carries the raw message under `error`; keys in `details` are spread after it,
 * so a caller-supplied `error` key takes precedence.
 *
 * @param message - Human-readable failure description.
 * @param details - Extra metadata merged into the result details.
 */
function err(message: string, details: Record<string, unknown> = {}) {
  const merged = { error: message, ...details };
  return ok(`Error: ${message}`, merged);
}
|
|
25
|
+
/**
 * Parses a string as a URL and accepts it only when the scheme is http or https.
 *
 * @param url - Candidate URL string.
 * @returns The parsed URL, or null when parsing fails or the scheme is something else.
 */
function httpUrl(url: string): URL | null {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return null;
  }
  const allowedSchemes = ["http:", "https:"];
  return allowedSchemes.includes(parsed.protocol) ? parsed : null;
}
|
|
26
|
+
/**
 * Extracts the video id from a YouTube URL.
 *
 * Recognised forms: `youtu.be/<id>`, `youtube.com/watch?v=<id>` and
 * `youtube.com/{shorts,live,embed,v}/<id>` (also on `www.` and `m.` hosts).
 *
 * @param url - Candidate URL string.
 * @returns The id, or null when the URL is unparsable or not a recognised video URL.
 *          A `/watch` URL returns whatever `v` holds, which may be an empty string.
 */
function ytId(url: string): string | null {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return null;
  }

  const host = parsed.hostname.toLowerCase().replace(/^www\./, "");
  const segments = parsed.pathname.split("/").filter(Boolean);

  switch (host) {
    case "youtu.be":
      return segments[0] || null;
    case "youtube.com":
    case "m.youtube.com": {
      if (parsed.pathname === "/watch") return parsed.searchParams.get("v");
      const isVideoPath = ["shorts", "live", "embed", "v"].includes(segments[0]);
      return isVideoPath ? segments[1] || null : null;
    }
    default:
      return null;
  }
}
|
|
27
|
+
/** Reports whether the URL's path ends in `.pdf`, ignoring case. Query string and fragment are not considered. */
function isPdf(u: URL) {
  const lowerPath = u.pathname.toLowerCase();
  return lowerPath.slice(-4) === ".pdf";
}
|
|
28
|
+
/** Reports whether the URL's path ends in one of the known video/audio file extensions, ignoring case. */
function isMedia(u: URL) {
  const mediaExtension = /\.(mp4|mov|webm|mkv|avi|mp3|wav|m4a|flac)$/i;
  return mediaExtension.test(u.pathname);
}
|
|
29
|
+
/**
 * Derives a readable title from the last path segment of a URL.
 *
 * The segment is percent-decoded, a trailing `.pdf` is removed, and runs of
 * `-`/`_` become single spaces.
 *
 * @param url - URL to derive the title from.
 * @param fallback - Returned when the URL is unparsable, has no path segment,
 *                   cannot be decoded, or cleans up to an empty string.
 */
function titleFromUrl(url: string, fallback = "document") {
  try {
    const segments = new URL(url).pathname.split("/").filter(Boolean);
    const lastSegment = segments.pop() || fallback;
    const cleaned = decodeURIComponent(lastSegment)
      .replace(/\.pdf$/i, "")
      .replace(/[-_]+/g, " ")
      .trim();
    return cleaned || fallback;
  } catch {
    return fallback;
  }
}
|
|
30
|
+
|
|
31
|
+
/**
 * Classifies a GitHub or raw.githubusercontent.com URL.
 *
 * @param url - Candidate URL string.
 * @returns null when the URL is not HTTP(S), is not on a GitHub host, or (on
 *          github.com) has fewer than two path segments. Otherwise a GitHubInfo
 *          whose `type` is "root", "tree", "blob", "raw" or "unsupported".
 *
 * Path segments are percent-decoded. A malformed escape sequence makes
 * decodeURIComponent throw, so that case is reported as an "unsupported"
 * GitHub URL instead of letting the exception escape to callers such as
 * reject(), which invoke this function outside any try/catch.
 *
 * The ref is taken to be the single segment after `tree`/`blob` (or the third
 * segment of a raw URL); branch names containing `/` are therefore split
 * between `ref` and `path`.
 */
function parseGitHub(url: string): GitHubInfo | null {
  const u = httpUrl(url);
  if (!u) return null;

  const host = u.hostname.toLowerCase();
  const isRawHost = host === RAW_GITHUB;
  if (!isRawHost && host !== "github.com" && host !== "www.github.com") return null;

  let p: string[];
  try {
    p = u.pathname.split("/").filter(Boolean).map((segment) => decodeURIComponent(segment));
  } catch {
    return { owner: "", repo: "", type: "unsupported", unsupportedReason: "GitHub URL contains malformed percent-encoding" };
  }

  if (isRawHost) {
    if (p.length < 4) return { owner: "", repo: "", type: "unsupported", unsupportedReason: "Raw GitHub URL is missing owner/repo/ref/path" };
    return { owner: p[0], repo: p[1], ref: p[2], path: p.slice(3).join("/"), type: "raw" };
  }

  if (p.length < 2) return null;
  const owner = p[0];
  const repo = p[1].replace(/\.git$/, "");
  if (p.length === 2) return { owner, repo, type: "root" };

  // Repository sub-pages that are not code content and so cannot be listed or read as files.
  const unsupported = new Set(["issues", "pull", "pulls", "discussions", "releases", "wiki", "actions", "settings", "security", "projects", "graphs", "compare", "commits", "tags", "branches", "stargazers", "watchers", "network", "forks", "packages"]);
  if (unsupported.has(p[2])) return { owner, repo, type: "unsupported", unsupportedReason: `GitHub ${p[2]} pages are not repository code content` };

  if ((p[2] === "tree" || p[2] === "blob") && p[3]) return { owner, repo, type: p[2] as "tree" | "blob", ref: p[3], path: p.slice(4).join("/") };

  return { owner, repo, type: "unsupported", unsupportedReason: "Only GitHub repository root, tree, blob, and raw URLs are supported" };
}
|
|
43
|
+
/**
 * Builds the raw.githubusercontent.com URL for a file in a repository.
 *
 * parseGitHub stores owner, repo, ref and path percent-decoded, so every
 * segment is re-encoded here. Without that, a file name containing `#`, `?`
 * or `%` would be read as a fragment, query or escape sequence and the wrong
 * resource would be requested. `/` separators in `path` and `ref` are kept.
 *
 * @param g - Parsed GitHub URL supplying owner, repo and the default ref.
 * @param path - Repository-relative file path (decoded form).
 * @param ref - Branch, tag or commit; defaults to `g.ref`, or "HEAD" when absent.
 */
function rawUrl(g: GitHubInfo, path: string, ref = g.ref ?? "HEAD") {
  const encodeSegments = (value: string) => value.split("/").map((segment) => encodeURIComponent(segment)).join("/");
  return `https://${RAW_GITHUB}/${encodeURIComponent(g.owner)}/${encodeURIComponent(g.repo)}/${encodeSegments(ref)}/${encodeSegments(path)}`;
}
|
|
44
|
+
/**
 * Performs a GET request and reads the body as text when the response is successful.
 *
 * The request is aborted after TIMEOUT_MS, or earlier if the caller's signal fires.
 *
 * @param url - URL to fetch.
 * @param signal - Optional caller-supplied abort signal, combined with the timeout.
 * @param accept - Value for the Accept header.
 * @returns The Response plus its text; `text` is undefined when `response.ok` is false.
 */
async function fetchText(url: string, signal?: AbortSignal, accept = "text/html,application/xhtml+xml,application/xml,text/plain,application/json;q=0.9,*/*;q=0.8") {
  const timeout = AbortSignal.timeout(TIMEOUT_MS);
  const combined = signal ? AbortSignal.any([signal, timeout]) : timeout;
  const headers = { "User-Agent": "Mozilla/5.0 pi-simple-web/0.1", Accept: accept };

  const response = await fetch(url, { signal: combined, headers });
  const text = response.ok ? await response.text() : undefined;
  return { response, text };
}
|
|
45
|
+
/**
 * Renders a Result as Markdown: a heading (title, or type when the title is
 * empty), a blank line, Source/Type/Method lines, a blank line, then the content.
 */
function envelope(r: Result) {
  const heading = `# ${r.title || r.type}`;
  const meta = `Source: ${r.url}\nType: ${r.type}\nMethod: ${r.method}`;
  return `${heading}\n\n${meta}\n\n${r.content}`;
}
|
|
46
|
+
/**
 * Caches the full rendered output of a Result and returns the inline text.
 *
 * The full envelope is stored in `cache` under a fresh id (the first 12
 * characters of a random UUID). When it is longer than MAX_INLINE characters,
 * the returned text is cut at that length and ends with a note naming the id.
 *
 * @returns `id`, the full length in `chars`, a `truncated` flag, and the inline `text`.
 */
function store(r: Result) {
  const full = envelope(r);
  const id = randomUUID().slice(0, 12);

  cache.set(id, {
    id,
    type: r.type,
    title: r.title,
    url: r.url,
    content: full,
    method: r.method,
    createdAt: Date.now(),
  });

  const truncated = full.length > MAX_INLINE;
  let text = full;
  if (truncated) {
    text = `${full.slice(0, MAX_INLINE)}\n\n[Content truncated at ${MAX_INLINE} chars. Use get_web_content with contentId: ${id}.]`;
  }
  return { id, chars: full.length, truncated, text };
}
|
|
47
|
+
/**
 * Routing guard: decides whether `url` is the wrong kind of input for the tool
 * identified by `expected`.
 *
 * Checks run in this order and the first match wins:
 *  1. not a valid HTTP(S) URL;
 *  2. a GitHub URL given to the "webpage" reader;
 *  3. a non-GitHub URL given to "github_repo" / "github_file";
 *  4. a YouTube video URL;
 *  5. a `.pdf` path;
 *  6. a media-file path.
 *
 * @param url - URL supplied by the caller.
 * @param expected - Result type of the calling reader, e.g. "webpage".
 * @returns A Result with method "reject" and an `error` (plus `suggestedTool`
 *          where there is one), or null when the URL may proceed.
 */
function reject(url: string, expected: string): Result | null {
  // Builds the rejection; `suggestedTool` is only present as a key when one is given.
  const refuse = (error: string, suggestedTool?: string): Result => {
    const base: Result = { type: expected, url, title: "", content: "", method: "reject", error };
    return suggestedTool === undefined ? base : { ...base, suggestedTool };
  };

  const parsed = httpUrl(url);
  if (!parsed) return refuse("Invalid or non-HTTP(S) URL");

  const gh = parseGitHub(url);
  if (gh && expected === "webpage") {
    const isFile = gh.type === "blob" || gh.type === "raw";
    const suggestedTool = isFile ? "read_github_file" : "read_github_repo";
    return refuse(`This is a GitHub URL. Use ${suggestedTool} instead.`, suggestedTool);
  }

  const wantsGitHub = expected === "github_repo" || expected === "github_file";
  if (!gh && wantsGitHub) {
    return refuse("This is not a GitHub URL. Use read_webpage for ordinary webpages.", "read_webpage");
  }

  if (ytId(url)) {
    return refuse("This appears to be a YouTube URL. Use read_youtube_transcript or inspect_video_frames instead.", "read_youtube_transcript");
  }
  if (isPdf(parsed)) {
    return refuse("This appears to be a PDF. Use read_pdf instead.", "read_pdf");
  }
  if (isMedia(parsed)) {
    return refuse("This appears to be a media file. Use a video/audio-specific tool instead.", "inspect_video_frames");
  }
  return null;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Reads an ordinary webpage and returns its main content as Markdown.
 *
 * URLs that reject() routes elsewhere are returned as rejections. Responses
 * whose content-type does not contain "html" are passed through as plain text.
 * HTML is reduced to its article with Readability and converted with turndown.
 *
 * @param url - Page URL.
 * @param signal - Optional abort signal forwarded to the fetch.
 * @returns A "webpage" Result; failures (HTTP status, extraction, thrown errors)
 *          are reported through `error` rather than thrown.
 */
async function readWebpage(url: string, signal?: AbortSignal): Promise<Result> {
  const rejection = reject(url, "webpage");
  if (rejection) return rejection;

  // reject() has already established that the URL parses as HTTP(S).
  const parsedUrl = httpUrl(url)!;
  const failure = (method: string, error: string): Result => ({ type: "webpage", url, title: "", content: "", method, error });

  try {
    const { response, text } = await fetchText(url, signal);
    if (!response.ok || text === undefined) {
      return failure("http", `HTTP ${response.status}: ${response.statusText}`);
    }

    const contentType = response.headers.get("content-type") || "";
    if (!contentType.includes("html")) {
      return { type: "webpage", url, title: titleFromUrl(url), content: text, method: "plain-text" };
    }

    const { document } = parseHTML(text);
    const article = new Readability(document as unknown as Document).parse();
    if (!article) {
      return failure("readability", "Could not extract readable content from HTML");
    }

    const title = article.title || parsedUrl.hostname;
    const markdown = turndown.turndown(article.content).trim();
    return { type: "webpage", url, title, content: markdown, method: "readability" };
  } catch (e) {
    return failure("http", e instanceof Error ? e.message : String(e));
  }
}
|
|
50
|
+
/**
 * Fetches a GitHub API URL and parses the body as JSON.
 *
 * @param api - Full API URL.
 * @param signal - Optional abort signal forwarded to the fetch.
 * @throws Error naming the HTTP status when the response is not OK or the body is empty;
 *         JSON.parse errors propagate unchanged.
 */
async function ghJson(api: string, signal?: AbortSignal) {
  const { response, text: body } = await fetchText(api, signal, "application/vnd.github+json");
  if (response.ok && body) return JSON.parse(body);
  throw new Error(`GitHub API ${response.status}: ${response.statusText}`);
}
|
|
51
|
+
/**
 * Fetches a URL as plain text.
 *
 * @returns The body text (possibly empty), or null when the response is not OK.
 *          Network errors from the fetch are not caught here.
 */
async function readRaw(url: string, signal?: AbortSignal) {
  const { response, text: body } = await fetchText(url, signal, "text/plain,*/*;q=0.8");
  if (!response.ok || body === undefined) return null;
  return body;
}
|
|
52
|
+
/**
 * Lists the contents of a GitHub repository root or tree directory and, for a
 * repository root, appends the first README variant that can be fetched.
 *
 * File (blob/raw) URLs and unsupported GitHub pages are returned as rejections.
 * At most MAX_TREE entries are listed.
 *
 * parseGitHub yields percent-decoded owner, repo and path, so they are
 * re-encoded segment by segment before being placed in the API URL. Otherwise
 * directory names containing `#`, `?` or `%` would corrupt the request.
 *
 * @param url - GitHub repository root or tree URL.
 * @param signal - Optional abort signal forwarded to every request.
 * @returns A "github_repo" Result; API and network failures are reported through `error`.
 */
async function readGitHubRepo(url: string, signal?: AbortSignal): Promise<Result> {
  const rej = reject(url, "github_repo");
  if (rej) return rej;

  // reject() returns early for non-GitHub URLs, so parseGitHub cannot be null here.
  const g = parseGitHub(url)!;
  const title = `${g.owner}/${g.repo}`;

  if (g.type === "unsupported") {
    // Fall back to a generic message so a missing reason can never be mistaken for success.
    return { type: "github_repo", url, title, content: "", method: "reject", error: g.unsupportedReason ?? "Unsupported GitHub URL" };
  }
  if (g.type === "blob" || g.type === "raw") {
    return { type: "github_repo", url, title, content: "", method: "reject", error: "This is a GitHub file URL. Use read_github_file instead.", suggestedTool: "read_github_file" };
  }

  try {
    const ref = g.ref ?? "HEAD";
    const encodeSegments = (value: string) => value.split("/").map((segment) => encodeURIComponent(segment)).join("/");
    const contentsPath = g.type === "tree" && g.path ? `/contents/${encodeSegments(g.path)}` : "/contents";
    const api = `https://api.github.com/repos/${encodeURIComponent(g.owner)}/${encodeURIComponent(g.repo)}${contentsPath}?ref=${encodeURIComponent(ref)}`;

    const listing = await ghJson(api, signal);
    // The listing may be an array or a single object; normalise to an array.
    const items = Array.isArray(listing) ? listing : [listing];
    const tree =
      items.slice(0, MAX_TREE).map((x: any) => `${x.type === "dir" ? "dir " : "file"} ${x.path}`).join("\n") +
      (items.length > MAX_TREE ? `\n... truncated at ${MAX_TREE} entries` : "");

    let readme = "";
    if (g.type === "root") {
      for (const name of ["README.md", "README", "README.txt", "README.rst"]) {
        const raw = await readRaw(rawUrl(g, name, ref), signal);
        if (raw) {
          readme = `\n\n## ${name}\n\n${raw}`;
          break;
        }
      }
    }

    return { type: "github_repo", url, title, method: "github-api", content: [`GitHub repository: ${url}`, "", "## Files", tree, readme].join("\n") };
  } catch (e) {
    return { type: "github_repo", url, title, content: "", method: "github-api", error: e instanceof Error ? e.message : String(e) };
  }
}
|
|
53
|
+
/**
 * Reads a single file from GitHub given a blob or raw URL.
 *
 * Repository root/tree URLs are rejected with a pointer to read_github_repo.
 * Unsupported GitHub pages are rejected with the reason from parseGitHub
 * instead of the misleading "missing a path" message. Network errors and
 * timeouts are caught and returned as an `error` Result, as the other readers
 * do, rather than escaping as a rejected promise.
 *
 * @param url - GitHub blob URL or raw.githubusercontent.com URL.
 * @param signal - Optional abort signal forwarded to the fetch.
 * @returns A "github_file" Result whose content is the raw file text.
 */
async function readGitHubFile(url: string, signal?: AbortSignal): Promise<Result> {
  const rej = reject(url, "github_file");
  if (rej) return rej;

  // reject() returns early for non-GitHub URLs, so parseGitHub cannot be null here.
  const g = parseGitHub(url)!;
  const repoTitle = `${g.owner}/${g.repo}`;

  if (g.type === "unsupported") {
    return { type: "github_file", url, title: repoTitle, content: "", method: "reject", error: g.unsupportedReason ?? "Unsupported GitHub URL" };
  }
  if (g.type === "root" || g.type === "tree") {
    return { type: "github_file", url, title: repoTitle, content: "", method: "reject", error: "This is a GitHub repository/tree URL. Use read_github_repo instead.", suggestedTool: "read_github_repo" };
  }
  if (!g.path) {
    return { type: "github_file", url, title: repoTitle, content: "", method: "reject", error: "GitHub file URL is missing a path" };
  }

  const title = `${repoTitle} - ${g.path}`;
  try {
    // Raw URLs are fetched as given; blob URLs are mapped to their raw equivalent.
    const raw = await readRaw(g.type === "raw" ? url : rawUrl(g, g.path), signal);
    if (raw === null) {
      return { type: "github_file", url, title, content: "", method: "github-raw", error: "Could not fetch raw GitHub file" };
    }
    return { type: "github_file", url, title, content: raw, method: "github-raw" };
  } catch (e) {
    return { type: "github_file", url, title, content: "", method: "github-raw", error: e instanceof Error ? e.message : String(e) };
  }
}
|
|
54
|
+
/**
 * Downloads a remote PDF and extracts its text page by page.
 *
 * Only HTTP(S) URLs whose path ends in `.pdf` are accepted. At most
 * MAX_PDF_PAGES pages are extracted; the first content line reports the total
 * page count and whether extraction was capped. The title comes from the PDF
 * metadata when available, otherwise from the URL.
 *
 * @param url - Direct PDF URL.
 * @param signal - Optional abort signal, combined with the TIMEOUT_MS timeout.
 * @returns A "pdf" Result; HTTP and parsing failures are reported through `error`.
 */
async function readPdf(url: string, signal?: AbortSignal): Promise<Result> {
  const failure = (method: string, error: string, suggestedTool?: string): Result => {
    const base: Result = { type: "pdf", url, title: "", content: "", method, error };
    return suggestedTool === undefined ? base : { ...base, suggestedTool };
  };

  const parsed = httpUrl(url);
  if (!parsed) return failure("reject", "Invalid or non-HTTP(S) URL");
  if (!isPdf(parsed)) {
    return failure("reject", "This URL does not appear to be a PDF. Use read_webpage for ordinary webpages.", "read_webpage");
  }

  try {
    const timeout = AbortSignal.timeout(TIMEOUT_MS);
    const response = await fetch(url, {
      signal: signal ? AbortSignal.any([signal, timeout]) : timeout,
      headers: { "User-Agent": "Mozilla/5.0 pi-simple-web/0.1", Accept: "application/pdf,*/*;q=0.8" },
    });
    if (!response.ok) return failure("http", `HTTP ${response.status}: ${response.statusText}`);

    const bytes = new Uint8Array(await response.arrayBuffer());
    const pdf = await getDocumentProxy(bytes);

    // Metadata is optional: a failure to read it falls back to a URL-derived title.
    const meta: any = await pdf.getMetadata().catch(() => null);
    const title = meta?.info?.Title?.trim?.() || titleFromUrl(url);

    const pageLimit = Math.min(pdf.numPages, MAX_PDF_PAGES);
    const cappedNote = pdf.numPages > pageLimit ? ` (first ${pageLimit} extracted)` : "";
    const chunks = [`Pages: ${pdf.numPages}${cappedNote}`];

    for (let pageNo = 1; pageNo <= pageLimit; pageNo++) {
      const page = await pdf.getPage(pageNo);
      const textContent = await page.getTextContent();
      const pageText = textContent.items
        .map((item: any) => item.str || "")
        .join(" ")
        .replace(/\s+/g, " ")
        .trim();
      // Pages without extractable text are left out entirely.
      if (pageText) chunks.push(`\n<!-- Page ${pageNo} -->\n\n${pageText}`);
    }

    return { type: "pdf", url, title, content: chunks.join("\n"), method: "pdf" };
  } catch (e) {
    return failure("pdf", e instanceof Error ? e.message : String(e));
  }
}
|
|
55
|
+
/**
 * Normalises an error thrown by a child-process call.
 *
 * @param e - The thrown value.
 * @returns `code` as found on the value (may be undefined) and `text`: the
 *          stderr output if non-empty, else the message, else `String(e)`,
 *          with whitespace runs collapsed, trimmed and limited to 200 characters.
 */
function execErr(e: unknown) {
  const failure = e as any;
  const rawStderr = failure?.stderr;

  let stderr = "";
  if (Buffer.isBuffer(rawStderr)) stderr = rawStderr.toString("utf8");
  else if (typeof rawStderr === "string") stderr = rawStderr;

  const source = stderr || failure?.message || String(e);
  const text = source.replace(/\s+/g, " ").trim().slice(0, 200);
  return { code: failure?.code, text };
}
|
|
56
|
+
/**
 * Turns a failure from running yt-dlp into a user-facing message.
 * An ENOENT code is reported as "not installed"; anything else carries the
 * normalised error text when there is one.
 */
function ytdlpErr(e: unknown) {
  const { code, text } = execErr(e);
  if (code === "ENOENT") {
    return "yt-dlp is not installed. Install with: brew install yt-dlp";
  }
  if (!text) return "yt-dlp failed";
  return `yt-dlp failed: ${text}`;
}
|
|
57
|
+
/**
 * Turns a failure from running ffmpeg into a user-facing message.
 * An ENOENT code is reported as "not installed"; anything else carries the
 * normalised error text when there is one.
 */
function ffmpegErr(e: unknown) {
  const { code, text } = execErr(e);
  if (code === "ENOENT") {
    return "ffmpeg is not installed. Install with: brew install ffmpeg";
  }
  if (!text) return "ffmpeg failed";
  return `ffmpeg failed: ${text}`;
}
|
|
58
|
+
/**
 * Reads the English transcript of a YouTube video via yt-dlp.
 *
 * yt-dlp is run synchronously with `-J --skip-download` to obtain the video
 * metadata. Manual subtitles are preferred over automatic captions, and within
 * the chosen language the VTT variant is preferred. The caption file is then
 * downloaded and returned as-is.
 *
 * @param url - YouTube video URL (any form ytId recognises).
 * @param signal - Optional abort signal, forwarded to the caption download.
 *                 It cannot interrupt the synchronous yt-dlp call.
 * @returns A "youtube_transcript" Result; failures are reported through `error`.
 */
async function readYouTubeTranscript(url: string, signal?: AbortSignal): Promise<Result> {
  const id = ytId(url);
  if (!id) {
    return { type: "youtube_transcript", url, title: "", content: "", method: "reject", error: "This is not a supported YouTube video URL. Use read_webpage for ordinary webpages.", suggestedTool: "read_webpage" };
  }

  let data: any;
  try {
    const canonical = `https://www.youtube.com/watch?v=${id}`;
    // `yt-dlp -J` lists every format and caption track with long signed URLs, so the
    // output can exceed execFileSync's default 1 MiB maxBuffer; raise the limit.
    const stdout = execFileSync("yt-dlp", ["-J", "--skip-download", canonical], {
      timeout: 30000,
      maxBuffer: 64 * 1024 * 1024,
      encoding: "utf8",
      stdio: ["pipe", "pipe", "pipe"],
    });
    data = JSON.parse(stdout);
  } catch (e) {
    return { type: "youtube_transcript", url, title: "", content: "", method: "yt-dlp", error: ytdlpErr(e) };
  }

  const title = typeof data?.title === "string" ? data.title : "YouTube Video";

  // Manual subtitles first, then automatic captions; first English track wins.
  let cap = "";
  for (const group of [data?.subtitles, data?.automatic_captions].filter(Boolean)) {
    const key = Object.keys(group).find((k) => k === "en" || k.startsWith("en-"));
    const arr = key ? group[key] : null;
    const picked = Array.isArray(arr) ? arr.find((e: any) => e.ext === "vtt") || arr[0] : null;
    if (picked?.url) {
      cap = picked.url;
      break;
    }
  }
  if (!cap) {
    return { type: "youtube_transcript", url, title, content: "", method: "yt-dlp", error: "No English transcript/subtitles found via yt-dlp" };
  }

  // The download is handled separately so network errors are not reported as yt-dlp failures.
  try {
    const { response, text } = await fetchText(cap, signal);
    if (!response.ok || !text) {
      return { type: "youtube_transcript", url, title, content: "", method: "yt-dlp", error: `Could not fetch transcript: HTTP ${response.status}` };
    }
    return { type: "youtube_transcript", url, title, content: text, method: "yt-dlp" };
  } catch (e) {
    return { type: "youtube_transcript", url, title, content: "", method: "yt-dlp", error: `Could not fetch transcript: ${e instanceof Error ? e.message : String(e)}` };
  }
}
|
|
59
|
+
/**
 * Parses a timestamp into whole seconds.
 *
 * Accepted forms are `S`, `M:SS` and `H:MM:SS`, where each part is a
 * non-negative finite number; the result is rounded down. Surrounding
 * whitespace on a part is ignored.
 *
 * Empty parts are rejected explicitly because `Number("")` is 0: without the
 * check an empty or blank timestamp would silently mean 0 seconds and a
 * malformed value such as "1::3" would be accepted.
 *
 * @param ts - Timestamp string.
 * @returns Seconds as an integer, or null when the timestamp is invalid.
 */
function parseTs(ts: string): number | null {
  const parts = ts.split(":").map((part) => part.trim());
  if (parts.length > 3 || parts.some((part) => part === "")) return null;

  const nums = parts.map(Number);
  if (nums.some((x) => !Number.isFinite(x) || x < 0)) return null;

  if (nums.length === 3) return Math.floor(nums[0] * 3600 + nums[1] * 60 + nums[2]);
  if (nums.length === 2) return Math.floor(nums[0] * 60 + nums[1]);
  return Math.floor(nums[0]);
}
|
|
60
|
+
/**
 * Formats a number of seconds as `M:SS`, or `H:MM:SS` once it reaches an hour.
 *
 * @param s - Seconds; callers pass the integer returned by parseTs.
 */
function fmt(s: number) {
  const twoDigits = (value: number) => String(value).padStart(2, "0");
  const hours = Math.floor(s / 3600);
  const minutes = Math.floor((s % 3600) / 60);
  const seconds = s % 60;

  if (hours > 0) return `${hours}:${twoDigits(minutes)}:${twoDigits(seconds)}`;
  return `${minutes}:${twoDigits(seconds)}`;
}
|
|
61
|
+
/**
 * Turns the href of a DuckDuckGo HTML result into the destination URL.
 *
 * Result links are commonly protocol-relative redirects of the form
 * `//duckduckgo.com/l/?uddg=<encoded target>`; the `uddg` target is returned
 * for those. Any other href is resolved against duckduckgo.com and returned
 * as an absolute URL. An unparsable href is returned unchanged.
 */
function ddgResultUrl(href: string): string {
  if (!href) return "";
  try {
    const resolved = new URL(href, "https://duckduckgo.com");
    const isRedirect = /(^|\.)duckduckgo\.com$/i.test(resolved.hostname) && /^\/l\/?$/.test(resolved.pathname);
    return (isRedirect && resolved.searchParams.get("uddg")) || resolved.href;
  } catch {
    return href;
  }
}

/**
 * Runs a plain-language web search against DuckDuckGo's HTML endpoint and
 * returns up to ten numbered results (title plus destination URL).
 *
 * Queries that start with `http://` or `https://` are rejected with a pointer
 * to the reader tools, and blank queries are rejected outright. Result links
 * are unwrapped from DuckDuckGo's redirect form so the URLs can be handed
 * straight to a reader tool, which only accepts absolute HTTP(S) URLs.
 *
 * @param query - Search query text.
 * @param signal - Optional abort signal forwarded to the fetch.
 * @returns A "web_search" Result; failures are reported through `error`.
 */
async function searchWeb(query: string, signal?: AbortSignal): Promise<Result> {
  const trimmed = query.trim();
  if (/^https?:\/\//i.test(trimmed)) {
    return { type: "web_search", url: query, title: "", content: "", method: "reject", error: "search_web accepts search queries, not URLs. Use a reader tool for direct URL extraction.", suggestedTool: "read_webpage" };
  }
  if (!trimmed) {
    return { type: "web_search", url: "", title: query, content: "", method: "reject", error: "search_web requires a non-empty search query." };
  }

  const url = `https://duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
  try {
    const { response, text } = await fetchText(url, signal);
    if (!response.ok || !text) {
      return { type: "web_search", url, title: query, content: "", method: "duckduckgo", error: `HTTP ${response.status}: ${response.statusText}` };
    }

    const { document } = parseHTML(text);
    const anchors = Array.from(document.querySelectorAll("a.result__a")).slice(0, 10);
    const lines = anchors.map((a: any, i) => {
      const title = (a.textContent || "").replace(/\s+/g, " ").trim();
      return `${i + 1}. ${title}\n ${ddgResultUrl(a.getAttribute("href") || "")}`;
    });
    if (!lines.length) {
      return { type: "web_search", url, title: query, content: "", method: "duckduckgo", error: "No search results parsed" };
    }
    return { type: "web_search", url, title: query, content: lines.join("\n\n"), method: "duckduckgo" };
  } catch (e) {
    return { type: "web_search", url, title: query, content: "", method: "duckduckgo", error: e instanceof Error ? e.message : String(e) };
  }
}
|
|
62
|
+
|
|
63
|
+
/**
 * Renders the one-line summary shown for a tool call: the tool name in the
 * theme's "muted" colour followed by the call's `url` (or, failing that,
 * `query`) argument truncated to 80 columns.
 */
function render(name: string, args: any, theme: any) {
  let subject = "";
  if (typeof args.url === "string") subject = args.url;
  else if (typeof args.query === "string") subject = args.query;

  const prefix = theme.fg("muted", `${name} `);
  return new Text(prefix + truncateToWidth(subject, 80), 0, 0);
}
|
|
64
|
+
/**
 * Registers a single-URL reader tool.
 *
 * The tool takes one `url` parameter, runs `fn`, and either reports the
 * Result's error (with `suggestedTool` in the details) or caches the output
 * via store() and returns the possibly truncated text with a `contentId`.
 *
 * @param pi - Extension API used to register the tool.
 * @param name - Tool name.
 * @param label - Human-readable label.
 * @param description - Tool description.
 * @param promptSnippet - Short usage hint.
 * @param fn - Reader implementation invoked with the URL and the abort signal.
 */
function reader(pi: ExtensionAPI, name: string, label: string, description: string, promptSnippet: string, fn: (url: string, signal?: AbortSignal) => Promise<Result>) {
  const parameters = Type.Object({
    url: Type.String({ description: "HTTP(S) URL for this specific reader" }),
  });

  pi.registerTool({
    name,
    label,
    description,
    promptSnippet,
    parameters,
    async execute(_id, params, signal) {
      const result = await fn(params.url, signal);
      if (result.error) {
        return err(result.error, { type: result.type, url: params.url, method: result.method, suggestedTool: result.suggestedTool });
      }
      const stored = store(result);
      return ok(stored.text, {
        type: result.type,
        url: result.url,
        title: result.title,
        method: result.method,
        chars: stored.chars,
        truncated: stored.truncated,
        contentId: stored.id,
      });
    },
    renderCall(args, theme) {
      return render(name, args, theme);
    },
  });
}
|
|
65
|
+
|
|
66
|
+
/**
 * Extension entry point: registers every pi-simple-web tool on the given API.
 *
 * Registered tools: the five single-URL readers (read_webpage,
 * read_github_repo, read_github_file, read_pdf, read_youtube_transcript),
 * inspect_video_frames, search_web, get_web_content, and the deprecated
 * read_url alias, which always returns an error.
 */
export default function(pi: ExtensionAPI) {
  reader(pi, "read_webpage", "Read Webpage", "Read only ordinary webpages/articles/docs as Markdown. Do not use for GitHub, YouTube, PDFs, search, local files, or media.", "Use read_webpage only for normal webpages.", readWebpage);
  reader(pi, "read_github_repo", "Read GitHub Repo", "Read only GitHub repository root/tree URLs. Do not use for GitHub file/blob/raw URLs or normal webpages.", "Use read_github_repo only for GitHub repo roots and tree/directory URLs.", readGitHubRepo);
  reader(pi, "read_github_file", "Read GitHub File", "Read only GitHub blob/raw file URLs. Do not use for repository roots, trees, or normal webpages.", "Use read_github_file only for GitHub file/blob/raw URLs.", readGitHubFile);
  reader(pi, "read_pdf", "Read PDF", "Read only remote PDF URLs. Do not use for webpages, GitHub, YouTube, search, or media.", "Use read_pdf only for direct PDF URLs.", readPdf);
  reader(pi, "read_youtube_transcript", "Read YouTube Transcript", "Read only YouTube transcripts/captions. Do not use for visual video analysis or webpages.", "Use read_youtube_transcript only for YouTube transcript requests.", readYouTubeTranscript);

  pi.registerTool({
    name: "inspect_video_frames",
    label: "Inspect Video Frames",
    description: "Extract one image frame from a YouTube video at an explicit timestamp. Do not use for webpages, transcripts, GitHub, PDFs, or search.",
    promptSnippet: "Use inspect_video_frames only for explicit visual video frame inspection.",
    parameters: Type.Object({
      url: Type.String({ description: "YouTube video URL" }),
      timestamp: Type.String({ description: "Timestamp like 85, 1:25, or 1:02:03" }),
    }),
    async execute(_id, params) {
      const id = ytId(params.url);
      if (!id) return err("inspect_video_frames currently supports YouTube video URLs only", { url: params.url, suggestedTool: "read_webpage" });

      const seconds = parseTs(params.timestamp);
      if (seconds === null) return err(`Invalid timestamp: ${params.timestamp}`, { url: params.url, timestamp: params.timestamp });

      const canonical = `https://www.youtube.com/watch?v=${id}`;
      const failureDetails = { type: "video_frames", url: params.url, timestamp: params.timestamp };

      // Each external program gets its own try/catch so the failure is attributed to the
      // program that actually failed. Matching "ffmpeg" in the error message is unreliable
      // because yt-dlp's own stderr can mention ffmpeg.
      let stream: string | undefined;
      try {
        // `yt-dlp -g` prints one stream URL per line; the first non-empty line is used.
        stream = execFileSync("yt-dlp", ["-g", canonical], { timeout: 15000, encoding: "utf8", stdio: ["pipe", "pipe", "pipe"] })
          .trim()
          .split(/\r?\n/)
          .find(Boolean);
      } catch (e) {
        return err(ytdlpErr(e), failureDetails);
      }
      if (!stream) return err("yt-dlp failed: missing stream URL", { url: params.url });

      let frame: Buffer;
      try {
        frame = execFileSync("ffmpeg", ["-ss", String(seconds), "-i", stream, "-frames:v", "1", "-f", "image2pipe", "-vcodec", "mjpeg", "pipe:1"], { maxBuffer: 5 * 1024 * 1024, timeout: 30000, stdio: ["pipe", "pipe", "pipe"] });
      } catch (e) {
        return err(ffmpegErr(e), failureDetails);
      }
      // ffmpeg can exit successfully without writing a frame (e.g. when seeking past the end).
      if (frame.length === 0) {
        return err(`ffmpeg produced no frame at ${fmt(seconds)}; the timestamp may be past the end of the video`, failureDetails);
      }

      return {
        content: [
          { type: "image", data: frame.toString("base64"), mimeType: "image/jpeg" },
          { type: "text", text: `Frame extracted at ${fmt(seconds)} from ${canonical}` },
        ],
        details: { type: "video_frames", url: params.url, timestamp: fmt(seconds), method: "yt-dlp+ffmpeg" },
      };
    },
    renderCall(args, theme) {
      return new Text(theme.fg("muted", "inspect_video_frames ") + truncateToWidth(`${args.url || ""} @ ${args.timestamp || ""}`, 80), 0, 0);
    },
  });

  pi.registerTool({
    name: "search_web",
    label: "Search Web",
    description: "Search the web from a plain-language query. Do not use for direct URL reading.",
    promptSnippet: "Use search_web for search queries only.",
    parameters: Type.Object({ query: Type.String({ description: "Plain-language search query, not a URL" }) }),
    async execute(_id, params, signal) {
      const r = await searchWeb(params.query, signal);
      if (r.error) return err(r.error, { type: r.type, query: params.query, method: r.method, suggestedTool: r.suggestedTool });
      const p = store(r);
      return ok(p.text, { type: r.type, query: params.query, method: r.method, chars: p.chars, truncated: p.truncated, contentId: p.id });
    },
    renderCall(args, theme) {
      return render("search_web", args, theme);
    },
  });

  pi.registerTool({
    name: "get_web_content",
    label: "Get Web Content",
    description: "Retrieve full content previously returned by pi-simple-web readers using a contentId.",
    promptSnippet: "Use get_web_content only with a contentId returned by a pi-simple-web tool.",
    parameters: Type.Object({ contentId: Type.String({ description: "contentId returned by a pi-simple-web reader" }) }),
    async execute(_id, params) {
      const item = cache.get(params.contentId);
      if (!item) return err(`No cached web content found for contentId: ${params.contentId}`, { contentId: params.contentId });
      return ok(item.content, { contentId: item.id, type: item.type, url: item.url, title: item.title, method: item.method, chars: item.content.length });
    },
    renderCall(args, theme) {
      return new Text(theme.fg("muted", "get_web_content ") + (args.contentId || ""), 0, 0);
    },
  });

  // Kept only so callers of the old name get an actionable error instead of "unknown tool".
  pi.registerTool({
    name: "read_url",
    label: "Read URL Deprecated",
    description: "Deprecated compatibility alias. Prefer read_webpage, read_github_repo, or read_github_file so intent stays unambiguous.",
    promptSnippet: "Avoid read_url. Choose a specific reader instead.",
    parameters: Type.Object({ url: Type.String({ description: "URL; deprecated alias only" }) }),
    async execute(_id, params) {
      return err("read_url is deprecated to reduce ambiguity. Use read_webpage, read_github_repo, read_github_file, read_pdf, or read_youtube_transcript instead.", { url: params.url });
    },
    renderCall(args, theme) {
      return render("read_url(deprecated)", args, theme);
    },
  });
}
|
package/package.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "pi-simple-web",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Strict separation-of-concerns web, GitHub, PDF, search, and YouTube tools for Pi",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"scripts": {
|
|
7
|
+
"test": "node --test test/*.test.mjs",
|
|
8
|
+
"packcheck": "npm pack --dry-run"
|
|
9
|
+
},
|
|
10
|
+
"keywords": [
|
|
11
|
+
"pi-package",
|
|
12
|
+
"pi",
|
|
13
|
+
"pi-coding-agent",
|
|
14
|
+
"extension",
|
|
15
|
+
"web",
|
|
16
|
+
"github",
|
|
17
|
+
"pdf",
|
|
18
|
+
"youtube",
|
|
19
|
+
"soc"
|
|
20
|
+
],
|
|
21
|
+
"author": "David Tran",
|
|
22
|
+
"license": "MIT",
|
|
23
|
+
"type": "module",
|
|
24
|
+
"homepage": "https://github.com/davidus-tranus/pi-simple-web#readme",
|
|
25
|
+
"repository": {
|
|
26
|
+
"type": "git",
|
|
27
|
+
"url": "git+https://github.com/davidus-tranus/pi-simple-web.git"
|
|
28
|
+
},
|
|
29
|
+
"bugs": {
|
|
30
|
+
"url": "https://github.com/davidus-tranus/pi-simple-web/issues"
|
|
31
|
+
},
|
|
32
|
+
"files": [
|
|
33
|
+
"index.ts",
|
|
34
|
+
"README.md",
|
|
35
|
+
"LICENSE"
|
|
36
|
+
],
|
|
37
|
+
"dependencies": {
|
|
38
|
+
"@mozilla/readability": "^0.5.0",
|
|
39
|
+
"linkedom": "^0.16.0",
|
|
40
|
+
"turndown": "^7.2.0",
|
|
41
|
+
"unpdf": "^1.6.2"
|
|
42
|
+
},
|
|
43
|
+
"peerDependencies": {
|
|
44
|
+
"@mariozechner/pi-coding-agent": "*",
|
|
45
|
+
"@mariozechner/pi-tui": "*",
|
|
46
|
+
"typebox": "*"
|
|
47
|
+
},
|
|
48
|
+
"peerDependenciesMeta": {
|
|
49
|
+
"@mariozechner/pi-coding-agent": {
|
|
50
|
+
"optional": true
|
|
51
|
+
},
|
|
52
|
+
"@mariozechner/pi-tui": {
|
|
53
|
+
"optional": true
|
|
54
|
+
},
|
|
55
|
+
"typebox": {
|
|
56
|
+
"optional": true
|
|
57
|
+
}
|
|
58
|
+
},
|
|
59
|
+
"pi": {
|
|
60
|
+
"extensions": [
|
|
61
|
+
"./index.ts"
|
|
62
|
+
]
|
|
63
|
+
}
|
|
64
|
+
}
|