pi-web-access 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +69 -1
- package/README.md +147 -23
- package/chrome-cookies.ts +240 -0
- package/extract.ts +266 -27
- package/gemini-api.ts +103 -0
- package/gemini-search.ts +236 -0
- package/gemini-url-context.ts +119 -0
- package/gemini-web.ts +296 -0
- package/github-api.ts +3 -2
- package/index.ts +113 -23
- package/package.json +4 -2
- package/perplexity.ts +7 -2
- package/pi-web-fetch-demo.mp4 +0 -0
- package/rsc-extract.ts +1 -1
- package/skills/librarian/SKILL.md +195 -0
- package/utils.ts +44 -0
- package/video-extract.ts +329 -0
- package/youtube-extract.ts +280 -0
package/index.ts
CHANGED

```diff
@@ -4,7 +4,9 @@ import { Type } from "@sinclair/typebox";
 import { StringEnum } from "@mariozechner/pi-ai";
 import { fetchAllContent, type ExtractedContent } from "./extract.js";
 import { clearCloneCache } from "./github-extract.js";
-import {
+import { search, type SearchProvider } from "./gemini-search.js";
+import type { SearchResult } from "./perplexity.js";
+import { formatSeconds } from "./utils.js";
 import {
   clearResults,
   deleteResult,
@@ -25,6 +27,10 @@ let widgetUnsubscribe: (() => void) | null = null;
 
 const MAX_INLINE_CONTENT = 30000; // Content returned directly to agent
 
+function stripThumbnails(results: ExtractedContent[]): ExtractedContent[] {
+  return results.map(({ thumbnail, frames, ...rest }) => rest);
+}
+
 function formatSearchSummary(results: SearchResult[], answer: string): string {
   let output = answer ? `${answer}\n\n---\n\n**Sources:**\n` : "";
   output += results.map((r, i) => `${i + 1}. ${r.title}\n ${r.url}`).join("\n\n");
@@ -144,7 +150,7 @@ export default function (pi: ExtensionAPI) {
 
   pi.on("session_start", async (_event, ctx) => handleSessionChange(ctx));
   pi.on("session_switch", async (_event, ctx) => handleSessionChange(ctx));
-  pi.on("
+  pi.on("session_fork", async (_event, ctx) => handleSessionChange(ctx));
   pi.on("session_tree", async (_event, ctx) => handleSessionChange(ctx));
 
   pi.on("session_shutdown", () => {
@@ -163,16 +169,19 @@ export default function (pi: ExtensionAPI) {
     name: "web_search",
     label: "Web Search",
     description:
-      "Search the web using Perplexity AI. Returns an AI-synthesized answer with source citations. Supports batch searching with multiple queries. When includeContent is true, full page content is fetched in the background.",
+      "Search the web using Perplexity AI or Gemini. Returns an AI-synthesized answer with source citations. Supports batch searching with multiple queries. When includeContent is true, full page content is fetched in the background. Provider auto-selects: Perplexity if configured, else Gemini API (needs key), else Gemini Web (needs Chrome login).",
     parameters: Type.Object({
       query: Type.Optional(Type.String({ description: "Single search query" })),
-      queries: Type.Optional(Type.Array(Type.String(), { description: "Multiple queries (
+      queries: Type.Optional(Type.Array(Type.String(), { description: "Multiple queries (batch)" })),
       numResults: Type.Optional(Type.Number({ description: "Results per query (default: 5, max: 20)" })),
       includeContent: Type.Optional(Type.Boolean({ description: "Fetch full page content (async)" })),
       recencyFilter: Type.Optional(
        StringEnum(["day", "week", "month", "year"], { description: "Filter by recency" }),
      ),
      domainFilter: Type.Optional(Type.Array(Type.String(), { description: "Limit to domains (prefix with - to exclude)" })),
+      provider: Type.Optional(
+        StringEnum(["auto", "perplexity", "gemini"], { description: "Search provider (default: auto)" }),
+      ),
     }),
 
     async execute(_toolCallId, params, signal, onUpdate, _ctx) {
@@ -196,7 +205,8 @@ export default function (pi: ExtensionAPI) {
       });
 
       try {
-        const { answer, results } = await
+        const { answer, results } = await search(query, {
+          provider: params.provider as SearchProvider | undefined,
           numResults: params.numResults,
           recencyFilter: params.recencyFilter,
           domainFilter: params.domainFilter,
@@ -249,7 +259,7 @@ export default function (pi: ExtensionAPI) {
         id: capturedFetchId,
         type: "fetch",
         timestamp: Date.now(),
-        urls: fetched,
+        urls: stripThumbnails(fetched),
       };
       storeResult(capturedFetchId, data);
       pi.appendEntry("web-search-results", data);
@@ -392,13 +402,24 @@ export default function (pi: ExtensionAPI) {
   pi.registerTool({
     name: "fetch_content",
     label: "Fetch Content",
-    description: "Fetch URL(s) and extract readable content as markdown. Content is always stored and can be retrieved with get_search_content.",
+    description: "Fetch URL(s) and extract readable content as markdown. Supports YouTube video transcripts (with thumbnail), GitHub repository contents, and local video files (with frame thumbnail). Video frames can be extracted via timestamp/range or sampled across the entire video with frames alone. Falls back to Gemini for pages that block bots or fail Readability extraction. For YouTube and video files: ALWAYS pass the user's specific question via the prompt parameter — this directs the AI to focus on that aspect of the video, producing much better results than a generic extraction. Content is always stored and can be retrieved with get_search_content.",
     parameters: Type.Object({
       url: Type.Optional(Type.String({ description: "Single URL to fetch" })),
       urls: Type.Optional(Type.Array(Type.String(), { description: "Multiple URLs (parallel)" })),
       forceClone: Type.Optional(Type.Boolean({
         description: "Force cloning large GitHub repositories that exceed the size threshold",
       })),
+      prompt: Type.Optional(Type.String({
+        description: "Question or instruction for video analysis (YouTube and video files). Pass the user's specific question here — e.g. 'describe the book shown at the advice for beginners section'. Without this, a generic transcript extraction is used which may miss what the user is asking about.",
+      })),
+      timestamp: Type.Optional(Type.String({
+        description: "Extract video frame(s) at a timestamp or time range. Single: '1:23:45', '23:45', or '85' (seconds). Range: '23:41-25:00' extracts evenly-spaced frames across that span (default 6). Use frames with ranges to control density; single+frames uses a fixed 5s interval. YouTube requires yt-dlp + ffmpeg; local videos require ffmpeg. Use a range when you know the approximate area but not the exact moment — you'll get a contact sheet to visually identify the right frame.",
+      })),
+      frames: Type.Optional(Type.Integer({
+        minimum: 1,
+        maximum: 12,
+        description: "Number of frames to extract. Use with timestamp range for custom density, with single timestamp to get N frames at 5s intervals, or alone to sample across the entire video. Requires yt-dlp + ffmpeg for YouTube, ffmpeg for local video.",
+      })),
     }),
 
     async execute(_toolCallId, params, signal, onUpdate, _ctx) {
@@ -417,6 +438,9 @@ export default function (pi: ExtensionAPI) {
 
       const fetchResults = await fetchAllContent(urlList, signal, {
         forceClone: params.forceClone,
+        prompt: params.prompt,
+        timestamp: params.timestamp,
+        frames: params.frames,
       });
       const successful = fetchResults.filter((r) => !r.error).length;
       const totalChars = fetchResults.reduce((sum, r) => sum + r.content.length, 0);
@@ -427,7 +451,7 @@ export default function (pi: ExtensionAPI) {
         id: responseId,
         type: "fetch",
         timestamp: Date.now(),
-        urls: fetchResults,
+        urls: stripThumbnails(fetchResults),
       };
       storeResult(responseId, data);
       pi.appendEntry("web-search-results", data);
@@ -438,7 +462,7 @@ export default function (pi: ExtensionAPI) {
       if (result.error) {
         return {
           content: [{ type: "text", text: `Error: ${result.error}` }],
-          details: { urls: urlList, urlCount: 1, successful: 0, error: result.error, responseId },
+          details: { urls: urlList, urlCount: 1, successful: 0, error: result.error, responseId, prompt: params.prompt, timestamp: params.timestamp, frames: params.frames },
         };
       }
 
@@ -453,8 +477,20 @@ export default function (pi: ExtensionAPI) {
          `Use get_search_content({ responseId: "${responseId}", urlIndex: 0 }) for full content.`;
       }
 
+      const content: Array<{ type: string; text?: string; data?: string; mimeType?: string }> = [];
+      if (result.frames?.length) {
+        for (const frame of result.frames) {
+          content.push({ type: "image", data: frame.data, mimeType: frame.mimeType });
+          content.push({ type: "text", text: `Frame at ${frame.timestamp}` });
+        }
+      } else if (result.thumbnail) {
+        content.push({ type: "image", data: result.thumbnail.data, mimeType: result.thumbnail.mimeType });
+      }
+      content.push({ type: "text", text: output });
+
+      const imageCount = (result.frames?.length ?? 0) + (result.thumbnail ? 1 : 0);
       return {
-        content
+        content,
         details: {
           urls: urlList,
           urlCount: 1,
@@ -463,6 +499,12 @@ export default function (pi: ExtensionAPI) {
           title: result.title,
           responseId,
           truncated,
+          hasImage: imageCount > 0,
+          imageCount,
+          prompt: params.prompt,
+          timestamp: params.timestamp,
+          frames: params.frames,
+          duration: result.duration,
         },
       };
     }
@@ -485,27 +527,39 @@ export default function (pi: ExtensionAPI) {
     },
 
     renderCall(args, theme) {
-      const { url, urls } = args as { url?: string; urls?: string[] };
+      const { url, urls, prompt, timestamp, frames } = args as { url?: string; urls?: string[]; prompt?: string; timestamp?: string; frames?: number };
       const urlList = urls ?? (url ? [url] : []);
       if (urlList.length === 0) {
         return new Text(theme.fg("toolTitle", theme.bold("fetch ")) + theme.fg("error", "(no URL)"), 0, 0);
       }
+      const lines: string[] = [];
       if (urlList.length === 1) {
-        const display = urlList[0].length >
-
+        const display = urlList[0].length > 60 ? urlList[0].slice(0, 57) + "..." : urlList[0];
+        lines.push(theme.fg("toolTitle", theme.bold("fetch ")) + theme.fg("accent", display));
+      } else {
+        lines.push(theme.fg("toolTitle", theme.bold("fetch ")) + theme.fg("accent", `${urlList.length} URLs`));
+        for (const u of urlList.slice(0, 5)) {
+          const display = u.length > 60 ? u.slice(0, 57) + "..." : u;
+          lines.push(theme.fg("muted", " " + display));
+        }
+        if (urlList.length > 5) {
+          lines.push(theme.fg("muted", ` ... and ${urlList.length - 5} more`));
+        }
+      }
+      if (timestamp) {
+        lines.push(theme.fg("dim", " timestamp: ") + theme.fg("warning", timestamp));
       }
-
-
-        const display = u.length > 60 ? u.slice(0, 57) + "..." : u;
-        lines.push(theme.fg("muted", " " + display));
+      if (typeof frames === "number") {
+        lines.push(theme.fg("dim", " frames: ") + theme.fg("warning", String(frames)));
       }
-      if (
-
+      if (prompt) {
+        const display = prompt.length > 250 ? prompt.slice(0, 247) + "..." : prompt;
+        lines.push(theme.fg("dim", " prompt: ") + theme.fg("muted", `"${display}"`));
       }
       return new Text(lines.join("\n"), 0, 0);
     },
 
-    renderResult(result, { expanded }, theme) {
+    renderResult(result, { expanded, isPartial }, theme) {
       const details = result.details as {
         urlCount?: number;
         successful?: number;
@@ -514,27 +568,63 @@ export default function (pi: ExtensionAPI) {
         title?: string;
         truncated?: boolean;
         responseId?: string;
+        phase?: string;
+        progress?: number;
+        hasImage?: boolean;
+        imageCount?: number;
+        prompt?: string;
+        timestamp?: string;
+        frames?: number;
+        duration?: number;
       };
 
+      if (isPartial) {
+        const progress = details?.progress ?? 0;
+        const bar = "\u2588".repeat(Math.floor(progress * 10)) + "\u2591".repeat(10 - Math.floor(progress * 10));
+        return new Text(theme.fg("accent", `[${bar}] ${details?.phase || "fetching"}`), 0, 0);
+      }
+
       if (details?.error) {
         return new Text(theme.fg("error", `Error: ${details.error}`), 0, 0);
       }
 
       if (details?.urlCount === 1) {
         const title = details?.title || "Untitled";
-
+        const imgCount = details?.imageCount ?? (details?.hasImage ? 1 : 0);
+        const imageBadge = imgCount > 1
+          ? theme.fg("accent", ` [${imgCount} images]`)
+          : imgCount === 1
+            ? theme.fg("accent", " [image]")
+            : "";
+        let statusLine = theme.fg("success", title) + theme.fg("muted", ` (${details?.totalChars ?? 0} chars)`) + imageBadge;
         if (details?.truncated) {
           statusLine += theme.fg("warning", " [truncated]");
         }
+        if (typeof details?.duration === "number") {
+          statusLine += theme.fg("muted", ` | ${formatSeconds(Math.floor(details.duration))} total`);
+        }
         if (!expanded) {
           return new Text(statusLine, 0, 0);
         }
+        const lines = [statusLine];
+        if (details?.prompt) {
+          const display = details.prompt.length > 250 ? details.prompt.slice(0, 247) + "..." : details.prompt;
+          lines.push(theme.fg("dim", ` prompt: "${display}"`));
+        }
+        if (details?.timestamp) {
+          lines.push(theme.fg("dim", ` timestamp: ${details.timestamp}`));
+        }
+        if (typeof details?.frames === "number") {
+          lines.push(theme.fg("dim", ` frames: ${details.frames}`));
+        }
         const textContent = result.content.find((c) => c.type === "text")?.text || "";
         const preview = textContent.length > 500 ? textContent.slice(0, 500) + "..." : textContent;
-
+        lines.push(theme.fg("dim", preview));
+        return new Text(lines.join("\n"), 0, 0);
       }
 
-      const
+      const countColor = (details?.successful ?? 0) > 0 ? "success" : "error";
+      const statusLine = theme.fg(countColor, `${details?.successful}/${details?.urlCount} URLs`) + theme.fg("muted", " (content stored)");
       if (!expanded) {
         return new Text(statusLine, 0, 0);
       }
```
package/package.json
CHANGED

```diff
@@ -1,6 +1,6 @@
 {
   "name": "pi-web-access",
-  "version": "0.
+  "version": "0.7.0",
   "type": "module",
   "keywords": ["pi-package", "pi", "pi-coding-agent", "extension", "web-search", "perplexity", "fetch", "scraping"],
   "dependencies": {
@@ -11,6 +11,8 @@
     "unpdf": "^1.4.0"
   },
   "pi": {
-    "extensions": ["./index.ts"]
+    "extensions": ["./index.ts"],
+    "skills": ["./skills"],
+    "video": "https://github.com/nicobailon/pi-web-access/raw/refs/heads/main/pi-web-fetch-demo.mp4"
   }
 }
```
package/perplexity.ts
CHANGED

```diff
@@ -19,7 +19,7 @@ export interface SearchResult {
   snippet: string;
 }
 
-export interface
+export interface SearchResponse {
   answer: string;
   results: SearchResult[];
 }
@@ -91,7 +91,12 @@ function validateDomainFilter(domains: string[]): string[] {
   });
 }
 
-export
+export function isPerplexityAvailable(): boolean {
+  const config = loadConfig();
+  return Boolean(config.perplexityApiKey || process.env.PERPLEXITY_API_KEY);
+}
+
+export async function searchWithPerplexity(query: string, options: SearchOptions = {}): Promise<SearchResponse> {
   checkRateLimit();
 
   const activityId = activityMonitor.logStart({ type: "api", query });
```
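
The new `isPerplexityAvailable` export is what the "auto" provider behaviour in `web_search` hinges on. A minimal sketch of how that selection could be wired, assuming a hypothetical `resolveProvider` helper; the real wiring lives in gemini-search.ts, which is not shown in this diff.

```typescript
import { isPerplexityAvailable } from "./perplexity.js";

type SearchProvider = "auto" | "perplexity" | "gemini";

// Hypothetical helper: prefer Perplexity when a key is configured, otherwise fall back to Gemini.
function resolveProvider(requested: SearchProvider = "auto"): "perplexity" | "gemini" {
  if (requested !== "auto") return requested;
  return isPerplexityAvailable() ? "perplexity" : "gemini";
}
```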
package/pi-web-fetch-demo.mp4
Binary file
package/rsc-extract.ts
CHANGED

```diff
@@ -150,7 +150,7 @@ export function extractRSCContent(html: string): RSCExtractResult | null {
       case "p": return ctx.inTable ? content : `${content.trim()}\n\n`;
       case "code": {
         const codeContent = children ? extractNode(children as Node, { ...ctx, inCode: true }) : "";
-        return `\`${codeContent}\``;
+        return ctx.inCode ? codeContent : `\`${codeContent}\``;
       }
       case "pre": {
         const preContent = children ? extractNode(children as Node, { ...ctx, inCode: true }) : "";
```
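
The one-line change guards against double-wrapping: when a `code` node is rendered inside a `pre` (so `ctx.inCode` is already true), emitting inline backticks would end up inside the fenced block. A self-contained sketch of the pattern, not the package's actual `extractNode`:

```typescript
// Minimal stand-in types and renderer to illustrate the inCode guard.
type Node = { tag: string; children?: Node | string };

const FENCE = "`".repeat(3);

function render(node: Node | string, inCode = false): string {
  if (typeof node === "string") return node;
  const childInCode = inCode || node.tag === "pre" || node.tag === "code";
  const content = node.children ? render(node.children, childInCode) : "";
  switch (node.tag) {
    case "pre":
      return `${FENCE}\n${content}\n${FENCE}\n`;
    case "code":
      // The guard from the diff: inside a <pre>, return the raw content instead of wrapping it.
      return inCode ? content : `\`${content}\``;
    default:
      return content;
  }
}

// <pre><code>const x = 1;</code></pre> now yields a clean fenced block with no stray backticks.
console.log(render({ tag: "pre", children: { tag: "code", children: "const x = 1;" } }));
```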
package/skills/librarian/SKILL.md
ADDED

---
name: librarian
description: Research open-source libraries with evidence-backed answers and GitHub permalinks. Use when the user asks about library internals, needs implementation details with source code references, wants to understand why something was changed, or needs authoritative answers backed by actual code. Excels at navigating large open-source repos and providing citations to exact lines of code.
---

# Librarian

Answer questions about open-source libraries by finding evidence with GitHub permalinks. Every claim backed by actual code.

## Execution Model

Pi executes tool calls sequentially, even when you emit multiple calls in one turn. But batching independent calls in a single turn still saves LLM round-trips (~5-10s each). Use these patterns:

| Pattern | When | Actually parallel? |
|---------|------|-------------------|
| Batch tool calls in one turn | Independent ops (web_search + fetch_content + read) | No, but saves round-trips |
| `fetch_content({ urls: [...] })` | Multiple URLs to fetch | Yes (3 concurrent) |
| Bash with `&` + `wait` | Multiple git/gh commands | Yes (OS-level) |

## Step 1: Classify the Request

Before doing anything, classify the request to pick the right research strategy.

| Type | Trigger | Primary Approach |
|------|---------|-----------------|
| **Conceptual** | "How do I use X?", "Best practice for Y?" | web_search + fetch_content (README/docs) |
| **Implementation** | "How does X implement Y?", "Show me the source" | fetch_content (clone) + code search |
| **Context/History** | "Why was this changed?", "History of X?" | git log + git blame + issue/PR search |
| **Comprehensive** | Complex or ambiguous requests, "deep dive" | All of the above |

## Step 2: Research by Type

### Conceptual Questions

Batch these in one turn:

1. **web_search**: `"library-name topic"` via Perplexity for recent articles and discussions
2. **fetch_content**: the library's GitHub repo URL to clone and check README, docs, or examples

Synthesize web results + repo docs. Cite official documentation and link to relevant source files.

### Implementation Questions

The core workflow -- clone, find, permalink:

1. **fetch_content** the GitHub repo URL -- this clones it locally and returns the file tree
2. Use **bash** to search the cloned repo: `grep -rn "function_name"`, `find . -name "*.ts"`
3. Use **read** to examine specific files once you've located them
4. Get the commit SHA: `cd /tmp/pi-github-repos/owner/repo && git rev-parse HEAD`
5. Construct permalink: `https://github.com/owner/repo/blob/<sha>/path/to/file#L10-L20`

Batch the initial calls: fetch_content (clone) + web_search (recent discussions) in one turn. Then dig into the clone with grep/read once it's available.

### Context/History Questions

Use git operations on the cloned repo:

```bash
cd /tmp/pi-github-repos/owner/repo

# Recent changes to a specific file
git log --oneline -n 20 -- path/to/file.ts

# Who changed what and when
git blame -L 10,30 path/to/file.ts

# Full diff for a specific commit
git show <sha> -- path/to/file.ts

# Search commit messages
git log --oneline --grep="keyword" -n 10
```

For issues and PRs, use bash:

```bash
# Search issues
gh search issues "keyword" --repo owner/repo --state all --limit 10

# Search merged PRs
gh search prs "keyword" --repo owner/repo --state merged --limit 10

# View specific issue/PR with comments
gh issue view <number> --repo owner/repo --comments
gh pr view <number> --repo owner/repo --comments

# Release notes
gh api repos/owner/repo/releases --jq '.[0:5] | .[].tag_name'
```

### Comprehensive Research

Combine everything. Batch these in one turn:

1. **web_search**: recent articles and discussions
2. **fetch_content**: clone the repo (or multiple repos if comparing)
3. **bash**: `gh search issues "keyword" --repo owner/repo --limit 10 & gh search prs "keyword" --repo owner/repo --state merged --limit 10 & wait`

Then dig into the clone with grep, read, git blame, git log as needed.

## Step 3: Construct Permalinks

Permalinks are the whole point. They make your answers citable and verifiable.

```
https://github.com/<owner>/<repo>/blob/<commit-sha>/<filepath>#L<start>-L<end>
```

Getting the SHA from a cloned repo:

```bash
cd /tmp/pi-github-repos/owner/repo && git rev-parse HEAD
```

Getting the SHA from a tag:

```bash
gh api repos/owner/repo/git/refs/tags/v1.0.0 --jq '.object.sha'
```

Always use full commit SHAs, not branch names. Branch links break when code changes. Permalinks don't.

## Step 4: Cite Everything

Every code-related claim needs a permalink. Format:

```markdown
The stale time check happens in [`notifyManager.ts`](https://github.com/TanStack/query/blob/abc123/packages/query-core/src/notifyManager.ts#L42-L50):

\`\`\`typescript
function isStale(query: Query, staleTime: number): boolean {
  return query.state.dataUpdatedAt + staleTime < Date.now()
}
\`\`\`
```

For conceptual answers, link to official docs and relevant source files. For implementation answers, every function/class reference should have a permalink.

## Video Analysis

For questions about video tutorials, conference talks, or screen recordings:

```typescript
// Full extraction (transcript + visual descriptions)
fetch_content({ url: "https://youtube.com/watch?v=abc" })

// Ask a specific question about a video
fetch_content({ url: "https://youtube.com/watch?v=abc", prompt: "What libraries are imported in this tutorial?" })

// Single frame at a known moment
fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41" })

// Range scan for visual discovery
fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41-25:00" })

// Custom density across a range
fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41-25:00", frames: 3 })

// Whole-video sampling
fetch_content({ url: "https://youtube.com/watch?v=abc", frames: 6 })

// Analyze a local recording
fetch_content({ url: "/path/to/demo.mp4", prompt: "What error message appears on screen?" })

// Batch multiple videos with the same question
fetch_content({
  urls: ["https://youtube.com/watch?v=abc", "https://youtube.com/watch?v=def"],
  prompt: "What packages are installed?"
})
```

Use single timestamps for known moments, ranges for visual scanning, and frames-alone for a quick overview of the whole video.

The `prompt` parameter only applies to video content (YouTube URLs and local video files). For non-video URLs, it's ignored.

## Failure Recovery

| Failure | Recovery |
|---------|----------|
| grep finds nothing | Broaden the query, try concept names instead of exact function names |
| gh CLI rate limited | Use the already-cloned repo in /tmp/pi-github-repos/ for git operations |
| Repo too large to clone | fetch_content returns an API-only view automatically; use that or add `forceClone: true` |
| File not found in clone | Branch name with slashes may have misresolved; list the repo tree and navigate manually |
| Uncertain about implementation | State your uncertainty explicitly, propose a hypothesis, show what evidence you did find |
| Video extraction fails | Ensure Chrome is signed into gemini.google.com (free) or set GEMINI_API_KEY |
| Page returns 403/bot block | Gemini fallback triggers automatically; no action needed if Gemini is configured |
| web_search fails | Check provider config; try explicit `provider: "gemini"` if Perplexity key is missing |

## Guidelines

- Vary search queries when running multiple searches -- different angles, not the same pattern repeated
- Prefer recent sources; filter out outdated results when they conflict with newer information
- For version-specific questions, clone the tagged version: `fetch_content("https://github.com/owner/repo/tree/v1.0.0")`
- When the repo is already cloned from a previous fetch_content call, reuse it -- check the path before cloning again
- Answer directly. Skip preamble like "I'll help you with..." -- go straight to findings
package/utils.ts
ADDED

```typescript
export function formatSeconds(s: number): string {
  const h = Math.floor(s / 3600);
  const m = Math.floor((s % 3600) / 60);
  const sec = s % 60;
  if (h > 0) return `${h}:${String(m).padStart(2, "0")}:${String(sec).padStart(2, "0")}`;
  return `${m}:${String(sec).padStart(2, "0")}`;
}

export function readExecError(err: unknown): { code?: string; stderr: string; message: string } {
  if (!err || typeof err !== "object") {
    return { stderr: "", message: String(err) };
  }
  const code = (err as { code?: string }).code;
  const message = (err as { message?: string }).message ?? "";
  const stderrRaw = (err as { stderr?: Buffer | string }).stderr;
  const stderr = Buffer.isBuffer(stderrRaw)
    ? stderrRaw.toString("utf-8")
    : typeof stderrRaw === "string"
      ? stderrRaw
      : "";
  return { code, stderr, message };
}

export function isTimeoutError(err: unknown): boolean {
  if (!err || typeof err !== "object") return false;
  if ((err as { killed?: boolean }).killed) return true;
  const name = (err as { name?: string }).name;
  const code = (err as { code?: string }).code;
  const message = (err as { message?: string }).message ?? "";
  return name === "AbortError" || code === "ETIMEDOUT" || message.toLowerCase().includes("timed out");
}

export function trimErrorText(text: string): string {
  return text.replace(/\s+/g, " ").trim().slice(0, 200);
}

export function mapFfmpegError(err: unknown): string {
  const { code, stderr, message } = readExecError(err);
  if (code === "ENOENT") return "ffmpeg is not installed. Install with: brew install ffmpeg";
  if (isTimeoutError(err)) return "ffmpeg timed out extracting frame";
  if (stderr.includes("403")) return "Stream URL returned 403 — may have expired, try again";
  const snippet = trimErrorText(stderr || message);
  return snippet ? `ffmpeg failed: ${snippet}` : "ffmpeg failed";
}
```