pi-web-access 0.5.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +59 -1
- package/README.md +136 -23
- package/chrome-cookies.ts +240 -0
- package/extract.ts +266 -27
- package/gemini-api.ts +103 -0
- package/gemini-search.ts +236 -0
- package/gemini-url-context.ts +119 -0
- package/gemini-web.ts +296 -0
- package/index.ts +112 -22
- package/package.json +3 -2
- package/perplexity.ts +7 -2
- package/pi-web-fetch-demo.mp4 +0 -0
- package/rsc-extract.ts +1 -1
- package/skills/librarian/SKILL.md +40 -0
- package/utils.ts +44 -0
- package/video-extract.ts +329 -0
- package/youtube-extract.ts +280 -0
package/extract.ts
CHANGED
|
@@ -6,10 +6,16 @@ import { activityMonitor } from "./activity.js";
|
|
|
6
6
|
import { extractRSCContent } from "./rsc-extract.js";
|
|
7
7
|
import { extractPDFToMarkdown, isPDF } from "./pdf-extract.js";
|
|
8
8
|
import { extractGitHub } from "./github-extract.js";
|
|
9
|
+
import { isYouTubeURL, isYouTubeEnabled, extractYouTube, extractYouTubeFrame, extractYouTubeFrames, getYouTubeStreamInfo } from "./youtube-extract.js";
|
|
10
|
+
import { extractWithUrlContext, extractWithGeminiWeb } from "./gemini-url-context.js";
|
|
11
|
+
import { isVideoFile, extractVideo, extractVideoFrame, getLocalVideoDuration } from "./video-extract.js";
|
|
12
|
+
import { formatSeconds } from "./utils.js";
|
|
9
13
|
|
|
10
14
|
const DEFAULT_TIMEOUT_MS = 30000;
|
|
11
15
|
const CONCURRENT_LIMIT = 3;
|
|
12
16
|
|
|
17
|
+
const NON_RECOVERABLE_ERRORS = ["Unsupported content type", "Response too large"];
|
|
18
|
+
|
|
13
19
|
const turndown = new TurndownService({
|
|
14
20
|
headingStyle: "atx",
|
|
15
21
|
codeBlockStyle: "fenced",
|
|
@@ -17,16 +23,102 @@ const turndown = new TurndownService({
|
|
|
17
23
|
|
|
18
24
|
const fetchLimit = pLimit(CONCURRENT_LIMIT);
|
|
19
25
|
|
|
26
|
+
/**
 * One still image extracted from a video, tagged with its position in the video.
 */
export interface VideoFrame {
  /** Encoded image payload (presumably base64 — confirm against the frame extractors). */
  data: string;

  /** MIME type describing `data`. */
  mimeType: string;

  /** Human-readable position of the frame; local frames use the output of `formatSeconds`. */
  timestamp: string;
}
|
|
31
|
+
|
|
32
|
+
/** Image payload of a single extracted frame, without position information. */
export type FrameData = {
  data: string;
  mimeType: string;
};

/**
 * Outcome of a single-frame extraction: the image on success, or an object
 * carrying an `error` message on failure. Callers distinguish the two with
 * an `"error" in result` check.
 */
export type FrameResult =
  | FrameData
  | { error: string };
|
|
34
|
+
|
|
20
35
|
/**
 * Result of extracting content from a URL or local file.
 *
 * Failures are reported through `error` rather than thrown; `error` is
 * `null` on success.
 */
export interface ExtractedContent {
  /** The URL (or path) that was requested. */
  url: string;

  /** Title of the extracted content; may be empty. */
  title: string;

  /** Extracted text; error results often repeat the error message here. */
  content: string;

  /** Failure description, or `null` when extraction succeeded. */
  error: string | null;

  /** Single image attached to the result, e.g. the frame for a single-timestamp request. */
  thumbnail?: { data: string; mimeType: string };

  /** Frames returned for multi-frame (range / count) requests. */
  frames?: VideoFrame[];

  /** Video duration when known (presumably in seconds — confirm against producers). */
  duration?: number;
}
|
|
26
44
|
|
|
27
45
|
/** Optional knobs accepted by the content extraction entry points. */
export interface ExtractOptions {
  /** Timeout for the HTTP fetch in milliseconds; the HTTP path falls back to `DEFAULT_TIMEOUT_MS`. */
  timeoutMs?: number;

  /** Forwarded to the GitHub extractor. */
  forceClone?: boolean;

  /** Prompt forwarded to the YouTube extractor. */
  prompt?: string;

  /**
   * Frame position to extract: either a single timestamp or a
   * `start-end` range, as understood by `parseTimestampSpec`.
   */
  timestamp?: string;

  /**
   * Number of frames to extract. Without `timestamp`, frames are sampled
   * across the whole video.
   */
  frames?: number;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Parse a single timestamp into whole seconds.
 *
 * Accepted forms:
 *  - a non-negative number of seconds (`"90"`, `"12.7"`)
 *  - `MM:SS`
 *  - `HH:MM:SS`
 *
 * Fractional values are truncated with `Math.floor`.
 *
 * @param ts Raw timestamp text.
 * @returns The timestamp in seconds, or `null` when `ts` is blank, not
 *   finite, negative, or not in one of the accepted forms.
 */
function parseTimestamp(ts: string): number | null {
  const trimmed = ts.trim();
  // Number("") and Number("   ") evaluate to 0, so blank input has to be
  // rejected explicitly instead of being read as "0 seconds".
  if (trimmed === "") return null;

  const asSeconds = Number(trimmed);
  // Number.isFinite rejects both NaN and +/-Infinity.
  if (Number.isFinite(asSeconds) && asSeconds >= 0) return Math.floor(asSeconds);

  const fields = trimmed.split(":");
  if (fields.length !== 2 && fields.length !== 3) return null;

  // An empty field ("1::30") would otherwise coerce to 0.
  const parts = fields.map((field) => (field.trim() === "" ? NaN : Number(field)));
  if (parts.some((part) => !Number.isFinite(part) || part < 0)) return null;

  if (parts.length === 3) return Math.floor(parts[0] * 3600 + parts[1] * 60 + parts[2]);
  return Math.floor(parts[0] * 60 + parts[1]);
}
|
|
62
|
+
|
|
63
|
+
/** Parsed form of a timestamp option: one instant, or a start/end range (values from `parseTimestamp`). */
type TimestampSpec =
  | { type: "single"; seconds: number }
  | { type: "range"; start: number; end: number };

/**
 * Interpret a timestamp option as a `start-end` range or a single instant.
 *
 * A range is recognised only when both sides parse and the end lies strictly
 * after the start; otherwise the whole string is retried as a single
 * timestamp.
 *
 * @param ts Raw timestamp option.
 * @returns The parsed spec, or `null` when `ts` is neither a valid range nor
 *   a valid single timestamp.
 */
function parseTimestampSpec(ts: string): TimestampSpec | null {
  // Searching from index 1 means a "-" in the first position is never
  // treated as the range separator.
  const separatorAt = ts.indexOf("-", 1);
  if (separatorAt > 0) {
    const from = parseTimestamp(ts.slice(0, separatorAt));
    const to = parseTimestamp(ts.slice(separatorAt + 1));
    if (from !== null && to !== null) {
      if (to > from) return { type: "range", start: from, end: to };
    }
  }

  const instant = parseTimestamp(ts);
  if (instant === null) return null;
  return { type: "single", seconds: instant };
}
|
|
75
|
+
|
|
76
|
+
/** Frame count used for a range when the caller does not specify one. */
const DEFAULT_RANGE_FRAMES = 6;
/** Smallest spacing allowed between two sampled frames (same unit as the timestamps). */
const MIN_FRAME_INTERVAL = 5;

/**
 * Choose the timestamps at which to sample frames between `start` and `end`.
 *
 * When the range is long enough, `maxFrames` timestamps are spread evenly
 * from `start` to `end` inclusive (rounded to integers). When even spacing
 * would put frames closer than `MIN_FRAME_INTERVAL`, frames are instead taken
 * every `MIN_FRAME_INTERVAL` from `start` until `end` or `maxFrames` is
 * reached, so fewer than `maxFrames` timestamps may be returned.
 *
 * @param start First timestamp.
 * @param end Last timestamp allowed.
 * @param maxFrames Upper bound on the number of timestamps.
 * @returns Ascending list of timestamps; `[start]` when `maxFrames <= 1`.
 */
function computeRangeTimestamps(start: number, end: number, maxFrames: number = DEFAULT_RANGE_FRAMES): number[] {
  if (maxFrames <= 1) return [start];

  const evenStep = (end - start) / (maxFrames - 1);

  if (evenStep < MIN_FRAME_INTERVAL) {
    // Range too short for even spacing: walk forward at the minimum interval.
    const picked: number[] = [];
    let cursor = start;
    while (cursor <= end && picked.length < maxFrames) {
      picked.push(cursor);
      cursor += MIN_FRAME_INTERVAL;
    }
    return picked;
  }

  return Array.from({ length: maxFrames }).map((_, index) => Math.round(start + index * evenStep));
}
|
|
92
|
+
|
|
93
|
+
/**
 * Package the outcome of a multi-frame extraction as an `ExtractedContent`.
 *
 * With at least one frame the result is a success (`error: null`) carrying
 * the frames and the optional duration. With no frames the result is a
 * failure whose `content` and `error` both hold `error`, or a generic
 * message when `error` is `null`.
 *
 * @param url Source the frames were requested from.
 * @param label Range label shown in the title and content.
 * @param requestedCount Number of frames that were requested.
 * @param frames Frames that were actually extracted.
 * @param error Error reported by the extractor, if any; used only when no frame was extracted.
 * @param duration Video duration to attach to a successful result.
 * @returns The assembled result; the title has the form `Frames <label> (<got>/<requested>)`.
 */
function buildFrameResult(
  url: string, label: string, requestedCount: number,
  frames: VideoFrame[], error: string | null, duration?: number,
): ExtractedContent {
  const extracted = frames.length;
  const title = `Frames ${label} (${extracted}/${requestedCount})`;

  if (extracted > 0) {
    const content = `${extracted} frames extracted from ${label}`;
    return { url, title, content, error: null, frames, duration };
  }

  const failure = error ?? "Frame extraction failed";
  return { url, title, content: failure, error: failure };
}
|
|
110
|
+
|
|
111
|
+
/**
 * Extract one frame per timestamp from a local video file.
 *
 * All timestamps are requested concurrently through `extractVideoFrame`.
 * Frames that succeed are returned labelled with `formatSeconds(timestamp)`;
 * individual failures are dropped as long as at least one frame succeeded.
 *
 * @param filePath Path of the video file passed to `extractVideoFrame`.
 * @param timestamps Positions to sample.
 * @returns The extracted frames, plus the first failure's message when no
 *   frame could be extracted at all (otherwise `error` is `null`).
 */
async function extractLocalFrames(
  filePath: string, timestamps: number[],
): Promise<{ frames: VideoFrame[]; error: string | null }> {
  const attempts = timestamps.map(async (seconds) => {
    const shot = await extractVideoFrame(filePath, seconds);
    return "error" in shot
      ? { error: shot.error }
      : { ...shot, timestamp: formatSeconds(seconds) };
  });
  const outcomes = await Promise.all(attempts);

  const frames = outcomes.filter((outcome): outcome is VideoFrame => "data" in outcome);
  if (frames.length > 0) return { frames, error: null };

  // Nothing succeeded: surface the first failure, if there was one.
  const failed = outcomes.find((outcome): outcome is { error: string } => "error" in outcome);
  return { frames, error: failed ? failed.error : null };
}
|
|
31
123
|
|
|
32
124
|
export async function extractContent(
|
|
@@ -34,11 +126,135 @@ export async function extractContent(
|
|
|
34
126
|
signal?: AbortSignal,
|
|
35
127
|
options?: ExtractOptions,
|
|
36
128
|
): Promise<ExtractedContent> {
|
|
37
|
-
const timeoutMs = options?.timeoutMs ?? DEFAULT_TIMEOUT_MS;
|
|
38
129
|
if (signal?.aborted) {
|
|
39
130
|
return { url, title: "", content: "", error: "Aborted" };
|
|
40
131
|
}
|
|
41
132
|
|
|
133
|
+
if (options?.frames && !options.timestamp) {
|
|
134
|
+
const frameCount = options.frames;
|
|
135
|
+
const ytInfo = isYouTubeURL(url);
|
|
136
|
+
if (ytInfo.isYouTube && ytInfo.videoId) {
|
|
137
|
+
const streamInfo = await getYouTubeStreamInfo(ytInfo.videoId);
|
|
138
|
+
if ("error" in streamInfo) {
|
|
139
|
+
return { url, title: "Frames", content: streamInfo.error, error: streamInfo.error };
|
|
140
|
+
}
|
|
141
|
+
if (streamInfo.duration === null) {
|
|
142
|
+
const error = "Cannot determine video duration. Use a timestamp range instead.";
|
|
143
|
+
return { url, title: "Frames", content: error, error };
|
|
144
|
+
}
|
|
145
|
+
const dur = Math.floor(streamInfo.duration);
|
|
146
|
+
const timestamps = computeRangeTimestamps(0, dur, frameCount);
|
|
147
|
+
const result = await extractYouTubeFrames(ytInfo.videoId, timestamps, streamInfo);
|
|
148
|
+
const label = `${formatSeconds(0)}-${formatSeconds(dur)}`;
|
|
149
|
+
return buildFrameResult(url, label, timestamps.length, result.frames, result.error, streamInfo.duration);
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
const videoInfo = isVideoFile(url);
|
|
153
|
+
if (videoInfo) {
|
|
154
|
+
const durationResult = await getLocalVideoDuration(videoInfo.absolutePath);
|
|
155
|
+
if (typeof durationResult !== "number") {
|
|
156
|
+
return { url, title: "Frames", content: durationResult.error, error: durationResult.error };
|
|
157
|
+
}
|
|
158
|
+
const dur = Math.floor(durationResult);
|
|
159
|
+
const timestamps = computeRangeTimestamps(0, dur, frameCount);
|
|
160
|
+
const result = await extractLocalFrames(videoInfo.absolutePath, timestamps);
|
|
161
|
+
const label = `${formatSeconds(0)}-${formatSeconds(dur)}`;
|
|
162
|
+
return buildFrameResult(url, label, timestamps.length, result.frames, result.error, durationResult);
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
return { url, title: "", content: "", error: "Frame extraction only works with YouTube and local video files" };
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
if (options?.timestamp) {
|
|
169
|
+
const spec = parseTimestampSpec(options.timestamp);
|
|
170
|
+
if (spec) {
|
|
171
|
+
const frameCount = options.frames;
|
|
172
|
+
const ytInfo = isYouTubeURL(url);
|
|
173
|
+
if (ytInfo.isYouTube && ytInfo.videoId) {
|
|
174
|
+
const streamInfo = await getYouTubeStreamInfo(ytInfo.videoId);
|
|
175
|
+
if ("error" in streamInfo) {
|
|
176
|
+
if (spec.type === "range") {
|
|
177
|
+
const label = `${formatSeconds(spec.start)}-${formatSeconds(spec.end)}`;
|
|
178
|
+
return { url, title: `Frames ${label}`, content: streamInfo.error, error: streamInfo.error };
|
|
179
|
+
}
|
|
180
|
+
if (frameCount) {
|
|
181
|
+
const end = spec.seconds + (frameCount - 1) * MIN_FRAME_INTERVAL;
|
|
182
|
+
const label = `${formatSeconds(spec.seconds)}-${formatSeconds(end)}`;
|
|
183
|
+
return { url, title: `Frames ${label}`, content: streamInfo.error, error: streamInfo.error };
|
|
184
|
+
}
|
|
185
|
+
return { url, title: `Frame at ${options.timestamp}`, content: streamInfo.error, error: streamInfo.error };
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
if (spec.type === "range") {
|
|
189
|
+
const label = `${formatSeconds(spec.start)}-${formatSeconds(spec.end)}`;
|
|
190
|
+
if (streamInfo.duration !== null && spec.end > streamInfo.duration) {
|
|
191
|
+
const error = `Timestamp ${formatSeconds(spec.end)} exceeds video duration (${formatSeconds(Math.floor(streamInfo.duration))})`;
|
|
192
|
+
return { url, title: `Frames ${label}`, content: error, error };
|
|
193
|
+
}
|
|
194
|
+
const timestamps = frameCount
|
|
195
|
+
? computeRangeTimestamps(spec.start, spec.end, frameCount)
|
|
196
|
+
: computeRangeTimestamps(spec.start, spec.end);
|
|
197
|
+
const result = await extractYouTubeFrames(ytInfo.videoId, timestamps, streamInfo);
|
|
198
|
+
return buildFrameResult(url, label, timestamps.length, result.frames, result.error, result.duration ?? undefined);
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
if (frameCount) {
|
|
202
|
+
const end = spec.seconds + (frameCount - 1) * MIN_FRAME_INTERVAL;
|
|
203
|
+
const label = `${formatSeconds(spec.seconds)}-${formatSeconds(end)}`;
|
|
204
|
+
if (streamInfo.duration !== null && end > streamInfo.duration) {
|
|
205
|
+
const error = `Timestamp ${formatSeconds(end)} exceeds video duration (${formatSeconds(Math.floor(streamInfo.duration))})`;
|
|
206
|
+
return { url, title: `Frames ${label}`, content: error, error };
|
|
207
|
+
}
|
|
208
|
+
const timestamps = computeRangeTimestamps(spec.seconds, end, frameCount);
|
|
209
|
+
const result = await extractYouTubeFrames(ytInfo.videoId, timestamps, streamInfo);
|
|
210
|
+
return buildFrameResult(url, label, timestamps.length, result.frames, result.error, result.duration ?? undefined);
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
if (streamInfo.duration !== null && spec.seconds > streamInfo.duration) {
|
|
214
|
+
const error = `Timestamp ${formatSeconds(spec.seconds)} exceeds video duration (${formatSeconds(Math.floor(streamInfo.duration))})`;
|
|
215
|
+
return { url, title: `Frame at ${options.timestamp}`, content: error, error };
|
|
216
|
+
}
|
|
217
|
+
const frame = await extractYouTubeFrame(ytInfo.videoId, spec.seconds, streamInfo);
|
|
218
|
+
if ("error" in frame) {
|
|
219
|
+
return { url, title: `Frame at ${options.timestamp}`, content: frame.error, error: frame.error };
|
|
220
|
+
}
|
|
221
|
+
return { url, title: `Frame at ${options.timestamp}`, content: `Video frame at ${options.timestamp}`, error: null, thumbnail: frame };
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
const videoInfo = isVideoFile(url);
|
|
225
|
+
if (videoInfo) {
|
|
226
|
+
if (spec.type === "range") {
|
|
227
|
+
const timestamps = frameCount
|
|
228
|
+
? computeRangeTimestamps(spec.start, spec.end, frameCount)
|
|
229
|
+
: computeRangeTimestamps(spec.start, spec.end);
|
|
230
|
+
const result = await extractLocalFrames(videoInfo.absolutePath, timestamps);
|
|
231
|
+
const label = `${formatSeconds(spec.start)}-${formatSeconds(spec.end)}`;
|
|
232
|
+
return buildFrameResult(url, label, timestamps.length, result.frames, result.error);
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
if (frameCount) {
|
|
236
|
+
const end = spec.seconds + (frameCount - 1) * MIN_FRAME_INTERVAL;
|
|
237
|
+
const timestamps = computeRangeTimestamps(spec.seconds, end, frameCount);
|
|
238
|
+
const result = await extractLocalFrames(videoInfo.absolutePath, timestamps);
|
|
239
|
+
const label = `${formatSeconds(spec.seconds)}-${formatSeconds(end)}`;
|
|
240
|
+
return buildFrameResult(url, label, timestamps.length, result.frames, result.error);
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
const frame = await extractVideoFrame(videoInfo.absolutePath, spec.seconds);
|
|
244
|
+
if ("error" in frame) {
|
|
245
|
+
return { url, title: `Frame at ${options.timestamp}`, content: frame.error, error: frame.error };
|
|
246
|
+
}
|
|
247
|
+
return { url, title: `Frame at ${options.timestamp}`, content: `Video frame at ${options.timestamp}`, error: null, thumbnail: frame };
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
const videoInfo = isVideoFile(url);
|
|
253
|
+
if (videoInfo) {
|
|
254
|
+
const result = await extractVideo(videoInfo, signal, options);
|
|
255
|
+
return result ?? { url, title: "", content: "", error: "Video analysis requires Gemini access. Either:\n 1. Sign into gemini.google.com in Chrome (free, uses cookies)\n 2. Set GEMINI_API_KEY in ~/.pi/web-search.json" };
|
|
256
|
+
}
|
|
257
|
+
|
|
42
258
|
try {
|
|
43
259
|
new URL(url);
|
|
44
260
|
} catch {
|
|
@@ -48,10 +264,39 @@ export async function extractContent(
|
|
|
48
264
|
try {
|
|
49
265
|
const ghResult = await extractGitHub(url, signal, options?.forceClone);
|
|
50
266
|
if (ghResult) return ghResult;
|
|
51
|
-
} catch {
|
|
52
|
-
|
|
267
|
+
} catch {}
|
|
268
|
+
|
|
269
|
+
const ytInfo = isYouTubeURL(url);
|
|
270
|
+
if (ytInfo.isYouTube && isYouTubeEnabled()) {
|
|
271
|
+
try {
|
|
272
|
+
const ytResult = await extractYouTube(url, signal, options?.prompt);
|
|
273
|
+
if (ytResult) return ytResult;
|
|
274
|
+
} catch {}
|
|
275
|
+
return {
|
|
276
|
+
url,
|
|
277
|
+
title: "",
|
|
278
|
+
content: "",
|
|
279
|
+
error: "Could not extract YouTube video content. Sign into Google in Chrome for automatic access, or set GEMINI_API_KEY.",
|
|
280
|
+
};
|
|
53
281
|
}
|
|
54
282
|
|
|
283
|
+
const httpResult = await extractViaHttp(url, signal, options);
|
|
284
|
+
|
|
285
|
+
if (!httpResult.error || signal?.aborted) return httpResult;
|
|
286
|
+
if (NON_RECOVERABLE_ERRORS.some(prefix => httpResult.error!.startsWith(prefix))) return httpResult;
|
|
287
|
+
|
|
288
|
+
const geminiResult = await extractWithUrlContext(url, signal)
|
|
289
|
+
?? await extractWithGeminiWeb(url, signal);
|
|
290
|
+
|
|
291
|
+
return geminiResult ?? httpResult;
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
async function extractViaHttp(
|
|
295
|
+
url: string,
|
|
296
|
+
signal?: AbortSignal,
|
|
297
|
+
options?: ExtractOptions,
|
|
298
|
+
): Promise<ExtractedContent> {
|
|
299
|
+
const timeoutMs = options?.timeoutMs ?? DEFAULT_TIMEOUT_MS;
|
|
55
300
|
const activityId = activityMonitor.logStart({ type: "fetch", url });
|
|
56
301
|
|
|
57
302
|
const controller = new AbortController();
|
|
@@ -79,11 +324,10 @@ export async function extractContent(
|
|
|
79
324
|
};
|
|
80
325
|
}
|
|
81
326
|
|
|
82
|
-
// Check content length to avoid memory issues with huge responses
|
|
83
327
|
const contentLengthHeader = response.headers.get("content-length");
|
|
84
328
|
const contentType = response.headers.get("content-type") || "";
|
|
85
329
|
const isPDFContent = isPDF(url, contentType);
|
|
86
|
-
const maxResponseSize = isPDFContent ? 20 * 1024 * 1024 : 5 * 1024 * 1024;
|
|
330
|
+
const maxResponseSize = isPDFContent ? 20 * 1024 * 1024 : 5 * 1024 * 1024;
|
|
87
331
|
if (contentLengthHeader) {
|
|
88
332
|
const contentLength = parseInt(contentLengthHeader, 10);
|
|
89
333
|
if (contentLength > maxResponseSize) {
|
|
@@ -97,7 +341,6 @@ export async function extractContent(
|
|
|
97
341
|
}
|
|
98
342
|
}
|
|
99
343
|
|
|
100
|
-
// Handle PDFs - extract and save to markdown file
|
|
101
344
|
if (isPDFContent) {
|
|
102
345
|
try {
|
|
103
346
|
const buffer = await response.arrayBuffer();
|
|
@@ -115,8 +358,7 @@ export async function extractContent(
|
|
|
115
358
|
return { url, title: "", content: "", error: `PDF extraction failed: ${message}` };
|
|
116
359
|
}
|
|
117
360
|
}
|
|
118
|
-
|
|
119
|
-
// Reject binary/non-text content types
|
|
361
|
+
|
|
120
362
|
if (contentType.includes("application/octet-stream") ||
|
|
121
363
|
contentType.includes("image/") ||
|
|
122
364
|
contentType.includes("audio/") ||
|
|
@@ -130,35 +372,27 @@ export async function extractContent(
|
|
|
130
372
|
error: `Unsupported content type: ${contentType.split(";")[0]}`,
|
|
131
373
|
};
|
|
132
374
|
}
|
|
133
|
-
|
|
134
|
-
// Return plain text directly without Readability
|
|
135
|
-
const urlHostname = new URL(url).hostname;
|
|
136
|
-
const isPlainText = contentType.includes("text/plain") ||
|
|
137
|
-
urlHostname === "raw.githubusercontent.com" ||
|
|
138
|
-
urlHostname === "gist.githubusercontent.com";
|
|
139
375
|
|
|
140
376
|
const text = await response.text();
|
|
377
|
+
const isHTML = contentType.includes("text/html") || contentType.includes("application/xhtml+xml");
|
|
141
378
|
|
|
142
|
-
if (
|
|
379
|
+
if (!isHTML) {
|
|
143
380
|
activityMonitor.logComplete(activityId, response.status);
|
|
144
|
-
const
|
|
145
|
-
const title = urlPath.split("/").pop() || url;
|
|
381
|
+
const title = extractTextTitle(text, url);
|
|
146
382
|
return { url, title, content: text, error: null };
|
|
147
383
|
}
|
|
148
384
|
|
|
149
385
|
const { document } = parseHTML(text);
|
|
150
|
-
|
|
151
386
|
const reader = new Readability(document as unknown as Document);
|
|
152
387
|
const article = reader.parse();
|
|
153
388
|
|
|
154
389
|
if (!article) {
|
|
155
|
-
// Fallback: Try extracting from RSC flight data (Next.js App Router)
|
|
156
390
|
const rscResult = extractRSCContent(text);
|
|
157
391
|
if (rscResult) {
|
|
158
392
|
activityMonitor.logComplete(activityId, response.status);
|
|
159
393
|
return { url, title: rscResult.title, content: rscResult.content, error: null };
|
|
160
394
|
}
|
|
161
|
-
|
|
395
|
+
|
|
162
396
|
activityMonitor.logComplete(activityId, response.status);
|
|
163
397
|
return {
|
|
164
398
|
url,
|
|
@@ -169,14 +403,8 @@ export async function extractContent(
|
|
|
169
403
|
}
|
|
170
404
|
|
|
171
405
|
const markdown = turndown.turndown(article.content);
|
|
172
|
-
|
|
173
406
|
activityMonitor.logComplete(activityId, response.status);
|
|
174
|
-
return {
|
|
175
|
-
url,
|
|
176
|
-
title: article.title || "",
|
|
177
|
-
content: markdown,
|
|
178
|
-
error: null,
|
|
179
|
-
};
|
|
407
|
+
return { url, title: article.title || "", content: markdown, error: null };
|
|
180
408
|
} catch (err) {
|
|
181
409
|
const message = err instanceof Error ? err.message : String(err);
|
|
182
410
|
if (message.toLowerCase().includes("abort")) {
|
|
@@ -191,6 +419,17 @@ export async function extractContent(
|
|
|
191
419
|
}
|
|
192
420
|
}
|
|
193
421
|
|
|
422
|
+
/**
 * Find a title in markdown-like text by taking the first level-1 or level-2
 * ATX heading (`# Title` / `## Title`).
 *
 * The heading text must sit on the same line as the `#` marker; a bare `#`
 * line is not a heading and never borrows the text of a following line.
 * Asterisks (bold/italic markers) are stripped from the title.
 *
 * @param text Text to scan.
 * @returns The cleaned heading text, or `null` when there is no such heading
 *   or it is empty after cleaning.
 */
export function extractHeadingTitle(text: string): string | null {
  // [ \t]+ rather than \s+: \s also matches line breaks, which would let
  // "#\n\nbody" capture "body" as the title.
  const match = /^#{1,2}[ \t]+(.+)/m.exec(text);
  if (!match) return null;
  const cleaned = match[1].replace(/\*+/g, "").trim();
  return cleaned || null;
}
|
|
428
|
+
|
|
429
|
+
/**
 * Pick a title for non-HTML text content.
 *
 * Prefers a heading found by `extractHeadingTitle`; otherwise falls back to
 * the last path segment of `url`, and to `url` itself when that segment is
 * empty.
 *
 * @param text Response body.
 * @param url URL the text was fetched from; must be parseable by `new URL`.
 * @returns The chosen title.
 */
function extractTextTitle(text: string, url: string): string {
  const heading = extractHeadingTitle(text);
  if (heading != null) return heading;

  const segments = new URL(url).pathname.split("/");
  const lastSegment = segments[segments.length - 1];
  return lastSegment || url;
}
|
|
432
|
+
|
|
194
433
|
export async function fetchAllContent(
|
|
195
434
|
urls: string[],
|
|
196
435
|
signal?: AbortSignal,
|
package/gemini-api.ts
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
2
|
+
import { homedir } from "node:os";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
|
|
5
|
+
export const API_BASE = "https://generativelanguage.googleapis.com/v1beta";
|
|
6
|
+
const CONFIG_PATH = join(homedir(), ".pi", "web-search.json");
|
|
7
|
+
export const DEFAULT_MODEL = "gemini-2.5-flash";
|
|
8
|
+
|
|
9
|
+
interface GeminiApiConfig {
|
|
10
|
+
geminiApiKey?: string;
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
let cachedConfig: GeminiApiConfig | null = null;
|
|
14
|
+
|
|
15
|
+
/**
 * Load the Gemini configuration from `CONFIG_PATH`, caching the result in
 * `cachedConfig` for subsequent calls.
 *
 * Loading is best effort: a missing, unreadable or malformed file yields an
 * empty configuration rather than an error. Only a JSON object is accepted as
 * configuration; any other JSON value (`null`, array, string, number) is
 * treated like a malformed file so callers can always read properties from
 * the returned value.
 *
 * @returns The cached or freshly loaded configuration (never `null`).
 */
function loadConfig(): GeminiApiConfig {
  if (cachedConfig) return cachedConfig;

  let loaded: GeminiApiConfig = {};
  if (existsSync(CONFIG_PATH)) {
    try {
      const parsed: unknown = JSON.parse(readFileSync(CONFIG_PATH, "utf-8"));
      // JSON.parse can legally return null or a primitive; returning that
      // would make `loadConfig().geminiApiKey` throw on null.
      if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
        loaded = parsed as GeminiApiConfig;
      }
    } catch {
      // Deliberately ignored: an unreadable or invalid config file means
      // "no configuration"; the env var can still supply the key.
    }
  }

  cachedConfig = loaded;
  return loaded;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Build an abort signal that fires after `timeoutMs`, or earlier if the
 * caller's own signal aborts.
 *
 * @param signal Optional caller-supplied signal to combine with the timeout.
 * @param timeoutMs Timeout in milliseconds.
 * @returns The timeout signal alone, or a signal that aborts when either input aborts.
 */
function withTimeout(signal: AbortSignal | undefined, timeoutMs: number): AbortSignal {
  const deadline = AbortSignal.timeout(timeoutMs);
  if (!signal) return deadline;
  return AbortSignal.any([signal, deadline]);
}
|
|
31
|
+
|
|
32
|
+
/**
 * Resolve the Gemini API key.
 *
 * The `GEMINI_API_KEY` environment variable takes precedence; otherwise the
 * `geminiApiKey` entry of the config file is used. An empty or non-string
 * value from either source counts as "not configured", so both sources are
 * treated the same way.
 *
 * @returns The API key, or `null` when none is configured.
 */
export function getApiKey(): string | null {
  const envKey = process.env.GEMINI_API_KEY;
  if (envKey) return envKey;

  // `?? null` alone would let an empty string (or a non-string JSON value)
  // through as a "key", making isGeminiApiAvailable() report true for it.
  const fileKey: unknown = loadConfig().geminiApiKey;
  return typeof fileKey === "string" && fileKey !== "" ? fileKey : null;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Report whether a Gemini API key can be resolved.
 *
 * @returns `true` when `getApiKey()` returns anything other than `null`.
 */
export function isGeminiApiAvailable(): boolean {
  const key = getApiKey();
  return key !== null;
}
|
|
41
|
+
|
|
42
|
+
/** Optional settings for a Gemini API request (as consumed by `queryGeminiApiWithVideo`). */
export interface GeminiApiOptions {
  /** Model identifier; `DEFAULT_MODEL` is used when omitted. */
  model?: string;

  /** MIME type sent alongside the video URI when provided. */
  mimeType?: string;

  /** Caller's abort signal; combined with the request timeout. */
  signal?: AbortSignal;

  /** Request timeout in milliseconds; 120000 when omitted. */
  timeoutMs?: number;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Ask a Gemini model a question about a video referenced by URI.
 *
 * Sends a single `generateContent` request whose content has two parts: the
 * video (`fileData`) and the text prompt. The text parts of the first
 * candidate are joined with newlines and returned.
 *
 * The API key is sent in the `x-goog-api-key` header rather than in the URL,
 * so it cannot leak through logged or reported request URLs.
 *
 * @param prompt Text prompt sent with the video.
 * @param videoUri URI of the video, passed as `fileData.fileUri`.
 * @param options Model, MIME type, abort signal and timeout overrides.
 * @returns The model's text answer.
 * @throws Error when no API key is configured, when the API responds with a
 *   non-2xx status (message includes the status and the first 300 characters
 *   of the body), or when the response contains no text. Abort/timeout and
 *   network failures from `fetch` propagate unchanged.
 */
export async function queryGeminiApiWithVideo(
  prompt: string,
  videoUri: string,
  options: GeminiApiOptions = {},
): Promise<string> {
  const apiKey = getApiKey();
  if (!apiKey) throw new Error("GEMINI_API_KEY not configured");

  const model = options.model ?? DEFAULT_MODEL;
  const signal = withTimeout(options.signal, options.timeoutMs ?? 120000);
  const url = `${API_BASE}/models/${model}:generateContent`;

  const fileData: Record<string, string> = { fileUri: videoUri };
  if (options.mimeType) fileData.mimeType = options.mimeType;

  const body = {
    contents: [
      {
        parts: [
          { fileData },
          { text: prompt },
        ],
      },
    ],
  };

  const res = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      // Header-based auth keeps the secret out of the request URL.
      "x-goog-api-key": apiKey,
    },
    body: JSON.stringify(body),
    signal,
  });

  if (!res.ok) {
    const errorText = await res.text();
    throw new Error(`Gemini API error ${res.status}: ${errorText.slice(0, 300)}`);
  }

  const data = (await res.json()) as GenerateContentResponse;
  // Only the first candidate is used; parts without text are skipped.
  const text = data.candidates?.[0]?.content?.parts
    ?.map((p) => p.text)
    .filter(Boolean)
    .join("\n");

  if (!text) throw new Error("Gemini API returned empty response");
  return text;
}
|
|
96
|
+
|
|
97
|
+
/**
 * The subset of a `generateContent` response body that this module reads.
 * Every level is optional because the payload is not validated at runtime.
 */
interface GenerateContentResponse {
  /** Candidate answers; only the first one is read by `queryGeminiApiWithVideo`. */
  candidates?: Array<{
    content?: {
      /** Response parts; parts without `text` are ignored by the reader. */
      parts?: Array<{ text?: string }>;
    };
  }>;
}
|