@alexion42/pi-web-search 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,46 @@
1
+ {
2
+ "name": "@alexion42/pi-web-search",
3
+ "version": "0.1.0",
4
+ "description": "Exa-powered web search and content extraction for Pi coding agent",
5
+ "type": "module",
6
+ "scripts": {
7
+ "test": "node --test"
8
+ },
9
+ "keywords": [
10
+ "pi-package",
11
+ "pi",
12
+ "pi-coding-agent",
13
+ "extension",
14
+ "web-search",
15
+ "exa",
16
+ "fetch",
17
+ "scraping"
18
+ ],
19
+ "author": "Alexion Fortytwo <alexion@lexiupon.com>",
20
+ "license": "MIT",
21
+ "repository": {
22
+ "type": "git",
23
+ "url": "git+https://github.com/nicobailon/pi-web-access.git"
24
+ },
25
+ "bugs": {
26
+ "url": "https://github.com/nicobailon/pi-web-access/issues"
27
+ },
28
+ "homepage": "https://github.com/nicobailon/pi-web-access#readme",
29
+ "dependencies": {
30
+ "@mozilla/readability": "^0.5.0",
31
+ "linkedom": "^0.16.0",
32
+ "p-limit": "^6.1.0",
33
+ "turndown": "^7.2.0",
34
+ "unpdf": "^1.6.2"
35
+ },
36
+ "peerDependencies": {
37
+ "@mariozechner/pi-ai": ">=0.6.0",
38
+ "@mariozechner/pi-coding-agent": ">=0.37.3",
39
+ "@mariozechner/pi-tui": ">=0.37.3"
40
+ },
41
+ "pi": {
42
+ "extensions": [
43
+ "./index.ts"
44
+ ]
45
+ }
46
+ }
package/pdf-extract.ts ADDED
@@ -0,0 +1,192 @@
1
+ /**
2
+ * PDF Content Extractor
3
+ *
4
+ * Extracts text from PDF files and saves to markdown.
5
+ * Uses unpdf (pdfjs-dist wrapper) for text extraction.
6
+ */
7
+
8
+ import { getDocumentProxy } from "unpdf";
9
+ import { writeFile, mkdir } from "node:fs/promises";
10
+ import { join, basename } from "node:path";
11
+ import { homedir } from "node:os";
12
+
13
/**
 * Summary of a finished PDF-to-markdown conversion.
 */
export interface PDFExtractResult {
  /** Title from the PDF metadata, or one derived from the URL when the metadata has none. */
  title: string;
  /** Total page count of the PDF (not the number of pages actually extracted). */
  pages: number;
  /** Length of the generated markdown string. */
  chars: number;
  /** Path of the markdown file that was written. */
  outputPath: string;
}
19
+
20
/**
 * Optional knobs for `extractPDFToMarkdown`.
 */
export interface PDFExtractOptions {
  /**
   * Upper bound on the number of pages to extract. Non-finite values fall back
   * to the default; finite values are floored and raised to at least 1.
   */
  maxPages?: number;
  /** Directory the markdown file is written to (created if missing). */
  outputDir?: string;
  /**
   * File name to use as-is inside `outputDir`. When omitted, a name is derived
   * from the document title with a `.md` extension.
   */
  filename?: string;
}
25
+
26
/** Page cap used when the caller supplies no usable `maxPages`. */
const DEFAULT_MAX_PAGES = 100;

/** Fallback destination: the "Downloads" folder inside the current user's home directory. */
const DEFAULT_OUTPUT_DIR = join(homedir(), "Downloads");
28
+
29
/**
 * Convert an in-memory PDF to a markdown document and write it to disk.
 *
 * The markdown starts with a header (title, source URL, page count, optional
 * author), followed by the text of each non-empty page. Every page after the
 * first emitted one is preceded by an HTML comment carrying its page number.
 * When the PDF has more pages than the limit, a truncation note is appended.
 *
 * @param buffer  Raw PDF bytes.
 * @param url     Where the PDF came from; shown in the header and used as a
 *                title fallback when the metadata has no title.
 * @param options See {@link PDFExtractOptions}.
 * @returns Title, total page count, markdown length and the written file path.
 */
export async function extractPDFToMarkdown(
  buffer: ArrayBuffer,
  url: string,
  options: PDFExtractOptions = {}
): Promise<PDFExtractResult> {
  // Only `undefined` triggers a default, mirroring destructuring defaults.
  const requestedMax = options.maxPages === undefined ? DEFAULT_MAX_PAGES : options.maxPages;
  const outputDir = options.outputDir === undefined ? DEFAULT_OUTPUT_DIR : options.outputDir;
  const customFilename = options.filename;

  // Guard against NaN / Infinity / fractional / non-positive limits.
  let pageLimit = DEFAULT_MAX_PAGES;
  if (Number.isFinite(requestedMax)) {
    pageLimit = Math.max(1, Math.floor(requestedMax));
  }

  const pdf = await getDocumentProxy(new Uint8Array(buffer));
  const metadata = await pdf.getMetadata();
  const info: Record<string, unknown> | null =
    metadata.info && typeof metadata.info === "object"
      ? (metadata.info as Record<string, unknown>)
      : null;

  // Prefer the embedded title; fall back to something readable from the URL.
  const metaTitle = typeof info?.Title === "string" ? info.Title : undefined;
  const metaAuthor = typeof info?.Author === "string" ? info.Author : undefined;
  const fallbackTitle = extractTitleFromURL(url);
  const title = metaTitle?.trim() || fallbackTitle;

  const totalPages = pdf.numPages;
  const lastPage = Math.min(totalPages, pageLimit);
  const truncated = totalPages > pageLimit;

  // Pull the text out one page at a time so page boundaries survive.
  const extracted: { pageNum: number; text: string }[] = [];
  for (let pageNum = 1; pageNum <= lastPage; pageNum++) {
    const page = await pdf.getPage(pageNum);
    const textContent = await page.getTextContent();

    const fragments: string[] = [];
    for (const item of textContent.items) {
      fragments.push((item as { str?: string }).str || "");
    }

    const text = fragments.join(" ").replace(/\s+/g, " ").trim();
    if (text) {
      extracted.push({ pageNum, text });
    }
  }

  // Header block.
  const md: string[] = [
    `# ${title}`,
    "",
    `> Source: ${url}`,
    `> Pages: ${totalPages}${truncated ? ` (extracted first ${lastPage})` : ""}`,
  ];
  if (metaAuthor) {
    md.push(`> Author: ${metaAuthor}`);
  }
  md.push("", "---", "");

  // Body: the first emitted page has no marker, later ones do.
  extracted.forEach((entry, idx) => {
    if (idx > 0) {
      md.push("", `<!-- Page ${entry.pageNum} -->`, "");
    }
    md.push(entry.text);
  });

  if (truncated) {
    md.push(
      "",
      "---",
      "",
      `*[Truncated: Only first ${lastPage} of ${totalPages} pages extracted]*`
    );
  }

  const content = md.join("\n");

  // A caller-supplied name wins; otherwise derive one from the title.
  const outputFilename = customFilename ? customFilename : sanitizeFilename(title) + ".md";
  const outputPath = join(outputDir, outputFilename);

  await mkdir(outputDir, { recursive: true });
  await writeFile(outputPath, content, "utf-8");

  return {
    title,
    pages: totalPages,
    chars: content.length,
    outputPath,
  };
}
133
+
134
/**
 * Derive a human-readable title from a PDF URL.
 *
 * The last path segment is percent-decoded, a trailing `.pdf` extension is
 * removed regardless of case, and runs of `_` / `-` become single spaces.
 * arXiv `/pdf/<id>` and `/abs/<id>` URLs produce `"arxiv <id>"` (the hyphen in
 * the intermediate `arxiv-<id>` form is turned into a space by the clean-up).
 *
 * @param url Absolute URL of the document.
 * @returns The derived title, or `"document"` when the URL cannot be parsed
 *          or yields an empty name.
 */
function extractTitleFromURL(url: string): string {
  try {
    const urlObj = new URL(url);
    const pathname = urlObj.pathname;

    // Last path segment, still percent-encoded at this point.
    let filename = basename(pathname);

    // URL.pathname keeps escapes such as %20; decode them so the title reads
    // naturally. A malformed escape sequence keeps the raw segment instead of
    // discarding the whole title.
    try {
      filename = decodeURIComponent(filename);
    } catch {
      // keep the undecoded segment
    }

    // Strip the extension case-insensitively (".pdf", ".PDF", ...).
    filename = filename.replace(/\.pdf$/i, "");

    // Handle arxiv URLs: /pdf/1706.03762 → "arxiv-1706.03762" (before clean-up)
    if (urlObj.hostname.includes("arxiv.org")) {
      const match = pathname.match(/\/(?:pdf|abs)\/(\d+\.\d+)/);
      if (match) {
        filename = `arxiv-${match[1]}`;
      }
    }

    // Clean up filename
    filename = filename
      .replace(/[_-]+/g, " ")
      .replace(/\s+/g, " ")
      .trim();

    return filename || "document";
  } catch {
    return "document";
  }
}
164
+
165
/**
 * Turn an arbitrary string into a lowercase, hyphen-separated file name stem.
 *
 * Only `a-z`, `0-9` and hyphens survive; whitespace runs become a single
 * hyphen, the result is capped at 100 characters, and one leading / trailing
 * hyphen is removed.
 *
 * @param name Text to sanitize (typically a document title).
 * @returns The sanitized stem, or `"document"` when nothing usable remains.
 */
function sanitizeFilename(name: string): string {
  const lowered = name.toLowerCase();
  const allowedOnly = lowered.replace(/[^a-z0-9\s-]/g, "");
  const hyphenated = allowedOnly.replace(/\s+/g, "-").replace(/-+/g, "-");
  // Cap the length first, then tidy the edges the cut may have exposed.
  const capped = hyphenated.slice(0, 100);
  const stem = capped.replace(/^-|-$/g, "");
  return stem === "" ? "document" : stem;
}
178
+
179
/**
 * Decide whether a response should be treated as a PDF.
 *
 * @param url         Request URL; its path is checked for a `.pdf` suffix
 *                    (case-insensitive, query string ignored).
 * @param contentType Optional `Content-Type` header value. Media types are
 *                    case-insensitive, so the comparison ignores case.
 * @returns `true` when either the content type or the URL path indicates a
 *          PDF; `false` otherwise, including when the URL cannot be parsed.
 */
export function isPDF(url: string, contentType?: string): boolean {
  if (contentType?.toLowerCase().includes("application/pdf")) {
    return true;
  }
  try {
    const urlObj = new URL(url);
    return urlObj.pathname.toLowerCase().endsWith(".pdf");
  } catch {
    return false;
  }
}
Binary file
package/rsc-extract.ts ADDED
@@ -0,0 +1,338 @@
1
+ /**
2
+ * RSC Content Extractor
3
+ *
4
+ * Extracts readable content from Next.js React Server Components (RSC) flight payloads.
5
+ * RSC pages embed content as JSON in <script>self.__next_f.push([...])</script> tags.
6
+ */
7
+
8
/**
 * Readable content recovered from an RSC flight payload.
 */
export interface RSCExtractResult {
  /** Text of the page's `<title>` up to the first `|`, trimmed; empty when absent. */
  title: string;
  /** Extracted page content rendered as markdown. */
  content: string;
}
12
+
13
/**
 * Extract readable markdown from the RSC flight data embedded in a Next.js page.
 *
 * The page's `self.__next_f.push([1,"..."])` inline scripts are decoded into
 * `id:payload` rows. Rows whose payload is a JSON array are treated as element
 * trees and converted to markdown, following `$L<id>` references between rows.
 * Chunk "23" is tried first; when it yields little text, every other chunk is
 * converted, filtered, ordered by id and de-duplicated.
 *
 * @param html Full HTML source of the page.
 * @returns The page title and markdown content, or `null` when the page has
 *          no flight data or no sufficiently long content could be recovered.
 */
export function extractRSCContent(html: string): RSCExtractResult | null {
  if (!html.includes("self.__next_f.push")) {
    return null;
  }

  // Parse all RSC chunks into a map
  const chunkMap = new Map<string, string>();
  // `[^>]*` tolerates attributes on the tag (e.g. a CSP nonce); a bare
  // `<script>` pattern would silently miss every chunk on such pages.
  const scriptRegex = /<script[^>]*>self\.__next_f\.push\(\[1,"([\s\S]*?)"\]\)<\/script>/g;

  for (const match of html.matchAll(scriptRegex)) {
    let content: string;
    try {
      // The captured text is the inside of a JS string literal; re-quote it
      // and let JSON.parse undo the escaping.
      content = JSON.parse('"' + match[1] + '"');
    } catch {
      continue;
    }

    // Parse each line as "id:payload"
    // Lines are separated by \n, each line is one chunk
    // Chunk IDs are hex strings, typically 1-4 chars (supports up to 65535 chunks)
    for (const line of content.split("\n")) {
      if (!line.trim()) continue;

      const colonIdx = line.indexOf(":");
      if (colonIdx <= 0 || colonIdx > 4) continue;

      const id = line.slice(0, colonIdx);
      if (!/^[0-9a-f]+$/i.test(id)) continue;

      const payload = line.slice(colonIdx + 1);
      if (!payload) continue;

      // When an id shows up more than once, keep the longest payload.
      const existing = chunkMap.get(id);
      if (!existing || payload.length > existing.length) {
        chunkMap.set(id, payload);
      }
    }
  }

  if (chunkMap.size === 0) return null;

  // Extract title
  const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/);
  const title = titleMatch?.[1]?.split("|")[0]?.trim() || "";

  // Parse and cache parsed chunks (null is cached for unparseable ones)
  const parsedCache = new Map<string, unknown>();

  /** Return the JSON-parsed chunk for `id`, or null if absent / not a JSON array / invalid. */
  function getParsedChunk(id: string): unknown | null {
    if (parsedCache.has(id)) return parsedCache.get(id);

    const chunk = chunkMap.get(id);
    if (!chunk || !chunk.startsWith("[")) {
      parsedCache.set(id, null);
      return null;
    }

    try {
      const parsed = JSON.parse(chunk);
      parsedCache.set(id, parsed);
      return parsed;
    } catch {
      parsedCache.set(id, null);
      return null;
    }
  }

  // Extract markdown from nodes, resolving refs on the fly
  type Node = unknown;
  // Refs currently being expanded; used to break reference cycles.
  const visitedRefs = new Set<string>();

  // Tags that never carry article content. Hoisted so the list is built once
  // rather than on every visited element.
  const skipTags = new Set([
    "script", "style", "svg", "path", "circle", "link", "meta",
    "template", "button", "input", "nav", "footer", "aside",
  ]);

  /** Convert one RSC node (string, number, element tuple or child array) to markdown. */
  function extractNode(node: Node, ctx = { inTable: false, inCode: false }): string {
    if (node === null || node === undefined) return "";

    if (typeof node === "string") {
      // Check if it's a reference like "$L30"
      const refMatch = node.match(/^\$L([0-9a-f]+)$/i);
      if (refMatch) {
        const refId = refMatch[1];
        if (visitedRefs.has(refId)) return ""; // Prevent cycles
        visitedRefs.add(refId);
        const refNode = getParsedChunk(refId);
        const result = refNode ? extractNode(refNode, ctx) : "";
        visitedRefs.delete(refId);
        return result;
      }
      // Filter out RSC-specific artifacts, but preserve content inside code blocks
      if (!ctx.inCode && (node === "$undefined" || node === "$" || /^\$[A-Z]/.test(node))) return "";
      // Whitespace-only strings are dropped; everything else is kept verbatim.
      return node.trim() ? node : "";
    }

    if (typeof node === "number") return String(node);
    if (typeof node === "boolean") return "";
    if (!Array.isArray(node)) return "";

    // RSC element: ["$", "tag", key, props]
    if (node[0] === "$" && typeof node[1] === "string") {
      const tag = node[1] as string;
      const props = (node[3] || {}) as Record<string, unknown>;

      // Skip non-content
      if (skipTags.has(tag)) return "";

      // Component ref like $L25
      if (tag.startsWith("$L")) {
        const refId = tag.slice(2);
        if (visitedRefs.has(refId)) return "";

        // Check for heading components with baseId
        if (props.baseId && props.children) {
          // Plain strings are used as-is; structured children (arrays or
          // nested elements) are rendered, since String() on them would
          // produce comma-joined tuple garbage.
          const headingText = typeof props.children === "string"
            ? props.children
            : extractNode(props.children as Node, ctx).trim();
          return `## ${headingText}\n\n`;
        }

        visitedRefs.add(refId);
        const refNode = getParsedChunk(refId);
        let result = "";
        if (refNode) {
          result = extractNode(refNode, ctx);
        } else if (props.children) {
          // Component body unavailable: fall back to what it wraps.
          result = extractNode(props.children as Node, ctx);
        }
        visitedRefs.delete(refId);
        return result;
      }

      const children = props.children;
      const content = children ? extractNode(children as Node, ctx) : "";

      switch (tag) {
        case "h1": return `# ${content.trim()}\n\n`;
        case "h2": return `## ${content.trim()}\n\n`;
        case "h3": return `### ${content.trim()}\n\n`;
        case "h4": return `#### ${content.trim()}\n\n`;
        case "h5": return `##### ${content.trim()}\n\n`;
        case "h6": return `###### ${content.trim()}\n\n`;
        case "p": return ctx.inTable ? content : `${content.trim()}\n\n`;
        case "code": {
          // Re-extract with inCode set so "$"-prefixed text is not filtered out.
          const codeContent = children ? extractNode(children as Node, { ...ctx, inCode: true }) : "";
          return ctx.inCode ? codeContent : `\`${codeContent}\``;
        }
        case "pre": {
          const preContent = children ? extractNode(children as Node, { ...ctx, inCode: true }) : "";
          return "```\n" + preContent + "\n```\n\n";
        }
        case "strong": case "b": return `**${content}**`;
        case "em": case "i": return `*${content}*`;
        case "li": return `- ${content.trim()}\n`;
        case "ul": case "ol": return content + "\n";
        case "blockquote": return `> ${content.trim()}\n\n`;
        case "table": return extractTable(node as unknown[]) + "\n";
        case "thead": case "tbody": case "tr": case "th": case "td":
          return content;
        case "div":
          // Alert boxes are rendered as block quotes.
          if (props.role === "alert" || props["data-slot"] === "alert") {
            return `> ${content.trim()}\n\n`;
          }
          return content;
        case "a": {
          const href = props.href as string | undefined;
          // In-page anchors are not useful outside the page; keep only the text.
          return href && !href.startsWith("#") ? `[${content}](${href})` : content;
        }
        default: return content;
      }
    }

    // Array of child nodes
    return (node as Node[]).map(n => extractNode(n, ctx)).join("");
  }

  /** Render a `table` element tuple as a markdown pipe table ("" when it has no rows). */
  function extractTable(tableNode: unknown[]): string {
    const props = (tableNode[3] || {}) as Record<string, unknown>;
    const rows: string[][] = [];
    let headerRowCount = 0;

    /** Collect rows, remembering how many came from `thead`. */
    function walkTable(node: unknown, isHeader = false): void {
      if (node === null || node === undefined) return;

      // Handle string refs
      if (typeof node === "string") {
        const refMatch = node.match(/^\$L([0-9a-f]+)$/i);
        if (refMatch && !visitedRefs.has(refMatch[1])) {
          visitedRefs.add(refMatch[1]);
          const refNode = getParsedChunk(refMatch[1]);
          if (refNode) walkTable(refNode, isHeader);
          visitedRefs.delete(refMatch[1]);
        }
        return;
      }

      if (!Array.isArray(node)) return;

      if (node[0] === "$") {
        const tag = node[1] as string;
        const nodeProps = (node[3] || {}) as Record<string, unknown>;

        // Handle component refs
        if (tag.startsWith("$L")) {
          const refId = tag.slice(2);
          if (!visitedRefs.has(refId)) {
            visitedRefs.add(refId);
            const refNode = getParsedChunk(refId);
            if (refNode) walkTable(refNode, isHeader);
            visitedRefs.delete(refId);
          }
          return;
        }

        if (tag === "thead") walkTable(nodeProps.children, true);
        else if (tag === "tbody") walkTable(nodeProps.children, false);
        else if (tag === "tr") {
          const cells: string[] = [];
          walkCells(nodeProps.children, cells);
          if (cells.length > 0) {
            rows.push(cells);
            if (isHeader) headerRowCount++;
          }
        } else walkTable(nodeProps.children, isHeader);
      } else {
        for (const child of node) walkTable(child, isHeader);
      }
    }

    /** Append the text of every `td` / `th` found under `node` to `cells`. */
    function walkCells(node: unknown, cells: string[]): void {
      if (node === null || node === undefined) return;

      // Handle string refs
      if (typeof node === "string") {
        const refMatch = node.match(/^\$L([0-9a-f]+)$/i);
        if (refMatch && !visitedRefs.has(refMatch[1])) {
          visitedRefs.add(refMatch[1]);
          const refNode = getParsedChunk(refMatch[1]);
          if (refNode) walkCells(refNode, cells);
          visitedRefs.delete(refMatch[1]);
        }
        return;
      }

      if (!Array.isArray(node)) return;

      if (node[0] === "$" && (node[1] === "td" || node[1] === "th")) {
        const cellProps = (node[3] || {}) as Record<string, unknown>;
        const text = extractNode(cellProps.children, { inTable: true, inCode: false })
          .trim()
          .replace(/\n/g, " ")
          .replace(/\\/g, "\\\\") // Escape backslashes first
          .replace(/\|/g, "\\|"); // Then escape pipes
        cells.push(text);
      } else if (node[0] === "$" && typeof node[1] === "string" && (node[1] as string).startsWith("$L")) {
        // Component ref for a cell
        const refId = (node[1] as string).slice(2);
        if (!visitedRefs.has(refId)) {
          visitedRefs.add(refId);
          const refNode = getParsedChunk(refId);
          if (refNode) walkCells(refNode, cells);
          visitedRefs.delete(refId);
        }
      } else {
        for (const child of node) walkCells(child, cells);
      }
    }

    walkTable(props.children);
    if (rows.length === 0) return "";

    // Pad short rows so every row has the same number of columns.
    const colCount = Math.max(...rows.map(r => r.length));
    let md = "";
    for (let i = 0; i < rows.length; i++) {
      const row = rows[i].concat(Array(colCount - rows[i].length).fill(""));
      md += "| " + row.join(" | ") + " |\n";
      // Separator goes after the last header row, or after the first row
      // when the table has no thead.
      if (i === headerRowCount - 1 || (headerRowCount === 0 && i === 0)) {
        md += "| " + Array(colCount).fill("---").join(" | ") + " |\n";
      }
    }
    return md;
  }

  // Process main content chunk (usually "23")
  const mainChunk = getParsedChunk("23");

  if (mainChunk) {
    const content = extractNode(mainChunk);
    if (content.trim().length > 100) {
      const cleaned = content
        .replace(/\n{3,}/g, "\n\n")
        .trim();
      return { title, content: cleaned };
    }
  }

  // Fallback: try other chunks
  const contentParts: { order: number; text: string }[] = [];

  for (const [id] of chunkMap) {
    if (id === "23") continue;
    const parsed = getParsedChunk(id);
    if (!parsed) continue;

    visitedRefs.clear();
    const text = extractNode(parsed);

    // Keep only substantial chunks that do not look like a not-found page.
    if (text.trim().length > 50 &&
        !text.includes("page was not found") &&
        !text.includes("404")) {
      contentParts.push({ order: parseInt(id, 16), text: text.trim() });
    }
  }

  if (contentParts.length === 0) return null;

  contentParts.sort((a, b) => a.order - b.order);

  // Drop parts whose first 150 characters repeat an earlier part.
  const seen = new Set<string>();
  const uniqueParts: string[] = [];
  for (const part of contentParts) {
    const key = part.text.slice(0, 150);
    if (!seen.has(key)) {
      seen.add(key);
      uniqueParts.push(part.text);
    }
  }

  const content = uniqueParts.join("\n\n").replace(/\n{3,}/g, "\n\n").trim();
  return content.length > 100 ? { title, content } : null;
}
package/search.ts ADDED
@@ -0,0 +1,49 @@
1
+ import { activityMonitor } from "./activity.js";
2
+ import { hasExaApiKey, searchWithExa } from "./exa.js";
3
+ import type { SearchResponse, SearchOptions } from "./types.js";
4
+
5
// Upper bound on the number of search results.
// NOTE(review): not referenced anywhere in the visible part of this module — confirm whether it is still needed.
const MAX_NUM_RESULTS = 20;
6
+
7
/**
 * Options accepted by `search`: everything in `SearchOptions` plus a content flag.
 */
export interface FullSearchOptions extends SearchOptions {
  // NOTE(review): presumably asks the provider to return page content with each result — confirm in exa.ts.
  includeContent?: boolean;
}
10
+
11
/**
 * Produce a printable message for any thrown value.
 *
 * @param err The caught value.
 * @returns `err.message` for `Error` instances, otherwise `String(err)`.
 */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Tell whether a thrown value represents a cancelled operation.
 *
 * An error named `"AbortError"` is recognised by name, so a cancellation with
 * a custom message is not misreported as a failure. Any other value falls back
 * to the message heuristic (contains "abort", case-insensitive).
 *
 * @param err The caught value.
 * @returns `true` when the value looks like an abort.
 */
function isAbortError(err: unknown): boolean {
  if (err instanceof Error && err.name === "AbortError") {
    return true;
  }
  return errorMessage(err).toLowerCase().includes("abort");
}
18
+
19
/**
 * Run a web search through Exa and record the call with the activity monitor.
 *
 * @param query   Search text.
 * @param options Passed through to `searchWithExa`.
 * @returns The provider response when it carries an `answer` field.
 * @throws Error when the provider reports an exhausted quota, when no usable
 *         result comes back, or whatever `searchWithExa` itself throws.
 *         Aborts are logged as completed with status 0; every other failure
 *         is logged as an error. The original error is always rethrown.
 */
export async function search(query: string, options: FullSearchOptions = {}): Promise<SearchResponse> {
  const activityId = activityMonitor.logStart({ type: "api", query });

  try {
    const result = await searchWithExa(query, options);

    if (result) {
      if ("exhausted" in result) {
        throw new Error(
          "Exa monthly free tier exhausted (1,000 requests). Resets next month.\n" +
          "  Upgrade at exa.ai/pricing"
        );
      }
      if ("answer" in result) {
        activityMonitor.logComplete(activityId, 200);
        return result;
      }
    }

    // null result from MCP with no API key
    throw new Error(
      "No search provider available. Either:\n" +
      "  1. Set EXA_API_KEY (or exaApiKey in ~/.pi/web-search.json)\n" +
      "  2. Use Exa MCP (no API key needed)"
    );
  } catch (err) {
    // Cancellation is not a failure; only real errors are logged as such.
    const aborted = isAbortError(err);
    if (aborted) {
      activityMonitor.logComplete(activityId, 0);
    } else {
      activityMonitor.logError(activityId, errorMessage(err));
    }
    throw err;
  }
}
package/storage.ts ADDED
@@ -0,0 +1,71 @@
1
+ import type { ExtensionContext } from "@mariozechner/pi-coding-agent";
2
+ import type { ExtractedContent } from "./extract.js";
3
+ import type { SearchResult } from "./types.js";
4
+
5
/** Stored results older than this (one hour, in milliseconds) are not restored from a session. */
const CACHE_TTL_MS = 1000 * 60 * 60;
6
+
7
/**
 * Outcome of one search query.
 */
export interface QueryResultData {
  /** The query text. */
  query: string;
  /** Answer text for the query. */
  answer: string;
  /** Individual search hits. */
  results: SearchResult[];
  /** Error text, or `null`. NOTE(review): presumably `null` means the query succeeded — confirm with the producer. */
  error: string | null;
}
13
+
14
/**
 * One stored search or fetch operation.
 *
 * The validator in this module requires `queries` to be an array for
 * `"search"` entries and `urls` to be an array for `"fetch"` entries.
 */
export interface StoredSearchData {
  /** Key under which the entry is stored. */
  id: string;
  /** Which kind of operation produced the entry. */
  type: "search" | "fetch";
  /** Epoch milliseconds; compared against `Date.now()` for expiry on restore. */
  timestamp: number;
  /** Per-query results (search entries). */
  queries?: QueryResultData[];
  /** Extracted pages (fetch entries). */
  urls?: ExtractedContent[];
}
21
+
22
/** In-memory store of results, keyed by result id. */
const storedResults: Map<string, StoredSearchData> = new Map();
23
+
24
/**
 * Create an identifier for a stored result.
 *
 * The id is the current time in base 36 followed by exactly six base-36
 * characters of randomness. `Math.random().toString(36)` can yield fewer than
 * six fractional digits (none at all for 0), so the suffix is right-padded
 * with "0" to keep its width constant.
 *
 * @returns A lowercase alphanumeric id.
 */
export function generateId(): string {
  const timePart = Date.now().toString(36);
  const randomPart = Math.random().toString(36).slice(2, 8).padEnd(6, "0");
  return timePart + randomPart;
}
27
+
28
/**
 * Save `data` under `id`, replacing any entry already stored under that id.
 */
export function storeResult(id: string, data: StoredSearchData): void {
  const store = storedResults;
  store.set(id, data);
}
31
+
32
/**
 * Look up a stored entry.
 *
 * @returns The entry for `id`, or `null` when nothing is stored under it.
 */
export function getResult(id: string): StoredSearchData | null {
  const found = storedResults.get(id);
  return found ?? null;
}
35
+
36
/**
 * Snapshot of every stored entry, in insertion order, as a new array.
 */
export function getAllResults(): StoredSearchData[] {
  return [...storedResults.values()];
}
39
+
40
/**
 * Remove the entry stored under `id`.
 *
 * @returns `true` when an entry existed and was removed, `false` otherwise.
 */
export function deleteResult(id: string): boolean {
  const removed = storedResults.delete(id);
  return removed;
}
43
+
44
/**
 * Drop every stored entry.
 */
export function clearResults(): void {
  const store = storedResults;
  store.clear();
}
47
+
48
/**
 * Runtime check that an arbitrary value has the shape of a stored entry:
 * a non-empty string `id`, a `type` of "search" or "fetch", a numeric
 * `timestamp`, and the array field that goes with the type (`queries` for
 * search, `urls` for fetch). Array contents are not inspected.
 */
function isValidStoredData(data: unknown): data is StoredSearchData {
  if (typeof data !== "object" || data === null) {
    return false;
  }
  const record = data as Record<string, unknown>;

  const hasId = typeof record.id === "string" && record.id.length > 0;
  if (!hasId) return false;

  const kind = record.type;
  if (!(kind === "search" || kind === "fetch")) return false;

  if (typeof record.timestamp !== "number") return false;

  // Each kind carries its payload in a different field.
  const payload = kind === "search" ? record.queries : record.urls;
  return Array.isArray(payload);
}
58
+
59
/**
 * Rebuild the in-memory store from the current session branch.
 *
 * The store is emptied first. Every custom session entry of type
 * "web-search-results" whose data passes validation and whose timestamp is
 * less than the cache TTL behind the current time is loaded under its own id.
 *
 * @param ctx Extension context whose session manager supplies the branch entries.
 */
export function restoreFromSession(ctx: ExtensionContext): void {
  storedResults.clear();
  const now = Date.now();

  for (const entry of ctx.sessionManager.getBranch()) {
    // Only this extension's own custom entries are of interest.
    if (!(entry.type === "custom" && entry.customType === "web-search-results")) {
      continue;
    }

    const data = entry.data;
    if (!isValidStoredData(data)) continue;

    // Skip entries that have outlived the cache TTL.
    if (now - data.timestamp < CACHE_TTL_MS) {
      storedResults.set(data.id, data);
    }
  }
}