@alexion42/pi-web-search 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,95 @@
1
+ import assert from "node:assert/strict";
2
+ import { spawnSync } from "node:child_process";
3
+ import { test } from "node:test";
4
+
5
/** Absolute URL of the extractor module, resolved relative to this test file. */
const extractorUrl = new URL("../pdf-extract.ts", import.meta.url).href;

/**
 * Regression test: runs the extractor in a separate Node process whose
 * `Promise.try` has been deleted, and expects the extracted text to contain
 * "Hello PDF". The child script is fed to `node --input-type=module` on stdin.
 */
test("extractPDFToMarkdown works on Node 22 without native Promise.try", () => {
  const child = spawnSync(process.execPath, ["--input-type=module"], {
    input: buildChildScript(extractorUrl),
    encoding: "utf8",
    maxBuffer: 2 * 1024 * 1024,
  });

  // spawnSync does not throw when the child cannot be launched (or overflows
  // maxBuffer); it reports the problem through `error`. Surface that first so
  // the real cause is not hidden behind a confusing status/stderr assertion.
  assert.ifError(child.error);

  assert.equal(
    child.status,
    0,
    // The message is built eagerly, so guard against a missing stderr.
    "PDF extraction failed in a child process. stderr summary:\n" + errorSummary(child.stderr ?? ""),
  );

  assert.match(child.stdout, /Hello PDF/);
});
22
+
23
/**
 * Builds the ES-module source text that the test pipes to a child Node process.
 *
 * The generated script:
 *  1. installs `uncaughtException` / `unhandledRejection` handlers that print
 *     the error and exit with code 1;
 *  2. deletes `Promise.try` and throws if it is still defined afterwards;
 *  3. dynamically imports `extractPDFToMarkdown` from `moduleUrl`;
 *  4. runs it on a minimal in-memory PDF containing "Hello PDF", writing into a
 *     freshly created temporary directory;
 *  5. prints the file at `result.outputPath` to stdout and then removes the
 *     temporary directory so repeated runs do not leak directories.
 *
 * @param moduleUrl URL of the module exporting `extractPDFToMarkdown`. It is
 *   embedded with `JSON.stringify`, so it becomes a correctly quoted literal.
 * @returns The script source as a string.
 */
function buildChildScript(moduleUrl) {
  // Note: "\\n" below is intentional. The child script must contain the
  // two-character escape so that ITS string literals produce real newlines.
  return `
import { mkdtemp, readFile, rm } from "node:fs/promises";
import { tmpdir } from "node:os";
import { join } from "node:path";

process.on("uncaughtException", (error) => {
  console.error(error?.stack || error);
  process.exit(1);
});
process.on("unhandledRejection", (error) => {
  console.error(error?.stack || error);
  process.exit(1);
});

Reflect.deleteProperty(Promise, "try");
if (typeof Promise.try !== "undefined") {
  throw new Error("Expected Promise.try to be unavailable before PDF extraction");
}

const { extractPDFToMarkdown } = await import(${JSON.stringify(moduleUrl)});

const outputDir = await mkdtemp(join(tmpdir(), "pi-web-access-pdf-"));
try {
  const result = await extractPDFToMarkdown(
    makePdf("Hello PDF"),
    "https://example.test/hello.pdf",
    { outputDir },
  );

  console.log(await readFile(result.outputPath, "utf8"));
} finally {
  // Remove the scratch directory whether extraction succeeded or threw.
  await rm(outputDir, { recursive: true, force: true });
}

// Assembles a single-page PDF 1.4 document that draws the given text in Helvetica.
function makePdf(text) {
  const content = "BT /F1 24 Tf 72 720 Td (" + text + ") Tj ET";
  const objects = [
    "<< /Type /Catalog /Pages 2 0 R >>",
    "<< /Type /Pages /Kids [3 0 R] /Count 1 >>",
    "<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Resources << /Font << /F1 4 0 R >> >> /Contents 5 0 R >>",
    "<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>",
    "<< /Length " + Buffer.byteLength(content, "ascii") + " >>\\nstream\\n" + content + "\\nendstream",
  ];
  let body = "%PDF-1.4\\n";
  const offsets = [0];

  // Record the byte offset of every object; the xref table needs them.
  for (let index = 0; index < objects.length; index += 1) {
    offsets.push(Buffer.byteLength(body, "ascii"));
    body += String(index + 1) + " 0 obj\\n" + objects[index] + "\\nendobj\\n";
  }

  const xrefOffset = Buffer.byteLength(body, "ascii");
  body += "xref\\n0 " + String(objects.length + 1) + "\\n";
  body += "0000000000 65535 f \\n";

  for (const offset of offsets.slice(1)) {
    body += String(offset).padStart(10, "0") + " 00000 n \\n";
  }

  body += "trailer\\n<< /Size " + String(objects.length + 1) + " /Root 1 0 R >>\\n";
  body += "startxref\\n" + String(xrefOffset) + "\\n%%EOF\\n";

  return new TextEncoder().encode(body).buffer;
}
`;
}
86
+
87
/**
 * Shortens captured stderr for use in an assertion message.
 *
 * If the text contains "TypeError: Promise.try is not a function", the summary
 * starts at that marker and runs for at most `size` characters. Otherwise the
 * last `size` characters are returned (or the whole text when it is shorter).
 *
 * @param value Captured stderr. `null`/`undefined` (for example when the child
 *   process never started) is treated as empty text; other non-string values
 *   are converted with `String`.
 * @param size Maximum length of the summary, 1200 by default.
 * @returns The summarized text.
 */
function errorSummary(value, size = 1200) {
  // Never throw from here: this runs while building a failure message.
  const text = typeof value === "string" ? value : String(value ?? "");
  const marker = "TypeError: Promise.try is not a function";
  const markerAt = text.indexOf(marker);

  if (markerAt >= 0) {
    return text.slice(markerAt, markerAt + size);
  }

  return text.length > size ? text.slice(-size) : text;
}
package/types.ts ADDED
@@ -0,0 +1,20 @@
1
+ import type { ExtractedContent } from "./extract.js";
2
+
3
/**
 * One entry of a {@link SearchResponse}'s `results` list.
 *
 * All three fields are required strings.
 */
export interface SearchResult {
  /** Title text of the entry. */
  title: string;

  /** URL the entry refers to. */
  url: string;

  /** Snippet text accompanying the entry. */
  snippet: string;
}
8
+
9
/**
 * Shape of a search response: an answer string, the list of result entries,
 * and optionally a list of extracted content items.
 */
export interface SearchResponse {
  /** Answer text of the response. */
  answer: string;

  /** Result entries; may be an empty array. */
  results: Array<SearchResult>;

  /** Extracted content items, when present. */
  inlineContent?: Array<ExtractedContent>;
}
14
+
15
/**
 * Optional settings for a search call. Every field may be omitted.
 */
export interface SearchOptions {
  /** Number of results — presumably the requested maximum; confirm against the search implementation. */
  numResults?: number;

  /** Recency window; one of the four listed literals. */
  recencyFilter?: "day" | "week" | "month" | "year";

  /** List of domain strings — NOTE(review): whether these are includes or excludes is not visible here. */
  domainFilter?: Array<string>;

  /** Abort signal supplied by the caller. */
  signal?: AbortSignal;
}
package/utils.ts ADDED
@@ -0,0 +1,44 @@
1
/**
 * Formats a duration in seconds as `m:ss`, or `h:mm:ss` once it reaches an hour.
 *
 * Minutes are not zero-padded in the `m:ss` form; hours are never padded.
 * Fractional seconds are truncated (rounded down) so the seconds field is
 * always exactly two digits. Negative, `NaN` and infinite inputs are treated
 * as zero and produce `"0:00"`.
 *
 * @param s Duration in seconds.
 * @returns The formatted duration, e.g. `"1:05"` or `"1:01:01"`.
 */
export function formatSeconds(s: number): string {
  // Normalize first: `%` on a fractional or negative number would otherwise
  // leak values like "5.5" or "-5" into the padded fields.
  const total = Number.isFinite(s) ? Math.max(0, Math.floor(s)) : 0;

  const hours = Math.floor(total / 3600);
  const minutes = Math.floor((total % 3600) / 60);
  const seconds = String(total % 60).padStart(2, "0");

  if (hours > 0) {
    return `${hours}:${String(minutes).padStart(2, "0")}:${seconds}`;
  }
  return `${minutes}:${seconds}`;
}
8
+
9
/**
 * Extracts `code`, `stderr` and `message` from an arbitrary thrown value.
 *
 * The fields are validated at runtime so the result really matches the
 * declared types, whatever the thrown object carries:
 *  - `code`: kept when it is a string; a numeric code is converted to its
 *    decimal string; anything else (including `null`) becomes `undefined`.
 *  - `stderr`: a Buffer is decoded as UTF-8, a string is kept, anything else
 *    becomes `""`.
 *  - `message`: a string is kept, `null`/`undefined` becomes `""`, any other
 *    value is converted with `String`.
 *
 * @param err The caught value.
 * @returns The extracted fields. For a falsy or non-object `err`, `stderr` is
 *   `""`, `message` is `String(err)` and `code` is absent.
 */
export function readExecError(err: unknown): { code?: string; stderr: string; message: string } {
  if (!err || typeof err !== "object") {
    return { stderr: "", message: String(err) };
  }

  const {
    code: rawCode,
    message: rawMessage,
    stderr: rawStderr,
  } = err as { code?: unknown; message?: unknown; stderr?: unknown };

  let code: string | undefined;
  if (typeof rawCode === "string") {
    code = rawCode;
  } else if (typeof rawCode === "number") {
    // Keep the information a numeric code carries, but honor the string contract.
    code = String(rawCode);
  }

  let stderr = "";
  if (Buffer.isBuffer(rawStderr)) {
    stderr = rawStderr.toString("utf-8");
  } else if (typeof rawStderr === "string") {
    stderr = rawStderr;
  }

  let message = "";
  if (typeof rawMessage === "string") {
    message = rawMessage;
  } else if (rawMessage != null) {
    message = String(rawMessage);
  }

  return { code, stderr, message };
}
23
+
24
/**
 * Reports whether a caught value looks like a timeout or abort.
 *
 * Returns `true` when `err` is an object and any of these hold:
 *  - its `killed` property is truthy;
 *  - its `name` is `"AbortError"` or `"TimeoutError"` (the latter is the name
 *    of the error produced by `AbortSignal.timeout()`);
 *  - its `code` is `"ETIMEDOUT"`;
 *  - its `message` is a string containing "timed out" (case-insensitive).
 *
 * @param err The caught value; falsy and non-object values yield `false`.
 * @returns `true` if the value matches one of the rules above.
 */
export function isTimeoutError(err: unknown): boolean {
  if (!err || typeof err !== "object") return false;

  const { killed, name, code, message } = err as {
    killed?: unknown;
    name?: unknown;
    code?: unknown;
    message?: unknown;
  };

  if (killed) return true;
  if (name === "AbortError" || name === "TimeoutError") return true;
  if (code === "ETIMEDOUT") return true;

  // Only inspect the message when it really is a string; a non-string
  // `message` must not make this predicate throw.
  return typeof message === "string" && message.toLowerCase().includes("timed out");
}
32
+
33
/**
 * Normalizes error text for display: collapses every run of whitespace to a
 * single space, trims both ends, and limits the result to 200 UTF-16 code
 * units.
 *
 * If the 200-unit cut would fall between the two halves of a surrogate pair,
 * the orphaned first half is dropped too, so the result never ends in a lone
 * surrogate (it is then 199 units long).
 *
 * @param text Raw error text.
 * @returns The normalized, length-limited text.
 */
export function trimErrorText(text: string): string {
  const maxLength = 200;
  const collapsed = text.replace(/\s+/g, " ").trim();
  if (collapsed.length <= maxLength) return collapsed;

  // 0xD800–0xDBFF are high (leading) surrogates; one at the cut point means
  // its trailing half would be sliced away.
  const lastKept = collapsed.charCodeAt(maxLength - 1);
  const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff;
  return collapsed.slice(0, splitsPair ? maxLength - 1 : maxLength);
}
36
+
37
/**
 * Turns a caught ffmpeg failure into a short message string.
 *
 * Checks are applied in this order, first match wins:
 *  1. error code `ENOENT` → install hint;
 *  2. {@link isTimeoutError} matches → timeout message;
 *  3. stderr contains the text "403" → expired-URL hint;
 *  4. otherwise a generic message, followed by trimmed stderr (or the error
 *     message when stderr is empty) if any text is available.
 *
 * @param err The caught value.
 * @returns The message string.
 */
export function mapFfmpegError(err: unknown): string {
  const details = readExecError(err);

  if (details.code === "ENOENT") {
    return "ffmpeg is not installed. Install with: brew install ffmpeg";
  }

  if (isTimeoutError(err)) {
    return "ffmpeg timed out extracting frame";
  }

  if (details.stderr.indexOf("403") !== -1) {
    return "Stream URL returned 403 — may have expired, try again";
  }

  const snippet = trimErrorText(details.stderr || details.message);
  if (!snippet) {
    return "ffmpeg failed";
  }
  return `ffmpeg failed: ${snippet}`;
}