websnap-reader 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,187 @@
1
+ # websnap
2
+
3
+ Turn any URL into clean markdown. A better "reader mode" for your terminal.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ npm install -g @wilsonxu/websnap
9
+ ```
10
+
11
+ Or install locally from source:
12
+
13
+ ```bash
14
+ git clone <repo-url>
15
+ cd websnap
16
+ npm install && npm run build
17
+ npm link
18
+ ```
19
+
20
+ ## Usage
21
+
22
+ ### Basic: URL to Markdown
23
+
24
+ ```bash
25
+ websnap https://example.com
26
+ ```
27
+
28
+ Outputs clean, readable markdown to stdout. Pipe it anywhere:
29
+
30
+ ```bash
31
+ websnap https://blog.example.com/post > article.md
32
+ ```
33
+
34
+ ### Structured JSON Output
35
+
36
+ ```bash
37
+ websnap https://example.com --json
38
+ ```
39
+
40
+ Returns:
41
+
42
+ ```json
43
+ {
44
+ "url": "https://example.com",
45
+ "title": "Example Article",
46
+ "author": "John Doe",
47
+ "date": "March 15, 2024",
48
+ "content": "# Example Article\n\nArticle content in markdown...",
49
+ "wordCount": 1234,
50
+ "readingTime": "6 min read",
51
+ "extractedAt": "2024-03-15T10:30:00.000Z"
52
+ }
53
+ ```
54
+
55
+ ### AI-Powered Summary
56
+
57
+ ```bash
58
+ websnap https://example.com --summary
59
+ ```
60
+
61
+ Generates a concise 3-sentence summary. Supports multiple AI backends:
62
+
63
+ | Backend | Setup | Model Default |
64
+ | --------- | ---------------------------------- | ------------------- |
65
+ | OpenAI | `export OPENAI_API_KEY=sk-...` | gpt-4o-mini |
66
+ | Anthropic | `export ANTHROPIC_API_KEY=sk-ant-...` | claude-sonnet-4-20250514 |
67
+ | Ollama | Run `ollama serve` locally | llama3.2 |
68
+ | Fallback | No setup needed | Extractive summary |
69
+
70
+ Combine with JSON:
71
+
72
+ ```bash
73
+ websnap https://example.com --summary --json
74
+ ```
75
+
76
+ ### Batch Processing
77
+
78
+ Create a file with one URL per line:
79
+
80
+ ```
81
+ # urls.txt
82
+ https://example.com/article-1
83
+ https://example.com/article-2
84
+ https://example.com/article-3
85
+ ```
86
+
87
+ Process all at once:
88
+
89
+ ```bash
90
+ websnap batch urls.txt --outdir ./articles
91
+ websnap batch urls.txt --json
92
+ websnap batch urls.txt --outdir ./summaries --summary
93
+ ```
94
+
95
+ ### Save to File
96
+
97
+ ```bash
98
+ websnap https://example.com -o article.md
99
+ websnap https://example.com --json -o article.json
100
+ ```
101
+
102
+ ## Chrome CDP Integration
103
+
104
+ For JavaScript-heavy sites or login-required pages, websnap can connect to your running Chrome browser via the Chrome DevTools Protocol.
105
+
106
+ ### Setup
107
+
108
+ Start Chrome with remote debugging enabled:
109
+
110
+ ```bash
111
+ # macOS
112
+ /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222
113
+
114
+ # Linux
115
+ google-chrome --remote-debugging-port=9222
116
+ ```
117
+
118
+ Now websnap will automatically use CDP to fetch pages, reusing your existing cookies and sessions. This means **login-required pages just work** if you're already logged in.
119
+
120
+ ### Custom CDP Endpoint
121
+
122
+ ```bash
123
+ websnap https://example.com --cdp http://localhost:9333
124
+ ```
125
+
126
+ ### Fallback
127
+
128
+ If Chrome CDP is not available, websnap automatically falls back to plain HTTP fetch. Static pages work without any Chrome setup.
129
+
130
+ ## Options
131
+
132
+ | Flag | Description |
133
+ | ---------------------- | ------------------------------------------ |
134
+ | `--json` | Output structured JSON |
135
+ | `--summary` | Generate AI-powered 3-sentence summary |
136
+ | `--raw` | Output raw extracted HTML |
137
+ | `-o, --output <file>` | Write output to file |
138
+ | `--cdp <endpoint>` | Chrome CDP endpoint (default: `http://127.0.0.1:9222`) |
139
+ | `--timeout <ms>` | Page load timeout (default: 15000) |
140
+ | `--user-agent <str>` | Custom User-Agent string |
141
+ | `-V, --version` | Show version number |
142
+ | `-h, --help` | Show help |
143
+
144
+ ### Batch Options
145
+
146
+ | Flag | Description |
147
+ | ------------------ | ---------------------------------------------- |
148
+ | `--outdir <dir>` | Write each result as a separate file |
149
+ | `--delay <ms>` | Delay between requests (default: 1000) |
150
+
151
+ ## Environment Variables
152
+
153
+ | Variable | Description |
154
+ | ------------------ | ---------------------------------- |
155
+ | `OPENAI_API_KEY` | OpenAI API key for summaries |
156
+ | `OPENAI_MODEL` | OpenAI model (default: gpt-4o-mini)|
157
+ | `ANTHROPIC_API_KEY`| Anthropic API key for summaries |
158
+ | `ANTHROPIC_MODEL` | Anthropic model (default: claude-sonnet-4-20250514) |
159
+ | `OLLAMA_URL` | Ollama server URL (default: http://127.0.0.1:11434) |
160
+ | `OLLAMA_MODEL` | Ollama model (default: llama3.2) |
161
+
162
+ ## Examples
163
+
164
+ ```bash
165
+ # Quick read of a blog post
166
+ websnap https://paulgraham.com/greatwork.html
167
+
168
+ # Save an article as JSON for processing
169
+ websnap https://arxiv.org/abs/2301.00001 --json -o paper.json
170
+
171
+ # Get a quick summary
172
+ websnap https://news.ycombinator.com/item?id=12345 --summary
173
+
174
+ # Batch scrape a list of articles
175
+ websnap batch research-urls.txt --outdir ./research --json
176
+
177
+ # Use with jq for data extraction
178
+ websnap https://example.com --json | jq '.title, .wordCount'
179
+
180
+ # Pipe to other tools
181
+ websnap https://example.com | glow - # render with glow
182
+ websnap https://example.com | pbcopy # copy to clipboard (macOS)
183
+ ```
184
+
185
+ ## License
186
+
187
+ MIT
@@ -0,0 +1,20 @@
1
+ /**
2
+ * fetcher.ts - Page fetching via Chrome CDP or plain HTTP
3
+ *
4
+ * Strategy:
5
+ * 1. Try Chrome CDP connection (port 9222 by default) for JS-rendered pages
6
+ * 2. Fall back to plain HTTP fetch for static pages
7
+ *
8
+ * CDP mode reuses existing Chrome cookies, so login-required pages work
9
+ * if the user has already logged in via their Chrome browser.
10
+ */
11
+ export interface FetchOptions {
12
+ cdpEndpoint?: string;
13
+ timeout?: number;
14
+ userAgent?: string;
15
+ }
16
+ /**
17
+ * Fetch a page's HTML content. Tries CDP first, then falls back to HTTP.
18
+ */
19
+ export declare function fetchPage(url: string, options?: FetchOptions): Promise<string>;
20
+ //# sourceMappingURL=fetcher.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../src/fetcher.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,MAAM,WAAW,YAAY;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AASD;;GAEG;AACH,wBAAsB,SAAS,CAC7B,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,YAAiB,GACzB,OAAO,CAAC,MAAM,CAAC,CAejB"}
@@ -0,0 +1,350 @@
1
+ "use strict";
2
+ /**
3
+ * fetcher.ts - Page fetching via Chrome CDP or plain HTTP
4
+ *
5
+ * Strategy:
6
+ * 1. Try Chrome CDP connection (port 9222 by default) for JS-rendered pages
7
+ * 2. Fall back to plain HTTP fetch for static pages
8
+ *
9
+ * CDP mode reuses existing Chrome cookies, so login-required pages work
10
+ * if the user has already logged in via their Chrome browser.
11
+ */
12
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
13
+ if (k2 === undefined) k2 = k;
14
+ var desc = Object.getOwnPropertyDescriptor(m, k);
15
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
16
+ desc = { enumerable: true, get: function() { return m[k]; } };
17
+ }
18
+ Object.defineProperty(o, k2, desc);
19
+ }) : (function(o, m, k, k2) {
20
+ if (k2 === undefined) k2 = k;
21
+ o[k2] = m[k];
22
+ }));
23
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
24
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
25
+ }) : function(o, v) {
26
+ o["default"] = v;
27
+ });
28
+ var __importStar = (this && this.__importStar) || (function () {
29
+ var ownKeys = function(o) {
30
+ ownKeys = Object.getOwnPropertyNames || function (o) {
31
+ var ar = [];
32
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
33
+ return ar;
34
+ };
35
+ return ownKeys(o);
36
+ };
37
+ return function (mod) {
38
+ if (mod && mod.__esModule) return mod;
39
+ var result = {};
40
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
41
+ __setModuleDefault(result, mod);
42
+ return result;
43
+ };
44
+ })();
45
+ Object.defineProperty(exports, "__esModule", { value: true });
46
+ exports.fetchPage = fetchPage;
47
/**
 * Retrieve the HTML of `url`.
 *
 * A Chrome DevTools Protocol fetch is attempted first; if that throws for any
 * reason, a dimmed notice is written to stderr and the page is fetched over
 * plain HTTP instead.
 *
 * @param url - Address of the page to fetch.
 * @param options - Optional `cdpEndpoint` (default `http://127.0.0.1:9222`),
 *   `timeout` in ms (default 15000) and `userAgent`.
 * @returns A promise for the page's HTML.
 */
async function fetchPage(url, options = {}) {
    const cdpEndpoint = options.cdpEndpoint === undefined
        ? "http://127.0.0.1:9222"
        : options.cdpEndpoint;
    const timeout = options.timeout === undefined ? 15000 : options.timeout;
    try {
        return await fetchViaCDP(url, cdpEndpoint, timeout, options.userAgent);
    }
    catch (cdpErr) {
        // Only the first 60 characters of the failure reason are shown.
        const reason = cdpErr.message?.substring(0, 60) || "connection failed";
        process.stderr.write(`\x1b[90mCDP unavailable (${reason}), using HTTP fetch...\x1b[0m\n`);
    }
    return fetchViaHTTP(url, timeout, options.userAgent);
}
63
/**
 * Fetch a page's rendered HTML through the Chrome DevTools Protocol.
 *
 * Opens a scratch tab on the browser behind `cdpEndpoint`, navigates it to
 * `url`, waits for the load event (or at most `timeout` ms), allows a further
 * 1.5 s for client-side rendering, and returns
 * `document.documentElement.outerHTML`.
 *
 * The scratch tab is closed on every exit path, including when the WebSocket
 * connection to the tab cannot be established.
 *
 * @param url - Page to load.
 * @param cdpEndpoint - HTTP base URL of the DevTools endpoint.
 * @param timeout - Maximum time in ms to wait for the page load event.
 * @param userAgent - Optional User-Agent override applied to the tab.
 * @returns The serialized HTML of the loaded document.
 * @throws Error when the endpoint is unreachable, the tab cannot be created,
 *   navigation fails, or no HTML string can be extracted.
 */
async function fetchViaCDP(url, cdpEndpoint, timeout, userAgent) {
    // Probe the endpoint first so that a missing browser fails fast.
    const versionRes = await fetchWithTimeout(`${cdpEndpoint}/json/version`, 3000);
    if (!versionRes.ok)
        throw new Error(`CDP version endpoint returned ${versionRes.status}`);
    // The payload itself is not needed; parsing it confirms the endpoint
    // answers with JSON and consumes the response body.
    await versionRes.json();
    const tabInfo = await openCDPTab(cdpEndpoint);
    let session;
    try {
        const wsUrl = tabInfo.webSocketDebuggerUrl;
        if (!wsUrl)
            throw new Error("No WebSocket URL for new tab");
        // Connecting inside the try block guarantees the tab is closed even
        // when the WebSocket handshake fails.
        session = await connectCDP(wsUrl);
        if (userAgent) {
            await session.send("Network.setUserAgentOverride", { userAgent });
        }
        await session.send("Page.enable");
        await session.send("Network.enable");
        const navResult = await session.send("Page.navigate", { url });
        // Page.navigate reports network-level failures through `errorText`
        // instead of rejecting; without this check Chrome's error page would
        // be returned as if it were the article.
        if (navResult?.errorText) {
            throw new Error(`Navigation failed: ${navResult.errorText}`);
        }
        await waitForLoad(session, timeout);
        // Small extra delay for JS rendering after the load event.
        await new Promise((r) => setTimeout(r, 1500));
        const result = await session.send("Runtime.evaluate", {
            expression: "document.documentElement.outerHTML",
            returnByValue: true,
        });
        const html = result?.result?.value;
        if (!html || typeof html !== "string") {
            throw new Error("Failed to extract HTML from page");
        }
        return html;
    }
    finally {
        // Best-effort cleanup: a failure to close the tab must not mask the
        // result or the original error.
        if (tabInfo?.id) {
            await fetchWithTimeout(`${cdpEndpoint}/json/close/${tabInfo.id}`, 2000).catch(() => { });
        }
        if (session)
            session.close();
    }
}
/**
 * Create a blank tab via the DevTools HTTP API and return its descriptor.
 *
 * PUT is tried first because current Chrome releases reject GET on
 * `/json/new` with 405; GET is kept as a fallback for older browsers.
 */
async function openCDPTab(cdpEndpoint) {
    const newTabUrl = `${cdpEndpoint}/json/new?${encodeURIComponent("about:blank")}`;
    let lastStatus = 0;
    for (const method of ["PUT", "GET"]) {
        const controller = new AbortController();
        const timer = setTimeout(() => controller.abort(), 3000);
        try {
            const res = await fetch(newTabUrl, { method, signal: controller.signal });
            if (res.ok)
                return await res.json();
            lastStatus = res.status;
        }
        finally {
            clearTimeout(timer);
        }
    }
    throw new Error(`CDP new tab returned ${lastStatus}`);
}
121
/**
 * Open a minimal CDP session over a WebSocket.
 *
 * Uses the global `WebSocket` when present, otherwise whatever
 * `getWebSocket()` provides. The returned session exposes:
 *   - `send(method, params)`: issues a command and resolves with its result,
 *     rejecting on a protocol error, after 30 s without a reply, or when the
 *     socket/session is closed first;
 *   - `close()`: rejects anything still pending and closes the socket;
 *   - `_eventHandlers`: a Map from CDP event name to an array of callbacks
 *     that are invoked with the event's `params`.
 *
 * Every timer is cleared as soon as it is no longer needed, so a finished
 * session does not keep the Node.js process alive.
 *
 * @param wsUrl - The target's `webSocketDebuggerUrl`.
 * @returns A promise for the session object.
 * @throws Error if the socket errors, closes, or fails to open within 5 s.
 */
async function connectCDP(wsUrl) {
    const WebSocket = globalThis.WebSocket || (await getWebSocket());
    const CONNECT_TIMEOUT_MS = 5000;
    const COMMAND_TIMEOUT_MS = 30000;
    return new Promise((resolve, reject) => {
        const ws = new WebSocket(wsUrl);
        let msgId = 0;
        const pending = new Map();
        const eventHandlers = new Map();
        // Reject every in-flight command and cancel its timeout.
        const failPending = (reason) => {
            for (const entry of pending.values()) {
                clearTimeout(entry.timer);
                entry.reject(new Error(reason));
            }
            pending.clear();
        };
        const connectTimer = setTimeout(() => {
            // Do not leave a half-open socket behind.
            try {
                ws.close();
            }
            catch { }
            reject(new Error("CDP WebSocket connection timed out"));
        }, CONNECT_TIMEOUT_MS);
        ws.onopen = () => {
            clearTimeout(connectTimer);
            const session = {
                ws,
                id: 0,
                send(method, params = {}) {
                    return new Promise((res, rej) => {
                        const id = ++msgId;
                        const timer = setTimeout(() => {
                            if (pending.delete(id)) {
                                rej(new Error(`CDP command "${method}" timed out`));
                            }
                        }, COMMAND_TIMEOUT_MS);
                        pending.set(id, { resolve: res, reject: rej, timer });
                        try {
                            ws.send(JSON.stringify({ id, method, params }));
                        }
                        catch (err) {
                            clearTimeout(timer);
                            pending.delete(id);
                            rej(err);
                        }
                    });
                },
                close() {
                    failPending("CDP session closed");
                    try {
                        ws.close();
                    }
                    catch { }
                },
            };
            session._eventHandlers = eventHandlers;
            resolve(session);
        };
        ws.onmessage = (event) => {
            // Malformed frames and throwing event callbacks are ignored on
            // purpose: one bad message must not tear down the session.
            try {
                const data = typeof event.data === "string" ? event.data : event.data.toString();
                const msg = JSON.parse(data);
                if (msg.id && pending.has(msg.id)) {
                    const entry = pending.get(msg.id);
                    pending.delete(msg.id);
                    clearTimeout(entry.timer);
                    if (msg.error) {
                        entry.reject(new Error(msg.error.message || JSON.stringify(msg.error)));
                    }
                    else {
                        entry.resolve(msg.result);
                    }
                }
                if (msg.method && eventHandlers.has(msg.method)) {
                    for (const h of eventHandlers.get(msg.method)) {
                        h(msg.params);
                    }
                }
            }
            catch { }
        };
        ws.onerror = (err) => {
            clearTimeout(connectTimer);
            const message = `WebSocket error: ${(err && err.message) || "connection failed"}`;
            // No-op when the connection promise has already settled.
            reject(new Error(message));
            failPending(message);
        };
        ws.onclose = () => {
            clearTimeout(connectTimer);
            reject(new Error("CDP WebSocket closed before opening"));
            failPending("CDP WebSocket closed");
        };
    });
}
191
/**
 * Locate a WebSocket constructor when the runtime has no global one.
 *
 * The `undici` package is tried first (its `WebSocket` export is returned),
 * then the `ws` package (its default export, its `WebSocket` export, or the
 * module itself, in that order).
 *
 * @returns A promise for the WebSocket constructor.
 * @throws Error when neither package can be loaded.
 */
async function getWebSocket() {
    // Mirrors a dynamic import: the module is resolved at call time.
    const loadModule = (name) => Promise.resolve(`${name}`).then(s => __importStar(require(s)));
    try {
        const undiciModule = await loadModule("undici");
        return undiciModule.WebSocket;
    }
    catch {
        // undici is unavailable; fall through to the 'ws' package.
    }
    try {
        const wsModule = await loadModule("ws");
        return wsModule.default || wsModule.WebSocket || wsModule;
    }
    catch {
        throw new Error("No WebSocket implementation available. Use Node.js 21+ or install 'ws' package.");
    }
}
210
/**
 * Resolve once the page's load event arrives over CDP, or after `timeout` ms.
 *
 * A callback is appended to the session's `Page.loadEventFired` handler list.
 * The promise never rejects: when the timeout elapses it resolves anyway,
 * since the page may be partially loaded and still usable.
 *
 * @param session - CDP session exposing an `_eventHandlers` Map.
 * @param timeout - Maximum wait in milliseconds.
 * @returns A promise that resolves with no value.
 */
function waitForLoad(session, timeout) {
    return new Promise((resolve) => {
        const registry = session._eventHandlers;
        let fallbackTimer;
        const onLoadEvent = () => {
            clearTimeout(fallbackTimer);
            resolve();
        };
        const listeners = registry.get("Page.loadEventFired") || [];
        listeners.push(onLoadEvent);
        registry.set("Page.loadEventFired", listeners);
        fallbackTimer = setTimeout(() => {
            resolve();
        }, timeout);
    });
}
228
/**
 * Plain-HTTP fallback used when CDP is not available.
 *
 * The native `fetch` path is tried first. If it fails and the failure text
 * (the error's `cause` if set, otherwise its `message`) contains one of the
 * known TLS/fetch failure markers, the request is retried through
 * `fetchViaNodeHTTPS`; any other error is rethrown unchanged.
 *
 * NOTE(review): the retry path runs with relaxed certificate checking, so a
 * matching failure silently downgrades TLS verification for that request.
 *
 * @param url - Page to fetch.
 * @param timeout - Timeout in ms, forwarded to both fetchers.
 * @param userAgent - Optional User-Agent; a desktop Chrome string is used
 *   when omitted.
 * @returns A promise for the response body.
 */
async function fetchViaHTTP(url, timeout, userAgent) {
    const fallbackMarkers = [
        "UNABLE_TO_GET_ISSUER_CERT",
        "CERT_HAS_EXPIRED",
        "DEPTH_ZERO_SELF_SIGNED",
        "certificate",
        "fetch failed",
    ];
    const ua = userAgent
        ? userAgent
        : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
    try {
        return await fetchViaHTTPNative(url, timeout, ua);
    }
    catch (nativeErr) {
        const detail = String(nativeErr?.cause || nativeErr?.message || "");
        const shouldRetry = fallbackMarkers.some((marker) => detail.includes(marker));
        if (!shouldRetry) {
            throw nativeErr;
        }
        process.stderr.write(`\x1b[90mNative fetch failed, using node:https fallback...\x1b[0m\n`);
        return fetchViaNodeHTTPS(url, timeout, ua);
    }
}
253
/**
 * Fetch `url` with the runtime's built-in `fetch`, following redirects.
 *
 * The request is aborted after `timeout` ms (the timer also covers reading
 * the body). A non-2xx status raises `HTTP <status> <statusText>`. A response
 * whose Content-Type is neither HTML nor XHTML only triggers a warning on
 * stderr; its body is still returned.
 *
 * @param url - Page to fetch.
 * @param timeout - Abort deadline in milliseconds.
 * @param userAgent - Value for the User-Agent header.
 * @returns A promise for the response body as text.
 */
async function fetchViaHTTPNative(url, timeout, userAgent) {
    const abort = new AbortController();
    const deadline = setTimeout(() => {
        abort.abort();
    }, timeout);
    try {
        const response = await fetch(url, {
            headers: {
                Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.9",
                "User-Agent": userAgent,
            },
            signal: abort.signal,
            redirect: "follow",
        });
        if (!response.ok) {
            throw new Error(`HTTP ${response.status} ${response.statusText}`);
        }
        const contentType = response.headers.get("content-type") || "";
        const looksLikeHtml = contentType.includes("text/html") ||
            contentType.includes("application/xhtml");
        if (!looksLikeHtml) {
            process.stderr.write(`\x1b[33mWarning:\x1b[0m Content-Type is "${contentType}", not HTML\n`);
        }
        return await response.text();
    }
    finally {
        clearTimeout(deadline);
    }
}
281
/**
 * Fallback HTTP(S) fetcher built on node:http / node:https.
 *
 * SECURITY: requests are made with `rejectUnauthorized: false`, i.e. TLS
 * certificates are NOT verified. This is deliberate (self-signed and
 * corporate-proxy certificates) but means responses are not authenticated.
 *
 * Redirects (3xx with a Location header) are followed, at most
 * `redirectsLeft` times; a 3xx without Location is returned as-is.
 *
 * @param url - Absolute http: or https: URL to fetch.
 * @param timeout - Socket timeout in milliseconds.
 * @param userAgent - Value for the User-Agent header.
 * @param redirectsLeft - Remaining redirect budget (default 5).
 * @returns A promise for the response body decoded as UTF-8.
 * @throws Error on an unsupported protocol, a status outside 2xx/3xx, too
 *   many redirects, an invalid redirect target, a timeout, or a socket error.
 */
async function fetchViaNodeHTTPS(url, timeout, userAgent, redirectsLeft = 5) {
    const https = require("node:https");
    const http = require("node:http");
    const { URL } = require("node:url");
    return new Promise((resolve, reject) => {
        const parsedUrl = new URL(url);
        const isHTTPS = parsedUrl.protocol === "https:";
        if (!isHTTPS && parsedUrl.protocol !== "http:") {
            // Anything else would otherwise be sent as plain HTTP.
            reject(new Error(`Unsupported protocol: ${parsedUrl.protocol}`));
            return;
        }
        const mod = isHTTPS ? https : http;
        const options = {
            hostname: parsedUrl.hostname,
            port: parsedUrl.port || (isHTTPS ? 443 : 80),
            path: parsedUrl.pathname + parsedUrl.search,
            method: "GET",
            headers: {
                Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.9",
                "User-Agent": userAgent,
            },
            rejectUnauthorized: false, // Handle self-signed/corporate certs
            timeout,
        };
        const req = mod.request(options, (res) => {
            const status = res.statusCode;
            if (status >= 300 && status < 400 && res.headers.location) {
                // Discard the redirect body so the socket is released.
                res.resume();
                if (redirectsLeft <= 0) {
                    reject(new Error("Too many redirects"));
                    return;
                }
                let redirectUrl;
                try {
                    redirectUrl = new URL(res.headers.location, url).toString();
                }
                catch (err) {
                    // A malformed Location must reject, not crash the process.
                    reject(err);
                    return;
                }
                fetchViaNodeHTTPS(redirectUrl, timeout, userAgent, redirectsLeft - 1).then(resolve, reject);
                return;
            }
            if (status < 200 || status >= 400) {
                res.resume();
                reject(new Error(`HTTP ${status}`));
                return;
            }
            const chunks = [];
            res.on("data", (chunk) => chunks.push(chunk));
            res.on("end", () => {
                resolve(Buffer.concat(chunks).toString("utf-8"));
            });
            res.on("error", reject);
        });
        req.on("error", reject);
        req.on("timeout", () => {
            req.destroy();
            reject(new Error("Request timed out"));
        });
        req.end();
    });
}
337
/**
 * GET `url` with the built-in `fetch`, aborting the request if it has not
 * produced a response within `timeout` ms.
 *
 * The timer is cleared once `fetch` settles, so reading the body afterwards
 * is not subject to the deadline.
 *
 * @param url - Address to request.
 * @param timeout - Abort deadline in milliseconds.
 * @returns A promise for the fetch Response.
 */
async function fetchWithTimeout(url, timeout) {
    const abort = new AbortController();
    const deadline = setTimeout(() => {
        abort.abort();
    }, timeout);
    try {
        const response = await fetch(url, { signal: abort.signal });
        return response;
    }
    finally {
        clearTimeout(deadline);
    }
}
350
+ //# sourceMappingURL=fetcher.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetcher.js","sourceRoot":"","sources":["../src/fetcher.ts"],"names":[],"mappings":";AAAA;;;;;;;;;GASG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAkBH,8BAkBC;AArBD;;GAEG;AACI,KAAK,UAAU,SAAS,CAC7B,GAAW,EACX,UAAwB,EAAE;IAE1B,MAAM,EAAE,WAAW,GAAG,uBAAuB,EAAE,OAAO,GAAG,KAAK,EAAE,GAAG,OAAO,CAAC;IAE3E,gBAAgB;IAChB,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE,WAAW,EAAE,OAAO,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;QAC7E,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,OAAO,MAAW,EAAE,CAAC;QACrB,uCAAuC;QACvC,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,4BAA4B,MAAM,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,mBAAmB,iCAAiC,CACrH,CAAC;IACJ,CAAC;IAED,OAAO,YAAY,CAAC,GAAG,EAAE,OAAO,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;AACvD,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,WAAW,CACxB,GAAW,EACX,WAAmB,EACnB,OAAe,EACf,SAAkB;IAElB,4BAA4B;IAC5B,MAAM,UAAU,GAAG,GAAG,WAAW,eAAe,CAAC;IACjD,MAAM,UAAU,GAAG,MAAM,gBAAgB,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;IAC5D,IAAI,CAAC,UAAU,CAAC,EAAE;QAAE,MAAM,IAAI,KAAK,CAAC,iCAAiC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;IAC1F,MAAM,WAAW,GAAG,MAAM,UAAU,CAAC,IAAI,EAAE,CAAC;IAE5C,4BAA4B;IAC5B,MAAM,SAAS,GAAG,GAAG,WAAW,aAAa,kBAAkB,CAAC,aAAa,CAAC,EAAE,CAAC;IACjF,MAAM,SAAS,GAAG,MAAM,gBAAgB,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IAC1D,IAAI,CAAC,SAAS,CAAC,EAAE;QAAE,MAAM,IAAI,KAAK,CAAC,wBAAwB,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC;IAC/E,MAAM,OAAO,GAAG,CAAC,MAAM,SAAS,CAAC,IAAI,EAAE,CAAwB,CAAC;IAChE,MAAM,KAAK,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAE3C,IAAI,CAAC,KAAK;QAAE,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC;IAE5D,wBAAwB;IACxB,MAAM,OAAO,GAAG,MAAM,UAAU,CAAC,KAAK,CAAC,CAAC;IAExC,IAAI,CAAC;QACH,6BAA6B;QAC7B,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,OAAO,CAAC,IAAI,CAAC,8BAA8B,EAAE;gBACjD,SAAS;aACV,CAAC,CAAC;QACL,CAAC;QAED,qBAAqB;QACrB,MAAM,OAAO,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAClC,MAAM,OAAO,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAErC,WAAW;QACX,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,eAAe,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;QAE/D,gBAAgB;QAChB,MAAM,WAAW,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QAEpC,qCAAqC;QACrC,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;
QAE9C,oBAAoB;QACpB,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,kBAAkB,EAAE;YACpD,UAAU,EAAE,oCAAoC;YAChD,aAAa,EAAE,IAAI;SACpB,CAAC,CAAC;QAEH,MAAM,IAAI,GAAG,MAAM,EAAE,MAAM,EAAE,KAAK,CAAC;QACnC,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAC;QACtD,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;YAAS,CAAC;QACT,gBAAgB;QAChB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,GAAG,WAAW,eAAe,OAAO,CAAC,EAAE,EAAE,CAAC;YAC3D,MAAM,gBAAgB,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QACzD,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;QACV,OAAO,CAAC,KAAK,EAAE,CAAC;IAClB,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,UAAU,CAAC,KAAa;IACrC,uDAAuD;IACvD,MAAM,SAAS,GAAI,UAAkB,CAAC,SAAS,IAAI,CAAC,MAAM,YAAY,EAAE,CAAC,CAAC;IAE1E,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,MAAM,EAAE,GAAG,IAAI,SAAS,CAAC,KAAK,CAAC,CAAC;QAChC,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,MAAM,OAAO,GAAG,IAAI,GAAG,EAGpB,CAAC;QACJ,MAAM,aAAa,GAAG,IAAI,GAAG,EAAqC,CAAC;QAEnE,EAAE,CAAC,MAAM,GAAG,GAAG,EAAE;YACf,MAAM,OAAO,GAAe;gBAC1B,EAAE;gBACF,EAAE,EAAE,CAAC;gBACL,IAAI,CAAC,MAAc,EAAE,SAA8B,EAAE;oBACnD,OAAO,IAAI,OAAO,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;wBAC9B,MAAM,EAAE,GAAG,EAAE,KAAK,CAAC;wBACnB,OAAO,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC;wBAC/C,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC;wBAEhD,kCAAkC;wBAClC,UAAU,CAAC,GAAG,EAAE;4BACd,IAAI,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;gCACpB,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;gCACnB,GAAG,CAAC,IAAI,KAAK,CAAC,gBAAgB,MAAM,aAAa,CAAC,CAAC,CAAC;4BACtD,CAAC;wBACH,CAAC,EAAE,KAAK,CAAC,CAAC;oBACZ,CAAC,CAAC,CAAC;gBACL,CAAC;gBACD,KAAK;oBACH,IAAI,CAAC;wBACH,EAAE,CAAC,KAAK,EAAE,CAAC;oBACb,CAAC;oBAAC,MAAM,CAAC,CAAA,CAAC;gBACZ,CAAC;aACF,CAAC;YAED,OAAe,CAAC,cAAc,GAAG,aAAa,CAAC;YAChD,OAAO,CAAC,OAAO,CAAC,CAAC;QACnB,CAAC,CAAC;QAEF,EAAE,CAAC,SAAS,GAAG,CAAC,KAAU,EAAE,EAAE;YAC5B,IAAI,CAAC;gBACH,MAAM,IAAI,GACR,OAAO,KAAK,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,QAA
Q,EAAE,CAAC;gBACtE,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAE7B,IAAI,GAAG,CAAC,EAAE,IAAI,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;oBAClC,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAE,CAAC;oBACrC,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;oBACvB,IAAI,GAAG,CAAC,KAAK,EAAE,CAAC;wBACd,OAAO,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,IAAI,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;oBAC5E,CAAC;yBAAM,CAAC;wBACN,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;oBAC9B,CAAC;gBACH,CAAC;gBAED,IAAI,GAAG,CAAC,MAAM,IAAI,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;oBAChD,KAAK,MAAM,CAAC,IAAI,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAE,EAAE,CAAC;wBAC/C,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;oBAChB,CAAC;gBACH,CAAC;YACH,CAAC;YAAC,MAAM,CAAC,CAAA,CAAC;QACZ,CAAC,CAAC;QAEF,EAAE,CAAC,OAAO,GAAG,CAAC,GAAQ,EAAE,EAAE;YACxB,MAAM,CAAC,IAAI,KAAK,CAAC,oBAAoB,GAAG,CAAC,OAAO,IAAI,mBAAmB,EAAE,CAAC,CAAC,CAAC;QAC9E,CAAC,CAAC;QAEF,qBAAqB;QACrB,UAAU,CAAC,GAAG,EAAE;YACd,MAAM,CAAC,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC,CAAC;QAC1D,CAAC,EAAE,IAAI,CAAC,CAAC;IACX,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,YAAY;IACzB,IAAI,CAAC;QACH,iEAAiE;QACjE,MAAM,EAAE,SAAS,EAAE,GAAG,yBAAa,QAAe,uCAAC,CAAC;QACpD,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,MAAM,CAAC;QACP,IAAI,CAAC;YACH,MAAM,EAAE,GAAG,yBAAa,IAAW,uCAAC,CAAC;YACrC,OAAO,EAAE,CAAC,OAAO,IAAI,EAAE,CAAC,SAAS,IAAI,EAAE,CAAC;QAC1C,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,KAAK,CACb,iFAAiF,CAClF,CAAC;QACJ,CAAC;IACH,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAAC,OAAmB,EAAE,OAAe;IACvD,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,MAAM,aAAa,GAChB,OAAe,CAAC,cAAc,CAAC;QAElC,MAAM,QAAQ,GAAG,aAAa,CAAC,GAAG,CAAC,qBAAqB,CAAC,IAAI,EAAE,CAAC;QAChE,QAAQ,CAAC,IAAI,CAAC,GAAG,EAAE;YACjB,YAAY,CAAC,KAAK,CAAC,CAAC;YACpB,OAAO,EAAE,CAAC;QACZ,CAAC,CAAC,CAAC;QACH,aAAa,CAAC,GAAG,CAAC,qBAAqB,EAAE,QAAQ,CAAC,CAAC;QAEnD,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE;YAC5B,gEAAgE;YAChE,OAAO,EAAE,CAAC;QACZ,CAAC,EAAE,OAAO,CAAC,CAAC;IACd,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;GAGG;AACH
,KAAK,UAAU,YAAY,CACzB,GAAW,EACX,OAAe,EACf,SAAkB;IAElB,MAAM,EAAE,GACN,SAAS;QACT,uHAAuH,CAAC;IAE1H,yBAAyB;IACzB,IAAI,CAAC;QACH,OAAO,MAAM,kBAAkB,CAAC,GAAG,EAAE,OAAO,EAAE,EAAE,CAAC,CAAC;IACpD,CAAC;IAAC,OAAO,SAAc,EAAE,CAAC;QACxB,2DAA2D;QAC3D,MAAM,GAAG,GAAG,MAAM,CAAC,SAAS,EAAE,KAAK,IAAI,SAAS,EAAE,OAAO,IAAI,EAAE,CAAC,CAAC;QACjE,IACE,GAAG,CAAC,QAAQ,CAAC,2BAA2B,CAAC;YACzC,GAAG,CAAC,QAAQ,CAAC,kBAAkB,CAAC;YAChC,GAAG,CAAC,QAAQ,CAAC,wBAAwB,CAAC;YACtC,GAAG,CAAC,QAAQ,CAAC,aAAa,CAAC;YAC3B,GAAG,CAAC,QAAQ,CAAC,cAAc,CAAC,EAC5B,CAAC;YACD,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,oEAAoE,CACrE,CAAC;YACF,OAAO,iBAAiB,CAAC,GAAG,EAAE,OAAO,EAAE,EAAE,CAAC,CAAC;QAC7C,CAAC;QACD,MAAM,SAAS,CAAC;IAClB,CAAC;AACH,CAAC;AAED,KAAK,UAAU,kBAAkB,CAC/B,GAAW,EACX,OAAe,EACf,SAAiB;IAEjB,MAAM,OAAO,GAA2B;QACtC,MAAM,EACJ,iEAAiE;QACnE,iBAAiB,EAAE,gBAAgB;QACnC,YAAY,EAAE,SAAS;KACxB,CAAC;IAEF,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;IACzC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,OAAO,CAAC,CAAC;IAE5D,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,OAAO;YACP,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,QAAQ,EAAE,QAAQ;SACnB,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,QAAQ,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;QACpE,CAAC;QAED,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;QAC/D,IACE,CAAC,WAAW,CAAC,QAAQ,CAAC,WAAW,CAAC;YAClC,CAAC,WAAW,CAAC,QAAQ,CAAC,mBAAmB,CAAC,EAC1C,CAAC;YACD,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,4CAA4C,WAAW,eAAe,CACvE,CAAC;QACJ,CAAC;QAED,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IAC/B,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,KAAK,CAAC,CAAC;IACtB,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,KAAK,UAAU,iBAAiB,CAC9B,GAAW,EACX,OAAe,EACf,SAAiB;IAEjB,MAAM,KAAK,GAAG,wDAAa,YAAY,GAAC,CAAC;IACzC,MAAM,IAAI,GAAG,wDAAa,WAAW,GAAC,CAAC;IACvC,MAAM,EAAE,GAAG,EAAE,GAAG,wDAAa,UAAU,GAAC,CAAC;IAEzC,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC/B,MAAM,OAAO,GAAG,SAAS,CAAC,QAAQ,KAAK,QAAQ,CAAC;QAChD,MAAM,GAAG,GAAG,OAAO,CAAC,CAAC,CAAC,KAA
K,CAAC,CAAC,CAAC,IAAI,CAAC;QAEnC,MAAM,OAAO,GAAG;YACd,QAAQ,EAAE,SAAS,CAAC,QAAQ;YAC5B,IAAI,EAAE,SAAS,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;YAC5C,IAAI,EAAE,SAAS,CAAC,QAAQ,GAAG,SAAS,CAAC,MAAM;YAC3C,MAAM,EAAE,KAAK;YACb,OAAO,EAAE;gBACP,MAAM,EACJ,iEAAiE;gBACnE,iBAAiB,EAAE,gBAAgB;gBACnC,YAAY,EAAE,SAAS;aACxB;YACD,kBAAkB,EAAE,KAAK,EAAE,qCAAqC;YAChE,OAAO;SACR,CAAC;QAEF,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC,GAAQ,EAAE,EAAE;YAC5C,6BAA6B;YAC7B,IACE,GAAG,CAAC,UAAU,IAAI,GAAG;gBACrB,GAAG,CAAC,UAAU,GAAG,GAAG;gBACpB,GAAG,CAAC,OAAO,CAAC,QAAQ,EACpB,CAAC;gBACD,MAAM,WAAW,GAAG,IAAI,GAAG,CACzB,GAAG,CAAC,OAAO,CAAC,QAAQ,EACpB,GAAG,CACJ,CAAC,QAAQ,EAAE,CAAC;gBACb,iBAAiB,CAAC,WAAW,EAAE,OAAO,EAAE,SAAS,CAAC;qBAC/C,IAAI,CAAC,OAAO,CAAC;qBACb,KAAK,CAAC,MAAM,CAAC,CAAC;gBACjB,OAAO;YACT,CAAC;YAED,IAAI,GAAG,CAAC,UAAU,GAAG,GAAG,IAAI,GAAG,CAAC,UAAU,IAAI,GAAG,EAAE,CAAC;gBAClD,MAAM,CAAC,IAAI,KAAK,CAAC,QAAQ,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;gBAC5C,OAAO;YACT,CAAC;YAED,MAAM,MAAM,GAAa,EAAE,CAAC;YAC5B,GAAG,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;YACtD,GAAG,CAAC,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE;gBACjB,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;gBACrD,OAAO,CAAC,IAAI,CAAC,CAAC;YAChB,CAAC,CAAC,CAAC;YACH,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAC1B,CAAC,CAAC,CAAC;QAEH,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QACxB,GAAG,CAAC,EAAE,CAAC,SAAS,EAAE,GAAG,EAAE;YACrB,GAAG,CAAC,OAAO,EAAE,CAAC;YACd,MAAM,CAAC,IAAI,KAAK,CAAC,mBAAmB,CAAC,CAAC,CAAC;QACzC,CAAC,CAAC,CAAC;QACH,GAAG,CAAC,GAAG,EAAE,CAAC;IACZ,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,gBAAgB,CAC7B,GAAW,EACX,OAAe;IAEf,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;IACzC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,OAAO,CAAC,CAAC;IAC5D,IAAI,CAAC;QACH,OAAO,MAAM,KAAK,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;IACzD,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,KAAK,CAAC,CAAC;IACtB,CAAC;AACH,CAAC"}
@@ -0,0 +1,17 @@
1
+ /**
2
+ * formatter.ts - Convert parsed articles to markdown or JSON output
3
+ */
4
+ import { ParsedArticle } from "./parser";
5
+ /**
6
+ * Format article as clean markdown
7
+ */
8
+ export declare function formatMarkdown(article: ParsedArticle, url: string): string;
9
+ /**
10
+ * Format article as structured JSON
11
+ */
12
+ export declare function formatJSON(article: ParsedArticle, url: string): string;
13
+ /**
14
+ * Format a prompt for AI summarization
15
+ */
16
+ export declare function formatSummaryPrompt(article: ParsedArticle): string;
17
+ //# sourceMappingURL=formatter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"formatter.d.ts","sourceRoot":"","sources":["../src/formatter.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAiBzC;;GAEG;AACH,wBAAgB,cAAc,CAAC,OAAO,EAAE,aAAa,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,CAqC1E;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,OAAO,EAAE,aAAa,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,CAiBtE;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,aAAa,GAAG,MAAM,CAqBlE"}