websnap-reader 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +187 -0
- package/dist/fetcher.d.ts +20 -0
- package/dist/fetcher.d.ts.map +1 -0
- package/dist/fetcher.js +350 -0
- package/dist/fetcher.js.map +1 -0
- package/dist/formatter.d.ts +17 -0
- package/dist/formatter.d.ts.map +1 -0
- package/dist/formatter.js +116 -0
- package/dist/formatter.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +248 -0
- package/dist/index.js.map +1 -0
- package/dist/parser.d.ts +25 -0
- package/dist/parser.d.ts.map +1 -0
- package/dist/parser.js +340 -0
- package/dist/parser.js.map +1 -0
- package/dist/summarizer.d.ts +15 -0
- package/dist/summarizer.d.ts.map +1 -0
- package/dist/summarizer.js +197 -0
- package/dist/summarizer.js.map +1 -0
- package/package.json +44 -0
package/README.md
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
# websnap
|
|
2
|
+
|
|
3
|
+
Turn any URL into clean markdown. A better "reader mode" for your terminal.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install -g websnap-reader
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Or install locally from source:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
git clone <repo-url>
|
|
15
|
+
cd websnap
|
|
16
|
+
npm install && npm run build
|
|
17
|
+
npm link
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Usage
|
|
21
|
+
|
|
22
|
+
### Basic: URL to Markdown
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
websnap https://example.com
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Outputs clean, readable markdown to stdout. Pipe it anywhere:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
websnap https://blog.example.com/post > article.md
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### Structured JSON Output
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
websnap https://example.com --json
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Returns:
|
|
41
|
+
|
|
42
|
+
```json
|
|
43
|
+
{
|
|
44
|
+
"url": "https://example.com",
|
|
45
|
+
"title": "Example Article",
|
|
46
|
+
"author": "John Doe",
|
|
47
|
+
"date": "March 15, 2024",
|
|
48
|
+
"content": "# Example Article\n\nArticle content in markdown...",
|
|
49
|
+
"wordCount": 1234,
|
|
50
|
+
"readingTime": "6 min read",
|
|
51
|
+
"extractedAt": "2024-03-15T10:30:00.000Z"
|
|
52
|
+
}
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### AI-Powered Summary
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
websnap https://example.com --summary
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Generates a concise 3-sentence summary. Supports multiple AI backends:
|
|
62
|
+
|
|
63
|
+
| Backend | Setup | Model Default |
|
|
64
|
+
| --------- | ---------------------------------- | ------------------- |
|
|
65
|
+
| OpenAI | `export OPENAI_API_KEY=sk-...` | gpt-4o-mini |
|
|
66
|
+
| Anthropic | `export ANTHROPIC_API_KEY=sk-ant-...` | claude-sonnet-4-20250514 |
|
|
67
|
+
| Ollama | Run `ollama serve` locally | llama3.2 |
|
|
68
|
+
| Fallback | No setup needed | Extractive summary |
|
|
69
|
+
|
|
70
|
+
Combine with JSON:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
websnap https://example.com --summary --json
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### Batch Processing
|
|
77
|
+
|
|
78
|
+
Create a file with one URL per line:
|
|
79
|
+
|
|
80
|
+
```
|
|
81
|
+
# urls.txt
|
|
82
|
+
https://example.com/article-1
|
|
83
|
+
https://example.com/article-2
|
|
84
|
+
https://example.com/article-3
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Process all at once:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
websnap batch urls.txt --outdir ./articles
|
|
91
|
+
websnap batch urls.txt --json
|
|
92
|
+
websnap batch urls.txt --outdir ./summaries --summary
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Save to File
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
websnap https://example.com -o article.md
|
|
99
|
+
websnap https://example.com --json -o article.json
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Chrome CDP Integration
|
|
103
|
+
|
|
104
|
+
For JavaScript-heavy sites or login-required pages, websnap can connect to your running Chrome browser via the Chrome DevTools Protocol.
|
|
105
|
+
|
|
106
|
+
### Setup
|
|
107
|
+
|
|
108
|
+
Start Chrome with remote debugging enabled:
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
# macOS
|
|
112
|
+
/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222
|
|
113
|
+
|
|
114
|
+
# Linux
|
|
115
|
+
google-chrome --remote-debugging-port=9222
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Now websnap will automatically use CDP to fetch pages, reusing your existing cookies and sessions. This means **login-required pages just work** if you're already logged in.
|
|
119
|
+
|
|
120
|
+
### Custom CDP Endpoint
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
websnap https://example.com --cdp http://localhost:9333
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### Fallback
|
|
127
|
+
|
|
128
|
+
If Chrome CDP is not available, websnap automatically falls back to plain HTTP fetch. Static pages work without any Chrome setup.
|
|
129
|
+
|
|
130
|
+
## Options
|
|
131
|
+
|
|
132
|
+
| Flag | Description |
|
|
133
|
+
| ---------------------- | ------------------------------------------ |
|
|
134
|
+
| `--json` | Output structured JSON |
|
|
135
|
+
| `--summary` | Generate AI-powered 3-sentence summary |
|
|
136
|
+
| `--raw` | Output raw extracted HTML |
|
|
137
|
+
| `-o, --output <file>` | Write output to file |
|
|
138
|
+
| `--cdp <endpoint>` | Chrome CDP endpoint (default: `http://127.0.0.1:9222`) |
|
|
139
|
+
| `--timeout <ms>` | Page load timeout (default: 15000) |
|
|
140
|
+
| `--user-agent <str>` | Custom User-Agent string |
|
|
141
|
+
| `-V, --version` | Show version number |
|
|
142
|
+
| `-h, --help` | Show help |
|
|
143
|
+
|
|
144
|
+
### Batch Options
|
|
145
|
+
|
|
146
|
+
| Flag | Description |
|
|
147
|
+
| ------------------ | ---------------------------------------------- |
|
|
148
|
+
| `--outdir <dir>` | Write each result as a separate file |
|
|
149
|
+
| `--delay <ms>` | Delay between requests (default: 1000) |
|
|
150
|
+
|
|
151
|
+
## Environment Variables
|
|
152
|
+
|
|
153
|
+
| Variable | Description |
|
|
154
|
+
| ------------------ | ---------------------------------- |
|
|
155
|
+
| `OPENAI_API_KEY` | OpenAI API key for summaries |
|
|
156
|
+
| `OPENAI_MODEL` | OpenAI model (default: gpt-4o-mini) |
|
|
157
|
+
| `ANTHROPIC_API_KEY` | Anthropic API key for summaries |
|
|
158
|
+
| `ANTHROPIC_MODEL` | Anthropic model (default: claude-sonnet-4-20250514) |
|
|
159
|
+
| `OLLAMA_URL` | Ollama server URL (default: http://127.0.0.1:11434) |
|
|
160
|
+
| `OLLAMA_MODEL` | Ollama model (default: llama3.2) |
|
|
161
|
+
|
|
162
|
+
## Examples
|
|
163
|
+
|
|
164
|
+
```bash
|
|
165
|
+
# Quick read of a blog post
|
|
166
|
+
websnap https://paulgraham.com/greatwork.html
|
|
167
|
+
|
|
168
|
+
# Save an article as JSON for processing
|
|
169
|
+
websnap https://arxiv.org/abs/2301.00001 --json -o paper.json
|
|
170
|
+
|
|
171
|
+
# Get a quick summary
|
|
172
|
+
websnap https://news.ycombinator.com/item?id=12345 --summary
|
|
173
|
+
|
|
174
|
+
# Batch scrape a list of articles
|
|
175
|
+
websnap batch research-urls.txt --outdir ./research --json
|
|
176
|
+
|
|
177
|
+
# Use with jq for data extraction
|
|
178
|
+
websnap https://example.com --json | jq '.title, .wordCount'
|
|
179
|
+
|
|
180
|
+
# Pipe to other tools
|
|
181
|
+
websnap https://example.com | glow - # render with glow
|
|
182
|
+
websnap https://example.com | pbcopy # copy to clipboard (macOS)
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
## License
|
|
186
|
+
|
|
187
|
+
MIT
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* fetcher.ts - Page fetching via Chrome CDP or plain HTTP
|
|
3
|
+
*
|
|
4
|
+
* Strategy:
|
|
5
|
+
* 1. Try Chrome CDP connection (port 9222 by default) for JS-rendered pages
|
|
6
|
+
* 2. Fall back to plain HTTP fetch for static pages
|
|
7
|
+
*
|
|
8
|
+
* CDP mode reuses existing Chrome cookies, so login-required pages work
|
|
9
|
+
* if the user has already logged in via their Chrome browser.
|
|
10
|
+
*/
|
|
11
|
+
/** Optional settings accepted by `fetchPage`; every field may be omitted. */
export interface FetchOptions {
|
|
12
|
+
/** Base HTTP URL of the Chrome DevTools endpoint; `fetchPage` falls back to "http://127.0.0.1:9222" when omitted. */
cdpEndpoint?: string;
|
|
13
|
+
/** Page-load / request timeout in milliseconds; `fetchPage` falls back to 15000 when omitted. */
timeout?: number;
|
|
14
|
+
/** User-Agent string forwarded to both the CDP path and the HTTP fallback. */
userAgent?: string;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Fetch a page's HTML content. Tries CDP first, then falls back to HTTP.
*
* @param url Address of the page to fetch.
* @param options Optional endpoint, timeout and User-Agent overrides (see FetchOptions).
* @returns A promise for the page's HTML as a string; a CDP failure is not
*          surfaced to the caller because the HTTP fallback is tried instead.
|
|
18
|
+
*/
|
|
19
|
+
export declare function fetchPage(url: string, options?: FetchOptions): Promise<string>;
|
|
20
|
+
//# sourceMappingURL=fetcher.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../src/fetcher.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,MAAM,WAAW,YAAY;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AASD;;GAEG;AACH,wBAAsB,SAAS,CAC7B,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,YAAiB,GACzB,OAAO,CAAC,MAAM,CAAC,CAejB"}
|
package/dist/fetcher.js
ADDED
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* fetcher.ts - Page fetching via Chrome CDP or plain HTTP
|
|
4
|
+
*
|
|
5
|
+
* Strategy:
|
|
6
|
+
* 1. Try Chrome CDP connection (port 9222 by default) for JS-rendered pages
|
|
7
|
+
* 2. Fall back to plain HTTP fetch for static pages
|
|
8
|
+
*
|
|
9
|
+
* CDP mode reuses existing Chrome cookies, so login-required pages work
|
|
10
|
+
* if the user has already logged in via their Chrome browser.
|
|
11
|
+
*/
|
|
12
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
13
|
+
if (k2 === undefined) k2 = k;
|
|
14
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
15
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
16
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
17
|
+
}
|
|
18
|
+
Object.defineProperty(o, k2, desc);
|
|
19
|
+
}) : (function(o, m, k, k2) {
|
|
20
|
+
if (k2 === undefined) k2 = k;
|
|
21
|
+
o[k2] = m[k];
|
|
22
|
+
}));
|
|
23
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
24
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
25
|
+
}) : function(o, v) {
|
|
26
|
+
o["default"] = v;
|
|
27
|
+
});
|
|
28
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
29
|
+
var ownKeys = function(o) {
|
|
30
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
31
|
+
var ar = [];
|
|
32
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
33
|
+
return ar;
|
|
34
|
+
};
|
|
35
|
+
return ownKeys(o);
|
|
36
|
+
};
|
|
37
|
+
return function (mod) {
|
|
38
|
+
if (mod && mod.__esModule) return mod;
|
|
39
|
+
var result = {};
|
|
40
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
41
|
+
__setModuleDefault(result, mod);
|
|
42
|
+
return result;
|
|
43
|
+
};
|
|
44
|
+
})();
|
|
45
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
46
|
+
exports.fetchPage = fetchPage;
|
|
47
|
+
/**
 * Retrieve the HTML of `url`, preferring a running Chrome instance (CDP) and
 * dropping back to a plain HTTP request when the CDP attempt fails for any reason.
 *
 * @param url      Page address to fetch.
 * @param options  Optional `cdpEndpoint` (default "http://127.0.0.1:9222"),
 *                 `timeout` in ms (default 15000) and `userAgent`.
 * @returns Promise resolving to the page HTML.
 */
async function fetchPage(url, options = {}) {
    const cdpEndpoint = options.cdpEndpoint === undefined ? "http://127.0.0.1:9222" : options.cdpEndpoint;
    const timeout = options.timeout === undefined ? 15000 : options.timeout;
    let cdpFailure;
    try {
        return await fetchViaCDP(url, cdpEndpoint, timeout, options.userAgent);
    }
    catch (err) {
        cdpFailure = err;
    }
    // The CDP failure is only reported (dimmed, on stderr); it never reaches the caller.
    const reason = cdpFailure.message?.substring(0, 60) || "connection failed";
    process.stderr.write(`\x1b[90mCDP unavailable (${reason}), using HTTP fetch...\x1b[0m\n`);
    return fetchViaHTTP(url, timeout, options.userAgent);
}
|
|
63
|
+
/**
 * Fetch a page's rendered HTML through the Chrome DevTools Protocol.
 *
 * Opens a fresh tab through the endpoint's HTTP API, drives it over a
 * WebSocket session, reads `document.documentElement.outerHTML`, and always
 * closes the tab again, including when connecting or navigating fails.
 *
 * @param url          Page to load in the new tab.
 * @param cdpEndpoint  Base HTTP URL of the DevTools endpoint (no trailing slash).
 * @param timeout      Maximum time in ms to wait for the page load event.
 * @param userAgent    Optional User-Agent override applied before navigating.
 * @returns Promise resolving to the page's outer HTML.
 * @throws Error when the endpoint is unreachable, the tab cannot be created,
 *         navigation reports an error, or no HTML string can be extracted.
 */
async function fetchViaCDP(url, cdpEndpoint, timeout, userAgent) {
    // Probe the endpoint first so a missing browser fails fast (3 s budget).
    const versionRes = await fetchWithTimeout(`${cdpEndpoint}/json/version`, 3000);
    if (!versionRes.ok)
        throw new Error(`CDP version endpoint returned ${versionRes.status}`);
    // Parsing the body doubles as a sanity check that this really is a CDP endpoint.
    await versionRes.json();
    // Create a new target (tab)
    const newTabUrl = `${cdpEndpoint}/json/new?${encodeURIComponent("about:blank")}`;
    let newTabRes = await fetchWithTimeout(newTabUrl, 3000);
    if (newTabRes.status === 405) {
        // Current Chrome only accepts PUT for /json/new; older builds accept GET.
        const controller = new AbortController();
        const timer = setTimeout(() => controller.abort(), 3000);
        try {
            newTabRes = await fetch(newTabUrl, { method: "PUT", signal: controller.signal });
        }
        finally {
            clearTimeout(timer);
        }
    }
    if (!newTabRes.ok)
        throw new Error(`CDP new tab returned ${newTabRes.status}`);
    const tabInfo = (await newTabRes.json());
    // From here on a tab exists in the user's browser, so every exit path must close it.
    let session;
    try {
        const wsUrl = tabInfo.webSocketDebuggerUrl;
        if (!wsUrl)
            throw new Error("No WebSocket URL for new tab");
        // Connect via WebSocket
        session = await connectCDP(wsUrl);
        // Set user agent if provided
        if (userAgent) {
            await session.send("Network.setUserAgentOverride", {
                userAgent,
            });
        }
        // Enable page events
        await session.send("Page.enable");
        await session.send("Network.enable");
        // Navigate; a populated errorText means the navigation itself failed.
        const navResult = await session.send("Page.navigate", { url });
        if (navResult?.errorText) {
            throw new Error(`Navigation failed: ${navResult.errorText}`);
        }
        // Wait for load
        await waitForLoad(session, timeout);
        // Small extra delay for JS rendering
        await new Promise((r) => setTimeout(r, 1500));
        // Get document HTML
        const result = await session.send("Runtime.evaluate", {
            expression: "document.documentElement.outerHTML",
            returnByValue: true,
        });
        const html = result?.result?.value;
        if (!html || typeof html !== "string") {
            throw new Error("Failed to extract HTML from page");
        }
        return html;
    }
    finally {
        // Closing the tab is best-effort: a failure here must not mask the real outcome.
        if (tabInfo.id) {
            await fetchWithTimeout(`${cdpEndpoint}/json/close/${tabInfo.id}`, 2000).catch(() => { });
        }
        if (session) {
            session.close();
        }
    }
}
|
|
121
|
+
/**
 * Open a minimal CDP session over a WebSocket.
 *
 * Uses the global `WebSocket` when the runtime provides one and otherwise asks
 * `getWebSocket()` for an implementation.
 *
 * The resolved session object exposes:
 *   - `send(method, params)`: issues a CDP command; resolves with its `result`,
 *     rejects with the protocol error, after 30 s without a reply, or when the
 *     socket closes;
 *   - `close()`: rejects any in-flight commands and closes the socket;
 *   - `_eventHandlers`: Map of CDP event name -> array of listener callbacks.
 *
 * @param wsUrl  WebSocket debugger URL of the target.
 * @returns Promise resolving to the session once the socket is open.
 * @throws Error on socket error, or if the socket does not open within 5 s.
 */
async function connectCDP(wsUrl) {
    const WebSocket = globalThis.WebSocket || (await getWebSocket());
    return new Promise((resolve, reject) => {
        const ws = new WebSocket(wsUrl);
        let msgId = 0;
        // id -> { resolve, reject, timer } for commands still awaiting a reply
        const pending = new Map();
        const eventHandlers = new Map();
        // Reject and forget every in-flight command, releasing its timeout timer.
        const failPending = (error) => {
            for (const entry of pending.values()) {
                clearTimeout(entry.timer);
                entry.reject(error);
            }
            pending.clear();
        };
        // Connection timeout; cleared as soon as the socket opens, errors or closes
        // so it cannot keep the process alive afterwards.
        const connectTimer = setTimeout(() => {
            try {
                ws.close();
            }
            catch { }
            reject(new Error("CDP WebSocket connection timed out"));
        }, 5000);
        ws.onopen = () => {
            clearTimeout(connectTimer);
            const session = {
                ws,
                id: 0,
                send(method, params = {}) {
                    return new Promise((res, rej) => {
                        const id = ++msgId;
                        // Timeout for individual commands
                        const timer = setTimeout(() => {
                            if (pending.delete(id)) {
                                rej(new Error(`CDP command "${method}" timed out`));
                            }
                        }, 30000);
                        pending.set(id, { resolve: res, reject: rej, timer });
                        try {
                            ws.send(JSON.stringify({ id, method, params }));
                        }
                        catch (err) {
                            // Sending failed synchronously: no reply will ever arrive.
                            clearTimeout(timer);
                            pending.delete(id);
                            rej(err);
                        }
                    });
                },
                close() {
                    failPending(new Error("CDP session closed"));
                    try {
                        ws.close();
                    }
                    catch { }
                },
            };
            session._eventHandlers = eventHandlers;
            resolve(session);
        };
        ws.onmessage = (event) => {
            let msg;
            try {
                const data = typeof event.data === "string" ? event.data : event.data.toString();
                msg = JSON.parse(data);
            }
            catch {
                return; // not a JSON frame; nothing to dispatch
            }
            if (!msg || typeof msg !== "object") {
                return;
            }
            if (msg.id && pending.has(msg.id)) {
                const entry = pending.get(msg.id);
                pending.delete(msg.id);
                clearTimeout(entry.timer);
                if (msg.error) {
                    entry.reject(new Error(msg.error.message || JSON.stringify(msg.error)));
                }
                else {
                    entry.resolve(msg.result);
                }
            }
            if (msg.method && eventHandlers.has(msg.method)) {
                for (const h of eventHandlers.get(msg.method)) {
                    try {
                        h(msg.params);
                    }
                    catch {
                        // Listener errors stay isolated so the remaining listeners still run.
                    }
                }
            }
        };
        ws.onerror = (err) => {
            const error = new Error(`WebSocket error: ${err?.message || "connection failed"}`);
            clearTimeout(connectTimer);
            failPending(error);
            reject(error); // no-op once the session has been handed out
        };
        ws.onclose = () => {
            const error = new Error("CDP WebSocket closed");
            clearTimeout(connectTimer);
            failPending(error);
            reject(error); // no-op once the session has been handed out
        };
    });
}
|
|
191
|
+
/**
 * Locate a WebSocket constructor for runtimes without a global `WebSocket`.
 *
 * Tries the `undici` package first and then `ws`. A package that loads but
 * does not expose a usable constructor is skipped instead of being returned.
 *
 * @returns Promise resolving to a WebSocket constructor.
 * @throws Error when neither package provides one.
 */
async function getWebSocket() {
    // [package name, how to pull the constructor out of the loaded module]
    const candidates = [
        ["undici", (mod) => mod.WebSocket],
        ["ws", (mod) => mod.default || mod.WebSocket || mod],
    ];
    for (const [name, pick] of candidates) {
        try {
            const ctor = pick(require(name));
            if (typeof ctor === "function") {
                return ctor;
            }
        }
        catch {
            // Package not installed (or failed to load); try the next candidate.
        }
    }
    // Node enables the global WebSocket by default from v22; v21 needs a flag.
    throw new Error("No WebSocket implementation available. Use Node.js 22+ or install 'ws' package.");
}
|
|
210
|
+
/**
 * Wait until the page's load event arrives over CDP, or until `timeout` ms pass.
 *
 * The promise resolves in both cases: on timeout the page may simply be
 * partially loaded, which is still worth reading. The listener registered in
 * `session._eventHandlers` is removed again once the wait is over.
 *
 * @param session  CDP session whose `_eventHandlers` Map maps event names to listener arrays.
 * @param timeout  Maximum wait in milliseconds.
 * @returns Promise that resolves (never rejects) when the wait ends.
 */
function waitForLoad(session, timeout) {
    return new Promise((resolve) => {
        const eventName = "Page.loadEventFired";
        const eventHandlers = session._eventHandlers;
        let timer;
        const settle = () => {
            clearTimeout(timer);
            // Swap in a filtered copy rather than splicing, so a dispatcher that is
            // currently iterating the old array is not disturbed.
            const remaining = (eventHandlers.get(eventName) || []).filter((h) => h !== settle);
            if (remaining.length > 0) {
                eventHandlers.set(eventName, remaining);
            }
            else {
                eventHandlers.delete(eventName);
            }
            resolve();
        };
        eventHandlers.set(eventName, [...(eventHandlers.get(eventName) || []), settle]);
        // Resolve anyway after timeout - page may have partially loaded
        timer = setTimeout(settle, timeout);
    });
}
|
|
228
|
+
/**
 * Plain HTTP fallback for static pages.
 *
 * Tries the runtime's native `fetch` first. When that fails with what looks
 * like a TLS certificate problem (or a bare "fetch failed"), the request is
 * retried through node:http(s).
 *
 * SECURITY NOTE(review): the retry path (`fetchViaNodeHTTPS`) runs with
 * certificate verification turned off, so the page content can no longer be
 * trusted to come from the named host. The user is now told on stderr;
 * consider making this fallback opt-in.
 *
 * @param url        Page address.
 * @param timeout    Timeout in ms, passed to whichever fetcher runs.
 * @param userAgent  Optional User-Agent; a desktop Chrome string is used when omitted.
 * @returns Promise resolving to the response body text.
 * @throws The native fetch error when it is not certificate-related.
 */
async function fetchViaHTTP(url, timeout, userAgent) {
    const ua = userAgent ||
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
    // Try native fetch first
    try {
        return await fetchViaHTTPNative(url, timeout, ua);
    }
    catch (nativeErr) {
        // Node reports TLS failures through `cause`; the machine-readable reason lives
        // in `code`, which the message text does not always repeat, so inspect both.
        const detail = String(nativeErr?.cause || nativeErr?.message || "");
        const code = String(nativeErr?.cause?.code || nativeErr?.code || "");
        const markers = [
            "UNABLE_TO_GET_ISSUER_CERT",
            "CERT_HAS_EXPIRED",
            "DEPTH_ZERO_SELF_SIGNED",
            "certificate",
            "fetch failed",
        ];
        const shouldRetry = markers.some((marker) => detail.includes(marker) || code.includes(marker));
        if (!shouldRetry) {
            throw nativeErr;
        }
        process.stderr.write(`\x1b[90mNative fetch failed, using node:https fallback...\x1b[0m\n`);
        if (String(url).startsWith("https:")) {
            process.stderr.write(`\x1b[33mWarning:\x1b[0m TLS certificate verification is disabled for this request\n`);
        }
        return fetchViaNodeHTTPS(url, timeout, ua);
    }
}
|
|
253
|
+
/**
 * Download `url` with the runtime's native `fetch`, following redirects and
 * aborting after `timeout` ms (the timer also covers reading the body).
 *
 * A non-HTML Content-Type only triggers a warning on stderr; the body is
 * still returned.
 *
 * @param url        Page address.
 * @param timeout    Abort deadline in milliseconds.
 * @param userAgent  Value sent as the User-Agent header.
 * @returns Promise resolving to the response body text.
 * @throws Error `HTTP <status> <statusText>` for non-2xx responses.
 */
async function fetchViaHTTPNative(url, timeout, userAgent) {
    const aborter = new AbortController();
    const deadline = setTimeout(() => aborter.abort(), timeout);
    try {
        const response = await fetch(url, {
            headers: {
                Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.9",
                "User-Agent": userAgent,
            },
            signal: aborter.signal,
            redirect: "follow",
        });
        if (!response.ok) {
            throw new Error(`HTTP ${response.status} ${response.statusText}`);
        }
        const contentType = response.headers.get("content-type") || "";
        const looksLikeHtml = contentType.includes("text/html") || contentType.includes("application/xhtml");
        if (!looksLikeHtml) {
            process.stderr.write(`\x1b[33mWarning:\x1b[0m Content-Type is "${contentType}", not HTML\n`);
        }
        const body = await response.text();
        return body;
    }
    finally {
        clearTimeout(deadline);
    }
}
|
|
281
|
+
/**
 * Fallback HTTP fetcher built on node:http / node:https.
 *
 * SECURITY NOTE(review): requests are made with `rejectUnauthorized: false`, so
 * TLS certificates are NOT verified. This is what lets the fallback work behind
 * corporate proxies or with outdated cert stores, but it also means the
 * response cannot be trusted to come from the named host.
 *
 * Redirects are followed up to `redirectsLeft` hops; responses that are not
 * consumed (redirects, error statuses) are drained so their sockets are released.
 *
 * @param url            Address to fetch (http: or https:).
 * @param timeout        Socket timeout in milliseconds.
 * @param userAgent      Value sent as the User-Agent header.
 * @param redirectsLeft  Remaining redirect hops allowed (default 5).
 * @returns Promise resolving to the body decoded as UTF-8.
 * @throws Error `HTTP <status>` for non-2xx/3xx responses, "Too many redirects"
 *         once the hop budget is spent, "Request timed out" on socket timeout.
 */
async function fetchViaNodeHTTPS(url, timeout, userAgent, redirectsLeft = 5) {
    const https = await import("node:https");
    const http = await import("node:http");
    return new Promise((resolve, reject) => {
        const parsedUrl = new URL(url);
        const isHTTPS = parsedUrl.protocol === "https:";
        const mod = isHTTPS ? https : http;
        const options = {
            hostname: parsedUrl.hostname,
            port: parsedUrl.port || (isHTTPS ? 443 : 80),
            path: parsedUrl.pathname + parsedUrl.search,
            method: "GET",
            headers: {
                Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.9",
                "User-Agent": userAgent,
            },
            rejectUnauthorized: false, // Handle self-signed/corporate certs
            timeout,
        };
        const req = mod.request(options, (res) => {
            const status = res.statusCode;
            // Follow redirects, bounded by the remaining hop budget
            if (status >= 300 && status < 400 && res.headers.location) {
                res.resume(); // discard the redirect body so the socket is freed
                if (redirectsLeft <= 0) {
                    reject(new Error("Too many redirects"));
                    return;
                }
                const redirectUrl = new URL(res.headers.location, url).toString();
                fetchViaNodeHTTPS(redirectUrl, timeout, userAgent, redirectsLeft - 1)
                    .then(resolve, reject);
                return;
            }
            if (status < 200 || status >= 400) {
                res.resume();
                reject(new Error(`HTTP ${status}`));
                return;
            }
            const chunks = [];
            res.on("data", (chunk) => chunks.push(chunk));
            res.on("end", () => {
                resolve(Buffer.concat(chunks).toString("utf-8"));
            });
            res.on("error", reject);
        });
        req.on("error", reject);
        req.on("timeout", () => {
            req.destroy();
            reject(new Error("Request timed out"));
        });
        req.end();
    });
}
|
|
337
|
+
/**
 * Issue a GET through the global `fetch` and abort it if no response has
 * arrived within `timeout` milliseconds.
 *
 * @param url      Address to request.
 * @param timeout  Abort deadline in milliseconds.
 * @returns Promise resolving to the fetch Response (any status code).
 */
async function fetchWithTimeout(url, timeout) {
    const aborter = new AbortController();
    const { signal } = aborter;
    const deadline = setTimeout(() => {
        aborter.abort();
    }, timeout);
    let response;
    try {
        response = await fetch(url, { signal });
    }
    finally {
        // The deadline only guards the wait for the response.
        clearTimeout(deadline);
    }
    return response;
}
|
|
350
|
+
//# sourceMappingURL=fetcher.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetcher.js","sourceRoot":"","sources":["../src/fetcher.ts"],"names":[],"mappings":";AAAA;;;;;;;;;GASG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAkBH,8BAkBC;AArBD;;GAEG;AACI,KAAK,UAAU,SAAS,CAC7B,GAAW,EACX,UAAwB,EAAE;IAE1B,MAAM,EAAE,WAAW,GAAG,uBAAuB,EAAE,OAAO,GAAG,KAAK,EAAE,GAAG,OAAO,CAAC;IAE3E,gBAAgB;IAChB,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE,WAAW,EAAE,OAAO,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;QAC7E,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,OAAO,MAAW,EAAE,CAAC;QACrB,uCAAuC;QACvC,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,4BAA4B,MAAM,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,mBAAmB,iCAAiC,CACrH,CAAC;IACJ,CAAC;IAED,OAAO,YAAY,CAAC,GAAG,EAAE,OAAO,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;AACvD,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,WAAW,CACxB,GAAW,EACX,WAAmB,EACnB,OAAe,EACf,SAAkB;IAElB,4BAA4B;IAC5B,MAAM,UAAU,GAAG,GAAG,WAAW,eAAe,CAAC;IACjD,MAAM,UAAU,GAAG,MAAM,gBAAgB,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;IAC5D,IAAI,CAAC,UAAU,CAAC,EAAE;QAAE,MAAM,IAAI,KAAK,CAAC,iCAAiC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;IAC1F,MAAM,WAAW,GAAG,MAAM,UAAU,CAAC,IAAI,EAAE,CAAC;IAE5C,4BAA4B;IAC5B,MAAM,SAAS,GAAG,GAAG,WAAW,aAAa,kBAAkB,CAAC,aAAa,CAAC,EAAE,CAAC;IACjF,MAAM,SAAS,GAAG,MAAM,gBAAgB,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IAC1D,IAAI,CAAC,SAAS,CAAC,EAAE;QAAE,MAAM,IAAI,KAAK,CAAC,wBAAwB,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC;IAC/E,MAAM,OAAO,GAAG,CAAC,MAAM,SAAS,CAAC,IAAI,EAAE,CAAwB,CAAC;IAChE,MAAM,KAAK,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAE3C,IAAI,CAAC,KAAK;QAAE,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC;IAE5D,wBAAwB;IACxB,MAAM,OAAO,GAAG,MAAM,UAAU,CAAC,KAAK,CAAC,CAAC;IAExC,IAAI,CAAC;QACH,6BAA6B;QAC7B,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,OAAO,CAAC,IAAI,CAAC,8BAA8B,EAAE;gBACjD,SAAS;aACV,CAAC,CAAC;QACL,CAAC;QAED,qBAAqB;QACrB,MAAM,OAAO,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAClC,MAAM,OAAO,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAErC,WAAW;QACX,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,eAAe,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;QAE/D,gBAAgB;QAChB,MAAM,WAAW,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QAEpC,qCAAqC;QACrC,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;QA
E9C,oBAAoB;QACpB,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,kBAAkB,EAAE;YACpD,UAAU,EAAE,oCAAoC;YAChD,aAAa,EAAE,IAAI;SACpB,CAAC,CAAC;QAEH,MAAM,IAAI,GAAG,MAAM,EAAE,MAAM,EAAE,KAAK,CAAC;QACnC,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAC;QACtD,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;YAAS,CAAC;QACT,gBAAgB;QAChB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,GAAG,WAAW,eAAe,OAAO,CAAC,EAAE,EAAE,CAAC;YAC3D,MAAM,gBAAgB,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QACzD,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;QACV,OAAO,CAAC,KAAK,EAAE,CAAC;IAClB,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,UAAU,CAAC,KAAa;IACrC,uDAAuD;IACvD,MAAM,SAAS,GAAI,UAAkB,CAAC,SAAS,IAAI,CAAC,MAAM,YAAY,EAAE,CAAC,CAAC;IAE1E,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,MAAM,EAAE,GAAG,IAAI,SAAS,CAAC,KAAK,CAAC,CAAC;QAChC,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,MAAM,OAAO,GAAG,IAAI,GAAG,EAGpB,CAAC;QACJ,MAAM,aAAa,GAAG,IAAI,GAAG,EAAqC,CAAC;QAEnE,EAAE,CAAC,MAAM,GAAG,GAAG,EAAE;YACf,MAAM,OAAO,GAAe;gBAC1B,EAAE;gBACF,EAAE,EAAE,CAAC;gBACL,IAAI,CAAC,MAAc,EAAE,SAA8B,EAAE;oBACnD,OAAO,IAAI,OAAO,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;wBAC9B,MAAM,EAAE,GAAG,EAAE,KAAK,CAAC;wBACnB,OAAO,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC;wBAC/C,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC;wBAEhD,kCAAkC;wBAClC,UAAU,CAAC,GAAG,EAAE;4BACd,IAAI,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;gCACpB,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;gCACnB,GAAG,CAAC,IAAI,KAAK,CAAC,gBAAgB,MAAM,aAAa,CAAC,CAAC,CAAC;4BACtD,CAAC;wBACH,CAAC,EAAE,KAAK,CAAC,CAAC;oBACZ,CAAC,CAAC,CAAC;gBACL,CAAC;gBACD,KAAK;oBACH,IAAI,CAAC;wBACH,EAAE,CAAC,KAAK,EAAE,CAAC;oBACb,CAAC;oBAAC,MAAM,CAAC,CAAA,CAAC;gBACZ,CAAC;aACF,CAAC;YAED,OAAe,CAAC,cAAc,GAAG,aAAa,CAAC;YAChD,OAAO,CAAC,OAAO,CAAC,CAAC;QACnB,CAAC,CAAC;QAEF,EAAE,CAAC,SAAS,GAAG,CAAC,KAAU,EAAE,EAAE;YAC5B,IAAI,CAAC;gBACH,MAAM,IAAI,GACR,OAAO,KAAK,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,
EAAE,CAAC;gBACtE,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAE7B,IAAI,GAAG,CAAC,EAAE,IAAI,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;oBAClC,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAE,CAAC;oBACrC,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;oBACvB,IAAI,GAAG,CAAC,KAAK,EAAE,CAAC;wBACd,OAAO,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,IAAI,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;oBAC5E,CAAC;yBAAM,CAAC;wBACN,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;oBAC9B,CAAC;gBACH,CAAC;gBAED,IAAI,GAAG,CAAC,MAAM,IAAI,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;oBAChD,KAAK,MAAM,CAAC,IAAI,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAE,EAAE,CAAC;wBAC/C,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;oBAChB,CAAC;gBACH,CAAC;YACH,CAAC;YAAC,MAAM,CAAC,CAAA,CAAC;QACZ,CAAC,CAAC;QAEF,EAAE,CAAC,OAAO,GAAG,CAAC,GAAQ,EAAE,EAAE;YACxB,MAAM,CAAC,IAAI,KAAK,CAAC,oBAAoB,GAAG,CAAC,OAAO,IAAI,mBAAmB,EAAE,CAAC,CAAC,CAAC;QAC9E,CAAC,CAAC;QAEF,qBAAqB;QACrB,UAAU,CAAC,GAAG,EAAE;YACd,MAAM,CAAC,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC,CAAC;QAC1D,CAAC,EAAE,IAAI,CAAC,CAAC;IACX,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,YAAY;IACzB,IAAI,CAAC;QACH,iEAAiE;QACjE,MAAM,EAAE,SAAS,EAAE,GAAG,yBAAa,QAAe,uCAAC,CAAC;QACpD,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,MAAM,CAAC;QACP,IAAI,CAAC;YACH,MAAM,EAAE,GAAG,yBAAa,IAAW,uCAAC,CAAC;YACrC,OAAO,EAAE,CAAC,OAAO,IAAI,EAAE,CAAC,SAAS,IAAI,EAAE,CAAC;QAC1C,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,KAAK,CACb,iFAAiF,CAClF,CAAC;QACJ,CAAC;IACH,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAAC,OAAmB,EAAE,OAAe;IACvD,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,MAAM,aAAa,GAChB,OAAe,CAAC,cAAc,CAAC;QAElC,MAAM,QAAQ,GAAG,aAAa,CAAC,GAAG,CAAC,qBAAqB,CAAC,IAAI,EAAE,CAAC;QAChE,QAAQ,CAAC,IAAI,CAAC,GAAG,EAAE;YACjB,YAAY,CAAC,KAAK,CAAC,CAAC;YACpB,OAAO,EAAE,CAAC;QACZ,CAAC,CAAC,CAAC;QACH,aAAa,CAAC,GAAG,CAAC,qBAAqB,EAAE,QAAQ,CAAC,CAAC;QAEnD,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE;YAC5B,gEAAgE;YAChE,OAAO,EAAE,CAAC;QACZ,CAAC,EAAE,OAAO,CAAC,CAAC;IACd,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;GAGG;AACH,K
AAK,UAAU,YAAY,CACzB,GAAW,EACX,OAAe,EACf,SAAkB;IAElB,MAAM,EAAE,GACN,SAAS;QACT,uHAAuH,CAAC;IAE1H,yBAAyB;IACzB,IAAI,CAAC;QACH,OAAO,MAAM,kBAAkB,CAAC,GAAG,EAAE,OAAO,EAAE,EAAE,CAAC,CAAC;IACpD,CAAC;IAAC,OAAO,SAAc,EAAE,CAAC;QACxB,2DAA2D;QAC3D,MAAM,GAAG,GAAG,MAAM,CAAC,SAAS,EAAE,KAAK,IAAI,SAAS,EAAE,OAAO,IAAI,EAAE,CAAC,CAAC;QACjE,IACE,GAAG,CAAC,QAAQ,CAAC,2BAA2B,CAAC;YACzC,GAAG,CAAC,QAAQ,CAAC,kBAAkB,CAAC;YAChC,GAAG,CAAC,QAAQ,CAAC,wBAAwB,CAAC;YACtC,GAAG,CAAC,QAAQ,CAAC,aAAa,CAAC;YAC3B,GAAG,CAAC,QAAQ,CAAC,cAAc,CAAC,EAC5B,CAAC;YACD,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,oEAAoE,CACrE,CAAC;YACF,OAAO,iBAAiB,CAAC,GAAG,EAAE,OAAO,EAAE,EAAE,CAAC,CAAC;QAC7C,CAAC;QACD,MAAM,SAAS,CAAC;IAClB,CAAC;AACH,CAAC;AAED,KAAK,UAAU,kBAAkB,CAC/B,GAAW,EACX,OAAe,EACf,SAAiB;IAEjB,MAAM,OAAO,GAA2B;QACtC,MAAM,EACJ,iEAAiE;QACnE,iBAAiB,EAAE,gBAAgB;QACnC,YAAY,EAAE,SAAS;KACxB,CAAC;IAEF,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;IACzC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,OAAO,CAAC,CAAC;IAE5D,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,OAAO;YACP,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,QAAQ,EAAE,QAAQ;SACnB,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,QAAQ,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;QACpE,CAAC;QAED,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;QAC/D,IACE,CAAC,WAAW,CAAC,QAAQ,CAAC,WAAW,CAAC;YAClC,CAAC,WAAW,CAAC,QAAQ,CAAC,mBAAmB,CAAC,EAC1C,CAAC;YACD,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,4CAA4C,WAAW,eAAe,CACvE,CAAC;QACJ,CAAC;QAED,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IAC/B,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,KAAK,CAAC,CAAC;IACtB,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,KAAK,UAAU,iBAAiB,CAC9B,GAAW,EACX,OAAe,EACf,SAAiB;IAEjB,MAAM,KAAK,GAAG,wDAAa,YAAY,GAAC,CAAC;IACzC,MAAM,IAAI,GAAG,wDAAa,WAAW,GAAC,CAAC;IACvC,MAAM,EAAE,GAAG,EAAE,GAAG,wDAAa,UAAU,GAAC,CAAC;IAEzC,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC/B,MAAM,OAAO,GAAG,SAAS,CAAC,QAAQ,KAAK,QAAQ,CAAC;QAChD,MAAM,GAAG,GAAG,OAAO,CAAC,CAAC,CAAC,KAAK,
CAAC,CAAC,CAAC,IAAI,CAAC;QAEnC,MAAM,OAAO,GAAG;YACd,QAAQ,EAAE,SAAS,CAAC,QAAQ;YAC5B,IAAI,EAAE,SAAS,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;YAC5C,IAAI,EAAE,SAAS,CAAC,QAAQ,GAAG,SAAS,CAAC,MAAM;YAC3C,MAAM,EAAE,KAAK;YACb,OAAO,EAAE;gBACP,MAAM,EACJ,iEAAiE;gBACnE,iBAAiB,EAAE,gBAAgB;gBACnC,YAAY,EAAE,SAAS;aACxB;YACD,kBAAkB,EAAE,KAAK,EAAE,qCAAqC;YAChE,OAAO;SACR,CAAC;QAEF,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC,GAAQ,EAAE,EAAE;YAC5C,6BAA6B;YAC7B,IACE,GAAG,CAAC,UAAU,IAAI,GAAG;gBACrB,GAAG,CAAC,UAAU,GAAG,GAAG;gBACpB,GAAG,CAAC,OAAO,CAAC,QAAQ,EACpB,CAAC;gBACD,MAAM,WAAW,GAAG,IAAI,GAAG,CACzB,GAAG,CAAC,OAAO,CAAC,QAAQ,EACpB,GAAG,CACJ,CAAC,QAAQ,EAAE,CAAC;gBACb,iBAAiB,CAAC,WAAW,EAAE,OAAO,EAAE,SAAS,CAAC;qBAC/C,IAAI,CAAC,OAAO,CAAC;qBACb,KAAK,CAAC,MAAM,CAAC,CAAC;gBACjB,OAAO;YACT,CAAC;YAED,IAAI,GAAG,CAAC,UAAU,GAAG,GAAG,IAAI,GAAG,CAAC,UAAU,IAAI,GAAG,EAAE,CAAC;gBAClD,MAAM,CAAC,IAAI,KAAK,CAAC,QAAQ,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;gBAC5C,OAAO;YACT,CAAC;YAED,MAAM,MAAM,GAAa,EAAE,CAAC;YAC5B,GAAG,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;YACtD,GAAG,CAAC,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE;gBACjB,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;gBACrD,OAAO,CAAC,IAAI,CAAC,CAAC;YAChB,CAAC,CAAC,CAAC;YACH,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAC1B,CAAC,CAAC,CAAC;QAEH,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QACxB,GAAG,CAAC,EAAE,CAAC,SAAS,EAAE,GAAG,EAAE;YACrB,GAAG,CAAC,OAAO,EAAE,CAAC;YACd,MAAM,CAAC,IAAI,KAAK,CAAC,mBAAmB,CAAC,CAAC,CAAC;QACzC,CAAC,CAAC,CAAC;QACH,GAAG,CAAC,GAAG,EAAE,CAAC;IACZ,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,gBAAgB,CAC7B,GAAW,EACX,OAAe;IAEf,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;IACzC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,OAAO,CAAC,CAAC;IAC5D,IAAI,CAAC;QACH,OAAO,MAAM,KAAK,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;IACzD,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,KAAK,CAAC,CAAC;IACtB,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* formatter.ts - Convert parsed articles to markdown or JSON output
|
|
3
|
+
*/
|
|
4
|
+
import { ParsedArticle } from "./parser";
|
|
5
|
+
/**
|
|
6
|
+
* Format article as clean markdown
|
|
7
|
+
*/
|
|
8
|
+
export declare function formatMarkdown(article: ParsedArticle, url: string): string;
|
|
9
|
+
/**
|
|
10
|
+
* Format article as structured JSON
|
|
11
|
+
*/
|
|
12
|
+
export declare function formatJSON(article: ParsedArticle, url: string): string;
|
|
13
|
+
/**
|
|
14
|
+
* Format a prompt for AI summarization
|
|
15
|
+
*/
|
|
16
|
+
export declare function formatSummaryPrompt(article: ParsedArticle): string;
|
|
17
|
+
//# sourceMappingURL=formatter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"formatter.d.ts","sourceRoot":"","sources":["../src/formatter.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAiBzC;;GAEG;AACH,wBAAgB,cAAc,CAAC,OAAO,EAAE,aAAa,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,CAqC1E;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,OAAO,EAAE,aAAa,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,CAiBtE;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,aAAa,GAAG,MAAM,CAqBlE"}
|