webpeel 0.21.41 → 0.21.43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/domain-extractors.js +27 -10
- package/package.json +1 -1
|
@@ -149,16 +149,33 @@ function unixToIso(sec) {
|
|
|
149
149
|
}
|
|
150
150
|
/**
 * Fetch a URL with plain `fetch` and parse the response body as JSON.
 *
 * simpleFetch is deliberately NOT used for JSON API calls: it adds stealth
 * browser headers (Sec-CH-UA, Sec-Fetch-*, etc.) which confuse API endpoints
 * like api.github.com into returning HTML.
 *
 * The whole request — headers AND body download — is bounded by a single
 * 15 second timeout enforced through an AbortController.
 *
 * @param {string} url - Endpoint to request.
 * @param {Object} [customHeaders] - Extra request headers; on a key collision
 *   they override the default `User-Agent` / `Accept` values.
 * @returns {Promise<*>} Whatever `tryParseJson` yields for the body text. A
 *   `null` result with a non-empty body is logged as a non-JSON response
 *   (presumably `tryParseJson` returns null on parse failure — confirm against
 *   its definition).
 * @throws Propagates any error raised by `fetch` or by reading the body,
 *   including the abort error produced when the timeout fires.
 */
async function fetchJson(url, customHeaders) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), 15000);
    try {
        const resp = await fetch(url, {
            headers: {
                'User-Agent': 'webpeel/0.21 (https://webpeel.dev)',
                'Accept': 'application/json',
                ...customHeaders,
            },
            signal: controller.signal,
            redirect: 'follow',
        });
        // fetch() resolves as soon as the response headers arrive; the timer
        // must stay armed while the body streams in, otherwise a stalled body
        // would hang this call indefinitely.
        const text = await resp.text();
        const parsed = tryParseJson(text);
        if (parsed === null && text.length > 0) {
            console.warn(`[webpeel:fetchJson] Non-JSON response from ${url} (${text.length} bytes, status: ${resp.status}): ${text.slice(0, 120)}`);
        }
        return parsed;
    }
    finally {
        // Runs on success and on failure, so the timer can never leak.
        clearTimeout(timer);
    }
}
|
|
163
180
|
/** Fetch JSON with exponential backoff retry on 429 / rate-limit errors. */
|
|
164
181
|
async function fetchJsonWithRetry(url, headers, retries = 2, baseDelayMs = 1000) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "webpeel",
|
|
3
|
-
"version": "0.21.41",
|
|
3
|
+
"version": "0.21.43",
|
|
4
4
|
"description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
|
|
5
5
|
"author": "Jake Liu",
|
|
6
6
|
"license": "AGPL-3.0-only",
|