webpeel 0.21.18 → 0.21.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -153,7 +153,12 @@ async function fetchJson(url, customHeaders) {
|
|
|
153
153
|
Accept: 'application/json',
|
|
154
154
|
...customHeaders,
|
|
155
155
|
});
|
|
156
|
-
|
|
156
|
+
const parsed = tryParseJson(result.html);
|
|
157
|
+
// Debug: log GitHub API failures to help diagnose rate limiting issues
|
|
158
|
+
if (!parsed && url.includes('api.github.com')) {
|
|
159
|
+
console.error(`[github-debug] fetchJson failed for ${url} — raw response (first 200): ${result.html?.substring(0, 200)}`);
|
|
160
|
+
}
|
|
161
|
+
return parsed;
|
|
157
162
|
}
|
|
158
163
|
/** Fetch JSON with exponential backoff retry on 429 / rate-limit errors. */
|
|
159
164
|
async function fetchJsonWithRetry(url, headers, retries = 2, baseDelayMs = 1000) {
|
|
@@ -780,8 +785,13 @@ async function githubExtractor(_html, url) {
|
|
|
780
785
|
const ghHeaders = { Accept: 'application/vnd.github.v3+json' };
|
|
781
786
|
// Use GITHUB_TOKEN if available for higher rate limits (5000/hr vs 60/hr)
|
|
782
787
|
const ghToken = process.env.GITHUB_TOKEN || process.env.GH_TOKEN;
|
|
783
|
-
if (ghToken)
|
|
788
|
+
if (ghToken) {
|
|
784
789
|
ghHeaders.Authorization = `token ${ghToken}`;
|
|
790
|
+
console.log(`[github-debug] Using token (prefix: ${ghToken.substring(0, 8)}..., len: ${ghToken.length})`);
|
|
791
|
+
}
|
|
792
|
+
else {
|
|
793
|
+
console.warn('[github-debug] No GITHUB_TOKEN found — using anonymous (60/hr limit)');
|
|
794
|
+
}
|
|
785
795
|
// User profile: /username (single segment)
|
|
786
796
|
if (pathParts.length === 1) {
|
|
787
797
|
const username = pathParts[0];
|
|
@@ -908,12 +918,14 @@ ${commentsMd || '*No comments.*'}`;
|
|
|
908
918
|
}
|
|
909
919
|
// Repository page: /owner/repo (and no deeper path we handle above)
|
|
910
920
|
if (pathParts.length >= 2) {
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
fetchJson(`https://api.github.com/repos/${owner}/${repo}/readme`, ghHeaders).catch(() => null),
|
|
914
|
-
]);
|
|
921
|
+
// Sequential fetches to avoid secondary rate limits on popular repos
|
|
922
|
+
const repoData = await fetchJsonWithRetry(`https://api.github.com/repos/${owner}/${repo}`, ghHeaders, 2, 1000);
|
|
915
923
|
if (!repoData || repoData.message === 'Not Found')
|
|
916
924
|
return null;
|
|
925
|
+
// Secondary rate limit check
|
|
926
|
+
if (repoData.message?.includes('secondary rate limit') || repoData.message?.includes('abuse'))
|
|
927
|
+
return null;
|
|
928
|
+
const readmeData = await fetchJsonWithRetry(`https://api.github.com/repos/${owner}/${repo}/readme`, ghHeaders, 1, 500).catch(() => null);
|
|
917
929
|
// README content is base64 encoded
|
|
918
930
|
let readmeText = '';
|
|
919
931
|
if (readmeData?.content) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "webpeel",
|
|
3
|
-
"version": "0.21.18",
|
|
3
|
+
"version": "0.21.20",
|
|
4
4
|
"description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
|
|
5
5
|
"author": "Jake Liu",
|
|
6
6
|
"license": "AGPL-3.0-only",
|