webpeel 0.21.18 → 0.21.20

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -153,7 +153,12 @@ async function fetchJson(url, customHeaders) {
153
153
  Accept: 'application/json',
154
154
  ...customHeaders,
155
155
  });
156
- return tryParseJson(result.html);
156
+ const parsed = tryParseJson(result.html);
157
+ // Debug: log GitHub API failures to help diagnose rate limiting issues
158
+ if (!parsed && url.includes('api.github.com')) {
159
+ console.error(`[github-debug] fetchJson failed for ${url} — raw response (first 200): ${result.html?.substring(0, 200)}`);
160
+ }
161
+ return parsed;
157
162
  }
158
163
  /** Fetch JSON with exponential backoff retry on 429 / rate-limit errors. */
159
164
  async function fetchJsonWithRetry(url, headers, retries = 2, baseDelayMs = 1000) {
@@ -780,8 +785,13 @@ async function githubExtractor(_html, url) {
780
785
  const ghHeaders = { Accept: 'application/vnd.github.v3+json' };
781
786
  // Use GITHUB_TOKEN if available for higher rate limits (5000/hr vs 60/hr)
782
787
  const ghToken = process.env.GITHUB_TOKEN || process.env.GH_TOKEN;
783
- if (ghToken)
788
+ if (ghToken) {
784
789
  ghHeaders.Authorization = `token ${ghToken}`;
790
+ console.log(`[github-debug] Using token (prefix: ${ghToken.substring(0, 8)}..., len: ${ghToken.length})`);
791
+ }
792
+ else {
793
+ console.warn('[github-debug] No GITHUB_TOKEN found — using anonymous (60/hr limit)');
794
+ }
785
795
  // User profile: /username (single segment)
786
796
  if (pathParts.length === 1) {
787
797
  const username = pathParts[0];
@@ -908,12 +918,14 @@ ${commentsMd || '*No comments.*'}`;
908
918
  }
909
919
  // Repository page: /owner/repo (and no deeper path we handle above)
910
920
  if (pathParts.length >= 2) {
911
- const [repoData, readmeData] = await Promise.all([
912
- fetchJson(`https://api.github.com/repos/${owner}/${repo}`, ghHeaders),
913
- fetchJson(`https://api.github.com/repos/${owner}/${repo}/readme`, ghHeaders).catch(() => null),
914
- ]);
921
+ // Sequential fetches to avoid secondary rate limits on popular repos
922
+ const repoData = await fetchJsonWithRetry(`https://api.github.com/repos/${owner}/${repo}`, ghHeaders, 2, 1000);
915
923
  if (!repoData || repoData.message === 'Not Found')
916
924
  return null;
925
+ // Secondary rate limit check
926
+ if (repoData.message?.includes('secondary rate limit') || repoData.message?.includes('abuse'))
927
+ return null;
928
+ const readmeData = await fetchJsonWithRetry(`https://api.github.com/repos/${owner}/${repo}/readme`, ghHeaders, 1, 500).catch(() => null);
917
929
  // README content is base64 encoded
918
930
  let readmeText = '';
919
931
  if (readmeData?.content) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webpeel",
3
- "version": "0.21.18",
3
+ "version": "0.21.20",
4
4
  "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
5
5
  "author": "Jake Liu",
6
6
  "license": "AGPL-3.0-only",