webpeel 0.21.20 → 0.21.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -153,12 +153,7 @@ async function fetchJson(url, customHeaders) {
153
153
  Accept: 'application/json',
154
154
  ...customHeaders,
155
155
  });
156
- const parsed = tryParseJson(result.html);
157
- // Debug: log GitHub API failures to help diagnose rate limiting issues
158
- if (!parsed && url.includes('api.github.com')) {
159
- console.error(`[github-debug] fetchJson failed for ${url} — raw response (first 200): ${result.html?.substring(0, 200)}`);
160
- }
161
- return parsed;
156
+ return tryParseJson(result.html);
162
157
  }
163
158
  /** Fetch JSON with exponential backoff retry on 429 / rate-limit errors. */
164
159
  async function fetchJsonWithRetry(url, headers, retries = 2, baseDelayMs = 1000) {
@@ -785,13 +780,8 @@ async function githubExtractor(_html, url) {
785
780
  const ghHeaders = { Accept: 'application/vnd.github.v3+json' };
786
781
  // Use GITHUB_TOKEN if available for higher rate limits (5000/hr vs 60/hr)
787
782
  const ghToken = process.env.GITHUB_TOKEN || process.env.GH_TOKEN;
788
- if (ghToken) {
783
+ if (ghToken)
789
784
  ghHeaders.Authorization = `token ${ghToken}`;
790
- console.log(`[github-debug] Using token (prefix: ${ghToken.substring(0, 8)}..., len: ${ghToken.length})`);
791
- }
792
- else {
793
- console.warn('[github-debug] No GITHUB_TOKEN found — using anonymous (60/hr limit)');
794
- }
795
785
  // User profile: /username (single segment)
796
786
  if (pathParts.length === 1) {
797
787
  const username = pathParts[0];
@@ -29,7 +29,7 @@ export function createSearchRouter(authStore) {
29
29
  // scrapeResults=true: fetches full page content for each result (like Firecrawl's scrape_options).
30
30
  // Adds `content` field to each result. Significantly increases response time and credits used.
31
31
  // Documented in OpenAPI spec under /v1/search parameters.
32
- const { q, count, scrapeResults, sources, categories, tbs, country, location } = req.query;
32
+ const { q, count, scrapeResults, enrich, sources, categories, tbs, country, location } = req.query;
33
33
  // --- Search provider (new: BYOK Brave support) ---
34
34
  const providerParam = (req.query.provider || '').toLowerCase() || 'auto';
35
35
  const validProviders = ['duckduckgo', 'brave', 'stealth', 'google'];
@@ -61,7 +61,8 @@ export function createSearchRouter(authStore) {
61
61
  const countryStr = country || '';
62
62
  const locationStr = location || '';
63
63
  // Build cache key (include all parameters)
64
- const cacheKey = `search:${providerId}:${q}:${resultCount}:${sourcesStr}:${shouldScrape}:${categoriesStr}:${tbsStr}:${countryStr}:${locationStr}`;
64
+ const enrichCount = enrich ? Math.min(Math.max(parseInt(enrich, 10) || 0, 0), 5) : 0;
65
+ const cacheKey = `search:${providerId}:${q}:${resultCount}:${sourcesStr}:${shouldScrape}:${enrichCount}:${categoriesStr}:${tbsStr}:${countryStr}:${locationStr}`;
65
66
  // Check cache
66
67
  const cached = cache.get(cacheKey);
67
68
  if (cached) {
@@ -133,7 +134,7 @@ export function createSearchRouter(authStore) {
133
134
  });
134
135
  });
135
136
  }
136
- // Scrape each result URL if requested
137
+ // Scrape each result URL if requested (sequential — legacy)
137
138
  if (shouldScrape) {
138
139
  for (const result of results) {
139
140
  try {
@@ -148,6 +149,40 @@ export function createSearchRouter(authStore) {
148
149
  }
149
150
  }
150
151
  }
152
+ // Enrich top N results in parallel with timeout (fast alternative to scrapeResults)
153
+ // IMPORTANT: forceBrowser=false, stealth=false to prevent OOM on 512MB containers
154
+ if (enrichCount > 0 && !shouldScrape) {
155
+ const ENRICH_TIMEOUT = 4000; // 4s hard timeout per URL
156
+ const toEnrich = results.slice(0, enrichCount);
157
+ const enrichResults = await Promise.allSettled(toEnrich.map(async (result) => {
158
+ const fetchPromise = peel(result.url, {
159
+ format: 'markdown',
160
+ maxTokens: 1500,
161
+ render: false,
162
+ stealth: false,
163
+ }).then(peelResult => ({
164
+ url: result.url,
165
+ content: peelResult.content?.substring(0, 1500) || null,
166
+ wordCount: peelResult.content?.trim().split(/\s+/).length || 0,
167
+ method: peelResult.method || 'unknown',
168
+ fetchTimeMs: peelResult.elapsed || 0,
169
+ }));
170
+ const timeoutPromise = new Promise(resolve => setTimeout(() => resolve({ url: result.url, content: null, wordCount: 0, method: 'timeout', fetchTimeMs: 0 }), ENRICH_TIMEOUT));
171
+ return Promise.race([fetchPromise, timeoutPromise]);
172
+ }));
173
+ // Merge enrichment data back into results
174
+ for (const settled of enrichResults) {
175
+ if (settled.status === 'fulfilled' && settled.value.content) {
176
+ const match = results.find(r => r.url === settled.value.url);
177
+ if (match) {
178
+ match.content = settled.value.content;
179
+ match.wordCount = settled.value.wordCount;
180
+ match.method = settled.value.method;
181
+ match.fetchTimeMs = settled.value.fetchTimeMs;
182
+ }
183
+ }
184
+ }
185
+ }
151
186
  data.web = results;
152
187
  }
153
188
  // Fetch news results (DDG only — Brave news is not supported via HTML scraping)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webpeel",
3
- "version": "0.21.20",
3
+ "version": "0.21.22",
4
4
  "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
5
5
  "author": "Jake Liu",
6
6
  "license": "AGPL-3.0-only",
@@ -58,7 +58,8 @@
58
58
  "lint": "tsc --noEmit",
59
59
  "prepublishOnly": "bash scripts/pre-publish.sh",
60
60
  "serve": "node dist/server/app.js",
61
- "mcp": "node dist/mcp/server.js"
61
+ "mcp": "node dist/mcp/server.js",
62
+ "version": "bash scripts/postversion.sh"
62
63
  },
63
64
  "repository": {
64
65
  "type": "git",