webpeel 0.21.16 → 0.21.18

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
@@ -148,6 +148,17 @@ export async function runFetch(url, options) {
148
148
  }
149
149
  process.exit(0);
150
150
  }
151
+ // --- #4b: Read URL from stdin (pipe mode) if no URL argument provided ---
152
+ if ((!url || url.trim() === '') && !process.stdin.isTTY) {
153
+ try {
154
+ const stdinData = await readStdin();
155
+ const stdinUrl = stdinData.trim().split('\n')[0].trim();
156
+ if (stdinUrl && (stdinUrl.startsWith('http://') || stdinUrl.startsWith('https://'))) {
157
+ url = stdinUrl;
158
+ }
159
+ }
160
+ catch { /* ignore stdin read errors */ }
161
+ }
151
162
  // --- #5: Concise error for missing URL (no help dump) ---
152
163
  if (!url || url.trim() === '') {
153
164
  if (isJson) {
@@ -158,6 +158,13 @@ function heuristicExtractString(fieldName, content, pageUrl) {
158
158
  if (pageUrl)
159
159
  return pageUrl;
160
160
  }
161
+ // Creator / designer / founder / inventor
162
+ if (/creator|designer|founder|inventor|invented_by|created_by/.test(lf)) {
163
+ const m = content.match(/(?:created?|designed?|founded?|invented?)\s+by\s+([A-Z][^\n,·|–—]+?)(?:\s*[,·|–—]|\s+in\s+\d{4}|\.)/i)
164
+ ?? content.match(/(?:creator|designer|founder|inventor)[:\s]+([A-Z][^\n,·|]+?)(?:\s*[,·|–—]|\.)/i);
165
+ if (m?.[1])
166
+ return m[1].replace(/[*_`[\]]/g, '').trim().slice(0, 80);
167
+ }
161
168
  // Director (for movies/films)
162
169
  if (/director/.test(lf)) {
163
170
  const m = content.match(/Director[:\s*]+([^\n|,]+)/i) ?? content.match(/Directed by[:\s]+([^\n|,]+)/i);
@@ -312,12 +319,23 @@ function heuristicExtractNumber(fieldName, content) {
312
319
  }
313
320
  // Year
314
321
  if (/year/.test(lf)) {
315
- // Match 4-digit years (1900-2099), prefer explicit "Year: YYYY" pattern first
322
+ // Explicit "Year: YYYY" label first
316
323
  const explicit = content.match(/\bYear[:\s]+(\d{4})\b/i);
317
324
  if (explicit?.[1]) {
318
325
  const n = parseInt(explicit[1]);
319
326
  return isNaN(n) ? null : n;
320
327
  }
328
+ // For "created_year" / "founded_year" / "released_year" — look for context
329
+ if (/creat|found|release|launch|start|born|inception/.test(lf)) {
330
+ const ctxMatch = content.match(/(?:created?|founded?|released?|launched?|started?|born|inception)[^\d]*(\b(?:19|20)\d{2}\b)/i)
331
+ ?? content.match(/\b(?:in|year)\s+(\b(?:19|20)\d{2}\b)/i)
332
+ ?? content.match(/(\b(?:19|20)\d{2}\b)/);
333
+ if (ctxMatch?.[1]) {
334
+ const n = parseInt(ctxMatch[1]);
335
+ return isNaN(n) ? null : n;
336
+ }
337
+ }
338
+ // Fallback: first year found
321
339
  const m = content.match(/\b((?:19|20)\d{2})\b/);
322
340
  if (m?.[1]) {
323
341
  const n = parseInt(m[1]);
@@ -155,8 +155,11 @@ export function createApp(config = {}) {
155
155
  return callback(null, true);
156
156
  if (corsOrigins.includes(origin))
157
157
  return callback(null, origin);
158
- // Unknown origins: allow (API key clients need cross-origin access) but no credentials
159
- return callback(null, true);
158
+ // Unknown origins: allow (API key clients need cross-origin access) but no credentials.
159
+ // SECURITY: Return '*' instead of reflecting the origin — wildcard is incompatible with
160
+ // credentials (browsers reject Allow-Credentials + *), prevents origin-specific CORS caching,
161
+ // and avoids security-scanner false positives from reflected origins.
162
+ return callback(null, '*');
160
163
  },
161
164
  // credentials: set conditionally via post-cors middleware below
162
165
  credentials: false,
@@ -4,7 +4,7 @@
4
4
  */
5
5
  export interface ApiKeyInfo {
6
6
  key: string;
7
- tier: 'free' | 'starter' | 'pro' | 'enterprise' | 'max';
7
+ tier: 'free' | 'starter' | 'pro' | 'enterprise' | 'max' | 'admin';
8
8
  rateLimit: number;
9
9
  accountId?: string;
10
10
  createdAt: Date;
@@ -127,8 +127,8 @@ export function createAuthMiddleware(authStore) {
127
127
  }
128
128
  return;
129
129
  }
130
- // Check limits (only for PostgresAuthStore)
131
- if (authStore instanceof PostgresAuthStore) {
130
+ // Check limits (only for PostgresAuthStore, skip for admin tier)
131
+ if (authStore instanceof PostgresAuthStore && keyInfo?.tier !== 'admin') {
132
132
  // HARD LIMIT: Check burst limit first (per-hour cap)
133
133
  const { allowed: burstAllowed, burst } = await authStore.checkBurstLimit(apiKey);
134
134
  if (!burstAllowed) {
@@ -25,6 +25,21 @@ export function validateUrlForSSRF(urlString) {
25
25
  if (localhostPatterns.some(pattern => hostname === pattern || hostname.endsWith('.' + pattern))) {
26
26
  throw new SSRFError('Cannot fetch localhost, private networks, or non-HTTP URLs');
27
27
  }
28
+ // SECURITY: Block well-known cloud metadata service hostnames.
29
+ // These hostnames resolve to link-local IPs (169.254.x.x) which are blocked
30
+ // by IP, but hostname-level blocking provides defense-in-depth against DNS
31
+ // rebinding attacks where a domain transiently resolves to a valid IP during
32
+ // validation, then resolves to a private IP for the actual fetch.
33
+ const metadataHostnames = [
34
+ 'metadata.google.internal', // GCP: resolves to 169.254.169.254
35
+ 'metadata.goog', // GCP alternate
36
+ 'metadata.internal', // Generic internal
37
+ 'instance-data.ec2.internal', // AWS alternate
38
+ 'computeMetadata', // Partial GCP hostname
39
+ ];
40
+ if (metadataHostnames.some(m => hostname === m || hostname.endsWith('.' + m))) {
41
+ throw new SSRFError('Cannot fetch localhost, private networks, or non-HTTP URLs');
42
+ }
28
43
  // Parse and validate IP addresses
29
44
  const ipv4Info = parseIPv4(hostname);
30
45
  if (ipv4Info) {
@@ -14,6 +14,8 @@ const TIER_LIMITS = {
14
14
  free: { weekly_limit: 500, burst_limit: 50, rate_limit: 10 },
15
15
  pro: { weekly_limit: 1250, burst_limit: 100, rate_limit: 60 },
16
16
  max: { weekly_limit: 6250, burst_limit: 500, rate_limit: 200 },
17
+ admin: { weekly_limit: 100000, burst_limit: 10000, rate_limit: 1000 },
18
+ enterprise: { weekly_limit: 50000, burst_limit: 2000, rate_limit: 500 },
17
19
  };
18
20
  /**
19
21
  * Create Stripe Billing Portal router
package/llms.txt CHANGED
@@ -39,7 +39,7 @@ webpeel mcp Start MCP server
39
39
  - Quick answers: Ask questions about any page (no LLM needed)
40
40
  - Anti-bot handling: Stealth mode, proxy rotation, graceful degradation
41
41
  - Format options: markdown, text, html, clean (AI-optimized)
42
- - MCP server: 18 tools for AI agent integration
42
+ - MCP server: 7 tools for AI agent integration
43
43
  - Site search: Search eBay, Amazon, GitHub, and 20+ sites with structured output
44
44
 
45
45
  ## Formats
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webpeel",
3
- "version": "0.21.16",
3
+ "version": "0.21.18",
4
4
  "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
5
5
  "author": "Jake Liu",
6
6
  "license": "AGPL-3.0-only",