@iflow-mcp/jakeliume-webpeel 0.22.0 → 0.22.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,48 @@
1
+ /**
2
+ * WebPeel Typed Error System
3
+ *
4
+ * Every error thrown in the pipeline should use these types.
5
+ * The `retryable` flag tells the retry system whether to attempt again.
6
+ * The `code` enables smart routing, logging, and Sentry filtering.
7
+ */
/**
 * Closed set of machine-readable codes a `WebPeelError` can carry.
 *
 * Members appear in their original declaration order; the inline comments
 * only mark runs of related names.
 */
export type ErrorCode =
  // Transport-level failures
  | 'NETWORK_ERROR'
  | 'DNS_RESOLUTION_FAILED'
  | 'CONNECTION_REFUSED'
  | 'CONNECTION_RESET'
  | 'SSL_ERROR'
  | 'SOCKET_TIMEOUT'
  // HTTP_*
  | 'HTTP_CLIENT_ERROR'
  | 'HTTP_SERVER_ERROR'
  | 'HTTP_TOO_MANY_REQUESTS'
  // *TIMEOUT
  | 'TIMEOUT'
  | 'FETCH_TIMEOUT'
  | 'RENDER_TIMEOUT'
  | 'NAVIGATION_TIMEOUT'
  // BLOCKED*
  | 'BLOCKED'
  | 'BLOCKED_CLOUDFLARE'
  | 'BLOCKED_CAPTCHA'
  | 'BLOCKED_WAF'
  | 'BLOCKED_GEO'
  | 'BLOCKED_RATE_LIMIT'
  // BROWSER_*
  | 'BROWSER_CRASH'
  | 'BROWSER_OOM'
  | 'BROWSER_LAUNCH_FAILED'
  | 'BROWSER_CONTEXT_ERROR'
  // Content and request validation
  | 'EMPTY_CONTENT'
  | 'INVALID_URL'
  | 'UNSUPPORTED_PROTOCOL'
  | 'ROBOTS_DENIED'
  | 'AUTH_REQUIRED'
  // PROXY_*
  | 'PROXY_ERROR'
  | 'PROXY_EXHAUSTED'
  // Load protection
  | 'CIRCUIT_OPEN'
  | 'MEMORY_LIMIT'
  | 'RATE_LIMITED'
  // JOB_*
  | 'JOB_TIMEOUT'
  | 'JOB_CANCELLED'
  | 'JOB_STALLED'
  // Fallback
  | 'UNKNOWN';
/**
 * Error subclass used by the WebPeel pipeline. Besides the message, each
 * instance carries a machine-readable code plus retry and HTTP-status metadata.
 */
export declare class WebPeelError extends Error {
  /** Machine-readable classification of the failure. */
  readonly code: ErrorCode;
  /** Tells the retry system whether to attempt the operation again. */
  readonly retryable: boolean;
  /** HTTP status code associated with the failure. */
  readonly statusCode: number;
  /** Optional free-form details supplied by whoever created the error. */
  readonly context?: Record<string, any>;
  /** ISO-8601 timestamp string recorded for the error. */
  readonly timestamp: string;

  /**
   * @param code - Classification of the failure.
   * @param message - Human-readable description.
   * @param options - Optional metadata; the implementation falls back to
   *   `retryable: false` and `statusCode: 500` when those are omitted.
   */
  constructor(
    code: ErrorCode,
    message: string,
    options?: {
      retryable?: boolean;
      statusCode?: number;
      context?: Record<string, any>;
      cause?: Error;
    },
  );

  /** Produce a plain object suitable for transport across processes (worker ↔ API). */
  serialize(): Record<string, any>;

  /** Rebuild an error from its serialized form (e.g., from a Bull job result). */
  static deserialize(data: Record<string, any>): WebPeelError;

  /**
   * Convert a generic Error into the most appropriate WebPeelError.
   *
   * @param err - The error to classify.
   * @param fallbackCode - Code used when nothing more specific matches.
   */
  static fromError(err: Error, fallbackCode?: ErrorCode): WebPeelError;
}
/**
 * Factory helpers for the most common pipeline errors. Every helper returns a
 * ready-to-throw `WebPeelError`; the notes below list the code and HTTP status
 * the implementation assigns.
 */
export declare const Errors: {
  /** `TIMEOUT`, 504, retryable; `ctx` is stored as the error context. */
  timeout: (msg: string, ctx?: Record<string, any>) => WebPeelError;
  /** `FETCH_TIMEOUT`, 504, retryable; `ms` is the timeout in milliseconds. */
  fetchTimeout: (url: string, ms: number) => WebPeelError;
  /** `RENDER_TIMEOUT`, 504, retryable; `ms` is the timeout in milliseconds. */
  renderTimeout: (url: string, ms: number) => WebPeelError;
  /** `BLOCKED`, 403, retryable; `reason` is appended to the message when given. */
  blocked: (url: string, reason?: string) => WebPeelError;
  /** `INVALID_URL`, 400, not retryable. */
  invalidUrl: (url: string) => WebPeelError;
  /** `NETWORK_ERROR`, 502, retryable; `cause` is attached to the error. */
  networkError: (msg: string, cause?: Error) => WebPeelError;
  /** `PROXY_ERROR`, 502, retryable. */
  proxyError: (msg: string) => WebPeelError;
  /** `PROXY_EXHAUSTED`, 502, not retryable. */
  proxyExhausted: () => WebPeelError;
  /** `CIRCUIT_OPEN`, 503, not retryable; `name` identifies the breaker. */
  circuitOpen: (name: string) => WebPeelError;
  /** `MEMORY_LIMIT`, 503, not retryable; `usage` is rendered as `usage * 100` percent. */
  memoryLimit: (usage: number) => WebPeelError;
  /** `EMPTY_CONTENT`, 422, not retryable. */
  emptyContent: (url: string) => WebPeelError;
  /** `AUTH_REQUIRED`, 403, not retryable. */
  authRequired: (url: string) => WebPeelError;
  /** `RATE_LIMITED`, 429, not retryable. */
  rateLimited: (identifier: string) => WebPeelError;
  /** `JOB_CANCELLED`, not retryable; no explicit status is passed. */
  jobCancelled: (jobId: string) => WebPeelError;
  /** `BROWSER_CRASH`, 500, retryable. */
  browserCrash: (msg: string) => WebPeelError;
  /** `UNKNOWN`, 500, not retryable; `cause` is attached to the error. */
  unknown: (msg: string, cause?: Error) => WebPeelError;
};
/**
 * Report whether an error is worth retrying.
 *
 * A `WebPeelError` answers with its own `retryable` flag; any other error is
 * judged by a heuristic over its message text.
 *
 * @param err - The error to inspect.
 * @returns `true` when a retry should be attempted.
 */
export declare function isRetryable(err: Error): boolean;
package/dist/errors.js ADDED
@@ -0,0 +1,128 @@
1
+ /**
2
+ * WebPeel Typed Error System
3
+ *
4
+ * Every error thrown in the pipeline should use these types.
5
+ * The `retryable` flag tells the retry system whether to attempt again.
6
+ * The `code` enables smart routing, logging, and Sentry filtering.
7
+ */
/**
 * Error type used throughout the pipeline. Besides the message it carries a
 * machine-readable `code`, a `retryable` flag for the retry system, an HTTP
 * `statusCode`, optional `context` details and a creation `timestamp`.
 */
export class WebPeelError extends Error {
    code;
    retryable;
    statusCode;
    context;
    timestamp;
    /**
     * @param code - Machine-readable error code.
     * @param message - Human-readable description.
     * @param options - Optional metadata: `retryable` (defaults to false),
     *   `statusCode` (defaults to 500), `context` and `cause`.
     */
    constructor(code, message, options = {}) {
        super(message);
        this.name = 'WebPeelError';
        this.code = code;
        this.retryable = options.retryable ?? false;
        this.statusCode = options.statusCode ?? 500;
        this.context = options.context;
        this.timestamp = new Date().toISOString();
        if (options.cause) {
            this.cause = options.cause;
        }
    }
    /**
     * Serialize for transport across processes (worker ↔ API).
     *
     * @returns A plain object with name, code, message, retryable, statusCode,
     *   context, timestamp and stack. The `cause` is not included.
     */
    serialize() {
        return {
            name: this.name,
            code: this.code,
            message: this.message,
            retryable: this.retryable,
            statusCode: this.statusCode,
            context: this.context,
            timestamp: this.timestamp,
            stack: this.stack,
        };
    }
    /**
     * Deserialize from JSON (e.g., from a Bull job result).
     *
     * The original `timestamp` and `stack` are restored when the payload
     * carries them; otherwise the values generated by the constructor are kept
     * so the revived error never ends up with an undefined stack.
     *
     * @param data - Object previously produced by `serialize()`.
     * @returns A new WebPeelError mirroring the serialized one.
     */
    static deserialize(data) {
        const payload = data ?? {};
        const err = new WebPeelError(payload.code ?? 'UNKNOWN', payload.message, {
            retryable: payload.retryable,
            statusCode: payload.statusCode,
            context: payload.context,
        });
        // Keep the moment the error actually happened, not the moment it was revived.
        if (typeof payload.timestamp === 'string') {
            err.timestamp = payload.timestamp;
        }
        if (typeof payload.stack === 'string') {
            err.stack = payload.stack;
        }
        return err;
    }
    /**
     * Convert a generic Error into the most appropriate WebPeelError.
     *
     * Classification is a case-insensitive substring match on the message,
     * checked in a fixed order (DNS, connection, SSL, timeout, browser,
     * blocked). The first match wins; anything else gets `fallbackCode`.
     * Values that are not Error objects (null, strings, plain objects) are
     * tolerated because a `catch` clause can receive anything.
     *
     * @param err - The caught value.
     * @param fallbackCode - Code used when no rule matches (default 'UNKNOWN').
     * @returns `err` itself when it already is a WebPeelError, otherwise a new
     *   WebPeelError with `err` attached as `cause`.
     */
    static fromError(err, fallbackCode = 'UNKNOWN') {
        if (err instanceof WebPeelError)
            return err;
        // Pull out a message without assuming `err` is an Error instance.
        const raw = err !== null && typeof err === 'object' ? err.message : err;
        const message = typeof raw === 'string' ? raw : String(raw ?? '');
        const msg = message.toLowerCase();
        const has = (...needles) => needles.some((needle) => msg.includes(needle));
        const wrap = (code, retryable, statusCode) => new WebPeelError(code, message, { retryable, statusCode, cause: err });
        // DNS errors
        if (has('getaddrinfo', 'enotfound', 'dns')) {
            return wrap('DNS_RESOLUTION_FAILED', true, 502);
        }
        // Connection errors
        if (has('econnrefused')) {
            return wrap('CONNECTION_REFUSED', true, 502);
        }
        if (has('econnreset', 'socket hang up')) {
            return wrap('CONNECTION_RESET', true, 502);
        }
        // SSL errors
        if (has('ssl', 'tls', 'certificate', 'cert_')) {
            return wrap('SSL_ERROR', false, 502);
        }
        // Timeout errors, refined by which stage timed out
        if (has('timeout', 'etimedout', 'timed out')) {
            if (has('navigation'))
                return wrap('NAVIGATION_TIMEOUT', true, 504);
            if (has('render', 'browser'))
                return wrap('RENDER_TIMEOUT', true, 504);
            return wrap('FETCH_TIMEOUT', true, 504);
        }
        // Browser errors
        if (has('browser') && has('crash', 'killed')) {
            return wrap('BROWSER_CRASH', true, 500);
        }
        if (has('browsertype.launch', 'failed to launch')) {
            return wrap('BROWSER_LAUNCH_FAILED', true, 500);
        }
        // Blocked
        if (has('cloudflare', 'cf-', 'challenge')) {
            return wrap('BLOCKED_CLOUDFLARE', true, 403);
        }
        if (has('captcha', 'recaptcha')) {
            return wrap('BLOCKED_CAPTCHA', false, 403);
        }
        if (has('blocked', 'forbidden', 'access denied')) {
            return wrap('BLOCKED', true, 403);
        }
        return wrap(fallbackCode, false, 500);
    }
}
/**
 * Factory helpers for common errors. Each helper builds a WebPeelError with a
 * fixed code, retry flag and HTTP status, so call sites only supply the
 * variable parts.
 */
export const Errors = {
    /** Generic timeout; `ctx` becomes the error context. */
    timeout(msg, ctx) {
        return new WebPeelError('TIMEOUT', msg, { retryable: true, statusCode: 504, context: ctx });
    },
    /** Fetch of `url` exceeded `ms` milliseconds. */
    fetchTimeout(url, ms) {
        const text = `Fetch timed out after ${ms}ms: ${url}`;
        return new WebPeelError('FETCH_TIMEOUT', text, { retryable: true, statusCode: 504, context: { url, timeoutMs: ms } });
    },
    /** Rendering of `url` exceeded `ms` milliseconds. */
    renderTimeout(url, ms) {
        const text = `Render timed out after ${ms}ms: ${url}`;
        return new WebPeelError('RENDER_TIMEOUT', text, { retryable: true, statusCode: 504, context: { url, timeoutMs: ms } });
    },
    /** Request to `url` was blocked; `reason` is appended in parentheses when truthy. */
    blocked(url, reason) {
        const suffix = reason ? ` (${reason})` : '';
        return new WebPeelError('BLOCKED', `Blocked: ${url}${suffix}`, { retryable: true, statusCode: 403, context: { url, reason } });
    },
    /** `url` could not be accepted as a URL. */
    invalidUrl(url) {
        return new WebPeelError('INVALID_URL', `Invalid URL: ${url}`, { retryable: false, statusCode: 400, context: { url } });
    },
    /** Generic network failure, optionally wrapping the underlying error. */
    networkError(msg, cause) {
        return new WebPeelError('NETWORK_ERROR', msg, { retryable: true, statusCode: 502, cause });
    },
    /** Failure attributed to a proxy. */
    proxyError(msg) {
        return new WebPeelError('PROXY_ERROR', msg, { retryable: true, statusCode: 502 });
    },
    /** Every proxy attempt failed. */
    proxyExhausted() {
        return new WebPeelError('PROXY_EXHAUSTED', 'All proxy attempts exhausted', { retryable: false, statusCode: 502 });
    },
    /** The circuit breaker called `name` is open. */
    circuitOpen(name) {
        return new WebPeelError('CIRCUIT_OPEN', `Circuit breaker open: ${name}`, { retryable: false, statusCode: 503 });
    },
    /** Memory usage too high; `usage` is multiplied by 100 for the message. */
    memoryLimit(usage) {
        const pct = (usage * 100).toFixed(1);
        return new WebPeelError('MEMORY_LIMIT', `Memory usage too high: ${pct}%`, { retryable: false, statusCode: 503, context: { memoryPct: usage } });
    },
    /** Nothing could be extracted from `url`. */
    emptyContent(url) {
        return new WebPeelError('EMPTY_CONTENT', `No content extracted from ${url}`, { retryable: false, statusCode: 422, context: { url } });
    },
    /** `url` requires authentication. */
    authRequired(url) {
        return new WebPeelError('AUTH_REQUIRED', `Authentication required: ${url}`, { retryable: false, statusCode: 403, context: { url } });
    },
    /** Rate limit exceeded for `identifier`. */
    rateLimited(identifier) {
        return new WebPeelError('RATE_LIMITED', `Rate limit exceeded for ${identifier}`, { retryable: false, statusCode: 429 });
    },
    /** Job `jobId` was cancelled; no status is passed, so the constructor default applies. */
    jobCancelled(jobId) {
        return new WebPeelError('JOB_CANCELLED', `Job ${jobId} was cancelled`, { retryable: false, context: { jobId } });
    },
    /** The browser crashed. */
    browserCrash(msg) {
        return new WebPeelError('BROWSER_CRASH', msg, { retryable: true, statusCode: 500 });
    },
    /** Unclassified failure, optionally wrapping the underlying error. */
    unknown(msg, cause) {
        return new WebPeelError('UNKNOWN', msg, { retryable: false, statusCode: 500, cause });
    },
};
/**
 * Check if an error is retryable (works for both WebPeelError and generic Error).
 *
 * A WebPeelError answers with its own `retryable` flag. Anything else is
 * judged by a case-insensitive substring match on its message. The hint list
 * covers the timeout, connection and DNS markers that
 * `WebPeelError.fromError` classifies as retryable, so the two agree on
 * messages such as "connect ETIMEDOUT" or "request timed out".
 *
 * @param err - The caught value; null, undefined and values without a string
 *   message are treated as not retryable.
 * @returns `true` when a retry should be attempted.
 */
export function isRetryable(err) {
    if (err instanceof WebPeelError)
        return err.retryable;
    // Heuristic for generic errors
    const raw = err?.message;
    if (typeof raw !== 'string')
        return false;
    const msg = raw.toLowerCase();
    const retryHints = [
        'timeout',
        'timed out',
        'etimedout',
        'econnreset',
        'econnrefused',
        'socket hang up',
        'getaddrinfo',
        'enotfound',
        'network',
    ];
    return retryHints.some((hint) => msg.includes(hint));
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@iflow-mcp/jakeliume-webpeel",
3
- "version": "0.22.0",
3
+ "version": "0.22.2",
4
4
  "description": "The web data layer for AI agents — fetch, search, crawl, extract, screenshot, and monitor the web with 55+ domain extractors and MCP.",
5
5
  "author": "Jake Liu",
6
6
  "license": "SEE LICENSE IN LICENSE",
@@ -48,6 +48,8 @@
48
48
  "dist/cli-auth.js",
49
49
  "dist/cli-auth.d.ts",
50
50
  "dist/cache.js",
51
+ "dist/errors.js",
52
+ "dist/errors.d.ts",
51
53
  "dist/cache.d.ts"
52
54
  ],
53
55
  "scripts": {