webpeel 0.20.7 → 0.20.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,7 @@
3
3
  */
4
4
  import { handleLogin, handleLogout, handleUsage, loadConfig, saveConfig } from '../../cli-auth.js';
5
5
  import { clearCache, cacheStats } from '../../cache.js';
6
+ import { loginToProfile } from '../../core/profiles.js';
6
7
  import { cliVersion } from '../utils.js';
7
8
  export function registerAuthCommands(program) {
8
9
  // ── auth command ──────────────────────────────────────────────────────────
@@ -254,13 +255,37 @@ export function registerAuthCommands(program) {
254
255
  console.log(' Try: webpeel "https://news.ycombinator.com" --json');
255
256
  });
256
257
  // ── login command ─────────────────────────────────────────────────────────
258
+ // Two modes:
259
+ // webpeel login — interactive API key authentication (existing)
260
+ // webpeel login <domain> — browser login: open site, log in, save cookies as profile
257
261
  program
258
- .command('login')
259
- .description('Authenticate the CLI with your API key')
260
- .action(async () => {
262
+ .command('login [domain]')
263
+ .description('Authenticate: no args = API key auth; with domain = browser login (saves cookies as a named profile)')
264
+ .option('--profile <name>', 'Profile name to save under (defaults to the domain)')
265
+ .action(async (domain, opts) => {
261
266
  try {
262
- await handleLogin();
263
- process.exit(0);
267
+ if (domain) {
268
+ // ── Browser login mode ──────────────────────────────────────────
269
+ const url = domain.startsWith('http') ? domain : `https://${domain}`;
270
+ // Extract hostname for profile name default (e.g. "instagram.com" from "https://www.instagram.com/")
271
+ let defaultProfileName;
272
+ try {
273
+ const hostname = new URL(url).hostname;
274
+ // Strip "www." prefix for cleaner profile names
275
+ defaultProfileName = hostname.replace(/^www\./, '');
276
+ }
277
+ catch {
278
+ defaultProfileName = domain;
279
+ }
280
+ const profileName = opts.profile || defaultProfileName;
281
+ await loginToProfile(url, profileName);
282
+ process.exit(0);
283
+ }
284
+ else {
285
+ // ── API key auth mode (original behavior) ───────────────────────
286
+ await handleLogin();
287
+ process.exit(0);
288
+ }
264
289
  }
265
290
  catch (error) {
266
291
  console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
@@ -310,6 +310,7 @@ export function registerInteractCommands(program) {
310
310
  .option('--schema <json>', 'Schema template name (e.g. product, article) or JSON schema for structured output')
311
311
  .option('-s, --silent', 'Silent mode (no spinner)')
312
312
  .option('--json', 'Output as JSON')
313
+ .option('--stream', 'Stream progress via SSE (calls API endpoint, requires API key)')
313
314
  .action(async (prompt, options) => {
314
315
  const llmApiKey = options.llmKey || process.env.OPENAI_API_KEY;
315
316
  const urls = options.urls ? options.urls.split(',').map((u) => u.trim()) : undefined;
@@ -14,7 +14,8 @@ export interface ProfileMetadata {
14
14
  description?: string;
15
15
  }
16
16
  /**
17
- * Valid profile names: letters, digits, hyphens only. No spaces or special chars.
17
+ * Valid profile names: letters, digits, hyphens, and dots. No spaces or special chars.
18
+ * Dots are allowed so domain names like "instagram.com" work as profile names.
18
19
  */
19
20
  export declare function isValidProfileName(name: string): boolean;
20
21
  /**
@@ -45,3 +46,16 @@ export declare function deleteProfile(name: string): boolean;
45
46
  * 3. On browser close or Ctrl+C, captures storage state and saves the profile
46
47
  */
47
48
  export declare function createProfile(name: string, description?: string): Promise<void>;
49
+ /**
50
+ * Open a headed browser, navigate to `url`, and wait for the user to log in.
51
+ * Pressing Enter (or closing the browser) saves the session as a named profile.
52
+ *
53
+ * Unlike `createProfile()` (which opens to about:blank and waits for browser close),
54
+ * this function:
55
+ * 1. Navigates directly to the given URL on launch
56
+ * 2. Waits for the user to press Enter (or close the browser) to save
57
+ * 3. Saves storage state AND creates metadata under ~/.webpeel/profiles/<name>/
58
+ *
59
+ * Profile names may contain letters, digits, hyphens, and dots (e.g. "instagram.com").
60
+ */
61
+ export declare function loginToProfile(url: string, profileName: string, description?: string): Promise<void>;
@@ -19,10 +19,11 @@ function ensureProfilesDir() {
19
19
  }
20
20
  // ─── Name validation ─────────────────────────────────────────────────────────
21
21
  /**
22
- * Valid profile names: letters, digits, hyphens only. No spaces or special chars.
22
+ * Valid profile names: letters, digits, hyphens, and dots. No spaces or special chars.
23
+ * Dots are allowed so domain names like "instagram.com" work as profile names.
23
24
  */
24
25
  export function isValidProfileName(name) {
25
- return /^[a-zA-Z0-9-]+$/.test(name) && name.length > 0 && name.length <= 64;
26
+ return /^[a-zA-Z0-9\-.]+$/.test(name) && name.length > 0 && name.length <= 64;
26
27
  }
27
28
  // ─── Core helpers ─────────────────────────────────────────────────────────────
28
29
  /**
@@ -213,3 +214,137 @@ export async function createProfile(name, description) {
213
214
  });
214
215
  });
215
216
  }
217
+ // ─── Browser-based login helper ───────────────────────────────────────────────
218
+ /**
219
+ * Open a headed browser, navigate to `url`, and wait for the user to log in.
220
+ * Pressing Enter (or closing the browser) saves the session as a named profile.
221
+ *
222
+ * Unlike `createProfile()` (which opens to about:blank and waits for browser close),
223
+ * this function:
224
+ * 1. Navigates directly to the given URL on launch
225
+ * 2. Waits for the user to press Enter (or close the browser) to save
226
+ * 3. Saves storage state AND creates metadata under ~/.webpeel/profiles/<name>/
227
+ *
228
+ * Profile names may contain letters, digits, hyphens, and dots (e.g. "instagram.com").
229
+ */
230
+ export async function loginToProfile(url, profileName, description) {
231
+ if (!isValidProfileName(profileName)) {
232
+ throw new Error(`Invalid profile name "${profileName}". Use only letters, numbers, hyphens, and dots (no spaces).`);
233
+ }
234
+ ensureProfilesDir();
235
+ const profileDir = path.join(PROFILES_DIR, profileName);
236
+ const isUpdate = existsSync(profileDir) && existsSync(path.join(profileDir, 'metadata.json'));
237
+ mkdirSync(profileDir, { recursive: true });
238
+ const browser = await chromium.launch({ headless: false });
239
+ const context = await browser.newContext();
240
+ const page = await context.newPage();
241
+ try {
242
+ await page.goto(url);
243
+ }
244
+ catch (e) {
245
+ // Non-fatal — browser is open, user can navigate manually
246
+ if (process.env.DEBUG)
247
+ console.debug('[webpeel]', 'initial navigation error:', e instanceof Error ? e.message : e);
248
+ }
249
+ console.log('');
250
+ console.log('╔══════════════════════════════════════════════════════╗');
251
+ console.log(`║ WebPeel Browser Login`);
252
+ console.log(`║ URL: ${url}`);
253
+ console.log(`║ Profile: ${profileName}`);
254
+ console.log('║ ║');
255
+ console.log('║ Log in, then press Enter here to save your session. ║');
256
+ console.log('║ (Or close the browser window — same effect.) ║');
257
+ console.log('╚══════════════════════════════════════════════════════╝');
258
+ console.log('');
259
+ let saved = false;
260
+ const saveAndClose = async () => {
261
+ if (saved)
262
+ return;
263
+ saved = true;
264
+ console.log('\nCapturing browser session...');
265
+ try {
266
+ const storageState = await context.storageState();
267
+ writeFileSync(path.join(profileDir, 'storage-state.json'), JSON.stringify(storageState, null, 2));
268
+ // Extract unique domains from cookies (strip leading dot)
269
+ const domains = [
270
+ ...new Set((storageState.cookies ?? [])
271
+ .map((c) => (c.domain ?? '').replace(/^\./, ''))
272
+ .filter(Boolean)),
273
+ ];
274
+ const now = new Date().toISOString();
275
+ const meta = isUpdate
276
+ ? {
277
+ // Preserve original creation date on update
278
+ ...((() => {
279
+ try {
280
+ return JSON.parse(readFileSync(path.join(profileDir, 'metadata.json'), 'utf-8'));
281
+ }
282
+ catch {
283
+ return {};
284
+ }
285
+ })()),
286
+ name: profileName,
287
+ lastUsed: now,
288
+ domains,
289
+ ...(description ? { description } : {}),
290
+ }
291
+ : {
292
+ name: profileName,
293
+ created: now,
294
+ lastUsed: now,
295
+ domains,
296
+ ...(description ? { description } : {}),
297
+ };
298
+ writeFileSync(path.join(profileDir, 'metadata.json'), JSON.stringify(meta, null, 2));
299
+ console.log(`✅ Profile "${profileName}" ${isUpdate ? 'updated' : 'saved'}!`);
300
+ if (domains.length > 0) {
301
+ console.log(` Domains: ${domains.join(', ')}`);
302
+ }
303
+ else {
304
+ console.log(' No login sessions detected (no cookies captured).');
305
+ console.log(' Make sure you completed the login before pressing Enter.');
306
+ }
307
+ console.log('');
308
+ console.log(` Use with: webpeel "${url}" --profile ${profileName}`);
309
+ }
310
+ catch (e) {
311
+ console.error('Warning: Failed to save storage state:', e instanceof Error ? e.message : String(e));
312
+ // Clean up partial directory if this was a new profile
313
+ if (!isUpdate) {
314
+ try {
315
+ rmSync(profileDir, { recursive: true, force: true });
316
+ }
317
+ catch {
318
+ // ignore cleanup errors
319
+ }
320
+ }
321
+ }
322
+ try {
323
+ await browser.close();
324
+ }
325
+ catch {
326
+ // ignore close errors
327
+ }
328
+ };
329
+ // Three ways to save: Enter key, browser close, or Ctrl+C
330
+ await new Promise((resolve) => {
331
+ let resolved = false;
332
+ const done = async () => {
333
+ if (resolved)
334
+ return;
335
+ resolved = true;
336
+ await saveAndClose();
337
+ resolve();
338
+ };
339
+ // Wait for Enter key on stdin
340
+ if (process.stdin.isTTY) {
341
+ process.stdin.setRawMode(false);
342
+ }
343
+ process.stdin.resume();
344
+ process.stdin.once('data', () => done());
345
+ // Browser closed by user
346
+ browser.on('disconnected', () => done());
347
+ // Ctrl+C
348
+ process.once('SIGINT', () => done());
349
+ });
350
+ }
@@ -14,6 +14,7 @@
14
14
  * Returns: { success, data|answer, sources, method, elapsed, tokensUsed }
15
15
  *
16
16
  * Webhook support: pass `webhook` URL to get async delivery with HMAC-SHA256 signing.
17
+ * Streaming support: pass `stream: true` to get SSE events instead of polling.
17
18
  *
18
19
  * 5-minute in-memory cache. Max 10 sources per request.
19
20
  */
@@ -14,6 +14,7 @@
14
14
  * Returns: { success, data|answer, sources, method, elapsed, tokensUsed }
15
15
  *
16
16
  * Webhook support: pass `webhook` URL to get async delivery with HMAC-SHA256 signing.
17
+ * Streaming support: pass `stream: true` to get SSE events instead of polling.
17
18
  *
18
19
  * 5-minute in-memory cache. Max 10 sources per request.
19
20
  */
@@ -81,8 +82,14 @@ function setCache(key, result) {
81
82
  }
82
83
  cache.set(key, { result, expiresAt: Date.now() + CACHE_TTL });
83
84
  }
85
+ // ---------------------------------------------------------------------------
86
+ // SSE helpers
87
+ // ---------------------------------------------------------------------------
88
+ function sseWrite(res, event, data) {
89
+ res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);
90
+ }
84
91
  async function runAgentQuery(params) {
85
- const { prompt, schema, llmApiKey, llmProvider, llmModel, urls, sources: maxSources } = params;
92
+ const { prompt, schema, llmApiKey, llmProvider, llmModel, urls, sources: maxSources, onSearching, onFetching, onExtracting } = params;
86
93
  const startMs = Date.now();
87
94
  const numSources = Math.min(maxSources || 5, 10);
88
95
  // Cache check
@@ -97,6 +104,8 @@ async function runAgentQuery(params) {
97
104
  }
98
105
  else {
99
106
  log.info(`Searching web for: "${prompt}"`);
107
+ if (onSearching)
108
+ onSearching();
100
109
  const { provider, apiKey: searchApiKey } = getBestSearchProvider();
101
110
  try {
102
111
  const searchResults = await provider.searchWeb(prompt.trim(), { count: numSources, apiKey: searchApiKey });
@@ -111,6 +120,8 @@ async function runAgentQuery(params) {
111
120
  }
112
121
  // Step 2: Fetch pages in parallel
113
122
  log.info(`Fetching ${sourceUrls.length} sources in parallel`);
123
+ if (onFetching)
124
+ onFetching(sourceUrls.length);
114
125
  const PER_SOURCE_TIMEOUT_MS = 5000;
115
126
  const fetchPromises = sourceUrls.map(async (source) => {
116
127
  try {
@@ -136,6 +147,8 @@ async function runAgentQuery(params) {
136
147
  let result;
137
148
  if (schema && llmApiKey) {
138
149
  log.info('Using LLM extraction');
150
+ if (onExtracting)
151
+ onExtracting('llm');
139
152
  const extracted = await extractWithLLM({
140
153
  content: combinedContent.slice(0, 30000), schema, llmApiKey, llmProvider: (llmProvider || 'openai'), llmModel,
141
154
  prompt: `Based on these web pages, ${prompt}`, url: fetchResults[0].url,
@@ -146,6 +159,8 @@ async function runAgentQuery(params) {
146
159
  }
147
160
  else {
148
161
  log.info('Using BM25 text extraction');
162
+ if (onExtracting)
163
+ onExtracting('bm25');
149
164
  const qa = quickAnswer({ question: prompt, content: combinedContent, maxPassages: 3, maxChars: 2000 });
150
165
  result = { success: true, answer: qa.answer || combinedContent.slice(0, 2000), confidence: qa.confidence ?? 0,
151
166
  sources: fetchResults.map((r) => ({ url: r.url, title: r.title })), method: 'agent-bm25', tokensUsed: totalTokens, elapsed: Date.now() - startMs };
@@ -158,9 +173,9 @@ async function runAgentQuery(params) {
158
173
  // ---------------------------------------------------------------------------
159
174
  export function createAgentRouter() {
160
175
  const router = Router();
161
- // ── POST /v1/agent — single query (with optional webhook) ──────────────
176
+ // ── POST /v1/agent — single query (with optional webhook or stream) ──────
162
177
  router.post('/', async (req, res) => {
163
- const { prompt, schema, llmApiKey, llmProvider, llmModel, urls, sources: maxSources, webhook } = req.body || {};
178
+ const { prompt, schema, llmApiKey, llmProvider, llmModel, urls, sources: maxSources, webhook, stream } = req.body || {};
164
179
  const requestId = req.requestId || crypto.randomUUID();
165
180
  if (!prompt?.trim()) {
166
181
  return res.status(400).json({
@@ -170,6 +185,34 @@ export function createAgentRouter() {
170
185
  requestId,
171
186
  });
172
187
  }
188
+ // ── Streaming mode (SSE) ─────────────────────────────────────────────
189
+ if (stream === true) {
190
+ res.setHeader('Content-Type', 'text/event-stream');
191
+ res.setHeader('Cache-Control', 'no-cache');
192
+ res.setHeader('Connection', 'keep-alive');
193
+ res.setHeader('X-Accel-Buffering', 'no');
194
+ res.flushHeaders();
195
+ try {
196
+ const result = await runAgentQuery({
197
+ prompt, schema, llmApiKey, llmProvider, llmModel, urls, sources: maxSources,
198
+ onSearching: () => {
199
+ sseWrite(res, 'searching', { message: 'Searching the web...' });
200
+ },
201
+ onFetching: (count) => {
202
+ sseWrite(res, 'fetching', { message: `Fetching ${count} sources...`, count });
203
+ },
204
+ onExtracting: (method) => {
205
+ sseWrite(res, 'extracting', { message: method === 'llm' ? 'Extracting with LLM...' : 'Analyzing with BM25...', method });
206
+ },
207
+ });
208
+ sseWrite(res, 'done', { ...result, requestId });
209
+ }
210
+ catch (err) {
211
+ sseWrite(res, 'error', { message: err.message || 'An unexpected error occurred', requestId });
212
+ }
213
+ res.end();
214
+ return;
215
+ }
173
216
  // Async mode: webhook provided → return immediately, deliver result later
174
217
  if (webhook) {
175
218
  const jobId = crypto.randomUUID();
@@ -198,7 +241,7 @@ export function createAgentRouter() {
198
241
  });
199
242
  // ── POST /v1/agent/batch — parallel batch queries ─────────────────────
200
243
  router.post('/batch', async (req, res) => {
201
- const { prompts, schema, llmApiKey, llmProvider, llmModel, sources, webhook } = req.body || {};
244
+ const { prompts, schema, llmApiKey, llmProvider, llmModel, sources, webhook, stream } = req.body || {};
202
245
  const requestId = req.requestId || crypto.randomUUID();
203
246
  if (!Array.isArray(prompts) || prompts.length === 0) {
204
247
  return res.status(400).json({
@@ -214,7 +257,57 @@ export function createAgentRouter() {
214
257
  const jobId = crypto.randomUUID();
215
258
  const job = { id: jobId, status: 'processing', total: prompts.length, completed: 0, results: [], webhook, createdAt: Date.now() };
216
259
  batchJobs.set(jobId, job);
217
- // Return immediately, then process in background
260
+ // ── Streaming mode (SSE) keep connection open ──────────────────────
261
+ if (stream === true) {
262
+ res.setHeader('Content-Type', 'text/event-stream');
263
+ res.setHeader('Cache-Control', 'no-cache');
264
+ res.setHeader('Connection', 'keep-alive');
265
+ res.setHeader('X-Accel-Buffering', 'no');
266
+ res.flushHeaders();
267
+ // Send start event
268
+ sseWrite(res, 'start', { id: jobId, total: prompts.length, requestId });
269
+ const sem = new Semaphore(5);
270
+ const tasks = prompts.map(async (prompt) => {
271
+ await sem.acquire();
272
+ try {
273
+ const result = await runAgentQuery({ prompt, schema, llmApiKey, llmProvider, llmModel, sources });
274
+ const entry = {
275
+ prompt,
276
+ success: !!result.success,
277
+ answer: result.answer,
278
+ data: result.data,
279
+ sources: result.sources,
280
+ method: result.method,
281
+ elapsed: result.elapsed,
282
+ };
283
+ job.results.push(entry);
284
+ job.completed++;
285
+ // Send per-prompt progress event
286
+ sseWrite(res, 'progress', { completed: job.completed, total: job.total, result: entry });
287
+ }
288
+ catch (err) {
289
+ const entry = { prompt, success: false, error: err.message };
290
+ job.results.push(entry);
291
+ job.completed++;
292
+ sseWrite(res, 'progress', { completed: job.completed, total: job.total, result: entry });
293
+ }
294
+ finally {
295
+ sem.release();
296
+ }
297
+ });
298
+ await Promise.allSettled(tasks);
299
+ job.status = 'completed';
300
+ // Send done event
301
+ sseWrite(res, 'done', { id: jobId, total: job.total, completed: job.completed, requestId });
302
+ res.end();
303
+ // Fire webhook if configured
304
+ if (webhook) {
305
+ sendWebhook(webhook, 'agent.batch.completed', { id: jobId, total: job.total, completed: job.completed, results: job.results })
306
+ .catch((err) => log.error('Batch webhook failed:', err.message));
307
+ }
308
+ return;
309
+ }
310
+ // Non-streaming mode: Return immediately, then process in background
218
311
  res.json({ success: true, id: jobId, status: 'processing', total: prompts.length, requestId });
219
312
  // Process in background with concurrency limit of 5
220
313
  // eslint-disable-next-line @typescript-eslint/no-floating-promises
@@ -11,12 +11,14 @@ import { Router } from 'express';
11
11
  import '../types.js'; // Augments Express.Request with requestId
12
12
  import { crawl } from '../../core/crawler.js';
13
13
  import { validateUrlForSSRF, SSRFError } from '../middleware/url-validator.js';
14
+ import crypto from 'crypto';
14
15
  export function createCrawlRouter(jobQueue) {
15
16
  const router = Router();
16
17
  /**
17
18
  * POST /v1/crawl
18
19
  *
19
20
  * Start an async crawl job. Returns a job ID immediately; poll GET /v1/crawl/:id for status.
21
+ * With stream:true, keeps the connection open and sends SSE events per page.
20
22
  *
21
23
  * Body:
22
24
  * url {string} Required. Starting URL.
@@ -26,10 +28,11 @@ export function createCrawlRouter(jobQueue) {
26
28
  * excludePatterns {string[]} Regex patterns — skip matching URLs.
27
29
  * formats {string[]} Content formats: 'markdown' | 'text' (default: ['markdown']).
28
30
  * webhook {object} Optional webhook to POST results to when done.
31
+ * stream {boolean} If true, respond with SSE events (start → progress → done).
29
32
  */
30
33
  router.post('/', async (req, res) => {
31
34
  try {
32
- const { url, maxPages = 10, maxDepth = 2, includePatterns = [], excludePatterns = [], webhook, } = req.body ?? {};
35
+ const { url, maxPages = 10, maxDepth = 2, includePatterns = [], excludePatterns = [], webhook, stream, } = req.body ?? {};
33
36
  // Validate URL
34
37
  if (!url || typeof url !== 'string') {
35
38
  res.status(400).json({
@@ -78,6 +81,76 @@ export function createCrawlRouter(jobQueue) {
78
81
  throw error;
79
82
  }
80
83
  const ownerId = req.auth?.keyInfo?.accountId;
84
+ // ── Streaming mode (SSE) — keep connection open ──────────────────────
85
+ if (stream === true) {
86
+ res.setHeader('Content-Type', 'text/event-stream');
87
+ res.setHeader('Cache-Control', 'no-cache');
88
+ res.setHeader('Connection', 'keep-alive');
89
+ res.setHeader('X-Accel-Buffering', 'no');
90
+ res.flushHeaders();
91
+ const jobId = crypto.randomUUID();
92
+ // Send start event (total unknown until crawl runs)
93
+ res.write(`event: start\ndata: ${JSON.stringify({ id: jobId, url, maxPages, requestId: req.requestId })}\n\n`);
94
+ const crawlOptions = {
95
+ maxPages,
96
+ maxDepth,
97
+ tier: req.auth?.tier,
98
+ onProgress: (progress) => {
99
+ const total = progress.crawled + progress.queued;
100
+ res.write(`event: progress\ndata: ${JSON.stringify({
101
+ id: jobId,
102
+ completed: progress.crawled,
103
+ total,
104
+ queued: progress.queued,
105
+ currentUrl: progress.currentUrl,
106
+ })}\n\n`);
107
+ },
108
+ };
109
+ if (Array.isArray(includePatterns) && includePatterns.length > 0) {
110
+ crawlOptions.includePatterns = includePatterns;
111
+ }
112
+ if (Array.isArray(excludePatterns) && excludePatterns.length > 0) {
113
+ crawlOptions.excludePatterns = excludePatterns;
114
+ }
115
+ try {
116
+ const results = await crawl(url, crawlOptions);
117
+ const data = results.map(r => ({
118
+ url: r.url,
119
+ title: r.title,
120
+ content: r.markdown,
121
+ links: r.links,
122
+ elapsed: r.elapsed,
123
+ }));
124
+ res.write(`event: done\ndata: ${JSON.stringify({
125
+ id: jobId,
126
+ total: results.length,
127
+ completed: results.length,
128
+ results: data,
129
+ requestId: req.requestId,
130
+ })}\n\n`);
131
+ // Fire webhook if configured
132
+ if (webhook) {
133
+ Promise.resolve(jobQueue.createJob('crawl', webhook, ownerId)).then((job) => {
134
+ jobQueue.updateJob(job.id, {
135
+ status: 'completed',
136
+ data,
137
+ total: results.length,
138
+ completed: results.length,
139
+ creditsUsed: results.length,
140
+ });
141
+ }).catch(() => { });
142
+ }
143
+ }
144
+ catch (error) {
145
+ res.write(`event: error\ndata: ${JSON.stringify({
146
+ id: jobId,
147
+ message: error.message || 'Crawl failed',
148
+ requestId: req.requestId,
149
+ })}\n\n`);
150
+ }
151
+ res.end();
152
+ return;
153
+ }
81
154
  const job = await jobQueue.createJob('crawl', webhook, ownerId);
82
155
  // Start crawl in background
83
156
  setImmediate(async () => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webpeel",
3
- "version": "0.20.7",
3
+ "version": "0.20.8",
4
4
  "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
5
5
  "author": "Jake Liu",
6
6
  "license": "AGPL-3.0-only",