npm - webpeel - Versions diffs - 0.20.4 → 0.20.6 - Mend

webpeel 0.20.4 → 0.20.6

This diff shows the changes between two publicly available versions of a package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (11)

package/dist/cli/commands/fetch.js +115 -10
package/dist/cli/commands/search.js +71 -4
package/dist/cli/utils.js +13 -6
package/dist/core/search-provider.d.ts +2 -2
package/dist/core/search-provider.js +52 -6
package/dist/core/strategies.js +6 -2
package/dist/server/routes/agent.d.ts +4 -4
package/dist/server/routes/agent.js +187 -170
package/dist/server/routes/session.d.ts +3 -1
package/dist/server/routes/session.js +160 -9
package/package.json +1 -1

package/dist/cli/commands/fetch.js CHANGED Viewed

@@ -7,12 +7,51 @@ import { getProfilePath, loadStorageState, touchProfile } from '../../core/profi
 import { peel, cleanup } from '../../index.js';
 import { checkUsage, showUsageFooter, loadConfig } from '../../cli-auth.js';
 import { getCache, setCache, parseTTL } from '../../cache.js';
-import { estimateTokens } from '../../core/markdown.js';
+import { estimateTokens, htmlToMarkdown } from '../../core/markdown.js';
 import { distillToBudget, budgetListings } from '../../core/budget.js';
 import { parseActions, formatError, fetchViaApi, outputResult, writeStdout, buildEnvelope, classifyErrorCode, formatListingsCsv, normaliseExtractedToRows, } from '../utils.js';
+// ─── readStdin ────────────────────────────────────────────────────────────────
+async function readStdin() {
+    const chunks = [];
+    for await (const chunk of process.stdin) {
+        chunks.push(Buffer.from(chunk));
+    }
+    return Buffer.concat(chunks).toString('utf-8');
+}
+// ─── runStdin ─────────────────────────────────────────────────────────────────
+// Read HTML from stdin, convert to markdown, and output
+async function runStdin(options) {
+    try {
+        const html = await readStdin();
+        if (!html.trim()) {
+            process.stderr.write('Error: No input received on stdin\n');
+            process.exit(1);
+        }
+        const markdown = htmlToMarkdown(html, { raw: false, prune: true });
+        if (options.json) {
+            const tokens = estimateTokens(markdown);
+            process.stdout.write(JSON.stringify({ success: true, content: markdown, tokens }) + '\n');
+        }
+        else {
+            process.stdout.write(markdown + '\n');
+        }
+    }
+    catch (err) {
+        process.stderr.write(`Error: ${err.message}\n`);
+        process.exit(1);
+    }
+}
 // ─── runFetch ─────────────────────────────────────────────────────────────────
 // Main fetch handler — shared with the `pipe` and `ask` subcommands
 export async function runFetch(url, options) {
+    // --content-only: override all output flags — we just want raw content
+    if (options.contentOnly) {
+        options.silent = true;
+        // Disable json/text/html — we output content directly
+        options.json = false;
+        options.html = false;
+        options.text = false;
+    }
     // Handle --format flag: maps to existing boolean flags
     if (options.format) {
         const fmt = options.format.toLowerCase();
@@ -30,9 +69,10 @@ export async function runFetch(url, options) {
     }
     // Smart defaults: when piped (not a TTY), default to silent JSON + budget
     // BUT respect explicit --format flag (user chose the output format)
+    // AND respect --content-only (raw content output, no JSON wrapper)
     const isPiped = !process.stdout.isTTY;
     const hasExplicitFormat = options.format && ['text', 'html', 'markdown', 'md'].includes(options.format.toLowerCase());
-    if (isPiped && !options.html && !options.text && !hasExplicitFormat) {
+    if (isPiped && !options.html && !options.text && !hasExplicitFormat && !options.contentOnly) {
         if (!options.json)
             options.json = true;
         if (!options.silent)
@@ -284,11 +324,38 @@ export async function runFetch(url, options) {
                     cachedResult.extracted = extractedCached;
                 }
             }
-            await outputResult(cachedResult, options, { cached: true });
+            if (options.contentOnly) {
+                await writeStdout(cachedResult.content + '\n');
+            }
+            else {
+                await outputResult(cachedResult, options, { cached: true });
+            }
             process.exit(0);
         }
     }
-    const spinner = options.silent ? null : ora('Fetching...').start();
+    // --progress: show escalation steps on stderr (overrides spinner)
+    let progressInterval;
+    const progressStart = Date.now();
+    if (options.progress) {
+        process.stderr.write(`[simple] Fetching ${url}...\n`);
+        // Show escalation hints based on elapsed time (best-effort approximations)
+        const progressSteps = [
+            { afterMs: 2500, message: '[simple] Waiting for response...' },
+            { afterMs: 6000, message: '[browser] Simple too slow — escalating to browser render...' },
+            { afterMs: 12000, message: '[browser] Rendering with Chromium...' },
+            { afterMs: 20000, message: '[stealth] Escalating to stealth mode...' },
+        ];
+        let stepIdx = 0;
+        progressInterval = setInterval(() => {
+            const elapsed = Date.now() - progressStart;
+            while (stepIdx < progressSteps.length && elapsed >= progressSteps[stepIdx].afterMs) {
+                process.stderr.write(`${progressSteps[stepIdx].message}\n`);
+                stepIdx++;
+            }
+        }, 500);
+    }
+    // Suppress spinner when --progress is active (progress lines replace it)
+    const spinner = (options.silent || options.progress) ? null : ora('Fetching...').start();
     try {
         // Validate options
         if (options.wait && (options.wait < 0 || options.wait > 60000)) {
@@ -528,7 +595,22 @@ export async function runFetch(url, options) {
         if (resolvedProfileName) {
             touchProfile(resolvedProfileName);
         }
-        if (spinner) {
+        // Stop progress interval and show final result
+        if (progressInterval) {
+            clearInterval(progressInterval);
+            progressInterval = undefined;
+        }
+        if (options.progress) {
+            const method = result.method || 'simple';
+            const elapsedSec = ((result.elapsed || (Date.now() - progressStart)) / 1000).toFixed(1);
+            const tokenCount = (result.tokens || 0).toLocaleString();
+            // Show escalation arrow if browser/stealth was needed
+            if (method !== 'simple') {
+                process.stderr.write(`[simple] → [${method}] escalated\n`);
+            }
+            process.stderr.write(`[${method}] Done — ${tokenCount} tokens in ${elapsedSec}s\n`);
+        }
+        else if (spinner) {
             const domainTag = result.domainData
                 ? ` [${result.domainData.domain}:${result.domainData.type}]`
                 : '';
@@ -866,11 +948,27 @@ export async function runFetch(url, options) {
                     result.extracted = extracted;
                 }
             }
-            // Output results (default path)
-            await outputResult(result, options, {
-                cached: false,
-                truncated: contentTruncated || undefined,
-            });
+            // --content-only: output raw content only, no wrapper
+            if (options.contentOnly) {
+                await writeStdout(result.content + '\n');
+            }
+            else {
+                // Output results (default path)
+                await outputResult(result, options, {
+                    cached: false,
+                    truncated: contentTruncated || undefined,
+                });
+                // Token savings display (our unique selling point)
+                if (!options.json && !options.silent && result.tokenSavingsPercent) {
+                    const savings = result.tokenSavingsPercent;
+                    const raw = result.rawTokenEstimate;
+                    const optimized = result.tokens || 0;
+                    if (savings > 0) {
+                        const rawStr = raw ? `${raw.toLocaleString()}→${optimized.toLocaleString()} tokens` : `${optimized.toLocaleString()} tokens`;
+                        process.stderr.write(`\x1b[32m💰 Token savings: ${savings}% smaller than raw HTML (${rawStr})\x1b[0m\n`);
+                    }
+                }
+            }
         }
         // Clean up and exit
         await cleanup();
@@ -976,7 +1074,14 @@ export function registerFetchCommands(program) {
         .option('--wait-selector <css>', 'Wait for CSS selector before extracting (auto-enables --render)')
         .option('--block-resources <types>', 'Block resource types, comma-separated: image,stylesheet,font,media,script (auto-enables --render)')
         .option('--format <type>', 'Output format: markdown (default), text, html, json')
+        .option('--content-only', 'Output only the raw content field (no metadata, no JSON wrapper) — ideal for piping to LLMs')
+        .option('--progress', 'Show engine escalation steps (simple → browser → stealth) with timing')
+        .option('--stdin', 'Read HTML from stdin instead of fetching a URL — converts to markdown')
         .action(async (url, options) => {
+        if (options.stdin) {
+            await runStdin(options);
+            return;
+        }
         await runFetch(url, options);
     });
     // ── read subcommand (explicit readable mode) ─────────────────────────────

package/dist/cli/commands/search.js CHANGED Viewed

@@ -23,6 +23,7 @@ export function registerSearchCommands(program) {
         .option('--budget <n>', 'Token budget for site-search result content', parseInt)
         .option('-s, --silent', 'Silent mode')
         .option('--proxy <url>', 'Proxy URL for requests (http://host:port, socks5://user:pass@host:port)')
+        .option('--fetch', 'Also fetch and include content from each result URL')
         .option('--agent', 'Agent mode: sets --json, --silent, and --budget 4000 (override with --budget N)')
         .action(async (query, options) => {
         // --agent sets sensible defaults for AI agents; explicit flags override
@@ -178,9 +179,61 @@ export function registerSearchCommands(program) {
             const searchData = await searchRes.json();
             // API returns { success: true, data: { web: [...] } } or { results: [...] }
             let results = searchData.data?.web || searchData.data?.results || searchData.results || [];
+            // Client-side ad filtering: remove DuckDuckGo ads that slip through the server
+            results = results.filter(r => {
+                // Filter DDG-internal URLs
+                try {
+                    const parsed = new URL(r.url);
+                    if (parsed.hostname === 'duckduckgo.com')
+                        return false;
+                    if (parsed.searchParams.has('ad_domain') ||
+                        parsed.searchParams.has('ad_provider') ||
+                        parsed.searchParams.has('ad_type'))
+                        return false;
+                }
+                catch {
+                    return false;
+                }
+                // Filter ad snippets
+                if (r.snippet && (r.snippet.includes('Ad ·') ||
+                    r.snippet.includes('Ad Viewing ads is privacy protected by DuckDuckGo') ||
+                    r.snippet.toLowerCase().startsWith('ad ·')))
+                    return false;
+                return true;
+            });
             if (spinner) {
                 spinner.succeed(`Found ${results.length} results`);
             }
+            // --fetch: fetch content from each result
+            if (options.fetch && results.length > 0) {
+                const fetchCfg = loadConfig();
+                const fetchApiKey = fetchCfg.apiKey || process.env.WEBPEEL_API_KEY;
+                const fetchApiUrl = process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev';
+                if (fetchApiKey) {
+                    const fetchSpinner = isSilent ? null : ora(`Fetching content from ${results.length} results...`).start();
+                    await Promise.all(results.map(async (result) => {
+                        try {
+                            const fetchParams = new URLSearchParams({ url: result.url });
+                            if (options.budget)
+                                fetchParams.set('budget', String(options.budget || 2000));
+                            const fetchRes = await fetch(`${fetchApiUrl}/v1/fetch?${fetchParams}`, {
+                                headers: { Authorization: `Bearer ${fetchApiKey}` },
+                                signal: AbortSignal.timeout(20000),
+                            });
+                            if (fetchRes.ok) {
+                                const fetchData = await fetchRes.json();
+                                result.content = fetchData.content || fetchData.data?.content || '';
+                            }
+                        }
+                        catch { /* skip on error */ }
+                    }));
+                    if (fetchSpinner)
+                        fetchSpinner.succeed('Content fetched');
+                }
+                else if (!isSilent) {
+                    console.error('Warning: --fetch requires API key (run: webpeel auth <key>)');
+                }
+            }
             // Show usage footer for free/anonymous users
             if (usageCheck.usageInfo && !isSilent) {
                 showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, false);
@@ -196,10 +249,24 @@ export function registerSearchCommands(program) {
                 await writeStdout(jsonStr + '\n');
             }
             else {
-                for (const result of results) {
-                    console.log(`\n${result.title}`);
-                    console.log(result.url);
-                    console.log(result.snippet);
+                // Human-readable numbered results
+                if (results.length === 0) {
+                    await writeStdout('No results found.\n');
+                }
+                else {
+                    await writeStdout(`\n`);
+                    for (const [i, result] of results.entries()) {
+                        await writeStdout(`${i + 1}. ${result.title}\n`);
+                        await writeStdout(`   ${result.url}\n`);
+                        if (result.snippet) {
+                            await writeStdout(`   ${result.snippet}\n`);
+                        }
+                        if (result.content) {
+                            const preview = result.content.slice(0, 500);
+                            await writeStdout(`\n   --- Content ---\n${preview}${result.content.length > 500 ? '\n   [...]' : ''}\n`);
+                        }
+                        await writeStdout('\n');
+                    }
                 }
             }
             process.exit(0);

package/dist/cli/utils.js CHANGED Viewed

@@ -508,13 +508,11 @@ export async function outputResult(result, options, extra = {}) {
     // Default: full output
     if (options.json) {
         // Build clean JSON output with guaranteed top-level fields
+        // Note: elapsed/method/tokens are placed at the END so `tail -3` shows perf metrics
         const output = {
             url: result.url,
             title: result.metadata?.title || result.title || null,
-            tokens: result.tokens || 0,
             fetchedAt: new Date().toISOString(),
-            method: result.method || 'simple',
-            elapsed: result.elapsed,
             content: result.content,
         };
         // Add optional fields only if present (filter out undefined/null values from metadata)
@@ -529,6 +527,10 @@ export async function outputResult(result, options, extra = {}) {
         }
         if (result.links?.length)
             output.links = result.links;
+        if (result.tokenSavingsPercent !== undefined)
+            output.tokenSavingsPercent = result.tokenSavingsPercent;
+        if (result.rawTokenEstimate !== undefined)
+            output.rawTokenEstimate = result.rawTokenEstimate;
         if (result.images?.length)
             output.images = result.images;
         if (result.structured)
@@ -562,6 +564,10 @@ export async function outputResult(result, options, extra = {}) {
         if (extra.totalAvailable !== undefined)
             output.totalAvailable = extra.totalAvailable;
         output._meta = { version: cliVersion, method: result.method || 'simple', timing: result.timing, serverMarkdown: result.serverMarkdown || false };
+        // Perf metrics at the end — `tail -3` shows: elapsed | method | tokens
+        output.elapsed = result.elapsed;
+        output.method = result.method || 'simple';
+        output.tokens = result.tokens || 0;
         await writeStdout(JSON.stringify(output, null, 2) + '\n');
     }
     else {
@@ -586,10 +592,11 @@ export async function outputResult(result, options, extra = {}) {
         }
         // Stream content immediately to stdout — consumer gets it without waiting
         await writeStdout(result.content + '\n');
-        // Append timing summary to stderr so it doesn't pollute piped content
-        if (!options.silent) {
+        // Append timing summary to stderr (always — doesn't pollute stdout pipe)
+        {
             const totalMs = result.timing?.total ?? result.elapsed;
-            process.stderr.write(`\n--- ${result.tokens} tokens · ${totalMs}ms ---\n`);
+            const method = result.method || 'simple';
+            process.stderr.write(`\n--- ${totalMs}ms | ${method} | ${result.tokens} tokens ---\n`);
         }
     }
 }

package/dist/core/search-provider.d.ts CHANGED Viewed

@@ -83,7 +83,7 @@ export declare const providerStats: ProviderStatsTracker;
 export declare class StealthSearchProvider implements SearchProvider {
     readonly id: SearchProviderId;
     readonly requiresApiKey = false;
-    /** Validate and normalize a URL; returns null if invalid/non-http */
+    /** Validate and normalize a URL; returns null if invalid/non-http or a DDG ad URL */
     private validateUrl;
     /**
      * Scrape DuckDuckGo HTML endpoint with stealth browser.
@@ -145,7 +145,7 @@ export declare class GoogleSearchProvider implements SearchProvider {
      *                      m[n]=past n months, y[n]=past n years.
      */
     private mapFreshnessToDateRestrict;
-    /** Validate URL; returns null if invalid/non-http */
+    /** Validate URL; returns null if invalid/non-http or a DDG ad URL */
     private validateUrl;
     /**
      * Stealth browser scrape of google.com/search.

package/dist/core/search-provider.js CHANGED Viewed

@@ -84,6 +84,30 @@ function decodeDdgUrl(rawUrl) {
         return rawUrl;
     }
 }
+/** Returns true if a URL looks like a DuckDuckGo ad or tracking link */
+function isDdgAdUrl(url) {
+    try {
+        const parsed = new URL(url);
+        // DDG-internal ad redirect paths
+        if (parsed.hostname === 'duckduckgo.com')
+            return true;
+        // URLs with known ad tracking query params
+        if (parsed.searchParams.has('ad_domain') ||
+            parsed.searchParams.has('ad_provider') ||
+            parsed.searchParams.has('ad_type'))
+            return true;
+        return false;
+    }
+    catch {
+        return false;
+    }
+}
+/** Returns true if a snippet is a DuckDuckGo ad snippet */
+function isDdgAdSnippet(snippet) {
+    return snippet.includes('Ad ·') ||
+        snippet.includes('Ad Viewing ads is privacy protected by DuckDuckGo') ||
+        snippet.toLowerCase().startsWith('ad ·');
+}
 class ProviderStatsTracker {
     history = new Map();
     windowSize;
@@ -182,14 +206,19 @@ function normalizeUrlForDedupe(rawUrl) {
 export class StealthSearchProvider {
     id = 'stealth';
     requiresApiKey = false;
-    /** Validate and normalize a URL; returns null if invalid/non-http */
+    /** Validate and normalize a URL; returns null if invalid/non-http or a DDG ad URL */
     validateUrl(rawUrl) {
         try {
             const parsed = new URL(rawUrl);
             if (!['http:', 'https:'].includes(parsed.protocol))
                 return null;
-            // Filter DuckDuckGo ad redirect URLs (e.g. duckduckgo.com/y.js?ad_domain=...)
-            if (parsed.hostname === 'duckduckgo.com' && parsed.pathname === '/y.js')
+            // Filter all DuckDuckGo URLs (internal links, ad redirects, etc.)
+            if (parsed.hostname === 'duckduckgo.com')
+                return null;
+            // Filter URLs with ad tracking query params
+            if (parsed.searchParams.has('ad_domain') ||
+                parsed.searchParams.has('ad_provider') ||
+                parsed.searchParams.has('ad_type'))
                 return null;
             return parsed.href;
         }
@@ -236,10 +265,16 @@ export class StealthSearchProvider {
                 const snippet = cleanText(snippetRaw, { maxLen: 500, stripEllipsisPadding: true });
                 if (!title || !rawUrl)
                     return;
+                // Filter ad snippets
+                if (isDdgAdSnippet(snippet))
+                    return;
                 // Extract real URL from DDG redirect param
                 const finalUrl = decodeDdgUrl(rawUrl);
                 if (!finalUrl)
                     return; // filtered out (DDG internal link)
+                // Filter ad URLs
+                if (isDdgAdUrl(finalUrl))
+                    return;
                 const validated = this.validateUrl(finalUrl);
                 if (!validated)
                     return;
@@ -532,10 +567,16 @@ export class DuckDuckGoProvider {
             let snippet = cleanText(snippetRaw, { maxLen: 500, stripEllipsisPadding: true });
             if (!title || !rawUrl)
                 return;
+            // Filter ad snippets (DuckDuckGo injects ad labels into snippets)
+            if (isDdgAdSnippet(snippet))
+                return;
             // Extract actual URL from DuckDuckGo redirect; filter DDG internal/ad URLs
             const decoded = decodeDdgUrl(rawUrl);
             if (!decoded)
                 return; // filtered out (DDG internal link or ad redirect)
+            // Filter ad URLs
+            if (isDdgAdUrl(decoded))
+                return;
             // SECURITY: Validate and sanitize results — only allow HTTP/HTTPS URLs
             let url;
             try {
@@ -813,14 +854,19 @@ export class GoogleSearchProvider {
         };
         return map[tbs];
     }
-    /** Validate URL; returns null if invalid/non-http */
+    /** Validate URL; returns null if invalid/non-http or a DDG ad URL */
     validateUrl(rawUrl) {
         try {
             const parsed = new URL(rawUrl);
             if (!['http:', 'https:'].includes(parsed.protocol))
                 return null;
-            // Filter DuckDuckGo ad redirect URLs (e.g. duckduckgo.com/y.js?ad_domain=...)
-            if (parsed.hostname === 'duckduckgo.com' && parsed.pathname === '/y.js')
+            // Filter all DuckDuckGo URLs (internal links, ad redirects, etc.)
+            if (parsed.hostname === 'duckduckgo.com')
+                return null;
+            // Filter URLs with ad tracking query params
+            if (parsed.searchParams.has('ad_domain') ||
+                parsed.searchParams.has('ad_provider') ||
+                parsed.searchParams.has('ad_type'))
                 return null;
             return parsed.href;
         }

package/dist/core/strategies.js CHANGED Viewed

@@ -597,8 +597,12 @@ export async function smartFetch(url, options = {}) {
                     .then((result) => ({ type: 'simple-success', result }))
                     .catch((error) => ({ type: 'simple-error', error }));
                 if (simpleResult.type === 'simple-success') {
-                    // Check if the content is suspiciously thin or has SPA indicators — escalate to browser if so
-                    if (shouldEscalateForLowContent(simpleResult.result) || hasSpaIndicators(simpleResult.result.html)) {
+                    // Check if the content is suspiciously thin, looks like an SPA shell, or is a shell page
+                    // (looksLikeShellPage catches partial renders with 200-500 visible chars that
+                    // shouldEscalateForLowContent misses — improves consistency on sites like China Daily)
+                    if (shouldEscalateForLowContent(simpleResult.result) ||
+                        hasSpaIndicators(simpleResult.result.html) ||
+                        looksLikeShellPage(simpleResult.result)) {
                         shouldUseBrowser = true;
                     }
                     else {

package/dist/server/routes/agent.d.ts CHANGED Viewed

@@ -1,5 +1,7 @@
 /**
- * POST /v1/agent
+ * POST /v1/agent           — single autonomous agent query
+ * POST /v1/agent/batch     — parallel batch of agent queries (max 50)
+ * GET  /v1/agent/batch/:id — poll batch job status
  *
  * Autonomous web agent — search → fetch → extract (LLM or BM25)
  *
@@ -11,9 +13,7 @@
  *
  * Returns: { success, data|answer, sources, method, elapsed, tokensUsed }
  *
- * Two modes:
- *   - agent-llm:  schema + llmApiKey → LLM extraction (BYOK)
- *   - agent-bm25: no LLM key → BM25 text answer (always free)
+ * Webhook support: pass `webhook` URL to get async delivery with HMAC-SHA256 signing.
  *
  * 5-minute in-memory cache. Max 10 sources per request.
  */

package/dist/server/routes/agent.js CHANGED Viewed

@@ -1,5 +1,7 @@
 /**
- * POST /v1/agent
+ * POST /v1/agent           — single autonomous agent query
+ * POST /v1/agent/batch     — parallel batch of agent queries (max 50)
+ * GET  /v1/agent/batch/:id — poll batch job status
  *
  * Autonomous web agent — search → fetch → extract (LLM or BM25)
  *
@@ -11,9 +13,7 @@
  *
  * Returns: { success, data|answer, sources, method, elapsed, tokensUsed }
  *
- * Two modes:
- *   - agent-llm:  schema + llmApiKey → LLM extraction (BYOK)
- *   - agent-bm25: no LLM key → BM25 text answer (always free)
+ * Webhook support: pass `webhook` URL to get async delivery with HMAC-SHA256 signing.
  *
  * 5-minute in-memory cache. Max 10 sources per request.
  */
@@ -22,9 +22,42 @@ import { peel } from '../../index.js';
 import { extractWithLLM } from '../../core/llm-extract.js';
 import { getBestSearchProvider } from '../../core/search-provider.js';
 import { quickAnswer } from '../../core/quick-answer.js';
+import { sendWebhook } from './webhooks.js';
 import { createLogger } from '../../core/logger.js';
 import crypto from 'crypto';
 const log = createLogger('agent');
+const batchJobs = new Map();
+const BATCH_TTL = 60 * 60 * 1000; // 1 hour
+// GC stale batch jobs every 10 minutes
+setInterval(() => {
+    const now = Date.now();
+    for (const [id, job] of batchJobs) {
+        if (now - job.createdAt > BATCH_TTL)
+            batchJobs.delete(id);
+    }
+}, 10 * 60 * 1000).unref();
+// Simple concurrency limiter
+class Semaphore {
+    max;
+    queue = [];
+    running = 0;
+    constructor(max) {
+        this.max = max;
+    }
+    async acquire() {
+        if (this.running < this.max) {
+            this.running++;
+            return;
+        }
+        return new Promise((resolve) => this.queue.push(() => { this.running++; resolve(); }));
+    }
+    release() {
+        this.running--;
+        const next = this.queue.shift();
+        if (next)
+            next();
+    }
+}
 const cache = new Map();
 const CACHE_TTL = 5 * 60 * 1000; // 5 minutes
 function getCached(key) {
@@ -48,191 +81,175 @@ function setCache(key, result) {
     }
     cache.set(key, { result, expiresAt: Date.now() + CACHE_TTL });
 }
+/**
+ * Run a single agent query end to end: resolve sources → fetch → extract or answer.
+ *
+ * @param {object} params
+ * @param {string} params.prompt        Query text; also used as the web search query.
+ * @param {object} [params.schema]      Extraction schema. LLM extraction runs only when
+ *                                      both `schema` and `llmApiKey` are provided.
+ * @param {string} [params.llmApiKey]   Caller-supplied LLM key.
+ * @param {string} [params.llmProvider] LLM provider; falls back to 'openai'.
+ * @param {string} [params.llmModel]    LLM model name.
+ * @param {string[]} [params.urls]      Pages to read instead of searching (first 10 are used).
+ * @param {number} [params.sources]     How many search results to read (default 5, max 10).
+ * @returns {Promise<object>} On success `{ success: true, data|answer, sources, method,
+ *   tokensUsed, elapsed }` (plus `cached: true` on a cache hit). When nothing could be found
+ *   or fetched: `{ success: false, error: { type, message }, prompt, elapsed }` with type
+ *   `no_sources` or `fetch_failed`.
+ * @throws Errors from `extractWithLLM` / `quickAnswer` propagate; search and per-page fetch
+ *   failures are absorbed. `prompt` must be a string (`prompt.trim()` is called).
+ */
+async function runAgentQuery(params) {
+    const { prompt, schema, llmApiKey, llmProvider, llmModel, urls, sources: maxSources } = params;
+    const startMs = Date.now();
+    const MAX_SOURCES = 10;
+    // Clamp to [1, MAX_SOURCES]; anything missing or invalid falls back to 5.
+    // (A negative count would otherwise make slice(0, n) drop results from the end.)
+    const requestedSources = Number(maxSources);
+    const numSources = Number.isFinite(requestedSources) && requestedSources >= 1
+        ? Math.min(Math.floor(requestedSources), MAX_SOURCES)
+        : 5;
+    // Caller-provided URLs are subject to the same per-request source cap.
+    const explicitUrls = Array.isArray(urls) ? urls.slice(0, MAX_SOURCES) : [];
+    const useLlm = Boolean(schema && llmApiKey);
+    // Cache check. The key covers everything that changes the answer: explicit URLs,
+    // source count and extraction mode. Keying on prompt + schema alone lets a BM25
+    // answer be served to a later LLM request (or to a request for different URLs).
+    // The API key itself is deliberately kept out of the key.
+    const cacheKey = JSON.stringify({
+        prompt: prompt.trim(),
+        schema: schema || {},
+        urls: explicitUrls,
+        numSources,
+        llm: useLlm ? [llmProvider || 'openai', llmModel || 'default'] : null,
+    });
+    const cached = getCached(cacheKey);
+    if (cached)
+        return { ...cached, cached: true };
+    // Step 1: Resolve source URLs — use caller-provided ones or search the web
+    let sourceUrls = [];
+    if (explicitUrls.length > 0) {
+        sourceUrls = explicitUrls.map((u) => ({ url: u }));
+    }
+    else {
+        log.info(`Searching web for: "${prompt}"`);
+        const { provider, apiKey: searchApiKey } = getBestSearchProvider();
+        try {
+            const searchResults = await provider.searchWeb(prompt.trim(), { count: numSources, apiKey: searchApiKey });
+            sourceUrls = searchResults.slice(0, numSources).map((r) => ({ url: r.url, title: r.title, snippet: r.snippet }));
+        }
+        catch (err) {
+            // A failed search is reported to the caller as `no_sources` below.
+            log.warn('Search failed:', err.message);
+        }
+    }
+    if (sourceUrls.length === 0) {
+        return { success: false, error: { type: 'no_sources', message: 'Could not find relevant pages for this query' }, prompt, elapsed: Date.now() - startMs };
+    }
+    // Step 2: Fetch pages in parallel
+    log.info(`Fetching ${sourceUrls.length} sources in parallel`);
+    const PER_SOURCE_TIMEOUT_MS = 5000;
+    const fetchPromises = sourceUrls.map(async (source) => {
+        let timer;
+        try {
+            const result = await Promise.race([
+                peel(source.url, { render: false, noEscalate: true, format: 'markdown', timeout: PER_SOURCE_TIMEOUT_MS, budget: 3000 }),
+                new Promise((_, reject) => {
+                    timer = setTimeout(() => reject(new Error('per-source timeout')), PER_SOURCE_TIMEOUT_MS);
+                }),
+            ]);
+            return { url: source.url, title: result.title || source.title || '', content: (result.content || '').slice(0, 15000), tokens: result.tokens || 0 };
+        }
+        catch {
+            // Best effort: a page that fails or times out is simply left out.
+            return null;
+        }
+        finally {
+            // Do not leave a pending 5s timer behind once the race is decided.
+            clearTimeout(timer);
+        }
+    });
+    // Each task resolves to a page or null (never rejects), so Promise.all is safe here.
+    const fetchResults = (await Promise.all(fetchPromises)).filter(Boolean);
+    if (fetchResults.length === 0) {
+        return { success: false, error: { type: 'fetch_failed', message: 'Could not fetch any of the found pages' }, prompt, sources: sourceUrls.map((s) => ({ url: s.url })), elapsed: Date.now() - startMs };
+    }
+    // Step 3: Extract or answer
+    const combinedContent = fetchResults.map((r) => `### ${r.title || r.url}\nURL: ${r.url}\n\n${r.content}`).join('\n\n---\n\n');
+    const totalTokens = fetchResults.reduce((sum, r) => sum + r.tokens, 0);
+    let result;
+    if (useLlm) {
+        log.info('Using LLM extraction');
+        const extracted = await extractWithLLM({
+            content: combinedContent.slice(0, 30000), schema, llmApiKey, llmProvider: (llmProvider || 'openai'), llmModel,
+            prompt: `Based on these web pages, ${prompt}`, url: fetchResults[0].url,
+        });
+        const llmTokensUsed = (extracted.tokensUsed?.input ?? 0) + (extracted.tokensUsed?.output ?? 0);
+        result = { success: true, data: extracted.items, sources: fetchResults.map((r) => ({ url: r.url, title: r.title })), method: 'agent-llm',
+            llm: { provider: extracted.provider || llmProvider || 'openai', model: extracted.model || llmModel || 'default' }, tokensUsed: totalTokens + llmTokensUsed, elapsed: Date.now() - startMs };
+    }
+    else {
+        log.info('Using BM25 text extraction');
+        const qa = quickAnswer({ question: prompt, content: combinedContent, maxPassages: 3, maxChars: 2000 });
+        result = { success: true, answer: qa.answer || combinedContent.slice(0, 2000), confidence: qa.confidence ?? 0,
+            sources: fetchResults.map((r) => ({ url: r.url, title: r.title })), method: 'agent-bm25', tokensUsed: totalTokens, elapsed: Date.now() - startMs };
+    }
+    // Only successful results are cached; failures are retried on the next call.
+    setCache(cacheKey, result);
+    return result;
+}
 // ---------------------------------------------------------------------------
 // Route factory
 // ---------------------------------------------------------------------------
 export function createAgentRouter() {
     const router = Router();
+    // ── POST /v1/agent — single query (with optional webhook) ──────────────
     router.post('/', async (req, res) => {
-        const { prompt, schema, llmApiKey, llmProvider, llmModel, urls, sources: maxSources, } = req.body || {};
-        // Validate required param
+        const { prompt, schema, llmApiKey, llmProvider, llmModel, urls, sources: maxSources, webhook } = req.body || {};
+        const requestId = req.requestId || crypto.randomUUID();
         if (!prompt?.trim()) {
             return res.status(400).json({
                 success: false,
-                error: {
-                    type: 'missing_prompt',
-                    message: 'Provide a prompt describing what you want to find',
-                    hint: 'POST /v1/agent { "prompt": "Find Stripe pricing plans" }',
-                    docs: 'https://webpeel.dev/docs/api-reference',
-                },
-                requestId: req.requestId || crypto.randomUUID(),
+                error: { type: 'missing_prompt', message: 'Provide a prompt describing what you want to find',
+                    hint: 'POST /v1/agent { "prompt": "Find Stripe pricing plans" }', docs: 'https://webpeel.dev/docs/api-reference' },
+                requestId,
             });
         }
-        const startMs = Date.now();
-        const numSources = Math.min(maxSources || 5, 10);
-        const requestId = req.requestId || crypto.randomUUID();
-        // Cache check
-        const cacheKey = `${prompt.trim()}:${JSON.stringify(schema || {})}`;
-        const cached = getCached(cacheKey);
-        if (cached) {
-            return res.json({ ...cached, cached: true, requestId });
+        // Async mode: webhook provided → return immediately, deliver result later
+        if (webhook) {
+            const jobId = crypto.randomUUID();
+            res.json({ success: true, id: jobId, status: 'processing', requestId });
+            // Fire-and-forget agent query + webhook delivery
+            runAgentQuery({ prompt, schema, llmApiKey, llmProvider, llmModel, urls, sources: maxSources })
+                .then((result) => sendWebhook(webhook, 'agent.completed', { id: jobId, ...result, requestId }))
+                .catch((err) => {
+                log.error('Async agent error:', err.message);
+                sendWebhook(webhook, 'agent.failed', { id: jobId, error: err.message, requestId }).catch(() => { });
+            });
+            return;
         }
+        // Synchronous mode: wait for result
         try {
-            // -----------------------------------------------------------------------
-            // Step 1: Resolve source URLs — use caller-provided or search the web
-            // -----------------------------------------------------------------------
-            let sourceUrls = [];
-            if (Array.isArray(urls) && urls.length > 0) {
-                sourceUrls = urls.map((u) => ({ url: u }));
-            }
-            else {
-                log.info(`Searching web for: "${prompt}"`);
-                const { provider, apiKey: searchApiKey } = getBestSearchProvider();
-                let searchResults = [];
-                try {
-                    searchResults = await provider.searchWeb(prompt.trim(), {
-                        count: numSources,
-                        apiKey: searchApiKey,
-                    });
-                }
-                catch (err) {
-                    log.warn('Search failed:', err.message);
-                }
-                sourceUrls = searchResults.slice(0, numSources).map((r) => ({
-                    url: r.url,
-                    title: r.title,
-                    snippet: r.snippet,
-                }));
-            }
-            if (sourceUrls.length === 0) {
-                return res.json({
-                    success: false,
-                    error: {
-                        type: 'no_sources',
-                        message: 'Could not find relevant pages for this query',
-                    },
-                    prompt,
-                    elapsed: Date.now() - startMs,
-                    requestId,
-                });
-            }
-            // -----------------------------------------------------------------------
-            // Step 2: Fetch pages in parallel (HTTP only, no browser, 5s timeout)
-            // -----------------------------------------------------------------------
-            log.info(`Fetching ${sourceUrls.length} sources in parallel`);
-            const PER_SOURCE_TIMEOUT_MS = 5000;
-            const fetchPromises = sourceUrls.map(async (source) => {
-                try {
-                    const result = await Promise.race([
-                        peel(source.url, {
-                            render: false,
-                            noEscalate: true,
-                            format: 'markdown',
-                            timeout: PER_SOURCE_TIMEOUT_MS,
-                            budget: 3000,
-                        }),
-                        new Promise((_, reject) => setTimeout(() => reject(new Error('per-source timeout')), PER_SOURCE_TIMEOUT_MS)),
-                    ]);
-                    return {
-                        url: source.url,
-                        title: result.title || source.title || '',
-                        content: (result.content || '').slice(0, 15000),
-                        tokens: result.tokens || 0,
-                    };
-                }
-                catch {
-                    return null;
-                }
-            });
-            const fetchSettled = await Promise.allSettled(fetchPromises);
-            const fetchResults = fetchSettled
-                .map((r) => (r.status === 'fulfilled' ? r.value : null))
-                .filter(Boolean);
-            if (fetchResults.length === 0) {
-                return res.json({
-                    success: false,
-                    error: {
-                        type: 'fetch_failed',
-                        message: 'Could not fetch any of the found pages',
-                    },
-                    prompt,
-                    sources: sourceUrls.map((s) => ({ url: s.url })),
-                    elapsed: Date.now() - startMs,
-                    requestId,
-                });
-            }
-            // -----------------------------------------------------------------------
-            // Step 3: Extract or answer
-            // -----------------------------------------------------------------------
-            const combinedContent = fetchResults
-                .map((r) => `### ${r.title || r.url}\nURL: ${r.url}\n\n${r.content}`)
-                .join('\n\n---\n\n');
-            const totalTokens = fetchResults.reduce((sum, r) => sum + r.tokens, 0);
-            let result;
-            if (schema && llmApiKey) {
-                // ── LLM extraction path ──────────────────────────────────────────────
-                log.info('Using LLM extraction');
-                const extracted = await extractWithLLM({
-                    content: combinedContent.slice(0, 30000),
-                    schema,
-                    llmApiKey,
-                    llmProvider: llmProvider || 'openai',
-                    llmModel,
-                    prompt: `Based on these web pages, ${prompt}`,
-                    url: fetchResults[0].url,
-                });
-                const llmTokensUsed = (extracted.tokensUsed?.input ?? 0) + (extracted.tokensUsed?.output ?? 0);
-                result = {
-                    success: true,
-                    data: extracted.items,
-                    sources: fetchResults.map((r) => ({ url: r.url, title: r.title })),
-                    method: 'agent-llm',
-                    llm: {
-                        provider: extracted.provider || llmProvider || 'openai',
-                        model: extracted.model || llmModel || 'default',
-                    },
-                    tokensUsed: totalTokens + llmTokensUsed,
-                    elapsed: Date.now() - startMs,
-                    requestId,
-                };
-            }
-            else {
-                // ── BM25 text answer path (no LLM needed) ───────────────────────────
-                log.info('Using BM25 text extraction');
-                const qa = quickAnswer({
-                    question: prompt,
-                    content: combinedContent,
-                    maxPassages: 3,
-                    maxChars: 2000,
-                });
-                result = {
-                    success: true,
-                    answer: qa.answer || combinedContent.slice(0, 2000),
-                    confidence: qa.confidence ?? 0,
-                    sources: fetchResults.map((r) => ({ url: r.url, title: r.title })),
-                    method: 'agent-bm25',
-                    tokensUsed: totalTokens,
-                    elapsed: Date.now() - startMs,
-                    requestId,
-                };
-            }
-            // Cache the result
-            setCache(cacheKey, result);
-            return res.json(result);
+            const result = await runAgentQuery({ prompt, schema, llmApiKey, llmProvider, llmModel, urls, sources: maxSources });
+            return res.json({ ...result, requestId });
         }
         catch (err) {
             log.error('Agent error:', err.message);
             return res.status(500).json({
-                success: false,
-                error: {
-                    type: 'agent_error',
-                    message: err.message || 'An unexpected error occurred',
-                },
-                prompt,
-                elapsed: Date.now() - startMs,
-                requestId,
+                success: false, error: { type: 'agent_error', message: err.message || 'An unexpected error occurred' },
+                prompt, elapsed: 0, requestId,
+            });
+        }
+    });
+    // ── POST /v1/agent/batch — parallel batch queries ─────────────────────
+    // Body: { prompts: string[] (1–50), schema?, llmApiKey?, llmProvider?, llmModel?, sources?, webhook? }
+    // Responds immediately with { id, status: 'processing', total } and runs the
+    // prompts in the background, at most 5 at a time. Progress is available via
+    // GET /v1/agent/batch/:id; when `webhook` is set, the final results are also
+    // delivered as an 'agent.batch.completed' event.
+    router.post('/batch', async (req, res) => {
+        const { prompts, schema, llmApiKey, llmProvider, llmModel, sources, webhook } = req.body || {};
+        const requestId = req.requestId || crypto.randomUUID();
+        if (!Array.isArray(prompts) || prompts.length === 0) {
+            return res.status(400).json({
+                success: false, error: { type: 'missing_prompts', message: 'Provide an array of prompts',
+                    hint: 'POST /v1/agent/batch { "prompts": ["Find X", "Find Y"] }' }, requestId,
+            });
+        }
+        if (prompts.length > 50) {
+            return res.status(400).json({
+                success: false, error: { type: 'too_many_prompts', message: `Max 50 prompts per batch (got ${prompts.length})` }, requestId,
+            });
+        }
+        const jobId = crypto.randomUUID();
+        const job = { id: jobId, status: 'processing', total: prompts.length, completed: 0, results: [], webhook, createdAt: Date.now() };
+        batchJobs.set(jobId, job);
+        // Return immediately, then process in background
+        res.json({ success: true, id: jobId, status: 'processing', total: prompts.length, requestId });
+        // Process in background with concurrency limit of 5
+        const sem = new Semaphore(5);
+        const tasks = prompts.map(async (prompt) => {
+            await sem.acquire();
+            try {
+                const result = await runAgentQuery({ prompt, schema, llmApiKey, llmProvider, llmModel, sources });
+                // Results are appended in completion order; each entry carries its prompt.
+                // `error` is forwarded so a per-prompt failure (e.g. no_sources) stays
+                // visible instead of collapsing to a bare `success: false`.
+                job.results.push({ prompt, success: !!result.success, answer: result.answer,
+                    data: result.data, error: result.error, sources: result.sources, method: result.method, elapsed: result.elapsed });
+            }
+            catch (err) {
+                job.results.push({ prompt, success: false, error: err.message });
+            }
+            finally {
+                job.completed++;
+                sem.release();
+            }
+        });
+        // Intentionally not awaited: the HTTP response has already been sent.
+        // eslint-disable-next-line @typescript-eslint/no-floating-promises
+        Promise.allSettled(tasks)
+            .then(() => {
+                // The job is 'completed' once every prompt has been attempted;
+                // per-prompt outcomes are recorded on each results entry.
+                job.status = 'completed';
+                if (!webhook)
+                    return undefined;
+                return sendWebhook(webhook, 'agent.batch.completed', { id: jobId, total: job.total, completed: job.completed, results: job.results });
+            })
+            // Covers both a rejected delivery and a synchronous throw from sendWebhook.
+            .catch((err) => log.error('Batch webhook failed:', err.message));
+        return;
+    });
+    // ── GET /v1/agent/batch/:id — poll batch status ───────────────────────
+    // Returns the job's progress counters and the results gathered so far;
+    // responds 404 when the id is not present in the in-memory job map.
+    router.get('/batch/:id', async (req, res) => {
+        const found = batchJobs.get(req.params.id);
+        if (found) {
+            const { id, status, total, completed, results } = found;
+            return res.json({ success: true, id, status, total, completed, results });
+        }
+        const notFound = { type: 'not_found', message: 'Batch job not found or expired' };
+        return res.status(404).json({ success: false, error: notFound });
+    });
     return router;
 }

package/dist/server/routes/session.d.ts CHANGED Viewed

@@ -6,9 +6,11 @@
  * POST   /v1/session/:id/navigate   → navigate to URL { url }
  * POST   /v1/session/:id/act        → execute PageActions array
  * GET    /v1/session/:id/screenshot → take screenshot (image/png)
+ * GET    /v1/session/:id/cookies    → export cookies from session context
+ * POST   /v1/session/:id/cookies    → inject cookies into session context
  * DELETE /v1/session/:id            → close session
  *
- * Use cases: login flows, multi-step automation, UI testing.
+ * Use cases: login flows, multi-step automation, UI testing, cookie persistence.
  * This is what Browserbase charges $500/mo for — built into WebPeel.
  */
 import { Router } from 'express';

package/dist/server/routes/session.js CHANGED Viewed

@@ -6,9 +6,11 @@
  * POST   /v1/session/:id/navigate   → navigate to URL { url }
  * POST   /v1/session/:id/act        → execute PageActions array
  * GET    /v1/session/:id/screenshot → take screenshot (image/png)
+ * GET    /v1/session/:id/cookies    → export cookies from session context
+ * POST   /v1/session/:id/cookies    → inject cookies into session context
  * DELETE /v1/session/:id            → close session
  *
- * Use cases: login flows, multi-step automation, UI testing.
+ * Use cases: login flows, multi-step automation, UI testing, cookie persistence.
  * This is what Browserbase charges $500/mo for — built into WebPeel.
  */
 import { Router } from 'express';
@@ -17,13 +19,15 @@ import { normalizeActions, executeActions } from '../../core/actions.js';
 import { ANTI_DETECTION_ARGS, getRandomViewport, getRandomUserAgent, applyStealthScripts, } from '../../core/browser-pool.js';
 import { extractReadableContent } from '../../core/readability.js';
 const sessions = new Map();
-const SESSION_TTL_MS = 5 * 60 * 1000; // 5 minutes idle TTL
+const DEFAULT_SESSION_TTL_MS = 5 * 60 * 1000; // 5 minutes idle TTL (default)
+const MAX_SESSION_TTL_MS = 60 * 60 * 1000; // 60 minutes (persist / max)
+const MIN_SESSION_TTL_MS = 1 * 60 * 1000; // 1 minute minimum
 const MAX_SESSIONS_PER_USER = 3; // prevent abuse
 // Cleanup expired sessions every minute
 const _cleanupInterval = setInterval(() => {
     const now = Date.now();
     for (const [id, session] of sessions) {
-        if (now - session.lastUsedAt > SESSION_TTL_MS) {
+        if (now - session.lastUsedAt > session.ttlMs) {
             session.browser.close().catch(() => { });
             sessions.delete(id);
         }
@@ -73,7 +77,18 @@ function extractReadableText(html, url) {
 // ── Router ────────────────────────────────────────────────────────────────────
 export function createSessionRouter() {
     const router = Router();
-    // ── POST /v1/session — create session ────────────────────────────────────────
+    /**
+     * POST /v1/session — create a stateful browser session
+     *
+     * Body params:
+     *   url?     {string}  Initial URL to navigate to (optional).
+     *   ttl?     {number}  Session idle TTL in minutes (1–60, default 5).
+     *                      Timer resets on every request that touches the session.
+     *   persist? {boolean} Shorthand for ttl=60. Enables long-lived sessions
+     *                      for login flows where cookies must persist.
+     *
+     * Returns: { sessionId, currentUrl, expiresAt, ttlMinutes }
+     */
     router.post('/v1/session', async (req, res) => {
         const ownerId = getOwnerId(req);
         if (!ownerId) {
@@ -95,7 +110,15 @@ export function createSessionRouter() {
             });
             return;
         }
-        const { url } = req.body;
+        const { url, ttl, persist } = req.body;
+        // Resolve TTL: persist=true → 60 min max, ttl overrides default, clamp to [1, 60] min
+        let ttlMs = DEFAULT_SESSION_TTL_MS;
+        if (persist) {
+            ttlMs = MAX_SESSION_TTL_MS;
+        }
+        else if (typeof ttl === 'number') {
+            ttlMs = Math.min(MAX_SESSION_TTL_MS, Math.max(MIN_SESSION_TTL_MS, ttl * 60 * 1000));
+        }
         let browser = null;
         try {
             browser = await launchBrowser();
@@ -137,11 +160,13 @@ export function createSessionRouter() {
                 createdAt: now,
                 lastUsedAt: now,
                 currentUrl: page.url(),
+                ttlMs,
             });
             res.status(201).json({
                 sessionId: id,
                 currentUrl: page.url(),
-                expiresAt: new Date(now + SESSION_TTL_MS).toISOString(),
+                expiresAt: new Date(now + ttlMs).toISOString(),
+                ttlMinutes: ttlMs / 60_000,
             });
         }
         catch (err) {
@@ -188,7 +213,8 @@ export function createSessionRouter() {
                 currentUrl: session.page.url(),
                 title,
                 content,
-                expiresAt: new Date(session.lastUsedAt + SESSION_TTL_MS).toISOString(),
+                expiresAt: new Date(session.lastUsedAt + session.ttlMs).toISOString(),
+                ttlMinutes: session.ttlMs / 60_000,
             });
         }
         catch (err) {
@@ -242,7 +268,8 @@ export function createSessionRouter() {
             res.json({
                 currentUrl: session.page.url(),
                 title: await session.page.title(),
-                expiresAt: new Date(session.lastUsedAt + SESSION_TTL_MS).toISOString(),
+                expiresAt: new Date(session.lastUsedAt + session.ttlMs).toISOString(),
+                ttlMinutes: session.ttlMs / 60_000,
             });
         }
         catch (err) {
@@ -327,7 +354,8 @@ export function createSessionRouter() {
                 title,
                 screenshot,
                 actionsExecuted: normalizedActions.length,
-                expiresAt: new Date(session.lastUsedAt + SESSION_TTL_MS).toISOString(),
+                expiresAt: new Date(session.lastUsedAt + session.ttlMs).toISOString(),
+                ttlMinutes: session.ttlMs / 60_000,
             });
         }
         catch (err) {
@@ -367,6 +395,7 @@ export function createSessionRouter() {
             session.lastUsedAt = Date.now();
             res.setHeader('Content-Type', 'image/png');
             res.setHeader('Cache-Control', 'no-store');
+            res.setHeader('X-Session-Expires-At', new Date(session.lastUsedAt + session.ttlMs).toISOString());
             res.send(buf);
         }
         catch (err) {
@@ -382,6 +411,128 @@ export function createSessionRouter() {
             });
         }
     });
+    /**
+     * GET /v1/session/:id/cookies — export the cookies held by the session's browser context
+     *
+     * Returns: { sessionId, cookies: Cookie[], count: number, expiresAt: string }
+     *
+     * Each cookie follows the Playwright Cookie shape:
+     *   { name, value, domain, path, expires, httpOnly, secure, sameSite }
+     *
+     * Typical use: snapshot the cookies once a login flow has finished, store them,
+     * and later feed them back through POST /v1/session/:id/cookies so the
+     * session does not have to authenticate again.
+     *
+     * A successful call counts as session activity (the idle timer restarts).
+     * Responds 404 when the session lookup fails and 500 when the export throws.
+     */
+    router.get('/v1/session/:id/cookies', async (req, res) => {
+        const session = getSession(req.params['id'], getOwnerId(req));
+        if (!session) {
+            const notFound = {
+                type: 'session_not_found',
+                message: 'Session not found or has expired.',
+                hint: 'Create a new session via POST /v1/session.',
+                docs: 'https://webpeel.dev/docs/errors#session-not-found',
+            };
+            res.status(404).json({ success: false, error: notFound, requestId: req.requestId || randomUUID() });
+            return;
+        }
+        try {
+            // With no URL filter, context.cookies() returns every cookie in the context
+            const cookies = await session.context.cookies();
+            session.lastUsedAt = Date.now();
+            const count = cookies.length;
+            const expiresAt = new Date(session.lastUsedAt + session.ttlMs).toISOString();
+            res.json({ sessionId: session.id, cookies, count, expiresAt });
+        }
+        catch (err) {
+            const failure = {
+                type: 'cookie_export_failed',
+                message: err instanceof Error ? err.message : String(err),
+                docs: 'https://webpeel.dev/docs/errors#cookie-export-failed',
+            };
+            res.status(500).json({ success: false, error: failure, requestId: req.requestId || randomUUID() });
+        }
+    });
+    /**
+     * POST /v1/session/:id/cookies — inject cookies into the session's browser context
+     *
+     * Body params:
+     *   cookies {Cookie[]} Array of Playwright-compatible cookie objects.
+     *                      Required fields: name, value, domain (or url).
+     *                      Optional: path, expires, httpOnly, secure, sameSite.
+     *
+     * Returns: { sessionId, injected: number, expiresAt: string }
+     *
+     * Errors:
+     *   404 session_not_found    — the session lookup failed.
+     *   400 bad_request          — body missing, or `cookies` is not a non-empty array.
+     *   400 cookie_inject_failed — addCookies() threw while applying the cookies.
+     *
+     * Typical cookie-persistence workflow:
+     *   1. POST /v1/session { url: "https://example.com", persist: true }
+     *   2. POST /v1/session/:id/act  (complete login flow)
+     *   3. GET  /v1/session/:id/cookies  → save cookies array to your storage
+     *   4. Later: POST /v1/session/:id/cookies { cookies: [...] }
+     *   5. GET  /v1/session/:id  → page loads authenticated (no re-login needed)
+     */
+    router.post('/v1/session/:id/cookies', async (req, res) => {
+        const ownerId = getOwnerId(req);
+        const session = getSession(req.params['id'], ownerId);
+        if (!session) {
+            res.status(404).json({
+                success: false,
+                error: {
+                    type: 'session_not_found',
+                    message: 'Session not found or has expired.',
+                    hint: 'Create a new session via POST /v1/session.',
+                    docs: 'https://webpeel.dev/docs/errors#session-not-found',
+                },
+                requestId: req.requestId || randomUUID(),
+            });
+            return;
+        }
+        // req.body can be undefined when no body was parsed; fall back to an empty
+        // object so the request gets the 400 below instead of a destructuring TypeError.
+        const { cookies } = req.body ?? {};
+        if (!Array.isArray(cookies) || cookies.length === 0) {
+            res.status(400).json({
+                success: false,
+                error: {
+                    type: 'bad_request',
+                    message: '`cookies` must be a non-empty array of cookie objects.',
+                    hint: 'Pass cookies exported from GET /v1/session/:id/cookies or a compatible Cookie[] array.',
+                    docs: 'https://webpeel.dev/docs/errors#bad-request',
+                },
+                requestId: req.requestId || randomUUID(),
+            });
+            return;
+        }
+        try {
+            // Playwright's addCookies validates the shape internally; invalid cookies will throw
+            await session.context.addCookies(cookies);
+            // Only a successful injection counts as session activity.
+            session.lastUsedAt = Date.now();
+            res.json({
+                sessionId: session.id,
+                injected: cookies.length,
+                expiresAt: new Date(session.lastUsedAt + session.ttlMs).toISOString(),
+            });
+        }
+        catch (err) {
+            const msg = err instanceof Error ? err.message : String(err);
+            res.status(400).json({
+                success: false,
+                error: {
+                    type: 'cookie_inject_failed',
+                    message: msg,
+                    hint: 'Ensure each cookie has at minimum: name, value, and domain (or url).',
+                    docs: 'https://webpeel.dev/docs/errors#cookie-inject-failed',
+                },
+                requestId: req.requestId || randomUUID(),
+            });
+        }
+    });
     // ── DELETE /v1/session/:id ───────────────────────────────────────────────────
     router.delete('/v1/session/:id', async (req, res) => {
         const ownerId = getOwnerId(req);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "webpeel",
-  "version": "0.20.4",
+  "version": "0.20.6",
   "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
   "author": "Jake Liu",
   "license": "AGPL-3.0-only",