novada-proxy-core 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/build/adapters/brightdata.d.ts +24 -0
  2. package/build/adapters/brightdata.js +56 -0
  3. package/build/adapters/generic.d.ts +32 -0
  4. package/build/adapters/generic.js +63 -0
  5. package/build/adapters/index.d.ts +16 -0
  6. package/build/adapters/index.js +42 -0
  7. package/build/adapters/novada.d.ts +23 -0
  8. package/build/adapters/novada.js +61 -0
  9. package/build/adapters/oxylabs.d.ts +22 -0
  10. package/build/adapters/oxylabs.js +54 -0
  11. package/build/adapters/smartproxy.d.ts +22 -0
  12. package/build/adapters/smartproxy.js +54 -0
  13. package/build/adapters/types.d.ts +58 -0
  14. package/build/adapters/types.js +7 -0
  15. package/build/config.d.ts +4 -0
  16. package/build/config.js +7 -0
  17. package/build/errors.d.ts +2 -0
  18. package/build/errors.js +58 -0
  19. package/build/index.d.ts +28 -0
  20. package/build/index.js +22 -0
  21. package/build/redact.d.ts +2 -0
  22. package/build/redact.js +24 -0
  23. package/build/tools/batch.d.ts +24 -0
  24. package/build/tools/batch.js +156 -0
  25. package/build/tools/crawl.d.ts +33 -0
  26. package/build/tools/crawl.js +604 -0
  27. package/build/tools/extract.d.ts +22 -0
  28. package/build/tools/extract.js +454 -0
  29. package/build/tools/fetch.d.ts +17 -0
  30. package/build/tools/fetch.js +243 -0
  31. package/build/tools/index.d.ts +19 -0
  32. package/build/tools/index.js +10 -0
  33. package/build/tools/map.d.ts +19 -0
  34. package/build/tools/map.js +131 -0
  35. package/build/tools/render.d.ts +8 -0
  36. package/build/tools/render.js +98 -0
  37. package/build/tools/research.d.ts +9 -0
  38. package/build/tools/research.js +126 -0
  39. package/build/tools/search.d.ts +9 -0
  40. package/build/tools/search.js +104 -0
  41. package/build/tools/session.d.ts +12 -0
  42. package/build/tools/session.js +108 -0
  43. package/build/tools/status.d.ts +2 -0
  44. package/build/tools/status.js +66 -0
  45. package/build/types.d.ts +34 -0
  46. package/build/types.js +1 -0
  47. package/build/utils.d.ts +18 -0
  48. package/build/utils.js +151 -0
  49. package/build/validation.d.ts +4 -0
  50. package/build/validation.js +6 -0
  51. package/package.json +50 -0
@@ -0,0 +1,243 @@
1
+ import axios from "axios";
2
+ import { HttpsProxyAgent } from "https-proxy-agent";
3
+ import { HttpProxyAgent } from "http-proxy-agent";
4
+ import { gunzipSync, brotliDecompressSync, inflateSync } from "zlib";
5
+ import { DEFAULT_USER_AGENT } from "../config.js";
6
+ import { htmlToMarkdown, unicodeSafeTruncate, countHtmlTags, contentDensity } from "../utils.js";
7
+ import { SAFE_COUNTRY, SAFE_CITY, SAFE_SESSION_ID, QUOTA_NOTE } from "../validation.js";
8
// ─── In-process response cache ───────────────────────────────────────────────
// Eliminates duplicate proxy credits when agents re-fetch the same URL.
// Keyed by (url + format + country). Session-pinned requests are NEVER cached
// (stickiness implies same-IP routing — caching would break that guarantee).
// TTL defaults to 300s. Set PROXY4AGENT_CACHE_TTL_SECONDS=0 to disable.
// Fallback TTL (seconds) used when the env var is unset or invalid.
const DEFAULT_CACHE_TTL_SECONDS = 300;
// Hard cap on cached entries; on overflow, expired entries are pruned first,
// then the oldest insertion is evicted (see pruneExpired / evictOldest below).
const MAX_CACHE_ENTRIES = 200;
// Map preserves insertion order, which both the LRU "refresh" on cache hits
// and evictOldest() rely on.
// Values: { payload: string(JSON), expires_at: epoch-ms, cached_at: epoch-ms }.
const _responseCache = new Map();
16
/**
 * Resolve the cache TTL (seconds) from the environment.
 * A value of 0 disables caching; unset/invalid values fall back to the default.
 */
export function getCacheTtl() {
    const configured = Number(process.env.PROXY4AGENT_CACHE_TTL_SECONDS);
    if (Number.isFinite(configured) && configured >= 0) {
        return configured;
    }
    return DEFAULT_CACHE_TTL_SECONDS;
}
21
/**
 * Build the cache key for a request.
 * Country is part of the key because geo-targeting changes the response body.
 */
export function makeCacheKey(url, format, country) {
    const countryPart = country ?? "";
    return [url, format, countryPart].join("|");
}
25
/**
 * Drop every cache entry whose TTL has elapsed.
 * Deleting while iterating a Map is safe in JS, so no snapshot is needed.
 */
function pruneExpired() {
    const cutoff = Date.now();
    for (const [key, entry] of _responseCache.entries()) {
        if (entry.expires_at <= cutoff) {
            _responseCache.delete(key);
        }
    }
}
33
/**
 * Remove the entry at the front of the Map (the oldest insertion), if any.
 * Map iteration order is insertion order, so the first key is the oldest.
 */
function evictOldest() {
    for (const key of _responseCache.keys()) {
        _responseCache.delete(key);
        return;
    }
}
39
/** Clear the entire cache (useful for tests and manual cache invalidation). */
// Exported so callers (e.g. test suites) can reset module-level cache state
// without re-importing the module.
export function clearResponseCache() {
    _responseCache.clear();
}
43
/**
 * Decode a raw response body to a UTF-8 string, honouring the declared
 * Content-Encoding. When the server declares an encoding we trust it and let
 * decompression errors propagate so the caller's retry loop fires. With no
 * declared encoding, probe for the gzip magic header before decoding as-is.
 */
function decompress(buffer, encoding) {
    switch (encoding) {
        case "gzip":
            return gunzipSync(buffer).toString("utf-8");
        case "br":
            return brotliDecompressSync(buffer).toString("utf-8");
        case "deflate":
            return inflateSync(buffer).toString("utf-8");
        default:
            break;
    }
    // No encoding header: gzip is the only format with dependable magic bytes
    // (0x1f 0x8b). Brotli and deflate lack reliable magic, so no probe for them.
    const looksGzipped = buffer.length >= 2 && buffer[0] === 0x1f && buffer[1] === 0x8b;
    if (looksGzipped) {
        try {
            return gunzipSync(buffer).toString("utf-8");
        }
        catch {
            // Magic bytes lied (corrupt stream) — fall through to raw decode.
        }
    }
    return buffer.toString("utf-8");
}
62
/**
 * Fetch `params.url` through the active proxy adapter and return a JSON string
 * envelope: { ok, tool, data, meta }.
 *
 * Behaviour:
 *  - Serves from the in-process cache when possible (never for sticky sessions).
 *  - Warns on stderr when targeting params the adapter cannot honour are set.
 *  - Retries once (500ms backoff) on network errors / 5xx; 4xx fails
 *    immediately, with a dedicated message for HTTP 429.
 *  - Handles decompression itself (axios decompress:false — see inline note),
 *    converts HTML to markdown when requested, truncates very large output.
 *
 * @param {object} params       validated fetch params (see validateFetchParams)
 * @param {object} adapter      proxy adapter (capabilities + buildProxyUrl)
 * @param {object} credentials  credentials passed through to the adapter
 * @returns {Promise<string>} JSON-stringified result envelope
 * @throws {Error} on invalid URL scheme, HTTP 429, or final failure
 */
export async function novadaProxyFetch(params, adapter, credentials) {
    const { url, format = "markdown", timeout = 60 } = params;
    if (!url.startsWith("http://") && !url.startsWith("https://")) {
        throw new Error("URL must start with http:// or https://");
    }
    // ── Cache lookup ────────────────────────────────────────────────────────────
    // Skip cache when session_id is set: sticky sessions imply same-IP routing,
    // so two agents with different session IDs would wrongly share cached content.
    const ttl = getCacheTtl();
    const cacheKey = !params.session_id && ttl > 0
        ? makeCacheKey(url, format, params.country)
        : null;
    if (cacheKey) {
        const hit = _responseCache.get(cacheKey);
        if (hit && hit.expires_at > Date.now()) {
            // LRU: refresh position in Map so this entry isn't evicted as "oldest"
            _responseCache.delete(cacheKey);
            _responseCache.set(cacheKey, hit);
            const serveStart = Date.now();
            const parsed = JSON.parse(hit.payload);
            parsed.meta.cache_hit = true;
            parsed.meta.cache_age_seconds = Math.floor((Date.now() - hit.cached_at) / 1000);
            parsed.meta.latency_ms = Date.now() - serveStart; // ~0ms — reflects cache serve time, not proxy latency
            return JSON.stringify(parsed);
        }
    }
    // Warn if targeting params are requested but the active adapter doesn't support them
    const unsupported = [];
    if (params.country && !adapter.capabilities.country)
        unsupported.push(`country (not supported by ${adapter.displayName})`);
    if (params.city && !adapter.capabilities.city)
        unsupported.push(`city (not supported by ${adapter.displayName})`);
    if (params.session_id && !adapter.capabilities.sticky)
        unsupported.push(`session_id/sticky (not supported by ${adapter.displayName})`);
    if (unsupported.length) {
        console.error(`[novada-proxy] Warning: ${unsupported.join(", ")}. Switch to Novada for full targeting support.`);
    }
    const proxyUrl = adapter.buildProxyUrl(credentials, params);
    // HttpsProxyAgent for HTTPS targets (CONNECT tunnel + TLS); HttpProxyAgent for plain HTTP
    const httpsAgent = new HttpsProxyAgent(proxyUrl);
    const httpAgent = new HttpProxyAgent(proxyUrl);
    let lastError = null;
    const startTime = Date.now();
    for (let attempt = 1; attempt <= 2; attempt++) {
        try {
            const response = await axios.get(url, {
                httpsAgent,
                httpAgent,
                proxy: false,
                // arraybuffer + decompress:false = we handle decompression ourselves.
                // axios built-in decompress conflicts with https-proxy-agent CONNECT tunnel
                // on large pages (Amazon 1.6MB returned ECONNABORTED with decompress:true).
                responseType: "arraybuffer",
                decompress: false,
                headers: {
                    "User-Agent": DEFAULT_USER_AGENT,
                    Accept: "text/html,application/xhtml+xml,*/*;q=0.8",
                    "Accept-Language": "en-US,en;q=0.9",
                    "Accept-Encoding": "gzip, deflate, br",
                },
                timeout: timeout * 1000,
                maxContentLength: 50 * 1024 * 1024,
                maxRedirects: 5,
            });
            const latency_ms = Date.now() - startTime;
            const encoding = response.headers["content-encoding"];
            const contentType = response.headers["content-type"];
            const body = decompress(Buffer.from(response.data), encoding);
            const isHtml = contentType?.includes("text/html") || body.toLowerCase().includes("<html");
            // Pre-truncate before expensive markdown conversion to avoid huge intermediate strings
            const bodyForConversion = body.length > 500_000 ? body.slice(0, 500_000) : body;
            const output = format === "markdown" && isHtml ? htmlToMarkdown(bodyForConversion) : body;
            const truncated = output.length > 100_000;
            const finalOutput = truncated
                ? unicodeSafeTruncate(output, 100_000) + "\n\n[... truncated — page is large]"
                : output;
            // Compute content density: ratio of useful text to tag overhead
            const tagCount = isHtml ? countHtmlTags(bodyForConversion) : 0;
            const content_density = isHtml
                ? contentDensity(finalOutput.length, tagCount)
                : 1.0;
            const result = {
                ok: true,
                tool: "novada_proxy_fetch",
                data: {
                    url,
                    status_code: response.status,
                    content: finalOutput,
                    content_type: contentType || "unknown",
                    size_bytes: body.length,
                    warnings: unsupported.length ? unsupported.map(u => `Ignored param: ${u}`) : undefined,
                },
                meta: {
                    latency_ms,
                    country: params.country,
                    session_id: params.session_id,
                    truncated,
                    content_density,
                    quota: { credits_estimated: 1, note: QUOTA_NOTE },
                    cache_hit: false,
                },
            };
            // Remove undefined fields from data
            if (!result.data.warnings)
                delete result.data.warnings;
            if (!result.meta.country)
                delete result.meta.country;
            if (!result.meta.session_id)
                delete result.meta.session_id;
            // ── Store in cache ──────────────────────────────────────────────────────
            if (cacheKey) {
                if (_responseCache.size >= MAX_CACHE_ENTRIES) {
                    pruneExpired();
                    if (_responseCache.size >= MAX_CACHE_ENTRIES)
                        evictOldest();
                }
                const now = Date.now();
                _responseCache.set(cacheKey, {
                    payload: JSON.stringify(result),
                    expires_at: now + ttl * 1000,
                    cached_at: now,
                });
            }
            return JSON.stringify(result);
        }
        catch (err) {
            lastError = err instanceof Error ? err : new Error(String(err));
            // Surface rate-limit errors clearly
            if (axios.isAxiosError(err) && err.response?.status === 429) {
                throw new Error("Rate limited (HTTP 429). Wait a moment before retrying. Consider using a session_id for consistent routing.");
            }
            // Only retry on network errors or 5xx — never retry 4xx (auth, not-found, etc.)
            const isRetryable = !(axios.isAxiosError(err) &&
                err.response &&
                err.response.status < 500);
            if (attempt < 2 && isRetryable) {
                // Backoff: 500ms * attempt before the retry
                await new Promise(r => setTimeout(r, 500 * attempt));
                continue;
            }
            // FIX: control previously fell through here, so the for-loop retried
            // even non-retryable 4xx errors (contradicting the comment above and
            // burning an extra proxy credit). Break so the failure surfaces now.
            break;
        }
    }
    throw lastError;
}
206
/**
 * Validate raw fetch params and normalise defaults.
 * Throws a descriptive Error on the first invalid field; returns the
 * normalised params object on success.
 */
export function validateFetchParams(raw) {
    const { url, country, city, session_id, format } = raw;
    if (!url || typeof url !== "string") {
        throw new Error("url is required and must be a string");
    }
    const hasScheme = url.startsWith("http://") || url.startsWith("https://");
    if (!hasScheme) {
        throw new Error("url must start with http:// or https://");
    }
    if (country !== undefined) {
        const valid = typeof country === "string" && country.length <= 10 && SAFE_COUNTRY.test(country);
        if (!valid) {
            throw new Error("country must be a 2-letter ISO code with no hyphens (e.g. US, DE, GB)");
        }
    }
    if (city !== undefined) {
        const valid = typeof city === "string" && city.length <= 50 && SAFE_CITY.test(city);
        if (!valid) {
            throw new Error("city must contain only letters, numbers, underscores, max 50 chars (e.g. newyork, london)");
        }
    }
    if (session_id !== undefined) {
        const valid = typeof session_id === "string" && session_id.length <= 64 && SAFE_SESSION_ID.test(session_id);
        if (!valid) {
            throw new Error("session_id must contain only letters, numbers, and underscores, max 64 chars (no hyphens)");
        }
    }
    if (format && format !== "raw" && format !== "markdown") {
        throw new Error("format must be 'raw' or 'markdown'");
    }
    const timeout = raw.timeout === undefined ? 60 : Number(raw.timeout);
    if (!Number.isFinite(timeout) || timeout < 1 || timeout > 120) {
        throw new Error("timeout must be between 1 and 120 seconds");
    }
    return {
        url,
        country,
        city,
        session_id,
        format: format || "markdown",
        timeout,
    };
}
@@ -0,0 +1,19 @@
1
// Barrel declarations for the tools module: re-exports each tool's entry
// point, its params validator, and its params type from a single path.
export { novadaProxyFetch, validateFetchParams } from "./fetch.js";
export type { FetchParams } from "./fetch.js";
export { novadaProxyBatchFetch, validateBatchFetchParams } from "./batch.js";
export type { BatchFetchParams, BatchFetchResult } from "./batch.js";
export { novadaProxySearch, validateSearchParams } from "./search.js";
export type { SearchParams } from "./search.js";
export { novadaProxySession, validateSessionParams } from "./session.js";
export type { SessionParams } from "./session.js";
export { novadaProxyStatus } from "./status.js";
export { novadaProxyRender, validateRenderParams } from "./render.js";
export type { RenderParams } from "./render.js";
export { novadaProxyExtract, validateExtractParams } from "./extract.js";
export type { ExtractParams } from "./extract.js";
export { novadaProxyMap, validateMapParams } from "./map.js";
export type { MapParams } from "./map.js";
export { novadaProxyCrawl, validateCrawlParams } from "./crawl.js";
export type { CrawlParams } from "./crawl.js";
export { novadaProxyResearch, validateResearchParams } from "./research.js";
export type { ResearchParams } from "./research.js";
@@ -0,0 +1,10 @@
1
// Barrel module for the tools package: re-exports every tool's entry point
// and its params validator from a single import path.
export { novadaProxyFetch, validateFetchParams } from "./fetch.js";
export { novadaProxyBatchFetch, validateBatchFetchParams } from "./batch.js";
export { novadaProxySearch, validateSearchParams } from "./search.js";
export { novadaProxySession, validateSessionParams } from "./session.js";
export { novadaProxyStatus } from "./status.js";
export { novadaProxyRender, validateRenderParams } from "./render.js";
export { novadaProxyExtract, validateExtractParams } from "./extract.js";
export { novadaProxyMap, validateMapParams } from "./map.js";
export { novadaProxyCrawl, validateCrawlParams } from "./crawl.js";
export { novadaProxyResearch, validateResearchParams } from "./research.js";
@@ -0,0 +1,19 @@
1
+ import type { ProxyAdapter, ProxyCredentials } from "../adapters/index.js";
2
export interface MapParams {
    /** Starting page to map (must begin with http:// or https://). */
    url: string;
    /** Maximum URLs returned (10–200; default 50 — see validateMapParams). */
    limit?: number;
    /** Also collect off-domain links (default false). */
    include_external?: boolean;
    /** Optional geo-targeting country code (e.g. US). */
    country?: string;
    /** Fetch timeout in seconds (1–120; default 60). */
    timeout?: number;
}
/**
 * Crawl a URL and return all internal links found on the page (and optionally
 * linked pages up to limit). This is a shallow map — it fetches the starting URL,
 * extracts all <a href> links, normalises them to absolute URLs, filters to the
 * same domain, and returns the list.
 *
 * For a full sitemap crawl, agents should call novada_proxy_map iteratively on
 * the discovered URLs or use the sitemap.xml directly.
 */
export declare function novadaProxyMap(params: MapParams, adapter: ProxyAdapter, credentials: ProxyCredentials): Promise<string>;
/** Validate raw input and apply defaults; throws on the first invalid field. */
export declare function validateMapParams(raw: Record<string, unknown>): MapParams;
@@ -0,0 +1,131 @@
1
+ import { novadaProxyFetch } from "./fetch.js";
2
+ import { SAFE_COUNTRY, QUOTA_NOTE } from "../validation.js";
3
/**
 * Crawl a URL and return all internal links found on the page (and optionally
 * linked pages up to limit). This is a shallow map — it fetches the starting URL,
 * extracts all <a href> links, normalises them to absolute URLs, filters to the
 * same domain, and returns the list.
 *
 * For a full sitemap crawl, agents should call novada_proxy_map iteratively on
 * the discovered URLs or use the sitemap.xml directly.
 */
export async function novadaProxyMap(params, adapter, credentials) {
    const { url, limit = 50, include_external = false, country, timeout = 60 } = params;
    const startTime = Date.now();
    // Parse origin for relative-URL resolution and same-domain filtering
    let origin;
    let hostname;
    try {
        const parsed = new URL(url);
        origin = parsed.origin;
        hostname = parsed.hostname;
    }
    catch {
        throw new Error(`Invalid URL: ${url}`);
    }
    // Fetch the starting page as raw HTML (markdown would strip the <a> tags)
    const fetchResultStr = await novadaProxyFetch({ url, format: "raw", country, timeout }, adapter, credentials);
    let html;
    try {
        const fetchResult = JSON.parse(fetchResultStr);
        html = fetchResult.data.content || "";
    }
    catch {
        // Defensive: if the envelope isn't valid JSON, treat the payload as HTML
        html = fetchResultStr;
    }
    // Extract all <a href> links; hrefs whose first char is '#' or '?' are skipped
    const hrefRe = /<a[^>]+href=["']([^"'#?][^"']*)["']/gi;
    const seen = new Set();
    const internalUrls = [];
    const externalUrls = [];
    let match;
    while ((match = hrefRe.exec(html)) !== null) {
        const raw = match[1]?.trim();
        if (!raw)
            continue;
        let resolved;
        try {
            resolved = new URL(raw, origin).toString();
        }
        catch {
            continue; // skip malformed hrefs
        }
        // Normalise: strip trailing slash, fragments already excluded by regex
        resolved = resolved.replace(/\/$/, "");
        if (seen.has(resolved))
            continue;
        seen.add(resolved);
        // Classify by hostname: the exact host or any subdomain counts as internal.
        // (Query strings are NOT stripped here — only leading-'?' hrefs were
        // excluded by the regex above.)
        let resolvedHostname;
        try {
            resolvedHostname = new URL(resolved).hostname;
        }
        catch {
            continue;
        }
        if (resolvedHostname === hostname || resolvedHostname.endsWith(`.${hostname}`)) {
            internalUrls.push(resolved);
        }
        else if (include_external) {
            externalUrls.push(resolved);
        }
    }
    // Hint at /sitemap.xml when the page itself didn't link to it
    const sitemapUrl = `${origin}/sitemap.xml`;
    const hasSitemap = !seen.has(sitemapUrl) ? `${sitemapUrl} (check manually — not on this page)` : null;
    // Apply limit: internal links take priority; externals fill the remainder
    const internal = internalUrls.slice(0, limit);
    const external = include_external ? externalUrls.slice(0, Math.max(0, limit - internal.length)) : [];
    const latency_ms = Date.now() - startTime;
    const result = {
        ok: true,
        tool: "novada_proxy_map",
        data: {
            source_url: url,
            domain: hostname,
            internal_url_count: internal.length,
            external_url_count: external.length,
            total_found: internalUrls.length + (include_external ? externalUrls.length : 0),
            truncated: internalUrls.length > limit,
            internal_urls: internal,
            ...(include_external ? { external_urls: external } : {}),
            ...(hasSitemap ? { sitemap_hint: hasSitemap } : {}),
        },
        meta: {
            latency_ms,
            country,
            quota: { credits_estimated: 1, note: QUOTA_NOTE },
        },
    };
    // Drop undefined meta fields so the JSON envelope stays compact
    if (!result.meta.country)
        delete result.meta.country;
    return JSON.stringify(result);
}
104
/**
 * Validate raw map params and normalise defaults.
 * Throws a descriptive Error on the first invalid field.
 */
export function validateMapParams(raw) {
    if (!raw.url || typeof raw.url !== "string") {
        throw new Error("url is required and must be a string");
    }
    const hasScheme = raw.url.startsWith("http://") || raw.url.startsWith("https://");
    if (!hasScheme) {
        throw new Error("url must start with http:// or https://");
    }
    const limit = raw.limit === undefined ? 50 : Number(raw.limit);
    if (!Number.isFinite(limit) || limit < 10 || limit > 200) {
        throw new Error("limit must be between 10 and 200");
    }
    if (raw.country !== undefined) {
        const valid = typeof raw.country === "string" && raw.country.length <= 10 && SAFE_COUNTRY.test(raw.country);
        if (!valid) {
            throw new Error("country must be a 2-letter ISO code with no hyphens (e.g. US, DE, GB)");
        }
    }
    const timeout = raw.timeout === undefined ? 60 : Number(raw.timeout);
    if (!Number.isFinite(timeout) || timeout < 1 || timeout > 120) {
        throw new Error("timeout must be between 1 and 120 seconds");
    }
    return {
        url: raw.url,
        limit,
        // Strict boolean check: anything other than literal `true` is false
        include_external: raw.include_external === true,
        country: raw.country,
        timeout,
    };
}
@@ -0,0 +1,8 @@
1
export interface RenderParams {
    /** Page to render (must begin with http:// or https://). */
    url: string;
    /** Output format (default "markdown"). */
    format?: "markdown" | "html" | "text";
    /** Optional CSS selector to wait for after DOMContentLoaded (max 200 chars). */
    wait_for?: string;
    /** Overall render timeout in seconds (5–120; default 60). */
    timeout?: number;
}
/** Render a page in a remote headless browser and return a JSON result envelope. */
export declare function novadaProxyRender(params: RenderParams, browserWsEndpoint: string): Promise<string>;
/** Validate raw input and apply defaults; throws on the first invalid field. */
export declare function validateRenderParams(raw: Record<string, unknown>): RenderParams;
@@ -0,0 +1,98 @@
1
+ import puppeteer from "puppeteer-core";
2
+ import { htmlToMarkdown, htmlToText, unicodeSafeTruncate } from "../utils.js";
3
+ import { QUOTA_NOTE } from "../validation.js";
4
/**
 * Render a page in a remote headless browser (puppeteer over a WebSocket
 * endpoint) and return a JSON string envelope { ok, tool, data, meta }.
 *
 * Navigates with waitUntil:"domcontentloaded", then optionally waits for
 * `wait_for` (a CSS selector) under a shared deadline so the combined time
 * never exceeds `timeout`. The DOM snapshot is converted to the requested
 * format and truncated if very large. Page and browser connection are always
 * released in finally blocks because sessions are billed per second.
 *
 * @param {object} params  validated render params (url, format, wait_for, timeout)
 * @param {string} browserWsEndpoint  WebSocket endpoint of the remote browser
 * @returns {Promise<string>} JSON-stringified result envelope
 * @throws {Error} on bad URL scheme, navigation/selector timeout, or WS failure
 */
export async function novadaProxyRender(params, browserWsEndpoint) {
    const { url, format = "markdown", wait_for, timeout = 60 } = params;
    if (!url.startsWith("http://") && !url.startsWith("https://")) {
        throw new Error("URL must start with http:// or https://");
    }
    const startTime = Date.now();
    const browser = await puppeteer.connect({
        browserWSEndpoint: browserWsEndpoint,
        defaultViewport: { width: 1366, height: 768 },
    });
    try {
        const page = await browser.newPage();
        try {
            // Use a shared deadline so goto + waitForSelector together never exceed timeout
            const deadline = Date.now() + timeout * 1000;
            const response = await page.goto(url, {
                waitUntil: "domcontentloaded",
                timeout: timeout * 1000,
            });
            if (wait_for) {
                const remaining = deadline - Date.now();
                if (remaining <= 0)
                    throw new Error(`Timeout waiting for selector: ${wait_for}`);
                await page.waitForSelector(wait_for, { timeout: remaining });
            }
            const html = await page.content();
            // Format dispatch: html passes through; text and markdown are converted
            const content = format === "html" ? html
                : format === "text" ? htmlToText(html)
                    : htmlToMarkdown(html);
            const truncated = content.length > 100_000;
            const finalContent = truncated
                ? unicodeSafeTruncate(content, 100_000) + "\n\n[... truncated — rendered page is large]"
                : content;
            const latency_ms = Date.now() - startTime;
            // page.goto may resolve with a null response — fall back to 200
            const statusCode = response?.status() ?? 200;
            const result = {
                ok: true,
                tool: "novada_proxy_render",
                data: {
                    url,
                    status_code: statusCode,
                    content: finalContent,
                    content_type: "text/html",
                    size_bytes: html.length,
                    format,
                },
                meta: {
                    latency_ms,
                    truncated,
                    quota: { credits_estimated: 5, note: "Browser API is metered separately — " + QUOTA_NOTE },
                },
            };
            return JSON.stringify(result);
        }
        finally {
            // Always close the page to avoid server-side session leak (billed by session-second)
            await page.close().catch(() => { });
        }
    }
    finally {
        // Always disconnect even if newPage() throws (quota exhaustion, WS drop)
        await browser.disconnect();
    }
}
68
/**
 * Validate raw render params and normalise defaults.
 * Throws a descriptive Error on the first invalid field.
 */
export function validateRenderParams(raw) {
    if (!raw.url || typeof raw.url !== "string") {
        throw new Error("url is required");
    }
    const hasScheme = raw.url.startsWith("http://") || raw.url.startsWith("https://");
    if (!hasScheme) {
        throw new Error("url must start with http:// or https://");
    }
    if (raw.format && !["markdown", "html", "text"].includes(raw.format)) {
        throw new Error("format must be markdown, html, or text");
    }
    if (raw.wait_for !== undefined) {
        const isShortString = typeof raw.wait_for === "string" && raw.wait_for.length <= 200;
        if (!isShortString) {
            throw new Error("wait_for must be a CSS selector string (max 200 chars)");
        }
        // Allowlist: only safe CSS selector characters — no backticks, braces, or semicolons
        const SAFE_SELECTOR = /^[a-zA-Z0-9\s\[\]().#:*>,~+="'_-]+$/;
        if (!SAFE_SELECTOR.test(raw.wait_for)) {
            throw new Error("wait_for contains invalid characters for a CSS selector");
        }
    }
    const timeout = raw.timeout === undefined ? 60 : Number(raw.timeout);
    if (!Number.isFinite(timeout) || timeout < 5 || timeout > 120) {
        throw new Error("timeout must be 5-120 seconds");
    }
    return {
        url: raw.url,
        format: raw.format || "markdown",
        wait_for: raw.wait_for,
        timeout,
    };
}
@@ -0,0 +1,9 @@
1
+ import type { ProxyAdapter, ProxyCredentials } from "../adapters/index.js";
2
export interface ResearchParams {
    /** Search query (non-empty, max 500 chars; trimmed by the validator). */
    query: string;
    /** Sources fetched per tier: quick=3, standard=5, deep=10 (default "standard"). */
    depth?: "quick" | "standard" | "deep";
    /** Optional geo-targeting country code (e.g. US). */
    country?: string;
    /** Per-fetch timeout in seconds (1–120; default 60). */
    timeout?: number;
}
/** Search, fetch the top results, and return extracted findings as a JSON envelope. */
export declare function novadaProxyResearch(params: ResearchParams, adapter: ProxyAdapter, credentials: ProxyCredentials, novadaApiKey: string): Promise<string>;
/** Validate raw input and apply defaults; throws on the first invalid field. */
export declare function validateResearchParams(raw: Record<string, unknown>): ResearchParams;
@@ -0,0 +1,126 @@
1
+ import { novadaProxySearch } from "./search.js";
2
+ import { novadaProxyBatchFetch } from "./batch.js";
3
+ import { novadaProxyFetch } from "./fetch.js";
4
+ import { SAFE_COUNTRY } from "../validation.js";
5
// Number of sources fetched per depth tier.
const DEPTH_MAP = { quick: 3, standard: 5, deep: 10 };
/**
 * Multi-step research pipeline: search → fetch top results → extract findings
 * → build a concatenated findings summary. Returns a JSON string envelope
 * { ok, tool, data, meta }.
 *
 * @param {object} params       validated research params (query, depth, country, timeout)
 * @param {object} adapter      proxy adapter forwarded to the fetch/batch tools
 * @param {object} credentials  proxy credentials forwarded to the fetch/batch tools
 * @param {string} novadaApiKey API key used by the search tool
 * @returns {Promise<string>} JSON-stringified result envelope
 */
export async function novadaProxyResearch(params, adapter, credentials, novadaApiKey) {
    const { query, depth = "standard", country, timeout = 60 } = params;
    const numSources = DEPTH_MAP[depth];
    const wallStart = Date.now();
    // Step 1: Search
    const searchResult = await novadaProxySearch({ query, num: numSources, country }, novadaApiKey);
    const searchParsed = JSON.parse(searchResult);
    const searchResults = searchParsed.data.results || [];
    if (searchResults.length === 0) {
        // Short-circuit: nothing to fetch — return an empty-findings envelope
        return JSON.stringify({
            ok: true,
            tool: "novada_proxy_research",
            data: {
                query,
                depth,
                sources_searched: 0,
                sources_fetched: 0,
                sources_failed: 0,
                findings: [],
                urls: [],
                findings_summary: "No search results found for this query.",
            },
            meta: { latency_ms: Date.now() - wallStart, quota: { credits_estimated: 1 } },
        });
    }
    // Step 2: Fetch top results
    const urls = searchResults.slice(0, numSources).map(r => r.url);
    let batchResults = [];
    if (urls.length === 1) {
        // batch_fetch requires minimum 2 URLs — fetch single URL directly
        try {
            const fetchResult = await novadaProxyFetch({ url: urls[0], format: "markdown", country, timeout }, adapter, credentials);
            const parsed = JSON.parse(fetchResult);
            batchResults = [{ url: urls[0], ok: true, content: parsed.data.content }];
        }
        catch {
            // Best-effort: a failed single fetch still yields a result entry
            batchResults = [{ url: urls[0], ok: false, error: { code: "FETCH_FAILED", message: "Failed to fetch" } }];
        }
    }
    else {
        const batchResult = await novadaProxyBatchFetch({ urls, format: "markdown", country, timeout, concurrency: 3 }, adapter, credentials);
        const batchParsed = JSON.parse(batchResult);
        batchResults = batchParsed.data.results || [];
    }
    // Step 3: Extract findings (title + preview) from each successful fetch
    const findings = batchResults
        .filter(r => r.ok && r.content)
        .map(r => {
        const content = r.content || "";
        // Title preference: first markdown H1, else first non-empty line, else URL
        const titleMatch = content.match(/^#\s+(.+)/m);
        const firstLine = content.split("\n").find(l => l.trim().length > 0)?.trim() || r.url;
        const title = titleMatch ? titleMatch[1].trim() : firstLine;
        const contentPreview = content.slice(0, 500).trim();
        return {
            title,
            url: r.url,
            snippet: searchResults.find(s => s.url === r.url)?.snippet || "",
            content_preview: contentPreview,
        };
    });
    // Step 4: Build findings summary (concatenated source previews — agent should analyze findings[] for deeper synthesis)
    const summaryParts = findings.map(f => {
        // Skip heading lines to get actual content for the summary
        const paragraphs = f.content_preview.split("\n\n").filter(p => !p.trim().startsWith("#"));
        const firstParagraph = paragraphs[0]?.trim() || f.snippet;
        return `According to ${f.title} (${f.url}): ${firstParagraph}`;
    });
    const findings_summary = summaryParts.length > 0
        ? summaryParts.join("\n\n")
        : "Unable to build findings summary — all source fetches failed.";
    const latency_ms = Date.now() - wallStart;
    const sourcesFetched = batchResults.filter(r => r.ok).length;
    const sourcesFailed = batchResults.filter(r => !r.ok).length;
    return JSON.stringify({
        ok: true,
        tool: "novada_proxy_research",
        data: {
            query,
            depth,
            sources_searched: searchResults.length,
            sources_fetched: sourcesFetched,
            sources_failed: sourcesFailed,
            findings,
            urls: findings.map(f => f.url),
            findings_summary,
        },
        meta: {
            latency_ms,
            // Estimate: 1 search credit plus one per fetched URL
            quota: { credits_estimated: 1 + urls.length },
        },
    });
}
98
/**
 * Validate raw research params and normalise defaults.
 * Throws a descriptive Error on the first invalid field; trims the query.
 */
export function validateResearchParams(raw) {
    const { query, depth, country } = raw;
    if (!query || typeof query !== "string") {
        throw new Error("query is required and must be a string");
    }
    if (query.trim().length === 0) {
        throw new Error("query must not be empty");
    }
    if (query.length > 500) {
        throw new Error("query must be 500 characters or less");
    }
    if (depth !== undefined && depth !== "quick" && depth !== "standard" && depth !== "deep") {
        throw new Error("depth must be 'quick', 'standard', or 'deep'");
    }
    if (country !== undefined) {
        const valid = typeof country === "string" && country.length <= 10 && SAFE_COUNTRY.test(country);
        if (!valid) {
            throw new Error("country must be a 2-letter ISO code with no hyphens (e.g. US, DE, GB)");
        }
    }
    const timeout = raw.timeout === undefined ? 60 : Number(raw.timeout);
    if (!Number.isFinite(timeout) || timeout < 1 || timeout > 120) {
        throw new Error("timeout must be between 1 and 120 seconds");
    }
    return {
        query: query.trim(),
        depth: depth || "standard",
        country,
        timeout,
    };
}