@character-foundry/character-foundry 0.1.8 → 0.1.9-dev.1765913722

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/image-utils.ts","../../image-utils/src/extraction.ts","../../image-utils/src/ssrf.ts"],"sourcesContent":["export * from '@character-foundry/image-utils';\n","/**\n * Image URL Extraction\n *\n * Extracts image URLs from markdown, HTML, and base64 data URLs in text.\n * Used across Federation, Archive, and Architect for image processing.\n */\n\nexport interface ImageExtractionOptions {\n /** Include markdown image syntax `![alt](url)` (default: true) */\n includeMarkdown?: boolean;\n /** Include HTML img tags `<img src=\"url\">` (default: true) */\n includeHTML?: boolean;\n /** Include base64 data URLs `data:image/...;base64,...` (default: true) */\n includeBase64?: boolean;\n}\n\nexport interface ExtractedImage {\n /** The extracted URL or data URL */\n url: string;\n /** Source format where the image was found */\n source: 'markdown' | 'html' | 'base64';\n /** Surrounding context for debugging (optional) */\n context?: string;\n}\n\n/**\n * Extract all image URLs from text.\n *\n * Supports markdown, HTML, and base64 data URLs.\n * Returns deduplicated results.\n *\n * @param text - Text to extract image URLs from\n * @param options - Extraction options\n * @returns Array of extracted images with source information\n *\n * @example\n * ```typescript\n * const text = 'Check out ![portrait](avatar.png) and <img src=\"banner.jpg\">';\n * const images = extractImageUrls(text);\n * // [\n * // { url: 'avatar.png', source: 'markdown' },\n * // { url: 'banner.jpg', source: 'html' }\n * // ]\n * ```\n */\nexport function extractImageUrls(\n text: string,\n options: ImageExtractionOptions = {},\n): ExtractedImage[] {\n const {\n includeMarkdown = true,\n includeHTML = true,\n includeBase64 = true,\n } = options;\n\n const results: ExtractedImage[] = [];\n const seen = new Set<string>();\n\n // Extract markdown images: ![alt](url) or ![alt](<url>) or ![alt](url =dimensions)\n // Supports: standard, angle brackets, and SillyTavern dimension syntax\n if (includeMarkdown) {\n const markdownPattern = /!\\[([^\\]]*)\\]\\(<?([^>\\s)]+)>?(?:\\s*=[^)]+)?\\)/g;\n let match: RegExpExecArray | null;\n\n while ((match = markdownPattern.exec(text)) !== null) {\n const url = match[2]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'markdown',\n context: match[0],\n });\n }\n }\n }\n\n // Extract HTML img tags: <img src=\"url\"> (quoted and unquoted)\n if (includeHTML) {\n // Quoted src\n const htmlQuotedPattern = /<img[^>]+src=[\"']([^\"']+)[\"'][^>]*>/gi;\n let match: RegExpExecArray | null;\n\n while ((match = htmlQuotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // Unquoted src (e.g., <img src=url>)\n const htmlUnquotedPattern = /<img[^>]+src=([^\\s\"'>]+)[^>]*>/gi;\n while ((match = htmlUnquotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Skip if it starts with a quote (already handled above)\n if (url && !url.startsWith('\"') && !url.startsWith(\"'\") && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // CSS url() function: url(url), url(\"url\"), url('url')\n // Common in background-image, background, content properties\n const cssUrlPattern = /url\\([\"']?([^)\"']+)[\"']?\\)/gi;\n while ((match = cssUrlPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Only include http(s) URLs to avoid CSS variables and relative paths\n if (url && (url.startsWith('http://') || url.startsWith('https://')) && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n }\n\n // Extract plain image URLs (not wrapped in any syntax)\n if (includeHTML) {\n // Plain URLs with image extensions (common on image hosts)\n const plainUrlPattern = /(?<![(\"'])(https?:\\/\\/[^\\s<>\"']+\\.(?:jpg|jpeg|png|gif|webp|svg|avif|bmp))(?![)\"'])/gi;\n let match: RegExpExecArray | null;\n\n while ((match = plainUrlPattern.exec(text)) !== null) {\n const url = match[0]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: url,\n });\n }\n }\n }\n\n // Extract base64 data URLs: data:image/...;base64,...\n if (includeBase64) {\n const dataUrlPattern = /data:image\\/[^;]+;base64,[A-Za-z0-9+/=]+/g;\n let match: RegExpExecArray | null;\n\n while ((match = dataUrlPattern.exec(text)) !== null) {\n const url = match[0];\n if (!seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'base64',\n });\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract only HTTP/HTTPS URLs (excludes data URLs and relative paths).\n *\n * Convenience wrapper around extractImageUrls that filters for remote URLs.\n *\n * @param text - Text to extract URLs from\n * @returns Array of extracted HTTP/HTTPS image URLs\n */\nexport function extractRemoteImageUrls(text: string): ExtractedImage[] {\n const all = extractImageUrls(text, { includeBase64: false });\n return all.filter((img) => /^https?:\\/\\//i.test(img.url));\n}\n\n/**\n * Extract only base64 data URLs.\n *\n * Convenience wrapper around extractImageUrls.\n *\n * @param text - Text to extract data URLs from\n * @returns Array of base64 image data URLs\n */\nexport function extractDataUrls(text: string): ExtractedImage[] {\n return extractImageUrls(text, {\n includeMarkdown: false,\n includeHTML: false,\n includeBase64: true,\n });\n}\n\n/**\n * Count images in text without extracting full details.\n *\n * More efficient than extractImageUrls when you only need the count.\n *\n * @param text - Text to count images in\n * @returns Total number of unique image references\n */\nexport function countImages(text: string): number {\n return extractImageUrls(text).length;\n}\n","/**\n * SSRF (Server-Side Request Forgery) Protection\n *\n * Validates URLs to prevent SSRF attacks.\n * Browser-safe implementation (no Node.js dependencies).\n */\n\nexport interface SSRFPolicy {\n /** Allow private IP addresses (10.x, 172.16-31.x, 192.168.x) (default: false) */\n allowPrivateIPs?: boolean;\n /** Allow localhost/loopback (127.x, ::1) (default: false) */\n allowLocalhost?: boolean;\n /** Blocked domain patterns (e.g., ['internal.company.com', '*.local']) */\n blockedDomains?: string[];\n /** Allowed domain patterns - if provided, ONLY these are allowed */\n allowedDomains?: string[];\n /** Allow data URLs (data:image/...) (default: false) */\n allowDataUrls?: boolean;\n}\n\nexport interface SafetyCheck {\n /** Whether the URL is safe according to policy */\n safe: boolean;\n /** Reason why URL is unsafe (if safe=false) */\n reason?: string;\n}\n\n/**\n * Default SSRF policy - blocks private IPs, localhost, and data URLs.\n */\nexport const DEFAULT_SSRF_POLICY: Required<SSRFPolicy> = {\n allowPrivateIPs: false,\n allowLocalhost: false,\n blockedDomains: [],\n allowedDomains: [],\n allowDataUrls: false,\n};\n\n/**\n * Check if URL is safe to fetch according to SSRF policy.\n *\n * This is the canonical SSRF protection - all apps should use this\n * before fetching external URLs.\n *\n * @param url - URL to validate\n * @param policy - SSRF policy (uses defaults if not provided)\n * @returns Safety check result with reason if unsafe\n *\n * @example\n * ```typescript\n * const check = isURLSafe('http://10.0.0.1/secret');\n * if (!check.safe) {\n * console.error('Unsafe URL:', check.reason);\n * }\n * ```\n */\nexport function isURLSafe(\n url: string,\n policy: SSRFPolicy = {},\n): SafetyCheck {\n const config = { ...DEFAULT_SSRF_POLICY, ...policy };\n\n // Parse URL\n let parsed: URL;\n try {\n parsed = new URL(url);\n } catch {\n return { safe: false, reason: 'Invalid URL format' };\n }\n\n // Check protocol\n if (parsed.protocol === 'data:') {\n if (!config.allowDataUrls) {\n return { safe: false, reason: 'Data URLs not allowed' };\n }\n return { safe: true };\n }\n\n if (!['http:', 'https:'].includes(parsed.protocol)) {\n return {\n safe: false,\n reason: `Protocol '${parsed.protocol}' not allowed (only http/https)`,\n };\n }\n\n const hostname = parsed.hostname.toLowerCase();\n\n // Check allowed domains (whitelist)\n if (config.allowedDomains.length > 0) {\n const isAllowed = config.allowedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (!isAllowed) {\n return {\n safe: false,\n reason: `Domain '${hostname}' not in allowed list`,\n };\n }\n }\n\n // Check blocked domains (blacklist)\n if (config.blockedDomains.length > 0) {\n const isBlocked = config.blockedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (isBlocked) {\n return {\n safe: false,\n reason: `Domain '${hostname}' is blocked`,\n };\n }\n }\n\n // Check localhost\n if (isLocalhost(hostname) && !config.allowLocalhost) {\n return { safe: false, reason: 'Localhost not allowed' };\n }\n\n // Check private IPs\n if (isPrivateIP(hostname) && !config.allowPrivateIPs) {\n return { safe: false, reason: 'Private IP addresses not allowed' };\n }\n\n return { safe: true };\n}\n\n/**\n * Quick check if URL is safe with default policy.\n *\n * Convenience wrapper for common case.\n *\n * @param url - URL to validate\n * @returns true if safe, false otherwise\n */\nexport function isSafeForFetch(url: string): boolean {\n return isURLSafe(url).safe;\n}\n\n/**\n * Match domain against pattern (supports wildcards).\n *\n * @example\n * ```typescript\n * matchDomainPattern('api.github.com', '*.github.com') // true\n * matchDomainPattern('github.com', '*.github.com') // false\n * matchDomainPattern('github.com', 'github.com') // true\n * ```\n */\nfunction matchDomainPattern(domain: string, pattern: string): boolean {\n // Exact match\n if (domain === pattern) return true;\n\n // Wildcard match - only matches subdomains, not the base domain\n if (pattern.startsWith('*.')) {\n const suffix = pattern.slice(2);\n return domain.endsWith('.' + suffix);\n }\n\n return false;\n}\n\n/**\n * Check if hostname is localhost/loopback.\n */\nfunction isLocalhost(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 loopback\n if (lower === 'localhost' || lower === '0.0.0.0' || lower.startsWith('127.')) {\n return true;\n }\n\n // IPv6 loopback\n if (lower === '::1' || lower === '[::1]') {\n return true;\n }\n\n return false;\n}\n\n/**\n * Check if hostname is a private IP address.\n *\n * Checks for:\n * - 10.0.0.0/8\n * - 172.16.0.0/12\n * - 192.168.0.0/16\n * - Link-local: 169.254.0.0/16 (includes AWS metadata endpoint)\n * - IPv6 private ranges\n */\nfunction isPrivateIP(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 private ranges\n const parts = hostname.split('.').map(Number);\n\n if (\n parts.length === 4 &&\n parts.every((p) => p >= 0 && p <= 255 && !isNaN(p))\n ) {\n const [octet1, octet2] = parts;\n\n // 10.0.0.0/8\n if (octet1 === 10) return true;\n\n // 172.16.0.0/12\n if (octet1 === 172 && octet2 !== undefined && octet2 >= 16 && octet2 <= 31)\n return true;\n\n // 192.168.0.0/16\n if (octet1 === 192 && octet2 === 168) return true;\n\n // 169.254.0.0/16 (link-local) - includes AWS metadata endpoint\n if (octet1 === 169 && octet2 === 254) return true;\n }\n\n // IPv6 private ranges using regex patterns\n // Strip brackets if present (URL parser adds them: [fc00::1])\n const cleanedHostname = lower.replace(/^\\[|\\]$/g, '');\n\n const ipv6Patterns = [\n /^f[cd][0-9a-f]{2}:/i, // fc00::/7 (includes both fc and fd ranges)\n /^fe80:/i, // fe80::/10 (link-local)\n ];\n\n for (const pattern of ipv6Patterns) {\n if (pattern.test(cleanedHostname)) {\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Filter array of URLs, keeping only safe ones.\n *\n * @param urls - Array of URLs to filter\n * @param policy - SSRF policy\n * @returns Array of safe URLs\n */\nexport function filterSafeUrls(\n urls: string[],\n policy?: SSRFPolicy,\n): string[] {\n return urls.filter((url) => isURLSafe(url, policy).safe);\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;AC6CO,SAAS,iBACd,MACA,UAAkC,CAAC,GACjB;AAClB,QAAM;IACJ,kBAAkB;IAClB,cAAc;IACd,gBAAgB;EAClB,IAAI;AAEJ,QAAM,UAA4B,CAAC;AACnC,QAAM,OAAO,oBAAI,IAAY;AAI7B,MAAI,iBAAiB;AACnB,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,oBAAoB;AAC1B,QAAI;AAEJ,YAAQ,QAAQ,kBAAkB,KAAK,IAAI,OAAO,MAAM;AACtD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAGA,UAAM,sBAAsB;AAC5B,YAAQ,QAAQ,oBAAoB,KAAK,IAAI,OAAO,MAAM;AACxD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,OAAO,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,GAAG;AACzE,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAIA,UAAM,gBAAgB;AACtB,YAAQ,QAAQ,cAAc,KAAK,IAAI,OAAO,MAAM;AAClD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,QAAQ,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU,MAAM,CAAC,KAAK,IAAI,GAAG,GAAG;AACtF,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS;QACX,CAAC;MACH;IACF;EACF;AAGA,MAAI,eAAe;AACjB,UAAM,iBAAiB;AACvB,QAAI;AAEJ,YAAQ,QAAQ,eAAe,KAAK,IAAI,OAAO,MAAM;AACnD,YAAM,MAAM,MAAM,CAAC;AACnB,UAAI,CAAC,KAAK,IAAI,GAAG,GAAG;AAClB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;QACV,CAAC;MACH;IACF;EACF;AAEA,SAAO;AACT;AAUO,SAAS,uBAAuB,MAAgC;AACrE,QAAM,MAAM,iBAAiB,MAAM,EAAE,eAAe,MAAM,CAAC;AAC3D,SAAO,IAAI,OAAO,CAAC,QAAQ,gBAAgB,KAAK,IAAI,GAAG,CAAC;AAC1D;AAUO,SAAS,gBAAgB,MAAgC;AAC9D,SAAO,iBAAiB,MAAM;IAC5B,iBAAiB;IACjB,aAAa;IACb,eAAe;EACjB,CAAC;AACH;AAUO,SAAS,YAAY,MAAsB;AAChD,SAAO,iBAAiB,IAAI,EAAE;AAChC;AC/KO,IAAM,sBAA4C;EACvD,iBAAiB;EACjB,gBAAgB;EAChB,gBAAgB,CAAC;EACjB,gBAAgB,CAAC;EACjB,eAAe;AACjB;AAoBO,SAAS,UACd,KACA,SAAqB,CAAC,GACT;AACb,QAAM,SAAS,EAAE,GAAG,qBAAqB,GAAG,OAAO;AAGnD,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;EACtB,QAAQ;AACN,WAAO,EAAE,MAAM,OAAO,QAAQ,qBAAqB;EACrD;AAGA,MAAI,OAAO,aAAa,SAAS;AAC/B,QAAI,CAAC,OAAO,eAAe;AACzB,aAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;IACxD;AACA,WAAO,EAAE,MAAM,KAAK;EACtB;AAEA,MAAI,CAAC,CAAC,SAAS,QAAQ,EAAE,SAAS,OAAO,QAAQ,GAAG;AAClD,WAAO;MACL,MAAM;MACN,QAAQ,aAAa,OAAO,QAAQ;IACtC;EACF;AAEA,QAAM,WAAW,OAAO,SAAS,YAAY;AAG7C,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,CAAC,WAAW;AACd,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,WAAW;AACb,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,gBAAgB;AACnD,WAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;EACxD;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,iBAAiB;AACpD,WAAO,EAAE,MAAM,OAAO,QAAQ,mCAAmC;EACnE;AAEA,SAAO,EAAE,MAAM,KAAK;AACtB;AAUO,SAAS,eAAe,KAAsB;AACnD,SAAO,UAAU,GAAG,EAAE;AACxB;AAYA,SAAS,mBAAmB,QAAgB,SAA0B;AAEpE,MAAI,WAAW,QAAS,QAAO;AAG/B,MAAI,QAAQ,WAAW,IAAI,GAAG;AAC5B,UAAM,SAAS,QAAQ,MAAM,CAAC;AAC9B,WAAO,OAAO,SAAS,MAAM,MAAM;EACrC;AAEA,SAAO;AACT;AAKA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,MAAI,UAAU,eAAe,UAAU,aAAa,MAAM,WAAW,MAAM,GAAG;AAC5E,WAAO;EACT;AAGA,MAAI,UAAU,SAAS,UAAU,SAAS;AACxC,WAAO;EACT;AAEA,SAAO;AACT;AAYA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,QAAM,QAAQ,SAAS,MAAM,GAAG,EAAE,IAAI,MAAM;AAE5C,MACE,MAAM,WAAW,KACjB,MAAM,MAAM,CAAC,MAAM,KAAK,KAAK,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,GAClD;AACA,UAAM,CAAC,QAAQ,MAAM,IAAI;AAGzB,QAAI,WAAW,GAAI,QAAO;AAG1B,QAAI,WAAW,OAAO,WAAW,UAAa,UAAU,MAAM,UAAU;AACtE,aAAO;AAGT,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;AAG7C,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;EAC/C;AAIA,QAAM,kBAAkB,MAAM,QAAQ,YAAY,EAAE;AAEpD,QAAM,eAAe;IACnB;;IACA;;EACF;AAEA,aAAW,WAAW,cAAc;AAClC,QAAI,QAAQ,KAAK,eAAe,GAAG;AACjC,aAAO;IACT;EACF;AAEA,SAAO;AACT;AASO,SAAS,eACd,MACA,QACU;AACV,SAAO,KAAK,OAAO,CAAC,QAAQ,UAAU,KAAK,MAAM,EAAE,IAAI;AACzD;","names":[]}
1
+ {"version":3,"sources":["../src/image-utils.ts","../../image-utils/src/extraction.ts","../../image-utils/src/ssrf.ts"],"sourcesContent":["export * from '@character-foundry/image-utils';\n","/**\n * Image URL Extraction\n *\n * Extracts image URLs from markdown, HTML, and base64 data URLs in text.\n * Used across Federation, Archive, and Architect for image processing.\n */\n\nexport interface ImageExtractionOptions {\n /** Include markdown image syntax `![alt](url)` (default: true) */\n includeMarkdown?: boolean;\n /** Include HTML img tags `<img src=\"url\">` (default: true) */\n includeHTML?: boolean;\n /** Include base64 data URLs `data:image/...;base64,...` (default: true) */\n includeBase64?: boolean;\n}\n\nexport interface ExtractedImage {\n /** The extracted URL or data URL */\n url: string;\n /** Source format where the image was found */\n source: 'markdown' | 'html' | 'base64';\n /** Surrounding context for debugging (optional) */\n context?: string;\n}\n\n/**\n * Extract all image URLs from text.\n *\n * Supports markdown, HTML, and base64 data URLs.\n * Returns deduplicated results.\n *\n * @param text - Text to extract image URLs from\n * @param options - Extraction options\n * @returns Array of extracted images with source information\n *\n * @example\n * ```typescript\n * const text = 'Check out ![portrait](avatar.png) and <img src=\"banner.jpg\">';\n * const images = extractImageUrls(text);\n * // [\n * // { url: 'avatar.png', source: 'markdown' },\n * // { url: 'banner.jpg', source: 'html' }\n * // ]\n * ```\n */\nexport function extractImageUrls(\n text: string,\n options: ImageExtractionOptions = {},\n): ExtractedImage[] {\n const {\n includeMarkdown = true,\n includeHTML = true,\n includeBase64 = true,\n } = options;\n\n const results: ExtractedImage[] = [];\n const seen = new Set<string>();\n\n // Extract markdown images: ![alt](url) or ![alt](<url>) or ![alt](url =dimensions)\n // Supports: standard, angle brackets, and SillyTavern dimension syntax\n if (includeMarkdown) {\n const markdownPattern = /!\\[([^\\]]*)\\]\\(<?([^>\\s)]+)>?(?:\\s*=[^)]+)?\\)/g;\n let match: RegExpExecArray | null;\n\n while ((match = markdownPattern.exec(text)) !== null) {\n const url = match[2]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'markdown',\n context: match[0],\n });\n }\n }\n }\n\n // Extract HTML img tags: <img src=\"url\"> (quoted and unquoted)\n if (includeHTML) {\n // Quoted src\n const htmlQuotedPattern = /<img[^>]+src=[\"']([^\"']+)[\"'][^>]*>/gi;\n let match: RegExpExecArray | null;\n\n while ((match = htmlQuotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // Unquoted src (e.g., <img src=url>)\n const htmlUnquotedPattern = /<img[^>]+src=([^\\s\"'>]+)[^>]*>/gi;\n while ((match = htmlUnquotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Skip if it starts with a quote (already handled above)\n if (url && !url.startsWith('\"') && !url.startsWith(\"'\") && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // CSS url() function: url(url), url(\"url\"), url('url')\n // Common in background-image, background, content properties\n const cssUrlPattern = /url\\([\"']?([^)\"']+)[\"']?\\)/gi;\n while ((match = cssUrlPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Only include http(s) URLs to avoid CSS variables and relative paths\n if (url && (url.startsWith('http://') || url.startsWith('https://')) && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n }\n\n // Extract plain image URLs (not wrapped in any syntax)\n if (includeHTML) {\n // Plain URLs with image extensions (common on image hosts)\n const plainUrlPattern = /(?<![(\"'])(https?:\\/\\/[^\\s<>\"']+\\.(?:jpg|jpeg|png|gif|webp|svg|avif|bmp))(?![)\"'])/gi;\n let match: RegExpExecArray | null;\n\n while ((match = plainUrlPattern.exec(text)) !== null) {\n const url = match[0]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: url,\n });\n }\n }\n }\n\n // Extract base64 data URLs: data:image/...;base64,...\n if (includeBase64) {\n const dataUrlPattern = /data:image\\/[^;]+;base64,[A-Za-z0-9+/=]+/g;\n let match: RegExpExecArray | null;\n\n while ((match = dataUrlPattern.exec(text)) !== null) {\n const url = match[0];\n if (!seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'base64',\n });\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract only HTTP/HTTPS URLs (excludes data URLs and relative paths).\n *\n * Convenience wrapper around extractImageUrls that filters for remote URLs.\n *\n * @param text - Text to extract URLs from\n * @returns Array of extracted HTTP/HTTPS image URLs\n */\nexport function extractRemoteImageUrls(text: string): ExtractedImage[] {\n const all = extractImageUrls(text, { includeBase64: false });\n return all.filter((img) => /^https?:\\/\\//i.test(img.url));\n}\n\n/**\n * Extract only base64 data URLs.\n *\n * Convenience wrapper around extractImageUrls.\n *\n * @param text - Text to extract data URLs from\n * @returns Array of base64 image data URLs\n */\nexport function extractDataUrls(text: string): ExtractedImage[] {\n return extractImageUrls(text, {\n includeMarkdown: false,\n includeHTML: false,\n includeBase64: true,\n });\n}\n\n/**\n * Count images in text without extracting full details.\n *\n * More efficient than extractImageUrls when you only need the count.\n *\n * @param text - Text to count images in\n * @returns Total number of unique image references\n */\nexport function countImages(text: string): number {\n return extractImageUrls(text).length;\n}\n","/**\n * SSRF (Server-Side Request Forgery) Protection\n *\n * Validates URLs to prevent SSRF attacks.\n * Browser-safe implementation (no Node.js dependencies).\n */\n\nexport interface SSRFPolicy {\n /** Allow private IP addresses (10.x, 172.16-31.x, 192.168.x) (default: false) */\n allowPrivateIPs?: boolean;\n /** Allow localhost/loopback (127.x, ::1) (default: false) */\n allowLocalhost?: boolean;\n /** Blocked domain patterns (e.g., ['internal.company.com', '*.local']) */\n blockedDomains?: string[];\n /** Allowed domain patterns - if provided, ONLY these are allowed */\n allowedDomains?: string[];\n /** Allow data URLs (data:image/...) (default: false) */\n allowDataUrls?: boolean;\n}\n\nexport interface SafetyCheck {\n /** Whether the URL is safe according to policy */\n safe: boolean;\n /** Reason why URL is unsafe (if safe=false) */\n reason?: string;\n}\n\n/**\n * Default SSRF policy - blocks private IPs, localhost, and data URLs.\n */\nexport const DEFAULT_SSRF_POLICY: Required<SSRFPolicy> = {\n allowPrivateIPs: false,\n allowLocalhost: false,\n blockedDomains: [],\n allowedDomains: [],\n allowDataUrls: false,\n};\n\n/**\n * Check if URL is safe to fetch according to SSRF policy.\n *\n * This is the canonical SSRF protection - all apps should use this\n * before fetching external URLs.\n *\n * **Limitations (browser-side validation only):**\n * - Cannot detect domains that resolve to private IPs (e.g., `attacker.com` -> `127.0.0.1`)\n * - Cannot prevent DNS rebinding attacks (domain resolves differently on subsequent requests)\n * - Cannot validate redirect destinations (server may redirect to internal URLs)\n * - Does not perform actual DNS resolution - only validates URL syntax and hostname patterns\n *\n * For server-side applications requiring stronger SSRF protection, perform DNS resolution\n * and validate the resolved IP address before making the request, and validate redirect\n * destinations or disable redirects entirely.\n *\n * @param url - URL to validate\n * @param policy - SSRF policy (uses defaults if not provided)\n * @returns Safety check result with reason if unsafe\n *\n * @see https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html\n *\n * @example\n * ```typescript\n * const check = isURLSafe('http://10.0.0.1/secret');\n * if (!check.safe) {\n * console.error('Unsafe URL:', check.reason);\n * }\n * ```\n */\nexport function isURLSafe(\n url: string,\n policy: SSRFPolicy = {},\n): SafetyCheck {\n const config = { ...DEFAULT_SSRF_POLICY, ...policy };\n\n // Parse URL\n let parsed: URL;\n try {\n parsed = new URL(url);\n } catch {\n return { safe: false, reason: 'Invalid URL format' };\n }\n\n // Check protocol\n if (parsed.protocol === 'data:') {\n if (!config.allowDataUrls) {\n return { safe: false, reason: 'Data URLs not allowed' };\n }\n return { safe: true };\n }\n\n if (!['http:', 'https:'].includes(parsed.protocol)) {\n return {\n safe: false,\n reason: `Protocol '${parsed.protocol}' not allowed (only http/https)`,\n };\n }\n\n const hostname = parsed.hostname.toLowerCase();\n\n // Check allowed domains (whitelist)\n if (config.allowedDomains.length > 0) {\n const isAllowed = config.allowedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (!isAllowed) {\n return {\n safe: false,\n reason: `Domain '${hostname}' not in allowed list`,\n };\n }\n }\n\n // Check blocked domains (blacklist)\n if (config.blockedDomains.length > 0) {\n const isBlocked = config.blockedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (isBlocked) {\n return {\n safe: false,\n reason: `Domain '${hostname}' is blocked`,\n };\n }\n }\n\n // Check localhost\n if (isLocalhost(hostname) && !config.allowLocalhost) {\n return { safe: false, reason: 'Localhost not allowed' };\n }\n\n // Check private IPs\n if (isPrivateIP(hostname) && !config.allowPrivateIPs) {\n return { safe: false, reason: 'Private IP addresses not allowed' };\n }\n\n return { safe: true };\n}\n\n/**\n * Quick check if URL is safe with default policy.\n *\n * Convenience wrapper for common case.\n *\n * @param url - URL to validate\n * @returns true if safe, false otherwise\n */\nexport function isSafeForFetch(url: string): boolean {\n return isURLSafe(url).safe;\n}\n\n/**\n * Match domain against pattern (supports wildcards).\n *\n * @example\n * ```typescript\n * matchDomainPattern('api.github.com', '*.github.com') // true\n * matchDomainPattern('github.com', '*.github.com') // false\n * matchDomainPattern('github.com', 'github.com') // true\n * ```\n */\nfunction matchDomainPattern(domain: string, pattern: string): boolean {\n // Exact match\n if (domain === pattern) return true;\n\n // Wildcard match - only matches subdomains, not the base domain\n if (pattern.startsWith('*.')) {\n const suffix = pattern.slice(2);\n return domain.endsWith('.' + suffix);\n }\n\n return false;\n}\n\n/**\n * Check if hostname is localhost/loopback.\n */\nfunction isLocalhost(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 loopback\n if (lower === 'localhost' || lower === '0.0.0.0' || lower.startsWith('127.')) {\n return true;\n }\n\n // IPv6 loopback\n if (lower === '::1' || lower === '[::1]') {\n return true;\n }\n\n return false;\n}\n\n/**\n * Check if hostname is a private IP address.\n *\n * Checks for:\n * - 10.0.0.0/8\n * - 172.16.0.0/12\n * - 192.168.0.0/16\n * - Link-local: 169.254.0.0/16 (includes AWS metadata endpoint)\n * - IPv6 private ranges\n */\nfunction isPrivateIP(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 private ranges\n const parts = hostname.split('.').map(Number);\n\n if (\n parts.length === 4 &&\n parts.every((p) => p >= 0 && p <= 255 && !isNaN(p))\n ) {\n const [octet1, octet2] = parts;\n\n // 10.0.0.0/8\n if (octet1 === 10) return true;\n\n // 172.16.0.0/12\n if (octet1 === 172 && octet2 !== undefined && octet2 >= 16 && octet2 <= 31)\n return true;\n\n // 192.168.0.0/16\n if (octet1 === 192 && octet2 === 168) return true;\n\n // 169.254.0.0/16 (link-local) - includes AWS metadata endpoint\n if (octet1 === 169 && octet2 === 254) return true;\n }\n\n // IPv6 private ranges using regex patterns\n // Strip brackets if present (URL parser adds them: [fc00::1])\n const cleanedHostname = lower.replace(/^\\[|\\]$/g, '');\n\n const ipv6Patterns = [\n /^f[cd][0-9a-f]{2}:/i, // fc00::/7 (includes both fc and fd ranges)\n /^fe80:/i, // fe80::/10 (link-local)\n ];\n\n for (const pattern of ipv6Patterns) {\n if (pattern.test(cleanedHostname)) {\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Filter array of URLs, keeping only safe ones.\n *\n * @param urls - Array of URLs to filter\n * @param policy - SSRF policy\n * @returns Array of safe URLs\n */\nexport function filterSafeUrls(\n urls: string[],\n policy?: SSRFPolicy,\n): string[] {\n return urls.filter((url) => isURLSafe(url, policy).safe);\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;AC6CO,SAAS,iBACd,MACA,UAAkC,CAAC,GACjB;AAClB,QAAM;IACJ,kBAAkB;IAClB,cAAc;IACd,gBAAgB;EAClB,IAAI;AAEJ,QAAM,UAA4B,CAAC;AACnC,QAAM,OAAO,oBAAI,IAAY;AAI7B,MAAI,iBAAiB;AACnB,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,oBAAoB;AAC1B,QAAI;AAEJ,YAAQ,QAAQ,kBAAkB,KAAK,IAAI,OAAO,MAAM;AACtD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAGA,UAAM,sBAAsB;AAC5B,YAAQ,QAAQ,oBAAoB,KAAK,IAAI,OAAO,MAAM;AACxD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,OAAO,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,GAAG;AACzE,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAIA,UAAM,gBAAgB;AACtB,YAAQ,QAAQ,cAAc,KAAK,IAAI,OAAO,MAAM;AAClD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,QAAQ,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU,MAAM,CAAC,KAAK,IAAI,GAAG,GAAG;AACtF,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS;QACX,CAAC;MACH;IACF;EACF;AAGA,MAAI,eAAe;AACjB,UAAM,iBAAiB;AACvB,QAAI;AAEJ,YAAQ,QAAQ,eAAe,KAAK,IAAI,OAAO,MAAM;AACnD,YAAM,MAAM,MAAM,CAAC;AACnB,UAAI,CAAC,KAAK,IAAI,GAAG,GAAG;AAClB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;QACV,CAAC;MACH;IACF;EACF;AAEA,SAAO;AACT;AAUO,SAAS,uBAAuB,MAAgC;AACrE,QAAM,MAAM,iBAAiB,MAAM,EAAE,eAAe,MAAM,CAAC;AAC3D,SAAO,IAAI,OAAO,CAAC,QAAQ,gBAAgB,KAAK,IAAI,GAAG,CAAC;AAC1D;AAUO,SAAS,gBAAgB,MAAgC;AAC9D,SAAO,iBAAiB,MAAM;IAC5B,iBAAiB;IACjB,aAAa;IACb,eAAe;EACjB,CAAC;AACH;AAUO,SAAS,YAAY,MAAsB;AAChD,SAAO,iBAAiB,IAAI,EAAE;AAChC;AC/KO,IAAM,sBAA4C;EACvD,iBAAiB;EACjB,gBAAgB;EAChB,gBAAgB,CAAC;EACjB,gBAAgB,CAAC;EACjB,eAAe;AACjB;AAgCO,SAAS,UACd,KACA,SAAqB,CAAC,GACT;AACb,QAAM,SAAS,EAAE,GAAG,qBAAqB,GAAG,OAAO;AAGnD,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;EACtB,QAAQ;AACN,WAAO,EAAE,MAAM,OAAO,QAAQ,qBAAqB;EACrD;AAGA,MAAI,OAAO,aAAa,SAAS;AAC/B,QAAI,CAAC,OAAO,eAAe;AACzB,aAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;IACxD;AACA,WAAO,EAAE,MAAM,KAAK;EACtB;AAEA,MAAI,CAAC,CAAC,SAAS,QAAQ,EAAE,SAAS,OAAO,QAAQ,GAAG;AAClD,WAAO;MACL,MAAM;MACN,QAAQ,aAAa,OAAO,QAAQ;IACtC;EACF;AAEA,QAAM,WAAW,OAAO,SAAS,YAAY;AAG7C,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,CAAC,WAAW;AACd,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,WAAW;AACb,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,gBAAgB;AACnD,WAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;EACxD;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,iBAAiB;AACpD,WAAO,EAAE,MAAM,OAAO,QAAQ,mCAAmC;EACnE;AAEA,SAAO,EAAE,MAAM,KAAK;AACtB;AAUO,SAAS,eAAe,KAAsB;AACnD,SAAO,UAAU,GAAG,EAAE;AACxB;AAYA,SAAS,mBAAmB,QAAgB,SAA0B;AAEpE,MAAI,WAAW,QAAS,QAAO;AAG/B,MAAI,QAAQ,WAAW,IAAI,GAAG;AAC5B,UAAM,SAAS,QAAQ,MAAM,CAAC;AAC9B,WAAO,OAAO,SAAS,MAAM,MAAM;EACrC;AAEA,SAAO;AACT;AAKA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,MAAI,UAAU,eAAe,UAAU,aAAa,MAAM,WAAW,MAAM,GAAG;AAC5E,WAAO;EACT;AAGA,MAAI,UAAU,SAAS,UAAU,SAAS;AACxC,WAAO;EACT;AAEA,SAAO;AACT;AAYA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,QAAM,QAAQ,SAAS,MAAM,GAAG,EAAE,IAAI,MAAM;AAE5C,MACE,MAAM,WAAW,KACjB,MAAM,MAAM,CAAC,MAAM,KAAK,KAAK,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,GAClD;AACA,UAAM,CAAC,QAAQ,MAAM,IAAI;AAGzB,QAAI,WAAW,GAAI,QAAO;AAG1B,QAAI,WAAW,OAAO,WAAW,UAAa,UAAU,MAAM,UAAU;AACtE,aAAO;AAGT,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;AAG7C,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;EAC/C;AAIA,QAAM,kBAAkB,MAAM,QAAQ,YAAY,EAAE;AAEpD,QAAM,eAAe;IACnB;;IACA;;EACF;AAEA,aAAW,WAAW,cAAc;AAClC,QAAI,QAAQ,KAAK,eAAe,GAAG;AACjC,aAAO;IACT;EACF;AAEA,SAAO;AACT;AASO,SAAS,eACd,MACA,QACU;AACV,SAAO,KAAK,OAAO,CAAC,QAAQ,UAAU,KAAK,MAAM,EAAE,IAAI;AACzD;","names":[]}
@@ -102,10 +102,22 @@ export declare const DEFAULT_SSRF_POLICY: Required<SSRFPolicy>;
102
102
  * This is the canonical SSRF protection - all apps should use this
103
103
  * before fetching external URLs.
104
104
  *
105
+ * **Limitations (browser-side validation only):**
106
+ * - Cannot detect domains that resolve to private IPs (e.g., `attacker.com` -> `127.0.0.1`)
107
+ * - Cannot prevent DNS rebinding attacks (domain resolves differently on subsequent requests)
108
+ * - Cannot validate redirect destinations (server may redirect to internal URLs)
109
+ * - Does not perform actual DNS resolution - only validates URL syntax and hostname patterns
110
+ *
111
+ * For server-side applications requiring stronger SSRF protection, perform DNS resolution
112
+ * and validate the resolved IP address before making the request, and validate redirect
113
+ * destinations or disable redirects entirely.
114
+ *
105
115
  * @param url - URL to validate
106
116
  * @param policy - SSRF policy (uses defaults if not provided)
107
117
  * @returns Safety check result with reason if unsafe
108
118
  *
119
+ * @see https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html
120
+ *
109
121
  * @example
110
122
  * ```typescript
111
123
  * const check = isURLSafe('http://10.0.0.1/secret');
@@ -102,10 +102,22 @@ export declare const DEFAULT_SSRF_POLICY: Required<SSRFPolicy>;
102
102
  * This is the canonical SSRF protection - all apps should use this
103
103
  * before fetching external URLs.
104
104
  *
105
+ * **Limitations (browser-side validation only):**
106
+ * - Cannot detect domains that resolve to private IPs (e.g., `attacker.com` -> `127.0.0.1`)
107
+ * - Cannot prevent DNS rebinding attacks (domain resolves differently on subsequent requests)
108
+ * - Cannot validate redirect destinations (server may redirect to internal URLs)
109
+ * - Does not perform actual DNS resolution - only validates URL syntax and hostname patterns
110
+ *
111
+ * For server-side applications requiring stronger SSRF protection, perform DNS resolution
112
+ * and validate the resolved IP address before making the request, and validate redirect
113
+ * destinations or disable redirects entirely.
114
+ *
105
115
  * @param url - URL to validate
106
116
  * @param policy - SSRF policy (uses defaults if not provided)
107
117
  * @returns Safety check result with reason if unsafe
108
118
  *
119
+ * @see https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html
120
+ *
109
121
  * @example
110
122
  * ```typescript
111
123
  * const check = isURLSafe('http://10.0.0.1/secret');
@@ -1 +1 @@
1
- {"version":3,"sources":["../../image-utils/src/extraction.ts","../../image-utils/src/ssrf.ts"],"sourcesContent":["/**\n * Image URL Extraction\n *\n * Extracts image URLs from markdown, HTML, and base64 data URLs in text.\n * Used across Federation, Archive, and Architect for image processing.\n */\n\nexport interface ImageExtractionOptions {\n /** Include markdown image syntax `![alt](url)` (default: true) */\n includeMarkdown?: boolean;\n /** Include HTML img tags `<img src=\"url\">` (default: true) */\n includeHTML?: boolean;\n /** Include base64 data URLs `data:image/...;base64,...` (default: true) */\n includeBase64?: boolean;\n}\n\nexport interface ExtractedImage {\n /** The extracted URL or data URL */\n url: string;\n /** Source format where the image was found */\n source: 'markdown' | 'html' | 'base64';\n /** Surrounding context for debugging (optional) */\n context?: string;\n}\n\n/**\n * Extract all image URLs from text.\n *\n * Supports markdown, HTML, and base64 data URLs.\n * Returns deduplicated results.\n *\n * @param text - Text to extract image URLs from\n * @param options - Extraction options\n * @returns Array of extracted images with source information\n *\n * @example\n * ```typescript\n * const text = 'Check out ![portrait](avatar.png) and <img src=\"banner.jpg\">';\n * const images = extractImageUrls(text);\n * // [\n * // { url: 'avatar.png', source: 'markdown' },\n * // { url: 'banner.jpg', source: 'html' }\n * // ]\n * ```\n */\nexport function extractImageUrls(\n text: string,\n options: ImageExtractionOptions = {},\n): ExtractedImage[] {\n const {\n includeMarkdown = true,\n includeHTML = true,\n includeBase64 = true,\n } = options;\n\n const results: ExtractedImage[] = [];\n const seen = new Set<string>();\n\n // Extract markdown images: ![alt](url) or ![alt](<url>) or ![alt](url =dimensions)\n // Supports: standard, angle brackets, and SillyTavern dimension syntax\n if (includeMarkdown) {\n const markdownPattern = /!\\[([^\\]]*)\\]\\(<?([^>\\s)]+)>?(?:\\s*=[^)]+)?\\)/g;\n let match: RegExpExecArray | null;\n\n while ((match = markdownPattern.exec(text)) !== null) {\n const url = match[2]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'markdown',\n context: match[0],\n });\n }\n }\n }\n\n // Extract HTML img tags: <img src=\"url\"> (quoted and unquoted)\n if (includeHTML) {\n // Quoted src\n const htmlQuotedPattern = /<img[^>]+src=[\"']([^\"']+)[\"'][^>]*>/gi;\n let match: RegExpExecArray | null;\n\n while ((match = htmlQuotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // Unquoted src (e.g., <img src=url>)\n const htmlUnquotedPattern = /<img[^>]+src=([^\\s\"'>]+)[^>]*>/gi;\n while ((match = htmlUnquotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Skip if it starts with a quote (already handled above)\n if (url && !url.startsWith('\"') && !url.startsWith(\"'\") && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // CSS url() function: url(url), url(\"url\"), url('url')\n // Common in background-image, background, content properties\n const cssUrlPattern = /url\\([\"']?([^)\"']+)[\"']?\\)/gi;\n while ((match = cssUrlPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Only include http(s) URLs to avoid CSS variables and relative paths\n if (url && (url.startsWith('http://') || url.startsWith('https://')) && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n }\n\n // Extract plain image URLs (not wrapped in any syntax)\n if (includeHTML) {\n // Plain URLs with image extensions (common on image hosts)\n const plainUrlPattern = /(?<![(\"'])(https?:\\/\\/[^\\s<>\"']+\\.(?:jpg|jpeg|png|gif|webp|svg|avif|bmp))(?![)\"'])/gi;\n let match: RegExpExecArray | null;\n\n while ((match = plainUrlPattern.exec(text)) !== null) {\n const url = match[0]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: url,\n });\n }\n }\n }\n\n // Extract base64 data URLs: data:image/...;base64,...\n if (includeBase64) {\n const dataUrlPattern = /data:image\\/[^;]+;base64,[A-Za-z0-9+/=]+/g;\n let match: RegExpExecArray | null;\n\n while ((match = dataUrlPattern.exec(text)) !== null) {\n const url = match[0];\n if (!seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'base64',\n });\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract only HTTP/HTTPS URLs (excludes data URLs and relative paths).\n *\n * Convenience wrapper around extractImageUrls that filters for remote URLs.\n *\n * @param text - Text to extract URLs from\n * @returns Array of extracted HTTP/HTTPS image URLs\n */\nexport function extractRemoteImageUrls(text: string): ExtractedImage[] {\n const all = extractImageUrls(text, { includeBase64: false });\n return all.filter((img) => /^https?:\\/\\//i.test(img.url));\n}\n\n/**\n * Extract only base64 data URLs.\n *\n * Convenience wrapper around extractImageUrls.\n *\n * @param text - Text to extract data URLs from\n * @returns Array of base64 image data URLs\n */\nexport function extractDataUrls(text: string): ExtractedImage[] {\n return extractImageUrls(text, {\n includeMarkdown: false,\n includeHTML: false,\n includeBase64: true,\n });\n}\n\n/**\n * Count images in text without extracting full details.\n *\n * More efficient than extractImageUrls when you only need the count.\n *\n * @param text - Text to count images in\n * @returns Total number of unique image references\n */\nexport function countImages(text: string): number {\n return extractImageUrls(text).length;\n}\n","/**\n * SSRF (Server-Side Request Forgery) Protection\n *\n * Validates URLs to prevent SSRF attacks.\n * Browser-safe implementation (no Node.js dependencies).\n */\n\nexport interface SSRFPolicy {\n /** Allow private IP addresses (10.x, 172.16-31.x, 192.168.x) (default: false) */\n allowPrivateIPs?: boolean;\n /** Allow localhost/loopback (127.x, ::1) (default: false) */\n allowLocalhost?: boolean;\n /** Blocked domain patterns (e.g., ['internal.company.com', '*.local']) */\n blockedDomains?: string[];\n /** Allowed domain patterns - if provided, ONLY these are allowed */\n allowedDomains?: string[];\n /** Allow data URLs (data:image/...) (default: false) */\n allowDataUrls?: boolean;\n}\n\nexport interface SafetyCheck {\n /** Whether the URL is safe according to policy */\n safe: boolean;\n /** Reason why URL is unsafe (if safe=false) */\n reason?: string;\n}\n\n/**\n * Default SSRF policy - blocks private IPs, localhost, and data URLs.\n */\nexport const DEFAULT_SSRF_POLICY: Required<SSRFPolicy> = {\n allowPrivateIPs: false,\n allowLocalhost: false,\n blockedDomains: [],\n allowedDomains: [],\n allowDataUrls: false,\n};\n\n/**\n * Check if URL is safe to fetch according to SSRF policy.\n *\n * This is the canonical SSRF protection - all apps should use this\n * before fetching external URLs.\n *\n * @param url - URL to validate\n * @param policy - SSRF policy (uses defaults if not provided)\n * @returns Safety check result with reason if unsafe\n *\n * @example\n * ```typescript\n * const check = isURLSafe('http://10.0.0.1/secret');\n * if (!check.safe) {\n * console.error('Unsafe URL:', check.reason);\n * }\n * ```\n */\nexport function isURLSafe(\n url: string,\n policy: SSRFPolicy = {},\n): SafetyCheck {\n const config = { ...DEFAULT_SSRF_POLICY, ...policy };\n\n // Parse URL\n let parsed: URL;\n try {\n parsed = new URL(url);\n } catch {\n return { safe: false, reason: 'Invalid URL format' };\n }\n\n // Check protocol\n if (parsed.protocol === 'data:') {\n if (!config.allowDataUrls) {\n return { safe: false, reason: 'Data URLs not allowed' };\n }\n return { safe: true };\n }\n\n if (!['http:', 'https:'].includes(parsed.protocol)) {\n return {\n safe: false,\n reason: `Protocol '${parsed.protocol}' not allowed (only http/https)`,\n };\n }\n\n const hostname = parsed.hostname.toLowerCase();\n\n // Check allowed domains (whitelist)\n if (config.allowedDomains.length > 0) {\n const isAllowed = config.allowedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (!isAllowed) {\n return {\n safe: false,\n reason: `Domain '${hostname}' not in allowed list`,\n };\n }\n }\n\n // Check blocked domains (blacklist)\n if (config.blockedDomains.length > 0) {\n const isBlocked = config.blockedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (isBlocked) {\n return {\n safe: false,\n reason: `Domain '${hostname}' is blocked`,\n };\n }\n }\n\n // Check localhost\n if (isLocalhost(hostname) && !config.allowLocalhost) {\n return { safe: false, reason: 'Localhost not allowed' };\n }\n\n // Check private IPs\n if (isPrivateIP(hostname) && !config.allowPrivateIPs) {\n return { safe: false, reason: 'Private IP addresses not allowed' };\n }\n\n return { safe: true };\n}\n\n/**\n * Quick check if URL is safe with default policy.\n *\n * Convenience wrapper for common case.\n *\n * @param url - URL to validate\n * @returns true if safe, false otherwise\n */\nexport function isSafeForFetch(url: string): boolean {\n return isURLSafe(url).safe;\n}\n\n/**\n * Match domain against pattern (supports wildcards).\n *\n * @example\n * ```typescript\n * matchDomainPattern('api.github.com', '*.github.com') // true\n * matchDomainPattern('github.com', '*.github.com') // false\n * matchDomainPattern('github.com', 'github.com') // true\n * ```\n */\nfunction matchDomainPattern(domain: string, pattern: string): boolean {\n // Exact match\n if (domain === pattern) return true;\n\n // Wildcard match - only matches subdomains, not the base domain\n if (pattern.startsWith('*.')) {\n const suffix = pattern.slice(2);\n return domain.endsWith('.' + suffix);\n }\n\n return false;\n}\n\n/**\n * Check if hostname is localhost/loopback.\n */\nfunction isLocalhost(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 loopback\n if (lower === 'localhost' || lower === '0.0.0.0' || lower.startsWith('127.')) {\n return true;\n }\n\n // IPv6 loopback\n if (lower === '::1' || lower === '[::1]') {\n return true;\n }\n\n return false;\n}\n\n/**\n * Check if hostname is a private IP address.\n *\n * Checks for:\n * - 10.0.0.0/8\n * - 172.16.0.0/12\n * - 192.168.0.0/16\n * - Link-local: 169.254.0.0/16 (includes AWS metadata endpoint)\n * - IPv6 private ranges\n */\nfunction isPrivateIP(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 private ranges\n const parts = hostname.split('.').map(Number);\n\n if (\n parts.length === 4 &&\n parts.every((p) => p >= 0 && p <= 255 && !isNaN(p))\n ) {\n const [octet1, octet2] = parts;\n\n // 10.0.0.0/8\n if (octet1 === 10) return true;\n\n // 172.16.0.0/12\n if (octet1 === 172 && octet2 !== undefined && octet2 >= 16 && octet2 <= 31)\n return true;\n\n // 192.168.0.0/16\n if (octet1 === 192 && octet2 === 168) return true;\n\n // 169.254.0.0/16 (link-local) - includes AWS metadata endpoint\n if (octet1 === 169 && octet2 === 254) return true;\n }\n\n // IPv6 private ranges using regex patterns\n // Strip brackets if present (URL parser adds them: [fc00::1])\n const cleanedHostname = lower.replace(/^\\[|\\]$/g, '');\n\n const ipv6Patterns = [\n /^f[cd][0-9a-f]{2}:/i, // fc00::/7 (includes both fc and fd ranges)\n /^fe80:/i, // fe80::/10 (link-local)\n ];\n\n for (const pattern of ipv6Patterns) {\n if (pattern.test(cleanedHostname)) {\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Filter array of URLs, keeping only safe ones.\n *\n * @param urls - Array of URLs to filter\n * @param policy - SSRF policy\n * @returns Array of safe URLs\n */\nexport function filterSafeUrls(\n urls: string[],\n policy?: SSRFPolicy,\n): string[] {\n return urls.filter((url) => isURLSafe(url, policy).safe);\n}\n"],"mappings":";AA6CO,SAAS,iBACd,MACA,UAAkC,CAAC,GACjB;AAClB,QAAM;IACJ,kBAAkB;IAClB,cAAc;IACd,gBAAgB;EAClB,IAAI;AAEJ,QAAM,UAA4B,CAAC;AACnC,QAAM,OAAO,oBAAI,IAAY;AAI7B,MAAI,iBAAiB;AACnB,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,oBAAoB;AAC1B,QAAI;AAEJ,YAAQ,QAAQ,kBAAkB,KAAK,IAAI,OAAO,MAAM;AACtD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAGA,UAAM,sBAAsB;AAC5B,YAAQ,QAAQ,oBAAoB,KAAK,IAAI,OAAO,MAAM;AACxD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,OAAO,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,GAAG;AACzE,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAIA,UAAM,gBAAgB;AACtB,YAAQ,QAAQ,cAAc,KAAK,IAAI,OAAO,MAAM;AAClD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,QAAQ,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU,MAAM,CAAC,KAAK,IAAI,GAAG,GAAG;AACtF,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS;QACX,CAAC;MACH;IACF;EACF;AAGA,MAAI,eAAe;AACjB,UAAM,iBAAiB;AACvB,QAAI;AAEJ,YAAQ,QAAQ,eAAe,KAAK,IAAI,OAAO,MAAM;AACnD,YAAM,MAAM,MAAM,CAAC;AACnB,UAAI,CAAC,KAAK,IAAI,GAAG,GAAG;AAClB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;QACV,CAAC;MACH;IACF;EACF;AAEA,SAAO;AACT;AAUO,SAAS,uBAAuB,MAAgC;AACrE,QAAM,MAAM,iBAAiB,MAAM,EAAE,eAAe,MAAM,CAAC;AAC3D,SAAO,IAAI,OAAO,CAAC,QAAQ,gBAAgB,KAAK,IAAI,GAAG,CAAC;AAC1D;AAUO,SAAS,gBAAgB,MAAgC;AAC9D,SAAO,iBAAiB,MAAM;IAC5B,iBAAiB;IACjB,aAAa;IACb,eAAe;EACjB,CAAC;AACH;AAUO,SAAS,YAAY,MAAsB;AAChD,SAAO,iBAAiB,IAAI,EAAE;AAChC;AC/KO,IAAM,sBAA4C;EACvD,iBAAiB;EACjB,gBAAgB;EAChB,gBAAgB,CAAC;EACjB,gBAAgB,CAAC;EACjB,eAAe;AACjB;AAoBO,SAAS,UACd,KACA,SAAqB,CAAC,GACT;AACb,QAAM,SAAS,EAAE,GAAG,qBAAqB,GAAG,OAAO;AAGnD,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;EACtB,QAAQ;AACN,WAAO,EAAE,MAAM,OAAO,QAAQ,qBAAqB;EACrD;AAGA,MAAI,OAAO,aAAa,SAAS;AAC/B,QAAI,CAAC,OAAO,eAAe;AACzB,aAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;IACxD;AACA,WAAO,EAAE,MAAM,KAAK;EACtB;AAEA,MAAI,CAAC,CAAC,SAAS,QAAQ,EAAE,SAAS,OAAO,QAAQ,GAAG;AAClD,WAAO;MACL,MAAM;MACN,QAAQ,aAAa,OAAO,QAAQ;IACtC;EACF;AAEA,QAAM,WAAW,OAAO,SAAS,YAAY;AAG7C,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,CAAC,WAAW;AACd,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,WAAW;AACb,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,gBAAgB;AACnD,WAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;EACxD;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,iBAAiB;AACpD,WAAO,EAAE,MAAM,OAAO,QAAQ,mCAAmC;EACnE;AAEA,SAAO,EAAE,MAAM,KAAK;AACtB;AAUO,SAAS,eAAe,KAAsB;AACnD,SAAO,UAAU,GAAG,EAAE;AACxB;AAYA,SAAS,mBAAmB,QAAgB,SAA0B;AAEpE,MAAI,WAAW,QAAS,QAAO;AAG/B,MAAI,QAAQ,WAAW,IAAI,GAAG;AAC5B,UAAM,SAAS,QAAQ,MAAM,CAAC;AAC9B,WAAO,OAAO,SAAS,MAAM,MAAM;EACrC;AAEA,SAAO;AACT;AAKA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,MAAI,UAAU,eAAe,UAAU,aAAa,MAAM,WAAW,MAAM,GAAG;AAC5E,WAAO;EACT;AAGA,MAAI,UAAU,SAAS,UAAU,SAAS;AACxC,WAAO;EACT;AAEA,SAAO;AACT;AAYA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,QAAM,QAAQ,SAAS,MAAM,GAAG,EAAE,IAAI,MAAM;AAE5C,MACE,MAAM,WAAW,KACjB,MAAM,MAAM,CAAC,MAAM,KAAK,KAAK,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,GAClD;AACA,UAAM,CAAC,QAAQ,MAAM,IAAI;AAGzB,QAAI,WAAW,GAAI,QAAO;AAG1B,QAAI,WAAW,OAAO,WAAW,UAAa,UAAU,MAAM,UAAU;AACtE,aAAO;AAGT,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;AAG7C,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;EAC/C;AAIA,QAAM,kBAAkB,MAAM,QAAQ,YAAY,EAAE;AAEpD,QAAM,eAAe;IACnB;;IACA;;EACF;AAEA,aAAW,WAAW,cAAc;AAClC,QAAI,QAAQ,KAAK,eAAe,GAAG;AACjC,aAAO;IACT;EACF;AAEA,SAAO;AACT;AASO,SAAS,eACd,MACA,QACU;AACV,SAAO,KAAK,OAAO,CAAC,QAAQ,UAAU,KAAK,MAAM,EAAE,IAAI;AACzD;","names":[]}
1
+ {"version":3,"sources":["../../image-utils/src/extraction.ts","../../image-utils/src/ssrf.ts"],"sourcesContent":["/**\n * Image URL Extraction\n *\n * Extracts image URLs from markdown, HTML, and base64 data URLs in text.\n * Used across Federation, Archive, and Architect for image processing.\n */\n\nexport interface ImageExtractionOptions {\n /** Include markdown image syntax `![alt](url)` (default: true) */\n includeMarkdown?: boolean;\n /** Include HTML img tags `<img src=\"url\">` (default: true) */\n includeHTML?: boolean;\n /** Include base64 data URLs `data:image/...;base64,...` (default: true) */\n includeBase64?: boolean;\n}\n\nexport interface ExtractedImage {\n /** The extracted URL or data URL */\n url: string;\n /** Source format where the image was found */\n source: 'markdown' | 'html' | 'base64';\n /** Surrounding context for debugging (optional) */\n context?: string;\n}\n\n/**\n * Extract all image URLs from text.\n *\n * Supports markdown, HTML, and base64 data URLs.\n * Returns deduplicated results.\n *\n * @param text - Text to extract image URLs from\n * @param options - Extraction options\n * @returns Array of extracted images with source information\n *\n * @example\n * ```typescript\n * const text = 'Check out ![portrait](avatar.png) and <img src=\"banner.jpg\">';\n * const images = extractImageUrls(text);\n * // [\n * // { url: 'avatar.png', source: 'markdown' },\n * // { url: 'banner.jpg', source: 'html' }\n * // ]\n * ```\n */\nexport function extractImageUrls(\n text: string,\n options: ImageExtractionOptions = {},\n): ExtractedImage[] {\n const {\n includeMarkdown = true,\n includeHTML = true,\n includeBase64 = true,\n } = options;\n\n const results: ExtractedImage[] = [];\n const seen = new Set<string>();\n\n // Extract markdown images: ![alt](url) or ![alt](<url>) or ![alt](url =dimensions)\n // Supports: standard, angle brackets, and SillyTavern dimension syntax\n if (includeMarkdown) {\n const markdownPattern = /!\\[([^\\]]*)\\]\\(<?([^>\\s)]+)>?(?:\\s*=[^)]+)?\\)/g;\n let match: RegExpExecArray | null;\n\n while ((match = markdownPattern.exec(text)) !== null) {\n const url = match[2]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'markdown',\n context: match[0],\n });\n }\n }\n }\n\n // Extract HTML img tags: <img src=\"url\"> (quoted and unquoted)\n if (includeHTML) {\n // Quoted src\n const htmlQuotedPattern = /<img[^>]+src=[\"']([^\"']+)[\"'][^>]*>/gi;\n let match: RegExpExecArray | null;\n\n while ((match = htmlQuotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // Unquoted src (e.g., <img src=url>)\n const htmlUnquotedPattern = /<img[^>]+src=([^\\s\"'>]+)[^>]*>/gi;\n while ((match = htmlUnquotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Skip if it starts with a quote (already handled above)\n if (url && !url.startsWith('\"') && !url.startsWith(\"'\") && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // CSS url() function: url(url), url(\"url\"), url('url')\n // Common in background-image, background, content properties\n const cssUrlPattern = /url\\([\"']?([^)\"']+)[\"']?\\)/gi;\n while ((match = cssUrlPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Only include http(s) URLs to avoid CSS variables and relative paths\n if (url && (url.startsWith('http://') || url.startsWith('https://')) && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n }\n\n // Extract plain image URLs (not wrapped in any syntax)\n if (includeHTML) {\n // Plain URLs with image extensions (common on image hosts)\n const plainUrlPattern = /(?<![(\"'])(https?:\\/\\/[^\\s<>\"']+\\.(?:jpg|jpeg|png|gif|webp|svg|avif|bmp))(?![)\"'])/gi;\n let match: RegExpExecArray | null;\n\n while ((match = plainUrlPattern.exec(text)) !== null) {\n const url = match[0]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: url,\n });\n }\n }\n }\n\n // Extract base64 data URLs: data:image/...;base64,...\n if (includeBase64) {\n const dataUrlPattern = /data:image\\/[^;]+;base64,[A-Za-z0-9+/=]+/g;\n let match: RegExpExecArray | null;\n\n while ((match = dataUrlPattern.exec(text)) !== null) {\n const url = match[0];\n if (!seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'base64',\n });\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract only HTTP/HTTPS URLs (excludes data URLs and relative paths).\n *\n * Convenience wrapper around extractImageUrls that filters for remote URLs.\n *\n * @param text - Text to extract URLs from\n * @returns Array of extracted HTTP/HTTPS image URLs\n */\nexport function extractRemoteImageUrls(text: string): ExtractedImage[] {\n const all = extractImageUrls(text, { includeBase64: false });\n return all.filter((img) => /^https?:\\/\\//i.test(img.url));\n}\n\n/**\n * Extract only base64 data URLs.\n *\n * Convenience wrapper around extractImageUrls.\n *\n * @param text - Text to extract data URLs from\n * @returns Array of base64 image data URLs\n */\nexport function extractDataUrls(text: string): ExtractedImage[] {\n return extractImageUrls(text, {\n includeMarkdown: false,\n includeHTML: false,\n includeBase64: true,\n });\n}\n\n/**\n * Count images in text without extracting full details.\n *\n * More efficient than extractImageUrls when you only need the count.\n *\n * @param text - Text to count images in\n * @returns Total number of unique image references\n */\nexport function countImages(text: string): number {\n return extractImageUrls(text).length;\n}\n","/**\n * SSRF (Server-Side Request Forgery) Protection\n *\n * Validates URLs to prevent SSRF attacks.\n * Browser-safe implementation (no Node.js dependencies).\n */\n\nexport interface SSRFPolicy {\n /** Allow private IP addresses (10.x, 172.16-31.x, 192.168.x) (default: false) */\n allowPrivateIPs?: boolean;\n /** Allow localhost/loopback (127.x, ::1) (default: false) */\n allowLocalhost?: boolean;\n /** Blocked domain patterns (e.g., ['internal.company.com', '*.local']) */\n blockedDomains?: string[];\n /** Allowed domain patterns - if provided, ONLY these are allowed */\n allowedDomains?: string[];\n /** Allow data URLs (data:image/...) (default: false) */\n allowDataUrls?: boolean;\n}\n\nexport interface SafetyCheck {\n /** Whether the URL is safe according to policy */\n safe: boolean;\n /** Reason why URL is unsafe (if safe=false) */\n reason?: string;\n}\n\n/**\n * Default SSRF policy - blocks private IPs, localhost, and data URLs.\n */\nexport const DEFAULT_SSRF_POLICY: Required<SSRFPolicy> = {\n allowPrivateIPs: false,\n allowLocalhost: false,\n blockedDomains: [],\n allowedDomains: [],\n allowDataUrls: false,\n};\n\n/**\n * Check if URL is safe to fetch according to SSRF policy.\n *\n * This is the canonical SSRF protection - all apps should use this\n * before fetching external URLs.\n *\n * **Limitations (browser-side validation only):**\n * - Cannot detect domains that resolve to private IPs (e.g., `attacker.com` -> `127.0.0.1`)\n * - Cannot prevent DNS rebinding attacks (domain resolves differently on subsequent requests)\n * - Cannot validate redirect destinations (server may redirect to internal URLs)\n * - Does not perform actual DNS resolution - only validates URL syntax and hostname patterns\n *\n * For server-side applications requiring stronger SSRF protection, perform DNS resolution\n * and validate the resolved IP address before making the request, and validate redirect\n * destinations or disable redirects entirely.\n *\n * @param url - URL to validate\n * @param policy - SSRF policy (uses defaults if not provided)\n * @returns Safety check result with reason if unsafe\n *\n * @see https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html\n *\n * @example\n * ```typescript\n * const check = isURLSafe('http://10.0.0.1/secret');\n * if (!check.safe) {\n * console.error('Unsafe URL:', check.reason);\n * }\n * ```\n */\nexport function isURLSafe(\n url: string,\n policy: SSRFPolicy = {},\n): SafetyCheck {\n const config = { ...DEFAULT_SSRF_POLICY, ...policy };\n\n // Parse URL\n let parsed: URL;\n try {\n parsed = new URL(url);\n } catch {\n return { safe: false, reason: 'Invalid URL format' };\n }\n\n // Check protocol\n if (parsed.protocol === 'data:') {\n if (!config.allowDataUrls) {\n return { safe: false, reason: 'Data URLs not allowed' };\n }\n return { safe: true };\n }\n\n if (!['http:', 'https:'].includes(parsed.protocol)) {\n return {\n safe: false,\n reason: `Protocol '${parsed.protocol}' not allowed (only http/https)`,\n };\n }\n\n const hostname = parsed.hostname.toLowerCase();\n\n // Check allowed domains (whitelist)\n if (config.allowedDomains.length > 0) {\n const isAllowed = config.allowedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (!isAllowed) {\n return {\n safe: false,\n reason: `Domain '${hostname}' not in allowed list`,\n };\n }\n }\n\n // Check blocked domains (blacklist)\n if (config.blockedDomains.length > 0) {\n const isBlocked = config.blockedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (isBlocked) {\n return {\n safe: false,\n reason: `Domain '${hostname}' is blocked`,\n };\n }\n }\n\n // Check localhost\n if (isLocalhost(hostname) && !config.allowLocalhost) {\n return { safe: false, reason: 'Localhost not allowed' };\n }\n\n // Check private IPs\n if (isPrivateIP(hostname) && !config.allowPrivateIPs) {\n return { safe: false, reason: 'Private IP addresses not allowed' };\n }\n\n return { safe: true };\n}\n\n/**\n * Quick check if URL is safe with default policy.\n *\n * Convenience wrapper for common case.\n *\n * @param url - URL to validate\n * @returns true if safe, false otherwise\n */\nexport function isSafeForFetch(url: string): boolean {\n return isURLSafe(url).safe;\n}\n\n/**\n * Match domain against pattern (supports wildcards).\n *\n * @example\n * ```typescript\n * matchDomainPattern('api.github.com', '*.github.com') // true\n * matchDomainPattern('github.com', '*.github.com') // false\n * matchDomainPattern('github.com', 'github.com') // true\n * ```\n */\nfunction matchDomainPattern(domain: string, pattern: string): boolean {\n // Exact match\n if (domain === pattern) return true;\n\n // Wildcard match - only matches subdomains, not the base domain\n if (pattern.startsWith('*.')) {\n const suffix = pattern.slice(2);\n return domain.endsWith('.' + suffix);\n }\n\n return false;\n}\n\n/**\n * Check if hostname is localhost/loopback.\n */\nfunction isLocalhost(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 loopback\n if (lower === 'localhost' || lower === '0.0.0.0' || lower.startsWith('127.')) {\n return true;\n }\n\n // IPv6 loopback\n if (lower === '::1' || lower === '[::1]') {\n return true;\n }\n\n return false;\n}\n\n/**\n * Check if hostname is a private IP address.\n *\n * Checks for:\n * - 10.0.0.0/8\n * - 172.16.0.0/12\n * - 192.168.0.0/16\n * - Link-local: 169.254.0.0/16 (includes AWS metadata endpoint)\n * - IPv6 private ranges\n */\nfunction isPrivateIP(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 private ranges\n const parts = hostname.split('.').map(Number);\n\n if (\n parts.length === 4 &&\n parts.every((p) => p >= 0 && p <= 255 && !isNaN(p))\n ) {\n const [octet1, octet2] = parts;\n\n // 10.0.0.0/8\n if (octet1 === 10) return true;\n\n // 172.16.0.0/12\n if (octet1 === 172 && octet2 !== undefined && octet2 >= 16 && octet2 <= 31)\n return true;\n\n // 192.168.0.0/16\n if (octet1 === 192 && octet2 === 168) return true;\n\n // 169.254.0.0/16 (link-local) - includes AWS metadata endpoint\n if (octet1 === 169 && octet2 === 254) return true;\n }\n\n // IPv6 private ranges using regex patterns\n // Strip brackets if present (URL parser adds them: [fc00::1])\n const cleanedHostname = lower.replace(/^\\[|\\]$/g, '');\n\n const ipv6Patterns = [\n /^f[cd][0-9a-f]{2}:/i, // fc00::/7 (includes both fc and fd ranges)\n /^fe80:/i, // fe80::/10 (link-local)\n ];\n\n for (const pattern of ipv6Patterns) {\n if (pattern.test(cleanedHostname)) {\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Filter array of URLs, keeping only safe ones.\n *\n * @param urls - Array of URLs to filter\n * @param policy - SSRF policy\n * @returns Array of safe URLs\n */\nexport function filterSafeUrls(\n urls: string[],\n policy?: SSRFPolicy,\n): string[] {\n return urls.filter((url) => isURLSafe(url, policy).safe);\n}\n"],"mappings":";AA6CO,SAAS,iBACd,MACA,UAAkC,CAAC,GACjB;AAClB,QAAM;IACJ,kBAAkB;IAClB,cAAc;IACd,gBAAgB;EAClB,IAAI;AAEJ,QAAM,UAA4B,CAAC;AACnC,QAAM,OAAO,oBAAI,IAAY;AAI7B,MAAI,iBAAiB;AACnB,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,oBAAoB;AAC1B,QAAI;AAEJ,YAAQ,QAAQ,kBAAkB,KAAK,IAAI,OAAO,MAAM;AACtD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAGA,UAAM,sBAAsB;AAC5B,YAAQ,QAAQ,oBAAoB,KAAK,IAAI,OAAO,MAAM;AACxD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,OAAO,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,GAAG;AACzE,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAIA,UAAM,gBAAgB;AACtB,YAAQ,QAAQ,cAAc,KAAK,IAAI,OAAO,MAAM;AAClD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,QAAQ,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU,MAAM,CAAC,KAAK,IAAI,GAAG,GAAG;AACtF,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS;QACX,CAAC;MACH;IACF;EACF;AAGA,MAAI,eAAe;AACjB,UAAM,iBAAiB;AACvB,QAAI;AAEJ,YAAQ,QAAQ,eAAe,KAAK,IAAI,OAAO,MAAM;AACnD,YAAM,MAAM,MAAM,CAAC;AACnB,UAAI,CAAC,KAAK,IAAI,GAAG,GAAG;AAClB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;QACV,CAAC;MACH;IACF;EACF;AAEA,SAAO;AACT;AAUO,SAAS,uBAAuB,MAAgC;AACrE,QAAM,MAAM,iBAAiB,MAAM,EAAE,eAAe,MAAM,CAAC;AAC3D,SAAO,IAAI,OAAO,CAAC,QAAQ,gBAAgB,KAAK,IAAI,GAAG,CAAC;AAC1D;AAUO,SAAS,gBAAgB,MAAgC;AAC9D,SAAO,iBAAiB,MAAM;IAC5B,iBAAiB;IACjB,aAAa;IACb,eAAe;EACjB,CAAC;AACH;AAUO,SAAS,YAAY,MAAsB;AAChD,SAAO,iBAAiB,IAAI,EAAE;AAChC;AC/KO,IAAM,sBAA4C;EACvD,iBAAiB;EACjB,gBAAgB;EAChB,gBAAgB,CAAC;EACjB,gBAAgB,CAAC;EACjB,eAAe;AACjB;AAgCO,SAAS,UACd,KACA,SAAqB,CAAC,GACT;AACb,QAAM,SAAS,EAAE,GAAG,qBAAqB,GAAG,OAAO;AAGnD,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;EACtB,QAAQ;AACN,WAAO,EAAE,MAAM,OAAO,QAAQ,qBAAqB;EACrD;AAGA,MAAI,OAAO,aAAa,SAAS;AAC/B,QAAI,CAAC,OAAO,eAAe;AACzB,aAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;IACxD;AACA,WAAO,EAAE,MAAM,KAAK;EACtB;AAEA,MAAI,CAAC,CAAC,SAAS,QAAQ,EAAE,SAAS,OAAO,QAAQ,GAAG;AAClD,WAAO;MACL,MAAM;MACN,QAAQ,aAAa,OAAO,QAAQ;IACtC;EACF;AAEA,QAAM,WAAW,OAAO,SAAS,YAAY;AAG7C,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,CAAC,WAAW;AACd,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,WAAW;AACb,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,gBAAgB;AACnD,WAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;EACxD;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,iBAAiB;AACpD,WAAO,EAAE,MAAM,OAAO,QAAQ,mCAAmC;EACnE;AAEA,SAAO,EAAE,MAAM,KAAK;AACtB;AAUO,SAAS,eAAe,KAAsB;AACnD,SAAO,UAAU,GAAG,EAAE;AACxB;AAYA,SAAS,mBAAmB,QAAgB,SAA0B;AAEpE,MAAI,WAAW,QAAS,QAAO;AAG/B,MAAI,QAAQ,WAAW,IAAI,GAAG;AAC5B,UAAM,SAAS,QAAQ,MAAM,CAAC;AAC9B,WAAO,OAAO,SAAS,MAAM,MAAM;EACrC;AAEA,SAAO;AACT;AAKA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,MAAI,UAAU,eAAe,UAAU,aAAa,MAAM,WAAW,MAAM,GAAG;AAC5E,WAAO;EACT;AAGA,MAAI,UAAU,SAAS,UAAU,SAAS;AACxC,WAAO;EACT;AAEA,SAAO;AACT;AAYA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,QAAM,QAAQ,SAAS,MAAM,GAAG,EAAE,IAAI,MAAM;AAE5C,MACE,MAAM,WAAW,KACjB,MAAM,MAAM,CAAC,MAAM,KAAK,KAAK,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,GAClD;AACA,UAAM,CAAC,QAAQ,MAAM,IAAI;AAGzB,QAAI,WAAW,GAAI,QAAO;AAG1B,QAAI,WAAW,OAAO,WAAW,UAAa,UAAU,MAAM,UAAU;AACtE,aAAO;AAGT,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;AAG7C,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;EAC/C;AAIA,QAAM,kBAAkB,MAAM,QAAQ,YAAY,EAAE;AAEpD,QAAM,eAAe;IACnB;;IACA;;EACF;AAEA,aAAW,WAAW,cAAc;AAClC,QAAI,QAAQ,KAAK,eAAe,GAAG;AACjC,aAAO;IACT;EACF;AAEA,SAAO;AACT;AASO,SAAS,eACd,MACA,QACU;AACV,SAAO,KAAK,OAAO,CAAC,QAAQ,UAAU,KAAK,MAAM,EAAE,IAAI;AACzD;","names":[]}
package/dist/index.cjs CHANGED
@@ -128,7 +128,23 @@ function streamingUnzipSync(data, limits = DEFAULT_ZIP_LIMITS) {
128
128
  if (unsafePathHandling === "warn" && limits.onUnsafePath) {
129
129
  limits.onUnsafePath(file.name, reason);
130
130
  }
131
- file.ondata = () => {
131
+ file.ondata = (err, chunk, _final) => {
132
+ if (error) return;
133
+ if (err) {
134
+ error = err;
135
+ return;
136
+ }
137
+ if (chunk && chunk.length > 0) {
138
+ totalBytes += chunk.length;
139
+ if (totalBytes > limits.maxTotalSize) {
140
+ error = new ZipPreflightError(
141
+ `Total actual size ${totalBytes} exceeds limit ${limits.maxTotalSize}`,
142
+ totalBytes,
143
+ limits.maxTotalSize
144
+ );
145
+ file.terminate();
146
+ }
147
+ }
132
148
  };
133
149
  file.start();
134
150
  return;
@@ -8531,6 +8547,43 @@ var DEFAULT_OPTIONS3 = {
8531
8547
  maxTotalSize: 500 * 1024 * 1024,
8532
8548
  extractAssets: true
8533
8549
  };
8550
+ var ASSET_PREFIX_VARIANTS = [
8551
+ { prefix: "__asset:", format: "CCv3 (SillyTavern)" },
8552
+ { prefix: "asset:", format: "CCv2/CCv3 common" },
8553
+ { prefix: "pngchunk:", format: "Explicit PNG chunk" },
8554
+ { prefix: "chara-ext-asset_:", format: "RisuAI (with colon)" },
8555
+ { prefix: "chara-ext-asset_", format: "RisuAI" },
8556
+ { prefix: "__asset_", format: "Legacy underscore variant" }
8557
+ ];
8558
+ function isChunkReference(uri) {
8559
+ return ASSET_PREFIX_VARIANTS.some(({ prefix }) => uri.startsWith(prefix)) || !uri.includes(":");
8560
+ }
8561
+ function stripAssetPrefix(uri) {
8562
+ for (const { prefix } of ASSET_PREFIX_VARIANTS) {
8563
+ if (uri.startsWith(prefix)) {
8564
+ return uri.substring(prefix.length);
8565
+ }
8566
+ }
8567
+ return uri;
8568
+ }
8569
+ function generateChunkKeyCandidates(assetId, originalUri) {
8570
+ return [
8571
+ assetId,
8572
+ // Plain ID: "0"
8573
+ originalUri,
8574
+ // Original URI: "__asset:0"
8575
+ `asset:${assetId}`,
8576
+ // Common format
8577
+ `__asset:${assetId}`,
8578
+ // CCv3 format
8579
+ `__asset_${assetId}`,
8580
+ // Legacy underscore variant
8581
+ `chara-ext-asset_${assetId}`,
8582
+ // RisuAI format
8583
+ `chara-ext-asset_:${assetId}`
8584
+ // RisuAI format with colon
8585
+ ];
8586
+ }
8534
8587
  function estimateBase64DecodedSize(base64Length) {
8535
8588
  return Math.ceil(base64Length * 0.75);
8536
8589
  }
@@ -8598,39 +8651,22 @@ function parsePng(data, options) {
8598
8651
  if (extracted.extraChunks && options.extractAssets && card.data.assets) {
8599
8652
  const usedChunks = /* @__PURE__ */ new Set();
8600
8653
  const chunkMap = /* @__PURE__ */ new Map();
8654
+ const risuIndexPrefixes = ASSET_PREFIX_VARIANTS.filter((v) => v.prefix.startsWith("chara-ext-asset_"));
8601
8655
  for (const chunk of extracted.extraChunks) {
8602
8656
  chunkMap.set(chunk.keyword, chunk);
8603
- if (chunk.keyword.startsWith("chara-ext-asset_")) {
8604
- const suffix = chunk.keyword.replace("chara-ext-asset_", "");
8605
- chunkMap.set(suffix, chunk);
8606
- if (suffix.startsWith(":")) {
8607
- chunkMap.set(suffix.substring(1), chunk);
8657
+ for (const { prefix } of risuIndexPrefixes) {
8658
+ if (chunk.keyword.startsWith(prefix)) {
8659
+ const suffix = chunk.keyword.substring(prefix.length);
8660
+ chunkMap.set(suffix, chunk);
8661
+ break;
8608
8662
  }
8609
8663
  }
8610
8664
  }
8611
8665
  for (const descriptor of card.data.assets) {
8612
8666
  if (!descriptor.uri) continue;
8613
- if (descriptor.uri.startsWith("__asset:") || descriptor.uri.startsWith("asset:") || descriptor.uri.startsWith("pngchunk:") || !descriptor.uri.includes(":")) {
8614
- let assetId = descriptor.uri;
8615
- if (assetId.startsWith("__asset:")) assetId = assetId.substring(8);
8616
- else if (assetId.startsWith("asset:")) assetId = assetId.substring(6);
8617
- else if (assetId.startsWith("pngchunk:")) assetId = assetId.substring(9);
8618
- const candidates = [
8619
- assetId,
8620
- // "0"
8621
- descriptor.uri,
8622
- // "__asset:0"
8623
- `asset:${assetId}`,
8624
- // "asset:0"
8625
- `__asset:${assetId}`,
8626
- // "__asset:0"
8627
- `__asset_${assetId}`,
8628
- // "__asset_0"
8629
- `chara-ext-asset_${assetId}`,
8630
- // "chara-ext-asset_0"
8631
- `chara-ext-asset_:${assetId}`
8632
- // "chara-ext-asset_:0"
8633
- ];
8667
+ if (isChunkReference(descriptor.uri)) {
8668
+ const assetId = stripAssetPrefix(descriptor.uri);
8669
+ const candidates = generateChunkKeyCandidates(assetId, descriptor.uri);
8634
8670
  let chunk;
8635
8671
  for (const candidate of candidates) {
8636
8672
  chunk = chunkMap.get(candidate);
@@ -8677,13 +8713,15 @@ function parsePng(data, options) {
8677
8713
  }
8678
8714
  }
8679
8715
  }
8716
+ const risuPrefixes = ASSET_PREFIX_VARIANTS.filter((v) => v.prefix.startsWith("chara-ext-asset_"));
8680
8717
  for (const chunk of extracted.extraChunks) {
8681
8718
  if (!usedChunks.has(chunk.keyword)) {
8682
8719
  let assetId = null;
8683
- if (chunk.keyword.startsWith("chara-ext-asset_:")) {
8684
- assetId = chunk.keyword.substring("chara-ext-asset_:".length);
8685
- } else if (chunk.keyword.startsWith("chara-ext-asset_")) {
8686
- assetId = chunk.keyword.substring("chara-ext-asset_".length);
8720
+ for (const { prefix } of risuPrefixes) {
8721
+ if (chunk.keyword.startsWith(prefix)) {
8722
+ assetId = chunk.keyword.substring(prefix.length);
8723
+ break;
8724
+ }
8687
8725
  }
8688
8726
  if (assetId) {
8689
8727
  try {