@character-foundry/character-foundry 0.1.9 → 0.4.0-dev.1765937896

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/dist/app-framework.cjs +291 -95
  2. package/dist/app-framework.cjs.map +1 -1
  3. package/dist/app-framework.d.cts +1 -1
  4. package/dist/app-framework.d.ts +1 -1
  5. package/dist/app-framework.js +292 -96
  6. package/dist/app-framework.js.map +1 -1
  7. package/dist/charx.cjs +44 -23
  8. package/dist/charx.cjs.map +1 -1
  9. package/dist/charx.d.cts +368 -207
  10. package/dist/charx.d.ts +368 -207
  11. package/dist/charx.js +44 -23
  12. package/dist/charx.js.map +1 -1
  13. package/dist/exporter.cjs +27 -22
  14. package/dist/exporter.cjs.map +1 -1
  15. package/dist/exporter.d.cts +368 -207
  16. package/dist/exporter.d.ts +368 -207
  17. package/dist/exporter.js +27 -22
  18. package/dist/exporter.js.map +1 -1
  19. package/dist/federation.cjs +16 -4
  20. package/dist/federation.cjs.map +1 -1
  21. package/dist/federation.d.cts +368 -207
  22. package/dist/federation.d.ts +368 -207
  23. package/dist/federation.js +16 -4
  24. package/dist/federation.js.map +1 -1
  25. package/dist/image-utils.cjs.map +1 -1
  26. package/dist/image-utils.d.cts +12 -0
  27. package/dist/image-utils.d.ts +12 -0
  28. package/dist/image-utils.js.map +1 -1
  29. package/dist/index.cjs +106 -56
  30. package/dist/index.cjs.map +1 -1
  31. package/dist/index.d.cts +708 -423
  32. package/dist/index.d.ts +708 -423
  33. package/dist/index.js +106 -56
  34. package/dist/index.js.map +1 -1
  35. package/dist/loader.cjs +106 -56
  36. package/dist/loader.cjs.map +1 -1
  37. package/dist/loader.d.cts +564 -318
  38. package/dist/loader.d.ts +564 -318
  39. package/dist/loader.js +106 -56
  40. package/dist/loader.js.map +1 -1
  41. package/dist/lorebook.cjs +5 -5
  42. package/dist/lorebook.cjs.map +1 -1
  43. package/dist/lorebook.d.cts +674 -381
  44. package/dist/lorebook.d.ts +674 -381
  45. package/dist/lorebook.js +5 -5
  46. package/dist/lorebook.js.map +1 -1
  47. package/dist/normalizer.cjs +33 -23
  48. package/dist/normalizer.cjs.map +1 -1
  49. package/dist/normalizer.d.cts +896 -560
  50. package/dist/normalizer.d.ts +896 -560
  51. package/dist/normalizer.js +33 -23
  52. package/dist/normalizer.js.map +1 -1
  53. package/dist/png.cjs +27 -22
  54. package/dist/png.cjs.map +1 -1
  55. package/dist/png.d.cts +512 -312
  56. package/dist/png.d.ts +512 -312
  57. package/dist/png.js +27 -22
  58. package/dist/png.js.map +1 -1
  59. package/dist/schemas.cjs +41 -26
  60. package/dist/schemas.cjs.map +1 -1
  61. package/dist/schemas.d.cts +1444 -896
  62. package/dist/schemas.d.ts +1444 -896
  63. package/dist/schemas.js +41 -26
  64. package/dist/schemas.js.map +1 -1
  65. package/dist/voxta.cjs +48 -25
  66. package/dist/voxta.cjs.map +1 -1
  67. package/dist/voxta.d.cts +564 -318
  68. package/dist/voxta.d.ts +564 -318
  69. package/dist/voxta.js +48 -25
  70. package/dist/voxta.js.map +1 -1
  71. package/package.json +6 -6
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/image-utils.ts","../../image-utils/src/extraction.ts","../../image-utils/src/ssrf.ts"],"sourcesContent":["export * from '@character-foundry/image-utils';\n","/**\n * Image URL Extraction\n *\n * Extracts image URLs from markdown, HTML, and base64 data URLs in text.\n * Used across Federation, Archive, and Architect for image processing.\n */\n\nexport interface ImageExtractionOptions {\n /** Include markdown image syntax `![alt](url)` (default: true) */\n includeMarkdown?: boolean;\n /** Include HTML img tags `<img src=\"url\">` (default: true) */\n includeHTML?: boolean;\n /** Include base64 data URLs `data:image/...;base64,...` (default: true) */\n includeBase64?: boolean;\n}\n\nexport interface ExtractedImage {\n /** The extracted URL or data URL */\n url: string;\n /** Source format where the image was found */\n source: 'markdown' | 'html' | 'base64';\n /** Surrounding context for debugging (optional) */\n context?: string;\n}\n\n/**\n * Extract all image URLs from text.\n *\n * Supports markdown, HTML, and base64 data URLs.\n * Returns deduplicated results.\n *\n * @param text - Text to extract image URLs from\n * @param options - Extraction options\n * @returns Array of extracted images with source information\n *\n * @example\n * ```typescript\n * const text = 'Check out ![portrait](avatar.png) and <img src=\"banner.jpg\">';\n * const images = extractImageUrls(text);\n * // [\n * // { url: 'avatar.png', source: 'markdown' },\n * // { url: 'banner.jpg', source: 'html' }\n * // ]\n * ```\n */\nexport function extractImageUrls(\n text: string,\n options: ImageExtractionOptions = {},\n): ExtractedImage[] {\n const {\n includeMarkdown = true,\n includeHTML = true,\n includeBase64 = true,\n } = options;\n\n const results: ExtractedImage[] = [];\n const seen = new Set<string>();\n\n // Extract markdown images: ![alt](url) or ![alt](<url>) or ![alt](url =dimensions)\n // Supports: standard, angle brackets, and SillyTavern dimension syntax\n if (includeMarkdown) {\n const markdownPattern = /!\\[([^\\]]*)\\]\\(<?([^>\\s)]+)>?(?:\\s*=[^)]+)?\\)/g;\n let match: RegExpExecArray | null;\n\n while ((match = markdownPattern.exec(text)) !== null) {\n const url = match[2]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'markdown',\n context: match[0],\n });\n }\n }\n }\n\n // Extract HTML img tags: <img src=\"url\"> (quoted and unquoted)\n if (includeHTML) {\n // Quoted src\n const htmlQuotedPattern = /<img[^>]+src=[\"']([^\"']+)[\"'][^>]*>/gi;\n let match: RegExpExecArray | null;\n\n while ((match = htmlQuotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // Unquoted src (e.g., <img src=url>)\n const htmlUnquotedPattern = /<img[^>]+src=([^\\s\"'>]+)[^>]*>/gi;\n while ((match = htmlUnquotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Skip if it starts with a quote (already handled above)\n if (url && !url.startsWith('\"') && !url.startsWith(\"'\") && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // CSS url() function: url(url), url(\"url\"), url('url')\n // Common in background-image, background, content properties\n const cssUrlPattern = /url\\([\"']?([^)\"']+)[\"']?\\)/gi;\n while ((match = cssUrlPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Only include http(s) URLs to avoid CSS variables and relative paths\n if (url && (url.startsWith('http://') || url.startsWith('https://')) && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n }\n\n // Extract plain image URLs (not wrapped in any syntax)\n if (includeHTML) {\n // Plain URLs with image extensions (common on image hosts)\n const plainUrlPattern = /(?<![(\"'])(https?:\\/\\/[^\\s<>\"']+\\.(?:jpg|jpeg|png|gif|webp|svg|avif|bmp))(?![)\"'])/gi;\n let match: RegExpExecArray | null;\n\n while ((match = plainUrlPattern.exec(text)) !== null) {\n const url = match[0]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: url,\n });\n }\n }\n }\n\n // Extract base64 data URLs: data:image/...;base64,...\n if (includeBase64) {\n const dataUrlPattern = /data:image\\/[^;]+;base64,[A-Za-z0-9+/=]+/g;\n let match: RegExpExecArray | null;\n\n while ((match = dataUrlPattern.exec(text)) !== null) {\n const url = match[0];\n if (!seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'base64',\n });\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract only HTTP/HTTPS URLs (excludes data URLs and relative paths).\n *\n * Convenience wrapper around extractImageUrls that filters for remote URLs.\n *\n * @param text - Text to extract URLs from\n * @returns Array of extracted HTTP/HTTPS image URLs\n */\nexport function extractRemoteImageUrls(text: string): ExtractedImage[] {\n const all = extractImageUrls(text, { includeBase64: false });\n return all.filter((img) => /^https?:\\/\\//i.test(img.url));\n}\n\n/**\n * Extract only base64 data URLs.\n *\n * Convenience wrapper around extractImageUrls.\n *\n * @param text - Text to extract data URLs from\n * @returns Array of base64 image data URLs\n */\nexport function extractDataUrls(text: string): ExtractedImage[] {\n return extractImageUrls(text, {\n includeMarkdown: false,\n includeHTML: false,\n includeBase64: true,\n });\n}\n\n/**\n * Count images in text without extracting full details.\n *\n * More efficient than extractImageUrls when you only need the count.\n *\n * @param text - Text to count images in\n * @returns Total number of unique image references\n */\nexport function countImages(text: string): number {\n return extractImageUrls(text).length;\n}\n","/**\n * SSRF (Server-Side Request Forgery) Protection\n *\n * Validates URLs to prevent SSRF attacks.\n * Browser-safe implementation (no Node.js dependencies).\n */\n\nexport interface SSRFPolicy {\n /** Allow private IP addresses (10.x, 172.16-31.x, 192.168.x) (default: false) */\n allowPrivateIPs?: boolean;\n /** Allow localhost/loopback (127.x, ::1) (default: false) */\n allowLocalhost?: boolean;\n /** Blocked domain patterns (e.g., ['internal.company.com', '*.local']) */\n blockedDomains?: string[];\n /** Allowed domain patterns - if provided, ONLY these are allowed */\n allowedDomains?: string[];\n /** Allow data URLs (data:image/...) (default: false) */\n allowDataUrls?: boolean;\n}\n\nexport interface SafetyCheck {\n /** Whether the URL is safe according to policy */\n safe: boolean;\n /** Reason why URL is unsafe (if safe=false) */\n reason?: string;\n}\n\n/**\n * Default SSRF policy - blocks private IPs, localhost, and data URLs.\n */\nexport const DEFAULT_SSRF_POLICY: Required<SSRFPolicy> = {\n allowPrivateIPs: false,\n allowLocalhost: false,\n blockedDomains: [],\n allowedDomains: [],\n allowDataUrls: false,\n};\n\n/**\n * Check if URL is safe to fetch according to SSRF policy.\n *\n * This is the canonical SSRF protection - all apps should use this\n * before fetching external URLs.\n *\n * @param url - URL to validate\n * @param policy - SSRF policy (uses defaults if not provided)\n * @returns Safety check result with reason if unsafe\n *\n * @example\n * ```typescript\n * const check = isURLSafe('http://10.0.0.1/secret');\n * if (!check.safe) {\n * console.error('Unsafe URL:', check.reason);\n * }\n * ```\n */\nexport function isURLSafe(\n url: string,\n policy: SSRFPolicy = {},\n): SafetyCheck {\n const config = { ...DEFAULT_SSRF_POLICY, ...policy };\n\n // Parse URL\n let parsed: URL;\n try {\n parsed = new URL(url);\n } catch {\n return { safe: false, reason: 'Invalid URL format' };\n }\n\n // Check protocol\n if (parsed.protocol === 'data:') {\n if (!config.allowDataUrls) {\n return { safe: false, reason: 'Data URLs not allowed' };\n }\n return { safe: true };\n }\n\n if (!['http:', 'https:'].includes(parsed.protocol)) {\n return {\n safe: false,\n reason: `Protocol '${parsed.protocol}' not allowed (only http/https)`,\n };\n }\n\n const hostname = parsed.hostname.toLowerCase();\n\n // Check allowed domains (whitelist)\n if (config.allowedDomains.length > 0) {\n const isAllowed = config.allowedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (!isAllowed) {\n return {\n safe: false,\n reason: `Domain '${hostname}' not in allowed list`,\n };\n }\n }\n\n // Check blocked domains (blacklist)\n if (config.blockedDomains.length > 0) {\n const isBlocked = config.blockedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (isBlocked) {\n return {\n safe: false,\n reason: `Domain '${hostname}' is blocked`,\n };\n }\n }\n\n // Check localhost\n if (isLocalhost(hostname) && !config.allowLocalhost) {\n return { safe: false, reason: 'Localhost not allowed' };\n }\n\n // Check private IPs\n if (isPrivateIP(hostname) && !config.allowPrivateIPs) {\n return { safe: false, reason: 'Private IP addresses not allowed' };\n }\n\n return { safe: true };\n}\n\n/**\n * Quick check if URL is safe with default policy.\n *\n * Convenience wrapper for common case.\n *\n * @param url - URL to validate\n * @returns true if safe, false otherwise\n */\nexport function isSafeForFetch(url: string): boolean {\n return isURLSafe(url).safe;\n}\n\n/**\n * Match domain against pattern (supports wildcards).\n *\n * @example\n * ```typescript\n * matchDomainPattern('api.github.com', '*.github.com') // true\n * matchDomainPattern('github.com', '*.github.com') // false\n * matchDomainPattern('github.com', 'github.com') // true\n * ```\n */\nfunction matchDomainPattern(domain: string, pattern: string): boolean {\n // Exact match\n if (domain === pattern) return true;\n\n // Wildcard match - only matches subdomains, not the base domain\n if (pattern.startsWith('*.')) {\n const suffix = pattern.slice(2);\n return domain.endsWith('.' + suffix);\n }\n\n return false;\n}\n\n/**\n * Check if hostname is localhost/loopback.\n */\nfunction isLocalhost(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 loopback\n if (lower === 'localhost' || lower === '0.0.0.0' || lower.startsWith('127.')) {\n return true;\n }\n\n // IPv6 loopback\n if (lower === '::1' || lower === '[::1]') {\n return true;\n }\n\n return false;\n}\n\n/**\n * Check if hostname is a private IP address.\n *\n * Checks for:\n * - 10.0.0.0/8\n * - 172.16.0.0/12\n * - 192.168.0.0/16\n * - Link-local: 169.254.0.0/16 (includes AWS metadata endpoint)\n * - IPv6 private ranges\n */\nfunction isPrivateIP(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 private ranges\n const parts = hostname.split('.').map(Number);\n\n if (\n parts.length === 4 &&\n parts.every((p) => p >= 0 && p <= 255 && !isNaN(p))\n ) {\n const [octet1, octet2] = parts;\n\n // 10.0.0.0/8\n if (octet1 === 10) return true;\n\n // 172.16.0.0/12\n if (octet1 === 172 && octet2 !== undefined && octet2 >= 16 && octet2 <= 31)\n return true;\n\n // 192.168.0.0/16\n if (octet1 === 192 && octet2 === 168) return true;\n\n // 169.254.0.0/16 (link-local) - includes AWS metadata endpoint\n if (octet1 === 169 && octet2 === 254) return true;\n }\n\n // IPv6 private ranges using regex patterns\n // Strip brackets if present (URL parser adds them: [fc00::1])\n const cleanedHostname = lower.replace(/^\\[|\\]$/g, '');\n\n const ipv6Patterns = [\n /^f[cd][0-9a-f]{2}:/i, // fc00::/7 (includes both fc and fd ranges)\n /^fe80:/i, // fe80::/10 (link-local)\n ];\n\n for (const pattern of ipv6Patterns) {\n if (pattern.test(cleanedHostname)) {\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Filter array of URLs, keeping only safe ones.\n *\n * @param urls - Array of URLs to filter\n * @param policy - SSRF policy\n * @returns Array of safe URLs\n */\nexport function filterSafeUrls(\n urls: string[],\n policy?: SSRFPolicy,\n): string[] {\n return urls.filter((url) => isURLSafe(url, policy).safe);\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;AC6CO,SAAS,iBACd,MACA,UAAkC,CAAC,GACjB;AAClB,QAAM;IACJ,kBAAkB;IAClB,cAAc;IACd,gBAAgB;EAClB,IAAI;AAEJ,QAAM,UAA4B,CAAC;AACnC,QAAM,OAAO,oBAAI,IAAY;AAI7B,MAAI,iBAAiB;AACnB,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,oBAAoB;AAC1B,QAAI;AAEJ,YAAQ,QAAQ,kBAAkB,KAAK,IAAI,OAAO,MAAM;AACtD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAGA,UAAM,sBAAsB;AAC5B,YAAQ,QAAQ,oBAAoB,KAAK,IAAI,OAAO,MAAM;AACxD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,OAAO,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,GAAG;AACzE,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAIA,UAAM,gBAAgB;AACtB,YAAQ,QAAQ,cAAc,KAAK,IAAI,OAAO,MAAM;AAClD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,QAAQ,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU,MAAM,CAAC,KAAK,IAAI,GAAG,GAAG;AACtF,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS;QACX,CAAC;MACH;IACF;EACF;AAGA,MAAI,eAAe;AACjB,UAAM,iBAAiB;AACvB,QAAI;AAEJ,YAAQ,QAAQ,eAAe,KAAK,IAAI,OAAO,MAAM;AACnD,YAAM,MAAM,MAAM,CAAC;AACnB,UAAI,CAAC,KAAK,IAAI,GAAG,GAAG;AAClB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;QACV,CAAC;MACH;IACF;EACF;AAEA,SAAO;AACT;AAUO,SAAS,uBAAuB,MAAgC;AACrE,QAAM,MAAM,iBAAiB,MAAM,EAAE,eAAe,MAAM,CAAC;AAC3D,SAAO,IAAI,OAAO,CAAC,QAAQ,gBAAgB,KAAK,IAAI,GAAG,CAAC;AAC1D;AAUO,SAAS,gBAAgB,MAAgC;AAC9D,SAAO,iBAAiB,MAAM;IAC5B,iBAAiB;IACjB,aAAa;IACb,eAAe;EACjB,CAAC;AACH;AAUO,SAAS,YAAY,MAAsB;AAChD,SAAO,iBAAiB,IAAI,EAAE;AAChC;AC/KO,IAAM,sBAA4C;EACvD,iBAAiB;EACjB,gBAAgB;EAChB,gBAAgB,CAAC;EACjB,gBAAgB,CAAC;EACjB,eAAe;AACjB;AAoBO,SAAS,UACd,KACA,SAAqB,CAAC,GACT;AACb,QAAM,SAAS,EAAE,GAAG,qBAAqB,GAAG,OAAO;AAGnD,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;EACtB,QAAQ;AACN,WAAO,EAAE,MAAM,OAAO,QAAQ,qBAAqB;EACrD;AAGA,MAAI,OAAO,aAAa,SAAS;AAC/B,QAAI,CAAC,OAAO,eAAe;AACzB,aAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;IACxD;AACA,WAAO,EAAE,MAAM,KAAK;EACtB;AAEA,MAAI,CAAC,CAAC,SAAS,QAAQ,EAAE,SAAS,OAAO,QAAQ,GAAG;AAClD,WAAO;MACL,MAAM;MACN,QAAQ,aAAa,OAAO,QAAQ;IACtC;EACF;AAEA,QAAM,WAAW,OAAO,SAAS,YAAY;AAG7C,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,CAAC,WAAW;AACd,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,WAAW;AACb,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,gBAAgB;AACnD,WAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;EACxD;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,iBAAiB;AACpD,WAAO,EAAE,MAAM,OAAO,QAAQ,mCAAmC;EACnE;AAEA,SAAO,EAAE,MAAM,KAAK;AACtB;AAUO,SAAS,eAAe,KAAsB;AACnD,SAAO,UAAU,GAAG,EAAE;AACxB;AAYA,SAAS,mBAAmB,QAAgB,SAA0B;AAEpE,MAAI,WAAW,QAAS,QAAO;AAG/B,MAAI,QAAQ,WAAW,IAAI,GAAG;AAC5B,UAAM,SAAS,QAAQ,MAAM,CAAC;AAC9B,WAAO,OAAO,SAAS,MAAM,MAAM;EACrC;AAEA,SAAO;AACT;AAKA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,MAAI,UAAU,eAAe,UAAU,aAAa,MAAM,WAAW,MAAM,GAAG;AAC5E,WAAO;EACT;AAGA,MAAI,UAAU,SAAS,UAAU,SAAS;AACxC,WAAO;EACT;AAEA,SAAO;AACT;AAYA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,QAAM,QAAQ,SAAS,MAAM,GAAG,EAAE,IAAI,MAAM;AAE5C,MACE,MAAM,WAAW,KACjB,MAAM,MAAM,CAAC,MAAM,KAAK,KAAK,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,GAClD;AACA,UAAM,CAAC,QAAQ,MAAM,IAAI;AAGzB,QAAI,WAAW,GAAI,QAAO;AAG1B,QAAI,WAAW,OAAO,WAAW,UAAa,UAAU,MAAM,UAAU;AACtE,aAAO;AAGT,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;AAG7C,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;EAC/C;AAIA,QAAM,kBAAkB,MAAM,QAAQ,YAAY,EAAE;AAEpD,QAAM,eAAe;IACnB;;IACA;;EACF;AAEA,aAAW,WAAW,cAAc;AAClC,QAAI,QAAQ,KAAK,eAAe,GAAG;AACjC,aAAO;IACT;EACF;AAEA,SAAO;AACT;AASO,SAAS,eACd,MACA,QACU;AACV,SAAO,KAAK,OAAO,CAAC,QAAQ,UAAU,KAAK,MAAM,EAAE,IAAI;AACzD;","names":[]}
1
+ {"version":3,"sources":["../src/image-utils.ts","../../image-utils/src/extraction.ts","../../image-utils/src/ssrf.ts"],"sourcesContent":["export * from '@character-foundry/image-utils';\n","/**\n * Image URL Extraction\n *\n * Extracts image URLs from markdown, HTML, and base64 data URLs in text.\n * Used across Federation, Archive, and Architect for image processing.\n */\n\nexport interface ImageExtractionOptions {\n /** Include markdown image syntax `![alt](url)` (default: true) */\n includeMarkdown?: boolean;\n /** Include HTML img tags `<img src=\"url\">` (default: true) */\n includeHTML?: boolean;\n /** Include base64 data URLs `data:image/...;base64,...` (default: true) */\n includeBase64?: boolean;\n}\n\nexport interface ExtractedImage {\n /** The extracted URL or data URL */\n url: string;\n /** Source format where the image was found */\n source: 'markdown' | 'html' | 'base64';\n /** Surrounding context for debugging (optional) */\n context?: string;\n}\n\n/**\n * Extract all image URLs from text.\n *\n * Supports markdown, HTML, and base64 data URLs.\n * Returns deduplicated results.\n *\n * @param text - Text to extract image URLs from\n * @param options - Extraction options\n * @returns Array of extracted images with source information\n *\n * @example\n * ```typescript\n * const text = 'Check out ![portrait](avatar.png) and <img src=\"banner.jpg\">';\n * const images = extractImageUrls(text);\n * // [\n * // { url: 'avatar.png', source: 'markdown' },\n * // { url: 'banner.jpg', source: 'html' }\n * // ]\n * ```\n */\nexport function extractImageUrls(\n text: string,\n options: ImageExtractionOptions = {},\n): ExtractedImage[] {\n const {\n includeMarkdown = true,\n includeHTML = true,\n includeBase64 = true,\n } = options;\n\n const results: ExtractedImage[] = [];\n const seen = new Set<string>();\n\n // Extract markdown images: ![alt](url) or ![alt](<url>) or ![alt](url =dimensions)\n // Supports: standard, angle brackets, and SillyTavern dimension syntax\n if (includeMarkdown) {\n const markdownPattern = /!\\[([^\\]]*)\\]\\(<?([^>\\s)]+)>?(?:\\s*=[^)]+)?\\)/g;\n let match: RegExpExecArray | null;\n\n while ((match = markdownPattern.exec(text)) !== null) {\n const url = match[2]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'markdown',\n context: match[0],\n });\n }\n }\n }\n\n // Extract HTML img tags: <img src=\"url\"> (quoted and unquoted)\n if (includeHTML) {\n // Quoted src\n const htmlQuotedPattern = /<img[^>]+src=[\"']([^\"']+)[\"'][^>]*>/gi;\n let match: RegExpExecArray | null;\n\n while ((match = htmlQuotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // Unquoted src (e.g., <img src=url>)\n const htmlUnquotedPattern = /<img[^>]+src=([^\\s\"'>]+)[^>]*>/gi;\n while ((match = htmlUnquotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Skip if it starts with a quote (already handled above)\n if (url && !url.startsWith('\"') && !url.startsWith(\"'\") && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // CSS url() function: url(url), url(\"url\"), url('url')\n // Common in background-image, background, content properties\n const cssUrlPattern = /url\\([\"']?([^)\"']+)[\"']?\\)/gi;\n while ((match = cssUrlPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Only include http(s) URLs to avoid CSS variables and relative paths\n if (url && (url.startsWith('http://') || url.startsWith('https://')) && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n }\n\n // Extract plain image URLs (not wrapped in any syntax)\n if (includeHTML) {\n // Plain URLs with image extensions (common on image hosts)\n const plainUrlPattern = /(?<![(\"'])(https?:\\/\\/[^\\s<>\"']+\\.(?:jpg|jpeg|png|gif|webp|svg|avif|bmp))(?![)\"'])/gi;\n let match: RegExpExecArray | null;\n\n while ((match = plainUrlPattern.exec(text)) !== null) {\n const url = match[0]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: url,\n });\n }\n }\n }\n\n // Extract base64 data URLs: data:image/...;base64,...\n if (includeBase64) {\n const dataUrlPattern = /data:image\\/[^;]+;base64,[A-Za-z0-9+/=]+/g;\n let match: RegExpExecArray | null;\n\n while ((match = dataUrlPattern.exec(text)) !== null) {\n const url = match[0];\n if (!seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'base64',\n });\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract only HTTP/HTTPS URLs (excludes data URLs and relative paths).\n *\n * Convenience wrapper around extractImageUrls that filters for remote URLs.\n *\n * @param text - Text to extract URLs from\n * @returns Array of extracted HTTP/HTTPS image URLs\n */\nexport function extractRemoteImageUrls(text: string): ExtractedImage[] {\n const all = extractImageUrls(text, { includeBase64: false });\n return all.filter((img) => /^https?:\\/\\//i.test(img.url));\n}\n\n/**\n * Extract only base64 data URLs.\n *\n * Convenience wrapper around extractImageUrls.\n *\n * @param text - Text to extract data URLs from\n * @returns Array of base64 image data URLs\n */\nexport function extractDataUrls(text: string): ExtractedImage[] {\n return extractImageUrls(text, {\n includeMarkdown: false,\n includeHTML: false,\n includeBase64: true,\n });\n}\n\n/**\n * Count images in text without extracting full details.\n *\n * More efficient than extractImageUrls when you only need the count.\n *\n * @param text - Text to count images in\n * @returns Total number of unique image references\n */\nexport function countImages(text: string): number {\n return extractImageUrls(text).length;\n}\n","/**\n * SSRF (Server-Side Request Forgery) Protection\n *\n * Validates URLs to prevent SSRF attacks.\n * Browser-safe implementation (no Node.js dependencies).\n */\n\nexport interface SSRFPolicy {\n /** Allow private IP addresses (10.x, 172.16-31.x, 192.168.x) (default: false) */\n allowPrivateIPs?: boolean;\n /** Allow localhost/loopback (127.x, ::1) (default: false) */\n allowLocalhost?: boolean;\n /** Blocked domain patterns (e.g., ['internal.company.com', '*.local']) */\n blockedDomains?: string[];\n /** Allowed domain patterns - if provided, ONLY these are allowed */\n allowedDomains?: string[];\n /** Allow data URLs (data:image/...) (default: false) */\n allowDataUrls?: boolean;\n}\n\nexport interface SafetyCheck {\n /** Whether the URL is safe according to policy */\n safe: boolean;\n /** Reason why URL is unsafe (if safe=false) */\n reason?: string;\n}\n\n/**\n * Default SSRF policy - blocks private IPs, localhost, and data URLs.\n */\nexport const DEFAULT_SSRF_POLICY: Required<SSRFPolicy> = {\n allowPrivateIPs: false,\n allowLocalhost: false,\n blockedDomains: [],\n allowedDomains: [],\n allowDataUrls: false,\n};\n\n/**\n * Check if URL is safe to fetch according to SSRF policy.\n *\n * This is the canonical SSRF protection - all apps should use this\n * before fetching external URLs.\n *\n * **Limitations (browser-side validation only):**\n * - Cannot detect domains that resolve to private IPs (e.g., `attacker.com` -> `127.0.0.1`)\n * - Cannot prevent DNS rebinding attacks (domain resolves differently on subsequent requests)\n * - Cannot validate redirect destinations (server may redirect to internal URLs)\n * - Does not perform actual DNS resolution - only validates URL syntax and hostname patterns\n *\n * For server-side applications requiring stronger SSRF protection, perform DNS resolution\n * and validate the resolved IP address before making the request, and validate redirect\n * destinations or disable redirects entirely.\n *\n * @param url - URL to validate\n * @param policy - SSRF policy (uses defaults if not provided)\n * @returns Safety check result with reason if unsafe\n *\n * @see https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html\n *\n * @example\n * ```typescript\n * const check = isURLSafe('http://10.0.0.1/secret');\n * if (!check.safe) {\n * console.error('Unsafe URL:', check.reason);\n * }\n * ```\n */\nexport function isURLSafe(\n url: string,\n policy: SSRFPolicy = {},\n): SafetyCheck {\n const config = { ...DEFAULT_SSRF_POLICY, ...policy };\n\n // Parse URL\n let parsed: URL;\n try {\n parsed = new URL(url);\n } catch {\n return { safe: false, reason: 'Invalid URL format' };\n }\n\n // Check protocol\n if (parsed.protocol === 'data:') {\n if (!config.allowDataUrls) {\n return { safe: false, reason: 'Data URLs not allowed' };\n }\n return { safe: true };\n }\n\n if (!['http:', 'https:'].includes(parsed.protocol)) {\n return {\n safe: false,\n reason: `Protocol '${parsed.protocol}' not allowed (only http/https)`,\n };\n }\n\n const hostname = parsed.hostname.toLowerCase();\n\n // Check allowed domains (whitelist)\n if (config.allowedDomains.length > 0) {\n const isAllowed = config.allowedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (!isAllowed) {\n return {\n safe: false,\n reason: `Domain '${hostname}' not in allowed list`,\n };\n }\n }\n\n // Check blocked domains (blacklist)\n if (config.blockedDomains.length > 0) {\n const isBlocked = config.blockedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (isBlocked) {\n return {\n safe: false,\n reason: `Domain '${hostname}' is blocked`,\n };\n }\n }\n\n // Check localhost\n if (isLocalhost(hostname) && !config.allowLocalhost) {\n return { safe: false, reason: 'Localhost not allowed' };\n }\n\n // Check private IPs\n if (isPrivateIP(hostname) && !config.allowPrivateIPs) {\n return { safe: false, reason: 'Private IP addresses not allowed' };\n }\n\n return { safe: true };\n}\n\n/**\n * Quick check if URL is safe with default policy.\n *\n * Convenience wrapper for common case.\n *\n * @param url - URL to validate\n * @returns true if safe, false otherwise\n */\nexport function isSafeForFetch(url: string): boolean {\n return isURLSafe(url).safe;\n}\n\n/**\n * Match domain against pattern (supports wildcards).\n *\n * @example\n * ```typescript\n * matchDomainPattern('api.github.com', '*.github.com') // true\n * matchDomainPattern('github.com', '*.github.com') // false\n * matchDomainPattern('github.com', 'github.com') // true\n * ```\n */\nfunction matchDomainPattern(domain: string, pattern: string): boolean {\n // Exact match\n if (domain === pattern) return true;\n\n // Wildcard match - only matches subdomains, not the base domain\n if (pattern.startsWith('*.')) {\n const suffix = pattern.slice(2);\n return domain.endsWith('.' + suffix);\n }\n\n return false;\n}\n\n/**\n * Check if hostname is localhost/loopback.\n */\nfunction isLocalhost(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 loopback\n if (lower === 'localhost' || lower === '0.0.0.0' || lower.startsWith('127.')) {\n return true;\n }\n\n // IPv6 loopback\n if (lower === '::1' || lower === '[::1]') {\n return true;\n }\n\n return false;\n}\n\n/**\n * Check if hostname is a private IP address.\n *\n * Checks for:\n * - 10.0.0.0/8\n * - 172.16.0.0/12\n * - 192.168.0.0/16\n * - Link-local: 169.254.0.0/16 (includes AWS metadata endpoint)\n * - IPv6 private ranges\n */\nfunction isPrivateIP(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 private ranges\n const parts = hostname.split('.').map(Number);\n\n if (\n parts.length === 4 &&\n parts.every((p) => p >= 0 && p <= 255 && !isNaN(p))\n ) {\n const [octet1, octet2] = parts;\n\n // 10.0.0.0/8\n if (octet1 === 10) return true;\n\n // 172.16.0.0/12\n if (octet1 === 172 && octet2 !== undefined && octet2 >= 16 && octet2 <= 31)\n return true;\n\n // 192.168.0.0/16\n if (octet1 === 192 && octet2 === 168) return true;\n\n // 169.254.0.0/16 (link-local) - includes AWS metadata endpoint\n if (octet1 === 169 && octet2 === 254) return true;\n }\n\n // IPv6 private ranges using regex patterns\n // Strip brackets if present (URL parser adds them: [fc00::1])\n const cleanedHostname = lower.replace(/^\\[|\\]$/g, '');\n\n const ipv6Patterns = [\n /^f[cd][0-9a-f]{2}:/i, // fc00::/7 (includes both fc and fd ranges)\n /^fe80:/i, // fe80::/10 (link-local)\n ];\n\n for (const pattern of ipv6Patterns) {\n if (pattern.test(cleanedHostname)) {\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Filter array of URLs, keeping only safe ones.\n *\n * @param urls - Array of URLs to filter\n * @param policy - SSRF policy\n * @returns Array of safe URLs\n */\nexport function filterSafeUrls(\n urls: string[],\n policy?: SSRFPolicy,\n): string[] {\n return urls.filter((url) => isURLSafe(url, policy).safe);\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;AC6CO,SAAS,iBACd,MACA,UAAkC,CAAC,GACjB;AAClB,QAAM;IACJ,kBAAkB;IAClB,cAAc;IACd,gBAAgB;EAClB,IAAI;AAEJ,QAAM,UAA4B,CAAC;AACnC,QAAM,OAAO,oBAAI,IAAY;AAI7B,MAAI,iBAAiB;AACnB,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,oBAAoB;AAC1B,QAAI;AAEJ,YAAQ,QAAQ,kBAAkB,KAAK,IAAI,OAAO,MAAM;AACtD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAGA,UAAM,sBAAsB;AAC5B,YAAQ,QAAQ,oBAAoB,KAAK,IAAI,OAAO,MAAM;AACxD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,OAAO,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,GAAG;AACzE,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAIA,UAAM,gBAAgB;AACtB,YAAQ,QAAQ,cAAc,KAAK,IAAI,OAAO,MAAM;AAClD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,QAAQ,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU,MAAM,CAAC,KAAK,IAAI,GAAG,GAAG;AACtF,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS;QACX,CAAC;MACH;IACF;EACF;AAGA,MAAI,eAAe;AACjB,UAAM,iBAAiB;AACvB,QAAI;AAEJ,YAAQ,QAAQ,eAAe,KAAK,IAAI,OAAO,MAAM;AACnD,YAAM,MAAM,MAAM,CAAC;AACnB,UAAI,CAAC,KAAK,IAAI,GAAG,GAAG;AAClB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;QACV,CAAC;MACH;IACF;EACF;AAEA,SAAO;AACT;AAUO,SAAS,uBAAuB,MAAgC;AACrE,QAAM,MAAM,iBAAiB,MAAM,EAAE,eAAe,MAAM,CAAC;AAC3D,SAAO,IAAI,OAAO,CAAC,QAAQ,gBAAgB,KAAK,IAAI,GAAG,CAAC;AAC1D;AAUO,SAAS,gBAAgB,MAAgC;AAC9D,SAAO,iBAAiB,MAAM;IAC5B,iBAAiB;IACjB,aAAa;IACb,eAAe;EACjB,CAAC;AACH;AAUO,SAAS,YAAY,MAAsB;AAChD,SAAO,iBAAiB,IAAI,EAAE;AAChC;AC/KO,IAAM,sBAA4C;EACvD,iBAAiB;EACjB,gBAAgB;EAChB,gBAAgB,CAAC;EACjB,gBAAgB,CAAC;EACjB,eAAe;AACjB;AAgCO,SAAS,UACd,KACA,SAAqB,CAAC,GACT;AACb,QAAM,SAAS,EAAE,GAAG,qBAAqB,GAAG,OAAO;AAGnD,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;EACtB,QAAQ;AACN,WAAO,EAAE,MAAM,OAAO,QAAQ,qBAAqB;EACrD;AAGA,MAAI,OAAO,aAAa,SAAS;AAC/B,QAAI,CAAC,OAAO,eAAe;AACzB,aAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;IACxD;AACA,WAAO,EAAE,MAAM,KAAK;EACtB;AAEA,MAAI,CAAC,CAAC,SAAS,QAAQ,EAAE,SAAS,OAAO,QAAQ,GAAG;AAClD,WAAO;MACL,MAAM;MACN,QAAQ,aAAa,OAAO,QAAQ;IACtC;EACF;AAEA,QAAM,WAAW,OAAO,SAAS,YAAY;AAG7C,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,CAAC,WAAW;AACd,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,WAAW;AACb,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,gBAAgB;AACnD,WAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;EACxD;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,iBAAiB;AACpD,WAAO,EAAE,MAAM,OAAO,QAAQ,mCAAmC;EACnE;AAEA,SAAO,EAAE,MAAM,KAAK;AACtB;AAUO,SAAS,eAAe,KAAsB;AACnD,SAAO,UAAU,GAAG,EAAE;AACxB;AAYA,SAAS,mBAAmB,QAAgB,SAA0B;AAEpE,MAAI,WAAW,QAAS,QAAO;AAG/B,MAAI,QAAQ,WAAW,IAAI,GAAG;AAC5B,UAAM,SAAS,QAAQ,MAAM,CAAC;AAC9B,WAAO,OAAO,SAAS,MAAM,MAAM;EACrC;AAEA,SAAO;AACT;AAKA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,MAAI,UAAU,eAAe,UAAU,aAAa,MAAM,WAAW,MAAM,GAAG;AAC5E,WAAO;EACT;AAGA,MAAI,UAAU,SAAS,UAAU,SAAS;AACxC,WAAO;EACT;AAEA,SAAO;AACT;AAYA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,QAAM,QAAQ,SAAS,MAAM,GAAG,EAAE,IAAI,MAAM;AAE5C,MACE,MAAM,WAAW,KACjB,MAAM,MAAM,CAAC,MAAM,KAAK,KAAK,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,GAClD;AACA,UAAM,CAAC,QAAQ,MAAM,IAAI;AAGzB,QAAI,WAAW,GAAI,QAAO;AAG1B,QAAI,WAAW,OAAO,WAAW,UAAa,UAAU,MAAM,UAAU;AACtE,aAAO;AAGT,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;AAG7C,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;EAC/C;AAIA,QAAM,kBAAkB,MAAM,QAAQ,YAAY,EAAE;AAEpD,QAAM,eAAe;IACnB;;IACA;;EACF;AAEA,aAAW,WAAW,cAAc;AAClC,QAAI,QAAQ,KAAK,eAAe,GAAG;AACjC,aAAO;IACT;EACF;AAEA,SAAO;AACT;AASO,SAAS,eACd,MACA,QACU;AACV,SAAO,KAAK,OAAO,CAAC,QAAQ,UAAU,KAAK,MAAM,EAAE,IAAI;AACzD;","names":[]}
@@ -102,10 +102,22 @@ export declare const DEFAULT_SSRF_POLICY: Required<SSRFPolicy>;
102
102
  * This is the canonical SSRF protection - all apps should use this
103
103
  * before fetching external URLs.
104
104
  *
105
+ * **Limitations (browser-side validation only):**
106
+ * - Cannot detect domains that resolve to private IPs (e.g., `attacker.com` -> `127.0.0.1`)
107
+ * - Cannot prevent DNS rebinding attacks (domain resolves differently on subsequent requests)
108
+ * - Cannot validate redirect destinations (server may redirect to internal URLs)
109
+ * - Does not perform actual DNS resolution - only validates URL syntax and hostname patterns
110
+ *
111
+ * For server-side applications requiring stronger SSRF protection, perform DNS resolution
112
+ * and validate the resolved IP address before making the request, and validate redirect
113
+ * destinations or disable redirects entirely.
114
+ *
105
115
  * @param url - URL to validate
106
116
  * @param policy - SSRF policy (uses defaults if not provided)
107
117
  * @returns Safety check result with reason if unsafe
108
118
  *
119
+ * @see https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html
120
+ *
109
121
  * @example
110
122
  * ```typescript
111
123
  * const check = isURLSafe('http://10.0.0.1/secret');
@@ -102,10 +102,22 @@ export declare const DEFAULT_SSRF_POLICY: Required<SSRFPolicy>;
102
102
  * This is the canonical SSRF protection - all apps should use this
103
103
  * before fetching external URLs.
104
104
  *
105
+ * **Limitations (browser-side validation only):**
106
+ * - Cannot detect domains that resolve to private IPs (e.g., `attacker.com` -> `127.0.0.1`)
107
+ * - Cannot prevent DNS rebinding attacks (domain resolves differently on subsequent requests)
108
+ * - Cannot validate redirect destinations (server may redirect to internal URLs)
109
+ * - Does not perform actual DNS resolution - only validates URL syntax and hostname patterns
110
+ *
111
+ * For server-side applications requiring stronger SSRF protection, perform DNS resolution
112
+ * and validate the resolved IP address before making the request, and validate redirect
113
+ * destinations or disable redirects entirely.
114
+ *
105
115
  * @param url - URL to validate
106
116
  * @param policy - SSRF policy (uses defaults if not provided)
107
117
  * @returns Safety check result with reason if unsafe
108
118
  *
119
+ * @see https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html
120
+ *
109
121
  * @example
110
122
  * ```typescript
111
123
  * const check = isURLSafe('http://10.0.0.1/secret');
@@ -1 +1 @@
1
- {"version":3,"sources":["../../image-utils/src/extraction.ts","../../image-utils/src/ssrf.ts"],"sourcesContent":["/**\n * Image URL Extraction\n *\n * Extracts image URLs from markdown, HTML, and base64 data URLs in text.\n * Used across Federation, Archive, and Architect for image processing.\n */\n\nexport interface ImageExtractionOptions {\n /** Include markdown image syntax `![alt](url)` (default: true) */\n includeMarkdown?: boolean;\n /** Include HTML img tags `<img src=\"url\">` (default: true) */\n includeHTML?: boolean;\n /** Include base64 data URLs `data:image/...;base64,...` (default: true) */\n includeBase64?: boolean;\n}\n\nexport interface ExtractedImage {\n /** The extracted URL or data URL */\n url: string;\n /** Source format where the image was found */\n source: 'markdown' | 'html' | 'base64';\n /** Surrounding context for debugging (optional) */\n context?: string;\n}\n\n/**\n * Extract all image URLs from text.\n *\n * Supports markdown, HTML, and base64 data URLs.\n * Returns deduplicated results.\n *\n * @param text - Text to extract image URLs from\n * @param options - Extraction options\n * @returns Array of extracted images with source information\n *\n * @example\n * ```typescript\n * const text = 'Check out ![portrait](avatar.png) and <img src=\"banner.jpg\">';\n * const images = extractImageUrls(text);\n * // [\n * // { url: 'avatar.png', source: 'markdown' },\n * // { url: 'banner.jpg', source: 'html' }\n * // ]\n * ```\n */\nexport function extractImageUrls(\n text: string,\n options: ImageExtractionOptions = {},\n): ExtractedImage[] {\n const {\n includeMarkdown = true,\n includeHTML = true,\n includeBase64 = true,\n } = options;\n\n const results: ExtractedImage[] = [];\n const seen = new Set<string>();\n\n // Extract markdown images: ![alt](url) or ![alt](<url>) or ![alt](url =dimensions)\n // Supports: standard, angle brackets, and SillyTavern dimension syntax\n if (includeMarkdown) {\n const markdownPattern = /!\\[([^\\]]*)\\]\\(<?([^>\\s)]+)>?(?:\\s*=[^)]+)?\\)/g;\n let match: RegExpExecArray | null;\n\n while ((match = markdownPattern.exec(text)) !== null) {\n const url = match[2]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'markdown',\n context: match[0],\n });\n }\n }\n }\n\n // Extract HTML img tags: <img src=\"url\"> (quoted and unquoted)\n if (includeHTML) {\n // Quoted src\n const htmlQuotedPattern = /<img[^>]+src=[\"']([^\"']+)[\"'][^>]*>/gi;\n let match: RegExpExecArray | null;\n\n while ((match = htmlQuotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // Unquoted src (e.g., <img src=url>)\n const htmlUnquotedPattern = /<img[^>]+src=([^\\s\"'>]+)[^>]*>/gi;\n while ((match = htmlUnquotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Skip if it starts with a quote (already handled above)\n if (url && !url.startsWith('\"') && !url.startsWith(\"'\") && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // CSS url() function: url(url), url(\"url\"), url('url')\n // Common in background-image, background, content properties\n const cssUrlPattern = /url\\([\"']?([^)\"']+)[\"']?\\)/gi;\n while ((match = cssUrlPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Only include http(s) URLs to avoid CSS variables and relative paths\n if (url && (url.startsWith('http://') || url.startsWith('https://')) && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n }\n\n // Extract plain image URLs (not wrapped in any syntax)\n if (includeHTML) {\n // Plain URLs with image extensions (common on image hosts)\n const plainUrlPattern = /(?<![(\"'])(https?:\\/\\/[^\\s<>\"']+\\.(?:jpg|jpeg|png|gif|webp|svg|avif|bmp))(?![)\"'])/gi;\n let match: RegExpExecArray | null;\n\n while ((match = plainUrlPattern.exec(text)) !== null) {\n const url = match[0]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: url,\n });\n }\n }\n }\n\n // Extract base64 data URLs: data:image/...;base64,...\n if (includeBase64) {\n const dataUrlPattern = /data:image\\/[^;]+;base64,[A-Za-z0-9+/=]+/g;\n let match: RegExpExecArray | null;\n\n while ((match = dataUrlPattern.exec(text)) !== null) {\n const url = match[0];\n if (!seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'base64',\n });\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract only HTTP/HTTPS URLs (excludes data URLs and relative paths).\n *\n * Convenience wrapper around extractImageUrls that filters for remote URLs.\n *\n * @param text - Text to extract URLs from\n * @returns Array of extracted HTTP/HTTPS image URLs\n */\nexport function extractRemoteImageUrls(text: string): ExtractedImage[] {\n const all = extractImageUrls(text, { includeBase64: false });\n return all.filter((img) => /^https?:\\/\\//i.test(img.url));\n}\n\n/**\n * Extract only base64 data URLs.\n *\n * Convenience wrapper around extractImageUrls.\n *\n * @param text - Text to extract data URLs from\n * @returns Array of base64 image data URLs\n */\nexport function extractDataUrls(text: string): ExtractedImage[] {\n return extractImageUrls(text, {\n includeMarkdown: false,\n includeHTML: false,\n includeBase64: true,\n });\n}\n\n/**\n * Count images in text without extracting full details.\n *\n * More efficient than extractImageUrls when you only need the count.\n *\n * @param text - Text to count images in\n * @returns Total number of unique image references\n */\nexport function countImages(text: string): number {\n return extractImageUrls(text).length;\n}\n","/**\n * SSRF (Server-Side Request Forgery) Protection\n *\n * Validates URLs to prevent SSRF attacks.\n * Browser-safe implementation (no Node.js dependencies).\n */\n\nexport interface SSRFPolicy {\n /** Allow private IP addresses (10.x, 172.16-31.x, 192.168.x) (default: false) */\n allowPrivateIPs?: boolean;\n /** Allow localhost/loopback (127.x, ::1) (default: false) */\n allowLocalhost?: boolean;\n /** Blocked domain patterns (e.g., ['internal.company.com', '*.local']) */\n blockedDomains?: string[];\n /** Allowed domain patterns - if provided, ONLY these are allowed */\n allowedDomains?: string[];\n /** Allow data URLs (data:image/...) (default: false) */\n allowDataUrls?: boolean;\n}\n\nexport interface SafetyCheck {\n /** Whether the URL is safe according to policy */\n safe: boolean;\n /** Reason why URL is unsafe (if safe=false) */\n reason?: string;\n}\n\n/**\n * Default SSRF policy - blocks private IPs, localhost, and data URLs.\n */\nexport const DEFAULT_SSRF_POLICY: Required<SSRFPolicy> = {\n allowPrivateIPs: false,\n allowLocalhost: false,\n blockedDomains: [],\n allowedDomains: [],\n allowDataUrls: false,\n};\n\n/**\n * Check if URL is safe to fetch according to SSRF policy.\n *\n * This is the canonical SSRF protection - all apps should use this\n * before fetching external URLs.\n *\n * @param url - URL to validate\n * @param policy - SSRF policy (uses defaults if not provided)\n * @returns Safety check result with reason if unsafe\n *\n * @example\n * ```typescript\n * const check = isURLSafe('http://10.0.0.1/secret');\n * if (!check.safe) {\n * console.error('Unsafe URL:', check.reason);\n * }\n * ```\n */\nexport function isURLSafe(\n url: string,\n policy: SSRFPolicy = {},\n): SafetyCheck {\n const config = { ...DEFAULT_SSRF_POLICY, ...policy };\n\n // Parse URL\n let parsed: URL;\n try {\n parsed = new URL(url);\n } catch {\n return { safe: false, reason: 'Invalid URL format' };\n }\n\n // Check protocol\n if (parsed.protocol === 'data:') {\n if (!config.allowDataUrls) {\n return { safe: false, reason: 'Data URLs not allowed' };\n }\n return { safe: true };\n }\n\n if (!['http:', 'https:'].includes(parsed.protocol)) {\n return {\n safe: false,\n reason: `Protocol '${parsed.protocol}' not allowed (only http/https)`,\n };\n }\n\n const hostname = parsed.hostname.toLowerCase();\n\n // Check allowed domains (whitelist)\n if (config.allowedDomains.length > 0) {\n const isAllowed = config.allowedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (!isAllowed) {\n return {\n safe: false,\n reason: `Domain '${hostname}' not in allowed list`,\n };\n }\n }\n\n // Check blocked domains (blacklist)\n if (config.blockedDomains.length > 0) {\n const isBlocked = config.blockedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (isBlocked) {\n return {\n safe: false,\n reason: `Domain '${hostname}' is blocked`,\n };\n }\n }\n\n // Check localhost\n if (isLocalhost(hostname) && !config.allowLocalhost) {\n return { safe: false, reason: 'Localhost not allowed' };\n }\n\n // Check private IPs\n if (isPrivateIP(hostname) && !config.allowPrivateIPs) {\n return { safe: false, reason: 'Private IP addresses not allowed' };\n }\n\n return { safe: true };\n}\n\n/**\n * Quick check if URL is safe with default policy.\n *\n * Convenience wrapper for common case.\n *\n * @param url - URL to validate\n * @returns true if safe, false otherwise\n */\nexport function isSafeForFetch(url: string): boolean {\n return isURLSafe(url).safe;\n}\n\n/**\n * Match domain against pattern (supports wildcards).\n *\n * @example\n * ```typescript\n * matchDomainPattern('api.github.com', '*.github.com') // true\n * matchDomainPattern('github.com', '*.github.com') // false\n * matchDomainPattern('github.com', 'github.com') // true\n * ```\n */\nfunction matchDomainPattern(domain: string, pattern: string): boolean {\n // Exact match\n if (domain === pattern) return true;\n\n // Wildcard match - only matches subdomains, not the base domain\n if (pattern.startsWith('*.')) {\n const suffix = pattern.slice(2);\n return domain.endsWith('.' + suffix);\n }\n\n return false;\n}\n\n/**\n * Check if hostname is localhost/loopback.\n */\nfunction isLocalhost(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 loopback\n if (lower === 'localhost' || lower === '0.0.0.0' || lower.startsWith('127.')) {\n return true;\n }\n\n // IPv6 loopback\n if (lower === '::1' || lower === '[::1]') {\n return true;\n }\n\n return false;\n}\n\n/**\n * Check if hostname is a private IP address.\n *\n * Checks for:\n * - 10.0.0.0/8\n * - 172.16.0.0/12\n * - 192.168.0.0/16\n * - Link-local: 169.254.0.0/16 (includes AWS metadata endpoint)\n * - IPv6 private ranges\n */\nfunction isPrivateIP(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 private ranges\n const parts = hostname.split('.').map(Number);\n\n if (\n parts.length === 4 &&\n parts.every((p) => p >= 0 && p <= 255 && !isNaN(p))\n ) {\n const [octet1, octet2] = parts;\n\n // 10.0.0.0/8\n if (octet1 === 10) return true;\n\n // 172.16.0.0/12\n if (octet1 === 172 && octet2 !== undefined && octet2 >= 16 && octet2 <= 31)\n return true;\n\n // 192.168.0.0/16\n if (octet1 === 192 && octet2 === 168) return true;\n\n // 169.254.0.0/16 (link-local) - includes AWS metadata endpoint\n if (octet1 === 169 && octet2 === 254) return true;\n }\n\n // IPv6 private ranges using regex patterns\n // Strip brackets if present (URL parser adds them: [fc00::1])\n const cleanedHostname = lower.replace(/^\\[|\\]$/g, '');\n\n const ipv6Patterns = [\n /^f[cd][0-9a-f]{2}:/i, // fc00::/7 (includes both fc and fd ranges)\n /^fe80:/i, // fe80::/10 (link-local)\n ];\n\n for (const pattern of ipv6Patterns) {\n if (pattern.test(cleanedHostname)) {\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Filter array of URLs, keeping only safe ones.\n *\n * @param urls - Array of URLs to filter\n * @param policy - SSRF policy\n * @returns Array of safe URLs\n */\nexport function filterSafeUrls(\n urls: string[],\n policy?: SSRFPolicy,\n): string[] {\n return urls.filter((url) => isURLSafe(url, policy).safe);\n}\n"],"mappings":";AA6CO,SAAS,iBACd,MACA,UAAkC,CAAC,GACjB;AAClB,QAAM;IACJ,kBAAkB;IAClB,cAAc;IACd,gBAAgB;EAClB,IAAI;AAEJ,QAAM,UAA4B,CAAC;AACnC,QAAM,OAAO,oBAAI,IAAY;AAI7B,MAAI,iBAAiB;AACnB,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,oBAAoB;AAC1B,QAAI;AAEJ,YAAQ,QAAQ,kBAAkB,KAAK,IAAI,OAAO,MAAM;AACtD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAGA,UAAM,sBAAsB;AAC5B,YAAQ,QAAQ,oBAAoB,KAAK,IAAI,OAAO,MAAM;AACxD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,OAAO,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,GAAG;AACzE,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAIA,UAAM,gBAAgB;AACtB,YAAQ,QAAQ,cAAc,KAAK,IAAI,OAAO,MAAM;AAClD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,QAAQ,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU,MAAM,CAAC,KAAK,IAAI,GAAG,GAAG;AACtF,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS;QACX,CAAC;MACH;IACF;EACF;AAGA,MAAI,eAAe;AACjB,UAAM,iBAAiB;AACvB,QAAI;AAEJ,YAAQ,QAAQ,eAAe,KAAK,IAAI,OAAO,MAAM;AACnD,YAAM,MAAM,MAAM,CAAC;AACnB,UAAI,CAAC,KAAK,IAAI,GAAG,GAAG;AAClB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;QACV,CAAC;MACH;IACF;EACF;AAEA,SAAO;AACT;AAUO,SAAS,uBAAuB,MAAgC;AACrE,QAAM,MAAM,iBAAiB,MAAM,EAAE,eAAe,MAAM,CAAC;AAC3D,SAAO,IAAI,OAAO,CAAC,QAAQ,gBAAgB,KAAK,IAAI,GAAG,CAAC;AAC1D;AAUO,SAAS,gBAAgB,MAAgC;AAC9D,SAAO,iBAAiB,MAAM;IAC5B,iBAAiB;IACjB,aAAa;IACb,eAAe;EACjB,CAAC;AACH;AAUO,SAAS,YAAY,MAAsB;AAChD,SAAO,iBAAiB,IAAI,EAAE;AAChC;AC/KO,IAAM,sBAA4C;EACvD,iBAAiB;EACjB,gBAAgB;EAChB,gBAAgB,CAAC;EACjB,gBAAgB,CAAC;EACjB,eAAe;AACjB;AAoBO,SAAS,UACd,KACA,SAAqB,CAAC,GACT;AACb,QAAM,SAAS,EAAE,GAAG,qBAAqB,GAAG,OAAO;AAGnD,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;EACtB,QAAQ;AACN,WAAO,EAAE,MAAM,OAAO,QAAQ,qBAAqB;EACrD;AAGA,MAAI,OAAO,aAAa,SAAS;AAC/B,QAAI,CAAC,OAAO,eAAe;AACzB,aAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;IACxD;AACA,WAAO,EAAE,MAAM,KAAK;EACtB;AAEA,MAAI,CAAC,CAAC,SAAS,QAAQ,EAAE,SAAS,OAAO,QAAQ,GAAG;AAClD,WAAO;MACL,MAAM;MACN,QAAQ,aAAa,OAAO,QAAQ;IACtC;EACF;AAEA,QAAM,WAAW,OAAO,SAAS,YAAY;AAG7C,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,CAAC,WAAW;AACd,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,WAAW;AACb,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,gBAAgB;AACnD,WAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;EACxD;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,iBAAiB;AACpD,WAAO,EAAE,MAAM,OAAO,QAAQ,mCAAmC;EACnE;AAEA,SAAO,EAAE,MAAM,KAAK;AACtB;AAUO,SAAS,eAAe,KAAsB;AACnD,SAAO,UAAU,GAAG,EAAE;AACxB;AAYA,SAAS,mBAAmB,QAAgB,SAA0B;AAEpE,MAAI,WAAW,QAAS,QAAO;AAG/B,MAAI,QAAQ,WAAW,IAAI,GAAG;AAC5B,UAAM,SAAS,QAAQ,MAAM,CAAC;AAC9B,WAAO,OAAO,SAAS,MAAM,MAAM;EACrC;AAEA,SAAO;AACT;AAKA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,MAAI,UAAU,eAAe,UAAU,aAAa,MAAM,WAAW,MAAM,GAAG;AAC5E,WAAO;EACT;AAGA,MAAI,UAAU,SAAS,UAAU,SAAS;AACxC,WAAO;EACT;AAEA,SAAO;AACT;AAYA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,QAAM,QAAQ,SAAS,MAAM,GAAG,EAAE,IAAI,MAAM;AAE5C,MACE,MAAM,WAAW,KACjB,MAAM,MAAM,CAAC,MAAM,KAAK,KAAK,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,GAClD;AACA,UAAM,CAAC,QAAQ,MAAM,IAAI;AAGzB,QAAI,WAAW,GAAI,QAAO;AAG1B,QAAI,WAAW,OAAO,WAAW,UAAa,UAAU,MAAM,UAAU;AACtE,aAAO;AAGT,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;AAG7C,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;EAC/C;AAIA,QAAM,kBAAkB,MAAM,QAAQ,YAAY,EAAE;AAEpD,QAAM,eAAe;IACnB;;IACA;;EACF;AAEA,aAAW,WAAW,cAAc;AAClC,QAAI,QAAQ,KAAK,eAAe,GAAG;AACjC,aAAO;IACT;EACF;AAEA,SAAO;AACT;AASO,SAAS,eACd,MACA,QACU;AACV,SAAO,KAAK,OAAO,CAAC,QAAQ,UAAU,KAAK,MAAM,EAAE,IAAI;AACzD;","names":[]}
1
+ {"version":3,"sources":["../../image-utils/src/extraction.ts","../../image-utils/src/ssrf.ts"],"sourcesContent":["/**\n * Image URL Extraction\n *\n * Extracts image URLs from markdown, HTML, and base64 data URLs in text.\n * Used across Federation, Archive, and Architect for image processing.\n */\n\nexport interface ImageExtractionOptions {\n /** Include markdown image syntax `![alt](url)` (default: true) */\n includeMarkdown?: boolean;\n /** Include HTML img tags `<img src=\"url\">` (default: true) */\n includeHTML?: boolean;\n /** Include base64 data URLs `data:image/...;base64,...` (default: true) */\n includeBase64?: boolean;\n}\n\nexport interface ExtractedImage {\n /** The extracted URL or data URL */\n url: string;\n /** Source format where the image was found */\n source: 'markdown' | 'html' | 'base64';\n /** Surrounding context for debugging (optional) */\n context?: string;\n}\n\n/**\n * Extract all image URLs from text.\n *\n * Supports markdown, HTML, and base64 data URLs.\n * Returns deduplicated results.\n *\n * @param text - Text to extract image URLs from\n * @param options - Extraction options\n * @returns Array of extracted images with source information\n *\n * @example\n * ```typescript\n * const text = 'Check out ![portrait](avatar.png) and <img src=\"banner.jpg\">';\n * const images = extractImageUrls(text);\n * // [\n * // { url: 'avatar.png', source: 'markdown' },\n * // { url: 'banner.jpg', source: 'html' }\n * // ]\n * ```\n */\nexport function extractImageUrls(\n text: string,\n options: ImageExtractionOptions = {},\n): ExtractedImage[] {\n const {\n includeMarkdown = true,\n includeHTML = true,\n includeBase64 = true,\n } = options;\n\n const results: ExtractedImage[] = [];\n const seen = new Set<string>();\n\n // Extract markdown images: ![alt](url) or ![alt](<url>) or ![alt](url =dimensions)\n // Supports: standard, angle brackets, and SillyTavern dimension syntax\n if (includeMarkdown) {\n const markdownPattern = /!\\[([^\\]]*)\\]\\(<?([^>\\s)]+)>?(?:\\s*=[^)]+)?\\)/g;\n let match: RegExpExecArray | null;\n\n while ((match = markdownPattern.exec(text)) !== null) {\n const url = match[2]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'markdown',\n context: match[0],\n });\n }\n }\n }\n\n // Extract HTML img tags: <img src=\"url\"> (quoted and unquoted)\n if (includeHTML) {\n // Quoted src\n const htmlQuotedPattern = /<img[^>]+src=[\"']([^\"']+)[\"'][^>]*>/gi;\n let match: RegExpExecArray | null;\n\n while ((match = htmlQuotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // Unquoted src (e.g., <img src=url>)\n const htmlUnquotedPattern = /<img[^>]+src=([^\\s\"'>]+)[^>]*>/gi;\n while ((match = htmlUnquotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Skip if it starts with a quote (already handled above)\n if (url && !url.startsWith('\"') && !url.startsWith(\"'\") && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // CSS url() function: url(url), url(\"url\"), url('url')\n // Common in background-image, background, content properties\n const cssUrlPattern = /url\\([\"']?([^)\"']+)[\"']?\\)/gi;\n while ((match = cssUrlPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Only include http(s) URLs to avoid CSS variables and relative paths\n if (url && (url.startsWith('http://') || url.startsWith('https://')) && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n }\n\n // Extract plain image URLs (not wrapped in any syntax)\n if (includeHTML) {\n // Plain URLs with image extensions (common on image hosts)\n const plainUrlPattern = /(?<![(\"'])(https?:\\/\\/[^\\s<>\"']+\\.(?:jpg|jpeg|png|gif|webp|svg|avif|bmp))(?![)\"'])/gi;\n let match: RegExpExecArray | null;\n\n while ((match = plainUrlPattern.exec(text)) !== null) {\n const url = match[0]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: url,\n });\n }\n }\n }\n\n // Extract base64 data URLs: data:image/...;base64,...\n if (includeBase64) {\n const dataUrlPattern = /data:image\\/[^;]+;base64,[A-Za-z0-9+/=]+/g;\n let match: RegExpExecArray | null;\n\n while ((match = dataUrlPattern.exec(text)) !== null) {\n const url = match[0];\n if (!seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'base64',\n });\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract only HTTP/HTTPS URLs (excludes data URLs and relative paths).\n *\n * Convenience wrapper around extractImageUrls that filters for remote URLs.\n *\n * @param text - Text to extract URLs from\n * @returns Array of extracted HTTP/HTTPS image URLs\n */\nexport function extractRemoteImageUrls(text: string): ExtractedImage[] {\n const all = extractImageUrls(text, { includeBase64: false });\n return all.filter((img) => /^https?:\\/\\//i.test(img.url));\n}\n\n/**\n * Extract only base64 data URLs.\n *\n * Convenience wrapper around extractImageUrls.\n *\n * @param text - Text to extract data URLs from\n * @returns Array of base64 image data URLs\n */\nexport function extractDataUrls(text: string): ExtractedImage[] {\n return extractImageUrls(text, {\n includeMarkdown: false,\n includeHTML: false,\n includeBase64: true,\n });\n}\n\n/**\n * Count images in text without extracting full details.\n *\n * More efficient than extractImageUrls when you only need the count.\n *\n * @param text - Text to count images in\n * @returns Total number of unique image references\n */\nexport function countImages(text: string): number {\n return extractImageUrls(text).length;\n}\n","/**\n * SSRF (Server-Side Request Forgery) Protection\n *\n * Validates URLs to prevent SSRF attacks.\n * Browser-safe implementation (no Node.js dependencies).\n */\n\nexport interface SSRFPolicy {\n /** Allow private IP addresses (10.x, 172.16-31.x, 192.168.x) (default: false) */\n allowPrivateIPs?: boolean;\n /** Allow localhost/loopback (127.x, ::1) (default: false) */\n allowLocalhost?: boolean;\n /** Blocked domain patterns (e.g., ['internal.company.com', '*.local']) */\n blockedDomains?: string[];\n /** Allowed domain patterns - if provided, ONLY these are allowed */\n allowedDomains?: string[];\n /** Allow data URLs (data:image/...) (default: false) */\n allowDataUrls?: boolean;\n}\n\nexport interface SafetyCheck {\n /** Whether the URL is safe according to policy */\n safe: boolean;\n /** Reason why URL is unsafe (if safe=false) */\n reason?: string;\n}\n\n/**\n * Default SSRF policy - blocks private IPs, localhost, and data URLs.\n */\nexport const DEFAULT_SSRF_POLICY: Required<SSRFPolicy> = {\n allowPrivateIPs: false,\n allowLocalhost: false,\n blockedDomains: [],\n allowedDomains: [],\n allowDataUrls: false,\n};\n\n/**\n * Check if URL is safe to fetch according to SSRF policy.\n *\n * This is the canonical SSRF protection - all apps should use this\n * before fetching external URLs.\n *\n * **Limitations (browser-side validation only):**\n * - Cannot detect domains that resolve to private IPs (e.g., `attacker.com` -> `127.0.0.1`)\n * - Cannot prevent DNS rebinding attacks (domain resolves differently on subsequent requests)\n * - Cannot validate redirect destinations (server may redirect to internal URLs)\n * - Does not perform actual DNS resolution - only validates URL syntax and hostname patterns\n *\n * For server-side applications requiring stronger SSRF protection, perform DNS resolution\n * and validate the resolved IP address before making the request, and validate redirect\n * destinations or disable redirects entirely.\n *\n * @param url - URL to validate\n * @param policy - SSRF policy (uses defaults if not provided)\n * @returns Safety check result with reason if unsafe\n *\n * @see https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html\n *\n * @example\n * ```typescript\n * const check = isURLSafe('http://10.0.0.1/secret');\n * if (!check.safe) {\n * console.error('Unsafe URL:', check.reason);\n * }\n * ```\n */\nexport function isURLSafe(\n url: string,\n policy: SSRFPolicy = {},\n): SafetyCheck {\n const config = { ...DEFAULT_SSRF_POLICY, ...policy };\n\n // Parse URL\n let parsed: URL;\n try {\n parsed = new URL(url);\n } catch {\n return { safe: false, reason: 'Invalid URL format' };\n }\n\n // Check protocol\n if (parsed.protocol === 'data:') {\n if (!config.allowDataUrls) {\n return { safe: false, reason: 'Data URLs not allowed' };\n }\n return { safe: true };\n }\n\n if (!['http:', 'https:'].includes(parsed.protocol)) {\n return {\n safe: false,\n reason: `Protocol '${parsed.protocol}' not allowed (only http/https)`,\n };\n }\n\n const hostname = parsed.hostname.toLowerCase();\n\n // Check allowed domains (whitelist)\n if (config.allowedDomains.length > 0) {\n const isAllowed = config.allowedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (!isAllowed) {\n return {\n safe: false,\n reason: `Domain '${hostname}' not in allowed list`,\n };\n }\n }\n\n // Check blocked domains (blacklist)\n if (config.blockedDomains.length > 0) {\n const isBlocked = config.blockedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (isBlocked) {\n return {\n safe: false,\n reason: `Domain '${hostname}' is blocked`,\n };\n }\n }\n\n // Check localhost\n if (isLocalhost(hostname) && !config.allowLocalhost) {\n return { safe: false, reason: 'Localhost not allowed' };\n }\n\n // Check private IPs\n if (isPrivateIP(hostname) && !config.allowPrivateIPs) {\n return { safe: false, reason: 'Private IP addresses not allowed' };\n }\n\n return { safe: true };\n}\n\n/**\n * Quick check if URL is safe with default policy.\n *\n * Convenience wrapper for common case.\n *\n * @param url - URL to validate\n * @returns true if safe, false otherwise\n */\nexport function isSafeForFetch(url: string): boolean {\n return isURLSafe(url).safe;\n}\n\n/**\n * Match domain against pattern (supports wildcards).\n *\n * @example\n * ```typescript\n * matchDomainPattern('api.github.com', '*.github.com') // true\n * matchDomainPattern('github.com', '*.github.com') // false\n * matchDomainPattern('github.com', 'github.com') // true\n * ```\n */\nfunction matchDomainPattern(domain: string, pattern: string): boolean {\n // Exact match\n if (domain === pattern) return true;\n\n // Wildcard match - only matches subdomains, not the base domain\n if (pattern.startsWith('*.')) {\n const suffix = pattern.slice(2);\n return domain.endsWith('.' + suffix);\n }\n\n return false;\n}\n\n/**\n * Check if hostname is localhost/loopback.\n */\nfunction isLocalhost(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 loopback\n if (lower === 'localhost' || lower === '0.0.0.0' || lower.startsWith('127.')) {\n return true;\n }\n\n // IPv6 loopback\n if (lower === '::1' || lower === '[::1]') {\n return true;\n }\n\n return false;\n}\n\n/**\n * Check if hostname is a private IP address.\n *\n * Checks for:\n * - 10.0.0.0/8\n * - 172.16.0.0/12\n * - 192.168.0.0/16\n * - Link-local: 169.254.0.0/16 (includes AWS metadata endpoint)\n * - IPv6 private ranges\n */\nfunction isPrivateIP(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 private ranges\n const parts = hostname.split('.').map(Number);\n\n if (\n parts.length === 4 &&\n parts.every((p) => p >= 0 && p <= 255 && !isNaN(p))\n ) {\n const [octet1, octet2] = parts;\n\n // 10.0.0.0/8\n if (octet1 === 10) return true;\n\n // 172.16.0.0/12\n if (octet1 === 172 && octet2 !== undefined && octet2 >= 16 && octet2 <= 31)\n return true;\n\n // 192.168.0.0/16\n if (octet1 === 192 && octet2 === 168) return true;\n\n // 169.254.0.0/16 (link-local) - includes AWS metadata endpoint\n if (octet1 === 169 && octet2 === 254) return true;\n }\n\n // IPv6 private ranges using regex patterns\n // Strip brackets if present (URL parser adds them: [fc00::1])\n const cleanedHostname = lower.replace(/^\\[|\\]$/g, '');\n\n const ipv6Patterns = [\n /^f[cd][0-9a-f]{2}:/i, // fc00::/7 (includes both fc and fd ranges)\n /^fe80:/i, // fe80::/10 (link-local)\n ];\n\n for (const pattern of ipv6Patterns) {\n if (pattern.test(cleanedHostname)) {\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Filter array of URLs, keeping only safe ones.\n *\n * @param urls - Array of URLs to filter\n * @param policy - SSRF policy\n * @returns Array of safe URLs\n */\nexport function filterSafeUrls(\n urls: string[],\n policy?: SSRFPolicy,\n): string[] {\n return urls.filter((url) => isURLSafe(url, policy).safe);\n}\n"],"mappings":";AA6CO,SAAS,iBACd,MACA,UAAkC,CAAC,GACjB;AAClB,QAAM;IACJ,kBAAkB;IAClB,cAAc;IACd,gBAAgB;EAClB,IAAI;AAEJ,QAAM,UAA4B,CAAC;AACnC,QAAM,OAAO,oBAAI,IAAY;AAI7B,MAAI,iBAAiB;AACnB,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,oBAAoB;AAC1B,QAAI;AAEJ,YAAQ,QAAQ,kBAAkB,KAAK,IAAI,OAAO,MAAM;AACtD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAGA,UAAM,sBAAsB;AAC5B,YAAQ,QAAQ,oBAAoB,KAAK,IAAI,OAAO,MAAM;AACxD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,OAAO,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,GAAG;AACzE,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAIA,UAAM,gBAAgB;AACtB,YAAQ,QAAQ,cAAc,KAAK,IAAI,OAAO,MAAM;AAClD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,QAAQ,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU,MAAM,CAAC,KAAK,IAAI,GAAG,GAAG;AACtF,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS;QACX,CAAC;MACH;IACF;EACF;AAGA,MAAI,eAAe;AACjB,UAAM,iBAAiB;AACvB,QAAI;AAEJ,YAAQ,QAAQ,eAAe,KAAK,IAAI,OAAO,MAAM;AACnD,YAAM,MAAM,MAAM,CAAC;AACnB,UAAI,CAAC,KAAK,IAAI,GAAG,GAAG;AAClB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;QACV,CAAC;MACH;IACF;EACF;AAEA,SAAO;AACT;AAUO,SAAS,uBAAuB,MAAgC;AACrE,QAAM,MAAM,iBAAiB,MAAM,EAAE,eAAe,MAAM,CAAC;AAC3D,SAAO,IAAI,OAAO,CAAC,QAAQ,gBAAgB,KAAK,IAAI,GAAG,CAAC;AAC1D;AAUO,SAAS,gBAAgB,MAAgC;AAC9D,SAAO,iBAAiB,MAAM;IAC5B,iBAAiB;IACjB,aAAa;IACb,eAAe;EACjB,CAAC;AACH;AAUO,SAAS,YAAY,MAAsB;AAChD,SAAO,iBAAiB,IAAI,EAAE;AAChC;AC/KO,IAAM,sBAA4C;EACvD,iBAAiB;EACjB,gBAAgB;EAChB,gBAAgB,CAAC;EACjB,gBAAgB,CAAC;EACjB,eAAe;AACjB;AAgCO,SAAS,UACd,KACA,SAAqB,CAAC,GACT;AACb,QAAM,SAAS,EAAE,GAAG,qBAAqB,GAAG,OAAO;AAGnD,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;EACtB,QAAQ;AACN,WAAO,EAAE,MAAM,OAAO,QAAQ,qBAAqB;EACrD;AAGA,MAAI,OAAO,aAAa,SAAS;AAC/B,QAAI,CAAC,OAAO,eAAe;AACzB,aAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;IACxD;AACA,WAAO,EAAE,MAAM,KAAK;EACtB;AAEA,MAAI,CAAC,CAAC,SAAS,QAAQ,EAAE,SAAS,OAAO,QAAQ,GAAG;AAClD,WAAO;MACL,MAAM;MACN,QAAQ,aAAa,OAAO,QAAQ;IACtC;EACF;AAEA,QAAM,WAAW,OAAO,SAAS,YAAY;AAG7C,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,CAAC,WAAW;AACd,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,WAAW;AACb,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,gBAAgB;AACnD,WAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;EACxD;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,iBAAiB;AACpD,WAAO,EAAE,MAAM,OAAO,QAAQ,mCAAmC;EACnE;AAEA,SAAO,EAAE,MAAM,KAAK;AACtB;AAUO,SAAS,eAAe,KAAsB;AACnD,SAAO,UAAU,GAAG,EAAE;AACxB;AAYA,SAAS,mBAAmB,QAAgB,SAA0B;AAEpE,MAAI,WAAW,QAAS,QAAO;AAG/B,MAAI,QAAQ,WAAW,IAAI,GAAG;AAC5B,UAAM,SAAS,QAAQ,MAAM,CAAC;AAC9B,WAAO,OAAO,SAAS,MAAM,MAAM;EACrC;AAEA,SAAO;AACT;AAKA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,MAAI,UAAU,eAAe,UAAU,aAAa,MAAM,WAAW,MAAM,GAAG;AAC5E,WAAO;EACT;AAGA,MAAI,UAAU,SAAS,UAAU,SAAS;AACxC,WAAO;EACT;AAEA,SAAO;AACT;AAYA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,QAAM,QAAQ,SAAS,MAAM,GAAG,EAAE,IAAI,MAAM;AAE5C,MACE,MAAM,WAAW,KACjB,MAAM,MAAM,CAAC,MAAM,KAAK,KAAK,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,GAClD;AACA,UAAM,CAAC,QAAQ,MAAM,IAAI;AAGzB,QAAI,WAAW,GAAI,QAAO;AAG1B,QAAI,WAAW,OAAO,WAAW,UAAa,UAAU,MAAM,UAAU;AACtE,aAAO;AAGT,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;AAG7C,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;EAC/C;AAIA,QAAM,kBAAkB,MAAM,QAAQ,YAAY,EAAE;AAEpD,QAAM,eAAe;IACnB;;IACA;;EACF;AAEA,aAAW,WAAW,cAAc;AAClC,QAAI,QAAQ,KAAK,eAAe,GAAG;AACjC,aAAO;IACT;EACF;AAEA,SAAO;AACT;AASO,SAAS,eACd,MACA,QACU;AACV,SAAO,KAAK,OAAO,CAAC,QAAQ,UAAU,KAAK,MAAM,EAAE,IAAI;AACzD;","names":[]}
package/dist/index.cjs CHANGED
@@ -128,7 +128,23 @@ function streamingUnzipSync(data, limits = DEFAULT_ZIP_LIMITS) {
128
128
  if (unsafePathHandling === "warn" && limits.onUnsafePath) {
129
129
  limits.onUnsafePath(file.name, reason);
130
130
  }
131
- file.ondata = () => {
131
+ file.ondata = (err, chunk, _final) => {
132
+ if (error) return;
133
+ if (err) {
134
+ error = err;
135
+ return;
136
+ }
137
+ if (chunk && chunk.length > 0) {
138
+ totalBytes += chunk.length;
139
+ if (totalBytes > limits.maxTotalSize) {
140
+ error = new ZipPreflightError(
141
+ `Total actual size ${totalBytes} exceeds limit ${limits.maxTotalSize}`,
142
+ totalBytes,
143
+ limits.maxTotalSize
144
+ );
145
+ file.terminate();
146
+ }
147
+ }
132
148
  };
133
149
  file.start();
134
150
  return;
@@ -588,22 +604,24 @@ var ExtractedAssetSchema = import_zod.z.object({
588
604
  mimeType: import_zod.z.string()
589
605
  });
590
606
  var CCv2LorebookEntrySchema = import_zod2.z.object({
591
- keys: import_zod2.z.array(import_zod2.z.string()),
607
+ keys: import_zod2.z.array(import_zod2.z.string()).optional(),
608
+ // Some tools use 'key' instead
592
609
  content: import_zod2.z.string(),
593
- enabled: import_zod2.z.boolean(),
594
- insertion_order: import_zod2.z.number().int(),
595
- // Optional fields
610
+ enabled: import_zod2.z.boolean().default(true),
611
+ // Default to enabled if missing
612
+ insertion_order: import_zod2.z.number().int().default(0),
613
+ // Optional fields - be lenient with nulls since wild data has them
596
614
  extensions: import_zod2.z.record(import_zod2.z.unknown()).optional(),
597
- case_sensitive: import_zod2.z.boolean().optional(),
615
+ case_sensitive: import_zod2.z.boolean().nullable().optional(),
598
616
  name: import_zod2.z.string().optional(),
599
617
  priority: import_zod2.z.number().int().optional(),
600
618
  id: import_zod2.z.number().int().optional(),
601
619
  comment: import_zod2.z.string().optional(),
602
- selective: import_zod2.z.boolean().optional(),
620
+ selective: import_zod2.z.boolean().nullable().optional(),
603
621
  secondary_keys: import_zod2.z.array(import_zod2.z.string()).optional(),
604
- constant: import_zod2.z.boolean().optional(),
605
- position: import_zod2.z.enum(["before_char", "after_char"]).optional()
606
- });
622
+ constant: import_zod2.z.boolean().nullable().optional(),
623
+ position: import_zod2.z.union([import_zod2.z.enum(["before_char", "after_char"]), import_zod2.z.number().int()]).nullable().optional()
624
+ }).passthrough();
607
625
  var CCv2CharacterBookSchema = import_zod2.z.object({
608
626
  name: import_zod2.z.string().optional(),
609
627
  description: import_zod2.z.string().optional(),
@@ -651,31 +669,34 @@ function getV2Data(card) {
651
669
  return card;
652
670
  }
653
671
  var CCv3LorebookEntrySchema = import_zod3.z.object({
654
- keys: import_zod3.z.array(import_zod3.z.string()),
672
+ keys: import_zod3.z.array(import_zod3.z.string()).optional(),
673
+ // Some tools use 'key' instead
655
674
  content: import_zod3.z.string(),
656
- enabled: import_zod3.z.boolean(),
657
- insertion_order: import_zod3.z.number().int(),
658
- // Optional fields
659
- case_sensitive: import_zod3.z.boolean().optional(),
675
+ enabled: import_zod3.z.boolean().default(true),
676
+ // Default to enabled if missing
677
+ insertion_order: import_zod3.z.number().int().default(0),
678
+ // Optional fields - be lenient with nulls since wild data has them
679
+ case_sensitive: import_zod3.z.boolean().nullable().optional(),
660
680
  name: import_zod3.z.string().optional(),
661
681
  priority: import_zod3.z.number().int().optional(),
662
682
  id: import_zod3.z.number().int().optional(),
663
683
  comment: import_zod3.z.string().optional(),
664
- selective: import_zod3.z.boolean().optional(),
684
+ selective: import_zod3.z.boolean().nullable().optional(),
665
685
  secondary_keys: import_zod3.z.array(import_zod3.z.string()).optional(),
666
- constant: import_zod3.z.boolean().optional(),
667
- position: import_zod3.z.enum(["before_char", "after_char"]).optional(),
686
+ constant: import_zod3.z.boolean().nullable().optional(),
687
+ position: import_zod3.z.union([import_zod3.z.enum(["before_char", "after_char"]), import_zod3.z.number().int()]).nullable().optional(),
668
688
  extensions: import_zod3.z.record(import_zod3.z.unknown()).optional(),
669
- // v3 specific
689
+ // v3 specific - also lenient with types since SillyTavern uses numbers for enums
670
690
  automation_id: import_zod3.z.string().optional(),
671
- role: import_zod3.z.enum(["system", "user", "assistant"]).optional(),
691
+ role: import_zod3.z.union([import_zod3.z.enum(["system", "user", "assistant"]), import_zod3.z.number().int()]).nullable().optional(),
672
692
  group: import_zod3.z.string().optional(),
673
693
  scan_frequency: import_zod3.z.number().int().nonnegative().optional(),
674
- probability: import_zod3.z.number().min(0).max(1).optional(),
694
+ probability: import_zod3.z.number().min(0).max(100).optional(),
695
+ // Some tools use 0-100 instead of 0-1
675
696
  use_regex: import_zod3.z.boolean().optional(),
676
697
  depth: import_zod3.z.number().int().nonnegative().optional(),
677
- selective_logic: import_zod3.z.enum(["AND", "NOT"]).optional()
678
- });
698
+ selective_logic: import_zod3.z.union([import_zod3.z.enum(["AND", "NOT"]), import_zod3.z.number().int()]).optional()
699
+ }).passthrough();
679
700
  var CCv3CharacterBookSchema = import_zod3.z.object({
680
701
  name: import_zod3.z.string().optional(),
681
702
  description: import_zod3.z.string().optional(),
@@ -8111,8 +8132,10 @@ function voxtaToCCv3(character, books) {
8111
8132
  };
8112
8133
  }
8113
8134
  }
8114
- const creationDate = character.DateCreated ? Math.floor(new Date(character.DateCreated).getTime() / 1e3) : void 0;
8115
- const modificationDate = character.DateModified ? Math.floor(new Date(character.DateModified).getTime() / 1e3) : void 0;
8135
+ const rawCreationDate = character.DateCreated ? Math.floor(new Date(character.DateCreated).getTime() / 1e3) : void 0;
8136
+ const rawModificationDate = character.DateModified ? Math.floor(new Date(character.DateModified).getTime() / 1e3) : void 0;
8137
+ const creationDate = rawCreationDate !== void 0 && rawCreationDate >= 0 ? rawCreationDate : void 0;
8138
+ const modificationDate = rawModificationDate !== void 0 && rawModificationDate >= 0 ? rawModificationDate : void 0;
8116
8139
  const card = {
8117
8140
  spec: "chara_card_v3",
8118
8141
  spec_version: "3.0",
@@ -8353,6 +8376,11 @@ var DELTA_MAX_TOTAL_SIZE = 500 * 1024 * 1024;
8353
8376
  var DELTA_MAX_FILE_SIZE = 50 * 1024 * 1024;
8354
8377
 
8355
8378
  // ../normalizer/dist/index.js
8379
+ function normalizePosition(position) {
8380
+ if (position === void 0 || position === null) return "before_char";
8381
+ if (typeof position === "string") return position;
8382
+ return position;
8383
+ }
8356
8384
  function convertLorebookEntry(entry, index) {
8357
8385
  return {
8358
8386
  keys: entry.keys || [],
@@ -8368,7 +8396,7 @@ function convertLorebookEntry(entry, index) {
8368
8396
  selective: entry.selective ?? false,
8369
8397
  secondary_keys: entry.secondary_keys || [],
8370
8398
  constant: entry.constant ?? false,
8371
- position: entry.position || "before_char"
8399
+ position: normalizePosition(entry.position)
8372
8400
  };
8373
8401
  }
8374
8402
  function convertCharacterBook(book) {
@@ -8531,6 +8559,43 @@ var DEFAULT_OPTIONS3 = {
8531
8559
  maxTotalSize: 500 * 1024 * 1024,
8532
8560
  extractAssets: true
8533
8561
  };
8562
+ var ASSET_PREFIX_VARIANTS = [
8563
+ { prefix: "__asset:", format: "CCv3 (SillyTavern)" },
8564
+ { prefix: "asset:", format: "CCv2/CCv3 common" },
8565
+ { prefix: "pngchunk:", format: "Explicit PNG chunk" },
8566
+ { prefix: "chara-ext-asset_:", format: "RisuAI (with colon)" },
8567
+ { prefix: "chara-ext-asset_", format: "RisuAI" },
8568
+ { prefix: "__asset_", format: "Legacy underscore variant" }
8569
+ ];
8570
+ function isChunkReference(uri) {
8571
+ return ASSET_PREFIX_VARIANTS.some(({ prefix }) => uri.startsWith(prefix)) || !uri.includes(":");
8572
+ }
8573
+ function stripAssetPrefix(uri) {
8574
+ for (const { prefix } of ASSET_PREFIX_VARIANTS) {
8575
+ if (uri.startsWith(prefix)) {
8576
+ return uri.substring(prefix.length);
8577
+ }
8578
+ }
8579
+ return uri;
8580
+ }
8581
+ function generateChunkKeyCandidates(assetId, originalUri) {
8582
+ return [
8583
+ assetId,
8584
+ // Plain ID: "0"
8585
+ originalUri,
8586
+ // Original URI: "__asset:0"
8587
+ `asset:${assetId}`,
8588
+ // Common format
8589
+ `__asset:${assetId}`,
8590
+ // CCv3 format
8591
+ `__asset_${assetId}`,
8592
+ // Legacy underscore variant
8593
+ `chara-ext-asset_${assetId}`,
8594
+ // RisuAI format
8595
+ `chara-ext-asset_:${assetId}`
8596
+ // RisuAI format with colon
8597
+ ];
8598
+ }
8534
8599
  function estimateBase64DecodedSize(base64Length) {
8535
8600
  return Math.ceil(base64Length * 0.75);
8536
8601
  }
@@ -8598,39 +8663,22 @@ function parsePng(data, options) {
8598
8663
  if (extracted.extraChunks && options.extractAssets && card.data.assets) {
8599
8664
  const usedChunks = /* @__PURE__ */ new Set();
8600
8665
  const chunkMap = /* @__PURE__ */ new Map();
8666
+ const risuIndexPrefixes = ASSET_PREFIX_VARIANTS.filter((v) => v.prefix.startsWith("chara-ext-asset_"));
8601
8667
  for (const chunk of extracted.extraChunks) {
8602
8668
  chunkMap.set(chunk.keyword, chunk);
8603
- if (chunk.keyword.startsWith("chara-ext-asset_")) {
8604
- const suffix = chunk.keyword.replace("chara-ext-asset_", "");
8605
- chunkMap.set(suffix, chunk);
8606
- if (suffix.startsWith(":")) {
8607
- chunkMap.set(suffix.substring(1), chunk);
8669
+ for (const { prefix } of risuIndexPrefixes) {
8670
+ if (chunk.keyword.startsWith(prefix)) {
8671
+ const suffix = chunk.keyword.substring(prefix.length);
8672
+ chunkMap.set(suffix, chunk);
8673
+ break;
8608
8674
  }
8609
8675
  }
8610
8676
  }
8611
8677
  for (const descriptor of card.data.assets) {
8612
8678
  if (!descriptor.uri) continue;
8613
- if (descriptor.uri.startsWith("__asset:") || descriptor.uri.startsWith("asset:") || descriptor.uri.startsWith("pngchunk:") || !descriptor.uri.includes(":")) {
8614
- let assetId = descriptor.uri;
8615
- if (assetId.startsWith("__asset:")) assetId = assetId.substring(8);
8616
- else if (assetId.startsWith("asset:")) assetId = assetId.substring(6);
8617
- else if (assetId.startsWith("pngchunk:")) assetId = assetId.substring(9);
8618
- const candidates = [
8619
- assetId,
8620
- // "0"
8621
- descriptor.uri,
8622
- // "__asset:0"
8623
- `asset:${assetId}`,
8624
- // "asset:0"
8625
- `__asset:${assetId}`,
8626
- // "__asset:0"
8627
- `__asset_${assetId}`,
8628
- // "__asset_0"
8629
- `chara-ext-asset_${assetId}`,
8630
- // "chara-ext-asset_0"
8631
- `chara-ext-asset_:${assetId}`
8632
- // "chara-ext-asset_:0"
8633
- ];
8679
+ if (isChunkReference(descriptor.uri)) {
8680
+ const assetId = stripAssetPrefix(descriptor.uri);
8681
+ const candidates = generateChunkKeyCandidates(assetId, descriptor.uri);
8634
8682
  let chunk;
8635
8683
  for (const candidate of candidates) {
8636
8684
  chunk = chunkMap.get(candidate);
@@ -8677,13 +8725,15 @@ function parsePng(data, options) {
8677
8725
  }
8678
8726
  }
8679
8727
  }
8728
+ const risuPrefixes = ASSET_PREFIX_VARIANTS.filter((v) => v.prefix.startsWith("chara-ext-asset_"));
8680
8729
  for (const chunk of extracted.extraChunks) {
8681
8730
  if (!usedChunks.has(chunk.keyword)) {
8682
8731
  let assetId = null;
8683
- if (chunk.keyword.startsWith("chara-ext-asset_:")) {
8684
- assetId = chunk.keyword.substring("chara-ext-asset_:".length);
8685
- } else if (chunk.keyword.startsWith("chara-ext-asset_")) {
8686
- assetId = chunk.keyword.substring("chara-ext-asset_".length);
8732
+ for (const { prefix } of risuPrefixes) {
8733
+ if (chunk.keyword.startsWith(prefix)) {
8734
+ assetId = chunk.keyword.substring(prefix.length);
8735
+ break;
8736
+ }
8687
8737
  }
8688
8738
  if (assetId) {
8689
8739
  try {