@character-foundry/character-foundry 0.1.8 → 0.1.9-dev.1765913722
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/app-framework.cjs +290 -94
- package/dist/app-framework.cjs.map +1 -1
- package/dist/app-framework.d.cts +1 -1
- package/dist/app-framework.d.ts +1 -1
- package/dist/app-framework.js +291 -95
- package/dist/app-framework.js.map +1 -1
- package/dist/charx.cjs +17 -1
- package/dist/charx.cjs.map +1 -1
- package/dist/charx.js +17 -1
- package/dist/charx.js.map +1 -1
- package/dist/exporter.cjs.map +1 -1
- package/dist/exporter.js.map +1 -1
- package/dist/federation.cjs +16 -4
- package/dist/federation.cjs.map +1 -1
- package/dist/federation.js +16 -4
- package/dist/federation.js.map +1 -1
- package/dist/image-utils.cjs.map +1 -1
- package/dist/image-utils.d.cts +12 -0
- package/dist/image-utils.d.ts +12 -0
- package/dist/image-utils.js.map +1 -1
- package/dist/index.cjs +69 -31
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +69 -31
- package/dist/index.js.map +1 -1
- package/dist/loader.cjs +69 -31
- package/dist/loader.cjs.map +1 -1
- package/dist/loader.js +69 -31
- package/dist/loader.js.map +1 -1
- package/dist/voxta.cjs +17 -1
- package/dist/voxta.cjs.map +1 -1
- package/dist/voxta.js +17 -1
- package/dist/voxta.js.map +1 -1
- package/package.json +6 -6
package/dist/image-utils.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/image-utils.ts","../../image-utils/src/extraction.ts","../../image-utils/src/ssrf.ts"],"sourcesContent":["export * from '@character-foundry/image-utils';\n","/**\n * Image URL Extraction\n *\n * Extracts image URLs from markdown, HTML, and base64 data URLs in text.\n * Used across Federation, Archive, and Architect for image processing.\n */\n\nexport interface ImageExtractionOptions {\n /** Include markdown image syntax `` (default: true) */\n includeMarkdown?: boolean;\n /** Include HTML img tags `<img src=\"url\">` (default: true) */\n includeHTML?: boolean;\n /** Include base64 data URLs `data:image/...;base64,...` (default: true) */\n includeBase64?: boolean;\n}\n\nexport interface ExtractedImage {\n /** The extracted URL or data URL */\n url: string;\n /** Source format where the image was found */\n source: 'markdown' | 'html' | 'base64';\n /** Surrounding context for debugging (optional) */\n context?: string;\n}\n\n/**\n * Extract all image URLs from text.\n *\n * Supports markdown, HTML, and base64 data URLs.\n * Returns deduplicated results.\n *\n * @param text - Text to extract image URLs from\n * @param options - Extraction options\n * @returns Array of extracted images with source information\n *\n * @example\n * ```typescript\n * const text = 'Check out  and <img src=\"banner.jpg\">';\n * const images = extractImageUrls(text);\n * // [\n * // { url: 'avatar.png', source: 'markdown' },\n * // { url: 'banner.jpg', source: 'html' }\n * // ]\n * ```\n */\nexport function extractImageUrls(\n text: string,\n options: ImageExtractionOptions = {},\n): ExtractedImage[] {\n const {\n includeMarkdown = true,\n includeHTML = true,\n includeBase64 = true,\n } = options;\n\n const results: ExtractedImage[] = [];\n const seen = new Set<string>();\n\n // Extract markdown images:  or  or \n // Supports: standard, angle brackets, and SillyTavern dimension syntax\n if (includeMarkdown) {\n const markdownPattern = /!\\[([^\\]]*)\\]\\(<?([^>\\s)]+)>?(?:\\s*=[^)]+)?\\)/g;\n let match: RegExpExecArray | null;\n\n while ((match = markdownPattern.exec(text)) !== null) {\n const url = match[2]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'markdown',\n context: match[0],\n });\n }\n }\n }\n\n // Extract HTML img tags: <img src=\"url\"> (quoted and unquoted)\n if (includeHTML) {\n // Quoted src\n const htmlQuotedPattern = /<img[^>]+src=[\"']([^\"']+)[\"'][^>]*>/gi;\n let match: RegExpExecArray | null;\n\n while ((match = htmlQuotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // Unquoted src (e.g., <img src=url>)\n const htmlUnquotedPattern = /<img[^>]+src=([^\\s\"'>]+)[^>]*>/gi;\n while ((match = htmlUnquotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Skip if it starts with a quote (already handled above)\n if (url && !url.startsWith('\"') && !url.startsWith(\"'\") && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // CSS url() function: url(url), url(\"url\"), url('url')\n // Common in background-image, background, content properties\n const cssUrlPattern = /url\\([\"']?([^)\"']+)[\"']?\\)/gi;\n while ((match = cssUrlPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Only include http(s) URLs to avoid CSS variables and relative paths\n if (url && (url.startsWith('http://') || url.startsWith('https://')) && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n }\n\n // Extract plain image URLs (not wrapped in any syntax)\n if (includeHTML) {\n // Plain URLs with image extensions (common on image hosts)\n const plainUrlPattern = /(?<![(\"'])(https?:\\/\\/[^\\s<>\"']+\\.(?:jpg|jpeg|png|gif|webp|svg|avif|bmp))(?![)\"'])/gi;\n let match: RegExpExecArray | null;\n\n while ((match = plainUrlPattern.exec(text)) !== null) {\n const url = match[0]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: url,\n });\n }\n }\n }\n\n // Extract base64 data URLs: data:image/...;base64,...\n if (includeBase64) {\n const dataUrlPattern = /data:image\\/[^;]+;base64,[A-Za-z0-9+/=]+/g;\n let match: RegExpExecArray | null;\n\n while ((match = dataUrlPattern.exec(text)) !== null) {\n const url = match[0];\n if (!seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'base64',\n });\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract only HTTP/HTTPS URLs (excludes data URLs and relative paths).\n *\n * Convenience wrapper around extractImageUrls that filters for remote URLs.\n *\n * @param text - Text to extract URLs from\n * @returns Array of extracted HTTP/HTTPS image URLs\n */\nexport function extractRemoteImageUrls(text: string): ExtractedImage[] {\n const all = extractImageUrls(text, { includeBase64: false });\n return all.filter((img) => /^https?:\\/\\//i.test(img.url));\n}\n\n/**\n * Extract only base64 data URLs.\n *\n * Convenience wrapper around extractImageUrls.\n *\n * @param text - Text to extract data URLs from\n * @returns Array of base64 image data URLs\n */\nexport function extractDataUrls(text: string): ExtractedImage[] {\n return extractImageUrls(text, {\n includeMarkdown: false,\n includeHTML: false,\n includeBase64: true,\n });\n}\n\n/**\n * Count images in text without extracting full details.\n *\n * More efficient than extractImageUrls when you only need the count.\n *\n * @param text - Text to count images in\n * @returns Total number of unique image references\n */\nexport function countImages(text: string): number {\n return extractImageUrls(text).length;\n}\n","/**\n * SSRF (Server-Side Request Forgery) Protection\n *\n * Validates URLs to prevent SSRF attacks.\n * Browser-safe implementation (no Node.js dependencies).\n */\n\nexport interface SSRFPolicy {\n /** Allow private IP addresses (10.x, 172.16-31.x, 192.168.x) (default: false) */\n allowPrivateIPs?: boolean;\n /** Allow localhost/loopback (127.x, ::1) (default: false) */\n allowLocalhost?: boolean;\n /** Blocked domain patterns (e.g., ['internal.company.com', '*.local']) */\n blockedDomains?: string[];\n /** Allowed domain patterns - if provided, ONLY these are allowed */\n allowedDomains?: string[];\n /** Allow data URLs (data:image/...) (default: false) */\n allowDataUrls?: boolean;\n}\n\nexport interface SafetyCheck {\n /** Whether the URL is safe according to policy */\n safe: boolean;\n /** Reason why URL is unsafe (if safe=false) */\n reason?: string;\n}\n\n/**\n * Default SSRF policy - blocks private IPs, localhost, and data URLs.\n */\nexport const DEFAULT_SSRF_POLICY: Required<SSRFPolicy> = {\n allowPrivateIPs: false,\n allowLocalhost: false,\n blockedDomains: [],\n allowedDomains: [],\n allowDataUrls: false,\n};\n\n/**\n * Check if URL is safe to fetch according to SSRF policy.\n *\n * This is the canonical SSRF protection - all apps should use this\n * before fetching external URLs.\n *\n * @param url - URL to validate\n * @param policy - SSRF policy (uses defaults if not provided)\n * @returns Safety check result with reason if unsafe\n *\n * @example\n * ```typescript\n * const check = isURLSafe('http://10.0.0.1/secret');\n * if (!check.safe) {\n * console.error('Unsafe URL:', check.reason);\n * }\n * ```\n */\nexport function isURLSafe(\n url: string,\n policy: SSRFPolicy = {},\n): SafetyCheck {\n const config = { ...DEFAULT_SSRF_POLICY, ...policy };\n\n // Parse URL\n let parsed: URL;\n try {\n parsed = new URL(url);\n } catch {\n return { safe: false, reason: 'Invalid URL format' };\n }\n\n // Check protocol\n if (parsed.protocol === 'data:') {\n if (!config.allowDataUrls) {\n return { safe: false, reason: 'Data URLs not allowed' };\n }\n return { safe: true };\n }\n\n if (!['http:', 'https:'].includes(parsed.protocol)) {\n return {\n safe: false,\n reason: `Protocol '${parsed.protocol}' not allowed (only http/https)`,\n };\n }\n\n const hostname = parsed.hostname.toLowerCase();\n\n // Check allowed domains (whitelist)\n if (config.allowedDomains.length > 0) {\n const isAllowed = config.allowedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (!isAllowed) {\n return {\n safe: false,\n reason: `Domain '${hostname}' not in allowed list`,\n };\n }\n }\n\n // Check blocked domains (blacklist)\n if (config.blockedDomains.length > 0) {\n const isBlocked = config.blockedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (isBlocked) {\n return {\n safe: false,\n reason: `Domain '${hostname}' is blocked`,\n };\n }\n }\n\n // Check localhost\n if (isLocalhost(hostname) && !config.allowLocalhost) {\n return { safe: false, reason: 'Localhost not allowed' };\n }\n\n // Check private IPs\n if (isPrivateIP(hostname) && !config.allowPrivateIPs) {\n return { safe: false, reason: 'Private IP addresses not allowed' };\n }\n\n return { safe: true };\n}\n\n/**\n * Quick check if URL is safe with default policy.\n *\n * Convenience wrapper for common case.\n *\n * @param url - URL to validate\n * @returns true if safe, false otherwise\n */\nexport function isSafeForFetch(url: string): boolean {\n return isURLSafe(url).safe;\n}\n\n/**\n * Match domain against pattern (supports wildcards).\n *\n * @example\n * ```typescript\n * matchDomainPattern('api.github.com', '*.github.com') // true\n * matchDomainPattern('github.com', '*.github.com') // false\n * matchDomainPattern('github.com', 'github.com') // true\n * ```\n */\nfunction matchDomainPattern(domain: string, pattern: string): boolean {\n // Exact match\n if (domain === pattern) return true;\n\n // Wildcard match - only matches subdomains, not the base domain\n if (pattern.startsWith('*.')) {\n const suffix = pattern.slice(2);\n return domain.endsWith('.' + suffix);\n }\n\n return false;\n}\n\n/**\n * Check if hostname is localhost/loopback.\n */\nfunction isLocalhost(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 loopback\n if (lower === 'localhost' || lower === '0.0.0.0' || lower.startsWith('127.')) {\n return true;\n }\n\n // IPv6 loopback\n if (lower === '::1' || lower === '[::1]') {\n return true;\n }\n\n return false;\n}\n\n/**\n * Check if hostname is a private IP address.\n *\n * Checks for:\n * - 10.0.0.0/8\n * - 172.16.0.0/12\n * - 192.168.0.0/16\n * - Link-local: 169.254.0.0/16 (includes AWS metadata endpoint)\n * - IPv6 private ranges\n */\nfunction isPrivateIP(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 private ranges\n const parts = hostname.split('.').map(Number);\n\n if (\n parts.length === 4 &&\n parts.every((p) => p >= 0 && p <= 255 && !isNaN(p))\n ) {\n const [octet1, octet2] = parts;\n\n // 10.0.0.0/8\n if (octet1 === 10) return true;\n\n // 172.16.0.0/12\n if (octet1 === 172 && octet2 !== undefined && octet2 >= 16 && octet2 <= 31)\n return true;\n\n // 192.168.0.0/16\n if (octet1 === 192 && octet2 === 168) return true;\n\n // 169.254.0.0/16 (link-local) - includes AWS metadata endpoint\n if (octet1 === 169 && octet2 === 254) return true;\n }\n\n // IPv6 private ranges using regex patterns\n // Strip brackets if present (URL parser adds them: [fc00::1])\n const cleanedHostname = lower.replace(/^\\[|\\]$/g, '');\n\n const ipv6Patterns = [\n /^f[cd][0-9a-f]{2}:/i, // fc00::/7 (includes both fc and fd ranges)\n /^fe80:/i, // fe80::/10 (link-local)\n ];\n\n for (const pattern of ipv6Patterns) {\n if (pattern.test(cleanedHostname)) {\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Filter array of URLs, keeping only safe ones.\n *\n * @param urls - Array of URLs to filter\n * @param policy - SSRF policy\n * @returns Array of safe URLs\n */\nexport function filterSafeUrls(\n urls: string[],\n policy?: SSRFPolicy,\n): string[] {\n return urls.filter((url) => isURLSafe(url, policy).safe);\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;AC6CO,SAAS,iBACd,MACA,UAAkC,CAAC,GACjB;AAClB,QAAM;IACJ,kBAAkB;IAClB,cAAc;IACd,gBAAgB;EAClB,IAAI;AAEJ,QAAM,UAA4B,CAAC;AACnC,QAAM,OAAO,oBAAI,IAAY;AAI7B,MAAI,iBAAiB;AACnB,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,oBAAoB;AAC1B,QAAI;AAEJ,YAAQ,QAAQ,kBAAkB,KAAK,IAAI,OAAO,MAAM;AACtD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAGA,UAAM,sBAAsB;AAC5B,YAAQ,QAAQ,oBAAoB,KAAK,IAAI,OAAO,MAAM;AACxD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,OAAO,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,GAAG;AACzE,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAIA,UAAM,gBAAgB;AACtB,YAAQ,QAAQ,cAAc,KAAK,IAAI,OAAO,MAAM;AAClD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,QAAQ,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU,MAAM,CAAC,KAAK,IAAI,GAAG,GAAG;AACtF,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS;QACX,CAAC;MACH;IACF;EACF;AAGA,MAAI,eAAe;AACjB,UAAM,iBAAiB;AACvB,QAAI;AAEJ,YAAQ,QAAQ,eAAe,KAAK,IAAI,OAAO,MAAM;AACnD,YAAM,MAAM,MAAM,CAAC;AACnB,UAAI,CAAC,KAAK,IAAI,GAAG,GAAG;AAClB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;QACV,CAAC;MACH;IACF;EACF;AAEA,SAAO;AACT;AAUO,SAAS,uBAAuB,MAAgC;AACrE,QAAM,MAAM,iBAAiB,MAAM,EAAE,eAAe,MAAM,CAAC;AAC3D,SAAO,IAAI,OAAO,CAAC,QAAQ,gBAAgB,KAAK,IAAI,GAAG,CAAC;AAC1D;AAUO,SAAS,gBAAgB,MAAgC;AAC9D,SAAO,iBAAiB,MAAM;IAC5B,iBAAiB;IACjB,aAAa;IACb,eAAe;EACjB,CAAC;AACH;AAUO,SAAS,YAAY,MAAsB;AAChD,SAAO,iBAAiB,IAAI,EAAE;AAChC;AC/KO,IAAM,sBAA4C;EACvD,iBAAiB;EACjB,gBAAgB;EAChB,gBAAgB,CAAC;EACjB,gBAAgB,CAAC;EACjB,eAAe;AACjB;AAoBO,SAAS,UACd,KACA,SAAqB,CAAC,GACT;AACb,QAAM,SAAS,EAAE,GAAG,qBAAqB,GAAG,OAAO;AAGnD,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;EACtB,QAAQ;AACN,WAAO,EAAE,MAAM,OAAO,QAAQ,qBAAqB;EACrD;AAGA,MAAI,OAAO,aAAa,SAAS;AAC/B,QAAI,CAAC,OAAO,eAAe;AACzB,aAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;IACxD;AACA,WAAO,EAAE,MAAM,KAAK;EACtB;AAEA,MAAI,CAAC,CAAC,SAAS,QAAQ,EAAE,SAAS,OAAO,QAAQ,GAAG;AAClD,WAAO;MACL,MAAM;MACN,QAAQ,aAAa,OAAO,QAAQ;IACtC;EACF;AAEA,QAAM,WAAW,OAAO,SAAS,YAAY;AAG7C,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,CAAC,WAAW;AACd,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,WAAW;AACb,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,gBAAgB;AACnD,WAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;EACxD;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,iBAAiB;AACpD,WAAO,EAAE,MAAM,OAAO,QAAQ,mCAAmC;EACnE;AAEA,SAAO,EAAE,MAAM,KAAK;AACtB;AAUO,SAAS,eAAe,KAAsB;AACnD,SAAO,UAAU,GAAG,EAAE;AACxB;AAYA,SAAS,mBAAmB,QAAgB,SAA0B;AAEpE,MAAI,WAAW,QAAS,QAAO;AAG/B,MAAI,QAAQ,WAAW,IAAI,GAAG;AAC5B,UAAM,SAAS,QAAQ,MAAM,CAAC;AAC9B,WAAO,OAAO,SAAS,MAAM,MAAM;EACrC;AAEA,SAAO;AACT;AAKA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,MAAI,UAAU,eAAe,UAAU,aAAa,MAAM,WAAW,MAAM,GAAG;AAC5E,WAAO;EACT;AAGA,MAAI,UAAU,SAAS,UAAU,SAAS;AACxC,WAAO;EACT;AAEA,SAAO;AACT;AAYA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,QAAM,QAAQ,SAAS,MAAM,GAAG,EAAE,IAAI,MAAM;AAE5C,MACE,MAAM,WAAW,KACjB,MAAM,MAAM,CAAC,MAAM,KAAK,KAAK,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,GAClD;AACA,UAAM,CAAC,QAAQ,MAAM,IAAI;AAGzB,QAAI,WAAW,GAAI,QAAO;AAG1B,QAAI,WAAW,OAAO,WAAW,UAAa,UAAU,MAAM,UAAU;AACtE,aAAO;AAGT,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;AAG7C,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;EAC/C;AAIA,QAAM,kBAAkB,MAAM,QAAQ,YAAY,EAAE;AAEpD,QAAM,eAAe;IACnB;;IACA;;EACF;AAEA,aAAW,WAAW,cAAc;AAClC,QAAI,QAAQ,KAAK,eAAe,GAAG;AACjC,aAAO;IACT;EACF;AAEA,SAAO;AACT;AASO,SAAS,eACd,MACA,QACU;AACV,SAAO,KAAK,OAAO,CAAC,QAAQ,UAAU,KAAK,MAAM,EAAE,IAAI;AACzD;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/image-utils.ts","../../image-utils/src/extraction.ts","../../image-utils/src/ssrf.ts"],"sourcesContent":["export * from '@character-foundry/image-utils';\n","/**\n * Image URL Extraction\n *\n * Extracts image URLs from markdown, HTML, and base64 data URLs in text.\n * Used across Federation, Archive, and Architect for image processing.\n */\n\nexport interface ImageExtractionOptions {\n /** Include markdown image syntax `` (default: true) */\n includeMarkdown?: boolean;\n /** Include HTML img tags `<img src=\"url\">` (default: true) */\n includeHTML?: boolean;\n /** Include base64 data URLs `data:image/...;base64,...` (default: true) */\n includeBase64?: boolean;\n}\n\nexport interface ExtractedImage {\n /** The extracted URL or data URL */\n url: string;\n /** Source format where the image was found */\n source: 'markdown' | 'html' | 'base64';\n /** Surrounding context for debugging (optional) */\n context?: string;\n}\n\n/**\n * Extract all image URLs from text.\n *\n * Supports markdown, HTML, and base64 data URLs.\n * Returns deduplicated results.\n *\n * @param text - Text to extract image URLs from\n * @param options - Extraction options\n * @returns Array of extracted images with source information\n *\n * @example\n * ```typescript\n * const text = 'Check out  and <img src=\"banner.jpg\">';\n * const images = extractImageUrls(text);\n * // [\n * // { url: 'avatar.png', source: 'markdown' },\n * // { url: 'banner.jpg', source: 'html' }\n * // ]\n * ```\n */\nexport function extractImageUrls(\n text: string,\n options: ImageExtractionOptions = {},\n): ExtractedImage[] {\n const {\n includeMarkdown = true,\n includeHTML = true,\n includeBase64 = true,\n } = options;\n\n const results: ExtractedImage[] = [];\n const seen = new Set<string>();\n\n // Extract markdown images:  or  or \n // Supports: standard, angle brackets, and SillyTavern dimension syntax\n if (includeMarkdown) {\n const markdownPattern = /!\\[([^\\]]*)\\]\\(<?([^>\\s)]+)>?(?:\\s*=[^)]+)?\\)/g;\n let match: RegExpExecArray | null;\n\n while ((match = markdownPattern.exec(text)) !== null) {\n const url = match[2]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'markdown',\n context: match[0],\n });\n }\n }\n }\n\n // Extract HTML img tags: <img src=\"url\"> (quoted and unquoted)\n if (includeHTML) {\n // Quoted src\n const htmlQuotedPattern = /<img[^>]+src=[\"']([^\"']+)[\"'][^>]*>/gi;\n let match: RegExpExecArray | null;\n\n while ((match = htmlQuotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // Unquoted src (e.g., <img src=url>)\n const htmlUnquotedPattern = /<img[^>]+src=([^\\s\"'>]+)[^>]*>/gi;\n while ((match = htmlUnquotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Skip if it starts with a quote (already handled above)\n if (url && !url.startsWith('\"') && !url.startsWith(\"'\") && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // CSS url() function: url(url), url(\"url\"), url('url')\n // Common in background-image, background, content properties\n const cssUrlPattern = /url\\([\"']?([^)\"']+)[\"']?\\)/gi;\n while ((match = cssUrlPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Only include http(s) URLs to avoid CSS variables and relative paths\n if (url && (url.startsWith('http://') || url.startsWith('https://')) && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n }\n\n // Extract plain image URLs (not wrapped in any syntax)\n if (includeHTML) {\n // Plain URLs with image extensions (common on image hosts)\n const plainUrlPattern = /(?<![(\"'])(https?:\\/\\/[^\\s<>\"']+\\.(?:jpg|jpeg|png|gif|webp|svg|avif|bmp))(?![)\"'])/gi;\n let match: RegExpExecArray | null;\n\n while ((match = plainUrlPattern.exec(text)) !== null) {\n const url = match[0]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: url,\n });\n }\n }\n }\n\n // Extract base64 data URLs: data:image/...;base64,...\n if (includeBase64) {\n const dataUrlPattern = /data:image\\/[^;]+;base64,[A-Za-z0-9+/=]+/g;\n let match: RegExpExecArray | null;\n\n while ((match = dataUrlPattern.exec(text)) !== null) {\n const url = match[0];\n if (!seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'base64',\n });\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract only HTTP/HTTPS URLs (excludes data URLs and relative paths).\n *\n * Convenience wrapper around extractImageUrls that filters for remote URLs.\n *\n * @param text - Text to extract URLs from\n * @returns Array of extracted HTTP/HTTPS image URLs\n */\nexport function extractRemoteImageUrls(text: string): ExtractedImage[] {\n const all = extractImageUrls(text, { includeBase64: false });\n return all.filter((img) => /^https?:\\/\\//i.test(img.url));\n}\n\n/**\n * Extract only base64 data URLs.\n *\n * Convenience wrapper around extractImageUrls.\n *\n * @param text - Text to extract data URLs from\n * @returns Array of base64 image data URLs\n */\nexport function extractDataUrls(text: string): ExtractedImage[] {\n return extractImageUrls(text, {\n includeMarkdown: false,\n includeHTML: false,\n includeBase64: true,\n });\n}\n\n/**\n * Count images in text without extracting full details.\n *\n * More efficient than extractImageUrls when you only need the count.\n *\n * @param text - Text to count images in\n * @returns Total number of unique image references\n */\nexport function countImages(text: string): number {\n return extractImageUrls(text).length;\n}\n","/**\n * SSRF (Server-Side Request Forgery) Protection\n *\n * Validates URLs to prevent SSRF attacks.\n * Browser-safe implementation (no Node.js dependencies).\n */\n\nexport interface SSRFPolicy {\n /** Allow private IP addresses (10.x, 172.16-31.x, 192.168.x) (default: false) */\n allowPrivateIPs?: boolean;\n /** Allow localhost/loopback (127.x, ::1) (default: false) */\n allowLocalhost?: boolean;\n /** Blocked domain patterns (e.g., ['internal.company.com', '*.local']) */\n blockedDomains?: string[];\n /** Allowed domain patterns - if provided, ONLY these are allowed */\n allowedDomains?: string[];\n /** Allow data URLs (data:image/...) (default: false) */\n allowDataUrls?: boolean;\n}\n\nexport interface SafetyCheck {\n /** Whether the URL is safe according to policy */\n safe: boolean;\n /** Reason why URL is unsafe (if safe=false) */\n reason?: string;\n}\n\n/**\n * Default SSRF policy - blocks private IPs, localhost, and data URLs.\n */\nexport const DEFAULT_SSRF_POLICY: Required<SSRFPolicy> = {\n allowPrivateIPs: false,\n allowLocalhost: false,\n blockedDomains: [],\n allowedDomains: [],\n allowDataUrls: false,\n};\n\n/**\n * Check if URL is safe to fetch according to SSRF policy.\n *\n * This is the canonical SSRF protection - all apps should use this\n * before fetching external URLs.\n *\n * **Limitations (browser-side validation only):**\n * - Cannot detect domains that resolve to private IPs (e.g., `attacker.com` -> `127.0.0.1`)\n * - Cannot prevent DNS rebinding attacks (domain resolves differently on subsequent requests)\n * - Cannot validate redirect destinations (server may redirect to internal URLs)\n * - Does not perform actual DNS resolution - only validates URL syntax and hostname patterns\n *\n * For server-side applications requiring stronger SSRF protection, perform DNS resolution\n * and validate the resolved IP address before making the request, and validate redirect\n * destinations or disable redirects entirely.\n *\n * @param url - URL to validate\n * @param policy - SSRF policy (uses defaults if not provided)\n * @returns Safety check result with reason if unsafe\n *\n * @see https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html\n *\n * @example\n * ```typescript\n * const check = isURLSafe('http://10.0.0.1/secret');\n * if (!check.safe) {\n * console.error('Unsafe URL:', check.reason);\n * }\n * ```\n */\nexport function isURLSafe(\n url: string,\n policy: SSRFPolicy = {},\n): SafetyCheck {\n const config = { ...DEFAULT_SSRF_POLICY, ...policy };\n\n // Parse URL\n let parsed: URL;\n try {\n parsed = new URL(url);\n } catch {\n return { safe: false, reason: 'Invalid URL format' };\n }\n\n // Check protocol\n if (parsed.protocol === 'data:') {\n if (!config.allowDataUrls) {\n return { safe: false, reason: 'Data URLs not allowed' };\n }\n return { safe: true };\n }\n\n if (!['http:', 'https:'].includes(parsed.protocol)) {\n return {\n safe: false,\n reason: `Protocol '${parsed.protocol}' not allowed (only http/https)`,\n };\n }\n\n const hostname = parsed.hostname.toLowerCase();\n\n // Check allowed domains (whitelist)\n if (config.allowedDomains.length > 0) {\n const isAllowed = config.allowedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (!isAllowed) {\n return {\n safe: false,\n reason: `Domain '${hostname}' not in allowed list`,\n };\n }\n }\n\n // Check blocked domains (blacklist)\n if (config.blockedDomains.length > 0) {\n const isBlocked = config.blockedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (isBlocked) {\n return {\n safe: false,\n reason: `Domain '${hostname}' is blocked`,\n };\n }\n }\n\n // Check localhost\n if (isLocalhost(hostname) && !config.allowLocalhost) {\n return { safe: false, reason: 'Localhost not allowed' };\n }\n\n // Check private IPs\n if (isPrivateIP(hostname) && !config.allowPrivateIPs) {\n return { safe: false, reason: 'Private IP addresses not allowed' };\n }\n\n return { safe: true };\n}\n\n/**\n * Quick check if URL is safe with default policy.\n *\n * Convenience wrapper for common case.\n *\n * @param url - URL to validate\n * @returns true if safe, false otherwise\n */\nexport function isSafeForFetch(url: string): boolean {\n return isURLSafe(url).safe;\n}\n\n/**\n * Match domain against pattern (supports wildcards).\n *\n * @example\n * ```typescript\n * matchDomainPattern('api.github.com', '*.github.com') // true\n * matchDomainPattern('github.com', '*.github.com') // false\n * matchDomainPattern('github.com', 'github.com') // true\n * ```\n */\nfunction matchDomainPattern(domain: string, pattern: string): boolean {\n // Exact match\n if (domain === pattern) return true;\n\n // Wildcard match - only matches subdomains, not the base domain\n if (pattern.startsWith('*.')) {\n const suffix = pattern.slice(2);\n return domain.endsWith('.' + suffix);\n }\n\n return false;\n}\n\n/**\n * Check if hostname is localhost/loopback.\n */\nfunction isLocalhost(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 loopback\n if (lower === 'localhost' || lower === '0.0.0.0' || lower.startsWith('127.')) {\n return true;\n }\n\n // IPv6 loopback\n if (lower === '::1' || lower === '[::1]') {\n return true;\n }\n\n return false;\n}\n\n/**\n * Check if hostname is a private IP address.\n *\n * Checks for:\n * - 10.0.0.0/8\n * - 172.16.0.0/12\n * - 192.168.0.0/16\n * - Link-local: 169.254.0.0/16 (includes AWS metadata endpoint)\n * - IPv6 private ranges\n */\nfunction isPrivateIP(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 private ranges\n const parts = hostname.split('.').map(Number);\n\n if (\n parts.length === 4 &&\n parts.every((p) => p >= 0 && p <= 255 && !isNaN(p))\n ) {\n const [octet1, octet2] = parts;\n\n // 10.0.0.0/8\n if (octet1 === 10) return true;\n\n // 172.16.0.0/12\n if (octet1 === 172 && octet2 !== undefined && octet2 >= 16 && octet2 <= 31)\n return true;\n\n // 192.168.0.0/16\n if (octet1 === 192 && octet2 === 168) return true;\n\n // 169.254.0.0/16 (link-local) - includes AWS metadata endpoint\n if (octet1 === 169 && octet2 === 254) return true;\n }\n\n // IPv6 private ranges using regex patterns\n // Strip brackets if present (URL parser adds them: [fc00::1])\n const cleanedHostname = lower.replace(/^\\[|\\]$/g, '');\n\n const ipv6Patterns = [\n /^f[cd][0-9a-f]{2}:/i, // fc00::/7 (includes both fc and fd ranges)\n /^fe80:/i, // fe80::/10 (link-local)\n ];\n\n for (const pattern of ipv6Patterns) {\n if (pattern.test(cleanedHostname)) {\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Filter array of URLs, keeping only safe ones.\n *\n * @param urls - Array of URLs to filter\n * @param policy - SSRF policy\n * @returns Array of safe URLs\n */\nexport function filterSafeUrls(\n urls: string[],\n policy?: SSRFPolicy,\n): string[] {\n return urls.filter((url) => isURLSafe(url, policy).safe);\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;AC6CO,SAAS,iBACd,MACA,UAAkC,CAAC,GACjB;AAClB,QAAM;IACJ,kBAAkB;IAClB,cAAc;IACd,gBAAgB;EAClB,IAAI;AAEJ,QAAM,UAA4B,CAAC;AACnC,QAAM,OAAO,oBAAI,IAAY;AAI7B,MAAI,iBAAiB;AACnB,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,oBAAoB;AAC1B,QAAI;AAEJ,YAAQ,QAAQ,kBAAkB,KAAK,IAAI,OAAO,MAAM;AACtD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAGA,UAAM,sBAAsB;AAC5B,YAAQ,QAAQ,oBAAoB,KAAK,IAAI,OAAO,MAAM;AACxD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,OAAO,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,GAAG;AACzE,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAIA,UAAM,gBAAgB;AACtB,YAAQ,QAAQ,cAAc,KAAK,IAAI,OAAO,MAAM;AAClD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,QAAQ,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU,MAAM,CAAC,KAAK,IAAI,GAAG,GAAG;AACtF,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS;QACX,CAAC;MACH;IACF;EACF;AAGA,MAAI,eAAe;AACjB,UAAM,iBAAiB;AACvB,QAAI;AAEJ,YAAQ,QAAQ,eAAe,KAAK,IAAI,OAAO,MAAM;AACnD,YAAM,MAAM,MAAM,CAAC;AACnB,UAAI,CAAC,KAAK,IAAI,GAAG,GAAG;AAClB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;QACV,CAAC;MACH;IACF;EACF;AAEA,SAAO;AACT;AAUO,SAAS,uBAAuB,MAAgC;AACrE,QAAM,MAAM,iBAAiB,MAAM,EAAE,eAAe,MAAM,CAAC;AAC3D,SAAO,IAAI,OAAO,CAAC,QAAQ,gBAAgB,KAAK,IAAI,GAAG,CAAC;AAC1D;AAUO,SAAS,gBAAgB,MAAgC;AAC9D,SAAO,iBAAiB,MAAM;IAC5B,iBAAiB;IACjB,aAAa;IACb,eAAe;EACjB,CAAC;AACH;AAUO,SAAS,YAAY,MAAsB;AAChD,SAAO,iBAAiB,IAAI,EAAE;AAChC;AC/KO,IAAM,sBAA4C;EACvD,iBAAiB;EACjB,gBAAgB;EAChB,gBAAgB,CAAC;EACjB,gBAAgB,CAAC;EACjB,eAAe;AACjB;AAgCO,SAAS,UACd,KACA,SAAqB,CAAC,GACT;AACb,QAAM,SAAS,EAAE,GAAG,qBAAqB,GAAG,OAAO;AAGnD,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;EACtB,QAAQ;AACN,WAAO,EAAE,MAAM,OAAO,QAAQ,qBAAqB;EACrD;AAGA,MAAI,OAAO,aAAa,SAAS;AAC/B,QAAI,CAAC,OAAO,eAAe;AACzB,aAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;IACxD;AACA,WAAO,EAAE,MAAM,KAAK;EACtB;AAEA,MAAI,CAAC,CAAC,SAAS,QAAQ,EAAE,SAAS,OAAO,QAAQ,GAAG;AAClD,WAAO;MACL,MAAM;MACN,QAAQ,aAAa,OAAO,QAAQ;IACtC;EACF;AAEA,QAAM,WAAW,OAAO,SAAS,YAAY;AAG7C,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,CAAC,WAAW;AACd,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,WAAW;AACb,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,gBAAgB;AACnD,WAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;EACxD;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,iBAAiB;AACpD,WAAO,EAAE,MAAM,OAAO,QAAQ,mCAAmC;EACnE;AAEA,SAAO,EAAE,MAAM,KAAK;AACtB;AAUO,SAAS,eAAe,KAAsB;AACnD,SAAO,UAAU,GAAG,EAAE;AACxB;AAYA,SAAS,mBAAmB,QAAgB,SAA0B;AAEpE,MAAI,WAAW,QAAS,QAAO;AAG/B,MAAI,QAAQ,WAAW,IAAI,GAAG;AAC5B,UAAM,SAAS,QAAQ,MAAM,CAAC;AAC9B,WAAO,OAAO,SAAS,MAAM,MAAM;EACrC;AAEA,SAAO;AACT;AAKA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,MAAI,UAAU,eAAe,UAAU,aAAa,MAAM,WAAW,MAAM,GAAG;AAC5E,WAAO;EACT;AAGA,MAAI,UAAU,SAAS,UAAU,SAAS;AACxC,WAAO;EACT;AAEA,SAAO;AACT;AAYA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,QAAM,QAAQ,SAAS,MAAM,GAAG,EAAE,IAAI,MAAM;AAE5C,MACE,MAAM,WAAW,KACjB,MAAM,MAAM,CAAC,MAAM,KAAK,KAAK,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,GAClD;AACA,UAAM,CAAC,QAAQ,MAAM,IAAI;AAGzB,QAAI,WAAW,GAAI,QAAO;AAG1B,QAAI,WAAW,OAAO,WAAW,UAAa,UAAU,MAAM,UAAU;AACtE,aAAO;AAGT,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;AAG7C,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;EAC/C;AAIA,QAAM,kBAAkB,MAAM,QAAQ,YAAY,EAAE;AAEpD,QAAM,eAAe;IACnB;;IACA;;EACF;AAEA,aAAW,WAAW,cAAc;AAClC,QAAI,QAAQ,KAAK,eAAe,GAAG;AACjC,aAAO;IACT;EACF;AAEA,SAAO;AACT;AASO,SAAS,eACd,MACA,QACU;AACV,SAAO,KAAK,OAAO,CAAC,QAAQ,UAAU,KAAK,MAAM,EAAE,IAAI;AACzD;","names":[]}
|
package/dist/image-utils.d.cts
CHANGED
|
@@ -102,10 +102,22 @@ export declare const DEFAULT_SSRF_POLICY: Required<SSRFPolicy>;
|
|
|
102
102
|
* This is the canonical SSRF protection - all apps should use this
|
|
103
103
|
* before fetching external URLs.
|
|
104
104
|
*
|
|
105
|
+
* **Limitations (browser-side validation only):**
|
|
106
|
+
* - Cannot detect domains that resolve to private IPs (e.g., `attacker.com` -> `127.0.0.1`)
|
|
107
|
+
* - Cannot prevent DNS rebinding attacks (domain resolves differently on subsequent requests)
|
|
108
|
+
* - Cannot validate redirect destinations (server may redirect to internal URLs)
|
|
109
|
+
* - Does not perform actual DNS resolution - only validates URL syntax and hostname patterns
|
|
110
|
+
*
|
|
111
|
+
* For server-side applications requiring stronger SSRF protection, perform DNS resolution
|
|
112
|
+
* and validate the resolved IP address before making the request, and validate redirect
|
|
113
|
+
* destinations or disable redirects entirely.
|
|
114
|
+
*
|
|
105
115
|
* @param url - URL to validate
|
|
106
116
|
* @param policy - SSRF policy (uses defaults if not provided)
|
|
107
117
|
* @returns Safety check result with reason if unsafe
|
|
108
118
|
*
|
|
119
|
+
* @see https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html
|
|
120
|
+
*
|
|
109
121
|
* @example
|
|
110
122
|
* ```typescript
|
|
111
123
|
* const check = isURLSafe('http://10.0.0.1/secret');
|
package/dist/image-utils.d.ts
CHANGED
|
@@ -102,10 +102,22 @@ export declare const DEFAULT_SSRF_POLICY: Required<SSRFPolicy>;
|
|
|
102
102
|
* This is the canonical SSRF protection - all apps should use this
|
|
103
103
|
* before fetching external URLs.
|
|
104
104
|
*
|
|
105
|
+
* **Limitations (browser-side validation only):**
|
|
106
|
+
* - Cannot detect domains that resolve to private IPs (e.g., `attacker.com` -> `127.0.0.1`)
|
|
107
|
+
* - Cannot prevent DNS rebinding attacks (domain resolves differently on subsequent requests)
|
|
108
|
+
* - Cannot validate redirect destinations (server may redirect to internal URLs)
|
|
109
|
+
* - Does not perform actual DNS resolution - only validates URL syntax and hostname patterns
|
|
110
|
+
*
|
|
111
|
+
* For server-side applications requiring stronger SSRF protection, perform DNS resolution
|
|
112
|
+
* and validate the resolved IP address before making the request, and validate redirect
|
|
113
|
+
* destinations or disable redirects entirely.
|
|
114
|
+
*
|
|
105
115
|
* @param url - URL to validate
|
|
106
116
|
* @param policy - SSRF policy (uses defaults if not provided)
|
|
107
117
|
* @returns Safety check result with reason if unsafe
|
|
108
118
|
*
|
|
119
|
+
* @see https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html
|
|
120
|
+
*
|
|
109
121
|
* @example
|
|
110
122
|
* ```typescript
|
|
111
123
|
* const check = isURLSafe('http://10.0.0.1/secret');
|
package/dist/image-utils.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../image-utils/src/extraction.ts","../../image-utils/src/ssrf.ts"],"sourcesContent":["/**\n * Image URL Extraction\n *\n * Extracts image URLs from markdown, HTML, and base64 data URLs in text.\n * Used across Federation, Archive, and Architect for image processing.\n */\n\nexport interface ImageExtractionOptions {\n /** Include markdown image syntax `` (default: true) */\n includeMarkdown?: boolean;\n /** Include HTML img tags `<img src=\"url\">` (default: true) */\n includeHTML?: boolean;\n /** Include base64 data URLs `data:image/...;base64,...` (default: true) */\n includeBase64?: boolean;\n}\n\nexport interface ExtractedImage {\n /** The extracted URL or data URL */\n url: string;\n /** Source format where the image was found */\n source: 'markdown' | 'html' | 'base64';\n /** Surrounding context for debugging (optional) */\n context?: string;\n}\n\n/**\n * Extract all image URLs from text.\n *\n * Supports markdown, HTML, and base64 data URLs.\n * Returns deduplicated results.\n *\n * @param text - Text to extract image URLs from\n * @param options - Extraction options\n * @returns Array of extracted images with source information\n *\n * @example\n * ```typescript\n * const text = 'Check out  and <img src=\"banner.jpg\">';\n * const images = extractImageUrls(text);\n * // [\n * // { url: 'avatar.png', source: 'markdown' },\n * // { url: 'banner.jpg', source: 'html' }\n * // ]\n * ```\n */\nexport function extractImageUrls(\n text: string,\n options: ImageExtractionOptions = {},\n): ExtractedImage[] {\n const {\n includeMarkdown = true,\n includeHTML = true,\n includeBase64 = true,\n } = options;\n\n const results: ExtractedImage[] = [];\n const seen = new Set<string>();\n\n // Extract markdown images:  or  or \n // Supports: standard, angle brackets, and SillyTavern dimension syntax\n if (includeMarkdown) {\n const markdownPattern = /!\\[([^\\]]*)\\]\\(<?([^>\\s)]+)>?(?:\\s*=[^)]+)?\\)/g;\n let match: RegExpExecArray | null;\n\n while ((match = markdownPattern.exec(text)) !== null) {\n const url = match[2]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'markdown',\n context: match[0],\n });\n }\n }\n }\n\n // Extract HTML img tags: <img src=\"url\"> (quoted and unquoted)\n if (includeHTML) {\n // Quoted src\n const htmlQuotedPattern = /<img[^>]+src=[\"']([^\"']+)[\"'][^>]*>/gi;\n let match: RegExpExecArray | null;\n\n while ((match = htmlQuotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // Unquoted src (e.g., <img src=url>)\n const htmlUnquotedPattern = /<img[^>]+src=([^\\s\"'>]+)[^>]*>/gi;\n while ((match = htmlUnquotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Skip if it starts with a quote (already handled above)\n if (url && !url.startsWith('\"') && !url.startsWith(\"'\") && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // CSS url() function: url(url), url(\"url\"), url('url')\n // Common in background-image, background, content properties\n const cssUrlPattern = /url\\([\"']?([^)\"']+)[\"']?\\)/gi;\n while ((match = cssUrlPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Only include http(s) URLs to avoid CSS variables and relative paths\n if (url && (url.startsWith('http://') || url.startsWith('https://')) && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n }\n\n // Extract plain image URLs (not wrapped in any syntax)\n if (includeHTML) {\n // Plain URLs with image extensions (common on image hosts)\n const plainUrlPattern = /(?<![(\"'])(https?:\\/\\/[^\\s<>\"']+\\.(?:jpg|jpeg|png|gif|webp|svg|avif|bmp))(?![)\"'])/gi;\n let match: RegExpExecArray | null;\n\n while ((match = plainUrlPattern.exec(text)) !== null) {\n const url = match[0]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: url,\n });\n }\n }\n }\n\n // Extract base64 data URLs: data:image/...;base64,...\n if (includeBase64) {\n const dataUrlPattern = /data:image\\/[^;]+;base64,[A-Za-z0-9+/=]+/g;\n let match: RegExpExecArray | null;\n\n while ((match = dataUrlPattern.exec(text)) !== null) {\n const url = match[0];\n if (!seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'base64',\n });\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract only HTTP/HTTPS URLs (excludes data URLs and relative paths).\n *\n * Convenience wrapper around extractImageUrls that filters for remote URLs.\n *\n * @param text - Text to extract URLs from\n * @returns Array of extracted HTTP/HTTPS image URLs\n */\nexport function extractRemoteImageUrls(text: string): ExtractedImage[] {\n const all = extractImageUrls(text, { includeBase64: false });\n return all.filter((img) => /^https?:\\/\\//i.test(img.url));\n}\n\n/**\n * Extract only base64 data URLs.\n *\n * Convenience wrapper around extractImageUrls.\n *\n * @param text - Text to extract data URLs from\n * @returns Array of base64 image data URLs\n */\nexport function extractDataUrls(text: string): ExtractedImage[] {\n return extractImageUrls(text, {\n includeMarkdown: false,\n includeHTML: false,\n includeBase64: true,\n });\n}\n\n/**\n * Count images in text without extracting full details.\n *\n * More efficient than extractImageUrls when you only need the count.\n *\n * @param text - Text to count images in\n * @returns Total number of unique image references\n */\nexport function countImages(text: string): number {\n return extractImageUrls(text).length;\n}\n","/**\n * SSRF (Server-Side Request Forgery) Protection\n *\n * Validates URLs to prevent SSRF attacks.\n * Browser-safe implementation (no Node.js dependencies).\n */\n\nexport interface SSRFPolicy {\n /** Allow private IP addresses (10.x, 172.16-31.x, 192.168.x) (default: false) */\n allowPrivateIPs?: boolean;\n /** Allow localhost/loopback (127.x, ::1) (default: false) */\n allowLocalhost?: boolean;\n /** Blocked domain patterns (e.g., ['internal.company.com', '*.local']) */\n blockedDomains?: string[];\n /** Allowed domain patterns - if provided, ONLY these are allowed */\n allowedDomains?: string[];\n /** Allow data URLs (data:image/...) (default: false) */\n allowDataUrls?: boolean;\n}\n\nexport interface SafetyCheck {\n /** Whether the URL is safe according to policy */\n safe: boolean;\n /** Reason why URL is unsafe (if safe=false) */\n reason?: string;\n}\n\n/**\n * Default SSRF policy - blocks private IPs, localhost, and data URLs.\n */\nexport const DEFAULT_SSRF_POLICY: Required<SSRFPolicy> = {\n allowPrivateIPs: false,\n allowLocalhost: false,\n blockedDomains: [],\n allowedDomains: [],\n allowDataUrls: false,\n};\n\n/**\n * Check if URL is safe to fetch according to SSRF policy.\n *\n * This is the canonical SSRF protection - all apps should use this\n * before fetching external URLs.\n *\n * @param url - URL to validate\n * @param policy - SSRF policy (uses defaults if not provided)\n * @returns Safety check result with reason if unsafe\n *\n * @example\n * ```typescript\n * const check = isURLSafe('http://10.0.0.1/secret');\n * if (!check.safe) {\n * console.error('Unsafe URL:', check.reason);\n * }\n * ```\n */\nexport function isURLSafe(\n url: string,\n policy: SSRFPolicy = {},\n): SafetyCheck {\n const config = { ...DEFAULT_SSRF_POLICY, ...policy };\n\n // Parse URL\n let parsed: URL;\n try {\n parsed = new URL(url);\n } catch {\n return { safe: false, reason: 'Invalid URL format' };\n }\n\n // Check protocol\n if (parsed.protocol === 'data:') {\n if (!config.allowDataUrls) {\n return { safe: false, reason: 'Data URLs not allowed' };\n }\n return { safe: true };\n }\n\n if (!['http:', 'https:'].includes(parsed.protocol)) {\n return {\n safe: false,\n reason: `Protocol '${parsed.protocol}' not allowed (only http/https)`,\n };\n }\n\n const hostname = parsed.hostname.toLowerCase();\n\n // Check allowed domains (whitelist)\n if (config.allowedDomains.length > 0) {\n const isAllowed = config.allowedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (!isAllowed) {\n return {\n safe: false,\n reason: `Domain '${hostname}' not in allowed list`,\n };\n }\n }\n\n // Check blocked domains (blacklist)\n if (config.blockedDomains.length > 0) {\n const isBlocked = config.blockedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (isBlocked) {\n return {\n safe: false,\n reason: `Domain '${hostname}' is blocked`,\n };\n }\n }\n\n // Check localhost\n if (isLocalhost(hostname) && !config.allowLocalhost) {\n return { safe: false, reason: 'Localhost not allowed' };\n }\n\n // Check private IPs\n if (isPrivateIP(hostname) && !config.allowPrivateIPs) {\n return { safe: false, reason: 'Private IP addresses not allowed' };\n }\n\n return { safe: true };\n}\n\n/**\n * Quick check if URL is safe with default policy.\n *\n * Convenience wrapper for common case.\n *\n * @param url - URL to validate\n * @returns true if safe, false otherwise\n */\nexport function isSafeForFetch(url: string): boolean {\n return isURLSafe(url).safe;\n}\n\n/**\n * Match domain against pattern (supports wildcards).\n *\n * @example\n * ```typescript\n * matchDomainPattern('api.github.com', '*.github.com') // true\n * matchDomainPattern('github.com', '*.github.com') // false\n * matchDomainPattern('github.com', 'github.com') // true\n * ```\n */\nfunction matchDomainPattern(domain: string, pattern: string): boolean {\n // Exact match\n if (domain === pattern) return true;\n\n // Wildcard match - only matches subdomains, not the base domain\n if (pattern.startsWith('*.')) {\n const suffix = pattern.slice(2);\n return domain.endsWith('.' + suffix);\n }\n\n return false;\n}\n\n/**\n * Check if hostname is localhost/loopback.\n */\nfunction isLocalhost(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 loopback\n if (lower === 'localhost' || lower === '0.0.0.0' || lower.startsWith('127.')) {\n return true;\n }\n\n // IPv6 loopback\n if (lower === '::1' || lower === '[::1]') {\n return true;\n }\n\n return false;\n}\n\n/**\n * Check if hostname is a private IP address.\n *\n * Checks for:\n * - 10.0.0.0/8\n * - 172.16.0.0/12\n * - 192.168.0.0/16\n * - Link-local: 169.254.0.0/16 (includes AWS metadata endpoint)\n * - IPv6 private ranges\n */\nfunction isPrivateIP(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 private ranges\n const parts = hostname.split('.').map(Number);\n\n if (\n parts.length === 4 &&\n parts.every((p) => p >= 0 && p <= 255 && !isNaN(p))\n ) {\n const [octet1, octet2] = parts;\n\n // 10.0.0.0/8\n if (octet1 === 10) return true;\n\n // 172.16.0.0/12\n if (octet1 === 172 && octet2 !== undefined && octet2 >= 16 && octet2 <= 31)\n return true;\n\n // 192.168.0.0/16\n if (octet1 === 192 && octet2 === 168) return true;\n\n // 169.254.0.0/16 (link-local) - includes AWS metadata endpoint\n if (octet1 === 169 && octet2 === 254) return true;\n }\n\n // IPv6 private ranges using regex patterns\n // Strip brackets if present (URL parser adds them: [fc00::1])\n const cleanedHostname = lower.replace(/^\\[|\\]$/g, '');\n\n const ipv6Patterns = [\n /^f[cd][0-9a-f]{2}:/i, // fc00::/7 (includes both fc and fd ranges)\n /^fe80:/i, // fe80::/10 (link-local)\n ];\n\n for (const pattern of ipv6Patterns) {\n if (pattern.test(cleanedHostname)) {\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Filter array of URLs, keeping only safe ones.\n *\n * @param urls - Array of URLs to filter\n * @param policy - SSRF policy\n * @returns Array of safe URLs\n */\nexport function filterSafeUrls(\n urls: string[],\n policy?: SSRFPolicy,\n): string[] {\n return urls.filter((url) => isURLSafe(url, policy).safe);\n}\n"],"mappings":";AA6CO,SAAS,iBACd,MACA,UAAkC,CAAC,GACjB;AAClB,QAAM;IACJ,kBAAkB;IAClB,cAAc;IACd,gBAAgB;EAClB,IAAI;AAEJ,QAAM,UAA4B,CAAC;AACnC,QAAM,OAAO,oBAAI,IAAY;AAI7B,MAAI,iBAAiB;AACnB,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,oBAAoB;AAC1B,QAAI;AAEJ,YAAQ,QAAQ,kBAAkB,KAAK,IAAI,OAAO,MAAM;AACtD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAGA,UAAM,sBAAsB;AAC5B,YAAQ,QAAQ,oBAAoB,KAAK,IAAI,OAAO,MAAM;AACxD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,OAAO,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,GAAG;AACzE,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAIA,UAAM,gBAAgB;AACtB,YAAQ,QAAQ,cAAc,KAAK,IAAI,OAAO,MAAM;AAClD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,QAAQ,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU,MAAM,CAAC,KAAK,IAAI,GAAG,GAAG;AACtF,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS;QACX,CAAC;MACH;IACF;EACF;AAGA,MAAI,eAAe;AACjB,UAAM,iBAAiB;AACvB,QAAI;AAEJ,YAAQ,QAAQ,eAAe,KAAK,IAAI,OAAO,MAAM;AACnD,YAAM,MAAM,MAAM,CAAC;AACnB,UAAI,CAAC,KAAK,IAAI,GAAG,GAAG;AAClB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;QACV,CAAC;MACH;IACF;EACF;AAEA,SAAO;AACT;AAUO,SAAS,uBAAuB,MAAgC;AACrE,QAAM,MAAM,iBAAiB,MAAM,EAAE,eAAe,MAAM,CAAC;AAC3D,SAAO,IAAI,OAAO,CAAC,QAAQ,gBAAgB,KAAK,IAAI,GAAG,CAAC;AAC1D;AAUO,SAAS,gBAAgB,MAAgC;AAC9D,SAAO,iBAAiB,MAAM;IAC5B,iBAAiB;IACjB,aAAa;IACb,eAAe;EACjB,CAAC;AACH;AAUO,SAAS,YAAY,MAAsB;AAChD,SAAO,iBAAiB,IAAI,EAAE;AAChC;AC/KO,IAAM,sBAA4C;EACvD,iBAAiB;EACjB,gBAAgB;EAChB,gBAAgB,CAAC;EACjB,gBAAgB,CAAC;EACjB,eAAe;AACjB;AAoBO,SAAS,UACd,KACA,SAAqB,CAAC,GACT;AACb,QAAM,SAAS,EAAE,GAAG,qBAAqB,GAAG,OAAO;AAGnD,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;EACtB,QAAQ;AACN,WAAO,EAAE,MAAM,OAAO,QAAQ,qBAAqB;EACrD;AAGA,MAAI,OAAO,aAAa,SAAS;AAC/B,QAAI,CAAC,OAAO,eAAe;AACzB,aAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;IACxD;AACA,WAAO,EAAE,MAAM,KAAK;EACtB;AAEA,MAAI,CAAC,CAAC,SAAS,QAAQ,EAAE,SAAS,OAAO,QAAQ,GAAG;AAClD,WAAO;MACL,MAAM;MACN,QAAQ,aAAa,OAAO,QAAQ;IACtC;EACF;AAEA,QAAM,WAAW,OAAO,SAAS,YAAY;AAG7C,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,CAAC,WAAW;AACd,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,WAAW;AACb,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,gBAAgB;AACnD,WAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;EACxD;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,iBAAiB;AACpD,WAAO,EAAE,MAAM,OAAO,QAAQ,mCAAmC;EACnE;AAEA,SAAO,EAAE,MAAM,KAAK;AACtB;AAUO,SAAS,eAAe,KAAsB;AACnD,SAAO,UAAU,GAAG,EAAE;AACxB;AAYA,SAAS,mBAAmB,QAAgB,SAA0B;AAEpE,MAAI,WAAW,QAAS,QAAO;AAG/B,MAAI,QAAQ,WAAW,IAAI,GAAG;AAC5B,UAAM,SAAS,QAAQ,MAAM,CAAC;AAC9B,WAAO,OAAO,SAAS,MAAM,MAAM;EACrC;AAEA,SAAO;AACT;AAKA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,MAAI,UAAU,eAAe,UAAU,aAAa,MAAM,WAAW,MAAM,GAAG;AAC5E,WAAO;EACT;AAGA,MAAI,UAAU,SAAS,UAAU,SAAS;AACxC,WAAO;EACT;AAEA,SAAO;AACT;AAYA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,QAAM,QAAQ,SAAS,MAAM,GAAG,EAAE,IAAI,MAAM;AAE5C,MACE,MAAM,WAAW,KACjB,MAAM,MAAM,CAAC,MAAM,KAAK,KAAK,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,GAClD;AACA,UAAM,CAAC,QAAQ,MAAM,IAAI;AAGzB,QAAI,WAAW,GAAI,QAAO;AAG1B,QAAI,WAAW,OAAO,WAAW,UAAa,UAAU,MAAM,UAAU;AACtE,aAAO;AAGT,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;AAG7C,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;EAC/C;AAIA,QAAM,kBAAkB,MAAM,QAAQ,YAAY,EAAE;AAEpD,QAAM,eAAe;IACnB;;IACA;;EACF;AAEA,aAAW,WAAW,cAAc;AAClC,QAAI,QAAQ,KAAK,eAAe,GAAG;AACjC,aAAO;IACT;EACF;AAEA,SAAO;AACT;AASO,SAAS,eACd,MACA,QACU;AACV,SAAO,KAAK,OAAO,CAAC,QAAQ,UAAU,KAAK,MAAM,EAAE,IAAI;AACzD;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../image-utils/src/extraction.ts","../../image-utils/src/ssrf.ts"],"sourcesContent":["/**\n * Image URL Extraction\n *\n * Extracts image URLs from markdown, HTML, and base64 data URLs in text.\n * Used across Federation, Archive, and Architect for image processing.\n */\n\nexport interface ImageExtractionOptions {\n /** Include markdown image syntax `` (default: true) */\n includeMarkdown?: boolean;\n /** Include HTML img tags `<img src=\"url\">` (default: true) */\n includeHTML?: boolean;\n /** Include base64 data URLs `data:image/...;base64,...` (default: true) */\n includeBase64?: boolean;\n}\n\nexport interface ExtractedImage {\n /** The extracted URL or data URL */\n url: string;\n /** Source format where the image was found */\n source: 'markdown' | 'html' | 'base64';\n /** Surrounding context for debugging (optional) */\n context?: string;\n}\n\n/**\n * Extract all image URLs from text.\n *\n * Supports markdown, HTML, and base64 data URLs.\n * Returns deduplicated results.\n *\n * @param text - Text to extract image URLs from\n * @param options - Extraction options\n * @returns Array of extracted images with source information\n *\n * @example\n * ```typescript\n * const text = 'Check out  and <img src=\"banner.jpg\">';\n * const images = extractImageUrls(text);\n * // [\n * // { url: 'avatar.png', source: 'markdown' },\n * // { url: 'banner.jpg', source: 'html' }\n * // ]\n * ```\n */\nexport function extractImageUrls(\n text: string,\n options: ImageExtractionOptions = {},\n): ExtractedImage[] {\n const {\n includeMarkdown = true,\n includeHTML = true,\n includeBase64 = true,\n } = options;\n\n const results: ExtractedImage[] = [];\n const seen = new Set<string>();\n\n // Extract markdown images:  or  or \n // Supports: standard, angle brackets, and SillyTavern dimension syntax\n if (includeMarkdown) {\n const markdownPattern = /!\\[([^\\]]*)\\]\\(<?([^>\\s)]+)>?(?:\\s*=[^)]+)?\\)/g;\n let match: RegExpExecArray | null;\n\n while ((match = markdownPattern.exec(text)) !== null) {\n const url = match[2]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'markdown',\n context: match[0],\n });\n }\n }\n }\n\n // Extract HTML img tags: <img src=\"url\"> (quoted and unquoted)\n if (includeHTML) {\n // Quoted src\n const htmlQuotedPattern = /<img[^>]+src=[\"']([^\"']+)[\"'][^>]*>/gi;\n let match: RegExpExecArray | null;\n\n while ((match = htmlQuotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // Unquoted src (e.g., <img src=url>)\n const htmlUnquotedPattern = /<img[^>]+src=([^\\s\"'>]+)[^>]*>/gi;\n while ((match = htmlUnquotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Skip if it starts with a quote (already handled above)\n if (url && !url.startsWith('\"') && !url.startsWith(\"'\") && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // CSS url() function: url(url), url(\"url\"), url('url')\n // Common in background-image, background, content properties\n const cssUrlPattern = /url\\([\"']?([^)\"']+)[\"']?\\)/gi;\n while ((match = cssUrlPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Only include http(s) URLs to avoid CSS variables and relative paths\n if (url && (url.startsWith('http://') || url.startsWith('https://')) && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n }\n\n // Extract plain image URLs (not wrapped in any syntax)\n if (includeHTML) {\n // Plain URLs with image extensions (common on image hosts)\n const plainUrlPattern = /(?<![(\"'])(https?:\\/\\/[^\\s<>\"']+\\.(?:jpg|jpeg|png|gif|webp|svg|avif|bmp))(?![)\"'])/gi;\n let match: RegExpExecArray | null;\n\n while ((match = plainUrlPattern.exec(text)) !== null) {\n const url = match[0]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: url,\n });\n }\n }\n }\n\n // Extract base64 data URLs: data:image/...;base64,...\n if (includeBase64) {\n const dataUrlPattern = /data:image\\/[^;]+;base64,[A-Za-z0-9+/=]+/g;\n let match: RegExpExecArray | null;\n\n while ((match = dataUrlPattern.exec(text)) !== null) {\n const url = match[0];\n if (!seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'base64',\n });\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract only HTTP/HTTPS URLs (excludes data URLs and relative paths).\n *\n * Convenience wrapper around extractImageUrls that filters for remote URLs.\n *\n * @param text - Text to extract URLs from\n * @returns Array of extracted HTTP/HTTPS image URLs\n */\nexport function extractRemoteImageUrls(text: string): ExtractedImage[] {\n const all = extractImageUrls(text, { includeBase64: false });\n return all.filter((img) => /^https?:\\/\\//i.test(img.url));\n}\n\n/**\n * Extract only base64 data URLs.\n *\n * Convenience wrapper around extractImageUrls.\n *\n * @param text - Text to extract data URLs from\n * @returns Array of base64 image data URLs\n */\nexport function extractDataUrls(text: string): ExtractedImage[] {\n return extractImageUrls(text, {\n includeMarkdown: false,\n includeHTML: false,\n includeBase64: true,\n });\n}\n\n/**\n * Count images in text without extracting full details.\n *\n * More efficient than extractImageUrls when you only need the count.\n *\n * @param text - Text to count images in\n * @returns Total number of unique image references\n */\nexport function countImages(text: string): number {\n return extractImageUrls(text).length;\n}\n","/**\n * SSRF (Server-Side Request Forgery) Protection\n *\n * Validates URLs to prevent SSRF attacks.\n * Browser-safe implementation (no Node.js dependencies).\n */\n\nexport interface SSRFPolicy {\n /** Allow private IP addresses (10.x, 172.16-31.x, 192.168.x) (default: false) */\n allowPrivateIPs?: boolean;\n /** Allow localhost/loopback (127.x, ::1) (default: false) */\n allowLocalhost?: boolean;\n /** Blocked domain patterns (e.g., ['internal.company.com', '*.local']) */\n blockedDomains?: string[];\n /** Allowed domain patterns - if provided, ONLY these are allowed */\n allowedDomains?: string[];\n /** Allow data URLs (data:image/...) (default: false) */\n allowDataUrls?: boolean;\n}\n\nexport interface SafetyCheck {\n /** Whether the URL is safe according to policy */\n safe: boolean;\n /** Reason why URL is unsafe (if safe=false) */\n reason?: string;\n}\n\n/**\n * Default SSRF policy - blocks private IPs, localhost, and data URLs.\n */\nexport const DEFAULT_SSRF_POLICY: Required<SSRFPolicy> = {\n allowPrivateIPs: false,\n allowLocalhost: false,\n blockedDomains: [],\n allowedDomains: [],\n allowDataUrls: false,\n};\n\n/**\n * Check if URL is safe to fetch according to SSRF policy.\n *\n * This is the canonical SSRF protection - all apps should use this\n * before fetching external URLs.\n *\n * **Limitations (browser-side validation only):**\n * - Cannot detect domains that resolve to private IPs (e.g., `attacker.com` -> `127.0.0.1`)\n * - Cannot prevent DNS rebinding attacks (domain resolves differently on subsequent requests)\n * - Cannot validate redirect destinations (server may redirect to internal URLs)\n * - Does not perform actual DNS resolution - only validates URL syntax and hostname patterns\n *\n * For server-side applications requiring stronger SSRF protection, perform DNS resolution\n * and validate the resolved IP address before making the request, and validate redirect\n * destinations or disable redirects entirely.\n *\n * @param url - URL to validate\n * @param policy - SSRF policy (uses defaults if not provided)\n * @returns Safety check result with reason if unsafe\n *\n * @see https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html\n *\n * @example\n * ```typescript\n * const check = isURLSafe('http://10.0.0.1/secret');\n * if (!check.safe) {\n * console.error('Unsafe URL:', check.reason);\n * }\n * ```\n */\nexport function isURLSafe(\n url: string,\n policy: SSRFPolicy = {},\n): SafetyCheck {\n const config = { ...DEFAULT_SSRF_POLICY, ...policy };\n\n // Parse URL\n let parsed: URL;\n try {\n parsed = new URL(url);\n } catch {\n return { safe: false, reason: 'Invalid URL format' };\n }\n\n // Check protocol\n if (parsed.protocol === 'data:') {\n if (!config.allowDataUrls) {\n return { safe: false, reason: 'Data URLs not allowed' };\n }\n return { safe: true };\n }\n\n if (!['http:', 'https:'].includes(parsed.protocol)) {\n return {\n safe: false,\n reason: `Protocol '${parsed.protocol}' not allowed (only http/https)`,\n };\n }\n\n const hostname = parsed.hostname.toLowerCase();\n\n // Check allowed domains (whitelist)\n if (config.allowedDomains.length > 0) {\n const isAllowed = config.allowedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (!isAllowed) {\n return {\n safe: false,\n reason: `Domain '${hostname}' not in allowed list`,\n };\n }\n }\n\n // Check blocked domains (blacklist)\n if (config.blockedDomains.length > 0) {\n const isBlocked = config.blockedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (isBlocked) {\n return {\n safe: false,\n reason: `Domain '${hostname}' is blocked`,\n };\n }\n }\n\n // Check localhost\n if (isLocalhost(hostname) && !config.allowLocalhost) {\n return { safe: false, reason: 'Localhost not allowed' };\n }\n\n // Check private IPs\n if (isPrivateIP(hostname) && !config.allowPrivateIPs) {\n return { safe: false, reason: 'Private IP addresses not allowed' };\n }\n\n return { safe: true };\n}\n\n/**\n * Quick check if URL is safe with default policy.\n *\n * Convenience wrapper for common case.\n *\n * @param url - URL to validate\n * @returns true if safe, false otherwise\n */\nexport function isSafeForFetch(url: string): boolean {\n return isURLSafe(url).safe;\n}\n\n/**\n * Match domain against pattern (supports wildcards).\n *\n * @example\n * ```typescript\n * matchDomainPattern('api.github.com', '*.github.com') // true\n * matchDomainPattern('github.com', '*.github.com') // false\n * matchDomainPattern('github.com', 'github.com') // true\n * ```\n */\nfunction matchDomainPattern(domain: string, pattern: string): boolean {\n // Exact match\n if (domain === pattern) return true;\n\n // Wildcard match - only matches subdomains, not the base domain\n if (pattern.startsWith('*.')) {\n const suffix = pattern.slice(2);\n return domain.endsWith('.' + suffix);\n }\n\n return false;\n}\n\n/**\n * Check if hostname is localhost/loopback.\n */\nfunction isLocalhost(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 loopback\n if (lower === 'localhost' || lower === '0.0.0.0' || lower.startsWith('127.')) {\n return true;\n }\n\n // IPv6 loopback\n if (lower === '::1' || lower === '[::1]') {\n return true;\n }\n\n return false;\n}\n\n/**\n * Check if hostname is a private IP address.\n *\n * Checks for:\n * - 10.0.0.0/8\n * - 172.16.0.0/12\n * - 192.168.0.0/16\n * - Link-local: 169.254.0.0/16 (includes AWS metadata endpoint)\n * - IPv6 private ranges\n */\nfunction isPrivateIP(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 private ranges\n const parts = hostname.split('.').map(Number);\n\n if (\n parts.length === 4 &&\n parts.every((p) => p >= 0 && p <= 255 && !isNaN(p))\n ) {\n const [octet1, octet2] = parts;\n\n // 10.0.0.0/8\n if (octet1 === 10) return true;\n\n // 172.16.0.0/12\n if (octet1 === 172 && octet2 !== undefined && octet2 >= 16 && octet2 <= 31)\n return true;\n\n // 192.168.0.0/16\n if (octet1 === 192 && octet2 === 168) return true;\n\n // 169.254.0.0/16 (link-local) - includes AWS metadata endpoint\n if (octet1 === 169 && octet2 === 254) return true;\n }\n\n // IPv6 private ranges using regex patterns\n // Strip brackets if present (URL parser adds them: [fc00::1])\n const cleanedHostname = lower.replace(/^\\[|\\]$/g, '');\n\n const ipv6Patterns = [\n /^f[cd][0-9a-f]{2}:/i, // fc00::/7 (includes both fc and fd ranges)\n /^fe80:/i, // fe80::/10 (link-local)\n ];\n\n for (const pattern of ipv6Patterns) {\n if (pattern.test(cleanedHostname)) {\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Filter array of URLs, keeping only safe ones.\n *\n * @param urls - Array of URLs to filter\n * @param policy - SSRF policy\n * @returns Array of safe URLs\n */\nexport function filterSafeUrls(\n urls: string[],\n policy?: SSRFPolicy,\n): string[] {\n return urls.filter((url) => isURLSafe(url, policy).safe);\n}\n"],"mappings":";AA6CO,SAAS,iBACd,MACA,UAAkC,CAAC,GACjB;AAClB,QAAM;IACJ,kBAAkB;IAClB,cAAc;IACd,gBAAgB;EAClB,IAAI;AAEJ,QAAM,UAA4B,CAAC;AACnC,QAAM,OAAO,oBAAI,IAAY;AAI7B,MAAI,iBAAiB;AACnB,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,oBAAoB;AAC1B,QAAI;AAEJ,YAAQ,QAAQ,kBAAkB,KAAK,IAAI,OAAO,MAAM;AACtD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAGA,UAAM,sBAAsB;AAC5B,YAAQ,QAAQ,oBAAoB,KAAK,IAAI,OAAO,MAAM;AACxD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,OAAO,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,GAAG;AACzE,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAIA,UAAM,gBAAgB;AACtB,YAAQ,QAAQ,cAAc,KAAK,IAAI,OAAO,MAAM;AAClD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,QAAQ,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU,MAAM,CAAC,KAAK,IAAI,GAAG,GAAG;AACtF,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS;QACX,CAAC;MACH;IACF;EACF;AAGA,MAAI,eAAe;AACjB,UAAM,iBAAiB;AACvB,QAAI;AAEJ,YAAQ,QAAQ,eAAe,KAAK,IAAI,OAAO,MAAM;AACnD,YAAM,MAAM,MAAM,CAAC;AACnB,UAAI,CAAC,KAAK,IAAI,GAAG,GAAG;AAClB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;QACV,CAAC;MACH;IACF;EACF;AAEA,SAAO;AACT;AAUO,SAAS,uBAAuB,MAAgC;AACrE,QAAM,MAAM,iBAAiB,MAAM,EAAE,eAAe,MAAM,CAAC;AAC3D,SAAO,IAAI,OAAO,CAAC,QAAQ,gBAAgB,KAAK,IAAI,GAAG,CAAC;AAC1D;AAUO,SAAS,gBAAgB,MAAgC;AAC9D,SAAO,iBAAiB,MAAM;IAC5B,iBAAiB;IACjB,aAAa;IACb,eAAe;EACjB,CAAC;AACH;AAUO,SAAS,YAAY,MAAsB;AAChD,SAAO,iBAAiB,IAAI,EAAE;AAChC;AC/KO,IAAM,sBAA4C;EACvD,iBAAiB;EACjB,gBAAgB;EAChB,gBAAgB,CAAC;EACjB,gBAAgB,CAAC;EACjB,eAAe;AACjB;AAgCO,SAAS,UACd,KACA,SAAqB,CAAC,GACT;AACb,QAAM,SAAS,EAAE,GAAG,qBAAqB,GAAG,OAAO;AAGnD,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;EACtB,QAAQ;AACN,WAAO,EAAE,MAAM,OAAO,QAAQ,qBAAqB;EACrD;AAGA,MAAI,OAAO,aAAa,SAAS;AAC/B,QAAI,CAAC,OAAO,eAAe;AACzB,aAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;IACxD;AACA,WAAO,EAAE,MAAM,KAAK;EACtB;AAEA,MAAI,CAAC,CAAC,SAAS,QAAQ,EAAE,SAAS,OAAO,QAAQ,GAAG;AAClD,WAAO;MACL,MAAM;MACN,QAAQ,aAAa,OAAO,QAAQ;IACtC;EACF;AAEA,QAAM,WAAW,OAAO,SAAS,YAAY;AAG7C,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,CAAC,WAAW;AACd,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,WAAW;AACb,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,gBAAgB;AACnD,WAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;EACxD;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,iBAAiB;AACpD,WAAO,EAAE,MAAM,OAAO,QAAQ,mCAAmC;EACnE;AAEA,SAAO,EAAE,MAAM,KAAK;AACtB;AAUO,SAAS,eAAe,KAAsB;AACnD,SAAO,UAAU,GAAG,EAAE;AACxB;AAYA,SAAS,mBAAmB,QAAgB,SAA0B;AAEpE,MAAI,WAAW,QAAS,QAAO;AAG/B,MAAI,QAAQ,WAAW,IAAI,GAAG;AAC5B,UAAM,SAAS,QAAQ,MAAM,CAAC;AAC9B,WAAO,OAAO,SAAS,MAAM,MAAM;EACrC;AAEA,SAAO;AACT;AAKA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,MAAI,UAAU,eAAe,UAAU,aAAa,MAAM,WAAW,MAAM,GAAG;AAC5E,WAAO;EACT;AAGA,MAAI,UAAU,SAAS,UAAU,SAAS;AACxC,WAAO;EACT;AAEA,SAAO;AACT;AAYA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,QAAM,QAAQ,SAAS,MAAM,GAAG,EAAE,IAAI,MAAM;AAE5C,MACE,MAAM,WAAW,KACjB,MAAM,MAAM,CAAC,MAAM,KAAK,KAAK,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,GAClD;AACA,UAAM,CAAC,QAAQ,MAAM,IAAI;AAGzB,QAAI,WAAW,GAAI,QAAO;AAG1B,QAAI,WAAW,OAAO,WAAW,UAAa,UAAU,MAAM,UAAU;AACtE,aAAO;AAGT,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;AAG7C,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;EAC/C;AAIA,QAAM,kBAAkB,MAAM,QAAQ,YAAY,EAAE;AAEpD,QAAM,eAAe;IACnB;;IACA;;EACF;AAEA,aAAW,WAAW,cAAc;AAClC,QAAI,QAAQ,KAAK,eAAe,GAAG;AACjC,aAAO;IACT;EACF;AAEA,SAAO;AACT;AASO,SAAS,eACd,MACA,QACU;AACV,SAAO,KAAK,OAAO,CAAC,QAAQ,UAAU,KAAK,MAAM,EAAE,IAAI;AACzD;","names":[]}
|
package/dist/index.cjs
CHANGED
|
@@ -128,7 +128,23 @@ function streamingUnzipSync(data, limits = DEFAULT_ZIP_LIMITS) {
|
|
|
128
128
|
if (unsafePathHandling === "warn" && limits.onUnsafePath) {
|
|
129
129
|
limits.onUnsafePath(file.name, reason);
|
|
130
130
|
}
|
|
131
|
-
file.ondata = () => {
|
|
131
|
+
file.ondata = (err, chunk, _final) => {
|
|
132
|
+
if (error) return;
|
|
133
|
+
if (err) {
|
|
134
|
+
error = err;
|
|
135
|
+
return;
|
|
136
|
+
}
|
|
137
|
+
if (chunk && chunk.length > 0) {
|
|
138
|
+
totalBytes += chunk.length;
|
|
139
|
+
if (totalBytes > limits.maxTotalSize) {
|
|
140
|
+
error = new ZipPreflightError(
|
|
141
|
+
`Total actual size ${totalBytes} exceeds limit ${limits.maxTotalSize}`,
|
|
142
|
+
totalBytes,
|
|
143
|
+
limits.maxTotalSize
|
|
144
|
+
);
|
|
145
|
+
file.terminate();
|
|
146
|
+
}
|
|
147
|
+
}
|
|
132
148
|
};
|
|
133
149
|
file.start();
|
|
134
150
|
return;
|
|
@@ -8531,6 +8547,43 @@ var DEFAULT_OPTIONS3 = {
|
|
|
8531
8547
|
maxTotalSize: 500 * 1024 * 1024,
|
|
8532
8548
|
extractAssets: true
|
|
8533
8549
|
};
|
|
8550
|
+
var ASSET_PREFIX_VARIANTS = [
|
|
8551
|
+
{ prefix: "__asset:", format: "CCv3 (SillyTavern)" },
|
|
8552
|
+
{ prefix: "asset:", format: "CCv2/CCv3 common" },
|
|
8553
|
+
{ prefix: "pngchunk:", format: "Explicit PNG chunk" },
|
|
8554
|
+
{ prefix: "chara-ext-asset_:", format: "RisuAI (with colon)" },
|
|
8555
|
+
{ prefix: "chara-ext-asset_", format: "RisuAI" },
|
|
8556
|
+
{ prefix: "__asset_", format: "Legacy underscore variant" }
|
|
8557
|
+
];
|
|
8558
|
+
function isChunkReference(uri) {
|
|
8559
|
+
return ASSET_PREFIX_VARIANTS.some(({ prefix }) => uri.startsWith(prefix)) || !uri.includes(":");
|
|
8560
|
+
}
|
|
8561
|
+
function stripAssetPrefix(uri) {
|
|
8562
|
+
for (const { prefix } of ASSET_PREFIX_VARIANTS) {
|
|
8563
|
+
if (uri.startsWith(prefix)) {
|
|
8564
|
+
return uri.substring(prefix.length);
|
|
8565
|
+
}
|
|
8566
|
+
}
|
|
8567
|
+
return uri;
|
|
8568
|
+
}
|
|
8569
|
+
function generateChunkKeyCandidates(assetId, originalUri) {
|
|
8570
|
+
return [
|
|
8571
|
+
assetId,
|
|
8572
|
+
// Plain ID: "0"
|
|
8573
|
+
originalUri,
|
|
8574
|
+
// Original URI: "__asset:0"
|
|
8575
|
+
`asset:${assetId}`,
|
|
8576
|
+
// Common format
|
|
8577
|
+
`__asset:${assetId}`,
|
|
8578
|
+
// CCv3 format
|
|
8579
|
+
`__asset_${assetId}`,
|
|
8580
|
+
// Legacy underscore variant
|
|
8581
|
+
`chara-ext-asset_${assetId}`,
|
|
8582
|
+
// RisuAI format
|
|
8583
|
+
`chara-ext-asset_:${assetId}`
|
|
8584
|
+
// RisuAI format with colon
|
|
8585
|
+
];
|
|
8586
|
+
}
|
|
8534
8587
|
function estimateBase64DecodedSize(base64Length) {
|
|
8535
8588
|
return Math.ceil(base64Length * 0.75);
|
|
8536
8589
|
}
|
|
@@ -8598,39 +8651,22 @@ function parsePng(data, options) {
|
|
|
8598
8651
|
if (extracted.extraChunks && options.extractAssets && card.data.assets) {
|
|
8599
8652
|
const usedChunks = /* @__PURE__ */ new Set();
|
|
8600
8653
|
const chunkMap = /* @__PURE__ */ new Map();
|
|
8654
|
+
const risuIndexPrefixes = ASSET_PREFIX_VARIANTS.filter((v) => v.prefix.startsWith("chara-ext-asset_"));
|
|
8601
8655
|
for (const chunk of extracted.extraChunks) {
|
|
8602
8656
|
chunkMap.set(chunk.keyword, chunk);
|
|
8603
|
-
|
|
8604
|
-
|
|
8605
|
-
|
|
8606
|
-
|
|
8607
|
-
|
|
8657
|
+
for (const { prefix } of risuIndexPrefixes) {
|
|
8658
|
+
if (chunk.keyword.startsWith(prefix)) {
|
|
8659
|
+
const suffix = chunk.keyword.substring(prefix.length);
|
|
8660
|
+
chunkMap.set(suffix, chunk);
|
|
8661
|
+
break;
|
|
8608
8662
|
}
|
|
8609
8663
|
}
|
|
8610
8664
|
}
|
|
8611
8665
|
for (const descriptor of card.data.assets) {
|
|
8612
8666
|
if (!descriptor.uri) continue;
|
|
8613
|
-
if (
|
|
8614
|
-
|
|
8615
|
-
|
|
8616
|
-
else if (assetId.startsWith("asset:")) assetId = assetId.substring(6);
|
|
8617
|
-
else if (assetId.startsWith("pngchunk:")) assetId = assetId.substring(9);
|
|
8618
|
-
const candidates = [
|
|
8619
|
-
assetId,
|
|
8620
|
-
// "0"
|
|
8621
|
-
descriptor.uri,
|
|
8622
|
-
// "__asset:0"
|
|
8623
|
-
`asset:${assetId}`,
|
|
8624
|
-
// "asset:0"
|
|
8625
|
-
`__asset:${assetId}`,
|
|
8626
|
-
// "__asset:0"
|
|
8627
|
-
`__asset_${assetId}`,
|
|
8628
|
-
// "__asset_0"
|
|
8629
|
-
`chara-ext-asset_${assetId}`,
|
|
8630
|
-
// "chara-ext-asset_0"
|
|
8631
|
-
`chara-ext-asset_:${assetId}`
|
|
8632
|
-
// "chara-ext-asset_:0"
|
|
8633
|
-
];
|
|
8667
|
+
if (isChunkReference(descriptor.uri)) {
|
|
8668
|
+
const assetId = stripAssetPrefix(descriptor.uri);
|
|
8669
|
+
const candidates = generateChunkKeyCandidates(assetId, descriptor.uri);
|
|
8634
8670
|
let chunk;
|
|
8635
8671
|
for (const candidate of candidates) {
|
|
8636
8672
|
chunk = chunkMap.get(candidate);
|
|
@@ -8677,13 +8713,15 @@ function parsePng(data, options) {
|
|
|
8677
8713
|
}
|
|
8678
8714
|
}
|
|
8679
8715
|
}
|
|
8716
|
+
const risuPrefixes = ASSET_PREFIX_VARIANTS.filter((v) => v.prefix.startsWith("chara-ext-asset_"));
|
|
8680
8717
|
for (const chunk of extracted.extraChunks) {
|
|
8681
8718
|
if (!usedChunks.has(chunk.keyword)) {
|
|
8682
8719
|
let assetId = null;
|
|
8683
|
-
|
|
8684
|
-
|
|
8685
|
-
|
|
8686
|
-
|
|
8720
|
+
for (const { prefix } of risuPrefixes) {
|
|
8721
|
+
if (chunk.keyword.startsWith(prefix)) {
|
|
8722
|
+
assetId = chunk.keyword.substring(prefix.length);
|
|
8723
|
+
break;
|
|
8724
|
+
}
|
|
8687
8725
|
}
|
|
8688
8726
|
if (assetId) {
|
|
8689
8727
|
try {
|