@character-foundry/character-foundry 0.1.9 → 0.4.0-dev.1765937896
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/app-framework.cjs +291 -95
- package/dist/app-framework.cjs.map +1 -1
- package/dist/app-framework.d.cts +1 -1
- package/dist/app-framework.d.ts +1 -1
- package/dist/app-framework.js +292 -96
- package/dist/app-framework.js.map +1 -1
- package/dist/charx.cjs +44 -23
- package/dist/charx.cjs.map +1 -1
- package/dist/charx.d.cts +368 -207
- package/dist/charx.d.ts +368 -207
- package/dist/charx.js +44 -23
- package/dist/charx.js.map +1 -1
- package/dist/exporter.cjs +27 -22
- package/dist/exporter.cjs.map +1 -1
- package/dist/exporter.d.cts +368 -207
- package/dist/exporter.d.ts +368 -207
- package/dist/exporter.js +27 -22
- package/dist/exporter.js.map +1 -1
- package/dist/federation.cjs +16 -4
- package/dist/federation.cjs.map +1 -1
- package/dist/federation.d.cts +368 -207
- package/dist/federation.d.ts +368 -207
- package/dist/federation.js +16 -4
- package/dist/federation.js.map +1 -1
- package/dist/image-utils.cjs.map +1 -1
- package/dist/image-utils.d.cts +12 -0
- package/dist/image-utils.d.ts +12 -0
- package/dist/image-utils.js.map +1 -1
- package/dist/index.cjs +106 -56
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +708 -423
- package/dist/index.d.ts +708 -423
- package/dist/index.js +106 -56
- package/dist/index.js.map +1 -1
- package/dist/loader.cjs +106 -56
- package/dist/loader.cjs.map +1 -1
- package/dist/loader.d.cts +564 -318
- package/dist/loader.d.ts +564 -318
- package/dist/loader.js +106 -56
- package/dist/loader.js.map +1 -1
- package/dist/lorebook.cjs +5 -5
- package/dist/lorebook.cjs.map +1 -1
- package/dist/lorebook.d.cts +674 -381
- package/dist/lorebook.d.ts +674 -381
- package/dist/lorebook.js +5 -5
- package/dist/lorebook.js.map +1 -1
- package/dist/normalizer.cjs +33 -23
- package/dist/normalizer.cjs.map +1 -1
- package/dist/normalizer.d.cts +896 -560
- package/dist/normalizer.d.ts +896 -560
- package/dist/normalizer.js +33 -23
- package/dist/normalizer.js.map +1 -1
- package/dist/png.cjs +27 -22
- package/dist/png.cjs.map +1 -1
- package/dist/png.d.cts +512 -312
- package/dist/png.d.ts +512 -312
- package/dist/png.js +27 -22
- package/dist/png.js.map +1 -1
- package/dist/schemas.cjs +41 -26
- package/dist/schemas.cjs.map +1 -1
- package/dist/schemas.d.cts +1444 -896
- package/dist/schemas.d.ts +1444 -896
- package/dist/schemas.js +41 -26
- package/dist/schemas.js.map +1 -1
- package/dist/voxta.cjs +48 -25
- package/dist/voxta.cjs.map +1 -1
- package/dist/voxta.d.cts +564 -318
- package/dist/voxta.d.ts +564 -318
- package/dist/voxta.js +48 -25
- package/dist/voxta.js.map +1 -1
- package/package.json +6 -6
package/dist/image-utils.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/image-utils.ts","../../image-utils/src/extraction.ts","../../image-utils/src/ssrf.ts"],"sourcesContent":["export * from '@character-foundry/image-utils';\n","/**\n * Image URL Extraction\n *\n * Extracts image URLs from markdown, HTML, and base64 data URLs in text.\n * Used across Federation, Archive, and Architect for image processing.\n */\n\nexport interface ImageExtractionOptions {\n /** Include markdown image syntax `` (default: true) */\n includeMarkdown?: boolean;\n /** Include HTML img tags `<img src=\"url\">` (default: true) */\n includeHTML?: boolean;\n /** Include base64 data URLs `data:image/...;base64,...` (default: true) */\n includeBase64?: boolean;\n}\n\nexport interface ExtractedImage {\n /** The extracted URL or data URL */\n url: string;\n /** Source format where the image was found */\n source: 'markdown' | 'html' | 'base64';\n /** Surrounding context for debugging (optional) */\n context?: string;\n}\n\n/**\n * Extract all image URLs from text.\n *\n * Supports markdown, HTML, and base64 data URLs.\n * Returns deduplicated results.\n *\n * @param text - Text to extract image URLs from\n * @param options - Extraction options\n * @returns Array of extracted images with source information\n *\n * @example\n * ```typescript\n * const text = 'Check out  and <img src=\"banner.jpg\">';\n * const images = extractImageUrls(text);\n * // [\n * // { url: 'avatar.png', source: 'markdown' },\n * // { url: 'banner.jpg', source: 'html' }\n * // ]\n * ```\n */\nexport function extractImageUrls(\n text: string,\n options: ImageExtractionOptions = {},\n): ExtractedImage[] {\n const {\n includeMarkdown = true,\n includeHTML = true,\n includeBase64 = true,\n } = options;\n\n const results: ExtractedImage[] = [];\n const seen = new Set<string>();\n\n // Extract markdown images:  or  or \n // Supports: standard, angle brackets, and SillyTavern dimension syntax\n if (includeMarkdown) {\n const markdownPattern = 
/!\\[([^\\]]*)\\]\\(<?([^>\\s)]+)>?(?:\\s*=[^)]+)?\\)/g;\n let match: RegExpExecArray | null;\n\n while ((match = markdownPattern.exec(text)) !== null) {\n const url = match[2]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'markdown',\n context: match[0],\n });\n }\n }\n }\n\n // Extract HTML img tags: <img src=\"url\"> (quoted and unquoted)\n if (includeHTML) {\n // Quoted src\n const htmlQuotedPattern = /<img[^>]+src=[\"']([^\"']+)[\"'][^>]*>/gi;\n let match: RegExpExecArray | null;\n\n while ((match = htmlQuotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // Unquoted src (e.g., <img src=url>)\n const htmlUnquotedPattern = /<img[^>]+src=([^\\s\"'>]+)[^>]*>/gi;\n while ((match = htmlUnquotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Skip if it starts with a quote (already handled above)\n if (url && !url.startsWith('\"') && !url.startsWith(\"'\") && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // CSS url() function: url(url), url(\"url\"), url('url')\n // Common in background-image, background, content properties\n const cssUrlPattern = /url\\([\"']?([^)\"']+)[\"']?\\)/gi;\n while ((match = cssUrlPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Only include http(s) URLs to avoid CSS variables and relative paths\n if (url && (url.startsWith('http://') || url.startsWith('https://')) && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n }\n\n // Extract plain image URLs (not wrapped in any syntax)\n if (includeHTML) {\n // Plain URLs with image extensions (common on image hosts)\n const plainUrlPattern = /(?<![(\"'])(https?:\\/\\/[^\\s<>\"']+\\.(?:jpg|jpeg|png|gif|webp|svg|avif|bmp))(?![)\"'])/gi;\n 
let match: RegExpExecArray | null;\n\n while ((match = plainUrlPattern.exec(text)) !== null) {\n const url = match[0]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: url,\n });\n }\n }\n }\n\n // Extract base64 data URLs: data:image/...;base64,...\n if (includeBase64) {\n const dataUrlPattern = /data:image\\/[^;]+;base64,[A-Za-z0-9+/=]+/g;\n let match: RegExpExecArray | null;\n\n while ((match = dataUrlPattern.exec(text)) !== null) {\n const url = match[0];\n if (!seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'base64',\n });\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract only HTTP/HTTPS URLs (excludes data URLs and relative paths).\n *\n * Convenience wrapper around extractImageUrls that filters for remote URLs.\n *\n * @param text - Text to extract URLs from\n * @returns Array of extracted HTTP/HTTPS image URLs\n */\nexport function extractRemoteImageUrls(text: string): ExtractedImage[] {\n const all = extractImageUrls(text, { includeBase64: false });\n return all.filter((img) => /^https?:\\/\\//i.test(img.url));\n}\n\n/**\n * Extract only base64 data URLs.\n *\n * Convenience wrapper around extractImageUrls.\n *\n * @param text - Text to extract data URLs from\n * @returns Array of base64 image data URLs\n */\nexport function extractDataUrls(text: string): ExtractedImage[] {\n return extractImageUrls(text, {\n includeMarkdown: false,\n includeHTML: false,\n includeBase64: true,\n });\n}\n\n/**\n * Count images in text without extracting full details.\n *\n * More efficient than extractImageUrls when you only need the count.\n *\n * @param text - Text to count images in\n * @returns Total number of unique image references\n */\nexport function countImages(text: string): number {\n return extractImageUrls(text).length;\n}\n","/**\n * SSRF (Server-Side Request Forgery) Protection\n *\n * Validates URLs to prevent SSRF attacks.\n * Browser-safe implementation (no Node.js 
dependencies).\n */\n\nexport interface SSRFPolicy {\n /** Allow private IP addresses (10.x, 172.16-31.x, 192.168.x) (default: false) */\n allowPrivateIPs?: boolean;\n /** Allow localhost/loopback (127.x, ::1) (default: false) */\n allowLocalhost?: boolean;\n /** Blocked domain patterns (e.g., ['internal.company.com', '*.local']) */\n blockedDomains?: string[];\n /** Allowed domain patterns - if provided, ONLY these are allowed */\n allowedDomains?: string[];\n /** Allow data URLs (data:image/...) (default: false) */\n allowDataUrls?: boolean;\n}\n\nexport interface SafetyCheck {\n /** Whether the URL is safe according to policy */\n safe: boolean;\n /** Reason why URL is unsafe (if safe=false) */\n reason?: string;\n}\n\n/**\n * Default SSRF policy - blocks private IPs, localhost, and data URLs.\n */\nexport const DEFAULT_SSRF_POLICY: Required<SSRFPolicy> = {\n allowPrivateIPs: false,\n allowLocalhost: false,\n blockedDomains: [],\n allowedDomains: [],\n allowDataUrls: false,\n};\n\n/**\n * Check if URL is safe to fetch according to SSRF policy.\n *\n * This is the canonical SSRF protection - all apps should use this\n * before fetching external URLs.\n *\n * @param url - URL to validate\n * @param policy - SSRF policy (uses defaults if not provided)\n * @returns Safety check result with reason if unsafe\n *\n * @example\n * ```typescript\n * const check = isURLSafe('http://10.0.0.1/secret');\n * if (!check.safe) {\n * console.error('Unsafe URL:', check.reason);\n * }\n * ```\n */\nexport function isURLSafe(\n url: string,\n policy: SSRFPolicy = {},\n): SafetyCheck {\n const config = { ...DEFAULT_SSRF_POLICY, ...policy };\n\n // Parse URL\n let parsed: URL;\n try {\n parsed = new URL(url);\n } catch {\n return { safe: false, reason: 'Invalid URL format' };\n }\n\n // Check protocol\n if (parsed.protocol === 'data:') {\n if (!config.allowDataUrls) {\n return { safe: false, reason: 'Data URLs not allowed' };\n }\n return { safe: true };\n }\n\n if (!['http:', 
'https:'].includes(parsed.protocol)) {\n return {\n safe: false,\n reason: `Protocol '${parsed.protocol}' not allowed (only http/https)`,\n };\n }\n\n const hostname = parsed.hostname.toLowerCase();\n\n // Check allowed domains (whitelist)\n if (config.allowedDomains.length > 0) {\n const isAllowed = config.allowedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (!isAllowed) {\n return {\n safe: false,\n reason: `Domain '${hostname}' not in allowed list`,\n };\n }\n }\n\n // Check blocked domains (blacklist)\n if (config.blockedDomains.length > 0) {\n const isBlocked = config.blockedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (isBlocked) {\n return {\n safe: false,\n reason: `Domain '${hostname}' is blocked`,\n };\n }\n }\n\n // Check localhost\n if (isLocalhost(hostname) && !config.allowLocalhost) {\n return { safe: false, reason: 'Localhost not allowed' };\n }\n\n // Check private IPs\n if (isPrivateIP(hostname) && !config.allowPrivateIPs) {\n return { safe: false, reason: 'Private IP addresses not allowed' };\n }\n\n return { safe: true };\n}\n\n/**\n * Quick check if URL is safe with default policy.\n *\n * Convenience wrapper for common case.\n *\n * @param url - URL to validate\n * @returns true if safe, false otherwise\n */\nexport function isSafeForFetch(url: string): boolean {\n return isURLSafe(url).safe;\n}\n\n/**\n * Match domain against pattern (supports wildcards).\n *\n * @example\n * ```typescript\n * matchDomainPattern('api.github.com', '*.github.com') // true\n * matchDomainPattern('github.com', '*.github.com') // false\n * matchDomainPattern('github.com', 'github.com') // true\n * ```\n */\nfunction matchDomainPattern(domain: string, pattern: string): boolean {\n // Exact match\n if (domain === pattern) return true;\n\n // Wildcard match - only matches subdomains, not the base domain\n if (pattern.startsWith('*.')) {\n const suffix = pattern.slice(2);\n return domain.endsWith('.' 
+ suffix);\n }\n\n return false;\n}\n\n/**\n * Check if hostname is localhost/loopback.\n */\nfunction isLocalhost(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 loopback\n if (lower === 'localhost' || lower === '0.0.0.0' || lower.startsWith('127.')) {\n return true;\n }\n\n // IPv6 loopback\n if (lower === '::1' || lower === '[::1]') {\n return true;\n }\n\n return false;\n}\n\n/**\n * Check if hostname is a private IP address.\n *\n * Checks for:\n * - 10.0.0.0/8\n * - 172.16.0.0/12\n * - 192.168.0.0/16\n * - Link-local: 169.254.0.0/16 (includes AWS metadata endpoint)\n * - IPv6 private ranges\n */\nfunction isPrivateIP(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 private ranges\n const parts = hostname.split('.').map(Number);\n\n if (\n parts.length === 4 &&\n parts.every((p) => p >= 0 && p <= 255 && !isNaN(p))\n ) {\n const [octet1, octet2] = parts;\n\n // 10.0.0.0/8\n if (octet1 === 10) return true;\n\n // 172.16.0.0/12\n if (octet1 === 172 && octet2 !== undefined && octet2 >= 16 && octet2 <= 31)\n return true;\n\n // 192.168.0.0/16\n if (octet1 === 192 && octet2 === 168) return true;\n\n // 169.254.0.0/16 (link-local) - includes AWS metadata endpoint\n if (octet1 === 169 && octet2 === 254) return true;\n }\n\n // IPv6 private ranges using regex patterns\n // Strip brackets if present (URL parser adds them: [fc00::1])\n const cleanedHostname = lower.replace(/^\\[|\\]$/g, '');\n\n const ipv6Patterns = [\n /^f[cd][0-9a-f]{2}:/i, // fc00::/7 (includes both fc and fd ranges)\n /^fe80:/i, // fe80::/10 (link-local)\n ];\n\n for (const pattern of ipv6Patterns) {\n if (pattern.test(cleanedHostname)) {\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Filter array of URLs, keeping only safe ones.\n *\n * @param urls - Array of URLs to filter\n * @param policy - SSRF policy\n * @returns Array of safe URLs\n */\nexport function filterSafeUrls(\n urls: string[],\n policy?: SSRFPolicy,\n): string[] 
{\n return urls.filter((url) => isURLSafe(url, policy).safe);\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;AC6CO,SAAS,iBACd,MACA,UAAkC,CAAC,GACjB;AAClB,QAAM;IACJ,kBAAkB;IAClB,cAAc;IACd,gBAAgB;EAClB,IAAI;AAEJ,QAAM,UAA4B,CAAC;AACnC,QAAM,OAAO,oBAAI,IAAY;AAI7B,MAAI,iBAAiB;AACnB,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,oBAAoB;AAC1B,QAAI;AAEJ,YAAQ,QAAQ,kBAAkB,KAAK,IAAI,OAAO,MAAM;AACtD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAGA,UAAM,sBAAsB;AAC5B,YAAQ,QAAQ,oBAAoB,KAAK,IAAI,OAAO,MAAM;AACxD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,OAAO,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,GAAG;AACzE,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAIA,UAAM,gBAAgB;AACtB,YAAQ,QAAQ,cAAc,KAAK,IAAI,OAAO,MAAM;AAClD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,QAAQ,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU,MAAM,CAAC,KAAK,IAAI,GAAG,GAAG;AACtF,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS;QACX,CAAC;MACH;IACF;EACF;AAGA,MAAI,eAAe;AACjB,UAAM,iBAAiB;AACvB,QAAI;AAEJ,YAAQ,QAAQ,eAAe,KAAK,IAAI,OAAO,MAAM;AACnD,YAAM,MAAM,MAAM,CAAC;AACnB,UAAI,CAAC,KAAK,IAAI,GAAG,GAAG;AAClB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;QACV,CAAC;MACH;IACF;EACF;AAEA,SAAO;AACT;AAUO,SAAS,uBAAuB,MAAgC;AACrE,QAAM,MAAM,iBAAiB,MAAM,EAAE,eAAe,MAAM,CAAC;AAC3D,SAAO,IAAI,OAAO,CAAC,QAAQ,gBAAgB,KAAK,IAAI,GAAG,CAAC;AAC1D
;AAUO,SAAS,gBAAgB,MAAgC;AAC9D,SAAO,iBAAiB,MAAM;IAC5B,iBAAiB;IACjB,aAAa;IACb,eAAe;EACjB,CAAC;AACH;AAUO,SAAS,YAAY,MAAsB;AAChD,SAAO,iBAAiB,IAAI,EAAE;AAChC;AC/KO,IAAM,sBAA4C;EACvD,iBAAiB;EACjB,gBAAgB;EAChB,gBAAgB,CAAC;EACjB,gBAAgB,CAAC;EACjB,eAAe;AACjB;AAoBO,SAAS,UACd,KACA,SAAqB,CAAC,GACT;AACb,QAAM,SAAS,EAAE,GAAG,qBAAqB,GAAG,OAAO;AAGnD,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;EACtB,QAAQ;AACN,WAAO,EAAE,MAAM,OAAO,QAAQ,qBAAqB;EACrD;AAGA,MAAI,OAAO,aAAa,SAAS;AAC/B,QAAI,CAAC,OAAO,eAAe;AACzB,aAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;IACxD;AACA,WAAO,EAAE,MAAM,KAAK;EACtB;AAEA,MAAI,CAAC,CAAC,SAAS,QAAQ,EAAE,SAAS,OAAO,QAAQ,GAAG;AAClD,WAAO;MACL,MAAM;MACN,QAAQ,aAAa,OAAO,QAAQ;IACtC;EACF;AAEA,QAAM,WAAW,OAAO,SAAS,YAAY;AAG7C,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,CAAC,WAAW;AACd,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,WAAW;AACb,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,gBAAgB;AACnD,WAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;EACxD;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,iBAAiB;AACpD,WAAO,EAAE,MAAM,OAAO,QAAQ,mCAAmC;EACnE;AAEA,SAAO,EAAE,MAAM,KAAK;AACtB;AAUO,SAAS,eAAe,KAAsB;AACnD,SAAO,UAAU,GAAG,EAAE;AACxB;AAYA,SAAS,mBAAmB,QAAgB,SAA0B;AAEpE,MAAI,WAAW,QAAS,QAAO;AAG/B,MAAI,QAAQ,WAAW,IAAI,GAAG;AAC5B,UAAM,SAAS,QAAQ,MAAM,CAAC;AAC9B,WAAO,OAAO,SAAS,MAAM,MAAM;EACrC;AAEA,SAAO;AACT;AAKA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,MAAI,UAAU,eAAe,UAAU,aAAa,MAAM,WAAW,MAAM,GAAG;AAC5E,WAAO;EACT;AAGA,MAAI,UAAU,SAAS,UAAU,SAAS;AACxC,WAAO;EACT;AAEA,SAAO;AACT;AAYA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,QAAM,QAAQ,SAAS,MAAM,GAAG,EAAE,IAAI,MAAM;AAE5C,MACE,MAAM,WAAW,KACjB,MAAM,MAAM,CAAC,MAAM,KAAK,KAAK,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,GAClD;AACA,UAAM,CAAC,QAAQ,MAAM,IAAI;AAGzB,QAAI,WAAW,GAAI,QAAO;AAG1B,QAAI,WAAW,OAAO,WAAW,UAAa,UAAU,MAAM,UAAU;AACtE,aAAO;AAGT,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;AAG7C,QAAI,WAAW,OAAO,WAAW,IAAK,Q
AAO;EAC/C;AAIA,QAAM,kBAAkB,MAAM,QAAQ,YAAY,EAAE;AAEpD,QAAM,eAAe;IACnB;;IACA;;EACF;AAEA,aAAW,WAAW,cAAc;AAClC,QAAI,QAAQ,KAAK,eAAe,GAAG;AACjC,aAAO;IACT;EACF;AAEA,SAAO;AACT;AASO,SAAS,eACd,MACA,QACU;AACV,SAAO,KAAK,OAAO,CAAC,QAAQ,UAAU,KAAK,MAAM,EAAE,IAAI;AACzD;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/image-utils.ts","../../image-utils/src/extraction.ts","../../image-utils/src/ssrf.ts"],"sourcesContent":["export * from '@character-foundry/image-utils';\n","/**\n * Image URL Extraction\n *\n * Extracts image URLs from markdown, HTML, and base64 data URLs in text.\n * Used across Federation, Archive, and Architect for image processing.\n */\n\nexport interface ImageExtractionOptions {\n /** Include markdown image syntax `` (default: true) */\n includeMarkdown?: boolean;\n /** Include HTML img tags `<img src=\"url\">` (default: true) */\n includeHTML?: boolean;\n /** Include base64 data URLs `data:image/...;base64,...` (default: true) */\n includeBase64?: boolean;\n}\n\nexport interface ExtractedImage {\n /** The extracted URL or data URL */\n url: string;\n /** Source format where the image was found */\n source: 'markdown' | 'html' | 'base64';\n /** Surrounding context for debugging (optional) */\n context?: string;\n}\n\n/**\n * Extract all image URLs from text.\n *\n * Supports markdown, HTML, and base64 data URLs.\n * Returns deduplicated results.\n *\n * @param text - Text to extract image URLs from\n * @param options - Extraction options\n * @returns Array of extracted images with source information\n *\n * @example\n * ```typescript\n * const text = 'Check out  and <img src=\"banner.jpg\">';\n * const images = extractImageUrls(text);\n * // [\n * // { url: 'avatar.png', source: 'markdown' },\n * // { url: 'banner.jpg', source: 'html' }\n * // ]\n * ```\n */\nexport function extractImageUrls(\n text: string,\n options: ImageExtractionOptions = {},\n): ExtractedImage[] {\n const {\n includeMarkdown = true,\n includeHTML = true,\n includeBase64 = true,\n } = options;\n\n const results: ExtractedImage[] = [];\n const seen = new Set<string>();\n\n // Extract markdown images:  or  or \n // Supports: standard, angle brackets, and SillyTavern dimension syntax\n if (includeMarkdown) {\n const markdownPattern = 
/!\\[([^\\]]*)\\]\\(<?([^>\\s)]+)>?(?:\\s*=[^)]+)?\\)/g;\n let match: RegExpExecArray | null;\n\n while ((match = markdownPattern.exec(text)) !== null) {\n const url = match[2]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'markdown',\n context: match[0],\n });\n }\n }\n }\n\n // Extract HTML img tags: <img src=\"url\"> (quoted and unquoted)\n if (includeHTML) {\n // Quoted src\n const htmlQuotedPattern = /<img[^>]+src=[\"']([^\"']+)[\"'][^>]*>/gi;\n let match: RegExpExecArray | null;\n\n while ((match = htmlQuotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // Unquoted src (e.g., <img src=url>)\n const htmlUnquotedPattern = /<img[^>]+src=([^\\s\"'>]+)[^>]*>/gi;\n while ((match = htmlUnquotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Skip if it starts with a quote (already handled above)\n if (url && !url.startsWith('\"') && !url.startsWith(\"'\") && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // CSS url() function: url(url), url(\"url\"), url('url')\n // Common in background-image, background, content properties\n const cssUrlPattern = /url\\([\"']?([^)\"']+)[\"']?\\)/gi;\n while ((match = cssUrlPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Only include http(s) URLs to avoid CSS variables and relative paths\n if (url && (url.startsWith('http://') || url.startsWith('https://')) && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n }\n\n // Extract plain image URLs (not wrapped in any syntax)\n if (includeHTML) {\n // Plain URLs with image extensions (common on image hosts)\n const plainUrlPattern = /(?<![(\"'])(https?:\\/\\/[^\\s<>\"']+\\.(?:jpg|jpeg|png|gif|webp|svg|avif|bmp))(?![)\"'])/gi;\n 
let match: RegExpExecArray | null;\n\n while ((match = plainUrlPattern.exec(text)) !== null) {\n const url = match[0]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: url,\n });\n }\n }\n }\n\n // Extract base64 data URLs: data:image/...;base64,...\n if (includeBase64) {\n const dataUrlPattern = /data:image\\/[^;]+;base64,[A-Za-z0-9+/=]+/g;\n let match: RegExpExecArray | null;\n\n while ((match = dataUrlPattern.exec(text)) !== null) {\n const url = match[0];\n if (!seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'base64',\n });\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract only HTTP/HTTPS URLs (excludes data URLs and relative paths).\n *\n * Convenience wrapper around extractImageUrls that filters for remote URLs.\n *\n * @param text - Text to extract URLs from\n * @returns Array of extracted HTTP/HTTPS image URLs\n */\nexport function extractRemoteImageUrls(text: string): ExtractedImage[] {\n const all = extractImageUrls(text, { includeBase64: false });\n return all.filter((img) => /^https?:\\/\\//i.test(img.url));\n}\n\n/**\n * Extract only base64 data URLs.\n *\n * Convenience wrapper around extractImageUrls.\n *\n * @param text - Text to extract data URLs from\n * @returns Array of base64 image data URLs\n */\nexport function extractDataUrls(text: string): ExtractedImage[] {\n return extractImageUrls(text, {\n includeMarkdown: false,\n includeHTML: false,\n includeBase64: true,\n });\n}\n\n/**\n * Count images in text without extracting full details.\n *\n * More efficient than extractImageUrls when you only need the count.\n *\n * @param text - Text to count images in\n * @returns Total number of unique image references\n */\nexport function countImages(text: string): number {\n return extractImageUrls(text).length;\n}\n","/**\n * SSRF (Server-Side Request Forgery) Protection\n *\n * Validates URLs to prevent SSRF attacks.\n * Browser-safe implementation (no Node.js 
dependencies).\n */\n\nexport interface SSRFPolicy {\n /** Allow private IP addresses (10.x, 172.16-31.x, 192.168.x) (default: false) */\n allowPrivateIPs?: boolean;\n /** Allow localhost/loopback (127.x, ::1) (default: false) */\n allowLocalhost?: boolean;\n /** Blocked domain patterns (e.g., ['internal.company.com', '*.local']) */\n blockedDomains?: string[];\n /** Allowed domain patterns - if provided, ONLY these are allowed */\n allowedDomains?: string[];\n /** Allow data URLs (data:image/...) (default: false) */\n allowDataUrls?: boolean;\n}\n\nexport interface SafetyCheck {\n /** Whether the URL is safe according to policy */\n safe: boolean;\n /** Reason why URL is unsafe (if safe=false) */\n reason?: string;\n}\n\n/**\n * Default SSRF policy - blocks private IPs, localhost, and data URLs.\n */\nexport const DEFAULT_SSRF_POLICY: Required<SSRFPolicy> = {\n allowPrivateIPs: false,\n allowLocalhost: false,\n blockedDomains: [],\n allowedDomains: [],\n allowDataUrls: false,\n};\n\n/**\n * Check if URL is safe to fetch according to SSRF policy.\n *\n * This is the canonical SSRF protection - all apps should use this\n * before fetching external URLs.\n *\n * **Limitations (browser-side validation only):**\n * - Cannot detect domains that resolve to private IPs (e.g., `attacker.com` -> `127.0.0.1`)\n * - Cannot prevent DNS rebinding attacks (domain resolves differently on subsequent requests)\n * - Cannot validate redirect destinations (server may redirect to internal URLs)\n * - Does not perform actual DNS resolution - only validates URL syntax and hostname patterns\n *\n * For server-side applications requiring stronger SSRF protection, perform DNS resolution\n * and validate the resolved IP address before making the request, and validate redirect\n * destinations or disable redirects entirely.\n *\n * @param url - URL to validate\n * @param policy - SSRF policy (uses defaults if not provided)\n * @returns Safety check result with reason if unsafe\n *\n * @see 
https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html\n *\n * @example\n * ```typescript\n * const check = isURLSafe('http://10.0.0.1/secret');\n * if (!check.safe) {\n * console.error('Unsafe URL:', check.reason);\n * }\n * ```\n */\nexport function isURLSafe(\n url: string,\n policy: SSRFPolicy = {},\n): SafetyCheck {\n const config = { ...DEFAULT_SSRF_POLICY, ...policy };\n\n // Parse URL\n let parsed: URL;\n try {\n parsed = new URL(url);\n } catch {\n return { safe: false, reason: 'Invalid URL format' };\n }\n\n // Check protocol\n if (parsed.protocol === 'data:') {\n if (!config.allowDataUrls) {\n return { safe: false, reason: 'Data URLs not allowed' };\n }\n return { safe: true };\n }\n\n if (!['http:', 'https:'].includes(parsed.protocol)) {\n return {\n safe: false,\n reason: `Protocol '${parsed.protocol}' not allowed (only http/https)`,\n };\n }\n\n const hostname = parsed.hostname.toLowerCase();\n\n // Check allowed domains (whitelist)\n if (config.allowedDomains.length > 0) {\n const isAllowed = config.allowedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (!isAllowed) {\n return {\n safe: false,\n reason: `Domain '${hostname}' not in allowed list`,\n };\n }\n }\n\n // Check blocked domains (blacklist)\n if (config.blockedDomains.length > 0) {\n const isBlocked = config.blockedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (isBlocked) {\n return {\n safe: false,\n reason: `Domain '${hostname}' is blocked`,\n };\n }\n }\n\n // Check localhost\n if (isLocalhost(hostname) && !config.allowLocalhost) {\n return { safe: false, reason: 'Localhost not allowed' };\n }\n\n // Check private IPs\n if (isPrivateIP(hostname) && !config.allowPrivateIPs) {\n return { safe: false, reason: 'Private IP addresses not allowed' };\n }\n\n return { safe: true };\n}\n\n/**\n * Quick check if URL is safe with default policy.\n *\n * Convenience wrapper for common case.\n 
*\n * @param url - URL to validate\n * @returns true if safe, false otherwise\n */\nexport function isSafeForFetch(url: string): boolean {\n return isURLSafe(url).safe;\n}\n\n/**\n * Match domain against pattern (supports wildcards).\n *\n * @example\n * ```typescript\n * matchDomainPattern('api.github.com', '*.github.com') // true\n * matchDomainPattern('github.com', '*.github.com') // false\n * matchDomainPattern('github.com', 'github.com') // true\n * ```\n */\nfunction matchDomainPattern(domain: string, pattern: string): boolean {\n // Exact match\n if (domain === pattern) return true;\n\n // Wildcard match - only matches subdomains, not the base domain\n if (pattern.startsWith('*.')) {\n const suffix = pattern.slice(2);\n return domain.endsWith('.' + suffix);\n }\n\n return false;\n}\n\n/**\n * Check if hostname is localhost/loopback.\n */\nfunction isLocalhost(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 loopback\n if (lower === 'localhost' || lower === '0.0.0.0' || lower.startsWith('127.')) {\n return true;\n }\n\n // IPv6 loopback\n if (lower === '::1' || lower === '[::1]') {\n return true;\n }\n\n return false;\n}\n\n/**\n * Check if hostname is a private IP address.\n *\n * Checks for:\n * - 10.0.0.0/8\n * - 172.16.0.0/12\n * - 192.168.0.0/16\n * - Link-local: 169.254.0.0/16 (includes AWS metadata endpoint)\n * - IPv6 private ranges\n */\nfunction isPrivateIP(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 private ranges\n const parts = hostname.split('.').map(Number);\n\n if (\n parts.length === 4 &&\n parts.every((p) => p >= 0 && p <= 255 && !isNaN(p))\n ) {\n const [octet1, octet2] = parts;\n\n // 10.0.0.0/8\n if (octet1 === 10) return true;\n\n // 172.16.0.0/12\n if (octet1 === 172 && octet2 !== undefined && octet2 >= 16 && octet2 <= 31)\n return true;\n\n // 192.168.0.0/16\n if (octet1 === 192 && octet2 === 168) return true;\n\n // 169.254.0.0/16 (link-local) - includes AWS metadata 
endpoint\n if (octet1 === 169 && octet2 === 254) return true;\n }\n\n // IPv6 private ranges using regex patterns\n // Strip brackets if present (URL parser adds them: [fc00::1])\n const cleanedHostname = lower.replace(/^\\[|\\]$/g, '');\n\n const ipv6Patterns = [\n /^f[cd][0-9a-f]{2}:/i, // fc00::/7 (includes both fc and fd ranges)\n /^fe80:/i, // fe80::/10 (link-local)\n ];\n\n for (const pattern of ipv6Patterns) {\n if (pattern.test(cleanedHostname)) {\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Filter array of URLs, keeping only safe ones.\n *\n * @param urls - Array of URLs to filter\n * @param policy - SSRF policy\n * @returns Array of safe URLs\n */\nexport function filterSafeUrls(\n urls: string[],\n policy?: SSRFPolicy,\n): string[] {\n return urls.filter((url) => isURLSafe(url, policy).safe);\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;AC6CO,SAAS,iBACd,MACA,UAAkC,CAAC,GACjB;AAClB,QAAM;IACJ,kBAAkB;IAClB,cAAc;IACd,gBAAgB;EAClB,IAAI;AAEJ,QAAM,UAA4B,CAAC;AACnC,QAAM,OAAO,oBAAI,IAAY;AAI7B,MAAI,iBAAiB;AACnB,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,oBAAoB;AAC1B,QAAI;AAEJ,YAAQ,QAAQ,kBAAkB,KAAK,IAAI,OAAO,MAAM;AACtD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAGA,UAAM,sBAAsB;AAC5B,YAAQ,QAAQ,oBAAoB,KAAK,IAAI,OAAO,MAAM;AACxD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,OAAO,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,GAAG;AACzE,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAIA,UAAM,gBAAgB;AACtB,YAAQ,QAAQ,cAAc,KAAK,IAAI,OAAO,MAAM;AAClD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,QAAQ,IAAI,WAAW,SAAS,KAAK,I
AAI,WAAW,UAAU,MAAM,CAAC,KAAK,IAAI,GAAG,GAAG;AACtF,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS;QACX,CAAC;MACH;IACF;EACF;AAGA,MAAI,eAAe;AACjB,UAAM,iBAAiB;AACvB,QAAI;AAEJ,YAAQ,QAAQ,eAAe,KAAK,IAAI,OAAO,MAAM;AACnD,YAAM,MAAM,MAAM,CAAC;AACnB,UAAI,CAAC,KAAK,IAAI,GAAG,GAAG;AAClB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;QACV,CAAC;MACH;IACF;EACF;AAEA,SAAO;AACT;AAUO,SAAS,uBAAuB,MAAgC;AACrE,QAAM,MAAM,iBAAiB,MAAM,EAAE,eAAe,MAAM,CAAC;AAC3D,SAAO,IAAI,OAAO,CAAC,QAAQ,gBAAgB,KAAK,IAAI,GAAG,CAAC;AAC1D;AAUO,SAAS,gBAAgB,MAAgC;AAC9D,SAAO,iBAAiB,MAAM;IAC5B,iBAAiB;IACjB,aAAa;IACb,eAAe;EACjB,CAAC;AACH;AAUO,SAAS,YAAY,MAAsB;AAChD,SAAO,iBAAiB,IAAI,EAAE;AAChC;AC/KO,IAAM,sBAA4C;EACvD,iBAAiB;EACjB,gBAAgB;EAChB,gBAAgB,CAAC;EACjB,gBAAgB,CAAC;EACjB,eAAe;AACjB;AAgCO,SAAS,UACd,KACA,SAAqB,CAAC,GACT;AACb,QAAM,SAAS,EAAE,GAAG,qBAAqB,GAAG,OAAO;AAGnD,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;EACtB,QAAQ;AACN,WAAO,EAAE,MAAM,OAAO,QAAQ,qBAAqB;EACrD;AAGA,MAAI,OAAO,aAAa,SAAS;AAC/B,QAAI,CAAC,OAAO,eAAe;AACzB,aAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;IACxD;AACA,WAAO,EAAE,MAAM,KAAK;EACtB;AAEA,MAAI,CAAC,CAAC,SAAS,QAAQ,EAAE,SAAS,OAAO,QAAQ,GAAG;AAClD,WAAO;MACL,MAAM;MACN,QAAQ,aAAa,OAAO,QAAQ;IACtC;EACF;AAEA,QAAM,WAAW,OAAO,SAAS,YAAY;AAG7C,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,CAAC,WAAW;AACd,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,WAAW;AACb,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,gBAAgB;AACnD,WAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;EACxD;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,iBAAiB;AACpD,WAAO,EAAE,MAAM,OAAO,QAAQ,mCAAmC;EACnE;AAEA,SAAO,EAAE,MAAM,KAAK;AACtB;
AAUO,SAAS,eAAe,KAAsB;AACnD,SAAO,UAAU,GAAG,EAAE;AACxB;AAYA,SAAS,mBAAmB,QAAgB,SAA0B;AAEpE,MAAI,WAAW,QAAS,QAAO;AAG/B,MAAI,QAAQ,WAAW,IAAI,GAAG;AAC5B,UAAM,SAAS,QAAQ,MAAM,CAAC;AAC9B,WAAO,OAAO,SAAS,MAAM,MAAM;EACrC;AAEA,SAAO;AACT;AAKA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,MAAI,UAAU,eAAe,UAAU,aAAa,MAAM,WAAW,MAAM,GAAG;AAC5E,WAAO;EACT;AAGA,MAAI,UAAU,SAAS,UAAU,SAAS;AACxC,WAAO;EACT;AAEA,SAAO;AACT;AAYA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,QAAM,QAAQ,SAAS,MAAM,GAAG,EAAE,IAAI,MAAM;AAE5C,MACE,MAAM,WAAW,KACjB,MAAM,MAAM,CAAC,MAAM,KAAK,KAAK,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,GAClD;AACA,UAAM,CAAC,QAAQ,MAAM,IAAI;AAGzB,QAAI,WAAW,GAAI,QAAO;AAG1B,QAAI,WAAW,OAAO,WAAW,UAAa,UAAU,MAAM,UAAU;AACtE,aAAO;AAGT,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;AAG7C,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;EAC/C;AAIA,QAAM,kBAAkB,MAAM,QAAQ,YAAY,EAAE;AAEpD,QAAM,eAAe;IACnB;;IACA;;EACF;AAEA,aAAW,WAAW,cAAc;AAClC,QAAI,QAAQ,KAAK,eAAe,GAAG;AACjC,aAAO;IACT;EACF;AAEA,SAAO;AACT;AASO,SAAS,eACd,MACA,QACU;AACV,SAAO,KAAK,OAAO,CAAC,QAAQ,UAAU,KAAK,MAAM,EAAE,IAAI;AACzD;","names":[]}
|
package/dist/image-utils.d.cts
CHANGED
|
@@ -102,10 +102,22 @@ export declare const DEFAULT_SSRF_POLICY: Required<SSRFPolicy>;
|
|
|
102
102
|
* This is the canonical SSRF protection - all apps should use this
|
|
103
103
|
* before fetching external URLs.
|
|
104
104
|
*
|
|
105
|
+
* **Limitations (browser-side validation only):**
|
|
106
|
+
* - Cannot detect domains that resolve to private IPs (e.g., `attacker.com` -> `127.0.0.1`)
|
|
107
|
+
* - Cannot prevent DNS rebinding attacks (domain resolves differently on subsequent requests)
|
|
108
|
+
* - Cannot validate redirect destinations (server may redirect to internal URLs)
|
|
109
|
+
* - Does not perform actual DNS resolution - only validates URL syntax and hostname patterns
|
|
110
|
+
*
|
|
111
|
+
* For server-side applications requiring stronger SSRF protection, perform DNS resolution
|
|
112
|
+
* and validate the resolved IP address before making the request, and validate redirect
|
|
113
|
+
* destinations or disable redirects entirely.
|
|
114
|
+
*
|
|
105
115
|
* @param url - URL to validate
|
|
106
116
|
* @param policy - SSRF policy (uses defaults if not provided)
|
|
107
117
|
* @returns Safety check result with reason if unsafe
|
|
108
118
|
*
|
|
119
|
+
* @see https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html
|
|
120
|
+
*
|
|
109
121
|
* @example
|
|
110
122
|
* ```typescript
|
|
111
123
|
* const check = isURLSafe('http://10.0.0.1/secret');
|
package/dist/image-utils.d.ts
CHANGED
|
@@ -102,10 +102,22 @@ export declare const DEFAULT_SSRF_POLICY: Required<SSRFPolicy>;
|
|
|
102
102
|
* This is the canonical SSRF protection - all apps should use this
|
|
103
103
|
* before fetching external URLs.
|
|
104
104
|
*
|
|
105
|
+
* **Limitations (browser-side validation only):**
|
|
106
|
+
* - Cannot detect domains that resolve to private IPs (e.g., `attacker.com` -> `127.0.0.1`)
|
|
107
|
+
* - Cannot prevent DNS rebinding attacks (domain resolves differently on subsequent requests)
|
|
108
|
+
* - Cannot validate redirect destinations (server may redirect to internal URLs)
|
|
109
|
+
* - Does not perform actual DNS resolution - only validates URL syntax and hostname patterns
|
|
110
|
+
*
|
|
111
|
+
* For server-side applications requiring stronger SSRF protection, perform DNS resolution
|
|
112
|
+
* and validate the resolved IP address before making the request, and validate redirect
|
|
113
|
+
* destinations or disable redirects entirely.
|
|
114
|
+
*
|
|
105
115
|
* @param url - URL to validate
|
|
106
116
|
* @param policy - SSRF policy (uses defaults if not provided)
|
|
107
117
|
* @returns Safety check result with reason if unsafe
|
|
108
118
|
*
|
|
119
|
+
* @see https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html
|
|
120
|
+
*
|
|
109
121
|
* @example
|
|
110
122
|
* ```typescript
|
|
111
123
|
* const check = isURLSafe('http://10.0.0.1/secret');
|
package/dist/image-utils.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../image-utils/src/extraction.ts","../../image-utils/src/ssrf.ts"],"sourcesContent":["/**\n * Image URL Extraction\n *\n * Extracts image URLs from markdown, HTML, and base64 data URLs in text.\n * Used across Federation, Archive, and Architect for image processing.\n */\n\nexport interface ImageExtractionOptions {\n /** Include markdown image syntax `` (default: true) */\n includeMarkdown?: boolean;\n /** Include HTML img tags `<img src=\"url\">` (default: true) */\n includeHTML?: boolean;\n /** Include base64 data URLs `data:image/...;base64,...` (default: true) */\n includeBase64?: boolean;\n}\n\nexport interface ExtractedImage {\n /** The extracted URL or data URL */\n url: string;\n /** Source format where the image was found */\n source: 'markdown' | 'html' | 'base64';\n /** Surrounding context for debugging (optional) */\n context?: string;\n}\n\n/**\n * Extract all image URLs from text.\n *\n * Supports markdown, HTML, and base64 data URLs.\n * Returns deduplicated results.\n *\n * @param text - Text to extract image URLs from\n * @param options - Extraction options\n * @returns Array of extracted images with source information\n *\n * @example\n * ```typescript\n * const text = 'Check out  and <img src=\"banner.jpg\">';\n * const images = extractImageUrls(text);\n * // [\n * // { url: 'avatar.png', source: 'markdown' },\n * // { url: 'banner.jpg', source: 'html' }\n * // ]\n * ```\n */\nexport function extractImageUrls(\n text: string,\n options: ImageExtractionOptions = {},\n): ExtractedImage[] {\n const {\n includeMarkdown = true,\n includeHTML = true,\n includeBase64 = true,\n } = options;\n\n const results: ExtractedImage[] = [];\n const seen = new Set<string>();\n\n // Extract markdown images:  or  or \n // Supports: standard, angle brackets, and SillyTavern dimension syntax\n if (includeMarkdown) {\n const markdownPattern = /!\\[([^\\]]*)\\]\\(<?([^>\\s)]+)>?(?:\\s*=[^)]+)?\\)/g;\n let match: RegExpExecArray | null;\n\n 
while ((match = markdownPattern.exec(text)) !== null) {\n const url = match[2]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'markdown',\n context: match[0],\n });\n }\n }\n }\n\n // Extract HTML img tags: <img src=\"url\"> (quoted and unquoted)\n if (includeHTML) {\n // Quoted src\n const htmlQuotedPattern = /<img[^>]+src=[\"']([^\"']+)[\"'][^>]*>/gi;\n let match: RegExpExecArray | null;\n\n while ((match = htmlQuotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // Unquoted src (e.g., <img src=url>)\n const htmlUnquotedPattern = /<img[^>]+src=([^\\s\"'>]+)[^>]*>/gi;\n while ((match = htmlUnquotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Skip if it starts with a quote (already handled above)\n if (url && !url.startsWith('\"') && !url.startsWith(\"'\") && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // CSS url() function: url(url), url(\"url\"), url('url')\n // Common in background-image, background, content properties\n const cssUrlPattern = /url\\([\"']?([^)\"']+)[\"']?\\)/gi;\n while ((match = cssUrlPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Only include http(s) URLs to avoid CSS variables and relative paths\n if (url && (url.startsWith('http://') || url.startsWith('https://')) && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n }\n\n // Extract plain image URLs (not wrapped in any syntax)\n if (includeHTML) {\n // Plain URLs with image extensions (common on image hosts)\n const plainUrlPattern = /(?<![(\"'])(https?:\\/\\/[^\\s<>\"']+\\.(?:jpg|jpeg|png|gif|webp|svg|avif|bmp))(?![)\"'])/gi;\n let match: RegExpExecArray | null;\n\n while ((match = plainUrlPattern.exec(text)) !== null) {\n 
const url = match[0]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: url,\n });\n }\n }\n }\n\n // Extract base64 data URLs: data:image/...;base64,...\n if (includeBase64) {\n const dataUrlPattern = /data:image\\/[^;]+;base64,[A-Za-z0-9+/=]+/g;\n let match: RegExpExecArray | null;\n\n while ((match = dataUrlPattern.exec(text)) !== null) {\n const url = match[0];\n if (!seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'base64',\n });\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract only HTTP/HTTPS URLs (excludes data URLs and relative paths).\n *\n * Convenience wrapper around extractImageUrls that filters for remote URLs.\n *\n * @param text - Text to extract URLs from\n * @returns Array of extracted HTTP/HTTPS image URLs\n */\nexport function extractRemoteImageUrls(text: string): ExtractedImage[] {\n const all = extractImageUrls(text, { includeBase64: false });\n return all.filter((img) => /^https?:\\/\\//i.test(img.url));\n}\n\n/**\n * Extract only base64 data URLs.\n *\n * Convenience wrapper around extractImageUrls.\n *\n * @param text - Text to extract data URLs from\n * @returns Array of base64 image data URLs\n */\nexport function extractDataUrls(text: string): ExtractedImage[] {\n return extractImageUrls(text, {\n includeMarkdown: false,\n includeHTML: false,\n includeBase64: true,\n });\n}\n\n/**\n * Count images in text without extracting full details.\n *\n * More efficient than extractImageUrls when you only need the count.\n *\n * @param text - Text to count images in\n * @returns Total number of unique image references\n */\nexport function countImages(text: string): number {\n return extractImageUrls(text).length;\n}\n","/**\n * SSRF (Server-Side Request Forgery) Protection\n *\n * Validates URLs to prevent SSRF attacks.\n * Browser-safe implementation (no Node.js dependencies).\n */\n\nexport interface SSRFPolicy {\n /** Allow private IP addresses (10.x, 
172.16-31.x, 192.168.x) (default: false) */\n allowPrivateIPs?: boolean;\n /** Allow localhost/loopback (127.x, ::1) (default: false) */\n allowLocalhost?: boolean;\n /** Blocked domain patterns (e.g., ['internal.company.com', '*.local']) */\n blockedDomains?: string[];\n /** Allowed domain patterns - if provided, ONLY these are allowed */\n allowedDomains?: string[];\n /** Allow data URLs (data:image/...) (default: false) */\n allowDataUrls?: boolean;\n}\n\nexport interface SafetyCheck {\n /** Whether the URL is safe according to policy */\n safe: boolean;\n /** Reason why URL is unsafe (if safe=false) */\n reason?: string;\n}\n\n/**\n * Default SSRF policy - blocks private IPs, localhost, and data URLs.\n */\nexport const DEFAULT_SSRF_POLICY: Required<SSRFPolicy> = {\n allowPrivateIPs: false,\n allowLocalhost: false,\n blockedDomains: [],\n allowedDomains: [],\n allowDataUrls: false,\n};\n\n/**\n * Check if URL is safe to fetch according to SSRF policy.\n *\n * This is the canonical SSRF protection - all apps should use this\n * before fetching external URLs.\n *\n * @param url - URL to validate\n * @param policy - SSRF policy (uses defaults if not provided)\n * @returns Safety check result with reason if unsafe\n *\n * @example\n * ```typescript\n * const check = isURLSafe('http://10.0.0.1/secret');\n * if (!check.safe) {\n * console.error('Unsafe URL:', check.reason);\n * }\n * ```\n */\nexport function isURLSafe(\n url: string,\n policy: SSRFPolicy = {},\n): SafetyCheck {\n const config = { ...DEFAULT_SSRF_POLICY, ...policy };\n\n // Parse URL\n let parsed: URL;\n try {\n parsed = new URL(url);\n } catch {\n return { safe: false, reason: 'Invalid URL format' };\n }\n\n // Check protocol\n if (parsed.protocol === 'data:') {\n if (!config.allowDataUrls) {\n return { safe: false, reason: 'Data URLs not allowed' };\n }\n return { safe: true };\n }\n\n if (!['http:', 'https:'].includes(parsed.protocol)) {\n return {\n safe: false,\n reason: `Protocol 
'${parsed.protocol}' not allowed (only http/https)`,\n };\n }\n\n const hostname = parsed.hostname.toLowerCase();\n\n // Check allowed domains (whitelist)\n if (config.allowedDomains.length > 0) {\n const isAllowed = config.allowedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (!isAllowed) {\n return {\n safe: false,\n reason: `Domain '${hostname}' not in allowed list`,\n };\n }\n }\n\n // Check blocked domains (blacklist)\n if (config.blockedDomains.length > 0) {\n const isBlocked = config.blockedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (isBlocked) {\n return {\n safe: false,\n reason: `Domain '${hostname}' is blocked`,\n };\n }\n }\n\n // Check localhost\n if (isLocalhost(hostname) && !config.allowLocalhost) {\n return { safe: false, reason: 'Localhost not allowed' };\n }\n\n // Check private IPs\n if (isPrivateIP(hostname) && !config.allowPrivateIPs) {\n return { safe: false, reason: 'Private IP addresses not allowed' };\n }\n\n return { safe: true };\n}\n\n/**\n * Quick check if URL is safe with default policy.\n *\n * Convenience wrapper for common case.\n *\n * @param url - URL to validate\n * @returns true if safe, false otherwise\n */\nexport function isSafeForFetch(url: string): boolean {\n return isURLSafe(url).safe;\n}\n\n/**\n * Match domain against pattern (supports wildcards).\n *\n * @example\n * ```typescript\n * matchDomainPattern('api.github.com', '*.github.com') // true\n * matchDomainPattern('github.com', '*.github.com') // false\n * matchDomainPattern('github.com', 'github.com') // true\n * ```\n */\nfunction matchDomainPattern(domain: string, pattern: string): boolean {\n // Exact match\n if (domain === pattern) return true;\n\n // Wildcard match - only matches subdomains, not the base domain\n if (pattern.startsWith('*.')) {\n const suffix = pattern.slice(2);\n return domain.endsWith('.' 
+ suffix);\n }\n\n return false;\n}\n\n/**\n * Check if hostname is localhost/loopback.\n */\nfunction isLocalhost(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 loopback\n if (lower === 'localhost' || lower === '0.0.0.0' || lower.startsWith('127.')) {\n return true;\n }\n\n // IPv6 loopback\n if (lower === '::1' || lower === '[::1]') {\n return true;\n }\n\n return false;\n}\n\n/**\n * Check if hostname is a private IP address.\n *\n * Checks for:\n * - 10.0.0.0/8\n * - 172.16.0.0/12\n * - 192.168.0.0/16\n * - Link-local: 169.254.0.0/16 (includes AWS metadata endpoint)\n * - IPv6 private ranges\n */\nfunction isPrivateIP(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 private ranges\n const parts = hostname.split('.').map(Number);\n\n if (\n parts.length === 4 &&\n parts.every((p) => p >= 0 && p <= 255 && !isNaN(p))\n ) {\n const [octet1, octet2] = parts;\n\n // 10.0.0.0/8\n if (octet1 === 10) return true;\n\n // 172.16.0.0/12\n if (octet1 === 172 && octet2 !== undefined && octet2 >= 16 && octet2 <= 31)\n return true;\n\n // 192.168.0.0/16\n if (octet1 === 192 && octet2 === 168) return true;\n\n // 169.254.0.0/16 (link-local) - includes AWS metadata endpoint\n if (octet1 === 169 && octet2 === 254) return true;\n }\n\n // IPv6 private ranges using regex patterns\n // Strip brackets if present (URL parser adds them: [fc00::1])\n const cleanedHostname = lower.replace(/^\\[|\\]$/g, '');\n\n const ipv6Patterns = [\n /^f[cd][0-9a-f]{2}:/i, // fc00::/7 (includes both fc and fd ranges)\n /^fe80:/i, // fe80::/10 (link-local)\n ];\n\n for (const pattern of ipv6Patterns) {\n if (pattern.test(cleanedHostname)) {\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Filter array of URLs, keeping only safe ones.\n *\n * @param urls - Array of URLs to filter\n * @param policy - SSRF policy\n * @returns Array of safe URLs\n */\nexport function filterSafeUrls(\n urls: string[],\n policy?: SSRFPolicy,\n): string[] 
{\n return urls.filter((url) => isURLSafe(url, policy).safe);\n}\n"],"mappings":";AA6CO,SAAS,iBACd,MACA,UAAkC,CAAC,GACjB;AAClB,QAAM;IACJ,kBAAkB;IAClB,cAAc;IACd,gBAAgB;EAClB,IAAI;AAEJ,QAAM,UAA4B,CAAC;AACnC,QAAM,OAAO,oBAAI,IAAY;AAI7B,MAAI,iBAAiB;AACnB,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,oBAAoB;AAC1B,QAAI;AAEJ,YAAQ,QAAQ,kBAAkB,KAAK,IAAI,OAAO,MAAM;AACtD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAGA,UAAM,sBAAsB;AAC5B,YAAQ,QAAQ,oBAAoB,KAAK,IAAI,OAAO,MAAM;AACxD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,OAAO,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,GAAG;AACzE,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAIA,UAAM,gBAAgB;AACtB,YAAQ,QAAQ,cAAc,KAAK,IAAI,OAAO,MAAM;AAClD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,QAAQ,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU,MAAM,CAAC,KAAK,IAAI,GAAG,GAAG;AACtF,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS;QACX,CAAC;MACH;IACF;EACF;AAGA,MAAI,eAAe;AACjB,UAAM,iBAAiB;AACvB,QAAI;AAEJ,YAAQ,QAAQ,eAAe,KAAK,IAAI,OAAO,MAAM;AACnD,YAAM,MAAM,MAAM,CAAC;AACnB,UAAI,CAAC,KAAK,IAAI,GAAG,GAAG;AAClB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;QACV,CAAC;MACH;IACF;EACF;AAEA,SAAO;AACT;AAUO,SAAS,uBAAuB,MAAgC;AACrE,QAAM,MAAM,iBAAiB,MAAM,EAAE,eAAe,MAAM,CAAC;AAC3D,SAAO,IAAI,OAAO,CAAC,QAAQ,gBAAgB,KAAK,IAAI,GAAG,CAAC;AAC1D;AAUO,SAAS,gBAAgB,MAAgC;AAC9D,SAAO,iBAAiB,MAAM;IAC5B,iBAAiB;IACjB,aAAa;IACb,eAAe;
EACjB,CAAC;AACH;AAUO,SAAS,YAAY,MAAsB;AAChD,SAAO,iBAAiB,IAAI,EAAE;AAChC;AC/KO,IAAM,sBAA4C;EACvD,iBAAiB;EACjB,gBAAgB;EAChB,gBAAgB,CAAC;EACjB,gBAAgB,CAAC;EACjB,eAAe;AACjB;AAoBO,SAAS,UACd,KACA,SAAqB,CAAC,GACT;AACb,QAAM,SAAS,EAAE,GAAG,qBAAqB,GAAG,OAAO;AAGnD,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;EACtB,QAAQ;AACN,WAAO,EAAE,MAAM,OAAO,QAAQ,qBAAqB;EACrD;AAGA,MAAI,OAAO,aAAa,SAAS;AAC/B,QAAI,CAAC,OAAO,eAAe;AACzB,aAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;IACxD;AACA,WAAO,EAAE,MAAM,KAAK;EACtB;AAEA,MAAI,CAAC,CAAC,SAAS,QAAQ,EAAE,SAAS,OAAO,QAAQ,GAAG;AAClD,WAAO;MACL,MAAM;MACN,QAAQ,aAAa,OAAO,QAAQ;IACtC;EACF;AAEA,QAAM,WAAW,OAAO,SAAS,YAAY;AAG7C,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,CAAC,WAAW;AACd,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,WAAW;AACb,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,gBAAgB;AACnD,WAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;EACxD;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,iBAAiB;AACpD,WAAO,EAAE,MAAM,OAAO,QAAQ,mCAAmC;EACnE;AAEA,SAAO,EAAE,MAAM,KAAK;AACtB;AAUO,SAAS,eAAe,KAAsB;AACnD,SAAO,UAAU,GAAG,EAAE;AACxB;AAYA,SAAS,mBAAmB,QAAgB,SAA0B;AAEpE,MAAI,WAAW,QAAS,QAAO;AAG/B,MAAI,QAAQ,WAAW,IAAI,GAAG;AAC5B,UAAM,SAAS,QAAQ,MAAM,CAAC;AAC9B,WAAO,OAAO,SAAS,MAAM,MAAM;EACrC;AAEA,SAAO;AACT;AAKA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,MAAI,UAAU,eAAe,UAAU,aAAa,MAAM,WAAW,MAAM,GAAG;AAC5E,WAAO;EACT;AAGA,MAAI,UAAU,SAAS,UAAU,SAAS;AACxC,WAAO;EACT;AAEA,SAAO;AACT;AAYA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,QAAM,QAAQ,SAAS,MAAM,GAAG,EAAE,IAAI,MAAM;AAE5C,MACE,MAAM,WAAW,KACjB,MAAM,MAAM,CAAC,MAAM,KAAK,KAAK,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,GAClD;AACA,UAAM,CAAC,QAAQ,MAAM,IAAI;AAGzB,QAAI,WAAW,GAAI,QAAO;AAG1B,QAAI,WAAW,OAAO,WAAW,UAAa,UAAU,MAAM,UAAU;AACtE,aAAO;AAGT,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;AAG7C,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;EAC/C;AAIA,QAAM,kBAAkB,MAAM,QAAQ,YAAY,EAAE;AAEpD,QAAM,eAAe;IACnB;;IACA;;EACF;
AAEA,aAAW,WAAW,cAAc;AAClC,QAAI,QAAQ,KAAK,eAAe,GAAG;AACjC,aAAO;IACT;EACF;AAEA,SAAO;AACT;AASO,SAAS,eACd,MACA,QACU;AACV,SAAO,KAAK,OAAO,CAAC,QAAQ,UAAU,KAAK,MAAM,EAAE,IAAI;AACzD;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../image-utils/src/extraction.ts","../../image-utils/src/ssrf.ts"],"sourcesContent":["/**\n * Image URL Extraction\n *\n * Extracts image URLs from markdown, HTML, and base64 data URLs in text.\n * Used across Federation, Archive, and Architect for image processing.\n */\n\nexport interface ImageExtractionOptions {\n /** Include markdown image syntax `` (default: true) */\n includeMarkdown?: boolean;\n /** Include HTML img tags `<img src=\"url\">` (default: true) */\n includeHTML?: boolean;\n /** Include base64 data URLs `data:image/...;base64,...` (default: true) */\n includeBase64?: boolean;\n}\n\nexport interface ExtractedImage {\n /** The extracted URL or data URL */\n url: string;\n /** Source format where the image was found */\n source: 'markdown' | 'html' | 'base64';\n /** Surrounding context for debugging (optional) */\n context?: string;\n}\n\n/**\n * Extract all image URLs from text.\n *\n * Supports markdown, HTML, and base64 data URLs.\n * Returns deduplicated results.\n *\n * @param text - Text to extract image URLs from\n * @param options - Extraction options\n * @returns Array of extracted images with source information\n *\n * @example\n * ```typescript\n * const text = 'Check out  and <img src=\"banner.jpg\">';\n * const images = extractImageUrls(text);\n * // [\n * // { url: 'avatar.png', source: 'markdown' },\n * // { url: 'banner.jpg', source: 'html' }\n * // ]\n * ```\n */\nexport function extractImageUrls(\n text: string,\n options: ImageExtractionOptions = {},\n): ExtractedImage[] {\n const {\n includeMarkdown = true,\n includeHTML = true,\n includeBase64 = true,\n } = options;\n\n const results: ExtractedImage[] = [];\n const seen = new Set<string>();\n\n // Extract markdown images:  or  or \n // Supports: standard, angle brackets, and SillyTavern dimension syntax\n if (includeMarkdown) {\n const markdownPattern = /!\\[([^\\]]*)\\]\\(<?([^>\\s)]+)>?(?:\\s*=[^)]+)?\\)/g;\n let match: RegExpExecArray | null;\n\n 
while ((match = markdownPattern.exec(text)) !== null) {\n const url = match[2]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'markdown',\n context: match[0],\n });\n }\n }\n }\n\n // Extract HTML img tags: <img src=\"url\"> (quoted and unquoted)\n if (includeHTML) {\n // Quoted src\n const htmlQuotedPattern = /<img[^>]+src=[\"']([^\"']+)[\"'][^>]*>/gi;\n let match: RegExpExecArray | null;\n\n while ((match = htmlQuotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // Unquoted src (e.g., <img src=url>)\n const htmlUnquotedPattern = /<img[^>]+src=([^\\s\"'>]+)[^>]*>/gi;\n while ((match = htmlUnquotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Skip if it starts with a quote (already handled above)\n if (url && !url.startsWith('\"') && !url.startsWith(\"'\") && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // CSS url() function: url(url), url(\"url\"), url('url')\n // Common in background-image, background, content properties\n const cssUrlPattern = /url\\([\"']?([^)\"']+)[\"']?\\)/gi;\n while ((match = cssUrlPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Only include http(s) URLs to avoid CSS variables and relative paths\n if (url && (url.startsWith('http://') || url.startsWith('https://')) && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n }\n\n // Extract plain image URLs (not wrapped in any syntax)\n if (includeHTML) {\n // Plain URLs with image extensions (common on image hosts)\n const plainUrlPattern = /(?<![(\"'])(https?:\\/\\/[^\\s<>\"']+\\.(?:jpg|jpeg|png|gif|webp|svg|avif|bmp))(?![)\"'])/gi;\n let match: RegExpExecArray | null;\n\n while ((match = plainUrlPattern.exec(text)) !== null) {\n 
const url = match[0]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: url,\n });\n }\n }\n }\n\n // Extract base64 data URLs: data:image/...;base64,...\n if (includeBase64) {\n const dataUrlPattern = /data:image\\/[^;]+;base64,[A-Za-z0-9+/=]+/g;\n let match: RegExpExecArray | null;\n\n while ((match = dataUrlPattern.exec(text)) !== null) {\n const url = match[0];\n if (!seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'base64',\n });\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract only HTTP/HTTPS URLs (excludes data URLs and relative paths).\n *\n * Convenience wrapper around extractImageUrls that filters for remote URLs.\n *\n * @param text - Text to extract URLs from\n * @returns Array of extracted HTTP/HTTPS image URLs\n */\nexport function extractRemoteImageUrls(text: string): ExtractedImage[] {\n const all = extractImageUrls(text, { includeBase64: false });\n return all.filter((img) => /^https?:\\/\\//i.test(img.url));\n}\n\n/**\n * Extract only base64 data URLs.\n *\n * Convenience wrapper around extractImageUrls.\n *\n * @param text - Text to extract data URLs from\n * @returns Array of base64 image data URLs\n */\nexport function extractDataUrls(text: string): ExtractedImage[] {\n return extractImageUrls(text, {\n includeMarkdown: false,\n includeHTML: false,\n includeBase64: true,\n });\n}\n\n/**\n * Count images in text without extracting full details.\n *\n * More efficient than extractImageUrls when you only need the count.\n *\n * @param text - Text to count images in\n * @returns Total number of unique image references\n */\nexport function countImages(text: string): number {\n return extractImageUrls(text).length;\n}\n","/**\n * SSRF (Server-Side Request Forgery) Protection\n *\n * Validates URLs to prevent SSRF attacks.\n * Browser-safe implementation (no Node.js dependencies).\n */\n\nexport interface SSRFPolicy {\n /** Allow private IP addresses (10.x, 
172.16-31.x, 192.168.x) (default: false) */\n allowPrivateIPs?: boolean;\n /** Allow localhost/loopback (127.x, ::1) (default: false) */\n allowLocalhost?: boolean;\n /** Blocked domain patterns (e.g., ['internal.company.com', '*.local']) */\n blockedDomains?: string[];\n /** Allowed domain patterns - if provided, ONLY these are allowed */\n allowedDomains?: string[];\n /** Allow data URLs (data:image/...) (default: false) */\n allowDataUrls?: boolean;\n}\n\nexport interface SafetyCheck {\n /** Whether the URL is safe according to policy */\n safe: boolean;\n /** Reason why URL is unsafe (if safe=false) */\n reason?: string;\n}\n\n/**\n * Default SSRF policy - blocks private IPs, localhost, and data URLs.\n */\nexport const DEFAULT_SSRF_POLICY: Required<SSRFPolicy> = {\n allowPrivateIPs: false,\n allowLocalhost: false,\n blockedDomains: [],\n allowedDomains: [],\n allowDataUrls: false,\n};\n\n/**\n * Check if URL is safe to fetch according to SSRF policy.\n *\n * This is the canonical SSRF protection - all apps should use this\n * before fetching external URLs.\n *\n * **Limitations (browser-side validation only):**\n * - Cannot detect domains that resolve to private IPs (e.g., `attacker.com` -> `127.0.0.1`)\n * - Cannot prevent DNS rebinding attacks (domain resolves differently on subsequent requests)\n * - Cannot validate redirect destinations (server may redirect to internal URLs)\n * - Does not perform actual DNS resolution - only validates URL syntax and hostname patterns\n *\n * For server-side applications requiring stronger SSRF protection, perform DNS resolution\n * and validate the resolved IP address before making the request, and validate redirect\n * destinations or disable redirects entirely.\n *\n * @param url - URL to validate\n * @param policy - SSRF policy (uses defaults if not provided)\n * @returns Safety check result with reason if unsafe\n *\n * @see 
https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html\n *\n * @example\n * ```typescript\n * const check = isURLSafe('http://10.0.0.1/secret');\n * if (!check.safe) {\n * console.error('Unsafe URL:', check.reason);\n * }\n * ```\n */\nexport function isURLSafe(\n url: string,\n policy: SSRFPolicy = {},\n): SafetyCheck {\n const config = { ...DEFAULT_SSRF_POLICY, ...policy };\n\n // Parse URL\n let parsed: URL;\n try {\n parsed = new URL(url);\n } catch {\n return { safe: false, reason: 'Invalid URL format' };\n }\n\n // Check protocol\n if (parsed.protocol === 'data:') {\n if (!config.allowDataUrls) {\n return { safe: false, reason: 'Data URLs not allowed' };\n }\n return { safe: true };\n }\n\n if (!['http:', 'https:'].includes(parsed.protocol)) {\n return {\n safe: false,\n reason: `Protocol '${parsed.protocol}' not allowed (only http/https)`,\n };\n }\n\n const hostname = parsed.hostname.toLowerCase();\n\n // Check allowed domains (whitelist)\n if (config.allowedDomains.length > 0) {\n const isAllowed = config.allowedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (!isAllowed) {\n return {\n safe: false,\n reason: `Domain '${hostname}' not in allowed list`,\n };\n }\n }\n\n // Check blocked domains (blacklist)\n if (config.blockedDomains.length > 0) {\n const isBlocked = config.blockedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (isBlocked) {\n return {\n safe: false,\n reason: `Domain '${hostname}' is blocked`,\n };\n }\n }\n\n // Check localhost\n if (isLocalhost(hostname) && !config.allowLocalhost) {\n return { safe: false, reason: 'Localhost not allowed' };\n }\n\n // Check private IPs\n if (isPrivateIP(hostname) && !config.allowPrivateIPs) {\n return { safe: false, reason: 'Private IP addresses not allowed' };\n }\n\n return { safe: true };\n}\n\n/**\n * Quick check if URL is safe with default policy.\n *\n * Convenience wrapper for common case.\n 
*\n * @param url - URL to validate\n * @returns true if safe, false otherwise\n */\nexport function isSafeForFetch(url: string): boolean {\n return isURLSafe(url).safe;\n}\n\n/**\n * Match domain against pattern (supports wildcards).\n *\n * @example\n * ```typescript\n * matchDomainPattern('api.github.com', '*.github.com') // true\n * matchDomainPattern('github.com', '*.github.com') // false\n * matchDomainPattern('github.com', 'github.com') // true\n * ```\n */\nfunction matchDomainPattern(domain: string, pattern: string): boolean {\n // Exact match\n if (domain === pattern) return true;\n\n // Wildcard match - only matches subdomains, not the base domain\n if (pattern.startsWith('*.')) {\n const suffix = pattern.slice(2);\n return domain.endsWith('.' + suffix);\n }\n\n return false;\n}\n\n/**\n * Check if hostname is localhost/loopback.\n */\nfunction isLocalhost(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 loopback\n if (lower === 'localhost' || lower === '0.0.0.0' || lower.startsWith('127.')) {\n return true;\n }\n\n // IPv6 loopback\n if (lower === '::1' || lower === '[::1]') {\n return true;\n }\n\n return false;\n}\n\n/**\n * Check if hostname is a private IP address.\n *\n * Checks for:\n * - 10.0.0.0/8\n * - 172.16.0.0/12\n * - 192.168.0.0/16\n * - Link-local: 169.254.0.0/16 (includes AWS metadata endpoint)\n * - IPv6 private ranges\n */\nfunction isPrivateIP(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 private ranges\n const parts = hostname.split('.').map(Number);\n\n if (\n parts.length === 4 &&\n parts.every((p) => p >= 0 && p <= 255 && !isNaN(p))\n ) {\n const [octet1, octet2] = parts;\n\n // 10.0.0.0/8\n if (octet1 === 10) return true;\n\n // 172.16.0.0/12\n if (octet1 === 172 && octet2 !== undefined && octet2 >= 16 && octet2 <= 31)\n return true;\n\n // 192.168.0.0/16\n if (octet1 === 192 && octet2 === 168) return true;\n\n // 169.254.0.0/16 (link-local) - includes AWS metadata 
endpoint\n if (octet1 === 169 && octet2 === 254) return true;\n }\n\n // IPv6 private ranges using regex patterns\n // Strip brackets if present (URL parser adds them: [fc00::1])\n const cleanedHostname = lower.replace(/^\\[|\\]$/g, '');\n\n const ipv6Patterns = [\n /^f[cd][0-9a-f]{2}:/i, // fc00::/7 (includes both fc and fd ranges)\n /^fe80:/i, // fe80::/10 (link-local)\n ];\n\n for (const pattern of ipv6Patterns) {\n if (pattern.test(cleanedHostname)) {\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Filter array of URLs, keeping only safe ones.\n *\n * @param urls - Array of URLs to filter\n * @param policy - SSRF policy\n * @returns Array of safe URLs\n */\nexport function filterSafeUrls(\n urls: string[],\n policy?: SSRFPolicy,\n): string[] {\n return urls.filter((url) => isURLSafe(url, policy).safe);\n}\n"],"mappings":";AA6CO,SAAS,iBACd,MACA,UAAkC,CAAC,GACjB;AAClB,QAAM;IACJ,kBAAkB;IAClB,cAAc;IACd,gBAAgB;EAClB,IAAI;AAEJ,QAAM,UAA4B,CAAC;AACnC,QAAM,OAAO,oBAAI,IAAY;AAI7B,MAAI,iBAAiB;AACnB,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,oBAAoB;AAC1B,QAAI;AAEJ,YAAQ,QAAQ,kBAAkB,KAAK,IAAI,OAAO,MAAM;AACtD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAGA,UAAM,sBAAsB;AAC5B,YAAQ,QAAQ,oBAAoB,KAAK,IAAI,OAAO,MAAM;AACxD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,OAAO,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,GAAG;AACzE,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAIA,UAAM,gBAAgB;AACtB,YAAQ,QAAQ,cAAc,KAAK,IAAI,OAAO,MAAM;AAClD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,QAAQ,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU,MAAM,CAAC,KAAK,IAAI,GAAG,GAAG;AACtF,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;
UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS;QACX,CAAC;MACH;IACF;EACF;AAGA,MAAI,eAAe;AACjB,UAAM,iBAAiB;AACvB,QAAI;AAEJ,YAAQ,QAAQ,eAAe,KAAK,IAAI,OAAO,MAAM;AACnD,YAAM,MAAM,MAAM,CAAC;AACnB,UAAI,CAAC,KAAK,IAAI,GAAG,GAAG;AAClB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;QACV,CAAC;MACH;IACF;EACF;AAEA,SAAO;AACT;AAUO,SAAS,uBAAuB,MAAgC;AACrE,QAAM,MAAM,iBAAiB,MAAM,EAAE,eAAe,MAAM,CAAC;AAC3D,SAAO,IAAI,OAAO,CAAC,QAAQ,gBAAgB,KAAK,IAAI,GAAG,CAAC;AAC1D;AAUO,SAAS,gBAAgB,MAAgC;AAC9D,SAAO,iBAAiB,MAAM;IAC5B,iBAAiB;IACjB,aAAa;IACb,eAAe;EACjB,CAAC;AACH;AAUO,SAAS,YAAY,MAAsB;AAChD,SAAO,iBAAiB,IAAI,EAAE;AAChC;AC/KO,IAAM,sBAA4C;EACvD,iBAAiB;EACjB,gBAAgB;EAChB,gBAAgB,CAAC;EACjB,gBAAgB,CAAC;EACjB,eAAe;AACjB;AAgCO,SAAS,UACd,KACA,SAAqB,CAAC,GACT;AACb,QAAM,SAAS,EAAE,GAAG,qBAAqB,GAAG,OAAO;AAGnD,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;EACtB,QAAQ;AACN,WAAO,EAAE,MAAM,OAAO,QAAQ,qBAAqB;EACrD;AAGA,MAAI,OAAO,aAAa,SAAS;AAC/B,QAAI,CAAC,OAAO,eAAe;AACzB,aAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;IACxD;AACA,WAAO,EAAE,MAAM,KAAK;EACtB;AAEA,MAAI,CAAC,CAAC,SAAS,QAAQ,EAAE,SAAS,OAAO,QAAQ,GAAG;AAClD,WAAO;MACL,MAAM;MACN,QAAQ,aAAa,OAAO,QAAQ;IACtC;EACF;AAEA,QAAM,WAAW,OAAO,SAAS,YAAY;AAG7C,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,CAAC,WAAW;AACd,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,WAAW;AACb,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,gBAAgB;AACnD,WAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;EACxD;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,iBAAiB;AACpD,WAAO,EAAE,MAAM,OAAO,QAAQ,mCAAmC;EACnE;AAEA,SAAO,EAAE,MAAM,KAAK;AACtB;AAUO,SAAS,eAAe,KAAsB;AACnD,SAAO,UAAU,GAAG,EAAE;AACxB;AAYA,SAAS,mBAAmB,QAAgB,SAA0B
;AAEpE,MAAI,WAAW,QAAS,QAAO;AAG/B,MAAI,QAAQ,WAAW,IAAI,GAAG;AAC5B,UAAM,SAAS,QAAQ,MAAM,CAAC;AAC9B,WAAO,OAAO,SAAS,MAAM,MAAM;EACrC;AAEA,SAAO;AACT;AAKA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,MAAI,UAAU,eAAe,UAAU,aAAa,MAAM,WAAW,MAAM,GAAG;AAC5E,WAAO;EACT;AAGA,MAAI,UAAU,SAAS,UAAU,SAAS;AACxC,WAAO;EACT;AAEA,SAAO;AACT;AAYA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,QAAM,QAAQ,SAAS,MAAM,GAAG,EAAE,IAAI,MAAM;AAE5C,MACE,MAAM,WAAW,KACjB,MAAM,MAAM,CAAC,MAAM,KAAK,KAAK,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,GAClD;AACA,UAAM,CAAC,QAAQ,MAAM,IAAI;AAGzB,QAAI,WAAW,GAAI,QAAO;AAG1B,QAAI,WAAW,OAAO,WAAW,UAAa,UAAU,MAAM,UAAU;AACtE,aAAO;AAGT,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;AAG7C,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;EAC/C;AAIA,QAAM,kBAAkB,MAAM,QAAQ,YAAY,EAAE;AAEpD,QAAM,eAAe;IACnB;;IACA;;EACF;AAEA,aAAW,WAAW,cAAc;AAClC,QAAI,QAAQ,KAAK,eAAe,GAAG;AACjC,aAAO;IACT;EACF;AAEA,SAAO;AACT;AASO,SAAS,eACd,MACA,QACU;AACV,SAAO,KAAK,OAAO,CAAC,QAAQ,UAAU,KAAK,MAAM,EAAE,IAAI;AACzD;","names":[]}
|
package/dist/index.cjs
CHANGED
|
@@ -128,7 +128,23 @@ function streamingUnzipSync(data, limits = DEFAULT_ZIP_LIMITS) {
|
|
|
128
128
|
if (unsafePathHandling === "warn" && limits.onUnsafePath) {
|
|
129
129
|
limits.onUnsafePath(file.name, reason);
|
|
130
130
|
}
|
|
131
|
-
file.ondata = () => {
|
|
131
|
+
file.ondata = (err, chunk, _final) => {
|
|
132
|
+
if (error) return;
|
|
133
|
+
if (err) {
|
|
134
|
+
error = err;
|
|
135
|
+
return;
|
|
136
|
+
}
|
|
137
|
+
if (chunk && chunk.length > 0) {
|
|
138
|
+
totalBytes += chunk.length;
|
|
139
|
+
if (totalBytes > limits.maxTotalSize) {
|
|
140
|
+
error = new ZipPreflightError(
|
|
141
|
+
`Total actual size ${totalBytes} exceeds limit ${limits.maxTotalSize}`,
|
|
142
|
+
totalBytes,
|
|
143
|
+
limits.maxTotalSize
|
|
144
|
+
);
|
|
145
|
+
file.terminate();
|
|
146
|
+
}
|
|
147
|
+
}
|
|
132
148
|
};
|
|
133
149
|
file.start();
|
|
134
150
|
return;
|
|
@@ -588,22 +604,24 @@ var ExtractedAssetSchema = import_zod.z.object({
|
|
|
588
604
|
mimeType: import_zod.z.string()
|
|
589
605
|
});
|
|
590
606
|
var CCv2LorebookEntrySchema = import_zod2.z.object({
|
|
591
|
-
keys: import_zod2.z.array(import_zod2.z.string()),
|
|
607
|
+
keys: import_zod2.z.array(import_zod2.z.string()).optional(),
|
|
608
|
+
// Some tools use 'key' instead
|
|
592
609
|
content: import_zod2.z.string(),
|
|
593
|
-
enabled: import_zod2.z.boolean(),
|
|
594
|
-
|
|
595
|
-
|
|
610
|
+
enabled: import_zod2.z.boolean().default(true),
|
|
611
|
+
// Default to enabled if missing
|
|
612
|
+
insertion_order: import_zod2.z.number().int().default(0),
|
|
613
|
+
// Optional fields - be lenient with nulls since wild data has them
|
|
596
614
|
extensions: import_zod2.z.record(import_zod2.z.unknown()).optional(),
|
|
597
|
-
case_sensitive: import_zod2.z.boolean().optional(),
|
|
615
|
+
case_sensitive: import_zod2.z.boolean().nullable().optional(),
|
|
598
616
|
name: import_zod2.z.string().optional(),
|
|
599
617
|
priority: import_zod2.z.number().int().optional(),
|
|
600
618
|
id: import_zod2.z.number().int().optional(),
|
|
601
619
|
comment: import_zod2.z.string().optional(),
|
|
602
|
-
selective: import_zod2.z.boolean().optional(),
|
|
620
|
+
selective: import_zod2.z.boolean().nullable().optional(),
|
|
603
621
|
secondary_keys: import_zod2.z.array(import_zod2.z.string()).optional(),
|
|
604
|
-
constant: import_zod2.z.boolean().optional(),
|
|
605
|
-
position: import_zod2.z.enum(["before_char", "after_char"]).optional()
|
|
606
|
-
});
|
|
622
|
+
constant: import_zod2.z.boolean().nullable().optional(),
|
|
623
|
+
position: import_zod2.z.union([import_zod2.z.enum(["before_char", "after_char"]), import_zod2.z.number().int()]).nullable().optional()
|
|
624
|
+
}).passthrough();
|
|
607
625
|
var CCv2CharacterBookSchema = import_zod2.z.object({
|
|
608
626
|
name: import_zod2.z.string().optional(),
|
|
609
627
|
description: import_zod2.z.string().optional(),
|
|
@@ -651,31 +669,34 @@ function getV2Data(card) {
|
|
|
651
669
|
return card;
|
|
652
670
|
}
|
|
653
671
|
var CCv3LorebookEntrySchema = import_zod3.z.object({
|
|
654
|
-
keys: import_zod3.z.array(import_zod3.z.string()),
|
|
672
|
+
keys: import_zod3.z.array(import_zod3.z.string()).optional(),
|
|
673
|
+
// Some tools use 'key' instead
|
|
655
674
|
content: import_zod3.z.string(),
|
|
656
|
-
enabled: import_zod3.z.boolean(),
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
675
|
+
enabled: import_zod3.z.boolean().default(true),
|
|
676
|
+
// Default to enabled if missing
|
|
677
|
+
insertion_order: import_zod3.z.number().int().default(0),
|
|
678
|
+
// Optional fields - be lenient with nulls since wild data has them
|
|
679
|
+
case_sensitive: import_zod3.z.boolean().nullable().optional(),
|
|
660
680
|
name: import_zod3.z.string().optional(),
|
|
661
681
|
priority: import_zod3.z.number().int().optional(),
|
|
662
682
|
id: import_zod3.z.number().int().optional(),
|
|
663
683
|
comment: import_zod3.z.string().optional(),
|
|
664
|
-
selective: import_zod3.z.boolean().optional(),
|
|
684
|
+
selective: import_zod3.z.boolean().nullable().optional(),
|
|
665
685
|
secondary_keys: import_zod3.z.array(import_zod3.z.string()).optional(),
|
|
666
|
-
constant: import_zod3.z.boolean().optional(),
|
|
667
|
-
position: import_zod3.z.enum(["before_char", "after_char"]).optional(),
|
|
686
|
+
constant: import_zod3.z.boolean().nullable().optional(),
|
|
687
|
+
position: import_zod3.z.union([import_zod3.z.enum(["before_char", "after_char"]), import_zod3.z.number().int()]).nullable().optional(),
|
|
668
688
|
extensions: import_zod3.z.record(import_zod3.z.unknown()).optional(),
|
|
669
|
-
// v3 specific
|
|
689
|
+
// v3 specific - also lenient with types since SillyTavern uses numbers for enums
|
|
670
690
|
automation_id: import_zod3.z.string().optional(),
|
|
671
|
-
role: import_zod3.z.enum(["system", "user", "assistant"]).optional(),
|
|
691
|
+
role: import_zod3.z.union([import_zod3.z.enum(["system", "user", "assistant"]), import_zod3.z.number().int()]).nullable().optional(),
|
|
672
692
|
group: import_zod3.z.string().optional(),
|
|
673
693
|
scan_frequency: import_zod3.z.number().int().nonnegative().optional(),
|
|
674
|
-
probability: import_zod3.z.number().min(0).max(
|
|
694
|
+
probability: import_zod3.z.number().min(0).max(100).optional(),
|
|
695
|
+
// Some tools use 0-100 instead of 0-1
|
|
675
696
|
use_regex: import_zod3.z.boolean().optional(),
|
|
676
697
|
depth: import_zod3.z.number().int().nonnegative().optional(),
|
|
677
|
-
selective_logic: import_zod3.z.enum(["AND", "NOT"]).optional()
|
|
678
|
-
});
|
|
698
|
+
selective_logic: import_zod3.z.union([import_zod3.z.enum(["AND", "NOT"]), import_zod3.z.number().int()]).optional()
|
|
699
|
+
}).passthrough();
|
|
679
700
|
var CCv3CharacterBookSchema = import_zod3.z.object({
|
|
680
701
|
name: import_zod3.z.string().optional(),
|
|
681
702
|
description: import_zod3.z.string().optional(),
|
|
@@ -8111,8 +8132,10 @@ function voxtaToCCv3(character, books) {
|
|
|
8111
8132
|
};
|
|
8112
8133
|
}
|
|
8113
8134
|
}
|
|
8114
|
-
const
|
|
8115
|
-
const
|
|
8135
|
+
const rawCreationDate = character.DateCreated ? Math.floor(new Date(character.DateCreated).getTime() / 1e3) : void 0;
|
|
8136
|
+
const rawModificationDate = character.DateModified ? Math.floor(new Date(character.DateModified).getTime() / 1e3) : void 0;
|
|
8137
|
+
const creationDate = rawCreationDate !== void 0 && rawCreationDate >= 0 ? rawCreationDate : void 0;
|
|
8138
|
+
const modificationDate = rawModificationDate !== void 0 && rawModificationDate >= 0 ? rawModificationDate : void 0;
|
|
8116
8139
|
const card = {
|
|
8117
8140
|
spec: "chara_card_v3",
|
|
8118
8141
|
spec_version: "3.0",
|
|
@@ -8353,6 +8376,11 @@ var DELTA_MAX_TOTAL_SIZE = 500 * 1024 * 1024;
|
|
|
8353
8376
|
var DELTA_MAX_FILE_SIZE = 50 * 1024 * 1024;
|
|
8354
8377
|
|
|
8355
8378
|
// ../normalizer/dist/index.js
|
|
8379
|
+
/**
 * Supplies the default lorebook-entry position when none was provided.
 *
 * @param {string | number | null | undefined} position Raw `position` value taken from an entry.
 * @returns {string | number} `"before_char"` when `position` is `null` or `undefined`;
 *   otherwise `position` itself, unchanged (strings and numbers both pass straight through).
 */
function normalizePosition(position) {
  // Only null/undefined trigger the default; falsy values such as 0 or "" are kept.
  return position ?? "before_char";
}
|
|
8356
8384
|
function convertLorebookEntry(entry, index) {
|
|
8357
8385
|
return {
|
|
8358
8386
|
keys: entry.keys || [],
|
|
@@ -8368,7 +8396,7 @@ function convertLorebookEntry(entry, index) {
|
|
|
8368
8396
|
selective: entry.selective ?? false,
|
|
8369
8397
|
secondary_keys: entry.secondary_keys || [],
|
|
8370
8398
|
constant: entry.constant ?? false,
|
|
8371
|
-
position: entry.position
|
|
8399
|
+
position: normalizePosition(entry.position)
|
|
8372
8400
|
};
|
|
8373
8401
|
}
|
|
8374
8402
|
function convertCharacterBook(book) {
|
|
@@ -8531,6 +8559,43 @@ var DEFAULT_OPTIONS3 = {
|
|
|
8531
8559
|
maxTotalSize: 500 * 1024 * 1024,
|
|
8532
8560
|
extractAssets: true
|
|
8533
8561
|
};
|
|
8562
|
+
// Prefixes recognised on asset URIs / chunk keywords; `format` is a human-readable
// label for the spelling. Consumed by isChunkReference and stripAssetPrefix.
// Order matters for first-match consumers (stripAssetPrefix returns on the first
// prefix that matches): "chara-ext-asset_:" must stay ahead of "chara-ext-asset_",
// because the shorter one is a prefix of the longer one.
var ASSET_PREFIX_VARIANTS = [
  { prefix: "__asset:", format: "CCv3 (SillyTavern)" },
  { prefix: "asset:", format: "CCv2/CCv3 common" },
  { prefix: "pngchunk:", format: "Explicit PNG chunk" },
  { prefix: "chara-ext-asset_:", format: "RisuAI (with colon)" },
  { prefix: "chara-ext-asset_", format: "RisuAI" },
  { prefix: "__asset_", format: "Legacy underscore variant" }
];
|
|
8570
|
+
function isChunkReference(uri) {
|
|
8571
|
+
return ASSET_PREFIX_VARIANTS.some(({ prefix }) => uri.startsWith(prefix)) || !uri.includes(":");
|
|
8572
|
+
}
|
|
8573
|
+
function stripAssetPrefix(uri) {
|
|
8574
|
+
for (const { prefix } of ASSET_PREFIX_VARIANTS) {
|
|
8575
|
+
if (uri.startsWith(prefix)) {
|
|
8576
|
+
return uri.substring(prefix.length);
|
|
8577
|
+
}
|
|
8578
|
+
}
|
|
8579
|
+
return uri;
|
|
8580
|
+
}
|
|
8581
|
+
/**
 * Lists, in lookup order, every chunk keyword under which an asset might be stored.
 *
 * @param {string} assetId Asset identifier with any known prefix already removed (e.g. "0").
 * @param {string} originalUri The URI exactly as it appeared on the descriptor (e.g. "__asset:0").
 * @returns {string[]} Seven candidates: the bare id, the original URI, then the id behind
 *   "asset:", "__asset:", "__asset_", "chara-ext-asset_" and "chara-ext-asset_:".
 *   Duplicates are not removed (the original URI usually equals one of the prefixed forms).
 */
function generateChunkKeyCandidates(assetId, originalUri) {
  const prefixedForms = [
    "asset:", // Common format
    "__asset:", // CCv3 format
    "__asset_", // Legacy underscore variant
    "chara-ext-asset_", // RisuAI format
    "chara-ext-asset_:" // RisuAI format with colon
  ].map((prefix) => `${prefix}${assetId}`);
  return [assetId, originalUri, ...prefixedForms];
}
|
|
8534
8599
|
function estimateBase64DecodedSize(base64Length) {
|
|
8535
8600
|
return Math.ceil(base64Length * 0.75);
|
|
8536
8601
|
}
|
|
@@ -8598,39 +8663,22 @@ function parsePng(data, options) {
|
|
|
8598
8663
|
if (extracted.extraChunks && options.extractAssets && card.data.assets) {
|
|
8599
8664
|
const usedChunks = /* @__PURE__ */ new Set();
|
|
8600
8665
|
const chunkMap = /* @__PURE__ */ new Map();
|
|
8666
|
+
const risuIndexPrefixes = ASSET_PREFIX_VARIANTS.filter((v) => v.prefix.startsWith("chara-ext-asset_"));
|
|
8601
8667
|
for (const chunk of extracted.extraChunks) {
|
|
8602
8668
|
chunkMap.set(chunk.keyword, chunk);
|
|
8603
|
-
|
|
8604
|
-
|
|
8605
|
-
|
|
8606
|
-
|
|
8607
|
-
|
|
8669
|
+
for (const { prefix } of risuIndexPrefixes) {
|
|
8670
|
+
if (chunk.keyword.startsWith(prefix)) {
|
|
8671
|
+
const suffix = chunk.keyword.substring(prefix.length);
|
|
8672
|
+
chunkMap.set(suffix, chunk);
|
|
8673
|
+
break;
|
|
8608
8674
|
}
|
|
8609
8675
|
}
|
|
8610
8676
|
}
|
|
8611
8677
|
for (const descriptor of card.data.assets) {
|
|
8612
8678
|
if (!descriptor.uri) continue;
|
|
8613
|
-
if (
|
|
8614
|
-
|
|
8615
|
-
|
|
8616
|
-
else if (assetId.startsWith("asset:")) assetId = assetId.substring(6);
|
|
8617
|
-
else if (assetId.startsWith("pngchunk:")) assetId = assetId.substring(9);
|
|
8618
|
-
const candidates = [
|
|
8619
|
-
assetId,
|
|
8620
|
-
// "0"
|
|
8621
|
-
descriptor.uri,
|
|
8622
|
-
// "__asset:0"
|
|
8623
|
-
`asset:${assetId}`,
|
|
8624
|
-
// "asset:0"
|
|
8625
|
-
`__asset:${assetId}`,
|
|
8626
|
-
// "__asset:0"
|
|
8627
|
-
`__asset_${assetId}`,
|
|
8628
|
-
// "__asset_0"
|
|
8629
|
-
`chara-ext-asset_${assetId}`,
|
|
8630
|
-
// "chara-ext-asset_0"
|
|
8631
|
-
`chara-ext-asset_:${assetId}`
|
|
8632
|
-
// "chara-ext-asset_:0"
|
|
8633
|
-
];
|
|
8679
|
+
if (isChunkReference(descriptor.uri)) {
|
|
8680
|
+
const assetId = stripAssetPrefix(descriptor.uri);
|
|
8681
|
+
const candidates = generateChunkKeyCandidates(assetId, descriptor.uri);
|
|
8634
8682
|
let chunk;
|
|
8635
8683
|
for (const candidate of candidates) {
|
|
8636
8684
|
chunk = chunkMap.get(candidate);
|
|
@@ -8677,13 +8725,15 @@ function parsePng(data, options) {
|
|
|
8677
8725
|
}
|
|
8678
8726
|
}
|
|
8679
8727
|
}
|
|
8728
|
+
const risuPrefixes = ASSET_PREFIX_VARIANTS.filter((v) => v.prefix.startsWith("chara-ext-asset_"));
|
|
8680
8729
|
for (const chunk of extracted.extraChunks) {
|
|
8681
8730
|
if (!usedChunks.has(chunk.keyword)) {
|
|
8682
8731
|
let assetId = null;
|
|
8683
|
-
|
|
8684
|
-
|
|
8685
|
-
|
|
8686
|
-
|
|
8732
|
+
for (const { prefix } of risuPrefixes) {
|
|
8733
|
+
if (chunk.keyword.startsWith(prefix)) {
|
|
8734
|
+
assetId = chunk.keyword.substring(prefix.length);
|
|
8735
|
+
break;
|
|
8736
|
+
}
|
|
8687
8737
|
}
|
|
8688
8738
|
if (assetId) {
|
|
8689
8739
|
try {
|