@character-foundry/character-foundry 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/image-utils.cjs +249 -0
- package/dist/image-utils.cjs.map +1 -0
- package/dist/image-utils.d.cts +136 -0
- package/dist/image-utils.d.ts +136 -0
- package/dist/image-utils.js +226 -0
- package/dist/image-utils.js.map +1 -0
- package/package.json +15 -4
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Bundler-emitted module-interop helpers. Cached references to the Object
// intrinsics they rely on come first.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Defines one enumerable accessor on `target` for every key of `all`,
 * using the function stored under that key as the getter.
 */
var __export = (target, all) => {
  for (var exportName in all) {
    __defProp(target, exportName, { get: all[exportName], enumerable: true });
  }
};

/**
 * Mirrors the own properties of `from` onto `to` as live getters.
 * Keys already present on `to`, and the key equal to `except`, are skipped.
 * Enumerability follows the source descriptor (enumerable when none exists).
 * Returns `to`; a `from` that is neither an object nor a function is ignored.
 */
var __copyProps = (to, from, except, desc) => {
  const copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (key === except || __hasOwnProp.call(to, key)) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};

/**
 * Wraps a namespace object in a fresh object carrying a non-enumerable
 * `__esModule: true` marker plus live getters for every export.
 */
var __toCommonJS = (mod) => {
  const marked = __defProp({}, "__esModule", { value: true });
  return __copyProps(marked, mod);
};
|
|
19
|
+
|
|
20
|
+
// src/image-utils.ts
|
|
21
|
+
var image_utils_exports = {};
|
|
22
|
+
__export(image_utils_exports, {
|
|
23
|
+
DEFAULT_SSRF_POLICY: () => DEFAULT_SSRF_POLICY,
|
|
24
|
+
countImages: () => countImages,
|
|
25
|
+
extractDataUrls: () => extractDataUrls,
|
|
26
|
+
extractImageUrls: () => extractImageUrls,
|
|
27
|
+
extractRemoteImageUrls: () => extractRemoteImageUrls,
|
|
28
|
+
filterSafeUrls: () => filterSafeUrls,
|
|
29
|
+
isSafeForFetch: () => isSafeForFetch,
|
|
30
|
+
isURLSafe: () => isURLSafe
|
|
31
|
+
});
|
|
32
|
+
module.exports = __toCommonJS(image_utils_exports);
|
|
33
|
+
|
|
34
|
+
// ../image-utils/dist/index.js
|
|
35
|
+
/**
 * Extract every image reference found in `text`.
 *
 * Scans, in this order: markdown images, quoted `<img src>`, unquoted
 * `<img src>`, CSS `url(...)` (http/https only), bare http(s) URLs ending in
 * an image extension, and base64 `data:image/...` URLs. Results are
 * de-duplicated by URL; the first scan that finds a URL decides its `source`.
 *
 * @param {string} text - Text to scan. `null`/`undefined` yield an empty result.
 * @param {{includeMarkdown?: boolean, includeHTML?: boolean, includeBase64?: boolean}} [options]
 *   Each flag defaults to `true`. `includeHTML` also gates the CSS and bare-URL scans.
 * @returns {Array<{url: string, source: "markdown"|"html"|"base64", context?: string}>}
 *   Extracted images in discovery order. Base64 entries carry no `context`.
 */
function extractImageUrls(text, options = {}) {
  const {
    includeMarkdown = true,
    includeHTML = true,
    includeBase64 = true
  } = options;
  // Coerce once so every scan reads the same string.
  const input = String(text ?? "");
  const results = [];
  const seen = new Set();

  // Appends `image` unless its URL is empty or an earlier scan already reported it.
  const record = (image) => {
    if (!image.url || seen.has(image.url)) {
      return;
    }
    seen.add(image.url);
    results.push(image);
  };

  if (includeMarkdown) {
    // ![alt](url), ![alt](<url>), ![alt](url =WxH) and ![alt](url "title").
    // The trailing optional group accepts a quoted title, which the previous
    // pattern rejected and so dropped the whole image.
    const markdownPattern = /!\[([^\]]*)\]\(<?([^>\s)]+)>?(?:\s*=[^)]+)?(?:\s+(?:"[^"]*"|'[^']*'))?\)/g;
    for (const match of input.matchAll(markdownPattern)) {
      record({ url: match[2]?.trim(), source: "markdown", context: match[0] });
    }
  }

  if (includeHTML) {
    const htmlQuotedPattern = /<img[^>]+src=["']([^"']+)["'][^>]*>/gi;
    for (const match of input.matchAll(htmlQuotedPattern)) {
      record({ url: match[1]?.trim(), source: "html", context: match[0] });
    }

    // The capture group excludes quote characters, so a quoted src can never
    // be picked up here a second time.
    const htmlUnquotedPattern = /<img[^>]+src=([^\s"'>]+)[^>]*>/gi;
    for (const match of input.matchAll(htmlUnquotedPattern)) {
      record({ url: match[1]?.trim(), source: "html", context: match[0] });
    }

    const cssUrlPattern = /url\(["']?([^)"']+)["']?\)/gi;
    for (const match of input.matchAll(cssUrlPattern)) {
      const url = match[1]?.trim();
      // Only absolute http(s) targets; skips CSS variables and relative paths.
      if (url && (url.startsWith("http://") || url.startsWith("https://"))) {
        record({ url, source: "html", context: match[0] });
      }
    }

    // Bare URLs with an image extension that are not wrapped in (), "" or ''.
    const plainUrlPattern = /(?<![("'])(https?:\/\/[^\s<>"']+\.(?:jpg|jpeg|png|gif|webp|svg|avif|bmp))(?![)"'])/gi;
    for (const match of input.matchAll(plainUrlPattern)) {
      const url = match[0].trim();
      record({ url, source: "html", context: url });
    }
  }

  if (includeBase64) {
    const dataUrlPattern = /data:image\/[^;]+;base64,[A-Za-z0-9+/=]+/g;
    for (const match of input.matchAll(dataUrlPattern)) {
      record({ url: match[0], source: "base64" });
    }
  }

  return results;
}
|
|
128
|
+
/**
 * Extract only images whose URL starts with http:// or https://
 * (case-insensitive). Base64 data URLs are not scanned at all, and
 * relative paths found by the other scans are filtered out.
 *
 * @param {string} text - Text to scan.
 * @returns {Array<{url: string, source: string, context?: string}>} Remote images only.
 */
function extractRemoteImageUrls(text) {
  const isRemote = ({ url }) => /^https?:\/\//i.test(url);
  return extractImageUrls(text, { includeBase64: false }).filter(isRemote);
}
|
|
132
|
+
/**
 * Extract only base64 `data:image/...` URLs by running extractImageUrls
 * with the markdown and HTML scans switched off.
 *
 * @param {string} text - Text to scan.
 * @returns {Array<{url: string, source: string}>} Base64 image entries.
 */
function extractDataUrls(text) {
  const base64Only = {
    includeMarkdown: false,
    includeHTML: false,
    includeBase64: true
  };
  return extractImageUrls(text, base64Only);
}
|
|
139
|
+
/**
 * Count the unique image references in `text`.
 * Performs a full extractImageUrls pass with default options and reports
 * how many entries it produced.
 *
 * @param {string} text - Text to scan.
 * @returns {number} Number of extracted images.
 */
function countImages(text) {
  const { length: total } = extractImageUrls(text);
  return total;
}
|
|
142
|
+
// Baseline SSRF policy that isURLSafe merges caller overrides onto.
var DEFAULT_SSRF_POLICY = {
  // Reject hosts that isPrivateIP recognises.
  allowPrivateIPs: false,
  // Reject hosts that isLocalhost recognises.
  allowLocalhost: false,
  // Domain patterns that are always rejected; an empty list disables the check.
  blockedDomains: [],
  // When non-empty, only hosts matching one of these patterns are accepted.
  allowedDomains: [],
  // Reject data: URLs.
  allowDataUrls: false
};
|
|
149
|
+
/**
 * Decide whether `url` may be fetched under an SSRF policy.
 *
 * Checks run in this order: URL parses, scheme (data: is governed by
 * `allowDataUrls`, otherwise only http/https), allow-list, block-list,
 * localhost, private IP. The first failing check determines `reason`.
 *
 * @param {string} url - URL to validate.
 * @param {object} [policy] - Partial policy; missing or `undefined` fields
 *   fall back to DEFAULT_SSRF_POLICY.
 * @returns {{safe: boolean, reason?: string}} `reason` is set only when unsafe.
 */
function isURLSafe(url, policy = {}) {
  // Merge field by field so an explicit `undefined` (e.g. an unset optional
  // passed straight through) cannot erase a default and break the `.length`
  // reads below.
  const config = { ...DEFAULT_SSRF_POLICY };
  for (const [key, value] of Object.entries(policy ?? {})) {
    if (value !== undefined) {
      config[key] = value;
    }
  }

  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return { safe: false, reason: "Invalid URL format" };
  }

  if (parsed.protocol === "data:") {
    return config.allowDataUrls
      ? { safe: true }
      : { safe: false, reason: "Data URLs not allowed" };
  }
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    return {
      safe: false,
      reason: `Protocol '${parsed.protocol}' not allowed (only http/https)`
    };
  }

  // The URL parser keeps a trailing root dot ("localhost." stays as-is), and
  // that name resolves like the dot-less one. Strip it so it cannot slip past
  // the domain lists or the localhost check.
  const hostname = parsed.hostname.toLowerCase().replace(/\.+$/, "");
  const matchesHost = (pattern) => matchDomainPattern(hostname, pattern);

  if (config.allowedDomains.length > 0 && !config.allowedDomains.some(matchesHost)) {
    return {
      safe: false,
      reason: `Domain '${hostname}' not in allowed list`
    };
  }
  if (config.blockedDomains.length > 0 && config.blockedDomains.some(matchesHost)) {
    return {
      safe: false,
      reason: `Domain '${hostname}' is blocked`
    };
  }
  if (!config.allowLocalhost && isLocalhost(hostname)) {
    return { safe: false, reason: "Localhost not allowed" };
  }
  if (!config.allowPrivateIPs && isPrivateIP(hostname)) {
    return { safe: false, reason: "Private IP addresses not allowed" };
  }
  return { safe: true };
}
|
|
200
|
+
/**
 * Boolean shortcut for isURLSafe using the default policy.
 *
 * @param {string} url - URL to validate.
 * @returns {boolean} The `safe` flag of the isURLSafe result.
 */
function isSafeForFetch(url) {
  const { safe } = isURLSafe(url);
  return safe;
}
|
|
203
|
+
/**
 * Test a hostname against a domain pattern.
 *
 * A pattern is either an exact host name or `*.suffix`, which matches any
 * sub-domain of `suffix` but not `suffix` itself. Comparison ignores letter
 * case, surrounding whitespace and trailing root dots on both sides, so a
 * policy entry such as `*.GitHub.com` still matches a lower-cased hostname.
 *
 * @param {string} domain - Hostname to test.
 * @param {string} pattern - Exact name or `*.`-prefixed wildcard.
 * @returns {boolean} True when the hostname matches the pattern.
 */
function matchDomainPattern(domain, pattern) {
  // Non-string input collapses to "" and simply fails to match instead of throwing.
  const normalize = (value) => String(value ?? "").trim().toLowerCase().replace(/\.+$/, "");
  const host = normalize(domain);
  const rule = normalize(pattern);

  if (host === rule) {
    return true;
  }
  if (rule.startsWith("*.")) {
    const suffix = rule.slice(2);
    // The leading dot keeps "evilgithub.com" from matching "*.github.com".
    return suffix !== "" && host.endsWith(`.${suffix}`);
  }
  return false;
}
|
|
211
|
+
/**
 * Report whether `hostname` names the local machine.
 *
 * Recognised forms (case-insensitive, trailing root dots ignored):
 * - `localhost` and any `*.localhost` sub-domain
 * - `0.0.0.0` and anything starting with `127.`
 * - IPv6 loopback `::1` and unspecified `::`, with or without brackets
 * - IPv4-mapped IPv6 forms of the above, written either dotted
 *   (`::ffff:127.0.0.1`) or as two hex groups (`::ffff:7f00:1`)
 *
 * @param {string} hostname - Host portion of a URL.
 * @returns {boolean} True for loopback / local names.
 */
function isLocalhost(hostname) {
  const host = String(hostname ?? "").toLowerCase().replace(/\.+$/, "");

  if (host === "localhost" || host.endsWith(".localhost")) {
    return true;
  }
  const isLoopbackV4 = (dotted) => dotted === "0.0.0.0" || dotted.startsWith("127.");
  if (isLoopbackV4(host)) {
    return true;
  }

  const bare = host.replace(/^\[|\]$/g, "");
  if (bare === "::1" || bare === "::") {
    return true;
  }

  // IPv4-mapped IPv6: rebuild the embedded IPv4 address and re-apply the IPv4 rule.
  const mapped = /^::ffff:(?:(\d{1,3}(?:\.\d{1,3}){3})|([0-9a-f]{1,4}):([0-9a-f]{1,4}))$/.exec(bare);
  if (mapped) {
    if (mapped[1] !== undefined) {
      return isLoopbackV4(mapped[1]);
    }
    const high = Number.parseInt(mapped[2], 16);
    const low = Number.parseInt(mapped[3], 16);
    return isLoopbackV4(`${high >> 8}.${high & 255}.${low >> 8}.${low & 255}`);
  }
  return false;
}
|
|
221
|
+
/**
 * Report whether `hostname` is an IP literal in a non-public range.
 *
 * IPv4 ranges: 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16,
 * 169.254.0.0/16 (link-local) and 100.64.0.0/10 (shared address space).
 * IPv6 ranges: fc00::/7 (unique-local) and fe80::/10 (link-local).
 * IPv4-mapped IPv6 literals (`::ffff:a.b.c.d` or `::ffff:hhhh:hhhh`) are
 * judged by the IPv4 address they embed. Brackets, letter case and trailing
 * root dots are ignored. Loopback is not covered here.
 *
 * @param {string} hostname - Host portion of a URL.
 * @returns {boolean} True when the literal falls in one of the ranges above.
 */
function isPrivateIP(hostname) {
  const bare = String(hostname ?? "")
    .toLowerCase()
    .replace(/\.+$/, "")
    .replace(/^\[|\]$/g, "");

  const isOctet = (part) => Number.isInteger(part) && part >= 0 && part <= 255;
  const inPrivateV4Range = ([first, second]) =>
    first === 10 ||
    (first === 172 && second >= 16 && second <= 31) ||
    (first === 192 && second === 168) ||
    (first === 169 && second === 254) ||
    (first === 100 && second >= 64 && second <= 127);

  const dotted = bare.split(".").map(Number);
  if (dotted.length === 4 && dotted.every(isOctet) && inPrivateV4Range(dotted)) {
    return true;
  }

  // IPv4-mapped IPv6. The hex form packs two octets into each 16-bit group.
  const mapped = /^::ffff:(?:(\d{1,3}(?:\.\d{1,3}){3})|([0-9a-f]{1,4}):([0-9a-f]{1,4}))$/.exec(bare);
  if (mapped) {
    let octets;
    if (mapped[1] !== undefined) {
      octets = mapped[1].split(".").map(Number);
    } else {
      const high = Number.parseInt(mapped[2], 16);
      const low = Number.parseInt(mapped[3], 16);
      octets = [high >> 8, high & 255, low >> 8, low & 255];
    }
    return octets.every(isOctet) && inPrivateV4Range(octets);
  }

  const uniqueLocal = /^f[cd][0-9a-f]{2}:/; // fc00::/7
  const linkLocal = /^fe[89ab][0-9a-f]:/; // fe80::/10 spans fe80 through febf
  return uniqueLocal.test(bare) || linkLocal.test(bare);
}
|
|
246
|
+
/**
 * Keep only the URLs that isURLSafe accepts under `policy`.
 *
 * @param {string[]} urls - Candidate URLs.
 * @param {object} [policy] - SSRF policy forwarded unchanged to isURLSafe.
 * @returns {string[]} New array with the safe URLs in their original order.
 */
function filterSafeUrls(urls, policy) {
  const passesPolicy = (candidate) => {
    const { safe } = isURLSafe(candidate, policy);
    return safe;
  };
  return urls.filter(passesPolicy);
}
|
|
249
|
+
//# sourceMappingURL=image-utils.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/image-utils.ts","../../image-utils/src/extraction.ts","../../image-utils/src/ssrf.ts"],"sourcesContent":["export * from '@character-foundry/image-utils';\n","/**\n * Image URL Extraction\n *\n * Extracts image URLs from markdown, HTML, and base64 data URLs in text.\n * Used across Federation, Archive, and Architect for image processing.\n */\n\nexport interface ImageExtractionOptions {\n /** Include markdown image syntax `` (default: true) */\n includeMarkdown?: boolean;\n /** Include HTML img tags `<img src=\"url\">` (default: true) */\n includeHTML?: boolean;\n /** Include base64 data URLs `data:image/...;base64,...` (default: true) */\n includeBase64?: boolean;\n}\n\nexport interface ExtractedImage {\n /** The extracted URL or data URL */\n url: string;\n /** Source format where the image was found */\n source: 'markdown' | 'html' | 'base64';\n /** Surrounding context for debugging (optional) */\n context?: string;\n}\n\n/**\n * Extract all image URLs from text.\n *\n * Supports markdown, HTML, and base64 data URLs.\n * Returns deduplicated results.\n *\n * @param text - Text to extract image URLs from\n * @param options - Extraction options\n * @returns Array of extracted images with source information\n *\n * @example\n * ```typescript\n * const text = 'Check out  and <img src=\"banner.jpg\">';\n * const images = extractImageUrls(text);\n * // [\n * // { url: 'avatar.png', source: 'markdown' },\n * // { url: 'banner.jpg', source: 'html' }\n * // ]\n * ```\n */\nexport function extractImageUrls(\n text: string,\n options: ImageExtractionOptions = {},\n): ExtractedImage[] {\n const {\n includeMarkdown = true,\n includeHTML = true,\n includeBase64 = true,\n } = options;\n\n const results: ExtractedImage[] = [];\n const seen = new Set<string>();\n\n // Extract markdown images:  or  or \n // Supports: standard, angle brackets, and SillyTavern dimension syntax\n if (includeMarkdown) {\n const markdownPattern = 
/!\\[([^\\]]*)\\]\\(<?([^>\\s)]+)>?(?:\\s*=[^)]+)?\\)/g;\n let match: RegExpExecArray | null;\n\n while ((match = markdownPattern.exec(text)) !== null) {\n const url = match[2]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'markdown',\n context: match[0],\n });\n }\n }\n }\n\n // Extract HTML img tags: <img src=\"url\"> (quoted and unquoted)\n if (includeHTML) {\n // Quoted src\n const htmlQuotedPattern = /<img[^>]+src=[\"']([^\"']+)[\"'][^>]*>/gi;\n let match: RegExpExecArray | null;\n\n while ((match = htmlQuotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // Unquoted src (e.g., <img src=url>)\n const htmlUnquotedPattern = /<img[^>]+src=([^\\s\"'>]+)[^>]*>/gi;\n while ((match = htmlUnquotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Skip if it starts with a quote (already handled above)\n if (url && !url.startsWith('\"') && !url.startsWith(\"'\") && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // CSS url() function: url(url), url(\"url\"), url('url')\n // Common in background-image, background, content properties\n const cssUrlPattern = /url\\([\"']?([^)\"']+)[\"']?\\)/gi;\n while ((match = cssUrlPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Only include http(s) URLs to avoid CSS variables and relative paths\n if (url && (url.startsWith('http://') || url.startsWith('https://')) && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n }\n\n // Extract plain image URLs (not wrapped in any syntax)\n if (includeHTML) {\n // Plain URLs with image extensions (common on image hosts)\n const plainUrlPattern = /(?<![(\"'])(https?:\\/\\/[^\\s<>\"']+\\.(?:jpg|jpeg|png|gif|webp|svg|avif|bmp))(?![)\"'])/gi;\n 
let match: RegExpExecArray | null;\n\n while ((match = plainUrlPattern.exec(text)) !== null) {\n const url = match[0]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: url,\n });\n }\n }\n }\n\n // Extract base64 data URLs: data:image/...;base64,...\n if (includeBase64) {\n const dataUrlPattern = /data:image\\/[^;]+;base64,[A-Za-z0-9+/=]+/g;\n let match: RegExpExecArray | null;\n\n while ((match = dataUrlPattern.exec(text)) !== null) {\n const url = match[0];\n if (!seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'base64',\n });\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract only HTTP/HTTPS URLs (excludes data URLs and relative paths).\n *\n * Convenience wrapper around extractImageUrls that filters for remote URLs.\n *\n * @param text - Text to extract URLs from\n * @returns Array of extracted HTTP/HTTPS image URLs\n */\nexport function extractRemoteImageUrls(text: string): ExtractedImage[] {\n const all = extractImageUrls(text, { includeBase64: false });\n return all.filter((img) => /^https?:\\/\\//i.test(img.url));\n}\n\n/**\n * Extract only base64 data URLs.\n *\n * Convenience wrapper around extractImageUrls.\n *\n * @param text - Text to extract data URLs from\n * @returns Array of base64 image data URLs\n */\nexport function extractDataUrls(text: string): ExtractedImage[] {\n return extractImageUrls(text, {\n includeMarkdown: false,\n includeHTML: false,\n includeBase64: true,\n });\n}\n\n/**\n * Count images in text without extracting full details.\n *\n * More efficient than extractImageUrls when you only need the count.\n *\n * @param text - Text to count images in\n * @returns Total number of unique image references\n */\nexport function countImages(text: string): number {\n return extractImageUrls(text).length;\n}\n","/**\n * SSRF (Server-Side Request Forgery) Protection\n *\n * Validates URLs to prevent SSRF attacks.\n * Browser-safe implementation (no Node.js 
dependencies).\n */\n\nexport interface SSRFPolicy {\n /** Allow private IP addresses (10.x, 172.16-31.x, 192.168.x) (default: false) */\n allowPrivateIPs?: boolean;\n /** Allow localhost/loopback (127.x, ::1) (default: false) */\n allowLocalhost?: boolean;\n /** Blocked domain patterns (e.g., ['internal.company.com', '*.local']) */\n blockedDomains?: string[];\n /** Allowed domain patterns - if provided, ONLY these are allowed */\n allowedDomains?: string[];\n /** Allow data URLs (data:image/...) (default: false) */\n allowDataUrls?: boolean;\n}\n\nexport interface SafetyCheck {\n /** Whether the URL is safe according to policy */\n safe: boolean;\n /** Reason why URL is unsafe (if safe=false) */\n reason?: string;\n}\n\n/**\n * Default SSRF policy - blocks private IPs, localhost, and data URLs.\n */\nexport const DEFAULT_SSRF_POLICY: Required<SSRFPolicy> = {\n allowPrivateIPs: false,\n allowLocalhost: false,\n blockedDomains: [],\n allowedDomains: [],\n allowDataUrls: false,\n};\n\n/**\n * Check if URL is safe to fetch according to SSRF policy.\n *\n * This is the canonical SSRF protection - all apps should use this\n * before fetching external URLs.\n *\n * @param url - URL to validate\n * @param policy - SSRF policy (uses defaults if not provided)\n * @returns Safety check result with reason if unsafe\n *\n * @example\n * ```typescript\n * const check = isURLSafe('http://10.0.0.1/secret');\n * if (!check.safe) {\n * console.error('Unsafe URL:', check.reason);\n * }\n * ```\n */\nexport function isURLSafe(\n url: string,\n policy: SSRFPolicy = {},\n): SafetyCheck {\n const config = { ...DEFAULT_SSRF_POLICY, ...policy };\n\n // Parse URL\n let parsed: URL;\n try {\n parsed = new URL(url);\n } catch {\n return { safe: false, reason: 'Invalid URL format' };\n }\n\n // Check protocol\n if (parsed.protocol === 'data:') {\n if (!config.allowDataUrls) {\n return { safe: false, reason: 'Data URLs not allowed' };\n }\n return { safe: true };\n }\n\n if (!['http:', 
'https:'].includes(parsed.protocol)) {\n return {\n safe: false,\n reason: `Protocol '${parsed.protocol}' not allowed (only http/https)`,\n };\n }\n\n const hostname = parsed.hostname.toLowerCase();\n\n // Check allowed domains (whitelist)\n if (config.allowedDomains.length > 0) {\n const isAllowed = config.allowedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (!isAllowed) {\n return {\n safe: false,\n reason: `Domain '${hostname}' not in allowed list`,\n };\n }\n }\n\n // Check blocked domains (blacklist)\n if (config.blockedDomains.length > 0) {\n const isBlocked = config.blockedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (isBlocked) {\n return {\n safe: false,\n reason: `Domain '${hostname}' is blocked`,\n };\n }\n }\n\n // Check localhost\n if (isLocalhost(hostname) && !config.allowLocalhost) {\n return { safe: false, reason: 'Localhost not allowed' };\n }\n\n // Check private IPs\n if (isPrivateIP(hostname) && !config.allowPrivateIPs) {\n return { safe: false, reason: 'Private IP addresses not allowed' };\n }\n\n return { safe: true };\n}\n\n/**\n * Quick check if URL is safe with default policy.\n *\n * Convenience wrapper for common case.\n *\n * @param url - URL to validate\n * @returns true if safe, false otherwise\n */\nexport function isSafeForFetch(url: string): boolean {\n return isURLSafe(url).safe;\n}\n\n/**\n * Match domain against pattern (supports wildcards).\n *\n * @example\n * ```typescript\n * matchDomainPattern('api.github.com', '*.github.com') // true\n * matchDomainPattern('github.com', '*.github.com') // false\n * matchDomainPattern('github.com', 'github.com') // true\n * ```\n */\nfunction matchDomainPattern(domain: string, pattern: string): boolean {\n // Exact match\n if (domain === pattern) return true;\n\n // Wildcard match - only matches subdomains, not the base domain\n if (pattern.startsWith('*.')) {\n const suffix = pattern.slice(2);\n return domain.endsWith('.' 
+ suffix);\n }\n\n return false;\n}\n\n/**\n * Check if hostname is localhost/loopback.\n */\nfunction isLocalhost(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 loopback\n if (lower === 'localhost' || lower === '0.0.0.0' || lower.startsWith('127.')) {\n return true;\n }\n\n // IPv6 loopback\n if (lower === '::1' || lower === '[::1]') {\n return true;\n }\n\n return false;\n}\n\n/**\n * Check if hostname is a private IP address.\n *\n * Checks for:\n * - 10.0.0.0/8\n * - 172.16.0.0/12\n * - 192.168.0.0/16\n * - Link-local: 169.254.0.0/16 (includes AWS metadata endpoint)\n * - IPv6 private ranges\n */\nfunction isPrivateIP(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 private ranges\n const parts = hostname.split('.').map(Number);\n\n if (\n parts.length === 4 &&\n parts.every((p) => p >= 0 && p <= 255 && !isNaN(p))\n ) {\n const [octet1, octet2] = parts;\n\n // 10.0.0.0/8\n if (octet1 === 10) return true;\n\n // 172.16.0.0/12\n if (octet1 === 172 && octet2 !== undefined && octet2 >= 16 && octet2 <= 31)\n return true;\n\n // 192.168.0.0/16\n if (octet1 === 192 && octet2 === 168) return true;\n\n // 169.254.0.0/16 (link-local) - includes AWS metadata endpoint\n if (octet1 === 169 && octet2 === 254) return true;\n }\n\n // IPv6 private ranges using regex patterns\n // Strip brackets if present (URL parser adds them: [fc00::1])\n const cleanedHostname = lower.replace(/^\\[|\\]$/g, '');\n\n const ipv6Patterns = [\n /^f[cd][0-9a-f]{2}:/i, // fc00::/7 (includes both fc and fd ranges)\n /^fe80:/i, // fe80::/10 (link-local)\n ];\n\n for (const pattern of ipv6Patterns) {\n if (pattern.test(cleanedHostname)) {\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Filter array of URLs, keeping only safe ones.\n *\n * @param urls - Array of URLs to filter\n * @param policy - SSRF policy\n * @returns Array of safe URLs\n */\nexport function filterSafeUrls(\n urls: string[],\n policy?: SSRFPolicy,\n): string[] 
{\n return urls.filter((url) => isURLSafe(url, policy).safe);\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;AC6CO,SAAS,iBACd,MACA,UAAkC,CAAC,GACjB;AAClB,QAAM;IACJ,kBAAkB;IAClB,cAAc;IACd,gBAAgB;EAClB,IAAI;AAEJ,QAAM,UAA4B,CAAC;AACnC,QAAM,OAAO,oBAAI,IAAY;AAI7B,MAAI,iBAAiB;AACnB,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,oBAAoB;AAC1B,QAAI;AAEJ,YAAQ,QAAQ,kBAAkB,KAAK,IAAI,OAAO,MAAM;AACtD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAGA,UAAM,sBAAsB;AAC5B,YAAQ,QAAQ,oBAAoB,KAAK,IAAI,OAAO,MAAM;AACxD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,OAAO,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,GAAG;AACzE,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAIA,UAAM,gBAAgB;AACtB,YAAQ,QAAQ,cAAc,KAAK,IAAI,OAAO,MAAM;AAClD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,QAAQ,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU,MAAM,CAAC,KAAK,IAAI,GAAG,GAAG;AACtF,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS;QACX,CAAC;MACH;IACF;EACF;AAGA,MAAI,eAAe;AACjB,UAAM,iBAAiB;AACvB,QAAI;AAEJ,YAAQ,QAAQ,eAAe,KAAK,IAAI,OAAO,MAAM;AACnD,YAAM,MAAM,MAAM,CAAC;AACnB,UAAI,CAAC,KAAK,IAAI,GAAG,GAAG;AAClB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;QACV,CAAC;MACH;IACF;EACF;AAEA,SAAO;AACT;AAUO,SAAS,uBAAuB,MAAgC;AACrE,QAAM,MAAM,iBAAiB,MAAM,EAAE,eAAe,MAAM,CAAC;AAC3D,SAAO,IAAI,OAAO,CAAC,QAAQ,gBAAgB,KAAK,IAAI,GAAG,CAAC;AAC1D
;AAUO,SAAS,gBAAgB,MAAgC;AAC9D,SAAO,iBAAiB,MAAM;IAC5B,iBAAiB;IACjB,aAAa;IACb,eAAe;EACjB,CAAC;AACH;AAUO,SAAS,YAAY,MAAsB;AAChD,SAAO,iBAAiB,IAAI,EAAE;AAChC;AC/KO,IAAM,sBAA4C;EACvD,iBAAiB;EACjB,gBAAgB;EAChB,gBAAgB,CAAC;EACjB,gBAAgB,CAAC;EACjB,eAAe;AACjB;AAoBO,SAAS,UACd,KACA,SAAqB,CAAC,GACT;AACb,QAAM,SAAS,EAAE,GAAG,qBAAqB,GAAG,OAAO;AAGnD,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;EACtB,QAAQ;AACN,WAAO,EAAE,MAAM,OAAO,QAAQ,qBAAqB;EACrD;AAGA,MAAI,OAAO,aAAa,SAAS;AAC/B,QAAI,CAAC,OAAO,eAAe;AACzB,aAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;IACxD;AACA,WAAO,EAAE,MAAM,KAAK;EACtB;AAEA,MAAI,CAAC,CAAC,SAAS,QAAQ,EAAE,SAAS,OAAO,QAAQ,GAAG;AAClD,WAAO;MACL,MAAM;MACN,QAAQ,aAAa,OAAO,QAAQ;IACtC;EACF;AAEA,QAAM,WAAW,OAAO,SAAS,YAAY;AAG7C,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,CAAC,WAAW;AACd,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,WAAW;AACb,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,gBAAgB;AACnD,WAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;EACxD;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,iBAAiB;AACpD,WAAO,EAAE,MAAM,OAAO,QAAQ,mCAAmC;EACnE;AAEA,SAAO,EAAE,MAAM,KAAK;AACtB;AAUO,SAAS,eAAe,KAAsB;AACnD,SAAO,UAAU,GAAG,EAAE;AACxB;AAYA,SAAS,mBAAmB,QAAgB,SAA0B;AAEpE,MAAI,WAAW,QAAS,QAAO;AAG/B,MAAI,QAAQ,WAAW,IAAI,GAAG;AAC5B,UAAM,SAAS,QAAQ,MAAM,CAAC;AAC9B,WAAO,OAAO,SAAS,MAAM,MAAM;EACrC;AAEA,SAAO;AACT;AAKA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,MAAI,UAAU,eAAe,UAAU,aAAa,MAAM,WAAW,MAAM,GAAG;AAC5E,WAAO;EACT;AAGA,MAAI,UAAU,SAAS,UAAU,SAAS;AACxC,WAAO;EACT;AAEA,SAAO;AACT;AAYA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,QAAM,QAAQ,SAAS,MAAM,GAAG,EAAE,IAAI,MAAM;AAE5C,MACE,MAAM,WAAW,KACjB,MAAM,MAAM,CAAC,MAAM,KAAK,KAAK,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,GAClD;AACA,UAAM,CAAC,QAAQ,MAAM,IAAI;AAGzB,QAAI,WAAW,GAAI,QAAO;AAG1B,QAAI,WAAW,OAAO,WAAW,UAAa,UAAU,MAAM,UAAU;AACtE,aAAO;AAGT,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;AAG7C,QAAI,WAAW,OAAO,WAAW,IAAK,Q
AAO;EAC/C;AAIA,QAAM,kBAAkB,MAAM,QAAQ,YAAY,EAAE;AAEpD,QAAM,eAAe;IACnB;;IACA;;EACF;AAEA,aAAW,WAAW,cAAc;AAClC,QAAI,QAAQ,KAAK,eAAe,GAAG;AACjC,aAAO;IACT;EACF;AAEA,SAAO;AACT;AASO,SAAS,eACd,MACA,QACU;AACV,SAAO,KAAK,OAAO,CAAC,QAAQ,UAAU,KAAK,MAAM,EAAE,IAAI;AACzD;","names":[]}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Image URL Extraction
|
|
3
|
+
*
|
|
4
|
+
* Extracts image URLs from markdown, HTML, and base64 data URLs in text.
|
|
5
|
+
* Used across Federation, Archive, and Architect for image processing.
|
|
6
|
+
*/
|
|
7
|
+
export interface ImageExtractionOptions {
|
|
8
|
+
/** Include markdown image syntax `![alt](url)` (default: true) */
|
|
9
|
+
includeMarkdown?: boolean;
|
|
10
|
+
/** Include HTML img tags `<img src="url">` (default: true) */
|
|
11
|
+
includeHTML?: boolean;
|
|
12
|
+
/** Include base64 data URLs `data:image/...;base64,...` (default: true) */
|
|
13
|
+
includeBase64?: boolean;
|
|
14
|
+
}
|
|
15
|
+
export interface ExtractedImage {
|
|
16
|
+
/** The extracted URL or data URL */
|
|
17
|
+
url: string;
|
|
18
|
+
/** Source format where the image was found */
|
|
19
|
+
source: "markdown" | "html" | "base64";
|
|
20
|
+
/** Surrounding context for debugging (optional) */
|
|
21
|
+
context?: string;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Extract all image URLs from text.
|
|
25
|
+
*
|
|
26
|
+
* Supports markdown, HTML, and base64 data URLs.
|
|
27
|
+
* Returns deduplicated results.
|
|
28
|
+
*
|
|
29
|
+
* @param text - Text to extract image URLs from
|
|
30
|
+
* @param options - Extraction options
|
|
31
|
+
* @returns Array of extracted images with source information
|
|
32
|
+
*
|
|
33
|
+
* @example
|
|
34
|
+
* ```typescript
|
|
35
|
+
* const text = 'Check out ![avatar](avatar.png) and <img src="banner.jpg">';
|
|
36
|
+
* const images = extractImageUrls(text);
|
|
37
|
+
* // [
|
|
38
|
+
* // { url: 'avatar.png', source: 'markdown' },
|
|
39
|
+
* // { url: 'banner.jpg', source: 'html' }
|
|
40
|
+
* // ]
|
|
41
|
+
* ```
|
|
42
|
+
*/
|
|
43
|
+
export declare function extractImageUrls(text: string, options?: ImageExtractionOptions): ExtractedImage[];
|
|
44
|
+
/**
|
|
45
|
+
* Extract only HTTP/HTTPS URLs (excludes data URLs and relative paths).
|
|
46
|
+
*
|
|
47
|
+
* Convenience wrapper around extractImageUrls that filters for remote URLs.
|
|
48
|
+
*
|
|
49
|
+
* @param text - Text to extract URLs from
|
|
50
|
+
* @returns Array of extracted HTTP/HTTPS image URLs
|
|
51
|
+
*/
|
|
52
|
+
export declare function extractRemoteImageUrls(text: string): ExtractedImage[];
|
|
53
|
+
/**
|
|
54
|
+
* Extract only base64 data URLs.
|
|
55
|
+
*
|
|
56
|
+
* Convenience wrapper around extractImageUrls.
|
|
57
|
+
*
|
|
58
|
+
* @param text - Text to extract data URLs from
|
|
59
|
+
* @returns Array of base64 image data URLs
|
|
60
|
+
*/
|
|
61
|
+
export declare function extractDataUrls(text: string): ExtractedImage[];
|
|
62
|
+
/**
|
|
63
|
+
* Count the unique image references found in text.
|
|
64
|
+
*
|
|
65
|
+
* Convenience wrapper: runs the full extractImageUrls pass and returns the number of results.
|
|
66
|
+
*
|
|
67
|
+
* @param text - Text to count images in
|
|
68
|
+
* @returns Total number of unique image references
|
|
69
|
+
*/
|
|
70
|
+
export declare function countImages(text: string): number;
|
|
71
|
+
/**
|
|
72
|
+
* SSRF (Server-Side Request Forgery) Protection
|
|
73
|
+
*
|
|
74
|
+
* Validates URLs to prevent SSRF attacks.
|
|
75
|
+
* Browser-safe implementation (no Node.js dependencies).
|
|
76
|
+
*/
|
|
77
|
+
export interface SSRFPolicy {
|
|
78
|
+
/** Allow private IP addresses (10.x, 172.16-31.x, 192.168.x, link-local 169.254.x, IPv6 unique-local and link-local) (default: false) */
|
|
79
|
+
allowPrivateIPs?: boolean;
|
|
80
|
+
/** Allow localhost/loopback (127.x, ::1) (default: false) */
|
|
81
|
+
allowLocalhost?: boolean;
|
|
82
|
+
/** Blocked domain patterns (e.g., ['internal.company.com', '*.local']) */
|
|
83
|
+
blockedDomains?: string[];
|
|
84
|
+
/** Allowed domain patterns - if provided, ONLY these are allowed */
|
|
85
|
+
allowedDomains?: string[];
|
|
86
|
+
/** Allow data URLs; when true every `data:` URL passes, not only `data:image/...` (default: false) */
|
|
87
|
+
allowDataUrls?: boolean;
|
|
88
|
+
}
|
|
89
|
+
export interface SafetyCheck {
|
|
90
|
+
/** Whether the URL is safe according to policy */
|
|
91
|
+
safe: boolean;
|
|
92
|
+
/** Reason why URL is unsafe (if safe=false) */
|
|
93
|
+
reason?: string;
|
|
94
|
+
}
|
|
95
|
+
/**
|
|
96
|
+
* Default SSRF policy - blocks private IPs, localhost, and data URLs.
|
|
97
|
+
*/
|
|
98
|
+
export declare const DEFAULT_SSRF_POLICY: Required<SSRFPolicy>;
|
|
99
|
+
/**
|
|
100
|
+
* Check if URL is safe to fetch according to SSRF policy.
|
|
101
|
+
*
|
|
102
|
+
* This is the canonical SSRF protection - all apps should use this
|
|
103
|
+
* before fetching external URLs.
|
|
104
|
+
*
|
|
105
|
+
* @param url - URL to validate
|
|
106
|
+
* @param policy - SSRF policy (uses defaults if not provided)
|
|
107
|
+
* @returns Safety check result with reason if unsafe
|
|
108
|
+
*
|
|
109
|
+
* @example
|
|
110
|
+
* ```typescript
|
|
111
|
+
* const check = isURLSafe('http://10.0.0.1/secret');
|
|
112
|
+
* if (!check.safe) {
|
|
113
|
+
* console.error('Unsafe URL:', check.reason);
|
|
114
|
+
* }
|
|
115
|
+
* ```
|
|
116
|
+
*/
|
|
117
|
+
export declare function isURLSafe(url: string, policy?: SSRFPolicy): SafetyCheck;
|
|
118
|
+
/**
|
|
119
|
+
* Quick check if URL is safe with default policy.
|
|
120
|
+
*
|
|
121
|
+
* Convenience wrapper for common case.
|
|
122
|
+
*
|
|
123
|
+
* @param url - URL to validate
|
|
124
|
+
* @returns true if safe, false otherwise
|
|
125
|
+
*/
|
|
126
|
+
export declare function isSafeForFetch(url: string): boolean;
|
|
127
|
+
/**
|
|
128
|
+
* Filter array of URLs, keeping only safe ones.
|
|
129
|
+
*
|
|
130
|
+
* @param urls - Array of URLs to filter
|
|
131
|
+
* @param policy - SSRF policy
|
|
132
|
+
* @returns Array of safe URLs
|
|
133
|
+
*/
|
|
134
|
+
export declare function filterSafeUrls(urls: string[], policy?: SSRFPolicy): string[];
|
|
135
|
+
|
|
136
|
+
export {};
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Image URL Extraction
|
|
3
|
+
*
|
|
4
|
+
* Extracts image URLs from markdown, HTML, and base64 data URLs in text.
|
|
5
|
+
* Used across Federation, Archive, and Architect for image processing.
|
|
6
|
+
*/
|
|
7
|
+
export interface ImageExtractionOptions {
|
|
8
|
+
/** Include markdown image syntax `![alt](url)` (default: true) */
|
|
9
|
+
includeMarkdown?: boolean;
|
|
10
|
+
/** Include HTML img tags `<img src="url">` (default: true) */
|
|
11
|
+
includeHTML?: boolean;
|
|
12
|
+
/** Include base64 data URLs `data:image/...;base64,...` (default: true) */
|
|
13
|
+
includeBase64?: boolean;
|
|
14
|
+
}
|
|
15
|
+
export interface ExtractedImage {
|
|
16
|
+
/** The extracted URL or data URL */
|
|
17
|
+
url: string;
|
|
18
|
+
/** Source format where the image was found */
|
|
19
|
+
source: "markdown" | "html" | "base64";
|
|
20
|
+
/** Surrounding context for debugging (optional) */
|
|
21
|
+
context?: string;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Extract all image URLs from text.
|
|
25
|
+
*
|
|
26
|
+
* Supports markdown, HTML, and base64 data URLs.
|
|
27
|
+
* Returns deduplicated results.
|
|
28
|
+
*
|
|
29
|
+
* @param text - Text to extract image URLs from
|
|
30
|
+
* @param options - Extraction options
|
|
31
|
+
* @returns Array of extracted images with source information
|
|
32
|
+
*
|
|
33
|
+
* @example
|
|
34
|
+
* ```typescript
|
|
35
|
+
* const text = 'Check out ![avatar](avatar.png) and <img src="banner.jpg">';
|
|
36
|
+
* const images = extractImageUrls(text);
|
|
37
|
+
* // [
|
|
38
|
+
* // { url: 'avatar.png', source: 'markdown' },
|
|
39
|
+
* // { url: 'banner.jpg', source: 'html' }
|
|
40
|
+
* // ]
|
|
41
|
+
* ```
|
|
42
|
+
*/
|
|
43
|
+
export declare function extractImageUrls(text: string, options?: ImageExtractionOptions): ExtractedImage[];
|
|
44
|
+
/**
|
|
45
|
+
* Extract only HTTP/HTTPS URLs (excludes data URLs and relative paths).
|
|
46
|
+
*
|
|
47
|
+
* Convenience wrapper around extractImageUrls that filters for remote URLs.
|
|
48
|
+
*
|
|
49
|
+
* @param text - Text to extract URLs from
|
|
50
|
+
* @returns Array of extracted HTTP/HTTPS image URLs
|
|
51
|
+
*/
|
|
52
|
+
export declare function extractRemoteImageUrls(text: string): ExtractedImage[];
|
|
53
|
+
/**
|
|
54
|
+
* Extract only base64 data URLs.
|
|
55
|
+
*
|
|
56
|
+
* Convenience wrapper around extractImageUrls.
|
|
57
|
+
*
|
|
58
|
+
* @param text - Text to extract data URLs from
|
|
59
|
+
* @returns Array of base64 image data URLs
|
|
60
|
+
*/
|
|
61
|
+
export declare function extractDataUrls(text: string): ExtractedImage[];
|
|
62
|
+
/**
|
|
63
|
+
* Count images in text without extracting full details.
|
|
64
|
+
*
|
|
65
|
+
* Convenience wrapper: runs a full extractImageUrls pass and returns the number of results.
|
|
66
|
+
*
|
|
67
|
+
* @param text - Text to count images in
|
|
68
|
+
* @returns Total number of unique image references
|
|
69
|
+
*/
|
|
70
|
+
export declare function countImages(text: string): number;
|
|
71
|
+
/**
|
|
72
|
+
* SSRF (Server-Side Request Forgery) Protection
|
|
73
|
+
*
|
|
74
|
+
* Validates URLs to prevent SSRF attacks.
|
|
75
|
+
* Browser-safe implementation (no Node.js dependencies).
|
|
76
|
+
*/
|
|
77
|
+
export interface SSRFPolicy {
|
|
78
|
+
/** Allow private and link-local IP addresses (10.x, 172.16-31.x, 192.168.x, 169.254.x, IPv6 unique-local and link-local) (default: false) */
|
|
79
|
+
allowPrivateIPs?: boolean;
|
|
80
|
+
/** Allow localhost/loopback (127.x, ::1) (default: false) */
|
|
81
|
+
allowLocalhost?: boolean;
|
|
82
|
+
/** Blocked domain patterns (e.g., ['internal.company.com', '*.local']) */
|
|
83
|
+
blockedDomains?: string[];
|
|
84
|
+
/** Allowed domain patterns - if provided, ONLY these are allowed */
|
|
85
|
+
allowedDomains?: string[];
|
|
86
|
+
/** Allow data URLs (data:image/...) (default: false) */
|
|
87
|
+
allowDataUrls?: boolean;
|
|
88
|
+
}
|
|
89
|
+
export interface SafetyCheck {
|
|
90
|
+
/** Whether the URL is safe according to policy */
|
|
91
|
+
safe: boolean;
|
|
92
|
+
/** Reason why URL is unsafe (if safe=false) */
|
|
93
|
+
reason?: string;
|
|
94
|
+
}
|
|
95
|
+
/**
|
|
96
|
+
* Default SSRF policy - blocks private IPs, localhost, and data URLs.
|
|
97
|
+
*/
|
|
98
|
+
export declare const DEFAULT_SSRF_POLICY: Required<SSRFPolicy>;
|
|
99
|
+
/**
|
|
100
|
+
* Check if URL is safe to fetch according to SSRF policy.
|
|
101
|
+
*
|
|
102
|
+
* This is the canonical SSRF protection - all apps should use this
|
|
103
|
+
* before fetching external URLs.
|
|
104
|
+
*
|
|
105
|
+
* @param url - URL to validate
|
|
106
|
+
* @param policy - SSRF policy (uses defaults if not provided)
|
|
107
|
+
* @returns Safety check result with reason if unsafe
|
|
108
|
+
*
|
|
109
|
+
* @example
|
|
110
|
+
* ```typescript
|
|
111
|
+
* const check = isURLSafe('http://10.0.0.1/secret');
|
|
112
|
+
* if (!check.safe) {
|
|
113
|
+
* console.error('Unsafe URL:', check.reason);
|
|
114
|
+
* }
|
|
115
|
+
* ```
|
|
116
|
+
*/
|
|
117
|
+
export declare function isURLSafe(url: string, policy?: SSRFPolicy): SafetyCheck;
|
|
118
|
+
/**
|
|
119
|
+
* Quick check if URL is safe with default policy.
|
|
120
|
+
*
|
|
121
|
+
* Convenience wrapper for common case.
|
|
122
|
+
*
|
|
123
|
+
* @param url - URL to validate
|
|
124
|
+
* @returns true if safe, false otherwise
|
|
125
|
+
*/
|
|
126
|
+
export declare function isSafeForFetch(url: string): boolean;
|
|
127
|
+
/**
|
|
128
|
+
* Filter array of URLs, keeping only safe ones.
|
|
129
|
+
*
|
|
130
|
+
* @param urls - Array of URLs to filter
|
|
131
|
+
* @param policy - SSRF policy
|
|
132
|
+
* @returns Array of safe URLs
|
|
133
|
+
*/
|
|
134
|
+
export declare function filterSafeUrls(urls: string[], policy?: SSRFPolicy): string[];
|
|
135
|
+
|
|
136
|
+
export {};
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
// ../image-utils/dist/index.js
|
|
2
|
+
/**
 * Extract every image reference found in a piece of text.
 *
 * Passes run in a fixed order and share one de-duplication set, so a URL is
 * reported once, tagged with the first syntax it was found in:
 *   1. markdown images: `![alt](url)`, `![alt](<url>)`, `![alt](url =WxH)`
 *   2. HTML `<img>` tags with quoted src, then unquoted src
 *   3. CSS `url(...)` values (http/https only)
 *   4. bare http(s) URLs ending in a known image extension
 *   5. base64 `data:image/...` URLs
 * Passes 2-4 are all controlled by `includeHTML`.
 *
 * @param {string} text - Text to scan; null/undefined is treated as empty.
 * @param {{includeMarkdown?: boolean, includeHTML?: boolean, includeBase64?: boolean}} [options]
 *   Which syntaxes to scan for (each defaults to true).
 * @returns {Array<{url: string, source: "markdown"|"html"|"base64", context?: string}>}
 *   De-duplicated matches in discovery order. `context` is the full matched
 *   snippet and is omitted for base64 entries.
 */
function extractImageUrls(text, options = {}) {
  const {
    includeMarkdown = true,
    includeHTML = true,
    includeBase64 = true,
  } = options ?? {};

  const input = String(text ?? "");
  const results = [];
  const seen = new Set();

  // Shared scan loop: run `pattern` over the input, pull the URL out of each
  // match with `pick`, and record it once. A fresh regex literal is passed in
  // on every call, so no `lastIndex` state leaks between passes.
  const collect = (pattern, source, pick, { accept, withContext = true } = {}) => {
    for (const match of input.matchAll(pattern)) {
      const url = pick(match)?.trim();
      if (!url || seen.has(url)) continue;
      if (accept && !accept(url)) continue;
      seen.add(url);
      results.push(withContext ? { url, source, context: match[0] } : { url, source });
    }
  };

  if (includeMarkdown) {
    collect(/!\[([^\]]*)\]\(<?([^>\s)]+)>?(?:\s*=[^)]+)?\)/g, "markdown", (m) => m[2]);
  }

  if (includeHTML) {
    // Quoted src. The closing quote must be the same character as the opening
    // one, so an apostrophe inside a double-quoted URL (or vice versa) no
    // longer truncates the captured value.
    collect(
      /<img[^>]+src=(?:"([^"]+)"|'([^']+)')[^>]*>/gi,
      "html",
      (m) => m[1] ?? m[2],
    );

    // Unquoted src, e.g. <img src=url>. The character class already excludes
    // quotes, so quoted attributes never reach this pass.
    collect(/<img[^>]+src=([^\s"'>]+)[^>]*>/gi, "html", (m) => m[1]);

    // CSS url(...): keep only absolute http(s) values so CSS variables and
    // relative paths are not reported.
    collect(/url\(["']?([^)"']+)["']?\)/gi, "html", (m) => m[1], {
      accept: (url) => url.startsWith("http://") || url.startsWith("https://"),
    });

    // Bare image URLs that are not wrapped in (), "" or ''.
    collect(
      /(?<![("'])(https?:\/\/[^\s<>"']+\.(?:jpg|jpeg|png|gif|webp|svg|avif|bmp))(?![)"'])/gi,
      "html",
      (m) => m[0],
    );
  }

  if (includeBase64) {
    collect(/data:image\/[^;]+;base64,[A-Za-z0-9+/=]+/g, "base64", (m) => m[0], {
      withContext: false,
    });
  }

  return results;
}
|
|
95
|
+
/**
 * Extract only absolute http/https image URLs from text.
 *
 * Runs extractImageUrls with the base64 pass disabled, then drops every
 * entry whose URL does not start with `http://` or `https://`
 * (case-insensitive), which removes relative paths.
 *
 * @param {string} text - Text to scan.
 * @returns {Array<{url: string, source: string, context?: string}>} Remote image entries.
 */
function extractRemoteImageUrls(text) {
  const isRemote = (img) => /^https?:\/\//i.test(img.url);
  return extractImageUrls(text, { includeBase64: false }).filter(isRemote);
}
|
|
99
|
+
/**
 * Extract only base64 `data:image/...` URLs from text.
 *
 * Delegates to extractImageUrls with the markdown and HTML passes turned off.
 *
 * @param {string} text - Text to scan.
 * @returns {Array<{url: string, source: string}>} Base64 image entries.
 */
function extractDataUrls(text) {
  const base64Only = {
    includeMarkdown: false,
    includeHTML: false,
    includeBase64: true,
  };
  return extractImageUrls(text, base64Only);
}
|
|
106
|
+
/**
 * Count the unique image references in text.
 *
 * This performs a full extractImageUrls pass with default options and
 * returns the length of the result.
 *
 * @param {string} text - Text to scan.
 * @returns {number} Number of de-duplicated image references.
 */
function countImages(text) {
  const { length } = extractImageUrls(text);
  return length;
}
|
|
109
|
+
/**
 * Baseline SSRF policy that isURLSafe merges caller overrides onto.
 * Everything risky is off: no private IPs, no localhost, no data URLs.
 * Empty domain lists mean "no allow-list restriction" and "nothing blocked".
 */
var DEFAULT_SSRF_POLICY = {
  // Address classes
  allowPrivateIPs: false,
  allowLocalhost: false,

  // Domain lists (empty = not enforced)
  blockedDomains: [],
  allowedDomains: [],

  // Non-network scheme
  allowDataUrls: false,
};
|
|
116
|
+
/**
 * Check whether a URL may be fetched under an SSRF policy.
 *
 * Checks run in this order and the first failure is returned:
 *   1. the URL must parse
 *   2. `data:` URLs are accepted only when `allowDataUrls` is set
 *   3. any other scheme must be http or https
 *   4. if `allowedDomains` is non-empty, the hostname must match one entry
 *   5. the hostname must not match any `blockedDomains` entry
 *   6. localhost / loopback hosts need `allowLocalhost`
 *   7. private IP literals need `allowPrivateIPs`
 *
 * Only the literal hostname in the URL is examined; no DNS resolution is
 * performed here.
 *
 * @param {string} url - URL to validate.
 * @param {object} [policy] - Partial SSRF policy. Fields that are missing,
 *   `undefined` or `null` fall back to DEFAULT_SSRF_POLICY.
 * @returns {{safe: boolean, reason?: string}} `reason` is set when `safe` is false.
 */
function isURLSafe(url, policy = {}) {
  // Merge field by field instead of spreading, so an explicit
  // `{ allowedDomains: undefined }` cannot erase a default and crash the
  // `.length` checks below.
  const config = { ...DEFAULT_SSRF_POLICY };
  for (const [key, value] of Object.entries(policy ?? {})) {
    if (value !== undefined && value !== null) {
      config[key] = value;
    }
  }

  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return { safe: false, reason: "Invalid URL format" };
  }

  if (parsed.protocol === "data:") {
    return config.allowDataUrls
      ? { safe: true }
      : { safe: false, reason: "Data URLs not allowed" };
  }

  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    return {
      safe: false,
      reason: `Protocol '${parsed.protocol}' not allowed (only http/https)`,
    };
  }

  // Drop the root-label trailing dot: "localhost." and "localhost" name the
  // same host, and the dot would otherwise slip past every check below.
  const hostname = parsed.hostname.toLowerCase().replace(/\.$/, "");

  if (config.allowedDomains.length > 0) {
    const isAllowed = config.allowedDomains.some(
      (pattern) => matchDomainPattern(hostname, pattern),
    );
    if (!isAllowed) {
      return {
        safe: false,
        reason: `Domain '${hostname}' not in allowed list`,
      };
    }
  }

  if (config.blockedDomains.length > 0) {
    const isBlocked = config.blockedDomains.some(
      (pattern) => matchDomainPattern(hostname, pattern),
    );
    if (isBlocked) {
      return {
        safe: false,
        reason: `Domain '${hostname}' is blocked`,
      };
    }
  }

  // An allow-listed hostname is still subject to the address checks.
  if (isLocalhost(hostname) && !config.allowLocalhost) {
    return { safe: false, reason: "Localhost not allowed" };
  }

  if (isPrivateIP(hostname) && !config.allowPrivateIPs) {
    return { safe: false, reason: "Private IP addresses not allowed" };
  }

  return { safe: true };
}
|
|
167
|
+
/**
 * Boolean shortcut for isURLSafe using the default policy.
 *
 * @param {string} url - URL to validate.
 * @returns {boolean} true when isURLSafe reports the URL as safe.
 */
function isSafeForFetch(url) {
  const { safe } = isURLSafe(url);
  return safe;
}
|
|
170
|
+
/**
 * Match a hostname against a policy domain pattern.
 *
 * A pattern is either an exact hostname (`github.com`) or a wildcard of the
 * form `*.suffix`, which matches any subdomain of `suffix` but not `suffix`
 * itself. Both sides are compared case-insensitively and without the
 * optional trailing root dot, so `Internal.Corp.com` in a block list still
 * catches `internal.corp.com.`.
 *
 * @param {string} domain - Hostname to test.
 * @param {string} pattern - Exact hostname or `*.suffix` wildcard.
 * @returns {boolean} true when the hostname matches the pattern.
 */
function matchDomainPattern(domain, pattern) {
  const normalize = (value) => value.trim().toLowerCase().replace(/\.$/, "");
  const host = normalize(domain);
  const rule = normalize(pattern);

  if (host === rule) return true;

  if (rule.startsWith("*.")) {
    // Keep the leading dot of the suffix so "evilgithub.com" cannot match
    // "*.github.com".
    return host.endsWith(rule.slice(1));
  }

  return false;
}
|
|
178
|
+
/**
 * Report whether a hostname refers to the local machine.
 *
 * Recognised forms:
 *   - `localhost` and any `*.localhost` name (RFC 6761 reserves the whole
 *     `.localhost` zone for loopback), with or without a trailing root dot
 *   - IPv4 loopback `127.x.x.x` and the unspecified address `0.0.0.0`
 *   - IPv6 loopback `::1` and unspecified `::`, with or without brackets
 *   - IPv4-mapped IPv6 spellings of the above, in both the dotted form
 *     (`::ffff:127.0.0.1`) and the hex form the URL parser serialises to
 *     (`::ffff:7f00:1`)
 *
 * @param {string} hostname - Hostname as returned by `URL#hostname`.
 * @returns {boolean} true when the hostname is a localhost/loopback name or address.
 */
function isLocalhost(hostname) {
  // Case-fold, strip IPv6 brackets, strip the root-label trailing dot.
  const host = hostname
    .toLowerCase()
    .replace(/^\[|\]$/g, "")
    .replace(/\.$/, "");

  if (host === "localhost" || host.endsWith(".localhost")) {
    return true;
  }

  // Deliberately a prefix test: over-blocking a name such as
  // "127.example.com" is the safe direction for an SSRF filter.
  if (host === "0.0.0.0" || host.startsWith("127.")) {
    return true;
  }

  if (host === "::1" || host === "::") {
    return true;
  }

  // IPv4-mapped IPv6: 127.0.0.0/8 is ::ffff:7fXX:XXXX, 0.0.0.0 is ::ffff:0:0.
  if (/^::ffff:(?:127\.|7f[0-9a-f]{2}:)/.test(host)) {
    return true;
  }
  return host === "::ffff:0:0" || host === "::ffff:0.0.0.0";
}
|
|
188
|
+
/**
 * Report whether a hostname is a private or link-local IP literal.
 *
 * Ranges checked:
 *   - IPv4 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16
 *   - IPv4 link-local 169.254.0.0/16 (cloud metadata endpoints live here)
 *   - the same IPv4 ranges written as IPv4-mapped IPv6, in dotted form
 *     (`::ffff:10.0.0.1`) or the hex form the URL parser emits (`::ffff:a00:1`)
 *   - IPv6 unique-local fc00::/7 and link-local fe80::/10
 *
 * Hostnames that are not IP literals always return false; nothing is resolved.
 *
 * @param {string} hostname - Hostname as returned by `URL#hostname`
 *   (IPv6 literals may be wrapped in brackets).
 * @returns {boolean} true when the literal falls inside one of the ranges above.
 */
function isPrivateIP(hostname) {
  const host = hostname.toLowerCase().replace(/^\[|\]$/g, "");

  // Dotted quad -> four octets, or null. NaN fails both comparisons, so no
  // separate NaN test is needed.
  const toOctets = (dotted) => {
    const octets = dotted.split(".").map(Number);
    const valid = octets.length === 4 && octets.every((o) => o >= 0 && o <= 255);
    return valid ? octets : null;
  };

  const inPrivateV4Range = ([first, second]) =>
    first === 10 ||
    (first === 172 && second >= 16 && second <= 31) ||
    (first === 192 && second === 168) ||
    (first === 169 && second === 254);

  const v4 = toOctets(host);
  if (v4 !== null && inPrivateV4Range(v4)) {
    return true;
  }

  // IPv4-mapped IPv6 (::ffff:a.b.c.d). The URL parser rewrites the dotted
  // tail as two hex groups, so both spellings are decoded back to octets.
  const mapped =
    /^::ffff:(?:([0-9a-f]{1,4}):([0-9a-f]{1,4})|(\d+\.\d+\.\d+\.\d+))$/.exec(host);
  if (mapped !== null) {
    let octets;
    if (mapped[3] !== undefined) {
      octets = toOctets(mapped[3]);
    } else {
      const high = Number.parseInt(mapped[1], 16);
      const low = Number.parseInt(mapped[2], 16);
      octets = [high >> 8, high & 0xff, low >> 8, low & 0xff];
    }
    if (octets !== null && inPrivateV4Range(octets)) {
      return true;
    }
  }

  // fc00::/7 -> first group fcxx or fdxx.
  if (/^f[cd][0-9a-f]{2}:/.test(host)) {
    return true;
  }
  // fe80::/10 -> first group fe80 through febf, not just the literal "fe80".
  return /^fe[89ab][0-9a-f]:/.test(host);
}
|
|
213
|
+
/**
 * Keep only the URLs that isURLSafe accepts under the given policy.
 *
 * @param {string[]} urls - Candidate URLs.
 * @param {object} [policy] - SSRF policy forwarded unchanged to isURLSafe.
 * @returns {string[]} New array with the safe URLs, in their original order.
 */
function filterSafeUrls(urls, policy) {
  const passesPolicy = (candidate) => {
    const { safe } = isURLSafe(candidate, policy);
    return safe;
  };
  return urls.filter(passesPolicy);
}
|
|
216
|
+
export {
|
|
217
|
+
DEFAULT_SSRF_POLICY,
|
|
218
|
+
countImages,
|
|
219
|
+
extractDataUrls,
|
|
220
|
+
extractImageUrls,
|
|
221
|
+
extractRemoteImageUrls,
|
|
222
|
+
filterSafeUrls,
|
|
223
|
+
isSafeForFetch,
|
|
224
|
+
isURLSafe
|
|
225
|
+
};
|
|
226
|
+
//# sourceMappingURL=image-utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../image-utils/src/extraction.ts","../../image-utils/src/ssrf.ts"],"sourcesContent":["/**\n * Image URL Extraction\n *\n * Extracts image URLs from markdown, HTML, and base64 data URLs in text.\n * Used across Federation, Archive, and Architect for image processing.\n */\n\nexport interface ImageExtractionOptions {\n /** Include markdown image syntax `` (default: true) */\n includeMarkdown?: boolean;\n /** Include HTML img tags `<img src=\"url\">` (default: true) */\n includeHTML?: boolean;\n /** Include base64 data URLs `data:image/...;base64,...` (default: true) */\n includeBase64?: boolean;\n}\n\nexport interface ExtractedImage {\n /** The extracted URL or data URL */\n url: string;\n /** Source format where the image was found */\n source: 'markdown' | 'html' | 'base64';\n /** Surrounding context for debugging (optional) */\n context?: string;\n}\n\n/**\n * Extract all image URLs from text.\n *\n * Supports markdown, HTML, and base64 data URLs.\n * Returns deduplicated results.\n *\n * @param text - Text to extract image URLs from\n * @param options - Extraction options\n * @returns Array of extracted images with source information\n *\n * @example\n * ```typescript\n * const text = 'Check out  and <img src=\"banner.jpg\">';\n * const images = extractImageUrls(text);\n * // [\n * // { url: 'avatar.png', source: 'markdown' },\n * // { url: 'banner.jpg', source: 'html' }\n * // ]\n * ```\n */\nexport function extractImageUrls(\n text: string,\n options: ImageExtractionOptions = {},\n): ExtractedImage[] {\n const {\n includeMarkdown = true,\n includeHTML = true,\n includeBase64 = true,\n } = options;\n\n const results: ExtractedImage[] = [];\n const seen = new Set<string>();\n\n // Extract markdown images:  or  or \n // Supports: standard, angle brackets, and SillyTavern dimension syntax\n if (includeMarkdown) {\n const markdownPattern = /!\\[([^\\]]*)\\]\\(<?([^>\\s)]+)>?(?:\\s*=[^)]+)?\\)/g;\n let match: RegExpExecArray | null;\n\n 
while ((match = markdownPattern.exec(text)) !== null) {\n const url = match[2]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'markdown',\n context: match[0],\n });\n }\n }\n }\n\n // Extract HTML img tags: <img src=\"url\"> (quoted and unquoted)\n if (includeHTML) {\n // Quoted src\n const htmlQuotedPattern = /<img[^>]+src=[\"']([^\"']+)[\"'][^>]*>/gi;\n let match: RegExpExecArray | null;\n\n while ((match = htmlQuotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // Unquoted src (e.g., <img src=url>)\n const htmlUnquotedPattern = /<img[^>]+src=([^\\s\"'>]+)[^>]*>/gi;\n while ((match = htmlUnquotedPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Skip if it starts with a quote (already handled above)\n if (url && !url.startsWith('\"') && !url.startsWith(\"'\") && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n\n // CSS url() function: url(url), url(\"url\"), url('url')\n // Common in background-image, background, content properties\n const cssUrlPattern = /url\\([\"']?([^)\"']+)[\"']?\\)/gi;\n while ((match = cssUrlPattern.exec(text)) !== null) {\n const url = match[1]?.trim();\n // Only include http(s) URLs to avoid CSS variables and relative paths\n if (url && (url.startsWith('http://') || url.startsWith('https://')) && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: match[0],\n });\n }\n }\n }\n\n // Extract plain image URLs (not wrapped in any syntax)\n if (includeHTML) {\n // Plain URLs with image extensions (common on image hosts)\n const plainUrlPattern = /(?<![(\"'])(https?:\\/\\/[^\\s<>\"']+\\.(?:jpg|jpeg|png|gif|webp|svg|avif|bmp))(?![)\"'])/gi;\n let match: RegExpExecArray | null;\n\n while ((match = plainUrlPattern.exec(text)) !== null) {\n 
const url = match[0]?.trim();\n if (url && !seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'html',\n context: url,\n });\n }\n }\n }\n\n // Extract base64 data URLs: data:image/...;base64,...\n if (includeBase64) {\n const dataUrlPattern = /data:image\\/[^;]+;base64,[A-Za-z0-9+/=]+/g;\n let match: RegExpExecArray | null;\n\n while ((match = dataUrlPattern.exec(text)) !== null) {\n const url = match[0];\n if (!seen.has(url)) {\n seen.add(url);\n results.push({\n url,\n source: 'base64',\n });\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract only HTTP/HTTPS URLs (excludes data URLs and relative paths).\n *\n * Convenience wrapper around extractImageUrls that filters for remote URLs.\n *\n * @param text - Text to extract URLs from\n * @returns Array of extracted HTTP/HTTPS image URLs\n */\nexport function extractRemoteImageUrls(text: string): ExtractedImage[] {\n const all = extractImageUrls(text, { includeBase64: false });\n return all.filter((img) => /^https?:\\/\\//i.test(img.url));\n}\n\n/**\n * Extract only base64 data URLs.\n *\n * Convenience wrapper around extractImageUrls.\n *\n * @param text - Text to extract data URLs from\n * @returns Array of base64 image data URLs\n */\nexport function extractDataUrls(text: string): ExtractedImage[] {\n return extractImageUrls(text, {\n includeMarkdown: false,\n includeHTML: false,\n includeBase64: true,\n });\n}\n\n/**\n * Count images in text without extracting full details.\n *\n * More efficient than extractImageUrls when you only need the count.\n *\n * @param text - Text to count images in\n * @returns Total number of unique image references\n */\nexport function countImages(text: string): number {\n return extractImageUrls(text).length;\n}\n","/**\n * SSRF (Server-Side Request Forgery) Protection\n *\n * Validates URLs to prevent SSRF attacks.\n * Browser-safe implementation (no Node.js dependencies).\n */\n\nexport interface SSRFPolicy {\n /** Allow private IP addresses (10.x, 
172.16-31.x, 192.168.x) (default: false) */\n allowPrivateIPs?: boolean;\n /** Allow localhost/loopback (127.x, ::1) (default: false) */\n allowLocalhost?: boolean;\n /** Blocked domain patterns (e.g., ['internal.company.com', '*.local']) */\n blockedDomains?: string[];\n /** Allowed domain patterns - if provided, ONLY these are allowed */\n allowedDomains?: string[];\n /** Allow data URLs (data:image/...) (default: false) */\n allowDataUrls?: boolean;\n}\n\nexport interface SafetyCheck {\n /** Whether the URL is safe according to policy */\n safe: boolean;\n /** Reason why URL is unsafe (if safe=false) */\n reason?: string;\n}\n\n/**\n * Default SSRF policy - blocks private IPs, localhost, and data URLs.\n */\nexport const DEFAULT_SSRF_POLICY: Required<SSRFPolicy> = {\n allowPrivateIPs: false,\n allowLocalhost: false,\n blockedDomains: [],\n allowedDomains: [],\n allowDataUrls: false,\n};\n\n/**\n * Check if URL is safe to fetch according to SSRF policy.\n *\n * This is the canonical SSRF protection - all apps should use this\n * before fetching external URLs.\n *\n * @param url - URL to validate\n * @param policy - SSRF policy (uses defaults if not provided)\n * @returns Safety check result with reason if unsafe\n *\n * @example\n * ```typescript\n * const check = isURLSafe('http://10.0.0.1/secret');\n * if (!check.safe) {\n * console.error('Unsafe URL:', check.reason);\n * }\n * ```\n */\nexport function isURLSafe(\n url: string,\n policy: SSRFPolicy = {},\n): SafetyCheck {\n const config = { ...DEFAULT_SSRF_POLICY, ...policy };\n\n // Parse URL\n let parsed: URL;\n try {\n parsed = new URL(url);\n } catch {\n return { safe: false, reason: 'Invalid URL format' };\n }\n\n // Check protocol\n if (parsed.protocol === 'data:') {\n if (!config.allowDataUrls) {\n return { safe: false, reason: 'Data URLs not allowed' };\n }\n return { safe: true };\n }\n\n if (!['http:', 'https:'].includes(parsed.protocol)) {\n return {\n safe: false,\n reason: `Protocol 
'${parsed.protocol}' not allowed (only http/https)`,\n };\n }\n\n const hostname = parsed.hostname.toLowerCase();\n\n // Check allowed domains (whitelist)\n if (config.allowedDomains.length > 0) {\n const isAllowed = config.allowedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (!isAllowed) {\n return {\n safe: false,\n reason: `Domain '${hostname}' not in allowed list`,\n };\n }\n }\n\n // Check blocked domains (blacklist)\n if (config.blockedDomains.length > 0) {\n const isBlocked = config.blockedDomains.some((pattern) =>\n matchDomainPattern(hostname, pattern),\n );\n if (isBlocked) {\n return {\n safe: false,\n reason: `Domain '${hostname}' is blocked`,\n };\n }\n }\n\n // Check localhost\n if (isLocalhost(hostname) && !config.allowLocalhost) {\n return { safe: false, reason: 'Localhost not allowed' };\n }\n\n // Check private IPs\n if (isPrivateIP(hostname) && !config.allowPrivateIPs) {\n return { safe: false, reason: 'Private IP addresses not allowed' };\n }\n\n return { safe: true };\n}\n\n/**\n * Quick check if URL is safe with default policy.\n *\n * Convenience wrapper for common case.\n *\n * @param url - URL to validate\n * @returns true if safe, false otherwise\n */\nexport function isSafeForFetch(url: string): boolean {\n return isURLSafe(url).safe;\n}\n\n/**\n * Match domain against pattern (supports wildcards).\n *\n * @example\n * ```typescript\n * matchDomainPattern('api.github.com', '*.github.com') // true\n * matchDomainPattern('github.com', '*.github.com') // false\n * matchDomainPattern('github.com', 'github.com') // true\n * ```\n */\nfunction matchDomainPattern(domain: string, pattern: string): boolean {\n // Exact match\n if (domain === pattern) return true;\n\n // Wildcard match - only matches subdomains, not the base domain\n if (pattern.startsWith('*.')) {\n const suffix = pattern.slice(2);\n return domain.endsWith('.' 
+ suffix);\n }\n\n return false;\n}\n\n/**\n * Check if hostname is localhost/loopback.\n */\nfunction isLocalhost(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 loopback\n if (lower === 'localhost' || lower === '0.0.0.0' || lower.startsWith('127.')) {\n return true;\n }\n\n // IPv6 loopback\n if (lower === '::1' || lower === '[::1]') {\n return true;\n }\n\n return false;\n}\n\n/**\n * Check if hostname is a private IP address.\n *\n * Checks for:\n * - 10.0.0.0/8\n * - 172.16.0.0/12\n * - 192.168.0.0/16\n * - Link-local: 169.254.0.0/16 (includes AWS metadata endpoint)\n * - IPv6 private ranges\n */\nfunction isPrivateIP(hostname: string): boolean {\n const lower = hostname.toLowerCase();\n\n // IPv4 private ranges\n const parts = hostname.split('.').map(Number);\n\n if (\n parts.length === 4 &&\n parts.every((p) => p >= 0 && p <= 255 && !isNaN(p))\n ) {\n const [octet1, octet2] = parts;\n\n // 10.0.0.0/8\n if (octet1 === 10) return true;\n\n // 172.16.0.0/12\n if (octet1 === 172 && octet2 !== undefined && octet2 >= 16 && octet2 <= 31)\n return true;\n\n // 192.168.0.0/16\n if (octet1 === 192 && octet2 === 168) return true;\n\n // 169.254.0.0/16 (link-local) - includes AWS metadata endpoint\n if (octet1 === 169 && octet2 === 254) return true;\n }\n\n // IPv6 private ranges using regex patterns\n // Strip brackets if present (URL parser adds them: [fc00::1])\n const cleanedHostname = lower.replace(/^\\[|\\]$/g, '');\n\n const ipv6Patterns = [\n /^f[cd][0-9a-f]{2}:/i, // fc00::/7 (includes both fc and fd ranges)\n /^fe80:/i, // fe80::/10 (link-local)\n ];\n\n for (const pattern of ipv6Patterns) {\n if (pattern.test(cleanedHostname)) {\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Filter array of URLs, keeping only safe ones.\n *\n * @param urls - Array of URLs to filter\n * @param policy - SSRF policy\n * @returns Array of safe URLs\n */\nexport function filterSafeUrls(\n urls: string[],\n policy?: SSRFPolicy,\n): string[] 
{\n return urls.filter((url) => isURLSafe(url, policy).safe);\n}\n"],"mappings":";AA6CO,SAAS,iBACd,MACA,UAAkC,CAAC,GACjB;AAClB,QAAM;IACJ,kBAAkB;IAClB,cAAc;IACd,gBAAgB;EAClB,IAAI;AAEJ,QAAM,UAA4B,CAAC;AACnC,QAAM,OAAO,oBAAI,IAAY;AAI7B,MAAI,iBAAiB;AACnB,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,oBAAoB;AAC1B,QAAI;AAEJ,YAAQ,QAAQ,kBAAkB,KAAK,IAAI,OAAO,MAAM;AACtD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAGA,UAAM,sBAAsB;AAC5B,YAAQ,QAAQ,oBAAoB,KAAK,IAAI,OAAO,MAAM;AACxD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,OAAO,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,IAAI,WAAW,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,GAAG;AACzE,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;AAIA,UAAM,gBAAgB;AACtB,YAAQ,QAAQ,cAAc,KAAK,IAAI,OAAO,MAAM;AAClD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAE3B,UAAI,QAAQ,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU,MAAM,CAAC,KAAK,IAAI,GAAG,GAAG;AACtF,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS,MAAM,CAAC;QAClB,CAAC;MACH;IACF;EACF;AAGA,MAAI,aAAa;AAEf,UAAM,kBAAkB;AACxB,QAAI;AAEJ,YAAQ,QAAQ,gBAAgB,KAAK,IAAI,OAAO,MAAM;AACpD,YAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,UAAI,OAAO,CAAC,KAAK,IAAI,GAAG,GAAG;AACzB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;UACR,SAAS;QACX,CAAC;MACH;IACF;EACF;AAGA,MAAI,eAAe;AACjB,UAAM,iBAAiB;AACvB,QAAI;AAEJ,YAAQ,QAAQ,eAAe,KAAK,IAAI,OAAO,MAAM;AACnD,YAAM,MAAM,MAAM,CAAC;AACnB,UAAI,CAAC,KAAK,IAAI,GAAG,GAAG;AAClB,aAAK,IAAI,GAAG;AACZ,gBAAQ,KAAK;UACX;UACA,QAAQ;QACV,CAAC;MACH;IACF;EACF;AAEA,SAAO;AACT;AAUO,SAAS,uBAAuB,MAAgC;AACrE,QAAM,MAAM,iBAAiB,MAAM,EAAE,eAAe,MAAM,CAAC;AAC3D,SAAO,IAAI,OAAO,CAAC,QAAQ,gBAAgB,KAAK,IAAI,GAAG,CAAC;AAC1D;AAUO,SAAS,gBAAgB,MAAgC;AAC9D,SAAO,iBAAiB,MAAM;IAC5B,iBAAiB;IACjB,aAAa;IACb,eAAe;
EACjB,CAAC;AACH;AAUO,SAAS,YAAY,MAAsB;AAChD,SAAO,iBAAiB,IAAI,EAAE;AAChC;AC/KO,IAAM,sBAA4C;EACvD,iBAAiB;EACjB,gBAAgB;EAChB,gBAAgB,CAAC;EACjB,gBAAgB,CAAC;EACjB,eAAe;AACjB;AAoBO,SAAS,UACd,KACA,SAAqB,CAAC,GACT;AACb,QAAM,SAAS,EAAE,GAAG,qBAAqB,GAAG,OAAO;AAGnD,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;EACtB,QAAQ;AACN,WAAO,EAAE,MAAM,OAAO,QAAQ,qBAAqB;EACrD;AAGA,MAAI,OAAO,aAAa,SAAS;AAC/B,QAAI,CAAC,OAAO,eAAe;AACzB,aAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;IACxD;AACA,WAAO,EAAE,MAAM,KAAK;EACtB;AAEA,MAAI,CAAC,CAAC,SAAS,QAAQ,EAAE,SAAS,OAAO,QAAQ,GAAG;AAClD,WAAO;MACL,MAAM;MACN,QAAQ,aAAa,OAAO,QAAQ;IACtC;EACF;AAEA,QAAM,WAAW,OAAO,SAAS,YAAY;AAG7C,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,CAAC,WAAW;AACd,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,OAAO,eAAe,SAAS,GAAG;AACpC,UAAM,YAAY,OAAO,eAAe;MAAK,CAAC,YAC5C,mBAAmB,UAAU,OAAO;IACtC;AACA,QAAI,WAAW;AACb,aAAO;QACL,MAAM;QACN,QAAQ,WAAW,QAAQ;MAC7B;IACF;EACF;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,gBAAgB;AACnD,WAAO,EAAE,MAAM,OAAO,QAAQ,wBAAwB;EACxD;AAGA,MAAI,YAAY,QAAQ,KAAK,CAAC,OAAO,iBAAiB;AACpD,WAAO,EAAE,MAAM,OAAO,QAAQ,mCAAmC;EACnE;AAEA,SAAO,EAAE,MAAM,KAAK;AACtB;AAUO,SAAS,eAAe,KAAsB;AACnD,SAAO,UAAU,GAAG,EAAE;AACxB;AAYA,SAAS,mBAAmB,QAAgB,SAA0B;AAEpE,MAAI,WAAW,QAAS,QAAO;AAG/B,MAAI,QAAQ,WAAW,IAAI,GAAG;AAC5B,UAAM,SAAS,QAAQ,MAAM,CAAC;AAC9B,WAAO,OAAO,SAAS,MAAM,MAAM;EACrC;AAEA,SAAO;AACT;AAKA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,MAAI,UAAU,eAAe,UAAU,aAAa,MAAM,WAAW,MAAM,GAAG;AAC5E,WAAO;EACT;AAGA,MAAI,UAAU,SAAS,UAAU,SAAS;AACxC,WAAO;EACT;AAEA,SAAO;AACT;AAYA,SAAS,YAAY,UAA2B;AAC9C,QAAM,QAAQ,SAAS,YAAY;AAGnC,QAAM,QAAQ,SAAS,MAAM,GAAG,EAAE,IAAI,MAAM;AAE5C,MACE,MAAM,WAAW,KACjB,MAAM,MAAM,CAAC,MAAM,KAAK,KAAK,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,GAClD;AACA,UAAM,CAAC,QAAQ,MAAM,IAAI;AAGzB,QAAI,WAAW,GAAI,QAAO;AAG1B,QAAI,WAAW,OAAO,WAAW,UAAa,UAAU,MAAM,UAAU;AACtE,aAAO;AAGT,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;AAG7C,QAAI,WAAW,OAAO,WAAW,IAAK,QAAO;EAC/C;AAIA,QAAM,kBAAkB,MAAM,QAAQ,YAAY,EAAE;AAEpD,QAAM,eAAe;IACnB;;IACA;;EACF;
AAEA,aAAW,WAAW,cAAc;AAClC,QAAI,QAAQ,KAAK,eAAe,GAAG;AACjC,aAAO;IACT;EACF;AAEA,SAAO;AACT;AASO,SAAS,eACd,MACA,QACU;AACV,SAAO,KAAK,OAAO,CAAC,QAAQ,UAAU,KAAK,MAAM,EAAE,IAAI;AACzD;","names":[]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@character-foundry/character-foundry",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.7",
|
|
4
4
|
"description": "Universal TypeScript library for AI character card formats",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"exports": {
|
|
@@ -124,6 +124,16 @@
|
|
|
124
124
|
"default": "./dist/media.cjs"
|
|
125
125
|
}
|
|
126
126
|
},
|
|
127
|
+
"./image-utils": {
|
|
128
|
+
"import": {
|
|
129
|
+
"types": "./dist/image-utils.d.ts",
|
|
130
|
+
"default": "./dist/image-utils.js"
|
|
131
|
+
},
|
|
132
|
+
"require": {
|
|
133
|
+
"types": "./dist/image-utils.d.cts",
|
|
134
|
+
"default": "./dist/image-utils.cjs"
|
|
135
|
+
}
|
|
136
|
+
},
|
|
127
137
|
"./federation": {
|
|
128
138
|
"import": {
|
|
129
139
|
"types": "./dist/federation.d.ts",
|
|
@@ -184,16 +194,17 @@
|
|
|
184
194
|
"tsup": "^8.5.1",
|
|
185
195
|
"typescript": "^5.3.0",
|
|
186
196
|
"@character-foundry/core": "^0.1.3",
|
|
197
|
+
"@character-foundry/charx": "^0.0.7",
|
|
187
198
|
"@character-foundry/schemas": "^0.2.2",
|
|
188
199
|
"@character-foundry/png": "^0.0.6",
|
|
189
|
-
"@character-foundry/charx": "^0.0.7",
|
|
190
|
-
"@character-foundry/lorebook": "^0.0.3",
|
|
191
|
-
"@character-foundry/loader": "^0.1.10",
|
|
192
200
|
"@character-foundry/voxta": "^0.1.13",
|
|
201
|
+
"@character-foundry/lorebook": "^0.0.3",
|
|
193
202
|
"@character-foundry/exporter": "^0.1.4",
|
|
194
203
|
"@character-foundry/normalizer": "^0.1.5",
|
|
204
|
+
"@character-foundry/loader": "^0.1.10",
|
|
195
205
|
"@character-foundry/tokenizers": "^0.1.3",
|
|
196
206
|
"@character-foundry/media": "^0.1.3",
|
|
207
|
+
"@character-foundry/image-utils": "^0.1.0",
|
|
197
208
|
"@character-foundry/federation": "^0.5.2",
|
|
198
209
|
"@character-foundry/app-framework": "^0.2.2"
|
|
199
210
|
},
|