agent-reader 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +213 -0
- package/bin/agent-reader.js +83 -0
- package/package.json +52 -0
- package/src/cli/commands.js +602 -0
- package/src/core/assets.js +429 -0
- package/src/core/exporter.js +710 -0
- package/src/core/opener.js +329 -0
- package/src/core/renderer.js +235 -0
- package/src/core/sanitizer.js +79 -0
- package/src/core/slideshow.js +383 -0
- package/src/core/templates/docx-table.lua +4 -0
- package/src/core/templates/reference.docx +0 -0
- package/src/core/themes/dark.css +256 -0
- package/src/core/themes/light.css +312 -0
- package/src/core/themes/print.css +54 -0
- package/src/mcp/server.js +381 -0
- package/src/templates/document.html +145 -0
- package/src/templates/slideshow.html +42 -0
- package/src/utils/logger.js +64 -0
- package/src/utils/naturalSort.js +12 -0
- package/src/utils/output.js +85 -0
- package/src/utils/preferences.js +89 -0
- package/src/utils/server.js +295 -0
|
@@ -0,0 +1,429 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
2
|
+
import dns from 'node:dns/promises';
|
|
3
|
+
import { promises as fs } from 'node:fs';
|
|
4
|
+
import net from 'node:net';
|
|
5
|
+
import path from 'node:path';
|
|
6
|
+
|
|
7
|
+
// Lower-case file extensions (leading dot included) accepted as local images.
const LOCAL_IMAGE_EXTENSIONS = new Set([
  '.png',
  '.jpg',
  '.jpeg',
  '.gif',
  '.svg',
  '.webp',
]);

// Default inline threshold (200 KiB): images at or below this size become data URLs.
const DEFAULT_MAX_INLINE_BYTES = 1024 * 200;

// Default download ceiling (10 MiB) for a single remote image.
const DEFAULT_MAX_REMOTE_BYTES = 1024 * 1024 * 10;

// Default timeout, in milliseconds, for a single remote request.
const DEFAULT_REMOTE_TIMEOUT_MS = 10 * 1000;

// Default name of the directory (under the output directory) that receives copied images.
const DEFAULT_ASSET_DIR_NAME = 'assets';
|
|
12
|
+
|
|
13
|
+
/**
 * Report whether a string begins with an `http://` or `https://` scheme
 * (case-insensitive).
 *
 * @param {string} value - Candidate image source.
 * @returns {boolean} `true` when the value starts with an HTTP(S) scheme.
 */
function isRemoteUrl(value) {
  const httpSchemePrefix = /^https?:\/\//i;
  return httpSchemePrefix.test(value);
}
|
|
16
|
+
|
|
17
|
+
/**
 * Strip leading and trailing whitespace from an image source value.
 *
 * @param {string} src - Raw attribute value.
 * @returns {string} The trimmed value.
 */
function normalizeSrc(src) {
  const trimmed = src.trim();
  return trimmed;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Extract the lower-cased file extension of a path or URL, ignoring anything
 * from the first `?` or `#` onward.
 *
 * @param {string} value - Path or URL-like string.
 * @returns {string} Extension including the leading dot, or `''` when there is none.
 */
function extFromPath(value) {
  // Cutting at the first `?` or `#` equals splitting on `?` and then on `#`.
  const [pathPortion] = value.split(/[?#]/, 1);
  const rawExtension = path.extname(pathPortion);
  return rawExtension.toLowerCase();
}
|
|
25
|
+
|
|
26
|
+
/**
 * Map a lower-case image extension (with leading dot) to its MIME type.
 *
 * @param {string} ext - Extension such as `'.png'`.
 * @returns {string} The matching image MIME type, or
 *   `'application/octet-stream'` for anything unrecognised.
 */
function guessMimeTypeFromExt(ext) {
  // A Map (not a plain object) so keys like 'constructor' cannot hit the prototype.
  const mimeByExtension = new Map([
    ['.jpg', 'image/jpeg'],
    ['.jpeg', 'image/jpeg'],
    ['.png', 'image/png'],
    ['.gif', 'image/gif'],
    ['.svg', 'image/svg+xml'],
    ['.webp', 'image/webp'],
  ]);

  return mimeByExtension.get(ext) ?? 'application/octet-stream';
}
|
|
43
|
+
|
|
44
|
+
/**
 * Build a base64 `data:` URL from a MIME type and binary content.
 *
 * @param {string} mimeType - MIME type placed after `data:`.
 * @param {Buffer} buffer - Content to encode.
 * @returns {string} `data:<mimeType>;base64,<payload>`.
 */
function toDataUrl(mimeType, buffer) {
  const header = `data:${mimeType};base64,`;
  const payload = buffer.toString('base64');
  return header + payload;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Derive a filesystem- and URL-friendly asset file name from an original name
 * and the file's content.
 *
 * The result has the shape `<stem>-<hash>.<ext>` where:
 * - `<stem>` is the original base name with every character outside
 *   `[a-zA-Z0-9_-]` replaced by `_` (or `image` when nothing is left);
 * - `<hash>` is the first 8 hex digits of the SHA-256 of the content, so
 *   different files with the same name do not collide;
 * - `<ext>` is the lower-cased original extension, sanitised the same way
 *   (or `bin` when there is no usable extension).
 *
 * @param {string} originalName - Original file name (no directory part expected).
 * @param {Buffer|string} contentBuffer - File content used for the hash.
 * @returns {string} The sanitised asset file name.
 */
function toSafeAssetName(originalName, contentBuffer) {
  // Strip the extension using its ORIGINAL casing: path.basename() compares the
  // suffix case-sensitively, so a lower-cased '.png' would not remove '.PNG'.
  const rawExtension = path.extname(originalName);
  const stem = path.basename(originalName, rawExtension).replace(/[^a-zA-Z0-9_-]/g, '_') || 'image';

  // The extension also ends up in a file name and in an HTML attribute, so it
  // gets the same character whitelist as the stem (after dropping the dot).
  const extension = rawExtension.slice(1).toLowerCase().replace(/[^a-z0-9_-]/g, '_');

  const hash = createHash('sha256').update(contentBuffer).digest('hex').slice(0, 8);
  return `${stem}-${hash}.${extension || 'bin'}`;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Make sure the asset directory exists under the output directory, creating
 * missing parent directories as needed.
 *
 * @param {string} outDir - Output directory.
 * @param {string} assetDirName - Name of the asset sub-directory.
 * @returns {Promise<string>} Path of the asset directory (`outDir` joined with `assetDirName`).
 */
async function ensureAssetDir(outDir, assetDirName) {
  const target = path.join(outDir, assetDirName);
  return fs.mkdir(target, { recursive: true }).then(() => target);
}
|
|
60
|
+
|
|
61
|
+
/**
 * Decide whether a dotted-quad IPv4 address points at a private, loopback,
 * link-local, shared, benchmarking, multicast or otherwise reserved range.
 *
 * Anything that is not exactly four decimal octets in 0–255 is treated as
 * unsafe (fail closed).
 *
 * @param {string} ip - IPv4 address in dotted-quad notation.
 * @returns {boolean} `true` when the address must not be contacted.
 */
function isPrivateOrReservedIPv4(ip) {
  const fields = ip.split('.');
  // Digits only: Number() alone would accept '', '1e1' or '0x7f' as octets.
  if (fields.length !== 4 || !fields.every((field) => /^\d{1,3}$/.test(field))) {
    return true;
  }

  const parts = fields.map(Number);
  if (parts.some((part) => part > 255)) {
    return true;
  }

  const [a, b] = parts;
  if (a === 10 || a === 127 || a === 0) return true; // private, loopback, "this network"
  if (a === 169 && b === 254) return true; // link-local (incl. cloud metadata endpoints)
  if (a === 172 && b >= 16 && b <= 31) return true; // private 172.16.0.0/12
  if (a === 192 && b === 168) return true; // private 192.168.0.0/16
  if (a === 100 && b >= 64 && b <= 127) return true; // shared address space 100.64.0.0/10
  if (a === 198 && (b === 18 || b === 19)) return true; // benchmarking 198.18.0.0/15
  if (a >= 224) return true; // multicast, reserved, broadcast
  return false;
}

/**
 * Expand a textual IPv6 address into its eight 16-bit groups.
 *
 * Handles `::` compression, a trailing dotted IPv4 part (`::ffff:1.2.3.4`)
 * and a zone index (`fe80::1%eth0`, which is dropped).
 *
 * @param {string} ip - IPv6 address text.
 * @returns {number[]|null} Eight integers in 0–0xffff, or `null` when the text
 *   cannot be parsed.
 */
function parseIPv6Groups(ip) {
  let text = ip.toLowerCase().split('%')[0];

  // Rewrite a trailing dotted quad as two hexadecimal groups.
  const lastColon = text.lastIndexOf(':');
  const tail = text.slice(lastColon + 1);
  if (tail.includes('.')) {
    const octets = tail.split('.');
    const validQuad = lastColon !== -1
      && octets.length === 4
      && octets.every((octet) => /^\d{1,3}$/.test(octet) && Number(octet) <= 255);
    if (!validQuad) {
      return null;
    }
    const [o1, o2, o3, o4] = octets.map(Number);
    const high = ((o1 << 8) | o2).toString(16);
    const low = ((o3 << 8) | o4).toString(16);
    text = `${text.slice(0, lastColon + 1)}${high}:${low}`;
  }

  const halves = text.split('::');
  if (halves.length > 2) {
    return null;
  }

  const head = halves[0] ? halves[0].split(':') : [];
  const rest = halves.length === 2 && halves[1] ? halves[1].split(':') : [];
  const zeroCount = 8 - head.length - rest.length;
  // Without `::` all eight groups must be present; with it, at least one is elided.
  if (halves.length === 1 ? zeroCount !== 0 : zeroCount < 1) {
    return null;
  }

  const groups = [...head, ...new Array(zeroCount).fill('0'), ...rest];
  if (!groups.every((group) => /^[0-9a-f]{1,4}$/.test(group))) {
    return null;
  }
  return groups.map((group) => Number.parseInt(group, 16));
}

/**
 * Decide whether an IPv6 address is unsafe to contact.
 *
 * Only global unicast space (`2000::/3`) is considered public. Addresses that
 * embed an IPv4 address (IPv4-mapped `::ffff:0:0/96`, NAT64 `64:ff9b::/96`,
 * 6to4 `2002::/16`) are judged by that IPv4 address, so `::ffff:10.0.0.1` or
 * `::ffff:a9fe:a9fe` cannot be used to slip past the IPv4 rules. Everything
 * else — loopback, unspecified, unique-local, link-local, multicast,
 * documentation space, unparseable text — is unsafe.
 *
 * @param {string} ip - IPv6 address text.
 * @returns {boolean} `true` when the address must not be contacted.
 */
function isPrivateOrReservedIPv6(ip) {
  const groups = parseIPv6Groups(ip);
  if (groups === null) {
    return true;
  }

  const [g0, g1, g2, g3, g4, g5, g6, g7] = groups;
  const toDottedQuad = (high, low) => `${high >> 8}.${high & 0xff}.${low >> 8}.${low & 0xff}`;
  const middleIsZero = g2 === 0 && g3 === 0 && g4 === 0;

  // IPv4-mapped (::ffff:a.b.c.d): the connection really goes to the IPv4 host.
  if (g0 === 0 && g1 === 0 && middleIsZero && g5 === 0xffff) {
    return isPrivateOrReservedIPv4(toDottedQuad(g6, g7));
  }

  // NAT64 well-known prefix (64:ff9b::/96) translates to the embedded IPv4 host.
  if (g0 === 0x64 && g1 === 0xff9b && middleIsZero && g5 === 0) {
    return isPrivateOrReservedIPv4(toDottedQuad(g6, g7));
  }

  // Everything outside 2000::/3 is non-global: ::, ::1, fc00::/7, fe80::/10, ff00::/8, ...
  if ((g0 & 0xe000) !== 0x2000) {
    return true;
  }

  // 6to4 (2002:AABB:CCDD::/48) tunnels to the embedded IPv4 address.
  if (g0 === 0x2002) {
    return isPrivateOrReservedIPv4(toDottedQuad(g1, g2));
  }

  // Documentation prefix 2001:db8::/32 is never a real host.
  if (g0 === 0x2001 && g1 === 0x0db8) {
    return true;
  }

  return false;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Classify an IP address string as unsafe to contact.
 *
 * Dispatches on the address family; anything that is neither a valid IPv4 nor
 * a valid IPv6 address is reported as unsafe.
 *
 * @param {string} ip - Address text.
 * @returns {boolean} `true` when the address must not be contacted.
 */
function isUnsafeIp(ip) {
  // net.isIP() yields 4, 6, or 0 for "not an IP address".
  switch (net.isIP(ip)) {
    case 4:
      return isPrivateOrReservedIPv4(ip);
    case 6:
      return isPrivateOrReservedIPv6(ip);
    default:
      return true;
  }
}
|
|
101
|
+
|
|
102
|
+
/**
 * Check that a URL host name refers only to public addresses.
 *
 * - `localhost` (with or without a trailing dot) and any `*.localhost` name
 *   are rejected outright.
 * - IP literals are classified directly. `URL#hostname` wraps IPv6 literals
 *   in square brackets, which `net.isIP()` does not accept, so the brackets
 *   are removed first; otherwise the literal would be sent to DNS instead of
 *   being checked.
 * - Other names are resolved and accepted only when EVERY returned address is
 *   safe.
 *
 * NOTE(review): this validates the addresses seen at check time; the later
 * `fetch()` resolves the name again on its own, so a DNS answer that changes
 * in between is not covered here.
 *
 * @param {string} hostname - Host name, typically `new URL(...).hostname`.
 * @returns {Promise<boolean>} `true` when the host is considered public.
 * @throws When the DNS lookup itself fails.
 */
async function resolveAndValidateHost(hostname) {
  const bareHost = hostname.toLowerCase().replace(/^\[(.*)\]$/, '$1');

  // Treat the fully-qualified form ("localhost.") like the plain name.
  const withoutRootDot = bareHost.replace(/\.$/, '');
  if (withoutRootDot === 'localhost' || withoutRootDot.endsWith('.localhost')) {
    return false;
  }

  if (net.isIP(bareHost)) {
    return !isUnsafeIp(bareHost);
  }

  const records = await dns.lookup(bareHost, { all: true, verbatim: true });
  if (records.length === 0) {
    return false;
  }

  return records.every((record) => !isUnsafeIp(record.address));
}
|
|
119
|
+
|
|
120
|
+
/**
 * Read a fetch `Response` body into a Buffer, refusing to hold more than
 * `maxBytes` in memory.
 *
 * The declared `content-length` is checked first as a cheap early exit, then
 * the actual byte count is enforced while streaming (the header may be absent
 * or wrong). Whenever reading is abandoned the body stream is cancelled so the
 * underlying connection is released instead of being left half-read.
 *
 * @param {Response} response - Response whose body should be read.
 * @param {number} maxBytes - Maximum number of body bytes to accept.
 * @returns {Promise<Buffer>} The complete body.
 * @throws {Error} When the body is missing, or larger than `maxBytes`.
 */
async function readResponseWithLimit(response, maxBytes) {
  const contentLength = response.headers.get('content-length');
  if (contentLength && Number(contentLength) > maxBytes) {
    // Best effort: a failure to cancel must not mask the size error.
    await response.body?.cancel().catch(() => {});
    throw new Error(`remote image too large: ${contentLength} bytes`);
  }

  if (!response.body) {
    throw new Error('response body is empty');
  }

  const reader = response.body.getReader();
  const chunks = [];
  let total = 0;

  try {
    for (;;) {
      const { done, value } = await reader.read();
      if (done) {
        break;
      }

      total += value.byteLength;
      if (total > maxBytes) {
        throw new Error(`remote image exceeded ${maxBytes} bytes`);
      }

      chunks.push(Buffer.from(value));
    }
  } catch (error) {
    // Stop the download; the original error is what the caller needs to see.
    await reader.cancel().catch(() => {});
    throw error;
  }

  return Buffer.concat(chunks);
}
|
|
150
|
+
|
|
151
|
+
/**
 * Cancel a response body that will not be read, so the connection is released.
 * Failures are ignored: this is cleanup on a path that is already finished.
 *
 * @param {Response} response - Response whose body is being abandoned.
 * @returns {Promise<void>}
 */
async function discardResponseBody(response) {
  try {
    await response.body?.cancel();
  } catch {
    // Nothing useful can be done if cancellation itself fails.
  }
}

/**
 * Download an image over HTTP(S), following redirects manually so that every
 * hop is validated before it is contacted.
 *
 * For each hop the protocol must be http/https and the host must pass
 * `resolveAndValidateHost`. The final response must be successful and carry a
 * well-formed `image/*` content type. The per-hop timeout covers the whole
 * exchange — headers AND body — so a server that stalls mid-body is aborted
 * too.
 *
 * @param {string} initialUrl - URL to start from.
 * @param {object} options
 * @param {number} options.maxBytes - Maximum accepted body size in bytes.
 * @param {number} options.timeoutMs - Timeout per request, in milliseconds.
 * @param {number} [options.maxRedirects=5] - Maximum number of redirects to follow.
 * @returns {Promise<{mimeType: string, buffer: Buffer, finalUrl: string}>}
 *   The lower-cased MIME type, the body, and the URL that finally served it.
 * @throws {Error} On unsupported protocol, blocked host, bad redirect,
 *   non-2xx status, non-image content type, oversize body, timeout, or too
 *   many redirects.
 */
async function fetchImageWithRedirects(initialUrl, {
  maxBytes,
  timeoutMs,
  maxRedirects = 5,
}) {
  let current = initialUrl;

  for (let redirectCount = 0; redirectCount <= maxRedirects; redirectCount += 1) {
    const url = new URL(current);
    if (!['http:', 'https:'].includes(url.protocol)) {
      throw new Error(`unsupported protocol: ${url.protocol}`);
    }

    const publicHost = await resolveAndValidateHost(url.hostname);
    if (!publicHost) {
      throw new Error(`blocked host: ${url.hostname}`);
    }

    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), timeoutMs);

    // The timer is cleared only in `finally`, i.e. after the body has been
    // read (or the hop abandoned), so it bounds the download as well.
    try {
      const response = await fetch(current, {
        method: 'GET',
        redirect: 'manual',
        signal: controller.signal,
      });

      if (response.status >= 300 && response.status < 400) {
        const location = response.headers.get('location');
        await discardResponseBody(response);
        if (!location) {
          throw new Error(`redirect without location from ${current}`);
        }
        current = new URL(location, current).toString();
        continue;
      }

      if (!response.ok) {
        await discardResponseBody(response);
        throw new Error(`download failed with status ${response.status}`);
      }

      const contentType = response.headers.get('content-type') || '';
      const mimeType = contentType.split(';')[0].trim().toLowerCase();
      // The MIME type is later embedded in a data URL inside an HTML attribute,
      // so require a plain `image/<token>` rather than trusting the server.
      if (!/^image\/[a-z0-9][a-z0-9.+-]*$/.test(mimeType)) {
        await discardResponseBody(response);
        throw new Error(`unexpected content-type: ${contentType || 'unknown'}`);
      }

      const buffer = await readResponseWithLimit(response, maxBytes);
      return {
        mimeType,
        buffer,
        finalUrl: current,
      };
    } finally {
      clearTimeout(timeout);
    }
  }

  throw new Error(`too many redirects for ${initialUrl}`);
}
|
|
212
|
+
|
|
213
|
+
/**
 * Replace the value of the first quoted `src` attribute in an `<img>` tag.
 *
 * The new value is written between the tag's existing quote characters, so
 * any `"` or `'` inside it is emitted as a character reference; otherwise a
 * value containing a quote could terminate the attribute and inject markup.
 * `&` is left alone so values that are already HTML-escaped are not escaped
 * twice.
 *
 * @param {string} tag - The `<img ...>` tag text.
 * @param {string} newSrc - Replacement source value.
 * @returns {string} The tag with its `src` value replaced (unchanged when no
 *   quoted `src` attribute is found).
 */
function replaceImgSrc(tag, newSrc) {
  const attributeSafeSrc = String(newSrc).replace(/"/g, '&quot;').replace(/'/g, '&#39;');

  // A replacer function (not a string) keeps `$1`, `$&` etc. in the value literal.
  return tag.replace(
    /(\bsrc\s*=\s*['"])([^'"]*)(['"])/i,
    (_match, prefix, _value, suffix) => `${prefix}${attributeSafeSrc}${suffix}`,
  );
}
|
|
219
|
+
|
|
220
|
+
/**
 * List the filesystem paths a local image reference may stand for, most
 * literal first: the value as written, the value without `?query`/`#fragment`,
 * and that value with percent-escapes decoded (`my%20image.png`).
 *
 * @param {string} cleanSrc - Trimmed `src` value.
 * @returns {string[]} Distinct candidate paths in the order they should be tried.
 */
function localPathCandidates(cleanSrc) {
  const [withoutSuffix] = cleanSrc.split(/[?#]/, 1);

  let decoded = withoutSuffix;
  try {
    decoded = decodeURIComponent(withoutSuffix);
  } catch {
    // Malformed escape sequence: the raw text is the only usable form.
  }

  return [...new Set([cleanSrc, withoutSuffix, decoded])];
}

/**
 * Turn a local image reference into something the exported document can use.
 *
 * Small images (or all images when `inlineAll` is set) become data URLs;
 * larger ones are copied into `<outDir>/<assetDirName>/` under a sanitised,
 * content-hashed name. Whenever the image cannot be handled, a warning is
 * recorded and the trimmed original reference is returned unchanged.
 *
 * The `src` value is a URL reference, so when the literal path cannot be read
 * the query/fragment is dropped and percent-escapes are decoded before giving
 * up. The literal path is tried first, so a file whose name really contains
 * `%` or `?` still resolves.
 *
 * @param {string} src - Image reference as found in the document.
 * @param {object} options
 * @param {string} [options.baseDir] - Directory relative references resolve against.
 * @param {string} [options.outDir] - Output directory for copied images.
 * @param {boolean} options.inlineAll - Inline regardless of size.
 * @param {number} options.maxInlineBytes - Largest size that is inlined.
 * @param {string} options.assetDirName - Asset directory name under `outDir`.
 * @param {string[]} options.assets - Receives the path of every file written.
 * @param {string[]} options.warnings - Receives a message for every skipped image.
 * @returns {Promise<string>} New `src` value: a data URL, an
 *   `<assetDirName>/<file>` relative path, or the trimmed original.
 */
async function processLocalImage(src, {
  baseDir,
  outDir,
  inlineAll,
  maxInlineBytes,
  assetDirName,
  assets,
  warnings,
}) {
  const cleanSrc = normalizeSrc(src);

  const extension = extFromPath(cleanSrc);
  if (!LOCAL_IMAGE_EXTENSIONS.has(extension)) {
    warnings.push(`skip non-image local asset: ${cleanSrc}`);
    return cleanSrc;
  }

  if (!path.isAbsolute(cleanSrc) && !baseDir) {
    warnings.push(`skip relative image without baseDir: ${cleanSrc}`);
    return cleanSrc;
  }

  let resolvedPath;
  let fileBuffer;
  for (const candidate of localPathCandidates(cleanSrc)) {
    const candidatePath = path.isAbsolute(candidate) ? candidate : path.resolve(baseDir, candidate);
    try {
      fileBuffer = await fs.readFile(candidatePath);
      resolvedPath = candidatePath;
      break;
    } catch {
      // Unreadable under this spelling; try the next candidate.
    }
  }

  if (fileBuffer === undefined) {
    warnings.push(`local image not found: ${cleanSrc}`);
    return cleanSrc;
  }

  const mimeType = guessMimeTypeFromExt(extension);
  if (inlineAll || fileBuffer.byteLength <= maxInlineBytes) {
    return toDataUrl(mimeType, fileBuffer);
  }

  if (!outDir) {
    warnings.push(`skip copying large image without outDir: ${cleanSrc}`);
    return cleanSrc;
  }

  // Name the copy after the file actually read, not after the raw reference,
  // so a query string or percent-escape does not leak into the asset name.
  const assetName = toSafeAssetName(path.basename(resolvedPath), fileBuffer);
  const assetsDir = await ensureAssetDir(outDir, assetDirName);
  const targetPath = path.join(assetsDir, assetName);
  await fs.writeFile(targetPath, fileBuffer);
  assets.push(targetPath);
  return `${assetDirName}/${assetName}`;
}
|
|
270
|
+
|
|
271
|
+
/**
 * Turn a remote image reference into something the exported document can use.
 *
 * With `fetchRemote` disabled the reference is kept as is. Otherwise the image
 * is downloaded and either inlined as a data URL (small, or `inlineAll`) or
 * written to `<outDir>/<assetDirName>/`. Any failure is recorded as a warning
 * and the original reference is kept.
 *
 * @param {string} src - Remote image URL.
 * @param {object} options
 * @param {string} [options.outDir] - Output directory for downloaded images.
 * @param {boolean} options.inlineAll - Inline regardless of size.
 * @param {boolean} options.fetchRemote - Whether downloading is allowed.
 * @param {number} options.maxInlineBytes - Largest size that is inlined.
 * @param {number} options.maxRemoteBytes - Largest download accepted.
 * @param {number} options.remoteTimeoutMs - Request timeout in milliseconds.
 * @param {string} options.assetDirName - Asset directory name under `outDir`.
 * @param {string[]} options.assets - Receives the path of every file written.
 * @param {string[]} options.warnings - Receives a message for every problem.
 * @returns {Promise<{newSrc: string, retainedRemote: boolean}>} The new `src`
 *   value and whether the document still points at the remote URL.
 */
async function processRemoteImage(src, {
  outDir,
  inlineAll,
  fetchRemote,
  maxInlineBytes,
  maxRemoteBytes,
  remoteTimeoutMs,
  assetDirName,
  assets,
  warnings,
}) {
  const keepRemote = () => ({ newSrc: src, retainedRemote: true });
  const replacedBy = (newSrc) => ({ newSrc, retainedRemote: false });

  if (!fetchRemote) {
    return keepRemote();
  }

  try {
    const downloaded = await fetchImageWithRedirects(src, {
      maxBytes: maxRemoteBytes,
      timeoutMs: remoteTimeoutMs,
    });
    const { mimeType, buffer, finalUrl } = downloaded;

    if (inlineAll || buffer.byteLength <= maxInlineBytes) {
      return replacedBy(toDataUrl(mimeType, buffer));
    }

    if (!outDir) {
      warnings.push(`skip copying remote image without outDir: ${src}`);
      return keepRemote();
    }

    const { pathname } = new URL(finalUrl);

    // Extension implied by the MIME subtype, e.g. image/svg+xml -> .svg.
    let fallbackExt = '.bin';
    if (mimeType.includes('/')) {
      const subtype = mimeType.split('/')[1];
      fallbackExt = `.${subtype.split('+')[0]}`;
    }

    // Prefer the name and extension from the URL; fill the gaps from the MIME type.
    const baseName = path.basename(pathname) || `remote${fallbackExt}`;
    const urlExt = path.extname(baseName);
    const stem = path.basename(baseName, urlExt);
    const assetName = toSafeAssetName(`${stem}${urlExt || fallbackExt}`, buffer);

    const assetsDir = await ensureAssetDir(outDir, assetDirName);
    const targetPath = path.join(assetsDir, assetName);
    await fs.writeFile(targetPath, buffer);
    assets.push(targetPath);

    return replacedBy(`${assetDirName}/${assetName}`);
  } catch (error) {
    warnings.push(`remote image fetch failed (${src}): ${error.message}`);
    return keepRemote();
  }
}
|
|
333
|
+
|
|
334
|
+
/**
 * Rewrite the `src` of every `<img>` tag in an HTML string so the document is
 * as self-contained as possible.
 *
 * Each distinct source is processed once (results are cached per `src`
 * value): remote URLs go through `processRemoteImage`, everything else through
 * `processLocalImage`. Empty sources and existing `data:` URLs are left
 * untouched. Tags are processed sequentially, in document order.
 *
 * Only the real `src` attribute is considered: look-alikes such as
 * `data-src` (common for lazy loading) are neither read nor rewritten.
 *
 * @param {string} html - HTML to process.
 * @param {object} [options]
 * @param {string} [options.baseDir] - Directory relative image paths resolve against.
 * @param {string} [options.outDir] - Output directory for copied/downloaded images.
 * @param {boolean} [options.inlineAll=false] - Inline every image regardless of size.
 * @param {boolean} [options.fetchRemote=true] - Whether remote images are downloaded.
 * @param {number} [options.maxInlineBytes] - Largest image size that is inlined.
 * @param {number} [options.maxRemoteBytes] - Largest remote download accepted.
 * @param {number} [options.remoteTimeoutMs] - Timeout per remote request, in ms.
 * @param {string} [options.assetDirName] - Asset directory name under `outDir`.
 * @returns {Promise<{html: string, assets: string[], warnings: string[], retainedRemote: boolean}>}
 *   The rewritten HTML, the files written, the warnings collected, and whether
 *   at least one image still points at a remote URL.
 */
export async function processAssets(html, options = {}) {
  const {
    baseDir,
    outDir,
    inlineAll = false,
    fetchRemote = true,
    maxInlineBytes = DEFAULT_MAX_INLINE_BYTES,
    maxRemoteBytes = DEFAULT_MAX_REMOTE_BYTES,
    remoteTimeoutMs = DEFAULT_REMOTE_TIMEOUT_MS,
    assetDirName = DEFAULT_ASSET_DIR_NAME,
  } = options;

  const warnings = [];
  const assets = [];
  const cache = new Map();
  let retainedRemote = false;

  const imgTagRegex = /<img\b[^>]*\bsrc\s*=\s*['"][^'"]*['"][^>]*>/gi;
  // `\bsrc` alone also matches inside `data-src` / `ng-src` / `xlink:src`
  // because `-` and `:` are word boundaries; the lookbehind rules those out.
  const srcAttrRegex = /(?<![\w:.-])src\s*=\s*['"]([^'"]*)['"]/i;

  let result = '';
  let cursor = 0;

  for (const match of html.matchAll(imgTagRegex)) {
    const index = match.index ?? 0;
    const wholeTag = match[0];
    const srcMatch = srcAttrRegex.exec(wholeTag);

    result += html.slice(cursor, index);
    cursor = index + wholeTag.length;

    // The tag only carries a look-alike attribute: nothing to rewrite.
    if (!srcMatch) {
      result += wholeTag;
      continue;
    }

    const src = normalizeSrc(srcMatch[1]);

    if (!src || src.startsWith('data:')) {
      result += wholeTag;
      continue;
    }

    // Hand replaceImgSrc just the located attribute so it cannot pick a
    // different `...src=` occurrence earlier in the tag.
    const attrStart = srcMatch.index;
    const attrEnd = attrStart + srcMatch[0].length;
    const rewriteTag = (newSrc) => (
      `${wholeTag.slice(0, attrStart)}${replaceImgSrc(srcMatch[0], newSrc)}${wholeTag.slice(attrEnd)}`
    );

    let payload = cache.get(src);
    if (payload === undefined) {
      if (isRemoteUrl(src)) {
        payload = await processRemoteImage(src, {
          outDir,
          inlineAll,
          fetchRemote,
          maxInlineBytes,
          maxRemoteBytes,
          remoteTimeoutMs,
          assetDirName,
          assets,
          warnings,
        });
      } else {
        const newSrc = await processLocalImage(src, {
          baseDir,
          outDir,
          inlineAll,
          maxInlineBytes,
          assetDirName,
          assets,
          warnings,
        });
        payload = {
          newSrc,
          retainedRemote: false,
        };
      }
      cache.set(src, payload);
    }

    if (payload.retainedRemote) {
      retainedRemote = true;
    }

    result += rewriteTag(payload.newSrc);
  }

  result += html.slice(cursor);

  return {
    html: result,
    assets,
    warnings,
    retainedRemote,
  };
}
|