agent-reader 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { promises as fs } from 'node:fs';
1
+ import { accessSync, constants as fsConstants, promises as fs } from 'node:fs';
2
2
  import os from 'node:os';
3
3
  import path from 'node:path';
4
4
  import { createRequire } from 'node:module';
@@ -7,6 +7,7 @@ import { execa } from 'execa';
7
7
  import MarkdownIt from 'markdown-it';
8
8
  import {
9
9
  Document,
10
+ ExternalHyperlink,
10
11
  HeadingLevel,
11
12
  Packer,
12
13
  Paragraph,
@@ -25,6 +26,109 @@ const LUA_TABLE_FILTER = path.join(__dirname, 'templates', 'docx-table.lua');
25
26
  const POSTPROCESS_SCRIPT = path.join(__dirname, '..', '..', 'scripts', 'postprocess-docx.py');
26
27
 
27
28
  const markdownParser = new MarkdownIt({ html: false, linkify: true, typographer: true });
// Browser launch flags handed out by getSandboxArgs when the sandbox is switched off.
const SANDBOX_DISABLED_ARGS = [
  '--no-sandbox',
  '--disable-setuid-sandbox',
  '--disable-dev-shm-usage',
];

// The only sandbox mode strings this module recognises.
const SANDBOX_MODES = new Set(['auto', 'on', 'off']);
31
+
/**
 * Tell whether a value is one of the recognised sandbox modes.
 *
 * @param {*} mode - Candidate value; compared as-is (no trimming or case folding).
 * @returns {boolean} True when `mode` is a member of SANDBOX_MODES.
 */
function isSandboxMode(mode) {
  const recognised = SANDBOX_MODES.has(mode);
  return recognised;
}
35
+
/**
 * Canonicalise a user-supplied sandbox mode.
 *
 * @param {*} mode - Raw value; anything that is not a string is rejected.
 * @returns {string|null} The trimmed, lower-cased mode when it is recognised,
 *   otherwise null.
 */
function normalizeSandboxMode(mode) {
  const candidate = typeof mode === 'string' ? mode.trim().toLowerCase() : null;
  return candidate !== null && isSandboxMode(candidate) ? candidate : null;
}
43
+
/**
 * Synchronously probe whether a path exists.
 *
 * @param {string} filePath - Path to check.
 * @returns {boolean} False when the F_OK access check throws for any reason,
 *   true otherwise.
 */
function fileExistsSync(filePath) {
  let reachable = true;
  try {
    accessSync(filePath, fsConstants.F_OK);
  } catch {
    // Any failure of the access check is reported as "does not exist".
    reachable = false;
  }
  return reachable;
}
52
+
/**
 * Heuristically decide whether the process runs somewhere the browser sandbox
 * should be disabled: a container marker file exists, `CI` is exactly 'true',
 * or the effective user id is 0.
 *
 * @param {object} [runtime] - Optional overrides, mainly for testing.
 * @param {object} [runtime.env] - Environment map; defaults to process.env.
 * @param {Function} [runtime.fileExists] - Existence probe; defaults to fileExistsSync.
 * @param {Function} [runtime.getuid] - Uid getter. When the key is present (even
 *   as undefined) it replaces process.getuid.
 * @returns {boolean} True when any of the checks matches.
 */
export function detectContainerEnv(runtime = {}) {
  const environment = runtime.env ?? process.env;
  const exists = runtime.fileExists ?? fileExistsSync;
  // An own `getuid` key wins even when its value is undefined, which lets
  // callers simulate platforms without process.getuid.
  const hasUidOverride = Object.prototype.hasOwnProperty.call(runtime, 'getuid');
  const readUid = hasUidOverride ? runtime.getuid : process.getuid;

  // Marker files are probed in this order; the first hit short-circuits.
  const markerFiles = ['/.dockerenv', '/run/.containerenv'];
  if (markerFiles.some((marker) => exists(marker))) {
    return true;
  }

  if (environment?.CI === 'true') {
    return true;
  }

  try {
    return typeof readUid === 'function' && readUid() === 0;
  } catch {
    // A throwing uid getter is treated as "not root".
    return false;
  }
}
80
+
/**
 * Pick the effective sandbox mode.
 *
 * Precedence: a valid explicit request, then a valid AGENT_READER_SANDBOX
 * environment value, then 'auto'.
 *
 * @param {*} requestedMode - Mode passed by the caller; ignored when invalid.
 * @param {object} [env] - Environment map; defaults to process.env.
 * @returns {string} One of the recognised sandbox modes.
 */
export function resolveSandboxMode(requestedMode, env = process.env) {
  return (
    normalizeSandboxMode(requestedMode)
    || normalizeSandboxMode(env?.AGENT_READER_SANDBOX)
    || 'auto'
  );
}
92
+
/**
 * Translate a sandbox mode into extra browser launch arguments.
 *
 * @param {string} [sandboxMode='auto'] - 'on', 'off' or 'auto'; unrecognised
 *   values are treated as 'auto'.
 * @param {object} [runtime] - Forwarded to detectContainerEnv in 'auto' mode.
 * @returns {string[]} A fresh copy of SANDBOX_DISABLED_ARGS when the sandbox
 *   should be off, otherwise an empty array.
 */
export function getSandboxArgs(sandboxMode = 'auto', runtime = {}) {
  const mode = normalizeSandboxMode(sandboxMode) || 'auto';
  // Environment detection only runs for 'auto'; 'on' and 'off' are decisive.
  const disableSandbox = mode === 'off'
    || (mode !== 'on' && detectContainerEnv(runtime));
  return disableSandbox ? [...SANDBOX_DISABLED_ARGS] : [];
}
103
+
/**
 * Check whether a run carries a given property element.
 *
 * Looks through `run.properties.root` for an entry whose `rootKey` equals
 * `propKey`. Always returns a real boolean: a missing run, missing properties
 * or a non-array root yield false instead of undefined or a thrown TypeError.
 *
 * @param {object} run - Run-like object exposing `properties.root`.
 * @param {string} propKey - Element key to look for, e.g. 'w:shd'.
 * @returns {boolean} True when a matching property entry exists.
 */
function runHasProperty(run, propKey) {
  const entries = run?.properties?.root;
  if (!Array.isArray(entries)) {
    return false;
  }
  return entries.some((item) => item?.rootKey === propKey);
}
107
+
/**
 * Build a new TextRun that copies the text and code styling of an existing
 * run and applies the given bold/italics style on top.
 *
 * The source run's own bold ('w:b') and italics ('w:i') are preserved as well.
 * Without that, a run such as the bold part of `[**bold** text](url)` would
 * lose its emphasis, because the style passed in reflects the state at
 * `link_close`, not the state when the run was created.
 *
 * NOTE(review): presence of 'w:b' / 'w:i' is treated as "enabled". That holds
 * for runs built in this file (they pass `true` or omit the option); confirm
 * if runs with an explicit `bold: false` can ever reach this function.
 *
 * @param {object} run - Source run; its `root` is searched for a 'w:t' node
 *   whose last root entry is the text.
 * @param {{bold?: boolean, italics?: boolean}} style - Style to add.
 * @returns {TextRun} The re-created run.
 */
function cloneRunWithStyle(run, { bold, italics }) {
  const textNode = run.root.find((node) => node.rootKey === 'w:t');
  const lastEntry = textNode?.root?.[textNode.root.length - 1];
  const text = typeof lastEntry === 'string' ? lastEntry : '';

  // Keep emphasis the run already had; only ever add, never strip.
  const keepBold = Boolean(bold) || Boolean(runHasProperty(run, 'w:b'));
  const keepItalics = Boolean(italics) || Boolean(runHasProperty(run, 'w:i'));
  const hasCodeStyle = runHasProperty(run, 'w:rFonts');
  const hasShading = runHasProperty(run, 'w:shd');

  return new TextRun({
    text,
    bold: keepBold || undefined,
    italics: keepItalics || undefined,
    ...(hasCodeStyle ? { font: 'Consolas' } : {}),
    ...(hasShading
      ? {
        shading: {
          type: ShadingType.CLEAR,
          color: 'auto',
          fill: 'F4F4F5',
        },
      }
      : {}),
  });
}
28
132
 
29
133
  function paragraphFromText(text) {
30
134
  return new Paragraph({
@@ -65,6 +169,139 @@ function inlineToPlainText(token) {
65
169
  .join('');
66
170
  }
67
171
 
/**
 * Create a TextRun for one inline fragment.
 *
 * @param {string} text - Run text.
 * @param {object} style - Inline style flags.
 * @param {boolean} [style.bold] - Falsy values are passed on as undefined.
 * @param {boolean} [style.italics] - Falsy values are passed on as undefined.
 * @param {boolean} [style.code=false] - When truthy, adds the Consolas font and
 *   a light grey (F4F4F5) shading.
 * @returns {TextRun} The constructed run.
 */
function createInlineTextRun(text, { bold, italics, code = false }) {
  const runOptions = {
    text,
    bold: bold || undefined,
    italics: italics || undefined,
  };

  if (code) {
    runOptions.font = 'Consolas';
    runOptions.shading = {
      type: ShadingType.CLEAR,
      color: 'auto',
      fill: 'F4F4F5',
    };
  }

  return new TextRun(runOptions);
}
189
+
/**
 * Append an inline node to the right container.
 *
 * @param {Array} target - Top-level output list, used when no link is open.
 * @param {Array<{children: Array}>} linkStack - Stack of currently open links.
 * @param {*} node - Node to append.
 * @returns {void}
 */
function pushInlineNode(target, linkStack, node) {
  // While a link is open, its innermost entry collects the nodes.
  const sink = linkStack.length > 0
    ? linkStack[linkStack.length - 1].children
    : target;
  sink.push(node);
}
197
+
/**
 * Pop the innermost open link and emit it.
 *
 * With a non-empty href the collected children are wrapped in an
 * ExternalHyperlink (TextRun children are re-created via cloneRunWithStyle);
 * a link without children shows its href as text. Without an href the
 * children are emitted unwrapped, or a single empty run when there are none.
 * Output goes to the enclosing open link if one remains, else to `target`.
 *
 * @param {Array} target - Top-level output list.
 * @param {Array<{href: string, children: Array}>} linkStack - Open links.
 * @param {{bold: boolean, italics: boolean}} style - Style in effect at close time.
 * @returns {void}
 */
function closeCurrentLink(target, linkStack, style) {
  const link = linkStack.pop();
  if (!link) {
    return;
  }

  const { children } = link;
  const href = link.href || '';
  const hasChildren = children.length > 0;

  if (href) {
    const linkChildren = hasChildren ? children : [createInlineTextRun(href, style)];
    const restyled = linkChildren.map((node) => (
      node instanceof TextRun ? cloneRunWithStyle(node, style) : node
    ));
    pushInlineNode(target, linkStack, new ExternalHyperlink({
      link: href,
      children: restyled,
    }));
    return;
  }

  // No usable href: degrade to plain runs.
  const plainRuns = hasChildren ? children : [createInlineTextRun('', style)];
  for (const plainRun of plainRuns) {
    pushInlineNode(target, linkStack, plainRun);
  }
}
226
+
/**
 * Convert an inline token (with `children` tokens) into docx inline nodes.
 *
 * Bold and italics are tracked with depth counters driven by the
 * strong/em open/close tokens. Links are collected on a stack and emitted by
 * closeCurrentLink. Code spans get code styling, line breaks become a '\n'
 * run, and images are replaced by a `[alt]` text placeholder.
 *
 * @param {object} token - Inline token; when it has no children its `content`
 *   is used as a single plain run.
 * @returns {Array} TextRun / ExternalHyperlink nodes, possibly empty.
 */
export function inlineToDocxRuns(token) {
  if (!token?.children?.length) {
    const fallbackText = token?.content || '';
    return fallbackText ? [new TextRun(fallbackText)] : [];
  }

  const output = [];
  const linkStack = [];
  let boldDepth = 0;
  let italicsDepth = 0;
  const emit = (node) => pushInlineNode(output, linkStack, node);

  for (const child of token.children) {
    // Snapshot taken before this token adjusts the depth counters.
    const style = {
      bold: boldDepth > 0,
      italics: italicsDepth > 0,
    };

    switch (child.type) {
      case 'strong_open':
        boldDepth += 1;
        break;
      case 'strong_close':
        boldDepth = Math.max(0, boldDepth - 1);
        break;
      case 'em_open':
        italicsDepth += 1;
        break;
      case 'em_close':
        italicsDepth = Math.max(0, italicsDepth - 1);
        break;
      case 'link_open':
        linkStack.push({
          href: child.attrGet('href') || '',
          children: [],
        });
        break;
      case 'link_close':
        closeCurrentLink(output, linkStack, style);
        break;
      case 'code_inline':
        // Emitted even when empty, unlike plain text.
        emit(createInlineTextRun(child.content || '', { ...style, code: true }));
        break;
      case 'softbreak':
      case 'hardbreak':
        emit(createInlineTextRun('\n', style));
        break;
      case 'image': {
        const alt = child.content || child.attrGet('alt') || 'image';
        emit(createInlineTextRun(`[${alt}]`, style));
        break;
      }
      case 'text':
      default:
        // Plain text and any unrecognised token: keep non-empty content.
        if (child.content) {
          emit(createInlineTextRun(child.content, style));
        }
        break;
    }
  }

  // Flush links that never saw a matching link_close.
  while (linkStack.length > 0) {
    closeCurrentLink(output, linkStack, {
      bold: boldDepth > 0,
      italics: italicsDepth > 0,
    });
  }

  return output;
}
304
+
68
305
  function collectTocEntries(tokens) {
69
306
  const entries = [];
70
307
  for (let i = 0; i < tokens.length; i += 1) {
@@ -123,9 +360,11 @@ function parseTable(tokens, startIndex) {
123
360
  rows: rows.map(
124
361
  (items) =>
125
362
  new TableRow({
126
- children: items.map((item) =>
363
+ children: items.map((itemRuns) =>
127
364
  new TableCell({
128
- children: [paragraphFromText(item || '')],
365
+ children: [new Paragraph({
366
+ children: itemRuns.length > 0 ? itemRuns : [new TextRun('')],
367
+ })],
129
368
  }),
130
369
  ),
131
370
  }),
@@ -146,17 +385,17 @@ function parseTable(tokens, startIndex) {
146
385
  }
147
386
 
148
387
  if (token.type === 'th_open' || token.type === 'td_open') {
149
- currentCell = '';
388
+ currentCell = [];
150
389
  continue;
151
390
  }
152
391
 
153
392
  if (token.type === 'inline' && currentCell !== null) {
154
- currentCell += inlineToPlainText(token);
393
+ currentCell.push(...inlineToDocxRuns(token));
155
394
  continue;
156
395
  }
157
396
 
158
397
  if (token.type === 'th_close' || token.type === 'td_close') {
159
- row.push(currentCell || '');
398
+ row.push(currentCell || []);
160
399
  currentCell = null;
161
400
  }
162
401
  }
@@ -167,10 +406,10 @@ function parseTable(tokens, startIndex) {
167
406
  };
168
407
  }
169
408
 
170
- function markdownToDocx(markdown) {
409
+ export function markdownToDocx(markdown) {
171
410
  const tokens = markdownParser.parse(markdown, {});
172
411
  const tocEntries = collectTocEntries(tokens);
173
- const children = [];
412
+ const children = [...buildDocxTocParagraphs(tocEntries)];
174
413
  const listStack = [];
175
414
  let pendingBlock = null;
176
415
 
@@ -237,15 +476,15 @@ function markdownToDocx(markdown) {
237
476
  continue;
238
477
  }
239
478
 
240
- const text = inlineToPlainText(token).trim();
241
- if (!text) {
479
+ const runs = inlineToDocxRuns(token);
480
+ if (runs.length === 0) {
242
481
  continue;
243
482
  }
244
483
 
245
484
  if (pendingBlock?.type === 'heading') {
246
485
  children.push(
247
486
  new Paragraph({
248
- text,
487
+ children: runs,
249
488
  heading: headingLevelFromNumber(pendingBlock.level),
250
489
  }),
251
490
  );
@@ -256,11 +495,13 @@ function markdownToDocx(markdown) {
256
495
  if (listStack.length > 0) {
257
496
  const top = listStack[listStack.length - 1];
258
497
  const prefix = top.type === 'ordered' ? `${top.count}. ` : '• ';
259
- children.push(paragraphFromText(`${prefix}${text}`));
498
+ children.push(new Paragraph({
499
+ children: [new TextRun(prefix), ...runs],
500
+ }));
260
501
  continue;
261
502
  }
262
503
 
263
- children.push(paragraphFromText(text));
504
+ children.push(new Paragraph({ children: runs }));
264
505
  pendingBlock = null;
265
506
  }
266
507
 
@@ -431,6 +672,24 @@ export async function checkPuppeteer() {
431
672
  }
432
673
  }
433
674
 
/**
 * Suggest a shell command for installing pandoc on the given platform.
 *
 * @param {string} [platform=process.platform] - Node platform identifier.
 * @returns {string} Homebrew hint for 'darwin', winget/choco/scoop hint for
 *   'win32', and an apt-get/yum hint for everything else.
 */
export function getPandocInstallHint(platform = process.platform) {
  switch (platform) {
    case 'darwin':
      return 'brew install pandoc';
    case 'win32':
      return 'winget install pandoc (or choco install pandoc / scoop install pandoc)';
    default:
      return 'apt-get install pandoc (or yum install pandoc)';
  }
}
684
+
/**
 * Build the warnings reported when pandoc is missing and the fallback
 * exporter is used.
 *
 * @param {string} [platform=process.platform] - Platform used to pick the
 *   install hint.
 * @returns {string[]} Two entries: the 'pandoc_not_found' code, then a
 *   user-facing message (in Chinese) ending with the install command.
 */
export function createPandocFallbackWarnings(platform = process.platform) {
  const warnings = ['pandoc_not_found'];
  warnings.push(
    `⚡ 当前使用基础排版模式。想要更精美的代码高亮和智能表格?一行命令解锁:${getPandocInstallHint(platform)}`,
  );
  return warnings;
}
692
+
434
693
  export async function exportPDF(html, options = {}) {
435
694
  const {
436
695
  pageSize = 'A4',
@@ -438,6 +697,7 @@ export async function exportPDF(html, options = {}) {
438
697
  outDir = os.tmpdir(),
439
698
  fileName = 'output.pdf',
440
699
  htmlPath,
700
+ sandbox,
441
701
  } = options;
442
702
 
443
703
  let puppeteer;
@@ -450,10 +710,13 @@ export async function exportPDF(html, options = {}) {
450
710
 
451
711
  const warnings = [];
452
712
  const pdfPath = path.join(path.resolve(outDir), fileName);
713
+ const resolvedSandboxMode = resolveSandboxMode(sandbox);
714
+ const launchArgs = landscape ? ['--allow-file-access-from-files'] : [];
715
+ launchArgs.push(...getSandboxArgs(resolvedSandboxMode));
453
716
 
454
717
  const browser = await puppeteer.launch({
455
718
  headless: true,
456
- args: landscape ? ['--allow-file-access-from-files'] : [],
719
+ args: launchArgs,
457
720
  });
458
721
  try {
459
722
  const page = await browser.newPage();
@@ -587,8 +850,7 @@ export async function exportDOCX(markdownString, options = {}) {
587
850
  await fs.rm(tempInput, { force: true }).catch(() => {});
588
851
  }
589
852
  } else {
590
- const warning = 'Pandoc not found, using docx fallback. Install Pandoc for better results.';
591
- warnings.push(warning);
853
+ warnings.push(...createPandocFallbackWarnings());
592
854
 
593
855
  const document = markdownToDocx(markdownString);
594
856
  const buffer = await Packer.toBuffer(document);
@@ -677,7 +939,7 @@ export async function exportDOCXFromHTML(htmlString, options = {}) {
677
939
  await fs.rm(tempInput, { force: true }).catch(() => {});
678
940
  }
679
941
  } else {
680
- warnings.push('Pandoc not found, using text-only DOCX fallback.');
942
+ warnings.push(...createPandocFallbackWarnings());
681
943
 
682
944
  const plainText = String(htmlString || '')
683
945
  .replace(/<style[\s\S]*?<\/style>/gi, ' ')
@@ -3,7 +3,7 @@ import path from 'node:path';
3
3
  import { createOutputDir } from '../utils/output.js';
4
4
  import { normalizeOpenMode } from '../utils/preferences.js';
5
5
  import { renderMarkdown } from './renderer.js';
6
- import { exportDOCX, exportPDF } from './exporter.js';
6
+ import { exportDOCX, exportPDF, resolveSandboxMode } from './exporter.js';
7
7
  import { createSlideshow } from './slideshow.js';
8
8
 
9
9
  const MARKDOWN_EXTENSIONS = new Set(['.md', '.markdown', '.mdown', '.mdx']);
@@ -94,7 +94,9 @@ export async function openTarget(targetPath, options = {}) {
94
94
  fetchRemote = true,
95
95
  returnContent = false,
96
96
  maxContentBytes = 50 * 1024 * 1024,
97
+ sandbox,
97
98
  } = options;
99
+ const resolvedSandbox = resolveSandboxMode(sandbox, process.env);
98
100
 
99
101
  const requestedMode = normalizeOpenMode(mode, 'auto');
100
102
  const preferredMode = requestedMode === 'auto'
@@ -258,6 +260,7 @@ export async function openTarget(targetPath, options = {}) {
258
260
  outDir: outputDir,
259
261
  fileName: `${name}.pdf`,
260
262
  htmlPath,
263
+ sandbox: resolvedSandbox,
261
264
  });
262
265
  warnings.push(...pdf.warnings);
263
266
 
@@ -13,7 +13,8 @@ const THEME_DIR = path.join(__dirname, 'themes');
13
13
  const TEMPLATE_PATH = path.resolve(__dirname, '../templates/document.html');
14
14
 
15
15
  const GITHUB_MARKDOWN_CSS_PATH = require.resolve('github-markdown-css/github-markdown.css');
16
- const HIGHLIGHT_CSS_PATH = require.resolve('highlight.js/styles/github.css');
16
+ const HIGHLIGHT_CSS_LIGHT = require.resolve('highlight.js/styles/github.css');
17
+ const HIGHLIGHT_CSS_DARK = require.resolve('highlight.js/styles/github-dark.css');
17
18
 
18
19
  const TOC_BASE_CSS = `
19
20
  .markdown-body :is(h1, h2, h3, h4, h5, h6) {
@@ -165,18 +166,18 @@ async function loadStyles(theme) {
165
166
  const builtInThemePath =
166
167
  themeName === 'dark'
167
168
  ? path.join(THEME_DIR, 'dark.css')
168
- : themeName === 'print'
169
- ? path.join(THEME_DIR, 'print.css')
170
- : path.join(THEME_DIR, 'light.css');
169
+ : path.join(THEME_DIR, 'light.css');
171
170
  const customThemePath =
172
- themeName === 'light' || themeName === 'dark' || themeName === 'print'
171
+ themeName === 'light' || themeName === 'dark'
173
172
  ? null
174
173
  : path.resolve(themeName);
175
174
 
175
+ const highlightCssPath = themeName === 'dark' ? HIGHLIGHT_CSS_DARK : HIGHLIGHT_CSS_LIGHT;
176
+
176
177
  const [builtInTheme, githubCss, highlightCss, customTheme = ''] = await Promise.all([
177
178
  fs.readFile(builtInThemePath, 'utf8'),
178
179
  fs.readFile(GITHUB_MARKDOWN_CSS_PATH, 'utf8'),
179
- fs.readFile(HIGHLIGHT_CSS_PATH, 'utf8'),
180
+ fs.readFile(highlightCssPath, 'utf8'),
180
181
  customThemePath ? fs.readFile(customThemePath, 'utf8') : Promise.resolve(''),
181
182
  ]);
182
183