md-to-mowen 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -165,3 +165,5 @@ npm link
165
165
  ## License
166
166
 
167
167
  MIT
168
+
169
+ # md-to-mowen
package/dist/cli/index.js CHANGED
@@ -120,6 +120,7 @@ program
120
120
  .option('--cache-dir <dir>', '保存各阶段产物的目录(调试用)')
121
121
  .option('--code-block-style <style>', '代码块样式:paragraph 或 codeblock')
122
122
  .option('--no-recursive', '批量发布时不递归扫描子目录', false)
123
+ .option('--quiet', '静默模式:抑制进度条,仅输出最终汇总', false)
123
124
  .action(async (opts) => {
124
125
  const apiKey = getApiKey();
125
126
  if (!apiKey && !opts.dryRun) {
@@ -171,6 +172,7 @@ program
171
172
  dryRun: opts.dryRun,
172
173
  cacheDir: resolvedConfig.cacheDir,
173
174
  recursive: !opts.noRecursive,
175
+ quiet: opts.quiet,
174
176
  });
175
177
  // 批量模式下,有失败则 exit 1
176
178
  if (result.failed > 0) {
@@ -197,6 +199,7 @@ program
197
199
  dryRun: opts.dryRun,
198
200
  cacheDir: resolvedConfig.cacheDir,
199
201
  codeBlockStyle: resolvedConfig.codeBlockStyle,
202
+ quiet: opts.quiet,
200
203
  });
201
204
  if (!result.dryRun && result.noteId) {
202
205
  upsertNote(metaStore, absInput, result.noteId);
@@ -319,4 +322,42 @@ program
319
322
  process.exit(1);
320
323
  }
321
324
  });
325
+ // ── status ───────────────────────────────────────────────────────────────────
326
+ program
327
+ .command('status')
328
+ .description('查看已发布笔记的状态')
329
+ .option('-i, --input <path>', '查看指定文件的发布状态')
330
+ .option('--json', '输出 JSON 格式')
331
+ .action(async (opts) => {
332
+ const metaPath = findMetadataPath();
333
+ const metaStore = readMetadata(metaPath);
334
+ const cwd = process.cwd();
335
+ if (opts.input) {
336
+ // 单文件查询
337
+ const { lookupFileStatus, formatStatusTable, formatStatusJson } = await import('./status.js');
338
+ const absInput = resolve(opts.input);
339
+ const entry = lookupFileStatus(metaStore, absInput, cwd);
340
+ if (!entry) {
341
+ console.error(`未发布:${opts.input}(元数据中无记录)`);
342
+ process.exit(1);
343
+ }
344
+ if (opts.json) {
345
+ console.log(formatStatusJson([entry]));
346
+ }
347
+ else {
348
+ console.log(formatStatusTable([entry]));
349
+ }
350
+ }
351
+ else {
352
+ // 列出所有
353
+ const { listAllNotes, formatStatusTable, formatStatusJson } = await import('./status.js');
354
+ const entries = listAllNotes(metaStore, cwd);
355
+ if (opts.json) {
356
+ console.log(formatStatusJson(entries));
357
+ }
358
+ else {
359
+ console.log(formatStatusTable(entries));
360
+ }
361
+ }
362
+ });
322
363
  program.parse();
@@ -0,0 +1,44 @@
1
+ import { relative } from 'path';
2
+ const MOWEN_NOTE_BASE_URL = 'https://mowen.cn/note';
3
+ /** 将 NoteRecord 转为 StatusEntry,路径转为相对于 cwd 的相对路径 */
4
+ function toEntry(absPath, record, cwd) {
5
+ return {
6
+ file: relative(cwd, absPath),
7
+ noteId: record.noteId,
8
+ noteUrl: `${MOWEN_NOTE_BASE_URL}/${record.noteId}`,
9
+ createdAt: record.createdAt,
10
+ updatedAt: record.updatedAt,
11
+ };
12
+ }
13
+ /** 列出所有已发布文件 */
14
+ export function listAllNotes(store, cwd) {
15
+ return Object.entries(store.notes).map(([absPath, record]) => toEntry(absPath, record, cwd));
16
+ }
17
+ /** 查询单个文件的发布状态,未发布返回 null */
18
+ export function lookupFileStatus(store, absPath, cwd) {
19
+ const record = store.notes[absPath];
20
+ if (!record)
21
+ return null;
22
+ return toEntry(absPath, record, cwd);
23
+ }
24
+ /** 格式化为表格文本 */
25
+ export function formatStatusTable(entries) {
26
+ if (entries.length === 0) {
27
+ return '暂无已发布笔记。';
28
+ }
29
+ // 计算列宽
30
+ const fileWidth = Math.max(6, ...entries.map((e) => e.file.length));
31
+ const idWidth = Math.max(8, ...entries.map((e) => e.noteId.length));
32
+ const header = `${'文件'.padEnd(fileWidth)} ${'笔记 ID'.padEnd(idWidth)} 更新时间`;
33
+ const divider = `${'─'.repeat(fileWidth)} ${'─'.repeat(idWidth)} ${'─'.repeat(24)}`;
34
+ const rows = entries.map((e) => {
35
+ const time = e.updatedAt.replace('T', ' ').replace(/\.\d{3}Z$/, '');
36
+ return `${e.file.padEnd(fileWidth)} ${e.noteId.padEnd(idWidth)} ${time}`;
37
+ });
38
+ const links = entries.map((e) => ` → ${e.noteUrl}`);
39
+ return [header, divider, ...rows, '', ...links].join('\n');
40
+ }
41
+ /** 格式化为 JSON */
42
+ export function formatStatusJson(entries) {
43
+ return JSON.stringify(entries, null, 2) + '\n';
44
+ }
@@ -37,6 +37,10 @@ function serializeBlock(block, doc) {
37
37
  return serializeAudio(block);
38
38
  case 'codeblock':
39
39
  return serializeCodeBlock(block);
40
+ case 'note':
41
+ return serializeNote(block);
42
+ case 'pdf':
43
+ return serializePdf(block);
40
44
  }
41
45
  }
42
46
  function serializeParagraph(block) {
@@ -70,6 +74,12 @@ function serializeCodeBlock(block) {
70
74
  const lang = block.language || '';
71
75
  return '```' + lang + '\n' + block.content + '\n```';
72
76
  }
77
+ function serializeNote(block) {
78
+ return `![[note:${block.noteId}]]`;
79
+ }
80
+ function serializePdf(block) {
81
+ return `![[pdf:${block.src}]]`;
82
+ }
73
83
  function serializeTextRun(run) {
74
84
  if (!run.marks)
75
85
  return run.text;
@@ -91,6 +101,9 @@ function serializeTextRun(run) {
91
101
  // italic
92
102
  if (run.marks.italic)
93
103
  text = `*${text}*`;
104
+ // highlight
105
+ if (run.marks.highlight)
106
+ text = `==${text}==`;
94
107
  // link 最后包裹(outermost)
95
108
  if (run.marks.link)
96
109
  text = `[${text}](${run.marks.link})`;
@@ -19,12 +19,14 @@ function mimeFromFileName(fileName) {
19
19
  /**
20
20
  * 上传本地图片文件,返回 fileId。
21
21
  * 使用两步 OSS 上传流程:prepare → multipart POST。
22
+ *
23
+ * @param alt 可选的替代文本,用作上传文件名。为空时不传文件名(图片不带标题)。
22
24
  */
23
- export async function uploadLocalFile(filePath, client) {
24
- const fileName = basename(filePath);
25
+ export async function uploadLocalFile(filePath, client, alt) {
26
+ const fileName = alt?.trim() || '';
25
27
  const fileBuffer = await readFile(filePath);
26
28
  const form = await client.uploadPrepare(1, fileName);
27
- await ossUpload(form, fileBuffer, fileName);
29
+ await ossUpload(form, fileBuffer, fileName || basename(filePath));
28
30
  return form['x:file_id'];
29
31
  }
30
32
  /**
@@ -36,18 +38,21 @@ export async function uploadRemoteUrl(url, client) {
36
38
  /**
37
39
  * 上传 Data URI 图片,返回 fileId。
38
40
  * 支持 data:image/png;base64,... 格式。
41
+ *
42
+ * @param alt 可选的替代文本,用作上传文件名。为空时不传文件名(图片不带标题)。
39
43
  */
40
- export async function uploadDataUri(dataUri, client) {
44
+ export async function uploadDataUri(dataUri, client, alt) {
41
45
  const match = dataUri.match(/^data:([^;]+);base64,(.+)$/);
42
46
  if (!match)
43
47
  throw new Error(`Invalid data URI: ${dataUri.slice(0, 40)}`);
44
48
  const mime = match[1] ?? 'image/png';
45
49
  const b64 = match[2] ?? '';
46
50
  const ext = mime.split('/')[1] ?? 'png';
47
- const fileName = `image.${ext}`;
51
+ const uploadFileName = alt?.trim() || '';
52
+ const ossFileName = `image.${ext}`;
48
53
  const buffer = Buffer.from(b64, 'base64');
49
- const form = await client.uploadPrepare(1, fileName);
50
- await ossUpload(form, buffer, fileName);
54
+ const form = await client.uploadPrepare(1, uploadFileName);
55
+ await ossUpload(form, buffer, ossFileName);
51
56
  return form['x:file_id'];
52
57
  }
53
58
  /**
@@ -26,6 +26,10 @@ function convertBlock(block, doc) {
26
26
  return [convertAudio(block)];
27
27
  case 'codeblock':
28
28
  return [convertCodeBlock(block)];
29
+ case 'note':
30
+ return [convertNote(block)];
31
+ case 'pdf':
32
+ return [convertPdf(block)];
29
33
  }
30
34
  }
31
35
  function convertParagraph(block) {
@@ -90,13 +94,32 @@ function convertCodeBlock(block) {
90
94
  content: block.content,
91
95
  };
92
96
  }
97
+ function convertNote(block) {
98
+ return {
99
+ type: 'note',
100
+ attrs: {
101
+ uuid: block.noteId,
102
+ },
103
+ };
104
+ }
105
+ function convertPdf(block) {
106
+ if (!block.uuid) {
107
+ throw new Error(`MASTPdfBlock ${block.id} has no uuid — run asset processing before serialization`);
108
+ }
109
+ return {
110
+ type: 'pdf',
111
+ attrs: {
112
+ uuid: block.uuid,
113
+ },
114
+ };
115
+ }
93
116
  // ── 行内节点转换 ───────────────────────────────────────────────────────────────
94
117
  function convertTextRun(run) {
95
118
  const node = { type: 'text', text: run.text };
96
119
  if (!run.marks)
97
120
  return node;
98
121
  const marks = [];
99
- // 按优先级顺序:code → strikethrough → bold → italic → link
122
+ // 按优先级顺序:code → strikethrough → bold → italic → highlight → link
100
123
  if (run.marks.code)
101
124
  marks.push({ type: 'code' });
102
125
  if (run.marks.strikethrough)
@@ -105,6 +128,8 @@ function convertTextRun(run) {
105
128
  marks.push({ type: 'bold' });
106
129
  if (run.marks.italic)
107
130
  marks.push({ type: 'italic' });
131
+ if (run.marks.highlight)
132
+ marks.push({ type: 'highlight' });
108
133
  if (run.marks.link)
109
134
  marks.push({ type: 'link', attrs: { href: run.marks.link } });
110
135
  if (marks.length > 0)
@@ -29,6 +29,10 @@ function convertNode(node, blocks) {
29
29
  return convertAudio(node, blocks);
30
30
  case 'codeblock':
31
31
  return convertCodeBlock(node, blocks);
32
+ case 'note':
33
+ return convertNote(node, blocks);
34
+ case 'pdf':
35
+ return convertPdf(node, blocks);
32
36
  }
33
37
  }
34
38
  function convertParagraph(block, blocks) {
@@ -85,6 +89,27 @@ function convertCodeBlock(block, blocks) {
85
89
  blocks[id] = mast;
86
90
  return id;
87
91
  }
92
+ function convertNote(block, blocks) {
93
+ const id = newId();
94
+ const mast = {
95
+ id,
96
+ type: 'note',
97
+ noteId: block.attrs.uuid,
98
+ };
99
+ blocks[id] = mast;
100
+ return id;
101
+ }
102
+ function convertPdf(block, blocks) {
103
+ const id = newId();
104
+ const mast = {
105
+ id,
106
+ type: 'pdf',
107
+ src: `mowen://file/${block.attrs.uuid}`,
108
+ uuid: block.attrs.uuid,
109
+ };
110
+ blocks[id] = mast;
111
+ return id;
112
+ }
88
113
  // ── 行内节点转换 ───────────────────────────────────────────────────────────────
89
114
  function convertTextRun(run) {
90
115
  const mast = { type: 'text', text: run.text };
@@ -105,6 +130,9 @@ function convertTextRun(run) {
105
130
  case 'strikethrough':
106
131
  marks.strikethrough = true;
107
132
  break;
133
+ case 'highlight':
134
+ marks.highlight = true;
135
+ break;
108
136
  case 'link':
109
137
  marks.link = mark.attrs.href;
110
138
  break;
@@ -1,3 +1,4 @@
1
+ import { HIGHLIGHT_MARKER } from './md-to-hast.js';
1
2
  // ── ID 生成 ────────────────────────────────────────────────────────────────────
2
3
  let _counter = 0;
3
4
  function newId() {
@@ -18,21 +19,65 @@ function getTagName(node) {
18
19
  return isElement(node) ? node.tagName : null;
19
20
  }
20
21
  // ── 行内节点提取 ───────────────────────────────────────────────────────────────
22
+ /**
23
+ * 处理含高亮标记(⸻)的文本节点。
24
+ * 将文本按 marker 分割,根据当前高亮状态决定哪些段落加 highlight mark。
25
+ *
26
+ * 例(hlActive=false):"普通 ⸻高亮⸻ 文本"
27
+ * → [{ text: "普通 " }, { text: "高亮", marks: { highlight } }, { text: " 文本" }]
28
+ *
29
+ * 例(hlActive=true,从前节点继承):"⸻ 混合。"
30
+ * → [{ text: " 混合。" }] ← marker 关闭高亮,后续文本正常
31
+ *
32
+ * @param hlActive 从前序节点继承的高亮状态
33
+ * @returns runs 生成的 TextRun 列表
34
+ * @returns lastHighlighted 末段是否仍处于高亮状态(供后续节点继承)
35
+ */
36
+ function processTextWithHighlight(text, marks, hlActive) {
37
+ const parts = text.split(HIGHLIGHT_MARKER);
38
+ const runs = [];
39
+ for (let i = 0; i < parts.length; i++) {
40
+ // 每个 marker 切换高亮状态(无论当前段是否为空)
41
+ if (i > 0)
42
+ hlActive = !hlActive;
43
+ const part = parts[i];
44
+ if (!part)
45
+ continue; // 跳过空段(marker 相邻或位于首尾)
46
+ const run = { type: 'text', text: part };
47
+ const merged = { ...marks, ...(hlActive ? { highlight: true } : {}) };
48
+ if (Object.keys(merged).length > 0) {
49
+ run.marks = merged;
50
+ }
51
+ runs.push(run);
52
+ }
53
+ return { runs, lastHighlighted: hlActive };
54
+ }
21
55
  /**
22
56
  * 递归遍历 HAST 行内节点树,将嵌套标记展平为 MASTTextRun 列表。
23
57
  * 例:<strong><em>text</em></strong> → [{ text, marks: { bold, italic } }]
58
+ *
59
+ * @param highlightActive 是否继承前序节点的高亮状态(用于跨节点高亮)
24
60
  */
25
- function extractInline(nodes, marks = {}) {
61
+ function extractInline(nodes, marks = {}, highlightActive = false) {
26
62
  const runs = [];
63
+ let hlActive = highlightActive;
27
64
  for (const node of nodes) {
28
65
  if (isText(node)) {
29
66
  if (node.value) {
30
- const run = { type: 'text', text: node.value };
31
- const activeMarks = { ...marks };
32
- if (Object.keys(activeMarks).length > 0) {
33
- run.marks = activeMarks;
67
+ if (node.value.includes(HIGHLIGHT_MARKER)) {
68
+ // 含高亮 marker:分割处理
69
+ const result = processTextWithHighlight(node.value, marks, hlActive);
70
+ runs.push(...result.runs);
71
+ hlActive = result.lastHighlighted;
72
+ }
73
+ else {
74
+ const run = { type: 'text', text: node.value };
75
+ const activeMarks = hlActive ? { ...marks, highlight: true } : { ...marks };
76
+ if (Object.keys(activeMarks).length > 0) {
77
+ run.marks = activeMarks;
78
+ }
79
+ runs.push(run);
34
80
  }
35
- runs.push(run);
36
81
  }
37
82
  continue;
38
83
  }
@@ -53,6 +98,9 @@ function extractInline(nodes, marks = {}) {
53
98
  case 'del':
54
99
  childMarks.strikethrough = true;
55
100
  break;
101
+ case 'mark':
102
+ childMarks.highlight = true;
103
+ break;
56
104
  case 'a': {
57
105
  const href = node.properties?.href ?? '';
58
106
  // 忽略纯锚点链接(fragment links)
@@ -68,7 +116,13 @@ function extractInline(nodes, marks = {}) {
68
116
  case 'img':
69
117
  continue;
70
118
  }
71
- runs.push(...extractInline(node.children ?? [], childMarks));
119
+ const childRuns = extractInline(node.children ?? [], childMarks, hlActive);
120
+ runs.push(...childRuns);
121
+ // 更新 hlActive:如果子节点处理后仍处于高亮状态,继承下去
122
+ const lastRun = childRuns[childRuns.length - 1];
123
+ if (lastRun) {
124
+ hlActive = !!lastRun.marks?.highlight;
125
+ }
72
126
  }
73
127
  return runs;
74
128
  }
@@ -104,6 +158,7 @@ function marksEqual(a, b) {
104
158
  a.italic === b.italic &&
105
159
  a.code === b.code &&
106
160
  a.strikethrough === b.strikethrough &&
161
+ a.highlight === b.highlight &&
107
162
  a.link === b.link);
108
163
  }
109
164
  // ── 块节点转换 ─────────────────────────────────────────────────────────────────
@@ -117,7 +172,7 @@ function makeEmptyParagraph() {
117
172
  * 将 HAST 元素转换为一组 MAST 块节点。
118
173
  * 返回数组是因为某些元素(如列表)会展开为多个块。
119
174
  */
120
- function convertBlock(node, doc, opts = {}) {
175
+ function convertBlock(node, doc, opts = {}, warnings = []) {
121
176
  const tag = node.tagName;
122
177
  // ── 标题 ──────────────────────────────────────────────────────────────────
123
178
  if (/^h[1-6]$/.test(tag)) {
@@ -125,6 +180,7 @@ function convertBlock(node, doc, opts = {}) {
125
180
  // H1–H6 全部 → bold paragraph
126
181
  const extraMarks = { bold: true };
127
182
  const content = extractInlineContent(node, extraMarks);
183
+ warnings.push({ type: 'heading', message: `H${level} 标题转换为加粗段落(墨问不支持标题层级)`, source: tag });
128
184
  return [makeParagraph(content)];
129
185
  }
130
186
  // ── 段落 ──────────────────────────────────────────────────────────────────
@@ -145,6 +201,28 @@ function convertBlock(node, doc, opts = {}) {
145
201
  };
146
202
  return [block];
147
203
  }
204
+ // 内链笔记约定:alt 以 "note:" 开头,src 以 "note:" 开头
205
+ if (alt.startsWith('note:') && src.startsWith('note:')) {
206
+ const noteId = alt.slice('note:'.length).trim();
207
+ const block = {
208
+ id: newId(),
209
+ type: 'note',
210
+ noteId,
211
+ };
212
+ return [block];
213
+ }
214
+ // PDF 嵌入约定:alt 以 "pdf:" 开头,src 以 "pdf:" 开头
215
+ if (alt.startsWith('pdf:') && src.startsWith('pdf:')) {
216
+ const pdfSrc = src.slice('pdf:'.length).trim();
217
+ const block = {
218
+ id: newId(),
219
+ type: 'pdf',
220
+ src: pdfSrc,
221
+ // 如果 src 看起来像 fileId(包含 -TMP 后缀),直接设为 uuid
222
+ ...(pdfSrc.includes('-TMP') ? { uuid: pdfSrc } : {}),
223
+ };
224
+ return [block];
225
+ }
148
226
  const block = {
149
227
  id: newId(),
150
228
  type: 'image',
@@ -164,7 +242,7 @@ function convertBlock(node, doc, opts = {}) {
164
242
  for (const child of node.children) {
165
243
  if (!isElement(child))
166
244
  continue;
167
- const childBlocks = convertBlock(child, doc, opts);
245
+ const childBlocks = convertBlock(child, doc, opts, warnings);
168
246
  for (const b of childBlocks) {
169
247
  doc.blocks[b.id] = b;
170
248
  childIds.push(b.id);
@@ -179,11 +257,11 @@ function convertBlock(node, doc, opts = {}) {
179
257
  }
180
258
  // ── 无序列表 ──────────────────────────────────────────────────────────────
181
259
  if (tag === 'ul') {
182
- return convertList(node, doc, opts, false, 0);
260
+ return convertList(node, doc, opts, false, 0, warnings);
183
261
  }
184
262
  // ── 有序列表 ──────────────────────────────────────────────────────────────
185
263
  if (tag === 'ol') {
186
- return convertList(node, doc, opts, true, 0);
264
+ return convertList(node, doc, opts, true, 0, warnings);
187
265
  }
188
266
  // ── 代码块 ────────────────────────────────────────────────────────────────
189
267
  if (tag === 'pre') {
@@ -242,11 +320,21 @@ function convertBlock(node, doc, opts = {}) {
242
320
  const blocks = [];
243
321
  for (const child of node.children) {
244
322
  if (isElement(child)) {
245
- blocks.push(...convertBlock(child, doc, opts));
323
+ blocks.push(...convertBlock(child, doc, opts, warnings));
246
324
  }
247
325
  }
248
326
  return blocks;
249
327
  }
328
+ // ── 脚注检测 ────────────────────────────────────────────────────────────
329
+ if (tag === 'sup' && node.properties?.id && String(node.properties.id).startsWith('fnref-')) {
330
+ warnings.push({ type: 'footnote', message: '脚注引用转换为纯文本(墨问不支持脚注)', source: 'footnote-ref' });
331
+ const content = extractInlineContent(node);
332
+ return content.length > 0 ? [makeParagraph(content)] : [];
333
+ }
334
+ if (tag === 'section' && node.properties?.['data-footnotes'] !== undefined) {
335
+ warnings.push({ type: 'footnote', message: '脚注定义被丢弃(墨问不支持脚注)', source: 'footnote-def' });
336
+ return [];
337
+ }
250
338
  // 其他未知标签:尝试提取文本
251
339
  const content = extractInlineContent(node);
252
340
  if (content.length > 0) {
@@ -255,13 +343,22 @@ function convertBlock(node, doc, opts = {}) {
255
343
  return [];
256
344
  }
257
345
  // ── 列表转换 ───────────────────────────────────────────────────────────────────
258
- function convertList(listEl, doc, opts, ordered, depth) {
346
+ function convertList(listEl, doc, opts, ordered, depth, warnings = []) {
259
347
  const blocks = [];
260
348
  let itemIndex = 1;
261
349
  const indent = ' '.repeat(depth);
262
350
  for (const child of listEl.children) {
263
351
  if (!isElement(child) || child.tagName !== 'li')
264
352
  continue;
353
+ // 检测 task list(li 内含 <input type="checkbox">)
354
+ const hasCheckbox = child.children.some((c) => isElement(c) && c.tagName === 'input' && c.properties?.type === 'checkbox');
355
+ if (hasCheckbox) {
356
+ warnings.push({
357
+ type: 'task-list',
358
+ message: '任务列表项转换为纯文本(墨问不支持 checkbox)',
359
+ source: 'task-list',
360
+ });
361
+ }
265
362
  const prefix = ordered ? `${indent}${itemIndex}. ` : `${indent}• `;
266
363
  itemIndex++;
267
364
  // li 内容可能包含段落和嵌套列表
@@ -275,10 +372,10 @@ function convertList(listEl, doc, opts, ordered, depth) {
275
372
  continue;
276
373
  }
277
374
  if (liChild.tagName === 'ul') {
278
- nestedBlocks.push(...convertList(liChild, doc, opts, false, depth + 1));
375
+ nestedBlocks.push(...convertList(liChild, doc, opts, false, depth + 1, warnings));
279
376
  }
280
377
  else if (liChild.tagName === 'ol') {
281
- nestedBlocks.push(...convertList(liChild, doc, opts, true, depth + 1));
378
+ nestedBlocks.push(...convertList(liChild, doc, opts, true, depth + 1, warnings));
282
379
  }
283
380
  else if (liChild.tagName === 'p') {
284
381
  paragraphContent.push(...extractInlineContent(liChild));
@@ -345,17 +442,37 @@ function tableToMarkdown(tableEl) {
345
442
  *
346
443
  * @param hast HAST Root 节点
347
444
  * @param opts 转换选项
445
+ * @returns 包含文档和转换警告的结果对象
348
446
  */
349
447
  export function hastToMast(hast, opts = {}) {
350
448
  const doc = { blocks: {}, topLevel: [] };
449
+ const warnings = [];
450
+ let prevEndLine = null;
351
451
  for (const node of hast.children) {
452
+ // ── 空行插入:根据相邻元素的行号差推断空行数量 ─────────────────────
453
+ if (isElement(node)) {
454
+ const startLine = node.position?.start?.line;
455
+ if (startLine != null && prevEndLine != null) {
456
+ const gap = startLine - prevEndLine;
457
+ // gap > 1 表示中间有空行,每个空行对应一个空段落
458
+ for (let i = 1; i < gap; i++) {
459
+ const emptyBlock = makeEmptyParagraph();
460
+ doc.blocks[emptyBlock.id] = emptyBlock;
461
+ doc.topLevel.push(emptyBlock.id);
462
+ }
463
+ }
464
+ }
352
465
  if (!isElement(node))
353
466
  continue;
354
- const blocks = convertBlock(node, doc, opts);
467
+ const blocks = convertBlock(node, doc, opts, warnings);
355
468
  for (const block of blocks) {
356
469
  doc.blocks[block.id] = block;
357
470
  doc.topLevel.push(block.id);
358
471
  }
472
+ // 记录当前元素的结束行号
473
+ if (node.position?.end?.line != null) {
474
+ prevEndLine = node.position.end.line;
475
+ }
359
476
  }
360
- return doc;
477
+ return { doc, warnings };
361
478
  }
@@ -2,13 +2,78 @@ import { unified } from 'unified';
2
2
  import remarkParse from 'remark-parse';
3
3
  import remarkGfm from 'remark-gfm';
4
4
  import remarkRehype from 'remark-rehype';
5
+ /** 高亮标记字符(U+2E3B THREE-EM DASH),用作 ==text== 的中间占位符 */
6
+ export const HIGHLIGHT_MARKER = '⸻';
7
+ /**
8
+ * 预处理 Markdown 中的 ==text== 高亮语法。
9
+ * 将 `==内容==` 替换为 `⸻内容⸻`(marker 包裹),
10
+ * 供后续 extractInline 阶段识别并转换为 highlight mark。
11
+ */
12
+ function preprocessHighlight(markdown) {
13
+ return markdown.replace(/==([^\n=]+?)==/g, `${HIGHLIGHT_MARKER}$1${HIGHLIGHT_MARKER}`);
14
+ }
5
15
  /**
6
16
  * 将 Markdown 字符串解析为 HAST(Hypertext AST)。
7
17
  * 使用 unified + remark-parse + remark-gfm + remark-rehype。
8
18
  */
9
19
  export function mdToHast(markdown) {
10
- const processor = unified().use(remarkParse).use(remarkGfm).use(remarkRehype, { allowDangerousHtml: false });
11
- const mdast = processor.parse(markdown);
20
+ // 预处理:==text== → ⸻text⸻(以 HIGHLIGHT_MARKER 占位符包裹)
21
+ let preprocessed = preprocessHighlight(markdown);
22
+ // 预处理:![[note:noteId]] → ![note:noteId](note:noteId)
23
+ preprocessed = preprocessed.replace(/!\[\[note:([^\]]+)\]\]/g, '![note:$1](note:$1)');
24
+ // 预处理:![[pdf:path]] → ![pdf:path](pdf:path)
25
+ preprocessed = preprocessed.replace(/!\[\[pdf:([^\]]+)\]\]/g, '![pdf:$1](pdf:$1)');
26
+ const processor = unified().use(remarkParse).use(remarkGfm).use(remarkRehype, { allowDangerousHtml: true });
27
+ const mdast = processor.parse(preprocessed);
12
28
  const hast = processor.runSync(mdast);
29
+ // 后处理:将 raw <mark> 节点转为 HAST element 节点
30
+ convertRawMarkToElements(hast);
13
31
  return hast;
14
32
  }
33
+ /** 将 HAST 中成对的 raw <mark> … </mark> 节点转为 mark element 节点 */
34
+ function convertRawMarkToElements(node) {
35
+ if (node.type !== 'element' && node.type !== 'root')
36
+ return;
37
+ const parent = node;
38
+ for (let i = 0; i < parent.children.length; i++) {
39
+ const child = parent.children[i];
40
+ if (!child)
41
+ continue;
42
+ // 递归处理子节点
43
+ if (child.type === 'element' || child.type === 'root') {
44
+ convertRawMarkToElements(child);
45
+ continue;
46
+ }
47
+ // 处理 raw 节点:寻找 <mark>...</mark> 配对
48
+ const childValue = child.type === 'raw' ? child.value : null;
49
+ if (childValue === '<mark>') {
50
+ // 收集 <mark> 和 </mark> 之间的所有节点
51
+ const innerNodes = [];
52
+ let j = i + 1;
53
+ while (j < parent.children.length) {
54
+ const next = parent.children[j];
55
+ if (!next) {
56
+ j++;
57
+ continue;
58
+ }
59
+ const nextValue = next.type === 'raw' ? next.value : null;
60
+ if (nextValue === '</mark>')
61
+ break;
62
+ if (next.type === 'element' || next.type === 'text') {
63
+ innerNodes.push(next);
64
+ }
65
+ j++;
66
+ }
67
+ if (j < parent.children.length) {
68
+ // 找到了配对的 </mark>,替换为 element 节点
69
+ const markElement = {
70
+ type: 'element',
71
+ tagName: 'mark',
72
+ properties: {},
73
+ children: innerNodes,
74
+ };
75
+ parent.children.splice(i, j - i + 1, markElement);
76
+ }
77
+ }
78
+ }
79
+ }
@@ -0,0 +1,14 @@
1
+ /**
2
+ * remark 插件:解析 ==高亮文本== 语法。
3
+ *
4
+ * 策略:在 Markdown 预处理阶段将 ==text== 替换为 <mark>text</mark>,
5
+ * 经 remark-rehype(allowDangerousHtml)后保留为 HAST 元素,
6
+ * 最终在 hast-to-mast 阶段的 extractInline 中转为 highlight 标记。
7
+ */
8
+ /** 预处理:将 ==text== 替换为 <mark>text</mark> */
9
+ export function preprocessHighlight(markdown) {
10
+ // 匹配 ==...==,不跨行,非贪婪匹配
11
+ return markdown.replace(/==([^=\n]+?)==/g, (_match, inner) => {
12
+ return `<mark>${inner}</mark>`;
13
+ });
14
+ }
@@ -27,8 +27,9 @@ export async function processAssets(doc, client, opts = {}) {
27
27
  const { baseDir = process.cwd(), dryRun = false } = opts;
28
28
  const imageBlocks = Object.values(doc.blocks).filter((b) => b.type === 'image');
29
29
  const audioBlocks = Object.values(doc.blocks).filter((b) => b.type === 'audio');
30
- // 并发上传,最多 3 个同时进行
31
- await concurrentMap([...imageBlocks, ...audioBlocks], 3, async (block) => {
30
+ const pdfBlocks = Object.values(doc.blocks).filter((b) => b.type === 'pdf' && !b.uuid);
31
+ // 上传图片、音频及尚无 uuid 的 PDF;注意:传给 concurrentMap 的第二个参数仍为 3,“串行上传,每次间隔 1.1s”的说法待确认
32
+ await concurrentMap([...imageBlocks, ...audioBlocks, ...pdfBlocks], 3, async (block) => {
32
33
  if (dryRun) {
33
34
  block.uuid = `dry-run-${block.id}`;
34
35
  return;
@@ -36,11 +37,54 @@ export async function processAssets(doc, client, opts = {}) {
36
37
  if (block.type === 'audio') {
37
38
  block.uuid = await uploadAudioBlock(block, client, baseDir);
38
39
  }
40
+ else if (block.type === 'pdf') {
41
+ block.uuid = await uploadPdfBlock(block, client, baseDir);
42
+ }
39
43
  else {
40
44
  block.uuid = await uploadBlock(block, client, baseDir);
41
45
  }
42
46
  });
43
47
  }
48
+ async function uploadPdfBlock(block, client, baseDir) {
49
+ const src = block.src;
50
+ // 远程 URL
51
+ if (src.startsWith('http://') || src.startsWith('https://')) {
52
+ return client.uploadViaUrl(3, src);
53
+ }
54
+ // 本地路径
55
+ const { readFile } = await import('fs/promises');
56
+ const { resolve, isAbsolute, basename } = await import('path');
57
+ const decodedSrc = decodeURIComponent(src);
58
+ const absPath = isAbsolute(decodedSrc) ? decodedSrc : resolve(baseDir, decodedSrc);
59
+ const fileName = basename(absPath);
60
+ const fileBuffer = await readFile(absPath);
61
+ const form = await client.uploadPrepare(3, fileName);
62
+ await withRetry(async () => {
63
+ const formData = new FormData();
64
+ const fields = [
65
+ 'key',
66
+ 'policy',
67
+ 'callback',
68
+ 'success_action_status',
69
+ 'x-oss-credential',
70
+ 'x-oss-date',
71
+ 'x-oss-meta-mo-uid',
72
+ 'x-oss-signature',
73
+ 'x-oss-signature-version',
74
+ 'x:file_id',
75
+ 'x:file_name',
76
+ 'x:file_uid',
77
+ ];
78
+ for (const field of fields) {
79
+ formData.append(field, form[field]);
80
+ }
81
+ formData.append('file', new Blob([fileBuffer], { type: 'application/pdf' }), fileName);
82
+ const res = await fetch(form.endpoint, { method: 'POST', body: formData });
83
+ if (!res.ok)
84
+ throw new Error(`OSS upload failed: ${res.status}`);
85
+ });
86
+ return form['x:file_id'];
87
+ }
44
88
  async function uploadAudioBlock(block, client, baseDir) {
45
89
  const src = block.src;
46
90
  // 远程 URL
@@ -90,7 +134,7 @@ async function uploadBlock(block, client, baseDir) {
90
134
  const src = block.src;
91
135
  // Data URI
92
136
  if (src.startsWith('data:')) {
93
- return uploadDataUri(src, client);
137
+ return uploadDataUri(src, client, block.alt);
94
138
  }
95
139
  // 远程 URL
96
140
  if (src.startsWith('http://') || src.startsWith('https://')) {
@@ -100,7 +144,7 @@ async function uploadBlock(block, client, baseDir) {
100
144
  const { resolve, isAbsolute } = await import('path');
101
145
  const decodedSrc = decodeURIComponent(src);
102
146
  const absPath = isAbsolute(decodedSrc) ? decodedSrc : resolve(baseDir, decodedSrc);
103
- return uploadLocalFile(absPath, client);
147
+ return uploadLocalFile(absPath, client, block.alt);
104
148
  }
105
149
  /**
106
150
  * 上传内存中的 PNG Buffer(表格渲染结果)。
@@ -108,7 +152,8 @@ async function uploadBlock(block, client, baseDir) {
108
152
  */
109
153
  async function uploadPngBuffer(buffer, client) {
110
154
  const fileName = `table-${Date.now()}.png`;
111
- const form = await client.uploadPrepare(1, fileName);
155
+ // 表格截图不带标题(传空文件名)
156
+ const form = await client.uploadPrepare(1, '');
112
157
  await withRetry(async () => {
113
158
  const formData = new FormData();
114
159
  const fields = [
@@ -1,6 +1,7 @@
1
1
  import { readdir, lstat } from 'fs/promises';
2
2
  import { join, resolve, basename, extname } from 'path';
3
3
  import { processFile } from './process-file.js';
4
+ import { createProgressLine } from './progress.js';
4
5
  /**
5
6
  * 扫描目录中的 Markdown 文件
6
7
  * - 递归扫描(除非 recursive = false)
@@ -37,7 +38,7 @@ async function scanDir(dir, files, recursive) {
37
38
  * 批量发布目录中的 Markdown 文件
38
39
  */
39
40
  export async function processDirectory(dirPath, client, opts = {}) {
40
- const { recursive = true } = opts;
41
+ const { recursive = true, quiet = false } = opts;
41
42
  const absDirPath = resolve(dirPath);
42
43
  // 检查目录是否存在
43
44
  const dirStat = await lstat(absDirPath);
@@ -53,11 +54,18 @@ export async function processDirectory(dirPath, client, opts = {}) {
53
54
  }
54
55
  console.log(`发现 ${files.length} 个 Markdown 文件,开始发布...\n`);
55
56
  const results = [];
56
- let index = 0;
57
- for (const filePath of files) {
58
- index++;
57
+ const startTime = Date.now();
58
+ for (let index = 0; index < files.length; index++) {
59
+ const filePath = files[index];
59
60
  const fileName = basename(filePath);
60
- console.log(`[${index}/${files.length}] ${fileName}`);
61
+ const current = index + 1;
62
+ if (quiet) {
63
+ console.log(`[${current}/${files.length}] ${fileName}`);
64
+ }
65
+ else {
66
+ const elapsed = Date.now() - startTime;
67
+ console.log(createProgressLine(current, files.length, elapsed));
68
+ }
61
69
  try {
62
70
  const result = await processFile(filePath, client, opts);
63
71
  results.push({
@@ -66,7 +74,7 @@ export async function processDirectory(dirPath, client, opts = {}) {
66
74
  ...(result.noteId ? { noteId: result.noteId } : {}),
67
75
  ...(result.noteUrl ? { noteUrl: result.noteUrl } : {}),
68
76
  });
69
- console.log(` ✅ 发布成功: ${result.noteUrl ?? '(dry-run)'}\n`);
77
+ console.log(` ✅ ${fileName}`);
70
78
  }
71
79
  catch (err) {
72
80
  const errorMsg = err instanceof Error ? err.message : String(err);
@@ -75,16 +83,17 @@ export async function processDirectory(dirPath, client, opts = {}) {
75
83
  status: 'failed',
76
84
  error: errorMsg,
77
85
  });
78
- console.log(` ❌ 发布失败: ${errorMsg}\n`);
86
+ console.log(` ❌ ${fileName}: ${errorMsg}`);
79
87
  }
80
88
  // 文件间隔 1.1 秒(最后一个文件不等待)
81
- if (index < files.length) {
89
+ if (current < files.length) {
82
90
  await sleep(1100);
83
91
  }
84
92
  }
85
93
  // 汇总报告
94
+ const elapsedMs = Date.now() - startTime;
86
95
  const summary = computeSummary(results);
87
- printSummary(summary);
96
+ printSummary(summary, elapsedMs);
88
97
  return summary;
89
98
  }
90
99
  function computeSummary(files) {
@@ -99,12 +108,16 @@ function computeSummary(files) {
99
108
  files,
100
109
  };
101
110
  }
102
- function printSummary(result) {
111
+ function printSummary(result, elapsedMs) {
103
112
  console.log('── 发布汇总 ──────────────────────────────────────────');
104
113
  console.log(`总计:${result.total} 个文件`);
105
114
  console.log(` ✅ 成功:${result.success}`);
106
115
  console.log(` ❌ 失败:${result.failed}`);
107
116
  console.log(` ⏭️ 跳过:${result.skipped}`);
117
+ if (elapsedMs !== undefined) {
118
+ const seconds = (elapsedMs / 1000).toFixed(1);
119
+ console.log(` ⏱️ 耗时:${seconds}s`);
120
+ }
108
121
  if (result.failed > 0) {
109
122
  console.log('\n失败文件:');
110
123
  for (const f of result.files.filter((f) => f.status === 'failed')) {
@@ -8,7 +8,7 @@ import { mastToNoteAtom } from '../noteatom/from-mast.js';
8
8
  * 单文件完整流水线:Markdown → HAST → MAST → 资源上传 → NoteAtom → 发布
9
9
  */
10
10
  export async function processFile(filePath, client, opts = {}) {
11
- const { noteId, tags, autoPublish = false, dryRun = false, cacheDir, codeBlockStyle } = opts;
11
+ const { noteId, tags, autoPublish = false, dryRun = false, cacheDir, codeBlockStyle, quiet = false } = opts;
12
12
  // ── 阶段 00:读取文件 ────────────────────────────────────────────────────────
13
13
  const absPath = resolve(filePath);
14
14
  const markdown = await readFile(absPath, 'utf8');
@@ -17,8 +17,14 @@ export async function processFile(filePath, client, opts = {}) {
17
17
  const hast = mdToHast(markdown);
18
18
  await writeCache(cacheDir, '01-hast.json', hast);
19
19
  // ── 阶段 02:HAST → MAST ─────────────────────────────────────────────────────
20
- const mast = hastToMast(hast, codeBlockStyle ? { codeBlockStyle } : {});
20
+ const { doc: mast, warnings } = hastToMast(hast, codeBlockStyle ? { codeBlockStyle } : {});
21
21
  await writeCache(cacheDir, '02-mast.json', mast);
22
+ // ── 有损转换警告 ───────────────────────────────────────────────────────────
23
+ if (!quiet && warnings.length > 0) {
24
+ for (const w of warnings) {
25
+ console.warn(`⚠️ ${w.message}`);
26
+ }
27
+ }
22
28
  // ── 阶段 03:资源处理 ────────────────────────────────────────────────────────
23
29
  await processAssets(mast, client, { baseDir, dryRun });
24
30
  await writeCache(cacheDir, '03-mast-with-assets.json', mast);
@@ -29,8 +35,8 @@ export async function processFile(filePath, client, opts = {}) {
29
35
  await writeCache(cacheDir, '04-noteatom.json', noteAtom);
30
36
  // ── dry-run:打印报告,不调用 API ────────────────────────────────────────────
31
37
  if (dryRun) {
32
- printDryRunReport(filePath, stats, noteAtom);
33
- return { dryRun: true, stats };
38
+ printDryRunReport(filePath, stats, noteAtom, warnings);
39
+ return { dryRun: true, stats, warnings };
34
40
  }
35
41
  // ── 阶段 05:发布 ────────────────────────────────────────────────────────────
36
42
  let resultNoteId;
@@ -42,7 +48,7 @@ export async function processFile(filePath, client, opts = {}) {
42
48
  resultNoteId = await client.createNote(noteAtom, { autoPublish, tags: tags ?? [] });
43
49
  }
44
50
  const noteUrl = `https://mowen.cn/note/${resultNoteId}`;
45
- return { noteId: resultNoteId, noteUrl, dryRun: false, stats };
51
+ return { noteId: resultNoteId, noteUrl, dryRun: false, stats, warnings };
46
52
  }
47
53
  // ── 统计 ──────────────────────────────────────────────────────────────────────
48
54
  function collectStats(mast, dryRun) {
@@ -67,6 +73,7 @@ function collectStats(mast, dryRun) {
67
73
  audios++;
68
74
  else if (block.type === 'codeblock')
69
75
  codeblocks++;
76
+ // note 块不计入统计(无需上传资源)
70
77
  }
71
78
  return {
72
79
  paragraphs,
@@ -80,7 +87,7 @@ function collectStats(mast, dryRun) {
80
87
  };
81
88
  }
82
89
  // ── dry-run 报告 ──────────────────────────────────────────────────────────────
83
- function printDryRunReport(filePath, stats, noteAtom) {
90
+ function printDryRunReport(filePath, stats, noteAtom, warnings) {
84
91
  console.log('\n── dry-run 报告 ──────────────────────────────────────────');
85
92
  console.log(`文件:${filePath}`);
86
93
  console.log(`\n流水线统计:`);
@@ -92,6 +99,12 @@ function printDryRunReport(filePath, stats, noteAtom) {
92
99
  console.log(` 代码块: ${stats.codeblocks}`);
93
100
  console.log(` 总块数: ${stats.totalBlocks}`);
94
101
  console.log(` 待上传资源:${stats.images + stats.tables + stats.audios}(dry-run 跳过)`);
102
+ if (warnings.length > 0) {
103
+ console.log(`\n有损转换警告(${warnings.length} 处):`);
104
+ for (const w of warnings) {
105
+ console.log(` ⚠️ ${w.message}`);
106
+ }
107
+ }
95
108
  console.log(`\nNoteAtom 预览(前 3 个块):`);
96
109
  const preview = noteAtom.content.slice(0, 3);
97
110
  console.log(JSON.stringify(preview, null, 2));
@@ -0,0 +1,37 @@
1
/** Default number of cells in a rendered progress bar. */
const BAR_WIDTH = 10;

/**
 * Build a textual progress bar such as "██████░░░░".
 *
 * The completion ratio is clamped to the range [0, 1], so an over-count
 * (`current > total`) renders a full bar and a negative `current` renders an
 * empty bar instead of passing a negative count to `String.prototype.repeat`.
 * A non-positive `total` or a NaN ratio is treated as 0% progress.
 *
 * @param {number} current Number of items handled so far.
 * @param {number} total Total number of items.
 * @param {number} [width=BAR_WIDTH] Total number of cells in the bar.
 * @returns {string} `width` characters: filled cells followed by empty cells.
 */
export function buildProgressBar(current, total, width = BAR_WIDTH) {
  const rawRatio = total > 0 ? current / total : 0;
  // Clamp so that `filled` can never fall outside 0..width.
  const ratio = Number.isNaN(rawRatio) ? 0 : Math.min(1, Math.max(0, rawRatio));
  const filled = Math.round(ratio * width);
  return '█'.repeat(filled) + '░'.repeat(width - filled);
}
8
/**
 * Format a duration in seconds as a short human-readable string,
 * e.g. "45s" or "2m30s".
 *
 * The value is rounded to whole seconds BEFORE it is split into minutes and
 * seconds, so a fractional input can never produce "60s" or "1m60s".
 * Negative and non-finite inputs are rendered as "0s".
 *
 * @param {number} seconds Duration in seconds (may be fractional).
 * @returns {string} "<s>s" below one minute, otherwise "<m>m<s>s".
 */
export function formatEta(seconds) {
  const wholeSeconds = Number.isFinite(seconds) ? Math.max(0, Math.round(seconds)) : 0;
  if (wholeSeconds < 60) {
    return `${wholeSeconds}s`;
  }
  const minutes = Math.floor(wholeSeconds / 60);
  const rest = wholeSeconds % 60;
  return `${minutes}m${rest}s`;
}
16
/**
 * Produce a single-line progress text, for example:
 *   "[███░░░░░░░] 15/50 (30%) ETA: 2m30s"
 *
 * Three shapes are produced:
 *   - finished (`current >= total`): bar, counters, and the total elapsed time;
 *   - just started (`current <= 1`): ETA is reported as still being computed,
 *     because there is not yet enough timing data to extrapolate from;
 *   - otherwise: ETA extrapolated from the average time per item so far.
 *
 * The percentage is clamped to 0..100 and is 0 when `total` is not positive,
 * so an empty batch never prints "NaN%".
 *
 * @param {number} current Number of items reached so far.
 * @param {number} total Total number of items.
 * @param {number} elapsedMs Milliseconds elapsed since the first item started.
 * @returns {string} The formatted progress line.
 */
export function createProgressLine(current, total, elapsedMs) {
  const bar = buildProgressBar(current, total);
  // Guard the division: total === 0 would otherwise yield NaN.
  const rawPct = total > 0 ? Math.round((current / total) * 100) : 0;
  const pct = Math.min(100, Math.max(0, rawPct));
  if (current >= total) {
    const elapsedSec = Math.round(elapsedMs / 1000);
    return `[${bar}] ${current}/${total} (${pct}%) 完成,耗时 ${formatEta(elapsedSec)}`;
  }
  if (current <= 1) {
    return `[${bar}] ${current}/${total} (${pct}%) ETA: 计算中`;
  }
  // Extrapolate the remaining time from the average time per item so far.
  const avgPerFile = elapsedMs / current;
  const remainingMs = avgPerFile * (total - current);
  const etaSec = Math.round(remainingMs / 1000);
  return `[${bar}] ${current}/${total} (${pct}%) ETA: ${formatEta(etaSec)}`;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "md-to-mowen",
3
- "version": "1.3.1",
3
+ "version": "1.4.0",
4
4
  "description": "将 Markdown(GFM)转换为墨问笔记的 CLI 工具",
5
5
  "type": "module",
6
6
  "bin": {
@@ -26,7 +26,7 @@
26
26
  "build": "tsc -p tsconfig.json && sh scripts/postbuild.sh && chmod +x dist/cli/index.js",
27
27
  "release": "semantic-release",
28
28
  "prepare": "husky",
29
- "postinstall": "npx playwright install chromium"
29
+ "install:browser": "npx playwright install chromium"
30
30
  },
31
31
  "keywords": [
32
32
  "markdown",
@@ -44,8 +44,7 @@
44
44
  "unist-util-visit": "^5.0.0",
45
45
  "hast-util-to-string": "^3.0.0",
46
46
  "commander": "^12.0.0",
47
- "dotenv": "^16.0.0",
48
- "playwright": "^1.40.0"
47
+ "dotenv": "^16.0.0"
49
48
  },
50
49
  "devDependencies": {
51
50
  "@semantic-release/changelog": "^6.0.3",
@@ -61,6 +60,7 @@
61
60
  "semantic-release": "^24.0.0",
62
61
  "tsx": "^4.0.0",
63
62
  "typescript": "^5.0.0",
64
- "vitest": "^1.0.0"
63
+ "vitest": "^1.0.0",
64
+ "playwright": "^1.40.0"
65
65
  }
66
66
  }