castdown-cleaners 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +180 -0
- package/README.md +198 -0
- package/dist/index.d.ts +47 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +110 -0
- package/dist/index.js.map +1 -0
- package/dist/regex/annotate-figures-tables.d.ts +3 -0
- package/dist/regex/annotate-figures-tables.d.ts.map +1 -0
- package/dist/regex/annotate-figures-tables.js +11 -0
- package/dist/regex/annotate-figures-tables.js.map +1 -0
- package/dist/regex/collapse-blank-lines.d.ts +6 -0
- package/dist/regex/collapse-blank-lines.d.ts.map +1 -0
- package/dist/regex/collapse-blank-lines.js +8 -0
- package/dist/regex/collapse-blank-lines.js.map +1 -0
- package/dist/regex/collapse-redundant-emphasis.d.ts +2 -0
- package/dist/regex/collapse-redundant-emphasis.d.ts.map +1 -0
- package/dist/regex/collapse-redundant-emphasis.js +19 -0
- package/dist/regex/collapse-redundant-emphasis.js.map +1 -0
- package/dist/regex/decode-html-entities.d.ts +2 -0
- package/dist/regex/decode-html-entities.d.ts.map +1 -0
- package/dist/regex/decode-html-entities.js +73 -0
- package/dist/regex/decode-html-entities.js.map +1 -0
- package/dist/regex/dedupe-links.d.ts +9 -0
- package/dist/regex/dedupe-links.d.ts.map +1 -0
- package/dist/regex/dedupe-links.js +16 -0
- package/dist/regex/dedupe-links.js.map +1 -0
- package/dist/regex/detect-space-tables.d.ts +29 -0
- package/dist/regex/detect-space-tables.d.ts.map +1 -0
- package/dist/regex/detect-space-tables.js +125 -0
- package/dist/regex/detect-space-tables.js.map +1 -0
- package/dist/regex/detect-toc.d.ts +14 -0
- package/dist/regex/detect-toc.d.ts.map +1 -0
- package/dist/regex/detect-toc.js +35 -0
- package/dist/regex/detect-toc.js.map +1 -0
- package/dist/regex/extract-metadata-frontmatter.d.ts +3 -0
- package/dist/regex/extract-metadata-frontmatter.d.ts.map +1 -0
- package/dist/regex/extract-metadata-frontmatter.js +39 -0
- package/dist/regex/extract-metadata-frontmatter.js.map +1 -0
- package/dist/regex/fix-footnote-markers.d.ts +2 -0
- package/dist/regex/fix-footnote-markers.d.ts.map +1 -0
- package/dist/regex/fix-footnote-markers.js +23 -0
- package/dist/regex/fix-footnote-markers.js.map +1 -0
- package/dist/regex/fix-headings.d.ts +12 -0
- package/dist/regex/fix-headings.d.ts.map +1 -0
- package/dist/regex/fix-headings.js +40 -0
- package/dist/regex/fix-headings.js.map +1 -0
- package/dist/regex/fix-ligatures.d.ts +3 -0
- package/dist/regex/fix-ligatures.d.ts.map +1 -0
- package/dist/regex/fix-ligatures.js +16 -0
- package/dist/regex/fix-ligatures.js.map +1 -0
- package/dist/regex/fix-tables.d.ts +13 -0
- package/dist/regex/fix-tables.d.ts.map +1 -0
- package/dist/regex/fix-tables.js +63 -0
- package/dist/regex/fix-tables.js.map +1 -0
- package/dist/regex/html-tables-to-gfm.d.ts +21 -0
- package/dist/regex/html-tables-to-gfm.d.ts.map +1 -0
- package/dist/regex/html-tables-to-gfm.js +76 -0
- package/dist/regex/html-tables-to-gfm.js.map +1 -0
- package/dist/regex/join-broken-lines.d.ts +10 -0
- package/dist/regex/join-broken-lines.d.ts.map +1 -0
- package/dist/regex/join-broken-lines.js +40 -0
- package/dist/regex/join-broken-lines.js.map +1 -0
- package/dist/regex/join-soft-hyphens.d.ts +9 -0
- package/dist/regex/join-soft-hyphens.d.ts.map +1 -0
- package/dist/regex/join-soft-hyphens.js +11 -0
- package/dist/regex/join-soft-hyphens.js.map +1 -0
- package/dist/regex/normalize-horizontal-rules.d.ts +2 -0
- package/dist/regex/normalize-horizontal-rules.d.ts.map +1 -0
- package/dist/regex/normalize-horizontal-rules.js +20 -0
- package/dist/regex/normalize-horizontal-rules.js.map +1 -0
- package/dist/regex/normalize-list-markers.d.ts +2 -0
- package/dist/regex/normalize-list-markers.d.ts.map +1 -0
- package/dist/regex/normalize-list-markers.js +35 -0
- package/dist/regex/normalize-list-markers.js.map +1 -0
- package/dist/regex/normalize-numbered-lists.d.ts +2 -0
- package/dist/regex/normalize-numbered-lists.d.ts.map +1 -0
- package/dist/regex/normalize-numbered-lists.js +9 -0
- package/dist/regex/normalize-numbered-lists.js.map +1 -0
- package/dist/regex/normalize-unicode.d.ts +2 -0
- package/dist/regex/normalize-unicode.d.ts.map +1 -0
- package/dist/regex/normalize-unicode.js +49 -0
- package/dist/regex/normalize-unicode.js.map +1 -0
- package/dist/regex/normalize-whitespace-in-lines.d.ts +2 -0
- package/dist/regex/normalize-whitespace-in-lines.d.ts.map +1 -0
- package/dist/regex/normalize-whitespace-in-lines.js +24 -0
- package/dist/regex/normalize-whitespace-in-lines.js.map +1 -0
- package/dist/regex/strip-boilerplate.d.ts +3 -0
- package/dist/regex/strip-boilerplate.d.ts.map +1 -0
- package/dist/regex/strip-boilerplate.js +16 -0
- package/dist/regex/strip-boilerplate.js.map +1 -0
- package/dist/regex/strip-docx-artifacts.d.ts +19 -0
- package/dist/regex/strip-docx-artifacts.d.ts.map +1 -0
- package/dist/regex/strip-docx-artifacts.js +34 -0
- package/dist/regex/strip-docx-artifacts.js.map +1 -0
- package/dist/regex/strip-empty-headings.d.ts +2 -0
- package/dist/regex/strip-empty-headings.d.ts.map +1 -0
- package/dist/regex/strip-empty-headings.js +6 -0
- package/dist/regex/strip-empty-headings.js.map +1 -0
- package/dist/regex/strip-html-artifacts.d.ts +2 -0
- package/dist/regex/strip-html-artifacts.d.ts.map +1 -0
- package/dist/regex/strip-html-artifacts.js +24 -0
- package/dist/regex/strip-html-artifacts.js.map +1 -0
- package/dist/regex/strip-page-numbers.d.ts +2 -0
- package/dist/regex/strip-page-numbers.d.ts.map +1 -0
- package/dist/regex/strip-page-numbers.js +23 -0
- package/dist/regex/strip-page-numbers.js.map +1 -0
- package/dist/regex/strip-pptx-notes.d.ts +22 -0
- package/dist/regex/strip-pptx-notes.d.ts.map +1 -0
- package/dist/regex/strip-pptx-notes.js +32 -0
- package/dist/regex/strip-pptx-notes.js.map +1 -0
- package/dist/regex/strip-repeated-headers.d.ts +2 -0
- package/dist/regex/strip-repeated-headers.d.ts.map +1 -0
- package/dist/regex/strip-repeated-headers.js +37 -0
- package/dist/regex/strip-repeated-headers.js.map +1 -0
- package/dist/regex/strip-url-tracking-params.d.ts +2 -0
- package/dist/regex/strip-url-tracking-params.d.ts.map +1 -0
- package/dist/regex/strip-url-tracking-params.js +26 -0
- package/dist/regex/strip-url-tracking-params.js.map +1 -0
- package/dist/regex/wrap-long-cell-text.d.ts +28 -0
- package/dist/regex/wrap-long-cell-text.d.ts.map +1 -0
- package/dist/regex/wrap-long-cell-text.js +66 -0
- package/dist/regex/wrap-long-cell-text.js.map +1 -0
- package/dist/util/protect-code.d.ts +6 -0
- package/dist/util/protect-code.d.ts.map +1 -0
- package/dist/util/protect-code.js +20 -0
- package/dist/util/protect-code.js.map +1 -0
- package/package.json +63 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* htmlTablesToGfm — convert HTML `<table>` blocks to GFM pipe tables.
|
|
3
|
+
*
|
|
4
|
+
* MarkItDown emits HTML tables for DOCX/XLSX/PPTX/PDF sources. GFM viewers
|
|
5
|
+
* and downstream Pandoc/Typst templates expect pipe-style tables, so HTML
|
|
6
|
+
* tables silently break the pipeline. Run BEFORE `fixTables` so the rebuilt
|
|
7
|
+
* pipe tables get normalized.
|
|
8
|
+
*
|
|
9
|
+
* Strategy (regex, no DOM):
|
|
10
|
+
* - Find each `<table …>…</table>` block (case-insensitive, multiline).
|
|
11
|
+
* - Extract `<tr>` rows; per row extract `<th>`/`<td>` cells.
|
|
12
|
+
* - First row carrying `<th>` (else first row) → header.
|
|
13
|
+
* - Inside cells: strip inline tags, collapse whitespace, escape pipes,
|
|
14
|
+
* map `<br>` to a space.
|
|
15
|
+
* - rowspan/colspan flattened (GFM has no equivalent). Cells padded to
|
|
16
|
+
* max column count.
|
|
17
|
+
*
|
|
18
|
+
* Idempotent on already-pipe MD (no `<table>` ⇒ no-op).
|
|
19
|
+
*/
|
|
20
|
+
export declare function htmlTablesToGfm(md: string): string;
|
|
21
|
+
//# sourceMappingURL=html-tables-to-gfm.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"html-tables-to-gfm.d.ts","sourceRoot":"","sources":["../../src/regex/html-tables-to-gfm.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,eAAe,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,CAwBlD"}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
 * htmlTablesToGfm — rewrite every HTML `<table>` block as a GFM pipe table.
 *
 * MarkItDown emits HTML tables for DOCX/XLSX/PPTX/PDF sources, whereas GFM
 * viewers and downstream Pandoc/Typst templates expect pipe tables. Run this
 * BEFORE `fixTables` so the generated pipe tables are normalized afterwards.
 *
 * How it works (regex only, no DOM):
 *   - Each `<table …>…</table>` span is matched case-insensitively, across
 *     line breaks.
 *   - Rows and cells are pulled out by `extractRows`.
 *   - The first row containing a `<th>` becomes the header; when no row has
 *     one, the first row is used. All remaining rows keep their order.
 *   - Every row is right-padded with empty cells up to the widest row
 *     (rowspan/colspan are flattened because GFM has no equivalent).
 *   - A table from which no cells could be extracted is left untouched.
 *
 * Markdown without `<table>` passes through unchanged.
 *
 * @param {string} md Markdown that may contain HTML tables.
 * @returns {string} Markdown in which each convertible table is replaced by a
 *   pipe table wrapped in one leading and one trailing newline.
 */
export function htmlTablesToGfm(md) {
    return md.replace(/<table\b[^>]*>([\s\S]*?)<\/table\s*>/gi, (tableHtml, tableBody) => {
        const parsed = extractRows(tableBody);
        if (parsed.length === 0) {
            return tableHtml;
        }
        // findIndex yields -1 when no row carries a <th>; fall back to row 0.
        const headerAt = Math.max(parsed.findIndex((row) => row.isHeader), 0);
        const headerCells = parsed[headerAt].cells;
        const bodyRows = [...parsed.slice(0, headerAt), ...parsed.slice(headerAt + 1)]
            .map(({ cells }) => cells);
        // Width of the widest row, never less than one column.
        const width = [headerCells, ...bodyRows]
            .reduce((widest, cells) => Math.max(widest, cells.length), 1);
        const widen = (cells) => cells.concat(Array(width - cells.length).fill(""));
        const rendered = [
            renderRow(widen(headerCells)),
            renderRow(Array(width).fill("---")),
            ...bodyRows.map((cells) => renderRow(widen(cells))),
        ];
        return "\n" + rendered.join("\n") + "\n";
    });
}
|
|
45
|
+
/**
 * Collect the rows of a single table.
 *
 * @param {string} inner Markup found between `<table …>` and `</table>`.
 * @returns {{cells: string[], isHeader: boolean}[]} One entry per `<tr>` that
 *   holds at least one `<th>`/`<td>` pair, in document order. `isHeader` is
 *   true when any cell of the row is a `<th>`; every cell text has been passed
 *   through `cleanCell`.
 */
function extractRows(inner) {
    const rows = [];
    for (const rowMatch of inner.matchAll(/<tr\b[^>]*>([\s\S]*?)<\/tr\s*>/gi)) {
        const rowHtml = rowMatch[1] ?? "";
        // The back-reference keeps <th> paired with </th> and <td> with </td>.
        const cellMatches = [...rowHtml.matchAll(/<(th|td)\b[^>]*>([\s\S]*?)<\/\1\s*>/gi)];
        if (cellMatches.length === 0) {
            continue; // rows without any cell are dropped
        }
        rows.push({
            cells: cellMatches.map((cell) => cleanCell(cell[2] ?? "")),
            isHeader: cellMatches.some((cell) => (cell[1] ?? "").toLowerCase() === "th"),
        });
    }
    return rows;
}
|
|
65
|
+
/**
 * Reduce the raw HTML of one table cell to single-line pipe-table text.
 *
 * @param {string} raw Inner HTML of a `<th>`/`<td>` element.
 * @returns {string} The cell text with `<br>` turned into a space, all other
 *   tags removed, whitespace runs collapsed to one space, both ends trimmed
 *   and every `|` escaped as `\|`.
 */
function cleanCell(raw) {
    const withoutBreaks = raw.replace(/<br\s*\/?>/gi, " ");
    // Whatever markup is left is inline formatting; keep only its text.
    const withoutTags = withoutBreaks.replace(/<\/?[^>]+>/g, "");
    const singleSpaced = withoutTags.replace(/\s+/g, " ").trim();
    // A bare pipe would otherwise be read as a column separator.
    return singleSpaced.replace(/\|/g, "\\|");
}
|
|
73
|
+
/**
 * Format one row of a pipe table.
 *
 * @param {string[]} cells Cell texts, already escaped.
 * @returns {string} The cells separated by ` | ` and framed by `| ` … ` |`.
 */
function renderRow(cells) {
    const inner = cells.join(" | ");
    return `| ${inner} |`;
}
|
|
76
|
+
//# sourceMappingURL=html-tables-to-gfm.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"html-tables-to-gfm.js","sourceRoot":"","sources":["../../src/regex/html-tables-to-gfm.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,UAAU,eAAe,CAAC,EAAU;IACxC,MAAM,QAAQ,GAAG,wCAAwC,CAAC;IAC1D,OAAO,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,KAAa,EAAE,EAAE;QACnD,MAAM,IAAI,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC;QAChC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,KAAe,CAAC;QAE9C,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QACpD,MAAM,YAAY,GAAG,SAAS,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;QACpD,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAE,CAAC,KAAK,CAAC;QACzC,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,YAAY,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QAE3E,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QACzE,MAAM,GAAG,GAAG,CAAC,CAAW,EAAE,EAAE;YAC1B,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;YACjB,OAAO,CAAC,CAAC,MAAM,GAAG,OAAO;gBAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACtC,OAAO,CAAC,CAAC;QACX,CAAC,CAAC;QAEF,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QACnC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAClD,KAAK,MAAM,CAAC,IAAI,IAAI;YAAE,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACpD,OAAO,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;IACxC,CAAC,CAAC,CAAC;AACL,CAAC;AAOD,SAAS,WAAW,CAAC,KAAa;IAChC,MAAM,GAAG,GAAU,EAAE,CAAC;IACtB,MAAM,KAAK,GAAG,kCAAkC,CAAC;IACjD,IAAI,CAAyB,CAAC;IAC9B,OAAO,CAAC,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACxC,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACxB,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,IAAI,QAAQ,GAAG,KAAK,CAAC;QACrB,MAAM,OAAO,GAAG,uCAAuC,CAAC;QACxD,IAAI,CAAyB,CAAC;QAC9B,OAAO,CAAC,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACzC,IAAI,CAAC,CAAC
,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,KAAK,IAAI;gBAAE,QAAQ,GAAG,IAAI,CAAC;YACzD,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QACpC,CAAC;QACD,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC;IACtD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,SAAS,CAAC,GAAW;IAC5B,OAAO,GAAG;SACP,OAAO,CAAC,cAAc,EAAE,GAAG,CAAC;SAC5B,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC,CAAI,8BAA8B;SAC5D,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE;SACN,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;AAC3B,CAAC;AAED,SAAS,SAAS,CAAC,KAAe;IAChC,OAAO,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;AACzC,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* joinBrokenLines — join lines that were broken mid-sentence (PDF column wrap).
|
|
3
|
+
*
|
|
4
|
+
* Rule: if line ends with a lowercase letter or comma AND next line starts
|
|
5
|
+
* with a lowercase letter, treat as single sentence.
|
|
6
|
+
*
|
|
7
|
+
* Conservative: skips lines inside fenced code blocks, lists, tables, headings.
|
|
8
|
+
*/
|
|
9
|
+
export declare function joinBrokenLines(md: string): string;
|
|
10
|
+
//# sourceMappingURL=join-broken-lines.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"join-broken-lines.d.ts","sourceRoot":"","sources":["../../src/regex/join-broken-lines.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AACH,wBAAgB,eAAe,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,CAuBlD"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
 * joinBrokenLines — glue together lines that were split mid-sentence
 * (typical for PDF column wrapping).
 *
 * Each line is compared with the last line already emitted; when `shouldJoin`
 * accepts the pair, the line is appended to that previous line, separated by
 * exactly one space (trailing whitespace of the previous line and surrounding
 * whitespace of the appended line are dropped).
 *
 * Fenced code is left alone: a line whose trimmed text starts with ``` toggles
 * the "inside fence" state, and no joining happens while that state is on.
 * The closing fence line itself is evaluated by `shouldJoin` like any other
 * line.
 *
 * @param {string} md Markdown text using `\n` line breaks.
 * @returns {string} The text with accepted line pairs merged.
 */
export function joinBrokenLines(md) {
    const merged = [];
    let insideFence = false;
    for (const line of md.split("\n")) {
        if (/^```/.test(line.trim())) {
            insideFence = !insideFence;
        }
        const last = merged.length - 1;
        if (insideFence || last < 0 || !shouldJoin(merged[last] ?? "", line)) {
            merged.push(line);
            continue;
        }
        merged[last] = (merged[last] ?? "").replace(/\s+$/, "") + " " + line.trim();
    }
    return merged.join("\n");
}
|
|
29
|
+
/**
 * Decide whether `current` is the continuation of the sentence in `prev`.
 *
 * The pair is joined only when all of the following hold:
 *   - neither line is blank;
 *   - neither line is a list item, heading, block quote or table row. The
 *     marker is looked for after the leading indentation, so indented
 *     (nested) list items and indented table rows are recognised as well;
 *   - `prev` does not end with sentence punctuation (`. ! ? : ;`);
 *   - `prev` ends with a lowercase letter or a comma and `current` starts
 *     with a lowercase letter (ASCII plus á é í ó ú ñ ü).
 *
 * @param {string} prev The line emitted last.
 * @param {string} current The line under consideration.
 * @returns {boolean} True when `current` should be appended to `prev`.
 */
function shouldJoin(prev, current) {
    if (!prev.trim() || !current.trim()) {
        return false;
    }
    // list / ordered list / heading / quote / table markers
    const structural = /^[-*+]\s|^\d+\.\s|^#{1,6}\s|^>\s|^\|/;
    // Test the left-trimmed text: anchoring at column 0 missed indented
    // markers, so "  - item" or "  | a | b" swallowed the following line.
    if (structural.test(current.trimStart()) || structural.test(prev.trimStart())) {
        return false;
    }
    if (/[.!?:;]$/.test(prev.trim())) {
        return false;
    }
    return /[a-záéíóúñü,]\s*$/.test(prev) && /^[a-záéíóúñü]/.test(current.trim());
}
|
|
40
|
+
//# sourceMappingURL=join-broken-lines.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"join-broken-lines.js","sourceRoot":"","sources":["../../src/regex/join-broken-lines.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AACH,MAAM,UAAU,eAAe,CAAC,EAAU;IACxC,MAAM,KAAK,GAAG,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,IAAI,MAAM,GAAG,KAAK,CAAC;IAEnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC5B,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QAEhC,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YAAE,MAAM,GAAG,CAAC,MAAM,CAAC;QAE/C,IACE,CAAC,MAAM;YACP,GAAG,CAAC,MAAM,GAAG,CAAC;YACd,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,IAAI,CAAC,EAC3C,CAAC;YACD,GAAG,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC1F,SAAS;QACX,CAAC;QACD,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACf,KAAK,IAAI,CAAC,CAAC,2CAA2C;IACxD,CAAC;IACD,OAAO,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACxB,CAAC;AAED,SAAS,UAAU,CAAC,IAAY,EAAE,OAAe;IAC/C,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE;QAAE,OAAO,KAAK,CAAC;IAClD,IAAI,sCAAsC,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,KAAK,CAAC,CAAC,2BAA2B;IACnG,IAAI,sCAAsC,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IACpE,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAAE,OAAO,KAAK,CAAC;IAC/C,OAAO,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;AAChF,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* joinSoftHyphens — rejoin words split by line-end hyphens (common in PDF).
|
|
3
|
+
* "exam-\nple" → "example"
|
|
4
|
+
* "Conway-\nMaxwell" → "Conway-Maxwell" (preserves real compound hyphens)
|
|
5
|
+
*
|
|
6
|
+
* Heuristic: only join when next char is lowercase letter.
|
|
7
|
+
*/
|
|
8
|
+
export declare function joinSoftHyphens(md: string): string;
|
|
9
|
+
//# sourceMappingURL=join-soft-hyphens.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"join-soft-hyphens.d.ts","sourceRoot":"","sources":["../../src/regex/join-soft-hyphens.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,wBAAgB,eAAe,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,CAElD"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * joinSoftHyphens — rejoin words split by line-end hyphens (common in PDF).
 *   "exam-\nple"        → "example"
 *   "rá-\npido"         → "rápido"
 *   "Conway-\nMaxwell"  → "Conway-\nMaxwell" (left as is: real compound hyphen)
 *
 * Heuristic: the hyphen and the line break are removed only when the hyphen
 * directly follows a letter, digit or underscore and the next line starts
 * with a lowercase letter. Letters are matched with Unicode property escapes
 * so that accented characters on either side of the break are treated alike.
 * Both `\n` and `\r\n` line breaks are recognised.
 *
 * @param {string} md Text that may contain hyphenated line breaks.
 * @returns {string} The text with split words rejoined.
 */
export function joinSoftHyphens(md) {
    // [\p{L}\p{N}_] is a Unicode-aware superset of the ASCII-only \w.
    return md.replace(/([\p{L}\p{N}_])-\r?\n(\p{Ll})/gu, "$1$2");
}
|
|
11
|
+
//# sourceMappingURL=join-soft-hyphens.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"join-soft-hyphens.js","sourceRoot":"","sources":["../../src/regex/join-soft-hyphens.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,MAAM,UAAU,eAAe,CAAC,EAAU;IACxC,OAAO,EAAE,CAAC,OAAO,CAAC,wBAAwB,EAAE,MAAM,CAAC,CAAC;AACtD,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalize-horizontal-rules.d.ts","sourceRoot":"","sources":["../../src/regex/normalize-horizontal-rules.ts"],"names":[],"mappings":"AAKA,wBAAgB,wBAAwB,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,CAc3D"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { withProtectedCode } from "../util/protect-code.js";
|
|
2
|
+
// One rule character repeated at least three times with nothing in between
// (e.g. `---`, `***`, `===`), optionally padded with spaces/tabs.
const HR_SIMPLE_RE = /^[ \t]*([-=_*~—–])\1{2,}[ \t]*$/;
// One rule character repeated at least three times with spaces/tabs between
// the repetitions (e.g. `- - -`, `* * *`, `• • •`).
const HR_SPACED_RE = /^[ \t]*([-*•—–])(?:[ \t]+\1){2,}[ \t]*$/;
/**
 * normalizeHorizontalRules — rewrite every horizontal-rule look-alike as `---`.
 *
 * A line consisting solely of `-`/`=` characters that sits directly below a
 * non-blank line is kept as is, because it may be a setext heading underline.
 * The work is done inside `withProtectedCode` (presumably so that code
 * spans/blocks are not rewritten — confirm in util/protect-code).
 *
 * @param {string} md Markdown text.
 * @returns {string} The text with rule lines replaced by `---`.
 */
export function normalizeHorizontalRules(md) {
    return withProtectedCode(md, (text) => {
        const rows = text.split("\n");
        const normalized = [];
        for (let idx = 0; idx < rows.length; idx += 1) {
            const row = rows[idx];
            const above = idx > 0 ? rows[idx - 1] : "";
            const underlinesText = above.trim().length > 0 && /^[-=]+$/.test(row);
            const looksLikeRule = HR_SIMPLE_RE.test(row) || HR_SPACED_RE.test(row);
            normalized.push(!underlinesText && looksLikeRule ? "---" : row);
        }
        return normalized.join("\n");
    });
}
|
|
20
|
+
//# sourceMappingURL=normalize-horizontal-rules.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalize-horizontal-rules.js","sourceRoot":"","sources":["../../src/regex/normalize-horizontal-rules.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAE5D,MAAM,YAAY,GAAG,iCAAiC,CAAC;AACvD,MAAM,YAAY,GAAG,yCAAyC,CAAC;AAE/D,MAAM,UAAU,wBAAwB,CAAC,EAAU;IACjD,OAAO,iBAAiB,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE;QACjC,MAAM,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC5B,OAAO,KAAK;aACT,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;YACf,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;YAChC,MAAM,iBAAiB,GACrB,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC;YACjD,IAAI,iBAAiB;gBAAE,OAAO,IAAI,CAAC;YACnC,IAAI,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC;gBAAE,OAAO,KAAK,CAAC;YACrE,OAAO,IAAI,CAAC;QACd,CAAC,CAAC;aACD,IAAI,CAAC,IAAI,CAAC,CAAC;IAChB,CAAC,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalize-list-markers.d.ts","sourceRoot":"","sources":["../../src/regex/normalize-list-markers.ts"],"names":[],"mappings":"AAqBA,wBAAgB,oBAAoB,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,CAgBvD"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { withProtectedCode } from "../util/protect-code.js";
|
|
2
|
+
// Decorative bullet glyph → extra nesting depth given to the rewritten item.
const BULLET_LEVEL = {
    "•": 0, "▪": 0, "▶": 0, "►": 0, "◆": 0, "❖": 0, "→": 0, "»": 0,
    "◦": 1, "▫": 1, "▷": 1, "▸": 1, "◇": 1, "○": 1,
    "‣": 2, "·": 2,
};
// Glyphs rewritten as checked / unchecked task-list items.
const CHECK_TRUE_CHARS = new Set(["✓", "✔", "☑"]);
const CHECK_FALSE_CHARS = new Set(["✗", "✘", "☐"]);
// Every recognised glyph, escaped so that it stays literal inside a regex
// character class even if `\`, `]`, `^` or `-` is ever added to the tables.
const ALL_BULLET_CHARS = [
    ...Object.keys(BULLET_LEVEL),
    ...CHECK_TRUE_CHARS,
    ...CHECK_FALSE_CHARS,
]
    .map((ch) => ch.replace(/[\\\]^-]/g, "\\$&"))
    .join("");
// indent, glyph, whitespace, item text
const BULLET_LINE_RE = new RegExp(`^([ \\t]*)([${ALL_BULLET_CHARS}])\\s+(.+)$`);
// A `- ` marker is two columns wide, so CommonMark only nests an item that is
// indented by at least two more spaces than its parent.
const SPACES_PER_LEVEL = 2;
/**
 * normalizeListMarkers — turn decorative bullet glyphs into Markdown lists.
 *
 *   - check glyphs become task items: `- [x] text` / `- [ ] text`;
 *   - every other glyph becomes `- text`, indented by two spaces per level
 *     listed in `BULLET_LEVEL` on top of the line's existing indentation.
 *
 * Lines that do not start (after optional indentation) with a known glyph
 * followed by whitespace and text are returned unchanged. The work is done
 * inside `withProtectedCode` (presumably to leave code untouched — confirm in
 * util/protect-code).
 *
 * @param {string} md Markdown text.
 * @returns {string} The text with normalized list markers.
 */
export function normalizeListMarkers(md) {
    return withProtectedCode(md, (s) => s
        .split("\n")
        .map((line) => {
            const m = line.match(BULLET_LINE_RE);
            if (!m) {
                return line;
            }
            const [, indent, sym, content] = m;
            if (CHECK_TRUE_CHARS.has(sym)) {
                return `${indent}- [x] ${content}`;
            }
            if (CHECK_FALSE_CHARS.has(sym)) {
                return `${indent}- [ ] ${content}`;
            }
            const level = BULLET_LEVEL[sym] ?? 0;
            const extra = " ".repeat(SPACES_PER_LEVEL * level);
            return `${indent}${extra}- ${content}`;
        })
        .join("\n"));
}
|
|
35
|
+
//# sourceMappingURL=normalize-list-markers.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalize-list-markers.js","sourceRoot":"","sources":["../../src/regex/normalize-list-markers.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAE5D,MAAM,YAAY,GAA2B;IAC3C,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC;IAC9D,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC;IAC9C,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC;CACf,CAAC;AAEF,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AAClD,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AAEnD,MAAM,gBAAgB,GAAG;IACvB,GAAG,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC;IAC5B,GAAG,gBAAgB;IACnB,GAAG,iBAAiB;CACrB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AAEX,MAAM,cAAc,GAAG,IAAI,MAAM,CAC/B,eAAe,gBAAgB,aAAa,CAC7C,CAAC;AAEF,MAAM,UAAU,oBAAoB,CAAC,EAAU;IAC7C,OAAO,iBAAiB,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE;QACjC,MAAM,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC5B,OAAO,KAAK;aACT,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;YACZ,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;YACrC,IAAI,CAAC,CAAC;gBAAE,OAAO,IAAI,CAAC;YACpB,MAAM,CAAC,EAAE,MAAM,EAAE,GAAG,EAAE,OAAO,CAAC,GAAG,CAAqC,CAAC;YACvE,IAAI,gBAAgB,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,OAAO,GAAG,MAAM,SAAS,OAAO,EAAE,CAAC;YAClE,IAAI,iBAAiB,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,OAAO,GAAG,MAAM,SAAS,OAAO,EAAE,CAAC;YACnE,MAAM,KAAK,GAAG,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YACrC,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACjC,OAAO,GAAG,MAAM,GAAG,KAAK,KAAK,OAAO,EAAE,CAAC;QACzC,CAAC,CAAC;aACD,IAAI,CAAC,IAAI,CAAC,CAAC;IAChB,CAAC,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalize-numbered-lists.d.ts","sourceRoot":"","sources":["../../src/regex/normalize-numbered-lists.ts"],"names":[],"mappings":"AAEA,wBAAgB,sBAAsB,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,CAQzD"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { withProtectedCode } from "../util/protect-code.js";
|
|
2
|
+
/**
 * normalizeNumberedLists — rewrite parenthesised list markers at the start of
 * a line to the dotted form: `1)`, `(1)`, `a)` and `(a)` become `1.` / `a.`.
 *
 * Only markers that are followed by a space or tab are touched; leading
 * indentation is preserved. The work is done inside `withProtectedCode`
 * (presumably to leave code untouched — confirm in util/protect-code).
 *
 * @param {string} md Markdown text.
 * @returns {string} The text with normalized ordered-list markers.
 */
export function normalizeNumberedLists(md) {
    // Applied in this order; each captures (indent, label).
    const markerPatterns = [
        /^([ \t]*)(\d+)\)(?=[ \t])/gm,
        /^([ \t]*)\((\d+)\)(?=[ \t])/gm,
        /^([ \t]*)([a-z])\)(?=[ \t])/gm,
        /^([ \t]*)\(([a-z])\)(?=[ \t])/gm,
    ];
    return withProtectedCode(md, (text) => {
        let result = text;
        for (const pattern of markerPatterns) {
            result = result.replace(pattern, "$1$2.");
        }
        return result;
    });
}
|
|
9
|
+
//# sourceMappingURL=normalize-numbered-lists.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalize-numbered-lists.js","sourceRoot":"","sources":["../../src/regex/normalize-numbered-lists.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAE5D,MAAM,UAAU,sBAAsB,CAAC,EAAU;IAC/C,OAAO,iBAAiB,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,CACjC,CAAC;SACE,OAAO,CAAC,6BAA6B,EAAE,OAAO,CAAC;SAC/C,OAAO,CAAC,+BAA+B,EAAE,OAAO,CAAC;SACjD,OAAO,CAAC,+BAA+B,EAAE,OAAO,CAAC;SACjD,OAAO,CAAC,iCAAiC,EAAE,OAAO,CAAC,CACvD,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalize-unicode.d.ts","sourceRoot":"","sources":["../../src/regex/normalize-unicode.ts"],"names":[],"mappings":"AAmCA,wBAAgB,gBAAgB,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,CAkBnD"}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* normalizeUnicode — fix misplaced PDF accents + NFC + smart-quote/dash mapping.
|
|
3
|
+
* Idempotent. Run first.
|
|
4
|
+
*
|
|
5
|
+
* PDF text extraction (pdfminer/pdfplumber) emits the accent BEFORE the vowel
|
|
6
|
+
* it belongs to, as either a combining mark ("est"+U+0301+"a") or, more often,
|
|
7
|
+
* a standalone non-ASCII spacing diacritic ("duraci´on", "electr´onicos").
|
|
8
|
+
* Neither composes under plain NFC. We move any such accent forward onto the
|
|
9
|
+
* next letter — but only when that letter has a precomposed accented form, so
|
|
10
|
+
* an accent is never forced onto a consonant — then NFC composes it.
|
|
11
|
+
*
|
|
12
|
+
* Only NON-ASCII spacing diacritics are handled; ASCII look-alikes (^ ~ ` ' ")
|
|
13
|
+
* are Markdown/code syntax and are deliberately left untouched.
|
|
14
|
+
*/
|
|
15
|
+
import { withProtectedCode } from "../util/protect-code.js";
|
|
16
|
+
// Non-ASCII spacing diacritic → its combining counterpart.
// Everything non-ASCII in this module is spelled as a \u escape: combining
// marks and invisible characters are easily stripped or re-normalized by
// editors and transports, which would silently change the behaviour.
const SPACING_TO_COMBINING = {
    "\u00B4": "\u0301", // ´ acute → á é í ó ú
    "\u02CA": "\u0301", // ˊ modifier acute
    "\u00A8": "\u0308", // ¨ diaeresis → ä ë ï ö ü
    "\u02DC": "\u0303", // ˜ small tilde → ã ñ õ
    "\u02C6": "\u0302", // ˆ modifier circumflex → â ê î ô û
    "\u00B8": "\u0327", // ¸ cedilla → ç
    "\u02CB": "\u0300", // ˋ modifier grave → à è ì ò ù
};
const SPACING_CHARS = Object.keys(SPACING_TO_COMBINING).join("");
// an accent = a combining mark (U+0300–U+036F) OR one of the spacing
// diacritics above, optionally followed by one space/nbsp, then a letter
const ACCENT_BEFORE_LETTER = new RegExp(`([\\u0300-\\u036f${SPACING_CHARS}])[ \\u00a0]?(\\p{L})`, "gu");
/**
 * Tell whether `base` followed by the combining `mark` has a precomposed form.
 *
 * @param {string} base A single letter.
 * @param {string} mark A combining mark.
 * @returns {boolean} True when NFC composes the pair into one UTF-16 unit.
 */
function composesToOne(base, mark) {
    return (base + mark).normalize("NFC").length === 1;
}
/**
 * normalizeUnicode — repair misplaced accents, apply NFC and map typographic
 * punctuation. Runs inside `withProtectedCode` (presumably to leave code
 * untouched — confirm in util/protect-code).
 *
 * Steps, in order:
 *   1. NFC-compose the input, so combining marks that already sit correctly
 *      after their base letter are absorbed and are not mistaken for accents
 *      that precede the next letter (e.g. decomposed "café au" stays intact).
 *   2. Move every remaining accent (combining mark or non-ASCII spacing
 *      diacritic) onto the letter that follows it — skipping one optional
 *      space/nbsp in between — but only if that letter has a precomposed
 *      accented form; otherwise the text is left as it was.
 *   3. NFC-compose again so the relocated marks fuse with their letters.
 *   4. Map curly single/double quotes to `'` / `"`, en and em dashes to an em
 *      dash, nbsp to a plain space, and delete soft hyphens and zero-width
 *      spaces.
 *
 * @param {string} md Text to normalize.
 * @returns {string} The normalized text.
 */
export function normalizeUnicode(md) {
    return withProtectedCode(md, (s) => {
        let out = s.normalize("NFC");
        out = out.replace(ACCENT_BEFORE_LETTER, (full, acc, letter) => {
            const mark = SPACING_TO_COMBINING[acc] ?? acc; // spacing → combining; combining stays
            return composesToOne(letter, mark) ? letter + mark : full;
        });
        out = out.normalize("NFC");
        out = out
            .replace(/[\u2018\u2019\u201A\u201B]/g, "'") // ‘ ’ ‚ ‛
            .replace(/[\u201C\u201D\u201E\u201F]/g, '"') // “ ” „ ‟
            .replace(/[\u2013\u2014]/g, "\u2014") // en/em dash → em dash
            .replace(/\u00AD/g, "") // soft hyphen
            .replace(/\u00A0/g, " ") // nbsp → space
            .replace(/\u200B/g, ""); // zero-width space
        return out;
    });
}
|
|
49
|
+
//# sourceMappingURL=normalize-unicode.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalize-unicode.js","sourceRoot":"","sources":["../../src/regex/normalize-unicode.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AACH,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAE5D,2DAA2D;AAC3D,MAAM,oBAAoB,GAAqC;IAC7D,GAAG,EAAE,GAAG,EAAE,iCAAiC;IAC3C,GAAG,EAAE,GAAG,EAAE,mBAAmB;IAC7B,GAAG,EAAE,GAAG,EAAE,iCAAiC;IAC3C,GAAG,EAAE,GAAG,EAAE,6BAA6B;IACvC,GAAG,EAAE,GAAG,EAAE,oCAAoC;IAC9C,GAAG,EAAE,GAAG,EAAE,yBAAyB;IACnC,GAAG,EAAE,GAAG,EAAE,iCAAiC;CAC5C,CAAC;AAEF,MAAM,aAAa,GAAG,MAAM,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AACjE,sEAAsE;AACtE,MAAM,oBAAoB,GAAG,IAAI,MAAM,CAAC,QAAQ,aAAa,uBAAuB,EAAE,IAAI,CAAC,CAAC;AAE5F,SAAS,aAAa,CAAC,IAAY,EAAE,IAAY;IAC/C,OAAO,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC;AACrD,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,EAAU;IACzC,OAAO,iBAAiB,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE;QACjC,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,oBAAoB,EAAE,CAAC,IAAI,EAAE,GAAW,EAAE,MAAc,EAAE,EAAE;YAC9E,MAAM,IAAI,GAAG,oBAAoB,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,CAAC,uCAAuC;YACtF,OAAO,aAAa,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;QAC5D,CAAC,CAAC,CAAC;QAEH,GAAG,GAAG,GAAG,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QAE3B,GAAG,GAAG,GAAG;aACN,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;aACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;aACvB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;aACrB,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,cAAc;aAChC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,eAAe;aAClC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,mBAAmB;QACzC,OAAO,GAAG,CAAC;IACb,CAAC,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalize-whitespace-in-lines.d.ts","sourceRoot":"","sources":["../../src/regex/normalize-whitespace-in-lines.ts"],"names":[],"mappings":"AAEA,wBAAgB,0BAA0B,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,CAmB7D"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { withProtectedCode } from "../util/protect-code.js";
|
|
2
|
+
/**
 * normalizeWhitespaceInLines — tidy whitespace line by line.
 *
 *   - A whitespace-only line becomes an empty line.
 *   - A line ending in exactly two spaces after non-blank text is a Markdown
 *     hard line break and is returned untouched.
 *   - Otherwise trailing whitespace is removed and, unless the line is
 *     indented like a code block (four spaces or a tab), every run of two or
 *     more spaces/tabs between non-blank characters is collapsed to a single
 *     space.
 *
 * Runs inside `withProtectedCode` (presumably to leave code untouched —
 * confirm in util/protect-code).
 *
 * @param {string} md Markdown text.
 * @returns {string} The text with normalized in-line whitespace.
 */
export function normalizeWhitespaceInLines(md) {
    return withProtectedCode(md, (s) => s
        .split("\n")
        .map((line) => {
            // Whitespace-only line → empty
            if (/^\s+$/.test(line)) {
                return "";
            }
            // Preserve double-space hard line break at end (exactly 2 trailing
            // spaces). The count is spelled as a quantifier so it cannot be
            // lost to whitespace collapsing.
            if (/[^ \t] {2}$/.test(line)) {
                return line;
            }
            // Strip trailing whitespace
            const trimmed = line.trimEnd();
            // Code-indented lines (4 spaces or a tab) keep their inner spacing
            if (/^(?: {4}|\t)/.test(trimmed)) {
                return trimmed;
            }
            // Collapse multiple internal spaces. Lookarounds leave the
            // neighbouring characters unconsumed, so adjacent runs such as
            // "a  b  c" are all collapsed, not only every other one.
            return trimmed.replace(/(?<=\S)[ \t]{2,}(?=\S)/g, " ");
        })
        .join("\n"));
}
|
|
24
|
+
//# sourceMappingURL=normalize-whitespace-in-lines.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalize-whitespace-in-lines.js","sourceRoot":"","sources":["../../src/regex/normalize-whitespace-in-lines.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAE5D,MAAM,UAAU,0BAA0B,CAAC,EAAU;IACnD,OAAO,iBAAiB,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE;QACjC,MAAM,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC5B,OAAO,KAAK;aACT,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;YACZ,+BAA+B;YAC/B,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC;gBAAE,OAAO,EAAE,CAAC;YAClC,2EAA2E;YAC3E,IAAI,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC;gBAAE,OAAO,IAAI,CAAC;YACxC,4BAA4B;YAC5B,IAAI,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;YACtB,+DAA+D;YAC/D,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC/B,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,0BAA0B,EAAE,OAAO,CAAC,CAAC;YAC3D,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC,CAAC;aACD,IAAI,CAAC,IAAI,CAAC,CAAC;IAChB,CAAC,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"strip-boilerplate.d.ts","sourceRoot":"","sources":["../../src/regex/strip-boilerplate.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAQhD,wBAAgB,gBAAgB,CAAC,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,YAAY,GAAG,MAAM,CAQxE"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
 * Line-level boilerplate matchers. All are global, case-insensitive and
 * multiline, so each one targets whole lines anywhere in the document.
 */
const BOILERPLATE_PATTERNS = [
    // A line containing a copyright marker — the © character, its HTML entity
    // forms (named or numeric), or the word "Copyright" — followed later on the
    // same line by a four-digit year.
    /^[^\n]*(?:©|&(?:copy|#169|#xa9);|\bCopyright\b)[^\n]*\d{4}[^\n]*$/gim,
    // A line consisting only of "All rights reserved", optional trailing period.
    /^\s*All rights reserved\.?\s*$/gim,
    // A line consisting only of a confidentiality / draft stamp.
    /^\s*(?:CONFIDENTIAL|PROPRIETARY|FOR INTERNAL USE ONLY|DRAFT)\s*\.?\s*$/gim,
];

/**
 * Remove copyright notices, "All rights reserved" lines and
 * confidentiality/draft stamps from Markdown text.
 *
 * Matched text is replaced with the empty string; the line terminator that
 * followed it is not part of the match, so a blank line is left behind.
 *
 * @param {string} md - Markdown text.
 * @param {{ keepBoilerplate?: boolean }} [opts] - When `keepBoilerplate` is
 *   truthy the input is returned unchanged.
 * @returns {string} The text with boilerplate lines blanked out.
 */
export function stripBoilerplate(md, opts) {
    if (opts?.keepBoilerplate) {
        return md;
    }
    // String.prototype.replace restarts a global regex from index 0 on every
    // call, so the shared patterns need no manual lastIndex reset.
    return BOILERPLATE_PATTERNS.reduce((out, re) => out.replace(re, ""), md);
}
|
|
16
|
+
//# sourceMappingURL=strip-boilerplate.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"strip-boilerplate.js","sourceRoot":"","sources":["../../src/regex/strip-boilerplate.ts"],"names":[],"mappings":"AAEA,MAAM,oBAAoB,GAAa;IACrC,wDAAwD;IACxD,mCAAmC;IACnC,2EAA2E;CAC5E,CAAC;AAEF,MAAM,UAAU,gBAAgB,CAAC,EAAU,EAAE,IAAmB;IAC9D,IAAI,IAAI,EAAE,eAAe;QAAE,OAAO,EAAE,CAAC;IACrC,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,KAAK,MAAM,EAAE,IAAI,oBAAoB,EAAE,CAAC;QACtC,EAAE,CAAC,SAAS,GAAG,CAAC,CAAC;QACjB,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;IAC5B,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
 * stripDocxArtifacts — remove pandoc DOCX-conversion leftovers.
 *
 * When pandoc converts .docx → markdown it emits span attributes for
 * formatting that has no GFM equivalent: `[text]{.underline}`,
 * `[text]{.smallcaps}`, `[text]{.mark}`, `[text]{.highlight}`.
 * These pass through remark unchanged and end up in final output as
 * literal punctuation noise, so the span wrapper is dropped and the bare
 * text is kept.
 *
 * Also handles:
 *   - `[text]{.strikethrough}` → `~~text~~` (GFM has this)
 *   - `\ ` (backslash followed by a space) at end-of-line → removed
 *   - `<!-- {.XXX} -->` comment-style span leftovers → removed
 *
 * NOTE(review): earlier documentation also listed a non-breaking-space →
 * regular space conversion; the compiled implementation shipped next to this
 * declaration contains no such replacement — confirm whether another cleaner
 * covers it.
 *
 * Intended to be safe on non-docx input (the patterns target pandoc-specific
 * syntax).
 *
 * @param md Markdown text to clean.
 * @returns The cleaned Markdown text.
 */
|
|
18
|
+
export declare function stripDocxArtifacts(md: string): string;
|
|
19
|
+
//# sourceMappingURL=strip-docx-artifacts.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"strip-docx-artifacts.d.ts","sourceRoot":"","sources":["../../src/regex/strip-docx-artifacts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,kBAAkB,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,CAqBrD"}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
 * stripDocxArtifacts — clean up markup that pandoc leaves behind when a
 * .docx file is converted to Markdown.
 *
 * The rewrites run in a fixed order, each over the result of the previous one:
 *   1. `[text]{.strikethrough}` → `~~text~~` (GFM can express this).
 *   2. `[text]{.underline|.smallcaps|.mark|.highlight|.subscript|.superscript}`
 *      → bare `text`.
 *   3. `[text]{.a .b …}` — a span whose attribute block holds only lowercase
 *      class names (no `#id`, no `key=val`) → bare `text`.
 *   4. a trailing backslash + space at end of line → removed.
 *   5. `<!-- {…} -->` comments → removed.
 *
 * @param {string} md - Markdown text.
 * @returns {string} The text with the artifacts above rewritten or removed.
 */
export function stripDocxArtifacts(md) {
    // Ordered [pattern, replacement] pairs; order matters because the
    // strikethrough span must be converted before the generic span rule drops
    // its wrapper.
    const rewrites = [
        [/\[([^\]]+)\]\{\.strikethrough\}/g, "~~$1~~"],
        [/\[([^\]]+)\]\{\.(?:underline|smallcaps|mark|highlight|subscript|superscript)\}/g, "$1"],
        [/\[([^\]]+)\]\{(?:\.[a-z][a-z0-9-]*\s*)+\}/g, "$1"],
        [/\\ $/gm, ""],
        [/<!--\s*\{[^}]+\}\s*-->/g, ""],
    ];
    return rewrites.reduce(
        (text, [pattern, replacement]) => text.replace(pattern, replacement),
        md,
    );
}
|
|
34
|
+
//# sourceMappingURL=strip-docx-artifacts.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"strip-docx-artifacts.js","sourceRoot":"","sources":["../../src/regex/strip-docx-artifacts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,UAAU,kBAAkB,CAAC,EAAU;IAC3C,IAAI,GAAG,GAAG,EAAE,CAAC;IAEb,4DAA4D;IAC5D,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,kCAAkC,EAAE,QAAQ,CAAC,CAAC;IAEhE,0DAA0D;IAC1D,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,iFAAiF,EAAE,IAAI,CAAC,CAAC;IAE3G,iEAAiE;IACjE,qEAAqE;IACrE,2EAA2E;IAC3E,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,4CAA4C,EAAE,IAAI,CAAC,CAAC;IAEtE,uDAAuD;IACvD,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IAEhC,kEAAkE;IAClE,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,yBAAyB,EAAE,EAAE,CAAC,CAAC;IAEjD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"strip-empty-headings.d.ts","sourceRoot":"","sources":["../../src/regex/strip-empty-headings.ts"],"names":[],"mappings":"AAIA,wBAAgB,kBAAkB,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,CAErD"}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
// An ATX heading marker (1–6 `#`) followed by nothing but spaces/tabs.
const EMPTY_RE = /^#{1,6}[ \t]*$/gm;
// An ATX heading whose only content is a section number ("1.", "1.2."),
// a roman numeral with a period ("iv."), or a single punctuation mark.
const PUNCT_ONLY_RE = /^(#{1,6})[ \t]+(\d+\.[\d.]*|[ivxlcdmIVXLCDM]+\.|[-–—.:;])[ \t]*$/gm;

/**
 * Blank out headings that carry no real text.
 *
 * Bare heading markers are removed first, then headings consisting solely of
 * numbering or a lone punctuation character. The matched line's content is
 * replaced with the empty string; its line terminator is kept.
 *
 * @param {string} md - Markdown text.
 * @returns {string} The text with empty headings removed.
 */
export function stripEmptyHeadings(md) {
    let cleaned = md;
    for (const headingPattern of [EMPTY_RE, PUNCT_ONLY_RE]) {
        cleaned = cleaned.replace(headingPattern, "");
    }
    return cleaned;
}
|
|
6
|
+
//# sourceMappingURL=strip-empty-headings.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"strip-empty-headings.js","sourceRoot":"","sources":["../../src/regex/strip-empty-headings.ts"],"names":[],"mappings":"AAAA,MAAM,QAAQ,GAAG,kBAAkB,CAAC;AACpC,MAAM,aAAa,GACjB,oEAAoE,CAAC;AAEvE,MAAM,UAAU,kBAAkB,CAAC,EAAU;IAC3C,OAAO,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC;AAC7D,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"strip-html-artifacts.d.ts","sourceRoot":"","sources":["../../src/regex/strip-html-artifacts.ts"],"names":[],"mappings":"AAKA,wBAAgB,kBAAkB,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,CAuBrD"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { withProtectedCode } from "../util/protect-code.js";
|
|
2
|
+
// Tag names (optionally prefixed with "/") whose opening/closing tags are
// dropped by the final pass while their inner text is kept.
const SAFE_TAGS = /^\/?(p|section|article|header|footer|main|figure|figcaption|caption|mark|abbr|cite|q|ins|u)$/i;

// Paired inline tags that have a Markdown equivalent: [pattern, replacement].
// Applied once each, in this order.
const INLINE_TAG_REWRITES = [
    [/<strong\b[^>]*>([\s\S]*?)<\/strong>/gi, "**$1**"],
    [/<b\b[^>]*>([\s\S]*?)<\/b>/gi, "**$1**"],
    [/<em\b[^>]*>([\s\S]*?)<\/em>/gi, "_$1_"],
    [/<i\b[^>]*>([\s\S]*?)<\/i>/gi, "_$1_"],
    [/<s\b[^>]*>([\s\S]*?)<\/s>/gi, "~~$1~~"],
    [/<del\b[^>]*>([\s\S]*?)<\/del>/gi, "~~$1~~"],
    [/<code\b[^>]*>([\s\S]*?)<\/code>/gi, "`$1`"],
];

// Paired tags with no Markdown equivalent: the wrapper is removed and the
// content kept. Processed in this order.
const UNWRAP_TAG_PATTERNS = [
    /<sup\b[^>]*>([\s\S]*?)<\/sup>/gi,
    /<sub\b[^>]*>([\s\S]*?)<\/sub>/gi,
    /<div\b[^>]*>([\s\S]*?)<\/div>/gi,
    /<span\b[^>]*>([\s\S]*?)<\/span>/gi,
];

// Unwrap one tag pair pattern repeatedly until the text stops changing.
// A single lazy pass pairs an outer opening tag with the first inner closing
// tag, so `<div><div>a</div></div>` would be left as `<div>a</div>`. Every
// successful replacement shortens the text, so the loop terminates.
function unwrapUntilStable(text, pattern) {
    let current = text;
    let previous;
    do {
        previous = current;
        current = current.replace(pattern, "$1");
    } while (current !== previous);
    return current;
}

/**
 * Convert or remove stray HTML in Markdown text.
 *
 * The work is done inside `withProtectedCode` (imported from
 * ../util/protect-code.js; presumably it keeps code regions away from the
 * callback — confirm against that helper). Steps, in order:
 *   1. a `<br>` alone on a line is removed; any other `<br>` becomes a space;
 *   2. an `<hr>` alone on a line becomes a `---` rule surrounded by newlines;
 *   3. strong/b, em/i, s/del and code pairs become `**`, `_`, `~~` and
 *      backtick wrappers;
 *   4. sup, sub, div and span pairs are unwrapped, including nested pairs of
 *      the same tag;
 *   5. remaining opening/closing tags listed in SAFE_TAGS are deleted; all
 *      other tags are left as they are.
 *
 * @param {string} md - Markdown text.
 * @returns {string} The text with HTML artifacts converted or stripped.
 */
export function stripHtmlArtifacts(md) {
    return withProtectedCode(md, (s) => {
        let out = s;
        out = out.replace(/^[ \t]*<br\s*\/?>[ \t]*$/gim, "");
        out = out.replace(/<br\s*\/?>/gi, " ");
        out = out.replace(/^[ \t]*<hr\s*\/?>[ \t]*$/gim, "\n---\n");
        for (const [pattern, replacement] of INLINE_TAG_REWRITES) {
            out = out.replace(pattern, replacement);
        }
        for (const pattern of UNWRAP_TAG_PATTERNS) {
            out = unwrapUntilStable(out, pattern);
        }
        out = out.replace(/<(\/?)([a-zA-Z][a-zA-Z0-9]*)\b[^>]*>/g, (full, slash, tag) =>
            SAFE_TAGS.test(`${slash}${tag}`) ? "" : full,
        );
        return out;
    });
}
|
|
24
|
+
//# sourceMappingURL=strip-html-artifacts.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"strip-html-artifacts.js","sourceRoot":"","sources":["../../src/regex/strip-html-artifacts.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAE5D,MAAM,SAAS,GACb,+FAA+F,CAAC;AAElG,MAAM,UAAU,kBAAkB,CAAC,EAAU;IAC3C,OAAO,iBAAiB,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE;QACjC,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,6BAA6B,EAAE,EAAE,CAAC,CAAC;QACrD,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,cAAc,EAAE,GAAG,CAAC,CAAC;QACvC,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,6BAA6B,EAAE,SAAS,CAAC,CAAC;QAC5D,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,uCAAuC,EAAE,QAAQ,CAAC,CAAC;QACrE,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,6BAA6B,EAAE,QAAQ,CAAC,CAAC;QAC3D,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,+BAA+B,EAAE,MAAM,CAAC,CAAC;QAC3D,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,6BAA6B,EAAE,MAAM,CAAC,CAAC;QACzD,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,6BAA6B,EAAE,QAAQ,CAAC,CAAC;QAC3D,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,iCAAiC,EAAE,QAAQ,CAAC,CAAC;QAC/D,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,mCAAmC,EAAE,MAAM,CAAC,CAAC;QAC/D,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,iCAAiC,EAAE,IAAI,CAAC,CAAC;QAC3D,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,iCAAiC,EAAE,IAAI,CAAC,CAAC;QAC3D,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,iCAAiC,EAAE,IAAI,CAAC,CAAC;QAC3D,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,mCAAmC,EAAE,IAAI,CAAC,CAAC;QAC7D,GAAG,GAAG,GAAG,CAAC,OAAO,CACf,uCAAuC,EACvC,CAAC,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,KAAK,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CACrE,CAAC;QACF,OAAO,GAAG,CAAC;IACb,CAAC,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"strip-page-numbers.d.ts","sourceRoot":"","sources":["../../src/regex/strip-page-numbers.ts"],"names":[],"mappings":"AAiBA,wBAAgB,gBAAgB,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,CAInD"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
 * stripPageNumbers — blank out lines whose entire content is a page marker.
 *
 * Recognized line shapes (surrounding whitespace allowed):
 *   "12"               a bare number of 1–4 digits
 *   "Page 12"          case-insensitive
 *   "Page 12 of 340"   case-insensitive
 *   "- 12 -"
 *   "[12]"
 */
const PATTERNS = [
    /^\s*\d{1,4}\s*$/gm,
    /^\s*Page\s+\d+(\s+of\s+\d+)?\s*$/gim,
    /^\s*-\s*\d{1,4}\s*-\s*$/gm,
    /^\s*\[\s*\d{1,4}\s*\]\s*$/gm,
];

/**
 * Apply every page-marker pattern in turn, replacing each match with the
 * empty string.
 *
 * @param {string} md - Markdown text.
 * @returns {string} The text with page-marker lines emptied.
 */
export function stripPageNumbers(md) {
    return PATTERNS.reduce((text, marker) => text.replace(marker, ""), md);
}
|
|
23
|
+
//# sourceMappingURL=strip-page-numbers.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"strip-page-numbers.js","sourceRoot":"","sources":["../../src/regex/strip-page-numbers.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AACH,MAAM,QAAQ,GAAG;IACf,mBAAmB;IACnB,qCAAqC;IACrC,2BAA2B;IAC3B,6BAA6B;CAC9B,CAAC;AAEF,MAAM,UAAU,gBAAgB,CAAC,EAAU;IACzC,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,KAAK,MAAM,CAAC,IAAI,QAAQ;QAAE,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IACnD,OAAO,GAAG,CAAC;AACb,CAAC"}
|