euparliamentmonitor 0.9.21 → 0.9.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -2
- package/scripts/aggregator/article-metadata.js +69 -14
- package/scripts/aggregator/editorial-brief-resolver.js +23 -0
- package/scripts/aggregator/html/headline.d.ts +41 -9
- package/scripts/aggregator/html/headline.js +69 -10
- package/scripts/aggregator/html/shell.js +73 -17
- package/scripts/aggregator/manifest/index.d.ts +1 -1
- package/scripts/aggregator/manifest/index.js +1 -1
- package/scripts/aggregator/manifest/resolver.d.ts +28 -1
- package/scripts/aggregator/manifest/resolver.js +61 -5
- package/scripts/aggregator/markdown-renderer.js +11 -0
- package/scripts/aggregator/metadata/artifact-category-heading.d.ts +81 -0
- package/scripts/aggregator/metadata/artifact-category-heading.js +353 -0
- package/scripts/aggregator/metadata/artifact-walker.js +29 -10
- package/scripts/aggregator/metadata/brief-body.d.ts +12 -0
- package/scripts/aggregator/metadata/brief-body.js +69 -0
- package/scripts/aggregator/metadata/briefing-highlight.d.ts +47 -0
- package/scripts/aggregator/metadata/briefing-highlight.js +469 -0
- package/scripts/aggregator/metadata/editorial-highlight.d.ts +18 -0
- package/scripts/aggregator/metadata/editorial-highlight.js +40 -1
- package/scripts/aggregator/metadata/heading-rules.d.ts +2 -81
- package/scripts/aggregator/metadata/heading-rules.js +78 -269
- package/scripts/aggregator/metadata/keyword-filters.d.ts +60 -0
- package/scripts/aggregator/metadata/keyword-filters.js +156 -0
- package/scripts/aggregator/metadata/lede-extractor.js +11 -2
- package/scripts/aggregator/metadata/priority-finding-cleaning.d.ts +22 -0
- package/scripts/aggregator/metadata/priority-finding-cleaning.js +181 -0
- package/scripts/aggregator/metadata/priority-finding-highlight.js +75 -159
- package/scripts/aggregator/metadata/resolve-helpers.d.ts +34 -0
- package/scripts/aggregator/metadata/resolve-helpers.js +202 -15
- package/scripts/aggregator/metadata/seo-budgets.d.ts +140 -0
- package/scripts/aggregator/metadata/seo-budgets.js +202 -0
- package/scripts/aggregator/metadata/text-truncate.d.ts +75 -0
- package/scripts/aggregator/metadata/text-truncate.js +277 -0
- package/scripts/aggregator/metadata/text-utils-constants.d.ts +96 -0
- package/scripts/aggregator/metadata/text-utils-constants.js +209 -0
- package/scripts/aggregator/metadata/text-utils.d.ts +32 -143
- package/scripts/aggregator/metadata/text-utils.js +119 -439
- package/scripts/aggregator/metadata/title-rejection.d.ts +37 -0
- package/scripts/aggregator/metadata/title-rejection.js +179 -0
- package/scripts/copy-vendor.js +84 -112
- package/scripts/dump-article-seo.js +640 -0
- package/scripts/fix-mermaid-diagrams.js +931 -0
- package/scripts/generators/news-indexes/backfill.d.ts +6 -1
- package/scripts/generators/news-indexes/backfill.js +71 -4
- package/scripts/validate-article-seo.js +534 -0
- package/scripts/validate-mermaid-diagrams.js +306 -0
|
@@ -0,0 +1,931 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
/**
|
|
4
|
+
* @module FixMermaidDiagrams
|
|
5
|
+
* @description
|
|
6
|
+
* Idempotent auto-fixer for the mechanical mermaid failure modes
|
|
7
|
+
* surfaced by `validate-mermaid-diagrams.js`. Only edits blocks that
|
|
8
|
+
* currently fail to parse — passing blocks are left untouched. Every
|
|
9
|
+
* candidate fix is re-parsed against the real mermaid v11 parser before
|
|
10
|
+
* it is committed; if it does not validate, the original body is kept
|
|
11
|
+
* and the block is reported as `stillBroken`.
|
|
12
|
+
*
|
|
13
|
+
* Fix recipes (each scoped to the detected diagram type):
|
|
14
|
+
*
|
|
15
|
+
* - quadrantChart → apply `sanitizeMermaidQuadrantChart`
|
|
16
|
+
* (auto-quotes labels with colons / special
|
|
17
|
+
* chars); clamp `[x.0, y.0]` coordinates to
|
|
18
|
+
* `[0, 1]` integer or `0.99` decimal (the v11
|
|
19
|
+
* parser rejects values >= 1.0 written in
|
|
20
|
+
* decimal form).
|
|
21
|
+
* - xyChart-beta → rename keyword to `xychart-beta` (lower-case)
|
|
22
|
+
* - xychart-beta → quote `title …` lines containing special chars;
|
|
23
|
+
* quote each element of `x-axis [...]`; strip
|
|
24
|
+
* leading non-numeric label tokens from
|
|
25
|
+
* `line [Series, v1, v2, …]` and
|
|
26
|
+
* `bar [Series, v1, v2, …]` arrays.
|
|
27
|
+
* - bar → rename to `xychart-beta`; convert simple
|
|
28
|
+
* `"Label": N` rows to a single shared
|
|
29
|
+
* `x-axis [...]` + `bar [...]`, and convert
|
|
30
|
+
* `"Label": [v1, v2, …]` array-format rows
|
|
31
|
+
* into per-row `bar [v1, v2, …]` lines.
|
|
32
|
+
* - radar → rename to `radar-beta`; convert multi-word
|
|
33
|
+
* `axis A, B, C, …` to quoted-ID form
|
|
34
|
+
* `axis a1["A"], a2["B"], …`; convert each
|
|
35
|
+
* `"Label" : v1, v2, …` row to
|
|
36
|
+
* `curve cN["Label"]{v1, v2, …}`.
|
|
37
|
+
* - timeline → replace `—`/`–` em/en-dashes with `-` in
|
|
38
|
+
* `title …` lines; strip parens AND colons
|
|
39
|
+
* from `section …` lines (the parser uses `:`
|
|
40
|
+
* as the event separator).
|
|
41
|
+
* - sankey-beta → convert both verbose forms
|
|
42
|
+
* (`A [n] TO B [m]` and `A -> B : N`) to CSV
|
|
43
|
+
* (`A,B,N`).
|
|
44
|
+
* - graph / flowchart → replace `<--` with `-->`; rewrite inner `"`
|
|
45
|
+
* inside `["…"]` labels to `&quot;` (handles
|
|
46
|
+
* RTL labels with embedded English quotes);
|
|
47
|
+
* wrap node labels in `["…"]` brackets that
|
|
48
|
+
* contain `{`, `}`, `(`, `)`, `:`, or `;` in
|
|
49
|
+
* double quotes (Mermaid rejects those tokens
|
|
50
|
+
* when the label is unquoted).
|
|
51
|
+
*
|
|
52
|
+
* Run with `--write` to persist; default is dry-run.
|
|
53
|
+
*
|
|
54
|
+
* node scripts/fix-mermaid-diagrams.js # dry-run, default roots
|
|
55
|
+
* node scripts/fix-mermaid-diagrams.js analysis foo # custom roots
|
|
56
|
+
* node scripts/fix-mermaid-diagrams.js --write # persist changes
|
|
57
|
+
* node scripts/fix-mermaid-diagrams.js --quiet # only summary
|
|
58
|
+
*/
|
|
59
|
+
|
|
60
|
+
import { promises as fs } from 'node:fs';
|
|
61
|
+
import path from 'node:path';
|
|
62
|
+
import process from 'node:process';
|
|
63
|
+
import { fileURLToPath } from 'node:url';
|
|
64
|
+
|
|
65
|
+
import {
|
|
66
|
+
extractMermaidBlocks,
|
|
67
|
+
detectDiagramType,
|
|
68
|
+
validateBlock,
|
|
69
|
+
} from './validate-mermaid-diagrams.js';
|
|
70
|
+
import { sanitizeMermaidQuadrantChart } from './aggregator/markdown-renderer.js';
|
|
71
|
+
|
|
72
|
+
// Opening fence of a mermaid code block: optional indent (group 1), a run of
// three or more backticks or tildes (group 2), then the `mermaid` info string
// (matched case-insensitively) and nothing else on the line.
const FENCE_OPEN = /^(\s*)(`{3,}|~{3,})\s*mermaid\s*$/i;
|
|
73
|
+
|
|
74
|
+
// ---------------------------------------------------------------------------
|
|
75
|
+
// Helpers
|
|
76
|
+
// ---------------------------------------------------------------------------
|
|
77
|
+
|
|
78
|
+
/**
 * Locate the first content line of a mermaid block body, skipping blank
 * lines, `%%` comments and `%%{ … }%%` directives (single- or multi-line).
 *
 * A directive is considered closed by any line ending in `}%%`. Checking
 * for `}}%%` only (as an `init` directive with a nested object happens to
 * end) would treat one-line directives such as `%%{ wrap }%%` or
 * `%%{init: {"theme": "dark"} }%%` as the start of a multi-line directive
 * and swallow every following line.
 *
 * @param {string[]} lines Body of the block, one entry per line
 * @returns {{ index: number, indent: string } | null} Index into `lines`
 *   and the leading whitespace of that line; `null` if no content line exists
 */
function firstContentLine(lines) {
  const DIRECTIVE_CLOSE = '}%%';
  let inDirective = false;
  for (let i = 0; i < lines.length; i++) {
    const raw = lines[i] ?? '';
    const trimmed = raw.trim();
    if (trimmed === '') continue;
    if (inDirective) {
      // Still inside a multi-line directive: look only for its terminator.
      if (trimmed.endsWith(DIRECTIVE_CLOSE)) inDirective = false;
      continue;
    }
    if (trimmed.startsWith('%%{')) {
      if (!trimmed.endsWith(DIRECTIVE_CLOSE)) inDirective = true;
      continue;
    }
    if (trimmed.startsWith('%%')) continue;
    const indent = raw.slice(0, raw.length - raw.trimStart().length);
    return { index: i, indent };
  }
  return null;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Break the inside of a `[ … ]` list into its comma-separated entries.
 * Commas that sit between a pair of double quotes do not split. Every
 * entry is whitespace-trimmed; the quotes themselves are kept. An empty
 * trailing entry is dropped, empty entries in the middle are retained.
 *
 * @param {string} inner Contents of `[ … ]` (without the brackets)
 * @returns {string[]}
 */
function splitArrayItems(inner) {
  const items = [];
  let quoted = false;
  let start = 0;
  for (let pos = 0; pos < inner.length; pos += 1) {
    const c = inner[pos];
    if (c === '"') {
      quoted = !quoted;
    } else if (c === ',' && !quoted) {
      items.push(inner.slice(start, pos).trim());
      start = pos + 1;
    }
  }
  const tail = inner.slice(start).trim();
  if (tail) items.push(tail);
  return items;
}
|
|
127
|
+
|
|
128
|
+
// ---------------------------------------------------------------------------
|
|
129
|
+
// Per-diagram-type fixers
|
|
130
|
+
// ---------------------------------------------------------------------------
|
|
131
|
+
|
|
132
|
+
/**
 * Normalise the two coordinates of every `…: [x, y]` row in a
 * quadrantChart body. Rows that do not end in a two-element bracket list
 * are returned untouched.
 *
 * Per coordinate token:
 *   - non-numeric tokens are kept as written;
 *   - a plain integer `0` or `1` is kept as written;
 *   - magnitudes above 1 are assumed to be on a 0-100 scale and divided
 *     by 100;
 *   - anything still `>= 1` (including `1.0`) becomes `0.99`, negatives
 *     become `0`;
 *   - the result is printed with at most two decimals, trailing zeros
 *     removed.
 *
 * @param {string} body
 * @returns {string}
 */
function clampQuadrantCoords(body) {
  const normalise = (token) => {
    const value = Number.parseFloat(token);
    if (!Number.isFinite(value)) return token;
    const isPlainInteger = /^-?\d+$/.test(token);
    if (isPlainInteger && (value === 0 || value === 1)) return token;
    let unit = Math.abs(value) > 1 ? value / 100 : value;
    if (unit >= 1) unit = 0.99;
    if (unit < 0) unit = 0;
    return unit.toFixed(2).replace(/\.?0+$/, '') || '0';
  };

  const rewritten = [];
  for (const row of body.split('\n')) {
    const hit = row.match(/^(\s*)(.*?)(:\s*)\[\s*([^\]]+)\s*\]\s*$/);
    const coords = hit ? hit[4].split(',').map((s) => s.trim()) : [];
    if (coords.length !== 2) {
      rewritten.push(row);
      continue;
    }
    const [, lead, label, separator] = hit;
    const fixedCoords = coords.map((token) => normalise(token)).join(', ');
    rewritten.push(`${lead}${label}${separator}[${fixedCoords}]`);
  }
  return rewritten.join('\n');
}
|
|
164
|
+
|
|
165
|
+
/**
 * Escape literal `"` characters that appear *inside* an already-quoted
 * label on a `"Label": [x, y]` row. The v11 lexer treats every `"` as a
 * string boundary, so a label such as `"תעריפי ארה"ב"` (a Hebrew label
 * containing a literal quote) is lexed as a short string followed by
 * garbage. Each `"` between the opening `"` and the last `":` on the
 * line is rewritten to the `&quot;` entity; replacing a quote with
 * itself would leave the row unchanged.
 *
 * Rows without inner quotes, and lines that are not point rows, are
 * returned untouched, so the transform is idempotent.
 *
 * @param {string} body
 * @returns {string}
 */
function escapeQuadrantInnerQuotes(body) {
  return body.split('\n').map((line) => {
    // `[indent]"<label>":[ws][x, y]` — the greedy `.+` makes the label span
    // up to the LAST `":` on the line, so inner quotes land inside group 2.
    const m = line.match(/^(\s*)"(.+)"\s*:\s*(\[[^\]]+\])\s*$/);
    if (!m) return line;
    const [, indent, label, coords] = m;
    if (!label.includes('"')) return line;
    return `${indent}"${label.replace(/"/g, '&quot;')}": ${coords}`;
  }).join('\n');
}
|
|
187
|
+
|
|
188
|
+
/**
 * quadrantChart recipe. Runs three passes over the block body, in order:
 * the render-time sanitiser (`sanitizeMermaidQuadrantChart`), the
 * inner-quote escaper, and finally the coordinate clamp.
 *
 * @param {string} body
 * @returns {string}
 */
function fixQuadrantChart(body) {
  const sanitised = sanitizeMermaidQuadrantChart(body);
  const quotesEscaped = escapeQuadrantInnerQuotes(sanitised);
  return clampQuadrantCoords(quotesEscaped);
}
|
|
199
|
+
|
|
200
|
+
/**
 * Normalise the diagram keyword on the first content line: the
 * camel-cased spelling `xyChart-beta` is rewritten to the canonical
 * lower-case `xychart-beta`. All other lines pass through unchanged, and
 * a body without any content line is returned as-is.
 *
 * @param {string} body
 * @returns {string}
 */
function fixXyChartBetaCase(body) {
  const rows = body.split('\n');
  const head = firstContentLine(rows);
  if (!head) return body;
  return rows
    .map((row, i) => (i === head.index ? row.replace(/\bxyChart-beta\b/, 'xychart-beta') : row))
    .join('\n');
}
|
|
214
|
+
|
|
215
|
+
/**
 * Line-by-line clean-up of an `xychart-beta` body. The first rule that
 * matches a line wins:
 *
 *   1. `title …`             — wrap the text in double quotes unless it
 *                              already starts with one.
 *   2. `x-axis [...]`        — quote every element that is not already
 *                              quoted.
 *   3. `line [...]` / `bar [...]`
 *                            — when the first element is not a number and
 *                              all remaining elements are, drop the first.
 *   4. `line "S": [...]` / `bar "S": [...]`
 *                            — drop the label and keep only the numeric
 *                              elements (if there are any).
 *   5. `scatter [...]`       — replace the line with a `%%` comment.
 *
 * Lines matching none of the rules are left as they are.
 *
 * @param {string} body
 * @returns {string}
 */
function fixXychartBeta(body) {
  const isNumeric = (token) => /^-?\d+(?:\.\d+)?$/.test(token);
  const isQuoted = (token) => /^".*"$/.test(token);
  const quote = (text) => `"${text.replace(/"/g, '\\"')}"`;

  const rewrite = (line) => {
    const titleHit = line.match(/^(\s*)title\s+(.+?)\s*$/);
    if (titleHit) {
      const [, pad, text] = titleHit;
      return text.startsWith('"') ? line : `${pad}title ${quote(text)}`;
    }

    const axisHit = line.match(/^(\s*)x-axis\s*\[\s*(.+?)\s*\]\s*$/);
    if (axisHit) {
      const entries = splitArrayItems(axisHit[2]);
      const everyQuoted = entries.length > 0 && entries.every((e) => isQuoted(e));
      if (everyQuoted) return line;
      const quotedList = entries.map((e) => (isQuoted(e) ? e : quote(e))).join(', ');
      return `${axisHit[1]}x-axis [${quotedList}]`;
    }

    const seriesHit = line.match(/^(\s*)(line|bar)\s*\[\s*(.+?)\s*\]\s*$/);
    if (seriesHit) {
      const [, pad, keyword, inner] = seriesHit;
      const entries = splitArrayItems(inner);
      const values = entries.slice(1);
      const leadIsLabel = entries.length > 0 && !isNumeric(entries[0]);
      // Only strip the leading label when what remains is purely numeric.
      if (leadIsLabel && values.length > 0 && values.every((v) => isNumeric(v))) {
        return `${pad}${keyword} [${values.join(', ')}]`;
      }
      return line;
    }

    const labelledHit = line.match(/^(\s*)(line|bar)\s+"[^"]*"\s*:\s*\[\s*(.+?)\s*\]\s*$/);
    if (labelledHit) {
      const numbers = splitArrayItems(labelledHit[3]).filter((v) => isNumeric(v));
      return numbers.length > 0
        ? `${labelledHit[1]}${labelledHit[2]} [${numbers.join(', ')}]`
        : line;
    }

    if (/^\s*scatter\s*\[/.test(line)) {
      return `${line.match(/^(\s*)/)[1]}%% removed unsupported scatter series`;
    }

    return line;
  };

  return body.split('\n').map((line) => rewrite(line)).join('\n');
}
|
|
292
|
+
|
|
293
|
+
/**
 * Convert a `bar` diagram (not a valid Mermaid type) into an
 * `xychart-beta`. The keyword on the first content line is renamed, then
 * one of three shapes is handled:
 *
 *   1. Already xychart-shaped (`x-axis [...]` + `bar [...]` present) —
 *      run `fixXychartBeta` on the renamed body.
 *   2. Simple `"Label": value` / `Label: value` rows without an
 *      `x-axis [...]` — rebuild as `x-axis ["Label", …]` +
 *      `y-axis "Value" 0 --> max` + `bar [val, …]`. Only the keyword
 *      line, the title, blank lines and `%%` comments survive.
 *   3. Array rows (`"Label": [v1, v2, …]`) with an existing
 *      `x-axis [...]` — convert each row to a `bar [v1, v2, …]` line and,
 *      if no `y-axis` exists, insert an inferred one before the first
 *      converted row.
 *
 * The inferred y-axis maximum is 5 % above the largest value across ALL
 * rows (at least 1). Deriving it from the first row only would leave
 * later, larger series outside the axis range.
 *
 * @param {string} body
 * @returns {string}
 */
function fixBarDiagram(body) {
  const lines = body.split('\n');
  const first = firstContentLine(lines);
  if (!first) return body;
  lines[first.index] = lines[first.index].replace(/\bbar\b/, 'xychart-beta');

  const hasXAxis = lines.some((l) => /^\s*x-axis\s*\[/.test(l));
  const hasBarArray = lines.some((l) => /^\s*bar\s*\[/.test(l));
  const hasYAxis = lines.some((l) => /^\s*y-axis\s+/.test(l));

  // Shape 1 — nothing to restructure.
  if (hasXAxis && hasBarArray) {
    return fixXychartBeta(lines.join('\n'));
  }

  const indent = first.indent;
  const isNumeric = (s) => /^-?\d+(?:\.\d+)?$/.test(s);
  const inferredYAxis = (nums) => {
    const ceil = Math.max(1, Math.ceil(Math.max(...nums) * 1.05));
    return `${indent}y-axis "Value" 0 --> ${ceil}`;
  };

  // Shape 3 — `"Label": [v,v,v]` array rows with existing x-axis.
  if (hasXAxis) {
    // Pass 1: recognise the data rows so the axis range can cover all of them.
    const rows = lines.map((line) => {
      const arr = line.match(/^(\s*)"([^"]+)"\s*:\s*\[\s*(.+?)\s*\]\s*$/);
      if (!arr) return null;
      const items = splitArrayItems(arr[3]).filter((s) => isNumeric(s));
      return items.length > 0 ? { pad: arr[1], items } : null;
    });
    const allValues = rows.flatMap((row) => (row ? row.items.map((v) => Number(v)) : []));

    // Pass 2: emit, inserting the inferred y-axis ahead of the first bar row.
    const out = [];
    let yInserted = hasYAxis;
    lines.forEach((line, i) => {
      const row = rows[i];
      if (!row) {
        out.push(line);
        return;
      }
      if (!yInserted) {
        out.push(inferredYAxis(allValues));
        yInserted = true;
      }
      out.push(`${row.pad}bar [${row.items.join(', ')}]`);
    });
    return fixXychartBeta(out.join('\n'));
  }

  // Shape 2 — `"Label": N` scalar rows — rebuild from scratch.
  const labels = [];
  const values = [];
  let titleLine = null;
  const preserved = [];
  for (let i = 0; i < lines.length; i++) {
    if (i === first.index) { preserved.push(lines[i]); continue; }
    const line = lines[i];
    const t = line.match(/^(\s*)title\s+(.+?)\s*$/);
    if (t) { titleLine = `${indent}title "${t[2].replace(/^"|"$/g, '').replace(/"/g, '\\"')}"`; continue; }
    // Quoted form: `"Label": N`
    const row = line.match(/^\s*"([^"]+)"\s*:\s*(-?\d+(?:\.\d+)?)\s*$/);
    if (row) { labels.push(row[1]); values.push(row[2]); continue; }
    // Unquoted form: `LABEL: N` (e.g. `ECON: 5`, `MFF-2027: 3.5`)
    const bare = line.match(/^\s*([A-Za-z][\w .&/+-]*?)\s*:\s*(-?\d+(?:\.\d+)?)\s*$/);
    if (bare) { labels.push(bare[1].trim()); values.push(bare[2]); continue; }
    // Anything else is dropped, except blank lines and comments.
    if (line.trim() === '' || line.trim().startsWith('%%')) preserved.push(line);
  }
  // No recognisable rows: return the body with only the keyword renamed.
  if (labels.length === 0) return lines.join('\n');
  const out = [...preserved];
  if (titleLine) out.push(titleLine);
  out.push(`${indent}x-axis [${labels.map((l) => `"${l.replace(/"/g, '\\"')}"`).join(', ')}]`);
  out.push(inferredYAxis(values.map((v) => Number(v))));
  out.push(`${indent}bar [${values.join(', ')}]`);
  return out.join('\n');
}
|
|
381
|
+
|
|
382
|
+
/**
 * Convert a legacy `radar` diagram into the parser-supported
 * `radar-beta` syntax. The body after the keyword line is parsed into a
 * title, an ordered de-duplicated list of axis labels and a list of
 * curves, then re-emitted canonically:
 *
 *   radar-beta
 *     title …
 *     axis a1["A"], a2["B"], …
 *     curve c1["Label"]{v1, v2, …}
 *
 * Lines before the keyword (init directives, comments, blanks) are kept
 * verbatim; `y-axis …`, `max …`, bare `dataset` / `series` markers and
 * unrecognised lines are dropped. Curves are zero-padded or truncated to
 * the number of axes. If no curve with values can be recovered, the
 * original body is returned unchanged.
 *
 * @param {string} body
 * @returns {string}
 */
function fixRadar(body) {
  const rawLines = body.split('\n');
  const first = firstContentLine(rawLines);
  if (!first) return body;

  // Preserve leading init / comment / empty lines verbatim.
  const preserved = rawLines.slice(0, first.index);
  const indent = first.indent;
  const bodyLines = rawLines.slice(first.index + 1);

  const isNumeric = (s) => /^-?\d+(?:\.\d+)?$/.test(s);
  // Numbers from a comma- and/or whitespace-separated value list.
  const looseNumbers = (text) => text.split(/[,\s]+/).filter(Boolean)
    .filter((s) => isNumeric(s)).map((s) => Number(s));
  // Numbers from a quote-aware comma-separated list (`[ … ]` contents).
  const listNumbers = (text) => splitArrayItems(text)
    .filter((s) => isNumeric(s)).map((s) => Number(s));

  let title = null;
  const axisLabels = []; // ordered, deduplicated
  const curves = []; // { label, values: number[] }
  const seenAxis = new Set();
  const addAxis = (label) => {
    const clean = String(label).trim().replace(/^"|"$/g, '').trim();
    if (!clean) return;
    if (seenAxis.has(clean)) return;
    seenAxis.add(clean);
    axisLabels.push(clean);
  };

  for (const line of bodyLines) {
    const trimmed = line.trim();
    if (trimmed === '' || trimmed.startsWith('%%')) continue;
    // Keyword-only `dataset` / `series` markers carry no data.
    if (/^(dataset|series)\s*$/i.test(trimmed)) continue;

    // title
    const t = trimmed.match(/^title\s+(.+)$/i);
    if (t) { title = t[1].replace(/^"|"$/g, ''); continue; }

    // axis A, B, C   OR   axis a1["A"], a2["B"], …
    const axisLine = trimmed.match(/^axis\s+(.+)$/i);
    if (axisLine) {
      const inner = axisLine[1];
      // If items are already in `aN["Label"]` form just extract the labels.
      const idForm = [...inner.matchAll(/[A-Za-z_]\w*\["([^"]+)"\]/g)].map((m) => m[1]);
      if (idForm.length > 0) idForm.forEach((label) => addAxis(label));
      else splitArrayItems(inner).forEach((it) => addAxis(it));
      continue;
    }

    // x-axis ["a", "b", …]   OR   x-axis a, b, c (legacy form)
    const xAxisArr = trimmed.match(/^x-axis\s*\[\s*(.+?)\s*\]\s*$/i);
    if (xAxisArr) { splitArrayItems(xAxisArr[1]).forEach((it) => addAxis(it)); continue; }
    const xAxisBare = trimmed.match(/^x-axis\s+(.+)$/i);
    if (xAxisBare) {
      const tokens = xAxisBare[1].split(/\s*,\s*/).filter(Boolean);
      tokens.forEach((token) => addAxis(token));
      continue;
    }
    // y-axis is meaningless on a radar — drop silently.
    if (/^y-axis\b/i.test(trimmed)) continue;
    // `max N` has no semantic equivalent in many of these blocks; skip.
    if (/^max\s+/i.test(trimmed)) continue;

    // ID ["Label"] : v1, v2, …   or   ID [Label] : v1, v2, …
    const idBracketRow = trimmed.match(/^([A-Za-z_]\w*)\s*\[\s*"?([^"\]]+?)"?\s*\]\s*:\s*(.+)$/);
    if (idBracketRow) {
      const label = idBracketRow[2].trim();
      const nums = looseNumbers(idBracketRow[3]);
      if (nums.length > 0) { curves.push({ label, values: nums }); continue; }
    }

    // "Label" [v1, v2, …]   (brackets used in place of `:`/`{…}`)
    const labelArr = trimmed.match(/^"([^"]+)"\s*\[\s*(.+?)\s*\]\s*$/);
    if (labelArr) {
      const nums = listNumbers(labelArr[2]);
      if (nums.length > 0) { curves.push({ label: labelArr[1], values: nums }); continue; }
    }

    // "Label" : v1, v2, …   OR   "Label": [v1, v2, …]
    const labelColon = trimmed.match(/^"([^"]+)"\s*:\s*\[?\s*(.+?)\s*\]?\s*$/);
    if (labelColon) {
      const nums = looseNumbers(labelColon[2]);
      if (nums.length > 0) { curves.push({ label: labelColon[1], values: nums }); continue; }
    }

    // ID [v1, v2, …]   (identifier + bracketed values)
    // The guard must match the WHOLE identifier: without the group the
    // alternation reads `^x-axis` | `y-axis` | `axis$`, which also rejects
    // ordinary curve labels that merely end in "axis".
    const idArr = trimmed.match(/^([A-Za-z_][\w -]*?)\s*\[\s*(.+?)\s*\]\s*$/);
    if (idArr && !/^(?:x-axis|y-axis|axis)$/i.test(idArr[1])) {
      const nums = listNumbers(idArr[2]);
      if (nums.length > 0) { curves.push({ label: idArr[1].trim(), values: nums }); continue; }
    }

    // Unquoted `Label: value` — single-value per row OR axis-with-score.
    const bareColon = trimmed.match(/^([^:[\]{}"]+?)\s*:\s*\[?\s*(.+?)\s*\]?\s*$/);
    if (bareColon) {
      const nums = looseNumbers(bareColon[2]);
      if (nums.length > 0) { curves.push({ label: bareColon[1].trim(), values: nums }); continue; }
    }

    // Plain numeric-only line — pairs with the previous bare label line
    // (form: `Strengths\n72\nWeaknesses\n58\n…`). Append onto the most
    // recent zero-value curve if any; otherwise drop.
    if (isNumeric(trimmed)) {
      if (curves.length > 0 && curves[curves.length - 1].values.length === 0) {
        curves[curves.length - 1].values.push(Number(trimmed));
      }
      continue;
    }

    // Bare bracketed values on their own line. Two sub-shapes:
    //   - a row spanning two lines (`"Label"\n  [v1, v2, …]`): latch onto
    //     the most recent placeholder curve;
    //   - label-only axes followed by one combined value vector whose
    //     length equals the placeholder count: collapse all placeholders
    //     into a single curve with these values.
    const bareValuesArr = trimmed.match(/^\[\s*(.+?)\s*\]\s*$/);
    if (bareValuesArr) {
      const nums = listNumbers(bareValuesArr[1]);
      if (nums.length > 0) {
        const placeholderCurves = curves.filter((c) => c.values.length === 0);
        if (placeholderCurves.length === nums.length && placeholderCurves.length >= 2
            && placeholderCurves.length === curves.length) {
          // Keep the axes (already added), replace every placeholder
          // curve with one combined curve.
          const label = title || 'Score';
          curves.length = 0;
          curves.push({ label, values: nums });
        } else if (curves.length > 0 && curves[curves.length - 1].values.length === 0) {
          curves[curves.length - 1].values = nums;
        } else {
          curves.push({ label: `Curve ${curves.length + 1}`, values: nums });
        }
        continue;
      }
    }

    // Plain unquoted label line — could be an axis label declaration OR a
    // curve label awaiting a numeric value on the next line. Register it
    // as an axis and add a zero-value placeholder curve so a subsequent
    // numeric line can latch onto it.
    if (/^[A-Za-zÀ-ÿ][\w\s&/().+-]*$/.test(trimmed) && trimmed.length <= 80) {
      addAxis(trimmed);
      curves.push({ label: trimmed, values: [] });
      continue;
    }
  }

  // Synthesise axis labels from curve labels if none were declared.
  if (axisLabels.length === 0 && curves.length > 0) {
    // If every curve has exactly one value, treat curve labels as axis
    // names and collapse all single-value curves into one combined curve.
    const allSingle = curves.length >= 2 && curves.every((c) => c.values.length === 1);
    if (allSingle) {
      curves.forEach((c) => addAxis(c.label));
      const merged = { label: title || 'Score', values: curves.map((c) => c.values[0]) };
      curves.length = 0;
      curves.push(merged);
    } else {
      // Take the longest curve and seed axis labels A1…AN.
      const maxLen = curves.reduce((m, c) => Math.max(m, c.values.length), 0);
      for (let i = 1; i <= maxLen; i++) addAxis(`A${i}`);
    }
  }

  // Drop placeholder zero-value curves that never received a value.
  const realCurves = curves.filter((c) => c.values.length > 0);
  if (realCurves.length === 0) return body; // give up — original will surface upstream

  // Pad/truncate each curve to match axis count when known.
  if (axisLabels.length > 0) {
    for (const c of realCurves) {
      if (c.values.length < axisLabels.length) {
        while (c.values.length < axisLabels.length) c.values.push(0);
      } else if (c.values.length > axisLabels.length) {
        c.values.length = axisLabels.length;
      }
    }
  }

  // Emit canonical radar-beta.
  const out = [...preserved, `${indent}radar-beta`];
  if (title) out.push(`${indent} title ${title}`);
  if (axisLabels.length > 0) {
    const axisExpr = axisLabels
      .map((l, i) => `a${i + 1}["${String(l).replace(/"/g, '\\"')}"]`)
      .join(', ');
    out.push(`${indent} axis ${axisExpr}`);
  }
  realCurves.forEach((c, i) => {
    out.push(`${indent} curve c${i + 1}["${String(c.label).replace(/"/g, '\\"')}"]{${c.values.join(', ')}}`);
  });
  return out.join('\n');
}
|
|
598
|
+
|
|
599
|
+
/**
 * Timeline fixer. Applied line by line; the first matching rule wins:
 *
 *   - `title …`   — replace em-dash `—` / en-dash `–` with ASCII `-`.
 *   - `section …` — replace parens with spaces, remove colons and collapse
 *     whitespace; the timeline parser uses `:` as the event separator, so
 *     a colon in the section name is read as the start of an event.
 *   - `<period> : <event>` rows whose period contains parens — remove the
 *     parenthesised groups from the period. The separator is the first
 *     `:` OUTSIDE parentheses, so `2024 (00:00 UTC) : Launch` becomes
 *     `2024 : Launch` rather than being cut at the colon inside the
 *     parentheses. If the period consists only of a parenthesised group,
 *     its text is kept (minus parens and colons) so the row does not
 *     degenerate into a continuation row.
 *
 * Continuation rows (whitespace + `:`), rows whose period has no parens
 * and all other lines are left untouched.
 *
 * @param {string} body
 * @returns {string}
 */
function fixTimeline(body) {
  const collapse = (text) => text.replace(/\s{2,}/g, ' ').trim();

  // Index of the first `:` that is not nested inside `( … )`, or -1.
  const topLevelColon = (text) => {
    let depth = 0;
    for (let i = 0; i < text.length; i++) {
      const ch = text[i];
      if (ch === '(') depth += 1;
      else if (ch === ')') depth = Math.max(0, depth - 1);
      else if (ch === ':' && depth === 0) return i;
    }
    return -1;
  };

  return body.split('\n').map((line) => {
    const titleM = line.match(/^(\s*title\s+)(.+?)\s*$/);
    if (titleM) {
      return `${titleM[1]}${titleM[2].replace(/[—–]/g, '-')}`;
    }

    const sec = line.match(/^(\s*section\s+)(.+?)\s*$/);
    if (sec) {
      return `${sec[1]}${collapse(sec[2].replace(/[()]/g, ' ').replace(/:/g, ''))}`;
    }

    const indent = line.match(/^\s*/)[0];
    const rest = line.slice(indent.length);
    let sep = topLevelColon(rest);
    // Unbalanced parens can hide every colon; fall back to the first one.
    if (sep === -1) sep = rest.indexOf(':');
    if (sep === -1) return line;

    const rawPeriod = rest.slice(0, sep);
    // Empty period = continuation row; no parens = nothing to clean.
    if (rawPeriod.trim() === '' || !/[()]/.test(rawPeriod)) return line;

    let period = collapse(rawPeriod.replace(/\([^)]*\)/g, ''));
    if (period === '') period = collapse(rawPeriod.replace(/[():]/g, ' '));
    if (period === '') return line;

    const event = rest.slice(sep + 1).trim();
    return `${indent}${period} : ${event}`;
  }).join('\n');
}
|
|
642
|
+
|
|
643
|
+
/**
 * Convert both verbose sankey forms to CSV:
 *
 *   `A [n1] TO B [n2]`  →  `A,B,n2`
 *   `A -> B : N`        →  `A,B,N`   (also `A --> B : N`)
 *
 * Both conversions strip surrounding whitespace and keep only the
 * numeric weight at the right-hand side. Lines matching neither form
 * are returned unchanged.
 *
 * @param {string} body
 * @returns {string}
 */
function fixSankey(body) {
  const toForm = /^\s*([^,[\]]+?)\s*\[\s*-?\d+(?:\.\d+)?\s*\]\s+TO\s+([^,[\]]+?)\s*\[\s*(-?\d+(?:\.\d+)?)\s*\]\s*$/;
  // `-+>` accepts `->`, `-->`, …; matching only `->` would capture the
  // extra dash of `A --> B` as part of the source name (`A -`), producing
  // CSV that parses but names the wrong node.
  const arrowForm = /^\s*([^,:\s][^,:]*?)\s*-+>\s*([^,:\s][^,:]*?)\s*:\s*(-?\d+(?:\.\d+)?)\s*$/;

  return body
    .split('\n')
    .map((line) => {
      const match = line.match(toForm) ?? line.match(arrowForm);
      if (!match) return line;
      const [, source, target, weight] = match;
      return `${source.trim()},${target.trim()},${weight}`;
    })
    .join('\n');
}
|
|
664
|
+
|
|
665
|
+
const FLOWCHART_SPECIAL_CHARS = /[{}():;,]/;

// Mermaid's entity code for a literal double quote inside a quoted label.
const FLOWCHART_QUOTE_ENTITY = '#quot;';

// Operand content that makes a `<--` line too ambiguous to swap safely:
// another link, a statement terminator, an edge-label pipe, or a comment.
const FLOWCHART_LINK_HINT = /--|==|-\.|~~|[;|]|%%/;

/**
 * Replace every literal `"` with Mermaid's `#quot;` entity so the text
 * can sit inside a `"…"` label without terminating it early.
 *
 * @param {string} text
 * @returns {string}
 */
function escapeFlowchartLabelQuotes(text) {
  return text.replace(/"/g, FLOWCHART_QUOTE_ENTITY);
}

/**
 * Rewrite `<--` edges, which the flowchart grammar has no form for.
 *
 * A simple `X <-- Y` line becomes `Y --> X`, preserving edge direction.
 * Lines that are not a plain two-operand edge fall back to replacing
 * the arrow in place (`<--` → `-->`), which keeps the diagram parseable
 * but does flip the direction of that edge.
 *
 * @param {string} line
 * @returns {string}
 */
function rewriteReverseEdges(line) {
  const simple = line.match(/^(\s*)(\S.*?)\s+<--\s+(\S.*?)\s*$/);
  if (simple && !FLOWCHART_LINK_HINT.test(simple[2]) && !FLOWCHART_LINK_HINT.test(simple[3])) {
    return `${simple[1]}${simple[3]} --> ${simple[2]}`;
  }
  return line.replace(/(\s)<--(\s)/g, '$1-->$2');
}

/**
 * Wrap unquoted `WORD[…]` node labels in double quotes when the inner
 * text contains characters that break the Mermaid flowchart lexer
 * (`{`, `}`, `(`, `)`, `:`, `;`, `,`). Literal double quotes inside the
 * label are rewritten to `#quot;`. Edge labels (`|…|` between arrows)
 * are NOT touched.
 *
 * @param {string} line
 * @returns {string}
 */
function quoteSpecialLabels(line) {
  // Match `WORD[…]`; the inner text may not contain brackets or newlines.
  return line.replace(/(\b[A-Za-z_][\w-]*)\[([^\[\]\n]+?)\]/g, (whole, id, inner) => {
    const text = inner.trim();
    // Already quoted — leave alone.
    if (/^".*"$/.test(text)) return whole;
    // Looks like another shape introducer: leading `(`, `[`, `/`, `\`.
    if (/^[([\/\\]/.test(text)) return whole;
    if (!FLOWCHART_SPECIAL_CHARS.test(text)) return whole;
    return `${id}["${escapeFlowchartLabelQuotes(text)}"]`;
  });
}

/**
 * Apply graph/flowchart fixes:
 *
 *  - Replace the Unicode arrow `→` (between whitespace) with `-->`.
 *  - Rewrite `<--` edges (see `rewriteReverseEdges`).
 *  - Give multi-word unquoted `subgraph` names a generated id
 *    (`sg_1`, `sg_2`, …) with the original name as a quoted label, and
 *    rewrite edge references to the original name.
 *  - Rewrite inner literal `"` inside `["…"]` labels to `#quot;`.
 *  - Wrap unquoted `[…]` labels containing `|`, `&`, `,`, `(`, `)`, `:`
 *    in `"…"`, then apply `quoteSpecialLabels` for `WORD[…]` labels.
 *
 * @param {string} body
 * @returns {string}
 */
function fixGraphFlowchart(body) {
  const rawLines = body.split('\n');

  // First pass: discover unquoted multi-word `subgraph` declarations and
  // build an originalName → id map.
  const subgraphRenames = new Map();
  for (const line of rawLines) {
    const m = line.match(/^\s*subgraph\s+(.+?)\s*$/);
    if (!m) continue;
    const rest = m[1];
    if (/^[A-Za-z_]\w*\s*\[/.test(rest)) continue; // id[...]
    if (/^"/.test(rest)) continue; // quoted label
    if (/^[A-Za-z_]\w*\s*$/.test(rest)) continue; // single word id
    const original = rest.replace(/^"|"$/g, '').trim();
    if (subgraphRenames.has(original)) continue;
    subgraphRenames.set(original, `sg_${subgraphRenames.size + 1}`);
  }

  // Compile the reference-rewriting patterns once instead of per line.
  const renameRules = [...subgraphRenames].map(([original, id]) => {
    const escOrig = original.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return {
      id,
      // Edge LHS: `<original> -->` or `<original> ---`
      lhs: new RegExp(`(^|\\s)${escOrig}(\\s*(?:--|==|\\.\\.))`, 'g'),
      // Edge RHS: `--> <original>` or `--- <original>`
      rhs: new RegExp(`((?:--|==|\\.\\.)\\s*(?:>\\s*)?)${escOrig}(\\s|$)`, 'g'),
    };
  });

  return rawLines
    .map((line) => {
      // Unicode arrow `→` between two identifiers is a common typo for `-->`.
      let out = line.replace(/(\s)→(\s)/g, '$1-->$2');
      out = rewriteReverseEdges(out);

      // Rewrite multi-word subgraph declarations.
      const sg = out.match(/^(\s*)subgraph\s+(.+?)\s*$/);
      if (sg && !/^[A-Za-z_]\w*\s*(?:\[|$)/.test(sg[2]) && !/^"/.test(sg[2])) {
        const original = sg[2].replace(/^"|"$/g, '').trim();
        const id = subgraphRenames.get(original);
        if (id) {
          return `${sg[1]}subgraph ${id} ["${escapeFlowchartLabelQuotes(original)}"]`;
        }
      }

      // Rewrite bare references to renamed subgraphs in edges.
      for (const { id, lhs, rhs } of renameRules) {
        out = out.replace(lhs, (_, pre, post) => `${pre}${id}${post}`);
        out = out.replace(rhs, (_, pre, post) => `${pre}${id}${post}`);
      }

      // Escape inner double quotes in [" … "] labels. The lookahead stops
      // the lazy match at the first `"]`, so one match never spans two labels.
      out = out.replace(/\["((?:(?!"\])[\s\S])*?)"\]/g, (_, inner) => `["${escapeFlowchartLabelQuotes(inner)}"]`);

      // Unquoted `[Foo | Bar | Baz]` labels — quote bracket content holding
      // any of `|`, `&`, `,`, `(`, `)`, `:`. The pattern excludes `"`, so the
      // inner text never needs quote escaping here.
      out = out.replace(/\[([^"\[\]]*[|&,():][^"\[\]]*)\]/g, (whole, inner) => {
        // Skip shape modifiers like `[(`, `[/`, `[\`, `[>`.
        if (/^[(\/\\>]/.test(inner)) return whole;
        return `["${inner.trim()}"]`;
      });

      return quoteSpecialLabels(out);
    })
    .join('\n');
}
|
|
775
|
+
|
|
776
|
+
// ---------------------------------------------------------------------------
|
|
777
|
+
// Orchestration
|
|
778
|
+
// ---------------------------------------------------------------------------
|
|
779
|
+
|
|
780
|
+
// Dispatch table: diagram type (as returned by `detectDiagramType`) → fixer.
const FIXERS = {
  quadrantChart: fixQuadrantChart,
  'xyChart-beta': fixXyChartBetaCase,
  'xychart-beta': fixXychartBeta,
  bar: fixBarDiagram,
  radar: fixRadar,
  timeline: fixTimeline,
  'sankey-beta': fixSankey,
  graph: fixGraphFlowchart,
  flowchart: fixGraphFlowchart,
};

/**
 * Apply the type-appropriate fixer to a single block body. Returns the
 * candidate fixed body — caller is responsible for re-validating to
 * confirm the fix actually works. The body is returned unchanged when
 * no fixer is registered for the detected type or when the fixer throws.
 *
 * @param {string} body
 * @returns {string}
 */
export function fixBlock(body) {
  const type = detectDiagramType(body);
  // Own-property check: a plain-object lookup would also resolve inherited
  // keys such as `constructor` or `toString` and call them as fixers.
  if (!Object.prototype.hasOwnProperty.call(FIXERS, type)) return body;
  const fixer = FIXERS[type];
  try {
    return fixer(body);
  } catch {
    // Best-effort: a fixer failure leaves the block as it was.
    return body;
  }
}
|
|
810
|
+
|
|
811
|
+
/**
 * Rebuild a markdown document by replacing every failed mermaid block
 * with its fixer-produced version (only when the fixer's output
 * actually parses cleanly).
 *
 * Blocks are processed from the bottom of the document upwards so that
 * splicing a replacement body never shifts the line index of a block
 * that is still to be processed. Every block that is left broken in the
 * returned text — the fixer made no change, the candidate still fails
 * validation, or the opening fence could not be located — is counted in
 * `skipped` and listed in `stillBroken`.
 *
 * @param {string} text Original markdown
 * @param {import('mermaid').default} mermaid Pre-loaded parser instance
 * @returns {Promise<{
 *   text: string,
 *   fixed: number,
 *   skipped: number,
 *   stillBroken: Array<{ startLine: number, diagramType: string, error: string }>
 * }>}
 */
export async function fixDocument(text, mermaid) {
  const blocks = extractMermaidBlocks(text);
  const lines = text.split('\n');
  let fixed = 0;
  let skipped = 0;
  const stillBroken = [];

  const reportBroken = (block, verdict) => {
    skipped++;
    stillBroken.push({ startLine: block.startLine, diagramType: verdict.diagramType, error: verdict.error });
  };

  const sorted = [...blocks].sort((a, b) => b.startLine - a.startLine);
  for (const block of sorted) {
    const before = await validateBlock(block.body, mermaid);
    if (before.ok) continue;

    const candidate = fixBlock(block.body);
    if (candidate === block.body) {
      reportBroken(block, before);
      continue;
    }
    const after = await validateBlock(candidate, mermaid);
    if (!after.ok) {
      reportBroken(block, after);
      continue;
    }

    // `startLine - 1` is used as the index of the opening fence line.
    const fenceLineIdx = block.startLine - 1;
    const fence = (lines[fenceLineIdx] ?? '').match(FENCE_OPEN);
    if (!fence) {
      // The candidate is valid but cannot be spliced in, so the original
      // (broken) block stays in the output — report it with its original error.
      reportBroken(block, before);
      continue;
    }

    // The closing fence uses the same character and is at least as long
    // as the opening fence.
    const char = fence[2][0];
    const minLen = fence[2].length;
    const closeRe = new RegExp(`^\\s*${char === '`' ? '`' : '~'}{${minLen},}\\s*$`);
    let end = fenceLineIdx + 1;
    while (end < lines.length && !closeRe.test(lines[end] ?? '')) end++;
    const bodyLineCount = end - (fenceLineIdx + 1);
    lines.splice(fenceLineIdx + 1, bodyLineCount, ...candidate.split('\n'));
    fixed++;
  }
  return { text: lines.join('\n'), fixed, skipped, stillBroken };
}
|
|
863
|
+
|
|
864
|
+
/**
 * @internal CLI entry-point — runs the fixer across the given roots
 * (or `analysis/` by default) and prints a per-file summary.
 *
 * Flags: `--write` persists fixed files (otherwise nothing is written),
 * `--quiet` suppresses the per-file lines (the final summary is always
 * printed). Every other argument is treated as a root directory.
 *
 * @returns {Promise<void>}
 */
/* istanbul ignore next */
async function mainCli() {
  const cliArgs = process.argv.slice(2);
  const shouldWrite = cliArgs.includes('--write');
  const isQuiet = cliArgs.includes('--quiet');
  const requestedRoots = cliArgs.filter((arg) => !arg.startsWith('--'));

  // Provide the DOM globals from happy-dom, but only those that are
  // not already defined.
  const { Window } = await import('happy-dom');
  const win = new Window();
  const domGlobals = ['window', 'document', 'DOMParser', 'XMLSerializer', 'HTMLElement', 'SVGElement'];
  for (const name of domGlobals) {
    if (typeof globalThis[name] !== 'undefined') continue;
    globalThis[name] = name === 'window' ? win : win[name];
  }
  const mermaid = (await import('mermaid')).default;
  mermaid.initialize({ startOnLoad: false, securityLevel: 'loose', suppressErrorRendering: true });

  // Recursively collect `.md` files in name order, skipping `node_modules`
  // and dot-directories. A missing directory contributes nothing.
  const collectMarkdown = async (dir, found) => {
    let entries;
    try {
      entries = await fs.readdir(dir, { withFileTypes: true });
    } catch (err) {
      if (err.code === 'ENOENT') return found;
      throw err;
    }
    entries.sort((a, b) => a.name.localeCompare(b.name));
    for (const entry of entries) {
      const fullPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        const isIgnored = entry.name === 'node_modules' || entry.name.startsWith('.');
        if (!isIgnored) await collectMarkdown(fullPath, found);
        continue;
      }
      if (entry.isFile() && fullPath.endsWith('.md')) found.push(fullPath);
    }
    return found;
  };

  const files = [];
  const roots = requestedRoots.length ? requestedRoots : ['analysis'];
  for (const root of roots) await collectMarkdown(root, files);

  let totalFixed = 0;
  let totalStillBroken = 0;
  let filesChanged = 0;
  for (const file of files) {
    const text = await fs.readFile(file, 'utf8');
    const hasMermaidFence = text.includes('```mermaid') || text.includes('~~~mermaid');
    if (!hasMermaidFence) continue;

    const result = await fixDocument(text, mermaid);
    if (result.fixed > 0) {
      filesChanged += 1;
      totalFixed += result.fixed;
      if (shouldWrite) await fs.writeFile(file, result.text, 'utf8');
      if (!isQuiet) {
        process.stdout.write(`${shouldWrite ? 'fixed' : 'would fix'} ${result.fixed} block(s) in ${path.relative(process.cwd(), file)}\n`);
      }
    }
    totalStillBroken += result.stillBroken.length;
  }
  process.stdout.write(`\nSummary: ${shouldWrite ? 'fixed' : 'would fix'} ${totalFixed} block(s) across ${filesChanged} file(s); ${totalStillBroken} block(s) still broken after auto-fix.\n`);
}
|
|
924
|
+
|
|
925
|
+
// True when this module is the script Node was started with.
// Compare filesystem paths rather than URL strings: `import.meta.url` is
// percent-encoded (spaces, non-ASCII) and uses a different shape on
// Windows, so a hand-built `file://${argv[1]}` string does not match
// there. `fileURLToPath` expects a file URL, so it is applied to
// `import.meta.url`, not to `process.argv[1]`.
const isMain = (() => {
  try {
    return fileURLToPath(import.meta.url) === path.resolve(process.argv[1]);
  } catch {
    // No script path (e.g. `process.argv[1]` is undefined) — not the main module.
    return false;
  }
})();
if (isMain) {
  mainCli().catch((e) => { console.error(e); process.exit(2); });
}
|