euparliamentmonitor 0.9.13 → 0.9.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -4
- package/scripts/aggregator/article-generator.js +2 -1
- package/scripts/aggregator/article-html.d.ts +9 -0
- package/scripts/aggregator/article-html.js +134 -13
- package/scripts/aggregator/article-metadata.d.ts +25 -161
- package/scripts/aggregator/article-metadata.js +71 -649
- package/scripts/aggregator/editorial-brief-resolver.d.ts +9 -0
- package/scripts/aggregator/editorial-brief-resolver.js +3 -1
- package/scripts/aggregator/metadata/date-labels.d.ts +122 -0
- package/scripts/aggregator/metadata/date-labels.js +209 -0
- package/scripts/aggregator/metadata/text-utils.d.ts +188 -0
- package/scripts/aggregator/metadata/text-utils.js +542 -0
- package/scripts/constants/og-locales.d.ts +15 -0
- package/scripts/constants/og-locales.js +17 -0
- package/scripts/constants/seo/index.d.ts +21 -0
- package/scripts/constants/seo/index.js +23 -0
- package/scripts/constants/seo/og-locales.d.ts +59 -0
- package/scripts/constants/seo/og-locales.js +59 -0
- package/scripts/constants/seo/social-handles.d.ts +50 -0
- package/scripts/constants/seo/social-handles.js +65 -0
- package/scripts/constants/social-handles.d.ts +11 -0
- package/scripts/constants/social-handles.js +13 -0
- package/scripts/discover-untranslated-briefs.js +224 -19
- package/scripts/generators/news-indexes.d.ts +35 -0
- package/scripts/generators/news-indexes.js +67 -6
- package/scripts/generators/political-intelligence/html.js +14 -6
- package/scripts/generators/seo-copy.js +42 -0
- package/scripts/generators/sitemap/html.js +13 -5
- package/scripts/lint-src-todos.js +124 -0
- package/scripts/utils/copy-test-reports.js +1 -1
- package/scripts/utils/generate-docs-index.js +1 -1
- package/scripts/validate-brief-translations.js +158 -18
|
@@ -23,10 +23,12 @@
|
|
|
23
23
|
* adopted-text IDs like `TA-10-2026-0160`) must appear in the translation
|
|
24
24
|
* whenever they appear in the source.
|
|
25
25
|
* 6. **Heading parity** — H1/H2/H3 heading counts must match
|
|
26
|
-
* the source closely
|
|
27
|
-
*
|
|
28
|
-
* and
|
|
29
|
-
*
|
|
26
|
+
* the source closely. H1 must match exactly (one per brief by style
|
|
27
|
+
* guide). H2 must match exactly (`H2_TOLERANCE = 0`): each `##` heading
|
|
28
|
+
* is a major section and dropping or merging one is the single most
|
|
29
|
+
* common AI failure mode. H3 may differ by at most `H3_TOLERANCE` (1)
|
|
30
|
+
* to allow legitimate sub-bullet fusion. The legacy `HEADING_TOLERANCE`
|
|
31
|
+
* export is preserved as an alias for `H3_TOLERANCE`.
|
|
30
32
|
* 7. **Mermaid block parity** — every ```` ```mermaid ```` block in the
|
|
31
33
|
* source must appear at least once in the translation. Mermaid syntax
|
|
32
34
|
* is a machine-readable fixed token; dropping a diagram silently breaks
|
|
@@ -142,16 +144,25 @@ const FIXED_TOKEN_PATTERNS_GLOBAL = Object.freeze(
|
|
|
142
144
|
);
|
|
143
145
|
|
|
144
146
|
/**
|
|
145
|
-
* Tolerance (in absolute count) for
|
|
146
|
-
* source and the translation.
|
|
147
|
-
* per brief by convention.
|
|
147
|
+
* Tolerance (in absolute count) for H3 heading-count drift between the
|
|
148
|
+
* source and the translation.
|
|
148
149
|
*
|
|
149
|
-
*
|
|
150
|
-
*
|
|
151
|
-
*
|
|
152
|
-
*
|
|
150
|
+
* - **H1**: hard zero — every brief has exactly one H1 by style guide.
|
|
151
|
+
* - **H2**: hard zero (see `H2_TOLERANCE` below). H2 is a major section;
|
|
152
|
+
* silently dropping or merging one is the single most common AI failure
|
|
153
|
+
* mode and the validator must catch it even when the dropped section
|
|
154
|
+
* contains no `FIXED_TOKEN_PATTERNS` matches to flag separately.
|
|
155
|
+
* - **H3**: tolerance of 1. Translators sometimes legitimately fuse two
|
|
156
|
+
* very short sub-bullets into one paragraph, or split a long H3 into two
|
|
157
|
+
* for readability in CJK scripts where dense text harms scanability.
|
|
158
|
+
*
|
|
159
|
+
* `HEADING_TOLERANCE` is preserved as a backward-compatible alias for
|
|
160
|
+
* `H3_TOLERANCE` so existing consumers (tests, downstream tooling that
|
|
161
|
+
* imports the constant) keep working.
|
|
153
162
|
*/
|
|
154
|
-
export const
|
|
163
|
+
export const H2_TOLERANCE = 0;
|
|
164
|
+
export const H3_TOLERANCE = 1;
|
|
165
|
+
export const HEADING_TOLERANCE = H3_TOLERANCE;
|
|
155
166
|
|
|
156
167
|
/**
|
|
157
168
|
* Pattern that matches a fenced ```mermaid block opener (case-insensitive).
|
|
@@ -188,6 +199,78 @@ export function countMermaidBlocks(text) {
|
|
|
188
199
|
return countGlobal(text, MERMAID_OPENER);
|
|
189
200
|
}
|
|
190
201
|
|
|
202
|
+
/**
 * Extract H2 section titles from markdown text. Returns the same shape
 * (a flat array of title strings) as
 * `scripts/discover-untranslated-briefs.js#extractH2Titles` so the
 * validator can produce a precise "likely-dropped section" diagnostic when
 * the heading-parity gate fires.
 *
 * A line is treated as an H2 when it starts at column 0 with exactly `##`,
 * followed by whitespace and at least one non-whitespace character. `###`
 * (and deeper) headings and `##` with no title text are ignored. Both LF
 * and CRLF line endings are accepted.
 *
 * @param {string} text - Raw markdown.
 * @returns {string[]} Trimmed H2 titles, in document order.
 */
export function extractH2Titles(text) {
  const titles = [];
  // Split on CRLF as well as LF: `.` never matches `\r`, so a trailing
  // carriage return would keep `$` from matching and hide every heading
  // in a file saved with Windows line endings.
  for (const line of text.split(/\r?\n/)) {
    const match = /^##\s+(\S.*)$/.exec(line);
    if (match) titles.push(match[1].trim());
  }
  return titles;
}
|
|
220
|
+
|
|
221
|
+
/**
 * Guess which source H2 section a translation dropped, for use as an
 * advisory hint in the heading-parity message. The gate itself fires on
 * the H2 count mismatch regardless of what this function returns.
 *
 * Translated titles are localised, so titles are never compared as whole
 * strings. Instead the fixed tokens that must survive translation verbatim
 * (whatever `FIXED_TOKEN_PATTERNS` matches) are used as anchors.
 *
 * A source title is a candidate only when:
 *   (a) the source has exactly one more H2 than the translation (so a
 *       single section vanished rather than a wholesale restructure), AND
 *   (b) the title contains at least one `FIXED_TOKEN_PATTERNS` match, AND
 *   (c) for at least one of those tokens, fewer translation titles contain
 *       the token than source titles do.
 *
 * When several titles qualify, only the last-occurring one is returned:
 * the second of two similarly-titled sections is the prototypical
 * regression (run #25983007788). When no token signal is available the
 * result is empty so the message stays clean rather than guessing.
 *
 * @param {string[]} sourceTitles - H2 titles of the source brief.
 * @param {string[]} targetTitles - H2 titles of the translation.
 * @returns {string[]} Zero or one source title.
 */
function detectLikelyDroppedH2s(sourceTitles, targetTitles) {
  if (sourceTitles.length - targetTitles.length !== 1) return [];

  // Clone each pattern once, keeping its flags (e.g. `i`, `u`) but dropping
  // the stateful `g`/`y` flags so `exec` always scans from index 0.
  const probes = [];
  for (const re of FIXED_TOKEN_PATTERNS) {
    probes.push(new RegExp(re.source, (re.flags ?? '').replace(/[gy]/g, '')));
  }

  const countTitlesWith = (titles, token) =>
    titles.filter((t) => t.includes(token)).length;

  const dropped = sourceTitles.filter((title) => {
    // First match of each pattern inside this title.
    const tokens = [];
    for (const probe of probes) {
      const m = probe.exec(title);
      if (m) tokens.push(m[0]);
    }
    // A token carried by fewer translation titles than source titles marks
    // this title as a likely casualty.
    return tokens.some(
      (tok) => countTitlesWith(targetTitles, tok) < countTitlesWith(sourceTitles, tok),
    );
  });

  return dropped.length > 1 ? [dropped[dropped.length - 1]] : dropped;
}
|
|
273
|
+
|
|
191
274
|
|
|
192
275
|
/** Count exact token occurrences returned by one fixed-token pattern. */
|
|
193
276
|
function countMatches(text, regex) {
|
|
@@ -199,6 +282,16 @@ function countMatches(text, regex) {
|
|
|
199
282
|
return counts;
|
|
200
283
|
}
|
|
201
284
|
|
|
285
|
+
/**
 * Wrap a single argument in single quotes so it can be pasted into a POSIX
 * shell verbatim. Each embedded single quote is rewritten as `'"'"'`
 * (close the quote, emit a double-quoted `'`, reopen the quote).
 *
 * @param {string} arg - Value to quote; coerced with `String()`.
 * @returns {string} The single-quoted argument.
 */
function shellQuote(arg) {
  const escapedQuote = '\'"\'"\'';
  const body = String(arg).split("'").join(escapedQuote);
  return "'" + body + "'";
}
|
|
294
|
+
|
|
202
295
|
/**
|
|
203
296
|
* Aggregate a violation list into a `{ key: count }` map for the validator
|
|
204
297
|
* report. Items with falsy values at `key` are skipped so the filename-gate
|
|
@@ -397,6 +490,8 @@ export function validateTranslation(translationPath, repoRoot) {
|
|
|
397
490
|
}
|
|
398
491
|
}
|
|
399
492
|
if (missingTokens.length > 0) {
|
|
493
|
+
const relQuoted = shellQuote(rel);
|
|
494
|
+
const siblingGlobQuoted = shellQuote(`${path.posix.dirname(rel)}/executive-brief_*.md`);
|
|
400
495
|
violations.push({
|
|
401
496
|
translationPath: rel,
|
|
402
497
|
sourcePath: sourceRel,
|
|
@@ -404,19 +499,45 @@ export function validateTranslation(translationPath, repoRoot) {
|
|
|
404
499
|
gate: 'fixed-token-preservation',
|
|
405
500
|
message:
|
|
406
501
|
`Translation is missing exact ${reSingle} token(s): ${missingTokens.join(', ')} ` +
|
|
407
|
-
`— proper noun / data-vintage identifiers MUST be preserved verbatim
|
|
502
|
+
`— proper noun / data-vintage identifiers MUST be preserved verbatim. ` +
|
|
503
|
+
`Self-check before flush: \`node scripts/validate-brief-translations.js --paths ${relQuoted}\` ` +
|
|
504
|
+
`(or \`--paths ${siblingGlobQuoted}\` to validate every sibling). ` +
|
|
505
|
+
`Dutch example: \`IMF\` stays \`IMF\` (never \`IMV\`); \`WEO\` stays \`WEO\` ` +
|
|
506
|
+
`(never \`Wereldwijde Economische Vooruitzichten\`).`,
|
|
408
507
|
});
|
|
409
508
|
}
|
|
410
509
|
}
|
|
411
510
|
|
|
412
|
-
// Gate 6 — heading parity. H1 must match exactly (briefs have
|
|
413
|
-
// by style guide
|
|
511
|
+
// Gate 6 — heading parity. H1 and H2 must match exactly (briefs have
|
|
512
|
+
// exactly one H1 by style guide; each H2 is a major section that must
|
|
513
|
+
// round-trip). H3 may drift by H3_TOLERANCE in absolute count.
|
|
414
514
|
for (const level of [1, 2, 3]) {
|
|
415
515
|
const sourceCount = countHeadings(sourceText, level);
|
|
416
516
|
if (sourceCount === 0) continue;
|
|
417
517
|
const targetCount = countHeadings(targetText, level);
|
|
418
|
-
|
|
518
|
+
let tolerance;
|
|
519
|
+
if (level === 1) tolerance = 0;
|
|
520
|
+
else if (level === 2) tolerance = H2_TOLERANCE;
|
|
521
|
+
else tolerance = H3_TOLERANCE;
|
|
419
522
|
if (Math.abs(sourceCount - targetCount) > tolerance) {
|
|
523
|
+
let detail = '';
|
|
524
|
+
if (level === 2) {
|
|
525
|
+
// Surface the actual H2 titles so reviewers/agents can pinpoint
|
|
526
|
+
// which section was dropped — regression hardening from run
|
|
527
|
+
// #25983007788 where 13 sibling translations identically dropped
|
|
528
|
+
// `## IMF Economic Context — May 2026 Update` and the validator
|
|
529
|
+
// report only said "8 vs 7 H2".
|
|
530
|
+
const sourceTitles = extractH2Titles(sourceText);
|
|
531
|
+
const targetTitles = extractH2Titles(targetText);
|
|
532
|
+
const dropped = detectLikelyDroppedH2s(sourceTitles, targetTitles);
|
|
533
|
+
const sourceList = sourceTitles.map((t) => `"${t}"`).join(', ');
|
|
534
|
+
detail = ` Source H2 titles: [${sourceList}].`;
|
|
535
|
+
if (dropped.length > 0) {
|
|
536
|
+
detail +=
|
|
537
|
+
` Likely dropped: [${dropped.map((t) => `"${t}"`).join(', ')}].` +
|
|
538
|
+
` Re-translate the missing section and keep its FIXED TOKEN(S) verbatim.`;
|
|
539
|
+
}
|
|
540
|
+
}
|
|
420
541
|
violations.push({
|
|
421
542
|
translationPath: rel,
|
|
422
543
|
sourcePath: sourceRel,
|
|
@@ -424,7 +545,8 @@ export function validateTranslation(translationPath, repoRoot) {
|
|
|
424
545
|
gate: 'heading-parity',
|
|
425
546
|
message:
|
|
426
547
|
`Translation has ${targetCount} H${level} heading(s); source has ${sourceCount} ` +
|
|
427
|
-
`(tolerance ±${tolerance}). Whole subsections appear to be missing or merged
|
|
548
|
+
`(tolerance ±${tolerance}). Whole subsections appear to be missing or merged.` +
|
|
549
|
+
detail,
|
|
428
550
|
});
|
|
429
551
|
}
|
|
430
552
|
}
|
|
@@ -451,6 +573,24 @@ export function validateTranslation(translationPath, repoRoot) {
|
|
|
451
573
|
return violations;
|
|
452
574
|
}
|
|
453
575
|
|
|
576
|
+
/**
 * Expand a list of paths that may contain glob patterns into resolved file paths.
 * Uses Node's built-in fs.globSync (Node 22+) for any entry containing `*` or `?`.
 *
 * Every entry is first resolved against `repoRoot`. Entries without `*` or
 * `?` are passed through as the resolved path, whether or not the file
 * exists. Glob entries are replaced by their matches, sorted so the result
 * does not depend on directory enumeration order; a pattern that matches
 * nothing contributes no paths.
 *
 * @param {Iterable<string>} rawPaths - Paths or glob patterns, absolute or
 *   relative to `repoRoot`.
 * @param {string} repoRoot - Directory that relative entries resolve against.
 * @returns {string[]} Resolved paths, in input order (matches of one
 *   pattern are sorted among themselves).
 */
export function expandPathGlobs(rawPaths, repoRoot) {
  const expanded = [];
  for (const rawPath of rawPaths) {
    const resolved = path.resolve(repoRoot, rawPath);
    if (!/[*?]/.test(resolved)) {
      expanded.push(resolved);
      continue;
    }
    // Sort a copy of the matches so validator reports are reproducible
    // across filesystems.
    const matches = [...fs.globSync(resolved)].sort();
    for (const match of matches) expanded.push(match);
  }
  return expanded;
}
|
|
593
|
+
|
|
454
594
|
/** Run validation against a list of translation paths. */
|
|
455
595
|
export function runValidation(translationPaths, repoRoot, { quiet = false } = {}) {
|
|
456
596
|
const allViolations = [];
|
|
@@ -476,7 +616,7 @@ export function runValidation(translationPaths, repoRoot, { quiet = false } = {}
|
|
|
476
616
|
export function main(argv) {
|
|
477
617
|
const opts = parseArgs(argv);
|
|
478
618
|
const paths = opts.paths.length > 0
|
|
479
|
-
? opts.paths
|
|
619
|
+
? expandPathGlobs(opts.paths, opts.repoRoot)
|
|
480
620
|
: findAllTranslations(opts.repoRoot);
|
|
481
621
|
|
|
482
622
|
const violations = runValidation(paths, opts.repoRoot, { quiet: opts.quiet });
|