euparliamentmonitor 0.9.13 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/package.json +6 -4
  2. package/scripts/aggregator/article-generator.js +2 -1
  3. package/scripts/aggregator/article-html.d.ts +9 -0
  4. package/scripts/aggregator/article-html.js +134 -13
  5. package/scripts/aggregator/article-metadata.d.ts +25 -161
  6. package/scripts/aggregator/article-metadata.js +71 -649
  7. package/scripts/aggregator/editorial-brief-resolver.d.ts +9 -0
  8. package/scripts/aggregator/editorial-brief-resolver.js +3 -1
  9. package/scripts/aggregator/metadata/date-labels.d.ts +122 -0
  10. package/scripts/aggregator/metadata/date-labels.js +209 -0
  11. package/scripts/aggregator/metadata/text-utils.d.ts +188 -0
  12. package/scripts/aggregator/metadata/text-utils.js +542 -0
  13. package/scripts/constants/og-locales.d.ts +15 -0
  14. package/scripts/constants/og-locales.js +17 -0
  15. package/scripts/constants/seo/index.d.ts +21 -0
  16. package/scripts/constants/seo/index.js +23 -0
  17. package/scripts/constants/seo/og-locales.d.ts +59 -0
  18. package/scripts/constants/seo/og-locales.js +59 -0
  19. package/scripts/constants/seo/social-handles.d.ts +50 -0
  20. package/scripts/constants/seo/social-handles.js +65 -0
  21. package/scripts/constants/social-handles.d.ts +11 -0
  22. package/scripts/constants/social-handles.js +13 -0
  23. package/scripts/discover-untranslated-briefs.js +224 -19
  24. package/scripts/generators/news-indexes.d.ts +35 -0
  25. package/scripts/generators/news-indexes.js +67 -6
  26. package/scripts/generators/political-intelligence/html.js +14 -6
  27. package/scripts/generators/seo-copy.js +42 -0
  28. package/scripts/generators/sitemap/html.js +13 -5
  29. package/scripts/lint-src-todos.js +124 -0
  30. package/scripts/utils/copy-test-reports.js +1 -1
  31. package/scripts/utils/generate-docs-index.js +1 -1
  32. package/scripts/validate-brief-translations.js +158 -18
@@ -23,10 +23,12 @@
23
23
  * adopted-text IDs like `TA-10-2026-0160`) must appear in the translation
24
24
  * whenever they appear in the source.
25
25
  * 6. **Heading parity** — H1/H2/H3 heading counts must match
26
- * the source closely (H1 must match exactly; H2/H3 may differ by at most
27
- * `HEADING_TOLERANCE`). LLMs frequently collapse or skip subsections,
28
- * and this gate catches that without flagging legitimate small
29
- * reformattings.
26
+ * the source closely. H1 must match exactly (one per brief by style
27
+ * guide). H2 must match exactly (`H2_TOLERANCE = 0`): each `##` heading
28
+ * is a major section and dropping or merging one is the single most
29
+ * common AI failure mode. H3 may differ by at most `H3_TOLERANCE` (1)
30
+ * to allow legitimate sub-bullet fusion. The legacy `HEADING_TOLERANCE`
31
+ * export is preserved as an alias for `H3_TOLERANCE`.
30
32
  * 7. **Mermaid block parity** — every ```` ```mermaid ```` block in the
31
33
  * source must appear at least once in the translation. Mermaid syntax
32
34
  * is a machine-readable fixed token; dropping a diagram silently breaks
@@ -142,16 +144,25 @@ const FIXED_TOKEN_PATTERNS_GLOBAL = Object.freeze(
142
144
  );
143
145
 
144
146
  /**
145
- * Tolerance (in absolute count) for H2/H3 heading-count drift between the
146
- * source and the translation. H1 must match exactly: there is only one H1
147
- * per brief by convention.
147
+ * Tolerance (in absolute count) for H3 heading-count drift between the
148
+ * source and the translation.
148
149
  *
149
- * Why a small non-zero floor? Translators sometimes legitimately fuse two
150
- * very short sub-bullets into one paragraph, or split a long H3 into two
151
- * for readability in CJK scripts where dense text harms scanability. A
152
- * tolerance of 1 absorbs that without letting whole sections disappear.
150
+ * - **H1**: hard zero — every brief has exactly one H1 by style guide.
151
+ * - **H2**: hard zero (see `H2_TOLERANCE` below). H2 is a major section;
152
+ * silently dropping or merging one is the single most common AI failure
153
+ * mode and the validator must catch it even when the dropped section
154
+ * contains no `FIXED_TOKEN_PATTERNS` matches to flag separately.
155
+ * - **H3**: tolerance of 1. Translators sometimes legitimately fuse two
156
+ * very short sub-bullets into one paragraph, or split a long H3 into two
157
+ * for readability in CJK scripts where dense text harms scanability.
158
+ *
159
+ * `HEADING_TOLERANCE` is preserved as a backward-compatible alias for
160
+ * `H3_TOLERANCE` so existing consumers (tests, downstream tooling that
161
+ * imports the constant) keep working.
153
162
  */
154
- export const HEADING_TOLERANCE = 1;
163
+ export const H2_TOLERANCE = 0;
164
+ export const H3_TOLERANCE = 1;
165
+ export const HEADING_TOLERANCE = H3_TOLERANCE;
155
166
 
156
167
  /**
157
168
  * Pattern that matches a fenced ```mermaid block opener (case-insensitive).
@@ -188,6 +199,78 @@ export function countMermaidBlocks(text) {
188
199
  return countGlobal(text, MERMAID_OPENER);
189
200
  }
190
201
 
202
+ /**
203
+ * Extract H2 section titles from markdown text. Mirrors the shape returned
204
+ * by `scripts/discover-untranslated-briefs.js#extractH2Titles` so the
205
+ * validator can produce a precise "likely-dropped section" diagnostic when
206
+ * the heading-parity gate fires.
207
+ *
208
+ * @param {string} text
209
+ * @returns {string[]}
210
+ */
211
+ export function extractH2Titles(text) {
212
+ const lines = text.split('\n');
213
+ const out = [];
214
+ for (const line of lines) {
215
+ const match = /^##\s+(\S.*)$/.exec(line);
216
+ if (match) out.push(match[1].trim());
217
+ }
218
+ return out;
219
+ }
220
+
221
+ /**
222
+ * Compute the set of source H2 titles that have no fuzzy match in the
223
+ * translation. We do NOT require translated titles to be identical — they
224
+ * are localised — but every source H2 should map to *some* translation
225
+ * H2. Titles are compared only through the fixed tokens they contain
226
+ * (`IMF`, `WEO`, `TA-…`, `data-vintage="…"`): a token counts as lost when
227
+ * fewer translation titles than source titles contain it. The output is purely
228
+ * advisory: the gate itself still fires on count mismatch.
229
+ *
230
+ * Heuristic: a source title is reported as "likely dropped" only when
231
+ * (a) it contains at least one FIXED_TOKEN_PATTERNS match, AND
232
+ * (b) fewer translation titles than source titles contain that token, AND
233
+ * (c) the source has exactly one more H2 than the translation (so we're
234
+ * confident a single section vanished rather than a wholesale restructure).
235
+ *
236
+ * @param {string[]} sourceTitles
237
+ * @param {string[]} targetTitles
238
+ * @returns {string[]}
239
+ */
240
+ function detectLikelyDroppedH2s(sourceTitles, targetTitles) {
241
+ if (sourceTitles.length - targetTitles.length !== 1) return [];
242
+ const dropped = [];
243
+ // Count, per FIXED TOKEN, how many source H2 titles contain it vs how
244
+ // many target H2 titles contain it. When a source H2 contains a token
245
+ // whose translation-side count is strictly smaller, that source H2 is
246
+ // very likely the dropped section.
247
+ for (const title of sourceTitles) {
248
+ const tokens = [];
249
+ for (const re of FIXED_TOKEN_PATTERNS) {
250
+ const m = new RegExp(re.source).exec(title);
251
+ if (m) tokens.push(m[0]);
252
+ }
253
+ if (tokens.length === 0) continue;
254
+ let lostToken = false;
255
+ for (const tok of tokens) {
256
+ const sourceHits = sourceTitles.filter((t) => t.includes(tok)).length;
257
+ const targetHits = targetTitles.filter((t) => t.includes(tok)).length;
258
+ if (targetHits < sourceHits) {
259
+ lostToken = true;
260
+ break;
261
+ }
262
+ }
263
+ if (lostToken) dropped.push(title);
264
+ }
265
+ // If the heuristic flagged multiple, prefer the *last-occurring* source
266
+ // title with a lost token — the second-of-two duplicate-titled section
267
+ // is the prototypical regression (run #25983007788). When no token
268
+ // signal at all is available, we return [] so the message stays clean
269
+ // rather than guessing.
270
+ if (dropped.length > 1) return [dropped[dropped.length - 1]];
271
+ return dropped;
272
+ }
273
+
191
274
 
192
275
  /** Count exact token occurrences returned by one fixed-token pattern. */
193
276
  function countMatches(text, regex) {
@@ -199,6 +282,16 @@ function countMatches(text, regex) {
199
282
  return counts;
200
283
  }
201
284
 
285
+ /**
286
+ * Quote one shell argument for safe copy/paste in POSIX shells.
287
+ *
288
+ * @param {string} arg
289
+ * @returns {string}
290
+ */
291
+ function shellQuote(arg) {
292
+ return `'${String(arg).replace(/'/g, `'\"'\"'`)}'`;
293
+ }
294
+
202
295
  /**
203
296
  * Aggregate a violation list into a `{ key: count }` map for the validator
204
297
  * report. Items with falsy values at `key` are skipped so the filename-gate
@@ -397,6 +490,8 @@ export function validateTranslation(translationPath, repoRoot) {
397
490
  }
398
491
  }
399
492
  if (missingTokens.length > 0) {
493
+ const relQuoted = shellQuote(rel);
494
+ const siblingGlobQuoted = shellQuote(`${path.posix.dirname(rel)}/executive-brief_*.md`);
400
495
  violations.push({
401
496
  translationPath: rel,
402
497
  sourcePath: sourceRel,
@@ -404,19 +499,45 @@ export function validateTranslation(translationPath, repoRoot) {
404
499
  gate: 'fixed-token-preservation',
405
500
  message:
406
501
  `Translation is missing exact ${reSingle} token(s): ${missingTokens.join(', ')} ` +
407
- `— proper noun / data-vintage identifiers MUST be preserved verbatim`,
502
+ `— proper noun / data-vintage identifiers MUST be preserved verbatim. ` +
503
+ `Self-check before flush: \`node scripts/validate-brief-translations.js --paths ${relQuoted}\` ` +
504
+ `(or \`--paths ${siblingGlobQuoted}\` to validate every sibling). ` +
505
+ `Dutch example: \`IMF\` stays \`IMF\` (never \`IMV\`); \`WEO\` stays \`WEO\` ` +
506
+ `(never \`Wereldwijde Economische Vooruitzichten\`).`,
408
507
  });
409
508
  }
410
509
  }
411
510
 
412
- // Gate 6 — heading parity. H1 must match exactly (briefs have exactly one
413
- // by style guide); H2/H3 may drift by HEADING_TOLERANCE in absolute count.
511
+ // Gate 6 — heading parity. H1 and H2 must match exactly (briefs have
512
+ // exactly one H1 by style guide; each H2 is a major section that must
513
+ // round-trip). H3 may drift by H3_TOLERANCE in absolute count.
414
514
  for (const level of [1, 2, 3]) {
415
515
  const sourceCount = countHeadings(sourceText, level);
416
516
  if (sourceCount === 0) continue;
417
517
  const targetCount = countHeadings(targetText, level);
418
- const tolerance = level === 1 ? 0 : HEADING_TOLERANCE;
518
+ let tolerance;
519
+ if (level === 1) tolerance = 0;
520
+ else if (level === 2) tolerance = H2_TOLERANCE;
521
+ else tolerance = H3_TOLERANCE;
419
522
  if (Math.abs(sourceCount - targetCount) > tolerance) {
523
+ let detail = '';
524
+ if (level === 2) {
525
+ // Surface the actual H2 titles so reviewers/agents can pinpoint
526
+ // which section was dropped — regression hardening from run
527
+ // #25983007788 where 13 sibling translations identically dropped
528
+ // `## IMF Economic Context — May 2026 Update` and the validator
529
+ // report only said "8 vs 7 H2".
530
+ const sourceTitles = extractH2Titles(sourceText);
531
+ const targetTitles = extractH2Titles(targetText);
532
+ const dropped = detectLikelyDroppedH2s(sourceTitles, targetTitles);
533
+ const sourceList = sourceTitles.map((t) => `"${t}"`).join(', ');
534
+ detail = ` Source H2 titles: [${sourceList}].`;
535
+ if (dropped.length > 0) {
536
+ detail +=
537
+ ` Likely dropped: [${dropped.map((t) => `"${t}"`).join(', ')}].` +
538
+ ` Re-translate the missing section and keep its FIXED TOKEN(S) verbatim.`;
539
+ }
540
+ }
420
541
  violations.push({
421
542
  translationPath: rel,
422
543
  sourcePath: sourceRel,
@@ -424,7 +545,8 @@ export function validateTranslation(translationPath, repoRoot) {
424
545
  gate: 'heading-parity',
425
546
  message:
426
547
  `Translation has ${targetCount} H${level} heading(s); source has ${sourceCount} ` +
427
- `(tolerance ±${tolerance}). Whole subsections appear to be missing or merged.`,
548
+ `(tolerance ±${tolerance}). Whole subsections appear to be missing or merged.` +
549
+ detail,
428
550
  });
429
551
  }
430
552
  }
@@ -451,6 +573,24 @@ export function validateTranslation(translationPath, repoRoot) {
451
573
  return violations;
452
574
  }
453
575
 
576
+ /**
577
+ * Expand a list of paths that may contain glob patterns into resolved file paths.
578
+ * Uses Node's built-in fs.globSync (Node 22+) for any entry whose resolved path contains `*` or `?`.
579
+ */
580
+ export function expandPathGlobs(rawPaths, repoRoot) {
581
+ const expanded = [];
582
+ for (const p of rawPaths) {
583
+ const resolved = path.resolve(repoRoot, p);
584
+ if (/[*?]/.test(resolved)) {
585
+ const matches = fs.globSync(resolved);
586
+ expanded.push(...matches);
587
+ } else {
588
+ expanded.push(resolved);
589
+ }
590
+ }
591
+ return expanded;
592
+ }
593
+
454
594
  /** Run validation against a list of translation paths. */
455
595
  export function runValidation(translationPaths, repoRoot, { quiet = false } = {}) {
456
596
  const allViolations = [];
@@ -476,7 +616,7 @@ export function runValidation(translationPaths, repoRoot, { quiet = false } = {}
476
616
  export function main(argv) {
477
617
  const opts = parseArgs(argv);
478
618
  const paths = opts.paths.length > 0
479
- ? opts.paths.map((p) => path.resolve(opts.repoRoot, p))
619
+ ? expandPathGlobs(opts.paths, opts.repoRoot)
480
620
  : findAllTranslations(opts.repoRoot);
481
621
 
482
622
  const violations = runValidation(paths, opts.repoRoot, { quiet: opts.quiet });