euparliamentmonitor 0.9.13 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/package.json +6 -4
  2. package/scripts/aggregator/article-generator.js +2 -1
  3. package/scripts/aggregator/article-html.d.ts +9 -0
  4. package/scripts/aggregator/article-html.js +134 -13
  5. package/scripts/aggregator/article-metadata.d.ts +25 -161
  6. package/scripts/aggregator/article-metadata.js +71 -649
  7. package/scripts/aggregator/editorial-brief-resolver.d.ts +9 -0
  8. package/scripts/aggregator/editorial-brief-resolver.js +3 -1
  9. package/scripts/aggregator/metadata/date-labels.d.ts +122 -0
  10. package/scripts/aggregator/metadata/date-labels.js +209 -0
  11. package/scripts/aggregator/metadata/text-utils.d.ts +188 -0
  12. package/scripts/aggregator/metadata/text-utils.js +542 -0
  13. package/scripts/constants/og-locales.d.ts +15 -0
  14. package/scripts/constants/og-locales.js +17 -0
  15. package/scripts/constants/seo/index.d.ts +21 -0
  16. package/scripts/constants/seo/index.js +23 -0
  17. package/scripts/constants/seo/og-locales.d.ts +59 -0
  18. package/scripts/constants/seo/og-locales.js +59 -0
  19. package/scripts/constants/seo/social-handles.d.ts +50 -0
  20. package/scripts/constants/seo/social-handles.js +65 -0
  21. package/scripts/constants/social-handles.d.ts +11 -0
  22. package/scripts/constants/social-handles.js +13 -0
  23. package/scripts/discover-untranslated-briefs.js +224 -19
  24. package/scripts/generators/news-indexes.d.ts +35 -0
  25. package/scripts/generators/news-indexes.js +67 -6
  26. package/scripts/generators/political-intelligence/html.js +14 -6
  27. package/scripts/generators/seo-copy.js +42 -0
  28. package/scripts/generators/sitemap/html.js +13 -5
  29. package/scripts/lint-src-todos.js +124 -0
  30. package/scripts/utils/copy-test-reports.js +1 -1
  31. package/scripts/utils/generate-docs-index.js +1 -1
  32. package/scripts/validate-brief-translations.js +158 -18
@@ -0,0 +1,59 @@
1
+ // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ import { ALL_LANGUAGES } from '../language-core.js';
4
+ /**
5
+ * OpenGraph locale code per supported language.
6
+ *
7
+ * The values follow `<language>_<TERRITORY>` (underscore-separated)
8
+ * as required by the OpenGraph protocol. Use the helpers below rather
9
+ * than reading the map directly so the locale logic stays in one
10
+ * place.
11
+ */
12
+ export const OG_LOCALES = {
13
+ en: 'en_GB',
14
+ sv: 'sv_SE',
15
+ da: 'da_DK',
16
+ no: 'nb_NO',
17
+ fi: 'fi_FI',
18
+ de: 'de_DE',
19
+ fr: 'fr_FR',
20
+ es: 'es_ES',
21
+ nl: 'nl_NL',
22
+ ar: 'ar_SA',
23
+ he: 'he_IL',
24
+ ja: 'ja_JP',
25
+ ko: 'ko_KR',
26
+ zh: 'zh_CN',
27
+ };
28
/**
 * Resolve the OpenGraph locale for a language code.
 *
 * `lang` is looked up as an own property of `OG_LOCALES`; anything that
 * is not a key of that map (an unknown code, or an inherited property
 * name such as `"toString"`) resolves to the English entry instead.
 *
 * The returned value uses the underscore-separated `language_TERRITORY`
 * form that OpenGraph expects. It is not a BCP-47 tag, which would use a
 * hyphen between subtags.
 *
 * @param lang - Language code used as a key of `OG_LOCALES` (e.g., `"en"`, `"sv"`)
 * @returns OpenGraph locale (e.g., `"en_GB"`); `OG_LOCALES.en` when `lang` is unknown
 */
export function getOgLocale(lang) {
  // Own-property check first so prototype members never leak out as locales.
  if (!Object.hasOwn(OG_LOCALES, lang)) {
    return OG_LOCALES.en;
  }
  const locale = OG_LOCALES[lang];
  // A key that exists but holds null/undefined still falls back to English.
  return locale ?? OG_LOCALES.en;
}
41
/**
 * Build the OpenGraph locale meta tag block: one `og:locale` for the
 * language being rendered, followed by one `og:locale:alternate` for each
 * other locale the site is published in. Emitting the alternates lets
 * OpenGraph crawlers discover the localized siblings without having to
 * follow the `<link rel="alternate" hreflang>` chain.
 *
 * Alternates are de-duplicated against the primary locale and against
 * each other. This matters when `currentLang` is not a supported code:
 * the primary then falls back to the English locale, and without the
 * de-duplication the same locale would be announced both as `og:locale`
 * and as an alternate of itself. For supported languages the output is
 * unchanged.
 *
 * Every line starts with leading whitespace so the fragment lines up with
 * the surrounding `<head>` markup of the callers.
 *
 * @param currentLang - Language being rendered (drives `og:locale`)
 * @returns Newline-joined HTML fragment ready to drop into `<head>`
 */
export function buildOgLocaleTags(currentLang) {
  const primary = getOgLocale(currentLang);
  // Seed with the primary so it can never reappear as an alternate.
  const emitted = new Set([primary]);
  const lines = [` <meta property="og:locale" content="${primary}">`];
  for (const code of ALL_LANGUAGES) {
    if (code === currentLang) continue;
    const locale = getOgLocale(code);
    // Skip locales already announced (fallback collisions, repeated codes).
    if (emitted.has(locale)) continue;
    emitted.add(locale);
    lines.push(` <meta property="og:locale:alternate" content="${locale}">`);
  }
  return lines.join('\n');
}
59
+ //# sourceMappingURL=og-locales.js.map
@@ -0,0 +1,50 @@
1
+ /**
2
+ * @module Constants/SocialHandles
3
+ * @description Verified social-media handles for `twitter:site` /
4
+ * `twitter:creator` and the canonical organization sameAs list emitted
5
+ * into JSON-LD on every generated page.
6
+ *
7
+ * Why this lives in one file: the OpenGraph crawler, the Twitter
8
+ * card validator, and Google's structured-data `NewsMediaOrganization`
9
+ * graph all expect the same handle and the same sameAs URLs. Keeping
10
+ * them in one constant avoids drift between the four generators.
11
+ *
12
+ * **Empty-string semantics**: when a handle is not yet provisioned
13
+ * the matching constant is `''` (empty string). The emit helpers
14
+ * skip emitting an empty tag rather than producing
15
+ * `<meta name="twitter:site" content="">`, which Twitter rejects as
16
+ * malformed.
17
+ */
18
+ /**
19
+ * Twitter / X handle for the publishing organization, including the
20
+ * leading `@`. Used for `<meta name="twitter:site">`.
21
+ *
22
+ * Currently empty (no verified Twitter presence as of May 2026); set
23
+ * this to e.g. `'@hack23ab'` once the org account is verified, and
24
+ * every generated page will start emitting the tag automatically.
25
+ */
26
+ export declare const TWITTER_SITE_HANDLE = "";
27
+ /**
28
+ * Twitter / X handle for the editorial team. No automatic fallback to
29
+ * the org handle is applied: when no separate creator account exists,
30
+ * set this to the same value as the org handle. Used for `<meta name="twitter:creator">`.
31
+ */
32
+ export declare const TWITTER_CREATOR_HANDLE = "";
33
+ /**
34
+ * Canonical sameAs URLs for the publishing `NewsMediaOrganization`.
35
+ * Emitted into the JSON-LD graph on every article and landing page so
36
+ * Google and other crawlers can verify the publisher identity across
37
+ * its different surfaces.
38
+ *
39
+ * Order matters: most authoritative / most-trafficked surface first.
40
+ */
41
+ export declare const ORG_SAME_AS: readonly string[];
42
+ /**
43
+ * Build the `twitter:site` / `twitter:creator` meta-tag block. Returns
44
+ * an empty string when neither handle is configured so the caller can
45
+ * safely interpolate the result without producing empty meta tags.
46
+ *
47
+ * @returns Newline-joined meta tags (no trailing newline) or `''`
48
+ */
49
+ export declare function buildTwitterAttributionTags(): string;
50
+ //# sourceMappingURL=social-handles.d.ts.map
@@ -0,0 +1,65 @@
1
+ // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ /**
4
+ * @module Constants/SocialHandles
5
+ * @description Verified social-media handles for `twitter:site` /
6
+ * `twitter:creator` and the canonical organization sameAs list emitted
7
+ * into JSON-LD on every generated page.
8
+ *
9
+ * Why this lives in one file: the OpenGraph crawler, the Twitter
10
+ * card validator, and Google's structured-data `NewsMediaOrganization`
11
+ * graph all expect the same handle and the same sameAs URLs. Keeping
12
+ * them in one constant avoids drift between the four generators.
13
+ *
14
+ * **Empty-string semantics**: when a handle is not yet provisioned
15
+ * the matching constant is `''` (empty string). The emit helpers
16
+ * skip emitting an empty tag rather than producing
17
+ * `<meta name="twitter:site" content="">`, which Twitter rejects as
18
+ * malformed.
19
+ */
20
+ /**
21
+ * Twitter / X handle for the publishing organization, including the
22
+ * leading `@`. Used for `<meta name="twitter:site">`.
23
+ *
24
+ * Currently empty (no verified Twitter presence as of May 2026); set
25
+ * this to e.g. `'@hack23ab'` once the org account is verified, and
26
+ * every generated page will start emitting the tag automatically.
27
+ */
28
+ export const TWITTER_SITE_HANDLE = '';
29
+ /**
30
+ * Twitter / X handle for the editorial team. No automatic fallback to
31
+ * the org handle is applied: when no separate creator account exists,
32
+ * set this to the same value as the org handle. Used for `<meta name="twitter:creator">`.
33
+ */
34
+ export const TWITTER_CREATOR_HANDLE = '';
35
+ /**
36
+ * Canonical sameAs URLs for the publishing `NewsMediaOrganization`.
37
+ * Emitted into the JSON-LD graph on every article and landing page so
38
+ * Google and other crawlers can verify the publisher identity across
39
+ * its different surfaces.
40
+ *
41
+ * Order matters: most authoritative / most-trafficked surface first.
42
+ */
43
+ export const ORG_SAME_AS = [
44
+ 'https://github.com/Hack23',
45
+ 'https://github.com/Hack23/euparliamentmonitor',
46
+ 'https://hack23.com',
47
+ ];
48
/**
 * Render the `twitter:site` / `twitter:creator` meta tags for the
 * configured handles.
 *
 * A tag is produced only for a handle that is a non-empty string, so an
 * unprovisioned handle never yields `content=""`. When neither handle is
 * set the result is `''`, which callers can interpolate as-is.
 *
 * @returns Newline-joined meta tags (no trailing newline) or `''`
 */
export function buildTwitterAttributionTags() {
  // Order is fixed: site first, creator second.
  const candidates = [
    ['twitter:site', TWITTER_SITE_HANDLE],
    ['twitter:creator', TWITTER_CREATOR_HANDLE],
  ];
  return candidates
    .filter(([, handle]) => Boolean(handle))
    .map(([name, handle]) => ` <meta name="${name}" content="${handle}">`)
    .join('\n');
}
65
+ //# sourceMappingURL=social-handles.js.map
@@ -0,0 +1,11 @@
1
+ /**
2
+ * @module Constants/SocialHandles
3
+ * @description Backward-compatible re-export shim. The canonical
4
+ * location is `src/constants/seo/social-handles.ts`; this file remains
5
+ * so existing imports `from '../constants/social-handles.js'` keep
6
+ * working through the May-2026 architecture refactor.
7
+ *
8
+ * New code SHOULD import from `src/constants/seo/index.js`.
9
+ */
10
+ export { TWITTER_SITE_HANDLE, TWITTER_CREATOR_HANDLE, ORG_SAME_AS, buildTwitterAttributionTags, } from './seo/social-handles.js';
11
+ //# sourceMappingURL=social-handles.d.ts.map
@@ -0,0 +1,13 @@
1
+ // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ /**
4
+ * @module Constants/SocialHandles
5
+ * @description Backward-compatible re-export shim. The canonical
6
+ * location is `src/constants/seo/social-handles.ts`; this file remains
7
+ * so existing imports `from '../constants/social-handles.js'` keep
8
+ * working through the May-2026 architecture refactor.
9
+ *
10
+ * New code SHOULD import from `src/constants/seo/index.js`.
11
+ */
12
+ export { TWITTER_SITE_HANDLE, TWITTER_CREATOR_HANDLE, ORG_SAME_AS, buildTwitterAttributionTags, } from './seo/social-handles.js';
13
+ //# sourceMappingURL=social-handles.js.map
@@ -19,14 +19,34 @@
19
19
  * comfortably inside the gh-aw safe-outputs 10 MB patch ceiling and the
20
20
  * Claude Sonnet 4.6 60-minute wall-clock budget.
21
21
  *
22
- * Priority rules (newest-first, oldest-first within a brief):
22
+ * Priority rules for the `fresh-then-backlog` mode (default):
23
23
  *
24
- * 1. Sort by `<date>` descending so today's briefs win.
25
- * 2. Within the same date, briefs with *more* missing languages outrank
26
- * briefs with fewer missing languages so partial coverage gets completed
27
- * quickly.
28
- * 3. Remaining ties sort by `<slug>` alphabetically so the run is
29
- * deterministic and reviewers can predict which slugs land first.
24
+ * The queue is built from two pools so the day's newest brief still gets
25
+ * timely coverage on at least one of the three daily runs, while the
26
+ * long-tail backlog of older briefs (currently ~92 sources / ~1,196 missing
27
+ * translations) actually drains rather than being starved by today's wins.
28
+ *
29
+ * 1. **Fresh slice** (at most 1 entry per run): newest source with any
30
+ * missing language. Tie-breakers: more-missing first, slug asc,
31
+ * non-extended first.
32
+ * 2. **Backlog slice** (remaining `max-briefs - 1` slots): every other
33
+ * source with gaps, sorted by `<date>` ASC (oldest first), then
34
+ * `missingCount` ASC (finish half-done briefs before starting new
35
+ * ones), then `<slug>` ASC, then non-extended first.
36
+ *
37
+ * Final queue = `freshSlice.concat(backlogSlice)`.
38
+ *
39
+ * When `--max-briefs 1`, alternate fresh/backlog by run-number parity
40
+ * (`--run-number`, default 0, normally driven by `$GITHUB_RUN_NUMBER`) so
41
+ * the scheduled cadence still drains backlog while preserving freshness.
42
+ *
43
+ * Alternative modes (via `--mode`):
44
+ *
45
+ * - `backlog-only` — drop the fresh slot entirely; oldest-first across the
46
+ * entire backlog. Useful for catch-up batches.
47
+ * - `newest-first` — legacy behaviour: newest date first, more-missing
48
+ * first, slug asc. Retained for one-off operator dispatch where the
49
+ * operator explicitly wants today's brief covered first.
30
50
  *
31
51
  * Invocation:
32
52
  *
@@ -34,6 +54,8 @@
34
54
  * [--repo-root <path>] \
35
55
  * [--max-briefs <n>] # default 2
36
56
  * [--max-age-days <n>] # default 180; older briefs are skipped
57
+ * [--mode <name>] # fresh-then-backlog | backlog-only | newest-first
58
+ * [--run-number <n>] # parity selector when --max-briefs 1
37
59
  * [--output <path>] # default stdout
38
60
  * [--include-extended] # also scan extended/executive-brief.md
39
61
  *
@@ -44,12 +66,15 @@
44
66
  * Output JSON shape:
45
67
  * {
46
68
  * "generatedAt": "2026-05-16T08:24:16.909Z",
69
+ * "options": { "mode": "fresh-then-backlog", "runNumber": 207, ... },
47
70
  * "totals": {
48
71
  * "sourcesScanned": 92,
49
72
  * "sourcesWithGaps": 92,
50
73
  * "translationsMissing": 1196,
51
74
  * "queued": 2,
52
75
  * "queuedTranslations": 26,
76
+ * "freshNewestDate": "2026-05-16",
77
+ * "backlogOldestDate": "2025-11-19",
53
78
  * "topMissingLangs": [
54
79
  * { "lang": "ja", "count": 92 },
55
80
  * { "lang": "ko", "count": 92 },
@@ -63,7 +88,14 @@
63
88
  * "sourcePath": "analysis/daily/2026-05-15/breaking/executive-brief.md",
64
89
  * "missingLangs": ["sv","da","no","fi","de","fr","es","nl","ar","he","ja","ko","zh"],
65
90
  * "missingCount": 13,
66
- * "isExtended": false
91
+ * "isExtended": false,
92
+ * "sourceH2Count": 8,
93
+ * "sourceH2Titles": [
94
+ * { "line": 7, "title": "Headline Intelligence" },
95
+ * { "line": 96, "title": "IMF Economic Context" },
96
+ * { "line": 146, "title": "IMF Economic Context — May 2026 Update" }
97
+ * ],
98
+ * "sourceFixedTokens": { "IMF": 17, "WEO": 2, "TA-id": 4 }
67
99
  * },
68
100
  * ...
69
101
  * ]
@@ -82,6 +114,13 @@ export const TARGET_LANGS = Object.freeze(ALL_LANGUAGES.filter((lang) => lang !=
82
114
  /** Manual-dispatch upper bound that keeps one 60-minute run inside budget. */
83
115
  export const MAX_BRIEFS_LIMIT = 4;
84
116
 
117
+ /** Discovery prioritisation modes. */
118
+ export const DISCOVERY_MODES = Object.freeze([
119
+ 'fresh-then-backlog',
120
+ 'backlog-only',
121
+ 'newest-first',
122
+ ]);
123
+
85
124
  /**
86
125
  * Parse CLI argv into an options object. Exported for unit tests.
87
126
  * @param {string[]} argv
@@ -93,6 +132,8 @@ export function parseArgs(argv) {
93
132
  maxAgeDays: 180,
94
133
  output: null,
95
134
  includeExtended: false,
135
+ mode: 'fresh-then-backlog',
136
+ runNumber: 0,
96
137
  };
97
138
  for (let i = 0; i < argv.length; i += 1) {
98
139
  const arg = argv[i];
@@ -116,6 +157,14 @@ export function parseArgs(argv) {
116
157
  case '--include-extended':
117
158
  opts.includeExtended = true;
118
159
  break;
160
+ case '--mode':
161
+ opts.mode = argv[i + 1];
162
+ i += 1;
163
+ break;
164
+ case '--run-number':
165
+ opts.runNumber = Number(argv[i + 1]);
166
+ i += 1;
167
+ break;
119
168
  case '--help':
120
169
  case '-h':
121
170
  printHelp();
@@ -137,14 +186,22 @@ export function parseArgs(argv) {
137
186
  if (!Number.isFinite(opts.maxAgeDays) || opts.maxAgeDays < 1) {
138
187
  throw new Error('--max-age-days must be a positive integer');
139
188
  }
189
+ if (!DISCOVERY_MODES.includes(opts.mode)) {
190
+ throw new Error(
191
+ `--mode must be one of: ${DISCOVERY_MODES.join(', ')} (got "${opts.mode}")`,
192
+ );
193
+ }
194
+ if (!Number.isInteger(opts.runNumber) || opts.runNumber < 0) {
195
+ throw new Error('--run-number must be a non-negative integer');
196
+ }
140
197
  return opts;
141
198
  }
142
199
 
143
200
  function printHelp() {
144
201
  process.stdout.write(
145
202
  'Usage: discover-untranslated-briefs.js [--repo-root <path>] ' +
146
- '[--max-briefs <n>] [--max-age-days <n>] [--output <path>] ' +
147
- '[--include-extended]\n'
203
+ '[--max-briefs <n>] [--max-age-days <n>] [--mode <name>] ' +
204
+ '[--run-number <n>] [--output <path>] [--include-extended]\n',
148
205
  );
149
206
  }
150
207
 
@@ -223,13 +280,94 @@ export function findMissingLangs(source) {
223
280
  return missing;
224
281
  }
225
282
 
283
+ /**
284
+ * Fixed-token classes the translator must preserve verbatim. Aligned with
285
+ * `scripts/validate-brief-translations.js` `FIXED_TOKEN_PATTERNS` so the
286
+ * discovery report and the validator surface the same shape of evidence.
287
+ * Keys are stable identifiers (used by the agent prompt); values are the
288
+ * global regex the count is computed against.
289
+ *
290
+ * @type {ReadonlyArray<{ key: string, pattern: RegExp }>}
291
+ */
292
+ const FIXED_TOKEN_CLASSES = Object.freeze([
293
+ { key: 'IMF', pattern: /\bIMF\b/g },
294
+ { key: 'WEO', pattern: /\bWEO\b/g },
295
+ { key: 'World Bank', pattern: /\bWorld Bank\b/g },
296
+ { key: 'Fiscal Monitor', pattern: /\bFiscal Monitor\b/g },
297
+ { key: 'data-vintage', pattern: /data-vintage="WEO-[A-Za-z]+-\d{4}"/g },
298
+ { key: 'TA-id', pattern: /\bTA-\d{1,2}-\d{4}-\d{4}\b/g },
299
+ { key: 'procedure-id', pattern: /\b\d{4}\/\d{4}\([A-Z]{3}\)/g },
300
+ ]);
301
+
302
/**
 * Extract the H2 section titles from a markdown source file.
 *
 * Each result carries the 1-based line number of the heading and its
 * visible title (the leading `## ` removed, surrounding whitespace
 * trimmed). The list is meant to expose near-duplicate headings such as
 * `## IMF Economic Context` followed by
 * `## IMF Economic Context — May 2026 Update`, so they can be told apart
 * before any translation work begins.
 *
 * Both LF and CRLF line endings are supported. Splitting on `'\n'` alone
 * would leave a trailing `\r` on every line of a CRLF file; the heading
 * pattern cannot match such a line (`.` never matches `\r`), so every
 * heading would be silently dropped.
 *
 * Only lines that start with exactly two `#` followed by whitespace and a
 * non-blank title count; deeper headings (`###`) are ignored. Headings
 * inside fenced code blocks are not excluded.
 *
 * @param {string} absPath - Path of the markdown file to scan
 * @returns {Array<{ line: number, title: string }>} Headings in file
 *   order; empty when the file does not exist or has no H2 heading
 */
export function extractH2Titles(absPath) {
  if (!fs.existsSync(absPath)) return [];
  const text = fs.readFileSync(absPath, 'utf8');
  // Accept both Unix and Windows line endings; line numbers are unaffected.
  const lines = text.split(/\r?\n/);
  // Non-global pattern, so it carries no state between lines.
  const h2Pattern = /^##\s+(\S.*)$/;
  const out = [];
  for (let i = 0; i < lines.length; i += 1) {
    const match = h2Pattern.exec(lines[i]);
    if (match) out.push({ line: i + 1, title: match[1].trim() });
  }
  return out;
}
326
+
327
/**
 * Tally how often each `FIXED_TOKEN_CLASSES` pattern occurs in a source
 * brief.
 *
 * The result maps a class key to its match count. Classes without any
 * match are left out, so short briefs produce a compact object. A path
 * that does not exist yields an empty object.
 *
 * @param {string} absPath - Path of the source brief to scan
 * @returns {Record<string, number>} Match count per token-class key
 */
export function countFixedTokens(absPath) {
  if (!fs.existsSync(absPath)) {
    return {};
  }
  const body = fs.readFileSync(absPath, 'utf8');
  const tally = {};
  for (const tokenClass of FIXED_TOKEN_CLASSES) {
    // Work on a private copy so the shared regex's `lastIndex` is never touched.
    const matcher = new RegExp(tokenClass.pattern.source, tokenClass.pattern.flags);
    // With the global flag, `match` returns every occurrence, or null for none.
    const hits = body.match(matcher);
    if (hits !== null) {
      tally[tokenClass.key] = hits.length;
    }
  }
  return tally;
}
347
+
226
348
  /**
227
349
  * Build the prioritised queue. See module docstring for ordering rules.
228
350
  *
229
351
  * @param {ReturnType<typeof findExecutiveBriefSources>} sources
230
- * @param {number} maxBriefs
352
+ * @param {number | { maxBriefs: number, mode?: string, runNumber?: number }} options
353
+ * Numeric form retained for backward compatibility — equivalent to
354
+ * `{ maxBriefs, mode: 'fresh-then-backlog', runNumber: 0 }`.
231
355
  */
232
- export function buildQueue(sources, maxBriefs) {
356
+ export function buildQueue(sources, options) {
357
+ const opts =
358
+ typeof options === 'number'
359
+ ? { maxBriefs: options, mode: 'fresh-then-backlog', runNumber: 0 }
360
+ : {
361
+ maxBriefs: options.maxBriefs,
362
+ mode: options.mode || 'fresh-then-backlog',
363
+ runNumber: Number.isFinite(options.runNumber) ? options.runNumber : 0,
364
+ };
365
+ if (!DISCOVERY_MODES.includes(opts.mode)) {
366
+ throw new Error(
367
+ `buildQueue: invalid mode "${opts.mode}" (expected one of ${DISCOVERY_MODES.join(', ')})`,
368
+ );
369
+ }
370
+
233
371
  const withGaps = [];
234
372
  let totalMissing = 0;
235
373
  const missingByLang = new Map();
@@ -240,6 +378,16 @@ export function buildQueue(sources, maxBriefs) {
240
378
  for (const lang of missing) {
241
379
  missingByLang.set(lang, (missingByLang.get(lang) || 0) + 1);
242
380
  }
381
+ // Pre-compute structural targets for the translator agent so it has
382
+ // explicit visibility into duplicate-titled H2 sections and the
383
+ // verbatim-preserve token budget BEFORE any translation is written.
384
+ // Surfacing these here (rather than relying on the agent to discover
385
+ // them) prevents the regression observed in run #25983007788, where
386
+ // 13 sibling translations of a single brief silently collapsed a
387
+ // `## IMF Economic Context — May 2026 Update` section because the
388
+ // agent treated it as a duplicate of `## IMF Economic Context`.
389
+ const sourceH2Titles = extractH2Titles(source.absPath);
390
+ const sourceFixedTokens = countFixedTokens(source.absPath);
243
391
  withGaps.push({
244
392
  date: source.date,
245
393
  slug: source.slug,
@@ -247,21 +395,59 @@ export function buildQueue(sources, maxBriefs) {
247
395
  missingLangs: missing,
248
396
  missingCount: missing.length,
249
397
  isExtended: source.isExtended,
398
+ sourceH2Titles,
399
+ sourceH2Count: sourceH2Titles.length,
400
+ sourceFixedTokens,
250
401
  });
251
402
  }
252
403
 
253
- // Sort: newest date first; then more-missing first (finish partial briefs);
254
- // then slug alphabetical for determinism.
255
- withGaps.sort((a, b) => {
404
+ // Two canonical orderings drive the three modes.
405
+ const newestFirst = (a, b) => {
256
406
  if (a.date !== b.date) return a.date < b.date ? 1 : -1;
257
407
  if (a.missingCount !== b.missingCount) return b.missingCount - a.missingCount;
258
408
  if (a.slug !== b.slug) return a.slug < b.slug ? -1 : 1;
259
- // Prefer non-extended over extended when both exist for the same slug
260
409
  if (a.isExtended !== b.isExtended) return a.isExtended ? 1 : -1;
261
410
  return 0;
262
- });
411
+ };
412
+ const oldestFirstFinishPartial = (a, b) => {
413
+ if (a.date !== b.date) return a.date < b.date ? -1 : 1;
414
+ // Within the same date, finish briefs that are closer to completion
415
+ // first (fewer missing languages → ascending).
416
+ if (a.missingCount !== b.missingCount) return a.missingCount - b.missingCount;
417
+ if (a.slug !== b.slug) return a.slug < b.slug ? -1 : 1;
418
+ if (a.isExtended !== b.isExtended) return a.isExtended ? 1 : -1;
419
+ return 0;
420
+ };
263
421
 
264
- const queue = withGaps.slice(0, maxBriefs);
422
+ let queue;
423
+ if (opts.mode === 'newest-first') {
424
+ queue = [...withGaps].sort(newestFirst).slice(0, opts.maxBriefs);
425
+ } else if (opts.mode === 'backlog-only') {
426
+ queue = [...withGaps].sort(oldestFirstFinishPartial).slice(0, opts.maxBriefs);
427
+ } else {
428
+ // fresh-then-backlog
429
+ const newestSorted = [...withGaps].sort(newestFirst);
430
+ const oldestSorted = [...withGaps].sort(oldestFirstFinishPartial);
431
+ if (opts.maxBriefs === 1) {
432
+ // Alternate fresh/backlog by run-number parity so the scheduled
433
+ // cadence still drains backlog while preserving freshness on every
434
+ // other slot. Even run-numbers (0, 2, ...) take the fresh slot;
435
+ // odd run-numbers take the oldest backlog slot.
436
+ const pool = opts.runNumber % 2 === 0 ? newestSorted : oldestSorted;
437
+ queue = pool.slice(0, 1);
438
+ } else {
439
+ const freshSlice = newestSorted.slice(0, 1);
440
+ const freshKey = freshSlice[0]
441
+ ? `${freshSlice[0].date}\u0000${freshSlice[0].slug}\u0000${freshSlice[0].isExtended}`
442
+ : null;
443
+ const backlogSlice = oldestSorted
444
+ .filter(
445
+ (entry) => `${entry.date}\u0000${entry.slug}\u0000${entry.isExtended}` !== freshKey,
446
+ )
447
+ .slice(0, Math.max(0, opts.maxBriefs - 1));
448
+ queue = [...freshSlice, ...backlogSlice];
449
+ }
450
+ }
265
451
  const queuedTranslations = queue.reduce((sum, item) => sum + item.missingCount, 0);
266
452
 
267
453
  // Top 3 most-blocked target languages across the entire backlog. Operators
@@ -273,6 +459,17 @@ export function buildQueue(sources, maxBriefs) {
273
459
  .slice(0, 3)
274
460
  .map(([lang, count]) => ({ lang, count }));
275
461
 
462
+ // Operator-visibility extents: newest source still carrying gaps (the
463
+ // candidate for the fresh slot) and oldest source still carrying gaps
464
+ // (the candidate for the backlog slot). Both fall back to null when the
465
+ // backlog is empty.
466
+ let freshNewestDate = null;
467
+ let backlogOldestDate = null;
468
+ for (const entry of withGaps) {
469
+ if (freshNewestDate === null || entry.date > freshNewestDate) freshNewestDate = entry.date;
470
+ if (backlogOldestDate === null || entry.date < backlogOldestDate) backlogOldestDate = entry.date;
471
+ }
472
+
276
473
  return {
277
474
  totals: {
278
475
  sourcesScanned: sources.length,
@@ -280,6 +477,8 @@ export function buildQueue(sources, maxBriefs) {
280
477
  translationsMissing: totalMissing,
281
478
  queued: queue.length,
282
479
  queuedTranslations,
480
+ freshNewestDate,
481
+ backlogOldestDate,
283
482
  topMissingLangs,
284
483
  },
285
484
  queue,
@@ -296,13 +495,19 @@ export function main(argv) {
296
495
  includeExtended: opts.includeExtended,
297
496
  maxAgeDays: opts.maxAgeDays,
298
497
  });
299
- const { totals, queue } = buildQueue(sources, opts.maxBriefs);
498
+ const { totals, queue } = buildQueue(sources, {
499
+ maxBriefs: opts.maxBriefs,
500
+ mode: opts.mode,
501
+ runNumber: opts.runNumber,
502
+ });
300
503
  const payload = {
301
504
  generatedAt: new Date().toISOString(),
302
505
  options: {
303
506
  maxBriefs: opts.maxBriefs,
304
507
  maxAgeDays: opts.maxAgeDays,
305
508
  includeExtended: opts.includeExtended,
509
+ mode: opts.mode,
510
+ runNumber: opts.runNumber,
306
511
  },
307
512
  totals,
308
513
  queue,
@@ -8,6 +8,25 @@ import type { ParsedArticle } from '../types/index.js';
8
8
  * @returns Filename string
9
9
  */
10
10
  export declare function getIndexFilename(lang: string): string;
11
+ /**
12
+ * Heal JSON-LD `description` field corruption left behind by a prior
13
+ * version of {@link applyArticleSeoBackfill}. The old regex
14
+ * `"description":"[^"]*"` terminated at the first JSON-escaped quote
15
+ * (`\"`), so every rebuild prepended a new value in front of the
16
+ * previous description's tail — producing an unparseable JSON-LD
17
+ * block whose `description` value was followed by a run of repeated
18
+ * fragments before `,"datePublished"`.
19
+ *
20
+ * This pass is idempotent: when the JSON-LD is already well-formed,
21
+ * the regex `[^,]*` matches the empty string and the file is left
22
+ * unchanged. It runs unconditionally because the original backfill
23
+ * path skips files whose `<meta name="description">` is already
24
+ * clean, even when their JSON-LD is corrupted.
25
+ *
26
+ * @param filenames - News article filenames to inspect
27
+ * @returns Number of HTML files updated
28
+ */
29
+ export declare function healJsonLdDescriptionCorruption(filenames: readonly string[]): number;
11
30
  /**
12
31
  * Prefix legacy descriptions with date and **localized** category label
13
32
  * so duplicate strings become page-specific before the 180-character
@@ -36,6 +55,22 @@ export declare function getIndexFilename(lang: string): string;
36
55
  * already substantive
37
56
  */
38
57
  export declare function buildLegacyBackfillDescription(date: string, slug: string, lang: string, description: string): string;
58
+ /**
59
+ * Apply SEO meta tag replacements to a complete article HTML document.
60
+ *
61
+ * Exported for the regression test in
62
+ * `test/unit/news-indexes-jsonld-description-regex.test.js`, which
63
+ * locks in the JSON-LD description regex against the duplicate-tail
64
+ * bug (the legacy `"description":"[^"]*"` pattern terminated at the
65
+ * first JSON-escaped quote `\"` and left the previous description's
66
+ * tail in place, accumulating duplicates on every prebuild run).
67
+ *
68
+ * @param html - Existing article HTML
69
+ * @param description - Backfilled meta description
70
+ * @param keywords - Backfilled keyword list
71
+ * @returns Updated HTML
72
+ */
73
+ export declare function applyArticleSeoBackfill(html: string, description: string, keywords: readonly string[]): string;
39
74
  /**
40
75
  * Backfill hreflang alternate links for all article HTML files.
41
76
  *