euparliamentmonitor 0.9.13 → 0.9.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -4
- package/scripts/aggregator/article-generator.js +2 -1
- package/scripts/aggregator/article-html.d.ts +9 -0
- package/scripts/aggregator/article-html.js +134 -13
- package/scripts/aggregator/article-metadata.d.ts +25 -161
- package/scripts/aggregator/article-metadata.js +71 -649
- package/scripts/aggregator/editorial-brief-resolver.d.ts +9 -0
- package/scripts/aggregator/editorial-brief-resolver.js +3 -1
- package/scripts/aggregator/metadata/date-labels.d.ts +122 -0
- package/scripts/aggregator/metadata/date-labels.js +209 -0
- package/scripts/aggregator/metadata/text-utils.d.ts +188 -0
- package/scripts/aggregator/metadata/text-utils.js +542 -0
- package/scripts/constants/og-locales.d.ts +15 -0
- package/scripts/constants/og-locales.js +17 -0
- package/scripts/constants/seo/index.d.ts +21 -0
- package/scripts/constants/seo/index.js +23 -0
- package/scripts/constants/seo/og-locales.d.ts +59 -0
- package/scripts/constants/seo/og-locales.js +59 -0
- package/scripts/constants/seo/social-handles.d.ts +50 -0
- package/scripts/constants/seo/social-handles.js +65 -0
- package/scripts/constants/social-handles.d.ts +11 -0
- package/scripts/constants/social-handles.js +13 -0
- package/scripts/discover-untranslated-briefs.js +224 -19
- package/scripts/generators/news-indexes.d.ts +35 -0
- package/scripts/generators/news-indexes.js +67 -6
- package/scripts/generators/political-intelligence/html.js +14 -6
- package/scripts/generators/seo-copy.js +42 -0
- package/scripts/generators/sitemap/html.js +13 -5
- package/scripts/lint-src-todos.js +124 -0
- package/scripts/utils/copy-test-reports.js +1 -1
- package/scripts/utils/generate-docs-index.js +1 -1
- package/scripts/validate-brief-translations.js +158 -18
|
@@ -51,40 +51,12 @@ import path from 'path';
|
|
|
51
51
|
import { ALL_LANGUAGES, getLocalizedString } from '../constants/language-core.js';
|
|
52
52
|
import { BREAKING_NEWS_TITLES, COMMITTEE_REPORTS_TITLES, ELECTION_CYCLE_TITLES, LOCALIZED_KEYWORDS, MONTH_AHEAD_TITLES, MONTHLY_REVIEW_TITLES, MOTIONS_TITLES, PROPOSITIONS_TITLES, QUARTER_AHEAD_TITLES, QUARTER_IN_REVIEW_TITLES, TERM_OUTLOOK_TITLES, WEEK_AHEAD_TITLES, WEEKLY_REVIEW_TITLES, YEAR_AHEAD_TITLES, YEAR_IN_REVIEW_TITLES, } from '../constants/language-articles.js';
|
|
53
53
|
import { resolveLocalizedBriefHighlight } from './editorial-brief-resolver.js';
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
* on its own and gets enriched with date/context. Independent from
|
|
61
|
-
* {@link DESCRIPTION_MIN_LENGTH} (which controls sentence-boundary
|
|
62
|
-
* truncation behaviour). Set lower than DESCRIPTION_MIN_LENGTH so a
|
|
63
|
-
* clean 100-140 char prose lede is preserved verbatim instead of being
|
|
64
|
-
* padded with date/context boilerplate.
|
|
65
|
-
*/
|
|
66
|
-
const ENRICHMENT_TRIGGER_LENGTH = 100;
/** Hard ceiling for `<title>` text; anything longer is clipped by the title truncator. */
const TITLE_MAX_LENGTH = 140;
/**
 * Lower edge of the "good headline" window. When a BLUF/lede-derived
 * title overflows `TITLE_MAX_LENGTH`, the truncator prefers to stop at a
 * clause boundary located between this offset and `TITLE_MAX_LENGTH`, so
 * the result reads as a finished clause instead of prose chopped
 * mid-sentence with an ellipsis.
 */
const HEADLINE_SOFT_MIN = 60;
/**
 * Separators treated as natural clause breaks in a BLUF / lede paragraph,
 * ordered from most to least preferred: colon, em-dash, en-dash, full
 * stop, semicolon. Falling back to a plain space is handled by the
 * truncation code itself, not by this table.
 */
const HEADLINE_CLAUSE_BOUNDARIES = [': ', ' — ', ' – ', '. ', '; '];
|
|
54
|
+
// Text-utility constants + helpers — extracted into the `metadata/`
|
|
55
|
+
// bounded context as pure leaf-module functions. Re-exported here for
|
|
56
|
+
// back-compat with existing call sites; new code should import directly
|
|
57
|
+
// from `./metadata/text-utils.js`.
|
|
58
|
+
import { DESCRIPTION_MAX_LENGTH, EXTENDED_DESCRIPTION_MAX_LENGTH, ENRICHMENT_TRIGGER_LENGTH, shouldSkipDescriptionLine, stripLeadingProseLabel, stripInlineMarkdown, truncateDescription, truncateExtendedDescription, truncateTitle, extractFirstSentence, } from './metadata/text-utils.js';
|
|
59
|
+
export { shouldSkipDescriptionLine, stripLeadingProseLabel, stripInlineMarkdown, truncateDescription, truncateExtendedDescription, truncateTitle, extractFirstSentence, } from './metadata/text-utils.js';
|
|
88
60
|
/** Localized labels used to enrich short or duplicate-prone meta descriptions. */
|
|
89
61
|
const SEO_CONTEXT_LABELS = {
|
|
90
62
|
en: {
|
|
@@ -315,435 +287,6 @@ const ARTIFACT_CATEGORY_PREFIXES = [
|
|
|
315
287
|
'weekly outlook',
|
|
316
288
|
'wildcards blackswans',
|
|
317
289
|
];
|
|
318
|
-
/**
 * Emoji that Stage-B agents put in front of decorated metadata rows
 * (e.g. `📋 Analysis Owner:`). A line opening with any of these is
 * treated as metadata, never as prose.
 */
const EMOJI_BANNER_CHARS = [
    '📋',
    '📅',
    '🔍',
    '🏛',
    '📰',
    '📊',
    '🏷',
    '📈',
    '📉',
    '⚠',
    '🔔',
    '🎯',
    '🗳',
    '🏢',
    '📄',
];
/**
 * Labels a prose description must never open with. Each entry is compared
 * case-insensitively against the start of the line (after leading `*` /
 * `_` emphasis markers are removed) and must be followed by a colon,
 * optionally preceded by a space or by closing `*` / `**` markers.
 */
const METADATA_LINE_PREFIXES = [
    'Admiralty Grade',
    'Analysis Date',
    'Analysis Owner',
    'Article Type',
    'Article Window',
    'Assessment Date',
    'Briefing',
    'Briefing Date',
    'Classification',
    'Classification Date',
    'Confidence',
    'Confidence in Evidence',
    'Data Sources',
    'Date',
    'Document Type',
    'Filing Date',
    'Generated',
    'Horizon',
    'IMF Status',
    'Last Updated',
    'Parliamentary Status',
    'Parliamentary Term',
    'Period',
    'Prepared',
    'Purpose',
    'Region',
    'Reporting',
    'Reporting Period',
    'Reporting Window',
    'Run',
    'Run ID',
    'Series',
    'Series Run',
    'Source',
    'Sources',
    'SPDX-FileCopyrightText',
    'SPDX-License-Identifier',
    'Topic',
    'Type',
    'WEP Band',
    'WEP Grade',
    'Window',
];
/**
 * Decide whether a line is unusable as a prose description.
 *
 * Rejected lines are: empty lines; Markdown/HTML structure (headings,
 * blockquotes, tags, table rows, rules, code fences); `%%` chart
 * directives and `title …` chart lines; rows opening with an
 * {@link EMOJI_BANNER_CHARS} emoji; `Key: value` rows whose key is listed
 * in {@link METADATA_LINE_PREFIXES}; lines made only of three or more
 * rule characters; and multi-key banner rows detected by
 * `isLocalizedBannerRow`.
 *
 * @param line - Trimmed line from the aggregated Markdown source
 * @returns `true` when the line is not prose and should be skipped
 */
export function shouldSkipDescriptionLine(line) {
    if (line.length === 0) {
        return true;
    }
    const structuralOpeners = ['#', '>', '<', '|', '---', '===', '```', '~~~', '%%'];
    if (structuralOpeners.some((opener) => line.startsWith(opener))) {
        return true;
    }
    if (/^title\s/i.test(line)) {
        return true;
    }
    if (EMOJI_BANNER_CHARS.some((char) => line.startsWith(char))) {
        return true;
    }
    // Remove leading emphasis markers once and lower-case once; the result
    // does not depend on the prefix being tested. (`/^\*+/` already removes
    // every leading asterisk, so no second `**` pass is needed.)
    const label = line.replace(/^\*+/, '').replace(/^_+/, '').trim().toLowerCase();
    // A key may be closed by `:`, ` :`, or by the tail of a bold/italic
    // wrapper followed by a colon (`**:` / `*:`).
    const labelTerminators = [':', ' :', '**:', '*:'];
    const opensWithMetadataLabel = METADATA_LINE_PREFIXES.some((prefix) => {
        const key = prefix.toLowerCase();
        return labelTerminators.some((terminator) => label.startsWith(`${key}${terminator}`));
    });
    if (opensWithMetadataLabel) {
        return true;
    }
    if (/^[-*_=~.]{3,}$/.test(line)) {
        return true;
    }
    return isLocalizedBannerRow(line);
}
|
|
436
|
-
/**
 * Language-agnostic banner-row detector. Stage-B artefacts open with a
 * metadata banner of the shape
 * `**Date:** 2026-05-15 | **Type:** Breaking | **Run:** breaking-run-001`
 * and its localized siblings — notably Japanese / Chinese / Korean briefs,
 * which place the full-width colon `：` (U+FF1A) **inside** the bold span
 * (`**日付：**`) rather than after it. The English label table cannot
 * cover those vocabularies, so this helper matches the structural shape
 * instead: the line starts with `**`, contains at least one `|`
 * separator, and carries two or more bold key markers that end with — or
 * are immediately followed by — an ASCII colon `:` or a full-width
 * colon `：`.
 *
 * @param line - Trimmed source line
 * @returns `true` when the line is a banner row in any locale
 */
function isLocalizedBannerRow(line) {
    if (!line.startsWith('**') || !line.includes('|')) {
        return false;
    }
    // `\uFF1A` is the full-width colon; it is written as an escape so the
    // character class cannot silently collapse to a duplicated ASCII colon.
    const colonInsideBold = /\*\*[^*]+[:\uFF1A]\s*\*\*/g;
    const colonAfterBold = /\*\*[^*]+\*\*\s*[:\uFF1A]/g;
    const inside = (line.match(colonInsideBold) ?? []).length;
    const after = (line.match(colonAfterBold) ?? []).length;
    return inside + after >= 2;
}
|
|
463
|
-
/**
 * Remove a leading all-caps prose label such as `SITUATION:`,
 * `KEY MOTION:`, `BLUF:`, `BOTTOM LINE:` or `TIER-1:` from a prose line.
 * BLUF-style writing uses these openers; once the bold wrapper has been
 * stripped the literal text remains and would otherwise shout at the
 * start of the SEO description.
 *
 * The label is everything before the first `": "`. It is removed only
 * when all of the following hold:
 * - the separator sits at offset 2–80;
 * - the label is at least 3 characters long, starts with `A–Z`, and
 *   otherwise contains only `A–Z`, digits, spaces and hyphens;
 * - at least 20 characters of text remain after the label.
 * Otherwise the line is returned untouched.
 *
 * @param line - Plain prose line (inline Markdown already stripped)
 * @returns Line with the all-caps opener removed, or the original line
 */
export function stripLeadingProseLabel(line) {
    const separatorAt = line.indexOf(': ');
    const separatorInRange = separatorAt >= 2 && separatorAt <= 80;
    if (!separatorInRange) {
        return line;
    }
    const remainder = line.slice(separatorAt + 2).trim();
    const opener = line.slice(0, separatorAt);
    const isShoutedLabel = opener.length >= 3 && /^[A-Z][A-Z0-9 -]{1,79}$/.test(opener);
    return remainder.length >= 20 && isShoutedLabel ? remainder : line;
}
|
|
501
|
-
/**
 * Reduce a Markdown line to the plain text a reader would see, so it can
 * be used as meta-tag content. Each inline construct is replaced by its
 * visible text, in this order: images (alt text), links (link text),
 * inline code, `**bold**`, `__bold__`, `*italic*`, `_italic_`,
 * `~~strike~~`. Whitespace runs are then collapsed to single spaces and
 * the result is trimmed.
 *
 * @param raw - Trimmed Markdown line
 * @returns Plain-text variant
 */
export function stripInlineMarkdown(raw) {
    // Every rule keeps capture group 1 (the visible text) and drops the
    // surrounding syntax. Order matters: images before links, double
    // markers before single ones.
    const unwrapRules = [
        /!\[([^\]\n]{0,500})\]\(([^)\n]{0,500})\)/g,
        /\[([^\]\n]{1,500})\]\(([^)\n]{0,500})\)/g,
        /`([^`\n]{1,500})`/g,
        /\*\*([^*\n]{1,500})\*\*/g,
        /__([^_\n]{1,500})__/g,
        /\*([^*\n]{1,500})\*/g,
        /_([^_\n]{1,500})_/g,
        /~~([^~\n]{1,500})~~/g,
    ];
    const unwrapped = unwrapRules.reduce((text, rule) => text.replace(rule, '$1'), raw);
    return unwrapped.replace(/\s+/g, ' ').trim();
}
|
|
523
|
-
/** Connectors and determiners that look like broken copy when they are
 *  the last word before a truncation ellipsis. */
const TRAILING_STOP_WORDS = new Set([
    'the',
    'a',
    'an',
    'of',
    'to',
    'for',
    'in',
    'on',
    'at',
    'by',
    'and',
    'or',
    'with',
    'from',
]);
/** Single trailing characters removed before our own ellipsis is added,
 *  so the output never carries a doubled ellipsis or stray punctuation. */
const TRAILING_PUNCT = /[.,;:—\-…\s]/u;
/**
 * Trim the end of a pre-clipped string until it ends on a meaningful
 * word: trailing punctuation/whitespace (including an existing ellipsis)
 * is removed, then a final stop-word preceded by a space is removed, and
 * the two steps repeat until neither applies. Written as plain loops
 * rather than a `(?:\s+stop-word)+$` regex to keep the work linear.
 *
 * @param input - Pre-clipped string to clean up
 * @returns Cleaned string with no trailing stop-words or punctuation
 */
function stripTrailingStopWordsAndPunctuation(input) {
    let text = input;
    for (;;) {
        const lengthBefore = text.length;
        // Walk back over trailing punctuation and cut once.
        let end = text.length;
        while (end > 0 && TRAILING_PUNCT.test(text.charAt(end - 1))) {
            end -= 1;
        }
        text = text.slice(0, end);
        // Drop the last word when it is a stop-word. A string without any
        // space is left alone, so a lone stop-word survives.
        const gap = text.lastIndexOf(' ');
        if (gap >= 0 && TRAILING_STOP_WORDS.has(text.slice(gap + 1).toLowerCase())) {
            text = text.slice(0, gap);
        }
        if (text.length === lengthBefore) {
            return text;
        }
    }
}
|
|
573
|
-
/**
 * Clamp a description to `DESCRIPTION_MAX_LENGTH` UTF-16 code units.
 *
 * Text that already fits is returned unchanged. Otherwise the text is cut
 * to `DESCRIPTION_MAX_LENGTH - 1` units and one of two endings is used:
 * 1. If the cut contains a sentence terminator (`. `, `! `, `? `) at or
 *    beyond `DESCRIPTION_MIN_LENGTH`, the text ends on that terminator and
 *    no ellipsis is added.
 * 2. Otherwise the cut is pulled back to the last space (when that space
 *    lies within the final 60 units), trailing stop-words/punctuation are
 *    stripped, and `…` is appended.
 *
 * @param text - Raw description text
 * @returns Description no longer than `DESCRIPTION_MAX_LENGTH`, with a
 *   trailing ellipsis only when it was clipped mid-sentence
 */
export function truncateDescription(text) {
    if (text.length <= DESCRIPTION_MAX_LENGTH) {
        return text;
    }
    let cut = text.slice(0, DESCRIPTION_MAX_LENGTH - 1);
    // `slice` counts UTF-16 units, so the cut can land between the two
    // halves of a surrogate pair (emoji, rare CJK). Drop a dangling high
    // surrogate instead of emitting an unpaired code unit.
    const lastUnit = cut.charCodeAt(cut.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        cut = cut.slice(0, -1);
    }
    // Prefer the last full sentence inside the cut so the text never ends
    // on a dangling determiner ("…year. The"), but only when enough body
    // text survives to be useful.
    const sentenceEnd = Math.max(cut.lastIndexOf('. '), cut.lastIndexOf('! '), cut.lastIndexOf('? '));
    if (sentenceEnd >= DESCRIPTION_MIN_LENGTH) {
        return cut.slice(0, sentenceEnd + 1);
    }
    const lastSpace = cut.lastIndexOf(' ');
    const wordSafe = lastSpace > DESCRIPTION_MAX_LENGTH - 60 ? cut.slice(0, lastSpace) : cut;
    // Strip dangling stop-words and punctuation (including an upstream
    // ellipsis) so the appended `…` is never doubled.
    return `${stripTrailingStopWordsAndPunctuation(wordSafe)}…`;
}
|
|
602
|
-
/**
 * Clamp a title to `TITLE_MAX_LENGTH` UTF-16 code units.
 *
 * Titles that already fit are returned unchanged. For longer titles the
 * first `TITLE_MAX_LENGTH` units are searched for each entry of
 * `HEADLINE_CLAUSE_BOUNDARIES` in priority order. The last occurrence of
 * the first boundary found at or beyond `HEADLINE_SOFT_MIN` ends the
 * title, provided the cleaned clause is still at least
 * `HEADLINE_SOFT_MIN` long; that result is a complete clause, so no
 * ellipsis is added. When no boundary qualifies, the title is cut at
 * `TITLE_MAX_LENGTH - 1`, pulled back to the last space (if that space is
 * within the final 40 units), cleaned, and suffixed with `…`.
 *
 * @param text - Raw title text
 * @returns Title no longer than `TITLE_MAX_LENGTH`, with a trailing
 *   ellipsis only when no clause boundary could be used
 */
export function truncateTitle(text) {
    if (text.length <= TITLE_MAX_LENGTH) {
        return text;
    }
    const search = text.slice(0, TITLE_MAX_LENGTH);
    for (const boundary of HEADLINE_CLAUSE_BOUNDARIES) {
        const idx = search.lastIndexOf(boundary);
        if (idx >= HEADLINE_SOFT_MIN) {
            const clause = stripTrailingStopWordsAndPunctuation(text.slice(0, idx));
            if (clause.length >= HEADLINE_SOFT_MIN) {
                return clause;
            }
        }
    }
    let cut = text.slice(0, TITLE_MAX_LENGTH - 1);
    // `slice` counts UTF-16 units; if the cut split a surrogate pair, drop
    // the dangling high surrogate rather than emit an unpaired code unit.
    const lastUnit = cut.charCodeAt(cut.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        cut = cut.slice(0, -1);
    }
    const lastSpace = cut.lastIndexOf(' ');
    const wordSafe = lastSpace > TITLE_MAX_LENGTH - 40 ? cut.slice(0, lastSpace) : cut;
    return `${stripTrailingStopWordsAndPunctuation(wordSafe)}…`;
}
|
|
633
|
-
/**
 * Return the first complete sentence of a prose paragraph, for use as a
 * fallback editorial title when the artefact H1 is categorical (e.g.
 * `# EU Parliament Committee Reports`) and `<title>` has to come from the
 * BLUF / lede summary.
 *
 * The sentence is the prefix ending at the earliest terminator (`. `,
 * `! `, `? `, `; `) found at or beyond offset `HEADLINE_SOFT_MIN` within
 * the first `TITLE_MAX_LENGTH * 1.5` characters; the terminator character
 * itself is kept. A period that `isAbbreviationBoundary` recognises as
 * part of an abbreviation is skipped. That check is applied to periods
 * only: `!`, `?` and `;` always end the sentence.
 *
 * Paragraphs of at most `HEADLINE_SOFT_MIN` characters, and paragraphs
 * without a usable terminator, are returned trimmed but otherwise whole
 * so the title truncator can apply clause-boundary truncation instead.
 *
 * @param paragraph - Prose paragraph (inline Markdown already stripped)
 * @returns First sentence, or the trimmed paragraph when none is found
 */
export function extractFirstSentence(paragraph) {
    const trimmed = paragraph.trim();
    if (trimmed.length <= HEADLINE_SOFT_MIN) {
        return trimmed;
    }
    // Beyond 1.5x the title budget a "first sentence" is too long to be a
    // headline anyway; leave such paragraphs to the title truncator.
    const searchWindow = trimmed.slice(0, Math.floor(TITLE_MAX_LENGTH * 1.5));
    const terminators = ['. ', '! ', '? ', '; '];
    let bestIdx = -1;
    for (const terminator of terminators) {
        // Only a period can belong to an abbreviation.
        const mayBeAbbreviation = terminator === '. ';
        let from = HEADLINE_SOFT_MIN;
        let idx;
        while ((idx = searchWindow.indexOf(terminator, from)) !== -1) {
            if (!(mayBeAbbreviation && isAbbreviationBoundary(searchWindow, idx))) {
                // First acceptable hit for this terminator; keep the
                // earliest one across all terminators.
                if (bestIdx === -1 || idx < bestIdx) {
                    bestIdx = idx;
                }
                break;
            }
            from = idx + terminator.length;
        }
    }
    return bestIdx === -1 ? trimmed : trimmed.slice(0, bestIdx + 1).trim();
}
|
|
690
|
-
/**
 * Abbreviation tokens (lowercase, trailing period included) whose final
 * period must NOT be read as a sentence terminator. Upper-case initials
 * such as `U.S.` / `E.U.` are not listed; {@link isAbbreviationBoundary}
 * recognises them structurally.
 */
const ABBREVIATION_PREFIXES = [
    'mr.',
    'mrs.',
    'ms.',
    'dr.',
    'st.',
    'no.',
    'vs.',
    'e.g.',
    'i.e.',
    'etc.',
    'cf.',
    'al.',
    // EP fiscal-year and quarter shorthand: Q1., Q2., Q3., Q4., H1., H2., FY.
    'q1.',
    'q2.',
    'q3.',
    'q4.',
    'h1.',
    'h2.',
    'fy.',
];
/**
 * Tell whether the period at `idx` closes an abbreviation rather than a
 * sentence. Two rules apply:
 * 1. Initials: the character before the period is `A–Z` and is itself
 *    preceded by the start of the string, a space, or another period
 *    (`U.S.`, `E.U.`).
 * 2. Table lookup: the token ending at the period, lower-cased, is listed
 *    in {@link ABBREVIATION_PREFIXES}. The token is extended backwards
 *    over letters, digits and inner periods, so multi-part (`e.g.`,
 *    `i.e.`) and alphanumeric (`Q1.`, `H2.`) entries are matched whole.
 *
 * @param text - Source text
 * @param idx - Index of the `.` character in `text`
 * @returns `true` when the period at `idx` is part of an abbreviation
 */
function isAbbreviationBoundary(text, idx) {
    if (idx >= 1) {
        const prev = text.charCodeAt(idx - 1);
        const isUpperLetter = prev >= 65 && prev <= 90;
        if (isUpperLetter && (idx === 1 || text[idx - 2] === ' ' || text[idx - 2] === '.')) {
            return true;
        }
    }
    // Walk back to the start of the token. Digits and periods count as
    // token characters; a letters-only scan would reduce `e.g.` to `g.`
    // and `Q1.` to `.`, neither of which can ever match the table.
    const tokenChar = /[a-zA-Z0-9.]/u;
    let start = idx;
    while (start > 0 && tokenChar.test(text[start - 1])) {
        start -= 1;
    }
    const token = text.slice(start, idx + 1).toLowerCase();
    return ABBREVIATION_PREFIXES.includes(token);
}
|
|
747
290
|
/**
|
|
748
291
|
* Return the first Markdown H1 (`# …`) in the supplied text, stripped of
|
|
749
292
|
* the leading `#` and trailing anchor syntax. Returns an empty string when
|
|
@@ -905,6 +448,41 @@ export function extractLedeAfterHeading(markdown) {
|
|
|
905
448
|
return '';
|
|
906
449
|
return truncateDescription(buf.lines.join(' '));
|
|
907
450
|
}
|
|
451
|
+
/**
 * Collect the lede paragraph of a brief with the larger
 * `EXTENDED_DESCRIPTION_MAX_LENGTH` budget, for `og:description` /
 * `twitter:description` (the full BLUF paragraph is typically 200-300
 * characters in the editorial style guide).
 *
 * Every line is trimmed and passed through the lede helpers
 * (`classifyLedeLine`, `applyLedeDirective`, `collectProseLine`) — the
 * same parsing rules as {@link extractLedeAfterHeading}. Collection stops
 * when a helper signals `'break'` or once `buf.byteCount` reaches the
 * extended budget. The collected lines are joined with single spaces and
 * clamped by {@link truncateExtendedDescription}, which is documented to
 * return `''` when the result would not be longer than the regular meta
 * description.
 *
 * @param markdown - Brief body (SPDX preamble already stripped)
 * @returns Extended lede paragraph, or `''` when not worth emitting
 */
export function extractExtendedLedeAfterHeading(markdown) {
    const state = { inFence: false, inLede: false };
    const buf = { lines: [], byteCount: 0 };
    const rows = markdown.split('\n');
    for (let i = 0; i < rows.length; i += 1) {
        const line = rows[i].trim();
        const hasProse = buf.lines.length > 0;
        const directive = classifyLedeLine(line, state.inFence, state.inLede, hasProse);
        const directiveOutcome = applyLedeDirective(directive, state, hasProse);
        if (directiveOutcome === 'break') {
            break;
        }
        if (directiveOutcome === 'continue') {
            continue;
        }
        const collectOutcome = collectProseLine(line, buf);
        const budgetSpent = collectOutcome !== 'continue' && buf.byteCount >= EXTENDED_DESCRIPTION_MAX_LENGTH;
        if (collectOutcome === 'break' || budgetSpent) {
            break;
        }
    }
    return buf.lines.length === 0 ? '' : truncateExtendedDescription(buf.lines.join(' '));
}
|
|
908
486
|
/**
|
|
909
487
|
* Normalise a Markdown heading's text for comparison against the
|
|
910
488
|
* editorial-lede heading whitelist. Strips inline Markdown decorations
|
|
@@ -2112,189 +1690,12 @@ function templateForType(lang, articleType, inputs) {
|
|
|
2112
1690
|
};
|
|
2113
1691
|
}
|
|
2114
1692
|
}
|
|
2115
|
-
|
|
2116
|
-
|
|
2117
|
-
|
|
2118
|
-
|
|
2119
|
-
|
|
2120
|
-
|
|
2121
|
-
* @param date - ISO date string (`YYYY-MM-DD`)
|
|
2122
|
-
* @returns `{ start, end }` both in `YYYY-MM-DD` form
|
|
2123
|
-
*/
|
|
2124
|
-
export function deriveWeekRange(date) {
    const anchor = parseIsoDate(date);
    if (!anchor) {
        // Unparseable input: echo it back as a degenerate one-day range.
        return { start: date, end: date };
    }
    // getUTCDay() is 0 for Sunday … 6 for Saturday, so (day + 6) % 7 is
    // the number of days elapsed since the Monday of that week.
    const daysSinceMonday = (anchor.getUTCDay() + 6) % 7;
    const mondayMs = anchor.getTime() - daysSinceMonday * MS_PER_DAY;
    const sundayMs = mondayMs + 6 * MS_PER_DAY;
    const [start, end] = [mondayMs, sundayMs].map((ms) => formatIsoDate(new Date(ms)));
    return { start, end };
}
|
|
2134
|
-
/**
 * Compute the reporting window for the `week-in-review` article type:
 * from 36 days before the article date (start) to 8 days before it (end).
 *
 * Rationale (ADR-006): EP roll-call voting data is published with a
 * 2–6 week lag, so the most recent 7 days would structurally yield a
 * vote-empty dataset. Moving the window 8 days back and widening it to
 * 28 days ensures it contains at least one full plenary week with
 * published roll-call data. The direction matches the workflow's
 * `DATE_FROM` (D-36) → `DATE_TO` (D-8) variables.
 *
 * @param date - ISO article date string (`YYYY-MM-DD`) — typically TODAY
 * @returns `{ start: D-36, end: D-8 }` as `YYYY-MM-DD` strings, or
 *   `{ start: date, end: date }` when the input cannot be parsed
 */
export function deriveReportingWindowForWeekInReview(date) {
    const anchor = parseIsoDate(date);
    if (!anchor) {
        return { start: date, end: date };
    }
    const daysBefore = (days) => formatIsoDate(new Date(anchor.getTime() - days * MS_PER_DAY));
    return { start: daysBefore(36), end: daysBefore(8) };
}
|
|
2156
|
-
/**
 * Build a month label for an ISO date: English month name plus the
 * four-digit UTC year (e.g. `April 2026`). Non-English templates use the
 * same label verbatim; they interpolate it into a localized sentence
 * rather than translating the month.
 *
 * @param date - ISO date string
 * @returns Month label, or the input unchanged when parsing fails
 */
export function deriveMonthLabel(date) {
    const parsed = parseIsoDate(date);
    if (!parsed) {
        return date;
    }
    const englishMonths = [
        'January',
        'February',
        'March',
        'April',
        'May',
        'June',
        'July',
        'August',
        'September',
        'October',
        'November',
        'December',
    ];
    // An out-of-range month index yields an empty name; trim() then leaves
    // just the year.
    const monthName = englishMonths[parsed.getUTCMonth()] ?? '';
    const label = `${monthName} ${parsed.getUTCFullYear()}`;
    return label.trim();
}
|
|
2186
|
-
/**
 * Build a quarter label of the form `Q<n> <YYYY>` (e.g. `Q2 2026`) from
 * the UTC month and year of an ISO date. Consumed by the `quarter-ahead`
 * and `quarter-in-review` title generators.
 *
 * @param date - ISO date string
 * @returns Quarter label, or the input unchanged when parsing fails
 */
export function deriveQuarterLabel(date) {
    const parsed = parseIsoDate(date);
    if (!parsed) {
        return date;
    }
    // Month indices 0-2 → Q1, 3-5 → Q2, 6-8 → Q3, 9-11 → Q4.
    const monthIndex = parsed.getUTCMonth();
    const quarterNumber = 1 + Math.floor(monthIndex / 3);
    return `Q${quarterNumber} ${parsed.getUTCFullYear()}`;
}
|
|
2200
|
-
/**
 * Build a four-digit year label from the UTC year of an ISO date.
 * Consumed by the `year-ahead` and `year-in-review` title generators.
 *
 * @param date - ISO date string
 * @returns Year label, or the input unchanged when parsing fails
 */
export function deriveYearLabel(date) {
    const parsed = parseIsoDate(date);
    return parsed ? String(parsed.getUTCFullYear()) : date;
}
|
|
2213
|
-
/**
 * European Parliament term boundaries used by the term and election-cycle
 * label helpers. Keep these in sync with
 * {@link analysis/methodologies/electoral-cycle-methodology.md}.
 *
 * - EP10: 16 Jul 2024 → ~end of June 2029
 * - EP11: ~Jul 2029 → ~Jun 2034
 *
 * Each term lasts five years, so every end year is derived from the
 * previous boundary.
 */
const EP10_START_YEAR = 2024;
const EP10_END_YEAR = EP10_START_YEAR + 5; // 2029
const EP11_END_YEAR = EP10_END_YEAR + 5; // 2034
const EP_ELECTION_MONTH = 6; // June (1-based month number)
|
|
2223
|
-
/**
 * Build the EP-term label for an ISO date, such as `EP10 → 2029` or
 * `EP11 → 2034`. Consumed by the `term-outlook` title generator.
 *
 * A term is treated as running through the election month of its end
 * year; dates after that month belong to the following term. Dates in
 * years before `EP10_START_YEAR` are labelled as EP9.
 *
 * @param date - ISO date string
 * @returns Term label, or the unchanged input when it cannot be parsed
 */
export function deriveTermLabel(date) {
    const utcDate = parseIsoDate(date);
    if (!utcDate) {
        return date;
    }
    const year = utcDate.getUTCFullYear();
    const month = utcDate.getUTCMonth() + 1;
    const onOrBeforeElectionMonth = month <= EP_ELECTION_MONTH;
    // True while the date still falls inside the term that ends in `endYear`.
    const fallsWithinTermEnding = (endYear) => year < endYear || (year === endYear && onOrBeforeElectionMonth);
    if (year < EP10_START_YEAR) {
        return `EP9 → ${EP10_START_YEAR}`;
    }
    if (fallsWithinTermEnding(EP10_END_YEAR)) {
        return `EP10 → ${EP10_END_YEAR}`;
    }
    if (fallsWithinTermEnding(EP11_END_YEAR)) {
        return `EP11 → ${EP11_END_YEAR}`;
    }
    // Past EP11: count how many further five-year terms have started.
    // A date on or before the election month still belongs to the term
    // that ends that year, hence the one-year pull-back.
    const elapsedYears = year - EP11_END_YEAR - (onOrBeforeElectionMonth ? 1 : 0);
    const extraTerms = Math.floor(elapsedYears / 5) + 1;
    const termNumber = 11 + extraTerms;
    const termEndYear = EP11_END_YEAR + 5 * extraTerms;
    return `EP${termNumber} → ${termEndYear}`;
}
|
|
2251
|
-
/**
 * Build the election-cycle label for an ISO date: the outgoing and
 * incoming EP terms followed by the election year in parentheses
 * (for example `EP10 → EP11 (2029)`). Consumed by the `election-cycle`
 * title generator.
 *
 * Only the calendar year is considered: every date up to and including
 * `EP10_END_YEAR` maps to the EP10 → EP11 cycle.
 *
 * @param date - ISO date string
 * @returns Cycle label, or the unchanged input when it cannot be parsed
 */
export function deriveElectionCycleLabel(date) {
    const utcDate = parseIsoDate(date);
    if (!utcDate) {
        return date;
    }
    const formatCycle = (outgoingTerm, electionYear) => `EP${outgoingTerm} → EP${outgoingTerm + 1} (${electionYear})`;
    const year = utcDate.getUTCFullYear();
    if (year <= EP10_END_YEAR) {
        return formatCycle(10, EP10_END_YEAR);
    }
    if (year <= EP11_END_YEAR) {
        return formatCycle(11, EP11_END_YEAR);
    }
    // Beyond EP11: each further block of five years is one more cycle.
    const extraCycles = Math.ceil((year - EP11_END_YEAR) / 5);
    return formatCycle(11 + extraCycles, EP11_END_YEAR + 5 * extraCycles);
}
|
|
2273
|
-
/**
 * Parse a `YYYY-MM-DD` string as UTC midnight. Returns `null` for
 * malformed input so callers can skip month/week derivation gracefully.
 *
 * Besides the digit layout, the calendar date itself must exist: some
 * engines silently roll an impossible day such as `2026-02-30` over into
 * the following month instead of rejecting it, so the parsed value is
 * formatted back and compared with the input.
 *
 * @param iso - ISO date string
 * @returns Parsed `Date` or `null`
 */
function parseIsoDate(iso) {
    if (typeof iso !== 'string' || !/^\d{4}-\d{2}-\d{2}$/.test(iso)) {
        return null;
    }
    const parsed = new Date(`${iso}T00:00:00Z`);
    if (Number.isNaN(parsed.getTime())) {
        return null;
    }
    // Round-trip guard: a rolled-over date no longer matches its input.
    return parsed.toISOString().slice(0, 10) === iso ? parsed : null;
}
|
|
2286
|
-
/**
 * Format a `Date` as `YYYY-MM-DD` using its UTC fields.
 *
 * The year is zero-padded to four digits so that years below 1000 still
 * produce a well-formed `YYYY-MM-DD` string. Negative years are emitted
 * unpadded, as padding would misplace the sign.
 *
 * @param d - Date object
 * @returns ISO date string
 */
function formatIsoDate(d) {
    const fullYear = d.getUTCFullYear();
    const y = fullYear >= 0 ? String(fullYear).padStart(4, '0') : String(fullYear);
    const m = String(d.getUTCMonth() + 1).padStart(2, '0');
    const day = String(d.getUTCDate()).padStart(2, '0');
    return `${y}-${m}-${day}`;
}
|
|
1693
|
+
// Date-label helpers — extracted into the `metadata/` bounded context as
|
|
1694
|
+
// pure leaf-module functions. Re-exported here for back-compat with
|
|
1695
|
+
// existing call sites; new code should import directly from
|
|
1696
|
+
// `./metadata/date-labels.js`.
|
|
1697
|
+
import { deriveWeekRange, deriveReportingWindowForWeekInReview, deriveMonthLabel, deriveQuarterLabel, deriveYearLabel, deriveTermLabel, deriveElectionCycleLabel, } from './metadata/date-labels.js';
|
|
1698
|
+
export { deriveWeekRange, deriveReportingWindowForWeekInReview, deriveMonthLabel, deriveQuarterLabel, deriveYearLabel, deriveTermLabel, deriveElectionCycleLabel, } from './metadata/date-labels.js';
|
|
2298
1699
|
/**
|
|
2299
1700
|
* Extract a manifest override value for a single language. Accepts either
|
|
2300
1701
|
* a plain string (applied to every language) or a `LanguageMap` object.
|
|
@@ -2334,6 +1735,7 @@ function resolveEditorialContent(opts) {
|
|
|
2334
1735
|
return {
|
|
2335
1736
|
headline: highlight.headline,
|
|
2336
1737
|
summary: highlight.summary,
|
|
1738
|
+
extendedSummary: extractExtendedLedeAfterHeading(markdown),
|
|
2337
1739
|
};
|
|
2338
1740
|
}
|
|
2339
1741
|
if (highlight?.summary) {
|
|
@@ -2342,10 +1744,12 @@ function resolveEditorialContent(opts) {
|
|
|
2342
1744
|
}
|
|
2343
1745
|
const aggregatedH1 = extractFirstH1(markdown);
|
|
2344
1746
|
const aggregatedSummary = extractStrongProseLine(markdown);
|
|
1747
|
+
const aggregatedExtended = extractExtendedLedeAfterHeading(markdown);
|
|
2345
1748
|
if (aggregatedH1 && !isGenericHeading(aggregatedH1, articleType, date)) {
|
|
2346
1749
|
return {
|
|
2347
1750
|
headline: truncateTitle(aggregatedH1),
|
|
2348
1751
|
summary: artefactSummary || aggregatedSummary,
|
|
1752
|
+
extendedSummary: aggregatedExtended,
|
|
2349
1753
|
};
|
|
2350
1754
|
}
|
|
2351
1755
|
const summary = artefactSummary || aggregatedSummary;
|
|
@@ -2357,9 +1761,13 @@ function resolveEditorialContent(opts) {
|
|
|
2357
1761
|
// to clause-boundary truncation downstream when the sentence
|
|
2358
1762
|
// itself overruns TITLE_MAX_LENGTH.
|
|
2359
1763
|
const firstSentence = extractFirstSentence(summary);
|
|
2360
|
-
return {
|
|
1764
|
+
return {
|
|
1765
|
+
headline: truncateTitle(firstSentence),
|
|
1766
|
+
summary,
|
|
1767
|
+
extendedSummary: aggregatedExtended,
|
|
1768
|
+
};
|
|
2361
1769
|
}
|
|
2362
|
-
return { headline: '', summary: '' };
|
|
1770
|
+
return { headline: '', summary: '', extendedSummary: '' };
|
|
2363
1771
|
}
|
|
2364
1772
|
/**
|
|
2365
1773
|
* Pick the per-language SEO title from the resolved editorial pair and
|
|
@@ -2603,10 +2011,23 @@ function resolveOneLanguage(input) {
|
|
|
2603
2011
|
: composeContextualDescription(input.lang, rawDescription, editorial, input.date, input.runId);
|
|
2604
2012
|
const truncatedTitle = truncateTitle(title);
|
|
2605
2013
|
const truncatedDescription = truncateDescription(description);
|
|
2014
|
+
// The extended description tracks the same source as the short
|
|
2015
|
+
// description: when a manifest description overrides, use it
|
|
2016
|
+
// verbatim (no point synthesising an extended form from the brief
|
|
2017
|
+
// when the editor explicitly chose the manifest copy); otherwise
|
|
2018
|
+
// use the editorial extended summary lifted from the brief BLUF.
|
|
2019
|
+
// `truncateExtendedDescription` returns `''` when the candidate
|
|
2020
|
+
// wouldn't be longer than the regular meta description, so callers
|
|
2021
|
+
// can fall back to {@link description} via a simple `||`.
|
|
2022
|
+
const extendedSource = manifestDescription
|
|
2023
|
+
? manifestDescription
|
|
2024
|
+
: editorial.extendedSummary || rawDescription;
|
|
2025
|
+
const truncatedExtendedDescription = truncateExtendedDescription(extendedSource);
|
|
2606
2026
|
const source = manifestTitle || manifestDescription ? 'manifest' : perLanguage.source;
|
|
2607
2027
|
return {
|
|
2608
2028
|
title: truncatedTitle,
|
|
2609
2029
|
description: truncatedDescription,
|
|
2030
|
+
extendedDescription: truncatedExtendedDescription,
|
|
2610
2031
|
keywords: buildSeoKeywords(input.lang, input.articleType, input.date, input.runId, truncatedTitle, truncatedDescription),
|
|
2611
2032
|
source,
|
|
2612
2033
|
};
|
|
@@ -2641,6 +2062,7 @@ function resolvePerLanguageEditorial(input) {
|
|
|
2641
2062
|
editorial: {
|
|
2642
2063
|
headline: localized.headline,
|
|
2643
2064
|
summary: localized.summary,
|
|
2065
|
+
extendedSummary: localized.extendedSummary,
|
|
2644
2066
|
},
|
|
2645
2067
|
source: 'localized-brief',
|
|
2646
2068
|
};
|
|
@@ -2656,7 +2078,7 @@ function resolvePerLanguageEditorial(input) {
|
|
|
2656
2078
|
// Nothing editorial at all → caller will fall back to the localized
|
|
2657
2079
|
// template.
|
|
2658
2080
|
return {
|
|
2659
|
-
editorial: { headline: '', summary: '' },
|
|
2081
|
+
editorial: { headline: '', summary: '', extendedSummary: '' },
|
|
2660
2082
|
source: 'template',
|
|
2661
2083
|
};
|
|
2662
2084
|
}
|