euparliamentmonitor 0.9.12 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/package.json +10 -5
  2. package/scripts/aggregator/analysis-aggregator.js +7 -2
  3. package/scripts/aggregator/article-generator.js +3 -1
  4. package/scripts/aggregator/article-html.d.ts +9 -0
  5. package/scripts/aggregator/article-html.js +134 -13
  6. package/scripts/aggregator/article-metadata.d.ts +86 -150
  7. package/scripts/aggregator/article-metadata.js +1171 -574
  8. package/scripts/aggregator/editorial-brief-resolver.d.ts +76 -0
  9. package/scripts/aggregator/editorial-brief-resolver.js +220 -0
  10. package/scripts/aggregator/metadata/date-labels.d.ts +122 -0
  11. package/scripts/aggregator/metadata/date-labels.js +209 -0
  12. package/scripts/aggregator/metadata/text-utils.d.ts +188 -0
  13. package/scripts/aggregator/metadata/text-utils.js +542 -0
  14. package/scripts/constants/og-locales.d.ts +15 -0
  15. package/scripts/constants/og-locales.js +17 -0
  16. package/scripts/constants/seo/index.d.ts +21 -0
  17. package/scripts/constants/seo/index.js +23 -0
  18. package/scripts/constants/seo/og-locales.d.ts +59 -0
  19. package/scripts/constants/seo/og-locales.js +59 -0
  20. package/scripts/constants/seo/social-handles.d.ts +50 -0
  21. package/scripts/constants/seo/social-handles.js +65 -0
  22. package/scripts/constants/social-handles.d.ts +11 -0
  23. package/scripts/constants/social-handles.js +13 -0
  24. package/scripts/discover-untranslated-briefs.js +534 -0
  25. package/scripts/generators/news-indexes.d.ts +63 -0
  26. package/scripts/generators/news-indexes.js +177 -26
  27. package/scripts/generators/political-intelligence/html.js +14 -6
  28. package/scripts/generators/seo-copy.js +42 -0
  29. package/scripts/generators/sitemap/html.js +13 -5
  30. package/scripts/lint-src-todos.js +124 -0
  31. package/scripts/templates/sync-template-frontmatter.js +4 -4
  32. package/scripts/utils/copy-test-reports.js +1 -1
  33. package/scripts/utils/generate-docs-index.js +1 -1
  34. package/scripts/validate-brief-translations.js +657 -0
  35. package/scripts/validate-manifest-seo.js +581 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "euparliamentmonitor",
3
- "version": "0.9.12",
3
+ "version": "0.9.14",
4
4
  "type": "module",
5
5
  "description": "European Parliament Intelligence Platform - Monitor political activity with systematic transparency",
6
6
  "main": "scripts/index.js",
@@ -63,6 +63,9 @@
63
63
  "build:check-tests": "tsc --project tsconfig.test.json --noEmit",
64
64
  "copy-vendor": "node scripts/copy-vendor.js",
65
65
  "validate-analysis": "node scripts/validate-analysis-completeness.js",
66
+ "discover:untranslated-briefs": "node scripts/discover-untranslated-briefs.js",
67
+ "validate:translations": "node scripts/validate-brief-translations.js",
68
+ "validate:manifest-seo": "node scripts/validate-manifest-seo.js",
66
69
  "sync:templates": "node scripts/templates/sync-template-frontmatter.js",
67
70
  "sync:templates:check": "node scripts/templates/sync-template-frontmatter.js --check",
68
71
  "prior-run-diff": "node scripts/aggregator/prior-run-diff.js",
@@ -76,6 +79,7 @@
76
79
  "validate-ep-api": "npx tsx src/utils/validate-ep-api.ts",
77
80
  "mcp:probe": "npx tsx src/utils/mcp-probe.ts",
78
81
  "lint:prompts": "node scripts/lint-prompts.js",
82
+ "lint:src-todos": "node scripts/lint-src-todos.js",
79
83
  "htmlhint": "sh -c 'htmlhint *.html; set -- news/*.html; if [ -e \"$1\" ]; then htmlhint \"$@\"; else echo \"No news/*.html files to lint\"; fi'",
80
84
  "serve": "python3 -m http.server 8080",
81
85
  "test": "vitest run",
@@ -89,7 +93,7 @@
89
93
  "test:e2e:headed": "playwright test --headed",
90
94
  "test:e2e:debug": "playwright test --debug",
91
95
  "test:e2e:report": "playwright show-report",
92
- "lint": "eslint src/",
96
+ "lint": "eslint src/ && node scripts/lint-src-todos.js",
93
97
  "lint:fix": "eslint src/ --fix",
94
98
  "knip": "knip",
95
99
  "knip:production": "knip --production",
@@ -165,13 +169,14 @@
165
169
  "eslint-plugin-jsdoc": "62.9.0",
166
170
  "eslint-plugin-security": "4.0.0",
167
171
  "eslint-plugin-sonarjs": "4.0.3",
172
+ "fast-check": "^4.8.0",
168
173
  "happy-dom": "20.9.0",
169
174
  "html-minifier-terser": "^7.2.0",
170
175
  "htmlhint": "1.9.2",
171
176
  "husky": "9.1.7",
172
- "jscpd": "4.2.1",
177
+ "jscpd": "4.2.3",
173
178
  "knip": "^6.7.0",
174
- "lint-staged": "17.0.4",
179
+ "lint-staged": "17.0.5",
175
180
  "mermaid": "11.15.0",
176
181
  "papaparse": "5.5.3",
177
182
  "prettier": "3.8.3",
@@ -179,7 +184,7 @@
179
184
  "sharp": "^0.34.5",
180
185
  "terser": "^5.47.1",
181
186
  "ts-api-utils": "2.5.0",
182
- "tsx": "4.22.0",
187
+ "tsx": "4.22.1",
183
188
  "typedoc": "0.28.19",
184
189
  "typescript": "6.0.3",
185
190
  "vitest": "4.1.6"
@@ -18,6 +18,10 @@ import { buildKeyTakeaways, KEY_TAKEAWAYS_SECTION_ID, KEY_TAKEAWAYS_SECTION_TITL
18
18
  import { flattenManifestFiles as _flattenManifestFiles, latestGateResult as _latestGateResult, resolveArticleType as _resolveArticleType, resolveRunId as _resolveRunId, } from './manifest/index.js';
19
19
  import { READER_GUIDE_SECTION_ID, READER_GUIDE_SECTION_IDS, READER_GUIDE_SECTION_TITLE, } from './reader-guide-constants.js';
20
20
  export { READER_GUIDE_SECTION_ID, READER_GUIDE_SECTION_IDS, READER_GUIDE_SECTION_TITLE, } from './reader-guide-constants.js';
21
+ const TRADECRAFT_EXCLUDED_FILES = new Set([
22
+ 'analysis/methodologies/executive-brief-translation-guide.md',
23
+ 'analysis/templates/executive-brief-translation-template.md',
24
+ ]);
21
25
  /**
22
26
  * Normalise `manifest.files` into a flat list of `runRelPath` strings.
23
27
  *
@@ -94,8 +98,9 @@ export function discoverTradecraftFiles(repoRoot) {
94
98
  continue;
95
99
  const entries = fs.readdirSync(dir, { withFileTypes: true });
96
100
  for (const entry of entries) {
97
- if (entry.isFile() && entry.name.endsWith('.md')) {
98
- result.push(`${sub}/${entry.name}`);
101
+ const rel = `${sub}/${entry.name}`;
102
+ if (entry.isFile() && entry.name.endsWith('.md') && !TRADECRAFT_EXCLUDED_FILES.has(rel)) {
103
+ result.push(rel);
99
104
  }
100
105
  }
101
106
  }
@@ -366,6 +366,7 @@ function writeLanguageVariant(lang, slug, aggregated, englishHtml, chromeOptions
366
366
  body: bodyHtml,
367
367
  title: entry.title,
368
368
  description: perLangDescription,
369
+ extendedDescription: entry.extendedDescription,
369
370
  keywords: entry.keywords,
370
371
  date: aggregated.date,
371
372
  articleType: aggregated.articleType,
@@ -444,7 +445,7 @@ function getMetadataEntry(map, lang) {
444
445
  return descriptor.value;
445
446
  }
446
447
  const en = Object.getOwnPropertyDescriptor(map, 'en')?.value;
447
- return en ?? { title: '', description: '', keywords: [] };
448
+ return (en ?? { title: '', description: '', extendedDescription: '', keywords: [], source: 'template' });
448
449
  }
449
450
  /**
450
451
  * Count the number of articles the site currently publishes, derived
@@ -699,6 +700,7 @@ function applyCliOverrides(base, titleOverride, descriptionOverride) {
699
700
  title: titleOverride ?? entry.title,
700
701
  description: descriptionOverride ?? entry.description,
701
702
  keywords: entry.keywords,
703
+ source: titleOverride || descriptionOverride ? 'manifest' : entry.source,
702
704
  },
703
705
  enumerable: true,
704
706
  writable: true,
@@ -21,6 +21,15 @@ export interface WrapArticleOptions {
21
21
  readonly title: string;
22
22
  /** Article description — shown in `<meta name="description">` and OG. */
23
23
  readonly description: string;
24
+ /**
25
+ * Optional: longer (up to ~300 chars) editorial summary lifted from
26
+ * the language-specific executive brief BLUF. When provided, used
27
+ * for `og:description` and `twitter:description`; falls back to
28
+ * `description` when absent. Lets social-card previews show the
29
+ * full BLUF paragraph while the short `<meta description>` stays
30
+ * within Google's ~160-char snippet budget.
31
+ */
32
+ readonly extendedDescription?: string;
24
33
  /** SEO keywords — shown in `<meta name="keywords">`. */
25
34
  readonly keywords?: readonly string[];
26
35
  /** Canonical ISO date of the run (YYYY-MM-DD). */
@@ -21,8 +21,11 @@
21
21
  import { BASE_URL, BUILD_SHORT, MERMAID_VERSION } from '../constants/config.js';
22
22
  import { buildHeadFreshnessTags } from '../constants/build-info-meta.js';
23
23
  import { ALL_LANGUAGES, LANGUAGE_NAMES, LANGUAGE_FLAGS, PAGE_TITLES, SKIP_LINK_TEXTS, TOC_ARIA_LABELS, ARTICLE_TYPE_LABELS, BACK_TO_NEWS_LABELS, ARTICLE_NAV_LABELS, VIEW_SOURCE_MARKDOWN_LABELS, ARTICLE_TYPE_ICONS, FOOTER_SITEMAP_LABELS, FOOTER_POLITICAL_INTELLIGENCE_LABELS, TRADECRAFT_HEADING_LABELS, TRADECRAFT_INTRO_LABELS, TRADECRAFT_METHODOLOGIES_LABELS, TRADECRAFT_TEMPLATES_LABELS, ANALYSIS_INDEX_HEADING_LABELS, ANALYSIS_INDEX_INTRO_LABELS, ANALYSIS_INDEX_COL_SECTION_LABELS, ANALYSIS_INDEX_COL_ARTIFACT_LABELS, ANALYSIS_INDEX_COL_PATH_LABELS, KEY_TAKEAWAYS_HEADING_LABELS, SUPPLEMENTARY_HEADING_LABELS, SECTION_TITLE_LABELS, getLocalizedString, getTextDirection, } from '../constants/languages.js';
24
+ import { buildOgLocaleTags } from '../constants/og-locales.js';
25
+ import { ORG_SAME_AS, buildTwitterAttributionTags } from '../constants/social-handles.js';
24
26
  import { ArticleCategory } from '../types/index.js';
25
27
  import { escapeHTML } from '../utils/file-utils.js';
28
+ import { stripHtmlTags } from '../utils/html-sanitize.js';
26
29
  import { buildResponsiveIconLinks, buildResponsiveSocialImageMeta, buildSiteFooter, buildSiteHeader, buildPageBanner, } from '../templates/section-builders.js';
27
30
  import { READER_GUIDE_SECTION_ID } from './reader-guide-constants.js';
28
31
  import { READER_GUIDE_TITLE_LABELS, getReaderGuideSectionIcon, } from './reader-intelligence-guide.js';
@@ -32,6 +35,63 @@ import { KEY_TAKEAWAYS_SECTION_ID } from './key-takeaways.js';
32
35
  import { getPoliticalIntelligenceFilename } from '../generators/political-intelligence.js';
33
36
  import { getSitemapFilename } from '../generators/sitemap/index.js';
34
37
  import { getCuratedTitle, getCuratedDescription, getArtifactInfo, } from '../generators/political-intelligence-descriptions.js';
/**
 * Look up the localized label for an article type and return it as
 * plain text. This helper adds no icon of its own; the value comes
 * straight from the `ARTICLE_TYPE_LABELS` entry for the language.
 * It feeds the OpenGraph `article:section` meta and the JSON-LD
 * `articleSection` field, where emoji break Google's NewsArticle
 * structured-data validator.
 *
 * When the label table has no entry for the slug, the slug itself is
 * humanised by turning every hyphen into a space.
 *
 * @param slug - Raw article type slug (e.g. "motions", "week-ahead")
 * @param lang - Target language code
 * @returns Localized label, or the de-hyphenated slug as a fallback
 */
function getLocalizedArticleTypePlain(slug, lang) {
    const localized = getLocalizedString(ARTICLE_TYPE_LABELS, lang)[slug];
    if (localized !== undefined && localized !== null) {
        return localized;
    }
    return slug.replace(/-/g, ' ');
}
/** Hard cap Google's NewsArticle validator applies to `headline`. */
const HEADLINE_LIMIT = 110;
/**
 * A break point is only honoured when it leaves more than this many
 * characters, so a very early separator cannot reduce the headline to
 * a stub.
 */
const HEADLINE_MIN_BREAK_INDEX = 60;
/**
 * Clamp a title to the 110-character limit Google's NewsArticle
 * structured-data validator places on `headline`. The page `<title>`
 * may be longer; the JSON-LD headline needs its own, tighter cap or
 * the article loses Top Stories carousel eligibility.
 *
 * Cut-point preference, first match wins:
 *   1. the last phrase boundary inside the window — spaced em-dash,
 *      spaced en-dash, colon, or sentence end (the period is kept);
 *   2. the window itself, when the title has whitespace exactly at the
 *      limit (the window already ends on a whole word);
 *   3. the last plain space inside the window;
 *   4. a hard cut at the limit.
 * Options 1 and 3 apply only when the cut leaves more than
 * `HEADLINE_MIN_BREAK_INDEX` characters.
 *
 * The plain space must not compete with the phrase separators in one
 * `Math.max`: every separator contains a space, so the last space
 * would always win and the phrase-boundary preference would be dead.
 *
 * @param title - Resolved article title (plain text)
 * @returns Headline of at most 110 UTF-16 code units
 */
function truncateHeadline(title) {
    const trimmed = title.trim();
    if (trimmed.length <= HEADLINE_LIMIT) {
        return trimmed;
    }
    let head = trimmed.slice(0, HEADLINE_LIMIT);
    // Never end on a lone high surrogate: slicing by code unit can
    // split an astral character (emoji etc.) in half.
    const lastUnit = head.charCodeAt(head.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        head = head.slice(0, -1);
    }
    // `+ 1` on the sentence boundary keeps the terminating period; a
    // miss (-1) becomes 0, which is below the minimum and is ignored.
    const phraseCut = Math.max(
        head.lastIndexOf(' — '),
        head.lastIndexOf(' – '),
        head.lastIndexOf(': '),
        head.lastIndexOf('. ') + 1,
    );
    if (phraseCut > HEADLINE_MIN_BREAK_INDEX) {
        return head.slice(0, phraseCut).trimEnd();
    }
    // The window already ends on a complete word.
    if (/\s/u.test(trimmed.charAt(HEADLINE_LIMIT))) {
        return head.trimEnd();
    }
    const wordCut = head.lastIndexOf(' ');
    return wordCut > HEADLINE_MIN_BREAK_INDEX
        ? head.slice(0, wordCut).trimEnd()
        : head.trimEnd();
}
/**
 * Pick the separator placed between the article title and the site
 * title in the `{articleTitle} {sep} {siteTitle}` pattern.
 *
 * Languages whose text direction is reported as `'rtl'` get the
 * left-pointing guillemet («); every other language gets the
 * right-pointing guillemet (»), so the mark points from the primary
 * title towards the site name in reading order.
 *
 * Guillemets replaced an earlier em-dash separator, which collided
 * with em-dashes inside article titles (the editorial style uses
 * `Title — Subtitle`) and read ambiguously in screen readers.
 *
 * @param lang - Target language code
 * @returns `" « "` when the language is right-to-left, otherwise `" » "`
 */
function getTitleSeparator(lang) {
    if (getTextDirection(lang) === 'rtl') {
        return ' « ';
    }
    return ' » ';
}
35
95
  /**
36
96
  * Resolve a localized article type label with icon. Falls back to the
37
97
  * humanised slug when a translation isn't available.
@@ -879,24 +939,69 @@ export function wrapArticleHtml(options) {
879
939
  : '';
880
940
  const tocHtml = buildArticleToc(options.toc ?? [], safeLang);
881
941
  const articleMainClass = tocHtml.length > 0 ? 'article-main--with-toc' : 'article-main--no-toc';
942
+ const articleSectionLabel = getLocalizedArticleTypePlain(options.articleType, safeLang);
943
+ // Count words from the rendered body for the JSON-LD `wordCount`
944
+ // field (Google's NewsArticle structured-data validator emits a
945
+ // warning when this is missing). Done by stripping HTML tags from
946
+ // the rendered body then splitting on whitespace — fast and
947
+ // CodeQL-safe.
948
+ const bodyText = stripHtmlTags(options.body);
949
+ const wordCount = bodyText.split(/\s+/u).filter((w) => w.length > 0).length;
950
+ // Build the JSON-LD image graph. Google requires NewsArticle.image
951
+ // to be an array (or single ImageObject) with explicit width/height
952
+ // covering at least one of the 1:1, 4:3, 16:9 aspect ratios for
953
+ // Top Stories carousel eligibility.
954
+ const jsonLdImages = [
955
+ {
956
+ '@type': 'ImageObject',
957
+ url: `${BASE_URL}/images/og-image-1200.jpg`,
958
+ width: 1200,
959
+ height: 630,
960
+ },
961
+ {
962
+ '@type': 'ImageObject',
963
+ url: `${BASE_URL}/images/og-image-1200.webp`,
964
+ width: 1200,
965
+ height: 630,
966
+ },
967
+ {
968
+ '@type': 'ImageObject',
969
+ url: `${BASE_URL}/images/og-image-1200.avif`,
970
+ width: 1200,
971
+ height: 630,
972
+ },
973
+ ];
882
974
  const jsonLd = {
883
975
  '@context': 'https://schema.org',
884
976
  '@type': 'NewsArticle',
885
- headline: options.title,
977
+ headline: truncateHeadline(options.title),
886
978
  description: options.description,
887
979
  datePublished: options.date,
888
980
  dateModified: options.date,
889
981
  inLanguage: safeLang,
890
982
  url: canonicalUrl,
891
- image: `${BASE_URL}/images/og-image-1200.jpg`,
892
- author: { '@type': 'Organization', name: PUBLISHER_NAME, url: 'https://hack23.com' },
983
+ mainEntityOfPage: { '@type': 'WebPage', '@id': canonicalUrl },
984
+ image: jsonLdImages,
985
+ author: {
986
+ '@type': 'NewsMediaOrganization',
987
+ name: PUBLISHER_NAME,
988
+ url: 'https://hack23.com',
989
+ sameAs: [...ORG_SAME_AS],
990
+ },
893
991
  publisher: {
894
- '@type': 'Organization',
992
+ '@type': 'NewsMediaOrganization',
895
993
  name: PUBLISHER_NAME,
896
994
  url: 'https://hack23.com',
897
995
  logo: { '@type': 'ImageObject', url: `${BASE_URL}/images/apple-touch-icon.png` },
996
+ sameAs: [...ORG_SAME_AS],
997
+ },
998
+ articleSection: articleSectionLabel,
999
+ wordCount,
1000
+ keywords: (options.keywords ?? []).join(', '),
1001
+ speakable: {
1002
+ '@type': 'SpeakableSpecification',
1003
+ cssSelector: ['.article-dek', '.article-body > p:first-of-type'],
898
1004
  },
899
- articleSection: options.articleType,
900
1005
  isPartOf: {
901
1006
  '@type': 'WebSite',
902
1007
  name: SITE_NAME,
@@ -921,7 +1026,7 @@ export function wrapArticleHtml(options) {
921
1026
  {
922
1027
  '@type': 'ListItem',
923
1028
  position: 2,
924
- name: options.articleType.replace(/-/g, ' '),
1029
+ name: articleSectionLabel,
925
1030
  item: `${BASE_URL}/news/`,
926
1031
  },
927
1032
  {
@@ -934,11 +1039,21 @@ export function wrapArticleHtml(options) {
934
1039
  };
935
1040
  const structuredData = [jsonLd, breadcrumbLd];
936
1041
  const jsonLdString = JSON.stringify(structuredData).replace(/</g, '\\u003c');
937
- const pageTitle = `${options.title}${siteTitle}`;
1042
+ const pageTitle = `${options.title}${getTitleSeparator(safeLang)}${siteTitle}`;
938
1043
  const keywords = (options.keywords ?? []).map((keyword) => keyword.trim()).filter(Boolean);
939
1044
  const keywordsMeta = keywords.length > 0
940
1045
  ? ` <meta name="keywords" content="${escapeHTML(keywords.join(', '))}">\n`
941
1046
  : '';
1047
+ // Use the longer extended description for og:description/twitter:description
1048
+ // when available so social-card previews show the full BLUF
1049
+ // paragraph; the short meta description stays within Google's
1050
+ // ~160-char snippet budget.
1051
+ const socialDescription = options.extendedDescription && options.extendedDescription.length > 0
1052
+ ? options.extendedDescription
1053
+ : options.description;
1054
+ const ogLocaleTags = buildOgLocaleTags(safeLang);
1055
+ const twitterAttribution = buildTwitterAttributionTags();
1056
+ const twitterAttributionBlock = twitterAttribution ? `\n${twitterAttribution}` : '';
942
1057
  const header = buildSiteHeader({
943
1058
  lang: safeLang,
944
1059
  pathPrefix: '../',
@@ -956,23 +1071,29 @@ export function wrapArticleHtml(options) {
956
1071
  <meta name="referrer" content="no-referrer">
957
1072
  <title>${escapeHTML(pageTitle)}</title>
958
1073
  <meta name="description" content="${escapeHTML(options.description)}">
959
- ${keywordsMeta} <meta name="robots" content="index, follow, max-image-preview:large">
1074
+ ${keywordsMeta} <meta name="robots" content="index, follow, max-snippet:-1, max-image-preview:large">
960
1075
  <meta name="author" content="${PUBLISHER_NAME}">
961
1076
  <meta name="publisher" content="${PUBLISHER_NAME}">
962
1077
  <meta name="date" content="${options.date}">
963
- <meta name="article:published_time" content="${options.date}">
1078
+ <meta property="article:published_time" content="${options.date}">
1079
+ <meta property="article:modified_time" content="${options.date}">
1080
+ <meta property="article:section" content="${escapeHTML(articleSectionLabel)}">
1081
+ <meta property="article:author" content="${PUBLISHER_NAME}">
1082
+ <meta property="article:publisher" content="https://hack23.com">
964
1083
  <link rel="canonical" href="${canonicalUrl}">
965
1084
  ${hreflangLinks}
1085
+ <link rel="alternate" type="application/rss+xml" title="EU Parliament Monitor RSS" href="${BASE_URL}/rss.xml">
1086
+ <link rel="preconnect" href="https://hack23.com" crossorigin>
966
1087
  <meta property="og:type" content="article">
967
1088
  <meta property="og:title" content="${escapeHTML(options.title)}">
968
- <meta property="og:description" content="${escapeHTML(options.description)}">
1089
+ <meta property="og:description" content="${escapeHTML(socialDescription)}">
969
1090
  <meta property="og:url" content="${canonicalUrl}">
970
1091
  <meta property="og:site_name" content="EU Parliament Monitor">
971
- <meta property="og:locale" content="${safeLang}">
972
- ${buildResponsiveSocialImageMeta(`${options.title}EU Parliament Monitor`)}
1092
+ ${ogLocaleTags}
1093
+ ${buildResponsiveSocialImageMeta(`${options.title}${getTitleSeparator(safeLang)}EU Parliament Monitor`)}
973
1094
  <meta name="twitter:card" content="summary_large_image">
974
1095
  <meta name="twitter:title" content="${escapeHTML(options.title)}">
975
- <meta name="twitter:description" content="${escapeHTML(options.description)}">
1096
+ <meta name="twitter:description" content="${escapeHTML(socialDescription)}">${twitterAttributionBlock}
976
1097
  ${buildResponsiveIconLinks('../')}
977
1098
  <link rel="manifest" href="../site.webmanifest">
978
1099
  <meta name="color-scheme" content="light dark">
@@ -3,7 +3,30 @@ import type { LangTitleSubtitle, LanguageCode, LanguageMap } from '../types/inde
3
3
  export interface ResolvedMetadataEntry {
4
4
  readonly title: string;
5
5
  readonly description: string;
6
+ /**
7
+ * Optional longer (up to ~300 chars) editorial summary lifted from
8
+ * the language-specific executive brief BLUF paragraph. Used for
9
+ * `og:description` and `twitter:description` so social-card previews
10
+ * can show the full Bottom-Line-Up-Front context, while the
11
+ * short `description` stays within Google's ~160-char snippet
12
+ * budget. Empty string when no longer summary is available — the
13
+ * caller should then fall back to {@link description}.
14
+ */
15
+ readonly extendedDescription: string;
6
16
  readonly keywords: readonly string[];
17
+ /**
18
+ * `"localized-brief"` when the title/description came from a translated
19
+ * `executive-brief_<lang>.md`; `"english-brief"` when the locale fell
20
+ * through to the English brief; `"english-editorial"` when the locale
21
+ * used an aggregated-Markdown / artefact source; `"template"` when only
22
+ * the localized type/date template was available. For `lang === 'en'`
23
+ * the value is always `"english-brief"` or `"english-editorial"` or
24
+ * `"template"` (no fall-through). Lets downstream consumers — the
25
+ * news-index, the static-site fallback note, the manifest-SEO
26
+ * validator — record the asymmetry called out in
27
+ * [`.github/prompts/04-article-generation.md`](../../.github/prompts/04-article-generation.md) § 6.2 priority 3.
28
+ */
29
+ readonly source: 'manifest' | 'localized-brief' | 'english-brief' | 'english-editorial' | 'template';
7
30
  }
8
31
  /** Fully resolved metadata — one entry per supported language. */
9
32
  export type ResolvedMetadata = LanguageMap<ResolvedMetadataEntry>;
@@ -50,69 +73,7 @@ export interface ResolveMetadataOptions {
50
73
  */
51
74
  readonly runDir?: string;
52
75
  }
53
- /**
54
- * Return `true` when a line cannot serve as a prose description. Rejects
55
- * Markdown structural lines (headings, blockquotes, tables, HTML),
56
- * mermaid/chart directives, emoji-banner metadata rows, and the known
57
- * `Key: value` banners that Stage-B agents emit as artefact preamble.
58
- *
59
- * @param line - Trimmed line from the aggregated Markdown source
60
- * @returns `true` when the line is not prose and should be skipped
61
- */
62
- export declare function shouldSkipDescriptionLine(line: string): boolean;
63
- /**
64
- * Strip inline Markdown decorations so we can use the remaining text as
65
- * plain-text meta-tag content. Removes link syntax, emphasis, inline code
66
- * backticks, and HTML-entity fragments that the Markdown source sometimes
67
- * smuggles in. Keeps the visible text readable.
68
- *
69
- * @param raw - Trimmed Markdown line
70
- * @returns Plain-text variant
71
- */
72
- /**
73
- * Strip a leading all-caps prose label (e.g. `SITUATION:`, `KEY MOTION:`,
74
- * `BLUF:`, `BOTTOM LINE:`, `TIER-1:`) from a prose line. These labels
75
- * are common in BLUF-style editorial writing — they survive
76
- * {@link stripInlineMarkdown} (which strips the `**bold**` wrapper but
77
- * keeps the literal text) and would otherwise leak into the SEO
78
- * description as a confusing all-caps shout.
79
- *
80
- * Matches up to 4 hyphenated all-caps tokens, optionally followed by a
81
- * digit suffix (`TIER-1`), terminating at a colon. Returns the original
82
- * line when no opener is present.
83
- *
84
- * @param line - Plain prose line (post-{@link stripInlineMarkdown})
85
- * @returns Line with the all-caps opener removed
86
- */
87
- export declare function stripLeadingProseLabel(line: string): string;
88
- /**
89
- * Strip inline Markdown decorations so we can use the remaining text as
90
- * plain-text meta-tag content. Removes link syntax, emphasis, inline code
91
- * backticks, and HTML-entity fragments that the Markdown source sometimes
92
- * smuggles in. Keeps the visible text readable.
93
- *
94
- * @param raw - Trimmed Markdown line
95
- * @returns Plain-text variant
96
- */
97
- export declare function stripInlineMarkdown(raw: string): string;
98
- /**
99
- * Clamp a string to `DESCRIPTION_MAX_LENGTH` characters, appending
100
- * an ellipsis when truncation actually happens. Does not break words if
101
- * avoidable — a trailing partial word is trimmed back to the previous
102
- * space first.
103
- *
104
- * @param text - Raw description text
105
- * @returns Truncated description with trailing ellipsis when clipped
106
- */
107
- export declare function truncateDescription(text: string): string;
108
- /**
109
- * Clamp a title to `TITLE_MAX_LENGTH` characters in the same
110
- * word-boundary-preserving fashion as {@link truncateDescription}.
111
- *
112
- * @param text - Raw title text
113
- * @returns Truncated title with trailing ellipsis when clipped
114
- */
115
- export declare function truncateTitle(text: string): string;
76
+ export { shouldSkipDescriptionLine, stripLeadingProseLabel, stripInlineMarkdown, truncateDescription, truncateExtendedDescription, truncateTitle, extractFirstSentence, } from './metadata/text-utils.js';
116
77
  /**
117
78
  * Return the first Markdown H1 (`# …`) in the supplied text, stripped of
118
79
  * the leading `#` and trailing anchor syntax. Returns an empty string when
@@ -123,30 +84,33 @@ export declare function truncateTitle(text: string): string;
123
84
  */
124
85
  export declare function extractFirstH1(markdown: string): string;
125
86
  /**
126
- * Walk every line of the Markdown source and return the first line that
127
- * survives {@link shouldSkipDescriptionLine}. Inline Markdown decorations
128
- * are stripped and the result is truncated to fit `<meta description>`.
87
+ * Walk every line of the Markdown source and return the first paragraph
88
+ * that survives {@link shouldSkipDescriptionLine}. Consecutive non-blank
89
+ * prose lines are joined with a single space so hard-wrapped ledes
90
+ * (column-95 conventional wrap) produce a clean 140-180-character
91
+ * description rather than just the first 60-90-char line.
92
+ *
93
+ * Inline Markdown decorations are stripped and the result is truncated
94
+ * to fit `<meta description>`.
129
95
  *
130
96
  * @param markdown - Markdown source
131
97
  * @returns Prose description, or empty string when nothing qualifies
132
98
  */
133
99
  export declare function extractStrongProseLine(markdown: string): string;
100
+ export declare function extractLedeAfterHeading(markdown: string): string;
134
101
  /**
135
- * Walk the body of an editorial artefact and, when it contains a `## …`
136
- * heading whose text matches one of `EDITORIAL_LEDE_HEADINGS`,
137
- * return the first prose paragraph that follows that heading. This is
138
- * the journalist's lede ("60-Second Read", "TL;DR", "BLUF — …", …) and
139
- * is exactly the sentence that should power `<meta description>` and
140
- * the OG/Twitter description fields.
141
- *
142
- * Returns the empty string when no lede heading is found or no qualifying
143
- * prose follows it. Inline Markdown is stripped and the result is
144
- * truncated to fit `<meta description>`.
102
+ * Same parsing rules as {@link extractLedeAfterHeading} but with a
103
+ * larger byte budget so the full BLUF paragraph (typically 200-300
104
+ * characters in the editorial style guide) is captured for use as
105
+ * `og:description` / `twitter:description`. Returns the joined
106
+ * paragraph clamped via {@link truncateExtendedDescription} (which
107
+ * returns `''` when the result wouldn't be longer than the regular
108
+ * meta description).
145
109
  *
146
- * @param markdown - Editorial artefact source
147
- * @returns Lede paragraph, or empty string when none matched
110
+ * @param markdown - Brief body (SPDX preamble already stripped)
111
+ * @returns Extended lede paragraph, or `''` when not worth emitting
148
112
  */
149
- export declare function extractLedeAfterHeading(markdown: string): string;
113
+ export declare function extractExtendedLedeAfterHeading(markdown: string): string;
150
114
  /**
151
115
  * Return `true` when an artefact-H1 begins with one of the
152
116
  * `ARTIFACT_CATEGORY_PREFIXES` followed by a separator. Such H1s
@@ -217,6 +181,48 @@ export declare function extractArtifactHighlight(runDir: string, articleType: st
217
181
  readonly headline: string;
218
182
  readonly summary: string;
219
183
  } | null;
184
+ /**
185
+ * Return `true` when a top-level `.md` filename looks like a translated
186
+ * sibling of a canonical editorial artefact (e.g.
187
+ * `executive-brief_ar.md`). These files must be excluded from the
188
+ * top-level fallback scan in {@link extractArtifactHighlight} because
189
+ * their localized H1s evade the English-only generic-heading detector
190
+ * and would otherwise hijack the English SEO surfaces.
191
+ *
192
+ * @param filename - Run-relative `.md` filename (no path separators)
193
+ * @returns `true` when the file is a translated sibling brief
194
+ */
195
+ export declare function isTranslatedSiblingBrief(filename: string): boolean;
196
+ /**
197
+ * Mine the FIRST named priority finding from an executive-brief–style
198
+ * artefact body. Looks for a section heading from
199
+ * {@link PRIORITY_FINDING_SECTION_HEADINGS} and returns the first dossier
200
+ * name + descriptive paragraph found inside it. Supports the three
201
+ * canonical Stage-B authoring patterns:
202
+ *
203
+ * 1. **Bold-in-numbered-list** (breaking briefs):
204
+ * `1. **Digital Markets Act Enforcement** (TA-10-2026-0160, 2026-04-30)`
205
+ * ` Parliament adopted a resolution …`
206
+ * 2. **Numbered subheading** (committee briefs):
207
+ * `### 1. Clean Industrial Deal Implementation (ITRE/ENVI)`
208
+ * `The Clean Industrial Deal framework …`
209
+ * 3. **Bold-leading paragraph** (synthesis variants):
210
+ * `**Trigger 1: DMA Enforcement Resolution** (TA-10-2026-0160)`
211
+ * `- Significance: 🟢 HIGH IMPACT …`
212
+ *
213
+ * Trailing parenthesised metadata (`(TA-10-2026-0160, 2026-04-30)`,
214
+ * `(ITRE/ENVI)`) is stripped from the headline so it stays headline-shaped
215
+ * (`Digital Markets Act Enforcement`) rather than boilerplate
216
+ * (`Digital Markets Act Enforcement (TA-10-2026-0160, 2026-04-30)`).
217
+ *
218
+ * @param body - Editorial artefact body
219
+ * @returns `{headline, summary}` when a priority finding was identified;
220
+ * `null` when the body has no priority section or no usable item inside
221
+ */
222
+ export declare function extractPriorityFindingHighlight(body: string): {
223
+ readonly headline: string;
224
+ readonly summary: string;
225
+ } | null;
220
226
  /**
221
227
  * Build the per-language `{title, description}` pair using the
222
228
  * article-type–specific `*_TITLES` generator from
@@ -230,77 +236,7 @@ export declare function extractArtifactHighlight(runDir: string, articleType: st
230
236
  * @returns Per-language `LangTitleSubtitle`
231
237
  */
232
238
  export declare function buildTemplateFallback(articleType: string, date: string, committee?: string): LanguageMap<LangTitleSubtitle>;
233
- /**
234
- * Parse an ISO date and return the `[start, end]` week range as ISO
235
- * strings. Week starts on Monday and ends on the following Sunday.
236
- *
237
- * @param date - ISO date string (`YYYY-MM-DD`)
238
- * @returns `{ start, end }` both in `YYYY-MM-DD` form
239
- */
240
- export declare function deriveWeekRange(date: string): {
241
- readonly start: string;
242
- readonly end: string;
243
- };
244
- /**
245
- * Return the D-36 → D-8 reporting window for the `week-in-review`
246
- * article type. EP roll-call voting data is published with a 2–6 week
247
- * lag, so using the most-recent 7 days structurally produces a
248
- * vote-empty dataset. Shifting 8 days back and widening to 28 days
249
- * (start = D-36, end = D-8) ensures the window always contains at
250
- * least one full EP plenary week with published roll-call data
251
- * (ADR-006). Direction is consistent with the workflow's
252
- * `DATE_FROM` (start = D-36) → `DATE_TO` (end = D-8) variables.
253
- *
254
- * @param date - ISO article date string (`YYYY-MM-DD`) — typically TODAY
255
- * @returns `{ start: D-36, end: D-8 }` both as `YYYY-MM-DD` ISO strings
256
- */
257
- export declare function deriveReportingWindowForWeekInReview(date: string): {
258
- readonly start: string;
259
- readonly end: string;
260
- };
261
- /**
262
- * Return a human-friendly month label for an ISO date — English month
263
- * name + four-digit year (e.g. `April 2026`). The non-English template
264
- * generators accept this same label verbatim because they interpolate it
265
- * into a localized sentence rather than translating the month itself.
266
- *
267
- * @param date - ISO date string
268
- * @returns Month label, or the input when parsing fails
269
- */
270
- export declare function deriveMonthLabel(date: string): string;
271
- /**
272
- * Return a quarter label for an ISO date — `Q<n> <YYYY>` (e.g. `Q2 2026`).
273
- * Used by `quarter-ahead` and `quarter-in-review` title generators.
274
- *
275
- * @param date - ISO date string
276
- * @returns Quarter label, or the input when parsing fails
277
- */
278
- export declare function deriveQuarterLabel(date: string): string;
279
- /**
280
- * Return a four-digit year label for an ISO date. Used by `year-ahead`
281
- * and `year-in-review` title generators.
282
- *
283
- * @param date - ISO date string
284
- * @returns Year label, or the input when parsing fails
285
- */
286
- export declare function deriveYearLabel(date: string): string;
287
- /**
288
- * Return the EP-term label for an ISO date — `EP10 → 2029` or `EP11 → 2034`.
289
- * Used by `term-outlook` title generator.
290
- *
291
- * @param date - ISO date string
292
- * @returns Term label, or the input when parsing fails
293
- */
294
- export declare function deriveTermLabel(date: string): string;
295
- /**
296
- * Return the election-cycle label for an ISO date — pairs the outgoing
297
- * and incoming EP terms with the election year (e.g. `EP10 → EP11 (2029)`).
298
- * Used by the `election-cycle` title generator.
299
- *
300
- * @param date - ISO date string
301
- * @returns Cycle label, or the input when parsing fails
302
- */
303
- export declare function deriveElectionCycleLabel(date: string): string;
239
+ export { deriveWeekRange, deriveReportingWindowForWeekInReview, deriveMonthLabel, deriveQuarterLabel, deriveYearLabel, deriveTermLabel, deriveElectionCycleLabel, } from './metadata/date-labels.js';
304
240
  /**
305
241
  * Build a stable, localized keyword list from the article type plus the
306
242
  * resolved title/description context.