euparliamentmonitor 0.9.13 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/package.json +6 -4
  2. package/scripts/aggregator/article-generator.js +2 -1
  3. package/scripts/aggregator/article-html.d.ts +9 -0
  4. package/scripts/aggregator/article-html.js +134 -13
  5. package/scripts/aggregator/article-metadata.d.ts +25 -161
  6. package/scripts/aggregator/article-metadata.js +71 -649
  7. package/scripts/aggregator/editorial-brief-resolver.d.ts +9 -0
  8. package/scripts/aggregator/editorial-brief-resolver.js +3 -1
  9. package/scripts/aggregator/metadata/date-labels.d.ts +122 -0
  10. package/scripts/aggregator/metadata/date-labels.js +209 -0
  11. package/scripts/aggregator/metadata/text-utils.d.ts +188 -0
  12. package/scripts/aggregator/metadata/text-utils.js +542 -0
  13. package/scripts/constants/og-locales.d.ts +15 -0
  14. package/scripts/constants/og-locales.js +17 -0
  15. package/scripts/constants/seo/index.d.ts +21 -0
  16. package/scripts/constants/seo/index.js +23 -0
  17. package/scripts/constants/seo/og-locales.d.ts +59 -0
  18. package/scripts/constants/seo/og-locales.js +59 -0
  19. package/scripts/constants/seo/social-handles.d.ts +50 -0
  20. package/scripts/constants/seo/social-handles.js +65 -0
  21. package/scripts/constants/social-handles.d.ts +11 -0
  22. package/scripts/constants/social-handles.js +13 -0
  23. package/scripts/discover-untranslated-briefs.js +224 -19
  24. package/scripts/generators/news-indexes.d.ts +35 -0
  25. package/scripts/generators/news-indexes.js +67 -6
  26. package/scripts/generators/political-intelligence/html.js +14 -6
  27. package/scripts/generators/seo-copy.js +42 -0
  28. package/scripts/generators/sitemap/html.js +13 -5
  29. package/scripts/lint-src-todos.js +124 -0
  30. package/scripts/utils/copy-test-reports.js +1 -1
  31. package/scripts/utils/generate-docs-index.js +1 -1
  32. package/scripts/validate-brief-translations.js +158 -18
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "euparliamentmonitor",
3
- "version": "0.9.13",
3
+ "version": "0.9.14",
4
4
  "type": "module",
5
5
  "description": "European Parliament Intelligence Platform - Monitor political activity with systematic transparency",
6
6
  "main": "scripts/index.js",
@@ -79,6 +79,7 @@
79
79
  "validate-ep-api": "npx tsx src/utils/validate-ep-api.ts",
80
80
  "mcp:probe": "npx tsx src/utils/mcp-probe.ts",
81
81
  "lint:prompts": "node scripts/lint-prompts.js",
82
+ "lint:src-todos": "node scripts/lint-src-todos.js",
82
83
  "htmlhint": "sh -c 'htmlhint *.html; set -- news/*.html; if [ -e \"$1\" ]; then htmlhint \"$@\"; else echo \"No news/*.html files to lint\"; fi'",
83
84
  "serve": "python3 -m http.server 8080",
84
85
  "test": "vitest run",
@@ -92,7 +93,7 @@
92
93
  "test:e2e:headed": "playwright test --headed",
93
94
  "test:e2e:debug": "playwright test --debug",
94
95
  "test:e2e:report": "playwright show-report",
95
- "lint": "eslint src/",
96
+ "lint": "eslint src/ && node scripts/lint-src-todos.js",
96
97
  "lint:fix": "eslint src/ --fix",
97
98
  "knip": "knip",
98
99
  "knip:production": "knip --production",
@@ -168,11 +169,12 @@
168
169
  "eslint-plugin-jsdoc": "62.9.0",
169
170
  "eslint-plugin-security": "4.0.0",
170
171
  "eslint-plugin-sonarjs": "4.0.3",
172
+ "fast-check": "^4.8.0",
171
173
  "happy-dom": "20.9.0",
172
174
  "html-minifier-terser": "^7.2.0",
173
175
  "htmlhint": "1.9.2",
174
176
  "husky": "9.1.7",
175
- "jscpd": "4.2.2",
177
+ "jscpd": "4.2.3",
176
178
  "knip": "^6.7.0",
177
179
  "lint-staged": "17.0.5",
178
180
  "mermaid": "11.15.0",
@@ -182,7 +184,7 @@
182
184
  "sharp": "^0.34.5",
183
185
  "terser": "^5.47.1",
184
186
  "ts-api-utils": "2.5.0",
185
- "tsx": "4.22.0",
187
+ "tsx": "4.22.1",
186
188
  "typedoc": "0.28.19",
187
189
  "typescript": "6.0.3",
188
190
  "vitest": "4.1.6"
@@ -366,6 +366,7 @@ function writeLanguageVariant(lang, slug, aggregated, englishHtml, chromeOptions
366
366
  body: bodyHtml,
367
367
  title: entry.title,
368
368
  description: perLangDescription,
369
+ extendedDescription: entry.extendedDescription,
369
370
  keywords: entry.keywords,
370
371
  date: aggregated.date,
371
372
  articleType: aggregated.articleType,
@@ -444,7 +445,7 @@ function getMetadataEntry(map, lang) {
444
445
  return descriptor.value;
445
446
  }
446
447
  const en = Object.getOwnPropertyDescriptor(map, 'en')?.value;
447
- return en ?? { title: '', description: '', keywords: [], source: 'template' };
448
+ return (en ?? { title: '', description: '', extendedDescription: '', keywords: [], source: 'template' });
448
449
  }
449
450
  /**
450
451
  * Count the number of articles the site currently publishes, derived
@@ -21,6 +21,15 @@ export interface WrapArticleOptions {
21
21
  readonly title: string;
22
22
  /** Article description — shown in `<meta name="description">` and OG. */
23
23
  readonly description: string;
24
+ /**
25
+ * Optional: longer (up to ~300 chars) editorial summary lifted from
26
+ * the language-specific executive brief BLUF. When provided, used
27
+ * for `og:description` and `twitter:description`; falls back to
28
+ * `description` when absent. Lets social-card previews show the
29
+ * full BLUF paragraph while the short `<meta description>` stays
30
+ * within Google's ~160-char snippet budget.
31
+ */
32
+ readonly extendedDescription?: string;
24
33
  /** SEO keywords — shown in `<meta name="keywords">`. */
25
34
  readonly keywords?: readonly string[];
26
35
  /** Canonical ISO date of the run (YYYY-MM-DD). */
@@ -21,8 +21,11 @@
21
21
  import { BASE_URL, BUILD_SHORT, MERMAID_VERSION } from '../constants/config.js';
22
22
  import { buildHeadFreshnessTags } from '../constants/build-info-meta.js';
23
23
  import { ALL_LANGUAGES, LANGUAGE_NAMES, LANGUAGE_FLAGS, PAGE_TITLES, SKIP_LINK_TEXTS, TOC_ARIA_LABELS, ARTICLE_TYPE_LABELS, BACK_TO_NEWS_LABELS, ARTICLE_NAV_LABELS, VIEW_SOURCE_MARKDOWN_LABELS, ARTICLE_TYPE_ICONS, FOOTER_SITEMAP_LABELS, FOOTER_POLITICAL_INTELLIGENCE_LABELS, TRADECRAFT_HEADING_LABELS, TRADECRAFT_INTRO_LABELS, TRADECRAFT_METHODOLOGIES_LABELS, TRADECRAFT_TEMPLATES_LABELS, ANALYSIS_INDEX_HEADING_LABELS, ANALYSIS_INDEX_INTRO_LABELS, ANALYSIS_INDEX_COL_SECTION_LABELS, ANALYSIS_INDEX_COL_ARTIFACT_LABELS, ANALYSIS_INDEX_COL_PATH_LABELS, KEY_TAKEAWAYS_HEADING_LABELS, SUPPLEMENTARY_HEADING_LABELS, SECTION_TITLE_LABELS, getLocalizedString, getTextDirection, } from '../constants/languages.js';
24
+ import { buildOgLocaleTags } from '../constants/og-locales.js';
25
+ import { ORG_SAME_AS, buildTwitterAttributionTags } from '../constants/social-handles.js';
24
26
  import { ArticleCategory } from '../types/index.js';
25
27
  import { escapeHTML } from '../utils/file-utils.js';
28
+ import { stripHtmlTags } from '../utils/html-sanitize.js';
26
29
  import { buildResponsiveIconLinks, buildResponsiveSocialImageMeta, buildSiteFooter, buildSiteHeader, buildPageBanner, } from '../templates/section-builders.js';
27
30
  import { READER_GUIDE_SECTION_ID } from './reader-guide-constants.js';
28
31
  import { READER_GUIDE_TITLE_LABELS, getReaderGuideSectionIcon, } from './reader-intelligence-guide.js';
@@ -32,6 +35,63 @@ import { KEY_TAKEAWAYS_SECTION_ID } from './key-takeaways.js';
32
35
  import { getPoliticalIntelligenceFilename } from '../generators/political-intelligence.js';
33
36
  import { getSitemapFilename } from '../generators/sitemap/index.js';
34
37
  import { getCuratedTitle, getCuratedDescription, getArtifactInfo, } from '../generators/political-intelligence-descriptions.js';
38
/**
 * Look up the plain-text (icon-free) localized label for an article type.
 *
 * The result feeds the OpenGraph `article:section` meta tag and the
 * JSON-LD `articleSection` field, where an emoji prefix would trip
 * Google's NewsArticle structured-data validator.
 *
 * @param slug - Raw article type slug (e.g. "motions", "week-ahead")
 * @param lang - Target language code
 * @returns The localized label when one exists for `slug`; otherwise the
 *   slug itself with every hyphen replaced by a space
 */
function getLocalizedArticleTypePlain(slug, lang) {
    const localizedLabel = getLocalizedString(ARTICLE_TYPE_LABELS, lang)[slug];
    if (localizedLabel !== undefined && localizedLabel !== null) {
        return localizedLabel;
    }
    // No translation registered for this slug: humanise it instead.
    return slug.replace(/-/g, ' ');
}
52
/** Maximum length (UTF-16 code units) allowed for `NewsArticle.headline`. */
const HEADLINE_LIMIT = 110;
/**
 * A break point is only honoured when it lies beyond this index, so the
 * truncated headline never shrinks to a short stub.
 */
const HEADLINE_MIN_BREAK = 60;
/**
 * Separators that mark a natural end of a phrase, in no particular order:
 * spaced em-dash, spaced en-dash, colon, and sentence end.
 */
const HEADLINE_STRONG_BREAKS = [' — ', ' – ', ': ', '. '];
/**
 * Clamp an article title to the 110-character budget used for the
 * JSON-LD `NewsArticle.headline` field. The page `<title>` may be longer;
 * the structured-data headline needs its own, tighter cap.
 *
 * Truncation strategy, applied to the first 110 characters:
 *  1. Cut at the last "strong" boundary (spaced em-dash / en-dash, colon,
 *     or sentence end) when it lies beyond index 60, so the result reads
 *     as a self-contained phrase. The separator itself is dropped.
 *  2. Otherwise cut at the last space beyond index 60 so no word is
 *     sliced in half.
 *  3. Otherwise hard-cut at the limit.
 *
 * Strong boundaries are ranked ahead of plain spaces on purpose: every
 * strong separator contains a space, so taking a single maximum over all
 * candidates would always pick the last space and never the boundary.
 *
 * @param title - Resolved article title (plain text)
 * @returns Headline of at most 110 characters, without trailing whitespace
 */
function truncateHeadline(title) {
    const trimmed = title.trim();
    if (trimmed.length <= HEADLINE_LIMIT) {
        return trimmed;
    }
    let head = trimmed.slice(0, HEADLINE_LIMIT);
    // Slicing by code unit can split a surrogate pair (emoji, rare CJK);
    // drop a dangling high surrogate so the output stays well-formed.
    const lastUnit = head.charCodeAt(head.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        head = head.slice(0, -1);
    }
    const strongIdx = Math.max(...HEADLINE_STRONG_BREAKS.map((sep) => head.lastIndexOf(sep)));
    if (strongIdx > HEADLINE_MIN_BREAK) {
        return head.slice(0, strongIdx).trimEnd();
    }
    const wordIdx = head.lastIndexOf(' ');
    return (wordIdx > HEADLINE_MIN_BREAK ? head.slice(0, wordIdx) : head).trimEnd();
}
78
/**
 * Pick the separator placed between the article title and the site title
 * in the `{articleTitle} {sep} {siteTitle}` pattern.
 *
 * Guillemets replace the earlier em-dash separator, which clashed with
 * the em-dashes the editorial style already uses inside article titles
 * (`Title — Subtitle`). The guillemet points away from the article title
 * in reading order: right-pointing for left-to-right locales,
 * left-pointing for right-to-left ones.
 *
 * @param lang - Target language code
 * @returns `" « "` when the language's text direction is `rtl`,
 *   `" » "` otherwise (both padded with one space on each side)
 */
function getTitleSeparator(lang) {
    if (getTextDirection(lang) === 'rtl') {
        return ' « ';
    }
    return ' » ';
}
35
95
  /**
36
96
  * Resolve a localized article type label with icon. Falls back to the
37
97
  * humanised slug when a translation isn't available.
@@ -879,24 +939,69 @@ export function wrapArticleHtml(options) {
879
939
  : '';
880
940
  const tocHtml = buildArticleToc(options.toc ?? [], safeLang);
881
941
  const articleMainClass = tocHtml.length > 0 ? 'article-main--with-toc' : 'article-main--no-toc';
942
+ const articleSectionLabel = getLocalizedArticleTypePlain(options.articleType, safeLang);
943
+ // Count words from the rendered body for the JSON-LD `wordCount`
944
+ // field (Google's NewsArticle structured-data validator emits a
945
+ // warning when this is missing). Done by stripping HTML tags from
946
+ // the rendered body then splitting on whitespace — fast and
947
+ // CodeQL-safe.
948
+ const bodyText = stripHtmlTags(options.body);
949
+ const wordCount = bodyText.split(/\s+/u).filter((w) => w.length > 0).length;
950
+ // Build the JSON-LD image graph. Google requires NewsArticle.image
951
+ // to be an array (or single ImageObject) with explicit width/height
952
+ // covering at least one of the 1:1, 4:3, 16:9 aspect ratios for
953
+ // Top Stories carousel eligibility.
954
+ const jsonLdImages = [
955
+ {
956
+ '@type': 'ImageObject',
957
+ url: `${BASE_URL}/images/og-image-1200.jpg`,
958
+ width: 1200,
959
+ height: 630,
960
+ },
961
+ {
962
+ '@type': 'ImageObject',
963
+ url: `${BASE_URL}/images/og-image-1200.webp`,
964
+ width: 1200,
965
+ height: 630,
966
+ },
967
+ {
968
+ '@type': 'ImageObject',
969
+ url: `${BASE_URL}/images/og-image-1200.avif`,
970
+ width: 1200,
971
+ height: 630,
972
+ },
973
+ ];
882
974
  const jsonLd = {
883
975
  '@context': 'https://schema.org',
884
976
  '@type': 'NewsArticle',
885
- headline: options.title,
977
+ headline: truncateHeadline(options.title),
886
978
  description: options.description,
887
979
  datePublished: options.date,
888
980
  dateModified: options.date,
889
981
  inLanguage: safeLang,
890
982
  url: canonicalUrl,
891
- image: `${BASE_URL}/images/og-image-1200.jpg`,
892
- author: { '@type': 'Organization', name: PUBLISHER_NAME, url: 'https://hack23.com' },
983
+ mainEntityOfPage: { '@type': 'WebPage', '@id': canonicalUrl },
984
+ image: jsonLdImages,
985
+ author: {
986
+ '@type': 'NewsMediaOrganization',
987
+ name: PUBLISHER_NAME,
988
+ url: 'https://hack23.com',
989
+ sameAs: [...ORG_SAME_AS],
990
+ },
893
991
  publisher: {
894
- '@type': 'Organization',
992
+ '@type': 'NewsMediaOrganization',
895
993
  name: PUBLISHER_NAME,
896
994
  url: 'https://hack23.com',
897
995
  logo: { '@type': 'ImageObject', url: `${BASE_URL}/images/apple-touch-icon.png` },
996
+ sameAs: [...ORG_SAME_AS],
997
+ },
998
+ articleSection: articleSectionLabel,
999
+ wordCount,
1000
+ keywords: (options.keywords ?? []).join(', '),
1001
+ speakable: {
1002
+ '@type': 'SpeakableSpecification',
1003
+ cssSelector: ['.article-dek', '.article-body > p:first-of-type'],
898
1004
  },
899
- articleSection: options.articleType,
900
1005
  isPartOf: {
901
1006
  '@type': 'WebSite',
902
1007
  name: SITE_NAME,
@@ -921,7 +1026,7 @@ export function wrapArticleHtml(options) {
921
1026
  {
922
1027
  '@type': 'ListItem',
923
1028
  position: 2,
924
- name: options.articleType.replace(/-/g, ' '),
1029
+ name: articleSectionLabel,
925
1030
  item: `${BASE_URL}/news/`,
926
1031
  },
927
1032
  {
@@ -934,11 +1039,21 @@ export function wrapArticleHtml(options) {
934
1039
  };
935
1040
  const structuredData = [jsonLd, breadcrumbLd];
936
1041
  const jsonLdString = JSON.stringify(structuredData).replace(/</g, '\\u003c');
937
- const pageTitle = `${options.title}${siteTitle}`;
1042
+ const pageTitle = `${options.title}${getTitleSeparator(safeLang)}${siteTitle}`;
938
1043
  const keywords = (options.keywords ?? []).map((keyword) => keyword.trim()).filter(Boolean);
939
1044
  const keywordsMeta = keywords.length > 0
940
1045
  ? ` <meta name="keywords" content="${escapeHTML(keywords.join(', '))}">\n`
941
1046
  : '';
1047
+ // Use the longer extended description for og:description/twitter:description
1048
+ // when available so social-card previews show the full BLUF
1049
+ // paragraph; the short meta description stays within Google's
1050
+ // ~160-char snippet budget.
1051
+ const socialDescription = options.extendedDescription && options.extendedDescription.length > 0
1052
+ ? options.extendedDescription
1053
+ : options.description;
1054
+ const ogLocaleTags = buildOgLocaleTags(safeLang);
1055
+ const twitterAttribution = buildTwitterAttributionTags();
1056
+ const twitterAttributionBlock = twitterAttribution ? `\n${twitterAttribution}` : '';
942
1057
  const header = buildSiteHeader({
943
1058
  lang: safeLang,
944
1059
  pathPrefix: '../',
@@ -956,23 +1071,29 @@ export function wrapArticleHtml(options) {
956
1071
  <meta name="referrer" content="no-referrer">
957
1072
  <title>${escapeHTML(pageTitle)}</title>
958
1073
  <meta name="description" content="${escapeHTML(options.description)}">
959
- ${keywordsMeta} <meta name="robots" content="index, follow, max-image-preview:large">
1074
+ ${keywordsMeta} <meta name="robots" content="index, follow, max-snippet:-1, max-image-preview:large">
960
1075
  <meta name="author" content="${PUBLISHER_NAME}">
961
1076
  <meta name="publisher" content="${PUBLISHER_NAME}">
962
1077
  <meta name="date" content="${options.date}">
963
- <meta name="article:published_time" content="${options.date}">
1078
+ <meta property="article:published_time" content="${options.date}">
1079
+ <meta property="article:modified_time" content="${options.date}">
1080
+ <meta property="article:section" content="${escapeHTML(articleSectionLabel)}">
1081
+ <meta property="article:author" content="${PUBLISHER_NAME}">
1082
+ <meta property="article:publisher" content="https://hack23.com">
964
1083
  <link rel="canonical" href="${canonicalUrl}">
965
1084
  ${hreflangLinks}
1085
+ <link rel="alternate" type="application/rss+xml" title="EU Parliament Monitor RSS" href="${BASE_URL}/rss.xml">
1086
+ <link rel="preconnect" href="https://hack23.com" crossorigin>
966
1087
  <meta property="og:type" content="article">
967
1088
  <meta property="og:title" content="${escapeHTML(options.title)}">
968
- <meta property="og:description" content="${escapeHTML(options.description)}">
1089
+ <meta property="og:description" content="${escapeHTML(socialDescription)}">
969
1090
  <meta property="og:url" content="${canonicalUrl}">
970
1091
  <meta property="og:site_name" content="EU Parliament Monitor">
971
- <meta property="og:locale" content="${safeLang}">
972
- ${buildResponsiveSocialImageMeta(`${options.title}EU Parliament Monitor`)}
1092
+ ${ogLocaleTags}
1093
+ ${buildResponsiveSocialImageMeta(`${options.title}${getTitleSeparator(safeLang)}EU Parliament Monitor`)}
973
1094
  <meta name="twitter:card" content="summary_large_image">
974
1095
  <meta name="twitter:title" content="${escapeHTML(options.title)}">
975
- <meta name="twitter:description" content="${escapeHTML(options.description)}">
1096
+ <meta name="twitter:description" content="${escapeHTML(socialDescription)}">${twitterAttributionBlock}
976
1097
  ${buildResponsiveIconLinks('../')}
977
1098
  <link rel="manifest" href="../site.webmanifest">
978
1099
  <meta name="color-scheme" content="light dark">
@@ -3,6 +3,16 @@ import type { LangTitleSubtitle, LanguageCode, LanguageMap } from '../types/inde
3
3
  export interface ResolvedMetadataEntry {
4
4
  readonly title: string;
5
5
  readonly description: string;
6
+ /**
7
+ * Optional longer (up to ~300 chars) editorial summary lifted from
8
+ * the language-specific executive brief BLUF paragraph. Used for
9
+ * `og:description` and `twitter:description` so social-card previews
10
+ * can show the full Bottom-Line-Up-Front context, while the
11
+ * short `description` stays within Google's ~160-char snippet
12
+ * budget. Empty string when no longer summary is available — the
13
+ * caller should then fall back to {@link description}.
14
+ */
15
+ readonly extendedDescription: string;
6
16
  readonly keywords: readonly string[];
7
17
  /**
8
18
  * `"localized-brief"` when the title/description came from a translated
@@ -63,96 +73,7 @@ export interface ResolveMetadataOptions {
63
73
  */
64
74
  readonly runDir?: string;
65
75
  }
66
- /**
67
- * Return `true` when a line cannot serve as a prose description. Rejects
68
- * Markdown structural lines (headings, blockquotes, tables, HTML),
69
- * mermaid/chart directives, emoji-banner metadata rows, and the known
70
- * `Key: value` banners that Stage-B agents emit as artefact preamble.
71
- *
72
- * @param line - Trimmed line from the aggregated Markdown source
73
- * @returns `true` when the line is not prose and should be skipped
74
- */
75
- export declare function shouldSkipDescriptionLine(line: string): boolean;
76
- /**
77
- * Strip inline Markdown decorations so we can use the remaining text as
78
- * plain-text meta-tag content. Removes link syntax, emphasis, inline code
79
- * backticks, and HTML-entity fragments that the Markdown source sometimes
80
- * smuggles in. Keeps the visible text readable.
81
- *
82
- * @param raw - Trimmed Markdown line
83
- * @returns Plain-text variant
84
- */
85
- /**
86
- * Strip a leading all-caps prose label (e.g. `SITUATION:`, `KEY MOTION:`,
87
- * `BLUF:`, `BOTTOM LINE:`, `TIER-1:`) from a prose line. These labels
88
- * are common in BLUF-style editorial writing — they survive
89
- * {@link stripInlineMarkdown} (which strips the `**bold**` wrapper but
90
- * keeps the literal text) and would otherwise leak into the SEO
91
- * description as a confusing all-caps shout.
92
- *
93
- * Matches up to 4 hyphenated all-caps tokens, optionally followed by a
94
- * digit suffix (`TIER-1`), terminating at a colon. Returns the original
95
- * line when no opener is present.
96
- *
97
- * @param line - Plain prose line (post-{@link stripInlineMarkdown})
98
- * @returns Line with the all-caps opener removed
99
- */
100
- export declare function stripLeadingProseLabel(line: string): string;
101
- /**
102
- * Strip inline Markdown decorations so we can use the remaining text as
103
- * plain-text meta-tag content. Removes link syntax, emphasis, inline code
104
- * backticks, and HTML-entity fragments that the Markdown source sometimes
105
- * smuggles in. Keeps the visible text readable.
106
- *
107
- * @param raw - Trimmed Markdown line
108
- * @returns Plain-text variant
109
- */
110
- export declare function stripInlineMarkdown(raw: string): string;
111
- /**
112
- * Clamp a string to `DESCRIPTION_MAX_LENGTH` characters, appending
113
- * an ellipsis when truncation actually happens. Does not break words if
114
- * avoidable — a trailing partial word is trimmed back to the previous
115
- * space first.
116
- *
117
- * @param text - Raw description text
118
- * @returns Truncated description with trailing ellipsis when clipped
119
- */
120
- export declare function truncateDescription(text: string): string;
121
- /**
122
- * Clamp a title to `TITLE_MAX_LENGTH` characters in the same
123
- * word-boundary-preserving fashion as {@link truncateDescription}.
124
- *
125
- * @param text - Raw title text
126
- * @returns Truncated title with trailing ellipsis when clipped
127
- */
128
- export declare function truncateTitle(text: string): string;
129
- /**
130
- * Return the first complete sentence from a prose paragraph, suitable
131
- * for use as a fallback editorial title when the artefact H1 is
132
- * categorical (e.g. `# EU Parliament Committee Reports`) and the
133
- * resolver must derive `<title>` from the BLUF / lede summary instead.
134
- *
135
- * A "sentence" is the prefix up to the first sentence-terminator
136
- * (`. `, `! `, `? `, `; `) inside the `[HEADLINE_SOFT_MIN,
137
- * TITLE_MAX_LENGTH]` window. Common abbreviations (`Q1.`, `Q2.`,
138
- * `H1.`, `H2.`, `Mr.`, `Mrs.`, `e.g.`, `i.e.`, `vs.`) are skipped
139
- * so they don't terminate the sentence prematurely. When no
140
- * acceptable terminator exists in the window, returns the entire
141
- * input unchanged so {@link truncateTitle} can handle clause-boundary
142
- * truncation downstream.
143
- *
144
- * This produces journalistically clean titles even for the
145
- * propositions / committee-reports cases where the BLUF paragraph
146
- * opens with a single long sentence that exceeds 140 chars —
147
- * `truncateTitle` then breaks on a clause boundary, and the result is
148
- * still grammatical because the input was a sentence prefix rather
149
- * than an arbitrary paragraph slice.
150
- *
151
- * @param paragraph - Prose paragraph (post-{@link stripInlineMarkdown})
152
- * @returns First sentence, or the original paragraph when none can be
153
- * identified within the soft-min window
154
- */
155
- export declare function extractFirstSentence(paragraph: string): string;
76
+ export { shouldSkipDescriptionLine, stripLeadingProseLabel, stripInlineMarkdown, truncateDescription, truncateExtendedDescription, truncateTitle, extractFirstSentence, } from './metadata/text-utils.js';
156
77
  /**
157
78
  * Return the first Markdown H1 (`# …`) in the supplied text, stripped of
158
79
  * the leading `#` and trailing anchor syntax. Returns an empty string when
@@ -177,6 +98,19 @@ export declare function extractFirstH1(markdown: string): string;
177
98
  */
178
99
  export declare function extractStrongProseLine(markdown: string): string;
179
100
  export declare function extractLedeAfterHeading(markdown: string): string;
101
+ /**
102
+ * Same parsing rules as {@link extractLedeAfterHeading} but with a
103
+ * larger byte budget so the full BLUF paragraph (typically 200-300
104
+ * characters in the editorial style guide) is captured for use as
105
+ * `og:description` / `twitter:description`. Returns the joined
106
+ * paragraph clamped via {@link truncateExtendedDescription} (which
107
+ * returns `''` when the result wouldn't be longer than the regular
108
+ * meta description).
109
+ *
110
+ * @param markdown - Brief body (SPDX preamble already stripped)
111
+ * @returns Extended lede paragraph, or `''` when not worth emitting
112
+ */
113
+ export declare function extractExtendedLedeAfterHeading(markdown: string): string;
180
114
  /**
181
115
  * Return `true` when an artefact-H1 begins with one of the
182
116
  * `ARTIFACT_CATEGORY_PREFIXES` followed by a separator. Such H1s
@@ -302,77 +236,7 @@ export declare function extractPriorityFindingHighlight(body: string): {
302
236
  * @returns Per-language `LangTitleSubtitle`
303
237
  */
304
238
  export declare function buildTemplateFallback(articleType: string, date: string, committee?: string): LanguageMap<LangTitleSubtitle>;
305
- /**
306
- * Parse an ISO date and return the `[start, end]` week range as ISO
307
- * strings. Week starts on Monday and ends on the following Sunday.
308
- *
309
- * @param date - ISO date string (`YYYY-MM-DD`)
310
- * @returns `{ start, end }` both in `YYYY-MM-DD` form
311
- */
312
- export declare function deriveWeekRange(date: string): {
313
- readonly start: string;
314
- readonly end: string;
315
- };
316
- /**
317
- * Return the D-36 → D-8 reporting window for the `week-in-review`
318
- * article type. EP roll-call voting data is published with a 2–6 week
319
- * lag, so using the most-recent 7 days structurally produces a
320
- * vote-empty dataset. Shifting 8 days back and widening to 28 days
321
- * (start = D-36, end = D-8) ensures the window always contains at
322
- * least one full EP plenary week with published roll-call data
323
- * (ADR-006). Direction is consistent with the workflow's
324
- * `DATE_FROM` (start = D-36) → `DATE_TO` (end = D-8) variables.
325
- *
326
- * @param date - ISO article date string (`YYYY-MM-DD`) — typically TODAY
327
- * @returns `{ start: D-36, end: D-8 }` both as `YYYY-MM-DD` ISO strings
328
- */
329
- export declare function deriveReportingWindowForWeekInReview(date: string): {
330
- readonly start: string;
331
- readonly end: string;
332
- };
333
- /**
334
- * Return a human-friendly month label for an ISO date — English month
335
- * name + four-digit year (e.g. `April 2026`). The non-English template
336
- * generators accept this same label verbatim because they interpolate it
337
- * into a localized sentence rather than translating the month itself.
338
- *
339
- * @param date - ISO date string
340
- * @returns Month label, or the input when parsing fails
341
- */
342
- export declare function deriveMonthLabel(date: string): string;
343
- /**
344
- * Return a quarter label for an ISO date — `Q<n> <YYYY>` (e.g. `Q2 2026`).
345
- * Used by `quarter-ahead` and `quarter-in-review` title generators.
346
- *
347
- * @param date - ISO date string
348
- * @returns Quarter label, or the input when parsing fails
349
- */
350
- export declare function deriveQuarterLabel(date: string): string;
351
- /**
352
- * Return a four-digit year label for an ISO date. Used by `year-ahead`
353
- * and `year-in-review` title generators.
354
- *
355
- * @param date - ISO date string
356
- * @returns Year label, or the input when parsing fails
357
- */
358
- export declare function deriveYearLabel(date: string): string;
359
- /**
360
- * Return the EP-term label for an ISO date — `EP10 → 2029` or `EP11 → 2034`.
361
- * Used by `term-outlook` title generator.
362
- *
363
- * @param date - ISO date string
364
- * @returns Term label, or the input when parsing fails
365
- */
366
- export declare function deriveTermLabel(date: string): string;
367
- /**
368
- * Return the election-cycle label for an ISO date — pairs the outgoing
369
- * and incoming EP terms with the election year (e.g. `EP10 → EP11 (2029)`).
370
- * Used by the `election-cycle` title generator.
371
- *
372
- * @param date - ISO date string
373
- * @returns Cycle label, or the input when parsing fails
374
- */
375
- export declare function deriveElectionCycleLabel(date: string): string;
239
+ export { deriveWeekRange, deriveReportingWindowForWeekInReview, deriveMonthLabel, deriveQuarterLabel, deriveYearLabel, deriveTermLabel, deriveElectionCycleLabel, } from './metadata/date-labels.js';
376
240
  /**
377
241
  * Build a stable, localized keyword list from the article type plus the
378
242
  * resolved title/description context.