euparliamentmonitor 0.9.12 → 0.9.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +10 -5
- package/scripts/aggregator/analysis-aggregator.js +7 -2
- package/scripts/aggregator/article-generator.js +3 -1
- package/scripts/aggregator/article-html.d.ts +9 -0
- package/scripts/aggregator/article-html.js +134 -13
- package/scripts/aggregator/article-metadata.d.ts +86 -150
- package/scripts/aggregator/article-metadata.js +1171 -574
- package/scripts/aggregator/editorial-brief-resolver.d.ts +76 -0
- package/scripts/aggregator/editorial-brief-resolver.js +220 -0
- package/scripts/aggregator/metadata/date-labels.d.ts +122 -0
- package/scripts/aggregator/metadata/date-labels.js +209 -0
- package/scripts/aggregator/metadata/text-utils.d.ts +188 -0
- package/scripts/aggregator/metadata/text-utils.js +542 -0
- package/scripts/constants/og-locales.d.ts +15 -0
- package/scripts/constants/og-locales.js +17 -0
- package/scripts/constants/seo/index.d.ts +21 -0
- package/scripts/constants/seo/index.js +23 -0
- package/scripts/constants/seo/og-locales.d.ts +59 -0
- package/scripts/constants/seo/og-locales.js +59 -0
- package/scripts/constants/seo/social-handles.d.ts +50 -0
- package/scripts/constants/seo/social-handles.js +65 -0
- package/scripts/constants/social-handles.d.ts +11 -0
- package/scripts/constants/social-handles.js +13 -0
- package/scripts/discover-untranslated-briefs.js +534 -0
- package/scripts/generators/news-indexes.d.ts +63 -0
- package/scripts/generators/news-indexes.js +177 -26
- package/scripts/generators/political-intelligence/html.js +14 -6
- package/scripts/generators/seo-copy.js +42 -0
- package/scripts/generators/sitemap/html.js +13 -5
- package/scripts/lint-src-todos.js +124 -0
- package/scripts/templates/sync-template-frontmatter.js +4 -4
- package/scripts/utils/copy-test-reports.js +1 -1
- package/scripts/utils/generate-docs-index.js +1 -1
- package/scripts/validate-brief-translations.js +657 -0
- package/scripts/validate-manifest-seo.js +581 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "euparliamentmonitor",
|
|
3
|
-
"version": "0.9.12",
|
|
3
|
+
"version": "0.9.14",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "European Parliament Intelligence Platform - Monitor political activity with systematic transparency",
|
|
6
6
|
"main": "scripts/index.js",
|
|
@@ -63,6 +63,9 @@
|
|
|
63
63
|
"build:check-tests": "tsc --project tsconfig.test.json --noEmit",
|
|
64
64
|
"copy-vendor": "node scripts/copy-vendor.js",
|
|
65
65
|
"validate-analysis": "node scripts/validate-analysis-completeness.js",
|
|
66
|
+
"discover:untranslated-briefs": "node scripts/discover-untranslated-briefs.js",
|
|
67
|
+
"validate:translations": "node scripts/validate-brief-translations.js",
|
|
68
|
+
"validate:manifest-seo": "node scripts/validate-manifest-seo.js",
|
|
66
69
|
"sync:templates": "node scripts/templates/sync-template-frontmatter.js",
|
|
67
70
|
"sync:templates:check": "node scripts/templates/sync-template-frontmatter.js --check",
|
|
68
71
|
"prior-run-diff": "node scripts/aggregator/prior-run-diff.js",
|
|
@@ -76,6 +79,7 @@
|
|
|
76
79
|
"validate-ep-api": "npx tsx src/utils/validate-ep-api.ts",
|
|
77
80
|
"mcp:probe": "npx tsx src/utils/mcp-probe.ts",
|
|
78
81
|
"lint:prompts": "node scripts/lint-prompts.js",
|
|
82
|
+
"lint:src-todos": "node scripts/lint-src-todos.js",
|
|
79
83
|
"htmlhint": "sh -c 'htmlhint *.html; set -- news/*.html; if [ -e \"$1\" ]; then htmlhint \"$@\"; else echo \"No news/*.html files to lint\"; fi'",
|
|
80
84
|
"serve": "python3 -m http.server 8080",
|
|
81
85
|
"test": "vitest run",
|
|
@@ -89,7 +93,7 @@
|
|
|
89
93
|
"test:e2e:headed": "playwright test --headed",
|
|
90
94
|
"test:e2e:debug": "playwright test --debug",
|
|
91
95
|
"test:e2e:report": "playwright show-report",
|
|
92
|
-
"lint": "eslint src/",
|
|
96
|
+
"lint": "eslint src/ && node scripts/lint-src-todos.js",
|
|
93
97
|
"lint:fix": "eslint src/ --fix",
|
|
94
98
|
"knip": "knip",
|
|
95
99
|
"knip:production": "knip --production",
|
|
@@ -165,13 +169,14 @@
|
|
|
165
169
|
"eslint-plugin-jsdoc": "62.9.0",
|
|
166
170
|
"eslint-plugin-security": "4.0.0",
|
|
167
171
|
"eslint-plugin-sonarjs": "4.0.3",
|
|
172
|
+
"fast-check": "^4.8.0",
|
|
168
173
|
"happy-dom": "20.9.0",
|
|
169
174
|
"html-minifier-terser": "^7.2.0",
|
|
170
175
|
"htmlhint": "1.9.2",
|
|
171
176
|
"husky": "9.1.7",
|
|
172
|
-
"jscpd": "4.2.
|
|
177
|
+
"jscpd": "4.2.3",
|
|
173
178
|
"knip": "^6.7.0",
|
|
174
|
-
"lint-staged": "17.0.
|
|
179
|
+
"lint-staged": "17.0.5",
|
|
175
180
|
"mermaid": "11.15.0",
|
|
176
181
|
"papaparse": "5.5.3",
|
|
177
182
|
"prettier": "3.8.3",
|
|
@@ -179,7 +184,7 @@
|
|
|
179
184
|
"sharp": "^0.34.5",
|
|
180
185
|
"terser": "^5.47.1",
|
|
181
186
|
"ts-api-utils": "2.5.0",
|
|
182
|
-
"tsx": "4.22.
|
|
187
|
+
"tsx": "4.22.1",
|
|
183
188
|
"typedoc": "0.28.19",
|
|
184
189
|
"typescript": "6.0.3",
|
|
185
190
|
"vitest": "4.1.6"
|
|
@@ -18,6 +18,10 @@ import { buildKeyTakeaways, KEY_TAKEAWAYS_SECTION_ID, KEY_TAKEAWAYS_SECTION_TITL
|
|
|
18
18
|
import { flattenManifestFiles as _flattenManifestFiles, latestGateResult as _latestGateResult, resolveArticleType as _resolveArticleType, resolveRunId as _resolveRunId, } from './manifest/index.js';
|
|
19
19
|
import { READER_GUIDE_SECTION_ID, READER_GUIDE_SECTION_IDS, READER_GUIDE_SECTION_TITLE, } from './reader-guide-constants.js';
|
|
20
20
|
export { READER_GUIDE_SECTION_ID, READER_GUIDE_SECTION_IDS, READER_GUIDE_SECTION_TITLE, } from './reader-guide-constants.js';
|
|
21
|
+
const TRADECRAFT_EXCLUDED_FILES = new Set([
|
|
22
|
+
'analysis/methodologies/executive-brief-translation-guide.md',
|
|
23
|
+
'analysis/templates/executive-brief-translation-template.md',
|
|
24
|
+
]);
|
|
21
25
|
/**
|
|
22
26
|
* Normalise `manifest.files` into a flat list of `runRelPath` strings.
|
|
23
27
|
*
|
|
@@ -94,8 +98,9 @@ export function discoverTradecraftFiles(repoRoot) {
|
|
|
94
98
|
continue;
|
|
95
99
|
const entries = fs.readdirSync(dir, { withFileTypes: true });
|
|
96
100
|
for (const entry of entries) {
|
|
97
|
-
|
|
98
|
-
|
|
101
|
+
const rel = `${sub}/${entry.name}`;
|
|
102
|
+
if (entry.isFile() && entry.name.endsWith('.md') && !TRADECRAFT_EXCLUDED_FILES.has(rel)) {
|
|
103
|
+
result.push(rel);
|
|
99
104
|
}
|
|
100
105
|
}
|
|
101
106
|
}
|
|
@@ -366,6 +366,7 @@ function writeLanguageVariant(lang, slug, aggregated, englishHtml, chromeOptions
|
|
|
366
366
|
body: bodyHtml,
|
|
367
367
|
title: entry.title,
|
|
368
368
|
description: perLangDescription,
|
|
369
|
+
extendedDescription: entry.extendedDescription,
|
|
369
370
|
keywords: entry.keywords,
|
|
370
371
|
date: aggregated.date,
|
|
371
372
|
articleType: aggregated.articleType,
|
|
@@ -444,7 +445,7 @@ function getMetadataEntry(map, lang) {
|
|
|
444
445
|
return descriptor.value;
|
|
445
446
|
}
|
|
446
447
|
const en = Object.getOwnPropertyDescriptor(map, 'en')?.value;
|
|
447
|
-
return en ?? { title: '', description: '', keywords: [] };
|
|
448
|
+
return (en ?? { title: '', description: '', extendedDescription: '', keywords: [], source: 'template' });
|
|
448
449
|
}
|
|
449
450
|
/**
|
|
450
451
|
* Count the number of articles the site currently publishes, derived
|
|
@@ -699,6 +700,7 @@ function applyCliOverrides(base, titleOverride, descriptionOverride) {
|
|
|
699
700
|
title: titleOverride ?? entry.title,
|
|
700
701
|
description: descriptionOverride ?? entry.description,
|
|
701
702
|
keywords: entry.keywords,
|
|
703
|
+
source: titleOverride || descriptionOverride ? 'manifest' : entry.source,
|
|
702
704
|
},
|
|
703
705
|
enumerable: true,
|
|
704
706
|
writable: true,
|
|
@@ -21,6 +21,15 @@ export interface WrapArticleOptions {
|
|
|
21
21
|
readonly title: string;
|
|
22
22
|
/** Article description — shown in `<meta name="description">` and OG. */
|
|
23
23
|
readonly description: string;
|
|
24
|
+
/**
|
|
25
|
+
* Optional: longer (up to ~300 chars) editorial summary lifted from
|
|
26
|
+
* the language-specific executive brief BLUF. When provided, used
|
|
27
|
+
* for `og:description` and `twitter:description`; falls back to
|
|
28
|
+
* `description` when absent. Lets social-card previews show the
|
|
29
|
+
* full BLUF paragraph while the short `<meta description>` stays
|
|
30
|
+
* within Google's ~160-char snippet budget.
|
|
31
|
+
*/
|
|
32
|
+
readonly extendedDescription?: string;
|
|
24
33
|
/** SEO keywords — shown in `<meta name="keywords">`. */
|
|
25
34
|
readonly keywords?: readonly string[];
|
|
26
35
|
/** Canonical ISO date of the run (YYYY-MM-DD). */
|
|
@@ -21,8 +21,11 @@
|
|
|
21
21
|
import { BASE_URL, BUILD_SHORT, MERMAID_VERSION } from '../constants/config.js';
|
|
22
22
|
import { buildHeadFreshnessTags } from '../constants/build-info-meta.js';
|
|
23
23
|
import { ALL_LANGUAGES, LANGUAGE_NAMES, LANGUAGE_FLAGS, PAGE_TITLES, SKIP_LINK_TEXTS, TOC_ARIA_LABELS, ARTICLE_TYPE_LABELS, BACK_TO_NEWS_LABELS, ARTICLE_NAV_LABELS, VIEW_SOURCE_MARKDOWN_LABELS, ARTICLE_TYPE_ICONS, FOOTER_SITEMAP_LABELS, FOOTER_POLITICAL_INTELLIGENCE_LABELS, TRADECRAFT_HEADING_LABELS, TRADECRAFT_INTRO_LABELS, TRADECRAFT_METHODOLOGIES_LABELS, TRADECRAFT_TEMPLATES_LABELS, ANALYSIS_INDEX_HEADING_LABELS, ANALYSIS_INDEX_INTRO_LABELS, ANALYSIS_INDEX_COL_SECTION_LABELS, ANALYSIS_INDEX_COL_ARTIFACT_LABELS, ANALYSIS_INDEX_COL_PATH_LABELS, KEY_TAKEAWAYS_HEADING_LABELS, SUPPLEMENTARY_HEADING_LABELS, SECTION_TITLE_LABELS, getLocalizedString, getTextDirection, } from '../constants/languages.js';
|
|
24
|
+
import { buildOgLocaleTags } from '../constants/og-locales.js';
|
|
25
|
+
import { ORG_SAME_AS, buildTwitterAttributionTags } from '../constants/social-handles.js';
|
|
24
26
|
import { ArticleCategory } from '../types/index.js';
|
|
25
27
|
import { escapeHTML } from '../utils/file-utils.js';
|
|
28
|
+
import { stripHtmlTags } from '../utils/html-sanitize.js';
|
|
26
29
|
import { buildResponsiveIconLinks, buildResponsiveSocialImageMeta, buildSiteFooter, buildSiteHeader, buildPageBanner, } from '../templates/section-builders.js';
|
|
27
30
|
import { READER_GUIDE_SECTION_ID } from './reader-guide-constants.js';
|
|
28
31
|
import { READER_GUIDE_TITLE_LABELS, getReaderGuideSectionIcon, } from './reader-intelligence-guide.js';
|
|
@@ -32,6 +35,63 @@ import { KEY_TAKEAWAYS_SECTION_ID } from './key-takeaways.js';
|
|
|
32
35
|
import { getPoliticalIntelligenceFilename } from '../generators/political-intelligence.js';
|
|
33
36
|
import { getSitemapFilename } from '../generators/sitemap/index.js';
|
|
34
37
|
import { getCuratedTitle, getCuratedDescription, getArtifactInfo, } from '../generators/political-intelligence-descriptions.js';
|
|
38
|
+
/**
 * Look up the localized label for an article type, without any icon
 * emoji in front of it. The result feeds the OpenGraph
 * `article:section` meta and the JSON-LD `articleSection` field, where
 * emoji break Google's NewsArticle structured-data validator.
 *
 * When the label table for `lang` has no entry for `slug`, the slug
 * itself is humanised by turning every hyphen into a space.
 *
 * @param slug - Raw article type slug (e.g. "motions", "week-ahead")
 * @param lang - Target language code
 * @returns Localized label without icon (e.g. "Plenary Votes & Resolutions")
 */
function getLocalizedArticleTypePlain(slug, lang) {
    const localized = getLocalizedString(ARTICLE_TYPE_LABELS, lang)[slug];
    if (localized !== undefined && localized !== null) {
        return localized;
    }
    // No translation available: fall back to the slug with hyphens spaced out.
    return slug.replace(/-/g, ' ');
}
|
|
52
|
+
/** Maximum length (UTF-16 code units) allowed for the JSON-LD `headline`. */
const HEADLINE_LIMIT = 110;
/**
 * A break position is only honoured when it lies beyond this index, so a
 * truncated headline never collapses to a very short fragment.
 */
const HEADLINE_MIN_BREAK = 60;
/**
 * Google's NewsArticle structured-data validator hard-caps the
 * `headline` field at 110 characters. Page `<title>` can be longer
 * (we already truncate to a higher limit in
 * `article-metadata.ts::truncateTitle`), but the JSON-LD headline
 * needs its own, tighter cap or the article loses Top Stories
 * carousel eligibility.
 *
 * Truncation strategy, in order of preference:
 *
 * 1. Cut at the last em-dash (` — `), en-dash (` – `), colon (`: `) or
 *    sentence boundary (`. `) inside the 110-char window, so the
 *    result still reads as a self-contained phrase.
 * 2. Otherwise cut at the last space inside the window.
 * 3. Otherwise hard-cut at the limit.
 *
 * Options 1 and 2 are only used when the break lies past index 60.
 *
 * The phrase separators must be tested *before* the plain space: each
 * of them contains a space at a later index than its own match index,
 * so taking a single maximum over all five candidates would always
 * select the last space and the phrase preference would never apply.
 *
 * @param title - Resolved article title (already escaped-safe text)
 * @returns Headline ≤ 110 characters, suitable for `NewsArticle.headline`
 */
function truncateHeadline(title) {
    const trimmed = title.trim();
    if (trimmed.length <= HEADLINE_LIMIT)
        return trimmed;
    const head = trimmed.slice(0, HEADLINE_LIMIT);
    // 1. Phrase-level boundary (the separator itself is dropped).
    const phraseBreak = Math.max(...[' — ', ' – ', ': ', '. '].map((sep) => head.lastIndexOf(sep)));
    if (phraseBreak > HEADLINE_MIN_BREAK)
        return head.slice(0, phraseBreak).trimEnd();
    // 2. Word boundary.
    const wordBreak = head.lastIndexOf(' ');
    if (wordBreak > HEADLINE_MIN_BREAK)
        return head.slice(0, wordBreak).trimEnd();
    // 3. Hard cut. If the window ends on a lone high surrogate, drop it so
    //    the emitted JSON-LD never contains half of a surrogate pair.
    const lastUnit = head.charCodeAt(head.length - 1);
    const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
    return (endsOnHighSurrogate ? head.slice(0, -1) : head).trimEnd();
}
|
|
78
|
+
/**
 * Pick the separator placed between the article title and the site
 * title in the `{articleTitle} {sep} {siteTitle}` pattern.
 *
 * Left-to-right locales get the right-pointing guillemet (»);
 * right-to-left locales (Arabic, Hebrew) get the left-pointing
 * guillemet («), so the visual hierarchy still reads from the primary
 * title towards the site name without breaking bidi flow.
 *
 * A guillemet replaces the earlier em-dash separator, which collided
 * with em-dashes inside article titles (the editorial style uses
 * `Title — Subtitle`) and rendered ambiguously in screen readers.
 *
 * @param lang - Target language code
 * @returns `" » "` for LTR locales, `" « "` for RTL
 */
function getTitleSeparator(lang) {
    if (getTextDirection(lang) === 'rtl') {
        return ' « ';
    }
    return ' » ';
}
|
|
35
95
|
/**
|
|
36
96
|
* Resolve a localized article type label with icon. Falls back to the
|
|
37
97
|
* humanised slug when a translation isn't available.
|
|
@@ -879,24 +939,69 @@ export function wrapArticleHtml(options) {
|
|
|
879
939
|
: '';
|
|
880
940
|
const tocHtml = buildArticleToc(options.toc ?? [], safeLang);
|
|
881
941
|
const articleMainClass = tocHtml.length > 0 ? 'article-main--with-toc' : 'article-main--no-toc';
|
|
942
|
+
const articleSectionLabel = getLocalizedArticleTypePlain(options.articleType, safeLang);
|
|
943
|
+
// Count words from the rendered body for the JSON-LD `wordCount`
|
|
944
|
+
// field (Google's NewsArticle structured-data validator emits a
|
|
945
|
+
// warning when this is missing). Done by stripping HTML tags from
|
|
946
|
+
// the rendered body then splitting on whitespace — fast and
|
|
947
|
+
// CodeQL-safe.
|
|
948
|
+
const bodyText = stripHtmlTags(options.body);
|
|
949
|
+
const wordCount = bodyText.split(/\s+/u).filter((w) => w.length > 0).length;
|
|
950
|
+
// Build the JSON-LD image graph. Google requires NewsArticle.image
|
|
951
|
+
// to be an array (or single ImageObject) with explicit width/height
|
|
952
|
+
// covering at least one of the 1:1, 4:3, 16:9 aspect ratios for
|
|
953
|
+
// Top Stories carousel eligibility.
|
|
954
|
+
const jsonLdImages = [
|
|
955
|
+
{
|
|
956
|
+
'@type': 'ImageObject',
|
|
957
|
+
url: `${BASE_URL}/images/og-image-1200.jpg`,
|
|
958
|
+
width: 1200,
|
|
959
|
+
height: 630,
|
|
960
|
+
},
|
|
961
|
+
{
|
|
962
|
+
'@type': 'ImageObject',
|
|
963
|
+
url: `${BASE_URL}/images/og-image-1200.webp`,
|
|
964
|
+
width: 1200,
|
|
965
|
+
height: 630,
|
|
966
|
+
},
|
|
967
|
+
{
|
|
968
|
+
'@type': 'ImageObject',
|
|
969
|
+
url: `${BASE_URL}/images/og-image-1200.avif`,
|
|
970
|
+
width: 1200,
|
|
971
|
+
height: 630,
|
|
972
|
+
},
|
|
973
|
+
];
|
|
882
974
|
const jsonLd = {
|
|
883
975
|
'@context': 'https://schema.org',
|
|
884
976
|
'@type': 'NewsArticle',
|
|
885
|
-
headline: options.title,
|
|
977
|
+
headline: truncateHeadline(options.title),
|
|
886
978
|
description: options.description,
|
|
887
979
|
datePublished: options.date,
|
|
888
980
|
dateModified: options.date,
|
|
889
981
|
inLanguage: safeLang,
|
|
890
982
|
url: canonicalUrl,
|
|
891
|
-
|
|
892
|
-
|
|
983
|
+
mainEntityOfPage: { '@type': 'WebPage', '@id': canonicalUrl },
|
|
984
|
+
image: jsonLdImages,
|
|
985
|
+
author: {
|
|
986
|
+
'@type': 'NewsMediaOrganization',
|
|
987
|
+
name: PUBLISHER_NAME,
|
|
988
|
+
url: 'https://hack23.com',
|
|
989
|
+
sameAs: [...ORG_SAME_AS],
|
|
990
|
+
},
|
|
893
991
|
publisher: {
|
|
894
|
-
'@type': '
|
|
992
|
+
'@type': 'NewsMediaOrganization',
|
|
895
993
|
name: PUBLISHER_NAME,
|
|
896
994
|
url: 'https://hack23.com',
|
|
897
995
|
logo: { '@type': 'ImageObject', url: `${BASE_URL}/images/apple-touch-icon.png` },
|
|
996
|
+
sameAs: [...ORG_SAME_AS],
|
|
997
|
+
},
|
|
998
|
+
articleSection: articleSectionLabel,
|
|
999
|
+
wordCount,
|
|
1000
|
+
keywords: (options.keywords ?? []).join(', '),
|
|
1001
|
+
speakable: {
|
|
1002
|
+
'@type': 'SpeakableSpecification',
|
|
1003
|
+
cssSelector: ['.article-dek', '.article-body > p:first-of-type'],
|
|
898
1004
|
},
|
|
899
|
-
articleSection: options.articleType,
|
|
900
1005
|
isPartOf: {
|
|
901
1006
|
'@type': 'WebSite',
|
|
902
1007
|
name: SITE_NAME,
|
|
@@ -921,7 +1026,7 @@ export function wrapArticleHtml(options) {
|
|
|
921
1026
|
{
|
|
922
1027
|
'@type': 'ListItem',
|
|
923
1028
|
position: 2,
|
|
924
|
-
name:
|
|
1029
|
+
name: articleSectionLabel,
|
|
925
1030
|
item: `${BASE_URL}/news/`,
|
|
926
1031
|
},
|
|
927
1032
|
{
|
|
@@ -934,11 +1039,21 @@ export function wrapArticleHtml(options) {
|
|
|
934
1039
|
};
|
|
935
1040
|
const structuredData = [jsonLd, breadcrumbLd];
|
|
936
1041
|
const jsonLdString = JSON.stringify(structuredData).replace(/</g, '\\u003c');
|
|
937
|
-
const pageTitle = `${options.title}
|
|
1042
|
+
const pageTitle = `${options.title}${getTitleSeparator(safeLang)}${siteTitle}`;
|
|
938
1043
|
const keywords = (options.keywords ?? []).map((keyword) => keyword.trim()).filter(Boolean);
|
|
939
1044
|
const keywordsMeta = keywords.length > 0
|
|
940
1045
|
? ` <meta name="keywords" content="${escapeHTML(keywords.join(', '))}">\n`
|
|
941
1046
|
: '';
|
|
1047
|
+
// Use the longer extended description for og:description/twitter:description
|
|
1048
|
+
// when available so social-card previews show the full BLUF
|
|
1049
|
+
// paragraph; the short meta description stays within Google's
|
|
1050
|
+
// ~160-char snippet budget.
|
|
1051
|
+
const socialDescription = options.extendedDescription && options.extendedDescription.length > 0
|
|
1052
|
+
? options.extendedDescription
|
|
1053
|
+
: options.description;
|
|
1054
|
+
const ogLocaleTags = buildOgLocaleTags(safeLang);
|
|
1055
|
+
const twitterAttribution = buildTwitterAttributionTags();
|
|
1056
|
+
const twitterAttributionBlock = twitterAttribution ? `\n${twitterAttribution}` : '';
|
|
942
1057
|
const header = buildSiteHeader({
|
|
943
1058
|
lang: safeLang,
|
|
944
1059
|
pathPrefix: '../',
|
|
@@ -956,23 +1071,29 @@ export function wrapArticleHtml(options) {
|
|
|
956
1071
|
<meta name="referrer" content="no-referrer">
|
|
957
1072
|
<title>${escapeHTML(pageTitle)}</title>
|
|
958
1073
|
<meta name="description" content="${escapeHTML(options.description)}">
|
|
959
|
-
${keywordsMeta} <meta name="robots" content="index, follow, max-image-preview:large">
|
|
1074
|
+
${keywordsMeta} <meta name="robots" content="index, follow, max-snippet:-1, max-image-preview:large">
|
|
960
1075
|
<meta name="author" content="${PUBLISHER_NAME}">
|
|
961
1076
|
<meta name="publisher" content="${PUBLISHER_NAME}">
|
|
962
1077
|
<meta name="date" content="${options.date}">
|
|
963
|
-
<meta
|
|
1078
|
+
<meta property="article:published_time" content="${options.date}">
|
|
1079
|
+
<meta property="article:modified_time" content="${options.date}">
|
|
1080
|
+
<meta property="article:section" content="${escapeHTML(articleSectionLabel)}">
|
|
1081
|
+
<meta property="article:author" content="${PUBLISHER_NAME}">
|
|
1082
|
+
<meta property="article:publisher" content="https://hack23.com">
|
|
964
1083
|
<link rel="canonical" href="${canonicalUrl}">
|
|
965
1084
|
${hreflangLinks}
|
|
1085
|
+
<link rel="alternate" type="application/rss+xml" title="EU Parliament Monitor RSS" href="${BASE_URL}/rss.xml">
|
|
1086
|
+
<link rel="preconnect" href="https://hack23.com" crossorigin>
|
|
966
1087
|
<meta property="og:type" content="article">
|
|
967
1088
|
<meta property="og:title" content="${escapeHTML(options.title)}">
|
|
968
|
-
<meta property="og:description" content="${escapeHTML(
|
|
1089
|
+
<meta property="og:description" content="${escapeHTML(socialDescription)}">
|
|
969
1090
|
<meta property="og:url" content="${canonicalUrl}">
|
|
970
1091
|
<meta property="og:site_name" content="EU Parliament Monitor">
|
|
971
|
-
|
|
972
|
-
${buildResponsiveSocialImageMeta(`${options.title}
|
|
1092
|
+
${ogLocaleTags}
|
|
1093
|
+
${buildResponsiveSocialImageMeta(`${options.title}${getTitleSeparator(safeLang)}EU Parliament Monitor`)}
|
|
973
1094
|
<meta name="twitter:card" content="summary_large_image">
|
|
974
1095
|
<meta name="twitter:title" content="${escapeHTML(options.title)}">
|
|
975
|
-
<meta name="twitter:description" content="${escapeHTML(
|
|
1096
|
+
<meta name="twitter:description" content="${escapeHTML(socialDescription)}">${twitterAttributionBlock}
|
|
976
1097
|
${buildResponsiveIconLinks('../')}
|
|
977
1098
|
<link rel="manifest" href="../site.webmanifest">
|
|
978
1099
|
<meta name="color-scheme" content="light dark">
|
|
@@ -3,7 +3,30 @@ import type { LangTitleSubtitle, LanguageCode, LanguageMap } from '../types/inde
|
|
|
3
3
|
export interface ResolvedMetadataEntry {
|
|
4
4
|
readonly title: string;
|
|
5
5
|
readonly description: string;
|
|
6
|
+
/**
|
|
7
|
+
* Optional longer (up to ~300 chars) editorial summary lifted from
|
|
8
|
+
* the language-specific executive brief BLUF paragraph. Used for
|
|
9
|
+
* `og:description` and `twitter:description` so social-card previews
|
|
10
|
+
* can show the full Bottom-Line-Up-Front context, while the
|
|
11
|
+
* short `description` stays within Google's ~160-char snippet
|
|
12
|
+
* budget. Empty string when no longer summary is available — the
|
|
13
|
+
* caller should then fall back to {@link description}.
|
|
14
|
+
*/
|
|
15
|
+
readonly extendedDescription: string;
|
|
6
16
|
readonly keywords: readonly string[];
|
|
17
|
+
/**
|
|
18
|
+
* `"localized-brief"` when the title/description came from a translated
|
|
19
|
+
* `executive-brief_<lang>.md`; `"english-brief"` when the locale fell
|
|
20
|
+
* through to the English brief; `"english-editorial"` when the locale
|
|
21
|
+
* used an aggregated-Markdown / artefact source; `"template"` when only
|
|
22
|
+
* the localized type/date template was available. For `lang === 'en'`
|
|
23
|
+
* the value is always `"english-brief"` or `"english-editorial"` or
|
|
24
|
+
* `"template"` (no fall-through). Lets downstream consumers — the
|
|
25
|
+
* news-index, the static-site fallback note, the manifest-SEO
|
|
26
|
+
* validator — record the asymmetry called out in
|
|
27
|
+
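* [`.github/prompts/04-article-generation.md`](../../.github/prompts/04-article-generation.md) § 6.2 priority 3. A fifth value, `"manifest"`, marks an entry whose title or description was replaced by an explicit override.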
|
|
28
|
+
*/
|
|
29
|
+
readonly source: 'manifest' | 'localized-brief' | 'english-brief' | 'english-editorial' | 'template';
|
|
7
30
|
}
|
|
8
31
|
/** Fully resolved metadata — one entry per supported language. */
|
|
9
32
|
export type ResolvedMetadata = LanguageMap<ResolvedMetadataEntry>;
|
|
@@ -50,69 +73,7 @@ export interface ResolveMetadataOptions {
|
|
|
50
73
|
*/
|
|
51
74
|
readonly runDir?: string;
|
|
52
75
|
}
|
|
53
|
-
|
|
54
|
-
* Return `true` when a line cannot serve as a prose description. Rejects
|
|
55
|
-
* Markdown structural lines (headings, blockquotes, tables, HTML),
|
|
56
|
-
* mermaid/chart directives, emoji-banner metadata rows, and the known
|
|
57
|
-
* `Key: value` banners that Stage-B agents emit as artefact preamble.
|
|
58
|
-
*
|
|
59
|
-
* @param line - Trimmed line from the aggregated Markdown source
|
|
60
|
-
* @returns `true` when the line is not prose and should be skipped
|
|
61
|
-
*/
|
|
62
|
-
export declare function shouldSkipDescriptionLine(line: string): boolean;
|
|
63
|
-
/**
|
|
64
|
-
* Strip inline Markdown decorations so we can use the remaining text as
|
|
65
|
-
* plain-text meta-tag content. Removes link syntax, emphasis, inline code
|
|
66
|
-
* backticks, and HTML-entity fragments that the Markdown source sometimes
|
|
67
|
-
* smuggles in. Keeps the visible text readable.
|
|
68
|
-
*
|
|
69
|
-
* @param raw - Trimmed Markdown line
|
|
70
|
-
* @returns Plain-text variant
|
|
71
|
-
*/
|
|
72
|
-
/**
|
|
73
|
-
* Strip a leading all-caps prose label (e.g. `SITUATION:`, `KEY MOTION:`,
|
|
74
|
-
* `BLUF:`, `BOTTOM LINE:`, `TIER-1:`) from a prose line. These labels
|
|
75
|
-
* are common in BLUF-style editorial writing — they survive
|
|
76
|
-
* {@link stripInlineMarkdown} (which strips the `**bold**` wrapper but
|
|
77
|
-
* keeps the literal text) and would otherwise leak into the SEO
|
|
78
|
-
* description as a confusing all-caps shout.
|
|
79
|
-
*
|
|
80
|
-
* Matches up to 4 hyphenated all-caps tokens, optionally followed by a
|
|
81
|
-
* digit suffix (`TIER-1`), terminating at a colon. Returns the original
|
|
82
|
-
* line when no opener is present.
|
|
83
|
-
*
|
|
84
|
-
* @param line - Plain prose line (post-{@link stripInlineMarkdown})
|
|
85
|
-
* @returns Line with the all-caps opener removed
|
|
86
|
-
*/
|
|
87
|
-
export declare function stripLeadingProseLabel(line: string): string;
|
|
88
|
-
/**
|
|
89
|
-
* Strip inline Markdown decorations so we can use the remaining text as
|
|
90
|
-
* plain-text meta-tag content. Removes link syntax, emphasis, inline code
|
|
91
|
-
* backticks, and HTML-entity fragments that the Markdown source sometimes
|
|
92
|
-
* smuggles in. Keeps the visible text readable.
|
|
93
|
-
*
|
|
94
|
-
* @param raw - Trimmed Markdown line
|
|
95
|
-
* @returns Plain-text variant
|
|
96
|
-
*/
|
|
97
|
-
export declare function stripInlineMarkdown(raw: string): string;
|
|
98
|
-
/**
|
|
99
|
-
* Clamp a string to `DESCRIPTION_MAX_LENGTH` characters, appending
|
|
100
|
-
* an ellipsis when truncation actually happens. Does not break words if
|
|
101
|
-
* avoidable — a trailing partial word is trimmed back to the previous
|
|
102
|
-
* space first.
|
|
103
|
-
*
|
|
104
|
-
* @param text - Raw description text
|
|
105
|
-
* @returns Truncated description with trailing ellipsis when clipped
|
|
106
|
-
*/
|
|
107
|
-
export declare function truncateDescription(text: string): string;
|
|
108
|
-
/**
|
|
109
|
-
* Clamp a title to `TITLE_MAX_LENGTH` characters in the same
|
|
110
|
-
* word-boundary-preserving fashion as {@link truncateDescription}.
|
|
111
|
-
*
|
|
112
|
-
* @param text - Raw title text
|
|
113
|
-
* @returns Truncated title with trailing ellipsis when clipped
|
|
114
|
-
*/
|
|
115
|
-
export declare function truncateTitle(text: string): string;
|
|
76
|
+
export { shouldSkipDescriptionLine, stripLeadingProseLabel, stripInlineMarkdown, truncateDescription, truncateExtendedDescription, truncateTitle, extractFirstSentence, } from './metadata/text-utils.js';
|
|
116
77
|
/**
|
|
117
78
|
* Return the first Markdown H1 (`# …`) in the supplied text, stripped of
|
|
118
79
|
* the leading `#` and trailing anchor syntax. Returns an empty string when
|
|
@@ -123,30 +84,33 @@ export declare function truncateTitle(text: string): string;
|
|
|
123
84
|
*/
|
|
124
85
|
export declare function extractFirstH1(markdown: string): string;
|
|
125
86
|
/**
|
|
126
|
-
* Walk every line of the Markdown source and return the first
|
|
127
|
-
* survives {@link shouldSkipDescriptionLine}.
|
|
128
|
-
*
|
|
87
|
+
* Walk every line of the Markdown source and return the first paragraph
|
|
88
|
+
* that survives {@link shouldSkipDescriptionLine}. Consecutive non-blank
|
|
89
|
+
* prose lines are joined with a single space so hard-wrapped ledes
|
|
90
|
+
* (column-95 conventional wrap) produce a clean 140-180-character
|
|
91
|
+
* description rather than just the first 60-90-char line.
|
|
92
|
+
*
|
|
93
|
+
* Inline Markdown decorations are stripped and the result is truncated
|
|
94
|
+
* to fit `<meta description>`.
|
|
129
95
|
*
|
|
130
96
|
* @param markdown - Markdown source
|
|
131
97
|
* @returns Prose description, or empty string when nothing qualifies
|
|
132
98
|
*/
|
|
133
99
|
export declare function extractStrongProseLine(markdown: string): string;
|
|
100
|
+
export declare function extractLedeAfterHeading(markdown: string): string;
|
|
134
101
|
/**
|
|
135
|
-
*
|
|
136
|
-
*
|
|
137
|
-
*
|
|
138
|
-
*
|
|
139
|
-
*
|
|
140
|
-
* the
|
|
141
|
-
*
|
|
142
|
-
* Returns the empty string when no lede heading is found or no qualifying
|
|
143
|
-
* prose follows it. Inline Markdown is stripped and the result is
|
|
144
|
-
* truncated to fit `<meta description>`.
|
|
102
|
+
* Same parsing rules as {@link extractLedeAfterHeading} but with a
|
|
103
|
+
* larger byte budget so the full BLUF paragraph (typically 200-300
|
|
104
|
+
* characters in the editorial style guide) is captured for use as
|
|
105
|
+
* `og:description` / `twitter:description`. Returns the joined
|
|
106
|
+
* paragraph clamped via {@link truncateExtendedDescription} (which
|
|
107
|
+
* returns `''` when the result wouldn't be longer than the regular
|
|
108
|
+
* meta description).
|
|
145
109
|
*
|
|
146
|
-
* @param markdown -
|
|
147
|
-
* @returns
|
|
110
|
+
* @param markdown - Brief body (SPDX preamble already stripped)
|
|
111
|
+
* @returns Extended lede paragraph, or `''` when not worth emitting
|
|
148
112
|
*/
|
|
149
|
-
export declare function
|
|
113
|
+
export declare function extractExtendedLedeAfterHeading(markdown: string): string;
|
|
150
114
|
/**
|
|
151
115
|
* Return `true` when an artefact-H1 begins with one of the
|
|
152
116
|
* `ARTIFACT_CATEGORY_PREFIXES` followed by a separator. Such H1s
|
|
@@ -217,6 +181,48 @@ export declare function extractArtifactHighlight(runDir: string, articleType: st
|
|
|
217
181
|
readonly headline: string;
|
|
218
182
|
readonly summary: string;
|
|
219
183
|
} | null;
|
|
184
|
+
/**
|
|
185
|
+
* Return `true` when a top-level `.md` filename looks like a translated
|
|
186
|
+
* sibling of a canonical editorial artefact (e.g.
|
|
187
|
+
* `executive-brief_ar.md`). These files must be excluded from the
|
|
188
|
+
* top-level fallback scan in {@link extractArtifactHighlight} because
|
|
189
|
+
* their localized H1s evade the English-only generic-heading detector
|
|
190
|
+
* and would otherwise hijack the English SEO surfaces.
|
|
191
|
+
*
|
|
192
|
+
* @param filename - Run-relative `.md` filename (no path separators)
|
|
193
|
+
* @returns `true` when the file is a translated sibling brief
|
|
194
|
+
*/
|
|
195
|
+
export declare function isTranslatedSiblingBrief(filename: string): boolean;
|
|
196
|
+
/**
|
|
197
|
+
* Mine the FIRST named priority finding from an executive-brief–style
|
|
198
|
+
* artefact body. Looks for a section heading from
|
|
199
|
+
* {@link PRIORITY_FINDING_SECTION_HEADINGS} and returns the first dossier
|
|
200
|
+
* name + descriptive paragraph found inside it. Supports the three
|
|
201
|
+
* canonical Stage-B authoring patterns:
|
|
202
|
+
*
|
|
203
|
+
* 1. **Bold-in-numbered-list** (breaking briefs):
|
|
204
|
+
* `1. **Digital Markets Act Enforcement** (TA-10-2026-0160, 2026-04-30)`
|
|
205
|
+
* ` Parliament adopted a resolution …`
|
|
206
|
+
* 2. **Numbered subheading** (committee briefs):
|
|
207
|
+
* `### 1. Clean Industrial Deal Implementation (ITRE/ENVI)`
|
|
208
|
+
* `The Clean Industrial Deal framework …`
|
|
209
|
+
* 3. **Bold-leading paragraph** (synthesis variants):
|
|
210
|
+
* `**Trigger 1: DMA Enforcement Resolution** (TA-10-2026-0160)`
|
|
211
|
+
* `- Significance: 🟢 HIGH IMPACT …`
|
|
212
|
+
*
|
|
213
|
+
* Trailing parenthesised metadata (`(TA-10-2026-0160, 2026-04-30)`,
|
|
214
|
+
* `(ITRE/ENVI)`) is stripped from the headline so it stays headline-shaped
|
|
215
|
+
* (`Digital Markets Act Enforcement`) rather than boilerplate
|
|
216
|
+
* (`Digital Markets Act Enforcement (TA-10-2026-0160, 2026-04-30)`).
|
|
217
|
+
*
|
|
218
|
+
* @param body - Editorial artefact body
|
|
219
|
+
* @returns `{headline, summary}` when a priority finding was identified;
|
|
220
|
+
* `null` when the body has no priority section or no usable item inside
|
|
221
|
+
*/
|
|
222
|
+
export declare function extractPriorityFindingHighlight(body: string): {
|
|
223
|
+
readonly headline: string;
|
|
224
|
+
readonly summary: string;
|
|
225
|
+
} | null;
|
|
220
226
|
/**
|
|
221
227
|
* Build the per-language `{title, description}` pair using the
|
|
222
228
|
* article-type–specific `*_TITLES` generator from
|
|
@@ -230,77 +236,7 @@ export declare function extractArtifactHighlight(runDir: string, articleType: st
|
|
|
230
236
|
* @returns Per-language `LangTitleSubtitle`
|
|
231
237
|
*/
|
|
232
238
|
export declare function buildTemplateFallback(articleType: string, date: string, committee?: string): LanguageMap<LangTitleSubtitle>;
|
|
233
|
-
|
|
234
|
-
* Parse an ISO date and return the `[start, end]` week range as ISO
|
|
235
|
-
* strings. Week starts on Monday and ends on the following Sunday.
|
|
236
|
-
*
|
|
237
|
-
* @param date - ISO date string (`YYYY-MM-DD`)
|
|
238
|
-
* @returns `{ start, end }` both in `YYYY-MM-DD` form
|
|
239
|
-
*/
|
|
240
|
-
export declare function deriveWeekRange(date: string): {
|
|
241
|
-
readonly start: string;
|
|
242
|
-
readonly end: string;
|
|
243
|
-
};
|
|
244
|
-
/**
|
|
245
|
-
* Return the D-36 → D-8 reporting window for the `week-in-review`
|
|
246
|
-
* article type. EP roll-call voting data is published with a 2–6 week
|
|
247
|
-
* lag, so using the most-recent 7 days structurally produces a
|
|
248
|
-
* vote-empty dataset. Shifting 8 days back and widening to 28 days
|
|
249
|
-
* (start = D-36, end = D-8) ensures the window always contains at
|
|
250
|
-
* least one full EP plenary week with published roll-call data
|
|
251
|
-
* (ADR-006). Direction is consistent with the workflow's
|
|
252
|
-
* `DATE_FROM` (start = D-36) → `DATE_TO` (end = D-8) variables.
|
|
253
|
-
*
|
|
254
|
-
* @param date - ISO article date string (`YYYY-MM-DD`) — typically TODAY
|
|
255
|
-
* @returns `{ start: D-36, end: D-8 }` both as `YYYY-MM-DD` ISO strings
|
|
256
|
-
*/
|
|
257
|
-
export declare function deriveReportingWindowForWeekInReview(date: string): {
|
|
258
|
-
readonly start: string;
|
|
259
|
-
readonly end: string;
|
|
260
|
-
};
|
|
261
|
-
/**
|
|
262
|
-
* Return a human-friendly month label for an ISO date — English month
|
|
263
|
-
* name + four-digit year (e.g. `April 2026`). The non-English template
|
|
264
|
-
* generators accept this same label verbatim because they interpolate it
|
|
265
|
-
* into a localized sentence rather than translating the month itself.
|
|
266
|
-
*
|
|
267
|
-
* @param date - ISO date string
|
|
268
|
-
* @returns Month label, or the input when parsing fails
|
|
269
|
-
*/
|
|
270
|
-
export declare function deriveMonthLabel(date: string): string;
|
|
271
|
-
/**
|
|
272
|
-
* Return a quarter label for an ISO date — `Q<n> <YYYY>` (e.g. `Q2 2026`).
|
|
273
|
-
* Used by `quarter-ahead` and `quarter-in-review` title generators.
|
|
274
|
-
*
|
|
275
|
-
* @param date - ISO date string
|
|
276
|
-
* @returns Quarter label, or the input when parsing fails
|
|
277
|
-
*/
|
|
278
|
-
export declare function deriveQuarterLabel(date: string): string;
|
|
279
|
-
/**
|
|
280
|
-
* Return a four-digit year label for an ISO date. Used by `year-ahead`
|
|
281
|
-
* and `year-in-review` title generators.
|
|
282
|
-
*
|
|
283
|
-
* @param date - ISO date string
|
|
284
|
-
* @returns Year label, or the input when parsing fails
|
|
285
|
-
*/
|
|
286
|
-
export declare function deriveYearLabel(date: string): string;
|
|
287
|
-
/**
|
|
288
|
-
* Return the EP-term label for an ISO date — `EP10 → 2029` or `EP11 → 2034`.
|
|
289
|
-
* Used by `term-outlook` title generator.
|
|
290
|
-
*
|
|
291
|
-
* @param date - ISO date string
|
|
292
|
-
* @returns Term label, or the input when parsing fails
|
|
293
|
-
*/
|
|
294
|
-
export declare function deriveTermLabel(date: string): string;
|
|
295
|
-
/**
|
|
296
|
-
* Return the election-cycle label for an ISO date — pairs the outgoing
|
|
297
|
-
* and incoming EP terms with the election year (e.g. `EP10 → EP11 (2029)`).
|
|
298
|
-
* Used by the `election-cycle` title generator.
|
|
299
|
-
*
|
|
300
|
-
* @param date - ISO date string
|
|
301
|
-
* @returns Cycle label, or the input when parsing fails
|
|
302
|
-
*/
|
|
303
|
-
export declare function deriveElectionCycleLabel(date: string): string;
|
|
239
|
+
export { deriveWeekRange, deriveReportingWindowForWeekInReview, deriveMonthLabel, deriveQuarterLabel, deriveYearLabel, deriveTermLabel, deriveElectionCycleLabel, } from './metadata/date-labels.js';
|
|
304
240
|
/**
|
|
305
241
|
* Build a stable, localized keyword list from the article type plus the
|
|
306
242
|
* resolved title/description context.
|