euparliamentmonitor 0.9.12 → 0.9.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "euparliamentmonitor",
3
- "version": "0.9.12",
3
+ "version": "0.9.13",
4
4
  "type": "module",
5
5
  "description": "European Parliament Intelligence Platform - Monitor political activity with systematic transparency",
6
6
  "main": "scripts/index.js",
@@ -63,6 +63,9 @@
63
63
  "build:check-tests": "tsc --project tsconfig.test.json --noEmit",
64
64
  "copy-vendor": "node scripts/copy-vendor.js",
65
65
  "validate-analysis": "node scripts/validate-analysis-completeness.js",
66
+ "discover:untranslated-briefs": "node scripts/discover-untranslated-briefs.js",
67
+ "validate:translations": "node scripts/validate-brief-translations.js",
68
+ "validate:manifest-seo": "node scripts/validate-manifest-seo.js",
66
69
  "sync:templates": "node scripts/templates/sync-template-frontmatter.js",
67
70
  "sync:templates:check": "node scripts/templates/sync-template-frontmatter.js --check",
68
71
  "prior-run-diff": "node scripts/aggregator/prior-run-diff.js",
@@ -169,9 +172,9 @@
169
172
  "html-minifier-terser": "^7.2.0",
170
173
  "htmlhint": "1.9.2",
171
174
  "husky": "9.1.7",
172
- "jscpd": "4.2.1",
175
+ "jscpd": "4.2.2",
173
176
  "knip": "^6.7.0",
174
- "lint-staged": "17.0.4",
177
+ "lint-staged": "17.0.5",
175
178
  "mermaid": "11.15.0",
176
179
  "papaparse": "5.5.3",
177
180
  "prettier": "3.8.3",
@@ -18,6 +18,10 @@ import { buildKeyTakeaways, KEY_TAKEAWAYS_SECTION_ID, KEY_TAKEAWAYS_SECTION_TITL
18
18
  import { flattenManifestFiles as _flattenManifestFiles, latestGateResult as _latestGateResult, resolveArticleType as _resolveArticleType, resolveRunId as _resolveRunId, } from './manifest/index.js';
19
19
  import { READER_GUIDE_SECTION_ID, READER_GUIDE_SECTION_IDS, READER_GUIDE_SECTION_TITLE, } from './reader-guide-constants.js';
20
20
  export { READER_GUIDE_SECTION_ID, READER_GUIDE_SECTION_IDS, READER_GUIDE_SECTION_TITLE, } from './reader-guide-constants.js';
21
+ const TRADECRAFT_EXCLUDED_FILES = new Set([
22
+ 'analysis/methodologies/executive-brief-translation-guide.md',
23
+ 'analysis/templates/executive-brief-translation-template.md',
24
+ ]);
21
25
  /**
22
26
  * Normalise `manifest.files` into a flat list of `runRelPath` strings.
23
27
  *
@@ -94,8 +98,9 @@ export function discoverTradecraftFiles(repoRoot) {
94
98
  continue;
95
99
  const entries = fs.readdirSync(dir, { withFileTypes: true });
96
100
  for (const entry of entries) {
97
- if (entry.isFile() && entry.name.endsWith('.md')) {
98
- result.push(`${sub}/${entry.name}`);
101
+ const rel = `${sub}/${entry.name}`;
102
+ if (entry.isFile() && entry.name.endsWith('.md') && !TRADECRAFT_EXCLUDED_FILES.has(rel)) {
103
+ result.push(rel);
99
104
  }
100
105
  }
101
106
  }
@@ -444,7 +444,7 @@ function getMetadataEntry(map, lang) {
444
444
  return descriptor.value;
445
445
  }
446
446
  const en = Object.getOwnPropertyDescriptor(map, 'en')?.value;
447
- return en ?? { title: '', description: '', keywords: [] };
447
+ return en ?? { title: '', description: '', keywords: [], source: 'template' };
448
448
  }
449
449
  /**
450
450
  * Count the number of articles the site currently publishes, derived
@@ -699,6 +699,7 @@ function applyCliOverrides(base, titleOverride, descriptionOverride) {
699
699
  title: titleOverride ?? entry.title,
700
700
  description: descriptionOverride ?? entry.description,
701
701
  keywords: entry.keywords,
702
+ source: titleOverride || descriptionOverride ? 'manifest' : entry.source,
702
703
  },
703
704
  enumerable: true,
704
705
  writable: true,
@@ -4,6 +4,19 @@ export interface ResolvedMetadataEntry {
4
4
  readonly title: string;
5
5
  readonly description: string;
6
6
  readonly keywords: readonly string[];
7
+ /**
8
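+ * `"manifest"` when a CLI title or description override was applied (this takes precedence over the values below); `"localized-brief"` when the title/description came from a translated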
9
+ * `executive-brief_<lang>.md`; `"english-brief"` when the locale fell
10
+ * through to the English brief; `"english-editorial"` when the locale
11
+ * used an aggregated-Markdown / artefact source; `"template"` when only
12
+ * the localized type/date template was available. For `lang === 'en'`
13
+ * the value is always `"english-brief"` or `"english-editorial"` or
14
+ * `"template"` (no fall-through). Lets downstream consumers — the
15
+ * news-index, the static-site fallback note, the manifest-SEO
16
+ * validator — record the asymmetry called out in
17
+ * [`.github/prompts/04-article-generation.md`](../../.github/prompts/04-article-generation.md) § 6.2 priority 3.
18
+ */
19
+ readonly source: 'manifest' | 'localized-brief' | 'english-brief' | 'english-editorial' | 'template';
7
20
  }
8
21
  /** Fully resolved metadata — one entry per supported language. */
9
22
  export type ResolvedMetadata = LanguageMap<ResolvedMetadataEntry>;
@@ -113,6 +126,33 @@ export declare function truncateDescription(text: string): string;
113
126
  * @returns Truncated title with trailing ellipsis when clipped
114
127
  */
115
128
  export declare function truncateTitle(text: string): string;
129
+ /**
130
+ * Return the first complete sentence from a prose paragraph, suitable
131
+ * for use as a fallback editorial title when the artefact H1 is
132
+ * categorical (e.g. `# EU Parliament Committee Reports`) and the
133
+ * resolver must derive `<title>` from the BLUF / lede summary instead.
134
+ *
135
+ * A "sentence" is the prefix up to the first sentence-terminator
136
+ * (`. `, `! `, `? `, `; `) inside the `[HEADLINE_SOFT_MIN,
137
+ * TITLE_MAX_LENGTH]` window. Common abbreviations (`Q1.`, `Q2.`,
138
+ * `H1.`, `H2.`, `Mr.`, `Mrs.`, `e.g.`, `i.e.`, `vs.`) are skipped
139
+ * so they don't terminate the sentence prematurely. When no
140
+ * acceptable terminator exists in the window, returns the entire
141
+ * input unchanged so {@link truncateTitle} can handle clause-boundary
142
+ * truncation downstream.
143
+ *
144
+ * This produces journalistically clean titles even for the
145
+ * propositions / committee-reports cases where the BLUF paragraph
146
+ * opens with a single long sentence that exceeds 140 chars —
147
+ * `truncateTitle` then breaks on a clause boundary, and the result is
148
+ * still grammatical because the input was a sentence prefix rather
149
+ * than an arbitrary paragraph slice.
150
+ *
151
+ * @param paragraph - Prose paragraph (post-{@link stripInlineMarkdown})
152
+ * @returns First sentence, or the original paragraph when none can be
153
+ * identified within the soft-min window
154
+ */
155
+ export declare function extractFirstSentence(paragraph: string): string;
116
156
  /**
117
157
  * Return the first Markdown H1 (`# …`) in the supplied text, stripped of
118
158
  * the leading `#` and trailing anchor syntax. Returns an empty string when
@@ -123,29 +163,19 @@ export declare function truncateTitle(text: string): string;
123
163
  */
124
164
  export declare function extractFirstH1(markdown: string): string;
125
165
  /**
126
- * Walk every line of the Markdown source and return the first line that
127
- * survives {@link shouldSkipDescriptionLine}. Inline Markdown decorations
128
- * are stripped and the result is truncated to fit `<meta description>`.
166
+ * Walk every line of the Markdown source and return the first paragraph
167
+ * that survives {@link shouldSkipDescriptionLine}. Consecutive non-blank
168
+ * prose lines are joined with a single space so hard-wrapped ledes
169
+ * (column-95 conventional wrap) produce a clean 140-180-character
170
+ * description rather than just the first 60-90-char line.
171
+ *
172
+ * Inline Markdown decorations are stripped and the result is truncated
173
+ * to fit `<meta description>`.
129
174
  *
130
175
  * @param markdown - Markdown source
131
176
  * @returns Prose description, or empty string when nothing qualifies
132
177
  */
133
178
  export declare function extractStrongProseLine(markdown: string): string;
134
- /**
135
- * Walk the body of an editorial artefact and, when it contains a `## …`
136
- * heading whose text matches one of `EDITORIAL_LEDE_HEADINGS`,
137
- * return the first prose paragraph that follows that heading. This is
138
- * the journalist's lede ("60-Second Read", "TL;DR", "BLUF — …", …) and
139
- * is exactly the sentence that should power `<meta description>` and
140
- * the OG/Twitter description fields.
141
- *
142
- * Returns the empty string when no lede heading is found or no qualifying
143
- * prose follows it. Inline Markdown is stripped and the result is
144
- * truncated to fit `<meta description>`.
145
- *
146
- * @param markdown - Editorial artefact source
147
- * @returns Lede paragraph, or empty string when none matched
148
- */
149
179
  export declare function extractLedeAfterHeading(markdown: string): string;
150
180
  /**
151
181
  * Return `true` when an artefact-H1 begins with one of the
@@ -217,6 +247,48 @@ export declare function extractArtifactHighlight(runDir: string, articleType: st
217
247
  readonly headline: string;
218
248
  readonly summary: string;
219
249
  } | null;
250
+ /**
251
+ * Return `true` when a top-level `.md` filename looks like a translated
252
+ * sibling of a canonical editorial artefact (e.g.
253
+ * `executive-brief_ar.md`). These files must be excluded from the
254
+ * top-level fallback scan in {@link extractArtifactHighlight} because
255
+ * their localized H1s evade the English-only generic-heading detector
256
+ * and would otherwise hijack the English SEO surfaces.
257
+ *
258
+ * @param filename - Run-relative `.md` filename (no path separators)
259
+ * @returns `true` when the file is a translated sibling brief
260
+ */
261
+ export declare function isTranslatedSiblingBrief(filename: string): boolean;
262
+ /**
263
+ * Mine the FIRST named priority finding from an executive-brief–style
264
+ * artefact body. Looks for a section heading from
265
+ * {@link PRIORITY_FINDING_SECTION_HEADINGS} and returns the first dossier
266
+ * name + descriptive paragraph found inside it. Supports the three
267
+ * canonical Stage-B authoring patterns:
268
+ *
269
+ * 1. **Bold-in-numbered-list** (breaking briefs):
270
+ * `1. **Digital Markets Act Enforcement** (TA-10-2026-0160, 2026-04-30)`
271
+ * ` Parliament adopted a resolution …`
272
+ * 2. **Numbered subheading** (committee briefs):
273
+ * `### 1. Clean Industrial Deal Implementation (ITRE/ENVI)`
274
+ * `The Clean Industrial Deal framework …`
275
+ * 3. **Bold-leading paragraph** (synthesis variants):
276
+ * `**Trigger 1: DMA Enforcement Resolution** (TA-10-2026-0160)`
277
+ * `- Significance: 🟢 HIGH IMPACT …`
278
+ *
279
+ * Trailing parenthesised metadata (`(TA-10-2026-0160, 2026-04-30)`,
280
+ * `(ITRE/ENVI)`) is stripped from the headline so it stays headline-shaped
281
+ * (`Digital Markets Act Enforcement`) rather than boilerplate
282
+ * (`Digital Markets Act Enforcement (TA-10-2026-0160, 2026-04-30)`).
283
+ *
284
+ * @param body - Editorial artefact body
285
+ * @returns `{headline, summary}` when a priority finding was identified;
286
+ * `null` when the body has no priority section or no usable item inside
287
+ */
288
+ export declare function extractPriorityFindingHighlight(body: string): {
289
+ readonly headline: string;
290
+ readonly summary: string;
291
+ } | null;
220
292
  /**
221
293
  * Build the per-language `{title, description}` pair using the
222
294
  * article-type–specific `*_TITLES` generator from