euparliamentmonitor 0.8.50 → 0.8.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/package.json +8 -11
- package/scripts/aggregator/analysis-aggregator.d.ts +4 -26
- package/scripts/aggregator/analysis-aggregator.js +2 -2
- package/scripts/aggregator/article-generator.d.ts +2 -2
- package/scripts/aggregator/article-generator.js +1 -1
- package/scripts/aggregator/article-html.js +6 -1
- package/scripts/aggregator/article-metadata.d.ts +4 -4
- package/scripts/aggregator/article-metadata.js +2 -2
- package/scripts/aggregator/cli/parse.d.ts +1 -1
- package/scripts/aggregator/cli/parse.js +2 -2
- package/scripts/aggregator/infra/github-urls.d.ts +1 -1
- package/scripts/aggregator/infra/github-urls.js +1 -1
- package/scripts/aggregator/manifest/resolver.d.ts +2 -2
- package/scripts/aggregator/manifest/resolver.js +2 -2
- package/scripts/aggregator/manifest/types.d.ts +10 -8
- package/scripts/aggregator/prior-run-diff.js +52 -20
- package/scripts/aggregator/runs/discover.d.ts +1 -1
- package/scripts/aggregator/runs/discover.js +1 -1
- package/scripts/backport-article-seo.js +9 -9
- package/scripts/constants/analysis-constants.d.ts +1 -1
- package/scripts/constants/analysis-constants.js +1 -1
- package/scripts/constants/build-info-meta.d.ts +10 -0
- package/scripts/constants/build-info-meta.js +45 -0
- package/scripts/constants/config.d.ts +20 -0
- package/scripts/constants/config.js +57 -0
- package/scripts/constants/language-ui.d.ts +18 -0
- package/scripts/constants/language-ui.js +154 -0
- package/scripts/constants/languages.d.ts +1 -1
- package/scripts/constants/languages.js +1 -1
- package/scripts/generators/build-info.js +73 -0
- package/scripts/generators/news-indexes.js +6 -1
- package/scripts/generators/political-intelligence/html.js +6 -1
- package/scripts/generators/political-intelligence-descriptions.d.ts +5 -3
- package/scripts/generators/political-intelligence-descriptions.js +2 -2
- package/scripts/generators/sitemap/html.js +7 -2
- package/scripts/generators/sitemap/rss.js +2 -0
- package/scripts/generators/sitemap/xml.js +3 -1
- package/scripts/lint-prompts.js +19 -0
- package/scripts/mcp/ep-mcp-client.d.ts +6 -6
- package/scripts/mcp/ep-mcp-client.js +11 -11
- package/scripts/mcp/imf-mcp-client.d.ts +1 -1
- package/scripts/mcp/mcp-connection.js +1 -1
- package/scripts/templates/icons.d.ts +30 -0
- package/scripts/templates/icons.js +32 -0
- package/scripts/templates/section-builders.js +22 -10
- package/scripts/types/imf.d.ts +1 -1
- package/scripts/types/parliament.d.ts +1 -1
- package/scripts/types/world-bank.d.ts +1 -1
- package/scripts/utils/file-utils.d.ts +2 -2
- package/scripts/utils/file-utils.js +2 -2
- package/scripts/validate-analysis-completeness.js +157 -6
- package/scripts/index.old.js +0 -125
- package/scripts/utils/migrate-legacy-articles.js +0 -225
package/README.md
CHANGED
|
@@ -136,7 +136,7 @@ The published site is the audience-facing companion to this npm/TypeScript packa
|
|
|
136
136
|
|
|
137
137
|
**MCP Server Integration**: The project uses the
|
|
138
138
|
[European-Parliament-MCP-Server](https://github.com/Hack23/European-Parliament-MCP-Server)
|
|
139
|
-
v1.2.
|
|
139
|
+
v1.2.18 for accessing real EU Parliament data via the Model Context Protocol.
|
|
140
140
|
|
|
141
141
|
- **MCP Server Status**: ✅ Fully operational — 60+ EP data tools available
|
|
142
142
|
(feeds, direct lookups, analytical tools, intelligence correlation)
|
|
@@ -426,7 +426,7 @@ import type { ArticleCategory, LanguageCode } from 'euparliamentmonitor/types';
|
|
|
426
426
|
|
|
427
427
|
## 🔌 Data Sources
|
|
428
428
|
|
|
429
|
-
**Primary — European Parliament MCP Server** ([Hack23/European-Parliament-MCP-Server](https://github.com/Hack23/European-Parliament-MCP-Server) v1.2.
|
|
429
|
+
**Primary — European Parliament MCP Server** ([Hack23/European-Parliament-MCP-Server](https://github.com/Hack23/European-Parliament-MCP-Server) v1.2.18+, fully operational):
|
|
430
430
|
|
|
431
431
|
- 🗳️ Plenary sessions, voting records, roll-call votes
|
|
432
432
|
- 📜 Adopted texts, motions, resolutions, urgency files
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "euparliamentmonitor",
|
|
3
|
-
"version": "0.8.50",
|
|
3
|
+
"version": "0.8.52",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "European Parliament Intelligence Platform - Monitor political activity with systematic transparency",
|
|
6
6
|
"main": "scripts/index.js",
|
|
@@ -33,14 +33,6 @@
|
|
|
33
33
|
"./generators/*": {
|
|
34
34
|
"import": "./scripts/generators/*.js",
|
|
35
35
|
"types": "./scripts/generators/*.d.ts"
|
|
36
|
-
},
|
|
37
|
-
"./generators/pipeline/*": {
|
|
38
|
-
"import": "./scripts/generators/pipeline/*.js",
|
|
39
|
-
"types": "./scripts/generators/pipeline/*.d.ts"
|
|
40
|
-
},
|
|
41
|
-
"./generators/strategies/*": {
|
|
42
|
-
"import": "./scripts/generators/strategies/*.js",
|
|
43
|
-
"types": "./scripts/generators/strategies/*.d.ts"
|
|
44
36
|
}
|
|
45
37
|
},
|
|
46
38
|
"files": [
|
|
@@ -56,7 +48,8 @@
|
|
|
56
48
|
"registry": "https://registry.npmjs.org/"
|
|
57
49
|
},
|
|
58
50
|
"scripts": {
|
|
59
|
-
"prebuild": "node scripts/generators/news-indexes.js && node scripts/generators/sitemap.js",
|
|
51
|
+
"prebuild": "node scripts/generators/build-info.js && node scripts/generators/news-indexes.js && node scripts/generators/sitemap.js",
|
|
52
|
+
"generate-build-info": "node scripts/generators/build-info.js",
|
|
60
53
|
"build": "tsc",
|
|
61
54
|
"build:check": "tsc --noEmit",
|
|
62
55
|
"build:check-tests": "tsc --project tsconfig.test.json --noEmit",
|
|
@@ -84,6 +77,9 @@
|
|
|
84
77
|
"test:e2e:report": "playwright show-report",
|
|
85
78
|
"lint": "eslint src/",
|
|
86
79
|
"lint:fix": "eslint src/ --fix",
|
|
80
|
+
"knip": "knip",
|
|
81
|
+
"knip:production": "knip --production",
|
|
82
|
+
"knip:fix": "knip --fix",
|
|
87
83
|
"lint:report": "eslint src/ --format json --output-file builds/test-results/eslint-report.json",
|
|
88
84
|
"lint:report:html": "eslint src/ --format html --output-file builds/test-results/eslint-report.html",
|
|
89
85
|
"format": "prettier --write \"src/**/*.ts\"",
|
|
@@ -158,6 +154,7 @@
|
|
|
158
154
|
"htmlhint": "1.9.2",
|
|
159
155
|
"husky": "9.1.7",
|
|
160
156
|
"jscpd": "4.0.9",
|
|
157
|
+
"knip": "^6.7.0",
|
|
161
158
|
"lint-staged": "16.4.0",
|
|
162
159
|
"mermaid": "11.14.0",
|
|
163
160
|
"papaparse": "5.5.3",
|
|
@@ -172,7 +169,7 @@
|
|
|
172
169
|
"node": ">=25"
|
|
173
170
|
},
|
|
174
171
|
"dependencies": {
|
|
175
|
-
"european-parliament-mcp-server": "1.2.
|
|
172
|
+
"european-parliament-mcp-server": "1.2.18",
|
|
176
173
|
"markdown-it": "^14.1.1",
|
|
177
174
|
"markdown-it-anchor": "^9.2.0",
|
|
178
175
|
"markdown-it-attrs": "^4.3.1",
|
|
@@ -1,27 +1,5 @@
|
|
|
1
1
|
import { type ArtifactSection } from './artifact-order.js';
|
|
2
|
-
import { type Manifest, type ManifestFiles
|
|
3
|
-
/**
|
|
4
|
-
* Raw manifest shape as committed by the analysis pipeline.
|
|
5
|
-
*
|
|
6
|
-
* @deprecated Use {@link Manifest} from `aggregator/manifest/index.js`.
|
|
7
|
-
* This alias is preserved for back-compat with the existing test suite
|
|
8
|
-
* and external curators that import `AnalysisManifest` from this module.
|
|
9
|
-
*/
|
|
10
|
-
export type AnalysisManifest = Manifest;
|
|
11
|
-
/**
|
|
12
|
-
* `manifest.files` can be nested category → paths or flat path → description.
|
|
13
|
-
*
|
|
14
|
-
* @deprecated Use {@link _ManifestFiles} (`ManifestFiles`) from
|
|
15
|
-
* `aggregator/manifest/index.js`.
|
|
16
|
-
*/
|
|
17
|
-
export type ManifestFiles = _ManifestFiles;
|
|
18
|
-
/**
|
|
19
|
-
* One entry in `manifest.history[]`; only fields we read are typed.
|
|
20
|
-
*
|
|
21
|
-
* @deprecated Use {@link _ManifestHistoryEntry} (`ManifestHistoryEntry`) from
|
|
22
|
-
* `aggregator/manifest/index.js`.
|
|
23
|
-
*/
|
|
24
|
-
export type ManifestHistoryEntry = _ManifestHistoryEntry;
|
|
2
|
+
import { type Manifest, type ManifestFiles } from './manifest/index.js';
|
|
25
3
|
/** Result of {@link aggregateAnalysisRun}. */
|
|
26
4
|
export interface AggregatedRun {
|
|
27
5
|
/** Final Markdown document (provenance + sections + appendices). */
|
|
@@ -100,7 +78,7 @@ export declare function flattenManifestFiles(files: ManifestFiles | undefined):
|
|
|
100
78
|
* @param manifest - Parsed manifest object
|
|
101
79
|
* @returns The latest non-PENDING gate result, or `"PENDING"` when none found
|
|
102
80
|
*/
|
|
103
|
-
export declare function latestGateResult(manifest: AnalysisManifest): string;
|
|
81
|
+
export declare function latestGateResult(manifest: Manifest): string;
|
|
104
82
|
/**
|
|
105
83
|
* Expand an `artifacts` entry from {@link ArtifactSection} into a list of
|
|
106
84
|
* concrete artifact paths. Exact paths are kept as-is; directory prefixes
|
|
@@ -174,7 +152,7 @@ export declare function renderAnalysisIndex(included: readonly IncludedArtifact[
|
|
|
174
152
|
*/
|
|
175
153
|
export declare function renderReaderIntelligenceGuide(sections: readonly TocSection[], included: readonly IncludedArtifact[]): string;
|
|
176
154
|
/**
|
|
177
|
-
* Resolve the article-type slug from a manifest, tolerating
|
|
155
|
+
* Resolve the article-type slug from a manifest, tolerating historic schemas.
|
|
178
156
|
*
|
|
179
157
|
* Thin re-export of {@link _resolveArticleType} from
|
|
180
158
|
* `aggregator/manifest/index.js`. Resolution order: `articleType` →
|
|
@@ -183,7 +161,7 @@ export declare function renderReaderIntelligenceGuide(sections: readonly TocSect
|
|
|
183
161
|
* @param manifest - Parsed manifest (any of the supported schemas)
|
|
184
162
|
* @returns Article-type slug usable as a filename component
|
|
185
163
|
*/
|
|
186
|
-
export declare function resolveArticleTypeFromManifest(manifest: AnalysisManifest): string;
|
|
164
|
+
export declare function resolveArticleTypeFromManifest(manifest: Manifest): string;
|
|
187
165
|
/**
|
|
188
166
|
* Read, clean, and concatenate every artifact declared by the run's manifest
|
|
189
167
|
* (with discovery fallback when manifest.files is missing), returning a
|
|
@@ -141,7 +141,7 @@ function collectRunArtifacts(runDir) {
|
|
|
141
141
|
const full = path.join(dir, entry.name);
|
|
142
142
|
const rel = prefix ? `${prefix}/${entry.name}` : entry.name;
|
|
143
143
|
if (entry.isDirectory()) {
|
|
144
|
-
// Skip raw payloads,
|
|
144
|
+
// Skip raw payloads, prior-run snapshots, and Pass-1 work-in-progress
|
|
145
145
|
// snapshots so they are not rendered as supplementary artifacts.
|
|
146
146
|
if (entry.name === 'data' || entry.name === 'runs' || entry.name === 'pass1')
|
|
147
147
|
continue;
|
|
@@ -448,7 +448,7 @@ function appendSection(runDir, runDirRelPath, sectionId, sectionTitle, paths, se
|
|
|
448
448
|
sectionMarkdown.push('');
|
|
449
449
|
}
|
|
450
450
|
/**
|
|
451
|
-
* Resolve the article-type slug from a manifest, tolerating
|
|
451
|
+
* Resolve the article-type slug from a manifest, tolerating historic schemas.
|
|
452
452
|
*
|
|
453
453
|
* Thin re-export of {@link _resolveArticleType} from
|
|
454
454
|
* `aggregator/manifest/index.js`. Resolution order: `articleType` →
|
|
@@ -102,11 +102,11 @@ export declare function extractDefaultDescription(markdown: string): string;
|
|
|
102
102
|
* @returns Summary of the generated artefacts ({@link GenerateResult})
|
|
103
103
|
*/
|
|
104
104
|
export declare function generateArticle(opts: CliOptions, runSuffix?: string, articleCountOverride?: number): GenerateResult;
|
|
105
|
-
/** Candidate run discovered under `analysis/daily/`. */
|
|
106
105
|
/**
|
|
107
106
|
* One run discovered by {@link discoverAnalysisRuns}.
|
|
108
107
|
*
|
|
109
|
-
*
|
|
108
|
+
* Thin re-export of {@link _DiscoveredRun} from `aggregator/runs/index.js`,
|
|
109
|
+
* preserved here as the public type for `article-generator` consumers.
|
|
110
110
|
*/
|
|
111
111
|
export type DiscoveredRun = _DiscoveredRun;
|
|
112
112
|
/**
|
|
@@ -261,7 +261,7 @@ const FALLBACK_DESCRIPTION = 'EU Parliament intelligence summary derived from co
|
|
|
261
261
|
*/
|
|
262
262
|
export function extractDefaultDescription(markdown) {
|
|
263
263
|
// Suppress unused warning: keep `shouldSkipDescriptionLine` for any
|
|
264
|
-
//
|
|
264
|
+
// historic consumer importing it transitively.
|
|
265
265
|
void shouldSkipDescriptionLine;
|
|
266
266
|
const strong = extractStrongProseLine(markdown);
|
|
267
267
|
return strong.length > 0 ? strong : FALLBACK_DESCRIPTION;
|
|
@@ -19,7 +19,8 @@
|
|
|
19
19
|
* browser and CloudFront caches automatically.
|
|
20
20
|
*/
|
|
21
21
|
import { BASE_URL, MERMAID_VERSION } from '../constants/config.js';
|
|
22
|
-
import {
|
|
22
|
+
import { buildHeadFreshnessTags } from '../constants/build-info-meta.js';
|
|
23
|
+
import { ALL_LANGUAGES, LANGUAGE_NAMES, LANGUAGE_FLAGS, PAGE_TITLES, SKIP_LINK_TEXTS, TOC_ARIA_LABELS, UPDATE_AVAILABLE_LABELS, UPDATE_REFRESH_CTA_LABELS, UPDATE_DISMISS_LABELS, getLocalizedString, getTextDirection, } from '../constants/languages.js';
|
|
23
24
|
import { escapeHTML } from '../utils/file-utils.js';
|
|
24
25
|
import { buildSiteFooter, buildSiteHeader, buildPageBanner, } from '../templates/section-builders.js';
|
|
25
26
|
/**
|
|
@@ -189,6 +190,10 @@ ${hreflangLinks}
|
|
|
189
190
|
<link rel="manifest" href="../site.webmanifest">
|
|
190
191
|
<meta name="theme-color" content="#003399">
|
|
191
192
|
<link rel="stylesheet" href="../styles.css">
|
|
193
|
+
<meta name="ep-i18n-update-text" content="${escapeHTML(getLocalizedString(UPDATE_AVAILABLE_LABELS, safeLang))}">
|
|
194
|
+
<meta name="ep-i18n-update-cta" content="${escapeHTML(getLocalizedString(UPDATE_REFRESH_CTA_LABELS, safeLang))}">
|
|
195
|
+
<meta name="ep-i18n-dismiss" content="${escapeHTML(getLocalizedString(UPDATE_DISMISS_LABELS, safeLang))}">
|
|
196
|
+
${buildHeadFreshnessTags('../')}
|
|
192
197
|
<script type="application/ld+json">${jsonLdString}</script>
|
|
193
198
|
<script type="module" src="../js/mermaid-init.js?v=${MERMAID_VERSION}" defer></script>
|
|
194
199
|
<script src="../js/article-runtime.js" defer></script>
|
|
@@ -8,9 +8,9 @@ export interface ResolvedMetadataEntry {
|
|
|
8
8
|
export type ResolvedMetadata = LanguageMap<ResolvedMetadataEntry>;
|
|
9
9
|
/**
|
|
10
10
|
* Raw manifest subset consumed by the resolver. Deliberately narrower
|
|
11
|
-
* than the full {@link
|
|
12
|
-
* usable for backport (which only has the manifest in
|
|
13
|
-
* callers that don't need the full typed structure.
|
|
11
|
+
* than the full {@link import('./manifest/types.js').Manifest} shape so
|
|
12
|
+
* the resolver stays usable for backport (which only has the manifest in
|
|
13
|
+
* text form) and for callers that don't need the full typed structure.
|
|
14
14
|
*/
|
|
15
15
|
export interface MetadataManifest {
|
|
16
16
|
readonly articleType?: string;
|
|
@@ -40,7 +40,7 @@ export interface ResolveMetadataOptions {
|
|
|
40
40
|
readonly date: string;
|
|
41
41
|
/** Aggregated Markdown document body (after provenance/header). */
|
|
42
42
|
readonly markdown: string;
|
|
43
|
-
/** Parsed analysis manifest (may be empty for
|
|
43
|
+
/** Parsed analysis manifest (may be empty for historic/backport callers). */
|
|
44
44
|
readonly manifest?: MetadataManifest;
|
|
45
45
|
/**
|
|
46
46
|
* Absolute path to the analysis run directory so the resolver can
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
* 3. **Aggregated-markdown H1** — the first `# …` heading in the aggregator
|
|
25
25
|
* output, accepted under the same non-generic rule. In practice this
|
|
26
26
|
* tier rarely fires because the aggregator itself writes the generic
|
|
27
|
-
* default, but it covers hand-edited or
|
|
27
|
+
* default, but it covers hand-edited or historic aggregates.
|
|
28
28
|
* 4. **First strong prose paragraph** — the first line of the aggregated
|
|
29
29
|
* Markdown that survives {@link shouldSkipDescriptionLine}. Used for
|
|
30
30
|
* `description`; also used for `title` as a last editorial-content
|
|
@@ -313,7 +313,7 @@ export function isGenericHeading(heading, articleType, date) {
|
|
|
313
313
|
`${human} ${date}`,
|
|
314
314
|
];
|
|
315
315
|
// Also accept the collision-suffix pattern (e.g. `Breaking Breaking — …`)
|
|
316
|
-
// and the auto-generated "EU Parliament <Type> — <date>"
|
|
316
|
+
// and the auto-generated "EU Parliament <Type> — <date>" historic form.
|
|
317
317
|
const humanRedundant = `${human} ${human}`;
|
|
318
318
|
for (const p of patterns) {
|
|
319
319
|
if (normalized === p)
|
|
@@ -39,7 +39,7 @@ export declare const HELP_TEXT: string;
|
|
|
39
39
|
* - `{kind:'options', value}` — argv parsed cleanly; `value` is ready to
|
|
40
40
|
* pass to `generateArticle` / `generateAllArticles`.
|
|
41
41
|
*
|
|
42
|
-
* Compared to the
|
|
42
|
+
* Compared to the original `parseCliArgs` in `article-generator.ts` (which
|
|
43
43
|
* throws and calls `process.exit` on `--help`), this entry point keeps
|
|
44
44
|
* tests self-contained.
|
|
45
45
|
*
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
/**
|
|
4
4
|
* @module Aggregator/Cli/Parse
|
|
5
5
|
* @description Pure CLI parser that returns a discriminated union instead
|
|
6
|
-
* of calling `process.exit` mid-parse. The
|
|
6
|
+
* of calling `process.exit` mid-parse. The original `parseCliArgs` entry
|
|
7
7
|
* point in `article-generator.ts` is preserved for backward compatibility
|
|
8
8
|
* with existing callers and tests; new callers and unit tests should
|
|
9
9
|
* prefer {@link parseCliArgsSafe} so the `--help` and error branches are
|
|
@@ -202,7 +202,7 @@ function processArgvToken(argv, index, acc) {
|
|
|
202
202
|
* - `{kind:'options', value}` — argv parsed cleanly; `value` is ready to
|
|
203
203
|
* pass to `generateArticle` / `generateAllArticles`.
|
|
204
204
|
*
|
|
205
|
-
* Compared to the
|
|
205
|
+
* Compared to the original `parseCliArgs` in `article-generator.ts` (which
|
|
206
206
|
* throws and calls `process.exit` on `--help`), this entry point keeps
|
|
207
207
|
* tests self-contained.
|
|
208
208
|
*
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* `githubRawUrl`) and `article-generator.ts` (which embedded the same slug
|
|
8
8
|
* literally inside an `isBasedOn` template string).
|
|
9
9
|
*
|
|
10
|
-
* Every consumer should import from here; the
|
|
10
|
+
* Every consumer should import from here; the original entry points in
|
|
11
11
|
* `clean-artifact.ts` are preserved as thin re-export shims for back-compat.
|
|
12
12
|
*/
|
|
13
13
|
/** Hack23 repo slug used when building blob/raw/tree URLs. */
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
* `githubRawUrl`) and `article-generator.ts` (which embedded the same slug
|
|
10
10
|
* literally inside an `isBasedOn` template string).
|
|
11
11
|
*
|
|
12
|
-
* Every consumer should import from here; the
|
|
12
|
+
* Every consumer should import from here; the original entry points in
|
|
13
13
|
* `clean-artifact.ts` are preserved as thin re-export shims for back-compat.
|
|
14
14
|
*/
|
|
15
15
|
/** Hack23 repo slug used when building blob/raw/tree URLs. */
|
|
@@ -10,12 +10,12 @@ import type { Manifest, ManifestFiles } from './types.js';
|
|
|
10
10
|
/** Sentinel used when no schema variant supplies a usable article type. */
|
|
11
11
|
export declare const UNKNOWN_ARTICLE_TYPE = "unknown";
|
|
12
12
|
/**
|
|
13
|
-
* Resolve the article-type slug from a manifest, tolerating
|
|
13
|
+
* Resolve the article-type slug from a manifest, tolerating historic schemas.
|
|
14
14
|
*
|
|
15
15
|
* Resolution order (highest precedence first):
|
|
16
16
|
* 1. `articleType` — canonical singular field
|
|
17
17
|
* 2. `articleTypes[0]` — pre-aggregator-pipeline plural array
|
|
18
|
-
* 3. `runType` —
|
|
18
|
+
* 3. `runType` — historic field on older breaking-run manifests
|
|
19
19
|
*
|
|
20
20
|
* Falls back to `'unknown'` when none of the above is a non-empty string.
|
|
21
21
|
*
|
|
@@ -3,12 +3,12 @@
|
|
|
3
3
|
/** Sentinel used when no schema variant supplies a usable article type. */
|
|
4
4
|
export const UNKNOWN_ARTICLE_TYPE = 'unknown';
|
|
5
5
|
/**
|
|
6
|
-
* Resolve the article-type slug from a manifest, tolerating
|
|
6
|
+
* Resolve the article-type slug from a manifest, tolerating historic schemas.
|
|
7
7
|
*
|
|
8
8
|
* Resolution order (highest precedence first):
|
|
9
9
|
* 1. `articleType` — canonical singular field
|
|
10
10
|
* 2. `articleTypes[0]` — pre-aggregator-pipeline plural array
|
|
11
|
-
* 3. `runType` —
|
|
11
|
+
* 3. `runType` — historic field on older breaking-run manifests
|
|
12
12
|
*
|
|
13
13
|
* Falls back to `'unknown'` when none of the above is a non-empty string.
|
|
14
14
|
*
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
* @module Aggregator/Manifest/Types
|
|
3
3
|
* @description Canonical manifest schema for analysis runs and the narrower
|
|
4
4
|
* projection consumed by the editorial-metadata resolver. Centralises every
|
|
5
|
-
* historic schema variant (canonical `articleType`,
|
|
6
|
-
* `articleTypes[]`,
|
|
5
|
+
* historic schema variant (canonical `articleType`, plural
|
|
6
|
+
* `articleTypes[]`, original `runType`) into one type that downstream
|
|
7
7
|
* modules can read against.
|
|
8
8
|
*/
|
|
9
9
|
import type { LanguageCode } from '../../types/index.js';
|
|
@@ -28,20 +28,22 @@ export type ManifestMetadataOverride = string | Partial<Record<LanguageCode, str
|
|
|
28
28
|
/**
|
|
29
29
|
* Raw manifest shape as committed by the analysis pipeline. Matches every
|
|
30
30
|
* schema variant the pipeline has ever emitted; readers consult
|
|
31
|
-
* {@link resolveArticleType} rather than `articleType` directly so
|
|
31
|
+
* {@link resolveArticleType} rather than `articleType` directly so historic
|
|
32
32
|
* runs stay readable.
|
|
33
33
|
*/
|
|
34
34
|
export interface Manifest {
|
|
35
35
|
/** Canonical singular form (current pipeline). */
|
|
36
36
|
readonly articleType?: string;
|
|
37
37
|
/**
|
|
38
|
-
*
|
|
39
|
-
* When present, `articleTypes[0]` is treated as the
|
|
38
|
+
* Plural form emitted by some pre-aggregator-pipeline workflows (historic
|
|
39
|
+
* schema variant). When present, `articleTypes[0]` is treated as the
|
|
40
|
+
* article type.
|
|
40
41
|
*/
|
|
41
42
|
readonly articleTypes?: readonly string[];
|
|
42
43
|
/**
|
|
43
|
-
*
|
|
44
|
-
* fallback when neither `articleType` nor `articleTypes`
|
|
44
|
+
* Original field on older breaking-run manifests (historic schema variant).
|
|
45
|
+
* Used as the last fallback when neither `articleType` nor `articleTypes`
|
|
46
|
+
* is present.
|
|
45
47
|
*/
|
|
46
48
|
readonly runType?: string;
|
|
47
49
|
/** Stable run identifier; falls back to the run-dir basename. */
|
|
@@ -65,7 +67,7 @@ export interface Manifest {
|
|
|
65
67
|
* Narrower manifest projection consumed by {@link resolveArticleMetadata}
|
|
66
68
|
* in `aggregator/article-metadata.ts`. The metadata resolver only needs a
|
|
67
69
|
* subset; keeping this projection separate means string-only callers
|
|
68
|
-
* (backport,
|
|
70
|
+
* (backport, historic curators) don't have to construct a full {@link Manifest}.
|
|
69
71
|
*/
|
|
70
72
|
export interface MetadataManifest {
|
|
71
73
|
readonly articleType?: string;
|
|
@@ -3,24 +3,33 @@
|
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
|
-
* Prior-run diff helper for the re-run
|
|
6
|
+
* Prior-run diff helper for the re-run improve/extend rule.
|
|
7
7
|
*
|
|
8
8
|
* Reads `manifest.json.history[]` from a same-day analysis folder and
|
|
9
|
-
* classifies every artifact as **at-floor** (carry-forward) or
|
|
10
|
-
* (rewrite). The result — a `priorRunDiff` plan
|
|
11
|
-
*
|
|
9
|
+
* classifies every artifact as **at-floor** (must-extend / carry-forward) or
|
|
10
|
+
* **below-floor** (rewrite). The result — a `priorRunDiff` plan with
|
|
11
|
+
* `mode: "improve-and-extend"` — is written to stdout as JSON and is
|
|
12
|
+
* consumed by Stage B of the analysis workflow.
|
|
12
13
|
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
14
|
+
* **Re-run semantics (never no-op).** Entries listed under `carryForward[]`
|
|
15
|
+
* are **NOT** skipped on re-runs — they are must-extend targets. Stage B
|
|
16
|
+
* MUST raise their depth: each prior artifact's `priorLines` becomes the new
|
|
17
|
+
* floor and the agent must add ≥1 new section, ≥3 new evidence citations, or
|
|
18
|
+
* ≥1 new chart, ending at `lines >= max(floor, priorLines + 20)`. Entries in
|
|
19
|
+
* `rewrite[]` are still written from scratch to the catalog floor.
|
|
20
|
+
*
|
|
21
|
+
* Always-on. The `ENABLE_PRIOR_RUN_MERGE` environment variable is no longer
|
|
22
|
+
* read — the helper runs unconditionally so re-runs cannot accidentally
|
|
23
|
+
* regress to the legacy "skip-write" behaviour. The `buildPriorRunDiff(..,
|
|
24
|
+
* enabled)` parameter is kept for back-compat with unit tests but the CLI
|
|
25
|
+
* always passes `true`.
|
|
17
26
|
*
|
|
18
27
|
* Invocation:
|
|
19
28
|
* node scripts/aggregator/prior-run-diff.js <runDir>
|
|
20
29
|
* npm run prior-run-diff -- analysis/daily/2026-04-26/week-in-review
|
|
21
30
|
*
|
|
22
31
|
* Exit codes:
|
|
23
|
-
* 0 — plan emitted successfully
|
|
32
|
+
* 0 — plan emitted successfully
|
|
24
33
|
* 1 — runDir missing or invalid
|
|
25
34
|
* 2 — bad CLI usage
|
|
26
35
|
*
|
|
@@ -28,6 +37,7 @@
|
|
|
28
37
|
* ```json
|
|
29
38
|
* {
|
|
30
39
|
* "enabled": true,
|
|
40
|
+
* "mode": "improve-and-extend",
|
|
31
41
|
* "runDir": "analysis/daily/2026-04-26/week-in-review",
|
|
32
42
|
* "articleType": "week-in-review",
|
|
33
43
|
* "priorRunId": "week-in-review-run-1714128000",
|
|
@@ -35,8 +45,10 @@
|
|
|
35
45
|
* {
|
|
36
46
|
* "relativePath": "intelligence/synthesis-summary.md",
|
|
37
47
|
* "lines": 250,
|
|
48
|
+
* "priorLines": 250,
|
|
38
49
|
* "floor": 180,
|
|
39
|
-
* "
|
|
50
|
+
* "extendFloor": 270,
|
|
51
|
+
* "source": "extend-from-prior:week-in-review-run-1714128000"
|
|
40
52
|
* }
|
|
41
53
|
* ],
|
|
42
54
|
* "rewrite": [
|
|
@@ -50,10 +62,13 @@
|
|
|
50
62
|
* }
|
|
51
63
|
* ```
|
|
52
64
|
*
|
|
53
|
-
*
|
|
54
|
-
*
|
|
55
|
-
*
|
|
56
|
-
*
|
|
65
|
+
* - `priorLines` exposes the prior-run line count so Stage B knows the lower
|
|
66
|
+
* bound it must beat.
|
|
67
|
+
* - `extendFloor` = `max(floor, priorLines + 20)` — the minimum line count
|
|
68
|
+
* the new pass MUST reach for this artifact.
|
|
69
|
+
* - The `source` value follows the schema `"extend-from-prior:<runId>"`,
|
|
70
|
+
* which Stage B writes into `manifest.json.artifactSources` (additive,
|
|
71
|
+
* back-compat with prior `"carry-forward-from:<runId>"` consumers).
|
|
57
72
|
*/
|
|
58
73
|
|
|
59
74
|
import fs from 'node:fs';
|
|
@@ -63,6 +78,7 @@ import { fileURLToPath } from 'node:url';
|
|
|
63
78
|
|
|
64
79
|
const ROOT = process.cwd();
|
|
65
80
|
const DEFAULT_MIN_LINES = 30;
|
|
81
|
+
const EXTEND_DELTA_LINES = 20;
|
|
66
82
|
|
|
67
83
|
// Artifacts that must contain at least one Mermaid fenced block.
|
|
68
84
|
// Mirrors the directory-based heuristic in validate-analysis-completeness.js.
|
|
@@ -86,9 +102,10 @@ function usage(code = 2) {
|
|
|
86
102
|
'',
|
|
87
103
|
' <runDir> Path to analysis/daily/<date>/<slug>/',
|
|
88
104
|
'',
|
|
89
|
-
'
|
|
90
|
-
'
|
|
91
|
-
'
|
|
105
|
+
'Always-on. The helper unconditionally classifies prior-run artifacts as',
|
|
106
|
+
'must-extend (carryForward[]) or below-floor rewrite (rewrite[]) so re-runs',
|
|
107
|
+
'can never accidentally no-op. The legacy ENABLE_PRIOR_RUN_MERGE env flag',
|
|
108
|
+
'is no longer read.',
|
|
92
109
|
'',
|
|
93
110
|
'Example:',
|
|
94
111
|
' npm run prior-run-diff -- analysis/daily/2026-04-26/week-in-review',
|
|
@@ -190,9 +207,14 @@ export function classifyArtifact(runDir, relativePath, floor, mermaidRequiredLis
|
|
|
190
207
|
/**
|
|
191
208
|
* Build the `priorRunDiff` plan for a same-day analysis folder.
|
|
192
209
|
*
|
|
210
|
+
* Mode is always **improve-and-extend**: `carryForward[]` entries are
|
|
211
|
+
* must-extend targets (their `priorLines` and `extendFloor` exposed), not
|
|
212
|
+
* skip-write targets. The `enabled` parameter is preserved for back-compat
|
|
213
|
+
* with the legacy unit-test signature; the CLI always passes `true`.
|
|
214
|
+
*
|
|
193
215
|
* @param {string} runDir - Absolute path to the run folder.
|
|
194
216
|
* @param {object|null} thresholdsJson - Parsed reference-quality-thresholds.json.
|
|
195
|
-
* @param {boolean} enabled - Whether the feature is enabled.
|
|
217
|
+
* @param {boolean} enabled - Whether the feature is enabled (CLI: always true).
|
|
196
218
|
* @returns {object} The diff plan (serialisable to JSON).
|
|
197
219
|
*/
|
|
198
220
|
export function buildPriorRunDiff(runDir, thresholdsJson, enabled) {
|
|
@@ -210,6 +232,7 @@ export function buildPriorRunDiff(runDir, thresholdsJson, enabled) {
|
|
|
210
232
|
if (!enabled) {
|
|
211
233
|
return {
|
|
212
234
|
enabled: false,
|
|
235
|
+
mode: 'improve-and-extend',
|
|
213
236
|
runDir: relRunDir,
|
|
214
237
|
articleType,
|
|
215
238
|
priorRunId: null,
|
|
@@ -222,6 +245,7 @@ export function buildPriorRunDiff(runDir, thresholdsJson, enabled) {
|
|
|
222
245
|
if (history.length === 0) {
|
|
223
246
|
return {
|
|
224
247
|
enabled: true,
|
|
248
|
+
mode: 'improve-and-extend',
|
|
225
249
|
runDir: relRunDir,
|
|
226
250
|
articleType,
|
|
227
251
|
priorRunId: null,
|
|
@@ -248,11 +272,14 @@ export function buildPriorRunDiff(runDir, thresholdsJson, enabled) {
|
|
|
248
272
|
const floor = Math.max(DEFAULT_MIN_LINES, perArtifactFloors[relativePath] ?? 0);
|
|
249
273
|
const result = classifyArtifact(runDir, relativePath, floor, mermaidRequiredList);
|
|
250
274
|
if (result.atFloor) {
|
|
275
|
+
const extendFloor = Math.max(floor, result.lines + EXTEND_DELTA_LINES);
|
|
251
276
|
carryForward.push({
|
|
252
277
|
relativePath,
|
|
253
278
|
lines: result.lines,
|
|
279
|
+
priorLines: result.lines,
|
|
254
280
|
floor: result.floor,
|
|
255
|
-
|
|
281
|
+
extendFloor,
|
|
282
|
+
source: `extend-from-prior:${priorRunId}`,
|
|
256
283
|
});
|
|
257
284
|
} else {
|
|
258
285
|
rewrite.push({
|
|
@@ -266,6 +293,7 @@ export function buildPriorRunDiff(runDir, thresholdsJson, enabled) {
|
|
|
266
293
|
|
|
267
294
|
return {
|
|
268
295
|
enabled: true,
|
|
296
|
+
mode: 'improve-and-extend',
|
|
269
297
|
runDir: relRunDir,
|
|
270
298
|
articleType,
|
|
271
299
|
priorRunId,
|
|
@@ -338,7 +366,11 @@ function main() {
|
|
|
338
366
|
process.exit(1);
|
|
339
367
|
}
|
|
340
368
|
|
|
341
|
-
|
|
369
|
+
// Re-run improve/extend rule is always-on. The legacy ENABLE_PRIOR_RUN_MERGE
|
|
370
|
+
// env flag is no longer read — re-runs cannot accidentally regress to the
|
|
371
|
+
// pre-2026-05 skip-write behaviour. See .github/prompts/02-analysis-protocol.md
|
|
372
|
+
// §"Re-run improve/extend rule".
|
|
373
|
+
const enabled = true;
|
|
342
374
|
const thresholdsJson = loadThresholds(opts.thresholdsPath);
|
|
343
375
|
const plan = buildPriorRunDiff(runDir, thresholdsJson, enabled);
|
|
344
376
|
|
|
@@ -34,7 +34,7 @@ export declare function readRunCandidate(runDir: string): DiscoveredRun | null;
|
|
|
34
34
|
* The walk stops descending into a directory the moment it sees a
|
|
35
35
|
* `manifest.json`, so nested artifact subdirectories never get reported
|
|
36
36
|
* as separate runs. Results are sorted by date ascending then by path
|
|
37
|
-
* lexically — the same order used by the
|
|
37
|
+
* lexically — the same order used by the previous in-line implementation in
|
|
38
38
|
* `article-generator.ts`.
|
|
39
39
|
*
|
|
40
40
|
* @param repoRoot - Absolute repository root
|
|
@@ -52,7 +52,7 @@ export function readRunCandidate(runDir) {
|
|
|
52
52
|
* The walk stops descending into a directory the moment it sees a
|
|
53
53
|
* `manifest.json`, so nested artifact subdirectories never get reported
|
|
54
54
|
* as separate runs. Results are sorted by date ascending then by path
|
|
55
|
-
* lexically — the same order used by the
|
|
55
|
+
* lexically — the same order used by the previous in-line implementation in
|
|
56
56
|
* `article-generator.ts`.
|
|
57
57
|
*
|
|
58
58
|
* @param repoRoot - Absolute repository root
|
|
@@ -388,7 +388,7 @@ function extractBodyFirstProse(articleHtml) {
|
|
|
388
388
|
* 1. If a manifest.json for the run exists (aggregator cohort), use the
|
|
389
389
|
* full {@link resolveArticleMetadata} pipeline — this picks up manifest
|
|
390
390
|
* overrides and artefact H1s.
|
|
391
|
-
* 2. Otherwise (
|
|
391
|
+
* 2. Otherwise (historic cohort), derive from the rendered body:
|
|
392
392
|
* - Title = non-generic `<h1>` from the body, else first sentence of
|
|
393
393
|
* the first strong prose paragraph.
|
|
394
394
|
* - Description = first strong prose paragraph (full, not the same
|
|
@@ -412,7 +412,7 @@ function deriveMetadataForFile(file, html) {
|
|
|
412
412
|
// fallback for `committee-reports` renders realistic abbreviations
|
|
413
413
|
// (`ENVI, ECON, AFET, LIBE, AGRI`) instead of the placeholder
|
|
414
414
|
// `Main Committees`. This keeps the localized template consistent
|
|
415
|
-
// with the
|
|
415
|
+
// with the historic format even when the manifest is missing.
|
|
416
416
|
const committee = extractCommitteeCodes(bodyH1) || extractCommitteeCodes(bodyProse);
|
|
417
417
|
|
|
418
418
|
const resolved = resolveArticleMetadata({
|
|
@@ -425,7 +425,7 @@ function deriveMetadataForFile(file, html) {
|
|
|
425
425
|
|
|
426
426
|
if (file.lang !== 'en') {
|
|
427
427
|
// NON-ENGLISH files: The article body may be in a different language
|
|
428
|
-
// than the file claims to be —
|
|
428
|
+
// than the file claims to be — historic files have localized H1/chrome
|
|
429
429
|
// but English body prose; aggregator PR#1404 files have English H1
|
|
430
430
|
// AND English body in every language variant. We accept body content
|
|
431
431
|
// only when it is plausibly in the file's language, and fall back to
|
|
@@ -658,9 +658,9 @@ function buildSyntheticMarkdown(h1, prose) {
|
|
|
658
658
|
|
|
659
659
|
/**
|
|
660
660
|
* Choose the final title text. Prefers a non-generic body H1. When the
|
|
661
|
-
* H1 is generic (e.g.
|
|
661
|
+
* H1 is generic (e.g. historic "Legislative Procedures: European Parliament
|
|
662
662
|
* Monitor"), falls back to the first sentence of body prose — this is
|
|
663
|
-
* the single biggest SEO win for
|
|
663
|
+
* the single biggest SEO win for historic files.
|
|
664
664
|
*
|
|
665
665
|
* @param {string} bodyH1 - First H1 from the body
|
|
666
666
|
* @param {string} bodyProse - First strong prose paragraph
|
|
@@ -681,7 +681,7 @@ function chooseTitle(bodyH1, bodyProse, templateTitle, file) {
|
|
|
681
681
|
|
|
682
682
|
/**
|
|
683
683
|
* Extend {@link isGenericHeading} with a few extra patterns specific to
|
|
684
|
-
*
|
|
684
|
+
* historic-era titles (pre-aggregator pipeline) so those files get
|
|
685
685
|
* replaced during backport. Also catches the pure `<Title-Case-Phrase>
|
|
686
686
|
* — <ISO-date>` form that the default aggregator title emits when the
|
|
687
687
|
* articleType slug has a run suffix (e.g. `breaking-190`) that
|
|
@@ -695,7 +695,7 @@ function chooseTitle(bodyH1, bodyProse, templateTitle, file) {
|
|
|
695
695
|
function isGenericBodyH1(h1, articleType, date) {
|
|
696
696
|
if (isGenericHeading(h1, articleType, date)) return true;
|
|
697
697
|
const normalized = h1.trim();
|
|
698
|
-
const
|
|
698
|
+
const historicTemplates = [
|
|
699
699
|
'Legislative Procedures: European Parliament Monitor',
|
|
700
700
|
'EU Parliament Committee Activity Report',
|
|
701
701
|
'EU Parliament Breaking',
|
|
@@ -703,7 +703,7 @@ function isGenericBodyH1(h1, articleType, date) {
|
|
|
703
703
|
'Plenary Votes & Resolutions',
|
|
704
704
|
'Plenary Votes and Resolutions',
|
|
705
705
|
];
|
|
706
|
-
for (const t of
|
|
706
|
+
for (const t of historicTemplates) {
|
|
707
707
|
if (normalized === t || normalized.startsWith(`${t} `) || normalized.startsWith(`${t}:`)) {
|
|
708
708
|
return true;
|
|
709
709
|
}
|
|
@@ -814,7 +814,7 @@ function rewriteHtml(html, metadata) {
|
|
|
814
814
|
/**
|
|
815
815
|
* Replace `<meta name="<name>" content="…">` in-place. When absent the
|
|
816
816
|
* document is returned unchanged — we never inject new tags during
|
|
817
|
-
* backport so
|
|
817
|
+
* backport so historic files retain their original meta-tag order.
|
|
818
818
|
*
|
|
819
819
|
* The `content` match is quote-aware: the content of a double-quoted
|
|
820
820
|
* attribute value may contain apostrophes (e.g. `Parliament's`), so the
|
|
@@ -27,7 +27,7 @@ export declare const AI_MARKER = "[AI_ANALYSIS_REQUIRED]";
|
|
|
27
27
|
*
|
|
28
28
|
* Recognises three marker formats:
|
|
29
29
|
* - `[AI_ANALYSIS_REQUIRED]` — the current standard marker (v3.0+)
|
|
30
|
-
* - `[REQUIRED]` —
|
|
30
|
+
* - `[REQUIRED]` — historic marker used in template stubs before v3.0
|
|
31
31
|
* - `[?]` — shorthand used in some early methodology templates
|
|
32
32
|
*
|
|
33
33
|
* @param text - Text to test
|