euparliamentmonitor 0.8.33 → 0.8.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "euparliamentmonitor",
3
- "version": "0.8.33",
3
+ "version": "0.8.34",
4
4
  "type": "module",
5
5
  "description": "European Parliament Intelligence Platform - Monitor political activity with systematic transparency",
6
6
  "main": "scripts/index.js",
@@ -135,7 +135,7 @@
135
135
  },
136
136
  "homepage": "https://euparliamentmonitor.com",
137
137
  "devDependencies": {
138
- "@axe-core/playwright": "4.11.1",
138
+ "@axe-core/playwright": "4.11.2",
139
139
  "@eslint/js": "10.0.1",
140
140
  "@playwright/test": "1.59.1",
141
141
  "@types/d3": "7.4.3",
@@ -148,7 +148,7 @@
148
148
  "chart.js": "4.5.1",
149
149
  "chartjs-plugin-annotation": "3.1.0",
150
150
  "d3": "7.9.0",
151
- "eslint": "10.2.0",
151
+ "eslint": "10.2.1",
152
152
  "eslint-config-prettier": "10.1.8",
153
153
  "eslint-plugin-jsdoc": "62.9.0",
154
154
  "eslint-plugin-security": "4.0.0",
@@ -163,7 +163,7 @@
163
163
  "ts-api-utils": "2.5.0",
164
164
  "tsx": "4.21.0",
165
165
  "typedoc": "0.28.19",
166
- "typescript": "6.0.2",
166
+ "typescript": "6.0.3",
167
167
  "vitest": "4.1.4"
168
168
  },
169
169
  "engines": {
@@ -1,5 +1,15 @@
1
1
  import { MCPConnection } from './mcp-connection.js';
2
2
  import type { MCPToolResult, MCPClientOptions } from '../types/index.js';
3
+ /**
4
+ * Canonical list of tools exposed by the World Bank MCP gateway. The news
5
+ * workflows, probe script, and the integration test suite all reference this
6
+ * list so a regression that adds/removes a tool fails a single drift guard
7
+ * (`test/integration/mcp/worldbank-mcp.test.js`) instead of silently breaking
8
+ * prompt/validator/probe coverage.
9
+ *
10
+ * Kept in sync with `analysis/methodologies/worldbank-indicator-mapping.md`.
11
+ */
12
+ export declare const WORLD_BANK_MCP_TOOLS: readonly string[];
3
13
  /**
4
14
  * MCP Client for World Bank economic data access.
5
15
  * Extends {@link MCPConnection} with World Bank-specific tool wrapper methods.
@@ -25,6 +25,24 @@ const WB_BINARY_FILE = process.platform === 'win32' ? `${WB_BINARY_NAME}.cmd` :
25
25
  const WB_DEFAULT_SERVER = resolve(dirname(fileURLToPath(import.meta.url)), `../../node_modules/.bin/${WB_BINARY_FILE}`);
26
26
  /** Fallback payload when indicator data is unavailable (empty CSV) */
27
27
  const INDICATOR_FALLBACK = '';
28
+ /**
29
+ * Canonical list of tools exposed by the World Bank MCP gateway. The news
30
+ * workflows, probe script, and the integration test suite all reference this
31
+ * list so a regression that adds/removes a tool fails a single drift guard
32
+ * (`test/integration/mcp/worldbank-mcp.test.js`) instead of silently breaking
33
+ * prompt/validator/probe coverage.
34
+ *
35
+ * Kept in sync with `analysis/methodologies/worldbank-indicator-mapping.md`.
36
+ */
37
+ export const WORLD_BANK_MCP_TOOLS = [
38
+ 'search-indicators',
39
+ 'get-countries',
40
+ 'get-country-info',
41
+ 'get-economic-data',
42
+ 'get-social-data',
43
+ 'get-education-data',
44
+ 'get-health-data',
45
+ ];
28
46
  /**
29
47
  * MCP Client for World Bank economic data access.
30
48
  * Extends {@link MCPConnection} with World Bank-specific tool wrapper methods.
@@ -1,3 +1,7 @@
1
+ /**
2
+ * @module Templates/ArticleTemplate
3
+ * @description Generates HTML templates for news articles with proper structure and metadata
4
+ */
1
5
  import type { ArticleOptions, LanguageCode, AnalysisFileEntry } from '../types/index.js';
2
6
  /**
3
7
  * Generate complete HTML for a news article
@@ -1,14 +1,9 @@
1
1
  // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
2
2
  // SPDX-License-Identifier: Apache-2.0
3
- /**
4
- * @module Templates/ArticleTemplate
5
- * @description Generates HTML templates for news articles with proper structure and metadata
6
- */
7
- import { createHash } from 'crypto';
8
3
  import { ALL_LANGUAGES, LANGUAGE_FLAGS, LANGUAGE_NAMES, ARTICLE_TYPE_LABELS, READ_TIME_LABELS, BACK_TO_NEWS_LABELS, ARTICLE_NAV_LABELS, RELATED_ARTICLES_NAV_LABELS, BREADCRUMB_HOME_LABELS, BREADCRUMB_NEWS_LABELS, SKIP_LINK_TEXTS, SOURCES_HEADING_LABELS, HEADER_SUBTITLE_LABELS, THEME_TOGGLE_LABELS, FOOTER_ABOUT_HEADING_LABELS, FOOTER_ABOUT_TEXT_LABELS, FOOTER_QUICK_LINKS_LABELS, FOOTER_BUILT_BY_LABELS, FOOTER_LANGUAGES_LABELS, ANALYSIS_TRANSPARENCY_LABELS, ANALYSIS_SUMMARY_LABELS, METHODOLOGY_LABELS, TRANSPARENCY_DISCLOSURE_LABELS, CLASSIFICATION_ANALYSIS_LABELS, THREAT_ASSESSMENT_LABELS, RISK_SCORING_LABELS, DEEP_ANALYSIS_LABELS, VIEW_SOURCE_LABELS, OPEN_SOURCE_NOTE_LABELS, AI_ANALYSIS_GUIDE_LABELS, SWOT_FRAMEWORK_LABELS, RISK_METHODOLOGY_LABELS, THREAT_FRAMEWORK_LABELS, CLASSIFICATION_GUIDE_LABELS, STYLE_GUIDE_LABELS, SIGNIFICANCE_CLASSIFICATION_LABELS, ACTOR_MAPPING_LABELS, FORCES_ANALYSIS_LABELS, IMPACT_MATRIX_LABELS, POLITICAL_THREAT_LANDSCAPE_LABELS, ACTOR_THREAT_PROFILING_LABELS, CONSEQUENCE_TREES_LABELS, LEGISLATIVE_DISRUPTION_LABELS, RISK_MATRIX_LABELS, QUANTITATIVE_SWOT_LABELS, POLITICAL_CAPITAL_RISK_LABELS, LEGISLATIVE_VELOCITY_RISK_LABELS, AGENT_RISK_WORKFLOW_LABELS, STAKEHOLDER_IMPACT_LABELS, COALITION_DYNAMICS_LABELS, VOTING_PATTERNS_LABELS, CROSS_SESSION_INTELLIGENCE_LABELS, SYNTHESIS_SUMMARY_LABELS, DOCUMENT_ANALYSIS_LABELS, SIGNIFICANCE_SCORING_LABELS, getLocalizedString, getTextDirection, } from '../constants/languages.js';
9
4
  import { escapeHTML, isSafeURL } from '../utils/file-utils.js';
10
5
  import { stripHtmlTags } from '../utils/html-sanitize.js';
11
- import { APP_VERSION, createThemeToggleButton, THEME_TOGGLE_SCRIPT, THEME_TOGGLE_SCRIPT_CONTENT, } from '../constants/config.js';
6
+ import { APP_VERSION, createThemeToggleButton } from '../constants/config.js';
12
7
  /** Pattern for valid article dates (YYYY-MM-DD) */
13
8
  const DATE_PATTERN = /^\d{4}-\d{2}-\d{2}$/u;
14
9
  /** Pattern for valid article slugs (lowercase letters, digits, hyphens) */
@@ -288,22 +283,12 @@ export function generateArticleHTML(options) {
288
283
  const safeSriAttrs = stylesHash && SRI_HASH_PATTERN.test(stylesHash)
289
284
  ? ` integrity="${escapeHTML(stylesHash)}" crossorigin="anonymous"`
290
285
  : '';
291
- // Compute SHA-256 hash of the inline JSON-LD script content for CSP.
292
- // IMPORTANT: The whitespace here ("\n " prefix and "\n " suffix) must exactly
293
- // match the script tag content in the HTML template below:
294
- // <script type="application/ld+json">
295
- // ${jsonLd}
296
- // </script>
297
- const jsonLdScriptContent = `\n ${jsonLd}\n `;
298
- const jsonLdHash = `sha256-${createHash('sha256').update(jsonLdScriptContent).digest('base64')}`;
299
- // Compute CSP hash for BreadcrumbList JSON-LD script
300
- const breadcrumbLdScriptContent = `\n ${breadcrumbLd}\n `;
301
- const breadcrumbLdHash = `sha256-${createHash('sha256').update(breadcrumbLdScriptContent).digest('base64')}`;
302
- // Reading-progress script hash — content must exactly match the <script> block.
303
- const readingProgressScript = `\n (function(){\n var bar=document.querySelector('.reading-progress');\n if(!bar)return;\n bar.style.display='block';\n var ticking=false;\n window.addEventListener('scroll',function(){\n if(!ticking){\n window.requestAnimationFrame(function(){\n var h=document.documentElement;\n var scrollTop=h.scrollTop||document.body.scrollTop;\n var scrollHeight=h.scrollHeight-h.clientHeight;\n bar.style.width=scrollHeight>0?((scrollTop/scrollHeight)*100)+'%':'0%';\n ticking=false;\n });\n ticking=true;\n }\n },{passive:true});\n })();\n `;
304
- const readingProgressHash = `sha256-${createHash('sha256').update(readingProgressScript).digest('base64')}`;
305
- // Theme toggle CSP hash — derived from the shared THEME_TOGGLE_SCRIPT_CONTENT constant
306
- const themeToggleHash = `sha256-${createHash('sha256').update(THEME_TOGGLE_SCRIPT_CONTENT).digest('base64')}`;
286
+ // SHA-256 hashes were previously required for inline <script>
287
+ // blocks (JSON-LD, reading progress, theme toggle). All executable inline
288
+ // scripts have been externalised to `js/article-runtime.js`, so the CSP
289
+ // reduces to `script-src 'self'`. JSON-LD blocks use
290
+ // `type="application/ld+json"` which is non-executable and not governed
291
+ // by `script-src`.
307
292
  // Localized theme toggle button
308
293
  const themeToggleLabel = escapeHTML(getLocalizedString(THEME_TOGGLE_LABELS, lang));
309
294
  // Related articles navigation HTML (optional)
@@ -315,7 +300,7 @@ export function generateArticleHTML(options) {
315
300
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
316
301
  <meta http-equiv="X-Content-Type-Options" content="nosniff">
317
302
  <meta name="referrer" content="no-referrer">
318
- <meta http-equiv="Content-Security-Policy" content="default-src 'self'; script-src 'self' '${jsonLdHash}' '${breadcrumbLdHash}' '${readingProgressHash}' '${themeToggleHash}'; style-src 'self' 'unsafe-inline'; img-src 'self' https: data:; font-src 'self'; connect-src 'self'; frame-src 'none'; base-uri 'self'; form-action 'none'">
303
+ <meta http-equiv="Content-Security-Policy" content="default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; img-src 'self' https: data:; font-src 'self'; connect-src 'self'; frame-src 'none'; base-uri 'self'; form-action 'none'">
319
304
  <title>${safeTitle} | EU Parliament Monitor</title>
320
305
  <meta name="description" content="${safeSubtitle}">
321
306
  <meta name="keywords" content="${safeKeywords}">
@@ -453,26 +438,7 @@ export function generateArticleHTML(options) {
453
438
  </div>
454
439
  </footer>
455
440
 
456
- <script>
457
- (function(){
458
- var bar=document.querySelector('.reading-progress');
459
- if(!bar)return;
460
- bar.style.display='block';
461
- var ticking=false;
462
- window.addEventListener('scroll',function(){
463
- if(!ticking){
464
- window.requestAnimationFrame(function(){
465
- var h=document.documentElement;
466
- var scrollTop=h.scrollTop||document.body.scrollTop;
467
- var scrollHeight=h.scrollHeight-h.clientHeight;
468
- bar.style.width=scrollHeight>0?((scrollTop/scrollHeight)*100)+'%':'0%';
469
- ticking=false;
470
- });
471
- ticking=true;
472
- }
473
- },{passive:true});
474
- })();
475
- </script>${content.includes('data-chart-config')
441
+ <script src="../js/article-runtime.js" defer></script>${content.includes('data-chart-config')
476
442
  ? `
477
443
  <script src="../js/vendor/chart.umd.min.js" defer></script>
478
444
  <script src="../js/vendor/chartjs-plugin-annotation.min.js" defer></script>
@@ -481,7 +447,7 @@ export function generateArticleHTML(options) {
481
447
  ? `
482
448
  <script src="../js/vendor/d3.min.js" defer></script>
483
449
  <script src="../js/d3-init.js" defer></script>`
484
- : ''}${THEME_TOGGLE_SCRIPT}
450
+ : ''}
485
451
  </body>
486
452
  </html>`;
487
453
  }
@@ -56,6 +56,73 @@ export interface TranslationValidationResult {
56
56
  /** Collected translation quality metrics */
57
57
  metrics: TranslationValidationMetrics;
58
58
  }
59
+ /**
60
+ * Detect whether the article contains at least one Chart.js canvas with a
61
+ * well-formed `data-chart-config` JSON payload.
62
+ *
63
+ * A valid chart must:
64
+ * - be rendered via `<canvas data-chart-config="…">` (the declarative
65
+ * CSP-safe pattern hydrated by `js/chart-init.js`)
66
+ * - declare a supported Chart.js `type`
67
+ * - carry at least 3 data points in the first dataset (single-point charts
68
+ * are rejected by `SHARED_PROMPT_PATTERNS.md` anti-patterns)
69
+ *
70
+ * @param html - Raw article HTML
71
+ * @returns `true` when ≥1 chart meeting the rules is present
72
+ */
73
+ export declare function articleHasChart(html: string): boolean;
74
+ /**
75
+ * Strong World Bank evidence tokens — plain substring match is enough to
76
+ * satisfy the gate because each is specific (the literal attribution phrase
77
+ * or an MCP tool name). Kept aligned with
78
+ * `analysis/methodologies/worldbank-indicator-mapping.md`.
79
+ */
80
+ export declare const WORLD_BANK_STRONG_FINGERPRINTS: readonly string[];
81
+ /**
82
+ * Short indicator codes published by the World Bank MCP server. These are
83
+ * matched with a word boundary (`[^A-Z0-9_]` look-arounds) so that prose like
84
+ * "GDP growth slowed" does NOT count as World Bank evidence, but an analysis
85
+ * file line like `INDICATOR: GDP` does. All codes are uppercase, so the match
86
+ * is case-sensitive — case-insensitive mentions in English prose are intentionally
87
+ * rejected.
88
+ */
89
+ export declare const WORLD_BANK_INDICATOR_CODES: readonly string[];
90
+ /**
91
+ * Backwards-compatible union of strong + short fingerprints. Kept exported so
92
+ * callers that only need a flat list (e.g. existing consumers that shipped
93
+ * before the strong/short split) continue to compile. New code SHOULD prefer
94
+ * {@link hasWorldBankEvidence}, which enforces the stricter word-boundary rule
95
+ * for short codes.
96
+ */
97
+ export declare const WORLD_BANK_FINGERPRINTS: readonly string[];
98
+ /**
99
+ * Detect World Bank sourcing in any piece of text (article body OR analysis
100
+ * markdown). Returns `true` when the text contains either a strong fingerprint
101
+ * (the phrase "World Bank", an MCP tool name, etc.) or an indicator code with
102
+ * clean word boundaries.
103
+ *
104
+ * This is the single source of truth for the policy quality gate — both the
105
+ * content validator and the CLI validator's filesystem fallback use it so a
106
+ * legitimate evidence trail on either side satisfies the rule, and generic
107
+ * prose mentions of economic terms do not.
108
+ *
109
+ * @param text - Text to scan
110
+ * @returns `true` when at least one strong or word-bounded fingerprint matches
111
+ */
112
+ export declare function hasWorldBankEvidence(text: string): boolean;
113
+ /**
114
+ * Verify that a policy article (or the linked analysis artifacts) contains at
115
+ * least one World Bank fingerprint — indicator code (word-bounded), MCP
116
+ * tool-trace token, or the phrase "World Bank" itself. Returns `true` if the
117
+ * gate is satisfied OR the article type is not on the mandatory list.
118
+ *
119
+ * @param html - Article HTML
120
+ * @param articleType - Slug of the article category (e.g. `"committee-reports"`)
121
+ * @param _analysisDir - Reserved for API symmetry; filesystem recursion is
122
+ * performed by the caller in `validate-articles.ts` to keep this module pure.
123
+ * @returns `true` when the World Bank evidence requirement is met or not applicable
124
+ */
125
+ export declare function articlePolicyHasWorldBank(html: string, articleType: string, _analysisDir?: string): boolean;
59
126
  /**
60
127
  * Validate the quality of a generated article.
61
128
  *
@@ -697,6 +697,400 @@ function collectQualityGateWarnings(html, warnings) {
697
697
  if (emptySectionCount > 0) {
698
698
  warnings.push(`Article contains ${emptySectionCount} empty or near-empty <section> element(s) that should be removed`);
699
699
  }
700
+ // Chart presence gate
701
+ if (!articleHasChart(html)) {
702
+ warnings.push('Missing required Chart.js visualization: no <canvas data-chart-config="…"> element with a valid type found (≥1 required, see ai-first-quality.md quality gates)');
703
+ }
704
+ // Structural integrity gates — catch hand-written HTML bypassing the template
705
+ const langSwitcherCount = countLanguageSwitcherLinks(html);
706
+ if (langSwitcherCount < MIN_LANG_SWITCHER_LINKS) {
707
+ warnings.push(`Language switcher has only ${langSwitcherCount} link(s); the template always emits ${MIN_LANG_SWITCHER_LINKS} — this article may have been hand-written and skipped the template`);
708
+ }
709
+ if (!hasStandardFooterContent(html)) {
710
+ warnings.push('Footer is missing the standard `.footer-content` + `.footer-bottom` blocks — the template always emits these; article may have been hand-written');
711
+ }
712
+ }
713
+ /** Minimum number of language switcher links the template always emits (14 languages). */
714
+ const MIN_LANG_SWITCHER_LINKS = 14;
715
+ /** Chart.js types accepted by the `data-chart-config` declarative pattern. */
716
+ const CHART_JS_TYPES = /"type"\s*:\s*"(bar|line|pie|doughnut|radar|polarArea|scatter|bubble)"/u;
717
+ /**
718
+ * Check whether a character is HTML whitespace per the WHATWG spec
719
+ * (space, tab, LF, CR, FF).
720
+ *
721
+ * @param ch - Single character to test (may be empty string)
722
+ * @returns `true` when `ch` is one of the recognised whitespace chars
723
+ */
724
+ function isHtmlWhitespace(ch) {
725
+ return ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r' || ch === '\f';
726
+ }
727
+ /**
728
+ * Decode the five entity escapes that `escapeHTML` emits into literal chars.
729
+ *
730
+ * @param raw - Entity-encoded substring extracted from an attribute value
731
+ * @returns Decoded literal string
732
+ */
733
+ function decodeHtmlEntities(raw) {
734
+ return raw
735
+ .replace(/&quot;/gu, '"')
736
+ .replace(/&#39;/gu, "'")
737
+ .replace(/&gt;/gu, '>')
738
+ .replace(/&lt;/gu, '<')
739
+ .replace(/&amp;/gu, '&');
740
+ }
741
+ /**
742
+ * Check that the positions immediately before and after an attribute name
743
+ * form valid HTML word-boundary characters. Prevents `xdata-chart-config`
744
+ * from being treated as the `data-chart-config` attribute.
745
+ *
746
+ * @param tag - Full opening-tag text (without trailing `>`)
747
+ * @param attrIdx - Index where the attribute name was found
748
+ * @param attrLen - Length of the attribute name
749
+ * @returns `true` when both boundaries are whitespace / `<` / `=` / start-of-tag
750
+ */
751
+ function hasAttributeBoundaries(tag, attrIdx, attrLen) {
752
+ const before = attrIdx === 0 ? '' : (tag[attrIdx - 1] ?? '');
753
+ const afterIdx = attrIdx + attrLen;
754
+ const after = afterIdx < tag.length ? (tag[afterIdx] ?? '') : '';
755
+ const leadOk = before === '' || isHtmlWhitespace(before) || before === '<';
756
+ const trailOk = after === '' || isHtmlWhitespace(after) || after === '=';
757
+ return leadOk && trailOk;
758
+ }
759
+ /**
760
+ * Starting just after an attribute name, locate the opening quote character
761
+ * (either `"` or `'`) that begins the attribute value, tolerating optional
762
+ * HTML whitespace on either side of the `=`.
763
+ *
764
+ * @param tag - Full opening-tag text
765
+ * @param from - Index immediately after the attribute name
766
+ * @returns `{quote, valueStart}` when a proper `=<whitespace?><quote>` run is
767
+ * present; `null` when the attribute is malformed or unquoted
768
+ */
769
+ function findAttributeValueStart(tag, from) {
770
+ let i = from;
771
+ while (i < tag.length && isHtmlWhitespace(tag[i] ?? ''))
772
+ i++;
773
+ if (i >= tag.length || tag[i] !== '=')
774
+ return null;
775
+ i++;
776
+ while (i < tag.length && isHtmlWhitespace(tag[i] ?? ''))
777
+ i++;
778
+ if (i >= tag.length)
779
+ return null;
780
+ const quote = tag[i] ?? '';
781
+ if (quote !== '"' && quote !== "'")
782
+ return null;
783
+ return { quote, valueStart: i + 1 };
784
+ }
785
+ /**
786
+ * Scan an HTML attribute value in a single `<canvas>` tag starting at
787
+ * `tagStart`. Returns the decoded value of `attr` or `null` if not present.
788
+ * Uses only `indexOf` + single-character look-arounds so runtime is strictly
789
+ * linear in input length — this avoids the polynomial-ReDoS class of regex
790
+ * that CodeQL flags when nested character classes match the same tag prefix.
791
+ *
792
+ * Tolerates all HTML-compliant attribute forms:
793
+ * - double-quoted: `data-chart-config="..."`
794
+ * - single-quoted: `data-chart-config='...'`
795
+ * - optional whitespace around `=`: `data-chart-config = "..."`
796
+ *
797
+ * @param html - Full article HTML
798
+ * @param tagStart - Byte offset of the `<` that opens the canvas tag
799
+ * @param attr - Attribute name (e.g. `data-chart-config`)
800
+ * @returns Decoded attribute value, or `null` when the attribute is missing
801
+ */
802
+ function extractCanvasAttribute(html, tagStart, attr) {
803
+ const tagEnd = html.indexOf('>', tagStart);
804
+ if (tagEnd === -1)
805
+ return null;
806
+ const tag = html.slice(tagStart, tagEnd);
807
+ let searchFrom = 0;
808
+ while (searchFrom < tag.length) {
809
+ const attrIdx = tag.indexOf(attr, searchFrom);
810
+ if (attrIdx === -1)
811
+ return null;
812
+ // Keep scanning past false matches with bad boundaries or without a
813
+ // proper `=<quote>` run; this keeps the function linear in tag length.
814
+ if (!hasAttributeBoundaries(tag, attrIdx, attr.length)) {
815
+ searchFrom = attrIdx + attr.length;
816
+ continue;
817
+ }
818
+ const valueHead = findAttributeValueStart(tag, attrIdx + attr.length);
819
+ if (!valueHead) {
820
+ searchFrom = attrIdx + attr.length;
821
+ continue;
822
+ }
823
+ const valueEnd = tag.indexOf(valueHead.quote, valueHead.valueStart);
824
+ if (valueEnd === -1)
825
+ return null;
826
+ return decodeHtmlEntities(tag.slice(valueHead.valueStart, valueEnd));
827
+ }
828
+ return null;
829
+ }
830
+ /**
831
+ * Detect whether the article contains at least one Chart.js canvas with a
832
+ * well-formed `data-chart-config` JSON payload.
833
+ *
834
+ * A valid chart must:
835
+ * - be rendered via `<canvas data-chart-config="…">` (the declarative
836
+ * CSP-safe pattern hydrated by `js/chart-init.js`)
837
+ * - declare a supported Chart.js `type`
838
+ * - carry at least 3 data points in the first dataset (single-point charts
839
+ * are rejected by `SHARED_PROMPT_PATTERNS.md` anti-patterns)
840
+ *
841
+ * @param html - Raw article HTML
842
+ * @returns `true` when ≥1 chart meeting the rules is present
843
+ */
844
+ export function articleHasChart(html) {
845
+ let cursor = 0;
846
+ while (cursor < html.length) {
847
+ const tagStart = html.indexOf('<canvas', cursor);
848
+ if (tagStart === -1)
849
+ return false;
850
+ const decoded = extractCanvasAttribute(html, tagStart, 'data-chart-config');
851
+ if (decoded !== null && CHART_JS_TYPES.test(decoded) && countFirstDatasetPoints(decoded) >= 3) {
852
+ return true;
853
+ }
854
+ // Advance past `<canvas` so overlapping matches cannot occur.
855
+ cursor = tagStart + '<canvas'.length;
856
+ }
857
+ return false;
858
+ }
859
+ /**
860
+ * Count data points in the first dataset of a Chart.js config JSON payload.
861
+ *
862
+ * Parses the decoded `data-chart-config` as JSON and returns the length of
863
+ * `config.data.datasets[0].data`. Handles both numeric-array datasets
864
+ * (`[1, 2, 3]`) and object-point datasets (`[{x:0,y:1}, …]`) correctly —
865
+ * the previous indexOf-based implementation miscounted scatter/bubble
866
+ * configs and accidentally looked at `data.labels` for typical layouts.
867
+ *
868
+ * @param json - Decoded Chart.js config JSON string
869
+ * @returns Number of data points in `data.datasets[0].data`, or 0 when absent/invalid
870
+ */
871
+ function countFirstDatasetPoints(json) {
872
+ try {
873
+ const config = JSON.parse(json);
874
+ const firstDataset = config.data?.datasets?.[0];
875
+ return Array.isArray(firstDataset?.data) ? firstDataset.data.length : 0;
876
+ }
877
+ catch {
878
+ return 0;
879
+ }
880
+ }
881
+ /**
882
+ * Count distinct language switcher links emitted in the article header.
883
+ *
884
+ * @param html - Complete article HTML
885
+ * @returns Number of `.lang-link` anchors inside the header `site-header__langs` nav
886
+ */
887
+ function countLanguageSwitcherLinks(html) {
888
+ // Linear scan: locate the nav element by its unique class, then count
889
+ // `.lang-link` classes inside. Avoids the nested `[^">]*` regex pattern
890
+ // that CodeQL flags as polynomial-ReDoS-prone.
891
+ const marker = 'site-header__langs';
892
+ const markerIdx = html.indexOf(marker);
893
+ const NAV_CLOSE = '</nav>';
894
+ let scope = html;
895
+ if (markerIdx !== -1) {
896
+ // Find the closing `</nav>` of the enclosing nav (simple assumption:
897
+ // the next `</nav>` after the marker is the one we want). Falls back to
898
+ // the whole HTML if not found.
899
+ const endIdx = html.indexOf(NAV_CLOSE, markerIdx);
900
+ if (endIdx !== -1) {
901
+ // Walk backwards to find the opening `<nav`.
902
+ const startIdx = html.lastIndexOf('<nav', markerIdx);
903
+ if (startIdx !== -1) {
904
+ scope = html.slice(startIdx, endIdx);
905
+ }
906
+ }
907
+ }
908
+ // Count `lang-link` class tokens — bounded linear count.
909
+ const matches = scope.match(/\blang-link\b/gu);
910
+ return matches ? matches.length : 0;
911
+ }
912
+ /**
913
+ * Detect the two standard footer blocks always produced by `article-template.ts`.
914
+ *
915
+ * @param html - Complete article HTML
916
+ * @returns `true` when both `.footer-content` and `.footer-bottom` classes are present
917
+ */
918
+ function hasStandardFooterContent(html) {
919
+ return /class="footer-content"/u.test(html) && /class="footer-bottom"/u.test(html);
920
+ }
921
+ /** Slugs for article types that MUST include World Bank economic context. */
922
+ const POLICY_SLUGS_REQUIRING_WORLD_BANK = new Set([
923
+ 'committee-reports',
924
+ 'propositions',
925
+ 'motions',
926
+ 'weekly-review',
927
+ 'monthly-review',
928
+ 'week-in-review',
929
+ 'month-in-review',
930
+ 'month-ahead',
931
+ ]);
932
+ /**
933
+ * Strong World Bank evidence tokens — plain substring match is enough to
934
+ * satisfy the gate because each is specific (the literal attribution phrase
935
+ * or an MCP tool name). Kept aligned with
936
+ * `analysis/methodologies/worldbank-indicator-mapping.md`.
937
+ */
938
+ export const WORLD_BANK_STRONG_FINGERPRINTS = [
939
+ 'World Bank',
940
+ 'world bank',
941
+ 'worldbank',
942
+ 'get-economic-data',
943
+ 'get-social-data',
944
+ 'get-education-data',
945
+ 'get-health-data',
946
+ 'get-country-info',
947
+ 'get-countries',
948
+ 'search-indicators',
949
+ ];
950
+ /**
951
+ * Short indicator codes published by the World Bank MCP server. These are
952
+ * matched with a word boundary (`[^A-Z0-9_]` look-arounds) so that a code inside
953
+ * a longer uppercase identifier (e.g. `GDP` within `XGDP_2`) does NOT count, but a
954
+ * standalone token such as `INDICATOR: GDP` does. Note that a standalone uppercase
955
+ * `GDP` in prose (e.g. "GDP growth slowed") also matches. The match is
956
+ * case-sensitive, so lowercase mentions such as "gdp" are intentionally rejected.
957
+ */
958
+ export const WORLD_BANK_INDICATOR_CODES = [
959
+ 'GDP',
960
+ 'GDP_GROWTH',
961
+ 'GDP_PER_CAPITA',
962
+ 'GNI',
963
+ 'GNI_PER_CAPITA',
964
+ 'UNEMPLOYMENT',
965
+ 'INFLATION',
966
+ 'EXPORTS',
967
+ 'EXPORTS_GDP',
968
+ 'FDI',
969
+ 'FDI_NET',
970
+ 'POPULATION',
971
+ 'LIFE_EXPECTANCY',
972
+ 'BIRTH_RATE',
973
+ 'DEATH_RATE',
974
+ 'INTERNET_USERS',
975
+ 'LITERACY_RATE',
976
+ 'SCHOOL_ENROLLMENT',
977
+ 'SCHOOL_COMPLETION',
978
+ 'TEACHERS_PRIMARY',
979
+ 'EDUCATION_EXPENDITURE',
980
+ 'HEALTH_EXPENDITURE',
981
+ 'PHYSICIANS',
982
+ 'HOSPITAL_BEDS',
983
+ 'IMMUNIZATION',
984
+ 'HIV_PREVALENCE',
985
+ 'MALNUTRITION',
986
+ 'TUBERCULOSIS',
987
+ ];
988
+ /**
989
+ * Backwards-compatible union of strong + short fingerprints. Kept exported so
990
+ * callers that only need a flat list (e.g. existing consumers that shipped
991
+ * before the strong/short split) continue to compile. New code SHOULD prefer
992
+ * {@link hasWorldBankEvidence}, which enforces the stricter word-boundary rule
993
+ * for short codes.
994
+ */
995
+ export const WORLD_BANK_FINGERPRINTS = [
996
+ ...WORLD_BANK_STRONG_FINGERPRINTS,
997
+ ...WORLD_BANK_INDICATOR_CODES,
998
+ ];
999
+ /**
1000
+ * Return true when any WORLD_BANK_INDICATOR_CODES entry appears in `text` with
1001
+ * word-boundary isolation on both sides. We treat `[A-Z0-9_]` as "identifier"
1002
+ * characters — that keeps `GDP_GROWTH` from accidentally matching inside the
1003
+ * shorter `GDP` scan, and keeps the English word "gdp" out of the match set.
1004
+ */
1005
+ /** Characters that count as part of an identifier-style token for the word-boundary check. */
1006
+ const WORD_BOUNDARY_PATTERN = /[A-Z0-9_]/u;
1007
+ /**
1008
+ * Check whether `ch` is NOT an identifier-style character (so it qualifies
1009
+ * as a word boundary on either side of a World Bank indicator code).
1010
+ *
1011
+ * @param ch - Single character (may be empty string for start/end-of-string)
1012
+ * @returns `true` when `ch` is empty or a non-identifier character
1013
+ */
1014
+ function isIdentifierBoundary(ch) {
1015
+ return ch === '' || !WORD_BOUNDARY_PATTERN.test(ch);
1016
+ }
1017
+ /**
1018
+ * Return `true` when `code` appears in `text` surrounded by identifier
1019
+ * boundaries on both sides. Linear scan over `text`.
1020
+ *
1021
+ * @param text - Text to scan
1022
+ * @param code - Indicator code to look for (all uppercase)
1023
+ * @returns `true` when a word-bounded occurrence is present
1024
+ */
1025
+ function textContainsIndicatorCode(text, code) {
1026
+ let from = 0;
1027
+ while (from < text.length) {
1028
+ const idx = text.indexOf(code, from);
1029
+ if (idx === -1)
1030
+ return false;
1031
+ const before = idx === 0 ? '' : (text[idx - 1] ?? '');
1032
+ const afterIdx = idx + code.length;
1033
+ const after = afterIdx < text.length ? (text[afterIdx] ?? '') : '';
1034
+ if (isIdentifierBoundary(before) && isIdentifierBoundary(after))
1035
+ return true;
1036
+ from = idx + 1;
1037
+ }
1038
+ return false;
1039
+ }
1040
+ /**
1041
+ * Return true when any `WORLD_BANK_INDICATOR_CODES` entry appears in `text`
1042
+ * with word-boundary isolation on both sides. We treat `[A-Z0-9_]` as
1043
+ * "identifier" characters — that keeps `GDP_GROWTH` from accidentally matching
1044
+ * inside the shorter `GDP` scan, and keeps the English word "gdp" out of the
1045
+ * match set.
1046
+ *
1047
+ * @param text - Article body or analysis markdown to scan
1048
+ * @returns `true` when at least one canonical indicator code is present
1049
+ */
1050
+ function hasIndicatorCodeWithBoundary(text) {
1051
+ for (const code of WORLD_BANK_INDICATOR_CODES) {
1052
+ if (textContainsIndicatorCode(text, code))
1053
+ return true;
1054
+ }
1055
+ return false;
1056
+ }
1057
+ /**
1058
+ * Detect World Bank sourcing in any piece of text (article body OR analysis
1059
+ * markdown). Returns `true` when the text contains either a strong fingerprint
1060
+ * (the phrase "World Bank", an MCP tool name, etc.) or an indicator code with
1061
+ * clean word boundaries.
1062
+ *
1063
+ * This is the single source of truth for the policy quality gate — both the
1064
+ * content validator and the CLI validator's filesystem fallback use it so a
1065
+ * legitimate evidence trail on either side satisfies the rule, and generic
1066
+ * prose mentions of economic terms do not.
1067
+ *
1068
+ * @param text - Text to scan
1069
+ * @returns `true` when at least one strong or word-bounded fingerprint matches
1070
+ */
1071
+ export function hasWorldBankEvidence(text) {
1072
+ for (const fp of WORLD_BANK_STRONG_FINGERPRINTS) {
1073
+ if (text.includes(fp))
1074
+ return true;
1075
+ }
1076
+ return hasIndicatorCodeWithBoundary(text);
1077
+ }
1078
+ /**
1079
+ * Verify that a policy article (or the linked analysis artifacts) contains at
1080
+ * least one World Bank fingerprint — indicator code (word-bounded), MCP
1081
+ * tool-trace token, or the phrase "World Bank" itself. Returns `true` if the
1082
+ * gate is satisfied OR the article type is not on the mandatory list.
1083
+ *
1084
+ * @param html - Article HTML
1085
+ * @param articleType - Slug of the article category (e.g. `"committee-reports"`)
1086
+ * @param _analysisDir - Reserved for API symmetry; filesystem recursion is
1087
+ * performed by the caller in `validate-articles.ts` to keep this module pure.
1088
+ * @returns `true` when the World Bank evidence requirement is met or not applicable
1089
+ */
1090
+ export function articlePolicyHasWorldBank(html, articleType, _analysisDir) {
1091
+ if (!POLICY_SLUGS_REQUIRING_WORLD_BANK.has(articleType))
1092
+ return true;
1093
+ return hasWorldBankEvidence(html);
700
1094
  }
701
1095
  /**
702
1096
  * Validate the quality of a generated article.
@@ -18,8 +18,8 @@
18
18
  */
19
19
  import fs from 'node:fs';
20
20
  import path from 'node:path';
21
- import { NEWS_DIR, ARTICLE_FILENAME_PATTERN } from '../constants/config.js';
22
- import { validateArticleContent } from './content-validator.js';
21
+ import { NEWS_DIR, ARTICLE_FILENAME_PATTERN, PROJECT_ROOT } from '../constants/config.js';
22
+ import { validateArticleContent, articlePolicyHasWorldBank, hasWorldBankEvidence, } from './content-validator.js';
23
23
  import { scoreArticleQuality } from './article-quality-scorer.js';
24
24
  // ─── CLI argument parsing ─────────────────────────────────────────────────────
25
25
  const args = process.argv.slice(2);
@@ -63,6 +63,110 @@ function slugToArticleType(slug) {
63
63
  return mapping[slug] ?? slug;
64
64
  }
65
65
  // ─── Main validation logic ────────────────────────────────────────────────────
66
+ /**
67
+ * For policy article types, verify World Bank evidence in either the article
68
+ * body OR any `.md` file under an entry of `analysis/daily/{date}/` (resolved against `PROJECT_ROOT`) that is named
69
+ * exactly `{slug}` or starts with `{slug}-` / `{slug}_`. Non-policy article types are always considered satisfied.
70
+ *
71
+ * @param html - Full HTML of the article being validated
72
+ * @param articleType - Article category slug (e.g. `"committee-reports"`)
73
+ * @param date - Article publication date (`YYYY-MM-DD`)
74
+ * @param slug - Article slug used to locate the matching analysis directory
75
+ * @returns Warning string when the gate fails (including when the dated analysis directory does not exist), or `null` when satisfied.
76
+ */
77
+ function checkWorldBankEvidence(html, articleType, date, slug) {
78
+ // Satisfied without touching the filesystem: non-policy article type, or the article body itself carries evidence.
79
+ if (articlePolicyHasWorldBank(html, articleType))
80
+ return null;
81
+ // Sweep matching analysis entries: analysis/daily/{date}/{slug}, {slug}-* and {slug}_* (other prefixes sharing the slug text are ignored)
82
+ const analysisRoot = path.join(PROJECT_ROOT, 'analysis', 'daily', date);
83
+ if (!fs.existsSync(analysisRoot)) {
84
+ return `Missing required World Bank economic context for "${articleType}" article; analysis directory ${analysisRoot} does not exist`;
85
+ }
86
+ const candidates = safeReaddir(analysisRoot).filter((entry) => entry === slug || entry.startsWith(`${slug}-`) || entry.startsWith(`${slug}_`));
87
+ for (const dirName of candidates) {
88
+ if (directoryContainsWorldBankFingerprint(path.join(analysisRoot, dirName))) {
89
+ return null;
90
+ }
91
+ }
92
+ return `Missing required World Bank economic context for "${articleType}" article; neither article body nor analysis files under ${analysisRoot} reference any World Bank indicator`;
93
+ }
94
+ /**
95
+ * List the entry names of a directory via `fs.readdirSync`, returning `[]` on any error (tolerate missing paths).
96
+ *
97
+ * @param dir - Directory to list
98
+ * @returns Array of entry names, or `[]` when the directory cannot be read (every thrown error is swallowed, not only "missing path")
99
+ */
100
+ function safeReaddir(dir) {
101
+ try {
102
+ return fs.readdirSync(dir);
103
+ }
104
+ catch {
105
+ return [];
106
+ }
107
+ }
108
+ /**
109
+ * Maximum recursion depth when searching an analysis directory for World Bank
110
+ * fingerprints. The starting directory is depth 0; the guard
111
+ * `depth > ANALYSIS_SEARCH_MAX_DEPTH` refuses to list any directory deeper than
112
+ * this depth. With `ANALYSIS_SEARCH_MAX_DEPTH = 3` the scanner lists directories at
113
+ * depths 0, 1, 2 and 3 and reads the `.md` files directly inside them — enough to cover the expected layout
114
+ * `analysis/daily/{date}/{slug}/<subdir>/<file>.md` (a file inside a depth-1 directory) with
115
+ * headroom for deeper run artefacts. Trees deeper than this are truncated
116
+ * to guarantee bounded I/O during validator runs.
117
+ */
118
+ const ANALYSIS_SEARCH_MAX_DEPTH = 3;
119
+ /**
120
+ * Depth-limited recursive search for any World Bank fingerprint in `.md` files.
121
+ * Uses {@link hasWorldBankEvidence} so the gate enforces the same
122
+ * strong-phrase / word-bounded-indicator rule used on article bodies.
123
+ *
124
+ * @param dir - Directory to scan
125
+ * @param depth - Current recursion depth (defaults to 0; callers should omit; max is
126
+ * {@link ANALYSIS_SEARCH_MAX_DEPTH}, inclusive)
127
+ * @returns `true` when at least one `.md` file contains a World Bank fingerprint; `false` when none does, when `dir` cannot be listed, or when `depth` exceeds the cap
128
+ */
129
+ function directoryContainsWorldBankFingerprint(dir, depth = 0) {
130
+ if (depth > ANALYSIS_SEARCH_MAX_DEPTH)
131
+ return false;
132
+ let entries;
133
+ try {
134
+ entries = fs.readdirSync(dir, { withFileTypes: true });
135
+ }
136
+ catch {
137
+ return false;
138
+ }
139
+ for (const entry of entries) {
140
+ if (entryContainsWorldBankFingerprint(dir, entry, depth))
141
+ return true;
142
+ }
143
+ return false;
144
+ }
145
+ /**
146
+ * Test a single directory entry for World Bank fingerprints, recursing into
147
+ * subdirectories up to the shared depth cap. Entries that are neither directories nor files whose name ends in `.md` (case-sensitive) are skipped.
148
+ *
149
+ * @param dir - Parent directory of `entry`
150
+ * @param entry - Directory entry to test (must expose `name`, `isDirectory()` and `isFile()`)
151
+ * @param depth - Current recursion depth of the caller; subdirectories are scanned at `depth + 1`
152
+ * @returns `true` when this entry (or any descendant) matches a fingerprint; `false` for skipped entries and for files that cannot be read
153
+ */
154
+ function entryContainsWorldBankFingerprint(dir, entry, depth) {
155
+ const full = path.join(dir, entry.name);
156
+ if (entry.isDirectory()) {
157
+ return directoryContainsWorldBankFingerprint(full, depth + 1);
158
+ }
159
+ if (!entry.isFile() || !entry.name.endsWith('.md'))
160
+ return false;
161
+ let content;
162
+ try {
163
+ content = fs.readFileSync(full, 'utf-8');
164
+ }
165
+ catch {
166
+ return false;
167
+ }
168
+ return hasWorldBankEvidence(content);
169
+ }
66
170
  /**
67
171
  * Validate a single article file and return a summary.
68
172
  *
@@ -80,6 +184,11 @@ function validateSingleFile(filename) {
80
184
  const html = fs.readFileSync(filePath, 'utf-8');
81
185
  const articleType = slugToArticleType(slug);
82
186
  const result = validateArticleContent(html, lang, articleType);
187
+ // World Bank gate — extend search to linked analysis markdown files
188
+ const wbWarning = checkWorldBankEvidence(html, articleType, date, slug);
189
+ if (wbWarning) {
190
+ result.warnings.push(wbWarning);
191
+ }
83
192
  const summary = {
84
193
  filename,
85
194
  lang,