mcp-agentic-pipelines 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. package/.env.example +93 -0
  2. package/README.md +258 -0
  3. package/package.json +70 -0
  4. package/packages/clinical/package.json +22 -0
  5. package/packages/clinical/src/index.ts +262 -0
  6. package/packages/clinical/tsconfig.json +13 -0
  7. package/packages/core/package.json +21 -0
  8. package/packages/core/src/config.ts +138 -0
  9. package/packages/core/src/errors.ts +100 -0
  10. package/packages/core/src/index.ts +104 -0
  11. package/packages/core/src/llm-config.ts +213 -0
  12. package/packages/core/src/logging.ts +66 -0
  13. package/packages/core/src/python-bridge.ts +384 -0
  14. package/packages/core/src/rate-limiter.ts +136 -0
  15. package/packages/core/src/types.ts +203 -0
  16. package/packages/core/src/validation.ts +101 -0
  17. package/packages/core/tsconfig.json +10 -0
  18. package/packages/deeppipe/package.json +21 -0
  19. package/packages/deeppipe/src/index.ts +424 -0
  20. package/packages/deeppipe/tsconfig.json +13 -0
  21. package/packages/piste/package.json +20 -0
  22. package/packages/piste/src/index.ts +48 -0
  23. package/packages/piste/tsconfig.json +13 -0
  24. package/packages/precis/package.json +20 -0
  25. package/packages/precis/src/index.ts +67 -0
  26. package/packages/precis/tsconfig.json +13 -0
  27. package/packages/server/package.json +31 -0
  28. package/packages/server/src/index.ts +427 -0
  29. package/packages/server/tsconfig.json +17 -0
  30. package/setup.mjs +141 -0
  31. package/test.mjs +337 -0
  32. package/vendors/clinical-intake/pipeline.mjs +349 -0
  33. package/vendors/clinical-intake/questions/en.txt +9 -0
  34. package/vendors/clinical-intake/questions/fr.txt +9 -0
  35. package/vendors/piste/.env.example +73 -0
  36. package/vendors/piste/app/core/__init__.py +4 -0
  37. package/vendors/piste/app/core/config.py +83 -0
  38. package/vendors/piste/app/core/debuglog.py +16 -0
  39. package/vendors/piste/app/core/middleware.py +40 -0
  40. package/vendors/piste/bridge_piste.py +301 -0
  41. package/vendors/piste/pipeline/__init__.py +4 -0
  42. package/vendors/piste/pipeline/compiler.py +68 -0
  43. package/vendors/piste/pipeline/offline/__init__.py +28 -0
  44. package/vendors/piste/pipeline/offline/verifaid_pipeline.py +247 -0
  45. package/vendors/piste/pipeline/replay.py +15 -0
  46. package/vendors/piste/pipeline/replay_engine.py +249 -0
  47. package/vendors/piste/pipeline/signatures/__init__.py +4 -0
  48. package/vendors/piste/pipeline/signatures/signatures.py +136 -0
  49. package/vendors/piste/pipeline/stage1/__init__.py +21 -0
  50. package/vendors/piste/pipeline/stage1/atomic_decomposer.py +61 -0
  51. package/vendors/piste/pipeline/stage1/check_worthiness.py +100 -0
  52. package/vendors/piste/pipeline/stage1/orchestrator.py +175 -0
  53. package/vendors/piste/pipeline/stage1/test_stage1.py +162 -0
  54. package/vendors/piste/pipeline/stage2/__init__.py +34 -0
  55. package/vendors/piste/pipeline/stage2/blind_retriever.py +303 -0
  56. package/vendors/piste/pipeline/stage2/canonical_mapper.py +124 -0
  57. package/vendors/piste/pipeline/stage2/credibility_scorer.py +85 -0
  58. package/vendors/piste/pipeline/stage2/orchestrator.py +311 -0
  59. package/vendors/piste/pipeline/stage2/query_refiner.py +88 -0
  60. package/vendors/piste/pipeline/stage2/search_decision.py +69 -0
  61. package/vendors/piste/pipeline/stage2/test_stage2.py +265 -0
  62. package/vendors/piste/pipeline/stage3/__init__.py +20 -0
  63. package/vendors/piste/pipeline/stage3/classifier.py +79 -0
  64. package/vendors/piste/pipeline/stage3/orchestrator.py +225 -0
  65. package/vendors/piste/pipeline/stage3/test_stage3.py +101 -0
  66. package/vendors/piste/pipeline/stage4/__init__.py +33 -0
  67. package/vendors/piste/pipeline/stage4/criticality_gate.py +177 -0
  68. package/vendors/piste/pipeline/stage4/orchestrator.py +269 -0
  69. package/vendors/piste/pipeline/stage4/test_stage4.py +192 -0
  70. package/vendors/piste/pipeline/stage4/verdict_aggregator.py +157 -0
  71. package/vendors/piste/requirements.txt +53 -0
  72. package/vendors/precis/backend/__init__.py +6 -0
  73. package/vendors/precis/backend/agents/__init__.py +3 -0
  74. package/vendors/precis/backend/agents/data_synthesis.py +105 -0
  75. package/vendors/precis/backend/agents/dist_free_synth.py +97 -0
  76. package/vendors/precis/backend/agents/exact_hash_retriever.py +327 -0
  77. package/vendors/precis/backend/agents/fusion_ranker.py +64 -0
  78. package/vendors/precis/backend/agents/guardrail.py +175 -0
  79. package/vendors/precis/backend/agents/query_expander.py +89 -0
  80. package/vendors/precis/backend/agents/radial_interpol.py +99 -0
  81. package/vendors/precis/backend/agents/report_generator.py +92 -0
  82. package/vendors/precis/backend/agents/semantic_reranker.py +135 -0
  83. package/vendors/precis/backend/agents/stat_anomaly.py +93 -0
  84. package/vendors/precis/backend/agents/vector_index.py +123 -0
  85. package/vendors/precis/backend/agents/veri_score.py +341 -0
  86. package/vendors/precis/backend/agents/work_order_extractor.py +205 -0
  87. package/vendors/precis/backend/api/__init__.py +3 -0
  88. package/vendors/precis/backend/api/routes/__init__.py +3 -0
  89. package/vendors/precis/backend/config.py +88 -0
  90. package/vendors/precis/backend/core/__init__.py +13 -0
  91. package/vendors/precis/backend/core/hashing.py +22 -0
  92. package/vendors/precis/backend/core/metrics.py +77 -0
  93. package/vendors/precis/backend/core/multitoken.py +166 -0
  94. package/vendors/precis/backend/core/pmi.py +54 -0
  95. package/vendors/precis/backend/core/stemming.py +74 -0
  96. package/vendors/precis/backend/core/tracing.py +150 -0
  97. package/vendors/precis/backend/data/__init__.py +3 -0
  98. package/vendors/precis/backend/data/chunker.py +57 -0
  99. package/vendors/precis/backend/data/pdf_parser.py +42 -0
  100. package/vendors/precis/backend/db/__init__.py +3 -0
  101. package/vendors/precis/backend/db/models.py +173 -0
  102. package/vendors/precis/backend/db/repository.py +269 -0
  103. package/vendors/precis/backend/llm/__init__.py +3 -0
  104. package/vendors/precis/backend/llm/anthropic_provider.py +39 -0
  105. package/vendors/precis/backend/llm/base.py +147 -0
  106. package/vendors/precis/backend/llm/deepseek_provider.py +43 -0
  107. package/vendors/precis/backend/llm/factory.py +60 -0
  108. package/vendors/precis/backend/llm/google_provider.py +39 -0
  109. package/vendors/precis/backend/llm/ollama_provider.py +54 -0
  110. package/vendors/precis/backend/llm/openai_provider.py +50 -0
  111. package/vendors/precis/backend/main.py +677 -0
  112. package/vendors/precis/backend/orchestrator/__init__.py +3 -0
  113. package/vendors/precis/backend/orchestrator/planner.py +81 -0
  114. package/vendors/precis/backend/orchestrator/router.py +319 -0
  115. package/vendors/precis/backend/orchestrator/types.py +58 -0
  116. package/vendors/precis/bridge_precis.py +185 -0
  117. package/vendors/precis/data/sample_reports/README.md +8 -0
  118. package/vendors/precis/data/seed_data.py +115 -0
  119. package/vendors/precis/requirements.txt +19 -0
@@ -0,0 +1,33 @@
1
+ # Copyright (c) 2026 Jinan Kordab
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ # Stage 4 — Verdict
5
+ # 4a: Criticality Gate [C3] — pipeline/stage4/criticality_gate.py
6
+ # 4b: Verdict Aggregator [J5] — pipeline/stage4/verdict_aggregator.py
7
+ # 4c: Editorial Review Panel [C3] — pipeline/stage4/orchestrator.py
8
+ # Orchestrator: pipeline/stage4/orchestrator.py
9
+
10
+ from pipeline.stage4.criticality_gate import (
11
+ CriticalityGate, CriticalityAssessment, criticality_gate,
12
+ CRITICAL_TOPICS, HIGH_PROFILE_INDICATORS,
13
+ )
14
+ from pipeline.stage4.verdict_aggregator import (
15
+ VerdictAggregator, verdict_aggregator, VERDICT_LABELS,
16
+ )
17
+ from pipeline.stage4.orchestrator import (
18
+ Stage4Orchestrator, Stage4Result, stage4_orchestrator,
19
+ )
20
+
21
+ __all__ = [
22
+ "CriticalityGate",
23
+ "CriticalityAssessment",
24
+ "criticality_gate",
25
+ "CRITICAL_TOPICS",
26
+ "HIGH_PROFILE_INDICATORS",
27
+ "VerdictAggregator",
28
+ "verdict_aggregator",
29
+ "VERDICT_LABELS",
30
+ "Stage4Orchestrator",
31
+ "Stage4Result",
32
+ "stage4_orchestrator",
33
+ ]
@@ -0,0 +1,177 @@
1
+ # Copyright (c) 2026 Jinan Kordab
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ """
5
+ Stage 4a: Criticality Gate [C3]
6
+ =================================
7
+ Routes high-stakes claims to human Editorial Review Panel.
8
+ Low-stakes claims go to automated Verdict Aggregator.
9
+
10
+ Inspired by PolitiFact's three-editor panel:
11
+ - Elections, public health, legal, high-profile figures → HUMAN review
12
+ - Everything else → AUTOMATED verdict
13
+
14
+ The gate checks:
15
+ 1. Keyword match against critical topics list
16
+ 2. Embedding similarity to critical topic centroids (planned — not implemented in this module; only keyword matching is performed)
17
+ 3. Source reputation (high-profile figure detection)
18
+ """
19
+
20
+ from typing import List, Optional
21
+ from dataclasses import dataclass
22
+
23
+
24
# Keywords whose presence in a claim routes it to human review.
# Canada-focused and bilingual (EN/FR): federal elections plus Quebec
# provincial elections. Entries are lowercase because the gate lowercases the
# claim before matching; list order determines the order of reported matches.
CRITICAL_TOPICS = [
    # --- Elections (EN/FR) ---
    "election", "élection",
    "vote", "voter",
    "candidate", "candidat",
    "ballot", "bulletin",
    "democracy", "démocratie",
    "federal election", "élection fédérale",
    "provincial election", "élection provinciale",
    "referendum", "référendum",
    "riding", "circonscription",
    "mp", "député", "députée",
    "minority government", "gouvernement minoritaire",
    "majority government", "gouvernement majoritaire",
    "coalition",
    "confidence vote", "vote de confiance",
    # --- Quebec politics (FR/EN) ---
    "quebec", "québec",
    "québécois", "quebecois",
    "national assembly", "assemblée nationale",
    "sovereignty", "souveraineté",
    "separatist", "séparatiste",
    "federalism", "fédéralisme",
    "federalist", "fédéraliste",
    "language law", "loi linguistique",
    "bill 101", "loi 101",
    "bill 96", "loi 96",
    "bill 21", "loi 21",
    "secularism", "laïcité",
    "religious symbols", "signes religieux",
    "distinct society", "société distincte",
    "notwithstanding clause", "clause dérogatoire",
    # --- Canadian federal politics ---
    "parliament", "parlement",
    "house of commons", "chambre des communes",
    "senate", "sénat",
    "governor general", "gouverneur général",
    "throne speech", "discours du trône",
    "first past the post", "scrutin uninominal",
    "electoral reform", "réforme électorale",
    "equalization", "péréquation",
    "transfer payments", "paiements de transfert",
    "carbon tax", "taxe carbone",
    "carbon pricing", "tarification du carbone",
    "pipeline",
    "oil sands", "sables bitumineux",
    "indigenous", "autochtone",
    "first nations", "premières nations",
    "reconciliation", "réconciliation",
    "treaty", "traité",
    "indian act", "loi sur les indiens",
    # --- Public health (EN/FR) ---
    "public health", "santé publique",
    "pandemic", "pandémie",
    "vaccine", "vaccin",
    "covid",
    "disease", "maladie",
    "health transfer", "transfert en santé",
    "healthcare", "soins de santé",
    # --- Economy / budget (EN/FR) ---
    "economy", "économie",
    "inflation",
    "recession", "récession",
    "deficit", "déficit",
    "budget",
    "tax", "impôt",
    "taxes", "impôts",
    "debt", "dette",
    "spending", "dépenses",
    "austerity", "austérité",
    "housing", "logement",
    "affordable housing", "logement abordable",
    "interest rate", "taux d'intérêt",
    "bank of canada", "banque du canada",
    # --- Rights / immigration (EN/FR) ---
    "abortion", "avortement",
    "civil rights", "droits civils",
    "human rights", "droits humains",
    "charter", "charte",
    "immigration",
    "refugee", "réfugié",
    "asylum", "asile",
    "multiculturalism", "multiculturalisme",
]
75
+
76
# Names, titles and party labels that mark a claim as involving a
# high-profile figure. Canada-focused and bilingual (EN/FR); entries are
# lowercase because the gate lowercases the claim before matching.
HIGH_PROFILE_INDICATORS = [
    # --- Canadian federal ---
    "prime minister", "premier ministre",
    "pm",
    "trudeau", "justin trudeau",
    "poilievre", "pierre poilievre",
    "singh", "jagmeet singh",
    "liberal party", "parti libéral",
    "conservative party", "parti conservateur",
    "ndp", "nouveau parti démocratique",
    "bloc", "bloc québécois",
    "green party", "parti vert",
    "minister", "ministre",
    "cabinet",
    "governor general", "gouverneur général",
    "senator", "sénateur",
    "mp", "member of parliament",
    # --- Quebec provincial ---
    "premier", "première ministre",
    "quebec premier", "premier du québec",
    "legault", "françois legault",
    "caq", "coalition avenir québec",
    "parti québécois", "pq",
    "québec solidaire", "qs",
    "liberal party of quebec", "parti libéral du québec",
    "mna",
    "député", "députée",
    "national assembly", "assemblée nationale",
    # --- Premiers of other provinces ---
    "ontario premier", "premier ontarien",
    "ford", "doug ford",
    "alberta premier",
    "smith", "danielle smith",
    "bc premier",
    "eby", "david eby",
]
106
+
107
+
108
@dataclass
class CriticalityAssessment:
    """
    Result of the Criticality Gate check.

    Built by ``CriticalityGate.assess``. ``recommendation`` is
    "human_review" exactly when ``is_critical`` is true, else "automated".
    """
    # True when a critical topic or a high-profile indicator matched.
    is_critical: bool
    # Matched critical-topic keywords, in keyword-list order.
    matched_topics: List[str]
    # True when at least one high-profile indicator matched.
    is_high_profile: bool
    # Routing decision: "automated" or "human_review".
    recommendation: str
    # Human-readable explanation of the routing decision.
    reason: str


class CriticalityGate:
    """
    Routes claims based on criticality.

    Jewel [C3] — Human-in-the-loop breakpoint:
    Automated for scale, human-reviewed for stakes.

    Matching is keyword-based and case-insensitive:

    * Keywords longer than ``_SHORT_KEYWORD_MAX_LEN`` characters match
      anywhere in the claim, so inflected and compound forms are caught
      ("vaccination" for "vaccin", "reelection" for "election").
    * Shorter keywords (mostly abbreviations such as "mp", "pm", "tax",
      "ford") must begin at the start of a word. Plain substring matching
      made them fire inside unrelated words ("temperature", "development",
      "syntax", "affordable") and flagged benign claims for human review.
      Suffixes are still allowed, so "MPs", "votes" and "taxpayer" match.
    """

    # Keywords up to this length must start at a word boundary.
    _SHORT_KEYWORD_MAX_LEN = 4

    def __init__(
        self,
        critical_topics: Optional[List[str]] = None,
        high_profile_indicators: Optional[List[str]] = None,
    ):
        """
        Args:
            critical_topics: Optional replacement for the module-level
                ``CRITICAL_TOPICS`` list. Entries are lowercased.
            high_profile_indicators: Optional replacement for the
                module-level ``HIGH_PROFILE_INDICATORS`` list. Entries are
                lowercased.
        """
        # ``None`` means "use the module-level list", resolved at assess()
        # time so later edits to those lists are still honoured.
        self._critical_topics = (
            None if critical_topics is None
            else [keyword.lower() for keyword in critical_topics]
        )
        self._high_profile_indicators = (
            None if high_profile_indicators is None
            else [keyword.lower() for keyword in high_profile_indicators]
        )

    @classmethod
    def _keyword_matches(cls, keyword: str, text: str) -> bool:
        """Return True if ``keyword`` occurs in the already-lowercased ``text``.

        Short keywords only count when the occurrence starts a word, i.e. it
        is at the start of the text or preceded by a non-alphanumeric
        character.
        """
        if not keyword:
            # An empty keyword would be "in" every string; never match it.
            return False
        if len(keyword) > cls._SHORT_KEYWORD_MAX_LEN:
            return keyword in text

        position = text.find(keyword)
        while position != -1:
            if position == 0 or not text[position - 1].isalnum():
                return True
            position = text.find(keyword, position + 1)
        return False

    def assess(self, claim_text: str) -> CriticalityAssessment:
        """
        Determine if a claim requires human review.

        Args:
            claim_text: The claim text to assess. ``None`` or an empty
                string is treated as a claim with no matches.

        Returns:
            CriticalityAssessment with routing recommendation.
        """
        claim_lower = (claim_text or "").lower()

        topics = (
            CRITICAL_TOPICS if self._critical_topics is None
            else self._critical_topics
        )
        indicators = (
            HIGH_PROFILE_INDICATORS if self._high_profile_indicators is None
            else self._high_profile_indicators
        )

        # Check critical topics (result keeps keyword-list order).
        matched_topics = [
            topic for topic in topics
            if self._keyword_matches(topic, claim_lower)
        ]

        # Check high-profile figures.
        is_high_profile = any(
            self._keyword_matches(indicator, claim_lower)
            for indicator in indicators
        )

        # Determine routing.
        is_critical = bool(matched_topics) or is_high_profile

        if is_critical:
            reason_parts = []
            if matched_topics:
                # Only the first three matches are listed to keep it short.
                reason_parts.append(
                    f"matches critical topics: {', '.join(matched_topics[:3])}"
                )
            if is_high_profile:
                reason_parts.append("involves high-profile figure")
            reason = "; ".join(reason_parts)
            recommendation = "human_review"
        else:
            reason = "No critical topics or high-profile indicators detected."
            recommendation = "automated"

        return CriticalityAssessment(
            is_critical=is_critical,
            matched_topics=matched_topics,
            is_high_profile=is_high_profile,
            recommendation=recommendation,
            reason=reason,
        )


# Singleton
criticality_gate = CriticalityGate()
@@ -0,0 +1,269 @@
1
+ # Copyright (c) 2026 Jinan Kordab
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ """
5
+ Stage 4 Orchestrator — Verdict
6
+ ================================
7
+ Coordinates:
8
+ 4a: Criticality Gate [C3] — route to automated vs human review
9
+ 4b: Verdict Aggregator [J5] — 7-way PolitiFact-aligned verdict
10
+ 4c: Editorial Review Panel [C3] — human-in-the-loop for critical claims
11
+
12
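+ Writes APPEND-ONLY StageRecord audit rows to PostgreSQL [C5] when a DB session is provided; the run-level Verdict record is created in pipeline_service.py.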
13
+ Emits SSE events: stage_4a_start, criticality_determined, atomic_verdict (automated route), verdict_complete (human-review route).
14
+ """
15
+
16
+ import time
17
+ import uuid
18
+ from typing import Optional, Dict
19
+ from dataclasses import dataclass, field
20
+ from datetime import datetime
21
+
22
+ from sqlalchemy.ext.asyncio import AsyncSession
23
+
24
+ from app.db.models import StageRecord, Verdict
25
+ from pipeline.stage4.criticality_gate import criticality_gate
26
+ from pipeline.stage4.verdict_aggregator import (
27
+ verdict_aggregator, VERDICT_LABELS,
28
+ )
29
+ from pipeline.stage3.orchestrator import Stage3Result, ClassificationResult
30
+
31
+
32
@dataclass
class Stage4Result:
    """
    Per-atomic-claim output of Stage 4 — Verdict.

    Field order is part of the constructor signature; the four trailing
    fields are optional and default to "no human review, zero counts".
    """
    # The atomic claim this result belongs to.
    atomic_claim: str

    # --- Criticality Gate outcome ---
    is_critical: bool
    # Either "automated" or "human_review".
    review_route: str
    criticality_reason: str

    # --- Verdict ---
    # One of the 7 verdict labels.
    verdict: str
    confidence: float
    explanation: str
    # Mapping of verdict label to weight.
    distribution: Dict[str, float]

    # --- Human review payload, only set on the human-review route ---
    human_review: Optional[dict] = None

    # --- Stage 3 summary carried along for context ---
    support_count: int = 0
    refute_count: int = 0
    unrelated_count: int = 0
55
+
56
+
57
class Stage4Orchestrator:
    """
    Orchestrates Stage 4 of the fact-checking pipeline.

    Flow for one atomic claim:
    1. Criticality Gate: assess if human review is needed [C3]
    2a. If automated → VerdictAggregator synthesizes the 7-way verdict [J5]
    2b. If critical → (MVP) the automated verdict is still computed and a
        "pending_review" payload is attached for the Editorial Review
        Panel [C3]
    3. Add a StageRecord to the DB session, when one is supplied
    4. Emit SSE events through the optional callback
    """

    def __init__(self, sse_callback: Optional[callable] = None):
        """
        Args:
            sse_callback: Optional callable invoked as
                ``sse_callback(event_type, data)``. It may be a coroutine
                function or a plain function.
        """
        self.sse_callback = sse_callback

    async def process(
        self,
        atomic_claim: str,
        stage3_result: "Stage3Result",
        db: "Optional[AsyncSession]" = None,
        run_id: Optional[uuid.UUID] = None,
        locale: str = "en",
    ) -> "Stage4Result":
        """
        Run Stage 4 processing for one atomic claim.

        Args:
            atomic_claim: The atomic claim to verdict.
            stage3_result: Per-source classifications from Stage 3.
            db: Optional DB session for audit ledger writes.
            run_id: Optional run identifier stored on the stage record; a
                random UUID is used when it is not given.
            locale: Locale forwarded to the verdict aggregator.

        Returns:
            Stage4Result with final verdict.
        """
        # --- 4a: Criticality Gate ---
        await self._emit("stage_4a_start", {
            "atomic_claim": atomic_claim,
        })

        assessment = criticality_gate.assess(atomic_claim)

        await self._emit("criticality_determined", {
            "atomic_claim": atomic_claim,
            "is_critical": assessment.is_critical,
            "matched_topics": assessment.matched_topics,
            "recommendation": assessment.recommendation,
        })

        # --- 4b / 4c: Verdict ---
        # NOTE: Verdict creation moved to pipeline_service.py —
        # one Verdict per run_id, aggregated from all atomic claims.
        # Stage 4 only returns per-atomic-claim results.
        if assessment.recommendation == "automated":
            return await self._automated_verdict(
                atomic_claim, stage3_result, db, run_id, locale
            )
        return await self._human_review_verdict(
            atomic_claim, stage3_result, assessment, db, run_id, locale
        )

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as a timezone-aware ISO-8601 string."""
        # Local import: the module only imports ``datetime`` from datetime.
        from datetime import timezone

        # datetime.utcnow() is deprecated and returns a naive value that
        # consumers can misread as local time; an aware value carries "+00:00".
        return datetime.now(timezone.utc).isoformat()

    @staticmethod
    def _aggregate(atomic_claim: str, stage3_result: "Stage3Result", locale: str):
        """Run the verdict aggregator and time it.

        Returns:
            ``(verdict, confidence, explanation, distribution, latency_ms)``.
        """
        # NOTE(review): the aggregator is called synchronously; if it does
        # blocking I/O it will stall the event loop — confirm.
        started = time.monotonic()
        verdict, confidence, explanation, distribution = verdict_aggregator(
            claim=atomic_claim,
            classifications=stage3_result.classifications,
            locale=locale,
        )
        latency_ms = (time.monotonic() - started) * 1000
        return verdict, confidence, explanation, distribution, latency_ms

    @staticmethod
    def _record_stage(
        db: "Optional[AsyncSession]",
        run_id: Optional[uuid.UUID],
        stage_name: str,
        input_snapshot: dict,
        output_snapshot: dict,
        model_used: str,
        latency_ms: float,
    ) -> None:
        """Add one StageRecord to ``db``; do nothing when no session is given."""
        if db is None:
            return
        db.add(StageRecord(
            run_id=run_id or uuid.uuid4(),
            stage_name=stage_name,
            input_snapshot=input_snapshot,
            output_snapshot=output_snapshot,
            model_used=model_used,
            latency_ms=latency_ms,
            retry_attempt=0,
        ))

    async def _automated_verdict(
        self,
        atomic_claim: str,
        stage3_result: "Stage3Result",
        db: "Optional[AsyncSession]",
        run_id: Optional[uuid.UUID] = None,
        locale: str = "en",
    ) -> "Stage4Result":
        """Run automated VerdictAggregator [J5]."""
        verdict, confidence, explanation, distribution, latency_ms = (
            self._aggregate(atomic_claim, stage3_result, locale)
        )

        # Write stage record
        self._record_stage(
            db,
            run_id,
            stage_name="stage_4b",
            input_snapshot={
                "atomic_claim": atomic_claim,
                "classifications_count": len(stage3_result.classifications),
            },
            output_snapshot={
                "verdict": verdict,
                "confidence": confidence,
                "distribution": distribution,
            },
            model_used="dspy/verdict_aggregator",
            latency_ms=latency_ms,
        )

        await self._emit("atomic_verdict", {
            "atomic_claim": atomic_claim,
            "verdict": verdict,
            "confidence": confidence,
            "explanation": explanation,
            "distribution": distribution,
            "route": "automated",
        })

        return Stage4Result(
            atomic_claim=atomic_claim,
            is_critical=False,
            review_route="automated",
            criticality_reason="Low-stakes claim — automated verdict.",
            verdict=verdict,
            confidence=confidence,
            explanation=explanation,
            distribution=distribution,
            support_count=stage3_result.support_count,
            refute_count=stage3_result.refute_count,
            unrelated_count=stage3_result.unrelated_count,
        )

    async def _human_review_verdict(
        self,
        atomic_claim: str,
        stage3_result: "Stage3Result",
        assessment,
        db: "Optional[AsyncSession]",
        run_id: Optional[uuid.UUID] = None,
        locale: str = "en",
    ) -> "Stage4Result":
        """
        Queue claim for human Editorial Review Panel [C3].

        In production: claim added to review_queue table.
        Human reviewer sees: claim + evidence + per-source classifications.
        Panel votes on final verdict (PolitiFact-style democratic vote).

        MVP: fall back to automated verdict with human_review flag.
        """
        # For MVP, run automated verdict but flag for human review
        verdict, confidence, explanation, distribution, latency_ms = (
            self._aggregate(atomic_claim, stage3_result, locale)
        )

        # reviewer_id / final_verdict / reason stay None until a human acts.
        human_review_payload = {
            "status": "pending_review",
            "matched_topics": assessment.matched_topics,
            "auto_verdict": verdict,
            "auto_confidence": confidence,
            "queued_at": self._utc_timestamp(),
            "reviewer_id": None,
            "final_verdict": None,
            "reason": None,
        }

        self._record_stage(
            db,
            run_id,
            stage_name="stage_4c",
            input_snapshot={
                "atomic_claim": atomic_claim,
                "assessment": {
                    "is_critical": assessment.is_critical,
                    "matched_topics": assessment.matched_topics,
                },
            },
            output_snapshot=human_review_payload,
            model_used="dspy/verdict_aggregator+human_review",
            latency_ms=latency_ms,
        )

        # NOTE(review): the automated route emits "atomic_verdict" while this
        # route emits "verdict_complete" for the same per-claim event —
        # confirm the asymmetry is intended before renaming either.
        await self._emit("verdict_complete", {
            "atomic_claim": atomic_claim,
            "verdict": verdict,
            "confidence": confidence,
            "explanation": explanation,
            "distribution": distribution,
            "route": "human_review_pending",
            "human_review": human_review_payload,
        })

        return Stage4Result(
            atomic_claim=atomic_claim,
            is_critical=True,
            review_route="human_review",
            criticality_reason=assessment.reason,
            verdict=verdict,
            confidence=confidence,
            explanation=explanation,
            distribution=distribution,
            human_review=human_review_payload,
            support_count=stage3_result.support_count,
            refute_count=stage3_result.refute_count,
            unrelated_count=stage3_result.unrelated_count,
        )

    async def _emit(self, event_type: str, data: dict):
        """Forward one SSE event to the callback, if a callback is set.

        A coroutine-function callback is awaited; a plain function is just
        called, instead of failing when its return value is awaited.
        """
        if self.sse_callback is None:
            return
        # Local import: ``inspect`` is not imported at module level.
        import inspect

        outcome = self.sse_callback(event_type, data)
        if inspect.isawaitable(outcome):
            await outcome


# Singleton
stage4_orchestrator = Stage4Orchestrator()
@@ -0,0 +1,192 @@
1
+ # Copyright (c) 2026 Jinan Kordab
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ """
5
+ Unit Tests — Stage 4: Verdict
6
+ ===============================
7
+ Tests CriticalityGate [C3], VerdictAggregator [J5],
8
+ 7-way verdict labels, Stage4Result.
9
+
10
+ Run: pytest pipeline/stage4/test_stage4.py -v
11
+ """
12
+
13
+ import pytest
14
+ import json
15
+
16
+
17
+ # ============================================================
18
+ # Criticality Gate Tests [C3]
19
+ # ============================================================
20
+
21
class TestCriticalityGate:
    """Test routing of claims to automated vs human review."""

    @pytest.fixture
    def gate(self):
        """Provide a fresh CriticalityGate for each test."""
        from pipeline.stage4.criticality_gate import CriticalityGate
        return CriticalityGate()

    def test_election_claim_is_critical(self, gate):
        """Claims about elections → human review."""
        result = gate.assess("The election was rigged and the votes were tampered with.")
        assert result.is_critical is True
        assert result.recommendation == "human_review"
        assert "election" in result.matched_topics
        assert "vote" in result.matched_topics

    def test_public_health_claim_is_critical(self, gate):
        """Claims about vaccines/pandemics → human review."""
        result = gate.assess("The vaccine causes severe side effects in children.")
        assert result.is_critical is True
        assert "vaccine" in result.matched_topics

    def test_high_profile_figure_is_critical(self, gate):
        """Claims about a head of government → human review."""
        # The indicator list is Canada-focused: it has "prime minister" but
        # no "president", so the earlier "The president ..." claim matched
        # nothing and this test could not pass.
        result = gate.assess("The prime minister signed the order yesterday.")
        assert result.is_critical is True
        assert result.is_high_profile is True

    def test_french_claim_is_critical(self, gate):
        """French-language election claims → human review."""
        result = gate.assess("L'élection fédérale a été truquée.")
        assert result.is_critical is True
        assert result.recommendation == "human_review"
        assert "élection" in result.matched_topics

    def test_neutral_claim_is_automated(self, gate):
        """Non-critical claims → automated verdict."""
        result = gate.assess("Water boils at 100 degrees Celsius at sea level.")
        assert result.is_critical is False
        assert result.recommendation == "automated"

    def test_empty_claim_is_automated(self, gate):
        """Empty claims → automated."""
        result = gate.assess("")
        assert result.is_critical is False
59
+
60
+
61
+ # ============================================================
62
+ # Verdict Aggregator Tests [J5]
63
+ # ============================================================
64
+
65
class TestVerdictAggregator:
    """Checks for 7-way verdict normalization and distribution helpers."""

    @pytest.fixture
    def aggregator(self):
        """Provide a fresh VerdictAggregator for each test."""
        from pipeline.stage4.verdict_aggregator import VerdictAggregator
        return VerdictAggregator()

    def test_normalize_direct_match(self, aggregator):
        """Canonical labels come back unchanged."""
        for label in ("TRUE", "FALSE", "UNVERIFIABLE", "PANTS_ON_FIRE"):
            assert aggregator._normalize_verdict(label) == label

    def test_normalize_with_spaces(self, aggregator):
        """Space-separated labels map to their underscore form."""
        spaced = (
            ("MOSTLY TRUE", "MOSTLY_TRUE"),
            ("HALF TRUE", "HALF_TRUE"),
            ("MOSTLY FALSE", "MOSTLY_FALSE"),
        )
        for raw, expected in spaced:
            assert aggregator._normalize_verdict(raw) == expected

    def test_normalize_without_spaces(self, aggregator):
        """Run-together labels map to their underscore form."""
        joined = (
            ("MOSTLYTRUE", "MOSTLY_TRUE"),
            ("HALFTRUE", "HALF_TRUE"),
        )
        for raw, expected in joined:
            assert aggregator._normalize_verdict(raw) == expected

    def test_normalize_nei(self, aggregator):
        """NEI and its long form both map to UNVERIFIABLE."""
        for raw in ("NEI", "NOT ENOUGH INFORMATION"):
            assert aggregator._normalize_verdict(raw) == "UNVERIFIABLE"

    def test_normalize_unknown_fallback(self, aggregator):
        """Unrecognised labels fall back to UNVERIFIABLE (safe default)."""
        normalized = aggregator._normalize_verdict("SOMETHING WEIRD")
        assert normalized == "UNVERIFIABLE"

    def test_default_distribution_centered(self, aggregator):
        """The default distribution puts all weight on the given verdict."""
        weights = aggregator._default_distribution("TRUE")
        assert weights["TRUE"] == 1.0
        assert weights["FALSE"] == 0.0
        assert sum(weights.values()) == 1.0

    def test_all_verdict_labels_present(self, aggregator):
        """The default distribution has an entry for each of the 7 labels."""
        from pipeline.stage4.verdict_aggregator import VERDICT_LABELS
        weights = aggregator._default_distribution("MOSTLY_TRUE")
        assert all(label in weights for label in VERDICT_LABELS)
        assert len(weights) == 7

    def test_build_classifications_payload(self, aggregator):
        """The classification payload is valid JSON with one entry per source."""
        from pipeline.stage3.orchestrator import ClassificationResult
        supporting = ClassificationResult(0, "a.com", "a.com", "SUPPORTS", 0.9, "Good", 0.9)
        refuting = ClassificationResult(1, "b.com", "b.com", "REFUTES", 0.7, "Bad", 0.5)
        raw_payload = aggregator._build_classifications_payload(
            "Test", [supporting, refuting]
        )
        entries = json.loads(raw_payload)
        assert len(entries) == 2
        assert entries[0]["label"] == "SUPPORTS"
        assert entries[1]["label"] == "REFUTES"
127
+
128
+
129
+ # ============================================================
130
+ # Stage4Result Tests
131
+ # ============================================================
132
+
133
class TestStage4Result:
    """Construction checks for the Stage4Result dataclass."""

    def test_automated_result(self):
        """An automated result keeps its verdict and has no review payload."""
        from pipeline.stage4.orchestrator import Stage4Result
        fields = {
            "atomic_claim": "Water boils at 100°C.",
            "is_critical": False,
            "review_route": "automated",
            "criticality_reason": "Low-stakes claim.",
            "verdict": "TRUE",
            "confidence": 0.95,
            "explanation": "Well-established scientific fact.",
            "distribution": {"TRUE": 0.95, "MOSTLY_TRUE": 0.05},
            "support_count": 3,
            "refute_count": 0,
            "unrelated_count": 0,
        }
        outcome = Stage4Result(**fields)
        assert outcome.verdict == "TRUE"
        assert outcome.review_route == "automated"
        assert outcome.human_review is None

    def test_human_review_result(self):
        """A human-review result is critical and carries a review payload."""
        from pipeline.stage4.orchestrator import Stage4Result
        fields = {
            "atomic_claim": "The election was fraudulent.",
            "is_critical": True,
            "review_route": "human_review",
            "criticality_reason": "Matches critical topic: election.",
            "verdict": "UNVERIFIABLE",
            "confidence": 0.45,
            "explanation": "Insufficient credible evidence.",
            "distribution": {"UNVERIFIABLE": 1.0},
            "human_review": {"status": "pending_review"},
            "support_count": 0,
            "refute_count": 0,
            "unrelated_count": 5,
        }
        outcome = Stage4Result(**fields)
        assert outcome.is_critical is True
        assert outcome.review_route == "human_review"
        assert outcome.human_review is not None
174
+
175
+
176
+ # ============================================================
177
+ # Verdict Labels Tests
178
+ # ============================================================
179
+
180
class TestVerdictLabels:
    """Checks the 7-way PolitiFact-aligned verdict label set."""

    def test_all_seven_labels(self):
        """VERDICT_LABELS has exactly seven entries, including each expected label."""
        from pipeline.stage4.verdict_aggregator import VERDICT_LABELS
        expected_labels = (
            "TRUE",
            "MOSTLY_TRUE",
            "HALF_TRUE",
            "MOSTLY_FALSE",
            "FALSE",
            "PANTS_ON_FIRE",
            "UNVERIFIABLE",
        )
        assert len(VERDICT_LABELS) == 7
        for label in expected_labels:
            assert label in VERDICT_LABELS