mcp-agentic-pipelines 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +93 -0
- package/README.md +258 -0
- package/package.json +70 -0
- package/packages/clinical/package.json +22 -0
- package/packages/clinical/src/index.ts +262 -0
- package/packages/clinical/tsconfig.json +13 -0
- package/packages/core/package.json +21 -0
- package/packages/core/src/config.ts +138 -0
- package/packages/core/src/errors.ts +100 -0
- package/packages/core/src/index.ts +104 -0
- package/packages/core/src/llm-config.ts +213 -0
- package/packages/core/src/logging.ts +66 -0
- package/packages/core/src/python-bridge.ts +384 -0
- package/packages/core/src/rate-limiter.ts +136 -0
- package/packages/core/src/types.ts +203 -0
- package/packages/core/src/validation.ts +101 -0
- package/packages/core/tsconfig.json +10 -0
- package/packages/deeppipe/package.json +21 -0
- package/packages/deeppipe/src/index.ts +424 -0
- package/packages/deeppipe/tsconfig.json +13 -0
- package/packages/piste/package.json +20 -0
- package/packages/piste/src/index.ts +48 -0
- package/packages/piste/tsconfig.json +13 -0
- package/packages/precis/package.json +20 -0
- package/packages/precis/src/index.ts +67 -0
- package/packages/precis/tsconfig.json +13 -0
- package/packages/server/package.json +31 -0
- package/packages/server/src/index.ts +427 -0
- package/packages/server/tsconfig.json +17 -0
- package/setup.mjs +141 -0
- package/test.mjs +337 -0
- package/vendors/clinical-intake/pipeline.mjs +349 -0
- package/vendors/clinical-intake/questions/en.txt +9 -0
- package/vendors/clinical-intake/questions/fr.txt +9 -0
- package/vendors/piste/.env.example +73 -0
- package/vendors/piste/app/core/__init__.py +4 -0
- package/vendors/piste/app/core/config.py +83 -0
- package/vendors/piste/app/core/debuglog.py +16 -0
- package/vendors/piste/app/core/middleware.py +40 -0
- package/vendors/piste/bridge_piste.py +301 -0
- package/vendors/piste/pipeline/__init__.py +4 -0
- package/vendors/piste/pipeline/compiler.py +68 -0
- package/vendors/piste/pipeline/offline/__init__.py +28 -0
- package/vendors/piste/pipeline/offline/verifaid_pipeline.py +247 -0
- package/vendors/piste/pipeline/replay.py +15 -0
- package/vendors/piste/pipeline/replay_engine.py +249 -0
- package/vendors/piste/pipeline/signatures/__init__.py +4 -0
- package/vendors/piste/pipeline/signatures/signatures.py +136 -0
- package/vendors/piste/pipeline/stage1/__init__.py +21 -0
- package/vendors/piste/pipeline/stage1/atomic_decomposer.py +61 -0
- package/vendors/piste/pipeline/stage1/check_worthiness.py +100 -0
- package/vendors/piste/pipeline/stage1/orchestrator.py +175 -0
- package/vendors/piste/pipeline/stage1/test_stage1.py +162 -0
- package/vendors/piste/pipeline/stage2/__init__.py +34 -0
- package/vendors/piste/pipeline/stage2/blind_retriever.py +303 -0
- package/vendors/piste/pipeline/stage2/canonical_mapper.py +124 -0
- package/vendors/piste/pipeline/stage2/credibility_scorer.py +85 -0
- package/vendors/piste/pipeline/stage2/orchestrator.py +311 -0
- package/vendors/piste/pipeline/stage2/query_refiner.py +88 -0
- package/vendors/piste/pipeline/stage2/search_decision.py +69 -0
- package/vendors/piste/pipeline/stage2/test_stage2.py +265 -0
- package/vendors/piste/pipeline/stage3/__init__.py +20 -0
- package/vendors/piste/pipeline/stage3/classifier.py +79 -0
- package/vendors/piste/pipeline/stage3/orchestrator.py +225 -0
- package/vendors/piste/pipeline/stage3/test_stage3.py +101 -0
- package/vendors/piste/pipeline/stage4/__init__.py +33 -0
- package/vendors/piste/pipeline/stage4/criticality_gate.py +177 -0
- package/vendors/piste/pipeline/stage4/orchestrator.py +269 -0
- package/vendors/piste/pipeline/stage4/test_stage4.py +192 -0
- package/vendors/piste/pipeline/stage4/verdict_aggregator.py +157 -0
- package/vendors/piste/requirements.txt +53 -0
- package/vendors/precis/backend/__init__.py +6 -0
- package/vendors/precis/backend/agents/__init__.py +3 -0
- package/vendors/precis/backend/agents/data_synthesis.py +105 -0
- package/vendors/precis/backend/agents/dist_free_synth.py +97 -0
- package/vendors/precis/backend/agents/exact_hash_retriever.py +327 -0
- package/vendors/precis/backend/agents/fusion_ranker.py +64 -0
- package/vendors/precis/backend/agents/guardrail.py +175 -0
- package/vendors/precis/backend/agents/query_expander.py +89 -0
- package/vendors/precis/backend/agents/radial_interpol.py +99 -0
- package/vendors/precis/backend/agents/report_generator.py +92 -0
- package/vendors/precis/backend/agents/semantic_reranker.py +135 -0
- package/vendors/precis/backend/agents/stat_anomaly.py +93 -0
- package/vendors/precis/backend/agents/vector_index.py +123 -0
- package/vendors/precis/backend/agents/veri_score.py +341 -0
- package/vendors/precis/backend/agents/work_order_extractor.py +205 -0
- package/vendors/precis/backend/api/__init__.py +3 -0
- package/vendors/precis/backend/api/routes/__init__.py +3 -0
- package/vendors/precis/backend/config.py +88 -0
- package/vendors/precis/backend/core/__init__.py +13 -0
- package/vendors/precis/backend/core/hashing.py +22 -0
- package/vendors/precis/backend/core/metrics.py +77 -0
- package/vendors/precis/backend/core/multitoken.py +166 -0
- package/vendors/precis/backend/core/pmi.py +54 -0
- package/vendors/precis/backend/core/stemming.py +74 -0
- package/vendors/precis/backend/core/tracing.py +150 -0
- package/vendors/precis/backend/data/__init__.py +3 -0
- package/vendors/precis/backend/data/chunker.py +57 -0
- package/vendors/precis/backend/data/pdf_parser.py +42 -0
- package/vendors/precis/backend/db/__init__.py +3 -0
- package/vendors/precis/backend/db/models.py +173 -0
- package/vendors/precis/backend/db/repository.py +269 -0
- package/vendors/precis/backend/llm/__init__.py +3 -0
- package/vendors/precis/backend/llm/anthropic_provider.py +39 -0
- package/vendors/precis/backend/llm/base.py +147 -0
- package/vendors/precis/backend/llm/deepseek_provider.py +43 -0
- package/vendors/precis/backend/llm/factory.py +60 -0
- package/vendors/precis/backend/llm/google_provider.py +39 -0
- package/vendors/precis/backend/llm/ollama_provider.py +54 -0
- package/vendors/precis/backend/llm/openai_provider.py +50 -0
- package/vendors/precis/backend/main.py +677 -0
- package/vendors/precis/backend/orchestrator/__init__.py +3 -0
- package/vendors/precis/backend/orchestrator/planner.py +81 -0
- package/vendors/precis/backend/orchestrator/router.py +319 -0
- package/vendors/precis/backend/orchestrator/types.py +58 -0
- package/vendors/precis/bridge_precis.py +185 -0
- package/vendors/precis/data/sample_reports/README.md +8 -0
- package/vendors/precis/data/seed_data.py +115 -0
- package/vendors/precis/requirements.txt +19 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Copyright (c) 2026 Jinan Kordab
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
# Stage 4 — Verdict
|
|
5
|
+
# 4a: Criticality Gate [C3] — pipeline/stage4/criticality_gate.py
|
|
6
|
+
# 4b: Verdict Aggregator [J5] — pipeline/stage4/verdict_aggregator.py
|
|
7
|
+
# 4c: Editorial Review Panel [C3] — pipeline/stage4/orchestrator.py
|
|
8
|
+
# Orchestrator: pipeline/stage4/orchestrator.py
|
|
9
|
+
|
|
10
|
+
from pipeline.stage4.criticality_gate import (
|
|
11
|
+
CriticalityGate, CriticalityAssessment, criticality_gate,
|
|
12
|
+
CRITICAL_TOPICS, HIGH_PROFILE_INDICATORS,
|
|
13
|
+
)
|
|
14
|
+
from pipeline.stage4.verdict_aggregator import (
|
|
15
|
+
VerdictAggregator, verdict_aggregator, VERDICT_LABELS,
|
|
16
|
+
)
|
|
17
|
+
from pipeline.stage4.orchestrator import (
|
|
18
|
+
Stage4Orchestrator, Stage4Result, stage4_orchestrator,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"CriticalityGate",
|
|
23
|
+
"CriticalityAssessment",
|
|
24
|
+
"criticality_gate",
|
|
25
|
+
"CRITICAL_TOPICS",
|
|
26
|
+
"HIGH_PROFILE_INDICATORS",
|
|
27
|
+
"VerdictAggregator",
|
|
28
|
+
"verdict_aggregator",
|
|
29
|
+
"VERDICT_LABELS",
|
|
30
|
+
"Stage4Orchestrator",
|
|
31
|
+
"Stage4Result",
|
|
32
|
+
"stage4_orchestrator",
|
|
33
|
+
]
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# Copyright (c) 2026 Jinan Kordab
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Stage 4a: Criticality Gate [C3]
|
|
6
|
+
=================================
|
|
7
|
+
Routes high-stakes claims to human Editorial Review Panel.
|
|
8
|
+
Low-stakes claims go to automated Verdict Aggregator.
|
|
9
|
+
|
|
10
|
+
Inspired by PolitiFact's three-editor panel:
|
|
11
|
+
- Elections, public health, legal, high-profile figures → HUMAN review
|
|
12
|
+
- Everything else → AUTOMATED verdict
|
|
13
|
+
|
|
14
|
+
The gate checks:
|
|
15
|
+
1. Keyword match against critical topics list
|
|
16
|
+
2. Embedding similarity to critical topic centroids (planned; not implemented in this module)
|
|
17
|
+
3. High-profile figure detection (keyword match against HIGH_PROFILE_INDICATORS)
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from typing import List, Optional
|
|
21
|
+
from dataclasses import dataclass
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Critical topics that trigger human review — Canada-focused bilingual (EN/FR)
# Covers: Canadian federal elections + Quebec provincial elections
# All entries are lowercase; CriticalityGate.assess() lowercases the claim
# before matching.
CRITICAL_TOPICS = [
    # ── Elections (EN/FR) ────────────────────────────────────
    "election", "élection", "vote", "voter", "candidate", "candidat",
    "ballot", "bulletin", "democracy", "démocratie",
    "federal election", "élection fédérale",
    "provincial election", "élection provinciale",
    "referendum", "référendum",
    "riding", "circonscription", "mp", "député", "députée",
    "minority government", "gouvernement minoritaire",
    "majority government", "gouvernement majoritaire",
    "coalition", "confidence vote", "vote de confiance",
    # ── Quebec Politics (FR/EN) ──────────────────────────────
    "quebec", "québec", "québécois", "quebecois",
    "national assembly", "assemblée nationale",
    "sovereignty", "souveraineté", "separatist", "séparatiste",
    "federalism", "fédéralisme", "federalist", "fédéraliste",
    "language law", "loi linguistique", "bill 101", "loi 101",
    "bill 96", "loi 96", "bill 21", "loi 21",
    "secularism", "laïcité", "religious symbols", "signes religieux",
    "distinct society", "société distincte",
    "notwithstanding clause", "clause dérogatoire",
    # ── Canadian Federal Politics ────────────────────────────
    "parliament", "parlement", "house of commons", "chambre des communes",
    "senate", "sénat", "governor general", "gouverneur général",
    "throne speech", "discours du trône",
    "first past the post", "scrutin uninominal",
    "electoral reform", "réforme électorale",
    "equalization", "péréquation", "transfer payments", "paiements de transfert",
    "carbon tax", "taxe carbone", "carbon pricing", "tarification du carbone",
    "pipeline", "oil sands", "sables bitumineux",
    "indigenous", "autochtone", "first nations", "premières nations",
    "reconciliation", "réconciliation", "treaty", "traité",
    "indian act", "loi sur les indiens",
    # ── Public Health (EN/FR) ─────────────────────────────────
    "public health", "santé publique", "pandemic", "pandémie",
    "vaccine", "vaccin", "covid", "disease", "maladie",
    "health transfer", "transfert en santé", "healthcare", "soins de santé",
    # ── Economy / Budget (EN/FR) ──────────────────────────────
    "economy", "économie", "inflation", "recession", "récession",
    "deficit", "déficit", "budget", "tax", "impôt", "taxes", "impôts",
    "debt", "dette", "spending", "dépenses", "austerity", "austérité",
    "housing", "logement", "affordable housing", "logement abordable",
    "interest rate", "taux d'intérêt", "bank of canada", "banque du canada",
    # ── Rights / Immigration (EN/FR) ──────────────────────────
    "abortion", "avortement", "civil rights", "droits civils",
    "human rights", "droits humains", "charter", "charte",
    "immigration", "refugee", "réfugié", "asylum", "asile",
    "multiculturalism", "multiculturalisme",
]

# High-profile figure indicators — Canada-focused bilingual
# All entries are lowercase, for the same reason as CRITICAL_TOPICS.
HIGH_PROFILE_INDICATORS = [
    # ── Canadian Federal ──────────────────────────────────────
    "prime minister", "premier ministre",
    "pm", "trudeau", "justin trudeau",
    "poilievre", "pierre poilievre",
    "singh", "jagmeet singh",
    "liberal party", "parti libéral",
    "conservative party", "parti conservateur",
    "ndp", "nouveau parti démocratique",
    "bloc", "bloc québécois",
    "green party", "parti vert",
    "minister", "ministre", "cabinet",
    "governor general", "gouverneur général",
    "senator", "sénateur", "mp", "member of parliament",
    # ── Quebec Provincial ─────────────────────────────────────
    "premier", "première ministre",
    "quebec premier", "premier du québec",
    "legault", "françois legault",
    "caq", "coalition avenir québec",
    "parti québécois", "pq",
    "québec solidaire", "qs",
    "liberal party of quebec", "parti libéral du québec",
    "mna", "député", "députée",
    "national assembly", "assemblée nationale",
    # ── Provincial Premiers (other provinces) ─────────────────
    "ontario premier", "premier ontarien", "ford", "doug ford",
    "alberta premier", "smith", "danielle smith",
    "bc premier", "eby", "david eby",
]


@dataclass
class CriticalityAssessment:
    """Result of the Criticality Gate check."""
    # True when at least one critical topic or high-profile indicator matched.
    is_critical: bool
    # Entries of CRITICAL_TOPICS found in the claim, in list order.
    matched_topics: List[str]
    # True when any entry of HIGH_PROFILE_INDICATORS was found in the claim.
    is_high_profile: bool
    recommendation: str  # "automated" or "human_review"
    # Human-readable summary of why the recommendation was made.
    reason: str


class CriticalityGate:
    """
    Routes claims based on criticality.

    Jewel [C3] — Human-in-the-loop breakpoint:
    Automated for scale, human-reviewed for stakes.

    Matching is keyword-based: a term counts as present only when it starts
    at a word boundary in the lowercased claim. Trailing characters are
    allowed, so inflected forms still match ("vote" matches "votes"), but a
    term embedded inside another word does not ("mp" no longer matches
    "company", "election" no longer matches "selection").
    """

    @staticmethod
    def _mentions(text: str, term: str) -> bool:
        """Return True when *term* occurs in *text* starting at a word boundary."""
        # Function-scope import keeps this block self-contained; the module
        # does not import `re` at file level.
        import re

        # `(?<!\w)` rejects matches preceded by a letter/digit/underscore.
        # str patterns are Unicode-aware, so accented letters count as word
        # characters. The `re` module caches compiled patterns internally.
        return re.search(r"(?<!\w)" + re.escape(term), text) is not None

    def assess(self, claim_text: str) -> CriticalityAssessment:
        """
        Determine if a claim requires human review.

        Args:
            claim_text: The claim text to assess. ``None`` or an empty
                string is treated as a claim with no matches.

        Returns:
            CriticalityAssessment with routing recommendation:
            "human_review" when any critical topic or high-profile
            indicator matches, otherwise "automated".
        """
        claim_lower = (claim_text or "").lower()

        # Check critical topics (every matching term is reported, in list order)
        matched_topics = [
            topic for topic in CRITICAL_TOPICS
            if self._mentions(claim_lower, topic)
        ]

        # Check high-profile figures
        is_high_profile = any(
            self._mentions(claim_lower, indicator)
            for indicator in HIGH_PROFILE_INDICATORS
        )

        # Determine routing
        is_critical = bool(matched_topics) or is_high_profile

        if is_critical:
            reason_parts = []
            if matched_topics:
                # Only the first three topics are listed to keep the reason short.
                reason_parts.append(
                    f"matches critical topics: {', '.join(matched_topics[:3])}"
                )
            if is_high_profile:
                reason_parts.append("involves high-profile figure")
            reason = "; ".join(reason_parts)
            recommendation = "human_review"
        else:
            reason = "No critical topics or high-profile indicators detected."
            recommendation = "automated"

        return CriticalityAssessment(
            is_critical=is_critical,
            matched_topics=matched_topics,
            is_high_profile=is_high_profile,
            recommendation=recommendation,
            reason=reason,
        )


# Singleton
criticality_gate = CriticalityGate()
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
# Copyright (c) 2026 Jinan Kordab
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Stage 4 Orchestrator — Verdict
|
|
6
|
+
================================
|
|
7
|
+
Coordinates:
|
|
8
|
+
4a: Criticality Gate [C3] — route to automated vs human review
|
|
9
|
+
4b: Verdict Aggregator [J5] — 7-way PolitiFact-aligned verdict
|
|
10
|
+
4c: Editorial Review Panel [C3] — human-in-the-loop for critical claims
|
|
11
|
+
|
|
12
|
+
Writes APPEND-ONLY verdict records to PostgreSQL [C5].
|
|
13
|
+
Emits SSE events: stage_4a_start, criticality_determined, atomic_verdict (automated route), verdict_complete (human-review route).
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import time
|
|
17
|
+
import uuid
|
|
18
|
+
from typing import Optional, Dict
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from datetime import datetime
|
|
21
|
+
|
|
22
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
23
|
+
|
|
24
|
+
from app.db.models import StageRecord, Verdict
|
|
25
|
+
from pipeline.stage4.criticality_gate import criticality_gate
|
|
26
|
+
from pipeline.stage4.verdict_aggregator import (
|
|
27
|
+
verdict_aggregator, VERDICT_LABELS,
|
|
28
|
+
)
|
|
29
|
+
from pipeline.stage3.orchestrator import Stage3Result, ClassificationResult
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class Stage4Result:
    """Per-atomic-claim outcome of Stage 4 (Verdict).

    Attributes:
        atomic_claim: The claim text this result belongs to.
        is_critical: Whether the Criticality Gate flagged the claim.
        review_route: Either "automated" or "human_review".
        criticality_reason: Explanation of the routing decision.
        verdict: One of the 7 verdict labels.
        confidence: Confidence reported for the verdict.
        explanation: Free-text justification of the verdict.
        distribution: Mapping of verdict label to weight.
        human_review: Review payload when routed to human review, else None.
        support_count: Stage 3 summary count, kept for context.
        refute_count: Stage 3 summary count, kept for context.
        unrelated_count: Stage 3 summary count, kept for context.
    """

    # -- required: claim and gate decision --------------------------------
    atomic_claim: str
    is_critical: bool
    review_route: str
    criticality_reason: str

    # -- required: verdict ------------------------------------------------
    verdict: str
    confidence: float
    explanation: str
    distribution: Dict[str, float]

    # -- optional: only populated on the human-review route ---------------
    human_review: Optional[dict] = None

    # -- optional: Stage 3 tallies ----------------------------------------
    support_count: int = 0
    refute_count: int = 0
    unrelated_count: int = 0
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class Stage4Orchestrator:
    """
    Orchestrates Stage 4 of the fact-checking pipeline.

    Flow:
    1. Criticality Gate: assess if human review needed [C3]
    2a. If automated → VerdictAggregator synthesizes 7-way verdict [J5]
    2b. If critical → queue for Editorial Review Panel [C3]
        (MVP: the automated verdict is still computed and returned,
        flagged as pending human review)
    3. Add a StageRecord to the DB session, when one is supplied
    4. Emit SSE events through the optional callback
    """

    def __init__(self, sse_callback: Optional[callable] = None):
        """
        Args:
            sse_callback: Optional callable invoked as
                ``sse_callback(event_type, data)`` for every emitted event.
                It may be a coroutine function or a plain function.
        """
        self.sse_callback = sse_callback

    async def process(
        self,
        atomic_claim: str,
        stage3_result: Stage3Result,
        db: Optional[AsyncSession] = None,
        run_id: Optional[uuid.UUID] = None,
        locale: str = "en",
    ) -> Stage4Result:
        """
        Run Stage 4 processing for one atomic claim.

        Args:
            atomic_claim: The atomic claim to verdict.
            stage3_result: Per-source classifications from Stage 3.
            db: Optional DB session for audit ledger writes. Records are
                only added to the session; this method does not commit.
            run_id: Run identifier stored on the StageRecord. When omitted,
                a fresh random UUID is generated for the record.
            locale: Locale forwarded to the verdict aggregator.

        Returns:
            Stage4Result with final verdict.
        """
        # --- 4a: Criticality Gate ---
        await self._emit("stage_4a_start", {
            "atomic_claim": atomic_claim,
        })

        assessment = criticality_gate.assess(atomic_claim)

        await self._emit("criticality_determined", {
            "atomic_claim": atomic_claim,
            "is_critical": assessment.is_critical,
            "matched_topics": assessment.matched_topics,
            "recommendation": assessment.recommendation,
        })

        # --- 4b / 4c: route on the gate's recommendation ---
        if assessment.recommendation == "automated":
            result = await self._automated_verdict(
                atomic_claim, stage3_result, db, run_id, locale
            )
        else:
            result = await self._human_review_verdict(
                atomic_claim, stage3_result, assessment, db, run_id, locale
            )

        # NOTE: Verdict creation moved to pipeline_service.py —
        # one Verdict per run_id, aggregated from all atomic claims.
        # Stage 4 only returns per-atomic-claim results.
        return result

    def _run_aggregator(
        self,
        atomic_claim: str,
        stage3_result: Stage3Result,
        locale: str,
    ) -> tuple:
        """Call the verdict aggregator once and time it.

        Returns:
            ``(verdict, confidence, explanation, distribution, latency_ms)``.
        """
        # NOTE(review): verdict_aggregator is invoked synchronously from async
        # code — confirm it does not block the event loop for long.
        t0 = time.monotonic()
        verdict, confidence, explanation, distribution = verdict_aggregator(
            claim=atomic_claim,
            classifications=stage3_result.classifications,
            locale=locale,
        )
        latency_ms = (time.monotonic() - t0) * 1000
        return verdict, confidence, explanation, distribution, latency_ms

    async def _automated_verdict(
        self,
        atomic_claim: str,
        stage3_result: Stage3Result,
        db: Optional[AsyncSession],
        run_id: Optional[uuid.UUID] = None,
        locale: str = "en",
    ) -> Stage4Result:
        """Run automated VerdictAggregator [J5].

        Adds a "stage_4b" StageRecord when *db* is given, emits an
        "atomic_verdict" event and returns the result with
        ``review_route="automated"``.
        """
        (
            verdict,
            confidence,
            explanation,
            distribution,
            latency_ms,
        ) = self._run_aggregator(atomic_claim, stage3_result, locale)

        # Write stage record
        if db:
            db.add(StageRecord(
                run_id=run_id or uuid.uuid4(),
                stage_name="stage_4b",
                input_snapshot={
                    "atomic_claim": atomic_claim,
                    "classifications_count": len(stage3_result.classifications),
                },
                output_snapshot={
                    "verdict": verdict,
                    "confidence": confidence,
                    "distribution": distribution,
                },
                model_used="dspy/verdict_aggregator",
                latency_ms=latency_ms,
                retry_attempt=0,
            ))

        await self._emit("atomic_verdict", {
            "atomic_claim": atomic_claim,
            "verdict": verdict,
            "confidence": confidence,
            "explanation": explanation,
            "distribution": distribution,
            "route": "automated",
        })

        return Stage4Result(
            atomic_claim=atomic_claim,
            is_critical=False,
            review_route="automated",
            criticality_reason="Low-stakes claim — automated verdict.",
            verdict=verdict,
            confidence=confidence,
            explanation=explanation,
            distribution=distribution,
            support_count=stage3_result.support_count,
            refute_count=stage3_result.refute_count,
            unrelated_count=stage3_result.unrelated_count,
        )

    async def _human_review_verdict(
        self,
        atomic_claim: str,
        stage3_result: Stage3Result,
        assessment,
        db: Optional[AsyncSession],
        run_id: Optional[uuid.UUID] = None,
        locale: str = "en",
    ) -> Stage4Result:
        """
        Queue claim for human Editorial Review Panel [C3].

        In production: claim added to review_queue table.
        Human reviewer sees: claim + evidence + per-source classifications.
        Panel votes on final verdict (PolitiFact-style democratic vote).

        MVP: fall back to automated verdict with human_review flag.

        Adds a "stage_4c" StageRecord when *db* is given, emits a
        "verdict_complete" event and returns the result with
        ``review_route="human_review"``.
        """
        # For MVP, run automated verdict but flag for human review
        (
            verdict,
            confidence,
            explanation,
            distribution,
            latency_ms,
        ) = self._run_aggregator(atomic_claim, stage3_result, locale)

        # Reviewer fields stay None until a human fills them in.
        human_review_payload = {
            "status": "pending_review",
            "matched_topics": assessment.matched_topics,
            "auto_verdict": verdict,
            "auto_confidence": confidence,
            # NOTE(review): naive UTC timestamp; datetime.utcnow() is
            # deprecated since Python 3.12. Switching to an aware datetime
            # would change the serialized format, so it is left as-is.
            "queued_at": datetime.utcnow().isoformat(),
            "reviewer_id": None,
            "final_verdict": None,
            "reason": None,
        }

        if db:
            db.add(StageRecord(
                run_id=run_id or uuid.uuid4(),
                stage_name="stage_4c",
                input_snapshot={
                    "atomic_claim": atomic_claim,
                    "assessment": {
                        "is_critical": assessment.is_critical,
                        "matched_topics": assessment.matched_topics,
                    },
                },
                output_snapshot=human_review_payload,
                model_used="dspy/verdict_aggregator+human_review",
                latency_ms=latency_ms,
                retry_attempt=0,
            ))

        # NOTE(review): this route emits "verdict_complete" while the
        # automated route emits "atomic_verdict" — confirm consumers expect
        # different event names per route.
        await self._emit("verdict_complete", {
            "atomic_claim": atomic_claim,
            "verdict": verdict,
            "confidence": confidence,
            "explanation": explanation,
            "distribution": distribution,
            "route": "human_review_pending",
            "human_review": human_review_payload,
        })

        return Stage4Result(
            atomic_claim=atomic_claim,
            is_critical=True,
            review_route="human_review",
            criticality_reason=assessment.reason,
            verdict=verdict,
            confidence=confidence,
            explanation=explanation,
            distribution=distribution,
            human_review=human_review_payload,
            support_count=stage3_result.support_count,
            refute_count=stage3_result.refute_count,
            unrelated_count=stage3_result.unrelated_count,
        )

    async def _emit(self, event_type: str, data: dict):
        """Forward an event to the SSE callback, if one is configured.

        The callback's return value is awaited only when it is awaitable, so
        both coroutine functions and plain functions are accepted.
        """
        if not self.sse_callback:
            return

        # Function-scope import: `inspect` is not imported at file level.
        import inspect

        outcome = self.sse_callback(event_type, data)
        if inspect.isawaitable(outcome):
            await outcome


# Singleton
stage4_orchestrator = Stage4Orchestrator()
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
# Copyright (c) 2026 Jinan Kordab
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Unit Tests — Stage 4: Verdict
|
|
6
|
+
===============================
|
|
7
|
+
Tests CriticalityGate [C3], VerdictAggregator [J5],
|
|
8
|
+
7-way verdict labels, Stage4Result.
|
|
9
|
+
|
|
10
|
+
Run: pytest pipeline/stage4/test_stage4.py -v
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import pytest
|
|
14
|
+
import json
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# ============================================================
|
|
18
|
+
# Criticality Gate Tests [C3]
|
|
19
|
+
# ============================================================
|
|
20
|
+
|
|
21
|
+
class TestCriticalityGate:
    """Test routing of claims to automated vs human review."""

    @pytest.fixture
    def gate(self):
        """Provide a fresh CriticalityGate for each test."""
        from pipeline.stage4.criticality_gate import CriticalityGate
        return CriticalityGate()

    def test_election_claim_is_critical(self, gate):
        """Claims about elections → human review."""
        result = gate.assess("The election was rigged and the votes were tampered with.")
        assert result.is_critical is True
        assert result.recommendation == "human_review"
        assert "election" in result.matched_topics
        assert "vote" in result.matched_topics

    def test_public_health_claim_is_critical(self, gate):
        """Claims about vaccines/pandemics → human review."""
        result = gate.assess("The vaccine causes severe side effects in children.")
        assert result.is_critical is True
        assert "vaccine" in result.matched_topics

    def test_high_profile_figure_is_critical(self, gate):
        """Claims about the prime minister → human review."""
        # The indicator list is Canada-focused and has no entry matching
        # "president", so the claim must name a figure that is in the list.
        result = gate.assess("The prime minister signed the executive order yesterday.")
        assert result.is_critical is True
        assert result.is_high_profile is True

    def test_neutral_claim_is_automated(self, gate):
        """Non-critical claims → automated verdict."""
        result = gate.assess("Water boils at 100 degrees Celsius at sea level.")
        assert result.is_critical is False
        assert result.recommendation == "automated"

    def test_empty_claim_is_automated(self, gate):
        """Empty claims → automated."""
        result = gate.assess("")
        assert result.is_critical is False
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# ============================================================
|
|
62
|
+
# Verdict Aggregator Tests [J5]
|
|
63
|
+
# ============================================================
|
|
64
|
+
|
|
65
|
+
class TestVerdictAggregator:
    """Checks for verdict-label normalization and the default distribution."""

    @pytest.fixture
    def aggregator(self):
        """Provide a fresh VerdictAggregator for each test."""
        from pipeline.stage4.verdict_aggregator import VerdictAggregator
        instance = VerdictAggregator()
        return instance

    def test_normalize_direct_match(self, aggregator):
        """Canonical labels come back unchanged."""
        for canonical in ("TRUE", "FALSE", "UNVERIFIABLE", "PANTS_ON_FIRE"):
            assert aggregator._normalize_verdict(canonical) == canonical

    def test_normalize_with_spaces(self, aggregator):
        """Space-separated labels map to their underscore form."""
        cases = (
            ("MOSTLY TRUE", "MOSTLY_TRUE"),
            ("HALF TRUE", "HALF_TRUE"),
            ("MOSTLY FALSE", "MOSTLY_FALSE"),
        )
        for raw, expected in cases:
            assert aggregator._normalize_verdict(raw) == expected

    def test_normalize_without_spaces(self, aggregator):
        """Run-together labels map to their underscore form."""
        cases = (
            ("MOSTLYTRUE", "MOSTLY_TRUE"),
            ("HALFTRUE", "HALF_TRUE"),
        )
        for raw, expected in cases:
            assert aggregator._normalize_verdict(raw) == expected

    def test_normalize_nei(self, aggregator):
        """Not-enough-information variants map to UNVERIFIABLE."""
        for raw in ("NEI", "NOT ENOUGH INFORMATION"):
            assert aggregator._normalize_verdict(raw) == "UNVERIFIABLE"

    def test_normalize_unknown_fallback(self, aggregator):
        """Unrecognized labels fall back to UNVERIFIABLE."""
        normalized = aggregator._normalize_verdict("SOMETHING WEIRD")
        assert normalized == "UNVERIFIABLE"

    def test_default_distribution_centered(self, aggregator):
        """The default distribution puts all weight on the requested label."""
        weights = aggregator._default_distribution("TRUE")
        assert weights["TRUE"] == 1.0
        assert weights["FALSE"] == 0.0
        assert sum(weights.values()) == 1.0

    def test_all_verdict_labels_present(self, aggregator):
        """The default distribution has a key for each of the 7 labels."""
        from pipeline.stage4.verdict_aggregator import VERDICT_LABELS
        weights = aggregator._default_distribution("MOSTLY_TRUE")
        for expected_label in VERDICT_LABELS:
            assert expected_label in weights
        assert len(weights) == 7

    def test_build_classifications_payload(self, aggregator):
        """The classifications payload parses as JSON with one entry per source."""
        from pipeline.stage3.orchestrator import ClassificationResult
        supporting = ClassificationResult(0, "a.com", "a.com", "SUPPORTS", 0.9, "Good", 0.9)
        refuting = ClassificationResult(1, "b.com", "b.com", "REFUTES", 0.7, "Bad", 0.5)
        raw_payload = aggregator._build_classifications_payload(
            "Test", [supporting, refuting]
        )
        parsed = json.loads(raw_payload)
        assert len(parsed) == 2
        assert parsed[0]["label"] == "SUPPORTS"
        assert parsed[1]["label"] == "REFUTES"
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ============================================================
|
|
130
|
+
# Stage4Result Tests
|
|
131
|
+
# ============================================================
|
|
132
|
+
|
|
133
|
+
class TestStage4Result:
    """Construction checks for the Stage4Result dataclass."""

    def test_automated_result(self):
        """An automated-route result carries no human-review payload."""
        from pipeline.stage4.orchestrator import Stage4Result
        fields = {
            "atomic_claim": "Water boils at 100°C.",
            "is_critical": False,
            "review_route": "automated",
            "criticality_reason": "Low-stakes claim.",
            "verdict": "TRUE",
            "confidence": 0.95,
            "explanation": "Well-established scientific fact.",
            "distribution": {"TRUE": 0.95, "MOSTLY_TRUE": 0.05},
            "support_count": 3,
            "refute_count": 0,
            "unrelated_count": 0,
        }
        outcome = Stage4Result(**fields)
        assert outcome.verdict == "TRUE"
        assert outcome.review_route == "automated"
        assert outcome.human_review is None

    def test_human_review_result(self):
        """A human-review result keeps the review payload it was given."""
        from pipeline.stage4.orchestrator import Stage4Result
        fields = {
            "atomic_claim": "The election was fraudulent.",
            "is_critical": True,
            "review_route": "human_review",
            "criticality_reason": "Matches critical topic: election.",
            "verdict": "UNVERIFIABLE",
            "confidence": 0.45,
            "explanation": "Insufficient credible evidence.",
            "distribution": {"UNVERIFIABLE": 1.0},
            "human_review": {"status": "pending_review"},
            "support_count": 0,
            "refute_count": 0,
            "unrelated_count": 5,
        }
        outcome = Stage4Result(**fields)
        assert outcome.is_critical is True
        assert outcome.review_route == "human_review"
        assert outcome.human_review is not None
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
# ============================================================
|
|
177
|
+
# Verdict Labels Tests
|
|
178
|
+
# ============================================================
|
|
179
|
+
|
|
180
|
+
class TestVerdictLabels:
    """Checks the 7-way PolitiFact-aligned verdict label set."""

    def test_all_seven_labels(self):
        """VERDICT_LABELS has exactly seven entries, including every expected label."""
        from pipeline.stage4.verdict_aggregator import VERDICT_LABELS
        expected_labels = (
            "TRUE",
            "MOSTLY_TRUE",
            "HALF_TRUE",
            "MOSTLY_FALSE",
            "FALSE",
            "PANTS_ON_FIRE",
            "UNVERIFIABLE",
        )
        assert len(VERDICT_LABELS) == 7
        for label in expected_labels:
            assert label in VERDICT_LABELS
|