mcp-agentic-pipelines 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +93 -0
- package/README.md +258 -0
- package/package.json +70 -0
- package/packages/clinical/package.json +22 -0
- package/packages/clinical/src/index.ts +262 -0
- package/packages/clinical/tsconfig.json +13 -0
- package/packages/core/package.json +21 -0
- package/packages/core/src/config.ts +138 -0
- package/packages/core/src/errors.ts +100 -0
- package/packages/core/src/index.ts +104 -0
- package/packages/core/src/llm-config.ts +213 -0
- package/packages/core/src/logging.ts +66 -0
- package/packages/core/src/python-bridge.ts +384 -0
- package/packages/core/src/rate-limiter.ts +136 -0
- package/packages/core/src/types.ts +203 -0
- package/packages/core/src/validation.ts +101 -0
- package/packages/core/tsconfig.json +10 -0
- package/packages/deeppipe/package.json +21 -0
- package/packages/deeppipe/src/index.ts +424 -0
- package/packages/deeppipe/tsconfig.json +13 -0
- package/packages/piste/package.json +20 -0
- package/packages/piste/src/index.ts +48 -0
- package/packages/piste/tsconfig.json +13 -0
- package/packages/precis/package.json +20 -0
- package/packages/precis/src/index.ts +67 -0
- package/packages/precis/tsconfig.json +13 -0
- package/packages/server/package.json +31 -0
- package/packages/server/src/index.ts +427 -0
- package/packages/server/tsconfig.json +17 -0
- package/setup.mjs +141 -0
- package/test.mjs +337 -0
- package/vendors/clinical-intake/pipeline.mjs +349 -0
- package/vendors/clinical-intake/questions/en.txt +9 -0
- package/vendors/clinical-intake/questions/fr.txt +9 -0
- package/vendors/piste/.env.example +73 -0
- package/vendors/piste/app/core/__init__.py +4 -0
- package/vendors/piste/app/core/config.py +83 -0
- package/vendors/piste/app/core/debuglog.py +16 -0
- package/vendors/piste/app/core/middleware.py +40 -0
- package/vendors/piste/bridge_piste.py +301 -0
- package/vendors/piste/pipeline/__init__.py +4 -0
- package/vendors/piste/pipeline/compiler.py +68 -0
- package/vendors/piste/pipeline/offline/__init__.py +28 -0
- package/vendors/piste/pipeline/offline/verifaid_pipeline.py +247 -0
- package/vendors/piste/pipeline/replay.py +15 -0
- package/vendors/piste/pipeline/replay_engine.py +249 -0
- package/vendors/piste/pipeline/signatures/__init__.py +4 -0
- package/vendors/piste/pipeline/signatures/signatures.py +136 -0
- package/vendors/piste/pipeline/stage1/__init__.py +21 -0
- package/vendors/piste/pipeline/stage1/atomic_decomposer.py +61 -0
- package/vendors/piste/pipeline/stage1/check_worthiness.py +100 -0
- package/vendors/piste/pipeline/stage1/orchestrator.py +175 -0
- package/vendors/piste/pipeline/stage1/test_stage1.py +162 -0
- package/vendors/piste/pipeline/stage2/__init__.py +34 -0
- package/vendors/piste/pipeline/stage2/blind_retriever.py +303 -0
- package/vendors/piste/pipeline/stage2/canonical_mapper.py +124 -0
- package/vendors/piste/pipeline/stage2/credibility_scorer.py +85 -0
- package/vendors/piste/pipeline/stage2/orchestrator.py +311 -0
- package/vendors/piste/pipeline/stage2/query_refiner.py +88 -0
- package/vendors/piste/pipeline/stage2/search_decision.py +69 -0
- package/vendors/piste/pipeline/stage2/test_stage2.py +265 -0
- package/vendors/piste/pipeline/stage3/__init__.py +20 -0
- package/vendors/piste/pipeline/stage3/classifier.py +79 -0
- package/vendors/piste/pipeline/stage3/orchestrator.py +225 -0
- package/vendors/piste/pipeline/stage3/test_stage3.py +101 -0
- package/vendors/piste/pipeline/stage4/__init__.py +33 -0
- package/vendors/piste/pipeline/stage4/criticality_gate.py +177 -0
- package/vendors/piste/pipeline/stage4/orchestrator.py +269 -0
- package/vendors/piste/pipeline/stage4/test_stage4.py +192 -0
- package/vendors/piste/pipeline/stage4/verdict_aggregator.py +157 -0
- package/vendors/piste/requirements.txt +53 -0
- package/vendors/precis/backend/__init__.py +6 -0
- package/vendors/precis/backend/agents/__init__.py +3 -0
- package/vendors/precis/backend/agents/data_synthesis.py +105 -0
- package/vendors/precis/backend/agents/dist_free_synth.py +97 -0
- package/vendors/precis/backend/agents/exact_hash_retriever.py +327 -0
- package/vendors/precis/backend/agents/fusion_ranker.py +64 -0
- package/vendors/precis/backend/agents/guardrail.py +175 -0
- package/vendors/precis/backend/agents/query_expander.py +89 -0
- package/vendors/precis/backend/agents/radial_interpol.py +99 -0
- package/vendors/precis/backend/agents/report_generator.py +92 -0
- package/vendors/precis/backend/agents/semantic_reranker.py +135 -0
- package/vendors/precis/backend/agents/stat_anomaly.py +93 -0
- package/vendors/precis/backend/agents/vector_index.py +123 -0
- package/vendors/precis/backend/agents/veri_score.py +341 -0
- package/vendors/precis/backend/agents/work_order_extractor.py +205 -0
- package/vendors/precis/backend/api/__init__.py +3 -0
- package/vendors/precis/backend/api/routes/__init__.py +3 -0
- package/vendors/precis/backend/config.py +88 -0
- package/vendors/precis/backend/core/__init__.py +13 -0
- package/vendors/precis/backend/core/hashing.py +22 -0
- package/vendors/precis/backend/core/metrics.py +77 -0
- package/vendors/precis/backend/core/multitoken.py +166 -0
- package/vendors/precis/backend/core/pmi.py +54 -0
- package/vendors/precis/backend/core/stemming.py +74 -0
- package/vendors/precis/backend/core/tracing.py +150 -0
- package/vendors/precis/backend/data/__init__.py +3 -0
- package/vendors/precis/backend/data/chunker.py +57 -0
- package/vendors/precis/backend/data/pdf_parser.py +42 -0
- package/vendors/precis/backend/db/__init__.py +3 -0
- package/vendors/precis/backend/db/models.py +173 -0
- package/vendors/precis/backend/db/repository.py +269 -0
- package/vendors/precis/backend/llm/__init__.py +3 -0
- package/vendors/precis/backend/llm/anthropic_provider.py +39 -0
- package/vendors/precis/backend/llm/base.py +147 -0
- package/vendors/precis/backend/llm/deepseek_provider.py +43 -0
- package/vendors/precis/backend/llm/factory.py +60 -0
- package/vendors/precis/backend/llm/google_provider.py +39 -0
- package/vendors/precis/backend/llm/ollama_provider.py +54 -0
- package/vendors/precis/backend/llm/openai_provider.py +50 -0
- package/vendors/precis/backend/main.py +677 -0
- package/vendors/precis/backend/orchestrator/__init__.py +3 -0
- package/vendors/precis/backend/orchestrator/planner.py +81 -0
- package/vendors/precis/backend/orchestrator/router.py +319 -0
- package/vendors/precis/backend/orchestrator/types.py +58 -0
- package/vendors/precis/bridge_precis.py +185 -0
- package/vendors/precis/data/sample_reports/README.md +8 -0
- package/vendors/precis/data/seed_data.py +115 -0
- package/vendors/precis/requirements.txt +19 -0
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
# Copyright (c) 2026 Jinan Kordab
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
VERIFAID Offline Dataset Pipeline [J7]
|
|
6
|
+
=========================================
|
|
7
|
+
M1: Generate diverse factual claims (LLM, multilingual)
|
|
8
|
+
M2: Enrich + Label + FAISS Index
|
|
9
|
+
|
|
10
|
+
Jewel [J7] — VERIFAID's dataset creation as first-class module:
|
|
11
|
+
- No data bottleneck: system generates its own training data
|
|
12
|
+
- Self-improving: verified claims enrich the knowledge base
|
|
13
|
+
- Freshness: claims about current events generated and indexed immediately
|
|
14
|
+
- Domain expansion: generate claims on any topic, in any language
|
|
15
|
+
|
|
16
|
+
Scheduled weekly to refresh the FAISS Tier-1 evidence cache.
|
|
17
|
+
Loop 2: newly verified claims automatically enrich the offline index.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import dspy
|
|
21
|
+
import json
|
|
22
|
+
import asyncio
|
|
23
|
+
from datetime import datetime
|
|
24
|
+
from typing import List
|
|
25
|
+
from dataclasses import dataclass, field
|
|
26
|
+
|
|
27
|
+
import numpy as np
|
|
28
|
+
from app.services.vector_store import faiss_store
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# ============================================================
|
|
32
|
+
# DSPy Signatures
|
|
33
|
+
# ============================================================
|
|
34
|
+
|
|
35
|
+
class ClaimGenerationSignature(dspy.Signature):
    """Generate diverse factual claims for dataset creation."""
    # NOTE(review): the docstring and the `desc` strings are presumably used by
    # DSPy when it builds the prompt — treat them as runtime text and confirm
    # against the DSPy Signature docs before rewording.

    # Inputs: subject area, output language, and how many claims to request.
    topic: str = dspy.InputField(desc="Topic domain to generate claims about")
    locale: str = dspy.InputField(desc="Language locale for generated claims")
    count: int = dspy.InputField(desc="Number of claims to generate")

    # Output: read by ClaimGenerator.forward, which strips each entry and
    # drops the empty ones.
    claims: list[str] = dspy.OutputField(desc="Generated factual claims")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class EvidenceLabelingSignature(dspy.Signature):
    """Generate evidence and labels for a claim."""
    # NOTE(review): the docstring and the `desc` strings are presumably used by
    # DSPy when it builds the prompt — treat them as runtime text.

    # Input: the claim text to label.
    claim: str = dspy.InputField(desc="The claim to generate evidence for")

    # Outputs: EvidenceEnricher.forward copies these into its result dict and
    # normalises `label` with strip().upper(); nothing in this file validates
    # that the label is one of the three values named in its description.
    evidence_text: str = dspy.OutputField(desc="Synthetic evidence text supporting or refuting the claim")
    label: str = dspy.OutputField(desc="TRUE, FALSE, or UNVERIFIABLE")
    explanation: str = dspy.OutputField(desc="Explanation of the label")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# ============================================================
|
|
54
|
+
# M1: Claim Generator
|
|
55
|
+
# ============================================================
|
|
56
|
+
|
|
57
|
+
# Default topic domains; ClaimGenerator.generate_all falls back to this list
# when called without (or with an empty) `topics` argument.
TOPICS = [
    "science", "technology", "health", "politics", "economics",
    "environment", "education", "sports", "entertainment", "history",
]
# Default locales; ClaimGenerator.generate_all falls back to this list when
# called without (or with an empty) `locales` argument.
LOCALES = ["en", "fr", "es"]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class ClaimGenerator(dspy.Module):
    """
    M1 stage: produce factual claims for every topic/locale combination.

    Wraps a ``dspy.ChainOfThought`` predictor built from
    ``ClaimGenerationSignature``. The claims it returns are the raw material
    that the enrichment stage labels and indexes.
    """

    def __init__(self):
        super().__init__()
        self.generate = dspy.ChainOfThought(ClaimGenerationSignature)

    def forward(self, topic: str, locale: str = "en", count: int = 10) -> List[str]:
        """Return the whitespace-stripped, non-empty claims for one topic/locale.

        Args:
            topic: Topic domain passed to the predictor.
            locale: Language locale passed to the predictor.
            count: Number of claims requested from the predictor.
        """
        prediction = self.generate(topic=topic, locale=locale, count=count)
        cleaned = []
        for raw_claim in prediction.claims:
            text = raw_claim.strip()
            if text:
                cleaned.append(text)
        return cleaned

    def generate_all(self, topics: List[str] = None, locales: List[str] = None,
                     per_topic: int = 10) -> List[dict]:
        """Generate claims for the cross product of topics and locales.

        Falls back to the module-level ``TOPICS`` / ``LOCALES`` when the
        corresponding argument is missing or empty.

        Returns:
            One dict per claim with keys ``topic``, ``locale``, ``claim_text``
            and ``generated_at`` (ISO-8601 string from ``datetime.utcnow()``).
        """
        if not topics:
            topics = TOPICS
        if not locales:
            locales = LOCALES

        # The predictor is invoked once per (topic, locale) pair; each claim
        # it yields is stamped individually.
        return [
            {
                "topic": topic,
                "locale": locale,
                "claim_text": claim_text,
                "generated_at": datetime.utcnow().isoformat(),
            }
            for topic in topics
            for locale in locales
            for claim_text in self(topic=topic, locale=locale, count=per_topic)
        ]
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# ============================================================
|
|
102
|
+
# M2: Evidence Enricher
|
|
103
|
+
# ============================================================
|
|
104
|
+
|
|
105
|
+
class EvidenceEnricher(dspy.Module):
    """
    M2: Generate evidence, labels, and FAISS vectors for claims.

    For each generated claim, this module:
    1. Produces synthetic evidence text
    2. Assigns a label (TRUE/FALSE/UNVERIFIABLE)
    3. Creates a FAISS vector for semantic retrieval
    4. Stores metadata in the vector index
    """

    def __init__(self):
        super().__init__()
        # Chain-of-thought predictor over EvidenceLabelingSignature.
        self.label = dspy.ChainOfThought(EvidenceLabelingSignature)

    def forward(self, claim: str) -> dict:
        """Generate evidence and a label for a single claim.

        Args:
            claim: Claim text to label.

        Returns:
            Dict with keys ``claim``, ``evidence``, ``label`` (stripped and
            upper-cased, otherwise unvalidated), ``explanation`` and
            ``enriched_at`` (ISO-8601 string from ``datetime.utcnow()``).
        """
        result = self.label(claim=claim)
        return {
            "claim": claim,
            "evidence": result.evidence_text,
            "label": result.label.strip().upper(),
            "explanation": result.explanation,
            "enriched_at": datetime.utcnow().isoformat(),
        }

    async def enrich_batch(self, claims: List[dict], embed_fn=None) -> int:
        """
        Enrich a batch of claims and index them in FAISS.

        Enrichment is best-effort: a claim whose labelling, embedding or
        metadata construction raises is skipped, and the failure is printed
        instead of being silently discarded.

        Args:
            claims: List of claim dicts from ClaimGenerator (``claim_text`` is
                required; ``topic`` and ``locale`` are optional).
            embed_fn: Function to convert text → embedding vector. Its result
                is coerced to a float32 array and used as returned (it is not
                re-normalised). When omitted, a random unit vector is used.

        Returns:
            Number of claims indexed.
        """
        vectors = []
        metadata_list = []

        for item in claims:
            try:
                enriched = self(item["claim_text"])

                # Generate embedding (placeholder — use real embedding model in production)
                if embed_fn:
                    vector = np.asarray(embed_fn(enriched["claim"]), dtype="float32")
                else:
                    # Random 1536-dim unit vector as placeholder.
                    # NOTE(review): random vectors carry no semantic signal,
                    # so retrieval over them is only meaningful for demos.
                    vector = np.random.randn(1536).astype("float32")
                    vector = vector / np.linalg.norm(vector)

                metadata = {
                    "claim_text": enriched["claim"],
                    "evidence": enriched["evidence"],
                    "label": enriched["label"],
                    "explanation": enriched["explanation"],
                    "topic": item.get("topic", ""),
                    "locale": item.get("locale", "en"),
                    "enriched_at": enriched["enriched_at"],
                }
            except Exception as exc:
                # Deliberately broad: one bad claim must not abort the batch,
                # but the failure should be visible.
                print(f"[VERIFAID] M2: skipped claim ({type(exc).__name__}: {exc})")
                continue

            # Append both entries only after everything succeeded so the two
            # parallel lists can never get out of step.
            vectors.append(vector)
            metadata_list.append(metadata)

        if vectors:
            faiss_store.add_vectors(
                np.array(vectors, dtype="float32"),
                metadata_list,
            )
            faiss_store.save()

        return len(vectors)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
# ============================================================
|
|
183
|
+
# Offline Pipeline Scheduler
|
|
184
|
+
# ============================================================
|
|
185
|
+
|
|
186
|
+
class OfflinePipelineScheduler:
    """
    Scheduled execution of the VERIFAID offline pipeline.

    Runs weekly to:
    1. Generate new claims across topics/locales (M1)
    2. Enrich + label + index in FAISS (M2)
    3. Loop 2: ingest newly verified claims from online pipeline
    """

    def __init__(self):
        # Each scheduler owns its own generator and enricher instances.
        self.generator = ClaimGenerator()
        self.enricher = EvidenceEnricher()

    async def run_weekly_job(self):
        """Execute the full offline pipeline.

        Returns:
            Dict with ``claims_generated`` and ``claims_indexed`` counts.
        """
        print(f"[VERIFAID] Starting weekly dataset pipeline: {datetime.utcnow().isoformat()}")

        # M1: Generate claims
        claims = self.generator.generate_all()
        print(f"[VERIFAID] M1 complete: {len(claims)} claims generated")

        # M2: Enrich + index
        indexed = await self.enricher.enrich_batch(claims)
        print(f"[VERIFAID] M2 complete: {indexed} claims indexed in FAISS")

        # Loop 2: ingest verified claims
        # (Phase 8 — reads from PostgreSQL verdicts table)
        print(f"[VERIFAID] FAISS index size: {len(faiss_store)} vectors")

        return {"claims_generated": len(claims), "claims_indexed": indexed}

    async def ingest_verified_claim(self, claim_text: str, verdict_data: dict):
        """
        Loop 2 integration: ingest a newly verified claim into FAISS.

        Called after each successful pipeline verdict.

        Args:
            claim_text: Text of the verified claim.
            verdict_data: Verdict payload; ``verdict`` and ``explanation``
                override the enricher's label/explanation when present, and
                ``run_id`` is stored alongside the vector.
        """
        enriched = self.enricher(claim_text)

        # Placeholder embedding: random 1536-dim unit vector.
        # NOTE(review): carries no semantic signal — replace with a real
        # embedding before relying on retrieval of these entries.
        vector = np.random.randn(1536).astype("float32")
        vector = vector / np.linalg.norm(vector)

        faiss_store.add_vectors(
            np.array([vector], dtype="float32"),
            [{
                "claim_text": claim_text,
                "evidence": enriched["evidence"],
                "label": verdict_data.get("verdict", enriched["label"]),
                "explanation": verdict_data.get("explanation", enriched["explanation"]),
                "verified": True,
                "run_id": verdict_data.get("run_id"),
            }],
        )
        # Persist right away, as enrich_batch does; otherwise the entry is
        # not saved until the next batch run calls save().
        faiss_store.save()

        print("[VERIFAID] Loop 2: indexed verified claim in FAISS")
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
# Singletons
# Created at import time, so importing this module constructs the DSPy
# predictors immediately.
# NOTE(review): OfflinePipelineScheduler.__init__ builds its own ClaimGenerator
# and EvidenceEnricher, so `offline_scheduler` does not share the two
# instances defined just above it.
claim_generator = ClaimGenerator()
evidence_enricher = EvidenceEnricher()
offline_scheduler = OfflinePipelineScheduler()
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Copyright (c) 2026 Jinan Kordab
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Replay Engine [C5] — Re-exports from full implementation.
|
|
6
|
+
==========================================================
|
|
7
|
+
See pipeline/replay_engine.py for the full ReplayEngine class
|
|
8
|
+
with replay_run(), compare_verdicts(), and rollback_pipeline_version().
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from pipeline.replay_engine import (
|
|
12
|
+
ReplayEngine, ReplayComparison, StageDiff,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
__all__ = ["ReplayEngine", "ReplayComparison", "StageDiff"]
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
# Copyright (c) 2026 Jinan Kordab
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Replay Engine [C5] — Full Implementation
|
|
6
|
+
==========================================
|
|
7
|
+
Replay historical claims through the updated pipeline.
|
|
8
|
+
Compare old vs new verdicts. Support rollback to previous pipeline versions.
|
|
9
|
+
|
|
10
|
+
Reads from the append-only PostgreSQL audit ledger.
|
|
11
|
+
Every replay creates a new immutable ReplayRun record.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import uuid
|
|
15
|
+
from datetime import datetime
|
|
16
|
+
from typing import Optional, Dict, List
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
|
|
19
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
20
|
+
from sqlalchemy import select
|
|
21
|
+
from app.db.models import AnalysisRun, StageRecord, Verdict, ReplayRun
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class StageDiff:
    """Difference between old and new stage outputs."""
    # Name of the pipeline stage being compared (e.g. "stage_1a").
    stage_name: str
    # Stage output recorded for the original run.
    old_output: dict
    # Stage output from the replayed run; {} when the stage has no counterpart.
    new_output: dict
    # True when the two outputs differ or one side is missing.
    changed: bool
    # Short human-readable description of the comparison result.
    diff_summary: str
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class ReplayComparison:
    """Full side-by-side comparison of old vs new verdict."""
    # run_id of the historical run that was replayed.
    original_run_id: uuid.UUID
    # run_id generated for the replay.
    new_run_id: uuid.UUID
    # Verdict labels; ReplayEngine.replay_run stores "N/A" when no Verdict
    # row was found for the corresponding run.
    original_verdict: str
    new_verdict: str
    # True only when both verdicts exist and their labels differ.
    verdict_changed: bool
    # Confidences; ReplayEngine.replay_run stores 0.0 when the verdict is missing.
    original_confidence: float
    new_confidence: float
    # Per-stage comparison results.
    stage_diffs: List[StageDiff]
    # When the comparison was built (naive datetime from datetime.utcnow()).
    replayed_at: datetime
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class ReplayEngine:
    """
    Replays historical claims through the current pipeline version.

    Jewel [C5] — Append-Only Audit Ledger + Replay:
    Every pipeline run is immutably recorded. Historical claims can be
    re-executed through updated pipeline versions to compare verdicts.
    This enables:
    - Forensic audit: prove exactly what changed and why
    - Regression testing: ensure pipeline improvements don't break
    - Rollback: restore previous pipeline version if needed
    """

    # Version stamped on the replayed AnalysisRun and on the ReplayRun record.
    PIPELINE_VERSION = "0.1.0"

    def __init__(self, db: AsyncSession):
        self.db = db

    async def replay_run(self, original_run_id: uuid.UUID) -> ReplayComparison:
        """
        Replay a historical claim through the current pipeline.

        Steps:
        1. Read original claim text from stage_1a input_snapshot
        2. Run claim through current pipeline version
        3. Compare old vs new stage outputs
        4. Compare old vs new verdict
        5. Create ReplayRun record (append-only)

        NOTE: the pipeline re-run (step 2) is still a placeholder. No new
        stages are produced, so every original stage is reported as missing
        from the new run, and the new verdict is "N/A" unless a Verdict row
        already exists for the generated run id.

        Args:
            original_run_id: ``run_id`` of the AnalysisRun to replay.

        Returns:
            The ReplayComparison that was also persisted as a ReplayRun.

        Raises:
            ValueError: If the run does not exist or no claim text can be
                recovered from its stage_1a record.
        """
        # 1. Fetch original run
        result = await self.db.execute(
            select(AnalysisRun).where(AnalysisRun.run_id == original_run_id)
        )
        original_run = result.scalar()
        if not original_run:
            raise ValueError(f"Run not found: {original_run_id}")

        # 2. Fetch original stage records
        stages_result = await self.db.execute(
            select(StageRecord)
            .where(StageRecord.run_id == original_run_id)
            .order_by(StageRecord.created_at)
        )
        original_stages = stages_result.scalars().all()

        # 3. Extract original claim text
        claim_text = ""
        for stage in original_stages:
            if stage.stage_name == "stage_1a":
                # A missing snapshot falls through to the ValueError below
                # instead of raising AttributeError here.
                claim_text = (stage.input_snapshot or {}).get("claim_text", "")

        if not claim_text:
            raise ValueError("Could not extract claim text from audit trail")

        # 4. Fetch original verdict
        verdict_result = await self.db.execute(
            select(Verdict).where(Verdict.run_id == original_run_id)
        )
        original_verdict = verdict_result.scalar()

        # 5. Create new analysis run (replay)
        new_run_id = uuid.uuid4()
        new_run = AnalysisRun(
            claim_id=original_run.claim_id,
            run_id=new_run_id,
            status="replaying",
            pipeline_version=self.PIPELINE_VERSION,  # Current version
            # NOTE(review): naive UTC timestamps are kept on purpose; switching
            # to timezone-aware values depends on the column types — confirm.
            started_at=datetime.utcnow(),
        )
        self.db.add(new_run)
        # Flush explicitly so new_run.id is populated before it is used for
        # the ReplayRun below, regardless of the session's autoflush setting.
        # (Presumably `id` is generated at insert time — verify in the model.)
        await self.db.flush()

        # 6. Run pipeline with current version (placeholder — real pipeline in Phase 5)
        # In production: await pipeline_service.run_pipeline(claim_text, ...)
        # For now: simulate re-run

        # 7. Fetch new verdict (simulated)
        new_verdict_result = await self.db.execute(
            select(Verdict).where(Verdict.run_id == new_run_id)
        )
        new_verdict = new_verdict_result.scalar()

        new_run.status = "completed"
        new_run.completed_at = datetime.utcnow()

        # 8. Compare stage-by-stage (no new stages exist yet — see NOTE above)
        stage_diffs = self._compute_stage_diffs(original_stages, [])

        # 9. Compare verdicts; a missing verdict on either side counts as
        # "not changed" here.
        verdict_changed = False
        if original_verdict and new_verdict:
            verdict_changed = original_verdict.verdict != new_verdict.verdict

        comparison = ReplayComparison(
            original_run_id=original_run_id,
            new_run_id=new_run_id,
            original_verdict=original_verdict.verdict if original_verdict else "N/A",
            new_verdict=new_verdict.verdict if new_verdict else "N/A",
            verdict_changed=verdict_changed,
            original_confidence=original_verdict.confidence if original_verdict else 0.0,
            new_confidence=new_verdict.confidence if new_verdict else 0.0,
            stage_diffs=stage_diffs,
            replayed_at=datetime.utcnow(),
        )

        # 10. Create ReplayRun record (append-only)
        replay_record = ReplayRun(
            original_run_id=original_run.id,
            new_run_id=new_run.id,
            pipeline_version=self.PIPELINE_VERSION,
            verdict_changed=verdict_changed,
            verdict_comparison={
                "original_verdict": comparison.original_verdict,
                "new_verdict": comparison.new_verdict,
                "original_confidence": comparison.original_confidence,
                "new_confidence": comparison.new_confidence,
                "stage_diffs": [
                    {
                        "stage": d.stage_name,
                        "changed": d.changed,
                        "summary": d.diff_summary,
                    }
                    for d in stage_diffs
                ],
            },
        )
        self.db.add(replay_record)
        await self.db.commit()

        return comparison

    def _compute_stage_diffs(
        self,
        original_stages: List[StageRecord],
        new_stages: List[StageRecord],
    ) -> List[StageDiff]:
        """Compute per-stage differences between old and new pipeline runs.

        Stages are matched by ``stage_name``. Original stages come first, in
        their given order; stages that exist only in the new run follow, so
        additions made by a newer pipeline version are reported too.
        """
        diffs = []
        new_by_stage = {s.stage_name: s for s in new_stages}
        original_names = set()

        for old_stage in original_stages:
            original_names.add(old_stage.stage_name)
            new_stage = new_by_stage.get(old_stage.stage_name)
            if new_stage is None:
                diffs.append(StageDiff(
                    stage_name=old_stage.stage_name,
                    old_output=old_stage.output_snapshot,
                    new_output={},
                    changed=True,
                    diff_summary=f"Stage {old_stage.stage_name} not present in new run.",
                ))
                continue

            changed = old_stage.output_snapshot != new_stage.output_snapshot
            diffs.append(StageDiff(
                stage_name=old_stage.stage_name,
                old_output=old_stage.output_snapshot,
                new_output=new_stage.output_snapshot,
                changed=changed,
                diff_summary="Output changed." if changed else "Output identical.",
            ))

        # Stages introduced by the new run have no original counterpart.
        for stage_name, new_stage in new_by_stage.items():
            if stage_name in original_names:
                continue
            diffs.append(StageDiff(
                stage_name=stage_name,
                old_output={},
                new_output=new_stage.output_snapshot,
                changed=True,
                diff_summary=f"Stage {stage_name} not present in original run.",
            ))

        return diffs

    async def compare_verdicts(
        self, original_run_id: uuid.UUID, new_run_id: uuid.UUID
    ) -> dict:
        """Side-by-side comparison of two specific verdicts.

        Returns:
            Dict with ``original`` and ``new`` sub-dicts (``verdict``,
            ``confidence``, ``explanation``; all ``None`` when the Verdict row
            is missing) and ``changed``. Unlike ``replay_run``, a missing
            verdict on either side is reported here as ``changed=True``.
        """
        orig = await self.db.execute(
            select(Verdict).where(Verdict.run_id == original_run_id)
        )
        new = await self.db.execute(
            select(Verdict).where(Verdict.run_id == new_run_id)
        )
        o = orig.scalar()
        n = new.scalar()

        return {
            "original": {
                "verdict": o.verdict if o else None,
                "confidence": o.confidence if o else None,
                "explanation": o.explanation if o else None,
            },
            "new": {
                "verdict": n.verdict if n else None,
                "confidence": n.confidence if n else None,
                "explanation": n.explanation if n else None,
            },
            "changed": (o.verdict != n.verdict) if (o and n) else True,
        }

    async def rollback_pipeline_version(self, version: str) -> dict:
        """
        Flag a pipeline version for rollback.

        Currently only builds and returns the status payload; nothing is
        persisted or reloaded here.
        In production: loads the DSPy module checkpoints from that version.
        """
        return {
            "status": "rollback_flagged",
            "version": version,
            "message": f"Pipeline version {version} flagged for rollback. "
                       "Restart backend to apply.",
        }
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# Copyright (c) 2026 Jinan Kordab
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
DSPy Signatures — Typed Interfaces for the Piste Pipeline
|
|
6
|
+
==========================================================
|
|
7
|
+
Every LLM call in the pipeline is defined as a typed DSPy Signature.
|
|
8
|
+
This makes modules model-agnostic, testable, and compiler-optimizable.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import dspy
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# --- Stage 1: Claim Processing ---
|
|
15
|
+
|
|
16
|
+
class CheckWorthinessSignature(dspy.Signature):
    """Classify whether a claim is worth fact-checking (CFC/UFC/NFC).

    Jewel [J4] — ClaimBuster's pre-filter: find the needles before examining them.
    """
    # NOTE(review): the docstring and `desc` strings are presumably fed to the
    # model by DSPy — treat them as runtime text when editing.

    # Inputs: the raw claim and the language it is written in.
    claim_text: str = dspy.InputField(desc="The raw claim text to evaluate")
    locale: str = dspy.InputField(desc="Language locale of the claim (en, fr, es, ...)")

    # Outputs: three-way label plus a confidence score and justification.
    label: str = dspy.OutputField(
        desc="CFC (Check-worthy Factual Claim), UFC (Unimportant Factual Claim), or NFC (Non-Factual Claim)"
    )
    confidence: float = dspy.OutputField(desc="Confidence in the classification (0.0–1.0)")
    rationale: str = dspy.OutputField(desc="Brief explanation of why this classification was assigned")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class AtomicClaimDecompositionSignature(dspy.Signature):
    """Decompose a compound claim into independent atomic claims.

    Jewel [J7] — FACT5's atomization: each atomic claim focuses on one verifiable fact.
    """
    # NOTE(review): the docstring and `desc` strings are presumably fed to the
    # model by DSPy — treat them as runtime text when editing.

    # Input: a claim that may bundle several facts.
    claim_text: str = dspy.InputField(desc="The claim text to decompose (may be compound)")

    # Output: one list entry per single-fact claim.
    atomic_claims: list[str] = dspy.OutputField(
        desc="List of independent atomic claims, each a single verifiable fact"
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# --- Stage 2: Blind Retrieval ---
|
|
46
|
+
|
|
47
|
+
class SearchDecisionSignature(dspy.Signature):
    """Decide whether web search is needed for this claim.

    Jewel [J1] — Veracity's LLM-autonomous search decision:
    skip search for well-known facts to save cost and latency.
    """
    # NOTE(review): the docstring and `desc` strings are presumably fed to the
    # model by DSPy — treat them as runtime text when editing.

    # Input: one atomic claim.
    atomic_claim: str = dspy.InputField(desc="A single atomic claim to evaluate")

    # Outputs: the search/no-search decision and its justification.
    needs_search: bool = dspy.OutputField(
        desc="True if external evidence is needed; False if answerable from parametric knowledge"
    )
    reasoning: str = dspy.OutputField(desc="Why search is or is not needed")
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class QueryGenerationSignature(dspy.Signature):
    """Generate NEUTRAL search queries from a claim.

    Jewel [J2] — Blind Retrieval: queries must be factual and neutral.
    NEVER include the original claim text — prevents confirmation bias.
    """
    # NOTE(review): the neutrality rule is expressed only through the prompt
    # text above and the `desc` below; nothing in this class enforces it.

    # Input: the atomic claim that needs evidence.
    atomic_claim: str = dspy.InputField(desc="The atomic claim to search evidence for")

    # Output: the queries to issue.
    search_queries: list[str] = dspy.OutputField(
        desc="Neutral, factual search queries. Must NOT contain the claim text or biased framing."
    )
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class QueryRefinementSignature(dspy.Signature):
    """Analyze why previous search was insufficient and generate refined queries.

    Jewel [J8c] — ClaimeAI's feedback-driven iterative query refinement.
    Loop 1: seconds-scale retry with intelligent query adjustment.
    """
    # NOTE(review): the docstring and `desc` strings are presumably fed to the
    # model by DSPy — treat them as runtime text when editing.

    # Inputs: the failed query and the reason its results fell short.
    original_query: str = dspy.InputField(desc="The query that returned insufficient results")
    insufficient_reason: str = dspy.InputField(desc="Why the previous results were insufficient")

    # Output: replacement queries for the retry.
    refined_queries: list[str] = dspy.OutputField(
        desc="New, refined search queries targeting the identified gaps"
    )
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# --- Stage 3: Per-Source Classification ---
|
|
92
|
+
|
|
93
|
+
class SourceClassificationSignature(dspy.Signature):
    """Classify a single evidence source as supporting, refuting, or unrelated to a claim.

    Jewel [J3] — Aletheia's structured per-source classification:
    each source evaluated independently BEFORE aggregation.
    """
    # NOTE(review): the docstring and `desc` strings are presumably fed to the
    # model by DSPy — treat them as runtime text when editing.

    # Inputs: the claim, one evidence source (title, excerpt, domain, and a
    # credibility score computed before this call), and the response locale.
    claim: str = dspy.InputField(desc="The atomic claim being verified")
    evidence_title: str = dspy.InputField(desc="Title of the evidence source")
    evidence_excerpt: str = dspy.InputField(desc="Relevant excerpt from the evidence source")
    source_domain: str = dspy.InputField(desc="Domain name of the source (e.g., bbc.com)")
    credibility_score: float = dspy.InputField(desc="Pre-computed domain credibility (0.0–1.0)")
    locale: str = dspy.InputField(desc="Language locale for the response (en, fr, ...)")

    # Outputs: stance label for this single source, with confidence and reason.
    label: str = dspy.OutputField(desc="SUPPORTS, REFUTES, or UNRELATED")
    confidence: float = dspy.OutputField(desc="Confidence in the classification (0.0–1.0)")
    rationale: str = dspy.OutputField(desc="Brief explanation of why this label was assigned")
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# --- Stage 4: Verdict Aggregation ---
|
|
113
|
+
|
|
114
|
+
class VerdictAggregationSignature(dspy.Signature):
    """Synthesize per-source classifications into a final 7-way verdict.

    Jewel [J5] — DSPy-powered aggregation with PolitiFact-aligned granularity.
    Weighted by source credibility scores.
    """
    # NOTE(review): the docstring and `desc` strings are presumably fed to the
    # model by DSPy — treat them as runtime text when editing.

    # Inputs: the claim, the per-source classifications serialised as a JSON
    # string, and the response locale.
    claim: str = dspy.InputField(desc="The atomic claim being verified")
    classifications_json: str = dspy.InputField(
        desc="JSON array of per-source classifications with labels, confidences, and rationales"
    )
    locale: str = dspy.InputField(desc="Language locale for the response (en, fr, ...)")

    # Outputs: final verdict label, confidence, explanation, and a JSON string
    # with the per-label weights (callers must parse it themselves).
    verdict: str = dspy.OutputField(
        desc="TRUE, MOSTLY_TRUE, HALF_TRUE, MOSTLY_FALSE, FALSE, PANTS_ON_FIRE, or UNVERIFIABLE"
    )
    confidence: float = dspy.OutputField(desc="Overall confidence in the verdict (0.0–1.0)")
    explanation: str = dspy.OutputField(
        desc="Natural language explanation of the verdict with source citations"
    )
    distribution_json: str = dspy.OutputField(
        desc="JSON object mapping each verdict label to its probability weight"
    )
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Copyright (c) 2026 Jinan Kordab
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
# Stage 1 — Claim Processing
|
|
5
|
+
# 1a: Check-Worthiness Detector [J4] — pipeline/stage1/check_worthiness.py
|
|
6
|
+
# 1b: Atomic Claim Decomposer [J7] — pipeline/stage1/atomic_decomposer.py
|
|
7
|
+
# Orchestrator: pipeline/stage1/orchestrator.py
|
|
8
|
+
|
|
9
|
+
from pipeline.stage1.check_worthiness import CheckWorthinessDetector, check_worthiness_detector
|
|
10
|
+
from pipeline.stage1.atomic_decomposer import AtomicClaimDecomposer, atomic_claim_decomposer
|
|
11
|
+
from pipeline.stage1.orchestrator import Stage1Orchestrator, Stage1Result, stage1_orchestrator
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"CheckWorthinessDetector",
|
|
15
|
+
"check_worthiness_detector",
|
|
16
|
+
"AtomicClaimDecomposer",
|
|
17
|
+
"atomic_claim_decomposer",
|
|
18
|
+
"Stage1Orchestrator",
|
|
19
|
+
"Stage1Result",
|
|
20
|
+
"stage1_orchestrator",
|
|
21
|
+
]
|