mcp-agentic-pipelines 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +93 -0
- package/README.md +258 -0
- package/package.json +70 -0
- package/packages/clinical/package.json +22 -0
- package/packages/clinical/src/index.ts +262 -0
- package/packages/clinical/tsconfig.json +13 -0
- package/packages/core/package.json +21 -0
- package/packages/core/src/config.ts +138 -0
- package/packages/core/src/errors.ts +100 -0
- package/packages/core/src/index.ts +104 -0
- package/packages/core/src/llm-config.ts +213 -0
- package/packages/core/src/logging.ts +66 -0
- package/packages/core/src/python-bridge.ts +384 -0
- package/packages/core/src/rate-limiter.ts +136 -0
- package/packages/core/src/types.ts +203 -0
- package/packages/core/src/validation.ts +101 -0
- package/packages/core/tsconfig.json +10 -0
- package/packages/deeppipe/package.json +21 -0
- package/packages/deeppipe/src/index.ts +424 -0
- package/packages/deeppipe/tsconfig.json +13 -0
- package/packages/piste/package.json +20 -0
- package/packages/piste/src/index.ts +48 -0
- package/packages/piste/tsconfig.json +13 -0
- package/packages/precis/package.json +20 -0
- package/packages/precis/src/index.ts +67 -0
- package/packages/precis/tsconfig.json +13 -0
- package/packages/server/package.json +31 -0
- package/packages/server/src/index.ts +427 -0
- package/packages/server/tsconfig.json +17 -0
- package/setup.mjs +141 -0
- package/test.mjs +337 -0
- package/vendors/clinical-intake/pipeline.mjs +349 -0
- package/vendors/clinical-intake/questions/en.txt +9 -0
- package/vendors/clinical-intake/questions/fr.txt +9 -0
- package/vendors/piste/.env.example +73 -0
- package/vendors/piste/app/core/__init__.py +4 -0
- package/vendors/piste/app/core/config.py +83 -0
- package/vendors/piste/app/core/debuglog.py +16 -0
- package/vendors/piste/app/core/middleware.py +40 -0
- package/vendors/piste/bridge_piste.py +301 -0
- package/vendors/piste/pipeline/__init__.py +4 -0
- package/vendors/piste/pipeline/compiler.py +68 -0
- package/vendors/piste/pipeline/offline/__init__.py +28 -0
- package/vendors/piste/pipeline/offline/verifaid_pipeline.py +247 -0
- package/vendors/piste/pipeline/replay.py +15 -0
- package/vendors/piste/pipeline/replay_engine.py +249 -0
- package/vendors/piste/pipeline/signatures/__init__.py +4 -0
- package/vendors/piste/pipeline/signatures/signatures.py +136 -0
- package/vendors/piste/pipeline/stage1/__init__.py +21 -0
- package/vendors/piste/pipeline/stage1/atomic_decomposer.py +61 -0
- package/vendors/piste/pipeline/stage1/check_worthiness.py +100 -0
- package/vendors/piste/pipeline/stage1/orchestrator.py +175 -0
- package/vendors/piste/pipeline/stage1/test_stage1.py +162 -0
- package/vendors/piste/pipeline/stage2/__init__.py +34 -0
- package/vendors/piste/pipeline/stage2/blind_retriever.py +303 -0
- package/vendors/piste/pipeline/stage2/canonical_mapper.py +124 -0
- package/vendors/piste/pipeline/stage2/credibility_scorer.py +85 -0
- package/vendors/piste/pipeline/stage2/orchestrator.py +311 -0
- package/vendors/piste/pipeline/stage2/query_refiner.py +88 -0
- package/vendors/piste/pipeline/stage2/search_decision.py +69 -0
- package/vendors/piste/pipeline/stage2/test_stage2.py +265 -0
- package/vendors/piste/pipeline/stage3/__init__.py +20 -0
- package/vendors/piste/pipeline/stage3/classifier.py +79 -0
- package/vendors/piste/pipeline/stage3/orchestrator.py +225 -0
- package/vendors/piste/pipeline/stage3/test_stage3.py +101 -0
- package/vendors/piste/pipeline/stage4/__init__.py +33 -0
- package/vendors/piste/pipeline/stage4/criticality_gate.py +177 -0
- package/vendors/piste/pipeline/stage4/orchestrator.py +269 -0
- package/vendors/piste/pipeline/stage4/test_stage4.py +192 -0
- package/vendors/piste/pipeline/stage4/verdict_aggregator.py +157 -0
- package/vendors/piste/requirements.txt +53 -0
- package/vendors/precis/backend/__init__.py +6 -0
- package/vendors/precis/backend/agents/__init__.py +3 -0
- package/vendors/precis/backend/agents/data_synthesis.py +105 -0
- package/vendors/precis/backend/agents/dist_free_synth.py +97 -0
- package/vendors/precis/backend/agents/exact_hash_retriever.py +327 -0
- package/vendors/precis/backend/agents/fusion_ranker.py +64 -0
- package/vendors/precis/backend/agents/guardrail.py +175 -0
- package/vendors/precis/backend/agents/query_expander.py +89 -0
- package/vendors/precis/backend/agents/radial_interpol.py +99 -0
- package/vendors/precis/backend/agents/report_generator.py +92 -0
- package/vendors/precis/backend/agents/semantic_reranker.py +135 -0
- package/vendors/precis/backend/agents/stat_anomaly.py +93 -0
- package/vendors/precis/backend/agents/vector_index.py +123 -0
- package/vendors/precis/backend/agents/veri_score.py +341 -0
- package/vendors/precis/backend/agents/work_order_extractor.py +205 -0
- package/vendors/precis/backend/api/__init__.py +3 -0
- package/vendors/precis/backend/api/routes/__init__.py +3 -0
- package/vendors/precis/backend/config.py +88 -0
- package/vendors/precis/backend/core/__init__.py +13 -0
- package/vendors/precis/backend/core/hashing.py +22 -0
- package/vendors/precis/backend/core/metrics.py +77 -0
- package/vendors/precis/backend/core/multitoken.py +166 -0
- package/vendors/precis/backend/core/pmi.py +54 -0
- package/vendors/precis/backend/core/stemming.py +74 -0
- package/vendors/precis/backend/core/tracing.py +150 -0
- package/vendors/precis/backend/data/__init__.py +3 -0
- package/vendors/precis/backend/data/chunker.py +57 -0
- package/vendors/precis/backend/data/pdf_parser.py +42 -0
- package/vendors/precis/backend/db/__init__.py +3 -0
- package/vendors/precis/backend/db/models.py +173 -0
- package/vendors/precis/backend/db/repository.py +269 -0
- package/vendors/precis/backend/llm/__init__.py +3 -0
- package/vendors/precis/backend/llm/anthropic_provider.py +39 -0
- package/vendors/precis/backend/llm/base.py +147 -0
- package/vendors/precis/backend/llm/deepseek_provider.py +43 -0
- package/vendors/precis/backend/llm/factory.py +60 -0
- package/vendors/precis/backend/llm/google_provider.py +39 -0
- package/vendors/precis/backend/llm/ollama_provider.py +54 -0
- package/vendors/precis/backend/llm/openai_provider.py +50 -0
- package/vendors/precis/backend/main.py +677 -0
- package/vendors/precis/backend/orchestrator/__init__.py +3 -0
- package/vendors/precis/backend/orchestrator/planner.py +81 -0
- package/vendors/precis/backend/orchestrator/router.py +319 -0
- package/vendors/precis/backend/orchestrator/types.py +58 -0
- package/vendors/precis/bridge_precis.py +185 -0
- package/vendors/precis/data/sample_reports/README.md +8 -0
- package/vendors/precis/data/seed_data.py +115 -0
- package/vendors/precis/requirements.txt +19 -0
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Copyright (c) 2026 Jinan Kordab
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Stage 1b: Atomic Claim Decomposer [J7]
|
|
6
|
+
========================================
|
|
7
|
+
FACT5's jewel: decompose compound claims into independent atomic claims.
|
|
8
|
+
Each atomic claim focuses on ONE verifiable fact.
|
|
9
|
+
|
|
10
|
+
Example:
|
|
11
|
+
"We created 800,000 jobs and cut taxes by 20%"
|
|
12
|
+
→ ["We created 800,000 jobs.", "We cut taxes by 20%."]
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import dspy
|
|
16
|
+
from typing import List
|
|
17
|
+
from pipeline.signatures.signatures import AtomicClaimDecompositionSignature
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class AtomicClaimDecomposer(dspy.Module):
    """
    DSPy module that splits compound claims into atomic, independently
    verifiable claims.

    Jewel [J7] — FACT5's atomization:
    A holistic verdict on a compound claim is meaningless — some parts
    may be true, others false. Atomize first, then verify each sub-claim
    independently.
    """

    # A claim ending in one of these is already treated as a full sentence.
    _SENTENCE_ENDINGS = (".", "!", "?")

    def __init__(self):
        super().__init__()
        self.decompose = dspy.ChainOfThought(AtomicClaimDecompositionSignature)

    def forward(self, claim_text: str) -> List[str]:
        """
        Decompose a claim into atomic sub-claims.

        Args:
            claim_text: The claim to decompose.

        Returns:
            List of independent atomic claims, each stripped of surrounding
            whitespace and terminated with ".", "!" or "?".
            Blank entries produced by the model are dropped. If nothing
            usable remains, the stripped original claim is returned as a
            single-element list.
        """
        result = self.decompose(claim_text=claim_text)

        # A missing/None field must not crash the loop below.
        raw_claims = getattr(result, "atomic_claims", None) or []
        if isinstance(raw_claims, str):
            # Iterating a bare string would yield single characters.
            # NOTE(review): assumes one claim per line when the field comes
            # back as text — confirm against the signature's output type.
            raw_claims = raw_claims.splitlines()

        # Post-process: ensure each atomic claim is a complete sentence
        atomic_claims = []
        for claim in raw_claims:
            claim = claim.strip()
            if not claim:
                # Skip blanks so the fallback below can trigger.
                continue
            if not claim.endswith(self._SENTENCE_ENDINGS):
                claim += "."
            atomic_claims.append(claim)

        # If decomposition returned nothing useful, treat original as atomic
        if not atomic_claims:
            atomic_claims = [claim_text.strip()]

        return atomic_claims
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# Singleton instance
|
|
61
|
+
# Created at import time; __init__ builds the dspy.ChainOfThought predictor.
atomic_claim_decomposer = AtomicClaimDecomposer()
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# Copyright (c) 2026 Jinan Kordab
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Stage 1a: Check-Worthiness Detector [J4]
|
|
6
|
+
==========================================
|
|
7
|
+
ClaimBuster's jewel: classify whether a claim is worth fact-checking.
|
|
8
|
+
Uses DSPy ChainOfThought with voting (3 completions, majority wins).
|
|
9
|
+
If agreement < threshold → CFC when any vote was CFC, otherwise the safe default (UFC).
|
|
10
|
+
|
|
11
|
+
Verdict: CFC (Check-worthy Factual Claim), UFC (Unimportant Factual Claim),
|
|
12
|
+
or NFC (Non-Factual Claim).
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import dspy
|
|
16
|
+
from typing import Literal, Tuple
|
|
17
|
+
from pipeline.signatures.signatures import CheckWorthinessSignature
|
|
18
|
+
from app.core.config import settings
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class CheckWorthinessDetector(dspy.Module):
    """
    DSPy module that classifies whether a claim is worth fact-checking.

    Jewel [J4] — ClaimBuster's pre-filter:
    In the real world, claims are embedded in a firehose of text.
    You must find the needles (check-worthy claims) before examining them.

    Voting mechanism: N independent LLM completions
    (settings.VOTING_COMPLETIONS), most common label wins when its share of
    the votes reaches settings.VOTING_THRESHOLD.
    If no label meets the threshold → CFC when any vote was CFC (better to
    check than to miss), otherwise UFC.
    """

    def __init__(self):
        super().__init__()
        self.classify = dspy.ChainOfThought(CheckWorthinessSignature)
        self.voting_completions: int = settings.VOTING_COMPLETIONS
        self.voting_threshold: float = settings.VOTING_THRESHOLD

    def forward(
        self, claim_text: str, locale: str = "en"
    ) -> Tuple[str, float, str, list[str]]:
        """
        Classify claim check-worthiness with majority voting.

        Args:
            claim_text: The claim (optionally prefixed with context) to classify.
            locale: Language locale passed through to the classifier.

        Returns:
            label: "CFC", "UFC", or "NFC"
            confidence: 0.0–1.0, mean of the per-completion confidences
            rationale: Explanation of the classification
            votes: Raw individual votes for audit trail [C5]
        """
        votes: list[str] = []
        confidences: list[float] = []

        # Run N independent completions. At least one is always run so a
        # misconfigured count of 0 cannot produce an empty vote.
        for _ in range(max(1, self.voting_completions)):
            result = self.classify(claim_text=claim_text, locale=locale)
            votes.append(result.label.strip().upper())
            confidences.append(self._parse_confidence(result.confidence))

        # Majority vote
        final_label = self._resolve_vote(votes, confidences)
        avg_confidence = sum(confidences) / len(confidences)

        # Canned rationale keyed by the final label (no extra LLM call)
        rationale = self._get_rationale(claim_text, final_label)

        return final_label, avg_confidence, rationale, votes

    @staticmethod
    def _parse_confidence(raw) -> float:
        """Coerce a model-reported confidence to a float in [0.0, 1.0].

        Unparseable values (e.g. "high", None) and NaN count as 0.0 instead
        of aborting the whole vote.
        """
        try:
            value = float(raw)
        except (TypeError, ValueError):
            return 0.0
        if value != value:  # NaN is the only float not equal to itself
            return 0.0
        return min(1.0, max(0.0, value))

    def _resolve_vote(
        self, votes: list[str], confidences: list[float]
    ) -> str:
        """Determine final label via majority vote with configurable threshold.

        Args:
            votes: Upper-cased labels, one per completion.
            confidences: Per-completion confidences (currently unused here).

        Returns:
            The most common label if its share of the votes is at least
            ``self.voting_threshold``; otherwise "CFC" if any vote was CFC,
            else "UFC". An empty vote list yields "UFC".
        """
        from collections import Counter

        if not votes:
            # Nothing to count: fall back to the safe default.
            return "UFC"

        counts = Counter(votes)
        most_common_label, most_common_count = counts.most_common(1)[0]
        agreement_ratio = most_common_count / len(votes)

        if agreement_ratio >= self.voting_threshold:
            return most_common_label

        # No majority met → safe default
        # If any vote was CFC, be conservative and still check it
        if "CFC" in votes:
            return "CFC"
        return "UFC"

    def _get_rationale(self, claim_text: str, label: str) -> str:
        """Return a fixed rationale sentence for the final classification.

        ``claim_text`` is accepted for interface stability but is not used.
        """
        rationale_map = {
            "CFC": "This claim contains a verifiable factual assertion that warrants evidence-based checking.",
            "UFC": "This claim is factual but trivial or not of public interest — fact-checking resources are better allocated elsewhere.",
            "NFC": "This is an opinion, question, or non-factual statement — there is no verifiable claim to check.",
        }
        # Optionally, re-query LLM for a more specific rationale
        return rationale_map.get(label, "Classification complete.")
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# Singleton instance
|
|
100
|
+
# Created at import time; __init__ reads settings.VOTING_COMPLETIONS and
# settings.VOTING_THRESHOLD once, at construction.
check_worthiness_detector = CheckWorthinessDetector()
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# Copyright (c) 2026 Jinan Kordab
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Stage 1 Orchestrator
|
|
6
|
+
=====================
|
|
7
|
+
Coordinates Stage 1a (Check-Worthiness) and Stage 1b (Atomic Decomposition).
|
|
8
|
+
Writes APPEND-ONLY stage records to PostgreSQL audit ledger [C5].
|
|
9
|
+
Emits SSE events for real-time frontend updates.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import time
|
|
13
|
+
import uuid
|
|
14
|
+
from typing import Optional
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
|
|
17
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
18
|
+
from app.db.models import StageRecord
|
|
19
|
+
from pipeline.stage1.check_worthiness import check_worthiness_detector
|
|
20
|
+
from pipeline.stage1.atomic_decomposer import atomic_claim_decomposer
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class Stage1Result:
    """Result record produced by Stage 1 (claim processing)."""

    # The claim as submitted, and its language locale.
    claim_text: str
    locale: str

    # --- Stage 1a (check-worthiness) ---
    worthiness_label: str  # one of CFC, UFC, NFC
    worthiness_confidence: float
    worthiness_rationale: str
    worthiness_votes: list[str]  # raw per-completion votes

    # --- Stage 1b (atomic decomposition) ---
    atomic_claims: list[str]

    # False means the pipeline stops after Stage 1; stop_reason says why.
    is_check_worthy: bool
    stop_reason: str = ""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class Stage1Orchestrator:
    """
    Orchestrates Stage 1 of the fact-checking pipeline.

    Flow:
    1. Run CheckWorthinessDetector (1a) with voting
    2. If CFC → run AtomicClaimDecomposer (1b)
    3. If UFC or NFC → stop pipeline, return early verdict
    4. Write stage records to PostgreSQL (append-only)
    5. Emit SSE events via callback
    """

    # Placeholder run id recorded when the caller does not supply one.
    _NIL_RUN_ID = uuid.UUID("00000000-0000-0000-0000-000000000000")

    def __init__(self, sse_callback: Optional[callable] = None):
        """
        Args:
            sse_callback: function(event_type, data) to emit SSE events.
                May be async (its result is awaited) or a plain callable.
        """
        self.sse_callback = sse_callback

    async def process(
        self,
        claim_text: str,
        locale: str = "en",
        db: Optional[AsyncSession] = None,
        run_id: Optional[uuid.UUID] = None,
        context: str = "",
    ) -> Stage1Result:
        """
        Run Stage 1 processing.

        Args:
            claim_text: The raw claim text submitted by the user.
            locale: Language locale (en, fr, es, ...).
            db: Async database session for audit ledger writes. Records are
                only added to the session; nothing is committed here.
            run_id: Identifier stored on each stage record; the nil UUID is
                used when omitted.
            context: Optional additional context to aid the LLM.

        Returns:
            Stage1Result with worthiness classification and atomic claims.
        """
        # Merge context into claim text if provided
        if context and context.strip():
            effective_claim = f"Context: {context}\n\nClaim: {claim_text}"
        else:
            effective_claim = claim_text

        # --- Stage 1a: Check-Worthiness Detection ---
        await self._emit("stage_1a_start", {"claim_text": claim_text, "locale": locale, "context": context})
        t0 = time.monotonic()

        # NOTE(review): plain (non-awaited) call inside a coroutine — it
        # occupies the event loop until the detector returns.
        label, confidence, rationale, votes = check_worthiness_detector(
            effective_claim, locale
        )

        latency_1a = (time.monotonic() - t0) * 1000

        await self._emit("stage_1a_complete", {
            "label": label,
            "confidence": confidence,
            "votes": votes,
        })

        # Write stage record (append-only)
        self._record_stage(
            db,
            run_id,
            stage_name="stage_1a",
            input_snapshot={"claim_text": claim_text, "locale": locale, "context": context or None},
            output_snapshot={
                "label": label,
                "confidence": confidence,
                "rationale": rationale,
                "votes": votes,
            },
            model_used="dspy/check_worthiness",
            latency_ms=latency_1a,
        )

        # Stop if not check-worthy
        if label != "CFC":
            return Stage1Result(
                claim_text=claim_text,
                locale=locale,
                worthiness_label=label,
                worthiness_confidence=confidence,
                worthiness_rationale=rationale,
                worthiness_votes=votes,
                atomic_claims=[],
                is_check_worthy=False,
                stop_reason=f"Claim classified as {label}: {rationale}",
            )

        # --- Stage 1b: Atomic Claim Decomposition ---
        await self._emit("stage_1b_start", {"claim_text": claim_text})
        t0 = time.monotonic()

        # Decomposition receives the raw claim, not the context-merged text
        # that Stage 1a classified.
        atomic_claims = atomic_claim_decomposer(claim_text)

        latency_1b = (time.monotonic() - t0) * 1000

        await self._emit("stage_1b_complete", {
            "atomic_claims": atomic_claims,
            "count": len(atomic_claims),
        })

        # Write stage record (append-only)
        self._record_stage(
            db,
            run_id,
            stage_name="stage_1b",
            input_snapshot={"claim_text": claim_text},
            output_snapshot={"atomic_claims": atomic_claims},
            model_used="dspy/atomic_decomposer",
            latency_ms=latency_1b,
        )

        return Stage1Result(
            claim_text=claim_text,
            locale=locale,
            worthiness_label=label,
            worthiness_confidence=confidence,
            worthiness_rationale=rationale,
            worthiness_votes=votes,
            atomic_claims=atomic_claims,
            is_check_worthy=True,
        )

    def _record_stage(
        self,
        db: Optional[AsyncSession],
        run_id: Optional[uuid.UUID],
        *,
        stage_name: str,
        input_snapshot: dict,
        output_snapshot: dict,
        model_used: str,
        latency_ms: float,
    ) -> None:
        """Add one StageRecord to the session; no-op when no session is given."""
        if db is None:
            return
        db.add(StageRecord(
            run_id=run_id or self._NIL_RUN_ID,
            stage_name=stage_name,
            input_snapshot=input_snapshot,
            output_snapshot=output_snapshot,
            model_used=model_used,
            latency_ms=latency_ms,
            retry_attempt=0,
        ))

    async def _emit(self, event_type: str, data: dict):
        """Emit SSE event via callback if configured.

        The callback's return value is awaited only when it is awaitable, so
        both ``async def`` and plain callables are accepted.
        """
        import inspect

        if not self.sse_callback:
            return
        outcome = self.sse_callback(event_type, data)
        if inspect.isawaitable(outcome):
            await outcome
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
# Singleton
|
|
175
|
+
# Created without an sse_callback, so _emit does nothing for this instance
# unless sse_callback is assigned afterwards.
stage1_orchestrator = Stage1Orchestrator()
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# Copyright (c) 2026 Jinan Kordab
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Unit Tests — Stage 1: Claim Processing
|
|
6
|
+
=======================================
|
|
7
|
+
Tests CheckWorthinessDetector [J4] and AtomicClaimDecomposer [J7]
|
|
8
|
+
with mock DSPy LLM responses.
|
|
9
|
+
|
|
10
|
+
Run: pytest pipeline/stage1/test_stage1.py -v
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import pytest
|
|
14
|
+
from unittest.mock import patch, MagicMock
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# ============================================================
|
|
18
|
+
# Check-Worthiness Detector Tests
|
|
19
|
+
# ============================================================
|
|
20
|
+
|
|
21
|
+
class TestCheckWorthinessDetector:
    """Test CheckWorthinessDetector [J4] with voting mechanism."""

    @pytest.fixture
    def detector(self):
        """Detector with a pinned threshold so results don't depend on env config."""
        from pipeline.stage1.check_worthiness import CheckWorthinessDetector
        instance = CheckWorthinessDetector()
        # The expectations below are written against a 0.67 threshold.
        instance.voting_threshold = 0.67
        return instance

    def test_majority_vote_cfc(self, detector):
        """When all 3 votes are CFC, return CFC."""
        votes = ["CFC", "CFC", "CFC"]
        confidences = [0.9, 0.85, 0.88]
        result = detector._resolve_vote(votes, confidences)
        assert result == "CFC"

    def test_majority_vote_ufc(self, detector):
        """When 2/3 votes are UFC and the threshold is met, return UFC."""
        # 2/3 ≈ 0.667 is below 0.67, so lower the threshold here to exercise
        # the majority branch rather than the fallback.
        detector.voting_threshold = 0.66
        votes = ["UFC", "UFC", "NFC"]
        confidences = [0.7, 0.8, 0.6]
        result = detector._resolve_vote(votes, confidences)
        assert result == "UFC"

    def test_no_majority_falls_back_to_cfc(self, detector):
        """When no majority met but CFC present, be conservative and check it."""
        votes = ["CFC", "UFC", "NFC"]
        confidences = [0.6, 0.5, 0.4]
        # Agreement is 1/3 = 0.33 < 0.67 threshold
        result = detector._resolve_vote(votes, confidences)
        assert result == "CFC"  # Conservative: better to check than miss

    def test_no_majority_no_cfc_falls_back_to_ufc(self, detector):
        """When no label reaches the threshold and no vote is CFC, default to UFC."""
        votes = ["UFC", "NFC", "NFC"]
        confidences = [0.5, 0.6, 0.5]
        # NFC has 2/3 ≈ 0.667 < 0.67 threshold, so the fallback applies
        result = detector._resolve_vote(votes, confidences)
        assert result == "UFC"

    def test_rationale_for_known_label(self, detector):
        """Rationale should be a non-empty string for all labels."""
        for label in ["CFC", "UFC", "NFC"]:
            rationale = detector._get_rationale("test claim", label)
            assert isinstance(rationale, str)
            assert len(rationale) > 0
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# ============================================================
|
|
67
|
+
# Atomic Claim Decomposer Tests
|
|
68
|
+
# ============================================================
|
|
69
|
+
|
|
70
|
+
class TestAtomicClaimDecomposer:
    """Test AtomicClaimDecomposer [J7]."""

    @pytest.fixture
    def decomposer(self):
        from pipeline.stage1.atomic_decomposer import AtomicClaimDecomposer
        return AtomicClaimDecomposer()

    def test_post_process_adds_period(self, decomposer):
        """Claims without ending punctuation get a period appended."""
        # Replace the DSPy predictor so no LLM call is made.
        decomposer.decompose = MagicMock(
            return_value=MagicMock(
                atomic_claims=[
                    "  We created 800,000 jobs ",
                    "We cut taxes by 20%.",
                    "Did taxes fall?",
                ]
            )
        )

        claim = "We created 800,000 jobs and cut taxes by 20%"
        result = decomposer.forward(claim)

        assert result == [
            "We created 800,000 jobs.",  # stripped, period added
            "We cut taxes by 20%.",      # already terminated
            "Did taxes fall?",           # "?" counts as terminated
        ]
        decomposer.decompose.assert_called_once_with(claim_text=claim)

    def test_empty_result_falls_back_to_original(self, decomposer):
        """If DSPy returns no atomic claims, use the original text."""
        decomposer.decompose = MagicMock(
            return_value=MagicMock(atomic_claims=[])
        )

        result = decomposer.forward("  Water boils at 100°C.  ")

        # Fallback is the stripped original claim as a single-element list.
        assert result == ["Water boils at 100°C."]
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# ============================================================
|
|
91
|
+
# Stage1Result Tests
|
|
92
|
+
# ============================================================
|
|
93
|
+
|
|
94
|
+
class TestStage1Result:
    """Checks on the Stage1Result dataclass fields."""

    def test_check_worthy_claim(self):
        """A CFC result keeps its single atomic claim and the worthy flag."""
        from pipeline.stage1.orchestrator import Stage1Result

        fields = {
            "claim_text": "Water boils at 100°C.",
            "locale": "en",
            "worthiness_label": "CFC",
            "worthiness_confidence": 0.95,
            "worthiness_rationale": "Verifiable scientific claim.",
            "worthiness_votes": ["CFC", "CFC", "CFC"],
            "atomic_claims": ["Water boils at 100°C."],
            "is_check_worthy": True,
        }
        outcome = Stage1Result(**fields)

        assert outcome.is_check_worthy is True
        assert len(outcome.atomic_claims) == 1

    def test_non_check_worthy_claim_stops_pipeline(self):
        """An NFC result carries no atomic claims and names NFC in stop_reason."""
        from pipeline.stage1.orchestrator import Stage1Result

        fields = {
            "claim_text": "Nice weather today!",
            "locale": "en",
            "worthiness_label": "NFC",
            "worthiness_confidence": 0.92,
            "worthiness_rationale": "This is an opinion, not a factual claim.",
            "worthiness_votes": ["NFC", "NFC", "NFC"],
            "atomic_claims": [],
            "is_check_worthy": False,
            "stop_reason": "Claim classified as NFC: This is an opinion.",
        }
        outcome = Stage1Result(**fields)

        assert outcome.is_check_worthy is False
        assert outcome.atomic_claims == []
        assert "NFC" in outcome.stop_reason
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# ============================================================
|
|
131
|
+
# Integration-style Test (no LLM calls)
|
|
132
|
+
# ============================================================
|
|
133
|
+
|
|
134
|
+
def test_stage1_result_roundtrip():
    """Stage1Result can be serialized/deserialized for JSONB storage.

    Checks both the snapshot dict written to stage records and a full
    dataclass → JSON → dataclass round trip.
    """
    import json
    from dataclasses import asdict
    from pipeline.stage1.orchestrator import Stage1Result

    result = Stage1Result(
        claim_text="Test claim.",
        locale="en",
        worthiness_label="CFC",
        worthiness_confidence=0.88,
        worthiness_rationale="Check-worthy.",
        worthiness_votes=["CFC", "CFC", "UFC"],
        atomic_claims=["Test claim."],
        is_check_worthy=True,
    )

    # Should be JSON-serializable for stage_records JSONB columns
    data = {
        "label": result.worthiness_label,
        "confidence": result.worthiness_confidence,
        "rationale": result.worthiness_rationale,
        "votes": result.worthiness_votes,
        "atomic_claims": result.atomic_claims,
    }
    json_str = json.dumps(data)
    parsed = json.loads(json_str)
    assert parsed["label"] == "CFC"
    assert parsed["votes"] == ["CFC", "CFC", "UFC"]
    assert parsed["atomic_claims"] == ["Test claim."]

    # Every field must survive JSON, not just the hand-picked ones above.
    restored = Stage1Result(**json.loads(json.dumps(asdict(result))))
    assert restored == result
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Copyright (c) 2026 Jinan Kordab
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
# Stage 2 — Blind Retrieval [J2]
|
|
5
|
+
# 2a: Search-Decision Generator [J1] — pipeline/stage2/search_decision.py
|
|
6
|
+
# 2b: Blind Retriever — pipeline/stage2/blind_retriever.py
|
|
7
|
+
# 2c: Per-Domain Credibility Scorer [J1b] — pipeline/stage2/credibility_scorer.py
|
|
8
|
+
# 2d: Intelligent Query Refiner [J8c] — pipeline/stage2/query_refiner.py (Loop 1)
|
|
9
|
+
# 2e: Canonical Evidence Mapper [C6] — pipeline/stage2/canonical_mapper.py
|
|
10
|
+
# Orchestrator: pipeline/stage2/orchestrator.py
|
|
11
|
+
|
|
12
|
+
from pipeline.stage2.search_decision import SearchDecisionGenerator, search_decision_generator
|
|
13
|
+
from pipeline.stage2.blind_retriever import BlindRetriever, blind_retriever, RawSearchResult
|
|
14
|
+
from pipeline.stage2.credibility_scorer import CredibilityScorer
|
|
15
|
+
from pipeline.stage2.query_refiner import QueryRefiner
|
|
16
|
+
from pipeline.stage2.canonical_mapper import (
|
|
17
|
+
CanonicalEvidenceMapper, CanonicalEvidence,
|
|
18
|
+
)
|
|
19
|
+
from pipeline.stage2.orchestrator import Stage2Orchestrator, Stage2Result, stage2_orchestrator
|
|
20
|
+
|
|
21
|
+
# Names re-exported by this package, grouped by the module they come from.
__all__ = [
    # search_decision
    "SearchDecisionGenerator", "search_decision_generator",
    # blind_retriever
    "BlindRetriever", "blind_retriever", "RawSearchResult",
    # credibility_scorer
    "CredibilityScorer",
    # query_refiner
    "QueryRefiner",
    # canonical_mapper
    "CanonicalEvidenceMapper", "CanonicalEvidence",
    # orchestrator
    "Stage2Orchestrator", "Stage2Result", "stage2_orchestrator",
]
|