mcp-agentic-pipelines 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +93 -0
- package/README.md +258 -0
- package/package.json +70 -0
- package/packages/clinical/package.json +22 -0
- package/packages/clinical/src/index.ts +262 -0
- package/packages/clinical/tsconfig.json +13 -0
- package/packages/core/package.json +21 -0
- package/packages/core/src/config.ts +138 -0
- package/packages/core/src/errors.ts +100 -0
- package/packages/core/src/index.ts +104 -0
- package/packages/core/src/llm-config.ts +213 -0
- package/packages/core/src/logging.ts +66 -0
- package/packages/core/src/python-bridge.ts +384 -0
- package/packages/core/src/rate-limiter.ts +136 -0
- package/packages/core/src/types.ts +203 -0
- package/packages/core/src/validation.ts +101 -0
- package/packages/core/tsconfig.json +10 -0
- package/packages/deeppipe/package.json +21 -0
- package/packages/deeppipe/src/index.ts +424 -0
- package/packages/deeppipe/tsconfig.json +13 -0
- package/packages/piste/package.json +20 -0
- package/packages/piste/src/index.ts +48 -0
- package/packages/piste/tsconfig.json +13 -0
- package/packages/precis/package.json +20 -0
- package/packages/precis/src/index.ts +67 -0
- package/packages/precis/tsconfig.json +13 -0
- package/packages/server/package.json +31 -0
- package/packages/server/src/index.ts +427 -0
- package/packages/server/tsconfig.json +17 -0
- package/setup.mjs +141 -0
- package/test.mjs +337 -0
- package/vendors/clinical-intake/pipeline.mjs +349 -0
- package/vendors/clinical-intake/questions/en.txt +9 -0
- package/vendors/clinical-intake/questions/fr.txt +9 -0
- package/vendors/piste/.env.example +73 -0
- package/vendors/piste/app/core/__init__.py +4 -0
- package/vendors/piste/app/core/config.py +83 -0
- package/vendors/piste/app/core/debuglog.py +16 -0
- package/vendors/piste/app/core/middleware.py +40 -0
- package/vendors/piste/bridge_piste.py +301 -0
- package/vendors/piste/pipeline/__init__.py +4 -0
- package/vendors/piste/pipeline/compiler.py +68 -0
- package/vendors/piste/pipeline/offline/__init__.py +28 -0
- package/vendors/piste/pipeline/offline/verifaid_pipeline.py +247 -0
- package/vendors/piste/pipeline/replay.py +15 -0
- package/vendors/piste/pipeline/replay_engine.py +249 -0
- package/vendors/piste/pipeline/signatures/__init__.py +4 -0
- package/vendors/piste/pipeline/signatures/signatures.py +136 -0
- package/vendors/piste/pipeline/stage1/__init__.py +21 -0
- package/vendors/piste/pipeline/stage1/atomic_decomposer.py +61 -0
- package/vendors/piste/pipeline/stage1/check_worthiness.py +100 -0
- package/vendors/piste/pipeline/stage1/orchestrator.py +175 -0
- package/vendors/piste/pipeline/stage1/test_stage1.py +162 -0
- package/vendors/piste/pipeline/stage2/__init__.py +34 -0
- package/vendors/piste/pipeline/stage2/blind_retriever.py +303 -0
- package/vendors/piste/pipeline/stage2/canonical_mapper.py +124 -0
- package/vendors/piste/pipeline/stage2/credibility_scorer.py +85 -0
- package/vendors/piste/pipeline/stage2/orchestrator.py +311 -0
- package/vendors/piste/pipeline/stage2/query_refiner.py +88 -0
- package/vendors/piste/pipeline/stage2/search_decision.py +69 -0
- package/vendors/piste/pipeline/stage2/test_stage2.py +265 -0
- package/vendors/piste/pipeline/stage3/__init__.py +20 -0
- package/vendors/piste/pipeline/stage3/classifier.py +79 -0
- package/vendors/piste/pipeline/stage3/orchestrator.py +225 -0
- package/vendors/piste/pipeline/stage3/test_stage3.py +101 -0
- package/vendors/piste/pipeline/stage4/__init__.py +33 -0
- package/vendors/piste/pipeline/stage4/criticality_gate.py +177 -0
- package/vendors/piste/pipeline/stage4/orchestrator.py +269 -0
- package/vendors/piste/pipeline/stage4/test_stage4.py +192 -0
- package/vendors/piste/pipeline/stage4/verdict_aggregator.py +157 -0
- package/vendors/piste/requirements.txt +53 -0
- package/vendors/precis/backend/__init__.py +6 -0
- package/vendors/precis/backend/agents/__init__.py +3 -0
- package/vendors/precis/backend/agents/data_synthesis.py +105 -0
- package/vendors/precis/backend/agents/dist_free_synth.py +97 -0
- package/vendors/precis/backend/agents/exact_hash_retriever.py +327 -0
- package/vendors/precis/backend/agents/fusion_ranker.py +64 -0
- package/vendors/precis/backend/agents/guardrail.py +175 -0
- package/vendors/precis/backend/agents/query_expander.py +89 -0
- package/vendors/precis/backend/agents/radial_interpol.py +99 -0
- package/vendors/precis/backend/agents/report_generator.py +92 -0
- package/vendors/precis/backend/agents/semantic_reranker.py +135 -0
- package/vendors/precis/backend/agents/stat_anomaly.py +93 -0
- package/vendors/precis/backend/agents/vector_index.py +123 -0
- package/vendors/precis/backend/agents/veri_score.py +341 -0
- package/vendors/precis/backend/agents/work_order_extractor.py +205 -0
- package/vendors/precis/backend/api/__init__.py +3 -0
- package/vendors/precis/backend/api/routes/__init__.py +3 -0
- package/vendors/precis/backend/config.py +88 -0
- package/vendors/precis/backend/core/__init__.py +13 -0
- package/vendors/precis/backend/core/hashing.py +22 -0
- package/vendors/precis/backend/core/metrics.py +77 -0
- package/vendors/precis/backend/core/multitoken.py +166 -0
- package/vendors/precis/backend/core/pmi.py +54 -0
- package/vendors/precis/backend/core/stemming.py +74 -0
- package/vendors/precis/backend/core/tracing.py +150 -0
- package/vendors/precis/backend/data/__init__.py +3 -0
- package/vendors/precis/backend/data/chunker.py +57 -0
- package/vendors/precis/backend/data/pdf_parser.py +42 -0
- package/vendors/precis/backend/db/__init__.py +3 -0
- package/vendors/precis/backend/db/models.py +173 -0
- package/vendors/precis/backend/db/repository.py +269 -0
- package/vendors/precis/backend/llm/__init__.py +3 -0
- package/vendors/precis/backend/llm/anthropic_provider.py +39 -0
- package/vendors/precis/backend/llm/base.py +147 -0
- package/vendors/precis/backend/llm/deepseek_provider.py +43 -0
- package/vendors/precis/backend/llm/factory.py +60 -0
- package/vendors/precis/backend/llm/google_provider.py +39 -0
- package/vendors/precis/backend/llm/ollama_provider.py +54 -0
- package/vendors/precis/backend/llm/openai_provider.py +50 -0
- package/vendors/precis/backend/main.py +677 -0
- package/vendors/precis/backend/orchestrator/__init__.py +3 -0
- package/vendors/precis/backend/orchestrator/planner.py +81 -0
- package/vendors/precis/backend/orchestrator/router.py +319 -0
- package/vendors/precis/backend/orchestrator/types.py +58 -0
- package/vendors/precis/bridge_precis.py +185 -0
- package/vendors/precis/data/sample_reports/README.md +8 -0
- package/vendors/precis/data/seed_data.py +115 -0
- package/vendors/precis/requirements.txt +19 -0
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
# Copyright (c) 2026 Jinan Kordab
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Stage 2 Orchestrator — Blind Retrieval
|
|
6
|
+
========================================
|
|
7
|
+
Coordinates all 5 sub-stages of evidence retrieval:
|
|
8
|
+
2a: Search-Decision Generator [J1] — decide IF search needed
|
|
9
|
+
2b: Blind Retriever [J2] — execute neutral queries (never sees claim)
|
|
10
|
+
2c: Per-Domain Credibility Scorer [J1b] — score each source domain
|
|
11
|
+
2d: Intelligent Query Refiner [J8c] — Loop 1 retry with refined queries
|
|
12
|
+
2e: Canonical Evidence Mapper [C6] — normalize all formats
|
|
13
|
+
|
|
14
|
+
Manages Loop 1 feedback: if results insufficient → refiner → retry.
|
|
15
|
+
Writes APPEND-ONLY stage records to PostgreSQL [C5].
|
|
16
|
+
Checks FAISS Tier-1 cache before external search [J7].
|
|
17
|
+
Emits SSE events for real-time frontend updates.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import asyncio
|
|
21
|
+
import time
|
|
22
|
+
import uuid
|
|
23
|
+
from typing import Optional, List
|
|
24
|
+
from dataclasses import dataclass, field
|
|
25
|
+
|
|
26
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
27
|
+
|
|
28
|
+
from app.db.models import StageRecord, Source
|
|
29
|
+
from app.core.config import settings
|
|
30
|
+
from pipeline.stage2.search_decision import search_decision_generator
|
|
31
|
+
from pipeline.stage2.blind_retriever import blind_retriever
|
|
32
|
+
from pipeline.stage2.credibility_scorer import CredibilityScorer
|
|
33
|
+
from pipeline.stage2.query_refiner import QueryRefiner
|
|
34
|
+
from pipeline.stage2.canonical_mapper import (
|
|
35
|
+
CanonicalEvidenceMapper, CanonicalEvidence,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class Stage2Result:
    """
    Per-claim result produced by the Stage 2 (blind retrieval) orchestrator.

    The first four fields are required and positional; everything after
    them has a default so a "search skipped" result can be built from the
    search decision alone.

    Attributes:
        atomic_claim: The atomic claim this result belongs to.
        needs_search: Whether the search-decision step asked for retrieval.
        search_queries: Queries produced by the search-decision step.
        search_reasoning: Reasoning text returned with the search decision.
        canonical_evidence: Evidence items collected for the claim.
        retry_count: Number of Loop 1 refinement rounds that were run.
        retry_queries: Every refined query generated across those rounds.
        insufficient_reason: Last recorded insufficiency analysis, if any.
        skipped_search: True when retrieval was skipped entirely.
    """

    # --- Search decision (2a) ---
    atomic_claim: str
    needs_search: bool
    search_queries: list[str]
    search_reasoning: str

    # --- Collected evidence (fresh list per instance) ---
    canonical_evidence: list[CanonicalEvidence] = field(default_factory=list)

    # --- Loop 1 bookkeeping ---
    retry_count: int = 0
    retry_queries: list[str] = field(default_factory=list)
    insufficient_reason: str = ""

    # --- Set when no retrieval was performed for this claim ---
    skipped_search: bool = False
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class Stage2Orchestrator:
    """
    Orchestrates Stage 2 (blind retrieval) of the fact-checking pipeline.

    Flow per atomic claim:
      1. search_decision_generator: decide if search is needed [J1]
      2. If no  -> skip, return a result with empty evidence
      3. If yes -> blind_retriever: execute the generated queries [J2]
      4. CanonicalEvidenceMapper: normalize raw results [C6]
      5. Check sufficiency -> if insufficient:
           a. QueryRefiner analyzes the gap [J8c]
           b. blind_retriever is called again with refined queries (Loop 1)
           c. Repeat up to settings.MAX_RETRY_LOOPS times
      6. CredibilityScorer: score each evidence item's domain [J1b]
      7. Add StageRecord / Source rows to the DB session, when one is given

    This class only calls ``db.add``; it never commits or flushes, so the
    caller owns the transaction.
    """

    # Number of canonical evidence items at which retrieval is considered
    # sufficient and the Loop 1 retry stops.
    MIN_EVIDENCE_COUNT: int = 3

    # Placeholder run id used for every record written without a
    # caller-supplied ``run_id``. One shared value keeps the stage records
    # and sources of an untracked run correlated with each other.
    # NOTE(review): if run_id is a foreign key, a row with this id must
    # exist — confirm against the schema.
    FALLBACK_RUN_ID = uuid.UUID("00000000-0000-0000-0000-000000000000")

    def __init__(self, sse_callback: Optional[callable] = None):
        """
        Args:
            sse_callback: Optional awaitable callable invoked as
                ``await sse_callback(event_type, data)`` for every event.
        """
        self.sse_callback = sse_callback
        self.mapper = CanonicalEvidenceMapper()
        self.refiner = QueryRefiner()
        # Last locale passed to process(); only a fallback for direct calls
        # to the private helpers that omit ``locale``.
        self._locale: str = "en"

    def _resolve_run_id(self, run_id: Optional[uuid.UUID]) -> uuid.UUID:
        """Return ``run_id`` if given, otherwise the shared placeholder id."""
        return run_id if run_id else self.FALLBACK_RUN_ID

    async def process(
        self,
        atomic_claims: List[str],
        db: Optional[AsyncSession] = None,
        locale: str = "en",
        run_id: Optional[uuid.UUID] = None,
    ) -> List[Stage2Result]:
        """
        Run Stage 2 for all atomic claims, one after another.

        Args:
            atomic_claims: List of atomic claims from Stage 1.
            db: Optional DB session for audit ledger writes.
            locale: Language locale forwarded to the retriever [C2].
            run_id: Optional id attached to every record written for this
                run; FALLBACK_RUN_ID is used when omitted.

        Returns:
            One Stage2Result per input claim, in input order.
        """
        # Kept for backward compatibility. The locale is also passed down
        # explicitly, because this attribute lives on a shared instance and
        # could be overwritten by another process() call between awaits.
        self._locale = locale

        results: List[Stage2Result] = []
        credibility_scorer = CredibilityScorer(db)

        for claim in atomic_claims:
            await self._emit("stage_2a_start", {
                "atomic_claim": claim,
            })

            result = await self._process_single_claim(
                claim, credibility_scorer, db, run_id, locale=locale
            )
            results.append(result)

            await self._emit("stage_2_complete", {
                "atomic_claim": claim,
                "needs_search": result.needs_search,
                "sources_found": len(result.canonical_evidence),
                "retry_count": result.retry_count,
            })

        return results

    async def _process_single_claim(
        self,
        atomic_claim: str,
        credibility_scorer: CredibilityScorer,
        db: Optional[AsyncSession],
        run_id: Optional[uuid.UUID] = None,
        locale: Optional[str] = None,
    ) -> Stage2Result:
        """
        Process one atomic claim through all Stage 2 sub-stages.

        Args:
            atomic_claim: The claim to gather evidence for.
            credibility_scorer: Scorer used for each evidence item's domain.
            db: Optional DB session; records are only added when given.
            run_id: Optional run id for the written records.
            locale: Locale for retrieval; falls back to the instance's last
                known locale when omitted.

        Returns:
            The Stage2Result for this claim.
        """
        record_run_id = self._resolve_run_id(run_id)
        if locale is None:
            locale = getattr(self, "_locale", "en")

        # --- 2a: Search Decision ---
        # NOTE(review): this is a synchronous call inside a coroutine; if it
        # performs a model request it blocks the event loop — confirm.
        t0 = time.monotonic()
        needs_search, search_queries, reasoning = search_decision_generator(
            atomic_claim
        )
        latency_2a = (time.monotonic() - t0) * 1000

        if db:
            db.add(StageRecord(
                run_id=record_run_id,
                stage_name="stage_2a",
                input_snapshot={"atomic_claim": atomic_claim},
                output_snapshot={
                    "needs_search": needs_search,
                    "search_queries": search_queries,
                    "reasoning": reasoning,
                },
                model_used="dspy/search_decision",
                latency_ms=latency_2a,
                retry_attempt=0,
            ))

        if not needs_search:
            await self._emit("stage_2a_complete", {
                "atomic_claim": atomic_claim,
                "needs_search": False,
                "reasoning": reasoning,
            })
            return Stage2Result(
                atomic_claim=atomic_claim,
                needs_search=False,
                search_queries=[],
                search_reasoning=reasoning,
                skipped_search=True,
            )

        await self._emit("stage_2a_complete", {
            "atomic_claim": atomic_claim,
            "needs_search": True,
            "queries": search_queries,
        })

        # --- 2b + 2d + 2e: Retrieve -> Map (-> Refine -> retry) ---
        all_evidence, retry_count, retry_queries, insufficient = (
            await self._retrieve_with_retry(
                search_queries, atomic_claim, db, run_id, locale=locale
            )
        )

        # --- 2c: Score credibility ---
        # NOTE(review): scoring happens only after retrieval has finished,
        # so the insufficiency analysis inside the retry loop sees whatever
        # credibility_score the mapper assigned, not these scores.
        for ev in all_evidence:
            ev.credibility_score = await credibility_scorer.score_domain(
                ev.source_domain
            )

        # --- Write sources ---
        # Generate the Source PK up-front and attach it to the in-memory
        # CanonicalEvidence so Stage 3 can populate classifications.source_id
        # (FK to sources.id) without an extra DB roundtrip.
        if db:
            for ev in all_evidence:
                ev.db_id = uuid.uuid4()
                db.add(Source(
                    id=ev.db_id,
                    run_id=record_run_id,
                    url=ev.url,
                    domain=ev.source_domain,
                    title=ev.title,
                    snippet=ev.excerpt,
                    credibility_score=ev.credibility_score,
                    canonical_evidence=self.mapper.to_dict(ev),
                ))

        await self._emit("stage_2c_complete", {
            "atomic_claim": atomic_claim,
            "sources_count": len(all_evidence),
            "avg_credibility": (
                sum(e.credibility_score for e in all_evidence) / len(all_evidence)
                if all_evidence else 0.0
            ),
        })

        return Stage2Result(
            atomic_claim=atomic_claim,
            needs_search=True,
            search_queries=search_queries,
            search_reasoning=reasoning,
            canonical_evidence=all_evidence,
            retry_count=retry_count,
            retry_queries=retry_queries,
            insufficient_reason=insufficient,
        )

    async def _retrieve_with_retry(
        self,
        search_queries: List[str],
        atomic_claim: str,
        db: Optional[AsyncSession],
        run_id: Optional[uuid.UUID] = None,
        locale: Optional[str] = None,
    ) -> tuple:
        """
        Execute retrieval with Loop 1 retry logic.

        Evidence accumulates across attempts. The loop stops when at least
        MIN_EVIDENCE_COUNT items were collected, when the retry budget
        (settings.MAX_RETRY_LOOPS) is used up, or when the refiner produces
        no new queries.

        Args:
            search_queries: Initial queries from the search decision.
            atomic_claim: The claim, passed to the refiner's analysis.
            db: Optional DB session; one stage_2b record per attempt.
            run_id: Optional run id for the written records.
            locale: Locale for the retriever; falls back to the instance's
                last known locale when omitted.

        Returns:
            (all_evidence, retry_count, retry_queries, insufficient_reason)
        """
        record_run_id = self._resolve_run_id(run_id)
        if locale is None:
            locale = getattr(self, "_locale", "en")

        all_evidence: List[CanonicalEvidence] = []
        retry_count = 0
        retry_queries: List[str] = []
        insufficient_reason = ""
        current_queries = list(search_queries)

        for attempt in range(settings.MAX_RETRY_LOOPS + 1):
            # --- 2b: Blind Retrieve ---
            await self._emit("stage_2b_start", {
                "queries": current_queries,
                "attempt": attempt,
            })
            t0 = time.monotonic()

            raw_results = await blind_retriever.search(
                current_queries, locale=locale
            )
            latency_2b = (time.monotonic() - t0) * 1000

            await self._emit("stage_2b_complete", {
                "results_count": len(raw_results),
                "attempt": attempt,
            })

            # --- 2e: Map to Canonical ---
            evidence = await self.mapper.map_results(raw_results)
            all_evidence.extend(evidence)

            if db:
                db.add(StageRecord(
                    run_id=record_run_id,
                    stage_name="stage_2b",
                    input_snapshot={"queries": current_queries},
                    output_snapshot={
                        "raw_count": len(raw_results),
                        "canonical_count": len(evidence),
                    },
                    model_used="blind_retriever",
                    latency_ms=latency_2b,
                    retry_attempt=attempt,
                ))

            # Check sufficiency
            if len(all_evidence) >= self.MIN_EVIDENCE_COUNT:
                break  # Sufficient evidence found

            # --- 2d: Query Refiner (Loop 1) ---
            insufficient_reason = self.refiner.analyze_insufficiency(
                all_evidence, atomic_claim
            )

            if attempt >= settings.MAX_RETRY_LOOPS:
                break  # Retry budget exhausted; keep the recorded reason.

            await self._emit("stage_2d_start", {
                "insufficient_reason": insufficient_reason,
            })
            # NOTE(review): synchronous refiner call inside a coroutine —
            # same event-loop concern as the search decision call.
            refined = self.refiner(
                original_query=current_queries[0] if current_queries else atomic_claim,
                insufficient_reason=insufficient_reason,
            )

            if not refined:
                # Nothing new to search for: retrying with an empty query
                # list cannot add evidence, so end the loop here.
                await self._emit("stage_2d_complete", {
                    "refined_queries": [],
                    "retry_attempt": retry_count,
                })
                break

            retry_queries.extend(refined)
            current_queries = refined
            retry_count += 1

            await self._emit("stage_2d_complete", {
                "refined_queries": refined,
                "retry_attempt": retry_count,
            })

        return all_evidence, retry_count, retry_queries, insufficient_reason

    async def _emit(self, event_type: str, data: dict):
        """Emit an SSE event via the callback, if one is configured."""
        if self.sse_callback:
            await self.sse_callback(event_type, data)
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
# Singleton
|
|
311
|
+
# Shared module-level instance, created when this module is imported.
# It is built without an SSE callback, so it emits no events as constructed.
stage2_orchestrator = Stage2Orchestrator()
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# Copyright (c) 2026 Jinan Kordab
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Stage 2d: Intelligent Query Refiner [J8c]
|
|
6
|
+
===========================================
|
|
7
|
+
ClaimeAI's jewel: when search results are insufficient, the refiner
|
|
8
|
+
analyzes WHAT was missing and generates NEW, targeted queries.
|
|
9
|
+
|
|
10
|
+
Loop 1 feedback — seconds-scale retry with intelligent query adjustment.
|
|
11
|
+
Transforms retry from a dumb loop into informed exploration.
|
|
12
|
+
~25% improvement in resolving "insufficient information" cases.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import dspy
|
|
16
|
+
from typing import List
|
|
17
|
+
from pipeline.signatures.signatures import QueryRefinementSignature
|
|
18
|
+
from app.core.config import settings
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class QueryRefiner(dspy.Module):
    """
    DSPy module that analyzes insufficient search results and generates
    refined queries targeting the identified gaps.

    Jewel [J8c] — feedback-driven iterative query refinement: each retry is
    informed by what was learned from the previous attempt.

    Loop 1: seconds-scale retry loop. ``max_retries`` mirrors
    settings.MAX_RETRY_LOOPS; this class does not enforce it itself.
    """

    # Cap on refined queries returned per retry.
    MAX_REFINED_QUERIES: int = 3

    # Heuristic thresholds used by analyze_insufficiency().
    LOW_CREDIBILITY_THRESHOLD: float = 0.4   # scores below this count as low
    LOW_CREDIBILITY_MAJORITY: float = 0.7    # share of low scores that dominates
    MIN_RESULT_COUNT: int = 3                # fewer results than this is "too few"
    UNKNOWN_CREDIBILITY: float = 0.5         # assumed score when none is available

    def __init__(self):
        super().__init__()
        self.refine = dspy.ChainOfThought(QueryRefinementSignature)
        self.max_retries: int = settings.MAX_RETRY_LOOPS

    def forward(
        self,
        original_query: str,
        insufficient_reason: str,
    ) -> List[str]:
        """
        Generate refined queries based on what was missing.

        Args:
            original_query: The query that returned insufficient results.
            insufficient_reason: Analysis of WHY results were insufficient
                                 (e.g., "no results from credible sources",
                                 "results too old", "wrong topic").

        Returns:
            Up to MAX_REFINED_QUERIES distinct, non-empty queries. May be
            empty when the model produced nothing usable.
        """
        result = self.refine(
            original_query=original_query,
            insufficient_reason=insufficient_reason,
        )
        refined = self._clean_queries(result.refined_queries)
        return refined[: self.MAX_REFINED_QUERIES]

    @staticmethod
    def _clean_queries(raw) -> List[str]:
        """
        Normalize a model output into a list of distinct, stripped queries.

        A plain string is split into lines rather than iterated character by
        character, and ``None`` yields an empty list. Order is preserved.
        """
        if raw is None:
            return []
        if isinstance(raw, str):
            raw = raw.splitlines()
        stripped = (str(item).strip() for item in raw if item is not None)
        # dict.fromkeys drops duplicates while keeping first-seen order.
        return list(dict.fromkeys(q for q in stripped if q))

    def _credibility_of(self, result) -> float:
        """Return the result's credibility score, or the neutral default."""
        score = getattr(result, "credibility_score", None)
        return self.UNKNOWN_CREDIBILITY if score is None else score

    def analyze_insufficiency(
        self, results: list, atomic_claim: str
    ) -> str:
        """
        Analyze why search results are insufficient.
        Heuristic-based; can be enhanced with LLM analysis.

        Args:
            results: Evidence collected so far; each item's
                ``credibility_score`` attribute is read when present.
            atomic_claim: Currently unused by the heuristics; kept in the
                signature for callers.

        Returns:
            Human-readable reason for insufficiency.
        """
        if not results:
            return "No search results were returned for the query."

        # Credibility heuristic: a missing or None score is treated as
        # neutral instead of raising on the comparison.
        low_credibility_count = sum(
            1 for r in results
            if self._credibility_of(r) < self.LOW_CREDIBILITY_THRESHOLD
        )
        if low_credibility_count > len(results) * self.LOW_CREDIBILITY_MAJORITY:
            return "Majority of results are from low-credibility sources."

        # Quantity heuristic: too few results to verify against.
        if len(results) < self.MIN_RESULT_COUNT:
            return f"Only {len(results)} results found — insufficient for verification."

        return "Results returned but may lack sufficient depth for classification."
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# Copyright (c) 2026 Jinan Kordab
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Stage 2a: Search-Decision Generator [J1]
|
|
6
|
+
==========================================
|
|
7
|
+
Veracity's jewel: the LLM autonomously decides IF web search is needed.
|
|
8
|
+
Well-known facts skip search → saves API cost and latency.
|
|
9
|
+
If search IS needed → generates NEUTRAL queries (never the original claim).
|
|
10
|
+
|
|
11
|
+
CRITICAL: This module produces the blind retrieval boundary.
|
|
12
|
+
The search queries are factual and neutral — the retriever never sees the claim.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import dspy
|
|
16
|
+
from typing import Tuple, List
|
|
17
|
+
from pipeline.signatures.signatures import SearchDecisionSignature, QueryGenerationSignature
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class SearchDecisionGenerator(dspy.Module):
    """
    DSPy module that decides whether web search is needed and generates
    search queries.

    Jewel [J1] — LLM-autonomous search decision:
      - Simple facts ("Water boils at 100°C") → skip search.
      - Complex/current claims → generate queries for blind retrieval.

    Jewel [J2] — Blind Retrieval:
      - Queries are meant to be factual and neutral and to avoid the
        original claim text. This module does not check that itself;
        presumably the signatures instruct the model — verify there.
    """

    # Textual model outputs interpreted as "no search needed". Any other
    # non-empty text is treated as True, so an unclear answer errs toward
    # gathering evidence.
    _NEGATIVE_ANSWERS = frozenset({"", "false", "no", "n", "0", "none"})

    def __init__(self):
        super().__init__()
        self.decide = dspy.ChainOfThought(SearchDecisionSignature)
        self.generate_queries = dspy.ChainOfThought(QueryGenerationSignature)

    @classmethod
    def _as_bool(cls, value) -> bool:
        """
        Interpret a model output as a boolean.

        Real booleans pass through unchanged. Strings such as "False" or
        "no" become False instead of being truthy as non-empty text.
        """
        if isinstance(value, str):
            return value.strip().lower() not in cls._NEGATIVE_ANSWERS
        return bool(value)

    @staticmethod
    def _clean_queries(raw) -> List[str]:
        """
        Normalize a model output into a list of distinct, stripped queries.

        A plain string is split into lines rather than iterated character by
        character, and ``None`` yields an empty list. Order is preserved.
        """
        if raw is None:
            return []
        if isinstance(raw, str):
            raw = raw.splitlines()
        stripped = (str(item).strip() for item in raw if item is not None)
        # dict.fromkeys drops duplicates while keeping first-seen order.
        return list(dict.fromkeys(q for q in stripped if q))

    def forward(self, atomic_claim: str) -> Tuple[bool, List[str], str]:
        """
        Decide if search is needed and generate queries if so.

        Args:
            atomic_claim: A single atomic claim to evaluate.

        Returns:
            needs_search: True if external evidence is needed.
            search_queries: List of search queries (empty if no search).
            reasoning: Why search is or isn't needed.
        """
        # Step 1: Decide if search is needed
        decision = self.decide(atomic_claim=atomic_claim)

        if not self._as_bool(decision.needs_search):
            return False, [], decision.reasoning

        # Step 2: Generate search queries
        queries_result = self.generate_queries(atomic_claim=atomic_claim)
        search_queries = self._clean_queries(queries_result.search_queries)

        if not search_queries:
            # NOTE(review): this returns needs_search=False, which callers
            # cannot tell apart from a genuine "no search needed" decision
            # except by the reasoning text.
            return False, [], "Query generation produced no valid queries."

        return True, search_queries, decision.reasoning
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# Singleton
|
|
69
|
+
# Shared module-level instance, created when this module is imported.
search_decision_generator = SearchDecisionGenerator()
|