mcp-agentic-pipelines 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. package/.env.example +93 -0
  2. package/README.md +258 -0
  3. package/package.json +70 -0
  4. package/packages/clinical/package.json +22 -0
  5. package/packages/clinical/src/index.ts +262 -0
  6. package/packages/clinical/tsconfig.json +13 -0
  7. package/packages/core/package.json +21 -0
  8. package/packages/core/src/config.ts +138 -0
  9. package/packages/core/src/errors.ts +100 -0
  10. package/packages/core/src/index.ts +104 -0
  11. package/packages/core/src/llm-config.ts +213 -0
  12. package/packages/core/src/logging.ts +66 -0
  13. package/packages/core/src/python-bridge.ts +384 -0
  14. package/packages/core/src/rate-limiter.ts +136 -0
  15. package/packages/core/src/types.ts +203 -0
  16. package/packages/core/src/validation.ts +101 -0
  17. package/packages/core/tsconfig.json +10 -0
  18. package/packages/deeppipe/package.json +21 -0
  19. package/packages/deeppipe/src/index.ts +424 -0
  20. package/packages/deeppipe/tsconfig.json +13 -0
  21. package/packages/piste/package.json +20 -0
  22. package/packages/piste/src/index.ts +48 -0
  23. package/packages/piste/tsconfig.json +13 -0
  24. package/packages/precis/package.json +20 -0
  25. package/packages/precis/src/index.ts +67 -0
  26. package/packages/precis/tsconfig.json +13 -0
  27. package/packages/server/package.json +31 -0
  28. package/packages/server/src/index.ts +427 -0
  29. package/packages/server/tsconfig.json +17 -0
  30. package/setup.mjs +141 -0
  31. package/test.mjs +337 -0
  32. package/vendors/clinical-intake/pipeline.mjs +349 -0
  33. package/vendors/clinical-intake/questions/en.txt +9 -0
  34. package/vendors/clinical-intake/questions/fr.txt +9 -0
  35. package/vendors/piste/.env.example +73 -0
  36. package/vendors/piste/app/core/__init__.py +4 -0
  37. package/vendors/piste/app/core/config.py +83 -0
  38. package/vendors/piste/app/core/debuglog.py +16 -0
  39. package/vendors/piste/app/core/middleware.py +40 -0
  40. package/vendors/piste/bridge_piste.py +301 -0
  41. package/vendors/piste/pipeline/__init__.py +4 -0
  42. package/vendors/piste/pipeline/compiler.py +68 -0
  43. package/vendors/piste/pipeline/offline/__init__.py +28 -0
  44. package/vendors/piste/pipeline/offline/verifaid_pipeline.py +247 -0
  45. package/vendors/piste/pipeline/replay.py +15 -0
  46. package/vendors/piste/pipeline/replay_engine.py +249 -0
  47. package/vendors/piste/pipeline/signatures/__init__.py +4 -0
  48. package/vendors/piste/pipeline/signatures/signatures.py +136 -0
  49. package/vendors/piste/pipeline/stage1/__init__.py +21 -0
  50. package/vendors/piste/pipeline/stage1/atomic_decomposer.py +61 -0
  51. package/vendors/piste/pipeline/stage1/check_worthiness.py +100 -0
  52. package/vendors/piste/pipeline/stage1/orchestrator.py +175 -0
  53. package/vendors/piste/pipeline/stage1/test_stage1.py +162 -0
  54. package/vendors/piste/pipeline/stage2/__init__.py +34 -0
  55. package/vendors/piste/pipeline/stage2/blind_retriever.py +303 -0
  56. package/vendors/piste/pipeline/stage2/canonical_mapper.py +124 -0
  57. package/vendors/piste/pipeline/stage2/credibility_scorer.py +85 -0
  58. package/vendors/piste/pipeline/stage2/orchestrator.py +311 -0
  59. package/vendors/piste/pipeline/stage2/query_refiner.py +88 -0
  60. package/vendors/piste/pipeline/stage2/search_decision.py +69 -0
  61. package/vendors/piste/pipeline/stage2/test_stage2.py +265 -0
  62. package/vendors/piste/pipeline/stage3/__init__.py +20 -0
  63. package/vendors/piste/pipeline/stage3/classifier.py +79 -0
  64. package/vendors/piste/pipeline/stage3/orchestrator.py +225 -0
  65. package/vendors/piste/pipeline/stage3/test_stage3.py +101 -0
  66. package/vendors/piste/pipeline/stage4/__init__.py +33 -0
  67. package/vendors/piste/pipeline/stage4/criticality_gate.py +177 -0
  68. package/vendors/piste/pipeline/stage4/orchestrator.py +269 -0
  69. package/vendors/piste/pipeline/stage4/test_stage4.py +192 -0
  70. package/vendors/piste/pipeline/stage4/verdict_aggregator.py +157 -0
  71. package/vendors/piste/requirements.txt +53 -0
  72. package/vendors/precis/backend/__init__.py +6 -0
  73. package/vendors/precis/backend/agents/__init__.py +3 -0
  74. package/vendors/precis/backend/agents/data_synthesis.py +105 -0
  75. package/vendors/precis/backend/agents/dist_free_synth.py +97 -0
  76. package/vendors/precis/backend/agents/exact_hash_retriever.py +327 -0
  77. package/vendors/precis/backend/agents/fusion_ranker.py +64 -0
  78. package/vendors/precis/backend/agents/guardrail.py +175 -0
  79. package/vendors/precis/backend/agents/query_expander.py +89 -0
  80. package/vendors/precis/backend/agents/radial_interpol.py +99 -0
  81. package/vendors/precis/backend/agents/report_generator.py +92 -0
  82. package/vendors/precis/backend/agents/semantic_reranker.py +135 -0
  83. package/vendors/precis/backend/agents/stat_anomaly.py +93 -0
  84. package/vendors/precis/backend/agents/vector_index.py +123 -0
  85. package/vendors/precis/backend/agents/veri_score.py +341 -0
  86. package/vendors/precis/backend/agents/work_order_extractor.py +205 -0
  87. package/vendors/precis/backend/api/__init__.py +3 -0
  88. package/vendors/precis/backend/api/routes/__init__.py +3 -0
  89. package/vendors/precis/backend/config.py +88 -0
  90. package/vendors/precis/backend/core/__init__.py +13 -0
  91. package/vendors/precis/backend/core/hashing.py +22 -0
  92. package/vendors/precis/backend/core/metrics.py +77 -0
  93. package/vendors/precis/backend/core/multitoken.py +166 -0
  94. package/vendors/precis/backend/core/pmi.py +54 -0
  95. package/vendors/precis/backend/core/stemming.py +74 -0
  96. package/vendors/precis/backend/core/tracing.py +150 -0
  97. package/vendors/precis/backend/data/__init__.py +3 -0
  98. package/vendors/precis/backend/data/chunker.py +57 -0
  99. package/vendors/precis/backend/data/pdf_parser.py +42 -0
  100. package/vendors/precis/backend/db/__init__.py +3 -0
  101. package/vendors/precis/backend/db/models.py +173 -0
  102. package/vendors/precis/backend/db/repository.py +269 -0
  103. package/vendors/precis/backend/llm/__init__.py +3 -0
  104. package/vendors/precis/backend/llm/anthropic_provider.py +39 -0
  105. package/vendors/precis/backend/llm/base.py +147 -0
  106. package/vendors/precis/backend/llm/deepseek_provider.py +43 -0
  107. package/vendors/precis/backend/llm/factory.py +60 -0
  108. package/vendors/precis/backend/llm/google_provider.py +39 -0
  109. package/vendors/precis/backend/llm/ollama_provider.py +54 -0
  110. package/vendors/precis/backend/llm/openai_provider.py +50 -0
  111. package/vendors/precis/backend/main.py +677 -0
  112. package/vendors/precis/backend/orchestrator/__init__.py +3 -0
  113. package/vendors/precis/backend/orchestrator/planner.py +81 -0
  114. package/vendors/precis/backend/orchestrator/router.py +319 -0
  115. package/vendors/precis/backend/orchestrator/types.py +58 -0
  116. package/vendors/precis/bridge_precis.py +185 -0
  117. package/vendors/precis/data/sample_reports/README.md +8 -0
  118. package/vendors/precis/data/seed_data.py +115 -0
  119. package/vendors/precis/requirements.txt +19 -0
@@ -0,0 +1,9 @@
1
+ Hello, I'm the clinical intake assistant with Health Canada. I'm going to ask you a few questions to prepare for your consultation. What brings you in to see the doctor today?
2
+ When did your symptoms begin? Did they start suddenly or gradually?
3
+ On a scale of 0 to 10, where 0 is no pain and 10 is the worst pain you can imagine, how would you rate your current discomfort?
4
+ Have you experienced this health issue before? If so, what diagnosis or treatment did you receive at that time?
5
+ Are you currently taking any medications, whether prescribed or over-the-counter? Do you have any drug allergies or other allergies we should know about?
6
+ Do you have any significant medical history such as diabetes, high blood pressure, heart conditions, or respiratory issues?
7
+ Have you recently experienced any of the following: chest pain or pressure, unusual shortness of breath, severe dizziness, or a fever above 38.5°C (101.3°F)?
8
+ Is there anything else you would like the doctor to know before your appointment? Thank you — the doctor will review all of this information.
9
+
@@ -0,0 +1,9 @@
1
+ Bonjour, je suis l'assistant d'accueil clinique de la RAMQ. Je vais vous poser quelques questions pour préparer votre consultation. Quel est le motif de votre visite aujourd'hui?
2
+ Depuis quand avez-vous ces symptômes? Sont-ils apparus soudainement ou progressivement?
3
+ Sur une échelle de 0 à 10, où 0 signifie aucune douleur et 10 la pire douleur imaginable, comment évaluez-vous votre inconfort actuel?
4
+ Avez-vous déjà eu ce problème de santé auparavant? Si oui, quel diagnostic ou traitement aviez-vous reçu?
5
+ Prenez-vous actuellement des médicaments, qu'ils soient prescrits ou en vente libre? Avez-vous des allergies médicamenteuses ou autres?
6
+ Avez-vous des antécédents médicaux importants comme le diabète, l'hypertension, des problèmes cardiaques ou respiratoires?
7
+ Avez-vous ressenti l'un des symptômes suivants récemment: douleur ou pression thoracique, essoufflement inhabituel, étourdissements sévères, ou fièvre supérieure à 38,5°C?
8
+ Y a-t-il autre chose que vous aimeriez que le médecin sache avant votre rendez-vous? Merci de votre collaboration, le médecin examinera toutes ces informations.
9
+
@@ -0,0 +1,73 @@
1
+ # ============================================================
2
+ # Piste — Environment Variables
3
+ # Copy to .env and fill in your API keys
4
+ # ============================================================
5
+
6
+ # --- App ---
7
+ APP_NAME=Piste
8
+ APP_VERSION=0.1.0
9
+ DEBUG=true
10
+
11
+ # --- Server ---
12
+ HOST=0.0.0.0
13
+ PORT=8000
14
+
15
+ # --- CORS ---
16
+ CORS_ORIGINS=["http://localhost:3000"]
17
+
18
+ # --- Database (PostgreSQL 16) ---
19
+ DATABASE_URL=postgresql+asyncpg://piste:piste@localhost:5432/piste
20
+ DATABASE_URL_SYNC=postgresql+psycopg2://piste:piste@localhost:5432/piste
21
+
22
+ # --- Redis 7.2 ---
23
+ REDIS_URL=redis://localhost:6379/0
24
+
25
+ # --- FAISS ---
26
+ FAISS_INDEX_PATH=./data/faiss_evidence.index
27
+ FAISS_DIMENSION=1536
28
+
29
+ # --- LiteLLM / Model ---
30
+ LITELLM_MODEL=deepseek/deepseek-chat
31
+ LITELLM_FALLBACK_MODELS=["deepseek/deepseek-chat","claude-3-haiku-20240307"]
32
+
33
+ # --- API Keys (REQUIRED — get your own at the URLs below) ---
34
+ # DeepSeek: https://platform.deepseek.com/api_keys
35
+ DEEPSEEK_API_KEY=sk-your-deepseek-key-here
36
+
37
+ # OpenAI (optional fallback): https://platform.openai.com/api-keys
38
+ OPENAI_API_KEY=sk-your-openai-key-here
39
+
40
+ # Anthropic (optional fallback): https://console.anthropic.com/
41
+ ANTHROPIC_API_KEY=sk-ant-your-key-here
42
+
43
+ # --- Search Providers (at least one REQUIRED) ---
44
+ # Tavily: https://app.tavily.com/home
45
+ TAVILY_API_KEY=tvly-your-tavily-key-here
46
+
47
+ # Serper: https://serper.dev/
48
+ SERPER_API_KEY=your-serper-key-here
49
+
50
+ # Google CSE (optional): https://console.cloud.google.com/apis/library/customsearch.googleapis.com
51
+ GOOGLE_CSE_API_KEY=your-google-cse-key-here
52
+ GOOGLE_CSE_ID=your-search-engine-id-here
53
+
54
+ # --- DSPy ---
55
+ DSPY_OPTIMIZER=BootstrapFewShot
56
+ DSPY_MAX_LABELED_EXAMPLES=100
57
+
58
+ # --- Pipeline ---
59
+ VOTING_COMPLETIONS=3
60
+ VOTING_THRESHOLD=0.67
61
+ MAX_RETRY_LOOPS=3
62
+ FAISS_CACHE_THRESHOLD=0.92
63
+
64
+ # --- Idempotency ---
65
+ VERDICT_CACHE_TTL_SECONDS=86400
66
+ IDEMPOTENCY_LOCK_TTL_SECONDS=3600
67
+
68
+ # --- Rate Limiting ---
69
+ RATE_LIMIT_PER_USER=10
70
+ RATE_LIMIT_GLOBAL=100
71
+
72
+ # --- Auth ---
73
+ JWT_SECRET_KEY=change-me-in-production-use-a-real-secret
@@ -0,0 +1,4 @@
1
+ # Copyright (c) 2026 Jinan Kordab
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ # Core package — config, security, dependencies
@@ -0,0 +1,83 @@
1
+ # Copyright (c) 2026 Jinan Kordab
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ """
5
+ Piste — Centralized Settings
6
+ ================================
7
+ All configuration via environment variables + .env file.
8
+ Uses pydantic-settings for validation.
9
+ """
10
+
11
+ from pydantic_settings import BaseSettings
12
+ from typing import List
13
+
14
+
15
class Settings(BaseSettings):
    """Typed configuration for Piste.

    Values are populated by pydantic-settings from environment variables and
    from a UTF-8 encoded ``.env`` file; keys that do not match a field below
    are ignored rather than rejected. Every field has a default, so the class
    can be instantiated with no environment at all.
    """

    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
        "extra": "ignore",
    }

    # Application identity
    APP_NAME: str = "Piste"
    APP_VERSION: str = "0.1.0"
    DEBUG: bool = False

    # HTTP server bind address
    HOST: str = "0.0.0.0"
    PORT: int = 8000

    # Origins allowed by CORS
    CORS_ORIGINS: List[str] = ["http://localhost:3000"]

    # PostgreSQL 16 — async (asyncpg) and sync (psycopg2) connection URLs
    DATABASE_URL: str = "postgresql+asyncpg://piste:piste@localhost:5432/piste"
    DATABASE_URL_SYNC: str = "postgresql+psycopg2://piste:piste@localhost:5432/piste"

    # Redis 7.2
    REDIS_URL: str = "redis://localhost:6379/0"

    # FAISS evidence index
    FAISS_INDEX_PATH: str = "./data/faiss_evidence.index"
    FAISS_DIMENSION: int = 1536  # OpenAI embedding dimension

    # LiteLLM model selection and provider credentials
    LITELLM_MODEL: str = "deepseek/deepseek-chat"
    LITELLM_FALLBACK_MODELS: List[str] = ["deepseek/deepseek-chat", "claude-3-haiku-20240307"]
    LITELLM_REQUEST_TIMEOUT: int = 600
    OPENAI_API_KEY: str = ""
    ANTHROPIC_API_KEY: str = ""
    DEEPSEEK_API_KEY: str = ""

    # DSPy optimizer used when recompiling modules
    DSPY_OPTIMIZER: str = "BootstrapFewShot"  # or "MIPROv2"
    DSPY_MAX_LABELED_EXAMPLES: int = 100

    # Pipeline tuning
    VOTING_COMPLETIONS: int = 3
    VOTING_THRESHOLD: float = 0.67  # 2/3 majority
    MAX_RETRY_LOOPS: int = 3  # Loop 1 max retries
    FAISS_CACHE_THRESHOLD: float = 0.92  # Cosine similarity for cache hit

    # Idempotency windows
    VERDICT_CACHE_TTL_SECONDS: int = 86400  # 24 hours
    IDEMPOTENCY_LOCK_TTL_SECONDS: int = 3600  # 1 hour

    # Rate limits, in claims per minute
    RATE_LIMIT_PER_USER: int = 10
    RATE_LIMIT_GLOBAL: int = 100

    # Auth — the default secret is a placeholder and must be overridden
    JWT_SECRET_KEY: str = "change-me-in-production"
    JWT_ALGORITHM: str = "HS256"

    # Observability
    LANGSMITH_API_KEY: str = ""
    LANGSMITH_PROJECT: str = "piste"
    PROMETHEUS_PORT: int = 9090

    # Web search provider credentials
    TAVILY_API_KEY: str = ""
    SERPER_API_KEY: str = ""
    GOOGLE_CSE_API_KEY: str = ""
    GOOGLE_CSE_ID: str = ""
81
+
82
+
83
+ settings = Settings()
@@ -0,0 +1,16 @@
1
+ # Copyright (c) 2026 Jinan Kordab
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ """Debug logging for pipeline tracing."""
5
+ import os
6
+ from datetime import datetime
7
+
8
DEBUG_LOG = "/tmp/piste_debug.log"


def log(msg: str):
    """Append a timestamped debug message to ``DEBUG_LOG`` and echo it to stdout.

    The file line has the form ``[HH:MM:SS.mmm] msg`` using UTC time; the
    stdout line has the form ``[DEBUG] msg``.

    Logging is best-effort: if the log file cannot be written (for example the
    directory does not exist or is read-only) the file write is skipped and the
    message is still printed, so a debug helper can never abort the pipeline.
    """
    # Local import: the module only imports `datetime` at file level.
    from datetime import timezone

    # datetime.utcnow() is deprecated; an aware UTC "now" formats identically.
    ts = datetime.now(timezone.utc).strftime("%H:%M:%S.%f")[:-3]
    try:
        # Explicit encoding so non-ASCII claim text does not depend on the locale.
        with open(DEBUG_LOG, "a", encoding="utf-8") as f:
            f.write(f"[{ts}] {msg}\n")
    except OSError:
        pass  # deliberate: tracing must not break the caller
    # Also print for docker logs
    print(f"[DEBUG] {msg}", flush=True)
@@ -0,0 +1,40 @@
1
+ # Copyright (c) 2026 Jinan Kordab
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ """
5
+ Rate Limiter Middleware
6
+ ========================
7
+ Token-bucket rate limiting per user and global.
8
+ Uses Redis for distributed rate limiting.
9
+
10
+ Limits:
11
+ - 10 claims/minute per user
12
+ - 100 claims/minute global
13
+ """
14
+
15
+ from fastapi import Request, HTTPException
16
+ from starlette.middleware.base import BaseHTTPMiddleware
17
+ from app.services.cache import redis_client
18
+ from app.core.config import settings
19
+
20
+
21
class RateLimitMiddleware(BaseHTTPMiddleware):
    """Rate limiting middleware for API endpoints.

    Only ``POST /api/v1/claims`` is throttled; every other request is passed
    straight to the next handler.
    """

    async def dispatch(self, request: Request, call_next):
        """Reject over-limit claim submissions with HTTP 429, else forward the request.

        The limit key combines the ``x-user-id`` header (``"anonymous"`` when
        absent) with the client IP (``"unknown"`` when unavailable) and is
        checked through ``redis_client.check_rate_limit``.

        Returns:
            A 429 JSON response with a ``detail`` message when the limit is
            exceeded, otherwise the downstream response.
        """
        # Local import so this block does not depend on a file-level change.
        from starlette.responses import JSONResponse

        # Only rate-limit POST /api/v1/claims
        if request.url.path == "/api/v1/claims" and request.method == "POST":
            # Extract user identity (placeholder — use JWT in production).
            # NOTE(review): x-user-id is client-supplied, so a caller can obtain
            # a fresh key by changing the header; confirm before relying on it.
            user_id = request.headers.get("x-user-id", "anonymous")
            client_ip = request.client.host if request.client else "unknown"
            rate_key = f"rate:{user_id}:{client_ip}"

            allowed = await redis_client.check_rate_limit(rate_key)
            if not allowed:
                # Return the response directly: an HTTPException raised from
                # BaseHTTPMiddleware.dispatch is not routed through the app's
                # exception handlers and would surface as a 500.
                return JSONResponse(
                    status_code=429,
                    content={
                        "detail": "Rate limit exceeded. Please wait before submitting another claim.",
                    },
                )

        return await call_next(request)
@@ -0,0 +1,301 @@
1
+ """
2
+ Piste Bridge — stdin/stdout JSON worker for MCP server.
3
+
4
+ Usage: python bridge_piste.py
5
+
6
+ Reads JSON requests from stdin, processes them using the real Piste DSPy pipeline,
7
+ writes JSON responses to stdout.
8
+
9
+ IMPORTANT: This bridge uses the REAL pipeline modules (pipeline/stage1-4) directly.
10
+ It does NOT require PostgreSQL, Redis, or Docker. The pipeline stages use DSPy + LiteLLM
11
+ for LLM calls and Tavily/Serper for web search. Results are returned as JSON.
12
+
13
+ Protocol:
14
+ Input: {"id": 1, "action": "fact_check", "params": {"claim_text": "...", "locale": "en"}}
15
+ Output: {"id": 1, "result": {...}} or {"id": 1, "error": "message"}
16
+
17
+ Actions:
18
+ - fact_check: Run the full 4-stage fact-checking pipeline
19
+ - health: Returns {"status": "ok"}
20
+ """
21
+
22
+ import sys, importlib, json, asyncio, os
23
+
24
# ── Verify dependencies (installed by MCP server on startup) ─────
# `import importlib` alone does not guarantee that the `importlib.util`
# submodule is loaded, so import it explicitly before calling find_spec().
import importlib.util

# Maps importable module name -> pip distribution name.
REQUIRED = {'dspy': 'dspy-ai', 'litellm': 'litellm', 'dotenv': 'python-dotenv'}
_missing = [mod for mod in REQUIRED if importlib.util.find_spec(mod) is None]
if _missing:
    # Report the pip names: those are what has to be installed to recover.
    _missing_packages = ", ".join(REQUIRED[mod] for mod in _missing)
    sys.stderr.write(f'[piste] FATAL: missing packages: {_missing_packages}. '
                     f'The MCP server should have installed them.\n')
    sys.stderr.flush()
    sys.exit(1)
32
+
33
+ # Ensure piste directory is on sys.path
34
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
35
+
36
+ # Load environment from .env if present
37
+ try:
38
+ from dotenv import load_dotenv
39
+ load_dotenv()
40
+ except ImportError:
41
+ pass
42
+
43
+ os.environ.setdefault('PISTE_LOG_LEVEL', 'WARNING')
44
+
45
+ # ── Pipeline imports (lazy) ────────────────────────────────────────────
46
+
47
# Lazily populated process-wide state: whether DSPy has been configured, and
# the event loop reused across requests.
_initialized = False
_loop = None


def get_loop():
    """Return the shared event loop, creating and registering one if needed.

    A new loop is created on first use and again whenever the previous one has
    been closed; otherwise the same loop object is returned on every call.
    """
    global _loop
    needs_new_loop = _loop is None or _loop.is_closed()
    if needs_new_loop:
        fresh = asyncio.new_event_loop()
        asyncio.set_event_loop(fresh)
        _loop = fresh
    return _loop
56
+
57
+
58
def init_pipeline():
    """Configure DSPy once per process; subsequent calls do nothing."""
    global _initialized
    if not _initialized:
        # Imported on first use so that starting the worker does not load
        # the pipeline package.
        from pipeline.compiler import configure_dspy

        configure_dspy()
        _initialized = True
67
+
68
+
69
def _warn(stage: str, exc: Exception) -> None:
    """Report a swallowed stage failure on stderr (stdout carries the JSON protocol)."""
    sys.stderr.write(f"[piste] {stage} failed: {type(exc).__name__}: {exc}\n")
    sys.stderr.flush()


def _as_list(value) -> list:
    """Coerce a module output to a list without splitting a lone string into characters."""
    if value is None:
        return []
    if isinstance(value, str):
        return [value] if value.strip() else []
    try:
        return list(value)
    except TypeError:
        return [value]


def _source_fields(source) -> tuple:
    """Extract ``(text, url, title)`` from a retrieved source of unknown shape."""
    if isinstance(source, dict):
        text = source.get('text') or source.get('content') or source.get('title') or ''
        url = source.get('url') or source.get('link') or ''
        title = source.get('title') or url
    else:
        text = source if isinstance(source, str) else str(source)
        url = ''
        title = text[:100]
    return str(text), str(url), str(title)


def _fallback_verdict(classifications: list) -> tuple:
    """Derive ``(label, explanation, distribution)`` by vote counting.

    Used only when the verdict aggregator raises. With no supporting and no
    refuting sources the verdict is UNVERIFIABLE, never HALF_TRUE.
    """
    supports = sum(1 for c in classifications if c['classification'] == 'SUPPORTS')
    refutes = sum(1 for c in classifications if c['classification'] == 'REFUTES')
    count = len(classifications)
    total = count or 1  # keeps the ratio thresholds defined for an empty list

    if supports > refutes and supports > total * 0.5:
        label = 'TRUE' if supports > total * 0.8 else 'MOSTLY_TRUE'
    elif refutes > supports and refutes > total * 0.5:
        label = 'FALSE' if refutes > total * 0.8 else 'MOSTLY_FALSE'
    elif supports == refutes and supports > 0:
        # A tie only means something when there is evidence on both sides.
        label = 'HALF_TRUE'
    else:
        label = 'UNVERIFIABLE'

    explanation = f"Based on {supports} supporting and {refutes} refuting sources out of {count} total."
    distribution = {"TRUE": 0, "MOSTLY_TRUE": 0, "HALF_TRUE": 0, "MOSTLY_FALSE": 0, "FALSE": 0, "PANTS_ON_FIRE": 0, "UNVERIFIABLE": 0}
    return label, explanation, distribution


async def run_fact_check(claim_text: str, locale: str = "en", context: str = None) -> dict:
    """Run the four Piste stages on one claim and return a JSON-ready summary.

    Stages: (1) check-worthiness and atomic decomposition, (2) search decision
    and retrieval, (3) per-source classification, (4) verdict aggregation.
    Stage modules are imported lazily and called synchronously; nothing is
    awaited inside this coroutine.

    Failures in decomposition, retrieval, classification and aggregation are
    reported on stderr and replaced by fallbacks; a failure of the
    check-worthiness call propagates to the caller.

    Args:
        claim_text: The claim to verify.
        locale: Locale passed to the retriever and used in the fallback query.
        context: Accepted for interface compatibility; currently unused.

    Returns:
        A dict with ``run_id``, ``claim_id``, ``verdict`` (label, distribution,
        explanation, up to 15 sources), ``stage1``, ``stage2``, ``audit_url``
        and ``elapsed_ms`` (wall-clock duration of this call).
    """
    import time
    import uuid

    started = time.monotonic()
    run_id = str(uuid.uuid4())[:12]

    # ── Stage 1: Check-Worthiness + Atomic Decomposition ──────────
    from pipeline.stage1.check_worthiness import CheckWorthinessDetector
    from pipeline.stage1.atomic_decomposer import AtomicClaimDecomposer

    cw_detector = CheckWorthinessDetector()
    decomposer = AtomicClaimDecomposer()

    # Check if the claim is worth fact-checking
    cw_result = cw_detector(claim_text)
    cw_label = cw_result.label if hasattr(cw_result, 'label') else str(cw_result)
    cw_score = getattr(cw_result, 'score', 1.0)

    # Decompose into atomic claims
    atomic_claims = []
    try:
        decomp_result = decomposer(claim_text)
        if hasattr(decomp_result, 'claims'):
            atomic_claims = _as_list(decomp_result.claims)
        elif isinstance(decomp_result, list):
            atomic_claims = decomp_result
    except Exception as exc:
        _warn("stage 1 decomposition", exc)
        atomic_claims = [claim_text]

    # ── Stage 2: Web Retrieval ────────────────────────────────────
    from pipeline.stage2.search_decision import SearchDecisionGenerator
    from pipeline.stage2.blind_retriever import BlindRetriever

    sd_generator = SearchDecisionGenerator()
    retriever = BlindRetriever()

    all_sources = []
    search_needed = True

    try:
        sd_result = sd_generator(claim_text)
        search_needed = getattr(sd_result, 'search_needed', True)

        if search_needed:
            # The retriever is given only the generated queries, not the claim.
            queries = []
            if hasattr(sd_result, 'queries'):
                queries = _as_list(sd_result.queries)
            elif hasattr(sd_result, 'search_queries'):
                queries = _as_list(sd_result.search_queries)

            if not queries:
                # Fallback: generate a simple neutral query
                queries = [f"fact check {locale} political claim"]

            for query in queries[:5]:  # Max 5 queries
                try:
                    results = retriever(query, locale=locale)
                    if hasattr(results, 'sources'):
                        all_sources.extend(results.sources)
                    elif isinstance(results, list):
                        all_sources.extend(results)
                except Exception as exc:
                    # One failed query must not discard the others.
                    _warn("stage 2 retrieval", exc)
    except Exception as exc:
        _warn("stage 2 search decision", exc)

    # ── Stage 3: Per-Source Classification ────────────────────────
    from pipeline.stage3.classifier import SourceClassifier

    classifier = SourceClassifier()
    classifications = []

    for source in all_sources[:20]:  # Max 20 sources
        source_url = ''
        source_title = ''
        try:
            source_text, source_url, source_title = _source_fields(source)

            if not source_text.strip():
                continue

            classification = classifier(claim_text, source_text)
            label = getattr(classification, 'label', 'UNRELATED')
            confidence = getattr(classification, 'confidence', 0.5)
            rationale = getattr(classification, 'rationale', '')

            classifications.append({
                "url": source_url,
                "title": source_title[:200],
                "classification": str(label).upper(),
                "confidence": float(confidence),
                "rationale": str(rationale)[:500],
            })
        except Exception as exc:
            _warn("stage 3 classification", exc)
            # Built from already-normalized strings so this handler cannot
            # itself raise (e.g. on a source whose title is None).
            classifications.append({
                "url": source_url,
                "title": source_title[:200],
                "classification": "UNRELATED",
                "confidence": 0.0,
                "rationale": "Classification failed",
            })

    # ── Stage 4: Verdict Aggregation ──────────────────────────────
    from pipeline.stage4.verdict_aggregator import VerdictAggregator

    aggregator = VerdictAggregator()

    try:
        verdict = aggregator(classifications, claim_text)
        verdict_label = getattr(verdict, 'label', 'UNVERIFIABLE')
        verdict_explanation = getattr(verdict, 'explanation', '')
        verdict_distribution = getattr(verdict, 'distribution', {})
    except Exception as exc:
        _warn("stage 4 verdict aggregation", exc)
        verdict_label, verdict_explanation, verdict_distribution = _fallback_verdict(classifications)

    return {
        "run_id": run_id,
        "claim_id": run_id,
        "verdict": {
            "label": str(verdict_label).upper(),
            "distribution": verdict_distribution if isinstance(verdict_distribution, dict) else {},
            "explanation": str(verdict_explanation)[:2000],
            "sources": classifications[:15],
        },
        "stage1": {
            "check_worthy": str(cw_label),
            "score": float(cw_score),
            "atomic_claims": [str(c) for c in atomic_claims[:5]],
        },
        "stage2": {
            "search_needed": bool(search_needed),
            "sources_found": len(all_sources),
        },
        "audit_url": f"piste://claims/{run_id}",
        "elapsed_ms": int((time.monotonic() - started) * 1000),
    }
235
+
236
+
237
+ # ── Action Handlers ───────────────────────────────────────────────────
238
+
239
def handle_fact_check(params):
    """Validate a ``fact_check`` request and run the pipeline on it.

    Args:
        params: Request parameters; reads ``claim_text`` (required),
            ``locale`` (default ``"en"``) and ``context`` (default ``None``).
            Anything that is not a dict is treated as empty parameters.

    Returns:
        ``{"error": ...}`` when the claim is missing, not a string, or shorter
        than 10 characters after trimming whitespace; otherwise the dict
        produced by ``run_fact_check``.
    """
    if not isinstance(params, dict):
        params = {}

    raw_claim = params.get("claim_text", "")
    locale = params.get("locale", "en")
    context = params.get("context", None)

    # Non-string input and whitespace padding must not pass the length check.
    claim_text = raw_claim.strip() if isinstance(raw_claim, str) else ""
    if len(claim_text) < 10:
        return {"error": "Claim text must be at least 10 characters"}

    loop = get_loop()
    init_pipeline()

    return loop.run_until_complete(run_fact_check(claim_text, locale, context))
253
+
254
+
255
+ # ── Dispatcher ────────────────────────────────────────────────────────
256
+
257
def _handle_health(p):
    """Report that the bridge process is alive; the parameters are ignored."""
    return {"status": "ok", "backend": "piste"}


# Maps the request's "action" string to the callable that serves it.
ACTIONS = dict(
    fact_check=handle_fact_check,
    health=_handle_health,
)
261
+
262
+
263
def main():
    """Serve newline-delimited JSON requests from stdin until EOF.

    Writes ``__READY__`` first, then one JSON line per answered request:
    ``{"id": ..., "result": ...}`` on success or ``{"id": ..., "error": ...}``
    when the action is unknown or the handler raises.

    Blank lines are skipped. Lines that are not valid JSON are skipped with a
    note on stderr (no request id is available to answer). A valid JSON value
    that is not an object is answered with an error carrying ``"id": null``
    instead of crashing the worker.
    """
    def reply(payload):
        # default=str keeps the worker alive if a result holds a non-JSON type.
        sys.stdout.write(json.dumps(payload, default=str) + "\n")
        sys.stdout.flush()

    # Send ready signal
    sys.stdout.write("__READY__\n")
    sys.stdout.flush()

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue

        try:
            request = json.loads(line)
        except json.JSONDecodeError as exc:
            # stderr only: stdout is reserved for protocol lines.
            sys.stderr.write(f"[piste] ignoring malformed request line: {exc}\n")
            sys.stderr.flush()
            continue

        if not isinstance(request, dict):
            reply({"id": None, "error": "Request must be a JSON object"})
            continue

        req_id = request.get("id")
        action = request.get("action", "")
        # An explicit `"params": null` is treated like missing params.
        params = request.get("params") or {}

        handler = ACTIONS.get(action)
        if not handler:
            reply({"id": req_id, "error": f"Unknown action: {action}"})
            continue

        try:
            response = {"id": req_id, "result": handler(params)}
        except Exception as e:
            # Top-level boundary: one failing request must not end the worker.
            response = {"id": req_id, "error": f"{type(e).__name__}: {str(e)}"}

        reply(response)
298
+
299
+
300
+ if __name__ == "__main__":
301
+ main()
@@ -0,0 +1,4 @@
1
+ # Copyright (c) 2026 Jinan Kordab
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ # Pipeline package — DSPy 2.6 fact-checking modules
@@ -0,0 +1,68 @@
1
+ # Copyright (c) 2026 Jinan Kordab
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ """
5
+ DSPy Compiler Configuration
6
+ ============================
7
+ Offline re-optimization of pipeline modules using human-labeled feedback (Loop 3).
8
+ """
9
+
10
+ import dspy
11
+ from app.core.config import settings
12
+ from app.core.debuglog import log
13
+
14
+ _dspy_configured = False
15
+
16
+
17
def configure_dspy():
    """Build the LiteLLM-backed DSPy language model and install it as the default.

    The API key is chosen from the model name in ``settings.LITELLM_MODEL``:
    DeepSeek, then Claude/Anthropic, otherwise OpenAI.

    The first call installs the model globally with ``dspy.configure``. Later
    calls leave the global configuration untouched and only build and return a
    new model; to use it, the caller must enter
    ``with dspy.context(lm=returned_lm):`` around its own DSPy calls.

    Returns:
        The ``dspy.LM`` instance built by this call.
    """
    global _dspy_configured

    model = settings.LITELLM_MODEL
    model_lower = model.lower()

    # Route the correct API key based on the model name
    if "deepseek" in model_lower:
        api_key = settings.DEEPSEEK_API_KEY
    elif "claude" in model_lower or "anthropic" in model_lower:
        api_key = settings.ANTHROPIC_API_KEY
    else:
        api_key = settings.OPENAI_API_KEY

    lm = dspy.LM(
        model=model,
        api_key=api_key,
    )

    if not _dspy_configured:
        log(f"DSPY: first configure(), model={model}")
        dspy.configure(lm=lm)
        _dspy_configured = True
    else:
        # dspy.context() is a context manager: calling it without `with`
        # applies nothing, so it is not called here. The override is left to
        # the caller, who controls the scope in which it should hold.
        log("DSPY: already configured; returning LM for use with dspy.context()")

    return lm
45
+
46
+
47
def compile_module(module: dspy.Module, trainset: list[dspy.Example], metric=None) -> dspy.Module:
    """Run the configured DSPy optimizer on a module and return the compiled result.

    Loop 3 [C4][J1]: User feedback → labeled examples → re-optimization.

    Args:
        module: The DSPy module to optimize.
        trainset: Labeled examples passed to the optimizer.
        metric: Optional scoring callable handed to the optimizer. Defaults to
            ``None``, which reproduces the previous hard-coded behavior.
            NOTE(review): MIPROv2 is expected to need a real metric — confirm
            against the DSPy version in use.

    Returns:
        The module returned by ``optimizer.compile``.

    Raises:
        ValueError: If ``settings.DSPY_OPTIMIZER`` is neither
            ``"BootstrapFewShot"`` nor ``"MIPROv2"``.
    """
    optimizer_name = settings.DSPY_OPTIMIZER

    if optimizer_name == "BootstrapFewShot":
        optimizer = dspy.BootstrapFewShot(
            metric=metric,
            max_labeled_demos=settings.DSPY_MAX_LABELED_EXAMPLES,
        )
    elif optimizer_name == "MIPROv2":
        optimizer = dspy.MIPROv2(
            metric=metric,
            num_threads=4,
        )
    else:
        raise ValueError(f"Unknown DSPy optimizer: {optimizer_name}")

    return optimizer.compile(module, trainset=trainset)
@@ -0,0 +1,28 @@
1
+ # Copyright (c) 2026 Jinan Kordab
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ # Offline — VERIFAID Dataset Pipeline [J7]
5
+ # M1: Generate Claims (LLM, multilingual) — pipeline/offline/verifaid_pipeline.py
6
+ # M2: Enrich + Label + FAISS Index — pipeline/offline/verifaid_pipeline.py
7
+ # Scheduler + Loop 2 integration — pipeline/offline/verifaid_pipeline.py
8
+
9
+ from pipeline.offline.verifaid_pipeline import (
10
+ ClaimGenerator, claim_generator,
11
+ EvidenceEnricher, evidence_enricher,
12
+ OfflinePipelineScheduler, offline_scheduler,
13
+ ClaimGenerationSignature, EvidenceLabelingSignature,
14
+ TOPICS, LOCALES,
15
+ )
16
+
17
+ __all__ = [
18
+ "ClaimGenerator",
19
+ "claim_generator",
20
+ "EvidenceEnricher",
21
+ "evidence_enricher",
22
+ "OfflinePipelineScheduler",
23
+ "offline_scheduler",
24
+ "ClaimGenerationSignature",
25
+ "EvidenceLabelingSignature",
26
+ "TOPICS",
27
+ "LOCALES",
28
+ ]