mcp-agentic-pipelines 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +93 -0
- package/README.md +258 -0
- package/package.json +70 -0
- package/packages/clinical/package.json +22 -0
- package/packages/clinical/src/index.ts +262 -0
- package/packages/clinical/tsconfig.json +13 -0
- package/packages/core/package.json +21 -0
- package/packages/core/src/config.ts +138 -0
- package/packages/core/src/errors.ts +100 -0
- package/packages/core/src/index.ts +104 -0
- package/packages/core/src/llm-config.ts +213 -0
- package/packages/core/src/logging.ts +66 -0
- package/packages/core/src/python-bridge.ts +384 -0
- package/packages/core/src/rate-limiter.ts +136 -0
- package/packages/core/src/types.ts +203 -0
- package/packages/core/src/validation.ts +101 -0
- package/packages/core/tsconfig.json +10 -0
- package/packages/deeppipe/package.json +21 -0
- package/packages/deeppipe/src/index.ts +424 -0
- package/packages/deeppipe/tsconfig.json +13 -0
- package/packages/piste/package.json +20 -0
- package/packages/piste/src/index.ts +48 -0
- package/packages/piste/tsconfig.json +13 -0
- package/packages/precis/package.json +20 -0
- package/packages/precis/src/index.ts +67 -0
- package/packages/precis/tsconfig.json +13 -0
- package/packages/server/package.json +31 -0
- package/packages/server/src/index.ts +427 -0
- package/packages/server/tsconfig.json +17 -0
- package/setup.mjs +141 -0
- package/test.mjs +337 -0
- package/vendors/clinical-intake/pipeline.mjs +349 -0
- package/vendors/clinical-intake/questions/en.txt +9 -0
- package/vendors/clinical-intake/questions/fr.txt +9 -0
- package/vendors/piste/.env.example +73 -0
- package/vendors/piste/app/core/__init__.py +4 -0
- package/vendors/piste/app/core/config.py +83 -0
- package/vendors/piste/app/core/debuglog.py +16 -0
- package/vendors/piste/app/core/middleware.py +40 -0
- package/vendors/piste/bridge_piste.py +301 -0
- package/vendors/piste/pipeline/__init__.py +4 -0
- package/vendors/piste/pipeline/compiler.py +68 -0
- package/vendors/piste/pipeline/offline/__init__.py +28 -0
- package/vendors/piste/pipeline/offline/verifaid_pipeline.py +247 -0
- package/vendors/piste/pipeline/replay.py +15 -0
- package/vendors/piste/pipeline/replay_engine.py +249 -0
- package/vendors/piste/pipeline/signatures/__init__.py +4 -0
- package/vendors/piste/pipeline/signatures/signatures.py +136 -0
- package/vendors/piste/pipeline/stage1/__init__.py +21 -0
- package/vendors/piste/pipeline/stage1/atomic_decomposer.py +61 -0
- package/vendors/piste/pipeline/stage1/check_worthiness.py +100 -0
- package/vendors/piste/pipeline/stage1/orchestrator.py +175 -0
- package/vendors/piste/pipeline/stage1/test_stage1.py +162 -0
- package/vendors/piste/pipeline/stage2/__init__.py +34 -0
- package/vendors/piste/pipeline/stage2/blind_retriever.py +303 -0
- package/vendors/piste/pipeline/stage2/canonical_mapper.py +124 -0
- package/vendors/piste/pipeline/stage2/credibility_scorer.py +85 -0
- package/vendors/piste/pipeline/stage2/orchestrator.py +311 -0
- package/vendors/piste/pipeline/stage2/query_refiner.py +88 -0
- package/vendors/piste/pipeline/stage2/search_decision.py +69 -0
- package/vendors/piste/pipeline/stage2/test_stage2.py +265 -0
- package/vendors/piste/pipeline/stage3/__init__.py +20 -0
- package/vendors/piste/pipeline/stage3/classifier.py +79 -0
- package/vendors/piste/pipeline/stage3/orchestrator.py +225 -0
- package/vendors/piste/pipeline/stage3/test_stage3.py +101 -0
- package/vendors/piste/pipeline/stage4/__init__.py +33 -0
- package/vendors/piste/pipeline/stage4/criticality_gate.py +177 -0
- package/vendors/piste/pipeline/stage4/orchestrator.py +269 -0
- package/vendors/piste/pipeline/stage4/test_stage4.py +192 -0
- package/vendors/piste/pipeline/stage4/verdict_aggregator.py +157 -0
- package/vendors/piste/requirements.txt +53 -0
- package/vendors/precis/backend/__init__.py +6 -0
- package/vendors/precis/backend/agents/__init__.py +3 -0
- package/vendors/precis/backend/agents/data_synthesis.py +105 -0
- package/vendors/precis/backend/agents/dist_free_synth.py +97 -0
- package/vendors/precis/backend/agents/exact_hash_retriever.py +327 -0
- package/vendors/precis/backend/agents/fusion_ranker.py +64 -0
- package/vendors/precis/backend/agents/guardrail.py +175 -0
- package/vendors/precis/backend/agents/query_expander.py +89 -0
- package/vendors/precis/backend/agents/radial_interpol.py +99 -0
- package/vendors/precis/backend/agents/report_generator.py +92 -0
- package/vendors/precis/backend/agents/semantic_reranker.py +135 -0
- package/vendors/precis/backend/agents/stat_anomaly.py +93 -0
- package/vendors/precis/backend/agents/vector_index.py +123 -0
- package/vendors/precis/backend/agents/veri_score.py +341 -0
- package/vendors/precis/backend/agents/work_order_extractor.py +205 -0
- package/vendors/precis/backend/api/__init__.py +3 -0
- package/vendors/precis/backend/api/routes/__init__.py +3 -0
- package/vendors/precis/backend/config.py +88 -0
- package/vendors/precis/backend/core/__init__.py +13 -0
- package/vendors/precis/backend/core/hashing.py +22 -0
- package/vendors/precis/backend/core/metrics.py +77 -0
- package/vendors/precis/backend/core/multitoken.py +166 -0
- package/vendors/precis/backend/core/pmi.py +54 -0
- package/vendors/precis/backend/core/stemming.py +74 -0
- package/vendors/precis/backend/core/tracing.py +150 -0
- package/vendors/precis/backend/data/__init__.py +3 -0
- package/vendors/precis/backend/data/chunker.py +57 -0
- package/vendors/precis/backend/data/pdf_parser.py +42 -0
- package/vendors/precis/backend/db/__init__.py +3 -0
- package/vendors/precis/backend/db/models.py +173 -0
- package/vendors/precis/backend/db/repository.py +269 -0
- package/vendors/precis/backend/llm/__init__.py +3 -0
- package/vendors/precis/backend/llm/anthropic_provider.py +39 -0
- package/vendors/precis/backend/llm/base.py +147 -0
- package/vendors/precis/backend/llm/deepseek_provider.py +43 -0
- package/vendors/precis/backend/llm/factory.py +60 -0
- package/vendors/precis/backend/llm/google_provider.py +39 -0
- package/vendors/precis/backend/llm/ollama_provider.py +54 -0
- package/vendors/precis/backend/llm/openai_provider.py +50 -0
- package/vendors/precis/backend/main.py +677 -0
- package/vendors/precis/backend/orchestrator/__init__.py +3 -0
- package/vendors/precis/backend/orchestrator/planner.py +81 -0
- package/vendors/precis/backend/orchestrator/router.py +319 -0
- package/vendors/precis/backend/orchestrator/types.py +58 -0
- package/vendors/precis/bridge_precis.py +185 -0
- package/vendors/precis/data/sample_reports/README.md +8 -0
- package/vendors/precis/data/seed_data.py +115 -0
- package/vendors/precis/requirements.txt +19 -0
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""© JINAN KORDAB — 2026 AI HYBRID AGENTIC RETRIEVAL-AUGMENTED GENERATION RAG PIPELINE - PERSONAL PROJECT"""
|
|
2
|
+
|
|
3
|
+
import hashlib
import sys, os
import threading

# Extend the import path before the backend imports below are resolved.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'backend'))

from backend.agents.exact_hash_retriever import NestedHashIndex, MultiToken
from backend.core.stemming import PrecisStemmer
from backend.core.multitoken import MultiTokenExtractor
from backend.db.repository import init_db, register_document
|
|
11
|
+
|
|
12
|
+
# Sample financial documents for the demo — realistic excerpts: 10-K-style risk factors, an earnings-call transcript, and forward-looking guidance
|
|
13
|
+
DEMO_DOCUMENTS = {
|
|
14
|
+
"annual_report_2025.txt": """
|
|
15
|
+
RISK FACTORS
|
|
16
|
+
|
|
17
|
+
Our business is subject to numerous risks and uncertainties. The following risk factors could materially affect our financial condition and results of operations.
|
|
18
|
+
|
|
19
|
+
Financial Market Risk: Our investment portfolio is exposed to interest rate fluctuations and credit market volatility. A 100 basis point increase in interest rates could reduce our portfolio value by approximately $450 million. We maintain a diversified investment strategy to mitigate concentration risk.
|
|
20
|
+
|
|
21
|
+
Revenue Concentration Risk: In fiscal year 2025, our top three customers accounted for 42% of total revenue. The loss of any major customer could significantly impact operating results. Customer A represented 22% of revenue, Customer B 12%, and Customer C 8%. We are actively diversifying our customer base through geographic expansion and new product offerings.
|
|
22
|
+
|
|
23
|
+
Regulatory Compliance Risk: Our operations are subject to extensive regulation across multiple jurisdictions. Recent changes in data privacy laws, including the amended CCPA and new EU AI Act provisions, require significant compliance investments estimated at $15-20 million annually. Non-compliance could result in penalties up to 4% of global revenue.
|
|
24
|
+
|
|
25
|
+
Cybersecurity Risk: We experienced two material security incidents in 2024 affecting approximately 3.2 million customer records. Remediation costs totaled $78 million, including system upgrades, legal fees, and regulatory fines. We have since implemented zero-trust architecture and enhanced encryption protocols across all systems.
|
|
26
|
+
|
|
27
|
+
Supply Chain Disruption Risk: Semiconductor shortages continue to impact our manufacturing operations. Lead times for critical components have extended from 8 weeks to 26 weeks. We have diversified suppliers from 3 to 7 vendors and invested $200 million in strategic inventory reserves.
|
|
28
|
+
|
|
29
|
+
Foreign Exchange Risk: With 35% of revenue generated outside the United States, currency fluctuations materially impact reported results. A 10% strengthening of the US dollar would reduce annual revenue by approximately $280 million. We hedge approximately 60% of forecasted foreign currency exposure.
|
|
30
|
+
|
|
31
|
+
Climate Transition Risk: Evolving climate regulations and carbon pricing mechanisms could increase operational costs. Our manufacturing facilities in jurisdictions with carbon taxes face an estimated $50 million annual cost increase by 2027. We have committed to carbon neutrality by 2030 with $500 million allocated to renewable energy transitions.
|
|
32
|
+
""",
|
|
33
|
+
|
|
34
|
+
"earnings_transcript_q3.txt": """
|
|
35
|
+
Q3 2025 EARNINGS CALL TRANSCRIPT
|
|
36
|
+
|
|
37
|
+
OPERATOR: Good morning and welcome to the Q3 2025 earnings conference call.
|
|
38
|
+
|
|
39
|
+
CEO STATEMENT: Revenue grew 14% year-over-year to $4.2 billion, driven by strong performance in cloud services and AI infrastructure segments. However, we faced headwinds in consumer electronics where revenue declined 8% due to softening demand and increased competition. Operating margins expanded 120 basis points to 24.3%.
|
|
40
|
+
|
|
41
|
+
CFO STATEMENT: Our balance sheet remains strong with $8.5 billion in cash and equivalents. Free cash flow generation was $1.1 billion in Q3, up 22% year-over-year. We repurchased $500 million in shares and increased our quarterly dividend by 15%. Looking ahead, we expect Q4 revenue between $4.0 and $4.3 billion, reflecting normal seasonal patterns and ongoing supply chain constraints.
|
|
42
|
+
|
|
43
|
+
ANALYST Q&A: Regarding the semiconductor supply situation — we've secured additional capacity from two new suppliers in Taiwan and Germany. This should reduce our lead time exposure by approximately 40% by Q2 2026. On the AI infrastructure side, demand continues to exceed our capacity to deliver. Our order backlog in this segment has grown to $3.2 billion.
|
|
44
|
+
|
|
45
|
+
RISK COMMENTARY: We are monitoring three key risk areas: first, the ongoing trade tensions affecting our supply chain in Asia; second, the regulatory environment around AI model deployment which could impact our fastest-growing segment; and third, talent retention in our engineering organization where attrition has increased to 12% from the historical 7-8% range.
|
|
46
|
+
""",
|
|
47
|
+
|
|
48
|
+
"guidance_2026.txt": """
|
|
49
|
+
FORWARD-LOOKING GUIDANCE — FISCAL YEAR 2026
|
|
50
|
+
|
|
51
|
+
This document contains forward-looking statements based on current expectations and assumptions. Actual results may differ materially.
|
|
52
|
+
|
|
53
|
+
REVENUE OUTLOOK: We expect full-year revenue of $17.5-18.5 billion, representing 8-14% growth. Cloud and AI infrastructure segments are expected to grow 25-35%, while consumer electronics may decline 5-10% before stabilizing in H2 2026.
|
|
54
|
+
|
|
55
|
+
MARGIN EXPECTATIONS: Gross margins are projected at 42-44%, reflecting improved product mix toward higher-margin cloud services offset by continued component cost pressure. Operating margins are expected to reach 25-26% through operating leverage and cost optimization initiatives.
|
|
56
|
+
|
|
57
|
+
CAPITAL ALLOCATION: Capital expenditures of $2.8-3.2 billion focused on AI infrastructure buildout and supply chain diversification. We expect to return $2.0 billion to shareholders through buybacks and dividends, subject to market conditions.
|
|
58
|
+
|
|
59
|
+
KEY ASSUMPTIONS AND RISKS: Our guidance assumes no material deterioration in US-China trade relations, stable foreign exchange rates at current levels, and successful qualification of new semiconductor suppliers by Q2 2026. Key risks include: extended semiconductor shortages beyond Q2 2026, broader economic slowdown affecting enterprise IT spending, regulatory actions affecting AI model deployment, and cybersecurity incidents exceeding our risk tolerance thresholds.
|
|
60
|
+
|
|
61
|
+
SENSITIVITY ANALYSIS: A 100bps change in interest rates impacts net interest income by approximately $80 million. A 10% currency move impacts revenue by approximately $280 million. Each week of semiconductor supply disruption beyond our buffer stock impacts revenue by approximately $45 million.
|
|
62
|
+
"""
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _demo_page_elements(text):
    """Convert one demo document's raw text into a list of element dicts.

    The text is stripped, split on newlines, and every non-blank line
    (after stripping) becomes one element. A line counts as a title when
    it is entirely upper-case and its length is strictly between 5 and 80
    characters; titles get font size 18.0, everything else 12.0. A line is
    flagged as a header when it is a title or ends with a colon.
    """
    elements = []
    for raw_line in text.strip().split("\n"):
        content = raw_line.strip()
        if not content:
            # Blank separator lines carry no content.
            continue
        title = content.isupper() and 5 < len(content) < 80
        elements.append({
            "text": content,
            "font_size": 18.0 if title else 12.0,
            "is_title": title,
            "is_header": content.endswith(":") or title
        })
    return elements


def seed_all():
    """Build a fresh index from DEMO_DOCUMENTS and register each document.

    Initializes the database, then for every (filename, text) pair in
    DEMO_DOCUMENTS: hashes the text with SHA-256, stores the raw text on
    the index, indexes the parsed single-page structure through the
    multi-token extractor, and registers the document in the database.

    Returns:
        A ``(index, total_tokens)`` tuple: the populated NestedHashIndex
        and the sum of the counts returned by ``index_document``.
    """
    init_db()
    # NOTE(review): this instance is never used below; it is kept in case
    # the constructor has side effects — confirm before removing.
    stemmer = PrecisStemmer()
    extractor = MultiTokenExtractor(max_token_length=7, min_token_length=2)
    index = NestedHashIndex()

    total_tokens = 0
    for filename, text in DEMO_DOCUMENTS.items():
        digest = hashlib.sha256(text.encode()).hexdigest()

        # Keep the untouched source text on the index (per the original
        # comment: used for context retrieval).
        index._doc_texts[filename] = text

        # Every demo document is modelled as a single page.
        pages = [{"page_number": 1, "elements": _demo_page_elements(text)}]

        indexed = extractor.index_document(filename, pages, index)
        total_tokens += indexed

        # The literal 1 is presumably the page count — verify against
        # register_document's signature.
        register_document(filename, digest, 1, indexed)

    print(f"[Seed] Indexed {total_tokens} multi-tokens from {len(DEMO_DOCUMENTS)} documents")
    return index, total_tokens
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# Module-level singleton — populated lazily on the first get_demo_index()
# call (not at import time).
_demo_index: "NestedHashIndex | None" = None

# Guards first-time construction so concurrent callers cannot both run
# seed_all().
_demo_index_lock = threading.Lock()


def get_demo_index() -> NestedHashIndex:
    """Return the shared demo index, creating it on first use.

    The first call runs ``seed_all()`` and caches the resulting index in
    the module-level ``_demo_index``; later calls return the cached object.
    Construction happens under ``_demo_index_lock`` with a second check
    inside the lock, so ``seed_all()`` runs at most once even when several
    threads make the first call at the same time.

    Returns:
        The cached NestedHashIndex produced by ``seed_all()``.
    """
    global _demo_index
    if _demo_index is None:
        with _demo_index_lock:
            # Another thread may have finished seeding while this one was
            # waiting for the lock, so check again before building.
            if _demo_index is None:
                built_index, _ = seed_all()
                _demo_index = built_index
    return _demo_index
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Precis — Multi-Agent RAG Platform
|
|
2
|
+
# Dual retrieval: hash keyword + FAISS vector + LLM synthesis
|
|
3
|
+
# LLM: DeepSeek (OpenAI-compatible API)
|
|
4
|
+
|
|
5
|
+
fastapi
|
|
6
|
+
uvicorn[standard]
|
|
7
|
+
pydantic
|
|
8
|
+
pydantic-settings
|
|
9
|
+
numpy
|
|
10
|
+
pandas
|
|
11
|
+
nltk
|
|
12
|
+
sqlalchemy
|
|
13
|
+
httpx
|
|
14
|
+
openai
|
|
15
|
+
pymupdf
|
|
16
|
+
python-multipart
|
|
17
|
+
websockets
|
|
18
|
+
faiss-cpu
|
|
19
|
+
sentence-transformers
|