cascadeflow 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cascadeflow-0.1.1/LICENSE +21 -0
- cascadeflow-0.1.1/PKG-INFO +636 -0
- cascadeflow-0.1.1/README.md +561 -0
- cascadeflow-0.1.1/cascadeflow/__init__.py +288 -0
- cascadeflow-0.1.1/cascadeflow/agent.py +1939 -0
- cascadeflow-0.1.1/cascadeflow.egg-info/PKG-INFO +636 -0
- cascadeflow-0.1.1/cascadeflow.egg-info/SOURCES.txt +41 -0
- cascadeflow-0.1.1/cascadeflow.egg-info/dependency_links.txt +1 -0
- cascadeflow-0.1.1/cascadeflow.egg-info/requires.txt +58 -0
- cascadeflow-0.1.1/cascadeflow.egg-info/top_level.txt +1 -0
- cascadeflow-0.1.1/pyproject.toml +215 -0
- cascadeflow-0.1.1/setup.cfg +4 -0
- cascadeflow-0.1.1/tests/test_agent.py +590 -0
- cascadeflow-0.1.1/tests/test_anthropic.py +114 -0
- cascadeflow-0.1.1/tests/test_caching.py +134 -0
- cascadeflow-0.1.1/tests/test_callbacks.py +143 -0
- cascadeflow-0.1.1/tests/test_cascade_executor.py +453 -0
- cascadeflow-0.1.1/tests/test_cascade_pipeline.py +544 -0
- cascadeflow-0.1.1/tests/test_complexity.py +136 -0
- cascadeflow-0.1.1/tests/test_config.py +236 -0
- cascadeflow-0.1.1/tests/test_cost_tracker.py +613 -0
- cascadeflow-0.1.1/tests/test_domain_detection.py +657 -0
- cascadeflow-0.1.1/tests/test_enforcement.py +527 -0
- cascadeflow-0.1.1/tests/test_exceptions.py +57 -0
- cascadeflow-0.1.1/tests/test_forecasting_anomaly.py +327 -0
- cascadeflow-0.1.1/tests/test_groq.py +183 -0
- cascadeflow-0.1.1/tests/test_hf_api.py +48 -0
- cascadeflow-0.1.1/tests/test_litellm_integration.py +384 -0
- cascadeflow-0.1.1/tests/test_ml_integration.py +346 -0
- cascadeflow-0.1.1/tests/test_ollama.py +289 -0
- cascadeflow-0.1.1/tests/test_openai.py +180 -0
- cascadeflow-0.1.1/tests/test_otel_integration.py +315 -0
- cascadeflow-0.1.1/tests/test_providers.py +0 -0
- cascadeflow-0.1.1/tests/test_reasoning_models.py +747 -0
- cascadeflow-0.1.1/tests/test_routing.py +596 -0
- cascadeflow-0.1.1/tests/test_semantic_quality.py +446 -0
- cascadeflow-0.1.1/tests/test_semantic_quality_simple.py +327 -0
- cascadeflow-0.1.1/tests/test_together.py +86 -0
- cascadeflow-0.1.1/tests/test_tool_calling.py +714 -0
- cascadeflow-0.1.1/tests/test_tool_integration.py +209 -0
- cascadeflow-0.1.1/tests/test_unified_embedding.py +352 -0
- cascadeflow-0.1.1/tests/test_utils.py +50 -0
- cascadeflow-0.1.1/tests/test_vllm.py +208 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Lemony Inc.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,636 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cascadeflow
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Smart AI model cascading for cost optimization - Save 40-85% on LLM costs with 2-6x faster responses. Available for Python and TypeScript/JavaScript.
|
|
5
|
+
Author-email: "Lemony Inc." <hello@lemony.ai>
|
|
6
|
+
Maintainer-email: "Lemony Inc." <hello@lemony.ai>
|
|
7
|
+
License: MIT
|
|
8
|
+
Project-URL: Homepage, https://lemony.ai
|
|
9
|
+
Project-URL: Documentation, https://docs.lemony.ai/cascadeflow
|
|
10
|
+
Project-URL: Repository, https://github.com/lemony-ai/cascadeflow
|
|
11
|
+
Project-URL: Bug Tracker, https://github.com/lemony-ai/cascadeflow/issues
|
|
12
|
+
Project-URL: Changelog, https://github.com/lemony-ai/cascadeflow/blob/main/CHANGELOG.md
|
|
13
|
+
Keywords: ai,llm,cost-optimization,model-routing,cascade,inference,openai,anthropic,gpt,claude,machine-learning,groq,typescript,javascript,browser,edge-functions
|
|
14
|
+
Classifier: Development Status :: 4 - Beta
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
24
|
+
Classifier: Operating System :: OS Independent
|
|
25
|
+
Requires-Python: >=3.9
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Requires-Dist: pydantic>=2.0.0
|
|
29
|
+
Requires-Dist: httpx>=0.25.0
|
|
30
|
+
Requires-Dist: tiktoken>=0.5.0
|
|
31
|
+
Provides-Extra: openai
|
|
32
|
+
Requires-Dist: openai>=1.0.0; extra == "openai"
|
|
33
|
+
Provides-Extra: anthropic
|
|
34
|
+
Requires-Dist: anthropic>=0.8.0; extra == "anthropic"
|
|
35
|
+
Provides-Extra: groq
|
|
36
|
+
Requires-Dist: groq>=0.4.0; extra == "groq"
|
|
37
|
+
Provides-Extra: huggingface
|
|
38
|
+
Requires-Dist: huggingface-hub>=0.19.0; extra == "huggingface"
|
|
39
|
+
Provides-Extra: together
|
|
40
|
+
Requires-Dist: together>=0.2.0; extra == "together"
|
|
41
|
+
Provides-Extra: vllm
|
|
42
|
+
Requires-Dist: vllm>=0.2.0; extra == "vllm"
|
|
43
|
+
Provides-Extra: providers
|
|
44
|
+
Requires-Dist: openai>=1.0.0; extra == "providers"
|
|
45
|
+
Requires-Dist: anthropic>=0.8.0; extra == "providers"
|
|
46
|
+
Requires-Dist: groq>=0.4.0; extra == "providers"
|
|
47
|
+
Provides-Extra: local
|
|
48
|
+
Requires-Dist: vllm>=0.2.0; extra == "local"
|
|
49
|
+
Provides-Extra: semantic
|
|
50
|
+
Requires-Dist: sentence-transformers>=2.2.0; extra == "semantic"
|
|
51
|
+
Provides-Extra: dev
|
|
52
|
+
Requires-Dist: pytest>=7.4.0; extra == "dev"
|
|
53
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
54
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
55
|
+
Requires-Dist: pytest-mock>=3.12.0; extra == "dev"
|
|
56
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
57
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
58
|
+
Requires-Dist: mypy>=1.5.0; extra == "dev"
|
|
59
|
+
Requires-Dist: isort>=5.12.0; extra == "dev"
|
|
60
|
+
Requires-Dist: pre-commit>=3.5.0; extra == "dev"
|
|
61
|
+
Requires-Dist: rich>=13.0.0; extra == "dev"
|
|
62
|
+
Provides-Extra: docs
|
|
63
|
+
Requires-Dist: mkdocs>=1.5.0; extra == "docs"
|
|
64
|
+
Requires-Dist: mkdocs-material>=9.4.0; extra == "docs"
|
|
65
|
+
Requires-Dist: mkdocstrings[python]>=0.23.0; extra == "docs"
|
|
66
|
+
Provides-Extra: all
|
|
67
|
+
Requires-Dist: openai>=1.0.0; extra == "all"
|
|
68
|
+
Requires-Dist: anthropic>=0.8.0; extra == "all"
|
|
69
|
+
Requires-Dist: groq>=0.4.0; extra == "all"
|
|
70
|
+
Requires-Dist: huggingface-hub>=0.19.0; extra == "all"
|
|
71
|
+
Requires-Dist: together>=0.2.0; extra == "all"
|
|
72
|
+
Requires-Dist: vllm>=0.2.0; extra == "all"
|
|
73
|
+
Requires-Dist: sentence-transformers>=2.2.0; extra == "all"
|
|
74
|
+
Dynamic: license-file
|
|
75
|
+
|
|
76
|
+
<div align="center">
|
|
77
|
+
|
|
78
|
+
<picture>
|
|
79
|
+
<source media="(prefers-color-scheme: dark)" srcset=".github/assets/CF_logo_bright.svg">
|
|
80
|
+
<source media="(prefers-color-scheme: light)" srcset=".github/assets/CF_logo_dark.svg">
|
|
81
|
+
<img alt="CascadeFlow Logo" src=".github/assets/CF_logo_dark.svg" width="400">
|
|
82
|
+
</picture>
|
|
83
|
+
|
|
84
|
+
# Smart AI model cascading for cost optimization
|
|
85
|
+
|
|
86
|
+
[](https://pypi.org/project/cascadeflow/)
|
|
87
|
+
[](https://www.npmjs.com/package/@cascadeflow/core)
|
|
88
|
+
[](https://www.npmjs.com/package/n8n-nodes-cascadeflow)
|
|
89
|
+
[](https://pypi.org/project/cascadeflow/)
|
|
90
|
+
[](./LICENSE)
|
|
91
|
+
[](https://pypi.org/project/cascadeflow/)
|
|
92
|
+
[](https://github.com/lemony-ai/cascadeflow)
|
|
93
|
+
[](https://github.com/lemony-ai/cascadeflow/actions/workflows/test.yml)
|
|
94
|
+
|
|
95
|
+
**[<img src=".github/assets/CF_python_color.svg" width="22" height="22" alt="Python" style="vertical-align: middle;"/> Python](#-python) • [<img src=".github/assets/CF_ts_color.svg" width="22" height="22" alt="TypeScript" style="vertical-align: middle;"/> TypeScript](#-typescript) • [<img src=".github/assets/CF_n8n_color.svg" width="22" height="22" alt="n8n" style="vertical-align: middle;"/> n8n](#-n8n-integration) • [📖 Docs](./docs/) • [💡 Examples](#examples)**
|
|
96
|
+
|
|
97
|
+
</div>
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
**Stop Bleeding Money on AI Calls. Cut Costs 30-65% in 3 Lines of Code.**
|
|
102
|
+
|
|
103
|
+
40-70% of text prompts and 20-60% of agent calls don't need expensive flagship models. You're overpaying every single day.
|
|
104
|
+
|
|
105
|
+
*Cascadeflow fixes this with intelligent model cascading, available in Python and TypeScript.*
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
pip install cascadeflow
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
npm install @cascadeflow/core
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
## Why Cascadeflow?
|
|
118
|
+
|
|
119
|
+
Cascadeflow is an intelligent AI model cascading library that dynamically selects the optimal model for each query or tool call through speculative execution. It's based on the research that 40-70% of queries don't require slow, expensive flagship models, and domain-specific smaller models often outperform large general-purpose models on specialized tasks. For the remaining queries that need advanced reasoning, Cascadeflow automatically escalates to flagship models if needed.
|
|
120
|
+
|
|
121
|
+
### Use Cases
|
|
122
|
+
|
|
123
|
+
Use Cascadeflow for:
|
|
124
|
+
|
|
125
|
+
- **Cost Optimization.** Reduce API costs by 40-85% through intelligent model cascading and speculative execution with automatic per-query cost tracking.
|
|
126
|
+
- **Cost Control and Transparency.** Built-in telemetry for query, model, and provider-level cost tracking with configurable budget limits and programmable spending caps.
|
|
127
|
+
- **Low Latency & Speed Optimization.** Sub-2ms framework overhead with fast provider routing (Groq sub-50ms). Cascade simple queries to fast models while reserving expensive models for complex reasoning, achieving 2-10x latency reduction overall. (use preset `PRESET_ULTRA_FAST`)
|
|
128
|
+
- **Multi-Provider Flexibility.** Unified API across **`OpenAI`, `Anthropic`, `Groq`, `Ollama`, `vLLM`, `Together`, and `Hugging Face`** with automatic provider detection and zero vendor lock-in. Optional **`LiteLLM`** integration for 100+ additional providers.
|
|
129
|
+
- **Edge & Local-Hosted AI Deployment.** Use best of both worlds: handle most queries with local models (vLLM, Ollama), then automatically escalate complex queries to cloud providers only when needed.
|
|
130
|
+
|
|
131
|
+
> **ℹ️ Note:** SLMs (under 10B parameters) are sufficiently powerful for 60-70% of agentic AI tasks. [Research paper](https://www.researchgate.net/publication/392371267_Small_Language_Models_are_the_Future_of_Agentic_AI)
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## How Cascadeflow Works
|
|
136
|
+
|
|
137
|
+
Cascadeflow uses **speculative execution with quality validation**:
|
|
138
|
+
|
|
139
|
+
1. **Speculatively executes** small, fast models first - optimistic execution ($0.15-0.30/1M tokens)
|
|
140
|
+
2. **Validates quality** of responses using configurable thresholds (completeness, confidence, correctness)
|
|
141
|
+
3. **Dynamically escalates** to larger models only when quality validation fails ($1.25-3.00/1M tokens)
|
|
142
|
+
4. **Learns patterns** to optimize future cascading decisions and domain specific routing
|
|
143
|
+
|
|
144
|
+
Zero configuration. Works with YOUR existing models (7 providers currently supported).
|
|
145
|
+
|
|
146
|
+
In practice, 60-70% of queries are handled by small, efficient models (8-20x cost difference) without requiring escalation.
|
|
147
|
+
|
|
148
|
+
**Result:** 40-85% cost reduction, 2-10x faster responses, zero quality loss.
|
|
149
|
+
|
|
150
|
+
```
|
|
151
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
152
|
+
│ Cascadeflow Stack │
|
|
153
|
+
├─────────────────────────────────────────────────────────────┤
|
|
154
|
+
│ │
|
|
155
|
+
│ ┌───────────────────────────────────────────────────────┐ │
|
|
156
|
+
│ │ Cascade Agent │ │
|
|
157
|
+
│ │ │ │
|
|
158
|
+
│ │ Orchestrates the entire cascade execution │ │
|
|
159
|
+
│ │ • Query routing & model selection │ │
|
|
160
|
+
│ │ • Drafter -> Verifier coordination │ │
|
|
161
|
+
│ │ • Cost tracking & telemetry │ │
|
|
162
|
+
│ └───────────────────────────────────────────────────────┘ │
|
|
163
|
+
│ ↓ │
|
|
164
|
+
│ ┌───────────────────────────────────────────────────────┐ │
|
|
165
|
+
│ │ Domain Pipeline │ │
|
|
166
|
+
│ │ │ │
|
|
167
|
+
│ │ Automatic domain classification │ │
|
|
168
|
+
│ │ • Rule-based detection (CODE, MATH, DATA, etc.) │ │
|
|
169
|
+
│ │ • Optional ML semantic classification │ │
|
|
170
|
+
│ │ • Domain-optimized pipelines & model selection │ │
|
|
171
|
+
│ └───────────────────────────────────────────────────────┘ │
|
|
172
|
+
│ ↓ │
|
|
173
|
+
│ ┌───────────────────────────────────────────────────────┐ │
|
|
174
|
+
│ │ Quality Validation Engine │ │
|
|
175
|
+
│ │ │ │
|
|
176
|
+
│ │ Multi-dimensional quality checks │ │
|
|
177
|
+
│ │ • Length validation (too short/verbose) │ │
|
|
178
|
+
│ │ • Confidence scoring (logprobs analysis) │ │
|
|
179
|
+
│ │ • Format validation (JSON, structured output) │ │
|
|
180
|
+
│ │ • Semantic alignment (intent matching) │ │
|
|
181
|
+
│ └───────────────────────────────────────────────────────┘ │
|
|
182
|
+
│ ↓ │
|
|
183
|
+
│ ┌───────────────────────────────────────────────────────┐ │
|
|
184
|
+
│ │ Cascading Engine (<2ms overhead) │ │
|
|
185
|
+
│ │ │ │
|
|
186
|
+
│ │ Smart model escalation strategy │ │
|
|
187
|
+
│ │ • Try cheap models first (speculative execution) │ │
|
|
188
|
+
│ │ • Validate quality instantly │ │
|
|
189
|
+
│ │ • Escalate only when needed │ │
|
|
190
|
+
│ │ • Automatic retry & fallback │ │
|
|
191
|
+
│ └───────────────────────────────────────────────────────┘ │
|
|
192
|
+
│ ↓ │
|
|
193
|
+
│ ┌───────────────────────────────────────────────────────┐ │
|
|
194
|
+
│ │ Provider Abstraction Layer │ │
|
|
195
|
+
│ │ │ │
|
|
196
|
+
│ │ Unified interface for 7+ providers │ │
|
|
197
|
+
│ │ • OpenAI • Anthropic • Groq • Ollama │ │
|
|
198
|
+
│ │ • Together • vLLM • HuggingFace • LiteLLM │ │
|
|
199
|
+
│ └───────────────────────────────────────────────────────┘ │
|
|
200
|
+
│ │
|
|
201
|
+
└─────────────────────────────────────────────────────────────┘
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
---
|
|
205
|
+
|
|
206
|
+
## Quick Start
|
|
207
|
+
|
|
208
|
+
### <img src=".github/assets/CF_python_color.svg" width="24" height="24" alt="Python"/> Python
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
pip install cascadeflow[all]
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
```python
|
|
215
|
+
from cascadeflow import CascadeAgent, ModelConfig
|
|
216
|
+
|
|
217
|
+
# Define your cascade - try cheap model first, escalate if needed
|
|
218
|
+
agent = CascadeAgent(models=[
|
|
219
|
+
ModelConfig(name="gpt-4o-mini", provider="openai", cost=0.00015), # Try first
|
|
220
|
+
ModelConfig(name="gpt-5", provider="openai", cost=0.00125), # Fallback
|
|
221
|
+
])
|
|
222
|
+
|
|
223
|
+
# Run query - automatically routes to optimal model
|
|
224
|
+
result = await agent.run("What's the capital of France?")
|
|
225
|
+
|
|
226
|
+
print(f"Answer: {result.content}")
|
|
227
|
+
print(f"Model used: {result.model_used}")
|
|
228
|
+
print(f"Cost: ${result.total_cost:.6f}")
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
<details>
|
|
232
|
+
<summary><b>💡 Optional: Enable ML-based Quality Engine & Domain Detection for Higher Accuracy</b></summary>
|
|
233
|
+
|
|
234
|
+
**Step 1:** Install the optional ML package:
|
|
235
|
+
|
|
236
|
+
```bash
|
|
237
|
+
pip install cascadeflow[ml] # Adds semantic similarity detection via FastEmbed
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
**Step 2:** Enable semantic detection in your agent:
|
|
241
|
+
|
|
242
|
+
```python
|
|
243
|
+
from cascadeflow import CascadeAgent, ModelConfig
|
|
244
|
+
|
|
245
|
+
# Enable ML-based semantic detection (optional parameter)
|
|
246
|
+
agent = CascadeAgent(
|
|
247
|
+
models=[
|
|
248
|
+
ModelConfig(name="gpt-4o-mini", provider="openai", cost=0.00015),
|
|
249
|
+
ModelConfig(name="gpt-5", provider="openai", cost=0.00125),
|
|
250
|
+
],
|
|
251
|
+
enable_semantic_detection=True # Optional: Uses ML for domain detection
|
|
252
|
+
)
|
|
253
|
+
|
|
254
|
+
# ML semantic detection is now active for all queries
|
|
255
|
+
result = await agent.run("Calculate the eigenvalues of matrix [[1,2],[3,4]]")
|
|
256
|
+
|
|
257
|
+
# Check which detection method was used
|
|
258
|
+
print(f"Domain: {result.metadata.get('domain_detected')}")
|
|
259
|
+
print(f"Method: {result.metadata.get('detection_method')}") # 'semantic' or 'rule-based'
|
|
260
|
+
print(f"Confidence: {result.metadata.get('domain_confidence', 0):.1%}")
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
**What you get:**
|
|
264
|
+
- 🎯 84-87% confidence on complex domains (MATH, CODE, DATA, STRUCTURED)
|
|
265
|
+
- 🔄 Automatic fallback to rule-based if ML dependencies unavailable
|
|
266
|
+
- 📈 Improved routing accuracy for specialized queries
|
|
267
|
+
- 🚀 Works seamlessly with your existing cascade setup
|
|
268
|
+
|
|
269
|
+
**Note:** If `enable_semantic_detection=True` but FastEmbed is not installed, CascadeFlow automatically falls back to rule-based detection without errors.
|
|
270
|
+
|
|
271
|
+
</details>
|
|
272
|
+
|
|
273
|
+
> **⚠️ GPT-5 Note:** GPT-5 requires OpenAI organization verification. Go to [OpenAI Settings](https://platform.openai.com/settings/organization/general) and click "Verify Organization". Access is granted within ~15 minutes. Alternatively, use the recommended setup below which works immediately.
|
|
274
|
+
|
|
275
|
+
📖 **Learn more:** [Python Documentation](./docs/) | [Quickstart Guide](./docs/guides/quickstart.md) | [Providers Guide](./docs/guides/providers.md)
|
|
276
|
+
|
|
277
|
+
### <img src=".github/assets/CF_ts_color.svg" width="24" height="24" alt="TypeScript"/> TypeScript
|
|
278
|
+
|
|
279
|
+
```bash
|
|
280
|
+
npm install @cascadeflow/core
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
```tsx
|
|
284
|
+
import { CascadeAgent, ModelConfig } from '@cascadeflow/core';
|
|
285
|
+
|
|
286
|
+
// Same API as Python!
|
|
287
|
+
const agent = new CascadeAgent({
|
|
288
|
+
models: [
|
|
289
|
+
{ name: 'gpt-4o-mini', provider: 'openai', cost: 0.00015 },
|
|
290
|
+
{ name: 'gpt-4o', provider: 'openai', cost: 0.00625 },
|
|
291
|
+
],
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
const result = await agent.run('What is TypeScript?');
|
|
295
|
+
console.log(`Model: ${result.modelUsed}`);
|
|
296
|
+
console.log(`Cost: $${result.totalCost}`);
|
|
297
|
+
console.log(`Saved: ${result.savingsPercentage}%`);
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
<details>
|
|
301
|
+
<summary><b>💡 Optional: Enable ML-based Quality Engine & Domain Detection for Higher Accuracy</b></summary>
|
|
302
|
+
|
|
303
|
+
> **Note:** ML semantic detection is currently available in Python only. TypeScript support is planned for a future release. Rule-based detection provides excellent accuracy out of the box.
|
|
304
|
+
|
|
305
|
+
**For Python users:**
|
|
306
|
+
|
|
307
|
+
**Step 1:** Install the ML package:
|
|
308
|
+
```bash
|
|
309
|
+
pip install cascadeflow[ml]
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
**Step 2:** Enable semantic detection:
|
|
313
|
+
```python
|
|
314
|
+
from cascadeflow import CascadeAgent, ModelConfig
|
|
315
|
+
|
|
316
|
+
agent = CascadeAgent(
|
|
317
|
+
models=[...],
|
|
318
|
+
enable_semantic_detection=True # Enables ML-based detection
|
|
319
|
+
)
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
**Future TypeScript Support (Planned):**
|
|
323
|
+
|
|
324
|
+
```tsx
|
|
325
|
+
// Will be available in a future release
|
|
326
|
+
npm install @cascadeflow/ml
|
|
327
|
+
|
|
328
|
+
import { CascadeAgent, ModelConfig } from '@cascadeflow/core';
|
|
329
|
+
|
|
330
|
+
// Step 1: Enable semantic detection in configuration
|
|
331
|
+
const agent = new CascadeAgent({
|
|
332
|
+
models: [
|
|
333
|
+
{ name: 'gpt-4o-mini', provider: 'openai', cost: 0.00015 },
|
|
334
|
+
{ name: 'gpt-4o', provider: 'openai', cost: 0.00625 },
|
|
335
|
+
],
|
|
336
|
+
enableSemanticDetection: true // Optional: Uses ML for domain detection
|
|
337
|
+
});
|
|
338
|
+
|
|
339
|
+
// Step 2: Query with ML-enhanced detection
|
|
340
|
+
const result = await agent.run('Parse this JSON and validate the schema');
|
|
341
|
+
|
|
342
|
+
// Check which detection method was used
|
|
343
|
+
console.log(`Domain: ${result.metadata.domainDetected}`);
|
|
344
|
+
console.log(`Method: ${result.metadata.detectionMethod}`); // 'semantic' or 'rule-based'
|
|
345
|
+
console.log(`Confidence: ${(result.metadata.domainConfidence * 100).toFixed(1)}%`);
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
**What you'll get (when available):**
|
|
349
|
+
- 🎯 84-87% confidence on complex domains (MATH, CODE, DATA, STRUCTURED)
|
|
350
|
+
- 🔄 Automatic fallback to rule-based if ML unavailable
|
|
351
|
+
- 📈 Improved routing accuracy for specialized queries
|
|
352
|
+
- 🚀 Works seamlessly with your existing cascade setup
|
|
353
|
+
|
|
354
|
+
Currently, CascadeFlow TypeScript uses highly accurate rule-based domain detection which works great for most use cases!
|
|
355
|
+
|
|
356
|
+
</details>
|
|
357
|
+
|
|
358
|
+
📖 **Learn more:** [TypeScript Documentation](./packages/core/) | [Node.js Examples](./packages/core/examples/nodejs/) | [Browser/Edge Guide](./docs/guides/browser_cascading.md)
|
|
359
|
+
|
|
360
|
+
### 🔄 Migration Example
|
|
361
|
+
|
|
362
|
+
**Migrate in 5 minutes from a direct provider integration to cost savings with full cost control and transparency.**
|
|
363
|
+
|
|
364
|
+
#### Before (Standard Approach)
|
|
365
|
+
|
|
366
|
+
Cost: $0.001250, Latency: 850ms
|
|
367
|
+
|
|
368
|
+
```python
|
|
369
|
+
# Using expensive model for everything
|
|
370
|
+
result = openai.chat.completions.create(
|
|
371
|
+
model="gpt-5",
|
|
372
|
+
messages=[{"role": "user", "content": "What's 2+2?"}]
|
|
373
|
+
)
|
|
374
|
+
```
|
|
375
|
+
|
|
376
|
+
#### After (With CascadeFlow)
|
|
377
|
+
|
|
378
|
+
Cost: $0.000150, Latency: 234ms
|
|
379
|
+
|
|
380
|
+
```python
|
|
381
|
+
agent = CascadeAgent(models=[
|
|
382
|
+
ModelConfig(name="gpt-4o-mini", provider="openai", cost=0.00015),
|
|
383
|
+
ModelConfig(name="gpt-5", provider="openai", cost=0.00125),
|
|
384
|
+
])
|
|
385
|
+
|
|
386
|
+
result = await agent.run("What's 2+2?")
|
|
387
|
+
```
|
|
388
|
+
|
|
389
|
+
> **🔥 Saved:** $0.001100 (88% reduction), 3.6x faster
|
|
390
|
+
|
|
391
|
+
📊 **Learn more:** [Cost Tracking Guide](./docs/guides/cost_tracking.md) | [Production Best Practices](./docs/guides/production.md) | [Performance Optimization](./docs/guides/performance.md)
|
|
392
|
+
|
|
393
|
+
---
|
|
394
|
+
|
|
395
|
+
## <img src=".github/assets/CF_n8n_color.svg" width="24" height="24" alt="n8n"/> n8n Integration
|
|
396
|
+
|
|
397
|
+
Use CascadeFlow in n8n workflows for no-code AI automation with automatic cost optimization!
|
|
398
|
+
|
|
399
|
+
### Installation
|
|
400
|
+
|
|
401
|
+
1. Open n8n
|
|
402
|
+
2. Go to **Settings** → **Community Nodes**
|
|
403
|
+
3. Search for: `n8n-nodes-cascadeflow`
|
|
404
|
+
4. Click **Install**
|
|
405
|
+
|
|
406
|
+
### Quick Example
|
|
407
|
+
|
|
408
|
+
Create a workflow:
|
|
409
|
+
|
|
410
|
+
```
|
|
411
|
+
Manual Trigger → CascadeFlow Node → Set Node
|
|
412
|
+
|
|
413
|
+
```
|
|
414
|
+
|
|
415
|
+
Configure CascadeFlow node:
|
|
416
|
+
|
|
417
|
+
- **Draft Model**: `gpt-4o-mini` ($0.00015)
|
|
418
|
+
- **Verifier Model**: `gpt-4o` ($0.00625)
|
|
419
|
+
- **Message**: Your prompt
|
|
420
|
+
- **Output**: Full Metrics
|
|
421
|
+
|
|
422
|
+
**Result:** 40-85% cost savings in your n8n workflows!
|
|
423
|
+
|
|
424
|
+
**Features:**
|
|
425
|
+
|
|
426
|
+
- ✅ Visual workflow integration
|
|
427
|
+
- ✅ Multi-provider support
|
|
428
|
+
- ✅ Cost tracking in workflow
|
|
429
|
+
- ✅ Tool calling support
|
|
430
|
+
- ✅ Easy debugging with metrics
|
|
431
|
+
|
|
432
|
+
🔌 **Learn more:** [n8n Integration Guide](./packages/integrations/n8n/) | [n8n Documentation](./docs/guides/n8n_integration.md)
|
|
433
|
+
|
|
434
|
+
---
|
|
435
|
+
|
|
436
|
+
## Resources
|
|
437
|
+
|
|
438
|
+
### Examples
|
|
439
|
+
|
|
440
|
+
**<img src=".github/assets/CF_python_color.svg" width="20" height="20" alt="Python" style="vertical-align: middle;"/> Python Examples:**
|
|
441
|
+
|
|
442
|
+
<details open>
|
|
443
|
+
<summary><b>Basic Examples</b> - Get started quickly</summary>
|
|
444
|
+
|
|
445
|
+
| Example | Description | Link |
|
|
446
|
+
|---------|-------------|------|
|
|
447
|
+
| **Basic Usage** | Simple cascade setup with OpenAI models | [View](./examples/basic_usage.py) |
|
|
448
|
+
| **Preset Usage** | Use built-in presets for quick setup | [View Guide](./docs/guides/presets.md) |
|
|
449
|
+
| **Multi-Provider** | Mix multiple AI providers in one cascade | [View](./examples/multi_provider.py) |
|
|
450
|
+
| **Reasoning Models** 🆕 | Use reasoning models (o1/o3, Claude 3.7, DeepSeek-R1) | [View](./examples/reasoning_models.py) |
|
|
451
|
+
| **Tool Execution** | Function calling and tool usage | [View](./examples/tool_execution.py) |
|
|
452
|
+
| **Streaming Text** | Stream responses from cascade agents | [View](./examples/streaming_text.py) |
|
|
453
|
+
| **Cost Tracking** | Track and analyze costs across queries | [View](./examples/cost_tracking.py) |
|
|
454
|
+
|
|
455
|
+
</details>
|
|
456
|
+
|
|
457
|
+
<details>
|
|
458
|
+
<summary><b>Advanced Examples</b> - Production & customization</summary>
|
|
459
|
+
|
|
460
|
+
| Example | Description | Link |
|
|
461
|
+
|---------|-------------|------|
|
|
462
|
+
| **Production Patterns** | Best practices for production deployments | [View](./examples/production_patterns.py) |
|
|
463
|
+
| **FastAPI Integration** | Integrate cascades with FastAPI | [View](./examples/fastapi_integration.py) |
|
|
464
|
+
| **Streaming Tools** | Stream tool calls and responses | [View](./examples/streaming_tools.py) |
|
|
465
|
+
| **Batch Processing** | Process multiple queries efficiently | [View](./examples/batch_processing.py) |
|
|
466
|
+
| **Multi-Step Cascade** | Build complex multi-step cascades | [View](./examples/multi_step_cascade.py) |
|
|
467
|
+
| **Edge Device** | Run cascades on edge devices with local models | [View](./examples/edge_device.py) |
|
|
468
|
+
| **vLLM Example** | Use vLLM for local model deployment | [View](./examples/vllm_example.py) |
|
|
469
|
+
| **Custom Cascade** | Build custom cascade strategies | [View](./examples/custom_cascade.py) |
|
|
470
|
+
| **Custom Validation** | Implement custom quality validators | [View](./examples/custom_validation.py) |
|
|
471
|
+
| **User Budget Tracking** | Per-user budget enforcement and tracking | [View](./examples/user_budget_tracking.py) |
|
|
472
|
+
| **User Profile Usage** | User-specific routing and configurations | [View](./examples/user_profile_usage.py) |
|
|
473
|
+
| **Rate Limiting** | Implement rate limiting for cascades | [View](./examples/rate_limiting_usage.py) |
|
|
474
|
+
| **Guardrails** | Add safety and content guardrails | [View](./examples/guardrails_usage.py) |
|
|
475
|
+
| **Cost Forecasting** | Forecast costs and detect anomalies | [View](./examples/cost_forecasting_anomaly_detection.py) |
|
|
476
|
+
| **Semantic Quality Detection** | ML-based domain and quality detection | [View](./examples/semantic_quality_domain_detection.py) |
|
|
477
|
+
| **Profile Database Integration** | Integrate user profiles with databases | [View](./examples/profile_database_integration.py) |
|
|
478
|
+
|
|
479
|
+
</details>
|
|
480
|
+
|
|
481
|
+
**<img src=".github/assets/CF_ts_color.svg" width="20" height="20" alt="TypeScript" style="vertical-align: middle;"/> TypeScript Examples:**
|
|
482
|
+
|
|
483
|
+
<details open>
|
|
484
|
+
<summary><b>Basic Examples</b> - Get started quickly</summary>
|
|
485
|
+
|
|
486
|
+
| Example | Description | Link |
|
|
487
|
+
|---------|-------------|------|
|
|
488
|
+
| **Basic Usage** | Simple cascade setup (Node.js) | [View](./packages/core/examples/nodejs/basic-usage.ts) |
|
|
489
|
+
| **Tool Calling** | Function calling with tools (Node.js) | [View](./packages/core/examples/nodejs/tool-calling.ts) |
|
|
490
|
+
| **Multi-Provider** | Mix providers in TypeScript (Node.js) | [View](./packages/core/examples/nodejs/multi-provider.ts) |
|
|
491
|
+
| **Reasoning Models** 🆕 | Use reasoning models (o1/o3, Claude 3.7, DeepSeek-R1) | [View](./packages/core/examples/nodejs/reasoning-models.ts) |
|
|
492
|
+
| **Streaming** | Stream responses in TypeScript | [View](./packages/core/examples/streaming.ts) |
|
|
493
|
+
|
|
494
|
+
</details>
|
|
495
|
+
|
|
496
|
+
<details>
|
|
497
|
+
<summary><b>Advanced Examples</b> - Production & edge deployment</summary>
|
|
498
|
+
|
|
499
|
+
| Example | Description | Link |
|
|
500
|
+
|---------|-------------|------|
|
|
501
|
+
| **Production Patterns** | Production best practices (Node.js) | [View](./packages/core/examples/nodejs/production-patterns.ts) |
|
|
502
|
+
| **Browser/Edge** | Vercel Edge runtime example | [View](./packages/core/examples/browser/vercel-edge/) |
|
|
503
|
+
|
|
504
|
+
</details>
|
|
505
|
+
|
|
506
|
+
📂 **[View All Python Examples →](./examples/)** | **[View All TypeScript Examples →](./packages/core/examples/)**
|
|
507
|
+
|
|
508
|
+
### Documentation
|
|
509
|
+
|
|
510
|
+
<details open>
|
|
511
|
+
<summary><b>Getting Started</b> - Core concepts and basics</summary>
|
|
512
|
+
|
|
513
|
+
| Guide | Description | Link |
|
|
514
|
+
|-------|-------------|------|
|
|
515
|
+
| **Quickstart** | Get started with CascadeFlow in 5 minutes | [Read](./docs/guides/quickstart.md) |
|
|
516
|
+
| **Providers Guide** | Configure and use different AI providers | [Read](./docs/guides/providers.md) |
|
|
517
|
+
| **Presets Guide** | Using and creating custom presets | [Read](./docs/guides/presets.md) |
|
|
518
|
+
| **Streaming Guide** | Stream responses from cascade agents | [Read](./docs/guides/streaming.md) |
|
|
519
|
+
| **Tools Guide** | Function calling and tool usage | [Read](./docs/guides/tools.md) |
|
|
520
|
+
| **Cost Tracking** | Track and analyze API costs | [Read](./docs/guides/cost_tracking.md) |
|
|
521
|
+
|
|
522
|
+
</details>
|
|
523
|
+
|
|
524
|
+
<details>
|
|
525
|
+
<summary><b>Advanced Topics</b> - Production, customization & integrations</summary>
|
|
526
|
+
|
|
527
|
+
| Guide | Description | Link |
|
|
528
|
+
|-------|-------------|------|
|
|
529
|
+
| **Production Guide** | Best practices for production deployments | [Read](./docs/guides/production.md) |
|
|
530
|
+
| **Performance Guide** | Optimize cascade performance and latency | [Read](./docs/guides/performance.md) |
|
|
531
|
+
| **Custom Cascade** | Build custom cascade strategies | [Read](./docs/guides/custom_cascade.md) |
|
|
532
|
+
| **Custom Validation** | Implement custom quality validators | [Read](./docs/guides/custom_validation.md) |
|
|
533
|
+
| **Edge Device** | Deploy cascades on edge devices | [Read](./docs/guides/edge_device.md) |
|
|
534
|
+
| **Browser Cascading** | Run cascades in the browser/edge | [Read](./docs/guides/browser_cascading.md) |
|
|
535
|
+
| **FastAPI Integration** | Integrate with FastAPI applications | [Read](./docs/guides/fastapi.md) |
|
|
536
|
+
| **n8n Integration** | Use CascadeFlow in n8n workflows | [Read](./docs/guides/n8n_integration.md) |
|
|
537
|
+
|
|
538
|
+
</details>
|
|
539
|
+
|
|
540
|
+
📚 **[View All Documentation →](./docs/)**
|
|
541
|
+
|
|
542
|
+
---
|
|
543
|
+
|
|
544
|
+
## Features
|
|
545
|
+
|
|
546
|
+
| **Feature** | **Benefit** |
|
|
547
|
+
| --- |----------------------------------------------------------------------------------------------------------------------------------------|
|
|
548
|
+
| 🎯 **Speculative Cascading** | Tries cheap models first, escalates intelligently |
|
|
549
|
+
| 💰 **40-85% Cost Savings** | Research-backed, proven in production |
|
|
550
|
+
| ⚡ **2-10x Faster** | Small models respond in <50ms vs 500-2000ms |
|
|
551
|
+
| ⚡ **Low Latency** 🆕 | Sub-2ms framework overhead, negligible performance impact |
|
|
552
|
+
| 🔄 **Mix Any Providers** 🆕 | OpenAI, Anthropic, Groq, Ollama, vLLM, Together + LiteLLM (optional) |
|
|
553
|
+
| 👤 **User Profile System** 🆕 | Per-user budgets, tier-aware routing, enforcement callbacks |
|
|
554
|
+
| ✅ **Quality Validation** 🆕 | Automatic checks + semantic similarity (optional ML, ~80MB, CPU) |
|
|
555
|
+
| 🎨 **Cascading Policies** 🆕 | Domain-specific pipelines, multi-step validation strategies |
|
|
556
|
+
| 🧠 **Domain Understanding** 🆕 | Auto-detects code/medical/legal/math/structured data, routes to specialists |
|
|
557
|
+
| 🤖 **Drafter/Validator Pattern** | 20-60% savings for agent/tool systems |
|
|
558
|
+
| 🔧 **Tool Calling Support** 🆕 | Universal format, works across all providers |
|
|
559
|
+
| 📊 **Cost Tracking** 🆕 | Built-in analytics + OpenTelemetry export (vendor-neutral) |
|
|
560
|
+
| 🚀 **3-Line Integration** | Zero architecture changes needed |
|
|
561
|
+
| 🏭 **Production Ready** 🆕 | Streaming, batch processing, tool handling, reasoning model support, caching, error recovery, anomaly detection |
|
|
562
|
+
|
|
563
|
+
---
|
|
564
|
+
|
|
565
|
+
## License
|
|
566
|
+
|
|
567
|
+
MIT License — see the [LICENSE](https://github.com/lemony-ai/cascadeflow/blob/main/LICENSE) file for details.
|
|
568
|
+
|
|
569
|
+
Free for commercial use. Attribution appreciated but not required.
|
|
570
|
+
|
|
571
|
+
---
|
|
572
|
+
|
|
573
|
+
## Contributing
|
|
574
|
+
|
|
575
|
+
We ❤️ contributions!
|
|
576
|
+
|
|
577
|
+
📝 [**Contributing Guide**](./CONTRIBUTING.md) - Python & TypeScript development setup
|
|
578
|
+
|
|
579
|
+
---
|
|
580
|
+
|
|
581
|
+
## Roadmap
|
|
582
|
+
|
|
583
|
+
- **Cascade Profiler** - Analyzes your AI API logs to calculate cost savings potential and generate optimized CascadeFlow configurations automatically
|
|
584
|
+
- **User Tier Management** - Cost controls and limits per user tier with advanced routing
|
|
585
|
+
- **Semantic Quality Validators** - Optional lightweight local quality scoring (200MB CPU model, no external API calls)
|
|
586
|
+
- **Code Complexity Detection** - Dynamic cascading based on task complexity analysis
|
|
587
|
+
- **Domain Aware Cascading** - Multi-stage pipelines tailored to specific domains
|
|
588
|
+
- **Benchmark Reports** - Automated performance and cost benchmarking
|
|
589
|
+
|
|
590
|
+
---
|
|
591
|
+
|
|
592
|
+
## Support
|
|
593
|
+
|
|
594
|
+
- 📖 [**GitHub Discussions**](https://github.com/lemony-ai/cascadeflow/discussions) - Searchable Q&A
|
|
595
|
+
- 🐛 [**GitHub Issues**](https://github.com/lemony-ai/cascadeflow/issues) - Bug reports & feature requests
|
|
596
|
+
- 📧 [**Email Support**](mailto:hello@lemony.ai) - Direct support
|
|
597
|
+
|
|
598
|
+
---
|
|
599
|
+
|
|
600
|
+
## Citation
|
|
601
|
+
|
|
602
|
+
If you use CascadeFlow in your research or project, please cite:
|
|
603
|
+
|
|
604
|
+
```bibtex
|
|
605
|
+
@software{cascadeflow2025,
|
|
606
|
+
author = {{Lemony Inc.} and Sascha Buehrle and Contributors},
|
|
607
|
+
title = {CascadeFlow: Smart AI model cascading for cost optimization},
|
|
608
|
+
year = {2025},
|
|
609
|
+
publisher = {GitHub},
|
|
610
|
+
url = {https://github.com/lemony-ai/cascadeflow}
|
|
611
|
+
}
|
|
612
|
+
```
|
|
613
|
+
|
|
614
|
+
**Ready to cut your AI costs by 40-85%?**
|
|
615
|
+
|
|
616
|
+
```bash
|
|
617
|
+
pip install cascadeflow
|
|
618
|
+
```
|
|
619
|
+
|
|
620
|
+
```bash
|
|
621
|
+
npm install @cascadeflow/core
|
|
622
|
+
```
|
|
623
|
+
|
|
624
|
+
[Read the Docs](./docs/) • [View Python Examples](./examples/) • [View TypeScript Examples](./packages/core/examples/) • [Join Discussions](https://github.com/lemony-ai/cascadeflow/discussions)
|
|
625
|
+
|
|
626
|
+
---
|
|
627
|
+
|
|
628
|
+
## About
|
|
629
|
+
|
|
630
|
+
**Built with ❤️ by [Lemony Inc.](https://lemony.ai/) and the CascadeFlow Community**
|
|
631
|
+
|
|
632
|
+
One cascade. Hundreds of specialists.
|
|
633
|
+
|
|
634
|
+
New York | Zurich
|
|
635
|
+
|
|
636
|
+
**⭐ Star us on GitHub if CascadeFlow helps you save money!**
|