cascadeflow-core-smr 1.1.1-smr.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +514 -0
- package/dist/batch-E4EWL5CO.mjs +15 -0
- package/dist/chunk-47GGM6YK.mjs +149 -0
- package/dist/chunk-XESOO5EG.mjs +213 -0
- package/dist/chunk-XGB3TDIC.mjs +42 -0
- package/dist/dist-K7RZMLIK.mjs +29152 -0
- package/dist/index.d.mts +2544 -0
- package/dist/index.d.ts +2544 -0
- package/dist/index.js +49268 -0
- package/dist/index.mjs +18223 -0
- package/dist/quality-semantic-BMPERONY.mjs +7 -0
- package/package.json +111 -0
package/README.md
ADDED
|
@@ -0,0 +1,514 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
<picture>
|
|
4
|
+
<source media="(prefers-color-scheme: dark)" srcset="../../.github/assets/CF_logo_bright.svg">
|
|
5
|
+
<source media="(prefers-color-scheme: light)" srcset="../../.github/assets/CF_logo_dark.svg">
|
|
6
|
+
<img alt="cascadeflow Logo" src="../../.github/assets/CF_logo_dark.svg" width="80%" style="margin: 20px auto;">
|
|
7
|
+
</picture>
|
|
8
|
+
|
|
9
|
+
# @cascadeflow/core
|
|
10
|
+
|
|
11
|
+
[npm package](https://www.npmjs.com/package/@cascadeflow/core)
|
|
12
|
+
[License](../../LICENSE)
|
|
13
|
+
[TypeScript](https://www.typescriptlang.org/)
|
|
14
|
+
[Tests](https://github.com/lemony-ai/cascadeflow/actions/workflows/test.yml)
|
|
15
|
+
|
|
16
|
+
**<img src="../../.github/assets/CF_ts_color.svg" width="22" height="22" alt="TypeScript" style="vertical-align: middle;"/> TypeScript/JavaScript library for cascadeflow**
|
|
17
|
+
|
|
18
|
+
</div>
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
**Smart AI model cascading for cost optimization.**
|
|
23
|
+
|
|
24
|
+
Save 40-85% on LLM costs with intelligent model routing. Available for Node.js, browser, and edge environments.
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
npm install @cascadeflow/core
|
|
30
|
+
# or
|
|
31
|
+
pnpm add @cascadeflow/core
|
|
32
|
+
# or
|
|
33
|
+
yarn add @cascadeflow/core
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Harness Quick Start (V2.1)
|
|
37
|
+
|
|
38
|
+
```typescript
|
|
39
|
+
import { cascadeflow } from '@cascadeflow/core';
|
|
40
|
+
|
|
41
|
+
// 1) Turn on in-process harness decisions + SDK auto-instrumentation
|
|
42
|
+
cascadeflow.init({ mode: 'enforce', budget: 0.5 });
|
|
43
|
+
|
|
44
|
+
// 2) Scope one run (global defaults are inherited)
|
|
45
|
+
const result = await cascadeflow.run({ maxToolCalls: 8 }, async (run) => {
|
|
46
|
+
// Any OpenAI / Anthropic SDK calls made here are evaluated by the harness.
|
|
47
|
+
return { runId: run.runId };
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
console.log(result);
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Quick Start
|
|
54
|
+
|
|
55
|
+
### Recommended Setup (Claude Haiku + GPT-5)
|
|
56
|
+
|
|
57
|
+
```typescript
|
|
58
|
+
import { CascadeAgent } from '@cascadeflow/core';
|
|
59
|
+
|
|
60
|
+
const agent = new CascadeAgent({
|
|
61
|
+
models: [
|
|
62
|
+
{
|
|
63
|
+
name: 'claude-haiku-4-5',
|
|
64
|
+
provider: 'anthropic',
|
|
65
|
+
cost: 0.001 // Fast, high-quality drafter
|
|
66
|
+
},
|
|
67
|
+
{
|
|
68
|
+
name: 'gpt-5',
|
|
69
|
+
provider: 'openai',
|
|
70
|
+
cost: 0.00125 // Superior reasoning verifier (50% cheaper than GPT-4o!)
|
|
71
|
+
}
|
|
72
|
+
]
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
const result = await agent.run('What is artificial intelligence?');
|
|
76
|
+
|
|
77
|
+
console.log(result.content);
|
|
78
|
+
console.log(`Cost: $${result.totalCost}`);
|
|
79
|
+
console.log(`Savings: ${result.savingsPercentage}%`);
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Quality Configuration
|
|
83
|
+
|
|
84
|
+
Control when the cascade uses the drafter vs. verifier with quality thresholds:
|
|
85
|
+
|
|
86
|
+
```typescript
|
|
87
|
+
// Recommended: Complexity-aware thresholds
|
|
88
|
+
const agent = new CascadeAgent({
|
|
89
|
+
models: [
|
|
90
|
+
{ name: 'claude-haiku-4-5', provider: 'anthropic', cost: 0.001 },
|
|
91
|
+
{ name: 'gpt-5', provider: 'openai', cost: 0.00125 }
|
|
92
|
+
],
|
|
93
|
+
quality: {
|
|
94
|
+
confidenceThresholds: {
|
|
95
|
+
simple: 0.6, // "What is Python?" - Accept 60%+ confidence
|
|
96
|
+
moderate: 0.7, // "Compare Python vs Java" - Accept 70%+
|
|
97
|
+
hard: 0.8, // "Analyze quantum computing" - Accept 80%+
|
|
98
|
+
expert: 0.85 // "Implement distributed cache" - Accept 85%+
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
});
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
**Quick Configuration Options:**
|
|
105
|
+
|
|
106
|
+
```typescript
|
|
107
|
+
// Option 1: Use CASCADE_QUALITY_CONFIG (optimized for 50-60% acceptance)
|
|
108
|
+
import { CascadeAgent, CASCADE_QUALITY_CONFIG } from '@cascadeflow/core';
|
|
109
|
+
const agent = new CascadeAgent({
|
|
110
|
+
models: [...],
|
|
111
|
+
quality: CASCADE_QUALITY_CONFIG // Lower threshold (0.40) = more cost savings
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
// Option 2: Simple flat threshold
|
|
115
|
+
const agent = new CascadeAgent({
|
|
116
|
+
models: [...],
|
|
117
|
+
quality: {
|
|
118
|
+
threshold: 0.7, // 70% confidence required (default)
|
|
119
|
+
requireMinimumTokens: 10 // Minimum response length
|
|
120
|
+
}
|
|
121
|
+
});
|
|
122
|
+
|
|
123
|
+
// Option 3: Use defaults (no quality config needed)
|
|
124
|
+
const agent = new CascadeAgent({
|
|
125
|
+
models: [...]
|
|
126
|
+
// Automatically uses threshold: 0.7
|
|
127
|
+
});
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
**When to adjust:**
|
|
131
|
+
- **Lower thresholds (0.4-0.6)**: More drafts accepted → higher cost savings, slightly lower quality
|
|
132
|
+
- **Higher thresholds (0.8-0.9)**: Fewer drafts accepted → lower savings, maximum quality
|
|
133
|
+
- **Complexity-aware**: Best balance → adjusts automatically based on query difficulty
|
|
134
|
+
|
|
135
|
+
> **⚠️ GPT-5 Requires Organization Verification**
|
|
136
|
+
>
|
|
137
|
+
> To use GPT-5, your OpenAI organization must be verified:
|
|
138
|
+
> 1. Go to https://platform.openai.com/settings/organization/general
|
|
139
|
+
> 2. Click "Verify Organization"
|
|
140
|
+
> 3. Wait ~15 minutes for access to propagate
|
|
141
|
+
>
|
|
142
|
+
> **Works immediately:** The cascade above works right away! Claude Haiku handles 75% of queries, GPT-5 only called when needed.
|
|
143
|
+
|
|
144
|
+
> **📝 Model Naming**
|
|
145
|
+
>
|
|
146
|
+
> Both naming conventions work with cascadeflow:
|
|
147
|
+
> - `claude-haiku-4-5` (used in presets, recommended)
|
|
148
|
+
> - `claude-3-5-haiku-20241022` (Anthropic API format)
|
|
149
|
+
>
|
|
150
|
+
> The library accepts both formats and routes them correctly.
|
|
151
|
+
|
|
152
|
+
### OpenAI Only
|
|
153
|
+
|
|
154
|
+
```typescript
|
|
155
|
+
const agent = new CascadeAgent({
|
|
156
|
+
models: [
|
|
157
|
+
{ name: 'gpt-4o-mini', provider: 'openai', cost: 0.00015 },
|
|
158
|
+
{ name: 'gpt-5', provider: 'openai', cost: 0.00125 } // Requires org verification
|
|
159
|
+
]
|
|
160
|
+
});
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### Even Easier: Use Presets
|
|
164
|
+
|
|
165
|
+
**No configuration needed** - just import a preset and go:
|
|
166
|
+
|
|
167
|
+
```typescript
|
|
168
|
+
import { CascadeAgent, PRESET_ULTRA_FAST, PRESET_BEST_OVERALL } from '@cascadeflow/core';
|
|
169
|
+
|
|
170
|
+
// Ultra-fast with Groq (5-10x faster than OpenAI)
|
|
171
|
+
const agent = new CascadeAgent(PRESET_ULTRA_FAST);
|
|
172
|
+
|
|
173
|
+
// Or best overall (Claude Haiku + GPT-4o-mini)
|
|
174
|
+
const agent = new CascadeAgent(PRESET_BEST_OVERALL);
|
|
175
|
+
|
|
176
|
+
const result = await agent.run('Your query here');
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
**Available Presets:**
|
|
180
|
+
|
|
181
|
+
| Preset | Best For | Speed | Cost/Query | API Keys |
|
|
182
|
+
|--------|----------|-------|-----------|----------|
|
|
183
|
+
| `PRESET_BEST_OVERALL` | Most use cases | Fast (~2-3s) | ~$0.0008 | Anthropic + OpenAI |
|
|
184
|
+
| `PRESET_ULTRA_FAST` | Real-time apps | Ultra-fast (~1-2s) | ~$0.0002 | Groq |
|
|
185
|
+
| `PRESET_ULTRA_CHEAP` | High volume | Very fast (~1-3s) | ~$0.00008 | Groq + OpenAI |
|
|
186
|
+
| `PRESET_OPENAI_ONLY` | Single provider | Fast (~2-4s) | ~$0.0004 | OpenAI |
|
|
187
|
+
| `PRESET_ANTHROPIC_ONLY` | Claude fans | Fast (~2-3s) | ~$0.002 | Anthropic |
|
|
188
|
+
| `PRESET_FREE_LOCAL` | Privacy/offline | Moderate (~3-5s) | $0 (free) | None (Ollama) |
|
|
189
|
+
|
|
190
|
+
**Custom Presets:**
|
|
191
|
+
|
|
192
|
+
```typescript
|
|
193
|
+
import { CascadeAgent, createPreset } from '@cascadeflow/core';
|
|
194
|
+
|
|
195
|
+
const agent = new CascadeAgent(
|
|
196
|
+
createPreset({
|
|
197
|
+
quality: 'strict', // 'cost-optimized' | 'balanced' | 'strict'
|
|
198
|
+
performance: 'fast', // 'fast' | 'balanced' | 'reliable'
|
|
199
|
+
includePremium: true // Add premium tier (gpt-4o)
|
|
200
|
+
})
|
|
201
|
+
);
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
## Advanced Features
|
|
205
|
+
|
|
206
|
+
### 🎚️ Quality Profiles
|
|
207
|
+
|
|
208
|
+
Control quality validation with predefined profiles optimized for different use cases:
|
|
209
|
+
|
|
210
|
+
```typescript
|
|
211
|
+
import { CascadeAgent, QualityValidator } from '@cascadeflow/core';
|
|
212
|
+
|
|
213
|
+
// Strict Mode: Maximum quality with semantic validation
|
|
214
|
+
const strictAgent = new CascadeAgent({
|
|
215
|
+
models: [...],
|
|
216
|
+
cascade: {
|
|
217
|
+
enabled: true,
|
|
218
|
+
qualityConfig: {
|
|
219
|
+
useProductionConfidence: true,
|
|
220
|
+
strictMode: true,
|
|
221
|
+
useSemanticValidation: true,
|
|
222
|
+
minConfidence: 0.85,
|
|
223
|
+
provider: 'openai',
|
|
224
|
+
},
|
|
225
|
+
},
|
|
226
|
+
});
|
|
227
|
+
|
|
228
|
+
// Or use factory methods
|
|
229
|
+
const strictValidator = QualityValidator.strict();
|
|
230
|
+
const prodValidator = QualityValidator.forProduction(); // Multi-signal confidence
|
|
231
|
+
const devValidator = QualityValidator.forDevelopment(); // Lenient for testing
|
|
232
|
+
const cascadeValidator = QualityValidator.forCascade(); // Optimized for 50-60% acceptance
|
|
233
|
+
const permissiveValidator = QualityValidator.permissive(); // Maximum throughput
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
**Available Profiles:**
|
|
237
|
+
- **Strict**: 85% confidence + semantic validation (maximum quality)
|
|
238
|
+
- **Production**: 70% confidence with multi-signal estimation (balanced)
|
|
239
|
+
- **Development**: 50% confidence, minimal word count (fast iteration)
|
|
240
|
+
- **Cascade**: 40% confidence, optimized for cost savings (50-60% draft acceptance)
|
|
241
|
+
- **Permissive**: 30% confidence, maximum throughput (highest savings)
|
|
242
|
+
|
|
243
|
+
[📖 Full example](examples/nodejs/quality-profiles.ts)
|
|
244
|
+
|
|
245
|
+
### 📡 Telemetry & Callbacks
|
|
246
|
+
|
|
247
|
+
Monitor cascade operations with event-driven callbacks:
|
|
248
|
+
|
|
249
|
+
```typescript
|
|
250
|
+
import { CascadeAgent, CallbackManager, CallbackEvent } from '@cascadeflow/core';
|
|
251
|
+
|
|
252
|
+
const callbackManager = new CallbackManager(true); // verbose=true
|
|
253
|
+
|
|
254
|
+
// Track query lifecycle
|
|
255
|
+
callbackManager.register(CallbackEvent.QUERY_START, (data) => {
|
|
256
|
+
console.log(`Query started: "${data.query}"`);
|
|
257
|
+
});
|
|
258
|
+
|
|
259
|
+
callbackManager.register(CallbackEvent.COMPLEXITY_DETECTED, (data) => {
|
|
260
|
+
console.log(`Complexity: ${data.data.complexity} (confidence: ${data.data.confidence})`);
|
|
261
|
+
});
|
|
262
|
+
|
|
263
|
+
callbackManager.register(CallbackEvent.DRAFT_ACCEPTED, (data) => {
|
|
264
|
+
console.log(`Draft accepted! Savings: $${data.data.savings}`);
|
|
265
|
+
});
|
|
266
|
+
|
|
267
|
+
const agent = new CascadeAgent({
|
|
268
|
+
models: [...],
|
|
269
|
+
callbacks: callbackManager,
|
|
270
|
+
cascade: { enabled: true },
|
|
271
|
+
});
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
**Available Events:**
|
|
275
|
+
- `QUERY_START` / `QUERY_COMPLETE` - Query lifecycle
|
|
276
|
+
- `COMPLEXITY_DETECTED` - Query complexity analysis
|
|
277
|
+
- `CASCADE_DECISION` - Routing decisions
|
|
278
|
+
- `QUALITY_VALIDATION` - Quality checks
|
|
279
|
+
- `DRAFT_ACCEPTED` / `DRAFT_REJECTED` - Draft outcomes
|
|
280
|
+
- `VERIFIER_CALLED` - Verifier invocations
|
|
281
|
+
|
|
282
|
+
[📖 Full example](examples/nodejs/telemetry-callbacks.ts)
|
|
283
|
+
|
|
284
|
+
### 📦 Batch Processing
|
|
285
|
+
|
|
286
|
+
Process multiple queries with progress tracking and analytics:
|
|
287
|
+
|
|
288
|
+
```typescript
|
|
289
|
+
const queries = [
|
|
290
|
+
'What is TypeScript?',
|
|
291
|
+
'Explain async/await.',
|
|
292
|
+
'What are design patterns?',
|
|
293
|
+
];
|
|
294
|
+
|
|
295
|
+
const batchResult = await agent.runBatch(queries, {
|
|
296
|
+
strategy: BatchStrategy.SEQUENTIAL,
|
|
297
|
+
continueOnError: true,
|
|
298
|
+
onProgress: (completed, total, currentQuery) => {
|
|
299
|
+
console.log(`[${(completed/total*100).toFixed(0)}%] ${completed}/${total}`);
|
|
300
|
+
},
|
|
301
|
+
});
|
|
302
|
+
|
|
303
|
+
// Analyze results
|
|
304
|
+
console.log(`Success rate: ${(batchResult.successCount / queries.length * 100).toFixed(1)}%`);
|
|
305
|
+
console.log(`Total cost: $${batchResult.results.reduce((sum, r) => sum + (r.result?.totalCost || 0), 0)}`);
|
|
306
|
+
console.log(`Draft acceptance: ${batchResult.results.filter(r => r.result?.draftAccepted).length}`);
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
[📖 Full example](examples/nodejs/batch-processing.ts)
|
|
310
|
+
|
|
311
|
+
### 🔀 Router Integration
|
|
312
|
+
|
|
313
|
+
Intelligent routing with complexity analysis and capability filtering:
|
|
314
|
+
|
|
315
|
+
```typescript
|
|
316
|
+
// PreRouter: Automatically routes based on query complexity
|
|
317
|
+
const simpleResult = await agent.run('What is 2 + 2?');
|
|
318
|
+
// → Uses draft model (simple query)
|
|
319
|
+
|
|
320
|
+
const complexResult = await agent.run(
|
|
321
|
+
'Explain quantum computing theory with recent research references.'
|
|
322
|
+
);
|
|
323
|
+
// → Routes directly to best model (complex query)
|
|
324
|
+
|
|
325
|
+
// ToolRouter: Filters to tool-capable models
|
|
326
|
+
// Use a strict parser helper (see examples/nodejs/safe-math.ts).
|
|
327
|
+
const calculatorTool = createTool({
|
|
328
|
+
name: 'calculator',
|
|
329
|
+
description: 'Perform calculations',
|
|
330
|
+
function: async ({ expression }) => safeCalculateExpression(expression),
|
|
331
|
+
});
|
|
332
|
+
|
|
333
|
+
const toolResult = await agent.run('Calculate 125 * 47', {
|
|
334
|
+
tools: [calculatorTool],
|
|
335
|
+
});
|
|
336
|
+
// → Automatically excludes models without tool support
|
|
337
|
+
|
|
338
|
+
// Get router statistics
|
|
339
|
+
const stats = agent.getRouterStats();
|
|
340
|
+
console.log(stats.preRouter); // Complexity-based routing stats
|
|
341
|
+
console.log(stats.toolRouter); // Tool filtering stats
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
[📖 Full example](examples/nodejs/router-integration.ts)
|
|
345
|
+
|
|
346
|
+
### 👤 User Profiles & Workflows
|
|
347
|
+
|
|
348
|
+
Manage user tiers, budgets, and optimization preferences:
|
|
349
|
+
|
|
350
|
+
```typescript
|
|
351
|
+
import { createUserProfile, createWorkflowProfile, TIER_PRESETS, WORKFLOW_PRESETS } from '@cascadeflow/core';
|
|
352
|
+
|
|
353
|
+
// Tier-based profiles
|
|
354
|
+
const freeProfile = createUserProfile({
|
|
355
|
+
tier: TIER_PRESETS.free, // Max budget: $0.01, Quality: 0.60
|
|
356
|
+
});
|
|
357
|
+
|
|
358
|
+
const premiumProfile = createUserProfile({
|
|
359
|
+
tier: TIER_PRESETS.premium, // Max budget: $0.10, Quality: 0.80
|
|
360
|
+
});
|
|
361
|
+
|
|
362
|
+
// Custom profile with optimization weights
|
|
363
|
+
const customProfile = createUserProfile({
|
|
364
|
+
tier: { name: 'custom', maxBudget: 0.05, qualityThreshold: 0.75 },
|
|
365
|
+
optimizationWeights: {
|
|
366
|
+
cost: 0.5, // 50% weight on cost
|
|
367
|
+
speed: 0.3, // 30% weight on speed
|
|
368
|
+
quality: 0.2, // 20% weight on quality
|
|
369
|
+
},
|
|
370
|
+
});
|
|
371
|
+
|
|
372
|
+
// Latency-aware profiles
|
|
373
|
+
const lowLatencyProfile = createUserProfile({
|
|
374
|
+
tier: TIER_PRESETS.premium,
|
|
375
|
+
latencyProfile: {
|
|
376
|
+
maxTotalMs: 2000, // 2 second total limit
|
|
377
|
+
maxPerModelMs: 1000, // 1 second per model
|
|
378
|
+
preferParallel: true, // Prefer parallel execution
|
|
379
|
+
skipCascadeThreshold: 1500,
|
|
380
|
+
},
|
|
381
|
+
});
|
|
382
|
+
|
|
383
|
+
// Use with agent
|
|
384
|
+
const agent = new CascadeAgent({
|
|
385
|
+
models: [...],
|
|
386
|
+
profile: premiumProfile,
|
|
387
|
+
cascade: { enabled: true },
|
|
388
|
+
});
|
|
389
|
+
```
|
|
390
|
+
|
|
391
|
+
**Workflow Presets:**
|
|
392
|
+
- `WORKFLOW_PRESETS.production` - High quality, reasonable latency
|
|
393
|
+
- `WORKFLOW_PRESETS.realtime` - Ultra-low latency, single model
|
|
394
|
+
- `WORKFLOW_PRESETS.batch` - Maximum throughput, relaxed constraints
|
|
395
|
+
|
|
396
|
+
[📖 Full example](examples/nodejs/user-profiles-workflows.ts)
|
|
397
|
+
|
|
398
|
+
### 🏭 Factory Methods
|
|
399
|
+
|
|
400
|
+
Simplified agent creation with auto-configuration:
|
|
401
|
+
|
|
402
|
+
```typescript
|
|
403
|
+
import { CascadeAgent } from '@cascadeflow/core';
|
|
404
|
+
|
|
405
|
+
// Auto-detect providers from environment variables
|
|
406
|
+
const envAgent = CascadeAgent.fromEnv({
|
|
407
|
+
quality: 'production', // 'strict' | 'production' | 'development'
|
|
408
|
+
});
|
|
409
|
+
// Checks for: OPENAI_API_KEY, ANTHROPIC_API_KEY, GROQ_API_KEY, etc.
|
|
410
|
+
|
|
411
|
+
// Create from user profile
|
|
412
|
+
const profileAgent = CascadeAgent.fromProfile({
|
|
413
|
+
profile: premiumProfile,
|
|
414
|
+
preferredModels: ['gpt-4o-mini', 'gpt-4o', 'claude-3-5-haiku-20241022'],
|
|
415
|
+
});
|
|
416
|
+
|
|
417
|
+
// Traditional manual configuration (full control)
|
|
418
|
+
const manualAgent = new CascadeAgent({
|
|
419
|
+
models: [
|
|
420
|
+
{ name: 'gpt-4o-mini', provider: 'openai', cost: 0.00015 },
|
|
421
|
+
{ name: 'gpt-4o', provider: 'openai', cost: 0.00625 },
|
|
422
|
+
],
|
|
423
|
+
cascade: { enabled: true },
|
|
424
|
+
});
|
|
425
|
+
```
|
|
426
|
+
|
|
427
|
+
**Benefits:**
|
|
428
|
+
- `fromEnv()`: Auto-detects available providers, sensible defaults
|
|
429
|
+
- `fromProfile()`: Multi-tenant applications, per-user configuration
|
|
430
|
+
- Manual config: Full control for production requirements
|
|
431
|
+
|
|
432
|
+
[📖 Full example](examples/nodejs/factory-methods.ts)
|
|
433
|
+
|
|
434
|
+
### 🌊 Enhanced Streaming
|
|
435
|
+
|
|
436
|
+
Event-driven streaming with real-time progress:
|
|
437
|
+
|
|
438
|
+
```typescript
|
|
439
|
+
const stream = agent.streamEvents('What is TypeScript?', {
|
|
440
|
+
forceDirect: true,
|
|
441
|
+
});
|
|
442
|
+
|
|
443
|
+
for await (const event of stream) {
|
|
444
|
+
switch (event.type) {
|
|
445
|
+
case StreamEventType.START:
|
|
446
|
+
console.log(`Streaming from: ${event.data.model}`);
|
|
447
|
+
break;
|
|
448
|
+
case StreamEventType.CHUNK:
|
|
449
|
+
process.stdout.write(event.data.content);
|
|
450
|
+
break;
|
|
451
|
+
case StreamEventType.COMPLETE:
|
|
452
|
+
console.log(`\nCost: $${event.data.totalCost?.toFixed(6)}`);
|
|
453
|
+
console.log(`Time: ${event.data.timing?.total}ms`);
|
|
454
|
+
break;
|
|
455
|
+
case StreamEventType.ERROR:
|
|
456
|
+
console.error(`Error: ${event.data.error}`);
|
|
457
|
+
break;
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
// Or collect the full result
|
|
462
|
+
import { collectResult } from '@cascadeflow/core';
|
|
463
|
+
const result = await collectResult(stream);
|
|
464
|
+
console.log(`Content: ${result.content}`);
|
|
465
|
+
console.log(`Model: ${result.modelUsed}`);
|
|
466
|
+
```
|
|
467
|
+
|
|
468
|
+
**Use Cases:**
|
|
469
|
+
- Interactive chat applications
|
|
470
|
+
- Real-time content generation
|
|
471
|
+
- Progressive content display
|
|
472
|
+
- Long-form content (articles, essays)
|
|
473
|
+
|
|
474
|
+
[📖 Full example](examples/nodejs/enhanced-streaming.ts)
|
|
475
|
+
|
|
476
|
+
## Features
|
|
477
|
+
|
|
478
|
+
- 🎯 **Smart Cascading**: Automatically tries smaller models first
|
|
479
|
+
- 💰 **Cost Optimization**: Save 40-85% on LLM costs
|
|
480
|
+
- ⚡ **Fast**: 2-10x faster responses with small models
|
|
481
|
+
- 🔀 **Multi-Provider**: OpenAI, Anthropic, Groq, and more
|
|
482
|
+
- ✅ **Quality Validation**: Multi-signal confidence with semantic analysis
|
|
483
|
+
- 📊 **Telemetry**: Event-driven monitoring with callbacks
|
|
484
|
+
- 📦 **Batch Processing**: Sequential processing with analytics
|
|
485
|
+
- 🔀 **Intelligent Routing**: Complexity-based and capability-aware
|
|
486
|
+
- 👤 **User Profiles**: Tier-based access control and budgets
|
|
487
|
+
- 🌊 **Enhanced Streaming**: Event-driven streaming with progress
|
|
488
|
+
- 🏭 **Factory Methods**: Simplified setup with auto-configuration
|
|
489
|
+
- 📈 **Cost Tracking**: Detailed metrics and savings analysis
|
|
490
|
+
|
|
491
|
+
## Examples
|
|
492
|
+
|
|
493
|
+
All examples are available in the [`examples/nodejs`](examples/nodejs) directory:
|
|
494
|
+
|
|
495
|
+
- [**quality-profiles.ts**](examples/nodejs/quality-profiles.ts) - Quality validation profiles (strict, production, development, cascade, permissive)
|
|
496
|
+
- [**telemetry-callbacks.ts**](examples/nodejs/telemetry-callbacks.ts) - Event-driven monitoring and callbacks
|
|
497
|
+
- [**batch-processing.ts**](examples/nodejs/batch-processing.ts) - Batch processing with progress tracking
|
|
498
|
+
- [**router-integration.ts**](examples/nodejs/router-integration.ts) - PreRouter and ToolRouter integration
|
|
499
|
+
- [**user-profiles-workflows.ts**](examples/nodejs/user-profiles-workflows.ts) - User profiles, tiers, and workflows
|
|
500
|
+
- [**factory-methods.ts**](examples/nodejs/factory-methods.ts) - Factory methods (fromEnv, fromProfile)
|
|
501
|
+
- [**enhanced-streaming.ts**](examples/nodejs/enhanced-streaming.ts) - Enhanced streaming with events
|
|
502
|
+
|
|
503
|
+
Run any example with:
|
|
504
|
+
```bash
|
|
505
|
+
npx tsx examples/nodejs/<example-name>.ts
|
|
506
|
+
```
|
|
507
|
+
|
|
508
|
+
## Documentation
|
|
509
|
+
|
|
510
|
+
See the [main cascadeflow documentation](https://github.com/lemony-ai/cascadeflow) for complete guides and examples.
|
|
511
|
+
|
|
512
|
+
## License
|
|
513
|
+
|
|
514
|
+
MIT © Lemony Inc.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import {
|
|
2
|
+
BatchProcessingError,
|
|
3
|
+
BatchProcessor,
|
|
4
|
+
BatchStrategy,
|
|
5
|
+
DEFAULT_BATCH_CONFIG,
|
|
6
|
+
normalizeBatchConfig
|
|
7
|
+
} from "./chunk-XESOO5EG.mjs";
|
|
8
|
+
import "./chunk-XGB3TDIC.mjs";
|
|
9
|
+
export {
|
|
10
|
+
BatchProcessingError,
|
|
11
|
+
BatchProcessor,
|
|
12
|
+
BatchStrategy,
|
|
13
|
+
DEFAULT_BATCH_CONFIG,
|
|
14
|
+
normalizeBatchConfig
|
|
15
|
+
};
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
// src/quality-semantic.ts
|
|
2
|
+
var SemanticQualityChecker = class {
  /**
   * Create a new semantic quality checker.
   *
   * Embedder initialization is started here but runs asynchronously; the
   * resulting promise is stored in `initPromise` and awaited by
   * `isAvailable()` (and therefore by `checkSimilarity()`).
   *
   * @param similarityThreshold - Minimum similarity score to pass (0-1, default: 0.5)
   * @param embedder - Optional pre-configured embedder instance; it must expose
   *   async `isAvailable()` and `similarity(query, response)` methods
   * @param useCache - Whether to wrap the embedder in an EmbeddingCache (default: true)
   */
  constructor(similarityThreshold = 0.5, embedder, useCache = true) {
    this.similarityThreshold = similarityThreshold;
    this.useCache = useCache;
    this._available = false;
    this.initializeAttempted = false;
    this.initPromise = this.initializeEmbedder(embedder);
  }
  /**
   * Initialize the embedder. Runs at most once per instance.
   *
   * `@cascadeflow/ml` is loaded with a dynamic import so the checker can be
   * constructed even when that optional package is not installed. The package
   * is only required to build the default `UnifiedEmbeddingService` and the
   * `EmbeddingCache`; an injected `embedder` is used on its own (without a
   * cache) when the package cannot be loaded.
   *
   * Never rejects: every failure is logged and leaves the checker unavailable.
   *
   * @param embedder - Optional pre-configured embedder instance
   */
  async initializeEmbedder(embedder) {
    if (this.initializeAttempted) {
      return;
    }
    this.initializeAttempted = true;
    try {
      let ml;
      try {
        ml = await import("@cascadeflow/ml");
      } catch (importError) {
        // Without an injected embedder there is nothing to fall back to, so
        // let the outer handler report the missing/broken package.
        if (!embedder) {
          throw importError;
        }
      }
      this.embedder = embedder ? embedder : new ml.UnifiedEmbeddingService();
      this._available = await this.embedder.isAvailable();
      // The cache class lives in @cascadeflow/ml; skip caching if it was not loaded.
      if (this.useCache && this._available && this.embedder && ml) {
        this.cache = new ml.EmbeddingCache(this.embedder);
      }
      if (this._available) {
        console.log("\u2713 Semantic quality checking enabled (UnifiedEmbeddingService)");
      }
    } catch (error) {
      const err = error;
      if (err?.code === "ERR_MODULE_NOT_FOUND" || err?.message?.includes("Cannot find module")) {
        console.warn(
          "@cascadeflow/ml not available. Install with: npm install @cascadeflow/ml @huggingface/transformers"
        );
      } else {
        const message = error instanceof Error ? error.message : String(error);
        console.error(`Failed to initialize semantic quality checker: ${message}`);
      }
      this._available = false;
    }
  }
  /**
   * Check if semantic quality checking is available.
   *
   * Waits for the initialization started by the constructor to finish.
   *
   * @returns Promise resolving to the availability reported by the embedder,
   *   or false if initialization failed
   */
  async isAvailable() {
    if (this.initPromise) {
      await this.initPromise;
    }
    return this._available;
  }
  /**
   * Check semantic similarity between query and response.
   *
   * Delegates to `similarity(query, response)` on the cache when one exists,
   * otherwise on the embedder, and compares the score with
   * `similarityThreshold` (a score equal to the threshold passes).
   *
   * Never throws: unavailability, a `null` score and embedder errors are all
   * reported as a failed result with `similarity: 0` and a `reason`.
   *
   * @param query - User's query/question
   * @param response - Model's response
   * @returns Result object `{ similarity, passed, reason, metadata }`
   */
  async checkSimilarity(query, response) {
    if (!await this.isAvailable() || !this.embedder) {
      return {
        similarity: 0,
        passed: false,
        reason: "Semantic checking not available (ML dependencies not installed)",
        metadata: {
          available: false,
          threshold: this.similarityThreshold
        }
      };
    }
    try {
      const similarity = this.cache ? await this.cache.similarity(query, response) : await this.embedder.similarity(query, response);
      // A null score is the embedder's signal that no embedding was produced.
      if (similarity === null) {
        return {
          similarity: 0,
          passed: false,
          reason: "Failed to compute embeddings",
          metadata: {
            error: "embedding_failed",
            threshold: this.similarityThreshold
          }
        };
      }
      const passed = similarity >= this.similarityThreshold;
      return {
        similarity,
        passed,
        reason: passed ? void 0 : `Similarity ${similarity.toFixed(2)} below threshold ${this.similarityThreshold}`,
        metadata: {
          threshold: this.similarityThreshold,
          // Reports whether a cache is in use, not whether this lookup was a cache hit.
          cached: this.cache !== void 0
        }
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.error(`Error in semantic similarity check: ${message}`);
      return {
        similarity: 0,
        passed: false,
        reason: `Error: ${message}`,
        metadata: {
          error: message,
          threshold: this.similarityThreshold
        }
      };
    }
  }
  /**
   * Clear the embedding cache (no-op when caching is not active).
   *
   * Call this at the end of a request to free memory.
   */
  clearCache() {
    if (this.cache) {
      this.cache.clear();
    }
  }
  /**
   * Get cache statistics.
   *
   * @returns The cache's `cacheInfo()` result, or `{ size: 0, texts: [] }`
   *   when no cache exists
   */
  getCacheInfo() {
    return this.cache?.cacheInfo() || { size: 0, texts: [] };
  }
};
|
|
146
|
+
|
|
147
|
+
export {
|
|
148
|
+
SemanticQualityChecker
|
|
149
|
+
};
|