cascadeflow-core-smr 1.1.1-smr.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,514 @@
1
+ <div align="center">
2
+
3
+ <picture>
4
+ <source media="(prefers-color-scheme: dark)" srcset="../../.github/assets/CF_logo_bright.svg">
5
+ <source media="(prefers-color-scheme: light)" srcset="../../.github/assets/CF_logo_dark.svg">
6
+ <img alt="cascadeflow Logo" src="../../.github/assets/CF_logo_dark.svg" width="80%" style="margin: 20px auto;">
7
+ </picture>
8
+
9
+ # @cascadeflow/core
10
+
11
+ [![npm version](https://img.shields.io/npm/v/@cascadeflow/core?color=red&label=npm)](https://www.npmjs.com/package/@cascadeflow/core)
12
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](../../LICENSE)
13
+ [![TypeScript](https://img.shields.io/badge/TypeScript-5.0+-blue.svg)](https://www.typescriptlang.org/)
14
+ [![Tests](https://github.com/lemony-ai/cascadeflow/actions/workflows/test.yml/badge.svg)](https://github.com/lemony-ai/cascadeflow/actions/workflows/test.yml)
15
+
16
+ **<img src="../../.github/assets/CF_ts_color.svg" width="22" height="22" alt="TypeScript" style="vertical-align: middle;"/> TypeScript/JavaScript library for cascadeflow**
17
+
18
+ </div>
19
+
20
+ ---
21
+
22
+ **Smart AI model cascading for cost optimization.**
23
+
24
+ Save 40-85% on LLM costs with intelligent model routing. Available for Node.js, browser, and edge environments.
25
+
26
+ ## Installation
27
+
28
+ ```bash
29
+ npm install @cascadeflow/core
30
+ # or
31
+ pnpm add @cascadeflow/core
32
+ # or
33
+ yarn add @cascadeflow/core
34
+ ```
35
+
36
+ ## Harness Quick Start (V2.1)
37
+
38
+ ```typescript
39
+ import { cascadeflow } from '@cascadeflow/core';
40
+
41
+ // 1) Turn on in-process harness decisions + SDK auto-instrumentation
42
+ cascadeflow.init({ mode: 'enforce', budget: 0.5 });
43
+
44
+ // 2) Scope one run (global defaults are inherited)
45
+ const result = await cascadeflow.run({ maxToolCalls: 8 }, async (run) => {
46
+ // Any OpenAI / Anthropic SDK calls made here are evaluated by the harness.
47
+ return { runId: run.runId };
48
+ });
49
+
50
+ console.log(result);
51
+ ```
52
+
53
+ ## Quick Start
54
+
55
+ ### Recommended Setup (Claude Haiku + GPT-5)
56
+
57
+ ```typescript
58
+ import { CascadeAgent } from '@cascadeflow/core';
59
+
60
+ const agent = new CascadeAgent({
61
+ models: [
62
+ {
63
+ name: 'claude-haiku-4-5',
64
+ provider: 'anthropic',
65
+ cost: 0.001 // Fast, high-quality drafter
66
+ },
67
+ {
68
+ name: 'gpt-5',
69
+ provider: 'openai',
70
+ cost: 0.00125 // Superior reasoning verifier (50% cheaper than GPT-4o!)
71
+ }
72
+ ]
73
+ });
74
+
75
+ const result = await agent.run('What is artificial intelligence?');
76
+
77
+ console.log(result.content);
78
+ console.log(`Cost: $${result.totalCost}`);
79
+ console.log(`Savings: ${result.savingsPercentage}%`);
80
+ ```
81
+
82
+ ### Quality Configuration
83
+
84
+ Control when the cascade uses the drafter vs. verifier with quality thresholds:
85
+
86
+ ```typescript
87
+ // Recommended: Complexity-aware thresholds
88
+ const agent = new CascadeAgent({
89
+ models: [
90
+ { name: 'claude-haiku-4-5', provider: 'anthropic', cost: 0.001 },
91
+ { name: 'gpt-5', provider: 'openai', cost: 0.00125 }
92
+ ],
93
+ quality: {
94
+ confidenceThresholds: {
95
+ simple: 0.6, // "What is Python?" - Accept 60%+ confidence
96
+ moderate: 0.7, // "Compare Python vs Java" - Accept 70%+
97
+ hard: 0.8, // "Analyze quantum computing" - Accept 80%+
98
+ expert: 0.85 // "Implement distributed cache" - Accept 85%+
99
+ }
100
+ }
101
+ });
102
+ ```
103
+
104
+ **Quick Configuration Options:**
105
+
106
+ ```typescript
107
+ // Option 1: Use CASCADE_QUALITY_CONFIG (optimized for 50-60% acceptance)
108
+ import { CascadeAgent, CASCADE_QUALITY_CONFIG } from '@cascadeflow/core';
109
+ const agent = new CascadeAgent({
110
+ models: [...],
111
+ quality: CASCADE_QUALITY_CONFIG // Lower threshold (0.40) = more cost savings
112
+ });
113
+
114
+ // Option 2: Simple flat threshold
115
+ const agent = new CascadeAgent({
116
+ models: [...],
117
+ quality: {
118
+ threshold: 0.7, // 70% confidence required (default)
119
+ requireMinimumTokens: 10 // Minimum response length
120
+ }
121
+ });
122
+
123
+ // Option 3: Use defaults (no quality config needed)
124
+ const agent = new CascadeAgent({
125
+ models: [...]
126
+ // Automatically uses threshold: 0.7
127
+ });
128
+ ```
129
+
130
+ **When to adjust:**
131
+ - **Lower thresholds (0.4-0.6)**: More drafts accepted → higher cost savings, slightly lower quality
132
+ - **Higher thresholds (0.8-0.9)**: Fewer drafts accepted → lower savings, maximum quality
133
+ - **Complexity-aware**: Best balance → adjusts automatically based on query difficulty
134
+
135
+ > **⚠️ GPT-5 Requires Organization Verification**
136
+ >
137
+ > To use GPT-5, your OpenAI organization must be verified:
138
+ > 1. Go to https://platform.openai.com/settings/organization/general
139
+ > 2. Click "Verify Organization"
140
+ > 3. Wait ~15 minutes for access to propagate
141
+ >
142
+ > **Works immediately:** The cascade above works right away! Claude Haiku handles 75% of queries; GPT-5 is only called when needed.
143
+
144
+ > **📝 Model Naming**
145
+ >
146
+ > Both naming conventions work with cascadeflow:
147
+ > - `claude-haiku-4-5` (used in presets, recommended)
148
+ > - `claude-3-5-haiku-20241022` (Anthropic API format)
149
+ >
150
+ > The library accepts both formats and routes them correctly.
151
+
152
+ ### OpenAI Only
153
+
154
+ ```typescript
155
+ const agent = new CascadeAgent({
156
+ models: [
157
+ { name: 'gpt-4o-mini', provider: 'openai', cost: 0.00015 },
158
+ { name: 'gpt-5', provider: 'openai', cost: 0.00125 } // Requires org verification
159
+ ]
160
+ });
161
+ ```
162
+
163
+ ### Even Easier: Use Presets
164
+
165
+ **No configuration needed** - just import a preset and go:
166
+
167
+ ```typescript
168
+ import { CascadeAgent, PRESET_ULTRA_FAST, PRESET_BEST_OVERALL } from '@cascadeflow/core';
169
+
170
+ // Ultra-fast with Groq (5-10x faster than OpenAI)
171
+ const agent = new CascadeAgent(PRESET_ULTRA_FAST);
172
+
173
+ // Or best overall (Claude Haiku + GPT-4o-mini)
174
+ const agent = new CascadeAgent(PRESET_BEST_OVERALL);
175
+
176
+ const result = await agent.run('Your query here');
177
+ ```
178
+
179
+ **Available Presets:**
180
+
181
+ | Preset | Best For | Speed | Cost/Query | API Keys |
182
+ |--------|----------|-------|-----------|----------|
183
+ | `PRESET_BEST_OVERALL` | Most use cases | Fast (~2-3s) | ~$0.0008 | Anthropic + OpenAI |
184
+ | `PRESET_ULTRA_FAST` | Real-time apps | Ultra-fast (~1-2s) | ~$0.0002 | Groq |
185
+ | `PRESET_ULTRA_CHEAP` | High volume | Very fast (~1-3s) | ~$0.00008 | Groq + OpenAI |
186
+ | `PRESET_OPENAI_ONLY` | Single provider | Fast (~2-4s) | ~$0.0004 | OpenAI |
187
+ | `PRESET_ANTHROPIC_ONLY` | Claude fans | Fast (~2-3s) | ~$0.002 | Anthropic |
188
+ | `PRESET_FREE_LOCAL` | Privacy/offline | Moderate (~3-5s) | $0 (free) | None (Ollama) |
189
+
190
+ **Custom Presets:**
191
+
192
+ ```typescript
193
+ import { CascadeAgent, createPreset } from '@cascadeflow/core';
194
+
195
+ const agent = new CascadeAgent(
196
+ createPreset({
197
+ quality: 'strict', // 'cost-optimized' | 'balanced' | 'strict'
198
+ performance: 'fast', // 'fast' | 'balanced' | 'reliable'
199
+ includePremium: true // Add premium tier (gpt-4o)
200
+ })
201
+ );
202
+ ```
203
+
204
+ ## Advanced Features
205
+
206
+ ### 🎚️ Quality Profiles
207
+
208
+ Control quality validation with predefined profiles optimized for different use cases:
209
+
210
+ ```typescript
211
+ import { CascadeAgent, QualityValidator } from '@cascadeflow/core';
212
+
213
+ // Strict Mode: Maximum quality with semantic validation
214
+ const strictAgent = new CascadeAgent({
215
+ models: [...],
216
+ cascade: {
217
+ enabled: true,
218
+ qualityConfig: {
219
+ useProductionConfidence: true,
220
+ strictMode: true,
221
+ useSemanticValidation: true,
222
+ minConfidence: 0.85,
223
+ provider: 'openai',
224
+ },
225
+ },
226
+ });
227
+
228
+ // Or use factory methods
229
+ const strictValidator = QualityValidator.strict();
230
+ const prodValidator = QualityValidator.forProduction(); // Multi-signal confidence
231
+ const devValidator = QualityValidator.forDevelopment(); // Lenient for testing
232
+ const cascadeValidator = QualityValidator.forCascade(); // Optimized for 50-60% acceptance
233
+ const permissiveValidator = QualityValidator.permissive(); // Maximum throughput
234
+ ```
235
+
236
+ **Available Profiles:**
237
+ - **Strict**: 85% confidence + semantic validation (maximum quality)
238
+ - **Production**: 70% confidence with multi-signal estimation (balanced)
239
+ - **Development**: 50% confidence, minimal word count (fast iteration)
240
+ - **Cascade**: 40% confidence, optimized for cost savings (50-60% draft acceptance)
241
+ - **Permissive**: 30% confidence, maximum throughput (highest savings)
242
+
243
+ [📖 Full example](examples/nodejs/quality-profiles.ts)
244
+
245
+ ### 📡 Telemetry & Callbacks
246
+
247
+ Monitor cascade operations with event-driven callbacks:
248
+
249
+ ```typescript
250
+ import { CascadeAgent, CallbackManager, CallbackEvent } from '@cascadeflow/core';
251
+
252
+ const callbackManager = new CallbackManager(true); // verbose=true
253
+
254
+ // Track query lifecycle
255
+ callbackManager.register(CallbackEvent.QUERY_START, (data) => {
256
+ console.log(`Query started: "${data.query}"`);
257
+ });
258
+
259
+ callbackManager.register(CallbackEvent.COMPLEXITY_DETECTED, (data) => {
260
+ console.log(`Complexity: ${data.data.complexity} (confidence: ${data.data.confidence})`);
261
+ });
262
+
263
+ callbackManager.register(CallbackEvent.DRAFT_ACCEPTED, (data) => {
264
+ console.log(`Draft accepted! Savings: $${data.data.savings}`);
265
+ });
266
+
267
+ const agent = new CascadeAgent({
268
+ models: [...],
269
+ callbacks: callbackManager,
270
+ cascade: { enabled: true },
271
+ });
272
+ ```
273
+
274
+ **Available Events:**
275
+ - `QUERY_START` / `QUERY_COMPLETE` - Query lifecycle
276
+ - `COMPLEXITY_DETECTED` - Query complexity analysis
277
+ - `CASCADE_DECISION` - Routing decisions
278
+ - `QUALITY_VALIDATION` - Quality checks
279
+ - `DRAFT_ACCEPTED` / `DRAFT_REJECTED` - Draft outcomes
280
+ - `VERIFIER_CALLED` - Verifier invocations
281
+
282
+ [📖 Full example](examples/nodejs/telemetry-callbacks.ts)
283
+
284
+ ### 📦 Batch Processing
285
+
286
+ Process multiple queries with progress tracking and analytics:
287
+
288
+ ```typescript
289
+ const queries = [
290
+ 'What is TypeScript?',
291
+ 'Explain async/await.',
292
+ 'What are design patterns?',
293
+ ];
294
+
295
+ const batchResult = await agent.runBatch(queries, {
296
+ strategy: BatchStrategy.SEQUENTIAL,
297
+ continueOnError: true,
298
+ onProgress: (completed, total, currentQuery) => {
299
+ console.log(`[${(completed/total*100).toFixed(0)}%] ${completed}/${total}`);
300
+ },
301
+ });
302
+
303
+ // Analyze results
304
+ console.log(`Success rate: ${(batchResult.successCount / queries.length * 100).toFixed(1)}%`);
305
+ console.log(`Total cost: $${batchResult.results.reduce((sum, r) => sum + (r.result?.totalCost || 0), 0)}`);
306
+ console.log(`Draft acceptance: ${batchResult.results.filter(r => r.result?.draftAccepted).length}`);
307
+ ```
308
+
309
+ [📖 Full example](examples/nodejs/batch-processing.ts)
310
+
311
+ ### 🔀 Router Integration
312
+
313
+ Intelligent routing with complexity analysis and capability filtering:
314
+
315
+ ```typescript
316
+ // PreRouter: Automatically routes based on query complexity
317
+ const simpleResult = await agent.run('What is 2 + 2?');
318
+ // → Uses draft model (simple query)
319
+
320
+ const complexResult = await agent.run(
321
+ 'Explain quantum computing theory with recent research references.'
322
+ );
323
+ // → Routes directly to best model (complex query)
324
+
325
+ // ToolRouter: Filters to tool-capable models
326
+ // Use a strict parser helper (see examples/nodejs/safe-math.ts).
327
+ const calculatorTool = createTool({
328
+ name: 'calculator',
329
+ description: 'Perform calculations',
330
+ function: async ({ expression }) => safeCalculateExpression(expression),
331
+ });
332
+
333
+ const toolResult = await agent.run('Calculate 125 * 47', {
334
+ tools: [calculatorTool],
335
+ });
336
+ // → Automatically excludes models without tool support
337
+
338
+ // Get router statistics
339
+ const stats = agent.getRouterStats();
340
+ console.log(stats.preRouter); // Complexity-based routing stats
341
+ console.log(stats.toolRouter); // Tool filtering stats
342
+ ```
343
+
344
+ [📖 Full example](examples/nodejs/router-integration.ts)
345
+
346
+ ### 👤 User Profiles & Workflows
347
+
348
+ Manage user tiers, budgets, and optimization preferences:
349
+
350
+ ```typescript
351
+ import { createUserProfile, createWorkflowProfile, TIER_PRESETS, WORKFLOW_PRESETS } from '@cascadeflow/core';
352
+
353
+ // Tier-based profiles
354
+ const freeProfile = createUserProfile({
355
+ tier: TIER_PRESETS.free, // Max budget: $0.01, Quality: 0.60
356
+ });
357
+
358
+ const premiumProfile = createUserProfile({
359
+ tier: TIER_PRESETS.premium, // Max budget: $0.10, Quality: 0.80
360
+ });
361
+
362
+ // Custom profile with optimization weights
363
+ const customProfile = createUserProfile({
364
+ tier: { name: 'custom', maxBudget: 0.05, qualityThreshold: 0.75 },
365
+ optimizationWeights: {
366
+ cost: 0.5, // 50% weight on cost
367
+ speed: 0.3, // 30% weight on speed
368
+ quality: 0.2, // 20% weight on quality
369
+ },
370
+ });
371
+
372
+ // Latency-aware profiles
373
+ const lowLatencyProfile = createUserProfile({
374
+ tier: TIER_PRESETS.premium,
375
+ latencyProfile: {
376
+ maxTotalMs: 2000, // 2 second total limit
377
+ maxPerModelMs: 1000, // 1 second per model
378
+ preferParallel: true, // Prefer parallel execution
379
+ skipCascadeThreshold: 1500,
380
+ },
381
+ });
382
+
383
+ // Use with agent
384
+ const agent = new CascadeAgent({
385
+ models: [...],
386
+ profile: premiumProfile,
387
+ cascade: { enabled: true },
388
+ });
389
+ ```
390
+
391
+ **Workflow Presets:**
392
+ - `WORKFLOW_PRESETS.production` - High quality, reasonable latency
393
+ - `WORKFLOW_PRESETS.realtime` - Ultra-low latency, single model
394
+ - `WORKFLOW_PRESETS.batch` - Maximum throughput, relaxed constraints
395
+
396
+ [📖 Full example](examples/nodejs/user-profiles-workflows.ts)
397
+
398
+ ### 🏭 Factory Methods
399
+
400
+ Simplified agent creation with auto-configuration:
401
+
402
+ ```typescript
403
+ import { CascadeAgent } from '@cascadeflow/core';
404
+
405
+ // Auto-detect providers from environment variables
406
+ const envAgent = CascadeAgent.fromEnv({
407
+ quality: 'production', // 'strict' | 'production' | 'development'
408
+ });
409
+ // Checks for: OPENAI_API_KEY, ANTHROPIC_API_KEY, GROQ_API_KEY, etc.
410
+
411
+ // Create from user profile
412
+ const profileAgent = CascadeAgent.fromProfile({
413
+ profile: premiumProfile,
414
+ preferredModels: ['gpt-4o-mini', 'gpt-4o', 'claude-3-5-haiku-20241022'],
415
+ });
416
+
417
+ // Traditional manual configuration (full control)
418
+ const manualAgent = new CascadeAgent({
419
+ models: [
420
+ { name: 'gpt-4o-mini', provider: 'openai', cost: 0.00015 },
421
+ { name: 'gpt-4o', provider: 'openai', cost: 0.00625 },
422
+ ],
423
+ cascade: { enabled: true },
424
+ });
425
+ ```
426
+
427
+ **Benefits:**
428
+ - `fromEnv()`: Auto-detects available providers, sensible defaults
429
+ - `fromProfile()`: Multi-tenant applications, per-user configuration
430
+ - Manual config: Full control for production requirements
431
+
432
+ [📖 Full example](examples/nodejs/factory-methods.ts)
433
+
434
+ ### 🌊 Enhanced Streaming
435
+
436
+ Event-driven streaming with real-time progress:
437
+
438
+ ```typescript
439
+ const stream = agent.streamEvents('What is TypeScript?', {
440
+ forceDirect: true,
441
+ });
442
+
443
+ for await (const event of stream) {
444
+ switch (event.type) {
445
+ case StreamEventType.START:
446
+ console.log(`Streaming from: ${event.data.model}`);
447
+ break;
448
+ case StreamEventType.CHUNK:
449
+ process.stdout.write(event.data.content);
450
+ break;
451
+ case StreamEventType.COMPLETE:
452
+ console.log(`\nCost: $${event.data.totalCost?.toFixed(6)}`);
453
+ console.log(`Time: ${event.data.timing?.total}ms`);
454
+ break;
455
+ case StreamEventType.ERROR:
456
+ console.error(`Error: ${event.data.error}`);
457
+ break;
458
+ }
459
+ }
460
+
461
+ // Or collect the full result
462
+ import { collectResult } from '@cascadeflow/core';
463
+ const result = await collectResult(stream);
464
+ console.log(`Content: ${result.content}`);
465
+ console.log(`Model: ${result.modelUsed}`);
466
+ ```
467
+
468
+ **Use Cases:**
469
+ - Interactive chat applications
470
+ - Real-time content generation
471
+ - Progressive content display
472
+ - Long-form content (articles, essays)
473
+
474
+ [📖 Full example](examples/nodejs/enhanced-streaming.ts)
475
+
476
+ ## Features
477
+
478
+ - 🎯 **Smart Cascading**: Automatically tries smaller models first
479
+ - 💰 **Cost Optimization**: Save 40-85% on LLM costs
480
+ - ⚡ **Fast**: 2-10x faster responses with small models
481
+ - 🔀 **Multi-Provider**: OpenAI, Anthropic, Groq, and more
482
+ - ✅ **Quality Validation**: Multi-signal confidence with semantic analysis
483
+ - 📊 **Telemetry**: Event-driven monitoring with callbacks
484
+ - 📦 **Batch Processing**: Sequential processing with analytics
485
+ - 🔀 **Intelligent Routing**: Complexity-based and capability-aware
486
+ - 👤 **User Profiles**: Tier-based access control and budgets
487
+ - 🌊 **Enhanced Streaming**: Event-driven streaming with progress
488
+ - 🏭 **Factory Methods**: Simplified setup with auto-configuration
489
+ - 📈 **Cost Tracking**: Detailed metrics and savings analysis
490
+
491
+ ## Examples
492
+
493
+ All examples are available in the [`examples/nodejs`](examples/nodejs) directory:
494
+
495
+ - [**quality-profiles.ts**](examples/nodejs/quality-profiles.ts) - Quality validation profiles (strict, production, development, cascade, permissive)
496
+ - [**telemetry-callbacks.ts**](examples/nodejs/telemetry-callbacks.ts) - Event-driven monitoring and callbacks
497
+ - [**batch-processing.ts**](examples/nodejs/batch-processing.ts) - Batch processing with progress tracking
498
+ - [**router-integration.ts**](examples/nodejs/router-integration.ts) - PreRouter and ToolRouter integration
499
+ - [**user-profiles-workflows.ts**](examples/nodejs/user-profiles-workflows.ts) - User profiles, tiers, and workflows
500
+ - [**factory-methods.ts**](examples/nodejs/factory-methods.ts) - Factory methods (fromEnv, fromProfile)
501
+ - [**enhanced-streaming.ts**](examples/nodejs/enhanced-streaming.ts) - Enhanced streaming with events
502
+
503
+ Run any example with:
504
+ ```bash
505
+ npx tsx examples/nodejs/<example-name>.ts
506
+ ```
507
+
508
+ ## Documentation
509
+
510
+ See the [main cascadeflow documentation](https://github.com/lemony-ai/cascadeflow) for complete guides and examples.
511
+
512
+ ## License
513
+
514
+ MIT © Lemony Inc.
@@ -0,0 +1,15 @@
1
+ import {
2
+ BatchProcessingError,
3
+ BatchProcessor,
4
+ BatchStrategy,
5
+ DEFAULT_BATCH_CONFIG,
6
+ normalizeBatchConfig
7
+ } from "./chunk-XESOO5EG.mjs";
8
+ import "./chunk-XGB3TDIC.mjs";
9
+ export {
10
+ BatchProcessingError,
11
+ BatchProcessor,
12
+ BatchStrategy,
13
+ DEFAULT_BATCH_CONFIG,
14
+ normalizeBatchConfig
15
+ };
@@ -0,0 +1,149 @@
1
+ // src/quality-semantic.ts
2
/**
 * Scores how well a model response matches a query by comparing embeddings.
 *
 * The embedding backend comes from the optional `@cascadeflow/ml` package, or
 * from an embedder object supplied by the caller. When neither is usable the
 * checker stays constructible and every check reports `passed: false` with a
 * reason instead of throwing.
 */
var SemanticQualityChecker = class {
  /**
   * Create a new semantic quality checker.
   *
   * Initialization starts immediately but runs asynchronously; its promise is
   * kept in `initPromise` and awaited by `isAvailable()`.
   *
   * @param similarityThreshold - Minimum similarity score to pass (0-1, default: 0.5)
   * @param embedder - Optional pre-configured embedder exposing async
   *   `isAvailable()` and `similarity(a, b)` methods
   * @param useCache - Whether to wrap the embedder in an EmbeddingCache when
   *   `@cascadeflow/ml` is installed (default: true)
   */
  constructor(similarityThreshold = 0.5, embedder, useCache = true) {
    this.similarityThreshold = similarityThreshold;
    this.useCache = useCache;
    this._available = false;
    this.initializeAttempted = false;
    this.initPromise = this.initializeEmbedder(embedder);
  }
  /**
   * Load the embedding backend and record whether it is usable.
   *
   * Runs at most once per instance (guarded by `initializeAttempted`) and
   * never rejects: every failure is logged and leaves the checker unavailable.
   *
   * `@cascadeflow/ml` is imported dynamically so that it stays an optional
   * dependency. A caller-supplied embedder does not need that package; when
   * the import fails it is still used, only without the embedding cache.
   *
   * @param embedder - Optional pre-configured embedder instance
   */
  async initializeEmbedder(embedder) {
    if (this.initializeAttempted) {
      return;
    }
    this.initializeAttempted = true;
    let ml = null;
    try {
      ml = await import("@cascadeflow/ml");
    } catch (error) {
      // A missing optional package is expected when the caller brings their
      // own embedder, so only report it when it actually blocks us or when it
      // is some other, unexpected load failure.
      if (!embedder || !this._isMissingModuleError(error)) {
        this._reportInitFailure(error);
      }
      if (!embedder) {
        this._available = false;
        return;
      }
    }
    try {
      this.embedder = embedder ? embedder : new ml.UnifiedEmbeddingService();
      this._available = await this.embedder.isAvailable();
      // EmbeddingCache lives in @cascadeflow/ml; without the package we fall
      // back to calling the embedder directly.
      if (this.useCache && this._available && this.embedder && ml?.EmbeddingCache) {
        this.cache = new ml.EmbeddingCache(this.embedder);
      }
      if (this._available) {
        console.log("\u2713 Semantic quality checking enabled (UnifiedEmbeddingService)");
      }
    } catch (error) {
      this._reportInitFailure(error);
      this._available = false;
    }
  }
  /**
   * Tell whether an error looks like "module could not be resolved".
   *
   * @param error - Value caught from the dynamic import
   * @returns true for ERR_MODULE_NOT_FOUND or a "Cannot find module" message
   */
  _isMissingModuleError(error) {
    if (error?.code === "ERR_MODULE_NOT_FOUND") {
      return true;
    }
    // Guard the type: a non-string `message` must not throw inside a catch.
    return typeof error?.message === "string" && error.message.includes("Cannot find module");
  }
  /**
   * Log an initialization failure: an install hint when the optional package
   * is missing, the underlying error message otherwise.
   *
   * @param error - Value caught during initialization
   */
  _reportInitFailure(error) {
    if (this._isMissingModuleError(error)) {
      console.warn(
        "@cascadeflow/ml not available. Install with: npm install @cascadeflow/ml @huggingface/transformers"
      );
      return;
    }
    const message = error instanceof Error ? error.message : String(error);
    console.error(`Failed to initialize semantic quality checker: ${message}`);
  }
  /**
   * Check if semantic quality checking is available.
   *
   * Waits for the initialization started by the constructor to settle.
   *
   * @returns Promise resolving to the availability reported by the embedder,
   *   or false when initialization failed
   */
  async isAvailable() {
    if (this.initPromise) {
      await this.initPromise;
    }
    return this._available;
  }
  /**
   * Check semantic similarity between query and response.
   *
   * Delegates to the cache when one exists, otherwise to the embedder, and
   * compares the returned score with `similarityThreshold`. Never throws:
   * unavailable backends, missing scores and embedder errors all yield
   * `{ similarity: 0, passed: false }` with an explanatory `reason`.
   *
   * @param query - User's query/question
   * @param response - Model's response
   * @returns Result with `similarity`, `passed`, optional `reason`, and `metadata`
   */
  async checkSimilarity(query, response) {
    if (!await this.isAvailable() || !this.embedder) {
      return {
        similarity: 0,
        passed: false,
        reason: "Semantic checking not available (ML dependencies not installed)",
        metadata: {
          available: false,
          threshold: this.similarityThreshold
        }
      };
    }
    try {
      const backend = this.cache ? this.cache : this.embedder;
      const similarity = await backend.similarity(query, response);
      // null, undefined and NaN all mean "no usable score"; report them as an
      // embedding failure rather than as a low-similarity answer.
      if (similarity == null || Number.isNaN(similarity)) {
        return {
          similarity: 0,
          passed: false,
          reason: "Failed to compute embeddings",
          metadata: {
            error: "embedding_failed",
            threshold: this.similarityThreshold
          }
        };
      }
      const passed = similarity >= this.similarityThreshold;
      return {
        similarity,
        passed,
        reason: passed ? undefined : `Similarity ${similarity.toFixed(2)} below threshold ${this.similarityThreshold}`,
        metadata: {
          threshold: this.similarityThreshold,
          cached: this.cache !== undefined
        }
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.error(`Error in semantic similarity check: ${message}`);
      return {
        similarity: 0,
        passed: false,
        reason: `Error: ${message}`,
        metadata: {
          error: message,
          threshold: this.similarityThreshold
        }
      };
    }
  }
  /**
   * Clear the embedding cache (if caching is enabled).
   *
   * Call this at the end of a request to free memory. No-op without a cache.
   */
  clearCache() {
    if (this.cache) {
      this.cache.clear();
    }
  }
  /**
   * Get cache statistics (if caching is enabled).
   *
   * @returns Whatever the cache's `cacheInfo()` reports, or
   *   `{ size: 0, texts: [] }` when there is no cache
   */
  getCacheInfo() {
    return this.cache?.cacheInfo() || { size: 0, texts: [] };
  }
};
146
+
147
+ export {
148
+ SemanticQualityChecker
149
+ };