@juspay/neurolink 9.3.0 → 9.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +8 -8
- package/dist/cli/commands/config.d.ts +3 -3
- package/dist/cli/index.js +1 -0
- package/dist/index.d.ts +35 -0
- package/dist/index.js +17 -0
- package/dist/lib/agent/directTools.d.ts +5 -5
- package/dist/lib/index.d.ts +35 -0
- package/dist/lib/index.js +17 -0
- package/dist/lib/neurolink.d.ts +12 -1
- package/dist/lib/neurolink.js +265 -4
- package/dist/lib/server/utils/validation.d.ts +8 -8
- package/dist/lib/types/generateTypes.d.ts +28 -0
- package/dist/lib/types/index.d.ts +6 -0
- package/dist/lib/types/index.js +12 -0
- package/dist/lib/types/modelTypes.d.ts +2 -2
- package/dist/lib/types/streamTypes.d.ts +35 -0
- package/dist/lib/types/workflowTypes.d.ts +558 -0
- package/dist/lib/types/workflowTypes.js +32 -0
- package/dist/lib/workflow/LAYER-EXAMPLES.d.ts +13 -0
- package/dist/lib/workflow/LAYER-EXAMPLES.js +312 -0
- package/dist/lib/workflow/PROMPT-EXAMPLES.d.ts +117 -0
- package/dist/lib/workflow/PROMPT-EXAMPLES.js +246 -0
- package/dist/lib/workflow/config.d.ts +1569 -0
- package/dist/lib/workflow/config.js +399 -0
- package/dist/lib/workflow/core/ensembleExecutor.d.ts +56 -0
- package/dist/lib/workflow/core/ensembleExecutor.js +398 -0
- package/dist/lib/workflow/core/judgeScorer.d.ts +26 -0
- package/dist/lib/workflow/core/judgeScorer.js +527 -0
- package/dist/lib/workflow/core/responseConditioner.d.ts +22 -0
- package/dist/lib/workflow/core/responseConditioner.js +226 -0
- package/dist/lib/workflow/core/types/conditionerTypes.d.ts +7 -0
- package/dist/lib/workflow/core/types/conditionerTypes.js +8 -0
- package/dist/lib/workflow/core/types/ensembleTypes.d.ts +7 -0
- package/dist/lib/workflow/core/types/ensembleTypes.js +8 -0
- package/dist/lib/workflow/core/types/index.d.ts +7 -0
- package/dist/lib/workflow/core/types/index.js +8 -0
- package/dist/lib/workflow/core/types/judgeTypes.d.ts +7 -0
- package/dist/lib/workflow/core/types/judgeTypes.js +8 -0
- package/dist/lib/workflow/core/types/layerTypes.d.ts +7 -0
- package/dist/lib/workflow/core/types/layerTypes.js +8 -0
- package/dist/lib/workflow/core/types/registryTypes.d.ts +7 -0
- package/dist/lib/workflow/core/types/registryTypes.js +8 -0
- package/dist/lib/workflow/core/workflowRegistry.d.ts +73 -0
- package/dist/lib/workflow/core/workflowRegistry.js +305 -0
- package/dist/lib/workflow/core/workflowRunner.d.ts +115 -0
- package/dist/lib/workflow/core/workflowRunner.js +554 -0
- package/dist/lib/workflow/index.d.ts +36 -0
- package/dist/lib/workflow/index.js +51 -0
- package/dist/lib/workflow/types.d.ts +19 -0
- package/dist/lib/workflow/types.js +10 -0
- package/dist/lib/workflow/utils/types/index.d.ts +7 -0
- package/dist/lib/workflow/utils/types/index.js +8 -0
- package/dist/lib/workflow/utils/types/metricsTypes.d.ts +7 -0
- package/dist/lib/workflow/utils/types/metricsTypes.js +8 -0
- package/dist/lib/workflow/utils/types/validationTypes.d.ts +7 -0
- package/dist/lib/workflow/utils/types/validationTypes.js +8 -0
- package/dist/lib/workflow/utils/workflowMetrics.d.ts +76 -0
- package/dist/lib/workflow/utils/workflowMetrics.js +312 -0
- package/dist/lib/workflow/utils/workflowValidation.d.ts +29 -0
- package/dist/lib/workflow/utils/workflowValidation.js +421 -0
- package/dist/lib/workflow/workflows/adaptiveWorkflow.d.ts +72 -0
- package/dist/lib/workflow/workflows/adaptiveWorkflow.js +367 -0
- package/dist/lib/workflow/workflows/consensusWorkflow.d.ts +69 -0
- package/dist/lib/workflow/workflows/consensusWorkflow.js +193 -0
- package/dist/lib/workflow/workflows/fallbackWorkflow.d.ts +49 -0
- package/dist/lib/workflow/workflows/fallbackWorkflow.js +226 -0
- package/dist/lib/workflow/workflows/multiJudgeWorkflow.d.ts +70 -0
- package/dist/lib/workflow/workflows/multiJudgeWorkflow.js +352 -0
- package/dist/neurolink.d.ts +12 -1
- package/dist/neurolink.js +265 -4
- package/dist/types/generateTypes.d.ts +28 -0
- package/dist/types/index.d.ts +6 -0
- package/dist/types/index.js +12 -0
- package/dist/types/streamTypes.d.ts +35 -0
- package/dist/types/workflowTypes.d.ts +558 -0
- package/dist/types/workflowTypes.js +31 -0
- package/dist/workflow/LAYER-EXAMPLES.d.ts +13 -0
- package/dist/workflow/LAYER-EXAMPLES.js +311 -0
- package/dist/workflow/PROMPT-EXAMPLES.d.ts +117 -0
- package/dist/workflow/PROMPT-EXAMPLES.js +245 -0
- package/dist/workflow/config.d.ts +1569 -0
- package/dist/workflow/config.js +398 -0
- package/dist/workflow/core/ensembleExecutor.d.ts +56 -0
- package/dist/workflow/core/ensembleExecutor.js +397 -0
- package/dist/workflow/core/judgeScorer.d.ts +26 -0
- package/dist/workflow/core/judgeScorer.js +526 -0
- package/dist/workflow/core/responseConditioner.d.ts +22 -0
- package/dist/workflow/core/responseConditioner.js +225 -0
- package/dist/workflow/core/types/conditionerTypes.d.ts +7 -0
- package/dist/workflow/core/types/conditionerTypes.js +7 -0
- package/dist/workflow/core/types/ensembleTypes.d.ts +7 -0
- package/dist/workflow/core/types/ensembleTypes.js +7 -0
- package/dist/workflow/core/types/index.d.ts +7 -0
- package/dist/workflow/core/types/index.js +7 -0
- package/dist/workflow/core/types/judgeTypes.d.ts +7 -0
- package/dist/workflow/core/types/judgeTypes.js +7 -0
- package/dist/workflow/core/types/layerTypes.d.ts +7 -0
- package/dist/workflow/core/types/layerTypes.js +7 -0
- package/dist/workflow/core/types/registryTypes.d.ts +7 -0
- package/dist/workflow/core/types/registryTypes.js +7 -0
- package/dist/workflow/core/workflowRegistry.d.ts +73 -0
- package/dist/workflow/core/workflowRegistry.js +304 -0
- package/dist/workflow/core/workflowRunner.d.ts +115 -0
- package/dist/workflow/core/workflowRunner.js +553 -0
- package/dist/workflow/index.d.ts +36 -0
- package/dist/workflow/index.js +50 -0
- package/dist/workflow/types.d.ts +19 -0
- package/dist/workflow/types.js +9 -0
- package/dist/workflow/utils/types/index.d.ts +7 -0
- package/dist/workflow/utils/types/index.js +7 -0
- package/dist/workflow/utils/types/metricsTypes.d.ts +7 -0
- package/dist/workflow/utils/types/metricsTypes.js +7 -0
- package/dist/workflow/utils/types/validationTypes.d.ts +7 -0
- package/dist/workflow/utils/types/validationTypes.js +7 -0
- package/dist/workflow/utils/workflowMetrics.d.ts +76 -0
- package/dist/workflow/utils/workflowMetrics.js +311 -0
- package/dist/workflow/utils/workflowValidation.d.ts +29 -0
- package/dist/workflow/utils/workflowValidation.js +420 -0
- package/dist/workflow/workflows/adaptiveWorkflow.d.ts +72 -0
- package/dist/workflow/workflows/adaptiveWorkflow.js +366 -0
- package/dist/workflow/workflows/consensusWorkflow.d.ts +69 -0
- package/dist/workflow/workflows/consensusWorkflow.js +192 -0
- package/dist/workflow/workflows/fallbackWorkflow.d.ts +49 -0
- package/dist/workflow/workflows/fallbackWorkflow.js +225 -0
- package/dist/workflow/workflows/multiJudgeWorkflow.d.ts +70 -0
- package/dist/workflow/workflows/multiJudgeWorkflow.js +351 -0
- package/package.json +3 -2
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fallback Workflow
|
|
3
|
+
* =================
|
|
4
|
+
*
|
|
5
|
+
* Sequential fallback chain using layer-based execution:
|
|
6
|
+
* - Try fast model first
|
|
7
|
+
* - Fall back to mid-tier if needed
|
|
8
|
+
* - Final fallback to premium model
|
|
9
|
+
*
|
|
10
|
+
* Ideal for: Cost-optimization with quality guarantee
|
|
11
|
+
*
|
|
12
|
+
* @module workflow/workflows/fallbackWorkflow
|
|
13
|
+
*/
|
|
14
|
+
import { AIProviderName } from "../../constants/enums.js";
|
|
15
|
+
import { WORKFLOW_CREATION_DATE } from "../config.js";
|
|
16
|
+
/**
|
|
17
|
+
* Fast-Fallback Workflow Configuration
|
|
18
|
+
*
|
|
19
|
+
* Uses layer-based execution with sequential groups:
|
|
20
|
+
* 1. Fast tier: GPT-4o-mini (try first)
|
|
21
|
+
* 2. Mid tier: Gemini 2.0 Flash (if fast fails)
|
|
22
|
+
* 3. Premium tier: GPT-4o (last resort)
|
|
23
|
+
*
|
|
24
|
+
* Each group runs sequentially - only proceeds if previous fails
|
|
25
|
+
*
|
|
26
|
+
* @example
|
|
27
|
+
* ```typescript
|
|
28
|
+
* import { runWorkflow } from '../core/workflowRunner.js';
|
|
29
|
+
* import { FAST_FALLBACK_WORKFLOW } from './fallbackWorkflow.js';
|
|
30
|
+
*
|
|
31
|
+
* const result = await runWorkflow(FAST_FALLBACK_WORKFLOW, {
|
|
32
|
+
* prompt: 'What is 2+2?',
|
|
33
|
+
* verbose: true,
|
|
34
|
+
* });
|
|
35
|
+
*
|
|
36
|
+
* // Usually completes with fast tier, saving cost
|
|
37
|
+
* console.log('Executed models:', result.ensembleResponses.length);
|
|
38
|
+
* ```
|
|
39
|
+
*/
|
|
40
|
+
export const FAST_FALLBACK_WORKFLOW = {
|
|
41
|
+
id: "fast-fallback",
|
|
42
|
+
name: "Fast-Fallback Chain",
|
|
43
|
+
description: "Sequential fallback: fast → mid → premium",
|
|
44
|
+
version: "1.0.0",
|
|
45
|
+
type: "chain",
|
|
46
|
+
// Placeholder (required by schema, but modelGroups takes precedence)
|
|
47
|
+
models: [
|
|
48
|
+
{
|
|
49
|
+
provider: AIProviderName.OPENAI,
|
|
50
|
+
model: "gpt-4o-mini",
|
|
51
|
+
},
|
|
52
|
+
],
|
|
53
|
+
// Layer-based execution: groups run sequentially
|
|
54
|
+
modelGroups: [
|
|
55
|
+
{
|
|
56
|
+
id: "fast-tier",
|
|
57
|
+
name: "Fast Tier",
|
|
58
|
+
description: "Try fast model first (lowest cost)",
|
|
59
|
+
models: [
|
|
60
|
+
{
|
|
61
|
+
provider: AIProviderName.OPENAI,
|
|
62
|
+
model: "gpt-4o-mini",
|
|
63
|
+
label: "GPT-4o-mini",
|
|
64
|
+
temperature: 0.7,
|
|
65
|
+
timeout: 10000, // 10 second timeout
|
|
66
|
+
},
|
|
67
|
+
],
|
|
68
|
+
executionStrategy: "sequential", // Only one model
|
|
69
|
+
continueOnFailure: true, // Always try next tier
|
|
70
|
+
minSuccessful: 1,
|
|
71
|
+
},
|
|
72
|
+
{
|
|
73
|
+
id: "mid-tier",
|
|
74
|
+
name: "Mid Tier",
|
|
75
|
+
description: "Mid-tier model (balanced cost/quality)",
|
|
76
|
+
models: [
|
|
77
|
+
{
|
|
78
|
+
provider: AIProviderName.GOOGLE_AI,
|
|
79
|
+
model: "gemini-2.0-flash",
|
|
80
|
+
label: "Gemini 2.0 Flash",
|
|
81
|
+
temperature: 0.7,
|
|
82
|
+
timeout: 15000, // 15 second timeout
|
|
83
|
+
},
|
|
84
|
+
],
|
|
85
|
+
executionStrategy: "sequential",
|
|
86
|
+
continueOnFailure: true,
|
|
87
|
+
minSuccessful: 1,
|
|
88
|
+
},
|
|
89
|
+
{
|
|
90
|
+
id: "premium-tier",
|
|
91
|
+
name: "Premium Tier",
|
|
92
|
+
description: "Premium models (last resort, highest quality)",
|
|
93
|
+
models: [
|
|
94
|
+
{
|
|
95
|
+
provider: AIProviderName.OPENAI,
|
|
96
|
+
model: "gpt-4o",
|
|
97
|
+
label: "GPT-4o",
|
|
98
|
+
temperature: 0.7,
|
|
99
|
+
timeout: 20000, // 20 second timeout
|
|
100
|
+
},
|
|
101
|
+
],
|
|
102
|
+
executionStrategy: "sequential",
|
|
103
|
+
continueOnFailure: false, // Stop if this fails
|
|
104
|
+
minSuccessful: 1,
|
|
105
|
+
},
|
|
106
|
+
],
|
|
107
|
+
// Judge: Select best response if multiple tiers executed
|
|
108
|
+
judge: {
|
|
109
|
+
provider: AIProviderName.OPENAI,
|
|
110
|
+
model: "gpt-4o-mini", // Fast judge is fine for simple selection
|
|
111
|
+
criteria: ["quality", "response_time"],
|
|
112
|
+
outputFormat: "best",
|
|
113
|
+
includeReasoning: true,
|
|
114
|
+
temperature: 0.1,
|
|
115
|
+
scoreScale: {
|
|
116
|
+
min: 0,
|
|
117
|
+
max: 100,
|
|
118
|
+
},
|
|
119
|
+
},
|
|
120
|
+
// Execution configuration
|
|
121
|
+
execution: {
|
|
122
|
+
timeout: 50000, // 50 second total timeout
|
|
123
|
+
minResponses: 1, // Only need 1 successful response
|
|
124
|
+
costThreshold: 0.05,
|
|
125
|
+
},
|
|
126
|
+
// Metadata
|
|
127
|
+
tags: ["chain", "fallback", "cost-optimized", "reliable"],
|
|
128
|
+
metadata: {
|
|
129
|
+
useCase: "Cost-optimized with quality guarantee",
|
|
130
|
+
recommendedFor: [
|
|
131
|
+
"variable complexity queries",
|
|
132
|
+
"cost-sensitive applications",
|
|
133
|
+
],
|
|
134
|
+
averageCost: 0.01, // Usually completes in fast tier
|
|
135
|
+
averageLatency: 2000,
|
|
136
|
+
},
|
|
137
|
+
createdAt: WORKFLOW_CREATION_DATE,
|
|
138
|
+
};
|
|
139
|
+
/**
|
|
140
|
+
* Aggressive Fallback Workflow
|
|
141
|
+
*
|
|
142
|
+
* More aggressive fallback with parallel premium tier:
|
|
143
|
+
* 1. Fast tier: GPT-4o-mini (sequential)
|
|
144
|
+
* 2. Premium tier: GPT-4o + Claude 3.5 Sonnet (parallel, both execute)
|
|
145
|
+
*
|
|
146
|
+
* Guarantees high quality if fast tier fails
|
|
147
|
+
*/
|
|
148
|
+
export const AGGRESSIVE_FALLBACK_WORKFLOW = {
|
|
149
|
+
id: "aggressive-fallback",
|
|
150
|
+
name: "Aggressive Fallback",
|
|
151
|
+
description: "Fast first, then both premium models in parallel",
|
|
152
|
+
version: "1.0.0",
|
|
153
|
+
type: "chain",
|
|
154
|
+
models: [
|
|
155
|
+
{
|
|
156
|
+
provider: AIProviderName.OPENAI,
|
|
157
|
+
model: "gpt-4o-mini",
|
|
158
|
+
},
|
|
159
|
+
],
|
|
160
|
+
modelGroups: [
|
|
161
|
+
{
|
|
162
|
+
id: "fast-tier",
|
|
163
|
+
name: "Fast Tier",
|
|
164
|
+
models: [
|
|
165
|
+
{
|
|
166
|
+
provider: AIProviderName.OPENAI,
|
|
167
|
+
model: "gpt-4o-mini",
|
|
168
|
+
temperature: 0.7,
|
|
169
|
+
timeout: 8000,
|
|
170
|
+
},
|
|
171
|
+
],
|
|
172
|
+
executionStrategy: "sequential",
|
|
173
|
+
continueOnFailure: true,
|
|
174
|
+
minSuccessful: 1,
|
|
175
|
+
},
|
|
176
|
+
{
|
|
177
|
+
id: "premium-tier",
|
|
178
|
+
name: "Premium Tier (Both)",
|
|
179
|
+
description: "Run both premium models in parallel for guaranteed quality",
|
|
180
|
+
models: [
|
|
181
|
+
{
|
|
182
|
+
provider: AIProviderName.OPENAI,
|
|
183
|
+
model: "gpt-4o",
|
|
184
|
+
label: "GPT-4o",
|
|
185
|
+
temperature: 0.7,
|
|
186
|
+
},
|
|
187
|
+
{
|
|
188
|
+
provider: AIProviderName.ANTHROPIC,
|
|
189
|
+
model: "claude-3-5-sonnet-20241022",
|
|
190
|
+
label: "Claude 3.5 Sonnet",
|
|
191
|
+
temperature: 0.7,
|
|
192
|
+
},
|
|
193
|
+
],
|
|
194
|
+
executionStrategy: "parallel", // Both run simultaneously
|
|
195
|
+
continueOnFailure: false,
|
|
196
|
+
minSuccessful: 1,
|
|
197
|
+
parallelism: 2,
|
|
198
|
+
},
|
|
199
|
+
],
|
|
200
|
+
judge: {
|
|
201
|
+
provider: AIProviderName.OPENAI,
|
|
202
|
+
model: "gpt-4o",
|
|
203
|
+
criteria: ["quality", "completeness"],
|
|
204
|
+
outputFormat: "detailed",
|
|
205
|
+
includeReasoning: true,
|
|
206
|
+
temperature: 0.1,
|
|
207
|
+
scoreScale: {
|
|
208
|
+
min: 0,
|
|
209
|
+
max: 100,
|
|
210
|
+
},
|
|
211
|
+
},
|
|
212
|
+
execution: {
|
|
213
|
+
timeout: 40000,
|
|
214
|
+
minResponses: 1,
|
|
215
|
+
costThreshold: 0.08,
|
|
216
|
+
},
|
|
217
|
+
tags: ["chain", "fallback", "high-quality", "reliable"],
|
|
218
|
+
metadata: {
|
|
219
|
+
useCase: "High quality guarantee with cost optimization attempt",
|
|
220
|
+
recommendedFor: ["important queries", "when quality matters most"],
|
|
221
|
+
averageCost: 0.03,
|
|
222
|
+
averageLatency: 2500,
|
|
223
|
+
},
|
|
224
|
+
createdAt: WORKFLOW_CREATION_DATE,
|
|
225
|
+
};
|
|
226
|
+
//# sourceMappingURL=fallbackWorkflow.js.map
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Multi-Judge Workflow
|
|
3
|
+
* ====================
|
|
4
|
+
*
|
|
5
|
+
* 5-model ensemble with 3-judge voting for maximum reliability:
|
|
6
|
+
* - 5 diverse models generate responses
|
|
7
|
+
* - 3 judges independently evaluate (voting consensus)
|
|
8
|
+
* - Best response selected by aggregate scoring
|
|
9
|
+
*
|
|
10
|
+
* Ideal for: Critical decisions requiring high confidence
|
|
11
|
+
*
|
|
12
|
+
* @module workflow/workflows/multiJudgeWorkflow
|
|
13
|
+
*/
|
|
14
|
+
import type { WorkflowConfig } from "../types.js";
|
|
15
|
+
/**
|
|
16
|
+
* Multi-Judge-5 Workflow Configuration
|
|
17
|
+
*
|
|
18
|
+
* Uses 5 models across different providers:
|
|
19
|
+
* - GPT-4o (OpenAI)
|
|
20
|
+
* - GPT-4o-mini (OpenAI)
|
|
21
|
+
* - Claude 3.5 Sonnet (Anthropic)
|
|
22
|
+
* - Claude 3 Haiku (Anthropic)
|
|
23
|
+
* - Gemini 2.0 Flash (Google)
|
|
24
|
+
*
|
|
25
|
+
* 3 independent judges vote:
|
|
26
|
+
* - GPT-4o evaluates accuracy & clarity
|
|
27
|
+
* - Claude 3.5 Sonnet evaluates reasoning & depth
|
|
28
|
+
* - Gemini 2.0 Flash evaluates completeness & coherence
|
|
29
|
+
*
|
|
30
|
+
* Scores are averaged across all judges for final selection
|
|
31
|
+
*
|
|
32
|
+
* @example
|
|
33
|
+
* ```typescript
|
|
34
|
+
* import { runWorkflow } from '../core/workflowRunner.js';
|
|
35
|
+
* import { MULTI_JUDGE_5_WORKFLOW } from './multiJudgeWorkflow.js';
|
|
36
|
+
*
|
|
37
|
+
* const result = await runWorkflow(MULTI_JUDGE_5_WORKFLOW, {
|
|
38
|
+
* prompt: 'Should we invest in renewable energy?',
|
|
39
|
+
* verbose: true,
|
|
40
|
+
* });
|
|
41
|
+
*
|
|
42
|
+
* console.log('Consensus score:', result.score);
|
|
43
|
+
* console.log('Agreement level:', result.consensus);
|
|
44
|
+
* ```
|
|
45
|
+
*/
|
|
46
|
+
export declare const MULTI_JUDGE_5_WORKFLOW: WorkflowConfig;
|
|
47
|
+
/**
|
|
48
|
+
* Multi-Judge-3 Workflow (Lighter Version)
|
|
49
|
+
*
|
|
50
|
+
* 3 models with 2 judges (more cost-effective):
|
|
51
|
+
* - GPT-4o, Claude 3.5 Sonnet, Gemini 2.0 Flash
|
|
52
|
+
* - Judged by GPT-4o and Claude 3.5 Sonnet
|
|
53
|
+
*/
|
|
54
|
+
export declare const MULTI_JUDGE_3_WORKFLOW: WorkflowConfig;
|
|
55
|
+
/**
|
|
56
|
+
* Create custom multi-judge workflow
|
|
57
|
+
*
|
|
58
|
+
* @param modelCount - Number of models (3, 5, or 7)
|
|
59
|
+
* @param judgeCount - Number of judges (2 or 3)
|
|
60
|
+
* @returns Configured workflow
|
|
61
|
+
*
|
|
62
|
+
* @example
|
|
63
|
+
* ```typescript
|
|
64
|
+
* const workflow = createMultiJudgeWorkflow(7, 3);
|
|
65
|
+
* const result = await runWorkflow(workflow, {
|
|
66
|
+
* prompt: 'Complex analysis task',
|
|
67
|
+
* });
|
|
68
|
+
* ```
|
|
69
|
+
*/
|
|
70
|
+
export declare function createMultiJudgeWorkflow(modelCount: 3 | 5 | 7, judgeCount: 2 | 3): WorkflowConfig;
|
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Multi-Judge Workflow
|
|
3
|
+
* ====================
|
|
4
|
+
*
|
|
5
|
+
* 5-model ensemble with 3-judge voting for maximum reliability:
|
|
6
|
+
* - 5 diverse models generate responses
|
|
7
|
+
* - 3 judges independently evaluate (voting consensus)
|
|
8
|
+
* - Best response selected by aggregate scoring
|
|
9
|
+
*
|
|
10
|
+
* Ideal for: Critical decisions requiring high confidence
|
|
11
|
+
*
|
|
12
|
+
* @module workflow/workflows/multiJudgeWorkflow
|
|
13
|
+
*/
|
|
14
|
+
import { AIProviderName } from "../../constants/enums.js";
|
|
15
|
+
import { WORKFLOW_CREATION_DATE } from "../config.js";
|
|
16
|
+
/**
|
|
17
|
+
* Multi-Judge-5 Workflow Configuration
|
|
18
|
+
*
|
|
19
|
+
* Uses 5 models across different providers:
|
|
20
|
+
* - GPT-4o (OpenAI)
|
|
21
|
+
* - GPT-4o-mini (OpenAI)
|
|
22
|
+
* - Claude 3.5 Sonnet (Anthropic)
|
|
23
|
+
* - Claude 3 Haiku (Anthropic)
|
|
24
|
+
* - Gemini 2.0 Flash (Google)
|
|
25
|
+
*
|
|
26
|
+
* 3 independent judges vote:
|
|
27
|
+
* - GPT-4o evaluates accuracy & clarity
|
|
28
|
+
* - Claude 3.5 Sonnet evaluates reasoning & depth
|
|
29
|
+
* - Gemini 2.0 Flash evaluates completeness & coherence
|
|
30
|
+
*
|
|
31
|
+
* Scores are averaged across all judges for final selection
|
|
32
|
+
*
|
|
33
|
+
* @example
|
|
34
|
+
* ```typescript
|
|
35
|
+
* import { runWorkflow } from '../core/workflowRunner.js';
|
|
36
|
+
* import { MULTI_JUDGE_5_WORKFLOW } from './multiJudgeWorkflow.js';
|
|
37
|
+
*
|
|
38
|
+
* const result = await runWorkflow(MULTI_JUDGE_5_WORKFLOW, {
|
|
39
|
+
* prompt: 'Should we invest in renewable energy?',
|
|
40
|
+
* verbose: true,
|
|
41
|
+
* });
|
|
42
|
+
*
|
|
43
|
+
* console.log('Consensus score:', result.score);
|
|
44
|
+
* console.log('Agreement level:', result.consensus);
|
|
45
|
+
* ```
|
|
46
|
+
*/
|
|
47
|
+
export const MULTI_JUDGE_5_WORKFLOW = {
|
|
48
|
+
id: "multi-judge-5",
|
|
49
|
+
name: "Multi-Judge-5 Ensemble",
|
|
50
|
+
description: "5-model ensemble with 3-judge voting for high confidence",
|
|
51
|
+
version: "1.0.0",
|
|
52
|
+
type: "ensemble",
|
|
53
|
+
// 5 diverse models for comprehensive coverage
|
|
54
|
+
models: [
|
|
55
|
+
{
|
|
56
|
+
provider: AIProviderName.OPENAI,
|
|
57
|
+
model: "gpt-4o",
|
|
58
|
+
label: "GPT-4o",
|
|
59
|
+
weight: 1.0,
|
|
60
|
+
temperature: 0.7,
|
|
61
|
+
},
|
|
62
|
+
{
|
|
63
|
+
provider: AIProviderName.OPENAI,
|
|
64
|
+
model: "gpt-4o-mini",
|
|
65
|
+
label: "GPT-4o-mini",
|
|
66
|
+
weight: 0.8, // Slightly lower weight
|
|
67
|
+
temperature: 0.7,
|
|
68
|
+
},
|
|
69
|
+
{
|
|
70
|
+
provider: AIProviderName.ANTHROPIC,
|
|
71
|
+
model: "claude-3-5-sonnet-20241022",
|
|
72
|
+
label: "Claude 3.5 Sonnet",
|
|
73
|
+
weight: 1.0,
|
|
74
|
+
temperature: 0.7,
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
provider: AIProviderName.ANTHROPIC,
|
|
78
|
+
model: "claude-3-haiku-20240307",
|
|
79
|
+
label: "Claude 3 Haiku",
|
|
80
|
+
weight: 0.7, // Lower weight for faster model
|
|
81
|
+
temperature: 0.7,
|
|
82
|
+
},
|
|
83
|
+
{
|
|
84
|
+
provider: AIProviderName.GOOGLE_AI,
|
|
85
|
+
model: "gemini-2.0-flash",
|
|
86
|
+
label: "Gemini 2 Flash",
|
|
87
|
+
weight: 0.9,
|
|
88
|
+
temperature: 0.7,
|
|
89
|
+
},
|
|
90
|
+
],
|
|
91
|
+
// 3 independent judges with different criteria focus
|
|
92
|
+
judges: [
|
|
93
|
+
{
|
|
94
|
+
provider: AIProviderName.OPENAI,
|
|
95
|
+
model: "gpt-4o",
|
|
96
|
+
criteria: ["accuracy", "clarity", "factual_correctness"],
|
|
97
|
+
outputFormat: "detailed",
|
|
98
|
+
includeReasoning: true,
|
|
99
|
+
temperature: 0.1,
|
|
100
|
+
scoreScale: { min: 0, max: 100 },
|
|
101
|
+
label: "Accuracy Judge",
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
provider: AIProviderName.ANTHROPIC,
|
|
105
|
+
model: "claude-3-5-sonnet-20241022",
|
|
106
|
+
criteria: ["reasoning_quality", "depth", "nuance"],
|
|
107
|
+
outputFormat: "detailed",
|
|
108
|
+
includeReasoning: true,
|
|
109
|
+
temperature: 0.1,
|
|
110
|
+
scoreScale: { min: 0, max: 100 },
|
|
111
|
+
label: "Reasoning Judge",
|
|
112
|
+
},
|
|
113
|
+
{
|
|
114
|
+
provider: AIProviderName.GOOGLE_AI,
|
|
115
|
+
model: "gemini-2.0-flash",
|
|
116
|
+
criteria: ["completeness", "coherence", "relevance"],
|
|
117
|
+
outputFormat: "detailed",
|
|
118
|
+
includeReasoning: true,
|
|
119
|
+
temperature: 0.1,
|
|
120
|
+
scoreScale: { min: 0, max: 100 },
|
|
121
|
+
label: "Completeness Judge",
|
|
122
|
+
},
|
|
123
|
+
],
|
|
124
|
+
// Execution configuration
|
|
125
|
+
execution: {
|
|
126
|
+
parallelism: 5, // All 5 models run simultaneously
|
|
127
|
+
timeout: 45000, // 45 second total timeout
|
|
128
|
+
modelTimeout: 30000, // 30 second per-model timeout
|
|
129
|
+
minResponses: 3, // Need at least 3 successful responses
|
|
130
|
+
costThreshold: 0.15, // Warn if cost exceeds $0.15
|
|
131
|
+
},
|
|
132
|
+
// Metadata
|
|
133
|
+
tags: ["ensemble", "multi-judge", "voting", "high-confidence", "critical"],
|
|
134
|
+
metadata: {
|
|
135
|
+
useCase: "Critical decisions requiring high confidence",
|
|
136
|
+
recommendedFor: [
|
|
137
|
+
"important business decisions",
|
|
138
|
+
"technical evaluations",
|
|
139
|
+
"complex analysis",
|
|
140
|
+
"fact-checking",
|
|
141
|
+
],
|
|
142
|
+
averageCost: 0.1,
|
|
143
|
+
averageLatency: 5000,
|
|
144
|
+
consensusThreshold: 0.7, // Expect 70%+ agreement
|
|
145
|
+
},
|
|
146
|
+
createdAt: WORKFLOW_CREATION_DATE,
|
|
147
|
+
};
|
|
148
|
+
/**
|
|
149
|
+
* Multi-Judge-3 Workflow (Lighter Version)
|
|
150
|
+
*
|
|
151
|
+
* 3 models with 2 judges (more cost-effective):
|
|
152
|
+
* - GPT-4o, Claude 3.5 Sonnet, Gemini 2.0 Flash
|
|
153
|
+
* - Judged by GPT-4o and Claude 3.5 Sonnet
|
|
154
|
+
*/
|
|
155
|
+
export const MULTI_JUDGE_3_WORKFLOW = {
|
|
156
|
+
id: "multi-judge-3",
|
|
157
|
+
name: "Multi-Judge-3 Ensemble",
|
|
158
|
+
description: "3-model ensemble with 2-judge voting",
|
|
159
|
+
version: "1.0.0",
|
|
160
|
+
type: "ensemble",
|
|
161
|
+
models: [
|
|
162
|
+
{
|
|
163
|
+
provider: AIProviderName.OPENAI,
|
|
164
|
+
model: "gpt-4o",
|
|
165
|
+
label: "GPT-4o",
|
|
166
|
+
weight: 1.0,
|
|
167
|
+
temperature: 0.7,
|
|
168
|
+
},
|
|
169
|
+
{
|
|
170
|
+
provider: AIProviderName.ANTHROPIC,
|
|
171
|
+
model: "claude-3-5-sonnet-20241022",
|
|
172
|
+
label: "Claude 3.5 Sonnet",
|
|
173
|
+
weight: 1.0,
|
|
174
|
+
temperature: 0.7,
|
|
175
|
+
},
|
|
176
|
+
{
|
|
177
|
+
provider: AIProviderName.GOOGLE_AI,
|
|
178
|
+
model: "gemini-2.0-flash",
|
|
179
|
+
label: "Gemini 2.0 Flash",
|
|
180
|
+
weight: 1.0,
|
|
181
|
+
temperature: 0.7,
|
|
182
|
+
},
|
|
183
|
+
],
|
|
184
|
+
judges: [
|
|
185
|
+
{
|
|
186
|
+
provider: AIProviderName.OPENAI,
|
|
187
|
+
model: "gpt-4o",
|
|
188
|
+
criteria: ["accuracy", "clarity", "completeness"],
|
|
189
|
+
outputFormat: "detailed",
|
|
190
|
+
includeReasoning: true,
|
|
191
|
+
temperature: 0.1,
|
|
192
|
+
scoreScale: { min: 0, max: 100 },
|
|
193
|
+
label: "Primary Judge",
|
|
194
|
+
},
|
|
195
|
+
{
|
|
196
|
+
provider: AIProviderName.ANTHROPIC,
|
|
197
|
+
model: "claude-3-5-sonnet-20241022",
|
|
198
|
+
criteria: ["reasoning", "depth", "coherence"],
|
|
199
|
+
outputFormat: "detailed",
|
|
200
|
+
includeReasoning: true,
|
|
201
|
+
temperature: 0.1,
|
|
202
|
+
scoreScale: { min: 0, max: 100 },
|
|
203
|
+
label: "Secondary Judge",
|
|
204
|
+
},
|
|
205
|
+
],
|
|
206
|
+
execution: {
|
|
207
|
+
parallelism: 3,
|
|
208
|
+
timeout: 35000,
|
|
209
|
+
modelTimeout: 25000,
|
|
210
|
+
minResponses: 2,
|
|
211
|
+
costThreshold: 0.08,
|
|
212
|
+
},
|
|
213
|
+
tags: ["ensemble", "multi-judge", "voting", "balanced"],
|
|
214
|
+
metadata: {
|
|
215
|
+
useCase: "Balanced multi-judge evaluation",
|
|
216
|
+
recommendedFor: ["important queries", "quality verification"],
|
|
217
|
+
averageCost: 0.04,
|
|
218
|
+
averageLatency: 3500,
|
|
219
|
+
},
|
|
220
|
+
createdAt: WORKFLOW_CREATION_DATE,
|
|
221
|
+
};
|
|
222
|
+
/**
|
|
223
|
+
* Create custom multi-judge workflow
|
|
224
|
+
*
|
|
225
|
+
* @param modelCount - Number of models (3, 5, or 7)
|
|
226
|
+
* @param judgeCount - Number of judges (2 or 3)
|
|
227
|
+
* @returns Configured workflow
|
|
228
|
+
*
|
|
229
|
+
* @example
|
|
230
|
+
* ```typescript
|
|
231
|
+
* const workflow = createMultiJudgeWorkflow(7, 3);
|
|
232
|
+
* const result = await runWorkflow(workflow, {
|
|
233
|
+
* prompt: 'Complex analysis task',
|
|
234
|
+
* });
|
|
235
|
+
* ```
|
|
236
|
+
*/
|
|
237
|
+
export function createMultiJudgeWorkflow(modelCount, judgeCount) {
|
|
238
|
+
// Base models (always include these)
|
|
239
|
+
const baseModels = [
|
|
240
|
+
{
|
|
241
|
+
provider: AIProviderName.OPENAI,
|
|
242
|
+
model: "gpt-4o",
|
|
243
|
+
label: "GPT-4o",
|
|
244
|
+
weight: 1.0,
|
|
245
|
+
temperature: 0.7,
|
|
246
|
+
},
|
|
247
|
+
{
|
|
248
|
+
provider: AIProviderName.ANTHROPIC,
|
|
249
|
+
model: "claude-3-5-sonnet-20241022",
|
|
250
|
+
label: "Claude 3.5 Sonnet",
|
|
251
|
+
weight: 1.0,
|
|
252
|
+
temperature: 0.7,
|
|
253
|
+
},
|
|
254
|
+
{
|
|
255
|
+
provider: AIProviderName.GOOGLE_AI,
|
|
256
|
+
model: "gemini-2.0-flash",
|
|
257
|
+
label: "Gemini 2.0 Flash",
|
|
258
|
+
weight: 1.0,
|
|
259
|
+
temperature: 0.7,
|
|
260
|
+
},
|
|
261
|
+
];
|
|
262
|
+
// Additional models for larger ensembles
|
|
263
|
+
const additionalModels = [
|
|
264
|
+
{
|
|
265
|
+
provider: AIProviderName.OPENAI,
|
|
266
|
+
model: "gpt-4o-mini",
|
|
267
|
+
label: "GPT-4o-mini",
|
|
268
|
+
weight: 0.8,
|
|
269
|
+
temperature: 0.7,
|
|
270
|
+
},
|
|
271
|
+
{
|
|
272
|
+
provider: AIProviderName.ANTHROPIC,
|
|
273
|
+
model: "claude-3-haiku-20240307",
|
|
274
|
+
label: "Claude 3 Haiku",
|
|
275
|
+
weight: 0.7,
|
|
276
|
+
temperature: 0.7,
|
|
277
|
+
},
|
|
278
|
+
{
|
|
279
|
+
provider: AIProviderName.GOOGLE_AI,
|
|
280
|
+
model: "gemini-1.5-flash",
|
|
281
|
+
label: "Gemini 1.5 Flash",
|
|
282
|
+
weight: 0.8,
|
|
283
|
+
temperature: 0.7,
|
|
284
|
+
},
|
|
285
|
+
{
|
|
286
|
+
provider: AIProviderName.OPENAI,
|
|
287
|
+
model: "gpt-3.5-turbo",
|
|
288
|
+
label: "GPT-3.5 Turbo",
|
|
289
|
+
weight: 0.6,
|
|
290
|
+
temperature: 0.7,
|
|
291
|
+
},
|
|
292
|
+
];
|
|
293
|
+
const models = [...baseModels, ...additionalModels.slice(0, modelCount - 3)];
|
|
294
|
+
// Base judges
|
|
295
|
+
const baseJudges = [
|
|
296
|
+
{
|
|
297
|
+
provider: AIProviderName.OPENAI,
|
|
298
|
+
model: "gpt-4o",
|
|
299
|
+
criteria: ["accuracy", "clarity", "completeness"],
|
|
300
|
+
outputFormat: "detailed",
|
|
301
|
+
includeReasoning: true,
|
|
302
|
+
temperature: 0.1,
|
|
303
|
+
scoreScale: { min: 0, max: 100 },
|
|
304
|
+
label: "Primary Judge",
|
|
305
|
+
},
|
|
306
|
+
{
|
|
307
|
+
provider: AIProviderName.ANTHROPIC,
|
|
308
|
+
model: "claude-3-5-sonnet-20241022",
|
|
309
|
+
criteria: ["reasoning", "depth", "coherence"],
|
|
310
|
+
outputFormat: "detailed",
|
|
311
|
+
includeReasoning: true,
|
|
312
|
+
temperature: 0.1,
|
|
313
|
+
scoreScale: { min: 0, max: 100 },
|
|
314
|
+
label: "Secondary Judge",
|
|
315
|
+
},
|
|
316
|
+
];
|
|
317
|
+
const thirdJudge = {
|
|
318
|
+
provider: AIProviderName.GOOGLE_AI,
|
|
319
|
+
model: "gemini-2.0-flash",
|
|
320
|
+
criteria: ["relevance", "factual_accuracy", "structure"],
|
|
321
|
+
outputFormat: "detailed",
|
|
322
|
+
includeReasoning: true,
|
|
323
|
+
temperature: 0.1,
|
|
324
|
+
scoreScale: { min: 0, max: 100 },
|
|
325
|
+
label: "Tertiary Judge",
|
|
326
|
+
};
|
|
327
|
+
const judges = judgeCount === 3 ? [...baseJudges, thirdJudge] : baseJudges;
|
|
328
|
+
return {
|
|
329
|
+
id: `multi-judge-${modelCount}-${judgeCount}`,
|
|
330
|
+
name: `Multi-Judge ${modelCount}x${judgeCount}`,
|
|
331
|
+
description: `${modelCount}-model ensemble with ${judgeCount}-judge voting`,
|
|
332
|
+
version: "1.0.0",
|
|
333
|
+
type: "ensemble",
|
|
334
|
+
models,
|
|
335
|
+
judges,
|
|
336
|
+
execution: {
|
|
337
|
+
parallelism: modelCount,
|
|
338
|
+
timeout: 45000,
|
|
339
|
+
modelTimeout: 30000,
|
|
340
|
+
minResponses: Math.ceil(modelCount / 2),
|
|
341
|
+
costThreshold: 0.2,
|
|
342
|
+
},
|
|
343
|
+
tags: ["ensemble", "multi-judge", "custom"],
|
|
344
|
+
metadata: {
|
|
345
|
+
useCase: "Custom multi-judge evaluation",
|
|
346
|
+
modelCount,
|
|
347
|
+
judgeCount,
|
|
348
|
+
},
|
|
349
|
+
createdAt: WORKFLOW_CREATION_DATE,
|
|
350
|
+
};
|
|
351
|
+
}
|
|
352
|
+
//# sourceMappingURL=multiJudgeWorkflow.js.map
|
package/dist/neurolink.d.ts
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
* Enhanced AI provider system with natural MCP tool access.
|
|
6
6
|
* Uses real MCP infrastructure for tool discovery and execution.
|
|
7
7
|
*/
|
|
8
|
+
import type { TextGenerationOptions, TextGenerationResult } from "./types/index.js";
|
|
8
9
|
import type { ConversationMemoryManager } from "./core/conversationMemoryManager.js";
|
|
9
10
|
import type { RedisConversationMemoryManager } from "./core/redisConversationMemoryManager.js";
|
|
10
11
|
import { ExternalServerManager } from "./mcp/externalServerManager.js";
|
|
@@ -14,7 +15,7 @@ import type { NeurolinkConstructorConfig } from "./types/configTypes.js";
|
|
|
14
15
|
import type { ChatMessage } from "./types/conversation.js";
|
|
15
16
|
import type { ExternalMCPOperationResult, ExternalMCPServerInstance, ExternalMCPToolInfo } from "./types/externalMcp.js";
|
|
16
17
|
import type { GenerateOptions, GenerateResult } from "./types/generateTypes.js";
|
|
17
|
-
import type { ProviderStatus
|
|
18
|
+
import type { ProviderStatus } from "./types/index.js";
|
|
18
19
|
import type { MCPExecutableTool, MCPServerInfo, MCPStatus } from "./types/mcpTypes.js";
|
|
19
20
|
import type { ObservabilityConfig } from "./types/observability.js";
|
|
20
21
|
import type { StreamOptions, StreamResult } from "./types/streamTypes.js";
|
|
@@ -454,6 +455,16 @@ export declare class NeuroLink {
|
|
|
454
455
|
* @since 1.0.0
|
|
455
456
|
*/
|
|
456
457
|
generate(optionsOrPrompt: GenerateOptions | string): Promise<GenerateResult>;
|
|
458
|
+
/**
|
|
459
|
+
* Generate with workflow engine integration
|
|
460
|
+
* Returns both original and processed responses for A/B testing
|
|
461
|
+
*/
|
|
462
|
+
private generateWithWorkflow;
|
|
463
|
+
/**
|
|
464
|
+
* Stream with workflow engine integration
|
|
465
|
+
* Progressive streaming: yields preliminary response (first model) then final synthesis
|
|
466
|
+
*/
|
|
467
|
+
private streamWithWorkflow;
|
|
457
468
|
/**
|
|
458
469
|
* BACKWARD COMPATIBILITY: Legacy generateText method
|
|
459
470
|
* Internally calls generate() and converts result format
|