@juspay/neurolink 9.3.0 → 9.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +8 -8
  3. package/dist/cli/commands/config.d.ts +3 -3
  4. package/dist/cli/index.js +1 -0
  5. package/dist/index.d.ts +35 -0
  6. package/dist/index.js +17 -0
  7. package/dist/lib/agent/directTools.d.ts +5 -5
  8. package/dist/lib/index.d.ts +35 -0
  9. package/dist/lib/index.js +17 -0
  10. package/dist/lib/neurolink.d.ts +12 -1
  11. package/dist/lib/neurolink.js +265 -4
  12. package/dist/lib/server/utils/validation.d.ts +8 -8
  13. package/dist/lib/types/generateTypes.d.ts +28 -0
  14. package/dist/lib/types/index.d.ts +6 -0
  15. package/dist/lib/types/index.js +12 -0
  16. package/dist/lib/types/modelTypes.d.ts +2 -2
  17. package/dist/lib/types/streamTypes.d.ts +35 -0
  18. package/dist/lib/types/workflowTypes.d.ts +558 -0
  19. package/dist/lib/types/workflowTypes.js +32 -0
  20. package/dist/lib/workflow/LAYER-EXAMPLES.d.ts +13 -0
  21. package/dist/lib/workflow/LAYER-EXAMPLES.js +312 -0
  22. package/dist/lib/workflow/PROMPT-EXAMPLES.d.ts +117 -0
  23. package/dist/lib/workflow/PROMPT-EXAMPLES.js +246 -0
  24. package/dist/lib/workflow/config.d.ts +1569 -0
  25. package/dist/lib/workflow/config.js +399 -0
  26. package/dist/lib/workflow/core/ensembleExecutor.d.ts +56 -0
  27. package/dist/lib/workflow/core/ensembleExecutor.js +398 -0
  28. package/dist/lib/workflow/core/judgeScorer.d.ts +26 -0
  29. package/dist/lib/workflow/core/judgeScorer.js +527 -0
  30. package/dist/lib/workflow/core/responseConditioner.d.ts +22 -0
  31. package/dist/lib/workflow/core/responseConditioner.js +226 -0
  32. package/dist/lib/workflow/core/types/conditionerTypes.d.ts +7 -0
  33. package/dist/lib/workflow/core/types/conditionerTypes.js +8 -0
  34. package/dist/lib/workflow/core/types/ensembleTypes.d.ts +7 -0
  35. package/dist/lib/workflow/core/types/ensembleTypes.js +8 -0
  36. package/dist/lib/workflow/core/types/index.d.ts +7 -0
  37. package/dist/lib/workflow/core/types/index.js +8 -0
  38. package/dist/lib/workflow/core/types/judgeTypes.d.ts +7 -0
  39. package/dist/lib/workflow/core/types/judgeTypes.js +8 -0
  40. package/dist/lib/workflow/core/types/layerTypes.d.ts +7 -0
  41. package/dist/lib/workflow/core/types/layerTypes.js +8 -0
  42. package/dist/lib/workflow/core/types/registryTypes.d.ts +7 -0
  43. package/dist/lib/workflow/core/types/registryTypes.js +8 -0
  44. package/dist/lib/workflow/core/workflowRegistry.d.ts +73 -0
  45. package/dist/lib/workflow/core/workflowRegistry.js +305 -0
  46. package/dist/lib/workflow/core/workflowRunner.d.ts +115 -0
  47. package/dist/lib/workflow/core/workflowRunner.js +554 -0
  48. package/dist/lib/workflow/index.d.ts +36 -0
  49. package/dist/lib/workflow/index.js +51 -0
  50. package/dist/lib/workflow/types.d.ts +19 -0
  51. package/dist/lib/workflow/types.js +10 -0
  52. package/dist/lib/workflow/utils/types/index.d.ts +7 -0
  53. package/dist/lib/workflow/utils/types/index.js +8 -0
  54. package/dist/lib/workflow/utils/types/metricsTypes.d.ts +7 -0
  55. package/dist/lib/workflow/utils/types/metricsTypes.js +8 -0
  56. package/dist/lib/workflow/utils/types/validationTypes.d.ts +7 -0
  57. package/dist/lib/workflow/utils/types/validationTypes.js +8 -0
  58. package/dist/lib/workflow/utils/workflowMetrics.d.ts +76 -0
  59. package/dist/lib/workflow/utils/workflowMetrics.js +312 -0
  60. package/dist/lib/workflow/utils/workflowValidation.d.ts +29 -0
  61. package/dist/lib/workflow/utils/workflowValidation.js +421 -0
  62. package/dist/lib/workflow/workflows/adaptiveWorkflow.d.ts +72 -0
  63. package/dist/lib/workflow/workflows/adaptiveWorkflow.js +367 -0
  64. package/dist/lib/workflow/workflows/consensusWorkflow.d.ts +69 -0
  65. package/dist/lib/workflow/workflows/consensusWorkflow.js +193 -0
  66. package/dist/lib/workflow/workflows/fallbackWorkflow.d.ts +49 -0
  67. package/dist/lib/workflow/workflows/fallbackWorkflow.js +226 -0
  68. package/dist/lib/workflow/workflows/multiJudgeWorkflow.d.ts +70 -0
  69. package/dist/lib/workflow/workflows/multiJudgeWorkflow.js +352 -0
  70. package/dist/neurolink.d.ts +12 -1
  71. package/dist/neurolink.js +265 -4
  72. package/dist/types/generateTypes.d.ts +28 -0
  73. package/dist/types/index.d.ts +6 -0
  74. package/dist/types/index.js +12 -0
  75. package/dist/types/streamTypes.d.ts +35 -0
  76. package/dist/types/workflowTypes.d.ts +558 -0
  77. package/dist/types/workflowTypes.js +31 -0
  78. package/dist/workflow/LAYER-EXAMPLES.d.ts +13 -0
  79. package/dist/workflow/LAYER-EXAMPLES.js +311 -0
  80. package/dist/workflow/PROMPT-EXAMPLES.d.ts +117 -0
  81. package/dist/workflow/PROMPT-EXAMPLES.js +245 -0
  82. package/dist/workflow/config.d.ts +1569 -0
  83. package/dist/workflow/config.js +398 -0
  84. package/dist/workflow/core/ensembleExecutor.d.ts +56 -0
  85. package/dist/workflow/core/ensembleExecutor.js +397 -0
  86. package/dist/workflow/core/judgeScorer.d.ts +26 -0
  87. package/dist/workflow/core/judgeScorer.js +526 -0
  88. package/dist/workflow/core/responseConditioner.d.ts +22 -0
  89. package/dist/workflow/core/responseConditioner.js +225 -0
  90. package/dist/workflow/core/types/conditionerTypes.d.ts +7 -0
  91. package/dist/workflow/core/types/conditionerTypes.js +7 -0
  92. package/dist/workflow/core/types/ensembleTypes.d.ts +7 -0
  93. package/dist/workflow/core/types/ensembleTypes.js +7 -0
  94. package/dist/workflow/core/types/index.d.ts +7 -0
  95. package/dist/workflow/core/types/index.js +7 -0
  96. package/dist/workflow/core/types/judgeTypes.d.ts +7 -0
  97. package/dist/workflow/core/types/judgeTypes.js +7 -0
  98. package/dist/workflow/core/types/layerTypes.d.ts +7 -0
  99. package/dist/workflow/core/types/layerTypes.js +7 -0
  100. package/dist/workflow/core/types/registryTypes.d.ts +7 -0
  101. package/dist/workflow/core/types/registryTypes.js +7 -0
  102. package/dist/workflow/core/workflowRegistry.d.ts +73 -0
  103. package/dist/workflow/core/workflowRegistry.js +304 -0
  104. package/dist/workflow/core/workflowRunner.d.ts +115 -0
  105. package/dist/workflow/core/workflowRunner.js +553 -0
  106. package/dist/workflow/index.d.ts +36 -0
  107. package/dist/workflow/index.js +50 -0
  108. package/dist/workflow/types.d.ts +19 -0
  109. package/dist/workflow/types.js +9 -0
  110. package/dist/workflow/utils/types/index.d.ts +7 -0
  111. package/dist/workflow/utils/types/index.js +7 -0
  112. package/dist/workflow/utils/types/metricsTypes.d.ts +7 -0
  113. package/dist/workflow/utils/types/metricsTypes.js +7 -0
  114. package/dist/workflow/utils/types/validationTypes.d.ts +7 -0
  115. package/dist/workflow/utils/types/validationTypes.js +7 -0
  116. package/dist/workflow/utils/workflowMetrics.d.ts +76 -0
  117. package/dist/workflow/utils/workflowMetrics.js +311 -0
  118. package/dist/workflow/utils/workflowValidation.d.ts +29 -0
  119. package/dist/workflow/utils/workflowValidation.js +420 -0
  120. package/dist/workflow/workflows/adaptiveWorkflow.d.ts +72 -0
  121. package/dist/workflow/workflows/adaptiveWorkflow.js +366 -0
  122. package/dist/workflow/workflows/consensusWorkflow.d.ts +69 -0
  123. package/dist/workflow/workflows/consensusWorkflow.js +192 -0
  124. package/dist/workflow/workflows/fallbackWorkflow.d.ts +49 -0
  125. package/dist/workflow/workflows/fallbackWorkflow.js +225 -0
  126. package/dist/workflow/workflows/multiJudgeWorkflow.d.ts +70 -0
  127. package/dist/workflow/workflows/multiJudgeWorkflow.js +351 -0
  128. package/package.json +3 -2
@@ -0,0 +1,527 @@
1
+ /**
2
+ * workflow/core/judgeScorer.ts
3
+ * Judge-based scoring system for ensemble response evaluation
4
+ */
5
+ import { AIProviderFactory } from "../../core/factory.js";
6
+ import { logger } from "../../utils/logger.js";
7
+ import { MAX_REASONING_LENGTH } from "../config.js";
8
+ import { WorkflowError } from "../types.js";
9
+ const functionTag = "JudgeScorer";
10
+ // ============================================================================
11
+ // SCORING FUNCTIONS
12
+ // ============================================================================
/**
 * Score a set of ensemble responses with one or more judges.
 *
 * Only responses whose `status` is "success" and whose content is not blank
 * are forwarded to the judges. Exactly one configured judge is run directly;
 * any other judge count goes through the multi-judge voting path.
 *
 * Failures are not re-thrown: they are logged and reported through the
 * `error` field of the returned object, together with neutral placeholder
 * scores.
 *
 * @param options - Scoring options: `judges`, `responses`, `originalPrompt`,
 *   `systemPrompt`, `timeout` and `workflowDefaults` (its `judgePrompt`, when
 *   set, is passed on as the workflow-level default judge prompt)
 * @returns Object with `scores`, `judgeTime` (elapsed milliseconds) and,
 *   on failure, `error` (a WorkflowError)
 */
export async function scoreEnsemble(options) {
    const startTime = Date.now();
    const { judges, responses, originalPrompt, systemPrompt, timeout, workflowDefaults, } = options;
    logger.info(`[${functionTag}] Starting judge scoring`, {
        judgeCount: judges.length,
        responseCount: responses.length,
    });
    try {
        // Filter successful responses for evaluation
        const successfulResponses = responses.filter((r) => r.status === "success" && r.content.trim() !== "");
        if (successfulResponses.length === 0) {
            throw new WorkflowError("No successful responses to evaluate", {
                code: "NO_RESPONSES_TO_EVALUATE",
                workflowId: "judge",
                phase: "judge",
                retryable: false,
            });
        }
        // Single judge: score directly. Otherwise: multi-judge voting.
        const scores = judges.length === 1
            ? await executeSingleJudge(judges[0], successfulResponses, originalPrompt, systemPrompt, timeout, workflowDefaults?.judgePrompt)
            : await executeMultiJudge(judges, successfulResponses, originalPrompt, systemPrompt, timeout, workflowDefaults?.judgePrompt);
        return {
            scores,
            judgeTime: Date.now() - startTime,
        };
    }
    catch (error) {
        // Any value can be thrown, so do not assume an Error instance when
        // extracting the message.
        const message = typeof error?.message === "string" ? error.message : String(error);
        logger.error(`[${functionTag}] Judge scoring failed`, {
            error: message,
        });
        const workflowError = error instanceof WorkflowError
            ? error
            : new WorkflowError(message, {
                code: "JUDGE_SCORING_ERROR",
                workflowId: "judge",
                phase: "judge",
                retryable: true,
            });
        return {
            // judges[0] is undefined when no judge is configured; fall back to an
            // empty config so building the placeholder cannot throw out of this
            // handler and defeat the error-return contract.
            scores: createEmptyScores(judges[0] ?? {}, responses),
            judgeTime: Date.now() - startTime,
            error: workflowError,
        };
    }
}
/**
 * Run one judge over the given responses and return its evaluation.
 *
 * @param judge - Judge configuration (provider, model, prompts, limits, criteria)
 * @param responses - Successful ensemble responses
 * @param originalPrompt - Original user prompt
 * @param systemPrompt - Optional system prompt; when set it takes precedence
 *   over `judge.systemPrompt`
 * @param timeout - Judge timeout in milliseconds; when set it takes precedence
 *   over `judge.timeout` (final fallback is 10000)
 * @param workflowDefaultJudgePrompt - Workflow-level judge prompt, used when
 *   the judge has no `customPrompt` of its own
 * @returns Judge scores with evaluation
 */
async function executeSingleJudge(judge, responses, originalPrompt, systemPrompt, timeout, workflowDefaultJudgePrompt) {
    const startTime = Date.now();
    logger.debug(`[${functionTag}] Executing single judge`, {
        provider: judge.provider,
        model: judge.model,
    });
    // Resolve judge prompt with hierarchical fallback:
    // 1. Judge-specific customPrompt (highest priority)
    // 2. Workflow-level default judge prompt
    // 3. Built-in default template (chosen inside createJudgePrompt)
    const resolvedJudgePrompt = judge.customPrompt || workflowDefaultJudgePrompt;
    const judgePrompt = createJudgePrompt(judge, responses, originalPrompt, resolvedJudgePrompt);
    // Execute judge
    const provider = await AIProviderFactory.createProvider(judge.provider, judge.model);
    const result = await provider.generate({
        prompt: judgePrompt,
        systemPrompt: systemPrompt || judge.systemPrompt,
        // `??` rather than `||`: an explicit temperature of 0 is a legitimate
        // setting and must not be replaced by the 0.1 default.
        temperature: judge.temperature ?? 0.1,
        maxTokens: judge.maxTokens || 2000,
        timeout: timeout || judge.timeout || 10000,
    });
    // Parse judge response (a missing result or content is parsed as "")
    const parsed = parseJudgeResponse(result?.content || "", responses, judge);
    // Build JudgeScores
    const judgeScores = {
        judgeProvider: judge.provider,
        judgeModel: judge.model,
        scores: parsed.scores,
        ranking: parsed.ranking,
        bestResponse: parsed.bestResponse,
        criteria: judge.criteria,
        reasoning: parsed.reasoning,
        synthesizedResponse: parsed.synthesizedResponse, // Include synthesized response if present
        confidenceInJudgment: parsed.confidenceInJudgment,
        judgeTime: Date.now() - startTime,
        timestamp: new Date().toISOString(),
    };
    logger.debug(`[${functionTag}] Single judge completed`, {
        bestResponse: judgeScores.bestResponse,
        hasSynthesizedResponse: !!parsed.synthesizedResponse,
        judgeTime: judgeScores.judgeTime,
    });
    return judgeScores;
}
/**
 * Run every judge concurrently and merge their verdicts.
 *
 * A judge that rejects is logged and replaced by neutral placeholder scores,
 * so one failing judge does not fail the whole vote. Scores are combined
 * with the "average" strategy.
 *
 * @param judges - Array of judge configurations
 * @param responses - Successful ensemble responses
 * @param originalPrompt - Original user prompt
 * @param systemPrompt - Optional system prompt
 * @param timeout - Judge timeout in milliseconds
 * @param workflowDefaultJudgePrompt - Workflow-level default judge prompt
 * @returns Multi-judge scores with aggregated results
 */
async function executeMultiJudge(judges, responses, originalPrompt, systemPrompt, timeout, workflowDefaultJudgePrompt) {
    const startedAt = Date.now();
    logger.debug(`[${functionTag}] Executing multi-judge voting`, {
        judgeCount: judges.length,
    });
    // Wrap each judge so that a failure resolves to placeholder scores.
    const runJudgeSafely = async (judge) => {
        try {
            return await executeSingleJudge(judge, responses, originalPrompt, systemPrompt, timeout, workflowDefaultJudgePrompt);
        }
        catch (error) {
            logger.warn(`[${functionTag}] Judge failed`, {
                provider: judge.provider,
                model: judge.model,
                error: error.message,
            });
            return createEmptyJudgeScores(judge, responses);
        }
    };
    // All judges run in parallel.
    const judgeResults = await Promise.all(judges.map((judge) => runJudgeSafely(judge)));
    // Only averaging is implemented for now.
    const { averageScores, ranking, bestResponse, confidence, reasoning } = aggregateJudgeScores(judgeResults, "average");
    const multiJudgeScores = {
        judges: judgeResults,
        averageScores,
        aggregatedRanking: ranking,
        consensusLevel: calculateConsensusLevel(judgeResults),
        bestResponse,
        confidence,
        votingStrategy: "average",
        // Fields mirroring the single-judge result shape
        judgeProvider: judgeResults[0]?.judgeProvider,
        judgeModel: `multi-judge-${judges.length}`,
        scores: averageScores,
        ranking,
        reasoning,
        confidenceInJudgment: confidence,
        criteria: judges[0]?.criteria || [],
        judgeTime: Date.now() - startedAt,
        timestamp: new Date().toISOString(),
    };
    logger.debug(`[${functionTag}] Multi-judge completed`, {
        bestResponse: multiJudgeScores.bestResponse,
        consensusLevel: multiJudgeScores.consensusLevel,
        judgeTime: multiJudgeScores.judgeTime,
    });
    return multiJudgeScores;
}
179
+ // ============================================================================
180
+ // HELPER FUNCTIONS
181
+ // ============================================================================
/**
 * Build the prompt that is sent to a judge.
 *
 * A non-empty `customPrompt` is returned verbatim. Otherwise a built-in
 * template is filled in with the user question, one tagged block per
 * response (ids `response-0`, `response-1`, ...) and the numbered criteria.
 * With `judge.blindEvaluation` set, responses are labelled "Response N"
 * instead of "provider/model". With `judge.synthesizeImprovedResponse` set,
 * the template additionally asks for a synthesized answer.
 *
 * @param judge - Judge configuration
 * @param responses - Ensemble responses to evaluate
 * @param originalPrompt - Original user prompt
 * @param customPrompt - Custom evaluation prompt (overrides default)
 * @returns Formatted judge prompt
 */
function createJudgePrompt(judge, responses, originalPrompt, customPrompt) {
    if (customPrompt) {
        logger.debug(`[${functionTag}] Using custom judge prompt`);
        return customPrompt;
    }
    // One tagged block per response; the id is what the judge must echo back.
    const renderResponse = (r, index) => {
        const label = judge.blindEvaluation
            ? `Response ${index + 1}`
            : `${r.provider}/${r.model}`;
        return `
<response id="response-${index}">
<model>${label}</model>
<content>
${r.content}
</content>
</response>`;
    };
    const responseBlocks = responses
        .map((r, index) => renderResponse(r, index))
        .join("\n");
    const criteriaList = judge.criteria
        .map((criterion, position) => `${position + 1}. ${criterion}`)
        .join("\n");
    if (!judge.synthesizeImprovedResponse) {
        // Standard evaluation (no synthesis)
        return `You are an expert AI evaluator. Evaluate the following responses to the user's question.

USER QUESTION:
${originalPrompt}

RESPONSES TO EVALUATE:
${responseBlocks}

EVALUATION CRITERIA:
${criteriaList}

INSTRUCTIONS:
1. Score each response on a scale of 0-100 (0 = poor, 100 = excellent)
2. Consider all evaluation criteria listed above
3. Provide a ranking of responses from best to worst
4. Identify the single best response
5. Provide brief reasoning for your evaluation (max 200 characters)
6. Rate your confidence in this judgment (0.0 to 1.0)

Respond in JSON format:
{
"scores": {
"response-0": 85,
"response-1": 92
},
"ranking": ["response-1", "response-0"],
"bestResponse": "response-1",
"reasoning": "Brief explanation of evaluation",
"confidenceInJudgment": 0.9
}`;
    }
    // Synthesis enabled: the judge also writes an improved response.
    return `You are an expert AI evaluator and synthesizer. Your task is to:
1. Evaluate all responses
2. Synthesize an IMPROVED final response that combines their strengths

USER QUESTION:
${originalPrompt}

RESPONSES TO EVALUATE:
${responseBlocks}

EVALUATION CRITERIA:
${criteriaList}

INSTRUCTIONS:
1. Score each response on a scale of 0-100 (0 = poor, 100 = excellent)
2. Consider all evaluation criteria listed above
3. Provide a ranking of responses from best to worst
4. Identify the single best response
5. Provide brief reasoning for your evaluation (max 200 characters)
6. **SYNTHESIZE an improved response** that:
- Combines the best elements from all responses
- Addresses any weaknesses identified in the evaluation
- Maintains accuracy and technical correctness
- Is more complete and higher quality than any single response
- Directly answers the user's question (no meta-commentary)
7. Rate your confidence in this judgment (0.0 to 1.0)

Respond in JSON format:
{
"scores": {
"response-0": 85,
"response-1": 92
},
"ranking": ["response-1", "response-0"],
"bestResponse": "response-1",
"reasoning": "Brief explanation of evaluation",
"synthesizedResponse": "Your improved, synthesized response here",
"confidenceInJudgment": 0.9
}`;
}
/**
 * Parse a judge's raw reply into scores, ranking and metadata.
 *
 * The first "{" through the last "}" in the reply is parsed as JSON. When no
 * JSON is found, or parsing throws, neutral fallback scores are returned.
 *
 * Model output is untrusted, so each field is validated before use:
 * - scores are clamped to 0-100; values that are not numeric are dropped
 * - every response missing a score gets the neutral value 50
 * - `ranking` is used only if it is an array, otherwise derived from scores
 * - `bestResponse` is used only if it is a non-empty string, otherwise
 *   derived from scores
 * - `reasoning` is used only if it is a non-empty string
 *
 * @param content - Raw judge response content
 * @param responses - Original ensemble responses
 * @param _judge - Judge configuration (unused)
 * @returns Parsed judge response with scores
 */
function parseJudgeResponse(content, responses, _judge) {
    try {
        // Try to extract JSON from response
        const jsonMatch = content.match(/\{[\s\S]*\}/);
        if (!jsonMatch) {
            logger.warn(`[${functionTag}] No JSON found in judge response`);
            return createFallbackScores(responses);
        }
        const parsed = JSON.parse(jsonMatch[0]);
        // Validate and normalize scores to 0-100 range
        const scores = {};
        Object.keys(parsed.scores || {}).forEach((key) => {
            const score = Number(parsed.scores[key]);
            // NaN survives Math.min/Math.max and would corrupt ranking and
            // averaging, so non-numeric values are skipped here.
            if (Number.isNaN(score)) {
                return;
            }
            scores[key] = Math.max(0, Math.min(100, score));
        });
        // Ensure all responses have scores
        responses.forEach((_, index) => {
            const key = `response-${index}`;
            if (!(key in scores)) {
                scores[key] = 50; // Default neutral score
            }
        });
        // A non-array ranking or non-string bestResponse would break the
        // helpers that later call .map() / .replace() on them.
        const ranking = Array.isArray(parsed.ranking)
            ? parsed.ranking
            : generateRankingFromScores(scores);
        const bestResponse = typeof parsed.bestResponse === "string" && parsed.bestResponse
            ? parsed.bestResponse
            : findBestResponse(scores);
        // Non-string reasoning would throw inside truncateReasoning and throw
        // away otherwise valid scores via the catch below.
        const reasoning = typeof parsed.reasoning === "string" && parsed.reasoning
            ? parsed.reasoning
            : "No reasoning provided";
        return {
            scores,
            ranking,
            bestResponse,
            reasoning: truncateReasoning(reasoning),
            synthesizedResponse: parsed.synthesizedResponse, // Extract synthesized response if present
            confidenceInJudgment: normalizeConfidence(parsed.confidenceInJudgment),
        };
    }
    catch (error) {
        logger.warn(`[${functionTag}] Failed to parse judge response`, {
            error: error.message,
        });
        return createFallbackScores(responses);
    }
}
/**
 * Build a neutral evaluation for when a judge reply cannot be parsed.
 *
 * Every response receives the score 50, the ranking follows input order,
 * and the first response (if any) is reported as best.
 *
 * @param responses - Ensemble responses
 * @returns Default scores with equal values
 */
function createFallbackScores(responses) {
    const ranking = responses.map((_response, position) => `response-${position}`);
    const scores = Object.fromEntries(ranking.map((id) => [id, 50]));
    return {
        scores,
        ranking,
        bestResponse: ranking[0],
        reasoning: "Unable to parse judge evaluation",
        confidenceInJudgment: 0.5,
    };
}
/**
 * Order response IDs from highest score to lowest.
 *
 * @param scores - Score record (response ID to numeric score)
 * @returns Array of response IDs sorted by score descending
 */
function generateRankingFromScores(scores) {
    return Object.entries(scores)
        .sort(([, scoreA], [, scoreB]) => scoreB - scoreA)
        .map(([id]) => id);
}
/**
 * Find the response ID with the highest score.
 *
 * Ties go to the ID that appears first in the record. If no score compares
 * greater than any other (for example, every value is NaN), the first ID is
 * returned. An empty record yields `undefined`.
 *
 * @param scores - Score record (response ID to numeric score)
 * @returns Response ID with highest score
 */
function findBestResponse(scores) {
    const ids = Object.keys(scores);
    let bestId = ids[0];
    // Start below every possible number so that records containing only
    // negative scores still yield their true maximum.
    let bestScore = -Infinity;
    for (const id of ids) {
        if (scores[id] > bestScore) {
            bestScore = scores[id];
            bestId = id;
        }
    }
    return bestId;
}
/**
 * Limit reasoning text to MAX_REASONING_LENGTH characters.
 *
 * Text within the limit is returned unchanged. Longer text is cut to
 * MAX_REASONING_LENGTH - 3 characters and suffixed with "...".
 *
 * @param reasoning - Reasoning text
 * @returns Truncated reasoning
 */
function truncateReasoning(reasoning) {
    return reasoning.length <= MAX_REASONING_LENGTH
        ? reasoning
        : `${reasoning.substring(0, MAX_REASONING_LENGTH - 3)}...`;
}
/**
 * Clamp a confidence value into the 0-1 range.
 *
 * Anything that is not of type "number" is replaced by the neutral 0.5.
 *
 * @param confidence - Confidence value
 * @returns Normalized confidence between 0 and 1
 */
function normalizeConfidence(confidence) {
    return typeof confidence === "number"
        ? Math.min(1, Math.max(0, confidence))
        : 0.5;
}
/**
 * Combine the results of several judges into one verdict.
 *
 * For each response ID seen in any judge's scores, the scores of the judges
 * that rated it are averaged. The ranking is derived from those averages and
 * its first entry is reported as the best response. Confidence is the mean
 * of the judges' confidences, where a missing (or zero) confidence counts
 * as 0.5.
 *
 * @param judgeResults - Array of judge score results
 * @param _strategy - Aggregation strategy (currently only 'average'; unused)
 * @returns Aggregated scores and ranking
 */
function aggregateJudgeScores(judgeResults, _strategy) {
    // Every response ID that at least one judge scored, in first-seen order.
    const responseIds = new Set(judgeResults.flatMap((result) => Object.keys(result.scores)));
    const averageScores = {};
    for (const id of responseIds) {
        const votes = judgeResults
            .map((result) => result.scores[id])
            .filter((vote) => vote !== undefined);
        averageScores[id] = votes.length > 0
            ? votes.reduce((sum, vote) => sum + vote, 0) / votes.length
            : 50; // Default
    }
    const ranking = generateRankingFromScores(averageScores);
    // Mean confidence across judges
    let confidenceTotal = 0;
    let confidenceCount = 0;
    for (const result of judgeResults) {
        const value = result.confidenceInJudgment || 0.5;
        if (value > 0) {
            confidenceTotal += value;
            confidenceCount += 1;
        }
    }
    const confidence = confidenceCount > 0 ? confidenceTotal / confidenceCount : 0.5;
    return {
        averageScores,
        ranking,
        bestResponse: ranking[0],
        confidence,
        reasoning: `Aggregated from ${judgeResults.length} judges`,
    };
}
/**
 * Measure how strongly the judges agree on the best response.
 *
 * The result is the share of judges that voted for the most popular
 * `bestResponse`. Judges without a `bestResponse` cast no vote but still
 * count in the denominator.
 *
 * @param judgeResults - Array of judge score results
 * @returns Consensus level between 0 and 1 (1 for fewer than two judges,
 *   0 when no judge named a best response)
 */
function calculateConsensusLevel(judgeResults) {
    if (judgeResults.length < 2) {
        return 1.0; // Perfect consensus with single judge
    }
    // Tally votes for each response named as best
    const voteCounts = new Map();
    for (const { bestResponse } of judgeResults) {
        if (bestResponse) {
            voteCounts.set(bestResponse, (voteCounts.get(bestResponse) || 0) + 1);
        }
    }
    // Math.max() with no arguments is -Infinity, so guard the case where
    // no judge produced a vote (e.g. every judge failed).
    if (voteCounts.size === 0) {
        return 0;
    }
    return Math.max(...voteCounts.values()) / judgeResults.length;
}
/**
 * Build placeholder judge scores for error cases.
 *
 * Each response gets the neutral score 50. No ranking, best response,
 * reasoning or confidence is set, and `judgeTime` is 0.
 *
 * @param judge - Judge configuration
 * @param responses - Ensemble responses
 * @returns Empty judge scores
 */
function createEmptyJudgeScores(judge, responses) {
    const neutralScores = Object.fromEntries(responses.map((_response, position) => [`response-${position}`, 50]));
    const { provider: judgeProvider, model: judgeModel, criteria } = judge;
    return {
        judgeProvider,
        judgeModel,
        scores: neutralScores,
        criteria,
        judgeTime: 0,
        timestamp: new Date().toISOString(),
    };
}
/**
 * Build placeholder scores for error cases.
 *
 * Delegates to createEmptyJudgeScores with the same arguments.
 *
 * @param judge - Judge configuration
 * @param responses - Ensemble responses
 * @returns Empty judge scores
 */
function createEmptyScores(judge, responses) {
    const placeholderScores = createEmptyJudgeScores(judge, responses);
    return placeholderScores;
}
/**
 * Look up the response that the scores name as best.
 *
 * The index is taken from the `response-N` ID in `scores.bestResponse` and
 * used to index into `responses`.
 *
 * @param scores - Judge scores or multi-judge scores
 * @param responses - Original ensemble responses
 * @returns Best ensemble response, or `undefined` when no best response is
 *   set or the ID does not resolve to an entry
 */
export function getBestResponse(scores, responses) {
    const { bestResponse: bestId } = scores;
    if (bestId) {
        const position = Number.parseInt(bestId.replace("response-", ""), 10);
        return responses[position];
    }
    return undefined;
}
/**
 * Return the responses in the order given by the scores' ranking.
 *
 * Ranking IDs that do not resolve to an entry in `responses` are skipped.
 * With a missing or empty ranking, `responses` itself is returned unchanged.
 *
 * @param scores - Judge scores or multi-judge scores
 * @param responses - Original ensemble responses
 * @returns Responses sorted by ranking
 */
export function getRankedResponses(scores, responses) {
    const { ranking } = scores;
    if (!ranking || ranking.length === 0) {
        return responses;
    }
    return ranking.flatMap((id) => {
        const position = Number.parseInt(id.replace("response-", ""), 10);
        const match = responses[position];
        return match === undefined ? [] : [match];
    });
}
527
+ //# sourceMappingURL=judgeScorer.js.map
@@ -0,0 +1,22 @@
1
+ /**
2
+ * workflow/core/responseConditioner.ts
3
+ * Response conditioning and synthesis
4
+ *
5
+ * Uses judge feedback and ensemble responses to synthesize an improved final response.
6
+ * Combines strengths from multiple responses based on evaluation insights.
7
+ */
8
+ import type { ConditioningConfig } from "../types.js";
9
+ import type { ConditionOptions, ConditionResult } from "./types/index.js";
10
+ /**
11
+ * Condition a response by synthesizing an improved version using judge feedback.
12
+ *
13
+ * @param options - Conditioning options including all responses and judge feedback
14
+ * @returns Promise that resolves to the conditioned result with synthesized improved content
15
+ */
16
+ export declare function conditionResponse(options: ConditionOptions): Promise<ConditionResult>;
17
+ /**
18
+ * Check whether conditioning is enabled.
19
+ * @param config - Conditioning configuration (optional)
20
+ * @returns True if conditioning should be applied
21
+ */
22
+ export declare function isConditioningEnabled(config?: ConditioningConfig): boolean;