kimi-vercel-ai-sdk-provider 0.4.0 → 0.5.0
This diff reflects the changes between publicly released versions of this package as they appear in their public registry, and is provided for informational purposes only.
- package/README.md +426 -31
- package/dist/index.d.mts +1608 -2
- package/dist/index.d.ts +1608 -2
- package/dist/index.js +1949 -6
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1924 -5
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
- package/src/__tests__/auto-detect.test.ts +140 -0
- package/src/__tests__/code-validation.test.ts +267 -0
- package/src/__tests__/ensemble.test.ts +242 -0
- package/src/__tests__/multi-agent.test.ts +201 -0
- package/src/__tests__/project-tools.test.ts +181 -0
- package/src/__tests__/tools.test.ts +1 -1
- package/src/chat/kimi-chat-settings.ts +15 -1
- package/src/code-validation/detector.ts +319 -0
- package/src/code-validation/index.ts +31 -0
- package/src/code-validation/types.ts +291 -0
- package/src/code-validation/validator.ts +547 -0
- package/src/core/errors.ts +91 -0
- package/src/core/index.ts +5 -0
- package/src/ensemble/index.ts +17 -0
- package/src/ensemble/multi-sampler.ts +433 -0
- package/src/ensemble/types.ts +279 -0
- package/src/index.ts +102 -3
- package/src/kimi-provider.ts +354 -1
- package/src/multi-agent/index.ts +21 -0
- package/src/multi-agent/types.ts +312 -0
- package/src/multi-agent/workflows.ts +539 -0
- package/src/project-tools/index.ts +16 -0
- package/src/project-tools/scaffolder.ts +494 -0
- package/src/project-tools/types.ts +244 -0
- package/src/tools/auto-detect.ts +276 -0
- package/src/tools/index.ts +6 -2
- package/src/tools/prepare-tools.ts +91 -2
package/src/ensemble/multi-sampler.ts
@@ -0,0 +1,433 @@
+/**
+ * Multi-sampler implementation for ensemble generation.
+ * @module
+ */
+
+import type {
+  EnsembleConfig,
+  EnsembleMetadata,
+  EnsembleResponse,
+  EnsembleResult,
+  LanguageModelUsage,
+  ScoringHeuristic,
+  SelectionStrategy
+} from './types';
+
+// ============================================================================
+// Types
+// ============================================================================
+
+/**
+ * A function that generates a single response.
+ */
+export type GenerateFunction = (options: { temperature: number; sampleIndex: number }) => Promise<{
+  text: string;
+  reasoning?: string;
+  toolCalls?: unknown[];
+  toolResults?: unknown[];
+  usage?: LanguageModelUsage;
+  finishReason: string;
+}>;
+
+/**
+ * Options for creating a multi-sampler.
+ */
+export interface MultiSamplerOptions {
+  /**
+   * The model ID being used.
+   */
+  modelId: string;
+
+  /**
+   * Base temperature for generation.
+   */
+  baseTemperature?: number;
+}
+
+// ============================================================================
+// MultiSampler Class
+// ============================================================================
+
+/**
+ * Multi-sampler for generating multiple responses and selecting the best one.
+ *
+ * @example
+ * ```ts
+ * const sampler = new MultiSampler({ modelId: 'kimi-k2.5' });
+ * const result = await sampler.generate(generateFn, {
+ *   n: 3,
+ *   selectionStrategy: 'best',
+ *   scoringHeuristic: 'code',
+ * });
+ * ```
+ */
+export class MultiSampler {
+  private modelId: string;
+  private baseTemperature: number;
+
+  constructor(options: MultiSamplerOptions) {
+    this.modelId = options.modelId;
+    this.baseTemperature = options.baseTemperature ?? 0.7;
+  }
+
+  /**
+   * Generate multiple samples and select the best one.
+   *
+   * @param generateFn - Function to generate a single response
+   * @param config - Ensemble configuration
+   * @returns The ensemble result
+   */
+  async generate(generateFn: GenerateFunction, config: EnsembleConfig): Promise<EnsembleResult> {
+    const startTime = Date.now();
+    const {
+      n,
+      selectionStrategy = 'best',
+      temperatureVariance = 0.1,
+      scoringHeuristic = 'confidence',
+      customScorer,
+      timeoutMs = 60000,
+      allowPartialFailure = true,
+      minSuccessfulSamples = 1
+    } = config;
+
+    // Validate configuration
+    if (n < 1 || n > 10) {
+      throw new Error('Ensemble n must be between 1 and 10');
+    }
+
+    // Generate samples in parallel
+    const promises = Array.from({ length: n }, async (_, i) => {
+      const temperature = Math.min(this.baseTemperature + i * temperatureVariance, 2.0);
+      const sampleStart = Date.now();
+
+      try {
+        const result = await generateFn({ temperature, sampleIndex: i });
+        return {
+          text: result.text,
+          reasoning: result.reasoning,
+          toolCalls: result.toolCalls,
+          toolResults: result.toolResults,
+          usage: result.usage,
+          sampleIndex: i,
+          temperature,
+          finishReason: result.finishReason,
+          success: true,
+          durationMs: Date.now() - sampleStart
+        } as EnsembleResponse;
+      } catch (error) {
+        return {
+          text: '',
+          sampleIndex: i,
+          temperature,
+          finishReason: 'error',
+          success: false,
+          error: error instanceof Error ? error.message : 'Unknown error',
+          durationMs: Date.now() - sampleStart
+        } as EnsembleResponse;
+      }
+    });
+
+    // Wait for all samples with timeout
+    let responses: EnsembleResponse[];
+    try {
+      responses = await Promise.race([
+        Promise.all(promises),
+        new Promise<never>((_, reject) =>
+          setTimeout(() => reject(new Error('Ensemble generation timed out')), timeoutMs)
+        )
+      ]);
+    } catch (_error) {
+      // On timeout, wait a bit more to collect partial results
+      const partialResponses = await Promise.all(
+        promises.map((p) =>
+          p.catch(
+            () =>
+              ({
+                text: '',
+                sampleIndex: -1,
+                temperature: 0,
+                finishReason: 'timeout',
+                success: false,
+                error: 'Timed out'
+              }) as EnsembleResponse
+          )
+        )
+      );
+      responses = partialResponses.filter((r) => r.sampleIndex >= 0);
+    }
+
+    // Filter successful responses
+    const successfulResponses = responses.filter((r) => r.success);
+
+    if (successfulResponses.length < minSuccessfulSamples && !allowPartialFailure) {
+      throw new Error(
+        `Only ${successfulResponses.length} samples succeeded, minimum required is ${minSuccessfulSamples}`
+      );
+    }
+
+    if (successfulResponses.length === 0) {
+      throw new Error('All ensemble samples failed');
+    }
+
+    // Apply selection strategy
+    const result = this.selectBest(successfulResponses, responses, {
+      selectionStrategy,
+      scoringHeuristic,
+      customScorer,
+      modelId: this.modelId,
+      startTime
+    });
+
+    return result;
+  }
+
+  /**
+   * Select the best response based on the strategy.
+   */
+  private selectBest(
+    successfulResponses: EnsembleResponse[],
+    allResponses: EnsembleResponse[],
+    options: {
+      selectionStrategy: SelectionStrategy;
+      scoringHeuristic: ScoringHeuristic;
+      customScorer?: (response: EnsembleResponse) => number;
+      modelId: string;
+      startTime: number;
+    }
+  ): EnsembleResult {
+    const { selectionStrategy, scoringHeuristic, customScorer, modelId, startTime } = options;
+
+    // Score all successful responses
+    const scored = successfulResponses.map((r) => {
+      return {
+        ...r,
+        score: this.calculateScore(r, scoringHeuristic, customScorer)
+      };
+    });
+
+    let winner: EnsembleResponse;
+    let alternatives: EnsembleResponse[] | undefined;
+
+    switch (selectionStrategy) {
+      case 'first':
+        winner = scored[0];
+        break;
+
+      case 'vote':
+        winner = this.majorityVote(scored);
+        break;
+
+      case 'best':
+        scored.sort((a, b) => (b.score ?? 0) - (a.score ?? 0));
+        winner = scored[0];
+        break;
+
+      case 'all':
+        winner = scored[0];
+        alternatives = scored;
+        break;
+
+      default:
+        throw new Error(`Unknown selection strategy: ${selectionStrategy}`);
+    }
+
+    const metadata: EnsembleMetadata = {
+      nRequested: allResponses.length,
+      nCompleted: successfulResponses.length,
+      nFailed: allResponses.filter((r) => !r.success).length,
+      selectionStrategy,
+      winningIndex: winner.sampleIndex,
+      scores: scored.map((r) => r.score ?? 0),
+      durationMs: Date.now() - startTime,
+      modelId,
+      totalUsage: this.aggregateUsage(successfulResponses)
+    };
+
+    return {
+      text: winner.text,
+      reasoning: winner.reasoning,
+      toolCalls: winner.toolCalls as EnsembleResult['toolCalls'],
+      toolResults: winner.toolResults as EnsembleResult['toolResults'],
+      usage: winner.usage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
+      alternatives,
+      metadata
+    };
+  }
+
+  /**
+   * Calculate score for a response based on the heuristic.
+   */
+  private calculateScore(
+    response: EnsembleResponse,
+    heuristic: ScoringHeuristic,
+    customScorer?: (response: EnsembleResponse) => number
+  ): number {
+    switch (heuristic) {
+      case 'length':
+        // Prefer concise answers (inverse length, normalized)
+        return 1000 / (response.text.length + 1);
+
+      case 'confidence':
+        // Higher completion tokens often indicates more complete reasoning
+        return response.usage?.completionTokens ?? 0;
+
+      case 'code':
+        return this.scoreCodeQuality(response.text);
+
+      case 'custom':
+        if (!customScorer) {
+          throw new Error('Custom scorer function required for custom heuristic');
+        }
+        return customScorer(response);
+
+      default:
+        return 0;
+    }
+  }
+
+  /**
+   * Score code quality based on heuristics.
+   */
+  private scoreCodeQuality(text: string): number {
+    let score = 100;
+
+    // Deduct for common error patterns
+    const errorPatterns = [
+      { pattern: /SyntaxError/gi, penalty: 25 },
+      { pattern: /ReferenceError/gi, penalty: 20 },
+      { pattern: /TypeError/gi, penalty: 20 },
+      { pattern: /undefined is not/gi, penalty: 15 },
+      { pattern: /cannot read property/gi, penalty: 15 },
+      { pattern: /is not defined/gi, penalty: 15 },
+      { pattern: /unexpected token/gi, penalty: 20 },
+      { pattern: /null is not/gi, penalty: 15 }
+    ];
+
+    for (const { pattern, penalty } of errorPatterns) {
+      const matches = text.match(pattern);
+      if (matches) {
+        score -= penalty * matches.length;
+      }
+    }
+
+    // Bonus for proper code blocks
+    if (text.includes('```')) {
+      score += 10;
+    }
+
+    // Bonus for comments/documentation
+    if (/\/\/.*|\/\*[\s\S]*?\*\/|#.*/.test(text)) {
+      score += 5;
+    }
+
+    // Bonus for test mentions
+    if (/\b(test|spec|assert|expect|describe|it)\b/i.test(text)) {
+      score += 5;
+    }
+
+    // Bonus for type annotations (TypeScript)
+    if (/:\s*(string|number|boolean|void|any|unknown|never)\b/.test(text)) {
+      score += 5;
+    }
+
+    // Penalty for TODO/FIXME left in code
+    if (/\b(TODO|FIXME|XXX|HACK)\b/i.test(text)) {
+      score -= 5;
+    }
+
+    return Math.max(0, score);
+  }
+
+  /**
+   * Select the most common response (majority voting).
+   */
+  private majorityVote(responses: EnsembleResponse[]): EnsembleResponse {
+    // Simple text similarity voting based on normalized text
+    const normalized = responses.map((r) => {
+      return {
+        response: r,
+        key: r.text.toLowerCase().replace(/\s+/g, ' ').trim().slice(0, 500)
+      };
+    });
+
+    const votes = new Map<string, { count: number; response: EnsembleResponse }>();
+
+    for (const { response, key } of normalized) {
+      const existing = votes.get(key);
+      if (existing) {
+        existing.count++;
+      } else {
+        votes.set(key, { count: 1, response });
+      }
+    }
+
+    // Find the response with the most votes
+    let maxVotes = 0;
+    let winner = responses[0];
+
+    for (const { count, response } of votes.values()) {
+      if (count > maxVotes) {
+        maxVotes = count;
+        winner = response;
+      }
+    }
+
+    return winner;
+  }
+
+  /**
+   * Aggregate usage across all responses.
+   */
+  private aggregateUsage(responses: EnsembleResponse[]): LanguageModelUsage {
+    return responses.reduce(
+      (acc, r) => {
+        return {
+          promptTokens: acc.promptTokens + (r.usage?.promptTokens ?? 0),
+          completionTokens: acc.completionTokens + (r.usage?.completionTokens ?? 0),
+          totalTokens: acc.totalTokens + (r.usage?.totalTokens ?? 0)
+        };
+      },
+      { promptTokens: 0, completionTokens: 0, totalTokens: 0 }
+    );
+  }
+}
+
+// ============================================================================
+// Utility Functions
+// ============================================================================
+
+/**
+ * Create a simple ensemble result from a single response.
+ * Useful for when ensemble is disabled but consistent return types are needed.
+ */
+export function createSingletonEnsembleResult(
+  response: {
+    text: string;
+    reasoning?: string;
+    toolCalls?: unknown[];
+    toolResults?: unknown[];
+    usage?: LanguageModelUsage;
+    finishReason: string;
+  },
+  modelId: string,
+  durationMs: number
+): EnsembleResult {
+  return {
+    text: response.text,
+    reasoning: response.reasoning,
+    toolCalls: response.toolCalls as EnsembleResult['toolCalls'],
+    toolResults: response.toolResults as EnsembleResult['toolResults'],
+    usage: response.usage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
+    metadata: {
+      nRequested: 1,
+      nCompleted: 1,
+      nFailed: 0,
+      selectionStrategy: 'first',
+      winningIndex: 0,
+      scores: [100],
+      durationMs,
+      modelId,
+      totalUsage: response.usage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 }
+    }
+  };
+}
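The new `MultiSampler` is transport-agnostic: it only needs an async `GenerateFunction`, so a plain AI SDK `generateText` call can be wrapped and fed into it. A minimal usage sketch (not part of the diff) follows; it assumes `MultiSampler` is re-exported from the package root (the diff extends `src/ensemble/index.ts` and `src/index.ts`, but the exact export surface is not shown here) and omits the optional `usage`, `reasoning`, and tool fields.

```ts
import { generateText, type LanguageModel } from 'ai';
// Assumed root-level re-export; adjust the import path if the ensemble
// module is exposed differently.
import { MultiSampler } from 'kimi-vercel-ai-sdk-provider';

// Wrap an ordinary generateText call in the GenerateFunction shape that
// MultiSampler expects. Only `text` and `finishReason` are required.
function makeGenerateFn(model: LanguageModel, prompt: string) {
  return async ({ temperature }: { temperature: number; sampleIndex: number }) => {
    const { text, finishReason } = await generateText({ model, prompt, temperature });
    return { text, finishReason };
  };
}

export async function bestOfThree(model: LanguageModel, prompt: string) {
  const sampler = new MultiSampler({ modelId: 'kimi-k2.5', baseTemperature: 0.6 });
  const result = await sampler.generate(makeGenerateFn(model, prompt), {
    n: 3,
    selectionStrategy: 'best',
    scoringHeuristic: 'code'
  });
  // metadata reports per-sample scores and which sample won.
  console.log(result.metadata.scores, result.metadata.winningIndex);
  return result.text;
}
```

With the default `temperatureVariance` of 0.1 and a base temperature of 0.6, the three samples above run at 0.6, 0.7, and 0.8.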
package/src/ensemble/types.ts
@@ -0,0 +1,279 @@
+/**
+ * Types for ensemble/multi-sampling functionality.
+ * @module
+ */
+
+import type { LanguageModelV3ToolCall, LanguageModelV3ToolResult } from '@ai-sdk/provider';
+
+/**
+ * Simple usage type compatible with common AI SDK patterns.
+ * This is independent from the provider-specific V3Usage type.
+ */
+export interface LanguageModelUsage {
+  promptTokens: number;
+  completionTokens: number;
+  totalTokens: number;
+}
+
+/**
+ * Re-export types for convenience.
+ */
+export type ToolCall = LanguageModelV3ToolCall;
+export type ToolResult = LanguageModelV3ToolResult;
+
+// ============================================================================
+// Configuration Types
+// ============================================================================
+
+/**
+ * Selection strategy for choosing the best response from multiple samples.
+ */
+export type SelectionStrategy = 'first' | 'vote' | 'best' | 'all';
+
+/**
+ * Scoring heuristic for the 'best' selection strategy.
+ */
+export type ScoringHeuristic = 'length' | 'confidence' | 'code' | 'custom';
+
+/**
+ * Configuration for ensemble/multi-sampling.
+ */
+export interface EnsembleConfig {
+  /**
+   * Number of parallel samples to generate.
+   * @default 3
+   */
+  n: number;
+
+  /**
+   * Strategy for selecting the best result.
+   * - 'first': Return the first successful response
+   * - 'vote': Return the most common answer (majority voting)
+   * - 'best': Use heuristic scoring to pick the best
+   * - 'all': Return all responses (for manual selection)
+   * @default 'best'
+   */
+  selectionStrategy?: SelectionStrategy;
+
+  /**
+   * Temperature variation for diversity.
+   * Each sample gets temperature = baseTemp + (i * variance)
+   * @default 0.1
+   */
+  temperatureVariance?: number;
+
+  /**
+   * For 'best' strategy, the scoring heuristic.
+   * - 'length': Prefer shorter responses
+   * - 'confidence': Prefer responses with higher token count
+   * - 'code': Prefer responses with fewer error patterns
+   * - 'custom': Use custom scorer function
+   * @default 'confidence'
+   */
+  scoringHeuristic?: ScoringHeuristic;
+
+  /**
+   * Custom scoring function for 'custom' heuristic.
+   * Higher scores are better.
+   */
+  customScorer?: (response: EnsembleResponse) => number;
+
+  /**
+   * Maximum time to wait for all samples (ms).
+   * @default 60000
+   */
+  timeoutMs?: number;
+
+  /**
+   * Whether to continue if some samples fail.
+   * @default true
+   */
+  allowPartialFailure?: boolean;
+
+  /**
+   * Minimum number of successful samples required.
+   * Only relevant when allowPartialFailure is true.
+   * @default 1
+   */
+  minSuccessfulSamples?: number;
+}
+
+// ============================================================================
+// Response Types
+// ============================================================================
+
+/**
+ * A single response from the ensemble.
+ */
+export interface EnsembleResponse {
+  /**
+   * The generated text.
+   */
+  text: string;
+
+  /**
+   * Reasoning content (for thinking models).
+   */
+  reasoning?: string;
+
+  /**
+   * Tool calls made during generation.
+   */
+  toolCalls?: ToolCall[];
+
+  /**
+   * Results of tool executions.
+   */
+  toolResults?: ToolResult[];
+
+  /**
+   * Token usage for this response.
+   */
+  usage?: LanguageModelUsage;
+
+  /**
+   * Score assigned to this response (if scoring was applied).
+   */
+  score?: number;
+
+  /**
+   * Index of this sample in the ensemble.
+   */
+  sampleIndex: number;
+
+  /**
+   * Temperature used for this sample.
+   */
+  temperature: number;
+
+  /**
+   * Reason the generation finished.
+   */
+  finishReason: string;
+
+  /**
+   * Whether this sample completed successfully.
+   */
+  success: boolean;
+
+  /**
+   * Error message if the sample failed.
+   */
+  error?: string;
+
+  /**
+   * Time taken to generate this response (ms).
+   */
+  durationMs?: number;
+}
+
+/**
+ * Metadata about the ensemble execution.
+ */
+export interface EnsembleMetadata {
+  /**
+   * Number of samples requested.
+   */
+  nRequested: number;
+
+  /**
+   * Number of samples that completed successfully.
+   */
+  nCompleted: number;
+
+  /**
+   * Number of samples that failed.
+   */
+  nFailed: number;
+
+  /**
+   * Selection strategy used.
+   */
+  selectionStrategy: SelectionStrategy;
+
+  /**
+   * Index of the winning sample.
+   */
+  winningIndex: number;
+
+  /**
+   * Scores of all samples (if scoring was applied).
+   */
+  scores?: number[];
+
+  /**
+   * Total time for ensemble execution (ms).
+   */
+  durationMs: number;
+
+  /**
+   * Model ID used.
+   */
+  modelId: string;
+
+  /**
+   * Aggregated token usage across all samples.
+   */
+  totalUsage: LanguageModelUsage;
+}
+
+/**
+ * Result of an ensemble generation.
+ */
+export interface EnsembleResult {
+  /**
+   * The selected best response text.
+   */
+  text: string;
+
+  /**
+   * Reasoning content from the best response.
+   */
+  reasoning?: string;
+
+  /**
+   * Tool calls from the best response.
+   */
+  toolCalls?: ToolCall[];
+
+  /**
+   * Tool results from the best response.
+   */
+  toolResults?: ToolResult[];
+
+  /**
+   * Token usage from the best response.
+   */
+  usage: LanguageModelUsage;
+
+  /**
+   * All generated responses (populated for 'all' strategy).
+   */
+  alternatives?: EnsembleResponse[];
+
+  /**
+   * Metadata about the ensemble execution.
+   */
+  metadata: EnsembleMetadata;
+}
+
+// ============================================================================
+// Utility Types
+// ============================================================================
+
+/**
+ * Internal state for tracking ensemble progress.
+ */
+export interface EnsembleState {
+  responses: EnsembleResponse[];
+  startTime: number;
+  completed: boolean;
+}
+
+/**
+ * Options for scoring a response.
+ */
+export interface ScoringOptions {
+  heuristic: ScoringHeuristic;
+  customScorer?: (response: EnsembleResponse) => number;
+}
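For the 'custom' heuristic, `customScorer` receives a full `EnsembleResponse` and returns a number, with higher scores winning under the 'best' strategy. A hypothetical configuration sketch (not part of the diff; the root-level re-export of these types is an assumption, they could also come from the ensemble module path):

```ts
// Assumed root-level re-export; the types are defined in src/ensemble/types.ts.
import type { EnsembleConfig, EnsembleResponse } from 'kimi-vercel-ai-sdk-provider';

// Reward answers that include unit-test calls and penalize very long answers.
export const testFirstConfig: EnsembleConfig = {
  n: 4,
  selectionStrategy: 'best',
  scoringHeuristic: 'custom',
  customScorer: (response: EnsembleResponse): number => {
    let score = /\b(describe|it|expect)\(/.test(response.text) ? 50 : 0;
    score -= response.text.length / 1000;
    return score;
  },
  timeoutMs: 30_000,
  allowPartialFailure: true,
  minSuccessfulSamples: 2
};
```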