kimi-vercel-ai-sdk-provider 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,433 @@
+ /**
+  * Multi-sampler implementation for ensemble generation.
+  * @module
+  */
+
+ import type {
+   EnsembleConfig,
+   EnsembleMetadata,
+   EnsembleResponse,
+   EnsembleResult,
+   LanguageModelUsage,
+   ScoringHeuristic,
+   SelectionStrategy
+ } from './types';
+
+ // ============================================================================
+ // Types
+ // ============================================================================
+
+ /**
+  * A function that generates a single response.
+  */
+ export type GenerateFunction = (options: { temperature: number; sampleIndex: number }) => Promise<{
+   text: string;
+   reasoning?: string;
+   toolCalls?: unknown[];
+   toolResults?: unknown[];
+   usage?: LanguageModelUsage;
+   finishReason: string;
+ }>;
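// --- Editor's note: illustrative sketch, not part of the published diff. ---
// One way to satisfy the GenerateFunction contract above is to wrap the
// Vercel AI SDK's generateText call. The `model` and `prompt` values are
// assumed to be supplied by the caller, and usage field names differ between
// AI SDK major versions, so they are mapped defensively here.
import { generateText, type LanguageModel } from 'ai';

export function makeGenerateFunction(model: LanguageModel, prompt: string): GenerateFunction {
  return async ({ temperature }) => {
    const { text, finishReason, usage } = await generateText({ model, prompt, temperature });
    // Normalize usage into the LanguageModelUsage shape declared in ./types.
    const u = usage as unknown as Record<string, number | undefined>;
    return {
      text,
      finishReason,
      usage: {
        promptTokens: u.promptTokens ?? u.inputTokens ?? 0,
        completionTokens: u.completionTokens ?? u.outputTokens ?? 0,
        totalTokens: u.totalTokens ?? 0
      }
    };
  };
}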
+
+ /**
+  * Options for creating a multi-sampler.
+  */
+ export interface MultiSamplerOptions {
+   /**
+    * The model ID being used.
+    */
+   modelId: string;
+
+   /**
+    * Base temperature for generation.
+    */
+   baseTemperature?: number;
+ }
+
+ // ============================================================================
+ // MultiSampler Class
+ // ============================================================================
+
+ /**
+  * Multi-sampler for generating multiple responses and selecting the best one.
+  *
+  * @example
+  * ```ts
+  * const sampler = new MultiSampler({ modelId: 'kimi-k2.5' });
+  * const result = await sampler.generate(generateFn, {
+  *   n: 3,
+  *   selectionStrategy: 'best',
+  *   scoringHeuristic: 'code',
+  * });
+  * ```
+  */
+ export class MultiSampler {
+   private modelId: string;
+   private baseTemperature: number;
+
+   constructor(options: MultiSamplerOptions) {
+     this.modelId = options.modelId;
+     this.baseTemperature = options.baseTemperature ?? 0.7;
+   }
+
+   /**
+    * Generate multiple samples and select the best one.
+    *
+    * @param generateFn - Function to generate a single response
+    * @param config - Ensemble configuration
+    * @returns The ensemble result
+    */
+   async generate(generateFn: GenerateFunction, config: EnsembleConfig): Promise<EnsembleResult> {
+     const startTime = Date.now();
+     const {
+       n,
+       selectionStrategy = 'best',
+       temperatureVariance = 0.1,
+       scoringHeuristic = 'confidence',
+       customScorer,
+       timeoutMs = 60000,
+       allowPartialFailure = true,
+       minSuccessfulSamples = 1
+     } = config;
+
+     // Validate configuration
+     if (n < 1 || n > 10) {
+       throw new Error('Ensemble n must be between 1 and 10');
+     }
+
+     // Generate samples in parallel
+     const promises = Array.from({ length: n }, async (_, i) => {
+       const temperature = Math.min(this.baseTemperature + i * temperatureVariance, 2.0);
+       const sampleStart = Date.now();
+
+       try {
+         const result = await generateFn({ temperature, sampleIndex: i });
+         return {
+           text: result.text,
+           reasoning: result.reasoning,
+           toolCalls: result.toolCalls,
+           toolResults: result.toolResults,
+           usage: result.usage,
+           sampleIndex: i,
+           temperature,
+           finishReason: result.finishReason,
+           success: true,
+           durationMs: Date.now() - sampleStart
+         } as EnsembleResponse;
+       } catch (error) {
+         return {
+           text: '',
+           sampleIndex: i,
+           temperature,
+           finishReason: 'error',
+           success: false,
+           error: error instanceof Error ? error.message : 'Unknown error',
+           durationMs: Date.now() - sampleStart
+         } as EnsembleResponse;
+       }
+     });
+
+     // Wait for all samples with timeout
+     let responses: EnsembleResponse[];
+     try {
+       responses = await Promise.race([
+         Promise.all(promises),
+         new Promise<never>((_, reject) =>
+           setTimeout(() => reject(new Error('Ensemble generation timed out')), timeoutMs)
+         )
+       ]);
+     } catch (_error) {
+       // On timeout, let the in-flight samples settle and keep those that resolved
+       const partialResponses = await Promise.all(
+         promises.map((p) =>
+           p.catch(
+             () =>
+               ({
+                 text: '',
+                 sampleIndex: -1,
+                 temperature: 0,
+                 finishReason: 'timeout',
+                 success: false,
+                 error: 'Timed out'
+               }) as EnsembleResponse
+           )
+         )
+       );
+       responses = partialResponses.filter((r) => r.sampleIndex >= 0);
+     }
+
+     // Filter successful responses
+     const successfulResponses = responses.filter((r) => r.success);
+
+     // Enforce the failure policy: without partial failure every sample must
+     // succeed; with it, at least minSuccessfulSamples must succeed.
+     if (!allowPartialFailure && successfulResponses.length < responses.length) {
+       throw new Error(
+         `${responses.length - successfulResponses.length} of ${responses.length} ensemble samples failed`
+       );
+     }
+
+     if (successfulResponses.length < minSuccessfulSamples) {
+       throw new Error(
+         `Only ${successfulResponses.length} samples succeeded, minimum required is ${minSuccessfulSamples}`
+       );
+     }
+
+     if (successfulResponses.length === 0) {
+       throw new Error('All ensemble samples failed');
+     }
+
+     // Apply selection strategy
+     const result = this.selectBest(successfulResponses, responses, {
+       selectionStrategy,
+       scoringHeuristic,
+       customScorer,
+       modelId: this.modelId,
+       startTime
+     });
+
+     return result;
+   }
+
+   /**
+    * Select the best response based on the strategy.
+    */
+   private selectBest(
+     successfulResponses: EnsembleResponse[],
+     allResponses: EnsembleResponse[],
+     options: {
+       selectionStrategy: SelectionStrategy;
+       scoringHeuristic: ScoringHeuristic;
+       customScorer?: (response: EnsembleResponse) => number;
+       modelId: string;
+       startTime: number;
+     }
+   ): EnsembleResult {
+     const { selectionStrategy, scoringHeuristic, customScorer, modelId, startTime } = options;
+
+     // Score all successful responses
+     const scored = successfulResponses.map((r) => {
+       return {
+         ...r,
+         score: this.calculateScore(r, scoringHeuristic, customScorer)
+       };
+     });
+
+     let winner: EnsembleResponse;
+     let alternatives: EnsembleResponse[] | undefined;
+
+     switch (selectionStrategy) {
+       case 'first':
+         winner = scored[0];
+         break;
+
+       case 'vote':
+         winner = this.majorityVote(scored);
+         break;
+
+       case 'best':
+         scored.sort((a, b) => (b.score ?? 0) - (a.score ?? 0));
+         winner = scored[0];
+         break;
+
+       case 'all':
+         winner = scored[0];
+         alternatives = scored;
+         break;
+
+       default:
+         throw new Error(`Unknown selection strategy: ${selectionStrategy}`);
+     }
+
+     const metadata: EnsembleMetadata = {
+       nRequested: allResponses.length,
+       nCompleted: successfulResponses.length,
+       nFailed: allResponses.filter((r) => !r.success).length,
+       selectionStrategy,
+       winningIndex: winner.sampleIndex,
+       scores: scored.map((r) => r.score ?? 0),
+       durationMs: Date.now() - startTime,
+       modelId,
+       totalUsage: this.aggregateUsage(successfulResponses)
+     };
+
+     return {
+       text: winner.text,
+       reasoning: winner.reasoning,
+       toolCalls: winner.toolCalls as EnsembleResult['toolCalls'],
+       toolResults: winner.toolResults as EnsembleResult['toolResults'],
+       usage: winner.usage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
+       alternatives,
+       metadata
+     };
+   }
+
+   /**
+    * Calculate score for a response based on the heuristic.
+    */
+   private calculateScore(
+     response: EnsembleResponse,
+     heuristic: ScoringHeuristic,
+     customScorer?: (response: EnsembleResponse) => number
+   ): number {
+     switch (heuristic) {
+       case 'length':
+         // Prefer concise answers (inverse length, normalized)
+         return 1000 / (response.text.length + 1);
+
+       case 'confidence':
+         // A higher completion-token count often indicates more complete reasoning
+         return response.usage?.completionTokens ?? 0;
+
+       case 'code':
+         return this.scoreCodeQuality(response.text);
+
+       case 'custom':
+         if (!customScorer) {
+           throw new Error('Custom scorer function required for custom heuristic');
+         }
+         return customScorer(response);
+
+       default:
+         return 0;
+     }
+   }
+
+   /**
+    * Score code quality based on heuristics.
+    */
+   private scoreCodeQuality(text: string): number {
+     let score = 100;
+
+     // Deduct for common error patterns
+     const errorPatterns = [
+       { pattern: /SyntaxError/gi, penalty: 25 },
+       { pattern: /ReferenceError/gi, penalty: 20 },
+       { pattern: /TypeError/gi, penalty: 20 },
+       { pattern: /undefined is not/gi, penalty: 15 },
+       { pattern: /cannot read property/gi, penalty: 15 },
+       { pattern: /is not defined/gi, penalty: 15 },
+       { pattern: /unexpected token/gi, penalty: 20 },
+       { pattern: /null is not/gi, penalty: 15 }
+     ];
+
+     for (const { pattern, penalty } of errorPatterns) {
+       const matches = text.match(pattern);
+       if (matches) {
+         score -= penalty * matches.length;
+       }
+     }
+
+     // Bonus for proper code blocks
+     if (text.includes('```')) {
+       score += 10;
+     }
+
+     // Bonus for comments/documentation
+     if (/\/\/.*|\/\*[\s\S]*?\*\/|#.*/.test(text)) {
+       score += 5;
+     }
+
+     // Bonus for test mentions
+     if (/\b(test|spec|assert|expect|describe|it)\b/i.test(text)) {
+       score += 5;
+     }
+
+     // Bonus for type annotations (TypeScript)
+     if (/:\s*(string|number|boolean|void|any|unknown|never)\b/.test(text)) {
+       score += 5;
+     }
+
+     // Penalty for TODO/FIXME left in code
+     if (/\b(TODO|FIXME|XXX|HACK)\b/i.test(text)) {
+       score -= 5;
+     }
+
+     return Math.max(0, score);
+   }
+
+   /**
+    * Select the most common response (majority voting).
+    */
+   private majorityVote(responses: EnsembleResponse[]): EnsembleResponse {
+     // Simple text similarity voting based on normalized text
+     const normalized = responses.map((r) => {
+       return {
+         response: r,
+         key: r.text.toLowerCase().replace(/\s+/g, ' ').trim().slice(0, 500)
+       };
+     });
+
+     const votes = new Map<string, { count: number; response: EnsembleResponse }>();
+
+     for (const { response, key } of normalized) {
+       const existing = votes.get(key);
+       if (existing) {
+         existing.count++;
+       } else {
+         votes.set(key, { count: 1, response });
+       }
+     }
+
+     // Find the response with the most votes
+     let maxVotes = 0;
+     let winner = responses[0];
+
+     for (const { count, response } of votes.values()) {
+       if (count > maxVotes) {
+         maxVotes = count;
+         winner = response;
+       }
+     }
+
+     return winner;
+   }
+
+   /**
+    * Aggregate usage across all responses.
+    */
+   private aggregateUsage(responses: EnsembleResponse[]): LanguageModelUsage {
+     return responses.reduce(
+       (acc, r) => {
+         return {
+           promptTokens: acc.promptTokens + (r.usage?.promptTokens ?? 0),
+           completionTokens: acc.completionTokens + (r.usage?.completionTokens ?? 0),
+           totalTokens: acc.totalTokens + (r.usage?.totalTokens ?? 0)
+         };
+       },
+       { promptTokens: 0, completionTokens: 0, totalTokens: 0 }
+     );
+   }
+ }
+
+ // ============================================================================
+ // Utility Functions
+ // ============================================================================
+
+ /**
+  * Create a simple ensemble result from a single response.
+  * Useful when ensembling is disabled but a consistent return type is needed.
+  */
+ export function createSingletonEnsembleResult(
+   response: {
+     text: string;
+     reasoning?: string;
+     toolCalls?: unknown[];
+     toolResults?: unknown[];
+     usage?: LanguageModelUsage;
+     finishReason: string;
+   },
+   modelId: string,
+   durationMs: number
+ ): EnsembleResult {
+   return {
+     text: response.text,
+     reasoning: response.reasoning,
+     toolCalls: response.toolCalls as EnsembleResult['toolCalls'],
+     toolResults: response.toolResults as EnsembleResult['toolResults'],
+     usage: response.usage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
+     metadata: {
+       nRequested: 1,
+       nCompleted: 1,
+       nFailed: 0,
+       selectionStrategy: 'first',
+       winningIndex: 0,
+       scores: [100],
+       durationMs,
+       modelId,
+       totalUsage: response.usage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 }
+     }
+   };
+ }
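// --- Editor's note: illustrative sketch, not part of the published diff. ---
// Driving MultiSampler end to end with the adapter sketched earlier. The
// `model` and `prompt` values are assumed to exist; the config values are
// arbitrary examples of the EnsembleConfig options declared in ./types.
declare const model: import('ai').LanguageModel;
declare const prompt: string;

const generateFn = makeGenerateFunction(model, prompt);
const sampler = new MultiSampler({ modelId: 'kimi-k2.5', baseTemperature: 0.6 });

const result = await sampler.generate(generateFn, {
  n: 5,
  selectionStrategy: 'vote',   // majority vote over normalized response text
  temperatureVariance: 0.15,   // samples run at 0.6, 0.75, 0.9, 1.05, 1.2
  timeoutMs: 30_000,
  allowPartialFailure: true,
  minSuccessfulSamples: 2
});

console.log(result.text, result.metadata.winningIndex, result.metadata.totalUsage);

// For the single-sample path (ensembling disabled), createSingletonEnsembleResult
// wraps one response in the same EnsembleResult shape so callers can treat both
// code paths uniformly.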
@@ -0,0 +1,279 @@
+ /**
+  * Types for ensemble/multi-sampling functionality.
+  * @module
+  */
+
+ import type { LanguageModelV3ToolCall, LanguageModelV3ToolResult } from '@ai-sdk/provider';
+
+ /**
+  * Simple usage type compatible with common AI SDK patterns.
+  * This is independent from the provider-specific V3Usage type.
+  */
+ export interface LanguageModelUsage {
+   promptTokens: number;
+   completionTokens: number;
+   totalTokens: number;
+ }
+
+ /**
+  * Re-export types for convenience.
+  */
+ export type ToolCall = LanguageModelV3ToolCall;
+ export type ToolResult = LanguageModelV3ToolResult;
+
+ // ============================================================================
+ // Configuration Types
+ // ============================================================================
+
+ /**
+  * Selection strategy for choosing the best response from multiple samples.
+  */
+ export type SelectionStrategy = 'first' | 'vote' | 'best' | 'all';
+
+ /**
+  * Scoring heuristic for the 'best' selection strategy.
+  */
+ export type ScoringHeuristic = 'length' | 'confidence' | 'code' | 'custom';
+
+ /**
+  * Configuration for ensemble/multi-sampling.
+  */
+ export interface EnsembleConfig {
+   /**
+    * Number of parallel samples to generate.
+    * @default 3
+    */
+   n: number;
+
+   /**
+    * Strategy for selecting the best result.
+    * - 'first': Return the first successful response
+    * - 'vote': Return the most common answer (majority voting)
+    * - 'best': Use heuristic scoring to pick the best
+    * - 'all': Return all responses (for manual selection)
+    * @default 'best'
+    */
+   selectionStrategy?: SelectionStrategy;
+
+   /**
+    * Temperature variation for diversity.
+    * Each sample gets temperature = baseTemp + (i * variance)
+    * @default 0.1
+    */
+   temperatureVariance?: number;
+
+   /**
+    * For 'best' strategy, the scoring heuristic.
+    * - 'length': Prefer shorter responses
+    * - 'confidence': Prefer responses with higher token count
+    * - 'code': Prefer responses with fewer error patterns
+    * - 'custom': Use custom scorer function
+    * @default 'confidence'
+    */
+   scoringHeuristic?: ScoringHeuristic;
+
+   /**
+    * Custom scoring function for 'custom' heuristic.
+    * Higher scores are better.
+    */
+   customScorer?: (response: EnsembleResponse) => number;
+
+   /**
+    * Maximum time to wait for all samples (ms).
+    * @default 60000
+    */
+   timeoutMs?: number;
+
+   /**
+    * Whether to continue if some samples fail.
+    * @default true
+    */
+   allowPartialFailure?: boolean;
+
+   /**
+    * Minimum number of successful samples required.
+    * Only relevant when allowPartialFailure is true.
+    * @default 1
+    */
+   minSuccessfulSamples?: number;
+ }
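// --- Editor's note: illustrative sketch, not part of the published diff. ---
// An example EnsembleConfig using the 'custom' heuristic. The scorer is an
// arbitrary illustration: it starts from the completion-token count and
// rewards responses that include a fenced code block.
const codeTaskConfig: EnsembleConfig = {
  n: 4,
  selectionStrategy: 'best',
  scoringHeuristic: 'custom',
  customScorer: (response) => {
    let score = response.usage?.completionTokens ?? 0;
    if (response.text.includes('```')) score += 50; // favor explicit code blocks
    return score;
  },
  timeoutMs: 45_000,
  allowPartialFailure: true,
  minSuccessfulSamples: 2
};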
+
+ // ============================================================================
+ // Response Types
+ // ============================================================================
+
+ /**
+  * A single response from the ensemble.
+  */
+ export interface EnsembleResponse {
+   /**
+    * The generated text.
+    */
+   text: string;
+
+   /**
+    * Reasoning content (for thinking models).
+    */
+   reasoning?: string;
+
+   /**
+    * Tool calls made during generation.
+    */
+   toolCalls?: ToolCall[];
+
+   /**
+    * Results of tool executions.
+    */
+   toolResults?: ToolResult[];
+
+   /**
+    * Token usage for this response.
+    */
+   usage?: LanguageModelUsage;
+
+   /**
+    * Score assigned to this response (if scoring was applied).
+    */
+   score?: number;
+
+   /**
+    * Index of this sample in the ensemble.
+    */
+   sampleIndex: number;
+
+   /**
+    * Temperature used for this sample.
+    */
+   temperature: number;
+
+   /**
+    * Reason the generation finished.
+    */
+   finishReason: string;
+
+   /**
+    * Whether this sample completed successfully.
+    */
+   success: boolean;
+
+   /**
+    * Error message if the sample failed.
+    */
+   error?: string;
+
+   /**
+    * Time taken to generate this response (ms).
+    */
+   durationMs?: number;
+ }
+
+ /**
+  * Metadata about the ensemble execution.
+  */
+ export interface EnsembleMetadata {
+   /**
+    * Number of samples requested.
+    */
+   nRequested: number;
+
+   /**
+    * Number of samples that completed successfully.
+    */
+   nCompleted: number;
+
+   /**
+    * Number of samples that failed.
+    */
+   nFailed: number;
+
+   /**
+    * Selection strategy used.
+    */
+   selectionStrategy: SelectionStrategy;
+
+   /**
+    * Index of the winning sample.
+    */
+   winningIndex: number;
+
+   /**
+    * Scores of all samples (if scoring was applied).
+    */
+   scores?: number[];
+
+   /**
+    * Total time for ensemble execution (ms).
+    */
+   durationMs: number;
+
+   /**
+    * Model ID used.
+    */
+   modelId: string;
+
+   /**
+    * Aggregated token usage across all samples.
+    */
+   totalUsage: LanguageModelUsage;
+ }
+
+ /**
+  * Result of an ensemble generation.
+  */
+ export interface EnsembleResult {
+   /**
+    * The selected best response text.
+    */
+   text: string;
+
+   /**
+    * Reasoning content from the best response.
+    */
+   reasoning?: string;
+
+   /**
+    * Tool calls from the best response.
+    */
+   toolCalls?: ToolCall[];
+
+   /**
+    * Tool results from the best response.
+    */
+   toolResults?: ToolResult[];
+
+   /**
+    * Token usage from the best response.
+    */
+   usage: LanguageModelUsage;
+
+   /**
+    * All generated responses (populated for 'all' strategy).
+    */
+   alternatives?: EnsembleResponse[];
+
+   /**
+    * Metadata about the ensemble execution.
+    */
+   metadata: EnsembleMetadata;
+ }
+
+ // ============================================================================
+ // Utility Types
+ // ============================================================================
+
+ /**
+  * Internal state for tracking ensemble progress.
+  */
+ export interface EnsembleState {
+   responses: EnsembleResponse[];
+   startTime: number;
+   completed: boolean;
+ }
+
+ /**
+  * Options for scoring a response.
+  */
+ export interface ScoringOptions {
+   heuristic: ScoringHeuristic;
+   customScorer?: (response: EnsembleResponse) => number;
+ }
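// --- Editor's note: illustrative sketch, not part of the published diff. ---
// With selectionStrategy 'all', every sample is returned in `alternatives`,
// so callers can re-rank manually. The ranking criterion below (longest
// reasoning trace) is only an example.
function pickLongestReasoning(result: EnsembleResult): EnsembleResponse | undefined {
  return [...(result.alternatives ?? [])]
    .filter((r) => r.success)
    .sort((a, b) => (b.reasoning?.length ?? 0) - (a.reasoning?.length ?? 0))[0];
}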