@oh-my-pi/pi-ai 13.3.0 → 13.3.2

package/CHANGELOG.md CHANGED
@@ -2,6 +2,11 @@
 
 ## [Unreleased]
 
+## [13.3.1] - 2026-02-26
+### Added
+
+- Added `topP`, `topK`, `minP`, `presencePenalty`, and `repetitionPenalty` options to `StreamOptions` for fine-grained control over model sampling behavior
+
 ## [13.3.0] - 2026-02-26
 
 ### Changed
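
For reference, a minimal caller-side sketch of the new sampling options (assuming `StreamOptions` is exported from the package root; the field values below are illustrative, not recommendations):

```ts
import type { StreamOptions } from "@oh-my-pi/pi-ai";

// All of the new sampling fields are optional; each provider adapter only
// forwards a field when it is defined, so omitted fields keep provider defaults.
const options: StreamOptions = {
  temperature: 0.7,
  topP: 0.9,              // nucleus sampling cutoff
  topK: 40,               // top-k sampling cutoff
  minP: 0.05,             // min-p sampling threshold
  presencePenalty: 0.1,   // penalize tokens already present in the output
  repetitionPenalty: 1.1, // penalize repeated tokens
  maxTokens: 2048,
};
```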
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "type": "module",
   "name": "@oh-my-pi/pi-ai",
-  "version": "13.3.0",
+  "version": "13.3.2",
   "description": "Unified LLM API with automatic model discovery and provider configuration",
   "homepage": "https://github.com/can1357/oh-my-pi",
   "author": "Can Boluk",
@@ -44,7 +44,7 @@
     "@connectrpc/connect-node": "^2.1",
     "@google/genai": "^1.42",
     "@mistralai/mistralai": "^1.14",
-    "@oh-my-pi/pi-utils": "13.3.0",
+    "@oh-my-pi/pi-utils": "13.3.2",
     "@sinclair/typebox": "^0.34",
     "@smithy/node-http-handler": "^4.4",
     "ajv": "^8.18",
@@ -126,7 +126,7 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
     modelId: model.id,
     messages: convertMessages(context, model, cacheRetention),
     system: buildSystemPrompt(context.systemPrompt, model, cacheRetention),
-    inferenceConfig: { maxTokens: options.maxTokens, temperature: options.temperature },
+    inferenceConfig: { maxTokens: options.maxTokens, temperature: options.temperature, topP: options.topP },
     toolConfig: convertToolConfig(context.tools, options.toolChoice),
     additionalModelRequestFields: buildAdditionalModelRequestFields(model, options),
   };
@@ -111,6 +111,11 @@ export function buildAnthropicHeaders(options: AnthropicHeaderOptions): Record<s
 }
 
 type AnthropicCacheControl = { type: "ephemeral"; ttl?: "1h" | "5m" };
+
+type AnthropicSamplingParams = MessageCreateParamsStreaming & {
+  top_p?: number;
+  top_k?: number;
+};
 function getCacheControl(
   baseUrl: string,
   cacheRetention?: CacheRetention,
@@ -875,7 +880,7 @@ function buildParams(
   options?: AnthropicOptions,
 ): MessageCreateParamsStreaming {
   const { cacheControl } = getCacheControl(model.baseUrl, options?.cacheRetention);
-  const params: MessageCreateParamsStreaming = {
+  const params: AnthropicSamplingParams = {
     model: model.id,
     messages: convertAnthropicMessages(context.messages, model, isOAuthToken),
     max_tokens: options?.maxTokens || (model.maxTokens / 3) | 0,
@@ -911,6 +916,12 @@ function buildParams(
   if (options?.temperature !== undefined) {
     params.temperature = options.temperature;
   }
+  if (options?.topP !== undefined) {
+    params.top_p = options.topP;
+  }
+  if (options?.topK !== undefined) {
+    params.top_k = options.topK;
+  }
 
   if (context.tools) {
     params.tools = convertTools(context.tools, isOAuthToken);
@@ -71,6 +71,14 @@ export interface AzureOpenAIResponsesOptions extends StreamOptions {
   toolChoice?: ToolChoice;
 }
 
+type AzureOpenAIResponsesSamplingParams = ResponseCreateParamsStreaming & {
+  top_p?: number;
+  top_k?: number;
+  min_p?: number;
+  presence_penalty?: number;
+  repetition_penalty?: number;
+};
+
 /**
  * Generate function for Azure OpenAI Responses API
  */
@@ -448,7 +456,7 @@ function buildParams(
 ) {
   const messages = convertMessages(model, context, true);
 
-  const params: ResponseCreateParamsStreaming = {
+  const params: AzureOpenAIResponsesSamplingParams = {
     model: deploymentName,
     input: messages,
     stream: true,
@@ -462,6 +470,21 @@ function buildParams(
   if (options?.temperature !== undefined) {
     params.temperature = options?.temperature;
   }
+  if (options?.topP !== undefined) {
+    params.top_p = options.topP;
+  }
+  if (options?.topK !== undefined) {
+    params.top_k = options.topK;
+  }
+  if (options?.minP !== undefined) {
+    params.min_p = options.minP;
+  }
+  if (options?.presencePenalty !== undefined) {
+    params.presence_penalty = options.presencePenalty;
+  }
+  if (options?.repetitionPenalty !== undefined) {
+    params.repetition_penalty = options.repetitionPenalty;
+  }
 
   if (context.tools) {
     params.tools = convertTools(context.tools);
@@ -281,6 +281,11 @@ export function streamGitLabDuo(
       apiKey: directAccess.token,
       isOAuth: true,
       temperature: options.temperature,
+      topP: options.topP,
+      topK: options.topK,
+      minP: options.minP,
+      presencePenalty: options.presencePenalty,
+      repetitionPenalty: options.repetitionPenalty,
       maxTokens: options.maxTokens ?? Math.min(model.maxTokens, 32000),
       signal: options.signal,
       cacheRetention: options.cacheRetention,
@@ -310,6 +315,11 @@ export function streamGitLabDuo(
     {
       apiKey: directAccess.token,
       temperature: options.temperature,
+      topP: options.topP,
+      topK: options.topK,
+      minP: options.minP,
+      presencePenalty: options.presencePenalty,
+      repetitionPenalty: options.repetitionPenalty,
       maxTokens: options.maxTokens ?? model.maxTokens,
       signal: options.signal,
       cacheRetention: options.cacheRetention,
@@ -334,6 +344,11 @@ export function streamGitLabDuo(
     {
       apiKey: directAccess.token,
       temperature: options.temperature,
+      topP: options.topP,
+      topK: options.topK,
+      minP: options.minP,
+      presencePenalty: options.presencePenalty,
+      repetitionPenalty: options.repetitionPenalty,
       maxTokens: options.maxTokens ?? model.maxTokens,
       signal: options.signal,
       cacheRetention: options.cacheRetention,
@@ -268,6 +268,11 @@ interface CloudCodeAssistRequest {
   generationConfig?: {
     maxOutputTokens?: number;
     temperature?: number;
+    topP?: number;
+    topK?: number;
+    minP?: number;
+    presencePenalty?: number;
+    repetitionPenalty?: number;
     thinkingConfig?: ThinkingConfig;
   };
   tools?: { functionDeclarations: Record<string, unknown>[] }[] | undefined;
@@ -829,6 +834,21 @@ export function buildRequest(
   if (options.maxTokens !== undefined) {
     generationConfig.maxOutputTokens = options.maxTokens;
   }
+  if (options.topP !== undefined) {
+    generationConfig.topP = options.topP;
+  }
+  if (options.topK !== undefined) {
+    generationConfig.topK = options.topK;
+  }
+  if (options.minP !== undefined) {
+    generationConfig.minP = options.minP;
+  }
+  if (options.presencePenalty !== undefined) {
+    generationConfig.presencePenalty = options.presencePenalty;
+  }
+  if (options.repetitionPenalty !== undefined) {
+    generationConfig.repetitionPenalty = options.repetitionPenalty;
+  }
 
   // Thinking config
   if (options.thinking?.enabled && model.reasoning) {
@@ -42,6 +42,14 @@ export interface GoogleVertexOptions extends StreamOptions {
   location?: string;
 }
 
+interface GoogleVertexSamplingConfig extends GenerateContentConfig {
+  topP?: number;
+  topK?: number;
+  minP?: number;
+  presencePenalty?: number;
+  repetitionPenalty?: number;
+}
+
 const API_VERSION = "v1";
 
 const THINKING_LEVEL_MAP: Record<GoogleThinkingLevel, ThinkingLevel> = {
@@ -338,13 +346,28 @@ function buildParams(
 ): GenerateContentParameters {
   const contents = convertMessages(model, context);
 
-  const generationConfig: GenerateContentConfig = {};
+  const generationConfig: GoogleVertexSamplingConfig = {};
   if (options.temperature !== undefined) {
     generationConfig.temperature = options.temperature;
   }
   if (options.maxTokens !== undefined) {
     generationConfig.maxOutputTokens = options.maxTokens;
   }
+  if (options.topP !== undefined) {
+    generationConfig.topP = options.topP;
+  }
+  if (options.topK !== undefined) {
+    generationConfig.topK = options.topK;
+  }
+  if (options.minP !== undefined) {
+    generationConfig.minP = options.minP;
+  }
+  if (options.presencePenalty !== undefined) {
+    generationConfig.presencePenalty = options.presencePenalty;
+  }
+  if (options.repetitionPenalty !== undefined) {
+    generationConfig.repetitionPenalty = options.repetitionPenalty;
+  }
 
   const config: GenerateContentConfig = {
     ...(Object.keys(generationConfig).length > 0 && generationConfig),
@@ -42,6 +42,14 @@ export interface GoogleOptions extends StreamOptions {
   };
 }
 
+interface GoogleSamplingConfig extends GenerateContentConfig {
+  topP?: number;
+  topK?: number;
+  minP?: number;
+  presencePenalty?: number;
+  repetitionPenalty?: number;
+}
+
 // Counter for generating unique tool call IDs
 let toolCallCounter = 0;
 
@@ -304,13 +312,28 @@ function buildParams(
 ): GenerateContentParameters {
   const contents = convertMessages(model, context);
 
-  const generationConfig: GenerateContentConfig = {};
+  const generationConfig: GoogleSamplingConfig = {};
   if (options.temperature !== undefined) {
     generationConfig.temperature = options.temperature;
   }
   if (options.maxTokens !== undefined) {
     generationConfig.maxOutputTokens = options.maxTokens;
   }
+  if (options.topP !== undefined) {
+    generationConfig.topP = options.topP;
+  }
+  if (options.topK !== undefined) {
+    generationConfig.topK = options.topK;
+  }
+  if (options.minP !== undefined) {
+    generationConfig.minP = options.minP;
+  }
+  if (options.presencePenalty !== undefined) {
+    generationConfig.presencePenalty = options.presencePenalty;
+  }
+  if (options.repetitionPenalty !== undefined) {
+    generationConfig.repetitionPenalty = options.repetitionPenalty;
+  }
 
   const config: GenerateContentConfig = {
     ...(Object.keys(generationConfig).length > 0 && generationConfig),
@@ -70,6 +70,11 @@ export function streamKimi(
   const innerStream = streamAnthropic(anthropicModel, context, {
     apiKey: options?.apiKey,
     temperature: options?.temperature,
+    topP: options?.topP,
+    topK: options?.topK,
+    minP: options?.minP,
+    presencePenalty: options?.presencePenalty,
+    repetitionPenalty: options?.repetitionPenalty,
     maxTokens: options?.maxTokens ?? Math.min(model.maxTokens, 32000),
     signal: options?.signal,
     headers: mergedHeaders,
@@ -87,6 +92,11 @@ export function streamKimi(
   const innerStream = streamOpenAICompletions(model, context, {
     apiKey: options?.apiKey,
     temperature: options?.temperature,
+    topP: options?.topP,
+    topK: options?.topK,
+    minP: options?.minP,
+    presencePenalty: options?.presencePenalty,
+    repetitionPenalty: options?.repetitionPenalty,
     maxTokens: options?.maxTokens ?? model.maxTokens,
     signal: options?.signal,
     headers: mergedHeaders,
@@ -30,6 +30,11 @@ export interface RequestBody {
   tools?: unknown;
   tool_choice?: unknown;
   temperature?: number;
+  top_p?: number;
+  top_k?: number;
+  min_p?: number;
+  presence_penalty?: number;
+  repetition_penalty?: number;
   reasoning?: Partial<ReasoningConfig>;
   text?: {
     verbosity?: "low" | "medium" | "high";
@@ -333,6 +333,21 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
   if (options?.temperature !== undefined) {
     params.temperature = options.temperature;
   }
+  if (options?.topP !== undefined) {
+    params.top_p = options.topP;
+  }
+  if (options?.topK !== undefined) {
+    params.top_k = options.topK;
+  }
+  if (options?.minP !== undefined) {
+    params.min_p = options.minP;
+  }
+  if (options?.presencePenalty !== undefined) {
+    params.presence_penalty = options.presencePenalty;
+  }
+  if (options?.repetitionPenalty !== undefined) {
+    params.repetition_penalty = options.repetitionPenalty;
+  }
 
   if (context.tools && context.tools.length > 0) {
     params.tools = convertTools(context.tools);
@@ -109,6 +109,12 @@ export interface OpenAICompletionsOptions extends StreamOptions {
   reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
 }
 
+type OpenAICompletionsSamplingParams = OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming & {
+  top_k?: number;
+  min_p?: number;
+  repetition_penalty?: number;
+};
+
 // LIMITATION: The think tag parser uses naive string matching for <think>/<thinking> tags.
 // If MiniMax models output these literal strings in code blocks, XML examples, or explanations,
 // they will be incorrectly consumed as thinking delimiters, truncating visible output.
@@ -530,7 +536,7 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
   const isKimi = model.id.includes("moonshotai/kimi");
   const effectiveMaxTokens = options?.maxTokens ?? (isKimi ? model.maxTokens : undefined);
 
-  const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
+  const params: OpenAICompletionsSamplingParams = {
     model: model.id,
     messages,
     stream: true,
@@ -555,6 +561,21 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
   if (options?.temperature !== undefined) {
     params.temperature = options.temperature;
   }
+  if (options?.topP !== undefined) {
+    params.top_p = options.topP;
+  }
+  if (options?.topK !== undefined) {
+    params.top_k = options.topK;
+  }
+  if (options?.minP !== undefined) {
+    params.min_p = options.minP;
+  }
+  if (options?.presencePenalty !== undefined) {
+    params.presence_penalty = options.presencePenalty;
+  }
+  if (options?.repetitionPenalty !== undefined) {
+    params.repetition_penalty = options.repetitionPenalty;
+  }
 
   if (context.tools) {
     params.tools = convertTools(context.tools, compat);
@@ -65,6 +65,14 @@ export interface OpenAIResponsesOptions extends StreamOptions {
   strictResponsesPairing?: boolean;
 }
 
+type OpenAIResponsesSamplingParams = ResponseCreateParamsStreaming & {
+  top_p?: number;
+  top_k?: number;
+  min_p?: number;
+  presence_penalty?: number;
+  repetition_penalty?: number;
+};
+
 /**
  * Generate function for OpenAI Responses API
  */
@@ -407,7 +415,7 @@ function buildParams(model: Model<"openai-responses">, context: Context, options
 
   const cacheRetention = resolveCacheRetention(options?.cacheRetention);
   const promptCacheKey = cacheRetention === "none" ? undefined : options?.sessionId;
-  const params: ResponseCreateParamsStreaming = {
+  const params: OpenAIResponsesSamplingParams = {
     model: model.id,
     input: messages,
     stream: true,
@@ -423,6 +431,21 @@ function buildParams(model: Model<"openai-responses">, context: Context, options
   if (options?.temperature !== undefined) {
     params.temperature = options?.temperature;
   }
+  if (options?.topP !== undefined) {
+    params.top_p = options.topP;
+  }
+  if (options?.topK !== undefined) {
+    params.top_k = options.topK;
+  }
+  if (options?.minP !== undefined) {
+    params.min_p = options.minP;
+  }
+  if (options?.presencePenalty !== undefined) {
+    params.presence_penalty = options.presencePenalty;
+  }
+  if (options?.repetitionPenalty !== undefined) {
+    params.repetition_penalty = options.repetitionPenalty;
+  }
 
   if (options?.serviceTier !== undefined) {
     params.service_tier = options.serviceTier;
@@ -67,6 +67,11 @@ export function streamSynthetic(
   const innerStream = streamAnthropic(anthropicModel, context, {
     apiKey: options?.apiKey,
     temperature: options?.temperature,
+    topP: options?.topP,
+    topK: options?.topK,
+    minP: options?.minP,
+    presencePenalty: options?.presencePenalty,
+    repetitionPenalty: options?.repetitionPenalty,
     maxTokens: options?.maxTokens ?? Math.min(model.maxTokens, 32000),
     signal: options?.signal,
     headers: mergedHeaders,
@@ -90,6 +95,11 @@ export function streamSynthetic(
   const innerStream = streamOpenAICompletions(syntheticModel, context, {
     apiKey: options?.apiKey,
     temperature: options?.temperature,
+    topP: options?.topP,
+    topK: options?.topK,
+    minP: options?.minP,
+    presencePenalty: options?.presencePenalty,
+    repetitionPenalty: options?.repetitionPenalty,
     maxTokens: options?.maxTokens ?? model.maxTokens,
     signal: options?.signal,
     headers: mergedHeaders,
package/src/stream.ts CHANGED
@@ -396,6 +396,11 @@ function mapOptionsForApi<TApi extends Api>(
 ): OptionsForApi<TApi> {
   const base = {
     temperature: options?.temperature,
+    topP: options?.topP,
+    topK: options?.topK,
+    minP: options?.minP,
+    presencePenalty: options?.presencePenalty,
+    repetitionPenalty: options?.repetitionPenalty,
     maxTokens: options?.maxTokens || Math.min(model.maxTokens, 32000),
     signal: options?.signal,
     apiKey: apiKey || options?.apiKey,
package/src/types.ts CHANGED
@@ -129,6 +129,11 @@ export interface ProviderSessionState {
 
 export interface StreamOptions {
   temperature?: number;
+  topP?: number;
+  topK?: number;
+  minP?: number;
+  presencePenalty?: number;
+  repetitionPenalty?: number;
   maxTokens?: number;
   signal?: AbortSignal;
   apiKey?: string;
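
Across the provider adapters above, the pass-through follows one pattern: copy a sampling option only when it is defined, translating camelCase `StreamOptions` keys to each provider's parameter names (snake_case in the OpenAI- and Anthropic-style request bodies, camelCase in the Google `generationConfig`). A sketch of that copy-if-defined mapping; the helper name and key table below are illustrative, not internals of the package:

```ts
// Illustrative helper, not part of @oh-my-pi/pi-ai: maps camelCase sampling
// options onto snake_case request parameters, skipping undefined values so
// provider defaults remain in effect.
interface SamplingOptions {
  topP?: number;
  topK?: number;
  minP?: number;
  presencePenalty?: number;
  repetitionPenalty?: number;
}

const SNAKE_CASE_KEYS: Record<keyof SamplingOptions, string> = {
  topP: "top_p",
  topK: "top_k",
  minP: "min_p",
  presencePenalty: "presence_penalty",
  repetitionPenalty: "repetition_penalty",
};

function applySamplingParams(params: Record<string, unknown>, options: SamplingOptions): void {
  for (const key of Object.keys(SNAKE_CASE_KEYS) as (keyof SamplingOptions)[]) {
    const value = options[key];
    if (value !== undefined) {
      params[SNAKE_CASE_KEYS[key]] = value; // e.g. topP -> top_p
    }
  }
}
```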