kimi-vercel-ai-sdk-provider 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -34,6 +34,11 @@ This is a native implementation with full support for Kimi-specific features, no
34
34
  - [Reasoning/Thinking Models](#reasoningthinking-models)
35
35
  - [Video Input](#video-input-k25-models)
36
36
  - [Model Capabilities](#model-capabilities)
37
+ - [Advanced Features](#advanced-features)
38
+ - [Temperature Locking](#temperature-locking-for-thinking-models)
39
+ - [File Content Caching](#file-content-caching)
40
+ - [Schema Sanitization](#schema-sanitization)
41
+ - [Reasoning Preservation](#reasoning-preservation-utilities)
37
42
  - [Provider Options](#provider-options)
38
43
  - [Available Models](#available-models-1)
39
44
  - [Regional Endpoints](#regional-endpoints)
@@ -56,6 +61,9 @@ This is a native implementation with full support for Kimi-specific features, no
56
61
  - **Native File & PDF Support** - Automatic file upload and content extraction
57
62
  - **Tool Choice Polyfill** - Simulates `required` and `tool` choices via system messages
58
63
  - **Context Caching** - Reduce costs by up to 90% for repeated long prompts
64
+ - **Temperature Locking** - Automatic temperature enforcement for thinking models
65
+ - **File Content Caching** - LRU cache to avoid re-uploading identical files
66
+ - **Schema Sanitization** - Automatic cleanup of unsupported JSON Schema keywords
59
67
 
60
68
  ### Kimi Code (Premium Coding API)
61
69
  - High-speed output (up to 100 tokens/s)
@@ -603,6 +611,134 @@ const codeCaps = inferKimiCodeCapabilities('kimi-k2-thinking');
603
611
  // }
604
612
  ```
605
613
 
614
+ ## Advanced Features
615
+
616
+ ### Temperature Locking for Thinking Models
617
+
618
+ Thinking models like `kimi-k2.5-thinking` require a fixed temperature of `1.0` for optimal reasoning. The provider automatically enforces this:
619
+
620
+ ```ts
621
+ // Temperature is automatically set to 1.0 for thinking models
622
+ const result = await generateText({
623
+ model: kimi('kimi-k2.5-thinking'),
624
+ temperature: 0.7, // Will be ignored with a warning
625
+ prompt: 'Solve this complex problem...',
626
+ });
627
+
628
+ // Check the response for warnings
629
+ console.log(result.warnings);
630
+ // [{ type: 'compatibility', feature: 'temperature', details: 'Thinking models require temperature=1.0...' }]
631
+ ```
632
+
633
+ Thinking models also default to 32k max tokens to prevent reasoning truncation:
634
+
635
+ ```ts
636
+ // No need to set maxTokens - defaults to 32768 for thinking models
637
+ const result = await generateText({
638
+ model: kimi('kimi-k2.5-thinking'),
639
+ prompt: 'Explain quantum computing in detail...',
640
+ });
641
+ ```
642
+
643
+ ### File Content Caching
644
+
645
+ Avoid re-uploading the same files by enabling the LRU cache:
646
+
647
+ ```ts
648
+ import { processAttachments } from 'kimi-vercel-ai-sdk-provider';
649
+
650
+ // Enable caching (uses default global cache: 100 entries, 1 hour TTL)
651
+ const processed = await processAttachments({
652
+ attachments: message.experimental_attachments ?? [],
653
+ clientConfig: {
654
+ baseURL: 'https://api.moonshot.ai/v1',
655
+ headers: () => ({ Authorization: `Bearer ${process.env.MOONSHOT_API_KEY}` }),
656
+ },
657
+ cache: true, // Enable file caching
658
+ });
659
+
660
+ // Or provide a custom cache instance
661
+ import { FileCache } from 'kimi-vercel-ai-sdk-provider';
662
+
663
+ const customCache = new FileCache({
664
+ maxSize: 200, // Max 200 entries
665
+ ttlMs: 2 * 60 * 60 * 1000, // 2 hour TTL
666
+ });
667
+
668
+ const processedWithCustomCache = await processAttachments({
669
+ attachments,
670
+ clientConfig,
671
+ cache: customCache,
672
+ });
673
+ ```
674
+
675
+ ### Schema Sanitization
676
+
677
+ Tool parameters are automatically sanitized to remove JSON Schema keywords not supported by Kimi:
678
+
679
+ ```ts
680
+ // This schema with advanced JSON Schema features...
681
+ const complexTool = {
682
+ name: 'search',
683
+ parameters: {
684
+ type: 'object',
685
+ properties: { query: { type: 'string' } },
686
+ $schema: 'http://json-schema.org/draft-07/schema#', // Removed
687
+ allOf: [{ minLength: 1 }], // Removed
688
+ anyOf: [{ type: 'string' }], // Removed
689
+ required: ['query'],
690
+ },
691
+ };
692
+
693
+ // ...is automatically sanitized before being sent to Kimi
694
+ // Only basic properties (type, properties, required, description) are kept
695
+ ```
696
+
697
+ ### Reasoning Preservation Utilities
698
+
699
+ Helpers for maintaining reasoning context in multi-turn conversations:
700
+
701
+ ```ts
702
+ import {
703
+ analyzeReasoningPreservation,
704
+ recommendThinkingModel
705
+ } from 'kimi-vercel-ai-sdk-provider';
706
+
707
+ // Analyze if reasoning is properly preserved in a conversation
708
+ const messages = [
709
+ { role: 'user', content: 'Solve this step by step: ...' },
710
+ {
711
+ role: 'assistant',
712
+ content: [
713
+ { type: 'reasoning', text: 'First, I need to...' },
714
+ { type: 'text', text: 'The answer is 42.' }
715
+ ]
716
+ },
717
+ { role: 'user', content: 'Explain step 2 more.' },
718
+ ];
719
+
720
+ const analysis = analyzeReasoningPreservation(messages);
721
+ // {
722
+ // hasReasoningContent: true,
723
+ // reasoningPreserved: true,
724
+ // turnCount: 3,
725
+ // reasoningTurnCount: 1,
726
+ // recommendations: []
727
+ // }
728
+
729
+ // Get a recommendation on whether to use a thinking model
730
+ const recommendation = recommendThinkingModel({
731
+ taskDescription: 'Complex mathematical proof',
732
+ requiresStepByStep: true,
733
+ complexity: 'high',
734
+ });
735
+ // {
736
+ // recommended: true,
737
+ // reason: 'Task requires step-by-step reasoning with high complexity',
738
+ // suggestedModel: 'kimi-k2.5-thinking'
739
+ // }
740
+ ```
741
+
606
742
  ## Provider Options
607
743
 
608
744
  ### Kimi Chat Options
@@ -788,6 +924,12 @@ import {
788
924
  import {
789
925
  KimiFileClient,
790
926
  processAttachments,
927
+ FileCache,
928
+ generateContentHash,
929
+ generateCacheKey,
930
+ getDefaultFileCache,
931
+ setDefaultFileCache,
932
+ clearDefaultFileCache,
791
933
  SUPPORTED_FILE_EXTENSIONS,
792
934
  SUPPORTED_MIME_TYPES,
793
935
  isImageMediaType,
@@ -803,8 +945,21 @@ import {
803
945
  FileUploadResult,
804
946
  Attachment,
805
947
  ProcessedAttachment,
806
- } from 'kimi-vercel-ai-sdk-provider
807
- ';
948
+ FileCacheOptions,
949
+ FileCacheEntry,
950
+ } from 'kimi-vercel-ai-sdk-provider';
951
+
952
+ // Utilities
953
+ import {
954
+ analyzeReasoningPreservation,
955
+ recommendThinkingModel,
956
+ // Constants
957
+ THINKING_MODEL_TEMPERATURE,
958
+ THINKING_MODEL_DEFAULT_MAX_TOKENS,
959
+ STANDARD_MODEL_DEFAULT_MAX_TOKENS,
960
+ // Types
961
+ ReasoningAnalysis,
962
+ } from 'kimi-vercel-ai-sdk-provider';
808
963
 
809
964
  // Built-in Tools
810
965
  import {
package/dist/index.d.mts CHANGED
@@ -103,6 +103,21 @@ interface KimiModelCapabilities {
103
103
  * Whether the model supports structured outputs.
104
104
  */
105
105
  structuredOutputs?: boolean;
106
+ /**
107
+ * Default temperature for the model.
108
+ * Thinking models require temperature=1.0 for optimal reasoning.
109
+ */
110
+ defaultTemperature?: number;
111
+ /**
112
+ * Whether temperature is locked (cannot be changed).
113
+ * Thinking models have this set to true.
114
+ */
115
+ temperatureLocked?: boolean;
116
+ /**
117
+ * Default max output tokens for the model.
118
+ * Thinking models need higher limits to avoid truncated reasoning.
119
+ */
120
+ defaultMaxOutputTokens?: number;
106
121
  }
107
122
  /**
108
123
  * Infer model capabilities from the model ID.
@@ -110,10 +125,25 @@ interface KimiModelCapabilities {
110
125
  * @param modelId - The model identifier
111
126
  * @returns Inferred capabilities based on model name patterns
112
127
  *
128
+ * @remarks
129
+ * This function automatically detects model capabilities and sets
130
+ * appropriate defaults:
131
+ * - Thinking models (`-thinking` suffix) get temperature=1.0 locked
132
+ * - Thinking models get 32k default max_tokens to avoid truncation
133
+ * - K2.5 models get video input support
134
+ *
113
135
  * @example
114
136
  * ```ts
115
137
  * const caps = inferModelCapabilities('kimi-k2.5-thinking');
116
- * // { thinking: true, alwaysThinking: true, videoInput: true, ... }
138
+ * // {
139
+ * // thinking: true,
140
+ * // alwaysThinking: true,
141
+ * // videoInput: true,
142
+ * // temperatureLocked: true,
143
+ * // defaultTemperature: 1.0,
144
+ * // defaultMaxOutputTokens: 32768,
145
+ * // ...
146
+ * // }
117
147
  * ```
118
148
  */
119
149
  declare function inferModelCapabilities(modelId: string): KimiModelCapabilities;
@@ -488,6 +518,9 @@ declare class KimiChatLanguageModel implements LanguageModelV3 {
488
518
  toolCalling?: boolean;
489
519
  jsonMode?: boolean;
490
520
  structuredOutputs?: boolean;
521
+ defaultTemperature?: number;
522
+ temperatureLocked?: boolean;
523
+ defaultMaxOutputTokens?: number;
491
524
  };
492
525
  get supportedUrls(): Record<string, RegExp[]> | PromiseLike<Record<string, RegExp[]>>;
493
526
  private getArgs;
@@ -495,6 +528,107 @@ declare class KimiChatLanguageModel implements LanguageModelV3 {
495
528
  doStream(options: LanguageModelV3CallOptions): Promise<LanguageModelV3StreamResult>;
496
529
  }
497
530
 
531
+ /**
532
+ * File content caching for efficient re-use of uploaded files.
533
+ * @module
534
+ */
535
+ /**
536
+ * Entry in the file cache.
537
+ */
538
+ interface FileCacheEntry {
539
+ /** The Kimi file ID */
540
+ fileId: string;
541
+ /** Extracted text content (for documents) */
542
+ content?: string;
543
+ /** Unix timestamp of creation */
544
+ createdAt: number;
545
+ /** File purpose */
546
+ purpose: 'file-extract' | 'image' | 'video';
547
+ }
548
+ /**
549
+ * Options for configuring the file cache.
550
+ */
551
+ interface FileCacheOptions {
552
+ /**
553
+ * Maximum number of entries in the cache.
554
+ * When exceeded, least recently used entries are evicted.
555
+ * @default 100
556
+ */
557
+ maxSize?: number;
558
+ /**
559
+ * Time-to-live for cache entries in milliseconds.
560
+ * Entries older than this are considered stale.
561
+ * @default 3600000 (1 hour)
562
+ */
563
+ ttlMs?: number;
564
+ }
565
+ /**
566
+ * A simple LRU (Least Recently Used) cache for file content.
567
+ *
568
+ * This cache helps avoid re-uploading the same files multiple times
569
+ * by storing the mapping between content hashes and Kimi file IDs.
570
+ *
571
+ * @example
572
+ * ```ts
573
+ * const cache = new FileCache({ maxSize: 50, ttlMs: 30 * 60 * 1000 });
574
+ *
575
+ * // Check if we have this file cached
576
+ * const cached = cache.get(contentHash);
577
+ * if (cached) {
578
+ * console.log('Using cached file:', cached.fileId);
579
+ * }
580
+ *
581
+ * // Store a new file
582
+ * cache.set(contentHash, {
583
+ * fileId: 'file_abc123',
584
+ * content: 'extracted text...',
585
+ * purpose: 'file-extract',
586
+ * createdAt: Date.now()
587
+ * });
588
+ * ```
589
+ */
590
+ declare class FileCache {
591
+ private readonly maxSize;
592
+ private readonly ttlMs;
593
+ private readonly cache;
594
+ constructor(options?: FileCacheOptions);
595
+ /**
596
+ * Get a cached entry by content hash.
597
+ * Returns undefined if not found or expired.
598
+ * Moves the entry to the end (most recently used).
599
+ */
600
+ get(contentHash: string): FileCacheEntry | undefined;
601
+ /**
602
+ * Set a cache entry.
603
+ * Evicts the least recently used entry if cache is full.
604
+ */
605
+ set(contentHash: string, entry: FileCacheEntry): void;
606
+ /**
607
+ * Check if an entry exists and is not expired.
608
+ */
609
+ has(contentHash: string): boolean;
610
+ /**
611
+ * Delete a specific entry.
612
+ */
613
+ delete(contentHash: string): boolean;
614
+ /**
615
+ * Clear all entries.
616
+ */
617
+ clear(): void;
618
+ /**
619
+ * Get the current cache size.
620
+ */
621
+ get size(): number;
622
+ /**
623
+ * Remove all expired entries.
624
+ */
625
+ prune(): number;
626
+ /**
627
+ * Check if an entry is expired.
628
+ */
629
+ private isExpired;
630
+ }
631
+
498
632
  /**
499
633
  * Kimi File API client for uploading and managing files.
500
634
  * @module
@@ -659,6 +793,13 @@ interface ProcessAttachmentsOptions {
659
793
  uploadImages?: boolean;
660
794
  /** Whether to delete files after extraction (cleanup) */
661
795
  cleanupAfterExtract?: boolean;
796
+ /**
797
+ * Enable caching of uploaded files.
798
+ * When true, uses the default global cache.
799
+ * When a FileCache instance, uses that cache.
800
+ * @default false
801
+ */
802
+ cache?: boolean | FileCache;
662
803
  }
663
804
  /**
664
805
  * Process experimental_attachments for Kimi.
package/dist/index.d.ts CHANGED
@@ -103,6 +103,21 @@ interface KimiModelCapabilities {
103
103
  * Whether the model supports structured outputs.
104
104
  */
105
105
  structuredOutputs?: boolean;
106
+ /**
107
+ * Default temperature for the model.
108
+ * Thinking models require temperature=1.0 for optimal reasoning.
109
+ */
110
+ defaultTemperature?: number;
111
+ /**
112
+ * Whether temperature is locked (cannot be changed).
113
+ * Thinking models have this set to true.
114
+ */
115
+ temperatureLocked?: boolean;
116
+ /**
117
+ * Default max output tokens for the model.
118
+ * Thinking models need higher limits to avoid truncated reasoning.
119
+ */
120
+ defaultMaxOutputTokens?: number;
106
121
  }
107
122
  /**
108
123
  * Infer model capabilities from the model ID.
@@ -110,10 +125,25 @@ interface KimiModelCapabilities {
110
125
  * @param modelId - The model identifier
111
126
  * @returns Inferred capabilities based on model name patterns
112
127
  *
128
+ * @remarks
129
+ * This function automatically detects model capabilities and sets
130
+ * appropriate defaults:
131
+ * - Thinking models (`-thinking` suffix) get temperature=1.0 locked
132
+ * - Thinking models get 32k default max_tokens to avoid truncation
133
+ * - K2.5 models get video input support
134
+ *
113
135
  * @example
114
136
  * ```ts
115
137
  * const caps = inferModelCapabilities('kimi-k2.5-thinking');
116
- * // { thinking: true, alwaysThinking: true, videoInput: true, ... }
138
+ * // {
139
+ * // thinking: true,
140
+ * // alwaysThinking: true,
141
+ * // videoInput: true,
142
+ * // temperatureLocked: true,
143
+ * // defaultTemperature: 1.0,
144
+ * // defaultMaxOutputTokens: 32768,
145
+ * // ...
146
+ * // }
117
147
  * ```
118
148
  */
119
149
  declare function inferModelCapabilities(modelId: string): KimiModelCapabilities;
@@ -488,6 +518,9 @@ declare class KimiChatLanguageModel implements LanguageModelV3 {
488
518
  toolCalling?: boolean;
489
519
  jsonMode?: boolean;
490
520
  structuredOutputs?: boolean;
521
+ defaultTemperature?: number;
522
+ temperatureLocked?: boolean;
523
+ defaultMaxOutputTokens?: number;
491
524
  };
492
525
  get supportedUrls(): Record<string, RegExp[]> | PromiseLike<Record<string, RegExp[]>>;
493
526
  private getArgs;
@@ -495,6 +528,107 @@ declare class KimiChatLanguageModel implements LanguageModelV3 {
495
528
  doStream(options: LanguageModelV3CallOptions): Promise<LanguageModelV3StreamResult>;
496
529
  }
497
530
 
531
+ /**
532
+ * File content caching for efficient re-use of uploaded files.
533
+ * @module
534
+ */
535
+ /**
536
+ * Entry in the file cache.
537
+ */
538
+ interface FileCacheEntry {
539
+ /** The Kimi file ID */
540
+ fileId: string;
541
+ /** Extracted text content (for documents) */
542
+ content?: string;
543
+ /** Unix timestamp of creation */
544
+ createdAt: number;
545
+ /** File purpose */
546
+ purpose: 'file-extract' | 'image' | 'video';
547
+ }
548
+ /**
549
+ * Options for configuring the file cache.
550
+ */
551
+ interface FileCacheOptions {
552
+ /**
553
+ * Maximum number of entries in the cache.
554
+ * When exceeded, least recently used entries are evicted.
555
+ * @default 100
556
+ */
557
+ maxSize?: number;
558
+ /**
559
+ * Time-to-live for cache entries in milliseconds.
560
+ * Entries older than this are considered stale.
561
+ * @default 3600000 (1 hour)
562
+ */
563
+ ttlMs?: number;
564
+ }
565
+ /**
566
+ * A simple LRU (Least Recently Used) cache for file content.
567
+ *
568
+ * This cache helps avoid re-uploading the same files multiple times
569
+ * by storing the mapping between content hashes and Kimi file IDs.
570
+ *
571
+ * @example
572
+ * ```ts
573
+ * const cache = new FileCache({ maxSize: 50, ttlMs: 30 * 60 * 1000 });
574
+ *
575
+ * // Check if we have this file cached
576
+ * const cached = cache.get(contentHash);
577
+ * if (cached) {
578
+ * console.log('Using cached file:', cached.fileId);
579
+ * }
580
+ *
581
+ * // Store a new file
582
+ * cache.set(contentHash, {
583
+ * fileId: 'file_abc123',
584
+ * content: 'extracted text...',
585
+ * purpose: 'file-extract',
586
+ * createdAt: Date.now()
587
+ * });
588
+ * ```
589
+ */
590
+ declare class FileCache {
591
+ private readonly maxSize;
592
+ private readonly ttlMs;
593
+ private readonly cache;
594
+ constructor(options?: FileCacheOptions);
595
+ /**
596
+ * Get a cached entry by content hash.
597
+ * Returns undefined if not found or expired.
598
+ * Moves the entry to the end (most recently used).
599
+ */
600
+ get(contentHash: string): FileCacheEntry | undefined;
601
+ /**
602
+ * Set a cache entry.
603
+ * Evicts the least recently used entry if cache is full.
604
+ */
605
+ set(contentHash: string, entry: FileCacheEntry): void;
606
+ /**
607
+ * Check if an entry exists and is not expired.
608
+ */
609
+ has(contentHash: string): boolean;
610
+ /**
611
+ * Delete a specific entry.
612
+ */
613
+ delete(contentHash: string): boolean;
614
+ /**
615
+ * Clear all entries.
616
+ */
617
+ clear(): void;
618
+ /**
619
+ * Get the current cache size.
620
+ */
621
+ get size(): number;
622
+ /**
623
+ * Remove all expired entries.
624
+ */
625
+ prune(): number;
626
+ /**
627
+ * Check if an entry is expired.
628
+ */
629
+ private isExpired;
630
+ }
631
+
498
632
  /**
499
633
  * Kimi File API client for uploading and managing files.
500
634
  * @module
@@ -659,6 +793,13 @@ interface ProcessAttachmentsOptions {
659
793
  uploadImages?: boolean;
660
794
  /** Whether to delete files after extraction (cleanup) */
661
795
  cleanupAfterExtract?: boolean;
796
+ /**
797
+ * Enable caching of uploaded files.
798
+ * When true, uses the default global cache.
799
+ * When a FileCache instance, uses that cache.
800
+ * @default false
801
+ */
802
+ cache?: boolean | FileCache;
662
803
  }
663
804
  /**
664
805
  * Process experimental_attachments for Kimi.