llmist 15.13.0 → 15.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +317 -5
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +120 -1
- package/dist/index.d.ts +120 -1
- package/dist/index.js +317 -5
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -5432,12 +5432,13 @@ var init_anthropic = __esm({
|
|
|
5432
5432
|
);
|
|
5433
5433
|
}
|
|
5434
5434
|
buildApiRequest(options, descriptor, spec, messages) {
|
|
5435
|
+
const cachingEnabled = options.caching?.enabled !== false;
|
|
5435
5436
|
const systemMessages = messages.filter((message) => message.role === "system");
|
|
5436
5437
|
const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
|
|
5437
5438
|
type: "text",
|
|
5438
5439
|
text: extractMessageText(m.content),
|
|
5439
|
-
// Add cache_control to the LAST system message block
|
|
5440
|
-
...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
|
|
5440
|
+
// Add cache_control to the LAST system message block (only when caching is enabled)
|
|
5441
|
+
...cachingEnabled && index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
|
|
5441
5442
|
})) : void 0;
|
|
5442
5443
|
const nonSystemMessages = messages.filter(
|
|
5443
5444
|
(message) => message.role !== "system"
|
|
@@ -5450,7 +5451,7 @@ var init_anthropic = __esm({
|
|
|
5450
5451
|
role: message.role,
|
|
5451
5452
|
content: this.convertToAnthropicContent(
|
|
5452
5453
|
message.content,
|
|
5453
|
-
message.role === "user" && index === lastUserIndex
|
|
5454
|
+
cachingEnabled && message.role === "user" && index === lastUserIndex
|
|
5454
5455
|
)
|
|
5455
5456
|
}));
|
|
5456
5457
|
const defaultMaxTokens = spec?.maxOutputTokens ?? ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS;
|
|
@@ -5667,6 +5668,177 @@ var init_anthropic = __esm({
|
|
|
5667
5668
|
}
|
|
5668
5669
|
});
|
|
5669
5670
|
|
|
5671
|
+
// src/providers/gemini-cache-manager.ts
|
|
5672
|
+
var import_node_crypto3, GeminiCacheManager;
|
|
5673
|
+
var init_gemini_cache_manager = __esm({
|
|
5674
|
+
"src/providers/gemini-cache-manager.ts"() {
|
|
5675
|
+
"use strict";
|
|
5676
|
+
import_node_crypto3 = require("crypto");
|
|
5677
|
+
GeminiCacheManager = class {
|
|
5678
|
+
constructor(client) {
|
|
5679
|
+
this.client = client;
|
|
5680
|
+
}
|
|
5681
|
+
activeCache = null;
|
|
5682
|
+
/**
|
|
5683
|
+
* Get or create a cache for the given content.
|
|
5684
|
+
*
|
|
5685
|
+
* Returns `{ cacheName, cachedContentCount }` if a cache was created/reused, or `null` if caching
|
|
5686
|
+
* was skipped (disabled, below threshold, or API error).
|
|
5687
|
+
*
|
|
5688
|
+
* @param model - Gemini model name (e.g., "gemini-2.5-flash")
|
|
5689
|
+
* @param allContents - All Gemini-formatted contents (system + conversation)
|
|
5690
|
+
* @param config - Caching configuration from the user
|
|
5691
|
+
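* @param lastUserMessageIndex - Index of the last user message; in "conversation" scope only contents before this index are cached (the last user message itself and anything after it are sent uncached)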
|
|
5692
|
+
* @returns An object `{ cacheName, cachedContentCount }` for the created or reused cache, or null when caching was skipped
|
|
5693
|
+
*/
|
|
5694
|
+
async getOrCreateCache(model, allContents, config, lastUserMessageIndex) {
|
|
5695
|
+
if (!config.enabled) return null;
|
|
5696
|
+
const scope = config.scope ?? "conversation";
|
|
5697
|
+
const ttl = config.ttl ?? "3600s";
|
|
5698
|
+
const minTokenThreshold = config.minTokenThreshold ?? 32768;
|
|
5699
|
+
const cacheableContents = this.selectCacheableContents(
|
|
5700
|
+
allContents,
|
|
5701
|
+
scope,
|
|
5702
|
+
lastUserMessageIndex
|
|
5703
|
+
);
|
|
5704
|
+
if (cacheableContents.length === 0) return null;
|
|
5705
|
+
const estimatedTokens = this.estimateTokenCount(cacheableContents);
|
|
5706
|
+
if (estimatedTokens < minTokenThreshold) return null;
|
|
5707
|
+
const contentHash = this.computeContentHash(cacheableContents, model);
|
|
5708
|
+
if (this.activeCache && this.canReuseCache(this.activeCache, model, contentHash)) {
|
|
5709
|
+
return {
|
|
5710
|
+
cacheName: this.activeCache.name,
|
|
5711
|
+
cachedContentCount: cacheableContents.length
|
|
5712
|
+
};
|
|
5713
|
+
}
|
|
5714
|
+
try {
|
|
5715
|
+
await this.cleanupActiveCache();
|
|
5716
|
+
const response = await this.client.caches.create({
|
|
5717
|
+
model,
|
|
5718
|
+
config: {
|
|
5719
|
+
contents: cacheableContents,
|
|
5720
|
+
ttl,
|
|
5721
|
+
displayName: `llmist-${scope}-${Date.now()}`
|
|
5722
|
+
}
|
|
5723
|
+
});
|
|
5724
|
+
if (!response.name) {
|
|
5725
|
+
return null;
|
|
5726
|
+
}
|
|
5727
|
+
this.activeCache = {
|
|
5728
|
+
name: response.name,
|
|
5729
|
+
model,
|
|
5730
|
+
contentHash,
|
|
5731
|
+
expireTime: response.expireTime ?? ""
|
|
5732
|
+
};
|
|
5733
|
+
return {
|
|
5734
|
+
cacheName: response.name,
|
|
5735
|
+
cachedContentCount: cacheableContents.length
|
|
5736
|
+
};
|
|
5737
|
+
} catch (error) {
|
|
5738
|
+
console.warn("Gemini cache creation failed, continuing without cache:", error);
|
|
5739
|
+
return null;
|
|
5740
|
+
}
|
|
5741
|
+
}
|
|
5742
|
+
/**
|
|
5743
|
+
* Clean up the active cache (best-effort).
|
|
5744
|
+
* Caches auto-expire via TTL, so failure is non-critical.
|
|
5745
|
+
*/
|
|
5746
|
+
async dispose() {
|
|
5747
|
+
await this.cleanupActiveCache();
|
|
5748
|
+
}
|
|
5749
|
+
/**
|
|
5750
|
+
* Select which contents to cache based on scope.
|
|
5751
|
+
*
|
|
5752
|
+
* - "system": Only system-derived messages (the initial user+model exchanges
|
|
5753
|
+
* generated from system messages)
|
|
5754
|
+
* - "conversation": Everything before the last user message (the last user message and anything after it are not cached)
|
|
5755
|
+
*/
|
|
5756
|
+
selectCacheableContents(allContents, scope, lastUserMessageIndex) {
|
|
5757
|
+
if (scope === "system") {
|
|
5758
|
+
let systemEndIndex = 0;
|
|
5759
|
+
for (let i = 0; i < allContents.length; i++) {
|
|
5760
|
+
const content = allContents[i];
|
|
5761
|
+
if (content.role === "model" && content.parts.length === 1 && "text" in content.parts[0] && content.parts[0].text === "Understood.") {
|
|
5762
|
+
systemEndIndex = i + 1;
|
|
5763
|
+
} else if (content.role === "user") {
|
|
5764
|
+
const next = allContents[i + 1];
|
|
5765
|
+
if (next && next.role === "model" && next.parts.length === 1 && "text" in next.parts[0] && next.parts[0].text === "Understood.") {
|
|
5766
|
+
continue;
|
|
5767
|
+
}
|
|
5768
|
+
break;
|
|
5769
|
+
} else {
|
|
5770
|
+
break;
|
|
5771
|
+
}
|
|
5772
|
+
}
|
|
5773
|
+
return allContents.slice(0, systemEndIndex);
|
|
5774
|
+
}
|
|
5775
|
+
if (lastUserMessageIndex <= 0) return [];
|
|
5776
|
+
return allContents.slice(0, lastUserMessageIndex);
|
|
5777
|
+
}
|
|
5778
|
+
/**
|
|
5779
|
+
* Estimate token count from contents using character-based heuristic.
|
|
5780
|
+
* Uses ~4 characters per token (conservative estimate for English text).
|
|
5781
|
+
*/
|
|
5782
|
+
estimateTokenCount(contents) {
|
|
5783
|
+
let totalChars = 0;
|
|
5784
|
+
for (const content of contents) {
|
|
5785
|
+
for (const part of content.parts) {
|
|
5786
|
+
if ("text" in part) {
|
|
5787
|
+
totalChars += part.text.length;
|
|
5788
|
+
} else if ("inlineData" in part) {
|
|
5789
|
+
totalChars += 258 * 4;
|
|
5790
|
+
}
|
|
5791
|
+
}
|
|
5792
|
+
}
|
|
5793
|
+
return Math.ceil(totalChars / 4);
|
|
5794
|
+
}
|
|
5795
|
+
/**
|
|
5796
|
+
* Compute a stable hash of the cacheable contents for change detection.
|
|
5797
|
+
*/
|
|
5798
|
+
computeContentHash(contents, model) {
|
|
5799
|
+
const hash = (0, import_node_crypto3.createHash)("sha256");
|
|
5800
|
+
hash.update(model);
|
|
5801
|
+
for (const content of contents) {
|
|
5802
|
+
hash.update(content.role);
|
|
5803
|
+
for (const part of content.parts) {
|
|
5804
|
+
if ("text" in part) {
|
|
5805
|
+
hash.update(part.text);
|
|
5806
|
+
} else if ("inlineData" in part) {
|
|
5807
|
+
hash.update(part.inlineData.mimeType);
|
|
5808
|
+
hash.update(part.inlineData.data);
|
|
5809
|
+
}
|
|
5810
|
+
}
|
|
5811
|
+
}
|
|
5812
|
+
return hash.digest("hex");
|
|
5813
|
+
}
|
|
5814
|
+
/**
|
|
5815
|
+
* Check if an existing cache can be reused.
|
|
5816
|
+
*/
|
|
5817
|
+
canReuseCache(cache, model, contentHash) {
|
|
5818
|
+
if (cache.model !== model) return false;
|
|
5819
|
+
if (cache.contentHash !== contentHash) return false;
|
|
5820
|
+
if (cache.expireTime) {
|
|
5821
|
+
const expiresAt = new Date(cache.expireTime).getTime();
|
|
5822
|
+
const now = Date.now();
|
|
5823
|
+
if (expiresAt - now < 6e4) return false;
|
|
5824
|
+
}
|
|
5825
|
+
return true;
|
|
5826
|
+
}
|
|
5827
|
+
/**
|
|
5828
|
+
* Delete the active cache (best-effort).
|
|
5829
|
+
*/
|
|
5830
|
+
async cleanupActiveCache() {
|
|
5831
|
+
if (!this.activeCache) return;
|
|
5832
|
+
try {
|
|
5833
|
+
await this.client.caches.delete({ name: this.activeCache.name });
|
|
5834
|
+
} catch {
|
|
5835
|
+
}
|
|
5836
|
+
this.activeCache = null;
|
|
5837
|
+
}
|
|
5838
|
+
};
|
|
5839
|
+
}
|
|
5840
|
+
});
|
|
5841
|
+
|
|
5670
5842
|
// src/providers/gemini-image-models.ts
|
|
5671
5843
|
function getGeminiImageModelSpec(modelId) {
|
|
5672
5844
|
return geminiImageModels.find((m) => m.modelId === modelId);
|
|
@@ -6236,6 +6408,7 @@ var init_gemini = __esm({
|
|
|
6236
6408
|
init_messages();
|
|
6237
6409
|
init_base_provider();
|
|
6238
6410
|
init_constants2();
|
|
6411
|
+
init_gemini_cache_manager();
|
|
6239
6412
|
init_gemini_image_models();
|
|
6240
6413
|
init_gemini_models();
|
|
6241
6414
|
init_gemini_speech_models();
|
|
@@ -6261,12 +6434,62 @@ var init_gemini = __esm({
|
|
|
6261
6434
|
};
|
|
6262
6435
|
GeminiGenerativeProvider = class extends BaseProviderAdapter {
|
|
6263
6436
|
providerId = "gemini";
|
|
6437
|
+
cacheManager;
|
|
6438
|
+
constructor(client) {
|
|
6439
|
+
super(client);
|
|
6440
|
+
this.cacheManager = new GeminiCacheManager(client);
|
|
6441
|
+
}
|
|
6264
6442
|
supports(descriptor) {
|
|
6265
6443
|
return descriptor.provider === this.providerId;
|
|
6266
6444
|
}
|
|
6267
6445
|
getModelSpecs() {
|
|
6268
6446
|
return GEMINI_MODELS;
|
|
6269
6447
|
}
|
|
6448
|
+
/**
|
|
6449
|
+
* Override the base stream method to inject cache logic.
|
|
6450
|
+
*
|
|
6451
|
+
* When caching is enabled, we:
|
|
6452
|
+
* 1. Prepare messages as usual
|
|
6453
|
+
* 2. Attempt to get/create a cache for the cacheable prefix
|
|
6454
|
+
* 3. If a cache is available, strip cached contents from the request and add cachedContent ref
|
|
6455
|
+
* 4. Otherwise, proceed normally (graceful degradation)
|
|
6456
|
+
*/
|
|
6457
|
+
async *stream(options, descriptor, spec) {
|
|
6458
|
+
const preparedMessages = this.prepareMessages(options.messages);
|
|
6459
|
+
const contents = this.convertMessagesToContents(preparedMessages);
|
|
6460
|
+
const cachingConfig = options.caching;
|
|
6461
|
+
let cacheName = null;
|
|
6462
|
+
let cachedContentCount = 0;
|
|
6463
|
+
if (cachingConfig?.enabled) {
|
|
6464
|
+
let lastUserIndex = -1;
|
|
6465
|
+
for (let i = contents.length - 1; i >= 0; i--) {
|
|
6466
|
+
if (contents[i].role === "user") {
|
|
6467
|
+
lastUserIndex = i;
|
|
6468
|
+
break;
|
|
6469
|
+
}
|
|
6470
|
+
}
|
|
6471
|
+
const cacheResult = await this.cacheManager.getOrCreateCache(
|
|
6472
|
+
descriptor.name,
|
|
6473
|
+
contents,
|
|
6474
|
+
cachingConfig,
|
|
6475
|
+
lastUserIndex
|
|
6476
|
+
);
|
|
6477
|
+
if (cacheResult) {
|
|
6478
|
+
cacheName = cacheResult.cacheName;
|
|
6479
|
+
cachedContentCount = cacheResult.cachedContentCount;
|
|
6480
|
+
}
|
|
6481
|
+
}
|
|
6482
|
+
const payload = this.buildApiRequestFromContents(
|
|
6483
|
+
options,
|
|
6484
|
+
descriptor,
|
|
6485
|
+
spec,
|
|
6486
|
+
contents,
|
|
6487
|
+
cacheName,
|
|
6488
|
+
cachedContentCount
|
|
6489
|
+
);
|
|
6490
|
+
const rawStream = await this.executeStreamRequest(payload, options.signal);
|
|
6491
|
+
yield* this.normalizeProviderStream(rawStream);
|
|
6492
|
+
}
|
|
6270
6493
|
// =========================================================================
|
|
6271
6494
|
// Image Generation
|
|
6272
6495
|
// =========================================================================
|
|
@@ -6402,6 +6625,17 @@ var init_gemini = __esm({
|
|
|
6402
6625
|
}
|
|
6403
6626
|
buildApiRequest(options, descriptor, _spec, messages) {
|
|
6404
6627
|
const contents = this.convertMessagesToContents(messages);
|
|
6628
|
+
return this.buildApiRequestFromContents(options, descriptor, _spec, contents, null, 0);
|
|
6629
|
+
}
|
|
6630
|
+
/**
|
|
6631
|
+
* Build API request from pre-converted Gemini contents.
|
|
6632
|
+
*
|
|
6633
|
+
* When a cache name is provided, the cached prefix is stripped from contents
|
|
6634
|
+
* and the cache reference is added to the config. This tells Gemini to use
|
|
6635
|
+
* the pre-computed KV pairs instead of reprocessing the cached content.
|
|
6636
|
+
*/
|
|
6637
|
+
buildApiRequestFromContents(options, descriptor, _spec, contents, cacheName, cachedContentCount) {
|
|
6638
|
+
const effectiveContents = cacheName ? contents.slice(cachedContentCount) : contents;
|
|
6405
6639
|
const generationConfig = this.buildGenerationConfig(options);
|
|
6406
6640
|
const thinkingConfig = resolveGeminiThinkingConfig(options.reasoning, descriptor.name);
|
|
6407
6641
|
const config = {
|
|
@@ -6415,11 +6649,13 @@ var init_gemini = __esm({
|
|
|
6415
6649
|
}
|
|
6416
6650
|
},
|
|
6417
6651
|
...thinkingConfig ?? {},
|
|
6652
|
+
// Add cache reference if available
|
|
6653
|
+
...cacheName ? { cachedContent: cacheName } : {},
|
|
6418
6654
|
...options.extra
|
|
6419
6655
|
};
|
|
6420
6656
|
return {
|
|
6421
6657
|
model: descriptor.name,
|
|
6422
|
-
contents,
|
|
6658
|
+
contents: effectiveContents,
|
|
6423
6659
|
config
|
|
6424
6660
|
};
|
|
6425
6661
|
}
|
|
@@ -10397,6 +10633,7 @@ var init_builder = __esm({
|
|
|
10397
10633
|
// When a gadget calls withParentContext(ctx), this config is shared
|
|
10398
10634
|
sharedRetryConfig;
|
|
10399
10635
|
reasoningConfig;
|
|
10636
|
+
cachingConfig;
|
|
10400
10637
|
constructor(client) {
|
|
10401
10638
|
this.client = client;
|
|
10402
10639
|
}
|
|
@@ -11036,6 +11273,62 @@ var init_builder = __esm({
|
|
|
11036
11273
|
this.reasoningConfig = { enabled: false };
|
|
11037
11274
|
return this;
|
|
11038
11275
|
}
|
|
11276
|
+
/**
|
|
11277
|
+
* Enable context caching for supported providers.
|
|
11278
|
+
*
|
|
11279
|
+
* Can be called with:
|
|
11280
|
+
* - No args: enables caching with defaults (`{ enabled: true }`)
|
|
11281
|
+
* - A full config object: `withCaching({ enabled: true, scope: "system", ttl: "7200s" })`
|
|
11282
|
+
*
|
|
11283
|
+
* Provider behavior:
|
|
11284
|
+
* - **Anthropic**: Caching is always-on by default via `cache_control` markers.
|
|
11285
|
+
* Calling `withCaching()` explicitly is a no-op (it's already enabled).
|
|
11286
|
+
* - **Gemini**: Creates an explicit cache via `caches.create()` for the configured scope.
|
|
11287
|
+
* - **OpenAI**: Server-side automatic caching (no-op).
|
|
11288
|
+
*
|
|
11289
|
+
* @param config - Optional caching configuration
|
|
11290
|
+
* @returns This builder for chaining
|
|
11291
|
+
*
|
|
11292
|
+
* @example
|
|
11293
|
+
* ```typescript
|
|
11294
|
+
* // Simple — enable with defaults
|
|
11295
|
+
* LLMist.createAgent()
|
|
11296
|
+
* .withModel("gemini:gemini-2.5-flash")
|
|
11297
|
+
* .withCaching()
|
|
11298
|
+
* .ask("Analyze this large codebase...");
|
|
11299
|
+
*
|
|
11300
|
+
* // Cache only system prompt with longer TTL
|
|
11301
|
+
* LLMist.createAgent()
|
|
11302
|
+
* .withModel("gemini:gemini-2.5-pro")
|
|
11303
|
+
* .withCaching({ enabled: true, scope: "system", ttl: "7200s" })
|
|
11304
|
+
* .ask("...");
|
|
11305
|
+
* ```
|
|
11306
|
+
*/
|
|
11307
|
+
withCaching(config) {
|
|
11308
|
+
this.cachingConfig = config ?? { enabled: true };
|
|
11309
|
+
return this;
|
|
11310
|
+
}
|
|
11311
|
+
/**
|
|
11312
|
+
* Explicitly disable context caching.
|
|
11313
|
+
*
|
|
11314
|
+
* For Anthropic, this removes `cache_control` markers from requests,
|
|
11315
|
+
* opting out of prompt caching entirely.
|
|
11316
|
+
*
|
|
11317
|
+
* @returns This builder for chaining
|
|
11318
|
+
*
|
|
11319
|
+
* @example
|
|
11320
|
+
* ```typescript
|
|
11321
|
+
* // Disable Anthropic's automatic caching
|
|
11322
|
+
* LLMist.createAgent()
|
|
11323
|
+
* .withModel("sonnet")
|
|
11324
|
+
* .withoutCaching()
|
|
11325
|
+
* .ask("...");
|
|
11326
|
+
* ```
|
|
11327
|
+
*/
|
|
11328
|
+
withoutCaching() {
|
|
11329
|
+
this.cachingConfig = { enabled: false };
|
|
11330
|
+
return this;
|
|
11331
|
+
}
|
|
11039
11332
|
/**
|
|
11040
11333
|
* Set subagent configuration overrides.
|
|
11041
11334
|
*
|
|
@@ -11322,6 +11615,7 @@ ${endPrefix}`
|
|
|
11322
11615
|
rateLimitConfig: this.rateLimitConfig,
|
|
11323
11616
|
signal: this.signal,
|
|
11324
11617
|
reasoning: this.reasoningConfig,
|
|
11618
|
+
caching: this.cachingConfig,
|
|
11325
11619
|
subagentConfig: this.subagentConfig,
|
|
11326
11620
|
// Tree context for shared tree model (subagents share parent's tree)
|
|
11327
11621
|
parentTree: this.parentContext?.tree,
|
|
@@ -11510,6 +11804,7 @@ ${endPrefix}`
|
|
|
11510
11804
|
rateLimitConfig: this.rateLimitConfig,
|
|
11511
11805
|
signal: this.signal,
|
|
11512
11806
|
reasoning: this.reasoningConfig,
|
|
11807
|
+
caching: this.cachingConfig,
|
|
11513
11808
|
subagentConfig: this.subagentConfig,
|
|
11514
11809
|
// Tree context for shared tree model (subagents share parent's tree)
|
|
11515
11810
|
parentTree: this.parentContext?.tree,
|
|
@@ -14125,6 +14420,7 @@ var init_agent = __esm({
|
|
|
14125
14420
|
// Cancellation
|
|
14126
14421
|
signal;
|
|
14127
14422
|
reasoning;
|
|
14423
|
+
caching;
|
|
14128
14424
|
// Retry configuration
|
|
14129
14425
|
retryConfig;
|
|
14130
14426
|
// Rate limit tracker for proactive throttling
|
|
@@ -14217,6 +14513,7 @@ var init_agent = __esm({
|
|
|
14217
14513
|
}
|
|
14218
14514
|
this.signal = options.signal;
|
|
14219
14515
|
this.reasoning = options.reasoning;
|
|
14516
|
+
this.caching = options.caching;
|
|
14220
14517
|
this.retryConfig = options.sharedRetryConfig ?? resolveRetryConfig(options.retryConfig);
|
|
14221
14518
|
if (options.sharedRateLimitTracker) {
|
|
14222
14519
|
this.rateLimitTracker = options.sharedRateLimitTracker;
|
|
@@ -14934,6 +15231,19 @@ var init_agent = __esm({
|
|
|
14934
15231
|
}
|
|
14935
15232
|
return void 0;
|
|
14936
15233
|
}
|
|
15234
|
+
/**
|
|
15235
|
+
* Resolve caching configuration.
|
|
15236
|
+
*
|
|
15237
|
+
* Priority: explicit config > default enabled (preserves Anthropic's existing behavior)
|
|
15238
|
+
* Default is `{ enabled: true }` which means:
|
|
15239
|
+
* - Anthropic: `cache_control` markers are added (existing behavior preserved)
|
|
15240
|
+
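* - Gemini: Cache manager is consulted with default settings; it skips caching only when the cacheable content is empty or below `minTokenThreshold`, or when cache creation fails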
|
|
15241
|
+
* - OpenAI: No-op (server-side automatic)
|
|
15242
|
+
*/
|
|
15243
|
+
resolveCachingConfig() {
|
|
15244
|
+
if (this.caching !== void 0) return this.caching;
|
|
15245
|
+
return { enabled: true };
|
|
15246
|
+
}
|
|
14937
15247
|
/**
|
|
14938
15248
|
* Prepare LLM call options, create tree node, and process beforeLLMCall controller.
|
|
14939
15249
|
* @returns options, node ID, and optional skipWithSynthetic response if controller wants to skip
|
|
@@ -14941,13 +15251,15 @@ var init_agent = __esm({
|
|
|
14941
15251
|
async prepareLLMCall(iteration) {
|
|
14942
15252
|
const spec = this.client.modelRegistry?.getModelSpec?.(this.model);
|
|
14943
15253
|
const reasoning = this.resolveReasoningConfig(spec);
|
|
15254
|
+
const caching = this.resolveCachingConfig();
|
|
14944
15255
|
let llmOptions = {
|
|
14945
15256
|
model: this.model,
|
|
14946
15257
|
messages: this.conversation.getMessages(),
|
|
14947
15258
|
temperature: this.temperature,
|
|
14948
15259
|
maxTokens: this.defaultMaxTokens,
|
|
14949
15260
|
signal: this.signal,
|
|
14950
|
-
reasoning
|
|
15261
|
+
reasoning,
|
|
15262
|
+
caching
|
|
14951
15263
|
};
|
|
14952
15264
|
const llmNode = this.tree.addLLMCall({
|
|
14953
15265
|
iteration,
|