@lota-sdk/core 0.4.45 → 0.4.47

package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@lota-sdk/core",
-  "version": "0.4.45",
+  "version": "0.4.47",
   "files": [
     "src",
     "infrastructure/schema"
@@ -32,7 +32,7 @@
     "@ai-sdk/provider": "^3.0.9",
     "@chat-adapter/slack": "^4.26.0",
     "@chat-adapter/state-ioredis": "^4.26.0",
-    "@lota-sdk/shared": "0.4.45",
+    "@lota-sdk/shared": "0.4.47",
     "@mendable/firecrawl-js": "^4.20.0",
     "@surrealdb/node": "^3.0.3",
     "ai": "^6.0.170",
@@ -37,6 +37,8 @@ class AiGatewayStreamAttemptTag extends Context.Service<
 
 const EXPECTED_GATEWAY_KEY_PREFIX = 'sk-bf-'
 const AI_GATEWAY_VIRTUAL_KEY_HEADER = 'x-bf-vk'
+const AI_GATEWAY_PASSTHROUGH_EXTRA_PARAMS_HEADER = 'x-bf-passthrough-extra-params'
+const AZURE_OPENAI_PROMPT_CACHE_RETENTION = '24h'
 const AI_GATEWAY_TIMEOUT_MS = 360_000
 const AI_GATEWAY_STREAM_IDLE_TIMEOUT_MS = 180_000
 const AI_GATEWAY_MAX_RETRIES = 4
@@ -512,7 +514,11 @@ export function makeAiGatewayService(
     })
   }
   const baseURL = yield* normalizeAiGatewayUrlEffect(config.aiGateway.url)
-  const provider = createOpenAI({ baseURL, apiKey, headers: { [AI_GATEWAY_VIRTUAL_KEY_HEADER]: apiKey } })
+  const provider = createOpenAI({
+    baseURL,
+    apiKey,
+    headers: { [AI_GATEWAY_VIRTUAL_KEY_HEADER]: apiKey, [AI_GATEWAY_PASSTHROUGH_EXTRA_PARAMS_HEADER]: 'true' },
+  })
 
   return AiGatewayTag.of({ semaphore, provider })
 })
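
The provider construction in this gateway module (the file name is not shown in this diff) now sends a second header on every request. Presumably `x-bf-passthrough-extra-params: true` tells the gateway to forward provider-specific parameters, such as the `promptCacheRetention` option added further down, rather than stripping them. A minimal sketch of the new construction, assuming the `createOpenAI` factory is the AI SDK's OpenAI provider and using placeholder values (the real `baseURL` and `apiKey` come from `config.aiGateway`):

```ts
import { createOpenAI } from '@ai-sdk/openai'

// Placeholder values for illustration; the real baseURL and apiKey come from
// config.aiGateway via normalizeAiGatewayUrlEffect.
const apiKey = 'sk-bf-example'
const provider = createOpenAI({
  baseURL: 'https://ai-gateway.example.com/v1',
  apiKey,
  headers: {
    'x-bf-vk': apiKey, // virtual-key header, unchanged from 0.4.45
    'x-bf-passthrough-extra-params': 'true', // new in 0.4.47
  },
})
```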
@@ -728,6 +734,47 @@ function isOpenRouterModel(modelId: string): boolean {
   return modelId.trim().toLowerCase().startsWith('openrouter/')
 }
 
+function isAzureOpenAiPromptCacheModel(modelId: string): boolean {
+  const normalized = modelId.trim().toLowerCase()
+  if (!normalized) return false
+
+  const [providerPrefix, ...modelParts] = normalized.split('/')
+  if (modelParts.length > 0) {
+    if (providerPrefix !== 'azure') return false
+    return isAzureOpenAiPromptCacheModel(modelParts.join('/'))
+  }
+
+  return (
+    normalized === 'main-gpt-model' ||
+    normalized === 'mini-gpt-model' ||
+    normalized === 'nano-gpt-model' ||
+    normalized === 'gpt-5.5' ||
+    normalized.startsWith('gpt-5.5-') ||
+    normalized === 'gpt-5.4' ||
+    normalized.startsWith('gpt-5.4-')
+  )
+}
+
+export function addAzureOpenAiPromptCacheRetention(
+  params: AiGatewayCallOptions,
+  modelId?: string,
+): AiGatewayCallOptions {
+  if (!modelId || !isAzureOpenAiPromptCacheModel(modelId)) {
+    return params
+  }
+
+  const providerOptions = isRecord(params.providerOptions) ? { ...params.providerOptions } : {}
+  const openaiOptions = isRecord(providerOptions.openai) ? { ...providerOptions.openai } : {}
+
+  return {
+    ...params,
+    providerOptions: {
+      ...providerOptions,
+      openai: { ...openaiOptions, promptCacheRetention: AZURE_OPENAI_PROMPT_CACHE_RETENTION },
+    } as AiGatewayCallOptions['providerOptions'],
+  }
+}
+
 function mergeAbortSignals(signals: Array<AbortSignal | undefined>): { signal?: AbortSignal; cleanup: () => void } {
   const activeSignals = signals.filter((signal): signal is AbortSignal => Boolean(signal))
   if (activeSignals.length === 0) return { cleanup: () => undefined }
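
Taken together, the two new functions opt specific Azure OpenAI deployments into 24-hour prompt cache retention. `isAzureOpenAiPromptCacheModel` accepts an optional `azure/` provider prefix, then matches a fixed allowlist of deployment aliases plus `gpt-5.5*` and `gpt-5.4*` model names; `addAzureOpenAiPromptCacheRetention` merges `promptCacheRetention: '24h'` into `providerOptions.openai` without clobbering existing options. A hedged usage sketch, with a plain object standing in for the SDK's `AiGatewayCallOptions`:

```ts
import { addAzureOpenAiPromptCacheRetention } from '@lota-sdk/core'

// Stand-in for an AiGatewayCallOptions value; only providerOptions matters here.
const params: any = { providerOptions: { openai: { reasoningEffort: 'high' } } }

// Matching model ids (with or without the azure/ prefix) get the retention
// option merged in, preserving existing providerOptions.openai entries:
const cached = addAzureOpenAiPromptCacheRetention(params, 'azure/gpt-5.5-mini')
// cached.providerOptions.openai
//   -> { reasoningEffort: 'high', promptCacheRetention: '24h' }

// Any other provider prefix or model name passes through unchanged:
const untouched = addAzureOpenAiPromptCacheRetention(params, 'openrouter/gpt-5.5')
// untouched === params (same reference)
```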
@@ -1106,7 +1153,9 @@ function createAiGatewayLanguageModelMiddleware(
       Promise.resolve(
         addAiGatewayReasoningRawChunks(
           normalizeAiGatewayJsonSchemas(
-            providerId === OPENAI_CHAT_PROVIDER_ID ? normalizeAiGatewayChatProviderOptions(params, modelId) : params,
+            providerId === OPENAI_CHAT_PROVIDER_ID
+              ? normalizeAiGatewayChatProviderOptions(addAzureOpenAiPromptCacheRetention(params, modelId), modelId)
+              : addAzureOpenAiPromptCacheRetention(params, modelId),
           ),
           type,
         ),
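
In the language-model middleware, the retention helper now wraps both branches of the provider check: the chat path applies it before `normalizeAiGatewayChatProviderOptions`, and every other path applies it directly, so all calls see the merged options. A condensed sketch of the ordering (the wrapper function below is hypothetical; the inner calls and names are from the diff):

```ts
// Hypothetical wrapper illustrating the branch ordering in the middleware.
function withGatewayParamFixups(params: AiGatewayCallOptions, providerId: string, modelId: string) {
  const withRetention = addAzureOpenAiPromptCacheRetention(params, modelId)
  return providerId === OPENAI_CHAT_PROVIDER_ID
    ? normalizeAiGatewayChatProviderOptions(withRetention, modelId)
    : withRetention
}
```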
@@ -9,6 +9,7 @@ export {
   aiGatewayModel,
   aiGatewayOpenRouterResponseHealingModel,
   createAiGatewayModels,
+  addAzureOpenAiPromptCacheRetention,
   extractAiGatewayChatReasoningDeltaText,
   extractAiGatewayChatReasoningText,
   injectAiGatewayChatReasoningContent,
@@ -11,6 +11,7 @@ import {
   readString,
   stringifyUnknown,
 } from '../../utils/string'
+import { trimOversizedTextForRequestBudget } from '../model-input-budget'
 import {
   COMPACTION_CHUNK_MAX_CHARS,
   CONTEXT_COMPACTION_INCLUDED_TOOL_NAMES,
@@ -71,6 +72,8 @@ class CompactionParseError extends Schema.TaggedErrorClass<CompactionParseError>
 }) {}
 
 const COMPACTION_RUNNER_RETRY_OPTIONS = { times: 2, schedule: Schedule.exponential(Duration.millis(500), 2) } as const
+const COMPACTION_CONTEXT_MESSAGE_TEXT_MAX_CHARS = 24_000
+const COMPACTION_MIN_SOURCE_CHARS = 2_000
 
 export interface ContextCompactionPromptParams {
   previousSummary: string
@@ -294,11 +297,14 @@ export function createContextCompactionRuntime(
     return chunks.join('\n').trim()
   }
 
-  const toContextMessageFromChatMessage = (message: ChatMessage): ContextMessage => ({
-    role: message.role,
-    text: toContextTextFromChatMessage(message),
-    sourceMessageId: message.id,
-  })
+  const toContextMessageFromChatMessage = (message: ChatMessage): ContextMessage => {
+    const text = toContextTextFromChatMessage(message)
+    return {
+      role: message.role,
+      text: trimOversizedTextForRequestBudget(text, COMPACTION_CONTEXT_MESSAGE_TEXT_MAX_CHARS),
+      sourceMessageId: message.id,
+    }
+  }
 
   const createSummaryMessage = (summaryText: string): ChatMessage | null => {
     const summary = normalizeSummary(summaryText)
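
On the compaction side, each context message's text is now capped at `COMPACTION_CONTEXT_MESSAGE_TEXT_MAX_CHARS` (24,000 characters) before it reaches the summarizer. The diff imports `trimOversizedTextForRequestBudget` but does not show its body; a hypothetical stand-in matching the visible `(text, maxChars)` signature might look like this:

```ts
// Hypothetical stand-in for trimOversizedTextForRequestBudget; only the
// (text, maxChars) => string signature is visible in this diff.
function trimOversizedTextForRequestBudget(text: string, maxChars: number): string {
  if (text.length <= maxChars) return text
  const marker = '\n[... trimmed for request budget ...]\n'
  const keep = maxChars - marker.length
  // Keep the head and tail of the text, dropping the middle.
  const head = text.slice(0, Math.ceil(keep / 2))
  const tail = text.slice(text.length - Math.floor(keep / 2))
  return head + marker + tail
}
```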
@@ -458,6 +464,10 @@ export function createContextCompactionRuntime(
       return { ...state, estimatedTokens: assessment.estimatedTokens, done: true }
     }
 
+    if (sourceText.length < COMPACTION_MIN_SOURCE_CHARS) {
+      return { ...state, estimatedTokens: assessment.estimatedTokens, done: true }
+    }
+
     const compacted = yield* compactContextMessagesEffect({
       previousSummary: state.summaryText,
       newMessages: contextMessages,
@@ -465,9 +475,7 @@
     const rolledSummary = yield* rollupSummaryIfOversizedEffect(normalizeSummary(compacted.summary))
 
     if (rolledSummary.length >= sourceText.length) {
-      return yield* new CompactionError({
-        message: 'Compaction summary is not shorter than compacted source',
-      })
+      return { ...state, estimatedTokens: assessment.estimatedTokens, done: true }
     }
 
     return yield* stepHistory({
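
Two behavioral changes soften the compaction loop's failure modes: source text under `COMPACTION_MIN_SOURCE_CHARS` (2,000 characters) now skips compaction entirely, and a rolled-up summary that fails to come out shorter than its source now marks the state done instead of raising `CompactionError`. A condensed, hypothetical view of the new exit conditions (the names are from the diff; the real code is an Effect generator inside `createContextCompactionRuntime`, not a standalone function):

```ts
// Hypothetical condensation of the step's exit logic as of 0.4.47.
function compactionShouldStop(sourceText: string, rolledSummary?: string): boolean {
  // New: too little source text to be worth compacting.
  if (sourceText.length < 2_000) return true
  // New: a non-shrinking summary ends the loop quietly instead of
  // failing with CompactionError.
  if (rolledSummary !== undefined && rolledSummary.length >= sourceText.length) return true
  return false
}
```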