@lota-sdk/core 0.4.45 → 0.4.47

package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@lota-sdk/core",
-  "version": "0.4.45",
+  "version": "0.4.47",
   "files": [
     "src",
     "infrastructure/schema"
@@ -32,7 +32,7 @@
     "@ai-sdk/provider": "^3.0.9",
     "@chat-adapter/slack": "^4.26.0",
     "@chat-adapter/state-ioredis": "^4.26.0",
-    "@lota-sdk/shared": "0.4.45",
+    "@lota-sdk/shared": "0.4.47",
     "@mendable/firecrawl-js": "^4.20.0",
     "@surrealdb/node": "^3.0.3",
     "ai": "^6.0.170",
@@ -37,6 +37,8 @@ class AiGatewayStreamAttemptTag extends Context.Service<
 
 const EXPECTED_GATEWAY_KEY_PREFIX = 'sk-bf-'
 const AI_GATEWAY_VIRTUAL_KEY_HEADER = 'x-bf-vk'
+const AI_GATEWAY_PASSTHROUGH_EXTRA_PARAMS_HEADER = 'x-bf-passthrough-extra-params'
+const AZURE_OPENAI_PROMPT_CACHE_RETENTION = '24h'
 const AI_GATEWAY_TIMEOUT_MS = 360_000
 const AI_GATEWAY_STREAM_IDLE_TIMEOUT_MS = 180_000
 const AI_GATEWAY_MAX_RETRIES = 4
@@ -512,7 +514,11 @@ export function makeAiGatewayService(
     })
   }
   const baseURL = yield* normalizeAiGatewayUrlEffect(config.aiGateway.url)
-  const provider = createOpenAI({ baseURL, apiKey, headers: { [AI_GATEWAY_VIRTUAL_KEY_HEADER]: apiKey } })
+  const provider = createOpenAI({
+    baseURL,
+    apiKey,
+    headers: { [AI_GATEWAY_VIRTUAL_KEY_HEADER]: apiKey, [AI_GATEWAY_PASSTHROUGH_EXTRA_PARAMS_HEADER]: 'true' },
+  })
 
   return AiGatewayTag.of({ semaphore, provider })
 })
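
The provider construction in this gateway module (the file name is not shown in this diff) now sends a second header on every request. Presumably `x-bf-passthrough-extra-params: true` tells the gateway to forward provider-specific parameters, such as the `promptCacheRetention` option added further down, rather than stripping them. A minimal sketch of the new construction, assuming the `createOpenAI` factory is the AI SDK's OpenAI provider and using placeholder values (the real `baseURL` and `apiKey` come from `config.aiGateway`):

```ts
import { createOpenAI } from '@ai-sdk/openai'

// Placeholder values for illustration; the real baseURL and apiKey come from
// config.aiGateway via normalizeAiGatewayUrlEffect.
const apiKey = 'sk-bf-example'
const provider = createOpenAI({
  baseURL: 'https://ai-gateway.example.com/v1',
  apiKey,
  headers: {
    'x-bf-vk': apiKey, // virtual-key header, unchanged from 0.4.45
    'x-bf-passthrough-extra-params': 'true', // new in 0.4.47
  },
})
```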
@@ -728,6 +734,47 @@ function isOpenRouterModel(modelId: string): boolean {
   return modelId.trim().toLowerCase().startsWith('openrouter/')
 }
 
+function isAzureOpenAiPromptCacheModel(modelId: string): boolean {
+  const normalized = modelId.trim().toLowerCase()
+  if (!normalized) return false
+
+  const [providerPrefix, ...modelParts] = normalized.split('/')
+  if (modelParts.length > 0) {
+    if (providerPrefix !== 'azure') return false
+    return isAzureOpenAiPromptCacheModel(modelParts.join('/'))
+  }
+
+  return (
+    normalized === 'main-gpt-model' ||
+    normalized === 'mini-gpt-model' ||
+    normalized === 'nano-gpt-model' ||
+    normalized === 'gpt-5.5' ||
+    normalized.startsWith('gpt-5.5-') ||
+    normalized === 'gpt-5.4' ||
+    normalized.startsWith('gpt-5.4-')
+  )
+}
+
+export function addAzureOpenAiPromptCacheRetention(
+  params: AiGatewayCallOptions,
+  modelId?: string,
+): AiGatewayCallOptions {
+  if (!modelId || !isAzureOpenAiPromptCacheModel(modelId)) {
+    return params
+  }
+
+  const providerOptions = isRecord(params.providerOptions) ? { ...params.providerOptions } : {}
+  const openaiOptions = isRecord(providerOptions.openai) ? { ...providerOptions.openai } : {}
+
+  return {
+    ...params,
+    providerOptions: {
+      ...providerOptions,
+      openai: { ...openaiOptions, promptCacheRetention: AZURE_OPENAI_PROMPT_CACHE_RETENTION },
+    } as AiGatewayCallOptions['providerOptions'],
+  }
+}
+
 function mergeAbortSignals(signals: Array<AbortSignal | undefined>): { signal?: AbortSignal; cleanup: () => void } {
   const activeSignals = signals.filter((signal): signal is AbortSignal => Boolean(signal))
   if (activeSignals.length === 0) return { cleanup: () => undefined }
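
Taken together, the two new functions opt specific Azure OpenAI deployments into 24-hour prompt cache retention. `isAzureOpenAiPromptCacheModel` accepts an optional `azure/` provider prefix, then matches a fixed allowlist of deployment aliases plus `gpt-5.5*` and `gpt-5.4*` model names; `addAzureOpenAiPromptCacheRetention` merges `promptCacheRetention: '24h'` into `providerOptions.openai` without clobbering existing options. A hedged usage sketch, with a plain object standing in for the SDK's `AiGatewayCallOptions`:

```ts
import { addAzureOpenAiPromptCacheRetention } from '@lota-sdk/core'

// Stand-in for an AiGatewayCallOptions value; only providerOptions matters here.
const params: any = { providerOptions: { openai: { reasoningEffort: 'high' } } }

// Matching model ids (with or without the azure/ prefix) get the retention
// option merged in, preserving existing providerOptions.openai entries:
const cached = addAzureOpenAiPromptCacheRetention(params, 'azure/gpt-5.5-mini')
// cached.providerOptions.openai
//   -> { reasoningEffort: 'high', promptCacheRetention: '24h' }

// Any other provider prefix or model name passes through unchanged:
const untouched = addAzureOpenAiPromptCacheRetention(params, 'openrouter/gpt-5.5')
// untouched === params (same reference)
```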
@@ -1106,7 +1153,9 @@ function createAiGatewayLanguageModelMiddleware(
       Promise.resolve(
         addAiGatewayReasoningRawChunks(
           normalizeAiGatewayJsonSchemas(
-            providerId === OPENAI_CHAT_PROVIDER_ID ? normalizeAiGatewayChatProviderOptions(params, modelId) : params,
+            providerId === OPENAI_CHAT_PROVIDER_ID
+              ? normalizeAiGatewayChatProviderOptions(addAzureOpenAiPromptCacheRetention(params, modelId), modelId)
+              : addAzureOpenAiPromptCacheRetention(params, modelId),
           ),
           type,
         ),
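
In the language-model middleware, the retention helper now wraps both branches of the provider check: the chat path applies it before `normalizeAiGatewayChatProviderOptions`, and every other path applies it directly, so all calls see the merged options. A condensed sketch of the ordering (the wrapper function below is hypothetical; the inner calls and names are from the diff):

```ts
// Hypothetical wrapper illustrating the branch ordering in the middleware.
function withGatewayParamFixups(params: AiGatewayCallOptions, providerId: string, modelId: string) {
  const withRetention = addAzureOpenAiPromptCacheRetention(params, modelId)
  return providerId === OPENAI_CHAT_PROVIDER_ID
    ? normalizeAiGatewayChatProviderOptions(withRetention, modelId)
    : withRetention
}
```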
@@ -9,6 +9,7 @@ export {
   aiGatewayModel,
   aiGatewayOpenRouterResponseHealingModel,
   createAiGatewayModels,
+  addAzureOpenAiPromptCacheRetention,
   extractAiGatewayChatReasoningDeltaText,
   extractAiGatewayChatReasoningText,
   injectAiGatewayChatReasoningContent,
@@ -11,6 +11,7 @@ import {
   readString,
   stringifyUnknown,
 } from '../../utils/string'
+import { trimOversizedTextForRequestBudget } from '../model-input-budget'
 import {
   COMPACTION_CHUNK_MAX_CHARS,
   CONTEXT_COMPACTION_INCLUDED_TOOL_NAMES,
@@ -71,6 +72,8 @@ class CompactionParseError extends Schema.TaggedErrorClass<CompactionParseError>
 }) {}
 
 const COMPACTION_RUNNER_RETRY_OPTIONS = { times: 2, schedule: Schedule.exponential(Duration.millis(500), 2) } as const
+const COMPACTION_CONTEXT_MESSAGE_TEXT_MAX_CHARS = 24_000
+const COMPACTION_MIN_SOURCE_CHARS = 2_000
 
 export interface ContextCompactionPromptParams {
   previousSummary: string
@@ -294,11 +297,14 @@ export function createContextCompactionRuntime(
     return chunks.join('\n').trim()
   }
 
-  const toContextMessageFromChatMessage = (message: ChatMessage): ContextMessage => ({
-    role: message.role,
-    text: toContextTextFromChatMessage(message),
-    sourceMessageId: message.id,
-  })
+  const toContextMessageFromChatMessage = (message: ChatMessage): ContextMessage => {
+    const text = toContextTextFromChatMessage(message)
+    return {
+      role: message.role,
+      text: trimOversizedTextForRequestBudget(text, COMPACTION_CONTEXT_MESSAGE_TEXT_MAX_CHARS),
+      sourceMessageId: message.id,
+    }
+  }
 
   const createSummaryMessage = (summaryText: string): ChatMessage | null => {
     const summary = normalizeSummary(summaryText)
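
On the compaction side, each context message's text is now capped at `COMPACTION_CONTEXT_MESSAGE_TEXT_MAX_CHARS` (24,000 characters) before it reaches the summarizer. The diff imports `trimOversizedTextForRequestBudget` but does not show its body; a hypothetical stand-in matching the visible `(text, maxChars)` signature might look like this:

```ts
// Hypothetical stand-in for trimOversizedTextForRequestBudget; only the
// (text, maxChars) => string signature is visible in this diff.
function trimOversizedTextForRequestBudget(text: string, maxChars: number): string {
  if (text.length <= maxChars) return text
  const marker = '\n[... trimmed for request budget ...]\n'
  const keep = maxChars - marker.length
  // Keep the head and tail of the text, dropping the middle.
  const head = text.slice(0, Math.ceil(keep / 2))
  const tail = text.slice(text.length - Math.floor(keep / 2))
  return head + marker + tail
}
```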
@@ -458,6 +464,10 @@ export function createContextCompactionRuntime(
       return { ...state, estimatedTokens: assessment.estimatedTokens, done: true }
     }
 
+    if (sourceText.length < COMPACTION_MIN_SOURCE_CHARS) {
+      return { ...state, estimatedTokens: assessment.estimatedTokens, done: true }
+    }
+
     const compacted = yield* compactContextMessagesEffect({
       previousSummary: state.summaryText,
       newMessages: contextMessages,
@@ -465,9 +475,7 @@
     const rolledSummary = yield* rollupSummaryIfOversizedEffect(normalizeSummary(compacted.summary))
 
     if (rolledSummary.length >= sourceText.length) {
-      return yield* new CompactionError({
-        message: 'Compaction summary is not shorter than compacted source',
-      })
+      return { ...state, estimatedTokens: assessment.estimatedTokens, done: true }
     }
 
     return yield* stepHistory({
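
Two behavioral changes soften the compaction loop's failure modes: source text under `COMPACTION_MIN_SOURCE_CHARS` (2,000 characters) now skips compaction entirely, and a rolled-up summary that fails to come out shorter than its source now marks the state done instead of raising `CompactionError`. A condensed, hypothetical view of the new exit conditions (the names are from the diff; the real code is an Effect generator inside `createContextCompactionRuntime`, not a standalone function):

```ts
// Hypothetical condensation of the step's exit logic as of 0.4.47.
function compactionShouldStop(sourceText: string, rolledSummary?: string): boolean {
  // New: too little source text to be worth compacting.
  if (sourceText.length < 2_000) return true
  // New: a non-shrinking summary ends the loop quietly instead of
  // failing with CompactionError.
  if (rolledSummary !== undefined && rolledSummary.length >= sourceText.length) return true
  return false
}
```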