@lota-sdk/core 0.4.43 → 0.4.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lota-sdk/core",
3
- "version": "0.4.43",
3
+ "version": "0.4.45",
4
4
  "files": [
5
5
  "src",
6
6
  "infrastructure/schema"
@@ -32,7 +32,7 @@
32
32
  "@ai-sdk/provider": "^3.0.9",
33
33
  "@chat-adapter/slack": "^4.26.0",
34
34
  "@chat-adapter/state-ioredis": "^4.26.0",
35
- "@lota-sdk/shared": "0.4.43",
35
+ "@lota-sdk/shared": "0.4.45",
36
36
  "@mendable/firecrawl-js": "^4.20.0",
37
37
  "@surrealdb/node": "^3.0.3",
38
38
  "ai": "^6.0.170",
@@ -0,0 +1,189 @@
1
+ import type { ChatMessage } from '@lota-sdk/shared'
2
+
3
+ import { CONTEXT_WINDOW_TOKENS } from './context-compaction/context-compaction-constants'
4
+
5
/** Default ceiling, in serialized characters, for the whole message list of one model call. */
const MODEL_INPUT_MAX_TOTAL_CHARS = 240_000
/** Upper bound for a single text part of the latest user message. */
const MODEL_INPUT_LATEST_USER_TEXT_MAX_CHARS = 120_000
/** Upper bound for a single text part of any other message. */
const MODEL_INPUT_TEXT_PART_MAX_CHARS = 24_000
/** Upper bound for the serialized `input` or `output` of a `tool-*` part. */
const MODEL_INPUT_TOOL_PAYLOAD_MAX_CHARS = 12_000
/** Upper bound for the serialized `data` of a `tool-*` or `data-*` part. */
const MODEL_INPUT_DATA_PAYLOAD_MAX_CHARS = 8_000
/** Id assigned to the synthetic message that reports how many messages were omitted. */
const MODEL_INPUT_NOTICE_ID = 'model-input-budget-notice'
11
+
12
/** Optional settings accepted by `shapeModelInputMessagesForBudget`. */
interface ShapeModelInputMessagesOptions {
  /**
   * Id of the message to treat as the latest user message. That message gets the
   * larger text limit and is never dropped while fitting the list to the budget.
   */
  latestUserMessageId?: string
  /** Total character budget for the serialized message list; a default is derived when omitted. */
  maxTotalChars?: number
}
16
+
17
/**
 * Measures how many characters `value` occupies once JSON-serialized.
 *
 * Values that serialize to nothing (`undefined`, functions, symbols) count as 0.
 * When plain serialization throws (circular references, bigint values), the value
 * is serialized again with a tolerant replacer so the estimate still reflects the
 * real payload size instead of collapsing to the length of `"[object Object]"`.
 *
 * @param value - Any value, typically a message or a list of messages.
 * @returns The serialized length in UTF-16 code units; an approximation on the fallback path.
 */
function serializedLength(value: unknown): number {
  try {
    const serialized: unknown = JSON.stringify(value)
    return typeof serialized === 'string' ? serialized.length : 0
  } catch {
    // Plain serialization failed; fall through to the tolerant pass below.
  }

  // Objects already visited are replaced by a short placeholder. This also collapses
  // shared (non-circular) references, which is acceptable for a size estimate.
  const seen = new WeakSet<object>()
  try {
    const approximated: unknown = JSON.stringify(value, (_key: string, nested: unknown) => {
      if (typeof nested === 'bigint') return nested.toString()
      if (typeof nested === 'object' && nested !== null) {
        if (seen.has(nested)) return '[Circular]'
        seen.add(nested)
      }
      return nested
    })
    return typeof approximated === 'string' ? approximated.length : 0
  } catch {
    // Last resort (for example a throwing `toJSON`): keep the original coarse estimate.
    return String(value).length
  }
}
25
+
26
/**
 * Converts a payload into the string form used for size checks and trimming.
 *
 * @param value - Payload of any shape.
 * @returns `null` when there is nothing to serialize (`null`, `undefined`, or a value
 *   JSON cannot represent such as a function or symbol), the string itself when
 *   `value` is already a string, the JSON text otherwise, or the literal
 *   `'[unserializable payload]'` when serialization throws.
 */
function stringifyPayload(value: unknown): string | null {
  if (value === null || value === undefined) return null
  if (typeof value === 'string') return value

  try {
    // JSON.stringify yields `undefined` at runtime for functions and symbols even
    // though its signature says `string`; normalise that to `null`.
    const serialized: unknown = JSON.stringify(value)
    return typeof serialized === 'string' ? serialized : null
  } catch {
    return '[unserializable payload]'
  }
}
36
+
37
/**
 * Shortens `value` to at most `maxChars` UTF-16 code units by cutting out its middle
 * and inserting a bracketed notice in the gap.
 *
 * Roughly 70% of the space left after the notice goes to the beginning of the text
 * and the remainder to its end. Cuts never split a surrogate pair, and whitespace
 * next to the notice is trimmed. When the notice itself does not fit, only the
 * leading `maxChars` characters of the notice are returned.
 *
 * @param value - Text to shorten.
 * @param maxChars - Maximum length of the result; non-positive or NaN values are treated as 0.
 * @param notice - Explanation placed between the kept beginning and end.
 * @returns `value` unchanged when it already fits, otherwise the shortened text.
 */
function trimMiddle(value: string, maxChars: number, notice: string): string {
  // A negative budget would make `slice(0, maxChars)` count from the end and return
  // almost the whole marker, so clamp it first.
  const budget = maxChars > 0 ? Math.floor(maxChars) : 0
  if (value.length <= budget) return value

  const marker = `\n\n[${notice}]\n\n`
  const available = budget - marker.length
  if (available <= 0) return marker.slice(0, budget)

  const isHighSurrogate = (code: number): boolean => code >= 0xd800 && code <= 0xdbff
  const isLowSurrogate = (code: number): boolean => code >= 0xdc00 && code <= 0xdfff

  const headChars = Math.ceil(available * 0.7)
  const tailChars = available - headChars

  // Pull the head boundary back by one unit when it would separate a surrogate pair.
  let headEnd = headChars
  if (isHighSurrogate(value.charCodeAt(headEnd - 1)) && isLowSurrogate(value.charCodeAt(headEnd))) {
    headEnd -= 1
  }

  // Push the tail boundary forward by one unit for the same reason.
  let tailStart = value.length - tailChars
  if (
    tailChars > 0 &&
    isLowSurrogate(value.charCodeAt(tailStart)) &&
    isHighSurrogate(value.charCodeAt(tailStart - 1))
  ) {
    tailStart += 1
  }

  const head = value.slice(0, headEnd).trimEnd()
  // Guard on tailChars: with zero tail characters the slice would otherwise return the whole value.
  const tail = tailChars > 0 ? value.slice(tailStart).trimStart() : ''

  return `${head}${marker}${tail}`
}
51
+
52
/**
 * Shortens a text part to `maxChars` via `trimMiddle`, using a notice that reports
 * the original length and that only the beginning and end remain visible.
 *
 * @param text - Text content of a message part.
 * @param maxChars - Maximum length of the returned text.
 * @returns The text as returned by `trimMiddle`.
 */
function trimTextPart(text: string, maxChars: number): string {
  const notice =
    `Model input truncated this text from ${text.length} characters to fit the request budget. ` +
    'Only the beginning and end are visible to the model.'

  return trimMiddle(text, maxChars, notice)
}
60
+
61
/**
 * Public entry point for shortening a single piece of text to a character budget.
 * Delegates to `trimTextPart`.
 *
 * @param text - Text to shorten.
 * @param maxChars - Maximum length of the returned text.
 * @returns The text as returned by `trimTextPart`.
 */
export function trimOversizedTextForRequestBudget(text: string, maxChars: number): string {
  const trimmed = trimTextPart(text, maxChars)
  return trimmed
}
64
+
65
/**
 * Replaces an oversized payload with a middle-trimmed string of its serialized form.
 *
 * @param value - Payload of any shape.
 * @param maxChars - Maximum serialized length allowed before trimming applies.
 * @param label - Human-readable name of the payload, used in the truncation notice.
 * @returns The original `value` (same reference) when it has no serialized form or
 *   already fits; otherwise a trimmed string.
 */
function trimPayload(value: unknown, maxChars: number, label: string): unknown {
  const serialized = stringifyPayload(value)

  // Nothing to measure (null, undefined, empty string): leave the payload alone.
  if (!serialized) return value
  if (serialized.length <= maxChars) return value

  const notice = `Model input truncated ${label} from ${serialized.length} serialized characters to fit the request budget.`
  return trimMiddle(serialized, maxChars, notice)
}
75
+
76
/**
 * Applies the per-part size limits to one message.
 *
 * Text parts longer than the text limit are middle-trimmed. For `tool-*` parts the
 * `input`, `output` and `data` fields are trimmed, and for `data-*` parts the `data`
 * field is trimmed. Parts that need no change keep their identity.
 *
 * @param message - Message whose parts are inspected.
 * @param maxTotalChars - Total budget of the model call; caps the text limit of the latest user message at 70% of it.
 * @param latestUserMessageId - Id of the message that receives the larger text limit.
 * @returns The same `message` object when nothing changed, otherwise a copy with the shaped parts.
 */
function shapeMessageParts(message: ChatMessage, maxTotalChars: number, latestUserMessageId?: string): ChatMessage {
  type Part = ChatMessage['parts'][number]

  const isLatestUserMessage = Boolean(latestUserMessageId) && message.id === latestUserMessageId
  let textLimit = MODEL_INPUT_TEXT_PART_MAX_CHARS
  if (isLatestUserMessage) {
    textLimit = Math.min(MODEL_INPUT_LATEST_USER_TEXT_MAX_CHARS, Math.floor(maxTotalChars * 0.7))
  }

  // Fields of a tool part that are subject to trimming, with their limit and notice label.
  const toolFields = [
    ['input', MODEL_INPUT_TOOL_PAYLOAD_MAX_CHARS, 'tool input'],
    ['output', MODEL_INPUT_TOOL_PAYLOAD_MAX_CHARS, 'tool output'],
    ['data', MODEL_INPUT_DATA_PAYLOAD_MAX_CHARS, 'tool data'],
  ] as const

  const shapePart = (part: Part): Part => {
    if (part.type === 'text' && typeof part.text === 'string' && part.text.length > textLimit) {
      return { ...part, text: trimTextPart(part.text, textLimit) }
    }

    if (part.type.startsWith('tool-')) {
      const draft = { ...part } as Record<string, unknown>
      let touched = false
      for (const [field, limit, label] of toolFields) {
        const before = draft[field]
        if (before === undefined) continue
        const after = trimPayload(before, limit, label)
        draft[field] = after
        if (after !== before) touched = true
      }
      // A `tool-*` type can never match the `data-*` branch, so the result is final here.
      return touched ? (draft as Part) : part
    }

    if (part.type.startsWith('data-') && 'data' in part) {
      const draft = { ...part } as Record<string, unknown>
      const before = draft.data
      draft.data = trimPayload(before, MODEL_INPUT_DATA_PAYLOAD_MAX_CHARS, 'data payload')
      if (draft.data !== before) return draft as Part
    }

    return part
  }

  const shapedParts = message.parts.map((part) => shapePart(part))
  const changed = shapedParts.some((part, index) => part !== message.parts[index])

  return changed ? { ...message, parts: shapedParts } : message
}
118
+
119
/**
 * Builds the synthetic system message that tells the model how many messages were
 * left out of the call.
 *
 * The sentence agrees in number with the count ("1 older message was",
 * "3 older messages were").
 *
 * @param omittedCount - Number of messages that were dropped.
 * @returns A system message with the fixed notice id and a single text part.
 */
function createBudgetNotice(omittedCount: number): ChatMessage {
  const isSingular = omittedCount === 1
  const subject = `${omittedCount} older message${isSingular ? '' : 's'}`
  const verb = isSingular ? 'was' : 'were'

  return {
    id: MODEL_INPUT_NOTICE_ID,
    role: 'system',
    parts: [
      {
        type: 'text',
        text:
          `${subject} ${verb} omitted from this model call ` +
          'because the live thread exceeded the model input budget. Use the compacted summary and latest user request as source of truth.',
      },
    ],
  }
}
133
+
134
/**
 * Drops messages until the list, together with a notice about the omission, fits the
 * character budget.
 *
 * First pass: walks from newest to oldest and keeps every message that still fits,
 * plus the latest user message regardless of size. Second pass: if the kept list
 * plus the notice is still too large, removes the oldest kept message that is not
 * the latest user message, one at a time.
 *
 * @param messages - Messages in chronological order.
 * @param maxTotalChars - Budget for the serialized list.
 * @param latestUserMessageId - Id of the message that must never be dropped.
 * @returns The kept messages in chronological order, preceded by the notice when anything was omitted.
 */
function fitMessagesToBudget(
  messages: ChatMessage[],
  maxTotalChars: number,
  latestUserMessageId?: string,
): ChatMessage[] {
  const isLatestUser = (candidate: ChatMessage): boolean =>
    latestUserMessageId !== undefined && candidate.id === latestUserMessageId

  const newestFirst: ChatMessage[] = []
  // Starts at 2 for the enclosing array brackets; each message adds 1 for its separator.
  let usedChars = 2
  let omittedCount = 0

  for (const candidate of Array.from(messages).reverse()) {
    const cost = serializedLength(candidate) + 1
    const fits = usedChars + cost <= maxTotalChars

    if (fits || isLatestUser(candidate)) {
      newestFirst.push(candidate)
      usedChars += cost
    } else {
      omittedCount += 1
    }
  }

  const ordered = newestFirst.reverse()
  if (omittedCount === 0) return ordered

  // Re-evaluated on every iteration because both the list and the notice text change.
  const exceedsBudget = (): boolean =>
    serializedLength(ordered) + serializedLength(createBudgetNotice(omittedCount)) + 1 > maxTotalChars

  while (ordered.length > 0 && exceedsBudget()) {
    const latestUserIndex = ordered.findIndex((candidate) => isLatestUser(candidate))

    // Oldest entry that is not the protected latest user message.
    let removableIndex = 0
    if (latestUserIndex === 0) {
      removableIndex = ordered.length > 1 ? 1 : -1
    }
    if (removableIndex === -1) break

    ordered.splice(removableIndex, 1)
    omittedCount += 1
  }

  return [createBudgetNotice(omittedCount), ...ordered]
}
174
+
175
/**
 * Shapes a message list so its serialized form fits the model input budget.
 *
 * Every message first has its oversized parts trimmed. If the list is still larger
 * than the budget, whole messages are dropped by `fitMessagesToBudget`.
 *
 * @param messages - Messages in chronological order.
 * @param options - Optional latest-user-message id and budget override. Without an
 *   override the budget is the smaller of `MODEL_INPUT_MAX_TOTAL_CHARS` and twice
 *   `CONTEXT_WINDOW_TOKENS`.
 * @returns The shaped messages, possibly preceded by an omission notice.
 */
export function shapeModelInputMessagesForBudget(
  messages: ChatMessage[],
  options: ShapeModelInputMessagesOptions = {},
): ChatMessage[] {
  const { latestUserMessageId } = options
  const maxTotalChars = options.maxTotalChars ?? Math.min(MODEL_INPUT_MAX_TOTAL_CHARS, CONTEXT_WINDOW_TOKENS * 2)

  const shapedMessages = messages.map((message) => shapeMessageParts(message, maxTotalChars, latestUserMessageId))

  const fitsBudget = serializedLength(shapedMessages) <= maxTotalChars
  if (fitsBudget) return shapedMessages

  return fitMessagesToBudget(shapedMessages, maxTotalChars, latestUserMessageId)
}
@@ -7,6 +7,7 @@ import {
7
7
  } from '../../runtime/chat-attachments'
8
8
  import type { ReadableUploadMetadataLike } from '../../runtime/chat-types'
9
9
  import type { ContextCompactionRuntime } from '../../runtime/context-compaction/context-compaction'
10
+ import { shapeModelInputMessagesForBudget } from '../../runtime/model-input-budget'
10
11
 
11
12
  export function upsertChatHistoryMessage(messages: ChatMessage[], nextMessage: ChatMessage): ChatMessage[] {
12
13
  const existingIndex = messages.findIndex((message) => message.id === nextMessage.id)
@@ -52,11 +53,14 @@ export function createThreadTurnMessageContext(params: {
52
53
  })
53
54
 
54
55
  const buildRunInputMessages = (extraMessages: ChatMessage[] = []): ChatMessage[] =>
55
- buildModelInputMessagesWithUploadMetadata({
56
- messages: [...currentMessages, ...extraMessages],
57
- latestUserMessageId: params.latestUserMessageId,
58
- uploadMetadataText: buildReadableUploadMetadataText(listReadableUploads(extraMessages)),
59
- })
56
+ shapeModelInputMessagesForBudget(
57
+ buildModelInputMessagesWithUploadMetadata({
58
+ messages: [...currentMessages, ...extraMessages],
59
+ latestUserMessageId: params.latestUserMessageId,
60
+ uploadMetadataText: buildReadableUploadMetadataText(listReadableUploads(extraMessages)),
61
+ }),
62
+ { latestUserMessageId: params.latestUserMessageId },
63
+ )
60
64
 
61
65
  return {
62
66
  get currentMessages() {
@@ -43,6 +43,7 @@ import { createWiredContextCompactionRuntime } from '../../runtime/context-compa
43
43
  import { createExecutionPlanInstructionSectionCache, toExecutionPlanCacheError } from '../../runtime/execution-plan'
44
44
  import type { HelperModelRuntime } from '../../runtime/helper-model'
45
45
  import { HelperModelTag } from '../../runtime/helper-model'
46
+ import { trimOversizedTextForRequestBudget } from '../../runtime/model-input-budget'
46
47
  import { runPostTurnSideEffects } from '../../runtime/post-turn-side-effects'
47
48
  import { extractMessageText, toOptionalTrimmedString } from '../../runtime/thread-chat-helpers'
48
49
  import {
@@ -118,6 +119,8 @@ type ThreadTurnPreparationTaggedError =
118
119
 
119
120
  type ThreadTurnRunEffect = Effect.Effect<PreparedThreadTurnResult, ThreadTurnPreparationSurfaceError, never>
120
121
 
122
+ const THREAD_TURN_AUXILIARY_MESSAGE_TEXT_MAX_CHARS = 24_000
123
+
121
124
  type ExecuteThreadTurnRunEffect = Effect.Effect<
122
125
  PreparedThreadTurnResult | void,
123
126
  ThreadTurnPreparationSurfaceError,
@@ -406,12 +409,13 @@ function deriveTurnMessageInputs(params: {
406
409
  ? undefined
407
410
  : (userMessage ?? [...liveHistory].reverse().find((message) => message.role === 'user'))
408
411
 
409
- const messageText =
412
+ const rawMessageText =
410
413
  turnParams.kind === 'planTurn'
411
414
  ? `${turnParams.planTurn.nodeSpec.label}\n${turnParams.planTurn.nodeSpec.objective}\n${turnParams.planTurn.nodeSpec.instructions}`
412
415
  : referenceUserMessage
413
416
  ? extractMessageText(referenceUserMessage).trim()
414
417
  : ''
418
+ const messageText = trimOversizedTextForRequestBudget(rawMessageText, THREAD_TURN_AUXILIARY_MESSAGE_TEXT_MAX_CHARS)
415
419
 
416
420
  return { userMessage, originalMessages, referenceUserMessage, messageText }
417
421
  }