@lota-sdk/core 0.4.44 → 0.4.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lota-sdk/core",
3
- "version": "0.4.44",
3
+ "version": "0.4.46",
4
4
  "files": [
5
5
  "src",
6
6
  "infrastructure/schema"
@@ -32,7 +32,7 @@
32
32
  "@ai-sdk/provider": "^3.0.9",
33
33
  "@chat-adapter/slack": "^4.26.0",
34
34
  "@chat-adapter/state-ioredis": "^4.26.0",
35
- "@lota-sdk/shared": "0.4.44",
35
+ "@lota-sdk/shared": "0.4.46",
36
36
  "@mendable/firecrawl-js": "^4.20.0",
37
37
  "@surrealdb/node": "^3.0.3",
38
38
  "ai": "^6.0.170",
@@ -11,6 +11,7 @@ import {
11
11
  readString,
12
12
  stringifyUnknown,
13
13
  } from '../../utils/string'
14
+ import { trimOversizedTextForRequestBudget } from '../model-input-budget'
14
15
  import {
15
16
  COMPACTION_CHUNK_MAX_CHARS,
16
17
  CONTEXT_COMPACTION_INCLUDED_TOOL_NAMES,
@@ -71,6 +72,8 @@ class CompactionParseError extends Schema.TaggedErrorClass<CompactionParseError>
71
72
  }) {}
72
73
 
73
74
  const COMPACTION_RUNNER_RETRY_OPTIONS = { times: 2, schedule: Schedule.exponential(Duration.millis(500), 2) } as const
75
+ const COMPACTION_CONTEXT_MESSAGE_TEXT_MAX_CHARS = 24_000
76
+ const COMPACTION_MIN_SOURCE_CHARS = 2_000
74
77
 
75
78
  export interface ContextCompactionPromptParams {
76
79
  previousSummary: string
@@ -294,11 +297,14 @@ export function createContextCompactionRuntime(
294
297
  return chunks.join('\n').trim()
295
298
  }
296
299
 
297
- const toContextMessageFromChatMessage = (message: ChatMessage): ContextMessage => ({
298
- role: message.role,
299
- text: toContextTextFromChatMessage(message),
300
- sourceMessageId: message.id,
301
- })
300
+ const toContextMessageFromChatMessage = (message: ChatMessage): ContextMessage => {
301
+ const text = toContextTextFromChatMessage(message)
302
+ return {
303
+ role: message.role,
304
+ text: trimOversizedTextForRequestBudget(text, COMPACTION_CONTEXT_MESSAGE_TEXT_MAX_CHARS),
305
+ sourceMessageId: message.id,
306
+ }
307
+ }
302
308
 
303
309
  const createSummaryMessage = (summaryText: string): ChatMessage | null => {
304
310
  const summary = normalizeSummary(summaryText)
@@ -458,6 +464,10 @@ export function createContextCompactionRuntime(
458
464
  return { ...state, estimatedTokens: assessment.estimatedTokens, done: true }
459
465
  }
460
466
 
467
+ if (sourceText.length < COMPACTION_MIN_SOURCE_CHARS) {
468
+ return { ...state, estimatedTokens: assessment.estimatedTokens, done: true }
469
+ }
470
+
461
471
  const compacted = yield* compactContextMessagesEffect({
462
472
  previousSummary: state.summaryText,
463
473
  newMessages: contextMessages,
@@ -465,9 +475,7 @@ export function createContextCompactionRuntime(
465
475
  const rolledSummary = yield* rollupSummaryIfOversizedEffect(normalizeSummary(compacted.summary))
466
476
 
467
477
  if (rolledSummary.length >= sourceText.length) {
468
- return yield* new CompactionError({
469
- message: 'Compaction summary is not shorter than compacted source',
470
- })
478
+ return { ...state, estimatedTokens: assessment.estimatedTokens, done: true }
471
479
  }
472
480
 
473
481
  return yield* stepHistory({
@@ -0,0 +1,189 @@
1
+ import type { ChatMessage } from '@lota-sdk/shared'
2
+
3
+ import { CONTEXT_WINDOW_TOKENS } from './context-compaction/context-compaction-constants'
4
+
5
/** Default ceiling, in serialized characters, for the whole message list sent to the model. */
const MODEL_INPUT_MAX_TOTAL_CHARS = 240_000

/** Upper bound for a single text part of the latest user message (further capped at 70% of the total budget). */
const MODEL_INPUT_LATEST_USER_TEXT_MAX_CHARS = 120_000

/** Upper bound for a single text part of any message other than the latest user message. */
const MODEL_INPUT_TEXT_PART_MAX_CHARS = 24_000

/** Upper bound for the serialized `input` / `output` payload of a `tool-*` part. */
const MODEL_INPUT_TOOL_PAYLOAD_MAX_CHARS = 12_000

/** Upper bound for the serialized `data` payload of a `tool-*` or `data-*` part. */
const MODEL_INPUT_DATA_PAYLOAD_MAX_CHARS = 8_000

/** Message id given to the synthetic notice that reports omitted messages. */
const MODEL_INPUT_NOTICE_ID = 'model-input-budget-notice'
11
+
12
/** Options accepted by `shapeModelInputMessagesForBudget`. */
interface ShapeModelInputMessagesOptions {
  /** Id of the message that is never dropped and whose text parts get the larger text limit. */
  latestUserMessageId?: string
  /** Overrides the default total budget, measured in serialized characters. */
  maxTotalChars?: number
}
16
+
17
/**
 * Measures a value by the length of its JSON serialization.
 *
 * @param value - Anything; typically a message or a list of messages.
 * @returns The JSON string length, `0` when `JSON.stringify` produces no string
 *   (for example for `undefined`), or the length of `String(value)` when
 *   serialization throws.
 */
function serializedLength(value: unknown): number {
  let json: string | undefined
  try {
    json = JSON.stringify(value)
  } catch {
    // Serialization failed (e.g. a circular structure): fall back to the plain string form.
    return String(value).length
  }
  return typeof json === 'string' ? json.length : 0
}
25
+
26
/**
 * Converts a payload into the string form used for size checks and trimming.
 *
 * @param value - The payload to convert.
 * @returns `null` for `null`/`undefined`, the value itself when it is already a
 *   string, its JSON serialization otherwise, or the literal
 *   `'[unserializable payload]'` when `JSON.stringify` throws.
 */
function stringifyPayload(value: unknown): string | null {
  if (value === undefined || value === null) return null

  if (typeof value !== 'string') {
    try {
      return JSON.stringify(value)
    } catch {
      return '[unserializable payload]'
    }
  }

  return value
}
36
+
37
/**
 * Shortens `value` to at most `maxChars` UTF-16 code units by keeping its
 * beginning and end and replacing the middle with a bracketed notice.
 *
 * Roughly 70% of the space left after the notice goes to the head and the rest
 * to the tail. Whitespace next to the notice is trimmed, so the result may be
 * shorter than `maxChars`.
 *
 * @param value - Text to shorten.
 * @param maxChars - Maximum result length; negative values are treated as `0`
 *   and fractional values are rounded down.
 * @param notice - Text placed between the head and the tail, wrapped as `\n\n[notice]\n\n`.
 * @returns `value` unchanged when it already fits; otherwise the head, the
 *   marker and the tail. When the marker alone does not fit, a prefix of the
 *   marker is returned.
 */
function trimMiddle(value: string, maxChars: number, notice: string): string {
  // A negative limit would otherwise reach `slice(0, limit)` below and be read as an offset from the end.
  const limit = Math.max(0, Math.floor(maxChars))
  if (value.length <= limit) return value

  const marker = `\n\n[${notice}]\n\n`
  const available = Math.max(0, limit - marker.length)
  if (available <= 0) return marker.slice(0, limit)

  // True when cutting `value` at `index` would separate a high surrogate from its low surrogate.
  const splitsSurrogatePair = (index: number): boolean => {
    if (index <= 0 || index >= value.length) return false
    const before = value.charCodeAt(index - 1)
    const after = value.charCodeAt(index)
    return before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff
  }

  const headChars = Math.ceil(available * 0.7)
  const tailChars = available - headChars

  // Move each cut off a surrogate pair (shrinking, never growing) so no lone surrogate is emitted.
  const headEnd = splitsSurrogatePair(headChars) ? headChars - 1 : headChars
  const head = value.slice(0, headEnd).trimEnd()

  let tail = ''
  if (tailChars > 0) {
    const tailStart = value.length - tailChars
    tail = value.slice(splitsSurrogatePair(tailStart) ? tailStart + 1 : tailStart).trimStart()
  }

  return `${head}${marker}${tail}`
}
51
+
52
/**
 * Middle-trims a text part to `maxChars`, inserting a notice that states the
 * original character count.
 *
 * @param text - Text to shorten.
 * @param maxChars - Maximum length passed on to `trimMiddle`.
 * @returns The result of `trimMiddle` for this text and notice.
 */
function trimTextPart(text: string, maxChars: number): string {
  const truncationNotice =
    `Model input truncated this text from ${text.length} characters to fit the request budget. ` +
    'Only the beginning and end are visible to the model.'

  return trimMiddle(text, maxChars, truncationNotice)
}
60
+
61
/**
 * Exported entry point for trimming a single piece of text to a character budget.
 * Delegates to `trimTextPart`.
 *
 * @param text - Text to shorten.
 * @param maxChars - Maximum number of characters to keep.
 * @returns Whatever `trimTextPart` returns for the same arguments.
 */
export function trimOversizedTextForRequestBudget(text: string, maxChars: number): string {
  const budgetedText = trimTextPart(text, maxChars)
  return budgetedText
}
64
+
65
/**
 * Caps the serialized size of a payload.
 *
 * @param value - Payload to check.
 * @param maxChars - Maximum serialized length allowed.
 * @param label - Human-readable payload name used in the truncation notice.
 * @returns `value` itself when its string form is empty/absent or within the
 *   limit; otherwise a middle-trimmed STRING built from the serialized form
 *   (so an oversized object payload comes back as a string).
 */
function trimPayload(value: unknown, maxChars: number, label: string): unknown {
  const serialized = stringifyPayload(value)
  if (!serialized || serialized.length <= maxChars) return value

  const truncationNotice = `Model input truncated ${label} from ${serialized.length} serialized characters to fit the request budget.`
  return trimMiddle(serialized, maxChars, truncationNotice)
}
75
+
76
/**
 * Applies the per-part size limits to one message.
 *
 * - Text parts longer than the text limit are middle-trimmed. The latest user
 *   message gets the larger limit (capped at 70% of `maxTotalChars`).
 * - `tool-*` parts have their `input`, `output` and `data` payloads capped.
 * - `data-*` parts that carry a `data` property have that payload capped.
 *
 * @param message - Message to shape; it is never mutated.
 * @param maxTotalChars - Total budget, used only to derive the latest-user text limit.
 * @param latestUserMessageId - Id of the message that receives the larger text limit.
 * @returns The same `message` object when nothing changed, otherwise a copy with the new parts.
 */
function shapeMessageParts(message: ChatMessage, maxTotalChars: number, latestUserMessageId?: string): ChatMessage {
  const isLatestUserMessage = Boolean(latestUserMessageId) && message.id === latestUserMessageId

  let textLimit = MODEL_INPUT_TEXT_PART_MAX_CHARS
  if (isLatestUserMessage) {
    textLimit = Math.min(MODEL_INPUT_LATEST_USER_TEXT_MAX_CHARS, Math.floor(maxTotalChars * 0.7))
  }

  // Payload fields of a tool part, in the order they are processed.
  const toolPayloadFields = [
    { key: 'input', limit: MODEL_INPUT_TOOL_PAYLOAD_MAX_CHARS, label: 'tool input' },
    { key: 'output', limit: MODEL_INPUT_TOOL_PAYLOAD_MAX_CHARS, label: 'tool output' },
    { key: 'data', limit: MODEL_INPUT_DATA_PAYLOAD_MAX_CHARS, label: 'tool data' },
  ]

  const shapedParts = message.parts.map((part) => {
    if (part.type === 'text' && typeof part.text === 'string' && part.text.length > textLimit) {
      return { ...part, text: trimTextPart(part.text, textLimit) }
    }

    if (part.type.startsWith('tool-')) {
      const draft = { ...part } as Record<string, unknown>
      let changed = false

      for (const { key, limit, label } of toolPayloadFields) {
        const before = draft[key]
        if (before === undefined) continue

        const after = trimPayload(before, limit, label)
        if (after !== before) {
          draft[key] = after
          changed = true
        }
      }

      // Only hand back the copy when at least one payload was actually shortened.
      if (changed) return draft as ChatMessage['parts'][number]
    }

    if (part.type.startsWith('data-') && 'data' in part) {
      const draft = { ...part } as Record<string, unknown>
      const before = draft.data
      const after = trimPayload(before, MODEL_INPUT_DATA_PAYLOAD_MAX_CHARS, 'data payload')
      if (after !== before) {
        draft.data = after
        return draft as ChatMessage['parts'][number]
      }
    }

    return part
  })

  const anyPartChanged = shapedParts.some((part, index) => part !== message.parts[index])
  if (!anyPartChanged) return message

  return { ...message, parts: shapedParts }
}
118
+
119
/**
 * Builds the synthetic system message that tells the model how many messages
 * were left out of this call.
 *
 * @param omittedCount - Number of messages that were dropped.
 * @returns A system message with the fixed notice id and a single text part.
 *   The wording agrees with the count ("1 older message was", "2 older messages were").
 */
function createBudgetNotice(omittedCount: number): ChatMessage {
  // Keep subject and verb in agreement; the plural wording matches the previous text exactly.
  const subject = omittedCount === 1 ? 'older message was' : 'older messages were'

  return {
    id: MODEL_INPUT_NOTICE_ID,
    role: 'system',
    parts: [
      {
        type: 'text',
        text:
          `${omittedCount} ${subject} omitted from this model call ` +
          'because the live thread exceeded the model input budget. Use the compacted summary and latest user request as source of truth.',
      },
    ],
  }
}
133
+
134
/**
 * Drops messages until the list, plus an omission notice, fits `maxTotalChars`.
 *
 * First pass: walks from newest to oldest and keeps every message that still
 * fits the running total; the latest user message is kept even when it does
 * not fit. Second pass (only when something was dropped): removes the oldest
 * remaining message that is not the latest user message until the kept list
 * together with the notice fits, or nothing removable is left.
 *
 * @param messages - Messages in chronological order; the array is not mutated.
 * @param maxTotalChars - Budget in serialized characters.
 * @param latestUserMessageId - Id of the message that must never be dropped.
 * @returns The kept messages in chronological order, preceded by a budget
 *   notice when at least one message was omitted.
 */
function fitMessagesToBudget(
  messages: ChatMessage[],
  maxTotalChars: number,
  latestUserMessageId?: string,
): ChatMessage[] {
  const isLatestUser = (candidate: ChatMessage): boolean =>
    latestUserMessageId !== undefined && candidate.id === latestUserMessageId

  const newestFirst: ChatMessage[] = []
  let usedChars = 2 // the surrounding "[" and "]" of the serialized array
  let dropped = 0

  for (const candidate of messages.slice().reverse()) {
    const cost = serializedLength(candidate) + 1 // +1 for the separating comma
    const fits = usedChars + cost <= maxTotalChars

    if (fits || isLatestUser(candidate)) {
      newestFirst.push(candidate)
      usedChars += cost
    } else {
      dropped += 1
    }
  }

  const chronological = newestFirst.reverse()
  if (dropped === 0) return chronological

  // The notice itself takes space (and grows with the count), so re-measure after every removal.
  const exceedsBudget = (): boolean =>
    serializedLength(chronological) + serializedLength(createBudgetNotice(dropped)) + 1 > maxTotalChars

  while (chronological.length > 0 && exceedsBudget()) {
    const pinnedIndex = chronological.findIndex(isLatestUser)
    // Oldest entry that is not the pinned latest user message.
    const victimIndex = pinnedIndex === 0 ? 1 : 0
    if (victimIndex >= chronological.length) break

    chronological.splice(victimIndex, 1)
    dropped += 1
  }

  return [createBudgetNotice(dropped), ...chronological]
}
174
+
175
/**
 * Shapes a message list so that it fits the model input budget.
 *
 * Each message first has its oversized parts trimmed. If the serialized list
 * still exceeds the budget, whole messages are dropped via `fitMessagesToBudget`.
 *
 * @param messages - Messages in chronological order.
 * @param options - Optional latest-user-message id and total budget override.
 *   Without an override the budget is the smaller of `MODEL_INPUT_MAX_TOTAL_CHARS`
 *   and `CONTEXT_WINDOW_TOKENS * 2`.
 * @returns The shaped list, possibly shortened and prefixed with a budget notice.
 */
export function shapeModelInputMessagesForBudget(
  messages: ChatMessage[],
  options: ShapeModelInputMessagesOptions = {},
): ChatMessage[] {
  const { latestUserMessageId, maxTotalChars: requestedBudget } = options
  const budget = requestedBudget ?? Math.min(MODEL_INPUT_MAX_TOTAL_CHARS, CONTEXT_WINDOW_TOKENS * 2)

  const perMessageShaped = messages.map((message) => shapeMessageParts(message, budget, latestUserMessageId))

  const alreadyFits = serializedLength(perMessageShaped) <= budget
  return alreadyFits ? perMessageShaped : fitMessagesToBudget(perMessageShaped, budget, latestUserMessageId)
}
@@ -7,6 +7,7 @@ import {
7
7
  } from '../../runtime/chat-attachments'
8
8
  import type { ReadableUploadMetadataLike } from '../../runtime/chat-types'
9
9
  import type { ContextCompactionRuntime } from '../../runtime/context-compaction/context-compaction'
10
+ import { shapeModelInputMessagesForBudget } from '../../runtime/model-input-budget'
10
11
 
11
12
  export function upsertChatHistoryMessage(messages: ChatMessage[], nextMessage: ChatMessage): ChatMessage[] {
12
13
  const existingIndex = messages.findIndex((message) => message.id === nextMessage.id)
@@ -52,11 +53,14 @@ export function createThreadTurnMessageContext(params: {
52
53
  })
53
54
 
54
55
  const buildRunInputMessages = (extraMessages: ChatMessage[] = []): ChatMessage[] =>
55
- buildModelInputMessagesWithUploadMetadata({
56
- messages: [...currentMessages, ...extraMessages],
57
- latestUserMessageId: params.latestUserMessageId,
58
- uploadMetadataText: buildReadableUploadMetadataText(listReadableUploads(extraMessages)),
59
- })
56
+ shapeModelInputMessagesForBudget(
57
+ buildModelInputMessagesWithUploadMetadata({
58
+ messages: [...currentMessages, ...extraMessages],
59
+ latestUserMessageId: params.latestUserMessageId,
60
+ uploadMetadataText: buildReadableUploadMetadataText(listReadableUploads(extraMessages)),
61
+ }),
62
+ { latestUserMessageId: params.latestUserMessageId },
63
+ )
60
64
 
61
65
  return {
62
66
  get currentMessages() {
@@ -43,6 +43,7 @@ import { createWiredContextCompactionRuntime } from '../../runtime/context-compa
43
43
  import { createExecutionPlanInstructionSectionCache, toExecutionPlanCacheError } from '../../runtime/execution-plan'
44
44
  import type { HelperModelRuntime } from '../../runtime/helper-model'
45
45
  import { HelperModelTag } from '../../runtime/helper-model'
46
+ import { trimOversizedTextForRequestBudget } from '../../runtime/model-input-budget'
46
47
  import { runPostTurnSideEffects } from '../../runtime/post-turn-side-effects'
47
48
  import { extractMessageText, toOptionalTrimmedString } from '../../runtime/thread-chat-helpers'
48
49
  import {
@@ -118,6 +119,8 @@ type ThreadTurnPreparationTaggedError =
118
119
 
119
120
  type ThreadTurnRunEffect = Effect.Effect<PreparedThreadTurnResult, ThreadTurnPreparationSurfaceError, never>
120
121
 
122
+ const THREAD_TURN_AUXILIARY_MESSAGE_TEXT_MAX_CHARS = 24_000
123
+
121
124
  type ExecuteThreadTurnRunEffect = Effect.Effect<
122
125
  PreparedThreadTurnResult | void,
123
126
  ThreadTurnPreparationSurfaceError,
@@ -406,12 +409,13 @@ function deriveTurnMessageInputs(params: {
406
409
  ? undefined
407
410
  : (userMessage ?? [...liveHistory].reverse().find((message) => message.role === 'user'))
408
411
 
409
- const messageText =
412
+ const rawMessageText =
410
413
  turnParams.kind === 'planTurn'
411
414
  ? `${turnParams.planTurn.nodeSpec.label}\n${turnParams.planTurn.nodeSpec.objective}\n${turnParams.planTurn.nodeSpec.instructions}`
412
415
  : referenceUserMessage
413
416
  ? extractMessageText(referenceUserMessage).trim()
414
417
  : ''
418
+ const messageText = trimOversizedTextForRequestBudget(rawMessageText, THREAD_TURN_AUXILIARY_MESSAGE_TEXT_MAX_CHARS)
415
419
 
416
420
  return { userMessage, originalMessages, referenceUserMessage, messageText }
417
421
  }