@lota-sdk/core 0.4.44 → 0.4.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lota-sdk/core",
|
|
3
|
-
"version": "0.4.
|
|
3
|
+
"version": "0.4.45",
|
|
4
4
|
"files": [
|
|
5
5
|
"src",
|
|
6
6
|
"infrastructure/schema"
|
|
@@ -32,7 +32,7 @@
|
|
|
32
32
|
"@ai-sdk/provider": "^3.0.9",
|
|
33
33
|
"@chat-adapter/slack": "^4.26.0",
|
|
34
34
|
"@chat-adapter/state-ioredis": "^4.26.0",
|
|
35
|
-
"@lota-sdk/shared": "0.4.
|
|
35
|
+
"@lota-sdk/shared": "0.4.45",
|
|
36
36
|
"@mendable/firecrawl-js": "^4.20.0",
|
|
37
37
|
"@surrealdb/node": "^3.0.3",
|
|
38
38
|
"ai": "^6.0.170",
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
import type { ChatMessage } from '@lota-sdk/shared'
|
|
2
|
+
|
|
3
|
+
import { CONTEXT_WINDOW_TOKENS } from './context-compaction/context-compaction-constants'
|
|
4
|
+
|
|
5
|
+
// Character budgets used when shaping chat messages for a single model call.
// All limits are measured in JavaScript string length (UTF-16 code units).

/** Upper bound for the default total budget of the serialized message list. */
const MODEL_INPUT_MAX_TOTAL_CHARS = 240_000
/** Cap for text parts of the latest user message (further limited to 70% of the total budget). */
const MODEL_INPUT_LATEST_USER_TEXT_MAX_CHARS = 120_000
/** Cap for text parts of every other message. */
const MODEL_INPUT_TEXT_PART_MAX_CHARS = 24_000
/** Cap for the serialized `input` / `output` of `tool-*` parts. */
const MODEL_INPUT_TOOL_PAYLOAD_MAX_CHARS = 12_000
/** Cap for the serialized `data` of `tool-*` and `data-*` parts. */
const MODEL_INPUT_DATA_PAYLOAD_MAX_CHARS = 8_000
/** Message id given to the synthetic notice that reports omitted messages. */
const MODEL_INPUT_NOTICE_ID = 'model-input-budget-notice'
|
|
11
|
+
|
|
12
|
+
/** Optional settings accepted by `shapeModelInputMessagesForBudget`. */
interface ShapeModelInputMessagesOptions {
  /**
   * Id of the message treated as the latest user request. That message gets the
   * larger text limit and is never dropped when older messages are omitted.
   */
  latestUserMessageId?: string
  /** Total serialized-character budget; a built-in default is used when omitted. */
  maxTotalChars?: number
}
|
|
16
|
+
|
|
17
|
+
/**
 * Measures how many characters `value` occupies once JSON-serialized.
 *
 * @param value - Any value; typically a chat message or an array of messages.
 * @returns The length of `JSON.stringify(value)`. Returns 0 when the value has no
 *   JSON representation (e.g. `undefined` or a function). When serialization throws
 *   (circular reference, BigInt, throwing `toJSON`), falls back to the length of
 *   `String(value)`, and to 0 if that conversion throws as well.
 */
function serializedLength(value: unknown): number {
  try {
    // JSON.stringify yields undefined (not a string) for undefined, functions and symbols.
    const serialized: string | undefined = JSON.stringify(value)
    return serialized === undefined ? 0 : serialized.length
  } catch {
    try {
      return String(value).length
    } catch {
      // String() itself throws for values without a usable primitive conversion
      // (e.g. null-prototype objects); measuring must never crash the caller.
      return 0
    }
  }
}
|
|
25
|
+
|
|
26
|
+
/**
 * Converts a tool/data payload into a string so its size can be measured and trimmed.
 *
 * @param value - Arbitrary payload value.
 * @returns `null` for `null`/`undefined` and for values with no JSON representation
 *   (functions, symbols); the value itself when it is already a string; otherwise its
 *   JSON text, or the placeholder `'[unserializable payload]'` when serialization throws.
 */
function stringifyPayload(value: unknown): string | null {
  if (value === null || value === undefined) return null
  if (typeof value === 'string') return value

  try {
    // JSON.stringify returns undefined for functions and symbols even though its
    // declared return type is string; normalise that to null to honour the signature.
    const serialized: string | undefined = JSON.stringify(value)
    return serialized ?? null
  } catch {
    return '[unserializable payload]'
  }
}
|
|
36
|
+
|
|
37
|
+
/** True when `codeUnit` is the leading half of a UTF-16 surrogate pair. */
function isHighSurrogate(codeUnit: number): boolean {
  return codeUnit >= 0xd800 && codeUnit <= 0xdbff
}

/** True when `codeUnit` is the trailing half of a UTF-16 surrogate pair. */
function isLowSurrogate(codeUnit: number): boolean {
  return codeUnit >= 0xdc00 && codeUnit <= 0xdfff
}

/** True when cutting `value` at `index` would separate the two halves of a surrogate pair. */
function splitsSurrogatePair(value: string, index: number): boolean {
  return (
    index > 0 &&
    index < value.length &&
    isHighSurrogate(value.charCodeAt(index - 1)) &&
    isLowSurrogate(value.charCodeAt(index))
  )
}

/**
 * Shortens `value` to at most `maxChars` characters by keeping its beginning and end
 * and replacing the middle with a bracketed notice.
 *
 * Roughly 70% of the space left after the notice goes to the head and the rest to the
 * tail. Whitespace adjacent to the notice is trimmed, so the result may be shorter than
 * `maxChars`. Cut points are moved so that no surrogate pair (e.g. an emoji) is split,
 * which would otherwise leave a lone surrogate in the output.
 *
 * @param value - Text to shorten.
 * @param maxChars - Maximum length of the result; values below 0 are treated as 0.
 * @param notice - Text placed inside the `[...]` marker that replaces the middle.
 * @returns `value` unchanged when it already fits; otherwise head + marker + tail, or
 *   just a prefix of the marker when the marker alone does not fit.
 */
function trimMiddle(value: string, maxChars: number, notice: string): string {
  if (value.length <= maxChars) return value

  // A negative limit would make slice() count from the end and return too much.
  const budget = Math.max(0, maxChars)
  const marker = `\n\n[${notice}]\n\n`
  const available = budget - marker.length
  if (available <= 0) return marker.slice(0, budget)

  const headChars = Math.ceil(available * 0.7)
  const tailChars = available - headChars

  // Shrink (never grow) either side by one code unit when the cut lands inside a pair.
  const headEnd = splitsSurrogatePair(value, headChars) ? headChars - 1 : headChars
  const rawTailStart = value.length - tailChars
  const tailStart = splitsSurrogatePair(value, rawTailStart) ? rawTailStart + 1 : rawTailStart

  const head = value.slice(0, headEnd).trimEnd()
  const tail = tailChars > 0 ? value.slice(tailStart).trimStart() : ''

  return `${head}${marker}${tail}`
}
|
|
51
|
+
|
|
52
|
+
/**
 * Shortens a text part to `maxChars` via `trimMiddle`, inserting a notice that
 * states the original length and that only the beginning and end remain visible.
 *
 * @param text - Text to shorten.
 * @param maxChars - Maximum length of the returned text.
 * @returns The text as returned by `trimMiddle`.
 */
function trimTextPart(text: string, maxChars: number): string {
  const notice = [
    `Model input truncated this text from ${text.length} characters to fit the request budget.`,
    'Only the beginning and end are visible to the model.',
  ].join(' ')

  return trimMiddle(text, maxChars, notice)
}
|
|
60
|
+
|
|
61
|
+
/**
 * Public entry point for trimming a single piece of text to a character budget.
 * Delegates to the same trimming used for text parts of model input messages.
 *
 * @param text - Text to shorten.
 * @param maxChars - Maximum length of the returned text.
 * @returns The result of `trimTextPart(text, maxChars)`.
 */
export function trimOversizedTextForRequestBudget(text: string, maxChars: number): string {
  const trimmed = trimTextPart(text, maxChars)
  return trimmed
}
|
|
64
|
+
|
|
65
|
+
/**
 * Limits the size of a tool/data payload.
 *
 * @param value - Payload to check.
 * @param maxChars - Maximum serialized length allowed.
 * @param label - Human-readable payload name used in the truncation notice.
 * @returns The original `value` (same reference) when it has no string form or already
 *   fits; otherwise a trimmed STRING built from its serialized form. Note that an
 *   oversized object payload is therefore replaced by a string.
 */
function trimPayload(value: unknown, maxChars: number, label: string): unknown {
  const serialized = stringifyPayload(value)

  // Nothing to measure (null/empty) or already within budget: hand the value back untouched.
  if (!serialized) return value
  if (serialized.length <= maxChars) return value

  const notice = `Model input truncated ${label} from ${serialized.length} serialized characters to fit the request budget.`
  return trimMiddle(serialized, maxChars, notice)
}
|
|
75
|
+
|
|
76
|
+
/**
 * Runs `trimPayload` over the listed fields of a message part.
 *
 * @param part - Part whose fields should be checked.
 * @param rules - `[field, maxChars, label]` triples, applied in order; fields whose
 *   value is `undefined` are skipped.
 * @returns A shallow copy with the trimmed fields when at least one field changed,
 *   otherwise the original `part` reference.
 */
function trimPartFields(
  part: ChatMessage['parts'][number],
  rules: ReadonlyArray<readonly [string, number, string]>,
): ChatMessage['parts'][number] {
  const draft = { ...part } as Record<string, unknown>
  let changed = false

  for (const [field, maxChars, label] of rules) {
    const before = draft[field]
    if (before === undefined) continue

    const after = trimPayload(before, maxChars, label)
    if (after === before) continue

    draft[field] = after
    changed = true
  }

  return changed ? (draft as ChatMessage['parts'][number]) : part
}

/**
 * Trims oversized parts of one message: long text parts, `tool-*` part payloads
 * (`input`, `output`, `data`) and `data-*` part payloads (`data`).
 *
 * @param message - Message to shape; it is not mutated.
 * @param maxTotalChars - Total budget; bounds the text limit of the latest user message.
 * @param latestUserMessageId - Id of the message that gets the larger text limit.
 * @returns The same `message` reference when no part changed, otherwise a shallow copy
 *   with the updated `parts` array.
 */
function shapeMessageParts(message: ChatMessage, maxTotalChars: number, latestUserMessageId?: string): ChatMessage {
  const toolRules: ReadonlyArray<readonly [string, number, string]> = [
    ['input', MODEL_INPUT_TOOL_PAYLOAD_MAX_CHARS, 'tool input'],
    ['output', MODEL_INPUT_TOOL_PAYLOAD_MAX_CHARS, 'tool output'],
    ['data', MODEL_INPUT_DATA_PAYLOAD_MAX_CHARS, 'tool data'],
  ]
  const dataRules: ReadonlyArray<readonly [string, number, string]> = [
    ['data', MODEL_INPUT_DATA_PAYLOAD_MAX_CHARS, 'data payload'],
  ]

  // The latest user message may keep more text, capped at 70% of the total budget.
  let textLimit = MODEL_INPUT_TEXT_PART_MAX_CHARS
  if (latestUserMessageId && message.id === latestUserMessageId) {
    textLimit = Math.min(MODEL_INPUT_LATEST_USER_TEXT_MAX_CHARS, Math.floor(maxTotalChars * 0.7))
  }

  const parts = message.parts.map((part) => {
    if (part.type === 'text' && typeof part.text === 'string' && part.text.length > textLimit) {
      return { ...part, text: trimTextPart(part.text, textLimit) }
    }
    if (part.type.startsWith('tool-')) return trimPartFields(part, toolRules)
    if (part.type.startsWith('data-') && 'data' in part) return trimPartFields(part, dataRules)
    return part
  })

  // Preserve reference identity when nothing was trimmed so callers can detect "no change".
  const untouched = parts.every((part, index) => part === message.parts[index])
  return untouched ? message : { ...message, parts }
}
|
|
118
|
+
|
|
119
|
+
/**
 * Builds the synthetic system message that tells the model older messages were left
 * out of this call.
 *
 * @param omittedCount - Number of messages that were dropped.
 * @returns A system message with the fixed notice id and a single text part. The
 *   wording agrees in number with `omittedCount` ("1 older message was" vs
 *   "N older messages were").
 */
function createBudgetNotice(omittedCount: number): ChatMessage {
  const subject = omittedCount === 1 ? 'older message was' : 'older messages were'

  return {
    id: MODEL_INPUT_NOTICE_ID,
    role: 'system',
    parts: [
      {
        type: 'text',
        text:
          `${omittedCount} ${subject} omitted from this model call ` +
          'because the live thread exceeded the model input budget. Use the compacted summary and latest user request as source of truth.',
      },
    ],
  }
}
|
|
133
|
+
|
|
134
|
+
/**
 * Drops messages until the serialized list fits `maxTotalChars`, prepending a notice
 * message when anything was dropped.
 *
 * Pass 1 walks from newest to oldest and keeps each message that still fits the running
 * total; the latest user message is kept even when it does not fit. Pass 2 runs only if
 * something was dropped: while the kept list plus the notice is still over budget, it
 * removes the oldest kept message that is not the latest user message.
 *
 * NOTE(review): pass 1 keeps scanning after a message is skipped, so the kept history
 * can be non-contiguous — confirm this is intended.
 *
 * @param messages - Messages in chronological order; not mutated.
 * @param maxTotalChars - Budget for the serialized result.
 * @param latestUserMessageId - Id of the message that must never be dropped.
 * @returns The kept messages in original order, preceded by a notice message when at
 *   least one message was omitted. May still exceed the budget when the latest user
 *   message alone is too large.
 */
function fitMessagesToBudget(
  messages: ChatMessage[],
  maxTotalChars: number,
  latestUserMessageId?: string,
): ChatMessage[] {
  const isLatestUser = (candidate: ChatMessage): boolean =>
    latestUserMessageId !== undefined && candidate.id === latestUserMessageId

  const retained: ChatMessage[] = []
  let usedChars = 2 // the enclosing "[" and "]"
  let dropped = 0

  // Pass 1: newest first, so recent context wins when space runs out.
  for (const candidate of messages.slice().reverse()) {
    const cost = serializedLength(candidate) + 1 // +1 for the separating comma
    if (isLatestUser(candidate) || usedChars + cost <= maxTotalChars) {
      retained.unshift(candidate)
      usedChars += cost
    } else {
      dropped += 1
    }
  }

  if (dropped === 0) return retained

  // The notice text depends on the count, so its size is re-measured on every check.
  const exceedsBudget = (): boolean =>
    serializedLength(retained) + serializedLength(createBudgetNotice(dropped)) + 1 > maxTotalChars

  // Pass 2: make room for the notice by evicting from the oldest end.
  while (retained.length > 0 && exceedsBudget()) {
    const pinnedIndex = retained.findIndex(isLatestUser)
    const victimIndex = pinnedIndex === 0 ? 1 : 0
    if (victimIndex >= retained.length) break // only the pinned message is left

    retained.splice(victimIndex, 1)
    dropped += 1
  }

  return [createBudgetNotice(dropped), ...retained]
}
|
|
174
|
+
|
|
175
|
+
/**
 * Shapes a message list so that its serialized size fits the model input budget.
 *
 * First every message has its oversized parts trimmed; if the serialized list still
 * exceeds the budget, older messages are dropped and a notice message is prepended.
 *
 * @param messages - Messages in chronological order; not mutated.
 * @param options - Optional `latestUserMessageId` (message that is never dropped and
 *   gets a larger text limit) and `maxTotalChars` (defaults to the smaller of
 *   `MODEL_INPUT_MAX_TOTAL_CHARS` and `CONTEXT_WINDOW_TOKENS * 2`).
 * @returns The shaped message list.
 */
export function shapeModelInputMessagesForBudget(
  messages: ChatMessage[],
  options: ShapeModelInputMessagesOptions = {},
): ChatMessage[] {
  const { latestUserMessageId, maxTotalChars: requestedBudget } = options
  const budget = requestedBudget ?? Math.min(MODEL_INPUT_MAX_TOTAL_CHARS, CONTEXT_WINDOW_TOKENS * 2)

  const shaped = messages.map((message) => shapeMessageParts(message, budget, latestUserMessageId))
  const fitsAsIs = serializedLength(shaped) <= budget

  return fitsAsIs ? shaped : fitMessagesToBudget(shaped, budget, latestUserMessageId)
}
|
|
@@ -7,6 +7,7 @@ import {
|
|
|
7
7
|
} from '../../runtime/chat-attachments'
|
|
8
8
|
import type { ReadableUploadMetadataLike } from '../../runtime/chat-types'
|
|
9
9
|
import type { ContextCompactionRuntime } from '../../runtime/context-compaction/context-compaction'
|
|
10
|
+
import { shapeModelInputMessagesForBudget } from '../../runtime/model-input-budget'
|
|
10
11
|
|
|
11
12
|
export function upsertChatHistoryMessage(messages: ChatMessage[], nextMessage: ChatMessage): ChatMessage[] {
|
|
12
13
|
const existingIndex = messages.findIndex((message) => message.id === nextMessage.id)
|
|
@@ -52,11 +53,14 @@ export function createThreadTurnMessageContext(params: {
|
|
|
52
53
|
})
|
|
53
54
|
|
|
54
55
|
const buildRunInputMessages = (extraMessages: ChatMessage[] = []): ChatMessage[] =>
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
56
|
+
shapeModelInputMessagesForBudget(
|
|
57
|
+
buildModelInputMessagesWithUploadMetadata({
|
|
58
|
+
messages: [...currentMessages, ...extraMessages],
|
|
59
|
+
latestUserMessageId: params.latestUserMessageId,
|
|
60
|
+
uploadMetadataText: buildReadableUploadMetadataText(listReadableUploads(extraMessages)),
|
|
61
|
+
}),
|
|
62
|
+
{ latestUserMessageId: params.latestUserMessageId },
|
|
63
|
+
)
|
|
60
64
|
|
|
61
65
|
return {
|
|
62
66
|
get currentMessages() {
|
|
@@ -43,6 +43,7 @@ import { createWiredContextCompactionRuntime } from '../../runtime/context-compa
|
|
|
43
43
|
import { createExecutionPlanInstructionSectionCache, toExecutionPlanCacheError } from '../../runtime/execution-plan'
|
|
44
44
|
import type { HelperModelRuntime } from '../../runtime/helper-model'
|
|
45
45
|
import { HelperModelTag } from '../../runtime/helper-model'
|
|
46
|
+
import { trimOversizedTextForRequestBudget } from '../../runtime/model-input-budget'
|
|
46
47
|
import { runPostTurnSideEffects } from '../../runtime/post-turn-side-effects'
|
|
47
48
|
import { extractMessageText, toOptionalTrimmedString } from '../../runtime/thread-chat-helpers'
|
|
48
49
|
import {
|
|
@@ -118,6 +119,8 @@ type ThreadTurnPreparationTaggedError =
|
|
|
118
119
|
|
|
119
120
|
type ThreadTurnRunEffect = Effect.Effect<PreparedThreadTurnResult, ThreadTurnPreparationSurfaceError, never>
|
|
120
121
|
|
|
122
|
+
const THREAD_TURN_AUXILIARY_MESSAGE_TEXT_MAX_CHARS = 24_000
|
|
123
|
+
|
|
121
124
|
type ExecuteThreadTurnRunEffect = Effect.Effect<
|
|
122
125
|
PreparedThreadTurnResult | void,
|
|
123
126
|
ThreadTurnPreparationSurfaceError,
|
|
@@ -406,12 +409,13 @@ function deriveTurnMessageInputs(params: {
|
|
|
406
409
|
? undefined
|
|
407
410
|
: (userMessage ?? [...liveHistory].reverse().find((message) => message.role === 'user'))
|
|
408
411
|
|
|
409
|
-
const messageText =
|
|
412
|
+
const rawMessageText =
|
|
410
413
|
turnParams.kind === 'planTurn'
|
|
411
414
|
? `${turnParams.planTurn.nodeSpec.label}\n${turnParams.planTurn.nodeSpec.objective}\n${turnParams.planTurn.nodeSpec.instructions}`
|
|
412
415
|
: referenceUserMessage
|
|
413
416
|
? extractMessageText(referenceUserMessage).trim()
|
|
414
417
|
: ''
|
|
418
|
+
const messageText = trimOversizedTextForRequestBudget(rawMessageText, THREAD_TURN_AUXILIARY_MESSAGE_TEXT_MAX_CHARS)
|
|
415
419
|
|
|
416
420
|
return { userMessage, originalMessages, referenceUserMessage, messageText }
|
|
417
421
|
}
|