@llumiverse/core 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,497 @@
1
+ /**
2
+ * Utilities for cleaning up conversation objects before storage.
3
+ *
4
+ * These functions strip binary data (Uint8Array) and large base64 strings
5
+ * from conversation objects to prevent JSON.stringify corruption and reduce
6
+ * storage bloat.
7
+ *
8
+ * IMPORTANT: These functions replace entire image/document/video BLOCKS with
9
+ * text placeholders, not just the data. This ensures the conversation remains
10
+ * valid for subsequent API calls.
11
+ */
12
+
/** Text substituted for an image block that has been removed. */
const IMAGE_PLACEHOLDER = '[Image removed from conversation history]';
/** Text substituted for a document block that has been removed. */
const DOCUMENT_PLACEHOLDER = '[Document removed from conversation history]';
/** Text substituted for a video block that has been removed. */
const VIDEO_PLACEHOLDER = '[Video removed from conversation history]';
/** Suffix appended to any string shortened by the text-truncation pass. */
const TEXT_TRUNCATED_MARKER = '\n\n[Content truncated - exceeded token limit]';

/** Property name under which turn metadata is stored on a conversation object. */
const META_KEY = '_llumiverse_meta';
20
+
/**
 * Bookkeeping record attached to a conversation so that image stripping can be
 * deferred by a number of turns.
 */
export interface ConversationMeta {
    /** Turn counter; `incrementConversationTurn` adds one to it on every call. */
    turnNumber: number;
}
28
+
/**
 * Settings shared by the strip / truncate helpers in this module.
 */
export interface StripOptions {
    /**
     * How many turns images are retained before they are stripped.
     * - `undefined` or `0`: strip right away (default)
     * - `N > 0`: retain images while the current turn is below `N`, strip afterwards
     */
    keepForTurns?: number;
    /**
     * The turn the conversation is currently at; compared against `keepForTurns`.
     * When omitted, the value is read from the conversation's own metadata.
     */
    currentTurn?: number;
    /**
     * Token budget for text content in tool results; longer text is truncated.
     * - `undefined` or `0`: never truncate (default)
     * - `N > 0`: cut text to roughly `N` tokens (estimated at ~4 chars/token)
     */
    textMaxTokens?: number;
}
52
+
/**
 * Tells whether `obj` has the Bedrock image shape
 * `{ image: { source: { bytes: Uint8Array } } }`.
 *
 * @param obj Any value; non-objects simply yield `false`.
 * @returns `true` only when `obj.image.source.bytes` is a `Uint8Array`.
 */
function isBedrockImageBlock(obj: unknown): boolean {
    if (obj === null || typeof obj !== 'object') {
        return false;
    }
    const image = (obj as Record<string, unknown>).image;
    if (typeof image !== 'object' || !image) {
        return false;
    }
    const source = (image as Record<string, unknown>).source;
    if (typeof source !== 'object' || !source) {
        return false;
    }
    return (source as Record<string, unknown>).bytes instanceof Uint8Array;
}
65
+
/**
 * Tells whether `obj` is a Bedrock image block whose bytes were serialized,
 * i.e. `{ image: { source: { bytes: { _base64: string } } } }`.
 *
 * @param obj Any value; non-objects simply yield `false`.
 * @returns `true` only when `obj.image.source.bytes._base64` is a string.
 */
function isSerializedBedrockImageBlock(obj: unknown): boolean {
    // Walk obj -> image -> source -> bytes, giving up at the first hop that is not an object.
    let node: unknown = obj;
    for (const key of ['image', 'source', 'bytes']) {
        if (typeof node !== 'object' || node === null) {
            return false;
        }
        node = (node as Record<string, unknown>)[key];
    }
    if (typeof node !== 'object' || node === null) {
        return false;
    }
    // The serialized form stores the payload under `_base64`.
    return typeof (node as Record<string, unknown>)._base64 === 'string';
}
81
+
/**
 * Tells whether `obj` has the Bedrock document shape
 * `{ document: { source: { bytes: Uint8Array } } }`.
 *
 * @param obj Any value; non-objects simply yield `false`.
 * @returns `true` only when `obj.document.source.bytes` is a `Uint8Array`.
 */
function isBedrockDocumentBlock(obj: unknown): boolean {
    // Yields the value as a string-keyed record, or undefined when it is not a non-null object.
    const asRecord = (value: unknown): Record<string, unknown> | undefined =>
        typeof value === 'object' && value !== null ? (value as Record<string, unknown>) : undefined;

    const bytes = asRecord(asRecord(asRecord(obj)?.document)?.source)?.bytes;
    return bytes instanceof Uint8Array;
}
94
+
/**
 * Tells whether `obj` is a Bedrock document block whose bytes were serialized,
 * i.e. `{ document: { source: { bytes: { _base64: string } } } }`.
 *
 * @param obj Any value; non-objects simply yield `false`.
 * @returns `true` only when `obj.document.source.bytes._base64` is a string.
 */
function isSerializedBedrockDocumentBlock(obj: unknown): boolean {
    // Follow the property path; any hop through a non-object collapses to undefined.
    const bytes = ['document', 'source', 'bytes'].reduce<unknown>(
        (node, key) =>
            typeof node === 'object' && node !== null
                ? (node as Record<string, unknown>)[key]
                : undefined,
        obj,
    );
    return (
        typeof bytes === 'object' &&
        bytes !== null &&
        typeof (bytes as Record<string, unknown>)._base64 === 'string'
    );
}
110
+
/**
 * Tells whether `obj` has the Bedrock video shape
 * `{ video: { source: { bytes: Uint8Array } } }`.
 *
 * @param obj Any value; non-objects simply yield `false`.
 * @returns `true` only when `obj.video.source.bytes` is a `Uint8Array`.
 */
function isBedrockVideoBlock(obj: unknown): boolean {
    const isObject = (value: unknown): value is Record<string, unknown> =>
        typeof value === 'object' && value !== null;

    return (
        isObject(obj) &&
        isObject(obj.video) &&
        isObject(obj.video.source) &&
        obj.video.source.bytes instanceof Uint8Array
    );
}
123
+
/**
 * Tells whether `obj` is a Bedrock video block whose bytes were serialized,
 * i.e. `{ video: { source: { bytes: { _base64: string } } } }`.
 *
 * @param obj Any value; non-objects simply yield `false`.
 * @returns `true` only when `obj.video.source.bytes._base64` is a string.
 */
function isSerializedBedrockVideoBlock(obj: unknown): boolean {
    if (obj === null || typeof obj !== 'object') {
        return false;
    }
    const { video } = obj as Record<string, unknown>;
    if (video === null || typeof video !== 'object') {
        return false;
    }
    const { source } = video as Record<string, unknown>;
    if (source === null || typeof source !== 'object') {
        return false;
    }
    const { bytes } = source as Record<string, unknown>;
    if (bytes === null || typeof bytes !== 'object') {
        return false;
    }
    // The serialized form stores the payload under `_base64`.
    return typeof (bytes as Record<string, unknown>)._base64 === 'string';
}
139
+
/**
 * Tells whether `obj` is an OpenAI `image_url` block that embeds its image as
 * a base64 data URL.
 *
 * @param obj Any value; non-objects simply yield `false`.
 * @returns `true` when `obj.type` is `'image_url'` and `obj.image_url.url` is a
 *          string that starts with `data:image/` and contains `;base64,`.
 */
function isOpenAIBase64ImageBlock(obj: unknown): boolean {
    if (obj === null || typeof obj !== 'object') {
        return false;
    }
    const block = obj as Record<string, unknown>;
    if (block.type !== 'image_url') {
        return false;
    }
    const payload = block.image_url;
    if (typeof payload !== 'object' || !payload) {
        return false;
    }
    const url = (payload as Record<string, unknown>).url;
    if (typeof url !== 'string') {
        return false;
    }
    return url.startsWith('data:image/') && url.includes(';base64,');
}
153
+
/**
 * Tells whether `obj` is a Gemini `inlineData` block carrying a large payload.
 *
 * @param obj Any value; non-objects simply yield `false`.
 * @returns `true` when `obj.inlineData.data` is a string longer than 1000 characters.
 */
function isGeminiInlineDataBlock(obj: unknown): boolean {
    if (obj === null || typeof obj !== 'object') {
        return false;
    }
    const inline = (obj as Record<string, unknown>).inlineData;
    if (inline === null || typeof inline !== 'object') {
        return false;
    }
    const payload = (inline as Record<string, unknown>).data;
    // Payloads at or below the 1000-character threshold are not reported.
    return typeof payload === 'string' && payload.length > 1000;
}
164
+
/**
 * Convert a Uint8Array to a base64 string for safe JSON serialization.
 *
 * The bytes are first turned into a "binary string" (one character per byte)
 * and then encoded with `btoa`. The binary string is built in fixed-size
 * chunks rather than one character at a time, which keeps the number of string
 * concatenations small for multi-megabyte payloads such as images.
 *
 * @param bytes The binary data to encode; an empty array yields `''`.
 * @returns The base64 encoding of `bytes`.
 */
function uint8ArrayToBase64(bytes: Uint8Array): string {
    // Chunk size keeps the spread argument list well below engine call-argument limits.
    const chunkSize = 0x8000;
    let binary = '';
    for (let offset = 0; offset < bytes.length; offset += chunkSize) {
        // subarray() is a view, so no bytes are copied here.
        binary += String.fromCharCode(...bytes.subarray(offset, offset + chunkSize));
    }
    return btoa(binary);
}
175
+
/**
 * Decode a base64 string into the bytes it represents.
 *
 * @param base64 The base64 text to decode (handed to `atob`).
 * @returns A Uint8Array holding one element per decoded byte.
 */
function base64ToUint8Array(base64: string): Uint8Array {
    // atob yields a "binary string": every character's code is one byte value.
    const decoded = atob(base64);
    return Uint8Array.from(decoded, (ch) => ch.charCodeAt(0));
}
187
+
/**
 * Get metadata from a conversation object, or return defaults.
 *
 * The value stored under the metadata key is only trusted when it is an object
 * whose `turnNumber` is a finite number. Anything else (missing key, wrong
 * type, malformed record) falls back to the default, so callers such as
 * `incrementConversationTurn` never compute with `undefined` or `NaN`.
 *
 * @param conversation The conversation to inspect; may be any value.
 * @returns The stored metadata object itself when valid, otherwise a fresh
 *          `{ turnNumber: 0 }`.
 */
export function getConversationMeta(conversation: unknown): ConversationMeta {
    if (typeof conversation === 'object' && conversation !== null) {
        const meta = (conversation as Record<string, unknown>)[META_KEY];
        if (typeof meta === 'object' && meta !== null) {
            const { turnNumber } = meta as Partial<ConversationMeta>;
            // Validate before asserting the type: the record may come from untyped storage.
            if (typeof turnNumber === 'number' && Number.isFinite(turnNumber)) {
                return meta as ConversationMeta;
            }
        }
    }
    return { turnNumber: 0 };
}
200
+
/** Property under which an array conversation is stored once it has been wrapped in an object. */
const ARRAY_WRAPPER_KEY = '_arrayConversation';

/**
 * Produce a copy of `conversation` that carries `meta`.
 *
 * - Arrays are placed inside a new object (under the wrapper key) so that the
 *   metadata can sit next to them and survive JSON serialization.
 * - Other objects are shallow-copied with the metadata property added or replaced.
 * - Anything else is handed back unchanged, without metadata.
 *
 * @param conversation The conversation to annotate.
 * @param meta The metadata to attach.
 * @returns The annotated copy, or the input itself when it is not an object.
 */
export function setConversationMeta(conversation: unknown, meta: ConversationMeta): unknown {
    if (conversation === null || typeof conversation !== 'object') {
        return conversation;
    }
    const base: object = Array.isArray(conversation)
        ? { [ARRAY_WRAPPER_KEY]: conversation }
        : conversation;
    return { ...base, [META_KEY]: meta };
}
218
+
/**
 * Extract the message array from a conversation that `setConversationMeta`
 * wrapped in an object.
 *
 * @param conversation The possibly wrapped conversation.
 * @returns The array stored under the wrapper key, or `undefined` when
 *          `conversation` is not an object or holds no array there.
 */
export function unwrapConversationArray<T = unknown>(conversation: unknown): T[] | undefined {
    if (conversation === null || typeof conversation !== 'object') {
        return undefined;
    }
    const wrapped = (conversation as Record<string, unknown>)[ARRAY_WRAPPER_KEY];
    return Array.isArray(wrapped) ? (wrapped as T[]) : undefined;
}
233
+
/**
 * Advance the conversation's turn counter by one.
 *
 * @param conversation The conversation whose metadata should be bumped.
 * @returns Whatever `setConversationMeta` returns for the updated metadata.
 */
export function incrementConversationTurn(conversation: unknown): unknown {
    const current = getConversationMeta(conversation);
    const next: ConversationMeta = { ...current, turnNumber: current.turnNumber + 1 };
    return setConversationMeta(conversation, next);
}
241
+
/**
 * Make a conversation safe to pass through JSON.stringify by dealing with its
 * binary (Uint8Array) content.
 *
 * JSON.stringify turns a Uint8Array into an index-keyed object such as
 * `{ "0": 137, "1": 80, ... }`, which later API calls cannot use as binary data.
 * Depending on the options this function therefore either:
 * - replaces binary content with placeholders (`keepForTurns` of 0, the default,
 *   or once the current turn has reached `keepForTurns`), or
 * - converts binary content to a base64 form that can be stored, while the
 *   current turn is still below `keepForTurns`.
 *
 * @param obj The conversation object to process
 * @param options Optional turn-based settings; `currentTurn` falls back to the
 *                turn number recorded in the conversation metadata
 * @returns A new object with binary content stripped or serialized
 */
export function stripBinaryFromConversation(obj: unknown, options?: StripOptions): unknown {
    const keepForTurns = options?.keepForTurns ?? 0;
    const turn = options?.currentTurn ?? getConversationMeta(obj).turnNumber;

    // Still inside the retention window: keep the images, but in a JSON-safe encoding.
    const withinKeepWindow = keepForTurns > 0 && turn < keepForTurns;
    return withinKeepWindow
        ? serializeBinaryForStorage(obj)
        : stripBinaryFromConversationInternal(obj);
}
270
+
/**
 * Deep-copy `obj`, swapping every Uint8Array for `{ _base64: string }` so the
 * result can be stored as JSON while the surrounding block structure is kept.
 *
 * @param obj The value to copy; primitives, `null` and `undefined` are returned as-is.
 * @returns The copied structure with binary values serialized.
 */
function serializeBinaryForStorage(obj: unknown): unknown {
    if (obj === null || obj === undefined || typeof obj !== 'object') {
        return obj;
    }
    if (obj instanceof Uint8Array) {
        return { _base64: uint8ArrayToBase64(obj) };
    }
    if (Array.isArray(obj)) {
        return obj.map((entry: unknown) => serializeBinaryForStorage(entry));
    }

    const source = obj as Record<string, unknown>;
    const copy: Record<string, unknown> = {};
    for (const key of Object.keys(source)) {
        copy[key] = serializeBinaryForStorage(source[key]);
    }
    return copy;
}
295
+
/**
 * Restore Uint8Array values from their base64 serialization.
 * Call this before sending conversation to API if images were preserved.
 *
 * Every object of the exact shape `{ _base64: string }` (a single key) is
 * replaced by the decoded bytes; arrays and other objects are copied
 * recursively. A Uint8Array that is already present is returned as-is rather
 * than being walked like a plain object, which would rebuild it as an
 * index-keyed object (`{ "0": …, "1": … }`) and destroy the binary data.
 *
 * @param obj The stored conversation (or any nested value of it).
 * @returns A copy of `obj` with serialized binary values decoded.
 */
export function deserializeBinaryFromStorage(obj: unknown): unknown {
    if (obj === null || obj === undefined || typeof obj !== 'object') {
        return obj;
    }

    // Raw bytes need no decoding and must not be copied key by key.
    if (obj instanceof Uint8Array) {
        return obj;
    }

    if (Array.isArray(obj)) {
        return obj.map((item: unknown) => deserializeBinaryFromStorage(item));
    }

    const record = obj as Record<string, unknown>;
    const keys = Object.keys(record);

    // Check for our serialized format: exactly one key, `_base64`, holding a string.
    if (keys.length === 1 && typeof record._base64 === 'string') {
        return base64ToUint8Array(record._base64);
    }

    const result: Record<string, unknown> = {};
    for (const key of keys) {
        result[key] = deserializeBinaryFromStorage(record[key]);
    }
    return result;
}
325
+
/**
 * Recursively copy `obj`, replacing binary content with text placeholders.
 *
 * - A Uint8Array, or an object of the exact shape `{ _base64: string }`,
 *   becomes the image placeholder string.
 * - Array items that are Bedrock image / document / video blocks (raw or
 *   serialized) are replaced wholesale by `{ text: <placeholder> }`.
 * - The metadata property of an object is carried over untouched.
 *
 * @param obj The value to copy; primitives, `null` and `undefined` are returned as-is.
 * @returns The copied structure with binary content removed.
 */
function stripBinaryFromConversationInternal(obj: unknown): unknown {
    if (obj === null || obj === undefined || typeof obj !== 'object') {
        return obj;
    }

    // Bare binary value
    if (obj instanceof Uint8Array) {
        return IMAGE_PLACEHOLDER;
    }

    // Bare serialized binary value
    const record = obj as Record<string, unknown>;
    if (typeof record._base64 === 'string' && Object.keys(record).length === 1) {
        return IMAGE_PLACEHOLDER;
    }

    if (Array.isArray(obj)) {
        return obj.map((item: unknown) => {
            // Whole media blocks are swapped for a text block, not just their bytes.
            let placeholder: string | undefined;
            if (isBedrockImageBlock(item) || isSerializedBedrockImageBlock(item)) {
                placeholder = IMAGE_PLACEHOLDER;
            } else if (isBedrockDocumentBlock(item) || isSerializedBedrockDocumentBlock(item)) {
                placeholder = DOCUMENT_PLACEHOLDER;
            } else if (isBedrockVideoBlock(item) || isSerializedBedrockVideoBlock(item)) {
                placeholder = VIDEO_PLACEHOLDER;
            }
            return placeholder === undefined
                ? stripBinaryFromConversationInternal(item)
                : { text: placeholder };
        });
    }

    const stripped: Record<string, unknown> = {};
    for (const key of Object.keys(record)) {
        const value = record[key];
        // Metadata is copied by reference, never rewritten.
        stripped[key] = key === META_KEY ? value : stripBinaryFromConversationInternal(value);
    }
    return stripped;
}
373
+
/**
 * Remove large base64 image data from a conversation to keep storage small.
 *
 * Base64 strings survive JSON.stringify (unlike Uint8Array) but can bloat the
 * stored conversation considerably. Whole image blocks are replaced with text
 * placeholders:
 * - OpenAI: { type: "image_url", image_url: { url: "data:..." } } → { type: "text", text: "[placeholder]" }
 * - Gemini: { inlineData: { data: "...", mimeType: "..." } } → { text: "[placeholder]" }
 *
 * While the current turn is below `keepForTurns` (and `keepForTurns` is
 * positive) the input is returned unchanged.
 *
 * @param obj The conversation object to process
 * @param options Optional turn-based settings; `currentTurn` falls back to the
 *                turn number recorded in the conversation metadata
 * @returns The input itself when images are retained, otherwise a new object
 *          with image blocks replaced by text placeholders
 */
export function stripBase64ImagesFromConversation(obj: unknown, options?: StripOptions): unknown {
    const keepForTurns = options?.keepForTurns ?? 0;
    const turn = options?.currentTurn ?? getConversationMeta(obj).turnNumber;

    // Base64 strings are already JSON-safe, so retaining them needs no conversion.
    const withinKeepWindow = keepForTurns > 0 && turn < keepForTurns;
    return withinKeepWindow ? obj : stripBase64ImagesFromConversationInternal(obj);
}
399
+
/**
 * Recursively copy `obj`, replacing base64 image content with text placeholders.
 *
 * - A string that is a base64 image data URL becomes the image placeholder.
 * - Array items that are OpenAI base64 `image_url` blocks become
 *   `{ type: 'text', text: <placeholder> }`; Gemini `inlineData` blocks become
 *   `{ text: <placeholder> }`.
 * - The metadata property of an object is carried over untouched.
 * - A Uint8Array is returned as-is: copying it key by key like a plain object
 *   would turn it into an index-keyed object (`{ "0": …, "1": … }`) that the
 *   binary-stripping pass can no longer recognise.
 *
 * @param obj The value to copy; other primitives, `null` and `undefined` are returned as-is.
 * @returns The copied structure with base64 image content removed.
 */
function stripBase64ImagesFromConversationInternal(obj: unknown): unknown {
    if (obj === null || obj === undefined) return obj;

    // Handle base64 data URL string directly
    if (typeof obj === 'string') {
        const isImageDataUrl = obj.startsWith('data:image/') && obj.includes(';base64,');
        return isImageDataUrl ? IMAGE_PLACEHOLDER : obj;
    }

    if (typeof obj !== 'object') {
        return obj;
    }

    // Raw binary is not this pass's concern; leave it intact.
    if (obj instanceof Uint8Array) {
        return obj;
    }

    if (Array.isArray(obj)) {
        return obj.map((item: unknown) => {
            // Replace entire OpenAI image_url blocks with text blocks
            if (isOpenAIBase64ImageBlock(item)) {
                return { type: 'text', text: IMAGE_PLACEHOLDER };
            }
            // Replace entire Gemini inlineData blocks with text blocks
            if (isGeminiInlineDataBlock(item)) {
                return { text: IMAGE_PLACEHOLDER };
            }
            return stripBase64ImagesFromConversationInternal(item);
        });
    }

    const result: Record<string, unknown> = {};
    for (const [key, value] of Object.entries(obj)) {
        // Preserve metadata
        result[key] = key === META_KEY ? value : stripBase64ImagesFromConversationInternal(value);
    }
    return result;
}
437
+
/** Rough number of characters that make up one token, used to size truncation. */
const CHARS_PER_TOKEN = 4;

/**
 * Shorten oversized text in a conversation to limit storage and context size.
 *
 * The token limit is converted to a character limit using the ~4 chars/token
 * estimate, and strings longer than that are cut.
 *
 * Intended for:
 * - Bedrock: toolResult.content[].text
 * - OpenAI: tool message content (string)
 * - Gemini: function response content
 *
 * @param obj The conversation object to process
 * @param options Settings; only `textMaxTokens` is read here
 * @returns The input itself when `textMaxTokens` is unset, zero or negative,
 *          otherwise a new object with long text truncated
 */
export function truncateLargeTextInConversation(obj: unknown, options?: StripOptions): unknown {
    const tokenBudget = options?.textMaxTokens;

    // No usable budget means truncation is switched off.
    if (!tokenBudget || tokenBudget <= 0) {
        return obj;
    }

    return truncateLargeTextInternal(obj, tokenBudget * CHARS_PER_TOKEN);
}
467
+
/**
 * Recursively copy `obj`, cutting every text string longer than `maxChars`
 * and appending the truncation marker to it.
 *
 * Binary payloads are never truncated, because a shortened payload with the
 * marker appended can no longer be decoded:
 * - a Uint8Array is returned as-is (copying it key by key would also turn it
 *   into an index-keyed plain object);
 * - an object of the exact shape `{ _base64: string }` is returned as-is;
 * - a base64 image data URL string is returned as-is;
 * - the value of a Gemini-style `inlineData` property is carried over untouched.
 *
 * The cut never falls between the two halves of a UTF-16 surrogate pair, so
 * the truncated text does not end in a lone surrogate.
 *
 * @param obj The value to copy; other primitives, `null` and `undefined` are returned as-is.
 * @param maxChars Maximum number of UTF-16 code units kept from each string.
 * @returns The copied structure with long text truncated.
 */
function truncateLargeTextInternal(obj: unknown, maxChars: number): unknown {
    if (obj === null || obj === undefined) return obj;

    // Truncate large strings
    if (typeof obj === 'string') {
        if (obj.length <= maxChars) {
            return obj;
        }
        // Embedded images are data, not text.
        if (obj.startsWith('data:image/') && obj.includes(';base64,')) {
            return obj;
        }
        let cut = Math.max(0, Math.floor(maxChars));
        // Back off by one when the cut would separate a high surrogate from its low surrogate.
        const before = obj.charCodeAt(cut - 1);
        const after = obj.charCodeAt(cut);
        if (before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff) {
            cut -= 1;
        }
        return obj.substring(0, cut) + TEXT_TRUNCATED_MARKER;
    }

    if (typeof obj !== 'object') {
        return obj;
    }

    // Raw binary: leave intact.
    if (obj instanceof Uint8Array) {
        return obj;
    }

    if (Array.isArray(obj)) {
        return obj.map((item: unknown) => truncateLargeTextInternal(item, maxChars));
    }

    const record = obj as Record<string, unknown>;
    const keys = Object.keys(record);

    // Serialized binary wrapper: leave intact so it can still be decoded later.
    if (keys.length === 1 && typeof record._base64 === 'string') {
        return obj;
    }

    const result: Record<string, unknown> = {};
    for (const key of keys) {
        const value = record[key];
        // Preserve metadata and inline binary payloads without truncation
        if (key === META_KEY || key === 'inlineData') {
            result[key] = value;
        } else {
            result[key] = truncateLargeTextInternal(value, maxChars);
        }
    }
    return result;
}
package/src/index.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  export * from "./Driver.js";
2
2
  export * from "./json.js";
3
3
  export * from "./stream.js";
4
+ export * from "./conversation-utils.js";
4
5
  export * from "@llumiverse/common";
package/src/stream.ts CHANGED
@@ -15,6 +15,7 @@ export async function readStreamAsUint8Array(stream: ReadableStream): Promise<Ui
15
15
 
16
16
  for await (const chunk of stream) {
17
17
  const uint8Chunk = chunk instanceof Uint8Array ? chunk : new Uint8Array(chunk);
18
+
18
19
  chunks.push(uint8Chunk);
19
20
  totalLength += uint8Chunk.length;
20
21
  }