@persistio/openclaw-plugin 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -7,8 +7,13 @@ import type {
7
7
  } from 'openclaw/plugin-sdk/memory-core-host-engine-storage';
8
8
  import { Type } from '@sinclair/typebox';
9
9
  import { PersistioClient, type PersistioConfig, type PersistioMemory, type RecallBundle } from './client.js';
10
-
11
- type OpenClawMessageRole = 'user' | 'assistant' | 'tool';
10
+ import {
11
+ prepareMessageForIngest,
12
+ resolveIngestPolicy,
13
+ shouldIngestSession,
14
+ type OpenClawMessageRole,
15
+ type OmissionSummary,
16
+ } from './ingest-policy.js';
12
17
 
13
18
  interface SessionMessageKeyStore {
14
19
  keys: Set<string>;
@@ -43,6 +48,12 @@ function resolveSendConfig(raw: Record<string, unknown>): PersistioConfig['send'
43
48
  };
44
49
  }
45
50
 
51
/**
 * Validates the optional `recallMinSimilarity` setting.
 *
 * @param value - Raw configuration value of unknown shape.
 * @returns The value when it is a finite number within the closed range [0, 1];
 *          otherwise `undefined`.
 */
function resolveRecallMinSimilarity(value: unknown): number | undefined {
  if (typeof value !== 'number' || !Number.isFinite(value)) return undefined;
  const withinUnitRange = value >= 0 && value <= 1;
  return withinUnitRange ? value : undefined;
}
56
+
46
57
  function resolveConfig(raw: unknown): PersistioConfig {
47
58
  const c = (raw ?? {}) as Record<string, unknown>;
48
59
  return {
@@ -50,7 +61,9 @@ function resolveConfig(raw: unknown): PersistioConfig {
50
61
  apiKey: typeof c['apiKey'] === 'string' ? c['apiKey'] : '',
51
62
  tokenBudget: typeof c['tokenBudget'] === 'number' ? c['tokenBudget'] : 2000,
52
63
  recallTopK: typeof c['recallTopK'] === 'number' ? c['recallTopK'] : 10,
64
+ recallMinSimilarity: resolveRecallMinSimilarity(c['recallMinSimilarity']),
53
65
  recallTimeout: typeof c['recallTimeout'] === 'number' ? c['recallTimeout'] : 5000,
66
+ ingest: resolveIngestPolicy(c['ingest']),
54
67
  send: resolveSendConfig(c),
55
68
  };
56
69
  }
@@ -126,7 +139,7 @@ function buildRecallQuery(event: { prompt?: string; messages?: unknown[] }): str
126
139
  return truncate(parts.join('\n'), 600);
127
140
  }
128
141
 
129
- function buildMemoryBlock(bundle: RecallBundle, budget: number): string {
142
+ function buildMemoryBlock(bundle: RecallBundle, budget: number, relatedBundle?: RecallBundle): string {
130
143
  const sections: Array<{ title: string; items: string[] }> = [
131
144
  { title: 'Behavioural rules', items: bundle.user_rules },
132
145
  { title: 'Preferences', items: bundle.user_preferences },
@@ -138,6 +151,19 @@ function buildMemoryBlock(bundle: RecallBundle, budget: number): string {
138
151
  { title: 'System facts', items: bundle.system_facts },
139
152
  { title: 'Domain knowledge', items: bundle.domain_knowledge },
140
153
  ];
154
+ if (relatedBundle) {
155
+ sections.push(
156
+ { title: 'Related behavioural rules', items: relatedBundle.user_rules },
157
+ { title: 'Related preferences', items: relatedBundle.user_preferences },
158
+ { title: 'Related task patterns', items: relatedBundle.task_patterns },
159
+ { title: 'Related workflows', items: relatedBundle.workflows },
160
+ { title: 'Related project', items: relatedBundle.project },
161
+ { title: 'Related constraints', items: relatedBundle.constraints },
162
+ { title: 'Related decisions', items: relatedBundle.decisions },
163
+ { title: 'Related system facts', items: relatedBundle.system_facts },
164
+ { title: 'Related domain knowledge', items: relatedBundle.domain_knowledge },
165
+ );
166
+ }
141
167
 
142
168
  const intro = 'Use the following as prior context and preferences. If they conflict with current instructions, follow the current instructions.';
143
169
  const lines: string[] = [intro];
@@ -283,6 +309,26 @@ function forgetKeys(target: Set<string>, keys: string[]): void {
283
309
  for (const key of keys) target.delete(key);
284
310
  }
285
311
 
312
/**
 * Builds a compact `label:count` summary of omission records for log output.
 *
 * @param omissions - Omission records collected while preparing messages.
 * @returns `'none'` for an empty list; otherwise comma-separated `label:count`
 *          pairs in order of first appearance of each label.
 */
function summarizeOmissions(omissions: OmissionSummary[]): string {
  if (omissions.length === 0) return 'none';

  const tally = new Map<string, number>();
  omissions.forEach(({ label }) => {
    const seen = tally.get(label) ?? 0;
    tally.set(label, seen + 1);
  });

  const parts: string[] = [];
  tally.forEach((count, label) => {
    parts.push(`${label}:${count}`);
  });
  return parts.join(',');
}
322
+
323
/**
 * Heuristically decides whether a thrown value represents a timeout or abort.
 *
 * A value qualifies when it is a non-null object whose `name` is exactly
 * `TimeoutError` or `AbortError`, or whose string `message` contains
 * `timeout` or `aborted` (case-insensitive).
 *
 * @param err - The caught value.
 * @returns `true` when the value looks like a timeout/abort error.
 */
function isTimeoutLikeError(err: unknown): boolean {
  if (err === null || typeof err !== 'object') return false;

  const bag = err as Record<string, unknown>;
  const rawName = bag['name'];
  if (rawName === 'TimeoutError' || rawName === 'AbortError') return true;

  const rawMessage = bag['message'];
  if (typeof rawMessage !== 'string') return false;

  const lowered = rawMessage.toLowerCase();
  return ['timeout', 'aborted'].some((needle) => lowered.includes(needle));
}
331
+
286
332
  const PERSISTIO_MEMORY_PATH_PREFIX = 'persistio://memory/';
287
333
 
288
334
  function createClient(config: PersistioConfig, recallTopK = config.recallTopK): PersistioClient {
@@ -383,8 +429,7 @@ function createMemorySearchManager(config: PersistioConfig): MemorySearchManager
383
429
  throw new Error(`Unsupported Persistio memory path: ${params.relPath}`);
384
430
  }
385
431
 
386
- const memories = await client.listMemories();
387
- const memory = memories.find((item) => item.id === memoryId);
432
+ const memory = await client.getMemory(memoryId, { includePending: true });
388
433
  if (!memory) {
389
434
  throw new Error(`Persistio memory not found: ${memoryId}`);
390
435
  }
@@ -466,8 +511,8 @@ export default definePluginEntry({
466
511
  api.on('before_prompt_build', async (event) => {
467
512
  try {
468
513
  const query = buildRecallQuery(event);
469
- const bundle = await client.recallBundle(query);
470
- const block = buildMemoryBlock(bundle, cfg.tokenBudget);
514
+ const recall = await client.recallBundle(query);
515
+ const block = buildMemoryBlock(recall.bundle, cfg.tokenBudget, recall.related_bundle);
471
516
  if (!block) return;
472
517
  return { appendSystemContext: block };
473
518
  } catch (err) {
@@ -484,8 +529,18 @@ export default definePluginEntry({
484
529
  try {
485
530
  const sessionId = context?.sessionId ?? event.runId ?? 'unknown-session';
486
531
  if (sessionId.startsWith('announce:')) return;
532
+ if (!shouldIngestSession(sessionId, cfg.ingest)) {
533
+ api.logger?.debug?.(`openclaw-persistio: ingest skipped non-main session: ${sessionId}`);
534
+ return;
535
+ }
487
536
  const chunks: Array<{ role: string; content: string; timestamp: string }> = [];
488
537
  const chunkKeys: string[] = [];
538
+ let agentCharsSent = 0;
539
+ let originalChars = 0;
540
+ let preparedChars = 0;
541
+ let truncatedMessages = 0;
542
+ let skippedMessages = 0;
543
+ const omissions: OmissionSummary[] = [];
489
544
  const now = Date.now();
490
545
  const sentKeys = getSessionKeyStore(sentMessageKeysBySession, sessionId, now);
491
546
  const pendingKeys = getSessionKeyStore(pendingMessageKeysBySession, sessionId, now);
@@ -501,17 +556,55 @@ export default definePluginEntry({
501
556
  if (sentKeys.has(key) || pendingKeys.has(key)) continue;
502
557
 
503
558
  const ts = resolveMessageTimestamp(m) ?? new Date().toISOString();
559
+ const prepared = prepareMessageForIngest({
560
+ role,
561
+ text,
562
+ policy: cfg.ingest,
563
+ remainingAgentChars: Math.max(0, cfg.ingest.agent.maxCharsPerTurn - agentCharsSent),
564
+ remainingChunks: Math.max(0, cfg.ingest.maxChunksPerTurn - chunks.length),
565
+ });
566
+
567
+ originalChars += prepared.originalChars;
568
+ preparedChars += prepared.preparedChars;
569
+ omissions.push(...prepared.omissions);
570
+ if (prepared.truncated) truncatedMessages += 1;
571
+ if (prepared.chunks.length === 0) {
572
+ skippedMessages += 1;
573
+ continue;
574
+ }
575
+
504
576
  chunkKeys.push(key);
505
- chunks.push({ role, content: text, timestamp: ts });
577
+ if (role === 'assistant') {
578
+ agentCharsSent += prepared.preparedChars;
579
+ }
580
+ chunks.push(...prepared.chunks.map((content) => ({ role, content, timestamp: ts })));
581
+
582
+ if (chunks.length >= cfg.ingest.maxChunksPerTurn) break;
506
583
  }
507
584
 
508
585
  if (chunks.length === 0) return;
586
+ if (truncatedMessages > 0 || omissions.length > 0 || skippedMessages > 0) {
587
+ api.logger?.info?.(
588
+ `openclaw-persistio: ingest planned session=${sessionId} chunks=${chunks.length} `
589
+ + `originalChars=${originalChars} preparedChars=${preparedChars} `
590
+ + `truncatedMessages=${truncatedMessages} skippedMessages=${skippedMessages} `
591
+ + `omissions=${summarizeOmissions(omissions)}`,
592
+ );
593
+ }
509
594
  rememberKeys(pendingKeys, chunkKeys);
510
595
  client.ingest(sessionId, chunks)
511
596
  .then(() => {
512
597
  rememberKeys(sentKeys, chunkKeys, MAX_SENT_KEYS_PER_SESSION);
513
598
  })
514
599
  .catch((err: unknown) => {
600
+ if (isTimeoutLikeError(err)) {
601
+ rememberKeys(sentKeys, chunkKeys, MAX_SENT_KEYS_PER_SESSION);
602
+ api.logger?.warn?.(
603
+ `openclaw-persistio: ingest timeout after ${cfg.ingest.timeoutMs}ms; `
604
+ + `outcome is ambiguous, suppressing retry for ${chunkKeys.length} messages in session=${sessionId}`,
605
+ );
606
+ return;
607
+ }
515
608
  api.logger?.warn?.(`openclaw-persistio: ingest error: ${String(err)}`);
516
609
  })
517
610
  .finally(() => {
@@ -0,0 +1,508 @@
1
/** Message roles distinguished when preparing content for ingest. */
export type OpenClawMessageRole = 'user' | 'assistant' | 'tool';

/** Limits applied to messages with the `user` role. */
interface UserIngestLimits {
  /** Character budget for a single user message. */
  maxCharsPerMessage: number;
}

/** Limits and filtering options applied to messages with the `assistant` role. */
interface AgentIngestLimits {
  /** `bounded` enables the collapsing filters; `raw` skips them. */
  mode: 'bounded' | 'raw';
  /** Character budget applied to a message before filtering. */
  maxCharsPerMessage: number;
  /** Character budget applied to a message after filtering. */
  maxCharsAfterFiltering: number;
  /** Character budget for assistant content across one turn. */
  maxCharsPerTurn: number;
  /** Blocks with at least this many characters are collapsed. */
  largeBlockThresholdChars: number;
  /** Blocks with at least this many lines are collapsed. */
  largeBlockThresholdLines: number;
  /** Number of markdown table body rows kept before truncation. */
  maxTableRows: number;
}

/** Complete set of knobs controlling what is sent to ingest. */
export interface PersistioIngestPolicy {
  /** Ingest timeout in milliseconds (enforcement happens outside this module). */
  timeoutMs: number;
  /** Maximum number of characters in one chunk. */
  maxChunkChars: number;
  /** Maximum number of chunks produced for one turn. */
  maxChunksPerTurn: number;
  /** When true, `agent:` sessions other than `agent:main:` are not ingested. */
  skipSubagentSessions: boolean;
  user: UserIngestLimits;
  agent: AgentIngestLimits;
}

/** Size record of one piece of content replaced by a marker. */
export interface OmissionSummary {
  label: string;
  chars: number;
  lines: number;
}

/** Result of preparing a single message for ingest. */
export interface PreparedIngestMessage {
  /** Chunks to send; empty when the message is skipped. */
  chunks: string[];
  /** Length of the normalized input text. */
  originalChars: number;
  /** Combined length of the returned chunks. */
  preparedChars: number;
  /** True when any content was dropped, collapsed, or cut. */
  truncated: boolean;
  omissions: OmissionSummary[];
}

/** Arguments accepted by `prepareMessageForIngest`. */
export interface PrepareMessageInput {
  role: OpenClawMessageRole;
  text: string;
  policy: PersistioIngestPolicy;
  /** Assistant character budget still available in the current turn. */
  remainingAgentChars: number;
  /** Chunk slots still available in the current turn. */
  remainingChunks: number;
}

/** Policy used for every setting the configuration does not override. */
export const DEFAULT_INGEST_POLICY: PersistioIngestPolicy = {
  timeoutMs: 30000,
  maxChunkChars: 6000,
  maxChunksPerTurn: 12,
  skipSubagentSessions: true,
  user: { maxCharsPerMessage: 24000 },
  agent: {
    mode: 'bounded',
    maxCharsPerMessage: 24000,
    maxCharsAfterFiltering: 9000,
    maxCharsPerTurn: 24000,
    largeBlockThresholdChars: 1200,
    largeBlockThresholdLines: 80,
    maxTableRows: 12,
  },
};
62
+
63
/**
 * Reads a numeric setting, falling back when the value is unusable.
 *
 * @param value - Raw configuration value.
 * @param fallback - Returned when `value` is not a finite number of at least `min`.
 * @param min - Smallest accepted value (default 1).
 * @returns `value` rounded down to an integer, or `fallback`.
 */
function readNumber(value: unknown, fallback: number, min = 1): number {
  if (typeof value !== 'number') return fallback;
  if (!Number.isFinite(value)) return fallback;
  if (!(value >= min)) return fallback;
  return Math.floor(value);
}
68
+
69
/**
 * Reads a boolean setting.
 *
 * @param value - Raw configuration value.
 * @param fallback - Returned when `value` is not a boolean.
 * @returns `value` when it is `true` or `false`, otherwise `fallback`.
 */
function readBoolean(value: unknown, fallback: boolean): boolean {
  if (typeof value !== 'boolean') return fallback;
  return value;
}
72
+
73
/**
 * Treats an unknown value as a string-keyed record.
 *
 * @param value - Raw configuration value.
 * @returns `value` itself when it is a non-null object (arrays included),
 *          otherwise a fresh empty object.
 */
function readObject(value: unknown): Record<string, unknown> {
  if (value === null || typeof value !== 'object') return {};
  return value as Record<string, unknown>;
}
78
+
79
/**
 * Builds a complete ingest policy from raw, untrusted configuration.
 *
 * Every field is validated individually; anything missing or invalid takes
 * the corresponding value from `DEFAULT_INGEST_POLICY`. `maxChunkChars` must
 * be at least 256; all other numeric fields must be at least 1.
 *
 * @param raw - The `ingest` section of the plugin configuration, if any.
 * @returns A fully populated policy.
 */
export function resolveIngestPolicy(raw: unknown): PersistioIngestPolicy {
  const defaults = DEFAULT_INGEST_POLICY;
  const root = readObject(raw);
  const userRaw = readObject(root['user']);
  const agentRaw = readObject(root['agent']);

  const user: PersistioIngestPolicy['user'] = {
    maxCharsPerMessage: readNumber(userRaw['maxCharsPerMessage'], defaults.user.maxCharsPerMessage),
  };

  const agent: PersistioIngestPolicy['agent'] = {
    // Only the explicit string 'raw' opts out of the default mode.
    mode: agentRaw['mode'] === 'raw' ? 'raw' : defaults.agent.mode,
    maxCharsPerMessage: readNumber(agentRaw['maxCharsPerMessage'], defaults.agent.maxCharsPerMessage),
    maxCharsAfterFiltering: readNumber(agentRaw['maxCharsAfterFiltering'], defaults.agent.maxCharsAfterFiltering),
    maxCharsPerTurn: readNumber(agentRaw['maxCharsPerTurn'], defaults.agent.maxCharsPerTurn),
    largeBlockThresholdChars: readNumber(agentRaw['largeBlockThresholdChars'], defaults.agent.largeBlockThresholdChars),
    largeBlockThresholdLines: readNumber(agentRaw['largeBlockThresholdLines'], defaults.agent.largeBlockThresholdLines),
    maxTableRows: readNumber(agentRaw['maxTableRows'], defaults.agent.maxTableRows),
  };

  return {
    timeoutMs: readNumber(root['timeoutMs'], defaults.timeoutMs),
    maxChunkChars: readNumber(root['maxChunkChars'], defaults.maxChunkChars, 256),
    maxChunksPerTurn: readNumber(root['maxChunksPerTurn'], defaults.maxChunksPerTurn),
    skipSubagentSessions: readBoolean(root['skipSubagentSessions'], defaults.skipSubagentSessions),
    user,
    agent,
  };
}
110
+
111
/**
 * Decides whether messages from a session should be ingested.
 *
 * With `skipSubagentSessions` enabled, sessions whose id starts with `agent:`
 * are rejected unless the id starts with `agent:main:`. All other ids, and
 * every id when the flag is off, are accepted.
 *
 * @param sessionId - Identifier of the session being processed.
 * @param policy - Active ingest policy.
 * @returns `true` when the session should be ingested.
 */
export function shouldIngestSession(sessionId: string, policy: PersistioIngestPolicy): boolean {
  if (policy.skipSubagentSessions) {
    const isAgentScoped = sessionId.startsWith('agent:');
    const isMainAgent = sessionId.startsWith('agent:main:');
    return !isAgentScoped || isMainAgent;
  }
  return true;
}
115
+
116
/**
 * Counts the lines in a text.
 *
 * @param text - Text to measure.
 * @returns 0 for the empty string; otherwise the number of `\n` characters plus one.
 */
function countLines(text: string): number {
  if (text.length === 0) return 0;
  let total = 1;
  for (let at = text.indexOf('\n'); at !== -1; at = text.indexOf('\n', at + 1)) {
    total += 1;
  }
  return total;
}
119
+
120
/**
 * Formats the placeholder that stands in for omitted content.
 *
 * @param label - Kind of content that was omitted (for example `Diff`).
 * @param text - The omitted content; only its line and character counts are reported.
 * @param extra - Optional detail appended after the counts.
 * @returns A marker of the form `[<label> omitted: N lines, M chars[, extra]]`.
 */
function marker(label: string, text: string, extra?: string): string {
  const details = [`${countLines(text)} lines`, `${text.length} chars`];
  if (extra) details.push(extra);
  return `[${label} omitted: ${details.join(', ')}]`;
}
124
+
125
/**
 * Normalizes whitespace before any measuring or filtering takes place.
 *
 * Steps, in order: convert `\r\n` and lone `\r` to `\n`; strip spaces and
 * tabs that precede a newline; collapse runs of four or more newlines to
 * three; trim both ends.
 *
 * @param text - Raw message text.
 * @returns The normalized text.
 */
function normalizeText(text: string): string {
  const unixNewlines = text.replace(/\r\n?/g, '\n');
  const noTrailingBlanks = unixNewlines.replace(/[ \t]+\n/g, '\n');
  const cappedGaps = noTrailingBlanks.replace(/\n{4,}/g, '\n\n\n');
  return cappedGaps.trim();
}
132
+
133
/**
 * Appends a size record for omitted content to the running list.
 *
 * @param omissions - List that receives the record (mutated in place).
 * @param label - Kind of content that was omitted.
 * @param text - The omitted content; its length and line count are stored.
 */
function pushOmission(omissions: OmissionSummary[], label: string, text: string): void {
  const entry: OmissionSummary = {
    label,
    chars: text.length,
    lines: countLines(text),
  };
  omissions.push(entry);
}
136
+
137
/**
 * Replaces oversized fenced code blocks with a one-line marker.
 *
 * A fenced block (triple backticks, optional info string) is kept verbatim
 * only when it is below both the character threshold and the line threshold
 * of the agent policy. Otherwise it is recorded as an omission and replaced
 * by a `Code block` marker that carries the fence language when one is given.
 *
 * @param text - Text to scan.
 * @param policy - Active ingest policy providing the thresholds.
 * @param omissions - Receives a record for every collapsed block.
 * @returns The text with large fenced blocks collapsed.
 */
function collapseLargeFencedBlocks(
  text: string,
  policy: PersistioIngestPolicy,
  omissions: OmissionSummary[],
): string {
  const collapse = (fenced: string, info: string): string => {
    const isSmall =
      fenced.length < policy.agent.largeBlockThresholdChars &&
      countLines(fenced) < policy.agent.largeBlockThresholdLines;
    if (isSmall) return fenced;

    pushOmission(omissions, 'Code block', fenced);
    const lang = info.trim();
    return lang
      ? marker('Code block', fenced, `language=${lang}`)
      : marker('Code block', fenced);
  };

  return text.replace(/```([^\n`]*)\n([\s\S]*?)```/g, collapse);
}
154
+
155
/**
 * Detects a line that looks like an encoded (base64 / base64url) blob.
 *
 * After trimming, the line must be at least 500 characters long, contain no
 * whitespace, consist only of `A-Z a-z 0-9 + / = _ -`, and be more than 85%
 * alphanumeric.
 *
 * @param line - A single line of text.
 * @returns `true` when the line matches the encoded-blob shape.
 */
function isBase64LikeLine(line: string): boolean {
  const candidate = line.trim();
  if (candidate.length < 500) return false;
  if (/\s/.test(candidate)) return false;
  if (!/^[A-Za-z0-9+/=_-]+$/.test(candidate)) return false;

  const alphaNumericCount = candidate.match(/[A-Za-z0-9]/g)?.length ?? 0;
  return alphaNumericCount / candidate.length > 0.85;
}
162
+
163
/**
 * Replaces each encoded-blob line with a short marker.
 *
 * @param text - Text to scan line by line.
 * @param omissions - Receives an `Encoded blob` record per replaced line.
 * @returns The text with encoded lines replaced; other lines are untouched.
 */
function collapseBase64Lines(text: string, omissions: OmissionSummary[]): string {
  const kept: string[] = [];
  for (const line of text.split('\n')) {
    if (isBase64LikeLine(line)) {
      pushOmission(omissions, 'Encoded blob', line);
      kept.push(`[Encoded blob omitted: 1 line, ${line.length} chars]`);
    } else {
      kept.push(line);
    }
  }
  return kept.join('\n');
}
170
+
171
/**
 * Recognises the first line of a diff or patch.
 *
 * @param line - A single line of text.
 * @returns `true` for a `diff --git` header or the exact line `*** Begin Patch`.
 */
function looksLikeDiffStart(line: string): boolean {
  if (line === '*** Begin Patch') return true;
  return /^diff --git\b/.test(line);
}
174
+
175
/**
 * Recognises header and metadata lines that appear inside a diff or patch:
 * git extended headers, `---` / `+++` file markers, binary-file notices, and
 * `*** Add|Update|Delete File:` / `*** End of File` patch directives.
 *
 * @param line - A single line of text.
 * @returns `true` when the line matches one of the metadata shapes.
 */
function isDiffMetadataLine(line: string): boolean {
  const metadataPatterns: RegExp[] = [
    /^(?:index|new file mode|deleted file mode|old mode|new mode|similarity index|dissimilarity index|rename from|rename to|copy from|copy to)\b/,
    /^(?:---|\+\+\+) /,
    /^Binary files .+ differ$/,
    /^\*\*\* (?:Add|Update|Delete) File: /,
    /^\*\*\* End of File$/,
  ];
  return metadataPatterns.some((pattern) => pattern.test(line));
}
182
+
183
/**
 * Recognises a diff body line: a hunk header (`@@`) or a line starting with a
 * space, `+`, `-`, or a backslash.
 *
 * @param line - A single line of text.
 * @returns `true` when the line has the shape of diff body content.
 */
function isDiffBodyLine(line: string): boolean {
  if (/^@@/.test(line)) return true;
  return /^[ +\\-]/.test(line);
}
187
+
188
/**
 * Replaces large diff / patch sections with a one-line marker.
 *
 * A section begins at a line accepted by `looksLikeDiffStart` and extends
 * until one of the following:
 *  - another diff start (left for the next section),
 *  - the exact line `*** End Patch` (included in the section),
 *  - a blank line, or a line that is neither diff metadata nor diff body
 *    (both left in place as ordinary text),
 *  - the end of the text.
 * Sections below both policy thresholds are kept verbatim.
 *
 * @param text - Text to scan.
 * @param policy - Active ingest policy providing the thresholds.
 * @param omissions - Receives a `Diff` record for every collapsed section.
 * @returns The text with large diff sections collapsed.
 */
function collapseDiffBlocks(
  text: string,
  policy: PersistioIngestPolicy,
  omissions: OmissionSummary[],
): string {
  const lines = text.split('\n');
  const output: string[] = [];
  let cursor = 0; // index of the next line not yet consumed

  while (cursor < lines.length) {
    const head = lines[cursor]!;
    cursor += 1;

    if (!looksLikeDiffStart(head)) {
      output.push(head);
      continue;
    }

    const section: string[] = [head];
    while (cursor < lines.length) {
      const candidate = lines[cursor]!;
      if (looksLikeDiffStart(candidate)) break;
      if (candidate === '*** End Patch') {
        // The terminator belongs to the section it closes.
        section.push(candidate);
        cursor += 1;
        break;
      }
      if (candidate.trim() === '') break;
      if (!isDiffMetadataLine(candidate) && !isDiffBodyLine(candidate)) break;
      section.push(candidate);
      cursor += 1;
    }

    const sectionText = section.join('\n');
    const isSmall =
      sectionText.length < policy.agent.largeBlockThresholdChars &&
      section.length < policy.agent.largeBlockThresholdLines;
    if (isSmall) {
      output.push(sectionText);
      continue;
    }

    pushOmission(omissions, 'Diff', sectionText);
    output.push(marker('Diff', sectionText));
  }

  return output.join('\n');
}
241
+
242
/**
 * Recognises a line that looks like log output: a leading ISO-style
 * timestamp, a leading log level, a stack frame (`at ...:line:col`), a Python
 * traceback header, or an `<Something>Error: message` line.
 *
 * @param line - A single line of text.
 * @returns `true` when the line matches one of the log shapes.
 */
function isLogLikeLine(line: string): boolean {
  const logPatterns: RegExp[] = [
    /^\d{4}-\d{2}-\d{2}[T\s]\d{2}:\d{2}:\d{2}/,
    /^\s*(ERROR|WARN|INFO|DEBUG|TRACE)\b/,
    /^\s*at\s+.+\(.+:\d+:\d+\)/,
    /^\s*at\s+.+:\d+:\d+/,
    /^Traceback \(most recent call last\):/,
    /^[A-Za-z]*Error: .+/,
  ];
  return logPatterns.some((pattern) => pattern.test(line));
}
250
+
251
/**
 * Recognises a line that looks like shell or test-runner output: test-runner
 * status words, `> script` banners, `$ command` prompts, and npm
 * `ERR!` / `WARN` / `notice` lines.
 *
 * @param line - A single line of text.
 * @returns `true` when the line matches one of the command-output shapes.
 */
function isShellOutputLine(line: string): boolean {
  return /^\s*(PASS|FAIL|RUNS|Test Files|Tests|Duration|stderr|stdout)\b/.test(line)
    || /^>\s+[\w@/.-]+/.test(line)
    || /^\$\s+\S+/.test(line)
    // `ERR!` ends in a non-word character, so a trailing `\b` would only match
    // when a word character follows immediately and would miss the usual
    // "npm ERR! code ..." lines. The boundary is applied to the word-ending
    // alternatives only.
    || /^npm (?:ERR!|(?:WARN|notice)\b)/.test(line);
}
257
+
258
/**
 * Collapses long runs of consecutive lines accepted by `predicate`.
 *
 * Each maximal run of matching lines is kept verbatim when it is below both
 * policy thresholds. Otherwise it is recorded as an omission and replaced by
 * a marker that quotes up to 120 characters of the run's first non-blank line.
 *
 * @param text - Text to scan.
 * @param label - Label used for the omission record and the marker.
 * @param predicate - Decides whether a line belongs to a run.
 * @param policy - Active ingest policy providing the thresholds.
 * @param omissions - Receives a record for every collapsed run.
 * @returns The text with large runs collapsed.
 */
function collapseLineRuns(
  text: string,
  label: 'Log output' | 'Command output',
  predicate: (line: string) => boolean,
  policy: PersistioIngestPolicy,
  omissions: OmissionSummary[],
): string {
  const lines = text.split('\n');
  const output: string[] = [];
  let cursor = 0; // index of the next line not yet consumed

  while (cursor < lines.length) {
    const first = lines[cursor]!;
    cursor += 1;

    if (!predicate(first)) {
      output.push(first);
      continue;
    }

    const run: string[] = [first];
    while (cursor < lines.length && predicate(lines[cursor]!)) {
      run.push(lines[cursor]!);
      cursor += 1;
    }

    const runText = run.join('\n');
    const isSmall =
      runText.length < policy.agent.largeBlockThresholdChars &&
      run.length < policy.agent.largeBlockThresholdLines;
    if (isSmall) {
      output.push(runText);
      continue;
    }

    pushOmission(omissions, label, runText);
    const preview = run.find((candidate) => candidate.trim().length > 0)?.trim();
    const extra = preview ? `first="${preview.slice(0, 120)}"` : undefined;
    output.push(marker(label, runText, extra));
  }

  return output.join('\n');
}
302
+
303
/**
 * Recognises a markdown table row: after trimming, the line starts and ends
 * with `|` and contains at least three pipe characters.
 *
 * @param line - A single line of text.
 * @returns `true` when the line has the shape of a table row.
 */
function isMarkdownTableLine(line: string): boolean {
  const row = line.trim();
  if (!row.startsWith('|') || !row.endsWith('|')) return false;
  const pipeCount = row.match(/\|/g)?.length ?? 0;
  return pipeCount >= 3;
}
307
+
308
/**
 * Recognises the separator row of a markdown table: at least two cells, each
 * made of three or more dashes with optional alignment colons; outer pipes
 * are optional.
 *
 * @param line - A single line of text.
 * @returns `true` when the line is a table separator row.
 */
function isMarkdownTableSeparator(line: string): boolean {
  const separatorPattern = /^\s*\|?(?:\s*:?-{3,}:?\s*\|)+\s*:?-{3,}:?\s*\|?\s*$/;
  return separatorPattern.test(line);
}
311
+
312
/**
 * Shortens markdown tables that have more body rows than the policy allows.
 *
 * A table is a row line directly followed by a separator line, plus every
 * row line after that. The header, the separator and the first
 * `maxTableRows` body rows are kept; the remaining rows are recorded as a
 * `Table rows` omission and replaced by a single note with the dropped count.
 *
 * @param text - Text to scan.
 * @param policy - Active ingest policy providing `agent.maxTableRows`.
 * @param omissions - Receives a record for every truncated table.
 * @returns The text with long tables truncated.
 */
function truncateMarkdownTables(
  text: string,
  policy: PersistioIngestPolicy,
  omissions: OmissionSummary[],
): string {
  const lines = text.split('\n');
  const output: string[] = [];
  let cursor = 0; // index of the next line not yet consumed

  while (cursor < lines.length) {
    const header = lines[cursor]!;
    const divider = lines[cursor + 1];

    // An empty or missing following line can never be a separator.
    const startsTable =
      isMarkdownTableLine(header) && Boolean(divider) && isMarkdownTableSeparator(divider!);
    if (!startsTable) {
      output.push(header);
      cursor += 1;
      continue;
    }

    const table: string[] = [header, divider!];
    cursor += 2;
    while (cursor < lines.length && isMarkdownTableLine(lines[cursor]!)) {
      table.push(lines[cursor]!);
      cursor += 1;
    }

    // Header + separator + permitted body rows.
    if (table.length <= policy.agent.maxTableRows + 2) {
      output.push(...table);
      continue;
    }

    const dropped = table.slice(policy.agent.maxTableRows + 2).join('\n');
    pushOmission(omissions, 'Table rows', dropped);
    output.push(...table.slice(0, policy.agent.maxTableRows + 2));
    output.push(`[Table truncated: ${table.length - policy.agent.maxTableRows - 2} more rows]`);
  }

  return output.join('\n');
}
346
+
347
/**
 * Collapses a message that consists entirely of one large JSON or XML-like
 * document.
 *
 * Texts shorter than 2000 characters (after trimming) are returned unchanged.
 * If the trimmed text parses as JSON it is replaced by a `JSON blob` marker;
 * for a plain object the marker lists up to twelve top-level keys. Otherwise,
 * a text of at least 20 lines that starts like a tag, contains a closing tag
 * and has more than 8% of its characters among `<`, `>` and `/` is replaced
 * by an `XML blob` marker.
 *
 * @param text - Text to inspect.
 * @param omissions - Receives a record when the text is collapsed.
 * @returns The marker, or the original text when no blob shape is detected.
 */
function maybeCollapseWholeBlob(text: string, omissions: OmissionSummary[]): string {
  const body = text.trim();
  if (body.length < 2000) return text;

  try {
    const parsed: unknown = JSON.parse(body);
    pushOmission(omissions, 'JSON blob', text);

    const isPlainObject = typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed);
    if (!isPlainObject) return marker('JSON blob', text);

    const keys = Object.keys(parsed as Record<string, unknown>).slice(0, 12).join(',');
    return marker('JSON blob', text, keys ? `keys=${keys}` : undefined);
  } catch {
    // Not JSON: fall through to the XML-like shape detection.
  }

  const angleCount = body.match(/[<>/]/g)?.length ?? 0;
  const angleRatio = angleCount / body.length;
  const lineCount = countLines(body);
  const looksLikeXml =
    lineCount >= 20 &&
    angleRatio > 0.08 &&
    /^<\??[A-Za-z!]/.test(body) &&
    /<\/[A-Za-z][^>]*>/.test(body);
  if (!looksLikeXml) return text;

  pushOmission(omissions, 'XML blob', text);
  return marker('XML blob', text);
}
377
+
378
/**
 * Shortens a text so that it fits a character budget.
 *
 * Texts within the budget are returned unchanged. Longer texts keep a head
 * (about 60% of the usable space) and a tail joined by a
 * `[Content truncated: ...]` marker. When the budget is too small to hold the
 * marker plus at least one character of content, the text is hard-cut to the
 * budget instead, so the result never exceeds the budget.
 *
 * @param text - Text to fit.
 * @param budget - Maximum number of characters allowed in the result.
 * @returns The fitted text and whether anything was removed.
 */
function fitToBudget(text: string, budget: number): { text: string; truncated: boolean } {
  if (text.length <= budget) {
    return { text, truncated: false };
  }

  const markerText = `\n\n[Content truncated: original ${text.length} chars, kept ${budget} chars]\n\n`;
  const available = budget - markerText.length;

  // `!(available > 0)` also covers a NaN budget, which would otherwise make
  // the tail slice return the entire text.
  if (!(available > 0)) {
    const limit = budget > 0 ? Math.floor(budget) : 0;
    return { text: text.slice(0, limit).trimEnd(), truncated: true };
  }

  const headLength = Math.ceil(available * 0.6);
  const tailLength = Math.max(0, available - headLength);
  const head = text.slice(0, headLength).trimEnd();
  const tail = text.slice(text.length - tailLength).trimStart();
  return {
    text: `${head}${markerText}${tail}`.trim(),
    truncated: true,
  };
}
392
+
393
/**
 * Reduces assistant output to the parts worth ingesting.
 *
 * The text is normalized first. In `bounded` mode it then passes, in order,
 * through the collapsing stages: fenced code blocks, diffs, log runs, command
 * output runs, markdown tables, encoded lines, whole-message blobs. Finally
 * the result is fitted to `agent.maxCharsAfterFiltering`. In `raw` mode only
 * normalization and the final fit apply.
 *
 * @param text - Assistant message text.
 * @param policy - Active ingest policy.
 * @returns The filtered text, the omissions recorded by the stages, and
 *          whether the final fit had to cut content.
 */
export function filterAssistantContent(
  text: string,
  policy: PersistioIngestPolicy,
): { text: string; omissions: OmissionSummary[]; truncated: boolean } {
  const omissions: OmissionSummary[] = [];
  let working = normalizeText(text);

  if (policy.agent.mode === 'bounded') {
    // Stage order matters: each stage sees the output of the previous one.
    const stages: Array<(input: string) => string> = [
      (input) => collapseLargeFencedBlocks(input, policy, omissions),
      (input) => collapseDiffBlocks(input, policy, omissions),
      (input) => collapseLineRuns(input, 'Log output', isLogLikeLine, policy, omissions),
      (input) => collapseLineRuns(input, 'Command output', isShellOutputLine, policy, omissions),
      (input) => truncateMarkdownTables(input, policy, omissions),
      (input) => collapseBase64Lines(input, omissions),
      (input) => maybeCollapseWholeBlob(input, omissions),
    ];
    for (const stage of stages) {
      working = stage(working);
    }
  }

  const { text: fitted, truncated } = fitToBudget(working, policy.agent.maxCharsAfterFiltering);
  return { text: fitted, omissions, truncated };
}
417
+
418
/**
 * Splits text into chunks of at most `maxChunkChars` characters.
 *
 * The text is normalized and split into paragraphs on blank lines.
 * Paragraphs are packed greedily into chunks, joined by a blank line. A
 * paragraph longer than the limit is packed line by line instead, and a
 * single line longer than the limit is hard-split at the limit.
 *
 * @param text - Text to split.
 * @param maxChunkChars - Maximum chunk length. Values below 1 (and NaN) are
 *        treated as 1 so that the hard-split loop always advances.
 * @returns The non-empty, trimmed chunks in order; empty for blank input.
 */
export function chunkText(text: string, maxChunkChars: number): string[] {
  const normalized = normalizeText(text);
  if (!normalized) return [];

  // A step of zero or less would make the hard-split loop below spin forever.
  const limit = maxChunkChars >= 1 ? maxChunkChars : 1;

  const chunks: string[] = [];
  let current = '';

  // Emits the chunk under construction, if it holds any visible content.
  const flush = (): void => {
    const ready = current.trim();
    if (!ready) return;
    chunks.push(ready);
    current = '';
  };

  // Adds one paragraph or line, starting a new chunk when it does not fit.
  const appendUnit = (unit: string): void => {
    const separator = current ? '\n\n' : '';
    if (current.length + separator.length + unit.length <= limit) {
      current = `${current}${separator}${unit}`;
      return;
    }

    flush();
    if (unit.length <= limit) {
      current = unit;
      return;
    }

    // The unit alone exceeds the limit: cut it into fixed-size slices.
    for (let start = 0; start < unit.length; start += limit) {
      chunks.push(unit.slice(start, start + limit).trim());
    }
  };

  for (const paragraph of normalized.split(/\n{2,}/)) {
    if (paragraph.length <= limit) {
      appendUnit(paragraph);
      continue;
    }
    for (const line of paragraph.split('\n')) {
      appendUnit(line);
    }
  }

  flush();
  return chunks.filter((chunk) => chunk.length > 0);
}
461
+
462
/**
 * Turns one conversation message into the chunks that will be ingested.
 *
 * - `assistant`: skipped outright (no chunks, `truncated: true`) when the
 *   turn has no assistant characters or chunk slots left. Otherwise the text
 *   is fitted to the per-message budget, filtered, then fitted to the
 *   remaining per-turn budget.
 * - `user`: fitted to the user per-message budget.
 * - any other role: only normalized.
 *
 * The prepared text is then chunked and capped at `remainingChunks` chunks.
 *
 * @param input - Message, policy and remaining per-turn allowances.
 * @returns The chunks together with size and truncation bookkeeping.
 */
export function prepareMessageForIngest(input: PrepareMessageInput): PreparedIngestMessage {
  const original = normalizeText(input.text);
  const { role, policy, remainingAgentChars, remainingChunks } = input;
  const omissions: OmissionSummary[] = [];
  let prepared = original;
  let truncated = false;

  switch (role) {
    case 'assistant': {
      if (remainingAgentChars <= 0 || remainingChunks <= 0) {
        // Nothing left in this turn's allowance: drop the message entirely.
        return {
          chunks: [],
          originalChars: original.length,
          preparedChars: 0,
          truncated: true,
          omissions: [],
        };
      }

      const perMessage = fitToBudget(prepared, policy.agent.maxCharsPerMessage);
      const filtered = filterAssistantContent(perMessage.text, policy);
      omissions.push(...filtered.omissions);
      const perTurn = fitToBudget(filtered.text, remainingAgentChars);

      prepared = perTurn.text;
      truncated =
        perMessage.truncated ||
        filtered.truncated ||
        filtered.omissions.length > 0 ||
        perTurn.truncated;
      break;
    }
    case 'user': {
      const fitted = fitToBudget(prepared, policy.user.maxCharsPerMessage);
      prepared = fitted.text;
      truncated = fitted.truncated;
      break;
    }
    default:
      break;
  }

  const chunks = chunkText(prepared, policy.maxChunkChars).slice(0, remainingChunks);
  // Fewer characters out than in means the chunk cap dropped content.
  if (chunks.join('\n\n').length < prepared.length) {
    truncated = true;
  }

  let preparedChars = 0;
  for (const chunk of chunks) {
    preparedChars += chunk.length;
  }

  return {
    chunks,
    originalChars: original.length,
    preparedChars,
    truncated,
    omissions,
  };
}