@persistio/openclaw-plugin 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -0
- package/dist/client.d.ts +3 -0
- package/dist/client.js +16 -1
- package/dist/index.js +67 -1
- package/dist/ingest-policy.d.ts +48 -0
- package/dist/ingest-policy.js +380 -0
- package/openclaw.plugin.json +59 -1
- package/package.json +2 -2
- package/src/client.ts +20 -1
- package/src/index.ts +77 -3
- package/src/ingest-policy.ts +508 -0
package/README.md
CHANGED
|
@@ -51,12 +51,23 @@ Then register it in your OpenClaw config:
|
|
|
51
51
|
| `recallTopK` | number | | `10` | Number of memories to retrieve per recall |
|
|
52
52
|
| `recallMinSimilarity` | number from `0` to `1` | | Persistio server default | Optional semantic recall quality floor |
|
|
53
53
|
| `recallTimeout` | number | | `5000` | HTTP timeout for recall requests (ms) |
|
|
54
|
+
| `ingest.timeoutMs` | number | | `30000` | HTTP timeout for ingest requests (ms). Timed-out requests are treated as ambiguous and not retried automatically |
|
|
55
|
+
| `ingest.maxChunkChars` | number | | `6000` | Maximum characters per chunk sent to Persistio |
|
|
56
|
+
| `ingest.maxChunksPerTurn` | number | | `12` | Maximum chunks sent from a single OpenClaw turn |
|
|
57
|
+
| `ingest.skipSubagentSessions` | boolean | | `true` | Skip `agent:*` sessions unless they are `agent:main:*` |
|
|
58
|
+
| `ingest.user.maxCharsPerMessage` | number | | `24000` | Maximum user-message characters considered for ingest before chunking |
|
|
59
|
+
| `ingest.agent.mode` | `"bounded"` or `"raw"` | | `"bounded"` | Assistant ingest shaping mode. `bounded` collapses obvious large noisy blocks before chunking |
|
|
60
|
+
| `ingest.agent.maxCharsPerMessage` | number | | `24000` | Maximum assistant-message characters considered before filtering |
|
|
61
|
+
| `ingest.agent.maxCharsAfterFiltering` | number | | `9000` | Maximum assistant-message characters retained after deterministic filtering |
|
|
62
|
+
| `ingest.agent.maxCharsPerTurn` | number | | `24000` | Maximum assistant-message characters sent from one turn |
|
|
54
63
|
| `send.roles.user` | `"enabled"` or `"disabled"` | | `"enabled"` | Send user messages to Persistio ingest |
|
|
55
64
|
| `send.roles.agent` | `"enabled"` or `"disabled"` | | `"enabled"` | Send agent/assistant messages to Persistio ingest |
|
|
56
65
|
| `send.roles.tool` | `"enabled"` or `"disabled"` | | `"disabled"` | Send tool messages to Persistio ingest |
|
|
57
66
|
|
|
58
67
|
`agent_end` receives a snapshot of the active OpenClaw transcript, so the plugin deduplicates per session and only sends each user, agent, or enabled tool message once per plugin process. Deduplication keys are bounded in memory and expire after 24 hours of session inactivity.
|
|
59
68
|
|
|
69
|
+
Assistant ingest is bounded before any network call. By default the plugin skips non-main `agent:*` sessions, collapses oversized code/log/diff/blob/table-shaped assistant content into omission markers, caps assistant ingest per message and per turn, and then splits all ingest content into chunks of at most `ingest.maxChunkChars` characters. Persistio still performs server-side extraction and curation; the plugin only enforces a deterministic transport-safe shape.
|
|
70
|
+
|
|
60
71
|
## Tools exposed
|
|
61
72
|
|
|
62
73
|
| Tool | Description |
|
package/dist/client.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { PersistioIngestPolicy } from './ingest-policy.js';
|
|
1
2
|
export interface PersistioConfig {
|
|
2
3
|
baseURL: string;
|
|
3
4
|
apiKey: string;
|
|
@@ -5,6 +6,7 @@ export interface PersistioConfig {
|
|
|
5
6
|
recallTopK: number;
|
|
6
7
|
recallMinSimilarity?: number;
|
|
7
8
|
recallTimeout: number;
|
|
9
|
+
ingest: PersistioIngestPolicy;
|
|
8
10
|
send: PersistioSendConfig;
|
|
9
11
|
}
|
|
10
12
|
export type PersistioSendRoleStatus = 'enabled' | 'disabled';
|
|
@@ -48,6 +50,7 @@ export declare class PersistioClient {
|
|
|
48
50
|
private readonly recallTopK;
|
|
49
51
|
private readonly recallMinSimilarity?;
|
|
50
52
|
private readonly recallTimeout;
|
|
53
|
+
private readonly ingestTimeout;
|
|
51
54
|
constructor(config: PersistioConfig);
|
|
52
55
|
private headers;
|
|
53
56
|
recall(query: string): Promise<PersistioMemory[]>;
|
package/dist/client.js
CHANGED
|
@@ -4,12 +4,14 @@ export class PersistioClient {
|
|
|
4
4
|
recallTopK;
|
|
5
5
|
recallMinSimilarity;
|
|
6
6
|
recallTimeout;
|
|
7
|
+
ingestTimeout;
|
|
7
8
|
constructor(config) {
|
|
8
9
|
this.baseURL = config.baseURL.replace(/\/$/, '');
|
|
9
10
|
this.apiKey = config.apiKey;
|
|
10
11
|
this.recallTopK = config.recallTopK;
|
|
11
12
|
this.recallMinSimilarity = config.recallMinSimilarity;
|
|
12
13
|
this.recallTimeout = config.recallTimeout;
|
|
14
|
+
this.ingestTimeout = config.ingest.timeoutMs;
|
|
13
15
|
}
|
|
14
16
|
headers() {
|
|
15
17
|
return {
|
|
@@ -56,9 +58,10 @@ export class PersistioClient {
|
|
|
56
58
|
method: 'POST',
|
|
57
59
|
headers: this.headers(),
|
|
58
60
|
body: JSON.stringify({ session_id: sessionId, chunks }),
|
|
61
|
+
signal: AbortSignal.timeout(this.ingestTimeout),
|
|
59
62
|
});
|
|
60
63
|
if (!res.ok)
|
|
61
|
-
throw new Error(
|
|
64
|
+
throw new Error(await formatHttpError('ingest', res));
|
|
62
65
|
}
|
|
63
66
|
async addMemory(data, subject) {
|
|
64
67
|
const res = await fetch(`${this.baseURL}/v1/memories`, {
|
|
@@ -98,3 +101,15 @@ export class PersistioClient {
|
|
|
98
101
|
return data.items ?? [];
|
|
99
102
|
}
|
|
100
103
|
}
|
|
104
|
+
/**
 * Builds the message for a failed Persistio HTTP call.
 *
 * The response body is read as text, trimmed and capped at 500 characters.
 * If the body cannot be read, or is empty after trimming, only the status
 * code is reported.
 *
 * @param operation Short operation name embedded in the message (e.g. "ingest").
 * @param res Response-like object exposing `status` and an async `text()`.
 * @returns The formatted error message.
 */
async function formatHttpError(operation, res) {
    let body = '';
    try {
        const raw = await res.text();
        body = raw.trim().slice(0, 500);
    }
    catch {
        // A failed body read is not fatal: the status code alone is still useful.
    }
    if (!body) {
        return `Persistio ${operation} failed: ${res.status}`;
    }
    return `Persistio ${operation} failed: ${res.status} ${body}`;
}
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { definePluginEntry } from 'openclaw/plugin-sdk/plugin-entry';
|
|
2
2
|
import { Type } from '@sinclair/typebox';
|
|
3
3
|
import { PersistioClient } from './client.js';
|
|
4
|
+
import { prepareMessageForIngest, resolveIngestPolicy, shouldIngestSession, } from './ingest-policy.js';
|
|
4
5
|
const DEFAULT_SEND_ROLES = {
|
|
5
6
|
user: 'enabled',
|
|
6
7
|
agent: 'enabled',
|
|
@@ -39,6 +40,7 @@ function resolveConfig(raw) {
|
|
|
39
40
|
recallTopK: typeof c['recallTopK'] === 'number' ? c['recallTopK'] : 10,
|
|
40
41
|
recallMinSimilarity: resolveRecallMinSimilarity(c['recallMinSimilarity']),
|
|
41
42
|
recallTimeout: typeof c['recallTimeout'] === 'number' ? c['recallTimeout'] : 5000,
|
|
43
|
+
ingest: resolveIngestPolicy(c['ingest']),
|
|
42
44
|
send: resolveSendConfig(c),
|
|
43
45
|
};
|
|
44
46
|
}
|
|
@@ -253,6 +255,27 @@ function forgetKeys(target, keys) {
|
|
|
253
255
|
for (const key of keys)
|
|
254
256
|
target.delete(key);
|
|
255
257
|
}
|
|
258
|
+
/**
 * Renders omission records as a compact `label:count` list for logging.
 *
 * @param omissions Omission records; only their `label` is read.
 * @returns `'none'` for an empty list, otherwise comma-separated `label:count`
 *   pairs in order of first appearance.
 */
function summarizeOmissions(omissions) {
    if (omissions.length === 0) {
        return 'none';
    }
    const tally = omissions.reduce((acc, { label }) => acc.set(label, (acc.get(label) ?? 0) + 1), new Map());
    return Array.from(tally, ([label, count]) => `${label}:${count}`).join(',');
}
|
|
269
|
+
/**
 * Heuristically decides whether a rejection looks like a timeout or abort.
 *
 * An error qualifies when its `name` is `TimeoutError` or `AbortError`, or
 * when its lower-cased `message` contains "timeout" or "aborted".
 *
 * @param err Any thrown value.
 * @returns `true` when the value looks like a timeout/abort error.
 */
function isTimeoutLikeError(err) {
    if (err === null || typeof err !== 'object') {
        return false;
    }
    const errName = typeof err['name'] === 'string' ? err['name'] : '';
    if (['TimeoutError', 'AbortError'].includes(errName)) {
        return true;
    }
    const rawMessage = err['message'];
    if (typeof rawMessage !== 'string') {
        return false;
    }
    const lowered = rawMessage.toLowerCase();
    return ['timeout', 'aborted'].some((needle) => lowered.includes(needle));
}
|
|
256
279
|
const PERSISTIO_MEMORY_PATH_PREFIX = 'persistio://memory/';
|
|
257
280
|
function createClient(config, recallTopK = config.recallTopK) {
|
|
258
281
|
return new PersistioClient({ ...config, recallTopK });
|
|
@@ -417,8 +440,18 @@ export default definePluginEntry({
|
|
|
417
440
|
const sessionId = context?.sessionId ?? event.runId ?? 'unknown-session';
|
|
418
441
|
if (sessionId.startsWith('announce:'))
|
|
419
442
|
return;
|
|
443
|
+
if (!shouldIngestSession(sessionId, cfg.ingest)) {
|
|
444
|
+
api.logger?.debug?.(`openclaw-persistio: ingest skipped non-main session: ${sessionId}`);
|
|
445
|
+
return;
|
|
446
|
+
}
|
|
420
447
|
const chunks = [];
|
|
421
448
|
const chunkKeys = [];
|
|
449
|
+
let agentCharsSent = 0;
|
|
450
|
+
let originalChars = 0;
|
|
451
|
+
let preparedChars = 0;
|
|
452
|
+
let truncatedMessages = 0;
|
|
453
|
+
let skippedMessages = 0;
|
|
454
|
+
const omissions = [];
|
|
422
455
|
const now = Date.now();
|
|
423
456
|
const sentKeys = getSessionKeyStore(sentMessageKeysBySession, sessionId, now);
|
|
424
457
|
const pendingKeys = getSessionKeyStore(pendingMessageKeysBySession, sessionId, now);
|
|
@@ -434,17 +467,50 @@ export default definePluginEntry({
|
|
|
434
467
|
if (sentKeys.has(key) || pendingKeys.has(key))
|
|
435
468
|
continue;
|
|
436
469
|
const ts = resolveMessageTimestamp(m) ?? new Date().toISOString();
|
|
470
|
+
const prepared = prepareMessageForIngest({
|
|
471
|
+
role,
|
|
472
|
+
text,
|
|
473
|
+
policy: cfg.ingest,
|
|
474
|
+
remainingAgentChars: Math.max(0, cfg.ingest.agent.maxCharsPerTurn - agentCharsSent),
|
|
475
|
+
remainingChunks: Math.max(0, cfg.ingest.maxChunksPerTurn - chunks.length),
|
|
476
|
+
});
|
|
477
|
+
originalChars += prepared.originalChars;
|
|
478
|
+
preparedChars += prepared.preparedChars;
|
|
479
|
+
omissions.push(...prepared.omissions);
|
|
480
|
+
if (prepared.truncated)
|
|
481
|
+
truncatedMessages += 1;
|
|
482
|
+
if (prepared.chunks.length === 0) {
|
|
483
|
+
skippedMessages += 1;
|
|
484
|
+
continue;
|
|
485
|
+
}
|
|
437
486
|
chunkKeys.push(key);
|
|
438
|
-
|
|
487
|
+
if (role === 'assistant') {
|
|
488
|
+
agentCharsSent += prepared.preparedChars;
|
|
489
|
+
}
|
|
490
|
+
chunks.push(...prepared.chunks.map((content) => ({ role, content, timestamp: ts })));
|
|
491
|
+
if (chunks.length >= cfg.ingest.maxChunksPerTurn)
|
|
492
|
+
break;
|
|
439
493
|
}
|
|
440
494
|
if (chunks.length === 0)
|
|
441
495
|
return;
|
|
496
|
+
if (truncatedMessages > 0 || omissions.length > 0 || skippedMessages > 0) {
|
|
497
|
+
api.logger?.info?.(`openclaw-persistio: ingest planned session=${sessionId} chunks=${chunks.length} `
|
|
498
|
+
+ `originalChars=${originalChars} preparedChars=${preparedChars} `
|
|
499
|
+
+ `truncatedMessages=${truncatedMessages} skippedMessages=${skippedMessages} `
|
|
500
|
+
+ `omissions=${summarizeOmissions(omissions)}`);
|
|
501
|
+
}
|
|
442
502
|
rememberKeys(pendingKeys, chunkKeys);
|
|
443
503
|
client.ingest(sessionId, chunks)
|
|
444
504
|
.then(() => {
|
|
445
505
|
rememberKeys(sentKeys, chunkKeys, MAX_SENT_KEYS_PER_SESSION);
|
|
446
506
|
})
|
|
447
507
|
.catch((err) => {
|
|
508
|
+
if (isTimeoutLikeError(err)) {
|
|
509
|
+
rememberKeys(sentKeys, chunkKeys, MAX_SENT_KEYS_PER_SESSION);
|
|
510
|
+
api.logger?.warn?.(`openclaw-persistio: ingest timeout after ${cfg.ingest.timeoutMs}ms; `
|
|
511
|
+
+ `outcome is ambiguous, suppressing retry for ${chunkKeys.length} messages in session=${sessionId}`);
|
|
512
|
+
return;
|
|
513
|
+
}
|
|
448
514
|
api.logger?.warn?.(`openclaw-persistio: ingest error: ${String(err)}`);
|
|
449
515
|
})
|
|
450
516
|
.finally(() => {
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/** Role of an OpenClaw transcript message as seen by the ingest policy. */
export type OpenClawMessageRole =
    | 'user'
    | 'assistant'
    | 'tool';
|
|
2
|
+
/**
 * Fully resolved ingest policy: every field is present, with defaults already
 * applied by `resolveIngestPolicy`.
 */
export interface PersistioIngestPolicy {
    /** HTTP timeout for ingest requests, in milliseconds. */
    timeoutMs: number;
    /** Maximum characters per chunk sent to Persistio. */
    maxChunkChars: number;
    /** Maximum number of chunks sent from a single OpenClaw turn. */
    maxChunksPerTurn: number;
    /** When true, `agent:*` sessions are skipped unless they are `agent:main:*`. */
    skipSubagentSessions: boolean;
    /** Limits applied to user messages. */
    user: {
        /** Maximum user-message characters considered before chunking. */
        maxCharsPerMessage: number;
    };
    /** Limits and shaping applied to assistant messages. */
    agent: {
        /** `bounded` collapses large noisy blocks before chunking; `raw` skips that step. */
        mode: 'bounded' | 'raw';
        /** Character cap applied to an assistant message before filtering. */
        maxCharsPerMessage: number;
        /** Character cap applied to an assistant message after filtering. */
        maxCharsAfterFiltering: number;
        /** Character cap on assistant content sent from one turn. */
        maxCharsPerTurn: number;
        /** Character length at or above which a fenced/diff/log/command block is collapsed. */
        largeBlockThresholdChars: number;
        /** Line count at or above which a fenced/diff/log/command block is collapsed. */
        largeBlockThresholdLines: number;
        /** Number of markdown table body rows kept before the table is truncated. */
        maxTableRows: number;
    };
}
|
|
20
|
+
/** Record of one span of content that was replaced by an omission marker. */
export interface OmissionSummary {
    /** Category of the omitted content, e.g. "Code block" or "Diff". */
    label: string;
    /** Character length of the omitted text. */
    chars: number;
    /** Line count of the omitted text. */
    lines: number;
}
|
|
25
|
+
/** Result of shaping one transcript message for ingest. */
export interface PreparedIngestMessage {
    /** Chunk texts to send; empty when the message is skipped. */
    chunks: string[];
    /** Character length of the message after whitespace normalization. */
    originalChars: number;
    /** Total character length of the returned chunks. */
    preparedChars: number;
    /** Whether any content was cut, collapsed, or dropped. */
    truncated: boolean;
    /** Omission records produced while filtering the message. */
    omissions: OmissionSummary[];
}
|
|
32
|
+
/** Input for `prepareMessageForIngest`. */
export interface PrepareMessageInput {
    /** Role of the message being prepared. */
    role: OpenClawMessageRole;
    /** Raw message text. */
    text: string;
    /** Resolved ingest policy to apply. */
    policy: PersistioIngestPolicy;
    /** Assistant characters still available in the current turn's budget. */
    remainingAgentChars: number;
    /** Chunks still available in the current turn's budget. */
    remainingChunks: number;
}
|
|
39
|
+
/** Policy values used for any ingest setting that is missing or invalid. */
export declare const DEFAULT_INGEST_POLICY: PersistioIngestPolicy;

/** Resolves raw plugin configuration into a complete ingest policy. */
export declare function resolveIngestPolicy(raw: unknown): PersistioIngestPolicy;

/** Reports whether messages from the given session should be ingested. */
export declare function shouldIngestSession(sessionId: string, policy: PersistioIngestPolicy): boolean;

/**
 * Normalizes assistant text, collapses noisy blocks in `bounded` mode, and
 * caps the result at `policy.agent.maxCharsAfterFiltering`.
 */
export declare function filterAssistantContent(text: string, policy: PersistioIngestPolicy): {
    text: string;
    omissions: OmissionSummary[];
    truncated: boolean;
};

/** Splits text into trimmed, non-empty chunks bounded by `maxChunkChars`. */
export declare function chunkText(text: string, maxChunkChars: number): string[];

/** Applies role-specific budgets and filtering to a message, then chunks it. */
export declare function prepareMessageForIngest(input: PrepareMessageInput): PreparedIngestMessage;
|
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
/**
 * Default ingest policy. `resolveIngestPolicy` falls back to these values for
 * any setting that is missing or fails validation.
 */
export const DEFAULT_INGEST_POLICY = {
    // Transport limits.
    timeoutMs: 30000,
    maxChunkChars: 6000,
    maxChunksPerTurn: 12,

    // Session filtering.
    skipSubagentSessions: true,

    // Per-role limits.
    user: { maxCharsPerMessage: 24000 },
    agent: {
        mode: 'bounded',
        maxCharsPerMessage: 24000,
        maxCharsAfterFiltering: 9000,
        maxCharsPerTurn: 24000,
        // Thresholds at which noisy blocks are collapsed in bounded mode.
        largeBlockThresholdChars: 1200,
        largeBlockThresholdLines: 80,
        maxTableRows: 12,
    },
};
|
|
19
|
+
/**
 * Reads a numeric setting.
 *
 * @param value Raw configuration value of any type.
 * @param fallback Value returned when `value` is rejected.
 * @param min Smallest accepted value (inclusive); defaults to 1.
 * @returns `value` rounded down to an integer when it is a finite number
 *   `>= min`, otherwise `fallback`.
 */
function readNumber(value, fallback, min = 1) {
    if (typeof value !== 'number') {
        return fallback;
    }
    const acceptable = Number.isFinite(value) && value >= min;
    return acceptable ? Math.floor(value) : fallback;
}
|
|
24
|
+
/**
 * Reads a boolean setting.
 *
 * @param value Raw configuration value of any type.
 * @param fallback Value returned when `value` is not a boolean.
 * @returns `value` when it is exactly `true` or `false`, otherwise `fallback`.
 */
function readBoolean(value, fallback) {
    if (typeof value === 'boolean') {
        return value;
    }
    return fallback;
}
|
|
27
|
+
/**
 * Treats a raw configuration value as a keyed record.
 *
 * @param value Raw configuration value of any type.
 * @returns `value` itself when it is a non-null object (arrays included),
 *   otherwise a fresh empty object.
 */
function readObject(value) {
    if (value === null || typeof value !== 'object') {
        return {};
    }
    return value;
}
|
|
32
|
+
/**
 * Resolves the raw `ingest` configuration section into a complete policy.
 *
 * Each setting is validated on its own; a missing or invalid value falls back
 * to the matching entry in `DEFAULT_INGEST_POLICY`. Numeric settings must be
 * finite and at least 1 (`maxChunkChars` at least 256) and are rounded down.
 * `agent.mode` is `'raw'` only when configured exactly so.
 *
 * @param raw Untrusted `ingest` configuration value.
 * @returns The resolved ingest policy.
 */
export function resolveIngestPolicy(raw) {
    const defaults = DEFAULT_INGEST_POLICY;
    const ingestCfg = readObject(raw);
    const userCfg = readObject(ingestCfg['user']);
    const agentCfg = readObject(ingestCfg['agent']);
    const agentMode = agentCfg['mode'] === 'raw' ? 'raw' : defaults.agent.mode;
    // Reads one numeric agent setting, defaulting to the same-named default.
    const agentNumber = (key) => readNumber(agentCfg[key], defaults.agent[key]);
    const timeoutMs = readNumber(ingestCfg['timeoutMs'], defaults.timeoutMs);
    // Chunks smaller than 256 characters are rejected in favour of the default.
    const maxChunkChars = readNumber(ingestCfg['maxChunkChars'], defaults.maxChunkChars, 256);
    const maxChunksPerTurn = readNumber(ingestCfg['maxChunksPerTurn'], defaults.maxChunksPerTurn);
    const skipSubagentSessions = readBoolean(ingestCfg['skipSubagentSessions'], defaults.skipSubagentSessions);
    return {
        timeoutMs,
        maxChunkChars,
        maxChunksPerTurn,
        skipSubagentSessions,
        user: {
            maxCharsPerMessage: readNumber(userCfg['maxCharsPerMessage'], defaults.user.maxCharsPerMessage),
        },
        agent: {
            mode: agentMode,
            maxCharsPerMessage: agentNumber('maxCharsPerMessage'),
            maxCharsAfterFiltering: agentNumber('maxCharsAfterFiltering'),
            maxCharsPerTurn: agentNumber('maxCharsPerTurn'),
            largeBlockThresholdChars: agentNumber('largeBlockThresholdChars'),
            largeBlockThresholdLines: agentNumber('largeBlockThresholdLines'),
            maxTableRows: agentNumber('maxTableRows'),
        },
    };
}
|
|
56
|
+
/**
 * Decides whether a session's messages should be ingested.
 *
 * With `skipSubagentSessions` enabled, sessions whose id starts with `agent:`
 * are skipped unless the id starts with `agent:main:`. All other sessions are
 * ingested.
 *
 * @param sessionId Session identifier.
 * @param policy Resolved ingest policy.
 * @returns `true` when the session should be ingested.
 */
export function shouldIngestSession(sessionId, policy) {
    if (policy.skipSubagentSessions) {
        const isMainAgent = sessionId.startsWith('agent:main:');
        const isAgent = sessionId.startsWith('agent:');
        return isMainAgent || !isAgent;
    }
    return true;
}
|
|
61
|
+
/**
 * Counts the lines in a string.
 *
 * @param text Text to measure.
 * @returns 0 for the empty string, otherwise the number of `\n` separators plus one.
 */
function countLines(text) {
    if (text.length === 0) {
        return 0;
    }
    const newlineCount = text.match(/\n/g)?.length ?? 0;
    return newlineCount + 1;
}
|
|
64
|
+
/**
 * Builds the bracketed placeholder that stands in for omitted content.
 *
 * @param label Category of the omitted content.
 * @param text Omitted text; only its line and character counts are reported.
 * @param extra Optional extra detail appended after the counts.
 * @returns A marker of the form `[<label> omitted: N lines, M chars[, extra]]`.
 */
function marker(label, text, extra) {
    const details = [`${countLines(text)} lines`, `${text.length} chars`];
    if (extra) {
        details.push(extra);
    }
    return `[${label} omitted: ${details.join(', ')}]`;
}
|
|
68
|
+
/**
 * Normalizes whitespace before filtering and chunking.
 *
 * Converts CRLF and lone CR to LF, strips spaces and tabs that precede a
 * newline, collapses runs of four or more newlines to three, and trims both
 * ends.
 *
 * @param text Raw text.
 * @returns The normalized text.
 */
function normalizeText(text) {
    const unixNewlines = text.replace(/\r\n?/g, '\n');
    const noTrailingBlanks = unixNewlines.replace(/[ \t]+\n/g, '\n');
    const boundedGaps = noTrailingBlanks.replace(/\n{4,}/g, '\n\n\n');
    return boundedGaps.trim();
}
|
|
75
|
+
/**
 * Appends an omission record for a span of removed text.
 *
 * @param omissions Array that receives the new record (mutated in place).
 * @param label Category of the omitted content.
 * @param text Omitted text; its character and line counts are recorded.
 */
function pushOmission(omissions, label, text) {
    const record = {
        label,
        chars: text.length,
        lines: countLines(text),
    };
    omissions.push(record);
}
|
|
78
|
+
/**
 * Replaces oversized triple-backtick fenced blocks with an omission marker.
 *
 * A block is oversized when its character length or its line count reaches
 * the matching `policy.agent.largeBlockThreshold*` value. The marker includes
 * the fence's language tag when one is present.
 *
 * @param text Text to scan.
 * @param policy Resolved ingest policy.
 * @param omissions Array that receives a record per collapsed block.
 * @returns The text with oversized fenced blocks replaced.
 */
function collapseLargeFencedBlocks(text, policy, omissions) {
    const { largeBlockThresholdChars, largeBlockThresholdLines } = policy.agent;
    const replaceFence = (block, language) => {
        const oversized = block.length >= largeBlockThresholdChars
            || countLines(block) >= largeBlockThresholdLines;
        if (!oversized) {
            return block;
        }
        pushOmission(omissions, 'Code block', block);
        const lang = language.trim();
        const extra = lang ? `language=${lang}` : undefined;
        return marker('Code block', block, extra);
    };
    return text.replace(/```([^\n`]*)\n([\s\S]*?)```/g, replaceFence);
}
|
|
89
|
+
/**
 * Detects a line that looks like a long encoded blob.
 *
 * The trimmed line must be at least 500 characters, contain no whitespace,
 * use only base64/base64url characters, and be more than 85% alphanumeric.
 *
 * @param line A single line of text.
 * @returns `true` when the line looks like an encoded blob.
 */
function isBase64LikeLine(line) {
    const candidate = line.trim();
    const longEnough = candidate.length >= 500;
    if (!longEnough || /\s/.test(candidate)) {
        return false;
    }
    if (!/^[A-Za-z0-9+/=_-]+$/.test(candidate)) {
        return false;
    }
    const alphaNumericCount = (candidate.match(/[A-Za-z0-9]/g) ?? []).length;
    return alphaNumericCount / candidate.length > 0.85;
}
|
|
98
|
+
/**
 * Replaces each encoded-blob line with a one-line omission marker.
 *
 * @param text Text to scan line by line.
 * @param omissions Array that receives a record per collapsed line.
 * @returns The text with blob lines replaced; other lines are unchanged.
 */
function collapseBase64Lines(text, omissions) {
    const rewritten = [];
    for (const line of text.split('\n')) {
        if (isBase64LikeLine(line)) {
            pushOmission(omissions, 'Encoded blob', line);
            rewritten.push(`[Encoded blob omitted: 1 line, ${line.length} chars]`);
        }
        else {
            rewritten.push(line);
        }
    }
    return rewritten.join('\n');
}
|
|
106
|
+
/**
 * Detects the first line of a diff or patch block.
 *
 * @param line A single line of text.
 * @returns `true` for a `diff --git` header or the exact line `*** Begin Patch`.
 */
function looksLikeDiffStart(line) {
    if (line === '*** Begin Patch') {
        return true;
    }
    return /^diff --git\b/.test(line);
}
|
|
109
|
+
/**
 * Detects diff header/metadata lines: git extended headers, `---`/`+++` file
 * markers, binary-file notices, and `*** ... File:` / `*** End of File`
 * patch-envelope lines.
 *
 * @param line A single line of text.
 * @returns `true` when the line matches one of the metadata patterns.
 */
function isDiffMetadataLine(line) {
    const metadataPatterns = [
        /^(?:index|new file mode|deleted file mode|old mode|new mode|similarity index|dissimilarity index|rename from|rename to|copy from|copy to)\b/,
        /^(?:---|\+\+\+) /,
        /^Binary files .+ differ$/,
        /^\*\*\* (?:Add|Update|Delete) File: /,
        /^\*\*\* End of File$/,
    ];
    return metadataPatterns.some((pattern) => pattern.test(line));
}
|
|
116
|
+
/**
 * Detects diff body lines: hunk headers (`@@`) and lines starting with a
 * space, `+`, `-`, or a backslash.
 *
 * @param line A single line of text.
 * @returns `true` when the line looks like diff body content.
 */
function isDiffBodyLine(line) {
    if (/^@@/.test(line)) {
        return true;
    }
    return /^[ +\\-]/.test(line);
}
|
|
120
|
+
/**
 * Replaces oversized diff/patch blocks with an omission marker.
 *
 * A block starts at a diff-start line and runs until the next diff-start
 * line, a blank line, or a line that is neither diff metadata nor diff body
 * (none of which are consumed), or through a `*** End Patch` line (which is
 * consumed). A block is collapsed when its character length or its line count
 * reaches the matching `policy.agent.largeBlockThreshold*` value.
 *
 * @param text Text to scan line by line.
 * @param policy Resolved ingest policy.
 * @param omissions Array that receives a record per collapsed block.
 * @returns The text with oversized diff blocks replaced.
 */
function collapseDiffBlocks(text, policy, omissions) {
    const source = text.split('\n');
    const output = [];
    let cursor = 0;
    while (cursor < source.length) {
        const head = source[cursor];
        cursor += 1;
        if (!looksLikeDiffStart(head)) {
            output.push(head);
            continue;
        }
        const diffLines = [head];
        while (cursor < source.length) {
            const candidate = source[cursor];
            if (looksLikeDiffStart(candidate)) {
                // The next diff starts its own block; leave it for the outer loop.
                break;
            }
            if (candidate === '*** End Patch') {
                // The patch terminator belongs to this block.
                diffLines.push(candidate);
                cursor += 1;
                break;
            }
            const endsBlock = candidate.trim() === ''
                || (!isDiffMetadataLine(candidate) && !isDiffBodyLine(candidate));
            if (endsBlock) {
                break;
            }
            diffLines.push(candidate);
            cursor += 1;
        }
        const diffText = diffLines.join('\n');
        const oversized = diffText.length >= policy.agent.largeBlockThresholdChars
            || diffLines.length >= policy.agent.largeBlockThresholdLines;
        if (oversized) {
            pushOmission(omissions, 'Diff', diffText);
            output.push(marker('Diff', diffText));
        }
        else {
            output.push(diffText);
        }
    }
    return output.join('\n');
}
|
|
162
|
+
/**
 * Detects log-style lines: a leading `YYYY-MM-DD hh:mm:ss` timestamp, a
 * leading log level, a stack-frame line (`at ... file:line:col`), a Python
 * traceback header, or a `<Name>Error: ...` line.
 *
 * @param line A single line of text.
 * @returns `true` when the line matches one of the log patterns.
 */
function isLogLikeLine(line) {
    const logPatterns = [
        /^\d{4}-\d{2}-\d{2}[T\s]\d{2}:\d{2}:\d{2}/,
        /^\s*(ERROR|WARN|INFO|DEBUG|TRACE)\b/,
        /^\s*at\s+.+\(.+:\d+:\d+\)/,
        /^\s*at\s+.+:\d+:\d+/,
        /^Traceback \(most recent call last\):/,
        /^[A-Za-z]*Error: .+/,
    ];
    return logPatterns.some((pattern) => pattern.test(line));
}
|
|
170
|
+
/**
 * Detects lines that look like shell or test-runner output: test status
 * keywords, `> script` echo lines, `$ command` prompts, and `npm ERR!` /
 * `npm WARN` / `npm notice` lines.
 *
 * @param line A single line of text.
 * @returns `true` when the line matches one of the shell-output patterns.
 */
function isShellOutputLine(line) {
    return /^\s*(PASS|FAIL|RUNS|Test Files|Tests|Duration|stderr|stdout)\b/.test(line)
        || /^>\s+[\w@/.-]+/.test(line)
        || /^\$\s+\S+/.test(line)
        // `\b` only guards the alphabetic tags. After `ERR!` it can never hold
        // before a space or end of line (both sides are non-word characters),
        // so a trailing `\b` there would reject every real `npm ERR! ...` line.
        || /^npm (?:ERR!|(?:WARN|notice)\b)/.test(line);
}
|
|
176
|
+
/**
 * Replaces oversized runs of consecutive matching lines with an omission marker.
 *
 * A run is a maximal sequence of adjacent lines for which `predicate` returns
 * true. It is collapsed when its character length or its line count reaches
 * the matching `policy.agent.largeBlockThreshold*` value. The marker quotes up
 * to 120 characters of the run's first non-blank line.
 *
 * @param text Text to scan line by line.
 * @param label Category used in the marker and the omission record.
 * @param predicate Line classifier that decides run membership.
 * @param policy Resolved ingest policy.
 * @param omissions Array that receives a record per collapsed run.
 * @returns The text with oversized runs replaced.
 */
function collapseLineRuns(text, label, predicate, policy, omissions) {
    const source = text.split('\n');
    const kept = [];
    let pos = 0;
    while (pos < source.length) {
        const first = source[pos];
        pos += 1;
        if (!predicate(first)) {
            kept.push(first);
            continue;
        }
        const run = [first];
        while (pos < source.length && predicate(source[pos])) {
            run.push(source[pos]);
            pos += 1;
        }
        const runText = run.join('\n');
        const oversized = runText.length >= policy.agent.largeBlockThresholdChars
            || run.length >= policy.agent.largeBlockThresholdLines;
        if (!oversized) {
            kept.push(runText);
            continue;
        }
        pushOmission(omissions, label, runText);
        const lead = run.find((candidate) => candidate.trim().length > 0)?.trim();
        const extra = lead ? `first="${lead.slice(0, 120)}"` : undefined;
        kept.push(marker(label, runText, extra));
    }
    return kept.join('\n');
}
|
|
207
|
+
/**
 * Detects a markdown table row: after trimming, the line starts and ends with
 * `|` and splits on `|` into at least four pieces (two or more cells).
 *
 * @param line A single line of text.
 * @returns `true` when the line looks like a table row.
 */
function isMarkdownTableLine(line) {
    const row = line.trim();
    if (!row.startsWith('|') || !row.endsWith('|')) {
        return false;
    }
    return row.split('|').length >= 4;
}
|
|
211
|
+
/**
 * Detects a markdown table header separator with at least two columns, e.g.
 * `| --- | :---: |`. Leading and trailing pipes are optional.
 *
 * @param line A single line of text.
 * @returns `true` when the line is a table separator row.
 */
function isMarkdownTableSeparator(line) {
    const separatorPattern = /^\s*\|?(?:\s*:?-{3,}:?\s*\|)+\s*:?-{3,}:?\s*\|?\s*$/;
    return separatorPattern.test(line);
}
|
|
214
|
+
/**
 * Truncates markdown tables that have more than `policy.agent.maxTableRows`
 * body rows.
 *
 * A table is a row line directly followed by a separator line, plus every
 * consecutive row line after it. The header, the separator and the first
 * `maxTableRows` body rows are kept, followed by a `[Table truncated: N more
 * rows]` line. The dropped rows are recorded as one omission.
 *
 * @param text Text to scan line by line.
 * @param policy Resolved ingest policy.
 * @param omissions Array that receives a record per truncated table.
 * @returns The text with long tables truncated.
 */
function truncateMarkdownTables(text, policy, omissions) {
    const rows = text.split('\n');
    const output = [];
    let idx = 0;
    while (idx < rows.length) {
        const header = rows[idx];
        const separator = rows[idx + 1];
        const startsTable = isMarkdownTableLine(header)
            && Boolean(separator)
            && isMarkdownTableSeparator(separator);
        if (!startsTable) {
            output.push(header);
            idx += 1;
            continue;
        }
        const table = [header, separator];
        idx += 2;
        while (idx < rows.length && isMarkdownTableLine(rows[idx])) {
            table.push(rows[idx]);
            idx += 1;
        }
        // Header and separator do not count against the row budget.
        const keepCount = policy.agent.maxTableRows + 2;
        if (table.length <= keepCount) {
            output.push(...table);
            continue;
        }
        pushOmission(omissions, 'Table rows', table.slice(keepCount).join('\n'));
        output.push(...table.slice(0, keepCount));
        output.push(`[Table truncated: ${table.length - keepCount} more rows]`);
    }
    return output.join('\n');
}
|
|
239
|
+
/**
 * Collapses a message that is, as a whole, one large JSON or XML-like blob.
 *
 * Text shorter than 2000 trimmed characters is returned unchanged. Text that
 * parses as JSON is replaced by a marker; for a non-array object the marker
 * lists up to 12 top-level keys. Otherwise the text is treated as XML when it
 * has at least 20 lines, more than 8% of its characters are `<`, `>` or `/`,
 * it starts with a tag-like opener, and it contains a closing tag.
 *
 * @param text Text to inspect.
 * @param omissions Array that receives a record when the text is collapsed.
 * @returns The omission marker, or `text` unchanged.
 */
function maybeCollapseWholeBlob(text, omissions) {
    const body = text.trim();
    if (body.length < 2000) {
        return text;
    }
    try {
        const value = JSON.parse(body);
        pushOmission(omissions, 'JSON blob', text);
        const isPlainObject = value !== null && typeof value === 'object' && !Array.isArray(value);
        const keyList = isPlainObject ? Object.keys(value).slice(0, 12).join(',') : '';
        return marker('JSON blob', text, keyList ? `keys=${keyList}` : undefined);
    }
    catch {
        // Not JSON: fall through to the XML-shape check.
    }
    const markupChars = body.match(/[<>/]/g)?.length ?? 0;
    const looksLikeXml = countLines(body) >= 20
        && markupChars / body.length > 0.08
        && /^<\??[A-Za-z!]/.test(body)
        && /<\/[A-Za-z][^>]*>/.test(body);
    if (!looksLikeXml) {
        return text;
    }
    pushOmission(omissions, 'XML blob', text);
    return marker('XML blob', text);
}
|
|
266
|
+
/**
 * Shrinks text to a character budget, keeping the head and the tail.
 *
 * Text within budget is returned unchanged. Otherwise the result is roughly
 * the first 60% and the last 40% of the space left once a
 * `[Content truncated: ...]` marker is inserted between them. When the budget
 * is too small to hold the marker plus any content, the text is hard-cut to
 * the budget instead, so the result never exceeds `budget` characters.
 *
 * @param text Text to fit.
 * @param budget Maximum number of characters allowed in the result.
 * @returns The fitted text and whether anything was removed.
 */
function fitToBudget(text, budget) {
    if (text.length <= budget) {
        return { text, truncated: false };
    }
    const markerText = `\n\n[Content truncated: original ${text.length} chars, kept ${budget} chars]\n\n`;
    if (budget <= markerText.length) {
        // The marker alone would use up or exceed the budget, leaving no room
        // for content. A plain prefix honours the budget and keeps real text.
        return { text: text.slice(0, Math.max(0, budget)).trim(), truncated: true };
    }
    const available = budget - markerText.length;
    const headLength = Math.ceil(available * 0.6);
    const tailLength = Math.max(0, available - headLength);
    const head = text.slice(0, headLength).trimEnd();
    const tail = text.slice(text.length - tailLength).trimStart();
    return {
        text: `${head}${markerText}${tail}`.trim(),
        truncated: true,
    };
}
|
|
279
|
+
/**
 * Shapes assistant text for ingest.
 *
 * The text is whitespace-normalized. In `bounded` mode these filters then run
 * in order: large fenced blocks, diff blocks, log runs, command-output runs,
 * long markdown tables, encoded-blob lines, and whole-message JSON/XML blobs.
 * The result is finally capped at `policy.agent.maxCharsAfterFiltering`.
 *
 * @param text Raw assistant message text.
 * @param policy Resolved ingest policy.
 * @returns The filtered text, the omission records, and whether the final
 *   budget cap cut the text.
 */
export function filterAssistantContent(text, policy) {
    const omissions = [];
    const normalized = normalizeText(text);
    // Order matters: each filter sees the output of the previous one.
    const boundedSteps = [
        (input) => collapseLargeFencedBlocks(input, policy, omissions),
        (input) => collapseDiffBlocks(input, policy, omissions),
        (input) => collapseLineRuns(input, 'Log output', isLogLikeLine, policy, omissions),
        (input) => collapseLineRuns(input, 'Command output', isShellOutputLine, policy, omissions),
        (input) => truncateMarkdownTables(input, policy, omissions),
        (input) => collapseBase64Lines(input, omissions),
        (input) => maybeCollapseWholeBlob(input, omissions),
    ];
    const steps = policy.agent.mode === 'bounded' ? boundedSteps : [];
    const filtered = steps.reduce((current, step) => step(current), normalized);
    const { text: fitted, truncated } = fitToBudget(filtered, policy.agent.maxCharsAfterFiltering);
    return { text: fitted, omissions, truncated };
}
|
|
298
|
+
/**
 * Splits text into trimmed, non-empty chunks of at most `maxChunkChars`
 * characters.
 *
 * The text is normalized and split into paragraphs on blank lines. Paragraphs
 * are packed greedily into chunks, joined by a blank line. A paragraph longer
 * than the limit is packed line by line, and a single line longer than the
 * limit is cut into fixed-size slices. Slice boundaries never fall inside a
 * UTF-16 surrogate pair.
 *
 * @param text Text to chunk.
 * @param maxChunkChars Maximum chunk length in UTF-16 code units. Fractions
 *   are rounded down; values below 1 (or NaN) are treated as 1 so the split
 *   loop always makes progress.
 * @returns The chunks in order; empty when the normalized text is empty.
 */
export function chunkText(text, maxChunkChars) {
    const normalized = normalizeText(text);
    if (!normalized)
        return [];
    // A non-positive step would never advance the hard-split loop, and a
    // fractional one would produce slices one character over the limit.
    const limit = maxChunkChars >= 1 ? Math.floor(maxChunkChars) : 1;
    const chunks = [];
    let current = '';
    const flush = () => {
        if (!current.trim())
            return;
        chunks.push(current.trim());
        current = '';
    };
    const isHighSurrogate = (code) => code >= 0xd800 && code <= 0xdbff;
    const isLowSurrogate = (code) => code >= 0xdc00 && code <= 0xdfff;
    // Cuts an over-long unit into slices of at most `limit` code units.
    const hardSplit = (unit) => {
        let start = 0;
        while (start < unit.length) {
            let end = Math.min(unit.length, start + limit);
            // Back off one unit rather than separate a surrogate pair, unless
            // that would leave an empty slice.
            if (end < unit.length
                && end - start > 1
                && isHighSurrogate(unit.charCodeAt(end - 1))
                && isLowSurrogate(unit.charCodeAt(end))) {
                end -= 1;
            }
            chunks.push(unit.slice(start, end).trim());
            start = end;
        }
    };
    const appendUnit = (unit) => {
        const separator = current ? '\n\n' : '';
        if (current.length + separator.length + unit.length <= limit) {
            current = `${current}${separator}${unit}`;
            return;
        }
        flush();
        if (unit.length <= limit) {
            current = unit;
            return;
        }
        hardSplit(unit);
    };
    for (const paragraph of normalized.split(/\n{2,}/)) {
        if (paragraph.length <= limit) {
            appendUnit(paragraph);
            continue;
        }
        for (const line of paragraph.split('\n')) {
            appendUnit(line);
        }
    }
    flush();
    // Whitespace-only slices trim to empty strings; drop them.
    return chunks.filter((chunk) => chunk.length > 0);
}
|
|
337
|
+
/**
 * Prepares one transcript message for ingest: applies role-specific budgets
 * and filtering, then splits the result into chunks.
 *
 * - Assistant messages are skipped when the turn has no assistant characters
 *   or chunks left. Otherwise they are capped at `agent.maxCharsPerMessage`,
 *   filtered, and capped again at `remainingAgentChars`.
 * - User messages are capped at `user.maxCharsPerMessage`.
 * - Other roles are chunked without a character cap.
 *
 * At most `remainingChunks` chunks are returned. `truncated` is set when any
 * cap cut the text, any content was collapsed, or any chunk was dropped.
 *
 * @param input Message, policy and remaining per-turn budgets.
 * @returns The chunks plus size and truncation accounting.
 */
export function prepareMessageForIngest(input) {
    const original = normalizeText(input.text);
    const omissions = [];
    let prepared = original;
    let truncated = false;
    // A negative allowance means "nothing left". Passed to slice() as-is it
    // would drop only the last chunks instead of all of them.
    const chunkAllowance = input.remainingChunks < 0 ? 0 : input.remainingChunks;
    if (input.role === 'assistant') {
        const messageBudget = input.remainingAgentChars;
        if (messageBudget <= 0 || chunkAllowance <= 0) {
            return {
                chunks: [],
                originalChars: original.length,
                preparedChars: 0,
                truncated: true,
                omissions: [],
            };
        }
        // Cap the raw message first so filtering never runs on unbounded input.
        const preBudgeted = fitToBudget(prepared, input.policy.agent.maxCharsPerMessage);
        prepared = preBudgeted.text;
        truncated = preBudgeted.truncated;
        const filtered = filterAssistantContent(prepared, input.policy);
        prepared = filtered.text;
        omissions.push(...filtered.omissions);
        truncated = truncated || filtered.truncated || filtered.omissions.length > 0;
        // Finally respect what is left of the per-turn assistant budget.
        const budgeted = fitToBudget(prepared, messageBudget);
        prepared = budgeted.text;
        truncated = truncated || budgeted.truncated;
    }
    else if (input.role === 'user') {
        const budgeted = fitToBudget(prepared, input.policy.user.maxCharsPerMessage);
        prepared = budgeted.text;
        truncated = budgeted.truncated;
    }
    const allChunks = chunkText(prepared, input.policy.maxChunkChars);
    const chunks = allChunks.slice(0, chunkAllowance);
    // Compare chunk counts rather than joined lengths. Chunking re-joins
    // paragraphs and lines with its own separators, so the joined length can
    // shrink or grow even when nothing was dropped.
    if (chunks.length < allChunks.length) {
        truncated = true;
    }
    return {
        chunks,
        originalChars: original.length,
        preparedChars: chunks.reduce((sum, chunk) => sum + chunk.length, 0),
        truncated,
        omissions,
    };
}
|