metainsight-context-engine 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/BOOTSTRAP.md +341 -0
- package/README.md +230 -0
- package/dist/cos-bootstrap.d.ts +221 -0
- package/dist/cos-bootstrap.d.ts.map +1 -0
- package/dist/cos-bootstrap.js +598 -0
- package/dist/cos-bootstrap.js.map +1 -0
- package/dist/cos-operations.d.ts +219 -0
- package/dist/cos-operations.d.ts.map +1 -0
- package/dist/cos-operations.js +583 -0
- package/dist/cos-operations.js.map +1 -0
- package/dist/engine.d.ts +101 -0
- package/dist/engine.d.ts.map +1 -0
- package/dist/engine.js +157 -0
- package/dist/engine.js.map +1 -0
- package/dist/index.d.ts +42 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +993 -0
- package/dist/index.js.map +1 -0
- package/dist/local-memory-sync.d.ts +204 -0
- package/dist/local-memory-sync.d.ts.map +1 -0
- package/dist/local-memory-sync.js +1126 -0
- package/dist/local-memory-sync.js.map +1 -0
- package/openclaw.plugin.json +225 -0
- package/package.json +78 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,993 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MetaInsight Context Engine — Plugin Entry Point
|
|
3
|
+
*
|
|
4
|
+
* Registers:
|
|
5
|
+
* 1. A ContextEngine implementation (replaces "legacy" engine)
|
|
6
|
+
* 2. `cloud_memory_search` tool — manual memory search from cloud
|
|
7
|
+
* 3. `llm_input` hook — cache the full system prompt per session
|
|
8
|
+
* 4. `before_prompt_build` hook — cloud memory injection (layer-based)
|
|
9
|
+
* + image/document context injection (prependContext, near user message)
|
|
10
|
+
*
|
|
11
|
+
* On startup, the plugin runs a bootstrap sequence that ensures all COS/CI
|
|
12
|
+
* infrastructure is ready (bucket exists, dataset created, binding established).
|
|
13
|
+
*
|
|
14
|
+
* Configuration (in ~/.openclaw/openclaw.json):
|
|
15
|
+
* {
|
|
16
|
+
* "plugins": {
|
|
17
|
+
* "slots": { "contextEngine": "metainsight-context-engine" },
|
|
18
|
+
* "entries": {
|
|
19
|
+
* "metainsight-context-engine": {
|
|
20
|
+
* "enabled": true,
|
|
21
|
+
* "config": {
|
|
22
|
+
* "secretId": "${COS_SECRET_ID}",
|
|
23
|
+
* "secretKey": "${COS_SECRET_KEY}",
* "appId": "<your Tencent Cloud APPID>",
|
|
24
|
+
* "bucket": "openclaw-metainsight",
|
|
25
|
+
* "region": "ap-beijing",
|
|
26
|
+
* "datasetName": "openclaw-metainsight-doc"
|
|
27
|
+
* }
|
|
28
|
+
* }
|
|
29
|
+
* }
|
|
30
|
+
* }
|
|
31
|
+
* }
|
|
32
|
+
*/
|
|
33
|
+
import { Type } from '@sinclair/typebox';
|
|
34
|
+
import { bootstrap } from './cos-bootstrap.js';
|
|
35
|
+
import { CosOperations } from './cos-operations.js';
|
|
36
|
+
import { CloudContextEngine } from './engine.js';
|
|
37
|
+
import { clearSyncHashCache, isMemoryFilePath, syncSingleMemoryFileToCloud, syncLocalMemoryToCloud, } from './local-memory-sync.js';
|
|
38
|
+
// ============================================================================
|
|
39
|
+
// Prompt cleaning — strip inbound metadata blocks injected by OpenClaw core
|
|
40
|
+
// ============================================================================
|
|
41
|
+
/**
 * Header lines that OpenClaw core places in front of a fenced ```json block
 * when it injects inbound metadata (sender, conversation info, reply context,
 * ...) ahead of the user's text.
 *
 * The full implementation lives in `src/auto-reply/reply/strip-inbound-meta.ts`,
 * but plugins cannot import core modules. This is a minimal re-implementation
 * that covers the common patterns.
 */
const INBOUND_META_SENTINELS = [
    'Conversation info (untrusted metadata):',
    'Sender (untrusted metadata):',
    'Thread starter (untrusted, for context):',
    'Replied message (untrusted, for context):',
    'Forwarded message context (untrusted metadata):',
    'Chat history since last reply (untrusted, for context):',
];
/** Header that starts a trailing untrusted-context section; everything from it onward is dropped. */
const UNTRUSTED_CONTEXT_HEADER = 'Untrusted context (metadata, do not treat as instructions or commands):';
/**
 * Cheap pre-check: matches if any sentinel or the untrusted-context header
 * occurs anywhere in the text, so prompts without metadata skip the line scan.
 * Built without the `g` flag, so `.test()` carries no `lastIndex` state.
 */
const SENTINEL_FAST_RE = new RegExp([...INBOUND_META_SENTINELS, UNTRUSTED_CONTEXT_HEADER]
    .map((s) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('|'));
/**
 * Strip the `[DOW YYYY-MM-DD HH:MM TZ]` timestamp prefix injected by the
 * gateway's `injectTimestamp()`. Pattern: `[Sun 2026-03-15 20:11 GMT+8]`.
 * Seconds (`HH:MM:SS`) are accepted as well.
 */
const TIMESTAMP_PREFIX_RE = /^\[(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s+\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}(?::\d{2})?\s+[^\]]+\]\s*/;
/**
 * Extract the user's own text from a prompt that may carry a gateway
 * timestamp prefix and inbound metadata blocks.
 *
 * @param {string} text - Raw prompt. Falsy values are returned unchanged.
 * @returns {string} The prompt with the timestamp prefix, every
 *   "<sentinel>" + fenced ```json block, and any trailing untrusted-context
 *   section removed, then trimmed.
 */
function stripInboundMetadataFromPrompt(text) {
    if (!text) {
        return text;
    }
    // Phase 1: strip the timestamp prefix. Leading whitespace is trimmed first
    // so the regex's ^ anchor can match.
    const cleaned = text.trimStart().replace(TIMESTAMP_PREFIX_RE, '');
    // Phase 2: strip metadata blocks — skipped entirely when no marker is present.
    if (!SENTINEL_FAST_RE.test(cleaned)) {
        return cleaned.trim();
    }
    const lines = cleaned.split('\n');
    const kept = [];
    let inFencedJson = false;
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        const trimmed = line.trim();
        if (inFencedJson) {
            // Swallow the JSON payload up to and including the closing fence.
            if (trimmed === '```') {
                inFencedJson = false;
            }
            continue;
        }
        // Everything from the untrusted-context header onward is dropped.
        if (trimmed === UNTRUSTED_CONTEXT_HEADER) {
            break;
        }
        // A sentinel only counts as metadata when a ```json fence follows
        // immediately; otherwise the line is ordinary user text and is kept.
        if (INBOUND_META_SENTINELS.includes(trimmed) && lines[i + 1]?.trim() === '```json') {
            i += 1; // also consume the fence opener
            inFencedJson = true;
            continue;
        }
        kept.push(line);
    }
    // Phase 3: a timestamp that followed the metadata blocks is now at the
    // start of the remaining text, so strip it again.
    const joined = kept.join('\n').replace(/^\n+/, '').replace(/\n+$/, '');
    return joined.replace(TIMESTAMP_PREFIX_RE, '').trim();
}
|
|
124
|
+
/**
 * Turn any thrown value into a string suitable for a log line.
 *
 * - `Error` instances: the stack trace, or the message when no stack exists.
 * - Strings: returned unchanged.
 * - Anything else: pretty-printed JSON, falling back to `String(err)` when
 *   the value cannot be serialised.
 *
 * @param {unknown} err - The caught value.
 * @returns {string} Always a string.
 */
function stringifyError(err) {
    if (err instanceof Error) {
        return err.stack ?? err.message;
    }
    if (typeof err === 'string') {
        return err;
    }
    try {
        // JSON.stringify returns undefined (not a string) for undefined,
        // functions and symbols, so fall back to String() in that case too.
        return JSON.stringify(err, null, 2) ?? String(err);
    }
    catch {
        // Circular structures and BigInt make JSON.stringify throw.
        return String(err);
    }
}
|
|
142
|
+
// ============================================================================
|
|
143
|
+
// System prompt cache — populated by llm_input, consumed by before_prompt_build
|
|
144
|
+
// ============================================================================
|
|
145
|
+
/**
|
|
146
|
+
* Per-session cache for the system prompt observed via the `llm_input` hook.
|
|
147
|
+
*
|
|
148
|
+
* Because `before_prompt_build` fires *before* `llm_input`, the first turn of
|
|
149
|
+
* a brand-new session will have no cached value. In that case we fall back to
|
|
150
|
+
* pure `prependSystemContext`/`appendSystemContext` (append-only mode).
|
|
151
|
+
*
|
|
152
|
+
* From the second turn onward the cache is populated, so `before_prompt_build`
|
|
153
|
+
* can return a full `systemPrompt` override that precisely replaces sections
|
|
154
|
+
* of the original prompt (e.g. swap the `## Memory Recall` block).
|
|
155
|
+
*/
|
|
156
|
+
// Keyed by sessionId; each value is the full system prompt string for that session.
// NOTE(review): the visible part of this file only ever calls .set() and .get()
// on this map — confirm entries are removed somewhere (e.g. when a session
// ends), otherwise it keeps one prompt per session for the process lifetime.
const systemPromptCache = new Map();
|
|
157
|
+
/**
 * Parse a system prompt into layers by splitting on `#`/`##`/`###` headings.
 *
 * Each heading starts a new layer whose content begins with the heading line.
 * Content before the first heading becomes the `(preamble)` layer (level 0).
 * Layers whose content is only whitespace are dropped. Headings inside fenced
 * code blocks — backtick (```) or tilde (~~~) fences — are ignored.
 *
 * @param {string} systemPrompt - Full system prompt text.
 * @returns {Array<{name: string, level: number, content: string, chars: number}>}
 *   Layers in document order; an empty array for empty input.
 */
function parseSystemPromptLayers(systemPrompt) {
    if (!systemPrompt) {
        return [];
    }
    const layers = [];
    let currentLines = [];
    let currentName = '(preamble)';
    let currentLevel = 0;
    // Marker ('```' or '~~~') of the fence we are inside, or null when outside.
    let openFence = null;
    const flushLayer = () => {
        const content = currentLines.join('\n');
        // Only add non-empty layers (skips an empty preamble).
        if (content.trim().length > 0) {
            layers.push({
                name: currentName,
                level: currentLevel,
                content,
                chars: content.length,
            });
        }
    };
    for (const line of systemPrompt.split('\n')) {
        const fence = line.trimStart().match(/^(```|~~~)/)?.[1];
        // A fence line opens a block when none is open and closes it only if it
        // uses the same marker, so a `~~~` line inside a ``` block is plain content.
        if (fence && (openFence === null || fence === openFence)) {
            openFence = openFence === null ? fence : null;
            currentLines.push(line);
            continue;
        }
        if (openFence !== null) {
            currentLines.push(line);
            continue;
        }
        // Heading lines: "# Title", "## Title", "### Title" (deeper levels stay in the current layer).
        const headingMatch = line.match(/^(#{1,3})\s+(.+?)\s*$/);
        if (headingMatch) {
            flushLayer();
            currentLevel = headingMatch[1].length;
            currentName = headingMatch[2];
            currentLines = [line];
        }
        else {
            currentLines.push(line);
        }
    }
    // Flush the last layer.
    flushLayer();
    return layers;
}
|
|
214
|
+
/**
 * Join the `content` of every layer, in order, with a single newline between
 * layers, yielding one system prompt string.
 *
 * @param {Array<{content: string}>} layers - Layers to concatenate.
 * @returns {string} The reassembled prompt (empty string for no layers).
 */
function assembleSystemPromptFromLayers(layers) {
    const contents = layers.map(({ content }) => content);
    return contents.join('\n');
}
|
|
220
|
+
/**
 * Return the first layer whose name contains `namePattern`, compared
 * case-insensitively.
 *
 * @param {Array<{name: string}>} layers - Layers to search, in order.
 * @param {string} namePattern - Substring to look for in layer names.
 * @returns {object | undefined} The matching layer, or `undefined` if none matches.
 */
function findLayer(layers, namePattern) {
    const needle = namePattern.toLowerCase();
    for (const layer of layers) {
        if (layer.name.toLowerCase().includes(needle)) {
            return layer;
        }
    }
    return undefined;
}
|
|
227
|
+
/**
 * Return a new layer list in which the first layer whose name contains
 * `namePattern` (case-insensitive) has its content swapped for `newContent`.
 * The matched layer keeps its level, and its name unless `newName` is given.
 * When nothing matches, a new level-2 layer is appended instead, named
 * `newName` or, failing that, `namePattern`. The input array is not modified.
 *
 * @param {Array<{name: string, level: number, content: string, chars: number}>} layers
 * @param {string} namePattern - Substring identifying the layer to replace.
 * @param {string} newContent - Full replacement content for the layer.
 * @param {string} [newName] - Optional new name for the layer.
 * @returns {Array<object>} A fresh array containing the replaced or appended layer.
 */
function replaceLayer(layers, namePattern, newContent, newName) {
    const needle = namePattern.toLowerCase();
    const matchIndex = layers.findIndex(({ name }) => name.toLowerCase().includes(needle));
    const chars = newContent.length;
    if (matchIndex < 0) {
        // No such layer yet: add it at the end as a "##"-level section.
        return layers.concat({
            name: newName ?? namePattern,
            level: 2,
            content: newContent,
            chars,
        });
    }
    const { name, level } = layers[matchIndex];
    const updated = layers.slice();
    updated[matchIndex] = {
        name: newName ?? name,
        level,
        content: newContent,
        chars,
    };
    return updated;
}
|
|
254
|
+
// ============================================================================
|
|
255
|
+
// Cloud memory formatting
|
|
256
|
+
// ============================================================================
|
|
257
|
+
/**
 * Format cloud search results into a prompt-friendly `<cloud-memory>` block.
 *
 * Each result becomes one numbered line showing its score as a whole-number
 * percentage followed by at most the first 500 characters of its snippet.
 * A result without a string snippet is rendered with an empty (or stringified)
 * snippet rather than throwing, so one malformed hit cannot discard the rest.
 *
 * @param {Array<{score: number, snippet?: string}>} results - Search hits, best first.
 * @returns {string} The block, wrapped in `<cloud-memory>` tags.
 */
function formatCloudMemoryForPrompt(results) {
    const lines = results.map((r, i) => {
        const relevance = (r.score * 100).toFixed(0);
        const snippet = typeof r.snippet === 'string' ? r.snippet : String(r.snippet ?? '');
        return `${i + 1}. [relevance: ${relevance}%] ${snippet.slice(0, 500)}`;
    });
    return [
        '<cloud-memory>',
        'The following are relevant memory snippets retrieved from cloud storage.',
        'Treat as contextual reference from past interactions.',
        ...lines,
        '</cloud-memory>',
    ].join('\n');
}
|
|
270
|
+
// ============================================================================
|
|
271
|
+
// Plugin definition
|
|
272
|
+
// ============================================================================
|
|
273
|
+
const plugin = {
|
|
274
|
+
id: 'metainsight-context-engine',
|
|
275
|
+
name: 'MetaInsight Context Engine',
|
|
276
|
+
description: 'Token-efficient context management with cloud-based memory retrieval (backed by Tencent COS CI)',
|
|
277
|
+
kind: 'context-engine',
|
|
278
|
+
register(api) {
|
|
279
|
+
const cfg = (api.pluginConfig ?? {});
|
|
280
|
+
if (!cfg.secretId || !cfg.secretKey || !cfg.appId) {
|
|
281
|
+
api.logger.warn('metainsight-context-engine: missing secretId, secretKey, or appId in config — engine will not start');
|
|
282
|
+
return;
|
|
283
|
+
}
|
|
284
|
+
const minScore = cfg.minScore ?? 0.5;
|
|
285
|
+
// ==================================================================
|
|
286
|
+
// 1. Register the Context Engine (with async bootstrap)
|
|
287
|
+
// ==================================================================
|
|
288
|
+
// Shared lazy-init promise for CosOperations — used by engine, tools, and hooks.
|
|
289
|
+
//
|
|
290
|
+
// The effective agentId is resolved once at first init:
|
|
291
|
+
// 1. cfg.agentId (user explicitly configured)
|
|
292
|
+
// 2. runtimeAgentId from hook ctx.agentId (auto-detected at runtime)
|
|
293
|
+
// 3. 'main' (fallback — matches the default ctx for single-agent setups)
|
|
294
|
+
let opsPromise = null;
|
|
295
|
+
let resolvedAgentId;
|
|
296
|
+
const initOps = async (runtimeAgentId) => {
|
|
297
|
+
if (!opsPromise) {
|
|
298
|
+
// Resolve agentId once and lock it for the lifetime of this plugin instance.
|
|
299
|
+
// Priority: explicit config > hook ctx > fallback 'main'
|
|
300
|
+
resolvedAgentId = cfg.agentId?.trim() || runtimeAgentId?.trim() || 'main';
|
|
301
|
+
api.logger.info(`metainsight-context-engine: resolved agentId="${resolvedAgentId}" `
|
|
302
|
+
+ `(cfg=${cfg.agentId ?? '(empty)'}, ctx=${runtimeAgentId ?? '(empty)'}, fallback=main)`);
|
|
303
|
+
opsPromise = (async () => {
|
|
304
|
+
api.logger.info('metainsight-context-engine: running COS bootstrap...');
|
|
305
|
+
const outcome = await bootstrap({
|
|
306
|
+
secretId: cfg.secretId,
|
|
307
|
+
secretKey: cfg.secretKey,
|
|
308
|
+
appId: cfg.appId,
|
|
309
|
+
agentId: resolvedAgentId,
|
|
310
|
+
bucket: cfg.bucket,
|
|
311
|
+
region: cfg.region,
|
|
312
|
+
datasets: cfg.datasetName
|
|
313
|
+
? [{ name: cfg.datasetName, cosPrefix: 'memory/', templateId: cfg.templateId }]
|
|
314
|
+
: undefined,
|
|
315
|
+
}, api.logger);
|
|
316
|
+
if (!outcome.success) {
|
|
317
|
+
// Reset so the next caller can retry instead of getting a stale rejection
|
|
318
|
+
opsPromise = null;
|
|
319
|
+
resolvedAgentId = undefined;
|
|
320
|
+
throw new Error(`COS bootstrap failed: ${JSON.stringify(outcome.error)}`);
|
|
321
|
+
}
|
|
322
|
+
const ds = outcome.config.datasets.map((d) => d.name).join(', ');
|
|
323
|
+
api.logger.info(`metainsight-context-engine: bootstrap complete `
|
|
324
|
+
+ `(agentId=${resolvedAgentId}, bucket=${outcome.config.bucket}, datasets=[${ds}])`);
|
|
325
|
+
return new CosOperations(outcome, {
|
|
326
|
+
template: cfg.searchTemplate,
|
|
327
|
+
matchThreshold: cfg.matchThreshold,
|
|
328
|
+
});
|
|
329
|
+
})();
|
|
330
|
+
}
|
|
331
|
+
return opsPromise;
|
|
332
|
+
};
|
|
333
|
+
api.registerContextEngine('metainsight-context-engine', () => {
|
|
334
|
+
const engine = new CloudContextEngine(initOps, {
|
|
335
|
+
localMemorySyncEnabled: cfg.localMemorySync !== false,
|
|
336
|
+
localMemorySync: {
|
|
337
|
+
enabled: cfg.localMemorySync !== false,
|
|
338
|
+
syncLongTermMemory: cfg.syncLongTermMemory !== false,
|
|
339
|
+
syncDailyLogs: cfg.syncDailyLogs !== false,
|
|
340
|
+
syncConfig: false,
|
|
341
|
+
},
|
|
342
|
+
}, api.logger);
|
|
343
|
+
return engine;
|
|
344
|
+
});
|
|
345
|
+
// ==================================================================
|
|
346
|
+
// 1b. On-boot sync: memory (under localMemorySync umbrella)
|
|
347
|
+
// ==================================================================
|
|
348
|
+
//
|
|
349
|
+
// When localMemorySync is enabled, run memory sync at plugin
|
|
350
|
+
// registration time (gateway startup), **before** any session is
|
|
351
|
+
// created. Previously, memory sync only ran inside engine.bootstrap()
|
|
352
|
+
// which fires on the first session — causing memory to lag behind.
|
|
353
|
+
//
|
|
354
|
+
// Memory sync:
|
|
355
|
+
// 1. Discover MEMORY.md, daily logs, workspace files, and config
|
|
356
|
+
// 2. Upload changed files to cloud (hash-based dedup)
|
|
357
|
+
// Build the allowed-extension set from user config or defaults.
|
|
358
|
+
// This is shared between the on-boot sync and the after_tool_call hook.
|
|
359
|
+
const syncFileExts = cfg.syncFileExtensions
|
|
360
|
+
? new Set(cfg.syncFileExtensions.map((e) => e.toLowerCase()))
|
|
361
|
+
: undefined; // undefined → use DEFAULT_SYNC_FILE_EXTENSIONS in local-memory-sync.ts
|
|
362
|
+
if (cfg.localMemorySync !== false) {
|
|
363
|
+
// Fire-and-forget: full sync on boot (with retry for transient COS failures)
|
|
364
|
+
const MAX_BOOT_RETRIES = 2;
|
|
365
|
+
const BOOT_RETRY_DELAY_MS = 3000;
|
|
366
|
+
void (async () => {
|
|
367
|
+
// Clear the on-disk hash cache on every fresh boot so that all
|
|
368
|
+
// local memory files are re-evaluated and re-uploaded if needed.
|
|
369
|
+
// This prevents stale cache entries from suppressing legitimate syncs
|
|
370
|
+
// after gateway restarts or config changes.
|
|
371
|
+
try {
|
|
372
|
+
await clearSyncHashCache();
|
|
373
|
+
api.logger.info('cloud-engine: cleared sync hash cache on boot');
|
|
374
|
+
}
|
|
375
|
+
catch (err) {
|
|
376
|
+
api.logger.warn(`cloud-engine: failed to clear sync hash cache: ${stringifyError(err)}`);
|
|
377
|
+
}
|
|
378
|
+
let lastErr;
|
|
379
|
+
for (let attempt = 0; attempt <= MAX_BOOT_RETRIES; attempt += 1) {
|
|
380
|
+
try {
|
|
381
|
+
// On boot there's no hook ctx yet, so pass cfg.agentId directly.
|
|
382
|
+
// If cfg.agentId is empty, initOps will fallback to 'main'.
|
|
383
|
+
const ops = await initOps(cfg.agentId);
|
|
384
|
+
api.logger.info('cloud-engine: starting memory sync on boot...');
|
|
385
|
+
// Use a synthetic session file path to resolve the workspace directory.
|
|
386
|
+
// The resolveWorkspaceDir helper checks ~/.openclaw/workspace/ first,
|
|
387
|
+
// so this will work even without a real session file.
|
|
388
|
+
const homeDir = process.env.HOME ?? process.env.USERPROFILE ?? '';
|
|
389
|
+
const stateDir = process.env.OPENCLAW_STATE_DIR?.trim()
|
|
390
|
+
|| (homeDir ? `${homeDir}/.openclaw` : '');
|
|
391
|
+
const syntheticSessionFile = stateDir
|
|
392
|
+
? `${stateDir}/sessions/__boot__`
|
|
393
|
+
: '__boot__';
|
|
394
|
+
const memResult = await syncLocalMemoryToCloud(ops, syntheticSessionFile, {
|
|
395
|
+
enabled: true,
|
|
396
|
+
syncLongTermMemory: cfg.syncLongTermMemory !== false,
|
|
397
|
+
syncDailyLogs: cfg.syncDailyLogs !== false,
|
|
398
|
+
syncConfig: false,
|
|
399
|
+
}, api.logger, syncFileExts);
|
|
400
|
+
api.logger.info(`cloud-engine: memory sync on boot complete — `
|
|
401
|
+
+ `uploaded=${memResult.uploaded}, skipped=${memResult.skipped}, failed=${memResult.failed}`);
|
|
402
|
+
return; // success — exit retry loop
|
|
403
|
+
}
|
|
404
|
+
catch (err) {
|
|
405
|
+
lastErr = err;
|
|
406
|
+
if (attempt < MAX_BOOT_RETRIES) {
|
|
407
|
+
const delay = BOOT_RETRY_DELAY_MS * (attempt + 1);
|
|
408
|
+
api.logger.warn(`cloud-engine: on-boot sync attempt ${attempt + 1} failed, `
|
|
409
|
+
+ `retrying in ${delay}ms: ${stringifyError(err)}`);
|
|
410
|
+
await new Promise((r) => setTimeout(r, delay));
|
|
411
|
+
}
|
|
412
|
+
}
|
|
413
|
+
}
|
|
414
|
+
api.logger.warn(`cloud-engine: on-boot sync failed after ${MAX_BOOT_RETRIES + 1} attempts: `
|
|
415
|
+
+ stringifyError(lastErr));
|
|
416
|
+
})();
|
|
417
|
+
}
|
|
418
|
+
// ==================================================================
|
|
419
|
+
// 2. Tool: cloud_memory_search — manual memory search
|
|
420
|
+
// ==================================================================
|
|
421
|
+
api.registerTool({
|
|
422
|
+
name: 'cloud_memory_search',
|
|
423
|
+
label: 'Search Cloud Memory',
|
|
424
|
+
description: 'Search cloud-stored memories and conversation history. ' +
|
|
425
|
+
'Use when you need context from past interactions, decisions, ' +
|
|
426
|
+
'or uploaded documents that may not be in the current conversation.',
|
|
427
|
+
parameters: Type.Object({
|
|
428
|
+
query: Type.String({ description: 'Search query describing what you need' }),
|
|
429
|
+
limit: Type.Optional(Type.Number({ description: 'Max results to return (default: 5)' })),
|
|
430
|
+
}),
|
|
431
|
+
async execute(_toolCallId, params) {
|
|
432
|
+
const { query, limit } = params;
|
|
433
|
+
try {
|
|
434
|
+
const ops = await initOps();
|
|
435
|
+
const results = await ops.search(query, {
|
|
436
|
+
category: 'memory',
|
|
437
|
+
maxResults: limit ?? 5,
|
|
438
|
+
minScore,
|
|
439
|
+
});
|
|
440
|
+
if (results.length === 0) {
|
|
441
|
+
return {
|
|
442
|
+
content: [{ type: 'text', text: 'No relevant memories found.' }],
|
|
443
|
+
details: { count: 0 },
|
|
444
|
+
};
|
|
445
|
+
}
|
|
446
|
+
const text = results
|
|
447
|
+
.map((r, i) => `${i + 1}. [${(r.score * 100).toFixed(0)}%] ${r.snippet}`)
|
|
448
|
+
.join('\n\n');
|
|
449
|
+
return {
|
|
450
|
+
content: [{
|
|
451
|
+
type: 'text',
|
|
452
|
+
text: `Found ${results.length} relevant memories:\n\n${text}`,
|
|
453
|
+
}],
|
|
454
|
+
details: { count: results.length },
|
|
455
|
+
};
|
|
456
|
+
}
|
|
457
|
+
catch (err) {
|
|
458
|
+
return {
|
|
459
|
+
content: [{
|
|
460
|
+
type: 'text',
|
|
461
|
+
text: `Cloud memory search failed: ${stringifyError(err)}`,
|
|
462
|
+
}],
|
|
463
|
+
details: { error: true },
|
|
464
|
+
};
|
|
465
|
+
}
|
|
466
|
+
},
|
|
467
|
+
}, { name: 'cloud_memory_search' });
|
|
468
|
+
// ==================================================================
|
|
469
|
+
// 4a. Hook: llm_input — cache the full system prompt per session
|
|
470
|
+
// ==================================================================
|
|
471
|
+
//
|
|
472
|
+
// `llm_input` fires *after* the LLM payload is assembled (read-only).
|
|
473
|
+
// It is the only hook where `event.systemPrompt` contains the FULL
|
|
474
|
+
// system prompt. We stash it keyed by sessionId so that the *next*
|
|
475
|
+
// turn's `before_prompt_build` can use it for precise section replacement.
|
|
476
|
+
api.on('llm_input', async (event, ctx) => {
|
|
477
|
+
const typedEvent = event;
|
|
478
|
+
const sessionId = typedEvent.sessionId ?? ctx.sessionId;
|
|
479
|
+
const sp = typedEvent.systemPrompt;
|
|
480
|
+
// api.logger.info( `----llm input--- start`);
|
|
481
|
+
// api.logger.info(sp);
|
|
482
|
+
// api.logger.info( `----llm input--- end`);
|
|
483
|
+
if (sessionId && sp) {
|
|
484
|
+
systemPromptCache.set(sessionId, sp);
|
|
485
|
+
api.logger.info(`[llm_input] cached systemPrompt for session=${sessionId} (${sp.length} chars)`);
|
|
486
|
+
// Save a local copy for debugging / inspection
|
|
487
|
+
try {
|
|
488
|
+
const fs = await import('node:fs/promises');
|
|
489
|
+
const nodePath = await import('node:path');
|
|
490
|
+
const os = await import('node:os');
|
|
491
|
+
const homeDir = os.homedir();
|
|
492
|
+
const stateDir = process.env.OPENCLAW_STATE_DIR?.trim()
|
|
493
|
+
|| nodePath.join(homeDir, '.openclaw');
|
|
494
|
+
const dumpDir = nodePath.join(stateDir, 'debug', 'system-prompts');
|
|
495
|
+
await fs.mkdir(dumpDir, { recursive: true });
|
|
496
|
+
const safeSessionId = sessionId.replace(/[^a-zA-Z0-9_-]/g, '_');
|
|
497
|
+
const filePath = nodePath.join(dumpDir, `${safeSessionId}.txt`);
|
|
498
|
+
await fs.writeFile(filePath, sp, 'utf-8');
|
|
499
|
+
api.logger.info(`[llm_input] saved systemPrompt to ${filePath}`);
|
|
500
|
+
}
|
|
501
|
+
catch (err) {
|
|
502
|
+
api.logger.warn(`[llm_input] failed to save systemPrompt locally: ${err}`);
|
|
503
|
+
}
|
|
504
|
+
}
|
|
505
|
+
}, { name: 'cache-system-prompt' });
|
|
506
|
+
// ==================================================================
|
|
507
|
+
// 4a-2. Hook: llm_output — log & save LLM response locally
|
|
508
|
+
// ==================================================================
|
|
509
|
+
api.on('llm_output', async (event, ctx) => {
|
|
510
|
+
const typedEvent = event;
|
|
511
|
+
const sessionId = typedEvent.sessionId ?? ctx.sessionId;
|
|
512
|
+
if (sessionId) {
|
|
513
|
+
try {
|
|
514
|
+
const fs = await import('node:fs/promises');
|
|
515
|
+
const nodePath = await import('node:path');
|
|
516
|
+
const os = await import('node:os');
|
|
517
|
+
const homeDir = os.homedir();
|
|
518
|
+
const stateDir = process.env.OPENCLAW_STATE_DIR?.trim()
|
|
519
|
+
|| nodePath.join(homeDir, '.openclaw');
|
|
520
|
+
const dumpDir = nodePath.join(stateDir, 'debug', 'llm-outputs');
|
|
521
|
+
await fs.mkdir(dumpDir, { recursive: true });
|
|
522
|
+
const safeSessionId = sessionId.replace(/[^a-zA-Z0-9_-]/g, '_');
|
|
523
|
+
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
|
|
524
|
+
const filePath = nodePath.join(dumpDir, `${safeSessionId}_${timestamp}.json`);
|
|
525
|
+
const payload = {
|
|
526
|
+
sessionId,
|
|
527
|
+
runId: typedEvent.runId,
|
|
528
|
+
provider: typedEvent.provider,
|
|
529
|
+
model: typedEvent.model,
|
|
530
|
+
assistantTexts: typedEvent.assistantTexts,
|
|
531
|
+
usage: typedEvent.usage,
|
|
532
|
+
savedAt: new Date().toISOString(),
|
|
533
|
+
};
|
|
534
|
+
await fs.writeFile(filePath, JSON.stringify(payload, null, 2), 'utf-8');
|
|
535
|
+
api.logger.info(`[llm_output] saved response to ${filePath}`);
|
|
536
|
+
}
|
|
537
|
+
catch (err) {
|
|
538
|
+
api.logger.warn(`[llm_output] failed to save response locally: ${err}`);
|
|
539
|
+
}
|
|
540
|
+
}
|
|
541
|
+
}, { name: 'log-llm-output' });
|
|
542
|
+
// ==================================================================
|
|
543
|
+
// 3b. Hook: before_prompt_build — layered system prompt manipulation
|
|
544
|
+
// ==================================================================
|
|
545
|
+
//
|
|
546
|
+
// Architecture (three-layer injection strategy):
|
|
547
|
+
//
|
|
548
|
+
// 1. Extract clean user prompt (strip inbound metadata)
|
|
549
|
+
// 2. Get full system prompt from cache (populated by llm_input)
|
|
550
|
+
// 3. Search cloud for: memory, images, documents
|
|
551
|
+
// 4. Inject via three distinct channels:
|
|
552
|
+
//
|
|
553
|
+
// ┌─ prependSystemContext ─────────────────────────────┐
|
|
554
|
+
// │ "能力声明": tells LLM it has cloud image/doc access │
|
|
555
|
+
// ├─ baseSystemPrompt ────────────────────────────────┤
|
|
556
|
+
// │ Original system prompt (memory layer replaced) │
|
|
557
|
+
// ├─ appendSystemContext ─────────────────────────────┤
|
|
558
|
+
// │ (memory block, Phase A only) │
|
|
559
|
+
// └──────────────────────────────────────────────────┘
|
|
560
|
+
//
|
|
561
|
+
// ┌─ prependContext ──────────────────────────────────┐
|
|
562
|
+
// │ <image-context> + <document-context> blocks │
|
|
563
|
+
// ├─ user message ───────────────────────────────────┤
|
|
564
|
+
// │ Actual user prompt │
|
|
565
|
+
// └──────────────────────────────────────────────────┘
|
|
566
|
+
//
|
|
567
|
+
// This ensures:
|
|
568
|
+
// - LLM KNOWS it has cloud resource capabilities (top of system prompt)
|
|
569
|
+
// - Retrieval results are RIGHT BEFORE the user question (max attention)
|
|
570
|
+
// - Memory stays in its natural position within the system prompt
|
|
571
|
+
//
|
|
572
|
+
// Two-phase strategy:
|
|
573
|
+
// Phase A (first turn — no cache yet):
|
|
574
|
+
// Memory → appendSystemContext, Image/Doc → prependContext
|
|
575
|
+
//
|
|
576
|
+
// Phase B (subsequent turns — cache populated by llm_input):
|
|
577
|
+
// Memory → layer replacement, Image/Doc → prependContext
|
|
578
|
+
const memoryRecallEnabled = cfg.memoryAutoRecall !== false;
|
|
579
|
+
const maxRecallResults = cfg.maxRecallResults ?? 1;
|
|
580
|
+
if (memoryRecallEnabled) {
|
|
581
|
+
api.on('before_prompt_build', async (event, ctx) => {
|
|
582
|
+
// ---- Step 1: Extract clean user prompt ----
|
|
583
|
+
const typedEvent = event;
|
|
584
|
+
const rawPrompt = typedEvent.prompt ?? '';
|
|
585
|
+
const prompt = stripInboundMetadataFromPrompt(rawPrompt);
|
|
586
|
+
api.logger.info(`[prompt-build] clean prompt (${prompt.length} chars): "${prompt.slice(0, 120)}"`);
|
|
587
|
+
// ---- Step 2: Get cached system prompt ----
|
|
588
|
+
const sessionId = ctx.sessionId;
|
|
589
|
+
const cachedSystemPrompt = sessionId
|
|
590
|
+
? systemPromptCache.get(sessionId)
|
|
591
|
+
: undefined;
|
|
592
|
+
const hasCachedPrompt = !!cachedSystemPrompt;
|
|
593
|
+
api.logger.info(`[prompt-build] session=${sessionId}, `
|
|
594
|
+
+ `hasCachedSystemPrompt=${hasCachedPrompt} `
|
|
595
|
+
+ `(${cachedSystemPrompt?.length ?? 0} chars)`);
|
|
596
|
+
// ---- Step 3: Parse system prompt into layers & log (tree view) ----
|
|
597
|
+
// if (hasCachedPrompt) {
|
|
598
|
+
// const layers = parseSystemPromptLayers(cachedSystemPrompt!);
|
|
599
|
+
// const totalChars = layers.reduce((sum, l) => sum + l.chars, 0);
|
|
600
|
+
// // Build a compact tree-view table for easy structure inspection
|
|
601
|
+
// const treeLines: string[] = [];
|
|
602
|
+
// treeLines.push('');
|
|
603
|
+
// treeLines.push(`┌─ System Prompt Structure (${layers.length} layers, ${totalChars} chars total)`);
|
|
604
|
+
// treeLines.push('│');
|
|
605
|
+
// for (let i = 0; i < layers.length; i++) {
|
|
606
|
+
// const layer = layers[i];
|
|
607
|
+
// const isLast = i === layers.length - 1;
|
|
608
|
+
// const branch = isLast ? '└──' : '├──';
|
|
609
|
+
// const indent = ' '.repeat(Math.max(0, layer.level - 1));
|
|
610
|
+
// const pct = totalChars > 0 ? ((layer.chars / totalChars) * 100).toFixed(1) : '0.0';
|
|
611
|
+
// const bar = '█'.repeat(Math.round(Number(pct) / 5)) || '▏';
|
|
612
|
+
// const preview = layer.content
|
|
613
|
+
// .replace(/\n/g, ' ')
|
|
614
|
+
// .replace(/\s+/g, ' ')
|
|
615
|
+
// .trim()
|
|
616
|
+
// .slice(0, 80);
|
|
617
|
+
// treeLines.push(
|
|
618
|
+
// `│ ${branch} ${indent}[${i}] ${layer.name}`
|
|
619
|
+
// + ` (${layer.chars} chars, ${pct}%) ${bar}`,
|
|
620
|
+
// );
|
|
621
|
+
// treeLines.push(
|
|
622
|
+
// `│ ${isLast ? ' ' : '│ '} ${indent} ↳ ${preview}…`,
|
|
623
|
+
// );
|
|
624
|
+
// }
|
|
625
|
+
// treeLines.push('│');
|
|
626
|
+
// treeLines.push('└─ Use findLayer(layers, "name") / replaceLayer(layers, "name", content) to modify');
|
|
627
|
+
// treeLines.push('');
|
|
628
|
+
// api.logger.info(`[prompt-build] ${treeLines.join('\n[prompt-build] ')}`);
|
|
629
|
+
// }
|
|
630
|
+
try {
|
|
631
|
+
const ops = await initOps(ctx.agentId);
|
|
632
|
+
// ---- Step 4: Prepare injection content ----
|
|
633
|
+
// 4a. Memory recall: search cloud for relevant memories
|
|
634
|
+
//
|
|
635
|
+
// When memoryRecallEnabled is true, we ALWAYS build a memoryBlock
|
|
636
|
+
// (even if 0 results) so the original "Memory Recall" layer gets
|
|
637
|
+
// replaced. This clears the placeholder content and saves tokens.
|
|
638
|
+
let memoryBlock = '';
|
|
639
|
+
if (memoryRecallEnabled) {
|
|
640
|
+
try {
|
|
641
|
+
const memoryResults = await ops.search(prompt, {
|
|
642
|
+
category: 'memory',
|
|
643
|
+
maxResults: maxRecallResults,
|
|
644
|
+
minScore,
|
|
645
|
+
});
|
|
646
|
+
api.logger.info(`[prompt-build:memory] query="${prompt.slice(0, 80)}" → ${memoryResults.length} results (minScore=${minScore})`);
|
|
647
|
+
if (memoryResults.length > 0) {
|
|
648
|
+
memoryBlock = formatCloudMemoryForPrompt(memoryResults);
|
|
649
|
+
api.logger.info(`[记忆构建] start`);
|
|
650
|
+
// api.logger.info(`[记忆构建] ${memoryBlock}`);
|
|
651
|
+
}
|
|
652
|
+
else {
|
|
653
|
+
// 0 results → still build a block so the original layer gets replaced
|
|
654
|
+
memoryBlock = [
|
|
655
|
+
'<cloud-memory>',
|
|
656
|
+
'No relevant memories found for this query.',
|
|
657
|
+
'</cloud-memory>',
|
|
658
|
+
].join('\n');
|
|
659
|
+
api.logger.info('[prompt-build:memory] 0 results → built empty cloud-memory block for layer replacement');
|
|
660
|
+
}
|
|
661
|
+
}
|
|
662
|
+
catch (err) {
|
|
663
|
+
api.logger.warn(`[prompt-build:memory] recall failed: ${stringifyError(err)}`);
|
|
664
|
+
// Even on error, build a block so the original layer gets replaced
|
|
665
|
+
memoryBlock = [
|
|
666
|
+
'<cloud-memory>',
|
|
667
|
+
'Memory recall temporarily unavailable.',
|
|
668
|
+
'</cloud-memory>',
|
|
669
|
+
].join('\n');
|
|
670
|
+
}
|
|
671
|
+
}
|
|
672
|
+
// Helper: normalise docId to a usable local path + shorten for display.
|
|
673
|
+
//
|
|
674
|
+
// Asset docIds returned by COS search are absolute paths with the
|
|
675
|
+
// leading `/` stripped (e.g. `Users/shawn/Downloads/foo.pdf`).
|
|
676
|
+
// We need to:
|
|
677
|
+
// 1. Restore the leading `/` so it becomes a valid absolute path.
|
|
678
|
+
// 2. Replace the home-dir prefix with `~/` for human readability.
|
|
679
|
+
//
|
|
680
|
+
// e.g. "Users/shawn/Downloads/foo.pdf"
|
|
681
|
+
// → absolutePath: "/Users/shawn/Downloads/foo.pdf"
|
|
682
|
+
// → display: "~/Downloads/foo.pdf"
|
|
683
|
+
// Home directory used for path restoration and `~` shortening. Trailing
// separators are dropped so that slicing by `homeDir.length` always leaves the
// separator in front of the remainder (e.g. HOME="/root/" must not yield "~foo").
const homeDir = (process.env.HOME ?? process.env.USERPROFILE ?? '').replace(/[\\/]+$/, '');
// Same directory in the form asset docIds use (leading `/` stripped).
const homeDirNoSlash = homeDir.startsWith('/') ? homeDir.slice(1) : homeDir;
/**
 * Normalise an asset docId into a usable local path plus a short display form.
 *
 * @param {string} p - docId as returned by search, e.g. "Users/shawn/Downloads/foo.pdf".
 * @returns {{ absolute: string, display: string }} `absolute` has the leading `/`
 *   restored when `p` is a stripped path under the home directory; `display`
 *   replaces the home-directory prefix with `~`. Paths outside the home
 *   directory are returned unchanged in both fields.
 */
const normaliseAssetPath = (p) => {
    // True only when `candidate` is `dir` itself or lies below it. A bare
    // startsWith() would also match siblings such as "/Users/shawnee".
    const isAtOrUnder = (candidate, dir) => candidate.startsWith(dir)
        && (candidate.length === dir.length
            || candidate[dir.length] === '/'
            || candidate[dir.length] === '\\');
    // Restore the leading `/` only for POSIX-style homes; prepending it to a
    // drive-letter home (USERPROFILE) would produce an invalid path.
    let abs = p;
    if (homeDir.startsWith('/') && !p.startsWith('/') && isAtOrUnder(p, homeDirNoSlash)) {
        abs = `/${p}`;
    }
    // Shorten home-dir prefix → ~/
    let display = abs;
    if (homeDir && isAtOrUnder(abs, homeDir)) {
        display = `~${abs.slice(homeDir.length)}`;
    }
    return { absolute: abs, display };
};
|
|
699
|
+
// 4b. Image recall: search the image dataset for visually relevant content.
|
|
700
|
+
//
|
|
701
|
+
// When the image dataset exists, we query it with the user's prompt.
|
|
702
|
+
// If results are found, we build an `<image-context>` block containing
|
|
703
|
+
// local file paths that the LLM can reference in its response.
|
|
704
|
+
//
|
|
705
|
+
// Strategy: image/document blocks are injected via `prependSystemContext`
|
|
706
|
+
// (prepended to the system prompt) so they stay out of the visible chat
|
|
707
|
+
// history while sitting at the top of the prompt, where attention is highest.
|
|
708
|
+
let imageBlock = '';
|
|
709
|
+
try {
|
|
710
|
+
const imageResults = await ops.search(prompt, {
|
|
711
|
+
category: 'image',
|
|
712
|
+
maxResults: maxRecallResults,
|
|
713
|
+
minScore,
|
|
714
|
+
});
|
|
715
|
+
api.logger.info(`[prompt-build:image] query="${prompt.slice(0, 80)}" → ${imageResults.length} results (minScore=${minScore})`);
|
|
716
|
+
if (imageResults.length > 0) {
|
|
717
|
+
const top = imageResults[0];
|
|
718
|
+
const { absolute: imgAbsPath, display: imgDisplay } = normaliseAssetPath(top.docId ?? 'image-1');
|
|
719
|
+
const imageLine = `- **${imgDisplay}** (相关度: ${(top.score * 100).toFixed(0)}%)\n 本地路径: \`${imgAbsPath}\``;
|
|
720
|
+
imageBlock = [
|
|
721
|
+
'<image-context>',
|
|
722
|
+
'【重要】以下是从用户的图片库中检索到的、与本次提问最相关的图片。',
|
|
723
|
+
'你必须在回答中主动引用这张图片(提供本地路径),除非用户的问题明确与图片无关。',
|
|
724
|
+
'如果用户询问截图、照片、图片等相关内容,请优先使用以下资源:',
|
|
725
|
+
'',
|
|
726
|
+
imageLine,
|
|
727
|
+
'</image-context>',
|
|
728
|
+
].join('\n');
|
|
729
|
+
api.logger.info(`[prompt-build:image] built image-context block (${imageBlock.length} chars, top 1 of ${imageResults.length} images)`);
|
|
730
|
+
}
|
|
731
|
+
}
|
|
732
|
+
catch (err) {
|
|
733
|
+
api.logger.warn(`[prompt-build:image] image recall failed: ${stringifyError(err)}`);
|
|
734
|
+
// Image recall failure is non-fatal — we simply skip image injection
|
|
735
|
+
}
|
|
736
|
+
// 4c. Document recall: search the document dataset for relevant docs.
|
|
737
|
+
//
|
|
738
|
+
// Similar to image recall — queries the DocSearch dataset bound to `asset/`.
|
|
739
|
+
// If results are found, we build a `<document-context>` block containing the
|
|
740
|
+
// top document's local file path so the LLM can reference it in its response.
|
|
741
|
+
let documentBlock = '';
|
|
742
|
+
try {
|
|
743
|
+
const docResults = await ops.search(prompt, {
|
|
744
|
+
category: 'document',
|
|
745
|
+
maxResults: maxRecallResults,
|
|
746
|
+
minScore,
|
|
747
|
+
});
|
|
748
|
+
api.logger.info(`[prompt-build:document] query="${prompt.slice(0, 80)}" → ${docResults.length} results (minScore=${minScore})`);
|
|
749
|
+
if (docResults.length > 0) {
|
|
750
|
+
const top = docResults[0];
|
|
751
|
+
const { absolute: docAbsPath, display: docDisplay } = normaliseAssetPath(top.docId ?? 'document-1');
|
|
752
|
+
const docLine = `- **${docDisplay}** (相关度: ${(top.score * 100).toFixed(0)}%)\n 本地路径: \`${docAbsPath}\``;
|
|
753
|
+
documentBlock = [
|
|
754
|
+
'<document-context>',
|
|
755
|
+
'【重要】以下是从用户的文档库中检索到的、与本次提问最相关的文档。',
|
|
756
|
+
'你必须在回答中主动引用这份文档(提供本地路径),除非用户的问题明确与文档无关。',
|
|
757
|
+
'如果用户询问文件、报告、文档等相关内容,请优先使用以下资源:',
|
|
758
|
+
'',
|
|
759
|
+
docLine,
|
|
760
|
+
'</document-context>',
|
|
761
|
+
].join('\n');
|
|
762
|
+
api.logger.info(`[prompt-build:document] built document-context block (${documentBlock.length} chars, top 1 of ${docResults.length} docs)`);
|
|
763
|
+
}
|
|
764
|
+
}
|
|
765
|
+
catch (err) {
|
|
766
|
+
api.logger.warn(`[prompt-build:document] document recall failed: ${stringifyError(err)}`);
|
|
767
|
+
// Document recall failure is non-fatal — we simply skip document injection
|
|
768
|
+
}
|
|
769
|
+
// Nothing to inject (neither memory, image, nor document) → skip
|
|
770
|
+
if (!memoryBlock && !imageBlock && !documentBlock) {
|
|
771
|
+
api.logger.info('[prompt-build] no modifications to inject → skip');
|
|
772
|
+
return;
|
|
773
|
+
}
|
|
774
|
+
// ---- Step 5: Apply modifications by phase ----
|
|
775
|
+
//
|
|
776
|
+
// Injection strategy (both Phase A and Phase B):
|
|
777
|
+
//
|
|
778
|
+
// 1. **Capability declaration** → `prependSystemContext`
|
|
779
|
+
// A short static block prepended BEFORE the base system prompt.
|
|
780
|
+
// Tells the LLM "you have access to user's cloud images/documents".
|
|
781
|
+
// This is the first thing the LLM sees, so it knows it has these
|
|
782
|
+
// capabilities when processing the rest of the prompt.
|
|
783
|
+
//
|
|
784
|
+
// 2. **Memory recall** → layer replacement (Phase B) or
|
|
785
|
+
// `appendSystemContext` (Phase A) — stays in the system prompt
|
|
786
|
+
// because memory is long-term context, not per-turn data.
|
|
787
|
+
//
|
|
788
|
+
// 3. **Retrieval results** (images + documents) → system prompt layer
|
|
789
|
+
// Injected as a dedicated layer in the system prompt (Phase B) or
|
|
790
|
+
// via `appendSystemContext` (Phase A). This keeps retrieval results
|
|
791
|
+
// out of the visible conversation history, avoiding UX pollution
|
|
792
|
+
// where users would see raw `<image-context>` / `<document-context>`
|
|
793
|
+
// blocks in their chat. The content is refreshed each turn.
|
|
794
|
+
// Build the static capability declaration (prepended to system prompt)
|
|
795
|
+
const capabilityParts = [];
|
|
796
|
+
if (imageBlock || documentBlock) {
|
|
797
|
+
capabilityParts.push('## 本地资源能力', '你可以访问用户的本地资源库,包括图片和文档。', '当检索到相关资源时,会在系统提示中以 <image-context> 或 <document-context> 标签提供。', '你必须优先使用这些已检索到的资源来回答用户问题。');
|
|
798
|
+
// macOS path hint: LLM should use ~/ instead of /Users/<username>/
|
|
799
|
+
if (process.platform === 'darwin') {
|
|
800
|
+
capabilityParts.push('注意:当前系统为 macOS,本地文件路径中的 `/Users/用户名/` 应当使用 `~/` 缩写表示。', '例如 `/Users/shawn/Documents/report.pdf` 应写为 `~/Documents/report.pdf`。');
|
|
801
|
+
}
|
|
802
|
+
}
|
|
803
|
+
// Build the retrieval system context (injected into system prompt, not user message)
|
|
804
|
+
const retrievalSystemParts = [];
|
|
805
|
+
if (imageBlock) {
|
|
806
|
+
retrievalSystemParts.push(imageBlock);
|
|
807
|
+
api.logger.info(`[prompt-build] image block → system prompt layer (${imageBlock.length} chars)`);
|
|
808
|
+
}
|
|
809
|
+
if (documentBlock) {
|
|
810
|
+
retrievalSystemParts.push(documentBlock);
|
|
811
|
+
api.logger.info(`[prompt-build] document block → system prompt layer (${documentBlock.length} chars)`);
|
|
812
|
+
}
|
|
813
|
+
// =============================================================
|
|
814
|
+
// Phase A: no cached system prompt (first turn of a session)
|
|
815
|
+
// =============================================================
|
|
816
|
+
if (!hasCachedPrompt) {
|
|
817
|
+
const result = {};
|
|
818
|
+
// Memory → appendSystemContext (stays in system prompt)
|
|
819
|
+
if (memoryBlock) {
|
|
820
|
+
result.appendSystemContext = `\n\n## Cloud Memory (recalled)\n${memoryBlock}`;
|
|
821
|
+
api.logger.info(`[prompt-build] Phase A: memory → appendSystemContext (${memoryBlock.length} chars)`);
|
|
822
|
+
}
|
|
823
|
+
// Capability + Retrieval → prependSystemContext (before system prompt)
|
|
824
|
+
// The capability declaration goes first, followed immediately by
|
|
825
|
+
// the actual retrieval results. This keeps them logically grouped
|
|
826
|
+
// at the TOP of the system prompt where LLM attention is highest.
|
|
827
|
+
const prependParts = [];
|
|
828
|
+
if (capabilityParts.length > 0) {
|
|
829
|
+
prependParts.push(capabilityParts.join('\n'));
|
|
830
|
+
api.logger.info(`[prompt-build] Phase A: capability declaration → prependSystemContext`);
|
|
831
|
+
}
|
|
832
|
+
if (retrievalSystemParts.length > 0) {
|
|
833
|
+
prependParts.push(retrievalSystemParts.join('\n\n'));
|
|
834
|
+
api.logger.info(`[prompt-build] Phase A: retrieval results → prependSystemContext (after capability)`);
|
|
835
|
+
}
|
|
836
|
+
if (prependParts.length > 0) {
|
|
837
|
+
result.prependSystemContext = prependParts.join('\n\n');
|
|
838
|
+
api.logger.info(`[prompt-build] Phase A: prependSystemContext total (${result.prependSystemContext.length} chars)`);
|
|
839
|
+
}
|
|
840
|
+
if (Object.keys(result).length > 0) {
|
|
841
|
+
return result;
|
|
842
|
+
}
|
|
843
|
+
api.logger.info('[prompt-build] Phase A (no cache): nothing to inject');
|
|
844
|
+
return;
|
|
845
|
+
}
|
|
846
|
+
// =============================================================
|
|
847
|
+
// Phase B: cached system prompt → layer-based manipulation
|
|
848
|
+
//
|
|
849
|
+
// Memory → replace layer in system prompt (long-term context)
|
|
850
|
+
// Capability + Image/Document → prependSystemContext (top of prompt)
|
|
851
|
+
// =============================================================
|
|
852
|
+
let layers = parseSystemPromptLayers(cachedSystemPrompt);
|
|
853
|
+
api.logger.info(`[prompt-build] Phase B: layer-based manipulation (${layers.length} layers)`);
|
|
854
|
+
// 5a. Memory injection → replace the existing "Memory Recall" layer in-place
|
|
855
|
+
if (memoryBlock) {
|
|
856
|
+
const memRecallLayer = findLayer(layers, 'Memory Recall');
|
|
857
|
+
if (memRecallLayer) {
|
|
858
|
+
const cloudMemoryContent = [
|
|
859
|
+
'## Memory Recall (cloud-powered)',
|
|
860
|
+
memoryBlock,
|
|
861
|
+
].join('\n');
|
|
862
|
+
api.logger.info(`[prompt-build] found existing Memory Recall layer: "${memRecallLayer.name}" (${memRecallLayer.chars} chars) → replacing in-place`);
|
|
863
|
+
layers = replaceLayer(layers, 'Memory Recall', cloudMemoryContent, 'Memory Recall (cloud-powered)');
|
|
864
|
+
}
|
|
865
|
+
else {
|
|
866
|
+
// Fallback: no Memory Recall layer found → append as new layer at end
|
|
867
|
+
api.logger.info('[prompt-build] no existing Memory Recall layer → appending as new layer');
|
|
868
|
+
layers = [
|
|
869
|
+
...layers,
|
|
870
|
+
{
|
|
871
|
+
name: 'Cloud Memory (recalled)',
|
|
872
|
+
level: 2,
|
|
873
|
+
content: `## Cloud Memory (recalled)\n${memoryBlock}`,
|
|
874
|
+
chars: memoryBlock.length + 28,
|
|
875
|
+
},
|
|
876
|
+
];
|
|
877
|
+
}
|
|
878
|
+
}
|
|
879
|
+
// 5b. Remove stale Image/Document layers from previous turns
|
|
880
|
+
// (retrieval is now injected via prependSystemContext, not as layers)
|
|
881
|
+
layers = layers.filter((l) => !l.name.toLowerCase().includes('image context')
|
|
882
|
+
&& !l.name.toLowerCase().includes('document context')
|
|
883
|
+
&& !l.name.toLowerCase().includes('retrieved context'));
|
|
884
|
+
// ---- Step 6: Reassemble system prompt (memory + retrieval) ----
|
|
885
|
+
const modifiedSystemPrompt = assembleSystemPromptFromLayers(layers);
|
|
886
|
+
api.logger.info(`[prompt-build] ===== Final Layers (${layers.length} total) =====`);
|
|
887
|
+
api.logger.info(`[prompt-build] ===== systemPrompt override `
|
|
888
|
+
+ `(${modifiedSystemPrompt.length} chars, original=${cachedSystemPrompt.length} chars) =====`);
|
|
889
|
+
// ---- Step 7: Build the combined return ----
|
|
890
|
+
const result = {
|
|
891
|
+
systemPrompt: modifiedSystemPrompt,
|
|
892
|
+
};
|
|
893
|
+
// Capability + Retrieval → prependSystemContext (before system prompt)
|
|
894
|
+
const prependParts = [];
|
|
895
|
+
if (capabilityParts.length > 0) {
|
|
896
|
+
prependParts.push(capabilityParts.join('\n'));
|
|
897
|
+
api.logger.info(`[prompt-build] Phase B: capability declaration → prependSystemContext`);
|
|
898
|
+
}
|
|
899
|
+
if (retrievalSystemParts.length > 0) {
|
|
900
|
+
prependParts.push(retrievalSystemParts.join('\n\n'));
|
|
901
|
+
api.logger.info(`[prompt-build] Phase B: retrieval results → prependSystemContext (after capability)`);
|
|
902
|
+
}
|
|
903
|
+
if (prependParts.length > 0) {
|
|
904
|
+
result.prependSystemContext = prependParts.join('\n\n');
|
|
905
|
+
api.logger.info(`[prompt-build] Phase B: prependSystemContext total (${result.prependSystemContext.length} chars)`);
|
|
906
|
+
}
|
|
907
|
+
return result;
|
|
908
|
+
}
|
|
909
|
+
catch (err) {
|
|
910
|
+
api.logger.warn(`[prompt-build] hook failed: ${stringifyError(err)}`);
|
|
911
|
+
}
|
|
912
|
+
});
|
|
913
|
+
}
|
|
914
|
+
// ==================================================================
|
|
915
|
+
// 4. Hook: after_tool_call — real-time memory file sync to cloud
|
|
916
|
+
// ==================================================================
|
|
917
|
+
//
|
|
918
|
+
// When the AI writes to MEMORY.md (long-term) or memory/*.md (daily log /
|
|
919
|
+
// short-term), we immediately sync that single file to the cloud vector
|
|
920
|
+
// store. This ensures cloud memory stays in sync without waiting for
|
|
921
|
+
// the periodic afterTurn interval (every 5 turns).
|
|
922
|
+
//
|
|
923
|
+
// Monitored tools: write_to_file, replace_in_file, edit_file, create_file,
|
|
924
|
+
// and any tool whose params include a path that resolves to a memory file.
|
|
925
|
+
if (cfg.localMemorySync !== false) {
|
|
926
|
+
/**
 * Tool names treated as file writers (covers openclaw's built-in tools).
 * Only calls to these tools are inspected for memory-file paths.
 */
const FILE_WRITE_TOOLS = new Set(['write_to_file', 'replace_in_file', 'edit_file', 'create_file', 'write_file', 'append_to_file']);
|
|
935
|
+
/**
 * Extract the target file path(s) from a tool call's params.
 *
 * Different tools use different param names for the target file, so every
 * known alias is checked. Values are trimmed; non-string and blank values are
 * ignored. A path supplied under more than one alias is returned only once so
 * the caller does not process the same file repeatedly.
 *
 * @param {Record<string, unknown> | null | undefined} params - Tool call params.
 * @returns {string[]} Unique trimmed paths, in alias order.
 */
const extractFilePaths = (params) => {
    const unique = new Set();
    for (const key of ['path', 'filePath', 'file_path', 'target_file', 'file']) {
        // Optional chaining keeps a missing params object from throwing.
        const val = params?.[key];
        if (typeof val !== 'string') {
            continue;
        }
        const trimmed = val.trim();
        if (trimmed.length > 0) {
            unique.add(trimmed);
        }
    }
    return [...unique];
};
|
|
949
|
+
// After a successful file-write tool call, push any memory file it touched to
// the cloud. The upload runs in the background so the hook returns at once.
api.on('after_tool_call', async (event, ctx) => {
    // Failed calls and tools that do not write files are ignored.
    if (event.error) {
        return;
    }
    if (!FILE_WRITE_TOOLS.has(event.toolName ?? '')) {
        return;
    }
    // Keep only the written paths that point at memory files.
    const memoryPaths = extractFilePaths(event.params ?? {}).filter((candidate) => isMemoryFilePath(candidate));
    if (memoryPaths.length === 0) {
        return;
    }
    // Files are synced one after another; a failure is logged, never rethrown.
    const syncInBackground = async () => {
        try {
            const ops = await initOps(ctx.agentId);
            for (const memPath of memoryPaths) {
                await syncSingleMemoryFileToCloud(ops, memPath, api.logger, syncFileExts);
            }
        }
        catch (err) {
            api.logger.warn(`cloud-engine: after_tool_call memory sync failed: ${stringifyError(err)}`);
        }
    };
    // Fire-and-forget: deliberately not awaited.
    void syncInBackground();
}, { name: 'memory-file-sync-on-write' });
|
|
979
|
+
api.logger.info('metainsight-context-engine: registered after_tool_call hook for real-time memory sync');
|
|
980
|
+
}
|
|
981
|
+
// ==================================================================
|
|
982
|
+
// Registration complete — log summary
|
|
983
|
+
// ==================================================================
|
|
984
|
+
// Log a one-line summary of the enabled features. Both sync flags are derived
// from the same `cfg.localMemorySync` switch, so they always report the same value.
const features = [
    ['memoryAutoRecall', memoryRecallEnabled],
    ['localMemorySync', cfg.localMemorySync !== false],
    ['memorySyncOnWrite', cfg.localMemorySync !== false],
].map(([flag, enabled]) => `${flag}=${enabled}`).join(', ');
api.logger.info(`metainsight-context-engine: registered (${features})`);
|
|
990
|
+
},
|
|
991
|
+
};
|
|
992
|
+
export default plugin;
|
|
993
|
+
//# sourceMappingURL=index.js.map
|