metainsight-context-engine 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/BOOTSTRAP.md +341 -0
- package/README.md +230 -0
- package/dist/cos-bootstrap.d.ts +221 -0
- package/dist/cos-bootstrap.d.ts.map +1 -0
- package/dist/cos-bootstrap.js +598 -0
- package/dist/cos-bootstrap.js.map +1 -0
- package/dist/cos-operations.d.ts +219 -0
- package/dist/cos-operations.d.ts.map +1 -0
- package/dist/cos-operations.js +583 -0
- package/dist/cos-operations.js.map +1 -0
- package/dist/engine.d.ts +101 -0
- package/dist/engine.d.ts.map +1 -0
- package/dist/engine.js +157 -0
- package/dist/engine.js.map +1 -0
- package/dist/index.d.ts +42 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +993 -0
- package/dist/index.js.map +1 -0
- package/dist/local-memory-sync.d.ts +204 -0
- package/dist/local-memory-sync.d.ts.map +1 -0
- package/dist/local-memory-sync.js +1126 -0
- package/dist/local-memory-sync.js.map +1 -0
- package/openclaw.plugin.json +225 -0
- package/package.json +78 -0
|
@@ -0,0 +1,1126 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local Memory Sync — Upload MEMORY.md, daily logs, images, and documents to cloud
|
|
3
|
+
*
|
|
4
|
+
* This module provides functionality to sync local memory files and assets
|
|
5
|
+
* (images + documents) to the cloud vector store.
|
|
6
|
+
*
|
|
7
|
+
* Local file locations (workspace root = ~/.openclaw/workspace/):
|
|
8
|
+
* - Long-term memory : ~/.openclaw/workspace/MEMORY.md
|
|
9
|
+
* - Short-term memory: ~/.openclaw/workspace/memory/YYYY-MM-DD.md
|
|
10
|
+
*
|
|
11
|
+
* COS storage layout — Multi-agent (2 top-level directories):
|
|
12
|
+
*
|
|
13
|
+
* openclaw-{agentId}/
|
|
14
|
+
* ├── workspace/ ← MEMORY.md + memory/*.md (indexed by CI)
|
|
15
|
+
* │ ├── MEMORY.md ← long-term memory (MEMORY.md)
|
|
16
|
+
* │ └── memory/ ← daily logs (YYYY-MM-DD.md)
|
|
17
|
+
* └── asset/ ← images + documents preserving directory structure
|
|
18
|
+
* ├── screenshot.png
|
|
19
|
+
* ├── report.pdf
|
|
20
|
+
* ├── images/
|
|
21
|
+
* │ └── photo.jpg
|
|
22
|
+
* └── .codebuddy/
|
|
23
|
+
* └── diagrams/
|
|
24
|
+
* └── arch.png
|
|
25
|
+
*
|
|
26
|
+
* COS storage layout — Legacy (cosPrefix = "memory/"):
|
|
27
|
+
* - Long-term memory : memory/memory.md
|
|
28
|
+
* - Short-term memory: memory/memory/YYYY-MM-DD.md
|
|
29
|
+
* - Assets : asset/{relPath}
|
|
30
|
+
*
|
|
31
|
+
* Asset extraction:
|
|
32
|
+
* When syncing memory files, asset links (Markdown `![](path)` for images
|
|
33
|
+
* and `[](path)` for documents) are detected. If the referenced file exists
|
|
34
|
+
* on the local filesystem and has a supported extension, it is uploaded to
|
|
35
|
+
* the COS `asset/` directory. Image files get `category: image` metadata,
|
|
36
|
+
* document files get `category: document` metadata.
|
|
37
|
+
*
|
|
38
|
+
* Supported extensions are configurable via `syncFileExtensions` config.
|
|
39
|
+
* Default: common image extensions (.png, .jpg, etc.) + document extensions
|
|
40
|
+
* (.pdf, .doc, .docx, .xls, .xlsx, .ppt, .pptx, .txt, .csv, .md, .rtf).
|
|
41
|
+
*
|
|
42
|
+
* Key design decisions:
|
|
43
|
+
* - Each file is uploaded with a **deterministic docId** derived from its
|
|
44
|
+
* relative path. This means re-uploading the same file overwrites the
|
|
45
|
+
* previous version instead of creating duplicates.
|
|
46
|
+
* - Content is hashed (SHA-256) and compared against a local cache to skip
|
|
47
|
+
* unchanged files (avoiding unnecessary COS PUT calls).
|
|
48
|
+
* - Asset uploads use a separate hash cache to avoid re-uploading unchanged files.
|
|
49
|
+
* - The sync runs in the background (non-blocking) and logs progress.
|
|
50
|
+
* - All operations are gated behind the `localMemorySync` config flag.
|
|
51
|
+
*
|
|
52
|
+
* Architecture:
|
|
53
|
+
* local-memory-sync.ts ← engine.ts (afterTurn / bootstrap)
|
|
54
|
+
* ↓
|
|
55
|
+
* cos-operations.ts (upload)
|
|
56
|
+
*/
|
|
57
|
+
import crypto from 'node:crypto';
|
|
58
|
+
import fs from 'node:fs/promises';
|
|
59
|
+
import fsSync from 'node:fs';
|
|
60
|
+
import path from 'node:path';
|
|
61
|
+
/**
 * In-memory mirror of the on-disk cache. Starts as `null` and is populated
 * by `loadHashCache()` the first time the cache is read.
 */
let cacheData = null;
/**
 * Absolute path of the cache file. Starts as `null`; `resolveCacheFilePath()`
 * computes it on first use and stores it here for later calls.
 */
let cacheFilePath = null;
/** True while the in-memory cache holds changes not yet written to disk. */
let cacheDirty = false;
|
|
67
|
+
/**
 * Return the location of the persistent hash-cache file, computing it once.
 *
 * Directory precedence:
 *   1. `OPENCLAW_STATE_DIR` (trimmed, when non-empty)
 *   2. `<HOME or USERPROFILE>/.openclaw`
 *   3. the current working directory
 *
 * The result is memoized in the module-level `cacheFilePath`, so later
 * environment changes do not affect the path.
 *
 * @returns {string} Path to `.sync-hash-cache.json`.
 */
function resolveCacheFilePath() {
    if (!cacheFilePath) {
        const { HOME, USERPROFILE, OPENCLAW_STATE_DIR } = process.env;
        const userHome = HOME ?? USERPROFILE ?? '';
        let baseDir = OPENCLAW_STATE_DIR?.trim();
        if (!baseDir) {
            baseDir = userHome ? `${userHome}/.openclaw` : '';
        }
        if (!baseDir) {
            // No state dir and no home directory: keep the cache next to the process.
            baseDir = process.cwd();
        }
        cacheFilePath = path.join(baseDir, '.sync-hash-cache.json');
    }
    return cacheFilePath;
}
|
|
83
|
+
/**
 * Load the persistent hash cache from disk (no-op if already loaded).
 *
 * The file is expected to contain `{ memory: {...}, asset: {...} }`. Each
 * section is accepted only when it is a plain (non-null, non-array) object;
 * anything else is replaced by an empty object. Arrays are rejected because
 * string-keyed hashes stored on an array are dropped by `JSON.stringify`,
 * which would make the cache silently impossible to persist.
 *
 * A missing, unreadable, or non-JSON file yields an empty cache.
 *
 * @returns {{memory: Object, asset: Object}} The module-level cache object.
 */
function loadHashCache() {
    if (cacheData) {
        return cacheData;
    }
    const filePath = resolveCacheFilePath();
    const isPlainObject = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);
    let parsed = null;
    try {
        parsed = JSON.parse(fsSync.readFileSync(filePath, 'utf-8'));
    }
    catch {
        // File missing or corrupted — start fresh (deliberate best-effort).
    }
    cacheData = {
        memory: isPlainObject(parsed?.memory) ? parsed.memory : {},
        asset: isPlainObject(parsed?.asset) ? parsed.asset : {},
    };
    return cacheData;
}
|
|
106
|
+
/**
 * Flush the in-memory hash cache to disk. Does nothing when the cache is
 * clean or has not been loaded.
 *
 * The cache is serialized and the dirty flag is cleared *before* the first
 * `await`. Any update that lands while the write is in flight sets the flag
 * again and is picked up by the next flush; clearing the flag after the
 * write would mark such updates as persisted when they were not.
 *
 * Write failures are swallowed on purpose (the cache keeps working in
 * memory); the dirty flag is restored so a later flush retries.
 *
 * @returns {Promise<void>}
 */
export async function flushHashCache() {
    if (!cacheDirty || !cacheData) {
        return;
    }
    const filePath = resolveCacheFilePath();
    // Snapshot synchronously so the bytes written match the state being marked clean.
    const serialized = JSON.stringify(cacheData, null, 2);
    cacheDirty = false;
    try {
        await fs.mkdir(path.dirname(filePath), { recursive: true });
        await fs.writeFile(filePath, serialized, 'utf-8');
    }
    catch {
        // Best-effort — cache will still work in-memory for this session.
        cacheDirty = true;
    }
}
|
|
124
|
+
/**
 * Accessors for the `memory` section of the persistent hash cache.
 *
 * Every call goes through `loadHashCache()`, so the cache is loaded on first
 * use. `set` and `clear` mark the cache dirty; nothing is written to disk
 * until `flushHashCache()` runs.
 */
const hashCache = {
    /** Look up the stored hash for `key` (undefined when absent). */
    get: (key) => loadHashCache().memory[key],
    /** Store `value` under `key` and mark the cache dirty. */
    set: (key, value) => {
        const { memory } = loadHashCache();
        memory[key] = value;
        cacheDirty = true;
    },
    /** Drop every memory-file hash (the `asset` section is untouched). */
    clear: () => {
        const cache = loadHashCache();
        cache.memory = {};
        cacheDirty = true;
    },
};
|
|
138
|
+
/**
 * Fingerprint `content` for change detection.
 *
 * @param content Data accepted by `Hash.update` (the callers pass file text).
 * @returns {string} The first 16 hex characters of the SHA-256 digest.
 */
function computeHash(content) {
    const hasher = crypto.createHash('sha256');
    hasher.update(content);
    const fullDigest = hasher.digest('hex');
    return fullDigest.slice(0, 16);
}
|
|
141
|
+
/**
 * Report whether `content` differs from what was last recorded for `filePath`.
 *
 * Side effect: when the content differs (or nothing was recorded), the new
 * hash is stored in the cache immediately — before the caller has done
 * anything with the content.
 *
 * @param {string} filePath Cache key (the callers pass the absolute path).
 * @param content File content to fingerprint.
 * @returns {boolean} true when the hash differs from the cached one.
 */
function hasChanged(filePath, content) {
    const currentDigest = computeHash(content);
    const isUnchanged = hashCache.get(filePath) === currentDigest;
    if (!isUnchanged) {
        hashCache.set(filePath, currentDigest);
    }
    return !isUnchanged;
}
|
|
150
|
+
/**
 * Reset the hash cache: replace the in-memory cache with an empty one and
 * flush that empty state to disk (useful for testing or a forced re-sync).
 *
 * The function takes no parameters. The disk write goes through
 * `flushHashCache()`.
 *
 * @returns {Promise<void>}
 */
export async function clearSyncHashCache() {
    cacheDirty = true;
    cacheData = { memory: {}, asset: {} };
    await flushHashCache();
}
|
|
160
|
+
// ============================================================================
|
|
161
|
+
// File discovery
|
|
162
|
+
// ============================================================================
|
|
163
|
+
/**
 * Resolve the workspace directory — the root where memory files (MEMORY.md,
 * memory/*.md) and workspace identity files (AGENTS.md, SOUL.md, …) live.
 *
 * Resolution order:
 *   1. `<HOME or USERPROFILE>/.openclaw/workspace`, when it contains at
 *      least one workspace marker.
 *   2. The nearest ancestor of `sessionFile` (at most 10 levels up) that
 *      contains a workspace marker.
 *   3. The current working directory.
 *
 * A missing or non-string `sessionFile` skips step 2 instead of throwing,
 * so the CWD fallback is always reachable.
 *
 * @param {string} [sessionFile] Session file path used as the walk-up start.
 * @returns {string} The resolved workspace directory.
 */
function resolveWorkspaceDir(sessionFile) {
    const hasAnyMarker = (dir, markers) => markers.some((name) => fsSync.existsSync(path.join(dir, name)));

    // ---- Priority 1: the well-known gateway workspace path ----
    const homeDir = process.env.HOME ?? process.env.USERPROFILE ?? '';
    if (homeDir) {
        const wellKnownWorkspace = path.join(homeDir, '.openclaw', 'workspace');
        // A marker can only exist if the directory itself does, so this also
        // covers the "directory is missing" case.
        if (hasAnyMarker(wellKnownWorkspace, ['MEMORY.md', 'memory', 'AGENTS.md', 'SOUL.md', '.git'])) {
            return wellKnownWorkspace;
        }
    }

    // ---- Priority 2: walk up from sessionFile ----
    if (typeof sessionFile === 'string') {
        const walkMarkers = ['MEMORY.md', 'memory', 'AGENTS.md', '.codebuddy', 'package.json', '.git'];
        let dir = path.dirname(sessionFile);
        for (let depth = 0; depth < 10; depth += 1) {
            if (hasAnyMarker(dir, walkMarkers)) {
                return dir;
            }
            const parent = path.dirname(dir);
            if (parent === dir) {
                break; // reached the filesystem root
            }
            dir = parent;
        }
    }

    // ---- Priority 3: fallback to CWD ----
    return process.cwd();
}
|
|
215
|
+
/**
 * File names of the workspace identity/behaviour documents that sit at the
 * workspace root.
 *
 * @deprecated These files are no longer uploaded. Within this module the
 * list is only used to recognise such files by name so they can be skipped.
 */
const WORKSPACE_SYNC_FILES = ['AGENTS', 'SOUL', 'TOOLS', 'IDENTITY', 'BOOTSTRAP', 'HEARTBEAT', 'USER']
    .map((stem) => `${stem}.md`);
|
|
231
|
+
/**
 * Collect the local memory files of a workspace that are eligible for sync.
 *
 * When `config.syncLongTermMemory` is set:
 *   - the root `MEMORY.md`, or `memory.md` if the former does not exist
 *   - `.codebuddy/MEMORY.md`
 * When `config.syncDailyLogs` is set:
 *   - `memory/*.md`
 *   - `.codebuddy/memory/*.md`
 *
 * Only one root-level file is ever returned: on a case-insensitive
 * filesystem both spellings resolve to the same file, and taking both would
 * list it twice. Workspace identity files (AGENTS.md, SOUL.md, …) are not
 * discovered.
 *
 * @param {string} workspaceDir Workspace root.
 * @param config Object with `syncLongTermMemory` / `syncDailyLogs` flags.
 * @returns {Promise<Array<{absPath: string, relPath: string, category: string}>>}
 */
async function discoverMemoryFiles(workspaceDir, config) {
    const discovered = [];
    if (config.syncLongTermMemory) {
        // First existing spelling wins (see the case-insensitivity note above).
        const rootName = ['MEMORY.md', 'memory.md']
            .find((candidate) => fsSync.existsSync(path.join(workspaceDir, candidate)));
        if (rootName !== undefined) {
            discovered.push({
                absPath: path.join(workspaceDir, rootName),
                relPath: rootName,
                category: 'long-term',
            });
        }
        const ideMemoryPath = path.join(workspaceDir, '.codebuddy', 'MEMORY.md');
        if (fsSync.existsSync(ideMemoryPath)) {
            discovered.push({
                absPath: ideMemoryPath,
                relPath: '.codebuddy/MEMORY.md',
                category: 'long-term',
            });
        }
    }
    if (config.syncDailyLogs) {
        // Workspace-level logs first, then the IDE-level ones.
        for (const segments of [['memory'], ['.codebuddy', 'memory']]) {
            await scanMarkdownDir(path.join(workspaceDir, ...segments), workspaceDir, 'daily-log', discovered);
        }
    }
    return discovered;
}
|
|
276
|
+
/**
 * Append every regular `*.md` file directly inside `dir` to `files`.
 *
 * Sub-directories are not descended into. `relPath` is computed relative to
 * `workspaceDir` and always uses forward slashes, so it has the same shape
 * on every platform (native separators would give `memory\x.md` on Windows,
 * unlike the forward-slash paths produced elsewhere in this module).
 *
 * A missing or unreadable directory is ignored (best-effort discovery).
 *
 * @param {string} dir Directory to scan.
 * @param {string} workspaceDir Root used to derive `relPath`.
 * @param {string} category Category recorded on each discovered entry.
 * @param {Array} files Output array; entries are pushed onto it in place.
 * @returns {Promise<void>}
 */
async function scanMarkdownDir(dir, workspaceDir, category, files) {
    if (!fsSync.existsSync(dir)) {
        return;
    }
    try {
        const entries = await fs.readdir(dir, { withFileTypes: true });
        for (const entry of entries) {
            if (!entry.isFile() || !entry.name.endsWith('.md')) {
                continue;
            }
            const absPath = path.join(dir, entry.name);
            // Normalize platform separators to '/'.
            const relPath = path.relative(workspaceDir, absPath).split(path.sep).join('/');
            files.push({ absPath, relPath, category });
        }
    }
    catch {
        // Directory may not be accessible — skip it.
    }
}
|
|
294
|
+
// ============================================================================
|
|
295
|
+
// Core sync logic
|
|
296
|
+
// ============================================================================
|
|
297
|
+
/**
 * Map a sync category to the human-readable label used in the uploaded
 * document header.
 *
 * @param {string} category `'long-term'` or `'daily-log'`.
 * @returns {string|undefined} The label, or undefined for any other value.
 */
function categoryLabel(category) {
    if (category === 'long-term') {
        return 'Long-term Memory (MEMORY.md)';
    }
    if (category === 'daily-log') {
        return 'Daily Log (Short-term Memory)';
    }
    return undefined;
}
|
|
310
|
+
/**
 * Generate a deterministic docId from a relative file path, so re-uploading
 * the same file overwrites the previous version instead of duplicating it.
 *
 * Steps:
 *   1. Backslash separators are converted to `/`, so Windows-style relative
 *      paths produce the same docId as POSIX ones. (Without this, `\` would
 *      be sanitized to `-` and `memory\2026-03-15.md` would become
 *      `memory/memory-2026-03-15`.)
 *   2. A trailing `.md` is removed. Per the original author's note the
 *      uploader appends `.md` to the final key, so keeping it would yield
 *      `name.md.md` — that uploader is not visible from this block.
 *   3. Characters outside `[a-zA-Z0-9._/-]` become `-`; runs of `-` are
 *      collapsed; one leading/trailing `-` is trimmed.
 *
 * Result by category:
 *   - `'daily-log'`: `memory/<basename>`
 *       "memory/2026-03-15.md" → "memory/2026-03-15"
 *   - `'long-term'`: flat id (slashes → `-`); the root file is always
 *     `"MEMORY"` regardless of case, so `MEMORY.md` and `memory.md` share a key
 *       "MEMORY.md" → "MEMORY", ".codebuddy/MEMORY.md" → ".codebuddy-MEMORY"
 *   - anything else: flat id
 *       "AGENTS.md" → "AGENTS"
 *
 * NOTE(review): daily logs keep only the basename, so `memory/X.md` and
 * `.codebuddy/memory/X.md` map to the same docId and overwrite each other.
 * Left unchanged because fixing it would alter existing storage keys.
 *
 * @param {string} relPath Path relative to the workspace root.
 * @param {string} category Sync category of the file.
 * @returns {string} The docId (without extension).
 */
function filePathToDocId(relPath, category) {
    const sanitized = relPath
        .replace(/\\/g, '/')
        .replace(/\.md$/i, '')
        .replace(/[^a-zA-Z0-9._/-]/g, '-')
        .replace(/-+/g, '-')
        .replace(/^-|-$/g, '');
    if (category === 'daily-log') {
        // Only the file name is kept; the directory is always "memory/".
        return `memory/${sanitized.split('/').pop()}`;
    }
    const flat = sanitized.replace(/\//g, '-');
    if (category === 'long-term' && flat.toLowerCase() === 'memory') {
        return 'MEMORY';
    }
    return flat;
}
|
|
368
|
+
/**
 * Sync a single local memory file to the cloud by its absolute path.
 *
 * Per the original author's note this is the targeted entry point used when
 * a memory file (MEMORY.md or memory/*.md) has just been written; uploads
 * are tagged with `trigger: 'after_tool_call'`. It uses the same docId
 * derivation and hash-based change detection as the full sync.
 *
 * Flow:
 *   1. Workspace identity files (see WORKSPACE_SYNC_FILES) are skipped
 *      before any I/O or hashing, so they never enter the hash cache.
 *   2. Empty and unchanged files are skipped.
 *   3. The file is uploaded with a small Markdown header prepended.
 *   4. Assets referenced by the file are extracted and uploaded
 *      (failures there are logged and do not fail the call).
 *   5. The hash cache is flushed to disk.
 *
 * `hasChanged()` records the new hash before the upload runs. If the upload
 * then fails, the previous cache entry is restored, so the next attempt
 * retries the upload rather than skipping the file as "unchanged".
 *
 * @param ops CosOperations instance (must provide `upload(content, options)`).
 * @param absPath Absolute path to the memory file.
 * @param logger Logger instance (`info` / `warn`).
 * @param allowedExtensions Optional set of allowed file extensions for asset sync.
 * @returns true if the file was uploaded, false if skipped/failed.
 */
export async function syncSingleMemoryFileToCloud(ops, absPath, logger, allowedExtensions) {
    // Rollback bookkeeping for the hash recorded by hasChanged().
    let previousHash;
    let hashPending = false;
    try {
        const basenameUpper = path.basename(absPath).toUpperCase();
        // Skip workspace identity files — they are no longer synced to cloud.
        const isWorkspaceFile = WORKSPACE_SYNC_FILES.some((name) => name.toUpperCase() === basenameUpper);
        if (isWorkspaceFile) {
            logger.info(`local-memory-sync: single-file skip (workspace file no longer synced) — ${absPath}`);
            return false;
        }
        const content = await fs.readFile(absPath, 'utf-8');
        if (content.trim().length === 0) {
            logger.info(`local-memory-sync: single-file skip (empty) — ${absPath}`);
            return false;
        }
        previousHash = hashCache.get(absPath);
        if (!hasChanged(absPath, content)) {
            logger.info(`local-memory-sync: single-file skip (unchanged) — ${absPath}`);
            return false;
        }
        hashPending = true;
        // Anything that is not MEMORY.md is treated as a daily log.
        const category = basenameUpper === 'MEMORY.MD' ? 'long-term' : 'daily-log';
        // Stable relative path, e.g. "MEMORY.md" or "memory/2026-03-15.md".
        const relPath = extractMemoryRelPath(absPath);
        const docId = filePathToDocId(relPath, category);
        const enrichedContent = [
            `# Local Memory: ${relPath}`,
            `**Type**: ${categoryLabel(category)}`,
            `**Source**: ${relPath}`,
            `**Synced**: ${new Date().toISOString()}`,
            '',
            '---',
            '',
            content,
        ].join('\n');
        await ops.upload(enrichedContent, {
            category: 'memory',
            docId,
            metadata: {
                source: 'local-memory-sync',
                type: category,
                filePath: relPath,
                timestamp: Date.now(),
                contentHash: computeHash(content),
                trigger: 'after_tool_call',
            },
        });
        // Upload succeeded: the recorded hash is now valid and must be kept.
        hashPending = false;
        logger.info(`local-memory-sync: single-file uploaded — ${relPath} (${category})`);
        // Extract and upload assets (images + documents) referenced in this memory file.
        try {
            const workspaceDir = path.dirname(absPath);
            const assetResult = await extractAndUploadAssets(ops, [{ absPath, content }], workspaceDir, logger, allowedExtensions);
            if (assetResult.uploaded > 0 || assetResult.failed > 0) {
                logger.info(`local-memory-sync: single-file asset sync — uploaded=${assetResult.uploaded}, `
                    + `skipped=${assetResult.skipped}, failed=${assetResult.failed}`);
            }
        }
        catch (assetErr) {
            logger.warn(`local-memory-sync: single-file asset extraction failed — ${absPath}: ${String(assetErr)}`);
        }
        // Persist hash cache to disk so restarts don't re-upload unchanged files.
        await flushHashCache();
        return true;
    }
    catch (err) {
        if (hashPending) {
            // The upload did not happen: forget the optimistic hash so the file is retried.
            hashCache.set(absPath, previousHash);
        }
        logger.warn(`local-memory-sync: single-file upload failed — ${absPath}: ${String(err)}`);
        return false;
    }
}
|
|
460
|
+
/**
 * Extract a meaningful workspace-relative path from an absolute memory file
 * path. Backslashes are treated as path separators.
 *
 * Recognized patterns (checked in this order, each anchored at a path
 * segment boundary so that e.g. `my-memory/notes.md` or `mymemory.md` are
 * not mistaken for memory files):
 *   /path/to/.codebuddy/memory/2026-03-15.md → .codebuddy/memory/2026-03-15.md
 *   /path/to/.codebuddy/MEMORY.md            → .codebuddy/MEMORY.md
 *   /path/to/workspace/memory/2026-03-15.md  → memory/2026-03-15.md
 *   /path/to/workspace/MEMORY.md             → MEMORY.md
 *   /path/to/workspace/memory.md             → memory.md
 *
 * Anything else (including workspace identity files such as AGENTS.md)
 * falls back to the file name.
 *
 * @param {string} absPath Absolute path to the file.
 * @returns {string} Relative path using forward slashes.
 */
function extractMemoryRelPath(absPath) {
    const normalized = absPath.replace(/\\/g, '/');
    // `(?:^|\/)` requires the match to begin at the start of a path segment.
    const patterns = [
        /(?:^|\/)(\.codebuddy\/memory\/[^/]+\.md)$/,
        /(?:^|\/)(\.codebuddy\/MEMORY\.md)$/i,
        /(?:^|\/)(memory\/[^/]+\.md)$/,
        /(?:^|\/)((?:MEMORY|memory)\.md)$/,
    ];
    for (const pattern of patterns) {
        const match = pattern.exec(normalized);
        if (match) {
            return match[1];
        }
    }
    // Fallback: the file name, taken from the normalized path so that
    // backslash-separated input also yields just the last segment.
    return path.posix.basename(normalized);
}
|
|
503
|
+
/**
 * Decide whether a file path points at a syncable memory file.
 *
 * The path is compared case-insensitively with backslashes treated as `/`.
 * It qualifies when it is either:
 *   - a file named `memory.md` inside some directory (long-term memory,
 *     which also covers `.codebuddy/MEMORY.md`), or
 *   - a `*.md` file directly inside a directory named `memory` (daily
 *     logs, which also covers `.codebuddy/memory/*.md`).
 *
 * A bare file name without any directory part does not qualify. Workspace
 * identity files (AGENTS.md, SOUL.md, …) are not memory files.
 *
 * @param {string} absPath Path to test.
 * @returns {boolean}
 */
export function isMemoryFilePath(absPath) {
    const lowered = absPath.replace(/\\/g, '/').toLowerCase();
    const isLongTermFile = lowered.endsWith('/memory.md');
    const isDailyLogFile = /\/memory\/[^/]+\.md$/.test(lowered);
    return isLongTermFile || isDailyLogFile;
}
|
|
536
|
+
/**
 * Sync all local memory files of the workspace to the cloud.
 *
 * Returns immediately with an all-zero result when `config.enabled` is
 * falsy. Otherwise:
 *   Step 1 — resolve the workspace, discover the memory files, and upload
 *            each one that is non-empty and whose content hash changed.
 *   Step 2 — extract and upload the assets referenced by the memory files.
 *   Finally the hash cache is flushed to disk.
 *
 * Each file is read once; the content is reused for asset extraction.
 *
 * `hasChanged()` records the new hash before the upload runs. If the upload
 * of a file fails, its previous cache entry is restored so the next sync
 * retries it instead of reporting it as "unchanged".
 *
 * @param ops CosOperations instance (must provide `upload(content, options)`).
 * @param sessionFile Session file path (used to resolve workspace dir).
 * @param config Sync configuration flags (`enabled`, `syncLongTermMemory`, `syncDailyLogs`).
 * @param logger Logger instance (`info` / `warn`).
 * @param allowedExtensions Optional set of allowed file extensions for asset sync.
 * @returns {Promise<{uploaded: number, skipped: number, failed: number, details: Array}>}
 *   Counters and per-file details for the memory files (asset counts are only logged).
 */
export async function syncLocalMemoryToCloud(ops, sessionFile, config, logger, allowedExtensions) {
    const result = {
        uploaded: 0,
        skipped: 0,
        failed: 0,
        details: [],
    };
    if (!config.enabled) {
        return result;
    }
    const workspaceDir = resolveWorkspaceDir(sessionFile);
    logger.info(`local-memory-sync: workspace=${workspaceDir}`);
    // ---- Step 1: Discover and upload all syncable files ----
    // discoverMemoryFiles respects the individual config flags internally
    // (syncLongTermMemory, syncDailyLogs).
    const memoryFiles = await discoverMemoryFiles(workspaceDir, config);
    logger.info(`local-memory-sync: found ${memoryFiles.length} syncable file(s) in ${workspaceDir}`
        + ` (longTerm=${config.syncLongTermMemory}, dailyLogs=${config.syncDailyLogs})`);
    // Contents of every readable memory file, kept for the asset pass below.
    const allContents = [];
    for (const file of memoryFiles) {
        // Rollback bookkeeping for the hash recorded by hasChanged().
        let previousHash;
        let hashPending = false;
        try {
            const content = await fs.readFile(file.absPath, 'utf-8');
            allContents.push({ absPath: file.absPath, content });
            if (content.trim().length === 0) {
                result.details.push({ path: file.relPath, status: 'skipped', reason: 'empty' });
                result.skipped += 1;
                logger.info(`local-memory-sync: skip (empty) — ${file.relPath}`);
                continue;
            }
            previousHash = hashCache.get(file.absPath);
            if (!hasChanged(file.absPath, content)) {
                result.details.push({ path: file.relPath, status: 'skipped', reason: 'unchanged' });
                result.skipped += 1;
                continue;
            }
            hashPending = true;
            const docId = filePathToDocId(file.relPath, file.category);
            const enrichedContent = [
                `# Local Memory: ${file.relPath}`,
                `**Type**: ${categoryLabel(file.category)}`,
                `**Source**: ${file.relPath}`,
                `**Synced**: ${new Date().toISOString()}`,
                '',
                '---',
                '',
                content,
            ].join('\n');
            await ops.upload(enrichedContent, {
                category: 'memory',
                docId,
                metadata: {
                    source: 'local-memory-sync',
                    type: file.category,
                    filePath: file.relPath,
                    timestamp: Date.now(),
                    contentHash: computeHash(content),
                },
            });
            // Upload succeeded: the recorded hash is now valid and must be kept.
            hashPending = false;
            // "{cosPrefix}" is a literal placeholder — the real prefix is not known here.
            const targetPath = `{cosPrefix}${docId}.md`;
            result.details.push({ path: file.relPath, status: 'uploaded' });
            result.uploaded += 1;
            logger.info(`local-memory-sync: uploaded — ${file.relPath} [${file.category}] → ${targetPath}`);
        }
        catch (err) {
            if (hashPending) {
                // The upload did not happen: forget the optimistic hash so the file is retried.
                hashCache.set(file.absPath, previousHash);
            }
            result.details.push({
                path: file.relPath,
                status: 'failed',
                reason: String(err),
            });
            result.failed += 1;
            logger.warn(`local-memory-sync: upload failed — ${file.relPath}: ${String(err)}`);
        }
    }
    logger.info(`local-memory-sync: done — uploaded=${result.uploaded}, skipped=${result.skipped}, failed=${result.failed}`);
    // ---- Step 2: Extract and upload assets (images + documents) referenced in memory files ----
    // Every readable memory file is scanned, including unchanged ones.
    try {
        const assetResult = await extractAndUploadAssets(ops, allContents, workspaceDir, logger, allowedExtensions);
        if (assetResult.uploaded > 0 || assetResult.failed > 0) {
            logger.info(`local-memory-sync: asset sync — uploaded=${assetResult.uploaded}, `
                + `skipped=${assetResult.skipped}, failed=${assetResult.failed}`);
        }
    }
    catch (err) {
        logger.warn(`local-memory-sync: asset extraction/upload failed: ${String(err)}`);
    }
    // Persist hash cache to disk so restarts don't re-upload unchanged files.
    await flushHashCache();
    return result;
}
|
|
645
|
+
// ============================================================================
|
|
646
|
+
// Image extraction and upload
|
|
647
|
+
// ============================================================================
|
|
648
|
+
/**
|
|
649
|
+
* Regex to match image/file links in Markdown content.
|
|
650
|
+
*
|
|
651
|
+
* Supported formats:
|
|
652
|
+
* - `` — standard Markdown image
|
|
653
|
+
* - `` — with optional title
|
|
654
|
+
* - `[alt](path)` — standard Markdown link (for PDFs, docs, etc.)
|
|
655
|
+
*
|
|
656
|
+
* Only matches local file paths (not http/https URLs). Paths can be:
|
|
657
|
+
* - Absolute: `/path/to/image.png`
|
|
658
|
+
* - Relative: `./images/photo.jpg`, `../assets/icon.svg`
|
|
659
|
+
* - Home-relative: `~/Pictures/screenshot.png`
|
|
660
|
+
*
|
|
661
|
+
* Does NOT match:
|
|
662
|
+
* - HTTP URLs: ``
|
|
663
|
+
* - Data URIs: ``
|
|
664
|
+
*/
|
|
665
|
+
// Capture group 1 = alt text; group 2 = everything between the parentheses
// (the path plus any trailing title). The pattern itself does not filter out
// URLs or data URIs — extractImageLinks does that after matching.
// Global flag: `lastIndex` is stateful, so it is reset before each scan.
const MARKDOWN_IMAGE_RE = /!\[([^\]]*)\]\(([^)]+)\)/g;
|
|
666
|
+
/**
|
|
667
|
+
* Regex to match standard Markdown links `[alt](path)` — used to extract
|
|
668
|
+
* linked local files (e.g. PDFs, documents) that are not wrapped in `![]()`.
|
|
669
|
+
*/
|
|
670
|
+
// The negative lookbehind `(?<!!)` rejects a `[` that is directly preceded by
// `!`, so image syntax is left to MARKDOWN_IMAGE_RE. Group 1 = link text,
// group 2 = raw target (path plus any title). Global flag: reset `lastIndex`
// before each scan.
const MARKDOWN_LINK_RE = /(?<!!)\[([^\]]*)\]\(([^)]+)\)/g;
|
|
671
|
+
/**
|
|
672
|
+
* Regex to match HTML `<img>` and `<video>` tags with a `src` attribute.
|
|
673
|
+
*
|
|
674
|
+
* Captures the `src` value from tags like:
|
|
675
|
+
* - `<img src="path/to/image.png">`
|
|
676
|
+
* - `<video src="path/to/video.mp4">`
|
|
677
|
+
* - Handles both single and double quotes, or unquoted src values.
|
|
678
|
+
*/
|
|
679
|
+
// Exactly one of the three capture groups is set per match:
// group 1 = double-quoted src, group 2 = single-quoted src, group 3 = unquoted src.
// Case-insensitive and global: reset `lastIndex` before each scan.
const HTML_MEDIA_SRC_RE = /<(?:img|video)\s[^>]*?\bsrc\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s>]+))/gi;
|
|
680
|
+
/**
|
|
681
|
+
* Regex to match bare image URLs in Markdown content (not inside `![]()`).
|
|
682
|
+
*
|
|
683
|
+
* Matches HTTP(S) URLs that end with a known image extension (before any query
|
|
684
|
+
* string or fragment). These are "bare" URLs — plain text links not wrapped in
|
|
685
|
+
* standard Markdown image syntax.
|
|
686
|
+
*
|
|
687
|
+
* The extraction logic will attempt to find a locally downloaded copy of the
|
|
688
|
+
* image in `~/Downloads/` by matching the filename from the URL path.
|
|
689
|
+
*/
|
|
690
|
+
// The lookbehind `(?<!\]\()` skips URLs that directly follow `](`, i.e. Markdown
// link/image targets. The lookahead requires the extension to be followed by
// `?`, `#`, whitespace, or end of input, so a URL immediately followed by other
// punctuation (e.g. `.` or `)`) does not match. No capture groups: use match[0].
// Case-insensitive and global: reset `lastIndex` before each scan.
const BARE_IMAGE_URL_RE = /(?<!\]\()https?:\/\/[^\s)]+\.(?:png|jpe?g|gif|bmp|webp|svg|ico|tiff?|avif|heic|heif)(?=[?\s#]|$)/gi;
|
|
691
|
+
/**
|
|
692
|
+
* Regex to match bare local file paths in plain text.
|
|
693
|
+
*
|
|
694
|
+
* Matches absolute or home-relative paths ending with a known media/document extension:
|
|
695
|
+
* - `/root/.openclaw/workspace/pictures/10/1.jpg`
|
|
696
|
+
* - `~/Pictures/screenshot.png`
|
|
697
|
+
* - `~Downloads/report.pdf` (common typo: `~` without `/`)
|
|
698
|
+
* - `../../.openclaw/workspace/pictures/10/1.jpg`
|
|
699
|
+
*
|
|
700
|
+
* Does NOT match paths already captured by Markdown or HTML syntax (those are
|
|
701
|
+
* handled by dedicated regexes above).
|
|
702
|
+
*
|
|
703
|
+
* The `~` prefix is treated as home-directory shorthand, with or without a
|
|
704
|
+
* separating `/` (e.g. both `~/Downloads/x.pdf` and `~Downloads/x.pdf`).
|
|
705
|
+
*/
|
|
706
|
+
// Group 1 is the path. The leading delimiter (line start, whitespace, `,` or `;`)
// is consumed by the match but sits outside group 1; the trailing delimiter is
// only asserted by the lookahead and not consumed.
// First alternative: `~Name/...` (tilde followed directly by a directory name).
// Second alternative: paths starting with `/`, `./`, `../`, or `~/`.
// Flags: g (stateful `lastIndex` — reset before each scan), i (case-insensitive),
// m (`^` and `$` match at line boundaries).
const BARE_LOCAL_PATH_RE = /(?:^|[\s,;])(~[^\s,;:*?"<>|/\\]+\/[^\s,;:*?"<>|]+\.(?:png|jpe?g|gif|bmp|webp|svg|ico|tiff?|avif|heic|heif|pdf|doc|docx|xls|xlsx|ppt|pptx|txt|csv|md|rtf|mp4|mov|avi|mkv)|[~.]?(?:\.\/|\.\.\/|\/)[^\s,;:*?"<>|]+\.(?:png|jpe?g|gif|bmp|webp|svg|ico|tiff?|avif|heic|heif|pdf|doc|docx|xls|xlsx|ppt|pptx|txt|csv|md|rtf|mp4|mov|avi|mkv))(?=[\s,;]|$)/gim;
|
|
707
|
+
/**
 * File extensions treated as images.
 *
 * Entries are lower-case and include the leading dot, matching the form
 * produced by `path.extname(p).toLowerCase()`.
 */
const IMAGE_EXTENSIONS = new Set([
    '.png',
    '.jpg',
    '.jpeg',
    '.gif',
    '.bmp',
    '.webp',
    '.svg',
    '.ico',
    '.tiff',
    '.tif',
    '.avif',
    '.heic',
    '.heif',
]);
|
|
714
|
+
/**
 * Extensions synced as assets when the caller supplies no explicit list
 * (images first, then documents).
 *
 * Users can override this via the `syncFileExtensions` config.
 */
const DEFAULT_SYNC_FILE_EXTENSIONS = [
    // Images
    '.png',
    '.jpg',
    '.jpeg',
    '.gif',
    '.bmp',
    '.webp',
    '.svg',
    '.ico',
    '.tiff',
    '.tif',
    '.avif',
    '.heic',
    '.heif',
    // Documents
    '.pdf',
    '.doc',
    '.docx',
    '.xls',
    '.xlsx',
    '.ppt',
    '.pptx',
    '.txt',
    '.csv',
    '.md',
    '.rtf',
];
|
|
727
|
+
/**
 * MIME type lookup keyed by lower-case file extension (leading dot included).
 *
 * Covers the image and document extensions synced by default, plus the video
 * extensions that the link extractors recognise (`<video src>` tags and bare
 * `.mp4/.mov/.avi/.mkv` paths). Without the video entries, a video enabled via
 * a custom extension list would be uploaded as `application/octet-stream`.
 *
 * Extensions missing from this table are expected to fall back to
 * `application/octet-stream` at the call site.
 */
const MIME_TYPES = {
    // Images
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.gif': 'image/gif',
    '.bmp': 'image/bmp',
    '.webp': 'image/webp',
    '.svg': 'image/svg+xml',
    '.ico': 'image/x-icon',
    '.tiff': 'image/tiff',
    '.tif': 'image/tiff',
    '.avif': 'image/avif',
    '.heic': 'image/heic',
    '.heif': 'image/heif',
    // Documents
    '.pdf': 'application/pdf',
    '.doc': 'application/msword',
    '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    '.xls': 'application/vnd.ms-excel',
    '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    '.ppt': 'application/vnd.ms-powerpoint',
    '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    '.txt': 'text/plain',
    '.csv': 'text/csv',
    '.md': 'text/markdown',
    '.rtf': 'application/rtf',
    // Video (only uploaded when the caller's extension list allows them)
    '.mp4': 'video/mp4',
    '.mov': 'video/quicktime',
    '.avi': 'video/x-msvideo',
    '.mkv': 'video/x-matroska',
};
|
|
757
|
+
/**
 * Get/set facade over the `asset` namespace of the object returned by
 * `loadHashCache()`. Every write also raises the module-level `cacheDirty` flag.
 */
const imageHashCache = {
    get: (key) => {
        const { asset } = loadHashCache();
        return asset[key];
    },
    set: (key, value) => {
        const { asset } = loadHashCache();
        asset[key] = value;
        cacheDirty = true;
    },
};
|
|
767
|
+
/**
 * Extract references to local asset files (images, documents, media) from
 * Markdown content.
 *
 * The content is scanned in five passes. Results are returned in pass order
 * and de-duplicated by their string value:
 *   1. Markdown image syntax: `![alt](path)`.
 *   2. Bare `http(s)` image URLs in plain text — mapped to
 *      `~/Downloads/{filename}` and kept only when that file exists locally
 *      and has an image extension.
 *   3. Markdown link syntax: `[text](path)` (PDFs, documents, etc.).
 *   4. HTML media tags: `<img src="path">`, `<video src="path">`.
 *   5. Bare local file paths in plain text: `/root/pics/1.jpg`, `../../pics/1.jpg`.
 *
 * Targets found by passes 1, 3 and 4 that point at remote or inline resources
 * (`scheme://...`, `data:`, `mailto:`, `tel:`) are skipped — remote links in
 * Markdown/HTML syntax are expected to stay remote. Pass 3 additionally skips
 * pure `#anchor` links. Link titles (double- or single-quoted) and
 * angle-bracket wrappers (`<path with spaces.png>`) are stripped from the target.
 *
 * Paths are returned as written (unresolved), except for pass 2, which yields
 * the joined `{home}/Downloads/{filename}` path.
 *
 * @param content  Markdown text to scan.
 * @returns Array of path strings referenced in the content.
 */
export function extractImageLinks(content) {
    const links = [];
    const seenPaths = new Set();
    // Record a candidate once, preserving first-seen order.
    const remember = (candidate) => {
        if (candidate.length === 0 || seenPaths.has(candidate)) {
            return;
        }
        seenPaths.add(candidate);
        links.push(candidate);
    };
    let match;
    // --- Pass 1: Standard Markdown image syntax ![alt](path) ---
    // The regexes are module-level and global, so lastIndex must be reset per scan.
    MARKDOWN_IMAGE_RE.lastIndex = 0;
    while ((match = MARKDOWN_IMAGE_RE.exec(content)) !== null) {
        const target = cleanMarkdownLinkTarget(match[2]);
        if (!isNonLocalLinkTarget(target)) {
            remember(target);
        }
    }
    // --- Pass 2: Bare image URLs → resolve to ~/Downloads/{filename} ---
    const homeDir = process.env.HOME ?? process.env.USERPROFILE ?? '';
    if (homeDir) {
        BARE_IMAGE_URL_RE.lastIndex = 0;
        while ((match = BARE_IMAGE_URL_RE.exec(content)) !== null) {
            // Using the URL's pathname drops any query string or fragment.
            let urlPath;
            try {
                urlPath = new URL(match[0]).pathname;
            }
            catch {
                // Malformed URL — skip
                continue;
            }
            const filename = path.basename(urlPath);
            if (!filename || filename === '/') {
                continue;
            }
            // Only keep the URL when a downloaded copy actually exists locally.
            const localPath = path.join(homeDir, 'Downloads', filename);
            if (fsSync.existsSync(localPath) && isImageFile(localPath)) {
                remember(localPath);
            }
        }
    }
    // --- Pass 3: Standard Markdown link syntax [alt](path) — local files only ---
    MARKDOWN_LINK_RE.lastIndex = 0;
    while ((match = MARKDOWN_LINK_RE.exec(content)) !== null) {
        const target = cleanMarkdownLinkTarget(match[2]);
        // In-page anchors are never files.
        if (target.startsWith('#') || isNonLocalLinkTarget(target)) {
            continue;
        }
        remember(target);
    }
    // --- Pass 4: HTML <img> and <video> tags with src attribute ---
    HTML_MEDIA_SRC_RE.lastIndex = 0;
    while ((match = HTML_MEDIA_SRC_RE.exec(content)) !== null) {
        // Capture group 1 = double-quoted, 2 = single-quoted, 3 = unquoted
        const target = (match[1] ?? match[2] ?? match[3] ?? '').trim();
        if (!isNonLocalLinkTarget(target)) {
            remember(target);
        }
    }
    // --- Pass 5: Bare local file paths in plain text ---
    BARE_LOCAL_PATH_RE.lastIndex = 0;
    while ((match = BARE_LOCAL_PATH_RE.exec(content)) !== null) {
        // Paths already captured by earlier passes are dropped by remember().
        remember(match[1].trim());
    }
    return links;
}
/**
 * Reduce the raw text between `(` and `)` of a Markdown link/image to its
 * destination: trims whitespace, drops a trailing `"title"` or `'title'`, and
 * unwraps an angle-bracket destination (`<path with spaces.png>`).
 */
function cleanMarkdownLinkTarget(rawTarget) {
    let target = rawTarget.trim();
    const titled = target.match(/^(.+?)\s+(?:"[^"]*"|'[^']*')$/);
    if (titled) {
        target = titled[1].trim();
    }
    if (target.length >= 2 && target.startsWith('<') && target.endsWith('>')) {
        target = target.slice(1, -1).trim();
    }
    return target;
}
/**
 * True when a link target cannot be a local file: empty, a `scheme://` URL
 * (scheme of two or more characters, so Windows drive letters are not
 * mistaken for schemes), or a `data:` / `mailto:` / `tel:` URI.
 */
function isNonLocalLinkTarget(target) {
    return target.length === 0
        || /^(?:[a-z][a-z0-9+.-]+:\/\/|data:|mailto:|tel:)/i.test(target);
}
|
|
893
|
+
/**
 * Turn a raw asset path taken from a memory file into an absolute path.
 *
 * Resolution order:
 * - Absolute paths are returned unchanged.
 * - Paths starting with `~` are joined onto the home directory (HOME, else
 *   USERPROFILE). Both `~/dir/file` and the separator-less typo `~dir/file`
 *   are accepted. If no home directory is known, the path is treated as
 *   relative.
 * - Relative paths are resolved against the Markdown file's directory when
 *   that candidate exists on disk, otherwise against the workspace root.
 *
 * @param imagePath    Raw path from the Markdown content.
 * @param mdFilePath   Absolute path of the Markdown file containing the link.
 * @param workspaceDir Workspace root directory (fallback base for relative paths).
 * @returns Resolved path.
 */
function resolveImagePath(imagePath, mdFilePath, workspaceDir) {
    if (path.isAbsolute(imagePath)) {
        return imagePath;
    }
    // Only look up the home directory when the path is actually home-relative.
    const home = imagePath.startsWith('~')
        ? (process.env.HOME ?? process.env.USERPROFILE ?? '')
        : '';
    if (home) {
        const hasSeparator = imagePath.startsWith('~/') || imagePath.startsWith('~\\');
        // Drop `~/` (2 chars) for the standard form, or just `~` for the typo form.
        return path.join(home, imagePath.slice(hasSeparator ? 2 : 1));
    }
    const besideMarkdown = path.resolve(path.dirname(mdFilePath), imagePath);
    return fsSync.existsSync(besideMarkdown)
        ? besideMarkdown
        : path.resolve(workspaceDir, imagePath);
}
|
|
933
|
+
/**
 * Report whether a path's extension (compared case-insensitively) is one of
 * the known image extensions.
 */
function isImageFile(filePath) {
    return IMAGE_EXTENSIONS.has(path.extname(filePath).toLowerCase());
}
|
|
940
|
+
/**
 * Report whether a path's extension (compared case-insensitively) is an
 * allowed asset extension.
 *
 * @param filePath    Path to check.
 * @param allowedExts Set of allowed extensions; when omitted, IMAGE_EXTENSIONS
 *                    is used (backward compat).
 */
function isAssetFile(filePath, allowedExts) {
    const extension = path.extname(filePath).toLowerCase();
    if (allowedExts) {
        return allowedExts.has(extension);
    }
    return IMAGE_EXTENSIONS.has(extension);
}
|
|
950
|
+
/**
 * Fingerprint binary content for asset dedup: the first 16 hex characters of
 * its SHA-256 digest.
 */
function computeAssetHash(buffer) {
    const hasher = crypto.createHash('sha256');
    hasher.update(buffer);
    const fullDigest = hasher.digest('hex');
    return fullDigest.slice(0, 16);
}
|
|
956
|
+
/**
 * Compare an asset's current content hash with the cached one.
 *
 * Side effect: when the hashes differ (or nothing is cached yet), the new hash
 * is stored in the cache immediately, before this function returns.
 *
 * @returns `false` when the cached hash equals the current one, `true` otherwise.
 */
function assetHasChanged(absPath, buffer) {
    const currentHash = computeAssetHash(buffer);
    const unchanged = imageHashCache.get(absPath) === currentHash;
    if (!unchanged) {
        imageHashCache.set(absPath, currentHash);
    }
    return !unchanged;
}
|
|
968
|
+
/**
 * Derive the COS object key for an asset under the **asset/** directory,
 * mirroring the file's absolute path so the directory structure is kept.
 *
 * Layout: `{agentPrefix}asset/{absolutePath without leading separator}`
 *   e.g. `/Users/shawn/Downloads/test.jpg` → `asset/Users/shawn/Downloads/test.jpg`
 *   e.g. `/tmp/pic.png` → `asset/tmp/pic.png`
 *   e.g. `/Users/shawn/Downloads/CI控制台指南.pdf` → `asset/Users/shawn/Downloads/CI控制台指南.pdf`
 *
 * Each path segment is sanitised on its own: control characters
 * (U+0000–U+001F, U+007F) and `*`, `?`, `"`, `<`, `>`, `|`, `#`, `%`, `{`, `}`,
 * `^`, `` ` ``, `[`, `]` become `-`; runs of `-` collapse to one; a leading or
 * trailing `-` is removed. All other characters, including non-ASCII ones
 * such as CJK, are kept as-is.
 *
 * @param ops          CosOperations instance (supplies the agent prefix).
 * @param resolvedPath Absolute path of the asset file.
 * @returns Full COS key string.
 */
function buildAssetCosKey(ops, resolvedPath) {
    const prefix = ops.getAgentPrefix();
    const absolute = path.resolve(resolvedPath);
    // Drop the leading separator so the key does not contain `asset//...`.
    const withoutRoot = absolute.startsWith(path.sep)
        ? absolute.slice(path.sep.length)
        : absolute;
    const cleanedSegments = [];
    for (const segment of withoutRoot.split(path.sep)) {
        // eslint-disable-next-line no-control-regex
        const replaced = segment.replace(/[\x00-\x1f\x7f*?"<>|#%{}^`[\]]/g, '-');
        const collapsed = replaced.replace(/-+/g, '-');
        cleanedSegments.push(collapsed.replace(/^-|-$/g, ''));
    }
    // Keys always use `/`, whatever the platform separator is.
    return `${prefix}asset/${cleanedSegments.join('/')}`;
}
|
|
1007
|
+
/**
 * Extract asset links (images + documents) from memory file contents and
 * upload the referenced local files to COS.
 *
 * For each memory file:
 *   1. Extract asset references via `extractImageLinks` (`![alt](path)`,
 *      `[alt](path)`, HTML media tags, bare URLs/paths).
 *   2. Resolve each reference to an absolute local path.
 *   3. Skip files that do not exist or whose extension is not allowed.
 *   4. Skip files whose content hash matches the cached hash (dedup).
 *   5. Upload to `{agentPrefix}asset/{absolutePath}`, preserving directory structure.
 *   6. Tag the upload metadata with `category: 'image'` or `'document'`.
 *
 * The content hash is written to the cache only AFTER the upload succeeds, so
 * a failed upload is retried on the next sync instead of being reported as
 * "unchanged" forever.
 *
 * A failure on one asset is recorded and logged; the remaining assets are
 * still processed.
 *
 * @param ops               CosOperations instance for uploading.
 * @param fileContents      Array of { absPath, content } for each memory file.
 * @param workspaceDir      Workspace root for resolving relative paths.
 * @param logger            Logger for progress/error reporting.
 * @param allowedExtensions Optional set of allowed file extensions. Defaults to DEFAULT_SYNC_FILE_EXTENSIONS.
 * @returns Summary of asset upload results: `{ uploaded, skipped, failed, details }`.
 */
export async function extractAndUploadAssets(ops, fileContents, workspaceDir, logger, allowedExtensions) {
    const result = {
        uploaded: 0,
        skipped: 0,
        failed: 0,
        details: [],
    };
    // Build the effective allowed-extension set
    const allowedExts = allowedExtensions ?? new Set(DEFAULT_SYNC_FILE_EXTENSIONS);
    const recordSkip = (localPath, reason) => {
        result.details.push({ localPath, status: 'skipped', reason });
        result.skipped += 1;
    };
    // Resolved paths already handled, so a file referenced from several memory
    // files is processed once.
    const seenPaths = new Set();
    for (const { absPath: mdPath, content } of fileContents) {
        for (const rawLink of extractImageLinks(content)) {
            const resolvedPath = resolveImagePath(rawLink, mdPath, workspaceDir);
            if (seenPaths.has(resolvedPath)) {
                continue;
            }
            seenPaths.add(resolvedPath);
            try {
                if (!fsSync.existsSync(resolvedPath)) {
                    logger.info(`local-memory-sync: asset not found — ${rawLink} (resolved: ${resolvedPath})`);
                    recordSkip(resolvedPath, 'file not found');
                    continue;
                }
                if (!isAssetFile(resolvedPath, allowedExts)) {
                    logger.info(`local-memory-sync: not a supported asset — ${resolvedPath}`);
                    recordSkip(resolvedPath, 'not a supported asset format');
                    continue;
                }
                const buffer = await fs.readFile(resolvedPath);
                // Hash once; reused for the dedup check, the upload metadata and the cache.
                const contentHash = computeAssetHash(buffer);
                if (imageHashCache.get(resolvedPath) === contentHash) {
                    recordSkip(resolvedPath, 'unchanged');
                    continue;
                }
                // Build COS key — assets go to asset/ preserving directory structure
                const cosKey = buildAssetCosKey(ops, resolvedPath);
                const ext = path.extname(resolvedPath).toLowerCase();
                const contentType = MIME_TYPES[ext] ?? 'application/octet-stream';
                const assetCategory = isImageFile(resolvedPath) ? 'image' : 'document';
                // Paths may contain non-ASCII characters (e.g. Chinese); HTTP custom
                // headers require ASCII values, so path values are URI-encoded.
                const assetMetadata = {
                    source: 'local-memory-sync',
                    type: `asset-${assetCategory}`,
                    category: assetCategory,
                    originalPath: encodeURIComponent(rawLink),
                    resolvedPath: encodeURIComponent(resolvedPath),
                    timestamp: Date.now(),
                    contentHash,
                };
                await ops.uploadBinary(buffer, cosKey, contentType, assetMetadata);
                // Record the hash only now that the object is actually in COS.
                imageHashCache.set(resolvedPath, contentHash);
                result.details.push({
                    localPath: resolvedPath,
                    cosKey,
                    status: 'uploaded',
                });
                result.uploaded += 1;
                logger.info(`local-memory-sync: asset uploaded — ${rawLink} → ${cosKey} [${assetCategory}]`);
            }
            catch (err) {
                result.details.push({
                    localPath: resolvedPath,
                    status: 'failed',
                    reason: String(err),
                });
                result.failed += 1;
                logger.warn(`local-memory-sync: asset upload failed — ${resolvedPath}: ${describeAssetUploadError(err)}`);
            }
        }
    }
    return result;
}
/**
 * Render a caught value for logging without ever throwing: `Error` instances
 * use their message, other values are pretty-printed as JSON, and anything
 * JSON cannot represent (cycles, BigInt, `undefined`) falls back to `String()`.
 */
function describeAssetUploadError(err) {
    if (err instanceof Error) {
        return err.message;
    }
    try {
        return JSON.stringify(err, null, 2) ?? String(err);
    }
    catch {
        return String(err);
    }
}
|
|
1122
|
+
/**
|
|
1123
|
+
* @deprecated Use `extractAndUploadAssets` instead. Kept for backward compatibility.
|
|
1124
|
+
*/
|
|
1125
|
+
// Alias only: this binds the legacy name to the very same function object as
// `extractAndUploadAssets`, so both names behave identically.
export const extractAndUploadImages = extractAndUploadAssets;
|
|
1126
|
+
//# sourceMappingURL=local-memory-sync.js.map
|