metainsight-context-engine 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1126 @@
1
+ /**
2
+ * Local Memory Sync — Upload MEMORY.md, daily logs, images, and documents to cloud
3
+ *
4
+ * This module provides functionality to sync local memory files and assets
5
+ * (images + documents) to the cloud vector store.
6
+ *
7
+ * Local file locations (workspace root = ~/.openclaw/workspace/):
8
+ * - Long-term memory : ~/.openclaw/workspace/MEMORY.md
9
+ * - Short-term memory: ~/.openclaw/workspace/memory/YYYY-MM-DD.md
10
+ *
11
+ * COS storage layout — Multi-agent (2 top-level directories):
12
+ *
13
+ * openclaw-{agentId}/
14
+ * ├── workspace/ ← MEMORY.md + memory/*.md (indexed by CI)
15
+ * │ ├── MEMORY.md ← long-term memory (MEMORY.md)
16
+ * │ └── memory/ ← daily logs (YYYY-MM-DD.md)
17
+ * └── asset/ ← images + documents preserving directory structure
18
+ * ├── screenshot.png
19
+ * ├── report.pdf
20
+ * ├── images/
21
+ * │ └── photo.jpg
22
+ * └── .codebuddy/
23
+ * └── diagrams/
24
+ * └── arch.png
25
+ *
26
+ * COS storage layout — Legacy (cosPrefix = "memory/"):
27
+ * - Long-term memory : memory/memory.md
28
+ * - Short-term memory: memory/memory/YYYY-MM-DD.md
29
+ * - Assets : asset/{relPath}
30
+ *
31
+ * Asset extraction:
32
+ * When syncing memory files, asset links (Markdown `![](path)` for images
33
+ * and `[](path)` for documents) are detected. If the referenced file exists
34
+ * on the local filesystem and has a supported extension, it is uploaded to
35
+ * the COS `asset/` directory. Image files get `category: image` metadata,
36
+ * document files get `category: document` metadata.
37
+ *
38
+ * Supported extensions are configurable via `syncFileExtensions` config.
39
+ * Default: common image extensions (.png, .jpg, etc.) + document extensions
40
+ * (.pdf, .doc, .docx, .xls, .xlsx, .ppt, .pptx, .txt, .csv, .md, .rtf).
41
+ *
42
+ * Key design decisions:
43
+ * - Each file is uploaded with a **deterministic docId** derived from its
44
+ * relative path. This means re-uploading the same file overwrites the
45
+ * previous version instead of creating duplicates.
46
+ * - Content is hashed (SHA-256) and compared against a local cache to skip
47
+ * unchanged files (avoiding unnecessary COS PUT calls).
48
+ * - Asset uploads use a separate hash cache to avoid re-uploading unchanged files.
49
+ * - The sync runs in the background (non-blocking) and logs progress.
50
+ * - All operations are gated behind the `localMemorySync` config flag.
51
+ *
52
+ * Architecture:
53
+ * local-memory-sync.ts ← engine.ts (afterTurn / bootstrap)
54
+ * ↓
55
+ * cos-operations.ts (upload)
56
+ */
57
+ import crypto from 'node:crypto';
58
+ import fs from 'node:fs/promises';
59
+ import fsSync from 'node:fs';
60
+ import path from 'node:path';
61
/** Lazily populated in-memory copy of the on-disk hash cache; `null` until first loaded. */
let cacheData = null;
/** Absolute path of the cache file, memoised by `resolveCacheFilePath` on first use. */
let cacheFilePath = null;
/** True while the in-memory cache holds changes that have not been written to disk. */
let cacheDirty = false;
67
/**
 * Work out (once) where the persistent hash cache lives.
 *
 * Lookup order for the containing directory:
 *   1. `OPENCLAW_STATE_DIR` (trimmed, if non-empty)
 *   2. `<home>/.openclaw`, where home is `HOME` or `USERPROFILE`
 *   3. the current working directory
 *
 * The result is memoised in `cacheFilePath`.
 */
function resolveCacheFilePath() {
    if (!cacheFilePath) {
        const fileName = '.sync-hash-cache.json';
        const home = process.env.HOME ?? process.env.USERPROFILE ?? '';
        let stateDir = process.env.OPENCLAW_STATE_DIR?.trim();
        if (!stateDir) {
            stateDir = home ? `${home}/.openclaw` : '';
        }
        const baseDir = stateDir ? stateDir : process.cwd();
        cacheFilePath = path.join(baseDir, fileName);
    }
    return cacheFilePath;
}
83
/**
 * Load the persistent hash cache from disk (no-op if already loaded).
 *
 * A missing, unreadable or malformed file yields an empty cache. Each
 * namespace (`memory`, `asset`) is accepted only when it is a plain,
 * non-array object: string keys assigned to an array are silently dropped by
 * `JSON.stringify`, so an array-shaped namespace would never persist.
 *
 * @returns The in-memory cache object `{ memory, asset }`.
 */
function loadHashCache() {
    if (cacheData) {
        return cacheData;
    }
    const isRecord = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);
    const filePath = resolveCacheFilePath();
    let parsed = null;
    try {
        parsed = JSON.parse(fsSync.readFileSync(filePath, 'utf-8'));
    }
    catch {
        // File missing or corrupted — start fresh
        parsed = null;
    }
    cacheData = {
        memory: isRecord(parsed?.memory) ? parsed.memory : {},
        asset: isRecord(parsed?.asset) ? parsed.asset : {},
    };
    return cacheData;
}
106
/**
 * Write the in-memory hash cache to disk when it has unsaved changes.
 *
 * Does nothing when the cache is clean or was never loaded. The parent
 * directory is created on demand. Write failures are deliberately ignored,
 * in which case the dirty flag stays set and the cache keeps working
 * in-memory for the rest of the session.
 */
export async function flushHashCache() {
    const hasPendingChanges = cacheDirty && Boolean(cacheData);
    if (!hasPendingChanges) {
        return;
    }
    const target = resolveCacheFilePath();
    try {
        await fs.mkdir(path.dirname(target), { recursive: true });
        const serialized = JSON.stringify(cacheData, null, 2);
        await fs.writeFile(target, serialized, 'utf-8');
        cacheDirty = false;
    }
    catch {
        // Best-effort: persistence is optional, so the failure is not surfaced.
    }
}
124
/**
 * Accessor for the `memory` namespace of the persistent hash cache.
 * Every mutation marks the cache dirty so the next flush writes it out.
 */
const hashCache = {
    get: (key) => loadHashCache().memory[key],
    set: (key, value) => {
        const cache = loadHashCache();
        cache.memory[key] = value;
        cacheDirty = true;
    },
    clear: () => {
        const cache = loadHashCache();
        cache.memory = {};
        cacheDirty = true;
    },
};
138
/**
 * Fingerprint `content` for change detection.
 *
 * @param content String or Buffer to hash.
 * @returns The first 16 hex characters of the SHA-256 digest.
 */
function computeHash(content) {
    const hasher = crypto.createHash('sha256');
    hasher.update(content);
    const fullDigest = hasher.digest('hex');
    return fullDigest.slice(0, 16);
}
141
/**
 * Compare `content` against the hash cached for `filePath`.
 *
 * Side effect: when the hash differs (or none is cached) the new hash is
 * stored straight away, before the caller has done anything with the content.
 *
 * @param filePath Cache key (callers pass the absolute file path).
 * @param content  Current file content.
 * @returns `true` when the content differs from the cached hash.
 */
function hasChanged(filePath, content) {
    const currentHash = computeHash(content);
    const differs = hashCache.get(filePath) !== currentHash;
    if (differs) {
        hashCache.set(filePath, currentHash);
    }
    return differs;
}
150
/**
 * Reset the hash cache (useful for testing or a forced re-sync).
 *
 * Replaces both namespaces (`memory` and `asset`) with empty objects in
 * memory and then flushes, so the on-disk file is overwritten with the
 * empty cache as well. Takes no arguments.
 */
export async function clearSyncHashCache() {
    cacheDirty = true;
    cacheData = { memory: {}, asset: {} };
    await flushHashCache();
}
160
+ // ============================================================================
161
+ // File discovery
162
+ // ============================================================================
163
/**
 * Resolve the workspace directory from a session file path.
 *
 * The workspace directory is the root where memory files (MEMORY.md,
 * memory/*.md) and workspace identity files (AGENTS.md, SOUL.md, etc.) live.
 *
 * Resolution order:
 *   1. Well-known path `~/.openclaw/workspace/`, accepted only if it contains
 *      at least one workspace marker.
 *   2. Walk up from `sessionFile` (at most 10 levels) to the first directory
 *      containing a workspace marker.
 *   3. Fallback: the current working directory.
 *
 * @param sessionFile Session file path. A missing or non-string value skips
 *                    step 2 instead of throwing from `path.dirname`.
 * @returns The resolved workspace directory.
 */
function resolveWorkspaceDir(sessionFile) {
    const containsAny = (dir, markers) => markers.some((marker) => fsSync.existsSync(path.join(dir, marker)));
    // ---- Priority 1: the well-known gateway workspace path ----
    const homeDir = process.env.HOME ?? process.env.USERPROFILE ?? '';
    if (homeDir) {
        const wellKnownWorkspace = path.join(homeDir, '.openclaw', 'workspace');
        const homeMarkers = ['MEMORY.md', 'memory', 'AGENTS.md', 'SOUL.md', '.git'];
        // The directory must exist AND look like a real workspace.
        if (fsSync.existsSync(wellKnownWorkspace) && containsAny(wellKnownWorkspace, homeMarkers)) {
            return wellKnownWorkspace;
        }
    }
    // ---- Priority 2: walk up from sessionFile to find a workspace ----
    if (typeof sessionFile === 'string') {
        const walkMarkers = ['MEMORY.md', 'memory', 'AGENTS.md', '.codebuddy', 'package.json', '.git'];
        let dir = path.dirname(sessionFile);
        for (let depth = 0; depth < 10; depth += 1) {
            if (containsAny(dir, walkMarkers)) {
                return dir;
            }
            const parent = path.dirname(dir);
            if (parent === dir) {
                // Reached the filesystem root.
                break;
            }
            dir = parent;
        }
    }
    // ---- Priority 3: fall back to CWD ----
    return process.cwd();
}
215
/**
 * File names of the workspace identity files (the AI's identity and
 * behavioural system), located at the root of the workspace directory.
 *
 * @deprecated These files are no longer synced to the cloud; the list is
 * kept so that they can be recognised and skipped.
 */
const WORKSPACE_SYNC_FILES = ['AGENTS', 'SOUL', 'TOOLS', 'IDENTITY', 'BOOTSTRAP', 'HEARTBEAT', 'USER']
    .map((stem) => `${stem}.md`);
231
/**
 * Enumerate the local memory files of a workspace.
 *
 * With `config.syncLongTermMemory`:
 *   - the root `MEMORY.md`, or `memory.md` if the former does not exist
 *     (only one of the two is ever reported: on case-insensitive filesystems
 *     both names resolve to the same file and would otherwise be uploaded
 *     twice under different keys)
 *   - `.codebuddy/MEMORY.md`
 *
 * With `config.syncDailyLogs`:
 *   - `memory/*.md` and `.codebuddy/memory/*.md`
 *
 * Workspace identity files (AGENTS.md, SOUL.md, ...) are not discovered.
 *
 * @param workspaceDir Workspace root directory.
 * @param config       Flags `syncLongTermMemory` / `syncDailyLogs`.
 * @returns Array of `{ absPath, relPath, category }` descriptors.
 */
async function discoverMemoryFiles(workspaceDir, config) {
    const found = [];
    if (config.syncLongTermMemory) {
        const rootName = ['MEMORY.md', 'memory.md']
            .find((candidate) => fsSync.existsSync(path.join(workspaceDir, candidate)));
        if (rootName !== undefined) {
            found.push({
                absPath: path.join(workspaceDir, rootName),
                relPath: rootName,
                category: 'long-term',
            });
        }
        const ideMemory = path.join(workspaceDir, '.codebuddy', 'MEMORY.md');
        if (fsSync.existsSync(ideMemory)) {
            found.push({ absPath: ideMemory, relPath: '.codebuddy/MEMORY.md', category: 'long-term' });
        }
    }
    if (config.syncDailyLogs) {
        const dailyLogDirs = [
            path.join(workspaceDir, 'memory'),
            path.join(workspaceDir, '.codebuddy', 'memory'),
        ];
        // Sequential on purpose: keeps the result order (workspace first, IDE second).
        for (const logDir of dailyLogDirs) {
            await scanMarkdownDir(logDir, workspaceDir, 'daily-log', found);
        }
    }
    return found;
}
276
/**
 * Append every regular `*.md` file directly inside `dir` to `files`.
 *
 * The scan is not recursive. A directory that is missing or cannot be read
 * is silently ignored (best-effort discovery).
 *
 * `relPath` is always reported with forward slashes, regardless of the OS
 * separator, so it matches the hard-coded `/`-separated relative paths used
 * elsewhere in this module.
 *
 * @param dir          Directory to scan.
 * @param workspaceDir Workspace root, used to compute `relPath`.
 * @param category     Category assigned to every discovered file.
 * @param files        Output array, mutated in place.
 */
async function scanMarkdownDir(dir, workspaceDir, category, files) {
    let entries;
    try {
        entries = await fs.readdir(dir, { withFileTypes: true });
    }
    catch {
        // Directory is missing or not accessible — nothing to report.
        return;
    }
    for (const entry of entries) {
        if (!entry.isFile() || !entry.name.endsWith('.md')) {
            continue;
        }
        const absPath = path.join(dir, entry.name);
        const relPath = path.relative(workspaceDir, absPath).split(path.sep).join('/');
        files.push({ absPath, relPath, category });
    }
}
294
+ // ============================================================================
295
+ // Core sync logic
296
+ // ============================================================================
297
/**
 * Map a sync file category to the label shown in the uploaded document header.
 *
 * @param category `'long-term'` or `'daily-log'`.
 * @returns The human-readable label, or `undefined` for any other value.
 */
function categoryLabel(category) {
    if (category === 'long-term') {
        return 'Long-term Memory (MEMORY.md)';
    }
    if (category === 'daily-log') {
        return 'Daily Log (Short-term Memory)';
    }
    return undefined;
}
310
/**
 * Generate a deterministic docId from a relative file path.
 *
 * The same path always yields the same docId, so re-uploading a file
 * overwrites the previous version instead of creating a duplicate.
 *
 * The docId never carries the `.md` extension, since the upload step appends
 * `.md` to the final key and keeping it would produce `memory.md.md`.
 * Backslash separators are treated as `/`, and any character outside
 * `[a-zA-Z0-9._/-]` is replaced by `-`.
 *
 * Category rules:
 *   - `long-term`: flat id (slashes become `-`); a root-level
 *     `memory` / `MEMORY` is normalised to `MEMORY` so case-insensitive
 *     filesystems cannot create two keys for one file.
 *       "MEMORY.md"             → "MEMORY"
 *       ".codebuddy/MEMORY.md"  → ".codebuddy-MEMORY"
 *   - `daily-log`: stored under `memory/`. Directories other than the
 *     trailing `memory` directory are folded into the file name so that logs
 *     from different roots do not overwrite each other:
 *       "memory/2026-03-15.md"            → "memory/2026-03-15"
 *       ".codebuddy/memory/2026-03-15.md" → "memory/.codebuddy-2026-03-15"
 *   - anything else: flat sanitized id.
 *       "AGENTS.md" → "AGENTS"
 *
 * @param relPath  Path relative to the workspace root.
 * @param category File category (`'long-term'`, `'daily-log'`, or other).
 * @returns The docId string.
 */
function filePathToDocId(relPath, category) {
    const withoutExt = relPath
        .replace(/\\/g, '/')
        .replace(/\.md$/i, '');
    const sanitized = withoutExt
        .replace(/[^a-zA-Z0-9._/-]/g, '-')
        .replace(/-+/g, '-')
        .replace(/^-|-$/g, '');
    if (category === 'long-term') {
        const flat = sanitized.replace(/\//g, '-');
        return flat.toLowerCase() === 'memory' ? 'MEMORY' : flat;
    }
    if (category === 'daily-log') {
        const segments = sanitized.split('/');
        const basename = segments.pop() ?? sanitized;
        // The containing "memory" directory is implied by the "memory/" prefix.
        if (segments.length > 0 && segments[segments.length - 1] === 'memory') {
            segments.pop();
        }
        // Whatever remains (e.g. ".codebuddy") distinguishes this log's origin.
        const scope = segments.filter(Boolean).join('-');
        return scope ? `memory/${scope}-${basename}` : `memory/${basename}`;
    }
    // Other categories: flat sanitized id (no slashes).
    return sanitized.replace(/\//g, '-');
}
368
/**
 * Sync a single local memory file to the cloud by its absolute path.
 *
 * Targeted entry point for the `after_tool_call` hook, used when the AI
 * writes to a memory file (MEMORY.md or memory/*.md). It uses the same
 * deterministic docId and hash-based change detection as the full sync, so
 * re-uploading an unchanged file is a no-op.
 *
 * The content hash is recorded in the cache only AFTER the upload succeeds.
 * Recording it earlier would make a failed upload look "unchanged" on the
 * next attempt, so it would never be retried. Skipped workspace identity
 * files never touch the cache.
 *
 * @param ops               CosOperations instance (its `upload` is called).
 * @param absPath           Absolute path to the memory file.
 * @param logger            Logger instance (`info` / `warn`).
 * @param allowedExtensions Optional set of allowed file extensions for asset sync.
 * @returns true if the file was uploaded, false if skipped or failed.
 */
export async function syncSingleMemoryFileToCloud(ops, absPath, logger, allowedExtensions) {
    try {
        const content = await fs.readFile(absPath, 'utf-8');
        if (content.trim().length === 0) {
            logger.info(`local-memory-sync: single-file skip (empty) — ${absPath}`);
            return false;
        }
        // Skip workspace identity files — they are no longer synced to cloud
        const basenameUpper = path.basename(absPath).toUpperCase();
        const isWorkspaceFile = WORKSPACE_SYNC_FILES.some((name) => name.toUpperCase() === basenameUpper);
        if (isWorkspaceFile) {
            logger.info(`local-memory-sync: single-file skip (workspace file no longer synced) — ${absPath}`);
            return false;
        }
        const contentHash = computeHash(content);
        if (hashCache.get(absPath) === contentHash) {
            logger.info(`local-memory-sync: single-file skip (unchanged) — ${absPath}`);
            return false;
        }
        // Category comes from the file name: MEMORY.md (any case) is long-term,
        // everything else is treated as a daily log.
        const category = basenameUpper === 'MEMORY.MD' ? 'long-term' : 'daily-log';
        // Stable relative-ish path for docId generation, e.g.
        //   /path/to/workspace/memory/2026-03-15.md → memory/2026-03-15.md
        const relPath = extractMemoryRelPath(absPath);
        const docId = filePathToDocId(relPath, category);
        const enrichedContent = [
            `# Local Memory: ${relPath}`,
            `**Type**: ${categoryLabel(category)}`,
            `**Source**: ${relPath}`,
            `**Synced**: ${new Date().toISOString()}`,
            '',
            '---',
            '',
            content,
        ].join('\n');
        await ops.upload(enrichedContent, {
            category: 'memory',
            docId,
            metadata: {
                source: 'local-memory-sync',
                type: category,
                filePath: relPath,
                timestamp: Date.now(),
                contentHash,
                trigger: 'after_tool_call',
            },
        });
        // Upload succeeded — only now is it safe to remember this content hash.
        hashCache.set(absPath, contentHash);
        logger.info(`local-memory-sync: single-file uploaded — ${relPath} (${category})`);
        // Extract and upload assets (images + documents) referenced in this memory file.
        // An asset failure must not turn a successful memory upload into a failure.
        try {
            const workspaceDir = path.dirname(absPath);
            const assetResult = await extractAndUploadAssets(ops, [{ absPath, content }], workspaceDir, logger, allowedExtensions);
            if (assetResult.uploaded > 0 || assetResult.failed > 0) {
                logger.info(`local-memory-sync: single-file asset sync — uploaded=${assetResult.uploaded}, `
                    + `skipped=${assetResult.skipped}, failed=${assetResult.failed}`);
            }
        }
        catch (assetErr) {
            logger.warn(`local-memory-sync: single-file asset extraction failed — ${absPath}: ${String(assetErr)}`);
        }
        // Persist hash cache to disk so restarts don't re-upload unchanged files.
        await flushHashCache();
        return true;
    }
    catch (err) {
        logger.warn(`local-memory-sync: single-file upload failed — ${absPath}: ${String(err)}`);
        return false;
    }
}
460
/**
 * Extract a meaningful relative path from an absolute memory file path.
 *
 * Backslashes are normalised to `/` first. Patterns are tried in order and
 * must start at a path-segment boundary, so `/x/project-memory.md` is NOT
 * mistaken for `memory.md`:
 *   /path/to/.codebuddy/memory/2026-03-15.md → .codebuddy/memory/2026-03-15.md
 *   /path/to/.codebuddy/MEMORY.md            → .codebuddy/MEMORY.md
 *   /path/to/workspace/memory/2026-03-15.md  → memory/2026-03-15.md
 *   /path/to/workspace/MEMORY.md             → MEMORY.md
 *   /path/to/workspace/memory.md             → memory.md
 *
 * Anything else (including workspace identity files such as AGENTS.md)
 * falls back to the file's basename.
 *
 * @param absPath Absolute path, with `/` or `\` separators.
 * @returns The `/`-separated relative path.
 */
function extractMemoryRelPath(absPath) {
    const normalized = absPath.replace(/\\/g, '/');
    // Most specific first. Group 1 is the relative path without the leading slash.
    const patterns = [
        /(?:^|\/)(\.codebuddy\/memory\/[^/]+\.md)$/,
        /(?:^|\/)(\.codebuddy\/MEMORY\.md)$/i,
        /(?:^|\/)(memory\/[^/]+\.md)$/,
        /(?:^|\/)((?:MEMORY|memory)\.md)$/,
    ];
    for (const pattern of patterns) {
        const hit = normalized.match(pattern);
        if (hit) {
            return hit[1];
        }
    }
    // Fallback: basename of the normalised path, so Windows-style input
    // yields just the file name on every platform.
    return path.posix.basename(normalized);
}
503
/**
 * Tell whether an absolute path points at a syncable memory file.
 *
 * The check is case-insensitive and separator-agnostic (`\` is treated as
 * `/`). A path qualifies when it is either:
 *   - a file named `memory.md` in any directory (long-term memory), or
 *   - any `*.md` file directly inside a directory named `memory` (daily logs).
 *
 * The IDE-level locations `.codebuddy/MEMORY.md` and `.codebuddy/memory/*.md`
 * are covered by those same two rules. Workspace identity files (AGENTS.md,
 * SOUL.md, ...) do not match either rule and therefore return false.
 *
 * @param absPath Absolute file path.
 * @returns true for a syncable memory file.
 */
export function isMemoryFilePath(absPath) {
    const lowered = absPath.replace(/\\/g, '/').toLowerCase();
    const isLongTermMemory = lowered.endsWith('/memory.md');
    const isDailyLog = /\/memory\/[^/]+\.md$/.test(lowered);
    return isLongTermMemory || isDailyLog;
}
536
/**
 * Sync all local memory files and their referenced assets to the cloud.
 *
 * Main entry point called by the engine during bootstrap or afterTurn when
 * `localMemorySync.enabled` is true.
 *
 * Flow:
 *   1. Resolve the workspace and discover memory files (honouring
 *      `syncLongTermMemory` / `syncDailyLogs`).
 *   2. Upload each non-empty file whose content hash differs from the cached
 *      one. The hash is cached only AFTER a successful upload, so a failed
 *      upload is retried on the next pass instead of being reported as
 *      "unchanged".
 *   3. Scan every file that could be read for asset links and upload them.
 *   4. Flush the hash cache to disk.
 *
 * @param ops               CosOperations instance.
 * @param sessionFile       Session file path (used to resolve workspace dir).
 * @param config            Sync configuration flags.
 * @param logger            Logger instance.
 * @param allowedExtensions Optional set of allowed file extensions for asset sync.
 * @returns `{ uploaded, skipped, failed, details }` for the memory files
 *          (asset counts are only logged).
 */
export async function syncLocalMemoryToCloud(ops, sessionFile, config, logger, allowedExtensions) {
    const result = {
        uploaded: 0,
        skipped: 0,
        failed: 0,
        details: [],
    };
    if (!config.enabled) {
        return result;
    }
    const workspaceDir = resolveWorkspaceDir(sessionFile);
    logger.info(`local-memory-sync: workspace=${workspaceDir}`);
    // ---- Step 1: Discover and upload all syncable files ----
    const memoryFiles = await discoverMemoryFiles(workspaceDir, config);
    logger.info(`local-memory-sync: found ${memoryFiles.length} syncable file(s) in ${workspaceDir}`
        + ` (longTerm=${config.syncLongTermMemory}, dailyLogs=${config.syncDailyLogs})`);
    // Contents of every readable file, reused by the asset step so each file
    // is read from disk only once.
    const allContents = [];
    for (const file of memoryFiles) {
        try {
            const content = await fs.readFile(file.absPath, 'utf-8');
            allContents.push({ absPath: file.absPath, content });
            if (content.trim().length === 0) {
                result.details.push({ path: file.relPath, status: 'skipped', reason: 'empty' });
                result.skipped += 1;
                logger.info(`local-memory-sync: skip (empty) — ${file.relPath}`);
                continue;
            }
            const contentHash = computeHash(content);
            if (hashCache.get(file.absPath) === contentHash) {
                result.details.push({ path: file.relPath, status: 'skipped', reason: 'unchanged' });
                result.skipped += 1;
                continue;
            }
            const docId = filePathToDocId(file.relPath, file.category);
            const enrichedContent = [
                `# Local Memory: ${file.relPath}`,
                `**Type**: ${categoryLabel(file.category)}`,
                `**Source**: ${file.relPath}`,
                `**Synced**: ${new Date().toISOString()}`,
                '',
                '---',
                '',
                content,
            ].join('\n');
            await ops.upload(enrichedContent, {
                category: 'memory',
                docId,
                metadata: {
                    source: 'local-memory-sync',
                    type: file.category,
                    filePath: file.relPath,
                    timestamp: Date.now(),
                    contentHash,
                },
            });
            // Upload succeeded — only now is it safe to remember this content hash.
            hashCache.set(file.absPath, contentHash);
            // "{cosPrefix}" is a literal placeholder in the log line; the real
            // prefix is not known here.
            const targetPath = `{cosPrefix}${docId}.md`;
            result.details.push({ path: file.relPath, status: 'uploaded' });
            result.uploaded += 1;
            logger.info(`local-memory-sync: uploaded — ${file.relPath} [${file.category}] → ${targetPath}`);
        }
        catch (err) {
            result.details.push({
                path: file.relPath,
                status: 'failed',
                reason: String(err),
            });
            result.failed += 1;
            logger.warn(`local-memory-sync: upload failed — ${file.relPath}: ${String(err)}`);
        }
    }
    logger.info(`local-memory-sync: done — uploaded=${result.uploaded}, skipped=${result.skipped}, failed=${result.failed}`);
    // ---- Step 2: Extract and upload assets (images + documents) referenced in memory files ----
    // All readable memory files are scanned, including unchanged ones, because
    // a referenced asset can change while the Markdown stays the same.
    try {
        const assetResult = await extractAndUploadAssets(ops, allContents, workspaceDir, logger, allowedExtensions);
        if (assetResult.uploaded > 0 || assetResult.failed > 0) {
            logger.info(`local-memory-sync: asset sync — uploaded=${assetResult.uploaded}, `
                + `skipped=${assetResult.skipped}, failed=${assetResult.failed}`);
        }
    }
    catch (err) {
        logger.warn(`local-memory-sync: asset extraction/upload failed: ${String(err)}`);
    }
    // Persist hash cache to disk so restarts don't re-upload unchanged files.
    await flushHashCache();
    return result;
}
645
+ // ============================================================================
646
+ // Image extraction and upload
647
+ // ============================================================================
648
/**
 * Matches Markdown image syntax: `![alt](target)`.
 *
 * Capture groups:
 *   1 — alt text (may be empty)
 *   2 — everything between the parentheses, i.e. the target plus any
 *       optional `"title"` suffix (the consumer strips the title)
 *
 * The pattern itself does not distinguish local paths from remote ones:
 * `https://…` and `data:` targets match too and have to be filtered out by
 * the caller. Plain links (`[alt](target)` without the leading `!`) are
 * handled by a separate pattern.
 *
 * Global flag: reset `lastIndex` before reusing with `exec`.
 */
const MARKDOWN_IMAGE_RE = /!\[([^\]]*)\]\(([^)]+)\)/g;
666
/**
 * Matches plain Markdown links `[text](target)`, i.e. links that are NOT
 * preceded by `!` (the negative lookbehind excludes image syntax). Used to
 * pick up linked local files such as PDFs and documents.
 *
 * Capture groups: 1 — link text, 2 — content of the parentheses.
 */
const MARKDOWN_LINK_RE = /(?<!!)\[([^\]]*)\]\(([^)]+)\)/g;
671
/**
 * Matches the `src` attribute of HTML `<img>` and `<video>` tags
 * (case-insensitive, global).
 *
 * Exactly one of the three capture groups is set, depending on quoting:
 *   1 — double-quoted value:  `<img src="path/to/image.png">`
 *   2 — single-quoted value:  `<video src='path/to/video.mp4'>`
 *   3 — unquoted value:       `<img src=path/to/image.png>`
 *
 * Other attributes may appear before `src`.
 */
const HTML_MEDIA_SRC_RE = /<(?:img|video)\s[^>]*?\bsrc\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s>]+))/gi;
680
/**
 * Matches "bare" HTTP(S) image URLs written as plain text.
 *
 * A URL qualifies when its path ends in a known image extension and is
 * followed by `?`, `#`, whitespace or end of input; the query string or
 * fragment itself is not part of the match. URLs that directly follow `](`
 * (a Markdown link or image target) are excluded by the lookbehind.
 *
 * The extraction logic uses the file name of such a URL to look for a
 * locally downloaded copy in `~/Downloads/`.
 */
const BARE_IMAGE_URL_RE = /(?<!\]\()https?:\/\/[^\s)]+\.(?:png|jpe?g|gif|bmp|webp|svg|ico|tiff?|avif|heic|heif)(?=[?\s#]|$)/gi;
691
/**
 * Matches bare local file paths written as plain text.
 *
 * A match must start at the beginning of a line or after whitespace, `,` or
 * `;`, and must end in a known image, document or video extension followed
 * by whitespace, `,`, `;` or end of line. Capture group 1 holds the path.
 *
 * Accepted shapes:
 *   - absolute:            `/root/.openclaw/workspace/pictures/10/1.jpg`
 *   - relative:            `./pics/1.jpg`, `../../.openclaw/workspace/pictures/10/1.jpg`
 *   - home-relative:       `~/Pictures/screenshot.png`
 *   - `~` without a slash: `~Downloads/report.pdf` (common typo, treated as
 *     home-directory shorthand)
 *
 * Flags: global, case-insensitive, multiline.
 */
const BARE_LOCAL_PATH_RE = /(?:^|[\s,;])(~[^\s,;:*?"<>|/\\]+\/[^\s,;:*?"<>|]+\.(?:png|jpe?g|gif|bmp|webp|svg|ico|tiff?|avif|heic|heif|pdf|doc|docx|xls|xlsx|ppt|pptx|txt|csv|md|rtf|mp4|mov|avi|mkv)|[~.]?(?:\.\/|\.\.\/|\/)[^\s,;:*?"<>|]+\.(?:png|jpe?g|gif|bmp|webp|svg|ico|tiff?|avif|heic|heif|pdf|doc|docx|xls|xlsx|ppt|pptx|txt|csv|md|rtf|mp4|mov|avi|mkv))(?=[\s,;]|$)/gim;
707
/**
 * Lower-case file extensions (with leading dot) that are treated as images.
 */
const IMAGE_EXTENSIONS = new Set(
    '.png .jpg .jpeg .gif .bmp .webp .svg .ico .tiff .tif .avif .heic .heif'.split(' '),
);
714
/**
 * Extensions synced as assets when the user does not supply their own list
 * through the `syncFileExtensions` config: all image extensions first,
 * followed by the document extensions.
 */
const DEFAULT_SYNC_FILE_EXTENSIONS = [
    // Images
    ...['.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp'],
    ...['.svg', '.ico', '.tiff', '.tif', '.avif', '.heic', '.heif'],
    // Documents
    ...['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx'],
    ...['.txt', '.csv', '.md', '.rtf'],
];
727
/**
 * Lookup table from lower-case file extension (with leading dot) to MIME
 * type, covering both the image and the document extensions.
 */
const MIME_TYPES = {
    // Images
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.gif': 'image/gif',
    '.bmp': 'image/bmp',
    '.webp': 'image/webp',
    '.svg': 'image/svg+xml',
    '.ico': 'image/x-icon',
    '.tiff': 'image/tiff',
    '.tif': 'image/tiff',
    '.avif': 'image/avif',
    '.heic': 'image/heic',
    '.heif': 'image/heif',
    // Documents — legacy Office formats
    '.pdf': 'application/pdf',
    '.doc': 'application/msword',
    '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    '.xls': 'application/vnd.ms-excel',
    '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    '.ppt': 'application/vnd.ms-powerpoint',
    '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    // Documents — text formats
    '.txt': 'text/plain',
    '.csv': 'text/csv',
    '.md': 'text/markdown',
    '.rtf': 'application/rtf',
};
757
/**
 * Accessor for the `asset` namespace of the persistent hash cache.
 * Writing a value marks the cache dirty so the next flush persists it.
 */
const imageHashCache = {
    get: (key) => loadHashCache().asset[key],
    set: (key, value) => {
        const cache = loadHashCache();
        cache.asset[key] = value;
        cacheDirty = true;
    },
};
767
/**
 * Extract candidate local asset paths (images, media, documents) from Markdown.
 *
 * The content is scanned in five passes, in this order. Each distinct path is
 * reported once, at the position of its first hit:
 *   1. Markdown image syntax `![alt](path)` (via `MARKDOWN_IMAGE_RE`).
 *   2. Bare image URLs (via `BARE_IMAGE_URL_RE`). The URL itself is never
 *      returned; instead the filename from the URL path is looked up in
 *      `~/Downloads/` and, if an image file with that name exists there, the
 *      local path is returned. The percent-decoded filename is tried first
 *      (`my%20photo.jpg` → `my photo.jpg`), then the raw encoded one.
 *   3. Markdown link syntax `[alt](path)` (via `MARKDOWN_LINK_RE`).
 *   4. `src` attributes matched by `HTML_MEDIA_SRC_RE` (double-quoted,
 *      single-quoted or unquoted values).
 *   5. Bare local file paths in plain text (via `BARE_LOCAL_PATH_RE`).
 *
 * In passes 1, 3 and 4, `http(s)://` URLs, `data:` URIs and empty values are
 * skipped. Pass 3 additionally skips anchor-only targets (`#section`). An
 * optional double-quoted title (`path "title"`) is stripped in passes 1 and 3.
 *
 * The returned strings are raw: relative and `~`-prefixed paths are not
 * resolved here, and (except for pass 2) existence is not checked.
 *
 * @param content Markdown text to scan.
 * @returns Array of path strings in discovery order, without duplicates.
 */
export function extractImageLinks(content) {
    const links = [];
    const seenPaths = new Set();
    // Record a path once; empty strings are never recorded.
    const remember = (candidate) => {
        if (candidate.length === 0 || seenPaths.has(candidate)) {
            return;
        }
        seenPaths.add(candidate);
        links.push(candidate);
    };
    // Remote links and inline data stay where they are — nothing to upload.
    const isRemoteOrInline = (target) => /^https?:\/\//i.test(target) || target.startsWith('data:');
    // `path "title"` → `path`
    const stripTitle = (rawTarget) => {
        const trimmed = rawTarget.trim();
        const titled = trimmed.match(/^(.+?)\s+"[^"]*"$/);
        return titled ? titled[1].trim() : trimmed;
    };
    // The regexes are shared module-level /g instances, so lastIndex must be
    // reset before every scan.
    const scan = (regex, visit) => {
        regex.lastIndex = 0;
        let match;
        while ((match = regex.exec(content)) !== null) {
            visit(match);
        }
    };
    // Filenames to try in ~/Downloads for a URL basename: decoded first, because
    // URL.pathname is percent-encoded while files on disk normally are not.
    const downloadNameCandidates = (encodedName) => {
        const names = [];
        try {
            // basename() again: a decoded %2F must not escape ~/Downloads.
            const decoded = path.basename(decodeURIComponent(encodedName));
            if (decoded && decoded !== '.' && decoded !== '..') {
                names.push(decoded);
            }
        }
        catch {
            // Malformed escape sequence — only the raw name is tried.
        }
        if (!names.includes(encodedName)) {
            names.push(encodedName);
        }
        return names;
    };
    // --- Pass 1: Markdown image syntax ![alt](path) ---
    scan(MARKDOWN_IMAGE_RE, (match) => {
        const target = stripTitle(match[2]);
        if (!isRemoteOrInline(target)) {
            remember(target);
        }
    });
    // --- Pass 2: bare image URLs → locally downloaded copy in ~/Downloads ---
    const homeDir = process.env.HOME ?? process.env.USERPROFILE ?? '';
    if (homeDir) {
        const downloadsDir = path.join(homeDir, 'Downloads');
        scan(BARE_IMAGE_URL_RE, (match) => {
            let urlPath;
            try {
                urlPath = new URL(match[0]).pathname;
            }
            catch {
                // Malformed URL — skip
                return;
            }
            const encodedName = path.basename(urlPath);
            if (!encodedName || encodedName === '/') {
                return;
            }
            for (const name of downloadNameCandidates(encodedName)) {
                const localPath = path.join(downloadsDir, name);
                if (isImageFile(localPath) && fsSync.existsSync(localPath)) {
                    remember(localPath);
                    break;
                }
            }
        });
    }
    // --- Pass 3: Markdown link syntax [alt](path) — local targets only ---
    scan(MARKDOWN_LINK_RE, (match) => {
        const target = stripTitle(match[2]);
        if (isRemoteOrInline(target) || target.startsWith('#')) {
            return;
        }
        remember(target);
    });
    // --- Pass 4: HTML media tags with a src attribute ---
    scan(HTML_MEDIA_SRC_RE, (match) => {
        // Capture group 1 = double-quoted, 2 = single-quoted, 3 = unquoted
        const target = (match[1] ?? match[2] ?? match[3] ?? '').trim();
        if (!isRemoteOrInline(target)) {
            remember(target);
        }
    });
    // --- Pass 5: bare local file paths in plain text ---
    scan(BARE_LOCAL_PATH_RE, (match) => {
        remember(match[1].trim());
    });
    return links;
}
893
/**
 * Turn a raw asset path taken from Markdown into an absolute file path.
 *
 * Resolution rules, first match wins:
 *   - already absolute → returned unchanged;
 *   - starts with `~` and a home directory is known (HOME, else USERPROFILE):
 *     `~/x` and `~\x` map to `<home>/x`; the separator-less form `~x` is
 *     treated as a typo for `~/x` and also maps to `<home>/x`;
 *   - anything else → resolved against the Markdown file's directory when the
 *     result exists on disk, otherwise against the workspace root.
 *
 * @param imagePath Raw path from Markdown.
 * @param mdFilePath Absolute path of the Markdown file containing the link.
 * @param workspaceDir Workspace root directory (fallback base for relative paths).
 * @returns Resolved absolute path (existence is not guaranteed).
 */
function resolveImagePath(imagePath, mdFilePath, workspaceDir) {
    if (path.isAbsolute(imagePath)) {
        return imagePath;
    }
    // Only consult the environment for tilde paths; an unknown home directory
    // makes a tilde path fall through to relative resolution.
    const home = imagePath.startsWith('~')
        ? (process.env.HOME ?? process.env.USERPROFILE ?? '')
        : '';
    if (home) {
        const second = imagePath[1];
        const charsToDrop = second === '/' || second === '\\' ? 2 : 1;
        return path.join(home, imagePath.slice(charsToDrop));
    }
    const besideMarkdown = path.resolve(path.dirname(mdFilePath), imagePath);
    return fsSync.existsSync(besideMarkdown)
        ? besideMarkdown
        : path.resolve(workspaceDir, imagePath);
}
933
/**
 * Tell whether a path's extension (compared case-insensitively) is one of
 * the entries in `IMAGE_EXTENSIONS`.
 *
 * @param filePath Path to check; only its extension is inspected.
 * @returns `true` when the extension is a known image extension.
 */
function isImageFile(filePath) {
    return IMAGE_EXTENSIONS.has(path.extname(filePath).toLowerCase());
}
940
/**
 * Tell whether a path's extension (compared case-insensitively) is in the
 * set of syncable asset extensions.
 *
 * @param filePath Path to check; only its extension is inspected.
 * @param allowedExts Set of allowed extensions. When omitted (or otherwise
 *   falsy), `IMAGE_EXTENSIONS` is used, so the check is images-only.
 * @returns `true` when the extension is allowed.
 */
function isAssetFile(filePath, allowedExts) {
    const extensionSet = allowedExts || IMAGE_EXTENSIONS;
    return extensionSet.has(path.extname(filePath).toLowerCase());
}
950
/**
 * Fingerprint binary content for asset dedup.
 *
 * @param buffer Content to hash.
 * @returns The first 16 hex characters of the content's SHA-256 digest.
 */
function computeAssetHash(buffer) {
    const hasher = crypto.createHash('sha256');
    hasher.update(buffer);
    const hexDigest = hasher.digest('hex');
    return hexDigest.substring(0, 16);
}
956
/**
 * Compare an asset's current content hash with the one stored in
 * `imageHashCache` under its absolute path.
 *
 * Side effect: when the hashes differ (or no hash is stored yet), the new
 * hash is written to the cache before returning.
 *
 * @param absPath Absolute path of the asset; used as the cache key.
 * @param buffer Current content of the asset.
 * @returns `true` if the content differs from the cached hash, else `false`.
 */
function assetHasChanged(absPath, buffer) {
    const currentHash = computeAssetHash(buffer);
    const changed = imageHashCache.get(absPath) !== currentHash;
    if (changed) {
        imageHashCache.set(absPath, currentHash);
    }
    return changed;
}
968
/**
 * Build the COS object key for an asset under the **asset/** directory,
 * mirroring the file's absolute path so the directory structure is preserved.
 *
 * Layout: `{agentPrefix}asset/{absolutePath without leading separator}`
 *   e.g. `/Users/shawn/Downloads/test.jpg` → `…asset/Users/shawn/Downloads/test.jpg`
 *   e.g. `/tmp/pic.png`                    → `…asset/tmp/pic.png`
 *   e.g. `/Users/shawn/Downloads/CI控制台指南.pdf` → `…asset/Users/shawn/Downloads/CI控制台指南.pdf`
 * where `agentPrefix` is whatever `ops.getAgentPrefix()` returns.
 *
 * Each path segment is sanitised separately (the separators become `/`):
 *   - control characters (U+0000–U+001F, U+007F) and
 *     `*`, `?`, `"`, `<`, `>`, `|`, `#`, `%`, `{`, `}`, `^`, `` ` ``, `[`, `]`
 *     are replaced by `-`;
 *   - runs of `-` collapse to one, and a leading/trailing `-` is dropped;
 *   - a segment that ends up empty (it consisted only of replaced characters
 *     or dashes) becomes `_`, so the key never contains an empty segment
 *     (`//`) or ends in `/`.
 * All other characters, including non-ASCII ones, are kept as-is.
 *
 * @param ops CosOperations instance (provides the agent prefix).
 * @param resolvedPath Path of the asset file; normalised with `path.resolve`.
 * @returns Full COS key string.
 */
function buildAssetCosKey(ops, resolvedPath) {
    const agentPrefix = ops.getAgentPrefix();
    // Use the absolute path directly, stripping the leading separator.
    const absolutePath = path.resolve(resolvedPath);
    const withoutRoot = absolutePath.startsWith(path.sep)
        ? absolutePath.slice(path.sep.length)
        : absolutePath;
    const sanitiseSegment = (segment) => {
        const cleaned = segment
            // eslint-disable-next-line no-control-regex
            .replace(/[\x00-\x1f\x7f*?"<>|#%{}^`[\]]/g, '-')
            .replace(/-+/g, '-')
            .replace(/^-|-$/g, '');
        // Keep the directory depth intact instead of emitting an empty segment.
        return cleaned.length > 0 ? cleaned : '_';
    };
    const safePath = withoutRoot
        .split(path.sep)
        .map(sanitiseSegment)
        .join('/');
    return `${agentPrefix}asset/${safePath}`;
}
1007
/**
 * Extract asset links (images + documents) from memory file contents and
 * upload the referenced local files to COS.
 *
 * For every link returned by `extractImageLinks(content)`:
 *   1. Resolve it to an absolute local path (`resolveImagePath`).
 *   2. Skip it if the same resolved path was already handled in this call.
 *   3. Skip it if the file does not exist or its extension is not allowed.
 *   4. Skip it if its content hash equals the hash cached for that path.
 *   5. Upload it to the key built by `buildAssetCosKey` (asset/ directory,
 *      mirroring the absolute path), with `category` metadata `image` or
 *      `document`.
 *   6. Only after the upload has succeeded, store the new hash in the cache.
 *      A failed upload therefore leaves the cache untouched and the file is
 *      retried on the next sync instead of being reported as "unchanged".
 *
 * Errors for a single asset are caught, counted as `failed` and logged with
 * `logger.warn`; they do not abort the remaining assets.
 *
 * @param ops CosOperations instance; `getAgentPrefix()` and `uploadBinary()` are used.
 * @param fileContents Array of `{ absPath, content }`, one per memory file.
 * @param workspaceDir Workspace root used as fallback base for relative paths.
 * @param logger Logger exposing `info` and `warn`.
 * @param allowedExtensions Optional Set of allowed lower-case extensions
 *   (with leading dot). Defaults to `DEFAULT_SYNC_FILE_EXTENSIONS`.
 * @returns `{ uploaded, skipped, failed, details }` where `details` holds one
 *   `{ localPath, status, reason? , cosKey? }` entry per distinct asset.
 */
export async function extractAndUploadAssets(ops, fileContents, workspaceDir, logger, allowedExtensions) {
    const result = {
        uploaded: 0,
        skipped: 0,
        failed: 0,
        details: [],
    };
    // Build the effective allowed-extension set
    const allowedExts = allowedExtensions ?? new Set(DEFAULT_SYNC_FILE_EXTENSIONS);
    // Resolved paths already handled in this call (same file linked repeatedly)
    const seenPaths = new Set();
    const recordSkip = (localPath, reason) => {
        result.details.push({ localPath, status: 'skipped', reason });
        result.skipped += 1;
    };
    for (const { absPath: mdPath, content } of fileContents) {
        for (const rawLink of extractImageLinks(content)) {
            const resolvedPath = resolveImagePath(rawLink, mdPath, workspaceDir);
            if (seenPaths.has(resolvedPath)) {
                continue;
            }
            seenPaths.add(resolvedPath);
            try {
                if (!fsSync.existsSync(resolvedPath)) {
                    logger.info(`local-memory-sync: asset not found — ${rawLink} (resolved: ${resolvedPath})`);
                    recordSkip(resolvedPath, 'file not found');
                    continue;
                }
                if (!isAssetFile(resolvedPath, allowedExts)) {
                    logger.info(`local-memory-sync: not a supported asset — ${resolvedPath}`);
                    recordSkip(resolvedPath, 'not a supported asset format');
                    continue;
                }
                const buffer = await fs.readFile(resolvedPath);
                // Hash once; reused for dedup, upload metadata and the cache.
                const contentHash = computeAssetHash(buffer);
                if (imageHashCache.get(resolvedPath) === contentHash) {
                    recordSkip(resolvedPath, 'unchanged');
                    continue;
                }
                // Assets go to asset/ preserving directory structure
                const cosKey = buildAssetCosKey(ops, resolvedPath);
                const ext = path.extname(resolvedPath).toLowerCase();
                const contentType = MIME_TYPES[ext] ?? 'application/octet-stream';
                const assetCategory = isImageFile(resolvedPath) ? 'image' : 'document';
                // Paths may contain non-ASCII characters (e.g. Chinese); HTTP custom
                // headers require ASCII values, so path values are URI-encoded.
                const assetMetadata = {
                    source: 'local-memory-sync',
                    type: `asset-${assetCategory}`,
                    category: assetCategory,
                    originalPath: encodeURIComponent(rawLink),
                    resolvedPath: encodeURIComponent(resolvedPath),
                    timestamp: Date.now(),
                    contentHash,
                };
                await ops.uploadBinary(buffer, cosKey, contentType, assetMetadata);
                // Commit the hash only now: caching it before a failed upload would
                // make every later sync skip the file as "unchanged".
                imageHashCache.set(resolvedPath, contentHash);
                result.details.push({
                    localPath: resolvedPath,
                    cosKey,
                    status: 'uploaded',
                });
                result.uploaded += 1;
                logger.info(`local-memory-sync: asset uploaded — ${rawLink} → ${cosKey} [${assetCategory}]`);
            }
            catch (err) {
                result.details.push({
                    localPath: resolvedPath,
                    status: 'failed',
                    reason: String(err),
                });
                result.failed += 1;
                const errMsg = err instanceof Error ? err.message : JSON.stringify(err, null, 2);
                logger.warn(`local-memory-sync: asset upload failed — ${resolvedPath}: ${errMsg}`);
            }
        }
    }
    return result;
}
/**
 * @deprecated Use `extractAndUploadAssets` instead. Kept for backward compatibility;
 * this is the same function under its old name.
 */
export const extractAndUploadImages = extractAndUploadAssets;
1126
+ //# sourceMappingURL=local-memory-sync.js.map