metainsight-context-engine 0.0.8 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,780 @@
1
+ /**
2
+ * COS Operations — Unified CRUD Operations Layer
3
+ *
4
+ * Provides all data operations (upload / search / download / delete) for the
5
+ * Cloud Context Engine. Built on top of the bootstrap layer (`cos-bootstrap.ts`)
6
+ * and routes requests to the correct dataset / COS prefix based on `category`.
7
+ *
8
+ * Architecture:
9
+ * cos-bootstrap.ts → cos-operations.ts → engine.ts / index.ts
10
+ * (init + low-level) (CRUD operations) (business logic)
11
+ *
12
+ * The constructor accepts a `BootstrapOutcome`, so consumers only need to call
13
+ * `bootstrap()` once and pass the result here — no duplicate initialization.
14
+ *
15
+ * Multimodal retrieval:
16
+ * - Text/memory queries route to DocSearch datasets (category="memory")
17
+ * - Image queries route to ImageSearch datasets (category="image")
18
+ * - The search template is automatically selected per-dataset based on
19
+ * the dataset's `templateId` (Official:ImageSearch → "ImageSearch").
20
+ *
21
+ * Key improvements over the old `cloud-client.ts`:
22
+ * - Upload path uses the dataset's `cosPrefix` (not hardcoded `uploads/`)
23
+ * - Search routes to the correct dataset name based on category
24
+ * - Automatic template selection for multimodal (DocSearch / ImageSearch)
25
+ * - Download support (getObject from COS)
26
+ * - Delete support (deleteObject from COS)
27
+ */
28
+
29
+ import type COS from 'cos-nodejs-sdk-v5';
30
+
31
+ import type { BootstrapOutcome, ResolvedCosConfig, ResolvedDataset } from './cos-bootstrap.js';
32
+ import { sendCIRequest } from './cos-bootstrap.js';
33
+
34
+ // ============================================================================
35
+ // Types
36
+ // ============================================================================
37
+
38
+ export interface CloudSearchResult { // One search hit, as produced by the doc/image result mappers of CosOperations.
39
+ snippet: string; // Matched text for doc results ('' when CI returns none); "[image] <uri>" for image results.
40
+ score: number; // CI score divided by 100, i.e. normalized from 0-100 to 0-1.
41
+ docId?: string; // Derived from the result URI: scheme/bucket and a matching dataset cosPrefix stripped, `__` turned back into `/`.
42
+ category?: 'memory' | 'document'; // Echo of the requested category, 'document' when none was given. NOTE(review): the mappers cast the caller's string, so other values (e.g. 'image') can appear at runtime.
43
+ /** Signed HTTPS URL for image results (only present for ImageSearch results). */
44
+ imageUrl?: string;
45
+ metadata?: Record<string, unknown>; // Doc results: { uri, textPage, imageUrls }; image results: { uri }.
46
+ }
47
+
48
+ export interface CloudUploadResult { // Outcome of a single upload (text, binary, or one item of a batch).
49
+ docId: string; // Caller-supplied or generated docId; the COS key for binary uploads; 'unknown' for a failed batch item without a docId.
50
+ key: string; // COS object key that was written; '' for a failed batch item.
51
+ status: string; // 'uploaded' on success, 'error' for a failed batch item.
52
+ /** Error message when status === 'error'. */
53
+ error?: string;
54
+ }
55
+
56
+ export interface CloudDownloadResult { // Result of CosOperations.download().
57
+ content: string; // Object body as text (Buffers are decoded as UTF-8), or the `content` field of a legacy JSON wrapper.
58
+ key: string; // Full COS key that was actually fetched.
59
+ metadata?: Record<string, unknown>; // `x-cos-meta-*` response headers with that prefix removed, or the legacy wrapper's `metadata` when present.
60
+ }
61
+
62
+ export interface CloudSearchOptions { // Optional tuning for CosOperations.search().
63
+ category?: string; // Selects the dataset to query (matched against dataset cosPrefix / name); first dataset when omitted.
64
+ maxResults?: number; // Upper bound on the number of results the caller wants.
65
+ minScore?: number; // Minimum score on a 0-1 scale; doc search converts it to a 0-100 MatchThreshold, never below the configured threshold.
66
+ }
67
+
68
+ export interface CloudUploadOptions { // Optional settings for CosOperations.upload().
69
+ category?: string; // Routes to a dataset and is stored as `x-cos-meta-category`; upload() defaults it to 'general'.
70
+ docId?: string; // Logical document id; upload() generates `{category}-{timestamp}-{random}` when omitted.
71
+ metadata?: Record<string, unknown>; // Stored as `x-cos-meta-*` headers (camelCase keys become kebab-case; non-string values are JSON-serialized).
72
+ /**
73
+ * When set, overrides the default COS key construction
74
+ * (`{cosPrefix}{docId}.md`) and uses this value as the full COS key.
75
+ *
76
+ * Useful for placing files outside the dataset's cosPrefix directory,
77
+ * e.g. workspace identity files that should live at
78
+ * `openclaw-{agentId}/agents.md` instead of
79
+ * `openclaw-{agentId}/workspace/agents.md`.
80
+ */
81
+ customKey?: string;
82
+ }
83
+
84
+ export interface CloudDownloadOptions { // Options shared by CosOperations.download() and deleteDoc().
85
+ category?: string; // Picks the dataset whose cosPrefix is prepended when the given key is a short docId rather than a full COS key.
86
+ }
87
+
88
+ /** DocSearch result item from CI hybridsearch API (read from `DocResult` in the response). */
89
+ interface DocResultItem {
90
+ Text?: string; // Matched text; becomes the result snippet.
91
+ Score?: number; // Treated as a 0-100 value and divided by 100 by the mapper.
92
+ URI?: string; // Object URI; the mapper strips a leading `cos://<bucket>/` to derive the docId.
93
+ TextPage?: number; // Passed through unchanged as `metadata.textPage`.
94
+ ImageUrls?: Record<string, string>; // Passed through unchanged as `metadata.imageUrls`.
95
+ }
96
+
97
+ /** ImageSearch result item, read from `ImageResult` in the response of the CI `datasetquery/imagesearch` request. */
98
+ interface ImageResultItem {
99
+ URI?: string; // Object URI; used for the signed image URL, the docId, and `metadata.uri`.
100
+ Score?: number; // Treated as a 0-100 value and divided by 100 by the mapper.
101
+ }
102
+
103
+ // ============================================================================
104
+ // Helpers — docId sanitization
105
+ // ============================================================================
106
+
107
/**
 * Make a docId safe to embed in a COS object key.
 *
 * COS treats `/` as a folder separator, so a name such as
 * `IMAP/SMTP Email Tool` would otherwise be spread over several folders.
 * Offending slashes are rewritten to `__`.
 *
 * Rules, applied in this order:
 *  - docId contains `:` → everything up to and including the FIRST colon is
 *    kept verbatim; every `/` after it becomes `__`.
 *  - otherwise, docId contains `/` → the FIRST slash is kept as an intentional
 *    directory boundary (e.g. `memory/2026-03-15`); every later `/` becomes `__`.
 *  - otherwise → returned unchanged.
 *
 * Examples:
 *   "memory:IMAP/SMTP Email Tool" → "memory:IMAP__SMTP Email Tool"
 *   "IMAP__SMTP Email Tool"       → "IMAP__SMTP Email Tool"   (already safe)
 *   "memory/2026-03-15"           → "memory/2026-03-15"       (dir preserved)
 *   "memory/IMAP/SMTP Tool"       → "memory/IMAP__SMTP Tool"  (dir kept, name escaped)
 *
 * @param docId Logical document id supplied by the caller.
 * @returns The id with slashes escaped according to the rules above.
 */
function sanitizeDocId(docId: string): string {
  // Rewrites every `/` in a segment to `__`.
  const escapeSlashes = (segment: string): string => segment.split('/').join('__');

  // "prefix:name" form — the tag before the colon is left untouched.
  const colonAt = docId.indexOf(':');
  if (colonAt >= 0) {
    const tag = docId.slice(0, colonAt + 1);
    return tag + escapeSlashes(docId.slice(colonAt + 1));
  }

  // Plain form — keep only the first slash as a folder boundary.
  const slashAt = docId.indexOf('/');
  if (slashAt < 0) {
    return docId;
  }
  const folder = docId.slice(0, slashAt);
  const fileName = escapeSlashes(docId.slice(slashAt + 1));
  return `${folder}/${fileName}`;
}
157
+
158
/**
 * Undo the slash escaping applied by `sanitizeDocId`: every `__` in the
 * given COS key segment is turned back into `/`. Used when converting COS
 * URIs from CI search results into docIds.
 *
 * Note: the mapping is not a strict inverse — a name that contained `__`
 * before sanitization also comes back with `/` in its place.
 *
 * @param sanitized Key segment as stored on COS.
 * @returns The segment with each `__` replaced by `/`.
 */
function unsanitizeDocId(sanitized: string): string {
  const pieces = sanitized.split('__');
  return pieces.join('/');
}
165
+
166
+ // ============================================================================
167
+ // CosOperations — Unified CRUD
168
+ // ============================================================================
169
+
170
/**
 * Unified CRUD layer over a COS bucket and its CI datasets.
 *
 * Built from a successful `BootstrapOutcome`; every operation picks the
 * target dataset (and therefore the COS key prefix) from a `category`.
 */
export class CosOperations {
  private readonly cos: COS;
  private readonly config: ResolvedCosConfig;
  /** Search template — DocSearch or ImageSearch. Default: DocSearch. */
  private readonly template: string;
  /** Default match threshold (0-100). CI recommends 80, we default to 60. */
  private readonly matchThreshold: number;

  /**
   * Create a CosOperations instance from a successful bootstrap outcome.
   *
   * @param outcome Result of `bootstrap()`; must have `success` set.
   * @param opts    Optional search template / match threshold overrides.
   * @throws Error if the bootstrap outcome indicates failure.
   */
  constructor(
    outcome: BootstrapOutcome,
    opts?: { template?: string; matchThreshold?: number },
  ) {
    if (!outcome.success) {
      throw new Error(`CosOperations: cannot initialize from failed bootstrap: ${outcome.error}`);
    }
    this.cos = outcome.cos;
    this.config = outcome.config;
    this.template = opts?.template ?? 'DocSearch';
    this.matchThreshold = opts?.matchThreshold ?? 60;
  }

  // ==========================================================================
  // Dataset routing
  // ==========================================================================

  /**
   * Resolve the dataset definition for a given category.
   *
   * Routing rules (first match wins, comparisons are case-insensitive):
   *  1. A dataset whose `cosPrefix` starts with `{category}/`
   *     (legacy layout: cosPrefix="memory/" matches category="memory").
   *  2. A dataset whose `cosPrefix` contains `/{category}/`
   *     (multi-agent layout: cosPrefix="openclaw-xxx/workspace/memory/").
   *  3. A dataset whose `name` contains the category.
   *  4. Otherwise the first configured dataset.
   *
   * With no category, or with exactly one dataset, the first dataset is
   * returned without any matching.
   */
  private resolveDataset(category?: string): ResolvedDataset {
    const datasets = this.config.datasets;
    if (!category || datasets.length === 1) {
      return datasets[0];
    }

    const lowerCat = category.toLowerCase();

    // Priority 1: cosPrefix starts with category (legacy: cosPrefix="memory/")
    const byPrefix = datasets.find((ds) => ds.cosPrefix.toLowerCase().startsWith(`${lowerCat}/`));
    if (byPrefix) {
      return byPrefix;
    }

    // Priority 2: cosPrefix contains category as a path segment
    const byContains = datasets.find((ds) => ds.cosPrefix.toLowerCase().includes(`/${lowerCat}/`));
    if (byContains) {
      return byContains;
    }

    // Priority 3: name contains category (e.g. "memory" → "openclaw-memory-xxx")
    const byName = datasets.find((ds) => ds.name.toLowerCase().includes(lowerCat));
    if (byName) {
      return byName;
    }

    // Fallback: first dataset
    return datasets[0];
  }

  // ==========================================================================
  // Search — CI hybridsearch / imagesearch API
  // ==========================================================================

  /**
   * Semantic / hybrid search against the CI dataset selected by `opts.category`.
   *
   * Routing:
   *  - Image datasets (templateId contains "ImageSearch") →
   *    `POST datasetquery/imagesearch` with `DatasetName`, `Mode: "text"`,
   *    `Text` (at most 55 UTF-16 units, "-" when empty), `Limit: 10` and
   *    `MatchThreshold: 60`.
   *  - All other datasets → `POST datasetquery/hybridsearch` with the
   *    configured template, `Limit: 30`, and a threshold derived from
   *    `opts.minScore` (never below the configured match threshold).
   *    `SearchText` is cut to at most 60 UTF-16 units.
   *
   * Truncation never leaves half of a surrogate pair at the end of the text.
   *
   * When the caller passes `opts.maxResults` (a finite value ≥ 1) the mapped
   * result list is cut to that many entries, in the order CI returned them
   * (assumes CI returns the best matches first — TODO confirm). Without
   * `maxResults` every result from CI is returned.
   *
   * @param query Free-text query.
   * @param opts  Category routing, result cap and minimum score.
   * @returns Mapped search hits with scores normalized to 0-1.
   */
  async search(query: string, opts?: CloudSearchOptions): Promise<CloudSearchResult[]> {
    const { maxResults, minScore, category } = opts ?? {};
    const dataset = this.resolveDataset(category);

    const isImageDataset = dataset.templateId.includes('ImageSearch');

    // ── Image dataset → simplified imagesearch endpoint ──
    if (isImageDataset) {
      const imageBody: Record<string, unknown> = {
        DatasetName: dataset.name,
        Mode: 'text',
        Text: CosOperations.truncateText(String(query), 55) || '-',
        Limit: 10,
        MatchThreshold: 60,
      };

      const imageRaw = await sendCIRequest(
        this.cos,
        this.config.bucket,
        this.config.region,
        'POST',
        'datasetquery/imagesearch',
        imageBody,
      );

      // The payload may or may not be wrapped in a `Response` envelope.
      const imageRes = imageRaw as Record<string, unknown>;
      const imageEnvelope = imageRes.Response as Record<string, unknown> | undefined;
      const imageResult = (imageEnvelope?.ImageResult ?? imageRes.ImageResult) as
        | ImageResultItem[]
        | undefined;

      return CosOperations.limitResults(this.mapImageResults(imageResult, category), maxResults);
    }

    // ── Doc dataset → hybridsearch endpoint (full params) ──
    // CI API: SearchText max 60 characters
    const searchText = CosOperations.truncateText(query, 60);

    // Convert 0-1 minScore to 0-100 MatchThreshold, floor to configured minimum
    const threshold = minScore !== undefined
      ? Math.max(Math.floor(minScore * 100), this.matchThreshold)
      : this.matchThreshold;

    const body: Record<string, unknown> = {
      DatasetName: dataset.name,
      Mode: 'text',
      Templates: this.template,
      SearchText: searchText,
      Limit: 30,
      MatchThreshold: threshold,
      Offset: 0,
    };

    const result = await sendCIRequest(
      this.cos,
      this.config.bucket,
      this.config.region,
      'POST',
      'datasetquery/hybridsearch',
      body,
    );

    const res = result as Record<string, unknown>;
    const response = res.Response as Record<string, unknown> | undefined;
    const docResult = (response?.DocResult ?? res.DocResult) as DocResultItem[] | undefined;

    return CosOperations.limitResults(this.mapDocResults(docResult, category), maxResults);
  }

  // ==========================================================================
  // Upload — COS putObject (content stored as Markdown for CI indexing)
  // ==========================================================================

  /**
   * Upload text content to the COS bucket for CI indexing.
   *
   * The object key is `{cosPrefix}{sanitizedDocId}.md`, where `cosPrefix`
   * comes from the dataset chosen by `resolveDataset(category)`. The category
   * is NOT appended again — that would create paths like `memory/memory/`.
   * `opts.customKey`, when given, replaces the whole key.
   *
   * Metadata is stored in COS custom headers (`x-cos-meta-*`); see
   * `buildMetaHeaders` for the key/value conversion.
   *
   * @param content Text to store.
   * @param opts    Category (default "general"), docId (generated when
   *                omitted), metadata and optional custom key.
   * @returns `{ docId, key, status: 'uploaded' }`; `docId` is the unsanitized id.
   * @throws Error (`COS upload failed: …`) when putObject reports an error.
   */
  async upload(content: string, opts?: CloudUploadOptions): Promise<CloudUploadResult> {
    const category = opts?.category ?? 'general';
    const docId = opts?.docId ?? `${category}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    const dataset = this.resolveDataset(category);

    // Escape `/` so COS doesn't treat parts of the name as folders,
    // e.g. "memory:IMAP/SMTP Email Tool" → "memory:IMAP__SMTP Email Tool".
    const safeDocId = sanitizeDocId(docId);

    // `customKey` wins; the dataset is only dereferenced when it is absent.
    const key = opts?.customKey ?? `${dataset.cosPrefix}${safeDocId}.md`;

    const headers = CosOperations.buildMetaHeaders(
      {
        'x-cos-meta-category': category,
        'x-cos-meta-created-at': new Date().toISOString(),
      },
      opts?.metadata,
    );

    await this.putObject(key, content, headers, 'COS upload failed');
    return { docId, key, status: 'uploaded' };
  }

  /**
   * Upload several items with a bounded number of concurrent uploads.
   *
   * A failing item does not abort the batch: it is reported as
   * `{ status: 'error', key: '', error }` and the remaining items are still
   * processed.
   *
   * The returned array is index-aligned with `items` (result `i` belongs to
   * `items[i]`), so failures can be matched to their input even when no
   * docId was supplied.
   *
   * @param items       Content / options pairs to upload.
   * @param concurrency Maximum parallel uploads (default 5). Values below 1
   *                    or NaN are treated as 1 so that items are never
   *                    silently skipped.
   * @returns One result per input item, in input order.
   */
  async uploadBatch(
    items: Array<{ content: string; opts?: CloudUploadOptions }>,
    concurrency = 5,
  ): Promise<CloudUploadResult[]> {
    // Snapshot so later mutation of the caller's array cannot affect the run.
    const pending = [...items];
    const results = new Array<CloudUploadResult>(pending.length);
    let cursor = 0;

    const worker = async (): Promise<void> => {
      // `cursor++` is safe without locking: workers only interleave at `await`.
      while (cursor < pending.length) {
        const index = cursor++;
        const item = pending[index];
        try {
          if (!item) {
            throw new Error('uploadBatch: missing item');
          }
          results[index] = await this.upload(item.content, item.opts);
        } catch (err) {
          // Record the failure but continue processing remaining items
          results[index] = {
            docId: item?.opts?.docId ?? 'unknown',
            key: '',
            status: 'error',
            error: err instanceof Error ? err.message : String(err),
          };
        }
      }
    };

    const requested = Number.isNaN(concurrency) ? 1 : Math.floor(concurrency);
    const workerCount = Math.min(Math.max(requested, 1), pending.length);
    await Promise.all(Array.from({ length: workerCount }, () => worker()));
    return results;
  }

  // ==========================================================================
  // Upload Binary — COS putObject for images and binary files
  // ==========================================================================

  /**
   * Upload a binary file (e.g. an image) to the COS bucket.
   *
   * Unlike `upload()`, the key is used exactly as given and the body is sent
   * as a Buffer with the supplied content type. The `x-cos-meta-category`
   * header is set to "image" unless `metadata` overrides it.
   *
   * @param buffer      Raw file content.
   * @param key         Full COS key (e.g. "openclaw-main/asset/photo.png").
   * @param contentType MIME type. Default: "application/octet-stream".
   * @param metadata    Optional metadata stored as `x-cos-meta-*` headers.
   * @returns `{ docId: key, key, status: 'uploaded' }`.
   * @throws Error (`COS binary upload failed for "<key>": …`) on failure.
   */
  async uploadBinary(
    buffer: Buffer,
    key: string,
    contentType = 'application/octet-stream',
    metadata?: Record<string, unknown>,
  ): Promise<CloudUploadResult> {
    const headers = CosOperations.buildMetaHeaders(
      {
        'Content-Type': contentType,
        'x-cos-meta-category': 'image',
        'x-cos-meta-created-at': new Date().toISOString(),
      },
      metadata,
    );

    await this.putObject(key, buffer, headers, `COS binary upload failed for "${key}"`);
    return { docId: key, key, status: 'uploaded' };
  }

  /**
   * Check whether a COS object exists at the given key.
   *
   * Uses `headObject`, which does not download the body. The promise never
   * rejects: ANY error reported by `headObject` resolves to `false`, so a
   * `false` result does not distinguish "missing" from other failures.
   */
  async objectExists(key: string): Promise<boolean> {
    return new Promise((resolve) => {
      this.cos.headObject(
        {
          Bucket: this.config.bucket,
          Region: this.config.region,
          Key: key,
        },
        (err: unknown) => {
          resolve(!err);
        },
      );
    });
  }

  // ==========================================================================
  // Download — COS getObject
  // ==========================================================================

  /**
   * Download a document from the COS bucket.
   *
   * @param docKey Full COS key (starts with a configured dataset cosPrefix) or
   *               a short docId, which is resolved to
   *               `{cosPrefix}{sanitizedDocId}.md` for the dataset selected
   *               by `opts.category`.
   * @param opts   Category used to resolve short docIds.
   * @returns Body text, the key that was fetched, and metadata taken from the
   *          `x-cos-meta-*` response headers. Bodies in the legacy JSON
   *          wrapper format (`{ content, metadata }`) are unwrapped.
   * @throws Error (`COS download failed for "<key>": …`) when getObject fails.
   */
  async download(docKey: string, opts?: CloudDownloadOptions): Promise<CloudDownloadResult> {
    const key = this.resolveObjectKey(docKey, opts?.category);

    return new Promise((resolve, reject) => {
      this.cos.getObject(
        {
          Bucket: this.config.bucket,
          Region: this.config.region,
          Key: key,
        },
        (err: unknown, data: unknown) => {
          if (err) {
            reject(new Error(`COS download failed for "${key}": ${CosOperations.describeError(err)}`));
            return;
          }

          const raw = data as Record<string, unknown>;
          const body = typeof raw.Body === 'string'
            ? raw.Body
            : raw.Body instanceof Buffer
              ? raw.Body.toString('utf-8')
              : String(raw.Body ?? '');

          // Extract metadata from COS custom headers (x-cos-meta-*)
          const headers = (raw.headers ?? {}) as Record<string, string>;
          const metaPrefix = 'x-cos-meta-';
          const metadata: Record<string, unknown> = {};
          for (const [hk, hv] of Object.entries(headers)) {
            if (hk.startsWith(metaPrefix)) {
              metadata[hk.slice(metaPrefix.length)] = hv;
            }
          }

          // Backward compatibility: try to parse as old JSON wrapper format
          try {
            const parsed: unknown = JSON.parse(body);
            if (typeof parsed === 'object' && parsed !== null) {
              const wrapper = parsed as Record<string, unknown>;
              if (typeof wrapper.content === 'string') {
                resolve({
                  content: wrapper.content,
                  key,
                  metadata: (wrapper.metadata ?? metadata) as Record<string, unknown>,
                });
                return;
              }
            }
          } catch {
            // Not JSON — this is the expected path for .md files
          }

          resolve({ content: body, key, metadata });
        },
      );
    });
  }

  // ==========================================================================
  // Delete — COS deleteObject
  // ==========================================================================

  /**
   * Delete a document from the COS bucket.
   *
   * @param docKey Full COS key or short docId (resolved exactly as in `download`).
   * @param opts   Category used to resolve short docIds.
   * @returns `{ deleted: true, key }` once deleteObject succeeds.
   * @throws Error (`COS delete failed for "<key>": …`) when deleteObject fails.
   */
  async deleteDoc(docKey: string, opts?: CloudDownloadOptions): Promise<{ deleted: boolean; key: string }> {
    const key = this.resolveObjectKey(docKey, opts?.category);

    return new Promise((resolve, reject) => {
      this.cos.deleteObject(
        {
          Bucket: this.config.bucket,
          Region: this.config.region,
          Key: key,
        },
        (err: unknown) => {
          if (err) {
            reject(new Error(`COS delete failed for "${key}": ${CosOperations.describeError(err)}`));
            return;
          }
          resolve({ deleted: true, key });
        },
      );
    });
  }

  // ==========================================================================
  // Accessors
  // ==========================================================================

  /** Get the resolved COS config (useful for diagnostics). */
  getConfig(): ResolvedCosConfig {
    return this.config;
  }

  /** Get all available dataset names. */
  getDatasetNames(): string[] {
    return this.config.datasets.map((ds) => ds.name);
  }

  /**
   * Get the agent-level COS key prefix.
   *
   * Returns `openclaw-{agentId}/` when the config has an `agentId`, otherwise
   * an empty string. Useful for placing files next to the workspace
   * directory rather than inside it, e.g. `openclaw-{agentId}/agents.md`.
   */
  getAgentPrefix(): string {
    return this.config.agentId ? `openclaw-${this.config.agentId}/` : '';
  }

  // ==========================================================================
  // Signed URL — generate a temporary access URL for a COS object
  // ==========================================================================

  /**
   * Generate a signed (temporary) download URL for a COS object.
   *
   * @param cosUri Full COS URI (e.g. `cos://bucket-123/image/photo.jpg`)
   *               or a plain COS key (e.g. `image/photo.jpg`).
   * @param expiresInSeconds URL validity duration. Default: 3600 (1 hour).
   * @returns The URL returned by the SDK's `getObjectUrl` for that key.
   */
  getSignedUrl(cosUri: string, expiresInSeconds = 3600): string {
    // Extract the COS key from a `cos://bucket/key` URI
    const key = cosUri.startsWith('cos://')
      ? cosUri.replace(/^cos:\/\/[^/]+\//, '')
      : cosUri;

    return this.cos.getObjectUrl({
      Bucket: this.config.bucket,
      Region: this.config.region,
      Key: key,
      Sign: true,
      Expires: expiresInSeconds,
    }) as unknown as string;
  }

  // ==========================================================================
  // Private: result mapping helpers
  // ==========================================================================

  /**
   * Map CI DocSearch results to CloudSearchResult[].
   *
   * The requested category is echoed back as-is (cast to the result type);
   * 'document' is used when no category was given.
   */
  private mapDocResults(
    items: DocResultItem[] | undefined,
    category?: string,
  ): CloudSearchResult[] {
    if (!items || items.length === 0) {
      return [];
    }

    return items.map((item) => ({
      snippet: item.Text ?? '',
      score: (item.Score ?? 0) / 100, // Normalize 0-100 → 0-1
      docId: item.URI ? this.cosUriToDocId(item.URI) : undefined,
      category: (category as CloudSearchResult['category']) ?? 'document',
      metadata: {
        uri: item.URI,
        textPage: item.TextPage,
        imageUrls: item.ImageUrls,
      },
    }));
  }

  /**
   * Map CI ImageSearch results to CloudSearchResult[].
   * Each result with a URI also gets a signed URL for direct image access.
   */
  private mapImageResults(
    items: ImageResultItem[] | undefined,
    category?: string,
  ): CloudSearchResult[] {
    if (!items || items.length === 0) {
      return [];
    }

    return items.map((item) => {
      const uri = item.URI ?? '';
      const signedUrl = uri ? this.getSignedUrl(uri) : undefined;

      return {
        snippet: `[image] ${uri || 'unknown'}`,
        score: (item.Score ?? 0) / 100, // Normalize 0-100 → 0-1
        docId: uri ? this.cosUriToDocId(uri) : undefined,
        category: (category as CloudSearchResult['category']) ?? 'document',
        imageUrl: signedUrl,
        metadata: {
          uri,
        },
      };
    });
  }

  /**
   * Extract a short doc ID from a COS URI.
   *   `cos://bucket-123/memory/abc.md` → `abc.md`
   *   `cos://bucket-123/memory/IMAP__SMTP Email Tool.md` → `IMAP/SMTP Email Tool.md`
   *
   * The `cos://<bucket>/` part is removed, then the cosPrefix of the first
   * configured dataset that matches, and finally `__` is turned back into
   * `/` (see `sanitizeDocId`). The file extension is kept.
   */
  private cosUriToDocId(uri: string): string {
    // Remove cos:// scheme and bucket name
    const withoutScheme = uri.replace(/^cos:\/\/[^/]+\//, '');
    // Remove known dataset prefixes
    for (const ds of this.config.datasets) {
      if (withoutScheme.startsWith(ds.cosPrefix)) {
        return unsanitizeDocId(withoutScheme.slice(ds.cosPrefix.length));
      }
    }
    return unsanitizeDocId(withoutScheme);
  }

  // ==========================================================================
  // Private: shared helpers
  // ==========================================================================

  /**
   * Turn a caller-supplied key into the COS key to operate on.
   *
   * A key that starts with any configured dataset cosPrefix is treated as a
   * full key and used verbatim. Anything else is a short docId and becomes
   * `{cosPrefix}{sanitizedDocId}.md` for the dataset chosen by `category`.
   * `includes('/')` is deliberately not used for detection because docIds
   * may contain `/` (e.g. "category:IMAP/SMTP Email Tool").
   */
  private resolveObjectKey(docKey: string, category?: string): string {
    const isFullKey = this.config.datasets.some((ds) => docKey.startsWith(ds.cosPrefix));
    if (isFullKey) {
      return docKey;
    }
    const dataset = this.resolveDataset(category);
    return `${dataset.cosPrefix}${sanitizeDocId(docKey)}.md`;
  }

  /**
   * Promise wrapper around `cos.putObject`.
   *
   * @param failureLabel Text placed before `: <error detail>` in the
   *                     rejection message.
   */
  private putObject(
    key: string,
    body: string | Buffer,
    headers: Record<string, string>,
    failureLabel: string,
  ): Promise<void> {
    return new Promise((resolve, reject) => {
      this.cos.putObject(
        {
          Bucket: this.config.bucket,
          Region: this.config.region,
          Key: key,
          Body: body,
          Headers: headers,
        } as never,
        (err: unknown) => {
          if (err) {
            reject(new Error(`${failureLabel}: ${CosOperations.describeError(err)}`));
            return;
          }
          resolve();
        },
      );
    });
  }

  /**
   * Merge user metadata into a set of base headers as `x-cos-meta-*` entries.
   *
   * Keys are converted from camelCase to kebab-case (`sourceFile` →
   * `x-cos-meta-source-file`). String values are stored as-is, everything
   * else is JSON-serialized. Values that cannot be serialized (`undefined`,
   * functions, symbols) are skipped instead of producing an `undefined`
   * header. Metadata entries override base headers with the same name.
   */
  private static buildMetaHeaders(
    base: Record<string, string>,
    metadata?: Record<string, unknown>,
  ): Record<string, string> {
    const headers: Record<string, string> = { ...base };
    for (const [k, v] of Object.entries(metadata ?? {})) {
      const value: string | undefined = typeof v === 'string' ? v : JSON.stringify(v);
      if (value === undefined) {
        continue;
      }
      const headerKey = `x-cos-meta-${k.replace(/[A-Z]/g, (c) => `-${c.toLowerCase()}`)}`;
      headers[headerKey] = value;
    }
    return headers;
  }

  /**
   * Render an SDK callback error as readable text.
   *
   * `Error` instances yield their message; other objects are pretty-printed
   * as JSON (plain `String(err)` would give "[object Object]"); anything
   * else, or an object that cannot be serialized, falls back to `String(err)`.
   */
  private static describeError(err: unknown): string {
    if (err instanceof Error) {
      return err.message;
    }
    if (typeof err === 'object' && err !== null) {
      try {
        return JSON.stringify(err, null, 2) ?? String(err);
      } catch {
        // e.g. circular structure
        return String(err);
      }
    }
    return String(err);
  }

  /**
   * Cut `text` to at most `maxUnits` UTF-16 code units without leaving a
   * dangling high surrogate (half of an astral character) at the end.
   * Text that is not longer than the limit is returned untouched.
   */
  private static truncateText(text: string, maxUnits: number): string {
    if (!(text.length > maxUnits)) {
      return text;
    }
    const cut = text.slice(0, maxUnits);
    const lastUnit = cut.charCodeAt(cut.length - 1);
    const endsWithHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
    return endsWithHighSurrogate ? cut.slice(0, -1) : cut;
  }

  /**
   * Apply the caller's `maxResults` cap. Only a finite value ≥ 1 trims the
   * list; `undefined` (not requested) and other values leave it unchanged.
   */
  private static limitResults(
    results: CloudSearchResult[],
    maxResults: number | undefined,
  ): CloudSearchResult[] {
    if (maxResults === undefined || !Number.isFinite(maxResults) || maxResults < 1) {
      return results;
    }
    return results.slice(0, Math.floor(maxResults));
  }
}