metainsight-context-engine 0.0.8 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,864 @@
1
+ /**
2
+ * COS Bootstrap — Infrastructure Initialization
3
+ *
4
+ * Ensures all preconditions for the Cloud Context Engine are met before
5
+ * the engine starts processing requests:
6
+ *
7
+ * 1. COS SDK instance is initialized (SecretId / SecretKey auth)
8
+ * 2. Target bucket exists (headBucket → putBucket if missing)
9
+ * 3. For each dataset definition:
10
+ * a. Dataset exists (GET /dataset → POST /dataset if missing)
11
+ * b. Dataset ↔ Bucket binding exists (GET /datasetbinding → POST /datasetbinding if missing)
12
+ *
13
+ * The module supports **multiple datasets** per bucket — each dataset has its
14
+ * own COS path prefix and CI template. The default setup includes a "memory"
15
+ * dataset (DocSearch), an "image" dataset (ImageSearch) and a "document"
16
+ * dataset (DocSearch) for multimodal retrieval. Users can extend it with "knowledge" or any custom datasets.
17
+ *
18
+ * **Multi-agent isolation**: When `agentId` is set, the default datasets are
19
+ * bound under `openclaw-{agentId}/` (memory → `openclaw-{agentId}/workspace/`,
20
+ * image/document → `openclaw-{agentId}/asset/`). This allows multiple agents to share the same bucket without data collisions.
21
+ *
22
+ * The module also exposes a generic `sendCIRequest()` helper for CI API
23
+ * calls (used by cos-operations.ts for hybridsearch, upload, etc.).
24
+ *
25
+ * Supported regions: ap-beijing, ap-shanghai, ap-chengdu
26
+ *
27
+ * @example
28
+ * ```ts
29
+ * // Minimal — uses the default datasets (memory, image, document)
30
+ * bootstrap({ secretId, secretKey, appId: '1253311026' }, logger);
31
+ *
32
+ * // Extended — add a custom knowledge dataset alongside defaults
33
+ * bootstrap({
34
+ * secretId, secretKey,
35
+ * appId: '1253311026',
36
+ * datasets: [
37
+ * { name: 'my-memory', cosPrefix: 'memory/', templateId: 'Official:DocSearch' },
38
+ * { name: 'my-knowledge', cosPrefix: 'knowledge/', templateId: 'Official:DocSearch' },
39
+ * ],
40
+ * }, logger);
41
+ * ```
42
+ */
43
+
44
+ // cos-nodejs-sdk-v5 uses `export = COS` (CJS style)
45
+ import COS from 'cos-nodejs-sdk-v5';
46
+
47
+ // ============================================================================
48
+ // Types
49
+ // ============================================================================
50
+
51
/**
 * Describes one CI dataset that the bootstrap sequence should provision.
 *
 * Only `name` and `cosPrefix` are required; the optional fields receive
 * defaults when the configuration is resolved.
 */
export interface DatasetDefinition {
  /** Name of the CI dataset, e.g. "openclaw-memory" (unique within the APPID scope). */
  name: string;
  /**
   * Key prefix inside the bucket that the dataset binding covers.
   * Expected to end with "/" (e.g. "memory/").
   */
  cosPrefix: string;
  /** CI dataset template; "Official:DocSearch" is used when omitted. */
  templateId?: string;
  /** Human-readable description applied when the dataset is auto-created. */
  description?: string;
}
72
+
73
/**
 * User-facing configuration accepted by the bootstrap sequence.
 *
 * Only the credentials and `appId` are required; the remaining fields fall
 * back to defaults when the configuration is resolved.
 */
export interface CosBootstrapConfig {
  /** Tencent Cloud API SecretId used to authenticate the COS SDK. */
  secretId: string;
  /** Tencent Cloud API SecretKey paired with `secretId`. */
  secretKey: string;
  /**
   * Tencent Cloud APPID (e.g. "1253311026").
   * Obtain from https://console.cloud.tencent.com/cam/capi
   */
  appId: string;
  /**
   * Agent ID for multi-agent isolation within a single bucket.
   *
   * When set (and `datasets` is omitted), the default datasets are named
   * `openclaw-{agentId}-*` and bound under `openclaw-{agentId}/`; see
   * `datasets` for the exact list.
   *
   * Typically resolved by the plugin entry point (index.ts) with priority:
   *   1. User-configured cfg.agentId
   *   2. Hook ctx.agentId (auto-detected at runtime)
   *   3. Fallback: 'main'
   *
   * When omitted here, the legacy flat prefixes (`memory/`, `asset/`) are used
   * (only happens if the caller does not resolve agentId beforehand).
   */
  agentId?: string;
  /**
   * COS bucket short name without APPID suffix (e.g. "openclaw-metainsight",
   * which is also the default). A name that already ends with `-{appId}` is
   * used as-is.
   */
  bucket?: string;
  /**
   * COS region — only ap-beijing / ap-shanghai / ap-chengdu are supported.
   * Defaults to "ap-beijing".
   */
  region?: string;
  /**
   * Dataset definitions to bootstrap.
   *
   * When omitted (or empty) and no `agentId` is set, three default datasets
   * are created:
   * ```
   * [
   *   { name: "openclaw-memory",   cosPrefix: "memory/", templateId: "Official:DocSearch" },
   *   { name: "openclaw-image",    cosPrefix: "asset/",  templateId: "Official:ImageSearch" },
   *   { name: "openclaw-document", cosPrefix: "asset/",  templateId: "Official:DocSearch" },
   * ]
   * ```
   *
   * When `agentId` is set and datasets are omitted, the defaults become:
   * ```
   * [
   *   { name: "openclaw-{agentId}-memory",   cosPrefix: "openclaw-{agentId}/workspace/", templateId: "Official:DocSearch" },
   *   { name: "openclaw-{agentId}-image",    cosPrefix: "openclaw-{agentId}/asset/",     templateId: "Official:ImageSearch" },
   *   { name: "openclaw-{agentId}-document", cosPrefix: "openclaw-{agentId}/asset/",     templateId: "Official:DocSearch" },
   * ]
   * ```
   *
   * Supplying this array replaces the defaults entirely, so to add more
   * datasets (e.g. knowledge) supply the full list:
   * ```
   * [
   *   { name: "openclaw-memory",    cosPrefix: "memory/" },
   *   { name: "openclaw-image",     cosPrefix: "asset/", templateId: "Official:ImageSearch" },
   *   { name: "openclaw-document",  cosPrefix: "asset/" },
   *   { name: "openclaw-knowledge", cosPrefix: "knowledge/" },
   * ]
   * ```
   */
  datasets?: DatasetDefinition[];
}
133
+
134
/**
 * A dataset definition after defaults have been applied: every field is
 * present, so downstream code never has to deal with optional values.
 */
export interface ResolvedDataset {
  /** Name of the CI dataset. */
  name: string;
  /** Bucket key prefix covered by the binding, e.g. "memory/". */
  cosPrefix: string;
  /** CI template the dataset is created from. */
  templateId: string;
  /** Description used when the dataset is created. */
  description: string;
}
144
+
145
/**
 * Fully resolved bootstrap configuration: user input merged with defaults.
 */
export interface ResolvedCosConfig {
  /** SecretId passed through from the input config. */
  secretId: string;
  /** SecretKey passed through from the input config. */
  secretKey: string;
  /** Tencent Cloud APPID. */
  appId: string;
  /** Agent ID used for multi-agent isolation; undefined means the legacy flat layout. */
  agentId?: string;
  /** Full bucket name in COS format: "{shortName}-{APPID}". */
  bucket: string;
  /** COS region the bucket lives in. */
  region: string;
  /** Every dataset to bootstrap (always at least one). */
  datasets: ResolvedDataset[];
}
157
+
158
/**
 * The dataset fields this module reads from CI API responses.
 * Any other fields in the payload are kept via the index signature.
 */
export interface DatasetInfo {
  DatasetName?: string;
  Description?: string;
  TemplateId?: string;
  [key: string]: unknown;
}
165
+
166
/**
 * The binding fields this module reads from CI API responses.
 * Any other fields in the payload are kept via the index signature.
 */
export interface DatasetBinding {
  DatasetName?: string;
  URI?: string;
  State?: string;
  [key: string]: unknown;
}
173
+
174
+ // ============================================================================
175
+ // Logger (subset)
176
+ // ============================================================================
177
+
178
/**
 * Minimal logging contract needed by this module: any object exposing
 * variadic `info` and `warn` functions satisfies it.
 */
interface Logger {
  /** Progress / status messages. */
  info: (...args: unknown[]) => void;
  /** Non-fatal problems, e.g. a failed initialization. */
  warn: (...args: unknown[]) => void;
}
182
+
183
+ // ============================================================================
184
+ // Error serialization helper
185
+ // ============================================================================
186
+
187
/**
 * Convert an unknown error value into a human-readable string.
 *
 * The COS SDK may surface failures either as plain objects such as
 * `{statusCode, code, message, resource}` or as `Error` instances carrying
 * the same extra fields. `String(obj)` on a plain object yields
 * `[object Object]`, and `err.message` alone drops the error code, so both
 * shapes are handled by extracting the same set of fields.
 *
 * @param err Any thrown / rejected value.
 * @returns A comma-separated summary (`code=…, status=…, message, resource=…`)
 *          when any of those fields are present; otherwise the best available
 *          string form of the value. Never throws.
 */
function serializeError(err: unknown): string {
  if (typeof err === 'string') {
    return err;
  }
  if (typeof err !== 'object' || err === null) {
    return String(err);
  }

  const obj = err as Record<string, unknown>;
  const parts: string[] = [];
  if (obj.code) {
    parts.push(`code=${String(obj.code)}`);
  }
  if (obj.statusCode) {
    parts.push(`status=${String(obj.statusCode)}`);
  }
  if (obj.message) {
    parts.push(String(obj.message));
  }
  if (obj.resource) {
    parts.push(`resource=${String(obj.resource)}`);
  }
  if (parts.length > 0) {
    return parts.join(', ');
  }

  if (err instanceof Error) {
    // No message and no SDK fields: fall back to the error name (e.g. "Error").
    return String(err);
  }

  try {
    return JSON.stringify(err);
  } catch {
    // Circular references or BigInt values make JSON.stringify throw.
    return String(err);
  }
}
220
+
221
+ // ============================================================================
222
+ // Constants
223
+ // ============================================================================
224
+
225
/** COS regions accepted by the config resolver. */
const SUPPORTED_REGIONS: string[] = [
  'ap-beijing',
  'ap-shanghai',
  'ap-chengdu',
];
226
+
227
/**
 * Datasets used when the caller supplies neither `datasets` nor `agentId`:
 *   - "openclaw-memory"   → prefix `memory/` — conversation memory documents (DocSearch)
 *   - "openclaw-image"    → prefix `asset/`  — image retrieval (ImageSearch)
 *   - "openclaw-document" → prefix `asset/`  — document retrieval (DocSearch)
 *
 * The image and document datasets intentionally share the `asset/` prefix.
 */
const DEFAULT_DATASETS: DatasetDefinition[] = [
  {
    name: 'openclaw-memory',
    cosPrefix: 'memory/',
    templateId: 'Official:DocSearch',
    description: 'OpenClaw Cloud Context Engine — memory document search dataset',
  },
  {
    name: 'openclaw-image',
    cosPrefix: 'asset/',
    templateId: 'Official:ImageSearch',
    description: 'OpenClaw Cloud Context Engine — image search dataset',
  },
  {
    name: 'openclaw-document',
    cosPrefix: 'asset/',
    templateId: 'Official:DocSearch',
    description: 'OpenClaw Cloud Context Engine — document retrieval dataset',
  },
];
253
+
254
/** Fallback bucket short name and region applied when the caller omits them. */
const DEFAULTS = {
  region: 'ap-beijing',
  bucket: 'openclaw-metainsight',
} as const;
258
+
259
+ // ============================================================================
260
+ // Singleton COS instance (per secretId)
261
+ // ============================================================================
262
+
263
// Cached SDK client plus the credentials it was built with.
let cosInstance: COS | null = null;
let lastSecretId = '';
let lastSecretKey = '';

/**
 * Get (or create) the COS SDK singleton.
 *
 * The cached client is reused only while BOTH credentials match the ones it
 * was created with. Comparing the SecretKey as well means a corrected or
 * rotated key for the same SecretId produces a fresh client instead of
 * silently reusing one built with the old key.
 *
 * @param secretId  Tencent Cloud API SecretId.
 * @param secretKey Tencent Cloud API SecretKey.
 * @returns The cached client when the credentials are unchanged, otherwise a
 *          newly constructed client (which replaces the cached one).
 */
export function getCOSInstance(secretId: string, secretKey: string): COS {
  if (cosInstance !== null && lastSecretId === secretId && lastSecretKey === secretKey) {
    return cosInstance;
  }

  cosInstance = new COS({
    SecretId: secretId,
    SecretKey: secretKey,
  });
  lastSecretId = secretId;
  lastSecretKey = secretKey;
  return cosInstance;
}
280
+
281
+ // ============================================================================
282
+ // Config resolution
283
+ // ============================================================================
284
+
285
/**
 * Merge user-provided config with defaults and validate it.
 *
 * - `region` defaults to `DEFAULTS.region` and must be one of `SUPPORTED_REGIONS`.
 * - The bucket name is resolved to COS format `{shortName}-{APPID}`. A bucket
 *   that already ends with `-{appId}` is used as-is; a missing, empty or
 *   whitespace-only bucket falls back to `DEFAULTS.bucket`.
 * - When `datasets` is omitted or empty, three default datasets are used
 *   (memory, image, document). With an `agentId` they are named
 *   `openclaw-{agentId}-{memory|image|document}` and bound under
 *   `openclaw-{agentId}/workspace/` (memory) and `openclaw-{agentId}/asset/`
 *   (image, document); without one, `DEFAULT_DATASETS` is used.
 * - Each dataset's `cosPrefix` is normalized to end with "/" (an empty prefix
 *   becomes `{name}/`), and `templateId` / `description` receive defaults.
 *
 * @param input Raw user configuration.
 * @returns The fully resolved configuration.
 * @throws Error when the region is unsupported, `appId` is missing, a dataset
 *         has an empty name, or two datasets share the same name.
 */
export function resolveConfig(input: CosBootstrapConfig): ResolvedCosConfig {
  const region = input.region ?? DEFAULTS.region;

  if (!SUPPORTED_REGIONS.includes(region)) {
    throw new Error(
      `cos-bootstrap: unsupported region "${region}". Supported: ${SUPPORTED_REGIONS.join(', ')}`,
    );
  }

  if (!input.appId) {
    throw new Error(
      'cos-bootstrap: "appId" is required. '
      + 'Obtain from https://console.cloud.tencent.com/cam/capi',
    );
  }

  // `||` (not `??`) so that an empty / whitespace-only bucket falls back to the
  // default instead of resolving to the invalid name "-{appId}".
  const shortBucket = input.bucket?.trim() || DEFAULTS.bucket;
  // If bucket already ends with `-{appId}`, use as-is; otherwise append
  const appIdSuffix = `-${input.appId}`;
  const bucket = shortBucket.endsWith(appIdSuffix)
    ? shortBucket
    : `${shortBucket}${appIdSuffix}`;

  // Resolve datasets — explicit list wins, then per-agent defaults, then the
  // legacy flat defaults. Per-agent defaults live under `openclaw-{agentId}/`
  // so several agents can share one bucket.
  const agentId = input.agentId?.trim();
  let rawDatasets: DatasetDefinition[];

  if (input.datasets && input.datasets.length > 0) {
    rawDatasets = input.datasets;
  } else if (agentId) {
    rawDatasets = [
      {
        name: `openclaw-${agentId}-memory`,
        cosPrefix: `openclaw-${agentId}/workspace/`,
        templateId: 'Official:DocSearch',
        description: `OpenClaw Cloud Context Engine — memory dataset for agent "${agentId}"`,
      },
      {
        name: `openclaw-${agentId}-image`,
        cosPrefix: `openclaw-${agentId}/asset/`,
        templateId: 'Official:ImageSearch',
        description: `OpenClaw Cloud Context Engine — image search dataset for agent "${agentId}"`,
      },
      {
        name: `openclaw-${agentId}-document`,
        cosPrefix: `openclaw-${agentId}/asset/`,
        templateId: 'Official:DocSearch',
        description: `OpenClaw Cloud Context Engine — document retrieval dataset for agent "${agentId}"`,
      },
    ];
  } else {
    rawDatasets = DEFAULT_DATASETS;
  }

  const datasets: ResolvedDataset[] = rawDatasets.map((ds) => {
    if (!ds.name) {
      throw new Error('cos-bootstrap: each dataset must have a non-empty "name"');
    }
    // Ensure cosPrefix ends with "/"
    let cosPrefix = ds.cosPrefix || `${ds.name}/`;
    if (!cosPrefix.endsWith('/')) {
      cosPrefix = `${cosPrefix}/`;
    }
    return {
      name: ds.name,
      cosPrefix,
      templateId: ds.templateId ?? 'Official:DocSearch',
      description: ds.description ?? `OpenClaw dataset — ${ds.name}`,
    };
  });

  // Validate no duplicate dataset names (prefixes may legitimately repeat).
  const names = new Set<string>();
  for (const ds of datasets) {
    if (names.has(ds.name)) {
      throw new Error(`cos-bootstrap: duplicate dataset name "${ds.name}"`);
    }
    names.add(ds.name);
  }

  return {
    secretId: input.secretId,
    secretKey: input.secretKey,
    appId: input.appId,
    agentId: agentId || undefined,
    bucket,
    region,
    datasets,
  };
}
385
+
386
+ // ============================================================================
387
+ // Generic CI API helper
388
+ // ============================================================================
389
+
390
/**
 * Build the CI endpoint host name for a bucket in a region.
 *
 * @param bucket Full bucket name.
 * @param region COS region.
 * @returns `{bucket}.ci.{region}.myqcloud.com`
 */
function getCIHost(bucket: string, region: string): string {
  return [bucket, 'ci', region, 'myqcloud.com'].join('.');
}
397
+
398
/**
 * Issue a request against the COS CI API, letting the SDK sign it.
 *
 * The request targets `https://{CI host}/{endpoint}` and always asks for a
 * JSON response. A JSON `Content-Type` header and a stringified body are
 * attached only when `body` is given; `Query` only when `query` has keys.
 *
 * @param cos      COS SDK instance
 * @param bucket   Bucket name
 * @param region   COS region
 * @param method   HTTP method (GET / POST / PUT / DELETE)
 * @param endpoint CI API endpoint path (e.g. "dataset", "datasetbinding", "datasetquery/hybridsearch")
 * @param body     Optional JSON request body
 * @param query    Optional query string parameters
 * @returns The parsed JSON when the SDK response carries a string `Body`
 *          containing valid JSON; otherwise the raw SDK response. Rejects
 *          with whatever error value the SDK reports.
 */
export async function sendCIRequest(
  cos: COS,
  bucket: string,
  region: string,
  method: string,
  endpoint: string,
  body?: Record<string, unknown>,
  query?: Record<string, string>,
): Promise<unknown> {
  const headers: Record<string, string> = { 'Accept': 'application/json' };
  const params: Record<string, unknown> = {
    Method: method,
    Key: endpoint,
    Url: `https://${getCIHost(bucket, region)}/${endpoint}`,
    Headers: headers,
  };

  if (query && Object.keys(query).length > 0) {
    params.Query = query;
  }

  if (body) {
    headers['Content-Type'] = 'application/json';
    params.Body = JSON.stringify(body);
  }

  // Wrap the callback-style SDK call; the raw response is post-processed below.
  const sdkResponse = await new Promise<Record<string, unknown> | undefined>((resolve, reject) => {
    cos.request(params as never, (err: unknown, data: unknown) => {
      if (err) {
        reject(err);
      } else {
        resolve(data as Record<string, unknown> | undefined);
      }
    });
  });

  if (sdkResponse && typeof sdkResponse.Body === 'string') {
    try {
      return JSON.parse(sdkResponse.Body);
    } catch {
      // Body is not JSON — hand back the raw response instead.
    }
  }
  return sdkResponse;
}
461
+
462
+ // ============================================================================
463
+ // Simple retry helper
464
+ // ============================================================================
465
+
466
/**
 * Run an async operation, retrying it after a linearly growing delay.
 *
 * `fn` is always invoked at least once. After the n-th failed attempt
 * (1-based) the helper waits `delayMs * n` before trying again, for up to
 * `retries` additional attempts.
 *
 * @param fn      Operation to run; invoked anew for every attempt.
 * @param retries Number of retries after the first attempt (default 2).
 *                Negative or NaN values are treated as 0.
 * @param delayMs Base delay in milliseconds (default 1000).
 * @returns The first successful result of `fn`.
 * @throws The error from the last failed attempt.
 */
async function withRetry<T>(
  fn: () => Promise<T>,
  retries = 2,
  delayMs = 1000,
): Promise<T> {
  // Guard against negative / NaN counts, which would otherwise skip the loop
  // entirely and throw `undefined` without ever calling `fn`.
  const maxRetries = retries > 0 ? retries : 0;

  let lastError: unknown;
  for (let attempt = 0; attempt <= maxRetries; attempt += 1) {
    try {
      return await fn();
    } catch (err) {
      lastError = err;
      if (attempt < maxRetries) {
        await new Promise<void>((resolve) => setTimeout(resolve, delayMs * (attempt + 1)));
      }
    }
  }
  throw lastError;
}
484
+
485
+ // ============================================================================
486
+ // Simple cache (TTL-based)
487
+ // ============================================================================
488
+
489
/** How long a cached entry stays valid: 5 minutes. */
const CACHE_TTL_MS = 5 * 60 * 1000;

/** In-memory store mapping a cache key to its payload and expiry timestamp (ms since epoch). */
const cache = new Map<string, { data: unknown; expiresAt: number }>();

/**
 * Look up a cache entry.
 *
 * @param key Cache key.
 * @returns The stored payload when an unexpired entry exists, otherwise
 *          `null`. A missing or expired key is removed from the map.
 */
function getCachedData<T>(key: string): T | null {
  const hit = cache.get(key);
  if (!hit || hit.expiresAt <= Date.now()) {
    cache.delete(key);
    return null;
  }
  return hit.data as T;
}

/**
 * Store a payload under `key`, valid for `CACHE_TTL_MS` from now.
 * Any existing entry for the key is overwritten.
 */
function setCachedData(key: string, data: unknown): void {
  const expiresAt = Date.now() + CACHE_TTL_MS;
  cache.set(key, { data, expiresAt });
}
504
+
505
+ // ============================================================================
506
+ // Bucket operations
507
+ // ============================================================================
508
+
509
/**
 * Probe the target bucket with a HEAD request.
 *
 * @returns `true` when the HEAD succeeds; `false` when the SDK reports
 *          status 404 (not found) or 403 (no permission — treated as
 *          "not found" so that creation is attempted).
 * @throws Error for any other failure (e.g. 500 / network errors), so a
 *         transient problem is not mistaken for a missing bucket.
 */
async function doesBucketExist(cos: COS, bucket: string, region: string): Promise<boolean> {
  return new Promise<boolean>((resolve, reject) => {
    const target = { Bucket: bucket, Region: region };
    cos.headBucket(target, (err: unknown) => {
      if (!err) {
        resolve(true);
        return;
      }

      const status = (err as Record<string, unknown>).statusCode as number | undefined;
      switch (status) {
        case 404:
        case 403:
          resolve(false);
          break;
        default:
          reject(new Error(`cos-bootstrap: headBucket failed: ${serializeError(err)}`));
      }
    });
  });
}
535
+
536
/**
 * Create a new COS bucket.
 *
 * `BucketAlreadyOwnedByYou` / `BucketAlreadyExists` are treated as success —
 * the bucket is already there (a common race when multiple callers bootstrap
 * concurrently).
 *
 * The marker is looked up both in the error's `code` field and in its
 * serialized text: the serialized text alone is not reliable because an
 * `Error` instance may carry the marker only in `code`, not in `message`.
 *
 * @throws Error when bucket creation fails for any other reason.
 */
async function createBucket(cos: COS, bucket: string, region: string): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    cos.putBucket(
      { Bucket: bucket, Region: region },
      (err: unknown) => {
        if (!err) {
          resolve();
          return;
        }

        const errStr = serializeError(err);
        const rawCode = typeof err === 'object' ? (err as Record<string, unknown>).code : undefined;
        const code = typeof rawCode === 'string' ? rawCode : '';
        const alreadyThere = ['BucketAlreadyOwnedByYou', 'BucketAlreadyExists'].some(
          (marker) => code === marker || errStr.includes(marker),
        );

        if (alreadyThere) {
          resolve();
          return;
        }
        reject(new Error(`cos-bootstrap: failed to create bucket "${bucket}": ${errStr}`));
      },
    );
  });
}
563
+
564
+ // ============================================================================
565
+ // Dataset operations
566
+ // ============================================================================
567
+
568
/**
 * Query dataset info (with cache).
 *
 * Results are cached per bucket + region + dataset name, so lookups made with
 * different accounts or regions in the same process do not share entries.
 *
 * A definitive "not found" answer (HTTP 404 or code `NoSuchDataset`) returns
 * `null` immediately instead of going through the retry backoff. Other
 * failures are retried; if the final error is a 400 (which can occur on a
 * freshly created bucket) or mentions "404", the dataset is also reported as
 * missing.
 *
 * @returns The dataset info, or `null` when the dataset does not exist or the
 *          response has no recognizable dataset payload.
 * @throws Error when the query fails for any other reason.
 */
async function getDatasetInfo(
  cos: COS,
  bucket: string,
  region: string,
  datasetName: string,
): Promise<DatasetInfo | null> {
  const cacheKey = `dataset-info-${bucket}-${region}-${datasetName}`;

  const cached = getCachedData<DatasetInfo>(cacheKey);
  if (cached) {
    return cached;
  }

  // Read the fields we inspect without assuming the thrown value is an object.
  const inspect = (err: unknown): { statusCode?: number; code?: string; message: string } => {
    if (typeof err !== 'object' || err === null) {
      return { message: '' };
    }
    const fields = err as Record<string, unknown>;
    return {
      statusCode: typeof fields.statusCode === 'number' ? fields.statusCode : undefined,
      code: typeof fields.code === 'string' ? fields.code : undefined,
      message: typeof fields.message === 'string' ? fields.message : '',
    };
  };

  // Sentinel returned from inside the retry loop so that a definitive
  // "not found" is not retried.
  const NOT_FOUND = Symbol('dataset-not-found');

  let result: unknown;
  try {
    result = await withRetry(async () => {
      try {
        return await sendCIRequest(cos, bucket, region, 'GET', 'dataset', undefined, {
          datasetname: datasetName,
          statistics: 'true',
        });
      } catch (err) {
        const { statusCode, code } = inspect(err);
        if (statusCode === 404 || code === 'NoSuchDataset') {
          return NOT_FOUND;
        }
        throw err;
      }
    });
  } catch (err) {
    // 404 / 400 / not found → dataset doesn't exist (400 can occur when bucket is freshly created)
    const { statusCode, code, message } = inspect(err);
    if (
      message.includes('404')
      || statusCode === 404
      || statusCode === 400
      || code === 'NoSuchDataset'
    ) {
      return null;
    }
    throw new Error(`cos-bootstrap: failed to query dataset "${datasetName}": ${serializeError(err)}`);
  }

  if (result === NOT_FOUND) {
    return null;
  }

  // Handle various response shapes from CI API (an empty response counts as "no dataset").
  const res = (result ?? {}) as Record<string, unknown>;
  const response = res.Response as Record<string, unknown> | undefined;

  let datasetInfo: DatasetInfo | null = null;
  if (response?.Dataset) {
    datasetInfo = response.Dataset as DatasetInfo;
  } else if (res.Dataset) {
    datasetInfo = res.Dataset as DatasetInfo;
  } else if (res.DatasetName) {
    datasetInfo = res as unknown as DatasetInfo;
  }

  if (datasetInfo) {
    setCachedData(cacheKey, datasetInfo);
  }

  return datasetInfo;
}
627
+
628
/**
 * Create a CI dataset via `POST dataset`.
 *
 * The call is retried (2 retries, 2000 ms base delay) because the CI API may
 * answer 400/500 right after the bucket has been created.
 *
 * @returns The `Dataset` object from the response (looked up under
 *          `Response.Dataset`, then `Dataset`), or the whole response when
 *          neither is present.
 */
async function createDataset(
  cos: COS,
  bucket: string,
  region: string,
  datasetName: string,
  templateId: string,
  description: string,
): Promise<DatasetInfo> {
  const postDataset = (): Promise<unknown> => sendCIRequest(cos, bucket, region, 'POST', 'dataset', {
    DatasetName: datasetName,
    Description: description,
    TemplateId: templateId,
  });

  const raw = (await withRetry(postDataset, 2, 2000)) as Record<string, unknown>;
  const wrapped = raw.Response as Record<string, unknown> | undefined;

  const dataset: unknown = wrapped?.Dataset ?? raw.Dataset ?? raw;
  return dataset as DatasetInfo;
}
653
+
654
+ // ============================================================================
655
+ // Dataset binding operations
656
+ // ============================================================================
657
+
658
/**
 * Query dataset ↔ bucket binding (with cache).
 *
 * The binding is looked up for the URI `cos://{bucket}/{cosPrefix}`.
 *
 * A definitive "not found" answer (HTTP 404 or code `NoSuchBinding`) returns
 * `null` immediately instead of going through the retry backoff. Other
 * failures are retried; if the final error is a 400, the binding is also
 * reported as missing.
 *
 * @param cosPrefix COS key prefix the binding covers (e.g. "memory/")
 * @returns The binding, or `null` when it does not exist or the response has
 *          no recognizable binding payload.
 * @throws Error when the query fails for any other reason.
 */
async function getDatasetBinding(
  cos: COS,
  bucket: string,
  region: string,
  datasetName: string,
  cosPrefix: string,
): Promise<DatasetBinding | null> {
  const uri = `cos://${bucket}/${cosPrefix}`;
  const cacheKey = `binding-${datasetName}-${bucket}-${cosPrefix}`;

  const cached = getCachedData<DatasetBinding>(cacheKey);
  if (cached) {
    return cached;
  }

  // Read the fields we inspect without assuming the thrown value is an object.
  const inspect = (err: unknown): { statusCode?: number; code?: string } => {
    if (typeof err !== 'object' || err === null) {
      return {};
    }
    const fields = err as Record<string, unknown>;
    return {
      statusCode: typeof fields.statusCode === 'number' ? fields.statusCode : undefined,
      code: typeof fields.code === 'string' ? fields.code : undefined,
    };
  };

  // Sentinel returned from inside the retry loop so that a definitive
  // "not found" is not retried.
  const NOT_FOUND = Symbol('binding-not-found');

  let result: unknown;
  try {
    result = await withRetry(async () => {
      try {
        return await sendCIRequest(cos, bucket, region, 'GET', 'datasetbinding', undefined, {
          datasetname: datasetName,
          uri,
        });
      } catch (err) {
        const { statusCode, code } = inspect(err);
        if (statusCode === 404 || code === 'NoSuchBinding') {
          return NOT_FOUND;
        }
        throw err;
      }
    });
  } catch (err) {
    const { statusCode, code } = inspect(err);
    if (code === 'NoSuchBinding' || statusCode === 404 || statusCode === 400) {
      return null;
    }
    throw new Error(
      `cos-bootstrap: failed to query dataset binding "${datasetName}": ${serializeError(err)}`,
    );
  }

  if (result === NOT_FOUND) {
    return null;
  }

  // Handle the response shapes seen from the CI API (an empty response counts as "no binding").
  const res = (result ?? {}) as Record<string, unknown>;
  const response = res.Response as Record<string, unknown> | undefined;

  let binding: DatasetBinding | null = null;
  if (response?.Binding) {
    binding = response.Binding as DatasetBinding;
  } else if (res.Binding) {
    binding = res.Binding as DatasetBinding;
  } else if (res.State) {
    binding = res as unknown as DatasetBinding;
  }

  if (binding) {
    setCachedData(cacheKey, binding);
  }

  return binding;
}
715
+
716
/**
 * Bind a dataset to `cos://{bucket}/{cosPrefix}` via `POST datasetbinding`
 * (sent with `Mode: 1`).
 *
 * The call is retried (2 retries, 2000 ms base delay) because the CI API may
 * answer 400/500 when the dataset has only just been created.
 *
 * @param cosPrefix COS key prefix the binding covers (e.g. "memory/")
 * @returns The `Binding` object from the response (looked up under
 *          `Response.Binding`, then `Binding`), or the whole response when
 *          neither is present.
 */
async function createDatasetBinding(
  cos: COS,
  bucket: string,
  region: string,
  datasetName: string,
  cosPrefix: string,
): Promise<DatasetBinding> {
  const postBinding = (): Promise<unknown> => sendCIRequest(cos, bucket, region, 'POST', 'datasetbinding', {
    DatasetName: datasetName,
    URI: `cos://${bucket}/${cosPrefix}`,
    Mode: 1,
  });

  const raw = (await withRetry(postBinding, 2, 2000)) as Record<string, unknown>;
  const wrapped = raw.Response as Record<string, unknown> | undefined;

  const binding: unknown = wrapped?.Binding ?? raw.Binding ?? raw;
  return binding as DatasetBinding;
}
742
+
743
+ // ============================================================================
744
+ // Main bootstrap entry point
745
+ // ============================================================================
746
+
747
/**
 * What the bootstrap did for a single dataset.
 */
export interface DatasetOutcome {
  /** CI dataset name. */
  name: string;
  /** Bucket key prefix the dataset is bound to. */
  cosPrefix: string;
  /** `true` when the dataset was missing and had to be created. */
  datasetCreated: boolean;
  /** `true` when the binding was missing and had to be created. */
  bindingCreated: boolean;
}
754
+
755
/**
 * Result of a bootstrap run. Returned for both success and failure; on
 * failure `success` is `false` and `error` carries the reason.
 */
export interface BootstrapOutcome {
  /** Whether every step completed. */
  success: boolean;
  /** The COS SDK instance used for the run. */
  cos: COS;
  /** The resolved configuration the run operated on. */
  config: ResolvedCosConfig;
  /** `true` when the bucket was not found and creation was performed. */
  bucketCreated: boolean;
  /** Per-dataset results (one entry per dataset in config.datasets). */
  datasetOutcomes: DatasetOutcome[];
  /** Serialized failure reason; only set when `success` is `false`. */
  error?: string;
}
764
+
765
/**
 * Run the full bootstrap sequence:
 *   1. Resolve the config and obtain the COS SDK instance
 *   2. Ensure the bucket exists (created when the existence probe says it is
 *      missing, followed by a 3 s pause before the CI API is used)
 *   3. For each dataset in config.datasets, in order:
 *      a. Ensure dataset exists (GET → POST if missing)
 *      b. Ensure binding exists (GET → POST if missing), bound to `cos://{bucket}/{cosPrefix}`
 *
 * Failures in steps 2–3 do not reject: they are logged via `logger.warn` and
 * reported as `success: false` with a serialized `error`. In that case
 * `datasetOutcomes` holds the datasets processed so far, including the one
 * that failed, with flags reflecting what had been completed for it.
 *
 * @param input  Raw user configuration.
 * @param logger Receives progress (`info`) and failure (`warn`) messages.
 * @returns A {@link BootstrapOutcome} with the COS instance and resolved config.
 * @throws Error when the configuration itself is invalid (config resolution
 *         happens before the failure-capturing section).
 */
export async function bootstrap(
  input: CosBootstrapConfig,
  logger: Logger,
): Promise<BootstrapOutcome> {
  // ---- Step 0: Resolve config & create COS instance ----
  const config = resolveConfig(input);
  const cos = getCOSInstance(config.secretId, config.secretKey);

  let bucketCreated = false;
  const datasetOutcomes: DatasetOutcome[] = [];

  try {
    // ---- Step 1: Ensure bucket exists ----
    const bucketExists = await doesBucketExist(cos, config.bucket, config.region);

    if (!bucketExists) {
      logger.info(`cos-bootstrap: creating bucket "${config.bucket}"...`);
      await createBucket(cos, config.bucket, config.region);
      bucketCreated = true;

      // CI (data-processing) API may not be immediately available on a freshly
      // created bucket. Wait a short period to reduce 400/500 errors downstream.
      await new Promise<void>((resolve) => setTimeout(resolve, 3000));
    }

    // ---- Step 2: For each dataset — ensure dataset + binding ----
    for (const ds of config.datasets) {
      const outcome: DatasetOutcome = {
        name: ds.name,
        cosPrefix: ds.cosPrefix,
        datasetCreated: false,
        bindingCreated: false,
      };
      // Register the outcome up front: if a later step throws, the failure
      // result still shows how far this dataset got.
      datasetOutcomes.push(outcome);

      // 2a. Ensure dataset exists
      const datasetInfo = await getDatasetInfo(cos, config.bucket, config.region, ds.name);

      if (!datasetInfo) {
        logger.info(`cos-bootstrap: [${ds.name}] creating dataset (template: ${ds.templateId})...`);
        await createDataset(
          cos,
          config.bucket,
          config.region,
          ds.name,
          ds.templateId,
          ds.description,
        );
        outcome.datasetCreated = true;
      }

      // 2b. Ensure binding exists (bound to cosPrefix)
      const binding = await getDatasetBinding(
        cos, config.bucket, config.region, ds.name, ds.cosPrefix,
      );

      if (!binding) {
        logger.info(`cos-bootstrap: [${ds.name}] creating binding → ${ds.cosPrefix}...`);
        await createDatasetBinding(
          cos, config.bucket, config.region, ds.name, ds.cosPrefix,
        );
        outcome.bindingCreated = true;
      }
    }

    logger.info(`cos-bootstrap: ready ✓ (${config.datasets.length} dataset(s))`);

    return {
      success: true,
      cos,
      config,
      bucketCreated,
      datasetOutcomes,
    };
  } catch (err) {
    const message = serializeError(err);
    logger.warn(`cos-bootstrap: initialization failed: ${message}`);

    return {
      success: false,
      cos,
      config,
      bucketCreated,
      datasetOutcomes,
      error: message,
    };
  }
}